babosa 0.3.10 → 1.0.4

Files changed (38)
  1. checksums.yaml +7 -0
  2. data/Changelog.md +107 -0
  3. data/README.md +5 -23
  4. data/lib/babosa.rb +0 -17
  5. data/lib/babosa/identifier.rb +19 -17
  6. data/lib/babosa/transliterator/base.rb +19 -3
  7. data/lib/babosa/transliterator/hindi.rb +137 -0
  8. data/lib/babosa/transliterator/macedonian.rb +3 -1
  9. data/lib/babosa/transliterator/turkish.rb +8 -0
  10. data/lib/babosa/transliterator/ukrainian.rb +19 -0
  11. data/lib/babosa/transliterator/vietnamese.rb +143 -0
  12. data/lib/babosa/utf8/active_support_proxy.rb +26 -8
  13. data/lib/babosa/utf8/dumb_proxy.rb +23 -16
  14. data/lib/babosa/utf8/java_proxy.rb +1 -1
  15. data/lib/babosa/utf8/proxy.rb +46 -39
  16. data/lib/babosa/utf8/unicode_proxy.rb +3 -1
  17. data/lib/babosa/version.rb +1 -1
  18. data/spec/babosa_spec.rb +50 -37
  19. data/spec/spec_helper.rb +17 -14
  20. data/spec/transliterators/base_spec.rb +3 -3
  21. data/spec/transliterators/bulgarian_spec.rb +1 -1
  22. data/spec/transliterators/danish_spec.rb +1 -1
  23. data/spec/transliterators/german_spec.rb +2 -2
  24. data/spec/transliterators/greek_spec.rb +1 -1
  25. data/spec/transliterators/hindi_spec.rb +17 -0
  26. data/spec/transliterators/latin_spec.rb +9 -0
  27. data/spec/transliterators/norwegian_spec.rb +1 -1
  28. data/spec/transliterators/polish_spec.rb +14 -0
  29. data/spec/transliterators/romanian_spec.rb +1 -1
  30. data/spec/transliterators/serbian_spec.rb +1 -1
  31. data/spec/transliterators/spanish_spec.rb +1 -1
  32. data/spec/transliterators/swedish_spec.rb +1 -1
  33. data/spec/transliterators/turkish_spec.rb +24 -0
  34. data/spec/transliterators/ukrainian_spec.rb +80 -1
  35. data/spec/transliterators/vietnamese_spec.rb +18 -0
  36. data/spec/utf8_proxy_spec.rb +22 -18
  37. metadata +64 -52
  38. data/init.rb +0 -3
data/lib/babosa/transliterator/macedonian.rb
@@ -12,6 +12,7 @@ module Babosa
         "Ц" => "C",
         "Ѕ" => "Z",
         "Ј" => "J",
+        "Х" => "H",
         "ѓ" => "gj",
         "љ" => "lj",
         "њ" => "nj",
@@ -20,7 +21,8 @@ module Babosa
         "ж" => "zh",
         "ц" => "c",
         "ѕ" => "z",
-        "ј" => "j"
+        "ј" => "j",
+        "х" => "h"
       }
     end
   end
data/lib/babosa/transliterator/turkish.rb
@@ -0,0 +1,8 @@
+# encoding: utf-8
+
+module Babosa
+  module Transliterator
+    class Turkish < Latin
+    end
+  end
+end
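The Turkish class adds no mappings of its own, so :turkish resolves to the approximations inherited from Latin; characters such as "ı" and "ş" fall inside the U+00C0..U+017E range that the parent class covers. A minimal usage sketch, with the expected value inferred from the Latin table rather than taken from the gem's specs:

    require "babosa"

    # Dotless i and s-cedilla are approximated by the inherited Latin mappings.
    "ılık şarap".to_slug.approximate_ascii(:turkish)  # => "ilik sarap"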
data/lib/babosa/transliterator/ukrainian.rb
@@ -3,8 +3,27 @@ module Babosa
   module Transliterator
     class Ukrainian < Cyrillic
       APPROXIMATIONS = {
+        "Г" => "H",
+        "г" => "h",
+        "Ґ" => "G",
+        "ґ" => "g",
+        "є" => "ie",
         "И" => "Y",
         "и" => "y",
+        "І" => "I",
+        "і" => "i",
+        "ї" => "i",
+        "Й" => "Y",
+        "й" => "i",
+        "Х" => "Kh",
+        "х" => "kh",
+        "Ц" => "Ts",
+        "ц" => 'ts',
+        "Щ" => "Shch",
+        "щ" => "shch",
+        "ю" => "iu",
+        "я" => "ia",
+        "'" => ""
       }
     end
   end
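These additions give Ukrainian its own romanization on top of the generic Cyrillic table (Г → H, Х → Kh, Щ → Shch, apostrophes dropped). A minimal usage sketch; the sample words and expected values are read off the mappings above, not taken from the gem's specs:

    require "babosa"

    "Харків".to_slug.approximate_ascii(:ukrainian)     # => "Kharkiv"
    "Запоріжжя".to_slug.approximate_ascii(:ukrainian)  # => "Zaporizhzhia"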
data/lib/babosa/transliterator/vietnamese.rb
@@ -0,0 +1,143 @@
+# encoding: utf-8
+module Babosa
+  module Transliterator
+    class Vietnamese < Latin
+      APPROXIMATIONS = {
+        "à" => "a",
+        "á" => "a",
+        "ạ" => "a",
+        "ả" => "a",
+        "ã" => "a",
+        "â" => "a",
+        "ầ" => "a",
+        "ấ" => "a",
+        "ậ" => "a",
+        "ẩ" => "a",
+        "ẫ" => "a",
+        "ă" => "a",
+        "ằ" => "a",
+        "ắ" => "a",
+        "ặ" => "a",
+        "ẳ" => "a",
+        "ẵ" => "a",
+        "À" => "A",
+        "Á" => "A",
+        "Ạ" => "A",
+        "Ả" => "A",
+        "Ã" => "A",
+        "Â" => "A",
+        "Ầ" => "A",
+        "Ấ" => "A",
+        "Ậ" => "A",
+        "Ẩ" => "A",
+        "Ẫ" => "A",
+        "Ă" => "A",
+        "Ằ" => "A",
+        "Ắ" => "A",
+        "Ặ" => "A",
+        "Ẳ" => "A",
+        "Ẵ" => "A",
+        "ì" => "i",
+        "í" => "i",
+        "ị" => "i",
+        "ỉ" => "i",
+        "ĩ" => "i",
+        "Ì" => "I",
+        "Í" => "I",
+        "Ị" => "I",
+        "Ỉ" => "I",
+        "Ĩ" => "I",
+        "ù" => "u",
+        "ú" => "u",
+        "ụ" => "u",
+        "ủ" => "u",
+        "ũ" => "u",
+        "ư" => "u",
+        "ừ" => "u",
+        "ứ" => "u",
+        "ự" => "u",
+        "ử" => "u",
+        "ữ" => "u",
+        "Ù" => "U",
+        "Ú" => "U",
+        "Ụ" => "U",
+        "Ủ" => "U",
+        "Ũ" => "U",
+        "Ư" => "U",
+        "Ừ" => "U",
+        "Ứ" => "U",
+        "Ự" => "U",
+        "Ử" => "U",
+        "Ữ" => "U",
+        "è" => "e",
+        "é" => "e",
+        "ẹ" => "e",
+        "ẻ" => "e",
+        "ẽ" => "e",
+        "ê" => "e",
+        "ề" => "e",
+        "ế" => "e",
+        "ệ" => "e",
+        "ể" => "e",
+        "ễ" => "e",
+        "È" => "E",
+        "É" => "E",
+        "Ẹ" => "E",
+        "Ẻ" => "E",
+        "Ẽ" => "E",
+        "Ê" => "E",
+        "Ề" => "E",
+        "Ế" => "E",
+        "Ệ" => "E",
+        "Ể" => "E",
+        "Ễ" => "E",
+        "ò" => "o",
+        "ó" => "o",
+        "ọ" => "o",
+        "ỏ" => "o",
+        "õ" => "o",
+        "ô" => "o",
+        "ồ" => "o",
+        "ố" => "o",
+        "ộ" => "o",
+        "ổ" => "o",
+        "ỗ" => "o",
+        "ơ" => "o",
+        "ờ" => "o",
+        "ớ" => "o",
+        "ợ" => "o",
+        "ở" => "o",
+        "ỡ" => "o",
+        "Ò" => "O",
+        "Ó" => "O",
+        "Ọ" => "O",
+        "Ỏ" => "O",
+        "Õ" => "O",
+        "Ô" => "O",
+        "Ồ" => "O",
+        "Ố" => "O",
+        "Ộ" => "O",
+        "Ổ" => "O",
+        "Ỗ" => "O",
+        "Ơ" => "O",
+        "Ờ" => "O",
+        "Ớ" => "O",
+        "Ợ" => "O",
+        "Ở" => "O",
+        "Ỡ" => "O",
+        "ỳ" => "y",
+        "ý" => "y",
+        "ỵ" => "y",
+        "ỷ" => "y",
+        "ỹ" => "y",
+        "Ỳ" => "Y",
+        "Ý" => "Y",
+        "Ỵ" => "Y",
+        "Ỷ" => "Y",
+        "Ỹ" => "Y",
+        "đ" => "d",
+        "Đ" => "D"
+      }
+    end
+  end
+end
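The new Vietnamese transliterator strips tone and vowel-quality marks and maps đ/Đ to d/D. A minimal usage sketch; the expected values are read off the table above rather than taken from the gem's specs:

    require "babosa"

    "Đà Nẵng".to_slug.approximate_ascii(:vietnamese)   # => "Da Nang"
    "Việt Nam".to_slug.approximate_ascii(:vietnamese)  # => "Viet Nam"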
data/lib/babosa/utf8/active_support_proxy.rb
@@ -1,19 +1,37 @@
+require 'active_support'
+require 'active_support/multibyte/unicode'
+
 module Babosa
   module UTF8
     # A UTF-8 proxy using Active Support's multibyte support.
     module ActiveSupportProxy
-      extend UTF8Proxy
+      extend ActiveSupport::Multibyte::Unicode
       extend self
-      def downcase(string)
-        ActiveSupport::Multibyte::Chars.new(string).downcase.to_s
-      end

-      def upcase(string)
-        ActiveSupport::Multibyte::Chars.new(string).upcase.to_s
+      def self.normalize_utf8(string)
+        normalize(string, :c)
       end

-      def normalize_utf8(string)
-        ActiveSupport::Multibyte::Chars.new(string).normalize(:c).to_s
+      if ActiveSupport::VERSION::MAJOR == 3
+        def downcase(string)
+          ActiveSupport::Multibyte::Chars.new(string).downcase.to_s
+        end
+
+        def upcase(string)
+          ActiveSupport::Multibyte::Chars.new(string).upcase.to_s
+        end
+      elsif ActiveSupport::VERSION::MAJOR >= 6
+        def self.normalize_utf8(string)
+          string.unicode_normalize(:nfc).to_s
+        end
+
+        def downcase(string)
+          string.downcase.to_s
+        end
+
+        def upcase(string)
+          string.upcase.to_s
+        end
       end
     end
   end
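The proxy now extends ActiveSupport::Multibyte::Unicode and switches implementations by ActiveSupport major version (Chars on 3, plain String methods on 6 and later). A minimal sketch of the public surface, assuming active_support is installed; the require path follows the file list above:

    require "babosa"
    require "babosa/utf8/active_support_proxy"

    proxy = Babosa::UTF8::ActiveSupportProxy
    decomposed = [117, 776].pack("U*")  # "u" plus COMBINING DIAERESIS
    proxy.normalize_utf8(decomposed)    # => "ü", composed to NFC
    proxy.upcase("feliz año")           # => "FELIZ AÑO"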
data/lib/babosa/utf8/dumb_proxy.rb
@@ -10,32 +10,39 @@ module Babosa
    # or ActiveSupport should be used instead because they support the full
    # UTF-8 character range.
    module DumbProxy
-      extend UTF8Proxy
+      extend Proxy
      extend self

      def downcase(string)
-        string.unpack("U*").map {|char| Mappings::DOWNCASE[char] or char}.flatten.pack("U*")
+        string.downcase.unpack("U*").map {|char| Mappings::DOWNCASE[char] or char}.flatten.pack("U*")
      end

      def upcase(string)
-        string.unpack("U*").map {|char| Mappings::UPCASE[char] or char}.flatten.pack("U*")
+        string.upcase.unpack("U*").map {|char| Mappings::UPCASE[char] or char}.flatten.pack("U*")
      end

-      # This does a very naive Unicode normalization, which should work for
-      # this library's purposes (i.e., Roman-based codepoints, up to U+017E).
-      # Do not use reuse this as a general solution! Use a real library like
-      # Unicode or ActiveSupport instead.
-      def normalize_utf8(string)
-        codepoints = string.unpack("U*")
-        new = []
-        until codepoints.empty? do
-          if Mappings::COMPOSITION[codepoints[0..1]]
-            new << Mappings::COMPOSITION[codepoints.slice!(0,2)]
-          else
-            new << codepoints.shift
+      if ''.respond_to?(:unicode_normalize)
+        def normalize_utf8(string)
+          string.unicode_normalize
+        end
+      else
+        # On Ruby 2.2, this uses the native Unicode normalize method. On all
+        # other Rubies, it does a very naive Unicode normalization, which should
+        # work for this library's purposes (i.e., Roman-based codepoints, up to
+        # U+017E). Do not use reuse this as a general solution! Use a real
+        # library like Unicode or ActiveSupport instead.
+        def normalize_utf8(string)
+          codepoints = string.unpack("U*")
+          new = []
+          until codepoints.empty? do
+            if Mappings::COMPOSITION[codepoints[0..1]]
+              new << Mappings::COMPOSITION[codepoints.slice!(0,2)]
+            else
+              new << codepoints.shift
+            end
          end
+          new.compact.flatten.pack("U*")
        end
-        new.compact.flatten.pack("U*")
      end
    end
  end
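Where String#unicode_normalize exists the proxy now simply delegates to it; only older Rubies fall back to the COMPOSITION table, which covers Latin codepoints up to U+017E. A minimal sketch, assuming the proxy file is loaded explicitly (the require path follows the file list above) and using the same decomposed input as the spec:

    require "babosa"
    require "babosa/utf8/dumb_proxy"

    decomposed = [117, 776].pack("U*")                  # "u" + COMBINING DIAERESIS
    Babosa::UTF8::DumbProxy.normalize_utf8(decomposed)  # => "ü" as one composed codepoint
    Babosa::UTF8::DumbProxy.downcase("ÜBER")            # => "über"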
data/lib/babosa/utf8/java_proxy.rb
@@ -2,7 +2,7 @@ module Babosa
   module UTF8
     # A UTF-8 proxy module using Java's built-in Unicode support. Requires JRuby 1.5+.
     module JavaProxy
-      extend UTF8Proxy
+      extend Proxy
       extend self
       java_import java.text.Normalizer

data/lib/babosa/utf8/proxy.rb
@@ -8,7 +8,7 @@ module Babosa

    # A UTF-8 proxy for Babosa can be any object which responds to the methods in this module.
    # The following proxies are provided by Babosa: {ActiveSupportProxy}, {DumbProxy}, {JavaProxy}, and {UnicodeProxy}.
-    module UTF8Proxy
+    module Proxy
      CP1252 = {
        128 => [226, 130, 172],
        129 => nil,
@@ -62,50 +62,57 @@ module Babosa
        raise NotImplementedError
      end

-      # Attempt to replace invalid UTF-8 bytes with valid ones. This method
-      # naively assumes if you have invalid UTF8 bytes, they are either Windows
-      # CP-1252 or ISO8859-1. In practice this isn't a bad assumption, but may not
-      # always work.
-      def tidy_bytes(string)
-        bytes = string.unpack("C*")
-        conts_expected = 0
-        last_lead = 0
+      if ''.respond_to?(:scrub) && !defined?(Rubinius)
+        # Attempt to replace invalid UTF-8 bytes with valid ones. This method
+        # naively assumes if you have invalid UTF8 bytes, they are either Windows
+        # CP-1252 or ISO8859-1. In practice this isn't a bad assumption, but may not
+        # always work.
+        def tidy_bytes(string)
+          string.scrub do |bad|
+            tidy_byte(*bad.bytes).flatten.compact.pack('C*').unpack('U*').pack('U*')
+          end
+        end
+      else
+        def tidy_bytes(string)
+          bytes = string.unpack("C*")
+          conts_expected = 0
+          last_lead = 0

-        bytes.each_index do |i|
-          byte = bytes[i]
-          is_ascii = byte < 128
-          is_cont = byte > 127 && byte < 192
-          is_lead = byte > 191 && byte < 245
-          is_unused = byte > 240
-          is_restricted = byte > 244
+          bytes.each_index do |i|
+            byte = bytes[i]
+            is_cont = byte > 127 && byte < 192
+            is_lead = byte > 191 && byte < 245
+            is_unused = byte > 240
+            is_restricted = byte > 244

-          # Impossible or highly unlikely byte? Clean it.
-          if is_unused || is_restricted
-            bytes[i] = tidy_byte(byte)
-          elsif is_cont
-            # Not expecting contination byte? Clean up. Otherwise, now expect one less.
-            conts_expected == 0 ? bytes[i] = tidy_byte(byte) : conts_expected -= 1
-          else
-            if conts_expected > 0
-              # Expected continuation, but got ASCII or leading? Clean backwards up to
-              # the leading byte.
-              (1..(i - last_lead)).each {|j| bytes[i - j] = tidy_byte(bytes[i - j])}
-              conts_expected = 0
-            end
-            if is_lead
-              # Final byte is leading? Clean it.
-              if i == bytes.length - 1
-                bytes[i] = tidy_byte(bytes.last)
-              else
-                # Valid leading byte? Expect continuations determined by position of
-                # first zero bit, with max of 3.
-                conts_expected = byte < 224 ? 1 : byte < 240 ? 2 : 3
-                last_lead = i
+            # Impossible or highly unlikely byte? Clean it.
+            if is_unused || is_restricted
+              bytes[i] = tidy_byte(byte)
+            elsif is_cont
+              # Not expecting contination byte? Clean up. Otherwise, now expect one less.
+              conts_expected == 0 ? bytes[i] = tidy_byte(byte) : conts_expected -= 1
+            else
+              if conts_expected > 0
+                # Expected continuation, but got ASCII or leading? Clean backwards up to
+                # the leading byte.
+                (1..(i - last_lead)).each {|j| bytes[i - j] = tidy_byte(bytes[i - j])}
+                conts_expected = 0
+              end
+              if is_lead
+                # Final byte is leading? Clean it.
+                if i == bytes.length - 1
+                  bytes[i] = tidy_byte(bytes.last)
+                else
+                  # Valid leading byte? Expect continuations determined by position of
+                  # first zero bit, with max of 3.
+                  conts_expected = byte < 224 ? 1 : byte < 240 ? 2 : 3
+                  last_lead = i
+                end
              end
            end
          end
+          bytes.empty? ? "" : bytes.flatten.compact.pack("C*").unpack("U*").pack("U*")
        end
-        bytes.empty? ? "" : bytes.flatten.compact.pack("C*").unpack("U*").pack("U*")
      end

      private
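tidy_bytes still assumes stray invalid bytes are CP-1252 or ISO8859-1; the change is that, where String#scrub is available (and not on Rubinius), scrub locates the invalid sequences and each one is repaired with the same tidy_byte table instead of walking every byte by hand. A minimal sketch; DumbProxy is used here only because it mixes in this module, and the sample bytes are illustrative:

    require "babosa"
    require "babosa/utf8/dumb_proxy"

    # "caf" followed by a lone Latin-1/CP-1252 0xE9 ("é"), which is invalid UTF-8.
    broken = [99, 97, 102, 233].pack("C*").force_encoding("UTF-8")
    Babosa::UTF8::DumbProxy.tidy_bytes(broken)  # => "café"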
data/lib/babosa/utf8/unicode_proxy.rb
@@ -1,9 +1,11 @@
+require 'unicode'
+
 module Babosa
   module UTF8
     # A UTF-8 proxy using the Unicode gem.
     # @see http://github.com/blackwinter/unicode
     module UnicodeProxy
-      extend UTF8Proxy
+      extend Proxy
       extend self
       def downcase(string)
         Unicode.downcase(string)
data/lib/babosa/version.rb
@@ -1,5 +1,5 @@
 module Babosa
   module Version
-    STRING = "0.3.10"
+    STRING = '1.0.4'
   end
 end
data/spec/babosa_spec.rb
@@ -4,7 +4,7 @@ require File.expand_path("../spec_helper", __FILE__)
 describe Babosa::Identifier do

   it "should respond_to :empty?" do
-    "".to_slug.should respond_to(:empty?)
+    expect("".to_slug).to respond_to(:empty?)
   end

   %w[approximate_ascii clean downcase word_chars normalize to_ascii upcase with_dashes].each do |method|
@@ -18,128 +18,141 @@ describe Babosa::Identifier do
   describe "#word_chars" do
     it "word_chars! should leave only letters and spaces" do
       string = "a*$%^$@!@b$%^&*()*!c"
-      string.to_slug.word_chars.should match(/[a-z ]*/i)
+      expect(string.to_slug.word_chars!).to match(/[a-z ]*/i)
     end
   end

   describe "#transliterate" do
     it "should transliterate to ascii" do
-      slug = (0xC0..0x17E).to_a.each do |codepoint|
+      (0xC0..0x17E).to_a.each do |codepoint|
        ss = [codepoint].pack("U*").to_slug
-        ss.approximate_ascii.should match(/[\x0-\x7f]/)
+        expect(ss.approximate_ascii!).to match(/[\x0-\x7f]/)
      end
    end

    it "should transliterate uncomposed utf8" do
      string = [117, 776].pack("U*") # "ü" as ASCII "u" plus COMBINING DIAERESIS
-      string.to_slug.approximate_ascii.should eql("u")
+      expect(string.to_slug.approximate_ascii).to eql("u")
    end

    it "should transliterate using multiple transliterators" do
      string = "свободное režģis"
-      string.to_slug.approximate_ascii(:latin, :russian).should eql("svobodnoe rezgis")
+      expect(string.to_slug.approximate_ascii(:latin, :russian)).to eql("svobodnoe rezgis")
    end
  end

  describe "#downcase" do
    it "should lowercase strings" do
-      "FELIZ AÑO".to_slug.downcase.should eql("feliz año")
+      expect("FELIZ AÑO".to_slug.downcase).to eql("feliz año")
    end
  end

  describe "#upcase" do
    it "should uppercase strings" do
-      "feliz año".to_slug.upcase.should eql("FELIZ AÑO")
+      expect("feliz año".to_slug.upcase).to eql("FELIZ AÑO")
    end
  end

  describe "#normalize" do

    it "should allow passing locale as key for :transliterate" do
-      "ö".to_slug.clean.normalize(:transliterate => :german).should eql("oe")
+      expect("ö".to_slug.clean.normalize(:transliterate => :german)).to eql("oe")
    end

    it "should replace whitespace with dashes" do
-      "a b".to_slug.clean.normalize.should eql("a-b")
+      expect("a b".to_slug.clean.normalize).to eql("a-b")
    end

    it "should replace multiple spaces with 1 dash" do
-      "a b".to_slug.clean.normalize.should eql("a-b")
+      expect("a b".to_slug.clean.normalize).to eql("a-b")
    end

    it "should replace multiple dashes with 1 dash" do
-      "male - female".to_slug.normalize.should eql("male-female")
+      expect("male - female".to_slug.normalize).to eql("male-female")
    end

    it "should strip trailing space" do
-      "ab ".to_slug.normalize.should eql("ab")
+      expect("ab ".to_slug.normalize).to eql("ab")
    end

    it "should strip leading space" do
-      " ab".to_slug.normalize.should eql("ab")
+      expect(" ab".to_slug.normalize).to eql("ab")
    end

    it "should strip trailing slashes" do
-      "ab-".to_slug.normalize.should eql("ab")
+      expect("ab-".to_slug.normalize).to eql("ab")
    end

    it "should strip leading slashes" do
-      "-ab".to_slug.normalize.should eql("ab")
+      expect("-ab".to_slug.normalize).to eql("ab")
    end

    it "should not modify valid name strings" do
-      "a-b-c-d".to_slug.normalize.should eql("a-b-c-d")
+      expect("a-b-c-d".to_slug.normalize).to eql("a-b-c-d")
    end

    it "should not convert underscores" do
-      "hello_world".to_slug.normalize.should eql("hello_world")
+      expect("hello_world".to_slug.normalize).to eql("hello_world")
    end

    it "should work with non roman chars" do
-      "検 索".to_slug.normalize.should eql("検-索")
+      expect("検 索".to_slug.normalize).to eql("検-索")
    end

    context "with to_ascii option" do
      it "should approximate and strip non ascii" do
        ss = "カタカナ: katakana is über cool".to_slug
-        ss.normalize(:to_ascii => true).should eql("katakana-is-uber-cool")
+        expect(ss.normalize(:to_ascii => true)).to eql("katakana-is-uber-cool")
      end
    end
  end

  describe "#truncate_bytes" do
    it "should by byte length" do
-      "üa".to_slug.truncate_bytes(2).should eql("ü")
-      "üa".to_slug.truncate_bytes(1).should eql("")
-      "üa".to_slug.truncate_bytes(100).should eql("üa")
-      "üéøá".to_slug.truncate_bytes(3).should eql("ü")
+      expect("üa".to_slug.truncate_bytes(2)).to eql("ü")
+      expect("üa".to_slug.truncate_bytes(1)).to eql("")
+      expect("üa".to_slug.truncate_bytes(100)).to eql("üa")
+      expect("üéøá".to_slug.truncate_bytes(3)).to eql("ü")
    end
  end

  describe "#truncate" do
    it "should truncate by char length" do
-      "üa".to_slug.truncate(2).should eql("üa")
-      "üa".to_slug.truncate(1).should eql("ü")
-      "üa".to_slug.truncate(100).should eql("üa")
+      expect("üa".to_slug.truncate(2)).to eql("üa")
+      expect("üa".to_slug.truncate(1)).to eql("ü")
+      expect("üa".to_slug.truncate(100)).to eql("üa")
    end
  end

  describe "#with_dashes" do
    it "should not change byte size when replacing spaces" do
-      "".to_slug.with_dashes.bytesize.should eql(0)
-      " ".to_slug.with_dashes.bytesize.should eql(1)
-      "-abc-".to_slug.with_dashes.bytesize.should eql(5)
-      " abc ".to_slug.with_dashes.bytesize.should eql(5)
-      " a bc ".to_slug.with_dashes.bytesize.should eql(7)
+      expect("".to_slug.with_dashes.bytesize).to eql(0)
+      expect(" ".to_slug.with_dashes.bytesize).to eql(1)
+      expect("-abc-".to_slug.with_dashes.bytesize).to eql(5)
+      expect(" abc ".to_slug.with_dashes.bytesize).to eql(5)
+      expect(" a bc ".to_slug.with_dashes.bytesize).to eql(7)
    end
  end

  describe "#to_ruby_method" do
    it "should get a string suitable for use as a ruby method" do
-      "¿¿¿hello... world???".to_slug.to_ruby_method.should eql("hello_world?")
-      "カタカナ: katakana is über cool".to_slug.to_ruby_method.should eql("katakana_is_uber_cool")
-      "カタカナ: katakana is über cool!".to_slug.to_ruby_method.should eql("katakana_is_uber_cool!")
-      "カタカナ: katakana is über cool".to_slug.to_ruby_method(false).should eql("katakana_is_uber_cool")
+      expect("¿¿¿hello... world???".to_slug.to_ruby_method).to eql("hello_world?")
+      expect("カタカナ: katakana is über cool".to_slug.to_ruby_method).to eql("katakana_is_uber_cool")
+      expect("カタカナ: katakana is über cool!".to_slug.to_ruby_method).to eql("katakana_is_uber_cool!")
+      expect("カタカナ: katakana is über cool".to_slug.to_ruby_method(false)).to eql("katakana_is_uber_cool")
+    end
+
+    it "should optionally remove trailing punctuation" do
+      expect("¿¿¿hello... world???".to_slug.to_ruby_method(false)).to eql("hello_world")
+    end
+
+    it "should raise an error when it would generate an impossible method name" do
+      # "1".to_identifier.to_ruby_method
+      expect {"1".to_identifier.to_ruby_method}.to raise_error(Babosa::Identifier::Error)
+    end
+
+    it "should raise Babosa::Error error when the string is nil" do
+      expect { "".to_slug.to_ruby_method }.to raise_error(Babosa::Identifier::Error)
    end
  end
- end
+ end