babosa 0.3.10 → 1.0.4

This diff reflects the changes between publicly released package versions as they appear in their respective public registries, and is provided for informational purposes only.
Files changed (38)
  1. checksums.yaml +7 -0
  2. data/Changelog.md +107 -0
  3. data/README.md +5 -23
  4. data/lib/babosa.rb +0 -17
  5. data/lib/babosa/identifier.rb +19 -17
  6. data/lib/babosa/transliterator/base.rb +19 -3
  7. data/lib/babosa/transliterator/hindi.rb +137 -0
  8. data/lib/babosa/transliterator/macedonian.rb +3 -1
  9. data/lib/babosa/transliterator/turkish.rb +8 -0
  10. data/lib/babosa/transliterator/ukrainian.rb +19 -0
  11. data/lib/babosa/transliterator/vietnamese.rb +143 -0
  12. data/lib/babosa/utf8/active_support_proxy.rb +26 -8
  13. data/lib/babosa/utf8/dumb_proxy.rb +23 -16
  14. data/lib/babosa/utf8/java_proxy.rb +1 -1
  15. data/lib/babosa/utf8/proxy.rb +46 -39
  16. data/lib/babosa/utf8/unicode_proxy.rb +3 -1
  17. data/lib/babosa/version.rb +1 -1
  18. data/spec/babosa_spec.rb +50 -37
  19. data/spec/spec_helper.rb +17 -14
  20. data/spec/transliterators/base_spec.rb +3 -3
  21. data/spec/transliterators/bulgarian_spec.rb +1 -1
  22. data/spec/transliterators/danish_spec.rb +1 -1
  23. data/spec/transliterators/german_spec.rb +2 -2
  24. data/spec/transliterators/greek_spec.rb +1 -1
  25. data/spec/transliterators/hindi_spec.rb +17 -0
  26. data/spec/transliterators/latin_spec.rb +9 -0
  27. data/spec/transliterators/norwegian_spec.rb +1 -1
  28. data/spec/transliterators/polish_spec.rb +14 -0
  29. data/spec/transliterators/romanian_spec.rb +1 -1
  30. data/spec/transliterators/serbian_spec.rb +1 -1
  31. data/spec/transliterators/spanish_spec.rb +1 -1
  32. data/spec/transliterators/swedish_spec.rb +1 -1
  33. data/spec/transliterators/turkish_spec.rb +24 -0
  34. data/spec/transliterators/ukrainian_spec.rb +80 -1
  35. data/spec/transliterators/vietnamese_spec.rb +18 -0
  36. data/spec/utf8_proxy_spec.rb +22 -18
  37. metadata +64 -52
  38. data/init.rb +0 -3
data/lib/babosa/transliterator/macedonian.rb
@@ -12,6 +12,7 @@ module Babosa
  "Ц" => "C",
  "Ѕ" => "Z",
  "Ј" => "J",
+ "Х" => "H",
  "ѓ" => "gj",
  "љ" => "lj",
  "њ" => "nj",
@@ -20,7 +21,8 @@ module Babosa
  "ж" => "zh",
  "ц" => "c",
  "ѕ" => "z",
- "ј" => "j"
+ "ј" => "j",
+ "х" => "h"
  }
  end
  end
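The only functional change here is that Macedonian Х/х now approximate to H/h. A minimal usage sketch, assuming the String#to_slug / Identifier#approximate_ascii API exercised by the spec changes later in this diff (outputs are what the mapping tables imply, not verified runs):

  require "babosa"

  # "Х" falls under the new mapping; the remaining letters are expected to
  # come from the Cyrillic parent table.
  "Охрид".to_slug.approximate_ascii(:macedonian).to_s  # expected: "Ohrid"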
data/lib/babosa/transliterator/turkish.rb
@@ -0,0 +1,8 @@
+ # encoding: utf-8
+
+ module Babosa
+ module Transliterator
+ class Turkish < Latin
+ end
+ end
+ end
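The new Turkish transliterator is an empty subclass of Latin, so requesting :turkish falls through to the Latin approximations. A hedged sketch, assuming ğ and ç are covered by the Latin table (not a verified run):

  require "babosa"

  # Expected to approximate to "Agac" if the Latin table maps ğ and ç.
  "Ağaç".to_slug.approximate_ascii(:turkish).to_s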
data/lib/babosa/transliterator/ukrainian.rb
@@ -3,8 +3,27 @@ module Babosa
  module Transliterator
  class Ukrainian < Cyrillic
  APPROXIMATIONS = {
+ "Г" => "H",
+ "г" => "h",
+ "Ґ" => "G",
+ "ґ" => "g",
+ "є" => "ie",
  "И" => "Y",
  "и" => "y",
+ "І" => "I",
+ "і" => "i",
+ "ї" => "i",
+ "Й" => "Y",
+ "й" => "i",
+ "Х" => "Kh",
+ "х" => "kh",
+ "Ц" => "Ts",
+ "ц" => 'ts',
+ "Щ" => "Shch",
+ "щ" => "shch",
+ "ю" => "iu",
+ "я" => "ia",
+ "'" => ""
  }
  end
  end
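The Ukrainian table now diverges from the generic Cyrillic one for г/ґ, the i-type vowels, х, ц, щ, the iotated vowels, and the apostrophe (which is dropped). A hedged sketch of the expected effect; letters not listed above are assumed to come from the Cyrillic parent, and the outputs are expectations rather than verified runs:

  require "babosa"

  "Київ".to_slug.approximate_ascii(:ukrainian).to_s    # expected: "Kyiv"
  "Харків".to_slug.approximate_ascii(:ukrainian).to_s  # expected: "Kharkiv"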
data/lib/babosa/transliterator/vietnamese.rb
@@ -0,0 +1,143 @@
+ # encoding: utf-8
+ module Babosa
+ module Transliterator
+ class Vietnamese < Latin
+ APPROXIMATIONS = {
+ "à" => "a",
+ "á" => "a",
+ "ạ" => "a",
+ "ả" => "a",
+ "ã" => "a",
+ "â" => "a",
+ "ầ" => "a",
+ "ấ" => "a",
+ "ậ" => "a",
+ "ẩ" => "a",
+ "ẫ" => "a",
+ "ă" => "a",
+ "ằ" => "a",
+ "ắ" => "a",
+ "ặ" => "a",
+ "ẳ" => "a",
+ "ẵ" => "a",
+ "À" => "A",
+ "Á" => "A",
+ "Ạ" => "A",
+ "Ả" => "A",
+ "Ã" => "A",
+ "Â" => "A",
+ "Ầ" => "A",
+ "Ấ" => "A",
+ "Ậ" => "A",
+ "Ẩ" => "A",
+ "Ẫ" => "A",
+ "Ă" => "A",
+ "Ằ" => "A",
+ "Ắ" => "A",
+ "Ặ" => "A",
+ "Ẳ" => "A",
+ "Ẵ" => "A",
+ "ì" => "i",
+ "í" => "i",
+ "ị" => "i",
+ "ỉ" => "i",
+ "ĩ" => "i",
+ "Ì" => "I",
+ "Í" => "I",
+ "Ị" => "I",
+ "Ỉ" => "I",
+ "Ĩ" => "I",
+ "ù" => "u",
+ "ú" => "u",
+ "ụ" => "u",
+ "ủ" => "u",
+ "ũ" => "u",
+ "ư" => "u",
+ "ừ" => "u",
+ "ứ" => "u",
+ "ự" => "u",
+ "ử" => "u",
+ "ữ" => "u",
+ "Ù" => "U",
+ "Ú" => "U",
+ "Ụ" => "U",
+ "Ủ" => "U",
+ "Ũ" => "U",
+ "Ư" => "U",
+ "Ừ" => "U",
+ "Ứ" => "U",
+ "Ự" => "U",
+ "Ử" => "U",
+ "Ữ" => "U",
+ "è" => "e",
+ "é" => "e",
+ "ẹ" => "e",
+ "ẻ" => "e",
+ "ẽ" => "e",
+ "ê" => "e",
+ "ề" => "e",
+ "ế" => "e",
+ "ệ" => "e",
+ "ể" => "e",
+ "ễ" => "e",
+ "È" => "E",
+ "É" => "E",
+ "Ẹ" => "E",
+ "Ẻ" => "E",
+ "Ẽ" => "E",
+ "Ê" => "E",
+ "Ề" => "E",
+ "Ế" => "E",
+ "Ệ" => "E",
+ "Ể" => "E",
+ "Ễ" => "E",
+ "ò" => "o",
+ "ó" => "o",
+ "ọ" => "o",
+ "ỏ" => "o",
+ "õ" => "o",
+ "ô" => "o",
+ "ồ" => "o",
+ "ố" => "o",
+ "ộ" => "o",
+ "ổ" => "o",
+ "ỗ" => "o",
+ "ơ" => "o",
+ "ờ" => "o",
+ "ớ" => "o",
+ "ợ" => "o",
+ "ở" => "o",
+ "ỡ" => "o",
+ "Ò" => "O",
+ "Ó" => "O",
+ "Ọ" => "O",
+ "Ỏ" => "O",
+ "Õ" => "O",
+ "Ô" => "O",
+ "Ồ" => "O",
+ "Ố" => "O",
+ "Ộ" => "O",
+ "Ổ" => "O",
+ "Ỗ" => "O",
+ "Ơ" => "O",
+ "Ờ" => "O",
+ "Ớ" => "O",
+ "Ợ" => "O",
+ "Ở" => "O",
+ "Ỡ" => "O",
+ "ỳ" => "y",
+ "ý" => "y",
+ "ỵ" => "y",
+ "ỷ" => "y",
+ "ỹ" => "y",
+ "Ỳ" => "Y",
+ "Ý" => "Y",
+ "Ỵ" => "Y",
+ "Ỷ" => "Y",
+ "Ỹ" => "Y",
+ "đ" => "d",
+ "Đ" => "D"
+ }
+ end
+ end
+ end
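The new Vietnamese transliterator strips tone marks and vowel diacritics and maps đ/Đ to d/D; everything else is inherited from Latin. A hedged usage sketch (expected outputs implied by the table above, not verified runs):

  require "babosa"

  "Đà Nẵng".to_slug.approximate_ascii(:vietnamese).to_s      # expected: "Da Nang"
  "Hồ Chí Minh".to_slug.approximate_ascii(:vietnamese).to_s  # expected: "Ho Chi Minh"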
data/lib/babosa/utf8/active_support_proxy.rb
@@ -1,19 +1,37 @@
+ require 'active_support'
+ require 'active_support/multibyte/unicode'
+
  module Babosa
  module UTF8
  # A UTF-8 proxy using Active Support's multibyte support.
  module ActiveSupportProxy
- extend UTF8Proxy
+ extend ActiveSupport::Multibyte::Unicode
  extend self
- def downcase(string)
- ActiveSupport::Multibyte::Chars.new(string).downcase.to_s
- end

- def upcase(string)
- ActiveSupport::Multibyte::Chars.new(string).upcase.to_s
+ def self.normalize_utf8(string)
+ normalize(string, :c)
  end

- def normalize_utf8(string)
- ActiveSupport::Multibyte::Chars.new(string).normalize(:c).to_s
+ if ActiveSupport::VERSION::MAJOR == 3
+ def downcase(string)
+ ActiveSupport::Multibyte::Chars.new(string).downcase.to_s
+ end
+
+ def upcase(string)
+ ActiveSupport::Multibyte::Chars.new(string).upcase.to_s
+ end
+ elsif ActiveSupport::VERSION::MAJOR >= 6
+ def self.normalize_utf8(string)
+ string.unicode_normalize(:nfc).to_s
+ end
+
+ def downcase(string)
+ string.downcase.to_s
+ end
+
+ def upcase(string)
+ string.upcase.to_s
+ end
  end
  end
  end
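The proxy now requires Active Support itself and branches on ActiveSupport::VERSION::MAJOR: under 3.x it keeps the Chars-based downcase/upcase, under 6+ it switches to String#unicode_normalize and the native case methods, and otherwise it relies on the methods pulled in by extend ActiveSupport::Multibyte::Unicode. A hedged sketch calling the module directly (possible because of extend self; assumes the activesupport gem is installed, and the explicit require is shown in case this proxy is not auto-loaded):

  require "babosa"
  require "babosa/utf8/active_support_proxy"

  # Decomposed "u" + COMBINING DIAERESIS should compose to NFC "ü".
  Babosa::UTF8::ActiveSupportProxy.normalize_utf8("u\u0308")  # expected: "ü"
  Babosa::UTF8::ActiveSupportProxy.downcase("FELIZ AÑO")      # expected: "feliz año"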
data/lib/babosa/utf8/dumb_proxy.rb
@@ -10,32 +10,39 @@ module Babosa
  # or ActiveSupport should be used instead because they support the full
  # UTF-8 character range.
  module DumbProxy
- extend UTF8Proxy
+ extend Proxy
  extend self

  def downcase(string)
- string.unpack("U*").map {|char| Mappings::DOWNCASE[char] or char}.flatten.pack("U*")
+ string.downcase.unpack("U*").map {|char| Mappings::DOWNCASE[char] or char}.flatten.pack("U*")
  end

  def upcase(string)
- string.unpack("U*").map {|char| Mappings::UPCASE[char] or char}.flatten.pack("U*")
+ string.upcase.unpack("U*").map {|char| Mappings::UPCASE[char] or char}.flatten.pack("U*")
  end

- # This does a very naive Unicode normalization, which should work for
- # this library's purposes (i.e., Roman-based codepoints, up to U+017E).
- # Do not use reuse this as a general solution! Use a real library like
- # Unicode or ActiveSupport instead.
- def normalize_utf8(string)
- codepoints = string.unpack("U*")
- new = []
- until codepoints.empty? do
- if Mappings::COMPOSITION[codepoints[0..1]]
- new << Mappings::COMPOSITION[codepoints.slice!(0,2)]
- else
- new << codepoints.shift
+ if ''.respond_to?(:unicode_normalize)
+ def normalize_utf8(string)
+ string.unicode_normalize
+ end
+ else
+ # On Ruby 2.2, this uses the native Unicode normalize method. On all
+ # other Rubies, it does a very naive Unicode normalization, which should
+ # work for this library's purposes (i.e., Roman-based codepoints, up to
+ # U+017E). Do not use reuse this as a general solution! Use a real
+ # library like Unicode or ActiveSupport instead.
+ def normalize_utf8(string)
+ codepoints = string.unpack("U*")
+ new = []
+ until codepoints.empty? do
+ if Mappings::COMPOSITION[codepoints[0..1]]
+ new << Mappings::COMPOSITION[codepoints.slice!(0,2)]
+ else
+ new << codepoints.shift
+ end
  end
+ new.compact.flatten.pack("U*")
  end
- new.compact.flatten.pack("U*")
  end
  end
  end
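In short: normalize_utf8 now delegates to String#unicode_normalize when the running Ruby has it (2.2+) and only falls back to the naive COMPOSITION-table loop otherwise, and the case methods run Ruby's own String#downcase/#upcase before patching up characters via the Mappings tables. A hedged sketch (expected outputs, not verified runs; the explicit require is an assumption in case the proxy is not auto-loaded):

  require "babosa"
  require "babosa/utf8/dumb_proxy"

  # Decomposed "u" + COMBINING DIAERESIS composes to "ü" on either code path.
  Babosa::UTF8::DumbProxy.normalize_utf8([117, 776].pack("U*"))  # expected: "ü"
  Babosa::UTF8::DumbProxy.upcase("feliz año")                    # expected: "FELIZ AÑO"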
data/lib/babosa/utf8/java_proxy.rb
@@ -2,7 +2,7 @@ module Babosa
  module UTF8
  # A UTF-8 proxy module using Java's built-in Unicode support. Requires JRuby 1.5+.
  module JavaProxy
- extend UTF8Proxy
+ extend Proxy
  extend self
  java_import java.text.Normalizer

data/lib/babosa/utf8/proxy.rb
@@ -8,7 +8,7 @@ module Babosa

  # A UTF-8 proxy for Babosa can be any object which responds to the methods in this module.
  # The following proxies are provided by Babosa: {ActiveSupportProxy}, {DumbProxy}, {JavaProxy}, and {UnicodeProxy}.
- module UTF8Proxy
+ module Proxy
  CP1252 = {
  128 => [226, 130, 172],
  129 => nil,
@@ -62,50 +62,57 @@ module Babosa
  raise NotImplementedError
  end

- # Attempt to replace invalid UTF-8 bytes with valid ones. This method
- # naively assumes if you have invalid UTF8 bytes, they are either Windows
- # CP-1252 or ISO8859-1. In practice this isn't a bad assumption, but may not
- # always work.
- def tidy_bytes(string)
- bytes = string.unpack("C*")
- conts_expected = 0
- last_lead = 0
+ if ''.respond_to?(:scrub) && !defined?(Rubinius)
+ # Attempt to replace invalid UTF-8 bytes with valid ones. This method
+ # naively assumes if you have invalid UTF8 bytes, they are either Windows
+ # CP-1252 or ISO8859-1. In practice this isn't a bad assumption, but may not
+ # always work.
+ def tidy_bytes(string)
+ string.scrub do |bad|
+ tidy_byte(*bad.bytes).flatten.compact.pack('C*').unpack('U*').pack('U*')
+ end
+ end
+ else
+ def tidy_bytes(string)
+ bytes = string.unpack("C*")
+ conts_expected = 0
+ last_lead = 0

- bytes.each_index do |i|
- byte = bytes[i]
- is_ascii = byte < 128
- is_cont = byte > 127 && byte < 192
- is_lead = byte > 191 && byte < 245
- is_unused = byte > 240
- is_restricted = byte > 244
+ bytes.each_index do |i|
+ byte = bytes[i]
+ is_cont = byte > 127 && byte < 192
+ is_lead = byte > 191 && byte < 245
+ is_unused = byte > 240
+ is_restricted = byte > 244

- # Impossible or highly unlikely byte? Clean it.
- if is_unused || is_restricted
- bytes[i] = tidy_byte(byte)
- elsif is_cont
- # Not expecting contination byte? Clean up. Otherwise, now expect one less.
- conts_expected == 0 ? bytes[i] = tidy_byte(byte) : conts_expected -= 1
- else
- if conts_expected > 0
- # Expected continuation, but got ASCII or leading? Clean backwards up to
- # the leading byte.
- (1..(i - last_lead)).each {|j| bytes[i - j] = tidy_byte(bytes[i - j])}
- conts_expected = 0
- end
- if is_lead
- # Final byte is leading? Clean it.
- if i == bytes.length - 1
- bytes[i] = tidy_byte(bytes.last)
- else
- # Valid leading byte? Expect continuations determined by position of
- # first zero bit, with max of 3.
- conts_expected = byte < 224 ? 1 : byte < 240 ? 2 : 3
- last_lead = i
+ # Impossible or highly unlikely byte? Clean it.
+ if is_unused || is_restricted
+ bytes[i] = tidy_byte(byte)
+ elsif is_cont
+ # Not expecting contination byte? Clean up. Otherwise, now expect one less.
+ conts_expected == 0 ? bytes[i] = tidy_byte(byte) : conts_expected -= 1
+ else
+ if conts_expected > 0
+ # Expected continuation, but got ASCII or leading? Clean backwards up to
+ # the leading byte.
+ (1..(i - last_lead)).each {|j| bytes[i - j] = tidy_byte(bytes[i - j])}
+ conts_expected = 0
+ end
+ if is_lead
+ # Final byte is leading? Clean it.
+ if i == bytes.length - 1
+ bytes[i] = tidy_byte(bytes.last)
+ else
+ # Valid leading byte? Expect continuations determined by position of
+ # first zero bit, with max of 3.
+ conts_expected = byte < 224 ? 1 : byte < 240 ? 2 : 3
+ last_lead = i
+ end
  end
  end
  end
+ bytes.empty? ? "" : bytes.flatten.compact.pack("C*").unpack("U*").pack("U*")
  end
- bytes.empty? ? "" : bytes.flatten.compact.pack("C*").unpack("U*").pack("U*")
  end

  private
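On Rubies that provide String#scrub (and are not Rubinius), tidy_bytes now lets scrub locate the invalid byte sequences, and only those are repaired through tidy_byte; the hand-rolled byte walker is kept for everything else. A hedged sketch of the intended effect (the CP-1252 assumption comes from the method's own comment; expected output, not a verified run, and the explicit require is an assumption):

  require "babosa"
  require "babosa/utf8/dumb_proxy"

  # 0x93/0x94 are CP-1252 curly quotes and invalid as UTF-8; tidy_bytes is
  # expected to remap them to their Unicode equivalents.
  Babosa::UTF8::DumbProxy.tidy_bytes("\x93quoted\x94")  # expected: "“quoted”"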
data/lib/babosa/utf8/unicode_proxy.rb
@@ -1,9 +1,11 @@
+ require 'unicode'
+
  module Babosa
  module UTF8
  # A UTF-8 proxy using the Unicode gem.
  # @see http://github.com/blackwinter/unicode
  module UnicodeProxy
- extend UTF8Proxy
+ extend Proxy
  extend self
  def downcase(string)
  Unicode.downcase(string)
data/lib/babosa/version.rb
@@ -1,5 +1,5 @@
  module Babosa
  module Version
- STRING = "0.3.10"
+ STRING = '1.0.4'
  end
  end
data/spec/babosa_spec.rb
@@ -4,7 +4,7 @@ require File.expand_path("../spec_helper", __FILE__)
  describe Babosa::Identifier do

  it "should respond_to :empty?" do
- "".to_slug.should respond_to(:empty?)
+ expect("".to_slug).to respond_to(:empty?)
  end

  %w[approximate_ascii clean downcase word_chars normalize to_ascii upcase with_dashes].each do |method|
@@ -18,128 +18,141 @@ describe Babosa::Identifier do
  describe "#word_chars" do
  it "word_chars! should leave only letters and spaces" do
  string = "a*$%^$@!@b$%^&*()*!c"
- string.to_slug.word_chars.should match(/[a-z ]*/i)
+ expect(string.to_slug.word_chars!).to match(/[a-z ]*/i)
  end
  end

  describe "#transliterate" do
  it "should transliterate to ascii" do
- slug = (0xC0..0x17E).to_a.each do |codepoint|
+ (0xC0..0x17E).to_a.each do |codepoint|
  ss = [codepoint].pack("U*").to_slug
- ss.approximate_ascii.should match(/[\x0-\x7f]/)
+ expect(ss.approximate_ascii!).to match(/[\x0-\x7f]/)
  end
  end

  it "should transliterate uncomposed utf8" do
  string = [117, 776].pack("U*") # "ü" as ASCII "u" plus COMBINING DIAERESIS
- string.to_slug.approximate_ascii.should eql("u")
+ expect(string.to_slug.approximate_ascii).to eql("u")
  end

  it "should transliterate using multiple transliterators" do
  string = "свободное režģis"
- string.to_slug.approximate_ascii(:latin, :russian).should eql("svobodnoe rezgis")
+ expect(string.to_slug.approximate_ascii(:latin, :russian)).to eql("svobodnoe rezgis")
  end
  end

  describe "#downcase" do
  it "should lowercase strings" do
- "FELIZ AÑO".to_slug.downcase.should eql("feliz año")
+ expect("FELIZ AÑO".to_slug.downcase).to eql("feliz año")
  end
  end

  describe "#upcase" do
  it "should uppercase strings" do
- "feliz año".to_slug.upcase.should eql("FELIZ AÑO")
+ expect("feliz año".to_slug.upcase).to eql("FELIZ AÑO")
  end
  end

  describe "#normalize" do

  it "should allow passing locale as key for :transliterate" do
- "ö".to_slug.clean.normalize(:transliterate => :german).should eql("oe")
+ expect("ö".to_slug.clean.normalize(:transliterate => :german)).to eql("oe")
  end

  it "should replace whitespace with dashes" do
- "a b".to_slug.clean.normalize.should eql("a-b")
+ expect("a b".to_slug.clean.normalize).to eql("a-b")
  end

  it "should replace multiple spaces with 1 dash" do
- "a b".to_slug.clean.normalize.should eql("a-b")
+ expect("a b".to_slug.clean.normalize).to eql("a-b")
  end

  it "should replace multiple dashes with 1 dash" do
- "male - female".to_slug.normalize.should eql("male-female")
+ expect("male - female".to_slug.normalize).to eql("male-female")
  end

  it "should strip trailing space" do
- "ab ".to_slug.normalize.should eql("ab")
+ expect("ab ".to_slug.normalize).to eql("ab")
  end

  it "should strip leading space" do
- " ab".to_slug.normalize.should eql("ab")
+ expect(" ab".to_slug.normalize).to eql("ab")
  end

  it "should strip trailing slashes" do
- "ab-".to_slug.normalize.should eql("ab")
+ expect("ab-".to_slug.normalize).to eql("ab")
  end

  it "should strip leading slashes" do
- "-ab".to_slug.normalize.should eql("ab")
+ expect("-ab".to_slug.normalize).to eql("ab")
  end

  it "should not modify valid name strings" do
- "a-b-c-d".to_slug.normalize.should eql("a-b-c-d")
+ expect("a-b-c-d".to_slug.normalize).to eql("a-b-c-d")
  end

  it "should not convert underscores" do
- "hello_world".to_slug.normalize.should eql("hello_world")
+ expect("hello_world".to_slug.normalize).to eql("hello_world")
  end

  it "should work with non roman chars" do
- "検 索".to_slug.normalize.should eql("検-索")
+ expect("検 索".to_slug.normalize).to eql("検-索")
  end

  context "with to_ascii option" do
  it "should approximate and strip non ascii" do
  ss = "カタカナ: katakana is über cool".to_slug
- ss.normalize(:to_ascii => true).should eql("katakana-is-uber-cool")
+ expect(ss.normalize(:to_ascii => true)).to eql("katakana-is-uber-cool")
  end
  end
  end

  describe "#truncate_bytes" do
  it "should by byte length" do
- "üa".to_slug.truncate_bytes(2).should eql("ü")
- "üa".to_slug.truncate_bytes(1).should eql("")
- "üa".to_slug.truncate_bytes(100).should eql("üa")
- "üéøá".to_slug.truncate_bytes(3).should eql("ü")
+ expect("üa".to_slug.truncate_bytes(2)).to eql("ü")
+ expect("üa".to_slug.truncate_bytes(1)).to eql("")
+ expect("üa".to_slug.truncate_bytes(100)).to eql("üa")
+ expect("üéøá".to_slug.truncate_bytes(3)).to eql("ü")
  end
  end

  describe "#truncate" do
  it "should truncate by char length" do
- "üa".to_slug.truncate(2).should eql("üa")
- "üa".to_slug.truncate(1).should eql("ü")
- "üa".to_slug.truncate(100).should eql("üa")
+ expect("üa".to_slug.truncate(2)).to eql("üa")
+ expect("üa".to_slug.truncate(1)).to eql("ü")
+ expect("üa".to_slug.truncate(100)).to eql("üa")
  end
  end

  describe "#with_dashes" do
  it "should not change byte size when replacing spaces" do
- "".to_slug.with_dashes.bytesize.should eql(0)
- " ".to_slug.with_dashes.bytesize.should eql(1)
- "-abc-".to_slug.with_dashes.bytesize.should eql(5)
- " abc ".to_slug.with_dashes.bytesize.should eql(5)
- " a bc ".to_slug.with_dashes.bytesize.should eql(7)
+ expect("".to_slug.with_dashes.bytesize).to eql(0)
+ expect(" ".to_slug.with_dashes.bytesize).to eql(1)
+ expect("-abc-".to_slug.with_dashes.bytesize).to eql(5)
+ expect(" abc ".to_slug.with_dashes.bytesize).to eql(5)
+ expect(" a bc ".to_slug.with_dashes.bytesize).to eql(7)
  end
  end

  describe "#to_ruby_method" do
  it "should get a string suitable for use as a ruby method" do
- "¿¿¿hello... world???".to_slug.to_ruby_method.should eql("hello_world?")
- "カタカナ: katakana is über cool".to_slug.to_ruby_method.should eql("katakana_is_uber_cool")
- "カタカナ: katakana is über cool!".to_slug.to_ruby_method.should eql("katakana_is_uber_cool!")
- "カタカナ: katakana is über cool".to_slug.to_ruby_method(false).should eql("katakana_is_uber_cool")
+ expect("¿¿¿hello... world???".to_slug.to_ruby_method).to eql("hello_world?")
+ expect("カタカナ: katakana is über cool".to_slug.to_ruby_method).to eql("katakana_is_uber_cool")
+ expect("カタカナ: katakana is über cool!".to_slug.to_ruby_method).to eql("katakana_is_uber_cool!")
+ expect("カタカナ: katakana is über cool".to_slug.to_ruby_method(false)).to eql("katakana_is_uber_cool")
+ end
+
+ it "should optionally remove trailing punctuation" do
+ expect("¿¿¿hello... world???".to_slug.to_ruby_method(false)).to eql("hello_world")
+ end
+
+ it "should raise an error when it would generate an impossible method name" do
+ # "1".to_identifier.to_ruby_method
+ expect {"1".to_identifier.to_ruby_method}.to raise_error(Babosa::Identifier::Error)
+ end
+
+ it "should raise Babosa::Error error when the string is nil" do
+ expect { "".to_slug.to_ruby_method }.to raise_error(Babosa::Identifier::Error)
  end
  end
- end
+ end