babosa 0.3.10 → 1.0.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/Changelog.md +107 -0
- data/README.md +5 -23
- data/lib/babosa.rb +0 -17
- data/lib/babosa/identifier.rb +19 -17
- data/lib/babosa/transliterator/base.rb +19 -3
- data/lib/babosa/transliterator/hindi.rb +137 -0
- data/lib/babosa/transliterator/macedonian.rb +3 -1
- data/lib/babosa/transliterator/turkish.rb +8 -0
- data/lib/babosa/transliterator/ukrainian.rb +19 -0
- data/lib/babosa/transliterator/vietnamese.rb +143 -0
- data/lib/babosa/utf8/active_support_proxy.rb +26 -8
- data/lib/babosa/utf8/dumb_proxy.rb +23 -16
- data/lib/babosa/utf8/java_proxy.rb +1 -1
- data/lib/babosa/utf8/proxy.rb +46 -39
- data/lib/babosa/utf8/unicode_proxy.rb +3 -1
- data/lib/babosa/version.rb +1 -1
- data/spec/babosa_spec.rb +50 -37
- data/spec/spec_helper.rb +17 -14
- data/spec/transliterators/base_spec.rb +3 -3
- data/spec/transliterators/bulgarian_spec.rb +1 -1
- data/spec/transliterators/danish_spec.rb +1 -1
- data/spec/transliterators/german_spec.rb +2 -2
- data/spec/transliterators/greek_spec.rb +1 -1
- data/spec/transliterators/hindi_spec.rb +17 -0
- data/spec/transliterators/latin_spec.rb +9 -0
- data/spec/transliterators/norwegian_spec.rb +1 -1
- data/spec/transliterators/polish_spec.rb +14 -0
- data/spec/transliterators/romanian_spec.rb +1 -1
- data/spec/transliterators/serbian_spec.rb +1 -1
- data/spec/transliterators/spanish_spec.rb +1 -1
- data/spec/transliterators/swedish_spec.rb +1 -1
- data/spec/transliterators/turkish_spec.rb +24 -0
- data/spec/transliterators/ukrainian_spec.rb +80 -1
- data/spec/transliterators/vietnamese_spec.rb +18 -0
- data/spec/utf8_proxy_spec.rb +22 -18
- metadata +64 -52
- data/init.rb +0 -3
data/lib/babosa/transliterator/macedonian.rb
CHANGED
@@ -12,6 +12,7 @@ module Babosa
        "Ц" => "C",
        "Ѕ" => "Z",
        "Ј" => "J",
+       "Х" => "H",
        "ѓ" => "gj",
        "љ" => "lj",
        "њ" => "nj",
@@ -20,7 +21,8 @@ module Babosa
        "ж" => "zh",
        "ц" => "c",
        "ѕ" => "z",
-       "ј" => "j"
+       "ј" => "j",
+       "х" => "h"
      }
    end
  end
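The Macedonian table previously had no mapping for Х/х, so that letter was left untransliterated in ASCII approximations. A minimal check of the new behavior, assuming the String#to_slug and Identifier#approximate_ascii API shown in the specs below and :macedonian as the transliterator key; the expected output is inferred from the mappings above, not taken from the gem's test suite:

    require "babosa"

    # "Охрид" should now approximate to "Ohrid"; previously the "х" stayed Cyrillic.
    puts "Охрид".to_slug.approximate_ascii(:macedonian)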
data/lib/babosa/transliterator/ukrainian.rb
CHANGED
@@ -3,8 +3,27 @@ module Babosa
  module Transliterator
    class Ukrainian < Cyrillic
      APPROXIMATIONS = {
+       "Г" => "H",
+       "г" => "h",
+       "Ґ" => "G",
+       "ґ" => "g",
+       "є" => "ie",
        "И" => "Y",
        "и" => "y",
+       "І" => "I",
+       "і" => "i",
+       "ї" => "i",
+       "Й" => "Y",
+       "й" => "i",
+       "Х" => "Kh",
+       "х" => "kh",
+       "Ц" => "Ts",
+       "ц" => 'ts',
+       "Щ" => "Shch",
+       "щ" => "shch",
+       "ю" => "iu",
+       "я" => "ia",
+       "'" => ""
      }
    end
  end
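The Ukrainian table now overrides more of the generic Cyrillic mappings (Г→H, Ґ→G, х→kh, щ→shch, я→ia, and so on) and deletes apostrophes. A rough usage sketch; :ukrainian as the lookup key and the expected outputs are assumptions based on the mappings above:

    require "babosa"

    puts "Щастя".to_slug.approximate_ascii(:ukrainian)   # expected: "Shchastia"
    puts "Харків".to_slug.approximate_ascii(:ukrainian)  # expected: "Kharkiv"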
data/lib/babosa/transliterator/vietnamese.rb
ADDED
@@ -0,0 +1,143 @@
+# encoding: utf-8
+module Babosa
+  module Transliterator
+    class Vietnamese < Latin
+      APPROXIMATIONS = {
+        "à" => "a",
+        "á" => "a",
+        "ạ" => "a",
+        "ả" => "a",
+        "ã" => "a",
+        "â" => "a",
+        "ầ" => "a",
+        "ấ" => "a",
+        "ậ" => "a",
+        "ẩ" => "a",
+        "ẫ" => "a",
+        "ă" => "a",
+        "ằ" => "a",
+        "ắ" => "a",
+        "ặ" => "a",
+        "ẳ" => "a",
+        "ẵ" => "a",
+        "À" => "A",
+        "Á" => "A",
+        "Ạ" => "A",
+        "Ả" => "A",
+        "Ã" => "A",
+        "Â" => "A",
+        "Ầ" => "A",
+        "Ấ" => "A",
+        "Ậ" => "A",
+        "Ẩ" => "A",
+        "Ẫ" => "A",
+        "Ă" => "A",
+        "Ằ" => "A",
+        "Ắ" => "A",
+        "Ặ" => "A",
+        "Ẳ" => "A",
+        "Ẵ" => "A",
+        "ì" => "i",
+        "í" => "i",
+        "ị" => "i",
+        "ỉ" => "i",
+        "ĩ" => "i",
+        "Ì" => "I",
+        "Í" => "I",
+        "Ị" => "I",
+        "Ỉ" => "I",
+        "Ĩ" => "I",
+        "ù" => "u",
+        "ú" => "u",
+        "ụ" => "u",
+        "ủ" => "u",
+        "ũ" => "u",
+        "ư" => "u",
+        "ừ" => "u",
+        "ứ" => "u",
+        "ự" => "u",
+        "ử" => "u",
+        "ữ" => "u",
+        "Ù" => "U",
+        "Ú" => "U",
+        "Ụ" => "U",
+        "Ủ" => "U",
+        "Ũ" => "U",
+        "Ư" => "U",
+        "Ừ" => "U",
+        "Ứ" => "U",
+        "Ự" => "U",
+        "Ử" => "U",
+        "Ữ" => "U",
+        "è" => "e",
+        "é" => "e",
+        "ẹ" => "e",
+        "ẻ" => "e",
+        "ẽ" => "e",
+        "ê" => "e",
+        "ề" => "e",
+        "ế" => "e",
+        "ệ" => "e",
+        "ể" => "e",
+        "ễ" => "e",
+        "È" => "E",
+        "É" => "E",
+        "Ẹ" => "E",
+        "Ẻ" => "E",
+        "Ẽ" => "E",
+        "Ê" => "E",
+        "Ề" => "E",
+        "Ế" => "E",
+        "Ệ" => "E",
+        "Ể" => "E",
+        "Ễ" => "E",
+        "ò" => "o",
+        "ó" => "o",
+        "ọ" => "o",
+        "ỏ" => "o",
+        "õ" => "o",
+        "ô" => "o",
+        "ồ" => "o",
+        "ố" => "o",
+        "ộ" => "o",
+        "ổ" => "o",
+        "ỗ" => "o",
+        "ơ" => "o",
+        "ờ" => "o",
+        "ớ" => "o",
+        "ợ" => "o",
+        "ở" => "o",
+        "ỡ" => "o",
+        "Ò" => "O",
+        "Ó" => "O",
+        "Ọ" => "O",
+        "Ỏ" => "O",
+        "Õ" => "O",
+        "Ô" => "O",
+        "Ồ" => "O",
+        "Ố" => "O",
+        "Ộ" => "O",
+        "Ổ" => "O",
+        "Ỗ" => "O",
+        "Ơ" => "O",
+        "Ờ" => "O",
+        "Ớ" => "O",
+        "Ợ" => "O",
+        "Ở" => "O",
+        "Ỡ" => "O",
+        "ỳ" => "y",
+        "ý" => "y",
+        "ỵ" => "y",
+        "ỷ" => "y",
+        "ỹ" => "y",
+        "Ỳ" => "Y",
+        "Ý" => "Y",
+        "Ỵ" => "Y",
+        "Ỷ" => "Y",
+        "Ỹ" => "Y",
+        "đ" => "d",
+        "Đ" => "D"
+      }
+    end
+  end
+end
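The new Vietnamese transliterator folds tone and vowel marks down to plain Latin letters and maps đ/Đ to d/D. A usage sketch, assuming :vietnamese is resolved from the file name the same way as the other transliterators; the outputs are inferred from the table above rather than quoted from the gem's specs:

    require "babosa"

    puts "Việt Nam".to_slug.approximate_ascii(:vietnamese)  # expected: "Viet Nam"
    puts "Đà Nẵng".to_slug.approximate_ascii(:vietnamese)   # expected: "Da Nang"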
data/lib/babosa/utf8/active_support_proxy.rb
CHANGED
@@ -1,19 +1,37 @@
+require 'active_support'
+require 'active_support/multibyte/unicode'
+
module Babosa
  module UTF8
    # A UTF-8 proxy using Active Support's multibyte support.
    module ActiveSupportProxy
-     extend
+     extend ActiveSupport::Multibyte::Unicode
      extend self
-     def downcase(string)
-       ActiveSupport::Multibyte::Chars.new(string).downcase.to_s
-     end

-     def
-
+     def self.normalize_utf8(string)
+       normalize(string, :c)
      end

-
-
+     if ActiveSupport::VERSION::MAJOR == 3
+       def downcase(string)
+         ActiveSupport::Multibyte::Chars.new(string).downcase.to_s
+       end
+
+       def upcase(string)
+         ActiveSupport::Multibyte::Chars.new(string).upcase.to_s
+       end
+     elsif ActiveSupport::VERSION::MAJOR >= 6
+       def self.normalize_utf8(string)
+         string.unicode_normalize(:nfc).to_s
+       end
+
+       def downcase(string)
+         string.downcase.to_s
+       end
+
+       def upcase(string)
+         string.upcase.to_s
+       end
    end
  end
end
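The proxy now requires Active Support explicitly and defines downcase/upcase differently depending on ActiveSupport::VERSION::MAJOR, while normalize_utf8 composes strings to NFC. A sketch of how the module is expected to behave when Active Support is installed; the return values are illustrative and the decomposed-"ü" input is borrowed from the specs below:

    require "active_support"
    require "babosa"

    proxy = Babosa::UTF8::ActiveSupportProxy
    decomposed = [117, 776].pack("U*")   # "u" + COMBINING DIAERESIS
    proxy.normalize_utf8(decomposed)     # => "ü" (NFC-composed)
    proxy.upcase("feliz año")            # => "FELIZ AÑO"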
data/lib/babosa/utf8/dumb_proxy.rb
CHANGED
@@ -10,32 +10,39 @@ module Babosa
    # or ActiveSupport should be used instead because they support the full
    # UTF-8 character range.
    module DumbProxy
-     extend
+     extend Proxy
      extend self

      def downcase(string)
-       string.unpack("U*").map {|char| Mappings::DOWNCASE[char] or char}.flatten.pack("U*")
+       string.downcase.unpack("U*").map {|char| Mappings::DOWNCASE[char] or char}.flatten.pack("U*")
      end

      def upcase(string)
-       string.unpack("U*").map {|char| Mappings::UPCASE[char] or char}.flatten.pack("U*")
+       string.upcase.unpack("U*").map {|char| Mappings::UPCASE[char] or char}.flatten.pack("U*")
      end

-
-
-
-
-
-
-
-
-
-
-
-
+     if ''.respond_to?(:unicode_normalize)
+       def normalize_utf8(string)
+         string.unicode_normalize
+       end
+     else
+       # On Ruby 2.2, this uses the native Unicode normalize method. On all
+       # other Rubies, it does a very naive Unicode normalization, which should
+       # work for this library's purposes (i.e., Roman-based codepoints, up to
+       # U+017E). Do not use reuse this as a general solution! Use a real
+       # library like Unicode or ActiveSupport instead.
+       def normalize_utf8(string)
+         codepoints = string.unpack("U*")
+         new = []
+         until codepoints.empty? do
+           if Mappings::COMPOSITION[codepoints[0..1]]
+             new << Mappings::COMPOSITION[codepoints.slice!(0,2)]
+           else
+             new << codepoints.shift
+           end
          end
+         new.compact.flatten.pack("U*")
        end
-       new.compact.flatten.pack("U*")
      end
    end
  end
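Two behavioral fixes here: downcase/upcase now run Ruby's own String#downcase/#upcase before applying the codepoint mapping tables, and normalize_utf8 prefers String#unicode_normalize when the running Ruby provides it, falling back to the naive pairwise composition otherwise. A quick check of the composition behavior, reusing the decomposed "ü" example from the specs below; the composed result is the expected outcome, not a quoted test:

    require "babosa"

    decomposed = [117, 776].pack("U*")                   # "u" + COMBINING DIAERESIS
    Babosa::UTF8::DumbProxy.normalize_utf8(decomposed)   # => "ü" (single composed codepoint)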
data/lib/babosa/utf8/proxy.rb
CHANGED
@@ -8,7 +8,7 @@ module Babosa

    # A UTF-8 proxy for Babosa can be any object which responds to the methods in this module.
    # The following proxies are provided by Babosa: {ActiveSupportProxy}, {DumbProxy}, {JavaProxy}, and {UnicodeProxy}.
-   module
+   module Proxy
      CP1252 = {
        128 => [226, 130, 172],
        129 => nil,
@@ -62,50 +62,57 @@ module Babosa
      raise NotImplementedError
    end

-
-
-
-
-
-
-
-
+   if ''.respond_to?(:scrub) && !defined?(Rubinius)
+     # Attempt to replace invalid UTF-8 bytes with valid ones. This method
+     # naively assumes if you have invalid UTF8 bytes, they are either Windows
+     # CP-1252 or ISO8859-1. In practice this isn't a bad assumption, but may not
+     # always work.
+     def tidy_bytes(string)
+       string.scrub do |bad|
+         tidy_byte(*bad.bytes).flatten.compact.pack('C*').unpack('U*').pack('U*')
+       end
+     end
+   else
+     def tidy_bytes(string)
+       bytes = string.unpack("C*")
+       conts_expected = 0
+       last_lead = 0

-
-
-
-
-
-
-       is_restricted = byte > 244
+       bytes.each_index do |i|
+         byte = bytes[i]
+         is_cont = byte > 127 && byte < 192
+         is_lead = byte > 191 && byte < 245
+         is_unused = byte > 240
+         is_restricted = byte > 244

-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+         # Impossible or highly unlikely byte? Clean it.
+         if is_unused || is_restricted
+           bytes[i] = tidy_byte(byte)
+         elsif is_cont
+           # Not expecting contination byte? Clean up. Otherwise, now expect one less.
+           conts_expected == 0 ? bytes[i] = tidy_byte(byte) : conts_expected -= 1
+         else
+           if conts_expected > 0
+             # Expected continuation, but got ASCII or leading? Clean backwards up to
+             # the leading byte.
+             (1..(i - last_lead)).each {|j| bytes[i - j] = tidy_byte(bytes[i - j])}
+             conts_expected = 0
+           end
+           if is_lead
+             # Final byte is leading? Clean it.
+             if i == bytes.length - 1
+               bytes[i] = tidy_byte(bytes.last)
+             else
+               # Valid leading byte? Expect continuations determined by position of
+               # first zero bit, with max of 3.
+               conts_expected = byte < 224 ? 1 : byte < 240 ? 2 : 3
+               last_lead = i
+             end
           end
         end
       end
+       bytes.empty? ? "" : bytes.flatten.compact.pack("C*").unpack("U*").pack("U*")
      end
-     bytes.empty? ? "" : bytes.flatten.compact.pack("C*").unpack("U*").pack("U*")
    end

    private
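tidy_bytes now delegates to String#scrub where it is available (and the runtime is not Rubinius), repairing each invalid byte via the module's CP1252 table; the hand-rolled byte walk is kept as the fallback. A sketch of the intended effect; the input byte and the expected output merely illustrate the CP-1252 assumption described in the comments and are not taken from the gem's specs:

    require "babosa"

    bad = "don\x92t"                          # 0x92 is CP-1252 for a right single quote; invalid UTF-8
    Babosa::UTF8::DumbProxy.tidy_bytes(bad)   # expected: "don’t"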
data/lib/babosa/utf8/unicode_proxy.rb
CHANGED
@@ -1,9 +1,11 @@
+require 'unicode'
+
module Babosa
  module UTF8
    # A UTF-8 proxy using the Unicode gem.
    # @see http://github.com/blackwinter/unicode
    module UnicodeProxy
-     extend
+     extend Proxy
      extend self
      def downcase(string)
        Unicode.downcase(string)
data/lib/babosa/version.rb
CHANGED
data/spec/babosa_spec.rb
CHANGED
@@ -4,7 +4,7 @@ require File.expand_path("../spec_helper", __FILE__)
describe Babosa::Identifier do

  it "should respond_to :empty?" do
-   "".to_slug.
+   expect("".to_slug).to respond_to(:empty?)
  end

  %w[approximate_ascii clean downcase word_chars normalize to_ascii upcase with_dashes].each do |method|
@@ -18,128 +18,141 @@ describe Babosa::Identifier do
  describe "#word_chars" do
    it "word_chars! should leave only letters and spaces" do
      string = "a*$%^$@!@b$%^&*()*!c"
-     string.to_slug.word_chars.
+     expect(string.to_slug.word_chars!).to match(/[a-z ]*/i)
    end
  end

  describe "#transliterate" do
    it "should transliterate to ascii" do
-
+     (0xC0..0x17E).to_a.each do |codepoint|
        ss = [codepoint].pack("U*").to_slug
-       ss.approximate_ascii.
+       expect(ss.approximate_ascii!).to match(/[\x0-\x7f]/)
      end
    end

    it "should transliterate uncomposed utf8" do
      string = [117, 776].pack("U*") # "ü" as ASCII "u" plus COMBINING DIAERESIS
-     string.to_slug.approximate_ascii.
+     expect(string.to_slug.approximate_ascii).to eql("u")
    end

    it "should transliterate using multiple transliterators" do
      string = "свободное režģis"
-     string.to_slug.approximate_ascii(:latin, :russian).
+     expect(string.to_slug.approximate_ascii(:latin, :russian)).to eql("svobodnoe rezgis")
    end
  end

  describe "#downcase" do
    it "should lowercase strings" do
-     "FELIZ AÑO".to_slug.downcase.
+     expect("FELIZ AÑO".to_slug.downcase).to eql("feliz año")
    end
  end

  describe "#upcase" do
    it "should uppercase strings" do
-     "feliz año".to_slug.upcase.
+     expect("feliz año".to_slug.upcase).to eql("FELIZ AÑO")
    end
  end

  describe "#normalize" do

    it "should allow passing locale as key for :transliterate" do
-     "ö".to_slug.clean.normalize(:transliterate => :german).
+     expect("ö".to_slug.clean.normalize(:transliterate => :german)).to eql("oe")
    end

    it "should replace whitespace with dashes" do
-     "a b".to_slug.clean.normalize.
+     expect("a b".to_slug.clean.normalize).to eql("a-b")
    end

    it "should replace multiple spaces with 1 dash" do
-     "a b".to_slug.clean.normalize.
+     expect("a b".to_slug.clean.normalize).to eql("a-b")
    end

    it "should replace multiple dashes with 1 dash" do
-     "male - female".to_slug.normalize.
+     expect("male - female".to_slug.normalize).to eql("male-female")
    end

    it "should strip trailing space" do
-     "ab ".to_slug.normalize.
+     expect("ab ".to_slug.normalize).to eql("ab")
    end

    it "should strip leading space" do
-     " ab".to_slug.normalize.
+     expect(" ab".to_slug.normalize).to eql("ab")
    end

    it "should strip trailing slashes" do
-     "ab-".to_slug.normalize.
+     expect("ab-".to_slug.normalize).to eql("ab")
    end

    it "should strip leading slashes" do
-     "-ab".to_slug.normalize.
+     expect("-ab".to_slug.normalize).to eql("ab")
    end

    it "should not modify valid name strings" do
-     "a-b-c-d".to_slug.normalize.
+     expect("a-b-c-d".to_slug.normalize).to eql("a-b-c-d")
    end

    it "should not convert underscores" do
-     "hello_world".to_slug.normalize.
+     expect("hello_world".to_slug.normalize).to eql("hello_world")
    end

    it "should work with non roman chars" do
-     "検 索".to_slug.normalize.
+     expect("検 索".to_slug.normalize).to eql("検-索")
    end

    context "with to_ascii option" do
      it "should approximate and strip non ascii" do
        ss = "カタカナ: katakana is über cool".to_slug
-       ss.normalize(:to_ascii => true).
+       expect(ss.normalize(:to_ascii => true)).to eql("katakana-is-uber-cool")
      end
    end
  end

  describe "#truncate_bytes" do
    it "should by byte length" do
-     "üa".to_slug.truncate_bytes(2).
-     "üa".to_slug.truncate_bytes(1).
-     "üa".to_slug.truncate_bytes(100).
-     "üéøá".to_slug.truncate_bytes(3).
+     expect("üa".to_slug.truncate_bytes(2)).to eql("ü")
+     expect("üa".to_slug.truncate_bytes(1)).to eql("")
+     expect("üa".to_slug.truncate_bytes(100)).to eql("üa")
+     expect("üéøá".to_slug.truncate_bytes(3)).to eql("ü")
    end
  end

  describe "#truncate" do
    it "should truncate by char length" do
-     "üa".to_slug.truncate(2).
-     "üa".to_slug.truncate(1).
-     "üa".to_slug.truncate(100).
+     expect("üa".to_slug.truncate(2)).to eql("üa")
+     expect("üa".to_slug.truncate(1)).to eql("ü")
+     expect("üa".to_slug.truncate(100)).to eql("üa")
    end
  end

  describe "#with_dashes" do
    it "should not change byte size when replacing spaces" do
-     "".to_slug.with_dashes.bytesize.
-     " ".to_slug.with_dashes.bytesize.
-     "-abc-".to_slug.with_dashes.bytesize.
-     " abc ".to_slug.with_dashes.bytesize.
-     " a bc ".to_slug.with_dashes.bytesize.
+     expect("".to_slug.with_dashes.bytesize).to eql(0)
+     expect(" ".to_slug.with_dashes.bytesize).to eql(1)
+     expect("-abc-".to_slug.with_dashes.bytesize).to eql(5)
+     expect(" abc ".to_slug.with_dashes.bytesize).to eql(5)
+     expect(" a bc ".to_slug.with_dashes.bytesize).to eql(7)
    end
  end

  describe "#to_ruby_method" do
    it "should get a string suitable for use as a ruby method" do
-     "¿¿¿hello... world???".to_slug.to_ruby_method.
-     "カタカナ: katakana is über cool".to_slug.to_ruby_method.
-     "カタカナ: katakana is über cool!".to_slug.to_ruby_method.
-     "カタカナ: katakana is über cool".to_slug.to_ruby_method(false).
+     expect("¿¿¿hello... world???".to_slug.to_ruby_method).to eql("hello_world?")
+     expect("カタカナ: katakana is über cool".to_slug.to_ruby_method).to eql("katakana_is_uber_cool")
+     expect("カタカナ: katakana is über cool!".to_slug.to_ruby_method).to eql("katakana_is_uber_cool!")
+     expect("カタカナ: katakana is über cool".to_slug.to_ruby_method(false)).to eql("katakana_is_uber_cool")
+   end
+
+   it "should optionally remove trailing punctuation" do
+     expect("¿¿¿hello... world???".to_slug.to_ruby_method(false)).to eql("hello_world")
+   end
+
+   it "should raise an error when it would generate an impossible method name" do
+     # "1".to_identifier.to_ruby_method
+     expect {"1".to_identifier.to_ruby_method}.to raise_error(Babosa::Identifier::Error)
+   end
+
+   it "should raise Babosa::Error error when the string is nil" do
+     expect { "".to_slug.to_ruby_method }.to raise_error(Babosa::Identifier::Error)
    end
  end
-end
+end
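The new examples pin down #to_ruby_method: a trailing "?" or "!" is kept by default, passing false strips it, and inputs that cannot start a Ruby method name raise Babosa::Identifier::Error. The same behavior as plain usage, mirroring the spec above:

    require "babosa"

    "¿¿¿hello... world???".to_slug.to_ruby_method         # => "hello_world?"
    "¿¿¿hello... world???".to_slug.to_ruby_method(false)  # => "hello_world"

    begin
      "1".to_identifier.to_ruby_method
    rescue Babosa::Identifier::Error => e
      warn e.message
    end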