babosa 0.3.10 → 1.0.4
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/Changelog.md +107 -0
- data/README.md +5 -23
- data/lib/babosa.rb +0 -17
- data/lib/babosa/identifier.rb +19 -17
- data/lib/babosa/transliterator/base.rb +19 -3
- data/lib/babosa/transliterator/hindi.rb +137 -0
- data/lib/babosa/transliterator/macedonian.rb +3 -1
- data/lib/babosa/transliterator/turkish.rb +8 -0
- data/lib/babosa/transliterator/ukrainian.rb +19 -0
- data/lib/babosa/transliterator/vietnamese.rb +143 -0
- data/lib/babosa/utf8/active_support_proxy.rb +26 -8
- data/lib/babosa/utf8/dumb_proxy.rb +23 -16
- data/lib/babosa/utf8/java_proxy.rb +1 -1
- data/lib/babosa/utf8/proxy.rb +46 -39
- data/lib/babosa/utf8/unicode_proxy.rb +3 -1
- data/lib/babosa/version.rb +1 -1
- data/spec/babosa_spec.rb +50 -37
- data/spec/spec_helper.rb +17 -14
- data/spec/transliterators/base_spec.rb +3 -3
- data/spec/transliterators/bulgarian_spec.rb +1 -1
- data/spec/transliterators/danish_spec.rb +1 -1
- data/spec/transliterators/german_spec.rb +2 -2
- data/spec/transliterators/greek_spec.rb +1 -1
- data/spec/transliterators/hindi_spec.rb +17 -0
- data/spec/transliterators/latin_spec.rb +9 -0
- data/spec/transliterators/norwegian_spec.rb +1 -1
- data/spec/transliterators/polish_spec.rb +14 -0
- data/spec/transliterators/romanian_spec.rb +1 -1
- data/spec/transliterators/serbian_spec.rb +1 -1
- data/spec/transliterators/spanish_spec.rb +1 -1
- data/spec/transliterators/swedish_spec.rb +1 -1
- data/spec/transliterators/turkish_spec.rb +24 -0
- data/spec/transliterators/ukrainian_spec.rb +80 -1
- data/spec/transliterators/vietnamese_spec.rb +18 -0
- data/spec/utf8_proxy_spec.rb +22 -18
- metadata +64 -52
- data/init.rb +0 -3
@@ -12,6 +12,7 @@ module Babosa
|
|
12
12
|
"Ц" => "C",
|
13
13
|
"Ѕ" => "Z",
|
14
14
|
"Ј" => "J",
|
15
|
+
"Х" => "H",
|
15
16
|
"ѓ" => "gj",
|
16
17
|
"љ" => "lj",
|
17
18
|
"њ" => "nj",
|
@@ -20,7 +21,8 @@ module Babosa
|
|
20
21
|
"ж" => "zh",
|
21
22
|
"ц" => "c",
|
22
23
|
"ѕ" => "z",
|
23
|
-
"ј" => "j"
|
24
|
+
"ј" => "j",
|
25
|
+
"х" => "h"
|
24
26
|
}
|
25
27
|
end
|
26
28
|
end
|
@@ -3,8 +3,27 @@ module Babosa
|
|
3
3
|
module Transliterator
|
4
4
|
class Ukrainian < Cyrillic
|
5
5
|
APPROXIMATIONS = {
|
6
|
+
"Г" => "H",
|
7
|
+
"г" => "h",
|
8
|
+
"Ґ" => "G",
|
9
|
+
"ґ" => "g",
|
10
|
+
"є" => "ie",
|
6
11
|
"И" => "Y",
|
7
12
|
"и" => "y",
|
13
|
+
"І" => "I",
|
14
|
+
"і" => "i",
|
15
|
+
"ї" => "i",
|
16
|
+
"Й" => "Y",
|
17
|
+
"й" => "i",
|
18
|
+
"Х" => "Kh",
|
19
|
+
"х" => "kh",
|
20
|
+
"Ц" => "Ts",
|
21
|
+
"ц" => 'ts',
|
22
|
+
"Щ" => "Shch",
|
23
|
+
"щ" => "shch",
|
24
|
+
"ю" => "iu",
|
25
|
+
"я" => "ia",
|
26
|
+
"'" => ""
|
8
27
|
}
|
9
28
|
end
|
10
29
|
end
|
@@ -0,0 +1,143 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
module Babosa
|
3
|
+
module Transliterator
|
4
|
+
class Vietnamese < Latin
|
5
|
+
APPROXIMATIONS = {
|
6
|
+
"à" => "a",
|
7
|
+
"á" => "a",
|
8
|
+
"ạ" => "a",
|
9
|
+
"ả" => "a",
|
10
|
+
"ã" => "a",
|
11
|
+
"â" => "a",
|
12
|
+
"ầ" => "a",
|
13
|
+
"ấ" => "a",
|
14
|
+
"ậ" => "a",
|
15
|
+
"ẩ" => "a",
|
16
|
+
"ẫ" => "a",
|
17
|
+
"ă" => "a",
|
18
|
+
"ằ" => "a",
|
19
|
+
"ắ" => "a",
|
20
|
+
"ặ" => "a",
|
21
|
+
"ẳ" => "a",
|
22
|
+
"ẵ" => "a",
|
23
|
+
"À" => "A",
|
24
|
+
"Á" => "A",
|
25
|
+
"Ạ" => "A",
|
26
|
+
"Ả" => "A",
|
27
|
+
"Ã" => "A",
|
28
|
+
"Â" => "A",
|
29
|
+
"Ầ" => "A",
|
30
|
+
"Ấ" => "A",
|
31
|
+
"Ậ" => "A",
|
32
|
+
"Ẩ" => "A",
|
33
|
+
"Ẫ" => "A",
|
34
|
+
"Ă" => "A",
|
35
|
+
"Ằ" => "A",
|
36
|
+
"Ắ" => "A",
|
37
|
+
"Ặ" => "A",
|
38
|
+
"Ẳ" => "A",
|
39
|
+
"Ẵ" => "A",
|
40
|
+
"ì" => "i",
|
41
|
+
"í" => "i",
|
42
|
+
"ị" => "i",
|
43
|
+
"ỉ" => "i",
|
44
|
+
"ĩ" => "i",
|
45
|
+
"Ì" => "I",
|
46
|
+
"Í" => "I",
|
47
|
+
"Ị" => "I",
|
48
|
+
"Ỉ" => "I",
|
49
|
+
"Ĩ" => "I",
|
50
|
+
"ù" => "u",
|
51
|
+
"ú" => "u",
|
52
|
+
"ụ" => "u",
|
53
|
+
"ủ" => "u",
|
54
|
+
"ũ" => "u",
|
55
|
+
"ư" => "u",
|
56
|
+
"ừ" => "u",
|
57
|
+
"ứ" => "u",
|
58
|
+
"ự" => "u",
|
59
|
+
"ử" => "u",
|
60
|
+
"ữ" => "u",
|
61
|
+
"Ù" => "U",
|
62
|
+
"Ú" => "U",
|
63
|
+
"Ụ" => "U",
|
64
|
+
"Ủ" => "U",
|
65
|
+
"Ũ" => "U",
|
66
|
+
"Ư" => "U",
|
67
|
+
"Ừ" => "U",
|
68
|
+
"Ứ" => "U",
|
69
|
+
"Ự" => "U",
|
70
|
+
"Ử" => "U",
|
71
|
+
"Ữ" => "U",
|
72
|
+
"è" => "e",
|
73
|
+
"é" => "e",
|
74
|
+
"ẹ" => "e",
|
75
|
+
"ẻ" => "e",
|
76
|
+
"ẽ" => "e",
|
77
|
+
"ê" => "e",
|
78
|
+
"ề" => "e",
|
79
|
+
"ế" => "e",
|
80
|
+
"ệ" => "e",
|
81
|
+
"ể" => "e",
|
82
|
+
"ễ" => "e",
|
83
|
+
"È" => "E",
|
84
|
+
"É" => "E",
|
85
|
+
"Ẹ" => "E",
|
86
|
+
"Ẻ" => "E",
|
87
|
+
"Ẽ" => "E",
|
88
|
+
"Ê" => "E",
|
89
|
+
"Ề" => "E",
|
90
|
+
"Ế" => "E",
|
91
|
+
"Ệ" => "E",
|
92
|
+
"Ể" => "E",
|
93
|
+
"Ễ" => "E",
|
94
|
+
"ò" => "o",
|
95
|
+
"ó" => "o",
|
96
|
+
"ọ" => "o",
|
97
|
+
"ỏ" => "o",
|
98
|
+
"õ" => "o",
|
99
|
+
"ô" => "o",
|
100
|
+
"ồ" => "o",
|
101
|
+
"ố" => "o",
|
102
|
+
"ộ" => "o",
|
103
|
+
"ổ" => "o",
|
104
|
+
"ỗ" => "o",
|
105
|
+
"ơ" => "o",
|
106
|
+
"ờ" => "o",
|
107
|
+
"ớ" => "o",
|
108
|
+
"ợ" => "o",
|
109
|
+
"ở" => "o",
|
110
|
+
"ỡ" => "o",
|
111
|
+
"Ò" => "O",
|
112
|
+
"Ó" => "O",
|
113
|
+
"Ọ" => "O",
|
114
|
+
"Ỏ" => "O",
|
115
|
+
"Õ" => "O",
|
116
|
+
"Ô" => "O",
|
117
|
+
"Ồ" => "O",
|
118
|
+
"Ố" => "O",
|
119
|
+
"Ộ" => "O",
|
120
|
+
"Ổ" => "O",
|
121
|
+
"Ỗ" => "O",
|
122
|
+
"Ơ" => "O",
|
123
|
+
"Ờ" => "O",
|
124
|
+
"Ớ" => "O",
|
125
|
+
"Ợ" => "O",
|
126
|
+
"Ở" => "O",
|
127
|
+
"Ỡ" => "O",
|
128
|
+
"ỳ" => "y",
|
129
|
+
"ý" => "y",
|
130
|
+
"ỵ" => "y",
|
131
|
+
"ỷ" => "y",
|
132
|
+
"ỹ" => "y",
|
133
|
+
"Ỳ" => "Y",
|
134
|
+
"Ý" => "Y",
|
135
|
+
"Ỵ" => "Y",
|
136
|
+
"Ỷ" => "Y",
|
137
|
+
"Ỹ" => "Y",
|
138
|
+
"đ" => "d",
|
139
|
+
"Đ" => "D"
|
140
|
+
}
|
141
|
+
end
|
142
|
+
end
|
143
|
+
end
|
@@ -1,19 +1,37 @@
|
|
1
|
+
require 'active_support'
|
2
|
+
require 'active_support/multibyte/unicode'
|
3
|
+
|
1
4
|
module Babosa
|
2
5
|
module UTF8
|
3
6
|
# A UTF-8 proxy using Active Support's multibyte support.
|
4
7
|
module ActiveSupportProxy
|
5
|
-
extend
|
8
|
+
extend ActiveSupport::Multibyte::Unicode
|
6
9
|
extend self
|
7
|
-
def downcase(string)
|
8
|
-
ActiveSupport::Multibyte::Chars.new(string).downcase.to_s
|
9
|
-
end
|
10
10
|
|
11
|
-
def
|
12
|
-
|
11
|
+
def self.normalize_utf8(string)
|
12
|
+
normalize(string, :c)
|
13
13
|
end
|
14
14
|
|
15
|
-
|
16
|
-
|
15
|
+
if ActiveSupport::VERSION::MAJOR == 3
|
16
|
+
def downcase(string)
|
17
|
+
ActiveSupport::Multibyte::Chars.new(string).downcase.to_s
|
18
|
+
end
|
19
|
+
|
20
|
+
def upcase(string)
|
21
|
+
ActiveSupport::Multibyte::Chars.new(string).upcase.to_s
|
22
|
+
end
|
23
|
+
elsif ActiveSupport::VERSION::MAJOR >= 6
|
24
|
+
def self.normalize_utf8(string)
|
25
|
+
string.unicode_normalize(:nfc).to_s
|
26
|
+
end
|
27
|
+
|
28
|
+
def downcase(string)
|
29
|
+
string.downcase.to_s
|
30
|
+
end
|
31
|
+
|
32
|
+
def upcase(string)
|
33
|
+
string.upcase.to_s
|
34
|
+
end
|
17
35
|
end
|
18
36
|
end
|
19
37
|
end
|
@@ -10,32 +10,39 @@ module Babosa
|
|
10
10
|
# or ActiveSupport should be used instead because they support the full
|
11
11
|
# UTF-8 character range.
|
12
12
|
module DumbProxy
|
13
|
-
extend
|
13
|
+
extend Proxy
|
14
14
|
extend self
|
15
15
|
|
16
16
|
def downcase(string)
|
17
|
-
string.unpack("U*").map {|char| Mappings::DOWNCASE[char] or char}.flatten.pack("U*")
|
17
|
+
string.downcase.unpack("U*").map {|char| Mappings::DOWNCASE[char] or char}.flatten.pack("U*")
|
18
18
|
end
|
19
19
|
|
20
20
|
def upcase(string)
|
21
|
-
string.unpack("U*").map {|char| Mappings::UPCASE[char] or char}.flatten.pack("U*")
|
21
|
+
string.upcase.unpack("U*").map {|char| Mappings::UPCASE[char] or char}.flatten.pack("U*")
|
22
22
|
end
|
23
23
|
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
24
|
+
if ''.respond_to?(:unicode_normalize)
|
25
|
+
def normalize_utf8(string)
|
26
|
+
string.unicode_normalize
|
27
|
+
end
|
28
|
+
else
|
29
|
+
# On Ruby 2.2, this uses the native Unicode normalize method. On all
|
30
|
+
# other Rubies, it does a very naive Unicode normalization, which should
|
31
|
+
# work for this library's purposes (i.e., Roman-based codepoints, up to
|
32
|
+
# U+017E). Do not reuse this as a general solution! Use a real
|
33
|
+
# library like Unicode or ActiveSupport instead.
|
34
|
+
def normalize_utf8(string)
|
35
|
+
codepoints = string.unpack("U*")
|
36
|
+
new = []
|
37
|
+
until codepoints.empty? do
|
38
|
+
if Mappings::COMPOSITION[codepoints[0..1]]
|
39
|
+
new << Mappings::COMPOSITION[codepoints.slice!(0,2)]
|
40
|
+
else
|
41
|
+
new << codepoints.shift
|
42
|
+
end
|
36
43
|
end
|
44
|
+
new.compact.flatten.pack("U*")
|
37
45
|
end
|
38
|
-
new.compact.flatten.pack("U*")
|
39
46
|
end
|
40
47
|
end
|
41
48
|
end
|
data/lib/babosa/utf8/proxy.rb
CHANGED
@@ -8,7 +8,7 @@ module Babosa
|
|
8
8
|
|
9
9
|
# A UTF-8 proxy for Babosa can be any object which responds to the methods in this module.
|
10
10
|
# The following proxies are provided by Babosa: {ActiveSupportProxy}, {DumbProxy}, {JavaProxy}, and {UnicodeProxy}.
|
11
|
-
module
|
11
|
+
module Proxy
|
12
12
|
CP1252 = {
|
13
13
|
128 => [226, 130, 172],
|
14
14
|
129 => nil,
|
@@ -62,50 +62,57 @@ module Babosa
|
|
62
62
|
raise NotImplementedError
|
63
63
|
end
|
64
64
|
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
65
|
+
if ''.respond_to?(:scrub) && !defined?(Rubinius)
|
66
|
+
# Attempt to replace invalid UTF-8 bytes with valid ones. This method
|
67
|
+
# naively assumes if you have invalid UTF8 bytes, they are either Windows
|
68
|
+
# CP-1252 or ISO8859-1. In practice this isn't a bad assumption, but may not
|
69
|
+
# always work.
|
70
|
+
def tidy_bytes(string)
|
71
|
+
string.scrub do |bad|
|
72
|
+
tidy_byte(*bad.bytes).flatten.compact.pack('C*').unpack('U*').pack('U*')
|
73
|
+
end
|
74
|
+
end
|
75
|
+
else
|
76
|
+
def tidy_bytes(string)
|
77
|
+
bytes = string.unpack("C*")
|
78
|
+
conts_expected = 0
|
79
|
+
last_lead = 0
|
73
80
|
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
is_restricted = byte > 244
|
81
|
+
bytes.each_index do |i|
|
82
|
+
byte = bytes[i]
|
83
|
+
is_cont = byte > 127 && byte < 192
|
84
|
+
is_lead = byte > 191 && byte < 245
|
85
|
+
is_unused = byte > 240
|
86
|
+
is_restricted = byte > 244
|
81
87
|
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
|
90
|
-
|
91
|
-
|
92
|
-
|
93
|
-
|
94
|
-
|
95
|
-
|
96
|
-
|
97
|
-
|
98
|
-
|
99
|
-
|
100
|
-
|
101
|
-
|
102
|
-
|
103
|
-
|
88
|
+
# Impossible or highly unlikely byte? Clean it.
|
89
|
+
if is_unused || is_restricted
|
90
|
+
bytes[i] = tidy_byte(byte)
|
91
|
+
elsif is_cont
|
92
|
+
# Not expecting continuation byte? Clean up. Otherwise, now expect one less.
|
93
|
+
conts_expected == 0 ? bytes[i] = tidy_byte(byte) : conts_expected -= 1
|
94
|
+
else
|
95
|
+
if conts_expected > 0
|
96
|
+
# Expected continuation, but got ASCII or leading? Clean backwards up to
|
97
|
+
# the leading byte.
|
98
|
+
(1..(i - last_lead)).each {|j| bytes[i - j] = tidy_byte(bytes[i - j])}
|
99
|
+
conts_expected = 0
|
100
|
+
end
|
101
|
+
if is_lead
|
102
|
+
# Final byte is leading? Clean it.
|
103
|
+
if i == bytes.length - 1
|
104
|
+
bytes[i] = tidy_byte(bytes.last)
|
105
|
+
else
|
106
|
+
# Valid leading byte? Expect continuations determined by position of
|
107
|
+
# first zero bit, with max of 3.
|
108
|
+
conts_expected = byte < 224 ? 1 : byte < 240 ? 2 : 3
|
109
|
+
last_lead = i
|
110
|
+
end
|
104
111
|
end
|
105
112
|
end
|
106
113
|
end
|
114
|
+
bytes.empty? ? "" : bytes.flatten.compact.pack("C*").unpack("U*").pack("U*")
|
107
115
|
end
|
108
|
-
bytes.empty? ? "" : bytes.flatten.compact.pack("C*").unpack("U*").pack("U*")
|
109
116
|
end
|
110
117
|
|
111
118
|
private
|
@@ -1,9 +1,11 @@
|
|
1
|
+
require 'unicode'
|
2
|
+
|
1
3
|
module Babosa
|
2
4
|
module UTF8
|
3
5
|
# A UTF-8 proxy using the Unicode gem.
|
4
6
|
# @see http://github.com/blackwinter/unicode
|
5
7
|
module UnicodeProxy
|
6
|
-
extend
|
8
|
+
extend Proxy
|
7
9
|
extend self
|
8
10
|
def downcase(string)
|
9
11
|
Unicode.downcase(string)
|
data/lib/babosa/version.rb
CHANGED
data/spec/babosa_spec.rb
CHANGED
@@ -4,7 +4,7 @@ require File.expand_path("../spec_helper", __FILE__)
|
|
4
4
|
describe Babosa::Identifier do
|
5
5
|
|
6
6
|
it "should respond_to :empty?" do
|
7
|
-
"".to_slug.
|
7
|
+
expect("".to_slug).to respond_to(:empty?)
|
8
8
|
end
|
9
9
|
|
10
10
|
%w[approximate_ascii clean downcase word_chars normalize to_ascii upcase with_dashes].each do |method|
|
@@ -18,128 +18,141 @@ describe Babosa::Identifier do
|
|
18
18
|
describe "#word_chars" do
|
19
19
|
it "word_chars! should leave only letters and spaces" do
|
20
20
|
string = "a*$%^$@!@b$%^&*()*!c"
|
21
|
-
string.to_slug.word_chars.
|
21
|
+
expect(string.to_slug.word_chars!).to match(/[a-z ]*/i)
|
22
22
|
end
|
23
23
|
end
|
24
24
|
|
25
25
|
describe "#transliterate" do
|
26
26
|
it "should transliterate to ascii" do
|
27
|
-
|
27
|
+
(0xC0..0x17E).to_a.each do |codepoint|
|
28
28
|
ss = [codepoint].pack("U*").to_slug
|
29
|
-
ss.approximate_ascii.
|
29
|
+
expect(ss.approximate_ascii!).to match(/[\x0-\x7f]/)
|
30
30
|
end
|
31
31
|
end
|
32
32
|
|
33
33
|
it "should transliterate uncomposed utf8" do
|
34
34
|
string = [117, 776].pack("U*") # "ü" as ASCII "u" plus COMBINING DIAERESIS
|
35
|
-
string.to_slug.approximate_ascii.
|
35
|
+
expect(string.to_slug.approximate_ascii).to eql("u")
|
36
36
|
end
|
37
37
|
|
38
38
|
it "should transliterate using multiple transliterators" do
|
39
39
|
string = "свободное režģis"
|
40
|
-
string.to_slug.approximate_ascii(:latin, :russian).
|
40
|
+
expect(string.to_slug.approximate_ascii(:latin, :russian)).to eql("svobodnoe rezgis")
|
41
41
|
end
|
42
42
|
end
|
43
43
|
|
44
44
|
describe "#downcase" do
|
45
45
|
it "should lowercase strings" do
|
46
|
-
"FELIZ AÑO".to_slug.downcase.
|
46
|
+
expect("FELIZ AÑO".to_slug.downcase).to eql("feliz año")
|
47
47
|
end
|
48
48
|
end
|
49
49
|
|
50
50
|
describe "#upcase" do
|
51
51
|
it "should uppercase strings" do
|
52
|
-
"feliz año".to_slug.upcase.
|
52
|
+
expect("feliz año".to_slug.upcase).to eql("FELIZ AÑO")
|
53
53
|
end
|
54
54
|
end
|
55
55
|
|
56
56
|
describe "#normalize" do
|
57
57
|
|
58
58
|
it "should allow passing locale as key for :transliterate" do
|
59
|
-
"ö".to_slug.clean.normalize(:transliterate => :german).
|
59
|
+
expect("ö".to_slug.clean.normalize(:transliterate => :german)).to eql("oe")
|
60
60
|
end
|
61
61
|
|
62
62
|
it "should replace whitespace with dashes" do
|
63
|
-
"a b".to_slug.clean.normalize.
|
63
|
+
expect("a b".to_slug.clean.normalize).to eql("a-b")
|
64
64
|
end
|
65
65
|
|
66
66
|
it "should replace multiple spaces with 1 dash" do
|
67
|
-
"a b".to_slug.clean.normalize.
|
67
|
+
expect("a b".to_slug.clean.normalize).to eql("a-b")
|
68
68
|
end
|
69
69
|
|
70
70
|
it "should replace multiple dashes with 1 dash" do
|
71
|
-
"male - female".to_slug.normalize.
|
71
|
+
expect("male - female".to_slug.normalize).to eql("male-female")
|
72
72
|
end
|
73
73
|
|
74
74
|
it "should strip trailing space" do
|
75
|
-
"ab ".to_slug.normalize.
|
75
|
+
expect("ab ".to_slug.normalize).to eql("ab")
|
76
76
|
end
|
77
77
|
|
78
78
|
it "should strip leading space" do
|
79
|
-
" ab".to_slug.normalize.
|
79
|
+
expect(" ab".to_slug.normalize).to eql("ab")
|
80
80
|
end
|
81
81
|
|
82
82
|
it "should strip trailing slashes" do
|
83
|
-
"ab-".to_slug.normalize.
|
83
|
+
expect("ab-".to_slug.normalize).to eql("ab")
|
84
84
|
end
|
85
85
|
|
86
86
|
it "should strip leading slashes" do
|
87
|
-
"-ab".to_slug.normalize.
|
87
|
+
expect("-ab".to_slug.normalize).to eql("ab")
|
88
88
|
end
|
89
89
|
|
90
90
|
it "should not modify valid name strings" do
|
91
|
-
"a-b-c-d".to_slug.normalize.
|
91
|
+
expect("a-b-c-d".to_slug.normalize).to eql("a-b-c-d")
|
92
92
|
end
|
93
93
|
|
94
94
|
it "should not convert underscores" do
|
95
|
-
"hello_world".to_slug.normalize.
|
95
|
+
expect("hello_world".to_slug.normalize).to eql("hello_world")
|
96
96
|
end
|
97
97
|
|
98
98
|
it "should work with non roman chars" do
|
99
|
-
"検 索".to_slug.normalize.
|
99
|
+
expect("検 索".to_slug.normalize).to eql("検-索")
|
100
100
|
end
|
101
101
|
|
102
102
|
context "with to_ascii option" do
|
103
103
|
it "should approximate and strip non ascii" do
|
104
104
|
ss = "カタカナ: katakana is über cool".to_slug
|
105
|
-
ss.normalize(:to_ascii => true).
|
105
|
+
expect(ss.normalize(:to_ascii => true)).to eql("katakana-is-uber-cool")
|
106
106
|
end
|
107
107
|
end
|
108
108
|
end
|
109
109
|
|
110
110
|
describe "#truncate_bytes" do
|
111
111
|
it "should by byte length" do
|
112
|
-
"üa".to_slug.truncate_bytes(2).
|
113
|
-
"üa".to_slug.truncate_bytes(1).
|
114
|
-
"üa".to_slug.truncate_bytes(100).
|
115
|
-
"üéøá".to_slug.truncate_bytes(3).
|
112
|
+
expect("üa".to_slug.truncate_bytes(2)).to eql("ü")
|
113
|
+
expect("üa".to_slug.truncate_bytes(1)).to eql("")
|
114
|
+
expect("üa".to_slug.truncate_bytes(100)).to eql("üa")
|
115
|
+
expect("üéøá".to_slug.truncate_bytes(3)).to eql("ü")
|
116
116
|
end
|
117
117
|
end
|
118
118
|
|
119
119
|
describe "#truncate" do
|
120
120
|
it "should truncate by char length" do
|
121
|
-
"üa".to_slug.truncate(2).
|
122
|
-
"üa".to_slug.truncate(1).
|
123
|
-
"üa".to_slug.truncate(100).
|
121
|
+
expect("üa".to_slug.truncate(2)).to eql("üa")
|
122
|
+
expect("üa".to_slug.truncate(1)).to eql("ü")
|
123
|
+
expect("üa".to_slug.truncate(100)).to eql("üa")
|
124
124
|
end
|
125
125
|
end
|
126
126
|
|
127
127
|
describe "#with_dashes" do
|
128
128
|
it "should not change byte size when replacing spaces" do
|
129
|
-
"".to_slug.with_dashes.bytesize.
|
130
|
-
" ".to_slug.with_dashes.bytesize.
|
131
|
-
"-abc-".to_slug.with_dashes.bytesize.
|
132
|
-
" abc ".to_slug.with_dashes.bytesize.
|
133
|
-
" a bc ".to_slug.with_dashes.bytesize.
|
129
|
+
expect("".to_slug.with_dashes.bytesize).to eql(0)
|
130
|
+
expect(" ".to_slug.with_dashes.bytesize).to eql(1)
|
131
|
+
expect("-abc-".to_slug.with_dashes.bytesize).to eql(5)
|
132
|
+
expect(" abc ".to_slug.with_dashes.bytesize).to eql(5)
|
133
|
+
expect(" a bc ".to_slug.with_dashes.bytesize).to eql(7)
|
134
134
|
end
|
135
135
|
end
|
136
136
|
|
137
137
|
describe "#to_ruby_method" do
|
138
138
|
it "should get a string suitable for use as a ruby method" do
|
139
|
-
"¿¿¿hello... world???".to_slug.to_ruby_method.
|
140
|
-
"カタカナ: katakana is über cool".to_slug.to_ruby_method.
|
141
|
-
"カタカナ: katakana is über cool!".to_slug.to_ruby_method.
|
142
|
-
"カタカナ: katakana is über cool".to_slug.to_ruby_method(false).
|
139
|
+
expect("¿¿¿hello... world???".to_slug.to_ruby_method).to eql("hello_world?")
|
140
|
+
expect("カタカナ: katakana is über cool".to_slug.to_ruby_method).to eql("katakana_is_uber_cool")
|
141
|
+
expect("カタカナ: katakana is über cool!".to_slug.to_ruby_method).to eql("katakana_is_uber_cool!")
|
142
|
+
expect("カタカナ: katakana is über cool".to_slug.to_ruby_method(false)).to eql("katakana_is_uber_cool")
|
143
|
+
end
|
144
|
+
|
145
|
+
it "should optionally remove trailing punctuation" do
|
146
|
+
expect("¿¿¿hello... world???".to_slug.to_ruby_method(false)).to eql("hello_world")
|
147
|
+
end
|
148
|
+
|
149
|
+
it "should raise an error when it would generate an impossible method name" do
|
150
|
+
# "1".to_identifier.to_ruby_method
|
151
|
+
expect {"1".to_identifier.to_ruby_method}.to raise_error(Babosa::Identifier::Error)
|
152
|
+
end
|
153
|
+
|
154
|
+
it "should raise Babosa::Error error when the string is nil" do
|
155
|
+
expect { "".to_slug.to_ruby_method }.to raise_error(Babosa::Identifier::Error)
|
143
156
|
end
|
144
157
|
end
|
145
|
-
end
|
158
|
+
end
|