kebab 1.0.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.gemtest +0 -0
- data/Changelog.md +99 -0
- data/MIT-LICENSE +19 -0
- data/README.md +26 -0
- data/Rakefile +34 -0
- data/lib/kebab.rb +18 -0
- data/lib/kebab/identifier.rb +294 -0
- data/lib/kebab/transliterator/base.rb +110 -0
- data/lib/kebab/transliterator/bulgarian.rb +27 -0
- data/lib/kebab/transliterator/cyrillic.rb +108 -0
- data/lib/kebab/transliterator/danish.rb +15 -0
- data/lib/kebab/transliterator/german.rb +15 -0
- data/lib/kebab/transliterator/greek.rb +77 -0
- data/lib/kebab/transliterator/hindi.rb +137 -0
- data/lib/kebab/transliterator/latin.rb +199 -0
- data/lib/kebab/transliterator/macedonian.rb +29 -0
- data/lib/kebab/transliterator/norwegian.rb +14 -0
- data/lib/kebab/transliterator/romanian.rb +13 -0
- data/lib/kebab/transliterator/russian.rb +22 -0
- data/lib/kebab/transliterator/serbian.rb +34 -0
- data/lib/kebab/transliterator/spanish.rb +9 -0
- data/lib/kebab/transliterator/swedish.rb +16 -0
- data/lib/kebab/transliterator/turkish.rb +8 -0
- data/lib/kebab/transliterator/ukrainian.rb +30 -0
- data/lib/kebab/transliterator/vietnamese.rb +143 -0
- data/lib/kebab/utf8/active_support_proxy.rb +26 -0
- data/lib/kebab/utf8/dumb_proxy.rb +49 -0
- data/lib/kebab/utf8/java_proxy.rb +22 -0
- data/lib/kebab/utf8/mappings.rb +193 -0
- data/lib/kebab/utf8/proxy.rb +125 -0
- data/lib/kebab/utf8/unicode_proxy.rb +23 -0
- data/lib/kebab/version.rb +5 -0
- data/spec/kebab_spec.rb +155 -0
- data/spec/spec_helper.rb +45 -0
- data/spec/transliterators/base_spec.rb +16 -0
- data/spec/transliterators/bulgarian_spec.rb +20 -0
- data/spec/transliterators/danish_spec.rb +17 -0
- data/spec/transliterators/german_spec.rb +17 -0
- data/spec/transliterators/greek_spec.rb +17 -0
- data/spec/transliterators/hindi_spec.rb +17 -0
- data/spec/transliterators/latin_spec.rb +9 -0
- data/spec/transliterators/macedonian_spec.rb +9 -0
- data/spec/transliterators/norwegian_spec.rb +18 -0
- data/spec/transliterators/polish_spec.rb +14 -0
- data/spec/transliterators/romanian_spec.rb +19 -0
- data/spec/transliterators/russian_spec.rb +9 -0
- data/spec/transliterators/serbian_spec.rb +25 -0
- data/spec/transliterators/spanish_spec.rb +13 -0
- data/spec/transliterators/swedish_spec.rb +18 -0
- data/spec/transliterators/turkish_spec.rb +24 -0
- data/spec/transliterators/ukrainian_spec.rb +88 -0
- data/spec/transliterators/vietnamese_spec.rb +18 -0
- data/spec/utf8_proxy_spec.rb +53 -0
- metadata +167 -0
@@ -0,0 +1,199 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
module Kebab
  module Transliterator
    # Approximation table for Latin-script letters: every key is a single
    # accented, ligature or otherwise non-ASCII Latin character and every
    # value is the plain-ASCII string that stands in for it.
    #
    # NOTE(review): how Base consumes APPROXIMATIONS is not visible from this
    # file; the table is assumed to be looked up one character at a time, so
    # every key must be exactly one code point.
    class Latin < Base

      APPROXIMATIONS = {
        # --- Latin-1 Supplement, upper case (plus sharp s) ---
        "À" => "A", "Á" => "A", "Â" => "A", "Ã" => "A", "Ä" => "A", "Å" => "A",
        "Æ" => "Ae",
        "Ç" => "C",
        "È" => "E", "É" => "E", "Ê" => "E", "Ë" => "E",
        "Ì" => "I", "Í" => "I", "Î" => "I", "Ï" => "I",
        "Ð" => "D",
        "Ñ" => "N",
        "Ò" => "O", "Ó" => "O", "Ô" => "O", "Õ" => "O", "Ö" => "O", "Ø" => "O",
        "Ù" => "U", "Ú" => "U", "Û" => "U", "Ü" => "U",
        "Ý" => "Y",
        "Þ" => "Th",
        "ß" => "ss",

        # --- Latin-1 Supplement, lower case ---
        "à" => "a", "á" => "a", "â" => "a", "ã" => "a", "ä" => "a", "å" => "a",
        "æ" => "ae",
        "ç" => "c",
        "è" => "e", "é" => "e", "ê" => "e", "ë" => "e",
        "ì" => "i", "í" => "i", "î" => "i", "ï" => "i",
        "ð" => "d",
        "ñ" => "n",
        "ò" => "o", "ó" => "o", "ô" => "o", "õ" => "o", "ö" => "o", "ø" => "o",
        "ù" => "u", "ú" => "u", "û" => "u", "ü" => "u",
        "ý" => "y",
        "þ" => "th",
        "ÿ" => "y",

        # --- Latin Extended-A, upper case ---
        "Ā" => "A", "Ă" => "A", "Ą" => "A",
        "Ć" => "C", "Ĉ" => "C", "Ċ" => "C", "Č" => "C",
        "Ď" => "D", "Đ" => "D",
        "Ē" => "E", "Ĕ" => "E", "Ė" => "E", "Ę" => "E", "Ě" => "E",
        "Ĝ" => "G", "Ğ" => "G", "Ġ" => "G", "Ģ" => "G",
        "Ĥ" => "H", "Ħ" => "H",
        "Ĩ" => "I", "Ī" => "I", "Ĭ" => "I", "Į" => "I", "İ" => "I",
        # LATIN CAPITAL LIGATURE IJ. Written as an escape so the key stays a
        # single code point even if the file is run through a compatibility
        # normalization (which would turn it into the two letters "I" + "J").
        "\u0132" => "Ij",
        "Ĵ" => "J",
        "Ķ" => "K",
        "Ĺ" => "L", "Ļ" => "L", "Ľ" => "L", "Ŀ" => "L", "Ł" => "L",
        "Ń" => "N", "Ņ" => "N", "Ň" => "N",
        "Ŋ" => "Ng",
        "Ō" => "O", "Ŏ" => "O", "Ő" => "O",
        "Œ" => "OE",
        "Ŕ" => "R", "Ŗ" => "R", "Ř" => "R",
        "Ś" => "S", "Ŝ" => "S", "Ş" => "S", "Š" => "S",
        "Ţ" => "T", "Ť" => "T", "Ŧ" => "T",
        "Ũ" => "U", "Ū" => "U", "Ŭ" => "U", "Ů" => "U", "Ű" => "U", "Ų" => "U",
        "Ŵ" => "W",
        "Ŷ" => "Y", "Ÿ" => "Y",
        "Ź" => "Z", "Ż" => "Z", "Ž" => "Z",

        # --- Latin Extended-A, lower case ---
        "ā" => "a", "ă" => "a", "ą" => "a",
        "ć" => "c", "ĉ" => "c", "ċ" => "c", "č" => "c",
        "ď" => "d", "đ" => "d",
        "ē" => "e", "ĕ" => "e", "ė" => "e", "ę" => "e", "ě" => "e",
        "ĝ" => "g", "ğ" => "g", "ġ" => "g", "ģ" => "g",
        "ĥ" => "h", "ħ" => "h",
        "ĩ" => "i", "ī" => "i", "ĭ" => "i", "į" => "i", "ı" => "i",
        # LATIN SMALL LIGATURE IJ (escaped for the same reason as U+0132).
        "\u0133" => "ij",
        "ĵ" => "j",
        "ķ" => "k", "ĸ" => "k",
        "ĺ" => "l", "ļ" => "l", "ľ" => "l", "ŀ" => "l", "ł" => "l",
        "ń" => "n", "ņ" => "n", "ň" => "n",
        # LATIN SMALL LETTER N PRECEDED BY APOSTROPHE (escaped for the same
        # reason; normalization would split it into U+02BC + "n").
        "\u0149" => "n",
        "ŋ" => "ng",
        "ō" => "o", "ŏ" => "o", "ő" => "o",
        "œ" => "oe",
        "ŕ" => "r", "ŗ" => "r", "ř" => "r",
        "ś" => "s", "ŝ" => "s", "ş" => "s", "š" => "s",
        "ţ" => "t", "ť" => "t", "ŧ" => "t",
        "ũ" => "u", "ū" => "u", "ŭ" => "u", "ů" => "u", "ű" => "u", "ų" => "u",
        "ŵ" => "w",
        "ŷ" => "y",
        "ž" => "z", "ź" => "z", "ż" => "z"
      }
    end
  end
end
|
@@ -0,0 +1,29 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
module Kebab
  module Transliterator
    # Macedonian-specific approximation table. Each Cyrillic key is paired
    # with the ASCII string used in its place; the class derives from
    # Cyrillic, so only the Macedonian values are listed here.
    class Macedonian < Cyrillic
      APPROXIMATIONS = {
        # Upper-case letters
        "Ѓ" => "Gj", "Љ" => "Lj", "Њ" => "Nj", "Ќ" => "Kj", "Џ" => "Dzh",
        "Ж" => "Zh", "Ц" => "C", "Ѕ" => "Z", "Ј" => "J", "Х" => "H",

        # Lower-case letters, in the same order as above
        "ѓ" => "gj", "љ" => "lj", "њ" => "nj", "ќ" => "kj", "џ" => "dzh",
        "ж" => "zh", "ц" => "c", "ѕ" => "z", "ј" => "j", "х" => "h"
      }
    end
  end
end
|
@@ -0,0 +1,22 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
module Kebab
  module Transliterator
    # Russian-specific approximation table. Each Cyrillic key is paired with
    # the ASCII string used in its place; the class derives from Cyrillic,
    # so only the Russian values are listed here.
    class Russian < Cyrillic
      APPROXIMATIONS = {
        # Upper-case letters
        "Й" => "I", "М" => "M", "Х" => "H", "Ц" => "Ts",
        "Ш" => "Sh", "Щ" => "Sch", "Ю" => "U", "Я" => "Ya",

        # Lower-case letters (this table lists fewer of them than of the
        # upper-case letters)
        "й" => "i", "х" => "h", "ц" => "ts", "щ" => "sch", "ю" => "u"
      }
    end
  end
end
|
@@ -0,0 +1,34 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
|
3
|
+
module Kebab
  module Transliterator
    # Serbian approximation table covering both scripts the language is
    # written in. The class derives from Latin, while the table itself starts
    # from a copy of Cyrillic's APPROXIMATIONS and overlays the Serbian
    # values listed below (later keys win in Hash#merge).
    class Serbian < Latin
      APPROXIMATIONS = Cyrillic.const_get(:APPROXIMATIONS).merge({
        # --- Latin-script letters ---
        # U+00D0 (eth) is a look-alike of the Serbian capital D-with-stroke;
        # it is kept so existing input that uses it keeps producing "Dj".
        "Ð" => "Dj",
        # U+0110, LATIN CAPITAL LETTER D WITH STROKE: the actual upper-case
        # partner of "đ" (U+0111) below. Without this key the capital letter
        # is not covered by this table at all.
        "\u0110" => "Dj",
        "Č" => "Ch",
        "Š" => "Sh",
        "č" => "ch",
        "đ" => "dj",
        "š" => "sh",

        # --- Cyrillic-script letters, upper case ---
        "Ћ" => "C",
        "Ц" => "C",
        "Ч" => "Ch",
        "Ђ" => "Dj",
        "Џ" => "Dz",
        "Х" => "H",
        "Ј" => "J",
        "Љ" => "Lj",
        "Њ" => "Nj",

        # --- Cyrillic-script letters, lower case ---
        "ц" => "c",
        "ћ" => "c",
        "ч" => "ch",
        "ђ" => "dj",
        "џ" => "dz",
        "х" => "h",
        "ј" => "j",
        "љ" => "lj",
        "њ" => "nj"
      })
    end
  end
end
|
@@ -0,0 +1,30 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
module Kebab
  module Transliterator
    # Ukrainian-specific approximation table. Each key is paired with the
    # ASCII string used in its place; the class derives from Cyrillic, so
    # only the Ukrainian values are listed here.
    class Ukrainian < Cyrillic
      APPROXIMATIONS = {
        "Г" => "H", "г" => "h",
        "Ґ" => "G", "ґ" => "g",
        "є" => "ie",
        "И" => "Y", "и" => "y",
        "І" => "I", "і" => "i",
        "ї" => "i",
        # Upper and lower case deliberately differ in this table.
        "Й" => "Y", "й" => "i",
        "Х" => "Kh", "х" => "kh",
        "Ц" => "Ts", "ц" => "ts",
        "Щ" => "Shch", "щ" => "shch",
        "ю" => "iu",
        "я" => "ia",
        # The apostrophe maps to the empty string, i.e. it is dropped.
        "'" => ""
      }
    end
  end
end
|
@@ -0,0 +1,143 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
module Kebab
  module Transliterator
    # Vietnamese approximation table: every vowel carrying tone and/or
    # quality marks is paired with its bare ASCII vowel, and the barred d
    # with a plain "d"/"D". The class derives from Latin.
    class Vietnamese < Latin
      APPROXIMATIONS = {
        # a / A
        "à" => "a", "á" => "a", "ạ" => "a", "ả" => "a", "ã" => "a",
        "â" => "a", "ầ" => "a", "ấ" => "a", "ậ" => "a", "ẩ" => "a", "ẫ" => "a",
        "ă" => "a", "ằ" => "a", "ắ" => "a", "ặ" => "a", "ẳ" => "a", "ẵ" => "a",
        "À" => "A", "Á" => "A", "Ạ" => "A", "Ả" => "A", "Ã" => "A",
        "Â" => "A", "Ầ" => "A", "Ấ" => "A", "Ậ" => "A", "Ẩ" => "A", "Ẫ" => "A",
        "Ă" => "A", "Ằ" => "A", "Ắ" => "A", "Ặ" => "A", "Ẳ" => "A", "Ẵ" => "A",

        # i / I
        "ì" => "i", "í" => "i", "ị" => "i", "ỉ" => "i", "ĩ" => "i",
        "Ì" => "I", "Í" => "I", "Ị" => "I", "Ỉ" => "I", "Ĩ" => "I",

        # u / U
        "ù" => "u", "ú" => "u", "ụ" => "u", "ủ" => "u", "ũ" => "u",
        "ư" => "u", "ừ" => "u", "ứ" => "u", "ự" => "u", "ử" => "u", "ữ" => "u",
        "Ù" => "U", "Ú" => "U", "Ụ" => "U", "Ủ" => "U", "Ũ" => "U",
        "Ư" => "U", "Ừ" => "U", "Ứ" => "U", "Ự" => "U", "Ử" => "U", "Ữ" => "U",

        # e / E
        "è" => "e", "é" => "e", "ẹ" => "e", "ẻ" => "e", "ẽ" => "e",
        "ê" => "e", "ề" => "e", "ế" => "e", "ệ" => "e", "ể" => "e", "ễ" => "e",
        "È" => "E", "É" => "E", "Ẹ" => "E", "Ẻ" => "E", "Ẽ" => "E",
        "Ê" => "E", "Ề" => "E", "Ế" => "E", "Ệ" => "E", "Ể" => "E", "Ễ" => "E",

        # o / O
        "ò" => "o", "ó" => "o", "ọ" => "o", "ỏ" => "o", "õ" => "o",
        "ô" => "o", "ồ" => "o", "ố" => "o", "ộ" => "o", "ổ" => "o", "ỗ" => "o",
        "ơ" => "o", "ờ" => "o", "ớ" => "o", "ợ" => "o", "ở" => "o", "ỡ" => "o",
        "Ò" => "O", "Ó" => "O", "Ọ" => "O", "Ỏ" => "O", "Õ" => "O",
        "Ô" => "O", "Ồ" => "O", "Ố" => "O", "Ộ" => "O", "Ổ" => "O", "Ỗ" => "O",
        "Ơ" => "O", "Ờ" => "O", "Ớ" => "O", "Ợ" => "O", "Ở" => "O", "Ỡ" => "O",

        # y / Y
        "ỳ" => "y", "ý" => "y", "ỵ" => "y", "ỷ" => "y", "ỹ" => "y",
        "Ỳ" => "Y", "Ý" => "Y", "Ỵ" => "Y", "Ỷ" => "Y", "Ỹ" => "Y",

        # d with stroke
        "đ" => "d",
        "Đ" => "D"
      }
    end
  end
end
|