kebab 1.0.2

Sign up to get free protection for your applications and to get access to all the features.
Files changed (55)
  1. checksums.yaml +7 -0
  2. data/.gemtest +0 -0
  3. data/Changelog.md +99 -0
  4. data/MIT-LICENSE +19 -0
  5. data/README.md +26 -0
  6. data/Rakefile +34 -0
  7. data/lib/kebab.rb +18 -0
  8. data/lib/kebab/identifier.rb +294 -0
  9. data/lib/kebab/transliterator/base.rb +110 -0
  10. data/lib/kebab/transliterator/bulgarian.rb +27 -0
  11. data/lib/kebab/transliterator/cyrillic.rb +108 -0
  12. data/lib/kebab/transliterator/danish.rb +15 -0
  13. data/lib/kebab/transliterator/german.rb +15 -0
  14. data/lib/kebab/transliterator/greek.rb +77 -0
  15. data/lib/kebab/transliterator/hindi.rb +137 -0
  16. data/lib/kebab/transliterator/latin.rb +199 -0
  17. data/lib/kebab/transliterator/macedonian.rb +29 -0
  18. data/lib/kebab/transliterator/norwegian.rb +14 -0
  19. data/lib/kebab/transliterator/romanian.rb +13 -0
  20. data/lib/kebab/transliterator/russian.rb +22 -0
  21. data/lib/kebab/transliterator/serbian.rb +34 -0
  22. data/lib/kebab/transliterator/spanish.rb +9 -0
  23. data/lib/kebab/transliterator/swedish.rb +16 -0
  24. data/lib/kebab/transliterator/turkish.rb +8 -0
  25. data/lib/kebab/transliterator/ukrainian.rb +30 -0
  26. data/lib/kebab/transliterator/vietnamese.rb +143 -0
  27. data/lib/kebab/utf8/active_support_proxy.rb +26 -0
  28. data/lib/kebab/utf8/dumb_proxy.rb +49 -0
  29. data/lib/kebab/utf8/java_proxy.rb +22 -0
  30. data/lib/kebab/utf8/mappings.rb +193 -0
  31. data/lib/kebab/utf8/proxy.rb +125 -0
  32. data/lib/kebab/utf8/unicode_proxy.rb +23 -0
  33. data/lib/kebab/version.rb +5 -0
  34. data/spec/kebab_spec.rb +155 -0
  35. data/spec/spec_helper.rb +45 -0
  36. data/spec/transliterators/base_spec.rb +16 -0
  37. data/spec/transliterators/bulgarian_spec.rb +20 -0
  38. data/spec/transliterators/danish_spec.rb +17 -0
  39. data/spec/transliterators/german_spec.rb +17 -0
  40. data/spec/transliterators/greek_spec.rb +17 -0
  41. data/spec/transliterators/hindi_spec.rb +17 -0
  42. data/spec/transliterators/latin_spec.rb +9 -0
  43. data/spec/transliterators/macedonian_spec.rb +9 -0
  44. data/spec/transliterators/norwegian_spec.rb +18 -0
  45. data/spec/transliterators/polish_spec.rb +14 -0
  46. data/spec/transliterators/romanian_spec.rb +19 -0
  47. data/spec/transliterators/russian_spec.rb +9 -0
  48. data/spec/transliterators/serbian_spec.rb +25 -0
  49. data/spec/transliterators/spanish_spec.rb +13 -0
  50. data/spec/transliterators/swedish_spec.rb +18 -0
  51. data/spec/transliterators/turkish_spec.rb +24 -0
  52. data/spec/transliterators/ukrainian_spec.rb +88 -0
  53. data/spec/transliterators/vietnamese_spec.rb +18 -0
  54. data/spec/utf8_proxy_spec.rb +53 -0
  55. metadata +167 -0
@@ -0,0 +1,199 @@
# encoding: utf-8
module Kebab
  module Transliterator
    # ASCII approximations for accented and special Latin letters.
    #
    # The table covers the Latin-1 Supplement letters followed by the
    # Latin Extended-A letters. Each group lists uppercase first, then
    # lowercase, in code-point order. How the table is consumed is decided
    # by Base, which is defined outside this file.
    class Latin < Base

      # Every key is a single character (one code point); the value is the
      # ASCII string that replaces it.
      APPROXIMATIONS = {
        # Latin-1 Supplement, uppercase (plus sharp s)
        "À" => "A", "Á" => "A", "Â" => "A", "Ã" => "A", "Ä" => "A", "Å" => "A",
        "Æ" => "Ae",
        "Ç" => "C",
        "È" => "E", "É" => "E", "Ê" => "E", "Ë" => "E",
        "Ì" => "I", "Í" => "I", "Î" => "I", "Ï" => "I",
        "Ð" => "D",
        "Ñ" => "N",
        "Ò" => "O", "Ó" => "O", "Ô" => "O", "Õ" => "O", "Ö" => "O", "Ø" => "O",
        "Ù" => "U", "Ú" => "U", "Û" => "U", "Ü" => "U",
        "Ý" => "Y",
        "Þ" => "Th",
        "ß" => "ss",

        # Latin-1 Supplement, lowercase
        "à" => "a", "á" => "a", "â" => "a", "ã" => "a", "ä" => "a", "å" => "a",
        "æ" => "ae",
        "ç" => "c",
        "è" => "e", "é" => "e", "ê" => "e", "ë" => "e",
        "ì" => "i", "í" => "i", "î" => "i", "ï" => "i",
        "ð" => "d",
        "ñ" => "n",
        "ò" => "o", "ó" => "o", "ô" => "o", "õ" => "o", "ö" => "o", "ø" => "o",
        "ù" => "u", "ú" => "u", "û" => "u", "ü" => "u",
        "ý" => "y",
        "þ" => "th",
        "ÿ" => "y",

        # Latin Extended-A, uppercase
        "Ā" => "A", "Ă" => "A", "Ą" => "A",
        "Ć" => "C", "Ĉ" => "C", "Ċ" => "C", "Č" => "C",
        "Ď" => "D", "Đ" => "D",
        "Ē" => "E", "Ĕ" => "E", "Ė" => "E", "Ę" => "E", "Ě" => "E",
        "Ĝ" => "G", "Ğ" => "G", "Ġ" => "G", "Ģ" => "G",
        "Ĥ" => "H", "Ħ" => "H",
        "Ĩ" => "I", "Ī" => "I", "Ĭ" => "I", "Į" => "I", "İ" => "I",
        "Ĳ" => "Ij", # U+0132 LATIN CAPITAL LIGATURE IJ: one code point, not "I" + "J"
        "Ĵ" => "J",
        "Ķ" => "K",
        "Ĺ" => "L", "Ļ" => "L", "Ľ" => "L", "Ŀ" => "L", "Ł" => "L",
        "Ń" => "N", "Ņ" => "N", "Ň" => "N",
        "Ŋ" => "Ng",
        "Ō" => "O", "Ŏ" => "O", "Ő" => "O",
        "Œ" => "OE",
        "Ŕ" => "R", "Ŗ" => "R", "Ř" => "R",
        "Ś" => "S", "Ŝ" => "S", "Ş" => "S", "Š" => "S",
        "Ţ" => "T", "Ť" => "T", "Ŧ" => "T",
        "Ũ" => "U", "Ū" => "U", "Ŭ" => "U", "Ů" => "U", "Ű" => "U", "Ų" => "U",
        "Ŵ" => "W",
        "Ŷ" => "Y", "Ÿ" => "Y",
        "Ź" => "Z", "Ż" => "Z", "Ž" => "Z",

        # Latin Extended-A, lowercase
        "ā" => "a", "ă" => "a", "ą" => "a",
        "ć" => "c", "ĉ" => "c", "ċ" => "c", "č" => "c",
        "ď" => "d", "đ" => "d",
        "ē" => "e", "ĕ" => "e", "ė" => "e", "ę" => "e", "ě" => "e",
        "ĝ" => "g", "ğ" => "g", "ġ" => "g", "ģ" => "g",
        "ĥ" => "h", "ħ" => "h",
        "ĩ" => "i", "ī" => "i", "ĭ" => "i", "į" => "i", "ı" => "i",
        "ĳ" => "ij", # U+0133 LATIN SMALL LIGATURE IJ: one code point, not "i" + "j"
        "ĵ" => "j",
        "ķ" => "k", "ĸ" => "k",
        "ĺ" => "l", "ļ" => "l", "ľ" => "l", "ŀ" => "l", "ł" => "l",
        "ń" => "n", "ņ" => "n", "ň" => "n",
        "ŉ" => "n", # U+0149 LATIN SMALL LETTER N PRECEDED BY APOSTROPHE: one code point
        "ŋ" => "ng",
        "ō" => "o", "ŏ" => "o", "ő" => "o",
        "œ" => "oe",
        "ŕ" => "r", "ŗ" => "r", "ř" => "r",
        "ś" => "s", "ŝ" => "s", "ş" => "s", "š" => "s",
        "ţ" => "t", "ť" => "t", "ŧ" => "t",
        "ũ" => "u", "ū" => "u", "ŭ" => "u", "ů" => "u", "ű" => "u", "ų" => "u",
        "ŵ" => "w",
        "ŷ" => "y",
        "ž" => "z", "ź" => "z", "ż" => "z"
      }
    end
  end
end
@@ -0,0 +1,29 @@
# encoding: utf-8
module Kebab
  module Transliterator
    # Macedonian-specific replacements, declared on a Cyrillic subclass.
    # The parent Cyrillic table lives outside this file.
    class Macedonian < Cyrillic
      # Ten uppercase letters followed by their ten lowercase counterparts.
      APPROXIMATIONS = {
        "Ѓ" => "Gj", "Љ" => "Lj", "Њ" => "Nj", "Ќ" => "Kj", "Џ" => "Dzh",
        "Ж" => "Zh", "Ц" => "C",  "Ѕ" => "Z",  "Ј" => "J",  "Х" => "H",
        "ѓ" => "gj", "љ" => "lj", "њ" => "nj", "ќ" => "kj", "џ" => "dzh",
        "ж" => "zh", "ц" => "c",  "ѕ" => "z",  "ј" => "j",  "х" => "h"
      }
    end
  end
end
@@ -0,0 +1,14 @@
# encoding: utf-8
module Kebab
  module Transliterator
    # Norwegian-specific replacements, declared on a Latin subclass:
    # "ø" becomes "oe" and "å" becomes "aa", in both cases.
    class Norwegian < Latin
      APPROXIMATIONS = {
        "ø" => "oe", "å" => "aa",
        "Ø" => "Oe", "Å" => "Aa"
      }
    end
  end
end
@@ -0,0 +1,13 @@
# encoding: utf-8
module Kebab
  module Transliterator
    # Romanian-specific replacements, declared on a Latin subclass:
    # the s and t letters with a comma below lose their diacritic.
    class Romanian < Latin
      APPROXIMATIONS = {
        "ș" => "s", "ț" => "t",
        "Ș" => "S", "Ț" => "T"
      }
    end
  end
end
@@ -0,0 +1,22 @@
# encoding: utf-8
module Kebab
  module Transliterator
    # Russian-specific replacements, declared on a Cyrillic subclass.
    # Letters not listed here are left to the parent Cyrillic table, which
    # is defined outside this file.
    class Russian < Cyrillic
      APPROXIMATIONS = {
        # uppercase
        "Й" => "I",  "М" => "M",   "Х" => "H", "Ц" => "Ts",
        "Ш" => "Sh", "Щ" => "Sch", "Ю" => "U", "Я" => "Ya",
        # lowercase
        "й" => "i", "х" => "h", "ц" => "ts", "щ" => "sch", "ю" => "u"
      }
    end
  end
end
@@ -0,0 +1,34 @@
# encoding: utf-8

module Kebab
  module Transliterator
    # Serbian replacements for both the Latin and the Cyrillic script.
    #
    # The class inherits from Latin, and its table starts from a copy of the
    # Cyrillic table (Hash#merge returns a new hash), over which the
    # Serbian-specific entries below are applied.
    class Serbian < Latin
      APPROXIMATIONS = Cyrillic::APPROXIMATIONS.merge(
        # Serbian Latin letters
        "Ð" => "Dj", # U+00D0 (Eth), kept so existing input keeps working
        "Đ" => "Dj", # U+0110, the capital counterpart of "đ" (U+0111) below
        "Č" => "Ch",
        "Š" => "Sh",
        "č" => "ch",
        "đ" => "dj",
        "š" => "sh",
        # Serbian Cyrillic, uppercase
        "Ћ" => "C",
        "Ц" => "C",
        "Ч" => "Ch",
        "Ђ" => "Dj",
        "Џ" => "Dz",
        "Х" => "H",
        "Ј" => "J",
        "Љ" => "Lj",
        "Њ" => "Nj",
        # Serbian Cyrillic, lowercase
        "ц" => "c",
        "ћ" => "c",
        "ч" => "ch",
        "ђ" => "dj",
        "џ" => "dz",
        "х" => "h",
        "ј" => "j",
        "љ" => "lj",
        "њ" => "nj"
      )
    end
  end
end
@@ -0,0 +1,9 @@
# encoding: utf-8

module Kebab
  module Transliterator
    # Spanish-specific replacements, declared on a Latin subclass:
    # n with tilde becomes "ni" / "Ni".
    class Spanish < Latin
      APPROXIMATIONS = {
        "ñ" => "ni",
        "Ñ" => "Ni"
      }
    end
  end
end
@@ -0,0 +1,16 @@
# encoding: utf-8
module Kebab
  module Transliterator
    # Swedish-specific replacements, declared on a Latin subclass:
    # å, ä and ö expand to two-letter forms in both cases.
    class Swedish < Latin
      APPROXIMATIONS = {
        "å" => "aa", "ä" => "ae", "ö" => "oe",
        "Å" => "Aa", "Ä" => "Ae", "Ö" => "Oe"
      }
    end
  end
end
@@ -0,0 +1,8 @@
# encoding: utf-8

module Kebab
  module Transliterator
    # Turkish transliterator. It declares nothing of its own, so everything
    # it does comes from Latin.
    class Turkish < Latin; end
  end
end
@@ -0,0 +1,30 @@
# encoding: utf-8
module Kebab
  module Transliterator
    # Ukrainian-specific replacements, declared on a Cyrillic subclass.
    # Letters not listed here are left to the parent Cyrillic table, which
    # is defined outside this file.
    class Ukrainian < Cyrillic
      APPROXIMATIONS = {
        "Г" => "H",    "г" => "h",
        "Ґ" => "G",    "ґ" => "g",
        "є" => "ie",
        "И" => "Y",    "и" => "y",
        "І" => "I",    "і" => "i",
        "ї" => "i",
        "Й" => "Y",    "й" => "i",
        "Х" => "Kh",   "х" => "kh",
        "Ц" => "Ts",   "ц" => "ts",
        "Щ" => "Shch", "щ" => "shch",
        "ю" => "iu",
        "я" => "ia",
        # The ASCII apostrophe is dropped.
        "'" => ""
      }
    end
  end
end
@@ -0,0 +1,143 @@
# encoding: utf-8
module Kebab
  module Transliterator
    # Vietnamese-specific replacements, declared on a Latin subclass.
    # Every accented vowel maps to its bare ASCII vowel, and d with stroke
    # maps to a plain d, with case preserved.
    class Vietnamese < Latin
      APPROXIMATIONS = {
        # a / A
        "à" => "a", "á" => "a", "ạ" => "a", "ả" => "a", "ã" => "a", "â" => "a",
        "ầ" => "a", "ấ" => "a", "ậ" => "a", "ẩ" => "a", "ẫ" => "a", "ă" => "a",
        "ằ" => "a", "ắ" => "a", "ặ" => "a", "ẳ" => "a", "ẵ" => "a",
        "À" => "A", "Á" => "A", "Ạ" => "A", "Ả" => "A", "Ã" => "A", "Â" => "A",
        "Ầ" => "A", "Ấ" => "A", "Ậ" => "A", "Ẩ" => "A", "Ẫ" => "A", "Ă" => "A",
        "Ằ" => "A", "Ắ" => "A", "Ặ" => "A", "Ẳ" => "A", "Ẵ" => "A",
        # i / I
        "ì" => "i", "í" => "i", "ị" => "i", "ỉ" => "i", "ĩ" => "i",
        "Ì" => "I", "Í" => "I", "Ị" => "I", "Ỉ" => "I", "Ĩ" => "I",
        # u / U
        "ù" => "u", "ú" => "u", "ụ" => "u", "ủ" => "u", "ũ" => "u", "ư" => "u",
        "ừ" => "u", "ứ" => "u", "ự" => "u", "ử" => "u", "ữ" => "u",
        "Ù" => "U", "Ú" => "U", "Ụ" => "U", "Ủ" => "U", "Ũ" => "U", "Ư" => "U",
        "Ừ" => "U", "Ứ" => "U", "Ự" => "U", "Ử" => "U", "Ữ" => "U",
        # e / E
        "è" => "e", "é" => "e", "ẹ" => "e", "ẻ" => "e", "ẽ" => "e", "ê" => "e",
        "ề" => "e", "ế" => "e", "ệ" => "e", "ể" => "e", "ễ" => "e",
        "È" => "E", "É" => "E", "Ẹ" => "E", "Ẻ" => "E", "Ẽ" => "E", "Ê" => "E",
        "Ề" => "E", "Ế" => "E", "Ệ" => "E", "Ể" => "E", "Ễ" => "E",
        # o / O
        "ò" => "o", "ó" => "o", "ọ" => "o", "ỏ" => "o", "õ" => "o", "ô" => "o",
        "ồ" => "o", "ố" => "o", "ộ" => "o", "ổ" => "o", "ỗ" => "o", "ơ" => "o",
        "ờ" => "o", "ớ" => "o", "ợ" => "o", "ở" => "o", "ỡ" => "o",
        "Ò" => "O", "Ó" => "O", "Ọ" => "O", "Ỏ" => "O", "Õ" => "O", "Ô" => "O",
        "Ồ" => "O", "Ố" => "O", "Ộ" => "O", "Ổ" => "O", "Ỗ" => "O", "Ơ" => "O",
        "Ờ" => "O", "Ớ" => "O", "Ợ" => "O", "Ở" => "O", "Ỡ" => "O",
        # y / Y
        "ỳ" => "y", "ý" => "y", "ỵ" => "y", "ỷ" => "y", "ỹ" => "y",
        "Ỳ" => "Y", "Ý" => "Y", "Ỵ" => "Y", "Ỷ" => "Y", "Ỹ" => "Y",
        # d with stroke
        "đ" => "d", "Đ" => "D"
      }
    end
  end
end