babosa 0.2.2 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,15 @@
1
+ # encoding: utf-8
2
module Babosa
  module Transliterator
    # Transliterator for Danish text. Subclasses Latin and declares the
    # Danish-specific ASCII approximations below.
    class Danish < Latin
      # Danish letter => ASCII replacement. Lowercase letters map to a
      # lowercase digraph, uppercase letters to a capitalized digraph.
      APPROXIMATIONS = {
        # lowercase
        "æ" => "ae", "ø" => "oe", "å" => "aa",
        # uppercase
        "Ø" => "Oe", "Å" => "Aa"
      }
    end
  end
end
15
+
@@ -0,0 +1,15 @@
1
+ # encoding: utf-8
2
module Babosa
  module Transliterator
    # Transliterator for German text. Subclasses Latin and declares the
    # umlaut approximations below.
    class German < Latin
      # Umlaut => ASCII replacement: each umlaut becomes its base vowel
      # followed by "e", keeping the case of the first letter.
      APPROXIMATIONS = {
        # lowercase
        "ä" => "ae", "ö" => "oe", "ü" => "ue",
        # uppercase
        "Ä" => "Ae", "Ö" => "Oe", "Ü" => "Ue"
      }
    end
  end
end
@@ -0,0 +1,199 @@
1
+ # encoding: utf-8
2
module Babosa
  module Transliterator
    # Transliterator for accented Latin-script letters. Subclasses Base and
    # declares ASCII approximations for the letters of the Latin-1 Supplement
    # and Latin Extended-A ranges.
    class Latin < Base

      # Accented letter => ASCII replacement, listed in code point order
      # within each group.
      #
      # Every key is exactly one character. The ligatures "Ĳ" (U+0132) and
      # "ĳ" (U+0133) and the letter "ŉ" (U+0149) must be written as those
      # single code points; spelling them as letter sequences ("IJ", "ij",
      # apostrophe + "n") produces keys that never correspond to the intended
      # character.
      APPROXIMATIONS = {
        # --- Latin-1 Supplement, uppercase (plus "ß") ---
        "À" => "A", "Á" => "A", "Â" => "A", "Ã" => "A", "Ä" => "A", "Å" => "A",
        "Æ" => "Ae",
        "Ç" => "C",
        "È" => "E", "É" => "E", "Ê" => "E", "Ë" => "E",
        "Ì" => "I", "Í" => "I", "Î" => "I", "Ï" => "I",
        "Ð" => "D",
        "Ñ" => "N",
        "Ò" => "O", "Ó" => "O", "Ô" => "O", "Õ" => "O", "Ö" => "O", "Ø" => "O",
        "Ù" => "U", "Ú" => "U", "Û" => "U", "Ü" => "U",
        "Ý" => "Y",
        "Þ" => "Th",
        "ß" => "ss",

        # --- Latin-1 Supplement, lowercase ---
        "à" => "a", "á" => "a", "â" => "a", "ã" => "a", "ä" => "a", "å" => "a",
        "æ" => "ae",
        "ç" => "c",
        "è" => "e", "é" => "e", "ê" => "e", "ë" => "e",
        "ì" => "i", "í" => "i", "î" => "i", "ï" => "i",
        "ð" => "d",
        "ñ" => "n",
        "ò" => "o", "ó" => "o", "ô" => "o", "õ" => "o", "ö" => "o", "ø" => "o",
        "ù" => "u", "ú" => "u", "û" => "u", "ü" => "u",
        "ý" => "y",
        "þ" => "th",
        "ÿ" => "y",

        # --- Latin Extended-A, uppercase ---
        "Ā" => "A", "Ă" => "A", "Ą" => "A",
        "Ć" => "C", "Ĉ" => "C", "Ċ" => "C", "Č" => "C",
        "Ď" => "D", "Đ" => "D",
        "Ē" => "E", "Ĕ" => "E", "Ė" => "E", "Ę" => "E", "Ě" => "E",
        "Ĝ" => "G", "Ğ" => "G", "Ġ" => "G", "Ģ" => "G",
        "Ĥ" => "H", "Ħ" => "H",
        "Ĩ" => "I", "Ī" => "I", "Ĭ" => "I", "Į" => "I", "İ" => "I",
        "Ĳ" => "Ij", # U+0132, a single ligature character
        "Ĵ" => "J",
        "Ķ" => "K",
        "Ĺ" => "L", "Ļ" => "L", "Ľ" => "L", "Ŀ" => "L", "Ł" => "L",
        "Ń" => "N", "Ņ" => "N", "Ň" => "N",
        "Ŋ" => "Ng",
        "Ō" => "O", "Ŏ" => "O", "Ő" => "O",
        "Œ" => "OE",
        "Ŕ" => "R", "Ŗ" => "R", "Ř" => "R",
        "Ś" => "S", "Ŝ" => "S", "Ş" => "S", "Š" => "S",
        "Ţ" => "T", "Ť" => "T", "Ŧ" => "T",
        "Ũ" => "U", "Ū" => "U", "Ŭ" => "U", "Ů" => "U", "Ű" => "U", "Ų" => "U",
        "Ŵ" => "W",
        "Ŷ" => "Y", "Ÿ" => "Y",
        "Ź" => "Z", "Ż" => "Z", "Ž" => "Z",

        # --- Latin Extended-A, lowercase ---
        "ā" => "a", "ă" => "a", "ą" => "a",
        "ć" => "c", "ĉ" => "c", "ċ" => "c", "č" => "c",
        "ď" => "d", "đ" => "d",
        "ē" => "e", "ĕ" => "e", "ė" => "e", "ę" => "e", "ě" => "e",
        "ĝ" => "g", "ğ" => "g", "ġ" => "g", "ģ" => "g",
        "ĥ" => "h", "ħ" => "h",
        "ĩ" => "i", "ī" => "i", "ĭ" => "i", "į" => "i", "ı" => "i",
        "ĳ" => "ij", # U+0133, a single ligature character
        "ĵ" => "j",
        "ķ" => "k", "ĸ" => "k",
        "ĺ" => "l", "ļ" => "l", "ľ" => "l", "ŀ" => "l", "ł" => "l",
        "ń" => "n", "ņ" => "n", "ň" => "n",
        "ŉ" => "n", # U+0149, a single character (not apostrophe + "n")
        "ŋ" => "ng",
        "ō" => "o", "ŏ" => "o", "ő" => "o",
        "œ" => "oe",
        "ŕ" => "r", "ŗ" => "r", "ř" => "r",
        "ś" => "s", "ŝ" => "s", "ş" => "s", "š" => "s",
        "ţ" => "t", "ť" => "t", "ŧ" => "t",
        "ũ" => "u", "ū" => "u", "ŭ" => "u", "ů" => "u", "ű" => "u", "ų" => "u",
        "ŵ" => "w",
        "ŷ" => "y",
        "ž" => "z", "ź" => "z", "ż" => "z"
      }
    end
  end
end
@@ -0,0 +1,22 @@
1
+ # encoding: utf-8
2
module Babosa
  module Transliterator
    # Transliterator for Russian text. Subclasses Cyrillic and declares the
    # Russian-specific approximations below.
    class Russian < Cyrillic
      # Cyrillic letter => ASCII replacement.
      # NOTE(review): the uppercase and lowercase groups are not symmetric
      # (e.g. no lowercase entries for "ш" or "я" here); presumably the
      # remaining letters come from Cyrillic's own table — confirm there.
      APPROXIMATIONS = {
        # uppercase
        "Й" => "I",
        "М" => "M",
        "Х" => "H",
        "Ц" => "Ts",
        "Ш" => "Sh",
        "Щ" => "Sch",
        "Ю" => "U",
        "Я" => "Ya",
        # lowercase
        "й" => "i",
        "х" => "h",
        "ц" => "ts",
        "щ" => "sch",
        "ю" => "u"
      }
    end
  end
end
@@ -0,0 +1,34 @@
1
+ # encoding: utf-8
2
+
3
module Babosa
  module Transliterator
    # Transliterator for Serbian text, which is written in both Latin and
    # Cyrillic script. Subclasses Latin; the table below additionally pulls
    # in Cyrillic's APPROXIMATIONS.
    class Serbian < Latin
      # Built by taking Cyrillic's APPROXIMATIONS and overlaying the entries
      # below (Hash#merge: on a key clash, the entry listed here wins).
      APPROXIMATIONS = Cyrillic.const_get(:APPROXIMATIONS).merge({
        # --- Latin-script letters ---
        "Ð" => "Dj", # U+00D0 (eth); retained so existing behaviour is unchanged
        "Đ" => "Dj", # U+0110 (D with stroke), the capital form of "đ" below
        "Č" => "Ch",
        "Š" => "Sh",
        "č" => "ch",
        "đ" => "dj",
        "š" => "sh",

        # --- Cyrillic-script letters, uppercase ---
        "Ћ" => "C",
        "Ц" => "C",
        "Ч" => "Ch",
        "Ђ" => "Dj",
        "Џ" => "Dz",
        "Х" => "H",
        "Ј" => "J",
        "Љ" => "Lj",
        "Њ" => "Nj",

        # --- Cyrillic-script letters, lowercase ---
        "ц" => "c",
        "ћ" => "c",
        "ч" => "ch",
        "ђ" => "dj",
        "џ" => "dz",
        "х" => "h",
        "ј" => "j",
        "љ" => "lj",
        "њ" => "nj"
      })
    end
  end
end
@@ -0,0 +1,9 @@
1
+ # encoding: utf-8
2
+
3
module Babosa
  module Transliterator
    # Transliterator for Spanish text. Subclasses Latin and declares the
    # approximation for the letter eñe below.
    class Spanish < Latin
      # "ñ" / "Ñ" => "ni" / "Ni".
      APPROXIMATIONS = {
        "ñ" => "ni",
        "Ñ" => "Ni"
      }
    end
  end
end
@@ -0,0 +1,11 @@
1
+ # encoding: utf-8
2
module Babosa
  module Transliterator
    # Transliterator for Ukrainian text. Subclasses Cyrillic and declares the
    # approximations for the letter "и" below.
    #
    # NOTE(review): the class name is spelled "Ukranian" (not "Ukrainian").
    # It is left unchanged here because renaming would change the public
    # constant.
    class Ukranian < Cyrillic
      # "И" / "и" => "Y" / "y".
      APPROXIMATIONS = { "И" => "Y", "и" => "y" }
    end
  end
end
@@ -1,4 +1,5 @@
1
1
  require File.expand_path("../mappings", __FILE__)
2
+
2
3
  module Babosa
3
4
  module UTF8
4
5
 
@@ -1,5 +1,5 @@
1
1
  module Babosa
2
2
  module Version
3
- STRING = "0.2.2"
3
+ STRING = "0.3.0"
4
4
  end
5
5
  end
@@ -0,0 +1,131 @@
1
+ # encoding: utf-8
2
+ require File.expand_path("../spec_helper", __FILE__)
3
+
4
# Behavioural examples for Babosa::Identifier, exercised through String#to_slug.
describe Babosa::Identifier do

  it "should respond_to :empty?" do
    "".to_slug.should respond_to(:empty?)
  end

  # Each listed method is called on a slug built from a string that starts
  # with the byte 0x93, which is not valid UTF-8; none of them may raise.
  %w[approximate_ascii clean downcase word_chars normalize to_ascii upcase with_dashes].each do |method|
    describe "##{method}" do
      it "should work with invalid UTF-8 strings" do
        expect {"\x93abc".to_slug.send method}.not_to raise_exception
      end
    end
  end

  describe "#word_chars" do
    it "word_chars! should leave only letters and spaces" do
      string = "a*$%^$@!@b$%^&*()*!c"
      # Anchored on both ends: an unanchored /[a-z ]*/ also matches the empty
      # string, so it would accept any result and the example could not fail.
      string.to_slug.word_chars.should match(/\A[a-z ]*\z/i)
    end
  end

  describe "#transliterate" do
    it "should transliterate to ascii" do
      # U+00C0..U+017E: the accented letters of Latin-1 Supplement and
      # Latin Extended-A.
      (0xC0..0x17E).each do |codepoint|
        ss = [codepoint].pack("U*").to_slug
        ss.approximate_ascii.should match(/[\x0-\x7f]/)
      end
    end

    it "should transliterate uncomposed utf8" do
      string = [117, 776].pack("U*") # "ü" as ASCII "u" plus COMBINING DIAERESIS
      string.to_slug.approximate_ascii.should eql("u")
    end
  end

  describe "#downcase" do
    it "should lowercase strings" do
      "FELIZ AÑO".to_slug.downcase.should eql("feliz año")
    end
  end

  describe "#upcase" do
    it "should uppercase strings" do
      "feliz año".to_slug.upcase.should eql("FELIZ AÑO")
    end
  end

  describe "#normalize" do
    it "should replace whitespace with dashes" do
      "a b".to_slug.clean.normalize.should eql("a-b")
    end

    it "should replace multiple spaces with 1 dash" do
      # Two spaces between the letters; with a single space this example
      # would merely repeat the previous one.
      "a  b".to_slug.clean.normalize.should eql("a-b")
    end

    it "should replace multiple dashes with 1 dash" do
      "male - female".to_slug.normalize.should eql("male-female")
    end

    it "should strip trailing space" do
      "ab ".to_slug.normalize.should eql("ab")
    end

    it "should strip leading space" do
      " ab".to_slug.normalize.should eql("ab")
    end

    it "should strip trailing dashes" do
      "ab-".to_slug.normalize.should eql("ab")
    end

    it "should strip leading dashes" do
      "-ab".to_slug.normalize.should eql("ab")
    end

    it "should not modify valid name strings" do
      "a-b-c-d".to_slug.normalize.should eql("a-b-c-d")
    end

    it "should work with non roman chars" do
      "検 索".to_slug.normalize.should eql("検-索")
    end

    context "with to_ascii option" do
      it "should approximate and strip non ascii" do
        ss = "カタカナ: katakana is über cool".to_slug
        ss.normalize(:to_ascii => true).should eql("katakana-is-uber-cool")
      end
    end
  end

  describe "#truncate_bytes" do
    it "should truncate by byte length" do
      # "ü" occupies two bytes in UTF-8, so a limit that falls inside a
      # character must drop that character entirely.
      "üa".to_slug.truncate_bytes(2).should eql("ü")
      "üa".to_slug.truncate_bytes(1).should eql("")
      "üa".to_slug.truncate_bytes(100).should eql("üa")
      "üéøá".to_slug.truncate_bytes(3).should eql("ü")
    end
  end

  describe "#truncate" do
    it "should truncate by char length" do
      "üa".to_slug.truncate(2).should eql("üa")
      "üa".to_slug.truncate(1).should eql("ü")
      "üa".to_slug.truncate(100).should eql("üa")
    end
  end

  describe "#with_dashes" do
    it "should not change byte size when replacing spaces" do
      "".to_slug.with_dashes.bytesize.should eql(0)
      " ".to_slug.with_dashes.bytesize.should eql(1)
      "-abc-".to_slug.with_dashes.bytesize.should eql(5)
      " abc ".to_slug.with_dashes.bytesize.should eql(5)
      # Seven bytes: the run of two spaces in the middle is intentional.
      " a  bc ".to_slug.with_dashes.bytesize.should eql(7)
    end
  end

  describe "#to_ruby_method" do
    it "should get a string suitable for use as a ruby method" do
      "¿¿¿hello... world???".to_slug.to_ruby_method.should eql("hello_world?")
      "カタカナ: katakana is über cool".to_slug.to_ruby_method.should eql("katakana_is_uber_cool")
      "カタカナ: katakana is über cool!".to_slug.to_ruby_method.should eql("katakana_is_uber_cool!")
      "カタカナ: katakana is über cool".to_slug.to_ruby_method(false).should eql("katakana_is_uber_cool")
    end
  end
end