babosa 0.2.2 → 0.3.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,15 @@
# encoding: utf-8
module Babosa
  module Transliterator
    # Danish-specific approximations, declared on top of the Latin
    # transliterator. Only the letters whose Danish romanization differs from
    # the plain Latin table are listed here.
    # NOTE(review): how APPROXIMATIONS is combined with the parent's table is
    # decided outside this file (presumably in Base) - confirm there.
    class Danish < Latin
      APPROXIMATIONS = {
        # lowercase
        "æ" => "ae", "ø" => "oe", "å" => "aa",
        # uppercase
        "Ø" => "Oe", "Å" => "Aa"
      }
    end
  end
end
@@ -0,0 +1,15 @@
# encoding: utf-8
module Babosa
  module Transliterator
    # German-specific approximations: umlauted vowels are written as the base
    # vowel followed by "e" rather than being reduced to the bare vowel.
    # NOTE(review): how APPROXIMATIONS is combined with the parent's table is
    # decided outside this file (presumably in Base) - confirm there.
    class German < Latin
      APPROXIMATIONS = {
        # lowercase
        "ä" => "ae", "ö" => "oe", "ü" => "ue",
        # uppercase
        "Ä" => "Ae", "Ö" => "Oe", "Ü" => "Ue"
      }
    end
  end
end
@@ -0,0 +1,199 @@
# encoding: utf-8
module Babosa
  module Transliterator
    # ASCII approximations for the accented and special letters of the
    # Latin-1 Supplement (U+00C0..U+00FF) and Latin Extended-A
    # (U+0100..U+017E) ranges.
    #
    # Every key is a single Unicode code point; every value is the ASCII
    # string that stands in for it. Entries are kept in code point order,
    # uppercase groups before lowercase groups.
    class Latin < Base

      APPROXIMATIONS = {
        # --- Latin-1 Supplement, uppercase ---
        "À" => "A",
        "Á" => "A",
        "Â" => "A",
        "Ã" => "A",
        "Ä" => "A",
        "Å" => "A",
        "Æ" => "Ae",
        "Ç" => "C",
        "È" => "E",
        "É" => "E",
        "Ê" => "E",
        "Ë" => "E",
        "Ì" => "I",
        "Í" => "I",
        "Î" => "I",
        "Ï" => "I",
        "Ð" => "D",
        "Ñ" => "N",
        "Ò" => "O",
        "Ó" => "O",
        "Ô" => "O",
        "Õ" => "O",
        "Ö" => "O",
        "Ø" => "O",
        "Ù" => "U",
        "Ú" => "U",
        "Û" => "U",
        "Ü" => "U",
        "Ý" => "Y",
        "Þ" => "Th",
        "ß" => "ss",
        # --- Latin-1 Supplement, lowercase ---
        "à" => "a",
        "á" => "a",
        "â" => "a",
        "ã" => "a",
        "ä" => "a",
        "å" => "a",
        "æ" => "ae",
        "ç" => "c",
        "è" => "e",
        "é" => "e",
        "ê" => "e",
        "ë" => "e",
        "ì" => "i",
        "í" => "i",
        "î" => "i",
        "ï" => "i",
        "ð" => "d",
        "ñ" => "n",
        "ò" => "o",
        "ó" => "o",
        "ô" => "o",
        "õ" => "o",
        "ö" => "o",
        "ø" => "o",
        "ù" => "u",
        "ú" => "u",
        "û" => "u",
        "ü" => "u",
        "ý" => "y",
        "þ" => "th",
        "ÿ" => "y",
        # --- Latin Extended-A, uppercase ---
        "Ā" => "A",
        "Ă" => "A",
        "Ą" => "A",
        "Ć" => "C",
        "Ĉ" => "C",
        "Ċ" => "C",
        "Č" => "C",
        "Ď" => "D",
        "Đ" => "D",
        "Ē" => "E",
        "Ĕ" => "E",
        "Ė" => "E",
        "Ę" => "E",
        "Ě" => "E",
        "Ĝ" => "G",
        "Ğ" => "G",
        "Ġ" => "G",
        "Ģ" => "G",
        "Ĥ" => "H",
        "Ħ" => "H",
        "Ĩ" => "I",
        "Ī" => "I",
        "Ĭ" => "I",
        "Į" => "I",
        "İ" => "I",
        # U+0132 LATIN CAPITAL LIGATURE IJ: must be the single ligature code
        # point, not the two ASCII letters "I" + "J".
        "Ĳ" => "Ij",
        "Ĵ" => "J",
        "Ķ" => "K",
        "Ĺ" => "L",
        "Ļ" => "L",
        "Ľ" => "L",
        "Ŀ" => "L",
        "Ł" => "L",
        "Ń" => "N",
        "Ņ" => "N",
        "Ň" => "N",
        "Ŋ" => "Ng",
        "Ō" => "O",
        "Ŏ" => "O",
        "Ő" => "O",
        "Œ" => "OE",
        "Ŕ" => "R",
        "Ŗ" => "R",
        "Ř" => "R",
        "Ś" => "S",
        "Ŝ" => "S",
        "Ş" => "S",
        "Š" => "S",
        "Ţ" => "T",
        "Ť" => "T",
        "Ŧ" => "T",
        "Ũ" => "U",
        "Ū" => "U",
        "Ŭ" => "U",
        "Ů" => "U",
        "Ű" => "U",
        "Ų" => "U",
        "Ŵ" => "W",
        "Ŷ" => "Y",
        "Ÿ" => "Y",
        "Ź" => "Z",
        "Ż" => "Z",
        "Ž" => "Z",
        # --- Latin Extended-A, lowercase ---
        "ā" => "a",
        "ă" => "a",
        "ą" => "a",
        "ć" => "c",
        "ĉ" => "c",
        "ċ" => "c",
        "č" => "c",
        "ď" => "d",
        "đ" => "d",
        "ē" => "e",
        "ĕ" => "e",
        "ė" => "e",
        "ę" => "e",
        "ě" => "e",
        "ĝ" => "g",
        "ğ" => "g",
        "ġ" => "g",
        "ģ" => "g",
        "ĥ" => "h",
        "ħ" => "h",
        "ĩ" => "i",
        "ī" => "i",
        "ĭ" => "i",
        "į" => "i",
        "ı" => "i",
        # U+0133 LATIN SMALL LIGATURE IJ: single code point, not "i" + "j".
        "ĳ" => "ij",
        "ĵ" => "j",
        "ķ" => "k",
        "ĸ" => "k",
        "ĺ" => "l",
        "ļ" => "l",
        "ľ" => "l",
        "ŀ" => "l",
        "ł" => "l",
        "ń" => "n",
        "ņ" => "n",
        "ň" => "n",
        # U+0149 LATIN SMALL LETTER N PRECEDED BY APOSTROPHE: single code
        # point, not U+02BC followed by "n".
        "ŉ" => "n",
        "ŋ" => "ng",
        "ō" => "o",
        "ŏ" => "o",
        "ő" => "o",
        "œ" => "oe",
        "ŕ" => "r",
        "ŗ" => "r",
        "ř" => "r",
        "ś" => "s",
        "ŝ" => "s",
        "ş" => "s",
        "š" => "s",
        "ţ" => "t",
        "ť" => "t",
        "ŧ" => "t",
        "ũ" => "u",
        "ū" => "u",
        "ŭ" => "u",
        "ů" => "u",
        "ű" => "u",
        "ų" => "u",
        "ŵ" => "w",
        "ŷ" => "y",
        "ž" => "z",
        "ź" => "z",
        "ż" => "z"
      }
    end
  end
end
@@ -0,0 +1,22 @@
# encoding: utf-8
module Babosa
  module Transliterator
    # Russian-specific approximations declared on top of the Cyrillic
    # transliterator. Only the letters listed here are given a Russian
    # romanization; everything else is left to the parent class.
    # NOTE(review): how APPROXIMATIONS is combined with the parent's table is
    # decided outside this file (presumably in Base) - confirm there.
    class Russian < Cyrillic
      APPROXIMATIONS = {
        # uppercase
        "Й" => "I",  "М" => "M",   "Х" => "H", "Ц" => "Ts",
        "Ш" => "Sh", "Щ" => "Sch", "Ю" => "U", "Я" => "Ya",
        # lowercase
        "й" => "i",  "х" => "h",   "ц" => "ts", "щ" => "sch",
        "ю" => "u"
      }
    end
  end
end
@@ -0,0 +1,34 @@
# encoding: utf-8

module Babosa
  module Transliterator
    # Serbian approximations. Serbian is written in both Latin and Cyrillic
    # script, so this class inherits from Latin and builds its table by
    # taking Cyrillic's APPROXIMATIONS and overriding/adding the
    # Serbian-specific letters of both scripts.
    class Serbian < Latin
      APPROXIMATIONS = Cyrillic::APPROXIMATIONS.merge(
        # --- Latin script ---
        # U+00D0 (Eth) is kept for backward compatibility; it looks the same
        # as the Serbian letter but is a different code point.
        "Ð" => "Dj",
        # U+0110 LATIN CAPITAL LETTER D WITH STROKE is the actual uppercase
        # partner of "đ" (U+0111) below.
        "Đ" => "Dj",
        "Č" => "Ch",
        "Š" => "Sh",
        "č" => "ch",
        "đ" => "dj",
        "š" => "sh",
        # --- Cyrillic script, uppercase ---
        "Ћ" => "C",
        "Ц" => "C",
        "Ч" => "Ch",
        "Ђ" => "Dj",
        "Џ" => "Dz",
        "Х" => "H",
        "Ј" => "J",
        "Љ" => "Lj",
        "Њ" => "Nj",
        # --- Cyrillic script, lowercase ---
        "ц" => "c",
        "ћ" => "c",
        "ч" => "ch",
        "ђ" => "dj",
        "џ" => "dz",
        "х" => "h",
        "ј" => "j",
        "љ" => "lj",
        "њ" => "nj"
      )
    end
  end
end
@@ -0,0 +1,9 @@
# encoding: utf-8

module Babosa
  module Transliterator
    # Spanish-specific approximations: "ñ" is written as "ni" instead of
    # being reduced to a bare "n".
    # NOTE(review): how APPROXIMATIONS is combined with the parent's table is
    # decided outside this file (presumably in Base) - confirm there.
    class Spanish < Latin
      APPROXIMATIONS = {
        "ñ" => "ni",
        "Ñ" => "Ni"
      }
    end
  end
end
@@ -0,0 +1,11 @@
# encoding: utf-8
module Babosa
  module Transliterator
    # Ukrainian-specific approximations declared on top of the Cyrillic
    # transliterator: the letter "И"/"и" is romanized as "Y"/"y".
    #
    # The constant name is spelled "Ukranian" (sic). It is left unchanged
    # because renaming it would break any caller that references it.
    class Ukranian < Cyrillic
      APPROXIMATIONS = { "И" => "Y", "и" => "y" }
    end
  end
end
@@ -1,4 +1,5 @@
1
1
  require File.expand_path("../mappings", __FILE__)
2
+
2
3
  module Babosa
3
4
  module UTF8
4
5
 
@@ -1,5 +1,5 @@
module Babosa
  # Holds the gem's release number.
  module Version
    # Current version string. Frozen so the shared constant cannot be
    # mutated in place by a caller.
    STRING = "0.3.0".freeze
  end
end
@@ -0,0 +1,131 @@
# encoding: utf-8
require File.expand_path("../spec_helper", __FILE__)

# Behavioural examples for Babosa::Identifier, reached through
# String#to_slug.
describe Babosa::Identifier do

  it "should respond_to :empty?" do
    "".to_slug.should respond_to(:empty?)
  end

  # Each of these methods must tolerate input that is not valid UTF-8
  # ("\x93" is a lone byte that is not a valid UTF-8 sequence).
  %w[approximate_ascii clean downcase word_chars normalize to_ascii upcase with_dashes].each do |method|
    describe "##{method}" do
      it "should work with invalid UTF-8 strings" do
        expect {"\x93abc".to_slug.send method}.not_to raise_exception
      end
    end
  end

  describe "#word_chars" do
    it "word_chars! should leave only letters and spaces" do
      string = "a*$%^$@!@b$%^&*()*!c"
      # Anchored on both ends: an unanchored /[a-z ]*/ matches every string
      # (it can match the empty string) and would never fail.
      string.to_slug.word_chars.should match(/\A[a-z ]*\z/i)
    end
  end

  describe "#transliterate" do
    it "should transliterate to ascii" do
      # Every code point from U+00C0 through U+017E must come back as
      # ASCII only; anchoring makes the whole result count, not just one
      # character of it.
      (0xC0..0x17E).each do |codepoint|
        ss = [codepoint].pack("U*").to_slug
        ss.approximate_ascii.should match(/\A[\x00-\x7f]+\z/)
      end
    end

    it "should transliterate uncomposed utf8" do
      string = [117, 776].pack("U*") # "ü" as ASCII "u" plus COMBINING DIAERESIS
      string.to_slug.approximate_ascii.should eql("u")
    end
  end

  describe "#downcase" do
    it "should lowercase strings" do
      "FELIZ AÑO".to_slug.downcase.should eql("feliz año")
    end
  end

  describe "#upcase" do
    it "should uppercase strings" do
      "feliz año".to_slug.upcase.should eql("FELIZ AÑO")
    end
  end

  describe "#normalize" do
    it "should replace whitespace with dashes" do
      "a b".to_slug.clean.normalize.should eql("a-b")
    end

    it "should replace multiple spaces with 1 dash" do
      # Two spaces on purpose: a single space would only repeat the example
      # above.
      "a  b".to_slug.clean.normalize.should eql("a-b")
    end

    it "should replace multiple dashes with 1 dash" do
      "male - female".to_slug.normalize.should eql("male-female")
    end

    it "should strip trailing space" do
      "ab ".to_slug.normalize.should eql("ab")
    end

    it "should strip leading space" do
      " ab".to_slug.normalize.should eql("ab")
    end

    it "should strip trailing dashes" do
      "ab-".to_slug.normalize.should eql("ab")
    end

    it "should strip leading dashes" do
      "-ab".to_slug.normalize.should eql("ab")
    end

    it "should not modify valid name strings" do
      "a-b-c-d".to_slug.normalize.should eql("a-b-c-d")
    end

    it "should work with non roman chars" do
      "検 索".to_slug.normalize.should eql("検-索")
    end

    context "with to_ascii option" do
      it "should approximate and strip non ascii" do
        ss = "カタカナ: katakana is über cool".to_slug
        ss.normalize(:to_ascii => true).should eql("katakana-is-uber-cool")
      end
    end
  end

  describe "#truncate_bytes" do
    it "should truncate by byte length" do
      # "ü" is two bytes in UTF-8, so a cut that would split it drops the
      # whole character.
      "üa".to_slug.truncate_bytes(2).should eql("ü")
      "üa".to_slug.truncate_bytes(1).should eql("")
      "üa".to_slug.truncate_bytes(100).should eql("üa")
      "üéøá".to_slug.truncate_bytes(3).should eql("ü")
    end
  end

  describe "#truncate" do
    it "should truncate by char length" do
      "üa".to_slug.truncate(2).should eql("üa")
      "üa".to_slug.truncate(1).should eql("ü")
      "üa".to_slug.truncate(100).should eql("üa")
    end
  end

  describe "#with_dashes" do
    it "should not change byte size when replacing spaces" do
      # Each expected size equals the byte length of the input literal.
      "".to_slug.with_dashes.bytesize.should eql(0)
      " ".to_slug.with_dashes.bytesize.should eql(1)
      "-abc-".to_slug.with_dashes.bytesize.should eql(5)
      " abc ".to_slug.with_dashes.bytesize.should eql(5)
      " a  bc ".to_slug.with_dashes.bytesize.should eql(7)
    end
  end

  describe "#to_ruby_method" do
    it "should get a string suitable for use as a ruby method" do
      "¿¿¿hello... world???".to_slug.to_ruby_method.should eql("hello_world?")
      "カタカナ: katakana is über cool".to_slug.to_ruby_method.should eql("katakana_is_uber_cool")
      "カタカナ: katakana is über cool!".to_slug.to_ruby_method.should eql("katakana_is_uber_cool!")
      "カタカナ: katakana is über cool".to_slug.to_ruby_method(false).should eql("katakana_is_uber_cool")
    end
  end
end