babosa 0.3.11 → 1.0.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/Changelog.md +93 -17
- data/lib/babosa.rb +0 -17
- data/lib/babosa/identifier.rb +18 -16
- data/lib/babosa/transliterator/base.rb +16 -3
- data/lib/babosa/transliterator/ukrainian.rb +19 -0
- data/lib/babosa/utf8/active_support_proxy.rb +5 -11
- data/lib/babosa/utf8/dumb_proxy.rb +23 -16
- data/lib/babosa/utf8/java_proxy.rb +1 -1
- data/lib/babosa/utf8/proxy.rb +46 -39
- data/lib/babosa/utf8/unicode_proxy.rb +3 -1
- data/lib/babosa/version.rb +1 -1
- data/spec/babosa_spec.rb +45 -36
- data/spec/spec_helper.rb +8 -14
- data/spec/transliterators/base_spec.rb +3 -3
- data/spec/transliterators/bulgarian_spec.rb +1 -1
- data/spec/transliterators/danish_spec.rb +1 -1
- data/spec/transliterators/german_spec.rb +2 -2
- data/spec/transliterators/greek_spec.rb +1 -1
- data/spec/transliterators/latin_spec.rb +9 -0
- data/spec/transliterators/norwegian_spec.rb +1 -1
- data/spec/transliterators/polish_spec.rb +14 -0
- data/spec/transliterators/romanian_spec.rb +1 -1
- data/spec/transliterators/serbian_spec.rb +1 -1
- data/spec/transliterators/spanish_spec.rb +1 -1
- data/spec/transliterators/swedish_spec.rb +1 -1
- data/spec/transliterators/ukrainian_spec.rb +80 -1
- data/spec/transliterators/vietnamese_spec.rb +1 -1
- data/spec/utf8_proxy_spec.rb +10 -18
- metadata +42 -29
- data/init.rb +0 -3
- data/lib/babosa/candidates.rb +0 -45
- data/lib/babosa/generator.rb +0 -24
data/lib/babosa/utf8/unicode_proxy.rb
CHANGED
@@ -1,9 +1,11 @@
|
|
1
|
+
require 'unicode'
|
2
|
+
|
1
3
|
module Babosa
|
2
4
|
module UTF8
|
3
5
|
# A UTF-8 proxy using the Unicode gem.
|
4
6
|
# @see http://github.com/blackwinter/unicode
|
5
7
|
module UnicodeProxy
|
6
|
-
extend
|
8
|
+
extend Proxy
|
7
9
|
extend self
|
8
10
|
def downcase(string)
|
9
11
|
Unicode.downcase(string)
|
data/lib/babosa/version.rb
CHANGED
data/spec/babosa_spec.rb
CHANGED
@@ -4,7 +4,7 @@ require File.expand_path("../spec_helper", __FILE__)
|
|
4
4
|
describe Babosa::Identifier do
|
5
5
|
|
6
6
|
it "should respond_to :empty?" do
|
7
|
-
"".to_slug.
|
7
|
+
expect("".to_slug).to respond_to(:empty?)
|
8
8
|
end
|
9
9
|
|
10
10
|
%w[approximate_ascii clean downcase word_chars normalize to_ascii upcase with_dashes].each do |method|
|
@@ -18,128 +18,137 @@ describe Babosa::Identifier do
|
|
18
18
|
describe "#word_chars" do
|
19
19
|
it "word_chars! should leave only letters and spaces" do
|
20
20
|
string = "a*$%^$@!@b$%^&*()*!c"
|
21
|
-
string.to_slug.word_chars.
|
21
|
+
expect(string.to_slug.word_chars!).to match(/[a-z ]*/i)
|
22
22
|
end
|
23
23
|
end
|
24
24
|
|
25
25
|
describe "#transliterate" do
|
26
26
|
it "should transliterate to ascii" do
|
27
|
-
|
27
|
+
(0xC0..0x17E).to_a.each do |codepoint|
|
28
28
|
ss = [codepoint].pack("U*").to_slug
|
29
|
-
ss.approximate_ascii.
|
29
|
+
expect(ss.approximate_ascii!).to match(/[\x0-\x7f]/)
|
30
30
|
end
|
31
31
|
end
|
32
32
|
|
33
33
|
it "should transliterate uncomposed utf8" do
|
34
34
|
string = [117, 776].pack("U*") # "ü" as ASCII "u" plus COMBINING DIAERESIS
|
35
|
-
string.to_slug.approximate_ascii.
|
35
|
+
expect(string.to_slug.approximate_ascii).to eql("u")
|
36
36
|
end
|
37
37
|
|
38
38
|
it "should transliterate using multiple transliterators" do
|
39
39
|
string = "свободное režģis"
|
40
|
-
string.to_slug.approximate_ascii(:latin, :russian).
|
40
|
+
expect(string.to_slug.approximate_ascii(:latin, :russian)).to eql("svobodnoe rezgis")
|
41
41
|
end
|
42
42
|
end
|
43
43
|
|
44
44
|
describe "#downcase" do
|
45
45
|
it "should lowercase strings" do
|
46
|
-
"FELIZ AÑO".to_slug.downcase.
|
46
|
+
expect("FELIZ AÑO".to_slug.downcase).to eql("feliz año")
|
47
47
|
end
|
48
48
|
end
|
49
49
|
|
50
50
|
describe "#upcase" do
|
51
51
|
it "should uppercase strings" do
|
52
|
-
"feliz año".to_slug.upcase.
|
52
|
+
expect("feliz año".to_slug.upcase).to eql("FELIZ AÑO")
|
53
53
|
end
|
54
54
|
end
|
55
55
|
|
56
56
|
describe "#normalize" do
|
57
57
|
|
58
58
|
it "should allow passing locale as key for :transliterate" do
|
59
|
-
"ö".to_slug.clean.normalize(:transliterate => :german).
|
59
|
+
expect("ö".to_slug.clean.normalize(:transliterate => :german)).to eql("oe")
|
60
60
|
end
|
61
61
|
|
62
62
|
it "should replace whitespace with dashes" do
|
63
|
-
"a b".to_slug.clean.normalize.
|
63
|
+
expect("a b".to_slug.clean.normalize).to eql("a-b")
|
64
64
|
end
|
65
65
|
|
66
66
|
it "should replace multiple spaces with 1 dash" do
|
67
|
-
"a  b".to_slug.clean.normalize.
|
67
|
+
expect("a  b".to_slug.clean.normalize).to eql("a-b")
|
68
68
|
end
|
69
69
|
|
70
70
|
it "should replace multiple dashes with 1 dash" do
|
71
|
-
"male - female".to_slug.normalize.
|
71
|
+
expect("male - female".to_slug.normalize).to eql("male-female")
|
72
72
|
end
|
73
73
|
|
74
74
|
it "should strip trailing space" do
|
75
|
-
"ab ".to_slug.normalize.
|
75
|
+
expect("ab ".to_slug.normalize).to eql("ab")
|
76
76
|
end
|
77
77
|
|
78
78
|
it "should strip leading space" do
|
79
|
-
" ab".to_slug.normalize.
|
79
|
+
expect(" ab".to_slug.normalize).to eql("ab")
|
80
80
|
end
|
81
81
|
|
82
82
|
it "should strip trailing slashes" do
|
83
|
-
"ab-".to_slug.normalize.
|
83
|
+
expect("ab-".to_slug.normalize).to eql("ab")
|
84
84
|
end
|
85
85
|
|
86
86
|
it "should strip leading slashes" do
|
87
|
-
"-ab".to_slug.normalize.
|
87
|
+
expect("-ab".to_slug.normalize).to eql("ab")
|
88
88
|
end
|
89
89
|
|
90
90
|
it "should not modify valid name strings" do
|
91
|
-
"a-b-c-d".to_slug.normalize.
|
91
|
+
expect("a-b-c-d".to_slug.normalize).to eql("a-b-c-d")
|
92
92
|
end
|
93
93
|
|
94
94
|
it "should not convert underscores" do
|
95
|
-
"hello_world".to_slug.normalize.
|
95
|
+
expect("hello_world".to_slug.normalize).to eql("hello_world")
|
96
96
|
end
|
97
97
|
|
98
98
|
it "should work with non roman chars" do
|
99
|
-
"検 索".to_slug.normalize.
|
99
|
+
expect("検 索".to_slug.normalize).to eql("検-索")
|
100
100
|
end
|
101
101
|
|
102
102
|
context "with to_ascii option" do
|
103
103
|
it "should approximate and strip non ascii" do
|
104
104
|
ss = "カタカナ: katakana is über cool".to_slug
|
105
|
-
ss.normalize(:to_ascii => true).
|
105
|
+
expect(ss.normalize(:to_ascii => true)).to eql("katakana-is-uber-cool")
|
106
106
|
end
|
107
107
|
end
|
108
108
|
end
|
109
109
|
|
110
110
|
describe "#truncate_bytes" do
|
111
111
|
it "should by byte length" do
|
112
|
-
"üa".to_slug.truncate_bytes(2).
|
113
|
-
"üa".to_slug.truncate_bytes(1).
|
114
|
-
"üa".to_slug.truncate_bytes(100).
|
115
|
-
"üéøá".to_slug.truncate_bytes(3).
|
112
|
+
expect("üa".to_slug.truncate_bytes(2)).to eql("ü")
|
113
|
+
expect("üa".to_slug.truncate_bytes(1)).to eql("")
|
114
|
+
expect("üa".to_slug.truncate_bytes(100)).to eql("üa")
|
115
|
+
expect("üéøá".to_slug.truncate_bytes(3)).to eql("ü")
|
116
116
|
end
|
117
117
|
end
|
118
118
|
|
119
119
|
describe "#truncate" do
|
120
120
|
it "should truncate by char length" do
|
121
|
-
"üa".to_slug.truncate(2).
|
122
|
-
"üa".to_slug.truncate(1).
|
123
|
-
"üa".to_slug.truncate(100).
|
121
|
+
expect("üa".to_slug.truncate(2)).to eql("üa")
|
122
|
+
expect("üa".to_slug.truncate(1)).to eql("ü")
|
123
|
+
expect("üa".to_slug.truncate(100)).to eql("üa")
|
124
124
|
end
|
125
125
|
end
|
126
126
|
|
127
127
|
describe "#with_dashes" do
|
128
128
|
it "should not change byte size when replacing spaces" do
|
129
|
-
"".to_slug.with_dashes.bytesize.
|
130
|
-
" ".to_slug.with_dashes.bytesize.
|
131
|
-
"-abc-".to_slug.with_dashes.bytesize.
|
132
|
-
" abc ".to_slug.with_dashes.bytesize.
|
133
|
-
" a  bc ".to_slug.with_dashes.bytesize.
|
129
|
+
expect("".to_slug.with_dashes.bytesize).to eql(0)
|
130
|
+
expect(" ".to_slug.with_dashes.bytesize).to eql(1)
|
131
|
+
expect("-abc-".to_slug.with_dashes.bytesize).to eql(5)
|
132
|
+
expect(" abc ".to_slug.with_dashes.bytesize).to eql(5)
|
133
|
+
expect(" a  bc ".to_slug.with_dashes.bytesize).to eql(7)
|
134
134
|
end
|
135
135
|
end
|
136
136
|
|
137
137
|
describe "#to_ruby_method" do
|
138
138
|
it "should get a string suitable for use as a ruby method" do
|
139
|
-
"¿¿¿hello... world???".to_slug.to_ruby_method.
|
140
|
-
"カタカナ: katakana is über cool".to_slug.to_ruby_method.
|
141
|
-
"カタカナ: katakana is über cool!".to_slug.to_ruby_method.
|
142
|
-
"カタカナ: katakana is über cool".to_slug.to_ruby_method(false).
|
139
|
+
expect("¿¿¿hello... world???".to_slug.to_ruby_method).to eql("hello_world?")
|
140
|
+
expect("カタカナ: katakana is über cool".to_slug.to_ruby_method).to eql("katakana_is_uber_cool")
|
141
|
+
expect("カタカナ: katakana is über cool!".to_slug.to_ruby_method).to eql("katakana_is_uber_cool!")
|
142
|
+
expect("カタカナ: katakana is über cool".to_slug.to_ruby_method(false)).to eql("katakana_is_uber_cool")
|
143
|
+
end
|
144
|
+
|
145
|
+
it "should optionally remove trailing punctuation" do
|
146
|
+
expect("¿¿¿hello... world???".to_slug.to_ruby_method(false)).to eql("hello_world")
|
147
|
+
end
|
148
|
+
|
149
|
+
it "should raise an error when it would generate an impossible method name" do
|
150
|
+
# "1".to_identifier.to_ruby_method
|
151
|
+
expect {"1".to_identifier.to_ruby_method}.to raise_error(Babosa::Identifier::Error)
|
143
152
|
end
|
144
153
|
end
|
145
154
|
end
|
data/spec/spec_helper.rb
CHANGED
@@ -1,25 +1,19 @@
|
|
1
|
-
#
|
2
|
-
$LOAD_PATH << File.expand_path("../lib", __FILE__)
|
3
|
-
$LOAD_PATH.uniq!
|
1
|
+
# coding: utf-8
|
4
2
|
|
5
|
-
if ENV[
|
6
|
-
require
|
3
|
+
if ENV['COV']
|
4
|
+
require 'simplecov'
|
7
5
|
SimpleCov.start
|
8
6
|
end
|
9
7
|
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
require "rubygems"
|
14
|
-
require "babosa"
|
15
|
-
require "active_support"
|
8
|
+
require 'bundler/setup'
|
9
|
+
require 'babosa'
|
16
10
|
|
17
11
|
shared_examples_for "a latin transliterator" do
|
18
12
|
let(:t) { described_class.instance }
|
19
13
|
|
20
14
|
it "should transliterate latin characters" do
|
21
15
|
string = (0xC0..0x17E).to_a.pack("U*")
|
22
|
-
t.transliterate(string).
|
16
|
+
expect(t.transliterate(string)).to match(/[\x0-\x7f]/)
|
23
17
|
end
|
24
18
|
end
|
25
19
|
|
@@ -28,7 +22,7 @@ shared_examples_for "a cyrillic transliterator" do
|
|
28
22
|
|
29
23
|
it "should transliterate cyrillic characters" do
|
30
24
|
string = "Славься, Отечество наше свободное"
|
31
|
-
t.transliterate(string).
|
25
|
+
expect(t.transliterate(string)).to match(/[\x0-\x7f]/)
|
32
26
|
end
|
33
27
|
end
|
34
28
|
|
@@ -37,6 +31,6 @@ shared_examples_for "a greek transliterator" do
|
|
37
31
|
|
38
32
|
it "should transliterate greek characters" do
|
39
33
|
string = "Γερμανία"
|
40
|
-
t.transliterate(string).
|
34
|
+
expect(t.transliterate(string)).to match(/[\x0-\x7f]/)
|
41
35
|
end
|
42
36
|
end
|
data/spec/transliterators/base_spec.rb
CHANGED
@@ -3,14 +3,14 @@ require File.expand_path("../../spec_helper", __FILE__)
|
|
3
3
|
|
4
4
|
describe Babosa::Transliterator::Base do
|
5
5
|
|
6
|
-
|
6
|
+
let(:t) {Babosa::Transliterator::Base.instance}
|
7
7
|
|
8
8
|
it "should transliterate 'smart' quotes" do
|
9
|
-
|
9
|
+
expect(t.transliterate("’")).to eql("'")
|
10
10
|
end
|
11
11
|
|
12
12
|
it "should transliterate non-breaking spaces" do
|
13
|
-
|
13
|
+
expect(t.transliterate("\xc2\xa0")).to eql(" ")
|
14
14
|
end
|
15
15
|
|
16
16
|
end
|
data/spec/transliterators/german_spec.rb
CHANGED
@@ -7,11 +7,11 @@ describe Babosa::Transliterator::German do
|
|
7
7
|
it_behaves_like "a latin transliterator"
|
8
8
|
|
9
9
|
it "should transliterate Eszett" do
|
10
|
-
t.transliterate("ß").
|
10
|
+
expect(t.transliterate("ß")).to eql("ss")
|
11
11
|
end
|
12
12
|
|
13
13
|
it "should transliterate vowels with umlauts" do
|
14
|
-
t.transliterate("üöä").
|
14
|
+
expect(t.transliterate("üöä")).to eql("ueoeae")
|
15
15
|
end
|
16
16
|
|
17
17
|
end
|
data/spec/transliterators/polish_spec.rb
ADDED
@@ -0,0 +1,14 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
require File.expand_path("../../spec_helper", __FILE__)
|
3
|
+
|
4
|
+
describe Babosa::Transliterator::Romanian do
|
5
|
+
|
6
|
+
let(:t) { described_class.instance }
|
7
|
+
it_behaves_like "a latin transliterator"
|
8
|
+
|
9
|
+
it "should transliterate various characters" do
|
10
|
+
expect(t.transliterate("ĄąĆćĘꣳŃńÓ󌜏źŻż")).to eql("AaCcEeLlNnOoSsZzZz")
|
11
|
+
end
|
12
|
+
|
13
|
+
end
|
14
|
+
|
data/spec/transliterators/ukrainian_spec.rb
CHANGED
@@ -6,4 +6,83 @@ describe Babosa::Transliterator::Ukrainian do
|
|
6
6
|
let(:t) { described_class.instance }
|
7
7
|
it_behaves_like "a cyrillic transliterator"
|
8
8
|
|
9
|
-
|
9
|
+
it "should transliterate Cyrillic characters" do
|
10
|
+
examples = {
|
11
|
+
"Алушта" => "Alushta",
|
12
|
+
"Андрій" => "Andrii",
|
13
|
+
"Борщагівка" => "Borshchahivka",
|
14
|
+
"Борисенко" => "Borysenko",
|
15
|
+
"Вінниця" => "Vinnytsia",
|
16
|
+
"Володимир" => "Volodymyr",
|
17
|
+
"Гадяч" => "Hadiach",
|
18
|
+
"Богдан" => "Bohdan",
|
19
|
+
"Ґалаґан" => "Galagan",
|
20
|
+
"Ґорґани" => "Gorgany",
|
21
|
+
"Донецьк" => "Donetsk",
|
22
|
+
"Дмитро" => "Dmytro",
|
23
|
+
"Рівне" => "Rivne",
|
24
|
+
"Олег" => "Oleh",
|
25
|
+
"Есмань" => "Esman",
|
26
|
+
"Єнакієве" => "Yenakiieve",
|
27
|
+
"Гаєвич" => "Haievych",
|
28
|
+
"Короп'є" => "Koropie",
|
29
|
+
"Житомир" => "Zhytomyr",
|
30
|
+
"Жанна" => "Zhanna",
|
31
|
+
"Жежелів" => "Zhezheliv",
|
32
|
+
"Закарпаття" => "Zakarpattia",
|
33
|
+
"Казимирчук" => "Kazymyrchuk",
|
34
|
+
"Медвин" => "Medvyn",
|
35
|
+
"Михайленко" => "Mykhailenko",
|
36
|
+
"Іванків" => "Ivankiv",
|
37
|
+
"Іващенко" => "Ivashchenko",
|
38
|
+
"Їжакевич" => "Yizhakevych",
|
39
|
+
"Кадиївка" => "Kadyivka",
|
40
|
+
"Мар'їне" => "Marine",
|
41
|
+
"Йосипівка" => "Yosypivka",
|
42
|
+
"Стрий" => "Stryi",
|
43
|
+
"Олексій" => "Oleksii",
|
44
|
+
"Київ" => "Kyiv",
|
45
|
+
"Коваленко" => "Kovalenko",
|
46
|
+
"Лебедин" => "Lebedyn",
|
47
|
+
"Леонід" => "Leonid",
|
48
|
+
"Миколаїв" => "Mykolaiv",
|
49
|
+
"Маринич" => "Marynych",
|
50
|
+
"Ніжин" => "Nizhyn",
|
51
|
+
"Наталія" => "Nataliia",
|
52
|
+
"Одеса" => "Odesa",
|
53
|
+
"Онищенко" => "Onyshchenko",
|
54
|
+
"Полтава" => "Poltava",
|
55
|
+
"Петро" => "Petro",
|
56
|
+
"Решетилівка" => "Reshetylivka",
|
57
|
+
"Рибчинський" => "Rybchynskyi",
|
58
|
+
"Суми" => "Sumy",
|
59
|
+
"Соломія" => "Solomiia",
|
60
|
+
"Тернопіль" => "Ternopil",
|
61
|
+
"Троць" => "Trots",
|
62
|
+
"Ужгород" => "Uzhhorod",
|
63
|
+
"Уляна" => "Uliana",
|
64
|
+
"Фастів" => "Fastiv",
|
65
|
+
"Філіпчук" => "Filipchuk",
|
66
|
+
"Харків" => "Kharkiv",
|
67
|
+
"Христина" => "Khrystyna",
|
68
|
+
"Біла Церква" => "Bila Tserkva",
|
69
|
+
"Стеценко" => "Stetsenko",
|
70
|
+
"Чернівці" => "Chernivtsi",
|
71
|
+
"Шевченко" => "Shevchenko",
|
72
|
+
"Шостка" => "Shostka",
|
73
|
+
"Кишеньки" => "Kyshenky",
|
74
|
+
"Щербухи" => "Shcherbukhy",
|
75
|
+
"Гоща" => "Hoshcha",
|
76
|
+
"Гаращенко" => "Harashchenko",
|
77
|
+
"Юрій" => "Yurii",
|
78
|
+
"Корюківка" => "Koriukivka",
|
79
|
+
"Яготин" => "Yahotyn",
|
80
|
+
"Ярошенко" => "Yaroshenko",
|
81
|
+
"Костянтин" => "Kostiantyn",
|
82
|
+
"Знам'янка" => "Znamianka",
|
83
|
+
"Феодосія" => "Feodosiia"
|
84
|
+
}
|
85
|
+
examples.each { |k, v| expect(t.transliterate(k)).to eql(v) }
|
86
|
+
end
|
87
|
+
|
88
|
+
end
|