babosa 0.3.11 → 1.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Changelog.md +93 -17
- data/lib/babosa.rb +0 -17
- data/lib/babosa/identifier.rb +18 -16
- data/lib/babosa/transliterator/base.rb +16 -3
- data/lib/babosa/transliterator/ukrainian.rb +19 -0
- data/lib/babosa/utf8/active_support_proxy.rb +5 -11
- data/lib/babosa/utf8/dumb_proxy.rb +23 -16
- data/lib/babosa/utf8/java_proxy.rb +1 -1
- data/lib/babosa/utf8/proxy.rb +46 -39
- data/lib/babosa/utf8/unicode_proxy.rb +3 -1
- data/lib/babosa/version.rb +1 -1
- data/spec/babosa_spec.rb +45 -36
- data/spec/spec_helper.rb +8 -14
- data/spec/transliterators/base_spec.rb +3 -3
- data/spec/transliterators/bulgarian_spec.rb +1 -1
- data/spec/transliterators/danish_spec.rb +1 -1
- data/spec/transliterators/german_spec.rb +2 -2
- data/spec/transliterators/greek_spec.rb +1 -1
- data/spec/transliterators/latin_spec.rb +9 -0
- data/spec/transliterators/norwegian_spec.rb +1 -1
- data/spec/transliterators/polish_spec.rb +14 -0
- data/spec/transliterators/romanian_spec.rb +1 -1
- data/spec/transliterators/serbian_spec.rb +1 -1
- data/spec/transliterators/spanish_spec.rb +1 -1
- data/spec/transliterators/swedish_spec.rb +1 -1
- data/spec/transliterators/ukrainian_spec.rb +80 -1
- data/spec/transliterators/vietnamese_spec.rb +1 -1
- data/spec/utf8_proxy_spec.rb +10 -18
- metadata +42 -29
- data/init.rb +0 -3
- data/lib/babosa/candidates.rb +0 -45
- data/lib/babosa/generator.rb +0 -24
@@ -1,9 +1,11 @@
|
|
1
|
+
require 'unicode'
|
2
|
+
|
1
3
|
module Babosa
|
2
4
|
module UTF8
|
3
5
|
# A UTF-8 proxy using the Unicode gem.
|
4
6
|
# @see http://github.com/blackwinter/unicode
|
5
7
|
module UnicodeProxy
|
6
|
-
extend
|
8
|
+
extend Proxy
|
7
9
|
extend self
|
8
10
|
def downcase(string)
|
9
11
|
Unicode.downcase(string)
|
data/lib/babosa/version.rb
CHANGED
data/spec/babosa_spec.rb
CHANGED
@@ -4,7 +4,7 @@ require File.expand_path("../spec_helper", __FILE__)
|
|
4
4
|
describe Babosa::Identifier do
|
5
5
|
|
6
6
|
it "should respond_to :empty?" do
|
7
|
-
"".to_slug.
|
7
|
+
expect("".to_slug).to respond_to(:empty?)
|
8
8
|
end
|
9
9
|
|
10
10
|
%w[approximate_ascii clean downcase word_chars normalize to_ascii upcase with_dashes].each do |method|
|
@@ -18,128 +18,137 @@ describe Babosa::Identifier do
|
|
18
18
|
describe "#word_chars" do
|
19
19
|
it "word_chars! should leave only letters and spaces" do
|
20
20
|
string = "a*$%^$@!@b$%^&*()*!c"
|
21
|
-
string.to_slug.word_chars.
|
21
|
+
expect(string.to_slug.word_chars!).to match(/[a-z ]*/i)
|
22
22
|
end
|
23
23
|
end
|
24
24
|
|
25
25
|
describe "#transliterate" do
|
26
26
|
it "should transliterate to ascii" do
|
27
|
-
|
27
|
+
(0xC0..0x17E).to_a.each do |codepoint|
|
28
28
|
ss = [codepoint].pack("U*").to_slug
|
29
|
-
ss.approximate_ascii.
|
29
|
+
expect(ss.approximate_ascii!).to match(/[\x0-\x7f]/)
|
30
30
|
end
|
31
31
|
end
|
32
32
|
|
33
33
|
it "should transliterate uncomposed utf8" do
|
34
34
|
string = [117, 776].pack("U*") # "ü" as ASCII "u" plus COMBINING DIAERESIS
|
35
|
-
string.to_slug.approximate_ascii.
|
35
|
+
expect(string.to_slug.approximate_ascii).to eql("u")
|
36
36
|
end
|
37
37
|
|
38
38
|
it "should transliterate using multiple transliterators" do
|
39
39
|
string = "свободное režģis"
|
40
|
-
string.to_slug.approximate_ascii(:latin, :russian).
|
40
|
+
expect(string.to_slug.approximate_ascii(:latin, :russian)).to eql("svobodnoe rezgis")
|
41
41
|
end
|
42
42
|
end
|
43
43
|
|
44
44
|
describe "#downcase" do
|
45
45
|
it "should lowercase strings" do
|
46
|
-
"FELIZ AÑO".to_slug.downcase.
|
46
|
+
expect("FELIZ AÑO".to_slug.downcase).to eql("feliz año")
|
47
47
|
end
|
48
48
|
end
|
49
49
|
|
50
50
|
describe "#upcase" do
|
51
51
|
it "should uppercase strings" do
|
52
|
-
"feliz año".to_slug.upcase.
|
52
|
+
expect("feliz año".to_slug.upcase).to eql("FELIZ AÑO")
|
53
53
|
end
|
54
54
|
end
|
55
55
|
|
56
56
|
describe "#normalize" do
|
57
57
|
|
58
58
|
it "should allow passing locale as key for :transliterate" do
|
59
|
-
"ö".to_slug.clean.normalize(:transliterate => :german).
|
59
|
+
expect("ö".to_slug.clean.normalize(:transliterate => :german)).to eql("oe")
|
60
60
|
end
|
61
61
|
|
62
62
|
it "should replace whitespace with dashes" do
|
63
|
-
"a b".to_slug.clean.normalize.
|
63
|
+
expect("a b".to_slug.clean.normalize).to eql("a-b")
|
64
64
|
end
|
65
65
|
|
66
66
|
it "should replace multiple spaces with 1 dash" do
|
67
|
-
"a b".to_slug.clean.normalize.
|
67
|
+
expect("a b".to_slug.clean.normalize).to eql("a-b")
|
68
68
|
end
|
69
69
|
|
70
70
|
it "should replace multiple dashes with 1 dash" do
|
71
|
-
"male - female".to_slug.normalize.
|
71
|
+
expect("male - female".to_slug.normalize).to eql("male-female")
|
72
72
|
end
|
73
73
|
|
74
74
|
it "should strip trailing space" do
|
75
|
-
"ab ".to_slug.normalize.
|
75
|
+
expect("ab ".to_slug.normalize).to eql("ab")
|
76
76
|
end
|
77
77
|
|
78
78
|
it "should strip leading space" do
|
79
|
-
" ab".to_slug.normalize.
|
79
|
+
expect(" ab".to_slug.normalize).to eql("ab")
|
80
80
|
end
|
81
81
|
|
82
82
|
it "should strip trailing slashes" do
|
83
|
-
"ab-".to_slug.normalize.
|
83
|
+
expect("ab-".to_slug.normalize).to eql("ab")
|
84
84
|
end
|
85
85
|
|
86
86
|
it "should strip leading slashes" do
|
87
|
-
"-ab".to_slug.normalize.
|
87
|
+
expect("-ab".to_slug.normalize).to eql("ab")
|
88
88
|
end
|
89
89
|
|
90
90
|
it "should not modify valid name strings" do
|
91
|
-
"a-b-c-d".to_slug.normalize.
|
91
|
+
expect("a-b-c-d".to_slug.normalize).to eql("a-b-c-d")
|
92
92
|
end
|
93
93
|
|
94
94
|
it "should not convert underscores" do
|
95
|
-
"hello_world".to_slug.normalize.
|
95
|
+
expect("hello_world".to_slug.normalize).to eql("hello_world")
|
96
96
|
end
|
97
97
|
|
98
98
|
it "should work with non roman chars" do
|
99
|
-
"検 索".to_slug.normalize.
|
99
|
+
expect("検 索".to_slug.normalize).to eql("検-索")
|
100
100
|
end
|
101
101
|
|
102
102
|
context "with to_ascii option" do
|
103
103
|
it "should approximate and strip non ascii" do
|
104
104
|
ss = "カタカナ: katakana is über cool".to_slug
|
105
|
-
ss.normalize(:to_ascii => true).
|
105
|
+
expect(ss.normalize(:to_ascii => true)).to eql("katakana-is-uber-cool")
|
106
106
|
end
|
107
107
|
end
|
108
108
|
end
|
109
109
|
|
110
110
|
describe "#truncate_bytes" do
|
111
111
|
it "should by byte length" do
|
112
|
-
"üa".to_slug.truncate_bytes(2).
|
113
|
-
"üa".to_slug.truncate_bytes(1).
|
114
|
-
"üa".to_slug.truncate_bytes(100).
|
115
|
-
"üéøá".to_slug.truncate_bytes(3).
|
112
|
+
expect("üa".to_slug.truncate_bytes(2)).to eql("ü")
|
113
|
+
expect("üa".to_slug.truncate_bytes(1)).to eql("")
|
114
|
+
expect("üa".to_slug.truncate_bytes(100)).to eql("üa")
|
115
|
+
expect("üéøá".to_slug.truncate_bytes(3)).to eql("ü")
|
116
116
|
end
|
117
117
|
end
|
118
118
|
|
119
119
|
describe "#truncate" do
|
120
120
|
it "should truncate by char length" do
|
121
|
-
"üa".to_slug.truncate(2).
|
122
|
-
"üa".to_slug.truncate(1).
|
123
|
-
"üa".to_slug.truncate(100).
|
121
|
+
expect("üa".to_slug.truncate(2)).to eql("üa")
|
122
|
+
expect("üa".to_slug.truncate(1)).to eql("ü")
|
123
|
+
expect("üa".to_slug.truncate(100)).to eql("üa")
|
124
124
|
end
|
125
125
|
end
|
126
126
|
|
127
127
|
describe "#with_dashes" do
|
128
128
|
it "should not change byte size when replacing spaces" do
|
129
|
-
"".to_slug.with_dashes.bytesize.
|
130
|
-
" ".to_slug.with_dashes.bytesize.
|
131
|
-
"-abc-".to_slug.with_dashes.bytesize.
|
132
|
-
" abc ".to_slug.with_dashes.bytesize.
|
133
|
-
" a bc ".to_slug.with_dashes.bytesize.
|
129
|
+
expect("".to_slug.with_dashes.bytesize).to eql(0)
|
130
|
+
expect(" ".to_slug.with_dashes.bytesize).to eql(1)
|
131
|
+
expect("-abc-".to_slug.with_dashes.bytesize).to eql(5)
|
132
|
+
expect(" abc ".to_slug.with_dashes.bytesize).to eql(5)
|
133
|
+
expect(" a bc ".to_slug.with_dashes.bytesize).to eql(7)
|
134
134
|
end
|
135
135
|
end
|
136
136
|
|
137
137
|
describe "#to_ruby_method" do
|
138
138
|
it "should get a string suitable for use as a ruby method" do
|
139
|
-
"¿¿¿hello... world???".to_slug.to_ruby_method.
|
140
|
-
"カタカナ: katakana is über cool".to_slug.to_ruby_method.
|
141
|
-
"カタカナ: katakana is über cool!".to_slug.to_ruby_method.
|
142
|
-
"カタカナ: katakana is über cool".to_slug.to_ruby_method(false).
|
139
|
+
expect("¿¿¿hello... world???".to_slug.to_ruby_method).to eql("hello_world?")
|
140
|
+
expect("カタカナ: katakana is über cool".to_slug.to_ruby_method).to eql("katakana_is_uber_cool")
|
141
|
+
expect("カタカナ: katakana is über cool!".to_slug.to_ruby_method).to eql("katakana_is_uber_cool!")
|
142
|
+
expect("カタカナ: katakana is über cool".to_slug.to_ruby_method(false)).to eql("katakana_is_uber_cool")
|
143
|
+
end
|
144
|
+
|
145
|
+
it "should optionally remove trailing punctuation" do
|
146
|
+
expect("¿¿¿hello... world???".to_slug.to_ruby_method(false)).to eql("hello_world")
|
147
|
+
end
|
148
|
+
|
149
|
+
it "should raise an error when it would generate an impossible method name" do
|
150
|
+
# "1".to_identifier.to_ruby_method
|
151
|
+
expect {"1".to_identifier.to_ruby_method}.to raise_error(Babosa::Identifier::Error)
|
143
152
|
end
|
144
153
|
end
|
145
154
|
end
|
data/spec/spec_helper.rb
CHANGED
@@ -1,25 +1,19 @@
|
|
1
|
-
#
|
2
|
-
$LOAD_PATH << File.expand_path("../lib", __FILE__)
|
3
|
-
$LOAD_PATH.uniq!
|
1
|
+
# coding: utf-8
|
4
2
|
|
5
|
-
if ENV[
|
6
|
-
require
|
3
|
+
if ENV['COV']
|
4
|
+
require 'simplecov'
|
7
5
|
SimpleCov.start
|
8
6
|
end
|
9
7
|
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
require "rubygems"
|
14
|
-
require "babosa"
|
15
|
-
require "active_support"
|
8
|
+
require 'bundler/setup'
|
9
|
+
require 'babosa'
|
16
10
|
|
17
11
|
shared_examples_for "a latin transliterator" do
|
18
12
|
let(:t) { described_class.instance }
|
19
13
|
|
20
14
|
it "should transliterate latin characters" do
|
21
15
|
string = (0xC0..0x17E).to_a.pack("U*")
|
22
|
-
t.transliterate(string).
|
16
|
+
expect(t.transliterate(string)).to match(/[\x0-\x7f]/)
|
23
17
|
end
|
24
18
|
end
|
25
19
|
|
@@ -28,7 +22,7 @@ shared_examples_for "a cyrillic transliterator" do
|
|
28
22
|
|
29
23
|
it "should transliterate cyrillic characters" do
|
30
24
|
string = "Славься, Отечество наше свободное"
|
31
|
-
t.transliterate(string).
|
25
|
+
expect(t.transliterate(string)).to match(/[\x0-\x7f]/)
|
32
26
|
end
|
33
27
|
end
|
34
28
|
|
@@ -37,6 +31,6 @@ shared_examples_for "a greek transliterator" do
|
|
37
31
|
|
38
32
|
it "should transliterate greek characters" do
|
39
33
|
string = "Γερμανία"
|
40
|
-
t.transliterate(string).
|
34
|
+
expect(t.transliterate(string)).to match(/[\x0-\x7f]/)
|
41
35
|
end
|
42
36
|
end
|
@@ -3,14 +3,14 @@ require File.expand_path("../../spec_helper", __FILE__)
|
|
3
3
|
|
4
4
|
describe Babosa::Transliterator::Base do
|
5
5
|
|
6
|
-
|
6
|
+
let(:t) {Babosa::Transliterator::Base.instance}
|
7
7
|
|
8
8
|
it "should transliterate 'smart' quotes" do
|
9
|
-
|
9
|
+
expect(t.transliterate("’")).to eql("'")
|
10
10
|
end
|
11
11
|
|
12
12
|
it "should transliterate non-breaking spaces" do
|
13
|
-
|
13
|
+
expect(t.transliterate("\xc2\xa0")).to eql(" ")
|
14
14
|
end
|
15
15
|
|
16
16
|
end
|
@@ -7,11 +7,11 @@ describe Babosa::Transliterator::German do
|
|
7
7
|
it_behaves_like "a latin transliterator"
|
8
8
|
|
9
9
|
it "should transliterate Eszett" do
|
10
|
-
t.transliterate("ß").
|
10
|
+
expect(t.transliterate("ß")).to eql("ss")
|
11
11
|
end
|
12
12
|
|
13
13
|
it "should transliterate vowels with umlauts" do
|
14
|
-
t.transliterate("üöä").
|
14
|
+
expect(t.transliterate("üöä")).to eql("ueoeae")
|
15
15
|
end
|
16
16
|
|
17
17
|
end
|
@@ -0,0 +1,14 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
require File.expand_path("../../spec_helper", __FILE__)
|
3
|
+
|
4
|
+
describe Babosa::Transliterator::Romanian do
|
5
|
+
|
6
|
+
let(:t) { described_class.instance }
|
7
|
+
it_behaves_like "a latin transliterator"
|
8
|
+
|
9
|
+
it "should transliterate various characters" do
|
10
|
+
expect(t.transliterate("ĄąĆćĘꣳŃńÓóŚśŹźŻż")).to eql("AaCcEeLlNnOoSsZzZz")
|
11
|
+
end
|
12
|
+
|
13
|
+
end
|
14
|
+
|
@@ -6,4 +6,83 @@ describe Babosa::Transliterator::Ukrainian do
|
|
6
6
|
let(:t) { described_class.instance }
|
7
7
|
it_behaves_like "a cyrillic transliterator"
|
8
8
|
|
9
|
-
|
9
|
+
it "should transliterate Cyrillic characters" do
|
10
|
+
examples = {
|
11
|
+
"Алушта" => "Alushta",
|
12
|
+
"Андрій" => "Andrii",
|
13
|
+
"Борщагівка" => "Borshchahivka",
|
14
|
+
"Борисенко" => "Borysenko",
|
15
|
+
"Вінниця" => "Vinnytsia",
|
16
|
+
"Володимир" => "Volodymyr",
|
17
|
+
"Гадяч" => "Hadiach",
|
18
|
+
"Богдан" => "Bohdan",
|
19
|
+
"Ґалаґан" => "Galagan",
|
20
|
+
"Ґорґани" => "Gorgany",
|
21
|
+
"Донецьк" => "Donetsk",
|
22
|
+
"Дмитро" => "Dmytro",
|
23
|
+
"Рівне" => "Rivne",
|
24
|
+
"Олег" => "Oleh",
|
25
|
+
"Есмань" => "Esman",
|
26
|
+
"Єнакієве" => "Yenakiieve",
|
27
|
+
"Гаєвич" => "Haievych",
|
28
|
+
"Короп'є" => "Koropie",
|
29
|
+
"Житомир" => "Zhytomyr",
|
30
|
+
"Жанна" => "Zhanna",
|
31
|
+
"Жежелів" => "Zhezheliv",
|
32
|
+
"Закарпаття" => "Zakarpattia",
|
33
|
+
"Казимирчук" => "Kazymyrchuk",
|
34
|
+
"Медвин" => "Medvyn",
|
35
|
+
"Михайленко" => "Mykhailenko",
|
36
|
+
"Іванків" => "Ivankiv",
|
37
|
+
"Іващенко" => "Ivashchenko",
|
38
|
+
"Їжакевич" => "Yizhakevych",
|
39
|
+
"Кадиївка" => "Kadyivka",
|
40
|
+
"Мар'їне" => "Marine",
|
41
|
+
"Йосипівка" => "Yosypivka",
|
42
|
+
"Стрий" => "Stryi",
|
43
|
+
"Олексій" => "Oleksii",
|
44
|
+
"Київ" => "Kyiv",
|
45
|
+
"Коваленко" => "Kovalenko",
|
46
|
+
"Лебедин" => "Lebedyn",
|
47
|
+
"Леонід" => "Leonid",
|
48
|
+
"Миколаїв" => "Mykolaiv",
|
49
|
+
"Маринич" => "Marynych",
|
50
|
+
"Ніжин" => "Nizhyn",
|
51
|
+
"Наталія" => "Nataliia",
|
52
|
+
"Одеса" => "Odesa",
|
53
|
+
"Онищенко" => "Onyshchenko",
|
54
|
+
"Полтава" => "Poltava",
|
55
|
+
"Петро" => "Petro",
|
56
|
+
"Решетилівка" => "Reshetylivka",
|
57
|
+
"Рибчинський" => "Rybchynskyi",
|
58
|
+
"Суми" => "Sumy",
|
59
|
+
"Соломія" => "Solomiia",
|
60
|
+
"Тернопіль" => "Ternopil",
|
61
|
+
"Троць" => "Trots",
|
62
|
+
"Ужгород" => "Uzhhorod",
|
63
|
+
"Уляна" => "Uliana",
|
64
|
+
"Фастів" => "Fastiv",
|
65
|
+
"Філіпчук" => "Filipchuk",
|
66
|
+
"Харків" => "Kharkiv",
|
67
|
+
"Христина" => "Khrystyna",
|
68
|
+
"Біла Церква" => "Bila Tserkva",
|
69
|
+
"Стеценко" => "Stetsenko",
|
70
|
+
"Чернівці" => "Chernivtsi",
|
71
|
+
"Шевченко" => "Shevchenko",
|
72
|
+
"Шостка" => "Shostka",
|
73
|
+
"Кишеньки" => "Kyshenky",
|
74
|
+
"Щербухи" => "Shcherbukhy",
|
75
|
+
"Гоща" => "Hoshcha",
|
76
|
+
"Гаращенко" => "Harashchenko",
|
77
|
+
"Юрій" => "Yurii",
|
78
|
+
"Корюківка" => "Koriukivka",
|
79
|
+
"Яготин" => "Yahotyn",
|
80
|
+
"Ярошенко" => "Yaroshenko",
|
81
|
+
"Костянтин" => "Kostiantyn",
|
82
|
+
"Знам'янка" => "Znamianka",
|
83
|
+
"Феодосія" => "Feodosiia"
|
84
|
+
}
|
85
|
+
examples.each { |k, v| expect(t.transliterate(k)).to eql(v) }
|
86
|
+
end
|
87
|
+
|
88
|
+
end
|