babosa 0.3.11 → 2.0.0.beta
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +5 -5
- data/Changelog.md +117 -17
- data/README.md +82 -119
- data/Rakefile +9 -8
- data/lib/babosa.rb +2 -21
- data/lib/babosa/identifier.rb +87 -124
- data/lib/babosa/transliterator/base.rb +59 -43
- data/lib/babosa/transliterator/bulgarian.rb +3 -2
- data/lib/babosa/transliterator/cyrillic.rb +5 -5
- data/lib/babosa/transliterator/danish.rb +3 -3
- data/lib/babosa/transliterator/german.rb +3 -2
- data/lib/babosa/transliterator/greek.rb +4 -3
- data/lib/babosa/transliterator/hindi.rb +138 -0
- data/lib/babosa/transliterator/latin.rb +5 -5
- data/lib/babosa/transliterator/macedonian.rb +3 -2
- data/lib/babosa/transliterator/norwegian.rb +3 -3
- data/lib/babosa/transliterator/romanian.rb +3 -2
- data/lib/babosa/transliterator/russian.rb +3 -2
- data/lib/babosa/transliterator/serbian.rb +29 -27
- data/lib/babosa/transliterator/spanish.rb +2 -2
- data/lib/babosa/transliterator/swedish.rb +3 -3
- data/lib/babosa/transliterator/turkish.rb +8 -0
- data/lib/babosa/transliterator/ukrainian.rb +23 -3
- data/lib/babosa/transliterator/vietnamese.rb +4 -3
- data/lib/babosa/version.rb +3 -1
- data/spec/identifier_spec.rb +157 -0
- data/spec/spec_helper.rb +15 -12
- data/spec/transliterators/base_spec.rb +7 -8
- data/spec/transliterators/bulgarian_spec.rb +4 -5
- data/spec/transliterators/danish_spec.rb +5 -6
- data/spec/transliterators/german_spec.rb +6 -7
- data/spec/transliterators/greek_spec.rb +7 -7
- data/spec/transliterators/hindi_spec.rb +17 -0
- data/spec/transliterators/latin_spec.rb +8 -0
- data/spec/transliterators/macedonian_spec.rb +3 -4
- data/spec/transliterators/norwegian_spec.rb +4 -4
- data/spec/transliterators/polish_spec.rb +12 -0
- data/spec/transliterators/romanian_spec.rb +5 -6
- data/spec/transliterators/russian_spec.rb +3 -4
- data/spec/transliterators/serbian_spec.rb +6 -7
- data/spec/transliterators/spanish_spec.rb +5 -6
- data/spec/transliterators/swedish_spec.rb +7 -7
- data/spec/transliterators/turkish_spec.rb +24 -0
- data/spec/transliterators/ukrainian_spec.rb +81 -3
- data/spec/transliterators/vietnamese_spec.rb +10 -10
- metadata +41 -46
- data/init.rb +0 -3
- data/lib/babosa/candidates.rb +0 -45
- data/lib/babosa/generator.rb +0 -24
- data/lib/babosa/utf8/active_support_proxy.rb +0 -20
- data/lib/babosa/utf8/dumb_proxy.rb +0 -42
- data/lib/babosa/utf8/java_proxy.rb +0 -22
- data/lib/babosa/utf8/mappings.rb +0 -193
- data/lib/babosa/utf8/proxy.rb +0 -118
- data/lib/babosa/utf8/unicode_proxy.rb +0 -21
- data/spec/babosa_spec.rb +0 -145
- data/spec/utf8_proxy_spec.rb +0 -48
@@ -1,4 +1,5 @@
|
|
1
|
-
#
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
2
3
|
module Babosa
|
3
4
|
module Transliterator
|
4
5
|
class Vietnamese < Latin
|
@@ -137,7 +138,7 @@ module Babosa
|
|
137
138
|
"Ỹ" => "Y",
|
138
139
|
"đ" => "d",
|
139
140
|
"Đ" => "D"
|
140
|
-
}
|
141
|
+
}.freeze
|
141
142
|
end
|
142
143
|
end
|
143
|
-
end
|
144
|
+
end
|
data/lib/babosa/version.rb
CHANGED
@@ -0,0 +1,157 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require "spec_helper"
|
4
|
+
|
5
|
+
describe Babosa::Identifier do
|
6
|
+
it "should respond_to :empty?" do
|
7
|
+
expect("".to_slug).to respond_to(:empty?)
|
8
|
+
end
|
9
|
+
|
10
|
+
%w[approximate_ascii clean downcase word_chars normalize to_ascii upcase with_dashes].each do |method|
|
11
|
+
describe "##{method}" do
|
12
|
+
it "should work with invalid UTF-8 strings" do
|
13
|
+
expect { "\x93abc".to_slug.send method }.not_to raise_exception
|
14
|
+
end
|
15
|
+
end
|
16
|
+
end
|
17
|
+
|
18
|
+
describe "#word_chars" do
|
19
|
+
it "word_chars! should leave only letters and spaces" do
|
20
|
+
string = "a*$%^$@!@b$%^&*()*!c"
|
21
|
+
expect(string.to_slug.word_chars!).to match(/[a-z ]*/i)
|
22
|
+
end
|
23
|
+
end
|
24
|
+
|
25
|
+
describe "#transliterate" do
|
26
|
+
it "should transliterate to ascii" do
|
27
|
+
(0xC0..0x17E).to_a.each do |codepoint|
|
28
|
+
ss = [codepoint].pack("U*").to_slug
|
29
|
+
expect(ss.approximate_ascii!).to match(/[\x0-\x7f]/)
|
30
|
+
end
|
31
|
+
end
|
32
|
+
|
33
|
+
it "should transliterate uncomposed utf8" do
|
34
|
+
string = [117, 776].pack("U*") # "ü" as ASCII "u" plus COMBINING DIAERESIS
|
35
|
+
expect(string.to_slug.approximate_ascii).to eql("u")
|
36
|
+
end
|
37
|
+
|
38
|
+
it "should transliterate using multiple transliterators" do
|
39
|
+
string = "свободное režģis"
|
40
|
+
expect(string.to_slug.approximate_ascii(:latin, :russian)).to eql("svobodnoe rezgis")
|
41
|
+
end
|
42
|
+
end
|
43
|
+
|
44
|
+
describe "#downcase" do
|
45
|
+
it "should lowercase strings" do
|
46
|
+
expect("FELIZ AÑO".to_slug.downcase).to eql("feliz año")
|
47
|
+
end
|
48
|
+
end
|
49
|
+
|
50
|
+
describe "#upcase" do
|
51
|
+
it "should uppercase strings" do
|
52
|
+
expect("feliz año".to_slug.upcase).to eql("FELIZ AÑO")
|
53
|
+
end
|
54
|
+
end
|
55
|
+
|
56
|
+
describe "#normalize" do
|
57
|
+
it "should allow passing locale as key for :transliterate" do
|
58
|
+
expect("ö".to_slug.clean.normalize(transliterate: :german)).to eql("oe")
|
59
|
+
end
|
60
|
+
|
61
|
+
it "should replace whitespace with dashes" do
|
62
|
+
expect("a b".to_slug.clean.normalize).to eql("a-b")
|
63
|
+
end
|
64
|
+
|
65
|
+
it "should replace multiple spaces with 1 dash" do
|
66
|
+
expect("a b".to_slug.clean.normalize).to eql("a-b")
|
67
|
+
end
|
68
|
+
|
69
|
+
it "should replace multiple dashes with 1 dash" do
|
70
|
+
expect("male - female".to_slug.normalize).to eql("male-female")
|
71
|
+
end
|
72
|
+
|
73
|
+
it "should strip trailing space" do
|
74
|
+
expect("ab ".to_slug.normalize).to eql("ab")
|
75
|
+
end
|
76
|
+
|
77
|
+
it "should strip leading space" do
|
78
|
+
expect(" ab".to_slug.normalize).to eql("ab")
|
79
|
+
end
|
80
|
+
|
81
|
+
it "should strip trailing slashes" do
|
82
|
+
expect("ab-".to_slug.normalize).to eql("ab")
|
83
|
+
end
|
84
|
+
|
85
|
+
it "should strip leading slashes" do
|
86
|
+
expect("-ab".to_slug.normalize).to eql("ab")
|
87
|
+
end
|
88
|
+
|
89
|
+
it "should not modify valid name strings" do
|
90
|
+
expect("a-b-c-d".to_slug.normalize).to eql("a-b-c-d")
|
91
|
+
end
|
92
|
+
|
93
|
+
it "should not convert underscores" do
|
94
|
+
expect("hello_world".to_slug.normalize).to eql("hello_world")
|
95
|
+
end
|
96
|
+
|
97
|
+
it "should work with non roman chars" do
|
98
|
+
expect("検 索".to_slug.normalize).to eql("検-索")
|
99
|
+
end
|
100
|
+
|
101
|
+
context "with to_ascii option" do
|
102
|
+
it "should approximate and strip non ascii" do
|
103
|
+
ss = "カタカナ: katakana is über cool".to_slug
|
104
|
+
expect(ss.normalize(to_ascii: true)).to eql("katakana-is-uber-cool")
|
105
|
+
end
|
106
|
+
end
|
107
|
+
end
|
108
|
+
|
109
|
+
describe "#truncate_bytes" do
|
110
|
+
it "should by byte length" do
|
111
|
+
expect("üa".to_slug.truncate_bytes(2)).to eql("ü")
|
112
|
+
expect("üa".to_slug.truncate_bytes(1)).to eql("")
|
113
|
+
expect("üa".to_slug.truncate_bytes(100)).to eql("üa")
|
114
|
+
expect("üéøá".to_slug.truncate_bytes(3)).to eql("ü")
|
115
|
+
end
|
116
|
+
end
|
117
|
+
|
118
|
+
describe "#truncate" do
|
119
|
+
it "should truncate by char length" do
|
120
|
+
expect("üa".to_slug.truncate(2)).to eql("üa")
|
121
|
+
expect("üa".to_slug.truncate(1)).to eql("ü")
|
122
|
+
expect("üa".to_slug.truncate(100)).to eql("üa")
|
123
|
+
end
|
124
|
+
end
|
125
|
+
|
126
|
+
describe "#with_dashes" do
|
127
|
+
it "should not change byte size when replacing spaces" do
|
128
|
+
expect("".to_slug.with_dashes.bytesize).to eql(0)
|
129
|
+
expect(" ".to_slug.with_dashes.bytesize).to eql(1)
|
130
|
+
expect("-abc-".to_slug.with_dashes.bytesize).to eql(5)
|
131
|
+
expect(" abc ".to_slug.with_dashes.bytesize).to eql(5)
|
132
|
+
expect(" a bc ".to_slug.with_dashes.bytesize).to eql(7)
|
133
|
+
end
|
134
|
+
end
|
135
|
+
|
136
|
+
describe "#to_ruby_method" do
|
137
|
+
it "should get a string suitable for use as a ruby method" do
|
138
|
+
expect("¿¿¿hello... world???".to_slug.to_ruby_method).to eql("hello_world?")
|
139
|
+
expect("カタカナ: katakana is über cool".to_slug.to_ruby_method).to eql("katakana_is_uber_cool")
|
140
|
+
expect("カタカナ: katakana is über cool!".to_slug.to_ruby_method).to eql("katakana_is_uber_cool!")
|
141
|
+
expect("カタカナ: katakana is über cool".to_slug.to_ruby_method(allow_bangs: false)).to eql("katakana_is_uber_cool")
|
142
|
+
end
|
143
|
+
|
144
|
+
it "should optionally remove trailing punctuation" do
|
145
|
+
expect("¿¿¿hello... world???".to_slug.to_ruby_method(allow_bangs: false)).to eql("hello_world")
|
146
|
+
end
|
147
|
+
|
148
|
+
it "should raise an error when it would generate an impossible method name" do
|
149
|
+
# "1".to_identifier.to_ruby_method
|
150
|
+
expect { "1".to_identifier.to_ruby_method }.to raise_error(Babosa::Identifier::Error)
|
151
|
+
end
|
152
|
+
|
153
|
+
it "should raise Babosa::Error error when the string is nil" do
|
154
|
+
expect { "".to_slug.to_ruby_method }.to raise_error(Babosa::Identifier::Error)
|
155
|
+
end
|
156
|
+
end
|
157
|
+
end
|
data/spec/spec_helper.rb
CHANGED
@@ -1,25 +1,19 @@
|
|
1
|
-
#
|
2
|
-
$LOAD_PATH << File.expand_path("../lib", __FILE__)
|
3
|
-
$LOAD_PATH.uniq!
|
1
|
+
# frozen_string_literal: true
|
4
2
|
|
5
3
|
if ENV["COV"]
|
6
4
|
require "simplecov"
|
7
5
|
SimpleCov.start
|
8
6
|
end
|
9
7
|
|
10
|
-
|
11
|
-
$KCODE = 'UTF8' if RUBY_VERSION < '1.9'
|
12
|
-
|
13
|
-
require "rubygems"
|
8
|
+
require "bundler/setup"
|
14
9
|
require "babosa"
|
15
|
-
require "active_support"
|
16
10
|
|
17
11
|
shared_examples_for "a latin transliterator" do
|
18
12
|
let(:t) { described_class.instance }
|
19
13
|
|
20
14
|
it "should transliterate latin characters" do
|
21
15
|
string = (0xC0..0x17E).to_a.pack("U*")
|
22
|
-
t.transliterate(string).
|
16
|
+
expect(t.transliterate(string)).to match(/[\x0-\x7f]/)
|
23
17
|
end
|
24
18
|
end
|
25
19
|
|
@@ -28,7 +22,7 @@ shared_examples_for "a cyrillic transliterator" do
|
|
28
22
|
|
29
23
|
it "should transliterate cyrillic characters" do
|
30
24
|
string = "Славься, Отечество наше свободное"
|
31
|
-
t.transliterate(string).
|
25
|
+
expect(t.transliterate(string)).to match(/[\x0-\x7f]/)
|
32
26
|
end
|
33
27
|
end
|
34
28
|
|
@@ -37,6 +31,15 @@ shared_examples_for "a greek transliterator" do
|
|
37
31
|
|
38
32
|
it "should transliterate greek characters" do
|
39
33
|
string = "Γερμανία"
|
40
|
-
t.transliterate(string).
|
34
|
+
expect(t.transliterate(string)).to match(/[\x0-\x7f]/)
|
41
35
|
end
|
42
|
-
end
|
36
|
+
end
|
37
|
+
|
38
|
+
shared_examples_for "a hindi transliterator" do
|
39
|
+
let(:t) { described_class.instance }
|
40
|
+
|
41
|
+
it "should transliterate hindi characters" do
|
42
|
+
string = "आदित्य तापड़िया"
|
43
|
+
expect(t.transliterate(string)).to match(/[\x0-\x7f]/)
|
44
|
+
end
|
45
|
+
end
|
@@ -1,16 +1,15 @@
|
|
1
|
-
#
|
2
|
-
require File.expand_path("../../spec_helper", __FILE__)
|
1
|
+
# frozen_string_literal: true
|
3
2
|
|
4
|
-
|
3
|
+
require "spec_helper"
|
5
4
|
|
6
|
-
|
5
|
+
describe Babosa::Transliterator::Base do
|
6
|
+
let(:t) { Babosa::Transliterator::Base.instance }
|
7
7
|
|
8
8
|
it "should transliterate 'smart' quotes" do
|
9
|
-
|
9
|
+
expect(t.transliterate("’")).to eql("'")
|
10
10
|
end
|
11
11
|
|
12
12
|
it "should transliterate non-breaking spaces" do
|
13
|
-
|
13
|
+
expect(t.transliterate("\xc2\xa0")).to eql(" ")
|
14
14
|
end
|
15
|
-
|
16
|
-
end
|
15
|
+
end
|
@@ -1,8 +1,8 @@
|
|
1
|
-
#
|
2
|
-
require File.expand_path("../../spec_helper", __FILE__)
|
1
|
+
# frozen_string_literal: true
|
3
2
|
|
4
|
-
|
3
|
+
require "spec_helper"
|
5
4
|
|
5
|
+
describe Babosa::Transliterator::Bulgarian do
|
6
6
|
let(:t) { described_class.instance }
|
7
7
|
it_behaves_like "a cyrillic transliterator"
|
8
8
|
|
@@ -14,7 +14,6 @@ describe Babosa::Transliterator::Bulgarian do
|
|
14
14
|
"Щъркел" => "Shturkel",
|
15
15
|
"полицай" => "policai"
|
16
16
|
}
|
17
|
-
examples.each {|k, v| t.transliterate(k).
|
17
|
+
examples.each { |k, v| expect(t.transliterate(k)).to eql(v) }
|
18
18
|
end
|
19
|
-
|
20
19
|
end
|
@@ -1,8 +1,8 @@
|
|
1
|
-
#
|
2
|
-
require File.expand_path("../../spec_helper", __FILE__)
|
1
|
+
# frozen_string_literal: true
|
3
2
|
|
4
|
-
|
3
|
+
require "spec_helper"
|
5
4
|
|
5
|
+
describe Babosa::Transliterator::Danish do
|
6
6
|
let(:t) { described_class.instance }
|
7
7
|
it_behaves_like "a latin transliterator"
|
8
8
|
|
@@ -11,7 +11,6 @@ describe Babosa::Transliterator::Danish do
|
|
11
11
|
"Ærøskøbing" => "Aeroeskoebing",
|
12
12
|
"Årslev" => "Aarslev"
|
13
13
|
}
|
14
|
-
examples.each {|k, v| t.transliterate(k).
|
14
|
+
examples.each { |k, v| expect(t.transliterate(k)).to eql(v) }
|
15
15
|
end
|
16
|
-
|
17
|
-
end
|
16
|
+
end
|
@@ -1,17 +1,16 @@
|
|
1
|
-
#
|
2
|
-
require File.expand_path("../../spec_helper", __FILE__)
|
1
|
+
# frozen_string_literal: true
|
3
2
|
|
4
|
-
|
3
|
+
require "spec_helper"
|
5
4
|
|
5
|
+
describe Babosa::Transliterator::German do
|
6
6
|
let(:t) { described_class.instance }
|
7
7
|
it_behaves_like "a latin transliterator"
|
8
8
|
|
9
9
|
it "should transliterate Eszett" do
|
10
|
-
t.transliterate("ß").
|
10
|
+
expect(t.transliterate("ß")).to eql("ss")
|
11
11
|
end
|
12
12
|
|
13
13
|
it "should transliterate vowels with umlauts" do
|
14
|
-
t.transliterate("üöä").
|
14
|
+
expect(t.transliterate("üöä")).to eql("ueoeae")
|
15
15
|
end
|
16
|
-
|
17
|
-
end
|
16
|
+
end
|
@@ -1,17 +1,17 @@
|
|
1
|
-
#
|
2
|
-
require File.expand_path("../../spec_helper", __FILE__)
|
1
|
+
# frozen_string_literal: true
|
3
2
|
|
4
|
-
|
3
|
+
require "spec_helper"
|
5
4
|
|
5
|
+
describe Babosa::Transliterator::Greek do
|
6
6
|
let(:t) { described_class.instance }
|
7
7
|
it_behaves_like "a greek transliterator"
|
8
8
|
|
9
9
|
it "should transliterate various characters" do
|
10
10
|
examples = {
|
11
|
-
"Γερμανία"
|
12
|
-
"Αυστρία"
|
13
|
-
"Ιταλία"
|
11
|
+
"Γερμανία" => "Germania",
|
12
|
+
"Αυστρία" => "Aystria",
|
13
|
+
"Ιταλία" => "Italia"
|
14
14
|
}
|
15
|
-
examples.each {|k, v| t.transliterate(k).
|
15
|
+
examples.each { |k, v| expect(t.transliterate(k)).to eql(v) }
|
16
16
|
end
|
17
17
|
end
|
@@ -0,0 +1,17 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require "spec_helper"
|
4
|
+
|
5
|
+
describe Babosa::Transliterator::Hindi do
|
6
|
+
let(:t) { described_class.instance }
|
7
|
+
it_behaves_like "a hindi transliterator"
|
8
|
+
|
9
|
+
it "should transliterate hindi characters" do
|
10
|
+
examples = {
|
11
|
+
"आदित्य" => "aadity",
|
12
|
+
"सबरीमाला करवाना पायसम" => "sbriimaalaa krvaanaa paaysm",
|
13
|
+
"सक्रांति आँख" => "skraanti aankh"
|
14
|
+
}
|
15
|
+
examples.each { |k, v| expect(t.transliterate(k)).to eql(v) }
|
16
|
+
end
|
17
|
+
end
|
@@ -1,9 +1,8 @@
|
|
1
|
-
#
|
2
|
-
require File.expand_path("../../spec_helper", __FILE__)
|
1
|
+
# frozen_string_literal: true
|
3
2
|
|
4
|
-
|
3
|
+
require "spec_helper"
|
5
4
|
|
5
|
+
describe Babosa::Transliterator::Macedonian do
|
6
6
|
let(:t) { described_class.instance }
|
7
7
|
it_behaves_like "a cyrillic transliterator"
|
8
|
-
|
9
8
|
end
|
@@ -1,8 +1,8 @@
|
|
1
|
-
#
|
2
|
-
require File.expand_path("../../spec_helper", __FILE__)
|
1
|
+
# frozen_string_literal: true
|
3
2
|
|
4
|
-
|
3
|
+
require "spec_helper"
|
5
4
|
|
5
|
+
describe Babosa::Transliterator::Norwegian do
|
6
6
|
let(:t) { described_class.instance }
|
7
7
|
it_behaves_like "a latin transliterator"
|
8
8
|
|
@@ -13,6 +13,6 @@ describe Babosa::Transliterator::Norwegian do
|
|
13
13
|
"Åre" => "Aare",
|
14
14
|
"Håkon" => "Haakon"
|
15
15
|
}
|
16
|
-
examples.each {|k, v| t.transliterate(k).
|
16
|
+
examples.each { |k, v| expect(t.transliterate(k)).to eql(v) }
|
17
17
|
end
|
18
18
|
end
|
@@ -0,0 +1,12 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require "spec_helper"
|
4
|
+
|
5
|
+
describe Babosa::Transliterator::Romanian do
|
6
|
+
let(:t) { described_class.instance }
|
7
|
+
it_behaves_like "a latin transliterator"
|
8
|
+
|
9
|
+
it "should transliterate various characters" do
|
10
|
+
expect(t.transliterate("ĄąĆćĘꣳŃńÓ󌜏źŻż")).to eql("AaCcEeLlNnOoSsZzZz")
|
11
|
+
end
|
12
|
+
end
|
@@ -1,8 +1,8 @@
|
|
1
|
-
#
|
2
|
-
require File.expand_path("../../spec_helper", __FILE__)
|
1
|
+
# frozen_string_literal: true
|
3
2
|
|
4
|
-
|
3
|
+
require "spec_helper"
|
5
4
|
|
5
|
+
describe Babosa::Transliterator::Romanian do
|
6
6
|
let(:t) { described_class.instance }
|
7
7
|
it_behaves_like "a latin transliterator"
|
8
8
|
|
@@ -13,7 +13,6 @@ describe Babosa::Transliterator::Romanian do
|
|
13
13
|
"Țară" => "Tara",
|
14
14
|
"Șanț" => "Sant"
|
15
15
|
}
|
16
|
-
examples.each {|k, v| t.transliterate(k).
|
16
|
+
examples.each { |k, v| expect(t.transliterate(k)).to eql(v) }
|
17
17
|
end
|
18
|
-
|
19
|
-
end
|
18
|
+
end
|