babosa 1.0.4 → 2.0.0.beta
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Changelog.md +12 -0
- data/README.md +80 -117
- data/Rakefile +9 -8
- data/lib/babosa.rb +2 -4
- data/lib/babosa/identifier.rb +82 -121
- data/lib/babosa/transliterator/base.rb +57 -56
- data/lib/babosa/transliterator/bulgarian.rb +3 -2
- data/lib/babosa/transliterator/cyrillic.rb +5 -5
- data/lib/babosa/transliterator/danish.rb +3 -3
- data/lib/babosa/transliterator/german.rb +3 -2
- data/lib/babosa/transliterator/greek.rb +4 -3
- data/lib/babosa/transliterator/hindi.rb +3 -2
- data/lib/babosa/transliterator/latin.rb +5 -5
- data/lib/babosa/transliterator/macedonian.rb +3 -2
- data/lib/babosa/transliterator/norwegian.rb +3 -3
- data/lib/babosa/transliterator/romanian.rb +3 -2
- data/lib/babosa/transliterator/russian.rb +3 -2
- data/lib/babosa/transliterator/serbian.rb +29 -27
- data/lib/babosa/transliterator/spanish.rb +2 -2
- data/lib/babosa/transliterator/swedish.rb +3 -3
- data/lib/babosa/transliterator/turkish.rb +8 -8
- data/lib/babosa/transliterator/ukrainian.rb +5 -4
- data/lib/babosa/transliterator/vietnamese.rb +4 -3
- data/lib/babosa/version.rb +3 -1
- data/spec/{babosa_spec.rb → identifier_spec.rb} +9 -10
- data/spec/spec_helper.rb +6 -6
- data/spec/transliterators/base_spec.rb +5 -6
- data/spec/transliterators/bulgarian_spec.rb +4 -5
- data/spec/transliterators/danish_spec.rb +5 -6
- data/spec/transliterators/german_spec.rb +4 -5
- data/spec/transliterators/greek_spec.rb +7 -7
- data/spec/transliterators/hindi_spec.rb +7 -7
- data/spec/transliterators/latin_spec.rb +3 -4
- data/spec/transliterators/macedonian_spec.rb +3 -4
- data/spec/transliterators/norwegian_spec.rb +4 -4
- data/spec/transliterators/polish_spec.rb +3 -5
- data/spec/transliterators/romanian_spec.rb +5 -6
- data/spec/transliterators/russian_spec.rb +3 -4
- data/spec/transliterators/serbian_spec.rb +6 -7
- data/spec/transliterators/spanish_spec.rb +4 -5
- data/spec/transliterators/swedish_spec.rb +7 -7
- data/spec/transliterators/turkish_spec.rb +24 -24
- data/spec/transliterators/ukrainian_spec.rb +74 -75
- data/spec/transliterators/vietnamese_spec.rb +10 -10
- metadata +17 -38
- data/lib/babosa/utf8/active_support_proxy.rb +0 -38
- data/lib/babosa/utf8/dumb_proxy.rb +0 -49
- data/lib/babosa/utf8/java_proxy.rb +0 -22
- data/lib/babosa/utf8/mappings.rb +0 -193
- data/lib/babosa/utf8/proxy.rb +0 -125
- data/lib/babosa/utf8/unicode_proxy.rb +0 -23
- data/spec/utf8_proxy_spec.rb +0 -52
@@ -1,24 +1,24 @@
|
|
1
|
-
#
|
2
|
-
|
3
|
-
|
4
|
-
|
5
|
-
|
6
|
-
let(:t) { described_class.instance }
|
7
|
-
it_behaves_like "a latin transliterator"
|
8
|
-
|
9
|
-
it "should transliterate various characters" do
|
10
|
-
examples = {
|
11
|
-
"Nâzım" => "Nazim",
|
12
|
-
"sükûnet" => "sukunet",
|
13
|
-
"millîleştirmek" => "millilestirmek",
|
14
|
-
"mêmur" => "memur",
|
15
|
-
"lôkman" => "lokman",
|
16
|
-
"yoğurt" => "yogurt",
|
17
|
-
"şair" => "sair",
|
18
|
-
"İzmir" => "Izmir",
|
19
|
-
"yığın" => "yigin",
|
20
|
-
"çarşı" => "carsi"
|
21
|
-
}
|
22
|
-
examples.each {|k, v| expect(t.transliterate(k)).to eql(v)}
|
23
|
-
end
|
24
|
-
end
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require "spec_helper"
|
4
|
+
|
5
|
+
describe Babosa::Transliterator::Turkish do
|
6
|
+
let(:t) { described_class.instance }
|
7
|
+
it_behaves_like "a latin transliterator"
|
8
|
+
|
9
|
+
it "should transliterate various characters" do
|
10
|
+
examples = {
|
11
|
+
"Nâzım" => "Nazim",
|
12
|
+
"sükûnet" => "sukunet",
|
13
|
+
"millîleştirmek" => "millilestirmek",
|
14
|
+
"mêmur" => "memur",
|
15
|
+
"lôkman" => "lokman",
|
16
|
+
"yoğurt" => "yogurt",
|
17
|
+
"şair" => "sair",
|
18
|
+
"İzmir" => "Izmir",
|
19
|
+
"yığın" => "yigin",
|
20
|
+
"çarşı" => "carsi"
|
21
|
+
}
|
22
|
+
examples.each { |k, v| expect(t.transliterate(k)).to eql(v) }
|
23
|
+
end
|
24
|
+
end
|
@@ -1,88 +1,87 @@
|
|
1
|
-
#
|
2
|
-
require File.expand_path("../../spec_helper", __FILE__)
|
1
|
+
# frozen_string_literal: true
|
3
2
|
|
4
|
-
|
3
|
+
require "spec_helper"
|
5
4
|
|
5
|
+
describe Babosa::Transliterator::Ukrainian do
|
6
6
|
let(:t) { described_class.instance }
|
7
7
|
it_behaves_like "a cyrillic transliterator"
|
8
8
|
|
9
9
|
it "should transliterate Cyrillic characters" do
|
10
10
|
examples = {
|
11
|
-
"Алушта"
|
12
|
-
"Андрій"
|
13
|
-
"Борщагівка"
|
14
|
-
"Борисенко"
|
15
|
-
"Вінниця"
|
16
|
-
"Володимир"
|
17
|
-
"Гадяч"
|
18
|
-
"Богдан"
|
19
|
-
"Ґалаґан"
|
20
|
-
"Ґорґани"
|
21
|
-
"Донецьк"
|
22
|
-
"Дмитро"
|
23
|
-
"Рівне"
|
24
|
-
"Олег"
|
25
|
-
"Есмань"
|
26
|
-
"Єнакієве"
|
27
|
-
"Гаєвич"
|
28
|
-
"Короп'є"
|
29
|
-
"Житомир"
|
30
|
-
"Жанна"
|
31
|
-
"Жежелів"
|
32
|
-
"Закарпаття"
|
33
|
-
"Казимирчук"
|
34
|
-
"Медвин"
|
35
|
-
"Михайленко"
|
36
|
-
"Іванків"
|
37
|
-
"Іващенко"
|
38
|
-
"Їжакевич"
|
39
|
-
"Кадиївка"
|
40
|
-
"Мар'їне"
|
41
|
-
"Йосипівка"
|
42
|
-
"Стрий"
|
43
|
-
"Олексій"
|
44
|
-
"Київ"
|
45
|
-
"Коваленко"
|
46
|
-
"Лебедин"
|
47
|
-
"Леонід"
|
48
|
-
"Миколаїв"
|
49
|
-
"Маринич"
|
50
|
-
"Ніжин"
|
51
|
-
"Наталія"
|
52
|
-
"Одеса"
|
53
|
-
"Онищенко"
|
54
|
-
"Полтава"
|
55
|
-
"Петро"
|
11
|
+
"Алушта" => "Alushta",
|
12
|
+
"Андрій" => "Andrii",
|
13
|
+
"Борщагівка" => "Borshchahivka",
|
14
|
+
"Борисенко" => "Borysenko",
|
15
|
+
"Вінниця" => "Vinnytsia",
|
16
|
+
"Володимир" => "Volodymyr",
|
17
|
+
"Гадяч" => "Hadiach",
|
18
|
+
"Богдан" => "Bohdan",
|
19
|
+
"Ґалаґан" => "Galagan",
|
20
|
+
"Ґорґани" => "Gorgany",
|
21
|
+
"Донецьк" => "Donetsk",
|
22
|
+
"Дмитро" => "Dmytro",
|
23
|
+
"Рівне" => "Rivne",
|
24
|
+
"Олег" => "Oleh",
|
25
|
+
"Есмань" => "Esman",
|
26
|
+
"Єнакієве" => "Yenakiieve",
|
27
|
+
"Гаєвич" => "Haievych",
|
28
|
+
"Короп'є" => "Koropie",
|
29
|
+
"Житомир" => "Zhytomyr",
|
30
|
+
"Жанна" => "Zhanna",
|
31
|
+
"Жежелів" => "Zhezheliv",
|
32
|
+
"Закарпаття" => "Zakarpattia",
|
33
|
+
"Казимирчук" => "Kazymyrchuk",
|
34
|
+
"Медвин" => "Medvyn",
|
35
|
+
"Михайленко" => "Mykhailenko",
|
36
|
+
"Іванків" => "Ivankiv",
|
37
|
+
"Іващенко" => "Ivashchenko",
|
38
|
+
"Їжакевич" => "Yizhakevych",
|
39
|
+
"Кадиївка" => "Kadyivka",
|
40
|
+
"Мар'їне" => "Marine",
|
41
|
+
"Йосипівка" => "Yosypivka",
|
42
|
+
"Стрий" => "Stryi",
|
43
|
+
"Олексій" => "Oleksii",
|
44
|
+
"Київ" => "Kyiv",
|
45
|
+
"Коваленко" => "Kovalenko",
|
46
|
+
"Лебедин" => "Lebedyn",
|
47
|
+
"Леонід" => "Leonid",
|
48
|
+
"Миколаїв" => "Mykolaiv",
|
49
|
+
"Маринич" => "Marynych",
|
50
|
+
"Ніжин" => "Nizhyn",
|
51
|
+
"Наталія" => "Nataliia",
|
52
|
+
"Одеса" => "Odesa",
|
53
|
+
"Онищенко" => "Onyshchenko",
|
54
|
+
"Полтава" => "Poltava",
|
55
|
+
"Петро" => "Petro",
|
56
56
|
"Решетилівка" => "Reshetylivka",
|
57
57
|
"Рибчинський" => "Rybchynskyi",
|
58
|
-
"Суми"
|
59
|
-
"Соломія"
|
60
|
-
"Тернопіль"
|
61
|
-
"Троць"
|
62
|
-
"Ужгород"
|
63
|
-
"Уляна"
|
64
|
-
"Фастів"
|
65
|
-
"Філіпчук"
|
66
|
-
"Харків"
|
67
|
-
"Христина"
|
58
|
+
"Суми" => "Sumy",
|
59
|
+
"Соломія" => "Solomiia",
|
60
|
+
"Тернопіль" => "Ternopil",
|
61
|
+
"Троць" => "Trots",
|
62
|
+
"Ужгород" => "Uzhhorod",
|
63
|
+
"Уляна" => "Uliana",
|
64
|
+
"Фастів" => "Fastiv",
|
65
|
+
"Філіпчук" => "Filipchuk",
|
66
|
+
"Харків" => "Kharkiv",
|
67
|
+
"Христина" => "Khrystyna",
|
68
68
|
"Біла Церква" => "Bila Tserkva",
|
69
|
-
"Стеценко"
|
70
|
-
"Чернівці"
|
71
|
-
"Шевченко"
|
72
|
-
"Шостка"
|
73
|
-
"Кишеньки"
|
74
|
-
"Щербухи"
|
75
|
-
"Гоща"
|
76
|
-
"Гаращенко"
|
77
|
-
"Юрій"
|
78
|
-
"Корюківка"
|
79
|
-
"Яготин"
|
80
|
-
"Ярошенко"
|
81
|
-
"Костянтин"
|
82
|
-
"Знам'янка"
|
83
|
-
"Феодосія"
|
69
|
+
"Стеценко" => "Stetsenko",
|
70
|
+
"Чернівці" => "Chernivtsi",
|
71
|
+
"Шевченко" => "Shevchenko",
|
72
|
+
"Шостка" => "Shostka",
|
73
|
+
"Кишеньки" => "Kyshenky",
|
74
|
+
"Щербухи" => "Shcherbukhy",
|
75
|
+
"Гоща" => "Hoshcha",
|
76
|
+
"Гаращенко" => "Harashchenko",
|
77
|
+
"Юрій" => "Yurii",
|
78
|
+
"Корюківка" => "Koriukivka",
|
79
|
+
"Яготин" => "Yahotyn",
|
80
|
+
"Ярошенко" => "Yaroshenko",
|
81
|
+
"Костянтин" => "Kostiantyn",
|
82
|
+
"Знам'янка" => "Znamianka",
|
83
|
+
"Феодосія" => "Feodosiia"
|
84
84
|
}
|
85
85
|
examples.each { |k, v| expect(t.transliterate(k)).to eql(v) }
|
86
86
|
end
|
87
|
-
|
88
|
-
end
|
87
|
+
end
|
@@ -1,18 +1,18 @@
|
|
1
|
-
#
|
2
|
-
require File.expand_path("../../spec_helper", __FILE__)
|
1
|
+
# frozen_string_literal: true
|
3
2
|
|
4
|
-
|
3
|
+
require "spec_helper"
|
5
4
|
|
5
|
+
describe Babosa::Transliterator::Vietnamese do
|
6
6
|
let(:t) { described_class.instance }
|
7
7
|
it_behaves_like "a latin transliterator"
|
8
8
|
|
9
9
|
it "should transliterate various characters" do
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
examples.each {|k, v| expect(t.transliterate(k)).to eql(v)}
|
10
|
+
examples = {
|
11
|
+
"làm" => "lam",
|
12
|
+
"đàn ông" => "dan ong",
|
13
|
+
"thật" => "that",
|
14
|
+
"khổ" => "kho"
|
15
|
+
}
|
16
|
+
examples.each { |k, v| expect(t.transliterate(k)).to eql(v) }
|
17
17
|
end
|
18
18
|
end
|
metadata
CHANGED
@@ -1,29 +1,29 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: babosa
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version:
|
4
|
+
version: 2.0.0.beta
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Norman Clarke
|
8
|
-
autorequire:
|
8
|
+
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2020-10-
|
11
|
+
date: 2020-10-09 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
|
-
name:
|
14
|
+
name: rake
|
15
15
|
requirement: !ruby/object:Gem::Requirement
|
16
16
|
requirements:
|
17
17
|
- - ">="
|
18
18
|
- !ruby/object:Gem::Version
|
19
|
-
version:
|
19
|
+
version: '0'
|
20
20
|
type: :development
|
21
21
|
prerelease: false
|
22
22
|
version_requirements: !ruby/object:Gem::Requirement
|
23
23
|
requirements:
|
24
24
|
- - ">="
|
25
25
|
- !ruby/object:Gem::Version
|
26
|
-
version:
|
26
|
+
version: '0'
|
27
27
|
- !ruby/object:Gem::Dependency
|
28
28
|
name: rspec
|
29
29
|
requirement: !ruby/object:Gem::Requirement
|
@@ -39,35 +39,21 @@ dependencies:
|
|
39
39
|
- !ruby/object:Gem::Version
|
40
40
|
version: 3.7.0
|
41
41
|
- !ruby/object:Gem::Dependency
|
42
|
-
name:
|
43
|
-
requirement: !ruby/object:Gem::Requirement
|
44
|
-
requirements:
|
45
|
-
- - ">="
|
46
|
-
- !ruby/object:Gem::Version
|
47
|
-
version: '0'
|
48
|
-
type: :development
|
49
|
-
prerelease: false
|
50
|
-
version_requirements: !ruby/object:Gem::Requirement
|
51
|
-
requirements:
|
52
|
-
- - ">="
|
53
|
-
- !ruby/object:Gem::Version
|
54
|
-
version: '0'
|
55
|
-
- !ruby/object:Gem::Dependency
|
56
|
-
name: rake
|
42
|
+
name: rubocop
|
57
43
|
requirement: !ruby/object:Gem::Requirement
|
58
44
|
requirements:
|
59
45
|
- - ">="
|
60
46
|
- !ruby/object:Gem::Version
|
61
|
-
version:
|
47
|
+
version: 0.93.0
|
62
48
|
type: :development
|
63
49
|
prerelease: false
|
64
50
|
version_requirements: !ruby/object:Gem::Requirement
|
65
51
|
requirements:
|
66
52
|
- - ">="
|
67
53
|
- !ruby/object:Gem::Version
|
68
|
-
version:
|
54
|
+
version: 0.93.0
|
69
55
|
- !ruby/object:Gem::Dependency
|
70
|
-
name:
|
56
|
+
name: simplecov
|
71
57
|
requirement: !ruby/object:Gem::Requirement
|
72
58
|
requirements:
|
73
59
|
- - ">="
|
@@ -114,14 +100,8 @@ files:
|
|
114
100
|
- lib/babosa/transliterator/turkish.rb
|
115
101
|
- lib/babosa/transliterator/ukrainian.rb
|
116
102
|
- lib/babosa/transliterator/vietnamese.rb
|
117
|
-
- lib/babosa/utf8/active_support_proxy.rb
|
118
|
-
- lib/babosa/utf8/dumb_proxy.rb
|
119
|
-
- lib/babosa/utf8/java_proxy.rb
|
120
|
-
- lib/babosa/utf8/mappings.rb
|
121
|
-
- lib/babosa/utf8/proxy.rb
|
122
|
-
- lib/babosa/utf8/unicode_proxy.rb
|
123
103
|
- lib/babosa/version.rb
|
124
|
-
- spec/
|
104
|
+
- spec/identifier_spec.rb
|
125
105
|
- spec/spec_helper.rb
|
126
106
|
- spec/transliterators/base_spec.rb
|
127
107
|
- spec/transliterators/bulgarian_spec.rb
|
@@ -141,11 +121,10 @@ files:
|
|
141
121
|
- spec/transliterators/turkish_spec.rb
|
142
122
|
- spec/transliterators/ukrainian_spec.rb
|
143
123
|
- spec/transliterators/vietnamese_spec.rb
|
144
|
-
- spec/utf8_proxy_spec.rb
|
145
124
|
homepage: http://github.com/norman/babosa
|
146
125
|
licenses: []
|
147
126
|
metadata: {}
|
148
|
-
post_install_message:
|
127
|
+
post_install_message:
|
149
128
|
rdoc_options: []
|
150
129
|
require_paths:
|
151
130
|
- lib
|
@@ -153,15 +132,15 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
153
132
|
requirements:
|
154
133
|
- - ">="
|
155
134
|
- !ruby/object:Gem::Version
|
156
|
-
version: 2.
|
135
|
+
version: 2.5.0
|
157
136
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
158
137
|
requirements:
|
159
|
-
- - "
|
138
|
+
- - ">"
|
160
139
|
- !ruby/object:Gem::Version
|
161
|
-
version:
|
140
|
+
version: 1.3.1
|
162
141
|
requirements: []
|
163
|
-
rubygems_version: 3.1.
|
164
|
-
signing_key:
|
142
|
+
rubygems_version: 3.1.4
|
143
|
+
signing_key:
|
165
144
|
specification_version: 4
|
166
145
|
summary: A library for creating slugs.
|
167
146
|
test_files: []
|
@@ -1,38 +0,0 @@
|
|
1
|
-
require 'active_support'
|
2
|
-
require 'active_support/multibyte/unicode'
|
3
|
-
|
4
|
-
module Babosa
|
5
|
-
module UTF8
|
6
|
-
# A UTF-8 proxy using Active Support's multibyte support.
|
7
|
-
module ActiveSupportProxy
|
8
|
-
extend ActiveSupport::Multibyte::Unicode
|
9
|
-
extend self
|
10
|
-
|
11
|
-
def self.normalize_utf8(string)
|
12
|
-
normalize(string, :c)
|
13
|
-
end
|
14
|
-
|
15
|
-
if ActiveSupport::VERSION::MAJOR == 3
|
16
|
-
def downcase(string)
|
17
|
-
ActiveSupport::Multibyte::Chars.new(string).downcase.to_s
|
18
|
-
end
|
19
|
-
|
20
|
-
def upcase(string)
|
21
|
-
ActiveSupport::Multibyte::Chars.new(string).upcase.to_s
|
22
|
-
end
|
23
|
-
elsif ActiveSupport::VERSION::MAJOR >= 6
|
24
|
-
def self.normalize_utf8(string)
|
25
|
-
string.unicode_normalize(:nfc).to_s
|
26
|
-
end
|
27
|
-
|
28
|
-
def downcase(string)
|
29
|
-
string.downcase.to_s
|
30
|
-
end
|
31
|
-
|
32
|
-
def upcase(string)
|
33
|
-
string.upcase.to_s
|
34
|
-
end
|
35
|
-
end
|
36
|
-
end
|
37
|
-
end
|
38
|
-
end
|
@@ -1,49 +0,0 @@
|
|
1
|
-
require File.expand_path("../mappings", __FILE__)
|
2
|
-
|
3
|
-
module Babosa
|
4
|
-
module UTF8
|
5
|
-
|
6
|
-
# This module provides fallback UTF-8 support when nothing else is
|
7
|
-
# available. It does case folding for Roman alphabet-based characters
|
8
|
-
# commonly used by Western European languages and little else, making it
|
9
|
-
# useless for Russian, Bulgarian, Greek, etc. If at all possible, Unicode
|
10
|
-
# or ActiveSupport should be used instead because they support the full
|
11
|
-
# UTF-8 character range.
|
12
|
-
module DumbProxy
|
13
|
-
extend Proxy
|
14
|
-
extend self
|
15
|
-
|
16
|
-
def downcase(string)
|
17
|
-
string.downcase.unpack("U*").map {|char| Mappings::DOWNCASE[char] or char}.flatten.pack("U*")
|
18
|
-
end
|
19
|
-
|
20
|
-
def upcase(string)
|
21
|
-
string.upcase.unpack("U*").map {|char| Mappings::UPCASE[char] or char}.flatten.pack("U*")
|
22
|
-
end
|
23
|
-
|
24
|
-
if ''.respond_to?(:unicode_normalize)
|
25
|
-
def normalize_utf8(string)
|
26
|
-
string.unicode_normalize
|
27
|
-
end
|
28
|
-
else
|
29
|
-
# On Ruby 2.2, this uses the native Unicode normalize method. On all
|
30
|
-
# other Rubies, it does a very naive Unicode normalization, which should
|
31
|
-
# work for this library's purposes (i.e., Roman-based codepoints, up to
|
32
|
-
# U+017E). Do not use reuse this as a general solution! Use a real
|
33
|
-
# library like Unicode or ActiveSupport instead.
|
34
|
-
def normalize_utf8(string)
|
35
|
-
codepoints = string.unpack("U*")
|
36
|
-
new = []
|
37
|
-
until codepoints.empty? do
|
38
|
-
if Mappings::COMPOSITION[codepoints[0..1]]
|
39
|
-
new << Mappings::COMPOSITION[codepoints.slice!(0,2)]
|
40
|
-
else
|
41
|
-
new << codepoints.shift
|
42
|
-
end
|
43
|
-
end
|
44
|
-
new.compact.flatten.pack("U*")
|
45
|
-
end
|
46
|
-
end
|
47
|
-
end
|
48
|
-
end
|
49
|
-
end
|