babosa 1.0.4 → 2.0.0.beta
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/Changelog.md +12 -0
- data/README.md +80 -117
- data/Rakefile +9 -8
- data/lib/babosa.rb +2 -4
- data/lib/babosa/identifier.rb +82 -121
- data/lib/babosa/transliterator/base.rb +57 -56
- data/lib/babosa/transliterator/bulgarian.rb +3 -2
- data/lib/babosa/transliterator/cyrillic.rb +5 -5
- data/lib/babosa/transliterator/danish.rb +3 -3
- data/lib/babosa/transliterator/german.rb +3 -2
- data/lib/babosa/transliterator/greek.rb +4 -3
- data/lib/babosa/transliterator/hindi.rb +3 -2
- data/lib/babosa/transliterator/latin.rb +5 -5
- data/lib/babosa/transliterator/macedonian.rb +3 -2
- data/lib/babosa/transliterator/norwegian.rb +3 -3
- data/lib/babosa/transliterator/romanian.rb +3 -2
- data/lib/babosa/transliterator/russian.rb +3 -2
- data/lib/babosa/transliterator/serbian.rb +29 -27
- data/lib/babosa/transliterator/spanish.rb +2 -2
- data/lib/babosa/transliterator/swedish.rb +3 -3
- data/lib/babosa/transliterator/turkish.rb +8 -8
- data/lib/babosa/transliterator/ukrainian.rb +5 -4
- data/lib/babosa/transliterator/vietnamese.rb +4 -3
- data/lib/babosa/version.rb +3 -1
- data/spec/{babosa_spec.rb → identifier_spec.rb} +9 -10
- data/spec/spec_helper.rb +6 -6
- data/spec/transliterators/base_spec.rb +5 -6
- data/spec/transliterators/bulgarian_spec.rb +4 -5
- data/spec/transliterators/danish_spec.rb +5 -6
- data/spec/transliterators/german_spec.rb +4 -5
- data/spec/transliterators/greek_spec.rb +7 -7
- data/spec/transliterators/hindi_spec.rb +7 -7
- data/spec/transliterators/latin_spec.rb +3 -4
- data/spec/transliterators/macedonian_spec.rb +3 -4
- data/spec/transliterators/norwegian_spec.rb +4 -4
- data/spec/transliterators/polish_spec.rb +3 -5
- data/spec/transliterators/romanian_spec.rb +5 -6
- data/spec/transliterators/russian_spec.rb +3 -4
- data/spec/transliterators/serbian_spec.rb +6 -7
- data/spec/transliterators/spanish_spec.rb +4 -5
- data/spec/transliterators/swedish_spec.rb +7 -7
- data/spec/transliterators/turkish_spec.rb +24 -24
- data/spec/transliterators/ukrainian_spec.rb +74 -75
- data/spec/transliterators/vietnamese_spec.rb +10 -10
- metadata +17 -38
- data/lib/babosa/utf8/active_support_proxy.rb +0 -38
- data/lib/babosa/utf8/dumb_proxy.rb +0 -49
- data/lib/babosa/utf8/java_proxy.rb +0 -22
- data/lib/babosa/utf8/mappings.rb +0 -193
- data/lib/babosa/utf8/proxy.rb +0 -125
- data/lib/babosa/utf8/unicode_proxy.rb +0 -23
- data/spec/utf8_proxy_spec.rb +0 -52
@@ -1,24 +1,24 @@
|
|
1
|
-
#
|
2
|
-
|
3
|
-
|
4
|
-
|
5
|
-
|
6
|
-
let(:t) { described_class.instance }
|
7
|
-
it_behaves_like "a latin transliterator"
|
8
|
-
|
9
|
-
it "should transliterate various characters" do
|
10
|
-
examples = {
|
11
|
-
"Nâzım" => "Nazim",
|
12
|
-
"sükûnet" => "sukunet",
|
13
|
-
"millîleştirmek" => "millilestirmek",
|
14
|
-
"mêmur" => "memur",
|
15
|
-
"lôkman" => "lokman",
|
16
|
-
"yoğurt" => "yogurt",
|
17
|
-
"şair" => "sair",
|
18
|
-
"İzmir" => "Izmir",
|
19
|
-
"yığın" => "yigin",
|
20
|
-
"çarşı" => "carsi"
|
21
|
-
}
|
22
|
-
examples.each {|k, v| expect(t.transliterate(k)).to eql(v)}
|
23
|
-
end
|
24
|
-
end
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require "spec_helper"
|
4
|
+
|
5
|
+
describe Babosa::Transliterator::Turkish do
|
6
|
+
let(:t) { described_class.instance }
|
7
|
+
it_behaves_like "a latin transliterator"
|
8
|
+
|
9
|
+
it "should transliterate various characters" do
|
10
|
+
examples = {
|
11
|
+
"Nâzım" => "Nazim",
|
12
|
+
"sükûnet" => "sukunet",
|
13
|
+
"millîleştirmek" => "millilestirmek",
|
14
|
+
"mêmur" => "memur",
|
15
|
+
"lôkman" => "lokman",
|
16
|
+
"yoğurt" => "yogurt",
|
17
|
+
"şair" => "sair",
|
18
|
+
"İzmir" => "Izmir",
|
19
|
+
"yığın" => "yigin",
|
20
|
+
"çarşı" => "carsi"
|
21
|
+
}
|
22
|
+
examples.each { |k, v| expect(t.transliterate(k)).to eql(v) }
|
23
|
+
end
|
24
|
+
end
|
@@ -1,88 +1,87 @@
|
|
1
|
-
#
|
2
|
-
require File.expand_path("../../spec_helper", __FILE__)
|
1
|
+
# frozen_string_literal: true
|
3
2
|
|
4
|
-
|
3
|
+
require "spec_helper"
|
5
4
|
|
5
|
+
describe Babosa::Transliterator::Ukrainian do
|
6
6
|
let(:t) { described_class.instance }
|
7
7
|
it_behaves_like "a cyrillic transliterator"
|
8
8
|
|
9
9
|
it "should transliterate Cyrillic characters" do
|
10
10
|
examples = {
|
11
|
-
"Алушта"
|
12
|
-
"Андрій"
|
13
|
-
"Борщагівка"
|
14
|
-
"Борисенко"
|
15
|
-
"Вінниця"
|
16
|
-
"Володимир"
|
17
|
-
"Гадяч"
|
18
|
-
"Богдан"
|
19
|
-
"Ґалаґан"
|
20
|
-
"Ґорґани"
|
21
|
-
"Донецьк"
|
22
|
-
"Дмитро"
|
23
|
-
"Рівне"
|
24
|
-
"Олег"
|
25
|
-
"Есмань"
|
26
|
-
"Єнакієве"
|
27
|
-
"Гаєвич"
|
28
|
-
"Короп'є"
|
29
|
-
"Житомир"
|
30
|
-
"Жанна"
|
31
|
-
"Жежелів"
|
32
|
-
"Закарпаття"
|
33
|
-
"Казимирчук"
|
34
|
-
"Медвин"
|
35
|
-
"Михайленко"
|
36
|
-
"Іванків"
|
37
|
-
"Іващенко"
|
38
|
-
"Їжакевич"
|
39
|
-
"Кадиївка"
|
40
|
-
"Мар'їне"
|
41
|
-
"Йосипівка"
|
42
|
-
"Стрий"
|
43
|
-
"Олексій"
|
44
|
-
"Київ"
|
45
|
-
"Коваленко"
|
46
|
-
"Лебедин"
|
47
|
-
"Леонід"
|
48
|
-
"Миколаїв"
|
49
|
-
"Маринич"
|
50
|
-
"Ніжин"
|
51
|
-
"Наталія"
|
52
|
-
"Одеса"
|
53
|
-
"Онищенко"
|
54
|
-
"Полтава"
|
55
|
-
"Петро"
|
11
|
+
"Алушта" => "Alushta",
|
12
|
+
"Андрій" => "Andrii",
|
13
|
+
"Борщагівка" => "Borshchahivka",
|
14
|
+
"Борисенко" => "Borysenko",
|
15
|
+
"Вінниця" => "Vinnytsia",
|
16
|
+
"Володимир" => "Volodymyr",
|
17
|
+
"Гадяч" => "Hadiach",
|
18
|
+
"Богдан" => "Bohdan",
|
19
|
+
"Ґалаґан" => "Galagan",
|
20
|
+
"Ґорґани" => "Gorgany",
|
21
|
+
"Донецьк" => "Donetsk",
|
22
|
+
"Дмитро" => "Dmytro",
|
23
|
+
"Рівне" => "Rivne",
|
24
|
+
"Олег" => "Oleh",
|
25
|
+
"Есмань" => "Esman",
|
26
|
+
"Єнакієве" => "Yenakiieve",
|
27
|
+
"Гаєвич" => "Haievych",
|
28
|
+
"Короп'є" => "Koropie",
|
29
|
+
"Житомир" => "Zhytomyr",
|
30
|
+
"Жанна" => "Zhanna",
|
31
|
+
"Жежелів" => "Zhezheliv",
|
32
|
+
"Закарпаття" => "Zakarpattia",
|
33
|
+
"Казимирчук" => "Kazymyrchuk",
|
34
|
+
"Медвин" => "Medvyn",
|
35
|
+
"Михайленко" => "Mykhailenko",
|
36
|
+
"Іванків" => "Ivankiv",
|
37
|
+
"Іващенко" => "Ivashchenko",
|
38
|
+
"Їжакевич" => "Yizhakevych",
|
39
|
+
"Кадиївка" => "Kadyivka",
|
40
|
+
"Мар'їне" => "Marine",
|
41
|
+
"Йосипівка" => "Yosypivka",
|
42
|
+
"Стрий" => "Stryi",
|
43
|
+
"Олексій" => "Oleksii",
|
44
|
+
"Київ" => "Kyiv",
|
45
|
+
"Коваленко" => "Kovalenko",
|
46
|
+
"Лебедин" => "Lebedyn",
|
47
|
+
"Леонід" => "Leonid",
|
48
|
+
"Миколаїв" => "Mykolaiv",
|
49
|
+
"Маринич" => "Marynych",
|
50
|
+
"Ніжин" => "Nizhyn",
|
51
|
+
"Наталія" => "Nataliia",
|
52
|
+
"Одеса" => "Odesa",
|
53
|
+
"Онищенко" => "Onyshchenko",
|
54
|
+
"Полтава" => "Poltava",
|
55
|
+
"Петро" => "Petro",
|
56
56
|
"Решетилівка" => "Reshetylivka",
|
57
57
|
"Рибчинський" => "Rybchynskyi",
|
58
|
-
"Суми"
|
59
|
-
"Соломія"
|
60
|
-
"Тернопіль"
|
61
|
-
"Троць"
|
62
|
-
"Ужгород"
|
63
|
-
"Уляна"
|
64
|
-
"Фастів"
|
65
|
-
"Філіпчук"
|
66
|
-
"Харків"
|
67
|
-
"Христина"
|
58
|
+
"Суми" => "Sumy",
|
59
|
+
"Соломія" => "Solomiia",
|
60
|
+
"Тернопіль" => "Ternopil",
|
61
|
+
"Троць" => "Trots",
|
62
|
+
"Ужгород" => "Uzhhorod",
|
63
|
+
"Уляна" => "Uliana",
|
64
|
+
"Фастів" => "Fastiv",
|
65
|
+
"Філіпчук" => "Filipchuk",
|
66
|
+
"Харків" => "Kharkiv",
|
67
|
+
"Христина" => "Khrystyna",
|
68
68
|
"Біла Церква" => "Bila Tserkva",
|
69
|
-
"Стеценко"
|
70
|
-
"Чернівці"
|
71
|
-
"Шевченко"
|
72
|
-
"Шостка"
|
73
|
-
"Кишеньки"
|
74
|
-
"Щербухи"
|
75
|
-
"Гоща"
|
76
|
-
"Гаращенко"
|
77
|
-
"Юрій"
|
78
|
-
"Корюківка"
|
79
|
-
"Яготин"
|
80
|
-
"Ярошенко"
|
81
|
-
"Костянтин"
|
82
|
-
"Знам'янка"
|
83
|
-
"Феодосія"
|
69
|
+
"Стеценко" => "Stetsenko",
|
70
|
+
"Чернівці" => "Chernivtsi",
|
71
|
+
"Шевченко" => "Shevchenko",
|
72
|
+
"Шостка" => "Shostka",
|
73
|
+
"Кишеньки" => "Kyshenky",
|
74
|
+
"Щербухи" => "Shcherbukhy",
|
75
|
+
"Гоща" => "Hoshcha",
|
76
|
+
"Гаращенко" => "Harashchenko",
|
77
|
+
"Юрій" => "Yurii",
|
78
|
+
"Корюківка" => "Koriukivka",
|
79
|
+
"Яготин" => "Yahotyn",
|
80
|
+
"Ярошенко" => "Yaroshenko",
|
81
|
+
"Костянтин" => "Kostiantyn",
|
82
|
+
"Знам'янка" => "Znamianka",
|
83
|
+
"Феодосія" => "Feodosiia"
|
84
84
|
}
|
85
85
|
examples.each { |k, v| expect(t.transliterate(k)).to eql(v) }
|
86
86
|
end
|
87
|
-
|
88
|
-
end
|
87
|
+
end
|
@@ -1,18 +1,18 @@
|
|
1
|
-
#
|
2
|
-
require File.expand_path("../../spec_helper", __FILE__)
|
1
|
+
# frozen_string_literal: true
|
3
2
|
|
4
|
-
|
3
|
+
require "spec_helper"
|
5
4
|
|
5
|
+
describe Babosa::Transliterator::Vietnamese do
|
6
6
|
let(:t) { described_class.instance }
|
7
7
|
it_behaves_like "a latin transliterator"
|
8
8
|
|
9
9
|
it "should transliterate various characters" do
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
examples.each {|k, v| expect(t.transliterate(k)).to eql(v)}
|
10
|
+
examples = {
|
11
|
+
"làm" => "lam",
|
12
|
+
"đàn ông" => "dan ong",
|
13
|
+
"thật" => "that",
|
14
|
+
"khổ" => "kho"
|
15
|
+
}
|
16
|
+
examples.each { |k, v| expect(t.transliterate(k)).to eql(v) }
|
17
17
|
end
|
18
18
|
end
|
metadata
CHANGED
@@ -1,29 +1,29 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: babosa
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version:
|
4
|
+
version: 2.0.0.beta
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Norman Clarke
|
8
|
-
autorequire:
|
8
|
+
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2020-10-
|
11
|
+
date: 2020-10-09 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
|
-
name:
|
14
|
+
name: rake
|
15
15
|
requirement: !ruby/object:Gem::Requirement
|
16
16
|
requirements:
|
17
17
|
- - ">="
|
18
18
|
- !ruby/object:Gem::Version
|
19
|
-
version:
|
19
|
+
version: '0'
|
20
20
|
type: :development
|
21
21
|
prerelease: false
|
22
22
|
version_requirements: !ruby/object:Gem::Requirement
|
23
23
|
requirements:
|
24
24
|
- - ">="
|
25
25
|
- !ruby/object:Gem::Version
|
26
|
-
version:
|
26
|
+
version: '0'
|
27
27
|
- !ruby/object:Gem::Dependency
|
28
28
|
name: rspec
|
29
29
|
requirement: !ruby/object:Gem::Requirement
|
@@ -39,35 +39,21 @@ dependencies:
|
|
39
39
|
- !ruby/object:Gem::Version
|
40
40
|
version: 3.7.0
|
41
41
|
- !ruby/object:Gem::Dependency
|
42
|
-
name:
|
43
|
-
requirement: !ruby/object:Gem::Requirement
|
44
|
-
requirements:
|
45
|
-
- - ">="
|
46
|
-
- !ruby/object:Gem::Version
|
47
|
-
version: '0'
|
48
|
-
type: :development
|
49
|
-
prerelease: false
|
50
|
-
version_requirements: !ruby/object:Gem::Requirement
|
51
|
-
requirements:
|
52
|
-
- - ">="
|
53
|
-
- !ruby/object:Gem::Version
|
54
|
-
version: '0'
|
55
|
-
- !ruby/object:Gem::Dependency
|
56
|
-
name: rake
|
42
|
+
name: rubocop
|
57
43
|
requirement: !ruby/object:Gem::Requirement
|
58
44
|
requirements:
|
59
45
|
- - ">="
|
60
46
|
- !ruby/object:Gem::Version
|
61
|
-
version:
|
47
|
+
version: 0.93.0
|
62
48
|
type: :development
|
63
49
|
prerelease: false
|
64
50
|
version_requirements: !ruby/object:Gem::Requirement
|
65
51
|
requirements:
|
66
52
|
- - ">="
|
67
53
|
- !ruby/object:Gem::Version
|
68
|
-
version:
|
54
|
+
version: 0.93.0
|
69
55
|
- !ruby/object:Gem::Dependency
|
70
|
-
name:
|
56
|
+
name: simplecov
|
71
57
|
requirement: !ruby/object:Gem::Requirement
|
72
58
|
requirements:
|
73
59
|
- - ">="
|
@@ -114,14 +100,8 @@ files:
|
|
114
100
|
- lib/babosa/transliterator/turkish.rb
|
115
101
|
- lib/babosa/transliterator/ukrainian.rb
|
116
102
|
- lib/babosa/transliterator/vietnamese.rb
|
117
|
-
- lib/babosa/utf8/active_support_proxy.rb
|
118
|
-
- lib/babosa/utf8/dumb_proxy.rb
|
119
|
-
- lib/babosa/utf8/java_proxy.rb
|
120
|
-
- lib/babosa/utf8/mappings.rb
|
121
|
-
- lib/babosa/utf8/proxy.rb
|
122
|
-
- lib/babosa/utf8/unicode_proxy.rb
|
123
103
|
- lib/babosa/version.rb
|
124
|
-
- spec/
|
104
|
+
- spec/identifier_spec.rb
|
125
105
|
- spec/spec_helper.rb
|
126
106
|
- spec/transliterators/base_spec.rb
|
127
107
|
- spec/transliterators/bulgarian_spec.rb
|
@@ -141,11 +121,10 @@ files:
|
|
141
121
|
- spec/transliterators/turkish_spec.rb
|
142
122
|
- spec/transliterators/ukrainian_spec.rb
|
143
123
|
- spec/transliterators/vietnamese_spec.rb
|
144
|
-
- spec/utf8_proxy_spec.rb
|
145
124
|
homepage: http://github.com/norman/babosa
|
146
125
|
licenses: []
|
147
126
|
metadata: {}
|
148
|
-
post_install_message:
|
127
|
+
post_install_message:
|
149
128
|
rdoc_options: []
|
150
129
|
require_paths:
|
151
130
|
- lib
|
@@ -153,15 +132,15 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
153
132
|
requirements:
|
154
133
|
- - ">="
|
155
134
|
- !ruby/object:Gem::Version
|
156
|
-
version: 2.
|
135
|
+
version: 2.5.0
|
157
136
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
158
137
|
requirements:
|
159
|
-
- - "
|
138
|
+
- - ">"
|
160
139
|
- !ruby/object:Gem::Version
|
161
|
-
version:
|
140
|
+
version: 1.3.1
|
162
141
|
requirements: []
|
163
|
-
rubygems_version: 3.1.
|
164
|
-
signing_key:
|
142
|
+
rubygems_version: 3.1.4
|
143
|
+
signing_key:
|
165
144
|
specification_version: 4
|
166
145
|
summary: A library for creating slugs.
|
167
146
|
test_files: []
|
@@ -1,38 +0,0 @@
|
|
1
|
-
require 'active_support'
|
2
|
-
require 'active_support/multibyte/unicode'
|
3
|
-
|
4
|
-
module Babosa
|
5
|
-
module UTF8
|
6
|
-
# A UTF-8 proxy using Active Support's multibyte support.
|
7
|
-
module ActiveSupportProxy
|
8
|
-
extend ActiveSupport::Multibyte::Unicode
|
9
|
-
extend self
|
10
|
-
|
11
|
-
def self.normalize_utf8(string)
|
12
|
-
normalize(string, :c)
|
13
|
-
end
|
14
|
-
|
15
|
-
if ActiveSupport::VERSION::MAJOR == 3
|
16
|
-
def downcase(string)
|
17
|
-
ActiveSupport::Multibyte::Chars.new(string).downcase.to_s
|
18
|
-
end
|
19
|
-
|
20
|
-
def upcase(string)
|
21
|
-
ActiveSupport::Multibyte::Chars.new(string).upcase.to_s
|
22
|
-
end
|
23
|
-
elsif ActiveSupport::VERSION::MAJOR >= 6
|
24
|
-
def self.normalize_utf8(string)
|
25
|
-
string.unicode_normalize(:nfc).to_s
|
26
|
-
end
|
27
|
-
|
28
|
-
def downcase(string)
|
29
|
-
string.downcase.to_s
|
30
|
-
end
|
31
|
-
|
32
|
-
def upcase(string)
|
33
|
-
string.upcase.to_s
|
34
|
-
end
|
35
|
-
end
|
36
|
-
end
|
37
|
-
end
|
38
|
-
end
|
@@ -1,49 +0,0 @@
|
|
1
|
-
require File.expand_path("../mappings", __FILE__)
|
2
|
-
|
3
|
-
module Babosa
|
4
|
-
module UTF8
|
5
|
-
|
6
|
-
# This module provides fallback UTF-8 support when nothing else is
|
7
|
-
# available. It does case folding for Roman alphabet-based characters
|
8
|
-
# commonly used by Western European languages and little else, making it
|
9
|
-
# useless for Russian, Bulgarian, Greek, etc. If at all possible, Unicode
|
10
|
-
# or ActiveSupport should be used instead because they support the full
|
11
|
-
# UTF-8 character range.
|
12
|
-
module DumbProxy
|
13
|
-
extend Proxy
|
14
|
-
extend self
|
15
|
-
|
16
|
-
def downcase(string)
|
17
|
-
string.downcase.unpack("U*").map {|char| Mappings::DOWNCASE[char] or char}.flatten.pack("U*")
|
18
|
-
end
|
19
|
-
|
20
|
-
def upcase(string)
|
21
|
-
string.upcase.unpack("U*").map {|char| Mappings::UPCASE[char] or char}.flatten.pack("U*")
|
22
|
-
end
|
23
|
-
|
24
|
-
if ''.respond_to?(:unicode_normalize)
|
25
|
-
def normalize_utf8(string)
|
26
|
-
string.unicode_normalize
|
27
|
-
end
|
28
|
-
else
|
29
|
-
# On Ruby 2.2, this uses the native Unicode normalize method. On all
|
30
|
-
# other Rubies, it does a very naive Unicode normalization, which should
|
31
|
-
# work for this library's purposes (i.e., Roman-based codepoints, up to
|
32
|
-
# U+017E). Do not use reuse this as a general solution! Use a real
|
33
|
-
# library like Unicode or ActiveSupport instead.
|
34
|
-
def normalize_utf8(string)
|
35
|
-
codepoints = string.unpack("U*")
|
36
|
-
new = []
|
37
|
-
until codepoints.empty? do
|
38
|
-
if Mappings::COMPOSITION[codepoints[0..1]]
|
39
|
-
new << Mappings::COMPOSITION[codepoints.slice!(0,2)]
|
40
|
-
else
|
41
|
-
new << codepoints.shift
|
42
|
-
end
|
43
|
-
end
|
44
|
-
new.compact.flatten.pack("U*")
|
45
|
-
end
|
46
|
-
end
|
47
|
-
end
|
48
|
-
end
|
49
|
-
end
|