babosa 1.0.4 → 2.0.0.beta

Sign up to get free protection for your applications and to get access to all the features.
Files changed (53) hide show
  1. checksums.yaml +4 -4
  2. data/Changelog.md +12 -0
  3. data/README.md +80 -117
  4. data/Rakefile +9 -8
  5. data/lib/babosa.rb +2 -4
  6. data/lib/babosa/identifier.rb +82 -121
  7. data/lib/babosa/transliterator/base.rb +57 -56
  8. data/lib/babosa/transliterator/bulgarian.rb +3 -2
  9. data/lib/babosa/transliterator/cyrillic.rb +5 -5
  10. data/lib/babosa/transliterator/danish.rb +3 -3
  11. data/lib/babosa/transliterator/german.rb +3 -2
  12. data/lib/babosa/transliterator/greek.rb +4 -3
  13. data/lib/babosa/transliterator/hindi.rb +3 -2
  14. data/lib/babosa/transliterator/latin.rb +5 -5
  15. data/lib/babosa/transliterator/macedonian.rb +3 -2
  16. data/lib/babosa/transliterator/norwegian.rb +3 -3
  17. data/lib/babosa/transliterator/romanian.rb +3 -2
  18. data/lib/babosa/transliterator/russian.rb +3 -2
  19. data/lib/babosa/transliterator/serbian.rb +29 -27
  20. data/lib/babosa/transliterator/spanish.rb +2 -2
  21. data/lib/babosa/transliterator/swedish.rb +3 -3
  22. data/lib/babosa/transliterator/turkish.rb +8 -8
  23. data/lib/babosa/transliterator/ukrainian.rb +5 -4
  24. data/lib/babosa/transliterator/vietnamese.rb +4 -3
  25. data/lib/babosa/version.rb +3 -1
  26. data/spec/{babosa_spec.rb → identifier_spec.rb} +9 -10
  27. data/spec/spec_helper.rb +6 -6
  28. data/spec/transliterators/base_spec.rb +5 -6
  29. data/spec/transliterators/bulgarian_spec.rb +4 -5
  30. data/spec/transliterators/danish_spec.rb +5 -6
  31. data/spec/transliterators/german_spec.rb +4 -5
  32. data/spec/transliterators/greek_spec.rb +7 -7
  33. data/spec/transliterators/hindi_spec.rb +7 -7
  34. data/spec/transliterators/latin_spec.rb +3 -4
  35. data/spec/transliterators/macedonian_spec.rb +3 -4
  36. data/spec/transliterators/norwegian_spec.rb +4 -4
  37. data/spec/transliterators/polish_spec.rb +3 -5
  38. data/spec/transliterators/romanian_spec.rb +5 -6
  39. data/spec/transliterators/russian_spec.rb +3 -4
  40. data/spec/transliterators/serbian_spec.rb +6 -7
  41. data/spec/transliterators/spanish_spec.rb +4 -5
  42. data/spec/transliterators/swedish_spec.rb +7 -7
  43. data/spec/transliterators/turkish_spec.rb +24 -24
  44. data/spec/transliterators/ukrainian_spec.rb +74 -75
  45. data/spec/transliterators/vietnamese_spec.rb +10 -10
  46. metadata +17 -38
  47. data/lib/babosa/utf8/active_support_proxy.rb +0 -38
  48. data/lib/babosa/utf8/dumb_proxy.rb +0 -49
  49. data/lib/babosa/utf8/java_proxy.rb +0 -22
  50. data/lib/babosa/utf8/mappings.rb +0 -193
  51. data/lib/babosa/utf8/proxy.rb +0 -125
  52. data/lib/babosa/utf8/unicode_proxy.rb +0 -23
  53. data/spec/utf8_proxy_spec.rb +0 -52
@@ -1,24 +1,24 @@
1
- # encoding: utf-8
2
- require File.expand_path("../../spec_helper", __FILE__)
3
-
4
- describe Babosa::Transliterator::Turkish do
5
-
6
- let(:t) { described_class.instance }
7
- it_behaves_like "a latin transliterator"
8
-
9
- it "should transliterate various characters" do
10
- examples = {
11
- "Nâzım" => "Nazim",
12
- "sükûnet" => "sukunet",
13
- "millîleştirmek" => "millilestirmek",
14
- "mêmur" => "memur",
15
- "lôkman" => "lokman",
16
- "yoğurt" => "yogurt",
17
- "şair" => "sair",
18
- "İzmir" => "Izmir",
19
- "yığın" => "yigin",
20
- "çarşı" => "carsi"
21
- }
22
- examples.each {|k, v| expect(t.transliterate(k)).to eql(v)}
23
- end
24
- end
1
+ # frozen_string_literal: true
2
+
3
+ require "spec_helper"
4
+
5
+ describe Babosa::Transliterator::Turkish do
6
+ let(:t) { described_class.instance }
7
+ it_behaves_like "a latin transliterator"
8
+
9
+ it "should transliterate various characters" do
10
+ examples = {
11
+ "Nâzım" => "Nazim",
12
+ "sükûnet" => "sukunet",
13
+ "millîleştirmek" => "millilestirmek",
14
+ "mêmur" => "memur",
15
+ "lôkman" => "lokman",
16
+ "yoğurt" => "yogurt",
17
+ "şair" => "sair",
18
+ "İzmir" => "Izmir",
19
+ "yığın" => "yigin",
20
+ "çarşı" => "carsi"
21
+ }
22
+ examples.each { |k, v| expect(t.transliterate(k)).to eql(v) }
23
+ end
24
+ end
@@ -1,88 +1,87 @@
1
- # encoding: utf-8
2
- require File.expand_path("../../spec_helper", __FILE__)
1
+ # frozen_string_literal: true
3
2
 
4
- describe Babosa::Transliterator::Ukrainian do
3
+ require "spec_helper"
5
4
 
5
+ describe Babosa::Transliterator::Ukrainian do
6
6
  let(:t) { described_class.instance }
7
7
  it_behaves_like "a cyrillic transliterator"
8
8
 
9
9
  it "should transliterate Cyrillic characters" do
10
10
  examples = {
11
- "Алушта" => "Alushta",
12
- "Андрій" => "Andrii",
13
- "Борщагівка" => "Borshchahivka",
14
- "Борисенко" => "Borysenko",
15
- "Вінниця" => "Vinnytsia",
16
- "Володимир" => "Volodymyr",
17
- "Гадяч" => "Hadiach",
18
- "Богдан" => "Bohdan",
19
- "Ґалаґан" => "Galagan",
20
- "Ґорґани" => "Gorgany",
21
- "Донецьк" => "Donetsk",
22
- "Дмитро" => "Dmytro",
23
- "Рівне" => "Rivne",
24
- "Олег" => "Oleh",
25
- "Есмань" => "Esman",
26
- "Єнакієве" => "Yenakiieve",
27
- "Гаєвич" => "Haievych",
28
- "Короп'є" => "Koropie",
29
- "Житомир" => "Zhytomyr",
30
- "Жанна" => "Zhanna",
31
- "Жежелів" => "Zhezheliv",
32
- "Закарпаття" => "Zakarpattia",
33
- "Казимирчук" => "Kazymyrchuk",
34
- "Медвин" => "Medvyn",
35
- "Михайленко" => "Mykhailenko",
36
- "Іванків" => "Ivankiv",
37
- "Іващенко" => "Ivashchenko",
38
- "Їжакевич" => "Yizhakevych",
39
- "Кадиївка" => "Kadyivka",
40
- "Мар'їне" => "Marine",
41
- "Йосипівка" => "Yosypivka",
42
- "Стрий" => "Stryi",
43
- "Олексій" => "Oleksii",
44
- "Київ" => "Kyiv",
45
- "Коваленко" => "Kovalenko",
46
- "Лебедин" => "Lebedyn",
47
- "Леонід" => "Leonid",
48
- "Миколаїв" => "Mykolaiv",
49
- "Маринич" => "Marynych",
50
- "Ніжин" => "Nizhyn",
51
- "Наталія" => "Nataliia",
52
- "Одеса" => "Odesa",
53
- "Онищенко" => "Onyshchenko",
54
- "Полтава" => "Poltava",
55
- "Петро" => "Petro",
11
+ "Алушта" => "Alushta",
12
+ "Андрій" => "Andrii",
13
+ "Борщагівка" => "Borshchahivka",
14
+ "Борисенко" => "Borysenko",
15
+ "Вінниця" => "Vinnytsia",
16
+ "Володимир" => "Volodymyr",
17
+ "Гадяч" => "Hadiach",
18
+ "Богдан" => "Bohdan",
19
+ "Ґалаґан" => "Galagan",
20
+ "Ґорґани" => "Gorgany",
21
+ "Донецьк" => "Donetsk",
22
+ "Дмитро" => "Dmytro",
23
+ "Рівне" => "Rivne",
24
+ "Олег" => "Oleh",
25
+ "Есмань" => "Esman",
26
+ "Єнакієве" => "Yenakiieve",
27
+ "Гаєвич" => "Haievych",
28
+ "Короп'є" => "Koropie",
29
+ "Житомир" => "Zhytomyr",
30
+ "Жанна" => "Zhanna",
31
+ "Жежелів" => "Zhezheliv",
32
+ "Закарпаття" => "Zakarpattia",
33
+ "Казимирчук" => "Kazymyrchuk",
34
+ "Медвин" => "Medvyn",
35
+ "Михайленко" => "Mykhailenko",
36
+ "Іванків" => "Ivankiv",
37
+ "Іващенко" => "Ivashchenko",
38
+ "Їжакевич" => "Yizhakevych",
39
+ "Кадиївка" => "Kadyivka",
40
+ "Мар'їне" => "Marine",
41
+ "Йосипівка" => "Yosypivka",
42
+ "Стрий" => "Stryi",
43
+ "Олексій" => "Oleksii",
44
+ "Київ" => "Kyiv",
45
+ "Коваленко" => "Kovalenko",
46
+ "Лебедин" => "Lebedyn",
47
+ "Леонід" => "Leonid",
48
+ "Миколаїв" => "Mykolaiv",
49
+ "Маринич" => "Marynych",
50
+ "Ніжин" => "Nizhyn",
51
+ "Наталія" => "Nataliia",
52
+ "Одеса" => "Odesa",
53
+ "Онищенко" => "Onyshchenko",
54
+ "Полтава" => "Poltava",
55
+ "Петро" => "Petro",
56
56
  "Решетилівка" => "Reshetylivka",
57
57
  "Рибчинський" => "Rybchynskyi",
58
- "Суми" => "Sumy",
59
- "Соломія" => "Solomiia",
60
- "Тернопіль" => "Ternopil",
61
- "Троць" => "Trots",
62
- "Ужгород" => "Uzhhorod",
63
- "Уляна" => "Uliana",
64
- "Фастів" => "Fastiv",
65
- "Філіпчук" => "Filipchuk",
66
- "Харків" => "Kharkiv",
67
- "Христина" => "Khrystyna",
58
+ "Суми" => "Sumy",
59
+ "Соломія" => "Solomiia",
60
+ "Тернопіль" => "Ternopil",
61
+ "Троць" => "Trots",
62
+ "Ужгород" => "Uzhhorod",
63
+ "Уляна" => "Uliana",
64
+ "Фастів" => "Fastiv",
65
+ "Філіпчук" => "Filipchuk",
66
+ "Харків" => "Kharkiv",
67
+ "Христина" => "Khrystyna",
68
68
  "Біла Церква" => "Bila Tserkva",
69
- "Стеценко" => "Stetsenko",
70
- "Чернівці" => "Chernivtsi",
71
- "Шевченко" => "Shevchenko",
72
- "Шостка" => "Shostka",
73
- "Кишеньки" => "Kyshenky",
74
- "Щербухи" => "Shcherbukhy",
75
- "Гоща" => "Hoshcha",
76
- "Гаращенко" => "Harashchenko",
77
- "Юрій" => "Yurii",
78
- "Корюківка" => "Koriukivka",
79
- "Яготин" => "Yahotyn",
80
- "Ярошенко" => "Yaroshenko",
81
- "Костянтин" => "Kostiantyn",
82
- "Знам'янка" => "Znamianka",
83
- "Феодосія" => "Feodosiia"
69
+ "Стеценко" => "Stetsenko",
70
+ "Чернівці" => "Chernivtsi",
71
+ "Шевченко" => "Shevchenko",
72
+ "Шостка" => "Shostka",
73
+ "Кишеньки" => "Kyshenky",
74
+ "Щербухи" => "Shcherbukhy",
75
+ "Гоща" => "Hoshcha",
76
+ "Гаращенко" => "Harashchenko",
77
+ "Юрій" => "Yurii",
78
+ "Корюківка" => "Koriukivka",
79
+ "Яготин" => "Yahotyn",
80
+ "Ярошенко" => "Yaroshenko",
81
+ "Костянтин" => "Kostiantyn",
82
+ "Знам'янка" => "Znamianka",
83
+ "Феодосія" => "Feodosiia"
84
84
  }
85
85
  examples.each { |k, v| expect(t.transliterate(k)).to eql(v) }
86
86
  end
87
-
88
- end
87
+ end
@@ -1,18 +1,18 @@
1
- # encoding: utf-8
2
- require File.expand_path("../../spec_helper", __FILE__)
1
+ # frozen_string_literal: true
3
2
 
4
- describe Babosa::Transliterator::Vietnamese do
3
+ require "spec_helper"
5
4
 
5
+ describe Babosa::Transliterator::Vietnamese do
6
6
  let(:t) { described_class.instance }
7
7
  it_behaves_like "a latin transliterator"
8
8
 
9
9
  it "should transliterate various characters" do
10
- examples = {
11
- "làm" => "lam",
12
- "đàn ông" => "dan ong",
13
- "thật" => "that",
14
- "khổ" => "kho"
15
- }
16
- examples.each {|k, v| expect(t.transliterate(k)).to eql(v)}
10
+ examples = {
11
+ "làm" => "lam",
12
+ "đàn ông" => "dan ong",
13
+ "thật" => "that",
14
+ "khổ" => "kho"
15
+ }
16
+ examples.each { |k, v| expect(t.transliterate(k)).to eql(v) }
17
17
  end
18
18
  end
metadata CHANGED
@@ -1,29 +1,29 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: babosa
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.0.4
4
+ version: 2.0.0.beta
5
5
  platform: ruby
6
6
  authors:
7
7
  - Norman Clarke
8
- autorequire:
8
+ autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2020-10-06 00:00:00.000000000 Z
11
+ date: 2020-10-09 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
- name: activesupport
14
+ name: rake
15
15
  requirement: !ruby/object:Gem::Requirement
16
16
  requirements:
17
17
  - - ">="
18
18
  - !ruby/object:Gem::Version
19
- version: 3.2.0
19
+ version: '0'
20
20
  type: :development
21
21
  prerelease: false
22
22
  version_requirements: !ruby/object:Gem::Requirement
23
23
  requirements:
24
24
  - - ">="
25
25
  - !ruby/object:Gem::Version
26
- version: 3.2.0
26
+ version: '0'
27
27
  - !ruby/object:Gem::Dependency
28
28
  name: rspec
29
29
  requirement: !ruby/object:Gem::Requirement
@@ -39,35 +39,21 @@ dependencies:
39
39
  - !ruby/object:Gem::Version
40
40
  version: 3.7.0
41
41
  - !ruby/object:Gem::Dependency
42
- name: simplecov
43
- requirement: !ruby/object:Gem::Requirement
44
- requirements:
45
- - - ">="
46
- - !ruby/object:Gem::Version
47
- version: '0'
48
- type: :development
49
- prerelease: false
50
- version_requirements: !ruby/object:Gem::Requirement
51
- requirements:
52
- - - ">="
53
- - !ruby/object:Gem::Version
54
- version: '0'
55
- - !ruby/object:Gem::Dependency
56
- name: rake
42
+ name: rubocop
57
43
  requirement: !ruby/object:Gem::Requirement
58
44
  requirements:
59
45
  - - ">="
60
46
  - !ruby/object:Gem::Version
61
- version: '0'
47
+ version: 0.93.0
62
48
  type: :development
63
49
  prerelease: false
64
50
  version_requirements: !ruby/object:Gem::Requirement
65
51
  requirements:
66
52
  - - ">="
67
53
  - !ruby/object:Gem::Version
68
- version: '0'
54
+ version: 0.93.0
69
55
  - !ruby/object:Gem::Dependency
70
- name: unicode
56
+ name: simplecov
71
57
  requirement: !ruby/object:Gem::Requirement
72
58
  requirements:
73
59
  - - ">="
@@ -114,14 +100,8 @@ files:
114
100
  - lib/babosa/transliterator/turkish.rb
115
101
  - lib/babosa/transliterator/ukrainian.rb
116
102
  - lib/babosa/transliterator/vietnamese.rb
117
- - lib/babosa/utf8/active_support_proxy.rb
118
- - lib/babosa/utf8/dumb_proxy.rb
119
- - lib/babosa/utf8/java_proxy.rb
120
- - lib/babosa/utf8/mappings.rb
121
- - lib/babosa/utf8/proxy.rb
122
- - lib/babosa/utf8/unicode_proxy.rb
123
103
  - lib/babosa/version.rb
124
- - spec/babosa_spec.rb
104
+ - spec/identifier_spec.rb
125
105
  - spec/spec_helper.rb
126
106
  - spec/transliterators/base_spec.rb
127
107
  - spec/transliterators/bulgarian_spec.rb
@@ -141,11 +121,10 @@ files:
141
121
  - spec/transliterators/turkish_spec.rb
142
122
  - spec/transliterators/ukrainian_spec.rb
143
123
  - spec/transliterators/vietnamese_spec.rb
144
- - spec/utf8_proxy_spec.rb
145
124
  homepage: http://github.com/norman/babosa
146
125
  licenses: []
147
126
  metadata: {}
148
- post_install_message:
127
+ post_install_message:
149
128
  rdoc_options: []
150
129
  require_paths:
151
130
  - lib
@@ -153,15 +132,15 @@ required_ruby_version: !ruby/object:Gem::Requirement
153
132
  requirements:
154
133
  - - ">="
155
134
  - !ruby/object:Gem::Version
156
- version: 2.0.0
135
+ version: 2.5.0
157
136
  required_rubygems_version: !ruby/object:Gem::Requirement
158
137
  requirements:
159
- - - ">="
138
+ - - ">"
160
139
  - !ruby/object:Gem::Version
161
- version: '0'
140
+ version: 1.3.1
162
141
  requirements: []
163
- rubygems_version: 3.1.2
164
- signing_key:
142
+ rubygems_version: 3.1.4
143
+ signing_key:
165
144
  specification_version: 4
166
145
  summary: A library for creating slugs.
167
146
  test_files: []
@@ -1,38 +0,0 @@
1
- require 'active_support'
2
- require 'active_support/multibyte/unicode'
3
-
4
- module Babosa
5
- module UTF8
6
- # A UTF-8 proxy using Active Support's multibyte support.
7
- module ActiveSupportProxy
8
- extend ActiveSupport::Multibyte::Unicode
9
- extend self
10
-
11
- def self.normalize_utf8(string)
12
- normalize(string, :c)
13
- end
14
-
15
- if ActiveSupport::VERSION::MAJOR == 3
16
- def downcase(string)
17
- ActiveSupport::Multibyte::Chars.new(string).downcase.to_s
18
- end
19
-
20
- def upcase(string)
21
- ActiveSupport::Multibyte::Chars.new(string).upcase.to_s
22
- end
23
- elsif ActiveSupport::VERSION::MAJOR >= 6
24
- def self.normalize_utf8(string)
25
- string.unicode_normalize(:nfc).to_s
26
- end
27
-
28
- def downcase(string)
29
- string.downcase.to_s
30
- end
31
-
32
- def upcase(string)
33
- string.upcase.to_s
34
- end
35
- end
36
- end
37
- end
38
- end
@@ -1,49 +0,0 @@
1
- require File.expand_path("../mappings", __FILE__)
2
-
3
- module Babosa
4
- module UTF8
5
-
6
- # This module provides fallback UTF-8 support when nothing else is
7
- # available. It does case folding for Roman alphabet-based characters
8
- # commonly used by Western European languages and little else, making it
9
- # useless for Russian, Bulgarian, Greek, etc. If at all possible, Unicode
10
- # or ActiveSupport should be used instead because they support the full
11
- # UTF-8 character range.
12
- module DumbProxy
13
- extend Proxy
14
- extend self
15
-
16
- def downcase(string)
17
- string.downcase.unpack("U*").map {|char| Mappings::DOWNCASE[char] or char}.flatten.pack("U*")
18
- end
19
-
20
- def upcase(string)
21
- string.upcase.unpack("U*").map {|char| Mappings::UPCASE[char] or char}.flatten.pack("U*")
22
- end
23
-
24
- if ''.respond_to?(:unicode_normalize)
25
- def normalize_utf8(string)
26
- string.unicode_normalize
27
- end
28
- else
29
- # On Ruby 2.2, this uses the native Unicode normalize method. On all
30
- # other Rubies, it does a very naive Unicode normalization, which should
31
- # work for this library's purposes (i.e., Roman-based codepoints, up to
32
- # U+017E). Do not use reuse this as a general solution! Use a real
33
- # library like Unicode or ActiveSupport instead.
34
- def normalize_utf8(string)
35
- codepoints = string.unpack("U*")
36
- new = []
37
- until codepoints.empty? do
38
- if Mappings::COMPOSITION[codepoints[0..1]]
39
- new << Mappings::COMPOSITION[codepoints.slice!(0,2)]
40
- else
41
- new << codepoints.shift
42
- end
43
- end
44
- new.compact.flatten.pack("U*")
45
- end
46
- end
47
- end
48
- end
49
- end