babosa 1.0.4 → 2.0.0.beta

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (53)
  1. checksums.yaml +4 -4
  2. data/Changelog.md +12 -0
  3. data/README.md +80 -117
  4. data/Rakefile +9 -8
  5. data/lib/babosa.rb +2 -4
  6. data/lib/babosa/identifier.rb +82 -121
  7. data/lib/babosa/transliterator/base.rb +57 -56
  8. data/lib/babosa/transliterator/bulgarian.rb +3 -2
  9. data/lib/babosa/transliterator/cyrillic.rb +5 -5
  10. data/lib/babosa/transliterator/danish.rb +3 -3
  11. data/lib/babosa/transliterator/german.rb +3 -2
  12. data/lib/babosa/transliterator/greek.rb +4 -3
  13. data/lib/babosa/transliterator/hindi.rb +3 -2
  14. data/lib/babosa/transliterator/latin.rb +5 -5
  15. data/lib/babosa/transliterator/macedonian.rb +3 -2
  16. data/lib/babosa/transliterator/norwegian.rb +3 -3
  17. data/lib/babosa/transliterator/romanian.rb +3 -2
  18. data/lib/babosa/transliterator/russian.rb +3 -2
  19. data/lib/babosa/transliterator/serbian.rb +29 -27
  20. data/lib/babosa/transliterator/spanish.rb +2 -2
  21. data/lib/babosa/transliterator/swedish.rb +3 -3
  22. data/lib/babosa/transliterator/turkish.rb +8 -8
  23. data/lib/babosa/transliterator/ukrainian.rb +5 -4
  24. data/lib/babosa/transliterator/vietnamese.rb +4 -3
  25. data/lib/babosa/version.rb +3 -1
  26. data/spec/{babosa_spec.rb → identifier_spec.rb} +9 -10
  27. data/spec/spec_helper.rb +6 -6
  28. data/spec/transliterators/base_spec.rb +5 -6
  29. data/spec/transliterators/bulgarian_spec.rb +4 -5
  30. data/spec/transliterators/danish_spec.rb +5 -6
  31. data/spec/transliterators/german_spec.rb +4 -5
  32. data/spec/transliterators/greek_spec.rb +7 -7
  33. data/spec/transliterators/hindi_spec.rb +7 -7
  34. data/spec/transliterators/latin_spec.rb +3 -4
  35. data/spec/transliterators/macedonian_spec.rb +3 -4
  36. data/spec/transliterators/norwegian_spec.rb +4 -4
  37. data/spec/transliterators/polish_spec.rb +3 -5
  38. data/spec/transliterators/romanian_spec.rb +5 -6
  39. data/spec/transliterators/russian_spec.rb +3 -4
  40. data/spec/transliterators/serbian_spec.rb +6 -7
  41. data/spec/transliterators/spanish_spec.rb +4 -5
  42. data/spec/transliterators/swedish_spec.rb +7 -7
  43. data/spec/transliterators/turkish_spec.rb +24 -24
  44. data/spec/transliterators/ukrainian_spec.rb +74 -75
  45. data/spec/transliterators/vietnamese_spec.rb +10 -10
  46. metadata +17 -38
  47. data/lib/babosa/utf8/active_support_proxy.rb +0 -38
  48. data/lib/babosa/utf8/dumb_proxy.rb +0 -49
  49. data/lib/babosa/utf8/java_proxy.rb +0 -22
  50. data/lib/babosa/utf8/mappings.rb +0 -193
  51. data/lib/babosa/utf8/proxy.rb +0 -125
  52. data/lib/babosa/utf8/unicode_proxy.rb +0 -23
  53. data/spec/utf8_proxy_spec.rb +0 -52
@@ -1,24 +1,24 @@
1
- # encoding: utf-8
2
- require File.expand_path("../../spec_helper", __FILE__)
3
-
4
- describe Babosa::Transliterator::Turkish do
5
-
6
- let(:t) { described_class.instance }
7
- it_behaves_like "a latin transliterator"
8
-
9
- it "should transliterate various characters" do
10
- examples = {
11
- "Nâzım" => "Nazim",
12
- "sükûnet" => "sukunet",
13
- "millîleştirmek" => "millilestirmek",
14
- "mêmur" => "memur",
15
- "lôkman" => "lokman",
16
- "yoğurt" => "yogurt",
17
- "şair" => "sair",
18
- "İzmir" => "Izmir",
19
- "yığın" => "yigin",
20
- "çarşı" => "carsi"
21
- }
22
- examples.each {|k, v| expect(t.transliterate(k)).to eql(v)}
23
- end
24
- end
1
+ # frozen_string_literal: true
2
+
3
+ require "spec_helper"
4
+
5
+ describe Babosa::Transliterator::Turkish do
6
+ let(:t) { described_class.instance }
7
+ it_behaves_like "a latin transliterator"
8
+
9
+ it "should transliterate various characters" do
10
+ examples = {
11
+ "Nâzım" => "Nazim",
12
+ "sükûnet" => "sukunet",
13
+ "millîleştirmek" => "millilestirmek",
14
+ "mêmur" => "memur",
15
+ "lôkman" => "lokman",
16
+ "yoğurt" => "yogurt",
17
+ "şair" => "sair",
18
+ "İzmir" => "Izmir",
19
+ "yığın" => "yigin",
20
+ "çarşı" => "carsi"
21
+ }
22
+ examples.each { |k, v| expect(t.transliterate(k)).to eql(v) }
23
+ end
24
+ end
@@ -1,88 +1,87 @@
1
- # encoding: utf-8
2
- require File.expand_path("../../spec_helper", __FILE__)
1
+ # frozen_string_literal: true
3
2
 
4
- describe Babosa::Transliterator::Ukrainian do
3
+ require "spec_helper"
5
4
 
5
+ describe Babosa::Transliterator::Ukrainian do
6
6
  let(:t) { described_class.instance }
7
7
  it_behaves_like "a cyrillic transliterator"
8
8
 
9
9
  it "should transliterate Cyrillic characters" do
10
10
  examples = {
11
- "Алушта" => "Alushta",
12
- "Андрій" => "Andrii",
13
- "Борщагівка" => "Borshchahivka",
14
- "Борисенко" => "Borysenko",
15
- "Вінниця" => "Vinnytsia",
16
- "Володимир" => "Volodymyr",
17
- "Гадяч" => "Hadiach",
18
- "Богдан" => "Bohdan",
19
- "Ґалаґан" => "Galagan",
20
- "Ґорґани" => "Gorgany",
21
- "Донецьк" => "Donetsk",
22
- "Дмитро" => "Dmytro",
23
- "Рівне" => "Rivne",
24
- "Олег" => "Oleh",
25
- "Есмань" => "Esman",
26
- "Єнакієве" => "Yenakiieve",
27
- "Гаєвич" => "Haievych",
28
- "Короп'є" => "Koropie",
29
- "Житомир" => "Zhytomyr",
30
- "Жанна" => "Zhanna",
31
- "Жежелів" => "Zhezheliv",
32
- "Закарпаття" => "Zakarpattia",
33
- "Казимирчук" => "Kazymyrchuk",
34
- "Медвин" => "Medvyn",
35
- "Михайленко" => "Mykhailenko",
36
- "Іванків" => "Ivankiv",
37
- "Іващенко" => "Ivashchenko",
38
- "Їжакевич" => "Yizhakevych",
39
- "Кадиївка" => "Kadyivka",
40
- "Мар'їне" => "Marine",
41
- "Йосипівка" => "Yosypivka",
42
- "Стрий" => "Stryi",
43
- "Олексій" => "Oleksii",
44
- "Київ" => "Kyiv",
45
- "Коваленко" => "Kovalenko",
46
- "Лебедин" => "Lebedyn",
47
- "Леонід" => "Leonid",
48
- "Миколаїв" => "Mykolaiv",
49
- "Маринич" => "Marynych",
50
- "Ніжин" => "Nizhyn",
51
- "Наталія" => "Nataliia",
52
- "Одеса" => "Odesa",
53
- "Онищенко" => "Onyshchenko",
54
- "Полтава" => "Poltava",
55
- "Петро" => "Petro",
11
+ "Алушта" => "Alushta",
12
+ "Андрій" => "Andrii",
13
+ "Борщагівка" => "Borshchahivka",
14
+ "Борисенко" => "Borysenko",
15
+ "Вінниця" => "Vinnytsia",
16
+ "Володимир" => "Volodymyr",
17
+ "Гадяч" => "Hadiach",
18
+ "Богдан" => "Bohdan",
19
+ "Ґалаґан" => "Galagan",
20
+ "Ґорґани" => "Gorgany",
21
+ "Донецьк" => "Donetsk",
22
+ "Дмитро" => "Dmytro",
23
+ "Рівне" => "Rivne",
24
+ "Олег" => "Oleh",
25
+ "Есмань" => "Esman",
26
+ "Єнакієве" => "Yenakiieve",
27
+ "Гаєвич" => "Haievych",
28
+ "Короп'є" => "Koropie",
29
+ "Житомир" => "Zhytomyr",
30
+ "Жанна" => "Zhanna",
31
+ "Жежелів" => "Zhezheliv",
32
+ "Закарпаття" => "Zakarpattia",
33
+ "Казимирчук" => "Kazymyrchuk",
34
+ "Медвин" => "Medvyn",
35
+ "Михайленко" => "Mykhailenko",
36
+ "Іванків" => "Ivankiv",
37
+ "Іващенко" => "Ivashchenko",
38
+ "Їжакевич" => "Yizhakevych",
39
+ "Кадиївка" => "Kadyivka",
40
+ "Мар'їне" => "Marine",
41
+ "Йосипівка" => "Yosypivka",
42
+ "Стрий" => "Stryi",
43
+ "Олексій" => "Oleksii",
44
+ "Київ" => "Kyiv",
45
+ "Коваленко" => "Kovalenko",
46
+ "Лебедин" => "Lebedyn",
47
+ "Леонід" => "Leonid",
48
+ "Миколаїв" => "Mykolaiv",
49
+ "Маринич" => "Marynych",
50
+ "Ніжин" => "Nizhyn",
51
+ "Наталія" => "Nataliia",
52
+ "Одеса" => "Odesa",
53
+ "Онищенко" => "Onyshchenko",
54
+ "Полтава" => "Poltava",
55
+ "Петро" => "Petro",
56
56
  "Решетилівка" => "Reshetylivka",
57
57
  "Рибчинський" => "Rybchynskyi",
58
- "Суми" => "Sumy",
59
- "Соломія" => "Solomiia",
60
- "Тернопіль" => "Ternopil",
61
- "Троць" => "Trots",
62
- "Ужгород" => "Uzhhorod",
63
- "Уляна" => "Uliana",
64
- "Фастів" => "Fastiv",
65
- "Філіпчук" => "Filipchuk",
66
- "Харків" => "Kharkiv",
67
- "Христина" => "Khrystyna",
58
+ "Суми" => "Sumy",
59
+ "Соломія" => "Solomiia",
60
+ "Тернопіль" => "Ternopil",
61
+ "Троць" => "Trots",
62
+ "Ужгород" => "Uzhhorod",
63
+ "Уляна" => "Uliana",
64
+ "Фастів" => "Fastiv",
65
+ "Філіпчук" => "Filipchuk",
66
+ "Харків" => "Kharkiv",
67
+ "Христина" => "Khrystyna",
68
68
  "Біла Церква" => "Bila Tserkva",
69
- "Стеценко" => "Stetsenko",
70
- "Чернівці" => "Chernivtsi",
71
- "Шевченко" => "Shevchenko",
72
- "Шостка" => "Shostka",
73
- "Кишеньки" => "Kyshenky",
74
- "Щербухи" => "Shcherbukhy",
75
- "Гоща" => "Hoshcha",
76
- "Гаращенко" => "Harashchenko",
77
- "Юрій" => "Yurii",
78
- "Корюківка" => "Koriukivka",
79
- "Яготин" => "Yahotyn",
80
- "Ярошенко" => "Yaroshenko",
81
- "Костянтин" => "Kostiantyn",
82
- "Знам'янка" => "Znamianka",
83
- "Феодосія" => "Feodosiia"
69
+ "Стеценко" => "Stetsenko",
70
+ "Чернівці" => "Chernivtsi",
71
+ "Шевченко" => "Shevchenko",
72
+ "Шостка" => "Shostka",
73
+ "Кишеньки" => "Kyshenky",
74
+ "Щербухи" => "Shcherbukhy",
75
+ "Гоща" => "Hoshcha",
76
+ "Гаращенко" => "Harashchenko",
77
+ "Юрій" => "Yurii",
78
+ "Корюківка" => "Koriukivka",
79
+ "Яготин" => "Yahotyn",
80
+ "Ярошенко" => "Yaroshenko",
81
+ "Костянтин" => "Kostiantyn",
82
+ "Знам'янка" => "Znamianka",
83
+ "Феодосія" => "Feodosiia"
84
84
  }
85
85
  examples.each { |k, v| expect(t.transliterate(k)).to eql(v) }
86
86
  end
87
-
88
- end
87
+ end
@@ -1,18 +1,18 @@
1
- # encoding: utf-8
2
- require File.expand_path("../../spec_helper", __FILE__)
1
+ # frozen_string_literal: true
3
2
 
4
- describe Babosa::Transliterator::Vietnamese do
3
+ require "spec_helper"
5
4
 
5
+ describe Babosa::Transliterator::Vietnamese do
6
6
  let(:t) { described_class.instance }
7
7
  it_behaves_like "a latin transliterator"
8
8
 
9
9
  it "should transliterate various characters" do
10
- examples = {
11
- "làm" => "lam",
12
- "đàn ông" => "dan ong",
13
- "thật" => "that",
14
- "khổ" => "kho"
15
- }
16
- examples.each {|k, v| expect(t.transliterate(k)).to eql(v)}
10
+ examples = {
11
+ "làm" => "lam",
12
+ "đàn ông" => "dan ong",
13
+ "thật" => "that",
14
+ "khổ" => "kho"
15
+ }
16
+ examples.each { |k, v| expect(t.transliterate(k)).to eql(v) }
17
17
  end
18
18
  end
metadata CHANGED
@@ -1,29 +1,29 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: babosa
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.0.4
4
+ version: 2.0.0.beta
5
5
  platform: ruby
6
6
  authors:
7
7
  - Norman Clarke
8
- autorequire:
8
+ autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2020-10-06 00:00:00.000000000 Z
11
+ date: 2020-10-09 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
- name: activesupport
14
+ name: rake
15
15
  requirement: !ruby/object:Gem::Requirement
16
16
  requirements:
17
17
  - - ">="
18
18
  - !ruby/object:Gem::Version
19
- version: 3.2.0
19
+ version: '0'
20
20
  type: :development
21
21
  prerelease: false
22
22
  version_requirements: !ruby/object:Gem::Requirement
23
23
  requirements:
24
24
  - - ">="
25
25
  - !ruby/object:Gem::Version
26
- version: 3.2.0
26
+ version: '0'
27
27
  - !ruby/object:Gem::Dependency
28
28
  name: rspec
29
29
  requirement: !ruby/object:Gem::Requirement
@@ -39,35 +39,21 @@ dependencies:
39
39
  - !ruby/object:Gem::Version
40
40
  version: 3.7.0
41
41
  - !ruby/object:Gem::Dependency
42
- name: simplecov
43
- requirement: !ruby/object:Gem::Requirement
44
- requirements:
45
- - - ">="
46
- - !ruby/object:Gem::Version
47
- version: '0'
48
- type: :development
49
- prerelease: false
50
- version_requirements: !ruby/object:Gem::Requirement
51
- requirements:
52
- - - ">="
53
- - !ruby/object:Gem::Version
54
- version: '0'
55
- - !ruby/object:Gem::Dependency
56
- name: rake
42
+ name: rubocop
57
43
  requirement: !ruby/object:Gem::Requirement
58
44
  requirements:
59
45
  - - ">="
60
46
  - !ruby/object:Gem::Version
61
- version: '0'
47
+ version: 0.93.0
62
48
  type: :development
63
49
  prerelease: false
64
50
  version_requirements: !ruby/object:Gem::Requirement
65
51
  requirements:
66
52
  - - ">="
67
53
  - !ruby/object:Gem::Version
68
- version: '0'
54
+ version: 0.93.0
69
55
  - !ruby/object:Gem::Dependency
70
- name: unicode
56
+ name: simplecov
71
57
  requirement: !ruby/object:Gem::Requirement
72
58
  requirements:
73
59
  - - ">="
@@ -114,14 +100,8 @@ files:
114
100
  - lib/babosa/transliterator/turkish.rb
115
101
  - lib/babosa/transliterator/ukrainian.rb
116
102
  - lib/babosa/transliterator/vietnamese.rb
117
- - lib/babosa/utf8/active_support_proxy.rb
118
- - lib/babosa/utf8/dumb_proxy.rb
119
- - lib/babosa/utf8/java_proxy.rb
120
- - lib/babosa/utf8/mappings.rb
121
- - lib/babosa/utf8/proxy.rb
122
- - lib/babosa/utf8/unicode_proxy.rb
123
103
  - lib/babosa/version.rb
124
- - spec/babosa_spec.rb
104
+ - spec/identifier_spec.rb
125
105
  - spec/spec_helper.rb
126
106
  - spec/transliterators/base_spec.rb
127
107
  - spec/transliterators/bulgarian_spec.rb
@@ -141,11 +121,10 @@ files:
141
121
  - spec/transliterators/turkish_spec.rb
142
122
  - spec/transliterators/ukrainian_spec.rb
143
123
  - spec/transliterators/vietnamese_spec.rb
144
- - spec/utf8_proxy_spec.rb
145
124
  homepage: http://github.com/norman/babosa
146
125
  licenses: []
147
126
  metadata: {}
148
- post_install_message:
127
+ post_install_message:
149
128
  rdoc_options: []
150
129
  require_paths:
151
130
  - lib
@@ -153,15 +132,15 @@ required_ruby_version: !ruby/object:Gem::Requirement
153
132
  requirements:
154
133
  - - ">="
155
134
  - !ruby/object:Gem::Version
156
- version: 2.0.0
135
+ version: 2.5.0
157
136
  required_rubygems_version: !ruby/object:Gem::Requirement
158
137
  requirements:
159
- - - ">="
138
+ - - ">"
160
139
  - !ruby/object:Gem::Version
161
- version: '0'
140
+ version: 1.3.1
162
141
  requirements: []
163
- rubygems_version: 3.1.2
164
- signing_key:
142
+ rubygems_version: 3.1.4
143
+ signing_key:
165
144
  specification_version: 4
166
145
  summary: A library for creating slugs.
167
146
  test_files: []
@@ -1,38 +0,0 @@
1
- require 'active_support'
2
- require 'active_support/multibyte/unicode'
3
-
4
- module Babosa
5
- module UTF8
6
- # A UTF-8 proxy using Active Support's multibyte support.
7
- module ActiveSupportProxy
8
- extend ActiveSupport::Multibyte::Unicode
9
- extend self
10
-
11
- def self.normalize_utf8(string)
12
- normalize(string, :c)
13
- end
14
-
15
- if ActiveSupport::VERSION::MAJOR == 3
16
- def downcase(string)
17
- ActiveSupport::Multibyte::Chars.new(string).downcase.to_s
18
- end
19
-
20
- def upcase(string)
21
- ActiveSupport::Multibyte::Chars.new(string).upcase.to_s
22
- end
23
- elsif ActiveSupport::VERSION::MAJOR >= 6
24
- def self.normalize_utf8(string)
25
- string.unicode_normalize(:nfc).to_s
26
- end
27
-
28
- def downcase(string)
29
- string.downcase.to_s
30
- end
31
-
32
- def upcase(string)
33
- string.upcase.to_s
34
- end
35
- end
36
- end
37
- end
38
- end
@@ -1,49 +0,0 @@
1
- require File.expand_path("../mappings", __FILE__)
2
-
3
- module Babosa
4
- module UTF8
5
-
6
- # This module provides fallback UTF-8 support when nothing else is
7
- # available. It does case folding for Roman alphabet-based characters
8
- # commonly used by Western European languages and little else, making it
9
- # useless for Russian, Bulgarian, Greek, etc. If at all possible, Unicode
10
- # or ActiveSupport should be used instead because they support the full
11
- # UTF-8 character range.
12
- module DumbProxy
13
- extend Proxy
14
- extend self
15
-
16
- def downcase(string)
17
- string.downcase.unpack("U*").map {|char| Mappings::DOWNCASE[char] or char}.flatten.pack("U*")
18
- end
19
-
20
- def upcase(string)
21
- string.upcase.unpack("U*").map {|char| Mappings::UPCASE[char] or char}.flatten.pack("U*")
22
- end
23
-
24
- if ''.respond_to?(:unicode_normalize)
25
- def normalize_utf8(string)
26
- string.unicode_normalize
27
- end
28
- else
29
- # On Ruby 2.2, this uses the native Unicode normalize method. On all
30
- # other Rubies, it does a very naive Unicode normalization, which should
31
- # work for this library's purposes (i.e., Roman-based codepoints, up to
32
- # U+017E). Do not use reuse this as a general solution! Use a real
33
- # library like Unicode or ActiveSupport instead.
34
- def normalize_utf8(string)
35
- codepoints = string.unpack("U*")
36
- new = []
37
- until codepoints.empty? do
38
- if Mappings::COMPOSITION[codepoints[0..1]]
39
- new << Mappings::COMPOSITION[codepoints.slice!(0,2)]
40
- else
41
- new << codepoints.shift
42
- end
43
- end
44
- new.compact.flatten.pack("U*")
45
- end
46
- end
47
- end
48
- end
49
- end