babosa 1.0.4 → 2.0.0.beta

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (53)
  1. checksums.yaml +4 -4
  2. data/Changelog.md +12 -0
  3. data/README.md +80 -117
  4. data/Rakefile +9 -8
  5. data/lib/babosa.rb +2 -4
  6. data/lib/babosa/identifier.rb +82 -121
  7. data/lib/babosa/transliterator/base.rb +57 -56
  8. data/lib/babosa/transliterator/bulgarian.rb +3 -2
  9. data/lib/babosa/transliterator/cyrillic.rb +5 -5
  10. data/lib/babosa/transliterator/danish.rb +3 -3
  11. data/lib/babosa/transliterator/german.rb +3 -2
  12. data/lib/babosa/transliterator/greek.rb +4 -3
  13. data/lib/babosa/transliterator/hindi.rb +3 -2
  14. data/lib/babosa/transliterator/latin.rb +5 -5
  15. data/lib/babosa/transliterator/macedonian.rb +3 -2
  16. data/lib/babosa/transliterator/norwegian.rb +3 -3
  17. data/lib/babosa/transliterator/romanian.rb +3 -2
  18. data/lib/babosa/transliterator/russian.rb +3 -2
  19. data/lib/babosa/transliterator/serbian.rb +29 -27
  20. data/lib/babosa/transliterator/spanish.rb +2 -2
  21. data/lib/babosa/transliterator/swedish.rb +3 -3
  22. data/lib/babosa/transliterator/turkish.rb +8 -8
  23. data/lib/babosa/transliterator/ukrainian.rb +5 -4
  24. data/lib/babosa/transliterator/vietnamese.rb +4 -3
  25. data/lib/babosa/version.rb +3 -1
  26. data/spec/{babosa_spec.rb → identifier_spec.rb} +9 -10
  27. data/spec/spec_helper.rb +6 -6
  28. data/spec/transliterators/base_spec.rb +5 -6
  29. data/spec/transliterators/bulgarian_spec.rb +4 -5
  30. data/spec/transliterators/danish_spec.rb +5 -6
  31. data/spec/transliterators/german_spec.rb +4 -5
  32. data/spec/transliterators/greek_spec.rb +7 -7
  33. data/spec/transliterators/hindi_spec.rb +7 -7
  34. data/spec/transliterators/latin_spec.rb +3 -4
  35. data/spec/transliterators/macedonian_spec.rb +3 -4
  36. data/spec/transliterators/norwegian_spec.rb +4 -4
  37. data/spec/transliterators/polish_spec.rb +3 -5
  38. data/spec/transliterators/romanian_spec.rb +5 -6
  39. data/spec/transliterators/russian_spec.rb +3 -4
  40. data/spec/transliterators/serbian_spec.rb +6 -7
  41. data/spec/transliterators/spanish_spec.rb +4 -5
  42. data/spec/transliterators/swedish_spec.rb +7 -7
  43. data/spec/transliterators/turkish_spec.rb +24 -24
  44. data/spec/transliterators/ukrainian_spec.rb +74 -75
  45. data/spec/transliterators/vietnamese_spec.rb +10 -10
  46. metadata +17 -38
  47. data/lib/babosa/utf8/active_support_proxy.rb +0 -38
  48. data/lib/babosa/utf8/dumb_proxy.rb +0 -49
  49. data/lib/babosa/utf8/java_proxy.rb +0 -22
  50. data/lib/babosa/utf8/mappings.rb +0 -193
  51. data/lib/babosa/utf8/proxy.rb +0 -125
  52. data/lib/babosa/utf8/unicode_proxy.rb +0 -23
  53. data/spec/utf8_proxy_spec.rb +0 -52
@@ -1,31 +1,11 @@
1
- # encoding: utf-8
1
+ # frozen_string_literal: true
2
2
 
3
- require 'singleton'
3
+ require "singleton"
4
4
 
5
5
  module Babosa
6
-
7
6
  module Transliterator
8
-
9
- autoload :Bulgarian, "babosa/transliterator/bulgarian"
10
- autoload :Cyrillic, "babosa/transliterator/cyrillic"
11
- autoload :Danish, "babosa/transliterator/danish"
12
- autoload :German, "babosa/transliterator/german"
13
- autoload :Hindi, "babosa/transliterator/hindi"
14
- autoload :Latin, "babosa/transliterator/latin"
15
- autoload :Macedonian, "babosa/transliterator/macedonian"
16
- autoload :Norwegian, "babosa/transliterator/norwegian"
17
- autoload :Romanian, "babosa/transliterator/romanian"
18
- autoload :Russian, "babosa/transliterator/russian"
19
- autoload :Serbian, "babosa/transliterator/serbian"
20
- autoload :Spanish, "babosa/transliterator/spanish"
21
- autoload :Swedish, "babosa/transliterator/swedish"
22
- autoload :Ukrainian, "babosa/transliterator/ukrainian"
23
- autoload :Greek, "babosa/transliterator/greek"
24
- autoload :Vietnamese, "babosa/transliterator/vietnamese"
25
- autoload :Turkish, "babosa/transliterator/turkish"
26
-
27
7
  def self.get(symbol)
28
- class_name = symbol.to_s.split("_").map {|a| a.gsub(/\b('?[a-z])/) { $1.upcase }}.join
8
+ class_name = symbol.to_s.split("_").map { |a| a.gsub(/\b('?[a-z])/) { Regexp.last_match(1).upcase } }.join
29
9
  const_get(class_name)
30
10
  end
31
11
 
@@ -47,36 +27,39 @@ module Babosa
47
27
  "”" => '"',
48
28
  "„" => '"',
49
29
  "‟" => '"',
50
- '' => "'",
51
- '' => ",",
52
- '' => ".",
53
- '' => "!",
54
- '' => '?',
55
- '' => ',',
56
- '' => '(',
57
- '' => ')',
58
- '' => '[',
59
- '' => ']',
60
- '' => ';',
61
- '' => ':',
62
- '' => '<',
63
- '' => '>',
64
- # various kinds of space characters
65
- "\xc2\xa0" => " ",
66
- "\xe2\x80\x80" => " ",
67
- "\xe2\x80\x81" => " ",
68
- "\xe2\x80\x82" => " ",
69
- "\xe2\x80\x83" => " ",
70
- "\xe2\x80\x84" => " ",
71
- "\xe2\x80\x85" => " ",
72
- "\xe2\x80\x86" => " ",
73
- "\xe2\x80\x87" => " ",
74
- "\xe2\x80\x88" => " ",
75
- "\xe2\x80\x89" => " ",
76
- "\xe2\x80\x8a" => " ",
77
- "\xe2\x81\x9f" => " ",
78
- "\xe3\x80\x80" => " ",
79
- }.freeze
30
+ "" => "'",
31
+ "" => ",",
32
+ "" => ".",
33
+ "" => "!",
34
+ "" => "?",
35
+ "" => ",",
36
+ "" => "(",
37
+ "" => ")",
38
+ "" => "[",
39
+ "" => "]",
40
+ "" => ";",
41
+ "" => ":",
42
+ "" => "<",
43
+ "" => ">"
44
+ }.merge(
45
+ {
46
+ # various kinds of space characters
47
+ "\xc2\xa0" => " ",
48
+ "\xe2\x80\x80" => " ",
49
+ "\xe2\x80\x81" => " ",
50
+ "\xe2\x80\x82" => " ",
51
+ "\xe2\x80\x83" => " ",
52
+ "\xe2\x80\x84" => " ",
53
+ "\xe2\x80\x85" => " ",
54
+ "\xe2\x80\x86" => " ",
55
+ "\xe2\x80\x87" => " ",
56
+ "\xe2\x80\x88" => " ",
57
+ "\xe2\x80\x89" => " ",
58
+ "\xe2\x80\x8a" => " ",
59
+ "\xe2\x81\x9f" => " ",
60
+ "\xe3\x80\x80" => " "
61
+ }
62
+ ).freeze
80
63
 
81
64
  attr_reader :approximations
82
65
 
@@ -87,8 +70,8 @@ module Babosa
87
70
  @approximations = {}
88
71
  end
89
72
  self.class.const_get(:APPROXIMATIONS).inject(@approximations) do |memo, object|
90
- index = object[0].unpack("U").shift
91
- value = object[1].unpack("C*")
73
+ index = object[0].codepoints.shift
74
+ value = object[1].codepoints
92
75
  memo[index] = value.length == 1 ? value[0] : value
93
76
  memo
94
77
  end
@@ -103,8 +86,26 @@ module Babosa
103
86
 
104
87
  # Transliterates a string.
105
88
  def transliterate(string)
106
- string.unpack("U*").map {|char| self[char] || char}.flatten.pack("U*")
89
+ string.codepoints.map { |char| self[char] || char }.flatten.pack("U*")
107
90
  end
108
91
  end
109
92
  end
110
93
  end
94
+
95
+ require "babosa/transliterator/cyrillic"
96
+ require "babosa/transliterator/latin"
97
+ require "babosa/transliterator/bulgarian"
98
+ require "babosa/transliterator/danish"
99
+ require "babosa/transliterator/german"
100
+ require "babosa/transliterator/hindi"
101
+ require "babosa/transliterator/macedonian"
102
+ require "babosa/transliterator/norwegian"
103
+ require "babosa/transliterator/romanian"
104
+ require "babosa/transliterator/russian"
105
+ require "babosa/transliterator/serbian"
106
+ require "babosa/transliterator/spanish"
107
+ require "babosa/transliterator/swedish"
108
+ require "babosa/transliterator/ukrainian"
109
+ require "babosa/transliterator/greek"
110
+ require "babosa/transliterator/vietnamese"
111
+ require "babosa/transliterator/turkish"
@@ -1,4 +1,5 @@
1
- # encoding: utf-8
1
+ # frozen_string_literal: true
2
+
2
3
  module Babosa
3
4
  module Transliterator
4
5
  class Bulgarian < Cyrillic
@@ -21,7 +22,7 @@ module Babosa
21
22
  "ь" => "i",
22
23
  "ю" => "iu",
23
24
  "я" => "ia"
24
- }
25
+ }.freeze
25
26
  end
26
27
  end
27
28
  end
@@ -1,7 +1,7 @@
1
- # encoding: utf-8
1
+ # frozen_string_literal: true
2
+
2
3
  module Babosa
3
4
  module Transliterator
4
-
5
5
  # Approximations are based on GOST 7.79, System B:
6
6
  # http://en.wikipedia.org/wiki/ISO_9#GOST_7.79
7
7
  class Cyrillic < Base
@@ -97,11 +97,11 @@ module Babosa
97
97
  "Ѵ" => "Yh",
98
98
  "ѵ" => "yh",
99
99
  "Ґ" => "G",
100
- "ґ" => "g",
101
- }
100
+ "ґ" => "g"
101
+ }.freeze
102
102
 
103
103
  def transliterate(string)
104
- super.gsub(/(c)z([ieyj])/) { "#{$1}#{$2}" }
104
+ super.gsub(/(c)z([ieyj])/) { "#{Regexp.last_match(1)}#{Regexp.last_match(2)}" }
105
105
  end
106
106
  end
107
107
  end
@@ -1,4 +1,5 @@
1
- # encoding: utf-8
1
+ # frozen_string_literal: true
2
+
2
3
  module Babosa
3
4
  module Transliterator
4
5
  class Danish < Latin
@@ -8,8 +9,7 @@ module Babosa
8
9
  "å" => "aa",
9
10
  "Ø" => "Oe",
10
11
  "Å" => "Aa"
11
- }
12
+ }.freeze
12
13
  end
13
14
  end
14
15
  end
15
-
@@ -1,4 +1,5 @@
1
- # encoding: utf-8
1
+ # frozen_string_literal: true
2
+
2
3
  module Babosa
3
4
  module Transliterator
4
5
  class German < Latin
@@ -9,7 +10,7 @@ module Babosa
9
10
  "Ä" => "Ae",
10
11
  "Ö" => "Oe",
11
12
  "Ü" => "Ue"
12
- }
13
+ }.freeze
13
14
  end
14
15
  end
15
16
  end
@@ -1,4 +1,5 @@
1
- # encoding: utf-8
1
+ # frozen_string_literal: true
2
+
2
3
  module Babosa
3
4
  module Transliterator
4
5
  class Greek < Base
@@ -71,7 +72,7 @@ module Babosa
71
72
  "Ώ" => "O",
72
73
  "ω" => "o",
73
74
  "ώ" => "o"
74
- }
75
+ }.freeze
75
76
  end
76
77
  end
77
- end
78
+ end
@@ -1,4 +1,5 @@
1
- # encoding: utf-8
1
+ # frozen_string_literal: true
2
+
2
3
  module Babosa
3
4
  module Transliterator
4
5
  class Hindi < Base
@@ -131,7 +132,7 @@ module Babosa
131
132
  "ॽ" => "?",
132
133
  "ॾ" => "ddd",
133
134
  "ॿ" => "bb"
134
- }
135
+ }.freeze
135
136
  end
136
137
  end
137
138
  end
@@ -1,8 +1,8 @@
1
- # encoding: utf-8
1
+ # frozen_string_literal: true
2
+
2
3
  module Babosa
3
4
  module Transliterator
4
5
  class Latin < Base
5
-
6
6
  APPROXIMATIONS = {
7
7
  "À" => "A",
8
8
  "Á" => "A",
@@ -35,14 +35,14 @@ module Babosa
35
35
  "Ý" => "Y",
36
36
  "Þ" => "Th",
37
37
  "ß" => "ss",
38
- "à" => "a" ,
38
+ "à" => "a",
39
39
  "á" => "a",
40
40
  "â" => "a",
41
41
  "ã" => "a",
42
42
  "ä" => "a",
43
43
  "å" => "a",
44
44
  "æ" => "ae",
45
- "ç" => "c" ,
45
+ "ç" => "c",
46
46
  "è" => "e",
47
47
  "é" => "e",
48
48
  "ê" => "e",
@@ -193,7 +193,7 @@ module Babosa
193
193
  "ž" => "z",
194
194
  "ź" => "z",
195
195
  "ż" => "z"
196
- }
196
+ }.freeze
197
197
  end
198
198
  end
199
199
  end
@@ -1,4 +1,5 @@
1
- # encoding: utf-8
1
+ # frozen_string_literal: true
2
+
2
3
  module Babosa
3
4
  module Transliterator
4
5
  class Macedonian < Cyrillic
@@ -23,7 +24,7 @@ module Babosa
23
24
  "ѕ" => "z",
24
25
  "ј" => "j",
25
26
  "х" => "h"
26
- }
27
+ }.freeze
27
28
  end
28
29
  end
29
30
  end
@@ -1,4 +1,5 @@
1
- # encoding: utf-8
1
+ # frozen_string_literal: true
2
+
2
3
  module Babosa
3
4
  module Transliterator
4
5
  class Norwegian < Latin
@@ -7,8 +8,7 @@ module Babosa
7
8
  "å" => "aa",
8
9
  "Ø" => "Oe",
9
10
  "Å" => "Aa"
10
- }
11
+ }.freeze
11
12
  end
12
13
  end
13
14
  end
14
-
@@ -1,4 +1,5 @@
1
- # encoding: utf-8
1
+ # frozen_string_literal: true
2
+
2
3
  module Babosa
3
4
  module Transliterator
4
5
  class Romanian < Latin
@@ -7,7 +8,7 @@ module Babosa
7
8
  "ț" => "t",
8
9
  "Ș" => "S",
9
10
  "Ț" => "T"
10
- }
11
+ }.freeze
11
12
  end
12
13
  end
13
14
  end
@@ -1,4 +1,5 @@
1
- # encoding: utf-8
1
+ # frozen_string_literal: true
2
+
2
3
  module Babosa
3
4
  module Transliterator
4
5
  class Russian < Cyrillic
@@ -16,7 +17,7 @@ module Babosa
16
17
  "ц" => "ts",
17
18
  "щ" => "sch",
18
19
  "ю" => "u"
19
- }
20
+ }.freeze
20
21
  end
21
22
  end
22
23
  end
@@ -1,34 +1,36 @@
1
- # encoding: utf-8
1
+ # frozen_string_literal: true
2
2
 
3
3
  module Babosa
4
4
  module Transliterator
5
5
  class Serbian < Latin
6
- APPROXIMATIONS = Cyrillic.const_get(:APPROXIMATIONS).merge({
7
- "Ð" => "Dj",
8
- "Č" => "Ch",
9
- "Š" => "Sh",
10
- "č" => "ch",
11
- "đ" => "dj",
12
- "š" => "sh",
13
- "Ћ" => "C",
14
- "Ц" => "C",
15
- "Ч" => "Ch",
16
- "Ђ" => "Dj",
17
- "Џ" => "Dz",
18
- "Х" => "H",
19
- "Ј" => "J",
20
- "Љ" => "Lj",
21
- "Њ" => "Nj",
22
- "ц" => "c",
23
- "ћ" => "c",
24
- "ч" => "ch",
25
- "ђ" => "dj",
26
- "џ" => "dz",
27
- "х" => "h",
28
- "ј" => "j",
29
- "љ" => "lj",
30
- "њ" => "nj"
31
- })
6
+ APPROXIMATIONS = Cyrillic.const_get(:APPROXIMATIONS).merge(
7
+ {
8
+ "Ð" => "Dj",
9
+ "Č" => "Ch",
10
+ "Š" => "Sh",
11
+ "č" => "ch",
12
+ "đ" => "dj",
13
+ "š" => "sh",
14
+ "Ћ" => "C",
15
+ "Ц" => "C",
16
+ "Ч" => "Ch",
17
+ "Ђ" => "Dj",
18
+ "Џ" => "Dz",
19
+ "Х" => "H",
20
+ "Ј" => "J",
21
+ "Љ" => "Lj",
22
+ "Њ" => "Nj",
23
+ "ц" => "c",
24
+ "ћ" => "c",
25
+ "ч" => "ch",
26
+ "ђ" => "dj",
27
+ "џ" => "dz",
28
+ "х" => "h",
29
+ "ј" => "j",
30
+ "љ" => "lj",
31
+ "њ" => "nj"
32
+ }
33
+ )
32
34
  end
33
35
  end
34
36
  end
@@ -1,9 +1,9 @@
1
- # encoding: utf-8
1
+ # frozen_string_literal: true
2
2
 
3
3
  module Babosa
4
4
  module Transliterator
5
5
  class Spanish < Latin
6
- APPROXIMATIONS = {"ñ" => "ni", "Ñ" => "Ni"}
6
+ APPROXIMATIONS = {"ñ" => "ni", "Ñ" => "Ni"}.freeze
7
7
  end
8
8
  end
9
9
  end
@@ -1,4 +1,5 @@
1
- # encoding: utf-8
1
+ # frozen_string_literal: true
2
+
2
3
  module Babosa
3
4
  module Transliterator
4
5
  class Swedish < Latin
@@ -9,8 +10,7 @@ module Babosa
9
10
  "Å" => "Aa",
10
11
  "Ä" => "Ae",
11
12
  "Ö" => "Oe"
12
- }
13
+ }.freeze
13
14
  end
14
15
  end
15
16
  end
16
-