babosa 1.0.2 → 2.0.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (56) hide show
  1. checksums.yaml +5 -5
  2. checksums.yaml.gz.sig +0 -0
  3. data.tar.gz.sig +0 -0
  4. data/Changelog.md +20 -0
  5. data/README.md +83 -121
  6. data/Rakefile +9 -8
  7. data/lib/babosa.rb +2 -4
  8. data/lib/babosa/identifier.rb +104 -129
  9. data/lib/babosa/transliterator/base.rb +57 -54
  10. data/lib/babosa/transliterator/bulgarian.rb +3 -2
  11. data/lib/babosa/transliterator/cyrillic.rb +5 -5
  12. data/lib/babosa/transliterator/danish.rb +3 -3
  13. data/lib/babosa/transliterator/german.rb +3 -2
  14. data/lib/babosa/transliterator/greek.rb +4 -3
  15. data/lib/babosa/transliterator/hindi.rb +138 -0
  16. data/lib/babosa/transliterator/latin.rb +5 -5
  17. data/lib/babosa/transliterator/macedonian.rb +3 -2
  18. data/lib/babosa/transliterator/norwegian.rb +3 -3
  19. data/lib/babosa/transliterator/romanian.rb +3 -2
  20. data/lib/babosa/transliterator/russian.rb +3 -2
  21. data/lib/babosa/transliterator/serbian.rb +29 -27
  22. data/lib/babosa/transliterator/spanish.rb +2 -2
  23. data/lib/babosa/transliterator/swedish.rb +3 -3
  24. data/lib/babosa/transliterator/turkish.rb +8 -0
  25. data/lib/babosa/transliterator/ukrainian.rb +5 -4
  26. data/lib/babosa/transliterator/vietnamese.rb +4 -3
  27. data/lib/babosa/version.rb +3 -1
  28. data/spec/{babosa_spec.rb → identifier_spec.rb} +18 -15
  29. data/spec/spec_helper.rb +15 -6
  30. data/spec/transliterators/base_spec.rb +5 -6
  31. data/spec/transliterators/bulgarian_spec.rb +4 -5
  32. data/spec/transliterators/danish_spec.rb +5 -6
  33. data/spec/transliterators/german_spec.rb +4 -5
  34. data/spec/transliterators/greek_spec.rb +7 -7
  35. data/spec/transliterators/hindi_spec.rb +17 -0
  36. data/spec/transliterators/latin_spec.rb +3 -4
  37. data/spec/transliterators/macedonian_spec.rb +3 -4
  38. data/spec/transliterators/norwegian_spec.rb +4 -4
  39. data/spec/transliterators/polish_spec.rb +3 -5
  40. data/spec/transliterators/romanian_spec.rb +5 -6
  41. data/spec/transliterators/russian_spec.rb +3 -4
  42. data/spec/transliterators/serbian_spec.rb +6 -7
  43. data/spec/transliterators/spanish_spec.rb +4 -5
  44. data/spec/transliterators/swedish_spec.rb +7 -7
  45. data/spec/transliterators/turkish_spec.rb +24 -0
  46. data/spec/transliterators/ukrainian_spec.rb +74 -75
  47. data/spec/transliterators/vietnamese_spec.rb +10 -10
  48. metadata +50 -41
  49. metadata.gz.sig +2 -0
  50. data/lib/babosa/utf8/active_support_proxy.rb +0 -26
  51. data/lib/babosa/utf8/dumb_proxy.rb +0 -49
  52. data/lib/babosa/utf8/java_proxy.rb +0 -22
  53. data/lib/babosa/utf8/mappings.rb +0 -193
  54. data/lib/babosa/utf8/proxy.rb +0 -125
  55. data/lib/babosa/utf8/unicode_proxy.rb +0 -23
  56. data/spec/utf8_proxy_spec.rb +0 -52
@@ -1,29 +1,11 @@
1
- # encoding: utf-8
1
+ # frozen_string_literal: true
2
2
 
3
- require 'singleton'
3
+ require "singleton"
4
4
 
5
5
  module Babosa
6
-
7
6
  module Transliterator
8
-
9
- autoload :Bulgarian, "babosa/transliterator/bulgarian"
10
- autoload :Cyrillic, "babosa/transliterator/cyrillic"
11
- autoload :Danish, "babosa/transliterator/danish"
12
- autoload :German, "babosa/transliterator/german"
13
- autoload :Latin, "babosa/transliterator/latin"
14
- autoload :Macedonian, "babosa/transliterator/macedonian"
15
- autoload :Norwegian, "babosa/transliterator/norwegian"
16
- autoload :Romanian, "babosa/transliterator/romanian"
17
- autoload :Russian, "babosa/transliterator/russian"
18
- autoload :Serbian, "babosa/transliterator/serbian"
19
- autoload :Spanish, "babosa/transliterator/spanish"
20
- autoload :Swedish, "babosa/transliterator/swedish"
21
- autoload :Ukrainian, "babosa/transliterator/ukrainian"
22
- autoload :Greek, "babosa/transliterator/greek"
23
- autoload :Vietnamese, "babosa/transliterator/vietnamese"
24
-
25
7
  def self.get(symbol)
26
- class_name = symbol.to_s.split("_").map {|a| a.gsub(/\b('?[a-z])/) { $1.upcase }}.join
8
+ class_name = symbol.to_s.split("_").map { |a| a.gsub(/\b('?[a-z])/) { Regexp.last_match(1).upcase } }.join
27
9
  const_get(class_name)
28
10
  end
29
11
 
@@ -45,36 +27,39 @@ module Babosa
45
27
  "”" => '"',
46
28
  "„" => '"',
47
29
  "‟" => '"',
48
- '’' => "'",
49
- '，' => ",",
50
- '。' => ".",
51
- '！' => "!",
52
- '？' => '?',
53
- '、' => ',',
54
- '（' => '(',
55
- '）' => ')',
56
- '【' => '[',
57
- '】' => ']',
58
- '；' => ';',
59
- '：' => ':',
60
- '《' => '<',
61
- '》' => '>',
62
- # various kinds of space characters
63
- "\xc2\xa0" => " ",
64
- "\xe2\x80\x80" => " ",
65
- "\xe2\x80\x81" => " ",
66
- "\xe2\x80\x82" => " ",
67
- "\xe2\x80\x83" => " ",
68
- "\xe2\x80\x84" => " ",
69
- "\xe2\x80\x85" => " ",
70
- "\xe2\x80\x86" => " ",
71
- "\xe2\x80\x87" => " ",
72
- "\xe2\x80\x88" => " ",
73
- "\xe2\x80\x89" => " ",
74
- "\xe2\x80\x8a" => " ",
75
- "\xe2\x81\x9f" => " ",
76
- "\xe3\x80\x80" => " ",
77
- }.freeze
30
+ "’" => "'",
31
+ "，" => ",",
32
+ "。" => ".",
33
+ "！" => "!",
34
+ "？" => "?",
35
+ "、" => ",",
36
+ "（" => "(",
37
+ "）" => ")",
38
+ "【" => "[",
39
+ "】" => "]",
40
+ "；" => ";",
41
+ "：" => ":",
42
+ "《" => "<",
43
+ "》" => ">"
44
+ }.merge(
45
+ {
46
+ # various kinds of space characters
47
+ "\xc2\xa0" => " ",
48
+ "\xe2\x80\x80" => " ",
49
+ "\xe2\x80\x81" => " ",
50
+ "\xe2\x80\x82" => " ",
51
+ "\xe2\x80\x83" => " ",
52
+ "\xe2\x80\x84" => " ",
53
+ "\xe2\x80\x85" => " ",
54
+ "\xe2\x80\x86" => " ",
55
+ "\xe2\x80\x87" => " ",
56
+ "\xe2\x80\x88" => " ",
57
+ "\xe2\x80\x89" => " ",
58
+ "\xe2\x80\x8a" => " ",
59
+ "\xe2\x81\x9f" => " ",
60
+ "\xe3\x80\x80" => " "
61
+ }
62
+ ).freeze
78
63
 
79
64
  attr_reader :approximations
80
65
 
@@ -85,8 +70,8 @@ module Babosa
85
70
  @approximations = {}
86
71
  end
87
72
  self.class.const_get(:APPROXIMATIONS).inject(@approximations) do |memo, object|
88
- index = object[0].unpack("U").shift
89
- value = object[1].unpack("C*")
73
+ index = object[0].codepoints.shift
74
+ value = object[1].codepoints
90
75
  memo[index] = value.length == 1 ? value[0] : value
91
76
  memo
92
77
  end
@@ -101,8 +86,26 @@ module Babosa
101
86
 
102
87
  # Transliterates a string.
103
88
  def transliterate(string)
104
- string.unpack("U*").map {|char| self[char] || char}.flatten.pack("U*")
89
+ string.codepoints.map { |char| self[char] || char }.flatten.pack("U*")
105
90
  end
106
91
  end
107
92
  end
108
93
  end
94
+
95
+ require "babosa/transliterator/cyrillic"
96
+ require "babosa/transliterator/latin"
97
+ require "babosa/transliterator/bulgarian"
98
+ require "babosa/transliterator/danish"
99
+ require "babosa/transliterator/german"
100
+ require "babosa/transliterator/hindi"
101
+ require "babosa/transliterator/macedonian"
102
+ require "babosa/transliterator/norwegian"
103
+ require "babosa/transliterator/romanian"
104
+ require "babosa/transliterator/russian"
105
+ require "babosa/transliterator/serbian"
106
+ require "babosa/transliterator/spanish"
107
+ require "babosa/transliterator/swedish"
108
+ require "babosa/transliterator/ukrainian"
109
+ require "babosa/transliterator/greek"
110
+ require "babosa/transliterator/vietnamese"
111
+ require "babosa/transliterator/turkish"
@@ -1,4 +1,5 @@
1
- # encoding: utf-8
1
+ # frozen_string_literal: true
2
+
2
3
  module Babosa
3
4
  module Transliterator
4
5
  class Bulgarian < Cyrillic
@@ -21,7 +22,7 @@ module Babosa
21
22
  "ь" => "i",
22
23
  "ю" => "iu",
23
24
  "я" => "ia"
24
- }
25
+ }.freeze
25
26
  end
26
27
  end
27
28
  end
@@ -1,7 +1,7 @@
1
- # encoding: utf-8
1
+ # frozen_string_literal: true
2
+
2
3
  module Babosa
3
4
  module Transliterator
4
-
5
5
  # Approximations are based on GOST 7.79, System B:
6
6
  # http://en.wikipedia.org/wiki/ISO_9#GOST_7.79
7
7
  class Cyrillic < Base
@@ -97,11 +97,11 @@ module Babosa
97
97
  "Ѵ" => "Yh",
98
98
  "ѵ" => "yh",
99
99
  "Ґ" => "G",
100
- "ґ" => "g",
101
- }
100
+ "ґ" => "g"
101
+ }.freeze
102
102
 
103
103
  def transliterate(string)
104
- super.gsub(/(c)z([ieyj])/) { "#{$1}#{$2}" }
104
+ super.gsub(/(c)z([ieyj])/) { "#{Regexp.last_match(1)}#{Regexp.last_match(2)}" }
105
105
  end
106
106
  end
107
107
  end
@@ -1,4 +1,5 @@
1
- # encoding: utf-8
1
+ # frozen_string_literal: true
2
+
2
3
  module Babosa
3
4
  module Transliterator
4
5
  class Danish < Latin
@@ -8,8 +9,7 @@ module Babosa
8
9
  "å" => "aa",
9
10
  "Ø" => "Oe",
10
11
  "Å" => "Aa"
11
- }
12
+ }.freeze
12
13
  end
13
14
  end
14
15
  end
15
-
@@ -1,4 +1,5 @@
1
- # encoding: utf-8
1
+ # frozen_string_literal: true
2
+
2
3
  module Babosa
3
4
  module Transliterator
4
5
  class German < Latin
@@ -9,7 +10,7 @@ module Babosa
9
10
  "Ä" => "Ae",
10
11
  "Ö" => "Oe",
11
12
  "Ü" => "Ue"
12
- }
13
+ }.freeze
13
14
  end
14
15
  end
15
16
  end
@@ -1,4 +1,5 @@
1
- # encoding: utf-8
1
+ # frozen_string_literal: true
2
+
2
3
  module Babosa
3
4
  module Transliterator
4
5
  class Greek < Base
@@ -71,7 +72,7 @@ module Babosa
71
72
  "Ώ" => "O",
72
73
  "ω" => "o",
73
74
  "ώ" => "o"
74
- }
75
+ }.freeze
75
76
  end
76
77
  end
77
- end
78
+ end
@@ -0,0 +1,138 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Babosa
4
+ module Transliterator
5
+ class Hindi < Base
6
+ APPROXIMATIONS = {
7
+ "ऀ" => "n",
8
+ "ँ" => "n",
9
+ "ं" => "n",
10
+ "ः" => "h",
11
+ "ऄ" => "a",
12
+ "अ" => "a",
13
+ "आ" => "aa",
14
+ "इ" => "i",
15
+ "ई" => "ii",
16
+ "उ" => "u",
17
+ "ऊ" => "uu",
18
+ "ऋ" => "ri",
19
+ "ऌ" => "lri",
20
+ "ऍ" => "e",
21
+ "ऎ" => "e",
22
+ "ए" => "e",
23
+ "ऐ" => "ei",
24
+ "ऑ" => "o",
25
+ "ऒ" => "o",
26
+ "ओ" => "o",
27
+ "औ" => "ou",
28
+ "क" => "k",
29
+ "ख" => "kh",
30
+ "ग" => "g",
31
+ "घ" => "gh",
32
+ "ङ" => "d",
33
+ "च" => "ch",
34
+ "छ" => "chh",
35
+ "ज" => "j",
36
+ "झ" => "jh",
37
+ "ञ" => "ny",
38
+ "ट" => "tt",
39
+ "ठ" => "tth",
40
+ "ड" => "dd",
41
+ "ढ" => "ddh",
42
+ "ण" => "nn",
43
+ "त" => "t",
44
+ "थ" => "th",
45
+ "द" => "d",
46
+ "ध" => "dh",
47
+ "न" => "n",
48
+ "ऩ" => "nnn",
49
+ "प" => "p",
50
+ "फ" => "ph",
51
+ "ब" => "b",
52
+ "भ" => "bh",
53
+ "म" => "m",
54
+ "य" => "y",
55
+ "र" => "r",
56
+ "ऱ" => "rr",
57
+ "ल" => "l",
58
+ "ळ" => "ll",
59
+ "ऴ" => "ll",
60
+ "व" => "v",
61
+ "श" => "sh",
62
+ "ष" => "ss",
63
+ "स" => "s",
64
+ "ह" => "h",
65
+ "ऺ" => "oe",
66
+ "ऻ" => "ooe",
67
+ "़" => "",
68
+ "ऽ" => "-",
69
+ "ा" => "aa",
70
+ "ि" => "i",
71
+ "ी" => "ii",
72
+ "ु" => "u",
73
+ "ू" => "uu",
74
+ "ृ" => "r",
75
+ "ॄ" => "rr",
76
+ "ॅ" => "e",
77
+ "ॆ" => "e",
78
+ "े" => "e",
79
+ "ै" => "ai",
80
+ "ॉ" => "o",
81
+ "ॊ" => "o",
82
+ "ो" => "o",
83
+ "ौ" => "au",
84
+ "्" => "",
85
+ "ॎ" => "e",
86
+ "ॏ" => "aw",
87
+ "ॐ" => "om",
88
+ "॑" => "",
89
+ "॒" => "_",
90
+ "॓" => "",
91
+ "॔" => "",
92
+ "ॕ" => "ee",
93
+ "ॖ" => "ue",
94
+ "ॗ" => "uue",
95
+ "क़" => "q",
96
+ "ख़" => "khh",
97
+ "ग़" => "ghh",
98
+ "ज़" => "za",
99
+ "ड़" => "dddh",
100
+ "ढ़" => "rh",
101
+ "फ़" => "f",
102
+ "य़" => "yy",
103
+ "ॠ" => "rri",
104
+ "ॡ" => "lr",
105
+ "ॢ" => "l",
106
+ "ॣ" => "l",
107
+ "।" => ".",
108
+ "॥" => "..",
109
+ "०" => "0",
110
+ "१" => "1",
111
+ "२" => "2",
112
+ "३" => "3",
113
+ "४" => "4",
114
+ "५" => "5",
115
+ "६" => "6",
116
+ "७" => "7",
117
+ "८" => "8",
118
+ "९" => "9",
119
+ "॰" => ".",
120
+ "ॱ" => ".",
121
+ "ॲ" => "a",
122
+ "ॳ" => "oe",
123
+ "ॴ" => "ooe",
124
+ "ॵ" => "aw",
125
+ "ॶ" => "ue",
126
+ "ॷ" => "uue",
127
+ "ॸ" => "dd",
128
+ "ॹ" => "zh",
129
+ "ॺ" => "y",
130
+ "ॻ" => "gg",
131
+ "ॼ" => "jj",
132
+ "ॽ" => "?",
133
+ "ॾ" => "ddd",
134
+ "ॿ" => "bb"
135
+ }.freeze
136
+ end
137
+ end
138
+ end
@@ -1,8 +1,8 @@
1
- # encoding: utf-8
1
+ # frozen_string_literal: true
2
+
2
3
  module Babosa
3
4
  module Transliterator
4
5
  class Latin < Base
5
-
6
6
  APPROXIMATIONS = {
7
7
  "À" => "A",
8
8
  "Á" => "A",
@@ -35,14 +35,14 @@ module Babosa
35
35
  "Ý" => "Y",
36
36
  "Þ" => "Th",
37
37
  "ß" => "ss",
38
- "à" => "a" ,
38
+ "à" => "a",
39
39
  "á" => "a",
40
40
  "â" => "a",
41
41
  "ã" => "a",
42
42
  "ä" => "a",
43
43
  "å" => "a",
44
44
  "æ" => "ae",
45
- "ç" => "c" ,
45
+ "ç" => "c",
46
46
  "è" => "e",
47
47
  "é" => "e",
48
48
  "ê" => "e",
@@ -193,7 +193,7 @@ module Babosa
193
193
  "ž" => "z",
194
194
  "ź" => "z",
195
195
  "ż" => "z"
196
- }
196
+ }.freeze
197
197
  end
198
198
  end
199
199
  end
@@ -1,4 +1,5 @@
1
- # encoding: utf-8
1
+ # frozen_string_literal: true
2
+
2
3
  module Babosa
3
4
  module Transliterator
4
5
  class Macedonian < Cyrillic
@@ -23,7 +24,7 @@ module Babosa
23
24
  "ѕ" => "z",
24
25
  "ј" => "j",
25
26
  "х" => "h"
26
- }
27
+ }.freeze
27
28
  end
28
29
  end
29
30
  end
@@ -1,4 +1,5 @@
1
- # encoding: utf-8
1
+ # frozen_string_literal: true
2
+
2
3
  module Babosa
3
4
  module Transliterator
4
5
  class Norwegian < Latin
@@ -7,8 +8,7 @@ module Babosa
7
8
  "å" => "aa",
8
9
  "Ø" => "Oe",
9
10
  "Å" => "Aa"
10
- }
11
+ }.freeze
11
12
  end
12
13
  end
13
14
  end
14
-
@@ -1,4 +1,5 @@
1
- # encoding: utf-8
1
+ # frozen_string_literal: true
2
+
2
3
  module Babosa
3
4
  module Transliterator
4
5
  class Romanian < Latin
@@ -7,7 +8,7 @@ module Babosa
7
8
  "ț" => "t",
8
9
  "Ș" => "S",
9
10
  "Ț" => "T"
10
- }
11
+ }.freeze
11
12
  end
12
13
  end
13
14
  end