babosa 1.0.4 → 2.0.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (56) hide show
  1. checksums.yaml +4 -4
  2. checksums.yaml.gz.sig +0 -0
  3. data.tar.gz.sig +0 -0
  4. data/Changelog.md +12 -0
  5. data/README.md +81 -119
  6. data/Rakefile +9 -8
  7. data/lib/babosa.rb +2 -4
  8. data/lib/babosa/identifier.rb +104 -129
  9. data/lib/babosa/transliterator/base.rb +57 -56
  10. data/lib/babosa/transliterator/bulgarian.rb +3 -2
  11. data/lib/babosa/transliterator/cyrillic.rb +5 -5
  12. data/lib/babosa/transliterator/danish.rb +3 -3
  13. data/lib/babosa/transliterator/german.rb +3 -2
  14. data/lib/babosa/transliterator/greek.rb +4 -3
  15. data/lib/babosa/transliterator/hindi.rb +3 -2
  16. data/lib/babosa/transliterator/latin.rb +5 -5
  17. data/lib/babosa/transliterator/macedonian.rb +3 -2
  18. data/lib/babosa/transliterator/norwegian.rb +3 -3
  19. data/lib/babosa/transliterator/romanian.rb +3 -2
  20. data/lib/babosa/transliterator/russian.rb +3 -2
  21. data/lib/babosa/transliterator/serbian.rb +29 -27
  22. data/lib/babosa/transliterator/spanish.rb +2 -2
  23. data/lib/babosa/transliterator/swedish.rb +3 -3
  24. data/lib/babosa/transliterator/turkish.rb +8 -8
  25. data/lib/babosa/transliterator/ukrainian.rb +5 -4
  26. data/lib/babosa/transliterator/vietnamese.rb +4 -3
  27. data/lib/babosa/version.rb +3 -1
  28. data/spec/{babosa_spec.rb → identifier_spec.rb} +13 -14
  29. data/spec/spec_helper.rb +6 -6
  30. data/spec/transliterators/base_spec.rb +5 -6
  31. data/spec/transliterators/bulgarian_spec.rb +4 -5
  32. data/spec/transliterators/danish_spec.rb +5 -6
  33. data/spec/transliterators/german_spec.rb +4 -5
  34. data/spec/transliterators/greek_spec.rb +7 -7
  35. data/spec/transliterators/hindi_spec.rb +7 -7
  36. data/spec/transliterators/latin_spec.rb +3 -4
  37. data/spec/transliterators/macedonian_spec.rb +3 -4
  38. data/spec/transliterators/norwegian_spec.rb +4 -4
  39. data/spec/transliterators/polish_spec.rb +3 -5
  40. data/spec/transliterators/romanian_spec.rb +5 -6
  41. data/spec/transliterators/russian_spec.rb +3 -4
  42. data/spec/transliterators/serbian_spec.rb +6 -7
  43. data/spec/transliterators/spanish_spec.rb +4 -5
  44. data/spec/transliterators/swedish_spec.rb +7 -7
  45. data/spec/transliterators/turkish_spec.rb +24 -24
  46. data/spec/transliterators/ukrainian_spec.rb +74 -75
  47. data/spec/transliterators/vietnamese_spec.rb +10 -10
  48. metadata +44 -38
  49. metadata.gz.sig +2 -0
  50. data/lib/babosa/utf8/active_support_proxy.rb +0 -38
  51. data/lib/babosa/utf8/dumb_proxy.rb +0 -49
  52. data/lib/babosa/utf8/java_proxy.rb +0 -22
  53. data/lib/babosa/utf8/mappings.rb +0 -193
  54. data/lib/babosa/utf8/proxy.rb +0 -125
  55. data/lib/babosa/utf8/unicode_proxy.rb +0 -23
  56. data/spec/utf8_proxy_spec.rb +0 -52
@@ -1,31 +1,11 @@
1
- # encoding: utf-8
1
+ # frozen_string_literal: true
2
2
 
3
- require 'singleton'
3
+ require "singleton"
4
4
 
5
5
  module Babosa
6
-
7
6
  module Transliterator
8
-
9
- autoload :Bulgarian, "babosa/transliterator/bulgarian"
10
- autoload :Cyrillic, "babosa/transliterator/cyrillic"
11
- autoload :Danish, "babosa/transliterator/danish"
12
- autoload :German, "babosa/transliterator/german"
13
- autoload :Hindi, "babosa/transliterator/hindi"
14
- autoload :Latin, "babosa/transliterator/latin"
15
- autoload :Macedonian, "babosa/transliterator/macedonian"
16
- autoload :Norwegian, "babosa/transliterator/norwegian"
17
- autoload :Romanian, "babosa/transliterator/romanian"
18
- autoload :Russian, "babosa/transliterator/russian"
19
- autoload :Serbian, "babosa/transliterator/serbian"
20
- autoload :Spanish, "babosa/transliterator/spanish"
21
- autoload :Swedish, "babosa/transliterator/swedish"
22
- autoload :Ukrainian, "babosa/transliterator/ukrainian"
23
- autoload :Greek, "babosa/transliterator/greek"
24
- autoload :Vietnamese, "babosa/transliterator/vietnamese"
25
- autoload :Turkish, "babosa/transliterator/turkish"
26
-
27
7
  def self.get(symbol)
28
- class_name = symbol.to_s.split("_").map {|a| a.gsub(/\b('?[a-z])/) { $1.upcase }}.join
8
+ class_name = symbol.to_s.split("_").map { |a| a.gsub(/\b('?[a-z])/) { Regexp.last_match(1).upcase } }.join
29
9
  const_get(class_name)
30
10
  end
31
11
 
@@ -47,36 +27,39 @@ module Babosa
47
27
  "”" => '"',
48
28
  "„" => '"',
49
29
  "‟" => '"',
50
- '’' => "'",
51
- '，' => ",",
52
- '。' => ".",
53
- '！' => "!",
54
- '？' => '?',
55
- '、' => ',',
56
- '（' => '(',
57
- '）' => ')',
58
- '【' => '[',
59
- '】' => ']',
60
- '；' => ';',
61
- '：' => ':',
62
- '《' => '<',
63
- '》' => '>',
64
- # various kinds of space characters
65
- "\xc2\xa0" => " ",
66
- "\xe2\x80\x80" => " ",
67
- "\xe2\x80\x81" => " ",
68
- "\xe2\x80\x82" => " ",
69
- "\xe2\x80\x83" => " ",
70
- "\xe2\x80\x84" => " ",
71
- "\xe2\x80\x85" => " ",
72
- "\xe2\x80\x86" => " ",
73
- "\xe2\x80\x87" => " ",
74
- "\xe2\x80\x88" => " ",
75
- "\xe2\x80\x89" => " ",
76
- "\xe2\x80\x8a" => " ",
77
- "\xe2\x81\x9f" => " ",
78
- "\xe3\x80\x80" => " ",
79
- }.freeze
30
+ "’" => "'",
31
+ "，" => ",",
32
+ "。" => ".",
33
+ "！" => "!",
34
+ "？" => "?",
35
+ "、" => ",",
36
+ "（" => "(",
37
+ "）" => ")",
38
+ "【" => "[",
39
+ "】" => "]",
40
+ "；" => ";",
41
+ "：" => ":",
42
+ "《" => "<",
43
+ "》" => ">"
44
+ }.merge(
45
+ {
46
+ # various kinds of space characters
47
+ "\xc2\xa0" => " ",
48
+ "\xe2\x80\x80" => " ",
49
+ "\xe2\x80\x81" => " ",
50
+ "\xe2\x80\x82" => " ",
51
+ "\xe2\x80\x83" => " ",
52
+ "\xe2\x80\x84" => " ",
53
+ "\xe2\x80\x85" => " ",
54
+ "\xe2\x80\x86" => " ",
55
+ "\xe2\x80\x87" => " ",
56
+ "\xe2\x80\x88" => " ",
57
+ "\xe2\x80\x89" => " ",
58
+ "\xe2\x80\x8a" => " ",
59
+ "\xe2\x81\x9f" => " ",
60
+ "\xe3\x80\x80" => " "
61
+ }
62
+ ).freeze
80
63
 
81
64
  attr_reader :approximations
82
65
 
@@ -87,8 +70,8 @@ module Babosa
87
70
  @approximations = {}
88
71
  end
89
72
  self.class.const_get(:APPROXIMATIONS).inject(@approximations) do |memo, object|
90
- index = object[0].unpack("U").shift
91
- value = object[1].unpack("C*")
73
+ index = object[0].codepoints.shift
74
+ value = object[1].codepoints
92
75
  memo[index] = value.length == 1 ? value[0] : value
93
76
  memo
94
77
  end
@@ -103,8 +86,26 @@ module Babosa
103
86
 
104
87
  # Transliterates a string.
105
88
  def transliterate(string)
106
- string.unpack("U*").map {|char| self[char] || char}.flatten.pack("U*")
89
+ string.codepoints.map { |char| self[char] || char }.flatten.pack("U*")
107
90
  end
108
91
  end
109
92
  end
110
93
  end
94
+
95
+ require "babosa/transliterator/cyrillic"
96
+ require "babosa/transliterator/latin"
97
+ require "babosa/transliterator/bulgarian"
98
+ require "babosa/transliterator/danish"
99
+ require "babosa/transliterator/german"
100
+ require "babosa/transliterator/hindi"
101
+ require "babosa/transliterator/macedonian"
102
+ require "babosa/transliterator/norwegian"
103
+ require "babosa/transliterator/romanian"
104
+ require "babosa/transliterator/russian"
105
+ require "babosa/transliterator/serbian"
106
+ require "babosa/transliterator/spanish"
107
+ require "babosa/transliterator/swedish"
108
+ require "babosa/transliterator/ukrainian"
109
+ require "babosa/transliterator/greek"
110
+ require "babosa/transliterator/vietnamese"
111
+ require "babosa/transliterator/turkish"
@@ -1,4 +1,5 @@
1
- # encoding: utf-8
1
+ # frozen_string_literal: true
2
+
2
3
  module Babosa
3
4
  module Transliterator
4
5
  class Bulgarian < Cyrillic
@@ -21,7 +22,7 @@ module Babosa
21
22
  "ь" => "i",
22
23
  "ю" => "iu",
23
24
  "я" => "ia"
24
- }
25
+ }.freeze
25
26
  end
26
27
  end
27
28
  end
@@ -1,7 +1,7 @@
1
- # encoding: utf-8
1
+ # frozen_string_literal: true
2
+
2
3
  module Babosa
3
4
  module Transliterator
4
-
5
5
  # Approximations are based on GOST 7.79, System B:
6
6
  # http://en.wikipedia.org/wiki/ISO_9#GOST_7.79
7
7
  class Cyrillic < Base
@@ -97,11 +97,11 @@ module Babosa
97
97
  "Ѵ" => "Yh",
98
98
  "ѵ" => "yh",
99
99
  "Ґ" => "G",
100
- "ґ" => "g",
101
- }
100
+ "ґ" => "g"
101
+ }.freeze
102
102
 
103
103
  def transliterate(string)
104
- super.gsub(/(c)z([ieyj])/) { "#{$1}#{$2}" }
104
+ super.gsub(/(c)z([ieyj])/) { "#{Regexp.last_match(1)}#{Regexp.last_match(2)}" }
105
105
  end
106
106
  end
107
107
  end
@@ -1,4 +1,5 @@
1
- # encoding: utf-8
1
+ # frozen_string_literal: true
2
+
2
3
  module Babosa
3
4
  module Transliterator
4
5
  class Danish < Latin
@@ -8,8 +9,7 @@ module Babosa
8
9
  "å" => "aa",
9
10
  "Ø" => "Oe",
10
11
  "Å" => "Aa"
11
- }
12
+ }.freeze
12
13
  end
13
14
  end
14
15
  end
15
-
@@ -1,4 +1,5 @@
1
- # encoding: utf-8
1
+ # frozen_string_literal: true
2
+
2
3
  module Babosa
3
4
  module Transliterator
4
5
  class German < Latin
@@ -9,7 +10,7 @@ module Babosa
9
10
  "Ä" => "Ae",
10
11
  "Ö" => "Oe",
11
12
  "Ü" => "Ue"
12
- }
13
+ }.freeze
13
14
  end
14
15
  end
15
16
  end
@@ -1,4 +1,5 @@
1
- # encoding: utf-8
1
+ # frozen_string_literal: true
2
+
2
3
  module Babosa
3
4
  module Transliterator
4
5
  class Greek < Base
@@ -71,7 +72,7 @@ module Babosa
71
72
  "Ώ" => "O",
72
73
  "ω" => "o",
73
74
  "ώ" => "o"
74
- }
75
+ }.freeze
75
76
  end
76
77
  end
77
- end
78
+ end
@@ -1,4 +1,5 @@
1
- # encoding: utf-8
1
+ # frozen_string_literal: true
2
+
2
3
  module Babosa
3
4
  module Transliterator
4
5
  class Hindi < Base
@@ -131,7 +132,7 @@ module Babosa
131
132
  "ॽ" => "?",
132
133
  "ॾ" => "ddd",
133
134
  "ॿ" => "bb"
134
- }
135
+ }.freeze
135
136
  end
136
137
  end
137
138
  end
@@ -1,8 +1,8 @@
1
- # encoding: utf-8
1
+ # frozen_string_literal: true
2
+
2
3
  module Babosa
3
4
  module Transliterator
4
5
  class Latin < Base
5
-
6
6
  APPROXIMATIONS = {
7
7
  "À" => "A",
8
8
  "Á" => "A",
@@ -35,14 +35,14 @@ module Babosa
35
35
  "Ý" => "Y",
36
36
  "Þ" => "Th",
37
37
  "ß" => "ss",
38
- "à" => "a" ,
38
+ "à" => "a",
39
39
  "á" => "a",
40
40
  "â" => "a",
41
41
  "ã" => "a",
42
42
  "ä" => "a",
43
43
  "å" => "a",
44
44
  "æ" => "ae",
45
- "ç" => "c" ,
45
+ "ç" => "c",
46
46
  "è" => "e",
47
47
  "é" => "e",
48
48
  "ê" => "e",
@@ -193,7 +193,7 @@ module Babosa
193
193
  "ž" => "z",
194
194
  "ź" => "z",
195
195
  "ż" => "z"
196
- }
196
+ }.freeze
197
197
  end
198
198
  end
199
199
  end
@@ -1,4 +1,5 @@
1
- # encoding: utf-8
1
+ # frozen_string_literal: true
2
+
2
3
  module Babosa
3
4
  module Transliterator
4
5
  class Macedonian < Cyrillic
@@ -23,7 +24,7 @@ module Babosa
23
24
  "ѕ" => "z",
24
25
  "ј" => "j",
25
26
  "х" => "h"
26
- }
27
+ }.freeze
27
28
  end
28
29
  end
29
30
  end
@@ -1,4 +1,5 @@
1
- # encoding: utf-8
1
+ # frozen_string_literal: true
2
+
2
3
  module Babosa
3
4
  module Transliterator
4
5
  class Norwegian < Latin
@@ -7,8 +8,7 @@ module Babosa
7
8
  "å" => "aa",
8
9
  "Ø" => "Oe",
9
10
  "Å" => "Aa"
10
- }
11
+ }.freeze
11
12
  end
12
13
  end
13
14
  end
14
-
@@ -1,4 +1,5 @@
1
- # encoding: utf-8
1
+ # frozen_string_literal: true
2
+
2
3
  module Babosa
3
4
  module Transliterator
4
5
  class Romanian < Latin
@@ -7,7 +8,7 @@ module Babosa
7
8
  "ț" => "t",
8
9
  "Ș" => "S",
9
10
  "Ț" => "T"
10
- }
11
+ }.freeze
11
12
  end
12
13
  end
13
14
  end
@@ -1,4 +1,5 @@
1
- # encoding: utf-8
1
+ # frozen_string_literal: true
2
+
2
3
  module Babosa
3
4
  module Transliterator
4
5
  class Russian < Cyrillic
@@ -16,7 +17,7 @@ module Babosa
16
17
  "ц" => "ts",
17
18
  "щ" => "sch",
18
19
  "ю" => "u"
19
- }
20
+ }.freeze
20
21
  end
21
22
  end
22
23
  end
@@ -1,34 +1,36 @@
1
- # encoding: utf-8
1
+ # frozen_string_literal: true
2
2
 
3
3
  module Babosa
4
4
  module Transliterator
5
5
  class Serbian < Latin
6
- APPROXIMATIONS = Cyrillic.const_get(:APPROXIMATIONS).merge({
7
- "Ð" => "Dj",
8
- "Č" => "Ch",
9
- "Š" => "Sh",
10
- "č" => "ch",
11
- "đ" => "dj",
12
- "š" => "sh",
13
- "Ћ" => "C",
14
- "Ц" => "C",
15
- "Ч" => "Ch",
16
- "Ђ" => "Dj",
17
- "Џ" => "Dz",
18
- "Х" => "H",
19
- "Ј" => "J",
20
- "Љ" => "Lj",
21
- "Њ" => "Nj",
22
- "ц" => "c",
23
- "ћ" => "c",
24
- "ч" => "ch",
25
- "ђ" => "dj",
26
- "џ" => "dz",
27
- "х" => "h",
28
- "ј" => "j",
29
- "љ" => "lj",
30
- "њ" => "nj"
31
- })
6
+ APPROXIMATIONS = Cyrillic.const_get(:APPROXIMATIONS).merge(
7
+ {
8
+ "Ð" => "Dj",
9
+ "Č" => "Ch",
10
+ "Š" => "Sh",
11
+ "č" => "ch",
12
+ "đ" => "dj",
13
+ "š" => "sh",
14
+ "Ћ" => "C",
15
+ "Ц" => "C",
16
+ "Ч" => "Ch",
17
+ "Ђ" => "Dj",
18
+ "Џ" => "Dz",
19
+ "Х" => "H",
20
+ "Ј" => "J",
21
+ "Љ" => "Lj",
22
+ "Њ" => "Nj",
23
+ "ц" => "c",
24
+ "ћ" => "c",
25
+ "ч" => "ch",
26
+ "ђ" => "dj",
27
+ "џ" => "dz",
28
+ "х" => "h",
29
+ "ј" => "j",
30
+ "љ" => "lj",
31
+ "њ" => "nj"
32
+ }
33
+ )
32
34
  end
33
35
  end
34
36
  end
@@ -1,9 +1,9 @@
1
- # encoding: utf-8
1
+ # frozen_string_literal: true
2
2
 
3
3
  module Babosa
4
4
  module Transliterator
5
5
  class Spanish < Latin
6
- APPROXIMATIONS = {"ñ" => "ni", "Ñ" => "Ni"}
6
+ APPROXIMATIONS = {"ñ" => "ni", "Ñ" => "Ni"}.freeze
7
7
  end
8
8
  end
9
9
  end
@@ -1,4 +1,5 @@
1
- # encoding: utf-8
1
+ # frozen_string_literal: true
2
+
2
3
  module Babosa
3
4
  module Transliterator
4
5
  class Swedish < Latin
@@ -9,8 +10,7 @@ module Babosa
9
10
  "Å" => "Aa",
10
11
  "Ä" => "Ae",
11
12
  "Ö" => "Oe"
12
- }
13
+ }.freeze
13
14
  end
14
15
  end
15
16
  end
16
-