babosa 0.3.10 → 1.0.4

Sign up to get free protection for your applications and to get access to all the features.
Files changed (38) hide show
  1. checksums.yaml +7 -0
  2. data/Changelog.md +107 -0
  3. data/README.md +5 -23
  4. data/lib/babosa.rb +0 -17
  5. data/lib/babosa/identifier.rb +19 -17
  6. data/lib/babosa/transliterator/base.rb +19 -3
  7. data/lib/babosa/transliterator/hindi.rb +137 -0
  8. data/lib/babosa/transliterator/macedonian.rb +3 -1
  9. data/lib/babosa/transliterator/turkish.rb +8 -0
  10. data/lib/babosa/transliterator/ukrainian.rb +19 -0
  11. data/lib/babosa/transliterator/vietnamese.rb +143 -0
  12. data/lib/babosa/utf8/active_support_proxy.rb +26 -8
  13. data/lib/babosa/utf8/dumb_proxy.rb +23 -16
  14. data/lib/babosa/utf8/java_proxy.rb +1 -1
  15. data/lib/babosa/utf8/proxy.rb +46 -39
  16. data/lib/babosa/utf8/unicode_proxy.rb +3 -1
  17. data/lib/babosa/version.rb +1 -1
  18. data/spec/babosa_spec.rb +50 -37
  19. data/spec/spec_helper.rb +17 -14
  20. data/spec/transliterators/base_spec.rb +3 -3
  21. data/spec/transliterators/bulgarian_spec.rb +1 -1
  22. data/spec/transliterators/danish_spec.rb +1 -1
  23. data/spec/transliterators/german_spec.rb +2 -2
  24. data/spec/transliterators/greek_spec.rb +1 -1
  25. data/spec/transliterators/hindi_spec.rb +17 -0
  26. data/spec/transliterators/latin_spec.rb +9 -0
  27. data/spec/transliterators/norwegian_spec.rb +1 -1
  28. data/spec/transliterators/polish_spec.rb +14 -0
  29. data/spec/transliterators/romanian_spec.rb +1 -1
  30. data/spec/transliterators/serbian_spec.rb +1 -1
  31. data/spec/transliterators/spanish_spec.rb +1 -1
  32. data/spec/transliterators/swedish_spec.rb +1 -1
  33. data/spec/transliterators/turkish_spec.rb +24 -0
  34. data/spec/transliterators/ukrainian_spec.rb +80 -1
  35. data/spec/transliterators/vietnamese_spec.rb +18 -0
  36. data/spec/utf8_proxy_spec.rb +22 -18
  37. metadata +64 -52
  38. data/init.rb +0 -3
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA256:
3
+ metadata.gz: 7878565d1bfb436b7110d42e81dff1eb589f86e10e0919ded4b2de695784fae3
4
+ data.tar.gz: f6f3e7cc2b4876a940ec66fa1895f9cd3390526c62cebc888110494b72d77fe5
5
+ SHA512:
6
+ metadata.gz: 6ea0ff964d688d9ca29710da13207c12d49509c13d72f8198a33d38141f7f6ad39b5792f5937a94a44b3976dcd68b24233c11b3418dc9d100a170caa799404ed
7
+ data.tar.gz: 1b37fd01a1907f244e112171da5dd942105181b860e6dc05ede3e1798bfdf1e50e8543b69757109dc9bd7dfddecb6ba2fbec2b59f38843223052c559f085490e
@@ -0,0 +1,107 @@
1
+ # Babosa Changelog
2
+
3
+ ## 1.0.4
4
+
5
+ * Fix nil being cast to frozen string (https://github.com/norman/babosa/pull/52)
6
+
7
+ ## 1.0.3
8
+
9
+ * Fix Active Support 6 deprecations (https://github.com/norman/babosa/pull/50)
10
+
11
+ ## 1.0.2
12
+
13
+ * Fix regression in ActiveSupport UTF8 proxy.
14
+
15
+ ## 1.0.1
16
+
17
+ * Fix error with tidy_bytes on Rubinius.
18
+ * Simplify Active Support UTF8 proxy.
19
+ * Fix `allow_bangs` argument to to_ruby_method being silently ignored.
20
+ * Raise error when generating an impossible Ruby method name.
21
+
22
+ ## 1.0.0
23
+
24
+ * Adopt semantic versioning.
25
+ * When using Active Support, require 3.2 or greater.
26
+ * Require Ruby 2.0 or greater.
27
+ * Fix Ruby warnings.
28
+ * Improve support for Ukrainian.
29
+ * Support some additional punctuation characters used by Chinese and others.
30
+ * Add Polish spec.
31
+ * Use native Unicode normalization on Ruby 2.2 in UTF8::DumbProxy.
32
+ * Invoke Ruby-native upcase/downcase in UTF8::DumbProxy.
33
+ * Proxy `tidy_bytes` method to Active Support when possible.
34
+ * Remove SlugString constant.
35
+
36
+ ## 0.3.11
37
+
38
+ * Add support for Vietnamese.
39
+
40
+ ## 0.3.10
41
+
42
+ * Fix Macedonian "S/S". Don't `include JRuby` unnecessarily.
43
+
44
+ ## 0.3.9
45
+
46
+ * Add missing Greek vowels with diaeresis.
47
+
48
+ ## 0.3.8
49
+
50
+ * Correct and improve Macedonian support.
51
+
52
+ ## 0.3.7
53
+
54
+ * Fix compatibility with Ruby 1.8.7.
55
+ * Add Swedish support.
56
+
57
+ ## 0.3.6
58
+
59
+ * Allow multiple transliterators.
60
+ * Add Greek support.
61
+
62
+ ## 0.3.5
63
+
64
+ * Don't strip underscores from identifiers.
65
+
66
+ ## 0.3.4
67
+
68
+ * Add Romanian support.
69
+
70
+ ## 0.3.3
71
+
72
+ * Add Norwegian support.
73
+
74
+ ## 0.3.2
75
+
76
+ * Improve Macedonian support.
77
+
78
+ ## 0.3.1
79
+
80
+ * Small fixes to Cyrillic.
81
+
82
+ ## 0.3.0
83
+
84
+ * Cyrillic support.
85
+ * Improve support for various Unicode spaces and dashes.
86
+
87
+ ## 0.2.2
88
+
89
+ * Fix for "smart" quote handling.
90
+
91
+ ## 0.2.1
92
+
93
+ * Implement #empty? for compatibility with Active Support's #blank?.
94
+
95
+ ## 0.2.0
96
+
97
+ * Add support for Danish.
98
+ * Add method to generate Ruby identifiers.
99
+ * Improve performance.
100
+
101
+ ## 0.1.1
102
+
103
+ * Add support for Serbian.
104
+
105
+ ## 0.1.0
106
+
107
+ * Initial extraction from FriendlyId.
data/README.md CHANGED
@@ -218,8 +218,8 @@ Babosa can be installed via Rubygems:
218
218
 
219
219
  You can get the source code from its [Github repository](http://github.com/norman/babosa).
220
220
 
221
- Babosa is tested to be compatible with Ruby 1.8.7-2.0.0, JRuby 1.4+, and
222
- Rubinius 1.0+ It's probably compatible with other Rubies as well.
221
+ Babosa is tested to be compatible with Ruby 2.x, JRuby 1.7+, and
222
+ Rubinius 2.x. It's probably compatible with other Rubies as well.
223
223
 
224
224
  ## Reporting bugs
225
225
 
@@ -239,6 +239,8 @@ tracker](http://github.com/norman/babosa/issues).
239
239
 
240
240
  Many thanks to the following people for their help:
241
241
 
242
+ * [Dmitry A. Ilyashevich](https://github.com/dmitry-ilyashevich) - Deprecation fixes
243
+ * [anhkind](https://github.com/anhkind) - Vietnamese support
242
244
  * [Martins Zakis](https://github.com/martins) - Bug fixes
243
245
  * [Vassilis Rodokanakis](https://github.com/vrodokanakis) - Greek support
244
246
  * [Peco Danajlovski](https://github.com/Vortex) - Macedonian support
@@ -254,26 +256,6 @@ Many thanks to the following people for their help:
254
256
  * [Molte Emil Strange Andersen](https://github.com/molte) - Danish support
255
257
  * [Milan Dobrota](https://github.com/milandobrota) - Serbian support
256
258
 
257
-
258
- ## Changelog
259
-
260
- * 0.3.10 - Fixed Macedonian "S/S". Don't `include JRuby` unnecessarily.
261
- * 0.3.9 - Added missing Greek vowels with diaeresis.
262
- * 0.3.8 - Correct and improve Macedonian support.
263
- * 0.3.7 - Fix compatibility with Ruby 1.8.7. Add Swedish support.
264
- * 0.3.6 - Allow multiple transliterators. Add Greek support.
265
- * 0.3.5 - Don't strip underscores from identifiers.
266
- * 0.3.4 - Add Romanian support.
267
- * 0.3.3 - Add Norwegian support.
268
- * 0.3.2 - Improve Macedonian support.
269
- * 0.3.1 - Small fixes to Cyrillic.
270
- * 0.3.0 - Cyrillic support. Improve support for various Unicode spaces and dashes.
271
- * 0.2.2 - Fix for "smart" quote handling.
272
- * 0.2.1 - Implement #empty? for compatiblity with Active Support's #blank?.
273
- * 0.2.0 - Added support for Danish. Added method to generate Ruby identifiers. Improved performance.
274
- * 0.1.1 - Added support for Serbian.
275
- * 0.1.0 - Initial extraction from FriendlyId.
276
-
277
259
  ## Copyright
278
260
 
279
261
  Copyright (c) 2010-2013 Norman Clarke
@@ -294,4 +276,4 @@ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
294
276
  AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
295
277
  LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
296
278
  OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
297
- SOFTWARE.
279
+ SOFTWARE.
@@ -9,23 +9,6 @@ class String
9
9
  Babosa::Identifier.new self
10
10
  end
11
11
  alias to_slug to_identifier
12
-
13
- # Compatibility with 1.8.6
14
- if !public_method_defined? :bytesize
15
- def bytesize
16
- unpack("C*").length
17
- end
18
- end
19
-
20
- # Define unless Active Support has already added this method.
21
- if !public_method_defined? :classify
22
- # Convert from underscores to class name. E.g.:
23
- # hello_world => HelloWorld
24
- def classify
25
- split("_").map {|a| a.gsub(/\b('?[a-z])/) { $1.upcase }}.join
26
- end
27
- end
28
-
29
12
  end
30
13
 
31
14
  require "babosa/transliterator/base"
@@ -30,12 +30,14 @@ module Babosa
30
30
  # @see http://www.utf8-chartable.de/unicode-utf8-table.pl?utf8=dec Unicode character table
31
31
  class Identifier
32
32
 
33
+ Error = Class.new(StandardError)
34
+
33
35
  attr_reader :wrapped_string
34
36
  alias to_s wrapped_string
35
37
 
36
38
  @@utf8_proxy = if Babosa.jruby15?
37
39
  UTF8::JavaProxy
38
- elsif defined? Unicode
40
+ elsif defined? Unicode::VERSION
39
41
  UTF8::UnicodeProxy
40
42
  elsif defined? ActiveSupport
41
43
  UTF8::ActiveSupportProxy
@@ -44,13 +46,13 @@ module Babosa
44
46
  end
45
47
 
46
48
  # Return the proxy used for UTF-8 support.
47
- # @see Babosa::UTF8::UTF8Proxy
49
+ # @see Babosa::UTF8::Proxy
48
50
  def self.utf8_proxy
49
51
  @@utf8_proxy
50
52
  end
51
53
 
52
54
  # Set a proxy object used for UTF-8 support.
53
- # @see Babosa::UTF8::UTF8Proxy
55
+ # @see Babosa::UTF8::Proxy
54
56
  def self.utf8_proxy=(obj)
55
57
  @@utf8_proxy = obj
56
58
  end
@@ -100,16 +102,17 @@ module Babosa
100
102
  # string.transliterate # => "¡Feliz ano!"
101
103
  # string.transliterate :spanish # => "¡Feliz anio!"
102
104
  #
103
- # You can modify the built-in approximations, or add your own:
105
+ # The approximations are a hash, which you can modify if you choose:
104
106
  #
105
107
  # # Make Spanish use "nh" rather than "nn"
106
- # Babosa::Characters.add_approximations(:spanish, "ñ" => "nh")
108
+ # Babosa::Transliterator::Spanish::APPROXIMATIONS["ñ"] = "nh"
107
109
  #
108
110
  # Notice that this method does not simply convert to ASCII; if you want
109
111
  # to remove non-ASCII characters such as "¡" and "¿", use {#to_ascii!}:
110
112
  #
111
113
  # string.transliterate!(:spanish) # => "¡Feliz anio!"
112
114
  # string.transliterate! # => "¡Feliz anio!"
115
+ #
113
116
  # @param *kinds <Symbol>
114
117
  # @return String
115
118
  def transliterate!(*kinds)
@@ -142,13 +145,8 @@ module Babosa
142
145
  # @param Options
143
146
  # @return String
144
147
  def normalize!(options = nil)
145
- # Handle deprecated usage
146
- if options == true
147
- warn "#normalize! now takes a hash of options rather than a boolean"
148
- options = default_normalize_options.merge(:to_ascii => true)
149
- else
150
- options = default_normalize_options.merge(options || {})
151
- end
148
+ options = default_normalize_options.merge(options || {})
149
+
152
150
  if translit_option = options[:transliterate]
153
151
  if translit_option != true
154
152
  transliterate!(*translit_option)
@@ -168,10 +166,14 @@ module Babosa
168
166
  # Normalize a string so that it can safely be used as a Ruby method name.
169
167
  def to_ruby_method!(allow_bangs = true)
170
168
  leader, trailer = @wrapped_string.strip.scan(/\A(.+)(.)\z/).flatten
169
+ leader = leader.to_s.dup
170
+ trailer = trailer.to_s.dup
171
171
  if allow_bangs
172
- trailer.downcase.gsub!(/[^a-z0-9!=\\\\?]/, '')
172
+ trailer.downcase!
173
+ trailer.gsub!(/[^a-z0-9!=\\?]/, '')
173
174
  else
174
- trailer.downcase.gsub!(/[^a-z0-9]/, '')
175
+ trailer.downcase!
176
+ trailer.gsub!(/[^a-z0-9]/, '')
175
177
  end
176
178
  id = leader.to_identifier
177
179
  id.transliterate!
@@ -180,6 +182,9 @@ module Babosa
180
182
  id.word_chars!
181
183
  id.clean!
182
184
  @wrapped_string = id.to_s + trailer
185
+ if @wrapped_string == ""
186
+ raise Error, "Input generates impossible Ruby method name"
187
+ end
183
188
  with_separators!("_")
184
189
  end
185
190
 
@@ -285,7 +290,4 @@ module Babosa
285
290
  id
286
291
  end
287
292
  end
288
-
289
- # Identifier is aliased as SlugString to support older versions of FriendlyId.
290
- SlugString = Identifier
291
293
  end
@@ -10,6 +10,7 @@ module Babosa
10
10
  autoload :Cyrillic, "babosa/transliterator/cyrillic"
11
11
  autoload :Danish, "babosa/transliterator/danish"
12
12
  autoload :German, "babosa/transliterator/german"
13
+ autoload :Hindi, "babosa/transliterator/hindi"
13
14
  autoload :Latin, "babosa/transliterator/latin"
14
15
  autoload :Macedonian, "babosa/transliterator/macedonian"
15
16
  autoload :Norwegian, "babosa/transliterator/norwegian"
@@ -20,13 +21,15 @@ module Babosa
20
21
  autoload :Swedish, "babosa/transliterator/swedish"
21
22
  autoload :Ukrainian, "babosa/transliterator/ukrainian"
22
23
  autoload :Greek, "babosa/transliterator/greek"
24
+ autoload :Vietnamese, "babosa/transliterator/vietnamese"
25
+ autoload :Turkish, "babosa/transliterator/turkish"
23
26
 
24
27
  def self.get(symbol)
25
- const_get(symbol.to_s.classify)
28
+ class_name = symbol.to_s.split("_").map {|a| a.gsub(/\b('?[a-z])/) { $1.upcase }}.join
29
+ const_get(class_name)
26
30
  end
27
31
 
28
32
  class Base
29
-
30
33
  include Singleton
31
34
 
32
35
  APPROXIMATIONS = {
@@ -38,7 +41,6 @@ module Babosa
38
41
  "–" => "-",
39
42
  "—" => "-",
40
43
  "―" => "-",
41
- "―" => "-",
42
44
  "‘" => "'",
43
45
  "‛" => "'",
44
46
  "“" => '"',
@@ -46,6 +48,19 @@ module Babosa
46
48
  "„" => '"',
47
49
  "‟" => '"',
48
50
  '’' => "'",
51
+ ',' => ",",
52
+ '。' => ".",
53
+ '!' => "!",
54
+ '?' => '?',
55
+ '、' => ',',
56
+ '(' => '(',
57
+ ')' => ')',
58
+ '【' => '[',
59
+ '】' => ']',
60
+ ';' => ';',
61
+ ':' => ':',
62
+ '《' => '<',
63
+ '》' => '>',
49
64
  # various kinds of space characters
50
65
  "\xc2\xa0" => " ",
51
66
  "\xe2\x80\x80" => " ",
@@ -86,6 +101,7 @@ module Babosa
86
101
  @approximations[codepoint]
87
102
  end
88
103
 
104
+ # Transliterates a string.
89
105
  def transliterate(string)
90
106
  string.unpack("U*").map {|char| self[char] || char}.flatten.pack("U*")
91
107
  end
@@ -0,0 +1,137 @@
1
+ # encoding: utf-8
2
+ module Babosa
3
+ module Transliterator
4
+ class Hindi < Base
5
+ APPROXIMATIONS = {
6
+ "ऀ" => "n",
7
+ "ँ" => "n",
8
+ "ं" => "n",
9
+ "ः" => "h",
10
+ "ऄ" => "a",
11
+ "अ" => "a",
12
+ "आ" => "aa",
13
+ "इ" => "i",
14
+ "ई" => "ii",
15
+ "उ" => "u",
16
+ "ऊ" => "uu",
17
+ "ऋ" => "ri",
18
+ "ऌ" => "lri",
19
+ "ऍ" => "e",
20
+ "ऎ" => "e",
21
+ "ए" => "e",
22
+ "ऐ" => "ei",
23
+ "ऑ" => "o",
24
+ "ऒ" => "o",
25
+ "ओ" => "o",
26
+ "औ" => "ou",
27
+ "क" => "k",
28
+ "ख" => "kh",
29
+ "ग" => "g",
30
+ "घ" => "gh",
31
+ "ङ" => "d",
32
+ "च" => "ch",
33
+ "छ" => "chh",
34
+ "ज" => "j",
35
+ "झ" => "jh",
36
+ "ञ" => "ny",
37
+ "ट" => "tt",
38
+ "ठ" => "tth",
39
+ "ड" => "dd",
40
+ "ढ" => "ddh",
41
+ "ण" => "nn",
42
+ "त" => "t",
43
+ "थ" => "th",
44
+ "द" => "d",
45
+ "ध" => "dh",
46
+ "न" => "n",
47
+ "ऩ" => "nnn",
48
+ "प" => "p",
49
+ "फ" => "ph",
50
+ "ब" => "b",
51
+ "भ" => "bh",
52
+ "म" => "m",
53
+ "य" => "y",
54
+ "र" => "r",
55
+ "ऱ" => "rr",
56
+ "ल" => "l",
57
+ "ळ" => "ll",
58
+ "ऴ" => "ll",
59
+ "व" => "v",
60
+ "श" => "sh",
61
+ "ष" => "ss",
62
+ "स" => "s",
63
+ "ह" => "h",
64
+ "ऺ" => "oe",
65
+ "ऻ" => "ooe",
66
+ "़" => "",
67
+ "ऽ" => "-",
68
+ "ा" => "aa",
69
+ "ि" => "i",
70
+ "ी" => "ii",
71
+ "ु" => "u",
72
+ "ू" => "uu",
73
+ "ृ" => "r",
74
+ "ॄ" => "rr",
75
+ "ॅ" => "e",
76
+ "ॆ" => "e",
77
+ "े" => "e",
78
+ "ै" => "ai",
79
+ "ॉ" => "o",
80
+ "ॊ" => "o",
81
+ "ो" => "o",
82
+ "ौ" => "au",
83
+ "्" => "",
84
+ "ॎ" => "e",
85
+ "ॏ" => "aw",
86
+ "ॐ" => "om",
87
+ "॑" => "",
88
+ "॒" => "_",
89
+ "॓" => "",
90
+ "॔" => "",
91
+ "ॕ" => "ee",
92
+ "ॖ" => "ue",
93
+ "ॗ" => "uue",
94
+ "क़" => "q",
95
+ "ख़" => "khh",
96
+ "ग़" => "ghh",
97
+ "ज़" => "za",
98
+ "ड़" => "dddh",
99
+ "ढ़" => "rh",
100
+ "फ़" => "f",
101
+ "य़" => "yy",
102
+ "ॠ" => "rri",
103
+ "ॡ" => "lr",
104
+ "ॢ" => "l",
105
+ "ॣ" => "l",
106
+ "।" => ".",
107
+ "॥" => "..",
108
+ "०" => "0",
109
+ "१" => "1",
110
+ "२" => "2",
111
+ "३" => "3",
112
+ "४" => "4",
113
+ "५" => "5",
114
+ "६" => "6",
115
+ "७" => "7",
116
+ "८" => "8",
117
+ "९" => "9",
118
+ "॰" => ".",
119
+ "ॱ" => ".",
120
+ "ॲ" => "a",
121
+ "ॳ" => "oe",
122
+ "ॴ" => "ooe",
123
+ "ॵ" => "aw",
124
+ "ॶ" => "ue",
125
+ "ॷ" => "uue",
126
+ "ॸ" => "dd",
127
+ "ॹ" => "zh",
128
+ "ॺ" => "y",
129
+ "ॻ" => "gg",
130
+ "ॼ" => "jj",
131
+ "ॽ" => "?",
132
+ "ॾ" => "ddd",
133
+ "ॿ" => "bb"
134
+ }
135
+ end
136
+ end
137
+ end