babosa 1.0.2 → 2.0.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (56) hide show
  1. checksums.yaml +5 -5
  2. checksums.yaml.gz.sig +0 -0
  3. data.tar.gz.sig +0 -0
  4. data/Changelog.md +20 -0
  5. data/README.md +83 -121
  6. data/Rakefile +9 -8
  7. data/lib/babosa.rb +2 -4
  8. data/lib/babosa/identifier.rb +104 -129
  9. data/lib/babosa/transliterator/base.rb +57 -54
  10. data/lib/babosa/transliterator/bulgarian.rb +3 -2
  11. data/lib/babosa/transliterator/cyrillic.rb +5 -5
  12. data/lib/babosa/transliterator/danish.rb +3 -3
  13. data/lib/babosa/transliterator/german.rb +3 -2
  14. data/lib/babosa/transliterator/greek.rb +4 -3
  15. data/lib/babosa/transliterator/hindi.rb +138 -0
  16. data/lib/babosa/transliterator/latin.rb +5 -5
  17. data/lib/babosa/transliterator/macedonian.rb +3 -2
  18. data/lib/babosa/transliterator/norwegian.rb +3 -3
  19. data/lib/babosa/transliterator/romanian.rb +3 -2
  20. data/lib/babosa/transliterator/russian.rb +3 -2
  21. data/lib/babosa/transliterator/serbian.rb +29 -27
  22. data/lib/babosa/transliterator/spanish.rb +2 -2
  23. data/lib/babosa/transliterator/swedish.rb +3 -3
  24. data/lib/babosa/transliterator/turkish.rb +8 -0
  25. data/lib/babosa/transliterator/ukrainian.rb +5 -4
  26. data/lib/babosa/transliterator/vietnamese.rb +4 -3
  27. data/lib/babosa/version.rb +3 -1
  28. data/spec/{babosa_spec.rb → identifier_spec.rb} +18 -15
  29. data/spec/spec_helper.rb +15 -6
  30. data/spec/transliterators/base_spec.rb +5 -6
  31. data/spec/transliterators/bulgarian_spec.rb +4 -5
  32. data/spec/transliterators/danish_spec.rb +5 -6
  33. data/spec/transliterators/german_spec.rb +4 -5
  34. data/spec/transliterators/greek_spec.rb +7 -7
  35. data/spec/transliterators/hindi_spec.rb +17 -0
  36. data/spec/transliterators/latin_spec.rb +3 -4
  37. data/spec/transliterators/macedonian_spec.rb +3 -4
  38. data/spec/transliterators/norwegian_spec.rb +4 -4
  39. data/spec/transliterators/polish_spec.rb +3 -5
  40. data/spec/transliterators/romanian_spec.rb +5 -6
  41. data/spec/transliterators/russian_spec.rb +3 -4
  42. data/spec/transliterators/serbian_spec.rb +6 -7
  43. data/spec/transliterators/spanish_spec.rb +4 -5
  44. data/spec/transliterators/swedish_spec.rb +7 -7
  45. data/spec/transliterators/turkish_spec.rb +24 -0
  46. data/spec/transliterators/ukrainian_spec.rb +74 -75
  47. data/spec/transliterators/vietnamese_spec.rb +10 -10
  48. metadata +50 -41
  49. metadata.gz.sig +2 -0
  50. data/lib/babosa/utf8/active_support_proxy.rb +0 -26
  51. data/lib/babosa/utf8/dumb_proxy.rb +0 -49
  52. data/lib/babosa/utf8/java_proxy.rb +0 -22
  53. data/lib/babosa/utf8/mappings.rb +0 -193
  54. data/lib/babosa/utf8/proxy.rb +0 -125
  55. data/lib/babosa/utf8/unicode_proxy.rb +0 -23
  56. data/spec/utf8_proxy_spec.rb +0 -52
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
- SHA1:
3
- metadata.gz: 63fa1835d7bbc606c85b2c8315e8619c37bc7703
4
- data.tar.gz: 4b0f3a169ed08de3a1614026e58fbc0dc00e58bc
2
+ SHA256:
3
+ metadata.gz: 84fa74468828d44925314cc1bcf6d297aa3a94ebfd993bca491686c92c936667
4
+ data.tar.gz: 618a52e9d52878fe8bc5933879115d32dd31b212a9cd2178e78bb7320d186c43
5
5
  SHA512:
6
- metadata.gz: 8581a919789c35ab9fe3c077190dcb93a20da9b0ed5ebf05cb7342fb2c4c9eeb8181e3d93850fd7564f31e95cd0195274f24e2fde938c7e6c9027aa02a4ccd15
7
- data.tar.gz: 6a86041c30dcd1cdbf5a39b1d2f319425656e57135856aa90a32fffd181fe85748676f74fbb25254294f8d8c6c63f1574b087a32f1d2d112ce84ae40f932c8b4
6
+ metadata.gz: d8d86867859c5af0f8f80631b5d7aefe14cf87d61ec5908fa08d68fa38cc2e287c77c2c59c41377eb41e6524ba723ed511d1d09e5837259aad4a1ced7162c25a
7
+ data.tar.gz: 005dbd796a469420ddc905e2fcf4261703f3777ab29000102039e52b907d51d51c3ab69f11779bfc3e76ed19c24fad997733d008c7d08f50dc1446fa230b7e3d
checksums.yaml.gz.sig ADDED
Binary file
data.tar.gz.sig ADDED
Binary file
data/Changelog.md CHANGED
@@ -1,5 +1,25 @@
1
1
  # Babosa Changelog
2
2
 
3
+ ## 2.0.0
4
+
5
+ This release contains no important changes. I had a week off from work and
6
+ decided to refactor the code. However there are some small breaking changes so
7
+ I have released it as 2.0.0.
8
+
9
+ * Refactor internals for simplicity
10
+ * Use built-in Ruby UTF-8 support in place of other gems.
11
+ * Drop support for Ruby < 2.5.0.
12
+ * `Babosa::Identifier#word_chars` no longer removes dashes
13
+ * `Babosa::Identifier#to_ruby_method` default argument `allow_bangs` is now a keyword argument
14
+
15
+ ## 1.0.4
16
+
17
+ * Fix nil being cast to frozen string (https://github.com/norman/babosa/pull/52)
18
+
19
+ ## 1.0.3
20
+
21
+ * Fix Active Support 6 deprecations (https://github.com/norman/babosa/pull/50)
22
+
3
23
  ## 1.0.2
4
24
 
5
25
  * Fix regression in ActiveSupport UTF8 proxy.
data/README.md CHANGED
@@ -1,7 +1,6 @@
1
1
  # Babosa
2
2
 
3
- [![Build Status](https://travis-ci.org/norman/babosa.png?branch=master)](https://travis-ci.org/norman/babosa)
4
-
3
+ [![Build Status](https://github.com/norman/babosa/actions/workflows/main.yml/badge.svg)](https://github.com/norman/babosa/actions)
5
4
 
6
5
  Babosa is a library for creating human-friendly identifiers, aka "slugs". It can
7
6
  also be useful for normalizing and sanitizing data.
@@ -15,12 +14,16 @@ FriendlyId.
15
14
 
16
15
  ### Transliterate UTF-8 characters to ASCII
17
16
 
18
- "Gölcük, Turkey".to_slug.transliterate.to_s #=> "Golcuk, Turkey"
17
+ ```ruby
18
+ "Gölcük, Turkey".to_slug.transliterate.to_s #=> "Golcuk, Turkey"
19
+ ```
19
20
 
20
21
  ### Locale sensitive transliteration, with support for many languages
21
22
 
22
- "Jürgen Müller".to_slug.transliterate.to_s #=> "Jurgen Muller"
23
- "Jürgen Müller".to_slug.transliterate(:german).to_s #=> "Juergen Mueller"
23
+ ```ruby
24
+ "Jürgen Müller".to_slug.transliterate.to_s #=> "Jurgen Muller"
25
+ "Jürgen Müller".to_slug.transliterate(:german).to_s #=> "Juergen Mueller"
26
+ ```
24
27
 
25
28
  Currently supported languages include:
26
29
 
@@ -28,6 +31,7 @@ Currently supported languages include:
28
31
  * Danish
29
32
  * German
30
33
  * Greek
34
+ * Hindi
31
35
  * Macedonian
32
36
  * Norwegian
33
37
  * Romanian
@@ -35,124 +39,125 @@ Currently supported languages include:
35
39
  * Serbian
36
40
  * Spanish
37
41
  * Swedish
42
+ * Turkish
38
43
  * Ukrainian
44
+ * Vietnamese
45
+
46
+ Additionally there are generic transliterators for transliterating from the
47
+ Cyrillic alphabet and Latin alphabet with diacritics. The Latin transliterator
48
+ can be used, for example, with Czech. There is also a transliterator named
49
+ "Hindi" which may be sufficient for other Indic languages using Devanagari, but
50
+ I do not know enough to say whether the transliterations would make sense.
39
51
 
40
52
  I'll gladly accept contributions from fluent speakers to support more languages.
41
53
 
42
54
  ### Strip non-ASCII characters
43
55
 
44
- "Gölcük, Turkey".to_slug.to_ascii.to_s #=> "Glck, Turkey"
56
+ ```ruby
57
+ "Gölcük, Turkey".to_slug.to_ascii.to_s #=> "Glck, Turkey"
58
+ ```
45
59
 
46
60
  ### Truncate by characters
47
61
 
48
- "üüü".to_slug.truncate(2).to_s #=> "üü"
62
+ ```ruby
63
+ "üüü".to_slug.truncate(2).to_s #=> "üü"
64
+ ```
49
65
 
50
66
  ### Truncate by bytes
51
67
 
52
68
  This can be useful to ensure the generated slug will fit in a database column
53
69
  whose length is limited by bytes rather than UTF-8 characters.
54
70
 
55
- "üüü".to_slug.truncate_bytes(2).to_s #=> "ü"
71
+ ```ruby
72
+ "üüü".to_slug.truncate_bytes(2).to_s #=> "ü"
73
+ ```
56
74
 
57
75
  ### Remove punctuation chars
58
76
 
59
- "this is, um, **really** cool, huh?".to_slug.word_chars.to_s #=> "this is um really cool huh"
77
+ ```ruby
78
+ "this is, um, **really** cool, huh?".to_slug.word_chars.to_s #=> "this is um really cool huh"
79
+ ```
60
80
 
61
81
  ### All-in-one
62
82
 
63
- "Gölcük, Turkey".to_slug.normalize.to_s #=> "golcuk-turkey"
83
+ ```ruby
84
+ "Gölcük, Turkey".to_slug.normalize.to_s #=> "golcuk-turkey"
85
+ ```
64
86
 
65
87
  ### Other stuff
66
88
 
67
- #### Using Babosa With FriendlyId 4
68
-
69
- require "babosa"
70
-
71
- class Person < ActiveRecord::Base
72
- friendly_id :name, use: :slugged
73
-
74
- def normalize_friendly_id(input)
75
- input.to_s.to_slug.normalize(transliterations: :russian).to_s
76
- end
77
- end
78
-
79
- #### Pedantic UTF-8 support
89
+ #### Using Babosa With FriendlyId 4+
80
90
 
81
- Babosa goes out of its way to handle [nasty Unicode issues you might never think
82
- you would have](https://github.com/norman/enc/blob/master/equivalence.rb) by
83
- checking, sanitizing and normalizing your string input.
91
+ ```ruby
92
+ require "babosa"
84
93
 
85
- It will automatically use whatever Unicode library you have loaded before
86
- Babosa, or fall back to a simple built-in library. Supported
87
- Unicode libraries include:
94
+ class Person < ActiveRecord::Base
95
+ friendly_id :name, use: :slugged
88
96
 
89
- * Java (only on JRuby of course)
90
- * Active Support
91
- * [Unicode](https://github.com/blackwinter/unicode)
92
- * Built-in
97
+ def normalize_friendly_id(input)
98
+ input.to_s.to_slug.normalize(transliterations: :russian).to_s
99
+ end
100
+ end
101
+ ```
93
102
 
94
- This built-in module is much faster than Active Support but much slower than
95
- Java or Unicode. It can only do **very** naive Unicode composition to ensure
96
- that, for example, "é" will always be composed to a single codepoint rather than
97
- an "e" and a "´" - making it safe to use as a hash key.
103
+ #### UTF-8 support
98
104
 
99
- But seriously - save yourself the headache and install a real Unicode library.
100
- If you are using Babosa with a language that uses the Cyrillic alphabet, Babosa
101
- requires either Unicode, Active Support or Java.
105
+ Babosa normalizes all input strings [to NFC](https://en.wikipedia.org/wiki/Unicode_equivalence#Normal_forms).
102
106
 
103
107
  #### Ruby Method Names
104
108
 
105
- Babosa can also generate strings for Ruby method names. (Yes, Ruby 1.9 can use
109
+ Babosa can generate strings for Ruby method names. (Yes, Ruby 1.9+ can use
106
110
  UTF-8 chars in method names, but you may not want to):
107
111
 
108
112
 
109
- "this is a method".to_slug.to_ruby_method! #=> this_is_a_method
110
- "über cool stuff!".to_slug.to_ruby_method! #=> uber_cool_stuff!
113
+ ```ruby
114
+ "this is a method".to_slug.to_ruby_method! #=> this_is_a_method
115
+ "über cool stuff!".to_slug.to_ruby_method! #=> uber_cool_stuff!
111
116
 
112
- # You can also disallow trailing punctuation chars
113
- "über cool stuff!".to_slug.to_ruby_method(false) #=> uber_cool_stuff
117
+ # You can also disallow trailing punctuation chars
118
+ "über cool stuff!".to_slug.to_ruby_method(allow_bangs: false) #=> uber_cool_stuff
119
+ ```
114
120
 
115
121
  #### Easy to Extend
116
122
 
117
123
  You can add custom transliterators for your language with very little code. For
118
124
  example here's the transliterator for German:
119
125
 
120
- # encoding: utf-8
121
- module Babosa
122
- module Transliterator
123
- class German < Latin
124
- APPROXIMATIONS = {
125
- "ä" => "ae",
126
- "ö" => "oe",
127
- "ü" => "ue",
128
- "Ä" => "Ae",
129
- "Ö" => "Oe",
130
- "Ü" => "Ue"
131
- }
132
- end
133
- end
126
+ ```ruby
127
+ module Babosa
128
+ module Transliterator
129
+ class German < Latin
130
+ APPROXIMATIONS = {
131
+ "ä" => "ae",
132
+ "ö" => "oe",
133
+ "ü" => "ue",
134
+ "Ä" => "Ae",
135
+ "Ö" => "Oe",
136
+ "Ü" => "Ue"
137
+ }
134
138
  end
139
+ end
140
+ end
141
+ ```
135
142
 
136
143
  And a spec (you can use this as a template):
137
144
 
138
- # encoding: utf-8
139
- require File.expand_path("../../spec_helper", __FILE__)
145
+ ```ruby
146
+ require "spec_helper"
140
147
 
141
- describe Babosa::Transliterator::German do
148
+ describe Babosa::Transliterator::German do
149
+ let(:t) { described_class.instance }
150
+ it_behaves_like "a latin transliterator"
142
151
 
143
- let(:t) { described_class.instance }
144
- it_behaves_like "a latin transliterator"
145
-
146
- it "should transliterate Eszett" do
147
- t.transliterate("ß").should eql("ss")
148
- end
149
-
150
- it "should transliterate vowels with umlauts" do
151
- t.transliterate("üöä").should eql("ueoeae")
152
- end
153
-
154
- end
152
+ it "should transliterate Eszett" do
153
+ t.transliterate("ß").should eql("ss")
154
+ end
155
155
 
156
+ it "should transliterate vowels with umlauts" do
157
+ t.transliterate("üöä").should eql("ueoeae")
158
+ end
159
+ end
160
+ ```
156
161
 
157
162
  ### Rails 3.x and higher
158
163
 
@@ -167,46 +172,6 @@ and
167
172
  [parameterize](http://api.rubyonrails.org/classes/ActiveSupport/Inflector.html#method-i-parameterize)
168
173
  to see if they suit your needs.
169
174
 
170
- ### Babosa vs. Stringex
171
-
172
- Babosa provides much of the functionality provided by the
173
- [Stringex](https://github.com/rsl/stringex) gem, but in the subjective opinion
174
- of the author, is for most use cases a better choice.
175
-
176
- #### Fewer Features
177
-
178
- Stringex offers functionality for storing slugs in an Active Record model, like
179
- a simple version of [FriendlyId](http://github.com/norman/friendly_id), in
180
- addition to string processing. Babosa only does string processing.
181
-
182
- #### Less Aggressive Unicode Transliteration
183
-
184
- Stringex uses an agressive Unicode to ASCII mapping which outputs gibberish for
185
- almost anything but Western European langages and Mandarin Chinese. Babosa
186
- supports only languages for which fluent speakers have provided
187
- transliterations, to ensure that the output makes sense to users.
188
-
189
- #### Unicode Support
190
-
191
- Stringex does no Unicode normalization or validation before transliterating
192
- strings, so if you pass in strings with encoding errors or with different
193
- Unicode normalizations, you'll get unpredictable results.
194
-
195
- #### No Locale Assumptions
196
-
197
- Babosa avoids making assumptions about locales like Stringex does, so it doesn't
198
- offer transliterations like this out of the box:
199
-
200
- "$12 worth of Ruby power".to_url => "12-dollars-worth-of-ruby-power"
201
-
202
- This is because the symbol "$" is used in many Latin American countries for the
203
- peso. Stringex does this in many places, for example, transliterating all Han
204
- characters into Pinyin, effectively treating Japanese text as if it were
205
- Mandarin Chinese.
206
-
207
-
208
- ### More info
209
-
210
175
  Please see the [API docs](http://rubydoc.info/github/norman/babosa/master/frames) and source code for
211
176
  more info.
212
177
 
@@ -218,9 +183,6 @@ Babosa can be installed via Rubygems:
218
183
 
219
184
  You can get the source code from its [Github repository](http://github.com/norman/babosa).
220
185
 
221
- Babosa is tested to be compatible with Ruby 1.8.7-2.0.0, JRuby 1.4+, and
222
- Rubinius 1.0+ It's probably compatible with other Rubies as well.
223
-
224
186
  ## Reporting bugs
225
187
 
226
188
  Please use Babosa's [Github issue
@@ -229,7 +191,7 @@ tracker](http://github.com/norman/babosa/issues).
229
191
 
230
192
  ## Misc
231
193
 
232
- "Babosa" means slug in Spanish.
194
+ "Babosa" means "slug" in Spanish.
233
195
 
234
196
  ## Author
235
197
 
@@ -239,6 +201,7 @@ tracker](http://github.com/norman/babosa/issues).
239
201
 
240
202
  Many thanks to the following people for their help:
241
203
 
204
+ * [Dmitry A. Ilyashevich](https://github.com/dmitry-ilyashevich) - Deprecation fixes
242
205
  * [anhkind](https://github.com/anhkind) - Vietnamese support
243
206
  * [Martins Zakis](https://github.com/martins) - Bug fixes
244
207
  * [Vassilis Rodokanakis](https://github.com/vrodokanakis) - Greek support
@@ -255,10 +218,9 @@ Many thanks to the following people for their help:
255
218
  * [Molte Emil Strange Andersen](https://github.com/molte) - Danish support
256
219
  * [Milan Dobrota](https://github.com/milandobrota) - Serbian support
257
220
 
258
-
259
221
  ## Copyright
260
222
 
261
- Copyright (c) 2010-2013 Norman Clarke
223
+ Copyright (c) 2010-2020 Norman Clarke
262
224
 
263
225
  Permission is hereby granted, free of charge, to any person obtaining a copy of
264
226
  this software and associated documentation files (the "Software"), to deal in
@@ -276,4 +238,4 @@ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
276
238
  AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
277
239
  LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
278
240
  OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
279
- SOFTWARE.
241
+ SOFTWARE.
data/Rakefile CHANGED
@@ -1,10 +1,13 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require "rubygems"
2
4
  require "rake/testtask"
3
5
  require "rake/clean"
4
6
  require "rubygems/package_task"
7
+ require "rubocop/rake_task"
5
8
 
6
- task :default => :spec
7
- task :test => :spec
9
+ task default: [:rubocop, :spec]
10
+ task test: :spec
8
11
 
9
12
  CLEAN << "pkg" << "doc" << "coverage" << ".yardoc"
10
13
 
@@ -14,6 +17,7 @@ begin
14
17
  t.options = ["--output-dir=doc"]
15
18
  end
16
19
  rescue LoadError
20
+ puts "Yard not present"
17
21
  end
18
22
 
19
23
  begin
@@ -23,12 +27,9 @@ begin
23
27
  Rake::Task["spec"].execute
24
28
  end
25
29
  rescue LoadError
30
+ puts "SimpleCov not present"
26
31
  end
27
32
 
28
- gemspec = File.expand_path("../babosa.gemspec", __FILE__)
29
- if File.exist? gemspec
30
- Gem::PackageTask.new(eval(File.read(gemspec))) { |pkg| }
31
- end
32
-
33
- require 'rspec/core/rake_task'
33
+ require "rspec/core/rake_task"
34
34
  RSpec::Core::RakeTask.new(:spec)
35
+ RuboCop::RakeTask.new
data/lib/babosa.rb CHANGED
@@ -1,7 +1,6 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module Babosa
2
- def self.jruby15?
3
- JRUBY_VERSION >= "1.5" rescue false
4
- end
5
4
  end
6
5
 
7
6
  class String
@@ -12,5 +11,4 @@ class String
12
11
  end
13
12
 
14
13
  require "babosa/transliterator/base"
15
- require "babosa/utf8/proxy"
16
14
  require "babosa/identifier"
@@ -1,16 +1,6 @@
1
- # encoding: utf-8
2
- module Babosa
3
-
4
- # Codepoints for characters that will be deleted by +#word_chars!+.
5
- STRIPPABLE = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 11, 12, 14, 15, 16, 17, 18, 19,
6
- 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 33, 34, 35, 36, 37, 38, 39,
7
- 40, 41, 42, 43, 44, 45, 46, 47, 58, 59, 60, 61, 62, 63, 64, 91, 92, 93, 94,
8
- 96, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 136,
9
- 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151,
10
- 152, 153, 154, 155, 156, 157, 158, 159, 161, 162, 163, 164, 165, 166, 167,
11
- 168, 169, 171, 172, 173, 174, 175, 176, 177, 178, 179, 180, 182, 183, 184,
12
- 185, 187, 188, 189, 190, 191, 215, 247, 8203, 8204, 8205, 8239, 65279]
1
+ # frozen_string_literal: true
13
2
 
3
+ module Babosa
14
4
  # This class provides some string-manipulation methods specific to slugs.
15
5
  #
16
6
  # Note that this class includes many "bang methods" such as {#clean!} and
@@ -20,7 +10,7 @@ module Babosa
20
10
  # it is generated dynamically.
21
11
  #
22
12
  # All of the bang methods return an instance of String, while the bangless
23
- # versions return an instance of Babosa::Identifier, so that calls to methods
13
+ # versions return an instance of {Babosa::Identifier}, so that calls to methods
24
14
  # specific to this class can be chained:
25
15
  #
26
16
  # string = Identifier.new("hello world")
@@ -29,71 +19,47 @@ module Babosa
29
19
  #
30
20
  # @see http://www.utf8-chartable.de/unicode-utf8-table.pl?utf8=dec Unicode character table
31
21
  class Identifier
32
-
33
22
  Error = Class.new(StandardError)
34
23
 
35
24
  attr_reader :wrapped_string
36
25
  alias to_s wrapped_string
37
26
 
38
- @@utf8_proxy = if Babosa.jruby15?
39
- UTF8::JavaProxy
40
- elsif defined? Unicode
41
- UTF8::UnicodeProxy
42
- elsif defined? ActiveSupport
43
- UTF8::ActiveSupportProxy
44
- else
45
- UTF8::DumbProxy
46
- end
47
-
48
- # Return the proxy used for UTF-8 support.
49
- # @see Babosa::UTF8::Proxy
50
- def self.utf8_proxy
51
- @@utf8_proxy
52
- end
53
-
54
- # Set a proxy object used for UTF-8 support.
55
- # @see Babosa::UTF8::Proxy
56
- def self.utf8_proxy=(obj)
57
- @@utf8_proxy = obj
58
- end
59
-
60
27
  def method_missing(symbol, *args, &block)
61
28
  @wrapped_string.__send__(symbol, *args, &block)
62
29
  end
63
30
 
31
+ def respond_to_missing?(name, include_all)
32
+ @wrapped_string.respond_to?(name, include_all)
33
+ end
34
+
64
35
  # @param string [#to_s] The string to use as the basis of the Identifier.
65
36
  def initialize(string)
66
- @wrapped_string = string.to_s
37
+ @wrapped_string = string.to_s.dup
67
38
  tidy_bytes!
68
39
  normalize_utf8!
69
40
  end
70
41
 
71
- def ==(value)
72
- @wrapped_string.to_s == value.to_s
42
+ def ==(other)
43
+ to_s == other.to_s
73
44
  end
74
45
 
75
- def eql?(value)
76
- @wrapped_string == value
46
+ def eql?(other)
47
+ self == other
77
48
  end
78
49
 
79
- def empty?
80
- # included to make this class :respond_to? :empty for compatibility with Active Support's
81
- # #blank?
82
- @wrapped_string.empty?
83
- end
84
-
85
- # Approximate an ASCII string. This works only for Western strings using
86
- # characters that are Roman-alphabet characters + diacritics. Non-letter
87
- # characters are left unmodified.
50
+ # Approximate an ASCII string. This works only for strings using characters
51
+ # that are Roman-alphabet characters + diacritics. Non-letter characters
52
+ # are left unmodified.
88
53
  #
89
- # string = Identifier.new "Łódź
54
+ # string = Identifier.new "Łódź, Poland"
90
55
  # string.transliterate # => "Lodz, Poland"
91
56
  # string = Identifier.new "日本"
92
57
  # string.transliterate # => "日本"
93
58
  #
94
- # You can pass any key(s) from +Characters.approximations+ as arguments. This allows
95
- # for contextual approximations. Various languages are supported, you can see which ones
96
- # by looking at the source of {Babosa::Transliterator::Base}.
59
+ # You can pass the names of any transliterator class as arguments. This
60
+ # allows for contextual approximations. Various languages are supported,
61
+ # you can see which ones by looking at the source of
62
+ # {Babosa::Transliterator::Base}.
97
63
  #
98
64
  # string = Identifier.new "Jürgen Müller"
99
65
  # string.transliterate # => "Jurgen Muller"
@@ -111,7 +77,7 @@ module Babosa
111
77
  # to remove non-ASCII characters such as "¡" and "¿", use {#to_ascii!}:
112
78
  #
113
79
  # string.transliterate!(:spanish) # => "¡Feliz anio!"
114
- # string.transliterate! # => "¡Feliz anio!"
80
+ # string.to_ascii! # => "Feliz anio!"
115
81
  #
116
82
  # @param *args <Symbol>
117
83
  # @return String
@@ -122,40 +88,50 @@ module Babosa
122
88
  transliterator = Transliterator.get(kind).instance
123
89
  @wrapped_string = transliterator.transliterate(@wrapped_string)
124
90
  end
125
- @wrapped_string
91
+ to_s
126
92
  end
127
93
 
128
94
  # Converts dashes to spaces, removes leading and trailing spaces, and
129
95
  # replaces multiple whitespace characters with a single space.
96
+ #
130
97
  # @return String
131
98
  def clean!
132
- @wrapped_string = @wrapped_string.gsub("-", " ").squeeze(" ").strip
99
+ gsub!(/[- ]+/, " ")
100
+ strip!
101
+ to_s
133
102
  end
134
103
 
135
104
  # Remove any non-word characters. For this library's purposes, this means
136
- # anything other than letters, numbers, spaces, newlines and linefeeds.
105
+ # anything other than letters, numbers, spaces, underscores, dashes,
106
+ # newlines, and carriage returns.
107
+ #
137
108
  # @return String
138
109
  def word_chars!
139
- @wrapped_string = (unpack("U*") - Babosa::STRIPPABLE).pack("U*")
110
+ # `[^\p{letter}]` = Any character that is not a Unicode letter
111
+ # `&&` = intersect with the following character class
112
+ # `[^ \d_\-\n\r]` = Anything other than space, digit, underscore, dash, newline or carriage return
113
+ gsub!(/[[^\p{letter}]&&[^ \d_\-\n\r]]/, "")
114
+ to_s
140
115
  end
141
116
 
142
117
  # Normalize the string for use as a URL slug. Note that in this context,
143
118
  # +normalize+ means stripping, removing non-letters/numbers, downcasing,
144
119
  # truncating to 255 bytes and converting whitespace to dashes.
145
- # @param Options
120
+ #
121
+ # @param options [Hash]
146
122
  # @return String
147
- def normalize!(options = nil)
148
- options = default_normalize_options.merge(options || {})
123
+ def normalize!(options = {})
124
+ options = default_normalize_options.merge(options)
149
125
 
150
- if translit_option = options[:transliterate]
151
- if translit_option != true
152
- transliterate!(*translit_option)
153
- else
126
+ if options[:transliterate]
127
+ option = options[:transliterate]
128
+ if option == true
154
129
  transliterate!(*options[:transliterations])
130
+ else
131
+ transliterate!(*option)
155
132
  end
156
133
  end
157
134
  to_ascii! if options[:to_ascii]
158
- clean!
159
135
  word_chars!
160
136
  clean!
161
137
  downcase!
@@ -164,105 +140,103 @@ module Babosa
164
140
  end
165
141
 
166
142
  # Normalize a string so that it can safely be used as a Ruby method name.
167
- def to_ruby_method!(allow_bangs = true)
168
- leader, trailer = @wrapped_string.strip.scan(/\A(.+)(.)\z/).flatten
169
- leader = leader.to_s
170
- trailer = trailer.to_s
171
- if allow_bangs
172
- trailer.downcase!
173
- trailer.gsub!(/[^a-z0-9!=\\?]/, '')
174
- else
175
- trailer.downcase!
176
- trailer.gsub!(/[^a-z0-9]/, '')
177
- end
178
- id = leader.to_identifier
179
- id.transliterate!
180
- id.to_ascii!
181
- id.clean!
182
- id.word_chars!
183
- id.clean!
184
- @wrapped_string = id.to_s + trailer
185
- if @wrapped_string == ""
186
- raise Error, "Input generates impossible Ruby method name"
187
- end
143
+ #
144
+ # @param allow_bangs [Boolean]
145
+ # @return String
146
+ def to_ruby_method!(allow_bangs: true)
147
+ last_char = self[-1]
148
+ transliterate!
149
+ to_ascii!
150
+ word_chars!
151
+ strip_leading_digits!
152
+ clean!
153
+ @wrapped_string += last_char if allow_bangs && ["!", "?"].include?(last_char)
154
+ raise Error, "Input generates impossible Ruby method name" if self == ""
155
+
188
156
  with_separators!("_")
189
157
  end
190
158
 
191
159
  # Delete any non-ascii characters.
160
+ #
192
161
  # @return String
193
162
  def to_ascii!
194
- @wrapped_string = @wrapped_string.gsub(/[^\x00-\x7f]/u, '')
163
+ gsub!(/[^\x00-\x7f]/u, "")
164
+ to_s
195
165
  end
196
166
 
197
167
  # Truncate the string to +max+ characters.
168
+ #
198
169
  # @example
199
170
  # "üéøá".to_identifier.truncate(3) #=> "üéø"
171
+ #
172
+ # @param max [Integer] The maximum number of characters.
200
173
  # @return String
201
174
  def truncate!(max)
202
- @wrapped_string = unpack("U*")[0...max].pack("U*")
175
+ @wrapped_string = slice(0, max)
203
176
  end
204
177
 
205
178
  # Truncate the string to +max+ bytes. This can be useful for ensuring that
206
179
  # a UTF-8 string will always fit into a database column with a certain max
207
180
  # byte length. The resulting string may be less than +max+ if the string must
208
181
  # be truncated at a multibyte character boundary.
182
+ #
209
183
  # @example
210
184
  # "üéøá".to_identifier.truncate_bytes(3) #=> "ü"
185
+ #
186
+ # @param max [Integer] The maximum number of bytes.
211
187
  # @return String
212
188
  def truncate_bytes!(max)
213
- return @wrapped_string if @wrapped_string.bytesize <= max
214
- curr = 0
215
- new = []
216
- unpack("U*").each do |char|
217
- break if curr > max
218
- char = [char].pack("U")
219
- curr += char.bytesize
220
- if curr <= max
221
- new << char
222
- end
223
- end
224
- @wrapped_string = new.join
189
+ truncate!(max)
190
+ chop! until bytesize <= max
225
191
  end
226
192
 
227
193
  # Replaces whitespace with dashes ("-").
194
+ #
195
+ # @param char [String] the separator character to use.
228
196
  # @return String
229
197
  def with_separators!(char = "-")
230
- @wrapped_string = @wrapped_string.gsub(/\s/u, char)
231
- end
232
-
233
- # Perform UTF-8 sensitive upcasing.
234
- # @return String
235
- def upcase!
236
- @wrapped_string = @@utf8_proxy.upcase(@wrapped_string)
198
+ gsub!(/\s/u, char)
199
+ to_s
237
200
  end
238
201
 
239
- # Perform UTF-8 sensitive downcasing.
202
+ # Perform Unicode composition on the wrapped string.
203
+ #
240
204
  # @return String
241
- def downcase!
242
- @wrapped_string = @@utf8_proxy.downcase(@wrapped_string)
205
+ def normalize_utf8!
206
+ unicode_normalize!(:nfc)
207
+ to_s
243
208
  end
244
209
 
245
- # Perform Unicode composition on the wrapped string.
210
+ # Strip any leading digits.
211
+ #
246
212
  # @return String
247
- def normalize_utf8!
248
- @wrapped_string = @@utf8_proxy.normalize_utf8(@wrapped_string)
213
+ def strip_leading_digits!
214
+ gsub!(/^\d+/, "")
215
+ to_s
249
216
  end
250
217
 
251
218
  # Attempt to convert characters encoded using CP1252 and ISO-8859-1 to
252
219
  # UTF-8.
253
220
  # @return String
254
221
  def tidy_bytes!
255
- @wrapped_string = @@utf8_proxy.tidy_bytes(@wrapped_string)
222
+ scrub! do |bad|
223
+ bad.encode(Encoding::UTF_8, Encoding::Windows_1252, invalid: :replace, undef: :replace)
224
+ end
225
+ to_s
256
226
  end
257
227
 
258
- %w[transliterate clean downcase word_chars normalize normalize_utf8
259
- tidy_bytes to_ascii to_ruby_method truncate truncate_bytes upcase
260
- with_separators].each do |method|
261
- class_eval(<<-EOM, __FILE__, __LINE__ + 1)
228
+ %w[clean downcase normalize normalize_utf8 strip_leading_digits
229
+ tidy_bytes to_ascii transliterate truncate truncate_bytes upcase
230
+ with_separators word_chars].each do |method|
231
+ class_eval(<<-METHOD, __FILE__, __LINE__ + 1)
262
232
  def #{method}(*args)
263
- send_to_new_instance(:#{method}!, *args)
233
+ with_new_instance { |id| id.send(:#{method}!, *args) }
264
234
  end
265
- EOM
235
+ METHOD
236
+ end
237
+
238
+ def to_ruby_method(allow_bangs: true)
239
+ with_new_instance { |id| id.to_ruby_method!(allow_bangs: allow_bangs) }
266
240
  end
267
241
 
268
242
  def to_identifier
@@ -271,7 +245,7 @@ module Babosa
271
245
 
272
246
  # The default options for {#normalize!}. Override to set your own defaults.
273
247
  def default_normalize_options
274
- {:transliterate => true, :max_length => 255, :separator => "-"}
248
+ {transliterate: :latin, max_length: 255, separator: "-"}
275
249
  end
276
250
 
277
251
  alias approximate_ascii transliterate
@@ -282,12 +256,13 @@ module Babosa
282
256
 
283
257
  private
284
258
 
285
- # Used as the basis of the bangless methods.
286
- def send_to_new_instance(*args)
287
- id = Identifier.allocate
288
- id.instance_variable_set :@wrapped_string, to_s
289
- id.send(*args)
290
- id
259
+ # Used as the basis of the non-mutating (bangless) methods.
260
+ def with_new_instance
261
+ Identifier.allocate.tap do |id|
262
+ id.instance_variable_set :@wrapped_string, to_s
263
+
264
+ yield id
265
+ end
291
266
  end
292
267
  end
293
268
  end