kebab 1.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (55) hide show
  1. checksums.yaml +7 -0
  2. data/.gemtest +0 -0
  3. data/Changelog.md +99 -0
  4. data/MIT-LICENSE +19 -0
  5. data/README.md +26 -0
  6. data/Rakefile +34 -0
  7. data/lib/kebab.rb +18 -0
  8. data/lib/kebab/identifier.rb +294 -0
  9. data/lib/kebab/transliterator/base.rb +110 -0
  10. data/lib/kebab/transliterator/bulgarian.rb +27 -0
  11. data/lib/kebab/transliterator/cyrillic.rb +108 -0
  12. data/lib/kebab/transliterator/danish.rb +15 -0
  13. data/lib/kebab/transliterator/german.rb +15 -0
  14. data/lib/kebab/transliterator/greek.rb +77 -0
  15. data/lib/kebab/transliterator/hindi.rb +137 -0
  16. data/lib/kebab/transliterator/latin.rb +199 -0
  17. data/lib/kebab/transliterator/macedonian.rb +29 -0
  18. data/lib/kebab/transliterator/norwegian.rb +14 -0
  19. data/lib/kebab/transliterator/romanian.rb +13 -0
  20. data/lib/kebab/transliterator/russian.rb +22 -0
  21. data/lib/kebab/transliterator/serbian.rb +34 -0
  22. data/lib/kebab/transliterator/spanish.rb +9 -0
  23. data/lib/kebab/transliterator/swedish.rb +16 -0
  24. data/lib/kebab/transliterator/turkish.rb +8 -0
  25. data/lib/kebab/transliterator/ukrainian.rb +30 -0
  26. data/lib/kebab/transliterator/vietnamese.rb +143 -0
  27. data/lib/kebab/utf8/active_support_proxy.rb +26 -0
  28. data/lib/kebab/utf8/dumb_proxy.rb +49 -0
  29. data/lib/kebab/utf8/java_proxy.rb +22 -0
  30. data/lib/kebab/utf8/mappings.rb +193 -0
  31. data/lib/kebab/utf8/proxy.rb +125 -0
  32. data/lib/kebab/utf8/unicode_proxy.rb +23 -0
  33. data/lib/kebab/version.rb +5 -0
  34. data/spec/kebab_spec.rb +155 -0
  35. data/spec/spec_helper.rb +45 -0
  36. data/spec/transliterators/base_spec.rb +16 -0
  37. data/spec/transliterators/bulgarian_spec.rb +20 -0
  38. data/spec/transliterators/danish_spec.rb +17 -0
  39. data/spec/transliterators/german_spec.rb +17 -0
  40. data/spec/transliterators/greek_spec.rb +17 -0
  41. data/spec/transliterators/hindi_spec.rb +17 -0
  42. data/spec/transliterators/latin_spec.rb +9 -0
  43. data/spec/transliterators/macedonian_spec.rb +9 -0
  44. data/spec/transliterators/norwegian_spec.rb +18 -0
  45. data/spec/transliterators/polish_spec.rb +14 -0
  46. data/spec/transliterators/romanian_spec.rb +19 -0
  47. data/spec/transliterators/russian_spec.rb +9 -0
  48. data/spec/transliterators/serbian_spec.rb +25 -0
  49. data/spec/transliterators/spanish_spec.rb +13 -0
  50. data/spec/transliterators/swedish_spec.rb +18 -0
  51. data/spec/transliterators/turkish_spec.rb +24 -0
  52. data/spec/transliterators/ukrainian_spec.rb +88 -0
  53. data/spec/transliterators/vietnamese_spec.rb +18 -0
  54. data/spec/utf8_proxy_spec.rb +53 -0
  55. metadata +167 -0
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA256:
3
+ metadata.gz: 87bf31a3fd0eae739657da5215e01e9997814e36fac547bdad9a3b82b4a7cbb5
4
+ data.tar.gz: ba4be2bf8b9d9f531c2e89cc378fb77bb46c2bafd1238938750d6ca703fd8e72
5
+ SHA512:
6
+ metadata.gz: fecfbf09cebc692a71693ccf8e1153135164f0dc353338dd262a2e1ce459fe706d95567bf1f7d65f9d0ef09d4c79df4c345c4c62572b2b4187686604ea5a39f0
7
+ data.tar.gz: 4ac136a1bdda3d4a7dfb986a75580c2b06882799aa8f6f7b20c8573ff3394e950c610ec9bafc0f8729b2b666bb4bda1cc5b73323730ccd57ef14fc3947e7b332
File without changes
@@ -0,0 +1,99 @@
1
+ # Kebab Changelog
2
+
3
+ ## 1.0.2
4
+
5
+ * Fix regression in ActiveSupport UTF8 proxy.
6
+
7
+ ## 1.0.1
8
+
9
+ * Fix error with tidy_bytes on Rubinius.
10
+ * Simplify Active Support UTF8 proxy.
11
+ * Fix `allow_bangs` argument to to_ruby_method being silently ignored.
12
+ * Raise error when generating an impossible Ruby method name.
13
+
14
+ ## 1.0.0
15
+
16
+ * Adopt semantic versioning.
17
+ * When using Active Support, require 3.2 or greater.
18
+ * Require Ruby 2.0 or greater.
19
+ * Fix Ruby warnings.
20
+ * Improve support for Ukrainian.
21
+ * Support some additional punctuation characters used by Chinese and others.
22
+ * Add Polish spec.
23
+ * Use native Unicode normalization on Ruby 2.2 in UTF8::DumbProxy.
24
+ * Invoke Ruby-native upcase/downcase in UTF8::DumbProxy.
25
+ * Proxy `tidy_bytes` method to Active Support when possible.
26
+ * Remove SlugString constant.
27
+
28
+ ## 0.3.11
29
+
30
+ * Add support for Vietnamese.
31
+
32
+ ## 0.3.10
33
+
34
+ * Fix Macedonian "S/S". Don't `include JRuby` unnecessarily.
35
+
36
+ ## 0.3.9
37
+
38
+ * Add missing Greek vowels with diaeresis.
39
+
40
+ ## 0.3.8
41
+
42
+ * Correct and improve Macedonian support.
43
+
44
+ ## 0.3.7
45
+
46
+ * Fix compatibility with Ruby 1.8.7.
47
+ * Add Swedish support.
48
+
49
+ ## 0.3.6
50
+
51
+ * Allow multiple transliterators.
52
+ * Add Greek support.
53
+
54
+ ## 0.3.5
55
+
56
+ * Don't strip underscores from identifiers.
57
+
58
+ ## 0.3.4
59
+
60
+ * Add Romanian support.
61
+
62
+ ## 0.3.3
63
+
64
+ * Add Norwegian support.
65
+
66
+ ## 0.3.2
67
+
68
+ * Improve Macedonian support.
69
+
70
+ ## 0.3.1
71
+
72
+ * Small fixes to Cyrillic.
73
+
74
+ ## 0.3.0
75
+
76
+ * Cyrillic support.
77
+ * Improve support for various Unicode spaces and dashes.
78
+
79
+ ## 0.2.2
80
+
81
+ * Fix for "smart" quote handling.
82
+
83
+ ## 0.2.1
84
+
85
+ * Implement #empty? for compatibility with Active Support's #blank?.
86
+
87
+ ## 0.2.0
88
+
89
+ * Add support for Danish.
90
+ * Add method to generate Ruby identifiers.
91
+ * Improve performance.
92
+
93
+ ## 0.1.1
94
+
95
+ * Add support for Serbian.
96
+
97
+ ## 0.1.0
98
+
99
+ * Initial extraction from FriendlyId.
@@ -0,0 +1,19 @@
1
+ Copyright (c) 2010 Norman Clarke
2
+
3
+ Permission is hereby granted, free of charge, to any person obtaining a copy
4
+ of this software and associated documentation files (the "Software"), to deal
5
+ in the Software without restriction, including without limitation the rights
6
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
7
+ copies of the Software, and to permit persons to whom the Software is
8
+ furnished to do so, subject to the following conditions:
9
+
10
+ The above copyright notice and this permission notice shall be included in all
11
+ copies or substantial portions of the Software.
12
+
13
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
16
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
17
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
18
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
19
+ SOFTWARE.
@@ -0,0 +1,26 @@
1
+ # Kebab
2
+
3
+ The original is [here](http://github.com/norman/kebab). You should probably
4
+ use that unless you hate monkey patching as much as I do.
5
+
6
+ ## Copyright
7
+
8
+ Copyright (c) 2010-2013 Norman Clarke
9
+
10
+ Permission is hereby granted, free of charge, to any person obtaining a copy of
11
+ this software and associated documentation files (the "Software"), to deal in
12
+ the Software without restriction, including without limitation the rights to
13
+ use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
14
+ of the Software, and to permit persons to whom the Software is furnished to do
15
+ so, subject to the following conditions:
16
+
17
+ The above copyright notice and this permission notice shall be included in all
18
+ copies or substantial portions of the Software.
19
+
20
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
21
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
22
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
23
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
24
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
25
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
26
+ SOFTWARE.
@@ -0,0 +1,34 @@
1
+ require "rubygems"
2
+ require "rake/testtask"
3
+ require "rake/clean"
4
+ require "rubygems/package_task"
5
+
6
+ task :default => :spec
7
+ task :test => :spec
8
+
9
+ CLEAN << "pkg" << "doc" << "coverage" << ".yardoc"
10
+
11
+ begin
12
+ require "yard"
13
+ YARD::Rake::YardocTask.new do |t|
14
+ t.options = ["--output-dir=doc"]
15
+ end
16
+ rescue LoadError
17
+ end
18
+
19
+ begin
20
+ desc "Run SimpleCov"
21
+ task :coverage do
22
+ ENV["COV"] = "true"
23
+ Rake::Task["spec"].execute
24
+ end
25
+ rescue LoadError
26
+ end
27
+
28
+ gemspec = File.expand_path("../kebab.gemspec", __FILE__)
29
+ if File.exist? gemspec
30
+ Gem::PackageTask.new(eval(File.read(gemspec))) { |pkg| }
31
+ end
32
+
33
+ require 'rspec/core/rake_task'
34
+ RSpec::Core::RakeTask.new(:spec)
@@ -0,0 +1,18 @@
1
+ module Kebab
2
+ def self.jruby15?
3
+ JRUBY_VERSION >= "1.5" rescue false
4
+ end
5
+
6
+ refine String do
7
+ def to_identifier
8
+ Kebab::Identifier.new self
9
+ end
10
+
11
+ alias to_slug to_identifier
12
+ alias skewer to_identifier
13
+ end
14
+ end
15
+
16
+ require "kebab/transliterator/base"
17
+ require "kebab/utf8/proxy"
18
+ require "kebab/identifier"
@@ -0,0 +1,294 @@
1
+ # encoding: utf-8
2
+ module Kebab
3
+
4
+ # Codepoints for characters that will be deleted by +#word_chars!+.
5
+ STRIPPABLE = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 11, 12, 14, 15, 16, 17, 18, 19,
6
+ 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 33, 34, 35, 36, 37, 38, 39,
7
+ 40, 41, 42, 43, 44, 45, 46, 47, 58, 59, 60, 61, 62, 63, 64, 91, 92, 93, 94,
8
+ 96, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 136,
9
+ 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151,
10
+ 152, 153, 154, 155, 156, 157, 158, 159, 161, 162, 163, 164, 165, 166, 167,
11
+ 168, 169, 171, 172, 173, 174, 175, 176, 177, 178, 179, 180, 182, 183, 184,
12
+ 185, 187, 188, 189, 190, 191, 215, 247, 8203, 8204, 8205, 8239, 65279]
13
+
14
+ # This class provides some string-manipulation methods specific to slugs.
15
+ #
16
+ # Note that this class includes many "bang methods" such as {#clean!} and
17
+ # {#normalize!} that perform actions on the string in-place. Each of these
18
+ # methods has a corresponding "bangless" method (i.e., +Identifier#clean!+
19
+ # and +Identifier#clean+) which does not appear in the documentation because
20
+ # it is generated dynamically.
21
+ #
22
+ # All of the bang methods return an instance of String, while the bangless
23
+ # versions return an instance of Kebab::Identifier, so that calls to methods
24
+ # specific to this class can be chained:
25
+ #
26
+ # string = Identifier.new("hello world")
27
+ # string.with_separators! # => "hello-world"
28
+ # string.with_separators # => <Kebab::Identifier:0x000001013e1590 @wrapped_string="hello-world">
29
+ #
30
+ # @see http://www.utf8-chartable.de/unicode-utf8-table.pl?utf8=dec Unicode character table
31
+ class Identifier
32
+ using Kebab
33
+
34
+ Error = Class.new(StandardError)
35
+
36
+ attr_reader :wrapped_string
37
+ alias to_s wrapped_string
38
+
39
+ @@utf8_proxy = if Kebab.jruby15?
40
+ UTF8::JavaProxy
41
+ elsif defined? Unicode::VERSION
42
+ UTF8::UnicodeProxy
43
+ elsif defined? ActiveSupport
44
+ UTF8::ActiveSupportProxy
45
+ else
46
+ UTF8::DumbProxy
47
+ end
48
+
49
+ # Return the proxy used for UTF-8 support.
50
+ # @see Kebab::UTF8::Proxy
51
+ def self.utf8_proxy
52
+ @@utf8_proxy
53
+ end
54
+
55
+ # Set a proxy object used for UTF-8 support.
56
+ # @see Kebab::UTF8::Proxy
57
+ def self.utf8_proxy=(obj)
58
+ @@utf8_proxy = obj
59
+ end
60
+
61
+ def method_missing(symbol, *args, &block)
62
+ @wrapped_string.__send__(symbol, *args, &block)
63
+ end
64
+
65
+ # @param string [#to_s] The string to use as the basis of the Identifier.
66
+ def initialize(string)
67
+ @wrapped_string = string.to_s
68
+ tidy_bytes!
69
+ normalize_utf8!
70
+ end
71
+
72
+ def ==(value)
73
+ @wrapped_string.to_s == value.to_s
74
+ end
75
+
76
+ def eql?(value)
77
+ @wrapped_string == value
78
+ end
79
+
80
+ def empty?
81
+ # Included so that this class responds to #empty?, for compatibility with Active Support's
82
+ # #blank?
83
+ @wrapped_string.empty?
84
+ end
85
+
86
+ # Approximate an ASCII string. This works only for Western strings using
87
+ # characters that are Roman-alphabet characters + diacritics. Non-letter
88
+ # characters are left unmodified.
89
+ #
90
+ # string = Identifier.new "Łódź, Poland"
91
+ # string.transliterate # => "Lodz, Poland"
92
+ # string = Identifier.new "日本"
93
+ # string.transliterate # => "日本"
94
+ #
95
+ # You can pass any key(s) from +Characters.approximations+ as arguments. This allows
96
+ # for contextual approximations. Various languages are supported; you can see which ones
97
+ # by looking at the source of {Kebab::Transliterator::Base}.
98
+ #
99
+ # string = Identifier.new "Jürgen Müller"
100
+ # string.transliterate # => "Jurgen Muller"
101
+ # string.transliterate :german # => "Juergen Mueller"
102
+ # string = Identifier.new "¡Feliz año!"
103
+ # string.transliterate # => "¡Feliz ano!"
104
+ # string.transliterate :spanish # => "¡Feliz anio!"
105
+ #
106
+ # The approximations are a hash, which you can modify if you choose:
107
+ #
108
+ # # Make Spanish use "nh" rather than "ni"
109
+ # Kebab::Transliterator::Spanish::APPROXIMATIONS["ñ"] = "nh"
110
+ #
111
+ # Notice that this method does not simply convert to ASCII; if you want
112
+ # to remove non-ASCII characters such as "¡" and "¿", use {#to_ascii!}:
113
+ #
114
+ # string.transliterate!(:spanish) # => "¡Feliz anio!"
115
+ # string.to_ascii! # => "Feliz anio!"
116
+ #
117
+ # @param *kinds <Symbol>
118
+ # @return String
119
+ def transliterate!(*kinds)
120
+ kinds.compact!
121
+ kinds = [:latin] if kinds.empty?
122
+ kinds.each do |kind|
123
+ transliterator = Transliterator.get(kind).instance
124
+ @wrapped_string = transliterator.transliterate(@wrapped_string)
125
+ end
126
+ @wrapped_string
127
+ end
128
+
129
+ # Converts dashes to spaces, removes leading and trailing spaces, and
130
+ # replaces multiple whitespace characters with a single space.
131
+ # @return String
132
+ def clean!
133
+ @wrapped_string = @wrapped_string.gsub("-", " ").squeeze(" ").strip
134
+ end
135
+
136
+ # Remove any non-word characters. For this library's purposes, this means
137
+ # anything other than letters, numbers, underscores, spaces, line feeds and carriage returns.
138
+ # @return String
139
+ def word_chars!
140
+ @wrapped_string = (unpack("U*") - Kebab::STRIPPABLE).pack("U*")
141
+ end
142
+
143
+ # Normalize the string for use as a URL slug. Note that in this context,
144
+ # +normalize+ means stripping, removing non-letters/numbers, downcasing,
145
+ # truncating to 255 bytes and converting whitespace to dashes.
146
+ # @param options [Hash, nil]
147
+ # @return String
148
+ def normalize!(options = nil)
149
+ options = default_normalize_options.merge(options || {})
150
+
151
+ if translit_option = options[:transliterate]
152
+ if translit_option != true
153
+ transliterate!(*translit_option)
154
+ else
155
+ transliterate!(*options[:transliterations])
156
+ end
157
+ end
158
+ to_ascii! if options[:to_ascii]
159
+ clean!
160
+ word_chars!
161
+ clean!
162
+ downcase!
163
+ truncate_bytes!(options[:max_length])
164
+ with_separators!(options[:separator])
165
+ end
166
+
167
+ # Normalize a string so that it can safely be used as a Ruby method name.
168
+ def to_ruby_method!(allow_bangs = true)
169
+ leader, trailer = @wrapped_string.strip.scan(/\A(.+)(.)\z/).flatten
170
+ leader = leader.to_s
171
+ trailer = trailer.to_s
172
+ if allow_bangs
173
+ trailer.downcase!
174
+ trailer.gsub!(/[^a-z0-9!=\\?]/, '')
175
+ else
176
+ trailer.downcase!
177
+ trailer.gsub!(/[^a-z0-9]/, '')
178
+ end
179
+ id = leader.to_identifier
180
+ id.transliterate!
181
+ id.to_ascii!
182
+ id.clean!
183
+ id.word_chars!
184
+ id.clean!
185
+ @wrapped_string = id.to_s + trailer
186
+ if @wrapped_string == ""
187
+ raise Error, "Input generates impossible Ruby method name"
188
+ end
189
+ with_separators!("_")
190
+ end
191
+
192
+ # Delete any non-ascii characters.
193
+ # @return String
194
+ def to_ascii!
195
+ @wrapped_string = @wrapped_string.gsub(/[^\x00-\x7f]/u, '')
196
+ end
197
+
198
+ # Truncate the string to +max+ characters.
199
+ # @example
200
+ # "üéøá".to_identifier.truncate(3) #=> "üéø"
201
+ # @return String
202
+ def truncate!(max)
203
+ @wrapped_string = unpack("U*")[0...max].pack("U*")
204
+ end
205
+
206
+ # Truncate the string to +max+ bytes. This can be useful for ensuring that
207
+ # a UTF-8 string will always fit into a database column with a certain max
208
+ # byte length. The resulting string may be less than +max+ if the string must
209
+ # be truncated at a multibyte character boundary.
210
+ # @example
211
+ # "üéøá".to_identifier.truncate_bytes(3) #=> "ü"
212
+ # @return String
213
+ def truncate_bytes!(max)
214
+ return @wrapped_string if @wrapped_string.bytesize <= max
215
+ curr = 0
216
+ new = []
217
+ unpack("U*").each do |char|
218
+ break if curr > max
219
+ char = [char].pack("U")
220
+ curr += char.bytesize
221
+ if curr <= max
222
+ new << char
223
+ end
224
+ end
225
+ @wrapped_string = new.join
226
+ end
227
+
228
+ # Replaces whitespace with dashes ("-").
229
+ # @return String
230
+ def with_separators!(char = "-")
231
+ @wrapped_string = @wrapped_string.gsub(/\s/u, char)
232
+ end
233
+
234
+ # Perform UTF-8 sensitive upcasing.
235
+ # @return String
236
+ def upcase!
237
+ @wrapped_string = @@utf8_proxy.upcase(@wrapped_string)
238
+ end
239
+
240
+ # Perform UTF-8 sensitive downcasing.
241
+ # @return String
242
+ def downcase!
243
+ @wrapped_string = @@utf8_proxy.downcase(@wrapped_string)
244
+ end
245
+
246
+ # Perform Unicode composition on the wrapped string.
247
+ # @return String
248
+ def normalize_utf8!
249
+ @wrapped_string = @@utf8_proxy.normalize_utf8(@wrapped_string)
250
+ end
251
+
252
+ # Attempt to convert characters encoded using CP1252 and ISO-8859-1 to
253
+ # UTF-8.
254
+ # @return String
255
+ def tidy_bytes!
256
+ @wrapped_string = @@utf8_proxy.tidy_bytes(@wrapped_string)
257
+ end
258
+
259
+ %w[transliterate clean downcase word_chars normalize normalize_utf8
260
+ tidy_bytes to_ascii to_ruby_method truncate truncate_bytes upcase
261
+ with_separators].each do |method|
262
+ class_eval(<<-EOM, __FILE__, __LINE__ + 1)
263
+ def #{method}(*args)
264
+ send_to_new_instance(:#{method}!, *args)
265
+ end
266
+ EOM
267
+ end
268
+
269
+ def to_identifier
270
+ self
271
+ end
272
+
273
+ # The default options for {#normalize!}. Override to set your own defaults.
274
+ def default_normalize_options
275
+ {:transliterate => true, :max_length => 255, :separator => "-"}
276
+ end
277
+
278
+ alias approximate_ascii transliterate
279
+ alias approximate_ascii! transliterate!
280
+ alias with_dashes with_separators
281
+ alias with_dashes! with_separators!
282
+ alias to_slug to_identifier
283
+
284
+ private
285
+
286
+ # Used as the basis of the bangless methods.
287
+ def send_to_new_instance(*args)
288
+ id = Identifier.allocate
289
+ id.instance_variable_set :@wrapped_string, to_s
290
+ id.send(*args)
291
+ id
292
+ end
293
+ end
294
+ end