kebab 1.0.2

Sign up to get free protection for your applications and to get access to all the features.
Files changed (55) hide show
  1. checksums.yaml +7 -0
  2. data/.gemtest +0 -0
  3. data/Changelog.md +99 -0
  4. data/MIT-LICENSE +19 -0
  5. data/README.md +26 -0
  6. data/Rakefile +34 -0
  7. data/lib/kebab.rb +18 -0
  8. data/lib/kebab/identifier.rb +294 -0
  9. data/lib/kebab/transliterator/base.rb +110 -0
  10. data/lib/kebab/transliterator/bulgarian.rb +27 -0
  11. data/lib/kebab/transliterator/cyrillic.rb +108 -0
  12. data/lib/kebab/transliterator/danish.rb +15 -0
  13. data/lib/kebab/transliterator/german.rb +15 -0
  14. data/lib/kebab/transliterator/greek.rb +77 -0
  15. data/lib/kebab/transliterator/hindi.rb +137 -0
  16. data/lib/kebab/transliterator/latin.rb +199 -0
  17. data/lib/kebab/transliterator/macedonian.rb +29 -0
  18. data/lib/kebab/transliterator/norwegian.rb +14 -0
  19. data/lib/kebab/transliterator/romanian.rb +13 -0
  20. data/lib/kebab/transliterator/russian.rb +22 -0
  21. data/lib/kebab/transliterator/serbian.rb +34 -0
  22. data/lib/kebab/transliterator/spanish.rb +9 -0
  23. data/lib/kebab/transliterator/swedish.rb +16 -0
  24. data/lib/kebab/transliterator/turkish.rb +8 -0
  25. data/lib/kebab/transliterator/ukrainian.rb +30 -0
  26. data/lib/kebab/transliterator/vietnamese.rb +143 -0
  27. data/lib/kebab/utf8/active_support_proxy.rb +26 -0
  28. data/lib/kebab/utf8/dumb_proxy.rb +49 -0
  29. data/lib/kebab/utf8/java_proxy.rb +22 -0
  30. data/lib/kebab/utf8/mappings.rb +193 -0
  31. data/lib/kebab/utf8/proxy.rb +125 -0
  32. data/lib/kebab/utf8/unicode_proxy.rb +23 -0
  33. data/lib/kebab/version.rb +5 -0
  34. data/spec/kebab_spec.rb +155 -0
  35. data/spec/spec_helper.rb +45 -0
  36. data/spec/transliterators/base_spec.rb +16 -0
  37. data/spec/transliterators/bulgarian_spec.rb +20 -0
  38. data/spec/transliterators/danish_spec.rb +17 -0
  39. data/spec/transliterators/german_spec.rb +17 -0
  40. data/spec/transliterators/greek_spec.rb +17 -0
  41. data/spec/transliterators/hindi_spec.rb +17 -0
  42. data/spec/transliterators/latin_spec.rb +9 -0
  43. data/spec/transliterators/macedonian_spec.rb +9 -0
  44. data/spec/transliterators/norwegian_spec.rb +18 -0
  45. data/spec/transliterators/polish_spec.rb +14 -0
  46. data/spec/transliterators/romanian_spec.rb +19 -0
  47. data/spec/transliterators/russian_spec.rb +9 -0
  48. data/spec/transliterators/serbian_spec.rb +25 -0
  49. data/spec/transliterators/spanish_spec.rb +13 -0
  50. data/spec/transliterators/swedish_spec.rb +18 -0
  51. data/spec/transliterators/turkish_spec.rb +24 -0
  52. data/spec/transliterators/ukrainian_spec.rb +88 -0
  53. data/spec/transliterators/vietnamese_spec.rb +18 -0
  54. data/spec/utf8_proxy_spec.rb +53 -0
  55. metadata +167 -0
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA256:
3
+ metadata.gz: 87bf31a3fd0eae739657da5215e01e9997814e36fac547bdad9a3b82b4a7cbb5
4
+ data.tar.gz: ba4be2bf8b9d9f531c2e89cc378fb77bb46c2bafd1238938750d6ca703fd8e72
5
+ SHA512:
6
+ metadata.gz: fecfbf09cebc692a71693ccf8e1153135164f0dc353338dd262a2e1ce459fe706d95567bf1f7d65f9d0ef09d4c79df4c345c4c62572b2b4187686604ea5a39f0
7
+ data.tar.gz: 4ac136a1bdda3d4a7dfb986a75580c2b06882799aa8f6f7b20c8573ff3394e950c610ec9bafc0f8729b2b666bb4bda1cc5b73323730ccd57ef14fc3947e7b332
File without changes
@@ -0,0 +1,99 @@
1
+ # Kebab Changelog
2
+
3
+ ## 1.0.2
4
+
5
+ * Fix regression in ActiveSupport UTF8 proxy.
6
+
7
+ ## 1.0.1
8
+
9
+ * Fix error with tidy_bytes on Rubinius.
10
+ * Simplify Active Support UTF8 proxy.
11
+ * Fix `allow_bangs` argument to to_ruby_method being silently ignored.
12
+ * Raise error when generating an impossible Ruby method name.
13
+
14
+ ## 1.0.0
15
+
16
+ * Adopt semantic versioning.
17
+ * When using Active Support, require 3.2 or greater.
18
+ * Require Ruby 2.0 or greater.
19
+ * Fix Ruby warnings.
20
+ * Improve support for Ukrainian.
21
+ * Support some additional punctuation characters used by Chinese and others.
22
+ * Add Polish spec.
23
+ * Use native Unicode normalization on Ruby 2.2 in UTF8::DumbProxy.
24
+ * Invoke Ruby-native upcase/downcase in UTF8::DumbProxy.
25
+ * Proxy `tidy_bytes` method to Active Support when possible.
26
+ * Remove SlugString constant.
27
+
28
+ ## 0.3.11
29
+
30
+ * Add support for Vietnamese.
31
+
32
+ ## 0.3.10
33
+
34
+ * Fix Macedonian "S/S". Don't `include JRuby` unnecessarily.
35
+
36
+ ## 0.3.9
37
+
38
+ * Add missing Greek vowels with diaeresis.
39
+
40
+ ## 0.3.8
41
+
42
+ * Correct and improve Macedonian support.
43
+
44
+ ## 0.3.7
45
+
46
+ * Fix compatibility with Ruby 1.8.7.
47
+ * Add Swedish support.
48
+
49
+ ## 0.3.6
50
+
51
+ * Allow multiple transliterators.
52
+ * Add Greek support.
53
+
54
+ ## 0.3.5
55
+
56
+ * Don't strip underscores from identifiers.
57
+
58
+ ## 0.3.4
59
+
60
+ * Add Romanian support.
61
+
62
+ ## 0.3.3
63
+
64
+ * Add Norwegian support.
65
+
66
+ ## 0.3.2
67
+
68
+ * Improve Macedonian support.
69
+
70
+ ## 0.3.1
71
+
72
+ * Small fixes to Cyrillic.
73
+
74
+ ## 0.3.0
75
+
76
+ * Cyrillic support.
77
+ * Improve support for various Unicode spaces and dashes.
78
+
79
+ ## 0.2.2
80
+
81
+ * Fix for "smart" quote handling.
82
+
83
+ ## 0.2.1
84
+
85
+ * Implement #empty? for compatibility with Active Support's #blank?.
86
+
87
+ ## 0.2.0
88
+
89
+ * Add support for Danish.
90
+ * Add method to generate Ruby identifiers.
91
+ * Improve performance.
92
+
93
+ ## 0.1.1
94
+
95
+ * Add support for Serbian.
96
+
97
+ ## 0.1.0
98
+
99
+ * Initial extraction from FriendlyId.
@@ -0,0 +1,19 @@
1
+ Copyright (c) 2010 Norman Clarke
2
+
3
+ Permission is hereby granted, free of charge, to any person obtaining a copy
4
+ of this software and associated documentation files (the "Software"), to deal
5
+ in the Software without restriction, including without limitation the rights
6
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
7
+ copies of the Software, and to permit persons to whom the Software is
8
+ furnished to do so, subject to the following conditions:
9
+
10
+ The above copyright notice and this permission notice shall be included in all
11
+ copies or substantial portions of the Software.
12
+
13
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
16
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
17
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
18
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
19
+ SOFTWARE.
@@ -0,0 +1,26 @@
1
+ # Kebab
2
+
3
+ The original is [here](http://github.com/norman/kebab). You should probably
4
+ use that unless you hate monkey patching as much as I do.
5
+
6
+ ## Copyright
7
+
8
+ Copyright (c) 2010-2013 Norman Clarke
9
+
10
+ Permission is hereby granted, free of charge, to any person obtaining a copy of
11
+ this software and associated documentation files (the "Software"), to deal in
12
+ the Software without restriction, including without limitation the rights to
13
+ use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
14
+ of the Software, and to permit persons to whom the Software is furnished to do
15
+ so, subject to the following conditions:
16
+
17
+ The above copyright notice and this permission notice shall be included in all
18
+ copies or substantial portions of the Software.
19
+
20
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
21
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
22
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
23
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
24
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
25
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
26
+ SOFTWARE.
@@ -0,0 +1,34 @@
1
require "rubygems"
require "rake/testtask"
require "rake/clean"
require "rubygems/package_task"

# `rake` and `rake test` both run the RSpec suite defined at the bottom.
task :default => :spec
task :test => :spec

# Generated artifacts removed by `rake clean`.
CLEAN << "pkg" << "doc" << "coverage" << ".yardoc"

# YARD is optional: the documentation task is only defined when the gem loads.
begin
  require "yard"
  YARD::Rake::YardocTask.new do |t|
    t.options = ["--output-dir=doc"]
  end
rescue LoadError
  # YARD is not installed; skip the documentation task.
end

desc "Run SimpleCov"
task :coverage do
  # NOTE(review): presumably the spec helper reads COV to enable SimpleCov —
  # confirm against spec/spec_helper.rb.
  ENV["COV"] = "true"
  Rake::Task["spec"].execute
end

# Define the packaging tasks only when a gemspec sits next to this Rakefile.
# Gem::Specification.load evaluates the gemspec with correct file/line
# information instead of a bare eval of the file contents; it returns nil
# when the gemspec cannot be loaded, in which case no package task is defined.
gemspec = File.expand_path("../kebab.gemspec", __FILE__)
if File.exist?(gemspec)
  spec = Gem::Specification.load(gemspec)
  Gem::PackageTask.new(spec) { |pkg| } if spec
end

require 'rspec/core/rake_task'
RSpec::Core::RakeTask.new(:spec)
@@ -0,0 +1,18 @@
1
# Top-level namespace. Adds String#to_identifier (and its aliases) as a
# refinement, so the methods are only visible in scopes that call
# `using Kebab`.
module Kebab
  # Report whether the running interpreter is JRuby 1.5 or newer.
  #
  # The major and minor components are compared numerically; a plain string
  # comparison would wrongly treat "1.10" as older than "1.5". When
  # JRUBY_VERSION is not defined the answer is simply false.
  #
  # @return [Boolean]
  def self.jruby15?
    return false unless defined?(JRUBY_VERSION)

    major_minor = JRUBY_VERSION.to_s.split(".").first(2).map(&:to_i)
    (major_minor <=> [1, 5]) >= 0
  end

  refine String do
    # Wrap this string in a Kebab::Identifier.
    # @return [Kebab::Identifier]
    def to_identifier
      Kebab::Identifier.new self
    end

    alias to_slug to_identifier
    alias skewer to_identifier
  end
end
15
+
16
+ require "kebab/transliterator/base"
17
+ require "kebab/utf8/proxy"
18
+ require "kebab/identifier"
@@ -0,0 +1,294 @@
1
+ # encoding: utf-8
2
module Kebab

  # Codepoints for characters that will be deleted by +#word_chars!+.
  STRIPPABLE = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 11, 12, 14, 15, 16, 17, 18, 19,
    20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 33, 34, 35, 36, 37, 38, 39,
    40, 41, 42, 43, 44, 45, 46, 47, 58, 59, 60, 61, 62, 63, 64, 91, 92, 93, 94,
    96, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 136,
    137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151,
    152, 153, 154, 155, 156, 157, 158, 159, 161, 162, 163, 164, 165, 166, 167,
    168, 169, 171, 172, 173, 174, 175, 176, 177, 178, 179, 180, 182, 183, 184,
    185, 187, 188, 189, 190, 191, 215, 247, 8203, 8204, 8205, 8239, 65279]

  # This class provides some string-manipulation methods specific to slugs.
  #
  # Note that this class includes many "bang methods" such as {#clean!} and
  # {#normalize!} that perform actions on the string in-place. Each of these
  # methods has a corresponding "bangless" method (i.e., +Identifier#clean!+
  # and +Identifier#clean+) which does not appear in the documentation because
  # it is generated dynamically.
  #
  # All of the bang methods return an instance of String, while the bangless
  # versions return an instance of Kebab::Identifier, so that calls to methods
  # specific to this class can be chained:
  #
  #   string = Identifier.new("hello world")
  #   string.with_separators! # => "hello-world"
  #   string.with_separators  # => <Kebab::Identifier:0x000001013e1590 @wrapped_string="hello-world">
  #
  # Any method not defined here is forwarded to the wrapped String.
  #
  # @see http://www.utf8-chartable.de/unicode-utf8-table.pl?utf8=dec Unicode character table
  class Identifier
    using Kebab

    Error = Class.new(StandardError)

    attr_reader :wrapped_string
    alias to_s wrapped_string

    @@utf8_proxy = if Kebab.jruby15?
      UTF8::JavaProxy
    elsif defined? Unicode::VERSION
      UTF8::UnicodeProxy
    elsif defined? ActiveSupport
      UTF8::ActiveSupportProxy
    else
      UTF8::DumbProxy
    end

    # Return the proxy used for UTF-8 support.
    # @see Kebab::UTF8::Proxy
    def self.utf8_proxy
      @@utf8_proxy
    end

    # Set a proxy object used for UTF-8 support.
    # @see Kebab::UTF8::Proxy
    def self.utf8_proxy=(obj)
      @@utf8_proxy = obj
    end

    # Forward any method this class does not define to the wrapped String.
    def method_missing(symbol, *args, &block)
      @wrapped_string.__send__(symbol, *args, &block)
    end

    # Keep +respond_to?+ truthful about the methods that {#method_missing}
    # forwards to the wrapped String.
    def respond_to_missing?(symbol, include_private = false)
      @wrapped_string.respond_to?(symbol, include_private) || super
    end

    # @param string [#to_s] The string to use as the basis of the Identifier.
    def initialize(string)
      @wrapped_string = string.to_s
      tidy_bytes!
      normalize_utf8!
    end

    # Loose equality: compares the string forms of both operands.
    def ==(value)
      @wrapped_string.to_s == value.to_s
    end

    # Equality used for Hash keys. An Identifier is +eql?+ to a String, or to
    # another Identifier, holding the same characters.
    def eql?(value)
      other = value.is_a?(Identifier) ? value.to_s : value
      @wrapped_string == other
    end

    # Hash code derived from the wrapped string so that it agrees with
    # {#eql?}; without this, equal Identifiers would be distinct Hash keys.
    def hash
      @wrapped_string.hash
    end

    def empty?
      # included to make this class :respond_to? :empty for compatibility with Active Support's
      # #blank?
      @wrapped_string.empty?
    end

    # Approximate an ASCII string. This works only for Western strings using
    # characters that are Roman-alphabet characters + diacritics. Non-letter
    # characters are left unmodified.
    #
    #   string = Identifier.new "Łódź, Poland"
    #   string.transliterate                 # => "Lodz, Poland"
    #   string = Identifier.new "日本"
    #   string.transliterate                 # => "日本"
    #
    # You can pass any key(s) from +Characters.approximations+ as arguments. This allows
    # for contextual approximations. Various languages are supported, you can see which ones
    # by looking at the source of {Kebab::Transliterator::Base}.
    #
    #   string = Identifier.new "Jürgen Müller"
    #   string.transliterate                 # => "Jurgen Muller"
    #   string.transliterate :german         # => "Juergen Mueller"
    #   string = Identifier.new "¡Feliz año!"
    #   string.transliterate                 # => "¡Feliz ano!"
    #   string.transliterate :spanish        # => "¡Feliz anio!"
    #
    # The approximations are a hash, which you can modify if you choose:
    #
    #   # Make Spanish use "nh" rather than "ni"
    #   Kebab::Transliterator::Spanish::APPROXIMATIONS["ñ"] = "nh"
    #
    # Notice that this method does not simply convert to ASCII; if you want
    # to remove non-ASCII characters such as "¡" and "¿", use {#to_ascii!}:
    #
    #   string.transliterate!(:spanish)       # => "¡Feliz anio!"
    #   string.to_ascii!                      # => "Feliz anio!"
    #
    # When no kind is given (or only nils are given), +:latin+ is used.
    #
    # @param kinds [Array<Symbol>] transliterators to apply, in order
    # @return String
    def transliterate!(*kinds)
      kinds.compact!
      kinds = [:latin] if kinds.empty?
      kinds.each do |kind|
        transliterator = Transliterator.get(kind).instance
        @wrapped_string = transliterator.transliterate(@wrapped_string)
      end
      @wrapped_string
    end

    # Converts dashes to spaces, collapses runs of spaces into a single
    # space, and removes leading and trailing whitespace.
    # @return String
    def clean!
      @wrapped_string = @wrapped_string.gsub("-", " ").squeeze(" ").strip
    end

    # Remove any non-word characters. For this library's purposes, this means
    # every codepoint listed in {Kebab::STRIPPABLE}; letters, numbers, spaces,
    # underscores, carriage returns and linefeeds are kept.
    # @return String
    def word_chars!
      @wrapped_string = (@wrapped_string.unpack("U*") - Kebab::STRIPPABLE).pack("U*")
    end

    # Normalize the string for use as a URL slug. Note that in this context,
    # +normalize+ means, strip, remove non-letters/numbers, downcasing,
    # truncating to 255 bytes and converting whitespace to dashes.
    #
    # Recognized options (merged over {#default_normalize_options}):
    # +:transliterate+ (+true+, or the kind(s) to pass to {#transliterate!}),
    # +:transliterations+ (kinds used when +:transliterate+ is +true+),
    # +:to_ascii+, +:max_length+ (bytes) and +:separator+.
    #
    # @param options [Hash, nil]
    # @return String
    def normalize!(options = nil)
      options = default_normalize_options.merge(options || {})

      translit_option = options[:transliterate]
      if translit_option
        if translit_option == true
          transliterate!(*options[:transliterations])
        else
          transliterate!(*translit_option)
        end
      end
      to_ascii! if options[:to_ascii]
      clean!
      word_chars!
      clean!
      downcase!
      truncate_bytes!(options[:max_length])
      with_separators!(options[:separator])
    end

    # Normalize a string so that it can safely be used as a Ruby method name.
    #
    # The last character is handled separately from the rest so that a
    # trailing "!", "=" or "?" can survive when +allow_bangs+ is true.
    #
    # @param allow_bangs [Boolean] keep a trailing "!", "=" or "?"
    # @raise [Kebab::Error] when nothing usable is left of the input
    # @return String
    def to_ruby_method!(allow_bangs = true)
      # (.*) rather than (.+) so that one-character names such as "a" are
      # accepted instead of being rejected as impossible.
      match = @wrapped_string.strip.match(/\A(.*)(.)\z/)
      leader, trailer = match ? match.captures : ["", ""]

      # Non-mutating calls: the strings may be frozen (nil.to_s is frozen on
      # Ruby >= 2.7), and the in-place variants raised FrozenError there.
      disallowed = allow_bangs ? /[^a-z0-9!=?]/ : /[^a-z0-9]/
      trailer = trailer.downcase.gsub(disallowed, '')

      id = leader.to_identifier
      id.transliterate!
      id.to_ascii!
      id.clean!
      id.word_chars!
      id.clean!
      @wrapped_string = id.to_s + trailer
      if @wrapped_string.empty?
        raise Error, "Input generates impossible Ruby method name"
      end
      with_separators!("_")
    end

    # Delete any non-ascii characters.
    # @return String
    def to_ascii!
      @wrapped_string = @wrapped_string.gsub(/[^\x00-\x7f]/u, '')
    end

    # Truncate the string to +max+ characters.
    # @example
    #   "üéøá".to_identifier.truncate(3) #=> "üéø"
    # @return String
    def truncate!(max)
      @wrapped_string = @wrapped_string.unpack("U*")[0...max].pack("U*")
    end

    # Truncate the string to +max+ bytes. This can be useful for ensuring that
    # a UTF-8 string will always fit into a database column with a certain max
    # byte length. The resulting string may be less than +max+ if the string must
    # be truncated at a multibyte character boundary.
    # @example
    #   "üéøá".to_identifier.truncate_bytes(3) #=> "ü"
    # @return String
    def truncate_bytes!(max)
      return @wrapped_string if @wrapped_string.bytesize <= max

      total = 0
      kept = []
      @wrapped_string.unpack("U*").each do |codepoint|
        char = [codepoint].pack("U")
        total += char.bytesize
        # Stop at the first character that would cross the byte limit.
        break if total > max
        kept << char
      end
      @wrapped_string = kept.join
    end

    # Replaces whitespace with dashes ("-").
    # @param char [String] the separator to substitute for each whitespace character
    # @return String
    def with_separators!(char = "-")
      @wrapped_string = @wrapped_string.gsub(/\s/u, char)
    end

    # Perform UTF-8 sensitive upcasing.
    # @return String
    def upcase!
      @wrapped_string = @@utf8_proxy.upcase(@wrapped_string)
    end

    # Perform UTF-8 sensitive downcasing.
    # @return String
    def downcase!
      @wrapped_string = @@utf8_proxy.downcase(@wrapped_string)
    end

    # Perform Unicode composition on the wrapped string.
    # @return String
    def normalize_utf8!
      @wrapped_string = @@utf8_proxy.normalize_utf8(@wrapped_string)
    end

    # Attempt to convert characters encoded using CP1252 and ISO-8859-1 to
    # UTF-8.
    # @return String
    def tidy_bytes!
      @wrapped_string = @@utf8_proxy.tidy_bytes(@wrapped_string)
    end

    # Generate the bangless counterpart of each bang method: it runs the bang
    # method on a copy and returns that copy, leaving the receiver untouched.
    %w[transliterate clean downcase word_chars normalize normalize_utf8
      tidy_bytes to_ascii to_ruby_method truncate truncate_bytes upcase
      with_separators].each do |name|
      define_method(name) do |*args|
        send_to_new_instance(:"#{name}!", *args)
      end
    end

    def to_identifier
      self
    end

    # The default options for {#normalize!}. Override to set your own defaults.
    def default_normalize_options
      {:transliterate => true, :max_length => 255, :separator => "-"}
    end

    alias approximate_ascii transliterate
    alias approximate_ascii! transliterate!
    alias with_dashes with_separators
    alias with_dashes! with_separators!
    alias to_slug to_identifier

    private

    # Used as the basis of the bangless methods.
    def send_to_new_instance(*args)
      # self.class (not Identifier) so that subclasses overriding
      # default_normalize_options keep their behavior in bangless calls.
      # allocate skips #initialize: the string has already been tidied.
      id = self.class.allocate
      id.instance_variable_set :@wrapped_string, to_s
      id.send(*args)
      id
    end
  end
end