friendly_id 3.0.6 → 3.1.0.pre

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,397 +1,27 @@
1
1
  # encoding: utf-8
2
2
  module FriendlyId
3
3
 
4
- # This class provides some string-manipulation methods specific to slugs.
5
- # Its Unicode support is provided by ActiveSupport::Multibyte::Chars; this
6
- # is needed primarily for Unicode encoding normalization and proper
7
- # calculation of string lengths.
8
- #
9
- # Note that this class includes many "bang methods" such as {#clean!} and {#normalize!}
10
- # that perform actions on the string in-place. Each of these methods has a
11
- # corresponding "bangless" method (e.g., +SlugString#clean!+ and +SlugString#clean+)
12
- # which does not appear in the documentation because it is generated dynamically.
13
- #
14
- # All of the bang methods return an instance of String, while the bangless
15
- # versions return an instance of FriendlyId::SlugString, so that calls to
16
- # methods specific to this class can be chained:
17
- #
18
- # string = SlugString.new("hello world")
19
- # string.with_dashes! # => "hello-world"
20
- # string.with_dashes # => <FriendlyId::SlugString:0x000001013e1590 @wrapped_string="hello-world">
21
- #
22
- # @see http://www.utf8-chartable.de/unicode-utf8-table.pl?utf8=dec Unicode character table
23
- # @see FriendlyId::SlugString::dump_approximations
24
- class SlugString < ActiveSupport::Multibyte::Chars
25
-
26
- # All values are Unicode decimal characters or character arrays.
27
- APPROXIMATIONS = {
28
- :common => Hash[
29
- 192, 65, 193, 65, 194, 65, 195, 65, 196, 65, 197, 65, 198, [65, 69],
30
- 199, 67, 200, 69, 201, 69, 202, 69, 203, 69, 204, 73, 205, 73, 206,
31
- 73, 207, 73, 208, 68, 209, 78, 210, 79, 211, 79, 212, 79, 213, 79,
32
- 214, 79, 215, 120, 216, 79, 217, 85, 218, 85, 219, 85, 220, 85, 221,
33
- 89, 222, [84, 104], 223, [115, 115], 224, 97, 225, 97, 226, 97, 227,
34
- 97, 228, 97, 229, 97, 230, [97, 101], 231, 99, 232, 101, 233, 101,
35
- 234, 101, 235, 101, 236, 105, 237, 105, 238, 105, 239, 105, 240, 100,
36
- 241, 110, 242, 111, 243, 111, 244, 111, 245, 111, 246, 111, 248, 111,
37
- 249, 117, 250, 117, 251, 117, 252, 117, 253, 121, 254, [116, 104],
38
- 255, 121, 256, 65, 257, 97, 258, 65, 259, 97, 260, 65, 261, 97, 262,
39
- 67, 263, 99, 264, 67, 265, 99, 266, 67, 267, 99, 268, 67, 269, 99,
40
- 270, 68, 271, 100, 272, 68, 273, 100, 274, 69, 275, 101, 276, 69, 277,
41
- 101, 278, 69, 279, 101, 280, 69, 281, 101, 282, 69, 283, 101, 284, 71,
42
- 285, 103, 286, 71, 287, 103, 288, 71, 289, 103, 290, 71, 291, 103,
43
- 292, 72, 293, 104, 294, 72, 295, 104, 296, 73, 297, 105, 298, 73, 299,
44
- 105, 300, 73, 301, 105, 302, 73, 303, 105, 304, 73, 305, 105, 306,
45
- [73, 74], 307, [105, 106], 308, 74, 309, 106, 310, 75, 311, 107, 312,
46
- 107, 313, 76, 314, 108, 315, 76, 316, 108, 317, 76, 318, 108, 319, 76,
47
- 320, 108, 321, 76, 322, 108, 323, 78, 324, 110, 325, 78, 326, 110,
48
- 327, 78, 328, 110, 329, [39, 110], 330, [78, 71], 331, [110, 103],
49
- 332, 79, 333, 111, 334, 79, 335, 111, 336, 79, 337, 111, 338, [79,
50
- 69], 339, [111, 101], 340, 82, 341, 114, 342, 82, 343, 114, 344, 82,
51
- 345, 114, 346, 83, 347, 115, 348, 83, 349, 115, 350, 83, 351, 115,
52
- 352, 83, 353, 115, 354, 84, 355, 116, 356, 84, 357, 116, 358, 84, 359,
53
- 116, 360, 85, 361, 117, 362, 85, 363, 117, 364, 85, 365, 117, 366, 85,
54
- 367, 117, 368, 85, 369, 117, 370, 85, 371, 117, 372, 87, 373, 119,
55
- 374, 89, 375, 121, 376, 89, 377, 90, 378, 122, 379, 90, 380, 122, 381,
56
- 90, 382, 122
57
- ].freeze,
58
- :german => Hash[252, [117, 101], 246, [111, 101], 228, [97, 101]],
59
- :spanish => Hash[209, [78, 110], 241, [110, 110]]
60
- }
61
-
62
- # CP-1252 decimal byte => UTF-8 approximation as an array of bytes
63
- CP1252 = {
64
- 128 => [226, 130, 172],
65
- 129 => nil,
66
- 130 => [226, 128, 154],
67
- 131 => [198, 146],
68
- 132 => [226, 128, 158],
69
- 133 => [226, 128, 166],
70
- 134 => [226, 128, 160],
71
- 135 => [226, 128, 161],
72
- 136 => [203, 134],
73
- 137 => [226, 128, 176],
74
- 138 => [197, 160],
75
- 139 => [226, 128, 185],
76
- 140 => [197, 146],
77
- 141 => nil,
78
- 142 => [197, 189],
79
- 143 => nil,
80
- 144 => nil,
81
- 145 => [226, 128, 152],
82
- 146 => [226, 128, 153],
83
- 147 => [226, 128, 156],
84
- 148 => [226, 128, 157],
85
- 149 => [226, 128, 162],
86
- 150 => [226, 128, 147],
87
- 151 => [226, 128, 148],
88
- 152 => [203, 156],
89
- 153 => [226, 132, 162],
90
- 154 => [197, 161],
91
- 155 => [226, 128, 186],
92
- 156 => [197, 147],
93
- 157 => nil,
94
- 158 => [197, 190],
95
- 159 => [197, 184]
96
- }
97
-
98
- cattr_accessor :approximations
99
- self.approximations = []
100
-
101
- # This method can be used by developers wishing to debug the
102
- # {APPROXIMATIONS} hashes, which are written in a hard-to-read format.
103
- # @return Hash
104
- # @example
105
- #
106
- # > ruby -rrubygems -rlib/friendly_id -e 'p FriendlyId::SlugString.dump_approximations'
107
- #
108
- # {:common =>
109
- # {"À"=>"A", "Á"=>"A", "Â"=>"A", "Ã"=>"A", "Ä"=>"A", "Å"=>"A", "Æ"=>"AE",
110
- # "Ç"=>"C", "È"=>"E", "É"=>"E", "Ê"=>"E", "Ë"=>"E", "Ì"=>"I", "Í"=>"I",
111
- # "Î"=>"I", "Ï"=>"I", "Ð"=>"D", "Ñ"=>"N", "Ò"=>"O", "Ó"=>"O", "Ô"=>"O",
112
- # "Õ"=>"O", "Ö"=>"O", "×"=>"x", "Ø"=>"O", "Ù"=>"U", "Ú"=>"U", "Û"=>"U",
113
- # "Ü"=>"U", "Ý"=>"Y", "Þ"=>"Th", "ß"=>"ss", "à"=>"a", "á"=>"a", "â"=>"a",
114
- # "ã"=>"a", "ä"=>"a", "å"=>"a", "æ"=>"ae", "ç"=>"c", "è"=>"e", "é"=>"e",
115
- # "ê"=>"e", "ë"=>"e", "ì"=>"i", "í"=>"i", "î"=>"i", "ï"=>"i", "ð"=>"d",
116
- # "ñ"=>"n", "ò"=>"o", "ó"=>"o", "ô"=>"o", "õ"=>"o", "ö"=>"o", "ø"=>"o",
117
- # "ù"=>"u", "ú"=>"u", "û"=>"u", "ü"=>"u", "ý"=>"y", "þ"=>"th", "ÿ"=>"y",
118
- # "Ā"=>"A", "ā"=>"a", "Ă"=>"A", "ă"=>"a", "Ą"=>"A", "ą"=>"a", "Ć"=>"C",
119
- # "ć"=>"c", "Ĉ"=>"C", "ĉ"=>"c", "Ċ"=>"C", "ċ"=>"c", "Č"=>"C", "č"=>"c",
120
- # "Ď"=>"D", "ď"=>"d", "Đ"=>"D", "đ"=>"d", "Ē"=>"E", "ē"=>"e", "Ĕ"=>"E",
121
- # "ĕ"=>"e", "Ė"=>"E", "ė"=>"e", "Ę"=>"E", "ę"=>"e", "Ě"=>"E", "ě"=>"e",
122
- # "Ĝ"=>"G", "ĝ"=>"g", "Ğ"=>"G", "ğ"=>"g", "Ġ"=>"G", "ġ"=>"g", "Ģ"=>"G",
123
- # "ģ"=>"g", "Ĥ"=>"H", "ĥ"=>"h", "Ħ"=>"H", "ħ"=>"h", "Ĩ"=>"I", "ĩ"=>"i",
124
- # "Ī"=>"I", "ī"=>"i", "Ĭ"=>"I", "ĭ"=>"i", "Į"=>"I", "į"=>"i", "İ"=>"I",
125
- # "ı"=>"i", "IJ"=>"IJ", "ij"=>"ij", "Ĵ"=>"J", "ĵ"=>"j", "Ķ"=>"K", "ķ"=>"k",
126
- # "ĸ"=>"k", "Ĺ"=>"L", "ĺ"=>"l", "Ļ"=>"L", "ļ"=>"l", "Ľ"=>"L", "ľ"=>"l",
127
- # "Ŀ"=>"L", "ŀ"=>"l", "Ł"=>"L", "ł"=>"l", "Ń"=>"N", "ń"=>"n", "Ņ"=>"N",
128
- # "ņ"=>"n", "Ň"=>"N", "ň"=>"n", "ʼn"=>"'n", "Ŋ"=>"NG", "ŋ"=>"ng",
129
- # "Ō"=>"O", "ō"=>"o", "Ŏ"=>"O", "ŏ"=>"o", "Ő"=>"O", "ő"=>"o", "Œ"=>"OE",
130
- # "œ"=>"oe", "Ŕ"=>"R", "ŕ"=>"r", "Ŗ"=>"R", "ŗ"=>"r", "Ř"=>"R", "ř"=>"r",
131
- # "Ś"=>"S", "ś"=>"s", "Ŝ"=>"S", "ŝ"=>"s", "Ş"=>"S", "ş"=>"s", "Š"=>"S",
132
- # "š"=>"s", "Ţ"=>"T", "ţ"=>"t", "Ť"=>"T", "ť"=>"t", "Ŧ"=>"T", "ŧ"=>"t",
133
- # "Ũ"=>"U", "ũ"=>"u", "Ū"=>"U", "ū"=>"u", "Ŭ"=>"U", "ŭ"=>"u", "Ů"=>"U",
134
- # "ů"=>"u", "Ű"=>"U", "ű"=>"u", "Ų"=>"U", "ų"=>"u", "Ŵ"=>"W", "ŵ"=>"w",
135
- # "Ŷ"=>"Y", "ŷ"=>"y", "Ÿ"=>"Y", "Ź"=>"Z", "ź"=>"z", "Ż"=>"Z", "ż"=>"z",
136
- # "Ž"=>"Z", "ž"=>"z"},
137
- # :german => {"ü"=>"ue", "ö"=>"oe", "ä"=>"ae"},
138
- # :spanish => {"Ñ"=>"Nn", "ñ"=>"nn"}}
139
- def self.dump_approximations
140
- Hash[APPROXIMATIONS.map do |name, approx|
141
- [name, Hash[approx.map {|key, value| [[key].pack("U*"), [value].flatten.pack("U*")]}]]
142
- end]
143
- end
144
-
145
-
146
- # @param string [String] The string to use as the basis of the SlugString.
147
- def initialize(string)
148
- super string.to_s
149
- tidy_bytes!
150
- end
151
-
152
- # Approximate an ASCII string. This works only for Western strings using
153
- # characters that are Roman-alphabet characters + diacritics. Non-letter
154
- # characters are left unmodified.
155
- #
156
- # string = SlugString.new "Łódź, Poland"
157
- # string.approximate_ascii # => "Lodz, Poland"
158
- # string = SlugString.new "日本"
159
- # string.approximate_ascii # => "日本"
160
- #
161
- # You can pass any key(s) from {APPROXIMATIONS} as arguments. This allows
162
- # for contextual approximations. By default, +:spanish+ and +:german+ are
163
- # provided:
164
- #
165
- # string = SlugString.new "Jürgen Müller"
166
- # string.approximate_ascii # => "Jurgen Muller"
167
- # string.approximate_ascii :german # => "Juergen Mueller"
168
- # string = SlugString.new "¡Feliz año!"
169
- # string.approximate_ascii # => "¡Feliz ano!"
170
- # string.approximate_ascii :spanish # => "¡Feliz anno!"
171
- #
172
- # You can modify the built-in approximations, or add your own:
173
- #
174
- # # Make Spanish use "nh" rather than "nn"
175
- # FriendlyId::SlugString::APPROXIMATIONS[:spanish] = {
176
- # # Ñ => "Nh"
177
- # 209 => [78, 104],
178
- # # ñ => "nh"
179
- # 241 => [110, 104]
180
- # }
181
- #
182
- # It's also possible to use a custom approximation for all strings:
183
- #
184
- # FriendlyId::SlugString.approximations << :german
185
- #
186
- # Notice that this method does not simply convert to ASCII; if you want
187
- # to remove non-ASCII characters such as "¡" and "¿", use {#to_ascii!}:
188
- #
189
- # string.approximate_ascii!(:spanish) # => "¡Feliz anno!"
190
- # string.to_ascii! # => "Feliz anno!"
191
- # @param *args <Symbol>
192
- # @return String
193
- def approximate_ascii!(*args)
194
- @maps = (self.class.approximations + args.flatten + [:common]).flatten.uniq
195
- @wrapped_string = normalize_utf8(:c).unpack("U*").map { |char| approx_char(char) }.flatten.pack("U*")
196
- end
197
-
198
- # Removes leading and trailing spaces or dashes, and replaces multiple
199
- # whitespace characters with a single space.
200
- # @return String
201
- def clean!
202
- @wrapped_string = @wrapped_string.gsub(/\A\-|\-\z/, "").gsub(/\s+/u, " ").strip
203
- end
204
-
205
- # Lowercases the string. Note that this works for Unicode strings,
206
- # though your mileage may vary with Greek and Turkic strings.
207
- # @return String
208
- def downcase!
209
- @wrapped_string = apply_mapping :lowercase_mapping
210
- end
211
-
212
- if defined? ActiveSupport::Multibyte::Unicode
213
- def apply_mapping(*args)
214
- ActiveSupport::Multibyte::Unicode.apply_mapping(@wrapped_string, *args)
215
- end
216
- end
217
-
218
- # Remove any non-word characters.
219
- # @return String
220
- def word_chars!
221
- @wrapped_string = normalize_utf8(:c).unpack("U*").map { |char|
222
- case char
223
- # control chars
224
- when 0..31
225
- # punctuation; 45 is "-" (HYPHEN-MINUS) and allowed
226
- when 33..44
227
- # more punctuation
228
- when 46..47
229
- # more punctuation and other symbols
230
- when 58..64
231
- # brackets and other symbols
232
- when 91..96
233
- # braces, pipe, tilde, etc.
234
- when 123..191
235
- else char
236
- end
237
- }.compact.pack("U*")
238
- end
239
-
240
- # Normalize the string for a given {FriendlyId::Configuration}.
241
- # @param config [FriendlyId::Configuration]
242
- # @return String
243
- def normalize_for!(config)
244
- approximate_ascii!(config.ascii_approximation_options) if config.approximate_ascii?
245
- to_ascii! if config.strip_non_ascii?
246
- normalize!
247
- end
248
-
249
- alias :normalize_utf8 :normalize rescue NoMethodError
250
-
251
- # Normalize the string for use as a FriendlyId. Note that in
252
- # this context, +normalize+ means, strip, remove non-letters/numbers,
253
- # downcasing and converting whitespace to dashes.
254
- # ActiveSupport::Multibyte::Chars#normalize is aliased to +normalize_utf8+
255
- # in this subclass.
256
- # @return String
257
- def normalize!
258
- clean!
259
- word_chars!
260
- downcase!
261
- with_dashes!
262
- end
263
-
264
- # Attempt to replace invalid UTF-8 bytes with valid ones. This method
265
- # naively assumes if you have invalid UTF8 bytes, they are either Windows
266
- # CP-1252 or ISO8859-1. In practice this isn't a bad assumption, but may not
267
- # always work.
268
- #
269
- # Passing +true+ will forcibly tidy all bytes, assuming that the string's
270
- # encoding is CP-1252 or ISO-8859-1.
271
- def tidy_bytes!(force = false)
272
-
273
- if force
274
- @wrapped_string = @wrapped_string.unpack("C*").map do |b|
275
- tidy_byte(b)
276
- end.flatten.compact.pack("C*").unpack("U*").pack("U*")
277
- end
278
-
279
- bytes = @wrapped_string.unpack("C*")
280
- conts_expected = 0
281
- last_lead = 0
282
-
283
- bytes.each_index do |i|
284
-
285
- byte = bytes[i]
286
- is_ascii = byte < 128
287
- is_cont = byte > 127 && byte < 192
288
- is_lead = byte > 191 && byte < 245
289
- is_unused = byte > 240
290
- is_restricted = byte > 244
291
-
292
- # Impossible or highly unlikely byte? Clean it.
293
- if is_unused || is_restricted
294
- bytes[i] = tidy_byte(byte)
295
- elsif is_cont
296
- # Not expecting continuation byte? Clean up. Otherwise, now expect one less.
297
- conts_expected == 0 ? bytes[i] = tidy_byte(byte) : conts_expected -= 1
298
- else
299
- if conts_expected > 0
300
- # Expected continuation, but got ASCII or leading? Clean backwards up to
301
- # the leading byte.
302
- (1..(i - last_lead)).each {|j| bytes[i - j] = tidy_byte(bytes[i - j])}
303
- conts_expected = 0
304
- end
305
- if is_lead
306
- # Final byte is leading? Clean it.
307
- if i == bytes.length - 1
308
- bytes[i] = tidy_byte(bytes.last)
309
- else
310
- # Valid leading byte? Expect continuations determined by position of
311
- # first zero bit, with max of 3.
312
- conts_expected = byte < 224 ? 1 : byte < 240 ? 2 : 3
313
- last_lead = i
314
- end
315
- end
316
- end
317
- end
318
- @wrapped_string = bytes.empty? ? "" : bytes.flatten.compact.pack("C*").unpack("U*").pack("U*")
319
- end
320
-
321
- # Delete any non-ascii characters.
322
- # @return String
323
- def to_ascii!
324
- if ">= 1.9".respond_to?(:force_encoding)
325
- @wrapped_string.encode!("ASCII", :invalid => :replace, :undef => :replace,
326
- :replace => "")
327
- else
328
- @wrapped_string = tidy_bytes.normalize_utf8(:c).unpack("U*").reject {|char| char > 127}.pack("U*")
329
- end
330
- end
331
-
332
- # Truncate the string to +max+ length.
333
- # @return String
334
- def truncate!(max)
335
- @wrapped_string = self[0...max].to_s if length > max
336
- end
337
-
338
- # Upper-cases the string. Note that this works for Unicode strings,
339
- # though your mileage may vary with Greek and Turkic strings.
340
- # @return String
341
- def upcase!
342
- @wrapped_string = apply_mapping :uppercase_mapping
343
- end
344
-
345
- # Validate that the slug string is not blank or reserved, and truncate
346
- # it to the max length if necessary.
347
- # @param config [FriendlyId::Configuration]
348
- # @return String
349
- # @raise FriendlyId::BlankError
350
- # @raise FriendlyId::ReservedError
351
- def validate_for!(config)
352
- truncate!(config.max_length)
353
- raise FriendlyId::BlankError if blank?
354
- raise FriendlyId::ReservedError if config.reserved?(self)
355
- self
356
- end
357
-
358
- # Replaces whitespace with dashes ("-").
359
- # @return String
360
- def with_dashes!
361
- @wrapped_string = @wrapped_string.gsub(/[\s\-]+/u, "-")
362
- end
363
-
364
- %w[approximate_ascii clean downcase word_chars normalize normalize_for tidy_bytes
365
- to_ascii truncate upcase with_dashes].each do |method|
366
- class_eval(<<-EOM)
367
- def #{method}(*args)
368
- send_to_new_instance(:#{method}!, *args)
369
- end
370
- EOM
371
- end
372
-
373
- private
374
-
375
- # Look up the character's approximation in the configured maps.
376
- def approx_char(char)
377
- @maps.each do |map|
378
- if new_char = APPROXIMATIONS[map][char]
379
- return new_char
380
- end
381
- end
382
- char
383
- end
384
-
385
- # Used as the basis of the bangless methods.
386
- def send_to_new_instance(*args)
387
- string = SlugString.new self
388
- string.send(*args)
389
- string
390
- end
391
-
392
- def tidy_byte(byte)
393
- byte < 160 ? CP1252[byte] : byte < 192 ? [194, byte] : [195, byte - 64]
394
- end
395
-
4
+ class SlugString < Babosa::SlugString
5
+ # Normalize the string for a given {FriendlyId::Configuration}.
6
+ # @param config [FriendlyId::Configuration]
7
+ # @return String
8
+ def normalize_for!(config)
9
+ approximate_ascii!(config.ascii_approximation_options) if config.approximate_ascii?
10
+ to_ascii! if config.strip_non_ascii?
11
+ normalize!
12
+ end
13
+
14
+ # Validate that the slug string is not blank or reserved, and truncate
15
+ # it to the max length if necessary.
16
+ # @param config [FriendlyId::Configuration]
17
+ # @return String
18
+ # @raise FriendlyId::BlankError
19
+ # @raise FriendlyId::ReservedError
20
+ def validate_for!(config)
21
+ truncate_bytes!(config.max_length)
22
+ raise FriendlyId::BlankError if blank?
23
+ raise FriendlyId::ReservedError if config.reserved?(self)
24
+ self
25
+ end
396
26
  end
397
27
  end
@@ -54,8 +54,12 @@ module FriendlyId
54
54
  end
55
55
 
56
56
  # The friendly id.
57
- def friendly_id
58
- slug.to_friendly_id if slug?
57
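+ # @param skip_cache [Boolean] When true, ignore the cache column (if one is configured) and derive the friendly id from the slug.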
58
+ def friendly_id(skip_cache = false)
59
+ if friendly_id_config.cache_column? && !skip_cache
60
+ friendly_id = send(friendly_id_config.cache_column)
61
+ end
62
+ friendly_id || (slug.to_friendly_id if slug?)
59
63
  end
60
64
 
61
65
  # Clean up the string before setting it as the friendly_id. You can override
@@ -98,4 +102,4 @@ module FriendlyId
98
102
  end
99
103
  end
100
104
  end
101
- end
105
+ end
@@ -212,9 +212,8 @@ module FriendlyId
212
212
  klass.friendly_id_config.stubs(:approximate_ascii?).returns(true)
213
213
  klass.friendly_id_config.stubs(:ascii_approximation_options).returns(:spanish)
214
214
  instance = klass.send(create_method, :name => "Cañón")
215
- assert_equal "cannon", instance.friendly_id
215
+ assert_equal "canion", instance.friendly_id
216
216
  end
217
-
218
217
  end
219
218
 
220
219
  # Tests for FriendlyId::Status.