friendly_id 3.0.6 → 3.1.0.pre

Sign up to get free protection for your applications and to get access to all the features.
@@ -1,397 +1,27 @@
1
1
  # encoding: utf-8
2
2
  module FriendlyId
3
3
 
4
- # This class provides some string-manipulation methods specific to slugs.
5
- # Its Unicode support is provided by ActiveSupport::Multibyte::Chars; this
6
- # is needed primarily for Unicode encoding normalization and proper
7
- # calculation of string lengths.
8
- #
9
- # Note that this class includes many "bang methods" such as {#clean!} and {#normalize!}
10
- # that perform actions on the string in-place. Each of these methods has a
11
- # corresponding "bangless" method (e.g., +SlugString#clean!+ and +SlugString#clean+)
12
- # which does not appear in the documentation because it is generated dynamically.
13
- #
14
- # All of the bang methods return an instance of String, while the bangless
15
- # versions return an instance of FriendlyId::SlugString, so that calls to
16
- # methods specific to this class can be chained:
17
- #
18
- # string = SlugString.new("hello world")
19
- # string.with_dashes! # => "hello-world"
20
- # string.with_dashes # => <FriendlyId::SlugString:0x000001013e1590 @wrapped_string="hello-world">
21
- #
22
- # @see http://www.utf8-chartable.de/unicode-utf8-table.pl?utf8=dec Unicode character table
23
- # @see FriendlyId::SlugString::dump_approximations
24
- class SlugString < ActiveSupport::Multibyte::Chars
25
-
26
- # All values are Unicode decimal characters or character arrays.
27
- APPROXIMATIONS = {
28
- :common => Hash[
29
- 192, 65, 193, 65, 194, 65, 195, 65, 196, 65, 197, 65, 198, [65, 69],
30
- 199, 67, 200, 69, 201, 69, 202, 69, 203, 69, 204, 73, 205, 73, 206,
31
- 73, 207, 73, 208, 68, 209, 78, 210, 79, 211, 79, 212, 79, 213, 79,
32
- 214, 79, 215, 120, 216, 79, 217, 85, 218, 85, 219, 85, 220, 85, 221,
33
- 89, 222, [84, 104], 223, [115, 115], 224, 97, 225, 97, 226, 97, 227,
34
- 97, 228, 97, 229, 97, 230, [97, 101], 231, 99, 232, 101, 233, 101,
35
- 234, 101, 235, 101, 236, 105, 237, 105, 238, 105, 239, 105, 240, 100,
36
- 241, 110, 242, 111, 243, 111, 244, 111, 245, 111, 246, 111, 248, 111,
37
- 249, 117, 250, 117, 251, 117, 252, 117, 253, 121, 254, [116, 104],
38
- 255, 121, 256, 65, 257, 97, 258, 65, 259, 97, 260, 65, 261, 97, 262,
39
- 67, 263, 99, 264, 67, 265, 99, 266, 67, 267, 99, 268, 67, 269, 99,
40
- 270, 68, 271, 100, 272, 68, 273, 100, 274, 69, 275, 101, 276, 69, 277,
41
- 101, 278, 69, 279, 101, 280, 69, 281, 101, 282, 69, 283, 101, 284, 71,
42
- 285, 103, 286, 71, 287, 103, 288, 71, 289, 103, 290, 71, 291, 103,
43
- 292, 72, 293, 104, 294, 72, 295, 104, 296, 73, 297, 105, 298, 73, 299,
44
- 105, 300, 73, 301, 105, 302, 73, 303, 105, 304, 73, 305, 105, 306,
45
- [73, 74], 307, [105, 106], 308, 74, 309, 106, 310, 75, 311, 107, 312,
46
- 107, 313, 76, 314, 108, 315, 76, 316, 108, 317, 76, 318, 108, 319, 76,
47
- 320, 108, 321, 76, 322, 108, 323, 78, 324, 110, 325, 78, 326, 110,
48
- 327, 78, 328, 110, 329, [39, 110], 330, [78, 71], 331, [110, 103],
49
- 332, 79, 333, 111, 334, 79, 335, 111, 336, 79, 337, 111, 338, [79,
50
- 69], 339, [111, 101], 340, 82, 341, 114, 342, 82, 343, 114, 344, 82,
51
- 345, 114, 346, 83, 347, 115, 348, 83, 349, 115, 350, 83, 351, 115,
52
- 352, 83, 353, 115, 354, 84, 355, 116, 356, 84, 357, 116, 358, 84, 359,
53
- 116, 360, 85, 361, 117, 362, 85, 363, 117, 364, 85, 365, 117, 366, 85,
54
- 367, 117, 368, 85, 369, 117, 370, 85, 371, 117, 372, 87, 373, 119,
55
- 374, 89, 375, 121, 376, 89, 377, 90, 378, 122, 379, 90, 380, 122, 381,
56
- 90, 382, 122
57
- ].freeze,
58
- :german => Hash[252, [117, 101], 246, [111, 101], 228, [97, 101]],
59
- :spanish => Hash[209, [78, 110], 241, [110, 110]]
60
- }
61
-
62
- # CP-1252 decimal byte => UTF-8 approximation as an array of bytes
63
- CP1252 = {
64
- 128 => [226, 130, 172],
65
- 129 => nil,
66
- 130 => [226, 128, 154],
67
- 131 => [198, 146],
68
- 132 => [226, 128, 158],
69
- 133 => [226, 128, 166],
70
- 134 => [226, 128, 160],
71
- 135 => [226, 128, 161],
72
- 136 => [203, 134],
73
- 137 => [226, 128, 176],
74
- 138 => [197, 160],
75
- 139 => [226, 128, 185],
76
- 140 => [197, 146],
77
- 141 => nil,
78
- 142 => [197, 189],
79
- 143 => nil,
80
- 144 => nil,
81
- 145 => [226, 128, 152],
82
- 146 => [226, 128, 153],
83
- 147 => [226, 128, 156],
84
- 148 => [226, 128, 157],
85
- 149 => [226, 128, 162],
86
- 150 => [226, 128, 147],
87
- 151 => [226, 128, 148],
88
- 152 => [203, 156],
89
- 153 => [226, 132, 162],
90
- 154 => [197, 161],
91
- 155 => [226, 128, 186],
92
- 156 => [197, 147],
93
- 157 => nil,
94
- 158 => [197, 190],
95
- 159 => [197, 184]
96
- }
97
-
98
- cattr_accessor :approximations
99
- self.approximations = []
100
-
101
- # This method can be used by developers wishing to debug the
102
- # {APPROXIMATIONS} hashes, which are written in a hard-to-read format.
103
- # @return Hash
104
- # @example
105
- #
106
- # > ruby -rrubygems -rlib/friendly_id -e 'p FriendlyId::SlugString.dump_approximations'
107
- #
108
- # {:common =>
109
- # {"À"=>"A", "Á"=>"A", "Â"=>"A", "Ã"=>"A", "Ä"=>"A", "Å"=>"A", "Æ"=>"AE",
110
- # "Ç"=>"C", "È"=>"E", "É"=>"E", "Ê"=>"E", "Ë"=>"E", "Ì"=>"I", "Í"=>"I",
111
- # "Î"=>"I", "Ï"=>"I", "Ð"=>"D", "Ñ"=>"N", "Ò"=>"O", "Ó"=>"O", "Ô"=>"O",
112
- # "Õ"=>"O", "Ö"=>"O", "×"=>"x", "Ø"=>"O", "Ù"=>"U", "Ú"=>"U", "Û"=>"U",
113
- # "Ü"=>"U", "Ý"=>"Y", "Þ"=>"Th", "ß"=>"ss", "à"=>"a", "á"=>"a", "â"=>"a",
114
- # "ã"=>"a", "ä"=>"a", "å"=>"a", "æ"=>"ae", "ç"=>"c", "è"=>"e", "é"=>"e",
115
- # "ê"=>"e", "ë"=>"e", "ì"=>"i", "í"=>"i", "î"=>"i", "ï"=>"i", "ð"=>"d",
116
- # "ñ"=>"n", "ò"=>"o", "ó"=>"o", "ô"=>"o", "õ"=>"o", "ö"=>"o", "ø"=>"o",
117
- # "ù"=>"u", "ú"=>"u", "û"=>"u", "ü"=>"u", "ý"=>"y", "þ"=>"th", "ÿ"=>"y",
118
- # "Ā"=>"A", "ā"=>"a", "Ă"=>"A", "ă"=>"a", "Ą"=>"A", "ą"=>"a", "Ć"=>"C",
119
- # "ć"=>"c", "Ĉ"=>"C", "ĉ"=>"c", "Ċ"=>"C", "ċ"=>"c", "Č"=>"C", "č"=>"c",
120
- # "Ď"=>"D", "ď"=>"d", "Đ"=>"D", "đ"=>"d", "Ē"=>"E", "ē"=>"e", "Ĕ"=>"E",
121
- # "ĕ"=>"e", "Ė"=>"E", "ė"=>"e", "Ę"=>"E", "ę"=>"e", "Ě"=>"E", "ě"=>"e",
122
- # "Ĝ"=>"G", "ĝ"=>"g", "Ğ"=>"G", "ğ"=>"g", "Ġ"=>"G", "ġ"=>"g", "Ģ"=>"G",
123
- # "ģ"=>"g", "Ĥ"=>"H", "ĥ"=>"h", "Ħ"=>"H", "ħ"=>"h", "Ĩ"=>"I", "ĩ"=>"i",
124
- # "Ī"=>"I", "ī"=>"i", "Ĭ"=>"I", "ĭ"=>"i", "Į"=>"I", "į"=>"i", "İ"=>"I",
125
- # "ı"=>"i", "IJ"=>"IJ", "ij"=>"ij", "Ĵ"=>"J", "ĵ"=>"j", "Ķ"=>"K", "ķ"=>"k",
126
- # "ĸ"=>"k", "Ĺ"=>"L", "ĺ"=>"l", "Ļ"=>"L", "ļ"=>"l", "Ľ"=>"L", "ľ"=>"l",
127
- # "Ŀ"=>"L", "ŀ"=>"l", "Ł"=>"L", "ł"=>"l", "Ń"=>"N", "ń"=>"n", "Ņ"=>"N",
128
- # "ņ"=>"n", "Ň"=>"N", "ň"=>"n", "ʼn"=>"'n", "Ŋ"=>"NG", "ŋ"=>"ng",
129
- # "Ō"=>"O", "ō"=>"o", "Ŏ"=>"O", "ŏ"=>"o", "Ő"=>"O", "ő"=>"o", "Œ"=>"OE",
130
- # "œ"=>"oe", "Ŕ"=>"R", "ŕ"=>"r", "Ŗ"=>"R", "ŗ"=>"r", "Ř"=>"R", "ř"=>"r",
131
- # "Ś"=>"S", "ś"=>"s", "Ŝ"=>"S", "ŝ"=>"s", "Ş"=>"S", "ş"=>"s", "Š"=>"S",
132
- # "š"=>"s", "Ţ"=>"T", "ţ"=>"t", "Ť"=>"T", "ť"=>"t", "Ŧ"=>"T", "ŧ"=>"t",
133
- # "Ũ"=>"U", "ũ"=>"u", "Ū"=>"U", "ū"=>"u", "Ŭ"=>"U", "ŭ"=>"u", "Ů"=>"U",
134
- # "ů"=>"u", "Ű"=>"U", "ű"=>"u", "Ų"=>"U", "ų"=>"u", "Ŵ"=>"W", "ŵ"=>"w",
135
- # "Ŷ"=>"Y", "ŷ"=>"y", "Ÿ"=>"Y", "Ź"=>"Z", "ź"=>"z", "Ż"=>"Z", "ż"=>"z",
136
- # "Ž"=>"Z", "ž"=>"z"},
137
- # :german => {"ü"=>"ue", "ö"=>"oe", "ä"=>"ae"},
138
- # :spanish => {"Ñ"=>"Nn", "ñ"=>"nn"}}
139
- def self.dump_approximations
140
- Hash[APPROXIMATIONS.map do |name, approx|
141
- [name, Hash[approx.map {|key, value| [[key].pack("U*"), [value].flatten.pack("U*")]}]]
142
- end]
143
- end
144
-
145
-
146
- # @param string [String] The string to use as the basis of the SlugString.
147
- def initialize(string)
148
- super string.to_s
149
- tidy_bytes!
150
- end
151
-
152
- # Approximate an ASCII string. This works only for Western strings using
153
- # characters that are Roman-alphabet characters + diacritics. Non-letter
154
- # characters are left unmodified.
155
- #
156
- # string = SlugString.new "Łódź, Poland"
157
- # string.approximate_ascii # => "Lodz, Poland"
158
- # string = SlugString.new "日本"
159
- # string.approximate_ascii # => "日本"
160
- #
161
- # You can pass any key(s) from {APPROXIMATIONS} as arguments. This allows
162
- # for contextual approximations. By default, +:spanish+ and +:german+ are
163
- # provided:
164
- #
165
- # string = SlugString.new "Jürgen Müller"
166
- # string.approximate_ascii # => "Jurgen Muller"
167
- # string.approximate_ascii :german # => "Juergen Mueller"
168
- # string = SlugString.new "¡Feliz año!"
169
- # string.approximate_ascii # => "¡Feliz ano!"
170
- # string.approximate_ascii :spanish # => "¡Feliz anno!"
171
- #
172
- # You can modify the built-in approximations, or add your own:
173
- #
174
- # # Make Spanish use "nh" rather than "nn"
175
- # FriendlyId::SlugString::APPROXIMATIONS[:spanish] = {
176
- # # Ñ => "Nh"
177
- # 209 => [78, 104],
178
- # # ñ => "nh"
179
- # 241 => [110, 104]
180
- # }
181
- #
182
- # It's also possible to use a custom approximation for all strings:
183
- #
184
- # FriendlyId::SlugString.approximations << :german
185
- #
186
- # Notice that this method does not simply convert to ASCII; if you want
187
- # to remove non-ASCII characters such as "¡" and "¿", use {#to_ascii!}:
188
- #
189
- # string.approximate_ascii!(:spanish) # => "¡Feliz anno!"
190
- # string.to_ascii! # => "Feliz anno!"
191
- # @param *args <Symbol>
192
- # @return String
193
- def approximate_ascii!(*args)
194
- @maps = (self.class.approximations + args.flatten + [:common]).flatten.uniq
195
- @wrapped_string = normalize_utf8(:c).unpack("U*").map { |char| approx_char(char) }.flatten.pack("U*")
196
- end
197
-
198
- # Removes leading and trailing spaces or dashes, and replaces multiple
199
- # whitespace characters with a single space.
200
- # @return String
201
- def clean!
202
- @wrapped_string = @wrapped_string.gsub(/\A\-|\-\z/, "").gsub(/\s+/u, " ").strip
203
- end
204
-
205
- # Lowercases the string. Note that this works for Unicode strings,
206
- # though your mileage may vary with Greek and Turkic strings.
207
- # @return String
208
- def downcase!
209
- @wrapped_string = apply_mapping :lowercase_mapping
210
- end
211
-
212
- if defined? ActiveSupport::Multibyte::Unicode
213
- def apply_mapping(*args)
214
- ActiveSupport::Multibyte::Unicode.apply_mapping(@wrapped_string, *args)
215
- end
216
- end
217
-
218
- # Remove any non-word characters.
219
- # @return String
220
- def word_chars!
221
- @wrapped_string = normalize_utf8(:c).unpack("U*").map { |char|
222
- case char
223
- # control chars
224
- when 0..31
225
- # punctuation; 45 is "-" (HYPHEN-MINUS) and allowed
226
- when 33..44
227
- # more punctuation
228
- when 46..47
229
- # more punctuation and other symbols
230
- when 58..64
231
- # brackets and other symbols
232
- when 91..96
233
- # braces, pipe, tilde, etc.
234
- when 123..191
235
- else char
236
- end
237
- }.compact.pack("U*")
238
- end
239
-
240
- # Normalize the string for a given {FriendlyId::Configuration}.
241
- # @param config [FriendlyId::Configuration]
242
- # @return String
243
- def normalize_for!(config)
244
- approximate_ascii!(config.ascii_approximation_options) if config.approximate_ascii?
245
- to_ascii! if config.strip_non_ascii?
246
- normalize!
247
- end
248
-
249
- alias :normalize_utf8 :normalize rescue NoMethodError
250
-
251
- # Normalize the string for use as a FriendlyId. Note that in
252
- # this context, +normalize+ means, strip, remove non-letters/numbers,
253
- # downcasing and converting whitespace to dashes.
254
- # ActiveSupport::Multibyte::Chars#normalize is aliased to +normalize_utf8+
255
- # in this subclass.
256
- # @return String
257
- def normalize!
258
- clean!
259
- word_chars!
260
- downcase!
261
- with_dashes!
262
- end
263
-
264
- # Attempt to replace invalid UTF-8 bytes with valid ones. This method
265
- # naively assumes if you have invalid UTF8 bytes, they are either Windows
266
- # CP-1252 or ISO8859-1. In practice this isn't a bad assumption, but may not
267
- # always work.
268
- #
269
- # Passing +true+ will forcibly tidy all bytes, assuming that the string's
270
- # encoding is CP-1252 or ISO-8859-1.
271
- def tidy_bytes!(force = false)
272
-
273
- if force
274
- @wrapped_string = @wrapped_string.unpack("C*").map do |b|
275
- tidy_byte(b)
276
- end.flatten.compact.pack("C*").unpack("U*").pack("U*")
277
- end
278
-
279
- bytes = @wrapped_string.unpack("C*")
280
- conts_expected = 0
281
- last_lead = 0
282
-
283
- bytes.each_index do |i|
284
-
285
- byte = bytes[i]
286
- is_ascii = byte < 128
287
- is_cont = byte > 127 && byte < 192
288
- is_lead = byte > 191 && byte < 245
289
- is_unused = byte > 240
290
- is_restricted = byte > 244
291
-
292
- # Impossible or highly unlikely byte? Clean it.
293
- if is_unused || is_restricted
294
- bytes[i] = tidy_byte(byte)
295
- elsif is_cont
296
- # Not expecting continuation byte? Clean up. Otherwise, now expect one less.
297
- conts_expected == 0 ? bytes[i] = tidy_byte(byte) : conts_expected -= 1
298
- else
299
- if conts_expected > 0
300
- # Expected continuation, but got ASCII or leading? Clean backwards up to
301
- # the leading byte.
302
- (1..(i - last_lead)).each {|j| bytes[i - j] = tidy_byte(bytes[i - j])}
303
- conts_expected = 0
304
- end
305
- if is_lead
306
- # Final byte is leading? Clean it.
307
- if i == bytes.length - 1
308
- bytes[i] = tidy_byte(bytes.last)
309
- else
310
- # Valid leading byte? Expect continuations determined by position of
311
- # first zero bit, with max of 3.
312
- conts_expected = byte < 224 ? 1 : byte < 240 ? 2 : 3
313
- last_lead = i
314
- end
315
- end
316
- end
317
- end
318
- @wrapped_string = bytes.empty? ? "" : bytes.flatten.compact.pack("C*").unpack("U*").pack("U*")
319
- end
320
-
321
- # Delete any non-ascii characters.
322
- # @return String
323
- def to_ascii!
324
- if ">= 1.9".respond_to?(:force_encoding)
325
- @wrapped_string.encode!("ASCII", :invalid => :replace, :undef => :replace,
326
- :replace => "")
327
- else
328
- @wrapped_string = tidy_bytes.normalize_utf8(:c).unpack("U*").reject {|char| char > 127}.pack("U*")
329
- end
330
- end
331
-
332
- # Truncate the string to +max+ length.
333
- # @return String
334
- def truncate!(max)
335
- @wrapped_string = self[0...max].to_s if length > max
336
- end
337
-
338
- # Upper-cases the string. Note that this works for Unicode strings,
339
- # though your mileage may vary with Greek and Turkic strings.
340
- # @return String
341
- def upcase!
342
- @wrapped_string = apply_mapping :uppercase_mapping
343
- end
344
-
345
- # Validate that the slug string is not blank or reserved, and truncate
346
- # it to the max length if necessary.
347
- # @param config [FriendlyId::Configuration]
348
- # @return String
349
- # @raise FriendlyId::BlankError
350
- # @raise FriendlyId::ReservedError
351
- def validate_for!(config)
352
- truncate!(config.max_length)
353
- raise FriendlyId::BlankError if blank?
354
- raise FriendlyId::ReservedError if config.reserved?(self)
355
- self
356
- end
357
-
358
- # Replaces whitespace with dashes ("-").
359
- # @return String
360
- def with_dashes!
361
- @wrapped_string = @wrapped_string.gsub(/[\s\-]+/u, "-")
362
- end
363
-
364
- %w[approximate_ascii clean downcase word_chars normalize normalize_for tidy_bytes
365
- to_ascii truncate upcase with_dashes].each do |method|
366
- class_eval(<<-EOM)
367
- def #{method}(*args)
368
- send_to_new_instance(:#{method}!, *args)
369
- end
370
- EOM
371
- end
372
-
373
- private
374
-
375
- # Look up the character's approximation in the configured maps.
376
- def approx_char(char)
377
- @maps.each do |map|
378
- if new_char = APPROXIMATIONS[map][char]
379
- return new_char
380
- end
381
- end
382
- char
383
- end
384
-
385
- # Used as the basis of the bangless methods.
386
- def send_to_new_instance(*args)
387
- string = SlugString.new self
388
- string.send(*args)
389
- string
390
- end
391
-
392
- def tidy_byte(byte)
393
- byte < 160 ? CP1252[byte] : byte < 192 ? [194, byte] : [195, byte - 64]
394
- end
395
-
4
+ class SlugString < Babosa::SlugString
5
+ # Normalize the string for a given {FriendlyId::Configuration}.
6
+ # @param config [FriendlyId::Configuration]
7
+ # @return String
8
+ def normalize_for!(config)
9
+ approximate_ascii!(config.ascii_approximation_options) if config.approximate_ascii?
10
+ to_ascii! if config.strip_non_ascii?
11
+ normalize!
12
+ end
13
+
14
+ # Validate that the slug string is not blank or reserved, and truncate
15
+ # it to the max length if necessary.
16
+ # @param config [FriendlyId::Configuration]
17
+ # @return String
18
+ # @raise FriendlyId::BlankError
19
+ # @raise FriendlyId::ReservedError
20
+ def validate_for!(config)
21
+ truncate_bytes!(config.max_length)
22
+ raise FriendlyId::BlankError if blank?
23
+ raise FriendlyId::ReservedError if config.reserved?(self)
24
+ self
25
+ end
396
26
  end
397
27
  end
@@ -54,8 +54,12 @@ module FriendlyId
54
54
  end
55
55
 
56
56
  # The friendly id.
57
- def friendly_id
58
- slug.to_friendly_id if slug?
57
+ # @param skip_cache [Boolean] when true, skip the cache column lookup and use the slug's friendly id.
58
+ def friendly_id(skip_cache = false)
59
+ if friendly_id_config.cache_column? && !skip_cache
60
+ friendly_id = send(friendly_id_config.cache_column)
61
+ end
62
+ friendly_id || (slug.to_friendly_id if slug?)
59
63
  end
60
64
 
61
65
  # Clean up the string before setting it as the friendly_id. You can override
@@ -98,4 +102,4 @@ module FriendlyId
98
102
  end
99
103
  end
100
104
  end
101
- end
105
+ end
@@ -212,9 +212,8 @@ module FriendlyId
212
212
  klass.friendly_id_config.stubs(:approximate_ascii?).returns(true)
213
213
  klass.friendly_id_config.stubs(:ascii_approximation_options).returns(:spanish)
214
214
  instance = klass.send(create_method, :name => "Cañón")
215
- assert_equal "cannon", instance.friendly_id
215
+ assert_equal "canion", instance.friendly_id
216
216
  end
217
-
218
217
  end
219
218
 
220
219
  # Tests for FriendlyId::Status.