activesupport 3.0.0.beta3 → 3.0.0.beta4

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of activesupport might be problematic. Click here for more details.

Files changed (63) hide show
  1. data/CHANGELOG +57 -0
  2. data/lib/active_support/builder.rb +6 -0
  3. data/lib/active_support/cache.rb +428 -70
  4. data/lib/active_support/cache/compressed_mem_cache_store.rb +6 -15
  5. data/lib/active_support/cache/file_store.rb +139 -41
  6. data/lib/active_support/cache/mem_cache_store.rb +115 -76
  7. data/lib/active_support/cache/memory_store.rb +127 -27
  8. data/lib/active_support/cache/strategy/local_cache.rb +109 -57
  9. data/lib/active_support/cache/synchronized_memory_store.rb +2 -38
  10. data/lib/active_support/callbacks.rb +27 -27
  11. data/lib/active_support/configurable.rb +19 -18
  12. data/lib/active_support/core_ext/array/conversions.rb +30 -26
  13. data/lib/active_support/core_ext/array/random_access.rb +19 -5
  14. data/lib/active_support/core_ext/benchmark.rb +0 -12
  15. data/lib/active_support/core_ext/class/attribute.rb +1 -4
  16. data/lib/active_support/core_ext/class/inheritable_attributes.rb +3 -0
  17. data/lib/active_support/core_ext/date/calculations.rb +27 -8
  18. data/lib/active_support/core_ext/date/conversions.rb +1 -0
  19. data/lib/active_support/core_ext/date_time/conversions.rb +9 -3
  20. data/lib/active_support/core_ext/file.rb +1 -0
  21. data/lib/active_support/core_ext/hash/conversions.rb +14 -137
  22. data/lib/active_support/core_ext/kernel/debugger.rb +1 -1
  23. data/lib/active_support/core_ext/kernel/reporting.rb +2 -1
  24. data/lib/active_support/core_ext/load_error.rb +1 -0
  25. data/lib/active_support/core_ext/logger.rb +1 -1
  26. data/lib/active_support/core_ext/module/attr_internal.rb +2 -2
  27. data/lib/active_support/core_ext/object/to_param.rb +2 -2
  28. data/lib/active_support/core_ext/object/with_options.rb +2 -0
  29. data/lib/active_support/core_ext/string.rb +1 -0
  30. data/lib/active_support/core_ext/string/conversions.rb +35 -1
  31. data/lib/active_support/core_ext/string/encoding.rb +11 -0
  32. data/lib/active_support/core_ext/string/filters.rb +29 -0
  33. data/lib/active_support/core_ext/string/inflections.rb +0 -11
  34. data/lib/active_support/core_ext/string/interpolation.rb +1 -0
  35. data/lib/active_support/core_ext/string/multibyte.rb +16 -19
  36. data/lib/active_support/core_ext/time/calculations.rb +7 -6
  37. data/lib/active_support/core_ext/uri.rb +8 -3
  38. data/lib/active_support/dependencies.rb +33 -1
  39. data/lib/active_support/duration.rb +1 -0
  40. data/lib/active_support/hash_with_indifferent_access.rb +5 -1
  41. data/lib/active_support/i18n.rb +7 -2
  42. data/lib/active_support/inflector/transliterate.rb +58 -38
  43. data/lib/active_support/json/encoding.rb +28 -5
  44. data/lib/active_support/lazy_load_hooks.rb +14 -4
  45. data/lib/active_support/locale/en.yml +4 -1
  46. data/lib/active_support/message_verifier.rb +4 -4
  47. data/lib/active_support/multibyte.rb +1 -19
  48. data/lib/active_support/multibyte/chars.rb +143 -427
  49. data/lib/active_support/multibyte/unicode.rb +393 -0
  50. data/lib/active_support/notifications/fanout.rb +15 -5
  51. data/lib/active_support/notifications/instrumenter.rb +10 -4
  52. data/lib/active_support/railtie.rb +36 -0
  53. data/lib/active_support/rescuable.rb +1 -0
  54. data/lib/active_support/ruby/shim.rb +1 -0
  55. data/lib/active_support/testing/declarative.rb +1 -1
  56. data/lib/active_support/testing/isolation.rb +2 -1
  57. data/lib/active_support/testing/setup_and_teardown.rb +3 -0
  58. data/lib/active_support/values/time_zone.rb +20 -30
  59. data/lib/active_support/values/unicode_tables.dat +0 -0
  60. data/lib/active_support/version.rb +1 -1
  61. data/lib/active_support/xml_mini.rb +126 -1
  62. metadata +8 -61
  63. data/lib/active_support/multibyte/unicode_database.rb +0 -71
@@ -0,0 +1,393 @@
1
module ActiveSupport
  module Multibyte
    # Unicode helpers used by the multibyte support: unpacking strings into codepoints and grapheme clusters,
    # normalization (decomposition, canonical reordering, composition), case-style mappings and repairing
    # strings that contain ISO-8859-1/CP1252 bytes. All methods are available on the module itself
    # (<tt>extend self</tt>). Methods that consult the Unicode tables load them lazily on first use.
    module Unicode

      extend self

      # A list of all available normalization forms. See http://www.unicode.org/reports/tr15/tr15-29.html for more
      # information about normalization.
      NORMALIZATION_FORMS = [:c, :kc, :d, :kd]

      # The Unicode version that is supported by the implementation
      UNICODE_VERSION = '5.1.0'

      # The default normalization used for operations that require normalization. It can be set to any of the
      # normalizations in NORMALIZATION_FORMS.
      #
      # Example:
      #   ActiveSupport::Multibyte::Unicode.default_normalization_form = :c
      attr_accessor :default_normalization_form
      @default_normalization_form = :kc

      # Hangul character boundaries and properties
      HANGUL_SBASE = 0xAC00
      HANGUL_LBASE = 0x1100
      HANGUL_VBASE = 0x1161
      HANGUL_TBASE = 0x11A7
      HANGUL_LCOUNT = 19
      HANGUL_VCOUNT = 21
      HANGUL_TCOUNT = 28
      HANGUL_NCOUNT = HANGUL_VCOUNT * HANGUL_TCOUNT
      HANGUL_SCOUNT = 11172
      HANGUL_SLAST = HANGUL_SBASE + HANGUL_SCOUNT
      HANGUL_JAMO_FIRST = 0x1100
      HANGUL_JAMO_LAST = 0x11FF

      # All the unicode whitespace
      WHITESPACE = [
        (0x0009..0x000D).to_a, # White_Space # Cc   [5] <control-0009>..<control-000D>
        0x0020,                # White_Space # Zs       SPACE
        0x0085,                # White_Space # Cc       <control-0085>
        0x00A0,                # White_Space # Zs       NO-BREAK SPACE
        0x1680,                # White_Space # Zs       OGHAM SPACE MARK
        0x180E,                # White_Space # Zs       MONGOLIAN VOWEL SEPARATOR
        (0x2000..0x200A).to_a, # White_Space # Zs  [11] EN QUAD..HAIR SPACE
        0x2028,                # White_Space # Zl       LINE SEPARATOR
        0x2029,                # White_Space # Zp       PARAGRAPH SEPARATOR
        0x202F,                # White_Space # Zs       NARROW NO-BREAK SPACE
        0x205F,                # White_Space # Zs       MEDIUM MATHEMATICAL SPACE
        0x3000,                # White_Space # Zs       IDEOGRAPHIC SPACE
      ].flatten.freeze

      # BOM (byte order mark) can also be seen as whitespace, it's a non-rendering character used to distinguish
      # between little and big endian. This is not an issue in utf-8, so it must be ignored.
      LEADERS_AND_TRAILERS = WHITESPACE + [65279] # ZERO-WIDTH NO-BREAK SPACE aka BOM

      # Returns a regular expression pattern (as a string of alternatives joined by <tt>|</tt>) that matches
      # the passed Unicode codepoints. The characters are inserted verbatim, without regexp escaping.
      def self.codepoints_to_pattern(array_of_codepoints) #:nodoc:
        array_of_codepoints.map { |codepoint| [codepoint].pack('U*') }.join('|')
      end
      TRAILERS_PAT = /(#{codepoints_to_pattern(LEADERS_AND_TRAILERS)})+\Z/u
      LEADERS_PAT = /\A(#{codepoints_to_pattern(LEADERS_AND_TRAILERS)})+/u

      # Unpack the string at codepoints boundaries. Raises an EncodingError when the encoding of the string isn't
      # valid UTF-8.
      #
      # Example:
      #   Unicode.u_unpack('Café') #=> [67, 97, 102, 233]
      def u_unpack(string)
        string.unpack 'U*'
      rescue ArgumentError
        raise EncodingError, 'malformed UTF-8 character'
      end

      # Detect whether the codepoint is in a certain character class. Returns +true+ when it's in the specified
      # character class and +false+ otherwise. Valid character classes are: <tt>:cr</tt>, <tt>:lf</tt>, <tt>:l</tt>,
      # <tt>:v</tt>, <tt>:lv</tt>, <tt>:lvt</tt> and <tt>:t</tt>.
      #
      # Primarily used by the grapheme cluster support.
      def in_char_class?(codepoint, classes)
        classes.any? { |c| database.boundary[c] === codepoint }
      end

      # Unpack the string at grapheme boundaries. Returns a list of character lists.
      #
      # Example:
      #   Unicode.g_unpack('क्षि') #=> [[2325, 2381], [2359], [2367]]
      #   Unicode.g_unpack('Café') #=> [[67], [97], [102], [233]]
      def g_unpack(string)
        codepoints = u_unpack(string)
        unpacked = []
        pos = 0
        marker = 0 # index of the first codepoint of the cluster being collected
        eoc = codepoints.length
        while pos < eoc
          pos += 1
          previous = codepoints[pos - 1]
          current = codepoints[pos] # nil once +previous+ is the last codepoint
          next if in_same_grapheme?(previous, current)
          unpacked << codepoints[marker..pos - 1]
          marker = pos
        end
        unpacked
      end

      # Reverse operation of g_unpack.
      #
      # Example:
      #   Unicode.g_pack(Unicode.g_unpack('क्षि')) #=> 'क्षि'
      def g_pack(unpacked)
        unpacked.flatten.pack('U*')
      end

      # Re-order codepoints so the string becomes canonical. Adjacent codepoints are swapped (in place) whenever
      # the first has a higher combining class than the second and the second has a non-zero combining class.
      # Returns the same +codepoints+ array.
      def reorder_characters(codepoints)
        length = codepoints.length - 1
        pos = 0
        while pos < length
          cp1 = database.codepoints[codepoints[pos]]
          cp2 = database.codepoints[codepoints[pos + 1]]
          if (cp1.combining_class > cp2.combining_class) && (cp2.combining_class > 0)
            codepoints[pos..pos + 1] = cp2.code, cp1.code
            # Step back so the swapped codepoint is compared with its new predecessor.
            pos += (pos > 0 ? -1 : 1)
          else
            pos += 1
          end
        end
        codepoints
      end

      # Decompose composed characters to the decomposed form.
      #
      # * <tt>type</tt> - <tt>:canonical</tt> or <tt>:compatability</tt> (spelling is part of the internal
      #   interface). Mappings that carry a decomposition type are only applied for <tt>:compatability</tt>.
      # * <tt>codepoints</tt> - Array of codepoints; a new array is returned.
      def decompose_codepoints(type, codepoints)
        codepoints.inject([]) do |decomposed, cp|
          # if it's a hangul syllable starter character
          if HANGUL_SBASE <= cp && cp < HANGUL_SLAST
            sindex = cp - HANGUL_SBASE
            ncp = [] # new codepoints
            ncp << HANGUL_LBASE + sindex / HANGUL_NCOUNT
            ncp << HANGUL_VBASE + (sindex % HANGUL_NCOUNT) / HANGUL_TCOUNT
            tindex = sindex % HANGUL_TCOUNT
            ncp << (HANGUL_TBASE + tindex) unless tindex == 0
            decomposed.concat ncp
          # if the codepoint is decomposable with the current decomposition type
          elsif (ncp = database.codepoints[cp].decomp_mapping) &&
                (!database.codepoints[cp].decomp_type || type == :compatability)
            # Mappings may themselves be decomposable, so recurse on a copy.
            decomposed.concat decompose_codepoints(type, ncp.dup)
          else
            decomposed << cp
          end
        end
      end

      # Compose decomposed characters to the composed form. Works in place on +codepoints+ and returns it.
      def compose_codepoints(codepoints)
        pos = 0
        eoa = codepoints.length - 1
        starter_pos = 0
        starter_char = codepoints[0]
        previous_combining_class = -1
        while pos < eoa
          pos += 1
          lindex = starter_char - HANGUL_LBASE
          # -- Hangul
          if 0 <= lindex && lindex < HANGUL_LCOUNT
            # A missing follower (index past the end) counts as "not a vowel"/"not a trailing consonant".
            vowel = codepoints[starter_pos + 1]
            vindex = vowel ? vowel - HANGUL_VBASE : -1
            if 0 <= vindex && vindex < HANGUL_VCOUNT
              trailing = codepoints[starter_pos + 2]
              tindex = trailing ? trailing - HANGUL_TBASE : -1
              if 0 <= tindex && tindex < HANGUL_TCOUNT
                j = starter_pos + 2
                eoa -= 2
              else
                tindex = 0
                j = starter_pos + 1
                eoa -= 1
              end
              codepoints[starter_pos..j] = (lindex * HANGUL_VCOUNT + vindex) * HANGUL_TCOUNT + tindex + HANGUL_SBASE
            end
            starter_pos += 1
            starter_char = codepoints[starter_pos]
          # -- Other characters
          else
            current_char = codepoints[pos]
            current = database.codepoints[current_char]
            if current.combining_class > previous_combining_class
              if ref = database.composition_map[starter_char]
                composition = ref[current_char]
              else
                composition = nil
              end
              unless composition.nil?
                # Replace the starter by the composed character and drop the combining one.
                codepoints[starter_pos] = composition
                starter_char = composition
                codepoints.delete_at pos
                eoa -= 1
                pos -= 1
                previous_combining_class = -1
              else
                previous_combining_class = current.combining_class
              end
            else
              previous_combining_class = current.combining_class
            end
            if current.combining_class == 0
              starter_pos = pos
              starter_char = codepoints[pos]
            end
          end
        end
        codepoints
      end

      # Replaces all ISO-8859-1 or CP1252 characters by their UTF-8 equivalent, aiming for a valid UTF-8 string.
      #
      # Passing +true+ will forcibly tidy all bytes, assuming that the string's encoding is entirely CP1252 or ISO-8859-1.
      #
      # Without +force+, bytes are only tidied where they cannot be part of a structurally well-formed UTF-8
      # sequence: stray continuation bytes, lead bytes that are not followed by the expected number of
      # continuation bytes (including sequences cut off by the end of the string), and bytes above 240.
      def tidy_bytes(string, force = false)
        if force
          return string.unpack("C*").map { |b| tidy_byte(b) }.flatten.compact.pack("C*").unpack("U*").pack("U*")
        end

        bytes = string.unpack("C*")
        conts_expected = 0
        last_lead = 0

        bytes.each_index do |i|
          byte = bytes[i]
          is_cont = byte > 127 && byte < 192
          is_lead = byte > 191 && byte < 245
          is_unused = byte > 240 # impossible or highly unlikely byte; includes the restricted range above 244

          if is_cont
            # Not expecting continuation byte? Clean up. Otherwise, now expect one less.
            conts_expected == 0 ? bytes[i] = tidy_byte(byte) : conts_expected -= 1
            next
          end

          if conts_expected > 0
            # Expected continuation, but got something else? Clean backwards up to the leading byte.
            # Every byte in that range is still a plain Integer, because any non-continuation byte
            # resets the pending sequence right here before it is replaced.
            (last_lead...i).each { |j| bytes[j] = tidy_byte(bytes[j]) }
            conts_expected = 0
          end

          if is_unused
            bytes[i] = tidy_byte(byte)
          elsif is_lead
            if i == bytes.length - 1
              # Final byte is leading? Clean it.
              bytes[i] = tidy_byte(byte)
            else
              # Valid leading byte? Expect continuations determined by position of
              # first zero bit, with max of 3.
              conts_expected = byte < 224 ? 1 : byte < 240 ? 2 : 3
              last_lead = i
            end
          end
        end

        # The string ended in the middle of a multi-byte sequence: tidy the incomplete tail.
        if conts_expected > 0
          (last_lead...bytes.length).each { |j| bytes[j] = tidy_byte(bytes[j]) }
        end

        bytes.empty? ? "" : bytes.flatten.compact.pack("C*").unpack("U*").pack("U*")
      end

      # Returns the KC normalization of the string by default. NFKC is considered the best normalization form for
      # passing strings to databases and validations.
      #
      # * <tt>string</tt> - The string to perform normalization on.
      # * <tt>form</tt> - The form you want to normalize in. Should be one of the following:
      #   <tt>:c</tt>, <tt>:kc</tt>, <tt>:d</tt>, or <tt>:kd</tt>. Default is
      #   ActiveSupport::Multibyte::Unicode.default_normalization_form
      #
      # Raises an ArgumentError for any other form and an EncodingError (via u_unpack) for invalid UTF-8.
      def normalize(string, form=nil)
        form ||= @default_normalization_form
        # See http://www.unicode.org/reports/tr15, Table 1
        codepoints = u_unpack(string)
        case form
          when :d
            reorder_characters(decompose_codepoints(:canonical, codepoints))
          when :c
            compose_codepoints(reorder_characters(decompose_codepoints(:canonical, codepoints)))
          when :kd
            reorder_characters(decompose_codepoints(:compatability, codepoints))
          when :kc
            compose_codepoints(reorder_characters(decompose_codepoints(:compatability, codepoints)))
          else
            raise ArgumentError, "#{form} is not a valid normalization variant", caller
        end.pack('U*')
      end

      # Maps every codepoint of +string+ through the Codepoint attribute named by +mapping+ (for example
      # <tt>:uppercase_mapping</tt>). Codepoints without a positive mapping are kept unchanged.
      def apply_mapping(string, mapping) #:nodoc:
        u_unpack(string).map do |codepoint|
          cp = database.codepoints[codepoint]
          if cp && (ncp = cp.send(mapping)) && ncp > 0
            ncp
          else
            codepoint
          end
        end.pack('U*')
      end

      # Holds data about a codepoint in the Unicode database
      class Codepoint
        attr_accessor :code, :combining_class, :decomp_type, :decomp_mapping, :uppercase_mapping, :lowercase_mapping
      end

      # Holds static data from the Unicode database
      class UnicodeDatabase
        ATTRIBUTES = :codepoints, :composition_exclusion, :composition_map, :boundary, :cp1252

        attr_writer(*ATTRIBUTES)

        def initialize
          # One shared default Codepoint is returned for unknown keys; this module only reads from it.
          @codepoints = Hash.new(Codepoint.new)
          @composition_exclusion = []
          @composition_map = {}
          @boundary = {}
          @cp1252 = {}
        end

        # Lazy load the Unicode database so it's only loaded when it's actually used.
        # After the first successful load these readers are shadowed by plain attr_readers
        # defined on the instance's singleton class (see #load).
        ATTRIBUTES.each do |attr_name|
          define_method(attr_name) do
            load
            instance_variable_get("@#{attr_name}")
          end
        end

        # Loads the Unicode database and returns all the internal objects of UnicodeDatabase.
        # Raises an IOError when the tables cannot be read or unmarshalled.
        def load
          begin
            # Marshal.load must only ever be pointed at the data file shipped with the library.
            @codepoints, @composition_exclusion, @composition_map, @boundary, @cp1252 = File.open(self.class.filename, 'rb') { |f| Marshal.load f.read }
          rescue => e
            # Only StandardError is translated; SystemExit, Interrupt and friends propagate untouched.
            raise IOError, "Couldn't load the Unicode tables for UTF8Handler (#{e.message}), ActiveSupport::Multibyte is unusable"
          end

          # Redefine the === method so we can write shorter rules for grapheme cluster breaks
          @boundary.each do |k,_|
            @boundary[k].instance_eval do
              def ===(other)
                detect { |i| i === other } ? true : false
              end
            end if @boundary[k].kind_of?(Array)
          end

          # define attr_reader methods for the instance variables
          class << self
            attr_reader(*ATTRIBUTES)
          end
        end

        # Returns the directory in which the data files are stored
        def self.dirname
          File.dirname(__FILE__) + '/../values/'
        end

        # Returns the filename for the data file for this version
        def self.filename
          File.expand_path File.join(dirname, "unicode_tables.dat")
        end
      end

      private

      # Returns the UTF-8 bytes (as an Array of Integers) for a single byte interpreted as CP1252/ISO-8859-1.
      # Bytes below 160 are looked up in the CP1252 table (falling back to the byte value itself); bytes from
      # 160 upwards are encoded directly as the two-byte UTF-8 sequence of the same codepoint.
      def tidy_byte(byte)
        if byte < 160
          [database.cp1252[byte] || byte].pack("U").unpack("C*")
        elsif byte < 192
          [194, byte]
        else
          [195, byte - 64]
        end
      end

      # Returns +true+ when no grapheme cluster break may occur between the two codepoints.
      # +current+ is +nil+ when +previous+ is the last codepoint of the string.
      def in_same_grapheme?(previous, current)
        boundary = database.boundary
        # CR X LF
        (previous == boundary[:cr] && current == boundary[:lf]) ||
          # L X (L|V|LV|LVT)
          (boundary[:l] === previous && in_char_class?(current, [:l, :v, :lv, :lvt])) ||
          # (LV|V) X (V|T)
          (in_char_class?(previous, [:lv, :v]) && in_char_class?(current, [:v, :t])) ||
          # (LVT|T) X (T)
          (in_char_class?(previous, [:lvt, :t]) && boundary[:t] === current) ||
          # X Extend
          (boundary[:extend] === current)
      end

      # Lazily instantiated holder of the Unicode tables.
      def database
        @database ||= UnicodeDatabase.new
      end

    end
  end
end
@@ -19,8 +19,8 @@ module ActiveSupport
19
19
  end
20
20
 
21
21
  def unsubscribe(subscriber)
22
- @subscribers.delete(subscriber)
23
22
  @listeners_for.clear
23
+ @subscribers.reject! {|s| s.matches?(subscriber)}
24
24
  end
25
25
 
26
26
  def publish(name, *args)
@@ -60,7 +60,7 @@ module ActiveSupport
60
60
  end
61
61
 
62
62
  def publish(*args)
63
- return unless matches?(args.first)
63
+ return unless subscribed_to?(args.first)
64
64
  push(*args)
65
65
  true
66
66
  end
@@ -69,10 +69,20 @@ module ActiveSupport
69
69
  true
70
70
  end
71
71
 
72
- private
73
- def matches?(name)
74
- !@pattern || @pattern =~ name.to_s
72
+ def subscribed_to?(name)
73
+ !@pattern || @pattern =~ name.to_s
74
+ end
75
+
76
+ def matches?(subscriber_or_name)
77
+ case subscriber_or_name
78
+ when String
79
+ @pattern && @pattern =~ subscriber_or_name
80
+ when self
81
+ true
75
82
  end
83
+ end
84
+
85
+ private
76
86
 
77
87
  def push(*args)
78
88
  @block.call(*args)
@@ -12,12 +12,18 @@ module ActiveSupport
12
12
  end
13
13
 
14
14
  # Instrument the given block by measuring the time taken to execute it
15
- # and publish it.
15
+ # and publish it. Notice that events get sent even if an error occurs
16
+ # in the passed-in block
16
17
  def instrument(name, payload={})
17
18
  time = Time.now
18
- result = yield(payload) if block_given?
19
- @notifier.publish(name, time, Time.now, @id, payload)
20
- result
19
+ begin
20
+ yield(payload) if block_given?
21
+ rescue Exception => e
22
+ payload[:exception] = [e.class.name, e.message]
23
+ raise e
24
+ ensure
25
+ @notifier.publish(name, time, Time.now, @id, payload)
26
+ end
21
27
  end
22
28
 
23
29
  private