activesupport 5.2.4.4 → 6.0.0

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of activesupport might be problematic. Click here for more details.

Files changed (138)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +327 -408
  3. data/MIT-LICENSE +1 -1
  4. data/README.rdoc +3 -2
  5. data/lib/active_support.rb +2 -1
  6. data/lib/active_support/actionable_error.rb +48 -0
  7. data/lib/active_support/backtrace_cleaner.rb +28 -1
  8. data/lib/active_support/cache.rb +45 -23
  9. data/lib/active_support/cache/file_store.rb +22 -22
  10. data/lib/active_support/cache/mem_cache_store.rb +17 -2
  11. data/lib/active_support/cache/memory_store.rb +7 -2
  12. data/lib/active_support/cache/null_store.rb +5 -0
  13. data/lib/active_support/cache/redis_cache_store.rb +47 -25
  14. data/lib/active_support/callbacks.rb +16 -5
  15. data/lib/active_support/concern.rb +24 -1
  16. data/lib/active_support/configurable.rb +7 -11
  17. data/lib/active_support/core_ext/array.rb +1 -1
  18. data/lib/active_support/core_ext/array/access.rb +18 -6
  19. data/lib/active_support/core_ext/array/extract.rb +21 -0
  20. data/lib/active_support/core_ext/array/prepend_and_append.rb +2 -6
  21. data/lib/active_support/core_ext/class/attribute.rb +11 -16
  22. data/lib/active_support/core_ext/class/subclasses.rb +1 -1
  23. data/lib/active_support/core_ext/date/calculations.rb +6 -5
  24. data/lib/active_support/core_ext/date_and_time/calculations.rb +24 -47
  25. data/lib/active_support/core_ext/date_time/calculations.rb +1 -1
  26. data/lib/active_support/core_ext/enumerable.rb +97 -73
  27. data/lib/active_support/core_ext/hash.rb +1 -2
  28. data/lib/active_support/core_ext/hash/compact.rb +2 -26
  29. data/lib/active_support/core_ext/hash/deep_transform_values.rb +46 -0
  30. data/lib/active_support/core_ext/hash/except.rb +1 -1
  31. data/lib/active_support/core_ext/hash/keys.rb +0 -29
  32. data/lib/active_support/core_ext/hash/slice.rb +3 -25
  33. data/lib/active_support/core_ext/hash/transform_values.rb +2 -29
  34. data/lib/active_support/core_ext/integer/multiple.rb +1 -1
  35. data/lib/active_support/core_ext/kernel.rb +0 -1
  36. data/lib/active_support/core_ext/load_error.rb +1 -1
  37. data/lib/active_support/core_ext/module.rb +0 -1
  38. data/lib/active_support/core_ext/module/attribute_accessors.rb +7 -10
  39. data/lib/active_support/core_ext/module/attribute_accessors_per_thread.rb +13 -19
  40. data/lib/active_support/core_ext/module/delegation.rb +33 -7
  41. data/lib/active_support/core_ext/module/introspection.rb +37 -13
  42. data/lib/active_support/core_ext/module/reachable.rb +1 -6
  43. data/lib/active_support/core_ext/module/redefine_method.rb +8 -17
  44. data/lib/active_support/core_ext/numeric.rb +0 -1
  45. data/lib/active_support/core_ext/numeric/conversions.rb +124 -128
  46. data/lib/active_support/core_ext/numeric/inquiry.rb +2 -25
  47. data/lib/active_support/core_ext/object/blank.rb +1 -2
  48. data/lib/active_support/core_ext/object/duplicable.rb +7 -114
  49. data/lib/active_support/core_ext/object/json.rb +1 -0
  50. data/lib/active_support/core_ext/object/try.rb +15 -7
  51. data/lib/active_support/core_ext/object/with_options.rb +1 -1
  52. data/lib/active_support/core_ext/range/compare_range.rb +22 -13
  53. data/lib/active_support/core_ext/range/conversions.rb +31 -29
  54. data/lib/active_support/core_ext/range/include_range.rb +6 -0
  55. data/lib/active_support/core_ext/regexp.rb +0 -4
  56. data/lib/active_support/core_ext/securerandom.rb +23 -3
  57. data/lib/active_support/core_ext/string/access.rb +8 -0
  58. data/lib/active_support/core_ext/string/filters.rb +42 -1
  59. data/lib/active_support/core_ext/string/inflections.rb +7 -2
  60. data/lib/active_support/core_ext/string/multibyte.rb +4 -3
  61. data/lib/active_support/core_ext/string/output_safety.rb +61 -5
  62. data/lib/active_support/core_ext/string/strip.rb +3 -1
  63. data/lib/active_support/core_ext/time/calculations.rb +31 -2
  64. data/lib/active_support/core_ext/uri.rb +1 -0
  65. data/lib/active_support/current_attributes.rb +8 -0
  66. data/lib/active_support/dependencies.rb +69 -16
  67. data/lib/active_support/dependencies/zeitwerk_integration.rb +110 -0
  68. data/lib/active_support/deprecation.rb +1 -1
  69. data/lib/active_support/deprecation/behaviors.rb +1 -1
  70. data/lib/active_support/deprecation/method_wrappers.rb +8 -20
  71. data/lib/active_support/deprecation/proxy_wrappers.rb +24 -5
  72. data/lib/active_support/descendants_tracker.rb +56 -9
  73. data/lib/active_support/duration.rb +4 -3
  74. data/lib/active_support/duration/iso8601_parser.rb +2 -3
  75. data/lib/active_support/duration/iso8601_serializer.rb +3 -4
  76. data/lib/active_support/encrypted_configuration.rb +0 -4
  77. data/lib/active_support/encrypted_file.rb +2 -1
  78. data/lib/active_support/evented_file_update_checker.rb +39 -9
  79. data/lib/active_support/execution_wrapper.rb +1 -0
  80. data/lib/active_support/gem_version.rb +4 -4
  81. data/lib/active_support/hash_with_indifferent_access.rb +22 -18
  82. data/lib/active_support/i18n.rb +1 -0
  83. data/lib/active_support/i18n_railtie.rb +9 -1
  84. data/lib/active_support/inflector/inflections.rb +1 -4
  85. data/lib/active_support/inflector/methods.rb +15 -27
  86. data/lib/active_support/inflector/transliterate.rb +47 -18
  87. data/lib/active_support/json/decoding.rb +23 -23
  88. data/lib/active_support/json/encoding.rb +6 -2
  89. data/lib/active_support/key_generator.rb +0 -32
  90. data/lib/active_support/lazy_load_hooks.rb +5 -1
  91. data/lib/active_support/locale/en.rb +31 -0
  92. data/lib/active_support/log_subscriber.rb +31 -8
  93. data/lib/active_support/logger.rb +0 -15
  94. data/lib/active_support/logger_silence.rb +28 -12
  95. data/lib/active_support/logger_thread_safe_level.rb +26 -4
  96. data/lib/active_support/message_encryptor.rb +3 -5
  97. data/lib/active_support/message_verifier.rb +3 -3
  98. data/lib/active_support/multibyte/chars.rb +29 -48
  99. data/lib/active_support/multibyte/unicode.rb +44 -281
  100. data/lib/active_support/notifications.rb +41 -4
  101. data/lib/active_support/notifications/fanout.rb +98 -13
  102. data/lib/active_support/notifications/instrumenter.rb +79 -8
  103. data/lib/active_support/number_helper.rb +7 -0
  104. data/lib/active_support/number_helper/number_to_currency_converter.rb +2 -2
  105. data/lib/active_support/number_helper/number_to_delimited_converter.rb +3 -1
  106. data/lib/active_support/number_helper/number_to_human_converter.rb +3 -1
  107. data/lib/active_support/number_helper/number_to_human_size_converter.rb +3 -1
  108. data/lib/active_support/number_helper/number_to_percentage_converter.rb +3 -1
  109. data/lib/active_support/number_helper/number_to_phone_converter.rb +2 -0
  110. data/lib/active_support/number_helper/number_to_rounded_converter.rb +5 -3
  111. data/lib/active_support/ordered_options.rb +1 -1
  112. data/lib/active_support/parameter_filter.rb +129 -0
  113. data/lib/active_support/rails.rb +0 -6
  114. data/lib/active_support/reloader.rb +4 -5
  115. data/lib/active_support/security_utils.rb +1 -1
  116. data/lib/active_support/subscriber.rb +65 -26
  117. data/lib/active_support/tagged_logging.rb +13 -4
  118. data/lib/active_support/test_case.rb +91 -0
  119. data/lib/active_support/testing/assertions.rb +15 -1
  120. data/lib/active_support/testing/deprecation.rb +0 -1
  121. data/lib/active_support/testing/file_fixtures.rb +2 -0
  122. data/lib/active_support/testing/isolation.rb +2 -2
  123. data/lib/active_support/testing/method_call_assertions.rb +28 -1
  124. data/lib/active_support/testing/parallelization.rb +128 -0
  125. data/lib/active_support/testing/stream.rb +1 -1
  126. data/lib/active_support/testing/time_helpers.rb +7 -7
  127. data/lib/active_support/time_with_zone.rb +15 -5
  128. data/lib/active_support/values/time_zone.rb +12 -7
  129. data/lib/active_support/xml_mini.rb +2 -9
  130. data/lib/active_support/xml_mini/jdom.rb +2 -2
  131. data/lib/active_support/xml_mini/libxml.rb +2 -2
  132. data/lib/active_support/xml_mini/libxmlsax.rb +4 -4
  133. data/lib/active_support/xml_mini/nokogiri.rb +2 -2
  134. data/lib/active_support/xml_mini/nokogirisax.rb +3 -3
  135. data/lib/active_support/xml_mini/rexml.rb +2 -2
  136. metadata +34 -9
  137. data/lib/active_support/core_ext/kernel/agnostics.rb +0 -13
  138. data/lib/active_support/values/unicode_tables.dat +0 -0
@@ -6,7 +6,6 @@ require "logger"
6
6
 
7
7
  module ActiveSupport
8
8
  class Logger < ::Logger
9
- include ActiveSupport::LoggerThreadSafeLevel
10
9
  include LoggerSilence
11
10
 
12
11
  # Returns true if the logger destination matches one of the sources
@@ -81,20 +80,6 @@ module ActiveSupport
81
80
  def initialize(*args)
82
81
  super
83
82
  @formatter = SimpleFormatter.new
84
- after_initialize if respond_to? :after_initialize
85
- end
86
-
87
- def add(severity, message = nil, progname = nil, &block)
88
- return true if @logdev.nil? || (severity || UNKNOWN) < level
89
- super
90
- end
91
-
92
- Logger::Severity.constants.each do |severity|
93
- class_eval(<<-EOT, __FILE__, __LINE__ + 1)
94
- def #{severity.downcase}? # def debug?
95
- Logger::#{severity} >= level # DEBUG >= level
96
- end # end
97
- EOT
98
83
  end
99
84
 
100
85
  # Simple formatter which only displays the message.
@@ -2,28 +2,44 @@
2
2
 
3
3
  require "active_support/concern"
4
4
  require "active_support/core_ext/module/attribute_accessors"
5
- require "concurrent"
5
+ require "active_support/logger_thread_safe_level"
6
6
 
7
7
  module LoggerSilence
8
8
  extend ActiveSupport::Concern
9
9
 
10
10
  included do
11
- cattr_accessor :silencer, default: true
11
+ ActiveSupport::Deprecation.warn(
12
+ "Including LoggerSilence is deprecated and will be removed in Rails 6.1. " \
13
+ "Please use `ActiveSupport::LoggerSilence` instead"
14
+ )
15
+
16
+ include ActiveSupport::LoggerSilence
12
17
  end
18
+ end
19
+
20
+ module ActiveSupport
21
+ module LoggerSilence
22
+ extend ActiveSupport::Concern
23
+
24
+ included do
25
+ cattr_accessor :silencer, default: true
26
+ include ActiveSupport::LoggerThreadSafeLevel
27
+ end
13
28
 
14
- # Silences the logger for the duration of the block.
15
- def silence(temporary_level = Logger::ERROR)
16
- if silencer
17
- begin
18
- old_local_level = local_level
19
- self.local_level = temporary_level
29
+ # Silences the logger for the duration of the block.
30
+ def silence(temporary_level = Logger::ERROR)
31
+ if silencer
32
+ begin
33
+ old_local_level = local_level
34
+ self.local_level = temporary_level
20
35
 
36
+ yield self
37
+ ensure
38
+ self.local_level = old_local_level
39
+ end
40
+ else
21
41
  yield self
22
- ensure
23
- self.local_level = old_local_level
24
42
  end
25
- else
26
- yield self
27
43
  end
28
44
  end
29
45
  end
@@ -1,14 +1,31 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  require "active_support/concern"
4
+ require "active_support/core_ext/module/attribute_accessors"
5
+ require "concurrent"
4
6
  require "fiber"
5
7
 
6
8
  module ActiveSupport
7
9
  module LoggerThreadSafeLevel # :nodoc:
8
10
  extend ActiveSupport::Concern
9
11
 
12
+ included do
13
+ cattr_accessor :local_levels, default: Concurrent::Map.new(initial_capacity: 2), instance_accessor: false
14
+ end
15
+
16
+ Logger::Severity.constants.each do |severity|
17
+ class_eval(<<-EOT, __FILE__, __LINE__ + 1)
18
+ def #{severity.downcase}? # def debug?
19
+ Logger::#{severity} >= level # DEBUG >= level
20
+ end # end
21
+ EOT
22
+ end
23
+
10
24
  def after_initialize
11
- @local_levels = Concurrent::Map.new(initial_capacity: 2)
25
+ ActiveSupport::Deprecation.warn(
26
+ "Logger don't need to call #after_initialize directly anymore. It will be deprecated without replacement in " \
27
+ "Rails 6.1."
28
+ )
12
29
  end
13
30
 
14
31
  def local_log_id
@@ -16,19 +33,24 @@ module ActiveSupport
16
33
  end
17
34
 
18
35
  def local_level
19
- @local_levels[local_log_id]
36
+ self.class.local_levels[local_log_id]
20
37
  end
21
38
 
22
39
  def local_level=(level)
23
40
  if level
24
- @local_levels[local_log_id] = level
41
+ self.class.local_levels[local_log_id] = level
25
42
  else
26
- @local_levels.delete(local_log_id)
43
+ self.class.local_levels.delete(local_log_id)
27
44
  end
28
45
  end
29
46
 
30
47
  def level
31
48
  local_level || super
32
49
  end
50
+
51
+ def add(severity, message = nil, progname = nil, &block) # :nodoc:
52
+ return true if @logdev.nil? || (severity || UNKNOWN) < level
53
+ super
54
+ end
33
55
  end
34
56
  end
@@ -53,7 +53,7 @@ module ActiveSupport
53
53
  # crypt.encrypt_and_sign(parcel, expires_in: 1.month)
54
54
  # crypt.encrypt_and_sign(doowad, expires_at: Time.now.end_of_year)
55
55
  #
56
- # Then the messages can be verified and returned upto the expire time.
56
+ # Then the messages can be verified and returned up to the expire time.
57
57
  # Thereafter, verifying returns +nil+.
58
58
  #
59
59
  # === Rotating keys
@@ -182,7 +182,7 @@ module ActiveSupport
182
182
 
183
183
  def _decrypt(encrypted_message, purpose)
184
184
  cipher = new_cipher
185
- encrypted_data, iv, auth_tag = encrypted_message.split("--".freeze).map { |v| ::Base64.strict_decode64(v) }
185
+ encrypted_data, iv, auth_tag = encrypted_message.split("--").map { |v| ::Base64.strict_decode64(v) }
186
186
 
187
187
  # Currently the OpenSSL bindings do not raise an error if auth_tag is
188
188
  # truncated, which would allow an attacker to easily forge it. See
@@ -210,9 +210,7 @@ module ActiveSupport
210
210
  OpenSSL::Cipher.new(@cipher)
211
211
  end
212
212
 
213
- def verifier
214
- @verifier
215
- end
213
+ attr_reader :verifier
216
214
 
217
215
  def aead_mode?
218
216
  @aead_mode ||= new_cipher.authenticated?
@@ -71,7 +71,7 @@ module ActiveSupport
71
71
  # @verifier.generate(parcel, expires_in: 1.month)
72
72
  # @verifier.generate(doowad, expires_at: Time.now.end_of_year)
73
73
  #
74
- # Then the messages can be verified and returned upto the expire time.
74
+ # Then the messages can be verified and returned up to the expire time.
75
75
  # Thereafter, the +verified+ method returns +nil+ while +verify+ raises
76
76
  # <tt>ActiveSupport::MessageVerifier::InvalidSignature</tt>.
77
77
  #
@@ -122,7 +122,7 @@ module ActiveSupport
122
122
  def valid_message?(signed_message)
123
123
  return if signed_message.nil? || !signed_message.valid_encoding? || signed_message.blank?
124
124
 
125
- data, digest = signed_message.split("--".freeze)
125
+ data, digest = signed_message.split("--")
126
126
  data.present? && digest.present? && ActiveSupport::SecurityUtils.secure_compare(digest, generate_digest(data))
127
127
  end
128
128
 
@@ -150,7 +150,7 @@ module ActiveSupport
150
150
  def verified(signed_message, purpose: nil, **)
151
151
  if valid_message?(signed_message)
152
152
  begin
153
- data = signed_message.split("--".freeze)[0]
153
+ data = signed_message.split("--")[0]
154
154
  message = Messages::Metadata.verify(decode(data), purpose)
155
155
  @serializer.load(message) if message
156
156
  rescue ArgumentError => argument_error
@@ -4,7 +4,6 @@ require "active_support/json"
4
4
  require "active_support/core_ext/string/access"
5
5
  require "active_support/core_ext/string/behavior"
6
6
  require "active_support/core_ext/module/delegation"
7
- require "active_support/core_ext/regexp"
8
7
 
9
8
  module ActiveSupport #:nodoc:
10
9
  module Multibyte #:nodoc:
@@ -18,7 +17,7 @@ module ActiveSupport #:nodoc:
18
17
  # through the +mb_chars+ method. Methods which would normally return a
19
18
  # String object now return a Chars object so methods can be chained.
20
19
  #
21
- # 'The Perfect String '.mb_chars.downcase.strip.normalize
20
+ # 'The Perfect String '.mb_chars.downcase.strip
22
21
  # # => #<ActiveSupport::Multibyte::Chars:0x007fdc434ccc10 @wrapped_string="the perfect string">
23
22
  #
24
23
  # Chars objects are perfectly interchangeable with String objects as long as
@@ -77,6 +76,11 @@ module ActiveSupport #:nodoc:
77
76
  # Returns +true+ when the proxy class can handle the string. Returns
78
77
  # +false+ otherwise.
79
78
  def self.consumes?(string)
79
+ ActiveSupport::Deprecation.warn(<<-MSG.squish)
80
+ ActiveSupport::Multibyte::Chars.consumes? is deprecated and will be
81
+ removed from Rails 6.1. Use string.is_utf8? instead.
82
+ MSG
83
+
80
84
  string.encoding == Encoding::UTF_8
81
85
  end
82
86
 
@@ -109,7 +113,7 @@ module ActiveSupport #:nodoc:
109
113
  #
110
114
  # 'Café'.mb_chars.reverse.to_s # => 'éfaC'
111
115
  def reverse
112
- chars(Unicode.unpack_graphemes(@wrapped_string).reverse.flatten.pack("U*"))
116
+ chars(@wrapped_string.scan(/\X/).reverse.join)
113
117
  end
114
118
 
115
119
  # Limits the byte size of the string to a number of bytes without breaking
@@ -118,35 +122,7 @@ module ActiveSupport #:nodoc:
118
122
  #
119
123
  # 'こんにちは'.mb_chars.limit(7).to_s # => "こん"
120
124
  def limit(limit)
121
- slice(0...translate_offset(limit))
122
- end
123
-
124
- # Converts characters in the string to uppercase.
125
- #
126
- # 'Laurent, où sont les tests ?'.mb_chars.upcase.to_s # => "LAURENT, OÙ SONT LES TESTS ?"
127
- def upcase
128
- chars Unicode.upcase(@wrapped_string)
129
- end
130
-
131
- # Converts characters in the string to lowercase.
132
- #
133
- # 'VĚDA A VÝZKUM'.mb_chars.downcase.to_s # => "věda a výzkum"
134
- def downcase
135
- chars Unicode.downcase(@wrapped_string)
136
- end
137
-
138
- # Converts characters in the string to the opposite case.
139
- #
140
- # 'El Cañón'.mb_chars.swapcase.to_s # => "eL cAÑÓN"
141
- def swapcase
142
- chars Unicode.swapcase(@wrapped_string)
143
- end
144
-
145
- # Converts the first character to uppercase and the remainder to lowercase.
146
- #
147
- # 'über'.mb_chars.capitalize.to_s # => "Über"
148
- def capitalize
149
- (slice(0) || chars("")).upcase + (slice(1..-1) || chars("")).downcase
125
+ truncate_bytes(limit, omission: nil)
150
126
  end
151
127
 
152
128
  # Capitalizes the first letter of every word, when possible.
@@ -154,7 +130,7 @@ module ActiveSupport #:nodoc:
154
130
  # "ÉL QUE SE ENTERÓ".mb_chars.titleize.to_s # => "Él Que Se Enteró"
155
131
  # "日本語".mb_chars.titleize.to_s # => "日本語"
156
132
  def titleize
157
- chars(downcase.to_s.gsub(/\b('?\S)/u) { Unicode.upcase($1) })
133
+ chars(downcase.to_s.gsub(/\b('?\S)/u) { $1.upcase })
158
134
  end
159
135
  alias_method :titlecase, :titleize
160
136
 
@@ -166,7 +142,24 @@ module ActiveSupport #:nodoc:
166
142
  # <tt>:c</tt>, <tt>:kc</tt>, <tt>:d</tt>, or <tt>:kd</tt>. Default is
167
143
  # ActiveSupport::Multibyte::Unicode.default_normalization_form
168
144
  def normalize(form = nil)
169
- chars(Unicode.normalize(@wrapped_string, form))
145
+ form ||= Unicode.default_normalization_form
146
+
147
+ # See https://www.unicode.org/reports/tr15, Table 1
148
+ if alias_form = Unicode::NORMALIZATION_FORM_ALIASES[form]
149
+ ActiveSupport::Deprecation.warn(<<-MSG.squish)
150
+ ActiveSupport::Multibyte::Chars#normalize is deprecated and will be
151
+ removed from Rails 6.1. Use #unicode_normalize(:#{alias_form}) instead.
152
+ MSG
153
+
154
+ send(:unicode_normalize, alias_form)
155
+ else
156
+ ActiveSupport::Deprecation.warn(<<-MSG.squish)
157
+ ActiveSupport::Multibyte::Chars#normalize is deprecated and will be
158
+ removed from Rails 6.1. Use #unicode_normalize instead.
159
+ MSG
160
+
161
+ raise ArgumentError, "#{form} is not a valid normalization variant", caller
162
+ end
170
163
  end
171
164
 
172
165
  # Performs canonical decomposition on all the characters.
@@ -190,7 +183,7 @@ module ActiveSupport #:nodoc:
190
183
  # 'क्षि'.mb_chars.length # => 4
191
184
  # 'क्षि'.mb_chars.grapheme_length # => 3
192
185
  def grapheme_length
193
- Unicode.unpack_graphemes(@wrapped_string).length
186
+ @wrapped_string.scan(/\X/).length
194
187
  end
195
188
 
196
189
  # Replaces all ISO-8859-1 or CP1252 characters by their UTF-8 equivalent
@@ -206,7 +199,7 @@ module ActiveSupport #:nodoc:
206
199
  to_s.as_json(options)
207
200
  end
208
201
 
209
- %w(capitalize downcase reverse tidy_bytes upcase).each do |method|
202
+ %w(reverse tidy_bytes).each do |method|
210
203
  define_method("#{method}!") do |*args|
211
204
  @wrapped_string = send(method, *args).to_s
212
205
  self
@@ -215,18 +208,6 @@ module ActiveSupport #:nodoc:
215
208
 
216
209
  private
217
210
 
218
- def translate_offset(byte_offset)
219
- return nil if byte_offset.nil?
220
- return 0 if @wrapped_string == ""
221
-
222
- begin
223
- @wrapped_string.byteslice(0...byte_offset).unpack("U*").length
224
- rescue ArgumentError
225
- byte_offset -= 1
226
- retry
227
- end
228
- end
229
-
230
211
  def chars(string)
231
212
  self.class.new(string)
232
213
  end
@@ -6,12 +6,19 @@ module ActiveSupport
6
6
  extend self
7
7
 
8
8
  # A list of all available normalization forms.
9
- # See http://www.unicode.org/reports/tr15/tr15-29.html for more
9
+ # See https://www.unicode.org/reports/tr15/tr15-29.html for more
10
10
  # information about normalization.
11
11
  NORMALIZATION_FORMS = [:c, :kc, :d, :kd]
12
12
 
13
+ NORMALIZATION_FORM_ALIASES = { # :nodoc:
14
+ c: :nfc,
15
+ d: :nfd,
16
+ kc: :nfkc,
17
+ kd: :nfkd
18
+ }
19
+
13
20
  # The Unicode version that is supported by the implementation
14
- UNICODE_VERSION = "9.0.0"
21
+ UNICODE_VERSION = RbConfig::CONFIG["UNICODE_VERSION"]
15
22
 
16
23
  # The default normalization used for operations that require
17
24
  # normalization. It can be set to any of the normalizations
@@ -21,199 +28,44 @@ module ActiveSupport
21
28
  attr_accessor :default_normalization_form
22
29
  @default_normalization_form = :kc
23
30
 
24
- # Hangul character boundaries and properties
25
- HANGUL_SBASE = 0xAC00
26
- HANGUL_LBASE = 0x1100
27
- HANGUL_VBASE = 0x1161
28
- HANGUL_TBASE = 0x11A7
29
- HANGUL_LCOUNT = 19
30
- HANGUL_VCOUNT = 21
31
- HANGUL_TCOUNT = 28
32
- HANGUL_NCOUNT = HANGUL_VCOUNT * HANGUL_TCOUNT
33
- HANGUL_SCOUNT = 11172
34
- HANGUL_SLAST = HANGUL_SBASE + HANGUL_SCOUNT
35
-
36
- # Detect whether the codepoint is in a certain character class. Returns
37
- # +true+ when it's in the specified character class and +false+ otherwise.
38
- # Valid character classes are: <tt>:cr</tt>, <tt>:lf</tt>, <tt>:l</tt>,
39
- # <tt>:v</tt>, <tt>:lv</tt>, <tt>:lvt</tt> and <tt>:t</tt>.
40
- #
41
- # Primarily used by the grapheme cluster support.
42
- def in_char_class?(codepoint, classes)
43
- classes.detect { |c| database.boundary[c] === codepoint } ? true : false
44
- end
45
-
46
31
  # Unpack the string at grapheme boundaries. Returns a list of character
47
32
  # lists.
48
33
  #
49
34
  # Unicode.unpack_graphemes('क्षि') # => [[2325, 2381], [2359], [2367]]
50
35
  # Unicode.unpack_graphemes('Café') # => [[67], [97], [102], [233]]
51
36
  def unpack_graphemes(string)
52
- codepoints = string.codepoints.to_a
53
- unpacked = []
54
- pos = 0
55
- marker = 0
56
- eoc = codepoints.length
57
- while (pos < eoc)
58
- pos += 1
59
- previous = codepoints[pos - 1]
60
- current = codepoints[pos]
61
-
62
- # See http://unicode.org/reports/tr29/#Grapheme_Cluster_Boundary_Rules
63
- should_break =
64
- if pos == eoc
65
- true
66
- # GB3. CR X LF
67
- elsif previous == database.boundary[:cr] && current == database.boundary[:lf]
68
- false
69
- # GB4. (Control|CR|LF) ÷
70
- elsif previous && in_char_class?(previous, [:control, :cr, :lf])
71
- true
72
- # GB5. ÷ (Control|CR|LF)
73
- elsif in_char_class?(current, [:control, :cr, :lf])
74
- true
75
- # GB6. L X (L|V|LV|LVT)
76
- elsif database.boundary[:l] === previous && in_char_class?(current, [:l, :v, :lv, :lvt])
77
- false
78
- # GB7. (LV|V) X (V|T)
79
- elsif in_char_class?(previous, [:lv, :v]) && in_char_class?(current, [:v, :t])
80
- false
81
- # GB8. (LVT|T) X (T)
82
- elsif in_char_class?(previous, [:lvt, :t]) && database.boundary[:t] === current
83
- false
84
- # GB9. X (Extend | ZWJ)
85
- elsif in_char_class?(current, [:extend, :zwj])
86
- false
87
- # GB9a. X SpacingMark
88
- elsif database.boundary[:spacingmark] === current
89
- false
90
- # GB9b. Prepend X
91
- elsif database.boundary[:prepend] === previous
92
- false
93
- # GB10. (E_Base | EBG) Extend* X E_Modifier
94
- elsif (marker...pos).any? { |i| in_char_class?(codepoints[i], [:e_base, :e_base_gaz]) && codepoints[i + 1...pos].all? { |c| database.boundary[:extend] === c } } && database.boundary[:e_modifier] === current
95
- false
96
- # GB11. ZWJ X (Glue_After_Zwj | EBG)
97
- elsif database.boundary[:zwj] === previous && in_char_class?(current, [:glue_after_zwj, :e_base_gaz])
98
- false
99
- # GB12. ^ (RI RI)* RI X RI
100
- # GB13. [^RI] (RI RI)* RI X RI
101
- elsif codepoints[marker..pos].all? { |c| database.boundary[:regional_indicator] === c } && codepoints[marker..pos].count { |c| database.boundary[:regional_indicator] === c }.even?
102
- false
103
- # GB999. Any ÷ Any
104
- else
105
- true
106
- end
37
+ ActiveSupport::Deprecation.warn(<<-MSG.squish)
38
+ ActiveSupport::Multibyte::Unicode#unpack_graphemes is deprecated and will be
39
+ removed from Rails 6.1. Use string.scan(/\X/).map(&:codepoints) instead.
40
+ MSG
107
41
 
108
- if should_break
109
- unpacked << codepoints[marker..pos - 1]
110
- marker = pos
111
- end
112
- end
113
- unpacked
42
+ string.scan(/\X/).map(&:codepoints)
114
43
  end
115
44
 
116
45
  # Reverse operation of unpack_graphemes.
117
46
  #
118
47
  # Unicode.pack_graphemes(Unicode.unpack_graphemes('क्षि')) # => 'क्षि'
119
48
  def pack_graphemes(unpacked)
120
- unpacked.flatten.pack("U*")
121
- end
49
+ ActiveSupport::Deprecation.warn(<<-MSG.squish)
50
+ ActiveSupport::Multibyte::Unicode#pack_graphemes is deprecated and will be
51
+ removed from Rails 6.1. Use array.flatten.pack("U*") instead.
52
+ MSG
122
53
 
123
- # Re-order codepoints so the string becomes canonical.
124
- def reorder_characters(codepoints)
125
- length = codepoints.length - 1
126
- pos = 0
127
- while pos < length do
128
- cp1, cp2 = database.codepoints[codepoints[pos]], database.codepoints[codepoints[pos + 1]]
129
- if (cp1.combining_class > cp2.combining_class) && (cp2.combining_class > 0)
130
- codepoints[pos..pos + 1] = cp2.code, cp1.code
131
- pos += (pos > 0 ? -1 : 1)
132
- else
133
- pos += 1
134
- end
135
- end
136
- codepoints
54
+ unpacked.flatten.pack("U*")
137
55
  end
138
56
 
139
57
  # Decompose composed characters to the decomposed form.
140
58
  def decompose(type, codepoints)
141
- codepoints.inject([]) do |decomposed, cp|
142
- # if it's a hangul syllable starter character
143
- if HANGUL_SBASE <= cp && cp < HANGUL_SLAST
144
- sindex = cp - HANGUL_SBASE
145
- ncp = [] # new codepoints
146
- ncp << HANGUL_LBASE + sindex / HANGUL_NCOUNT
147
- ncp << HANGUL_VBASE + (sindex % HANGUL_NCOUNT) / HANGUL_TCOUNT
148
- tindex = sindex % HANGUL_TCOUNT
149
- ncp << (HANGUL_TBASE + tindex) unless tindex == 0
150
- decomposed.concat ncp
151
- # if the codepoint is decomposable in with the current decomposition type
152
- elsif (ncp = database.codepoints[cp].decomp_mapping) && (!database.codepoints[cp].decomp_type || type == :compatibility)
153
- decomposed.concat decompose(type, ncp.dup)
154
- else
155
- decomposed << cp
156
- end
59
+ if type == :compatibility
60
+ codepoints.pack("U*").unicode_normalize(:nfkd).codepoints
61
+ else
62
+ codepoints.pack("U*").unicode_normalize(:nfd).codepoints
157
63
  end
158
64
  end
159
65
 
160
66
  # Compose decomposed characters to the composed form.
161
67
  def compose(codepoints)
162
- pos = 0
163
- eoa = codepoints.length - 1
164
- starter_pos = 0
165
- starter_char = codepoints[0]
166
- previous_combining_class = -1
167
- while pos < eoa
168
- pos += 1
169
- lindex = starter_char - HANGUL_LBASE
170
- # -- Hangul
171
- if 0 <= lindex && lindex < HANGUL_LCOUNT
172
- vindex = codepoints[starter_pos + 1] - HANGUL_VBASE rescue vindex = -1
173
- if 0 <= vindex && vindex < HANGUL_VCOUNT
174
- tindex = codepoints[starter_pos + 2] - HANGUL_TBASE rescue tindex = -1
175
- if 0 <= tindex && tindex < HANGUL_TCOUNT
176
- j = starter_pos + 2
177
- eoa -= 2
178
- else
179
- tindex = 0
180
- j = starter_pos + 1
181
- eoa -= 1
182
- end
183
- codepoints[starter_pos..j] = (lindex * HANGUL_VCOUNT + vindex) * HANGUL_TCOUNT + tindex + HANGUL_SBASE
184
- end
185
- starter_pos += 1
186
- starter_char = codepoints[starter_pos]
187
- # -- Other characters
188
- else
189
- current_char = codepoints[pos]
190
- current = database.codepoints[current_char]
191
- if current.combining_class > previous_combining_class
192
- if ref = database.composition_map[starter_char]
193
- composition = ref[current_char]
194
- else
195
- composition = nil
196
- end
197
- unless composition.nil?
198
- codepoints[starter_pos] = composition
199
- starter_char = composition
200
- codepoints.delete_at pos
201
- eoa -= 1
202
- pos -= 1
203
- previous_combining_class = -1
204
- else
205
- previous_combining_class = current.combining_class
206
- end
207
- else
208
- previous_combining_class = current.combining_class
209
- end
210
- if current.combining_class == 0
211
- starter_pos = pos
212
- starter_char = codepoints[pos]
213
- end
214
- end
215
- end
216
- codepoints
68
+ codepoints.pack("U*").unicode_normalize(:nfc).codepoints
217
69
  end
218
70
 
219
71
  # Rubinius' String#scrub, however, doesn't support ASCII-incompatible chars.
@@ -265,130 +117,41 @@ module ActiveSupport
265
117
  # Default is ActiveSupport::Multibyte::Unicode.default_normalization_form.
266
118
  def normalize(string, form = nil)
267
119
  form ||= @default_normalization_form
268
- # See http://www.unicode.org/reports/tr15, Table 1
269
- codepoints = string.codepoints.to_a
270
- case form
271
- when :d
272
- reorder_characters(decompose(:canonical, codepoints))
273
- when :c
274
- compose(reorder_characters(decompose(:canonical, codepoints)))
275
- when :kd
276
- reorder_characters(decompose(:compatibility, codepoints))
277
- when :kc
278
- compose(reorder_characters(decompose(:compatibility, codepoints)))
279
- else
280
- raise ArgumentError, "#{form} is not a valid normalization variant", caller
281
- end.pack("U*".freeze)
282
- end
283
-
284
- def downcase(string)
285
- apply_mapping string, :lowercase_mapping
286
- end
287
120
 
288
- def upcase(string)
289
- apply_mapping string, :uppercase_mapping
290
- end
121
+ # See https://www.unicode.org/reports/tr15, Table 1
122
+ if alias_form = NORMALIZATION_FORM_ALIASES[form]
123
+ ActiveSupport::Deprecation.warn(<<-MSG.squish)
124
+ ActiveSupport::Multibyte::Unicode#normalize is deprecated and will be
125
+ removed from Rails 6.1. Use String#unicode_normalize(:#{alias_form}) instead.
126
+ MSG
291
127
 
292
- def swapcase(string)
293
- apply_mapping string, :swapcase_mapping
294
- end
295
-
296
- # Holds data about a codepoint in the Unicode database.
297
- class Codepoint
298
- attr_accessor :code, :combining_class, :decomp_type, :decomp_mapping, :uppercase_mapping, :lowercase_mapping
299
-
300
- # Initializing Codepoint object with default values
301
- def initialize
302
- @combining_class = 0
303
- @uppercase_mapping = 0
304
- @lowercase_mapping = 0
305
- end
128
+ string.unicode_normalize(alias_form)
129
+ else
130
+ ActiveSupport::Deprecation.warn(<<-MSG.squish)
131
+ ActiveSupport::Multibyte::Unicode#normalize is deprecated and will be
132
+ removed from Rails 6.1. Use String#unicode_normalize instead.
133
+ MSG
306
134
 
307
- def swapcase_mapping
308
- uppercase_mapping > 0 ? uppercase_mapping : lowercase_mapping
135
+ raise ArgumentError, "#{form} is not a valid normalization variant", caller
309
136
  end
310
137
  end
311
138
 
312
- # Holds static data from the Unicode database.
313
- class UnicodeDatabase
314
- ATTRIBUTES = :codepoints, :composition_exclusion, :composition_map, :boundary, :cp1252
315
-
316
- attr_writer(*ATTRIBUTES)
139
+ %w(downcase upcase swapcase).each do |method|
140
+ define_method(method) do |string|
141
+ ActiveSupport::Deprecation.warn(<<-MSG.squish)
142
+ ActiveSupport::Multibyte::Unicode##{method} is deprecated and
143
+ will be removed from Rails 6.1. Use String methods directly.
144
+ MSG
317
145
 
318
- def initialize
319
- @codepoints = Hash.new(Codepoint.new)
320
- @composition_exclusion = []
321
- @composition_map = {}
322
- @boundary = {}
323
- @cp1252 = {}
324
- end
325
-
326
- # Lazy load the Unicode database so it's only loaded when it's actually used
327
- ATTRIBUTES.each do |attr_name|
328
- class_eval(<<-EOS, __FILE__, __LINE__ + 1)
329
- def #{attr_name} # def codepoints
330
- load # load
331
- @#{attr_name} # @codepoints
332
- end # end
333
- EOS
334
- end
335
-
336
- # Loads the Unicode database and returns all the internal objects of
337
- # UnicodeDatabase.
338
- def load
339
- begin
340
- @codepoints, @composition_exclusion, @composition_map, @boundary, @cp1252 = File.open(self.class.filename, "rb") { |f| Marshal.load f.read }
341
- rescue => e
342
- raise IOError.new("Couldn't load the Unicode tables for UTF8Handler (#{e.message}), ActiveSupport::Multibyte is unusable")
343
- end
344
-
345
- # Redefine the === method so we can write shorter rules for grapheme cluster breaks
346
- @boundary.each_key do |k|
347
- @boundary[k].instance_eval do
348
- def ===(other)
349
- detect { |i| i === other } ? true : false
350
- end
351
- end if @boundary[k].kind_of?(Array)
352
- end
353
-
354
- # define attr_reader methods for the instance variables
355
- class << self
356
- attr_reader(*ATTRIBUTES)
357
- end
358
- end
359
-
360
- # Returns the directory in which the data files are stored.
361
- def self.dirname
362
- File.expand_path("../values", __dir__)
363
- end
364
-
365
- # Returns the filename for the data file for this version.
366
- def self.filename
367
- File.expand_path File.join(dirname, "unicode_tables.dat")
146
+ string.send(method)
368
147
  end
369
148
  end
370
149
 
371
150
  private
372
151
 
373
- def apply_mapping(string, mapping)
374
- database.codepoints
375
- string.each_codepoint.map do |codepoint|
376
- cp = database.codepoints[codepoint]
377
- if cp && (ncp = cp.send(mapping)) && ncp > 0
378
- ncp
379
- else
380
- codepoint
381
- end
382
- end.pack("U*")
383
- end
384
-
385
152
  def recode_windows1252_chars(string)
386
153
  string.encode(Encoding::UTF_8, Encoding::Windows_1252, invalid: :replace, undef: :replace)
387
154
  end
388
-
389
- def database
390
- @database ||= UnicodeDatabase.new
391
- end
392
155
  end
393
156
  end
394
157
  end