activesupport 5.2.4.3 → 7.0.3

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of activesupport might be problematic. Click here for more details.

Files changed (228)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +244 -459
  3. data/MIT-LICENSE +1 -1
  4. data/README.rdoc +4 -3
  5. data/lib/active_support/actionable_error.rb +48 -0
  6. data/lib/active_support/array_inquirer.rb +2 -2
  7. data/lib/active_support/backtrace_cleaner.rb +31 -5
  8. data/lib/active_support/benchmarkable.rb +3 -3
  9. data/lib/active_support/cache/file_store.rb +47 -41
  10. data/lib/active_support/cache/mem_cache_store.rb +151 -40
  11. data/lib/active_support/cache/memory_store.rb +68 -34
  12. data/lib/active_support/cache/null_store.rb +16 -3
  13. data/lib/active_support/cache/redis_cache_store.rb +103 -101
  14. data/lib/active_support/cache/strategy/local_cache.rb +56 -64
  15. data/lib/active_support/cache.rb +333 -116
  16. data/lib/active_support/callbacks.rb +244 -128
  17. data/lib/active_support/code_generator.rb +65 -0
  18. data/lib/active_support/concern.rb +72 -5
  19. data/lib/active_support/concurrency/load_interlock_aware_monitor.rb +16 -0
  20. data/lib/active_support/concurrency/share_lock.rb +2 -3
  21. data/lib/active_support/configurable.rb +15 -16
  22. data/lib/active_support/configuration_file.rb +51 -0
  23. data/lib/active_support/core_ext/array/access.rb +15 -7
  24. data/lib/active_support/core_ext/array/conversions.rb +18 -17
  25. data/lib/active_support/core_ext/array/deprecated_conversions.rb +25 -0
  26. data/lib/active_support/core_ext/array/extract.rb +21 -0
  27. data/lib/active_support/core_ext/array/grouping.rb +6 -6
  28. data/lib/active_support/core_ext/array/inquiry.rb +2 -2
  29. data/lib/active_support/core_ext/array.rb +2 -1
  30. data/lib/active_support/core_ext/benchmark.rb +2 -2
  31. data/lib/active_support/core_ext/big_decimal/conversions.rb +1 -1
  32. data/lib/active_support/core_ext/class/attribute.rb +32 -47
  33. data/lib/active_support/core_ext/class/subclasses.rb +9 -22
  34. data/lib/active_support/core_ext/date/blank.rb +1 -1
  35. data/lib/active_support/core_ext/date/calculations.rb +15 -14
  36. data/lib/active_support/core_ext/date/conversions.rb +16 -15
  37. data/lib/active_support/core_ext/date/deprecated_conversions.rb +26 -0
  38. data/lib/active_support/core_ext/date.rb +1 -0
  39. data/lib/active_support/core_ext/date_and_time/calculations.rb +41 -51
  40. data/lib/active_support/core_ext/date_and_time/compatibility.rb +15 -0
  41. data/lib/active_support/core_ext/date_and_time/zones.rb +0 -1
  42. data/lib/active_support/core_ext/date_time/blank.rb +1 -1
  43. data/lib/active_support/core_ext/date_time/calculations.rb +1 -1
  44. data/lib/active_support/core_ext/date_time/conversions.rb +13 -14
  45. data/lib/active_support/core_ext/date_time/deprecated_conversions.rb +22 -0
  46. data/lib/active_support/core_ext/date_time.rb +1 -0
  47. data/lib/active_support/core_ext/digest/uuid.rb +39 -13
  48. data/lib/active_support/core_ext/enumerable.rb +241 -76
  49. data/lib/active_support/core_ext/file/atomic.rb +3 -1
  50. data/lib/active_support/core_ext/hash/conversions.rb +3 -4
  51. data/lib/active_support/core_ext/hash/deep_transform_values.rb +46 -0
  52. data/lib/active_support/core_ext/hash/except.rb +2 -2
  53. data/lib/active_support/core_ext/hash/indifferent_access.rb +3 -3
  54. data/lib/active_support/core_ext/hash/keys.rb +2 -31
  55. data/lib/active_support/core_ext/hash/slice.rb +6 -27
  56. data/lib/active_support/core_ext/hash.rb +1 -2
  57. data/lib/active_support/core_ext/integer/multiple.rb +1 -1
  58. data/lib/active_support/core_ext/kernel/reporting.rb +4 -4
  59. data/lib/active_support/core_ext/kernel/singleton_class.rb +1 -1
  60. data/lib/active_support/core_ext/kernel.rb +0 -1
  61. data/lib/active_support/core_ext/load_error.rb +1 -1
  62. data/lib/active_support/core_ext/module/attr_internal.rb +2 -2
  63. data/lib/active_support/core_ext/module/attribute_accessors.rb +32 -39
  64. data/lib/active_support/core_ext/module/attribute_accessors_per_thread.rb +35 -28
  65. data/lib/active_support/core_ext/module/concerning.rb +8 -2
  66. data/lib/active_support/core_ext/module/delegation.rb +70 -33
  67. data/lib/active_support/core_ext/module/introspection.rb +16 -15
  68. data/lib/active_support/core_ext/module/redefine_method.rb +8 -17
  69. data/lib/active_support/core_ext/module.rb +0 -1
  70. data/lib/active_support/core_ext/name_error.rb +23 -2
  71. data/lib/active_support/core_ext/numeric/conversions.rb +132 -129
  72. data/lib/active_support/core_ext/numeric/deprecated_conversions.rb +60 -0
  73. data/lib/active_support/core_ext/numeric.rb +1 -1
  74. data/lib/active_support/core_ext/object/acts_like.rb +29 -5
  75. data/lib/active_support/core_ext/object/blank.rb +3 -4
  76. data/lib/active_support/core_ext/object/deep_dup.rb +1 -1
  77. data/lib/active_support/core_ext/object/duplicable.rb +14 -110
  78. data/lib/active_support/core_ext/object/json.rb +44 -27
  79. data/lib/active_support/core_ext/object/to_query.rb +2 -2
  80. data/lib/active_support/core_ext/object/try.rb +24 -14
  81. data/lib/active_support/core_ext/object/with_options.rb +21 -2
  82. data/lib/active_support/core_ext/pathname/existence.rb +21 -0
  83. data/lib/active_support/core_ext/pathname.rb +3 -0
  84. data/lib/active_support/core_ext/range/compare_range.rb +23 -27
  85. data/lib/active_support/core_ext/range/conversions.rb +32 -30
  86. data/lib/active_support/core_ext/range/deprecated_conversions.rb +26 -0
  87. data/lib/active_support/core_ext/range/each.rb +1 -2
  88. data/lib/active_support/core_ext/range/include_time_with_zone.rb +4 -20
  89. data/lib/active_support/core_ext/range/overlaps.rb +1 -1
  90. data/lib/active_support/core_ext/range.rb +1 -1
  91. data/lib/active_support/core_ext/regexp.rb +8 -5
  92. data/lib/active_support/core_ext/securerandom.rb +23 -3
  93. data/lib/active_support/core_ext/string/access.rb +5 -16
  94. data/lib/active_support/core_ext/string/conversions.rb +3 -2
  95. data/lib/active_support/core_ext/string/filters.rb +42 -1
  96. data/lib/active_support/core_ext/string/inflections.rb +46 -7
  97. data/lib/active_support/core_ext/string/inquiry.rb +2 -1
  98. data/lib/active_support/core_ext/string/multibyte.rb +6 -5
  99. data/lib/active_support/core_ext/string/output_safety.rb +129 -20
  100. data/lib/active_support/core_ext/string/starts_ends_with.rb +2 -2
  101. data/lib/active_support/core_ext/string/strip.rb +3 -1
  102. data/lib/active_support/core_ext/symbol/starts_ends_with.rb +6 -0
  103. data/lib/active_support/core_ext/symbol.rb +3 -0
  104. data/lib/active_support/core_ext/time/calculations.rb +59 -10
  105. data/lib/active_support/core_ext/time/conversions.rb +15 -12
  106. data/lib/active_support/core_ext/time/deprecated_conversions.rb +22 -0
  107. data/lib/active_support/core_ext/time/zones.rb +7 -22
  108. data/lib/active_support/core_ext/time.rb +1 -0
  109. data/lib/active_support/core_ext/uri.rb +3 -22
  110. data/lib/active_support/core_ext.rb +2 -1
  111. data/lib/active_support/current_attributes/test_helper.rb +13 -0
  112. data/lib/active_support/current_attributes.rb +47 -16
  113. data/lib/active_support/dependencies/interlock.rb +10 -18
  114. data/lib/active_support/dependencies/require_dependency.rb +28 -0
  115. data/lib/active_support/dependencies.rb +60 -715
  116. data/lib/active_support/deprecation/behaviors.rb +21 -5
  117. data/lib/active_support/deprecation/disallowed.rb +56 -0
  118. data/lib/active_support/deprecation/instance_delegator.rb +0 -1
  119. data/lib/active_support/deprecation/method_wrappers.rb +18 -23
  120. data/lib/active_support/deprecation/proxy_wrappers.rb +31 -8
  121. data/lib/active_support/deprecation/reporting.rb +50 -7
  122. data/lib/active_support/deprecation.rb +7 -2
  123. data/lib/active_support/descendants_tracker.rb +190 -34
  124. data/lib/active_support/digest.rb +5 -3
  125. data/lib/active_support/duration/iso8601_parser.rb +5 -7
  126. data/lib/active_support/duration/iso8601_serializer.rb +27 -15
  127. data/lib/active_support/duration.rb +149 -67
  128. data/lib/active_support/encrypted_configuration.rb +12 -5
  129. data/lib/active_support/encrypted_file.rb +23 -5
  130. data/lib/active_support/environment_inquirer.rb +20 -0
  131. data/lib/active_support/error_reporter.rb +117 -0
  132. data/lib/active_support/evented_file_update_checker.rb +85 -122
  133. data/lib/active_support/execution_context/test_helper.rb +13 -0
  134. data/lib/active_support/execution_context.rb +53 -0
  135. data/lib/active_support/execution_wrapper.rb +44 -21
  136. data/lib/active_support/executor/test_helper.rb +7 -0
  137. data/lib/active_support/file_update_checker.rb +0 -1
  138. data/lib/active_support/fork_tracker.rb +71 -0
  139. data/lib/active_support/gem_version.rb +5 -5
  140. data/lib/active_support/hash_with_indifferent_access.rb +73 -43
  141. data/lib/active_support/html_safe_translation.rb +43 -0
  142. data/lib/active_support/i18n.rb +2 -0
  143. data/lib/active_support/i18n_railtie.rb +15 -8
  144. data/lib/active_support/inflector/inflections.rb +25 -14
  145. data/lib/active_support/inflector/methods.rb +38 -71
  146. data/lib/active_support/inflector/transliterate.rb +47 -18
  147. data/lib/active_support/isolated_execution_state.rb +72 -0
  148. data/lib/active_support/json/decoding.rb +25 -26
  149. data/lib/active_support/json/encoding.rb +14 -6
  150. data/lib/active_support/key_generator.rb +23 -38
  151. data/lib/active_support/lazy_load_hooks.rb +19 -5
  152. data/lib/active_support/locale/en.rb +33 -0
  153. data/lib/active_support/locale/en.yml +8 -4
  154. data/lib/active_support/log_subscriber/test_helper.rb +2 -2
  155. data/lib/active_support/log_subscriber.rb +51 -11
  156. data/lib/active_support/logger.rb +6 -22
  157. data/lib/active_support/logger_silence.rb +11 -19
  158. data/lib/active_support/logger_thread_safe_level.rb +45 -10
  159. data/lib/active_support/message_encryptor.rb +20 -19
  160. data/lib/active_support/message_verifier.rb +53 -21
  161. data/lib/active_support/messages/metadata.rb +13 -4
  162. data/lib/active_support/messages/rotation_configuration.rb +2 -1
  163. data/lib/active_support/messages/rotator.rb +10 -9
  164. data/lib/active_support/multibyte/chars.rb +17 -76
  165. data/lib/active_support/multibyte/unicode.rb +7 -331
  166. data/lib/active_support/multibyte.rb +1 -1
  167. data/lib/active_support/notifications/fanout.rb +163 -37
  168. data/lib/active_support/notifications/instrumenter.rb +90 -11
  169. data/lib/active_support/notifications.rb +88 -30
  170. data/lib/active_support/number_helper/number_converter.rb +6 -9
  171. data/lib/active_support/number_helper/number_to_currency_converter.rb +12 -12
  172. data/lib/active_support/number_helper/number_to_delimited_converter.rb +4 -3
  173. data/lib/active_support/number_helper/number_to_human_converter.rb +4 -3
  174. data/lib/active_support/number_helper/number_to_human_size_converter.rb +5 -4
  175. data/lib/active_support/number_helper/number_to_percentage_converter.rb +3 -1
  176. data/lib/active_support/number_helper/number_to_phone_converter.rb +3 -2
  177. data/lib/active_support/number_helper/number_to_rounded_converter.rb +12 -7
  178. data/lib/active_support/number_helper/rounding_helper.rb +12 -32
  179. data/lib/active_support/number_helper.rb +36 -12
  180. data/lib/active_support/option_merger.rb +15 -4
  181. data/lib/active_support/ordered_hash.rb +2 -2
  182. data/lib/active_support/ordered_options.rb +14 -4
  183. data/lib/active_support/parameter_filter.rb +138 -0
  184. data/lib/active_support/per_thread_registry.rb +6 -1
  185. data/lib/active_support/rails.rb +1 -10
  186. data/lib/active_support/railtie.rb +77 -5
  187. data/lib/active_support/reloader.rb +5 -6
  188. data/lib/active_support/rescuable.rb +8 -8
  189. data/lib/active_support/ruby_features.rb +7 -0
  190. data/lib/active_support/secure_compare_rotator.rb +51 -0
  191. data/lib/active_support/security_utils.rb +19 -12
  192. data/lib/active_support/string_inquirer.rb +2 -3
  193. data/lib/active_support/subscriber.rb +79 -46
  194. data/lib/active_support/tagged_logging.rb +58 -9
  195. data/lib/active_support/test_case.rb +79 -0
  196. data/lib/active_support/testing/assertions.rb +62 -11
  197. data/lib/active_support/testing/deprecation.rb +52 -2
  198. data/lib/active_support/testing/file_fixtures.rb +2 -0
  199. data/lib/active_support/testing/isolation.rb +4 -4
  200. data/lib/active_support/testing/method_call_assertions.rb +32 -5
  201. data/lib/active_support/testing/parallelization/server.rb +82 -0
  202. data/lib/active_support/testing/parallelization/worker.rb +103 -0
  203. data/lib/active_support/testing/parallelization.rb +55 -0
  204. data/lib/active_support/testing/parallelize_executor.rb +76 -0
  205. data/lib/active_support/testing/stream.rb +4 -7
  206. data/lib/active_support/testing/tagged_logging.rb +1 -1
  207. data/lib/active_support/testing/time_helpers.rb +60 -14
  208. data/lib/active_support/time_with_zone.rb +139 -64
  209. data/lib/active_support/values/time_zone.rb +66 -30
  210. data/lib/active_support/version.rb +1 -1
  211. data/lib/active_support/xml_mini/jdom.rb +3 -4
  212. data/lib/active_support/xml_mini/libxml.rb +7 -7
  213. data/lib/active_support/xml_mini/libxmlsax.rb +5 -5
  214. data/lib/active_support/xml_mini/nokogiri.rb +6 -6
  215. data/lib/active_support/xml_mini/nokogirisax.rb +4 -4
  216. data/lib/active_support/xml_mini/rexml.rb +11 -4
  217. data/lib/active_support/xml_mini.rb +7 -14
  218. data/lib/active_support.rb +30 -1
  219. metadata +64 -35
  220. data/lib/active_support/core_ext/array/prepend_and_append.rb +0 -9
  221. data/lib/active_support/core_ext/hash/compact.rb +0 -29
  222. data/lib/active_support/core_ext/hash/transform_values.rb +0 -32
  223. data/lib/active_support/core_ext/kernel/agnostics.rb +0 -13
  224. data/lib/active_support/core_ext/marshal.rb +0 -24
  225. data/lib/active_support/core_ext/module/reachable.rb +0 -11
  226. data/lib/active_support/core_ext/numeric/inquiry.rb +0 -28
  227. data/lib/active_support/core_ext/range/include_range.rb +0 -3
  228. data/lib/active_support/values/unicode_tables.dat +0 -0
@@ -4,10 +4,9 @@ require "active_support/json"
4
4
  require "active_support/core_ext/string/access"
5
5
  require "active_support/core_ext/string/behavior"
6
6
  require "active_support/core_ext/module/delegation"
7
- require "active_support/core_ext/regexp"
8
7
 
9
- module ActiveSupport #:nodoc:
10
- module Multibyte #:nodoc:
8
+ module ActiveSupport # :nodoc:
9
+ module Multibyte # :nodoc:
11
10
  # Chars enables you to work transparently with UTF-8 encoding in the Ruby
12
11
  # String class without having extensive knowledge about the encoding. A
13
12
  # Chars object accepts a string upon initialization and proxies String
@@ -18,7 +17,7 @@ module ActiveSupport #:nodoc:
18
17
  # through the +mb_chars+ method. Methods which would normally return a
19
18
  # String object now return a Chars object so methods can be chained.
20
19
  #
21
- # 'The Perfect String '.mb_chars.downcase.strip.normalize
20
+ # 'The Perfect String '.mb_chars.downcase.strip
22
21
  # # => #<ActiveSupport::Multibyte::Chars:0x007fdc434ccc10 @wrapped_string="the perfect string">
23
22
  #
24
23
  # Chars objects are perfectly interchangeable with String objects as long as
@@ -49,7 +48,7 @@ module ActiveSupport #:nodoc:
49
48
  alias to_s wrapped_string
50
49
  alias to_str wrapped_string
51
50
 
52
- delegate :<=>, :=~, :acts_like_string?, to: :wrapped_string
51
+ delegate :<=>, :=~, :match?, :acts_like_string?, to: :wrapped_string
53
52
 
54
53
  # Creates a new Chars instance by wrapping _string_.
55
54
  def initialize(string)
@@ -60,7 +59,7 @@ module ActiveSupport #:nodoc:
60
59
  # Forward all undefined methods to the wrapped string.
61
60
  def method_missing(method, *args, &block)
62
61
  result = @wrapped_string.__send__(method, *args, &block)
63
- if /!$/.match?(method)
62
+ if method.end_with?("!")
64
63
  self if result
65
64
  else
66
65
  result.kind_of?(String) ? chars(result) : result
@@ -74,12 +73,6 @@ module ActiveSupport #:nodoc:
74
73
  @wrapped_string.respond_to?(method, include_private)
75
74
  end
76
75
 
77
- # Returns +true+ when the proxy class can handle the string. Returns
78
- # +false+ otherwise.
79
- def self.consumes?(string)
80
- string.encoding == Encoding::UTF_8
81
- end
82
-
83
76
  # Works just like <tt>String#split</tt>, with the exception that the items
84
77
  # in the resulting list are Chars instances instead of String. This makes
85
78
  # chaining methods easier.
@@ -109,7 +102,7 @@ module ActiveSupport #:nodoc:
109
102
  #
110
103
  # 'Café'.mb_chars.reverse.to_s # => 'éfaC'
111
104
  def reverse
112
- chars(Unicode.unpack_graphemes(@wrapped_string).reverse.flatten.pack("U*"))
105
+ chars(@wrapped_string.grapheme_clusters.reverse.join)
113
106
  end
114
107
 
115
108
  # Limits the byte size of the string to a number of bytes without breaking
@@ -118,35 +111,7 @@ module ActiveSupport #:nodoc:
118
111
  #
119
112
  # 'こんにちは'.mb_chars.limit(7).to_s # => "こん"
120
113
  def limit(limit)
121
- slice(0...translate_offset(limit))
122
- end
123
-
124
- # Converts characters in the string to uppercase.
125
- #
126
- # 'Laurent, où sont les tests ?'.mb_chars.upcase.to_s # => "LAURENT, OÙ SONT LES TESTS ?"
127
- def upcase
128
- chars Unicode.upcase(@wrapped_string)
129
- end
130
-
131
- # Converts characters in the string to lowercase.
132
- #
133
- # 'VĚDA A VÝZKUM'.mb_chars.downcase.to_s # => "věda a výzkum"
134
- def downcase
135
- chars Unicode.downcase(@wrapped_string)
136
- end
137
-
138
- # Converts characters in the string to the opposite case.
139
- #
140
- # 'El Cañón'.mb_chars.swapcase.to_s # => "eL cAÑÓN"
141
- def swapcase
142
- chars Unicode.swapcase(@wrapped_string)
143
- end
144
-
145
- # Converts the first character to uppercase and the remainder to lowercase.
146
- #
147
- # 'über'.mb_chars.capitalize.to_s # => "Über"
148
- def capitalize
149
- (slice(0) || chars("")).upcase + (slice(1..-1) || chars("")).downcase
114
+ chars(@wrapped_string.truncate_bytes(limit, omission: nil))
150
115
  end
151
116
 
152
117
  # Capitalizes the first letter of every word, when possible.
@@ -154,33 +119,22 @@ module ActiveSupport #:nodoc:
154
119
  # "ÉL QUE SE ENTERÓ".mb_chars.titleize.to_s # => "Él Que Se Enteró"
155
120
  # "日本語".mb_chars.titleize.to_s # => "日本語"
156
121
  def titleize
157
- chars(downcase.to_s.gsub(/\b('?\S)/u) { Unicode.upcase($1) })
122
+ chars(downcase.to_s.gsub(/\b('?\S)/u) { $1.upcase })
158
123
  end
159
124
  alias_method :titlecase, :titleize
160
125
 
161
- # Returns the KC normalization of the string by default. NFKC is
162
- # considered the best normalization form for passing strings to databases
163
- # and validations.
164
- #
165
- # * <tt>form</tt> - The form you want to normalize in. Should be one of the following:
166
- # <tt>:c</tt>, <tt>:kc</tt>, <tt>:d</tt>, or <tt>:kd</tt>. Default is
167
- # ActiveSupport::Multibyte::Unicode.default_normalization_form
168
- def normalize(form = nil)
169
- chars(Unicode.normalize(@wrapped_string, form))
170
- end
171
-
172
126
  # Performs canonical decomposition on all the characters.
173
127
  #
174
- # 'é'.length # => 2
175
- # 'é'.mb_chars.decompose.to_s.length # => 3
128
+ # 'é'.length # => 1
129
+ # 'é'.mb_chars.decompose.to_s.length # => 2
176
130
  def decompose
177
131
  chars(Unicode.decompose(:canonical, @wrapped_string.codepoints.to_a).pack("U*"))
178
132
  end
179
133
 
180
134
  # Performs composition on all the characters.
181
135
  #
182
- # 'é'.length # => 3
183
- # 'é'.mb_chars.compose.to_s.length # => 2
136
+ # 'é'.length # => 1
137
+ # 'é'.mb_chars.compose.to_s.length # => 1
184
138
  def compose
185
139
  chars(Unicode.compose(@wrapped_string.codepoints.to_a).pack("U*"))
186
140
  end
@@ -188,9 +142,9 @@ module ActiveSupport #:nodoc:
188
142
  # Returns the number of grapheme clusters in the string.
189
143
  #
190
144
  # 'क्षि'.mb_chars.length # => 4
191
- # 'क्षि'.mb_chars.grapheme_length # => 3
145
+ # 'क्षि'.mb_chars.grapheme_length # => 2
192
146
  def grapheme_length
193
- Unicode.unpack_graphemes(@wrapped_string).length
147
+ @wrapped_string.grapheme_clusters.length
194
148
  end
195
149
 
196
150
  # Replaces all ISO-8859-1 or CP1252 characters by their UTF-8 equivalent
@@ -202,31 +156,18 @@ module ActiveSupport #:nodoc:
202
156
  chars(Unicode.tidy_bytes(@wrapped_string, force))
203
157
  end
204
158
 
205
- def as_json(options = nil) #:nodoc:
159
+ def as_json(options = nil) # :nodoc:
206
160
  to_s.as_json(options)
207
161
  end
208
162
 
209
- %w(capitalize downcase reverse tidy_bytes upcase).each do |method|
163
+ %w(reverse tidy_bytes).each do |method|
210
164
  define_method("#{method}!") do |*args|
211
- @wrapped_string = send(method, *args).to_s
165
+ @wrapped_string = public_send(method, *args).to_s
212
166
  self
213
167
  end
214
168
  end
215
169
 
216
170
  private
217
-
218
- def translate_offset(byte_offset)
219
- return nil if byte_offset.nil?
220
- return 0 if @wrapped_string == ""
221
-
222
- begin
223
- @wrapped_string.byteslice(0...byte_offset).unpack("U*").length
224
- rescue ArgumentError
225
- byte_offset -= 1
226
- retry
227
- end
228
- end
229
-
230
171
  def chars(string)
231
172
  self.class.new(string)
232
173
  end
@@ -5,215 +5,21 @@ module ActiveSupport
5
5
  module Unicode
6
6
  extend self
7
7
 
8
- # A list of all available normalization forms.
9
- # See http://www.unicode.org/reports/tr15/tr15-29.html for more
10
- # information about normalization.
11
- NORMALIZATION_FORMS = [:c, :kc, :d, :kd]
12
-
13
8
  # The Unicode version that is supported by the implementation
14
- UNICODE_VERSION = "9.0.0"
15
-
16
- # The default normalization used for operations that require
17
- # normalization. It can be set to any of the normalizations
18
- # in NORMALIZATION_FORMS.
19
- #
20
- # ActiveSupport::Multibyte::Unicode.default_normalization_form = :c
21
- attr_accessor :default_normalization_form
22
- @default_normalization_form = :kc
23
-
24
- # Hangul character boundaries and properties
25
- HANGUL_SBASE = 0xAC00
26
- HANGUL_LBASE = 0x1100
27
- HANGUL_VBASE = 0x1161
28
- HANGUL_TBASE = 0x11A7
29
- HANGUL_LCOUNT = 19
30
- HANGUL_VCOUNT = 21
31
- HANGUL_TCOUNT = 28
32
- HANGUL_NCOUNT = HANGUL_VCOUNT * HANGUL_TCOUNT
33
- HANGUL_SCOUNT = 11172
34
- HANGUL_SLAST = HANGUL_SBASE + HANGUL_SCOUNT
35
-
36
- # Detect whether the codepoint is in a certain character class. Returns
37
- # +true+ when it's in the specified character class and +false+ otherwise.
38
- # Valid character classes are: <tt>:cr</tt>, <tt>:lf</tt>, <tt>:l</tt>,
39
- # <tt>:v</tt>, <tt>:lv</tt>, <tt>:lvt</tt> and <tt>:t</tt>.
40
- #
41
- # Primarily used by the grapheme cluster support.
42
- def in_char_class?(codepoint, classes)
43
- classes.detect { |c| database.boundary[c] === codepoint } ? true : false
44
- end
45
-
46
- # Unpack the string at grapheme boundaries. Returns a list of character
47
- # lists.
48
- #
49
- # Unicode.unpack_graphemes('क्षि') # => [[2325, 2381], [2359], [2367]]
50
- # Unicode.unpack_graphemes('Café') # => [[67], [97], [102], [233]]
51
- def unpack_graphemes(string)
52
- codepoints = string.codepoints.to_a
53
- unpacked = []
54
- pos = 0
55
- marker = 0
56
- eoc = codepoints.length
57
- while (pos < eoc)
58
- pos += 1
59
- previous = codepoints[pos - 1]
60
- current = codepoints[pos]
61
-
62
- # See http://unicode.org/reports/tr29/#Grapheme_Cluster_Boundary_Rules
63
- should_break =
64
- if pos == eoc
65
- true
66
- # GB3. CR X LF
67
- elsif previous == database.boundary[:cr] && current == database.boundary[:lf]
68
- false
69
- # GB4. (Control|CR|LF) ÷
70
- elsif previous && in_char_class?(previous, [:control, :cr, :lf])
71
- true
72
- # GB5. ÷ (Control|CR|LF)
73
- elsif in_char_class?(current, [:control, :cr, :lf])
74
- true
75
- # GB6. L X (L|V|LV|LVT)
76
- elsif database.boundary[:l] === previous && in_char_class?(current, [:l, :v, :lv, :lvt])
77
- false
78
- # GB7. (LV|V) X (V|T)
79
- elsif in_char_class?(previous, [:lv, :v]) && in_char_class?(current, [:v, :t])
80
- false
81
- # GB8. (LVT|T) X (T)
82
- elsif in_char_class?(previous, [:lvt, :t]) && database.boundary[:t] === current
83
- false
84
- # GB9. X (Extend | ZWJ)
85
- elsif in_char_class?(current, [:extend, :zwj])
86
- false
87
- # GB9a. X SpacingMark
88
- elsif database.boundary[:spacingmark] === current
89
- false
90
- # GB9b. Prepend X
91
- elsif database.boundary[:prepend] === previous
92
- false
93
- # GB10. (E_Base | EBG) Extend* X E_Modifier
94
- elsif (marker...pos).any? { |i| in_char_class?(codepoints[i], [:e_base, :e_base_gaz]) && codepoints[i + 1...pos].all? { |c| database.boundary[:extend] === c } } && database.boundary[:e_modifier] === current
95
- false
96
- # GB11. ZWJ X (Glue_After_Zwj | EBG)
97
- elsif database.boundary[:zwj] === previous && in_char_class?(current, [:glue_after_zwj, :e_base_gaz])
98
- false
99
- # GB12. ^ (RI RI)* RI X RI
100
- # GB13. [^RI] (RI RI)* RI X RI
101
- elsif codepoints[marker..pos].all? { |c| database.boundary[:regional_indicator] === c } && codepoints[marker..pos].count { |c| database.boundary[:regional_indicator] === c }.even?
102
- false
103
- # GB999. Any ÷ Any
104
- else
105
- true
106
- end
107
-
108
- if should_break
109
- unpacked << codepoints[marker..pos - 1]
110
- marker = pos
111
- end
112
- end
113
- unpacked
114
- end
115
-
116
- # Reverse operation of unpack_graphemes.
117
- #
118
- # Unicode.pack_graphemes(Unicode.unpack_graphemes('क्षि')) # => 'क्षि'
119
- def pack_graphemes(unpacked)
120
- unpacked.flatten.pack("U*")
121
- end
122
-
123
- # Re-order codepoints so the string becomes canonical.
124
- def reorder_characters(codepoints)
125
- length = codepoints.length - 1
126
- pos = 0
127
- while pos < length do
128
- cp1, cp2 = database.codepoints[codepoints[pos]], database.codepoints[codepoints[pos + 1]]
129
- if (cp1.combining_class > cp2.combining_class) && (cp2.combining_class > 0)
130
- codepoints[pos..pos + 1] = cp2.code, cp1.code
131
- pos += (pos > 0 ? -1 : 1)
132
- else
133
- pos += 1
134
- end
135
- end
136
- codepoints
137
- end
9
+ UNICODE_VERSION = RbConfig::CONFIG["UNICODE_VERSION"]
138
10
 
139
11
  # Decompose composed characters to the decomposed form.
140
12
  def decompose(type, codepoints)
141
- codepoints.inject([]) do |decomposed, cp|
142
- # if it's a hangul syllable starter character
143
- if HANGUL_SBASE <= cp && cp < HANGUL_SLAST
144
- sindex = cp - HANGUL_SBASE
145
- ncp = [] # new codepoints
146
- ncp << HANGUL_LBASE + sindex / HANGUL_NCOUNT
147
- ncp << HANGUL_VBASE + (sindex % HANGUL_NCOUNT) / HANGUL_TCOUNT
148
- tindex = sindex % HANGUL_TCOUNT
149
- ncp << (HANGUL_TBASE + tindex) unless tindex == 0
150
- decomposed.concat ncp
151
- # if the codepoint is decomposable in with the current decomposition type
152
- elsif (ncp = database.codepoints[cp].decomp_mapping) && (!database.codepoints[cp].decomp_type || type == :compatibility)
153
- decomposed.concat decompose(type, ncp.dup)
154
- else
155
- decomposed << cp
156
- end
13
+ if type == :compatibility
14
+ codepoints.pack("U*").unicode_normalize(:nfkd).codepoints
15
+ else
16
+ codepoints.pack("U*").unicode_normalize(:nfd).codepoints
157
17
  end
158
18
  end
159
19
 
160
20
  # Compose decomposed characters to the composed form.
161
21
  def compose(codepoints)
162
- pos = 0
163
- eoa = codepoints.length - 1
164
- starter_pos = 0
165
- starter_char = codepoints[0]
166
- previous_combining_class = -1
167
- while pos < eoa
168
- pos += 1
169
- lindex = starter_char - HANGUL_LBASE
170
- # -- Hangul
171
- if 0 <= lindex && lindex < HANGUL_LCOUNT
172
- vindex = codepoints[starter_pos + 1] - HANGUL_VBASE rescue vindex = -1
173
- if 0 <= vindex && vindex < HANGUL_VCOUNT
174
- tindex = codepoints[starter_pos + 2] - HANGUL_TBASE rescue tindex = -1
175
- if 0 <= tindex && tindex < HANGUL_TCOUNT
176
- j = starter_pos + 2
177
- eoa -= 2
178
- else
179
- tindex = 0
180
- j = starter_pos + 1
181
- eoa -= 1
182
- end
183
- codepoints[starter_pos..j] = (lindex * HANGUL_VCOUNT + vindex) * HANGUL_TCOUNT + tindex + HANGUL_SBASE
184
- end
185
- starter_pos += 1
186
- starter_char = codepoints[starter_pos]
187
- # -- Other characters
188
- else
189
- current_char = codepoints[pos]
190
- current = database.codepoints[current_char]
191
- if current.combining_class > previous_combining_class
192
- if ref = database.composition_map[starter_char]
193
- composition = ref[current_char]
194
- else
195
- composition = nil
196
- end
197
- unless composition.nil?
198
- codepoints[starter_pos] = composition
199
- starter_char = composition
200
- codepoints.delete_at pos
201
- eoa -= 1
202
- pos -= 1
203
- previous_combining_class = -1
204
- else
205
- previous_combining_class = current.combining_class
206
- end
207
- else
208
- previous_combining_class = current.combining_class
209
- end
210
- if current.combining_class == 0
211
- starter_pos = pos
212
- starter_char = codepoints[pos]
213
- end
214
- end
215
- end
216
- codepoints
22
+ codepoints.pack("U*").unicode_normalize(:nfc).codepoints
217
23
  end
218
24
 
219
25
  # Rubinius' String#scrub, however, doesn't support ASCII-incompatible chars.
@@ -224,7 +30,7 @@ module ActiveSupport
224
30
  # Passing +true+ will forcibly tidy all bytes, assuming that the string's
225
31
  # encoding is entirely CP1252 or ISO-8859-1.
226
32
  def tidy_bytes(string, force = false)
227
- return string if string.empty?
33
+ return string if string.empty? || string.ascii_only?
228
34
  return recode_windows1252_chars(string) if force
229
35
  string.scrub { |bad| recode_windows1252_chars(bad) }
230
36
  end
@@ -255,140 +61,10 @@ module ActiveSupport
255
61
  end
256
62
  end
257
63
 
258
- # Returns the KC normalization of the string by default. NFKC is
259
- # considered the best normalization form for passing strings to databases
260
- # and validations.
261
- #
262
- # * <tt>string</tt> - The string to perform normalization on.
263
- # * <tt>form</tt> - The form you want to normalize in. Should be one of
264
- # the following: <tt>:c</tt>, <tt>:kc</tt>, <tt>:d</tt>, or <tt>:kd</tt>.
265
- # Default is ActiveSupport::Multibyte::Unicode.default_normalization_form.
266
- def normalize(string, form = nil)
267
- form ||= @default_normalization_form
268
- # See http://www.unicode.org/reports/tr15, Table 1
269
- codepoints = string.codepoints.to_a
270
- case form
271
- when :d
272
- reorder_characters(decompose(:canonical, codepoints))
273
- when :c
274
- compose(reorder_characters(decompose(:canonical, codepoints)))
275
- when :kd
276
- reorder_characters(decompose(:compatibility, codepoints))
277
- when :kc
278
- compose(reorder_characters(decompose(:compatibility, codepoints)))
279
- else
280
- raise ArgumentError, "#{form} is not a valid normalization variant", caller
281
- end.pack("U*".freeze)
282
- end
283
-
284
- def downcase(string)
285
- apply_mapping string, :lowercase_mapping
286
- end
287
-
288
- def upcase(string)
289
- apply_mapping string, :uppercase_mapping
290
- end
291
-
292
- def swapcase(string)
293
- apply_mapping string, :swapcase_mapping
294
- end
295
-
296
- # Holds data about a codepoint in the Unicode database.
297
- class Codepoint
298
- attr_accessor :code, :combining_class, :decomp_type, :decomp_mapping, :uppercase_mapping, :lowercase_mapping
299
-
300
- # Initializing Codepoint object with default values
301
- def initialize
302
- @combining_class = 0
303
- @uppercase_mapping = 0
304
- @lowercase_mapping = 0
305
- end
306
-
307
- def swapcase_mapping
308
- uppercase_mapping > 0 ? uppercase_mapping : lowercase_mapping
309
- end
310
- end
311
-
312
- # Holds static data from the Unicode database.
313
- class UnicodeDatabase
314
- ATTRIBUTES = :codepoints, :composition_exclusion, :composition_map, :boundary, :cp1252
315
-
316
- attr_writer(*ATTRIBUTES)
317
-
318
- def initialize
319
- @codepoints = Hash.new(Codepoint.new)
320
- @composition_exclusion = []
321
- @composition_map = {}
322
- @boundary = {}
323
- @cp1252 = {}
324
- end
325
-
326
- # Lazy load the Unicode database so it's only loaded when it's actually used
327
- ATTRIBUTES.each do |attr_name|
328
- class_eval(<<-EOS, __FILE__, __LINE__ + 1)
329
- def #{attr_name} # def codepoints
330
- load # load
331
- @#{attr_name} # @codepoints
332
- end # end
333
- EOS
334
- end
335
-
336
- # Loads the Unicode database and returns all the internal objects of
337
- # UnicodeDatabase.
338
- def load
339
- begin
340
- @codepoints, @composition_exclusion, @composition_map, @boundary, @cp1252 = File.open(self.class.filename, "rb") { |f| Marshal.load f.read }
341
- rescue => e
342
- raise IOError.new("Couldn't load the Unicode tables for UTF8Handler (#{e.message}), ActiveSupport::Multibyte is unusable")
343
- end
344
-
345
- # Redefine the === method so we can write shorter rules for grapheme cluster breaks
346
- @boundary.each_key do |k|
347
- @boundary[k].instance_eval do
348
- def ===(other)
349
- detect { |i| i === other } ? true : false
350
- end
351
- end if @boundary[k].kind_of?(Array)
352
- end
353
-
354
- # define attr_reader methods for the instance variables
355
- class << self
356
- attr_reader(*ATTRIBUTES)
357
- end
358
- end
359
-
360
- # Returns the directory in which the data files are stored.
361
- def self.dirname
362
- File.expand_path("../values", __dir__)
363
- end
364
-
365
- # Returns the filename for the data file for this version.
366
- def self.filename
367
- File.expand_path File.join(dirname, "unicode_tables.dat")
368
- end
369
- end
370
-
371
64
  private
372
-
373
- def apply_mapping(string, mapping)
374
- database.codepoints
375
- string.each_codepoint.map do |codepoint|
376
- cp = database.codepoints[codepoint]
377
- if cp && (ncp = cp.send(mapping)) && ncp > 0
378
- ncp
379
- else
380
- codepoint
381
- end
382
- end.pack("U*")
383
- end
384
-
385
65
  def recode_windows1252_chars(string)
386
66
  string.encode(Encoding::UTF_8, Encoding::Windows_1252, invalid: :replace, undef: :replace)
387
67
  end
388
-
389
- def database
390
- @database ||= UnicodeDatabase.new
391
- end
392
68
  end
393
69
  end
394
70
  end
@@ -1,6 +1,6 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- module ActiveSupport #:nodoc:
3
+ module ActiveSupport # :nodoc:
4
4
  module Multibyte
5
5
  autoload :Chars, "active_support/multibyte/chars"
6
6
  autoload :Unicode, "active_support/multibyte/unicode"