activesupport 5.2.4.3 → 7.0.3
Sign up to get free protection for your applications and to get access to all the features.
Potentially problematic release.
This version of activesupport might be problematic. Click here for more details.
- checksums.yaml +4 -4
- data/CHANGELOG.md +244 -459
- data/MIT-LICENSE +1 -1
- data/README.rdoc +4 -3
- data/lib/active_support/actionable_error.rb +48 -0
- data/lib/active_support/array_inquirer.rb +2 -2
- data/lib/active_support/backtrace_cleaner.rb +31 -5
- data/lib/active_support/benchmarkable.rb +3 -3
- data/lib/active_support/cache/file_store.rb +47 -41
- data/lib/active_support/cache/mem_cache_store.rb +151 -40
- data/lib/active_support/cache/memory_store.rb +68 -34
- data/lib/active_support/cache/null_store.rb +16 -3
- data/lib/active_support/cache/redis_cache_store.rb +103 -101
- data/lib/active_support/cache/strategy/local_cache.rb +56 -64
- data/lib/active_support/cache.rb +333 -116
- data/lib/active_support/callbacks.rb +244 -128
- data/lib/active_support/code_generator.rb +65 -0
- data/lib/active_support/concern.rb +72 -5
- data/lib/active_support/concurrency/load_interlock_aware_monitor.rb +16 -0
- data/lib/active_support/concurrency/share_lock.rb +2 -3
- data/lib/active_support/configurable.rb +15 -16
- data/lib/active_support/configuration_file.rb +51 -0
- data/lib/active_support/core_ext/array/access.rb +15 -7
- data/lib/active_support/core_ext/array/conversions.rb +18 -17
- data/lib/active_support/core_ext/array/deprecated_conversions.rb +25 -0
- data/lib/active_support/core_ext/array/extract.rb +21 -0
- data/lib/active_support/core_ext/array/grouping.rb +6 -6
- data/lib/active_support/core_ext/array/inquiry.rb +2 -2
- data/lib/active_support/core_ext/array.rb +2 -1
- data/lib/active_support/core_ext/benchmark.rb +2 -2
- data/lib/active_support/core_ext/big_decimal/conversions.rb +1 -1
- data/lib/active_support/core_ext/class/attribute.rb +32 -47
- data/lib/active_support/core_ext/class/subclasses.rb +9 -22
- data/lib/active_support/core_ext/date/blank.rb +1 -1
- data/lib/active_support/core_ext/date/calculations.rb +15 -14
- data/lib/active_support/core_ext/date/conversions.rb +16 -15
- data/lib/active_support/core_ext/date/deprecated_conversions.rb +26 -0
- data/lib/active_support/core_ext/date.rb +1 -0
- data/lib/active_support/core_ext/date_and_time/calculations.rb +41 -51
- data/lib/active_support/core_ext/date_and_time/compatibility.rb +15 -0
- data/lib/active_support/core_ext/date_and_time/zones.rb +0 -1
- data/lib/active_support/core_ext/date_time/blank.rb +1 -1
- data/lib/active_support/core_ext/date_time/calculations.rb +1 -1
- data/lib/active_support/core_ext/date_time/conversions.rb +13 -14
- data/lib/active_support/core_ext/date_time/deprecated_conversions.rb +22 -0
- data/lib/active_support/core_ext/date_time.rb +1 -0
- data/lib/active_support/core_ext/digest/uuid.rb +39 -13
- data/lib/active_support/core_ext/enumerable.rb +241 -76
- data/lib/active_support/core_ext/file/atomic.rb +3 -1
- data/lib/active_support/core_ext/hash/conversions.rb +3 -4
- data/lib/active_support/core_ext/hash/deep_transform_values.rb +46 -0
- data/lib/active_support/core_ext/hash/except.rb +2 -2
- data/lib/active_support/core_ext/hash/indifferent_access.rb +3 -3
- data/lib/active_support/core_ext/hash/keys.rb +2 -31
- data/lib/active_support/core_ext/hash/slice.rb +6 -27
- data/lib/active_support/core_ext/hash.rb +1 -2
- data/lib/active_support/core_ext/integer/multiple.rb +1 -1
- data/lib/active_support/core_ext/kernel/reporting.rb +4 -4
- data/lib/active_support/core_ext/kernel/singleton_class.rb +1 -1
- data/lib/active_support/core_ext/kernel.rb +0 -1
- data/lib/active_support/core_ext/load_error.rb +1 -1
- data/lib/active_support/core_ext/module/attr_internal.rb +2 -2
- data/lib/active_support/core_ext/module/attribute_accessors.rb +32 -39
- data/lib/active_support/core_ext/module/attribute_accessors_per_thread.rb +35 -28
- data/lib/active_support/core_ext/module/concerning.rb +8 -2
- data/lib/active_support/core_ext/module/delegation.rb +70 -33
- data/lib/active_support/core_ext/module/introspection.rb +16 -15
- data/lib/active_support/core_ext/module/redefine_method.rb +8 -17
- data/lib/active_support/core_ext/module.rb +0 -1
- data/lib/active_support/core_ext/name_error.rb +23 -2
- data/lib/active_support/core_ext/numeric/conversions.rb +132 -129
- data/lib/active_support/core_ext/numeric/deprecated_conversions.rb +60 -0
- data/lib/active_support/core_ext/numeric.rb +1 -1
- data/lib/active_support/core_ext/object/acts_like.rb +29 -5
- data/lib/active_support/core_ext/object/blank.rb +3 -4
- data/lib/active_support/core_ext/object/deep_dup.rb +1 -1
- data/lib/active_support/core_ext/object/duplicable.rb +14 -110
- data/lib/active_support/core_ext/object/json.rb +44 -27
- data/lib/active_support/core_ext/object/to_query.rb +2 -2
- data/lib/active_support/core_ext/object/try.rb +24 -14
- data/lib/active_support/core_ext/object/with_options.rb +21 -2
- data/lib/active_support/core_ext/pathname/existence.rb +21 -0
- data/lib/active_support/core_ext/pathname.rb +3 -0
- data/lib/active_support/core_ext/range/compare_range.rb +23 -27
- data/lib/active_support/core_ext/range/conversions.rb +32 -30
- data/lib/active_support/core_ext/range/deprecated_conversions.rb +26 -0
- data/lib/active_support/core_ext/range/each.rb +1 -2
- data/lib/active_support/core_ext/range/include_time_with_zone.rb +4 -20
- data/lib/active_support/core_ext/range/overlaps.rb +1 -1
- data/lib/active_support/core_ext/range.rb +1 -1
- data/lib/active_support/core_ext/regexp.rb +8 -5
- data/lib/active_support/core_ext/securerandom.rb +23 -3
- data/lib/active_support/core_ext/string/access.rb +5 -16
- data/lib/active_support/core_ext/string/conversions.rb +3 -2
- data/lib/active_support/core_ext/string/filters.rb +42 -1
- data/lib/active_support/core_ext/string/inflections.rb +46 -7
- data/lib/active_support/core_ext/string/inquiry.rb +2 -1
- data/lib/active_support/core_ext/string/multibyte.rb +6 -5
- data/lib/active_support/core_ext/string/output_safety.rb +129 -20
- data/lib/active_support/core_ext/string/starts_ends_with.rb +2 -2
- data/lib/active_support/core_ext/string/strip.rb +3 -1
- data/lib/active_support/core_ext/symbol/starts_ends_with.rb +6 -0
- data/lib/active_support/core_ext/symbol.rb +3 -0
- data/lib/active_support/core_ext/time/calculations.rb +59 -10
- data/lib/active_support/core_ext/time/conversions.rb +15 -12
- data/lib/active_support/core_ext/time/deprecated_conversions.rb +22 -0
- data/lib/active_support/core_ext/time/zones.rb +7 -22
- data/lib/active_support/core_ext/time.rb +1 -0
- data/lib/active_support/core_ext/uri.rb +3 -22
- data/lib/active_support/core_ext.rb +2 -1
- data/lib/active_support/current_attributes/test_helper.rb +13 -0
- data/lib/active_support/current_attributes.rb +47 -16
- data/lib/active_support/dependencies/interlock.rb +10 -18
- data/lib/active_support/dependencies/require_dependency.rb +28 -0
- data/lib/active_support/dependencies.rb +60 -715
- data/lib/active_support/deprecation/behaviors.rb +21 -5
- data/lib/active_support/deprecation/disallowed.rb +56 -0
- data/lib/active_support/deprecation/instance_delegator.rb +0 -1
- data/lib/active_support/deprecation/method_wrappers.rb +18 -23
- data/lib/active_support/deprecation/proxy_wrappers.rb +31 -8
- data/lib/active_support/deprecation/reporting.rb +50 -7
- data/lib/active_support/deprecation.rb +7 -2
- data/lib/active_support/descendants_tracker.rb +190 -34
- data/lib/active_support/digest.rb +5 -3
- data/lib/active_support/duration/iso8601_parser.rb +5 -7
- data/lib/active_support/duration/iso8601_serializer.rb +27 -15
- data/lib/active_support/duration.rb +149 -67
- data/lib/active_support/encrypted_configuration.rb +12 -5
- data/lib/active_support/encrypted_file.rb +23 -5
- data/lib/active_support/environment_inquirer.rb +20 -0
- data/lib/active_support/error_reporter.rb +117 -0
- data/lib/active_support/evented_file_update_checker.rb +85 -122
- data/lib/active_support/execution_context/test_helper.rb +13 -0
- data/lib/active_support/execution_context.rb +53 -0
- data/lib/active_support/execution_wrapper.rb +44 -21
- data/lib/active_support/executor/test_helper.rb +7 -0
- data/lib/active_support/file_update_checker.rb +0 -1
- data/lib/active_support/fork_tracker.rb +71 -0
- data/lib/active_support/gem_version.rb +5 -5
- data/lib/active_support/hash_with_indifferent_access.rb +73 -43
- data/lib/active_support/html_safe_translation.rb +43 -0
- data/lib/active_support/i18n.rb +2 -0
- data/lib/active_support/i18n_railtie.rb +15 -8
- data/lib/active_support/inflector/inflections.rb +25 -14
- data/lib/active_support/inflector/methods.rb +38 -71
- data/lib/active_support/inflector/transliterate.rb +47 -18
- data/lib/active_support/isolated_execution_state.rb +72 -0
- data/lib/active_support/json/decoding.rb +25 -26
- data/lib/active_support/json/encoding.rb +14 -6
- data/lib/active_support/key_generator.rb +23 -38
- data/lib/active_support/lazy_load_hooks.rb +19 -5
- data/lib/active_support/locale/en.rb +33 -0
- data/lib/active_support/locale/en.yml +8 -4
- data/lib/active_support/log_subscriber/test_helper.rb +2 -2
- data/lib/active_support/log_subscriber.rb +51 -11
- data/lib/active_support/logger.rb +6 -22
- data/lib/active_support/logger_silence.rb +11 -19
- data/lib/active_support/logger_thread_safe_level.rb +45 -10
- data/lib/active_support/message_encryptor.rb +20 -19
- data/lib/active_support/message_verifier.rb +53 -21
- data/lib/active_support/messages/metadata.rb +13 -4
- data/lib/active_support/messages/rotation_configuration.rb +2 -1
- data/lib/active_support/messages/rotator.rb +10 -9
- data/lib/active_support/multibyte/chars.rb +17 -76
- data/lib/active_support/multibyte/unicode.rb +7 -331
- data/lib/active_support/multibyte.rb +1 -1
- data/lib/active_support/notifications/fanout.rb +163 -37
- data/lib/active_support/notifications/instrumenter.rb +90 -11
- data/lib/active_support/notifications.rb +88 -30
- data/lib/active_support/number_helper/number_converter.rb +6 -9
- data/lib/active_support/number_helper/number_to_currency_converter.rb +12 -12
- data/lib/active_support/number_helper/number_to_delimited_converter.rb +4 -3
- data/lib/active_support/number_helper/number_to_human_converter.rb +4 -3
- data/lib/active_support/number_helper/number_to_human_size_converter.rb +5 -4
- data/lib/active_support/number_helper/number_to_percentage_converter.rb +3 -1
- data/lib/active_support/number_helper/number_to_phone_converter.rb +3 -2
- data/lib/active_support/number_helper/number_to_rounded_converter.rb +12 -7
- data/lib/active_support/number_helper/rounding_helper.rb +12 -32
- data/lib/active_support/number_helper.rb +36 -12
- data/lib/active_support/option_merger.rb +15 -4
- data/lib/active_support/ordered_hash.rb +2 -2
- data/lib/active_support/ordered_options.rb +14 -4
- data/lib/active_support/parameter_filter.rb +138 -0
- data/lib/active_support/per_thread_registry.rb +6 -1
- data/lib/active_support/rails.rb +1 -10
- data/lib/active_support/railtie.rb +77 -5
- data/lib/active_support/reloader.rb +5 -6
- data/lib/active_support/rescuable.rb +8 -8
- data/lib/active_support/ruby_features.rb +7 -0
- data/lib/active_support/secure_compare_rotator.rb +51 -0
- data/lib/active_support/security_utils.rb +19 -12
- data/lib/active_support/string_inquirer.rb +2 -3
- data/lib/active_support/subscriber.rb +79 -46
- data/lib/active_support/tagged_logging.rb +58 -9
- data/lib/active_support/test_case.rb +79 -0
- data/lib/active_support/testing/assertions.rb +62 -11
- data/lib/active_support/testing/deprecation.rb +52 -2
- data/lib/active_support/testing/file_fixtures.rb +2 -0
- data/lib/active_support/testing/isolation.rb +4 -4
- data/lib/active_support/testing/method_call_assertions.rb +32 -5
- data/lib/active_support/testing/parallelization/server.rb +82 -0
- data/lib/active_support/testing/parallelization/worker.rb +103 -0
- data/lib/active_support/testing/parallelization.rb +55 -0
- data/lib/active_support/testing/parallelize_executor.rb +76 -0
- data/lib/active_support/testing/stream.rb +4 -7
- data/lib/active_support/testing/tagged_logging.rb +1 -1
- data/lib/active_support/testing/time_helpers.rb +60 -14
- data/lib/active_support/time_with_zone.rb +139 -64
- data/lib/active_support/values/time_zone.rb +66 -30
- data/lib/active_support/version.rb +1 -1
- data/lib/active_support/xml_mini/jdom.rb +3 -4
- data/lib/active_support/xml_mini/libxml.rb +7 -7
- data/lib/active_support/xml_mini/libxmlsax.rb +5 -5
- data/lib/active_support/xml_mini/nokogiri.rb +6 -6
- data/lib/active_support/xml_mini/nokogirisax.rb +4 -4
- data/lib/active_support/xml_mini/rexml.rb +11 -4
- data/lib/active_support/xml_mini.rb +7 -14
- data/lib/active_support.rb +30 -1
- metadata +64 -35
- data/lib/active_support/core_ext/array/prepend_and_append.rb +0 -9
- data/lib/active_support/core_ext/hash/compact.rb +0 -29
- data/lib/active_support/core_ext/hash/transform_values.rb +0 -32
- data/lib/active_support/core_ext/kernel/agnostics.rb +0 -13
- data/lib/active_support/core_ext/marshal.rb +0 -24
- data/lib/active_support/core_ext/module/reachable.rb +0 -11
- data/lib/active_support/core_ext/numeric/inquiry.rb +0 -28
- data/lib/active_support/core_ext/range/include_range.rb +0 -3
- data/lib/active_support/values/unicode_tables.dat +0 -0
@@ -4,10 +4,9 @@ require "active_support/json"
|
|
4
4
|
require "active_support/core_ext/string/access"
|
5
5
|
require "active_support/core_ext/string/behavior"
|
6
6
|
require "active_support/core_ext/module/delegation"
|
7
|
-
require "active_support/core_ext/regexp"
|
8
7
|
|
9
|
-
module ActiveSupport
|
10
|
-
module Multibyte
|
8
|
+
module ActiveSupport # :nodoc:
|
9
|
+
module Multibyte # :nodoc:
|
11
10
|
# Chars enables you to work transparently with UTF-8 encoding in the Ruby
|
12
11
|
# String class without having extensive knowledge about the encoding. A
|
13
12
|
# Chars object accepts a string upon initialization and proxies String
|
@@ -18,7 +17,7 @@ module ActiveSupport #:nodoc:
|
|
18
17
|
# through the +mb_chars+ method. Methods which would normally return a
|
19
18
|
# String object now return a Chars object so methods can be chained.
|
20
19
|
#
|
21
|
-
# 'The Perfect String '.mb_chars.downcase.strip
|
20
|
+
# 'The Perfect String '.mb_chars.downcase.strip
|
22
21
|
# # => #<ActiveSupport::Multibyte::Chars:0x007fdc434ccc10 @wrapped_string="the perfect string">
|
23
22
|
#
|
24
23
|
# Chars objects are perfectly interchangeable with String objects as long as
|
@@ -49,7 +48,7 @@ module ActiveSupport #:nodoc:
|
|
49
48
|
alias to_s wrapped_string
|
50
49
|
alias to_str wrapped_string
|
51
50
|
|
52
|
-
delegate :<=>, :=~, :acts_like_string?, to: :wrapped_string
|
51
|
+
delegate :<=>, :=~, :match?, :acts_like_string?, to: :wrapped_string
|
53
52
|
|
54
53
|
# Creates a new Chars instance by wrapping _string_.
|
55
54
|
def initialize(string)
|
@@ -60,7 +59,7 @@ module ActiveSupport #:nodoc:
|
|
60
59
|
# Forward all undefined methods to the wrapped string.
|
61
60
|
def method_missing(method, *args, &block)
|
62
61
|
result = @wrapped_string.__send__(method, *args, &block)
|
63
|
-
if
|
62
|
+
if method.end_with?("!")
|
64
63
|
self if result
|
65
64
|
else
|
66
65
|
result.kind_of?(String) ? chars(result) : result
|
@@ -74,12 +73,6 @@ module ActiveSupport #:nodoc:
|
|
74
73
|
@wrapped_string.respond_to?(method, include_private)
|
75
74
|
end
|
76
75
|
|
77
|
-
# Returns +true+ when the proxy class can handle the string. Returns
|
78
|
-
# +false+ otherwise.
|
79
|
-
def self.consumes?(string)
|
80
|
-
string.encoding == Encoding::UTF_8
|
81
|
-
end
|
82
|
-
|
83
76
|
# Works just like <tt>String#split</tt>, with the exception that the items
|
84
77
|
# in the resulting list are Chars instances instead of String. This makes
|
85
78
|
# chaining methods easier.
|
@@ -109,7 +102,7 @@ module ActiveSupport #:nodoc:
|
|
109
102
|
#
|
110
103
|
# 'Café'.mb_chars.reverse.to_s # => 'éfaC'
|
111
104
|
def reverse
|
112
|
-
chars(
|
105
|
+
chars(@wrapped_string.grapheme_clusters.reverse.join)
|
113
106
|
end
|
114
107
|
|
115
108
|
# Limits the byte size of the string to a number of bytes without breaking
|
@@ -118,35 +111,7 @@ module ActiveSupport #:nodoc:
|
|
118
111
|
#
|
119
112
|
# 'こんにちは'.mb_chars.limit(7).to_s # => "こん"
|
120
113
|
def limit(limit)
|
121
|
-
|
122
|
-
end
|
123
|
-
|
124
|
-
# Converts characters in the string to uppercase.
|
125
|
-
#
|
126
|
-
# 'Laurent, où sont les tests ?'.mb_chars.upcase.to_s # => "LAURENT, OÙ SONT LES TESTS ?"
|
127
|
-
def upcase
|
128
|
-
chars Unicode.upcase(@wrapped_string)
|
129
|
-
end
|
130
|
-
|
131
|
-
# Converts characters in the string to lowercase.
|
132
|
-
#
|
133
|
-
# 'VĚDA A VÝZKUM'.mb_chars.downcase.to_s # => "věda a výzkum"
|
134
|
-
def downcase
|
135
|
-
chars Unicode.downcase(@wrapped_string)
|
136
|
-
end
|
137
|
-
|
138
|
-
# Converts characters in the string to the opposite case.
|
139
|
-
#
|
140
|
-
# 'El Cañón'.mb_chars.swapcase.to_s # => "eL cAÑÓN"
|
141
|
-
def swapcase
|
142
|
-
chars Unicode.swapcase(@wrapped_string)
|
143
|
-
end
|
144
|
-
|
145
|
-
# Converts the first character to uppercase and the remainder to lowercase.
|
146
|
-
#
|
147
|
-
# 'über'.mb_chars.capitalize.to_s # => "Über"
|
148
|
-
def capitalize
|
149
|
-
(slice(0) || chars("")).upcase + (slice(1..-1) || chars("")).downcase
|
114
|
+
chars(@wrapped_string.truncate_bytes(limit, omission: nil))
|
150
115
|
end
|
151
116
|
|
152
117
|
# Capitalizes the first letter of every word, when possible.
|
@@ -154,33 +119,22 @@ module ActiveSupport #:nodoc:
|
|
154
119
|
# "ÉL QUE SE ENTERÓ".mb_chars.titleize.to_s # => "Él Que Se Enteró"
|
155
120
|
# "日本語".mb_chars.titleize.to_s # => "日本語"
|
156
121
|
def titleize
|
157
|
-
chars(downcase.to_s.gsub(/\b('?\S)/u) {
|
122
|
+
chars(downcase.to_s.gsub(/\b('?\S)/u) { $1.upcase })
|
158
123
|
end
|
159
124
|
alias_method :titlecase, :titleize
|
160
125
|
|
161
|
-
# Returns the KC normalization of the string by default. NFKC is
|
162
|
-
# considered the best normalization form for passing strings to databases
|
163
|
-
# and validations.
|
164
|
-
#
|
165
|
-
# * <tt>form</tt> - The form you want to normalize in. Should be one of the following:
|
166
|
-
# <tt>:c</tt>, <tt>:kc</tt>, <tt>:d</tt>, or <tt>:kd</tt>. Default is
|
167
|
-
# ActiveSupport::Multibyte::Unicode.default_normalization_form
|
168
|
-
def normalize(form = nil)
|
169
|
-
chars(Unicode.normalize(@wrapped_string, form))
|
170
|
-
end
|
171
|
-
|
172
126
|
# Performs canonical decomposition on all the characters.
|
173
127
|
#
|
174
|
-
# 'é'.length # =>
|
175
|
-
# 'é'.mb_chars.decompose.to_s.length # =>
|
128
|
+
# 'é'.length # => 1
|
129
|
+
# 'é'.mb_chars.decompose.to_s.length # => 2
|
176
130
|
def decompose
|
177
131
|
chars(Unicode.decompose(:canonical, @wrapped_string.codepoints.to_a).pack("U*"))
|
178
132
|
end
|
179
133
|
|
180
134
|
# Performs composition on all the characters.
|
181
135
|
#
|
182
|
-
# 'é'.length # =>
|
183
|
-
# 'é'.mb_chars.compose.to_s.length # =>
|
136
|
+
# 'é'.length # => 1
|
137
|
+
# 'é'.mb_chars.compose.to_s.length # => 1
|
184
138
|
def compose
|
185
139
|
chars(Unicode.compose(@wrapped_string.codepoints.to_a).pack("U*"))
|
186
140
|
end
|
@@ -188,9 +142,9 @@ module ActiveSupport #:nodoc:
|
|
188
142
|
# Returns the number of grapheme clusters in the string.
|
189
143
|
#
|
190
144
|
# 'क्षि'.mb_chars.length # => 4
|
191
|
-
# 'क्षि'.mb_chars.grapheme_length # =>
|
145
|
+
# 'क्षि'.mb_chars.grapheme_length # => 2
|
192
146
|
def grapheme_length
|
193
|
-
|
147
|
+
@wrapped_string.grapheme_clusters.length
|
194
148
|
end
|
195
149
|
|
196
150
|
# Replaces all ISO-8859-1 or CP1252 characters by their UTF-8 equivalent
|
@@ -202,31 +156,18 @@ module ActiveSupport #:nodoc:
|
|
202
156
|
chars(Unicode.tidy_bytes(@wrapped_string, force))
|
203
157
|
end
|
204
158
|
|
205
|
-
def as_json(options = nil)
|
159
|
+
def as_json(options = nil) # :nodoc:
|
206
160
|
to_s.as_json(options)
|
207
161
|
end
|
208
162
|
|
209
|
-
%w(
|
163
|
+
%w(reverse tidy_bytes).each do |method|
|
210
164
|
define_method("#{method}!") do |*args|
|
211
|
-
@wrapped_string =
|
165
|
+
@wrapped_string = public_send(method, *args).to_s
|
212
166
|
self
|
213
167
|
end
|
214
168
|
end
|
215
169
|
|
216
170
|
private
|
217
|
-
|
218
|
-
def translate_offset(byte_offset)
|
219
|
-
return nil if byte_offset.nil?
|
220
|
-
return 0 if @wrapped_string == ""
|
221
|
-
|
222
|
-
begin
|
223
|
-
@wrapped_string.byteslice(0...byte_offset).unpack("U*").length
|
224
|
-
rescue ArgumentError
|
225
|
-
byte_offset -= 1
|
226
|
-
retry
|
227
|
-
end
|
228
|
-
end
|
229
|
-
|
230
171
|
def chars(string)
|
231
172
|
self.class.new(string)
|
232
173
|
end
|
@@ -5,215 +5,21 @@ module ActiveSupport
|
|
5
5
|
module Unicode
|
6
6
|
extend self
|
7
7
|
|
8
|
-
# A list of all available normalization forms.
|
9
|
-
# See http://www.unicode.org/reports/tr15/tr15-29.html for more
|
10
|
-
# information about normalization.
|
11
|
-
NORMALIZATION_FORMS = [:c, :kc, :d, :kd]
|
12
|
-
|
13
8
|
# The Unicode version that is supported by the implementation
|
14
|
-
UNICODE_VERSION = "
|
15
|
-
|
16
|
-
# The default normalization used for operations that require
|
17
|
-
# normalization. It can be set to any of the normalizations
|
18
|
-
# in NORMALIZATION_FORMS.
|
19
|
-
#
|
20
|
-
# ActiveSupport::Multibyte::Unicode.default_normalization_form = :c
|
21
|
-
attr_accessor :default_normalization_form
|
22
|
-
@default_normalization_form = :kc
|
23
|
-
|
24
|
-
# Hangul character boundaries and properties
|
25
|
-
HANGUL_SBASE = 0xAC00
|
26
|
-
HANGUL_LBASE = 0x1100
|
27
|
-
HANGUL_VBASE = 0x1161
|
28
|
-
HANGUL_TBASE = 0x11A7
|
29
|
-
HANGUL_LCOUNT = 19
|
30
|
-
HANGUL_VCOUNT = 21
|
31
|
-
HANGUL_TCOUNT = 28
|
32
|
-
HANGUL_NCOUNT = HANGUL_VCOUNT * HANGUL_TCOUNT
|
33
|
-
HANGUL_SCOUNT = 11172
|
34
|
-
HANGUL_SLAST = HANGUL_SBASE + HANGUL_SCOUNT
|
35
|
-
|
36
|
-
# Detect whether the codepoint is in a certain character class. Returns
|
37
|
-
# +true+ when it's in the specified character class and +false+ otherwise.
|
38
|
-
# Valid character classes are: <tt>:cr</tt>, <tt>:lf</tt>, <tt>:l</tt>,
|
39
|
-
# <tt>:v</tt>, <tt>:lv</tt>, <tt>:lvt</tt> and <tt>:t</tt>.
|
40
|
-
#
|
41
|
-
# Primarily used by the grapheme cluster support.
|
42
|
-
def in_char_class?(codepoint, classes)
|
43
|
-
classes.detect { |c| database.boundary[c] === codepoint } ? true : false
|
44
|
-
end
|
45
|
-
|
46
|
-
# Unpack the string at grapheme boundaries. Returns a list of character
|
47
|
-
# lists.
|
48
|
-
#
|
49
|
-
# Unicode.unpack_graphemes('क्षि') # => [[2325, 2381], [2359], [2367]]
|
50
|
-
# Unicode.unpack_graphemes('Café') # => [[67], [97], [102], [233]]
|
51
|
-
def unpack_graphemes(string)
|
52
|
-
codepoints = string.codepoints.to_a
|
53
|
-
unpacked = []
|
54
|
-
pos = 0
|
55
|
-
marker = 0
|
56
|
-
eoc = codepoints.length
|
57
|
-
while (pos < eoc)
|
58
|
-
pos += 1
|
59
|
-
previous = codepoints[pos - 1]
|
60
|
-
current = codepoints[pos]
|
61
|
-
|
62
|
-
# See http://unicode.org/reports/tr29/#Grapheme_Cluster_Boundary_Rules
|
63
|
-
should_break =
|
64
|
-
if pos == eoc
|
65
|
-
true
|
66
|
-
# GB3. CR X LF
|
67
|
-
elsif previous == database.boundary[:cr] && current == database.boundary[:lf]
|
68
|
-
false
|
69
|
-
# GB4. (Control|CR|LF) ÷
|
70
|
-
elsif previous && in_char_class?(previous, [:control, :cr, :lf])
|
71
|
-
true
|
72
|
-
# GB5. ÷ (Control|CR|LF)
|
73
|
-
elsif in_char_class?(current, [:control, :cr, :lf])
|
74
|
-
true
|
75
|
-
# GB6. L X (L|V|LV|LVT)
|
76
|
-
elsif database.boundary[:l] === previous && in_char_class?(current, [:l, :v, :lv, :lvt])
|
77
|
-
false
|
78
|
-
# GB7. (LV|V) X (V|T)
|
79
|
-
elsif in_char_class?(previous, [:lv, :v]) && in_char_class?(current, [:v, :t])
|
80
|
-
false
|
81
|
-
# GB8. (LVT|T) X (T)
|
82
|
-
elsif in_char_class?(previous, [:lvt, :t]) && database.boundary[:t] === current
|
83
|
-
false
|
84
|
-
# GB9. X (Extend | ZWJ)
|
85
|
-
elsif in_char_class?(current, [:extend, :zwj])
|
86
|
-
false
|
87
|
-
# GB9a. X SpacingMark
|
88
|
-
elsif database.boundary[:spacingmark] === current
|
89
|
-
false
|
90
|
-
# GB9b. Prepend X
|
91
|
-
elsif database.boundary[:prepend] === previous
|
92
|
-
false
|
93
|
-
# GB10. (E_Base | EBG) Extend* X E_Modifier
|
94
|
-
elsif (marker...pos).any? { |i| in_char_class?(codepoints[i], [:e_base, :e_base_gaz]) && codepoints[i + 1...pos].all? { |c| database.boundary[:extend] === c } } && database.boundary[:e_modifier] === current
|
95
|
-
false
|
96
|
-
# GB11. ZWJ X (Glue_After_Zwj | EBG)
|
97
|
-
elsif database.boundary[:zwj] === previous && in_char_class?(current, [:glue_after_zwj, :e_base_gaz])
|
98
|
-
false
|
99
|
-
# GB12. ^ (RI RI)* RI X RI
|
100
|
-
# GB13. [^RI] (RI RI)* RI X RI
|
101
|
-
elsif codepoints[marker..pos].all? { |c| database.boundary[:regional_indicator] === c } && codepoints[marker..pos].count { |c| database.boundary[:regional_indicator] === c }.even?
|
102
|
-
false
|
103
|
-
# GB999. Any ÷ Any
|
104
|
-
else
|
105
|
-
true
|
106
|
-
end
|
107
|
-
|
108
|
-
if should_break
|
109
|
-
unpacked << codepoints[marker..pos - 1]
|
110
|
-
marker = pos
|
111
|
-
end
|
112
|
-
end
|
113
|
-
unpacked
|
114
|
-
end
|
115
|
-
|
116
|
-
# Reverse operation of unpack_graphemes.
|
117
|
-
#
|
118
|
-
# Unicode.pack_graphemes(Unicode.unpack_graphemes('क्षि')) # => 'क्षि'
|
119
|
-
def pack_graphemes(unpacked)
|
120
|
-
unpacked.flatten.pack("U*")
|
121
|
-
end
|
122
|
-
|
123
|
-
# Re-order codepoints so the string becomes canonical.
|
124
|
-
def reorder_characters(codepoints)
|
125
|
-
length = codepoints.length - 1
|
126
|
-
pos = 0
|
127
|
-
while pos < length do
|
128
|
-
cp1, cp2 = database.codepoints[codepoints[pos]], database.codepoints[codepoints[pos + 1]]
|
129
|
-
if (cp1.combining_class > cp2.combining_class) && (cp2.combining_class > 0)
|
130
|
-
codepoints[pos..pos + 1] = cp2.code, cp1.code
|
131
|
-
pos += (pos > 0 ? -1 : 1)
|
132
|
-
else
|
133
|
-
pos += 1
|
134
|
-
end
|
135
|
-
end
|
136
|
-
codepoints
|
137
|
-
end
|
9
|
+
UNICODE_VERSION = RbConfig::CONFIG["UNICODE_VERSION"]
|
138
10
|
|
139
11
|
# Decompose composed characters to the decomposed form.
|
140
12
|
def decompose(type, codepoints)
|
141
|
-
|
142
|
-
|
143
|
-
|
144
|
-
|
145
|
-
ncp = [] # new codepoints
|
146
|
-
ncp << HANGUL_LBASE + sindex / HANGUL_NCOUNT
|
147
|
-
ncp << HANGUL_VBASE + (sindex % HANGUL_NCOUNT) / HANGUL_TCOUNT
|
148
|
-
tindex = sindex % HANGUL_TCOUNT
|
149
|
-
ncp << (HANGUL_TBASE + tindex) unless tindex == 0
|
150
|
-
decomposed.concat ncp
|
151
|
-
# if the codepoint is decomposable in with the current decomposition type
|
152
|
-
elsif (ncp = database.codepoints[cp].decomp_mapping) && (!database.codepoints[cp].decomp_type || type == :compatibility)
|
153
|
-
decomposed.concat decompose(type, ncp.dup)
|
154
|
-
else
|
155
|
-
decomposed << cp
|
156
|
-
end
|
13
|
+
if type == :compatibility
|
14
|
+
codepoints.pack("U*").unicode_normalize(:nfkd).codepoints
|
15
|
+
else
|
16
|
+
codepoints.pack("U*").unicode_normalize(:nfd).codepoints
|
157
17
|
end
|
158
18
|
end
|
159
19
|
|
160
20
|
# Compose decomposed characters to the composed form.
|
161
21
|
def compose(codepoints)
|
162
|
-
|
163
|
-
eoa = codepoints.length - 1
|
164
|
-
starter_pos = 0
|
165
|
-
starter_char = codepoints[0]
|
166
|
-
previous_combining_class = -1
|
167
|
-
while pos < eoa
|
168
|
-
pos += 1
|
169
|
-
lindex = starter_char - HANGUL_LBASE
|
170
|
-
# -- Hangul
|
171
|
-
if 0 <= lindex && lindex < HANGUL_LCOUNT
|
172
|
-
vindex = codepoints[starter_pos + 1] - HANGUL_VBASE rescue vindex = -1
|
173
|
-
if 0 <= vindex && vindex < HANGUL_VCOUNT
|
174
|
-
tindex = codepoints[starter_pos + 2] - HANGUL_TBASE rescue tindex = -1
|
175
|
-
if 0 <= tindex && tindex < HANGUL_TCOUNT
|
176
|
-
j = starter_pos + 2
|
177
|
-
eoa -= 2
|
178
|
-
else
|
179
|
-
tindex = 0
|
180
|
-
j = starter_pos + 1
|
181
|
-
eoa -= 1
|
182
|
-
end
|
183
|
-
codepoints[starter_pos..j] = (lindex * HANGUL_VCOUNT + vindex) * HANGUL_TCOUNT + tindex + HANGUL_SBASE
|
184
|
-
end
|
185
|
-
starter_pos += 1
|
186
|
-
starter_char = codepoints[starter_pos]
|
187
|
-
# -- Other characters
|
188
|
-
else
|
189
|
-
current_char = codepoints[pos]
|
190
|
-
current = database.codepoints[current_char]
|
191
|
-
if current.combining_class > previous_combining_class
|
192
|
-
if ref = database.composition_map[starter_char]
|
193
|
-
composition = ref[current_char]
|
194
|
-
else
|
195
|
-
composition = nil
|
196
|
-
end
|
197
|
-
unless composition.nil?
|
198
|
-
codepoints[starter_pos] = composition
|
199
|
-
starter_char = composition
|
200
|
-
codepoints.delete_at pos
|
201
|
-
eoa -= 1
|
202
|
-
pos -= 1
|
203
|
-
previous_combining_class = -1
|
204
|
-
else
|
205
|
-
previous_combining_class = current.combining_class
|
206
|
-
end
|
207
|
-
else
|
208
|
-
previous_combining_class = current.combining_class
|
209
|
-
end
|
210
|
-
if current.combining_class == 0
|
211
|
-
starter_pos = pos
|
212
|
-
starter_char = codepoints[pos]
|
213
|
-
end
|
214
|
-
end
|
215
|
-
end
|
216
|
-
codepoints
|
22
|
+
codepoints.pack("U*").unicode_normalize(:nfc).codepoints
|
217
23
|
end
|
218
24
|
|
219
25
|
# Rubinius' String#scrub, however, doesn't support ASCII-incompatible chars.
|
@@ -224,7 +30,7 @@ module ActiveSupport
|
|
224
30
|
# Passing +true+ will forcibly tidy all bytes, assuming that the string's
|
225
31
|
# encoding is entirely CP1252 or ISO-8859-1.
|
226
32
|
def tidy_bytes(string, force = false)
|
227
|
-
return string if string.empty?
|
33
|
+
return string if string.empty? || string.ascii_only?
|
228
34
|
return recode_windows1252_chars(string) if force
|
229
35
|
string.scrub { |bad| recode_windows1252_chars(bad) }
|
230
36
|
end
|
@@ -255,140 +61,10 @@ module ActiveSupport
|
|
255
61
|
end
|
256
62
|
end
|
257
63
|
|
258
|
-
# Returns the KC normalization of the string by default. NFKC is
|
259
|
-
# considered the best normalization form for passing strings to databases
|
260
|
-
# and validations.
|
261
|
-
#
|
262
|
-
# * <tt>string</tt> - The string to perform normalization on.
|
263
|
-
# * <tt>form</tt> - The form you want to normalize in. Should be one of
|
264
|
-
# the following: <tt>:c</tt>, <tt>:kc</tt>, <tt>:d</tt>, or <tt>:kd</tt>.
|
265
|
-
# Default is ActiveSupport::Multibyte::Unicode.default_normalization_form.
|
266
|
-
def normalize(string, form = nil)
|
267
|
-
form ||= @default_normalization_form
|
268
|
-
# See http://www.unicode.org/reports/tr15, Table 1
|
269
|
-
codepoints = string.codepoints.to_a
|
270
|
-
case form
|
271
|
-
when :d
|
272
|
-
reorder_characters(decompose(:canonical, codepoints))
|
273
|
-
when :c
|
274
|
-
compose(reorder_characters(decompose(:canonical, codepoints)))
|
275
|
-
when :kd
|
276
|
-
reorder_characters(decompose(:compatibility, codepoints))
|
277
|
-
when :kc
|
278
|
-
compose(reorder_characters(decompose(:compatibility, codepoints)))
|
279
|
-
else
|
280
|
-
raise ArgumentError, "#{form} is not a valid normalization variant", caller
|
281
|
-
end.pack("U*".freeze)
|
282
|
-
end
|
283
|
-
|
284
|
-
def downcase(string)
|
285
|
-
apply_mapping string, :lowercase_mapping
|
286
|
-
end
|
287
|
-
|
288
|
-
def upcase(string)
|
289
|
-
apply_mapping string, :uppercase_mapping
|
290
|
-
end
|
291
|
-
|
292
|
-
def swapcase(string)
|
293
|
-
apply_mapping string, :swapcase_mapping
|
294
|
-
end
|
295
|
-
|
296
|
-
# Holds data about a codepoint in the Unicode database.
|
297
|
-
class Codepoint
|
298
|
-
attr_accessor :code, :combining_class, :decomp_type, :decomp_mapping, :uppercase_mapping, :lowercase_mapping
|
299
|
-
|
300
|
-
# Initializing Codepoint object with default values
|
301
|
-
def initialize
|
302
|
-
@combining_class = 0
|
303
|
-
@uppercase_mapping = 0
|
304
|
-
@lowercase_mapping = 0
|
305
|
-
end
|
306
|
-
|
307
|
-
def swapcase_mapping
|
308
|
-
uppercase_mapping > 0 ? uppercase_mapping : lowercase_mapping
|
309
|
-
end
|
310
|
-
end
|
311
|
-
|
312
|
-
# Holds static data from the Unicode database.
|
313
|
-
class UnicodeDatabase
|
314
|
-
ATTRIBUTES = :codepoints, :composition_exclusion, :composition_map, :boundary, :cp1252
|
315
|
-
|
316
|
-
attr_writer(*ATTRIBUTES)
|
317
|
-
|
318
|
-
def initialize
|
319
|
-
@codepoints = Hash.new(Codepoint.new)
|
320
|
-
@composition_exclusion = []
|
321
|
-
@composition_map = {}
|
322
|
-
@boundary = {}
|
323
|
-
@cp1252 = {}
|
324
|
-
end
|
325
|
-
|
326
|
-
# Lazy load the Unicode database so it's only loaded when it's actually used
|
327
|
-
ATTRIBUTES.each do |attr_name|
|
328
|
-
class_eval(<<-EOS, __FILE__, __LINE__ + 1)
|
329
|
-
def #{attr_name} # def codepoints
|
330
|
-
load # load
|
331
|
-
@#{attr_name} # @codepoints
|
332
|
-
end # end
|
333
|
-
EOS
|
334
|
-
end
|
335
|
-
|
336
|
-
# Loads the Unicode database and returns all the internal objects of
|
337
|
-
# UnicodeDatabase.
|
338
|
-
def load
|
339
|
-
begin
|
340
|
-
@codepoints, @composition_exclusion, @composition_map, @boundary, @cp1252 = File.open(self.class.filename, "rb") { |f| Marshal.load f.read }
|
341
|
-
rescue => e
|
342
|
-
raise IOError.new("Couldn't load the Unicode tables for UTF8Handler (#{e.message}), ActiveSupport::Multibyte is unusable")
|
343
|
-
end
|
344
|
-
|
345
|
-
# Redefine the === method so we can write shorter rules for grapheme cluster breaks
|
346
|
-
@boundary.each_key do |k|
|
347
|
-
@boundary[k].instance_eval do
|
348
|
-
def ===(other)
|
349
|
-
detect { |i| i === other } ? true : false
|
350
|
-
end
|
351
|
-
end if @boundary[k].kind_of?(Array)
|
352
|
-
end
|
353
|
-
|
354
|
-
# define attr_reader methods for the instance variables
|
355
|
-
class << self
|
356
|
-
attr_reader(*ATTRIBUTES)
|
357
|
-
end
|
358
|
-
end
|
359
|
-
|
360
|
-
# Returns the directory in which the data files are stored.
|
361
|
-
def self.dirname
|
362
|
-
File.expand_path("../values", __dir__)
|
363
|
-
end
|
364
|
-
|
365
|
-
# Returns the filename for the data file for this version.
|
366
|
-
def self.filename
|
367
|
-
File.expand_path File.join(dirname, "unicode_tables.dat")
|
368
|
-
end
|
369
|
-
end
|
370
|
-
|
371
64
|
private
|
372
|
-
|
373
|
-
def apply_mapping(string, mapping)
|
374
|
-
database.codepoints
|
375
|
-
string.each_codepoint.map do |codepoint|
|
376
|
-
cp = database.codepoints[codepoint]
|
377
|
-
if cp && (ncp = cp.send(mapping)) && ncp > 0
|
378
|
-
ncp
|
379
|
-
else
|
380
|
-
codepoint
|
381
|
-
end
|
382
|
-
end.pack("U*")
|
383
|
-
end
|
384
|
-
|
385
65
|
def recode_windows1252_chars(string)
|
386
66
|
string.encode(Encoding::UTF_8, Encoding::Windows_1252, invalid: :replace, undef: :replace)
|
387
67
|
end
|
388
|
-
|
389
|
-
def database
|
390
|
-
@database ||= UnicodeDatabase.new
|
391
|
-
end
|
392
68
|
end
|
393
69
|
end
|
394
70
|
end
|