activesupport 5.2.7 → 6.0.0.beta1
Sign up to get free protection for your applications and to get access to all the features.
Potentially problematic release.
This version of activesupport might be problematic. Click here for more details.
- checksums.yaml +4 -4
- data/CHANGELOG.md +182 -566
- data/MIT-LICENSE +1 -1
- data/README.rdoc +1 -1
- data/lib/active_support/backtrace_cleaner.rb +23 -0
- data/lib/active_support/cache/file_store.rb +19 -12
- data/lib/active_support/cache/mem_cache_store.rb +16 -2
- data/lib/active_support/cache/memory_store.rb +5 -0
- data/lib/active_support/cache/null_store.rb +5 -0
- data/lib/active_support/cache/redis_cache_store.rb +39 -20
- data/lib/active_support/cache.rb +40 -18
- data/lib/active_support/callbacks.rb +16 -5
- data/lib/active_support/configurable.rb +4 -8
- data/lib/active_support/core_ext/array/extract.rb +21 -0
- data/lib/active_support/core_ext/array/prepend_and_append.rb +2 -6
- data/lib/active_support/core_ext/array.rb +1 -1
- data/lib/active_support/core_ext/class/attribute.rb +1 -1
- data/lib/active_support/core_ext/class/subclasses.rb +1 -1
- data/lib/active_support/core_ext/date/calculations.rb +6 -5
- data/lib/active_support/core_ext/date_and_time/calculations.rb +24 -17
- data/lib/active_support/core_ext/date_time/calculations.rb +1 -1
- data/lib/active_support/core_ext/enumerable.rb +71 -67
- data/lib/active_support/core_ext/hash/compact.rb +2 -26
- data/lib/active_support/core_ext/hash/keys.rb +0 -29
- data/lib/active_support/core_ext/hash/slice.rb +3 -25
- data/lib/active_support/core_ext/hash/transform_values.rb +2 -29
- data/lib/active_support/core_ext/hash.rb +0 -2
- data/lib/active_support/core_ext/integer/multiple.rb +1 -1
- data/lib/active_support/core_ext/load_error.rb +1 -1
- data/lib/active_support/core_ext/module/attribute_accessors.rb +2 -5
- data/lib/active_support/core_ext/module/attribute_accessors_per_thread.rb +8 -14
- data/lib/active_support/core_ext/module/delegation.rb +27 -7
- data/lib/active_support/core_ext/module/introspection.rb +37 -13
- data/lib/active_support/core_ext/module/reachable.rb +1 -6
- data/lib/active_support/core_ext/module/redefine_method.rb +8 -17
- data/lib/active_support/core_ext/module.rb +0 -1
- data/lib/active_support/core_ext/numeric/conversions.rb +124 -128
- data/lib/active_support/core_ext/numeric/inquiry.rb +2 -25
- data/lib/active_support/core_ext/numeric.rb +0 -1
- data/lib/active_support/core_ext/object/blank.rb +1 -2
- data/lib/active_support/core_ext/object/duplicable.rb +5 -2
- data/lib/active_support/core_ext/object/json.rb +1 -0
- data/lib/active_support/core_ext/object/try.rb +15 -7
- data/lib/active_support/core_ext/object/with_options.rb +1 -1
- data/lib/active_support/core_ext/range/compare_range.rb +1 -1
- data/lib/active_support/core_ext/range/conversions.rb +31 -29
- data/lib/active_support/core_ext/range/include_range.rb +6 -0
- data/lib/active_support/core_ext/regexp.rb +0 -4
- data/lib/active_support/core_ext/securerandom.rb +23 -3
- data/lib/active_support/core_ext/string/access.rb +8 -0
- data/lib/active_support/core_ext/string/filters.rb +41 -0
- data/lib/active_support/core_ext/string/multibyte.rb +4 -3
- data/lib/active_support/core_ext/string/output_safety.rb +16 -5
- data/lib/active_support/core_ext/string/strip.rb +3 -1
- data/lib/active_support/core_ext/uri.rb +1 -0
- data/lib/active_support/current_attributes.rb +2 -0
- data/lib/active_support/dependencies.rb +28 -11
- data/lib/active_support/deprecation/behaviors.rb +1 -1
- data/lib/active_support/deprecation/method_wrappers.rb +4 -5
- data/lib/active_support/deprecation/proxy_wrappers.rb +0 -2
- data/lib/active_support/deprecation.rb +1 -1
- data/lib/active_support/descendants_tracker.rb +6 -5
- data/lib/active_support/duration/iso8601_parser.rb +2 -3
- data/lib/active_support/duration/iso8601_serializer.rb +3 -4
- data/lib/active_support/duration.rb +12 -14
- data/lib/active_support/encrypted_configuration.rb +0 -4
- data/lib/active_support/evented_file_update_checker.rb +25 -7
- data/lib/active_support/execution_wrapper.rb +14 -16
- data/lib/active_support/gem_version.rb +4 -4
- data/lib/active_support/hash_with_indifferent_access.rb +16 -28
- data/lib/active_support/i18n.rb +1 -0
- data/lib/active_support/i18n_railtie.rb +8 -1
- data/lib/active_support/inflector/inflections.rb +1 -4
- data/lib/active_support/inflector/methods.rb +15 -27
- data/lib/active_support/inflector/transliterate.rb +6 -6
- data/lib/active_support/json/decoding.rb +23 -23
- data/lib/active_support/json/encoding.rb +6 -2
- data/lib/active_support/key_generator.rb +0 -32
- data/lib/active_support/lazy_load_hooks.rb +5 -1
- data/lib/active_support/locale/en.rb +31 -0
- data/lib/active_support/log_subscriber.rb +31 -8
- data/lib/active_support/logger.rb +0 -15
- data/lib/active_support/logger_silence.rb +28 -12
- data/lib/active_support/logger_thread_safe_level.rb +27 -6
- data/lib/active_support/message_encryptor.rb +2 -4
- data/lib/active_support/message_verifier.rb +2 -2
- data/lib/active_support/multibyte/chars.rb +29 -48
- data/lib/active_support/multibyte/unicode.rb +44 -281
- data/lib/active_support/notifications/fanout.rb +42 -4
- data/lib/active_support/notifications/instrumenter.rb +73 -2
- data/lib/active_support/notifications.rb +32 -4
- data/lib/active_support/number_helper/number_to_currency_converter.rb +2 -2
- data/lib/active_support/number_helper/number_to_delimited_converter.rb +3 -1
- data/lib/active_support/number_helper/number_to_human_converter.rb +3 -1
- data/lib/active_support/number_helper/number_to_human_size_converter.rb +3 -1
- data/lib/active_support/number_helper/number_to_percentage_converter.rb +3 -1
- data/lib/active_support/number_helper/number_to_phone_converter.rb +2 -0
- data/lib/active_support/number_helper/number_to_rounded_converter.rb +5 -3
- data/lib/active_support/number_helper.rb +7 -0
- data/lib/active_support/ordered_options.rb +1 -1
- data/lib/active_support/parameter_filter.rb +124 -0
- data/lib/active_support/rails.rb +0 -6
- data/lib/active_support/reloader.rb +5 -6
- data/lib/active_support/subscriber.rb +16 -26
- data/lib/active_support/tagged_logging.rb +13 -4
- data/lib/active_support/test_case.rb +91 -0
- data/lib/active_support/testing/assertions.rb +15 -1
- data/lib/active_support/testing/deprecation.rb +0 -1
- data/lib/active_support/testing/file_fixtures.rb +2 -0
- data/lib/active_support/testing/isolation.rb +2 -2
- data/lib/active_support/testing/method_call_assertions.rb +28 -1
- data/lib/active_support/testing/parallelization.rb +109 -0
- data/lib/active_support/testing/stream.rb +1 -1
- data/lib/active_support/testing/time_helpers.rb +7 -7
- data/lib/active_support/time_with_zone.rb +15 -5
- data/lib/active_support/values/time_zone.rb +12 -7
- data/lib/active_support/xml_mini/jdom.rb +2 -2
- data/lib/active_support/xml_mini/libxml.rb +2 -2
- data/lib/active_support/xml_mini/libxmlsax.rb +4 -4
- data/lib/active_support/xml_mini/nokogiri.rb +2 -2
- data/lib/active_support/xml_mini/nokogirisax.rb +3 -3
- data/lib/active_support/xml_mini/rexml.rb +2 -2
- data/lib/active_support/xml_mini.rb +2 -9
- data/lib/active_support.rb +1 -1
- metadata +12 -10
- data/lib/active_support/core_ext/digest.rb +0 -3
- data/lib/active_support/values/unicode_tables.dat +0 -0
@@ -1,34 +1,55 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
3
|
require "active_support/concern"
|
4
|
-
require "
|
4
|
+
require "active_support/core_ext/module/attribute_accessors"
|
5
|
+
require "concurrent"
|
5
6
|
|
6
7
|
module ActiveSupport
|
7
8
|
module LoggerThreadSafeLevel # :nodoc:
|
8
9
|
extend ActiveSupport::Concern
|
9
10
|
|
11
|
+
included do
|
12
|
+
cattr_accessor :local_levels, default: Concurrent::Map.new(initial_capacity: 2), instance_accessor: false
|
13
|
+
end
|
14
|
+
|
15
|
+
Logger::Severity.constants.each do |severity|
|
16
|
+
class_eval(<<-EOT, __FILE__, __LINE__ + 1)
|
17
|
+
def #{severity.downcase}? # def debug?
|
18
|
+
Logger::#{severity} >= level # DEBUG >= level
|
19
|
+
end # end
|
20
|
+
EOT
|
21
|
+
end
|
22
|
+
|
10
23
|
def after_initialize
|
11
|
-
|
24
|
+
ActiveSupport::Deprecation.warn(
|
25
|
+
"Logger don't need to call #after_initialize directly anymore. It will be deprecated without replacement in " \
|
26
|
+
"Rails 6.1."
|
27
|
+
)
|
12
28
|
end
|
13
29
|
|
14
30
|
def local_log_id
|
15
|
-
|
31
|
+
Thread.current.__id__
|
16
32
|
end
|
17
33
|
|
18
34
|
def local_level
|
19
|
-
|
35
|
+
self.class.local_levels[local_log_id]
|
20
36
|
end
|
21
37
|
|
22
38
|
def local_level=(level)
|
23
39
|
if level
|
24
|
-
|
40
|
+
self.class.local_levels[local_log_id] = level
|
25
41
|
else
|
26
|
-
|
42
|
+
self.class.local_levels.delete(local_log_id)
|
27
43
|
end
|
28
44
|
end
|
29
45
|
|
30
46
|
def level
|
31
47
|
local_level || super
|
32
48
|
end
|
49
|
+
|
50
|
+
def add(severity, message = nil, progname = nil, &block) # :nodoc:
|
51
|
+
return true if @logdev.nil? || (severity || UNKNOWN) < level
|
52
|
+
super
|
53
|
+
end
|
33
54
|
end
|
34
55
|
end
|
@@ -182,7 +182,7 @@ module ActiveSupport
|
|
182
182
|
|
183
183
|
def _decrypt(encrypted_message, purpose)
|
184
184
|
cipher = new_cipher
|
185
|
-
encrypted_data, iv, auth_tag = encrypted_message.split("--"
|
185
|
+
encrypted_data, iv, auth_tag = encrypted_message.split("--").map { |v| ::Base64.strict_decode64(v) }
|
186
186
|
|
187
187
|
# Currently the OpenSSL bindings do not raise an error if auth_tag is
|
188
188
|
# truncated, which would allow an attacker to easily forge it. See
|
@@ -210,9 +210,7 @@ module ActiveSupport
|
|
210
210
|
OpenSSL::Cipher.new(@cipher)
|
211
211
|
end
|
212
212
|
|
213
|
-
|
214
|
-
@verifier
|
215
|
-
end
|
213
|
+
attr_reader :verifier
|
216
214
|
|
217
215
|
def aead_mode?
|
218
216
|
@aead_mode ||= new_cipher.authenticated?
|
@@ -122,7 +122,7 @@ module ActiveSupport
|
|
122
122
|
def valid_message?(signed_message)
|
123
123
|
return if signed_message.nil? || !signed_message.valid_encoding? || signed_message.blank?
|
124
124
|
|
125
|
-
data, digest = signed_message.split("--"
|
125
|
+
data, digest = signed_message.split("--")
|
126
126
|
data.present? && digest.present? && ActiveSupport::SecurityUtils.secure_compare(digest, generate_digest(data))
|
127
127
|
end
|
128
128
|
|
@@ -150,7 +150,7 @@ module ActiveSupport
|
|
150
150
|
def verified(signed_message, purpose: nil, **)
|
151
151
|
if valid_message?(signed_message)
|
152
152
|
begin
|
153
|
-
data = signed_message.split("--"
|
153
|
+
data = signed_message.split("--")[0]
|
154
154
|
message = Messages::Metadata.verify(decode(data), purpose)
|
155
155
|
@serializer.load(message) if message
|
156
156
|
rescue ArgumentError => argument_error
|
@@ -4,7 +4,6 @@ require "active_support/json"
|
|
4
4
|
require "active_support/core_ext/string/access"
|
5
5
|
require "active_support/core_ext/string/behavior"
|
6
6
|
require "active_support/core_ext/module/delegation"
|
7
|
-
require "active_support/core_ext/regexp"
|
8
7
|
|
9
8
|
module ActiveSupport #:nodoc:
|
10
9
|
module Multibyte #:nodoc:
|
@@ -18,7 +17,7 @@ module ActiveSupport #:nodoc:
|
|
18
17
|
# through the +mb_chars+ method. Methods which would normally return a
|
19
18
|
# String object now return a Chars object so methods can be chained.
|
20
19
|
#
|
21
|
-
# 'The Perfect String '.mb_chars.downcase.strip
|
20
|
+
# 'The Perfect String '.mb_chars.downcase.strip
|
22
21
|
# # => #<ActiveSupport::Multibyte::Chars:0x007fdc434ccc10 @wrapped_string="the perfect string">
|
23
22
|
#
|
24
23
|
# Chars objects are perfectly interchangeable with String objects as long as
|
@@ -77,6 +76,11 @@ module ActiveSupport #:nodoc:
|
|
77
76
|
# Returns +true+ when the proxy class can handle the string. Returns
|
78
77
|
# +false+ otherwise.
|
79
78
|
def self.consumes?(string)
|
79
|
+
ActiveSupport::Deprecation.warn(<<-MSG.squish)
|
80
|
+
ActiveSupport::Multibyte::Chars.consumes? is deprecated and will be
|
81
|
+
removed from Rails 6.1. Use string.is_utf8? instead.
|
82
|
+
MSG
|
83
|
+
|
80
84
|
string.encoding == Encoding::UTF_8
|
81
85
|
end
|
82
86
|
|
@@ -109,7 +113,7 @@ module ActiveSupport #:nodoc:
|
|
109
113
|
#
|
110
114
|
# 'Café'.mb_chars.reverse.to_s # => 'éfaC'
|
111
115
|
def reverse
|
112
|
-
chars(
|
116
|
+
chars(@wrapped_string.scan(/\X/).reverse.join)
|
113
117
|
end
|
114
118
|
|
115
119
|
# Limits the byte size of the string to a number of bytes without breaking
|
@@ -118,35 +122,7 @@ module ActiveSupport #:nodoc:
|
|
118
122
|
#
|
119
123
|
# 'こんにちは'.mb_chars.limit(7).to_s # => "こん"
|
120
124
|
def limit(limit)
|
121
|
-
|
122
|
-
end
|
123
|
-
|
124
|
-
# Converts characters in the string to uppercase.
|
125
|
-
#
|
126
|
-
# 'Laurent, où sont les tests ?'.mb_chars.upcase.to_s # => "LAURENT, OÙ SONT LES TESTS ?"
|
127
|
-
def upcase
|
128
|
-
chars Unicode.upcase(@wrapped_string)
|
129
|
-
end
|
130
|
-
|
131
|
-
# Converts characters in the string to lowercase.
|
132
|
-
#
|
133
|
-
# 'VĚDA A VÝZKUM'.mb_chars.downcase.to_s # => "věda a výzkum"
|
134
|
-
def downcase
|
135
|
-
chars Unicode.downcase(@wrapped_string)
|
136
|
-
end
|
137
|
-
|
138
|
-
# Converts characters in the string to the opposite case.
|
139
|
-
#
|
140
|
-
# 'El Cañón'.mb_chars.swapcase.to_s # => "eL cAÑÓN"
|
141
|
-
def swapcase
|
142
|
-
chars Unicode.swapcase(@wrapped_string)
|
143
|
-
end
|
144
|
-
|
145
|
-
# Converts the first character to uppercase and the remainder to lowercase.
|
146
|
-
#
|
147
|
-
# 'über'.mb_chars.capitalize.to_s # => "Über"
|
148
|
-
def capitalize
|
149
|
-
(slice(0) || chars("")).upcase + (slice(1..-1) || chars("")).downcase
|
125
|
+
truncate_bytes(limit, omission: nil)
|
150
126
|
end
|
151
127
|
|
152
128
|
# Capitalizes the first letter of every word, when possible.
|
@@ -154,7 +130,7 @@ module ActiveSupport #:nodoc:
|
|
154
130
|
# "ÉL QUE SE ENTERÓ".mb_chars.titleize.to_s # => "Él Que Se Enteró"
|
155
131
|
# "日本語".mb_chars.titleize.to_s # => "日本語"
|
156
132
|
def titleize
|
157
|
-
chars(downcase.to_s.gsub(/\b('?\S)/u) {
|
133
|
+
chars(downcase.to_s.gsub(/\b('?\S)/u) { $1.upcase })
|
158
134
|
end
|
159
135
|
alias_method :titlecase, :titleize
|
160
136
|
|
@@ -166,7 +142,24 @@ module ActiveSupport #:nodoc:
|
|
166
142
|
# <tt>:c</tt>, <tt>:kc</tt>, <tt>:d</tt>, or <tt>:kd</tt>. Default is
|
167
143
|
# ActiveSupport::Multibyte::Unicode.default_normalization_form
|
168
144
|
def normalize(form = nil)
|
169
|
-
|
145
|
+
form ||= Unicode.default_normalization_form
|
146
|
+
|
147
|
+
# See https://www.unicode.org/reports/tr15, Table 1
|
148
|
+
if alias_form = Unicode::NORMALIZATION_FORM_ALIASES[form]
|
149
|
+
ActiveSupport::Deprecation.warn(<<-MSG.squish)
|
150
|
+
ActiveSupport::Multibyte::Chars#normalize is deprecated and will be
|
151
|
+
removed from Rails 6.1. Use #unicode_normalize(:#{alias_form}) instead.
|
152
|
+
MSG
|
153
|
+
|
154
|
+
send(:unicode_normalize, alias_form)
|
155
|
+
else
|
156
|
+
ActiveSupport::Deprecation.warn(<<-MSG.squish)
|
157
|
+
ActiveSupport::Multibyte::Chars#normalize is deprecated and will be
|
158
|
+
removed from Rails 6.1. Use #unicode_normalize instead.
|
159
|
+
MSG
|
160
|
+
|
161
|
+
raise ArgumentError, "#{form} is not a valid normalization variant", caller
|
162
|
+
end
|
170
163
|
end
|
171
164
|
|
172
165
|
# Performs canonical decomposition on all the characters.
|
@@ -190,7 +183,7 @@ module ActiveSupport #:nodoc:
|
|
190
183
|
# 'क्षि'.mb_chars.length # => 4
|
191
184
|
# 'क्षि'.mb_chars.grapheme_length # => 3
|
192
185
|
def grapheme_length
|
193
|
-
|
186
|
+
@wrapped_string.scan(/\X/).length
|
194
187
|
end
|
195
188
|
|
196
189
|
# Replaces all ISO-8859-1 or CP1252 characters by their UTF-8 equivalent
|
@@ -206,7 +199,7 @@ module ActiveSupport #:nodoc:
|
|
206
199
|
to_s.as_json(options)
|
207
200
|
end
|
208
201
|
|
209
|
-
%w(
|
202
|
+
%w(reverse tidy_bytes).each do |method|
|
210
203
|
define_method("#{method}!") do |*args|
|
211
204
|
@wrapped_string = send(method, *args).to_s
|
212
205
|
self
|
@@ -215,18 +208,6 @@ module ActiveSupport #:nodoc:
|
|
215
208
|
|
216
209
|
private
|
217
210
|
|
218
|
-
def translate_offset(byte_offset)
|
219
|
-
return nil if byte_offset.nil?
|
220
|
-
return 0 if @wrapped_string == ""
|
221
|
-
|
222
|
-
begin
|
223
|
-
@wrapped_string.byteslice(0...byte_offset).unpack("U*").length
|
224
|
-
rescue ArgumentError
|
225
|
-
byte_offset -= 1
|
226
|
-
retry
|
227
|
-
end
|
228
|
-
end
|
229
|
-
|
230
211
|
def chars(string)
|
231
212
|
self.class.new(string)
|
232
213
|
end
|
@@ -6,12 +6,19 @@ module ActiveSupport
|
|
6
6
|
extend self
|
7
7
|
|
8
8
|
# A list of all available normalization forms.
|
9
|
-
# See
|
9
|
+
# See https://www.unicode.org/reports/tr15/tr15-29.html for more
|
10
10
|
# information about normalization.
|
11
11
|
NORMALIZATION_FORMS = [:c, :kc, :d, :kd]
|
12
12
|
|
13
|
+
NORMALIZATION_FORM_ALIASES = { # :nodoc:
|
14
|
+
c: :nfc,
|
15
|
+
d: :nfd,
|
16
|
+
kc: :nfkc,
|
17
|
+
kd: :nfkd
|
18
|
+
}
|
19
|
+
|
13
20
|
# The Unicode version that is supported by the implementation
|
14
|
-
UNICODE_VERSION = "
|
21
|
+
UNICODE_VERSION = RbConfig::CONFIG["UNICODE_VERSION"]
|
15
22
|
|
16
23
|
# The default normalization used for operations that require
|
17
24
|
# normalization. It can be set to any of the normalizations
|
@@ -21,199 +28,44 @@ module ActiveSupport
|
|
21
28
|
attr_accessor :default_normalization_form
|
22
29
|
@default_normalization_form = :kc
|
23
30
|
|
24
|
-
# Hangul character boundaries and properties
|
25
|
-
HANGUL_SBASE = 0xAC00
|
26
|
-
HANGUL_LBASE = 0x1100
|
27
|
-
HANGUL_VBASE = 0x1161
|
28
|
-
HANGUL_TBASE = 0x11A7
|
29
|
-
HANGUL_LCOUNT = 19
|
30
|
-
HANGUL_VCOUNT = 21
|
31
|
-
HANGUL_TCOUNT = 28
|
32
|
-
HANGUL_NCOUNT = HANGUL_VCOUNT * HANGUL_TCOUNT
|
33
|
-
HANGUL_SCOUNT = 11172
|
34
|
-
HANGUL_SLAST = HANGUL_SBASE + HANGUL_SCOUNT
|
35
|
-
|
36
|
-
# Detect whether the codepoint is in a certain character class. Returns
|
37
|
-
# +true+ when it's in the specified character class and +false+ otherwise.
|
38
|
-
# Valid character classes are: <tt>:cr</tt>, <tt>:lf</tt>, <tt>:l</tt>,
|
39
|
-
# <tt>:v</tt>, <tt>:lv</tt>, <tt>:lvt</tt> and <tt>:t</tt>.
|
40
|
-
#
|
41
|
-
# Primarily used by the grapheme cluster support.
|
42
|
-
def in_char_class?(codepoint, classes)
|
43
|
-
classes.detect { |c| database.boundary[c] === codepoint } ? true : false
|
44
|
-
end
|
45
|
-
|
46
31
|
# Unpack the string at grapheme boundaries. Returns a list of character
|
47
32
|
# lists.
|
48
33
|
#
|
49
34
|
# Unicode.unpack_graphemes('क्षि') # => [[2325, 2381], [2359], [2367]]
|
50
35
|
# Unicode.unpack_graphemes('Café') # => [[67], [97], [102], [233]]
|
51
36
|
def unpack_graphemes(string)
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
eoc = codepoints.length
|
57
|
-
while (pos < eoc)
|
58
|
-
pos += 1
|
59
|
-
previous = codepoints[pos - 1]
|
60
|
-
current = codepoints[pos]
|
61
|
-
|
62
|
-
# See http://unicode.org/reports/tr29/#Grapheme_Cluster_Boundary_Rules
|
63
|
-
should_break =
|
64
|
-
if pos == eoc
|
65
|
-
true
|
66
|
-
# GB3. CR X LF
|
67
|
-
elsif previous == database.boundary[:cr] && current == database.boundary[:lf]
|
68
|
-
false
|
69
|
-
# GB4. (Control|CR|LF) ÷
|
70
|
-
elsif previous && in_char_class?(previous, [:control, :cr, :lf])
|
71
|
-
true
|
72
|
-
# GB5. ÷ (Control|CR|LF)
|
73
|
-
elsif in_char_class?(current, [:control, :cr, :lf])
|
74
|
-
true
|
75
|
-
# GB6. L X (L|V|LV|LVT)
|
76
|
-
elsif database.boundary[:l] === previous && in_char_class?(current, [:l, :v, :lv, :lvt])
|
77
|
-
false
|
78
|
-
# GB7. (LV|V) X (V|T)
|
79
|
-
elsif in_char_class?(previous, [:lv, :v]) && in_char_class?(current, [:v, :t])
|
80
|
-
false
|
81
|
-
# GB8. (LVT|T) X (T)
|
82
|
-
elsif in_char_class?(previous, [:lvt, :t]) && database.boundary[:t] === current
|
83
|
-
false
|
84
|
-
# GB9. X (Extend | ZWJ)
|
85
|
-
elsif in_char_class?(current, [:extend, :zwj])
|
86
|
-
false
|
87
|
-
# GB9a. X SpacingMark
|
88
|
-
elsif database.boundary[:spacingmark] === current
|
89
|
-
false
|
90
|
-
# GB9b. Prepend X
|
91
|
-
elsif database.boundary[:prepend] === previous
|
92
|
-
false
|
93
|
-
# GB10. (E_Base | EBG) Extend* X E_Modifier
|
94
|
-
elsif (marker...pos).any? { |i| in_char_class?(codepoints[i], [:e_base, :e_base_gaz]) && codepoints[i + 1...pos].all? { |c| database.boundary[:extend] === c } } && database.boundary[:e_modifier] === current
|
95
|
-
false
|
96
|
-
# GB11. ZWJ X (Glue_After_Zwj | EBG)
|
97
|
-
elsif database.boundary[:zwj] === previous && in_char_class?(current, [:glue_after_zwj, :e_base_gaz])
|
98
|
-
false
|
99
|
-
# GB12. ^ (RI RI)* RI X RI
|
100
|
-
# GB13. [^RI] (RI RI)* RI X RI
|
101
|
-
elsif codepoints[marker..pos].all? { |c| database.boundary[:regional_indicator] === c } && codepoints[marker..pos].count { |c| database.boundary[:regional_indicator] === c }.even?
|
102
|
-
false
|
103
|
-
# GB999. Any ÷ Any
|
104
|
-
else
|
105
|
-
true
|
106
|
-
end
|
37
|
+
ActiveSupport::Deprecation.warn(<<-MSG.squish)
|
38
|
+
ActiveSupport::Multibyte::Unicode#unpack_graphemes is deprecated and will be
|
39
|
+
removed from Rails 6.1. Use string.scan(/\X/).map(&:codepoints) instead.
|
40
|
+
MSG
|
107
41
|
|
108
|
-
|
109
|
-
unpacked << codepoints[marker..pos - 1]
|
110
|
-
marker = pos
|
111
|
-
end
|
112
|
-
end
|
113
|
-
unpacked
|
42
|
+
string.scan(/\X/).map(&:codepoints)
|
114
43
|
end
|
115
44
|
|
116
45
|
# Reverse operation of unpack_graphemes.
|
117
46
|
#
|
118
47
|
# Unicode.pack_graphemes(Unicode.unpack_graphemes('क्षि')) # => 'क्षि'
|
119
48
|
def pack_graphemes(unpacked)
|
120
|
-
|
121
|
-
|
49
|
+
ActiveSupport::Deprecation.warn(<<-MSG.squish)
|
50
|
+
ActiveSupport::Multibyte::Unicode#pack_graphemes is deprecated and will be
|
51
|
+
removed from Rails 6.1. Use array.flatten.pack("U*") instead.
|
52
|
+
MSG
|
122
53
|
|
123
|
-
|
124
|
-
def reorder_characters(codepoints)
|
125
|
-
length = codepoints.length - 1
|
126
|
-
pos = 0
|
127
|
-
while pos < length do
|
128
|
-
cp1, cp2 = database.codepoints[codepoints[pos]], database.codepoints[codepoints[pos + 1]]
|
129
|
-
if (cp1.combining_class > cp2.combining_class) && (cp2.combining_class > 0)
|
130
|
-
codepoints[pos..pos + 1] = cp2.code, cp1.code
|
131
|
-
pos += (pos > 0 ? -1 : 1)
|
132
|
-
else
|
133
|
-
pos += 1
|
134
|
-
end
|
135
|
-
end
|
136
|
-
codepoints
|
54
|
+
unpacked.flatten.pack("U*")
|
137
55
|
end
|
138
56
|
|
139
57
|
# Decompose composed characters to the decomposed form.
|
140
58
|
def decompose(type, codepoints)
|
141
|
-
|
142
|
-
|
143
|
-
|
144
|
-
|
145
|
-
ncp = [] # new codepoints
|
146
|
-
ncp << HANGUL_LBASE + sindex / HANGUL_NCOUNT
|
147
|
-
ncp << HANGUL_VBASE + (sindex % HANGUL_NCOUNT) / HANGUL_TCOUNT
|
148
|
-
tindex = sindex % HANGUL_TCOUNT
|
149
|
-
ncp << (HANGUL_TBASE + tindex) unless tindex == 0
|
150
|
-
decomposed.concat ncp
|
151
|
-
# if the codepoint is decomposable in with the current decomposition type
|
152
|
-
elsif (ncp = database.codepoints[cp].decomp_mapping) && (!database.codepoints[cp].decomp_type || type == :compatibility)
|
153
|
-
decomposed.concat decompose(type, ncp.dup)
|
154
|
-
else
|
155
|
-
decomposed << cp
|
156
|
-
end
|
59
|
+
if type == :compatibility
|
60
|
+
codepoints.pack("U*").unicode_normalize(:nfkd).codepoints
|
61
|
+
else
|
62
|
+
codepoints.pack("U*").unicode_normalize(:nfd).codepoints
|
157
63
|
end
|
158
64
|
end
|
159
65
|
|
160
66
|
# Compose decomposed characters to the composed form.
|
161
67
|
def compose(codepoints)
|
162
|
-
|
163
|
-
eoa = codepoints.length - 1
|
164
|
-
starter_pos = 0
|
165
|
-
starter_char = codepoints[0]
|
166
|
-
previous_combining_class = -1
|
167
|
-
while pos < eoa
|
168
|
-
pos += 1
|
169
|
-
lindex = starter_char - HANGUL_LBASE
|
170
|
-
# -- Hangul
|
171
|
-
if 0 <= lindex && lindex < HANGUL_LCOUNT
|
172
|
-
vindex = codepoints[starter_pos + 1] - HANGUL_VBASE rescue vindex = -1
|
173
|
-
if 0 <= vindex && vindex < HANGUL_VCOUNT
|
174
|
-
tindex = codepoints[starter_pos + 2] - HANGUL_TBASE rescue tindex = -1
|
175
|
-
if 0 <= tindex && tindex < HANGUL_TCOUNT
|
176
|
-
j = starter_pos + 2
|
177
|
-
eoa -= 2
|
178
|
-
else
|
179
|
-
tindex = 0
|
180
|
-
j = starter_pos + 1
|
181
|
-
eoa -= 1
|
182
|
-
end
|
183
|
-
codepoints[starter_pos..j] = (lindex * HANGUL_VCOUNT + vindex) * HANGUL_TCOUNT + tindex + HANGUL_SBASE
|
184
|
-
end
|
185
|
-
starter_pos += 1
|
186
|
-
starter_char = codepoints[starter_pos]
|
187
|
-
# -- Other characters
|
188
|
-
else
|
189
|
-
current_char = codepoints[pos]
|
190
|
-
current = database.codepoints[current_char]
|
191
|
-
if current.combining_class > previous_combining_class
|
192
|
-
if ref = database.composition_map[starter_char]
|
193
|
-
composition = ref[current_char]
|
194
|
-
else
|
195
|
-
composition = nil
|
196
|
-
end
|
197
|
-
unless composition.nil?
|
198
|
-
codepoints[starter_pos] = composition
|
199
|
-
starter_char = composition
|
200
|
-
codepoints.delete_at pos
|
201
|
-
eoa -= 1
|
202
|
-
pos -= 1
|
203
|
-
previous_combining_class = -1
|
204
|
-
else
|
205
|
-
previous_combining_class = current.combining_class
|
206
|
-
end
|
207
|
-
else
|
208
|
-
previous_combining_class = current.combining_class
|
209
|
-
end
|
210
|
-
if current.combining_class == 0
|
211
|
-
starter_pos = pos
|
212
|
-
starter_char = codepoints[pos]
|
213
|
-
end
|
214
|
-
end
|
215
|
-
end
|
216
|
-
codepoints
|
68
|
+
codepoints.pack("U*").unicode_normalize(:nfc).codepoints
|
217
69
|
end
|
218
70
|
|
219
71
|
# Rubinius' String#scrub, however, doesn't support ASCII-incompatible chars.
|
@@ -265,130 +117,41 @@ module ActiveSupport
|
|
265
117
|
# Default is ActiveSupport::Multibyte::Unicode.default_normalization_form.
|
266
118
|
def normalize(string, form = nil)
|
267
119
|
form ||= @default_normalization_form
|
268
|
-
# See http://www.unicode.org/reports/tr15, Table 1
|
269
|
-
codepoints = string.codepoints.to_a
|
270
|
-
case form
|
271
|
-
when :d
|
272
|
-
reorder_characters(decompose(:canonical, codepoints))
|
273
|
-
when :c
|
274
|
-
compose(reorder_characters(decompose(:canonical, codepoints)))
|
275
|
-
when :kd
|
276
|
-
reorder_characters(decompose(:compatibility, codepoints))
|
277
|
-
when :kc
|
278
|
-
compose(reorder_characters(decompose(:compatibility, codepoints)))
|
279
|
-
else
|
280
|
-
raise ArgumentError, "#{form} is not a valid normalization variant", caller
|
281
|
-
end.pack("U*".freeze)
|
282
|
-
end
|
283
|
-
|
284
|
-
def downcase(string)
|
285
|
-
apply_mapping string, :lowercase_mapping
|
286
|
-
end
|
287
120
|
|
288
|
-
|
289
|
-
|
290
|
-
|
121
|
+
# See https://www.unicode.org/reports/tr15, Table 1
|
122
|
+
if alias_form = NORMALIZATION_FORM_ALIASES[form]
|
123
|
+
ActiveSupport::Deprecation.warn(<<-MSG.squish)
|
124
|
+
ActiveSupport::Multibyte::Unicode#normalize is deprecated and will be
|
125
|
+
removed from Rails 6.1. Use String#unicode_normalize(:#{alias_form}) instead.
|
126
|
+
MSG
|
291
127
|
|
292
|
-
|
293
|
-
|
294
|
-
|
295
|
-
|
296
|
-
|
297
|
-
|
298
|
-
attr_accessor :code, :combining_class, :decomp_type, :decomp_mapping, :uppercase_mapping, :lowercase_mapping
|
299
|
-
|
300
|
-
# Initializing Codepoint object with default values
|
301
|
-
def initialize
|
302
|
-
@combining_class = 0
|
303
|
-
@uppercase_mapping = 0
|
304
|
-
@lowercase_mapping = 0
|
305
|
-
end
|
128
|
+
string.unicode_normalize(alias_form)
|
129
|
+
else
|
130
|
+
ActiveSupport::Deprecation.warn(<<-MSG.squish)
|
131
|
+
ActiveSupport::Multibyte::Unicode#normalize is deprecated and will be
|
132
|
+
removed from Rails 6.1. Use String#unicode_normalize instead.
|
133
|
+
MSG
|
306
134
|
|
307
|
-
|
308
|
-
uppercase_mapping > 0 ? uppercase_mapping : lowercase_mapping
|
135
|
+
raise ArgumentError, "#{form} is not a valid normalization variant", caller
|
309
136
|
end
|
310
137
|
end
|
311
138
|
|
312
|
-
|
313
|
-
|
314
|
-
|
315
|
-
|
316
|
-
|
139
|
+
%w(downcase upcase swapcase).each do |method|
|
140
|
+
define_method(method) do |string|
|
141
|
+
ActiveSupport::Deprecation.warn(<<-MSG.squish)
|
142
|
+
ActiveSupport::Multibyte::Unicode##{method} is deprecated and
|
143
|
+
will be removed from Rails 6.1. Use String methods directly.
|
144
|
+
MSG
|
317
145
|
|
318
|
-
|
319
|
-
@codepoints = Hash.new(Codepoint.new)
|
320
|
-
@composition_exclusion = []
|
321
|
-
@composition_map = {}
|
322
|
-
@boundary = {}
|
323
|
-
@cp1252 = {}
|
324
|
-
end
|
325
|
-
|
326
|
-
# Lazy load the Unicode database so it's only loaded when it's actually used
|
327
|
-
ATTRIBUTES.each do |attr_name|
|
328
|
-
class_eval(<<-EOS, __FILE__, __LINE__ + 1)
|
329
|
-
def #{attr_name} # def codepoints
|
330
|
-
load # load
|
331
|
-
@#{attr_name} # @codepoints
|
332
|
-
end # end
|
333
|
-
EOS
|
334
|
-
end
|
335
|
-
|
336
|
-
# Loads the Unicode database and returns all the internal objects of
|
337
|
-
# UnicodeDatabase.
|
338
|
-
def load
|
339
|
-
begin
|
340
|
-
@codepoints, @composition_exclusion, @composition_map, @boundary, @cp1252 = File.open(self.class.filename, "rb") { |f| Marshal.load f.read }
|
341
|
-
rescue => e
|
342
|
-
raise IOError.new("Couldn't load the Unicode tables for UTF8Handler (#{e.message}), ActiveSupport::Multibyte is unusable")
|
343
|
-
end
|
344
|
-
|
345
|
-
# Redefine the === method so we can write shorter rules for grapheme cluster breaks
|
346
|
-
@boundary.each_key do |k|
|
347
|
-
@boundary[k].instance_eval do
|
348
|
-
def ===(other)
|
349
|
-
detect { |i| i === other } ? true : false
|
350
|
-
end
|
351
|
-
end if @boundary[k].kind_of?(Array)
|
352
|
-
end
|
353
|
-
|
354
|
-
# define attr_reader methods for the instance variables
|
355
|
-
class << self
|
356
|
-
attr_reader(*ATTRIBUTES)
|
357
|
-
end
|
358
|
-
end
|
359
|
-
|
360
|
-
# Returns the directory in which the data files are stored.
|
361
|
-
def self.dirname
|
362
|
-
File.expand_path("../values", __dir__)
|
363
|
-
end
|
364
|
-
|
365
|
-
# Returns the filename for the data file for this version.
|
366
|
-
def self.filename
|
367
|
-
File.expand_path File.join(dirname, "unicode_tables.dat")
|
146
|
+
string.send(method)
|
368
147
|
end
|
369
148
|
end
|
370
149
|
|
371
150
|
private
|
372
151
|
|
373
|
-
def apply_mapping(string, mapping)
|
374
|
-
database.codepoints
|
375
|
-
string.each_codepoint.map do |codepoint|
|
376
|
-
cp = database.codepoints[codepoint]
|
377
|
-
if cp && (ncp = cp.send(mapping)) && ncp > 0
|
378
|
-
ncp
|
379
|
-
else
|
380
|
-
codepoint
|
381
|
-
end
|
382
|
-
end.pack("U*")
|
383
|
-
end
|
384
|
-
|
385
152
|
def recode_windows1252_chars(string)
|
386
153
|
string.encode(Encoding::UTF_8, Encoding::Windows_1252, invalid: :replace, undef: :replace)
|
387
154
|
end
|
388
|
-
|
389
|
-
def database
|
390
|
-
@database ||= UnicodeDatabase.new
|
391
|
-
end
|
392
155
|
end
|
393
156
|
end
|
394
157
|
end
|