activesupport 4.0.12 → 7.0.2.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of activesupport might be problematic. Click here for more details.
- checksums.yaml +5 -5
- data/CHANGELOG.md +249 -501
- data/MIT-LICENSE +2 -2
- data/README.rdoc +10 -5
- data/lib/active_support/actionable_error.rb +48 -0
- data/lib/active_support/all.rb +5 -3
- data/lib/active_support/array_inquirer.rb +48 -0
- data/lib/active_support/backtrace_cleaner.rb +41 -13
- data/lib/active_support/benchmarkable.rb +7 -15
- data/lib/active_support/builder.rb +3 -1
- data/lib/active_support/cache/file_store.rb +96 -74
- data/lib/active_support/cache/mem_cache_store.rb +211 -103
- data/lib/active_support/cache/memory_store.rb +90 -58
- data/lib/active_support/cache/null_store.rb +19 -7
- data/lib/active_support/cache/redis_cache_store.rb +468 -0
- data/lib/active_support/cache/strategy/local_cache.rb +86 -83
- data/lib/active_support/cache/strategy/local_cache_middleware.rb +45 -0
- data/lib/active_support/cache.rb +580 -241
- data/lib/active_support/callbacks.rb +812 -425
- data/lib/active_support/code_generator.rb +65 -0
- data/lib/active_support/concern.rb +103 -14
- data/lib/active_support/concurrency/load_interlock_aware_monitor.rb +33 -0
- data/lib/active_support/concurrency/share_lock.rb +226 -0
- data/lib/active_support/configurable.rb +21 -19
- data/lib/active_support/configuration_file.rb +51 -0
- data/lib/active_support/core_ext/array/access.rb +47 -1
- data/lib/active_support/core_ext/array/conversions.rb +35 -44
- data/lib/active_support/core_ext/array/deprecated_conversions.rb +25 -0
- data/lib/active_support/core_ext/array/extract.rb +21 -0
- data/lib/active_support/core_ext/array/extract_options.rb +2 -0
- data/lib/active_support/core_ext/array/grouping.rb +26 -16
- data/lib/active_support/core_ext/array/inquiry.rb +19 -0
- data/lib/active_support/core_ext/array/wrap.rb +7 -4
- data/lib/active_support/core_ext/array.rb +10 -7
- data/lib/active_support/core_ext/benchmark.rb +5 -3
- data/lib/active_support/core_ext/big_decimal/conversions.rb +9 -26
- data/lib/active_support/core_ext/big_decimal.rb +3 -1
- data/lib/active_support/core_ext/class/attribute.rb +52 -49
- data/lib/active_support/core_ext/class/attribute_accessors.rb +5 -169
- data/lib/active_support/core_ext/class/subclasses.rb +25 -26
- data/lib/active_support/core_ext/class.rb +4 -4
- data/lib/active_support/core_ext/date/acts_like.rb +3 -1
- data/lib/active_support/core_ext/date/blank.rb +14 -0
- data/lib/active_support/core_ext/date/calculations.rb +31 -18
- data/lib/active_support/core_ext/date/conversions.rb +43 -32
- data/lib/active_support/core_ext/date/deprecated_conversions.rb +26 -0
- data/lib/active_support/core_ext/date/zones.rb +5 -34
- data/lib/active_support/core_ext/date.rb +7 -4
- data/lib/active_support/core_ext/date_and_time/calculations.rb +198 -66
- data/lib/active_support/core_ext/date_and_time/compatibility.rb +31 -0
- data/lib/active_support/core_ext/date_and_time/zones.rb +40 -0
- data/lib/active_support/core_ext/date_time/acts_like.rb +4 -2
- data/lib/active_support/core_ext/date_time/blank.rb +14 -0
- data/lib/active_support/core_ext/date_time/calculations.rb +79 -38
- data/lib/active_support/core_ext/date_time/compatibility.rb +18 -0
- data/lib/active_support/core_ext/date_time/conversions.rb +31 -26
- data/lib/active_support/core_ext/date_time/deprecated_conversions.rb +22 -0
- data/lib/active_support/core_ext/date_time.rb +8 -4
- data/lib/active_support/core_ext/digest/uuid.rb +79 -0
- data/lib/active_support/core_ext/digest.rb +3 -0
- data/lib/active_support/core_ext/enumerable.rb +249 -17
- data/lib/active_support/core_ext/file/atomic.rb +41 -32
- data/lib/active_support/core_ext/file.rb +3 -1
- data/lib/active_support/core_ext/hash/conversions.rb +71 -49
- data/lib/active_support/core_ext/hash/deep_merge.rb +9 -13
- data/lib/active_support/core_ext/hash/deep_transform_values.rb +46 -0
- data/lib/active_support/core_ext/hash/except.rb +14 -5
- data/lib/active_support/core_ext/hash/indifferent_access.rb +5 -3
- data/lib/active_support/core_ext/hash/keys.rb +39 -56
- data/lib/active_support/core_ext/hash/reverse_merge.rb +5 -2
- data/lib/active_support/core_ext/hash/slice.rb +8 -23
- data/lib/active_support/core_ext/hash.rb +10 -8
- data/lib/active_support/core_ext/integer/inflections.rb +3 -1
- data/lib/active_support/core_ext/integer/multiple.rb +3 -1
- data/lib/active_support/core_ext/integer/time.rb +11 -33
- data/lib/active_support/core_ext/integer.rb +5 -3
- data/lib/active_support/core_ext/kernel/concern.rb +14 -0
- data/lib/active_support/core_ext/kernel/reporting.rb +9 -78
- data/lib/active_support/core_ext/kernel/singleton_class.rb +2 -0
- data/lib/active_support/core_ext/kernel.rb +5 -4
- data/lib/active_support/core_ext/load_error.rb +5 -21
- data/lib/active_support/core_ext/module/aliasing.rb +6 -44
- data/lib/active_support/core_ext/module/anonymous.rb +12 -1
- data/lib/active_support/core_ext/module/attr_internal.rb +8 -8
- data/lib/active_support/core_ext/module/attribute_accessors.rb +186 -44
- data/lib/active_support/core_ext/module/attribute_accessors_per_thread.rb +157 -0
- data/lib/active_support/core_ext/module/concerning.rb +140 -0
- data/lib/active_support/core_ext/module/delegation.rb +172 -45
- data/lib/active_support/core_ext/module/deprecation.rb +3 -3
- data/lib/active_support/core_ext/module/introspection.rb +23 -38
- data/lib/active_support/core_ext/module/redefine_method.rb +40 -0
- data/lib/active_support/core_ext/module/remove_method.rb +8 -3
- data/lib/active_support/core_ext/module.rb +13 -10
- data/lib/active_support/core_ext/name_error.rb +45 -4
- data/lib/active_support/core_ext/numeric/bytes.rb +22 -0
- data/lib/active_support/core_ext/numeric/conversions.rb +135 -127
- data/lib/active_support/core_ext/numeric/deprecated_conversions.rb +60 -0
- data/lib/active_support/core_ext/numeric/time.rb +37 -50
- data/lib/active_support/core_ext/numeric.rb +6 -3
- data/lib/active_support/core_ext/object/acts_like.rb +41 -6
- data/lib/active_support/core_ext/object/blank.rb +70 -20
- data/lib/active_support/core_ext/object/conversions.rb +6 -4
- data/lib/active_support/core_ext/object/deep_dup.rb +19 -10
- data/lib/active_support/core_ext/object/duplicable.rb +17 -47
- data/lib/active_support/core_ext/object/inclusion.rb +18 -15
- data/lib/active_support/core_ext/object/instance_variables.rb +3 -1
- data/lib/active_support/core_ext/object/json.rb +244 -0
- data/lib/active_support/core_ext/object/to_param.rb +3 -1
- data/lib/active_support/core_ext/object/to_query.rb +21 -8
- data/lib/active_support/core_ext/object/try.rb +106 -26
- data/lib/active_support/core_ext/object/with_options.rb +64 -5
- data/lib/active_support/core_ext/object.rb +14 -12
- data/lib/active_support/core_ext/pathname/existence.rb +21 -0
- data/lib/active_support/core_ext/pathname.rb +3 -0
- data/lib/active_support/core_ext/range/compare_range.rb +57 -0
- data/lib/active_support/core_ext/range/conversions.rb +37 -15
- data/lib/active_support/core_ext/range/deprecated_conversions.rb +26 -0
- data/lib/active_support/core_ext/range/each.rb +18 -17
- data/lib/active_support/core_ext/range/include_time_with_zone.rb +7 -0
- data/lib/active_support/core_ext/range/overlaps.rb +2 -0
- data/lib/active_support/core_ext/range.rb +7 -4
- data/lib/active_support/core_ext/regexp.rb +10 -1
- data/lib/active_support/core_ext/securerandom.rb +45 -0
- data/lib/active_support/core_ext/string/access.rb +42 -51
- data/lib/active_support/core_ext/string/behavior.rb +3 -1
- data/lib/active_support/core_ext/string/conversions.rb +18 -13
- data/lib/active_support/core_ext/string/exclude.rb +5 -3
- data/lib/active_support/core_ext/string/filters.rb +97 -7
- data/lib/active_support/core_ext/string/indent.rb +6 -4
- data/lib/active_support/core_ext/string/inflections.rb +106 -25
- data/lib/active_support/core_ext/string/inquiry.rb +4 -1
- data/lib/active_support/core_ext/string/multibyte.rb +18 -9
- data/lib/active_support/core_ext/string/output_safety.rb +227 -54
- data/lib/active_support/core_ext/string/starts_ends_with.rb +4 -2
- data/lib/active_support/core_ext/string/strip.rb +6 -5
- data/lib/active_support/core_ext/string/zones.rb +4 -1
- data/lib/active_support/core_ext/string.rb +15 -13
- data/lib/active_support/core_ext/symbol/starts_ends_with.rb +6 -0
- data/lib/active_support/core_ext/symbol.rb +3 -0
- data/lib/active_support/core_ext/time/acts_like.rb +3 -1
- data/lib/active_support/core_ext/time/calculations.rb +178 -116
- data/lib/active_support/core_ext/time/compatibility.rb +16 -0
- data/lib/active_support/core_ext/time/conversions.rb +37 -25
- data/lib/active_support/core_ext/time/deprecated_conversions.rb +22 -0
- data/lib/active_support/core_ext/time/zones.rb +44 -42
- data/lib/active_support/core_ext/time.rb +8 -5
- data/lib/active_support/core_ext/uri.rb +4 -25
- data/lib/active_support/core_ext.rb +4 -2
- data/lib/active_support/current_attributes/test_helper.rb +13 -0
- data/lib/active_support/current_attributes.rb +226 -0
- data/lib/active_support/dependencies/autoload.rb +3 -1
- data/lib/active_support/dependencies/interlock.rb +49 -0
- data/lib/active_support/dependencies/require_dependency.rb +28 -0
- data/lib/active_support/dependencies.rb +71 -696
- data/lib/active_support/deprecation/behaviors.rb +65 -16
- data/lib/active_support/deprecation/constant_accessor.rb +52 -0
- data/lib/active_support/deprecation/disallowed.rb +56 -0
- data/lib/active_support/deprecation/instance_delegator.rb +16 -2
- data/lib/active_support/deprecation/method_wrappers.rb +62 -21
- data/lib/active_support/deprecation/proxy_wrappers.rb +82 -31
- data/lib/active_support/deprecation/reporting.rb +81 -18
- data/lib/active_support/deprecation.rb +19 -11
- data/lib/active_support/descendants_tracker.rb +192 -34
- data/lib/active_support/digest.rb +22 -0
- data/lib/active_support/duration/iso8601_parser.rb +123 -0
- data/lib/active_support/duration/iso8601_serializer.rb +67 -0
- data/lib/active_support/duration.rb +437 -39
- data/lib/active_support/encrypted_configuration.rb +56 -0
- data/lib/active_support/encrypted_file.rb +117 -0
- data/lib/active_support/environment_inquirer.rb +20 -0
- data/lib/active_support/error_reporter.rb +117 -0
- data/lib/active_support/evented_file_update_checker.rb +170 -0
- data/lib/active_support/execution_context/test_helper.rb +13 -0
- data/lib/active_support/execution_context.rb +53 -0
- data/lib/active_support/execution_wrapper.rb +151 -0
- data/lib/active_support/executor/test_helper.rb +7 -0
- data/lib/active_support/executor.rb +8 -0
- data/lib/active_support/file_update_checker.rb +62 -37
- data/lib/active_support/fork_tracker.rb +71 -0
- data/lib/active_support/gem_version.rb +17 -0
- data/lib/active_support/gzip.rb +7 -5
- data/lib/active_support/hash_with_indifferent_access.rb +207 -54
- data/lib/active_support/html_safe_translation.rb +43 -0
- data/lib/active_support/i18n.rb +10 -6
- data/lib/active_support/i18n_railtie.rb +48 -19
- data/lib/active_support/inflections.rb +19 -12
- data/lib/active_support/inflector/inflections.rb +97 -37
- data/lib/active_support/inflector/methods.rb +192 -157
- data/lib/active_support/inflector/transliterate.rb +83 -33
- data/lib/active_support/inflector.rb +7 -5
- data/lib/active_support/isolated_execution_state.rb +64 -0
- data/lib/active_support/json/decoding.rb +37 -42
- data/lib/active_support/json/encoding.rb +93 -293
- data/lib/active_support/json.rb +4 -2
- data/lib/active_support/key_generator.rb +30 -47
- data/lib/active_support/lazy_load_hooks.rb +54 -21
- data/lib/active_support/locale/en.rb +33 -0
- data/lib/active_support/locale/en.yml +10 -4
- data/lib/active_support/log_subscriber/test_helper.rb +14 -12
- data/lib/active_support/log_subscriber.rb +61 -18
- data/lib/active_support/logger.rb +40 -4
- data/lib/active_support/logger_silence.rb +17 -20
- data/lib/active_support/logger_thread_safe_level.rb +69 -0
- data/lib/active_support/message_encryptor.rb +178 -55
- data/lib/active_support/message_verifier.rb +195 -26
- data/lib/active_support/messages/metadata.rb +80 -0
- data/lib/active_support/messages/rotation_configuration.rb +23 -0
- data/lib/active_support/messages/rotator.rb +57 -0
- data/lib/active_support/multibyte/chars.rb +45 -92
- data/lib/active_support/multibyte/unicode.rb +44 -377
- data/lib/active_support/multibyte.rb +5 -3
- data/lib/active_support/notifications/fanout.rb +177 -44
- data/lib/active_support/notifications/instrumenter.rb +117 -17
- data/lib/active_support/notifications.rb +106 -39
- data/lib/active_support/number_helper/number_converter.rb +181 -0
- data/lib/active_support/number_helper/number_to_currency_converter.rb +46 -0
- data/lib/active_support/number_helper/number_to_delimited_converter.rb +30 -0
- data/lib/active_support/number_helper/number_to_human_converter.rb +69 -0
- data/lib/active_support/number_helper/number_to_human_size_converter.rb +60 -0
- data/lib/active_support/number_helper/number_to_percentage_converter.rb +16 -0
- data/lib/active_support/number_helper/number_to_phone_converter.rb +59 -0
- data/lib/active_support/number_helper/number_to_rounded_converter.rb +59 -0
- data/lib/active_support/number_helper/rounding_helper.rb +46 -0
- data/lib/active_support/number_helper.rb +152 -394
- data/lib/active_support/option_merger.rb +18 -5
- data/lib/active_support/ordered_hash.rb +8 -6
- data/lib/active_support/ordered_options.rb +43 -7
- data/lib/active_support/parameter_filter.rb +138 -0
- data/lib/active_support/per_thread_registry.rb +24 -11
- data/lib/active_support/proxy_object.rb +2 -0
- data/lib/active_support/rails.rb +10 -11
- data/lib/active_support/railtie.rb +118 -12
- data/lib/active_support/reloader.rb +130 -0
- data/lib/active_support/rescuable.rb +112 -57
- data/lib/active_support/ruby_features.rb +7 -0
- data/lib/active_support/secure_compare_rotator.rb +51 -0
- data/lib/active_support/security_utils.rb +38 -0
- data/lib/active_support/string_inquirer.rb +11 -4
- data/lib/active_support/subscriber.rb +109 -39
- data/lib/active_support/tagged_logging.rb +54 -17
- data/lib/active_support/test_case.rb +121 -37
- data/lib/active_support/testing/assertions.rb +177 -39
- data/lib/active_support/testing/autorun.rb +5 -3
- data/lib/active_support/testing/constant_lookup.rb +3 -6
- data/lib/active_support/testing/declarative.rb +10 -22
- data/lib/active_support/testing/deprecation.rb +65 -11
- data/lib/active_support/testing/file_fixtures.rb +38 -0
- data/lib/active_support/testing/isolation.rb +56 -87
- data/lib/active_support/testing/method_call_assertions.rb +70 -0
- data/lib/active_support/testing/parallelization/server.rb +82 -0
- data/lib/active_support/testing/parallelization/worker.rb +103 -0
- data/lib/active_support/testing/parallelization.rb +55 -0
- data/lib/active_support/testing/parallelize_executor.rb +76 -0
- data/lib/active_support/testing/setup_and_teardown.rb +30 -10
- data/lib/active_support/testing/stream.rb +41 -0
- data/lib/active_support/testing/tagged_logging.rb +6 -4
- data/lib/active_support/testing/time_helpers.rb +246 -0
- data/lib/active_support/time.rb +13 -13
- data/lib/active_support/time_with_zone.rb +315 -90
- data/lib/active_support/values/time_zone.rb +306 -135
- data/lib/active_support/version.rb +6 -7
- data/lib/active_support/xml_mini/jdom.rb +117 -115
- data/lib/active_support/xml_mini/libxml.rb +22 -21
- data/lib/active_support/xml_mini/libxmlsax.rb +17 -19
- data/lib/active_support/xml_mini/nokogiri.rb +19 -19
- data/lib/active_support/xml_mini/nokogirisax.rb +16 -17
- data/lib/active_support/xml_mini/rexml.rb +25 -17
- data/lib/active_support/xml_mini.rb +67 -56
- data/lib/active_support.rb +58 -3
- metadata +125 -66
- data/lib/active_support/basic_object.rb +0 -11
- data/lib/active_support/buffered_logger.rb +0 -21
- data/lib/active_support/concurrency/latch.rb +0 -27
- data/lib/active_support/core_ext/array/prepend_and_append.rb +0 -7
- data/lib/active_support/core_ext/array/uniq_by.rb +0 -19
- data/lib/active_support/core_ext/class/delegating_attributes.rb +0 -40
- data/lib/active_support/core_ext/date_time/zones.rb +0 -24
- data/lib/active_support/core_ext/hash/diff.rb +0 -14
- data/lib/active_support/core_ext/kernel/agnostics.rb +0 -11
- data/lib/active_support/core_ext/kernel/debugger.rb +0 -10
- data/lib/active_support/core_ext/logger.rb +0 -67
- data/lib/active_support/core_ext/marshal.rb +0 -21
- data/lib/active_support/core_ext/module/qualified_const.rb +0 -52
- data/lib/active_support/core_ext/module/reachable.rb +0 -8
- data/lib/active_support/core_ext/object/to_json.rb +0 -27
- data/lib/active_support/core_ext/proc.rb +0 -17
- data/lib/active_support/core_ext/range/include_range.rb +0 -23
- data/lib/active_support/core_ext/string/encoding.rb +0 -8
- data/lib/active_support/core_ext/struct.rb +0 -6
- data/lib/active_support/core_ext/thread.rb +0 -79
- data/lib/active_support/core_ext/time/marshal.rb +0 -30
- data/lib/active_support/file_watcher.rb +0 -36
- data/lib/active_support/json/variable.rb +0 -18
- data/lib/active_support/testing/pending.rb +0 -14
- data/lib/active_support/values/unicode_tables.dat +0 -0
@@ -1,403 +1,70 @@
|
|
1
|
-
#
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
2
3
|
module ActiveSupport
|
3
4
|
module Multibyte
|
4
5
|
module Unicode
|
5
|
-
|
6
6
|
extend self
|
7
7
|
|
8
|
-
# A list of all available normalization forms.
|
9
|
-
# See http://www.unicode.org/reports/tr15/tr15-29.html for more
|
10
|
-
# information about normalization.
|
11
|
-
NORMALIZATION_FORMS = [:c, :kc, :d, :kd]
|
12
|
-
|
13
8
|
# The Unicode version that is supported by the implementation
|
14
|
-
UNICODE_VERSION =
|
15
|
-
|
16
|
-
# The default normalization used for operations that require
|
17
|
-
# normalization. It can be set to any of the normalizations
|
18
|
-
# in NORMALIZATION_FORMS.
|
19
|
-
#
|
20
|
-
# ActiveSupport::Multibyte::Unicode.default_normalization_form = :c
|
21
|
-
attr_accessor :default_normalization_form
|
22
|
-
@default_normalization_form = :kc
|
23
|
-
|
24
|
-
# Hangul character boundaries and properties
|
25
|
-
HANGUL_SBASE = 0xAC00
|
26
|
-
HANGUL_LBASE = 0x1100
|
27
|
-
HANGUL_VBASE = 0x1161
|
28
|
-
HANGUL_TBASE = 0x11A7
|
29
|
-
HANGUL_LCOUNT = 19
|
30
|
-
HANGUL_VCOUNT = 21
|
31
|
-
HANGUL_TCOUNT = 28
|
32
|
-
HANGUL_NCOUNT = HANGUL_VCOUNT * HANGUL_TCOUNT
|
33
|
-
HANGUL_SCOUNT = 11172
|
34
|
-
HANGUL_SLAST = HANGUL_SBASE + HANGUL_SCOUNT
|
35
|
-
HANGUL_JAMO_FIRST = 0x1100
|
36
|
-
HANGUL_JAMO_LAST = 0x11FF
|
37
|
-
|
38
|
-
# All the unicode whitespace
|
39
|
-
WHITESPACE = [
|
40
|
-
(0x0009..0x000D).to_a, # White_Space # Cc [5] <control-0009>..<control-000D>
|
41
|
-
0x0020, # White_Space # Zs SPACE
|
42
|
-
0x0085, # White_Space # Cc <control-0085>
|
43
|
-
0x00A0, # White_Space # Zs NO-BREAK SPACE
|
44
|
-
0x1680, # White_Space # Zs OGHAM SPACE MARK
|
45
|
-
0x180E, # White_Space # Zs MONGOLIAN VOWEL SEPARATOR
|
46
|
-
(0x2000..0x200A).to_a, # White_Space # Zs [11] EN QUAD..HAIR SPACE
|
47
|
-
0x2028, # White_Space # Zl LINE SEPARATOR
|
48
|
-
0x2029, # White_Space # Zp PARAGRAPH SEPARATOR
|
49
|
-
0x202F, # White_Space # Zs NARROW NO-BREAK SPACE
|
50
|
-
0x205F, # White_Space # Zs MEDIUM MATHEMATICAL SPACE
|
51
|
-
0x3000, # White_Space # Zs IDEOGRAPHIC SPACE
|
52
|
-
].flatten.freeze
|
53
|
-
|
54
|
-
# BOM (byte order mark) can also be seen as whitespace, it's a
|
55
|
-
# non-rendering character used to distinguish between little and big
|
56
|
-
# endian. This is not an issue in utf-8, so it must be ignored.
|
57
|
-
LEADERS_AND_TRAILERS = WHITESPACE + [65279] # ZERO-WIDTH NO-BREAK SPACE aka BOM
|
58
|
-
|
59
|
-
# Returns a regular expression pattern that matches the passed Unicode
|
60
|
-
# codepoints.
|
61
|
-
def self.codepoints_to_pattern(array_of_codepoints) #:nodoc:
|
62
|
-
array_of_codepoints.collect{ |e| [e].pack 'U*' }.join('|')
|
63
|
-
end
|
64
|
-
TRAILERS_PAT = /(#{codepoints_to_pattern(LEADERS_AND_TRAILERS)})+\Z/u
|
65
|
-
LEADERS_PAT = /\A(#{codepoints_to_pattern(LEADERS_AND_TRAILERS)})+/u
|
66
|
-
|
67
|
-
# Detect whether the codepoint is in a certain character class. Returns
|
68
|
-
# +true+ when it's in the specified character class and +false+ otherwise.
|
69
|
-
# Valid character classes are: <tt>:cr</tt>, <tt>:lf</tt>, <tt>:l</tt>,
|
70
|
-
# <tt>:v</tt>, <tt>:lv</tt>, <tt>:lvt</tt> and <tt>:t</tt>.
|
71
|
-
#
|
72
|
-
# Primarily used by the grapheme cluster support.
|
73
|
-
def in_char_class?(codepoint, classes)
|
74
|
-
classes.detect { |c| database.boundary[c] === codepoint } ? true : false
|
75
|
-
end
|
76
|
-
|
77
|
-
# Unpack the string at grapheme boundaries. Returns a list of character
|
78
|
-
# lists.
|
79
|
-
#
|
80
|
-
# Unicode.unpack_graphemes('क्षि') # => [[2325, 2381], [2359], [2367]]
|
81
|
-
# Unicode.unpack_graphemes('Café') # => [[67], [97], [102], [233]]
|
82
|
-
def unpack_graphemes(string)
|
83
|
-
codepoints = string.codepoints.to_a
|
84
|
-
unpacked = []
|
85
|
-
pos = 0
|
86
|
-
marker = 0
|
87
|
-
eoc = codepoints.length
|
88
|
-
while(pos < eoc)
|
89
|
-
pos += 1
|
90
|
-
previous = codepoints[pos-1]
|
91
|
-
current = codepoints[pos]
|
92
|
-
if (
|
93
|
-
# CR X LF
|
94
|
-
( previous == database.boundary[:cr] and current == database.boundary[:lf] ) or
|
95
|
-
# L X (L|V|LV|LVT)
|
96
|
-
( database.boundary[:l] === previous and in_char_class?(current, [:l,:v,:lv,:lvt]) ) or
|
97
|
-
# (LV|V) X (V|T)
|
98
|
-
( in_char_class?(previous, [:lv,:v]) and in_char_class?(current, [:v,:t]) ) or
|
99
|
-
# (LVT|T) X (T)
|
100
|
-
( in_char_class?(previous, [:lvt,:t]) and database.boundary[:t] === current ) or
|
101
|
-
# X Extend
|
102
|
-
(database.boundary[:extend] === current)
|
103
|
-
)
|
104
|
-
else
|
105
|
-
unpacked << codepoints[marker..pos-1]
|
106
|
-
marker = pos
|
107
|
-
end
|
108
|
-
end
|
109
|
-
unpacked
|
110
|
-
end
|
111
|
-
|
112
|
-
# Reverse operation of unpack_graphemes.
|
113
|
-
#
|
114
|
-
# Unicode.pack_graphemes(Unicode.unpack_graphemes('क्षि')) # => 'क्षि'
|
115
|
-
def pack_graphemes(unpacked)
|
116
|
-
unpacked.flatten.pack('U*')
|
117
|
-
end
|
118
|
-
|
119
|
-
# Re-order codepoints so the string becomes canonical.
|
120
|
-
def reorder_characters(codepoints)
|
121
|
-
length = codepoints.length- 1
|
122
|
-
pos = 0
|
123
|
-
while pos < length do
|
124
|
-
cp1, cp2 = database.codepoints[codepoints[pos]], database.codepoints[codepoints[pos+1]]
|
125
|
-
if (cp1.combining_class > cp2.combining_class) && (cp2.combining_class > 0)
|
126
|
-
codepoints[pos..pos+1] = cp2.code, cp1.code
|
127
|
-
pos += (pos > 0 ? -1 : 1)
|
128
|
-
else
|
129
|
-
pos += 1
|
130
|
-
end
|
131
|
-
end
|
132
|
-
codepoints
|
133
|
-
end
|
9
|
+
UNICODE_VERSION = RbConfig::CONFIG["UNICODE_VERSION"]
|
134
10
|
|
135
11
|
# Decompose composed characters to the decomposed form.
|
136
12
|
def decompose(type, codepoints)
|
137
|
-
|
138
|
-
|
139
|
-
|
140
|
-
|
141
|
-
ncp = [] # new codepoints
|
142
|
-
ncp << HANGUL_LBASE + sindex / HANGUL_NCOUNT
|
143
|
-
ncp << HANGUL_VBASE + (sindex % HANGUL_NCOUNT) / HANGUL_TCOUNT
|
144
|
-
tindex = sindex % HANGUL_TCOUNT
|
145
|
-
ncp << (HANGUL_TBASE + tindex) unless tindex == 0
|
146
|
-
decomposed.concat ncp
|
147
|
-
# if the codepoint is decomposable in with the current decomposition type
|
148
|
-
elsif (ncp = database.codepoints[cp].decomp_mapping) and (!database.codepoints[cp].decomp_type || type == :compatibility)
|
149
|
-
decomposed.concat decompose(type, ncp.dup)
|
150
|
-
else
|
151
|
-
decomposed << cp
|
152
|
-
end
|
13
|
+
if type == :compatibility
|
14
|
+
codepoints.pack("U*").unicode_normalize(:nfkd).codepoints
|
15
|
+
else
|
16
|
+
codepoints.pack("U*").unicode_normalize(:nfd).codepoints
|
153
17
|
end
|
154
18
|
end
|
155
19
|
|
156
20
|
# Compose decomposed characters to the composed form.
|
157
21
|
def compose(codepoints)
|
158
|
-
|
159
|
-
|
160
|
-
|
161
|
-
|
162
|
-
|
163
|
-
|
164
|
-
|
165
|
-
|
166
|
-
|
167
|
-
|
168
|
-
|
169
|
-
|
170
|
-
|
171
|
-
|
172
|
-
j = starter_pos + 2
|
173
|
-
eoa -= 2
|
174
|
-
else
|
175
|
-
tindex = 0
|
176
|
-
j = starter_pos + 1
|
177
|
-
eoa -= 1
|
178
|
-
end
|
179
|
-
codepoints[starter_pos..j] = (lindex * HANGUL_VCOUNT + vindex) * HANGUL_TCOUNT + tindex + HANGUL_SBASE
|
180
|
-
end
|
181
|
-
starter_pos += 1
|
182
|
-
starter_char = codepoints[starter_pos]
|
183
|
-
# -- Other characters
|
184
|
-
else
|
185
|
-
current_char = codepoints[pos]
|
186
|
-
current = database.codepoints[current_char]
|
187
|
-
if current.combining_class > previous_combining_class
|
188
|
-
if ref = database.composition_map[starter_char]
|
189
|
-
composition = ref[current_char]
|
190
|
-
else
|
191
|
-
composition = nil
|
192
|
-
end
|
193
|
-
unless composition.nil?
|
194
|
-
codepoints[starter_pos] = composition
|
195
|
-
starter_char = composition
|
196
|
-
codepoints.delete_at pos
|
197
|
-
eoa -= 1
|
198
|
-
pos -= 1
|
199
|
-
previous_combining_class = -1
|
200
|
-
else
|
201
|
-
previous_combining_class = current.combining_class
|
202
|
-
end
|
203
|
-
else
|
204
|
-
previous_combining_class = current.combining_class
|
205
|
-
end
|
206
|
-
if current.combining_class == 0
|
207
|
-
starter_pos = pos
|
208
|
-
starter_char = codepoints[pos]
|
209
|
-
end
|
210
|
-
end
|
211
|
-
end
|
212
|
-
codepoints
|
213
|
-
end
|
214
|
-
|
215
|
-
# Replaces all ISO-8859-1 or CP1252 characters by their UTF-8 equivalent
|
216
|
-
# resulting in a valid UTF-8 string.
|
217
|
-
#
|
218
|
-
# Passing +true+ will forcibly tidy all bytes, assuming that the string's
|
219
|
-
# encoding is entirely CP1252 or ISO-8859-1.
|
220
|
-
def tidy_bytes(string, force = false)
|
221
|
-
if force
|
222
|
-
return string.unpack("C*").map do |b|
|
223
|
-
tidy_byte(b)
|
224
|
-
end.flatten.compact.pack("C*").unpack("U*").pack("U*")
|
225
|
-
end
|
226
|
-
|
227
|
-
bytes = string.unpack("C*")
|
228
|
-
conts_expected = 0
|
229
|
-
last_lead = 0
|
230
|
-
|
231
|
-
bytes.each_index do |i|
|
232
|
-
|
233
|
-
byte = bytes[i]
|
234
|
-
is_cont = byte > 127 && byte < 192
|
235
|
-
is_lead = byte > 191 && byte < 245
|
236
|
-
is_unused = byte > 240
|
237
|
-
is_restricted = byte > 244
|
238
|
-
|
239
|
-
# Impossible or highly unlikely byte? Clean it.
|
240
|
-
if is_unused || is_restricted
|
241
|
-
bytes[i] = tidy_byte(byte)
|
242
|
-
elsif is_cont
|
243
|
-
# Not expecting continuation byte? Clean up. Otherwise, now expect one less.
|
244
|
-
conts_expected == 0 ? bytes[i] = tidy_byte(byte) : conts_expected -= 1
|
245
|
-
else
|
246
|
-
if conts_expected > 0
|
247
|
-
# Expected continuation, but got ASCII or leading? Clean backwards up to
|
248
|
-
# the leading byte.
|
249
|
-
(1..(i - last_lead)).each {|j| bytes[i - j] = tidy_byte(bytes[i - j])}
|
250
|
-
conts_expected = 0
|
251
|
-
end
|
252
|
-
if is_lead
|
253
|
-
# Final byte is leading? Clean it.
|
254
|
-
if i == bytes.length - 1
|
255
|
-
bytes[i] = tidy_byte(bytes.last)
|
256
|
-
else
|
257
|
-
# Valid leading byte? Expect continuations determined by position of
|
258
|
-
# first zero bit, with max of 3.
|
259
|
-
conts_expected = byte < 224 ? 1 : byte < 240 ? 2 : 3
|
260
|
-
last_lead = i
|
261
|
-
end
|
262
|
-
end
|
263
|
-
end
|
264
|
-
end
|
265
|
-
bytes.empty? ? "" : bytes.flatten.compact.pack("C*").unpack("U*").pack("U*")
|
266
|
-
end
|
267
|
-
|
268
|
-
# Returns the KC normalization of the string by default. NFKC is
|
269
|
-
# considered the best normalization form for passing strings to databases
|
270
|
-
# and validations.
|
271
|
-
#
|
272
|
-
# * <tt>string</tt> - The string to perform normalization on.
|
273
|
-
# * <tt>form</tt> - The form you want to normalize in. Should be one of
|
274
|
-
# the following: <tt>:c</tt>, <tt>:kc</tt>, <tt>:d</tt>, or <tt>:kd</tt>.
|
275
|
-
# Default is ActiveSupport::Multibyte.default_normalization_form.
|
276
|
-
def normalize(string, form=nil)
|
277
|
-
form ||= @default_normalization_form
|
278
|
-
# See http://www.unicode.org/reports/tr15, Table 1
|
279
|
-
codepoints = string.codepoints.to_a
|
280
|
-
case form
|
281
|
-
when :d
|
282
|
-
reorder_characters(decompose(:canonical, codepoints))
|
283
|
-
when :c
|
284
|
-
compose(reorder_characters(decompose(:canonical, codepoints)))
|
285
|
-
when :kd
|
286
|
-
reorder_characters(decompose(:compatibility, codepoints))
|
287
|
-
when :kc
|
288
|
-
compose(reorder_characters(decompose(:compatibility, codepoints)))
|
289
|
-
else
|
290
|
-
raise ArgumentError, "#{form} is not a valid normalization variant", caller
|
291
|
-
end.pack('U*')
|
292
|
-
end
|
293
|
-
|
294
|
-
def downcase(string)
|
295
|
-
apply_mapping string, :lowercase_mapping
|
296
|
-
end
|
297
|
-
|
298
|
-
def upcase(string)
|
299
|
-
apply_mapping string, :uppercase_mapping
|
300
|
-
end
|
301
|
-
|
302
|
-
def swapcase(string)
|
303
|
-
apply_mapping string, :swapcase_mapping
|
304
|
-
end
|
305
|
-
|
306
|
-
# Holds data about a codepoint in the Unicode database.
|
307
|
-
class Codepoint
|
308
|
-
attr_accessor :code, :combining_class, :decomp_type, :decomp_mapping, :uppercase_mapping, :lowercase_mapping
|
309
|
-
|
310
|
-
def swapcase_mapping
|
311
|
-
uppercase_mapping > 0 ? uppercase_mapping : lowercase_mapping
|
312
|
-
end
|
313
|
-
end
|
314
|
-
|
315
|
-
# Holds static data from the Unicode database.
|
316
|
-
class UnicodeDatabase
|
317
|
-
ATTRIBUTES = :codepoints, :composition_exclusion, :composition_map, :boundary, :cp1252
|
318
|
-
|
319
|
-
attr_writer(*ATTRIBUTES)
|
320
|
-
|
321
|
-
def initialize
|
322
|
-
@codepoints = Hash.new(Codepoint.new)
|
323
|
-
@composition_exclusion = []
|
324
|
-
@composition_map = {}
|
325
|
-
@boundary = {}
|
326
|
-
@cp1252 = {}
|
327
|
-
end
|
328
|
-
|
329
|
-
# Lazy load the Unicode database so it's only loaded when it's actually used
|
330
|
-
ATTRIBUTES.each do |attr_name|
|
331
|
-
class_eval(<<-EOS, __FILE__, __LINE__ + 1)
|
332
|
-
def #{attr_name} # def codepoints
|
333
|
-
load # load
|
334
|
-
@#{attr_name} # @codepoints
|
335
|
-
end # end
|
336
|
-
EOS
|
22
|
+
codepoints.pack("U*").unicode_normalize(:nfc).codepoints
|
23
|
+
end
|
24
|
+
|
25
|
+
# Rubinius' String#scrub, however, doesn't support ASCII-incompatible chars.
|
26
|
+
if !defined?(Rubinius)
|
27
|
+
# Replaces all ISO-8859-1 or CP1252 characters by their UTF-8 equivalent
|
28
|
+
# resulting in a valid UTF-8 string.
|
29
|
+
#
|
30
|
+
# Passing +true+ will forcibly tidy all bytes, assuming that the string's
|
31
|
+
# encoding is entirely CP1252 or ISO-8859-1.
|
32
|
+
def tidy_bytes(string, force = false)
|
33
|
+
return string if string.empty? || string.ascii_only?
|
34
|
+
return recode_windows1252_chars(string) if force
|
35
|
+
string.scrub { |bad| recode_windows1252_chars(bad) }
|
337
36
|
end
|
338
|
-
|
339
|
-
|
340
|
-
|
341
|
-
|
342
|
-
|
343
|
-
|
344
|
-
|
345
|
-
|
37
|
+
else
|
38
|
+
def tidy_bytes(string, force = false)
|
39
|
+
return string if string.empty?
|
40
|
+
return recode_windows1252_chars(string) if force
|
41
|
+
|
42
|
+
# We can't transcode to the same format, so we choose a nearly-identical encoding.
|
43
|
+
# We're going to 'transcode' bytes from UTF-8 when possible, then fall back to
|
44
|
+
# CP1252 when we get errors. The final string will be 'converted' back to UTF-8
|
45
|
+
# before returning.
|
46
|
+
reader = Encoding::Converter.new(Encoding::UTF_8, Encoding::UTF_16LE)
|
47
|
+
|
48
|
+
source = string.dup
|
49
|
+
out = "".force_encoding(Encoding::UTF_16LE)
|
50
|
+
|
51
|
+
loop do
|
52
|
+
reader.primitive_convert(source, out)
|
53
|
+
_, _, _, error_bytes, _ = reader.primitive_errinfo
|
54
|
+
break if error_bytes.nil?
|
55
|
+
out << error_bytes.encode(Encoding::UTF_16LE, Encoding::Windows_1252, invalid: :replace, undef: :replace)
|
346
56
|
end
|
347
57
|
|
348
|
-
|
349
|
-
@boundary.each do |k,_|
|
350
|
-
@boundary[k].instance_eval do
|
351
|
-
def ===(other)
|
352
|
-
detect { |i| i === other } ? true : false
|
353
|
-
end
|
354
|
-
end if @boundary[k].kind_of?(Array)
|
355
|
-
end
|
58
|
+
reader.finish
|
356
59
|
|
357
|
-
|
358
|
-
class << self
|
359
|
-
attr_reader(*ATTRIBUTES)
|
360
|
-
end
|
361
|
-
end
|
362
|
-
|
363
|
-
# Returns the directory in which the data files are stored.
|
364
|
-
def self.dirname
|
365
|
-
File.dirname(__FILE__) + '/../values/'
|
366
|
-
end
|
367
|
-
|
368
|
-
# Returns the filename for the data file for this version.
|
369
|
-
def self.filename
|
370
|
-
File.expand_path File.join(dirname, "unicode_tables.dat")
|
60
|
+
out.encode!(Encoding::UTF_8)
|
371
61
|
end
|
372
62
|
end
|
373
63
|
|
374
64
|
private
|
375
|
-
|
376
|
-
|
377
|
-
string.each_codepoint.map do |codepoint|
|
378
|
-
cp = database.codepoints[codepoint]
|
379
|
-
if cp and (ncp = cp.send(mapping)) and ncp > 0
|
380
|
-
ncp
|
381
|
-
else
|
382
|
-
codepoint
|
383
|
-
end
|
384
|
-
end.pack('U*')
|
385
|
-
end
|
386
|
-
|
387
|
-
def tidy_byte(byte)
|
388
|
-
if byte < 160
|
389
|
-
[database.cp1252[byte] || byte].pack("U").unpack("C*")
|
390
|
-
elsif byte < 192
|
391
|
-
[194, byte]
|
392
|
-
else
|
393
|
-
[195, byte - 64]
|
65
|
+
def recode_windows1252_chars(string)
|
66
|
+
string.encode(Encoding::UTF_8, Encoding::Windows_1252, invalid: :replace, undef: :replace)
|
394
67
|
end
|
395
|
-
end
|
396
|
-
|
397
|
-
def database
|
398
|
-
@database ||= UnicodeDatabase.new
|
399
|
-
end
|
400
|
-
|
401
68
|
end
|
402
69
|
end
|
403
70
|
end
|
@@ -1,7 +1,9 @@
|
|
1
|
-
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module ActiveSupport # :nodoc:
|
2
4
|
module Multibyte
|
3
|
-
autoload :Chars,
|
4
|
-
autoload :Unicode,
|
5
|
+
autoload :Chars, "active_support/multibyte/chars"
|
6
|
+
autoload :Unicode, "active_support/multibyte/unicode"
|
5
7
|
|
6
8
|
# The proxy class returned when calling mb_chars. You can use this accessor
|
7
9
|
# to configure your own proxy class so you can support other encodings. See
|