activesupport 4.0.12 → 7.0.2.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of activesupport might be problematic. Click here for more details.

Files changed (295) hide show
  1. checksums.yaml +5 -5
  2. data/CHANGELOG.md +249 -501
  3. data/MIT-LICENSE +2 -2
  4. data/README.rdoc +10 -5
  5. data/lib/active_support/actionable_error.rb +48 -0
  6. data/lib/active_support/all.rb +5 -3
  7. data/lib/active_support/array_inquirer.rb +48 -0
  8. data/lib/active_support/backtrace_cleaner.rb +41 -13
  9. data/lib/active_support/benchmarkable.rb +7 -15
  10. data/lib/active_support/builder.rb +3 -1
  11. data/lib/active_support/cache/file_store.rb +96 -74
  12. data/lib/active_support/cache/mem_cache_store.rb +211 -103
  13. data/lib/active_support/cache/memory_store.rb +90 -58
  14. data/lib/active_support/cache/null_store.rb +19 -7
  15. data/lib/active_support/cache/redis_cache_store.rb +468 -0
  16. data/lib/active_support/cache/strategy/local_cache.rb +86 -83
  17. data/lib/active_support/cache/strategy/local_cache_middleware.rb +45 -0
  18. data/lib/active_support/cache.rb +580 -241
  19. data/lib/active_support/callbacks.rb +812 -425
  20. data/lib/active_support/code_generator.rb +65 -0
  21. data/lib/active_support/concern.rb +103 -14
  22. data/lib/active_support/concurrency/load_interlock_aware_monitor.rb +33 -0
  23. data/lib/active_support/concurrency/share_lock.rb +226 -0
  24. data/lib/active_support/configurable.rb +21 -19
  25. data/lib/active_support/configuration_file.rb +51 -0
  26. data/lib/active_support/core_ext/array/access.rb +47 -1
  27. data/lib/active_support/core_ext/array/conversions.rb +35 -44
  28. data/lib/active_support/core_ext/array/deprecated_conversions.rb +25 -0
  29. data/lib/active_support/core_ext/array/extract.rb +21 -0
  30. data/lib/active_support/core_ext/array/extract_options.rb +2 -0
  31. data/lib/active_support/core_ext/array/grouping.rb +26 -16
  32. data/lib/active_support/core_ext/array/inquiry.rb +19 -0
  33. data/lib/active_support/core_ext/array/wrap.rb +7 -4
  34. data/lib/active_support/core_ext/array.rb +10 -7
  35. data/lib/active_support/core_ext/benchmark.rb +5 -3
  36. data/lib/active_support/core_ext/big_decimal/conversions.rb +9 -26
  37. data/lib/active_support/core_ext/big_decimal.rb +3 -1
  38. data/lib/active_support/core_ext/class/attribute.rb +52 -49
  39. data/lib/active_support/core_ext/class/attribute_accessors.rb +5 -169
  40. data/lib/active_support/core_ext/class/subclasses.rb +25 -26
  41. data/lib/active_support/core_ext/class.rb +4 -4
  42. data/lib/active_support/core_ext/date/acts_like.rb +3 -1
  43. data/lib/active_support/core_ext/date/blank.rb +14 -0
  44. data/lib/active_support/core_ext/date/calculations.rb +31 -18
  45. data/lib/active_support/core_ext/date/conversions.rb +43 -32
  46. data/lib/active_support/core_ext/date/deprecated_conversions.rb +26 -0
  47. data/lib/active_support/core_ext/date/zones.rb +5 -34
  48. data/lib/active_support/core_ext/date.rb +7 -4
  49. data/lib/active_support/core_ext/date_and_time/calculations.rb +198 -66
  50. data/lib/active_support/core_ext/date_and_time/compatibility.rb +31 -0
  51. data/lib/active_support/core_ext/date_and_time/zones.rb +40 -0
  52. data/lib/active_support/core_ext/date_time/acts_like.rb +4 -2
  53. data/lib/active_support/core_ext/date_time/blank.rb +14 -0
  54. data/lib/active_support/core_ext/date_time/calculations.rb +79 -38
  55. data/lib/active_support/core_ext/date_time/compatibility.rb +18 -0
  56. data/lib/active_support/core_ext/date_time/conversions.rb +31 -26
  57. data/lib/active_support/core_ext/date_time/deprecated_conversions.rb +22 -0
  58. data/lib/active_support/core_ext/date_time.rb +8 -4
  59. data/lib/active_support/core_ext/digest/uuid.rb +79 -0
  60. data/lib/active_support/core_ext/digest.rb +3 -0
  61. data/lib/active_support/core_ext/enumerable.rb +249 -17
  62. data/lib/active_support/core_ext/file/atomic.rb +41 -32
  63. data/lib/active_support/core_ext/file.rb +3 -1
  64. data/lib/active_support/core_ext/hash/conversions.rb +71 -49
  65. data/lib/active_support/core_ext/hash/deep_merge.rb +9 -13
  66. data/lib/active_support/core_ext/hash/deep_transform_values.rb +46 -0
  67. data/lib/active_support/core_ext/hash/except.rb +14 -5
  68. data/lib/active_support/core_ext/hash/indifferent_access.rb +5 -3
  69. data/lib/active_support/core_ext/hash/keys.rb +39 -56
  70. data/lib/active_support/core_ext/hash/reverse_merge.rb +5 -2
  71. data/lib/active_support/core_ext/hash/slice.rb +8 -23
  72. data/lib/active_support/core_ext/hash.rb +10 -8
  73. data/lib/active_support/core_ext/integer/inflections.rb +3 -1
  74. data/lib/active_support/core_ext/integer/multiple.rb +3 -1
  75. data/lib/active_support/core_ext/integer/time.rb +11 -33
  76. data/lib/active_support/core_ext/integer.rb +5 -3
  77. data/lib/active_support/core_ext/kernel/concern.rb +14 -0
  78. data/lib/active_support/core_ext/kernel/reporting.rb +9 -78
  79. data/lib/active_support/core_ext/kernel/singleton_class.rb +2 -0
  80. data/lib/active_support/core_ext/kernel.rb +5 -4
  81. data/lib/active_support/core_ext/load_error.rb +5 -21
  82. data/lib/active_support/core_ext/module/aliasing.rb +6 -44
  83. data/lib/active_support/core_ext/module/anonymous.rb +12 -1
  84. data/lib/active_support/core_ext/module/attr_internal.rb +8 -8
  85. data/lib/active_support/core_ext/module/attribute_accessors.rb +186 -44
  86. data/lib/active_support/core_ext/module/attribute_accessors_per_thread.rb +157 -0
  87. data/lib/active_support/core_ext/module/concerning.rb +140 -0
  88. data/lib/active_support/core_ext/module/delegation.rb +172 -45
  89. data/lib/active_support/core_ext/module/deprecation.rb +3 -3
  90. data/lib/active_support/core_ext/module/introspection.rb +23 -38
  91. data/lib/active_support/core_ext/module/redefine_method.rb +40 -0
  92. data/lib/active_support/core_ext/module/remove_method.rb +8 -3
  93. data/lib/active_support/core_ext/module.rb +13 -10
  94. data/lib/active_support/core_ext/name_error.rb +45 -4
  95. data/lib/active_support/core_ext/numeric/bytes.rb +22 -0
  96. data/lib/active_support/core_ext/numeric/conversions.rb +135 -127
  97. data/lib/active_support/core_ext/numeric/deprecated_conversions.rb +60 -0
  98. data/lib/active_support/core_ext/numeric/time.rb +37 -50
  99. data/lib/active_support/core_ext/numeric.rb +6 -3
  100. data/lib/active_support/core_ext/object/acts_like.rb +41 -6
  101. data/lib/active_support/core_ext/object/blank.rb +70 -20
  102. data/lib/active_support/core_ext/object/conversions.rb +6 -4
  103. data/lib/active_support/core_ext/object/deep_dup.rb +19 -10
  104. data/lib/active_support/core_ext/object/duplicable.rb +17 -47
  105. data/lib/active_support/core_ext/object/inclusion.rb +18 -15
  106. data/lib/active_support/core_ext/object/instance_variables.rb +3 -1
  107. data/lib/active_support/core_ext/object/json.rb +244 -0
  108. data/lib/active_support/core_ext/object/to_param.rb +3 -1
  109. data/lib/active_support/core_ext/object/to_query.rb +21 -8
  110. data/lib/active_support/core_ext/object/try.rb +106 -26
  111. data/lib/active_support/core_ext/object/with_options.rb +64 -5
  112. data/lib/active_support/core_ext/object.rb +14 -12
  113. data/lib/active_support/core_ext/pathname/existence.rb +21 -0
  114. data/lib/active_support/core_ext/pathname.rb +3 -0
  115. data/lib/active_support/core_ext/range/compare_range.rb +57 -0
  116. data/lib/active_support/core_ext/range/conversions.rb +37 -15
  117. data/lib/active_support/core_ext/range/deprecated_conversions.rb +26 -0
  118. data/lib/active_support/core_ext/range/each.rb +18 -17
  119. data/lib/active_support/core_ext/range/include_time_with_zone.rb +7 -0
  120. data/lib/active_support/core_ext/range/overlaps.rb +2 -0
  121. data/lib/active_support/core_ext/range.rb +7 -4
  122. data/lib/active_support/core_ext/regexp.rb +10 -1
  123. data/lib/active_support/core_ext/securerandom.rb +45 -0
  124. data/lib/active_support/core_ext/string/access.rb +42 -51
  125. data/lib/active_support/core_ext/string/behavior.rb +3 -1
  126. data/lib/active_support/core_ext/string/conversions.rb +18 -13
  127. data/lib/active_support/core_ext/string/exclude.rb +5 -3
  128. data/lib/active_support/core_ext/string/filters.rb +97 -7
  129. data/lib/active_support/core_ext/string/indent.rb +6 -4
  130. data/lib/active_support/core_ext/string/inflections.rb +106 -25
  131. data/lib/active_support/core_ext/string/inquiry.rb +4 -1
  132. data/lib/active_support/core_ext/string/multibyte.rb +18 -9
  133. data/lib/active_support/core_ext/string/output_safety.rb +227 -54
  134. data/lib/active_support/core_ext/string/starts_ends_with.rb +4 -2
  135. data/lib/active_support/core_ext/string/strip.rb +6 -5
  136. data/lib/active_support/core_ext/string/zones.rb +4 -1
  137. data/lib/active_support/core_ext/string.rb +15 -13
  138. data/lib/active_support/core_ext/symbol/starts_ends_with.rb +6 -0
  139. data/lib/active_support/core_ext/symbol.rb +3 -0
  140. data/lib/active_support/core_ext/time/acts_like.rb +3 -1
  141. data/lib/active_support/core_ext/time/calculations.rb +178 -116
  142. data/lib/active_support/core_ext/time/compatibility.rb +16 -0
  143. data/lib/active_support/core_ext/time/conversions.rb +37 -25
  144. data/lib/active_support/core_ext/time/deprecated_conversions.rb +22 -0
  145. data/lib/active_support/core_ext/time/zones.rb +44 -42
  146. data/lib/active_support/core_ext/time.rb +8 -5
  147. data/lib/active_support/core_ext/uri.rb +4 -25
  148. data/lib/active_support/core_ext.rb +4 -2
  149. data/lib/active_support/current_attributes/test_helper.rb +13 -0
  150. data/lib/active_support/current_attributes.rb +226 -0
  151. data/lib/active_support/dependencies/autoload.rb +3 -1
  152. data/lib/active_support/dependencies/interlock.rb +49 -0
  153. data/lib/active_support/dependencies/require_dependency.rb +28 -0
  154. data/lib/active_support/dependencies.rb +71 -696
  155. data/lib/active_support/deprecation/behaviors.rb +65 -16
  156. data/lib/active_support/deprecation/constant_accessor.rb +52 -0
  157. data/lib/active_support/deprecation/disallowed.rb +56 -0
  158. data/lib/active_support/deprecation/instance_delegator.rb +16 -2
  159. data/lib/active_support/deprecation/method_wrappers.rb +62 -21
  160. data/lib/active_support/deprecation/proxy_wrappers.rb +82 -31
  161. data/lib/active_support/deprecation/reporting.rb +81 -18
  162. data/lib/active_support/deprecation.rb +19 -11
  163. data/lib/active_support/descendants_tracker.rb +192 -34
  164. data/lib/active_support/digest.rb +22 -0
  165. data/lib/active_support/duration/iso8601_parser.rb +123 -0
  166. data/lib/active_support/duration/iso8601_serializer.rb +67 -0
  167. data/lib/active_support/duration.rb +437 -39
  168. data/lib/active_support/encrypted_configuration.rb +56 -0
  169. data/lib/active_support/encrypted_file.rb +117 -0
  170. data/lib/active_support/environment_inquirer.rb +20 -0
  171. data/lib/active_support/error_reporter.rb +117 -0
  172. data/lib/active_support/evented_file_update_checker.rb +170 -0
  173. data/lib/active_support/execution_context/test_helper.rb +13 -0
  174. data/lib/active_support/execution_context.rb +53 -0
  175. data/lib/active_support/execution_wrapper.rb +151 -0
  176. data/lib/active_support/executor/test_helper.rb +7 -0
  177. data/lib/active_support/executor.rb +8 -0
  178. data/lib/active_support/file_update_checker.rb +62 -37
  179. data/lib/active_support/fork_tracker.rb +71 -0
  180. data/lib/active_support/gem_version.rb +17 -0
  181. data/lib/active_support/gzip.rb +7 -5
  182. data/lib/active_support/hash_with_indifferent_access.rb +207 -54
  183. data/lib/active_support/html_safe_translation.rb +43 -0
  184. data/lib/active_support/i18n.rb +10 -6
  185. data/lib/active_support/i18n_railtie.rb +48 -19
  186. data/lib/active_support/inflections.rb +19 -12
  187. data/lib/active_support/inflector/inflections.rb +97 -37
  188. data/lib/active_support/inflector/methods.rb +192 -157
  189. data/lib/active_support/inflector/transliterate.rb +83 -33
  190. data/lib/active_support/inflector.rb +7 -5
  191. data/lib/active_support/isolated_execution_state.rb +64 -0
  192. data/lib/active_support/json/decoding.rb +37 -42
  193. data/lib/active_support/json/encoding.rb +93 -293
  194. data/lib/active_support/json.rb +4 -2
  195. data/lib/active_support/key_generator.rb +30 -47
  196. data/lib/active_support/lazy_load_hooks.rb +54 -21
  197. data/lib/active_support/locale/en.rb +33 -0
  198. data/lib/active_support/locale/en.yml +10 -4
  199. data/lib/active_support/log_subscriber/test_helper.rb +14 -12
  200. data/lib/active_support/log_subscriber.rb +61 -18
  201. data/lib/active_support/logger.rb +40 -4
  202. data/lib/active_support/logger_silence.rb +17 -20
  203. data/lib/active_support/logger_thread_safe_level.rb +69 -0
  204. data/lib/active_support/message_encryptor.rb +178 -55
  205. data/lib/active_support/message_verifier.rb +195 -26
  206. data/lib/active_support/messages/metadata.rb +80 -0
  207. data/lib/active_support/messages/rotation_configuration.rb +23 -0
  208. data/lib/active_support/messages/rotator.rb +57 -0
  209. data/lib/active_support/multibyte/chars.rb +45 -92
  210. data/lib/active_support/multibyte/unicode.rb +44 -377
  211. data/lib/active_support/multibyte.rb +5 -3
  212. data/lib/active_support/notifications/fanout.rb +177 -44
  213. data/lib/active_support/notifications/instrumenter.rb +117 -17
  214. data/lib/active_support/notifications.rb +106 -39
  215. data/lib/active_support/number_helper/number_converter.rb +181 -0
  216. data/lib/active_support/number_helper/number_to_currency_converter.rb +46 -0
  217. data/lib/active_support/number_helper/number_to_delimited_converter.rb +30 -0
  218. data/lib/active_support/number_helper/number_to_human_converter.rb +69 -0
  219. data/lib/active_support/number_helper/number_to_human_size_converter.rb +60 -0
  220. data/lib/active_support/number_helper/number_to_percentage_converter.rb +16 -0
  221. data/lib/active_support/number_helper/number_to_phone_converter.rb +59 -0
  222. data/lib/active_support/number_helper/number_to_rounded_converter.rb +59 -0
  223. data/lib/active_support/number_helper/rounding_helper.rb +46 -0
  224. data/lib/active_support/number_helper.rb +152 -394
  225. data/lib/active_support/option_merger.rb +18 -5
  226. data/lib/active_support/ordered_hash.rb +8 -6
  227. data/lib/active_support/ordered_options.rb +43 -7
  228. data/lib/active_support/parameter_filter.rb +138 -0
  229. data/lib/active_support/per_thread_registry.rb +24 -11
  230. data/lib/active_support/proxy_object.rb +2 -0
  231. data/lib/active_support/rails.rb +10 -11
  232. data/lib/active_support/railtie.rb +118 -12
  233. data/lib/active_support/reloader.rb +130 -0
  234. data/lib/active_support/rescuable.rb +112 -57
  235. data/lib/active_support/ruby_features.rb +7 -0
  236. data/lib/active_support/secure_compare_rotator.rb +51 -0
  237. data/lib/active_support/security_utils.rb +38 -0
  238. data/lib/active_support/string_inquirer.rb +11 -4
  239. data/lib/active_support/subscriber.rb +109 -39
  240. data/lib/active_support/tagged_logging.rb +54 -17
  241. data/lib/active_support/test_case.rb +121 -37
  242. data/lib/active_support/testing/assertions.rb +177 -39
  243. data/lib/active_support/testing/autorun.rb +5 -3
  244. data/lib/active_support/testing/constant_lookup.rb +3 -6
  245. data/lib/active_support/testing/declarative.rb +10 -22
  246. data/lib/active_support/testing/deprecation.rb +65 -11
  247. data/lib/active_support/testing/file_fixtures.rb +38 -0
  248. data/lib/active_support/testing/isolation.rb +56 -87
  249. data/lib/active_support/testing/method_call_assertions.rb +70 -0
  250. data/lib/active_support/testing/parallelization/server.rb +82 -0
  251. data/lib/active_support/testing/parallelization/worker.rb +103 -0
  252. data/lib/active_support/testing/parallelization.rb +55 -0
  253. data/lib/active_support/testing/parallelize_executor.rb +76 -0
  254. data/lib/active_support/testing/setup_and_teardown.rb +30 -10
  255. data/lib/active_support/testing/stream.rb +41 -0
  256. data/lib/active_support/testing/tagged_logging.rb +6 -4
  257. data/lib/active_support/testing/time_helpers.rb +246 -0
  258. data/lib/active_support/time.rb +13 -13
  259. data/lib/active_support/time_with_zone.rb +315 -90
  260. data/lib/active_support/values/time_zone.rb +306 -135
  261. data/lib/active_support/version.rb +6 -7
  262. data/lib/active_support/xml_mini/jdom.rb +117 -115
  263. data/lib/active_support/xml_mini/libxml.rb +22 -21
  264. data/lib/active_support/xml_mini/libxmlsax.rb +17 -19
  265. data/lib/active_support/xml_mini/nokogiri.rb +19 -19
  266. data/lib/active_support/xml_mini/nokogirisax.rb +16 -17
  267. data/lib/active_support/xml_mini/rexml.rb +25 -17
  268. data/lib/active_support/xml_mini.rb +67 -56
  269. data/lib/active_support.rb +58 -3
  270. metadata +125 -66
  271. data/lib/active_support/basic_object.rb +0 -11
  272. data/lib/active_support/buffered_logger.rb +0 -21
  273. data/lib/active_support/concurrency/latch.rb +0 -27
  274. data/lib/active_support/core_ext/array/prepend_and_append.rb +0 -7
  275. data/lib/active_support/core_ext/array/uniq_by.rb +0 -19
  276. data/lib/active_support/core_ext/class/delegating_attributes.rb +0 -40
  277. data/lib/active_support/core_ext/date_time/zones.rb +0 -24
  278. data/lib/active_support/core_ext/hash/diff.rb +0 -14
  279. data/lib/active_support/core_ext/kernel/agnostics.rb +0 -11
  280. data/lib/active_support/core_ext/kernel/debugger.rb +0 -10
  281. data/lib/active_support/core_ext/logger.rb +0 -67
  282. data/lib/active_support/core_ext/marshal.rb +0 -21
  283. data/lib/active_support/core_ext/module/qualified_const.rb +0 -52
  284. data/lib/active_support/core_ext/module/reachable.rb +0 -8
  285. data/lib/active_support/core_ext/object/to_json.rb +0 -27
  286. data/lib/active_support/core_ext/proc.rb +0 -17
  287. data/lib/active_support/core_ext/range/include_range.rb +0 -23
  288. data/lib/active_support/core_ext/string/encoding.rb +0 -8
  289. data/lib/active_support/core_ext/struct.rb +0 -6
  290. data/lib/active_support/core_ext/thread.rb +0 -79
  291. data/lib/active_support/core_ext/time/marshal.rb +0 -30
  292. data/lib/active_support/file_watcher.rb +0 -36
  293. data/lib/active_support/json/variable.rb +0 -18
  294. data/lib/active_support/testing/pending.rb +0 -14
  295. data/lib/active_support/values/unicode_tables.dat +0 -0
@@ -1,403 +1,70 @@
1
- # encoding: utf-8
1
+ # frozen_string_literal: true
2
+
2
3
  module ActiveSupport
3
4
  module Multibyte
4
5
  module Unicode
5
-
6
6
  extend self
7
7
 
8
- # A list of all available normalization forms.
9
- # See http://www.unicode.org/reports/tr15/tr15-29.html for more
10
- # information about normalization.
11
- NORMALIZATION_FORMS = [:c, :kc, :d, :kd]
12
-
13
8
  # The Unicode version that is supported by the implementation
14
- UNICODE_VERSION = '6.2.0'
15
-
16
- # The default normalization used for operations that require
17
- # normalization. It can be set to any of the normalizations
18
- # in NORMALIZATION_FORMS.
19
- #
20
- # ActiveSupport::Multibyte::Unicode.default_normalization_form = :c
21
- attr_accessor :default_normalization_form
22
- @default_normalization_form = :kc
23
-
24
- # Hangul character boundaries and properties
25
- HANGUL_SBASE = 0xAC00
26
- HANGUL_LBASE = 0x1100
27
- HANGUL_VBASE = 0x1161
28
- HANGUL_TBASE = 0x11A7
29
- HANGUL_LCOUNT = 19
30
- HANGUL_VCOUNT = 21
31
- HANGUL_TCOUNT = 28
32
- HANGUL_NCOUNT = HANGUL_VCOUNT * HANGUL_TCOUNT
33
- HANGUL_SCOUNT = 11172
34
- HANGUL_SLAST = HANGUL_SBASE + HANGUL_SCOUNT
35
- HANGUL_JAMO_FIRST = 0x1100
36
- HANGUL_JAMO_LAST = 0x11FF
37
-
38
- # All the unicode whitespace
39
- WHITESPACE = [
40
- (0x0009..0x000D).to_a, # White_Space # Cc [5] <control-0009>..<control-000D>
41
- 0x0020, # White_Space # Zs SPACE
42
- 0x0085, # White_Space # Cc <control-0085>
43
- 0x00A0, # White_Space # Zs NO-BREAK SPACE
44
- 0x1680, # White_Space # Zs OGHAM SPACE MARK
45
- 0x180E, # White_Space # Zs MONGOLIAN VOWEL SEPARATOR
46
- (0x2000..0x200A).to_a, # White_Space # Zs [11] EN QUAD..HAIR SPACE
47
- 0x2028, # White_Space # Zl LINE SEPARATOR
48
- 0x2029, # White_Space # Zp PARAGRAPH SEPARATOR
49
- 0x202F, # White_Space # Zs NARROW NO-BREAK SPACE
50
- 0x205F, # White_Space # Zs MEDIUM MATHEMATICAL SPACE
51
- 0x3000, # White_Space # Zs IDEOGRAPHIC SPACE
52
- ].flatten.freeze
53
-
54
- # BOM (byte order mark) can also be seen as whitespace, it's a
55
- # non-rendering character used to distinguish between little and big
56
- # endian. This is not an issue in utf-8, so it must be ignored.
57
- LEADERS_AND_TRAILERS = WHITESPACE + [65279] # ZERO-WIDTH NO-BREAK SPACE aka BOM
58
-
59
- # Returns a regular expression pattern that matches the passed Unicode
60
- # codepoints.
61
- def self.codepoints_to_pattern(array_of_codepoints) #:nodoc:
62
- array_of_codepoints.collect{ |e| [e].pack 'U*' }.join('|')
63
- end
64
- TRAILERS_PAT = /(#{codepoints_to_pattern(LEADERS_AND_TRAILERS)})+\Z/u
65
- LEADERS_PAT = /\A(#{codepoints_to_pattern(LEADERS_AND_TRAILERS)})+/u
66
-
67
- # Detect whether the codepoint is in a certain character class. Returns
68
- # +true+ when it's in the specified character class and +false+ otherwise.
69
- # Valid character classes are: <tt>:cr</tt>, <tt>:lf</tt>, <tt>:l</tt>,
70
- # <tt>:v</tt>, <tt>:lv</tt>, <tt>:lvt</tt> and <tt>:t</tt>.
71
- #
72
- # Primarily used by the grapheme cluster support.
73
- def in_char_class?(codepoint, classes)
74
- classes.detect { |c| database.boundary[c] === codepoint } ? true : false
75
- end
76
-
77
- # Unpack the string at grapheme boundaries. Returns a list of character
78
- # lists.
79
- #
80
- # Unicode.unpack_graphemes('क्षि') # => [[2325, 2381], [2359], [2367]]
81
- # Unicode.unpack_graphemes('Café') # => [[67], [97], [102], [233]]
82
- def unpack_graphemes(string)
83
- codepoints = string.codepoints.to_a
84
- unpacked = []
85
- pos = 0
86
- marker = 0
87
- eoc = codepoints.length
88
- while(pos < eoc)
89
- pos += 1
90
- previous = codepoints[pos-1]
91
- current = codepoints[pos]
92
- if (
93
- # CR X LF
94
- ( previous == database.boundary[:cr] and current == database.boundary[:lf] ) or
95
- # L X (L|V|LV|LVT)
96
- ( database.boundary[:l] === previous and in_char_class?(current, [:l,:v,:lv,:lvt]) ) or
97
- # (LV|V) X (V|T)
98
- ( in_char_class?(previous, [:lv,:v]) and in_char_class?(current, [:v,:t]) ) or
99
- # (LVT|T) X (T)
100
- ( in_char_class?(previous, [:lvt,:t]) and database.boundary[:t] === current ) or
101
- # X Extend
102
- (database.boundary[:extend] === current)
103
- )
104
- else
105
- unpacked << codepoints[marker..pos-1]
106
- marker = pos
107
- end
108
- end
109
- unpacked
110
- end
111
-
112
- # Reverse operation of unpack_graphemes.
113
- #
114
- # Unicode.pack_graphemes(Unicode.unpack_graphemes('क्षि')) # => 'क्षि'
115
- def pack_graphemes(unpacked)
116
- unpacked.flatten.pack('U*')
117
- end
118
-
119
- # Re-order codepoints so the string becomes canonical.
120
- def reorder_characters(codepoints)
121
- length = codepoints.length- 1
122
- pos = 0
123
- while pos < length do
124
- cp1, cp2 = database.codepoints[codepoints[pos]], database.codepoints[codepoints[pos+1]]
125
- if (cp1.combining_class > cp2.combining_class) && (cp2.combining_class > 0)
126
- codepoints[pos..pos+1] = cp2.code, cp1.code
127
- pos += (pos > 0 ? -1 : 1)
128
- else
129
- pos += 1
130
- end
131
- end
132
- codepoints
133
- end
9
+ UNICODE_VERSION = RbConfig::CONFIG["UNICODE_VERSION"]
134
10
 
135
11
  # Decompose composed characters to the decomposed form.
136
12
  def decompose(type, codepoints)
137
- codepoints.inject([]) do |decomposed, cp|
138
- # if it's a hangul syllable starter character
139
- if HANGUL_SBASE <= cp and cp < HANGUL_SLAST
140
- sindex = cp - HANGUL_SBASE
141
- ncp = [] # new codepoints
142
- ncp << HANGUL_LBASE + sindex / HANGUL_NCOUNT
143
- ncp << HANGUL_VBASE + (sindex % HANGUL_NCOUNT) / HANGUL_TCOUNT
144
- tindex = sindex % HANGUL_TCOUNT
145
- ncp << (HANGUL_TBASE + tindex) unless tindex == 0
146
- decomposed.concat ncp
147
- # if the codepoint is decomposable in with the current decomposition type
148
- elsif (ncp = database.codepoints[cp].decomp_mapping) and (!database.codepoints[cp].decomp_type || type == :compatibility)
149
- decomposed.concat decompose(type, ncp.dup)
150
- else
151
- decomposed << cp
152
- end
13
+ if type == :compatibility
14
+ codepoints.pack("U*").unicode_normalize(:nfkd).codepoints
15
+ else
16
+ codepoints.pack("U*").unicode_normalize(:nfd).codepoints
153
17
  end
154
18
  end
155
19
 
156
20
  # Compose decomposed characters to the composed form.
157
21
  def compose(codepoints)
158
- pos = 0
159
- eoa = codepoints.length - 1
160
- starter_pos = 0
161
- starter_char = codepoints[0]
162
- previous_combining_class = -1
163
- while pos < eoa
164
- pos += 1
165
- lindex = starter_char - HANGUL_LBASE
166
- # -- Hangul
167
- if 0 <= lindex and lindex < HANGUL_LCOUNT
168
- vindex = codepoints[starter_pos+1] - HANGUL_VBASE rescue vindex = -1
169
- if 0 <= vindex and vindex < HANGUL_VCOUNT
170
- tindex = codepoints[starter_pos+2] - HANGUL_TBASE rescue tindex = -1
171
- if 0 <= tindex and tindex < HANGUL_TCOUNT
172
- j = starter_pos + 2
173
- eoa -= 2
174
- else
175
- tindex = 0
176
- j = starter_pos + 1
177
- eoa -= 1
178
- end
179
- codepoints[starter_pos..j] = (lindex * HANGUL_VCOUNT + vindex) * HANGUL_TCOUNT + tindex + HANGUL_SBASE
180
- end
181
- starter_pos += 1
182
- starter_char = codepoints[starter_pos]
183
- # -- Other characters
184
- else
185
- current_char = codepoints[pos]
186
- current = database.codepoints[current_char]
187
- if current.combining_class > previous_combining_class
188
- if ref = database.composition_map[starter_char]
189
- composition = ref[current_char]
190
- else
191
- composition = nil
192
- end
193
- unless composition.nil?
194
- codepoints[starter_pos] = composition
195
- starter_char = composition
196
- codepoints.delete_at pos
197
- eoa -= 1
198
- pos -= 1
199
- previous_combining_class = -1
200
- else
201
- previous_combining_class = current.combining_class
202
- end
203
- else
204
- previous_combining_class = current.combining_class
205
- end
206
- if current.combining_class == 0
207
- starter_pos = pos
208
- starter_char = codepoints[pos]
209
- end
210
- end
211
- end
212
- codepoints
213
- end
214
-
215
- # Replaces all ISO-8859-1 or CP1252 characters by their UTF-8 equivalent
216
- # resulting in a valid UTF-8 string.
217
- #
218
- # Passing +true+ will forcibly tidy all bytes, assuming that the string's
219
- # encoding is entirely CP1252 or ISO-8859-1.
220
- def tidy_bytes(string, force = false)
221
- if force
222
- return string.unpack("C*").map do |b|
223
- tidy_byte(b)
224
- end.flatten.compact.pack("C*").unpack("U*").pack("U*")
225
- end
226
-
227
- bytes = string.unpack("C*")
228
- conts_expected = 0
229
- last_lead = 0
230
-
231
- bytes.each_index do |i|
232
-
233
- byte = bytes[i]
234
- is_cont = byte > 127 && byte < 192
235
- is_lead = byte > 191 && byte < 245
236
- is_unused = byte > 240
237
- is_restricted = byte > 244
238
-
239
- # Impossible or highly unlikely byte? Clean it.
240
- if is_unused || is_restricted
241
- bytes[i] = tidy_byte(byte)
242
- elsif is_cont
243
- # Not expecting continuation byte? Clean up. Otherwise, now expect one less.
244
- conts_expected == 0 ? bytes[i] = tidy_byte(byte) : conts_expected -= 1
245
- else
246
- if conts_expected > 0
247
- # Expected continuation, but got ASCII or leading? Clean backwards up to
248
- # the leading byte.
249
- (1..(i - last_lead)).each {|j| bytes[i - j] = tidy_byte(bytes[i - j])}
250
- conts_expected = 0
251
- end
252
- if is_lead
253
- # Final byte is leading? Clean it.
254
- if i == bytes.length - 1
255
- bytes[i] = tidy_byte(bytes.last)
256
- else
257
- # Valid leading byte? Expect continuations determined by position of
258
- # first zero bit, with max of 3.
259
- conts_expected = byte < 224 ? 1 : byte < 240 ? 2 : 3
260
- last_lead = i
261
- end
262
- end
263
- end
264
- end
265
- bytes.empty? ? "" : bytes.flatten.compact.pack("C*").unpack("U*").pack("U*")
266
- end
267
-
268
- # Returns the KC normalization of the string by default. NFKC is
269
- # considered the best normalization form for passing strings to databases
270
- # and validations.
271
- #
272
- # * <tt>string</tt> - The string to perform normalization on.
273
- # * <tt>form</tt> - The form you want to normalize in. Should be one of
274
- # the following: <tt>:c</tt>, <tt>:kc</tt>, <tt>:d</tt>, or <tt>:kd</tt>.
275
- # Default is ActiveSupport::Multibyte.default_normalization_form.
276
- def normalize(string, form=nil)
277
- form ||= @default_normalization_form
278
- # See http://www.unicode.org/reports/tr15, Table 1
279
- codepoints = string.codepoints.to_a
280
- case form
281
- when :d
282
- reorder_characters(decompose(:canonical, codepoints))
283
- when :c
284
- compose(reorder_characters(decompose(:canonical, codepoints)))
285
- when :kd
286
- reorder_characters(decompose(:compatibility, codepoints))
287
- when :kc
288
- compose(reorder_characters(decompose(:compatibility, codepoints)))
289
- else
290
- raise ArgumentError, "#{form} is not a valid normalization variant", caller
291
- end.pack('U*')
292
- end
293
-
294
- def downcase(string)
295
- apply_mapping string, :lowercase_mapping
296
- end
297
-
298
- def upcase(string)
299
- apply_mapping string, :uppercase_mapping
300
- end
301
-
302
- def swapcase(string)
303
- apply_mapping string, :swapcase_mapping
304
- end
305
-
306
- # Holds data about a codepoint in the Unicode database.
307
- class Codepoint
308
- attr_accessor :code, :combining_class, :decomp_type, :decomp_mapping, :uppercase_mapping, :lowercase_mapping
309
-
310
- def swapcase_mapping
311
- uppercase_mapping > 0 ? uppercase_mapping : lowercase_mapping
312
- end
313
- end
314
-
315
- # Holds static data from the Unicode database.
316
- class UnicodeDatabase
317
- ATTRIBUTES = :codepoints, :composition_exclusion, :composition_map, :boundary, :cp1252
318
-
319
- attr_writer(*ATTRIBUTES)
320
-
321
- def initialize
322
- @codepoints = Hash.new(Codepoint.new)
323
- @composition_exclusion = []
324
- @composition_map = {}
325
- @boundary = {}
326
- @cp1252 = {}
327
- end
328
-
329
- # Lazy load the Unicode database so it's only loaded when it's actually used
330
- ATTRIBUTES.each do |attr_name|
331
- class_eval(<<-EOS, __FILE__, __LINE__ + 1)
332
- def #{attr_name} # def codepoints
333
- load # load
334
- @#{attr_name} # @codepoints
335
- end # end
336
- EOS
22
+ codepoints.pack("U*").unicode_normalize(:nfc).codepoints
23
+ end
24
+
25
+ # Rubinius' String#scrub, however, doesn't support ASCII-incompatible chars.
26
+ if !defined?(Rubinius)
27
+ # Replaces all ISO-8859-1 or CP1252 characters by their UTF-8 equivalent
28
+ # resulting in a valid UTF-8 string.
29
+ #
30
+ # Passing +true+ will forcibly tidy all bytes, assuming that the string's
31
+ # encoding is entirely CP1252 or ISO-8859-1.
32
+ def tidy_bytes(string, force = false)
33
+ return string if string.empty? || string.ascii_only?
34
+ return recode_windows1252_chars(string) if force
35
+ string.scrub { |bad| recode_windows1252_chars(bad) }
337
36
  end
338
-
339
- # Loads the Unicode database and returns all the internal objects of
340
- # UnicodeDatabase.
341
- def load
342
- begin
343
- @codepoints, @composition_exclusion, @composition_map, @boundary, @cp1252 = File.open(self.class.filename, 'rb') { |f| Marshal.load f.read }
344
- rescue => e
345
- raise IOError.new("Couldn't load the Unicode tables for UTF8Handler (#{e.message}), ActiveSupport::Multibyte is unusable")
37
+ else
38
+ def tidy_bytes(string, force = false)
39
+ return string if string.empty?
40
+ return recode_windows1252_chars(string) if force
41
+
42
+ # We can't transcode to the same format, so we choose a nearly-identical encoding.
43
+ # We're going to 'transcode' bytes from UTF-8 when possible, then fall back to
44
+ # CP1252 when we get errors. The final string will be 'converted' back to UTF-8
45
+ # before returning.
46
+ reader = Encoding::Converter.new(Encoding::UTF_8, Encoding::UTF_16LE)
47
+
48
+ source = string.dup
49
+ out = "".force_encoding(Encoding::UTF_16LE)
50
+
51
+ loop do
52
+ reader.primitive_convert(source, out)
53
+ _, _, _, error_bytes, _ = reader.primitive_errinfo
54
+ break if error_bytes.nil?
55
+ out << error_bytes.encode(Encoding::UTF_16LE, Encoding::Windows_1252, invalid: :replace, undef: :replace)
346
56
  end
347
57
 
348
- # Redefine the === method so we can write shorter rules for grapheme cluster breaks
349
- @boundary.each do |k,_|
350
- @boundary[k].instance_eval do
351
- def ===(other)
352
- detect { |i| i === other } ? true : false
353
- end
354
- end if @boundary[k].kind_of?(Array)
355
- end
58
+ reader.finish
356
59
 
357
- # define attr_reader methods for the instance variables
358
- class << self
359
- attr_reader(*ATTRIBUTES)
360
- end
361
- end
362
-
363
- # Returns the directory in which the data files are stored.
364
- def self.dirname
365
- File.dirname(__FILE__) + '/../values/'
366
- end
367
-
368
- # Returns the filename for the data file for this version.
369
- def self.filename
370
- File.expand_path File.join(dirname, "unicode_tables.dat")
60
+ out.encode!(Encoding::UTF_8)
371
61
  end
372
62
  end
373
63
 
374
64
  private
375
-
376
- def apply_mapping(string, mapping) #:nodoc:
377
- string.each_codepoint.map do |codepoint|
378
- cp = database.codepoints[codepoint]
379
- if cp and (ncp = cp.send(mapping)) and ncp > 0
380
- ncp
381
- else
382
- codepoint
383
- end
384
- end.pack('U*')
385
- end
386
-
387
- def tidy_byte(byte)
388
- if byte < 160
389
- [database.cp1252[byte] || byte].pack("U").unpack("C*")
390
- elsif byte < 192
391
- [194, byte]
392
- else
393
- [195, byte - 64]
65
+ def recode_windows1252_chars(string)
66
+ string.encode(Encoding::UTF_8, Encoding::Windows_1252, invalid: :replace, undef: :replace)
394
67
  end
395
- end
396
-
397
- def database
398
- @database ||= UnicodeDatabase.new
399
- end
400
-
401
68
  end
402
69
  end
403
70
  end
@@ -1,7 +1,9 @@
1
- module ActiveSupport #:nodoc:
1
+ # frozen_string_literal: true
2
+
3
+ module ActiveSupport # :nodoc:
2
4
  module Multibyte
3
- autoload :Chars, 'active_support/multibyte/chars'
4
- autoload :Unicode, 'active_support/multibyte/unicode'
5
+ autoload :Chars, "active_support/multibyte/chars"
6
+ autoload :Unicode, "active_support/multibyte/unicode"
5
7
 
6
8
  # The proxy class returned when calling mb_chars. You can use this accessor
7
9
  # to configure your own proxy class so you can support other encodings. See