core_ext 0.0.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/README.md +3 -0
- data/lib/core_ext/array/access.rb +76 -0
- data/lib/core_ext/array/conversions.rb +211 -0
- data/lib/core_ext/array/extract_options.rb +29 -0
- data/lib/core_ext/array/grouping.rb +116 -0
- data/lib/core_ext/array/inquiry.rb +17 -0
- data/lib/core_ext/array/prepend_and_append.rb +7 -0
- data/lib/core_ext/array/wrap.rb +46 -0
- data/lib/core_ext/array.rb +7 -0
- data/lib/core_ext/array_inquirer.rb +44 -0
- data/lib/core_ext/benchmark.rb +14 -0
- data/lib/core_ext/benchmarkable.rb +49 -0
- data/lib/core_ext/big_decimal/conversions.rb +14 -0
- data/lib/core_ext/big_decimal.rb +1 -0
- data/lib/core_ext/builder.rb +6 -0
- data/lib/core_ext/callbacks.rb +770 -0
- data/lib/core_ext/class/attribute.rb +128 -0
- data/lib/core_ext/class/attribute_accessors.rb +4 -0
- data/lib/core_ext/class/subclasses.rb +42 -0
- data/lib/core_ext/class.rb +2 -0
- data/lib/core_ext/concern.rb +142 -0
- data/lib/core_ext/configurable.rb +148 -0
- data/lib/core_ext/date/acts_like.rb +8 -0
- data/lib/core_ext/date/blank.rb +12 -0
- data/lib/core_ext/date/calculations.rb +143 -0
- data/lib/core_ext/date/conversions.rb +93 -0
- data/lib/core_ext/date/zones.rb +6 -0
- data/lib/core_ext/date.rb +5 -0
- data/lib/core_ext/date_and_time/calculations.rb +328 -0
- data/lib/core_ext/date_and_time/zones.rb +40 -0
- data/lib/core_ext/date_time/acts_like.rb +14 -0
- data/lib/core_ext/date_time/blank.rb +12 -0
- data/lib/core_ext/date_time/calculations.rb +177 -0
- data/lib/core_ext/date_time/conversions.rb +104 -0
- data/lib/core_ext/date_time/zones.rb +6 -0
- data/lib/core_ext/date_time.rb +5 -0
- data/lib/core_ext/deprecation/behaviors.rb +86 -0
- data/lib/core_ext/deprecation/instance_delegator.rb +24 -0
- data/lib/core_ext/deprecation/method_wrappers.rb +70 -0
- data/lib/core_ext/deprecation/proxy_wrappers.rb +149 -0
- data/lib/core_ext/deprecation/reporting.rb +105 -0
- data/lib/core_ext/deprecation.rb +43 -0
- data/lib/core_ext/digest/uuid.rb +51 -0
- data/lib/core_ext/duration.rb +157 -0
- data/lib/core_ext/enumerable.rb +106 -0
- data/lib/core_ext/file/atomic.rb +68 -0
- data/lib/core_ext/file.rb +1 -0
- data/lib/core_ext/hash/compact.rb +20 -0
- data/lib/core_ext/hash/conversions.rb +261 -0
- data/lib/core_ext/hash/deep_merge.rb +38 -0
- data/lib/core_ext/hash/except.rb +22 -0
- data/lib/core_ext/hash/indifferent_access.rb +23 -0
- data/lib/core_ext/hash/keys.rb +170 -0
- data/lib/core_ext/hash/reverse_merge.rb +22 -0
- data/lib/core_ext/hash/slice.rb +48 -0
- data/lib/core_ext/hash/transform_values.rb +29 -0
- data/lib/core_ext/hash.rb +9 -0
- data/lib/core_ext/hash_with_indifferent_access.rb +298 -0
- data/lib/core_ext/inflections.rb +70 -0
- data/lib/core_ext/inflector/inflections.rb +244 -0
- data/lib/core_ext/inflector/methods.rb +381 -0
- data/lib/core_ext/inflector/transliterate.rb +112 -0
- data/lib/core_ext/inflector.rb +7 -0
- data/lib/core_ext/integer/inflections.rb +29 -0
- data/lib/core_ext/integer/multiple.rb +10 -0
- data/lib/core_ext/integer/time.rb +29 -0
- data/lib/core_ext/integer.rb +3 -0
- data/lib/core_ext/json/decoding.rb +67 -0
- data/lib/core_ext/json/encoding.rb +127 -0
- data/lib/core_ext/json.rb +2 -0
- data/lib/core_ext/kernel/agnostics.rb +11 -0
- data/lib/core_ext/kernel/concern.rb +10 -0
- data/lib/core_ext/kernel/reporting.rb +41 -0
- data/lib/core_ext/kernel/singleton_class.rb +6 -0
- data/lib/core_ext/kernel.rb +4 -0
- data/lib/core_ext/load_error.rb +30 -0
- data/lib/core_ext/logger.rb +57 -0
- data/lib/core_ext/logger_silence.rb +24 -0
- data/lib/core_ext/marshal.rb +19 -0
- data/lib/core_ext/module/aliasing.rb +74 -0
- data/lib/core_ext/module/anonymous.rb +28 -0
- data/lib/core_ext/module/attr_internal.rb +36 -0
- data/lib/core_ext/module/attribute_accessors.rb +212 -0
- data/lib/core_ext/module/concerning.rb +135 -0
- data/lib/core_ext/module/delegation.rb +218 -0
- data/lib/core_ext/module/deprecation.rb +23 -0
- data/lib/core_ext/module/introspection.rb +62 -0
- data/lib/core_ext/module/method_transplanting.rb +3 -0
- data/lib/core_ext/module/qualified_const.rb +52 -0
- data/lib/core_ext/module/reachable.rb +8 -0
- data/lib/core_ext/module/remove_method.rb +35 -0
- data/lib/core_ext/module.rb +11 -0
- data/lib/core_ext/multibyte/chars.rb +231 -0
- data/lib/core_ext/multibyte/unicode.rb +388 -0
- data/lib/core_ext/multibyte.rb +21 -0
- data/lib/core_ext/name_error.rb +31 -0
- data/lib/core_ext/numeric/bytes.rb +64 -0
- data/lib/core_ext/numeric/conversions.rb +132 -0
- data/lib/core_ext/numeric/inquiry.rb +26 -0
- data/lib/core_ext/numeric/time.rb +74 -0
- data/lib/core_ext/numeric.rb +4 -0
- data/lib/core_ext/object/acts_like.rb +10 -0
- data/lib/core_ext/object/blank.rb +140 -0
- data/lib/core_ext/object/conversions.rb +4 -0
- data/lib/core_ext/object/deep_dup.rb +53 -0
- data/lib/core_ext/object/duplicable.rb +98 -0
- data/lib/core_ext/object/inclusion.rb +27 -0
- data/lib/core_ext/object/instance_variables.rb +28 -0
- data/lib/core_ext/object/json.rb +199 -0
- data/lib/core_ext/object/to_param.rb +1 -0
- data/lib/core_ext/object/to_query.rb +84 -0
- data/lib/core_ext/object/try.rb +146 -0
- data/lib/core_ext/object/with_options.rb +69 -0
- data/lib/core_ext/object.rb +14 -0
- data/lib/core_ext/option_merger.rb +25 -0
- data/lib/core_ext/ordered_hash.rb +48 -0
- data/lib/core_ext/ordered_options.rb +81 -0
- data/lib/core_ext/range/conversions.rb +34 -0
- data/lib/core_ext/range/each.rb +21 -0
- data/lib/core_ext/range/include_range.rb +23 -0
- data/lib/core_ext/range/overlaps.rb +8 -0
- data/lib/core_ext/range.rb +4 -0
- data/lib/core_ext/regexp.rb +5 -0
- data/lib/core_ext/rescuable.rb +119 -0
- data/lib/core_ext/securerandom.rb +23 -0
- data/lib/core_ext/security_utils.rb +20 -0
- data/lib/core_ext/string/access.rb +104 -0
- data/lib/core_ext/string/behavior.rb +6 -0
- data/lib/core_ext/string/conversions.rb +56 -0
- data/lib/core_ext/string/exclude.rb +11 -0
- data/lib/core_ext/string/filters.rb +102 -0
- data/lib/core_ext/string/indent.rb +43 -0
- data/lib/core_ext/string/inflections.rb +235 -0
- data/lib/core_ext/string/inquiry.rb +13 -0
- data/lib/core_ext/string/multibyte.rb +53 -0
- data/lib/core_ext/string/output_safety.rb +261 -0
- data/lib/core_ext/string/starts_ends_with.rb +4 -0
- data/lib/core_ext/string/strip.rb +23 -0
- data/lib/core_ext/string/zones.rb +14 -0
- data/lib/core_ext/string.rb +13 -0
- data/lib/core_ext/string_inquirer.rb +26 -0
- data/lib/core_ext/tagged_logging.rb +78 -0
- data/lib/core_ext/test_case.rb +88 -0
- data/lib/core_ext/testing/assertions.rb +99 -0
- data/lib/core_ext/testing/autorun.rb +12 -0
- data/lib/core_ext/testing/composite_filter.rb +54 -0
- data/lib/core_ext/testing/constant_lookup.rb +50 -0
- data/lib/core_ext/testing/declarative.rb +26 -0
- data/lib/core_ext/testing/deprecation.rb +36 -0
- data/lib/core_ext/testing/file_fixtures.rb +34 -0
- data/lib/core_ext/testing/isolation.rb +115 -0
- data/lib/core_ext/testing/method_call_assertions.rb +41 -0
- data/lib/core_ext/testing/setup_and_teardown.rb +50 -0
- data/lib/core_ext/testing/stream.rb +42 -0
- data/lib/core_ext/testing/tagged_logging.rb +25 -0
- data/lib/core_ext/testing/time_helpers.rb +134 -0
- data/lib/core_ext/time/acts_like.rb +8 -0
- data/lib/core_ext/time/calculations.rb +284 -0
- data/lib/core_ext/time/conversions.rb +66 -0
- data/lib/core_ext/time/zones.rb +95 -0
- data/lib/core_ext/time.rb +20 -0
- data/lib/core_ext/time_with_zone.rb +503 -0
- data/lib/core_ext/time_zone.rb +464 -0
- data/lib/core_ext/uri.rb +25 -0
- data/lib/core_ext/version.rb +3 -0
- data/lib/core_ext/xml_mini/jdom.rb +181 -0
- data/lib/core_ext/xml_mini/libxml.rb +79 -0
- data/lib/core_ext/xml_mini/libxmlsax.rb +85 -0
- data/lib/core_ext/xml_mini/nokogiri.rb +83 -0
- data/lib/core_ext/xml_mini/nokogirisax.rb +87 -0
- data/lib/core_ext/xml_mini/rexml.rb +130 -0
- data/lib/core_ext/xml_mini.rb +194 -0
- data/lib/core_ext.rb +3 -0
- metadata +310 -0
@@ -0,0 +1,388 @@
|
|
1
|
+
module CoreExt
|
2
|
+
module Multibyte
|
3
|
+
module Unicode
|
4
|
+
|
5
|
+
extend self
|
6
|
+
|
7
|
+
# A list of all available normalization forms.
|
8
|
+
# See http://www.unicode.org/reports/tr15/tr15-29.html for more
|
9
|
+
# information about normalization.
|
10
|
+
NORMALIZATION_FORMS = [:c, :kc, :d, :kd]
|
11
|
+
|
12
|
+
# The Unicode version that is supported by the implementation
|
13
|
+
UNICODE_VERSION = '8.0.0'
|
14
|
+
|
15
|
+
# The default normalization used for operations that require
|
16
|
+
# normalization. It can be set to any of the normalizations
|
17
|
+
# in NORMALIZATION_FORMS.
|
18
|
+
#
|
19
|
+
# CoreExt::Multibyte::Unicode.default_normalization_form = :c
|
20
|
+
attr_accessor :default_normalization_form
|
21
|
+
@default_normalization_form = :kc
|
22
|
+
|
23
|
+
# Hangul character boundaries and properties
|
24
|
+
HANGUL_SBASE = 0xAC00
|
25
|
+
HANGUL_LBASE = 0x1100
|
26
|
+
HANGUL_VBASE = 0x1161
|
27
|
+
HANGUL_TBASE = 0x11A7
|
28
|
+
HANGUL_LCOUNT = 19
|
29
|
+
HANGUL_VCOUNT = 21
|
30
|
+
HANGUL_TCOUNT = 28
|
31
|
+
HANGUL_NCOUNT = HANGUL_VCOUNT * HANGUL_TCOUNT
|
32
|
+
HANGUL_SCOUNT = 11172
|
33
|
+
HANGUL_SLAST = HANGUL_SBASE + HANGUL_SCOUNT
|
34
|
+
HANGUL_JAMO_FIRST = 0x1100
|
35
|
+
HANGUL_JAMO_LAST = 0x11FF
|
36
|
+
|
37
|
+
# All the Unicode whitespace codepoints
|
38
|
+
WHITESPACE = [
|
39
|
+
(0x0009..0x000D).to_a, # White_Space # Cc [5] <control-0009>..<control-000D>
|
40
|
+
0x0020, # White_Space # Zs SPACE
|
41
|
+
0x0085, # White_Space # Cc <control-0085>
|
42
|
+
0x00A0, # White_Space # Zs NO-BREAK SPACE
|
43
|
+
0x1680, # White_Space # Zs OGHAM SPACE MARK
|
44
|
+
(0x2000..0x200A).to_a, # White_Space # Zs [11] EN QUAD..HAIR SPACE
|
45
|
+
0x2028, # White_Space # Zl LINE SEPARATOR
|
46
|
+
0x2029, # White_Space # Zp PARAGRAPH SEPARATOR
|
47
|
+
0x202F, # White_Space # Zs NARROW NO-BREAK SPACE
|
48
|
+
0x205F, # White_Space # Zs MEDIUM MATHEMATICAL SPACE
|
49
|
+
0x3000, # White_Space # Zs IDEOGRAPHIC SPACE
|
50
|
+
].flatten.freeze
|
51
|
+
|
52
|
+
# BOM (byte order mark) can also be seen as whitespace, it's a
|
53
|
+
# non-rendering character used to distinguish between little and big
|
54
|
+
# endian. This is not an issue in utf-8, so it must be ignored.
|
55
|
+
LEADERS_AND_TRAILERS = WHITESPACE + [65279] # ZERO-WIDTH NO-BREAK SPACE aka BOM
|
56
|
+
|
57
|
+
# Returns a regular expression pattern that matches the passed Unicode
|
58
|
+
# codepoints.
|
59
|
+
def self.codepoints_to_pattern(array_of_codepoints) #:nodoc:
  # Each codepoint becomes its UTF-8 character; the characters are inserted
  # literally (no regexp escaping) and joined into an alternation.
  characters = array_of_codepoints.map { |codepoint| [codepoint].pack('U*'.freeze) }
  characters.join('|'.freeze)
end
|
62
|
+
TRAILERS_PAT = /(#{codepoints_to_pattern(LEADERS_AND_TRAILERS)})+\Z/u
|
63
|
+
LEADERS_PAT = /\A(#{codepoints_to_pattern(LEADERS_AND_TRAILERS)})+/u
|
64
|
+
|
65
|
+
# Detect whether the codepoint is in a certain character class. Returns
|
66
|
+
# +true+ when it's in the specified character class and +false+ otherwise.
|
67
|
+
# Valid character classes are: <tt>:cr</tt>, <tt>:lf</tt>, <tt>:l</tt>,
|
68
|
+
# <tt>:v</tt>, <tt>:lv</tt>, <tt>:lvt</tt> and <tt>:t</tt>.
|
69
|
+
#
|
70
|
+
# Primarily used by the grapheme cluster support.
|
71
|
+
def in_char_class?(codepoint, classes)
  # Look for the first class whose boundary entry matches the codepoint via
  # ===, then collapse the result to a strict boolean.
  matching_class = classes.find { |char_class| database.boundary[char_class] === codepoint }
  matching_class ? true : false
end
|
74
|
+
|
75
|
+
# Unpack the string at grapheme boundaries. Returns a list of character
|
76
|
+
# lists.
|
77
|
+
#
|
78
|
+
# Unicode.unpack_graphemes('क्षि') # => [[2325, 2381], [2359], [2367]]
|
79
|
+
# Unicode.unpack_graphemes('Café') # => [[67], [97], [102], [233]]
|
80
|
+
def unpack_graphemes(string)
  codepoints = string.codepoints.to_a
  clusters = []
  cluster_start = 0
  index = 0
  total = codepoints.length
  while index < total
    index += 1
    previous = codepoints[index - 1]
    current = codepoints[index] # nil once we have stepped past the last codepoint

    # The pair stays in the same cluster when any of these rules applies.
    unbroken =
      (previous == database.boundary[:cr] && current == database.boundary[:lf]) || # CR X LF
      (database.boundary[:l] === previous && in_char_class?(current, [:l, :v, :lv, :lvt])) || # L X (L|V|LV|LVT)
      (in_char_class?(previous, [:lv, :v]) && in_char_class?(current, [:v, :t])) || # (LV|V) X (V|T)
      (in_char_class?(previous, [:lvt, :t]) && database.boundary[:t] === current) || # (LVT|T) X (T)
      (database.boundary[:extend] === current) # X Extend
    next if unbroken

    # A break was found: emit everything collected since the last break.
    clusters << codepoints[cluster_start..index - 1]
    cluster_start = index
  end
  clusters
end
|
109
|
+
|
110
|
+
# Reverse operation of unpack_graphemes.
|
111
|
+
#
|
112
|
+
# Unicode.pack_graphemes(Unicode.unpack_graphemes('क्षि')) # => 'क्षि'
|
113
|
+
def pack_graphemes(unpacked)
  # Flatten the per-grapheme lists into one codepoint sequence, then encode
  # that sequence as a UTF-8 string.
  codepoints = unpacked.flatten
  codepoints.pack('U*')
end
|
116
|
+
|
117
|
+
# Re-order codepoints so the string becomes canonical.
|
118
|
+
def reorder_characters(codepoints)
  last_index = codepoints.length - 1
  index = 0
  while index < last_index
    left = database.codepoints[codepoints[index]]
    right = database.codepoints[codepoints[index + 1]]
    if left.combining_class > right.combining_class && right.combining_class > 0
      # Out of order: swap the pair in place, then step back one position
      # (when possible) so the moved codepoint is compared with its new
      # left-hand neighbour.
      codepoints[index], codepoints[index + 1] = right.code, left.code
      index += (index > 0 ? -1 : 1)
    else
      index += 1
    end
  end
  # The array passed in is modified and returned.
  codepoints
end
|
132
|
+
|
133
|
+
# Decompose composed characters to the decomposed form.
|
134
|
+
def decompose(type, codepoints)
  codepoints.each_with_object([]) do |cp, decomposed|
    if HANGUL_SBASE <= cp && cp < HANGUL_SLAST
      # Hangul syllables are split arithmetically into leading consonant,
      # vowel and (optional) trailing consonant.
      sindex = cp - HANGUL_SBASE
      decomposed << (HANGUL_LBASE + sindex / HANGUL_NCOUNT)
      decomposed << (HANGUL_VBASE + (sindex % HANGUL_NCOUNT) / HANGUL_TCOUNT)
      tindex = sindex % HANGUL_TCOUNT
      decomposed << (HANGUL_TBASE + tindex) unless tindex == 0
    elsif (mapping = database.codepoints[cp].decomp_mapping) &&
          (!database.codepoints[cp].decomp_type || type == :compatibility)
      # The codepoint has a mapping usable with the requested decomposition
      # type; decompose the mapping itself recursively.
      decomposed.concat(decompose(type, mapping.dup))
    else
      decomposed << cp
    end
  end
end
|
153
|
+
|
154
|
+
# Compose decomposed characters to the composed form.
|
155
|
+
def compose(codepoints)
  # Walks the array once, composing in place. +starter_pos+/+starter_char+
  # track the codepoint that following codepoints may combine with; +eoa+ is
  # the last valid index and shrinks whenever codepoints are merged.
  # The array passed in is modified and returned.
  pos = 0
  eoa = codepoints.length - 1
  starter_pos = 0
  starter_char = codepoints[0]
  previous_combining_class = -1
  while pos < eoa
    pos += 1
    lindex = starter_char - HANGUL_LBASE
    # -- Hangul
    if 0 <= lindex && lindex < HANGUL_LCOUNT
      # A missing codepoint (end of input) is treated as "not a vowel" /
      # "not a trailing consonant" by giving it an out-of-range index.
      vowel = codepoints[starter_pos + 1]
      vindex = vowel ? vowel - HANGUL_VBASE : -1
      if 0 <= vindex && vindex < HANGUL_VCOUNT
        trailing = codepoints[starter_pos + 2]
        tindex = trailing ? trailing - HANGUL_TBASE : -1
        if 0 <= tindex && tindex < HANGUL_TCOUNT
          # L + V + T: three codepoints collapse into one syllable.
          j = starter_pos + 2
          eoa -= 2
        else
          # L + V only: two codepoints collapse into one syllable.
          tindex = 0
          j = starter_pos + 1
          eoa -= 1
        end
        codepoints[starter_pos..j] = (lindex * HANGUL_VCOUNT + vindex) * HANGUL_TCOUNT + tindex + HANGUL_SBASE
      end
      starter_pos += 1
      starter_char = codepoints[starter_pos]
    # -- Other characters
    else
      current_char = codepoints[pos]
      current = database.codepoints[current_char]
      if current.combining_class > previous_combining_class
        ref = database.composition_map[starter_char]
        composition = ref ? ref[current_char] : nil
        if composition.nil?
          previous_combining_class = current.combining_class
        else
          # Replace the starter with the composed codepoint and drop the
          # combining codepoint that was just consumed.
          codepoints[starter_pos] = composition
          starter_char = composition
          codepoints.delete_at pos
          eoa -= 1
          pos -= 1
          previous_combining_class = -1
        end
      else
        previous_combining_class = current.combining_class
      end
      if current.combining_class == 0
        # A codepoint with combining class 0 becomes the new starter.
        starter_pos = pos
        starter_char = codepoints[pos]
      end
    end
  end
  codepoints
end
|
212
|
+
|
213
|
+
# Rubinius' String#scrub, however, doesn't support ASCII-incompatible chars.
|
214
|
+
if defined?(Rubinius)
  # Rubinius variant of tidy_bytes: same contract as the String#scrub based
  # definition in the other branch, implemented with Encoding::Converter.
  def tidy_bytes(string, force = false)
    return string if string.empty?
    return recode_windows1252_chars(string) if force

    # We can't transcode to the same format, so we choose a nearly-identical
    # encoding. Bytes are 'transcoded' from UTF-8 when possible; bytes the
    # converter reports as errors are recoded from CP1252 instead. The result
    # is 'converted' back to UTF-8 before returning.
    converter = Encoding::Converter.new(Encoding::UTF_8, Encoding::UTF_16LE)

    remaining = string.dup
    converted = ''.force_encoding(Encoding::UTF_16LE)

    loop do
      converter.primitive_convert(remaining, converted)
      # The fourth element of primitive_errinfo holds the offending bytes.
      invalid_bytes = converter.primitive_errinfo[3]
      break if invalid_bytes.nil?
      converted << invalid_bytes.encode(Encoding::UTF_16LE, Encoding::Windows_1252, invalid: :replace, undef: :replace)
    end

    converter.finish

    converted.encode!(Encoding::UTF_8)
  end
else
  # Replaces all ISO-8859-1 or CP1252 characters by their UTF-8 equivalent
  # resulting in a valid UTF-8 string.
  #
  # Passing +true+ as +force+ recodes the whole string, assuming that its
  # encoding is entirely CP1252 or ISO-8859-1; otherwise only the invalid
  # byte sequences found by String#scrub are recoded.
  def tidy_bytes(string, force = false)
    return string if string.empty?

    if force
      recode_windows1252_chars(string)
    else
      string.scrub { |invalid| recode_windows1252_chars(invalid) }
    end
  end
end
|
251
|
+
|
252
|
+
# Returns the KC normalization of the string by default. NFKC is
|
253
|
+
# considered the best normalization form for passing strings to databases
|
254
|
+
# and validations.
|
255
|
+
#
|
256
|
+
# * <tt>string</tt> - The string to perform normalization on.
|
257
|
+
# * <tt>form</tt> - The form you want to normalize in. Should be one of
|
258
|
+
# the following: <tt>:c</tt>, <tt>:kc</tt>, <tt>:d</tt>, or <tt>:kd</tt>.
|
259
|
+
# Default is CoreExt::Multibyte::Unicode.default_normalization_form.
|
260
|
+
def normalize(string, form=nil)
  form ||= @default_normalization_form
  codepoints = string.codepoints.to_a

  # See http://www.unicode.org/reports/tr15, Table 1
  normalized =
    case form
    when :d  then reorder_characters(decompose(:canonical, codepoints))
    when :c  then compose(reorder_characters(decompose(:canonical, codepoints)))
    when :kd then reorder_characters(decompose(:compatibility, codepoints))
    when :kc then compose(reorder_characters(decompose(:compatibility, codepoints)))
    else
      # Report the error with the caller's backtrace.
      raise ArgumentError, "#{form} is not a valid normalization variant", caller
    end

  normalized.pack('U*'.freeze)
end
|
277
|
+
|
278
|
+
def downcase(string)
  # Maps every codepoint through its :lowercase_mapping via apply_mapping.
  apply_mapping(string, :lowercase_mapping)
end
|
281
|
+
|
282
|
+
def upcase(string)
  # Maps every codepoint through its :uppercase_mapping via apply_mapping.
  apply_mapping(string, :uppercase_mapping)
end
|
285
|
+
|
286
|
+
def swapcase(string)
  # Maps every codepoint through its :swapcase_mapping via apply_mapping.
  apply_mapping(string, :swapcase_mapping)
end
|
289
|
+
|
290
|
+
# Holds data about a codepoint in the Unicode database.
|
291
|
+
class Codepoint
  attr_accessor :code, :combining_class, :decomp_type, :decomp_mapping,
                :uppercase_mapping, :lowercase_mapping

  # A new Codepoint has combining class 0 and both case mappings set to 0;
  # the remaining attributes start out as nil.
  def initialize
    @combining_class = @uppercase_mapping = @lowercase_mapping = 0
  end

  # Returns the uppercase mapping when it is positive, otherwise the
  # lowercase mapping.
  def swapcase_mapping
    return uppercase_mapping if uppercase_mapping > 0
    lowercase_mapping
  end
end
|
305
|
+
|
306
|
+
# Holds static data from the Unicode database.
|
307
|
+
class UnicodeDatabase
  ATTRIBUTES = :codepoints, :composition_exclusion, :composition_map, :boundary, :cp1252

  attr_writer(*ATTRIBUTES)

  # Starts with empty tables; unknown codepoints resolve to one shared default
  # Codepoint instance.
  def initialize
    @codepoints = Hash.new(Codepoint.new)
    @composition_exclusion = []
    @composition_map = {}
    @boundary = {}
    @cp1252 = {}
  end

  # Lazy readers: each one loads the data file and then returns the matching
  # instance variable. #load installs plain readers on the instance, so these
  # definitions are only reached until the first successful load.
  ATTRIBUTES.each do |attr_name|
    define_method(attr_name) do
      load
      instance_variable_get("@#{attr_name}")
    end
  end

  # Reads the marshalled tables from the file named by .filename and stores
  # them in the instance variables. Raises IOError when that fails.
  def load
    begin
      tables = File.open(self.class.filename, 'rb') { |file| Marshal.load(file.read) }
      @codepoints, @composition_exclusion, @composition_map, @boundary, @cp1252 = tables
    rescue => e
      raise IOError, "Couldn't load the Unicode tables for UTF8Handler (#{e.message}), CoreExt::Multibyte is unusable"
    end

    # Give Array-valued boundary entries a === that is true when any member
    # matches, so grapheme cluster rules can be written as `entry === codepoint`.
    @boundary.each_value do |members|
      next unless members.kind_of?(Array)
      members.define_singleton_method(:===) do |other|
        detect { |member| member === other } ? true : false
      end
    end

    # Shadow the lazy readers with plain attr_readers on this instance.
    singleton_class.send(:attr_reader, *ATTRIBUTES)
  end

  class << self
    # Returns the directory in which the data files are stored.
    def dirname
      "#{File.dirname(__FILE__)}/../values/"
    end

    # Returns the absolute filename of the Unicode tables data file.
    def filename
      File.expand_path(File.join(dirname, "unicode_tables.dat"))
    end
  end
end
|
364
|
+
|
365
|
+
private
|
366
|
+
|
367
|
+
def apply_mapping(string, mapping) #:nodoc:
  # Access the codepoint table once up front, before mapping starts.
  database.codepoints
  mapped = string.each_codepoint.map do |codepoint|
    entry = database.codepoints[codepoint]
    replacement = entry && entry.send(mapping)
    # Only a positive mapping replaces the codepoint; otherwise it is kept.
    replacement && replacement > 0 ? replacement : codepoint
  end
  mapped.pack('U*')
end
|
378
|
+
|
379
|
+
def recode_windows1252_chars(string)
  # Interpret the bytes as Windows-1252 and transcode them to UTF-8; invalid
  # or undefined bytes are substituted with the replacement character.
  string.encode(
    Encoding::UTF_8,
    Encoding::Windows_1252,
    invalid: :replace,
    undef: :replace
  )
end
|
382
|
+
|
383
|
+
def database
  # Memoized: the UnicodeDatabase is created on first use and reused afterwards.
  @database || (@database = UnicodeDatabase.new)
end
|
386
|
+
end
|
387
|
+
end
|
388
|
+
end
|
@@ -0,0 +1,21 @@
|
|
1
|
+
module CoreExt #:nodoc:
  module Multibyte
    require 'core_ext/multibyte/chars'
    require 'core_ext/multibyte/unicode'

    class << self
      # Sets the proxy class returned when calling mb_chars. Use this to plug
      # in your own proxy class so you can support other encodings. See the
      # CoreExt::Multibyte::Chars implementation for an example of how to do
      # this.
      #
      #   CoreExt::Multibyte.proxy_class = CharsForUTF32
      attr_writer :proxy_class

      # Returns the current proxy class; CoreExt::Multibyte::Chars unless
      # another one has been assigned.
      def proxy_class
        @proxy_class ||= CoreExt::Multibyte::Chars
      end
    end
  end
end
|
@@ -0,0 +1,31 @@
|
|
1
|
+
class NameError
  # Extract the name of the missing constant from the exception message.
  # Returns +nil+ when the error is about an undefined local variable or
  # method, or when the message does not end a line with a constant name.
  #
  #   begin
  #     HelloWorld
  #   rescue NameError => e
  #     e.missing_name
  #   end
  #   # => "HelloWorld"
  def missing_name
    # When the did_you_mean extension is loaded it exposes the undecorated
    # text as #original_message. Prefer it, so that spell-check suggestions
    # are neither computed nor parsed here.
    text = respond_to?(:original_message) ? original_message : message
    return if /undefined local variable or method/ =~ text

    match = /((::)?([A-Z]\w*)(::[A-Z]\w*)*)$/.match(text)
    match && match[1]
  end

  # Was this exception raised because the given name was missing?
  # A Symbol is compared with #name; anything else is compared, as a String,
  # with #missing_name.
  #
  #   begin
  #     HelloWorld
  #   rescue NameError => e
  #     e.missing_name?("HelloWorld")
  #   end
  #   # => true
  def missing_name?(name)
    if name.is_a? Symbol
      self.name == name
    else
      missing_name == name.to_s
    end
  end
end
|
@@ -0,0 +1,64 @@
|
|
1
|
+
class Numeric
  # Byte multipliers, each 1024 times the previous one.
  KILOBYTE = 1024
  MEGABYTE = KILOBYTE**2
  GIGABYTE = KILOBYTE**3
  TERABYTE = KILOBYTE**4
  PETABYTE = KILOBYTE**5
  EXABYTE  = KILOBYTE**6

  # Enables the use of byte calculations and declarations, like
  # 45.bytes + 2.6.megabytes. Returns the receiver unchanged.
  #
  #   2.bytes # => 2
  def bytes
    self
  end
  alias_method :byte, :bytes

  # Returns the number of bytes equivalent to the kilobytes provided.
  #
  #   2.kilobytes # => 2048
  def kilobytes
    self * KILOBYTE
  end
  alias_method :kilobyte, :kilobytes

  # Returns the number of bytes equivalent to the megabytes provided.
  #
  #   2.megabytes # => 2_097_152
  def megabytes
    self * MEGABYTE
  end
  alias_method :megabyte, :megabytes

  # Returns the number of bytes equivalent to the gigabytes provided.
  #
  #   2.gigabytes # => 2_147_483_648
  def gigabytes
    self * GIGABYTE
  end
  alias_method :gigabyte, :gigabytes

  # Returns the number of bytes equivalent to the terabytes provided.
  #
  #   2.terabytes # => 2_199_023_255_552
  def terabytes
    self * TERABYTE
  end
  alias_method :terabyte, :terabytes

  # Returns the number of bytes equivalent to the petabytes provided.
  #
  #   2.petabytes # => 2_251_799_813_685_248
  def petabytes
    self * PETABYTE
  end
  alias_method :petabyte, :petabytes

  # Returns the number of bytes equivalent to the exabytes provided.
  #
  #   2.exabytes # => 2_305_843_009_213_693_952
  def exabytes
    self * EXABYTE
  end
  alias_method :exabyte, :exabytes
end
|
@@ -0,0 +1,132 @@
|
|
1
|
+
require 'core_ext/big_decimal/conversions'
|
2
|
+
require 'core_ext/deprecation'
|
3
|
+
|
4
|
+
module CoreExt::NumericWithFormat
|
5
|
+
|
6
|
+
# Provides options for converting numbers into formatted strings.
|
7
|
+
# Options are provided for phone numbers, currency, percentage,
|
8
|
+
# precision, positional notation, file size and pretty printing.
|
9
|
+
#
|
10
|
+
# ==== Options
|
11
|
+
#
|
12
|
+
# For details on which formats use which options, see CoreExt::NumberHelper
|
13
|
+
#
|
14
|
+
# ==== Examples
|
15
|
+
#
|
16
|
+
# Phone Numbers:
|
17
|
+
# 5551234.to_s(:phone) # => 555-1234
|
18
|
+
# 1235551234.to_s(:phone) # => 123-555-1234
|
19
|
+
# 1235551234.to_s(:phone, area_code: true) # => (123) 555-1234
|
20
|
+
# 1235551234.to_s(:phone, delimiter: ' ') # => 123 555 1234
|
21
|
+
# 1235551234.to_s(:phone, area_code: true, extension: 555) # => (123) 555-1234 x 555
|
22
|
+
# 1235551234.to_s(:phone, country_code: 1) # => +1-123-555-1234
|
23
|
+
# 1235551234.to_s(:phone, country_code: 1, extension: 1343, delimiter: '.')
|
24
|
+
# # => +1.123.555.1234 x 1343
|
25
|
+
#
|
26
|
+
# Currency:
|
27
|
+
# 1234567890.50.to_s(:currency) # => $1,234,567,890.50
|
28
|
+
# 1234567890.506.to_s(:currency) # => $1,234,567,890.51
|
29
|
+
# 1234567890.506.to_s(:currency, precision: 3) # => $1,234,567,890.506
|
30
|
+
# 1234567890.506.to_s(:currency, locale: :fr) # => 1 234 567 890,51 €
|
31
|
+
# -1234567890.50.to_s(:currency, negative_format: '(%u%n)')
|
32
|
+
# # => ($1,234,567,890.50)
|
33
|
+
# 1234567890.50.to_s(:currency, unit: '£', separator: ',', delimiter: '')
|
34
|
+
# # => £1234567890,50
|
35
|
+
# 1234567890.50.to_s(:currency, unit: '£', separator: ',', delimiter: '', format: '%n %u')
|
36
|
+
# # => 1234567890,50 £
|
37
|
+
#
|
38
|
+
# Percentage:
|
39
|
+
# 100.to_s(:percentage) # => 100.000%
|
40
|
+
# 100.to_s(:percentage, precision: 0) # => 100%
|
41
|
+
# 1000.to_s(:percentage, delimiter: '.', separator: ',') # => 1.000,000%
|
42
|
+
# 302.24398923423.to_s(:percentage, precision: 5) # => 302.24399%
|
43
|
+
# 1000.to_s(:percentage, locale: :fr) # => 1 000,000%
|
44
|
+
# 100.to_s(:percentage, format: '%n %') # => 100.000 %
|
45
|
+
#
|
46
|
+
# Delimited:
|
47
|
+
# 12345678.to_s(:delimited) # => 12,345,678
|
48
|
+
# 12345678.05.to_s(:delimited) # => 12,345,678.05
|
49
|
+
# 12345678.to_s(:delimited, delimiter: '.') # => 12.345.678
|
50
|
+
# 12345678.to_s(:delimited, delimiter: ',') # => 12,345,678
|
51
|
+
# 12345678.05.to_s(:delimited, separator: ' ') # => 12,345,678 05
|
52
|
+
# 12345678.05.to_s(:delimited, locale: :fr) # => 12 345 678,05
|
53
|
+
# 98765432.98.to_s(:delimited, delimiter: ' ', separator: ',')
|
54
|
+
# # => 98 765 432,98
|
55
|
+
#
|
56
|
+
# Rounded:
|
57
|
+
# 111.2345.to_s(:rounded) # => 111.235
|
58
|
+
# 111.2345.to_s(:rounded, precision: 2) # => 111.23
|
59
|
+
# 13.to_s(:rounded, precision: 5) # => 13.00000
|
60
|
+
# 389.32314.to_s(:rounded, precision: 0) # => 389
|
61
|
+
# 111.2345.to_s(:rounded, significant: true) # => 111
|
62
|
+
# 111.2345.to_s(:rounded, precision: 1, significant: true) # => 100
|
63
|
+
# 13.to_s(:rounded, precision: 5, significant: true) # => 13.000
|
64
|
+
# 111.234.to_s(:rounded, locale: :fr) # => 111,234
|
65
|
+
# 13.to_s(:rounded, precision: 5, significant: true, strip_insignificant_zeros: true)
|
66
|
+
# # => 13
|
67
|
+
# 389.32314.to_s(:rounded, precision: 4, significant: true) # => 389.3
|
68
|
+
# 1111.2345.to_s(:rounded, precision: 2, separator: ',', delimiter: '.')
|
69
|
+
# # => 1.111,23
|
70
|
+
#
|
71
|
+
# Human-friendly size in Bytes:
|
72
|
+
# 123.to_s(:human_size) # => 123 Bytes
|
73
|
+
# 1234.to_s(:human_size) # => 1.21 KB
|
74
|
+
# 12345.to_s(:human_size) # => 12.1 KB
|
75
|
+
# 1234567.to_s(:human_size) # => 1.18 MB
|
76
|
+
# 1234567890.to_s(:human_size) # => 1.15 GB
|
77
|
+
# 1234567890123.to_s(:human_size) # => 1.12 TB
|
78
|
+
# 1234567.to_s(:human_size, precision: 2) # => 1.2 MB
|
79
|
+
# 483989.to_s(:human_size, precision: 2) # => 470 KB
|
80
|
+
# 1234567.to_s(:human_size, precision: 2, separator: ',') # => 1,2 MB
|
81
|
+
# 1234567890123.to_s(:human_size, precision: 5) # => "1.1228 TB"
|
82
|
+
# 524288000.to_s(:human_size, precision: 5) # => "500 MB"
|
83
|
+
#
|
84
|
+
# Human-friendly format:
|
85
|
+
# 123.to_s(:human) # => "123"
|
86
|
+
# 1234.to_s(:human) # => "1.23 Thousand"
|
87
|
+
# 12345.to_s(:human) # => "12.3 Thousand"
|
88
|
+
# 1234567.to_s(:human) # => "1.23 Million"
|
89
|
+
# 1234567890.to_s(:human) # => "1.23 Billion"
|
90
|
+
# 1234567890123.to_s(:human) # => "1.23 Trillion"
|
91
|
+
# 1234567890123456.to_s(:human) # => "1.23 Quadrillion"
|
92
|
+
# 1234567890123456789.to_s(:human) # => "1230 Quadrillion"
|
93
|
+
# 489939.to_s(:human, precision: 2) # => "490 Thousand"
|
94
|
+
# 489939.to_s(:human, precision: 4) # => "489.9 Thousand"
|
95
|
+
# 1234567.to_s(:human, precision: 4,
|
96
|
+
# significant: false) # => "1.2346 Million"
|
97
|
+
# 1234567.to_s(:human, precision: 1,
|
98
|
+
# separator: ',',
|
99
|
+
# significant: false) # => "1,2 Million"
|
100
|
+
def to_s(*args)
  format, options = args
  options ||= {}

  # Known format symbols are handed to the matching CoreExt::NumberHelper
  # method; anything else falls through to the original #to_s with the
  # arguments unchanged.
  case format
  when :phone      then CoreExt::NumberHelper.number_to_phone(self, options)
  when :currency   then CoreExt::NumberHelper.number_to_currency(self, options)
  when :percentage then CoreExt::NumberHelper.number_to_percentage(self, options)
  when :delimited  then CoreExt::NumberHelper.number_to_delimited(self, options)
  when :rounded    then CoreExt::NumberHelper.number_to_rounded(self, options)
  when :human      then CoreExt::NumberHelper.number_to_human(self, options)
  when :human_size then CoreExt::NumberHelper.number_to_human_size(self, options)
  else super
  end
end
|
123
|
+
|
124
|
+
def to_formatted_s(*format_and_options)
  # Forwards every argument untouched to #to_s.
  to_s(*format_and_options)
end
|
127
|
+
deprecate to_formatted_s: :to_s
|
128
|
+
end
|
129
|
+
|
130
|
+
# Prepend the formatting #to_s to every built-in numeric class.
#
# The integer classes are taken from actual integer values instead of naming
# Fixnum and Bignum: Ruby 2.4 unified both into Integer and Ruby 3.2 removed
# the old constants, so referencing them raises NameError there. On older
# interpreters the two values still report Fixnum and Bignum; on newer ones
# both report Integer and +uniq+ collapses the duplicate.
[0.class, (2**64).class, Float, BigDecimal].uniq.each do |klass|
  klass.prepend(CoreExt::NumericWithFormat)
end
|
@@ -0,0 +1,26 @@
|
|
1
|
+
unless 1.respond_to?(:positive?) # TODO: Remove this file when we drop support for Ruby < 2.3
  class Numeric
    # Returns true if the number is greater than zero.
    #
    #   1.positive?  # => true
    #   0.positive?  # => false
    #   -1.positive? # => false
    def positive?
      self > 0
    end

    # Returns true if the number is less than zero.
    #
    #   -1.negative? # => true
    #   0.negative?  # => false
    #   1.negative?  # => false
    def negative?
      self < 0
    end
  end

  # Complex must not answer to the predicates defined on Numeric above.
  class Complex
    undef :positive?, :negative?
  end
end
|