addressable 2.8.1 → 2.8.6

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 3ddda72232f6aef9f6f4311c2855f1b3fea9acc80f51c4e5e90bd23820b0d74e
4
- data.tar.gz: 88f208cb2d73dec64663e6e3c1710dc08b188402937038fefce6a2d47debc97d
3
+ metadata.gz: 4023555b4bb6c374726bb9647a29d30943c637115619670a09632fe7a4e28765
4
+ data.tar.gz: 95ad31a9e1dd01f2bc9a7c097eb452f07ed52be16dcd2ad4946652648dd7808c
5
5
  SHA512:
6
- metadata.gz: 4e1b0c83fc2f2e54cba6804f3840c8bb29f0d73259979d5bf678ebd5d32257b91585d9b71a780321427835d10f98b9c07969267878f0b032c53d031c30202a4c
7
- data.tar.gz: 35abc3652c8ff92032411e4daad7133b7c4649aff4a1a25fe622cf1c9b661d56f096a1084392e053788baea79f551a9d4a448d16c9e61e23bf4d9692a6728bf3
6
+ metadata.gz: d7dd6eed173bc4a06a3abb74f6fdbf501ee12b3fcb2828c0f21db98d1779175bce4dc900999269db24cc8647c3148b06cf1d9c768c97c7591a48f1e43751bb7b
7
+ data.tar.gz: 3bbe976f23a4c3a6776a44d6ff2147b9ab87418162af57b2f278089528416eb3d564b114f96f165946d9126ee68d5da86c0ac01a9e2fa2792547bb312f40f046
data/CHANGELOG.md CHANGED
@@ -1,20 +1,54 @@
1
- # Addressable 2.8.1
1
+ # Addressable 2.8.6 <a name="v2.8.6">
2
+ - Memoize regexps for common character classes ([#524])
3
+
4
+ [#524]: https://github.com/sporkmonger/addressable/pull/524
5
+
6
+ # Addressable 2.8.5 <a name="v2.8.5">
7
+ - Fix thread safety issue with encoding tables ([#515])
8
+ - Define URI::NONE as a module to avoid serialization issues ([#509])
9
+ - Fix YAML serialization ([#508])
10
+
11
+ [#508]: https://github.com/sporkmonger/addressable/pull/508
12
+ [#509]: https://github.com/sporkmonger/addressable/pull/509
13
+ [#515]: https://github.com/sporkmonger/addressable/pull/515
14
+
15
+ # Addressable 2.8.4 <a name="v2.8.4">
16
+ - Restore `Addressable::IDNA.unicode_normalize_kc` as a deprecated method ([#504])
17
+
18
+ [#504]: https://github.com/sporkmonger/addressable/pull/504
19
+
20
+ # Addressable 2.8.3 <a name="v2.8.3">
21
+ - Fix template expand level 2 hash support for non-string objects ([#499], [#498])
22
+
23
+ [#499]: https://github.com/sporkmonger/addressable/pull/499
24
+ [#498]: https://github.com/sporkmonger/addressable/pull/498
25
+
26
+ # Addressable 2.8.2 <a name="v2.8.2">
27
+ - Improve cache hits and JIT friendliness ([#486](https://github.com/sporkmonger/addressable/pull/486))
28
+ - Improve code style and test coverage ([#482](https://github.com/sporkmonger/addressable/pull/482))
29
+ - Ensure reset of deferred validation ([#481](https://github.com/sporkmonger/addressable/pull/481))
30
+ - Resolve normalization differences between `IDNA::Native` and `IDNA::Pure` ([#408](https://github.com/sporkmonger/addressable/issues/408), [#492])
31
+ - Remove redundant colon in `Addressable::URI::CharacterClasses::AUTHORITY` regex ([#438](https://github.com/sporkmonger/addressable/pull/438)) (accidentally reverted by [#449] merge but [added back](https://github.com/sporkmonger/addressable/pull/492#discussion_r1105125280) in [#492])
32
+
33
+ [#492]: https://github.com/sporkmonger/addressable/pull/492
34
+
35
+ # Addressable 2.8.1 <a name="v2.8.1">
2
36
  - refactor `Addressable::URI.normalize_path` to address linter offenses ([#430](https://github.com/sporkmonger/addressable/pull/430))
3
- - remove redundant colon in `Addressable::URI::CharacterClasses::AUTHORITY` regex ([#438](https://github.com/sporkmonger/addressable/pull/438))
4
37
  - update gemspec to reflect supported Ruby versions ([#466], [#464], [#463])
5
38
  - compatibility w/ public_suffix 5.x ([#466], [#465], [#460])
6
39
  - fixes "invalid byte sequence in UTF-8" exception when unencoding URLs containing non UTF-8 characters ([#459](https://github.com/sporkmonger/addressable/pull/459))
7
- - `Ractor` compatibility ([#449](https://github.com/sporkmonger/addressable/pull/449))
40
+ - `Ractor` compatibility ([#449])
8
41
  - use the whole string instead of a single line for template match ([#431](https://github.com/sporkmonger/addressable/pull/431))
9
42
  - force UTF-8 encoding only if needed ([#341](https://github.com/sporkmonger/addressable/pull/341))
10
43
 
44
+ [#449]: https://github.com/sporkmonger/addressable/pull/449
11
45
  [#460]: https://github.com/sporkmonger/addressable/pull/460
12
46
  [#463]: https://github.com/sporkmonger/addressable/pull/463
13
47
  [#464]: https://github.com/sporkmonger/addressable/pull/464
14
48
  [#465]: https://github.com/sporkmonger/addressable/pull/465
15
49
  [#466]: https://github.com/sporkmonger/addressable/pull/466
16
50
 
17
- # Addressable 2.8.0
51
+ # Addressable 2.8.0 <a name="v2.8.0">
18
52
  - fixes ReDoS vulnerability in Addressable::Template#match
19
53
  - no longer replaces `+` with spaces in queries for non-http(s) schemes
20
54
  - fixed encoding ipv6 literals
@@ -26,14 +60,14 @@
26
60
  - performance improvements
27
61
  - switch CI/CD to GitHub Actions
28
62
 
29
- # Addressable 2.7.0
63
+ # Addressable 2.7.0 <a name="v2.7.0">
30
64
  - added `:compacted` flag to `normalized_query`
31
65
  - `heuristic_parse` handles `mailto:` more intuitively
32
66
  - dropped explicit support for JRuby 9.0.5.0
33
67
  - compatibility w/ public_suffix 4.x
34
68
  - performance improvements
35
69
 
36
- # Addressable 2.6.0
70
+ # Addressable 2.6.0 <a name="v2.6.0">
37
71
  - added `tld=` method to allow assignment to the public suffix
38
72
  - most `heuristic_parse` patterns are now case-insensitive
39
73
  - `heuristic_parse` handles more `file://` URI variations
@@ -44,17 +78,17 @@
44
78
  - minor performance improvements in regexps
45
79
  - fixes to eliminate warnings
46
80
 
47
- # Addressable 2.5.2
81
+ # Addressable 2.5.2 <a name="v2.5.2">
48
82
  - better support for frozen string literals
49
83
  - fixed bug w/ uppercase characters in scheme
50
84
  - IDNA errors w/ emoji URLs
51
85
  - compatibility w/ public_suffix 3.x
52
86
 
53
- # Addressable 2.5.1
87
+ # Addressable 2.5.1 <a name="v2.5.1">
54
88
  - allow unicode normalization to be disabled for URI Template expansion
55
89
  - removed duplicate test
56
90
 
57
- # Addressable 2.5.0
91
+ # Addressable 2.5.0 <a name="v2.5.0">
58
92
  - dropping support for Ruby 1.9
59
93
  - adding support for Ruby 2.4 preview
60
94
  - add support for public suffixes and tld; first runtime dependency
@@ -68,7 +102,7 @@
68
102
  - host parts longer than 63 bytes will be ignored and not passed to libidn
69
103
  - normalized values always encoded as UTF-8
70
104
 
71
- # Addressable 2.4.0
105
+ # Addressable 2.4.0 <a name="v2.4.0">
72
106
  - support for 1.8.x dropped
73
107
  - double quotes in a host now raises an error
74
108
  - newlines in host will no longer get unescaped during normalization
@@ -80,17 +114,17 @@
80
114
  - fixed minor bug where an exception would be thrown for a missing ACE suffix
81
115
  - better partial expansion of URI templates
82
116
 
83
- # Addressable 2.3.8
117
+ # Addressable 2.3.8 <a name="v2.3.8">
84
118
  - fix warnings
85
119
  - update dependency gems
86
120
  - support for 1.8.x officially deprecated
87
121
 
88
- # Addressable 2.3.7
122
+ # Addressable 2.3.7 <a name="v2.3.7">
89
123
  - fix scenario in which invalid URIs don't get an exception until inspected
90
124
  - handle hostnames with two adjacent periods correctly
91
125
  - upgrade of RSpec
92
126
 
93
- # Addressable 2.3.6
127
+ # Addressable 2.3.6 <a name="v2.3.6">
94
128
  - normalization drops empty query string
95
129
  - better handling in template extract for missing values
96
130
  - template modifier for `'?'` now treated as optional
@@ -99,19 +133,19 @@
99
133
  - added `:sorted` option to normalization of query strings
100
134
  - fixed issue with normalization of hosts given in `'example.com.'` form
101
135
 
102
- # Addressable 2.3.5
136
+ # Addressable 2.3.5 <a name="v2.3.5">
103
137
  - added Addressable::URI#empty? method
104
138
  - Addressable::URI#hostname methods now strip square brackets from IPv6 hosts
105
139
  - compatibility with Net::HTTP in Ruby 2.0.0
106
140
  - Addressable::URI#route_from should always give relative URIs
107
141
 
108
- # Addressable 2.3.4
142
+ # Addressable 2.3.4 <a name="v2.3.4">
109
143
  - fixed issue with encoding altering its inputs
110
144
  - query string normalization now leaves ';' characters alone
111
145
  - FakeFS is detected before attempting to load unicode tables
112
146
  - additional testing to ensure frozen objects don't cause problems
113
147
 
114
- # Addressable 2.3.3
148
+ # Addressable 2.3.3 <a name="v2.3.3">
115
149
  - fixed issue with converting common primitives during template expansion
116
150
  - fixed port encoding issue
117
151
  - removed a few warnings
@@ -120,59 +154,59 @@
120
154
  - no template match should now result in nil instead of an empty MatchData
121
155
  - added license information to gemspec
122
156
 
123
- # Addressable 2.3.2
157
+ # Addressable 2.3.2 <a name="v2.3.2">
124
158
  - added Addressable::URI#default_port method
125
159
  - fixed issue with Marshalling Unicode data on Windows
126
160
  - improved heuristic parsing to better handle IPv4 addresses
127
161
 
128
- # Addressable 2.3.1
162
+ # Addressable 2.3.1 <a name="v2.3.1">
129
163
  - fixed missing unicode data file
130
164
 
131
- # Addressable 2.3.0
165
+ # Addressable 2.3.0 <a name="v2.3.0">
132
166
  - updated Addressable::Template to use RFC 6570, level 4
133
167
  - fixed compatibility problems with some versions of Ruby
134
168
  - moved unicode tables into a data file for performance reasons
135
169
  - removing support for multiple query value notations
136
170
 
137
- # Addressable 2.2.8
171
+ # Addressable 2.2.8 <a name="v2.2.8">
138
172
  - fixed issues with dot segment removal code
139
173
  - form encoding can now handle multiple values per key
140
174
  - updated development environment
141
175
 
142
- # Addressable 2.2.7
176
+ # Addressable 2.2.7 <a name="v2.2.7">
143
177
  - fixed issues related to Addressable::URI#query_values=
144
178
  - the Addressable::URI.parse method is now polymorphic
145
179
 
146
- # Addressable 2.2.6
180
+ # Addressable 2.2.6 <a name="v2.2.6">
147
181
  - changed the way ambiguous paths are handled
148
182
  - fixed bug with frozen URIs
149
183
  - https supported in heuristic parsing
150
184
 
151
- # Addressable 2.2.5
185
+ # Addressable 2.2.5 <a name="v2.2.5">
152
186
  - 'parsing' a pre-parsed URI object is now a dup operation
153
187
  - introduced conditional support for libidn
154
188
  - fixed normalization issue on ampersands in query strings
155
189
  - added additional tests around handling of query strings
156
190
 
157
- # Addressable 2.2.4
191
+ # Addressable 2.2.4 <a name="v2.2.4">
158
192
  - added origin support from draft-ietf-websec-origin-00
159
193
  - resolved issue with attempting to navigate below root
160
194
  - fixed bug with string splitting in query strings
161
195
 
162
- # Addressable 2.2.3
196
+ # Addressable 2.2.3 <a name="v2.2.3">
163
197
  - added :flat_array notation for query strings
164
198
 
165
- # Addressable 2.2.2
199
+ # Addressable 2.2.2 <a name="v2.2.2">
166
200
  - fixed issue with percent escaping of '+' character in query strings
167
201
 
168
- # Addressable 2.2.1
202
+ # Addressable 2.2.1 <a name="v2.2.1">
169
203
  - added support for application/x-www-form-urlencoded.
170
204
 
171
- # Addressable 2.2.0
205
+ # Addressable 2.2.0 <a name="v2.2.0">
172
206
  - added site methods
173
207
  - improved documentation
174
208
 
175
- # Addressable 2.1.2
209
+ # Addressable 2.1.2 <a name="v2.1.2">
176
210
  - added HTTP request URI methods
177
211
  - better handling of Windows file paths
178
212
  - validation_deferred boolean replaced with defer_validation block
@@ -180,14 +214,14 @@
180
214
  - fixed issue with constructing URIs with relative paths
181
215
  - fixed warnings
182
216
 
183
- # Addressable 2.1.1
217
+ # Addressable 2.1.1 <a name="v2.1.1">
184
218
  - more type checking changes
185
219
  - fixed issue with unicode normalization
186
220
  - added method to find template defaults
187
221
  - symbolic keys are now allowed in template mappings
188
222
  - numeric values and symbolic values are now allowed in template mappings
189
223
 
190
- # Addressable 2.1.0
224
+ # Addressable 2.1.0 <a name="v2.1.0">
191
225
  - refactored URI template support out into its own class
192
226
  - removed extract method due to being useless and unreliable
193
227
  - removed Addressable::URI.expand_template
@@ -201,15 +235,15 @@
201
235
  - worked around issue with freezing URIs
202
236
  - improved specs
203
237
 
204
- # Addressable 2.0.2
238
+ # Addressable 2.0.2 <a name="v2.0.2">
205
239
  - fixed issue with URI template expansion
206
240
  - fixed issue with percent escaping characters 0-15
207
241
 
208
- # Addressable 2.0.1
242
+ # Addressable 2.0.1 <a name="v2.0.1">
209
243
  - fixed issue with query string assignment
210
244
  - fixed issue with improperly encoded components
211
245
 
212
- # Addressable 2.0.0
246
+ # Addressable 2.0.0 <a name="v2.0.0">
213
247
  - the initialize method now takes an options hash as its only parameter
214
248
  - added query_values method to URI class
215
249
  - completely replaced IDNA implementation with pure Ruby
@@ -224,20 +258,20 @@
224
258
  - updated URI Template code to match v 03 of the draft spec
225
259
  - added a bunch of new specifications
226
260
 
227
- # Addressable 1.0.4
261
+ # Addressable 1.0.4 <a name="v1.0.4">
228
262
  - switched to using RSpec's pending system for specs that rely on IDN
229
263
  - fixed issue with creating URIs with paths that are not prefixed with '/'
230
264
 
231
- # Addressable 1.0.3
265
+ # Addressable 1.0.3 <a name="v1.0.3">
232
266
  - implemented a hash method
233
267
 
234
- # Addressable 1.0.2
268
+ # Addressable 1.0.2 <a name="v1.0.2">
235
269
  - fixed minor bug with the extract_mapping method
236
270
 
237
- # Addressable 1.0.1
271
+ # Addressable 1.0.1 <a name="v1.0.1">
238
272
  - fixed minor bug with the extract_mapping method
239
273
 
240
- # Addressable 1.0.0
274
+ # Addressable 1.0.0 <a name="v1.0.0">
241
275
  - heuristic parse method added
242
276
  - parsing is slightly more strict
243
277
  - replaced to_h with to_hash
@@ -246,16 +280,16 @@
246
280
  - improved heckle rake task
247
281
  - no surviving heckle mutations
248
282
 
249
- # Addressable 0.1.2
283
+ # Addressable 0.1.2 <a name="v0.1.2">
250
284
  - improved normalization
251
285
  - fixed bug in joining algorithm
252
286
  - updated specifications
253
287
 
254
- # Addressable 0.1.1
288
+ # Addressable 0.1.1 <a name="v0.1.1">
255
289
  - updated documentation
256
290
  - added URI Template variable extraction
257
291
 
258
- # Addressable 0.1.0
292
+ # Addressable 0.1.0 <a name="v0.1.0">
259
293
  - initial release
260
294
  - implementation based on RFC 3986, 3987
261
295
  - support for IRIs via libidn
data/Rakefile CHANGED
@@ -20,11 +20,17 @@ additionally provides extensive support for IRIs and URI templates.
20
20
  TEXT
21
21
 
22
22
  PKG_FILES = FileList[
23
- "lib/**/*", "spec/**/*", "vendor/**/*", "data/**/*",
24
- "tasks/**/*",
25
- "[A-Z]*", "Rakefile"
26
- ].exclude(/pkg/).exclude(/database\.yml/).
27
- exclude(/Gemfile\.lock/).exclude(/[_\.]git$/)
23
+ "data/**/*",
24
+ "lib/**/*.rb",
25
+ "spec/**/*.rb",
26
+ "tasks/**/*.rake",
27
+ "addressable.gemspec",
28
+ "CHANGELOG.md",
29
+ "Gemfile",
30
+ "LICENSE.txt",
31
+ "README.md",
32
+ "Rakefile",
33
+ ]
28
34
 
29
35
  task :default => "spec"
30
36
 
@@ -0,0 +1,28 @@
1
+ # -*- encoding: utf-8 -*-
2
+ # stub: addressable 2.8.6 ruby lib
3
+
4
+ Gem::Specification.new do |s|
5
+ s.name = "addressable".freeze
6
+ s.version = "2.8.6".freeze
7
+
8
+ s.required_rubygems_version = Gem::Requirement.new(">= 0".freeze) if s.respond_to? :required_rubygems_version=
9
+ s.metadata = { "changelog_uri" => "https://github.com/sporkmonger/addressable/blob/main/CHANGELOG.md#v2.8.6" } if s.respond_to? :metadata=
10
+ s.require_paths = ["lib".freeze]
11
+ s.authors = ["Bob Aman".freeze]
12
+ s.date = "2023-12-09"
13
+ s.description = "Addressable is an alternative implementation to the URI implementation that is\npart of Ruby's standard library. It is flexible, offers heuristic parsing, and\nadditionally provides extensive support for IRIs and URI templates.\n".freeze
14
+ s.email = "bob@sporkmonger.com".freeze
15
+ s.extra_rdoc_files = ["README.md".freeze]
16
+ s.files = ["CHANGELOG.md".freeze, "Gemfile".freeze, "LICENSE.txt".freeze, "README.md".freeze, "Rakefile".freeze, "addressable.gemspec".freeze, "data/unicode.data".freeze, "lib/addressable.rb".freeze, "lib/addressable/idna.rb".freeze, "lib/addressable/idna/native.rb".freeze, "lib/addressable/idna/pure.rb".freeze, "lib/addressable/template.rb".freeze, "lib/addressable/uri.rb".freeze, "lib/addressable/version.rb".freeze, "spec/addressable/idna_spec.rb".freeze, "spec/addressable/net_http_compat_spec.rb".freeze, "spec/addressable/security_spec.rb".freeze, "spec/addressable/template_spec.rb".freeze, "spec/addressable/uri_spec.rb".freeze, "spec/spec_helper.rb".freeze, "tasks/clobber.rake".freeze, "tasks/gem.rake".freeze, "tasks/git.rake".freeze, "tasks/metrics.rake".freeze, "tasks/profile.rake".freeze, "tasks/rspec.rake".freeze, "tasks/yard.rake".freeze]
17
+ s.homepage = "https://github.com/sporkmonger/addressable".freeze
18
+ s.licenses = ["Apache-2.0".freeze]
19
+ s.rdoc_options = ["--main".freeze, "README.md".freeze]
20
+ s.required_ruby_version = Gem::Requirement.new(">= 2.2".freeze)
21
+ s.rubygems_version = "3.4.22".freeze
22
+ s.summary = "URI Implementation".freeze
23
+
24
+ s.specification_version = 4
25
+
26
+ s.add_runtime_dependency(%q<public_suffix>.freeze, [">= 2.0.2".freeze, "< 6.0".freeze])
27
+ s.add_development_dependency(%q<bundler>.freeze, [">= 1.0".freeze, "< 3.0".freeze])
28
+ end
@@ -29,8 +29,14 @@ module Addressable
29
29
  IDN::Punycode.decode(value.to_s)
30
30
  end
31
31
 
32
- def self.unicode_normalize_kc(value)
33
- IDN::Stringprep.nfkc_normalize(value.to_s)
32
+ class << self
33
+ # @deprecated Use {String#unicode_normalize(:nfkc)} instead
34
+ def unicode_normalize_kc(value)
35
+ value.to_s.unicode_normalize(:nfkc)
36
+ end
37
+
38
+ extend Gem::Deprecate
39
+ deprecate :unicode_normalize_kc, "String#unicode_normalize(:nfkc)", 2023, 4
34
40
  end
35
41
 
36
42
  def self.to_ascii(value)
@@ -66,7 +66,7 @@ module Addressable
66
66
  # domain name as described in RFC 3490.
67
67
  def self.to_ascii(input)
68
68
  input = input.to_s unless input.is_a?(String)
69
- input = input.dup
69
+ input = input.dup.force_encoding(Encoding::UTF_8).unicode_normalize(:nfkc)
70
70
  if input.respond_to?(:force_encoding)
71
71
  input.force_encoding(Encoding::ASCII_8BIT)
72
72
  end
@@ -77,7 +77,7 @@ module Addressable
77
77
  part.force_encoding(Encoding::ASCII_8BIT)
78
78
  end
79
79
  if part =~ UTF8_REGEX && part =~ UTF8_REGEX_MULTIBYTE
80
- ACE_PREFIX + punycode_encode(unicode_normalize_kc(part))
80
+ ACE_PREFIX + punycode_encode(part)
81
81
  else
82
82
  part
83
83
  end
@@ -112,13 +112,14 @@ module Addressable
112
112
  output
113
113
  end
114
114
 
115
- # Unicode normalization form KC.
116
- def self.unicode_normalize_kc(input)
117
- input = input.to_s unless input.is_a?(String)
118
- unpacked = input.unpack("U*")
119
- unpacked =
120
- unicode_compose(unicode_sort_canonical(unicode_decompose(unpacked)))
121
- return unpacked.pack("U*")
115
+ class << self
116
+ # @deprecated Use {String#unicode_normalize(:nfkc)} instead
117
+ def unicode_normalize_kc(value)
118
+ value.to_s.unicode_normalize(:nfkc)
119
+ end
120
+
121
+ extend Gem::Deprecate
122
+ deprecate :unicode_normalize_kc, "String#unicode_normalize(:nfkc)", 2023, 4
122
123
  end
123
124
 
124
125
  ##
@@ -136,164 +137,6 @@ module Addressable
136
137
  end
137
138
  private_class_method :unicode_downcase
138
139
 
139
- def self.unicode_compose(unpacked)
140
- unpacked_result = []
141
- length = unpacked.length
142
-
143
- return unpacked if length == 0
144
-
145
- starter = unpacked[0]
146
- starter_cc = lookup_unicode_combining_class(starter)
147
- starter_cc = 256 if starter_cc != 0
148
- for i in 1...length
149
- ch = unpacked[i]
150
-
151
- if (starter_cc == 0 &&
152
- (composite = unicode_compose_pair(starter, ch)) != nil)
153
- starter = composite
154
- else
155
- unpacked_result << starter
156
- starter = ch
157
- end
158
- end
159
- unpacked_result << starter
160
- return unpacked_result
161
- end
162
- private_class_method :unicode_compose
163
-
164
- def self.unicode_compose_pair(ch_one, ch_two)
165
- if ch_one >= HANGUL_LBASE && ch_one < HANGUL_LBASE + HANGUL_LCOUNT &&
166
- ch_two >= HANGUL_VBASE && ch_two < HANGUL_VBASE + HANGUL_VCOUNT
167
- # Hangul L + V
168
- return HANGUL_SBASE + (
169
- (ch_one - HANGUL_LBASE) * HANGUL_VCOUNT + (ch_two - HANGUL_VBASE)
170
- ) * HANGUL_TCOUNT
171
- elsif ch_one >= HANGUL_SBASE &&
172
- ch_one < HANGUL_SBASE + HANGUL_SCOUNT &&
173
- (ch_one - HANGUL_SBASE) % HANGUL_TCOUNT == 0 &&
174
- ch_two >= HANGUL_TBASE && ch_two < HANGUL_TBASE + HANGUL_TCOUNT
175
- # Hangul LV + T
176
- return ch_one + (ch_two - HANGUL_TBASE)
177
- end
178
-
179
- p = []
180
-
181
- ucs4_to_utf8(ch_one, p)
182
- ucs4_to_utf8(ch_two, p)
183
-
184
- return lookup_unicode_composition(p)
185
- end
186
- private_class_method :unicode_compose_pair
187
-
188
- def self.ucs4_to_utf8(char, buffer)
189
- if char < 128
190
- buffer << char
191
- elsif char < 2048
192
- buffer << (char >> 6 | 192)
193
- buffer << (char & 63 | 128)
194
- elsif char < 0x10000
195
- buffer << (char >> 12 | 224)
196
- buffer << (char >> 6 & 63 | 128)
197
- buffer << (char & 63 | 128)
198
- elsif char < 0x200000
199
- buffer << (char >> 18 | 240)
200
- buffer << (char >> 12 & 63 | 128)
201
- buffer << (char >> 6 & 63 | 128)
202
- buffer << (char & 63 | 128)
203
- elsif char < 0x4000000
204
- buffer << (char >> 24 | 248)
205
- buffer << (char >> 18 & 63 | 128)
206
- buffer << (char >> 12 & 63 | 128)
207
- buffer << (char >> 6 & 63 | 128)
208
- buffer << (char & 63 | 128)
209
- elsif char < 0x80000000
210
- buffer << (char >> 30 | 252)
211
- buffer << (char >> 24 & 63 | 128)
212
- buffer << (char >> 18 & 63 | 128)
213
- buffer << (char >> 12 & 63 | 128)
214
- buffer << (char >> 6 & 63 | 128)
215
- buffer << (char & 63 | 128)
216
- end
217
- end
218
- private_class_method :ucs4_to_utf8
219
-
220
- def self.unicode_sort_canonical(unpacked)
221
- unpacked = unpacked.dup
222
- i = 1
223
- length = unpacked.length
224
-
225
- return unpacked if length < 2
226
-
227
- while i < length
228
- last = unpacked[i-1]
229
- ch = unpacked[i]
230
- last_cc = lookup_unicode_combining_class(last)
231
- cc = lookup_unicode_combining_class(ch)
232
- if cc != 0 && last_cc != 0 && last_cc > cc
233
- unpacked[i] = last
234
- unpacked[i-1] = ch
235
- i -= 1 if i > 1
236
- else
237
- i += 1
238
- end
239
- end
240
- return unpacked
241
- end
242
- private_class_method :unicode_sort_canonical
243
-
244
- def self.unicode_decompose(unpacked)
245
- unpacked_result = []
246
- for cp in unpacked
247
- if cp >= HANGUL_SBASE && cp < HANGUL_SBASE + HANGUL_SCOUNT
248
- l, v, t = unicode_decompose_hangul(cp)
249
- unpacked_result << l
250
- unpacked_result << v if v
251
- unpacked_result << t if t
252
- else
253
- dc = lookup_unicode_compatibility(cp)
254
- unless dc
255
- unpacked_result << cp
256
- else
257
- unpacked_result.concat(unicode_decompose(dc.unpack("U*")))
258
- end
259
- end
260
- end
261
- return unpacked_result
262
- end
263
- private_class_method :unicode_decompose
264
-
265
- def self.unicode_decompose_hangul(codepoint)
266
- sindex = codepoint - HANGUL_SBASE;
267
- if sindex < 0 || sindex >= HANGUL_SCOUNT
268
- l = codepoint
269
- v = t = nil
270
- return l, v, t
271
- end
272
- l = HANGUL_LBASE + sindex / HANGUL_NCOUNT
273
- v = HANGUL_VBASE + (sindex % HANGUL_NCOUNT) / HANGUL_TCOUNT
274
- t = HANGUL_TBASE + sindex % HANGUL_TCOUNT
275
- if t == HANGUL_TBASE
276
- t = nil
277
- end
278
- return l, v, t
279
- end
280
- private_class_method :unicode_decompose_hangul
281
-
282
- def self.lookup_unicode_combining_class(codepoint)
283
- codepoint_data = UNICODE_DATA[codepoint]
284
- (codepoint_data ?
285
- (codepoint_data[UNICODE_DATA_COMBINING_CLASS] || 0) :
286
- 0)
287
- end
288
- private_class_method :lookup_unicode_combining_class
289
-
290
- def self.lookup_unicode_compatibility(codepoint)
291
- codepoint_data = UNICODE_DATA[codepoint]
292
- (codepoint_data ?
293
- codepoint_data[UNICODE_DATA_COMPATIBILITY] : nil)
294
- end
295
- private_class_method :lookup_unicode_compatibility
296
-
297
140
  def self.lookup_unicode_lowercase(codepoint)
298
141
  codepoint_data = UNICODE_DATA[codepoint]
299
142
  (codepoint_data ?
@@ -302,21 +145,6 @@ module Addressable
302
145
  end
303
146
  private_class_method :lookup_unicode_lowercase
304
147
 
305
- def self.lookup_unicode_composition(unpacked)
306
- return COMPOSITION_TABLE[unpacked]
307
- end
308
- private_class_method :lookup_unicode_composition
309
-
310
- HANGUL_SBASE = 0xac00
311
- HANGUL_LBASE = 0x1100
312
- HANGUL_LCOUNT = 19
313
- HANGUL_VBASE = 0x1161
314
- HANGUL_VCOUNT = 21
315
- HANGUL_TBASE = 0x11a7
316
- HANGUL_TCOUNT = 28
317
- HANGUL_NCOUNT = HANGUL_VCOUNT * HANGUL_TCOUNT # 588
318
- HANGUL_SCOUNT = HANGUL_LCOUNT * HANGUL_NCOUNT # 11172
319
-
320
148
  UNICODE_DATA_COMBINING_CLASS = 0
321
149
  UNICODE_DATA_EXCLUSION = 1
322
150
  UNICODE_DATA_CANONICAL = 2
@@ -892,25 +892,24 @@ module Addressable
892
892
  # operator.
893
893
  #
894
894
  # @param [Hash, Array, String] value
895
- # Normalizes keys and values with IDNA#unicode_normalize_kc
895
+ # Normalizes unicode keys and values with String#unicode_normalize (NFC)
896
896
  #
897
897
  # @return [Hash, Array, String] The normalized values
898
898
  def normalize_value(value)
899
- unless value.is_a?(Hash)
900
- value = value.respond_to?(:to_ary) ? value.to_ary : value.to_str
901
- end
902
-
903
899
  # Handle unicode normalization
904
- if value.kind_of?(Array)
905
- value.map! { |val| Addressable::IDNA.unicode_normalize_kc(val) }
900
+ if value.respond_to?(:to_ary)
901
+ value.to_ary.map! { |val| normalize_value(val) }
906
902
  elsif value.kind_of?(Hash)
907
903
  value = value.inject({}) { |acc, (k, v)|
908
- acc[Addressable::IDNA.unicode_normalize_kc(k)] =
909
- Addressable::IDNA.unicode_normalize_kc(v)
904
+ acc[normalize_value(k)] = normalize_value(v)
910
905
  acc
911
906
  }
912
907
  else
913
- value = Addressable::IDNA.unicode_normalize_kc(value)
908
+ value = value.to_s if !value.kind_of?(String)
909
+ if value.encoding != Encoding::UTF_8
910
+ value = value.dup.force_encoding(Encoding::UTF_8)
911
+ end
912
+ value = value.unicode_normalize(:nfc)
914
913
  end
915
914
  value
916
915
  end