addressable 2.8.0 → 2.8.6

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 03a21b1eab156a16e90bd7963af85980edfbddc8f3dbe052766303dba76cc000
4
- data.tar.gz: 03eca5d86f4c70f9320000f36e3cff4fd8023342a4e0ac855d0ef1ec89ee6183
3
+ metadata.gz: 4023555b4bb6c374726bb9647a29d30943c637115619670a09632fe7a4e28765
4
+ data.tar.gz: 95ad31a9e1dd01f2bc9a7c097eb452f07ed52be16dcd2ad4946652648dd7808c
5
5
  SHA512:
6
- metadata.gz: d504f9475ad823f5bb077b9c039a2c91c83e52c20896247a7289b61725c61b1ddefe8ae06155fb018fc67087cf04276081b42105a18394b45e2374ad0b2fadb0
7
- data.tar.gz: b81766fbcb9335d5ca94403b62d3b2a6fae31b66cd3c05f48e1885eaf07883bfa1321b6930271fe1415135aec687af51312a26ce27bd4b83b2ac6424dec597c9
6
+ metadata.gz: d7dd6eed173bc4a06a3abb74f6fdbf501ee12b3fcb2828c0f21db98d1779175bce4dc900999269db24cc8647c3148b06cf1d9c768c97c7591a48f1e43751bb7b
7
+ data.tar.gz: 3bbe976f23a4c3a6776a44d6ff2147b9ab87418162af57b2f278089528416eb3d564b114f96f165946d9126ee68d5da86c0ac01a9e2fa2792547bb312f40f046
data/CHANGELOG.md CHANGED
@@ -1,4 +1,54 @@
1
- # Addressable 2.8.0
1
+ # Addressable 2.8.6 <a name="v2.8.6">
2
+ - Memoize regexps for common character classes ([#524])
3
+
4
+ [#524]: https://github.com/sporkmonger/addressable/pull/524
5
+
6
+ # Addressable 2.8.5 <a name="v2.8.5">
7
+ - Fix thread safety issue with encoding tables ([#515])
8
+ - Define URI::NONE as a module to avoid serialization issues ([#509])
9
+ - Fix YAML serialization ([#508])
10
+
11
+ [#508]: https://github.com/sporkmonger/addressable/pull/508
12
+ [#509]: https://github.com/sporkmonger/addressable/pull/509
13
+ [#515]: https://github.com/sporkmonger/addressable/pull/515
14
+
15
+ # Addressable 2.8.4 <a name="v2.8.4">
16
+ - Restore `Addressable::IDNA.unicode_normalize_kc` as a deprecated method ([#504])
17
+
18
+ [#504]: https://github.com/sporkmonger/addressable/pull/504
19
+
20
+ # Addressable 2.8.3 <a name="v2.8.3">
21
+ - Fix template expand level 2 hash support for non-string objects ([#499], [#498])
22
+
23
+ [#499]: https://github.com/sporkmonger/addressable/pull/499
24
+ [#498]: https://github.com/sporkmonger/addressable/pull/498
25
+
26
+ # Addressable 2.8.2 <a name="v2.8.2">
27
+ - Improve cache hits and JIT friendliness ([#486](https://github.com/sporkmonger/addressable/pull/486))
28
+ - Improve code style and test coverage ([#482](https://github.com/sporkmonger/addressable/pull/482))
29
+ - Ensure reset of deferred validation ([#481](https://github.com/sporkmonger/addressable/pull/481))
30
+ - Resolve normalization differences between `IDNA::Native` and `IDNA::Pure` ([#408](https://github.com/sporkmonger/addressable/issues/408), [#492])
31
+ - Remove redundant colon in `Addressable::URI::CharacterClasses::AUTHORITY` regex ([#438](https://github.com/sporkmonger/addressable/pull/438)) (accidentally reverted by [#449] merge but [added back](https://github.com/sporkmonger/addressable/pull/492#discussion_r1105125280) in [#492])
32
+
33
+ [#492]: https://github.com/sporkmonger/addressable/pull/492
34
+
35
+ # Addressable 2.8.1 <a name="v2.8.1">
36
+ - refactor `Addressable::URI.normalize_path` to address linter offenses ([#430](https://github.com/sporkmonger/addressable/pull/430))
37
+ - update gemspec to reflect supported Ruby versions ([#466], [#464], [#463])
38
+ - compatibility w/ public_suffix 5.x ([#466], [#465], [#460])
39
+ - fixes "invalid byte sequence in UTF-8" exception when unencoding URLs containing non UTF-8 characters ([#459](https://github.com/sporkmonger/addressable/pull/459))
40
+ - `Ractor` compatibility ([#449])
41
+ - use the whole string instead of a single line for template match ([#431](https://github.com/sporkmonger/addressable/pull/431))
42
+ - force UTF-8 encoding only if needed ([#341](https://github.com/sporkmonger/addressable/pull/341))
43
+
44
+ [#449]: https://github.com/sporkmonger/addressable/pull/449
45
+ [#460]: https://github.com/sporkmonger/addressable/pull/460
46
+ [#463]: https://github.com/sporkmonger/addressable/pull/463
47
+ [#464]: https://github.com/sporkmonger/addressable/pull/464
48
+ [#465]: https://github.com/sporkmonger/addressable/pull/465
49
+ [#466]: https://github.com/sporkmonger/addressable/pull/466
50
+
51
+ # Addressable 2.8.0 <a name="v2.8.0">
2
52
  - fixes ReDoS vulnerability in Addressable::Template#match
3
53
  - no longer replaces `+` with spaces in queries for non-http(s) schemes
4
54
  - fixed encoding ipv6 literals
@@ -10,14 +60,14 @@
10
60
  - performance improvements
11
61
  - switch CI/CD to GitHub Actions
12
62
 
13
- # Addressable 2.7.0
63
+ # Addressable 2.7.0 <a name="v2.7.0">
14
64
  - added `:compacted` flag to `normalized_query`
15
65
  - `heuristic_parse` handles `mailto:` more intuitively
16
66
  - dropped explicit support for JRuby 9.0.5.0
17
67
  - compatibility w/ public_suffix 4.x
18
68
  - performance improvements
19
69
 
20
- # Addressable 2.6.0
70
+ # Addressable 2.6.0 <a name="v2.6.0">
21
71
  - added `tld=` method to allow assignment to the public suffix
22
72
  - most `heuristic_parse` patterns are now case-insensitive
23
73
  - `heuristic_parse` handles more `file://` URI variations
@@ -28,17 +78,17 @@
28
78
  - minor performance improvements in regexps
29
79
  - fixes to eliminate warnings
30
80
 
31
- # Addressable 2.5.2
81
+ # Addressable 2.5.2 <a name="v2.5.2">
32
82
  - better support for frozen string literals
33
83
  - fixed bug w/ uppercase characters in scheme
34
84
  - IDNA errors w/ emoji URLs
35
85
  - compatibility w/ public_suffix 3.x
36
86
 
37
- # Addressable 2.5.1
87
+ # Addressable 2.5.1 <a name="v2.5.1">
38
88
  - allow unicode normalization to be disabled for URI Template expansion
39
89
  - removed duplicate test
40
90
 
41
- # Addressable 2.5.0
91
+ # Addressable 2.5.0 <a name="v2.5.0">
42
92
  - dropping support for Ruby 1.9
43
93
  - adding support for Ruby 2.4 preview
44
94
  - add support for public suffixes and tld; first runtime dependency
@@ -52,7 +102,7 @@
52
102
  - host parts longer than 63 bytes will be ignored and not passed to libidn
53
103
  - normalized values always encoded as UTF-8
54
104
 
55
- # Addressable 2.4.0
105
+ # Addressable 2.4.0 <a name="v2.4.0">
56
106
  - support for 1.8.x dropped
57
107
  - double quotes in a host now raises an error
58
108
  - newlines in host will no longer get unescaped during normalization
@@ -64,17 +114,17 @@
64
114
  - fixed minor bug where an exception would be thrown for a missing ACE suffix
65
115
  - better partial expansion of URI templates
66
116
 
67
- # Addressable 2.3.8
117
+ # Addressable 2.3.8 <a name="v2.3.8">
68
118
  - fix warnings
69
119
  - update dependency gems
70
120
  - support for 1.8.x officially deprecated
71
121
 
72
- # Addressable 2.3.7
122
+ # Addressable 2.3.7 <a name="v2.3.7">
73
123
  - fix scenario in which invalid URIs don't get an exception until inspected
74
124
  - handle hostnames with two adjacent periods correctly
75
125
  - upgrade of RSpec
76
126
 
77
- # Addressable 2.3.6
127
+ # Addressable 2.3.6 <a name="v2.3.6">
78
128
  - normalization drops empty query string
79
129
  - better handling in template extract for missing values
80
130
  - template modifier for `'?'` now treated as optional
@@ -83,19 +133,19 @@
83
133
  - added `:sorted` option to normalization of query strings
84
134
  - fixed issue with normalization of hosts given in `'example.com.'` form
85
135
 
86
- # Addressable 2.3.5
136
+ # Addressable 2.3.5 <a name="v2.3.5">
87
137
  - added Addressable::URI#empty? method
88
138
  - Addressable::URI#hostname methods now strip square brackets from IPv6 hosts
89
139
  - compatibility with Net::HTTP in Ruby 2.0.0
90
140
  - Addressable::URI#route_from should always give relative URIs
91
141
 
92
- # Addressable 2.3.4
142
+ # Addressable 2.3.4 <a name="v2.3.4">
93
143
  - fixed issue with encoding altering its inputs
94
144
  - query string normalization now leaves ';' characters alone
95
145
  - FakeFS is detected before attempting to load unicode tables
96
146
  - additional testing to ensure frozen objects don't cause problems
97
147
 
98
- # Addressable 2.3.3
148
+ # Addressable 2.3.3 <a name="v2.3.3">
99
149
  - fixed issue with converting common primitives during template expansion
100
150
  - fixed port encoding issue
101
151
  - removed a few warnings
@@ -104,59 +154,59 @@
104
154
  - no template match should now result in nil instead of an empty MatchData
105
155
  - added license information to gemspec
106
156
 
107
- # Addressable 2.3.2
157
+ # Addressable 2.3.2 <a name="v2.3.2">
108
158
  - added Addressable::URI#default_port method
109
159
  - fixed issue with Marshalling Unicode data on Windows
110
160
  - improved heuristic parsing to better handle IPv4 addresses
111
161
 
112
- # Addressable 2.3.1
162
+ # Addressable 2.3.1 <a name="v2.3.1">
113
163
  - fixed missing unicode data file
114
164
 
115
- # Addressable 2.3.0
165
+ # Addressable 2.3.0 <a name="v2.3.0">
116
166
  - updated Addressable::Template to use RFC 6570, level 4
117
167
  - fixed compatibility problems with some versions of Ruby
118
168
  - moved unicode tables into a data file for performance reasons
119
169
  - removing support for multiple query value notations
120
170
 
121
- # Addressable 2.2.8
171
+ # Addressable 2.2.8 <a name="v2.2.8">
122
172
  - fixed issues with dot segment removal code
123
173
  - form encoding can now handle multiple values per key
124
174
  - updated development environment
125
175
 
126
- # Addressable 2.2.7
176
+ # Addressable 2.2.7 <a name="v2.2.7">
127
177
  - fixed issues related to Addressable::URI#query_values=
128
178
  - the Addressable::URI.parse method is now polymorphic
129
179
 
130
- # Addressable 2.2.6
180
+ # Addressable 2.2.6 <a name="v2.2.6">
131
181
  - changed the way ambiguous paths are handled
132
182
  - fixed bug with frozen URIs
133
183
  - https supported in heuristic parsing
134
184
 
135
- # Addressable 2.2.5
185
+ # Addressable 2.2.5 <a name="v2.2.5">
136
186
  - 'parsing' a pre-parsed URI object is now a dup operation
137
187
  - introduced conditional support for libidn
138
188
  - fixed normalization issue on ampersands in query strings
139
189
  - added additional tests around handling of query strings
140
190
 
141
- # Addressable 2.2.4
191
+ # Addressable 2.2.4 <a name="v2.2.4">
142
192
  - added origin support from draft-ietf-websec-origin-00
143
193
  - resolved issue with attempting to navigate below root
144
194
  - fixed bug with string splitting in query strings
145
195
 
146
- # Addressable 2.2.3
196
+ # Addressable 2.2.3 <a name="v2.2.3">
147
197
  - added :flat_array notation for query strings
148
198
 
149
- # Addressable 2.2.2
199
+ # Addressable 2.2.2 <a name="v2.2.2">
150
200
  - fixed issue with percent escaping of '+' character in query strings
151
201
 
152
- # Addressable 2.2.1
202
+ # Addressable 2.2.1 <a name="v2.2.1">
153
203
  - added support for application/x-www-form-urlencoded.
154
204
 
155
- # Addressable 2.2.0
205
+ # Addressable 2.2.0 <a name="v2.2.0">
156
206
  - added site methods
157
207
  - improved documentation
158
208
 
159
- # Addressable 2.1.2
209
+ # Addressable 2.1.2 <a name="v2.1.2">
160
210
  - added HTTP request URI methods
161
211
  - better handling of Windows file paths
162
212
  - validation_deferred boolean replaced with defer_validation block
@@ -164,14 +214,14 @@
164
214
  - fixed issue with constructing URIs with relative paths
165
215
  - fixed warnings
166
216
 
167
- # Addressable 2.1.1
217
+ # Addressable 2.1.1 <a name="v2.1.1">
168
218
  - more type checking changes
169
219
  - fixed issue with unicode normalization
170
220
  - added method to find template defaults
171
221
  - symbolic keys are now allowed in template mappings
172
222
  - numeric values and symbolic values are now allowed in template mappings
173
223
 
174
- # Addressable 2.1.0
224
+ # Addressable 2.1.0 <a name="v2.1.0">
175
225
  - refactored URI template support out into its own class
176
226
  - removed extract method due to being useless and unreliable
177
227
  - removed Addressable::URI.expand_template
@@ -185,15 +235,15 @@
185
235
  - worked around issue with freezing URIs
186
236
  - improved specs
187
237
 
188
- # Addressable 2.0.2
238
+ # Addressable 2.0.2 <a name="v2.0.2">
189
239
  - fixed issue with URI template expansion
190
240
  - fixed issue with percent escaping characters 0-15
191
241
 
192
- # Addressable 2.0.1
242
+ # Addressable 2.0.1 <a name="v2.0.1">
193
243
  - fixed issue with query string assignment
194
244
  - fixed issue with improperly encoded components
195
245
 
196
- # Addressable 2.0.0
246
+ # Addressable 2.0.0 <a name="v2.0.0">
197
247
  - the initialize method now takes an options hash as its only parameter
198
248
  - added query_values method to URI class
199
249
  - completely replaced IDNA implementation with pure Ruby
@@ -208,20 +258,20 @@
208
258
  - updated URI Template code to match v 03 of the draft spec
209
259
  - added a bunch of new specifications
210
260
 
211
- # Addressable 1.0.4
261
+ # Addressable 1.0.4 <a name="v1.0.4">
212
262
  - switched to using RSpec's pending system for specs that rely on IDN
213
263
  - fixed issue with creating URIs with paths that are not prefixed with '/'
214
264
 
215
- # Addressable 1.0.3
265
+ # Addressable 1.0.3 <a name="v1.0.3">
216
266
  - implemented a hash method
217
267
 
218
- # Addressable 1.0.2
268
+ # Addressable 1.0.2 <a name="v1.0.2">
219
269
  - fixed minor bug with the extract_mapping method
220
270
 
221
- # Addressable 1.0.1
271
+ # Addressable 1.0.1 <a name="v1.0.1">
222
272
  - fixed minor bug with the extract_mapping method
223
273
 
224
- # Addressable 1.0.0
274
+ # Addressable 1.0.0 <a name="v1.0.0">
225
275
  - heuristic parse method added
226
276
  - parsing is slightly more strict
227
277
  - replaced to_h with to_hash
@@ -230,16 +280,16 @@
230
280
  - improved heckle rake task
231
281
  - no surviving heckle mutations
232
282
 
233
- # Addressable 0.1.2
283
+ # Addressable 0.1.2 <a name="v0.1.2">
234
284
  - improved normalization
235
285
  - fixed bug in joining algorithm
236
286
  - updated specifications
237
287
 
238
- # Addressable 0.1.1
288
+ # Addressable 0.1.1 <a name="v0.1.1">
239
289
  - updated documentation
240
290
  - added URI Template variable extraction
241
291
 
242
- # Addressable 0.1.0
292
+ # Addressable 0.1.0 <a name="v0.1.0">
243
293
  - initial release
244
294
  - implementation based on RFC 3986, 3987
245
295
  - support for IRIs via libidn
data/Gemfile CHANGED
@@ -2,7 +2,7 @@
2
2
 
3
3
  source 'https://rubygems.org'
4
4
 
5
- gemspec(path: __FILE__ == "(eval)" ? ".." : ".")
5
+ gemspec
6
6
 
7
7
  group :test do
8
8
  gem 'rspec', '~> 3.8'
@@ -25,4 +25,6 @@ group :test, :development do
25
25
  gem "rake", ">= 12.3.3"
26
26
  end
27
27
 
28
- gem "idn-ruby", platform: :mri
28
+ unless ENV["IDNA_MODE"] == "pure"
29
+ gem "idn-ruby", platform: :mri
30
+ end
data/Rakefile CHANGED
@@ -20,11 +20,17 @@ additionally provides extensive support for IRIs and URI templates.
20
20
  TEXT
21
21
 
22
22
  PKG_FILES = FileList[
23
- "lib/**/*", "spec/**/*", "vendor/**/*", "data/**/*",
24
- "tasks/**/*",
25
- "[A-Z]*", "Rakefile"
26
- ].exclude(/pkg/).exclude(/database\.yml/).
27
- exclude(/Gemfile\.lock/).exclude(/[_\.]git$/)
23
+ "data/**/*",
24
+ "lib/**/*.rb",
25
+ "spec/**/*.rb",
26
+ "tasks/**/*.rake",
27
+ "addressable.gemspec",
28
+ "CHANGELOG.md",
29
+ "Gemfile",
30
+ "LICENSE.txt",
31
+ "README.md",
32
+ "Rakefile",
33
+ ]
28
34
 
29
35
  task :default => "spec"
30
36
 
data/addressable.gemspec CHANGED
@@ -1,14 +1,15 @@
1
1
  # -*- encoding: utf-8 -*-
2
- # stub: addressable 2.8.0 ruby lib
2
+ # stub: addressable 2.8.6 ruby lib
3
3
 
4
4
  Gem::Specification.new do |s|
5
5
  s.name = "addressable".freeze
6
- s.version = "2.8.0"
6
+ s.version = "2.8.6".freeze
7
7
 
8
8
  s.required_rubygems_version = Gem::Requirement.new(">= 0".freeze) if s.respond_to? :required_rubygems_version=
9
+ s.metadata = { "changelog_uri" => "https://github.com/sporkmonger/addressable/blob/main/CHANGELOG.md#v2.8.6" } if s.respond_to? :metadata=
9
10
  s.require_paths = ["lib".freeze]
10
11
  s.authors = ["Bob Aman".freeze]
11
- s.date = "2021-07-03"
12
+ s.date = "2023-12-09"
12
13
  s.description = "Addressable is an alternative implementation to the URI implementation that is\npart of Ruby's standard library. It is flexible, offers heuristic parsing, and\nadditionally provides extensive support for IRIs and URI templates.\n".freeze
13
14
  s.email = "bob@sporkmonger.com".freeze
14
15
  s.extra_rdoc_files = ["README.md".freeze]
@@ -16,22 +17,12 @@ Gem::Specification.new do |s|
16
17
  s.homepage = "https://github.com/sporkmonger/addressable".freeze
17
18
  s.licenses = ["Apache-2.0".freeze]
18
19
  s.rdoc_options = ["--main".freeze, "README.md".freeze]
19
- s.required_ruby_version = Gem::Requirement.new(">= 2.0".freeze)
20
- s.rubygems_version = "3.0.3".freeze
20
+ s.required_ruby_version = Gem::Requirement.new(">= 2.2".freeze)
21
+ s.rubygems_version = "3.4.22".freeze
21
22
  s.summary = "URI Implementation".freeze
22
23
 
23
- if s.respond_to? :specification_version then
24
- s.specification_version = 4
24
+ s.specification_version = 4
25
25
 
26
- if Gem::Version.new(Gem::VERSION) >= Gem::Version.new('1.2.0') then
27
- s.add_runtime_dependency(%q<public_suffix>.freeze, [">= 2.0.2", "< 5.0"])
28
- s.add_development_dependency(%q<bundler>.freeze, [">= 1.0", "< 3.0"])
29
- else
30
- s.add_dependency(%q<public_suffix>.freeze, [">= 2.0.2", "< 5.0"])
31
- s.add_dependency(%q<bundler>.freeze, [">= 1.0", "< 3.0"])
32
- end
33
- else
34
- s.add_dependency(%q<public_suffix>.freeze, [">= 2.0.2", "< 5.0"])
35
- s.add_dependency(%q<bundler>.freeze, [">= 1.0", "< 3.0"])
36
- end
26
+ s.add_runtime_dependency(%q<public_suffix>.freeze, [">= 2.0.2".freeze, "< 6.0".freeze])
27
+ s.add_development_dependency(%q<bundler>.freeze, [">= 1.0".freeze, "< 3.0".freeze])
37
28
  end
@@ -1,6 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- # encoding:utf-8
4
3
  #--
5
4
  # Copyright (C) Bob Aman
6
5
  #
@@ -30,8 +29,14 @@ module Addressable
30
29
  IDN::Punycode.decode(value.to_s)
31
30
  end
32
31
 
33
- def self.unicode_normalize_kc(value)
34
- IDN::Stringprep.nfkc_normalize(value.to_s)
32
+ class << self
33
+ # @deprecated Use {String#unicode_normalize(:nfkc)} instead
34
+ def unicode_normalize_kc(value)
35
+ value.to_s.unicode_normalize(:nfkc)
36
+ end
37
+
38
+ extend Gem::Deprecate
39
+ deprecate :unicode_normalize_kc, "String#unicode_normalize(:nfkc)", 2023, 4
35
40
  end
36
41
 
37
42
  def self.to_ascii(value)
@@ -1,6 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- # encoding:utf-8
4
3
  #--
5
4
  # Copyright (C) Bob Aman
6
5
  #
@@ -67,7 +66,7 @@ module Addressable
67
66
  # domain name as described in RFC 3490.
68
67
  def self.to_ascii(input)
69
68
  input = input.to_s unless input.is_a?(String)
70
- input = input.dup
69
+ input = input.dup.force_encoding(Encoding::UTF_8).unicode_normalize(:nfkc)
71
70
  if input.respond_to?(:force_encoding)
72
71
  input.force_encoding(Encoding::ASCII_8BIT)
73
72
  end
@@ -78,7 +77,7 @@ module Addressable
78
77
  part.force_encoding(Encoding::ASCII_8BIT)
79
78
  end
80
79
  if part =~ UTF8_REGEX && part =~ UTF8_REGEX_MULTIBYTE
81
- ACE_PREFIX + punycode_encode(unicode_normalize_kc(part))
80
+ ACE_PREFIX + punycode_encode(part)
82
81
  else
83
82
  part
84
83
  end
@@ -113,13 +112,14 @@ module Addressable
113
112
  output
114
113
  end
115
114
 
116
- # Unicode normalization form KC.
117
- def self.unicode_normalize_kc(input)
118
- input = input.to_s unless input.is_a?(String)
119
- unpacked = input.unpack("U*")
120
- unpacked =
121
- unicode_compose(unicode_sort_canonical(unicode_decompose(unpacked)))
122
- return unpacked.pack("U*")
115
+ class << self
116
+ # @deprecated Use {String#unicode_normalize(:nfkc)} instead
117
+ def unicode_normalize_kc(value)
118
+ value.to_s.unicode_normalize(:nfkc)
119
+ end
120
+
121
+ extend Gem::Deprecate
122
+ deprecate :unicode_normalize_kc, "String#unicode_normalize(:nfkc)", 2023, 4
123
123
  end
124
124
 
125
125
  ##
@@ -137,164 +137,6 @@ module Addressable
137
137
  end
138
138
  private_class_method :unicode_downcase
139
139
 
140
- def self.unicode_compose(unpacked)
141
- unpacked_result = []
142
- length = unpacked.length
143
-
144
- return unpacked if length == 0
145
-
146
- starter = unpacked[0]
147
- starter_cc = lookup_unicode_combining_class(starter)
148
- starter_cc = 256 if starter_cc != 0
149
- for i in 1...length
150
- ch = unpacked[i]
151
-
152
- if (starter_cc == 0 &&
153
- (composite = unicode_compose_pair(starter, ch)) != nil)
154
- starter = composite
155
- else
156
- unpacked_result << starter
157
- starter = ch
158
- end
159
- end
160
- unpacked_result << starter
161
- return unpacked_result
162
- end
163
- private_class_method :unicode_compose
164
-
165
- def self.unicode_compose_pair(ch_one, ch_two)
166
- if ch_one >= HANGUL_LBASE && ch_one < HANGUL_LBASE + HANGUL_LCOUNT &&
167
- ch_two >= HANGUL_VBASE && ch_two < HANGUL_VBASE + HANGUL_VCOUNT
168
- # Hangul L + V
169
- return HANGUL_SBASE + (
170
- (ch_one - HANGUL_LBASE) * HANGUL_VCOUNT + (ch_two - HANGUL_VBASE)
171
- ) * HANGUL_TCOUNT
172
- elsif ch_one >= HANGUL_SBASE &&
173
- ch_one < HANGUL_SBASE + HANGUL_SCOUNT &&
174
- (ch_one - HANGUL_SBASE) % HANGUL_TCOUNT == 0 &&
175
- ch_two >= HANGUL_TBASE && ch_two < HANGUL_TBASE + HANGUL_TCOUNT
176
- # Hangul LV + T
177
- return ch_one + (ch_two - HANGUL_TBASE)
178
- end
179
-
180
- p = []
181
-
182
- ucs4_to_utf8(ch_one, p)
183
- ucs4_to_utf8(ch_two, p)
184
-
185
- return lookup_unicode_composition(p)
186
- end
187
- private_class_method :unicode_compose_pair
188
-
189
- def self.ucs4_to_utf8(char, buffer)
190
- if char < 128
191
- buffer << char
192
- elsif char < 2048
193
- buffer << (char >> 6 | 192)
194
- buffer << (char & 63 | 128)
195
- elsif char < 0x10000
196
- buffer << (char >> 12 | 224)
197
- buffer << (char >> 6 & 63 | 128)
198
- buffer << (char & 63 | 128)
199
- elsif char < 0x200000
200
- buffer << (char >> 18 | 240)
201
- buffer << (char >> 12 & 63 | 128)
202
- buffer << (char >> 6 & 63 | 128)
203
- buffer << (char & 63 | 128)
204
- elsif char < 0x4000000
205
- buffer << (char >> 24 | 248)
206
- buffer << (char >> 18 & 63 | 128)
207
- buffer << (char >> 12 & 63 | 128)
208
- buffer << (char >> 6 & 63 | 128)
209
- buffer << (char & 63 | 128)
210
- elsif char < 0x80000000
211
- buffer << (char >> 30 | 252)
212
- buffer << (char >> 24 & 63 | 128)
213
- buffer << (char >> 18 & 63 | 128)
214
- buffer << (char >> 12 & 63 | 128)
215
- buffer << (char >> 6 & 63 | 128)
216
- buffer << (char & 63 | 128)
217
- end
218
- end
219
- private_class_method :ucs4_to_utf8
220
-
221
- def self.unicode_sort_canonical(unpacked)
222
- unpacked = unpacked.dup
223
- i = 1
224
- length = unpacked.length
225
-
226
- return unpacked if length < 2
227
-
228
- while i < length
229
- last = unpacked[i-1]
230
- ch = unpacked[i]
231
- last_cc = lookup_unicode_combining_class(last)
232
- cc = lookup_unicode_combining_class(ch)
233
- if cc != 0 && last_cc != 0 && last_cc > cc
234
- unpacked[i] = last
235
- unpacked[i-1] = ch
236
- i -= 1 if i > 1
237
- else
238
- i += 1
239
- end
240
- end
241
- return unpacked
242
- end
243
- private_class_method :unicode_sort_canonical
244
-
245
- def self.unicode_decompose(unpacked)
246
- unpacked_result = []
247
- for cp in unpacked
248
- if cp >= HANGUL_SBASE && cp < HANGUL_SBASE + HANGUL_SCOUNT
249
- l, v, t = unicode_decompose_hangul(cp)
250
- unpacked_result << l
251
- unpacked_result << v if v
252
- unpacked_result << t if t
253
- else
254
- dc = lookup_unicode_compatibility(cp)
255
- unless dc
256
- unpacked_result << cp
257
- else
258
- unpacked_result.concat(unicode_decompose(dc.unpack("U*")))
259
- end
260
- end
261
- end
262
- return unpacked_result
263
- end
264
- private_class_method :unicode_decompose
265
-
266
- def self.unicode_decompose_hangul(codepoint)
267
- sindex = codepoint - HANGUL_SBASE;
268
- if sindex < 0 || sindex >= HANGUL_SCOUNT
269
- l = codepoint
270
- v = t = nil
271
- return l, v, t
272
- end
273
- l = HANGUL_LBASE + sindex / HANGUL_NCOUNT
274
- v = HANGUL_VBASE + (sindex % HANGUL_NCOUNT) / HANGUL_TCOUNT
275
- t = HANGUL_TBASE + sindex % HANGUL_TCOUNT
276
- if t == HANGUL_TBASE
277
- t = nil
278
- end
279
- return l, v, t
280
- end
281
- private_class_method :unicode_decompose_hangul
282
-
283
- def self.lookup_unicode_combining_class(codepoint)
284
- codepoint_data = UNICODE_DATA[codepoint]
285
- (codepoint_data ?
286
- (codepoint_data[UNICODE_DATA_COMBINING_CLASS] || 0) :
287
- 0)
288
- end
289
- private_class_method :lookup_unicode_combining_class
290
-
291
- def self.lookup_unicode_compatibility(codepoint)
292
- codepoint_data = UNICODE_DATA[codepoint]
293
- (codepoint_data ?
294
- codepoint_data[UNICODE_DATA_COMPATIBILITY] : nil)
295
- end
296
- private_class_method :lookup_unicode_compatibility
297
-
298
140
  def self.lookup_unicode_lowercase(codepoint)
299
141
  codepoint_data = UNICODE_DATA[codepoint]
300
142
  (codepoint_data ?
@@ -303,21 +145,6 @@ module Addressable
303
145
  end
304
146
  private_class_method :lookup_unicode_lowercase
305
147
 
306
- def self.lookup_unicode_composition(unpacked)
307
- return COMPOSITION_TABLE[unpacked]
308
- end
309
- private_class_method :lookup_unicode_composition
310
-
311
- HANGUL_SBASE = 0xac00
312
- HANGUL_LBASE = 0x1100
313
- HANGUL_LCOUNT = 19
314
- HANGUL_VBASE = 0x1161
315
- HANGUL_VCOUNT = 21
316
- HANGUL_TBASE = 0x11a7
317
- HANGUL_TCOUNT = 28
318
- HANGUL_NCOUNT = HANGUL_VCOUNT * HANGUL_TCOUNT # 588
319
- HANGUL_SCOUNT = HANGUL_LCOUNT * HANGUL_NCOUNT # 11172
320
-
321
148
  UNICODE_DATA_COMBINING_CLASS = 0
322
149
  UNICODE_DATA_EXCLUSION = 1
323
150
  UNICODE_DATA_CANONICAL = 2
@@ -1,6 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- # encoding:utf-8
4
3
  #--
5
4
  # Copyright (C) Bob Aman
6
5
  #