uri-whatwg_parser 0.1.4 → 0.1.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +9 -0
- data/README.md +1 -3
- data/lib/uri/whatwg_parser/host_parser.rb +118 -17
- data/lib/uri/whatwg_parser/version.rb +1 -1
- data/lib/uri/whatwg_parser.rb +14 -13
- metadata +3 -3
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 712c7079461310fdab82e79bea4491ce91a4a8b26827a338334a0e161520a8c0
|
|
4
|
+
data.tar.gz: 85871841fdee54a61c942134398bfb85e2e89f798144921464058e4edd2ea8fb
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 42b046952ea2817c3db80edcc69bafbf86822c9b88f23837b4decb9d9693b0044f68b9a74c920707e16ee28121f2c03d6992df300227b32cf62a3995e7af7ee5
|
|
7
|
+
data.tar.gz: aa9c3d27934eec97f11f579cc2b194b5b88f0d69da5c224cb4ebf6fd13878c8a493a7e3391f7f985674e602629da303364aa49c7ed9b88f0b101b66c304db6d2
|
data/CHANGELOG.md
CHANGED
data/README.md
CHANGED
|
@@ -2,9 +2,7 @@
|
|
|
2
2
|
|
|
3
3
|
Ruby implementation of the [WHATWG URL Living Standard](https://url.spec.whatwg.org/).
|
|
4
4
|
|
|
5
|
-
The latest revision that this package implements of the standard is ([
|
|
6
|
-
|
|
7
|
-
NOTE: Some features haven't been implemented yet. Please see the TODO for details.
|
|
5
|
+
The latest revision that this package implements of the standard is ([18 August 2025](https://url.spec.whatwg.org/commit-snapshots/05a5d834deba31622390ee05a3dcbc22496b7bb5/)).
|
|
8
6
|
|
|
9
7
|
## Installation
|
|
10
8
|
|
|
@@ -1,7 +1,6 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
2
|
|
|
3
3
|
require "uri/idna"
|
|
4
|
-
require "ipaddr"
|
|
5
4
|
require_relative "parser_helper"
|
|
6
5
|
|
|
7
6
|
class URI::WhatwgParser
|
|
@@ -66,26 +65,128 @@ class URI::WhatwgParser
|
|
|
66
65
|
output.join(".")
|
|
67
66
|
end
|
|
68
67
|
|
|
69
|
-
def parse_ipv6(
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
68
|
+
# Parses the textual form of an IPv6 address and hands the resulting eight
# 16-bit pieces to compress_ipv6, whose return value is returned unchanged.
#
# The input may be wrapped in "[" and "]"; the brackets are stripped first.
# Each piece is one to four hex digits, "::" may appear at most once and must
# stand in for at least one zero piece, and a dotted IPv4 part may supply the
# last two pieces.
#
# Raises ParseError ("invalid IPv6 format") for malformed input, including a
# trailing single ":", "::" combined with eight explicit pieces, and an IPv4
# part that does not leave room for two pieces.
def parse_ipv6(input)
  input = input[1..-2] if input.start_with?("[") && input.end_with?("]")
  address = Array.new(8, 0)
  piece_index = 0
  compress = nil # piece index at which "::" was seen, nil when absent
  chars = input.chars
  i = 0

  # A leading ":" is only legal as the first half of "::".
  if chars[i] == ":"
    raise ParseError, "invalid IPv6 format" unless chars[i + 1] == ":"

    i += 2
    compress = piece_index
  end

  while i < chars.length
    raise ParseError, "invalid IPv6 format" if piece_index == 8

    # The separating ":" of the previous piece was already consumed, so a
    # ":" here is the second half of "::".
    if chars[i] == ":"
      raise ParseError, "invalid IPv6 format" if compress

      i += 1
      compress = piece_index
      next
    end

    value = 0
    length = 0
    while length < 4 && i < chars.length && chars[i].match?(/[0-9A-Fa-f]/)
      value = value * 16 + chars[i].to_i(16)
      i += 1
      length += 1
    end

    if chars[i] == "."
      # The IPv4 part occupies two pieces; without this guard the second
      # write would land past the end of the eight-piece array.
      raise ParseError, "invalid IPv6 format" if piece_index > 6

      # Re-read from the start of the digits just consumed: the rest of the
      # input must be a complete dotted quad with no trailing dot.
      ipv4_piece = chars.drop(i - length).join
      parts = ipv4_piece.split(".")
      if parts.length != 4 || parts.any?(&:empty?) || ipv4_piece.end_with?(".")
        raise ParseError, "invalid IPv6 format"
      end

      # NOTE(review): assumes parse_ipv4 returns the address as a 32-bit
      # Integer; its definition is not visible here - confirm.
      ipv4 = parse_ipv4(ipv4_piece)
      address[piece_index] = (ipv4 >> 16) & 0xFFFF
      address[piece_index + 1] = ipv4 & 0xFFFF
      piece_index += 2
      break
    end

    raise ParseError, "invalid IPv6 format" if length == 0

    address[piece_index] = value
    piece_index += 1

    break if i >= chars.length
    raise ParseError, "invalid IPv6 format" unless chars[i] == ":"

    i += 1
    # A separator must be followed by another piece or by a second ":".
    raise ParseError, "invalid IPv6 format" if i >= chars.length
  end

  if compress
    # "::" has to replace at least one zero piece, so eight explicit pieces
    # leave no room for it.
    raise ParseError, "invalid IPv6 format" if piece_index >= 8

    # Slide the pieces that followed "::" to the end of the address,
    # leaving zeros in the gap.
    (piece_index - compress).times do |j|
      address[7 - j] = address[piece_index - 1 - j]
      address[piece_index - 1 - j] = 0
    end
  elsif piece_index != 8
    raise ParseError, "invalid IPv6 format"
  end

  compress_ipv6(address)
end
|
|
78
142
|
|
|
79
|
-
def compress_ipv6(
|
|
80
|
-
|
|
81
|
-
|
|
143
|
+
# Serializes eight 16-bit IPv6 pieces as a bracketed string of lowercase hex
# groups, replacing the first longest run of two or more zero pieces with
# "::".
#
# address - Array whose first eight elements are Integers.
#
# Returns a String such as "[2001:db8::1]".
def compress_ipv6(address)
  best_base = nil
  best_len = 0
  base = nil
  len = 0

  (0...8).each do |idx|
    if address[idx] == 0
      base ||= idx
      len += 1
      # Strictly greater, so the earliest run wins when two runs tie.
      best_base, best_len = base, len if len > best_len
    else
      base = nil
      len = 0
    end
  end

  hex = ->(range) { range.map { |idx| address[idx].to_s(16) }.join(":") }

  # A lone zero piece is written out as "0" rather than compressed.
  return "[#{hex.call(0...8)}]" if best_len < 2

  # Empty head or tail naturally yields "::x", "x::" or "::".
  "[#{hex.call(0...best_base)}::#{hex.call((best_base + best_len)...8)}]"
end
|
|
90
191
|
|
|
91
192
|
def parse_opaque_host(host)
|
|
@@ -126,7 +227,7 @@ class URI::WhatwgParser
|
|
|
126
227
|
validation_error = false
|
|
127
228
|
r = 10
|
|
128
229
|
|
|
129
|
-
if str.size >= 2 &&
|
|
230
|
+
if str.size >= 2 && str.start_with?("0x", "0X")
|
|
130
231
|
validation_error = true
|
|
131
232
|
str = str[2..-1]
|
|
132
233
|
r = 16
|
data/lib/uri/whatwg_parser.rb
CHANGED
|
@@ -126,22 +126,22 @@ module URI
|
|
|
126
126
|
@state = :path_or_authority_state
|
|
127
127
|
@pos += 1
|
|
128
128
|
else
|
|
129
|
-
@parse_result[:
|
|
129
|
+
@parse_result[:opaque] = ""
|
|
130
130
|
@state = :opaque_path_state
|
|
131
131
|
end
|
|
132
132
|
else
|
|
133
133
|
@buffer.clear
|
|
134
|
-
@pos
|
|
134
|
+
@pos = -1
|
|
135
135
|
@state = :no_scheme_state
|
|
136
136
|
end
|
|
137
137
|
end
|
|
138
138
|
|
|
139
139
|
def no_scheme_state(c)
|
|
140
|
-
raise ParseError, "scheme is missing" if @base.nil? || !@base[:opaque].nil? && c != "#"
|
|
140
|
+
raise ParseError, "scheme is missing" if @base.nil? || (!@base[:opaque].nil? && c != "#")
|
|
141
141
|
|
|
142
142
|
if !@base[:opaque].nil? && c == "#"
|
|
143
143
|
@parse_result[:scheme] = @base[:scheme]
|
|
144
|
-
@
|
|
144
|
+
@paths = @base_paths
|
|
145
145
|
@parse_result[:query] = @base[:query]
|
|
146
146
|
@parse_result[:fragment] = nil
|
|
147
147
|
@state = :fragment_state
|
|
@@ -180,10 +180,11 @@ module URI
|
|
|
180
180
|
elsif special_url? && c == "\\"
|
|
181
181
|
@state = :relative_slash_state
|
|
182
182
|
else
|
|
183
|
+
|
|
183
184
|
@parse_result[:userinfo] = @base[:userinfo]
|
|
184
185
|
@parse_result[:host] = @base[:host]
|
|
185
186
|
@parse_result[:port] = @base[:port]
|
|
186
|
-
@
|
|
187
|
+
@paths = @base_paths
|
|
187
188
|
@parse_result[:query] = @base[:query]
|
|
188
189
|
|
|
189
190
|
if c == "?"
|
|
@@ -396,7 +397,6 @@ module URI
|
|
|
396
397
|
@paths ||= []
|
|
397
398
|
|
|
398
399
|
if (c.nil? || c == "/") || (special_url? && c == "\/") || (c == "?" || c == "#")
|
|
399
|
-
|
|
400
400
|
if double_dot_path_segments?(@buffer)
|
|
401
401
|
shorten_url_path
|
|
402
402
|
if c != "/" || (special_url? && c == "\/")
|
|
@@ -408,6 +408,7 @@ module URI
|
|
|
408
408
|
if @parse_result[:scheme] == "file" && @paths.empty? && windows_drive_letter?(@buffer)
|
|
409
409
|
@buffer[1] = ":"
|
|
410
410
|
end
|
|
411
|
+
|
|
411
412
|
@paths << @buffer
|
|
412
413
|
end
|
|
413
414
|
|
|
@@ -433,13 +434,13 @@ module URI
|
|
|
433
434
|
@parse_result[:fragment] = nil
|
|
434
435
|
@state = :fragment_state
|
|
435
436
|
elsif c == " "
|
|
436
|
-
if rest.start_with?("?"
|
|
437
|
-
@parse_result[:
|
|
437
|
+
if rest.start_with?("?", "#")
|
|
438
|
+
@parse_result[:opaque] += "%20"
|
|
438
439
|
else
|
|
439
|
-
@parse_result[:
|
|
440
|
+
@parse_result[:opaque] += " "
|
|
440
441
|
end
|
|
441
442
|
elsif !c.nil?
|
|
442
|
-
@parse_result[:
|
|
443
|
+
@parse_result[:opaque] += percent_encode(c, C0_CONTROL_PERCENT_ENCODE_SET, @encoding)
|
|
443
444
|
end
|
|
444
445
|
end
|
|
445
446
|
|
|
@@ -488,10 +489,10 @@ module URI
|
|
|
488
489
|
end
|
|
489
490
|
|
|
490
491
|
# Removes the last segment from @paths, except that a "file" URL whose only
# segment is a normalized Windows drive letter keeps it.
#
# Returns nil when @paths is unset, true when the drive letter is kept, and
# otherwise the result of @paths.pop.
def shorten_url_path
  return if @paths.nil?

  lone_drive_letter =
    @parse_result[:scheme] == "file" &&
    @paths.length == 1 &&
    normalized_windows_drive_letter?(@paths.first)
  return true if lone_drive_letter

  @paths.pop
end
|
|
496
497
|
|
|
497
498
|
def rest
|
metadata
CHANGED
|
@@ -1,13 +1,13 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: uri-whatwg_parser
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.1.
|
|
4
|
+
version: 0.1.6
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Yuji Yaginuma
|
|
8
8
|
bindir: exe
|
|
9
9
|
cert_chain: []
|
|
10
|
-
date:
|
|
10
|
+
date: 1980-01-02 00:00:00.000000000 Z
|
|
11
11
|
dependencies:
|
|
12
12
|
- !ruby/object:Gem::Dependency
|
|
13
13
|
name: uri
|
|
@@ -86,7 +86,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
|
86
86
|
- !ruby/object:Gem::Version
|
|
87
87
|
version: '0'
|
|
88
88
|
requirements: []
|
|
89
|
-
rubygems_version: 3.6.
|
|
89
|
+
rubygems_version: 3.6.7
|
|
90
90
|
specification_version: 4
|
|
91
91
|
summary: Ruby implementation of the WHATWG URL Living Standard
|
|
92
92
|
test_files: []
|