uri-whatwg_parser 0.2.0 → 0.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +4 -0
- data/README.md +1 -1
- data/lib/uri/whatwg_parser/generic.rb +2 -2
- data/lib/uri/whatwg_parser/host_parser.rb +37 -24
- data/lib/uri/whatwg_parser/parser_helper.rb +10 -18
- data/lib/uri/whatwg_parser/version.rb +1 -1
- data/lib/uri/whatwg_parser.rb +144 -77
- metadata +1 -15
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: bc2c1e5428af4eaed582a417234dec63ac59554af14bbbbd1cc0c3017e8e32b9
|
|
4
|
+
data.tar.gz: 749b4ba051cb58a73f0ef8ebe64d6426a4d1f0803176641ff75af539757130c1
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: a28ffa266d8013c02ed0da9bfd285eaff113527772019a135ddd092310b942c01b6beab710336d37665f8ec3dcc18829edad14a0a97168ef1fc3c651148eb4f0
|
|
7
|
+
data.tar.gz: 1ba90031895fe24a39b1ae7ff127ea7af821cb39a0801b83234c40b63801c9ee6c886f1bb0e423bc9c6e9c817d31bea29661db7e22978c8b66891b9864a953f4
|
data/CHANGELOG.md
CHANGED
data/README.md
CHANGED
|
@@ -2,7 +2,7 @@
|
|
|
2
2
|
|
|
3
3
|
Ruby implementation of the [WHATWG URL Living Standard](https://url.spec.whatwg.org/).
|
|
4
4
|
|
|
5
|
-
The latest revision that this package implements of the standard is
|
|
5
|
+
The latest revision that this package implements of the standard is [13 January 2026](https://url.spec.whatwg.org/commit-snapshots/b6b3251fe911ab33d68fb051efe0e4d39ae4145e/).
|
|
6
6
|
|
|
7
7
|
## Installation
|
|
8
8
|
|
|
@@ -62,7 +62,7 @@ module URI
|
|
|
62
62
|
if host.nil? || host.empty? || scheme == "file"
|
|
63
63
|
raise InvalidURIError, "cannot set user when host is nil or file schme"
|
|
64
64
|
end
|
|
65
|
-
set_user(URI::DEFAULT_PARSER.
|
|
65
|
+
set_user(URI::DEFAULT_PARSER.utf8_percent_encode_string(v, URI::WhatwgParser::USERINFO_PERCENT_ENCODE_SET))
|
|
66
66
|
end
|
|
67
67
|
|
|
68
68
|
def password=(v)
|
|
@@ -72,7 +72,7 @@ module URI
|
|
|
72
72
|
if host.nil? || host.empty? || scheme == "file"
|
|
73
73
|
raise InvalidURIError, "cannot set password when host is nil or file schme"
|
|
74
74
|
end
|
|
75
|
-
set_password(URI::DEFAULT_PARSER.
|
|
75
|
+
set_password(URI::DEFAULT_PARSER.utf8_percent_encode_string(v, URI::WhatwgParser::USERINFO_PERCENT_ENCODE_SET))
|
|
76
76
|
end
|
|
77
77
|
|
|
78
78
|
def host=(v)
|
|
@@ -7,8 +7,10 @@ class URI::WhatwgParser
|
|
|
7
7
|
class HostParser
|
|
8
8
|
include ParserHelper
|
|
9
9
|
|
|
10
|
-
FORBIDDEN_HOST_CODE_POINT = ["\x00", "\t", "\x0a", "\x0d", " ", "#", "/", ":", "<", ">", "?", "@", "[", "\\", "]", "^", "|"]
|
|
11
|
-
FORBIDDEN_DOMAIN_CODE_POINT = FORBIDDEN_HOST_CODE_POINT
|
|
10
|
+
FORBIDDEN_HOST_CODE_POINT = Set["\x00", "\t", "\x0a", "\x0d", " ", "#", "/", ":", "<", ">", "?", "@", "[", "\\", "]", "^", "|"]
|
|
11
|
+
FORBIDDEN_DOMAIN_CODE_POINT = FORBIDDEN_HOST_CODE_POINT | C0_CONTROL_PERCENT_ENCODE_SET | Set["%", "\x7f"]
|
|
12
|
+
FORBIDDEN_HOST_REGEX = Regexp.union(FORBIDDEN_HOST_CODE_POINT.to_a)
|
|
13
|
+
FORBIDDEN_DOMAIN_REGEX = Regexp.union(FORBIDDEN_DOMAIN_CODE_POINT.to_a)
|
|
12
14
|
|
|
13
15
|
def parse(input, opaque = false) # :nodoc:
|
|
14
16
|
return "" if input&.empty?
|
|
@@ -39,8 +41,7 @@ class URI::WhatwgParser
|
|
|
39
41
|
raise URI::WhatwgParser::ParseError, "invalid IPv4 format" if parts.size > 4
|
|
40
42
|
numbers = []
|
|
41
43
|
parts.each do |part|
|
|
42
|
-
|
|
43
|
-
numbers << value
|
|
44
|
+
numbers << parse_ipv4_number(part)
|
|
44
45
|
end
|
|
45
46
|
|
|
46
47
|
(numbers.size-1).times {|i| raise URI::WhatwgParser::ParseError, "invalid IPv4 format" if numbers[i] > 255 }
|
|
@@ -191,7 +192,7 @@ class URI::WhatwgParser
|
|
|
191
192
|
|
|
192
193
|
# Validates +host+ and returns it percent-encoded.
#
# Raises ParseError when +host+ contains a forbidden host code point.
# Otherwise every character is passed through utf8_percent_encode with
# C0_CONTROL_PERCENT_ENCODE_SET and the pieces are joined back together.
def parse_opaque_host(host)
  raise ParseError if include_forbidden_host_code_point?(host)

  host.each_char.map { |char| utf8_percent_encode(char, C0_CONTROL_PERCENT_ENCODE_SET) }.join
end
|
|
196
197
|
|
|
197
198
|
def percent_decode(str)
|
|
@@ -201,51 +202,63 @@ class URI::WhatwgParser
|
|
|
201
202
|
end
|
|
202
203
|
|
|
203
204
|
# Reports whether the last label of +domain+ looks like an IPv4 number.
#
# A single trailing dot is ignored. The last label qualifies when it is made
# only of decimal digits, or when it is "0x"/"0X" followed by zero or more
# hexadecimal digits. Returns false for an empty domain or an empty last label.
def ends_in_number?(domain)
  # chomp(".") removes at most one trailing dot, which is all that is ignored.
  trimmed = domain.chomp(".")
  return false if trimmed.empty?

  # rpartition yields the whole string as the last element when there is no dot.
  last = trimmed.rpartition(".").last
  return false if last.empty?
  return true if last.match?(/\A\d+\z/)

  last.match?(/\A0[xX][0-9A-Fa-f]*\z/)
end
|
|
221
230
|
|
|
222
231
|
# Parses one dot-separated part of an IPv4 address into an Integer.
#
# A "0x"/"0X" prefix selects hexadecimal and a leading "0" selects octal;
# anything else is decimal. A part that is empty after its prefix is removed
# ("0x", "0") evaluates to 0.
#
# Raises ParseError when +str+ is nil or empty, or when the remaining
# characters are not all digits of the selected radix.
def parse_ipv4_number(str)
  raise ParseError, "invalid IPv4 format" if str.nil? || str.empty?

  digits = str
  radix = 10
  allowed = /\A[0-9]*\z/

  if str.size >= 2 && str.start_with?("0x", "0X")
    digits = str[2..-1]
    radix = 16
    allowed = /\A[0-9A-Fa-f]*\z/
  elsif str.size >= 2 && str.start_with?("0")
    digits = str[1..-1]
    radix = 8
    allowed = /\A[0-7]*\z/
  end

  return 0 if digits.empty?

  # Kernel#Integer is more lenient than an IPv4 number: it accepts "_"
  # separators, a leading sign, surrounding whitespace and a second radix
  # prefix (e.g. "0x0x10", "00o17"). Check the digits explicitly so those
  # inputs fail instead of being silently read as numbers.
  raise ParseError, "invalid IPv4 format" unless digits.match?(allowed)

  Integer(digits, radix)
end
|
|
247
252
|
|
|
248
253
|
def domain_to_ascii(domain)
|
|
254
|
+
# If domain is already ASCII-only, lowercase, and doesn't contain punycode prefix
|
|
255
|
+
# we can skip IDNA processing
|
|
256
|
+
if domain.ascii_only? && domain == domain.downcase && !domain.include?("xn--")
|
|
257
|
+
raise ParseError, "including invalid value in host" if include_forbidden_domain_code_point?(domain)
|
|
258
|
+
raise ParseError, "host can't be empty" if domain.empty?
|
|
259
|
+
return domain
|
|
260
|
+
end
|
|
261
|
+
|
|
249
262
|
ascii_domain = URI::IDNA.whatwg_to_ascii(domain.force_encoding(Encoding::UTF_8), be_strict: false)
|
|
250
263
|
|
|
251
264
|
raise ParseError, "including invalid value in host" if include_forbidden_domain_code_point?(ascii_domain)
|
|
@@ -255,11 +268,11 @@ class URI::WhatwgParser
|
|
|
255
268
|
end
|
|
256
269
|
|
|
257
270
|
# True when +str+ contains any character matched by FORBIDDEN_DOMAIN_REGEX.
def include_forbidden_domain_code_point?(str)
  str.match?(FORBIDDEN_DOMAIN_REGEX)
end
|
|
260
273
|
|
|
261
274
|
# True when +str+ contains any character matched by FORBIDDEN_HOST_REGEX.
def include_forbidden_host_code_point?(str)
  str.match?(FORBIDDEN_HOST_REGEX)
end
|
|
264
277
|
end
|
|
265
278
|
end
|
|
@@ -1,31 +1,23 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
2
|
|
|
3
|
+
require "set"
|
|
4
|
+
|
|
3
5
|
class URI::WhatwgParser
  # Percent-encoding helpers meant to be mixed into the parser classes.
  module ParserHelper
    # The characters U+0000 through U+001F only. Code points above U+007E are
    # deliberately not listed here: utf8_percent_encode catches them with its
    # own ordinal check.
    C0_CONTROL_PERCENT_ENCODE_SET = Set.new((0x00..0x1f).map { |code| code.chr })

    # Returns +c+ unchanged unless it is a member of +encode_set+ or its code
    # point is above 0x7E; in that case every byte of +c+ is rendered as "%XX"
    # (uppercase hex).
    def utf8_percent_encode(c, encode_set)
      must_encode = encode_set.include?(c) || c.ord > 0x7e
      return c unless must_encode

      c.each_byte.map { |byte| format("%%%02X", byte) }.join
    end

    # Applies utf8_percent_encode to each character of +str+ and joins the results.
    def utf8_percent_encode_string(str, encode_set)
      str.each_char.map { |char| utf8_percent_encode(char, encode_set) }.join
    end
  end
end
|
data/lib/uri/whatwg_parser.rb
CHANGED
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
2
|
|
|
3
|
+
require "set"
|
|
3
4
|
require "uri"
|
|
4
5
|
require_relative "whatwg_parser/error"
|
|
5
6
|
require_relative "whatwg_parser/version"
|
|
@@ -13,19 +14,28 @@ module URI
|
|
|
13
14
|
|
|
14
15
|
SPECIAL_SCHEME = { "ftp" => 21, "file" => nil, "http" => 80, "https" => 443, "ws" => 80, "wss" => 443 }
|
|
15
16
|
|
|
16
|
-
FRAGMENT_PERCENT_ENCODE_SET = C0_CONTROL_PERCENT_ENCODE_SET
|
|
17
|
-
QUERY_PERCENT_ENCODE_SET = C0_CONTROL_PERCENT_ENCODE_SET
|
|
18
|
-
SPECIAL_QUERY_PERCENT_ENCODE_SET = QUERY_PERCENT_ENCODE_SET
|
|
19
|
-
PATH_PERCENT_ENCODE_SET = QUERY_PERCENT_ENCODE_SET
|
|
20
|
-
USERINFO_PERCENT_ENCODE_SET = PATH_PERCENT_ENCODE_SET
|
|
17
|
+
FRAGMENT_PERCENT_ENCODE_SET = C0_CONTROL_PERCENT_ENCODE_SET | Set[" ", "\"", "<", ">", "`"]
|
|
18
|
+
QUERY_PERCENT_ENCODE_SET = C0_CONTROL_PERCENT_ENCODE_SET | Set[" ", "\"", "#", "<", ">"]
|
|
19
|
+
SPECIAL_QUERY_PERCENT_ENCODE_SET = QUERY_PERCENT_ENCODE_SET | Set["'"]
|
|
20
|
+
PATH_PERCENT_ENCODE_SET = QUERY_PERCENT_ENCODE_SET | Set["?", "^", "`", "{", "}"]
|
|
21
|
+
USERINFO_PERCENT_ENCODE_SET = PATH_PERCENT_ENCODE_SET | Set["/", ":", ";", "=", "@", "[", "\\", "]", "|"]
|
|
21
22
|
|
|
22
|
-
SINGLE_DOT_PATH_SEGMENTS = [".", "%2e", "%2E"]
|
|
23
|
-
DOUBLE_DOT_PATH_SEGMENTS = ["..", ".%2e", ".%2E", "%2e.", "%2e%2e", "%2e%2E", "%2E.", "%2E%2e", "%2E%2E"]
|
|
23
|
+
SINGLE_DOT_PATH_SEGMENTS = Set[".", "%2e", "%2E"]
|
|
24
|
+
DOUBLE_DOT_PATH_SEGMENTS = Set["..", ".%2e", ".%2E", "%2e.", "%2e%2e", "%2e%2E", "%2E.", "%2E%2e", "%2E%2E"]
|
|
24
25
|
|
|
25
26
|
WINDOWS_DRIVE_LETTER = Regexp.new("\\A([a-zA-Z][:|])\\z")
|
|
26
27
|
NORMALIZED_WINDOWS_DRIVE_LETTER = Regexp.new("\\A([a-zA-Z][:])\\z")
|
|
27
28
|
STARTS_WITH_WINDOWS_DRIVE_LETTER = Regexp.new("\\A([a-zA-Z][:|])(?:[/\\?#])?\\z")
|
|
28
29
|
|
|
30
|
+
VALID_SIGNS_FOR_SCHEME = Set["+", "-", "."]
|
|
31
|
+
DELIMITER_SIGNS = Set["/", "?", "#"]
|
|
32
|
+
|
|
33
|
+
WS_SCHEMES = Set["ws", "wss"]
|
|
34
|
+
|
|
35
|
+
ASCII_ALPHA_LOWERCASE = Set.new(("a".."z").to_a)
|
|
36
|
+
ASCII_ALPHA_UPPERCASE = Set.new(("A".."Z").to_a)
|
|
37
|
+
ASCII_DIGIT = Set.new(("0".."9").to_a)
|
|
38
|
+
|
|
29
39
|
def initialize
|
|
30
40
|
reset
|
|
31
41
|
@host_parser = HostParser.new
|
|
@@ -35,15 +45,15 @@ module URI
|
|
|
35
45
|
{}
|
|
36
46
|
end
|
|
37
47
|
|
|
38
|
-
def parse(input, base: nil,
|
|
39
|
-
URI.for(*self.split(input, base: base,
|
|
48
|
+
# Splits +input+ into its components via #split (forwarding +base+, +url+ and
# +state_override+) and hands them to URI.for.
def parse(input, base: nil, url: nil, state_override: nil) # :nodoc:
  components = split(input, base: base, url: url, state_override: state_override)
  URI.for(*components)
end
|
|
41
51
|
|
|
42
|
-
def split(input, base: nil,
|
|
52
|
+
def split(input, base: nil, url: nil, state_override: nil) # :nodoc:
|
|
43
53
|
reset
|
|
44
54
|
@base = nil
|
|
45
55
|
if base != nil
|
|
46
|
-
ary = split(base, base: nil
|
|
56
|
+
ary = split(base, base: nil)
|
|
47
57
|
@base = { scheme: ary[0], userinfo: ary[1], host: ary[2], port: ary[3], registry: ary[4], path: ary[5], opaque: ary[6], query: ary[7], fragment: ary[8]}
|
|
48
58
|
@base_paths = @paths
|
|
49
59
|
reset
|
|
@@ -52,7 +62,10 @@ module URI
|
|
|
52
62
|
if url
|
|
53
63
|
raise ArgumentError, "bad argument (expected URI object)" unless url.is_a?(URI::Generic)
|
|
54
64
|
@parse_result.merge!(url.component.zip(url.send(:component_ary)).to_h)
|
|
55
|
-
@
|
|
65
|
+
@username = url.user
|
|
66
|
+
@password = url.password
|
|
67
|
+
@parse_result.delete(:userinfo)
|
|
68
|
+
@special_url = special_url?(@parse_result[:scheme])
|
|
56
69
|
end
|
|
57
70
|
|
|
58
71
|
if state_override
|
|
@@ -63,30 +76,27 @@ module URI
|
|
|
63
76
|
raise ParseError, "uri can't be empty" if (input.nil? || input.empty?) && @base.nil?
|
|
64
77
|
end
|
|
65
78
|
|
|
66
|
-
|
|
67
|
-
@input = input.dup
|
|
79
|
+
input = input.dup
|
|
68
80
|
|
|
69
81
|
unless url
|
|
70
|
-
|
|
71
|
-
@input.sub!(/[\u0000-\u0020]*\z/, "")
|
|
82
|
+
remove_c0_control_or_space!(input)
|
|
72
83
|
end
|
|
73
84
|
|
|
74
|
-
|
|
75
|
-
@input.delete!("\n")
|
|
76
|
-
@input.delete!("\r")
|
|
85
|
+
input.delete!("\t\n\r") if /[\t\n\r]/.match?(input)
|
|
77
86
|
|
|
87
|
+
@input_chars = input.chars
|
|
88
|
+
input_chars_length = @input_chars.length
|
|
78
89
|
@pos = 0
|
|
79
90
|
|
|
80
|
-
while @pos <=
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
break if ret == :terminate
|
|
91
|
+
while @pos <= input_chars_length
|
|
92
|
+
dispatch_state(@input_chars[@pos])
|
|
93
|
+
break if @terminate
|
|
84
94
|
@pos += 1
|
|
85
95
|
end
|
|
86
96
|
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
@parse_result
|
|
97
|
+
userinfo = [@username, @password].compact.reject(&:empty?).join(":")
|
|
98
|
+
path = "/#{@paths.join("/")}" if @paths && !@paths.empty?
|
|
99
|
+
[@parse_result[:scheme], userinfo, @parse_result[:host], @parse_result[:port], @parse_result[:registry], path, @parse_result[:opaque], @parse_result[:query], @parse_result[:fragment]]
|
|
90
100
|
end
|
|
91
101
|
|
|
92
102
|
def join(*uris)
|
|
@@ -101,14 +111,33 @@ module URI
|
|
|
101
111
|
uri
|
|
102
112
|
end
|
|
103
113
|
|
|
104
|
-
def encode_userinfo(str)
|
|
105
|
-
str.chars.map do |char|
|
|
106
|
-
percent_encode(char, USERINFO_PERCENT_ENCODE_SET)
|
|
107
|
-
end.join
|
|
108
|
-
end
|
|
109
|
-
|
|
110
114
|
private
|
|
111
115
|
|
|
116
|
+
# Invokes the handler method named by @state with the current character +c+
# and returns that handler's result. A @state value that is not listed here
# matches no branch, so nothing is called and nil is returned.
def dispatch_state(c)
  case @state
  when :scheme_start_state
    scheme_start_state(c)
  when :scheme_state
    scheme_state(c)
  when :no_scheme_state
    no_scheme_state(c)
  when :special_relative_or_authority_state
    special_relative_or_authority_state(c)
  when :path_or_authority_state
    path_or_authority_state(c)
  when :relative_state
    relative_state(c)
  when :relative_slash_state
    relative_slash_state(c)
  when :special_authority_slashes_state
    special_authority_slashes_state(c)
  when :special_authority_ignore_slashes_state
    special_authority_ignore_slashes_state(c)
  when :authority_state
    authority_state(c)
  when :host_state
    host_state(c)
  when :port_state
    port_state(c)
  when :file_state
    file_state(c)
  when :file_slash_state
    file_slash_state(c)
  when :file_host_state
    file_host_state(c)
  when :path_start_state
    path_start_state(c)
  when :path_state
    path_state(c)
  when :opaque_path_state
    opaque_path_state(c)
  when :query_state
    query_state(c)
  when :fragment_state
    fragment_state(c)
  end
end
|
|
140
|
+
|
|
112
141
|
def reset
|
|
113
142
|
@buffer = +""
|
|
114
143
|
@at_sign_seen = nil
|
|
@@ -117,13 +146,18 @@ module URI
|
|
|
117
146
|
@paths = nil
|
|
118
147
|
@username = nil
|
|
119
148
|
@password = nil
|
|
120
|
-
@parse_result = { scheme: nil,
|
|
149
|
+
@parse_result = { scheme: nil, host: nil, port: nil, registry: nil, path: nil, opaque: nil, query: nil, fragment: nil }
|
|
121
150
|
@state_override = nil
|
|
122
151
|
@state = :scheme_start_state
|
|
152
|
+
@special_url = nil
|
|
153
|
+
@terminate = nil
|
|
123
154
|
end
|
|
124
155
|
|
|
125
156
|
def scheme_start_state(c)
|
|
126
|
-
if
|
|
157
|
+
if ASCII_ALPHA_LOWERCASE.include?(c)
|
|
158
|
+
@buffer << c
|
|
159
|
+
@state = :scheme_state
|
|
160
|
+
elsif ASCII_ALPHA_UPPERCASE.include?(c)
|
|
127
161
|
@buffer << c.downcase
|
|
128
162
|
@state = :scheme_state
|
|
129
163
|
elsif @state_override.nil?
|
|
@@ -135,7 +169,9 @@ module URI
|
|
|
135
169
|
end
|
|
136
170
|
|
|
137
171
|
def scheme_state(c)
|
|
138
|
-
if
|
|
172
|
+
if ASCII_ALPHA_LOWERCASE.include?(c) || ASCII_DIGIT.include?(c) || VALID_SIGNS_FOR_SCHEME.include?(c)
|
|
173
|
+
@buffer << c
|
|
174
|
+
elsif ASCII_ALPHA_UPPERCASE.include?(c)
|
|
139
175
|
@buffer << c.downcase
|
|
140
176
|
elsif c == ":"
|
|
141
177
|
if @state_override
|
|
@@ -143,17 +179,20 @@ module URI
|
|
|
143
179
|
(!special_url? && special_url?(@buffer)) ||
|
|
144
180
|
((includes_credentials? || !@parse_result[:port].nil?) && @buffer == "file") ||
|
|
145
181
|
(@parse_result[:scheme] == "file" && @parse_result[:host]&.empty?)
|
|
146
|
-
|
|
182
|
+
@terminate = true
|
|
183
|
+
return
|
|
147
184
|
end
|
|
148
185
|
end
|
|
149
186
|
|
|
150
187
|
@parse_result[:scheme] = @buffer
|
|
188
|
+
@special_url = special_url?(@buffer)
|
|
151
189
|
|
|
152
190
|
if @state_override
|
|
153
191
|
if SPECIAL_SCHEME.value?(@parse_result[:port].to_i)
|
|
154
192
|
@parse_result[:port] = nil
|
|
155
193
|
end
|
|
156
|
-
|
|
194
|
+
@terminate = true
|
|
195
|
+
return
|
|
157
196
|
end
|
|
158
197
|
|
|
159
198
|
@buffer = +""
|
|
@@ -164,11 +203,11 @@ module URI
|
|
|
164
203
|
@state = :special_relative_or_authority_state
|
|
165
204
|
elsif special_url?
|
|
166
205
|
@state = :special_authority_slashes_state
|
|
167
|
-
elsif
|
|
206
|
+
elsif @input_chars[@pos + 1] == "/"
|
|
168
207
|
@state = :path_or_authority_state
|
|
169
208
|
@pos += 1
|
|
170
209
|
else
|
|
171
|
-
@parse_result[:opaque] = ""
|
|
210
|
+
@parse_result[:opaque] = +""
|
|
172
211
|
@state = :opaque_path_state
|
|
173
212
|
end
|
|
174
213
|
elsif @state_override.nil?
|
|
@@ -185,6 +224,7 @@ module URI
|
|
|
185
224
|
|
|
186
225
|
if !@base[:opaque].nil? && c == "#"
|
|
187
226
|
@parse_result[:scheme] = @base[:scheme]
|
|
227
|
+
@special_url = special_url?(@base[:scheme])
|
|
188
228
|
@paths = @base_paths
|
|
189
229
|
@parse_result[:query] = @base[:query]
|
|
190
230
|
@parse_result[:fragment] = nil
|
|
@@ -199,7 +239,7 @@ module URI
|
|
|
199
239
|
end
|
|
200
240
|
|
|
201
241
|
def special_relative_or_authority_state(c)
|
|
202
|
-
if c == "/" &&
|
|
242
|
+
if c == "/" && @input_chars[@pos + 1] == "/"
|
|
203
243
|
@state = :special_authority_ignore_slashes_state
|
|
204
244
|
@pos -= 1
|
|
205
245
|
else
|
|
@@ -219,6 +259,7 @@ module URI
|
|
|
219
259
|
|
|
220
260
|
def relative_state(c)
|
|
221
261
|
@parse_result[:scheme] = @base[:scheme]
|
|
262
|
+
@special_url = special_url?(@base[:scheme])
|
|
222
263
|
if c == "/"
|
|
223
264
|
@state = :relative_slash_state
|
|
224
265
|
elsif special_url? && c == "\\"
|
|
@@ -246,7 +287,7 @@ module URI
|
|
|
246
287
|
end
|
|
247
288
|
|
|
248
289
|
def relative_slash_state(c)
|
|
249
|
-
if special_url
|
|
290
|
+
if @special_url && (c == "/" || c == "\\")
|
|
250
291
|
@state = :special_authority_ignore_slashes_state
|
|
251
292
|
elsif c == "/"
|
|
252
293
|
@state = :authority_state
|
|
@@ -260,7 +301,7 @@ module URI
|
|
|
260
301
|
end
|
|
261
302
|
|
|
262
303
|
def special_authority_slashes_state(c)
|
|
263
|
-
if c == "/" &&
|
|
304
|
+
if c == "/" && @input_chars[@pos + 1] == "/"
|
|
264
305
|
@state = :special_authority_ignore_slashes_state
|
|
265
306
|
@pos += 1
|
|
266
307
|
else
|
|
@@ -280,23 +321,23 @@ module URI
|
|
|
280
321
|
if c == "@"
|
|
281
322
|
@buffer.prepend("%40") if @at_sign_seen
|
|
282
323
|
@at_sign_seen = true
|
|
283
|
-
@buffer.
|
|
324
|
+
@buffer.each_char do |char|
|
|
284
325
|
if char == ":" && !@password_token_seen
|
|
285
326
|
@password_token_seen = true
|
|
286
327
|
next
|
|
287
328
|
end
|
|
288
329
|
|
|
289
|
-
encoded_char =
|
|
330
|
+
encoded_char = utf8_percent_encode(char, USERINFO_PERCENT_ENCODE_SET)
|
|
290
331
|
|
|
291
332
|
if @password_token_seen
|
|
292
|
-
@password
|
|
333
|
+
(@password ||= +"") << encoded_char
|
|
293
334
|
else
|
|
294
|
-
@username
|
|
335
|
+
(@username ||= +"") << encoded_char
|
|
295
336
|
end
|
|
296
337
|
end
|
|
297
338
|
|
|
298
339
|
@buffer.clear
|
|
299
|
-
elsif c.nil? ||
|
|
340
|
+
elsif c.nil? || DELIMITER_SIGNS.include?(c) || (@special_url && c == "\\")
|
|
300
341
|
raise ParseError, "host is missing" if @at_sign_seen && @buffer.empty?
|
|
301
342
|
|
|
302
343
|
@pos -= (@buffer.size + 1)
|
|
@@ -315,20 +356,23 @@ module URI
|
|
|
315
356
|
raise ParseError, "host is missing" if @buffer.empty?
|
|
316
357
|
raise ParseError, "invalid host" if @state_override && @state_override == :hostname_state
|
|
317
358
|
|
|
318
|
-
@parse_result[:host] = @host_parser.parse(@buffer,
|
|
359
|
+
@parse_result[:host] = @host_parser.parse(@buffer, !@special_url)
|
|
319
360
|
@buffer.clear
|
|
320
361
|
@state = :port_state
|
|
321
|
-
elsif c.nil? ||
|
|
362
|
+
elsif c.nil? || DELIMITER_SIGNS.include?(c) || (@special_url && c == "\\")
|
|
322
363
|
@pos -= 1
|
|
323
|
-
if special_url
|
|
364
|
+
if @special_url && @buffer.empty?
|
|
324
365
|
raise ParseError, "host is missing"
|
|
325
366
|
elsif @state_override && @buffer.empty? && (includes_credentials? || !@parse_result[:port].nil?)
|
|
326
367
|
raise ParseError, "invalid host"
|
|
327
368
|
else
|
|
328
|
-
@parse_result[:host] = @host_parser.parse(@buffer,
|
|
369
|
+
@parse_result[:host] = @host_parser.parse(@buffer, !@special_url)
|
|
329
370
|
@buffer.clear
|
|
330
371
|
@state = :path_start_state
|
|
331
|
-
|
|
372
|
+
if @state_override
|
|
373
|
+
@terminate = true
|
|
374
|
+
return
|
|
375
|
+
end
|
|
332
376
|
end
|
|
333
377
|
else
|
|
334
378
|
@inside_brackets = true if c == "["
|
|
@@ -338,9 +382,9 @@ module URI
|
|
|
338
382
|
end
|
|
339
383
|
|
|
340
384
|
def port_state(c)
|
|
341
|
-
if
|
|
385
|
+
if ASCII_DIGIT.include?(c)
|
|
342
386
|
@buffer << c
|
|
343
|
-
elsif c.nil? ||
|
|
387
|
+
elsif c.nil? || DELIMITER_SIGNS.include?(c) || (@special_url && c == "\\") || @state_override
|
|
344
388
|
unless @buffer.empty?
|
|
345
389
|
port = Integer(@buffer, 10)
|
|
346
390
|
raise ParseError, "port is invalid value" if port < 0 || port > 65535
|
|
@@ -351,7 +395,10 @@ module URI
|
|
|
351
395
|
end
|
|
352
396
|
|
|
353
397
|
@buffer.clear
|
|
354
|
-
|
|
398
|
+
if @state_override
|
|
399
|
+
@terminate = true
|
|
400
|
+
return
|
|
401
|
+
end
|
|
355
402
|
end
|
|
356
403
|
|
|
357
404
|
raise ParseError, "port is invalid value" if @state_override
|
|
@@ -364,6 +411,7 @@ module URI
|
|
|
364
411
|
|
|
365
412
|
def file_state(c)
|
|
366
413
|
@parse_result[:scheme] = "file"
|
|
414
|
+
@special_url = true
|
|
367
415
|
@parse_result[:host] = nil
|
|
368
416
|
|
|
369
417
|
if c == "/" || c == "\\"
|
|
@@ -412,20 +460,26 @@ module URI
|
|
|
412
460
|
end
|
|
413
461
|
|
|
414
462
|
def file_host_state(c)
|
|
415
|
-
if c.nil? || c
|
|
463
|
+
if c.nil? || DELIMITER_SIGNS.include?(c) || (@special_url && c == "\\")
|
|
416
464
|
@pos -= 1
|
|
417
465
|
|
|
418
466
|
if !@state_override && windows_drive_letter?(@buffer)
|
|
419
467
|
@state = :path_state
|
|
420
468
|
elsif @buffer.empty?
|
|
421
469
|
@parse_result[:host] = nil
|
|
422
|
-
|
|
470
|
+
if @state_override
|
|
471
|
+
@terminate = true
|
|
472
|
+
return
|
|
473
|
+
end
|
|
423
474
|
@state = :path_start_state
|
|
424
475
|
else
|
|
425
|
-
host = @host_parser.parse(@buffer,
|
|
476
|
+
host = @host_parser.parse(@buffer, !@special_url)
|
|
426
477
|
host = "" if host == "localhost"
|
|
427
478
|
@parse_result[:host] = host
|
|
428
|
-
|
|
479
|
+
if @state_override
|
|
480
|
+
@terminate = true
|
|
481
|
+
return
|
|
482
|
+
end
|
|
429
483
|
@buffer.clear
|
|
430
484
|
@state = :path_start_state
|
|
431
485
|
end
|
|
@@ -435,7 +489,7 @@ module URI
|
|
|
435
489
|
end
|
|
436
490
|
|
|
437
491
|
def path_start_state(c)
|
|
438
|
-
if special_url
|
|
492
|
+
if @special_url
|
|
439
493
|
@pos -= 1 if c != "/" && c != "\\"
|
|
440
494
|
@state = :path_state
|
|
441
495
|
elsif !@state_override && c == "?"
|
|
@@ -454,14 +508,14 @@ module URI
|
|
|
454
508
|
def path_state(c)
|
|
455
509
|
@paths ||= []
|
|
456
510
|
|
|
457
|
-
if (c.nil? || c == "/") || (special_url
|
|
511
|
+
if (c.nil? || c == "/") || (@special_url && c == "\\") || (!@state_override && (c == "?" || c == "#"))
|
|
458
512
|
if double_dot_path_segments?(@buffer)
|
|
459
513
|
shorten_url_path
|
|
460
514
|
|
|
461
|
-
if c != "/" && !(special_url
|
|
515
|
+
if c != "/" && !(@special_url && c == "\\")
|
|
462
516
|
@paths << ""
|
|
463
517
|
end
|
|
464
|
-
elsif single_dot_path_segments?(@buffer) && c != "/" && !((special_url
|
|
518
|
+
elsif single_dot_path_segments?(@buffer) && c != "/" && !((@special_url && c == "\\"))
|
|
465
519
|
@paths << ""
|
|
466
520
|
elsif !single_dot_path_segments?(@buffer)
|
|
467
521
|
if @parse_result[:scheme] == "file" && @paths.empty? && windows_drive_letter?(@buffer)
|
|
@@ -481,7 +535,7 @@ module URI
|
|
|
481
535
|
@state = :fragment_state
|
|
482
536
|
end
|
|
483
537
|
else
|
|
484
|
-
@buffer <<
|
|
538
|
+
@buffer << utf8_percent_encode(c, PATH_PERCENT_ENCODE_SET)
|
|
485
539
|
end
|
|
486
540
|
end
|
|
487
541
|
|
|
@@ -493,24 +547,22 @@ module URI
|
|
|
493
547
|
@parse_result[:fragment] = nil
|
|
494
548
|
@state = :fragment_state
|
|
495
549
|
elsif c == " "
|
|
496
|
-
|
|
497
|
-
|
|
550
|
+
first_of_rest = @input_chars[@pos + 1]
|
|
551
|
+
if first_of_rest == "?" || first_of_rest == "#"
|
|
552
|
+
@parse_result[:opaque] << "%20"
|
|
498
553
|
else
|
|
499
|
-
@parse_result[:opaque]
|
|
554
|
+
@parse_result[:opaque] << " "
|
|
500
555
|
end
|
|
501
556
|
elsif !c.nil?
|
|
502
|
-
@parse_result[:opaque]
|
|
557
|
+
@parse_result[:opaque] << utf8_percent_encode(c, C0_CONTROL_PERCENT_ENCODE_SET)
|
|
503
558
|
end
|
|
504
559
|
end
|
|
505
560
|
|
|
506
561
|
def query_state(c)
|
|
507
|
-
if @encoding != Encoding::UTF_8 && (!special_url? || %w[ws wss].include?(@parse_result[:scheme]))
|
|
508
|
-
@encoding = Encoding::UTF_8
|
|
509
|
-
end
|
|
510
|
-
|
|
511
562
|
if c.nil? || (!@state_override && c == "#")
|
|
512
|
-
query_percent_encode_set = special_url
|
|
513
|
-
|
|
563
|
+
query_percent_encode_set = @special_url ? SPECIAL_QUERY_PERCENT_ENCODE_SET : QUERY_PERCENT_ENCODE_SET
|
|
564
|
+
# TODO: We need to consider encoding here.
|
|
565
|
+
@parse_result[:query] = utf8_percent_encode_string(@buffer, query_percent_encode_set)
|
|
514
566
|
@buffer.clear
|
|
515
567
|
@state = :fragment_state if c == "#"
|
|
516
568
|
elsif !c.nil?
|
|
@@ -520,7 +572,7 @@ module URI
|
|
|
520
572
|
|
|
521
573
|
# Appends +c+, percent-encoded with FRAGMENT_PERCENT_ENCODE_SET, to the
# fragment in @parse_result, creating the fragment string on first use.
# Does nothing when +c+ is nil.
def fragment_state(c)
  unless c.nil?
    fragment = (@parse_result[:fragment] ||= +"")
    fragment << utf8_percent_encode(c, FRAGMENT_PERCENT_ENCODE_SET)
  end
end
|
|
525
577
|
|
|
526
578
|
def windows_drive_letter?(str)
|
|
@@ -554,11 +606,11 @@ module URI
|
|
|
554
606
|
end
|
|
555
607
|
|
|
556
608
|
# Truthy when @username or @password holds a non-empty value.
def includes_credentials?
  return true if @username && !@username.empty?

  @password && !@password.empty?
end
|
|
559
611
|
|
|
560
612
|
# Joins the characters of @input_chars that follow index @pos into a String.
# Returns nil when the slice starting at @pos + 1 is itself nil (start index
# beyond the end of the array).
def rest
  remaining = @input_chars[(@pos + 1)..]
  remaining&.join
end
|
|
563
615
|
|
|
564
616
|
def convert_to_uri(uri)
|
|
@@ -571,6 +623,21 @@ module URI
|
|
|
571
623
|
"bad argument (expected URI object or URI string)"
|
|
572
624
|
end
|
|
573
625
|
end
|
|
626
|
+
|
|
627
|
+
# Removes leading and trailing characters in U+0000..U+0020 from +str+ in
# place. Returns nil without touching +str+ when it contains no such character.
#
# Two definitions are provided. On Ruby 4.0 and later the work is delegated to
# String#strip! with a character selector (this relies on strip! accepting a
# selector argument there); older Rubies use two anchored substitutions.
#
# The version check compares numeric components rather than the raw strings:
# a plain string comparison is lexicographic and would rank "10.0" below "4.0".
if (RUBY_VERSION.split(".").map(&:to_i) <=> [4, 0]) >= 0
  def remove_c0_control_or_space!(str)
    return unless /[\u0000-\u0020]/.match?(str)

    str.strip!("\u0000-\u0020")
  end
else
  def remove_c0_control_or_space!(str)
    return unless /[\u0000-\u0020]/.match?(str)

    str.sub!(/\A[\u0000-\u0020]*/, "")
    str.sub!(/[\u0000-\u0020]*\z/, "")
  end
end
|
|
574
641
|
end
|
|
575
642
|
|
|
576
643
|
WHATWG_PARSER = URI::WhatwgParser.new
|
metadata
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: uri-whatwg_parser
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.2.
|
|
4
|
+
version: 0.2.1
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Yuji Yaginuma
|
|
@@ -37,20 +37,6 @@ dependencies:
|
|
|
37
37
|
- - ">="
|
|
38
38
|
- !ruby/object:Gem::Version
|
|
39
39
|
version: '0'
|
|
40
|
-
- !ruby/object:Gem::Dependency
|
|
41
|
-
name: debug
|
|
42
|
-
requirement: !ruby/object:Gem::Requirement
|
|
43
|
-
requirements:
|
|
44
|
-
- - ">="
|
|
45
|
-
- !ruby/object:Gem::Version
|
|
46
|
-
version: '0'
|
|
47
|
-
type: :development
|
|
48
|
-
prerelease: false
|
|
49
|
-
version_requirements: !ruby/object:Gem::Requirement
|
|
50
|
-
requirements:
|
|
51
|
-
- - ">="
|
|
52
|
-
- !ruby/object:Gem::Version
|
|
53
|
-
version: '0'
|
|
54
40
|
email:
|
|
55
41
|
- yuuji.yaginuma@gmail.com
|
|
56
42
|
executables: []
|