uri-whatwg_parser 0.2.0 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +9 -0
- data/README.md +1 -1
- data/lib/uri/whatwg_parser/generic.rb +103 -31
- data/lib/uri/whatwg_parser/host_parser.rb +37 -24
- data/lib/uri/whatwg_parser/parser_helper.rb +10 -18
- data/lib/uri/whatwg_parser/version.rb +1 -1
- data/lib/uri/whatwg_parser.rb +183 -101
- metadata +3 -17
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 7e3c073b711f9600fd66938070b1cc254a1067b684437f27038a69bc65076c9c
|
|
4
|
+
data.tar.gz: fe833972e0fe8265958d94a97b127983bca064cd29f554c78fb1c2459be008f9
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 541f8b1b1d02b2ac2f16b4af59e5fe3b02e01b2f291529b2698590d01dfc6e1da528926920af1dad8c2d21a11437030356859b30d6e972d2fd3e54aad04cd89a
|
|
7
|
+
data.tar.gz: 639c0241664afd68d4f0f4c2cea36e71f484cb53215fc12bb89e65749626e1769e9a777420201468e7863a624afe62a0bb8839776a9e565252a35211c845dbf4
|
data/CHANGELOG.md
CHANGED
data/README.md
CHANGED
|
@@ -2,7 +2,7 @@
|
|
|
2
2
|
|
|
3
3
|
Ruby implementation of the [WHATWG URL Living Standard](https://url.spec.whatwg.org/).
|
|
4
4
|
|
|
5
|
-
The latest revision that this package implements of the standard is
|
|
5
|
+
The latest revision that this package implements of the standard is [14 April 2026](https://url.spec.whatwg.org/commit-snapshots/b11d73b8caefe90403afe19210db05acba897722/)
|
|
6
6
|
|
|
7
7
|
## Installation
|
|
8
8
|
|
|
@@ -3,16 +3,11 @@ require "uri/generic"
|
|
|
3
3
|
module URI
|
|
4
4
|
class WhatwgParser
|
|
5
5
|
module Generic
|
|
6
|
-
def initialize(scheme,
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
parser = DEFAULT_PARSER,
|
|
12
|
-
arg_check = false)
|
|
13
|
-
|
|
14
|
-
return super unless URI::DEFAULT_PARSER.is_a?(URI::WhatwgParser)
|
|
15
|
-
return super if registry
|
|
6
|
+
def initialize(scheme, userinfo, host, port, registry, path, opaque, query, fragment, parser = DEFAULT_PARSER, arg_check = false)
|
|
7
|
+
@parsed_by_whatwg_parser = parser.is_a?(URI::WhatwgParser)
|
|
8
|
+
unless parser.is_a?(URI::WhatwgParser)
|
|
9
|
+
return super(scheme, userinfo, host, port, registry, path, opaque, query, fragment)
|
|
10
|
+
end
|
|
16
11
|
|
|
17
12
|
@scheme = nil
|
|
18
13
|
@user = nil
|
|
@@ -20,6 +15,7 @@ module URI
|
|
|
20
15
|
@host = nil
|
|
21
16
|
@port = nil
|
|
22
17
|
@path = nil
|
|
18
|
+
@raw_path = nil
|
|
23
19
|
@query = nil
|
|
24
20
|
@opaque = nil
|
|
25
21
|
@fragment = nil
|
|
@@ -32,64 +28,67 @@ module URI
|
|
|
32
28
|
self.set_path(path)
|
|
33
29
|
self.query = query
|
|
34
30
|
self.set_opaque(opaque)
|
|
35
|
-
|
|
31
|
+
@fragment = fragment
|
|
32
|
+
@raw_path = parser&.path
|
|
36
33
|
|
|
37
34
|
self.set_path("") if !@path && !@opaque
|
|
38
|
-
|
|
35
|
+
parser.parse(to_s) if arg_check
|
|
39
36
|
|
|
40
37
|
@scheme&.freeze
|
|
41
38
|
self.set_port(self.default_port) if self.default_port && !@port
|
|
42
39
|
end
|
|
43
40
|
|
|
44
41
|
def merge(oth)
|
|
45
|
-
|
|
42
|
+
return super unless @parsed_by_whatwg_parser
|
|
43
|
+
|
|
44
|
+
parser.join(self.to_s, oth.to_s)
|
|
46
45
|
end
|
|
47
46
|
alias + merge
|
|
48
47
|
|
|
49
48
|
def scheme=(v)
|
|
50
|
-
return super unless
|
|
49
|
+
return super unless @parsed_by_whatwg_parser
|
|
51
50
|
return if v.nil? || v.empty?
|
|
52
51
|
|
|
53
|
-
parse_result =
|
|
52
|
+
parse_result = parser.split("#{v}:", url: self, state_override: :scheme_start_state)
|
|
54
53
|
set_scheme(parse_result[0])
|
|
55
54
|
set_port(parse_result[3])
|
|
56
55
|
end
|
|
57
56
|
|
|
58
57
|
def user=(v)
|
|
59
|
-
return super unless
|
|
58
|
+
return super unless @parsed_by_whatwg_parser
|
|
60
59
|
return v unless v
|
|
61
60
|
|
|
62
61
|
if host.nil? || host.empty? || scheme == "file"
|
|
63
62
|
raise InvalidURIError, "cannot set user when host is nil or file schme"
|
|
64
63
|
end
|
|
65
|
-
set_user(
|
|
64
|
+
set_user(parser.utf8_percent_encode_string(v, URI::WhatwgParser::USERINFO_PERCENT_ENCODE_SET))
|
|
66
65
|
end
|
|
67
66
|
|
|
68
67
|
def password=(v)
|
|
69
|
-
return super unless
|
|
68
|
+
return super unless @parsed_by_whatwg_parser
|
|
70
69
|
return v unless v
|
|
71
70
|
|
|
72
71
|
if host.nil? || host.empty? || scheme == "file"
|
|
73
72
|
raise InvalidURIError, "cannot set password when host is nil or file schme"
|
|
74
73
|
end
|
|
75
|
-
set_password(
|
|
74
|
+
set_password(parser.utf8_percent_encode_string(v, URI::WhatwgParser::USERINFO_PERCENT_ENCODE_SET))
|
|
76
75
|
end
|
|
77
76
|
|
|
78
77
|
def host=(v)
|
|
79
|
-
return super unless
|
|
78
|
+
return super unless @parsed_by_whatwg_parser
|
|
80
79
|
return if v.nil?
|
|
81
80
|
|
|
82
81
|
if @opaque
|
|
83
|
-
raise InvalidURIError, "cannot set host with
|
|
82
|
+
raise InvalidURIError, "cannot set host with opaque"
|
|
84
83
|
end
|
|
85
84
|
|
|
86
|
-
parse_result =
|
|
85
|
+
parse_result = parser.split(v.to_s, url: self, state_override: :host_state)
|
|
87
86
|
set_host(parse_result[2])
|
|
88
87
|
set_port(parse_result[3])
|
|
89
88
|
end
|
|
90
89
|
|
|
91
90
|
def port=(v)
|
|
92
|
-
return super unless
|
|
91
|
+
return super unless @parsed_by_whatwg_parser
|
|
93
92
|
return if v.nil?
|
|
94
93
|
|
|
95
94
|
if v.to_s.empty?
|
|
@@ -101,24 +100,55 @@ module URI
|
|
|
101
100
|
raise InvalidURIError, "cannot set port when host is nil or scheme is file"
|
|
102
101
|
end
|
|
103
102
|
|
|
104
|
-
parse_result =
|
|
103
|
+
parse_result = parser.split("#{v}:", url: self, state_override: :port_state)
|
|
105
104
|
set_port(parse_result[3])
|
|
106
105
|
end
|
|
107
106
|
|
|
108
107
|
def path=(v)
|
|
109
|
-
return super unless
|
|
108
|
+
return super unless @parsed_by_whatwg_parser
|
|
110
109
|
return if v.nil?
|
|
111
110
|
|
|
112
111
|
if @opaque
|
|
113
112
|
raise InvalidURIError, "path conflicts with opaque"
|
|
114
113
|
end
|
|
115
114
|
|
|
116
|
-
parse_result =
|
|
115
|
+
parse_result = parser.split(v.to_s, url: self, state_override: :path_start_state)
|
|
116
|
+
@raw_path = parser.path
|
|
117
117
|
set_path(parse_result[5])
|
|
118
118
|
end
|
|
119
119
|
|
|
120
|
+
def query=(v)
|
|
121
|
+
return super unless @parsed_by_whatwg_parser
|
|
122
|
+
|
|
123
|
+
if v.nil? || v.empty?
|
|
124
|
+
@query = nil
|
|
125
|
+
return
|
|
126
|
+
end
|
|
127
|
+
|
|
128
|
+
v = v.start_with?("?") ? v[1..-1] : v
|
|
129
|
+
@query = +""
|
|
130
|
+
|
|
131
|
+
parse_result = parser.split(v, url: self, state_override: :query_state)
|
|
132
|
+
@query = parse_result[7].to_s
|
|
133
|
+
end
|
|
134
|
+
|
|
135
|
+
def fragment=(v)
|
|
136
|
+
return super unless @parsed_by_whatwg_parser
|
|
137
|
+
|
|
138
|
+
if v.nil? || v.empty?
|
|
139
|
+
@fragment = nil
|
|
140
|
+
return
|
|
141
|
+
end
|
|
142
|
+
|
|
143
|
+
v = v.start_with?("#") ? v[1..-1] : v
|
|
144
|
+
@fragment = +""
|
|
145
|
+
|
|
146
|
+
parse_result = parser.split(v, url: self, state_override: :fragment_state)
|
|
147
|
+
@fragment = parse_result[8].to_s
|
|
148
|
+
end
|
|
149
|
+
|
|
120
150
|
def userinfo=(userinfo)
|
|
121
|
-
return super unless
|
|
151
|
+
return super unless @parsed_by_whatwg_parser
|
|
122
152
|
|
|
123
153
|
user, password = split_userinfo(userinfo)
|
|
124
154
|
self.user = user
|
|
@@ -126,17 +156,59 @@ module URI
|
|
|
126
156
|
end
|
|
127
157
|
|
|
128
158
|
def check_opaque(v)
|
|
129
|
-
return super unless
|
|
159
|
+
return super unless @parsed_by_whatwg_parser
|
|
160
|
+
|
|
130
161
|
return v unless v
|
|
131
162
|
|
|
132
|
-
if @host || @port || @user
|
|
133
|
-
raise InvalidURIError, "cannot set opaque with host, port,
|
|
163
|
+
if @host || @port || @user
|
|
164
|
+
raise InvalidURIError, "cannot set opaque with host, port, or userinfo"
|
|
134
165
|
end
|
|
135
166
|
|
|
136
167
|
self.set_opaque(v)
|
|
137
|
-
|
|
168
|
+
# NOTE: WHATWG URL Living Standard doesn't define "opaque" setter. So parse a URL whole.
|
|
169
|
+
parser.parse(to_s)
|
|
138
170
|
true
|
|
139
171
|
end
|
|
172
|
+
|
|
173
|
+
def to_s
|
|
174
|
+
return super unless @parsed_by_whatwg_parser
|
|
175
|
+
|
|
176
|
+
str = "".dup
|
|
177
|
+
if @scheme
|
|
178
|
+
str << @scheme
|
|
179
|
+
str << ":"
|
|
180
|
+
end
|
|
181
|
+
|
|
182
|
+
if @host || %w[file postgres].include?(@scheme)
|
|
183
|
+
str << "//"
|
|
184
|
+
end
|
|
185
|
+
if self.userinfo
|
|
186
|
+
str << self.userinfo
|
|
187
|
+
str << "@"
|
|
188
|
+
end
|
|
189
|
+
if @host
|
|
190
|
+
str << @host
|
|
191
|
+
end
|
|
192
|
+
if @port && @port != self.default_port
|
|
193
|
+
str << ":"
|
|
194
|
+
str << @port.to_s
|
|
195
|
+
end
|
|
196
|
+
if @host.nil? && @opaque.nil? && @raw_path && @raw_path.length > 1 && @raw_path[0] == ""
|
|
197
|
+
str << "/."
|
|
198
|
+
end
|
|
199
|
+
str << @path if @path
|
|
200
|
+
str << @opaque if @opaque
|
|
201
|
+
if @query
|
|
202
|
+
str << "?"
|
|
203
|
+
str << @query
|
|
204
|
+
end
|
|
205
|
+
|
|
206
|
+
if @fragment
|
|
207
|
+
str << "#"
|
|
208
|
+
str << @fragment
|
|
209
|
+
end
|
|
210
|
+
str
|
|
211
|
+
end
|
|
140
212
|
end
|
|
141
213
|
end
|
|
142
214
|
end
|
|
@@ -7,8 +7,10 @@ class URI::WhatwgParser
|
|
|
7
7
|
class HostParser
|
|
8
8
|
include ParserHelper
|
|
9
9
|
|
|
10
|
-
FORBIDDEN_HOST_CODE_POINT = ["\x00", "\t", "\x0a", "\x0d", " ", "#", "/", ":", "<", ">", "?", "@", "[", "\\", "]", "^", "|"]
|
|
11
|
-
FORBIDDEN_DOMAIN_CODE_POINT = FORBIDDEN_HOST_CODE_POINT
|
|
10
|
+
FORBIDDEN_HOST_CODE_POINT = Set["\x00", "\t", "\x0a", "\x0d", " ", "#", "/", ":", "<", ">", "?", "@", "[", "\\", "]", "^", "|"]
|
|
11
|
+
FORBIDDEN_DOMAIN_CODE_POINT = FORBIDDEN_HOST_CODE_POINT | C0_CONTROL_PERCENT_ENCODE_SET | Set["%", "\x7f"]
|
|
12
|
+
FORBIDDEN_HOST_REGEX = Regexp.union(FORBIDDEN_HOST_CODE_POINT.to_a)
|
|
13
|
+
FORBIDDEN_DOMAIN_REGEX = Regexp.union(FORBIDDEN_DOMAIN_CODE_POINT.to_a)
|
|
12
14
|
|
|
13
15
|
def parse(input, opaque = false) # :nodoc:
|
|
14
16
|
return "" if input&.empty?
|
|
@@ -39,8 +41,7 @@ class URI::WhatwgParser
|
|
|
39
41
|
raise URI::WhatwgParser::ParseError, "invalid IPv4 format" if parts.size > 4
|
|
40
42
|
numbers = []
|
|
41
43
|
parts.each do |part|
|
|
42
|
-
|
|
43
|
-
numbers << value
|
|
44
|
+
numbers << parse_ipv4_number(part)
|
|
44
45
|
end
|
|
45
46
|
|
|
46
47
|
(numbers.size-1).times {|i| raise URI::WhatwgParser::ParseError, "invalid IPv4 format" if numbers[i] > 255 }
|
|
@@ -191,7 +192,7 @@ class URI::WhatwgParser
|
|
|
191
192
|
|
|
192
193
|
def parse_opaque_host(host)
|
|
193
194
|
raise ParseError if include_forbidden_host_code_point?(host)
|
|
194
|
-
host.chars.map { |c|
|
|
195
|
+
host.chars.map { |c| utf8_percent_encode(c, C0_CONTROL_PERCENT_ENCODE_SET) }.join
|
|
195
196
|
end
|
|
196
197
|
|
|
197
198
|
def percent_decode(str)
|
|
@@ -201,51 +202,63 @@ class URI::WhatwgParser
|
|
|
201
202
|
end
|
|
202
203
|
|
|
203
204
|
def ends_in_number?(domain)
|
|
204
|
-
|
|
205
|
-
|
|
206
|
-
|
|
207
|
-
|
|
205
|
+
return false if domain.empty?
|
|
206
|
+
|
|
207
|
+
if domain.end_with?(".")
|
|
208
|
+
# Remove trailing dot and find the actual last segment
|
|
209
|
+
domain_without_trailing = domain[0...-1]
|
|
210
|
+
return false if domain_without_trailing.empty?
|
|
211
|
+
|
|
212
|
+
last_dot = domain_without_trailing.rindex(".")
|
|
213
|
+
last = last_dot ? domain_without_trailing[last_dot + 1..-1] : domain_without_trailing
|
|
214
|
+
else
|
|
215
|
+
# Find the last segment after the last dot
|
|
216
|
+
last_dot = domain.rindex(".")
|
|
217
|
+
last = last_dot ? domain[last_dot + 1..-1] : domain
|
|
208
218
|
end
|
|
209
219
|
|
|
210
|
-
|
|
211
|
-
return true if last
|
|
220
|
+
return false if last.empty?
|
|
221
|
+
return true if last.match?(/\A\d+\z/)
|
|
212
222
|
|
|
213
|
-
|
|
214
|
-
|
|
215
|
-
|
|
216
|
-
return false
|
|
223
|
+
if last.start_with?("0x", "0X")
|
|
224
|
+
hex = last[2..-1] || ""
|
|
225
|
+
return true if hex.empty? || hex.match?(/\A[0-9A-Fa-f]+\z/)
|
|
217
226
|
end
|
|
218
227
|
|
|
219
|
-
|
|
228
|
+
false
|
|
220
229
|
end
|
|
221
230
|
|
|
222
231
|
def parse_ipv4_number(str)
|
|
223
232
|
raise ParseError, "invalid IPv4 format" if str&.empty?
|
|
224
233
|
|
|
225
|
-
validation_error = false
|
|
226
234
|
r = 10
|
|
227
235
|
|
|
228
236
|
if str.size >= 2 && str.start_with?("0x", "0X")
|
|
229
|
-
validation_error = true
|
|
230
237
|
str = str[2..-1]
|
|
231
238
|
r = 16
|
|
232
239
|
elsif str.size >= 2 && str.start_with?("0")
|
|
233
|
-
validation_error = true
|
|
234
240
|
str = str[1..-1]
|
|
235
241
|
r = 8
|
|
236
242
|
end
|
|
237
243
|
|
|
238
|
-
return 0
|
|
244
|
+
return 0 if str.empty?
|
|
239
245
|
|
|
240
246
|
begin
|
|
241
|
-
|
|
242
|
-
return output, validation_error
|
|
247
|
+
Integer(str, r)
|
|
243
248
|
rescue ArgumentError
|
|
244
249
|
raise ParseError, "invalid IPv4 format"
|
|
245
250
|
end
|
|
246
251
|
end
|
|
247
252
|
|
|
248
253
|
def domain_to_ascii(domain)
|
|
254
|
+
# If domain is already ASCII-only, lowercase, and doesn't contain punycode prefix
|
|
255
|
+
# we can skip IDNA processing
|
|
256
|
+
if domain.ascii_only? && domain == domain.downcase && !domain.include?("xn--")
|
|
257
|
+
raise ParseError, "including invalid value in host" if include_forbidden_domain_code_point?(domain)
|
|
258
|
+
raise ParseError, "host can't be empty" if domain.empty?
|
|
259
|
+
return domain
|
|
260
|
+
end
|
|
261
|
+
|
|
249
262
|
ascii_domain = URI::IDNA.whatwg_to_ascii(domain.force_encoding(Encoding::UTF_8), be_strict: false)
|
|
250
263
|
|
|
251
264
|
raise ParseError, "including invalid value in host" if include_forbidden_domain_code_point?(ascii_domain)
|
|
@@ -255,11 +268,11 @@ class URI::WhatwgParser
|
|
|
255
268
|
end
|
|
256
269
|
|
|
257
270
|
def include_forbidden_domain_code_point?(str)
|
|
258
|
-
|
|
271
|
+
str.match?(FORBIDDEN_DOMAIN_REGEX)
|
|
259
272
|
end
|
|
260
273
|
|
|
261
274
|
def include_forbidden_host_code_point?(str)
|
|
262
|
-
|
|
275
|
+
str.match?(FORBIDDEN_HOST_REGEX)
|
|
263
276
|
end
|
|
264
277
|
end
|
|
265
278
|
end
|
|
@@ -1,31 +1,23 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
2
|
|
|
3
|
+
require "set"
|
|
4
|
+
|
|
3
5
|
class URI::WhatwgParser
|
|
4
6
|
module ParserHelper
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
ASCII_DIGIT = ("0".."9").to_a
|
|
8
|
-
|
|
9
|
-
def ascii_alpha?(c)
|
|
10
|
-
ASCII_ALPHA.include?(c)
|
|
11
|
-
end
|
|
12
|
-
|
|
13
|
-
def ascii_alphanumerica?(c)
|
|
14
|
-
ascii_alpha?(c) || ascii_digit?(c)
|
|
15
|
-
end
|
|
7
|
+
# NOTE: This set isn't accurate, but it's OK now because greater than `0x7e` is checked inside a method.
|
|
8
|
+
C0_CONTROL_PERCENT_ENCODE_SET = Set.new((0..0x1f).map(&:chr))
|
|
16
9
|
|
|
17
|
-
def
|
|
18
|
-
ASCII_DIGIT.include?(c)
|
|
19
|
-
end
|
|
20
|
-
|
|
21
|
-
def percent_encode(c, encode_set, encoding = Encoding::UTF_8)
|
|
10
|
+
def utf8_percent_encode(c, encode_set)
|
|
22
11
|
return c unless encode_set.include?(c) || c.ord > 0x7e
|
|
23
12
|
|
|
24
13
|
# For ASCII single-byte characters
|
|
25
14
|
return "%%%02X" % c.ord if c.bytesize == 1
|
|
26
15
|
|
|
27
|
-
|
|
28
|
-
|
|
16
|
+
c.bytes.map { |b| "%%%02X" % b }.join
|
|
17
|
+
end
|
|
18
|
+
|
|
19
|
+
def utf8_percent_encode_string(str, encode_set)
|
|
20
|
+
str.chars.map { |c| utf8_percent_encode(c, encode_set) }.join
|
|
29
21
|
end
|
|
30
22
|
end
|
|
31
23
|
end
|
data/lib/uri/whatwg_parser.rb
CHANGED
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
2
|
|
|
3
|
+
require "set"
|
|
3
4
|
require "uri"
|
|
4
5
|
require_relative "whatwg_parser/error"
|
|
5
6
|
require_relative "whatwg_parser/version"
|
|
@@ -13,19 +14,30 @@ module URI
|
|
|
13
14
|
|
|
14
15
|
SPECIAL_SCHEME = { "ftp" => 21, "file" => nil, "http" => 80, "https" => 443, "ws" => 80, "wss" => 443 }
|
|
15
16
|
|
|
16
|
-
FRAGMENT_PERCENT_ENCODE_SET = C0_CONTROL_PERCENT_ENCODE_SET
|
|
17
|
-
QUERY_PERCENT_ENCODE_SET = C0_CONTROL_PERCENT_ENCODE_SET
|
|
18
|
-
SPECIAL_QUERY_PERCENT_ENCODE_SET = QUERY_PERCENT_ENCODE_SET
|
|
19
|
-
PATH_PERCENT_ENCODE_SET = QUERY_PERCENT_ENCODE_SET
|
|
20
|
-
USERINFO_PERCENT_ENCODE_SET = PATH_PERCENT_ENCODE_SET
|
|
17
|
+
FRAGMENT_PERCENT_ENCODE_SET = C0_CONTROL_PERCENT_ENCODE_SET | Set[" ", "\"", "<", ">", "`"]
|
|
18
|
+
QUERY_PERCENT_ENCODE_SET = C0_CONTROL_PERCENT_ENCODE_SET | Set[" ", "\"", "#", "<", ">"]
|
|
19
|
+
SPECIAL_QUERY_PERCENT_ENCODE_SET = QUERY_PERCENT_ENCODE_SET | Set["'"]
|
|
20
|
+
PATH_PERCENT_ENCODE_SET = QUERY_PERCENT_ENCODE_SET | Set["?", "^", "`", "{", "}"]
|
|
21
|
+
USERINFO_PERCENT_ENCODE_SET = PATH_PERCENT_ENCODE_SET | Set["/", ":", ";", "=", "@", "[", "\\", "]", "|"]
|
|
21
22
|
|
|
22
|
-
SINGLE_DOT_PATH_SEGMENTS = [".", "%2e", "%2E"]
|
|
23
|
-
DOUBLE_DOT_PATH_SEGMENTS = ["..", ".%2e", ".%2E", "%2e.", "%2e%2e", "%2e%2E", "%2E.", "%2E%2e", "%2E%2E"]
|
|
23
|
+
SINGLE_DOT_PATH_SEGMENTS = Set[".", "%2e", "%2E"]
|
|
24
|
+
DOUBLE_DOT_PATH_SEGMENTS = Set["..", ".%2e", ".%2E", "%2e.", "%2e%2e", "%2e%2E", "%2E.", "%2E%2e", "%2E%2E"]
|
|
24
25
|
|
|
25
26
|
WINDOWS_DRIVE_LETTER = Regexp.new("\\A([a-zA-Z][:|])\\z")
|
|
26
27
|
NORMALIZED_WINDOWS_DRIVE_LETTER = Regexp.new("\\A([a-zA-Z][:])\\z")
|
|
27
28
|
STARTS_WITH_WINDOWS_DRIVE_LETTER = Regexp.new("\\A([a-zA-Z][:|])(?:[/\\?#])?\\z")
|
|
28
29
|
|
|
30
|
+
VALID_SIGNS_FOR_SCHEME = Set["+", "-", "."]
|
|
31
|
+
DELIMITER_SIGNS = Set["/", "?", "#"]
|
|
32
|
+
|
|
33
|
+
WS_SCHEMES = Set["ws", "wss"]
|
|
34
|
+
|
|
35
|
+
ASCII_ALPHA_LOWERCASE = Set.new(("a".."z").to_a)
|
|
36
|
+
ASCII_ALPHA_UPPERCASE = Set.new(("A".."Z").to_a)
|
|
37
|
+
ASCII_DIGIT = Set.new(("0".."9").to_a)
|
|
38
|
+
|
|
39
|
+
attr_reader :path
|
|
40
|
+
|
|
29
41
|
def initialize
|
|
30
42
|
reset
|
|
31
43
|
@host_parser = HostParser.new
|
|
@@ -35,24 +47,27 @@ module URI
|
|
|
35
47
|
{}
|
|
36
48
|
end
|
|
37
49
|
|
|
38
|
-
def parse(input, base: nil,
|
|
39
|
-
URI.for(*self.split(input, base: base,
|
|
50
|
+
def parse(input, base: nil, url: nil, state_override: nil) # :nodoc:
|
|
51
|
+
URI.for(*self.split(input, base: base, url: url, state_override: state_override), self)
|
|
40
52
|
end
|
|
41
53
|
|
|
42
|
-
def split(input, base: nil,
|
|
54
|
+
def split(input, base: nil, url: nil, state_override: nil) # :nodoc:
|
|
43
55
|
reset
|
|
44
56
|
@base = nil
|
|
45
57
|
if base != nil
|
|
46
|
-
ary = split(base, base: nil
|
|
47
|
-
@base = { scheme: ary[0], userinfo: ary[1], host: ary[2], port: ary[3],
|
|
48
|
-
@
|
|
58
|
+
ary = split(base, base: nil)
|
|
59
|
+
@base = { scheme: ary[0], userinfo: ary[1], host: ary[2], port: ary[3], query: ary[7], fragment: ary[8]}
|
|
60
|
+
@base_path = @path
|
|
49
61
|
reset
|
|
50
62
|
end
|
|
51
63
|
|
|
52
64
|
if url
|
|
53
65
|
raise ArgumentError, "bad argument (expected URI object)" unless url.is_a?(URI::Generic)
|
|
54
66
|
@parse_result.merge!(url.component.zip(url.send(:component_ary)).to_h)
|
|
55
|
-
@
|
|
67
|
+
@username = url.user
|
|
68
|
+
@password = url.password
|
|
69
|
+
@parse_result.delete(:userinfo)
|
|
70
|
+
@special_url = special_url?(@parse_result[:scheme])
|
|
56
71
|
end
|
|
57
72
|
|
|
58
73
|
if state_override
|
|
@@ -63,30 +78,33 @@ module URI
|
|
|
63
78
|
raise ParseError, "uri can't be empty" if (input.nil? || input.empty?) && @base.nil?
|
|
64
79
|
end
|
|
65
80
|
|
|
66
|
-
|
|
67
|
-
@input = input.dup
|
|
81
|
+
input = input.dup
|
|
68
82
|
|
|
69
83
|
unless url
|
|
70
|
-
|
|
71
|
-
@input.sub!(/[\u0000-\u0020]*\z/, "")
|
|
84
|
+
remove_c0_control_or_space!(input)
|
|
72
85
|
end
|
|
73
86
|
|
|
74
|
-
|
|
75
|
-
@input.delete!("\n")
|
|
76
|
-
@input.delete!("\r")
|
|
87
|
+
input.delete!("\t\n\r") if /[\t\n\r]/.match?(input)
|
|
77
88
|
|
|
89
|
+
@input_chars = input.chars
|
|
90
|
+
input_chars_length = @input_chars.length
|
|
78
91
|
@pos = 0
|
|
79
92
|
|
|
80
|
-
while @pos <=
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
break if ret == :terminate
|
|
93
|
+
while @pos <= input_chars_length
|
|
94
|
+
dispatch_state(@input_chars[@pos])
|
|
95
|
+
break if @terminate
|
|
84
96
|
@pos += 1
|
|
85
97
|
end
|
|
86
98
|
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
99
|
+
userinfo = [@username, @password].compact.reject(&:empty?).join(":")
|
|
100
|
+
if @path
|
|
101
|
+
if @path.is_a?(Array)
|
|
102
|
+
path = "/#{@path.join("/")}"
|
|
103
|
+
else
|
|
104
|
+
opaque = @path
|
|
105
|
+
end
|
|
106
|
+
end
|
|
107
|
+
[@parse_result[:scheme], userinfo, @parse_result[:host], @parse_result[:port], nil, path, opaque, @parse_result[:query], @parse_result[:fragment]]
|
|
90
108
|
end
|
|
91
109
|
|
|
92
110
|
def join(*uris)
|
|
@@ -101,29 +119,53 @@ module URI
|
|
|
101
119
|
uri
|
|
102
120
|
end
|
|
103
121
|
|
|
104
|
-
def encode_userinfo(str)
|
|
105
|
-
str.chars.map do |char|
|
|
106
|
-
percent_encode(char, USERINFO_PERCENT_ENCODE_SET)
|
|
107
|
-
end.join
|
|
108
|
-
end
|
|
109
|
-
|
|
110
122
|
private
|
|
111
123
|
|
|
124
|
+
def dispatch_state(c)
|
|
125
|
+
case @state
|
|
126
|
+
when :scheme_start_state then scheme_start_state(c)
|
|
127
|
+
when :scheme_state then scheme_state(c)
|
|
128
|
+
when :no_scheme_state then no_scheme_state(c)
|
|
129
|
+
when :special_relative_or_authority_state then special_relative_or_authority_state(c)
|
|
130
|
+
when :path_or_authority_state then path_or_authority_state(c)
|
|
131
|
+
when :relative_state then relative_state(c)
|
|
132
|
+
when :relative_slash_state then relative_slash_state(c)
|
|
133
|
+
when :special_authority_slashes_state then special_authority_slashes_state(c)
|
|
134
|
+
when :special_authority_ignore_slashes_state then special_authority_ignore_slashes_state(c)
|
|
135
|
+
when :authority_state then authority_state(c)
|
|
136
|
+
when :host_state then host_state(c)
|
|
137
|
+
when :port_state then port_state(c)
|
|
138
|
+
when :file_state then file_state(c)
|
|
139
|
+
when :file_slash_state then file_slash_state(c)
|
|
140
|
+
when :file_host_state then file_host_state(c)
|
|
141
|
+
when :path_start_state then path_start_state(c)
|
|
142
|
+
when :path_state then path_state(c)
|
|
143
|
+
when :opaque_path_state then opaque_path_state(c)
|
|
144
|
+
when :query_state then query_state(c)
|
|
145
|
+
when :fragment_state then fragment_state(c)
|
|
146
|
+
end
|
|
147
|
+
end
|
|
148
|
+
|
|
112
149
|
def reset
|
|
113
150
|
@buffer = +""
|
|
114
151
|
@at_sign_seen = nil
|
|
115
152
|
@password_token_seen = nil
|
|
116
153
|
@inside_brackets = nil
|
|
117
|
-
@
|
|
154
|
+
@path = nil
|
|
118
155
|
@username = nil
|
|
119
156
|
@password = nil
|
|
120
|
-
@parse_result = {
|
|
157
|
+
@parse_result = {}
|
|
121
158
|
@state_override = nil
|
|
122
159
|
@state = :scheme_start_state
|
|
160
|
+
@special_url = nil
|
|
161
|
+
@terminate = nil
|
|
123
162
|
end
|
|
124
163
|
|
|
125
164
|
def scheme_start_state(c)
|
|
126
|
-
if
|
|
165
|
+
if ASCII_ALPHA_LOWERCASE.include?(c)
|
|
166
|
+
@buffer << c
|
|
167
|
+
@state = :scheme_state
|
|
168
|
+
elsif ASCII_ALPHA_UPPERCASE.include?(c)
|
|
127
169
|
@buffer << c.downcase
|
|
128
170
|
@state = :scheme_state
|
|
129
171
|
elsif @state_override.nil?
|
|
@@ -135,7 +177,9 @@ module URI
|
|
|
135
177
|
end
|
|
136
178
|
|
|
137
179
|
def scheme_state(c)
|
|
138
|
-
if
|
|
180
|
+
if ASCII_ALPHA_LOWERCASE.include?(c) || ASCII_DIGIT.include?(c) || VALID_SIGNS_FOR_SCHEME.include?(c)
|
|
181
|
+
@buffer << c
|
|
182
|
+
elsif ASCII_ALPHA_UPPERCASE.include?(c)
|
|
139
183
|
@buffer << c.downcase
|
|
140
184
|
elsif c == ":"
|
|
141
185
|
if @state_override
|
|
@@ -143,17 +187,20 @@ module URI
|
|
|
143
187
|
(!special_url? && special_url?(@buffer)) ||
|
|
144
188
|
((includes_credentials? || !@parse_result[:port].nil?) && @buffer == "file") ||
|
|
145
189
|
(@parse_result[:scheme] == "file" && @parse_result[:host]&.empty?)
|
|
146
|
-
|
|
190
|
+
@terminate = true
|
|
191
|
+
return
|
|
147
192
|
end
|
|
148
193
|
end
|
|
149
194
|
|
|
150
195
|
@parse_result[:scheme] = @buffer
|
|
196
|
+
@special_url = special_url?(@buffer)
|
|
151
197
|
|
|
152
198
|
if @state_override
|
|
153
199
|
if SPECIAL_SCHEME.value?(@parse_result[:port].to_i)
|
|
154
200
|
@parse_result[:port] = nil
|
|
155
201
|
end
|
|
156
|
-
|
|
202
|
+
@terminate = true
|
|
203
|
+
return
|
|
157
204
|
end
|
|
158
205
|
|
|
159
206
|
@buffer = +""
|
|
@@ -164,11 +211,11 @@ module URI
|
|
|
164
211
|
@state = :special_relative_or_authority_state
|
|
165
212
|
elsif special_url?
|
|
166
213
|
@state = :special_authority_slashes_state
|
|
167
|
-
elsif
|
|
214
|
+
elsif @input_chars[@pos + 1] == "/"
|
|
168
215
|
@state = :path_or_authority_state
|
|
169
216
|
@pos += 1
|
|
170
217
|
else
|
|
171
|
-
@
|
|
218
|
+
@path = +""
|
|
172
219
|
@state = :opaque_path_state
|
|
173
220
|
end
|
|
174
221
|
elsif @state_override.nil?
|
|
@@ -181,11 +228,12 @@ module URI
|
|
|
181
228
|
end
|
|
182
229
|
|
|
183
230
|
def no_scheme_state(c)
|
|
184
|
-
raise ParseError, "scheme is missing" if @base.nil? || (
|
|
231
|
+
raise ParseError, "scheme is missing" if @base.nil? || (has_opaque_path?(@base_path) && c != "#")
|
|
185
232
|
|
|
186
|
-
if
|
|
233
|
+
if has_opaque_path?(@base_path) && c == "#"
|
|
187
234
|
@parse_result[:scheme] = @base[:scheme]
|
|
188
|
-
@
|
|
235
|
+
@special_url = special_url?(@base[:scheme])
|
|
236
|
+
@path = @base_path
|
|
189
237
|
@parse_result[:query] = @base[:query]
|
|
190
238
|
@parse_result[:fragment] = nil
|
|
191
239
|
@state = :fragment_state
|
|
@@ -199,7 +247,7 @@ module URI
|
|
|
199
247
|
end
|
|
200
248
|
|
|
201
249
|
def special_relative_or_authority_state(c)
|
|
202
|
-
if c == "/" &&
|
|
250
|
+
if c == "/" && @input_chars[@pos + 1] == "/"
|
|
203
251
|
@state = :special_authority_ignore_slashes_state
|
|
204
252
|
@pos -= 1
|
|
205
253
|
else
|
|
@@ -219,6 +267,7 @@ module URI
|
|
|
219
267
|
|
|
220
268
|
def relative_state(c)
|
|
221
269
|
@parse_result[:scheme] = @base[:scheme]
|
|
270
|
+
@special_url = special_url?(@base[:scheme])
|
|
222
271
|
if c == "/"
|
|
223
272
|
@state = :relative_slash_state
|
|
224
273
|
elsif special_url? && c == "\\"
|
|
@@ -227,7 +276,7 @@ module URI
|
|
|
227
276
|
@username, @password = @base[:userinfo].split(":") if @base[:userinfo]
|
|
228
277
|
@parse_result[:host] = @base[:host]
|
|
229
278
|
@parse_result[:port] = @base[:port]
|
|
230
|
-
@
|
|
279
|
+
@path = @base_path
|
|
231
280
|
@parse_result[:query] = @base[:query]
|
|
232
281
|
|
|
233
282
|
if c == "?"
|
|
@@ -246,7 +295,7 @@ module URI
|
|
|
246
295
|
end
|
|
247
296
|
|
|
248
297
|
def relative_slash_state(c)
|
|
249
|
-
if special_url
|
|
298
|
+
if @special_url && (c == "/" || c == "\\")
|
|
250
299
|
@state = :special_authority_ignore_slashes_state
|
|
251
300
|
elsif c == "/"
|
|
252
301
|
@state = :authority_state
|
|
@@ -260,7 +309,7 @@ module URI
|
|
|
260
309
|
end
|
|
261
310
|
|
|
262
311
|
def special_authority_slashes_state(c)
|
|
263
|
-
if c == "/" &&
|
|
312
|
+
if c == "/" && @input_chars[@pos + 1] == "/"
|
|
264
313
|
@state = :special_authority_ignore_slashes_state
|
|
265
314
|
@pos += 1
|
|
266
315
|
else
|
|
@@ -280,23 +329,23 @@ module URI
|
|
|
280
329
|
if c == "@"
|
|
281
330
|
@buffer.prepend("%40") if @at_sign_seen
|
|
282
331
|
@at_sign_seen = true
|
|
283
|
-
@buffer.
|
|
332
|
+
@buffer.each_char do |char|
|
|
284
333
|
if char == ":" && !@password_token_seen
|
|
285
334
|
@password_token_seen = true
|
|
286
335
|
next
|
|
287
336
|
end
|
|
288
337
|
|
|
289
|
-
encoded_char =
|
|
338
|
+
encoded_char = utf8_percent_encode(char, USERINFO_PERCENT_ENCODE_SET)
|
|
290
339
|
|
|
291
340
|
if @password_token_seen
|
|
292
|
-
@password
|
|
341
|
+
(@password ||= +"") << encoded_char
|
|
293
342
|
else
|
|
294
|
-
@username
|
|
343
|
+
(@username ||= +"") << encoded_char
|
|
295
344
|
end
|
|
296
345
|
end
|
|
297
346
|
|
|
298
347
|
@buffer.clear
|
|
299
|
-
elsif c.nil? ||
|
|
348
|
+
elsif c.nil? || DELIMITER_SIGNS.include?(c) || (@special_url && c == "\\")
|
|
300
349
|
raise ParseError, "host is missing" if @at_sign_seen && @buffer.empty?
|
|
301
350
|
|
|
302
351
|
@pos -= (@buffer.size + 1)
|
|
@@ -315,20 +364,23 @@ module URI
|
|
|
315
364
|
raise ParseError, "host is missing" if @buffer.empty?
|
|
316
365
|
raise ParseError, "invalid host" if @state_override && @state_override == :hostname_state
|
|
317
366
|
|
|
318
|
-
@parse_result[:host] = @host_parser.parse(@buffer,
|
|
367
|
+
@parse_result[:host] = @host_parser.parse(@buffer, !@special_url)
|
|
319
368
|
@buffer.clear
|
|
320
369
|
@state = :port_state
|
|
321
|
-
elsif c.nil? ||
|
|
370
|
+
elsif c.nil? || DELIMITER_SIGNS.include?(c) || (@special_url && c == "\\")
|
|
322
371
|
@pos -= 1
|
|
323
|
-
if special_url
|
|
372
|
+
if @special_url && @buffer.empty?
|
|
324
373
|
raise ParseError, "host is missing"
|
|
325
374
|
elsif @state_override && @buffer.empty? && (includes_credentials? || !@parse_result[:port].nil?)
|
|
326
375
|
raise ParseError, "invalid host"
|
|
327
376
|
else
|
|
328
|
-
@parse_result[:host] = @host_parser.parse(@buffer,
|
|
377
|
+
@parse_result[:host] = @host_parser.parse(@buffer, !@special_url)
|
|
329
378
|
@buffer.clear
|
|
330
379
|
@state = :path_start_state
|
|
331
|
-
|
|
380
|
+
if @state_override
|
|
381
|
+
@terminate = true
|
|
382
|
+
return
|
|
383
|
+
end
|
|
332
384
|
end
|
|
333
385
|
else
|
|
334
386
|
@inside_brackets = true if c == "["
|
|
@@ -338,9 +390,9 @@ module URI
|
|
|
338
390
|
end
|
|
339
391
|
|
|
340
392
|
def port_state(c)
|
|
341
|
-
if
|
|
393
|
+
if ASCII_DIGIT.include?(c)
|
|
342
394
|
@buffer << c
|
|
343
|
-
elsif c.nil? ||
|
|
395
|
+
elsif c.nil? || DELIMITER_SIGNS.include?(c) || (@special_url && c == "\\") || @state_override
|
|
344
396
|
unless @buffer.empty?
|
|
345
397
|
port = Integer(@buffer, 10)
|
|
346
398
|
raise ParseError, "port is invalid value" if port < 0 || port > 65535
|
|
@@ -351,7 +403,10 @@ module URI
|
|
|
351
403
|
end
|
|
352
404
|
|
|
353
405
|
@buffer.clear
|
|
354
|
-
|
|
406
|
+
if @state_override
|
|
407
|
+
@terminate = true
|
|
408
|
+
return
|
|
409
|
+
end
|
|
355
410
|
end
|
|
356
411
|
|
|
357
412
|
raise ParseError, "port is invalid value" if @state_override
|
|
@@ -364,6 +419,7 @@ module URI
|
|
|
364
419
|
|
|
365
420
|
def file_state(c)
|
|
366
421
|
@parse_result[:scheme] = "file"
|
|
422
|
+
@special_url = true
|
|
367
423
|
@parse_result[:host] = nil
|
|
368
424
|
|
|
369
425
|
if c == "/" || c == "\\"
|
|
@@ -382,7 +438,7 @@ module URI
|
|
|
382
438
|
if !starts_with_windows_drive_letter?(rest)
|
|
383
439
|
shorten_url_path
|
|
384
440
|
else
|
|
385
|
-
@
|
|
441
|
+
@path = nil
|
|
386
442
|
end
|
|
387
443
|
@state = :path_state
|
|
388
444
|
@pos -= 1
|
|
@@ -399,10 +455,10 @@ module URI
|
|
|
399
455
|
else
|
|
400
456
|
if !@base.nil? && @base[:scheme] == "file"
|
|
401
457
|
@parse_result[:host] = @base[:host]
|
|
402
|
-
if !starts_with_windows_drive_letter?(rest) && @
|
|
403
|
-
if @
|
|
404
|
-
@
|
|
405
|
-
@
|
|
458
|
+
if !starts_with_windows_drive_letter?(rest) && @base_path && normalized_windows_drive_letter?(@base_path[0])
|
|
459
|
+
if @path.nil?
|
|
460
|
+
@path ||= []
|
|
461
|
+
@path[0] = @base_path[0]
|
|
406
462
|
end
|
|
407
463
|
end
|
|
408
464
|
end
|
|
@@ -412,20 +468,26 @@ module URI
|
|
|
412
468
|
end
|
|
413
469
|
|
|
414
470
|
def file_host_state(c)
|
|
415
|
-
if c.nil? || c
|
|
471
|
+
if c.nil? || DELIMITER_SIGNS.include?(c) || (@special_url && c == "\\")
|
|
416
472
|
@pos -= 1
|
|
417
473
|
|
|
418
474
|
if !@state_override && windows_drive_letter?(@buffer)
|
|
419
475
|
@state = :path_state
|
|
420
476
|
elsif @buffer.empty?
|
|
421
477
|
@parse_result[:host] = nil
|
|
422
|
-
|
|
478
|
+
if @state_override
|
|
479
|
+
@terminate = true
|
|
480
|
+
return
|
|
481
|
+
end
|
|
423
482
|
@state = :path_start_state
|
|
424
483
|
else
|
|
425
|
-
host = @host_parser.parse(@buffer,
|
|
484
|
+
host = @host_parser.parse(@buffer, !@special_url)
|
|
426
485
|
host = "" if host == "localhost"
|
|
427
486
|
@parse_result[:host] = host
|
|
428
|
-
|
|
487
|
+
if @state_override
|
|
488
|
+
@terminate = true
|
|
489
|
+
return
|
|
490
|
+
end
|
|
429
491
|
@buffer.clear
|
|
430
492
|
@state = :path_start_state
|
|
431
493
|
end
|
|
@@ -435,7 +497,7 @@ module URI
|
|
|
435
497
|
end
|
|
436
498
|
|
|
437
499
|
def path_start_state(c)
|
|
438
|
-
if special_url
|
|
500
|
+
if @special_url
|
|
439
501
|
@pos -= 1 if c != "/" && c != "\\"
|
|
440
502
|
@state = :path_state
|
|
441
503
|
elsif !@state_override && c == "?"
|
|
@@ -446,29 +508,29 @@ module URI
|
|
|
446
508
|
@pos -= 1 if c != "/"
|
|
447
509
|
@state = :path_state
|
|
448
510
|
elsif @state_override && @parse_result[:host].nil?
|
|
449
|
-
@
|
|
450
|
-
@
|
|
511
|
+
@path ||= []
|
|
512
|
+
@path << ""
|
|
451
513
|
end
|
|
452
514
|
end
|
|
453
515
|
|
|
454
516
|
def path_state(c)
|
|
455
|
-
@
|
|
517
|
+
@path ||= []
|
|
456
518
|
|
|
457
|
-
if (c.nil? || c == "/") || (special_url
|
|
519
|
+
if (c.nil? || c == "/") || (@special_url && c == "\\") || (!@state_override && (c == "?" || c == "#"))
|
|
458
520
|
if double_dot_path_segments?(@buffer)
|
|
459
521
|
shorten_url_path
|
|
460
522
|
|
|
461
|
-
if c != "/" && !(special_url
|
|
462
|
-
@
|
|
523
|
+
if c != "/" && !(@special_url && c == "\\")
|
|
524
|
+
@path << ""
|
|
463
525
|
end
|
|
464
|
-
elsif single_dot_path_segments?(@buffer) && c != "/" && !((special_url
|
|
465
|
-
@
|
|
526
|
+
elsif single_dot_path_segments?(@buffer) && c != "/" && !((@special_url && c == "\\"))
|
|
527
|
+
@path << ""
|
|
466
528
|
elsif !single_dot_path_segments?(@buffer)
|
|
467
|
-
if @parse_result[:scheme] == "file" && @
|
|
529
|
+
if @parse_result[:scheme] == "file" && @path.empty? && windows_drive_letter?(@buffer)
|
|
468
530
|
@buffer[1] = ":"
|
|
469
531
|
end
|
|
470
532
|
|
|
471
|
-
@
|
|
533
|
+
@path << @buffer
|
|
472
534
|
end
|
|
473
535
|
|
|
474
536
|
@buffer = +""
|
|
@@ -477,11 +539,11 @@ module URI
|
|
|
477
539
|
@parse_result[:query] = nil
|
|
478
540
|
@state = :query_state
|
|
479
541
|
elsif c == "#"
|
|
480
|
-
@parse_result[:
|
|
542
|
+
@parse_result[:fragment] = nil
|
|
481
543
|
@state = :fragment_state
|
|
482
544
|
end
|
|
483
545
|
else
|
|
484
|
-
@buffer <<
|
|
546
|
+
@buffer << utf8_percent_encode(c, PATH_PERCENT_ENCODE_SET)
|
|
485
547
|
end
|
|
486
548
|
end
|
|
487
549
|
|
|
@@ -493,26 +555,27 @@ module URI
|
|
|
493
555
|
@parse_result[:fragment] = nil
|
|
494
556
|
@state = :fragment_state
|
|
495
557
|
elsif c == " "
|
|
496
|
-
|
|
497
|
-
|
|
558
|
+
first_of_rest = @input_chars[@pos + 1]
|
|
559
|
+
if first_of_rest == "?" || first_of_rest == "#"
|
|
560
|
+
@path += "%20"
|
|
498
561
|
else
|
|
499
|
-
@
|
|
562
|
+
@path += " "
|
|
500
563
|
end
|
|
501
564
|
elsif !c.nil?
|
|
502
|
-
@
|
|
565
|
+
@path += utf8_percent_encode(c, C0_CONTROL_PERCENT_ENCODE_SET)
|
|
503
566
|
end
|
|
504
567
|
end
|
|
505
568
|
|
|
506
569
|
def query_state(c)
|
|
507
|
-
if @encoding != Encoding::UTF_8 && (!special_url? || %w[ws wss].include?(@parse_result[:scheme]))
|
|
508
|
-
@encoding = Encoding::UTF_8
|
|
509
|
-
end
|
|
510
|
-
|
|
511
570
|
if c.nil? || (!@state_override && c == "#")
|
|
512
|
-
query_percent_encode_set = special_url
|
|
513
|
-
|
|
571
|
+
query_percent_encode_set = @special_url ? SPECIAL_QUERY_PERCENT_ENCODE_SET : QUERY_PERCENT_ENCODE_SET
|
|
572
|
+
# TODO: We need to consider encoding here.
|
|
573
|
+
@parse_result[:query] = utf8_percent_encode_string(@buffer, query_percent_encode_set)
|
|
514
574
|
@buffer.clear
|
|
515
|
-
|
|
575
|
+
if c == "#"
|
|
576
|
+
@parse_result[:fragment] = +""
|
|
577
|
+
@state = :fragment_state
|
|
578
|
+
end
|
|
516
579
|
elsif !c.nil?
|
|
517
580
|
@buffer << c
|
|
518
581
|
end
|
|
@@ -520,7 +583,7 @@ module URI
|
|
|
520
583
|
|
|
521
584
|
def fragment_state(c)
|
|
522
585
|
return if c.nil?
|
|
523
|
-
@parse_result[:fragment]
|
|
586
|
+
(@parse_result[:fragment] ||= +"") << utf8_percent_encode(c, FRAGMENT_PERCENT_ENCODE_SET)
|
|
524
587
|
end
|
|
525
588
|
|
|
526
589
|
def windows_drive_letter?(str)
|
|
@@ -548,17 +611,17 @@ module URI
|
|
|
548
611
|
end
|
|
549
612
|
|
|
550
613
|
def shorten_url_path
|
|
551
|
-
return if @
|
|
552
|
-
return if @parse_result[:scheme] == "file" && @
|
|
553
|
-
@
|
|
614
|
+
return if @path.nil? || @path.is_a?(String)
|
|
615
|
+
return if @parse_result[:scheme] == "file" && @path.length == 1 && normalized_windows_drive_letter?(@path.first)
|
|
616
|
+
@path.pop
|
|
554
617
|
end
|
|
555
618
|
|
|
556
619
|
def includes_credentials?
|
|
557
|
-
|
|
620
|
+
(@username && !@username.empty?) || (@password && !@password.empty?)
|
|
558
621
|
end
|
|
559
622
|
|
|
560
623
|
def rest
|
|
561
|
-
@
|
|
624
|
+
@input_chars[@pos + 1..]&.join
|
|
562
625
|
end
|
|
563
626
|
|
|
564
627
|
def convert_to_uri(uri)
|
|
@@ -571,6 +634,25 @@ module URI
|
|
|
571
634
|
"bad argument (expected URI object or URI string)"
|
|
572
635
|
end
|
|
573
636
|
end
|
|
637
|
+
|
|
638
|
+
if RUBY_VERSION >= "4.0"
|
|
639
|
+
def remove_c0_control_or_space!(str)
|
|
640
|
+
if /[\u0000-\u0020]/.match?(str)
|
|
641
|
+
str.strip!("\u0000-\u0020")
|
|
642
|
+
end
|
|
643
|
+
end
|
|
644
|
+
else
|
|
645
|
+
def remove_c0_control_or_space!(str)
|
|
646
|
+
if /[\u0000-\u0020]/.match?(str)
|
|
647
|
+
str.sub!(/\A[\u0000-\u0020]*/, "")
|
|
648
|
+
str.sub!(/[\u0000-\u0020]*\z/, "")
|
|
649
|
+
end
|
|
650
|
+
end
|
|
651
|
+
end
|
|
652
|
+
|
|
653
|
+
def has_opaque_path?(path)
|
|
654
|
+
path.is_a?(String)
|
|
655
|
+
end
|
|
574
656
|
end
|
|
575
657
|
|
|
576
658
|
WHATWG_PARSER = URI::WhatwgParser.new
|
metadata
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: uri-whatwg_parser
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.
|
|
4
|
+
version: 0.3.0
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Yuji Yaginuma
|
|
@@ -37,20 +37,6 @@ dependencies:
|
|
|
37
37
|
- - ">="
|
|
38
38
|
- !ruby/object:Gem::Version
|
|
39
39
|
version: '0'
|
|
40
|
-
- !ruby/object:Gem::Dependency
|
|
41
|
-
name: debug
|
|
42
|
-
requirement: !ruby/object:Gem::Requirement
|
|
43
|
-
requirements:
|
|
44
|
-
- - ">="
|
|
45
|
-
- !ruby/object:Gem::Version
|
|
46
|
-
version: '0'
|
|
47
|
-
type: :development
|
|
48
|
-
prerelease: false
|
|
49
|
-
version_requirements: !ruby/object:Gem::Requirement
|
|
50
|
-
requirements:
|
|
51
|
-
- - ">="
|
|
52
|
-
- !ruby/object:Gem::Version
|
|
53
|
-
version: '0'
|
|
54
40
|
email:
|
|
55
41
|
- yuuji.yaginuma@gmail.com
|
|
56
42
|
executables: []
|
|
@@ -80,14 +66,14 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
|
80
66
|
requirements:
|
|
81
67
|
- - ">="
|
|
82
68
|
- !ruby/object:Gem::Version
|
|
83
|
-
version: 3.
|
|
69
|
+
version: 3.0.0
|
|
84
70
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
|
85
71
|
requirements:
|
|
86
72
|
- - ">="
|
|
87
73
|
- !ruby/object:Gem::Version
|
|
88
74
|
version: '0'
|
|
89
75
|
requirements: []
|
|
90
|
-
rubygems_version: 4.0.
|
|
76
|
+
rubygems_version: 4.0.10
|
|
91
77
|
specification_version: 4
|
|
92
78
|
summary: Ruby implementation of the WHATWG URL Living Standard
|
|
93
79
|
test_files: []
|