uri_pattern 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/CODE_OF_CONDUCT.md +10 -0
- data/LICENSE.txt +21 -0
- data/README.md +109 -0
- data/Rakefile +11 -0
- data/lib/uri_pattern/canonicalization.rb +76 -0
- data/lib/uri_pattern/compiler.rb +380 -0
- data/lib/uri_pattern/component_pattern.rb +42 -0
- data/lib/uri_pattern/match_result.rb +25 -0
- data/lib/uri_pattern/pattern_string.rb +327 -0
- data/lib/uri_pattern/tokenizer.rb +170 -0
- data/lib/uri_pattern/url_parser.rb +487 -0
- data/lib/uri_pattern/version.rb +5 -0
- data/lib/uri_pattern.rb +378 -0
- metadata +68 -0
|
@@ -0,0 +1,487 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "set"
require "uri"
require "uri/whatwg_parser"
|
|
5
|
+
|
|
6
|
+
class URIPattern
|
|
7
|
+
module URLParser
|
|
8
|
+
module_function
|
|
9
|
+
|
|
10
|
+
# Break a URL string into the eight URLPattern components.
#
# When +base_url+ is given and +url+ is not empty, +url+ is resolved against
# the base first. The URL is then split with URI::WhatwgParser and returned as
# a Hash keyed by :protocol, :username, :password, :hostname, :port,
# :pathname, :query and :fragment. Every value is a String; a component the
# parser did not report becomes "".
#
# Raises URIPattern::Error for any failure (errors that already are
# URIPattern::Error pass through untouched).
def split_components(url, base_url: nil)
  needs_base = base_url && !url.empty?
  url = resolve(url, base_url) if needs_base

  parts = URI::WhatwgParser.new.split(url)

  # Userinfo is "user" or "user:password"; only the first ":" separates them.
  username, _separator, password = (parts[WHATWG_USERINFO] || "").partition(":")
  port = parts[WHATWG_PORT]

  {
    protocol: parts[WHATWG_SCHEME] || "",
    username: username,
    password: password,
    hostname: parts[WHATWG_HOST] || "",
    port: port ? port.to_s : "",
    # Hierarchical path first, opaque path as the fallback.
    pathname: parts[WHATWG_PATH] || parts[WHATWG_OPAQUE_PATH] || "",
    query: parts[WHATWG_QUERY] || "",
    fragment: parts[WHATWG_FRAGMENT] || ""
  }
rescue StandardError => error
  # Library errors keep their original message; everything else is wrapped.
  raise if error.is_a?(URIPattern::Error)

  raise URIPattern::Error, "Failed to parse URL #{url.inspect}: #{error.message}"
end
|
|
30
|
+
|
|
31
|
+
# Resolve +relative+ against +base_url+ with URI::WhatwgParser and return the
# resulting URL as a String. Any failure is re-raised as URIPattern::Error.
def resolve(relative, base_url)
  parser = URI::WhatwgParser.new
  absolute = parser.parse(relative, base: base_url)
  absolute.to_s
rescue StandardError => error
  raise URIPattern::Error, "Failed to resolve URL: #{error.message}"
end
|
|
36
|
+
|
|
37
|
+
# Split a URLPattern constructor string into its eight pattern components,
# using the WHATWG URLPattern "parse a constructor string" algorithm:
# https://urlpattern.spec.whatwg.org/#constructor-string-parsing
#
# The pattern is tokenized leniently and handed to ConstructorStringParser.
# The returned Hash always has the keys :protocol, :username, :password,
# :hostname, :port, :pathname, :query and :fragment. A component the parser
# did not record is nil, so that defaults can be applied downstream.
def split_pattern(pattern)
  token_list = URIPattern::Tokenizer.new(pattern, policy: :lenient).tokenize
  parsed = ConstructorStringParser.new(pattern, token_list).parse

  components = %i[protocol username password hostname port pathname].to_h do |name|
    [name, parsed[name]]
  end
  # The parser uses the spec names; this module exposes them as query/fragment.
  components[:query] = parsed[:search]
  components[:fragment] = parsed[:hash]
  components
end
|
|
57
|
+
|
|
58
|
+
# Indices in the array returned by URI::WhatwgParser#split:
# [scheme, userinfo, host, port, nil, path, opaque_path, query, fragment]
# Slot 4 is shown as nil in that layout and therefore has no constant here.
WHATWG_SCHEME = 0
WHATWG_USERINFO = 1
WHATWG_HOST = 2
WHATWG_PORT = 3
WHATWG_PATH = 5
WHATWG_OPAQUE_PATH = 6
WHATWG_QUERY = 7
WHATWG_FRAGMENT = 8

# Default port number per scheme, keyed by lowercase scheme name.
# normalize_port_input looks a protocol up here to blank out a port that
# merely restates the scheme's default.
DEFAULT_PORTS = {
  "http" => 80,
  "https" => 443,
  "ws" => 80,
  "wss" => 443,
  "ftp" => 21
}.freeze
|
|
76
|
+
|
|
77
|
+
# Normalize a port string for use as a match input component.
#
# Tab and form-feed characters are removed, then the leading run of ASCII
# digits is read as the port number. Trailing non-digit text is ignored.
#
# The port is serialized from its integer value, as the WHATWG URL parser
# does, so leading zeros are dropped ("08080" becomes "8080") and a
# zero-padded default port ("0080" for http) is still recognised as default.
#
# port_str - the raw port; anything responding to #to_s.
# protocol - scheme name used to look up the default port (case-insensitive).
#
# Returns "" for an empty port or the scheme's default port, the decimal port
# String otherwise, and nil when the value does not start with a digit or
# exceeds 65535 (parse failure).
def normalize_port_input(port_str, protocol = "")
  port = port_str.to_s.gsub(/[\t\f]/, "")
  return "" if port.empty?

  digits = port[/\A\d+/]
  return nil if digits.nil?

  number = digits.to_i
  return nil if number > 65_535

  # Compare numerically so "0443" and "443" are treated alike.
  DEFAULT_PORTS[protocol.to_s.downcase] == number ? "" : number.to_s
end
|
|
88
|
+
|
|
89
|
+
# Scheme names treated as "special". normalize_hash_input consults this to
# decide whether a pathname is opaque, and ConstructorStringParser tests a
# protocol pattern against each entry.
SPECIAL_SCHEMES_SET = Set.new(%w[http https ws wss ftp file]).freeze
|
|
90
|
+
|
|
91
|
+
# Normalize a hostname used as match input.
#
# CR, LF and tab characters are removed, then the remainder is parsed as the
# host of a throwaway "https://<host>/" URL so URI::WhatwgParser applies its
# host processing. Returns "" for nil/empty input (or input made only of the
# stripped characters). If the parser yields no host, or raises, the stripped
# text is returned unchanged (best effort; errors are deliberately swallowed).
def normalize_hostname_input(hostname)
  blank = hostname.nil? || hostname.empty?
  stripped = blank ? "" : hostname.gsub(/[\r\n\t]/, "")

  if stripped.empty?
    ""
  else
    parsed_host = URI::WhatwgParser.new.split("https://#{stripped}/")[WHATWG_HOST]
    parsed_host || stripped
  end
rescue StandardError
  stripped
end
|
|
100
|
+
|
|
101
|
+
# Normalize a component Hash used as match input, running each value through
# the canonicalizer for its component.
#
# hash - component name => value; values are converted with #to_s. Keys other
#        than the eight known component symbols are copied through as Strings.
#
# Returns a new Hash with the same keys in the same order, or nil as soon as
# the protocol or the port fails to normalize. Errors raised by the
# individual canonicalizers propagate to the caller.
def normalize_hash_input(hash)
  scheme = hash[:protocol].to_s.downcase

  # Opaque path: non-special scheme, no username/password/hostname/port set.
  opaque = !scheme.empty? &&
           !SPECIAL_SCHEMES_SET.include?(scheme) &&
           %i[hostname username password port].all? { |name| hash[name].to_s.empty? }

  normalized = {}
  hash.each_pair do |component, raw|
    text = raw.to_s
    canonical =
      case component
      when :protocol then canonicalize_protocol_input(text)
      when :port     then normalize_port_input(text, scheme)
      when :pathname then canonicalize_pathname_run(text, opaque_path: opaque)
      when :hostname then normalize_hostname_input(text)
      when :username then canonicalize_username_run(text)
      when :password then canonicalize_password_run(text)
      when :query    then canonicalize_search_run(text)
      when :fragment then canonicalize_hash_run(text)
      else text
      end

    # Only protocol and port report failure through nil; that aborts the whole input.
    return nil if canonical.nil? && %i[protocol port].include?(component)

    normalized[component] = canonical
  end
  normalized
end
|
|
140
|
+
|
|
141
|
+
# "canonicalize a protocol" on a match input: a scheme is ASCII, starts with a
|
|
142
|
+
# letter, and contains only letters, digits, "+", "-" and ".". A value with any
|
|
143
|
+
# other code point (e.g. "café") cannot be a protocol, so matching fails.
|
|
144
|
+
def canonicalize_protocol_input(value)
  if value.empty?
    # An absent protocol is valid and stays empty.
    ""
  elsif /\A[a-zA-Z][a-zA-Z0-9+.\-]*\z/.match?(value)
    # Well-formed scheme: canonical form is lowercase.
    value.downcase
  end
  # Anything else falls through and yields nil (cannot be a protocol).
end
|
|
149
|
+
|
|
150
|
+
# --- "dummy URL" canonicalization of a fixed pattern run --------------------
#
# The WHATWG URLPattern spec canonicalizes every fixed-text part of a pattern
# by pushing it through a throwaway ("dummy") URL. That way the URL parser
# itself applies the right percent-encode set and, for pathnames, the
# dot-segment handling. Delegating to the parser avoids hand-maintained
# encode-set tables and keeps the behavior aligned with the spec.
#
# DUMMY_URL is the input the spec uses in "create a dummy URL"
# (https://urlpattern.spec.whatwg.org/ — "Let dummyInput be `https://dummy.invalid/`").
DUMMY_URL = "https://dummy.invalid/"

# Parse DUMMY_URL and return the result, so each caller gets its own object
# to assign a component to.
def dummy_url
  parser = URI::WhatwgParser.new
  parser.parse(DUMMY_URL)
end
|
|
165
|
+
|
|
166
|
+
# "canonicalize a search" / "...hash" / "...username" / "...password": the
|
|
167
|
+
# polyfill sets the corresponding URL component and reads it back. The
|
|
168
|
+
# uri-whatwg_parser setters run the basic URL parser with the matching state
|
|
169
|
+
# override and apply the spec encode sets (special-query for search, userinfo
|
|
170
|
+
# for username/password, etc.).
|
|
171
|
+
def canonicalize_search_run(run)
  # Assign the run as the dummy URL's query, then read the stored value back.
  dummy_url.tap { |url| url.query = run }.query.to_s
rescue StandardError => error
  raise URIPattern::Error, "Invalid search #{run.inspect}: #{error.message}"
end
|
|
178
|
+
|
|
179
|
+
# Canonicalize a fragment run: assign it to a dummy URL's fragment and return
# what the URL object stores, as a String. Failures become URIPattern::Error.
def canonicalize_hash_run(run)
  dummy_url.tap { |url| url.fragment = run }.fragment.to_s
rescue StandardError => error
  raise URIPattern::Error, "Invalid hash #{run.inspect}: #{error.message}"
end
|
|
186
|
+
|
|
187
|
+
# Canonicalize a username run: assign it to a dummy URL's user and return
# what the URL object stores, as a String. Failures become URIPattern::Error.
def canonicalize_username_run(run)
  dummy_url.tap { |url| url.user = run }.user.to_s
rescue StandardError => error
  raise URIPattern::Error, "Invalid username #{run.inspect}: #{error.message}"
end
|
|
194
|
+
|
|
195
|
+
# Canonicalize a password run: assign it to a dummy URL's password and return
# what the URL object stores, as a String. Failures become URIPattern::Error.
def canonicalize_password_run(run)
  dummy_url.tap { |url| url.password = run }.password.to_s
rescue StandardError => error
  raise URIPattern::Error, "Invalid password #{run.inspect}: #{error.message}"
end
|
|
202
|
+
|
|
203
|
+
# "canonicalize a pathname" / "canonicalize an opaque pathname": run the fixed
|
|
204
|
+
# text through a dummy URL via full parsing (so "#"/"?" terminate the path and
|
|
205
|
+
# dot segments collapse, matching the polyfill). A non-opaque run that is not
|
|
206
|
+
# "/"-prefixed gets the spec's "/-" prefix trick so a leading "../" is preserved
|
|
207
|
+
# rather than collapsed against the root.
|
|
208
|
+
def canonicalize_pathname_run(run, opaque_path: false)
  return run if run.empty?

  parser = URI::WhatwgParser.new

  if opaque_path
    # Opaque pathname: parse the run as the body of a "data:" URL.
    parts = parser.split("data:#{run}")
    return (parts[WHATWG_OPAQUE_PATH] || parts[WHATWG_PATH]).to_s
  end

  rooted = run.start_with?("/")
  # A run without a leading "/" gets a temporary "/-" prefix so that a leading
  # "../" is not collapsed against the root; the prefix is removed again below.
  candidate = rooted ? run : "/-#{run}"

  # DUMMY_URL ends in "/" and the candidate starts with one, so drop one of
  # them. The whole URL is parsed (instead of resolving against a base) so a
  # leading "//" stays part of the path and "#"/"?" terminate it.
  path = parser.split(DUMMY_URL.chomp("/") + candidate)[WHATWG_PATH].to_s
  rooted ? path : path.sub(%r{\A/-}, "")
rescue StandardError => error
  raise URIPattern::Error, "Invalid pathname #{run.inspect}: #{error.message}"
end
|
|
227
|
+
end
|
|
228
|
+
|
|
229
|
+
# Implements the WHATWG URLPattern "constructor string parser" state machine.
# https://urlpattern.spec.whatwg.org/#constructor-string-parsing
#
# Walks the (regexp-coalesced) token list with a state machine, recording each
# component into `result` as it is delimited. Component keys use the spec names
# (`:search` / `:hash`); URLParser.split_pattern maps them to `:query` / `:fragment`.
#
# Tokens only need to respond to #type, #value and #index (the token's offset
# into the input string). The list is expected to end with an :end token.
class ConstructorStringParser
  # Token types that count as a plain character in non_special_pattern_char?.
  NON_SPECIAL_CHAR_TYPES = %i[char escaped_char invalid_char].freeze
  # If the token before a "?" has one of these types, the "?" is not treated
  # as the start of the search component (see search_prefix?).
  SEARCH_PREFIX_BLOCKERS = %i[name regexp close asterisk].freeze

  # input  - the constructor string being parsed.
  # tokens - the token list produced for +input+.
  def initialize(input, tokens)
    @input = input
    @tokens = tokens
    @result = {}               # component name => raw component text
    @component_start = 0       # token index where the current component began
    @token_index = 0           # token currently being examined
    @token_increment = 1       # how far the main loop advances after a step
    @group_depth = 0           # nesting level of :open/:close groups
    @ipv6_depth = 0            # nesting level of "[" / "]" inside the hostname
    @protocol_special = false  # set by compute_protocol_matches_special_scheme
    @state = :init
  end

  # Run the state machine over every token.
  #
  # Returns the result Hash, keyed by the state names :protocol, :username,
  # :password, :hostname, :port, :pathname, :search and :hash. Components that
  # were never delimited are simply absent.
  def parse
    while @token_index < @tokens.length
      # Default advance; rewind/change_state reset it to 0 when they reposition.
      @token_increment = 1

      if current.type == :end
        case @state
        when :init
          # No protocol was found: start over and decide from the first token
          # whether the string is a bare hash, search or pathname.
          rewind
          if hash_prefix?
            change_state(:hash, 1)
          elsif search_prefix?
            change_state(:search, 1)
          else
            change_state(:pathname, 0)
          end
          @token_index += @token_increment
          next
        when :authority
          # No "@" was seen, so the authority is re-read as a hostname.
          rewind_and_set_state(:hostname)
          @token_index += @token_increment
          next
        else
          # Any other state: store the final component and stop.
          change_state(:done, 0)
          break
        end
      end

      if group_open?
        @group_depth += 1
        @token_index += @token_increment
        next
      end

      # Inside a group nothing is a delimiter; skip everything except the
      # matching close token.
      if @group_depth.positive?
        if group_close?
          @group_depth -= 1
        else
          @token_index += @token_increment
          next
        end
      end

      step_state

      @token_index += @token_increment
    end

    # A hostname without an explicit port gets an empty port.
    @result[:port] = "" if @result.key?(:hostname) && !@result.key?(:port)
    @result
  end

  private

  # Perform the transition (if any) that the current token triggers in the
  # current state.
  def step_state
    case @state
    when :init
      # A ":" means everything so far was the protocol; re-read it as such.
      rewind_and_set_state(:protocol) if protocol_suffix?
    when :protocol
      step_protocol
    when :authority
      # "@" means credentials are present; a path/search/hash delimiter means
      # the authority was only a hostname. Either way re-read from the start.
      if identity_terminator?
        rewind_and_set_state(:username)
      elsif pathname_start? || search_prefix? || hash_prefix?
        rewind_and_set_state(:hostname)
      end
    when :username
      if password_prefix?
        change_state(:password, 1)
      elsif identity_terminator?
        change_state(:hostname, 1)
      end
    when :password
      change_state(:hostname, 1) if identity_terminator?
    when :hostname
      step_hostname
    when :port
      step_port_or_pathname
    when :pathname
      if search_prefix?
        change_state(:search, 1)
      elsif hash_prefix?
        change_state(:hash, 1)
      end
    when :search
      change_state(:hash, 1) if hash_prefix?
    when :hash
      # nothing to do
    end
  end

  # On the ":" that ends the protocol, choose the next state: :authority when
  # "//" follows (skipping "://") or when the protocol matches a special
  # scheme, otherwise :pathname.
  def step_protocol
    return unless protocol_suffix?

    compute_protocol_matches_special_scheme
    next_state = :pathname
    skip = 1
    if next_is_authority_slashes?
      next_state = :authority
      skip = 3
    elsif @protocol_special
      next_state = :authority
    end
    change_state(next_state, skip)
  end

  # Hostname handling: track "[" / "]" nesting so that a ":" inside brackets
  # is not taken as the port separator.
  def step_hostname
    if ipv6_open?
      @ipv6_depth += 1
    elsif ipv6_close?
      @ipv6_depth -= 1
    elsif port_prefix? && @ipv6_depth.zero?
      change_state(:port, 1)
    else
      step_port_or_pathname
    end
  end

  # Shared tail of the hostname and port states: leave for pathname, search
  # or hash when the matching delimiter appears. The "/" is kept as part of
  # the pathname (skip 0); "?" and "#" are dropped (skip 1).
  def step_port_or_pathname
    if pathname_start?
      change_state(:pathname, 0)
    elsif search_prefix?
      change_state(:search, 1)
    elsif hash_prefix?
      change_state(:hash, 1)
    end
  end

  # The token at the current index.
  def current
    @tokens[@token_index]
  end

  # "get a safe token": out-of-range indices resolve to the trailing :end token.
  def safe_token(index)
    return @tokens[index] if index < @tokens.length
    @tokens[@tokens.length - 1]
  end

  # True when the token at +index+ has exactly +value+ and is one of the
  # plain-character token types.
  def non_special_pattern_char?(index, value)
    token = safe_token(index)
    return false unless token.value == value
    NON_SPECIAL_CHAR_TYPES.include?(token.type)
  end

  # Delimiter predicates for the current token. Several share the same
  # character; which one applies depends on the state that asks.
  def protocol_suffix? = non_special_pattern_char?(@token_index, ":")
  def identity_terminator? = non_special_pattern_char?(@token_index, "@")
  def password_prefix? = non_special_pattern_char?(@token_index, ":")
  def port_prefix? = non_special_pattern_char?(@token_index, ":")
  def pathname_start? = non_special_pattern_char?(@token_index, "/")
  def hash_prefix? = non_special_pattern_char?(@token_index, "#")
  def ipv6_open? = non_special_pattern_char?(@token_index, "[")
  def ipv6_close? = non_special_pattern_char?(@token_index, "]")
  def group_open? = current.type == :open
  def group_close? = current.type == :close

  # True when the current token starts the search component. A plain "?"
  # always does. A "?" of another token type does too, unless the token before
  # it has a SEARCH_PREFIX_BLOCKERS type; a "?" at index 0 always counts.
  def search_prefix?
    return true if non_special_pattern_char?(@token_index, "?")
    return false unless current.value == "?"

    previous_index = @token_index - 1
    return true if previous_index.negative?

    !SEARCH_PREFIX_BLOCKERS.include?(safe_token(previous_index).type)
  end

  # True when the two tokens after the current one are both plain "/".
  def next_is_authority_slashes?
    non_special_pattern_char?(@token_index + 1, "/") &&
      non_special_pattern_char?(@token_index + 2, "/")
  end

  # Finish the current component and move to +new_state+, skipping +skip+
  # tokens. The component text is stored under the current state's name,
  # except in the :init, :authority and :done states, which own no component.
  def change_state(new_state, skip)
    unless %i[init authority done].include?(@state)
      @result[@state] = make_component_string
    end

    apply_implicit_defaults(new_state) if @state != :init && new_state != :done

    change_state_without_setting_component(new_state, skip)
  end

  # Advance to +new_state+, skipping +skip+ tokens and marking the new component's
  # start, without finalizing the current component or applying defaults. Mirrors
  # the spec/polyfill "change state without setting component" helper.
  def change_state_without_setting_component(new_state, skip)
    @state = new_state
    @token_index += skip
    @component_start = @token_index
    # The index was already moved; stop the main loop from advancing again.
    @token_increment = 0
  end

  # When a transition skips over earlier components, those components still need a
  # value. Per the spec's constructor-string parser, jumping from an authority-side
  # state straight to a later one fills the skipped slots with their defaults
  # (empty, or "/" for a special-scheme pathname). Driven by @state -> new_state.
  def apply_implicit_defaults(new_state)
    # Hostname was skipped.
    if %i[protocol authority username password].include?(@state) &&
       %i[port pathname search hash].include?(new_state) &&
       !@result.key?(:hostname)
      @result[:hostname] = ""
    end
    # Pathname was skipped.
    if %i[protocol authority username password hostname port].include?(@state) &&
       %i[search hash].include?(new_state) &&
       !@result.key?(:pathname)
      @result[:pathname] = @protocol_special ? "/" : ""
    end
    # Search was skipped.
    if %i[protocol authority username password hostname port pathname].include?(@state) &&
       new_state == :hash &&
       !@result.key?(:search)
      @result[:search] = ""
    end
  end

  # Go back to the first token of the current component; the main loop will
  # re-examine it instead of advancing.
  def rewind
    @token_index = @component_start
    @token_increment = 0
  end

  # Rewind and re-read the same tokens in +new_state+. Nothing is stored.
  def rewind_and_set_state(new_state)
    rewind
    @state = new_state
  end

  # Text of the current component: the slice of the input from the index of
  # the component's first token up to, but excluding, the current token's index.
  def make_component_string
    token = @tokens[@token_index]
    start_token = safe_token(@component_start)
    @input[start_token.index...token.index]
  end

  # Set @protocol_special: compile the protocol text as a protocol component
  # pattern and check whether it matches any entry of SPECIAL_SCHEMES_SET.
  # A URIPattern::Error raised along the way counts as "not special".
  def compute_protocol_matches_special_scheme
    protocol_string = make_component_string
    compiled = URIPattern::ComponentPattern.new(protocol_string, component: :protocol)
    @protocol_special = URLParser::SPECIAL_SCHEMES_SET.any? { |scheme| compiled.match(scheme) }
  rescue URIPattern::Error
    @protocol_special = false
  end
end
|
|
487
|
+
end
|