uri-whatwg_parser 0.1.2 → 0.1.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +19 -0
- data/README.md +2 -1
- data/lib/uri/whatwg_parser/host_parser.rb +2 -2
- data/lib/uri/whatwg_parser/parser_helper.rb +11 -7
- data/lib/uri/whatwg_parser/version.rb +1 -1
- data/lib/uri/whatwg_parser.rb +108 -133
- metadata +5 -18
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 2061d819a10365a97f389d9cb70325c87b75c390c4145897ed640c3e4cc85f70
|
4
|
+
data.tar.gz: 905ec3bdd174039fc91ce433f84a079fe12ff93f9ac999f420b08760b3fcee53
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: fc4a0d83fb46e959feb1f7990fa1f119428764447c6d4d455ec5287d5823b19eec845ef4b8912ceeffd6233266b3142859a837c7a26e47e20315d64bb5660006
|
7
|
+
data.tar.gz: bd36808311ae2330cd897ba824219d82cff0497d75531441b3679def9ff5ea1969297ccfcbb8cd02b83909a2f7a5c0de7f0d505bb79e1117494544dd3d4b3cfa
|
data/CHANGELOG.md
ADDED
data/README.md
CHANGED
@@ -17,6 +17,8 @@ gem install uri-whatwg_parser
|
|
17
17
|
This gem is compatible with [`uri`](https://github.com/ruby/uri) gem and automatically switches parser's behavior. So users don't need to set up.
|
18
18
|
|
19
19
|
```ruby
|
20
|
+
require "uri/whatwg_parser"
|
21
|
+
|
20
22
|
URI.parse("http://日本語.jp")
|
21
23
|
# => #<URI::HTTP http://xn--wgv71a119e.jp>
|
22
24
|
```
|
@@ -31,7 +33,6 @@ To install this gem onto your local machine, run `bundle exec rake install`. To
|
|
31
33
|
|
32
34
|
* Support state override
|
33
35
|
* Support validations
|
34
|
-
* Support encodings other than UTF-8
|
35
36
|
|
36
37
|
## Contributing
|
37
38
|
|
@@ -156,11 +156,11 @@ class URI::WhatwgParser
|
|
156
156
|
end
|
157
157
|
|
158
158
|
def include_forbidden_domain_code_point?(str)
|
159
|
-
str.
|
159
|
+
FORBIDDEN_DOMAIN_CODE_POINT.any? {|c| str.include?(c) }
|
160
160
|
end
|
161
161
|
|
162
162
|
def include_forbidden_host_code_point?(str)
|
163
|
-
str.
|
163
|
+
FORBIDDEN_HOST_CODE_POINT.any? {|c| str.include?(c) }
|
164
164
|
end
|
165
165
|
end
|
166
166
|
end
|
@@ -2,8 +2,9 @@
|
|
2
2
|
|
3
3
|
class URI::WhatwgParser
|
4
4
|
module ParserHelper
|
5
|
-
|
6
|
-
|
5
|
+
C0_CONTROL_PERCENT_ENCODE_SET = (0..0x1f).map(&:chr)
|
6
|
+
ASCII_ALPHA = ("a".."z").to_a + ("A".."Z").to_a
|
7
|
+
ASCII_DIGIT = ("0".."9").to_a
|
7
8
|
|
8
9
|
def ascii_alpha?(c)
|
9
10
|
ASCII_ALPHA.include?(c)
|
@@ -17,11 +18,14 @@ class URI::WhatwgParser
|
|
17
18
|
ASCII_DIGIT.include?(c)
|
18
19
|
end
|
19
20
|
|
20
|
-
def percent_encode(c, encode_set)
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
c
|
21
|
+
def percent_encode(c, encode_set, encoding = Encoding::UTF_8)
|
22
|
+
return c unless encode_set.include?(c) || c.ord > 0x7e
|
23
|
+
|
24
|
+
# For ASCII single-byte characters
|
25
|
+
return "%%%02X" % c.ord if c.bytesize == 1
|
26
|
+
|
27
|
+
bytes = c.encoding == encoding ? c.bytes : c.encode(encoding).bytes
|
28
|
+
bytes.map { |b| "%%%02X" % b }.join
|
25
29
|
end
|
26
30
|
end
|
27
31
|
end
|
data/lib/uri/whatwg_parser.rb
CHANGED
@@ -1,6 +1,5 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
|
-
require "strscan"
|
4
3
|
require "uri"
|
5
4
|
require_relative "whatwg_parser/error"
|
6
5
|
require_relative "whatwg_parser/version"
|
@@ -12,8 +11,6 @@ module URI
|
|
12
11
|
include ParserHelper
|
13
12
|
|
14
13
|
SPECIAL_SCHEME = { "ftp" => 21, "file" => nil, "http" => 80, "https" => 443, "ws" => 80, "wss" => 443 }
|
15
|
-
ASCII_ALPHA = ("a".."z").to_a + ("A".."Z").to_a
|
16
|
-
ASCII_DIGIT = ("0".."9").to_a
|
17
14
|
|
18
15
|
FRAGMENT_PERCENT_ENCODE_SET = C0_CONTROL_PERCENT_ENCODE_SET + [" ", "\"", "<", ">", "`"]
|
19
16
|
QUERY_PERCENT_ENCODE_SET = C0_CONTROL_PERCENT_ENCODE_SET + [" ", "\"", "#", "<", ">"]
|
@@ -37,45 +34,40 @@ module URI
|
|
37
34
|
{}
|
38
35
|
end
|
39
36
|
|
40
|
-
def parse(uri, base = nil) # :nodoc:
|
37
|
+
def parse(uri, base = nil, encoding = Encoding::UTF_8) # :nodoc:
|
41
38
|
reset
|
42
|
-
URI.for(*self.split(uri, base))
|
39
|
+
URI.for(*self.split(uri, base, encoding))
|
43
40
|
end
|
44
41
|
|
45
|
-
def split(uri, base = nil) # :nodoc:
|
42
|
+
def split(uri, base = nil, encoding = Encoding::UTF_8) # :nodoc:
|
46
43
|
@base = nil
|
47
44
|
if base != nil
|
48
|
-
ary = split(base)
|
45
|
+
ary = split(base, nil, encoding)
|
49
46
|
@base = { scheme: ary[0], userinfo: ary[1], host: ary[2], port: ary[3], registry: ary[4], path: ary[5], opaque: ary[6], query: ary[7], fragment: ary[8]}
|
50
47
|
@base_paths = @paths
|
51
48
|
reset
|
52
49
|
end
|
53
50
|
|
54
|
-
|
55
|
-
uri.
|
56
|
-
uri.
|
57
|
-
uri.
|
58
|
-
uri.delete!("\
|
59
|
-
uri.delete!("\
|
51
|
+
@encoding = encoding
|
52
|
+
@uri = uri.dup
|
53
|
+
@uri.sub!(/\A[\u0000-\u0020]*/, "")
|
54
|
+
@uri.sub!(/[\u0000-\u0020]*\z/, "")
|
55
|
+
@uri.delete!("\t")
|
56
|
+
@uri.delete!("\n")
|
57
|
+
@uri.delete!("\r")
|
60
58
|
|
61
59
|
raise ParseError, "uri can't be empty" if uri.empty? && @base.nil?
|
62
60
|
|
63
|
-
@
|
61
|
+
@pos = 0
|
64
62
|
|
65
|
-
|
66
|
-
c = @
|
67
|
-
send(
|
68
|
-
|
69
|
-
if @force_continue
|
70
|
-
@force_continue = false
|
71
|
-
next
|
72
|
-
end
|
73
|
-
|
74
|
-
break if c.nil? && @scanner.eos?
|
63
|
+
while @pos <= @uri.length
|
64
|
+
c = @uri[@pos]
|
65
|
+
send(@state, c)
|
66
|
+
@pos += 1
|
75
67
|
end
|
76
68
|
|
77
69
|
@parse_result[:userinfo] = "#{@username}:#{@password}" if !@username.nil? || !@password.nil?
|
78
|
-
@parse_result[:path] = "/#{@paths.join("/")}" if !@paths.empty?
|
70
|
+
@parse_result[:path] = "/#{@paths.join("/")}" if @paths && !@paths.empty?
|
79
71
|
|
80
72
|
@parse_result.values
|
81
73
|
end
|
@@ -95,12 +87,11 @@ module URI
|
|
95
87
|
private
|
96
88
|
|
97
89
|
def reset
|
98
|
-
@scanner = nil
|
99
90
|
@buffer = +""
|
100
91
|
@at_sign_seen = nil
|
101
92
|
@password_token_seen = nil
|
102
93
|
@inside_brackets = nil
|
103
|
-
@paths =
|
94
|
+
@paths = nil
|
104
95
|
@username = nil
|
105
96
|
@password = nil
|
106
97
|
@parse_result = { scheme: nil, userinfo: nil, host: nil, port: nil, registry: nil, path: nil, opaque: nil, query: nil, fragment: nil }
|
@@ -108,23 +99,19 @@ module URI
|
|
108
99
|
@state = :scheme_start_state
|
109
100
|
end
|
110
101
|
|
111
|
-
def
|
102
|
+
def scheme_start_state(c)
|
112
103
|
if ascii_alpha?(c)
|
113
|
-
@buffer
|
104
|
+
@buffer << c.downcase
|
114
105
|
@state = :scheme_state
|
115
106
|
else
|
116
|
-
|
117
|
-
@force_continue = true
|
118
|
-
else
|
119
|
-
@scanner.pos -= c.bytesize
|
120
|
-
end
|
107
|
+
@pos -= 1
|
121
108
|
@state = :no_scheme_state
|
122
109
|
end
|
123
110
|
end
|
124
111
|
|
125
|
-
def
|
112
|
+
def scheme_state(c)
|
126
113
|
if ascii_alphanumerica?(c) || ["+", "-", "."].include?(c)
|
127
|
-
@buffer
|
114
|
+
@buffer << c.downcase
|
128
115
|
elsif c == ":"
|
129
116
|
@parse_result[:scheme] = @buffer
|
130
117
|
@buffer = +""
|
@@ -135,58 +122,58 @@ module URI
|
|
135
122
|
@state = :special_relative_or_authority_state
|
136
123
|
elsif special_url?
|
137
124
|
@state = :special_authority_slashes_state
|
138
|
-
elsif
|
125
|
+
elsif rest.start_with?("/")
|
139
126
|
@state = :path_or_authority_state
|
140
|
-
@
|
127
|
+
@pos += 1
|
141
128
|
else
|
142
|
-
@parse_result[:path] =
|
129
|
+
@parse_result[:path] = nil
|
143
130
|
@state = :opaque_path_state
|
144
131
|
end
|
145
132
|
else
|
146
|
-
@buffer
|
147
|
-
|
133
|
+
@buffer.clear
|
134
|
+
@pos -= 1
|
148
135
|
@state = :no_scheme_state
|
149
136
|
end
|
150
137
|
end
|
151
138
|
|
152
|
-
def
|
139
|
+
def no_scheme_state(c)
|
153
140
|
raise ParseError, "scheme is missing" if @base.nil? || !@base[:opaque].nil? && c != "#"
|
154
141
|
|
155
142
|
if !@base[:opaque].nil? && c == "#"
|
156
143
|
@parse_result[:scheme] = @base[:scheme]
|
157
144
|
@parse_result[:path] = @base[:path]
|
158
145
|
@parse_result[:query] = @base[:query]
|
159
|
-
@parse_result[:fragment] =
|
146
|
+
@parse_result[:fragment] = nil
|
160
147
|
@state = :fragment_state
|
161
148
|
elsif @base[:scheme] != "file"
|
162
149
|
@state = :relative_state
|
163
|
-
|
150
|
+
@pos -= 1
|
164
151
|
else
|
165
152
|
@state = :file_state
|
166
|
-
|
153
|
+
@pos -= 1
|
167
154
|
end
|
168
155
|
end
|
169
156
|
|
170
|
-
def
|
171
|
-
if c == "/" &&
|
157
|
+
def special_relative_or_authority_state(c)
|
158
|
+
if c == "/" && rest.start_with?("/")
|
172
159
|
@state = :special_authority_ignore_slashes_state
|
173
|
-
|
160
|
+
@pos -= 1
|
174
161
|
else
|
175
162
|
@state = :relative_state
|
176
|
-
|
163
|
+
@pos -= 1
|
177
164
|
end
|
178
165
|
end
|
179
166
|
|
180
|
-
def
|
167
|
+
def path_or_authority_state(c)
|
181
168
|
if c == "/"
|
182
169
|
@state = :authority_state
|
183
170
|
else
|
184
171
|
@state = :path_state
|
185
|
-
|
172
|
+
@pos -= 1
|
186
173
|
end
|
187
174
|
end
|
188
175
|
|
189
|
-
def
|
176
|
+
def relative_state(c)
|
190
177
|
@parse_result[:scheme] = @base[:scheme]
|
191
178
|
if c == "/"
|
192
179
|
@state = :relative_slash_state
|
@@ -200,21 +187,21 @@ module URI
|
|
200
187
|
@parse_result[:query] = @base[:query]
|
201
188
|
|
202
189
|
if c == "?"
|
203
|
-
@parse_result[:query] =
|
190
|
+
@parse_result[:query] = nil
|
204
191
|
@state = :query_state
|
205
192
|
elsif c == "#"
|
206
|
-
@parse_result[:fragment] =
|
193
|
+
@parse_result[:fragment] = nil
|
207
194
|
@state = :fragment_state
|
208
195
|
elsif !c.nil?
|
209
196
|
@parse_result[:query] = nil
|
210
197
|
shorten_url_path
|
211
198
|
@state = :path_state
|
212
|
-
@
|
199
|
+
@pos -= 1
|
213
200
|
end
|
214
201
|
end
|
215
202
|
end
|
216
203
|
|
217
|
-
def
|
204
|
+
def relative_slash_state(c)
|
218
205
|
if special_url? && (c == "/" || c == "\\")
|
219
206
|
@state = :special_authority_ignore_slashes_state
|
220
207
|
elsif c == "/"
|
@@ -224,28 +211,28 @@ module URI
|
|
224
211
|
@parse_result[:host] = @base[:host]
|
225
212
|
@parse_result[:port] = @base[:port]
|
226
213
|
@state = :path_state
|
227
|
-
|
214
|
+
@pos -= 1
|
228
215
|
end
|
229
216
|
end
|
230
217
|
|
231
|
-
def
|
232
|
-
if c == "/" &&
|
218
|
+
def special_authority_slashes_state(c)
|
219
|
+
if c == "/" && rest.start_with?("/")
|
233
220
|
@state = :special_authority_ignore_slashes_state
|
234
|
-
@
|
221
|
+
@pos += 1
|
235
222
|
else
|
236
223
|
@state = :special_authority_ignore_slashes_state
|
237
|
-
|
224
|
+
@pos -= 1
|
238
225
|
end
|
239
226
|
end
|
240
227
|
|
241
|
-
def
|
228
|
+
def special_authority_ignore_slashes_state(c)
|
242
229
|
if c != "/" && c != "\\"
|
243
230
|
@state = :authority_state
|
244
|
-
|
231
|
+
@pos -= 1
|
245
232
|
end
|
246
233
|
end
|
247
234
|
|
248
|
-
def
|
235
|
+
def authority_state(c)
|
249
236
|
if c == "@"
|
250
237
|
@buffer.prepend("%40") if @at_sign_seen
|
251
238
|
@at_sign_seen = true
|
@@ -255,7 +242,7 @@ module URI
|
|
255
242
|
next
|
256
243
|
end
|
257
244
|
|
258
|
-
encoded_char = percent_encode(char, USERINFO_PERCENT_ENCODE_SET)
|
245
|
+
encoded_char = percent_encode(char, USERINFO_PERCENT_ENCODE_SET, @encoding)
|
259
246
|
|
260
247
|
if @password_token_seen
|
261
248
|
@password = @password.to_s + encoded_char
|
@@ -264,48 +251,44 @@ module URI
|
|
264
251
|
end
|
265
252
|
end
|
266
253
|
|
267
|
-
@buffer
|
254
|
+
@buffer.clear
|
268
255
|
elsif c.nil? || ["/", "?", "#"].include?(c) || (special_url? && c == "\\")
|
269
256
|
raise ParseError, "host is missing" if @at_sign_seen && @buffer.empty?
|
270
|
-
|
271
|
-
|
272
|
-
|
273
|
-
else
|
274
|
-
@scanner.pos -= (@buffer.bytesize + c.bytesize.to_i)
|
275
|
-
end
|
276
|
-
@buffer = +""
|
257
|
+
|
258
|
+
@pos -= (@buffer.size + 1)
|
259
|
+
@buffer.clear
|
277
260
|
@state = :host_state
|
278
261
|
else
|
279
262
|
@buffer << c
|
280
263
|
end
|
281
264
|
end
|
282
265
|
|
283
|
-
def
|
266
|
+
def host_state(c)
|
284
267
|
if c == ":" && !@inside_brackets
|
285
268
|
raise ParseError, "host is missing" if @buffer.empty?
|
286
269
|
|
287
270
|
@parse_result[:host] = @host_parser.parse(@buffer, !special_url?)
|
288
|
-
@buffer
|
271
|
+
@buffer.clear
|
289
272
|
@state = :port_state
|
290
273
|
elsif c.nil? || ["/", "?", "#"].include?(c) || (special_url? && c == "\\")
|
291
|
-
|
274
|
+
@pos -= 1
|
292
275
|
if special_url? && @buffer.empty?
|
293
276
|
raise ParseError, "host is missing"
|
294
277
|
else
|
295
278
|
@parse_result[:host] = @host_parser.parse(@buffer, !special_url?)
|
296
|
-
@buffer
|
279
|
+
@buffer.clear
|
297
280
|
@state = :path_start_state
|
298
281
|
end
|
299
282
|
else
|
300
283
|
@inside_brackets = true if c == "["
|
301
284
|
@inside_brackets = false if c == "]"
|
302
|
-
@buffer
|
285
|
+
@buffer << c
|
303
286
|
end
|
304
287
|
end
|
305
288
|
|
306
|
-
def
|
289
|
+
def port_state(c)
|
307
290
|
if ascii_digit?(c)
|
308
|
-
@buffer
|
291
|
+
@buffer << c
|
309
292
|
elsif c.nil? || ["/", "?", "#"].include?(c) || (special_url? && c == "\\")
|
310
293
|
unless @buffer.empty?
|
311
294
|
begin
|
@@ -316,19 +299,19 @@ module URI
|
|
316
299
|
raise ParseError, "port is invalid value"
|
317
300
|
end
|
318
301
|
|
319
|
-
@buffer
|
302
|
+
@buffer.clear
|
320
303
|
end
|
321
304
|
|
322
305
|
@state = :path_start_state
|
323
|
-
|
306
|
+
@pos -= 1
|
324
307
|
else
|
325
308
|
raise ParseError, "port is invalid value"
|
326
309
|
end
|
327
310
|
end
|
328
311
|
|
329
|
-
def
|
312
|
+
def file_state(c)
|
330
313
|
@parse_result[:scheme] = "file"
|
331
|
-
@parse_result[:host] =
|
314
|
+
@parse_result[:host] = nil
|
332
315
|
|
333
316
|
if c == "/" || c == "\\"
|
334
317
|
@state = :file_slash_state
|
@@ -336,50 +319,50 @@ module URI
|
|
336
319
|
@parse_result[:host] = @base[:host]
|
337
320
|
@parse_result[:query] = @base[:query]
|
338
321
|
if c == "?"
|
339
|
-
@parse_result[:query] =
|
322
|
+
@parse_result[:query] = nil
|
340
323
|
@state = :query_state
|
341
324
|
elsif c == "#"
|
342
|
-
@parse_result[:fragment] =
|
325
|
+
@parse_result[:fragment] = nil
|
343
326
|
@state = :fragment_state
|
344
327
|
elsif !c.nil?
|
345
328
|
@parse_result[:query] = nil
|
346
|
-
if !starts_with_windows_drive_letter?(
|
329
|
+
if !starts_with_windows_drive_letter?(rest)
|
347
330
|
shorten_url_path
|
348
331
|
else
|
349
|
-
@paths =
|
332
|
+
@paths = nil
|
350
333
|
end
|
351
334
|
@state = :path_state
|
352
|
-
|
335
|
+
@pos -= 1
|
353
336
|
end
|
354
337
|
else
|
355
338
|
@state = :path_state
|
356
|
-
|
339
|
+
@pos -= 1
|
357
340
|
end
|
358
341
|
end
|
359
342
|
|
360
|
-
def
|
343
|
+
def file_slash_state(c)
|
361
344
|
if c == "/" || c == "\\"
|
362
345
|
@state = :file_host_state
|
363
346
|
else
|
364
347
|
if !@base.nil? && @base[:scheme] == "file"
|
365
348
|
@parse_result[:host] = @base[:host]
|
366
|
-
if !starts_with_windows_drive_letter?(
|
367
|
-
@paths[0]
|
349
|
+
if !starts_with_windows_drive_letter?(rest) && @base_paths && normalized_windows_drive_letter?(@base_paths[0])
|
350
|
+
@paths[0] << @base_paths[0]
|
368
351
|
end
|
369
352
|
end
|
370
353
|
@state = :path_state
|
371
|
-
|
354
|
+
@pos -= 1
|
372
355
|
end
|
373
356
|
end
|
374
357
|
|
375
|
-
def
|
358
|
+
def file_host_state(c)
|
376
359
|
if c.nil? || c == "/" || c == "\\" || c == "?" || c == "#"
|
377
|
-
@
|
360
|
+
@pos -= 1
|
378
361
|
|
379
362
|
if windows_drive_letter?(@buffer)
|
380
363
|
@state = :path_state
|
381
364
|
elsif @buffer.empty?
|
382
|
-
@parse_result[:host] =
|
365
|
+
@parse_result[:host] = nil
|
383
366
|
@state = :path_start_state
|
384
367
|
else
|
385
368
|
host = @host_parser.parse(@buffer, !special_url?)
|
@@ -387,31 +370,31 @@ module URI
|
|
387
370
|
@parse_result[:host] = host
|
388
371
|
end
|
389
372
|
|
390
|
-
@buffer
|
373
|
+
@buffer.clear
|
391
374
|
@state = :path_start_state
|
392
375
|
end
|
393
376
|
end
|
394
377
|
|
395
|
-
@buffer
|
378
|
+
@buffer << c unless c.nil?
|
396
379
|
end
|
397
380
|
|
398
|
-
def
|
399
|
-
return if c.nil?
|
400
|
-
|
381
|
+
def path_start_state(c)
|
401
382
|
if special_url?
|
402
|
-
@
|
383
|
+
@pos -= 1 if c != "/" && c != "\\"
|
403
384
|
@state = :path_state
|
404
385
|
elsif c == "?"
|
405
386
|
@state = :query_state
|
406
387
|
elsif c == "#"
|
407
388
|
@state = :fragment_state
|
408
389
|
elsif c != nil
|
409
|
-
@
|
390
|
+
@pos -= 1 if c != "/"
|
410
391
|
@state = :path_state
|
411
392
|
end
|
412
393
|
end
|
413
394
|
|
414
|
-
def
|
395
|
+
def path_state(c)
|
396
|
+
@paths ||= []
|
397
|
+
|
415
398
|
if (c.nil? || c == "/") || (special_url? && c == "\/") || (c == "?" || c == "#")
|
416
399
|
|
417
400
|
if double_dot_path_segments?(@buffer)
|
@@ -431,57 +414,53 @@ module URI
|
|
431
414
|
@buffer = +""
|
432
415
|
|
433
416
|
if c == "?"
|
434
|
-
@parse_result[:query] =
|
417
|
+
@parse_result[:query] = nil
|
435
418
|
@state = :query_state
|
436
419
|
elsif c == "#"
|
437
|
-
@parse_result[:frament] =
|
420
|
+
@parse_result[:frament] = nil
|
438
421
|
@state = :fragment_state
|
439
422
|
end
|
440
423
|
else
|
441
|
-
@buffer << percent_encode(c, PATH_PERCENT_ENCODE_SET)
|
424
|
+
@buffer << percent_encode(c, PATH_PERCENT_ENCODE_SET, @encoding)
|
442
425
|
end
|
443
426
|
end
|
444
427
|
|
445
|
-
def
|
428
|
+
def opaque_path_state(c)
|
446
429
|
if c == "?"
|
447
|
-
@parse_result[:query] =
|
430
|
+
@parse_result[:query] = nil
|
448
431
|
@state = :query_state
|
449
432
|
elsif c == "#"
|
450
|
-
@parse_result[:fragment] =
|
433
|
+
@parse_result[:fragment] = nil
|
451
434
|
@state = :fragment_state
|
452
435
|
elsif c == " "
|
453
|
-
if
|
436
|
+
if rest.start_with?("?") || rest.start_with?("#")
|
454
437
|
@parse_result[:path] = @parse_result[:path].to_s + "%20"
|
455
438
|
else
|
456
439
|
@parse_result[:path] = @parse_result[:path].to_s + " "
|
457
440
|
end
|
458
441
|
elsif !c.nil?
|
459
|
-
@parse_result[:path] = @parse_result[:path].to_s + percent_encode(c, C0_CONTROL_PERCENT_ENCODE_SET)
|
442
|
+
@parse_result[:path] = @parse_result[:path].to_s + percent_encode(c, C0_CONTROL_PERCENT_ENCODE_SET, @encoding)
|
460
443
|
end
|
461
444
|
end
|
462
445
|
|
463
|
-
def
|
446
|
+
def query_state(c)
|
447
|
+
if @encoding != Encoding::UTF_8 && (!special_url? || %w[ws wss].include?(@parse_result[:scheme]))
|
448
|
+
@encoding = Encoding::UTF_8
|
449
|
+
end
|
450
|
+
|
464
451
|
if c.nil? || c == "#"
|
465
452
|
query_percent_encode_set = special_url? ? SPECIAL_QUERY_PERCENT_ENCODE_SET : QUERY_PERCENT_ENCODE_SET
|
466
|
-
@parse_result[:query] = @buffer.chars.map { |c| percent_encode(c, query_percent_encode_set) }.join
|
467
|
-
@buffer
|
453
|
+
@parse_result[:query] = @buffer.chars.map { |c| percent_encode(c, query_percent_encode_set, @encoding) }.join
|
454
|
+
@buffer.clear
|
468
455
|
@state = :fragment_state if c == "#"
|
469
456
|
elsif !c.nil?
|
470
457
|
@buffer << c
|
471
458
|
end
|
472
459
|
end
|
473
460
|
|
474
|
-
def
|
461
|
+
def fragment_state(c)
|
475
462
|
return if c.nil?
|
476
|
-
@parse_result[:fragment] = @parse_result[:fragment].to_s + percent_encode(c, FRAGMENT_PERCENT_ENCODE_SET)
|
477
|
-
end
|
478
|
-
|
479
|
-
def c0_control_or_space?(c)
|
480
|
-
c0_control? || c == " "
|
481
|
-
end
|
482
|
-
|
483
|
-
def c0_control?(c)
|
484
|
-
C0_CONTROL.include?(c.ord)
|
463
|
+
@parse_result[:fragment] = @parse_result[:fragment].to_s + percent_encode(c, FRAGMENT_PERCENT_ENCODE_SET, @encoding)
|
485
464
|
end
|
486
465
|
|
487
466
|
def windows_drive_letter?(str)
|
@@ -497,7 +476,7 @@ module URI
|
|
497
476
|
end
|
498
477
|
|
499
478
|
def special_url?
|
500
|
-
SPECIAL_SCHEME.
|
479
|
+
SPECIAL_SCHEME.key?(@parse_result[:scheme])
|
501
480
|
end
|
502
481
|
|
503
482
|
def single_dot_path_segments?(c)
|
@@ -515,12 +494,8 @@ module URI
|
|
515
494
|
@parse_result[:path]&.chomp!
|
516
495
|
end
|
517
496
|
|
518
|
-
def
|
519
|
-
|
520
|
-
@force_continue = true
|
521
|
-
else
|
522
|
-
@scanner.pos -= c.bytesize
|
523
|
-
end
|
497
|
+
def rest
|
498
|
+
@uri[@pos+1..]
|
524
499
|
end
|
525
500
|
end
|
526
501
|
end
|
metadata
CHANGED
@@ -1,42 +1,28 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: uri-whatwg_parser
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.
|
4
|
+
version: 0.1.4
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Yuji Yaginuma
|
8
8
|
bindir: exe
|
9
9
|
cert_chain: []
|
10
|
-
date: 2025-
|
10
|
+
date: 2025-06-12 00:00:00.000000000 Z
|
11
11
|
dependencies:
|
12
|
-
- !ruby/object:Gem::Dependency
|
13
|
-
name: strscan
|
14
|
-
requirement: !ruby/object:Gem::Requirement
|
15
|
-
requirements:
|
16
|
-
- - ">="
|
17
|
-
- !ruby/object:Gem::Version
|
18
|
-
version: '0'
|
19
|
-
type: :runtime
|
20
|
-
prerelease: false
|
21
|
-
version_requirements: !ruby/object:Gem::Requirement
|
22
|
-
requirements:
|
23
|
-
- - ">="
|
24
|
-
- !ruby/object:Gem::Version
|
25
|
-
version: '0'
|
26
12
|
- !ruby/object:Gem::Dependency
|
27
13
|
name: uri
|
28
14
|
requirement: !ruby/object:Gem::Requirement
|
29
15
|
requirements:
|
30
16
|
- - ">="
|
31
17
|
- !ruby/object:Gem::Version
|
32
|
-
version:
|
18
|
+
version: 1.0.0
|
33
19
|
type: :runtime
|
34
20
|
prerelease: false
|
35
21
|
version_requirements: !ruby/object:Gem::Requirement
|
36
22
|
requirements:
|
37
23
|
- - ">="
|
38
24
|
- !ruby/object:Gem::Version
|
39
|
-
version:
|
25
|
+
version: 1.0.0
|
40
26
|
- !ruby/object:Gem::Dependency
|
41
27
|
name: uri-idna
|
42
28
|
requirement: !ruby/object:Gem::Requirement
|
@@ -71,6 +57,7 @@ executables: []
|
|
71
57
|
extensions: []
|
72
58
|
extra_rdoc_files: []
|
73
59
|
files:
|
60
|
+
- CHANGELOG.md
|
74
61
|
- LICENSE.txt
|
75
62
|
- README.md
|
76
63
|
- Rakefile
|