uri-whatwg_parser 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA256:
3
+ metadata.gz: f268e45182363045ad2658cd458e9bd6c1309688889a056ab3c507c1805b41f6
4
+ data.tar.gz: 842b5a01c0860e9293f5c1b58d71fbd22fbc38de8775488865f2ecd060f5d546
5
+ SHA512:
6
+ metadata.gz: df6b26333982d0ab865d4c3b436b6f0b61ba06e2e034c8e8453586bfd5dbffe0493be6bc36b295f4d7ca8511892c454c8b3b178beb387c7e8588290f25cbd462
7
+ data.tar.gz: ad8179360bcd2b0e963841936978aa04f38075142762114c7fb0a7c7dae459ad54f2e31deaa8cd9e9a16820d34235d9ad89038c6677648fcc40135f0350e5867
data/LICENSE.txt ADDED
@@ -0,0 +1,21 @@
1
+ The MIT License (MIT)
2
+
3
+ Copyright (c) 2025 Yuji Yaginuma
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in
13
+ all copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
21
+ THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,43 @@
1
+ # Uri::WhatwgParser
2
+
3
+ Ruby implementation of the [WHATWG URL Living Standard](https://url.spec.whatwg.org/).
4
+
5
+ The latest revision of the standard that this package implements is the [24 March 2025 snapshot](https://url.spec.whatwg.org/commit-snapshots/cc8b776b89a6d92b5cc74581c8d90450d3c1e762/).
6
+
7
+ NOTE: Some features haven't been implemented yet. Please see the TODO for details.
8
+
9
+ ## Installation
10
+
11
+ ```bash
12
+ gem install uri-whatwg_parser
13
+ ```
14
+
15
+ ## Usage
16
+
17
+ This gem is compatible with the [`uri`](https://github.com/ruby/uri) gem. Once loaded, it automatically replaces `uri`'s default parser, so no additional setup is needed.
18
+
19
+ ```ruby
20
+ URI.parse("http://日本語.jp")
21
+ # => #<URI::HTTP http://xn--wgv71a119e.jp>
22
+ ```
23
+
24
+ ## Development
25
+
26
+ After checking out the repo, run `bin/setup` to install dependencies. Then, run `rake test` to run the tests. You can also run `bin/console` for an interactive prompt that will allow you to experiment.
27
+
28
+ To install this gem onto your local machine, run `bundle exec rake install`. To release a new version, update the version number in `version.rb`, and then run `bundle exec rake release`, which will create a git tag for the version, push git commits and the created tag, and push the `.gem` file to [rubygems.org](https://rubygems.org).
29
+
30
+ ## TODO
31
+
32
+ * Support passing `base`
33
+ * Support state override
34
+ * Support validations
35
+ * Support encodings other than UTF-8
36
+
37
+ ## Contributing
38
+
39
+ Bug reports and pull requests are welcome on GitHub at https://github.com/y-yagi/uri-whatwg_parser.
40
+
41
+ ## License
42
+
43
+ The gem is available as open source under the terms of the [MIT License](https://opensource.org/licenses/MIT).
data/Rakefile ADDED
@@ -0,0 +1,17 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "bundler/gem_tasks"
4
+ require "rake/testtask"
5
+
6
# Runs every test/**/test_*.rb file with the "test" directory on the load path.
Rake::TestTask.new(:test) do |t|
  t.libs << "test"
  t.test_files = FileList["test/**/test_*.rb"]
end

desc "Download the web-platform-tests URL test data into test/resources"
task :download_wpt_resources do
  Dir.chdir "test/resources" do
    # --fail makes curl exit non-zero on an HTTP error instead of saving the
    # error page as urltestdata.json and reporting success. Passing the
    # arguments separately runs curl directly, without going through a shell.
    system(
      "curl", "--fail", "-O",
      "https://raw.githubusercontent.com/web-platform-tests/wpt/master/url/resources/urltestdata.json",
      exception: true
    )
  end
end

task default: :test
@@ -0,0 +1,5 @@
1
+ # frozen_string_literal: true
2
module URI
  class WhatwgParser
    # Base class for every error raised by this parser; a kind of URI::Error.
    class Error < URI::Error; end

    # Raised when the input cannot be parsed as a URL.
    class ParseError < Error; end
  end
end
@@ -0,0 +1,139 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "uri/idna"
4
+ require "ipaddr"
5
+ require_relative "parser_helper"
6
+
7
class URI::WhatwgParser
  # Parses the host component of a URL: bracketed IPv6 literals, opaque hosts
  # (used for non-special schemes), IPv4 addresses and IDNA domains.
  class HostParser
    include ParserHelper

    FORBIDDEN_HOST_CODE_POINT = ["\x00", "\t", "\x0a", "\x0d", " ", "#", "/", ":", "<", ">", "?", "@", "[", "\\", "]", "^", "|"].freeze
    # C0_CONTROL holds Integer code points, so it is converted to characters
    # before being merged; otherwise control characters could never match the
    # characters of a host string. U+007F (DELETE) is forbidden in domains too.
    FORBIDDEN_DOMAIN_CODE_POINT = (FORBIDDEN_HOST_CODE_POINT + C0_CONTROL.map(&:chr) + ["%", "\x7f"]).uniq.freeze

    # Digits accepted for each radix of an IPv4 number. Kernel#Integer alone is
    # too lenient (it accepts "_", signs, whitespace and radix prefixes).
    IPV4_NUMBER_FORMAT = {
      8 => /\A[0-7]+\z/,
      10 => /\A[0-9]+\z/,
      16 => /\A[0-9A-Fa-f]+\z/
    }.freeze
    private_constant :IPV4_NUMBER_FORMAT

    # Parses +input+ as a host.
    #
    # input  - host text as it appears in the URL (String or nil).
    # opaque - true when the URL's scheme is not special; the host is then only
    #          validated and percent-encoded instead of being treated as a domain.
    #
    # Returns the serialized host String, or nil when +input+ is nil or empty.
    # Raises ParseError when the host is invalid.
    def parse(input, opaque = false) # :nodoc:
      return if input.nil? || input.empty?

      if input.start_with?("[")
        raise ParseError unless input.end_with?("]")
        return parse_ipv6(input)
      end

      return parse_opaque_host(input) if opaque

      domain = percent_decode(input).force_encoding(Encoding::UTF_8)
      # Percent-decoding may have produced bytes that are not valid UTF-8.
      raise ParseError unless domain.valid_encoding?

      ascii_domain = URI::IDNA.whatwg_to_ascii(domain)
      raise ParseError if include_forbidden_domain_code_point?(ascii_domain)
      return serialize_ipv4(parse_ipv4(ascii_domain)) if ends_in_number?(ascii_domain)

      ascii_domain
    rescue URI::IDNA::Error, Encoding::CompatibilityError, ArgumentError
      raise ParseError
    end

    private

    # Converts a dotted IPv4 string (1 to 4 parts, each decimal, octal or hex)
    # into its 32-bit Integer value. Raises ParseError when it is out of range
    # or malformed.
    def parse_ipv4(host)
      parts = host.split(".", -1)
      # Exactly one trailing dot is tolerated ("1.2.3.4.").
      parts.pop if parts.last == ""
      raise ParseError if parts.empty? || parts.size > 4

      numbers = parts.map { |part| parse_ipv4_number(part).first }

      # Every part but the last must fit in one byte; the last part fills the
      # remaining bytes of the address.
      raise ParseError if numbers[0...-1].any? { |number| number > 255 }
      raise ParseError if numbers.last >= 256**(5 - numbers.size)

      ipv4 = numbers.pop
      numbers.each_with_index do |number, index|
        ipv4 += number * (256**(3 - index))
      end

      ipv4
    end

    # Renders a 32-bit Integer as dotted-decimal "a.b.c.d".
    def serialize_ipv4(ipv4)
      octets = []
      4.times do
        octets.unshift((ipv4 % 256).to_s)
        ipv4 /= 256
      end

      octets.join(".")
    end

    # Normalizes a bracketed IPv6 literal through IPAddr and re-wraps it in
    # brackets. Any failure is reported as ParseError.
    def parse_ipv6(host)
      "[#{IPAddr.new(host)}]"
    rescue StandardError
      raise ParseError
    end

    # Validates an opaque host and percent-encodes C0 controls and non-ASCII
    # characters.
    def parse_opaque_host(host)
      raise ParseError if include_forbidden_host_code_point?(host)
      host.each_char.map { |c| percent_encode(c, C0_CONTROL_PERCENT_ENCODE_SET) }.join
    end

    # Replaces every %XX sequence with the byte it denotes. The work is done on
    # a binary copy so decoded high bytes can sit next to non-ASCII input; the
    # caller re-tags the result as UTF-8.
    def percent_decode(str)
      str.b.gsub(/%[0-9A-Fa-f]{2}/) { |match| match[1, 2].hex.chr }
    end

    # True when the last label of +domain+ (ignoring one trailing dot) is made
    # of ASCII digits or parses as an IPv4 number, meaning the whole host must
    # be handled as an IPv4 address.
    def ends_in_number?(domain)
      parts = domain.split(".", -1)
      if parts.last == ""
        return false if parts.size == 1
        parts.pop
      end

      last = parts.last
      return false if last.nil?
      return true if last.match?(/\A[0-9]+\z/)

      parse_ipv4_number(last)
      true
    rescue ParseError
      false
    end

    # Parses one IPv4 part. A "0x"/"0X" prefix selects hexadecimal and a
    # leading "0" selects octal.
    #
    # Returns [value, validation_error]; validation_error is true when a
    # non-decimal notation was used. Raises ParseError for empty or malformed
    # input.
    def parse_ipv4_number(str)
      raise ParseError if str.nil? || str.empty?

      validation_error = false
      radix = 10
      digits = str

      if str.size >= 2 && str.start_with?("0x", "0X")
        validation_error = true
        digits = str[2..]
        radix = 16
      elsif str.size >= 2 && str.start_with?("0")
        validation_error = true
        digits = str[1..]
        radix = 8
      end

      # A bare "0x" counts as zero.
      return 0, true if digits.empty?

      raise ParseError unless digits.match?(IPV4_NUMBER_FORMAT.fetch(radix))

      [Integer(digits, radix), validation_error]
    end

    def include_forbidden_domain_code_point?(str)
      str.each_char.any? { |c| FORBIDDEN_DOMAIN_CODE_POINT.include?(c) }
    end

    def include_forbidden_host_code_point?(str)
      str.each_char.any? { |c| FORBIDDEN_HOST_CODE_POINT.include?(c) }
    end
  end
end
@@ -0,0 +1,27 @@
1
+ # frozen_string_literal: true
2
+
3
class URI::WhatwgParser
  # Character-class predicates and percent-encoding shared by the URL parser
  # and the host parser.
  module ParserHelper
    # C0 control code points (U+0000..U+001F) as Integers.
    C0_CONTROL = (0..0x1f).to_a.freeze
    # The same code points as one-character strings.
    C0_CONTROL_PERCENT_ENCODE_SET = C0_CONTROL.map(&:chr).freeze

    # Defined here so the predicates below work wherever the module is mixed
    # in, instead of relying on constants of the including class.
    ASCII_ALPHA = [*"a".."z", *"A".."Z"].freeze
    ASCII_DIGIT = [*"0".."9"].freeze

    # True when +c+ is a single ASCII letter. nil is never a letter.
    def ascii_alpha?(c)
      ASCII_ALPHA.include?(c)
    end

    # True when +c+ is a single ASCII letter or digit.
    def ascii_alphanumerica?(c)
      ascii_alpha?(c) || ascii_digit?(c)
    end

    # True when +c+ is a single ASCII digit. nil is never a digit.
    def ascii_digit?(c)
      ASCII_DIGIT.include?(c)
    end

    # Percent-encodes the character +c+ when it is listed in +encode_set+ or is
    # above U+007E; each byte of the character becomes "%XX". Any other
    # character is returned unchanged.
    def percent_encode(c, encode_set)
      return c unless encode_set.include?(c) || c.ord > 0x7e

      c.bytes.map { |byte| format("%%%02X", byte) }.join
    end
  end
end
@@ -0,0 +1,7 @@
1
+ # frozen_string_literal: true
2
+
3
module URI
  class WhatwgParser
    # Version string of this gem release.
    VERSION = "0.1.0"
  end
end
@@ -0,0 +1,391 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "strscan"
4
+ require "uri"
5
+ require_relative "whatwg_parser/error"
6
+ require_relative "whatwg_parser/version"
7
+ require_relative "whatwg_parser/parser_helper"
8
+ require_relative "whatwg_parser/host_parser"
9
+
10
module URI
  # URL parser modelled on the WHATWG URL Living Standard state machine.
  #
  # The parser walks the input one character at a time; the current state is
  # kept in @state and dispatched to the matching +on_<state>+ method.
  #
  # Known limitations: base URLs, state override and validation-error
  # reporting are not implemented, so inputs without a scheme raise ParseError.
  #
  # NOTE: all parsing state lives in instance variables, so one instance cannot
  # parse two inputs at the same time.
  class WhatwgParser
    include ParserHelper

    # Special schemes mapped to their default port (nil when there is none).
    SPECIAL_SCHEME = { "ftp" => 21, "file" => nil, "http" => 80, "https" => 443, "ws" => 80, "wss" => 443 }.freeze
    ASCII_ALPHA = (("a".."z").to_a + ("A".."Z").to_a).freeze
    ASCII_DIGIT = ("0".."9").to_a.freeze

    FRAGMENT_PERCENT_ENCODE_SET = (C0_CONTROL_PERCENT_ENCODE_SET + [" ", "\"", "<", ">", "`"]).freeze
    QUERY_PERCENT_ENCODE_SET = (C0_CONTROL_PERCENT_ENCODE_SET + [" ", "\"", "#", "<", ">"]).freeze
    SPECIAL_QUERY_PERCENT_ENCODE_SET = (QUERY_PERCENT_ENCODE_SET + ["'"]).freeze
    PATH_PERCENT_ENCODE_SET = (QUERY_PERCENT_ENCODE_SET + ["?", "^", "`", "{", "}"]).freeze
    USERINFO_PERCENT_ENCODE_SET = (PATH_PERCENT_ENCODE_SET + ["/", ":", ";", "=", "@", "[", "\\", "]", "|"]).freeze

    SINGLE_DOT_PATH_SEGMENTS = [".", "%2e", "%2E"].freeze
    DOUBLE_DOT_PATH_SEGMENTS = ["..", ".%2e", ".%2E", "%2e.", "%2e%2e", "%2e%2E", "%2E.", "%2E%2e", "%2E%2E"].freeze

    WINDOWS_DRIVE_LETTER = Regexp.new("\\A([a-zA-Z][:|])\\z")
    NORMALIZED_WINDOWS_DRIVE_LETTER = Regexp.new("\\A([a-zA-Z][:])\\z")
    STARTS_WITH_wINDOWS_DRIVE_LETTER = Regexp.new("\\A([a-zA-Z][:|])(?:[/\\?#])?\\z")

    # Order of the components returned by #split; it matches the positional
    # arguments that #parse forwards to URI.for.
    RESULT_KEYS = %i[scheme userinfo host port registry path opaque query fragment].freeze
    private_constant :RESULT_KEYS

    def initialize
      reset
      @host_parser = HostParser.new
    end

    # Returns an empty Hash; this parser exposes no regular expressions.
    # NOTE(review): presumably queried by the uri library when the parser is
    # installed - confirm against the uri gem.
    def regexp
      {}
    end

    # Parses +uri+ and returns the URI object built by URI.for.
    # Raises ParseError when the input is not a valid absolute URL.
    def parse(uri) # :nodoc:
      URI.for(*split(uri))
    end

    # Splits +uri+ into
    # [scheme, userinfo, host, port, registry, path, opaque, query, fragment].
    # +registry+ and +opaque+ are always nil. Raises ParseError on invalid input.
    def split(uri) # :nodoc:
      # Start from a clean state even when #split is called directly.
      reset

      # Strip leading/trailing C0 controls and spaces, then drop every tab and
      # newline inside the input.
      input = uri.sub(/\A[\u0000-\u0020]*/, "").sub(/[\u0000-\u0020]*\z/, "").delete("\t\n\r")

      raise ParseError if input.empty?

      @scanner = StringScanner.new(input)

      loop do
        c = @scanner.getch
        send("on_#{@state}", c)

        # A nil character is end of input. Keep looping only when the handler
        # rewound the scanner so that buffered text is read again.
        break if c.nil? && @scanner.eos?
      end

      @parse_result[:userinfo] = serialize_userinfo
      @parse_result[:path] = "/#{@paths.join("/")}" unless @paths.empty?

      @parse_result.values_at(*RESULT_KEYS)
    end

    private

    # Restores every piece of per-parse state to its initial value.
    def reset
      @state = :scheme_start_state
      @scanner = nil
      @buffer = +""
      @at_sign_seen = nil
      @password_token_seen = nil
      @inside_brackets = nil
      @paths = []
      @username = nil
      @password = nil
      @parse_result = RESULT_KEYS.to_h { |key| [key, nil] }
    end

    # Pushes +c+ back so that the next state reads it again. No-op at end of
    # input.
    def unread(c)
      @scanner.pos -= c.bytesize unless c.nil?
    end

    # True when +c+ ends the authority part: end of input, "/", "?", "#", or a
    # backslash in a special URL.
    def authority_terminator?(c)
      c.nil? || c == "/" || c == "?" || c == "#" || (special_url? && c == "\\")
    end

    # Builds the userinfo component, or nil when no credentials were given.
    # The ":" separator is emitted only when a password is present.
    def serialize_userinfo
      return if @username.nil? && @password.nil?
      return @username.to_s if @password.nil?

      "#{@username}:#{@password}"
    end

    def on_scheme_start_state(c)
      if ascii_alpha?(c)
        @buffer << c.downcase
        @state = :scheme_state
      else
        unread(c)
        @state = :no_scheme_state
      end
    end

    def on_scheme_state(c)
      if ascii_alphanumerica?(c) || ["+", "-", "."].include?(c)
        @buffer << c.downcase
      elsif c == ":"
        @parse_result[:scheme] = @buffer
        @buffer = +""

        if @parse_result[:scheme] == "file"
          @state = :file_state
        elsif special_url?
          @state = :special_authority_slashes_state
        elsif @scanner.rest.start_with?("/")
          @state = :path_or_authority_state
          # Skip the "/" that follows the colon.
          @scanner.pos += 1
        else
          @parse_result[:path] = ""
          @state = :opaque_path_state
        end
      else
        # Not a scheme after all: start over from the beginning of the input.
        @buffer = +""
        @scanner.pos = 0
        @state = :no_scheme_state
      end
    end

    # A URL without a scheme needs a base URL, which is not supported.
    def on_no_scheme_state(c)
      raise ParseError
    end

    # Only meaningful when parsing against a base URL, which is not supported;
    # no state visible in this class transitions here.
    def on_special_relative_or_authority_state(c)
      # A relative reference cannot be resolved without a base URL.
      raise ParseError unless c == "/" && @scanner.rest.start_with?("/")

      # The slash-skipping state of this implementation is
      # special_authority_slashes_state.
      @state = :special_authority_slashes_state
      @scanner.pos += 1
    end

    def on_path_or_authority_state(c)
      if c == "/"
        @state = :authority_state
      else
        # At end of input ("foo:/") record the empty segment the path state
        # would have produced, since the main loop stops here.
        @paths << "" if c.nil?
        @state = :path_state
        unread(c)
      end
    end

    # Skips any run of "/" and "\" after the scheme of a special URL.
    def on_special_authority_slashes_state(c)
      # End of input here means a special URL without a host ("http:", "http://").
      raise ParseError if c.nil?
      return if c == "\\" || c == "/"

      @state = :authority_state
      unread(c)
    end

    def on_authority_state(c)
      if c == "@"
        # A second "@" means the previous one belonged to the credentials.
        @buffer.prepend("%40") if @at_sign_seen
        @at_sign_seen = true
        @buffer.each_char do |char|
          if char == ":" && !@password_token_seen
            @password_token_seen = true
            next
          end

          encoded_char = percent_encode(char, USERINFO_PERCENT_ENCODE_SET)

          if @password_token_seen
            @password = @password.to_s + encoded_char
          else
            @username = @username.to_s + encoded_char
          end
        end

        @buffer = +""
      elsif authority_terminator?(c)
        raise ParseError if @at_sign_seen && @buffer.empty?

        # Rewind over the buffered text (and the terminator) so that the host
        # state reads it again.
        @scanner.pos -= @buffer.bytesize + (c.nil? ? 0 : c.bytesize)
        @buffer = +""
        @state = :host_state
      else
        @buffer << c
      end
    end

    def on_host_state(c)
      if c == ":" && !@inside_brackets
        raise ParseError if @buffer.empty?

        @parse_result[:host] = @host_parser.parse(@buffer, !special_url?)
        @buffer = +""
        @state = :port_state
      elsif authority_terminator?(c)
        unread(c)
        raise ParseError if special_url? && @buffer.empty?

        @parse_result[:host] = @host_parser.parse(@buffer, !special_url?)
        @buffer = +""
        @state = :path_start_state
      else
        # ":" inside an IPv6 literal must not be taken for the port separator.
        @inside_brackets = true if c == "["
        @inside_brackets = false if c == "]"
        @buffer << c
      end
    end

    def on_port_state(c)
      if ascii_digit?(c)
        @buffer << c
      elsif authority_terminator?(c)
        unless @buffer.empty?
          # Base 10 is forced: Integer("010") would otherwise be read as octal
          # and Integer("0080") would be rejected.
          port = Integer(@buffer, 10)
          raise ParseError if port > 65535

          # The default port of a special scheme is not stored.
          @parse_result[:port] = port unless SPECIAL_SCHEME[@parse_result[:scheme]] == port
          @buffer = +""
        end

        @state = :path_start_state
        unread(c)
      else
        raise ParseError
      end
    end

    def on_file_state(c)
      @parse_result[:scheme] = "file"
      @parse_result[:host] = ""

      if c == "/" || c == "\\"
        @state = :file_slash_state
      else
        unread(c)
        @state = :path_state
      end
    end

    def on_file_slash_state(c)
      if c == "/" || c == "\\"
        @state = :file_host_state
      else
        unread(c)
        @state = :path_state
      end
    end

    def on_file_host_state(c)
      unless c.nil? || c == "/" || c == "\\" || c == "?" || c == "#"
        @buffer << c
        return
      end

      # The terminator is pushed back, never added to the buffer, so it cannot
      # leak into the first path segment.
      unread(c)

      if windows_drive_letter?(@buffer)
        # "file://C:/..." - the buffer is kept and becomes the first segment.
        @state = :path_state
      elsif @buffer.empty?
        @parse_result[:host] = ""
        @state = :path_start_state
      else
        host = @host_parser.parse(@buffer, !special_url?)
        # "localhost" is treated as the empty host already set by the file state.
        @parse_result[:host] = host unless host == "localhost"

        @buffer = +""
        @state = :path_start_state
      end
    end

    def on_path_start_state(c)
      return if c.nil?

      if special_url?
        unread(c) unless c == "/" || c == "\\"
        @state = :path_state
      elsif c == "?"
        @parse_result[:query] = ""
        @state = :query_state
      elsif c == "#"
        @parse_result[:fragment] = ""
        @state = :fragment_state
      else
        unread(c) unless c == "/"
        @state = :path_state
      end
    end

    def on_path_state(c)
      # A backslash separates segments only in special URLs.
      separator = c == "/" || (special_url? && c == "\\")

      unless c.nil? || separator || c == "?" || c == "#"
        @buffer << percent_encode(c, PATH_PERCENT_ENCODE_SET)
        return
      end

      if double_dot_path_segments?(@buffer)
        shorten_url_path
        # ".." at the very end still leaves a trailing slash.
        @paths << "" unless separator
      elsif single_dot_path_segments?(@buffer)
        @paths << "" unless separator
      else
        if @parse_result[:scheme] == "file" && @paths.empty? && windows_drive_letter?(@buffer)
          # Normalize "C|" to "C:".
          @buffer[1] = ":"
        end
        @paths << @buffer
      end

      @buffer = +""

      if c == "?"
        @parse_result[:query] = ""
        @state = :query_state
      elsif c == "#"
        @parse_result[:fragment] = ""
        @state = :fragment_state
      end
    end

    def on_opaque_path_state(c)
      if c == "?"
        @parse_result[:query] = ""
        @state = :query_state
      elsif c == "#"
        @parse_result[:fragment] = ""
        @state = :fragment_state
      elsif c == " "
        # A space right before "?" or "#" is encoded; any other space is kept.
        replacement = @scanner.rest.start_with?("?", "#") ? "%20" : " "
        @parse_result[:path] = @parse_result[:path].to_s + replacement
      elsif !c.nil?
        @parse_result[:path] = @parse_result[:path].to_s + percent_encode(c, C0_CONTROL_PERCENT_ENCODE_SET)
      end
    end

    def on_query_state(c)
      unless c.nil? || c == "#"
        @buffer << c
        return
      end

      encode_set = special_url? ? SPECIAL_QUERY_PERCENT_ENCODE_SET : QUERY_PERCENT_ENCODE_SET
      @parse_result[:query] = @buffer.each_char.map { |char| percent_encode(char, encode_set) }.join
      @buffer = +""
      @state = :fragment_state if c == "#"
    end

    def on_fragment_state(c)
      return if c.nil?
      @parse_result[:fragment] = @parse_result[:fragment].to_s + percent_encode(c, FRAGMENT_PERCENT_ENCODE_SET)
    end

    def c0_control_or_space?(c)
      c0_control?(c) || c == " "
    end

    def c0_control?(c)
      C0_CONTROL.include?(c.ord)
    end

    def windows_drive_letter?(str)
      WINDOWS_DRIVE_LETTER.match?(str)
    end

    def normalized_windows_drive_letter?(str)
      NORMALIZED_WINDOWS_DRIVE_LETTER.match?(str)
    end

    # True when the scheme parsed so far is one of SPECIAL_SCHEME.
    def special_url?
      SPECIAL_SCHEME.key?(@parse_result[:scheme])
    end

    def single_dot_path_segments?(c)
      SINGLE_DOT_PATH_SEGMENTS.include?(c)
    end

    def double_dot_path_segments?(c)
      DOUBLE_DOT_PATH_SEGMENTS.include?(c)
    end

    # Removes the last collected path segment, except that a lone Windows drive
    # letter of a file URL is never removed.
    def shorten_url_path
      return if @paths.empty?
      return if @parse_result[:scheme] == "file" && @paths.length == 1 && normalized_windows_drive_letter?(@paths.first)

      @paths.pop
    end
  end
end
388
+
389
# Install the WHATWG parser as the default parser of the uri library: replace
# the DEFAULT_PARSER constant (removing any existing definition first) and
# register the same instance through URI.parser=.
whatwg_parser = URI::WhatwgParser.new
URI.send(:remove_const, :DEFAULT_PARSER) if defined?(URI::DEFAULT_PARSER)
URI.const_set(:DEFAULT_PARSER, whatwg_parser)
URI.parser = whatwg_parser
metadata ADDED
@@ -0,0 +1,105 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: uri-whatwg_parser
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.0
5
+ platform: ruby
6
+ authors:
7
+ - Yuji Yaginuma
8
+ bindir: exe
9
+ cert_chain: []
10
+ date: 2025-05-04 00:00:00.000000000 Z
11
+ dependencies:
12
+ - !ruby/object:Gem::Dependency
13
+ name: strscan
14
+ requirement: !ruby/object:Gem::Requirement
15
+ requirements:
16
+ - - ">="
17
+ - !ruby/object:Gem::Version
18
+ version: '0'
19
+ type: :runtime
20
+ prerelease: false
21
+ version_requirements: !ruby/object:Gem::Requirement
22
+ requirements:
23
+ - - ">="
24
+ - !ruby/object:Gem::Version
25
+ version: '0'
26
+ - !ruby/object:Gem::Dependency
27
+ name: uri
28
+ requirement: !ruby/object:Gem::Requirement
29
+ requirements:
30
+ - - ">="
31
+ - !ruby/object:Gem::Version
32
+ version: '0'
33
+ type: :runtime
34
+ prerelease: false
35
+ version_requirements: !ruby/object:Gem::Requirement
36
+ requirements:
37
+ - - ">="
38
+ - !ruby/object:Gem::Version
39
+ version: '0'
40
+ - !ruby/object:Gem::Dependency
41
+ name: uri-idna
42
+ requirement: !ruby/object:Gem::Requirement
43
+ requirements:
44
+ - - ">="
45
+ - !ruby/object:Gem::Version
46
+ version: '0'
47
+ type: :runtime
48
+ prerelease: false
49
+ version_requirements: !ruby/object:Gem::Requirement
50
+ requirements:
51
+ - - ">="
52
+ - !ruby/object:Gem::Version
53
+ version: '0'
54
+ - !ruby/object:Gem::Dependency
55
+ name: debug
56
+ requirement: !ruby/object:Gem::Requirement
57
+ requirements:
58
+ - - ">="
59
+ - !ruby/object:Gem::Version
60
+ version: '0'
61
+ type: :development
62
+ prerelease: false
63
+ version_requirements: !ruby/object:Gem::Requirement
64
+ requirements:
65
+ - - ">="
66
+ - !ruby/object:Gem::Version
67
+ version: '0'
68
+ email:
69
+ - yuuji.yaginuma@gmail.com
70
+ executables: []
71
+ extensions: []
72
+ extra_rdoc_files: []
73
+ files:
74
+ - LICENSE.txt
75
+ - README.md
76
+ - Rakefile
77
+ - lib/uri/whatwg_parser.rb
78
+ - lib/uri/whatwg_parser/error.rb
79
+ - lib/uri/whatwg_parser/host_parser.rb
80
+ - lib/uri/whatwg_parser/parser_helper.rb
81
+ - lib/uri/whatwg_parser/version.rb
82
+ homepage: https://github.com/y-yagi/uri-whatwg_parser
83
+ licenses:
84
+ - MIT
85
+ metadata:
86
+ homepage_uri: https://github.com/y-yagi/uri-whatwg_parser
87
+ rubygems_mfa_required: 'true'
88
+ rdoc_options: []
89
+ require_paths:
90
+ - lib
91
+ required_ruby_version: !ruby/object:Gem::Requirement
92
+ requirements:
93
+ - - ">="
94
+ - !ruby/object:Gem::Version
95
+ version: 3.2.0
96
+ required_rubygems_version: !ruby/object:Gem::Requirement
97
+ requirements:
98
+ - - ">="
99
+ - !ruby/object:Gem::Version
100
+ version: '0'
101
+ requirements: []
102
+ rubygems_version: 3.6.2
103
+ specification_version: 4
104
+ summary: Ruby implementation of the WHATWG URL Living Standard
105
+ test_files: []