uri-whatwg_parser 0.1.8 → 0.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 4463680cf6d13f3daf3513549ebe358f7b45484888704bbefffcecbb1790b8c7
4
- data.tar.gz: b5ec5b9ad687c53f71f2fa21bcb049215dfff6ecdcb8305594f569ba86c0b210
3
+ metadata.gz: bc2c1e5428af4eaed582a417234dec63ac59554af14bbbbd1cc0c3017e8e32b9
4
+ data.tar.gz: 749b4ba051cb58a73f0ef8ebe64d6426a4d1f0803176641ff75af539757130c1
5
5
  SHA512:
6
- metadata.gz: 3ff5fd142e640d265e4d3bd11174e7658732430055d45cce16364bc783c85f7f642d3654d9621e2fa198d63f8e82959fa5d4588d611f6c3e3c3f2d46f382df9e
7
- data.tar.gz: b389a5a39685d81ce3ade458b1d61746e9b0dca6e227b73658fe6c04910f20908a1669fd884c5e445bb401edc1db3915e21d15bdec8acf1b441c9c9b3e7e65d8
6
+ metadata.gz: a28ffa266d8013c02ed0da9bfd285eaff113527772019a135ddd092310b942c01b6beab710336d37665f8ec3dcc18829edad14a0a97168ef1fc3c651148eb4f0
7
+ data.tar.gz: 1ba90031895fe24a39b1ae7ff127ea7af821cb39a0801b83234c40b63801c9ee6c886f1bb0e423bc9c6e9c817d31bea29661db7e22978c8b66891b9864a953f4
data/CHANGELOG.md CHANGED
@@ -1,3 +1,12 @@
1
+ ## 0.2.1
2
+
3
+ * Improve the performance of `parse`
4
+
5
+ ## 0.2.0
6
+
7
+ * Fix setter methods to be compliant with the WHATWG URL Living Standard
8
+ * Fix several incorrect parsing processes
9
+
1
10
  ## 0.1.8
2
11
 
3
12
  * Support `.build` method
data/README.md CHANGED
@@ -2,7 +2,7 @@
2
2
 
3
3
  Ruby implementation of the [WHATWG URL Living Standard](https://url.spec.whatwg.org/).
4
4
 
5
- The latest revision that this package implements of the standard is ([30 October 2025](https://url.spec.whatwg.org/commit-snapshots/52526653e848c5a56598c84aa4bc8ac9025fb66b/)).
5
+ The latest revision that this package implements of the standard is [13 January 2026](https://url.spec.whatwg.org/commit-snapshots/b6b3251fe911ab33d68fb051efe0e4d39ae4145e/).
6
6
 
7
7
  ## Installation
8
8
 
@@ -29,7 +29,6 @@ To install this gem onto your local machine, run `bundle exec rake install`. To
29
29
 
30
30
  ## TODO
31
31
 
32
- * Support state override
33
32
  * Support validations
34
33
 
35
34
  ## Contributing
data/Rakefile CHANGED
@@ -11,6 +11,7 @@ end
11
11
  task :download_wpt_resources do
12
12
  Dir.chdir "test/resources" do
13
13
  system("curl -O https://raw.githubusercontent.com/web-platform-tests/wpt/master/url/resources/urltestdata.json", exception: true)
14
+ system("curl -O https://raw.githubusercontent.com/web-platform-tests/wpt/master/url/resources/setters_tests.json", exception: true)
14
15
  end
15
16
  end
16
17
 
@@ -10,6 +10,10 @@ module URI
10
10
  fragment,
11
11
  parser = DEFAULT_PARSER,
12
12
  arg_check = false)
13
+
14
+ return super unless URI::DEFAULT_PARSER.is_a?(URI::WhatwgParser)
15
+ return super if registry
16
+
13
17
  @scheme = nil
14
18
  @user = nil
15
19
  @password = nil
@@ -33,91 +37,96 @@ module URI
33
37
  self.set_path("") if !@path && !@opaque
34
38
  DEFAULT_PARSER.parse(to_s) if arg_check
35
39
 
36
- if registry
37
- raise InvalidURIError,
38
- "the scheme #{@scheme} does not accept registry part: #{registry} (or bad hostname?)"
39
- end
40
-
41
40
  @scheme&.freeze
42
41
  self.set_port(self.default_port) if self.default_port && !@port
43
42
  end
44
43
 
45
-
46
44
  def merge(oth)
47
45
  URI::DEFAULT_PARSER.join(self.to_s, oth.to_s)
48
46
  end
49
47
  alias + merge
50
48
 
51
- def check_scheme(v)
52
- self.set_scheme(v)
53
- DEFAULT_PARSER.parse(to_s)
54
- true
55
- end
49
+ def scheme=(v)
50
+ return super unless URI::DEFAULT_PARSER.is_a?(URI::WhatwgParser)
51
+ return if v.nil? || v.empty?
56
52
 
57
- def check_user(v)
58
- if @opaque
59
- raise InvalidURIError, "cannot set user with opaque"
60
- end
53
+ parse_result = URI::DEFAULT_PARSER.split("#{v}:", url: self, state_override: :scheme_start_state)
54
+ set_scheme(parse_result[0])
55
+ set_port(parse_result[3])
56
+ end
61
57
 
58
+ def user=(v)
59
+ return super unless URI::DEFAULT_PARSER.is_a?(URI::WhatwgParser)
62
60
  return v unless v
63
61
 
64
- self.set_user(v)
65
- DEFAULT_PARSER.parse(to_s)
66
- true
62
+ if host.nil? || host.empty? || scheme == "file"
63
+ raise InvalidURIError, "cannot set user when host is nil or file schme"
64
+ end
65
+ set_user(URI::DEFAULT_PARSER.utf8_percent_encode_string(v, URI::WhatwgParser::USERINFO_PERCENT_ENCODE_SET))
67
66
  end
68
67
 
69
- def check_password(v, user = @user)
70
- if @opaque
71
- raise InvalidURIError, "cannot set password with opaque"
72
- end
68
+ def password=(v)
69
+ return super unless URI::DEFAULT_PARSER.is_a?(URI::WhatwgParser)
73
70
  return v unless v
74
71
 
75
- if !user
76
- raise InvalidURIError, "password component depends user component"
72
+ if host.nil? || host.empty? || scheme == "file"
73
+ raise InvalidURIError, "cannot set password when host is nil or file schme"
77
74
  end
78
-
79
- self.set_password(v)
80
- DEFAULT_PARSER.parse(to_s)
81
- true
75
+ set_password(URI::DEFAULT_PARSER.utf8_percent_encode_string(v, URI::WhatwgParser::USERINFO_PERCENT_ENCODE_SET))
82
76
  end
83
77
 
84
- def check_host(v)
85
- return v unless v
78
+ def host=(v)
79
+ return super unless URI::DEFAULT_PARSER.is_a?(URI::WhatwgParser)
80
+ return if v.nil?
86
81
 
87
82
  if @opaque
88
83
  raise InvalidURIError, "cannot set host with registry or opaque"
89
84
  end
90
85
 
91
- self.set_host(v)
92
- DEFAULT_PARSER.parse(to_s)
93
- true
86
+ parse_result = URI::DEFAULT_PARSER.split(v.to_s, url: self, state_override: :host_state)
87
+ set_host(parse_result[2])
88
+ set_port(parse_result[3])
94
89
  end
95
90
 
96
- def check_port(v)
97
- return v unless v
91
+ def port=(v)
92
+ return super unless URI::DEFAULT_PARSER.is_a?(URI::WhatwgParser)
93
+ return if v.nil?
98
94
 
99
- if @opaque
100
- raise InvalidURIError, "cannot set port with registry or opaque"
95
+ if v.to_s.empty?
96
+ set_port(nil)
97
+ return
101
98
  end
102
99
 
103
- self.set_port(v)
104
- DEFAULT_PARSER.parse(to_s)
105
- true
100
+ if host.nil? || host.empty? || scheme == "file"
101
+ raise InvalidURIError, "cannot set port when host is nil or scheme is file"
102
+ end
103
+
104
+ parse_result = URI::DEFAULT_PARSER.split("#{v}:", url: self, state_override: :port_state)
105
+ set_port(parse_result[3])
106
106
  end
107
107
 
108
- def check_path(v)
109
- return v unless v
108
+ def path=(v)
109
+ return super unless URI::DEFAULT_PARSER.is_a?(URI::WhatwgParser)
110
+ return if v.nil?
110
111
 
111
112
  if @opaque
112
113
  raise InvalidURIError, "path conflicts with opaque"
113
114
  end
114
115
 
115
- self.set_path(v)
116
- DEFAULT_PARSER.parse(to_s)
117
- true
116
+ parse_result = URI::DEFAULT_PARSER.split(v.to_s, url: self, state_override: :path_start_state)
117
+ set_path(parse_result[5])
118
+ end
119
+
120
+ def userinfo=(userinfo)
121
+ return super unless URI::DEFAULT_PARSER.is_a?(URI::WhatwgParser)
122
+
123
+ user, password = split_userinfo(userinfo)
124
+ self.user = user
125
+ self.password = password
118
126
  end
119
127
 
120
128
  def check_opaque(v)
129
+ return super unless URI::DEFAULT_PARSER.is_a?(URI::WhatwgParser)
121
130
  return v unless v
122
131
 
123
132
  if @host || @port || @user || @path
@@ -7,11 +7,13 @@ class URI::WhatwgParser
7
7
  class HostParser
8
8
  include ParserHelper
9
9
 
10
- FORBIDDEN_HOST_CODE_POINT = ["\x00", "\t", "\x0a", "\x0d", " ", "#", "/", ":", "<", ">", "?", "@", "[", "\\", "]", "^", "|"]
11
- FORBIDDEN_DOMAIN_CODE_POINT = FORBIDDEN_HOST_CODE_POINT + C0_CONTROL_PERCENT_ENCODE_SET + ["%", "\x7f"]
10
+ FORBIDDEN_HOST_CODE_POINT = Set["\x00", "\t", "\x0a", "\x0d", " ", "#", "/", ":", "<", ">", "?", "@", "[", "\\", "]", "^", "|"]
11
+ FORBIDDEN_DOMAIN_CODE_POINT = FORBIDDEN_HOST_CODE_POINT | C0_CONTROL_PERCENT_ENCODE_SET | Set["%", "\x7f"]
12
+ FORBIDDEN_HOST_REGEX = Regexp.union(FORBIDDEN_HOST_CODE_POINT.to_a)
13
+ FORBIDDEN_DOMAIN_REGEX = Regexp.union(FORBIDDEN_DOMAIN_CODE_POINT.to_a)
12
14
 
13
15
  def parse(input, opaque = false) # :nodoc:
14
- return if input&.empty?
16
+ return "" if input&.empty?
15
17
 
16
18
  if input.start_with?("[")
17
19
  raise ParseError, "invalid IPv6 format" unless input.end_with?("]")
@@ -39,8 +41,7 @@ class URI::WhatwgParser
39
41
  raise URI::WhatwgParser::ParseError, "invalid IPv4 format" if parts.size > 4
40
42
  numbers = []
41
43
  parts.each do |part|
42
- value, _validation_error = parse_ipv4_number(part)
43
- numbers << value
44
+ numbers << parse_ipv4_number(part)
44
45
  end
45
46
 
46
47
  (numbers.size-1).times {|i| raise URI::WhatwgParser::ParseError, "invalid IPv4 format" if numbers[i] > 255 }
@@ -191,63 +192,73 @@ class URI::WhatwgParser
191
192
 
192
193
  def parse_opaque_host(host)
193
194
  raise ParseError if include_forbidden_host_code_point?(host)
194
- host.chars.map { |c| percent_encode(c, C0_CONTROL_PERCENT_ENCODE_SET) }.join
195
+ host.chars.map { |c| utf8_percent_encode(c, C0_CONTROL_PERCENT_ENCODE_SET) }.join
195
196
  end
196
197
 
197
198
  def percent_decode(str)
198
199
  str.gsub(/%[0-9A-Fa-f]{2}/) do |m|
199
200
  m[1..2].to_i(16).chr
200
201
  end
201
- rescue ArgumentError
202
- raise ParseError, "including invalid value in host"
203
202
  end
204
203
 
205
204
  def ends_in_number?(domain)
206
- parts = domain.split(".", -1)
207
- if parts.last == ""
208
- return false if parts.size == 1
209
- parts.pop
205
+ return false if domain.empty?
206
+
207
+ if domain.end_with?(".")
208
+ # Remove trailing dot and find the actual last segment
209
+ domain_without_trailing = domain[0...-1]
210
+ return false if domain_without_trailing.empty?
211
+
212
+ last_dot = domain_without_trailing.rindex(".")
213
+ last = last_dot ? domain_without_trailing[last_dot + 1..-1] : domain_without_trailing
214
+ else
215
+ # Find the last segment after the last dot
216
+ last_dot = domain.rindex(".")
217
+ last = last_dot ? domain[last_dot + 1..-1] : domain
210
218
  end
211
219
 
212
- last = parts.last
213
- return true if last != "" && last.chars.all? { |c| ascii_digit?(c) }
220
+ return false if last.empty?
221
+ return true if last.match?(/\A\d+\z/)
214
222
 
215
- begin
216
- parse_ipv4_number(last)
217
- rescue ParseError
218
- return false
223
+ if last.start_with?("0x", "0X")
224
+ hex = last[2..-1] || ""
225
+ return true if hex.empty? || hex.match?(/\A[0-9A-Fa-f]+\z/)
219
226
  end
220
227
 
221
- true
228
+ false
222
229
  end
223
230
 
224
231
  def parse_ipv4_number(str)
225
232
  raise ParseError, "invalid IPv4 format" if str&.empty?
226
233
 
227
- validation_error = false
228
234
  r = 10
229
235
 
230
236
  if str.size >= 2 && str.start_with?("0x", "0X")
231
- validation_error = true
232
237
  str = str[2..-1]
233
238
  r = 16
234
239
  elsif str.size >= 2 && str.start_with?("0")
235
- validation_error = true
236
240
  str = str[1..-1]
237
241
  r = 8
238
242
  end
239
243
 
240
- return 0, true if str.empty?
244
+ return 0 if str.empty?
241
245
 
242
246
  begin
243
- output = Integer(str, r)
244
- return output, validation_error
247
+ Integer(str, r)
245
248
  rescue ArgumentError
246
249
  raise ParseError, "invalid IPv4 format"
247
250
  end
248
251
  end
249
252
 
250
253
  def domain_to_ascii(domain)
254
+ # If domain is already ASCII-only, lowercase, and doesn't contain punycode prefix
255
+ # we can skip IDNA processing
256
+ if domain.ascii_only? && domain == domain.downcase && !domain.include?("xn--")
257
+ raise ParseError, "including invalid value in host" if include_forbidden_domain_code_point?(domain)
258
+ raise ParseError, "host can't be empty" if domain.empty?
259
+ return domain
260
+ end
261
+
251
262
  ascii_domain = URI::IDNA.whatwg_to_ascii(domain.force_encoding(Encoding::UTF_8), be_strict: false)
252
263
 
253
264
  raise ParseError, "including invalid value in host" if include_forbidden_domain_code_point?(ascii_domain)
@@ -257,11 +268,11 @@ class URI::WhatwgParser
257
268
  end
258
269
 
259
270
  def include_forbidden_domain_code_point?(str)
260
- FORBIDDEN_DOMAIN_CODE_POINT.any? {|c| str.include?(c) }
271
+ str.match?(FORBIDDEN_DOMAIN_REGEX)
261
272
  end
262
273
 
263
274
  def include_forbidden_host_code_point?(str)
264
- FORBIDDEN_HOST_CODE_POINT.any? {|c| str.include?(c) }
275
+ str.match?(FORBIDDEN_HOST_REGEX)
265
276
  end
266
277
  end
267
278
  end
@@ -1,31 +1,23 @@
1
1
  # frozen_string_literal: true
2
2
 
3
+ require "set"
4
+
3
5
  class URI::WhatwgParser
4
6
  module ParserHelper
5
- C0_CONTROL_PERCENT_ENCODE_SET = (0..0x1f).map(&:chr)
6
- ASCII_ALPHA = ("a".."z").to_a + ("A".."Z").to_a
7
- ASCII_DIGIT = ("0".."9").to_a
8
-
9
- def ascii_alpha?(c)
10
- ASCII_ALPHA.include?(c)
11
- end
12
-
13
- def ascii_alphanumerica?(c)
14
- ascii_alpha?(c) || ascii_digit?(c)
15
- end
7
+ # NOTE: This set is incomplete: it omits the code points greater than `0x7e`. That is OK for now because `utf8_percent_encode` checks `c.ord > 0x7e` itself.
8
+ C0_CONTROL_PERCENT_ENCODE_SET = Set.new((0..0x1f).map(&:chr))
16
9
 
17
- def ascii_digit?(c)
18
- ASCII_DIGIT.include?(c)
19
- end
20
-
21
- def percent_encode(c, encode_set, encoding = Encoding::UTF_8)
10
+ def utf8_percent_encode(c, encode_set)
22
11
  return c unless encode_set.include?(c) || c.ord > 0x7e
23
12
 
24
13
  # For ASCII single-byte characters
25
14
  return "%%%02X" % c.ord if c.bytesize == 1
26
15
 
27
- bytes = c.encoding == encoding ? c.bytes : c.encode(encoding).bytes
28
- bytes.map { |b| "%%%02X" % b }.join
16
+ c.bytes.map { |b| "%%%02X" % b }.join
17
+ end
18
+
19
+ def utf8_percent_encode_string(str, encode_set)
20
+ str.chars.map { |c| utf8_percent_encode(c, encode_set) }.join
29
21
  end
30
22
  end
31
23
  end
@@ -2,6 +2,6 @@
2
2
 
3
3
  module URI
4
4
  class WhatwgParser
5
- VERSION = "0.1.8"
5
+ VERSION = "0.2.1"
6
6
  end
7
7
  end
@@ -1,5 +1,6 @@
1
1
  # frozen_string_literal: true
2
2
 
3
+ require "set"
3
4
  require "uri"
4
5
  require_relative "whatwg_parser/error"
5
6
  require_relative "whatwg_parser/version"
@@ -13,18 +14,27 @@ module URI
13
14
 
14
15
  SPECIAL_SCHEME = { "ftp" => 21, "file" => nil, "http" => 80, "https" => 443, "ws" => 80, "wss" => 443 }
15
16
 
16
- FRAGMENT_PERCENT_ENCODE_SET = C0_CONTROL_PERCENT_ENCODE_SET + [" ", "\"", "<", ">", "`"]
17
- QUERY_PERCENT_ENCODE_SET = C0_CONTROL_PERCENT_ENCODE_SET + [" ", "\"", "#", "<", ">"]
18
- SPECIAL_QUERY_PERCENT_ENCODE_SET = QUERY_PERCENT_ENCODE_SET + ["'"]
19
- PATH_PERCENT_ENCODE_SET = QUERY_PERCENT_ENCODE_SET + ["?", "^", "`", "{", "}"]
20
- USERINFO_PERCENT_ENCODE_SET = PATH_PERCENT_ENCODE_SET + ["/", ":", ";", "=","@", "[", "\\", "]", "|"]
17
+ FRAGMENT_PERCENT_ENCODE_SET = C0_CONTROL_PERCENT_ENCODE_SET | Set[" ", "\"", "<", ">", "`"]
18
+ QUERY_PERCENT_ENCODE_SET = C0_CONTROL_PERCENT_ENCODE_SET | Set[" ", "\"", "#", "<", ">"]
19
+ SPECIAL_QUERY_PERCENT_ENCODE_SET = QUERY_PERCENT_ENCODE_SET | Set["'"]
20
+ PATH_PERCENT_ENCODE_SET = QUERY_PERCENT_ENCODE_SET | Set["?", "^", "`", "{", "}"]
21
+ USERINFO_PERCENT_ENCODE_SET = PATH_PERCENT_ENCODE_SET | Set["/", ":", ";", "=", "@", "[", "\\", "]", "|"]
21
22
 
22
- SINGLE_DOT_PATH_SEGMENTS = [".", "%2e", "%2E"]
23
- DOUBLE_DOT_PATH_SEGMENTS = ["..", ".%2e", ".%2E", "%2e.", "%2e%2e", "%2e%2E", "%2E.", "%2E%2e", "%2E%2E"]
23
+ SINGLE_DOT_PATH_SEGMENTS = Set[".", "%2e", "%2E"]
24
+ DOUBLE_DOT_PATH_SEGMENTS = Set["..", ".%2e", ".%2E", "%2e.", "%2e%2e", "%2e%2E", "%2E.", "%2E%2e", "%2E%2E"]
24
25
 
25
26
  WINDOWS_DRIVE_LETTER = Regexp.new("\\A([a-zA-Z][:|])\\z")
26
27
  NORMALIZED_WINDOWS_DRIVE_LETTER = Regexp.new("\\A([a-zA-Z][:])\\z")
27
- STARTS_WITH_wINDOWS_DRIVE_LETTER = Regexp.new("\\A([a-zA-Z][:|])(?:[/\\?#])?\\z")
28
+ STARTS_WITH_WINDOWS_DRIVE_LETTER = Regexp.new("\\A([a-zA-Z][:|])(?:[/\\?#])?\\z")
29
+
30
+ VALID_SIGNS_FOR_SCHEME = Set["+", "-", "."]
31
+ DELIMITER_SIGNS = Set["/", "?", "#"]
32
+
33
+ WS_SCHEMES = Set["ws", "wss"]
34
+
35
+ ASCII_ALPHA_LOWERCASE = Set.new(("a".."z").to_a)
36
+ ASCII_ALPHA_UPPERCASE = Set.new(("A".."Z").to_a)
37
+ ASCII_DIGIT = Set.new(("0".."9").to_a)
28
38
 
29
39
  def initialize
30
40
  reset
@@ -35,51 +45,67 @@ module URI
35
45
  {}
36
46
  end
37
47
 
38
- def parse(uri, base = nil, encoding = Encoding::UTF_8) # :nodoc:
39
- URI.for(*self.split(uri, base, encoding))
48
+ def parse(input, base: nil, url: nil, state_override: nil) # :nodoc:
49
+ URI.for(*self.split(input, base: base, url: url, state_override: state_override))
40
50
  end
41
51
 
42
- def split(uri, base = nil, encoding = Encoding::UTF_8) # :nodoc:
52
+ def split(input, base: nil, url: nil, state_override: nil) # :nodoc:
43
53
  reset
44
54
  @base = nil
45
55
  if base != nil
46
- ary = split(base, nil, encoding)
56
+ ary = split(base, base: nil)
47
57
  @base = { scheme: ary[0], userinfo: ary[1], host: ary[2], port: ary[3], registry: ary[4], path: ary[5], opaque: ary[6], query: ary[7], fragment: ary[8]}
48
58
  @base_paths = @paths
49
59
  reset
50
60
  end
51
61
 
52
- @encoding = encoding
53
- @uri = uri.dup
54
- @uri.sub!(/\A[\u0000-\u0020]*/, "")
55
- @uri.sub!(/[\u0000-\u0020]*\z/, "")
56
- @uri.delete!("\t")
57
- @uri.delete!("\n")
58
- @uri.delete!("\r")
62
+ if url
63
+ raise ArgumentError, "bad argument (expected URI object)" unless url.is_a?(URI::Generic)
64
+ @parse_result.merge!(url.component.zip(url.send(:component_ary)).to_h)
65
+ @username = url.user
66
+ @password = url.password
67
+ @parse_result.delete(:userinfo)
68
+ @special_url = special_url?(@parse_result[:scheme])
69
+ end
70
+
71
+ if state_override
72
+ @state = state_override.to_sym
73
+ @state_override = @state
74
+ raise ArgumentError, "state override is invalid" if !state_override.to_s.end_with?("_state") || !respond_to?(@state_override, private: true)
75
+ else
76
+ raise ParseError, "uri can't be empty" if (input.nil? || input.empty?) && @base.nil?
77
+ end
59
78
 
60
- raise ParseError, "uri can't be empty" if uri.empty? && @base.nil?
79
+ input = input.dup
61
80
 
81
+ unless url
82
+ remove_c0_control_or_space!(input)
83
+ end
84
+
85
+ input.delete!("\t\n\r") if /[\t\n\r]/.match?(input)
86
+
87
+ @input_chars = input.chars
88
+ input_chars_length = @input_chars.length
62
89
  @pos = 0
63
90
 
64
- while @pos <= @uri.length
65
- c = @uri[@pos]
66
- send(@state, c)
91
+ while @pos <= input_chars_length
92
+ dispatch_state(@input_chars[@pos])
93
+ break if @terminate
67
94
  @pos += 1
68
95
  end
69
96
 
70
- @parse_result[:userinfo] = [@username, @password].compact.reject(&:empty?).join(":")
71
- @parse_result[:path] = "/#{@paths.join("/")}" if @paths && !@paths.empty?
72
-
73
- @parse_result.values
97
+ userinfo = [@username, @password].compact.reject(&:empty?).join(":")
98
+ path = "/#{@paths.join("/")}" if @paths && !@paths.empty?
99
+ [@parse_result[:scheme], userinfo, @parse_result[:host], @parse_result[:port], @parse_result[:registry], path, @parse_result[:opaque], @parse_result[:query], @parse_result[:fragment]]
74
100
  end
75
101
 
76
102
  def join(*uris)
77
103
  return parse(uris[0]) if uris.size == 1
78
104
 
79
105
  base, input = uris.shift(2)
80
- uri = parse(input.to_s, base.to_s)
106
+ uri = parse(input.to_s, base: base.to_s)
81
107
  uris.each do |input|
82
- uri = parse(input.to_s, uri.to_s)
108
+ uri = parse(input.to_s, base: uri.to_s)
83
109
  end
84
110
 
85
111
  uri
@@ -87,6 +113,31 @@ module URI
87
113
 
88
114
  private
89
115
 
116
+ def dispatch_state(c)
117
+ case @state
118
+ when :scheme_start_state then scheme_start_state(c)
119
+ when :scheme_state then scheme_state(c)
120
+ when :no_scheme_state then no_scheme_state(c)
121
+ when :special_relative_or_authority_state then special_relative_or_authority_state(c)
122
+ when :path_or_authority_state then path_or_authority_state(c)
123
+ when :relative_state then relative_state(c)
124
+ when :relative_slash_state then relative_slash_state(c)
125
+ when :special_authority_slashes_state then special_authority_slashes_state(c)
126
+ when :special_authority_ignore_slashes_state then special_authority_ignore_slashes_state(c)
127
+ when :authority_state then authority_state(c)
128
+ when :host_state then host_state(c)
129
+ when :port_state then port_state(c)
130
+ when :file_state then file_state(c)
131
+ when :file_slash_state then file_slash_state(c)
132
+ when :file_host_state then file_host_state(c)
133
+ when :path_start_state then path_start_state(c)
134
+ when :path_state then path_state(c)
135
+ when :opaque_path_state then opaque_path_state(c)
136
+ when :query_state then query_state(c)
137
+ when :fragment_state then fragment_state(c)
138
+ end
139
+ end
140
+
90
141
  def reset
91
142
  @buffer = +""
92
143
  @at_sign_seen = nil
@@ -95,26 +146,55 @@ module URI
95
146
  @paths = nil
96
147
  @username = nil
97
148
  @password = nil
98
- @parse_result = { scheme: nil, userinfo: nil, host: nil, port: nil, registry: nil, path: nil, opaque: nil, query: nil, fragment: nil }
99
- @force_continue = false
149
+ @parse_result = { scheme: nil, host: nil, port: nil, registry: nil, path: nil, opaque: nil, query: nil, fragment: nil }
150
+ @state_override = nil
100
151
  @state = :scheme_start_state
152
+ @special_url = nil
153
+ @terminate = nil
101
154
  end
102
155
 
103
156
  def scheme_start_state(c)
104
- if ascii_alpha?(c)
157
+ if ASCII_ALPHA_LOWERCASE.include?(c)
158
+ @buffer << c
159
+ @state = :scheme_state
160
+ elsif ASCII_ALPHA_UPPERCASE.include?(c)
105
161
  @buffer << c.downcase
106
162
  @state = :scheme_state
107
- else
163
+ elsif @state_override.nil?
108
164
  @pos -= 1
109
165
  @state = :no_scheme_state
166
+ else
167
+ raise ParseError, "scheme is invalid value"
110
168
  end
111
169
  end
112
170
 
113
171
  def scheme_state(c)
114
- if ascii_alphanumerica?(c) || ["+", "-", "."].include?(c)
172
+ if ASCII_ALPHA_LOWERCASE.include?(c) || ASCII_DIGIT.include?(c) || VALID_SIGNS_FOR_SCHEME.include?(c)
173
+ @buffer << c
174
+ elsif ASCII_ALPHA_UPPERCASE.include?(c)
115
175
  @buffer << c.downcase
116
176
  elsif c == ":"
177
+ if @state_override
178
+ if (special_url? && !special_url?(@buffer)) ||
179
+ (!special_url? && special_url?(@buffer)) ||
180
+ ((includes_credentials? || !@parse_result[:port].nil?) && @buffer == "file") ||
181
+ (@parse_result[:scheme] == "file" && @parse_result[:host]&.empty?)
182
+ @terminate = true
183
+ return
184
+ end
185
+ end
186
+
117
187
  @parse_result[:scheme] = @buffer
188
+ @special_url = special_url?(@buffer)
189
+
190
+ if @state_override
191
+ if SPECIAL_SCHEME.value?(@parse_result[:port].to_i)
192
+ @parse_result[:port] = nil
193
+ end
194
+ @terminate = true
195
+ return
196
+ end
197
+
118
198
  @buffer = +""
119
199
 
120
200
  if @parse_result[:scheme] == "file"
@@ -123,17 +203,19 @@ module URI
123
203
  @state = :special_relative_or_authority_state
124
204
  elsif special_url?
125
205
  @state = :special_authority_slashes_state
126
- elsif rest.start_with?("/")
206
+ elsif @input_chars[@pos + 1] == "/"
127
207
  @state = :path_or_authority_state
128
208
  @pos += 1
129
209
  else
130
- @parse_result[:opaque] = ""
210
+ @parse_result[:opaque] = +""
131
211
  @state = :opaque_path_state
132
212
  end
133
- else
213
+ elsif @state_override.nil?
134
214
  @buffer.clear
135
215
  @pos = -1
136
216
  @state = :no_scheme_state
217
+ else
218
+ raise ParseError, "parsing scheme failed"
137
219
  end
138
220
  end
139
221
 
@@ -142,6 +224,7 @@ module URI
142
224
 
143
225
  if !@base[:opaque].nil? && c == "#"
144
226
  @parse_result[:scheme] = @base[:scheme]
227
+ @special_url = special_url?(@base[:scheme])
145
228
  @paths = @base_paths
146
229
  @parse_result[:query] = @base[:query]
147
230
  @parse_result[:fragment] = nil
@@ -156,7 +239,7 @@ module URI
156
239
  end
157
240
 
158
241
  def special_relative_or_authority_state(c)
159
- if c == "/" && rest.start_with?("/")
242
+ if c == "/" && @input_chars[@pos + 1] == "/"
160
243
  @state = :special_authority_ignore_slashes_state
161
244
  @pos -= 1
162
245
  else
@@ -176,6 +259,7 @@ module URI
176
259
 
177
260
  def relative_state(c)
178
261
  @parse_result[:scheme] = @base[:scheme]
262
+ @special_url = special_url?(@base[:scheme])
179
263
  if c == "/"
180
264
  @state = :relative_slash_state
181
265
  elsif special_url? && c == "\\"
@@ -203,7 +287,7 @@ module URI
203
287
  end
204
288
 
205
289
  def relative_slash_state(c)
206
- if special_url? && (c == "/" || c == "\\")
290
+ if @special_url && (c == "/" || c == "\\")
207
291
  @state = :special_authority_ignore_slashes_state
208
292
  elsif c == "/"
209
293
  @state = :authority_state
@@ -217,7 +301,7 @@ module URI
217
301
  end
218
302
 
219
303
  def special_authority_slashes_state(c)
220
- if c == "/" && rest.start_with?("/")
304
+ if c == "/" && @input_chars[@pos + 1] == "/"
221
305
  @state = :special_authority_ignore_slashes_state
222
306
  @pos += 1
223
307
  else
@@ -237,23 +321,23 @@ module URI
237
321
  if c == "@"
238
322
  @buffer.prepend("%40") if @at_sign_seen
239
323
  @at_sign_seen = true
240
- @buffer.chars.each do |char|
324
+ @buffer.each_char do |char|
241
325
  if char == ":" && !@password_token_seen
242
326
  @password_token_seen = true
243
327
  next
244
328
  end
245
329
 
246
- encoded_char = percent_encode(char, USERINFO_PERCENT_ENCODE_SET, @encoding)
330
+ encoded_char = utf8_percent_encode(char, USERINFO_PERCENT_ENCODE_SET)
247
331
 
248
332
  if @password_token_seen
249
- @password = @password.to_s + encoded_char
333
+ (@password ||= +"") << encoded_char
250
334
  else
251
- @username = @username.to_s + encoded_char
335
+ (@username ||= +"") << encoded_char
252
336
  end
253
337
  end
254
338
 
255
339
  @buffer.clear
256
- elsif c.nil? || ["/", "?", "#"].include?(c) || (special_url? && c == "\\")
340
+ elsif c.nil? || DELIMITER_SIGNS.include?(c) || (@special_url && c == "\\")
257
341
  raise ParseError, "host is missing" if @at_sign_seen && @buffer.empty?
258
342
 
259
343
  @pos -= (@buffer.size + 1)
@@ -265,20 +349,30 @@ module URI
265
349
  end
266
350
 
267
351
  def host_state(c)
268
- if c == ":" && !@inside_brackets
352
+ if @state_override && @parse_result[:scheme] == "file"
353
+ @pos -= 1
354
+ @state = :file_host_state
355
+ elsif c == ":" && !@inside_brackets
269
356
  raise ParseError, "host is missing" if @buffer.empty?
357
+ raise ParseError, "invalid host" if @state_override && @state_override == :hostname_state
270
358
 
271
- @parse_result[:host] = @host_parser.parse(@buffer, !special_url?)
359
+ @parse_result[:host] = @host_parser.parse(@buffer, !@special_url)
272
360
  @buffer.clear
273
361
  @state = :port_state
274
- elsif c.nil? || ["/", "?", "#"].include?(c) || (special_url? && c == "\\")
362
+ elsif c.nil? || DELIMITER_SIGNS.include?(c) || (@special_url && c == "\\")
275
363
  @pos -= 1
276
- if special_url? && @buffer.empty?
364
+ if @special_url && @buffer.empty?
277
365
  raise ParseError, "host is missing"
366
+ elsif @state_override && @buffer.empty? && (includes_credentials? || !@parse_result[:port].nil?)
367
+ raise ParseError, "invalid host"
278
368
  else
279
- @parse_result[:host] = @host_parser.parse(@buffer, !special_url?)
369
+ @parse_result[:host] = @host_parser.parse(@buffer, !@special_url)
280
370
  @buffer.clear
281
371
  @state = :path_start_state
372
+ if @state_override
373
+ @terminate = true
374
+ return
375
+ end
282
376
  end
283
377
  else
284
378
  @inside_brackets = true if c == "["
@@ -288,21 +382,26 @@ module URI
288
382
  end
289
383
 
290
384
  def port_state(c)
291
- if ascii_digit?(c)
385
+ if ASCII_DIGIT.include?(c)
292
386
  @buffer << c
293
- elsif c.nil? || ["/", "?", "#"].include?(c) || (special_url? && c == "\\")
387
+ elsif c.nil? || DELIMITER_SIGNS.include?(c) || (@special_url && c == "\\") || @state_override
294
388
  unless @buffer.empty?
295
- begin
296
- port = Integer(@buffer, 10)
297
- raise ParseError, "port is invalid value" if port < 0 || port > 65535
298
- @parse_result[:port] = port unless SPECIAL_SCHEME[@parse_result[:scheme]] == port
299
- rescue ArgumentError
300
- raise ParseError, "port is invalid value"
389
+ port = Integer(@buffer, 10)
390
+ raise ParseError, "port is invalid value" if port < 0 || port > 65535
391
+ if SPECIAL_SCHEME[@parse_result[:scheme]] == port
392
+ @parse_result[:port] = nil
393
+ else
394
+ @parse_result[:port] = port
301
395
  end
302
396
 
303
397
  @buffer.clear
398
+ if @state_override
399
+ @terminate = true
400
+ return
401
+ end
304
402
  end
305
403
 
404
+ raise ParseError, "port is invalid value" if @state_override
306
405
  @state = :path_start_state
307
406
  @pos -= 1
308
407
  else
@@ -312,6 +411,7 @@ module URI
312
411
 
313
412
  def file_state(c)
314
413
  @parse_result[:scheme] = "file"
414
+ @special_url = true
315
415
  @parse_result[:host] = nil
316
416
 
317
417
  if c == "/" || c == "\\"
@@ -348,7 +448,10 @@ module URI
348
448
  if !@base.nil? && @base[:scheme] == "file"
349
449
  @parse_result[:host] = @base[:host]
350
450
  if !starts_with_windows_drive_letter?(rest) && @base_paths && normalized_windows_drive_letter?(@base_paths[0])
351
- @paths[0] << @base_paths[0]
451
+ if @paths.nil?
452
+ @paths ||= []
453
+ @paths[0] = @base_paths[0]
454
+ end
352
455
  end
353
456
  end
354
457
  @state = :path_state
@@ -357,52 +460,62 @@ module URI
357
460
  end
358
461
 
359
462
  def file_host_state(c)
360
- if c.nil? || c == "/" || c == "\\" || c == "?" || c == "#"
463
+ if c.nil? || DELIMITER_SIGNS.include?(c) || (@special_url && c == "\\")
361
464
  @pos -= 1
362
465
 
363
- if windows_drive_letter?(@buffer)
466
+ if !@state_override && windows_drive_letter?(@buffer)
364
467
  @state = :path_state
365
468
  elsif @buffer.empty?
366
469
  @parse_result[:host] = nil
470
+ if @state_override
471
+ @terminate = true
472
+ return
473
+ end
367
474
  @state = :path_start_state
368
475
  else
369
- host = @host_parser.parse(@buffer, !special_url?)
370
- if host != "localhost"
371
- @parse_result[:host] = host
476
+ host = @host_parser.parse(@buffer, !@special_url)
477
+ host = "" if host == "localhost"
478
+ @parse_result[:host] = host
479
+ if @state_override
480
+ @terminate = true
481
+ return
372
482
  end
373
-
374
483
  @buffer.clear
375
484
  @state = :path_start_state
376
485
  end
486
+ else
487
+ @buffer << c unless c.nil?
377
488
  end
378
-
379
- @buffer << c unless c.nil?
380
489
  end
381
490
 
382
491
  def path_start_state(c)
383
- if special_url?
492
+ if @special_url
384
493
  @pos -= 1 if c != "/" && c != "\\"
385
494
  @state = :path_state
386
- elsif c == "?"
495
+ elsif !@state_override && c == "?"
387
496
  @state = :query_state
388
- elsif c == "#"
497
+ elsif !@state_override && c == "#"
389
498
  @state = :fragment_state
390
499
  elsif c != nil
391
500
  @pos -= 1 if c != "/"
392
501
  @state = :path_state
502
+ elsif @state_override && @parse_result[:host].nil?
503
+ @paths ||= []
504
+ @paths << ""
393
505
  end
394
506
  end
395
507
 
396
508
  def path_state(c)
397
509
  @paths ||= []
398
510
 
399
- if (c.nil? || c == "/") || (special_url? && c == "\/") || (c == "?" || c == "#")
511
+ if (c.nil? || c == "/") || (@special_url && c == "\\") || (!@state_override && (c == "?" || c == "#"))
400
512
  if double_dot_path_segments?(@buffer)
401
513
  shorten_url_path
402
- if c != "/" || (special_url? && c == "\/")
514
+
515
+ if c != "/" && !(@special_url && c == "\\")
403
516
  @paths << ""
404
517
  end
405
- elsif single_dot_path_segments?(@buffer) && (c != "/" || (special_url? && c == "\/"))
518
+ elsif single_dot_path_segments?(@buffer) && c != "/" && !((@special_url && c == "\\"))
406
519
  @paths << ""
407
520
  elsif !single_dot_path_segments?(@buffer)
408
521
  if @parse_result[:scheme] == "file" && @paths.empty? && windows_drive_letter?(@buffer)
@@ -422,7 +535,7 @@ module URI
422
535
  @state = :fragment_state
423
536
  end
424
537
  else
425
- @buffer << percent_encode(c, PATH_PERCENT_ENCODE_SET, @encoding)
538
+ @buffer << utf8_percent_encode(c, PATH_PERCENT_ENCODE_SET)
426
539
  end
427
540
  end
428
541
 
@@ -434,24 +547,22 @@ module URI
434
547
  @parse_result[:fragment] = nil
435
548
  @state = :fragment_state
436
549
  elsif c == " "
437
- if rest.start_with?("?", "#")
438
- @parse_result[:opaque] += "%20"
550
+ first_of_rest = @input_chars[@pos + 1]
551
+ if first_of_rest == "?" || first_of_rest == "#"
552
+ @parse_result[:opaque] << "%20"
439
553
  else
440
- @parse_result[:opaque] += " "
554
+ @parse_result[:opaque] << " "
441
555
  end
442
556
  elsif !c.nil?
443
- @parse_result[:opaque] += percent_encode(c, C0_CONTROL_PERCENT_ENCODE_SET, @encoding)
557
+ @parse_result[:opaque] << utf8_percent_encode(c, C0_CONTROL_PERCENT_ENCODE_SET)
444
558
  end
445
559
  end
446
560
 
447
561
  def query_state(c)
448
- if @encoding != Encoding::UTF_8 && (!special_url? || %w[ws wss].include?(@parse_result[:scheme]))
449
- @encoding = Encoding::UTF_8
450
- end
451
-
452
- if c.nil? || c == "#"
453
- query_percent_encode_set = special_url? ? SPECIAL_QUERY_PERCENT_ENCODE_SET : QUERY_PERCENT_ENCODE_SET
454
- @parse_result[:query] = @buffer.chars.map { |c| percent_encode(c, query_percent_encode_set, @encoding) }.join
562
+ if c.nil? || (!@state_override && c == "#")
563
+ query_percent_encode_set = @special_url ? SPECIAL_QUERY_PERCENT_ENCODE_SET : QUERY_PERCENT_ENCODE_SET
564
+ # TODO: We need to consider encoding here.
565
+ @parse_result[:query] = utf8_percent_encode_string(@buffer, query_percent_encode_set)
455
566
  @buffer.clear
456
567
  @state = :fragment_state if c == "#"
457
568
  elsif !c.nil?
@@ -461,7 +572,7 @@ module URI
461
572
 
462
573
  def fragment_state(c)
463
574
  return if c.nil?
464
- @parse_result[:fragment] = @parse_result[:fragment].to_s + percent_encode(c, FRAGMENT_PERCENT_ENCODE_SET, @encoding)
575
+ (@parse_result[:fragment] ||= +"") << utf8_percent_encode(c, FRAGMENT_PERCENT_ENCODE_SET)
465
576
  end
466
577
 
467
578
  def windows_drive_letter?(str)
@@ -469,15 +580,15 @@ module URI
469
580
  end
470
581
 
471
582
  def starts_with_windows_drive_letter?(str)
472
- STARTS_WITH_wINDOWS_DRIVE_LETTER.match?(str)
583
+ STARTS_WITH_WINDOWS_DRIVE_LETTER.match?(str)
473
584
  end
474
585
 
475
586
  def normalized_windows_drive_letter?(str)
476
587
  NORMALIZED_WINDOWS_DRIVE_LETTER.match?(str)
477
588
  end
478
589
 
479
- def special_url?
480
- SPECIAL_SCHEME.key?(@parse_result[:scheme])
590
+ def special_url?(str = @parse_result[:scheme])
591
+ SPECIAL_SCHEME.key?(str)
481
592
  end
482
593
 
483
594
  def single_dot_path_segments?(c)
@@ -490,13 +601,16 @@ module URI
490
601
 
491
602
  def shorten_url_path
492
603
  return if @paths.nil?
493
-
494
- return true if @parse_result[:scheme] == "file" && @paths.length == 1 && normalized_windows_drive_letter?(@paths.first)
604
+ return if @parse_result[:scheme] == "file" && @paths.length == 1 && normalized_windows_drive_letter?(@paths.first)
495
605
  @paths.pop
496
606
  end
497
607
 
608
+ def includes_credentials?
609
+ (@username && !@username.empty?) || (@password && !@password.empty?)
610
+ end
611
+
498
612
  def rest
499
- @uri[@pos+1..]
613
+ @input_chars[@pos + 1..]&.join
500
614
  end
501
615
 
502
616
  def convert_to_uri(uri)
@@ -509,9 +623,26 @@ module URI
509
623
  "bad argument (expected URI object or URI string)"
510
624
  end
511
625
  end
626
+
627
+ if RUBY_VERSION >= "4.0"
628
+ def remove_c0_control_or_space!(str)
629
+ if /[\u0000-\u0020]/.match?(str)
630
+ str.strip!("\u0000-\u0020")
631
+ end
632
+ end
633
+ else
634
+ def remove_c0_control_or_space!(str)
635
+ if /[\u0000-\u0020]/.match?(str)
636
+ str.sub!(/\A[\u0000-\u0020]*/, "")
637
+ str.sub!(/[\u0000-\u0020]*\z/, "")
638
+ end
639
+ end
640
+ end
512
641
  end
642
+
643
+ WHATWG_PARSER = URI::WhatwgParser.new
513
644
  end
514
645
 
515
646
  URI.send(:remove_const, :DEFAULT_PARSER) if defined?(URI::DEFAULT_PARSER)
516
- URI::DEFAULT_PARSER = URI::WhatwgParser.new
647
+ URI::DEFAULT_PARSER = URI::WHATWG_PARSER
517
648
  URI.parser = URI::DEFAULT_PARSER
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: uri-whatwg_parser
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.8
4
+ version: 0.2.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Yuji Yaginuma
@@ -37,20 +37,6 @@ dependencies:
37
37
  - - ">="
38
38
  - !ruby/object:Gem::Version
39
39
  version: '0'
40
- - !ruby/object:Gem::Dependency
41
- name: debug
42
- requirement: !ruby/object:Gem::Requirement
43
- requirements:
44
- - - ">="
45
- - !ruby/object:Gem::Version
46
- version: '0'
47
- type: :development
48
- prerelease: false
49
- version_requirements: !ruby/object:Gem::Requirement
50
- requirements:
51
- - - ">="
52
- - !ruby/object:Gem::Version
53
- version: '0'
54
40
  email:
55
41
  - yuuji.yaginuma@gmail.com
56
42
  executables: []
@@ -87,7 +73,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
87
73
  - !ruby/object:Gem::Version
88
74
  version: '0'
89
75
  requirements: []
90
- rubygems_version: 3.6.7
76
+ rubygems_version: 4.0.3
91
77
  specification_version: 4
92
78
  summary: Ruby implementation of the WHATWG URL Living Standard
93
79
  test_files: []