uri-whatwg_parser 0.2.0 → 0.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 4ae0566e4902f0c52f40969d4b55ae40ac98b075bae062d8167895e847e91854
4
- data.tar.gz: 1c9c1a764beb16e6719822138df7ef36fe5f02423052b4f436ae164a70eb7cab
3
+ metadata.gz: bc2c1e5428af4eaed582a417234dec63ac59554af14bbbbd1cc0c3017e8e32b9
4
+ data.tar.gz: 749b4ba051cb58a73f0ef8ebe64d6426a4d1f0803176641ff75af539757130c1
5
5
  SHA512:
6
- metadata.gz: d6dcb5018a93cd1f9a17ed84e5d409a0d6413efd405e50ea80f46169ad4b1970dd3e8732da209171c44a2f1a672f7e12b6914f1c5efa95ed882644a4e86393c1
7
- data.tar.gz: 69e9afe795ab64751158bf69665de207649119c42f46a1aee9f34faea73a3756d1d4f4c2b5ffd22eab159c932056fbf1820f6dfc2855015d57dfb0c83910185b
6
+ metadata.gz: a28ffa266d8013c02ed0da9bfd285eaff113527772019a135ddd092310b942c01b6beab710336d37665f8ec3dcc18829edad14a0a97168ef1fc3c651148eb4f0
7
+ data.tar.gz: 1ba90031895fe24a39b1ae7ff127ea7af821cb39a0801b83234c40b63801c9ee6c886f1bb0e423bc9c6e9c817d31bea29661db7e22978c8b66891b9864a953f4
data/CHANGELOG.md CHANGED
@@ -1,3 +1,7 @@
1
+ ## 0.2.1
2
+
3
+ * Improve the performance of `parse`
4
+
1
5
  ## 0.2.0
2
6
 
3
7
  * Fix setter methods to be compliant with the WHATWG URL Living Standard
data/README.md CHANGED
@@ -2,7 +2,7 @@
2
2
 
3
3
  Ruby implementation of the [WHATWG URL Living Standard](https://url.spec.whatwg.org/).
4
4
 
5
- The latest revision that this package implements of the standard is ([30 October 2025](https://url.spec.whatwg.org/commit-snapshots/52526653e848c5a56598c84aa4bc8ac9025fb66b/)).
5
+ The latest revision of the standard that this package implements is [13 January 2026](https://url.spec.whatwg.org/commit-snapshots/b6b3251fe911ab33d68fb051efe0e4d39ae4145e/).
6
6
 
7
7
  ## Installation
8
8
 
@@ -62,7 +62,7 @@ module URI
62
62
  if host.nil? || host.empty? || scheme == "file"
63
63
  raise InvalidURIError, "cannot set user when host is nil or file schme"
64
64
  end
65
- set_user(URI::DEFAULT_PARSER.encode_userinfo(v))
65
+ set_user(URI::DEFAULT_PARSER.utf8_percent_encode_string(v, URI::WhatwgParser::USERINFO_PERCENT_ENCODE_SET))
66
66
  end
67
67
 
68
68
  def password=(v)
@@ -72,7 +72,7 @@ module URI
72
72
  if host.nil? || host.empty? || scheme == "file"
73
73
  raise InvalidURIError, "cannot set password when host is nil or file schme"
74
74
  end
75
- set_password(URI::DEFAULT_PARSER.encode_userinfo(v))
75
+ set_password(URI::DEFAULT_PARSER.utf8_percent_encode_string(v, URI::WhatwgParser::USERINFO_PERCENT_ENCODE_SET))
76
76
  end
77
77
 
78
78
  def host=(v)
@@ -7,8 +7,10 @@ class URI::WhatwgParser
7
7
  class HostParser
8
8
  include ParserHelper
9
9
 
10
- FORBIDDEN_HOST_CODE_POINT = ["\x00", "\t", "\x0a", "\x0d", " ", "#", "/", ":", "<", ">", "?", "@", "[", "\\", "]", "^", "|"]
11
- FORBIDDEN_DOMAIN_CODE_POINT = FORBIDDEN_HOST_CODE_POINT + C0_CONTROL_PERCENT_ENCODE_SET + ["%", "\x7f"]
10
+ FORBIDDEN_HOST_CODE_POINT = Set["\x00", "\t", "\x0a", "\x0d", " ", "#", "/", ":", "<", ">", "?", "@", "[", "\\", "]", "^", "|"]
11
+ FORBIDDEN_DOMAIN_CODE_POINT = FORBIDDEN_HOST_CODE_POINT | C0_CONTROL_PERCENT_ENCODE_SET | Set["%", "\x7f"]
12
+ FORBIDDEN_HOST_REGEX = Regexp.union(FORBIDDEN_HOST_CODE_POINT.to_a)
13
+ FORBIDDEN_DOMAIN_REGEX = Regexp.union(FORBIDDEN_DOMAIN_CODE_POINT.to_a)
12
14
 
13
15
  def parse(input, opaque = false) # :nodoc:
14
16
  return "" if input&.empty?
@@ -39,8 +41,7 @@ class URI::WhatwgParser
39
41
  raise URI::WhatwgParser::ParseError, "invalid IPv4 format" if parts.size > 4
40
42
  numbers = []
41
43
  parts.each do |part|
42
- value, _validation_error = parse_ipv4_number(part)
43
- numbers << value
44
+ numbers << parse_ipv4_number(part)
44
45
  end
45
46
 
46
47
  (numbers.size-1).times {|i| raise URI::WhatwgParser::ParseError, "invalid IPv4 format" if numbers[i] > 255 }
@@ -191,7 +192,7 @@ class URI::WhatwgParser
191
192
 
192
193
  def parse_opaque_host(host)
193
194
  raise ParseError if include_forbidden_host_code_point?(host)
194
- host.chars.map { |c| percent_encode(c, C0_CONTROL_PERCENT_ENCODE_SET) }.join
195
+ host.chars.map { |c| utf8_percent_encode(c, C0_CONTROL_PERCENT_ENCODE_SET) }.join
195
196
  end
196
197
 
197
198
  def percent_decode(str)
@@ -201,51 +202,63 @@ class URI::WhatwgParser
201
202
  end
202
203
 
203
204
  def ends_in_number?(domain)
204
- parts = domain.split(".", -1)
205
- if parts.last == ""
206
- return false if parts.size == 1
207
- parts.pop
205
+ return false if domain.empty?
206
+
207
+ if domain.end_with?(".")
208
+ # Remove trailing dot and find the actual last segment
209
+ domain_without_trailing = domain[0...-1]
210
+ return false if domain_without_trailing.empty?
211
+
212
+ last_dot = domain_without_trailing.rindex(".")
213
+ last = last_dot ? domain_without_trailing[last_dot + 1..-1] : domain_without_trailing
214
+ else
215
+ # Find the last segment after the last dot
216
+ last_dot = domain.rindex(".")
217
+ last = last_dot ? domain[last_dot + 1..-1] : domain
208
218
  end
209
219
 
210
- last = parts.last
211
- return true if last != "" && last.chars.all? { |c| ascii_digit?(c) }
220
+ return false if last.empty?
221
+ return true if last.match?(/\A\d+\z/)
212
222
 
213
- begin
214
- parse_ipv4_number(last)
215
- rescue ParseError
216
- return false
223
+ if last.start_with?("0x", "0X")
224
+ hex = last[2..-1] || ""
225
+ return true if hex.empty? || hex.match?(/\A[0-9A-Fa-f]+\z/)
217
226
  end
218
227
 
219
- true
228
+ false
220
229
  end
221
230
 
222
231
  def parse_ipv4_number(str)
223
232
  raise ParseError, "invalid IPv4 format" if str&.empty?
224
233
 
225
- validation_error = false
226
234
  r = 10
227
235
 
228
236
  if str.size >= 2 && str.start_with?("0x", "0X")
229
- validation_error = true
230
237
  str = str[2..-1]
231
238
  r = 16
232
239
  elsif str.size >= 2 && str.start_with?("0")
233
- validation_error = true
234
240
  str = str[1..-1]
235
241
  r = 8
236
242
  end
237
243
 
238
- return 0, true if str.empty?
244
+ return 0 if str.empty?
239
245
 
240
246
  begin
241
- output = Integer(str, r)
242
- return output, validation_error
247
+ Integer(str, r)
243
248
  rescue ArgumentError
244
249
  raise ParseError, "invalid IPv4 format"
245
250
  end
246
251
  end
247
252
 
248
253
  def domain_to_ascii(domain)
254
+ # If domain is already ASCII-only, lowercase, and doesn't contain punycode prefix
255
+ # we can skip IDNA processing
256
+ if domain.ascii_only? && domain == domain.downcase && !domain.include?("xn--")
257
+ raise ParseError, "including invalid value in host" if include_forbidden_domain_code_point?(domain)
258
+ raise ParseError, "host can't be empty" if domain.empty?
259
+ return domain
260
+ end
261
+
249
262
  ascii_domain = URI::IDNA.whatwg_to_ascii(domain.force_encoding(Encoding::UTF_8), be_strict: false)
250
263
 
251
264
  raise ParseError, "including invalid value in host" if include_forbidden_domain_code_point?(ascii_domain)
@@ -255,11 +268,11 @@ class URI::WhatwgParser
255
268
  end
256
269
 
257
270
  def include_forbidden_domain_code_point?(str)
258
- FORBIDDEN_DOMAIN_CODE_POINT.any? {|c| str.include?(c) }
271
+ str.match?(FORBIDDEN_DOMAIN_REGEX)
259
272
  end
260
273
 
261
274
  def include_forbidden_host_code_point?(str)
262
- FORBIDDEN_HOST_CODE_POINT.any? {|c| str.include?(c) }
275
+ str.match?(FORBIDDEN_HOST_REGEX)
263
276
  end
264
277
  end
265
278
  end
@@ -1,31 +1,23 @@
1
1
  # frozen_string_literal: true
2
2
 
3
+ require "set"
4
+
3
5
  class URI::WhatwgParser
4
6
  module ParserHelper
5
- C0_CONTROL_PERCENT_ENCODE_SET = (0..0x1f).map(&:chr)
6
- ASCII_ALPHA = ("a".."z").to_a + ("A".."Z").to_a
7
- ASCII_DIGIT = ("0".."9").to_a
8
-
9
- def ascii_alpha?(c)
10
- ASCII_ALPHA.include?(c)
11
- end
12
-
13
- def ascii_alphanumerica?(c)
14
- ascii_alpha?(c) || ascii_digit?(c)
15
- end
7
+ # NOTE: This set is incomplete (the spec's set also contains every code point greater than `0x7e`), but that is OK for now because `utf8_percent_encode` checks `c.ord > 0x7e` itself.
8
+ C0_CONTROL_PERCENT_ENCODE_SET = Set.new((0..0x1f).map(&:chr))
16
9
 
17
- def ascii_digit?(c)
18
- ASCII_DIGIT.include?(c)
19
- end
20
-
21
- def percent_encode(c, encode_set, encoding = Encoding::UTF_8)
10
+ def utf8_percent_encode(c, encode_set)
22
11
  return c unless encode_set.include?(c) || c.ord > 0x7e
23
12
 
24
13
  # For ASCII single-byte characters
25
14
  return "%%%02X" % c.ord if c.bytesize == 1
26
15
 
27
- bytes = c.encoding == encoding ? c.bytes : c.encode(encoding).bytes
28
- bytes.map { |b| "%%%02X" % b }.join
16
+ c.bytes.map { |b| "%%%02X" % b }.join
17
+ end
18
+
19
+ def utf8_percent_encode_string(str, encode_set)
20
+ str.chars.map { |c| utf8_percent_encode(c, encode_set) }.join
29
21
  end
30
22
  end
31
23
  end
@@ -2,6 +2,6 @@
2
2
 
3
3
  module URI
4
4
  class WhatwgParser
5
- VERSION = "0.2.0"
5
+ VERSION = "0.2.1"
6
6
  end
7
7
  end
@@ -1,5 +1,6 @@
1
1
  # frozen_string_literal: true
2
2
 
3
+ require "set"
3
4
  require "uri"
4
5
  require_relative "whatwg_parser/error"
5
6
  require_relative "whatwg_parser/version"
@@ -13,19 +14,28 @@ module URI
13
14
 
14
15
  SPECIAL_SCHEME = { "ftp" => 21, "file" => nil, "http" => 80, "https" => 443, "ws" => 80, "wss" => 443 }
15
16
 
16
- FRAGMENT_PERCENT_ENCODE_SET = C0_CONTROL_PERCENT_ENCODE_SET + [" ", "\"", "<", ">", "`"]
17
- QUERY_PERCENT_ENCODE_SET = C0_CONTROL_PERCENT_ENCODE_SET + [" ", "\"", "#", "<", ">"]
18
- SPECIAL_QUERY_PERCENT_ENCODE_SET = QUERY_PERCENT_ENCODE_SET + ["'"]
19
- PATH_PERCENT_ENCODE_SET = QUERY_PERCENT_ENCODE_SET + ["?", "^", "`", "{", "}"]
20
- USERINFO_PERCENT_ENCODE_SET = PATH_PERCENT_ENCODE_SET + ["/", ":", ";", "=","@", "[", "\\", "]", "|"]
17
+ FRAGMENT_PERCENT_ENCODE_SET = C0_CONTROL_PERCENT_ENCODE_SET | Set[" ", "\"", "<", ">", "`"]
18
+ QUERY_PERCENT_ENCODE_SET = C0_CONTROL_PERCENT_ENCODE_SET | Set[" ", "\"", "#", "<", ">"]
19
+ SPECIAL_QUERY_PERCENT_ENCODE_SET = QUERY_PERCENT_ENCODE_SET | Set["'"]
20
+ PATH_PERCENT_ENCODE_SET = QUERY_PERCENT_ENCODE_SET | Set["?", "^", "`", "{", "}"]
21
+ USERINFO_PERCENT_ENCODE_SET = PATH_PERCENT_ENCODE_SET | Set["/", ":", ";", "=", "@", "[", "\\", "]", "|"]
21
22
 
22
- SINGLE_DOT_PATH_SEGMENTS = [".", "%2e", "%2E"]
23
- DOUBLE_DOT_PATH_SEGMENTS = ["..", ".%2e", ".%2E", "%2e.", "%2e%2e", "%2e%2E", "%2E.", "%2E%2e", "%2E%2E"]
23
+ SINGLE_DOT_PATH_SEGMENTS = Set[".", "%2e", "%2E"]
24
+ DOUBLE_DOT_PATH_SEGMENTS = Set["..", ".%2e", ".%2E", "%2e.", "%2e%2e", "%2e%2E", "%2E.", "%2E%2e", "%2E%2E"]
24
25
 
25
26
  WINDOWS_DRIVE_LETTER = Regexp.new("\\A([a-zA-Z][:|])\\z")
26
27
  NORMALIZED_WINDOWS_DRIVE_LETTER = Regexp.new("\\A([a-zA-Z][:])\\z")
27
28
  STARTS_WITH_WINDOWS_DRIVE_LETTER = Regexp.new("\\A([a-zA-Z][:|])(?:[/\\?#])?\\z")
28
29
 
30
+ VALID_SIGNS_FOR_SCHEME = Set["+", "-", "."]
31
+ DELIMITER_SIGNS = Set["/", "?", "#"]
32
+
33
+ WS_SCHEMES = Set["ws", "wss"]
34
+
35
+ ASCII_ALPHA_LOWERCASE = Set.new(("a".."z").to_a)
36
+ ASCII_ALPHA_UPPERCASE = Set.new(("A".."Z").to_a)
37
+ ASCII_DIGIT = Set.new(("0".."9").to_a)
38
+
29
39
  def initialize
30
40
  reset
31
41
  @host_parser = HostParser.new
@@ -35,15 +45,15 @@ module URI
35
45
  {}
36
46
  end
37
47
 
38
- def parse(input, base: nil, encoding: Encoding::UTF_8, url: nil, state_override: nil) # :nodoc:
39
- URI.for(*self.split(input, base: base, encoding: encoding, url: url, state_override: state_override))
48
+ def parse(input, base: nil, url: nil, state_override: nil) # :nodoc:
49
+ URI.for(*self.split(input, base: base, url: url, state_override: state_override))
40
50
  end
41
51
 
42
- def split(input, base: nil, encoding: Encoding::UTF_8, url: nil, state_override: nil) # :nodoc:
52
+ def split(input, base: nil, url: nil, state_override: nil) # :nodoc:
43
53
  reset
44
54
  @base = nil
45
55
  if base != nil
46
- ary = split(base, base: nil, encoding: encoding)
56
+ ary = split(base, base: nil)
47
57
  @base = { scheme: ary[0], userinfo: ary[1], host: ary[2], port: ary[3], registry: ary[4], path: ary[5], opaque: ary[6], query: ary[7], fragment: ary[8]}
48
58
  @base_paths = @paths
49
59
  reset
@@ -52,7 +62,10 @@ module URI
52
62
  if url
53
63
  raise ArgumentError, "bad argument (expected URI object)" unless url.is_a?(URI::Generic)
54
64
  @parse_result.merge!(url.component.zip(url.send(:component_ary)).to_h)
55
- @parse_result[:path] = nil
65
+ @username = url.user
66
+ @password = url.password
67
+ @parse_result.delete(:userinfo)
68
+ @special_url = special_url?(@parse_result[:scheme])
56
69
  end
57
70
 
58
71
  if state_override
@@ -63,30 +76,27 @@ module URI
63
76
  raise ParseError, "uri can't be empty" if (input.nil? || input.empty?) && @base.nil?
64
77
  end
65
78
 
66
- @encoding = encoding
67
- @input = input.dup
79
+ input = input.dup
68
80
 
69
81
  unless url
70
- @input.sub!(/\A[\u0000-\u0020]*/, "")
71
- @input.sub!(/[\u0000-\u0020]*\z/, "")
82
+ remove_c0_control_or_space!(input)
72
83
  end
73
84
 
74
- @input.delete!("\t")
75
- @input.delete!("\n")
76
- @input.delete!("\r")
85
+ input.delete!("\t\n\r") if /[\t\n\r]/.match?(input)
77
86
 
87
+ @input_chars = input.chars
88
+ input_chars_length = @input_chars.length
78
89
  @pos = 0
79
90
 
80
- while @pos <= @input.length
81
- c = @input[@pos]
82
- ret = send(@state, c)
83
- break if ret == :terminate
91
+ while @pos <= input_chars_length
92
+ dispatch_state(@input_chars[@pos])
93
+ break if @terminate
84
94
  @pos += 1
85
95
  end
86
96
 
87
- @parse_result[:userinfo] = [@username, @password].compact.reject(&:empty?).join(":")
88
- @parse_result[:path] = "/#{@paths.join("/")}" if @paths && !@paths.empty?
89
- @parse_result.values
97
+ userinfo = [@username, @password].compact.reject(&:empty?).join(":")
98
+ path = "/#{@paths.join("/")}" if @paths && !@paths.empty?
99
+ [@parse_result[:scheme], userinfo, @parse_result[:host], @parse_result[:port], @parse_result[:registry], path, @parse_result[:opaque], @parse_result[:query], @parse_result[:fragment]]
90
100
  end
91
101
 
92
102
  def join(*uris)
@@ -101,14 +111,33 @@ module URI
101
111
  uri
102
112
  end
103
113
 
104
- def encode_userinfo(str)
105
- str.chars.map do |char|
106
- percent_encode(char, USERINFO_PERCENT_ENCODE_SET)
107
- end.join
108
- end
109
-
110
114
  private
111
115
 
116
+ def dispatch_state(c)
117
+ case @state
118
+ when :scheme_start_state then scheme_start_state(c)
119
+ when :scheme_state then scheme_state(c)
120
+ when :no_scheme_state then no_scheme_state(c)
121
+ when :special_relative_or_authority_state then special_relative_or_authority_state(c)
122
+ when :path_or_authority_state then path_or_authority_state(c)
123
+ when :relative_state then relative_state(c)
124
+ when :relative_slash_state then relative_slash_state(c)
125
+ when :special_authority_slashes_state then special_authority_slashes_state(c)
126
+ when :special_authority_ignore_slashes_state then special_authority_ignore_slashes_state(c)
127
+ when :authority_state then authority_state(c)
128
+ when :host_state then host_state(c)
129
+ when :port_state then port_state(c)
130
+ when :file_state then file_state(c)
131
+ when :file_slash_state then file_slash_state(c)
132
+ when :file_host_state then file_host_state(c)
133
+ when :path_start_state then path_start_state(c)
134
+ when :path_state then path_state(c)
135
+ when :opaque_path_state then opaque_path_state(c)
136
+ when :query_state then query_state(c)
137
+ when :fragment_state then fragment_state(c)
138
+ end
139
+ end
140
+
112
141
  def reset
113
142
  @buffer = +""
114
143
  @at_sign_seen = nil
@@ -117,13 +146,18 @@ module URI
117
146
  @paths = nil
118
147
  @username = nil
119
148
  @password = nil
120
- @parse_result = { scheme: nil, userinfo: nil, host: nil, port: nil, registry: nil, path: nil, opaque: nil, query: nil, fragment: nil }
149
+ @parse_result = { scheme: nil, host: nil, port: nil, registry: nil, path: nil, opaque: nil, query: nil, fragment: nil }
121
150
  @state_override = nil
122
151
  @state = :scheme_start_state
152
+ @special_url = nil
153
+ @terminate = nil
123
154
  end
124
155
 
125
156
  def scheme_start_state(c)
126
- if ascii_alpha?(c)
157
+ if ASCII_ALPHA_LOWERCASE.include?(c)
158
+ @buffer << c
159
+ @state = :scheme_state
160
+ elsif ASCII_ALPHA_UPPERCASE.include?(c)
127
161
  @buffer << c.downcase
128
162
  @state = :scheme_state
129
163
  elsif @state_override.nil?
@@ -135,7 +169,9 @@ module URI
135
169
  end
136
170
 
137
171
  def scheme_state(c)
138
- if ascii_alphanumerica?(c) || ["+", "-", "."].include?(c)
172
+ if ASCII_ALPHA_LOWERCASE.include?(c) || ASCII_DIGIT.include?(c) || VALID_SIGNS_FOR_SCHEME.include?(c)
173
+ @buffer << c
174
+ elsif ASCII_ALPHA_UPPERCASE.include?(c)
139
175
  @buffer << c.downcase
140
176
  elsif c == ":"
141
177
  if @state_override
@@ -143,17 +179,20 @@ module URI
143
179
  (!special_url? && special_url?(@buffer)) ||
144
180
  ((includes_credentials? || !@parse_result[:port].nil?) && @buffer == "file") ||
145
181
  (@parse_result[:scheme] == "file" && @parse_result[:host]&.empty?)
146
- return :terminate
182
+ @terminate = true
183
+ return
147
184
  end
148
185
  end
149
186
 
150
187
  @parse_result[:scheme] = @buffer
188
+ @special_url = special_url?(@buffer)
151
189
 
152
190
  if @state_override
153
191
  if SPECIAL_SCHEME.value?(@parse_result[:port].to_i)
154
192
  @parse_result[:port] = nil
155
193
  end
156
- return :terminate
194
+ @terminate = true
195
+ return
157
196
  end
158
197
 
159
198
  @buffer = +""
@@ -164,11 +203,11 @@ module URI
164
203
  @state = :special_relative_or_authority_state
165
204
  elsif special_url?
166
205
  @state = :special_authority_slashes_state
167
- elsif rest.start_with?("/")
206
+ elsif @input_chars[@pos + 1] == "/"
168
207
  @state = :path_or_authority_state
169
208
  @pos += 1
170
209
  else
171
- @parse_result[:opaque] = ""
210
+ @parse_result[:opaque] = +""
172
211
  @state = :opaque_path_state
173
212
  end
174
213
  elsif @state_override.nil?
@@ -185,6 +224,7 @@ module URI
185
224
 
186
225
  if !@base[:opaque].nil? && c == "#"
187
226
  @parse_result[:scheme] = @base[:scheme]
227
+ @special_url = special_url?(@base[:scheme])
188
228
  @paths = @base_paths
189
229
  @parse_result[:query] = @base[:query]
190
230
  @parse_result[:fragment] = nil
@@ -199,7 +239,7 @@ module URI
199
239
  end
200
240
 
201
241
  def special_relative_or_authority_state(c)
202
- if c == "/" && rest.start_with?("/")
242
+ if c == "/" && @input_chars[@pos + 1] == "/"
203
243
  @state = :special_authority_ignore_slashes_state
204
244
  @pos -= 1
205
245
  else
@@ -219,6 +259,7 @@ module URI
219
259
 
220
260
  def relative_state(c)
221
261
  @parse_result[:scheme] = @base[:scheme]
262
+ @special_url = special_url?(@base[:scheme])
222
263
  if c == "/"
223
264
  @state = :relative_slash_state
224
265
  elsif special_url? && c == "\\"
@@ -246,7 +287,7 @@ module URI
246
287
  end
247
288
 
248
289
  def relative_slash_state(c)
249
- if special_url? && (c == "/" || c == "\\")
290
+ if @special_url && (c == "/" || c == "\\")
250
291
  @state = :special_authority_ignore_slashes_state
251
292
  elsif c == "/"
252
293
  @state = :authority_state
@@ -260,7 +301,7 @@ module URI
260
301
  end
261
302
 
262
303
  def special_authority_slashes_state(c)
263
- if c == "/" && rest.start_with?("/")
304
+ if c == "/" && @input_chars[@pos + 1] == "/"
264
305
  @state = :special_authority_ignore_slashes_state
265
306
  @pos += 1
266
307
  else
@@ -280,23 +321,23 @@ module URI
280
321
  if c == "@"
281
322
  @buffer.prepend("%40") if @at_sign_seen
282
323
  @at_sign_seen = true
283
- @buffer.chars.each do |char|
324
+ @buffer.each_char do |char|
284
325
  if char == ":" && !@password_token_seen
285
326
  @password_token_seen = true
286
327
  next
287
328
  end
288
329
 
289
- encoded_char = percent_encode(char, USERINFO_PERCENT_ENCODE_SET, @encoding)
330
+ encoded_char = utf8_percent_encode(char, USERINFO_PERCENT_ENCODE_SET)
290
331
 
291
332
  if @password_token_seen
292
- @password = @password.to_s + encoded_char
333
+ (@password ||= +"") << encoded_char
293
334
  else
294
- @username = @username.to_s + encoded_char
335
+ (@username ||= +"") << encoded_char
295
336
  end
296
337
  end
297
338
 
298
339
  @buffer.clear
299
- elsif c.nil? || ["/", "?", "#"].include?(c) || (special_url? && c == "\\")
340
+ elsif c.nil? || DELIMITER_SIGNS.include?(c) || (@special_url && c == "\\")
300
341
  raise ParseError, "host is missing" if @at_sign_seen && @buffer.empty?
301
342
 
302
343
  @pos -= (@buffer.size + 1)
@@ -315,20 +356,23 @@ module URI
315
356
  raise ParseError, "host is missing" if @buffer.empty?
316
357
  raise ParseError, "invalid host" if @state_override && @state_override == :hostname_state
317
358
 
318
- @parse_result[:host] = @host_parser.parse(@buffer, !special_url?)
359
+ @parse_result[:host] = @host_parser.parse(@buffer, !@special_url)
319
360
  @buffer.clear
320
361
  @state = :port_state
321
- elsif c.nil? || ["/", "?", "#"].include?(c) || (special_url? && c == "\\")
362
+ elsif c.nil? || DELIMITER_SIGNS.include?(c) || (@special_url && c == "\\")
322
363
  @pos -= 1
323
- if special_url? && @buffer.empty?
364
+ if @special_url && @buffer.empty?
324
365
  raise ParseError, "host is missing"
325
366
  elsif @state_override && @buffer.empty? && (includes_credentials? || !@parse_result[:port].nil?)
326
367
  raise ParseError, "invalid host"
327
368
  else
328
- @parse_result[:host] = @host_parser.parse(@buffer, !special_url?)
369
+ @parse_result[:host] = @host_parser.parse(@buffer, !@special_url)
329
370
  @buffer.clear
330
371
  @state = :path_start_state
331
- return :terminate if @state_override
372
+ if @state_override
373
+ @terminate = true
374
+ return
375
+ end
332
376
  end
333
377
  else
334
378
  @inside_brackets = true if c == "["
@@ -338,9 +382,9 @@ module URI
338
382
  end
339
383
 
340
384
  def port_state(c)
341
- if ascii_digit?(c)
385
+ if ASCII_DIGIT.include?(c)
342
386
  @buffer << c
343
- elsif c.nil? || ["/", "?", "#"].include?(c) || (special_url? && c == "\\") || @state_override
387
+ elsif c.nil? || DELIMITER_SIGNS.include?(c) || (@special_url && c == "\\") || @state_override
344
388
  unless @buffer.empty?
345
389
  port = Integer(@buffer, 10)
346
390
  raise ParseError, "port is invalid value" if port < 0 || port > 65535
@@ -351,7 +395,10 @@ module URI
351
395
  end
352
396
 
353
397
  @buffer.clear
354
- return :terminate if @state_override
398
+ if @state_override
399
+ @terminate = true
400
+ return
401
+ end
355
402
  end
356
403
 
357
404
  raise ParseError, "port is invalid value" if @state_override
@@ -364,6 +411,7 @@ module URI
364
411
 
365
412
  def file_state(c)
366
413
  @parse_result[:scheme] = "file"
414
+ @special_url = true
367
415
  @parse_result[:host] = nil
368
416
 
369
417
  if c == "/" || c == "\\"
@@ -412,20 +460,26 @@ module URI
412
460
  end
413
461
 
414
462
  def file_host_state(c)
415
- if c.nil? || c == "/" || c == "\\" || c == "?" || c == "#"
463
+ if c.nil? || DELIMITER_SIGNS.include?(c) || (@special_url && c == "\\")
416
464
  @pos -= 1
417
465
 
418
466
  if !@state_override && windows_drive_letter?(@buffer)
419
467
  @state = :path_state
420
468
  elsif @buffer.empty?
421
469
  @parse_result[:host] = nil
422
- return :terminate if @state_override
470
+ if @state_override
471
+ @terminate = true
472
+ return
473
+ end
423
474
  @state = :path_start_state
424
475
  else
425
- host = @host_parser.parse(@buffer, !special_url?)
476
+ host = @host_parser.parse(@buffer, !@special_url)
426
477
  host = "" if host == "localhost"
427
478
  @parse_result[:host] = host
428
- return :terminate if @state_override
479
+ if @state_override
480
+ @terminate = true
481
+ return
482
+ end
429
483
  @buffer.clear
430
484
  @state = :path_start_state
431
485
  end
@@ -435,7 +489,7 @@ module URI
435
489
  end
436
490
 
437
491
  def path_start_state(c)
438
- if special_url?
492
+ if @special_url
439
493
  @pos -= 1 if c != "/" && c != "\\"
440
494
  @state = :path_state
441
495
  elsif !@state_override && c == "?"
@@ -454,14 +508,14 @@ module URI
454
508
  def path_state(c)
455
509
  @paths ||= []
456
510
 
457
- if (c.nil? || c == "/") || (special_url? && c == "\\") || (!@state_override && (c == "?" || c == "#"))
511
+ if (c.nil? || c == "/") || (@special_url && c == "\\") || (!@state_override && (c == "?" || c == "#"))
458
512
  if double_dot_path_segments?(@buffer)
459
513
  shorten_url_path
460
514
 
461
- if c != "/" && !(special_url? && c == "\\")
515
+ if c != "/" && !(@special_url && c == "\\")
462
516
  @paths << ""
463
517
  end
464
- elsif single_dot_path_segments?(@buffer) && c != "/" && !((special_url? && c == "\\"))
518
+ elsif single_dot_path_segments?(@buffer) && c != "/" && !((@special_url && c == "\\"))
465
519
  @paths << ""
466
520
  elsif !single_dot_path_segments?(@buffer)
467
521
  if @parse_result[:scheme] == "file" && @paths.empty? && windows_drive_letter?(@buffer)
@@ -481,7 +535,7 @@ module URI
481
535
  @state = :fragment_state
482
536
  end
483
537
  else
484
- @buffer << percent_encode(c, PATH_PERCENT_ENCODE_SET, @encoding)
538
+ @buffer << utf8_percent_encode(c, PATH_PERCENT_ENCODE_SET)
485
539
  end
486
540
  end
487
541
 
@@ -493,24 +547,22 @@ module URI
493
547
  @parse_result[:fragment] = nil
494
548
  @state = :fragment_state
495
549
  elsif c == " "
496
- if rest.start_with?("?", "#")
497
- @parse_result[:opaque] += "%20"
550
+ first_of_rest = @input_chars[@pos + 1]
551
+ if first_of_rest == "?" || first_of_rest == "#"
552
+ @parse_result[:opaque] << "%20"
498
553
  else
499
- @parse_result[:opaque] += " "
554
+ @parse_result[:opaque] << " "
500
555
  end
501
556
  elsif !c.nil?
502
- @parse_result[:opaque] += percent_encode(c, C0_CONTROL_PERCENT_ENCODE_SET, @encoding)
557
+ @parse_result[:opaque] << utf8_percent_encode(c, C0_CONTROL_PERCENT_ENCODE_SET)
503
558
  end
504
559
  end
505
560
 
506
561
  def query_state(c)
507
- if @encoding != Encoding::UTF_8 && (!special_url? || %w[ws wss].include?(@parse_result[:scheme]))
508
- @encoding = Encoding::UTF_8
509
- end
510
-
511
562
  if c.nil? || (!@state_override && c == "#")
512
- query_percent_encode_set = special_url? ? SPECIAL_QUERY_PERCENT_ENCODE_SET : QUERY_PERCENT_ENCODE_SET
513
- @parse_result[:query] = @buffer.chars.map { |c| percent_encode(c, query_percent_encode_set, @encoding) }.join
563
+ query_percent_encode_set = @special_url ? SPECIAL_QUERY_PERCENT_ENCODE_SET : QUERY_PERCENT_ENCODE_SET
564
+ # TODO: We need to consider encoding here.
565
+ @parse_result[:query] = utf8_percent_encode_string(@buffer, query_percent_encode_set)
514
566
  @buffer.clear
515
567
  @state = :fragment_state if c == "#"
516
568
  elsif !c.nil?
@@ -520,7 +572,7 @@ module URI
520
572
 
521
573
  def fragment_state(c)
522
574
  return if c.nil?
523
- @parse_result[:fragment] = @parse_result[:fragment].to_s + percent_encode(c, FRAGMENT_PERCENT_ENCODE_SET, @encoding)
575
+ (@parse_result[:fragment] ||= +"") << utf8_percent_encode(c, FRAGMENT_PERCENT_ENCODE_SET)
524
576
  end
525
577
 
526
578
  def windows_drive_letter?(str)
@@ -554,11 +606,11 @@ module URI
554
606
  end
555
607
 
556
608
  def includes_credentials?
557
- !@parse_result[:userinfo].nil? || (@username && !@username.empty?) || (@password && !@password.empty?)
609
+ (@username && !@username.empty?) || (@password && !@password.empty?)
558
610
  end
559
611
 
560
612
  def rest
561
- @input[@pos+1..]
613
+ @input_chars[@pos + 1..]&.join
562
614
  end
563
615
 
564
616
  def convert_to_uri(uri)
@@ -571,6 +623,21 @@ module URI
571
623
  "bad argument (expected URI object or URI string)"
572
624
  end
573
625
  end
626
+
627
+ if RUBY_VERSION >= "4.0"
628
+ def remove_c0_control_or_space!(str)
629
+ if /[\u0000-\u0020]/.match?(str)
630
+ str.strip!("\u0000-\u0020")
631
+ end
632
+ end
633
+ else
634
+ def remove_c0_control_or_space!(str)
635
+ if /[\u0000-\u0020]/.match?(str)
636
+ str.sub!(/\A[\u0000-\u0020]*/, "")
637
+ str.sub!(/[\u0000-\u0020]*\z/, "")
638
+ end
639
+ end
640
+ end
574
641
  end
575
642
 
576
643
  WHATWG_PARSER = URI::WhatwgParser.new
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: uri-whatwg_parser
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.0
4
+ version: 0.2.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Yuji Yaginuma
@@ -37,20 +37,6 @@ dependencies:
37
37
  - - ">="
38
38
  - !ruby/object:Gem::Version
39
39
  version: '0'
40
- - !ruby/object:Gem::Dependency
41
- name: debug
42
- requirement: !ruby/object:Gem::Requirement
43
- requirements:
44
- - - ">="
45
- - !ruby/object:Gem::Version
46
- version: '0'
47
- type: :development
48
- prerelease: false
49
- version_requirements: !ruby/object:Gem::Requirement
50
- requirements:
51
- - - ">="
52
- - !ruby/object:Gem::Version
53
- version: '0'
54
40
  email:
55
41
  - yuuji.yaginuma@gmail.com
56
42
  executables: []