uri-whatwg_parser 0.2.0 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 4ae0566e4902f0c52f40969d4b55ae40ac98b075bae062d8167895e847e91854
4
- data.tar.gz: 1c9c1a764beb16e6719822138df7ef36fe5f02423052b4f436ae164a70eb7cab
3
+ metadata.gz: 7e3c073b711f9600fd66938070b1cc254a1067b684437f27038a69bc65076c9c
4
+ data.tar.gz: fe833972e0fe8265958d94a97b127983bca064cd29f554c78fb1c2459be008f9
5
5
  SHA512:
6
- metadata.gz: d6dcb5018a93cd1f9a17ed84e5d409a0d6413efd405e50ea80f46169ad4b1970dd3e8732da209171c44a2f1a672f7e12b6914f1c5efa95ed882644a4e86393c1
7
- data.tar.gz: 69e9afe795ab64751158bf69665de207649119c42f46a1aee9f34faea73a3756d1d4f4c2b5ffd22eab159c932056fbf1820f6dfc2855015d57dfb0c83910185b
6
+ metadata.gz: 541f8b1b1d02b2ac2f16b4af59e5fe3b02e01b2f291529b2698590d01dfc6e1da528926920af1dad8c2d21a11437030356859b30d6e972d2fd3e54aad04cd89a
7
+ data.tar.gz: 639c0241664afd68d4f0f4c2cea36e71f484cb53215fc12bb89e65749626e1769e9a777420201468e7863a624afe62a0bb8839776a9e565252a35211c845dbf4
data/CHANGELOG.md CHANGED
@@ -1,3 +1,12 @@
1
+ ## 0.3.0
2
+
3
+ * Fix several cases of incorrect parsing
4
+ * Fix URL serialization
5
+
6
+ ## 0.2.1
7
+
8
+ * Improve the performance of `parse`
9
+
1
10
  ## 0.2.0
2
11
 
3
12
  * Fix setter methods compliant with WHATWG URL Living Standard
data/README.md CHANGED
@@ -2,7 +2,7 @@
2
2
 
3
3
  Ruby implementation of the [WHATWG URL Living Standard](https://url.spec.whatwg.org/).
4
4
 
5
- The latest revision that this package implements of the standard is ([30 October 2025](https://url.spec.whatwg.org/commit-snapshots/52526653e848c5a56598c84aa4bc8ac9025fb66b/)).
5
+ The latest revision of the standard that this package implements is [14 April 2026](https://url.spec.whatwg.org/commit-snapshots/b11d73b8caefe90403afe19210db05acba897722/).
6
6
 
7
7
  ## Installation
8
8
 
@@ -3,16 +3,11 @@ require "uri/generic"
3
3
  module URI
4
4
  class WhatwgParser
5
5
  module Generic
6
- def initialize(scheme,
7
- userinfo, host, port, registry,
8
- path, opaque,
9
- query,
10
- fragment,
11
- parser = DEFAULT_PARSER,
12
- arg_check = false)
13
-
14
- return super unless URI::DEFAULT_PARSER.is_a?(URI::WhatwgParser)
15
- return super if registry
6
+ def initialize(scheme, userinfo, host, port, registry, path, opaque, query, fragment, parser = DEFAULT_PARSER, arg_check = false)
7
+ @parsed_by_whatwg_parser = parser.is_a?(URI::WhatwgParser)
8
+ unless parser.is_a?(URI::WhatwgParser)
9
+ return super(scheme, userinfo, host, port, registry, path, opaque, query, fragment)
10
+ end
16
11
 
17
12
  @scheme = nil
18
13
  @user = nil
@@ -20,6 +15,7 @@ module URI
20
15
  @host = nil
21
16
  @port = nil
22
17
  @path = nil
18
+ @raw_path = nil
23
19
  @query = nil
24
20
  @opaque = nil
25
21
  @fragment = nil
@@ -32,64 +28,67 @@ module URI
32
28
  self.set_path(path)
33
29
  self.query = query
34
30
  self.set_opaque(opaque)
35
- self.fragment=(fragment)
31
+ @fragment = fragment
32
+ @raw_path = parser&.path
36
33
 
37
34
  self.set_path("") if !@path && !@opaque
38
- DEFAULT_PARSER.parse(to_s) if arg_check
35
+ parser.parse(to_s) if arg_check
39
36
 
40
37
  @scheme&.freeze
41
38
  self.set_port(self.default_port) if self.default_port && !@port
42
39
  end
43
40
 
44
41
  def merge(oth)
45
- URI::DEFAULT_PARSER.join(self.to_s, oth.to_s)
42
+ return super unless @parsed_by_whatwg_parser
43
+
44
+ parser.join(self.to_s, oth.to_s)
46
45
  end
47
46
  alias + merge
48
47
 
49
48
  def scheme=(v)
50
- return super unless URI::DEFAULT_PARSER.is_a?(URI::WhatwgParser)
49
+ return super unless @parsed_by_whatwg_parser
51
50
  return if v.nil? || v.empty?
52
51
 
53
- parse_result = URI::DEFAULT_PARSER.split("#{v}:", url: self, state_override: :scheme_start_state)
52
+ parse_result = parser.split("#{v}:", url: self, state_override: :scheme_start_state)
54
53
  set_scheme(parse_result[0])
55
54
  set_port(parse_result[3])
56
55
  end
57
56
 
58
57
  def user=(v)
59
- return super unless URI::DEFAULT_PARSER.is_a?(URI::WhatwgParser)
58
+ return super unless @parsed_by_whatwg_parser
60
59
  return v unless v
61
60
 
62
61
  if host.nil? || host.empty? || scheme == "file"
63
62
  raise InvalidURIError, "cannot set user when host is nil or file schme"
64
63
  end
65
- set_user(URI::DEFAULT_PARSER.encode_userinfo(v))
64
+ set_user(parser.utf8_percent_encode_string(v, URI::WhatwgParser::USERINFO_PERCENT_ENCODE_SET))
66
65
  end
67
66
 
68
67
  def password=(v)
69
- return super unless URI::DEFAULT_PARSER.is_a?(URI::WhatwgParser)
68
+ return super unless @parsed_by_whatwg_parser
70
69
  return v unless v
71
70
 
72
71
  if host.nil? || host.empty? || scheme == "file"
73
72
  raise InvalidURIError, "cannot set password when host is nil or file schme"
74
73
  end
75
- set_password(URI::DEFAULT_PARSER.encode_userinfo(v))
74
+ set_password(parser.utf8_percent_encode_string(v, URI::WhatwgParser::USERINFO_PERCENT_ENCODE_SET))
76
75
  end
77
76
 
78
77
  def host=(v)
79
- return super unless URI::DEFAULT_PARSER.is_a?(URI::WhatwgParser)
78
+ return super unless @parsed_by_whatwg_parser
80
79
  return if v.nil?
81
80
 
82
81
  if @opaque
83
- raise InvalidURIError, "cannot set host with registry or opaque"
82
+ raise InvalidURIError, "cannot set host with opaque"
84
83
  end
85
84
 
86
- parse_result = URI::DEFAULT_PARSER.split(v.to_s, url: self, state_override: :host_state)
85
+ parse_result = parser.split(v.to_s, url: self, state_override: :host_state)
87
86
  set_host(parse_result[2])
88
87
  set_port(parse_result[3])
89
88
  end
90
89
 
91
90
  def port=(v)
92
- return super unless URI::DEFAULT_PARSER.is_a?(URI::WhatwgParser)
91
+ return super unless @parsed_by_whatwg_parser
93
92
  return if v.nil?
94
93
 
95
94
  if v.to_s.empty?
@@ -101,24 +100,55 @@ module URI
101
100
  raise InvalidURIError, "cannot set port when host is nil or scheme is file"
102
101
  end
103
102
 
104
- parse_result = URI::DEFAULT_PARSER.split("#{v}:", url: self, state_override: :port_state)
103
+ parse_result = parser.split("#{v}:", url: self, state_override: :port_state)
105
104
  set_port(parse_result[3])
106
105
  end
107
106
 
108
107
  def path=(v)
109
- return super unless URI::DEFAULT_PARSER.is_a?(URI::WhatwgParser)
108
+ return super unless @parsed_by_whatwg_parser
110
109
  return if v.nil?
111
110
 
112
111
  if @opaque
113
112
  raise InvalidURIError, "path conflicts with opaque"
114
113
  end
115
114
 
116
- parse_result = URI::DEFAULT_PARSER.split(v.to_s, url: self, state_override: :path_start_state)
115
+ parse_result = parser.split(v.to_s, url: self, state_override: :path_start_state)
116
+ @raw_path = parser.path
117
117
  set_path(parse_result[5])
118
118
  end
119
119
 
120
+ def query=(v)
121
+ return super unless @parsed_by_whatwg_parser
122
+
123
+ if v.nil? || v.empty?
124
+ @query = nil
125
+ return
126
+ end
127
+
128
+ v = v.start_with?("?") ? v[1..-1] : v
129
+ @query = +""
130
+
131
+ parse_result = parser.split(v, url: self, state_override: :query_state)
132
+ @query = parse_result[7].to_s
133
+ end
134
+
135
+ def fragment=(v)
136
+ return super unless @parsed_by_whatwg_parser
137
+
138
+ if v.nil? || v.empty?
139
+ @fragment = nil
140
+ return
141
+ end
142
+
143
+ v = v.start_with?("#") ? v[1..-1] : v
144
+ @fragment = +""
145
+
146
+ parse_result = parser.split(v, url: self, state_override: :fragment_state)
147
+ @fragment = parse_result[8].to_s
148
+ end
149
+
120
150
  def userinfo=(userinfo)
121
- return super unless URI::DEFAULT_PARSER.is_a?(URI::WhatwgParser)
151
+ return super unless @parsed_by_whatwg_parser
122
152
 
123
153
  user, password = split_userinfo(userinfo)
124
154
  self.user = user
@@ -126,17 +156,59 @@ module URI
126
156
  end
127
157
 
128
158
  def check_opaque(v)
129
- return super unless URI::DEFAULT_PARSER.is_a?(URI::WhatwgParser)
159
+ return super unless @parsed_by_whatwg_parser
160
+
130
161
  return v unless v
131
162
 
132
- if @host || @port || @user || @path
133
- raise InvalidURIError, "cannot set opaque with host, port, userinfo or path"
163
+ if @host || @port || @user
164
+ raise InvalidURIError, "cannot set opaque with host, port, or userinfo"
134
165
  end
135
166
 
136
167
  self.set_opaque(v)
137
- DEFAULT_PARSER.parse(to_s)
168
+ # NOTE: WHATWG URL Living Standard doesn't define "opaque" setter. So parse a URL whole.
169
+ parser.parse(to_s)
138
170
  true
139
171
  end
172
+
173
+ def to_s
174
+ return super unless @parsed_by_whatwg_parser
175
+
176
+ str = "".dup
177
+ if @scheme
178
+ str << @scheme
179
+ str << ":"
180
+ end
181
+
182
+ if @host || %w[file postgres].include?(@scheme)
183
+ str << "//"
184
+ end
185
+ if self.userinfo
186
+ str << self.userinfo
187
+ str << "@"
188
+ end
189
+ if @host
190
+ str << @host
191
+ end
192
+ if @port && @port != self.default_port
193
+ str << ":"
194
+ str << @port.to_s
195
+ end
196
+ if @host.nil? && @opaque.nil? && @raw_path && @raw_path.length > 1 && @raw_path[0] == ""
197
+ str << "/."
198
+ end
199
+ str << @path if @path
200
+ str << @opaque if @opaque
201
+ if @query
202
+ str << "?"
203
+ str << @query
204
+ end
205
+
206
+ if @fragment
207
+ str << "#"
208
+ str << @fragment
209
+ end
210
+ str
211
+ end
140
212
  end
141
213
  end
142
214
  end
@@ -7,8 +7,10 @@ class URI::WhatwgParser
7
7
  class HostParser
8
8
  include ParserHelper
9
9
 
10
- FORBIDDEN_HOST_CODE_POINT = ["\x00", "\t", "\x0a", "\x0d", " ", "#", "/", ":", "<", ">", "?", "@", "[", "\\", "]", "^", "|"]
11
- FORBIDDEN_DOMAIN_CODE_POINT = FORBIDDEN_HOST_CODE_POINT + C0_CONTROL_PERCENT_ENCODE_SET + ["%", "\x7f"]
10
+ FORBIDDEN_HOST_CODE_POINT = Set["\x00", "\t", "\x0a", "\x0d", " ", "#", "/", ":", "<", ">", "?", "@", "[", "\\", "]", "^", "|"]
11
+ FORBIDDEN_DOMAIN_CODE_POINT = FORBIDDEN_HOST_CODE_POINT | C0_CONTROL_PERCENT_ENCODE_SET | Set["%", "\x7f"]
12
+ FORBIDDEN_HOST_REGEX = Regexp.union(FORBIDDEN_HOST_CODE_POINT.to_a)
13
+ FORBIDDEN_DOMAIN_REGEX = Regexp.union(FORBIDDEN_DOMAIN_CODE_POINT.to_a)
12
14
 
13
15
  def parse(input, opaque = false) # :nodoc:
14
16
  return "" if input&.empty?
@@ -39,8 +41,7 @@ class URI::WhatwgParser
39
41
  raise URI::WhatwgParser::ParseError, "invalid IPv4 format" if parts.size > 4
40
42
  numbers = []
41
43
  parts.each do |part|
42
- value, _validation_error = parse_ipv4_number(part)
43
- numbers << value
44
+ numbers << parse_ipv4_number(part)
44
45
  end
45
46
 
46
47
  (numbers.size-1).times {|i| raise URI::WhatwgParser::ParseError, "invalid IPv4 format" if numbers[i] > 255 }
@@ -191,7 +192,7 @@ class URI::WhatwgParser
191
192
 
192
193
  def parse_opaque_host(host)
193
194
  raise ParseError if include_forbidden_host_code_point?(host)
194
- host.chars.map { |c| percent_encode(c, C0_CONTROL_PERCENT_ENCODE_SET) }.join
195
+ host.chars.map { |c| utf8_percent_encode(c, C0_CONTROL_PERCENT_ENCODE_SET) }.join
195
196
  end
196
197
 
197
198
  def percent_decode(str)
@@ -201,51 +202,63 @@ class URI::WhatwgParser
201
202
  end
202
203
 
203
204
  def ends_in_number?(domain)
204
- parts = domain.split(".", -1)
205
- if parts.last == ""
206
- return false if parts.size == 1
207
- parts.pop
205
+ return false if domain.empty?
206
+
207
+ if domain.end_with?(".")
208
+ # Remove trailing dot and find the actual last segment
209
+ domain_without_trailing = domain[0...-1]
210
+ return false if domain_without_trailing.empty?
211
+
212
+ last_dot = domain_without_trailing.rindex(".")
213
+ last = last_dot ? domain_without_trailing[last_dot + 1..-1] : domain_without_trailing
214
+ else
215
+ # Find the last segment after the last dot
216
+ last_dot = domain.rindex(".")
217
+ last = last_dot ? domain[last_dot + 1..-1] : domain
208
218
  end
209
219
 
210
- last = parts.last
211
- return true if last != "" && last.chars.all? { |c| ascii_digit?(c) }
220
+ return false if last.empty?
221
+ return true if last.match?(/\A\d+\z/)
212
222
 
213
- begin
214
- parse_ipv4_number(last)
215
- rescue ParseError
216
- return false
223
+ if last.start_with?("0x", "0X")
224
+ hex = last[2..-1] || ""
225
+ return true if hex.empty? || hex.match?(/\A[0-9A-Fa-f]+\z/)
217
226
  end
218
227
 
219
- true
228
+ false
220
229
  end
221
230
 
222
231
  def parse_ipv4_number(str)
223
232
  raise ParseError, "invalid IPv4 format" if str&.empty?
224
233
 
225
- validation_error = false
226
234
  r = 10
227
235
 
228
236
  if str.size >= 2 && str.start_with?("0x", "0X")
229
- validation_error = true
230
237
  str = str[2..-1]
231
238
  r = 16
232
239
  elsif str.size >= 2 && str.start_with?("0")
233
- validation_error = true
234
240
  str = str[1..-1]
235
241
  r = 8
236
242
  end
237
243
 
238
- return 0, true if str.empty?
244
+ return 0 if str.empty?
239
245
 
240
246
  begin
241
- output = Integer(str, r)
242
- return output, validation_error
247
+ Integer(str, r)
243
248
  rescue ArgumentError
244
249
  raise ParseError, "invalid IPv4 format"
245
250
  end
246
251
  end
247
252
 
248
253
  def domain_to_ascii(domain)
254
+ # If domain is already ASCII-only, lowercase, and doesn't contain punycode prefix
255
+ # we can skip IDNA processing
256
+ if domain.ascii_only? && domain == domain.downcase && !domain.include?("xn--")
257
+ raise ParseError, "including invalid value in host" if include_forbidden_domain_code_point?(domain)
258
+ raise ParseError, "host can't be empty" if domain.empty?
259
+ return domain
260
+ end
261
+
249
262
  ascii_domain = URI::IDNA.whatwg_to_ascii(domain.force_encoding(Encoding::UTF_8), be_strict: false)
250
263
 
251
264
  raise ParseError, "including invalid value in host" if include_forbidden_domain_code_point?(ascii_domain)
@@ -255,11 +268,11 @@ class URI::WhatwgParser
255
268
  end
256
269
 
257
270
  def include_forbidden_domain_code_point?(str)
258
- FORBIDDEN_DOMAIN_CODE_POINT.any? {|c| str.include?(c) }
271
+ str.match?(FORBIDDEN_DOMAIN_REGEX)
259
272
  end
260
273
 
261
274
  def include_forbidden_host_code_point?(str)
262
- FORBIDDEN_HOST_CODE_POINT.any? {|c| str.include?(c) }
275
+ str.match?(FORBIDDEN_HOST_REGEX)
263
276
  end
264
277
  end
265
278
  end
@@ -1,31 +1,23 @@
1
1
  # frozen_string_literal: true
2
2
 
3
+ require "set"
4
+
3
5
  class URI::WhatwgParser
4
6
  module ParserHelper
5
- C0_CONTROL_PERCENT_ENCODE_SET = (0..0x1f).map(&:chr)
6
- ASCII_ALPHA = ("a".."z").to_a + ("A".."Z").to_a
7
- ASCII_DIGIT = ("0".."9").to_a
8
-
9
- def ascii_alpha?(c)
10
- ASCII_ALPHA.include?(c)
11
- end
12
-
13
- def ascii_alphanumerica?(c)
14
- ascii_alpha?(c) || ascii_digit?(c)
15
- end
7
+ # NOTE: This set is incomplete: it omits the code points greater than `0x7e`. That is OK for now because `utf8_percent_encode` checks for them itself.
8
+ C0_CONTROL_PERCENT_ENCODE_SET = Set.new((0..0x1f).map(&:chr))
16
9
 
17
- def ascii_digit?(c)
18
- ASCII_DIGIT.include?(c)
19
- end
20
-
21
- def percent_encode(c, encode_set, encoding = Encoding::UTF_8)
10
+ def utf8_percent_encode(c, encode_set)
22
11
  return c unless encode_set.include?(c) || c.ord > 0x7e
23
12
 
24
13
  # For ASCII single-byte characters
25
14
  return "%%%02X" % c.ord if c.bytesize == 1
26
15
 
27
- bytes = c.encoding == encoding ? c.bytes : c.encode(encoding).bytes
28
- bytes.map { |b| "%%%02X" % b }.join
16
+ c.bytes.map { |b| "%%%02X" % b }.join
17
+ end
18
+
19
+ def utf8_percent_encode_string(str, encode_set)
20
+ str.chars.map { |c| utf8_percent_encode(c, encode_set) }.join
29
21
  end
30
22
  end
31
23
  end
@@ -2,6 +2,6 @@
2
2
 
3
3
  module URI
4
4
  class WhatwgParser
5
- VERSION = "0.2.0"
5
+ VERSION = "0.3.0"
6
6
  end
7
7
  end
@@ -1,5 +1,6 @@
1
1
  # frozen_string_literal: true
2
2
 
3
+ require "set"
3
4
  require "uri"
4
5
  require_relative "whatwg_parser/error"
5
6
  require_relative "whatwg_parser/version"
@@ -13,19 +14,30 @@ module URI
13
14
 
14
15
  SPECIAL_SCHEME = { "ftp" => 21, "file" => nil, "http" => 80, "https" => 443, "ws" => 80, "wss" => 443 }
15
16
 
16
- FRAGMENT_PERCENT_ENCODE_SET = C0_CONTROL_PERCENT_ENCODE_SET + [" ", "\"", "<", ">", "`"]
17
- QUERY_PERCENT_ENCODE_SET = C0_CONTROL_PERCENT_ENCODE_SET + [" ", "\"", "#", "<", ">"]
18
- SPECIAL_QUERY_PERCENT_ENCODE_SET = QUERY_PERCENT_ENCODE_SET + ["'"]
19
- PATH_PERCENT_ENCODE_SET = QUERY_PERCENT_ENCODE_SET + ["?", "^", "`", "{", "}"]
20
- USERINFO_PERCENT_ENCODE_SET = PATH_PERCENT_ENCODE_SET + ["/", ":", ";", "=","@", "[", "\\", "]", "|"]
17
+ FRAGMENT_PERCENT_ENCODE_SET = C0_CONTROL_PERCENT_ENCODE_SET | Set[" ", "\"", "<", ">", "`"]
18
+ QUERY_PERCENT_ENCODE_SET = C0_CONTROL_PERCENT_ENCODE_SET | Set[" ", "\"", "#", "<", ">"]
19
+ SPECIAL_QUERY_PERCENT_ENCODE_SET = QUERY_PERCENT_ENCODE_SET | Set["'"]
20
+ PATH_PERCENT_ENCODE_SET = QUERY_PERCENT_ENCODE_SET | Set["?", "^", "`", "{", "}"]
21
+ USERINFO_PERCENT_ENCODE_SET = PATH_PERCENT_ENCODE_SET | Set["/", ":", ";", "=", "@", "[", "\\", "]", "|"]
21
22
 
22
- SINGLE_DOT_PATH_SEGMENTS = [".", "%2e", "%2E"]
23
- DOUBLE_DOT_PATH_SEGMENTS = ["..", ".%2e", ".%2E", "%2e.", "%2e%2e", "%2e%2E", "%2E.", "%2E%2e", "%2E%2E"]
23
+ SINGLE_DOT_PATH_SEGMENTS = Set[".", "%2e", "%2E"]
24
+ DOUBLE_DOT_PATH_SEGMENTS = Set["..", ".%2e", ".%2E", "%2e.", "%2e%2e", "%2e%2E", "%2E.", "%2E%2e", "%2E%2E"]
24
25
 
25
26
  WINDOWS_DRIVE_LETTER = Regexp.new("\\A([a-zA-Z][:|])\\z")
26
27
  NORMALIZED_WINDOWS_DRIVE_LETTER = Regexp.new("\\A([a-zA-Z][:])\\z")
27
28
  STARTS_WITH_WINDOWS_DRIVE_LETTER = Regexp.new("\\A([a-zA-Z][:|])(?:[/\\?#])?\\z")
28
29
 
30
+ VALID_SIGNS_FOR_SCHEME = Set["+", "-", "."]
31
+ DELIMITER_SIGNS = Set["/", "?", "#"]
32
+
33
+ WS_SCHEMES = Set["ws", "wss"]
34
+
35
+ ASCII_ALPHA_LOWERCASE = Set.new(("a".."z").to_a)
36
+ ASCII_ALPHA_UPPERCASE = Set.new(("A".."Z").to_a)
37
+ ASCII_DIGIT = Set.new(("0".."9").to_a)
38
+
39
+ attr_reader :path
40
+
29
41
  def initialize
30
42
  reset
31
43
  @host_parser = HostParser.new
@@ -35,24 +47,27 @@ module URI
35
47
  {}
36
48
  end
37
49
 
38
- def parse(input, base: nil, encoding: Encoding::UTF_8, url: nil, state_override: nil) # :nodoc:
39
- URI.for(*self.split(input, base: base, encoding: encoding, url: url, state_override: state_override))
50
+ def parse(input, base: nil, url: nil, state_override: nil) # :nodoc:
51
+ URI.for(*self.split(input, base: base, url: url, state_override: state_override), self)
40
52
  end
41
53
 
42
- def split(input, base: nil, encoding: Encoding::UTF_8, url: nil, state_override: nil) # :nodoc:
54
+ def split(input, base: nil, url: nil, state_override: nil) # :nodoc:
43
55
  reset
44
56
  @base = nil
45
57
  if base != nil
46
- ary = split(base, base: nil, encoding: encoding)
47
- @base = { scheme: ary[0], userinfo: ary[1], host: ary[2], port: ary[3], registry: ary[4], path: ary[5], opaque: ary[6], query: ary[7], fragment: ary[8]}
48
- @base_paths = @paths
58
+ ary = split(base, base: nil)
59
+ @base = { scheme: ary[0], userinfo: ary[1], host: ary[2], port: ary[3], query: ary[7], fragment: ary[8]}
60
+ @base_path = @path
49
61
  reset
50
62
  end
51
63
 
52
64
  if url
53
65
  raise ArgumentError, "bad argument (expected URI object)" unless url.is_a?(URI::Generic)
54
66
  @parse_result.merge!(url.component.zip(url.send(:component_ary)).to_h)
55
- @parse_result[:path] = nil
67
+ @username = url.user
68
+ @password = url.password
69
+ @parse_result.delete(:userinfo)
70
+ @special_url = special_url?(@parse_result[:scheme])
56
71
  end
57
72
 
58
73
  if state_override
@@ -63,30 +78,33 @@ module URI
63
78
  raise ParseError, "uri can't be empty" if (input.nil? || input.empty?) && @base.nil?
64
79
  end
65
80
 
66
- @encoding = encoding
67
- @input = input.dup
81
+ input = input.dup
68
82
 
69
83
  unless url
70
- @input.sub!(/\A[\u0000-\u0020]*/, "")
71
- @input.sub!(/[\u0000-\u0020]*\z/, "")
84
+ remove_c0_control_or_space!(input)
72
85
  end
73
86
 
74
- @input.delete!("\t")
75
- @input.delete!("\n")
76
- @input.delete!("\r")
87
+ input.delete!("\t\n\r") if /[\t\n\r]/.match?(input)
77
88
 
89
+ @input_chars = input.chars
90
+ input_chars_length = @input_chars.length
78
91
  @pos = 0
79
92
 
80
- while @pos <= @input.length
81
- c = @input[@pos]
82
- ret = send(@state, c)
83
- break if ret == :terminate
93
+ while @pos <= input_chars_length
94
+ dispatch_state(@input_chars[@pos])
95
+ break if @terminate
84
96
  @pos += 1
85
97
  end
86
98
 
87
- @parse_result[:userinfo] = [@username, @password].compact.reject(&:empty?).join(":")
88
- @parse_result[:path] = "/#{@paths.join("/")}" if @paths && !@paths.empty?
89
- @parse_result.values
99
+ userinfo = [@username, @password].compact.reject(&:empty?).join(":")
100
+ if @path
101
+ if @path.is_a?(Array)
102
+ path = "/#{@path.join("/")}"
103
+ else
104
+ opaque = @path
105
+ end
106
+ end
107
+ [@parse_result[:scheme], userinfo, @parse_result[:host], @parse_result[:port], nil, path, opaque, @parse_result[:query], @parse_result[:fragment]]
90
108
  end
91
109
 
92
110
  def join(*uris)
@@ -101,29 +119,53 @@ module URI
101
119
  uri
102
120
  end
103
121
 
104
- def encode_userinfo(str)
105
- str.chars.map do |char|
106
- percent_encode(char, USERINFO_PERCENT_ENCODE_SET)
107
- end.join
108
- end
109
-
110
122
  private
111
123
 
124
+ def dispatch_state(c)
125
+ case @state
126
+ when :scheme_start_state then scheme_start_state(c)
127
+ when :scheme_state then scheme_state(c)
128
+ when :no_scheme_state then no_scheme_state(c)
129
+ when :special_relative_or_authority_state then special_relative_or_authority_state(c)
130
+ when :path_or_authority_state then path_or_authority_state(c)
131
+ when :relative_state then relative_state(c)
132
+ when :relative_slash_state then relative_slash_state(c)
133
+ when :special_authority_slashes_state then special_authority_slashes_state(c)
134
+ when :special_authority_ignore_slashes_state then special_authority_ignore_slashes_state(c)
135
+ when :authority_state then authority_state(c)
136
+ when :host_state then host_state(c)
137
+ when :port_state then port_state(c)
138
+ when :file_state then file_state(c)
139
+ when :file_slash_state then file_slash_state(c)
140
+ when :file_host_state then file_host_state(c)
141
+ when :path_start_state then path_start_state(c)
142
+ when :path_state then path_state(c)
143
+ when :opaque_path_state then opaque_path_state(c)
144
+ when :query_state then query_state(c)
145
+ when :fragment_state then fragment_state(c)
146
+ end
147
+ end
148
+
112
149
  def reset
113
150
  @buffer = +""
114
151
  @at_sign_seen = nil
115
152
  @password_token_seen = nil
116
153
  @inside_brackets = nil
117
- @paths = nil
154
+ @path = nil
118
155
  @username = nil
119
156
  @password = nil
120
- @parse_result = { scheme: nil, userinfo: nil, host: nil, port: nil, registry: nil, path: nil, opaque: nil, query: nil, fragment: nil }
157
+ @parse_result = {}
121
158
  @state_override = nil
122
159
  @state = :scheme_start_state
160
+ @special_url = nil
161
+ @terminate = nil
123
162
  end
124
163
 
125
164
  def scheme_start_state(c)
126
- if ascii_alpha?(c)
165
+ if ASCII_ALPHA_LOWERCASE.include?(c)
166
+ @buffer << c
167
+ @state = :scheme_state
168
+ elsif ASCII_ALPHA_UPPERCASE.include?(c)
127
169
  @buffer << c.downcase
128
170
  @state = :scheme_state
129
171
  elsif @state_override.nil?
@@ -135,7 +177,9 @@ module URI
135
177
  end
136
178
 
137
179
  def scheme_state(c)
138
- if ascii_alphanumerica?(c) || ["+", "-", "."].include?(c)
180
+ if ASCII_ALPHA_LOWERCASE.include?(c) || ASCII_DIGIT.include?(c) || VALID_SIGNS_FOR_SCHEME.include?(c)
181
+ @buffer << c
182
+ elsif ASCII_ALPHA_UPPERCASE.include?(c)
139
183
  @buffer << c.downcase
140
184
  elsif c == ":"
141
185
  if @state_override
@@ -143,17 +187,20 @@ module URI
143
187
  (!special_url? && special_url?(@buffer)) ||
144
188
  ((includes_credentials? || !@parse_result[:port].nil?) && @buffer == "file") ||
145
189
  (@parse_result[:scheme] == "file" && @parse_result[:host]&.empty?)
146
- return :terminate
190
+ @terminate = true
191
+ return
147
192
  end
148
193
  end
149
194
 
150
195
  @parse_result[:scheme] = @buffer
196
+ @special_url = special_url?(@buffer)
151
197
 
152
198
  if @state_override
153
199
  if SPECIAL_SCHEME.value?(@parse_result[:port].to_i)
154
200
  @parse_result[:port] = nil
155
201
  end
156
- return :terminate
202
+ @terminate = true
203
+ return
157
204
  end
158
205
 
159
206
  @buffer = +""
@@ -164,11 +211,11 @@ module URI
164
211
  @state = :special_relative_or_authority_state
165
212
  elsif special_url?
166
213
  @state = :special_authority_slashes_state
167
- elsif rest.start_with?("/")
214
+ elsif @input_chars[@pos + 1] == "/"
168
215
  @state = :path_or_authority_state
169
216
  @pos += 1
170
217
  else
171
- @parse_result[:opaque] = ""
218
+ @path = +""
172
219
  @state = :opaque_path_state
173
220
  end
174
221
  elsif @state_override.nil?
@@ -181,11 +228,12 @@ module URI
181
228
  end
182
229
 
183
230
  def no_scheme_state(c)
184
- raise ParseError, "scheme is missing" if @base.nil? || (!@base[:opaque].nil? && c != "#")
231
+ raise ParseError, "scheme is missing" if @base.nil? || (has_opaque_path?(@base_path) && c != "#")
185
232
 
186
- if !@base[:opaque].nil? && c == "#"
233
+ if has_opaque_path?(@base_path) && c == "#"
187
234
  @parse_result[:scheme] = @base[:scheme]
188
- @paths = @base_paths
235
+ @special_url = special_url?(@base[:scheme])
236
+ @path = @base_path
189
237
  @parse_result[:query] = @base[:query]
190
238
  @parse_result[:fragment] = nil
191
239
  @state = :fragment_state
@@ -199,7 +247,7 @@ module URI
199
247
  end
200
248
 
201
249
  def special_relative_or_authority_state(c)
202
- if c == "/" && rest.start_with?("/")
250
+ if c == "/" && @input_chars[@pos + 1] == "/"
203
251
  @state = :special_authority_ignore_slashes_state
204
252
  @pos -= 1
205
253
  else
@@ -219,6 +267,7 @@ module URI
219
267
 
220
268
  def relative_state(c)
221
269
  @parse_result[:scheme] = @base[:scheme]
270
+ @special_url = special_url?(@base[:scheme])
222
271
  if c == "/"
223
272
  @state = :relative_slash_state
224
273
  elsif special_url? && c == "\\"
@@ -227,7 +276,7 @@ module URI
227
276
  @username, @password = @base[:userinfo].split(":") if @base[:userinfo]
228
277
  @parse_result[:host] = @base[:host]
229
278
  @parse_result[:port] = @base[:port]
230
- @paths = @base_paths
279
+ @path = @base_path
231
280
  @parse_result[:query] = @base[:query]
232
281
 
233
282
  if c == "?"
@@ -246,7 +295,7 @@ module URI
246
295
  end
247
296
 
248
297
  def relative_slash_state(c)
249
- if special_url? && (c == "/" || c == "\\")
298
+ if @special_url && (c == "/" || c == "\\")
250
299
  @state = :special_authority_ignore_slashes_state
251
300
  elsif c == "/"
252
301
  @state = :authority_state
@@ -260,7 +309,7 @@ module URI
260
309
  end
261
310
 
262
311
  def special_authority_slashes_state(c)
263
- if c == "/" && rest.start_with?("/")
312
+ if c == "/" && @input_chars[@pos + 1] == "/"
264
313
  @state = :special_authority_ignore_slashes_state
265
314
  @pos += 1
266
315
  else
@@ -280,23 +329,23 @@ module URI
280
329
  if c == "@"
281
330
  @buffer.prepend("%40") if @at_sign_seen
282
331
  @at_sign_seen = true
283
- @buffer.chars.each do |char|
332
+ @buffer.each_char do |char|
284
333
  if char == ":" && !@password_token_seen
285
334
  @password_token_seen = true
286
335
  next
287
336
  end
288
337
 
289
- encoded_char = percent_encode(char, USERINFO_PERCENT_ENCODE_SET, @encoding)
338
+ encoded_char = utf8_percent_encode(char, USERINFO_PERCENT_ENCODE_SET)
290
339
 
291
340
  if @password_token_seen
292
- @password = @password.to_s + encoded_char
341
+ (@password ||= +"") << encoded_char
293
342
  else
294
- @username = @username.to_s + encoded_char
343
+ (@username ||= +"") << encoded_char
295
344
  end
296
345
  end
297
346
 
298
347
  @buffer.clear
299
- elsif c.nil? || ["/", "?", "#"].include?(c) || (special_url? && c == "\\")
348
+ elsif c.nil? || DELIMITER_SIGNS.include?(c) || (@special_url && c == "\\")
300
349
  raise ParseError, "host is missing" if @at_sign_seen && @buffer.empty?
301
350
 
302
351
  @pos -= (@buffer.size + 1)
@@ -315,20 +364,23 @@ module URI
315
364
  raise ParseError, "host is missing" if @buffer.empty?
316
365
  raise ParseError, "invalid host" if @state_override && @state_override == :hostname_state
317
366
 
318
- @parse_result[:host] = @host_parser.parse(@buffer, !special_url?)
367
+ @parse_result[:host] = @host_parser.parse(@buffer, !@special_url)
319
368
  @buffer.clear
320
369
  @state = :port_state
321
- elsif c.nil? || ["/", "?", "#"].include?(c) || (special_url? && c == "\\")
370
+ elsif c.nil? || DELIMITER_SIGNS.include?(c) || (@special_url && c == "\\")
322
371
  @pos -= 1
323
- if special_url? && @buffer.empty?
372
+ if @special_url && @buffer.empty?
324
373
  raise ParseError, "host is missing"
325
374
  elsif @state_override && @buffer.empty? && (includes_credentials? || !@parse_result[:port].nil?)
326
375
  raise ParseError, "invalid host"
327
376
  else
328
- @parse_result[:host] = @host_parser.parse(@buffer, !special_url?)
377
+ @parse_result[:host] = @host_parser.parse(@buffer, !@special_url)
329
378
  @buffer.clear
330
379
  @state = :path_start_state
331
- return :terminate if @state_override
380
+ if @state_override
381
+ @terminate = true
382
+ return
383
+ end
332
384
  end
333
385
  else
334
386
  @inside_brackets = true if c == "["
@@ -338,9 +390,9 @@ module URI
338
390
  end
339
391
 
340
392
  def port_state(c)
341
- if ascii_digit?(c)
393
+ if ASCII_DIGIT.include?(c)
342
394
  @buffer << c
343
- elsif c.nil? || ["/", "?", "#"].include?(c) || (special_url? && c == "\\") || @state_override
395
+ elsif c.nil? || DELIMITER_SIGNS.include?(c) || (@special_url && c == "\\") || @state_override
344
396
  unless @buffer.empty?
345
397
  port = Integer(@buffer, 10)
346
398
  raise ParseError, "port is invalid value" if port < 0 || port > 65535
@@ -351,7 +403,10 @@ module URI
351
403
  end
352
404
 
353
405
  @buffer.clear
354
- return :terminate if @state_override
406
+ if @state_override
407
+ @terminate = true
408
+ return
409
+ end
355
410
  end
356
411
 
357
412
  raise ParseError, "port is invalid value" if @state_override
@@ -364,6 +419,7 @@ module URI
364
419
 
365
420
  def file_state(c)
366
421
  @parse_result[:scheme] = "file"
422
+ @special_url = true
367
423
  @parse_result[:host] = nil
368
424
 
369
425
  if c == "/" || c == "\\"
@@ -382,7 +438,7 @@ module URI
382
438
  if !starts_with_windows_drive_letter?(rest)
383
439
  shorten_url_path
384
440
  else
385
- @paths = nil
441
+ @path = nil
386
442
  end
387
443
  @state = :path_state
388
444
  @pos -= 1
@@ -399,10 +455,10 @@ module URI
399
455
  else
400
456
  if !@base.nil? && @base[:scheme] == "file"
401
457
  @parse_result[:host] = @base[:host]
402
- if !starts_with_windows_drive_letter?(rest) && @base_paths && normalized_windows_drive_letter?(@base_paths[0])
403
- if @paths.nil?
404
- @paths ||= []
405
- @paths[0] = @base_paths[0]
458
+ if !starts_with_windows_drive_letter?(rest) && @base_path && normalized_windows_drive_letter?(@base_path[0])
459
+ if @path.nil?
460
+ @path ||= []
461
+ @path[0] = @base_path[0]
406
462
  end
407
463
  end
408
464
  end
@@ -412,20 +468,26 @@ module URI
412
468
  end
413
469
 
414
470
  def file_host_state(c)
415
- if c.nil? || c == "/" || c == "\\" || c == "?" || c == "#"
471
+ if c.nil? || DELIMITER_SIGNS.include?(c) || (@special_url && c == "\\")
416
472
  @pos -= 1
417
473
 
418
474
  if !@state_override && windows_drive_letter?(@buffer)
419
475
  @state = :path_state
420
476
  elsif @buffer.empty?
421
477
  @parse_result[:host] = nil
422
- return :terminate if @state_override
478
+ if @state_override
479
+ @terminate = true
480
+ return
481
+ end
423
482
  @state = :path_start_state
424
483
  else
425
- host = @host_parser.parse(@buffer, !special_url?)
484
+ host = @host_parser.parse(@buffer, !@special_url)
426
485
  host = "" if host == "localhost"
427
486
  @parse_result[:host] = host
428
- return :terminate if @state_override
487
+ if @state_override
488
+ @terminate = true
489
+ return
490
+ end
429
491
  @buffer.clear
430
492
  @state = :path_start_state
431
493
  end
@@ -435,7 +497,7 @@ module URI
435
497
  end
436
498
 
437
499
  def path_start_state(c)
438
- if special_url?
500
+ if @special_url
439
501
  @pos -= 1 if c != "/" && c != "\\"
440
502
  @state = :path_state
441
503
  elsif !@state_override && c == "?"
@@ -446,29 +508,29 @@ module URI
446
508
  @pos -= 1 if c != "/"
447
509
  @state = :path_state
448
510
  elsif @state_override && @parse_result[:host].nil?
449
- @paths ||= []
450
- @paths << ""
511
+ @path ||= []
512
+ @path << ""
451
513
  end
452
514
  end
453
515
 
454
516
  def path_state(c)
455
- @paths ||= []
517
+ @path ||= []
456
518
 
457
- if (c.nil? || c == "/") || (special_url? && c == "\\") || (!@state_override && (c == "?" || c == "#"))
519
+ if (c.nil? || c == "/") || (@special_url && c == "\\") || (!@state_override && (c == "?" || c == "#"))
458
520
  if double_dot_path_segments?(@buffer)
459
521
  shorten_url_path
460
522
 
461
- if c != "/" && !(special_url? && c == "\\")
462
- @paths << ""
523
+ if c != "/" && !(@special_url && c == "\\")
524
+ @path << ""
463
525
  end
464
- elsif single_dot_path_segments?(@buffer) && c != "/" && !((special_url? && c == "\\"))
465
- @paths << ""
526
+ elsif single_dot_path_segments?(@buffer) && c != "/" && !((@special_url && c == "\\"))
527
+ @path << ""
466
528
  elsif !single_dot_path_segments?(@buffer)
467
- if @parse_result[:scheme] == "file" && @paths.empty? && windows_drive_letter?(@buffer)
529
+ if @parse_result[:scheme] == "file" && @path.empty? && windows_drive_letter?(@buffer)
468
530
  @buffer[1] = ":"
469
531
  end
470
532
 
471
- @paths << @buffer
533
+ @path << @buffer
472
534
  end
473
535
 
474
536
  @buffer = +""
@@ -477,11 +539,11 @@ module URI
477
539
  @parse_result[:query] = nil
478
540
  @state = :query_state
479
541
  elsif c == "#"
480
- @parse_result[:frament] = nil
542
+ @parse_result[:fragment] = nil
481
543
  @state = :fragment_state
482
544
  end
483
545
  else
484
- @buffer << percent_encode(c, PATH_PERCENT_ENCODE_SET, @encoding)
546
+ @buffer << utf8_percent_encode(c, PATH_PERCENT_ENCODE_SET)
485
547
  end
486
548
  end
487
549
 
@@ -493,26 +555,27 @@ module URI
493
555
  @parse_result[:fragment] = nil
494
556
  @state = :fragment_state
495
557
  elsif c == " "
496
- if rest.start_with?("?", "#")
497
- @parse_result[:opaque] += "%20"
558
+ first_of_rest = @input_chars[@pos + 1]
559
+ if first_of_rest == "?" || first_of_rest == "#"
560
+ @path += "%20"
498
561
  else
499
- @parse_result[:opaque] += " "
562
+ @path += " "
500
563
  end
501
564
  elsif !c.nil?
502
- @parse_result[:opaque] += percent_encode(c, C0_CONTROL_PERCENT_ENCODE_SET, @encoding)
565
+ @path += utf8_percent_encode(c, C0_CONTROL_PERCENT_ENCODE_SET)
503
566
  end
504
567
  end
505
568
 
506
569
  def query_state(c)
507
- if @encoding != Encoding::UTF_8 && (!special_url? || %w[ws wss].include?(@parse_result[:scheme]))
508
- @encoding = Encoding::UTF_8
509
- end
510
-
511
570
  if c.nil? || (!@state_override && c == "#")
512
- query_percent_encode_set = special_url? ? SPECIAL_QUERY_PERCENT_ENCODE_SET : QUERY_PERCENT_ENCODE_SET
513
- @parse_result[:query] = @buffer.chars.map { |c| percent_encode(c, query_percent_encode_set, @encoding) }.join
571
+ query_percent_encode_set = @special_url ? SPECIAL_QUERY_PERCENT_ENCODE_SET : QUERY_PERCENT_ENCODE_SET
572
+ # TODO: We need to consider encoding here.
573
+ @parse_result[:query] = utf8_percent_encode_string(@buffer, query_percent_encode_set)
514
574
  @buffer.clear
515
- @state = :fragment_state if c == "#"
575
+ if c == "#"
576
+ @parse_result[:fragment] = +""
577
+ @state = :fragment_state
578
+ end
516
579
  elsif !c.nil?
517
580
  @buffer << c
518
581
  end
@@ -520,7 +583,7 @@ module URI
520
583
 
521
584
  def fragment_state(c)
522
585
  return if c.nil?
523
- @parse_result[:fragment] = @parse_result[:fragment].to_s + percent_encode(c, FRAGMENT_PERCENT_ENCODE_SET, @encoding)
586
+ (@parse_result[:fragment] ||= +"") << utf8_percent_encode(c, FRAGMENT_PERCENT_ENCODE_SET)
524
587
  end
525
588
 
526
589
  def windows_drive_letter?(str)
@@ -548,17 +611,17 @@ module URI
548
611
  end
549
612
 
550
613
  def shorten_url_path
551
- return if @paths.nil?
552
- return if @parse_result[:scheme] == "file" && @paths.length == 1 && normalized_windows_drive_letter?(@paths.first)
553
- @paths.pop
614
+ return if @path.nil? || @path.is_a?(String)
615
+ return if @parse_result[:scheme] == "file" && @path.length == 1 && normalized_windows_drive_letter?(@path.first)
616
+ @path.pop
554
617
  end
555
618
 
556
619
  def includes_credentials?
557
- !@parse_result[:userinfo].nil? || (@username && !@username.empty?) || (@password && !@password.empty?)
620
+ (@username && !@username.empty?) || (@password && !@password.empty?)
558
621
  end
559
622
 
560
623
  def rest
561
- @input[@pos+1..]
624
+ @input_chars[@pos + 1..]&.join
562
625
  end
563
626
 
564
627
  def convert_to_uri(uri)
@@ -571,6 +634,25 @@ module URI
571
634
  "bad argument (expected URI object or URI string)"
572
635
  end
573
636
  end
637
+
638
+ if RUBY_VERSION >= "4.0"
639
+ def remove_c0_control_or_space!(str)
640
+ if /[\u0000-\u0020]/.match?(str)
641
+ str.strip!("\u0000-\u0020")
642
+ end
643
+ end
644
+ else
645
+ def remove_c0_control_or_space!(str)
646
+ if /[\u0000-\u0020]/.match?(str)
647
+ str.sub!(/\A[\u0000-\u0020]*/, "")
648
+ str.sub!(/[\u0000-\u0020]*\z/, "")
649
+ end
650
+ end
651
+ end
652
+
653
+ def has_opaque_path?(path)
654
+ path.is_a?(String)
655
+ end
574
656
  end
575
657
 
576
658
  WHATWG_PARSER = URI::WhatwgParser.new
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: uri-whatwg_parser
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.0
4
+ version: 0.3.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Yuji Yaginuma
@@ -37,20 +37,6 @@ dependencies:
37
37
  - - ">="
38
38
  - !ruby/object:Gem::Version
39
39
  version: '0'
40
- - !ruby/object:Gem::Dependency
41
- name: debug
42
- requirement: !ruby/object:Gem::Requirement
43
- requirements:
44
- - - ">="
45
- - !ruby/object:Gem::Version
46
- version: '0'
47
- type: :development
48
- prerelease: false
49
- version_requirements: !ruby/object:Gem::Requirement
50
- requirements:
51
- - - ">="
52
- - !ruby/object:Gem::Version
53
- version: '0'
54
40
  email:
55
41
  - yuuji.yaginuma@gmail.com
56
42
  executables: []
@@ -80,14 +66,14 @@ required_ruby_version: !ruby/object:Gem::Requirement
80
66
  requirements:
81
67
  - - ">="
82
68
  - !ruby/object:Gem::Version
83
- version: 3.2.0
69
+ version: 3.0.0
84
70
  required_rubygems_version: !ruby/object:Gem::Requirement
85
71
  requirements:
86
72
  - - ">="
87
73
  - !ruby/object:Gem::Version
88
74
  version: '0'
89
75
  requirements: []
90
- rubygems_version: 4.0.3
76
+ rubygems_version: 4.0.10
91
77
  specification_version: 4
92
78
  summary: Ruby implementation of the WHATWG URL Living Standard
93
79
  test_files: []