uri-whatwg_parser 0.1.4 → 0.1.6

This diff shows the changes between two publicly available versions of a package that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 2061d819a10365a97f389d9cb70325c87b75c390c4145897ed640c3e4cc85f70
4
- data.tar.gz: 905ec3bdd174039fc91ce433f84a079fe12ff93f9ac999f420b08760b3fcee53
3
+ metadata.gz: 712c7079461310fdab82e79bea4491ce91a4a8b26827a338334a0e161520a8c0
4
+ data.tar.gz: 85871841fdee54a61c942134398bfb85e2e89f798144921464058e4edd2ea8fb
5
5
  SHA512:
6
- metadata.gz: fc4a0d83fb46e959feb1f7990fa1f119428764447c6d4d455ec5287d5823b19eec845ef4b8912ceeffd6233266b3142859a837c7a26e47e20315d64bb5660006
7
- data.tar.gz: bd36808311ae2330cd897ba824219d82cff0497d75531441b3679def9ff5ea1969297ccfcbb8cd02b83909a2f7a5c0de7f0d505bb79e1117494544dd3d4b3cfa
6
+ metadata.gz: 42b046952ea2817c3db80edcc69bafbf86822c9b88f23837b4decb9d9693b0044f68b9a74c920707e16ee28121f2c03d6992df300227b32cf62a3995e7af7ee5
7
+ data.tar.gz: aa9c3d27934eec97f11f579cc2b194b5b88f0d69da5c224cb4ebf6fd13878c8a493a7e3391f7f985674e602629da303364aa49c7ed9b88f0b101b66c304db6d2
data/CHANGELOG.md CHANGED
@@ -1,3 +1,12 @@
1
+ ## 0.1.6
2
+
3
+ * Fix `opaque` part parsing
4
+ * Correctly join multiple paths
5
+
6
+ ## 0.1.5
7
+
8
+ * Remove `ipaddr` dependency
9
+
1
10
  ## 0.1.4
2
11
 
3
12
  * Support passing `encoding` option
data/README.md CHANGED
@@ -2,9 +2,7 @@
2
2
 
3
3
  Ruby implementation of the [WHATWG URL Living Standard](https://url.spec.whatwg.org/).
4
4
 
5
- The latest revision that this package implements of the standard is ([24 March 2025](https://url.spec.whatwg.org/commit-snapshots/cc8b776b89a6d92b5cc74581c8d90450d3c1e762/)).
6
-
7
- NOTE: Some features haven't been implemented yet. Please see the TODO for details.
5
+ The latest revision that this package implements of the standard is ([18 August 2025](https://url.spec.whatwg.org/commit-snapshots/05a5d834deba31622390ee05a3dcbc22496b7bb5/)).
8
6
 
9
7
  ## Installation
10
8
 
@@ -1,7 +1,6 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  require "uri/idna"
4
- require "ipaddr"
5
4
  require_relative "parser_helper"
6
5
 
7
6
  class URI::WhatwgParser
@@ -66,26 +65,128 @@ class URI::WhatwgParser
66
65
  output.join(".")
67
66
  end
68
67
 
69
- def parse_ipv6(host)
70
- addr = IPAddr.new(host)
71
- # NOTE: URL Standard doesn't support `zone_id`.
72
- raise ParseError, "invalid IPv6 format" unless addr.zone_id.nil?
73
- formatted_addr = (("%.32x" % addr).gsub(/.{4}(?!$)/, '\&:'))
74
- "[#{compress_ipv6(formatted_addr)}]"
75
- rescue IPAddr::InvalidAddressError
76
- raise ParseError, "invalid IPv6 format"
68
+ def parse_ipv6(input)
69
+ input = input[1..-2] if input.start_with?("[") && input.end_with?("]")
70
+ address = Array.new(8, 0)
71
+ piece_index = 0
72
+ compress = nil
73
+ chars = input.chars
74
+ i = 0
75
+
76
+ if chars[i] == ":"
77
+ raise ParseError, "invalid IPv6 format" unless chars[i + 1] == ":"
78
+ i += 2
79
+ compress = piece_index
80
+ end
81
+
82
+ while i < chars.length
83
+ raise ParseError, "invalid IPv6 format" if piece_index == 8
84
+
85
+ if chars[i] == ":"
86
+ raise ParseError, "invalid IPv6 format" if compress
87
+ i += 1
88
+ compress = piece_index
89
+ next
90
+ end
91
+
92
+ value = 0
93
+ length = 0
94
+ while length < 4 && i < chars.length && chars[i].match?(/[0-9A-Fa-f]/)
95
+ value = value * 16 + chars[i].to_i(16)
96
+ i += 1
97
+ length += 1
98
+ end
99
+
100
+ if chars[i] == "."
101
+ # IPv4-mapped address must be valid and complete, no trailing dot
102
+ ipv4_piece = chars[i - length, chars.length - (i - length)].join
103
+ parts = ipv4_piece.split(".")
104
+ if parts.length != 4 || parts.any? { |p| p.empty? } || ipv4_piece.end_with?(".")
105
+ raise ParseError, "invalid IPv6 format"
106
+ end
107
+
108
+ ipv4 = parse_ipv4(ipv4_piece)
109
+ address[piece_index] = (ipv4 >> 16) & 0xFFFF
110
+ address[piece_index + 1] = ipv4 & 0xFFFF
111
+ piece_index += 2
112
+ i = chars.length
113
+ break
114
+ end
115
+
116
+ raise ParseError, "invalid IPv6 format" if length == 0
117
+
118
+ address[piece_index] = value
119
+ piece_index += 1
120
+
121
+ if i < chars.length
122
+ if chars[i] == ":"
123
+ i += 1
124
+ elsif chars[i] != nil
125
+ raise ParseError, "invalid IPv6 format"
126
+ end
127
+ end
128
+ end
129
+
130
+ if compress
131
+ swaps = piece_index - compress
132
+ (0...swaps).each do |j|
133
+ address[7 - j] = address[compress + swaps - 1 - j]
134
+ address[compress + swaps - 1 - j] = 0
135
+ end
136
+ elsif piece_index != 8
137
+ raise ParseError, "invalid IPv6 format"
138
+ end
139
+
140
+ compress_ipv6(address)
77
141
  end
78
142
 
79
- def compress_ipv6(ip)
80
- output = ip.split(":").map { |term| term.sub(/\A0+/, "").empty? ? "0" : term.sub(/\A0+/, "") }.join(":")
81
- zeros = output.scan(/:?(?:0+:?){2,}/)
143
+ def compress_ipv6(address)
144
+ # Find the longest run of zeros for '::' compression
145
+ best_base = nil
146
+ best_len = 0
147
+ base = nil
148
+ len = 0
149
+
150
+ 8.times do |idx|
151
+ if address[idx] == 0
152
+ base = idx if base.nil?
153
+ len += 1
154
+ else
155
+ if len > best_len
156
+ best_base = base
157
+ best_len = len
158
+ end
159
+ base = nil
160
+ len = 0
161
+ end
162
+ end
163
+
164
+ if len > best_len
165
+ best_base = base
166
+ best_len = len
167
+ end
168
+
169
+ # Only compress if the run is at least two 0s
170
+ if best_len < 2
171
+ best_base = nil
172
+ end
82
173
 
83
- unless zeros.empty?
84
- max = zeros.max_by { |item| item.gsub(":", "").length }
85
- output = output.sub(max, "::")
174
+ # Build the string with '::' for the longest zero run
175
+ result = []
176
+ idx = 0
177
+ while idx < 8
178
+ if best_base == idx
179
+ result << "" if idx == 0
180
+ result << ""
181
+ idx += best_len
182
+ result << "" if idx == 8
183
+ next
184
+ end
185
+ result << address[idx].to_s(16)
186
+ idx += 1
86
187
  end
87
188
 
88
- output
189
+ "[#{result.join(":").gsub(/:{3,}/, "::")}]"
89
190
  end
90
191
 
91
192
  def parse_opaque_host(host)
@@ -126,7 +227,7 @@ class URI::WhatwgParser
126
227
  validation_error = false
127
228
  r = 10
128
229
 
129
- if str.size >= 2 && (str.start_with?("0x") || str.start_with?("0X"))
230
+ if str.size >= 2 && str.start_with?("0x", "0X")
130
231
  validation_error = true
131
232
  str = str[2..-1]
132
233
  r = 16
@@ -2,6 +2,6 @@
2
2
 
3
3
  module URI
4
4
  class WhatwgParser
5
- VERSION = "0.1.4"
5
+ VERSION = "0.1.6"
6
6
  end
7
7
  end
@@ -126,22 +126,22 @@ module URI
126
126
  @state = :path_or_authority_state
127
127
  @pos += 1
128
128
  else
129
- @parse_result[:path] = nil
129
+ @parse_result[:opaque] = ""
130
130
  @state = :opaque_path_state
131
131
  end
132
132
  else
133
133
  @buffer.clear
134
- @pos -= 1
134
+ @pos = -1
135
135
  @state = :no_scheme_state
136
136
  end
137
137
  end
138
138
 
139
139
  def no_scheme_state(c)
140
- raise ParseError, "scheme is missing" if @base.nil? || !@base[:opaque].nil? && c != "#"
140
+ raise ParseError, "scheme is missing" if @base.nil? || (!@base[:opaque].nil? && c != "#")
141
141
 
142
142
  if !@base[:opaque].nil? && c == "#"
143
143
  @parse_result[:scheme] = @base[:scheme]
144
- @parse_result[:path] = @base[:path]
144
+ @paths = @base_paths
145
145
  @parse_result[:query] = @base[:query]
146
146
  @parse_result[:fragment] = nil
147
147
  @state = :fragment_state
@@ -180,10 +180,11 @@ module URI
180
180
  elsif special_url? && c == "\\"
181
181
  @state = :relative_slash_state
182
182
  else
183
+
183
184
  @parse_result[:userinfo] = @base[:userinfo]
184
185
  @parse_result[:host] = @base[:host]
185
186
  @parse_result[:port] = @base[:port]
186
- @parse_result[:path] = @base[:path]
187
+ @paths = @base_paths
187
188
  @parse_result[:query] = @base[:query]
188
189
 
189
190
  if c == "?"
@@ -396,7 +397,6 @@ module URI
396
397
  @paths ||= []
397
398
 
398
399
  if (c.nil? || c == "/") || (special_url? && c == "\/") || (c == "?" || c == "#")
399
-
400
400
  if double_dot_path_segments?(@buffer)
401
401
  shorten_url_path
402
402
  if c != "/" || (special_url? && c == "\/")
@@ -408,6 +408,7 @@ module URI
408
408
  if @parse_result[:scheme] == "file" && @paths.empty? && windows_drive_letter?(@buffer)
409
409
  @buffer[1] = ":"
410
410
  end
411
+
411
412
  @paths << @buffer
412
413
  end
413
414
 
@@ -433,13 +434,13 @@ module URI
433
434
  @parse_result[:fragment] = nil
434
435
  @state = :fragment_state
435
436
  elsif c == " "
436
- if rest.start_with?("?") || rest.start_with?("#")
437
- @parse_result[:path] = @parse_result[:path].to_s + "%20"
437
+ if rest.start_with?("?", "#")
438
+ @parse_result[:opaque] += "%20"
438
439
  else
439
- @parse_result[:path] = @parse_result[:path].to_s + " "
440
+ @parse_result[:opaque] += " "
440
441
  end
441
442
  elsif !c.nil?
442
- @parse_result[:path] = @parse_result[:path].to_s + percent_encode(c, C0_CONTROL_PERCENT_ENCODE_SET, @encoding)
443
+ @parse_result[:opaque] += percent_encode(c, C0_CONTROL_PERCENT_ENCODE_SET, @encoding)
443
444
  end
444
445
  end
445
446
 
@@ -488,10 +489,10 @@ module URI
488
489
  end
489
490
 
490
491
  def shorten_url_path
491
- return if @parse_result[:path]&.empty?
492
+ return if @paths.nil?
492
493
 
493
- return true if @parse_result[:scheme] == "file" && @parse_result[:path]&.length == 1 && normalized_windows_drive_letter?(@parse_result[:path])
494
- @parse_result[:path]&.chomp!
494
+ return true if @parse_result[:scheme] == "file" && @paths.length == 1 && normalized_windows_drive_letter?(@paths.first)
495
+ @paths.pop
495
496
  end
496
497
 
497
498
  def rest
metadata CHANGED
@@ -1,13 +1,13 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: uri-whatwg_parser
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.4
4
+ version: 0.1.6
5
5
  platform: ruby
6
6
  authors:
7
7
  - Yuji Yaginuma
8
8
  bindir: exe
9
9
  cert_chain: []
10
- date: 2025-06-12 00:00:00.000000000 Z
10
+ date: 1980-01-02 00:00:00.000000000 Z
11
11
  dependencies:
12
12
  - !ruby/object:Gem::Dependency
13
13
  name: uri
@@ -86,7 +86,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
86
86
  - !ruby/object:Gem::Version
87
87
  version: '0'
88
88
  requirements: []
89
- rubygems_version: 3.6.2
89
+ rubygems_version: 3.6.7
90
90
  specification_version: 4
91
91
  summary: Ruby implementation of the WHATWG URL Living Standard
92
92
  test_files: []