uri-whatwg_parser 0.1.3 → 0.1.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +4 -0
- data/README.md +2 -1
- data/lib/uri/whatwg_parser/parser_helper.rb +8 -5
- data/lib/uri/whatwg_parser/version.rb +1 -1
- data/lib/uri/whatwg_parser.rb +28 -23
- metadata +4 -4
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 2061d819a10365a97f389d9cb70325c87b75c390c4145897ed640c3e4cc85f70
|
4
|
+
data.tar.gz: 905ec3bdd174039fc91ce433f84a079fe12ff93f9ac999f420b08760b3fcee53
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: fc4a0d83fb46e959feb1f7990fa1f119428764447c6d4d455ec5287d5823b19eec845ef4b8912ceeffd6233266b3142859a837c7a26e47e20315d64bb5660006
|
7
|
+
data.tar.gz: bd36808311ae2330cd897ba824219d82cff0497d75531441b3679def9ff5ea1969297ccfcbb8cd02b83909a2f7a5c0de7f0d505bb79e1117494544dd3d4b3cfa
|
data/CHANGELOG.md
CHANGED
data/README.md
CHANGED
@@ -17,6 +17,8 @@ gem install uri-whatwg_parser
|
|
17
17
|
This gem is compatible with the [`uri`](https://github.com/ruby/uri) gem and automatically switches the parser's behavior, so users don't need any additional setup.
|
18
18
|
|
19
19
|
```ruby
|
20
|
+
require "uri/whatwg_parser"
|
21
|
+
|
20
22
|
URI.parse("http://日本語.jp")
|
21
23
|
# => #<URI::HTTP http://xn--wgv71a119e.jp>
|
22
24
|
```
|
@@ -31,7 +33,6 @@ To install this gem onto your local machine, run `bundle exec rake install`. To
|
|
31
33
|
|
32
34
|
* Support state override
|
33
35
|
* Support validations
|
34
|
-
* Support encodings other than UTF-8
|
35
36
|
|
36
37
|
## Contributing
|
37
38
|
|
data/lib/uri/whatwg_parser/parser_helper.rb
CHANGED
@@ -18,11 +18,14 @@ class URI::WhatwgParser
|
|
18
18
|
ASCII_DIGIT.include?(c)
|
19
19
|
end
|
20
20
|
|
21
|
-
def percent_encode(c, encode_set)
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
c
|
21
|
+
def percent_encode(c, encode_set, encoding = Encoding::UTF_8)
|
22
|
+
return c unless encode_set.include?(c) || c.ord > 0x7e
|
23
|
+
|
24
|
+
# For ASCII single-byte characters
|
25
|
+
return "%%%02X" % c.ord if c.bytesize == 1
|
26
|
+
|
27
|
+
bytes = c.encoding == encoding ? c.bytes : c.encode(encoding).bytes
|
28
|
+
bytes.map { |b| "%%%02X" % b }.join
|
26
29
|
end
|
27
30
|
end
|
28
31
|
end
|
data/lib/uri/whatwg_parser.rb
CHANGED
@@ -34,20 +34,21 @@ module URI
|
|
34
34
|
{}
|
35
35
|
end
|
36
36
|
|
37
|
-
def parse(uri, base = nil) # :nodoc:
|
37
|
+
def parse(uri, base = nil, encoding = Encoding::UTF_8) # :nodoc:
|
38
38
|
reset
|
39
|
-
URI.for(*self.split(uri, base))
|
39
|
+
URI.for(*self.split(uri, base, encoding))
|
40
40
|
end
|
41
41
|
|
42
|
-
def split(uri, base = nil) # :nodoc:
|
42
|
+
def split(uri, base = nil, encoding = Encoding::UTF_8) # :nodoc:
|
43
43
|
@base = nil
|
44
44
|
if base != nil
|
45
|
-
ary = split(base)
|
45
|
+
ary = split(base, nil, encoding)
|
46
46
|
@base = { scheme: ary[0], userinfo: ary[1], host: ary[2], port: ary[3], registry: ary[4], path: ary[5], opaque: ary[6], query: ary[7], fragment: ary[8]}
|
47
47
|
@base_paths = @paths
|
48
48
|
reset
|
49
49
|
end
|
50
50
|
|
51
|
+
@encoding = encoding
|
51
52
|
@uri = uri.dup
|
52
53
|
@uri.sub!(/\A[\u0000-\u0020]*/, "")
|
53
54
|
@uri.sub!(/[\u0000-\u0020]*\z/, "")
|
@@ -100,7 +101,7 @@ module URI
|
|
100
101
|
|
101
102
|
def scheme_start_state(c)
|
102
103
|
if ascii_alpha?(c)
|
103
|
-
@buffer
|
104
|
+
@buffer << c.downcase
|
104
105
|
@state = :scheme_state
|
105
106
|
else
|
106
107
|
@pos -= 1
|
@@ -110,7 +111,7 @@ module URI
|
|
110
111
|
|
111
112
|
def scheme_state(c)
|
112
113
|
if ascii_alphanumerica?(c) || ["+", "-", "."].include?(c)
|
113
|
-
@buffer
|
114
|
+
@buffer << c.downcase
|
114
115
|
elsif c == ":"
|
115
116
|
@parse_result[:scheme] = @buffer
|
116
117
|
@buffer = +""
|
@@ -129,7 +130,7 @@ module URI
|
|
129
130
|
@state = :opaque_path_state
|
130
131
|
end
|
131
132
|
else
|
132
|
-
@buffer
|
133
|
+
@buffer.clear
|
133
134
|
@pos -= 1
|
134
135
|
@state = :no_scheme_state
|
135
136
|
end
|
@@ -241,7 +242,7 @@ module URI
|
|
241
242
|
next
|
242
243
|
end
|
243
244
|
|
244
|
-
encoded_char = percent_encode(char, USERINFO_PERCENT_ENCODE_SET)
|
245
|
+
encoded_char = percent_encode(char, USERINFO_PERCENT_ENCODE_SET, @encoding)
|
245
246
|
|
246
247
|
if @password_token_seen
|
247
248
|
@password = @password.to_s + encoded_char
|
@@ -250,12 +251,12 @@ module URI
|
|
250
251
|
end
|
251
252
|
end
|
252
253
|
|
253
|
-
@buffer
|
254
|
+
@buffer.clear
|
254
255
|
elsif c.nil? || ["/", "?", "#"].include?(c) || (special_url? && c == "\\")
|
255
256
|
raise ParseError, "host is missing" if @at_sign_seen && @buffer.empty?
|
256
257
|
|
257
258
|
@pos -= (@buffer.size + 1)
|
258
|
-
@buffer
|
259
|
+
@buffer.clear
|
259
260
|
@state = :host_state
|
260
261
|
else
|
261
262
|
@buffer << c
|
@@ -267,7 +268,7 @@ module URI
|
|
267
268
|
raise ParseError, "host is missing" if @buffer.empty?
|
268
269
|
|
269
270
|
@parse_result[:host] = @host_parser.parse(@buffer, !special_url?)
|
270
|
-
@buffer
|
271
|
+
@buffer.clear
|
271
272
|
@state = :port_state
|
272
273
|
elsif c.nil? || ["/", "?", "#"].include?(c) || (special_url? && c == "\\")
|
273
274
|
@pos -= 1
|
@@ -275,19 +276,19 @@ module URI
|
|
275
276
|
raise ParseError, "host is missing"
|
276
277
|
else
|
277
278
|
@parse_result[:host] = @host_parser.parse(@buffer, !special_url?)
|
278
|
-
@buffer
|
279
|
+
@buffer.clear
|
279
280
|
@state = :path_start_state
|
280
281
|
end
|
281
282
|
else
|
282
283
|
@inside_brackets = true if c == "["
|
283
284
|
@inside_brackets = false if c == "]"
|
284
|
-
@buffer
|
285
|
+
@buffer << c
|
285
286
|
end
|
286
287
|
end
|
287
288
|
|
288
289
|
def port_state(c)
|
289
290
|
if ascii_digit?(c)
|
290
|
-
@buffer
|
291
|
+
@buffer << c
|
291
292
|
elsif c.nil? || ["/", "?", "#"].include?(c) || (special_url? && c == "\\")
|
292
293
|
unless @buffer.empty?
|
293
294
|
begin
|
@@ -298,7 +299,7 @@ module URI
|
|
298
299
|
raise ParseError, "port is invalid value"
|
299
300
|
end
|
300
301
|
|
301
|
-
@buffer
|
302
|
+
@buffer.clear
|
302
303
|
end
|
303
304
|
|
304
305
|
@state = :path_start_state
|
@@ -346,7 +347,7 @@ module URI
|
|
346
347
|
if !@base.nil? && @base[:scheme] == "file"
|
347
348
|
@parse_result[:host] = @base[:host]
|
348
349
|
if !starts_with_windows_drive_letter?(rest) && @base_paths && normalized_windows_drive_letter?(@base_paths[0])
|
349
|
-
@paths[0]
|
350
|
+
@paths[0] << @base_paths[0]
|
350
351
|
end
|
351
352
|
end
|
352
353
|
@state = :path_state
|
@@ -369,12 +370,12 @@ module URI
|
|
369
370
|
@parse_result[:host] = host
|
370
371
|
end
|
371
372
|
|
372
|
-
@buffer
|
373
|
+
@buffer.clear
|
373
374
|
@state = :path_start_state
|
374
375
|
end
|
375
376
|
end
|
376
377
|
|
377
|
-
@buffer
|
378
|
+
@buffer << c unless c.nil?
|
378
379
|
end
|
379
380
|
|
380
381
|
def path_start_state(c)
|
@@ -420,7 +421,7 @@ module URI
|
|
420
421
|
@state = :fragment_state
|
421
422
|
end
|
422
423
|
else
|
423
|
-
@buffer << percent_encode(c, PATH_PERCENT_ENCODE_SET)
|
424
|
+
@buffer << percent_encode(c, PATH_PERCENT_ENCODE_SET, @encoding)
|
424
425
|
end
|
425
426
|
end
|
426
427
|
|
@@ -438,15 +439,19 @@ module URI
|
|
438
439
|
@parse_result[:path] = @parse_result[:path].to_s + " "
|
439
440
|
end
|
440
441
|
elsif !c.nil?
|
441
|
-
@parse_result[:path] = @parse_result[:path].to_s + percent_encode(c, C0_CONTROL_PERCENT_ENCODE_SET)
|
442
|
+
@parse_result[:path] = @parse_result[:path].to_s + percent_encode(c, C0_CONTROL_PERCENT_ENCODE_SET, @encoding)
|
442
443
|
end
|
443
444
|
end
|
444
445
|
|
445
446
|
def query_state(c)
|
447
|
+
if @encoding != Encoding::UTF_8 && (!special_url? || %w[ws wss].include?(@parse_result[:scheme]))
|
448
|
+
@encoding = Encoding::UTF_8
|
449
|
+
end
|
450
|
+
|
446
451
|
if c.nil? || c == "#"
|
447
452
|
query_percent_encode_set = special_url? ? SPECIAL_QUERY_PERCENT_ENCODE_SET : QUERY_PERCENT_ENCODE_SET
|
448
|
-
@parse_result[:query] = @buffer.chars.map { |c| percent_encode(c, query_percent_encode_set) }.join
|
449
|
-
@buffer
|
453
|
+
@parse_result[:query] = @buffer.chars.map { |c| percent_encode(c, query_percent_encode_set, @encoding) }.join
|
454
|
+
@buffer.clear
|
450
455
|
@state = :fragment_state if c == "#"
|
451
456
|
elsif !c.nil?
|
452
457
|
@buffer << c
|
@@ -455,7 +460,7 @@ module URI
|
|
455
460
|
|
456
461
|
def fragment_state(c)
|
457
462
|
return if c.nil?
|
458
|
-
@parse_result[:fragment] = @parse_result[:fragment].to_s + percent_encode(c, FRAGMENT_PERCENT_ENCODE_SET)
|
463
|
+
@parse_result[:fragment] = @parse_result[:fragment].to_s + percent_encode(c, FRAGMENT_PERCENT_ENCODE_SET, @encoding)
|
459
464
|
end
|
460
465
|
|
461
466
|
def windows_drive_letter?(str)
|
metadata
CHANGED
@@ -1,13 +1,13 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: uri-whatwg_parser
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.3
|
4
|
+
version: 0.1.4
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Yuji Yaginuma
|
8
8
|
bindir: exe
|
9
9
|
cert_chain: []
|
10
|
-
date: 2025-
|
10
|
+
date: 2025-06-12 00:00:00.000000000 Z
|
11
11
|
dependencies:
|
12
12
|
- !ruby/object:Gem::Dependency
|
13
13
|
name: uri
|
@@ -15,14 +15,14 @@ dependencies:
|
|
15
15
|
requirements:
|
16
16
|
- - ">="
|
17
17
|
- !ruby/object:Gem::Version
|
18
|
-
version:
|
18
|
+
version: 1.0.0
|
19
19
|
type: :runtime
|
20
20
|
prerelease: false
|
21
21
|
version_requirements: !ruby/object:Gem::Requirement
|
22
22
|
requirements:
|
23
23
|
- - ">="
|
24
24
|
- !ruby/object:Gem::Version
|
25
|
-
version:
|
25
|
+
version: 1.0.0
|
26
26
|
- !ruby/object:Gem::Dependency
|
27
27
|
name: uri-idna
|
28
28
|
requirement: !ruby/object:Gem::Requirement
|