uri-whatwg_parser 0.1.3 → 0.1.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: d94a0f3f96af7620d7592bb1c0b215b52bfcb7e19c2be83177407a67ae8eaa79
4
- data.tar.gz: 14cd125a49fc9b61eb5f70621cfc8685e0cb9b309f048f5528e6c6a37d389eaf
3
+ metadata.gz: 2061d819a10365a97f389d9cb70325c87b75c390c4145897ed640c3e4cc85f70
4
+ data.tar.gz: 905ec3bdd174039fc91ce433f84a079fe12ff93f9ac999f420b08760b3fcee53
5
5
  SHA512:
6
- metadata.gz: 31e07e1a13f149e0188cd7701e2e0828fa66b078004b30bbe55ae1a34989cb68cf4190a0283d6969b718f90712a1cbf61f1efee095ab5847ea6771caf3936661
7
- data.tar.gz: 3e251062ddd86971a0829f83fb27f0b1da05016544b95516f859ce66cb6bd35d61abea75dd1bf693d663de47853c82e7ff326d771f4924a3827fe38c8f5a3df4
6
+ metadata.gz: fc4a0d83fb46e959feb1f7990fa1f119428764447c6d4d455ec5287d5823b19eec845ef4b8912ceeffd6233266b3142859a837c7a26e47e20315d64bb5660006
7
+ data.tar.gz: bd36808311ae2330cd897ba824219d82cff0497d75531441b3679def9ff5ea1969297ccfcbb8cd02b83909a2f7a5c0de7f0d505bb79e1117494544dd3d4b3cfa
data/CHANGELOG.md CHANGED
@@ -1,3 +1,7 @@
1
+ ## 0.1.4
2
+
3
+ * Support passing an `encoding` option
4
+
1
5
  ## 0.1.3
2
6
 
3
7
  * Improve performance of parsing
data/README.md CHANGED
@@ -17,6 +17,8 @@ gem install uri-whatwg_parser
17
17
  This gem is compatible with the [`uri`](https://github.com/ruby/uri) gem and automatically switches the parser's behavior, so users don't need any additional setup.
18
18
 
19
19
  ```ruby
20
+ require "uri/whatwg_parser"
21
+
20
22
  URI.parse("http://日本語.jp")
21
23
  # => #<URI::HTTP http://xn--wgv71a119e.jp>
22
24
  ```
@@ -31,7 +33,6 @@ To install this gem onto your local machine, run `bundle exec rake install`. To
31
33
 
32
34
  * Support state override
33
35
  * Support validations
34
- * Support encodings other than UTF-8
35
36
 
36
37
  ## Contributing
37
38
 
@@ -18,11 +18,14 @@ class URI::WhatwgParser
18
18
  ASCII_DIGIT.include?(c)
19
19
  end
20
20
 
21
- def percent_encode(c, encode_set)
22
- if encode_set.include?(c) || c.ord > 0x7e
23
- return c.unpack("C*").map { |b| sprintf("%%%02X", b) }.join
24
- end
25
- c
21
+ def percent_encode(c, encode_set, encoding = Encoding::UTF_8)
22
+ return c unless encode_set.include?(c) || c.ord > 0x7e
23
+
24
+ # For ASCII single-byte characters
25
+ return "%%%02X" % c.ord if c.bytesize == 1
26
+
27
+ bytes = c.encoding == encoding ? c.bytes : c.encode(encoding).bytes
28
+ bytes.map { |b| "%%%02X" % b }.join
26
29
  end
27
30
  end
28
31
  end
@@ -2,6 +2,6 @@
2
2
 
3
3
  module URI
4
4
  class WhatwgParser
5
- VERSION = "0.1.3"
5
+ VERSION = "0.1.4"
6
6
  end
7
7
  end
@@ -34,20 +34,21 @@ module URI
34
34
  {}
35
35
  end
36
36
 
37
- def parse(uri, base = nil) # :nodoc:
37
+ def parse(uri, base = nil, encoding = Encoding::UTF_8) # :nodoc:
38
38
  reset
39
- URI.for(*self.split(uri, base))
39
+ URI.for(*self.split(uri, base, encoding))
40
40
  end
41
41
 
42
- def split(uri, base = nil) # :nodoc:
42
+ def split(uri, base = nil, encoding = Encoding::UTF_8) # :nodoc:
43
43
  @base = nil
44
44
  if base != nil
45
- ary = split(base)
45
+ ary = split(base, nil, encoding)
46
46
  @base = { scheme: ary[0], userinfo: ary[1], host: ary[2], port: ary[3], registry: ary[4], path: ary[5], opaque: ary[6], query: ary[7], fragment: ary[8]}
47
47
  @base_paths = @paths
48
48
  reset
49
49
  end
50
50
 
51
+ @encoding = encoding
51
52
  @uri = uri.dup
52
53
  @uri.sub!(/\A[\u0000-\u0020]*/, "")
53
54
  @uri.sub!(/[\u0000-\u0020]*\z/, "")
@@ -100,7 +101,7 @@ module URI
100
101
 
101
102
  def scheme_start_state(c)
102
103
  if ascii_alpha?(c)
103
- @buffer += c.downcase
104
+ @buffer << c.downcase
104
105
  @state = :scheme_state
105
106
  else
106
107
  @pos -= 1
@@ -110,7 +111,7 @@ module URI
110
111
 
111
112
  def scheme_state(c)
112
113
  if ascii_alphanumerica?(c) || ["+", "-", "."].include?(c)
113
- @buffer += c.downcase
114
+ @buffer << c.downcase
114
115
  elsif c == ":"
115
116
  @parse_result[:scheme] = @buffer
116
117
  @buffer = +""
@@ -129,7 +130,7 @@ module URI
129
130
  @state = :opaque_path_state
130
131
  end
131
132
  else
132
- @buffer = +""
133
+ @buffer.clear
133
134
  @pos -= 1
134
135
  @state = :no_scheme_state
135
136
  end
@@ -241,7 +242,7 @@ module URI
241
242
  next
242
243
  end
243
244
 
244
- encoded_char = percent_encode(char, USERINFO_PERCENT_ENCODE_SET)
245
+ encoded_char = percent_encode(char, USERINFO_PERCENT_ENCODE_SET, @encoding)
245
246
 
246
247
  if @password_token_seen
247
248
  @password = @password.to_s + encoded_char
@@ -250,12 +251,12 @@ module URI
250
251
  end
251
252
  end
252
253
 
253
- @buffer = +""
254
+ @buffer.clear
254
255
  elsif c.nil? || ["/", "?", "#"].include?(c) || (special_url? && c == "\\")
255
256
  raise ParseError, "host is missing" if @at_sign_seen && @buffer.empty?
256
257
 
257
258
  @pos -= (@buffer.size + 1)
258
- @buffer = +""
259
+ @buffer.clear
259
260
  @state = :host_state
260
261
  else
261
262
  @buffer << c
@@ -267,7 +268,7 @@ module URI
267
268
  raise ParseError, "host is missing" if @buffer.empty?
268
269
 
269
270
  @parse_result[:host] = @host_parser.parse(@buffer, !special_url?)
270
- @buffer = +""
271
+ @buffer.clear
271
272
  @state = :port_state
272
273
  elsif c.nil? || ["/", "?", "#"].include?(c) || (special_url? && c == "\\")
273
274
  @pos -= 1
@@ -275,19 +276,19 @@ module URI
275
276
  raise ParseError, "host is missing"
276
277
  else
277
278
  @parse_result[:host] = @host_parser.parse(@buffer, !special_url?)
278
- @buffer = +""
279
+ @buffer.clear
279
280
  @state = :path_start_state
280
281
  end
281
282
  else
282
283
  @inside_brackets = true if c == "["
283
284
  @inside_brackets = false if c == "]"
284
- @buffer += c
285
+ @buffer << c
285
286
  end
286
287
  end
287
288
 
288
289
  def port_state(c)
289
290
  if ascii_digit?(c)
290
- @buffer += c
291
+ @buffer << c
291
292
  elsif c.nil? || ["/", "?", "#"].include?(c) || (special_url? && c == "\\")
292
293
  unless @buffer.empty?
293
294
  begin
@@ -298,7 +299,7 @@ module URI
298
299
  raise ParseError, "port is invalid value"
299
300
  end
300
301
 
301
- @buffer = +""
302
+ @buffer.clear
302
303
  end
303
304
 
304
305
  @state = :path_start_state
@@ -346,7 +347,7 @@ module URI
346
347
  if !@base.nil? && @base[:scheme] == "file"
347
348
  @parse_result[:host] = @base[:host]
348
349
  if !starts_with_windows_drive_letter?(rest) && @base_paths && normalized_windows_drive_letter?(@base_paths[0])
349
- @paths[0] += @base_paths[0]
350
+ @paths[0] << @base_paths[0]
350
351
  end
351
352
  end
352
353
  @state = :path_state
@@ -369,12 +370,12 @@ module URI
369
370
  @parse_result[:host] = host
370
371
  end
371
372
 
372
- @buffer = +""
373
+ @buffer.clear
373
374
  @state = :path_start_state
374
375
  end
375
376
  end
376
377
 
377
- @buffer += c unless c.nil?
378
+ @buffer << c unless c.nil?
378
379
  end
379
380
 
380
381
  def path_start_state(c)
@@ -420,7 +421,7 @@ module URI
420
421
  @state = :fragment_state
421
422
  end
422
423
  else
423
- @buffer << percent_encode(c, PATH_PERCENT_ENCODE_SET)
424
+ @buffer << percent_encode(c, PATH_PERCENT_ENCODE_SET, @encoding)
424
425
  end
425
426
  end
426
427
 
@@ -438,15 +439,19 @@ module URI
438
439
  @parse_result[:path] = @parse_result[:path].to_s + " "
439
440
  end
440
441
  elsif !c.nil?
441
- @parse_result[:path] = @parse_result[:path].to_s + percent_encode(c, C0_CONTROL_PERCENT_ENCODE_SET)
442
+ @parse_result[:path] = @parse_result[:path].to_s + percent_encode(c, C0_CONTROL_PERCENT_ENCODE_SET, @encoding)
442
443
  end
443
444
  end
444
445
 
445
446
  def query_state(c)
447
+ if @encoding != Encoding::UTF_8 && (!special_url? || %w[ws wss].include?(@parse_result[:scheme]))
448
+ @encoding = Encoding::UTF_8
449
+ end
450
+
446
451
  if c.nil? || c == "#"
447
452
  query_percent_encode_set = special_url? ? SPECIAL_QUERY_PERCENT_ENCODE_SET : QUERY_PERCENT_ENCODE_SET
448
- @parse_result[:query] = @buffer.chars.map { |c| percent_encode(c, query_percent_encode_set) }.join
449
- @buffer = +""
453
+ @parse_result[:query] = @buffer.chars.map { |c| percent_encode(c, query_percent_encode_set, @encoding) }.join
454
+ @buffer.clear
450
455
  @state = :fragment_state if c == "#"
451
456
  elsif !c.nil?
452
457
  @buffer << c
@@ -455,7 +460,7 @@ module URI
455
460
 
456
461
  def fragment_state(c)
457
462
  return if c.nil?
458
- @parse_result[:fragment] = @parse_result[:fragment].to_s + percent_encode(c, FRAGMENT_PERCENT_ENCODE_SET)
463
+ @parse_result[:fragment] = @parse_result[:fragment].to_s + percent_encode(c, FRAGMENT_PERCENT_ENCODE_SET, @encoding)
459
464
  end
460
465
 
461
466
  def windows_drive_letter?(str)
metadata CHANGED
@@ -1,13 +1,13 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: uri-whatwg_parser
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.3
4
+ version: 0.1.4
5
5
  platform: ruby
6
6
  authors:
7
7
  - Yuji Yaginuma
8
8
  bindir: exe
9
9
  cert_chain: []
10
- date: 2025-05-25 00:00:00.000000000 Z
10
+ date: 2025-06-12 00:00:00.000000000 Z
11
11
  dependencies:
12
12
  - !ruby/object:Gem::Dependency
13
13
  name: uri
@@ -15,14 +15,14 @@ dependencies:
15
15
  requirements:
16
16
  - - ">="
17
17
  - !ruby/object:Gem::Version
18
- version: '0'
18
+ version: 1.0.0
19
19
  type: :runtime
20
20
  prerelease: false
21
21
  version_requirements: !ruby/object:Gem::Requirement
22
22
  requirements:
23
23
  - - ">="
24
24
  - !ruby/object:Gem::Version
25
- version: '0'
25
+ version: 1.0.0
26
26
  - !ruby/object:Gem::Dependency
27
27
  name: uri-idna
28
28
  requirement: !ruby/object:Gem::Requirement