uri-whatwg_parser 0.1.2 → 0.1.3

This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 57eb48896a4f467a9b2f4d5a9b195a8df8a3b1f707d680711e777b1226d5471a
4
- data.tar.gz: 90e1c4a439d6309ac14c7f8e46ace4128c8f8469d275e29b9afa06b60d1e8ac9
3
+ metadata.gz: d94a0f3f96af7620d7592bb1c0b215b52bfcb7e19c2be83177407a67ae8eaa79
4
+ data.tar.gz: 14cd125a49fc9b61eb5f70621cfc8685e0cb9b309f048f5528e6c6a37d389eaf
5
5
  SHA512:
6
- metadata.gz: dd95276e9e4df35db76eb196c46b72f1300d777c45161dcf58d3253e477db846d7662cd9f0e837fcc37a3ddd047700c024ecc5c1adeed83c24a478026b188936
7
- data.tar.gz: 3cb8ee2b663bb4938bd099106874bed736aa709a38ff4b435803c8ff1752f229a4d8cd83ffb920fc1e29f5ebfc1815f5796105a6f54bb99d241f4b5adb605efa
6
+ metadata.gz: 31e07e1a13f149e0188cd7701e2e0828fa66b078004b30bbe55ae1a34989cb68cf4190a0283d6969b718f90712a1cbf61f1efee095ab5847ea6771caf3936661
7
+ data.tar.gz: 3e251062ddd86971a0829f83fb27f0b1da05016544b95516f859ce66cb6bd35d61abea75dd1bf693d663de47853c82e7ff326d771f4924a3827fe38c8f5a3df4
data/CHANGELOG.md ADDED
@@ -0,0 +1,15 @@
1
+ ## 0.1.3
2
+
3
+ * Improve performance of parsing
4
+
5
+ ## 0.1.2
6
+
7
+ * Correctly compress IPv6 host
8
+
9
+ ## 0.1.1
10
+
11
+ * Support `base` option
12
+
13
+ ## 0.1.0
14
+
15
+ * Initial release
@@ -156,11 +156,11 @@ class URI::WhatwgParser
156
156
  end
157
157
 
158
158
  def include_forbidden_domain_code_point?(str)
159
- str.chars.intersect?(FORBIDDEN_DOMAIN_CODE_POINT)
159
+ FORBIDDEN_DOMAIN_CODE_POINT.any? {|c| str.include?(c) }
160
160
  end
161
161
 
162
162
  def include_forbidden_host_code_point?(str)
163
- str.chars.intersect?(FORBIDDEN_HOST_CODE_POINT)
163
+ FORBIDDEN_HOST_CODE_POINT.any? {|c| str.include?(c) }
164
164
  end
165
165
  end
166
166
  end
@@ -2,8 +2,9 @@
2
2
 
3
3
  class URI::WhatwgParser
4
4
  module ParserHelper
5
- C0_CONTROL = (0..0x1f).to_a
6
- C0_CONTROL_PERCENT_ENCODE_SET = C0_CONTROL.map(&:chr)
5
+ C0_CONTROL_PERCENT_ENCODE_SET = (0..0x1f).map(&:chr)
6
+ ASCII_ALPHA = ("a".."z").to_a + ("A".."Z").to_a
7
+ ASCII_DIGIT = ("0".."9").to_a
7
8
 
8
9
  def ascii_alpha?(c)
9
10
  ASCII_ALPHA.include?(c)
@@ -2,6 +2,6 @@
2
2
 
3
3
  module URI
4
4
  class WhatwgParser
5
- VERSION = "0.1.2"
5
+ VERSION = "0.1.3"
6
6
  end
7
7
  end
@@ -1,6 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- require "strscan"
4
3
  require "uri"
5
4
  require_relative "whatwg_parser/error"
6
5
  require_relative "whatwg_parser/version"
@@ -12,8 +11,6 @@ module URI
12
11
  include ParserHelper
13
12
 
14
13
  SPECIAL_SCHEME = { "ftp" => 21, "file" => nil, "http" => 80, "https" => 443, "ws" => 80, "wss" => 443 }
15
- ASCII_ALPHA = ("a".."z").to_a + ("A".."Z").to_a
16
- ASCII_DIGIT = ("0".."9").to_a
17
14
 
18
15
  FRAGMENT_PERCENT_ENCODE_SET = C0_CONTROL_PERCENT_ENCODE_SET + [" ", "\"", "<", ">", "`"]
19
16
  QUERY_PERCENT_ENCODE_SET = C0_CONTROL_PERCENT_ENCODE_SET + [" ", "\"", "#", "<", ">"]
@@ -51,31 +48,25 @@ module URI
51
48
  reset
52
49
  end
53
50
 
54
- uri = uri.dup
55
- uri.gsub!(/\A[\u0000-\u0020]*/, "")
56
- uri.gsub!(/[\u0000-\u0020]*\z/, "")
57
- uri.delete!("\t")
58
- uri.delete!("\n")
59
- uri.delete!("\r")
51
+ @uri = uri.dup
52
+ @uri.sub!(/\A[\u0000-\u0020]*/, "")
53
+ @uri.sub!(/[\u0000-\u0020]*\z/, "")
54
+ @uri.delete!("\t")
55
+ @uri.delete!("\n")
56
+ @uri.delete!("\r")
60
57
 
61
58
  raise ParseError, "uri can't be empty" if uri.empty? && @base.nil?
62
59
 
63
- @scanner = StringScanner.new(uri)
60
+ @pos = 0
64
61
 
65
- loop do
66
- c = @scanner.getch
67
- send("on_#{@state}", c)
68
-
69
- if @force_continue
70
- @force_continue = false
71
- next
72
- end
73
-
74
- break if c.nil? && @scanner.eos?
62
+ while @pos <= @uri.length
63
+ c = @uri[@pos]
64
+ send(@state, c)
65
+ @pos += 1
75
66
  end
76
67
 
77
68
  @parse_result[:userinfo] = "#{@username}:#{@password}" if !@username.nil? || !@password.nil?
78
- @parse_result[:path] = "/#{@paths.join("/")}" if !@paths.empty?
69
+ @parse_result[:path] = "/#{@paths.join("/")}" if @paths && !@paths.empty?
79
70
 
80
71
  @parse_result.values
81
72
  end
@@ -95,12 +86,11 @@ module URI
95
86
  private
96
87
 
97
88
  def reset
98
- @scanner = nil
99
89
  @buffer = +""
100
90
  @at_sign_seen = nil
101
91
  @password_token_seen = nil
102
92
  @inside_brackets = nil
103
- @paths = []
93
+ @paths = nil
104
94
  @username = nil
105
95
  @password = nil
106
96
  @parse_result = { scheme: nil, userinfo: nil, host: nil, port: nil, registry: nil, path: nil, opaque: nil, query: nil, fragment: nil }
@@ -108,21 +98,17 @@ module URI
108
98
  @state = :scheme_start_state
109
99
  end
110
100
 
111
- def on_scheme_start_state(c)
101
+ def scheme_start_state(c)
112
102
  if ascii_alpha?(c)
113
103
  @buffer += c.downcase
114
104
  @state = :scheme_state
115
105
  else
116
- if c.nil?
117
- @force_continue = true
118
- else
119
- @scanner.pos -= c.bytesize
120
- end
106
+ @pos -= 1
121
107
  @state = :no_scheme_state
122
108
  end
123
109
  end
124
110
 
125
- def on_scheme_state(c)
111
+ def scheme_state(c)
126
112
  if ascii_alphanumerica?(c) || ["+", "-", "."].include?(c)
127
113
  @buffer += c.downcase
128
114
  elsif c == ":"
@@ -135,58 +121,58 @@ module URI
135
121
  @state = :special_relative_or_authority_state
136
122
  elsif special_url?
137
123
  @state = :special_authority_slashes_state
138
- elsif @scanner.rest.start_with?("/")
124
+ elsif rest.start_with?("/")
139
125
  @state = :path_or_authority_state
140
- @scanner.pos += c.bytesize
126
+ @pos += 1
141
127
  else
142
- @parse_result[:path] = ""
128
+ @parse_result[:path] = nil
143
129
  @state = :opaque_path_state
144
130
  end
145
131
  else
146
132
  @buffer = +""
147
- decrease_pos(c)
133
+ @pos -= 1
148
134
  @state = :no_scheme_state
149
135
  end
150
136
  end
151
137
 
152
- def on_no_scheme_state(c)
138
+ def no_scheme_state(c)
153
139
  raise ParseError, "scheme is missing" if @base.nil? || !@base[:opaque].nil? && c != "#"
154
140
 
155
141
  if !@base[:opaque].nil? && c == "#"
156
142
  @parse_result[:scheme] = @base[:scheme]
157
143
  @parse_result[:path] = @base[:path]
158
144
  @parse_result[:query] = @base[:query]
159
- @parse_result[:fragment] = ""
145
+ @parse_result[:fragment] = nil
160
146
  @state = :fragment_state
161
147
  elsif @base[:scheme] != "file"
162
148
  @state = :relative_state
163
- decrease_pos(c)
149
+ @pos -= 1
164
150
  else
165
151
  @state = :file_state
166
- decrease_pos(c)
152
+ @pos -= 1
167
153
  end
168
154
  end
169
155
 
170
- def on_special_relative_or_authority_state(c)
171
- if c == "/" && @scanner.rest.start_with?("/")
156
+ def special_relative_or_authority_state(c)
157
+ if c == "/" && rest.start_with?("/")
172
158
  @state = :special_authority_ignore_slashes_state
173
- decrease_pos(c)
159
+ @pos -= 1
174
160
  else
175
161
  @state = :relative_state
176
- decrease_pos(c)
162
+ @pos -= 1
177
163
  end
178
164
  end
179
165
 
180
- def on_path_or_authority_state(c)
166
+ def path_or_authority_state(c)
181
167
  if c == "/"
182
168
  @state = :authority_state
183
169
  else
184
170
  @state = :path_state
185
- decrease_pos(c)
171
+ @pos -= 1
186
172
  end
187
173
  end
188
174
 
189
- def on_relative_state(c)
175
+ def relative_state(c)
190
176
  @parse_result[:scheme] = @base[:scheme]
191
177
  if c == "/"
192
178
  @state = :relative_slash_state
@@ -200,21 +186,21 @@ module URI
200
186
  @parse_result[:query] = @base[:query]
201
187
 
202
188
  if c == "?"
203
- @parse_result[:query] = ""
189
+ @parse_result[:query] = nil
204
190
  @state = :query_state
205
191
  elsif c == "#"
206
- @parse_result[:fragment] = ""
192
+ @parse_result[:fragment] = nil
207
193
  @state = :fragment_state
208
194
  elsif !c.nil?
209
195
  @parse_result[:query] = nil
210
196
  shorten_url_path
211
197
  @state = :path_state
212
- @scanner.pos -= c.bytesize
198
+ @pos -= 1
213
199
  end
214
200
  end
215
201
  end
216
202
 
217
- def on_relative_slash_state(c)
203
+ def relative_slash_state(c)
218
204
  if special_url? && (c == "/" || c == "\\")
219
205
  @state = :special_authority_ignore_slashes_state
220
206
  elsif c == "/"
@@ -224,28 +210,28 @@ module URI
224
210
  @parse_result[:host] = @base[:host]
225
211
  @parse_result[:port] = @base[:port]
226
212
  @state = :path_state
227
- decrease_pos(c)
213
+ @pos -= 1
228
214
  end
229
215
  end
230
216
 
231
- def on_special_authority_slashes_state(c)
232
- if c == "/" && @scanner.rest.start_with?("/")
217
+ def special_authority_slashes_state(c)
218
+ if c == "/" && rest.start_with?("/")
233
219
  @state = :special_authority_ignore_slashes_state
234
- @scanner.pos += c.bytesize
220
+ @pos += 1
235
221
  else
236
222
  @state = :special_authority_ignore_slashes_state
237
- decrease_pos(c)
223
+ @pos -= 1
238
224
  end
239
225
  end
240
226
 
241
- def on_special_authority_ignore_slashes_state(c)
227
+ def special_authority_ignore_slashes_state(c)
242
228
  if c != "/" && c != "\\"
243
229
  @state = :authority_state
244
- decrease_pos(c)
230
+ @pos -= 1
245
231
  end
246
232
  end
247
233
 
248
- def on_authority_state(c)
234
+ def authority_state(c)
249
235
  if c == "@"
250
236
  @buffer.prepend("%40") if @at_sign_seen
251
237
  @at_sign_seen = true
@@ -267,12 +253,8 @@ module URI
267
253
  @buffer = +""
268
254
  elsif c.nil? || ["/", "?", "#"].include?(c) || (special_url? && c == "\\")
269
255
  raise ParseError, "host is missing" if @at_sign_seen && @buffer.empty?
270
- if c.nil?
271
- @force_continue = true
272
- @scanner.pos -= @buffer.bytesize
273
- else
274
- @scanner.pos -= (@buffer.bytesize + c.bytesize.to_i)
275
- end
256
+
257
+ @pos -= (@buffer.size + 1)
276
258
  @buffer = +""
277
259
  @state = :host_state
278
260
  else
@@ -280,7 +262,7 @@ module URI
280
262
  end
281
263
  end
282
264
 
283
- def on_host_state(c)
265
+ def host_state(c)
284
266
  if c == ":" && !@inside_brackets
285
267
  raise ParseError, "host is missing" if @buffer.empty?
286
268
 
@@ -288,7 +270,7 @@ module URI
288
270
  @buffer = +""
289
271
  @state = :port_state
290
272
  elsif c.nil? || ["/", "?", "#"].include?(c) || (special_url? && c == "\\")
291
- decrease_pos(c)
273
+ @pos -= 1
292
274
  if special_url? && @buffer.empty?
293
275
  raise ParseError, "host is missing"
294
276
  else
@@ -303,7 +285,7 @@ module URI
303
285
  end
304
286
  end
305
287
 
306
- def on_port_state(c)
288
+ def port_state(c)
307
289
  if ascii_digit?(c)
308
290
  @buffer += c
309
291
  elsif c.nil? || ["/", "?", "#"].include?(c) || (special_url? && c == "\\")
@@ -320,15 +302,15 @@ module URI
320
302
  end
321
303
 
322
304
  @state = :path_start_state
323
- decrease_pos(c)
305
+ @pos -= 1
324
306
  else
325
307
  raise ParseError, "port is invalid value"
326
308
  end
327
309
  end
328
310
 
329
- def on_file_state(c)
311
+ def file_state(c)
330
312
  @parse_result[:scheme] = "file"
331
- @parse_result[:host] = ""
313
+ @parse_result[:host] = nil
332
314
 
333
315
  if c == "/" || c == "\\"
334
316
  @state = :file_slash_state
@@ -336,50 +318,50 @@ module URI
336
318
  @parse_result[:host] = @base[:host]
337
319
  @parse_result[:query] = @base[:query]
338
320
  if c == "?"
339
- @parse_result[:query] = ""
321
+ @parse_result[:query] = nil
340
322
  @state = :query_state
341
323
  elsif c == "#"
342
- @parse_result[:fragment] = ""
324
+ @parse_result[:fragment] = nil
343
325
  @state = :fragment_state
344
326
  elsif !c.nil?
345
327
  @parse_result[:query] = nil
346
- if !starts_with_windows_drive_letter?(@scanner.rest)
328
+ if !starts_with_windows_drive_letter?(rest)
347
329
  shorten_url_path
348
330
  else
349
- @paths = []
331
+ @paths = nil
350
332
  end
351
333
  @state = :path_state
352
- decrease_pos(c)
334
+ @pos -= 1
353
335
  end
354
336
  else
355
337
  @state = :path_state
356
- decrease_pos(c)
338
+ @pos -= 1
357
339
  end
358
340
  end
359
341
 
360
- def on_file_slash_state(c)
342
+ def file_slash_state(c)
361
343
  if c == "/" || c == "\\"
362
344
  @state = :file_host_state
363
345
  else
364
346
  if !@base.nil? && @base[:scheme] == "file"
365
347
  @parse_result[:host] = @base[:host]
366
- if !starts_with_windows_drive_letter?(@scanner.rest) && normalized_windows_drive_letter?(@base_paths[0])
348
+ if !starts_with_windows_drive_letter?(rest) && @base_paths && normalized_windows_drive_letter?(@base_paths[0])
367
349
  @paths[0] += @base_paths[0]
368
350
  end
369
351
  end
370
352
  @state = :path_state
371
- decrease_pos(c)
353
+ @pos -= 1
372
354
  end
373
355
  end
374
356
 
375
- def on_file_host_state(c)
357
+ def file_host_state(c)
376
358
  if c.nil? || c == "/" || c == "\\" || c == "?" || c == "#"
377
- @scanner.pos -= c.bytesize unless c.nil?
359
+ @pos -= 1
378
360
 
379
361
  if windows_drive_letter?(@buffer)
380
362
  @state = :path_state
381
363
  elsif @buffer.empty?
382
- @parse_result[:host] = ""
364
+ @parse_result[:host] = nil
383
365
  @state = :path_start_state
384
366
  else
385
367
  host = @host_parser.parse(@buffer, !special_url?)
@@ -395,23 +377,23 @@ module URI
395
377
  @buffer += c unless c.nil?
396
378
  end
397
379
 
398
- def on_path_start_state(c)
399
- return if c.nil?
400
-
380
+ def path_start_state(c)
401
381
  if special_url?
402
- @scanner.pos -= c.bytesize if c != "/" && c != "\\"
382
+ @pos -= 1 if c != "/" && c != "\\"
403
383
  @state = :path_state
404
384
  elsif c == "?"
405
385
  @state = :query_state
406
386
  elsif c == "#"
407
387
  @state = :fragment_state
408
388
  elsif c != nil
409
- @scanner.pos -= c.bytesize if c != "/"
389
+ @pos -= 1 if c != "/"
410
390
  @state = :path_state
411
391
  end
412
392
  end
413
393
 
414
- def on_path_state(c)
394
+ def path_state(c)
395
+ @paths ||= []
396
+
415
397
  if (c.nil? || c == "/") || (special_url? && c == "\/") || (c == "?" || c == "#")
416
398
 
417
399
  if double_dot_path_segments?(@buffer)
@@ -431,10 +413,10 @@ module URI
431
413
  @buffer = +""
432
414
 
433
415
  if c == "?"
434
- @parse_result[:query] = ""
416
+ @parse_result[:query] = nil
435
417
  @state = :query_state
436
418
  elsif c == "#"
437
- @parse_result[:frament] = ""
419
+ @parse_result[:frament] = nil
438
420
  @state = :fragment_state
439
421
  end
440
422
  else
@@ -442,15 +424,15 @@ module URI
442
424
  end
443
425
  end
444
426
 
445
- def on_opaque_path_state(c)
427
+ def opaque_path_state(c)
446
428
  if c == "?"
447
- @parse_result[:query] = ""
429
+ @parse_result[:query] = nil
448
430
  @state = :query_state
449
431
  elsif c == "#"
450
- @parse_result[:fragment] = ""
432
+ @parse_result[:fragment] = nil
451
433
  @state = :fragment_state
452
434
  elsif c == " "
453
- if @scanner.rest.start_with?("?") || @scanner.rest.start_with?("#")
435
+ if rest.start_with?("?") || rest.start_with?("#")
454
436
  @parse_result[:path] = @parse_result[:path].to_s + "%20"
455
437
  else
456
438
  @parse_result[:path] = @parse_result[:path].to_s + " "
@@ -460,7 +442,7 @@ module URI
460
442
  end
461
443
  end
462
444
 
463
- def on_query_state(c)
445
+ def query_state(c)
464
446
  if c.nil? || c == "#"
465
447
  query_percent_encode_set = special_url? ? SPECIAL_QUERY_PERCENT_ENCODE_SET : QUERY_PERCENT_ENCODE_SET
466
448
  @parse_result[:query] = @buffer.chars.map { |c| percent_encode(c, query_percent_encode_set) }.join
@@ -471,19 +453,11 @@ module URI
471
453
  end
472
454
  end
473
455
 
474
- def on_fragment_state(c)
456
+ def fragment_state(c)
475
457
  return if c.nil?
476
458
  @parse_result[:fragment] = @parse_result[:fragment].to_s + percent_encode(c, FRAGMENT_PERCENT_ENCODE_SET)
477
459
  end
478
460
 
479
- def c0_control_or_space?(c)
480
- c0_control? || c == " "
481
- end
482
-
483
- def c0_control?(c)
484
- C0_CONTROL.include?(c.ord)
485
- end
486
-
487
461
  def windows_drive_letter?(str)
488
462
  WINDOWS_DRIVE_LETTER.match?(str)
489
463
  end
@@ -497,7 +471,7 @@ module URI
497
471
  end
498
472
 
499
473
  def special_url?
500
- SPECIAL_SCHEME.keys.include?(@parse_result[:scheme])
474
+ SPECIAL_SCHEME.key?(@parse_result[:scheme])
501
475
  end
502
476
 
503
477
  def single_dot_path_segments?(c)
@@ -515,12 +489,8 @@ module URI
515
489
  @parse_result[:path]&.chomp!
516
490
  end
517
491
 
518
- def decrease_pos(c)
519
- if c.nil?
520
- @force_continue = true
521
- else
522
- @scanner.pos -= c.bytesize
523
- end
492
+ def rest
493
+ @uri[@pos+1..]
524
494
  end
525
495
  end
526
496
  end
metadata CHANGED
@@ -1,28 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: uri-whatwg_parser
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.2
4
+ version: 0.1.3
5
5
  platform: ruby
6
6
  authors:
7
7
  - Yuji Yaginuma
8
8
  bindir: exe
9
9
  cert_chain: []
10
- date: 2025-05-12 00:00:00.000000000 Z
10
+ date: 2025-05-25 00:00:00.000000000 Z
11
11
  dependencies:
12
- - !ruby/object:Gem::Dependency
13
- name: strscan
14
- requirement: !ruby/object:Gem::Requirement
15
- requirements:
16
- - - ">="
17
- - !ruby/object:Gem::Version
18
- version: '0'
19
- type: :runtime
20
- prerelease: false
21
- version_requirements: !ruby/object:Gem::Requirement
22
- requirements:
23
- - - ">="
24
- - !ruby/object:Gem::Version
25
- version: '0'
26
12
  - !ruby/object:Gem::Dependency
27
13
  name: uri
28
14
  requirement: !ruby/object:Gem::Requirement
@@ -71,6 +57,7 @@ executables: []
71
57
  extensions: []
72
58
  extra_rdoc_files: []
73
59
  files:
60
+ - CHANGELOG.md
74
61
  - LICENSE.txt
75
62
  - README.md
76
63
  - Rakefile