parselly 1.0.0 → 1.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Parselly
4
- VERSION = '1.0.0'
4
+ VERSION = '1.2.0'
5
5
  end
data/lib/parselly.rb CHANGED
@@ -8,6 +8,21 @@ require_relative 'parselly/parser'
8
8
  require_relative 'parselly/version'
9
9
 
10
10
  module Parselly
11
+ ParseResult = Struct.new(:ast, :errors)
12
+
13
# Error raised for selector syntax errors.
#
# The parser's on_error hook raises this in tolerant mode with a Hash of
# :message, :line, :column and :offset. A plain String (or no argument) is
# also accepted so the class works with the standard `raise Klass, "msg"`
# and bare `raise Klass` forms instead of failing inside the constructor.
class ParseError < StandardError
  # @return [Hash] structured error details; always contains a :message key
  attr_reader :error

  # @param error [Hash, String, nil] structured details, or just a message
  def initialize(error = nil)
    # Normalise to a Hash so readers of #error never have to branch on type.
    @error = error.is_a?(Hash) ? error : { message: error&.to_s }
    super(@error[:message])
  end
end
21
+
22
# Parses a CSS selector string with a fresh Parser instance.
#
# @param selector [String] selector source text
# @param tolerant [Boolean] forwarded unchanged to Parser#parse
# @return whatever Parser#parse returns for the given mode
def parse(selector, tolerant: false)
  parser = Parser.new
  parser.parse(selector, tolerant: tolerant)
end
25
+
11
26
  def sanitize(selector)
12
27
  scanner = StringScanner.new(selector)
13
28
  result = +''
@@ -46,5 +61,5 @@ module Parselly
46
61
  "\\#{char.ord.to_s(16)} "
47
62
  end
48
63
 
49
- module_function :sanitize, :escaped_hex
64
+ module_function :parse, :sanitize, :escaped_hex
50
65
  end
data/parser.y CHANGED
@@ -82,7 +82,7 @@ rule
82
82
 
83
83
  type_selector
84
84
  : IDENT
85
- { result = Node.new(:type_selector, val[0], @current_position) }
85
+ { result = Node.new(:type_selector, identifier_value(val[0]), @current_position, raw_value: identifier_raw(val[0])) }
86
86
  | STAR
87
87
  { result = Node.new(:universal_selector, '*', @current_position) }
88
88
  ;
@@ -102,30 +102,30 @@ rule
102
102
 
103
103
  id_selector
104
104
  : HASH IDENT
105
- { result = Node.new(:id_selector, val[1], @current_position) }
105
+ { result = Node.new(:id_selector, identifier_value(val[1]), @current_position, raw_value: identifier_raw(val[1])) }
106
106
  ;
107
107
 
108
108
  class_selector
109
109
  : DOT IDENT
110
- { result = Node.new(:class_selector, val[1], @current_position) }
110
+ { result = Node.new(:class_selector, identifier_value(val[1]), @current_position, raw_value: identifier_raw(val[1])) }
111
111
  ;
112
112
 
113
113
  attribute_selector
114
114
  : LBRACKET IDENT RBRACKET
115
- { result = Node.new(:attribute_selector, val[1], @current_position) }
115
+ { result = Node.new(:attribute_selector, identifier_value(val[1]), @current_position, raw_value: identifier_raw(val[1])) }
116
116
  | LBRACKET IDENT attr_matcher STRING RBRACKET
117
117
  {
118
118
  result = Node.new(:attribute_selector, nil, @current_position)
119
- result.add_child(Node.new(:attribute, val[1], @current_position))
119
+ result.add_child(Node.new(:attribute, identifier_value(val[1]), @current_position, raw_value: identifier_raw(val[1])))
120
120
  result.add_child(val[2])
121
121
  result.add_child(Node.new(:value, val[3], @current_position))
122
122
  }
123
123
  | LBRACKET IDENT attr_matcher IDENT RBRACKET
124
124
  {
125
125
  result = Node.new(:attribute_selector, nil, @current_position)
126
- result.add_child(Node.new(:attribute, val[1], @current_position))
126
+ result.add_child(Node.new(:attribute, identifier_value(val[1]), @current_position, raw_value: identifier_raw(val[1])))
127
127
  result.add_child(val[2])
128
- result.add_child(Node.new(:value, val[3], @current_position))
128
+ result.add_child(Node.new(:value, identifier_value(val[3]), @current_position, raw_value: identifier_raw(val[3])))
129
129
  }
130
130
  ;
131
131
 
@@ -146,18 +146,24 @@ rule
146
146
 
147
147
  pseudo_class_selector
148
148
  : COLON IDENT
149
- { result = Node.new(:pseudo_class, val[1], @current_position) }
149
+ { result = Node.new(:pseudo_class, identifier_value(val[1]), @current_position, raw_value: identifier_raw(val[1])) }
150
150
  | COLON IDENT LPAREN any_value RPAREN
151
151
  {
152
- fn = Node.new(:pseudo_function, val[1], @current_position)
152
+ fn = Node.new(:pseudo_function, identifier_value(val[1]), @current_position, raw_value: identifier_raw(val[1]))
153
153
  fn.add_child(val[3])
154
154
  result = fn
155
155
  }
156
+ | IDENT LPAREN any_value RPAREN
157
+ {
158
+ fn = Node.new(:pseudo_function, identifier_value(val[0]), @current_position, raw_value: identifier_raw(val[0]))
159
+ fn.add_child(val[2])
160
+ result = fn
161
+ }
156
162
  ;
157
163
 
158
164
  pseudo_element_selector
159
165
  : COLON COLON IDENT
160
- { result = Node.new(:pseudo_element, val[2], @current_position) }
166
+ { result = Node.new(:pseudo_element, identifier_value(val[2]), @current_position, raw_value: identifier_raw(val[2])) }
161
167
  ;
162
168
 
163
169
  any_value
@@ -258,34 +264,128 @@ rule
258
264
  end
259
265
 
260
266
  ---- header
267
+ require 'set'
268
+
269
# Pre-computed sets for faster lookup
CAN_END_COMPOUND = Set[:IDENT, :STAR, :RPAREN, :RBRACKET].freeze
CAN_START_COMPOUND = Set[:IDENT, :STAR, :DOT, :HASH, :LBRACKET, :COLON].freeze
TYPE_SELECTOR_TYPES = Set[:IDENT, :STAR].freeze
SUBCLASS_SELECTOR_TYPES = Set[:DOT, :HASH, :LBRACKET, :COLON].freeze
SUBCLASS_SELECTOR_END_TYPES = Set[:IDENT, :RBRACKET, :RPAREN].freeze
NTH_PSEUDO_NAMES = Set['nth-child', 'nth-last-child', 'nth-of-type', 'nth-last-of-type', 'nth-col', 'nth-last-col'].freeze
# Matches a complete An+B expression (even, odd, 2n+1, -n+3, 5, ...).
# Anchored with \A and \z rather than ^ and $: the latter match at line
# boundaries, so a value with an embedded newline such as "odd\nfoo" would
# otherwise be accepted.
AN_PLUS_B_REGEX = /\A(even|odd|[+-]?\d*n(?:[+-]\d+)?|[+-]?n(?:[+-]\d+)?|\d+)\z/.freeze
261
277
 
262
278
  ---- inner
263
# Tokenizes and parses +input+.
#
# Strict mode (the default) returns the AST and lets lexer/parser errors
# propagate. Tolerant mode always returns a Parselly::ParseResult holding
# the AST (possibly nil) and the errors collected along the way.
#
# @param input the selector source handed to Parselly::Lexer
# @param tolerant [Boolean] collect errors instead of raising
def parse(input, tolerant: false)
  @tolerant = tolerant
  @errors = []
  @error_index = nil
  @suppress_errors = false
  @lexer = Parselly::Lexer.new(input)

  begin
    @tokens = @lexer.tokenize
  rescue RuntimeError => e
    # Strict mode: re-raise the lexer failure untouched.
    raise unless tolerant

    @errors << parse_error_from_exception(e)
    return Parselly::ParseResult.new(nil, @errors)
  end

  preprocess_tokens!
  @index = 0
  @current_position = { line: 1, column: 1, offset: 0 }

  unless tolerant
    tree = do_parse
    normalize_an_plus_b(tree)
    return tree
  end

  tree = parse_with_recovery
  normalize_an_plus_b(tree) if tree
  Parselly::ParseResult.new(tree, @errors)
end
273
308
 
309
# Runs the generated parser and falls back to a partial parse on failure.
#
# A Parselly::ParseError comes from on_error, which has already appended
# its details to @errors. Any other RuntimeError did not pass through
# on_error, so it is recorded here; otherwise the caller could receive a
# partial (or nil) AST together with an empty error list.
#
# @return the AST from do_parse, or whatever parse_partial_ast returns
def parse_with_recovery
  do_parse
rescue Parselly::ParseError
  parse_partial_ast
rescue RuntimeError => e
  @errors << parse_error_from_exception(e)
  parse_partial_ast
end
314
+
315
# Recovers a partial AST by re-parsing ever shorter prefixes of the tokens.
#
# Starts at @error_index (or the full token count when it is unset) and
# drops one token per attempt. A trailing token whose type is +false+ is
# treated as the end marker and is re-appended to every candidate.
#
# @return the first AST that parses cleanly, or nil when none does
def parse_partial_ast
  return nil if !@tokens || @tokens.empty?

  terminator = @tokens.last
  terminator = nil unless terminator && terminator[0] == false
  body = terminator ? @tokens[0...-1] : @tokens.dup
  start = @error_index || body.length

  start.downto(1) do |keep|
    candidate = body[0...keep]
    candidate << terminator if terminator
    begin
      return parse_from_tokens(candidate, suppress_errors: true)
    rescue Parselly::ParseError, RuntimeError
      next
    end
  end
  nil
end
334
+
335
# Re-runs the parser over an explicit token list.
#
# Resets the cursor and position, sets @suppress_errors for the duration of
# the run, and always clears it afterwards, even when do_parse raises.
#
# @param tokens [Array] token list that replaces @tokens
# @param suppress_errors [Boolean] value of @suppress_errors during the run
# @return the result of do_parse
def parse_from_tokens(tokens, suppress_errors: false)
  @suppress_errors = suppress_errors
  @tokens = tokens
  @index = 0
  @current_position = { line: 1, column: 1, offset: 0 }
  begin
    do_parse
  ensure
    @suppress_errors = false
  end
end
344
+
345
# Converts an exception into the structured error Hash used in @errors.
#
# Line and column are read from an "at L:C" fragment of the message and the
# offset from an "offset N" fragment; each is nil when the fragment is absent.
#
# @param error [Exception]
# @return [Hash] with :message, :line, :column and :offset keys
def parse_error_from_exception(error)
  text = error.message
  position = text.match(/at (\d+):(\d+)/)
  offset = text[/offset (\d+)/, 1]

  {
    message: text,
    line: position && position[1].to_i,
    column: position && position[2].to_i,
    offset: offset && offset.to_i
  }
end
358
+
359
# Returns token.value when the token responds to #value, otherwise the
# token itself (e.g. a plain String).
def identifier_value(token)
  return token unless token.respond_to?(:value)

  token.value
end
362
+
363
# Returns token.raw when the token responds to #raw, otherwise the token
# itself (e.g. a plain String).
def identifier_raw(token)
  return token unless token.respond_to?(:raw)

  token.raw
end
366
+
274
367
  def preprocess_tokens!
275
- new_tokens = []
276
- i = 0
277
- while i < @tokens.size
278
- token = @tokens[i]
279
- next_token = @tokens[i + 1]
280
- new_tokens << token
281
- if next_token && needs_descendant?(token, next_token)
282
- pos = { line: token[2][:line], column: token[2][:column] }
283
- new_tokens << [:DESCENDANT, ' ', pos]
368
+ return if @tokens.size <= 1
369
+
370
+ new_tokens = Array.new(@tokens.size + (@tokens.size / 2)) # Pre-allocate with conservative estimate
371
+ new_tokens_idx = 0
372
+
373
+ last_idx = @tokens.size - 1
374
+ @tokens.each_with_index do |token, i|
375
+ new_tokens[new_tokens_idx] = token
376
+ new_tokens_idx += 1
377
+
378
+ if i < last_idx
379
+ next_token = @tokens[i + 1]
380
+ if needs_descendant?(token, next_token)
381
+ pos = { line: token[2][:line], column: token[2][:column], offset: token[2][:offset] }
382
+ new_tokens[new_tokens_idx] = [:DESCENDANT, ' ', pos]
383
+ new_tokens_idx += 1
384
+ end
284
385
  end
285
- i += 1
286
386
  end
287
387
 
288
- @tokens = new_tokens
388
+ @tokens = new_tokens.first(new_tokens_idx)
289
389
  end
290
390
 
291
391
  # Insert DESCENDANT combinator if:
@@ -297,62 +397,42 @@ def needs_descendant?(current, next_tok)
297
397
  current_type = current[0]
298
398
  next_type = next_tok[0]
299
399
 
300
- can_end = can_end_compound?(current_type)
301
- can_start = can_start_compound?(next_type)
302
-
303
400
  # Type selector followed by subclass selector = same compound
304
- if [:IDENT, :STAR].include?(current_type) &&
305
- [:DOT, :HASH, :LBRACKET, :COLON].include?(next_type)
306
- return false
401
+ # Subclass selector followed by subclass selector = same compound
402
+ if SUBCLASS_SELECTOR_TYPES.include?(next_type)
403
+ return false if TYPE_SELECTOR_TYPES.include?(current_type) ||
404
+ SUBCLASS_SELECTOR_END_TYPES.include?(current_type)
307
405
  end
308
406
 
309
- can_end && can_start
310
- end
311
-
312
- def can_end_compound?(token_type)
313
- [:IDENT, :STAR, :RPAREN, :RBRACKET].include?(token_type)
314
- end
315
-
316
- def can_start_compound?(token_type)
317
- # Type selectors and subclass selectors can start a compound selector
318
- [:IDENT, :STAR, :DOT, :HASH, :LBRACKET, :COLON].include?(token_type)
407
+ CAN_END_COMPOUND.include?(current_type) && CAN_START_COMPOUND.include?(next_type)
319
408
  end
320
409
 
321
410
  def normalize_an_plus_b(node)
322
411
  return unless node.respond_to?(:children) && node.children
323
412
 
324
- if node.type == :pseudo_function && nth_pseudo?(node.value)
413
+ if node.type == :pseudo_function && NTH_PSEUDO_NAMES.include?(node.value)
325
414
  child = node.children.first
326
- if child && child.type == :selector_list
415
+ if child&.type == :selector_list
327
416
  an_plus_b_value = extract_an_plus_b_value(child)
328
417
  if an_plus_b_value
329
- node.children[0] = Node.new(:an_plus_b, an_plus_b_value, child.position)
418
+ node.replace_child(0, Node.new(:an_plus_b, an_plus_b_value, child.position))
330
419
  end
331
420
  end
332
421
  end
333
422
  node.children.compact.each { |child| normalize_an_plus_b(child) }
334
423
  end
335
424
 
336
- def nth_pseudo?(name)
337
- %w[nth-child nth-last-child nth-of-type nth-last-of-type nth-col nth-last-col].include?(name)
338
- end
339
-
340
425
  def extract_an_plus_b_value(selector_list_node)
341
426
  return nil unless selector_list_node.children.size == 1
342
427
 
343
428
  seq = selector_list_node.children.first
344
- return nil unless seq.type == :simple_selector_sequence
345
- return nil unless seq.children.size == 1
429
+ return nil unless seq.type == :simple_selector_sequence && seq.children.size == 1
346
430
 
347
431
  type_sel = seq.children.first
348
432
  return nil unless type_sel.type == :type_selector
349
433
 
350
434
  value = type_sel.value
351
- if value =~ /^(even|odd|[+-]?\d*n(?:[+-]\d+)?|[+-]?n(?:[+-]\d+)?|\d+)$/
352
- value
353
- else
354
- nil
355
- end
435
+ value if value =~ AN_PLUS_B_REGEX
356
436
  end
357
437
 
358
438
  def next_token
@@ -368,5 +448,16 @@ end
368
448
  def on_error(token_id, val, vstack)
369
449
  token_name = token_to_str(token_id) || '?'
370
450
  pos = @current_position || { line: '?', column: '?' }
371
- raise "Parse error: unexpected #{token_name} '#{val}' at #{pos[:line]}:#{pos[:column]}"
451
+ error = {
452
+ message: "Parse error: unexpected #{token_name} '#{val}' at #{pos[:line]}:#{pos[:column]}",
453
+ line: pos[:line],
454
+ column: pos[:column],
455
+ offset: pos[:offset]
456
+ }
457
+ if @tolerant
458
+ @errors << error unless @suppress_errors
459
+ @error_index ||= [@index - 1, 0].max
460
+ raise Parselly::ParseError, error
461
+ end
462
+ raise error[:message]
372
463
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: parselly
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.0.0
4
+ version: 1.2.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Yudai Takada
@@ -51,7 +51,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
51
51
  - !ruby/object:Gem::Version
52
52
  version: '0'
53
53
  requirements: []
54
- rubygems_version: 3.6.9
54
+ rubygems_version: 4.0.4
55
55
  specification_version: 4
56
56
  summary: Pure Ruby CSS selector parser.
57
57
  test_files: []