parselly 1.2.0 → 1.3.0

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the differences between those versions as they appear in their respective public registries.
data/parser.y CHANGED
@@ -1,15 +1,15 @@
1
1
  class Parselly::Parser
2
2
  expect 0
3
3
  error_on_expect_mismatch
4
- token IDENT STRING NUMBER
4
+ token IDENT STRING BAD_STRING NUMBER OF
5
5
  HASH DOT STAR
6
6
  LBRACKET RBRACKET
7
7
  LPAREN RPAREN
8
8
  COLON COMMA
9
- CHILD ADJACENT SIBLING DESCENDANT
9
+ CHILD ADJACENT SIBLING DESCENDANT COLUMN
10
10
  EQUAL INCLUDES DASHMATCH
11
11
  PREFIXMATCH SUFFIXMATCH SUBSTRINGMATCH
12
- MINUS
12
+ MINUS PIPE
13
13
 
14
14
  # Precedence rules to resolve shift/reduce conflicts in an_plus_b grammar
15
15
  # These rules ensure that in patterns like "2n+1" or "n-3", the operators
@@ -24,7 +24,7 @@ rule
24
24
  selector_list
25
25
  : complex_selector (COMMA complex_selector)*
26
26
  {
27
- result = Node.new(:selector_list, nil, @current_position)
27
+ result = Node.new(:selector_list, nil, val[0].position)
28
28
  result.add_child(val[0])
29
29
  val[1].each { |pair| result.add_child(pair[1]) }
30
30
  }
@@ -50,13 +50,15 @@ rule
50
50
 
51
51
  combinator
52
52
  : CHILD
53
- { result = Node.new(:child_combinator, '>', @current_position) }
53
+ { result = Node.new(:child_combinator, '>', token_position(val[0])) }
54
54
  | ADJACENT
55
- { result = Node.new(:adjacent_combinator, '+', @current_position) }
55
+ { result = Node.new(:adjacent_combinator, '+', token_position(val[0])) }
56
56
  | SIBLING
57
- { result = Node.new(:sibling_combinator, '~', @current_position) }
57
+ { result = Node.new(:sibling_combinator, '~', token_position(val[0])) }
58
58
  | DESCENDANT
59
- { result = Node.new(:descendant_combinator, ' ', @current_position) }
59
+ { result = Node.new(:descendant_combinator, ' ', token_position(val[0])) }
60
+ | COLUMN
61
+ { result = Node.new(:column_combinator, '||', token_position(val[0])) }
60
62
  ;
61
63
 
62
64
  compound_selector
@@ -82,9 +84,69 @@ rule
82
84
 
83
85
  type_selector
84
86
  : IDENT
85
- { result = Node.new(:type_selector, identifier_value(val[0]), @current_position, raw_value: identifier_raw(val[0])) }
87
+ { result = Node.new(:type_selector, token_value(val[0]), token_position(val[0]), raw_value: token_raw(val[0])) }
86
88
  | STAR
87
- { result = Node.new(:universal_selector, '*', @current_position) }
89
+ { result = Node.new(:universal_selector, '*', token_position(val[0])) }
90
+ | IDENT PIPE IDENT
91
+ {
92
+ result = Node.new(
93
+ :type_selector,
94
+ token_value(val[2]),
95
+ token_position(val[0]),
96
+ raw_value: "#{token_raw(val[0])}|#{token_raw(val[2])}",
97
+ namespace: token_value(val[0])
98
+ )
99
+ }
100
+ | STAR PIPE IDENT
101
+ {
102
+ result = Node.new(
103
+ :type_selector,
104
+ token_value(val[2]),
105
+ token_position(val[0]),
106
+ raw_value: "*|#{token_raw(val[2])}",
107
+ namespace: '*'
108
+ )
109
+ }
110
+ | PIPE IDENT
111
+ {
112
+ result = Node.new(
113
+ :type_selector,
114
+ token_value(val[1]),
115
+ token_position(val[0]),
116
+ raw_value: "|#{token_raw(val[1])}",
117
+ namespace: ''
118
+ )
119
+ }
120
+ | IDENT PIPE STAR
121
+ {
122
+ result = Node.new(
123
+ :universal_selector,
124
+ '*',
125
+ token_position(val[0]),
126
+ raw_value: "#{token_raw(val[0])}|*",
127
+ namespace: token_value(val[0])
128
+ )
129
+ }
130
+ | STAR PIPE STAR
131
+ {
132
+ result = Node.new(
133
+ :universal_selector,
134
+ '*',
135
+ token_position(val[0]),
136
+ raw_value: '*|*',
137
+ namespace: '*'
138
+ )
139
+ }
140
+ | PIPE STAR
141
+ {
142
+ result = Node.new(
143
+ :universal_selector,
144
+ '*',
145
+ token_position(val[0]),
146
+ raw_value: '|*',
147
+ namespace: ''
148
+ )
149
+ }
88
150
  ;
89
151
 
90
152
  subclass_selector
@@ -102,149 +164,232 @@ rule
102
164
 
103
165
  id_selector
104
166
  : HASH IDENT
105
- { result = Node.new(:id_selector, identifier_value(val[1]), @current_position, raw_value: identifier_raw(val[1])) }
167
+ { result = Node.new(:id_selector, token_value(val[1]), token_position(val[0]), raw_value: token_raw(val[1])) }
106
168
  ;
107
169
 
108
170
  class_selector
109
171
  : DOT IDENT
110
- { result = Node.new(:class_selector, identifier_value(val[1]), @current_position, raw_value: identifier_raw(val[1])) }
172
+ { result = Node.new(:class_selector, token_value(val[1]), token_position(val[0]), raw_value: token_raw(val[1])) }
111
173
  ;
112
174
 
113
175
  attribute_selector
114
- : LBRACKET IDENT RBRACKET
115
- { result = Node.new(:attribute_selector, identifier_value(val[1]), @current_position, raw_value: identifier_raw(val[1])) }
116
- | LBRACKET IDENT attr_matcher STRING RBRACKET
176
+ : LBRACKET attribute_name RBRACKET
117
177
  {
118
- result = Node.new(:attribute_selector, nil, @current_position)
119
- result.add_child(Node.new(:attribute, identifier_value(val[1]), @current_position, raw_value: identifier_raw(val[1])))
120
- result.add_child(val[2])
121
- result.add_child(Node.new(:value, val[3], @current_position))
178
+ result = Node.new(
179
+ :attribute_selector,
180
+ val[1].value,
181
+ token_position(val[0]),
182
+ raw_value: val[1].raw_value,
183
+ namespace: val[1].namespace
184
+ )
122
185
  }
123
- | LBRACKET IDENT attr_matcher IDENT RBRACKET
186
+ | LBRACKET attribute_name attr_matcher attribute_value attr_modifier RBRACKET
124
187
  {
125
- result = Node.new(:attribute_selector, nil, @current_position)
126
- result.add_child(Node.new(:attribute, identifier_value(val[1]), @current_position, raw_value: identifier_raw(val[1])))
188
+ result = Node.new(:attribute_selector, nil, token_position(val[0]), modifier: val[4])
189
+ result.add_child(val[1])
127
190
  result.add_child(val[2])
128
- result.add_child(Node.new(:value, identifier_value(val[3]), @current_position, raw_value: identifier_raw(val[3])))
191
+ result.add_child(val[3])
192
+ }
193
+ ;
194
+
195
+ attribute_name
196
+ : IDENT
197
+ {
198
+ result = Node.new(:attribute, token_value(val[0]), token_position(val[0]), raw_value: token_raw(val[0]))
199
+ }
200
+ | IDENT PIPE IDENT
201
+ {
202
+ result = Node.new(
203
+ :attribute,
204
+ token_value(val[2]),
205
+ token_position(val[0]),
206
+ raw_value: "#{token_raw(val[0])}|#{token_raw(val[2])}",
207
+ namespace: token_value(val[0])
208
+ )
209
+ }
210
+ | STAR PIPE IDENT
211
+ {
212
+ result = Node.new(
213
+ :attribute,
214
+ token_value(val[2]),
215
+ token_position(val[0]),
216
+ raw_value: "*|#{token_raw(val[2])}",
217
+ namespace: '*'
218
+ )
219
+ }
220
+ | PIPE IDENT
221
+ {
222
+ result = Node.new(
223
+ :attribute,
224
+ token_value(val[1]),
225
+ token_position(val[0]),
226
+ raw_value: "|#{token_raw(val[1])}",
227
+ namespace: ''
228
+ )
129
229
  }
130
230
  ;
131
231
 
132
232
  attr_matcher
133
233
  : EQUAL
134
- { result = Node.new(:equal_operator, '=', @current_position) }
234
+ { result = Node.new(:equal_operator, '=', token_position(val[0])) }
135
235
  | INCLUDES
136
- { result = Node.new(:includes_operator, '~=', @current_position) }
236
+ { result = Node.new(:includes_operator, '~=', token_position(val[0])) }
137
237
  | DASHMATCH
138
- { result = Node.new(:dashmatch_operator, '|=', @current_position) }
238
+ { result = Node.new(:dashmatch_operator, '|=', token_position(val[0])) }
139
239
  | PREFIXMATCH
140
- { result = Node.new(:prefixmatch_operator, '^=', @current_position) }
240
+ { result = Node.new(:prefixmatch_operator, '^=', token_position(val[0])) }
141
241
  | SUFFIXMATCH
142
- { result = Node.new(:suffixmatch_operator, '$=', @current_position) }
242
+ { result = Node.new(:suffixmatch_operator, '$=', token_position(val[0])) }
143
243
  | SUBSTRINGMATCH
144
- { result = Node.new(:substringmatch_operator, '*=', @current_position) }
244
+ { result = Node.new(:substringmatch_operator, '*=', token_position(val[0])) }
245
+ ;
246
+
247
+ attribute_value
248
+ : STRING
249
+ { result = Node.new(:value, token_value(val[0]), token_position(val[0]), raw_value: token_raw(val[0]), quote: token_quote(val[0])) }
250
+ | IDENT
251
+ { result = Node.new(:value, token_value(val[0]), token_position(val[0]), raw_value: token_raw(val[0])) }
252
+ | NUMBER
253
+ { result = Node.new(:value, token_value(val[0]), token_position(val[0]), raw_value: token_raw(val[0])) }
254
+ ;
255
+
256
+ attr_modifier
257
+ :
258
+ { result = nil }
259
+ | IDENT
260
+ { result = attribute_modifier_value(val[0]) }
145
261
  ;
146
262
 
147
263
  pseudo_class_selector
148
264
  : COLON IDENT
149
- { result = Node.new(:pseudo_class, identifier_value(val[1]), @current_position, raw_value: identifier_raw(val[1])) }
150
- | COLON IDENT LPAREN any_value RPAREN
151
265
  {
152
- fn = Node.new(:pseudo_function, identifier_value(val[1]), @current_position, raw_value: identifier_raw(val[1]))
153
- fn.add_child(val[3])
154
- result = fn
266
+ name = token_value(val[1])
267
+ node_type = LEGACY_PSEUDO_ELEMENT_NAMES.include?(pseudo_name(name)) ? :pseudo_element : :pseudo_class
268
+ result = Node.new(node_type, name, token_position(val[0]), raw_value: token_raw(val[1]), prefix: ':')
155
269
  }
156
- | IDENT LPAREN any_value RPAREN
270
+ | COLON IDENT LPAREN any_value RPAREN
157
271
  {
158
- fn = Node.new(:pseudo_function, identifier_value(val[0]), @current_position, raw_value: identifier_raw(val[0]))
159
- fn.add_child(val[2])
272
+ fn = Node.new(:pseudo_function, token_value(val[1]), token_position(val[0]), raw_value: token_raw(val[1]), prefix: ':')
273
+ fn.add_child(normalize_pseudo_argument(fn.value, val[3]))
160
274
  result = fn
161
275
  }
162
276
  ;
163
277
 
164
278
  pseudo_element_selector
165
279
  : COLON COLON IDENT
166
- { result = Node.new(:pseudo_element, identifier_value(val[2]), @current_position, raw_value: identifier_raw(val[2])) }
280
+ { result = Node.new(:pseudo_element, token_value(val[2]), token_position(val[0]), raw_value: token_raw(val[2]), prefix: '::') }
281
+ | COLON COLON IDENT LPAREN any_value RPAREN
282
+ {
283
+ fn = Node.new(:pseudo_element_function, token_value(val[2]), token_position(val[0]), raw_value: token_raw(val[2]), prefix: '::')
284
+ fn.add_child(val[4])
285
+ result = fn
286
+ }
167
287
  ;
168
288
 
169
289
  any_value
170
- : STRING
171
- { result = Node.new(:argument, val[0], @current_position) }
290
+ : nth_of_value
291
+ { result = val[0] }
292
+ | STRING
293
+ { result = Node.new(:argument, token_value(val[0]), token_position(val[0]), raw_value: token_raw(val[0]), quote: token_quote(val[0])) }
172
294
  | an_plus_b
173
295
  { result = val[0] }
174
296
  | relative_selector_list
175
297
  { result = val[0] }
176
298
  ;
177
299
 
300
+ nth_of_value
301
+ : nth_of_an_plus_b OF relative_selector_list
302
+ {
303
+ result = Node.new(:nth_selector_argument, nil, val[0].position)
304
+ result.add_child(val[0])
305
+ result.add_child(val[2])
306
+ }
307
+ ;
308
+
309
+ nth_of_an_plus_b
310
+ : an_plus_b
311
+ { result = val[0] }
312
+ | IDENT
313
+ {
314
+ value = token_value(val[0])
315
+ unless value =~ AN_PLUS_B_REGEX
316
+ raise Parselly::SyntaxError, parse_error("Parse error: invalid An+B value '#{value}'", token_position(val[0]))
317
+ end
318
+
319
+ result = Node.new(:an_plus_b, value, token_position(val[0]), raw_value: token_raw(val[0]))
320
+ }
321
+ ;
322
+
178
323
  an_plus_b
179
324
  # Positive coefficient cases
180
325
  : NUMBER IDENT ADJACENT NUMBER
181
326
  {
182
327
  # Handle 'An+B' like '2n+1'
183
- result = Node.new(:an_plus_b, "#{val[0]}#{val[1]}+#{val[3]}", @current_position)
328
+ result = Node.new(:an_plus_b, "#{token_value(val[0])}#{token_value(val[1])}+#{token_value(val[3])}", token_position(val[0]))
184
329
  }
185
330
  | NUMBER IDENT MINUS NUMBER
186
331
  {
187
332
  # Handle 'An-B' like '2n-1'
188
- result = Node.new(:an_plus_b, "#{val[0]}#{val[1]}-#{val[3]}", @current_position)
333
+ result = Node.new(:an_plus_b, "#{token_value(val[0])}#{token_value(val[1])}-#{token_value(val[3])}", token_position(val[0]))
189
334
  }
190
335
  | NUMBER IDENT
191
336
  {
192
337
  # Handle 'An' like '2n' or composite like '2n-1' (when '-1' is part of IDENT)
193
- result = Node.new(:an_plus_b, "#{val[0]}#{val[1]}", @current_position)
338
+ result = Node.new(:an_plus_b, "#{token_value(val[0])}#{token_value(val[1])}", token_position(val[0]))
194
339
  }
195
340
  | IDENT ADJACENT NUMBER
196
341
  {
197
342
  # Handle 'n+B' like 'n+5' or keywords followed by offset (rare but valid)
198
- result = Node.new(:an_plus_b, "#{val[0]}+#{val[2]}", @current_position)
343
+ result = Node.new(:an_plus_b, "#{token_value(val[0])}+#{token_value(val[2])}", token_position(val[0]))
199
344
  }
200
345
  | IDENT MINUS NUMBER
201
346
  {
202
347
  # Handle 'n-B' like 'n-3'
203
- result = Node.new(:an_plus_b, "#{val[0]}-#{val[2]}", @current_position)
348
+ result = Node.new(:an_plus_b, "#{token_value(val[0])}-#{token_value(val[2])}", token_position(val[0]))
204
349
  }
205
350
  # Negative coefficient cases
206
351
  | MINUS NUMBER IDENT ADJACENT NUMBER
207
352
  {
208
353
  # Handle '-An+B' like '-2n+1'
209
- result = Node.new(:an_plus_b, "-#{val[1]}#{val[2]}+#{val[4]}", @current_position)
354
+ result = Node.new(:an_plus_b, "-#{token_value(val[1])}#{token_value(val[2])}+#{token_value(val[4])}", token_position(val[0]))
210
355
  }
211
356
  | MINUS NUMBER IDENT MINUS NUMBER
212
357
  {
213
358
  # Handle '-An-B' like '-2n-1'
214
- result = Node.new(:an_plus_b, "-#{val[1]}#{val[2]}-#{val[4]}", @current_position)
359
+ result = Node.new(:an_plus_b, "-#{token_value(val[1])}#{token_value(val[2])}-#{token_value(val[4])}", token_position(val[0]))
215
360
  }
216
361
  | MINUS NUMBER IDENT
217
362
  {
218
363
  # Handle '-An' like '-2n' or composite like '-2n+1' (when '+1' is part of IDENT)
219
- result = Node.new(:an_plus_b, "-#{val[1]}#{val[2]}", @current_position)
364
+ result = Node.new(:an_plus_b, "-#{token_value(val[1])}#{token_value(val[2])}", token_position(val[0]))
220
365
  }
221
366
  | MINUS IDENT ADJACENT NUMBER
222
367
  {
223
368
  # Handle '-n+B' like '-n+3'
224
- result = Node.new(:an_plus_b, "-#{val[1]}+#{val[3]}", @current_position)
369
+ result = Node.new(:an_plus_b, "-#{token_value(val[1])}+#{token_value(val[3])}", token_position(val[0]))
225
370
  }
226
371
  | MINUS IDENT MINUS NUMBER
227
372
  {
228
373
  # Handle '-n-B' like '-n-2'
229
- result = Node.new(:an_plus_b, "-#{val[1]}-#{val[3]}", @current_position)
374
+ result = Node.new(:an_plus_b, "-#{token_value(val[1])}-#{token_value(val[3])}", token_position(val[0]))
230
375
  }
231
376
  | MINUS IDENT
232
377
  {
233
378
  # Handle '-n' or composite like '-n+3' (when '+3' is part of IDENT)
234
- result = Node.new(:an_plus_b, "-#{val[1]}", @current_position)
379
+ result = Node.new(:an_plus_b, "-#{token_value(val[1])}", token_position(val[0]))
235
380
  }
236
381
  # Simple cases
237
382
  | NUMBER
238
383
  {
239
384
  # Handle just a number like '3'
240
- result = Node.new(:an_plus_b, val[0].to_s, @current_position)
385
+ result = Node.new(:an_plus_b, token_value(val[0]).to_s, token_position(val[0]))
241
386
  }
242
387
  ;
243
388
 
244
389
  relative_selector_list
245
390
  : relative_selector (COMMA relative_selector)*
246
391
  {
247
- result = Node.new(:selector_list, nil, @current_position)
392
+ result = Node.new(:selector_list, nil, val[0].position)
248
393
  result.add_child(val[0])
249
394
  val[1].each { |pair| result.add_child(pair[1]) }
250
395
  }
@@ -267,49 +412,122 @@ end
267
412
  require 'set'
268
413
 
269
414
  # Pre-computed sets for faster lookup
270
- CAN_END_COMPOUND = Set[:IDENT, :STAR, :RPAREN, :RBRACKET].freeze
415
+ CAN_END_COMPOUND = Set[:IDENT, :STAR, :RPAREN, :RBRACKET, :NUMBER].freeze
271
416
  CAN_START_COMPOUND = Set[:IDENT, :STAR, :DOT, :HASH, :LBRACKET, :COLON].freeze
272
- TYPE_SELECTOR_TYPES = Set[:IDENT, :STAR].freeze
273
- SUBCLASS_SELECTOR_TYPES = Set[:DOT, :HASH, :LBRACKET, :COLON].freeze
274
- SUBCLASS_SELECTOR_END_TYPES = Set[:IDENT, :RBRACKET, :RPAREN].freeze
275
417
  NTH_PSEUDO_NAMES = Set['nth-child', 'nth-last-child', 'nth-of-type', 'nth-last-of-type', 'nth-col', 'nth-last-col'].freeze
276
- AN_PLUS_B_REGEX = /^(even|odd|[+-]?\d*n(?:[+-]\d+)?|[+-]?n(?:[+-]\d+)?|\d+)$/.freeze
418
+ AN_PLUS_B_REGEX = /^(even|odd|[+-]?\d*n(?:[+-]\d+)?|[+-]?n(?:[+-]\d+)?|\d+)$/i.freeze
419
+ SELECTOR_LIST_PSEUDO_NAMES = Set['is', 'where', 'not'].freeze
420
+ RELATIVE_SELECTOR_LIST_PSEUDO_NAMES = Set['has'].freeze
421
+ LEGACY_PSEUDO_ELEMENT_NAMES = Set['before', 'after', 'first-line', 'first-letter'].freeze
422
+ ATTRIBUTE_MODIFIERS = Set['i', 's'].freeze
277
423
 
278
424
  ---- inner
279
- def parse(input, tolerant: false)
425
+ def parse(input, tolerant: false, max_length: nil, max_tokens: nil, max_depth: nil, freeze: false)
280
426
  @tolerant = tolerant
281
427
  @errors = []
282
428
  @error_index = nil
283
429
  @suppress_errors = false
430
+ @max_depth = max_depth
431
+ @freeze_tree = freeze
432
+
433
+ unless input.is_a?(String)
434
+ error = parse_error('Input must be a String', { line: 1, column: 1, offset: 0 })
435
+ return Parselly::ParseResult.new(nil, [error]) if tolerant
436
+
437
+ raise Parselly::ParseError, error
438
+ end
439
+
440
+ if max_length && input.length > max_length
441
+ error = parse_error("Input exceeds max_length #{max_length}", { line: 1, column: 1, offset: 0 })
442
+ return Parselly::ParseResult.new(nil, [error]) if tolerant
443
+
444
+ raise Parselly::ParseError, error
445
+ end
446
+
284
447
  @lexer = Parselly::Lexer.new(input)
285
448
  begin
286
449
  @tokens = @lexer.tokenize
287
- rescue RuntimeError => e
450
+ rescue Parselly::ParseError, RuntimeError => e
288
451
  if tolerant
289
452
  @errors << parse_error_from_exception(e)
290
453
  return Parselly::ParseResult.new(nil, @errors)
291
454
  end
292
455
  raise
293
456
  end
457
+
458
+ if max_tokens && @tokens.size > max_tokens
459
+ error = parse_error("Input exceeds max_tokens #{max_tokens}", @tokens[max_tokens][2])
460
+ return Parselly::ParseResult.new(nil, [error]) if tolerant
461
+
462
+ raise Parselly::ParseError, error
463
+ end
464
+
294
465
  preprocess_tokens!
295
466
  @index = 0
296
467
  @current_position = { line: 1, column: 1, offset: 0 }
297
468
 
298
469
  if tolerant
299
470
  ast = parse_with_recovery
300
- normalize_an_plus_b(ast) if ast
471
+ ast = validate_or_recover_tolerant_ast(ast) if ast
472
+ ast.freeze_tree if ast && @freeze_tree
301
473
  return Parselly::ParseResult.new(ast, @errors)
302
474
  end
303
475
 
304
476
  ast = do_parse
305
- normalize_an_plus_b(ast)
477
+ finalize_ast(ast)
478
+ ast.freeze_tree if @freeze_tree
306
479
  ast
307
480
  end
308
481
 
309
482
  def parse_with_recovery
310
483
  do_parse
311
484
  rescue Parselly::ParseError, RuntimeError
312
- parse_partial_ast
485
+ parse_selector_list_recovery || parse_partial_ast
486
+ end
487
+
488
+ def validate_or_recover_tolerant_ast(ast)
489
+ finalize_ast(ast)
490
+ ast
491
+ rescue Parselly::ParseError => e
492
+ @errors << parse_error_from_exception(e)
493
+ parse_selector_list_recovery(validate: true) || ast
494
+ end
495
+
496
+ def parse_selector_list_recovery(validate: false)
497
+ return nil unless @tokens && @tokens.any? { |token| token[0] == :COMMA }
498
+
499
+ eof_token = @tokens.last if @tokens.last && @tokens.last[0] == false
500
+ body_tokens = eof_token ? @tokens[0...-1] : @tokens
501
+ segments = []
502
+ current = []
503
+
504
+ body_tokens.each do |token|
505
+ if token[0] == :COMMA
506
+ segments << current
507
+ current = []
508
+ else
509
+ current << token
510
+ end
511
+ end
512
+ segments << current
513
+
514
+ result = Node.new(:selector_list, nil, body_tokens.first&.[](2) || { line: 1, column: 1, offset: 0 })
515
+ recovered = false
516
+
517
+ segments.each do |segment|
518
+ next if segment.empty?
519
+
520
+ begin
521
+ parsed = parse_from_tokens(segment + [eof_token || [false, nil, segment.last[2]]], suppress_errors: true)
522
+ finalize_ast(parsed) if validate
523
+ result.add_child(parsed)
524
+ recovered = true
525
+ rescue Parselly::ParseError, RuntimeError
526
+ next
527
+ end
528
+ end
529
+
530
+ recovered ? result : nil
313
531
  end
314
532
 
315
533
  def parse_partial_ast
@@ -343,6 +561,8 @@ ensure
343
561
  end
344
562
 
345
563
  def parse_error_from_exception(error)
564
+ return error.error if error.respond_to?(:error)
565
+
346
566
  line = nil
347
567
  column = nil
348
568
  offset = nil
@@ -356,17 +576,61 @@ def parse_error_from_exception(error)
356
576
  { message: error.message, line: line, column: column, offset: offset }
357
577
  end
358
578
 
359
- def identifier_value(token)
579
+ def parse_error(message, position)
580
+ {
581
+ message: message,
582
+ line: position[:line],
583
+ column: position[:column],
584
+ offset: position[:offset]
585
+ }.tap do |error|
586
+ error[:end_line] = position[:end_line] if position.key?(:end_line)
587
+ error[:end_column] = position[:end_column] if position.key?(:end_column)
588
+ error[:end_offset] = position[:end_offset] if position.key?(:end_offset)
589
+ end
590
+ end
591
+
592
+ def token_value(token)
360
593
  token.respond_to?(:value) ? token.value : token
361
594
  end
362
595
 
363
- def identifier_raw(token)
364
- token.respond_to?(:raw) ? token.raw : token
596
+ def token_raw(token)
597
+ token.respond_to?(:raw) ? token.raw : token_value(token)
598
+ end
599
+
600
+ def token_position(token)
601
+ token.respond_to?(:position) && token.position ? token.position : @current_position
602
+ end
603
+
604
+ def token_quote(token)
605
+ token.respond_to?(:quote) ? token.quote : nil
606
+ end
607
+
608
+ def pseudo_name(name)
609
+ name.to_s.downcase
610
+ end
611
+
612
+ def attribute_modifier_value(token)
613
+ modifier = token_value(token).to_s
614
+ normalized_modifier = modifier.downcase
615
+ return normalized_modifier if ATTRIBUTE_MODIFIERS.include?(normalized_modifier)
616
+
617
+ raise_syntax_error("Parse error: invalid attribute modifier '#{modifier}'", token_position(token))
618
+ end
619
+
620
+ def raise_syntax_error(message, position)
621
+ error = parse_error(message, position)
622
+ if @tolerant
623
+ @errors << error unless @suppress_errors
624
+ @error_index ||= [@index - 1, 0].max
625
+ end
626
+ raise Parselly::SyntaxError, error
365
627
  end
366
628
 
367
629
  def preprocess_tokens!
368
630
  return if @tokens.size <= 1
369
631
 
632
+ mark_nth_of_tokens!
633
+
370
634
  new_tokens = Array.new(@tokens.size + (@tokens.size / 2)) # Pre-allocate with conservative estimate
371
635
  new_tokens_idx = 0
372
636
 
@@ -378,7 +642,7 @@ def preprocess_tokens!
378
642
  if i < last_idx
379
643
  next_token = @tokens[i + 1]
380
644
  if needs_descendant?(token, next_token)
381
- pos = { line: token[2][:line], column: token[2][:column], offset: token[2][:offset] }
645
+ pos = next_token[2]
382
646
  new_tokens[new_tokens_idx] = [:DESCENDANT, ' ', pos]
383
647
  new_tokens_idx += 1
384
648
  end
@@ -388,38 +652,129 @@ def preprocess_tokens!
388
652
  @tokens = new_tokens.first(new_tokens_idx)
389
653
  end
390
654
 
391
- # Insert DESCENDANT combinator if:
392
- # - Current token can end a compound selector
393
- # - Next token can start a compound selector
394
- # - EXCEPT when current is type_selector and next is subclass_selector
395
- # (they belong to the same compound selector)
655
+ def mark_nth_of_tokens!
656
+ paren_depth = 0
657
+ last_idx = @tokens.size - 1
658
+
659
+ @tokens.each_with_index do |token, index|
660
+ case token[0]
661
+ when :LPAREN
662
+ paren_depth += 1
663
+ when :RPAREN
664
+ paren_depth -= 1 if paren_depth.positive?
665
+ when :IDENT
666
+ next unless paren_depth.positive?
667
+ next unless token_value(token[1]) == 'of'
668
+ next if index.zero? || index >= last_idx
669
+
670
+ previous_token = @tokens[index - 1]
671
+ next_token = @tokens[index + 1]
672
+ if token_gap?(previous_token, token) && token_gap?(token, next_token) &&
673
+ CAN_START_COMPOUND.include?(next_token[0])
674
+ token[0] = :OF
675
+ end
676
+ end
677
+ end
678
+ end
679
+
680
+ # Insert DESCENDANT combinator only when actual ignored input
681
+ # (CSS whitespace or comments) separated two compound selector tokens.
396
682
  def needs_descendant?(current, next_tok)
397
683
  current_type = current[0]
398
684
  next_type = next_tok[0]
399
685
 
400
- # Type selector followed by subclass selector = same compound
401
- # Subclass selector followed by subclass selector = same compound
402
- if SUBCLASS_SELECTOR_TYPES.include?(next_type)
403
- return false if TYPE_SELECTOR_TYPES.include?(current_type) ||
404
- SUBCLASS_SELECTOR_END_TYPES.include?(current_type)
405
- end
686
+ CAN_END_COMPOUND.include?(current_type) &&
687
+ CAN_START_COMPOUND.include?(next_type) &&
688
+ token_gap?(current, next_tok)
689
+ end
406
690
 
407
- CAN_END_COMPOUND.include?(current_type) && CAN_START_COMPOUND.include?(next_type)
691
+ def token_gap?(current, next_tok)
692
+ current_end = current[2][:end_offset] || current[2][:offset]
693
+ next_tok[2][:offset] > current_end
408
694
  end
409
695
 
410
- def normalize_an_plus_b(node)
696
+ def finalize_ast(node)
697
+ validate_known_pseudo_functions!(node)
698
+ validate_max_depth!(node) if @max_depth
699
+ end
700
+
701
+ def validate_known_pseudo_functions!(node)
411
702
  return unless node.respond_to?(:children) && node.children
412
703
 
413
- if node.type == :pseudo_function && NTH_PSEUDO_NAMES.include?(node.value)
414
- child = node.children.first
415
- if child&.type == :selector_list
416
- an_plus_b_value = extract_an_plus_b_value(child)
417
- if an_plus_b_value
418
- node.replace_child(0, Node.new(:an_plus_b, an_plus_b_value, child.position))
419
- end
704
+ if node.type == :pseudo_function
705
+ name = pseudo_name(node.value)
706
+ validate_nth_pseudo!(node) if NTH_PSEUDO_NAMES.include?(name)
707
+ validate_selector_list_pseudo!(node) if SELECTOR_LIST_PSEUDO_NAMES.include?(name)
708
+ validate_relative_selector_list_pseudo!(node) if RELATIVE_SELECTOR_LIST_PSEUDO_NAMES.include?(name)
709
+ end
710
+
711
+ node.children.compact.each { |child| validate_known_pseudo_functions!(child) }
712
+ end
713
+
714
+ def validate_nth_pseudo!(node)
715
+ child = node.children.first
716
+ return if child&.type == :an_plus_b
717
+ return if child&.type == :nth_selector_argument
718
+
719
+ raise Parselly::SyntaxError, parse_error(
720
+ "Parse error: invalid argument for :#{node.value}()",
721
+ child&.position || node.position
722
+ )
723
+ end
724
+
725
+ def validate_selector_list_pseudo!(node)
726
+ child = node.children.first
727
+ return if child&.type == :selector_list && !relative_selector_list?(child)
728
+
729
+ raise Parselly::SyntaxError, parse_error(
730
+ "Parse error: invalid argument for :#{node.value}()",
731
+ child&.position || node.position
732
+ )
733
+ end
734
+
735
+ def validate_relative_selector_list_pseudo!(node)
736
+ child = node.children.first
737
+ return if child&.type == :selector_list
738
+
739
+ raise Parselly::SyntaxError, parse_error(
740
+ "Parse error: invalid argument for :#{node.value}()",
741
+ child&.position || node.position
742
+ )
743
+ end
744
+
745
+ def relative_selector_list?(node)
746
+ node.type == :selector_list &&
747
+ node.children.any? { |child| relative_selector?(child) }
748
+ end
749
+
750
+ def relative_selector?(node)
751
+ node.type == :selector && node.children.first &&
752
+ node.children.first.type.to_s.end_with?('_combinator')
753
+ end
754
+
755
+ def validate_max_depth!(node)
756
+ stack = [[node, 1]]
757
+
758
+ until stack.empty?
759
+ current, depth = stack.pop
760
+ if depth > @max_depth
761
+ raise Parselly::ParseError, parse_error(
762
+ "Input exceeds max_depth #{@max_depth}",
763
+ current.position
764
+ )
420
765
  end
766
+ current.children.each { |child| stack << [child, depth + 1] }
421
767
  end
422
- node.children.compact.each { |child| normalize_an_plus_b(child) }
768
+ end
769
+
770
+ def normalize_pseudo_argument(name, argument)
771
+ return argument unless NTH_PSEUDO_NAMES.include?(pseudo_name(name))
772
+ return argument unless argument&.type == :selector_list
773
+
774
+ an_plus_b_value = extract_an_plus_b_value(argument)
775
+ return argument unless an_plus_b_value
776
+
777
+ Node.new(:an_plus_b, an_plus_b_value, argument.position, raw_value: an_plus_b_value)
423
778
  end
424
779
 
425
780
  def extract_an_plus_b_value(selector_list_node)
@@ -442,22 +797,20 @@ def next_token
442
797
  @index += 1
443
798
  @current_position = token_position
444
799
 
445
- [token_type, token_value]
800
+ [token_type, parser_token_value(token_value, token_position)]
801
+ end
802
+
803
+ def parser_token_value(value, position)
804
+ if value.respond_to?(:position)
805
+ value.position ||= position if value.respond_to?(:position=)
806
+ return value
807
+ end
808
+
809
+ Parselly::Lexer::TokenValue.new(value: value, raw: value, position: position)
446
810
  end
447
811
 
448
812
  def on_error(token_id, val, vstack)
449
813
  token_name = token_to_str(token_id) || '?'
450
814
  pos = @current_position || { line: '?', column: '?' }
451
- error = {
452
- message: "Parse error: unexpected #{token_name} '#{val}' at #{pos[:line]}:#{pos[:column]}",
453
- line: pos[:line],
454
- column: pos[:column],
455
- offset: pos[:offset]
456
- }
457
- if @tolerant
458
- @errors << error unless @suppress_errors
459
- @error_index ||= [@index - 1, 0].max
460
- raise Parselly::ParseError, error
461
- end
462
- raise error[:message]
815
+ raise_syntax_error("Parse error: unexpected #{token_name} '#{token_value(val)}' at #{pos[:line]}:#{pos[:column]}", pos)
463
816
  end