parselly 1.0.0 → 1.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/parselly/lexer.rb +44 -15
- data/lib/parselly/node.rb +139 -15
- data/lib/parselly/parser.rb +271 -173
- data/lib/parselly/version.rb +1 -1
- data/lib/parselly.rb +16 -1
- data/parser.y +146 -55
- metadata +2 -2
data/lib/parselly/version.rb
CHANGED
data/lib/parselly.rb
CHANGED
|
@@ -8,6 +8,21 @@ require_relative 'parselly/parser'
|
|
|
8
8
|
require_relative 'parselly/version'
|
|
9
9
|
|
|
10
10
|
module Parselly
|
|
11
|
+
# Value returned by a tolerant parse: +ast+ is the (possibly partial, possibly nil)
# syntax tree and +errors+ is the list of error hashes collected while parsing.
ParseResult = Struct.new(:ast, :errors)
|
|
12
|
+
|
|
13
|
+
# Error raised by the parser's +on_error+ hook when running in tolerant mode.
#
# The parser passes a Hash describing the failure (+:message+, +:line+,
# +:column+, +:offset+); the Hash is kept in +error+ and its +:message+ entry
# becomes the exception message. A plain message String (or no argument at
# all) is also accepted so the class works with the usual
# <tt>raise ParseError, 'text'</tt> / <tt>raise ParseError</tt> forms.
class ParseError < StandardError
  # @return [Hash, String, nil] the object this error was built from
  attr_reader :error

  # @param error [Hash, String, nil] structured error details or a bare message
  def initialize(error = nil)
    @error = error
    # Only a Hash carries a :message entry; anything else is the message itself
    # (nil lets StandardError fall back to its default message).
    super(error.is_a?(Hash) ? error[:message] : error)
  end
end
|
|
21
|
+
|
|
22
|
+
# Parses +selector+ with a freshly created Parser.
#
# @param selector [String] the selector text to parse
# @param tolerant [Boolean] forwarded to Parser#parse unchanged
# @return whatever Parser#parse returns for the given mode
def parse(selector, tolerant: false)
  parser = Parser.new
  parser.parse(selector, tolerant: tolerant)
end
|
|
25
|
+
|
|
11
26
|
def sanitize(selector)
|
|
12
27
|
scanner = StringScanner.new(selector)
|
|
13
28
|
result = +''
|
|
@@ -46,5 +61,5 @@ module Parselly
|
|
|
46
61
|
"\\#{char.ord.to_s(16)} "
|
|
47
62
|
end
|
|
48
63
|
|
|
49
|
-
module_function :sanitize, :escaped_hex
|
|
64
|
+
module_function :parse, :sanitize, :escaped_hex
|
|
50
65
|
end
|
data/parser.y
CHANGED
|
@@ -82,7 +82,7 @@ rule
|
|
|
82
82
|
|
|
83
83
|
type_selector
|
|
84
84
|
: IDENT
|
|
85
|
-
{ result = Node.new(:type_selector, val[0], @current_position) }
|
|
85
|
+
{ result = Node.new(:type_selector, identifier_value(val[0]), @current_position, raw_value: identifier_raw(val[0])) }
|
|
86
86
|
| STAR
|
|
87
87
|
{ result = Node.new(:universal_selector, '*', @current_position) }
|
|
88
88
|
;
|
|
@@ -102,30 +102,30 @@ rule
|
|
|
102
102
|
|
|
103
103
|
id_selector
|
|
104
104
|
: HASH IDENT
|
|
105
|
-
{ result = Node.new(:id_selector, val[1], @current_position) }
|
|
105
|
+
{ result = Node.new(:id_selector, identifier_value(val[1]), @current_position, raw_value: identifier_raw(val[1])) }
|
|
106
106
|
;
|
|
107
107
|
|
|
108
108
|
class_selector
|
|
109
109
|
: DOT IDENT
|
|
110
|
-
{ result = Node.new(:class_selector, val[1], @current_position) }
|
|
110
|
+
{ result = Node.new(:class_selector, identifier_value(val[1]), @current_position, raw_value: identifier_raw(val[1])) }
|
|
111
111
|
;
|
|
112
112
|
|
|
113
113
|
attribute_selector
|
|
114
114
|
: LBRACKET IDENT RBRACKET
|
|
115
|
-
{ result = Node.new(:attribute_selector, val[1], @current_position) }
|
|
115
|
+
{ result = Node.new(:attribute_selector, identifier_value(val[1]), @current_position, raw_value: identifier_raw(val[1])) }
|
|
116
116
|
| LBRACKET IDENT attr_matcher STRING RBRACKET
|
|
117
117
|
{
|
|
118
118
|
result = Node.new(:attribute_selector, nil, @current_position)
|
|
119
|
-
result.add_child(Node.new(:attribute, val[1], @current_position))
|
|
119
|
+
result.add_child(Node.new(:attribute, identifier_value(val[1]), @current_position, raw_value: identifier_raw(val[1])))
|
|
120
120
|
result.add_child(val[2])
|
|
121
121
|
result.add_child(Node.new(:value, val[3], @current_position))
|
|
122
122
|
}
|
|
123
123
|
| LBRACKET IDENT attr_matcher IDENT RBRACKET
|
|
124
124
|
{
|
|
125
125
|
result = Node.new(:attribute_selector, nil, @current_position)
|
|
126
|
-
result.add_child(Node.new(:attribute, val[1], @current_position))
|
|
126
|
+
result.add_child(Node.new(:attribute, identifier_value(val[1]), @current_position, raw_value: identifier_raw(val[1])))
|
|
127
127
|
result.add_child(val[2])
|
|
128
|
-
result.add_child(Node.new(:value, val[3], @current_position))
|
|
128
|
+
result.add_child(Node.new(:value, identifier_value(val[3]), @current_position, raw_value: identifier_raw(val[3])))
|
|
129
129
|
}
|
|
130
130
|
;
|
|
131
131
|
|
|
@@ -146,18 +146,24 @@ rule
|
|
|
146
146
|
|
|
147
147
|
pseudo_class_selector
|
|
148
148
|
: COLON IDENT
|
|
149
|
-
{ result = Node.new(:pseudo_class, val[1], @current_position) }
|
|
149
|
+
{ result = Node.new(:pseudo_class, identifier_value(val[1]), @current_position, raw_value: identifier_raw(val[1])) }
|
|
150
150
|
| COLON IDENT LPAREN any_value RPAREN
|
|
151
151
|
{
|
|
152
|
-
fn = Node.new(:pseudo_function, val[1], @current_position)
|
|
152
|
+
fn = Node.new(:pseudo_function, identifier_value(val[1]), @current_position, raw_value: identifier_raw(val[1]))
|
|
153
153
|
fn.add_child(val[3])
|
|
154
154
|
result = fn
|
|
155
155
|
}
|
|
156
|
+
| IDENT LPAREN any_value RPAREN
|
|
157
|
+
{
|
|
158
|
+
fn = Node.new(:pseudo_function, identifier_value(val[0]), @current_position, raw_value: identifier_raw(val[0]))
|
|
159
|
+
fn.add_child(val[2])
|
|
160
|
+
result = fn
|
|
161
|
+
}
|
|
156
162
|
;
|
|
157
163
|
|
|
158
164
|
pseudo_element_selector
|
|
159
165
|
: COLON COLON IDENT
|
|
160
|
-
{ result = Node.new(:pseudo_element, val[2], @current_position) }
|
|
166
|
+
{ result = Node.new(:pseudo_element, identifier_value(val[2]), @current_position, raw_value: identifier_raw(val[2])) }
|
|
161
167
|
;
|
|
162
168
|
|
|
163
169
|
any_value
|
|
@@ -258,34 +264,128 @@ rule
|
|
|
258
264
|
end
|
|
259
265
|
|
|
260
266
|
---- header
|
|
267
|
+
require 'set'
|
|
268
|
+
|
|
269
|
+
# Pre-computed sets for faster lookup
|
|
270
|
+
CAN_END_COMPOUND = Set[:IDENT, :STAR, :RPAREN, :RBRACKET].freeze
|
|
271
|
+
CAN_START_COMPOUND = Set[:IDENT, :STAR, :DOT, :HASH, :LBRACKET, :COLON].freeze
|
|
272
|
+
TYPE_SELECTOR_TYPES = Set[:IDENT, :STAR].freeze
|
|
273
|
+
SUBCLASS_SELECTOR_TYPES = Set[:DOT, :HASH, :LBRACKET, :COLON].freeze
|
|
274
|
+
SUBCLASS_SELECTOR_END_TYPES = Set[:IDENT, :RBRACKET, :RPAREN].freeze
|
|
275
|
+
NTH_PSEUDO_NAMES = Set['nth-child', 'nth-last-child', 'nth-of-type', 'nth-last-of-type', 'nth-col', 'nth-last-col'].freeze
|
|
276
|
+
# Matches a complete An+B value: "even", "odd", a term in n with optional
# coefficient and offset, or a bare non-negative integer. Anchored with \A/\z
# (not ^/$) so a value containing a newline cannot match on one line alone.
AN_PLUS_B_REGEX = /\A(even|odd|[+-]?\d*n(?:[+-]\d+)?|[+-]?n(?:[+-]\d+)?|\d+)\z/.freeze
|
|
261
277
|
|
|
262
278
|
---- inner
|
|
263
|
-
def parse(input)
|
|
279
|
+
def parse(input, tolerant: false)
|
|
280
|
+
@tolerant = tolerant
|
|
281
|
+
@errors = []
|
|
282
|
+
@error_index = nil
|
|
283
|
+
@suppress_errors = false
|
|
264
284
|
@lexer = Parselly::Lexer.new(input)
|
|
265
|
-
|
|
285
|
+
begin
|
|
286
|
+
@tokens = @lexer.tokenize
|
|
287
|
+
rescue RuntimeError => e
|
|
288
|
+
if tolerant
|
|
289
|
+
@errors << parse_error_from_exception(e)
|
|
290
|
+
return Parselly::ParseResult.new(nil, @errors)
|
|
291
|
+
end
|
|
292
|
+
raise
|
|
293
|
+
end
|
|
266
294
|
preprocess_tokens!
|
|
267
295
|
@index = 0
|
|
268
|
-
@current_position = { line: 1, column: 1 }
|
|
296
|
+
@current_position = { line: 1, column: 1, offset: 0 }
|
|
297
|
+
|
|
298
|
+
if tolerant
|
|
299
|
+
ast = parse_with_recovery
|
|
300
|
+
normalize_an_plus_b(ast) if ast
|
|
301
|
+
return Parselly::ParseResult.new(ast, @errors)
|
|
302
|
+
end
|
|
303
|
+
|
|
269
304
|
ast = do_parse
|
|
270
305
|
normalize_an_plus_b(ast)
|
|
271
306
|
ast
|
|
272
307
|
end
|
|
273
308
|
|
|
309
|
+
# Runs the parser and, if it fails, falls back to the longest parseable
# token prefix (see parse_partial_ast).
#
# @return the AST from do_parse, or the result of parse_partial_ast on failure
def parse_with_recovery
  do_parse
rescue Parselly::ParseError
  # Raised by on_error, which has already appended the error to @errors.
  parse_partial_ast
rescue RuntimeError => e
  # Any other runtime failure would otherwise vanish; record it so the caller
  # does not receive a failed parse with an empty error list.
  @errors << parse_error_from_exception(e)
  parse_partial_ast
end
|
|
314
|
+
|
|
315
|
+
# Re-parses ever shorter prefixes of @tokens until one parses cleanly.
#
# The trailing end-of-input token (type +false+), when present, is set aside
# and re-appended to every candidate prefix. The first attempt keeps
# @error_index tokens (or all of them when no error index was recorded) and
# each failure drops one more token from the end.
#
# @return the result of parse_from_tokens for the first prefix that parses,
#   or nil when there are no tokens or no prefix parses
def parse_partial_ast
  return nil if !@tokens || @tokens.empty?

  final = @tokens.last
  eof_token = final if final && final[0] == false
  body = @tokens.dup
  body.pop if eof_token
  longest = @error_index || body.length

  longest.downto(1) do |count|
    candidate = body.first(count)
    candidate << eof_token if eof_token
    begin
      return parse_from_tokens(candidate, suppress_errors: true)
    rescue Parselly::ParseError, RuntimeError
      next
    end
  end

  nil
end
|
|
334
|
+
|
|
335
|
+
# Resets the parser state to the start of +tokens+ and runs do_parse on them.
#
# @param tokens [Array] token list that replaces @tokens
# @param suppress_errors [Boolean] value of @suppress_errors for the duration
#   of this parse; the flag is always reset to false afterwards, even when
#   do_parse raises
# @return the result of do_parse
def parse_from_tokens(tokens, suppress_errors: false)
  @suppress_errors = suppress_errors
  @tokens = tokens
  @index = 0
  @current_position = { line: 1, column: 1, offset: 0 }

  begin
    do_parse
  ensure
    @suppress_errors = false
  end
end
|
|
344
|
+
|
|
345
|
+
# Builds an error hash from an exception by scraping position details out of
# its message text.
#
# Recognised fragments: "at <line>:<column>" and "offset <n>". Any detail not
# present in the message is reported as nil.
#
# @param error [Exception] the exception to describe
# @return [Hash] with keys :message, :line, :column and :offset
def parse_error_from_exception(error)
  position = error.message.match(/at (\d+):(\d+)/)
  offset_match = error.message.match(/offset (\d+)/)

  {
    message: error.message,
    line: position ? position[1].to_i : nil,
    column: position ? position[2].to_i : nil,
    offset: offset_match ? offset_match[1].to_i : nil
  }
end
|
|
358
|
+
|
|
359
|
+
# Returns the value to store on a node for an identifier token.
#
# @param token [Object] a token payload; objects responding to +value+ are
#   unwrapped, anything else (e.g. a plain String) is returned as is
# @return the token's +value+, or the token itself
def identifier_value(token)
  return token unless token.respond_to?(:value)

  token.value
end
|
|
362
|
+
|
|
363
|
+
# Returns the raw (as-written) text to store on a node for an identifier token.
#
# @param token [Object] a token payload; objects responding to +raw+ are
#   unwrapped, anything else (e.g. a plain String) is returned as is
# @return the token's +raw+, or the token itself
def identifier_raw(token)
  return token unless token.respond_to?(:raw)

  token.raw
end
|
|
366
|
+
|
|
274
367
|
def preprocess_tokens!
|
|
275
|
-
|
|
276
|
-
|
|
277
|
-
|
|
278
|
-
|
|
279
|
-
|
|
280
|
-
|
|
281
|
-
|
|
282
|
-
|
|
283
|
-
|
|
368
|
+
return if @tokens.size <= 1
|
|
369
|
+
|
|
370
|
+
new_tokens = Array.new(@tokens.size + (@tokens.size / 2)) # Pre-allocate with conservative estimate
|
|
371
|
+
new_tokens_idx = 0
|
|
372
|
+
|
|
373
|
+
last_idx = @tokens.size - 1
|
|
374
|
+
@tokens.each_with_index do |token, i|
|
|
375
|
+
new_tokens[new_tokens_idx] = token
|
|
376
|
+
new_tokens_idx += 1
|
|
377
|
+
|
|
378
|
+
if i < last_idx
|
|
379
|
+
next_token = @tokens[i + 1]
|
|
380
|
+
if needs_descendant?(token, next_token)
|
|
381
|
+
pos = { line: token[2][:line], column: token[2][:column], offset: token[2][:offset] }
|
|
382
|
+
new_tokens[new_tokens_idx] = [:DESCENDANT, ' ', pos]
|
|
383
|
+
new_tokens_idx += 1
|
|
384
|
+
end
|
|
284
385
|
end
|
|
285
|
-
i += 1
|
|
286
386
|
end
|
|
287
387
|
|
|
288
|
-
@tokens = new_tokens
|
|
388
|
+
@tokens = new_tokens.first(new_tokens_idx)
|
|
289
389
|
end
|
|
290
390
|
|
|
291
391
|
# Insert DESCENDANT combinator if:
|
|
@@ -297,62 +397,42 @@ def needs_descendant?(current, next_tok)
|
|
|
297
397
|
current_type = current[0]
|
|
298
398
|
next_type = next_tok[0]
|
|
299
399
|
|
|
300
|
-
can_end = can_end_compound?(current_type)
|
|
301
|
-
can_start = can_start_compound?(next_type)
|
|
302
|
-
|
|
303
400
|
# Type selector followed by subclass selector = same compound
|
|
304
|
-
|
|
305
|
-
|
|
306
|
-
return false
|
|
401
|
+
# Subclass selector followed by subclass selector = same compound
|
|
402
|
+
if SUBCLASS_SELECTOR_TYPES.include?(next_type)
|
|
403
|
+
return false if TYPE_SELECTOR_TYPES.include?(current_type) ||
|
|
404
|
+
SUBCLASS_SELECTOR_END_TYPES.include?(current_type)
|
|
307
405
|
end
|
|
308
406
|
|
|
309
|
-
|
|
310
|
-
end
|
|
311
|
-
|
|
312
|
-
def can_end_compound?(token_type)
|
|
313
|
-
[:IDENT, :STAR, :RPAREN, :RBRACKET].include?(token_type)
|
|
314
|
-
end
|
|
315
|
-
|
|
316
|
-
def can_start_compound?(token_type)
|
|
317
|
-
# Type selectors and subclass selectors can start a compound selector
|
|
318
|
-
[:IDENT, :STAR, :DOT, :HASH, :LBRACKET, :COLON].include?(token_type)
|
|
407
|
+
CAN_END_COMPOUND.include?(current_type) && CAN_START_COMPOUND.include?(next_type)
|
|
319
408
|
end
|
|
320
409
|
|
|
321
410
|
def normalize_an_plus_b(node)
|
|
322
411
|
return unless node.respond_to?(:children) && node.children
|
|
323
412
|
|
|
324
|
-
if node.type == :pseudo_function &&
|
|
413
|
+
if node.type == :pseudo_function && NTH_PSEUDO_NAMES.include?(node.value)
|
|
325
414
|
child = node.children.first
|
|
326
|
-
if child
|
|
415
|
+
if child&.type == :selector_list
|
|
327
416
|
an_plus_b_value = extract_an_plus_b_value(child)
|
|
328
417
|
if an_plus_b_value
|
|
329
|
-
node.
|
|
418
|
+
node.replace_child(0, Node.new(:an_plus_b, an_plus_b_value, child.position))
|
|
330
419
|
end
|
|
331
420
|
end
|
|
332
421
|
end
|
|
333
422
|
node.children.compact.each { |child| normalize_an_plus_b(child) }
|
|
334
423
|
end
|
|
335
424
|
|
|
336
|
-
def nth_pseudo?(name)
|
|
337
|
-
%w[nth-child nth-last-child nth-of-type nth-last-of-type nth-col nth-last-col].include?(name)
|
|
338
|
-
end
|
|
339
|
-
|
|
340
425
|
def extract_an_plus_b_value(selector_list_node)
|
|
341
426
|
return nil unless selector_list_node.children.size == 1
|
|
342
427
|
|
|
343
428
|
seq = selector_list_node.children.first
|
|
344
|
-
return nil unless seq.type == :simple_selector_sequence
|
|
345
|
-
return nil unless seq.children.size == 1
|
|
429
|
+
return nil unless seq.type == :simple_selector_sequence && seq.children.size == 1
|
|
346
430
|
|
|
347
431
|
type_sel = seq.children.first
|
|
348
432
|
return nil unless type_sel.type == :type_selector
|
|
349
433
|
|
|
350
434
|
value = type_sel.value
|
|
351
|
-
if value =~
|
|
352
|
-
value
|
|
353
|
-
else
|
|
354
|
-
nil
|
|
355
|
-
end
|
|
435
|
+
value if value =~ AN_PLUS_B_REGEX
|
|
356
436
|
end
|
|
357
437
|
|
|
358
438
|
def next_token
|
|
@@ -368,5 +448,16 @@ end
|
|
|
368
448
|
def on_error(token_id, val, vstack)
|
|
369
449
|
token_name = token_to_str(token_id) || '?'
|
|
370
450
|
pos = @current_position || { line: '?', column: '?' }
|
|
371
|
-
|
|
451
|
+
error = {
|
|
452
|
+
message: "Parse error: unexpected #{token_name} '#{val}' at #{pos[:line]}:#{pos[:column]}",
|
|
453
|
+
line: pos[:line],
|
|
454
|
+
column: pos[:column],
|
|
455
|
+
offset: pos[:offset]
|
|
456
|
+
}
|
|
457
|
+
if @tolerant
|
|
458
|
+
@errors << error unless @suppress_errors
|
|
459
|
+
@error_index ||= [@index - 1, 0].max
|
|
460
|
+
raise Parselly::ParseError, error
|
|
461
|
+
end
|
|
462
|
+
raise error[:message]
|
|
372
463
|
end
|
metadata
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: parselly
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 1.0.0
|
|
4
|
+
version: 1.2.0
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Yudai Takada
|
|
@@ -51,7 +51,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
|
51
51
|
- !ruby/object:Gem::Version
|
|
52
52
|
version: '0'
|
|
53
53
|
requirements: []
|
|
54
|
-
rubygems_version:
|
|
54
|
+
rubygems_version: 4.0.4
|
|
55
55
|
specification_version: 4
|
|
56
56
|
summary: Pure Ruby CSS selector parser.
|
|
57
57
|
test_files: []
|