parselly 1.0.0 → 1.2.0
This diff shows the changes between publicly available versions of a package that have been released to one of the supported registries. It is provided for informational purposes only and reflects the differences between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/parselly/lexer.rb +44 -15
- data/lib/parselly/node.rb +139 -15
- data/lib/parselly/parser.rb +271 -173
- data/lib/parselly/version.rb +1 -1
- data/lib/parselly.rb +16 -1
- data/parser.y +146 -55
- metadata +2 -2
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: e1e4e245059433130b385388d399fe29355a1c679a15327a02cb3b49e69ae23b
|
|
4
|
+
data.tar.gz: ceb1167e8a32c25543b96988f754f76eb01fe287fc279a6ceb8dd26ffc258ca9
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 33dab2f628019bbd51d482c53ae267f98914a5a561beb146ce57b3625f30535b661de168b2c6150faa79f16af8a49a8c15e7d6047a118577359fdd9334249a7f
|
|
7
|
+
data.tar.gz: 38803e8cc427a8eaa0b2a63f9446cf91334a368a7ea86908da80eb62cca3569a7b44f4c9536165a57f99048cf604dd948814b12dff25ac49c43ab637ec344324
|
data/lib/parselly/lexer.rb
CHANGED
|
@@ -4,6 +4,12 @@ require 'strscan'
|
|
|
4
4
|
|
|
5
5
|
module Parselly
|
|
6
6
|
class Lexer
|
|
7
|
+
Identifier = Struct.new(:value, :raw) do
|
|
8
|
+
def to_s
|
|
9
|
+
value
|
|
10
|
+
end
|
|
11
|
+
end
|
|
12
|
+
|
|
7
13
|
TOKENS = {
|
|
8
14
|
# Combinators
|
|
9
15
|
'>' => :CHILD,
|
|
@@ -31,6 +37,23 @@ module Parselly
|
|
|
31
37
|
'*=' => :SUBSTRINGMATCH
|
|
32
38
|
}.freeze
|
|
33
39
|
|
|
40
|
+
# Pre-compiled regular expressions for better performance
|
|
41
|
+
MULTI_CHAR_OPERATORS = [
|
|
42
|
+
[/~=/, :INCLUDES],
|
|
43
|
+
[/\|=/, :DASHMATCH],
|
|
44
|
+
[/\^=/, :PREFIXMATCH],
|
|
45
|
+
[/\$=/, :SUFFIXMATCH],
|
|
46
|
+
[/\*=/, :SUBSTRINGMATCH]
|
|
47
|
+
].freeze
|
|
48
|
+
|
|
49
|
+
SINGLE_CHAR_OPERATOR_REGEX = /[>+~\[\]():,.#*=-]/.freeze
|
|
50
|
+
WHITESPACE_REGEX = /[ \t\n\r]+/.freeze
|
|
51
|
+
STRING_DOUBLE_REGEX = /"([^"\\]|\\.)*"/.freeze
|
|
52
|
+
STRING_SINGLE_REGEX = /'([^'\\]|\\.)*'/.freeze
|
|
53
|
+
IDENTIFIER_REGEX = /(?:--|-?[a-zA-Z_])(?:[\w-]|\\[^\n\r\f])*/.freeze
|
|
54
|
+
NUMBER_REGEX = /\d+(\.\d+)?/.freeze
|
|
55
|
+
ESCAPE_REGEX = /\\(.)/.freeze
|
|
56
|
+
|
|
34
57
|
attr_reader :line, :column
|
|
35
58
|
|
|
36
59
|
def initialize(input)
|
|
@@ -45,7 +68,7 @@ module Parselly
|
|
|
45
68
|
skip_whitespace
|
|
46
69
|
break if @scanner.eos?
|
|
47
70
|
|
|
48
|
-
pos = { line: @line, column: @column }
|
|
71
|
+
pos = { line: @line, column: @column, offset: @scanner.pos }
|
|
49
72
|
|
|
50
73
|
if (token = scan_string)
|
|
51
74
|
@tokens << [:STRING, token, pos]
|
|
@@ -57,35 +80,40 @@ module Parselly
|
|
|
57
80
|
@tokens << [:IDENT, token, pos]
|
|
58
81
|
else
|
|
59
82
|
char = @scanner.getch
|
|
60
|
-
raise "Unexpected character: #{char} at #{pos[:line]}:#{pos[:column]}"
|
|
83
|
+
raise "Unexpected character: #{char} at #{pos[:line]}:#{pos[:column]} (offset #{pos[:offset]})"
|
|
61
84
|
end
|
|
62
85
|
end
|
|
63
86
|
|
|
64
|
-
@tokens << [false, nil, { line: @line, column: @column }]
|
|
87
|
+
@tokens << [false, nil, { line: @line, column: @column, offset: @scanner.pos }]
|
|
65
88
|
@tokens
|
|
66
89
|
end
|
|
67
90
|
|
|
68
91
|
private
|
|
69
92
|
|
|
70
93
|
def skip_whitespace
|
|
71
|
-
while @scanner.scan(
|
|
72
|
-
@scanner.matched
|
|
73
|
-
|
|
94
|
+
while @scanner.scan(WHITESPACE_REGEX)
|
|
95
|
+
matched = @scanner.matched
|
|
96
|
+
newline_count = matched.count("\n")
|
|
97
|
+
if newline_count > 0
|
|
98
|
+
@line += newline_count
|
|
99
|
+
@column = matched.size - matched.rindex("\n")
|
|
100
|
+
else
|
|
101
|
+
@column += matched.size
|
|
74
102
|
end
|
|
75
103
|
end
|
|
76
104
|
end
|
|
77
105
|
|
|
78
106
|
def scan_operator
|
|
79
107
|
# Check multi-character operators first
|
|
80
|
-
|
|
81
|
-
if @scanner.scan(
|
|
108
|
+
MULTI_CHAR_OPERATORS.each do |regex, token|
|
|
109
|
+
if @scanner.scan(regex)
|
|
82
110
|
update_position(@scanner.matched)
|
|
83
|
-
return
|
|
111
|
+
return token
|
|
84
112
|
end
|
|
85
113
|
end
|
|
86
114
|
|
|
87
115
|
# Single character operators
|
|
88
|
-
return unless @scanner.scan(
|
|
116
|
+
return unless @scanner.scan(SINGLE_CHAR_OPERATOR_REGEX)
|
|
89
117
|
|
|
90
118
|
char = @scanner.matched
|
|
91
119
|
update_position(char)
|
|
@@ -99,11 +127,11 @@ module Parselly
|
|
|
99
127
|
# as raw text for simplicity. Identifiers process escapes to support patterns
|
|
100
128
|
# like .hover\:bg-blue-500, but strings in attributes don't require this.
|
|
101
129
|
def scan_string
|
|
102
|
-
if @scanner.scan(
|
|
130
|
+
if @scanner.scan(STRING_DOUBLE_REGEX)
|
|
103
131
|
str = @scanner.matched
|
|
104
132
|
update_position(str)
|
|
105
133
|
str[1..-2] # Remove quotes
|
|
106
|
-
elsif @scanner.scan(
|
|
134
|
+
elsif @scanner.scan(STRING_SINGLE_REGEX)
|
|
107
135
|
str = @scanner.matched
|
|
108
136
|
update_position(str)
|
|
109
137
|
str[1..-2] # Remove quotes
|
|
@@ -118,16 +146,17 @@ module Parselly
|
|
|
118
146
|
# While custom properties are technically only valid in property contexts (not selectors),
|
|
119
147
|
# this parser accepts them as a superset of valid CSS for flexibility. In practice,
|
|
120
148
|
# selectors like .--invalid-class would parse but aren't valid CSS selectors.
|
|
121
|
-
return unless @scanner.scan(
|
|
149
|
+
return unless @scanner.scan(IDENTIFIER_REGEX)
|
|
122
150
|
|
|
123
151
|
ident = @scanner.matched
|
|
124
152
|
update_position(ident)
|
|
125
153
|
# Remove backslashes from escaped characters
|
|
126
|
-
ident.gsub(
|
|
154
|
+
normalized = ident.gsub(ESCAPE_REGEX, '\1')
|
|
155
|
+
Identifier.new(normalized, ident)
|
|
127
156
|
end
|
|
128
157
|
|
|
129
158
|
def scan_number
|
|
130
|
-
return unless @scanner.scan(
|
|
159
|
+
return unless @scanner.scan(NUMBER_REGEX)
|
|
131
160
|
|
|
132
161
|
num = @scanner.matched
|
|
133
162
|
update_position(num)
|
data/lib/parselly/node.rb
CHANGED
|
@@ -8,7 +8,7 @@ module Parselly
|
|
|
8
8
|
# child nodes, parent reference, and source position.
|
|
9
9
|
#
|
|
10
10
|
# @example Creating a simple AST node
|
|
11
|
-
# node = Parselly::Node.new(:type_selector, 'div', { line: 1, column: 1 })
|
|
11
|
+
# node = Parselly::Node.new(:type_selector, 'div', { line: 1, column: 1, offset: 0 })
|
|
12
12
|
# node.add_child(Parselly::Node.new(:class_selector, 'container'))
|
|
13
13
|
#
|
|
14
14
|
# @example Traversing the AST
|
|
@@ -16,19 +16,32 @@ module Parselly
|
|
|
16
16
|
# node.descendants # Returns array of all descendant nodes
|
|
17
17
|
# node.siblings # Returns array of sibling nodes
|
|
18
18
|
class Node
|
|
19
|
-
attr_accessor :type, :value, :children, :parent, :position
|
|
19
|
+
attr_accessor :type, :value, :raw_value, :children, :parent, :position
|
|
20
20
|
|
|
21
21
|
# Creates a new AST node.
|
|
22
22
|
#
|
|
23
23
|
# @param type [Symbol] the type of the node (e.g., :type_selector, :class_selector)
|
|
24
24
|
# @param value [String, nil] optional value associated with the node
|
|
25
|
-
# @param position [Hash] source position with :line and :
|
|
26
|
-
|
|
25
|
+
# @param position [Hash] source position with :line, :column, and :offset keys
|
|
26
|
+
# @param line [Integer, nil] optional line number (keyword alternative)
|
|
27
|
+
# @param column [Integer, nil] optional column number (keyword alternative)
|
|
28
|
+
# @param offset [Integer, nil] optional offset (keyword alternative)
|
|
29
|
+
def initialize(type, value = nil, position = {}, raw_value: nil, line: nil, column: nil, offset: nil)
|
|
27
30
|
@type = type
|
|
28
31
|
@value = value
|
|
32
|
+
@raw_value = raw_value.nil? ? value : raw_value
|
|
29
33
|
@children = []
|
|
30
34
|
@parent = nil
|
|
31
|
-
|
|
35
|
+
unless position.nil? || position.is_a?(Hash)
|
|
36
|
+
raise ArgumentError, 'position must be a Hash'
|
|
37
|
+
end
|
|
38
|
+
|
|
39
|
+
resolved_position = position ? position.dup : {}
|
|
40
|
+
resolved_position[:line] = line unless line.nil?
|
|
41
|
+
resolved_position[:column] = column unless column.nil?
|
|
42
|
+
resolved_position[:offset] = offset unless offset.nil?
|
|
43
|
+
@position = resolved_position
|
|
44
|
+
@descendants_cache = nil
|
|
32
45
|
end
|
|
33
46
|
|
|
34
47
|
# Adds a child node to this node.
|
|
@@ -40,9 +53,28 @@ module Parselly
|
|
|
40
53
|
|
|
41
54
|
node.parent = self
|
|
42
55
|
@children << node
|
|
56
|
+
invalidate_cache
|
|
43
57
|
node
|
|
44
58
|
end
|
|
45
59
|
|
|
60
|
+
# Replaces a child node at the specified index.
|
|
61
|
+
#
|
|
62
|
+
# @param index [Integer] the index of the child to replace
|
|
63
|
+
# @param new_node [Node] the new child node
|
|
64
|
+
# @return [Node, nil] the new node, or nil if invalid parameters
|
|
65
|
+
def replace_child(index, new_node)
|
|
66
|
+
return nil if new_node.nil?
|
|
67
|
+
return nil if index < 0 || index >= @children.size
|
|
68
|
+
|
|
69
|
+
old_node = @children[index]
|
|
70
|
+
old_node.parent = nil if old_node
|
|
71
|
+
|
|
72
|
+
@children[index] = new_node
|
|
73
|
+
new_node.parent = self
|
|
74
|
+
invalidate_cache
|
|
75
|
+
new_node
|
|
76
|
+
end
|
|
77
|
+
|
|
46
78
|
# Returns an array of all ancestor nodes from parent to root.
|
|
47
79
|
#
|
|
48
80
|
# @return [Array<Node>] array of ancestor nodes
|
|
@@ -60,12 +92,41 @@ module Parselly
|
|
|
60
92
|
#
|
|
61
93
|
# @return [Array<Node>] array of all descendant nodes
|
|
62
94
|
def descendants
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
95
|
+
return @descendants_cache if @descendants_cache
|
|
96
|
+
|
|
97
|
+
@descendants_cache = []
|
|
98
|
+
queue = @children.dup
|
|
99
|
+
until queue.empty?
|
|
100
|
+
node = queue.shift
|
|
101
|
+
@descendants_cache << node
|
|
102
|
+
queue.concat(node.children) unless node.children.empty?
|
|
67
103
|
end
|
|
68
|
-
|
|
104
|
+
@descendants_cache
|
|
105
|
+
end
|
|
106
|
+
|
|
107
|
+
# Depth-first traversal of this node and its descendants.
|
|
108
|
+
#
|
|
109
|
+
# @return [Enumerator, Node] enumerator if no block, otherwise self
|
|
110
|
+
def each
|
|
111
|
+
return enum_for(:each) unless block_given?
|
|
112
|
+
|
|
113
|
+
stack = [self]
|
|
114
|
+
until stack.empty?
|
|
115
|
+
node = stack.pop
|
|
116
|
+
yield node
|
|
117
|
+
children = node.children
|
|
118
|
+
stack.concat(children.reverse) if children && !children.empty?
|
|
119
|
+
end
|
|
120
|
+
|
|
121
|
+
self
|
|
122
|
+
end
|
|
123
|
+
|
|
124
|
+
# Finds all nodes of a given type in this subtree.
|
|
125
|
+
#
|
|
126
|
+
# @param type [Symbol] the node type to match
|
|
127
|
+
# @return [Array<Node>] array of matching nodes
|
|
128
|
+
def find_all(type)
|
|
129
|
+
each.with_object([]) { |node, acc| acc << node if node.type == type }
|
|
69
130
|
end
|
|
70
131
|
|
|
71
132
|
# Returns an array of sibling nodes (excluding self).
|
|
@@ -150,7 +211,8 @@ module Parselly
|
|
|
150
211
|
#
|
|
151
212
|
# @return [Boolean] true if an ID selector is present
|
|
152
213
|
def id?
|
|
153
|
-
|
|
214
|
+
return true if type == :id_selector
|
|
215
|
+
descendants.any? { |node| node.type == :id_selector }
|
|
154
216
|
end
|
|
155
217
|
|
|
156
218
|
# Extracts the ID value from this node or its descendants.
|
|
@@ -159,8 +221,10 @@ module Parselly
|
|
|
159
221
|
def id
|
|
160
222
|
return value if type == :id_selector
|
|
161
223
|
|
|
162
|
-
|
|
163
|
-
|
|
224
|
+
descendants.each do |node|
|
|
225
|
+
return node.value if node.type == :id_selector
|
|
226
|
+
end
|
|
227
|
+
nil
|
|
164
228
|
end
|
|
165
229
|
|
|
166
230
|
# Extracts all class names from this node and its descendants.
|
|
@@ -179,7 +243,8 @@ module Parselly
|
|
|
179
243
|
#
|
|
180
244
|
# @return [Boolean] true if an attribute selector is present
|
|
181
245
|
def attribute?
|
|
182
|
-
|
|
246
|
+
return true if type == :attribute_selector
|
|
247
|
+
descendants.any? { |node| node.type == :attribute_selector }
|
|
183
248
|
end
|
|
184
249
|
|
|
185
250
|
# Extracts all attribute selectors from this node and its descendants.
|
|
@@ -202,6 +267,24 @@ module Parselly
|
|
|
202
267
|
result
|
|
203
268
|
end
|
|
204
269
|
|
|
270
|
+
# Extracts detailed attribute selector nodes from this node and its descendants.
|
|
271
|
+
#
|
|
272
|
+
# @return [Array<Hash>] array of attribute selector detail hashes
|
|
273
|
+
# Each hash contains :name, :operator (optional), and :value (optional) keys
|
|
274
|
+
def attribute_selectors
|
|
275
|
+
result = []
|
|
276
|
+
|
|
277
|
+
if type == :attribute_selector
|
|
278
|
+
result << extract_attribute_node(self)
|
|
279
|
+
end
|
|
280
|
+
|
|
281
|
+
descendants.each do |node|
|
|
282
|
+
result << extract_attribute_node(node) if node.type == :attribute_selector
|
|
283
|
+
end
|
|
284
|
+
|
|
285
|
+
result
|
|
286
|
+
end
|
|
287
|
+
|
|
205
288
|
# Extracts all pseudo-classes and pseudo-elements from this node and its descendants.
|
|
206
289
|
#
|
|
207
290
|
# @return [Array<String>] array of pseudo-class and pseudo-element names
|
|
@@ -243,11 +326,22 @@ module Parselly
|
|
|
243
326
|
#
|
|
244
327
|
# @return [Boolean] true if a type selector is present
|
|
245
328
|
def type_selector?
|
|
246
|
-
|
|
329
|
+
return true if type == :type_selector
|
|
330
|
+
descendants.any? { |node| node.type == :type_selector }
|
|
247
331
|
end
|
|
248
332
|
|
|
249
333
|
private
|
|
250
334
|
|
|
335
|
+
# Invalidates the descendants cache for this node and all ancestors.
|
|
336
|
+
# This ensures that cached descendants are cleared when the tree structure changes.
|
|
337
|
+
def invalidate_cache
|
|
338
|
+
node = self
|
|
339
|
+
while node
|
|
340
|
+
node.instance_variable_set(:@descendants_cache, nil)
|
|
341
|
+
node = node.parent
|
|
342
|
+
end
|
|
343
|
+
end
|
|
344
|
+
|
|
251
345
|
# Helper method to extract attribute information from an attribute_selector node.
|
|
252
346
|
#
|
|
253
347
|
# @param node [Node] an attribute_selector node
|
|
@@ -277,6 +371,36 @@ module Parselly
|
|
|
277
371
|
info
|
|
278
372
|
end
|
|
279
373
|
|
|
374
|
+
# Helper method to extract detailed attribute selector data.
|
|
375
|
+
#
|
|
376
|
+
# @param node [Node] an attribute_selector node
|
|
377
|
+
# @return [Hash] attribute selector detail hash
|
|
378
|
+
def extract_attribute_node(node)
|
|
379
|
+
info = {}
|
|
380
|
+
|
|
381
|
+
if node.value
|
|
382
|
+
info[:name] = node.value
|
|
383
|
+
info[:raw_name] = node.raw_value
|
|
384
|
+
return info
|
|
385
|
+
end
|
|
386
|
+
|
|
387
|
+
node.children.each do |child|
|
|
388
|
+
case child.type
|
|
389
|
+
when :attribute
|
|
390
|
+
info[:name] = child.value
|
|
391
|
+
info[:raw_name] = child.raw_value
|
|
392
|
+
when :equal_operator, :includes_operator, :dashmatch_operator,
|
|
393
|
+
:prefixmatch_operator, :suffixmatch_operator, :substringmatch_operator
|
|
394
|
+
info[:operator] = child.value
|
|
395
|
+
when :value
|
|
396
|
+
info[:value] = child.value
|
|
397
|
+
info[:raw_value] = child.raw_value
|
|
398
|
+
end
|
|
399
|
+
end
|
|
400
|
+
|
|
401
|
+
info
|
|
402
|
+
end
|
|
403
|
+
|
|
280
404
|
# Helper method to build an attribute selector string.
|
|
281
405
|
#
|
|
282
406
|
# @return [String] the attribute selector string
|