parselly 1.0.0 → 1.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 117ef0c09557018d7129fa29c61565b90432dfeeec3a6b7b8bc9df6f06dcd06a
4
- data.tar.gz: edfa4d22bbc8ffe26e9b6118993b0af67cbb4578471f8b2c62a807dcac554e95
3
+ metadata.gz: e1e4e245059433130b385388d399fe29355a1c679a15327a02cb3b49e69ae23b
4
+ data.tar.gz: ceb1167e8a32c25543b96988f754f76eb01fe287fc279a6ceb8dd26ffc258ca9
5
5
  SHA512:
6
- metadata.gz: 72e2ffff39cc66e2fb68da3d8e69eba7e2435d380d6246f6deb2d2800e75ca64e459cd8e3a029a9f4c30f6548125fbcf7bbafd3ed2efd6e31766052de3aceebb
7
- data.tar.gz: bf902d1d2cfcc2f88c5824b8448a8354dfffac62dcdd36e4c33bcff84f2e32f72b9120f674a581d22cab0bcb12e559691a3c507236f67786b94eb34430e255d6
6
+ metadata.gz: 33dab2f628019bbd51d482c53ae267f98914a5a561beb146ce57b3625f30535b661de168b2c6150faa79f16af8a49a8c15e7d6047a118577359fdd9334249a7f
7
+ data.tar.gz: 38803e8cc427a8eaa0b2a63f9446cf91334a368a7ea86908da80eb62cca3569a7b44f4c9536165a57f99048cf604dd948814b12dff25ac49c43ab637ec344324
@@ -4,6 +4,12 @@ require 'strscan'
4
4
 
5
5
  module Parselly
6
6
  class Lexer
7
+ Identifier = Struct.new(:value, :raw) do
8
+ def to_s
9
+ value
10
+ end
11
+ end
12
+
7
13
  TOKENS = {
8
14
  # Combinators
9
15
  '>' => :CHILD,
@@ -31,6 +37,23 @@ module Parselly
31
37
  '*=' => :SUBSTRINGMATCH
32
38
  }.freeze
33
39
 
40
+ # Pre-compiled regular expressions for better performance
41
+ MULTI_CHAR_OPERATORS = [
42
+ [/~=/, :INCLUDES],
43
+ [/\|=/, :DASHMATCH],
44
+ [/\^=/, :PREFIXMATCH],
45
+ [/\$=/, :SUFFIXMATCH],
46
+ [/\*=/, :SUBSTRINGMATCH]
47
+ ].freeze
48
+
49
+ SINGLE_CHAR_OPERATOR_REGEX = /[>+~\[\]():,.#*=-]/.freeze
50
+ WHITESPACE_REGEX = /[ \t\n\r]+/.freeze
51
+ STRING_DOUBLE_REGEX = /"([^"\\]|\\.)*"/.freeze
52
+ STRING_SINGLE_REGEX = /'([^'\\]|\\.)*'/.freeze
53
+ IDENTIFIER_REGEX = /(?:--|-?[a-zA-Z_])(?:[\w-]|\\[^\n\r\f])*/.freeze
54
+ NUMBER_REGEX = /\d+(\.\d+)?/.freeze
55
+ ESCAPE_REGEX = /\\(.)/.freeze
56
+
34
57
  attr_reader :line, :column
35
58
 
36
59
  def initialize(input)
@@ -45,7 +68,7 @@ module Parselly
45
68
  skip_whitespace
46
69
  break if @scanner.eos?
47
70
 
48
- pos = { line: @line, column: @column }
71
+ pos = { line: @line, column: @column, offset: @scanner.pos }
49
72
 
50
73
  if (token = scan_string)
51
74
  @tokens << [:STRING, token, pos]
@@ -57,35 +80,40 @@ module Parselly
57
80
  @tokens << [:IDENT, token, pos]
58
81
  else
59
82
  char = @scanner.getch
60
- raise "Unexpected character: #{char} at #{pos[:line]}:#{pos[:column]}"
83
+ raise "Unexpected character: #{char} at #{pos[:line]}:#{pos[:column]} (offset #{pos[:offset]})"
61
84
  end
62
85
  end
63
86
 
64
- @tokens << [false, nil, { line: @line, column: @column }]
87
+ @tokens << [false, nil, { line: @line, column: @column, offset: @scanner.pos }]
65
88
  @tokens
66
89
  end
67
90
 
68
91
  private
69
92
 
70
93
  def skip_whitespace
71
- while @scanner.scan(/[ \t\n\r]+/)
72
- @scanner.matched.each_char do |char|
73
- update_position(char)
94
+ while @scanner.scan(WHITESPACE_REGEX)
95
+ matched = @scanner.matched
96
+ newline_count = matched.count("\n")
97
+ if newline_count > 0
98
+ @line += newline_count
99
+ @column = matched.size - matched.rindex("\n")
100
+ else
101
+ @column += matched.size
74
102
  end
75
103
  end
76
104
  end
77
105
 
78
106
  def scan_operator
79
107
  # Check multi-character operators first
80
- ['~=', '|=', '^=', '$=', '*='].each do |op|
81
- if @scanner.scan(/#{Regexp.escape(op)}/)
108
+ MULTI_CHAR_OPERATORS.each do |regex, token|
109
+ if @scanner.scan(regex)
82
110
  update_position(@scanner.matched)
83
- return TOKENS[op]
111
+ return token
84
112
  end
85
113
  end
86
114
 
87
115
  # Single character operators
88
- return unless @scanner.scan(/[>+~\[\]():,.#*=-]/)
116
+ return unless @scanner.scan(SINGLE_CHAR_OPERATOR_REGEX)
89
117
 
90
118
  char = @scanner.matched
91
119
  update_position(char)
@@ -99,11 +127,11 @@ module Parselly
99
127
  # as raw text for simplicity. Identifiers process escapes to support patterns
100
128
  # like .hover\:bg-blue-500, but strings in attributes don't require this.
101
129
  def scan_string
102
- if @scanner.scan(/"([^"\\]|\\.)*"/)
130
+ if @scanner.scan(STRING_DOUBLE_REGEX)
103
131
  str = @scanner.matched
104
132
  update_position(str)
105
133
  str[1..-2] # Remove quotes
106
- elsif @scanner.scan(/'([^'\\]|\\.)*'/)
134
+ elsif @scanner.scan(STRING_SINGLE_REGEX)
107
135
  str = @scanner.matched
108
136
  update_position(str)
109
137
  str[1..-2] # Remove quotes
@@ -118,16 +146,17 @@ module Parselly
118
146
  # While custom properties are technically only valid in property contexts (not selectors),
119
147
  # this parser accepts them as a superset of valid CSS for flexibility. In practice,
120
148
  # selectors like .--invalid-class would parse but aren't valid CSS selectors.
121
- return unless @scanner.scan(/(?:--|-?[a-zA-Z_])(?:[\w-]|\\[^\n\r\f])*/)
149
+ return unless @scanner.scan(IDENTIFIER_REGEX)
122
150
 
123
151
  ident = @scanner.matched
124
152
  update_position(ident)
125
153
  # Remove backslashes from escaped characters
126
- ident.gsub(/\\(.)/, '\1')
154
+ normalized = ident.gsub(ESCAPE_REGEX, '\1')
155
+ Identifier.new(normalized, ident)
127
156
  end
128
157
 
129
158
  def scan_number
130
- return unless @scanner.scan(/\d+(\.\d+)?/)
159
+ return unless @scanner.scan(NUMBER_REGEX)
131
160
 
132
161
  num = @scanner.matched
133
162
  update_position(num)
data/lib/parselly/node.rb CHANGED
@@ -8,7 +8,7 @@ module Parselly
8
8
  # child nodes, parent reference, and source position.
9
9
  #
10
10
  # @example Creating a simple AST node
11
- # node = Parselly::Node.new(:type_selector, 'div', { line: 1, column: 1 })
11
+ # node = Parselly::Node.new(:type_selector, 'div', { line: 1, column: 1, offset: 0 })
12
12
  # node.add_child(Parselly::Node.new(:class_selector, 'container'))
13
13
  #
14
14
  # @example Traversing the AST
@@ -16,19 +16,32 @@ module Parselly
16
16
  # node.descendants # Returns array of all descendant nodes
17
17
  # node.siblings # Returns array of sibling nodes
18
18
  class Node
19
- attr_accessor :type, :value, :children, :parent, :position
19
+ attr_accessor :type, :value, :raw_value, :children, :parent, :position
20
20
 
21
21
  # Creates a new AST node.
22
22
  #
23
23
  # @param type [Symbol] the type of the node (e.g., :type_selector, :class_selector)
24
24
  # @param value [String, nil] optional value associated with the node
25
- # @param position [Hash] source position with :line and :column keys
26
- def initialize(type, value = nil, position = {})
25
+ # @param position [Hash] source position with :line, :column, and :offset keys
26
+ # @param line [Integer, nil] optional line number (keyword alternative)
27
+ # @param column [Integer, nil] optional column number (keyword alternative)
28
+ # @param offset [Integer, nil] optional offset (keyword alternative)
29
+ def initialize(type, value = nil, position = {}, raw_value: nil, line: nil, column: nil, offset: nil)
27
30
  @type = type
28
31
  @value = value
32
+ @raw_value = raw_value.nil? ? value : raw_value
29
33
  @children = []
30
34
  @parent = nil
31
- @position = position
35
+ unless position.nil? || position.is_a?(Hash)
36
+ raise ArgumentError, 'position must be a Hash'
37
+ end
38
+
39
+ resolved_position = position ? position.dup : {}
40
+ resolved_position[:line] = line unless line.nil?
41
+ resolved_position[:column] = column unless column.nil?
42
+ resolved_position[:offset] = offset unless offset.nil?
43
+ @position = resolved_position
44
+ @descendants_cache = nil
32
45
  end
33
46
 
34
47
  # Adds a child node to this node.
@@ -40,9 +53,28 @@ module Parselly
40
53
 
41
54
  node.parent = self
42
55
  @children << node
56
+ invalidate_cache
43
57
  node
44
58
  end
45
59
 
60
+ # Replaces a child node at the specified index.
61
+ #
62
+ # @param index [Integer] the index of the child to replace
63
+ # @param new_node [Node] the new child node
64
+ # @return [Node, nil] the new node, or nil if invalid parameters
65
+ def replace_child(index, new_node)
66
+ return nil if new_node.nil?
67
+ return nil if index < 0 || index >= @children.size
68
+
69
+ old_node = @children[index]
70
+ old_node.parent = nil if old_node
71
+
72
+ @children[index] = new_node
73
+ new_node.parent = self
74
+ invalidate_cache
75
+ new_node
76
+ end
77
+
46
78
  # Returns an array of all ancestor nodes from parent to root.
47
79
  #
48
80
  # @return [Array<Node>] array of ancestor nodes
@@ -60,12 +92,41 @@ module Parselly
60
92
  #
61
93
  # @return [Array<Node>] array of all descendant nodes
62
94
  def descendants
63
- result = []
64
- @children.each do |child|
65
- result << child
66
- result.concat(child.descendants)
95
+ return @descendants_cache if @descendants_cache
96
+
97
+ @descendants_cache = []
98
+ queue = @children.dup
99
+ until queue.empty?
100
+ node = queue.shift
101
+ @descendants_cache << node
102
+ queue.concat(node.children) unless node.children.empty?
67
103
  end
68
- result
104
+ @descendants_cache
105
+ end
106
+
107
+ # Depth-first traversal of this node and its descendants.
108
+ #
109
+ # @return [Enumerator, Node] enumerator if no block, otherwise self
110
+ def each
111
+ return enum_for(:each) unless block_given?
112
+
113
+ stack = [self]
114
+ until stack.empty?
115
+ node = stack.pop
116
+ yield node
117
+ children = node.children
118
+ stack.concat(children.reverse) if children && !children.empty?
119
+ end
120
+
121
+ self
122
+ end
123
+
124
+ # Finds all nodes of a given type in this subtree.
125
+ #
126
+ # @param type [Symbol] the node type to match
127
+ # @return [Array<Node>] array of matching nodes
128
+ def find_all(type)
129
+ each.with_object([]) { |node, acc| acc << node if node.type == type }
69
130
  end
70
131
 
71
132
  # Returns an array of sibling nodes (excluding self).
@@ -150,7 +211,8 @@ module Parselly
150
211
  #
151
212
  # @return [Boolean] true if an ID selector is present
152
213
  def id?
153
- type == :id_selector || descendants.any? { |node| node.type == :id_selector }
214
+ return true if type == :id_selector
215
+ descendants.any? { |node| node.type == :id_selector }
154
216
  end
155
217
 
156
218
  # Extracts the ID value from this node or its descendants.
@@ -159,8 +221,10 @@ module Parselly
159
221
  def id
160
222
  return value if type == :id_selector
161
223
 
162
- id_node = descendants.find { |node| node.type == :id_selector }
163
- id_node&.value
224
+ descendants.each do |node|
225
+ return node.value if node.type == :id_selector
226
+ end
227
+ nil
164
228
  end
165
229
 
166
230
  # Extracts all class names from this node and its descendants.
@@ -179,7 +243,8 @@ module Parselly
179
243
  #
180
244
  # @return [Boolean] true if an attribute selector is present
181
245
  def attribute?
182
- type == :attribute_selector || descendants.any? { |node| node.type == :attribute_selector }
246
+ return true if type == :attribute_selector
247
+ descendants.any? { |node| node.type == :attribute_selector }
183
248
  end
184
249
 
185
250
  # Extracts all attribute selectors from this node and its descendants.
@@ -202,6 +267,24 @@ module Parselly
202
267
  result
203
268
  end
204
269
 
270
+ # Extracts detailed attribute selector nodes from this node and its descendants.
271
+ #
272
+ # @return [Array<Hash>] array of attribute selector detail hashes
273
+ # Each hash contains :name, :operator (optional), and :value (optional) keys
274
+ def attribute_selectors
275
+ result = []
276
+
277
+ if type == :attribute_selector
278
+ result << extract_attribute_node(self)
279
+ end
280
+
281
+ descendants.each do |node|
282
+ result << extract_attribute_node(node) if node.type == :attribute_selector
283
+ end
284
+
285
+ result
286
+ end
287
+
205
288
  # Extracts all pseudo-classes and pseudo-elements from this node and its descendants.
206
289
  #
207
290
  # @return [Array<String>] array of pseudo-class and pseudo-element names
@@ -243,11 +326,22 @@ module Parselly
243
326
  #
244
327
  # @return [Boolean] true if a type selector is present
245
328
  def type_selector?
246
- type == :type_selector || descendants.any? { |node| node.type == :type_selector }
329
+ return true if type == :type_selector
330
+ descendants.any? { |node| node.type == :type_selector }
247
331
  end
248
332
 
249
333
  private
250
334
 
335
+ # Invalidates the descendants cache for this node and all ancestors.
336
+ # This ensures that cached descendants are cleared when the tree structure changes.
337
+ def invalidate_cache
338
+ node = self
339
+ while node
340
+ node.instance_variable_set(:@descendants_cache, nil)
341
+ node = node.parent
342
+ end
343
+ end
344
+
251
345
  # Helper method to extract attribute information from an attribute_selector node.
252
346
  #
253
347
  # @param node [Node] an attribute_selector node
@@ -277,6 +371,36 @@ module Parselly
277
371
  info
278
372
  end
279
373
 
374
+ # Helper method to extract detailed attribute selector data.
375
+ #
376
+ # @param node [Node] an attribute_selector node
377
+ # @return [Hash] attribute selector detail hash
378
+ def extract_attribute_node(node)
379
+ info = {}
380
+
381
+ if node.value
382
+ info[:name] = node.value
383
+ info[:raw_name] = node.raw_value
384
+ return info
385
+ end
386
+
387
+ node.children.each do |child|
388
+ case child.type
389
+ when :attribute
390
+ info[:name] = child.value
391
+ info[:raw_name] = child.raw_value
392
+ when :equal_operator, :includes_operator, :dashmatch_operator,
393
+ :prefixmatch_operator, :suffixmatch_operator, :substringmatch_operator
394
+ info[:operator] = child.value
395
+ when :value
396
+ info[:value] = child.value
397
+ info[:raw_value] = child.raw_value
398
+ end
399
+ end
400
+
401
+ info
402
+ end
403
+
280
404
  # Helper method to build an attribute selector string.
281
405
  #
282
406
  # @return [String] the attribute selector string