parselly 1.0.0 → 1.1.0

This diff shows the differences between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 117ef0c09557018d7129fa29c61565b90432dfeeec3a6b7b8bc9df6f06dcd06a
4
- data.tar.gz: edfa4d22bbc8ffe26e9b6118993b0af67cbb4578471f8b2c62a807dcac554e95
3
+ metadata.gz: c245172165bcac6e4b24a355b6e26a4039960fb8654374587523c3041015bb96
4
+ data.tar.gz: 1897eee14cb66e216422815883375837168e8850d17e87a98ca7a05873d18d58
5
5
  SHA512:
6
- metadata.gz: 72e2ffff39cc66e2fb68da3d8e69eba7e2435d380d6246f6deb2d2800e75ca64e459cd8e3a029a9f4c30f6548125fbcf7bbafd3ed2efd6e31766052de3aceebb
7
- data.tar.gz: bf902d1d2cfcc2f88c5824b8448a8354dfffac62dcdd36e4c33bcff84f2e32f72b9120f674a581d22cab0bcb12e559691a3c507236f67786b94eb34430e255d6
6
+ metadata.gz: 011ea12078d3311c28d00864167fa5cd7a5a9b1afd24feacccb4df2631b00e095aedf231793bd93bf7717b2bf99b7bfd28a6918d7b5d0f3e2794ac3d5f0faa6b
7
+ data.tar.gz: 261006f641a09ecea004423a68601b0a7c4941d35cf13ddd0acd74dfc530a1de19a2e4d5d370cef8a366271a2a713756ba6ce3f7fd1b1fe8fbe464e534a3549a
@@ -31,6 +31,23 @@ module Parselly
31
31
  '*=' => :SUBSTRINGMATCH
32
32
  }.freeze
33
33
 
34
+ # Pre-compiled regular expressions for better performance
35
+ MULTI_CHAR_OPERATORS = [
36
+ [/~=/, :INCLUDES],
37
+ [/\|=/, :DASHMATCH],
38
+ [/\^=/, :PREFIXMATCH],
39
+ [/\$=/, :SUFFIXMATCH],
40
+ [/\*=/, :SUBSTRINGMATCH]
41
+ ].freeze
42
+
43
+ SINGLE_CHAR_OPERATOR_REGEX = /[>+~\[\]():,.#*=-]/.freeze
44
+ WHITESPACE_REGEX = /[ \t\n\r]+/.freeze
45
+ STRING_DOUBLE_REGEX = /"([^"\\]|\\.)*"/.freeze
46
+ STRING_SINGLE_REGEX = /'([^'\\]|\\.)*'/.freeze
47
+ IDENTIFIER_REGEX = /(?:--|-?[a-zA-Z_])(?:[\w-]|\\[^\n\r\f])*/.freeze
48
+ NUMBER_REGEX = /\d+(\.\d+)?/.freeze
49
+ ESCAPE_REGEX = /\\(.)/.freeze
50
+
34
51
  attr_reader :line, :column
35
52
 
36
53
  def initialize(input)
@@ -68,24 +85,29 @@ module Parselly
68
85
  private
69
86
 
70
87
  def skip_whitespace
71
- while @scanner.scan(/[ \t\n\r]+/)
72
- @scanner.matched.each_char do |char|
73
- update_position(char)
88
+ while @scanner.scan(WHITESPACE_REGEX)
89
+ matched = @scanner.matched
90
+ newline_count = matched.count("\n")
91
+ if newline_count > 0
92
+ @line += newline_count
93
+ @column = matched.size - matched.rindex("\n")
94
+ else
95
+ @column += matched.size
74
96
  end
75
97
  end
76
98
  end
77
99
 
78
100
  def scan_operator
79
101
  # Check multi-character operators first
80
- ['~=', '|=', '^=', '$=', '*='].each do |op|
81
- if @scanner.scan(/#{Regexp.escape(op)}/)
102
+ MULTI_CHAR_OPERATORS.each do |regex, token|
103
+ if @scanner.scan(regex)
82
104
  update_position(@scanner.matched)
83
- return TOKENS[op]
105
+ return token
84
106
  end
85
107
  end
86
108
 
87
109
  # Single character operators
88
- return unless @scanner.scan(/[>+~\[\]():,.#*=-]/)
110
+ return unless @scanner.scan(SINGLE_CHAR_OPERATOR_REGEX)
89
111
 
90
112
  char = @scanner.matched
91
113
  update_position(char)
@@ -99,11 +121,11 @@ module Parselly
99
121
  # as raw text for simplicity. Identifiers process escapes to support patterns
100
122
  # like .hover\:bg-blue-500, but strings in attributes don't require this.
101
123
  def scan_string
102
- if @scanner.scan(/"([^"\\]|\\.)*"/)
124
+ if @scanner.scan(STRING_DOUBLE_REGEX)
103
125
  str = @scanner.matched
104
126
  update_position(str)
105
127
  str[1..-2] # Remove quotes
106
- elsif @scanner.scan(/'([^'\\]|\\.)*'/)
128
+ elsif @scanner.scan(STRING_SINGLE_REGEX)
107
129
  str = @scanner.matched
108
130
  update_position(str)
109
131
  str[1..-2] # Remove quotes
@@ -118,16 +140,16 @@ module Parselly
118
140
  # While custom properties are technically only valid in property contexts (not selectors),
119
141
  # this parser accepts them as a superset of valid CSS for flexibility. In practice,
120
142
  # selectors like .--invalid-class would parse but aren't valid CSS selectors.
121
- return unless @scanner.scan(/(?:--|-?[a-zA-Z_])(?:[\w-]|\\[^\n\r\f])*/)
143
+ return unless @scanner.scan(IDENTIFIER_REGEX)
122
144
 
123
145
  ident = @scanner.matched
124
146
  update_position(ident)
125
147
  # Remove backslashes from escaped characters
126
- ident.gsub(/\\(.)/, '\1')
148
+ ident.gsub(ESCAPE_REGEX, '\1')
127
149
  end
128
150
 
129
151
  def scan_number
130
- return unless @scanner.scan(/\d+(\.\d+)?/)
152
+ return unless @scanner.scan(NUMBER_REGEX)
131
153
 
132
154
  num = @scanner.matched
133
155
  update_position(num)
data/lib/parselly/node.rb CHANGED
@@ -29,6 +29,7 @@ module Parselly
29
29
  @children = []
30
30
  @parent = nil
31
31
  @position = position
32
+ @descendants_cache = nil
32
33
  end
33
34
 
34
35
  # Adds a child node to this node.
@@ -40,9 +41,28 @@ module Parselly
40
41
 
41
42
  node.parent = self
42
43
  @children << node
44
+ invalidate_cache
43
45
  node
44
46
  end
45
47
 
48
+ # Replaces a child node at the specified index.
49
+ #
50
+ # @param index [Integer] the index of the child to replace
51
+ # @param new_node [Node] the new child node
52
+ # @return [Node, nil] the new node, or nil if invalid parameters
53
+ def replace_child(index, new_node)
54
+ return nil if new_node.nil?
55
+ return nil if index < 0 || index >= @children.size
56
+
57
+ old_node = @children[index]
58
+ old_node.parent = nil if old_node
59
+
60
+ @children[index] = new_node
61
+ new_node.parent = self
62
+ invalidate_cache
63
+ new_node
64
+ end
65
+
46
66
  # Returns an array of all ancestor nodes from parent to root.
47
67
  #
48
68
  # @return [Array<Node>] array of ancestor nodes
@@ -60,12 +80,16 @@ module Parselly
60
80
  #
61
81
  # @return [Array<Node>] array of all descendant nodes
62
82
  def descendants
63
- result = []
64
- @children.each do |child|
65
- result << child
66
- result.concat(child.descendants)
83
+ return @descendants_cache if @descendants_cache
84
+
85
+ @descendants_cache = []
86
+ queue = @children.dup
87
+ until queue.empty?
88
+ node = queue.shift
89
+ @descendants_cache << node
90
+ queue.concat(node.children) unless node.children.empty?
67
91
  end
68
- result
92
+ @descendants_cache
69
93
  end
70
94
 
71
95
  # Returns an array of sibling nodes (excluding self).
@@ -150,7 +174,8 @@ module Parselly
150
174
  #
151
175
  # @return [Boolean] true if an ID selector is present
152
176
  def id?
153
- type == :id_selector || descendants.any? { |node| node.type == :id_selector }
177
+ return true if type == :id_selector
178
+ descendants.any? { |node| node.type == :id_selector }
154
179
  end
155
180
 
156
181
  # Extracts the ID value from this node or its descendants.
@@ -159,8 +184,10 @@ module Parselly
159
184
  def id
160
185
  return value if type == :id_selector
161
186
 
162
- id_node = descendants.find { |node| node.type == :id_selector }
163
- id_node&.value
187
+ descendants.each do |node|
188
+ return node.value if node.type == :id_selector
189
+ end
190
+ nil
164
191
  end
165
192
 
166
193
  # Extracts all class names from this node and its descendants.
@@ -179,7 +206,8 @@ module Parselly
179
206
  #
180
207
  # @return [Boolean] true if an attribute selector is present
181
208
  def attribute?
182
- type == :attribute_selector || descendants.any? { |node| node.type == :attribute_selector }
209
+ return true if type == :attribute_selector
210
+ descendants.any? { |node| node.type == :attribute_selector }
183
211
  end
184
212
 
185
213
  # Extracts all attribute selectors from this node and its descendants.
@@ -243,11 +271,22 @@ module Parselly
243
271
  #
244
272
  # @return [Boolean] true if a type selector is present
245
273
  def type_selector?
246
- type == :type_selector || descendants.any? { |node| node.type == :type_selector }
274
+ return true if type == :type_selector
275
+ descendants.any? { |node| node.type == :type_selector }
247
276
  end
248
277
 
249
278
  private
250
279
 
280
+ # Invalidates the descendants cache for this node and all ancestors.
281
+ # This ensures that cached descendants are cleared when the tree structure changes.
282
+ def invalidate_cache
283
+ node = self
284
+ while node
285
+ node.instance_variable_set(:@descendants_cache, nil)
286
+ node = node.parent
287
+ end
288
+ end
289
+
251
290
  # Helper method to extract attribute information from an attribute_selector node.
252
291
  #
253
292
  # @param node [Node] an attribute_selector node
@@ -653,11 +653,20 @@ end
653
653
  end
654
654
  ###### racc/parser.rb end
655
655
 
656
+ require 'set'
657
+
658
+ # Pre-computed sets for faster lookup
659
+ CAN_END_COMPOUND = Set[:IDENT, :STAR, :RPAREN, :RBRACKET].freeze
660
+ CAN_START_COMPOUND = Set[:IDENT, :STAR, :DOT, :HASH, :LBRACKET, :COLON].freeze
661
+ TYPE_SELECTOR_TYPES = Set[:IDENT, :STAR].freeze
662
+ SUBCLASS_SELECTOR_TYPES = Set[:DOT, :HASH, :LBRACKET, :COLON].freeze
663
+ NTH_PSEUDO_NAMES = Set['nth-child', 'nth-last-child', 'nth-of-type', 'nth-last-of-type', 'nth-col', 'nth-last-col'].freeze
664
+ AN_PLUS_B_REGEX = /^(even|odd|[+-]?\d*n(?:[+-]\d+)?|[+-]?n(?:[+-]\d+)?|\d+)$/.freeze
656
665
 
657
666
  module Parselly
658
667
  class Parser < Racc::Parser
659
668
 
660
- module_eval(<<'...end parser.y/module_eval...', 'parser.y', 263)
669
+ module_eval(<<'...end parser.y/module_eval...', 'parser.y', 272)
661
670
  def parse(input)
662
671
  @lexer = Parselly::Lexer.new(input)
663
672
  @tokens = @lexer.tokenize
@@ -670,20 +679,27 @@ def parse(input)
670
679
  end
671
680
 
672
681
  def preprocess_tokens!
673
- new_tokens = []
674
- i = 0
675
- while i < @tokens.size
676
- token = @tokens[i]
677
- next_token = @tokens[i + 1]
678
- new_tokens << token
679
- if next_token && needs_descendant?(token, next_token)
680
- pos = { line: token[2][:line], column: token[2][:column] }
681
- new_tokens << [:DESCENDANT, ' ', pos]
682
+ return if @tokens.size <= 1
683
+
684
+ new_tokens = Array.new(@tokens.size + (@tokens.size / 2)) # Pre-allocate with conservative estimate
685
+ new_tokens_idx = 0
686
+
687
+ last_idx = @tokens.size - 1
688
+ @tokens.each_with_index do |token, i|
689
+ new_tokens[new_tokens_idx] = token
690
+ new_tokens_idx += 1
691
+
692
+ if i < last_idx
693
+ next_token = @tokens[i + 1]
694
+ if needs_descendant?(token, next_token)
695
+ pos = { line: token[2][:line], column: token[2][:column] }
696
+ new_tokens[new_tokens_idx] = [:DESCENDANT, ' ', pos]
697
+ new_tokens_idx += 1
698
+ end
682
699
  end
683
- i += 1
684
700
  end
685
701
 
686
- @tokens = new_tokens
702
+ @tokens = new_tokens.first(new_tokens_idx)
687
703
  end
688
704
 
689
705
  # Insert DESCENDANT combinator if:
@@ -695,62 +711,39 @@ def needs_descendant?(current, next_tok)
695
711
  current_type = current[0]
696
712
  next_type = next_tok[0]
697
713
 
698
- can_end = can_end_compound?(current_type)
699
- can_start = can_start_compound?(next_type)
700
-
701
714
  # Type selector followed by subclass selector = same compound
702
- if [:IDENT, :STAR].include?(current_type) &&
703
- [:DOT, :HASH, :LBRACKET, :COLON].include?(next_type)
704
- return false
705
- end
706
-
707
- can_end && can_start
708
- end
715
+ return false if TYPE_SELECTOR_TYPES.include?(current_type) &&
716
+ SUBCLASS_SELECTOR_TYPES.include?(next_type)
709
717
 
710
- def can_end_compound?(token_type)
711
- [:IDENT, :STAR, :RPAREN, :RBRACKET].include?(token_type)
712
- end
713
-
714
- def can_start_compound?(token_type)
715
- # Type selectors and subclass selectors can start a compound selector
716
- [:IDENT, :STAR, :DOT, :HASH, :LBRACKET, :COLON].include?(token_type)
718
+ CAN_END_COMPOUND.include?(current_type) && CAN_START_COMPOUND.include?(next_type)
717
719
  end
718
720
 
719
721
  def normalize_an_plus_b(node)
720
722
  return unless node.respond_to?(:children) && node.children
721
723
 
722
- if node.type == :pseudo_function && nth_pseudo?(node.value)
724
+ if node.type == :pseudo_function && NTH_PSEUDO_NAMES.include?(node.value)
723
725
  child = node.children.first
724
- if child && child.type == :selector_list
726
+ if child&.type == :selector_list
725
727
  an_plus_b_value = extract_an_plus_b_value(child)
726
728
  if an_plus_b_value
727
- node.children[0] = Node.new(:an_plus_b, an_plus_b_value, child.position)
729
+ node.replace_child(0, Node.new(:an_plus_b, an_plus_b_value, child.position))
728
730
  end
729
731
  end
730
732
  end
731
733
  node.children.compact.each { |child| normalize_an_plus_b(child) }
732
734
  end
733
735
 
734
- def nth_pseudo?(name)
735
- %w[nth-child nth-last-child nth-of-type nth-last-of-type nth-col nth-last-col].include?(name)
736
- end
737
-
738
736
  def extract_an_plus_b_value(selector_list_node)
739
737
  return nil unless selector_list_node.children.size == 1
740
738
 
741
739
  seq = selector_list_node.children.first
742
- return nil unless seq.type == :simple_selector_sequence
743
- return nil unless seq.children.size == 1
740
+ return nil unless seq.type == :simple_selector_sequence && seq.children.size == 1
744
741
 
745
742
  type_sel = seq.children.first
746
743
  return nil unless type_sel.type == :type_selector
747
744
 
748
745
  value = type_sel.value
749
- if value =~ /^(even|odd|[+-]?\d*n(?:[+-]\d+)?|[+-]?n(?:[+-]\d+)?|\d+)$/
750
- value
751
- else
752
- nil
753
- end
746
+ value if value =~ AN_PLUS_B_REGEX
754
747
  end
755
748
 
756
749
  def next_token
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Parselly
4
- VERSION = '1.0.0'
4
+ VERSION = '1.1.0'
5
5
  end
data/parser.y CHANGED
@@ -258,6 +258,15 @@ rule
258
258
  end
259
259
 
260
260
  ---- header
261
+ require 'set'
262
+
263
+ # Pre-computed sets for faster lookup
264
+ CAN_END_COMPOUND = Set[:IDENT, :STAR, :RPAREN, :RBRACKET].freeze
265
+ CAN_START_COMPOUND = Set[:IDENT, :STAR, :DOT, :HASH, :LBRACKET, :COLON].freeze
266
+ TYPE_SELECTOR_TYPES = Set[:IDENT, :STAR].freeze
267
+ SUBCLASS_SELECTOR_TYPES = Set[:DOT, :HASH, :LBRACKET, :COLON].freeze
268
+ NTH_PSEUDO_NAMES = Set['nth-child', 'nth-last-child', 'nth-of-type', 'nth-last-of-type', 'nth-col', 'nth-last-col'].freeze
269
+ AN_PLUS_B_REGEX = /^(even|odd|[+-]?\d*n(?:[+-]\d+)?|[+-]?n(?:[+-]\d+)?|\d+)$/.freeze
261
270
 
262
271
  ---- inner
263
272
  def parse(input)
@@ -272,20 +281,27 @@ def parse(input)
272
281
  end
273
282
 
274
283
  def preprocess_tokens!
275
- new_tokens = []
276
- i = 0
277
- while i < @tokens.size
278
- token = @tokens[i]
279
- next_token = @tokens[i + 1]
280
- new_tokens << token
281
- if next_token && needs_descendant?(token, next_token)
282
- pos = { line: token[2][:line], column: token[2][:column] }
283
- new_tokens << [:DESCENDANT, ' ', pos]
284
+ return if @tokens.size <= 1
285
+
286
+ new_tokens = Array.new(@tokens.size + (@tokens.size / 2)) # Pre-allocate with conservative estimate
287
+ new_tokens_idx = 0
288
+
289
+ last_idx = @tokens.size - 1
290
+ @tokens.each_with_index do |token, i|
291
+ new_tokens[new_tokens_idx] = token
292
+ new_tokens_idx += 1
293
+
294
+ if i < last_idx
295
+ next_token = @tokens[i + 1]
296
+ if needs_descendant?(token, next_token)
297
+ pos = { line: token[2][:line], column: token[2][:column] }
298
+ new_tokens[new_tokens_idx] = [:DESCENDANT, ' ', pos]
299
+ new_tokens_idx += 1
300
+ end
284
301
  end
285
- i += 1
286
302
  end
287
303
 
288
- @tokens = new_tokens
304
+ @tokens = new_tokens.first(new_tokens_idx)
289
305
  end
290
306
 
291
307
  # Insert DESCENDANT combinator if:
@@ -297,62 +313,39 @@ def needs_descendant?(current, next_tok)
297
313
  current_type = current[0]
298
314
  next_type = next_tok[0]
299
315
 
300
- can_end = can_end_compound?(current_type)
301
- can_start = can_start_compound?(next_type)
302
-
303
316
  # Type selector followed by subclass selector = same compound
304
- if [:IDENT, :STAR].include?(current_type) &&
305
- [:DOT, :HASH, :LBRACKET, :COLON].include?(next_type)
306
- return false
307
- end
308
-
309
- can_end && can_start
310
- end
317
+ return false if TYPE_SELECTOR_TYPES.include?(current_type) &&
318
+ SUBCLASS_SELECTOR_TYPES.include?(next_type)
311
319
 
312
- def can_end_compound?(token_type)
313
- [:IDENT, :STAR, :RPAREN, :RBRACKET].include?(token_type)
314
- end
315
-
316
- def can_start_compound?(token_type)
317
- # Type selectors and subclass selectors can start a compound selector
318
- [:IDENT, :STAR, :DOT, :HASH, :LBRACKET, :COLON].include?(token_type)
320
+ CAN_END_COMPOUND.include?(current_type) && CAN_START_COMPOUND.include?(next_type)
319
321
  end
320
322
 
321
323
  def normalize_an_plus_b(node)
322
324
  return unless node.respond_to?(:children) && node.children
323
325
 
324
- if node.type == :pseudo_function && nth_pseudo?(node.value)
326
+ if node.type == :pseudo_function && NTH_PSEUDO_NAMES.include?(node.value)
325
327
  child = node.children.first
326
- if child && child.type == :selector_list
328
+ if child&.type == :selector_list
327
329
  an_plus_b_value = extract_an_plus_b_value(child)
328
330
  if an_plus_b_value
329
- node.children[0] = Node.new(:an_plus_b, an_plus_b_value, child.position)
331
+ node.replace_child(0, Node.new(:an_plus_b, an_plus_b_value, child.position))
330
332
  end
331
333
  end
332
334
  end
333
335
  node.children.compact.each { |child| normalize_an_plus_b(child) }
334
336
  end
335
337
 
336
- def nth_pseudo?(name)
337
- %w[nth-child nth-last-child nth-of-type nth-last-of-type nth-col nth-last-col].include?(name)
338
- end
339
-
340
338
  def extract_an_plus_b_value(selector_list_node)
341
339
  return nil unless selector_list_node.children.size == 1
342
340
 
343
341
  seq = selector_list_node.children.first
344
- return nil unless seq.type == :simple_selector_sequence
345
- return nil unless seq.children.size == 1
342
+ return nil unless seq.type == :simple_selector_sequence && seq.children.size == 1
346
343
 
347
344
  type_sel = seq.children.first
348
345
  return nil unless type_sel.type == :type_selector
349
346
 
350
347
  value = type_sel.value
351
- if value =~ /^(even|odd|[+-]?\d*n(?:[+-]\d+)?|[+-]?n(?:[+-]\d+)?|\d+)$/
352
- value
353
- else
354
- nil
355
- end
348
+ value if value =~ AN_PLUS_B_REGEX
356
349
  end
357
350
 
358
351
  def next_token
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: parselly
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.0.0
4
+ version: 1.1.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Yudai Takada