parselly 0.1.0 → 1.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 4c687b7a170537d06d5a7fa1bbcc374c0826b73c167917cf42a44edd7eea9928
4
- data.tar.gz: 2dcce5b4d335f893ad3a30587e264c1c0dd6d0177d2f1a4790b23096b8f0a257
3
+ metadata.gz: c245172165bcac6e4b24a355b6e26a4039960fb8654374587523c3041015bb96
4
+ data.tar.gz: 1897eee14cb66e216422815883375837168e8850d17e87a98ca7a05873d18d58
5
5
  SHA512:
6
- metadata.gz: 12982469258ee3f3ce04343948d6a605cbcdb91c87e1af25b3dd2233c1cb29512e89716dcf765c6680909096e78f23a40041ff5b25d753c058b2e34afac82fed
7
- data.tar.gz: 7cc9778bd541fc4a4c160d407ce3ee85399efadd7532220053b6e5700e9ae962e95657856cb1ba6c4a12cb955a931446b9dd98918e8c4d39455f0ce2b0b0af8c
6
+ metadata.gz: 011ea12078d3311c28d00864167fa5cd7a5a9b1afd24feacccb4df2631b00e095aedf231793bd93bf7717b2bf99b7bfd28a6918d7b5d0f3e2794ac3d5f0faa6b
7
+ data.tar.gz: 261006f641a09ecea004423a68601b0a7c4941d35cf13ddd0acd74dfc530a1de19a2e4d5d370cef8a366271a2a713756ba6ce3f7fd1b1fe8fbe464e534a3549a
data/.rspec ADDED
@@ -0,0 +1 @@
1
+ --require spec_helper
data/README.md CHANGED
@@ -1,4 +1,4 @@
1
- # Parselly
1
+ # Parselly [![Gem Version](https://badge.fury.io/rb/parselly.svg)](https://badge.fury.io/rb/parselly) [![CI](https://github.com/ydah/parselly/actions/workflows/test.yml/badge.svg)](https://github.com/ydah/parselly/actions/workflows/test.yml)
2
2
 
3
3
  Parselly is a module providing a simple way to parse and extract data from a css selector.
4
4
 
data/Rakefile CHANGED
@@ -1,11 +1,18 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- require "bundler/gem_tasks"
4
- require "rake/testtask"
3
+ require 'bundler/gem_tasks'
5
4
 
6
- Rake::TestTask.new(:test) do |t|
7
- t.libs << "test/lib"
8
- t.test_files = FileList["test/**/test_*.rb"]
5
+ namespace 'build' do
6
+ desc 'build parser from parser.y'
7
+ task :parser do
8
+ sh 'bundle exec racc parser.y --embedded --frozen -o lib/parselly/parser.rb -t --log-file=parser.output'
9
+ end
9
10
  end
10
11
 
11
- task default: :test
12
+ require 'rspec/core/rake_task'
13
+ RSpec::Core::RakeTask.new(:spec) do |spec|
14
+ spec.pattern = FileList['spec/**/*_spec.rb']
15
+ end
16
+ task spec: 'build:parser'
17
+
18
+ task default: :spec
@@ -0,0 +1,170 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'strscan'
4
+
5
module Parselly
  # Splits a CSS selector string into a flat list of tokens.
  #
  # Every token is a three-element array `[type, value, position]` where
  # `position` is a Hash with 1-based `:line` and `:column` keys describing
  # where the token starts. The list always ends with the end-of-input marker
  # `[false, nil, position]`.
  #
  # Whitespace is skipped and never produces a token of its own.
  class Lexer
    # Maps the literal text of every operator/delimiter to its token type.
    TOKENS = {
      # Combinators
      '>' => :CHILD,
      '+' => :ADJACENT,
      '~' => :SIBLING,

      # Delimiters
      '[' => :LBRACKET,
      ']' => :RBRACKET,
      '(' => :LPAREN,
      ')' => :RPAREN,
      ':' => :COLON,
      ',' => :COMMA,
      '.' => :DOT,
      '#' => :HASH,
      '*' => :STAR,
      '=' => :EQUAL,
      '-' => :MINUS,

      # Attribute operators
      '~=' => :INCLUDES,
      '|=' => :DASHMATCH,
      '^=' => :PREFIXMATCH,
      '$=' => :SUFFIXMATCH,
      '*=' => :SUBSTRINGMATCH
    }.freeze

    # Two-character attribute operators, tried before the single-character
    # operators so that e.g. `~=` is not split into `~` and `=`.
    MULTI_CHAR_OPERATORS = [
      [/~=/, :INCLUDES],
      [/\|=/, :DASHMATCH],
      [/\^=/, :PREFIXMATCH],
      [/\$=/, :SUFFIXMATCH],
      [/\*=/, :SUBSTRINGMATCH]
    ].freeze

    SINGLE_CHAR_OPERATOR_REGEX = /[>+~\[\]():,.#*=-]/.freeze
    WHITESPACE_REGEX = /[ \t\n\r]+/.freeze
    STRING_DOUBLE_REGEX = /"([^"\\]|\\.)*"/.freeze
    STRING_SINGLE_REGEX = /'([^'\\]|\\.)*'/.freeze
    IDENTIFIER_REGEX = /(?:--|-?[a-zA-Z_])(?:[\w-]|\\[^\n\r\f])*/.freeze
    NUMBER_REGEX = /\d+(\.\d+)?/.freeze
    ESCAPE_REGEX = /\\(.)/.freeze

    attr_reader :line, :column

    # @param input [String] the selector text to tokenize
    def initialize(input)
      @scanner = StringScanner.new(input)
      @line = 1
      @column = 1
      @tokens = []
      @finished = false
    end

    # Tokenizes the whole input.
    #
    # The result is memoized once the end of input has been reached, so
    # calling this method again returns the same list instead of appending a
    # second end-of-input marker.
    #
    # @return [Array<Array>] list of `[type, value, position]` tokens,
    #   terminated by `[false, nil, position]`
    # @raise [RuntimeError] when a character cannot start any token
    def tokenize
      return @tokens if @finished

      until @scanner.eos?
        skip_whitespace
        break if @scanner.eos?

        @tokens << next_token
      end

      @tokens << [false, nil, { line: @line, column: @column }]
      @finished = true
      @tokens
    end

    private

    # Scans exactly one token at the current scanner position.
    # The order matters: strings, numbers and operators are tried before
    # identifiers, so a leading `-` is always emitted as :MINUS.
    def next_token
      pos = { line: @line, column: @column }

      if (text = scan_string)
        [:STRING, text, pos]
      elsif (text = scan_number)
        [:NUMBER, text, pos]
      elsif (type = scan_operator)
        # scan_operator returns the token type; the operator text is still
        # available as the scanner's last match.
        [type, @scanner.matched, pos]
      elsif (text = scan_identifier)
        [:IDENT, text, pos]
      else
        char = @scanner.getch
        raise "Unexpected character: #{char} at #{pos[:line]}:#{pos[:column]}"
      end
    end

    # Consumes a run of whitespace (if any) and advances line/column.
    # WHITESPACE_REGEX is greedy, so a single scan consumes the whole run.
    def skip_whitespace
      update_position(@scanner.matched) if @scanner.scan(WHITESPACE_REGEX)
    end

    # @return [Symbol, nil] the operator's token type, or nil if none matched
    def scan_operator
      # Check multi-character operators first
      _, multi = MULTI_CHAR_OPERATORS.find { |regex, _type| @scanner.scan(regex) }
      if multi
        update_position(@scanner.matched)
        return multi
      end

      # Single character operators
      return unless @scanner.scan(SINGLE_CHAR_OPERATOR_REGEX)

      char = @scanner.matched
      update_position(char)
      TOKENS[char]
    end

    # NOTE: Unlike identifiers (where backslash escapes are processed),
    # escape sequences inside strings (e.g., \n, \", \', \\) are NOT processed.
    # The raw string content is returned as-is after removing outer quotes.
    #
    # @return [String, nil] the string content without its quotes, or nil
    def scan_string
      quoted = @scanner.scan(STRING_DOUBLE_REGEX) || @scanner.scan(STRING_SINGLE_REGEX)
      return unless quoted

      update_position(quoted)
      quoted[1..-2] # Remove quotes
    end

    # Matches identifiers with optional escape sequences; CSS allows
    # \<any-char> as an escape in identifiers (e.g., .hover\:bg-blue-500).
    # The backslashes are removed from the returned text.
    #
    # @return [String, nil] the unescaped identifier, or nil
    def scan_identifier
      return unless @scanner.scan(IDENTIFIER_REGEX)

      ident = @scanner.matched
      update_position(ident)
      # Remove backslashes from escaped characters
      ident.gsub(ESCAPE_REGEX, '\1')
    end

    # @return [String, nil] the number exactly as written, or nil
    def scan_number
      return unless @scanner.scan(NUMBER_REGEX)

      num = @scanner.matched
      update_position(num)
      num
    end

    # Advances @line/@column past +text+: a "\n" starts a new line at
    # column 1, every other character moves one column to the right.
    def update_position(text)
      text.each_char do |char|
        if char == "\n"
          @line += 1
          @column = 1
        else
          @column += 1
        end
      end
    end
  end
end
@@ -0,0 +1,350 @@
1
+ # frozen_string_literal: true
2
+
3
module Parselly
  # Represents a node in the Abstract Syntax Tree (AST) for CSS selectors.
  #
  # Each Node represents a parsed CSS selector component (e.g., type selector,
  # class selector, combinator, or selector list) with its type, optional value,
  # child nodes, parent reference, and source position.
  #
  # @example Creating a simple AST node
  #   node = Parselly::Node.new(:type_selector, 'div', { line: 1, column: 1 })
  #   node.add_child(Parselly::Node.new(:class_selector, 'container'))
  #
  # @example Traversing the AST
  #   node.ancestors    # Returns array of ancestor nodes
  #   node.descendants  # Returns array of all descendant nodes
  #   node.siblings     # Returns array of sibling nodes
  class Node
    # Node types that carry an attribute-matching operator as their value.
    ATTRIBUTE_OPERATOR_TYPES = %i[
      equal_operator includes_operator dashmatch_operator
      prefixmatch_operator suffixmatch_operator substringmatch_operator
    ].freeze

    # Node types reported by #pseudo_classes.
    PSEUDO_TYPES = %i[pseudo_class pseudo_element pseudo_function].freeze

    private_constant :ATTRIBUTE_OPERATOR_TYPES, :PSEUDO_TYPES

    attr_accessor :type, :value, :parent, :position
    attr_reader :children

    # Creates a new AST node.
    #
    # @param type [Symbol] the type of the node (e.g., :type_selector, :class_selector)
    # @param value [String, nil] optional value associated with the node
    # @param position [Hash] source position with :line and :column keys
    def initialize(type, value = nil, position = {})
      @type = type
      @value = value
      @children = []
      @parent = nil
      @position = position
      @descendants_cache = nil
    end

    # Replaces the list of children wholesale.
    #
    # Only the list is swapped (parent references of the given nodes are left
    # alone); the descendants cache of this node and its ancestors is dropped
    # so that #descendants does not keep reporting the previous children.
    #
    # @param nodes [Array<Node>] the new children
    def children=(nodes)
      @children = nodes
      invalidate_cache
    end

    # Adds a child node to this node.
    #
    # @param node [Node, nil] the child node to add
    # @return [Node, nil] the added node, or nil if the input was nil
    def add_child(node)
      return nil if node.nil?

      node.parent = self
      @children << node
      invalidate_cache
      node
    end

    # Replaces a child node at the specified index.
    #
    # @param index [Integer] the index of the child to replace
    # @param new_node [Node] the new child node
    # @return [Node, nil] the new node, or nil if invalid parameters
    def replace_child(index, new_node)
      return nil if new_node.nil?
      return nil if index < 0 || index >= @children.size

      # Detach the node that is being replaced.
      @children[index]&.parent = nil

      @children[index] = new_node
      new_node.parent = self
      invalidate_cache
      new_node
    end

    # Returns an array of all ancestor nodes from parent to root.
    #
    # @return [Array<Node>] array of ancestor nodes
    def ancestors
      chain = []
      current = parent
      while current
        chain << current
        current = current.parent
      end
      chain
    end

    # Returns an array of all descendant nodes (children, grandchildren, etc.)
    # in breadth-first order.
    #
    # The traversal result is cached internally; a copy is returned so that
    # callers who modify the returned array cannot corrupt the cache.
    #
    # @return [Array<Node>] array of all descendant nodes
    def descendants
      cached_descendants.dup
    end

    # Returns an array of sibling nodes (excluding self).
    #
    # @return [Array<Node>] array of sibling nodes, or empty array if no parent
    def siblings
      return [] unless parent

      parent.children.reject { |child| child.equal?(self) }
    end

    # Returns a tree representation of this node and its descendants.
    #
    # @param indent [Integer] indentation level for the tree display
    # @return [String] formatted tree string
    def to_tree(indent = 0)
      prefix = ' ' * indent
      # A node without position information (nil or empty) prints no suffix.
      pos_info = position.nil? || position.empty? ? '' : " [#{position[:line]}:#{position[:column]}]"
      header = "#{prefix}#{type}#{"(#{value.inspect})" if value}#{pos_info}"

      [header, *children.map { |child| child.to_tree(indent + 1) }].join("\n")
    end

    def inspect
      "#<#{self.class.name} type=#{type} value=#{value.inspect} children=#{children.size}>"
    end

    # Converts the AST node back to a CSS selector string.
    #
    # @return [String] the CSS selector string representation of this node
    def to_selector
      case type
      when :selector_list
        children.map(&:to_selector).join(', ')
      when :id_selector
        "##{value}"
      when :class_selector
        ".#{value}"
      when :attribute_selector
        build_attribute_selector
      when :pseudo_class
        ":#{value}"
      when :pseudo_element
        "::#{value}"
      when :pseudo_function
        ":#{value}(#{children.map(&:to_selector).join})"
      when :child_combinator
        ' > '
      when :adjacent_combinator
        ' + '
      when :sibling_combinator
        ' ~ '
      when :descendant_combinator
        ' '
      when :type_selector, :universal_selector, :an_plus_b, :argument,
           :attribute, :value, *ATTRIBUTE_OPERATOR_TYPES
        # Leaf nodes serialise to their stored text.
        value
      else
        # :selector, :simple_selector_sequence and any unknown container type
        # serialise to the concatenation of their children.
        children.map(&:to_selector).join
      end
    end

    # Checks if this node or any descendant contains an ID selector.
    #
    # @return [Boolean] true if an ID selector is present
    def id?
      subtree.any? { |node| node.type == :id_selector }
    end

    # Extracts the ID value from this node or its descendants.
    #
    # @return [String, nil] the ID value without the '#' prefix, or nil if no ID selector is found
    def id
      subtree.find { |node| node.type == :id_selector }&.value
    end

    # Extracts all class names from this node and its descendants.
    #
    # @return [Array<String>] array of class names without the '.' prefix
    def classes
      nodes_of_type(:class_selector).map(&:value)
    end

    # Checks if this node or any descendant contains an attribute selector.
    #
    # @return [Boolean] true if an attribute selector is present
    def attribute?
      subtree.any? { |node| node.type == :attribute_selector }
    end

    # Extracts all attribute selectors from this node and its descendants.
    #
    # @return [Array<Hash>] array of attribute information hashes
    #   Each hash contains :name, :operator (optional), and :value (optional) keys
    def attributes
      nodes_of_type(:attribute_selector).map { |node| extract_attribute_info(node) }
    end

    # Extracts all pseudo-classes and pseudo-elements from this node and its descendants.
    #
    # @return [Array<String>] array of pseudo-class and pseudo-element names
    def pseudo_classes
      nodes_of_type(*PSEUDO_TYPES).map(&:value)
    end

    # Checks whether more than one kind of simple selector (id, class,
    # attribute, pseudo, type) occurs in this node or its descendants,
    # e.g. `div.class#id[attr]:hover`.
    #
    # @return [Boolean] true if more than one simple selector kind is present
    def compound_selector?
      kinds = [id?, !classes.empty?, attribute?, !pseudo_classes.empty?, type_selector?]
      kinds.count(true) > 1
    end

    # Checks if this node or any descendant contains a type selector.
    #
    # @return [Boolean] true if a type selector is present
    def type_selector?
      subtree.any? { |node| node.type == :type_selector }
    end

    protected

    # Drops this node's cached descendants list. Protected so that
    # #invalidate_cache can call it on ancestor nodes.
    def reset_descendants_cache
      @descendants_cache = nil
    end

    private

    # Invalidates the descendants cache for this node and all ancestors.
    # This ensures that cached descendants are cleared when the tree structure changes.
    def invalidate_cache
      current = self
      while current
        current.reset_descendants_cache
        current = current.parent
      end
    end

    # Returns the internal (shared, do-not-mutate) descendants list,
    # computing it on first use.
    def cached_descendants
      @descendants_cache ||= collect_descendants
    end

    # Breadth-first walk over all nodes below this one.
    def collect_descendants
      found = []
      queue = @children.dup
      until queue.empty?
        current = queue.shift
        found << current
        queue.concat(current.children)
      end
      found
    end

    # This node followed by all of its descendants.
    def subtree
      [self, *cached_descendants]
    end

    # Nodes of the subtree whose type is one of +types+, in subtree order.
    def nodes_of_type(*types)
      subtree.select { |node| types.include?(node.type) }
    end

    # Helper method to extract attribute information from an attribute_selector node.
    #
    # @param node [Node] an attribute_selector node
    # @return [Hash] attribute information hash
    def extract_attribute_info(node)
      # Simple attribute selector like [disabled]
      return { name: node.value } if node.value

      # Attribute selector with operator and value like [type="text"]
      node.children.each_with_object({}) do |child, info|
        case child.type
        when :attribute
          info[:name] = child.value
        when *ATTRIBUTE_OPERATOR_TYPES
          info[:operator] = child.value
        when :value
          info[:value] = child.value
        end
      end
    end

    # Helper method to build an attribute selector string.
    #
    # @return [String] the attribute selector string
    def build_attribute_selector
      info = extract_attribute_info(self)
      return "[#{info[:name]}]" unless info[:operator] && info[:value]

      "[#{info[:name]}#{info[:operator]}#{quote_attribute_value(info[:value])}]"
    end

    # Wraps an attribute value in quotes so that the result is a valid CSS
    # string. Values are stored raw (backslash escapes are kept verbatim), so:
    # * without a bare `"`, double quotes are used;
    # * with a bare `"` but no bare `'`, single quotes are used, leaving the
    #   value untouched;
    # * otherwise double quotes are used and each bare `"` is backslash-escaped.
    def quote_attribute_value(raw)
      text = raw.to_s
      return "\"#{text}\"" unless bare_quote?(text, '"')
      return "'#{text}'" unless bare_quote?(text, "'")

      # Match escape pairs first so an already escaped quote is left alone.
      escaped = text.gsub(/\\.|"/m) { |match| match == '"' ? '\\"' : match }
      "\"#{escaped}\""
    end

    # True when +text+ contains +quote+ not preceded by an escaping backslash.
    def bare_quote?(text, quote)
      text.scan(/\\.|["']/m).include?(quote)
    end
  end
end