parselly 0.1.0 → 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 4c687b7a170537d06d5a7fa1bbcc374c0826b73c167917cf42a44edd7eea9928
4
- data.tar.gz: 2dcce5b4d335f893ad3a30587e264c1c0dd6d0177d2f1a4790b23096b8f0a257
3
+ metadata.gz: 117ef0c09557018d7129fa29c61565b90432dfeeec3a6b7b8bc9df6f06dcd06a
4
+ data.tar.gz: edfa4d22bbc8ffe26e9b6118993b0af67cbb4578471f8b2c62a807dcac554e95
5
5
  SHA512:
6
- metadata.gz: 12982469258ee3f3ce04343948d6a605cbcdb91c87e1af25b3dd2233c1cb29512e89716dcf765c6680909096e78f23a40041ff5b25d753c058b2e34afac82fed
7
- data.tar.gz: 7cc9778bd541fc4a4c160d407ce3ee85399efadd7532220053b6e5700e9ae962e95657856cb1ba6c4a12cb955a931446b9dd98918e8c4d39455f0ce2b0b0af8c
6
+ metadata.gz: 72e2ffff39cc66e2fb68da3d8e69eba7e2435d380d6246f6deb2d2800e75ca64e459cd8e3a029a9f4c30f6548125fbcf7bbafd3ed2efd6e31766052de3aceebb
7
+ data.tar.gz: bf902d1d2cfcc2f88c5824b8448a8354dfffac62dcdd36e4c33bcff84f2e32f72b9120f674a581d22cab0bcb12e559691a3c507236f67786b94eb34430e255d6
data/.rspec ADDED
@@ -0,0 +1 @@
1
+ --require spec_helper
data/README.md CHANGED
@@ -1,4 +1,4 @@
1
- # Parselly
1
+ # Parselly [![Gem Version](https://badge.fury.io/rb/parselly.svg)](https://badge.fury.io/rb/parselly) [![CI](https://github.com/ydah/parselly/actions/workflows/test.yml/badge.svg)](https://github.com/ydah/parselly/actions/workflows/test.yml)
2
2
 
3
3
  Parselly is a module that provides a simple way to parse and extract data from a CSS selector.
4
4
 
data/Rakefile CHANGED
@@ -1,11 +1,18 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- require "bundler/gem_tasks"
4
- require "rake/testtask"
3
+ require 'bundler/gem_tasks'
5
4
 
6
- Rake::TestTask.new(:test) do |t|
7
- t.libs << "test/lib"
8
- t.test_files = FileList["test/**/test_*.rb"]
5
+ namespace 'build' do
6
+ desc 'build parser from parser.y'
7
+ task :parser do
8
+ sh 'bundle exec racc parser.y --embedded --frozen -o lib/parselly/parser.rb -t --log-file=parser.output'
9
+ end
9
10
  end
10
11
 
11
- task default: :test
12
+ require 'rspec/core/rake_task'
13
+ RSpec::Core::RakeTask.new(:spec) do |spec|
14
+ spec.pattern = FileList['spec/**/*_spec.rb']
15
+ end
16
+ task spec: 'build:parser'
17
+
18
+ task default: :spec
@@ -0,0 +1,148 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'strscan'
4
+
5
module Parselly
  # Splits a CSS selector string into a flat list of tokens.
  #
  # Every token is a three-element array `[type, value, position]` where
  # `type` is a Symbol (or `false` for the end-of-input sentinel), `value` is
  # the matched text (or `nil` for the sentinel) and `position` is a Hash with
  # `:line` and `:column` keys (both 1-based) pointing at the token's first
  # character.
  #
  # Whitespace between tokens is skipped and never emitted as a token.
  #
  # @example
  #   Parselly::Lexer.new('a > .b').tokenize.map(&:first)
  #   # => [:IDENT, :CHILD, :DOT, :IDENT, false]
  class Lexer
    TOKENS = {
      # Combinators
      '>' => :CHILD,
      '+' => :ADJACENT,
      '~' => :SIBLING,

      # Delimiters
      '[' => :LBRACKET,
      ']' => :RBRACKET,
      '(' => :LPAREN,
      ')' => :RPAREN,
      ':' => :COLON,
      ',' => :COMMA,
      '.' => :DOT,
      '#' => :HASH,
      '*' => :STAR,
      '=' => :EQUAL,
      '-' => :MINUS,

      # Attribute operators
      '~=' => :INCLUDES,
      '|=' => :DASHMATCH,
      '^=' => :PREFIXMATCH,
      '$=' => :SUFFIXMATCH,
      '*=' => :SUBSTRINGMATCH
    }.freeze

    # Runs of whitespace that separate tokens.
    WHITESPACE_PATTERN = /[ \t\n\r]+/
    # Two-character attribute operators; tried before the single-character
    # ones so that e.g. `~=` is not split into `~` and `=`.
    MULTI_CHAR_OPERATOR_PATTERN = /~=|\|=|\^=|\$=|\*=/
    # Every single-character operator/delimiter listed in TOKENS.
    SINGLE_CHAR_OPERATOR_PATTERN = /[>+~\[\]():,.#*=-]/
    # Double- or single-quoted string; a backslash escapes the next character
    # (except a newline, which `.` does not match).
    STRING_PATTERN = /"(?:[^"\\]|\\.)*"|'(?:[^'\\]|\\.)*'/
    # Identifier with optional backslash escapes (e.g. hover\:bg-blue-500).
    IDENTIFIER_PATTERN = /(?:--|-?[a-zA-Z_])(?:[\w-]|\\[^\n\r\f])*/
    # Integer or decimal number.
    NUMBER_PATTERN = /\d+(\.\d+)?/

    private_constant :WHITESPACE_PATTERN, :MULTI_CHAR_OPERATOR_PATTERN,
                     :SINGLE_CHAR_OPERATOR_PATTERN, :STRING_PATTERN,
                     :IDENTIFIER_PATTERN, :NUMBER_PATTERN

    attr_reader :line, :column

    # @param input [String] the selector text to tokenize
    def initialize(input)
      @scanner = StringScanner.new(input)
      @line = 1
      @column = 1
      @tokens = []
      @tokenized = false
    end

    # Tokenizes the whole input.
    #
    # The result always ends with the sentinel `[false, nil, position]`.
    # The token list is cached once complete, so calling this method again
    # returns the same list instead of appending a second sentinel.
    #
    # @return [Array<Array>] list of `[type, value, position]` tokens
    # @raise [RuntimeError] when a character cannot start any token
    def tokenize
      return @tokens if @tokenized

      until @scanner.eos?
        skip_whitespace
        break if @scanner.eos?

        @tokens << next_token({ line: @line, column: @column })
      end

      @tokens << [false, nil, { line: @line, column: @column }]
      @tokenized = true
      @tokens
    end

    private

    # Scans exactly one token at the current scanner position.
    #
    # Operators are tried before identifiers, so a leading `-` is always
    # emitted as :MINUS and never becomes part of the following identifier.
    def next_token(pos)
      if (text = scan_string)
        [:STRING, text, pos]
      elsif (text = scan_number)
        [:NUMBER, text, pos]
      elsif (type = scan_operator)
        [type, @scanner.matched, pos]
      elsif (text = scan_identifier)
        [:IDENT, text, pos]
      else
        char = @scanner.getch
        raise "Unexpected character: #{char} at #{pos[:line]}:#{pos[:column]}"
      end
    end

    # Consumes a run of whitespace (if any) and advances line/column.
    def skip_whitespace
      blanks = @scanner.scan(WHITESPACE_PATTERN)
      update_position(blanks) if blanks
    end

    # Scans one operator or delimiter.
    #
    # @return [Symbol, nil] the token type from TOKENS, or nil if none matched
    def scan_operator
      matched = @scanner.scan(MULTI_CHAR_OPERATOR_PATTERN) ||
                @scanner.scan(SINGLE_CHAR_OPERATOR_PATTERN)
      return unless matched

      update_position(matched)
      TOKENS[matched]
    end

    # Scans a quoted string and returns its content without the outer quotes.
    #
    # NOTE: Unlike identifiers (where backslash escapes are processed),
    # escape sequences inside strings (e.g., \", \', \\) are NOT processed;
    # the raw content between the quotes is returned as-is.
    #
    # @return [String, nil] the raw string content, or nil if no string starts here
    def scan_string
      quoted = @scanner.scan(STRING_PATTERN)
      return unless quoted

      update_position(quoted)
      quoted[1..-2] # Remove quotes
    end

    # Scans an identifier and strips the backslash from each escape, so
    # `hover\:bg-blue-500` yields `hover:bg-blue-500`.
    #
    # NOTE: The pattern itself allows a leading `-` or `--`, but #tokenize
    # tries operators first, so at this point the current character is never
    # `-`; input such as `--my-variable` is emitted as two :MINUS tokens
    # followed by an :IDENT.
    #
    # @return [String, nil] the unescaped identifier, or nil if none matched
    def scan_identifier
      ident = @scanner.scan(IDENTIFIER_PATTERN)
      return unless ident

      update_position(ident)
      # Remove backslashes from escaped characters
      ident.gsub(/\\(.)/, '\1')
    end

    # Scans an integer or decimal number.
    #
    # @return [String, nil] the number text, or nil if none matched
    def scan_number
      num = @scanner.scan(NUMBER_PATTERN)
      return unless num

      update_position(num)
      num
    end

    # Advances @line/@column past the given consumed text: a newline starts
    # a new line at column 1, every other character moves one column right.
    def update_position(text)
      text.each_char do |char|
        if char == "\n"
          @line += 1
          @column = 1
        else
          @column += 1
        end
      end
    end
  end
end
@@ -0,0 +1,311 @@
1
+ # frozen_string_literal: true
2
+
3
module Parselly
  # Represents a node in the Abstract Syntax Tree (AST) for CSS selectors.
  #
  # Each Node represents a parsed CSS selector component (e.g., type selector,
  # class selector, combinator, or selector list) with its type, optional value,
  # child nodes, parent reference, and source position.
  #
  # @example Creating a simple AST node
  #   node = Parselly::Node.new(:type_selector, 'div', { line: 1, column: 1 })
  #   node.add_child(Parselly::Node.new(:class_selector, 'container'))
  #
  # @example Traversing the AST
  #   node.ancestors   # Returns array of ancestor nodes
  #   node.descendants # Returns array of all descendant nodes
  #   node.siblings    # Returns array of sibling nodes
  class Node
    # Node types that carry an attribute-matching operator as their value.
    ATTRIBUTE_OPERATOR_TYPES = %i[
      equal_operator includes_operator dashmatch_operator
      prefixmatch_operator suffixmatch_operator substringmatch_operator
    ].freeze

    # Node types reported by #pseudo_classes.
    PSEUDO_TYPES = %i[pseudo_class pseudo_element pseudo_function].freeze

    # Maps node types to the simple-selector category counted by
    # #compound_selector?.
    SIMPLE_SELECTOR_CATEGORIES = {
      id_selector: :id,
      class_selector: :class,
      attribute_selector: :attribute,
      pseudo_class: :pseudo,
      pseudo_element: :pseudo,
      pseudo_function: :pseudo,
      type_selector: :type
    }.freeze

    private_constant :ATTRIBUTE_OPERATOR_TYPES, :PSEUDO_TYPES, :SIMPLE_SELECTOR_CATEGORIES

    attr_accessor :type, :value, :children, :parent, :position

    # Creates a new AST node.
    #
    # @param type [Symbol] the type of the node (e.g., :type_selector, :class_selector)
    # @param value [String, nil] optional value associated with the node
    # @param position [Hash] source position with :line and :column keys
    def initialize(type, value = nil, position = {})
      @type = type
      @value = value
      @children = []
      @parent = nil
      @position = position
    end

    # Adds a child node to this node and sets its parent to this node.
    #
    # @param node [Node, nil] the child node to add
    # @return [Node, nil] the added node, or nil if the input was nil
    def add_child(node)
      return nil if node.nil?

      node.parent = self
      @children << node
      node
    end

    # Returns an array of all ancestor nodes from parent to root.
    #
    # @return [Array<Node>] array of ancestor nodes, nearest first
    def ancestors
      result = []
      node = parent
      while node
        result << node
        node = node.parent
      end
      result
    end

    # Returns an array of all descendant nodes (children, grandchildren, etc.)
    # in depth-first, pre-order sequence.
    #
    # @return [Array<Node>] array of all descendant nodes
    def descendants
      @children.flat_map { |child| [child, *child.descendants] }
    end

    # Returns an array of sibling nodes (excluding self).
    #
    # @return [Array<Node>] array of sibling nodes, or empty array if no parent
    def siblings
      return [] unless parent

      parent.children.reject { |child| child == self }
    end

    # Returns a tree representation of this node and its descendants.
    #
    # The position suffix is omitted when the position is nil or empty.
    #
    # @param indent [Integer] indentation level for the tree display
    # @return [String] formatted tree string
    def to_tree(indent = 0)
      prefix = ' ' * indent
      has_position = !(position.nil? || position.empty?)
      pos_info = has_position ? " [#{position[:line]}:#{position[:column]}]" : ''

      lines = ["#{prefix}#{type}#{"(#{value.inspect})" if value}#{pos_info}"]
      children.each { |child| lines << child.to_tree(indent + 1) }
      lines.join("\n")
    end

    def inspect
      "#<#{self.class.name} type=#{type} value=#{value.inspect} children=#{children.size}>"
    end

    # Converts the AST node back to a CSS selector string.
    #
    # Node types not listed explicitly are rendered by concatenating the
    # selectors of their children.
    #
    # @return [String] the CSS selector string representation of this node
    def to_selector
      case type
      when :selector_list
        children.map(&:to_selector).join(', ')
      when :type_selector, :universal_selector, :an_plus_b, :argument,
           :attribute, :value, *ATTRIBUTE_OPERATOR_TYPES
        value
      when :id_selector
        "##{value}"
      when :class_selector
        ".#{value}"
      when :attribute_selector
        build_attribute_selector
      when :pseudo_class
        ":#{value}"
      when :pseudo_element
        "::#{value}"
      when :pseudo_function
        ":#{value}(#{children.map(&:to_selector).join})"
      when :child_combinator
        ' > '
      when :adjacent_combinator
        ' + '
      when :sibling_combinator
        ' ~ '
      when :descendant_combinator
        ' '
      else
        # Includes :selector and :simple_selector_sequence.
        children.map(&:to_selector).join
      end
    end

    # Checks if this node or any descendant contains an ID selector.
    #
    # @return [Boolean] true if an ID selector is present
    def id?
      self_and_descendants.any? { |node| node.type == :id_selector }
    end

    # Extracts the ID value from this node or its descendants.
    #
    # @return [String, nil] the ID value without the '#' prefix, or nil if no ID selector is found
    def id
      self_and_descendants.find { |node| node.type == :id_selector }&.value
    end

    # Extracts all class names from this node and its descendants.
    #
    # @return [Array<String>] array of class names without the '.' prefix
    def classes
      self_and_descendants.select { |node| node.type == :class_selector }.map(&:value)
    end

    # Checks if this node or any descendant contains an attribute selector.
    #
    # @return [Boolean] true if an attribute selector is present
    def attribute?
      self_and_descendants.any? { |node| node.type == :attribute_selector }
    end

    # Extracts all attribute selectors from this node and its descendants.
    #
    # @return [Array<Hash>] array of attribute information hashes
    #   Each hash contains :name, :operator (optional), and :value (optional) keys
    def attributes
      self_and_descendants
        .select { |node| node.type == :attribute_selector }
        .map { |node| extract_attribute_info(node) }
    end

    # Extracts all pseudo-classes and pseudo-elements from this node and its descendants.
    #
    # @return [Array<String>] array of pseudo-class and pseudo-element names
    def pseudo_classes
      self_and_descendants.select { |node| PSEUDO_TYPES.include?(node.type) }.map(&:value)
    end

    # Checks whether more than one category of simple selector (id, class,
    # attribute, pseudo, type) occurs in this node or its descendants, e.g.
    # `div.class#id[attr]:hover`.
    #
    # NOTE: The whole subtree is inspected, so categories on either side of a
    # combinator are counted together, and several selectors of the same
    # category (e.g. `.a.b`) count only once.
    #
    # @return [Boolean] true if at least two different categories are present
    def compound_selector?
      # One traversal instead of one per category.
      categories = self_and_descendants.map { |node| SIMPLE_SELECTOR_CATEGORIES[node.type] }
      categories.compact.uniq.size > 1
    end

    # Checks if this node or any descendant contains a type selector.
    #
    # @return [Boolean] true if a type selector is present
    def type_selector?
      self_and_descendants.any? { |node| node.type == :type_selector }
    end

    private

    # Returns this node followed by all of its descendants.
    #
    # @return [Array<Node>]
    def self_and_descendants
      [self, *descendants]
    end

    # Helper method to extract attribute information from an attribute_selector node.
    #
    # @param node [Node] an attribute_selector node
    # @return [Hash] attribute information hash
    def extract_attribute_info(node)
      # Simple attribute selector like [disabled]
      return { name: node.value } if node.value

      # Attribute selector with operator and value like [type="text"]
      node.children.each_with_object({}) do |child, info|
        case child.type
        when :attribute
          info[:name] = child.value
        when *ATTRIBUTE_OPERATOR_TYPES
          info[:operator] = child.value
        when :value
          info[:value] = child.value
        end
      end
    end

    # Helper method to build an attribute selector string.
    #
    # The value is always emitted in double quotes; double quotes inside the
    # value are backslash-escaped so the output stays a well-formed selector.
    #
    # @return [String] the attribute selector string
    def build_attribute_selector
      # Simple attribute selector like [disabled]
      return "[#{value}]" if value

      info = extract_attribute_info(self)
      if info[:operator] && info[:value]
        "[#{info[:name]}#{info[:operator]}#{quote_attribute_value(info[:value])}]"
      else
        "[#{info[:name]}]"
      end
    end

    # Wraps a raw attribute value in double quotes.
    #
    # Existing backslash escape pairs are left untouched (the value is stored
    # raw, so `\"` is already escaped); only bare double quotes get a
    # backslash added.
    #
    # @param raw [String] the raw attribute value
    # @return [String] the double-quoted value
    def quote_attribute_value(raw)
      escaped = raw.to_s.gsub(/\\.|"/m) { |match| match == '"' ? '\\"' : match }
      "\"#{escaped}\""
    end
  end
end