parselly 0.1.0 → 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,5 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Parselly
4
+ VERSION = '1.0.0'
5
+ end
data/lib/parselly.rb CHANGED
@@ -2,18 +2,19 @@
2
2
 
3
3
  require 'strscan'
4
4
 
5
- class Parselly
6
- VERSION = "0.1.0"
5
+ require_relative 'parselly/lexer'
6
+ require_relative 'parselly/node'
7
+ require_relative 'parselly/parser'
8
+ require_relative 'parselly/version'
7
9
 
8
- def self.sanitize(selector)
10
+ module Parselly
11
+ def sanitize(selector)
9
12
  scanner = StringScanner.new(selector)
10
13
  result = +''
11
14
 
12
15
  # Special case: if the selector is of length 1 and
13
16
  # the first character is `-`
14
- if selector.length == 1 && scanner.peek(1) == '-'
15
- return "\\#{selector}"
16
- end
17
+ return "\\#{selector}" if selector.length == 1 && scanner.peek(1) == '-'
17
18
 
18
19
  until scanner.eos?
19
20
  # NULL character (U+0000)
@@ -27,7 +28,7 @@ class Parselly
27
28
  result << escaped_hex(scanner.matched)
28
29
  # Second character is a digit and first is `-`
29
30
  elsif scanner.pos == 1 && scanner.scan(/\d/) &&
30
- scanner.pre_match == '-'
31
+ scanner.pre_match == '-'
31
32
  result << escaped_hex(scanner.matched)
32
33
  # Alphanumeric characters, `-`, `_`
33
34
  elsif scanner.scan(/[a-zA-Z0-9\-_]/)
@@ -41,8 +42,9 @@ class Parselly
41
42
  result
42
43
  end
43
44
 
44
- def self.escaped_hex(char)
45
+ def escaped_hex(char)
45
46
  "\\#{char.ord.to_s(16)} "
46
47
  end
47
- private_class_method :escaped_hex
48
+
49
+ module_function :sanitize, :escaped_hex
48
50
  end
data/parser.y ADDED
@@ -0,0 +1,372 @@
1
+ class Parselly::Parser
2
+ expect 0
3
+ error_on_expect_mismatch
4
+ token IDENT STRING NUMBER
5
+ HASH DOT STAR
6
+ LBRACKET RBRACKET
7
+ LPAREN RPAREN
8
+ COLON COMMA
9
+ CHILD ADJACENT SIBLING DESCENDANT
10
+ EQUAL INCLUDES DASHMATCH
11
+ PREFIXMATCH SUFFIXMATCH SUBSTRINGMATCH
12
+ MINUS
13
+
14
+ # Precedence rules to resolve shift/reduce conflicts in an_plus_b grammar
15
+ # These rules ensure that in patterns like "2n+1" or "n-3", the operators
16
+ # (+/-) are shifted rather than reducing early. This allows proper parsing
17
+ # of An+B notation used in :nth-child() and similar pseudo-classes.
18
+ # Higher precedence comes first: the list runs from prechigh down to preclow
19
+ prechigh
20
+ left ADJACENT MINUS # In an_plus_b context, shift these operators
21
+ nonassoc IDENT # Prevent premature reduction when IDENT follows NUMBER
22
+ preclow
23
+ rule
24
+ selector_list
25
+ : complex_selector (COMMA complex_selector)*
26
+ {
27
+ result = Node.new(:selector_list, nil, @current_position)
28
+ result.add_child(val[0])
29
+ val[1].each { |pair| result.add_child(pair[1]) }
30
+ }
31
+ ;
32
+
33
+ complex_selector
34
+ : compound_selector (combinator compound_selector)*
35
+ {
36
+ if val[1].empty?
37
+ result = val[0]
38
+ else
39
+ result = val[0]
40
+ val[1].each do |pair|
41
+ node = Node.new(:selector, nil, result.position)
42
+ node.add_child(result)
43
+ node.add_child(pair[0])
44
+ node.add_child(pair[1])
45
+ result = node
46
+ end
47
+ end
48
+ }
49
+ ;
50
+
51
+ combinator
52
+ : CHILD
53
+ { result = Node.new(:child_combinator, '>', @current_position) }
54
+ | ADJACENT
55
+ { result = Node.new(:adjacent_combinator, '+', @current_position) }
56
+ | SIBLING
57
+ { result = Node.new(:sibling_combinator, '~', @current_position) }
58
+ | DESCENDANT
59
+ { result = Node.new(:descendant_combinator, ' ', @current_position) }
60
+ ;
61
+
62
+ compound_selector
63
+ : simple_selector_head simple_selector_tail
64
+ {
65
+ result = Node.new(:simple_selector_sequence, nil, val[0].position)
66
+ result.add_child(val[0])
67
+ val[1].each { |sel| result.add_child(sel) } unless val[1].empty?
68
+ }
69
+ ;
70
+
71
+ simple_selector_head
72
+ : type_selector
73
+ { result = val[0] }
74
+ | subclass_selector
75
+ { result = val[0] }
76
+ ;
77
+
78
+ simple_selector_tail
79
+ : subclass_selector*
80
+ { result = val[0] }
81
+ ;
82
+
83
+ type_selector
84
+ : IDENT
85
+ { result = Node.new(:type_selector, val[0], @current_position) }
86
+ | STAR
87
+ { result = Node.new(:universal_selector, '*', @current_position) }
88
+ ;
89
+
90
+ subclass_selector
91
+ : id_selector
92
+ { result = val[0] }
93
+ | class_selector
94
+ { result = val[0] }
95
+ | attribute_selector
96
+ { result = val[0] }
97
+ | pseudo_class_selector
98
+ { result = val[0] }
99
+ | pseudo_element_selector
100
+ { result = val[0] }
101
+ ;
102
+
103
+ id_selector
104
+ : HASH IDENT
105
+ { result = Node.new(:id_selector, val[1], @current_position) }
106
+ ;
107
+
108
+ class_selector
109
+ : DOT IDENT
110
+ { result = Node.new(:class_selector, val[1], @current_position) }
111
+ ;
112
+
113
+ attribute_selector
114
+ : LBRACKET IDENT RBRACKET
115
+ { result = Node.new(:attribute_selector, val[1], @current_position) }
116
+ | LBRACKET IDENT attr_matcher STRING RBRACKET
117
+ {
118
+ result = Node.new(:attribute_selector, nil, @current_position)
119
+ result.add_child(Node.new(:attribute, val[1], @current_position))
120
+ result.add_child(val[2])
121
+ result.add_child(Node.new(:value, val[3], @current_position))
122
+ }
123
+ | LBRACKET IDENT attr_matcher IDENT RBRACKET
124
+ {
125
+ result = Node.new(:attribute_selector, nil, @current_position)
126
+ result.add_child(Node.new(:attribute, val[1], @current_position))
127
+ result.add_child(val[2])
128
+ result.add_child(Node.new(:value, val[3], @current_position))
129
+ }
130
+ ;
131
+
132
+ attr_matcher
133
+ : EQUAL
134
+ { result = Node.new(:equal_operator, '=', @current_position) }
135
+ | INCLUDES
136
+ { result = Node.new(:includes_operator, '~=', @current_position) }
137
+ | DASHMATCH
138
+ { result = Node.new(:dashmatch_operator, '|=', @current_position) }
139
+ | PREFIXMATCH
140
+ { result = Node.new(:prefixmatch_operator, '^=', @current_position) }
141
+ | SUFFIXMATCH
142
+ { result = Node.new(:suffixmatch_operator, '$=', @current_position) }
143
+ | SUBSTRINGMATCH
144
+ { result = Node.new(:substringmatch_operator, '*=', @current_position) }
145
+ ;
146
+
147
+ pseudo_class_selector
148
+ : COLON IDENT
149
+ { result = Node.new(:pseudo_class, val[1], @current_position) }
150
+ | COLON IDENT LPAREN any_value RPAREN
151
+ {
152
+ fn = Node.new(:pseudo_function, val[1], @current_position)
153
+ fn.add_child(val[3])
154
+ result = fn
155
+ }
156
+ ;
157
+
158
+ pseudo_element_selector
159
+ : COLON COLON IDENT
160
+ { result = Node.new(:pseudo_element, val[2], @current_position) }
161
+ ;
162
+
163
+ any_value
164
+ : STRING
165
+ { result = Node.new(:argument, val[0], @current_position) }
166
+ | an_plus_b
167
+ { result = val[0] }
168
+ | relative_selector_list
169
+ { result = val[0] }
170
+ ;
171
+
172
+ an_plus_b
173
+ # Positive coefficient cases
174
+ : NUMBER IDENT ADJACENT NUMBER
175
+ {
176
+ # Handle 'An+B' like '2n+1'
177
+ result = Node.new(:an_plus_b, "#{val[0]}#{val[1]}+#{val[3]}", @current_position)
178
+ }
179
+ | NUMBER IDENT MINUS NUMBER
180
+ {
181
+ # Handle 'An-B' like '2n-1'
182
+ result = Node.new(:an_plus_b, "#{val[0]}#{val[1]}-#{val[3]}", @current_position)
183
+ }
184
+ | NUMBER IDENT
185
+ {
186
+ # Handle 'An' like '2n' or composite like '2n-1' (when '-1' is part of IDENT)
187
+ result = Node.new(:an_plus_b, "#{val[0]}#{val[1]}", @current_position)
188
+ }
189
+ | IDENT ADJACENT NUMBER
190
+ {
191
+ # Handle 'n+B' like 'n+5' or keywords followed by offset (rare but valid)
192
+ result = Node.new(:an_plus_b, "#{val[0]}+#{val[2]}", @current_position)
193
+ }
194
+ | IDENT MINUS NUMBER
195
+ {
196
+ # Handle 'n-B' like 'n-3'
197
+ result = Node.new(:an_plus_b, "#{val[0]}-#{val[2]}", @current_position)
198
+ }
199
+ # Negative coefficient cases
200
+ | MINUS NUMBER IDENT ADJACENT NUMBER
201
+ {
202
+ # Handle '-An+B' like '-2n+1'
203
+ result = Node.new(:an_plus_b, "-#{val[1]}#{val[2]}+#{val[4]}", @current_position)
204
+ }
205
+ | MINUS NUMBER IDENT MINUS NUMBER
206
+ {
207
+ # Handle '-An-B' like '-2n-1'
208
+ result = Node.new(:an_plus_b, "-#{val[1]}#{val[2]}-#{val[4]}", @current_position)
209
+ }
210
+ | MINUS NUMBER IDENT
211
+ {
212
+ # Handle '-An' like '-2n' or composite like '-2n+1' (when '+1' is part of IDENT)
213
+ result = Node.new(:an_plus_b, "-#{val[1]}#{val[2]}", @current_position)
214
+ }
215
+ | MINUS IDENT ADJACENT NUMBER
216
+ {
217
+ # Handle '-n+B' like '-n+3'
218
+ result = Node.new(:an_plus_b, "-#{val[1]}+#{val[3]}", @current_position)
219
+ }
220
+ | MINUS IDENT MINUS NUMBER
221
+ {
222
+ # Handle '-n-B' like '-n-2'
223
+ result = Node.new(:an_plus_b, "-#{val[1]}-#{val[3]}", @current_position)
224
+ }
225
+ | MINUS IDENT
226
+ {
227
+ # Handle '-n' or composite like '-n+3' (when '+3' is part of IDENT)
228
+ result = Node.new(:an_plus_b, "-#{val[1]}", @current_position)
229
+ }
230
+ # Simple cases
231
+ | NUMBER
232
+ {
233
+ # Handle just a number like '3'
234
+ result = Node.new(:an_plus_b, val[0].to_s, @current_position)
235
+ }
236
+ ;
237
+
238
+ relative_selector_list
239
+ : relative_selector (COMMA relative_selector)*
240
+ {
241
+ result = Node.new(:selector_list, nil, @current_position)
242
+ result.add_child(val[0])
243
+ val[1].each { |pair| result.add_child(pair[1]) }
244
+ }
245
+ ;
246
+
247
+ relative_selector
248
+ : complex_selector
249
+ { result = val[0] }
250
+ | combinator complex_selector
251
+ {
252
+ result = Node.new(:selector, nil, val[0].position)
253
+ result.add_child(val[0])
254
+ result.add_child(val[1])
255
+ }
256
+ ;
257
+
258
+ end
259
+
260
+ ---- header
261
+
262
+ ---- inner
263
# Parses +input+ and returns whatever the generated parser (do_parse) yields.
#
# Steps, in order: tokenize via Parselly::Lexer, insert implicit descendant
# combinators into the token stream, reset the token cursor and the current
# position, run the parser, then rewrite An+B arguments inside the resulting
# tree in place.
#
# @param input selector source handed to Parselly::Lexer.new
# @return the AST produced by do_parse (after in-place normalization)
def parse(input)
  @lexer = Parselly::Lexer.new(input)
  @tokens = @lexer.tokenize
  preprocess_tokens!

  # Reset cursor state so every call starts from the first token.
  @index = 0
  @current_position = { line: 1, column: 1 }

  do_parse.tap { |tree| normalize_an_plus_b(tree) }
end
273
+
274
# Rebuilds @tokens with an explicit [:DESCENDANT, ' ', position] token
# inserted after every token for which needs_descendant?(token, following)
# is true.
#
# The inserted token gets a fresh hash holding the line and column of the
# token it follows, so it never shares a position object with that token.
# The last token has no follower and is therefore never followed by an
# insertion.
#
# @return [Array] the new token list (also stored in @tokens)
def preprocess_tokens!
  @tokens = @tokens.each_with_index.flat_map do |token, index|
    following = @tokens[index + 1]
    next [token] unless following && needs_descendant?(token, following)

    position = { line: token[2][:line], column: token[2][:column] }
    [token, [:DESCENDANT, ' ', position]]
  end
end
290
+
291
+ # Insert DESCENDANT combinator if:
292
+ # - Current token can end a compound selector
293
+ # - Next token can start a compound selector
294
+ # - EXCEPT when current is type_selector and next is subclass_selector
295
+ # (they belong to the same compound selector)
296
# Tells whether an implicit descendant combinator belongs between two
# adjacent tokens. Only the token types (element 0 of each token) are read.
#
# Returns false when an IDENT or STAR is directly followed by DOT, HASH,
# LBRACKET or COLON, because both tokens then belong to the same compound
# selector. Otherwise the answer is true only if the first type can end a
# compound selector and the second can start one.
#
# NOTE(review): the decision uses token types alone; whether `a.b` and
# `a .b` are told apart depends on what the lexer emits — confirm there.
#
# @param current token whose first element is its type
# @param next_tok following token whose first element is its type
# @return [Boolean]
def needs_descendant?(current, next_tok)
  left = current[0]
  right = next_tok[0]

  # Type selector followed by a subclass selector: same compound.
  same_compound = %i[IDENT STAR].include?(left) &&
                  %i[DOT HASH LBRACKET COLON].include?(right)
  return false if same_compound

  can_end_compound?(left) && can_start_compound?(right)
end
311
+
312
# True when +token_type+ is one of the types that may close a compound
# selector: IDENT, STAR, RPAREN or RBRACKET.
#
# @param token_type token type to classify
# @return [Boolean]
def can_end_compound?(token_type)
  case token_type
  when :IDENT, :STAR, :RPAREN, :RBRACKET then true
  else false
  end
end
315
+
316
# True when +token_type+ may open a compound selector: the type-selector
# tokens (IDENT, STAR) and the tokens that introduce a subclass selector
# (DOT, HASH, LBRACKET, COLON).
#
# @param token_type token type to classify
# @return [Boolean]
def can_start_compound?(token_type)
  case token_type
  when :IDENT, :STAR, :DOT, :HASH, :LBRACKET, :COLON then true
  else false
  end
end
320
+
321
# Walks the tree rooted at +node+ and, for every :pseudo_function node whose
# value passes nth_pseudo?, replaces a first child of type :selector_list
# with an :an_plus_b node when extract_an_plus_b_value yields a value for it.
# The replacement keeps the position of the child it replaces.
#
# Does nothing for objects that do not respond to :children or whose
# children are nil. Nil entries in a children list are skipped during the
# descent.
#
# @param node tree node (or any object; non-nodes are ignored)
# @return the visited children array, or nil when +node+ is skipped
def normalize_an_plus_b(node)
  return unless node.respond_to?(:children) && node.children

  if node.type == :pseudo_function && nth_pseudo?(node.value)
    argument = node.children.first
    formula = extract_an_plus_b_value(argument) if argument && argument.type == :selector_list
    node.children[0] = Node.new(:an_plus_b, formula, argument.position) if formula
  end

  node.children.compact.each { |descendant| normalize_an_plus_b(descendant) }
end
335
+
336
# True when +name+ is one of the pseudo-class names listed below, i.e. the
# ones whose argument this parser normalizes as An+B.
#
# @param name pseudo-class name to test
# @return [Boolean]
def nth_pseudo?(name)
  case name
  when 'nth-child', 'nth-last-child',
       'nth-of-type', 'nth-last-of-type',
       'nth-col', 'nth-last-col'
    true
  else
    false
  end
end
339
+
340
# Returns the An+B text carried by a selector list that consists of exactly
# one :simple_selector_sequence holding exactly one :type_selector, provided
# that selector's value is `even`, `odd`, a bare integer, or an `n` formula
# such as `n`, `-n`, `2n`, `+3n-1`. Returns nil in every other case.
#
# The pattern is anchored with \A and \z so the whole value must match;
# line anchors (^ and $) would accept a multi-line value whose first line
# alone looks like a formula.
#
# @param selector_list_node node responding to #children
# @return the matching value, or nil
def extract_an_plus_b_value(selector_list_node)
  return nil unless selector_list_node.children.size == 1

  sequence = selector_list_node.children.first
  return nil unless sequence.type == :simple_selector_sequence
  return nil unless sequence.children.size == 1

  type_sel = sequence.children.first
  return nil unless type_sel.type == :type_selector

  value = type_sel.value
  # `\d*` already covers the coefficient-less forms (`n`, `-n+3`).
  value if /\A(?:even|odd|[+-]?\d*n(?:[+-]\d+)?|\d+)\z/.match?(value)
end
357
+
358
# Token source for the generated parser: returns the next [type, value]
# pair, advances the cursor, and records that token's position in
# @current_position. Once every token has been consumed it returns
# [false, nil] and leaves all state unchanged.
#
# @return [Array] [type, value], or [false, nil] at end of input
def next_token
  return [false, nil] unless @index < @tokens.size

  kind, text, @current_position = @tokens[@index]
  @index += 1

  [kind, text]
end
367
+
368
# Error hook: raises a RuntimeError naming the unexpected token, its value
# and the line:column stored in @current_position. A '?' stands in for the
# token name when token_to_str returns nil, and for line and column when no
# position is recorded.
#
# @param token_id token identifier passed to token_to_str
# @param val value of the offending token (interpolated into the message)
# @param vstack unused
# @raise [RuntimeError] always
def on_error(token_id, val, vstack)
  location = @current_position || { line: '?', column: '?' }
  line, column = location.values_at(:line, :column)
  unexpected = token_to_str(token_id) || '?'

  raise "Parse error: unexpected #{unexpected} '#{val}' at #{line}:#{column}"
end
metadata CHANGED
@@ -1,13 +1,13 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: parselly
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.0
4
+ version: 1.0.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Yudai Takada
8
8
  bindir: exe
9
9
  cert_chain: []
10
- date: 2025-01-24 00:00:00.000000000 Z
10
+ date: 1980-01-02 00:00:00.000000000 Z
11
11
  dependencies: []
12
12
  description: Parselly is a pure Ruby CSS selector parser. Provides a simple and easy-to-use
13
13
  API for parsing CSS selectors.
@@ -17,10 +17,16 @@ executables: []
17
17
  extensions: []
18
18
  extra_rdoc_files: []
19
19
  files:
20
+ - ".rspec"
20
21
  - MIT
21
22
  - README.md
22
23
  - Rakefile
23
24
  - lib/parselly.rb
25
+ - lib/parselly/lexer.rb
26
+ - lib/parselly/node.rb
27
+ - lib/parselly/parser.rb
28
+ - lib/parselly/version.rb
29
+ - parser.y
24
30
  homepage: https://github.com/ydah/parselly
25
31
  licenses:
26
32
  - MIT
@@ -38,14 +44,14 @@ required_ruby_version: !ruby/object:Gem::Requirement
38
44
  requirements:
39
45
  - - ">="
40
46
  - !ruby/object:Gem::Version
41
- version: '2.5'
47
+ version: '2.7'
42
48
  required_rubygems_version: !ruby/object:Gem::Requirement
43
49
  requirements:
44
50
  - - ">="
45
51
  - !ruby/object:Gem::Version
46
52
  version: '0'
47
53
  requirements: []
48
- rubygems_version: 3.7.0.dev
54
+ rubygems_version: 3.6.9
49
55
  specification_version: 4
50
56
  summary: Pure Ruby CSS selector parser.
51
57
  test_files: []