parselly 1.2.0 → 1.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +56 -9
- data/Rakefile +10 -0
- data/lib/parselly/lexer.rb +278 -68
- data/lib/parselly/node.rb +434 -205
- data/lib/parselly/parser.rb +799 -325
- data/lib/parselly/version.rb +1 -1
- data/lib/parselly.rb +57 -10
- data/parser.y +454 -101
- metadata +3 -3
data/parser.y
CHANGED
|
@@ -1,15 +1,15 @@
|
|
|
1
1
|
class Parselly::Parser
|
|
2
2
|
expect 0
|
|
3
3
|
error_on_expect_mismatch
|
|
4
|
-
token IDENT STRING NUMBER
|
|
4
|
+
token IDENT STRING BAD_STRING NUMBER OF
|
|
5
5
|
HASH DOT STAR
|
|
6
6
|
LBRACKET RBRACKET
|
|
7
7
|
LPAREN RPAREN
|
|
8
8
|
COLON COMMA
|
|
9
|
-
CHILD ADJACENT SIBLING DESCENDANT
|
|
9
|
+
CHILD ADJACENT SIBLING DESCENDANT COLUMN
|
|
10
10
|
EQUAL INCLUDES DASHMATCH
|
|
11
11
|
PREFIXMATCH SUFFIXMATCH SUBSTRINGMATCH
|
|
12
|
-
MINUS
|
|
12
|
+
MINUS PIPE
|
|
13
13
|
|
|
14
14
|
# Precedence rules to resolve shift/reduce conflicts in an_plus_b grammar
|
|
15
15
|
# These rules ensure that in patterns like "2n+1" or "n-3", the operators
|
|
@@ -24,7 +24,7 @@ rule
|
|
|
24
24
|
selector_list
|
|
25
25
|
: complex_selector (COMMA complex_selector)*
|
|
26
26
|
{
|
|
27
|
-
result = Node.new(:selector_list, nil,
|
|
27
|
+
result = Node.new(:selector_list, nil, val[0].position)
|
|
28
28
|
result.add_child(val[0])
|
|
29
29
|
val[1].each { |pair| result.add_child(pair[1]) }
|
|
30
30
|
}
|
|
@@ -50,13 +50,15 @@ rule
|
|
|
50
50
|
|
|
51
51
|
combinator
|
|
52
52
|
: CHILD
|
|
53
|
-
{ result = Node.new(:child_combinator, '>',
|
|
53
|
+
{ result = Node.new(:child_combinator, '>', token_position(val[0])) }
|
|
54
54
|
| ADJACENT
|
|
55
|
-
{ result = Node.new(:adjacent_combinator, '+',
|
|
55
|
+
{ result = Node.new(:adjacent_combinator, '+', token_position(val[0])) }
|
|
56
56
|
| SIBLING
|
|
57
|
-
{ result = Node.new(:sibling_combinator, '~',
|
|
57
|
+
{ result = Node.new(:sibling_combinator, '~', token_position(val[0])) }
|
|
58
58
|
| DESCENDANT
|
|
59
|
-
{ result = Node.new(:descendant_combinator, ' ',
|
|
59
|
+
{ result = Node.new(:descendant_combinator, ' ', token_position(val[0])) }
|
|
60
|
+
| COLUMN
|
|
61
|
+
{ result = Node.new(:column_combinator, '||', token_position(val[0])) }
|
|
60
62
|
;
|
|
61
63
|
|
|
62
64
|
compound_selector
|
|
@@ -82,9 +84,69 @@ rule
|
|
|
82
84
|
|
|
83
85
|
type_selector
|
|
84
86
|
: IDENT
|
|
85
|
-
{ result = Node.new(:type_selector,
|
|
87
|
+
{ result = Node.new(:type_selector, token_value(val[0]), token_position(val[0]), raw_value: token_raw(val[0])) }
|
|
86
88
|
| STAR
|
|
87
|
-
{ result = Node.new(:universal_selector, '*',
|
|
89
|
+
{ result = Node.new(:universal_selector, '*', token_position(val[0])) }
|
|
90
|
+
| IDENT PIPE IDENT
|
|
91
|
+
{
|
|
92
|
+
result = Node.new(
|
|
93
|
+
:type_selector,
|
|
94
|
+
token_value(val[2]),
|
|
95
|
+
token_position(val[0]),
|
|
96
|
+
raw_value: "#{token_raw(val[0])}|#{token_raw(val[2])}",
|
|
97
|
+
namespace: token_value(val[0])
|
|
98
|
+
)
|
|
99
|
+
}
|
|
100
|
+
| STAR PIPE IDENT
|
|
101
|
+
{
|
|
102
|
+
result = Node.new(
|
|
103
|
+
:type_selector,
|
|
104
|
+
token_value(val[2]),
|
|
105
|
+
token_position(val[0]),
|
|
106
|
+
raw_value: "*|#{token_raw(val[2])}",
|
|
107
|
+
namespace: '*'
|
|
108
|
+
)
|
|
109
|
+
}
|
|
110
|
+
| PIPE IDENT
|
|
111
|
+
{
|
|
112
|
+
result = Node.new(
|
|
113
|
+
:type_selector,
|
|
114
|
+
token_value(val[1]),
|
|
115
|
+
token_position(val[0]),
|
|
116
|
+
raw_value: "|#{token_raw(val[1])}",
|
|
117
|
+
namespace: ''
|
|
118
|
+
)
|
|
119
|
+
}
|
|
120
|
+
| IDENT PIPE STAR
|
|
121
|
+
{
|
|
122
|
+
result = Node.new(
|
|
123
|
+
:universal_selector,
|
|
124
|
+
'*',
|
|
125
|
+
token_position(val[0]),
|
|
126
|
+
raw_value: "#{token_raw(val[0])}|*",
|
|
127
|
+
namespace: token_value(val[0])
|
|
128
|
+
)
|
|
129
|
+
}
|
|
130
|
+
| STAR PIPE STAR
|
|
131
|
+
{
|
|
132
|
+
result = Node.new(
|
|
133
|
+
:universal_selector,
|
|
134
|
+
'*',
|
|
135
|
+
token_position(val[0]),
|
|
136
|
+
raw_value: '*|*',
|
|
137
|
+
namespace: '*'
|
|
138
|
+
)
|
|
139
|
+
}
|
|
140
|
+
| PIPE STAR
|
|
141
|
+
{
|
|
142
|
+
result = Node.new(
|
|
143
|
+
:universal_selector,
|
|
144
|
+
'*',
|
|
145
|
+
token_position(val[0]),
|
|
146
|
+
raw_value: '|*',
|
|
147
|
+
namespace: ''
|
|
148
|
+
)
|
|
149
|
+
}
|
|
88
150
|
;
|
|
89
151
|
|
|
90
152
|
subclass_selector
|
|
@@ -102,149 +164,232 @@ rule
|
|
|
102
164
|
|
|
103
165
|
id_selector
|
|
104
166
|
: HASH IDENT
|
|
105
|
-
{ result = Node.new(:id_selector,
|
|
167
|
+
{ result = Node.new(:id_selector, token_value(val[1]), token_position(val[0]), raw_value: token_raw(val[1])) }
|
|
106
168
|
;
|
|
107
169
|
|
|
108
170
|
class_selector
|
|
109
171
|
: DOT IDENT
|
|
110
|
-
{ result = Node.new(:class_selector,
|
|
172
|
+
{ result = Node.new(:class_selector, token_value(val[1]), token_position(val[0]), raw_value: token_raw(val[1])) }
|
|
111
173
|
;
|
|
112
174
|
|
|
113
175
|
attribute_selector
|
|
114
|
-
: LBRACKET
|
|
115
|
-
{ result = Node.new(:attribute_selector, identifier_value(val[1]), @current_position, raw_value: identifier_raw(val[1])) }
|
|
116
|
-
| LBRACKET IDENT attr_matcher STRING RBRACKET
|
|
176
|
+
: LBRACKET attribute_name RBRACKET
|
|
117
177
|
{
|
|
118
|
-
result = Node.new(
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
178
|
+
result = Node.new(
|
|
179
|
+
:attribute_selector,
|
|
180
|
+
val[1].value,
|
|
181
|
+
token_position(val[0]),
|
|
182
|
+
raw_value: val[1].raw_value,
|
|
183
|
+
namespace: val[1].namespace
|
|
184
|
+
)
|
|
122
185
|
}
|
|
123
|
-
| LBRACKET
|
|
186
|
+
| LBRACKET attribute_name attr_matcher attribute_value attr_modifier RBRACKET
|
|
124
187
|
{
|
|
125
|
-
result = Node.new(:attribute_selector, nil,
|
|
126
|
-
result.add_child(
|
|
188
|
+
result = Node.new(:attribute_selector, nil, token_position(val[0]), modifier: val[4])
|
|
189
|
+
result.add_child(val[1])
|
|
127
190
|
result.add_child(val[2])
|
|
128
|
-
result.add_child(
|
|
191
|
+
result.add_child(val[3])
|
|
192
|
+
}
|
|
193
|
+
;
|
|
194
|
+
|
|
195
|
+
attribute_name
|
|
196
|
+
: IDENT
|
|
197
|
+
{
|
|
198
|
+
result = Node.new(:attribute, token_value(val[0]), token_position(val[0]), raw_value: token_raw(val[0]))
|
|
199
|
+
}
|
|
200
|
+
| IDENT PIPE IDENT
|
|
201
|
+
{
|
|
202
|
+
result = Node.new(
|
|
203
|
+
:attribute,
|
|
204
|
+
token_value(val[2]),
|
|
205
|
+
token_position(val[0]),
|
|
206
|
+
raw_value: "#{token_raw(val[0])}|#{token_raw(val[2])}",
|
|
207
|
+
namespace: token_value(val[0])
|
|
208
|
+
)
|
|
209
|
+
}
|
|
210
|
+
| STAR PIPE IDENT
|
|
211
|
+
{
|
|
212
|
+
result = Node.new(
|
|
213
|
+
:attribute,
|
|
214
|
+
token_value(val[2]),
|
|
215
|
+
token_position(val[0]),
|
|
216
|
+
raw_value: "*|#{token_raw(val[2])}",
|
|
217
|
+
namespace: '*'
|
|
218
|
+
)
|
|
219
|
+
}
|
|
220
|
+
| PIPE IDENT
|
|
221
|
+
{
|
|
222
|
+
result = Node.new(
|
|
223
|
+
:attribute,
|
|
224
|
+
token_value(val[1]),
|
|
225
|
+
token_position(val[0]),
|
|
226
|
+
raw_value: "|#{token_raw(val[1])}",
|
|
227
|
+
namespace: ''
|
|
228
|
+
)
|
|
129
229
|
}
|
|
130
230
|
;
|
|
131
231
|
|
|
132
232
|
attr_matcher
|
|
133
233
|
: EQUAL
|
|
134
|
-
{ result = Node.new(:equal_operator, '=',
|
|
234
|
+
{ result = Node.new(:equal_operator, '=', token_position(val[0])) }
|
|
135
235
|
| INCLUDES
|
|
136
|
-
{ result = Node.new(:includes_operator, '~=',
|
|
236
|
+
{ result = Node.new(:includes_operator, '~=', token_position(val[0])) }
|
|
137
237
|
| DASHMATCH
|
|
138
|
-
{ result = Node.new(:dashmatch_operator, '|=',
|
|
238
|
+
{ result = Node.new(:dashmatch_operator, '|=', token_position(val[0])) }
|
|
139
239
|
| PREFIXMATCH
|
|
140
|
-
{ result = Node.new(:prefixmatch_operator, '^=',
|
|
240
|
+
{ result = Node.new(:prefixmatch_operator, '^=', token_position(val[0])) }
|
|
141
241
|
| SUFFIXMATCH
|
|
142
|
-
{ result = Node.new(:suffixmatch_operator, '$=',
|
|
242
|
+
{ result = Node.new(:suffixmatch_operator, '$=', token_position(val[0])) }
|
|
143
243
|
| SUBSTRINGMATCH
|
|
144
|
-
{ result = Node.new(:substringmatch_operator, '*=',
|
|
244
|
+
{ result = Node.new(:substringmatch_operator, '*=', token_position(val[0])) }
|
|
245
|
+
;
|
|
246
|
+
|
|
247
|
+
attribute_value
|
|
248
|
+
: STRING
|
|
249
|
+
{ result = Node.new(:value, token_value(val[0]), token_position(val[0]), raw_value: token_raw(val[0]), quote: token_quote(val[0])) }
|
|
250
|
+
| IDENT
|
|
251
|
+
{ result = Node.new(:value, token_value(val[0]), token_position(val[0]), raw_value: token_raw(val[0])) }
|
|
252
|
+
| NUMBER
|
|
253
|
+
{ result = Node.new(:value, token_value(val[0]), token_position(val[0]), raw_value: token_raw(val[0])) }
|
|
254
|
+
;
|
|
255
|
+
|
|
256
|
+
attr_modifier
|
|
257
|
+
:
|
|
258
|
+
{ result = nil }
|
|
259
|
+
| IDENT
|
|
260
|
+
{ result = attribute_modifier_value(val[0]) }
|
|
145
261
|
;
|
|
146
262
|
|
|
147
263
|
pseudo_class_selector
|
|
148
264
|
: COLON IDENT
|
|
149
|
-
{ result = Node.new(:pseudo_class, identifier_value(val[1]), @current_position, raw_value: identifier_raw(val[1])) }
|
|
150
|
-
| COLON IDENT LPAREN any_value RPAREN
|
|
151
265
|
{
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
result =
|
|
266
|
+
name = token_value(val[1])
|
|
267
|
+
node_type = LEGACY_PSEUDO_ELEMENT_NAMES.include?(pseudo_name(name)) ? :pseudo_element : :pseudo_class
|
|
268
|
+
result = Node.new(node_type, name, token_position(val[0]), raw_value: token_raw(val[1]), prefix: ':')
|
|
155
269
|
}
|
|
156
|
-
| IDENT LPAREN any_value RPAREN
|
|
270
|
+
| COLON IDENT LPAREN any_value RPAREN
|
|
157
271
|
{
|
|
158
|
-
fn = Node.new(:pseudo_function,
|
|
159
|
-
fn.add_child(val[
|
|
272
|
+
fn = Node.new(:pseudo_function, token_value(val[1]), token_position(val[0]), raw_value: token_raw(val[1]), prefix: ':')
|
|
273
|
+
fn.add_child(normalize_pseudo_argument(fn.value, val[3]))
|
|
160
274
|
result = fn
|
|
161
275
|
}
|
|
162
276
|
;
|
|
163
277
|
|
|
164
278
|
pseudo_element_selector
|
|
165
279
|
: COLON COLON IDENT
|
|
166
|
-
{ result = Node.new(:pseudo_element,
|
|
280
|
+
{ result = Node.new(:pseudo_element, token_value(val[2]), token_position(val[0]), raw_value: token_raw(val[2]), prefix: '::') }
|
|
281
|
+
| COLON COLON IDENT LPAREN any_value RPAREN
|
|
282
|
+
{
|
|
283
|
+
fn = Node.new(:pseudo_element_function, token_value(val[2]), token_position(val[0]), raw_value: token_raw(val[2]), prefix: '::')
|
|
284
|
+
fn.add_child(val[4])
|
|
285
|
+
result = fn
|
|
286
|
+
}
|
|
167
287
|
;
|
|
168
288
|
|
|
169
289
|
any_value
|
|
170
|
-
:
|
|
171
|
-
{ result =
|
|
290
|
+
: nth_of_value
|
|
291
|
+
{ result = val[0] }
|
|
292
|
+
| STRING
|
|
293
|
+
{ result = Node.new(:argument, token_value(val[0]), token_position(val[0]), raw_value: token_raw(val[0]), quote: token_quote(val[0])) }
|
|
172
294
|
| an_plus_b
|
|
173
295
|
{ result = val[0] }
|
|
174
296
|
| relative_selector_list
|
|
175
297
|
{ result = val[0] }
|
|
176
298
|
;
|
|
177
299
|
|
|
300
|
+
nth_of_value
|
|
301
|
+
: nth_of_an_plus_b OF relative_selector_list
|
|
302
|
+
{
|
|
303
|
+
result = Node.new(:nth_selector_argument, nil, val[0].position)
|
|
304
|
+
result.add_child(val[0])
|
|
305
|
+
result.add_child(val[2])
|
|
306
|
+
}
|
|
307
|
+
;
|
|
308
|
+
|
|
309
|
+
nth_of_an_plus_b
|
|
310
|
+
: an_plus_b
|
|
311
|
+
{ result = val[0] }
|
|
312
|
+
| IDENT
|
|
313
|
+
{
|
|
314
|
+
value = token_value(val[0])
|
|
315
|
+
unless value =~ AN_PLUS_B_REGEX
|
|
316
|
+
raise Parselly::SyntaxError, parse_error("Parse error: invalid An+B value '#{value}'", token_position(val[0]))
|
|
317
|
+
end
|
|
318
|
+
|
|
319
|
+
result = Node.new(:an_plus_b, value, token_position(val[0]), raw_value: token_raw(val[0]))
|
|
320
|
+
}
|
|
321
|
+
;
|
|
322
|
+
|
|
178
323
|
an_plus_b
|
|
179
324
|
# Positive coefficient cases
|
|
180
325
|
: NUMBER IDENT ADJACENT NUMBER
|
|
181
326
|
{
|
|
182
327
|
# Handle 'An+B' like '2n+1'
|
|
183
|
-
result = Node.new(:an_plus_b, "#{val[0]}#{val[1]}+#{val[3]}",
|
|
328
|
+
result = Node.new(:an_plus_b, "#{token_value(val[0])}#{token_value(val[1])}+#{token_value(val[3])}", token_position(val[0]))
|
|
184
329
|
}
|
|
185
330
|
| NUMBER IDENT MINUS NUMBER
|
|
186
331
|
{
|
|
187
332
|
# Handle 'An-B' like '2n-1'
|
|
188
|
-
result = Node.new(:an_plus_b, "#{val[0]}#{val[1]}-#{val[3]}",
|
|
333
|
+
result = Node.new(:an_plus_b, "#{token_value(val[0])}#{token_value(val[1])}-#{token_value(val[3])}", token_position(val[0]))
|
|
189
334
|
}
|
|
190
335
|
| NUMBER IDENT
|
|
191
336
|
{
|
|
192
337
|
# Handle 'An' like '2n' or composite like '2n-1' (when '-1' is part of IDENT)
|
|
193
|
-
result = Node.new(:an_plus_b, "#{val[0]}#{val[1]}",
|
|
338
|
+
result = Node.new(:an_plus_b, "#{token_value(val[0])}#{token_value(val[1])}", token_position(val[0]))
|
|
194
339
|
}
|
|
195
340
|
| IDENT ADJACENT NUMBER
|
|
196
341
|
{
|
|
197
342
|
# Handle 'n+B' like 'n+5' or keywords followed by offset (rare but valid)
|
|
198
|
-
result = Node.new(:an_plus_b, "#{val[0]}+#{val[2]}",
|
|
343
|
+
result = Node.new(:an_plus_b, "#{token_value(val[0])}+#{token_value(val[2])}", token_position(val[0]))
|
|
199
344
|
}
|
|
200
345
|
| IDENT MINUS NUMBER
|
|
201
346
|
{
|
|
202
347
|
# Handle 'n-B' like 'n-3'
|
|
203
|
-
result = Node.new(:an_plus_b, "#{val[0]}-#{val[2]}",
|
|
348
|
+
result = Node.new(:an_plus_b, "#{token_value(val[0])}-#{token_value(val[2])}", token_position(val[0]))
|
|
204
349
|
}
|
|
205
350
|
# Negative coefficient cases
|
|
206
351
|
| MINUS NUMBER IDENT ADJACENT NUMBER
|
|
207
352
|
{
|
|
208
353
|
# Handle '-An+B' like '-2n+1'
|
|
209
|
-
result = Node.new(:an_plus_b, "-#{val[1]}#{val[2]}+#{val[4]}",
|
|
354
|
+
result = Node.new(:an_plus_b, "-#{token_value(val[1])}#{token_value(val[2])}+#{token_value(val[4])}", token_position(val[0]))
|
|
210
355
|
}
|
|
211
356
|
| MINUS NUMBER IDENT MINUS NUMBER
|
|
212
357
|
{
|
|
213
358
|
# Handle '-An-B' like '-2n-1'
|
|
214
|
-
result = Node.new(:an_plus_b, "-#{val[1]}#{val[2]}-#{val[4]}",
|
|
359
|
+
result = Node.new(:an_plus_b, "-#{token_value(val[1])}#{token_value(val[2])}-#{token_value(val[4])}", token_position(val[0]))
|
|
215
360
|
}
|
|
216
361
|
| MINUS NUMBER IDENT
|
|
217
362
|
{
|
|
218
363
|
# Handle '-An' like '-2n' or composite like '-2n+1' (when '+1' is part of IDENT)
|
|
219
|
-
result = Node.new(:an_plus_b, "-#{val[1]}#{val[2]}",
|
|
364
|
+
result = Node.new(:an_plus_b, "-#{token_value(val[1])}#{token_value(val[2])}", token_position(val[0]))
|
|
220
365
|
}
|
|
221
366
|
| MINUS IDENT ADJACENT NUMBER
|
|
222
367
|
{
|
|
223
368
|
# Handle '-n+B' like '-n+3'
|
|
224
|
-
result = Node.new(:an_plus_b, "-#{val[1]}+#{val[3]}",
|
|
369
|
+
result = Node.new(:an_plus_b, "-#{token_value(val[1])}+#{token_value(val[3])}", token_position(val[0]))
|
|
225
370
|
}
|
|
226
371
|
| MINUS IDENT MINUS NUMBER
|
|
227
372
|
{
|
|
228
373
|
# Handle '-n-B' like '-n-2'
|
|
229
|
-
result = Node.new(:an_plus_b, "-#{val[1]}-#{val[3]}",
|
|
374
|
+
result = Node.new(:an_plus_b, "-#{token_value(val[1])}-#{token_value(val[3])}", token_position(val[0]))
|
|
230
375
|
}
|
|
231
376
|
| MINUS IDENT
|
|
232
377
|
{
|
|
233
378
|
# Handle '-n' or composite like '-n+3' (when '+3' is part of IDENT)
|
|
234
|
-
result = Node.new(:an_plus_b, "-#{val[1]}",
|
|
379
|
+
result = Node.new(:an_plus_b, "-#{token_value(val[1])}", token_position(val[0]))
|
|
235
380
|
}
|
|
236
381
|
# Simple cases
|
|
237
382
|
| NUMBER
|
|
238
383
|
{
|
|
239
384
|
# Handle just a number like '3'
|
|
240
|
-
result = Node.new(:an_plus_b, val[0].to_s,
|
|
385
|
+
result = Node.new(:an_plus_b, token_value(val[0]).to_s, token_position(val[0]))
|
|
241
386
|
}
|
|
242
387
|
;
|
|
243
388
|
|
|
244
389
|
relative_selector_list
|
|
245
390
|
: relative_selector (COMMA relative_selector)*
|
|
246
391
|
{
|
|
247
|
-
result = Node.new(:selector_list, nil,
|
|
392
|
+
result = Node.new(:selector_list, nil, val[0].position)
|
|
248
393
|
result.add_child(val[0])
|
|
249
394
|
val[1].each { |pair| result.add_child(pair[1]) }
|
|
250
395
|
}
|
|
@@ -267,49 +412,122 @@ end
|
|
|
267
412
|
require 'set'
|
|
268
413
|
|
|
269
414
|
# Pre-computed sets for faster lookup
|
|
270
|
-
CAN_END_COMPOUND = Set[:IDENT, :STAR, :RPAREN, :RBRACKET].freeze
|
|
415
|
+
CAN_END_COMPOUND = Set[:IDENT, :STAR, :RPAREN, :RBRACKET, :NUMBER].freeze
|
|
271
416
|
CAN_START_COMPOUND = Set[:IDENT, :STAR, :DOT, :HASH, :LBRACKET, :COLON].freeze
|
|
272
|
-
TYPE_SELECTOR_TYPES = Set[:IDENT, :STAR].freeze
|
|
273
|
-
SUBCLASS_SELECTOR_TYPES = Set[:DOT, :HASH, :LBRACKET, :COLON].freeze
|
|
274
|
-
SUBCLASS_SELECTOR_END_TYPES = Set[:IDENT, :RBRACKET, :RPAREN].freeze
|
|
275
417
|
NTH_PSEUDO_NAMES = Set['nth-child', 'nth-last-child', 'nth-of-type', 'nth-last-of-type', 'nth-col', 'nth-last-col'].freeze
|
|
276
|
-
AN_PLUS_B_REGEX = /^(even|odd|[+-]?\d*n(?:[+-]\d+)?|[+-]?n(?:[+-]\d+)?|\d+)
|
|
418
|
+
AN_PLUS_B_REGEX = /^(even|odd|[+-]?\d*n(?:[+-]\d+)?|[+-]?n(?:[+-]\d+)?|\d+)$/i.freeze
|
|
419
|
+
SELECTOR_LIST_PSEUDO_NAMES = Set['is', 'where', 'not'].freeze
|
|
420
|
+
RELATIVE_SELECTOR_LIST_PSEUDO_NAMES = Set['has'].freeze
|
|
421
|
+
LEGACY_PSEUDO_ELEMENT_NAMES = Set['before', 'after', 'first-line', 'first-letter'].freeze
|
|
422
|
+
ATTRIBUTE_MODIFIERS = Set['i', 's'].freeze
|
|
277
423
|
|
|
278
424
|
---- inner
|
|
279
|
-
def parse(input, tolerant: false)
|
|
425
|
+
def parse(input, tolerant: false, max_length: nil, max_tokens: nil, max_depth: nil, freeze: false)
|
|
280
426
|
@tolerant = tolerant
|
|
281
427
|
@errors = []
|
|
282
428
|
@error_index = nil
|
|
283
429
|
@suppress_errors = false
|
|
430
|
+
@max_depth = max_depth
|
|
431
|
+
@freeze_tree = freeze
|
|
432
|
+
|
|
433
|
+
unless input.is_a?(String)
|
|
434
|
+
error = parse_error('Input must be a String', { line: 1, column: 1, offset: 0 })
|
|
435
|
+
return Parselly::ParseResult.new(nil, [error]) if tolerant
|
|
436
|
+
|
|
437
|
+
raise Parselly::ParseError, error
|
|
438
|
+
end
|
|
439
|
+
|
|
440
|
+
if max_length && input.length > max_length
|
|
441
|
+
error = parse_error("Input exceeds max_length #{max_length}", { line: 1, column: 1, offset: 0 })
|
|
442
|
+
return Parselly::ParseResult.new(nil, [error]) if tolerant
|
|
443
|
+
|
|
444
|
+
raise Parselly::ParseError, error
|
|
445
|
+
end
|
|
446
|
+
|
|
284
447
|
@lexer = Parselly::Lexer.new(input)
|
|
285
448
|
begin
|
|
286
449
|
@tokens = @lexer.tokenize
|
|
287
|
-
rescue RuntimeError => e
|
|
450
|
+
rescue Parselly::ParseError, RuntimeError => e
|
|
288
451
|
if tolerant
|
|
289
452
|
@errors << parse_error_from_exception(e)
|
|
290
453
|
return Parselly::ParseResult.new(nil, @errors)
|
|
291
454
|
end
|
|
292
455
|
raise
|
|
293
456
|
end
|
|
457
|
+
|
|
458
|
+
if max_tokens && @tokens.size > max_tokens
|
|
459
|
+
error = parse_error("Input exceeds max_tokens #{max_tokens}", @tokens[max_tokens][2])
|
|
460
|
+
return Parselly::ParseResult.new(nil, [error]) if tolerant
|
|
461
|
+
|
|
462
|
+
raise Parselly::ParseError, error
|
|
463
|
+
end
|
|
464
|
+
|
|
294
465
|
preprocess_tokens!
|
|
295
466
|
@index = 0
|
|
296
467
|
@current_position = { line: 1, column: 1, offset: 0 }
|
|
297
468
|
|
|
298
469
|
if tolerant
|
|
299
470
|
ast = parse_with_recovery
|
|
300
|
-
|
|
471
|
+
ast = validate_or_recover_tolerant_ast(ast) if ast
|
|
472
|
+
ast.freeze_tree if ast && @freeze_tree
|
|
301
473
|
return Parselly::ParseResult.new(ast, @errors)
|
|
302
474
|
end
|
|
303
475
|
|
|
304
476
|
ast = do_parse
|
|
305
|
-
|
|
477
|
+
finalize_ast(ast)
|
|
478
|
+
ast.freeze_tree if @freeze_tree
|
|
306
479
|
ast
|
|
307
480
|
end
|
|
308
481
|
|
|
309
482
|
def parse_with_recovery
|
|
310
483
|
do_parse
|
|
311
484
|
rescue Parselly::ParseError, RuntimeError
|
|
312
|
-
parse_partial_ast
|
|
485
|
+
parse_selector_list_recovery || parse_partial_ast
|
|
486
|
+
end
|
|
487
|
+
|
|
488
|
+
def validate_or_recover_tolerant_ast(ast)
|
|
489
|
+
finalize_ast(ast)
|
|
490
|
+
ast
|
|
491
|
+
rescue Parselly::ParseError => e
|
|
492
|
+
@errors << parse_error_from_exception(e)
|
|
493
|
+
parse_selector_list_recovery(validate: true) || ast
|
|
494
|
+
end
|
|
495
|
+
|
|
496
|
+
def parse_selector_list_recovery(validate: false)
|
|
497
|
+
return nil unless @tokens && @tokens.any? { |token| token[0] == :COMMA }
|
|
498
|
+
|
|
499
|
+
eof_token = @tokens.last if @tokens.last && @tokens.last[0] == false
|
|
500
|
+
body_tokens = eof_token ? @tokens[0...-1] : @tokens
|
|
501
|
+
segments = []
|
|
502
|
+
current = []
|
|
503
|
+
|
|
504
|
+
body_tokens.each do |token|
|
|
505
|
+
if token[0] == :COMMA
|
|
506
|
+
segments << current
|
|
507
|
+
current = []
|
|
508
|
+
else
|
|
509
|
+
current << token
|
|
510
|
+
end
|
|
511
|
+
end
|
|
512
|
+
segments << current
|
|
513
|
+
|
|
514
|
+
result = Node.new(:selector_list, nil, body_tokens.first&.[](2) || { line: 1, column: 1, offset: 0 })
|
|
515
|
+
recovered = false
|
|
516
|
+
|
|
517
|
+
segments.each do |segment|
|
|
518
|
+
next if segment.empty?
|
|
519
|
+
|
|
520
|
+
begin
|
|
521
|
+
parsed = parse_from_tokens(segment + [eof_token || [false, nil, segment.last[2]]], suppress_errors: true)
|
|
522
|
+
finalize_ast(parsed) if validate
|
|
523
|
+
result.add_child(parsed)
|
|
524
|
+
recovered = true
|
|
525
|
+
rescue Parselly::ParseError, RuntimeError
|
|
526
|
+
next
|
|
527
|
+
end
|
|
528
|
+
end
|
|
529
|
+
|
|
530
|
+
recovered ? result : nil
|
|
313
531
|
end
|
|
314
532
|
|
|
315
533
|
def parse_partial_ast
|
|
@@ -343,6 +561,8 @@ ensure
|
|
|
343
561
|
end
|
|
344
562
|
|
|
345
563
|
def parse_error_from_exception(error)
|
|
564
|
+
return error.error if error.respond_to?(:error)
|
|
565
|
+
|
|
346
566
|
line = nil
|
|
347
567
|
column = nil
|
|
348
568
|
offset = nil
|
|
@@ -356,17 +576,61 @@ def parse_error_from_exception(error)
|
|
|
356
576
|
{ message: error.message, line: line, column: column, offset: offset }
|
|
357
577
|
end
|
|
358
578
|
|
|
359
|
-
def
|
|
579
|
+
def parse_error(message, position)
|
|
580
|
+
{
|
|
581
|
+
message: message,
|
|
582
|
+
line: position[:line],
|
|
583
|
+
column: position[:column],
|
|
584
|
+
offset: position[:offset]
|
|
585
|
+
}.tap do |error|
|
|
586
|
+
error[:end_line] = position[:end_line] if position.key?(:end_line)
|
|
587
|
+
error[:end_column] = position[:end_column] if position.key?(:end_column)
|
|
588
|
+
error[:end_offset] = position[:end_offset] if position.key?(:end_offset)
|
|
589
|
+
end
|
|
590
|
+
end
|
|
591
|
+
|
|
592
|
+
def token_value(token)
|
|
360
593
|
token.respond_to?(:value) ? token.value : token
|
|
361
594
|
end
|
|
362
595
|
|
|
363
|
-
def
|
|
364
|
-
token.respond_to?(:raw) ? token.raw : token
|
|
596
|
+
def token_raw(token)
|
|
597
|
+
token.respond_to?(:raw) ? token.raw : token_value(token)
|
|
598
|
+
end
|
|
599
|
+
|
|
600
|
+
def token_position(token)
|
|
601
|
+
token.respond_to?(:position) && token.position ? token.position : @current_position
|
|
602
|
+
end
|
|
603
|
+
|
|
604
|
+
def token_quote(token)
|
|
605
|
+
token.respond_to?(:quote) ? token.quote : nil
|
|
606
|
+
end
|
|
607
|
+
|
|
608
|
+
def pseudo_name(name)
|
|
609
|
+
name.to_s.downcase
|
|
610
|
+
end
|
|
611
|
+
|
|
612
|
+
def attribute_modifier_value(token)
|
|
613
|
+
modifier = token_value(token).to_s
|
|
614
|
+
normalized_modifier = modifier.downcase
|
|
615
|
+
return normalized_modifier if ATTRIBUTE_MODIFIERS.include?(normalized_modifier)
|
|
616
|
+
|
|
617
|
+
raise_syntax_error("Parse error: invalid attribute modifier '#{modifier}'", token_position(token))
|
|
618
|
+
end
|
|
619
|
+
|
|
620
|
+
def raise_syntax_error(message, position)
|
|
621
|
+
error = parse_error(message, position)
|
|
622
|
+
if @tolerant
|
|
623
|
+
@errors << error unless @suppress_errors
|
|
624
|
+
@error_index ||= [@index - 1, 0].max
|
|
625
|
+
end
|
|
626
|
+
raise Parselly::SyntaxError, error
|
|
365
627
|
end
|
|
366
628
|
|
|
367
629
|
def preprocess_tokens!
|
|
368
630
|
return if @tokens.size <= 1
|
|
369
631
|
|
|
632
|
+
mark_nth_of_tokens!
|
|
633
|
+
|
|
370
634
|
new_tokens = Array.new(@tokens.size + (@tokens.size / 2)) # Pre-allocate with conservative estimate
|
|
371
635
|
new_tokens_idx = 0
|
|
372
636
|
|
|
@@ -378,7 +642,7 @@ def preprocess_tokens!
|
|
|
378
642
|
if i < last_idx
|
|
379
643
|
next_token = @tokens[i + 1]
|
|
380
644
|
if needs_descendant?(token, next_token)
|
|
381
|
-
pos =
|
|
645
|
+
pos = next_token[2]
|
|
382
646
|
new_tokens[new_tokens_idx] = [:DESCENDANT, ' ', pos]
|
|
383
647
|
new_tokens_idx += 1
|
|
384
648
|
end
|
|
@@ -388,38 +652,129 @@ def preprocess_tokens!
|
|
|
388
652
|
@tokens = new_tokens.first(new_tokens_idx)
|
|
389
653
|
end
|
|
390
654
|
|
|
391
|
-
|
|
392
|
-
|
|
393
|
-
|
|
394
|
-
|
|
395
|
-
|
|
655
|
+
def mark_nth_of_tokens!
|
|
656
|
+
paren_depth = 0
|
|
657
|
+
last_idx = @tokens.size - 1
|
|
658
|
+
|
|
659
|
+
@tokens.each_with_index do |token, index|
|
|
660
|
+
case token[0]
|
|
661
|
+
when :LPAREN
|
|
662
|
+
paren_depth += 1
|
|
663
|
+
when :RPAREN
|
|
664
|
+
paren_depth -= 1 if paren_depth.positive?
|
|
665
|
+
when :IDENT
|
|
666
|
+
next unless paren_depth.positive?
|
|
667
|
+
next unless token_value(token[1]) == 'of'
|
|
668
|
+
next if index.zero? || index >= last_idx
|
|
669
|
+
|
|
670
|
+
previous_token = @tokens[index - 1]
|
|
671
|
+
next_token = @tokens[index + 1]
|
|
672
|
+
if token_gap?(previous_token, token) && token_gap?(token, next_token) &&
|
|
673
|
+
CAN_START_COMPOUND.include?(next_token[0])
|
|
674
|
+
token[0] = :OF
|
|
675
|
+
end
|
|
676
|
+
end
|
|
677
|
+
end
|
|
678
|
+
end
|
|
679
|
+
|
|
680
|
+
# Insert DESCENDANT combinator only when actual ignored input
|
|
681
|
+
# (CSS whitespace or comments) separated two compound selector tokens.
|
|
396
682
|
def needs_descendant?(current, next_tok)
|
|
397
683
|
current_type = current[0]
|
|
398
684
|
next_type = next_tok[0]
|
|
399
685
|
|
|
400
|
-
|
|
401
|
-
|
|
402
|
-
|
|
403
|
-
|
|
404
|
-
SUBCLASS_SELECTOR_END_TYPES.include?(current_type)
|
|
405
|
-
end
|
|
686
|
+
CAN_END_COMPOUND.include?(current_type) &&
|
|
687
|
+
CAN_START_COMPOUND.include?(next_type) &&
|
|
688
|
+
token_gap?(current, next_tok)
|
|
689
|
+
end
|
|
406
690
|
|
|
407
|
-
|
|
691
|
+
def token_gap?(current, next_tok)
|
|
692
|
+
current_end = current[2][:end_offset] || current[2][:offset]
|
|
693
|
+
next_tok[2][:offset] > current_end
|
|
408
694
|
end
|
|
409
695
|
|
|
410
|
-
def
|
|
696
|
+
def finalize_ast(node)
|
|
697
|
+
validate_known_pseudo_functions!(node)
|
|
698
|
+
validate_max_depth!(node) if @max_depth
|
|
699
|
+
end
|
|
700
|
+
|
|
701
|
+
def validate_known_pseudo_functions!(node)
|
|
411
702
|
return unless node.respond_to?(:children) && node.children
|
|
412
703
|
|
|
413
|
-
if node.type == :pseudo_function
|
|
414
|
-
|
|
415
|
-
if
|
|
416
|
-
|
|
417
|
-
|
|
418
|
-
|
|
419
|
-
|
|
704
|
+
if node.type == :pseudo_function
|
|
705
|
+
name = pseudo_name(node.value)
|
|
706
|
+
validate_nth_pseudo!(node) if NTH_PSEUDO_NAMES.include?(name)
|
|
707
|
+
validate_selector_list_pseudo!(node) if SELECTOR_LIST_PSEUDO_NAMES.include?(name)
|
|
708
|
+
validate_relative_selector_list_pseudo!(node) if RELATIVE_SELECTOR_LIST_PSEUDO_NAMES.include?(name)
|
|
709
|
+
end
|
|
710
|
+
|
|
711
|
+
node.children.compact.each { |child| validate_known_pseudo_functions!(child) }
|
|
712
|
+
end
|
|
713
|
+
|
|
714
|
+
def validate_nth_pseudo!(node)
|
|
715
|
+
child = node.children.first
|
|
716
|
+
return if child&.type == :an_plus_b
|
|
717
|
+
return if child&.type == :nth_selector_argument
|
|
718
|
+
|
|
719
|
+
raise Parselly::SyntaxError, parse_error(
|
|
720
|
+
"Parse error: invalid argument for :#{node.value}()",
|
|
721
|
+
child&.position || node.position
|
|
722
|
+
)
|
|
723
|
+
end
|
|
724
|
+
|
|
725
|
+
def validate_selector_list_pseudo!(node)
|
|
726
|
+
child = node.children.first
|
|
727
|
+
return if child&.type == :selector_list && !relative_selector_list?(child)
|
|
728
|
+
|
|
729
|
+
raise Parselly::SyntaxError, parse_error(
|
|
730
|
+
"Parse error: invalid argument for :#{node.value}()",
|
|
731
|
+
child&.position || node.position
|
|
732
|
+
)
|
|
733
|
+
end
|
|
734
|
+
|
|
735
|
+
def validate_relative_selector_list_pseudo!(node)
|
|
736
|
+
child = node.children.first
|
|
737
|
+
return if child&.type == :selector_list
|
|
738
|
+
|
|
739
|
+
raise Parselly::SyntaxError, parse_error(
|
|
740
|
+
"Parse error: invalid argument for :#{node.value}()",
|
|
741
|
+
child&.position || node.position
|
|
742
|
+
)
|
|
743
|
+
end
|
|
744
|
+
|
|
745
|
+
def relative_selector_list?(node)
|
|
746
|
+
node.type == :selector_list &&
|
|
747
|
+
node.children.any? { |child| relative_selector?(child) }
|
|
748
|
+
end
|
|
749
|
+
|
|
750
|
+
def relative_selector?(node)
|
|
751
|
+
node.type == :selector && node.children.first &&
|
|
752
|
+
node.children.first.type.to_s.end_with?('_combinator')
|
|
753
|
+
end
|
|
754
|
+
|
|
755
|
+
def validate_max_depth!(node)
|
|
756
|
+
stack = [[node, 1]]
|
|
757
|
+
|
|
758
|
+
until stack.empty?
|
|
759
|
+
current, depth = stack.pop
|
|
760
|
+
if depth > @max_depth
|
|
761
|
+
raise Parselly::ParseError, parse_error(
|
|
762
|
+
"Input exceeds max_depth #{@max_depth}",
|
|
763
|
+
current.position
|
|
764
|
+
)
|
|
420
765
|
end
|
|
766
|
+
current.children.each { |child| stack << [child, depth + 1] }
|
|
421
767
|
end
|
|
422
|
-
|
|
768
|
+
end
|
|
769
|
+
|
|
770
|
+
def normalize_pseudo_argument(name, argument)
|
|
771
|
+
return argument unless NTH_PSEUDO_NAMES.include?(pseudo_name(name))
|
|
772
|
+
return argument unless argument&.type == :selector_list
|
|
773
|
+
|
|
774
|
+
an_plus_b_value = extract_an_plus_b_value(argument)
|
|
775
|
+
return argument unless an_plus_b_value
|
|
776
|
+
|
|
777
|
+
Node.new(:an_plus_b, an_plus_b_value, argument.position, raw_value: an_plus_b_value)
|
|
423
778
|
end
|
|
424
779
|
|
|
425
780
|
def extract_an_plus_b_value(selector_list_node)
|
|
@@ -442,22 +797,20 @@ def next_token
|
|
|
442
797
|
@index += 1
|
|
443
798
|
@current_position = token_position
|
|
444
799
|
|
|
445
|
-
[token_type, token_value]
|
|
800
|
+
[token_type, parser_token_value(token_value, token_position)]
|
|
801
|
+
end
|
|
802
|
+
|
|
803
|
+
def parser_token_value(value, position)
|
|
804
|
+
if value.respond_to?(:position)
|
|
805
|
+
value.position ||= position if value.respond_to?(:position=)
|
|
806
|
+
return value
|
|
807
|
+
end
|
|
808
|
+
|
|
809
|
+
Parselly::Lexer::TokenValue.new(value: value, raw: value, position: position)
|
|
446
810
|
end
|
|
447
811
|
|
|
448
812
|
def on_error(token_id, val, vstack)
|
|
449
813
|
token_name = token_to_str(token_id) || '?'
|
|
450
814
|
pos = @current_position || { line: '?', column: '?' }
|
|
451
|
-
error
|
|
452
|
-
message: "Parse error: unexpected #{token_name} '#{val}' at #{pos[:line]}:#{pos[:column]}",
|
|
453
|
-
line: pos[:line],
|
|
454
|
-
column: pos[:column],
|
|
455
|
-
offset: pos[:offset]
|
|
456
|
-
}
|
|
457
|
-
if @tolerant
|
|
458
|
-
@errors << error unless @suppress_errors
|
|
459
|
-
@error_index ||= [@index - 1, 0].max
|
|
460
|
-
raise Parselly::ParseError, error
|
|
461
|
-
end
|
|
462
|
-
raise error[:message]
|
|
815
|
+
raise_syntax_error("Parse error: unexpected #{token_name} '#{token_value(val)}' at #{pos[:line]}:#{pos[:column]}", pos)
|
|
463
816
|
end
|