syntax_tree-css 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,1188 @@
1
+ # frozen_string_literal: true
2
+
3
+ module SyntaxTree
4
+ module CSS
5
+ # Parses CSS3 stylesheets according to https://www.w3.org/TR/css-syntax-3
6
+ # from the version dated 24 December 2021.
7
+ class Parser
8
# Error type for every problem the parser reports. Instances are appended to
# Parser#errors while tokenizing and parsing, are returned (not raised) by the
# single-item parse_* entry points, and are raised by convert_to_number.
class ParseError < StandardError; end
11
+
12
# Result object shared by the tokenization algorithms: it pairs whatever an
# algorithm produced with the index at which scanning should resume.
class State
  # value - the produced item (a token, a string, a [number, type] pair, ...).
  # index - the position in the source that follows the consumed input.
  attr_reader :value, :index

  def initialize(value, index)
    @value, @index = value, index
  end
end
22
+
23
# Character-class fragments used to build the tokenizer's regular expressions.
# Each constant is a string holding a bracket expression, so it can be
# interpolated into a regexp or nested inside another fragment. They are frozen
# explicitly: the frozen_string_literal magic comment does not freeze
# interpolated strings on Ruby 3.0+, which would leave several of them mutable.

# https://www.w3.org/TR/css-syntax-3/#digit
DIGIT = "[0-9]".freeze

# https://www.w3.org/TR/css-syntax-3/#uppercase-letter
UPPERCASE_LETTER = "[A-Z]".freeze

# https://www.w3.org/TR/css-syntax-3/#lowercase-letter
LOWERCASE_LETTER = "[a-z]".freeze

# https://www.w3.org/TR/css-syntax-3/#letter
LETTER = "[#{UPPERCASE_LETTER}#{LOWERCASE_LETTER}]".freeze

# https://www.w3.org/TR/css-syntax-3/#non-ascii-code-point
NONASCII = "[\u{80}-\u{10FFFF}]".freeze

# https://www.w3.org/TR/css-syntax-3/#ident-start-code-point
IDENT_START = "[#{LETTER}#{NONASCII}_]".freeze

# https://www.w3.org/TR/css-syntax-3/#ident-code-point
IDENT = "[#{IDENT_START}#{DIGIT}-]".freeze

# https://www.w3.org/TR/css-syntax-3/#non-printable-code-point
NON_PRINTABLE = "[\x00-\x08\x0B\x0E-\x1F\x7F]".freeze

# https://www.w3.org/TR/css-syntax-3/#whitespace
WHITESPACE = "[\n\t ]".freeze
49
+
50
+ attr_reader :source, :errors
51
+
52
# Creates a parser for the given CSS text.
#
# source - the raw stylesheet text; it is run through preprocess before being
#          stored, so #source exposes the normalized text.
#
# The error list starts out empty and is filled in as tokenizing and parsing
# encounter problems.
def initialize(source)
  @errors = []
  @source = preprocess(source)
end
56
+
57
# Reports whether any error has been recorded so far. The list only fills up
# as tokens are produced and consumed, so ask after parsing.
def error?
  !errors.none?
end
60
+
61
+ #-------------------------------------------------------------------------
62
+ # 5.3. Parser Entry Points
63
+ # https://www.w3.org/TR/css-syntax-3/#parser-entry-points
64
+ #-------------------------------------------------------------------------
65
+
66
# 5.3.1. Parse something according to a CSS grammar
# https://www.w3.org/TR/css-syntax-3/#parse-grammar
#
# grammar - the grammar to parse against. Only :stylesheet is supported, in
#           which case the work is delegated to parse_css_stylesheet.
#
# Raises ArgumentError for any other grammar.
def parse(grammar: :stylesheet)
  case grammar
  when :stylesheet then parse_css_stylesheet
  else raise ArgumentError, "Unsupported grammar: #{grammar}"
  end
end
76
+
77
# 5.3.3. Parse a stylesheet
# https://www.w3.org/TR/css-syntax-3/#parse-stylesheet
#
# Tokenizes the source, consumes a top-level list of rules and wraps them in a
# StyleSheet. The location spans the first through the last rule; when no rule
# was found, the location of the last token of the stream is used instead.
def parse_stylesheet
  tokens = tokenize
  rules = consume_rule_list(tokens, top_level: true)

  location =
    if rules.none?
      # Enumerating the stream once more gives access to its final token.
      tokens.to_a.last.location
    else
      rules.first.location.to(rules.last.location)
    end

  StyleSheet.new(rules: rules, location: location)
end
92
+
93
# 5.3.4. Parse a list of rules
# https://www.w3.org/TR/css-syntax-3/#parse-list-of-rules
#
# Tokenizes the source and returns the array of rules consumed from it with the
# top-level flag unset.
def parse_rule_list
  tokens = tokenize
  consume_rule_list(tokens, top_level: false)
end
98
+
99
# 5.3.5. Parse a rule
# https://www.w3.org/TR/css-syntax-3/#parse-rule
#
# Parses the source as exactly one rule, optionally surrounded by comments and
# whitespace. Returns the at-rule or qualified rule on success. On failure a
# ParseError instance is returned (it is not raised).
def parse_rule
  # 1.
  tokens = tokenize

  # Discards leading comment and whitespace tokens (used for steps 2 and 4).
  skip_trivia = lambda do
    loop do
      case tokens.peek
      when CommentToken, WhitespaceToken then tokens.next
      else break
      end
    end
  end

  # 2.
  skip_trivia.call

  # 3.
  rule = nil

  case tokens.peek
  when EOFToken
    return ParseError.new("Unexpected end of input parsing rule")
  when AtKeywordToken
    rule = consume_at_rule(tokens)
  else
    rule = consume_qualified_rule(tokens)
    return ParseError.new("Expected a rule at #{tokens.peek.location.start_char}") unless rule
  end

  # 4.
  skip_trivia.call

  # 5.
  return rule if tokens.peek.is_a?(EOFToken)

  ParseError.new("Expected end of input parsing rule")
end
146
+
147
# 5.3.6. Parse a declaration
# https://www.w3.org/TR/css-syntax-3/#parse-declaration
#
# Parses the source as a single declaration. Returns the declaration on
# success. When the input is empty, does not start with an identifier, or
# cannot be consumed as a declaration, a ParseError instance is returned (it is
# not raised).
def parse_declaration
  # 1.
  tokens = tokenize

  # 2.
  loop do
    case tokens.peek
    when CommentToken, WhitespaceToken then tokens.next
    else break
    end
  end

  # 3.
  head = tokens.peek

  unless head.is_a?(IdentToken)
    return ParseError.new("Unexpected end of input parsing declaration") if head.is_a?(EOFToken)

    return ParseError.new("Expected an identifier at #{head.location.start_char}")
  end

  # 4.
  consume_declaration(tokens) ||
    ParseError.new("Expected a declaration at #{tokens.peek.location.start_char}")
end
180
+
181
# 5.3.8. Parse a list of declarations
# https://www.w3.org/TR/css-syntax-3/#parse-list-of-declarations
#
# Tokenizes the source and returns whatever consume_declaration_list collects
# from it.
def parse_declaration_list
  tokens = tokenize
  consume_declaration_list(tokens)
end
186
+
187
# 5.3.9. Parse a component value
# https://www.w3.org/TR/css-syntax-3/#parse-component-value
#
# Parses the source as exactly one component value, optionally surrounded by
# comments and whitespace. Returns the component value on success. A ParseError
# instance is returned (not raised) when the input is empty or when anything
# follows the value.
def parse_component_value
  # 1.
  tokens = tokenize

  # Discards comment and whitespace tokens (used for steps 2 and 5).
  skip_trivia = lambda do
    loop do
      case tokens.peek
      when CommentToken, WhitespaceToken then tokens.next
      else break
      end
    end
  end

  # 2.
  skip_trivia.call

  # 3.
  return ParseError.new("Unexpected end of input parsing component value") if tokens.peek.is_a?(EOFToken)

  # 4.
  value = consume_component_value(tokens)

  # 5.
  skip_trivia.call

  # 6.
  return value if tokens.peek.is_a?(EOFToken)

  ParseError.new("Expected end of input parsing component value")
end
228
+
229
# 5.3.10. Parse a list of component values
# https://www.w3.org/TR/css-syntax-3/#parse-list-of-component-values
#
# Tokenizes the source and returns an array holding every component value that
# precedes the EOF token.
def parse_component_values
  tokens = tokenize

  [].tap do |values|
    until tokens.peek.is_a?(EOFToken)
      values.push(consume_component_value(tokens))
    end
  end
end
238
+
239
+ private
240
+
241
+ #-------------------------------------------------------------------------
242
+ # 3. Tokenizing and Parsing CSS
243
+ # https://www.w3.org/TR/css-syntax-3/#tokenizing-and-parsing
244
+ #-------------------------------------------------------------------------
245
+
246
# 3.3. Preprocessing the input stream
# https://www.w3.org/TR/css-syntax-3/#input-preprocessing
#
# Returns a copy of the input in which every CRLF pair, lone carriage return
# and form feed is replaced by a single newline, and every NUL character by
# U+FFFD.
def preprocess(input)
  normalized = input.gsub(/\r\n?|\f/, "\n")
  normalized.gsub(/\x00/, "\u{FFFD}")

  # We should also be replacing surrogate characters in the input stream
  # with the replacement character, but it's not entirely possible to do
  # that if the string is already UTF-8 encoded. Until we dive further
  # into encoding and handle fallback encodings, we'll just skip this.
  # .gsub(/[\u{D800}-\u{DFFF}]/, "\u{FFFD}")
end
257
+
258
+ #-------------------------------------------------------------------------
259
+ # 4. Tokenization
260
+ # https://www.w3.org/TR/css-syntax-3/#tokenization
261
+ #-------------------------------------------------------------------------
262
+
263
# Builds an enumerator over the tokens of the source. Tokens are produced by
# calling consume_token repeatedly from index 0 until the end of the source is
# reached; an EOF token created from the final index always comes last.
def tokenize
  Enumerator.new do |yielder|
    position = 0

    until position >= source.length
      step = consume_token(position)

      yielder << step.value
      position = step.index
    end

    yielder << EOFToken[position]
  end
end
278
+
279
# 4.3.1. Consume a token
# https://www.w3.org/TR/css-syntax-3/#consume-token
#
# Reads one token from the source starting at the given index.
#
# index - position in the source at which the token starts.
#
# Returns a State whose value is the token and whose index is the position
# right after it. An invalid escape is recorded in errors and yields a "\"
# delim token.
def consume_token(index)
  case source[index..]
  when %r{\A/\*}
    consume_comment(index)
  when /\A#{WHITESPACE}+/o
    State.new(WhitespaceToken.new(value: $&, location: index...(index + $&.length)), index + $&.length)
  when /\A["']/
    consume_string(index, $&)
  when /\A#/
    if ident?(source[index + 1]) || valid_escape?(source[index + 1], source[index + 2])
      state = consume_ident_sequence(index + 1)

      State.new(
        HashToken.new(
          value: state.value,
          type: start_ident_sequence?(index + 1) ? "id" : "unrestricted",
          location: index...state.index
        ),
        state.index
      )
    else
      State.new(DelimToken.new(value: "#", location: index...(index + 1)), index + 1)
    end
  when /\A\(/
    State.new(OpenParenToken.new(location: index...(index + 1)), index + 1)
  when /\A\)/
    State.new(CloseParenToken.new(location: index...(index + 1)), index + 1)
  when /\A\+/
    # The check has to include the sign itself, exactly like the "-" and "."
    # branches below. Looking only at what follows the "+" would treat input
    # such as "++5" or "+-5" as a number starting at the first sign.
    if start_number?(index)
      consume_numeric(index)
    else
      State.new(DelimToken.new(value: "+", location: index...(index + 1)), index + 1)
    end
  when /\A,/
    State.new(CommaToken.new(location: index...(index + 1)), index + 1)
  when /\A-/
    if start_number?(index)
      consume_numeric(index)
    elsif source[index + 1] == "-" && source[index + 2] == ">"
      State.new(CDCToken.new(location: index...(index + 3)), index + 3)
    elsif start_ident_sequence?(index)
      consume_ident_like(index)
    else
      State.new(DelimToken.new(value: "-", location: index...(index + 1)), index + 1)
    end
  when /\A\./
    if start_number?(index)
      consume_numeric(index)
    else
      State.new(DelimToken.new(value: ".", location: index...(index + 1)), index + 1)
    end
  when /\A:/
    State.new(ColonToken.new(location: index...(index + 1)), index + 1)
  when /\A;/
    State.new(SemicolonToken.new(location: index...(index + 1)), index + 1)
  when /\A</
    if source[index...(index + 4)] == "<!--"
      State.new(CDOToken.new(location: index...(index + 4)), index + 4)
    else
      State.new(DelimToken.new(value: "<", location: index...(index + 1)), index + 1)
    end
  when /\A@/
    if start_ident_sequence?(index + 1)
      state = consume_ident_sequence(index + 1)
      State.new(AtKeywordToken.new(value: state.value, location: index...state.index), state.index)
    else
      State.new(DelimToken.new(value: "@", location: index...(index + 1)), index + 1)
    end
  when /\A\[/
    State.new(OpenSquareToken.new(location: index...(index + 1)), index + 1)
  when %r{\A\\}
    if valid_escape?(source[index], source[index + 1])
      consume_ident_like(index)
    else
      errors << ParseError.new("invalid escape at #{index}")
      State.new(DelimToken.new(value: "\\", location: index...(index + 1)), index + 1)
    end
  when /\A\]/
    State.new(CloseSquareToken.new(location: index...(index + 1)), index + 1)
  when /\A\{/
    State.new(OpenCurlyToken.new(location: index...(index + 1)), index + 1)
  when /\A\}/
    State.new(CloseCurlyToken.new(location: index...(index + 1)), index + 1)
  when /\A#{DIGIT}/o
    consume_numeric(index)
  when /\A#{IDENT_START}/o
    consume_ident_like(index)
  when "", nil
    # Nothing left to read at this index.
    State.new(EOFToken[index], index)
  else
    State.new(DelimToken.new(value: source[index], location: index...(index + 1)), index + 1)
  end
end
374
+
375
# 4.3.2. Consume comments
# https://www.w3.org/TR/css-syntax-3/#consume-comments
#
# index - position of the "/*" that opens the comment.
#
# Returns a State holding a CommentToken whose value is the raw comment text,
# delimiters included. When no closing "*/" exists, an error is recorded and
# the comment extends to the end of the source.
def consume_comment(index)
  terminator = source.index("*/", index + 2)
  errors << ParseError.new("unterminated comment starting at #{index}") if terminator.nil?

  stop = terminator.nil? ? source.length : terminator + 2
  span = index...stop

  State.new(CommentToken.new(value: source[span], location: span), stop)
end
389
+
390
# 4.3.3. Consume a numeric token
# https://www.w3.org/TR/css-syntax-3/#consume-numeric-token
#
# index - position at which the number starts.
#
# Returns a State holding a DimensionToken (number followed by an ident
# sequence used as the unit), a PercentageToken (number followed by "%") or a
# NumberToken. In every case the token's location covers all of the consumed
# text, including the unit or the percent sign.
def consume_numeric(index)
  start = index
  state = consume_number(index)

  value, type = state.value
  index = state.index

  if start_ident_sequence?(index)
    unit = consume_ident_sequence(index)

    # The location must run to the end of the unit, not just to the end of the
    # number, so that slicing the source by it yields the whole token.
    State.new(
      DimensionToken.new(value: value, unit: unit.value, type: type, location: start...unit.index),
      unit.index
    )
  elsif source[index] == "%"
    index += 1
    State.new(PercentageToken.new(value: value, type: type, location: start...index), index)
  else
    State.new(NumberToken.new(value: value, type: type, location: start...index), index)
  end
end
409
+
410
# 4.3.4. Consume an ident-like token
# https://www.w3.org/TR/css-syntax-3/#consume-ident-like-token
#
# index - position at which the ident sequence starts.
#
# Returns a State holding one of:
# * a FunctionToken when the name is directly followed by "(" (for url( this
#   only applies when the argument starts with a quote),
# * the result of consume_url for an unquoted url(,
# * the result of consume_urange when the name is "u"/"U" and a unicode range
#   can be read,
# * an IdentToken otherwise.
#
# NOTE(review): consume_urange is handed index - 1 of the end of the name,
# which presumably relies on the "u" being written as a single character.
def consume_ident_like(index)
  start = index
  sequence = consume_ident_sequence(index)

  name = sequence.value
  cursor = sequence.index

  if source[cursor] == "("
    cursor += 1 # (

    if name.casecmp("url") == 0
      # While the next two input code points are whitespace, consume the next
      # input code point.
      cursor += 1 while whitespace?(source[cursor]) && whitespace?(source[cursor + 1])

      quoted =
        /["']/.match?(source[cursor]) ||
        (whitespace?(source[cursor]) && /["']/.match?(source[cursor + 1]))

      return consume_url(start) unless quoted
    end

    return State.new(FunctionToken.new(value: name, location: start...cursor), cursor)
  end

  urange = consume_urange(cursor - 1) if name.casecmp("u") == 0
  urange || State.new(IdentToken.new(value: name, location: start...cursor), cursor)
end
442
+
443
# 4.3.5. Consume a string token
# https://www.w3.org/TR/css-syntax-3/#consume-string-token
#
# index - position of the opening quote.
# quote - the quote character that opened the string and therefore ends it.
#
# Returns a State holding a StringToken with the decoded contents. Reaching the
# end of the source records an error and still yields a StringToken; an
# unescaped newline records an error and yields a BadStringToken that stops
# right before the newline.
def consume_string(index, quote)
  start = index
  index += 1
  value = +""

  while index <= source.length
    case source[index]
    when quote
      return State.new(StringToken.new(value: value, location: start...(index + 1)), index + 1)
    when nil
      errors << ParseError.new("unterminated string at #{start}")
      return State.new(StringToken.new(value: value, location: start...index), index)
    when "\n"
      errors << ParseError.new("newline in string at #{index}")
      return State.new(BadStringToken.new(value: value, location: start...index), index)
    when "\\"
      index += 1

      if index == source.length
        # A backslash at the very end: do nothing here, the next iteration
        # reports the unterminated string.
        next
      elsif source[index] == "\n"
        # An escaped newline is a line continuation: it is consumed but
        # contributes nothing to the string's value.
        index += 1
      else
        state = consume_escaped_code_point(index)
        value << state.value
        index = state.index
      end
    else
      value << source[index]
      index += 1
    end
  end
end
479
+
480
# 4.3.6. Consume a url token
# https://www.w3.org/TR/css-syntax-3/#consume-url-token
#
# index - position at which the url( function name starts.
#
# Returns a State holding a URLToken with the decoded url, or a BadURLToken
# (with an error recorded) when the url contains whitespace followed by more
# content, a quote, an opening parenthesis, a non-printable character or an
# invalid escape. Reaching the end of the source records an error and still
# yields a URLToken.
#
# NOTE(review): the scan skips exactly four characters for "url(", so this
# presumably expects the name to be spelled without escapes.
def consume_url(index)
  # 1.
  value = +""

  # 2.
  start = index
  index += 4 # url(
  index += 1 while whitespace?(source[index])

  # 3.
  while index <= source.length
    case source[index..]
    when /\A\)/
      return State.new(URLToken.new(value: value, location: start...(index + 1)), index + 1)
    when "", nil
      errors << ParseError.new("unterminated url at #{start}")
      return State.new(URLToken.new(value: value, location: start...index), index)
    when /\A#{WHITESPACE}+/o
      index += $&.length

      case source[index]
      when ")"
        return State.new(URLToken.new(value: value, location: start...(index + 1)), index + 1)
      when nil
        errors << ParseError.new("unterminated url at #{start}")
        return State.new(URLToken.new(value: value, location: start...index), index)
      else
        errors << ParseError.new("invalid url at #{start}")
        state = consume_bad_url_remnants(index)
        return State.new(BadURLToken.new(value: value + state.value, location: start...state.index), state.index)
      end
    when /\A(?:["'(]|#{NON_PRINTABLE})/o
      # Both alternatives are grouped under \A so that only the code point at
      # the current index is inspected, not the rest of the source.
      errors << ParseError.new("invalid character in url at #{index}")
      state = consume_bad_url_remnants(index)
      return State.new(BadURLToken.new(value: value + state.value, location: start...state.index), state.index)
    when %r{\A\\}
      if valid_escape?(source[index], source[index + 1])
        state = consume_escaped_code_point(index + 1)
        value << state.value
        index = state.index
      else
        errors << ParseError.new("invalid escape at #{index}")
        state = consume_bad_url_remnants(index)
        return State.new(BadURLToken.new(value: value + state.value, location: start...state.index), state.index)
      end
    else
      value << source[index]
      index += 1
    end
  end
end
533
+
534
# 4.3.7. Consume an escaped code point
# https://www.w3.org/TR/css-syntax-3/#consume-escaped-code-point
#
# index - position right after the backslash.
#
# Returns a State with the decoded character and the index following the
# escape. One to six hex digits (plus one optional trailing whitespace
# character) decode to that code point, or to U+FFFD when the number is zero, a
# surrogate, or above U+10FFFF. At the end of the source U+FFFD is returned
# without advancing. Any other character stands for itself.
def consume_escaped_code_point(index)
  replacement = "\u{FFFD}"
  match = /\A(\h{1,6})#{WHITESPACE}?/o.match(source[index..])

  if match
    code_point = match[1].to_i(16)
    unusable = code_point == 0 || (0xD800..0xDFFF).cover?(code_point) || code_point > 0x10FFFF
    decoded = unusable ? replacement : code_point.chr(Encoding::UTF_8)

    State.new(decoded, index + match[0].length)
  elsif index == source.length
    State.new(replacement, index)
  else
    State.new(source[index], index + 1)
  end
end
553
+
554
# 4.3.8. Check if two code points are a valid escape
# https://www.w3.org/TR/css-syntax-3/#starts-with-a-valid-escape
#
# left  - the first code point (or nil).
# right - the code point after it (or nil).
#
# Returns true when left is a backslash that is not followed by a newline. A
# backslash followed by nothing (nil) counts as valid.
def valid_escape?(left, right)
  return false unless left == "\\"

  right != "\n"
end
559
+
560
# 4.3.9. Check if three code points would start an ident sequence
# https://www.w3.org/TR/css-syntax-3/#would-start-an-identifier
#
# index - position of the first of the (up to) three code points inspected.
#
# Returns true or false.
def start_ident_sequence?(index)
  first, second, third = source.slice(index...(index + 3)).chars

  if first == "-"
    # A hyphen needs an ident-start code point, another hyphen, or a valid
    # escape right after it.
    second == "-" || /#{IDENT_START}/o.match?(second) || valid_escape?(second, third)
  elsif first == "\\"
    valid_escape?(first, second)
  else
    /#{IDENT_START}/o.match?(first)
  end
end
577
+
578
# 4.3.10. Check if three code points would start a number
# https://www.w3.org/TR/css-syntax-3/#starts-with-a-number
#
# index - position of the first of the (up to) three code points inspected.
#
# Returns true or false.
def start_number?(index)
  first, second, third = source.slice(index...(index + 3)).chars

  # A sign must be followed by a digit, or by a dot and a digit.
  return digit?(second) || (second == "." && digit?(third)) if ["+", "-"].include?(first)

  # A dot must be followed by a digit.
  return digit?(second) if first == "."

  /#{DIGIT}/o.match?(first)
end
594
+
595
# 4.3.11. Consume an ident sequence
# https://www.w3.org/TR/css-syntax-3/#consume-an-ident-sequence
#
# index - position at which to start reading.
#
# Returns a State with the decoded name (escapes resolved) and the index of the
# first character that is neither an ident code point nor the start of a valid
# escape. No check is made that the input actually starts an ident sequence.
def consume_ident_sequence(index)
  name = +""

  while index <= source.length
    current = source[index]

    if ident?(current)
      name << current
      index += 1
      next
    end

    return State.new(name, index) unless valid_escape?(current, source[index + 1])

    escaped = consume_escaped_code_point(index + 1)
    name << escaped.value
    index = escaped.index
  end
end
613
+
614
# 4.3.12. Consume a number
# https://www.w3.org/TR/css-syntax-3/#consume-a-number
#
# index - position at which the number starts.
#
# Returns a State whose value is a two-element array of the converted number
# and its type ("integer", or "number" when a fractional part or an exponent
# was read), and whose index follows the number.
def consume_number(index)
  cursor = index
  type = "integer"

  # Optional sign followed by the integer digits.
  cursor += 1 if /[+-]/.match?(source[cursor])
  cursor += 1 while digit?(source[cursor])

  # Fractional part: only when the dot is directly followed by a digit.
  if source[cursor] == "." && digit?(source[cursor + 1])
    cursor += 2
    cursor += 1 while digit?(source[cursor])
    type = "number"
  end

  # Exponent: only when at least one digit follows the (optionally signed) E.
  if /\A[Ee][+-]?#{DIGIT}+/o =~ source[cursor..]
    cursor += $&.length
    type = "number"
  end

  # The representation is exactly the text that was stepped over.
  repr = source[index...cursor].to_s
  State.new([convert_to_number(repr), type], cursor)
end
655
+
656
# 4.3.13. Convert a string to a number
# https://www.w3.org/TR/css-syntax-3/#convert-a-string-to-a-number
#
# value - the textual representation of a number (sign, integer part,
#         fractional part and exponent are all optional).
#
# Returns the numeric value computed with exact arithmetic: an Integer when no
# negative power of ten is involved, otherwise a Rational.
#
# Raises ParseError when the text does not have the shape of a number.
def convert_to_number(value)
  pattern = %r{
    \A
    (?<sign>[+-]?)
    (?<integer>#{DIGIT}*)
    (?<decimal>\.?)
    (?<fractional>#{DIGIT}*)
    (?<exponent_indicator>[Ee]?)
    (?<exponent_sign>[+-]?)
    (?<exponent>#{DIGIT}*)
    \z
  }ox

  match = pattern.match(value)
  raise ParseError, "convert_to_number called with invalid value: #{value}" unless match

  sign = match[:sign] == "-" ? -1 : 1
  integer = match[:integer].to_i

  # An empty fractional part contributes zero digits with a value of zero.
  fractional = match[:fractional].to_i
  fractional_digits = match[:fractional].length

  exponent_sign = match[:exponent_sign] == "-" ? -1 : 1
  exponent = match[:exponent].to_i

  sign * (integer + fractional * 10**(-fractional_digits)) * 10**(exponent_sign * exponent)
end
690
+
691
# 4.3.14. Consume the remnants of a bad url
# https://www.w3.org/TR/css-syntax-3/#consume-remnants-of-bad-url
#
# index - position at which the malformed part of the url starts.
#
# Returns a State with the text that was skipped (escapes decoded, closing
# parenthesis included when present) and the index following it. Skipping stops
# at the first unescaped ")" or at the end of the source.
def consume_bad_url_remnants(index)
  value = +""

  while index <= source.length
    case source[index]
    when nil
      return State.new(value, index)
    when ")"
      value << ")"
      return State.new(value, index + 1)
    else
      if valid_escape?(source[index], source[index + 1])
        # The escaped code point starts after the backslash. Consuming it here
        # is what lets an escaped ")" pass without ending the bad url.
        state = consume_escaped_code_point(index + 1)
        value << state.value
        index = state.index
      else
        value << source[index]
        index += 1
      end
    end
  end
end
715
+
716
# https://www.w3.org/TR/css-syntax-3/#digit
#
# Returns true when the given character is an ASCII digit; nil (no character)
# yields false.
def digit?(value)
  pattern = /#{DIGIT}/o
  pattern.match?(value)
end
720
+
721
# https://www.w3.org/TR/css-syntax-3/#ident-code-point
#
# Returns true when the given character is an ident code point; nil (no
# character) yields false.
def ident?(value)
  pattern = /#{IDENT}/o
  pattern.match?(value)
end
725
+
726
# https://www.w3.org/TR/css-syntax-3/#whitespace
#
# Returns true when the given character is a newline, a tab or a space; nil (no
# character) yields false.
def whitespace?(value)
  pattern = /#{WHITESPACE}/o
  pattern.match?(value)
end
730
+
731
+ #-------------------------------------------------------------------------
732
+ # 5. Parsing
733
+ # https://www.w3.org/TR/css-syntax-3/#parsing
734
+ #-------------------------------------------------------------------------
735
+
736
# 5.4.1. Consume a list of rules
# https://www.w3.org/TR/css-syntax-3/#consume-list-of-rules
#
# tokens    - the token enumerator to read from.
# top_level - when true, CDO and CDC tokens are dropped; otherwise they are
#             treated as the start of a qualified rule.
#
# Returns the array of consumed at-rules and qualified rules. Comment and
# whitespace tokens between rules are skipped; the EOF token ends the list and
# is left in the stream.
def consume_rule_list(tokens, top_level: true)
  rules = []

  loop do
    upcoming = tokens.peek

    case upcoming
    when CommentToken, WhitespaceToken
      tokens.next
    when EOFToken
      return rules
    when AtKeywordToken
      rules << consume_at_rule(tokens)
    else
      if top_level && (upcoming.is_a?(CDCToken) || upcoming.is_a?(CDOToken))
        tokens.next
      else
        # Qualified rules that hit a parse error come back as nil and are
        # left out of the list.
        rule = consume_qualified_rule(tokens)
        rules.push(rule) if rule
      end
    end
  end
end
762
+
763
# 5.4.2. Consume an at-rule
# https://www.w3.org/TR/css-syntax-3/#consume-at-rule
#
# tokens - the token enumerator, positioned at the at-keyword token.
#
# Returns an AtRule. The prelude collects every component value up to the
# terminating semicolon (block is nil), the opening curly brace (block is the
# consumed simple block) or the EOF token (block is nil and an error is
# recorded; the EOF token is left in the stream).
def consume_at_rule(tokens)
  name_token = tokens.next
  prelude = []

  # Builds the rule once its end is known.
  finish = lambda do |block, ending|
    AtRule.new(name: name_token.value, prelude: prelude, block: block, location: name_token.location.to(ending))
  end

  loop do
    case tokens.peek
    in SemicolonToken[location:]
      tokens.next
      return finish.call(nil, location)
    in EOFToken[location:]
      errors << ParseError.new("Unexpected EOF while parsing at-rule")
      return finish.call(nil, location)
    in OpenCurlyToken
      block = consume_simple_block(tokens)
      return finish.call(block, block.location)
    else
      prelude << consume_component_value(tokens)
    end
  end
end
786
+
787
# 5.4.3. Consume a qualified rule
# https://www.w3.org/TR/css-syntax-3/#consume-qualified-rule
#
# tokens - the token enumerator, positioned at the start of the prelude.
#
# Returns a QualifiedRule made of the component values read before the opening
# curly brace (the prelude) and the simple block that follows. When the EOF
# token is reached first, an error is recorded and nil is returned.
def consume_qualified_rule(tokens)
  prelude = []

  loop do
    upcoming = tokens.peek

    if upcoming.is_a?(EOFToken)
      errors << ParseError.new("Unexpected EOF while parsing qualified rule")
      return nil
    elsif upcoming.is_a?(OpenCurlyToken)
      block = consume_simple_block(tokens)

      # With an empty prelude the rule spans just the block.
      location = prelude.none? ? block.location : prelude.first.location.to(block.location)
      return QualifiedRule.new(prelude: prelude, block: block, location: location)
    else
      prelude << consume_component_value(tokens)
    end
  end
end
807
+
808
# 5.4.4. Consume a style block’s contents
# https://www.w3.org/TR/css-syntax-3/#consume-style-block
#
# tokens - the token enumerator to read from.
#
# Returns an array with the consumed declarations followed by the consumed
# rules (at-rules and "&"-prefixed qualified rules). Comment, whitespace and
# semicolon tokens between items are skipped. Any other token records an error
# and is discarded together with everything up to the next semicolon or EOF.
# The EOF token is consumed before returning.
def consume_style_block_contents(tokens)
  declarations = []
  rules = []

  loop do
    case tokens.peek
    in CommentToken | SemicolonToken | WhitespaceToken
      tokens.next
    in EOFToken
      tokens.next
      return declarations + rules
    in AtKeywordToken
      rules << consume_at_rule(tokens)
    in IdentToken
      # Gather the declaration's tokens into a temporary list. The terminating
      # semicolon or EOF stays in the stream for the outer loop to handle, so
      # it never ends up inside the declaration's value.
      list = [tokens.next]

      loop do
        case tokens.peek
        in EOFToken | SemicolonToken
          break
        else
          list << consume_component_value(tokens)
        end
      end

      # consume_declaration reads until it sees an EOF token, so the list is
      # closed with the real one when we are at the end of input and with a
      # synthetic one otherwise.
      terminator = tokens.peek
      list << (terminator.is_a?(EOFToken) ? terminator : EOFToken[list.last.location.end_char])

      declaration = consume_declaration(list.to_enum)
      declarations << declaration if declaration
    in DelimToken[value: "&"]
      rule = consume_qualified_rule(tokens)
      rules << rule if rule
    in { location: }
      errors << ParseError.new("Unexpected token while parsing style block at #{location.start_char}")

      # Error recovery: throw away component values up to the next semicolon
      # or the end of input.
      loop do
        case tokens.peek
        in EOFToken | SemicolonToken
          break
        else
          consume_component_value(tokens)
        end
      end
    end
  end
end
854
+
855
# 5.4.5. Consume a list of declarations
# https://www.w3.org/TR/css-syntax-3/#consume-list-of-declarations
#
# tokens - the token enumerator to read from.
#
# Returns an array of the consumed declarations and at-rules, in source order.
# Comment, whitespace and semicolon tokens between items are skipped. Any other
# token records an error and is discarded together with everything up to the
# next semicolon or EOF. The EOF token is consumed before returning.
def consume_declaration_list(tokens)
  declarations = []

  loop do
    case tokens.peek
    in CommentToken | SemicolonToken | WhitespaceToken
      # Comments are real tokens in this tokenizer, so they have to be skipped
      # here just like whitespace; otherwise they would trigger the error
      # recovery below and take the following declaration with them.
      tokens.next
    in EOFToken
      tokens.next
      return declarations
    in AtKeywordToken
      declarations << consume_at_rule(tokens)
    in IdentToken
      # Gather the declaration's tokens into a temporary list.
      list = [tokens.next]

      loop do
        case tokens.peek
        in EOFToken | SemicolonToken
          break
        else
          list << consume_component_value(tokens)
        end
      end

      # consume_declaration reads until it sees an EOF token: use the real one
      # at the end of input, a synthetic one in place of the semicolon.
      at_end = tokens.peek.is_a?(EOFToken)
      terminator = tokens.next
      list << (at_end ? terminator : EOFToken[list.last.location.end_char])

      declaration = consume_declaration(list.to_enum)
      declarations << declaration if declaration

      return declarations if at_end
    else
      errors << ParseError.new("Unexpected token while parsing declaration list at #{tokens.peek.location.start_char}")

      # Error recovery: throw away component values up to the next semicolon
      # or the end of input.
      loop do
        case tokens.peek
        in EOFToken | SemicolonToken
          break
        else
          consume_component_value(tokens)
        end
      end
    end
  end
end
909
+
910
# 5.4.6. Consume a declaration
# https://www.w3.org/TR/css-syntax-3/#consume-declaration
#
# tokens - a token enumerator positioned at the declaration's name and ending
#          with an EOF token.
#
# Returns a Declaration built from the name, the component values after the
# colon (trailing comments and whitespace removed) and the important flag,
# which is set when the value ends in "!" followed by "important" (any case);
# those two tokens are then removed from the value. When no colon follows the
# name, an error is recorded and nil is returned.
def consume_declaration(tokens)
  name = tokens.next

  # Discards comment and whitespace tokens (used for steps 1 and 3).
  skip_trivia = lambda do
    loop do
      case tokens.peek
      when CommentToken, WhitespaceToken then tokens.next
      else break
      end
    end
  end

  # 1.
  skip_trivia.call

  # 2.
  unless tokens.peek.is_a?(ColonToken)
    errors << ParseError.new("Expected colon at #{tokens.peek.location.start_char}")
    return
  end

  tokens.next

  # 3.
  skip_trivia.call

  # 4.
  value = []
  value << consume_component_value(tokens) until tokens.peek.is_a?(EOFToken)

  # 5.
  important = false
  significant = value.reject { |token| token.is_a?(WhitespaceToken) || token.is_a?(CommentToken) }

  case significant[-2..]
  in [DelimToken[value: "!"] => bang, IdentToken[value: /\Aimportant\z/i] => keyword]
    value.delete(bang)
    value.delete(keyword)
    important = true
  else
  end

  # 6.
  value.pop while value.last.is_a?(CommentToken) || value.last.is_a?(WhitespaceToken)

  # 7.
  location = name.location
  location = location.to(value.last.location) if value.any?
  Declaration.new(name: name.value, value: value, important: important, location: location)
end
973
+
974
# 5.4.7. Consume a component value
# https://www.w3.org/TR/css-syntax-3/#consume-component-value
#
# tokens - the token enumerator to read from.
#
# Returns a simple block when the next token opens one ({, [ or (), a function
# when it is a function token, and otherwise the next token itself.
def consume_component_value(tokens)
  case tokens.peek
  when OpenCurlyToken, OpenSquareToken, OpenParenToken then consume_simple_block(tokens)
  when FunctionToken then consume_function(tokens)
  else tokens.next
  end
end
986
+
987
# 5.4.8. Consume a simple block
# https://www.w3.org/TR/css-syntax-3/#consume-simple-block
#
# tokens - the token enumerator, positioned at the opening (, [ or { token.
#
# Returns a SimpleBlock holding the component values found before the matching
# closing token, which is consumed. When the EOF token is reached first, an
# error is recorded and the block ends at the EOF token, which is left in the
# stream.
def consume_simple_block(tokens)
  token = tokens.next

  # Closing token class that belongs to the opening token just read.
  closers = {
    OpenParenToken => CloseParenToken,
    OpenSquareToken => CloseSquareToken,
    OpenCurlyToken => CloseCurlyToken
  }
  ending = closers[token.class]

  value = []

  loop do
    upcoming = tokens.peek

    case upcoming
    when ending
      tokens.next
      return SimpleBlock.new(token: token.value, value: value, location: token.location.to(upcoming.location))
    when EOFToken
      errors << ParseError.new("Unexpected EOF while parsing simple block at #{token.location.start_char}")
      return SimpleBlock.new(token: token.value, value: value, location: token.location.to(upcoming.location))
    else
      value << consume_component_value(tokens)
    end
  end
end
1012
+
1013
# 5.4.9. Consume a function
# https://www.w3.org/TR/css-syntax-3/#consume-function
#
# tokens - the token enumerator, positioned at the function token.
#
# Returns a Function holding the component values found before the closing
# parenthesis, which is consumed. When the EOF token is reached first, an error
# is recorded and the function ends at the EOF token, which is left in the
# stream.
def consume_function(tokens)
  name_token = tokens.next
  value = []

  loop do
    upcoming = tokens.peek

    case upcoming
    when CloseParenToken
      tokens.next
      return Function.new(name: name_token.value, value: value, location: name_token.location.to(upcoming.location))
    when EOFToken
      errors << ParseError.new("Unexpected EOF while parsing function at #{name_token.location.start_char}")
      return Function.new(name: name_token.value, value: value, location: name_token.location.to(upcoming.location))
    else
      value << consume_component_value(tokens)
    end
  end
end
1032
+
1033
+ #-------------------------------------------------------------------------
1034
+ # 7. The Unicode-Range microsyntax
1035
+ # https://www.w3.org/TR/css-syntax-3/#urange
1036
+ #-------------------------------------------------------------------------
1037
+
1038
+ # 7.1. The <urange> type
1039
+ # https://www.w3.org/TR/css-syntax-3/#urange-syntax
1040
# Attempts to read a <urange> whose leading "u" sits at +index+ in the
# source. Returns a State holding the URange and the index just past it, or
# nil when the input does not form a valid <urange>.
#
# The work happens in two phases. First, tokens following the "u" are
# gathered with a small state machine so that we only look at input that
# can match the grammar. The transitions are:
#
#   state | next token   | new state
#   ------+--------------+----------
#     1   | delim "+"    |    2
#     1   | dimension    |    3
#     1   | number       |    4
#     2   | delim "?"    |    3
#     2   | ident        |    3
#     3   | delim "?"    |    3
#     4   | delim "?"    |    5
#     4   | dimension    |    6
#     4   | number       |    7
#     5   | delim "?"    |    5
#
# States 3 through 7 are final. When a token has no transition from the
# current state we stop gathering if the state is final and give up
# otherwise.
#
# Second, the source text of the gathered tokens is concatenated behind a
# "u" and interpreted following the numbered steps of the specification.
def consume_urange(index)
  start = index
  index += 1 # to move past the "u"

  tokens = []
  box = 1

  loop do
    state = consume_token(index)
    box =
      case [box, state.value]
      in [1, DelimToken[value: "+"]] then 2
      in [1, DimensionToken] then 3
      in [1, NumberToken] then 4
      in [2, DelimToken[value: "?"]] then 3
      in [2, IdentToken] then 3
      in [3, DelimToken[value: "?"]] then 3
      in [4, DelimToken[value: "?"]] then 5
      in [4, DimensionToken] then 6
      in [4, NumberToken] then 7
      in [5, DelimToken[value: "?"]] then 5
      else
        if [3, 4, 5, 6, 7].include?(box)
          break # final states
        else
          return
        end
      end

    tokens << state.value
    index = state.index
  end

  # 2. The text has to begin with "u+".
  text = "u" + tokens.map { |token| source[token.location.to_range] }.join
  return if text[1] != "+"
  index = 2

  # 3. Between one and six hex digits and/or trailing question marks. The
  # pattern can match the empty string, so the length check below is what
  # rejects a missing start value.
  value = text[index..][/\A\h*\?*/]
  return unless (1..6).cover?(value.length)

  index += value.length

  if value.end_with?("?")
    # A wildcard range has to be the whole text; "?" expands to 0 for the
    # start of the range and to F for the end of the range.
    return if index != text.length

    start_value = value.tr("?", "0").hex
    end_value = value.tr("?", "F").hex
  else
    start_value = end_value = value.hex
  end

  # 4. If nothing is left, we have a single code point or wildcard range.
  # Otherwise an explicit end value has to follow.
  if index != text.length
    # 5.
    return if text[index] != "-"
    index += 1

    # 6. Between one and six hex digits. An empty end value (for example
    # "u+abc-") is not a urange, so bail out before validation rather
    # than treating the end as 0 and reporting a misleading range error.
    digits = text[index..][/\A\h*/]
    return if digits.empty? || digits.length > 6

    end_value = digits.hex
    index += digits.length
    return if index != text.length
  end

  # 7.
  return unless valid_urange?(start_value, end_value)

  ending = start + text.length
  State.new(URange.new(start_value: start_value, end_value: end_value, location: start...ending), ending)
end
1144
+
1145
+ # Checks that the start and end value of a urange are valid.
1146
# Validates the bounds of a urange. The range is rejected when its end
# exceeds 0x10FFFF or when its start lies beyond its end; in either case a
# ParseError describing the problem is appended to the errors list and
# false is returned. Returns true otherwise.
def valid_urange?(start_value, end_value)
  problem =
    if end_value > 0x10FFFF
      "Invalid urange. #{end_value} greater than 0x10FFFF"
    elsif start_value > end_value
      "Invalid urange. #{start_value} greater than #{end_value}"
    end

  return true unless problem

  errors << ParseError.new(problem)
  false
end
1157
+
1158
+ #-------------------------------------------------------------------------
1159
+ # 9. CSS stylesheets
1160
+ # https://www.w3.org/TR/css-syntax-3/#css-stylesheets
1161
+ #-------------------------------------------------------------------------
1162
+
1163
+ # https://www.w3.org/TR/css-syntax-3/#parse-a-css-stylesheet
1164
# Parses a stylesheet and then upgrades each qualified rule in it to a
# style rule. Every other kind of rule is carried over untouched. The
# resulting CSSStyleSheet shares the location of the parsed stylesheet.
def parse_css_stylesheet
  parsed = parse_stylesheet

  converted =
    parsed.rules.map do |node|
      next node unless node.is_a?(QualifiedRule)

      create_style_rule(node)
    end

  CSSStyleSheet.new(rules: converted, location: parsed.location)
end
1173
+
1174
+ # 9.1. Style rules
1175
+ # https://www.w3.org/TR/css-syntax-3/#style-rules
1176
# Turns a qualified rule into a StyleRule. The rule's prelude is parsed as
# selectors and the contents of its block are consumed as style block
# contents. Both token lists are terminated with their own EOF token
# positioned at the end of the rule before being handed off.
def create_style_rule(rule)
  terminate = ->(nodes) { [*nodes, EOFToken[rule.location.end_char]] }

  selector_stream = terminate.call(rule.prelude)
  declaration_stream = terminate.call(rule.block.value)

  selectors = Selectors.new(selector_stream).parse
  declarations = consume_style_block_contents(declaration_stream.to_enum)

  StyleRule.new(selectors: selectors, declarations: declarations, location: rule.location)
end
1186
+ end
1187
+ end
1188
+ end