syntax_tree-css 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.github/dependabot.yml +6 -0
- data/.github/workflows/main.yml +34 -0
- data/.gitignore +2 -0
- data/.gitmodules +3 -0
- data/Gemfile +5 -0
- data/Gemfile.lock +36 -0
- data/LICENSE +21 -0
- data/README.md +73 -0
- data/Rakefile +16 -0
- data/lib/syntax_tree/css/basic_visitor.rb +23 -0
- data/lib/syntax_tree/css/format.rb +14 -0
- data/lib/syntax_tree/css/nodes.rb +969 -0
- data/lib/syntax_tree/css/parser.rb +1188 -0
- data/lib/syntax_tree/css/pretty_print.rb +441 -0
- data/lib/syntax_tree/css/selectors.rb +519 -0
- data/lib/syntax_tree/css/version.rb +7 -0
- data/lib/syntax_tree/css/visitor.rb +154 -0
- data/lib/syntax_tree/css.rb +31 -0
- data/syntax_tree-css.gemspec +33 -0
- metadata +147 -0
@@ -0,0 +1,1188 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module SyntaxTree
|
4
|
+
module CSS
|
5
|
+
# Parses CSS3 stylesheets according to https://www.w3.org/TR/css-syntax-3
|
6
|
+
# from the version dated 24 December 2021.
|
7
|
+
class Parser
|
8
|
+
# Represents any kind of error that occurs during parsing.
|
9
|
+
# Error type used throughout the parser. Instances are mostly collected or
# returned rather than raised; convert_to_number raises it directly.
class ParseError < StandardError; end
|
11
|
+
|
12
|
+
# This is used to communicate between the various tokenization algorithms.
|
13
|
+
# It transports a value along with the new index.
|
14
|
+
# Read-only pair handed back by the tokenizer helpers: the produced value
# and the source index at which scanning should resume.
class State
  attr_reader :value, :index

  def initialize(value, index)
    @value, @index = value, index
  end
end
|
22
|
+
|
23
|
+
# https://www.w3.org/TR/css-syntax-3/#digit
|
24
|
+
# Each constant below is the source text of a regexp character class; they
# are interpolated into the regexps used by the tokenizer.
DIGIT = "[0-9]"

# https://www.w3.org/TR/css-syntax-3/#uppercase-letter
UPPERCASE_LETTER = "[A-Z]"

# https://www.w3.org/TR/css-syntax-3/#lowercase-letter
LOWERCASE_LETTER = "[a-z]"

# https://www.w3.org/TR/css-syntax-3/#letter
# Interpolated strings are not frozen by the frozen_string_literal magic
# comment on Ruby 3.0+, so the composed fragments are frozen explicitly.
LETTER = "[#{UPPERCASE_LETTER}#{LOWERCASE_LETTER}]".freeze

# https://www.w3.org/TR/css-syntax-3/#non-ascii-code-point
NONASCII = "[\u{80}-\u{10FFFF}]"

# https://www.w3.org/TR/css-syntax-3/#ident-start-code-point
IDENT_START = "[#{LETTER}#{NONASCII}_]".freeze

# https://www.w3.org/TR/css-syntax-3/#ident-code-point
IDENT = "[#{IDENT_START}#{DIGIT}-]".freeze

# https://www.w3.org/TR/css-syntax-3/#non-printable-code-point
NON_PRINTABLE = "[\x00-\x08\x0B\x0E-\x1F\x7F]"

# https://www.w3.org/TR/css-syntax-3/#whitespace
WHITESPACE = "[\n\t ]"
|
49
|
+
|
50
|
+
attr_reader :source, :errors
|
51
|
+
|
52
|
+
# Builds a parser for the given CSS text. The text is normalized through
# #preprocess and the list of collected errors starts out empty.
def initialize(source)
  @errors = []
  @source = preprocess(source)
end
|
56
|
+
|
57
|
+
# True once at least one error has been recorded in #errors.
def error?
  !errors.none?
end
|
60
|
+
|
61
|
+
#-------------------------------------------------------------------------
|
62
|
+
# 5.3. Parser Entry Points
|
63
|
+
# https://www.w3.org/TR/css-syntax-3/#parser-entry-points
|
64
|
+
#-------------------------------------------------------------------------
|
65
|
+
|
66
|
+
# 5.3.1. Parse something according to a CSS grammar
|
67
|
+
# https://www.w3.org/TR/css-syntax-3/#parse-grammar
|
68
|
+
# Parses the source according to the named grammar. Only :stylesheet is
# supported; any other grammar raises ArgumentError.
def parse(grammar: :stylesheet)
  raise ArgumentError, "Unsupported grammar: #{grammar}" unless grammar == :stylesheet

  parse_css_stylesheet
end
|
76
|
+
|
77
|
+
# 5.3.3. Parse a stylesheet
|
78
|
+
# https://www.w3.org/TR/css-syntax-3/#parse-stylesheet
|
79
|
+
# Tokenizes the source, consumes a top-level list of rules and wraps them in
# a StyleSheet node.
#
# The node's location spans the first through the last rule. When there are
# no rules, the location of the pending EOF token is used instead.
def parse_stylesheet
  tokens = tokenize
  rules = consume_rule_list(tokens, top_level: true)

  location =
    if rules.any?
      rules.first.location.to(rules.last.location)
    else
      # consume_rule_list only returns after peeking (not consuming) the EOF
      # token, so it is still the next token here. Peeking avoids iterating
      # the enumerator from the start again, which would re-run the tokenizer
      # and record every tokenizer error a second time.
      tokens.peek.location
    end

  StyleSheet.new(rules: rules, location: location)
end
|
92
|
+
|
93
|
+
# 5.3.4. Parse a list of rules
|
94
|
+
# https://www.w3.org/TR/css-syntax-3/#parse-list-of-rules
|
95
|
+
# Tokenizes the source and consumes it as a non-top-level list of rules.
def parse_rule_list
  tokens = tokenize
  consume_rule_list(tokens, top_level: false)
end
|
98
|
+
|
99
|
+
# 5.3.5. Parse a rule
|
100
|
+
# https://www.w3.org/TR/css-syntax-3/#parse-rule
|
101
|
+
# Parses the source as exactly one rule (an at-rule or a qualified rule).
#
# Returns the rule, or a ParseError instance (returned, not raised) when the
# input is empty, no rule could be consumed, or tokens remain after the rule.
def parse_rule
  # 1.
  tokens = tokenize

  # Steps 2 and 4 both discard comments and whitespace.
  skip_trivia = lambda do
    loop do
      case tokens.peek
      when CommentToken, WhitespaceToken then tokens.next
      else break
      end
    end
  end

  # 2.
  skip_trivia.call

  # 3.
  case tokens.peek
  when EOFToken
    return ParseError.new("Unexpected end of input parsing rule")
  when AtKeywordToken
    rule = consume_at_rule(tokens)
  else
    rule = consume_qualified_rule(tokens)
    return ParseError.new("Expected a rule at #{tokens.peek.location.start_char}") unless rule
  end

  # 4.
  skip_trivia.call

  # 5.
  return rule if tokens.peek.is_a?(EOFToken)

  ParseError.new("Expected end of input parsing rule")
end
|
146
|
+
|
147
|
+
# 5.3.6. Parse a declaration
|
148
|
+
# https://www.w3.org/TR/css-syntax-3/#parse-declaration
|
149
|
+
# Parses the source as a single declaration.
#
# Returns the declaration, or a ParseError instance (returned, not raised)
# when the input does not start with an identifier or no declaration could
# be consumed.
def parse_declaration
  # 1.
  tokens = tokenize

  # 2. Drop leading comments and whitespace.
  loop do
    case tokens.peek
    when CommentToken, WhitespaceToken then tokens.next
    else break
    end
  end

  # 3. A declaration has to begin with an identifier.
  upcoming = tokens.peek
  unless upcoming.is_a?(IdentToken)
    return ParseError.new("Unexpected end of input parsing declaration") if upcoming.is_a?(EOFToken)

    return ParseError.new("Expected an identifier at #{upcoming.location.start_char}")
  end

  # 4.
  consume_declaration(tokens) ||
    ParseError.new("Expected a declaration at #{tokens.peek.location.start_char}")
end
|
180
|
+
|
181
|
+
# 5.3.8. Parse a list of declarations
|
182
|
+
# https://www.w3.org/TR/css-syntax-3/#parse-list-of-declarations
|
183
|
+
# Tokenizes the source and consumes it as a list of declarations.
def parse_declaration_list
  tokens = tokenize
  consume_declaration_list(tokens)
end
|
186
|
+
|
187
|
+
# 5.3.9. Parse a component value
|
188
|
+
# https://www.w3.org/TR/css-syntax-3/#parse-component-value
|
189
|
+
# Parses the source as exactly one component value.
#
# Returns the value, or a ParseError instance (returned, not raised) when
# the input is empty or tokens remain after the value.
def parse_component_value
  # 1.
  tokens = tokenize

  # Steps 2 and 5 both discard comments and whitespace.
  skip_trivia = lambda do
    loop do
      case tokens.peek
      when CommentToken, WhitespaceToken then tokens.next
      else break
      end
    end
  end

  # 2.
  skip_trivia.call

  # 3.
  return ParseError.new("Unexpected end of input parsing component value") if tokens.peek.is_a?(EOFToken)

  # 4.
  value = consume_component_value(tokens)

  # 5.
  skip_trivia.call

  # 6.
  return value if tokens.peek.is_a?(EOFToken)

  ParseError.new("Expected end of input parsing component value")
end
|
228
|
+
|
229
|
+
# 5.3.10. Parse a list of component values
|
230
|
+
# https://www.w3.org/TR/css-syntax-3/#parse-list-of-component-values
|
231
|
+
# Tokenizes the source and consumes component values until the EOF token is
# the next token, returning them as an array.
def parse_component_values
  tokens = tokenize
  values = []

  until tokens.peek.is_a?(EOFToken)
    values << consume_component_value(tokens)
  end

  values
end
|
238
|
+
|
239
|
+
private
|
240
|
+
|
241
|
+
#-------------------------------------------------------------------------
|
242
|
+
# 3. Tokenizing and Parsing CSS
|
243
|
+
# https://www.w3.org/TR/css-syntax-3/#tokenizing-and-parsing
|
244
|
+
#-------------------------------------------------------------------------
|
245
|
+
|
246
|
+
# 3.3. Preprocessing the input stream
|
247
|
+
# https://www.w3.org/TR/css-syntax-3/#input-preprocessing
|
248
|
+
# Normalizes the raw input: CRLF, lone CR and form feed each become a single
# line feed, and NUL becomes U+FFFD.
def preprocess(input)
  # Surrogate code points should be replaced with U+FFFD as well, but that
  # is not really possible once the string is already UTF-8 encoded. It is
  # skipped until encodings and fallback encodings are handled properly.
  # .gsub(/[\u{D800}-\u{DFFF}]/, "\u{FFFD}")
  normalized_newlines = input.gsub(/\r\n?|\f/, "\n")
  normalized_newlines.gsub("\x00", "\u{FFFD}")
end
|
257
|
+
|
258
|
+
#-------------------------------------------------------------------------
|
259
|
+
# 4. Tokenization
|
260
|
+
# https://www.w3.org/TR/css-syntax-3/#tokenization
|
261
|
+
#-------------------------------------------------------------------------
|
262
|
+
|
263
|
+
# Create an enumerator of tokens from the source.
|
264
|
+
# Returns an Enumerator that yields one token per consume_token call until
# the end of the source, followed by a final EOF token.
def tokenize
  Enumerator.new do |yielder|
    position = 0

    until position >= source.length
      step = consume_token(position)

      yielder << step.value
      position = step.index
    end

    yielder << EOFToken[position]
  end
end
|
278
|
+
|
279
|
+
# 4.3.1. Consume a token
|
280
|
+
# https://www.w3.org/TR/css-syntax-3/#consume-token
|
281
|
+
# Consumes one token starting at +index+ and returns a State holding the
# token and the index just past it. Dispatch is on the leading code point(s)
# of the remaining source; anything unrecognized becomes a one-character
# DelimToken. An invalid escape is recorded in #errors.
def consume_token(index)
  case source[index..]
  when %r{\A/\*}
    consume_comment(index)
  when /\A#{WHITESPACE}+/o
    State.new(WhitespaceToken.new(value: $&, location: index...(index + $&.length)), index + $&.length)
  when /\A["']/
    consume_string(index, $&)
  when /\A#/
    if ident?(source[index + 1]) || valid_escape?(source[index + 1], source[index + 2])
      state = consume_ident_sequence(index + 1)

      State.new(
        HashToken.new(
          value: state.value,
          type: start_ident_sequence?(index + 1) ? "id" : "unrestricted",
          location: index...state.index
        ),
        state.index
      )
    else
      State.new(DelimToken.new(value: "#", location: index...(index + 1)), index + 1)
    end
  when /\A\(/
    State.new(OpenParenToken.new(location: index...(index + 1)), index + 1)
  when /\A\)/
    State.new(CloseParenToken.new(location: index...(index + 1)), index + 1)
  when /\A\+/
    # The check has to include the sign itself (as the "-" and "." branches
    # do). Checking from index + 1 would treat "+-5" or "++5" as a number
    # and emit a zero-valued number token for the lone "+".
    if start_number?(index)
      consume_numeric(index)
    else
      State.new(DelimToken.new(value: "+", location: index...(index + 1)), index + 1)
    end
  when /\A,/
    State.new(CommaToken.new(location: index...(index + 1)), index + 1)
  when /\A-/
    if start_number?(index)
      consume_numeric(index)
    elsif source[index + 1] == "-" && source[index + 2] == ">"
      State.new(CDCToken.new(location: index...(index + 3)), index + 3)
    elsif start_ident_sequence?(index)
      consume_ident_like(index)
    else
      State.new(DelimToken.new(value: "-", location: index...(index + 1)), index + 1)
    end
  when /\A\./
    if start_number?(index)
      consume_numeric(index)
    else
      State.new(DelimToken.new(value: ".", location: index...(index + 1)), index + 1)
    end
  when /\A:/
    State.new(ColonToken.new(location: index...(index + 1)), index + 1)
  when /\A;/
    State.new(SemicolonToken.new(location: index...(index + 1)), index + 1)
  when /\A</
    if source[index...(index + 4)] == "<!--"
      State.new(CDOToken.new(location: index...(index + 4)), index + 4)
    else
      State.new(DelimToken.new(value: "<", location: index...(index + 1)), index + 1)
    end
  when /\A@/
    if start_ident_sequence?(index + 1)
      state = consume_ident_sequence(index + 1)
      State.new(AtKeywordToken.new(value: state.value, location: index...state.index), state.index)
    else
      State.new(DelimToken.new(value: "@", location: index...(index + 1)), index + 1)
    end
  when /\A\[/
    State.new(OpenSquareToken.new(location: index...(index + 1)), index + 1)
  when %r{\A\\}
    if valid_escape?(source[index], source[index + 1])
      consume_ident_like(index)
    else
      errors << ParseError.new("invalid escape at #{index}")
      State.new(DelimToken.new(value: "\\", location: index...(index + 1)), index + 1)
    end
  when /\A\]/
    State.new(CloseSquareToken.new(location: index...(index + 1)), index + 1)
  when /\A\{/
    State.new(OpenCurlyToken.new(location: index...(index + 1)), index + 1)
  when /\A\}/
    State.new(CloseCurlyToken.new(location: index...(index + 1)), index + 1)
  when /\A#{DIGIT}/o
    consume_numeric(index)
  when /\A#{IDENT_START}/o
    consume_ident_like(index)
  when "", nil
    State.new(EOFToken[index], index)
  else
    State.new(DelimToken.new(value: source[index], location: index...(index + 1)), index + 1)
  end
end
|
374
|
+
|
375
|
+
# 4.3.2. Consume comments
|
376
|
+
# https://www.w3.org/TR/css-syntax-3/#consume-comments
|
377
|
+
# Consumes the comment that opens at +index+. When no closing "*/" exists,
# an error is recorded and the comment runs to the end of the source.
def consume_comment(index)
  terminator = source.index("*/", index + 2)

  if terminator
    finish = terminator + 2
  else
    errors << ParseError.new("unterminated comment starting at #{index}")
    finish = source.length
  end

  span = index...finish
  State.new(CommentToken.new(value: source[span], location: span), finish)
end
|
389
|
+
|
390
|
+
# 4.3.3. Consume a numeric token
|
391
|
+
# https://www.w3.org/TR/css-syntax-3/#consume-numeric-token
|
392
|
+
# Consumes a number at +index+ and classifies it by what follows: an ident
# sequence makes it a DimensionToken, "%" a PercentageToken, anything else a
# plain NumberToken. Returns a State with the token and the index past it.
def consume_numeric(index)
  start = index
  state = consume_number(index)

  value, type = state.value
  index = state.index

  if start_ident_sequence?(index)
    unit = consume_ident_sequence(index)

    # The location has to run through the end of the unit so that it covers
    # the same span the returned index skips over.
    State.new(
      DimensionToken.new(value: value, unit: unit.value, type: type, location: start...unit.index),
      unit.index
    )
  elsif source[index] == "%"
    index += 1
    State.new(PercentageToken.new(value: value, type: type, location: start...index), index)
  else
    State.new(NumberToken.new(value: value, type: type, location: start...index), index)
  end
end
|
409
|
+
|
410
|
+
# 4.3.4. Consume an ident-like token
|
411
|
+
# https://www.w3.org/TR/css-syntax-3/#consume-ident-like-token
|
412
|
+
# Consumes an ident sequence at +index+ and decides which token it forms:
# a function token (ident followed by "("), a url token (unquoted url(...)),
# a unicode range (when the ident is "u" and consume_urange succeeds), or a
# plain ident token.
def consume_ident_like(index)
  start = index
  ident = consume_ident_sequence(index)

  name = ident.value
  index = ident.index

  if source[index] == "("
    index += 1 # (

    unless name.casecmp("url") == 0
      return State.new(FunctionToken.new(value: name, location: start...index), index)
    end

    # While the next two input code points are whitespace, consume the next
    # input code point.
    index += 1 while whitespace?(source[index]) && whitespace?(source[index + 1])

    # A quoted argument means this is an ordinary function call; only the
    # unquoted form is tokenized as a url token.
    quoted =
      /["']/.match?(source[index]) ||
      (whitespace?(source[index]) && /["']/.match?(source[index + 1]))
    return consume_url(start) unless quoted

    State.new(FunctionToken.new(value: name, location: start...index), index)
  elsif (name.casecmp("u") == 0) && (urange = consume_urange(index - 1))
    urange
  else
    State.new(IdentToken.new(value: name, location: start...index), index)
  end
end
|
442
|
+
|
443
|
+
# 4.3.5. Consume a string token
|
444
|
+
# https://www.w3.org/TR/css-syntax-3/#consume-string-token
|
445
|
+
# Consumes a string that opens with +quote+ at +index+.
#
# Returns a State holding a StringToken (also for an unterminated string,
# which additionally records an error) or a BadStringToken when an
# unescaped newline is hit. The token value has escapes resolved and does
# not include the surrounding quotes.
def consume_string(index, quote)
  start = index
  index += 1
  value = +""

  while index <= source.length
    case source[index]
    when quote
      return State.new(StringToken.new(value: value, location: start...(index + 1)), index + 1)
    when nil
      errors << ParseError.new("unterminated string at #{start}")
      return State.new(StringToken.new(value: value, location: start...index), index)
    when "\n"
      errors << ParseError.new("newline in string at #{index}")
      return State.new(BadStringToken.new(value: value, location: start...index), index)
    when "\\"
      index += 1

      if index == source.length
        # Backslash right before EOF: do nothing, the next iteration reports
        # the unterminated string.
        next
      elsif source[index] == "\n"
        # An escaped newline is a line continuation: it is consumed but
        # contributes nothing to the string's value.
        index += 1
      else
        state = consume_escaped_code_point(index)
        value << state.value
        index = state.index
      end
    else
      value << source[index]
      index += 1
    end
  end
end
|
479
|
+
|
480
|
+
# 4.3.6. Consume a url token
|
481
|
+
# https://www.w3.org/TR/css-syntax-3/#consume-url-token
|
482
|
+
# Consumes an unquoted url token. +index+ points at the start of the "url"
# function name; scanning starts right after its opening parenthesis.
#
# Returns a State holding a URLToken (also when the input ends early, which
# records an error) or a BadURLToken when an invalid character, an invalid
# escape, or interior whitespace is found.
def consume_url(index)
  # 1.
  value = +""

  # 2.
  start = index

  # Locate the opening parenthesis instead of assuming the name is the four
  # literal characters "url(": an escaped spelling of the name is longer. The
  # fallback keeps the fixed offset if no parenthesis is found.
  open_paren = source.index("(", start)
  index = (open_paren || (start + 3)) + 1
  index += 1 while whitespace?(source[index])

  # 3.
  while index <= source.length
    case source[index..]
    when /\A\)/
      return State.new(URLToken.new(value: value, location: start...(index + 1)), index + 1)
    when "", nil
      errors << ParseError.new("unterminated url at #{start}")
      return State.new(URLToken.new(value: value, location: start...index), index)
    when /\A#{WHITESPACE}+/o
      index += $&.length

      # Whitespace is only allowed directly before the closing parenthesis.
      case source[index]
      when ")"
        return State.new(URLToken.new(value: value, location: start...(index + 1)), index + 1)
      when nil
        errors << ParseError.new("unterminated url at #{start}")
        return State.new(URLToken.new(value: value, location: start...index), index)
      else
        errors << ParseError.new("invalid url at #{start}")
        state = consume_bad_url_remnants(index)
        return State.new(BadURLToken.new(value: value + state.value, location: start...state.index), state.index)
      end
    when /\A(?:["'(]|#{NON_PRINTABLE})/o
      # Both alternatives are grouped so that the anchor applies to each of
      # them; only the current code point may trigger this branch.
      errors << ParseError.new("invalid character in url at #{index}")
      state = consume_bad_url_remnants(index)
      return State.new(BadURLToken.new(value: value + state.value, location: start...state.index), state.index)
    when %r{\A\\}
      if valid_escape?(source[index], source[index + 1])
        state = consume_escaped_code_point(index + 1)
        value << state.value
        index = state.index
      else
        errors << ParseError.new("invalid escape at #{index}")
        state = consume_bad_url_remnants(index)
        return State.new(BadURLToken.new(value: value + state.value, location: start...state.index), state.index)
      end
    else
      value << source[index]
      index += 1
    end
  end
end
|
533
|
+
|
534
|
+
# 4.3.7. Consume an escaped code point
|
535
|
+
# https://www.w3.org/TR/css-syntax-3/#consume-escaped-code-point
|
536
|
+
# Consumes the code point that follows a backslash; +index+ points just past
# the backslash. One to six hex digits (plus one optional whitespace) are
# decoded into a character, with zero, surrogates and out-of-range values
# mapped to U+FFFD. At the end of the source U+FFFD is returned; any other
# character is returned as is.
def consume_escaped_code_point(index)
  replacement = "\u{FFFD}"
  hex = /\A(\h{1,6})#{WHITESPACE}?/o.match(source[index..])

  if hex
    code = hex[1].to_i(16)
    unusable = code == 0 || (0xD800..0xDFFF).cover?(code) || code > 0x10FFFF

    State.new(unusable ? replacement : code.chr(Encoding::UTF_8), index + hex[0].length)
  elsif index == source.length
    State.new(replacement, index)
  else
    State.new(source[index], index + 1)
  end
end
|
553
|
+
|
554
|
+
# 4.3.8. Check if two code points are a valid escape
|
555
|
+
# https://www.w3.org/TR/css-syntax-3/#starts-with-a-valid-escape
|
556
|
+
# A backslash starts a valid escape unless it is followed by a newline.
def valid_escape?(left, right)
  return false unless left == "\\"

  right != "\n"
end
|
559
|
+
|
560
|
+
# 4.3.9. Check if three code points would start an ident sequence
|
561
|
+
# https://www.w3.org/TR/css-syntax-3/#would-start-an-identifier
|
562
|
+
# Checks whether the (up to) three code points at +index+ would start an
# ident sequence.
def start_ident_sequence?(index)
  first, second, third = source[index...(index + 3)].chars
  ident_start = /#{IDENT_START}/o

  if first == "-"
    # A dash needs an ident start, a second dash, or an escape after it.
    ident_start.match?(second) || second == "-" || valid_escape?(second, third)
  elsif ident_start.match?(first)
    true
  elsif first == "\\"
    valid_escape?(first, second)
  else
    false
  end
end
|
577
|
+
|
578
|
+
# 4.3.10. Check if three code points would start a number
|
579
|
+
# https://www.w3.org/TR/css-syntax-3/#starts-with-a-number
|
580
|
+
# Checks whether the (up to) three code points at +index+ would start a
# number: an optional sign, then a digit or a "." followed by a digit.
def start_number?(index)
  first, second, third = source[index...(index + 3)].chars

  if first == "+" || first == "-"
    digit?(second) || (second == "." && digit?(third))
  elsif first == "."
    digit?(second)
  else
    /#{DIGIT}/o.match?(first)
  end
end
|
594
|
+
|
595
|
+
# 4.3.11. Consume an ident sequence
|
596
|
+
# https://www.w3.org/TR/css-syntax-3/#consume-an-ident-sequence
|
597
|
+
# Collects ident code points and escaped code points starting at +index+
# and returns a State with the resulting string and the index of the first
# code point that is not part of the sequence.
def consume_ident_sequence(index)
  name = +""

  while index <= source.length
    char = source[index]

    if ident?(char)
      name << char
      index += 1
      next
    end

    return State.new(name, index) unless valid_escape?(char, source[index + 1])

    escaped = consume_escaped_code_point(index + 1)
    name << escaped.value
    index = escaped.index
  end
end
|
613
|
+
|
614
|
+
# 4.3.12. Consume a number
|
615
|
+
# https://www.w3.org/TR/css-syntax-3/#consume-a-number
|
616
|
+
# Consumes the textual representation of a number at +index+: an optional
# sign, digits, an optional fraction ("." followed by digits) and an
# optional exponent. Returns a State whose value is the pair
# [converted number, type], where type is "number" when a fraction or an
# exponent is present and "integer" otherwise.
def consume_number(index)
  # Every part is optional, so this always matches (possibly the empty
  # string) at the start of the remaining source.
  shape = /\A[+-]?#{DIGIT}*(?<fraction>(?:\.#{DIGIT}+)?)(?<exponent>(?:[Ee][+-]?#{DIGIT}+)?)/o
  match = shape.match(source[index..].to_s)

  repr = match[0]
  type = match[:fraction].empty? && match[:exponent].empty? ? "integer" : "number"

  State.new([convert_to_number(repr), type], index + repr.length)
end
|
655
|
+
|
656
|
+
# 4.3.13. Convert a string to a number
|
657
|
+
# https://www.w3.org/TR/css-syntax-3/#convert-a-string-to-a-number
|
658
|
+
# Converts the textual representation of a number into a numeric value by
# splitting it into sign, integer part, fractional part and exponent and
# recombining them arithmetically. Raises ParseError when +value+ does not
# have that shape.
def convert_to_number(value)
  pattern = %r{
    \A
    (?<sign>[+-]?)
    (?<integer>#{DIGIT}*)
    (?<decimal>\.?)
    (?<fractional>#{DIGIT}*)
    (?<exponent_indicator>[Ee]?)
    (?<exponent_sign>[+-]?)
    (?<exponent>#{DIGIT}*)
    \z
  }ox

  match = pattern.match(value)
  raise ParseError, "convert_to_number called with invalid value: #{value}" unless match

  sign = match[:sign] == "-" ? -1 : 1
  integer = match[:integer].to_i

  # An empty fractional part yields 0 for both, which leaves the sum below
  # equal to the integer part.
  fraction = match[:fractional].to_i
  fraction_digits = match[:fractional].length

  exponent_sign = match[:exponent_sign] == "-" ? -1 : 1
  exponent = match[:exponent].to_i

  sign * (integer + fraction * 10**(-fraction_digits)) * 10**(exponent_sign * exponent)
end
|
690
|
+
|
691
|
+
# 4.3.14. Consume the remnants of a bad url
|
692
|
+
# https://www.w3.org/TR/css-syntax-3/#consume-remnants-of-bad-url
|
693
|
+
# Consumes the rest of a malformed url, up to and including the closing
# parenthesis or the end of the source, and returns a State with the raw
# text that was skipped and the index past it.
def consume_bad_url_remnants(index)
  value = +""

  while index <= source.length
    case source[index..]
    when "", nil
      return State.new(value, index)
    when /\A\)/
      value << ")"
      return State.new(value, index + 1)
    else
      if valid_escape?(source[index], source[index + 1])
        # Skip the backslash together with the code point it escapes, so an
        # escaped ")" does not end the bad url. The raw source text is kept
        # so the collected value still mirrors the input.
        state = consume_escaped_code_point(index + 1)
        value << source[index...state.index]
        index = state.index
      else
        value << source[index]
        index += 1
      end
    end
  end
end
|
715
|
+
|
716
|
+
# https://www.w3.org/TR/css-syntax-3/#digit
|
717
|
+
# True when +value+ contains a digit; false for nil.
def digit?(value)
  pattern = /#{DIGIT}/o
  pattern.match?(value)
end
|
720
|
+
|
721
|
+
# https://www.w3.org/TR/css-syntax-3/#ident-code-point
|
722
|
+
# True when +value+ contains an ident code point; false for nil.
def ident?(value)
  pattern = /#{IDENT}/o
  pattern.match?(value)
end
|
725
|
+
|
726
|
+
# https://www.w3.org/TR/css-syntax-3/#whitespace
|
727
|
+
# True when +value+ contains a whitespace code point; false for nil.
def whitespace?(value)
  pattern = /#{WHITESPACE}/o
  pattern.match?(value)
end
|
730
|
+
|
731
|
+
#-------------------------------------------------------------------------
|
732
|
+
# 5. Parsing
|
733
|
+
# https://www.w3.org/TR/css-syntax-3/#parsing
|
734
|
+
#-------------------------------------------------------------------------
|
735
|
+
|
736
|
+
# 5.4.1. Consume a list of rules
|
737
|
+
# https://www.w3.org/TR/css-syntax-3/#consume-list-of-rules
|
738
|
+
# Consumes rules from +tokens+ until the EOF token is next and returns them.
# Comments and whitespace are dropped. CDO/CDC tokens are dropped at the top
# level and otherwise start a qualified rule. Qualified rules that could not
# be consumed (nil) are left out.
def consume_rule_list(tokens, top_level: true)
  rules = []

  append_qualified_rule = lambda do
    rule = consume_qualified_rule(tokens)
    rules << rule if rule
  end

  loop do
    case tokens.peek
    when CommentToken, WhitespaceToken
      tokens.next
    when EOFToken
      return rules
    when CDCToken, CDOToken
      if top_level
        tokens.next
      else
        append_qualified_rule.call
      end
    when AtKeywordToken
      rules << consume_at_rule(tokens)
    else
      append_qualified_rule.call
    end
  end
end
|
762
|
+
|
763
|
+
# 5.4.2. Consume an at-rule
|
764
|
+
# https://www.w3.org/TR/css-syntax-3/#consume-at-rule
|
765
|
+
# Consumes an at-rule whose at-keyword is the next token. Component values
# are collected into the prelude until a semicolon, a "{" block, or EOF
# (which records an error) ends the rule.
def consume_at_rule(tokens)
  name_token = tokens.next
  prelude = []

  # Builds the node; the location runs from the at-keyword to +ending+.
  finish = lambda do |block, ending|
    AtRule.new(name: name_token.value, prelude: prelude, block: block, location: name_token.location.to(ending))
  end

  loop do
    case tokens.peek
    in SemicolonToken[location:]
      tokens.next
      return finish.call(nil, location)
    in EOFToken[location:]
      errors << ParseError.new("Unexpected EOF while parsing at-rule")
      return finish.call(nil, location)
    in OpenCurlyToken
      block = consume_simple_block(tokens)
      return finish.call(block, block.location)
    else
      prelude << consume_component_value(tokens)
    end
  end
end
|
786
|
+
|
787
|
+
# 5.4.3. Consume a qualified rule
|
788
|
+
# https://www.w3.org/TR/css-syntax-3/#consume-qualified-rule
|
789
|
+
# Consumes a qualified rule: component values are collected into the prelude
# until a "{" block completes the rule. Hitting EOF first records an error
# and returns nil.
def consume_qualified_rule(tokens)
  prelude = []

  loop do
    case tokens.peek
    when EOFToken
      errors << ParseError.new("Unexpected EOF while parsing qualified rule")
      return nil
    when OpenCurlyToken
      block = consume_simple_block(tokens)

      # Without a prelude the rule spans just the block.
      span = block.location
      span = prelude.first.location.to(span) if prelude.any?

      return QualifiedRule.new(prelude: prelude, block: block, location: span)
    else
      prelude << consume_component_value(tokens)
    end
  end
end
|
807
|
+
|
808
|
+
# 5.4.4. Consume a style block’s contents
|
809
|
+
# https://www.w3.org/TR/css-syntax-3/#consume-style-block
|
810
|
+
# Consumes the contents of a style block and returns the declarations
# followed by the nested rules.
#
# Semicolons and whitespace between items are skipped. An identifier starts
# a declaration, an at-keyword starts an at-rule and "&" starts a nested
# qualified rule. Anything else records an error and is skipped up to the
# next semicolon or EOF.
def consume_style_block_contents(tokens)
  declarations = []
  rules = []

  loop do
    case tokens.peek
    in SemicolonToken | WhitespaceToken
      tokens.next
    in EOFToken
      tokens.next
      return declarations + rules
    in AtKeywordToken
      rules << consume_at_rule(tokens)
    in IdentToken
      list = [tokens.next]

      loop do
        case tokens.peek
        in EOFToken | SemicolonToken
          break
        else
          list << consume_component_value(tokens)
        end
      end

      # The temporary list must not contain the terminating semicolon: it
      # would end up in the declaration's value and hide a trailing
      # "!important". It is dropped and a synthetic EOF ends the list.
      at_eof = tokens.peek.is_a?(EOFToken)
      terminator = tokens.next
      list << (at_eof ? terminator : EOFToken[list.last.location.end_char])

      declaration = consume_declaration(list.to_enum)
      declarations << declaration if declaration

      # The EOF token has been consumed, so peeking again would raise
      # StopIteration and make the loop return the enumerator's result.
      return declarations + rules if at_eof
    in DelimToken[value: "&"]
      rule = consume_qualified_rule(tokens)
      rules << rule if rule
    in { location: }
      errors << ParseError.new("Unexpected token while parsing style block at #{location.start_char}")

      loop do
        case tokens.peek
        in EOFToken | SemicolonToken
          break
        else
          consume_component_value(tokens)
        end
      end
    end
  end
end
|
854
|
+
|
855
|
+
# 5.4.5. Consume a list of declarations
|
856
|
+
# https://www.w3.org/TR/css-syntax-3/#consume-list-of-declarations
|
857
|
+
# Consumes a list of declarations (and at-rules) from +tokens+ and returns
# them in source order. Semicolons and whitespace between items are skipped.
# Anything that is neither an identifier nor an at-keyword records an error
# and is skipped up to the next semicolon or EOF.
def consume_declaration_list(tokens)
  declarations = []

  # True when the next token ends the current declaration.
  at_boundary = -> { tokens.peek.is_a?(EOFToken) || tokens.peek.is_a?(SemicolonToken) }

  loop do
    case tokens.peek
    when SemicolonToken, WhitespaceToken
      tokens.next
    when EOFToken
      tokens.next
      return declarations
    when AtKeywordToken
      declarations << consume_at_rule(tokens)
    when IdentToken
      list = [tokens.next]

      loop do
        break if at_boundary.call

        list << consume_component_value(tokens)
      end

      # The terminator is consumed either way. A real EOF is kept at the end
      # of the temporary list, a semicolon is replaced by a synthetic EOF.
      finished = tokens.peek.is_a?(EOFToken)
      terminator = tokens.next
      list << (finished ? terminator : EOFToken[list.last.location.end_char])

      declaration = consume_declaration(list.to_enum)
      declarations << declaration if declaration

      return declarations if finished
    else
      errors << ParseError.new("Unexpected token while parsing declaration list at #{tokens.peek.location.start_char}")

      loop do
        break if at_boundary.call

        consume_component_value(tokens)
      end
    end
  end
end
|
909
|
+
|
910
|
+
# 5.4.6. Consume a declaration
|
911
|
+
# https://www.w3.org/TR/css-syntax-3/#consume-declaration
|
912
|
+
# Builds a single Declaration from a token enumerator whose first token is
# the property name and whose last token is an EOF token.
#
# Returns the Declaration, or nil (after recording a ParseError) when the
# name is not followed by a colon.
def consume_declaration(tokens)
  name = tokens.next
  value = []
  important = false

  # Discards any run of comment/whitespace tokens at the front of the stream.
  skip_trivia = lambda do
    tokens.next while tokens.peek.is_a?(CommentToken) || tokens.peek.is_a?(WhitespaceToken)
  end

  # 1.
  skip_trivia.call

  # 2.
  unless tokens.peek.is_a?(ColonToken)
    errors << ParseError.new("Expected colon at #{tokens.peek.location.start_char}")
    return
  end
  tokens.next

  # 3.
  skip_trivia.call

  # 4.
  value << consume_component_value(tokens) until tokens.peek.is_a?(EOFToken)

  # 5. Look at the last two tokens that are neither whitespace nor comments
  # to detect a trailing "!important".
  significant = value.reject { |token| token.is_a?(WhitespaceToken) || token.is_a?(CommentToken) }

  case significant.last(2)
  in [DelimToken[value: "!"] => bang, IdentToken[value: /\Aimportant\z/i] => keyword]
    value.delete(bang)
    value.delete(keyword)
    important = true
  else
    # not flagged as important
  end

  # 6.
  value.pop while value.last.is_a?(CommentToken) || value.last.is_a?(WhitespaceToken)

  # 7. The location spans from the name through the last remaining value.
  location = value.any? ? name.location.to(value.last.location) : name.location
  Declaration.new(name: name.value, value: value, important: important, location: location)
end
|
973
|
+
|
974
|
+
# 5.4.7. Consume a component value
|
975
|
+
# https://www.w3.org/TR/css-syntax-3/#consume-component-value
|
976
|
+
# Consumes one component value from the enumerator: a whole simple block when
# the next token opens one, a whole function when the next token is a
# function token, and otherwise just the next token itself.
def consume_component_value(tokens)
  case tokens.peek
  when OpenCurlyToken, OpenSquareToken, OpenParenToken then consume_simple_block(tokens)
  when FunctionToken then consume_function(tokens)
  else tokens.next
  end
end
|
986
|
+
|
987
|
+
# 5.4.8. Consume a simple block
|
988
|
+
# https://www.w3.org/TR/css-syntax-3/#consume-simple-block
|
989
|
+
# Consumes a bracketed block. The next token in the enumerator is taken as
# the opening bracket, and component values are collected until the matching
# closing bracket is found.
#
# If EOF is reached first, a ParseError is recorded and the block collected
# so far is returned; the EOF token itself is left in the enumerator.
def consume_simple_block(tokens)
  opener = tokens.next
  closers = {
    OpenParenToken => CloseParenToken,
    OpenSquareToken => CloseSquareToken,
    OpenCurlyToken => CloseCurlyToken
  }
  closer = closers[opener.class]
  contents = []

  loop do
    case (upcoming = tokens.peek)
    when closer
      tokens.next
      span = opener.location.to(upcoming.location)
      return SimpleBlock.new(token: opener.value, value: contents, location: span)
    when EOFToken
      errors << ParseError.new("Unexpected EOF while parsing simple block at #{opener.location.start_char}")
      span = opener.location.to(upcoming.location)
      return SimpleBlock.new(token: opener.value, value: contents, location: span)
    else
      contents << consume_component_value(tokens)
    end
  end
end
|
1012
|
+
|
1013
|
+
# 5.4.9. Consume a function
|
1014
|
+
# https://www.w3.org/TR/css-syntax-3/#consume-function
|
1015
|
+
# Consumes a function: the next token supplies the function name, and
# component values are collected as its arguments until the closing
# parenthesis (which is consumed).
#
# If EOF is reached first, a ParseError is recorded and the function is
# returned with whatever was collected; the EOF token is not consumed.
def consume_function(tokens)
  name_token = tokens.next
  arguments = []

  loop do
    case tokens.peek
    in CloseParenToken[location: stop]
      tokens.next
    in EOFToken[location: stop]
      errors << ParseError.new("Unexpected EOF while parsing function at #{name_token.location.start_char}")
    else
      arguments << consume_component_value(tokens)
      next
    end

    # Only reached once a terminating token bound +stop+ above.
    return Function.new(name: name_token.value, value: arguments, location: name_token.location.to(stop))
  end
end
|
1032
|
+
|
1033
|
+
#-------------------------------------------------------------------------
|
1034
|
+
# 7. The Unicode-Range microsyntax
|
1035
|
+
# https://www.w3.org/TR/css-syntax-3/#urange
|
1036
|
+
#-------------------------------------------------------------------------
|
1037
|
+
|
1038
|
+
# 7.1. The <urange> type
|
1039
|
+
# https://www.w3.org/TR/css-syntax-3/#urange-syntax
|
1040
|
+
# Attempts to read a <urange> from the source, where +index+ points at the
# leading "u".
#
# Returns a State holding the URange node and the index immediately after
# it, or nil when the upcoming input is not a valid <urange>. Out-of-range
# values are reported through valid_urange?, which records a ParseError.
def consume_urange(index)
  start = index
  index += 1 # to move past the "u"

  # At this point we've already consumed the "u". We need to gather up a
  # couple of tokens to see if they match the grammar first, before we
  # concatenate all of their representations together.
  #
  # To do this we walk a small state machine, feeding it one token at a
  # time. If a token has no transition from the current state we stop: when
  # the current state is final we go on to parse the text gathered so far,
  # otherwise this is not a urange and we return nil.
  #
  # Transitions (final states are 3, 4, 5, 6 and 7):
  #
  #   1 -- "+" delim --> 2    1 -- dimension --> 3    1 -- number --> 4
  #   2 -- "?" delim --> 3    2 -- ident -----> 3
  #   3 -- "?" delim --> 3
  #   4 -- "?" delim --> 5    4 -- dimension --> 6    4 -- number --> 7
  #   5 -- "?" delim --> 5
  #
  tokens = []
  box = 1

  loop do
    state = consume_token(index)
    box =
      case [box, state.value]
      in [1, DelimToken[value: "+"]] then 2
      in [1, DimensionToken] then 3
      in [1, NumberToken] then 4
      in [2, DelimToken[value: "?"]] then 3
      in [2, IdentToken] then 3
      in [3, DelimToken[value: "?"]] then 3
      in [4, DelimToken[value: "?"]] then 5
      in [4, DimensionToken] then 6
      in [4, NumberToken] then 7
      in [5, DelimToken[value: "?"]] then 5
      else
        if [3, 4, 5, 6, 7].include?(box)
          break # final states
        else
          return
        end
      end

    tokens << state.value
    index = state.index
  end

  # 2. From here on +index+ is an offset into +text+, not into the source.
  text = "u#{tokens.map { |token| source[token.location.to_range] }.join}"
  return if text[1] != "+"
  index = 2

  # 3. Hex digits followed by "?" wildcards, between one and six characters.
  value = text[index..][/\A\h*\?*/]
  return unless (1..6).cover?(value.length)

  index += value.length
  start_value, end_value =
    if value.end_with?("?")
      # Wildcards must be the last thing in the text.
      return if index != text.length
      [value.gsub("?", "0").hex, value.gsub("?", "F").hex]
    else
      [value.hex, value.hex]
    end

  # 4.
  if index == text.length
    return unless valid_urange?(start_value, end_value)

    ending = start + text.length
    return State.new(URange.new(start_value: start_value, end_value: end_value, location: start...ending), ending)
  end

  # 5.
  return if text[index] != "-"
  index += 1

  # 6. The end of the range needs between one and six hex digits. An empty
  # run (e.g. "u+a-") is not a urange, so bail out before it can be read as
  # an end value of 0 and reported as an out-of-order range.
  digits = text[index..][/\A\h*/]
  return unless (1..6).cover?(digits.length)

  end_value = digits.hex
  index += digits.length
  return if index != text.length

  # 7.
  return unless valid_urange?(start_value, end_value)

  ending = start + text.length
  State.new(URange.new(start_value: start_value, end_value: end_value, location: start...ending), ending)
end
|
1144
|
+
|
1145
|
+
# Checks that the start and end value of a urange are valid.
|
1146
|
+
# Reports whether the given bounds describe an acceptable urange: the end
# value may not exceed 0x10FFFF and the start value may not exceed the end
# value. A ParseError describing the problem is recorded before returning
# false.
def valid_urange?(start_value, end_value)
  problem =
    if end_value > 0x10FFFF
      "Invalid urange. #{end_value} greater than 0x10FFFF"
    elsif start_value > end_value
      "Invalid urange. #{start_value} greater than #{end_value}"
    end

  return true unless problem

  errors << ParseError.new(problem)
  false
end
|
1157
|
+
|
1158
|
+
#-------------------------------------------------------------------------
|
1159
|
+
# 9. CSS stylesheets
|
1160
|
+
# https://www.w3.org/TR/css-syntax-3/#css-stylesheets
|
1161
|
+
#-------------------------------------------------------------------------
|
1162
|
+
|
1163
|
+
# https://www.w3.org/TR/css-syntax-3/#parse-a-css-stylesheet
|
1164
|
+
# Parses the input as a stylesheet and wraps the result in a CSSStyleSheet.
# Every qualified rule is converted into a style rule; all other rules are
# passed through untouched.
def parse_css_stylesheet
  parsed = parse_stylesheet

  converted =
    parsed.rules.map do |rule|
      if rule.is_a?(QualifiedRule)
        create_style_rule(rule)
      else
        rule
      end
    end

  CSSStyleSheet.new(rules: converted, location: parsed.location)
end
|
1173
|
+
|
1174
|
+
# 9.1. Style rules
|
1175
|
+
# https://www.w3.org/TR/css-syntax-3/#style-rules
|
1176
|
+
# Converts a qualified rule into a StyleRule. The prelude tokens are handed
# to the selectors parser and the block's contents are parsed as style block
# contents. Each token list is terminated with its own EOF token placed at
# the end of the rule.
def create_style_rule(rule)
  end_char = rule.location.end_char
  selector_tokens = [*rule.prelude, EOFToken[end_char]]
  declaration_tokens = [*rule.block.value, EOFToken[end_char]]

  selectors = Selectors.new(selector_tokens).parse
  declarations = consume_style_block_contents(declaration_tokens.to_enum)

  StyleRule.new(selectors: selectors, declarations: declarations, location: rule.location)
end
|
1186
|
+
end
|
1187
|
+
end
|
1188
|
+
end
|