caps 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.editorconfig +11 -0
- data/.rspec +3 -0
- data/.rubocop.yml +47 -0
- data/CODE_OF_CONDUCT.md +84 -0
- data/Gemfile +19 -0
- data/Gemfile.lock +72 -0
- data/LICENSE.txt +21 -0
- data/README.md +39 -0
- data/Rakefile +12 -0
- data/lib/caps/errors.rb +6 -0
- data/lib/caps/parser/consumers.rb +273 -0
- data/lib/caps/parser/entrypoints.rb +114 -0
- data/lib/caps/parser/helpers.rb +96 -0
- data/lib/caps/parser/infra.rb +54 -0
- data/lib/caps/parser.rb +14 -0
- data/lib/caps/tokenizer/helpers.rb +69 -0
- data/lib/caps/tokenizer/infra.rb +163 -0
- data/lib/caps/tokenizer/location.rb +25 -0
- data/lib/caps/tokenizer.rb +464 -0
- data/lib/caps/version.rb +5 -0
- data/lib/caps.rb +9 -0
- data/tokens.json +1 -0
- metadata +68 -0
|
@@ -0,0 +1,464 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative "tokenizer/helpers"
|
|
4
|
+
require_relative "tokenizer/location"
|
|
5
|
+
require_relative "tokenizer/infra"
|
|
6
|
+
|
|
7
|
+
module Caps
|
|
8
|
+
class Tokenizer
|
|
9
|
+
using Caps::Tokenizer::Helpers
|
|
10
|
+
|
|
11
|
+
# Code points the tokenizer compares against, named after their Unicode names.

# Control / substitution characters.
LINE_FEED = "\n"
REPLACEMENT_CHARACTER = "\u{FFFD}"

# Comment, string and escape delimiters.
SOLIDUS = "/"
REVERSE_SOLIDUS = "\\"
ASTERISK = "*"
SINGLE_QUOTE = "'"
DOUBLE_QUOTE = "\""

# Punctuation that starts or forms a token.
NUMBER_SIGN = "#"
HYPHEN_MINUS = "-"
LEFT_PARENS = "("
RIGHT_PARENS = ")"
PLUS_SIGN = "+"
COMMA = ","
FULL_STOP = "."
COLON = ":"
SEMI = ";"
LESS_THAN = "<"
COMMERCIAL_AT = "@"
LEFT_SQUARE = "["
RIGHT_SQUARE = "]"
LEFT_CURLY = "{"
RIGHT_CURLY = "}"
PERCENTAGE = "%"
GREATER_THAN = ">"
EXCLAMATION = "!"

# Numeric bound used when validating escaped code points. Note the value is
# 0x110000, i.e. one more than U+10FFFF (the last Unicode code point).
MAXIMUM_ALLOWED_CODEPOINT = 0x11_0000
|
|
38
|
+
|
|
39
|
+
# Appends a token hash to @tokens.
#
# The hash always starts with the :type key; any extra keyword arguments are
# added after it (and win over :type if they repeat that key).
# Returns @tokens.
def push_node(type, **opts)
  @tokens << { type:, **opts }
end
|
|
42
|
+
|
|
43
|
+
# Consumes the next token from the input, dispatching on the first
# unconsumed character, and appends the result to @tokens (directly or via
# one of the consume_* / pack_one helpers).
#
# Any comments at the current position are consumed first. Returns nil when
# the input is exhausted after that; otherwise returns whatever the selected
# branch returns.
def consume_token
  # Comments may be directly adjacent ("/*a*//*b*/"); keep consuming until
  # the input no longer starts with "/*". consume_comment always advances
  # past the opening "/*", so this loop terminates.
  consume_comment while peek == SOLIDUS && peek1 == ASTERISK
  return if eof?

  chr = peek
  case
  when chr.whitespace?
    consume_whitespace
  when [SINGLE_QUOTE, DOUBLE_QUOTE].include?(chr)
    consume_string
  when chr == NUMBER_SIGN
    # "#" followed by an identifier character or a valid escape is a hash
    # token; a lone "#" is a delimiter.
    return consume_hash_token if peek1.ident_char? || valid_escape?(offset: 1)

    consume_delim_token

  when chr == LEFT_PARENS
    pack_one(:left_parens)
  when chr == RIGHT_PARENS
    pack_one(:right_parens)
  when chr == COLON
    pack_one(:colon)
  when chr == SEMI
    pack_one(:semicolon)
  when chr == COMMA
    pack_one(:comma)
  when chr == LEFT_SQUARE
    pack_one(:left_square)
  when chr == RIGHT_SQUARE
    pack_one(:right_square)
  when chr == LEFT_CURLY
    pack_one(:left_curly)
  when chr == RIGHT_CURLY
    pack_one(:right_curly)
  when chr == PLUS_SIGN
    # "+5" and "+.5" are signed numbers (consume_number handles the sign);
    # any other "+" is a delimiter.
    if peek1.digit? || (peek1 == FULL_STOP && peek2.digit?)
      consume_numeric
    else
      consume_delim_token
    end
  when chr == FULL_STOP
    if peek1.digit?
      consume_numeric
    else
      consume_delim_token
    end
  when chr == HYPHEN_MINUS
    # "-5" and "-.5" are numbers, "-->" is CDC, "-foo" / "--foo" start an
    # identifier; anything else is a delimiter.
    if peek1.digit? || (peek1 == FULL_STOP && peek2.digit?)
      consume_numeric
    elsif peek1 == HYPHEN_MINUS && peek2 == GREATER_THAN
      consume_cdc_token
    elsif ident_sequence_start?
      consume_ident_token
    else
      consume_delim_token
    end
  when chr == LESS_THAN
    # Look past "<" for "!--". The block's advance is used for lookahead
    # only: both branches below consume "<" again themselves, so `isolated`
    # presumably restores the position afterwards — confirm in tokenizer/infra.
    is_cdo = isolated do
      advance # consume LESS_THAN
      next_three = [peek, peek1, peek2]
      next_three == [EXCLAMATION, HYPHEN_MINUS, HYPHEN_MINUS]
    end

    if is_cdo
      consume_cdo_token
    else
      consume_delim_token
    end
  when chr == COMMERCIAL_AT
    # Same lookahead pattern: does an identifier start right after "@"?
    is_at_keyword = isolated do
      advance # consume COMMERCIAL_AT
      ident_sequence_start?
    end

    loc = mark_pos

    if is_at_keyword
      advance # consume COMMERCIAL_AT
      @tokens << {
        type: :at_keyword,
        value: consume_ident_sequence,
        position: loc.finish
      }
    else
      consume_delim_token
    end

  when chr == REVERSE_SOLIDUS
    if valid_escape?
      consume_ident_token
    else
      # A backslash that does not start a valid escape is a plain delimiter.
      loc = mark_pos
      @tokens << {
        type: :delim,
        value: advance,
        position: loc.finish
      }
    end

  when chr.digit?
    consume_numeric

  when chr.ident_start?
    consume_ident_token

  else
    consume_delim_token
  end
end
|
|
145
|
+
|
|
146
|
+
# Emits a :cdo token for the four-character "<!--" marker.
#
# Nothing has been consumed when this is called, so all four characters are
# advanced over here. Returns @tokens.
def consume_cdo_token
  start = mark_pos
  1.upto(4) { advance }

  token = { type: :cdo, position: start.finish }
  @tokens << token
end
|
|
155
|
+
|
|
156
|
+
# Emits a :cdc token for the three-character "-->" marker.
#
# The leading hyphen has not been consumed by the caller, so all three
# characters are advanced over here. Returns @tokens.
def consume_cdc_token
  start = mark_pos
  1.upto(3) { advance }

  token = { type: :cdc, position: start.finish }
  @tokens << token
end
|
|
166
|
+
|
|
167
|
+
# Consumes an identifier and emits one of:
#
# * a :function token when the identifier is immediately followed by "("
#   (the "(" is consumed as part of the token);
# * for url( … ): a :function token when the argument is a quoted string,
#   otherwise whatever consume_url_token emits (:url or :bad_url);
# * an :ident token in every other case.
def consume_ident_token
  loc = mark_pos
  string = consume_ident_sequence
  # After the identifier, the next unconsumed character is `peek`.
  followed_by_parens = peek == LEFT_PARENS

  if followed_by_parens && string.casecmp?("url")
    advance # consume LEFT_PARENS
    # Skip whitespace but leave at most one whitespace character in place.
    advance while peek.whitespace? && peek1.whitespace?
    quotes = [DOUBLE_QUOTE, SINGLE_QUOTE]
    if quotes.include?(peek) || (peek.whitespace? && quotes.include?(peek1))
      # A quoted argument (optionally preceded by one whitespace) follows:
      # just create the function token; the whitespace and string are
      # tokenized by later consume_token calls.
      @tokens << {
        type: :function,
        value: string,
        position: loc.finish
      }
    else
      # LEFT_PARENS was already consumed at this point, just consume the
      # url token and get the result.
      consume_url_token(loc)
    end
  elsif followed_by_parens
    advance # consume LEFT_PARENS
    @tokens << {
      type: :function,
      value: string,
      position: loc.finish
    }
  else
    @tokens << {
      type: :ident,
      value: string,
      position: loc.finish
    }
  end
end
|
|
202
|
+
|
|
203
|
+
# Consumes the body of an unquoted url( … ) and emits a :url token, or a
# :bad_url token when the body is malformed.
#
# The caller has already consumed "url(".
#
# loc - position marker created before the "url" identifier was consumed;
#       when nil, a new marker is taken at the current position.
#
# Returns @tokens after a :url token, nil after a :bad_url token.
def consume_url_token(loc = nil)
  loc ||= mark_pos
  # Consume as much whitespace as possible
  advance while peek.whitespace?
  value = []
  malformed = false

  loop do
    chr = peek
    case
    when chr == RIGHT_PARENS
      advance
      break

    when eof?
      break

    when chr.whitespace?
      # Whitespace ends the url: only ")" or end of input may follow it.
      advance while peek.whitespace?
      next if eof? || peek == RIGHT_PARENS

      malformed = true
      break

    when [DOUBLE_QUOTE, SINGLE_QUOTE, LEFT_PARENS].include?(chr), chr.non_printable?
      # Parse error. Consume what's left of the url, return BAD_URL.
      malformed = true
      break

    when chr == REVERSE_SOLIDUS
      unless valid_escape?
        malformed = true
        break
      end

      # consume_escaped_codepoint expects the backslash to be gone already.
      advance
      value << consume_escaped_codepoint

    else
      value << advance
    end
  end

  if malformed
    consume_bad_url
    @tokens << {
      type: :bad_url,
      position: loc.finish
    }
    return
  end

  @tokens << {
    type: :url,
    value: value.join,
    position: loc.finish
  }
end
|
|
251
|
+
|
|
252
|
+
# Discards the remainder of a malformed url( … ) so tokenizing can resume
# after it. Emits no token; the caller is responsible for pushing :bad_url.
#
# Consumes up to and including the closing ")" (or to end of input).
# An escaped ")" does not terminate the url. Always returns nil.
def consume_bad_url
  until eof?
    if peek == RIGHT_PARENS
      advance # the closing parenthesis belongs to the bad url
      return
    elsif valid_escape?
      # consume_escaped_codepoint expects the backslash to be gone already.
      advance
      consume_escaped_codepoint
    else
      advance
    end
  end
end
|
|
266
|
+
|
|
267
|
+
# Packs a run of whitespace into a :whitespace token.
#
# Delegates to pack_while, passing a block that reports whether the next
# unconsumed character is whitespace.
def consume_whitespace
  pack_while(:whitespace) do
    peek.whitespace?
  end
end
|
|
270
|
+
|
|
271
|
+
# Consumes one /* … */ comment at the current position, if there is one,
# and emits a :comment token holding the text between the delimiters.
#
# Does nothing unless the input starts with "/*". When the comment is never
# closed, the rest of the input is consumed and no token is emitted.
# Returns @tokens after emitting a token, nil otherwise.
def consume_comment
  return unless peek == SOLIDUS && peek1 == ASTERISK

  loc = mark_pos
  advance # "/"
  advance # "*"

  # `scoped` presumably returns the characters advanced over inside the
  # block (its result is joined below) — confirm in tokenizer/infra.
  comment_data = scoped do
    advance until eof? || (peek == ASTERISK && peek1 == SOLIDUS)
  end

  return if eof? # Malformed sheet?

  advance # "*"
  advance # "/"

  token = { type: :comment, value: comment_data.join }
  token[:position] = loc.finish
  @tokens << token
end
|
|
294
|
+
|
|
295
|
+
# Consumes a quoted string and emits a :string token, or a :bad_string
# token when an unescaped line feed is met before the closing quote.
#
# The token records the quote character as :delimiter and the raw text
# between the quotes as :value (escape sequences are kept verbatim,
# including their backslash). Returns @tokens.
def consume_string
  loc = mark_pos
  ending_point = advance # the opening quote doubles as the terminator
  type = :string

  # `scoped` presumably returns the characters advanced over inside the
  # block (its result is joined below) — confirm in tokenizer/infra.
  value = scoped do
    until eof?
      break if peek == ending_point

      if peek == LINE_FEED
        # Do not advance. Only create bad-string and stop.
        type = :bad_string
        break
      end

      if peek == REVERSE_SOLIDUS
        advance             # the backslash itself
        advance unless eof? # the escaped character; a trailing "\" has none
        next
      end

      advance
    end
  end

  # Only the closing quote is consumed here; the line feed that ends a bad
  # string is left in place for the next token.
  advance if !eof? && peek == ending_point

  @tokens << {
    type:,
    delimiter: ending_point,
    value: value.join,
    position: loc.finish
  }
end
|
|
330
|
+
|
|
331
|
+
# Decodes one escape sequence and returns the character it stands for.
#
# Assumes REVERSE_SOLIDUS was already consumed by the caller.
#
# * One to six hex digits, optionally followed by a single whitespace
#   character, yield the character with that code point. Zero, surrogates
#   and values beyond the Unicode range yield REPLACEMENT_CHARACTER.
# * At end of input the result is REPLACEMENT_CHARACTER.
# * Any other character stands for itself.
def consume_escaped_codepoint
  if peek.hex?
    # `scoped` presumably returns the characters advanced over inside the
    # block (its result is joined below) — confirm in tokenizer/infra.
    digits = scoped do
      advance # first hex digit, known to be present

      extra = 0
      while extra < 5 && !eof? && peek.hex?
        advance
        extra += 1
      end
    end
    advance if peek.whitespace?

    codepoint = digits.join.to_i(16)
    # MAXIMUM_ALLOWED_CODEPOINT is 0x110000, the first value past the
    # Unicode range, so the bound itself must be rejected too. Checking
    # before packing avoids building a string for an invalid code point.
    return REPLACEMENT_CHARACTER if codepoint.zero? || codepoint >= MAXIMUM_ALLOWED_CODEPOINT

    char = [codepoint].pack("U")
    char.surrogate? ? REPLACEMENT_CHARACTER : char
  elsif eof?
    REPLACEMENT_CHARACTER
  else
    advance
  end
end
|
|
358
|
+
|
|
359
|
+
# Consumes identifier characters and escape sequences for as long as they
# continue, and returns the decoded text as a String (possibly empty).
#
# Stops at end of input or at the first character that is neither an
# identifier character nor the start of a valid escape.
def consume_ident_sequence
  pieces = []

  loop do
    break if eof?

    if peek.ident_char?
      pieces << advance
    elsif valid_escape?
      advance # drop the backslash before decoding the escape
      pieces << consume_escaped_codepoint
    else
      break
    end
  end

  pieces.join
end
|
|
376
|
+
|
|
377
|
+
# Consumes "#" plus the identifier sequence after it and emits a :hash
# token.
#
# The token carries the decoded name as :value, the matching slice of the
# raw input as :literal, and flag :id when the text after "#" starts an
# identifier (nil otherwise). Returns @tokens.
def consume_hash_token
  loc = mark_pos
  advance # consume "#"

  flag = (:id if ident_sequence_start?)
  value = consume_ident_sequence
  # NOTE(review): the range is inclusive of @idx — confirm whether @idx
  # refers to the last consumed character or to the next unconsumed one.
  literal = @contents[loc.start[:idx]..@idx]

  @tokens << { type: :hash, literal:, flag:, value:, position: loc.finish }
end
|
|
390
|
+
|
|
391
|
+
# Emits a :delim token via pack_one, which presumably wraps the single
# next character — confirm in tokenizer/infra.
def consume_delim_token = pack_one(:delim)
|
|
394
|
+
|
|
395
|
+
# Consumes a number and emits the token matching what follows it:
#
# * :dimension  - an identifier follows; it becomes the :unit
# * :percentage - a "%" follows (consumed here)
# * :numeric    - anything else
#
# :dimension and :numeric also carry the number's type (:integer or
# :number) as :flag. Returns @tokens.
def consume_numeric
  loc = mark_pos
  number = consume_number
  magnitude = number[:value]
  kind = number[:type]

  token =
    if ident_sequence_start?
      { type: :dimension, value: magnitude, flag: kind, unit: consume_ident_sequence }
    elsif peek == PERCENTAGE
      advance # consume "%"
      { type: :percentage, value: magnitude }
    else
      { type: :numeric, value: magnitude, flag: kind }
    end

  # The position is taken last so that it covers the unit / "%" as well.
  token[:position] = loc.finish
  @tokens << token
end
|
|
423
|
+
|
|
424
|
+
# Consumes the textual representation of a number and converts it.
#
# Accepts an optional sign, integer digits, an optional fraction ("."
# followed by at least one digit) and an optional exponent ("e"/"E", an
# optional sign, at least one digit).
#
# Returns a Hash with :type (:integer, or :number when a fraction or an
# exponent is present) and :value (Integer or Float accordingly).
def consume_number
  signs = [PLUS_SIGN, HYPHEN_MINUS]
  kind = :integer
  chars = []

  chars << advance if signs.include?(peek)
  chars << advance while peek.digit?

  if peek == FULL_STOP && peek1.digit?
    kind = :number
    chars << advance # consume "."
    chars << advance while peek.digit?
  end

  # Look ahead up to three characters to decide whether an exponent follows.
  marker = peek
  after_marker = peek1
  after_sign = peek2
  signed_exponent = signs.include?(after_marker)

  if %w[E e].include?(marker) && (after_marker.digit? || (signed_exponent && after_sign.digit?))
    kind = :number
    chars << advance # consume "e" or "E"
    chars << advance if signed_exponent # consume optional sign
    chars << advance while peek.digit?
  end

  text = chars.join
  { type: kind, value: kind == :integer ? text.to_i : text.to_f }
end
|
|
454
|
+
|
|
455
|
+
# Renders a list of token hashes as a compact, space-separated string for
# debugging, e.g. `ident("a") colon()`.
#
# tokens - Array of Hashes with a :type key and an optional :value key.
#
# Each token becomes "type(value.inspect)"; the parentheses are left empty
# when the token has no :value key. Returns a String.
def self.stringify(tokens)
  rendered = tokens.map do |token|
    payload = token.key?(:value) ? token[:value].inspect : ""
    "#{token[:type]}(#{payload})"
  end

  rendered.join(" ")
end
|
|
463
|
+
end
|
|
464
|
+
end
|
data/lib/caps/version.rb
ADDED