caps 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,464 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "tokenizer/helpers"
4
+ require_relative "tokenizer/location"
5
+ require_relative "tokenizer/infra"
6
+
7
+ module Caps
8
+ class Tokenizer
9
+ using Caps::Tokenizer::Helpers
10
+
11
# Single characters the tokenizer compares the input against.
LINE_FEED = "\u000a"
REPLACEMENT_CHARACTER = "\ufffd"
SOLIDUS = "/"
REVERSE_SOLIDUS = "\\"
ASTERISK = "*"
SINGLE_QUOTE = "'"
DOUBLE_QUOTE = '"'
NUMBER_SIGN = "#"
HYPHEN_MINUS = "\u002d"
LEFT_PARENS = "("
RIGHT_PARENS = ")"
PLUS_SIGN = "+"
COMMA = ","
FULL_STOP = "."
COLON = ":"
SEMI = ";"
LESS_THAN = "<"
COMMERCIAL_AT = "@"
LEFT_SQUARE = "["
RIGHT_SQUARE = "]"
LEFT_CURLY = "{"
RIGHT_CURLY = "}"
PERCENTAGE = "%"
GREATER_THAN = ">"
EXCLAMATION = "!"

# Largest valid Unicode code point (U+10FFFF). Escapes are compared with
# `> MAXIMUM_ALLOWED_CODEPOINT`, so the previous value (0x110000) let the
# out-of-range code point U+110000 slip through.
MAXIMUM_ALLOWED_CODEPOINT = 0x10FFFF
38
+
39
# Appends a token hash to @tokens. The hash starts with the given type and
# is merged with any extra attributes passed as keyword arguments.
def push_node(type, **opts)
  token = { type: type }
  @tokens << token.merge(opts)
end
42
+
43
# Consumes the next token from the input and appends it to @tokens.
#
# Every comment at the current position is consumed first (consume_comment
# emits the :comment tokens). If the input is exhausted after that, nothing
# else happens. Otherwise the current character selects which consumer runs.
def consume_token
  # Comments may sit back to back ("/* a *//* b */"). Consume all of them so
  # the second "/" is not mistaken for a delimiter.
  consume_comment while !eof? && peek == SOLIDUS && peek1 == ASTERISK
  return if eof?

  chr = peek
  case
  when chr.whitespace?
    consume_whitespace
  when [SINGLE_QUOTE, DOUBLE_QUOTE].include?(chr)
    consume_string
  when chr == NUMBER_SIGN
    return consume_hash_token if peek1.ident_char? || valid_escape?(offset: 1)

    consume_delim_token

  when chr == LEFT_PARENS
    pack_one(:left_parens)
  when chr == RIGHT_PARENS
    pack_one(:right_parens)
  when chr == COLON
    pack_one(:colon)
  when chr == SEMI
    pack_one(:semicolon)
  when chr == COMMA
    pack_one(:comma)
  when chr == LEFT_SQUARE
    pack_one(:left_square)
  when chr == RIGHT_SQUARE
    pack_one(:right_square)
  when chr == LEFT_CURLY
    pack_one(:left_curly)
  when chr == RIGHT_CURLY
    pack_one(:right_curly)
  when chr == FULL_STOP
    if peek1.digit?
      consume_numeric
    else
      consume_delim_token
    end
  when chr == PLUS_SIGN
    # "+" starts a number only when followed by a digit, or by "." and a
    # digit; consume_number already knows how to read the leading sign.
    if peek1.digit? || (peek1 == FULL_STOP && peek2.digit?)
      consume_numeric
    else
      consume_delim_token
    end
  when chr == HYPHEN_MINUS
    # Same number-start rule as "+", so "-.5" is read as a number too.
    if peek1.digit? || (peek1 == FULL_STOP && peek2.digit?)
      consume_numeric
    elsif peek1 == HYPHEN_MINUS && peek2 == GREATER_THAN
      consume_cdc_token
    elsif ident_sequence_start?
      consume_ident_token
    else
      consume_delim_token
    end
  when chr == LESS_THAN
    # Look past "<" without committing to see whether "!--" follows.
    is_cdo = isolated do
      advance # consume LESS_THAN
      next_three = [peek, peek1, peek2]
      next_three == [EXCLAMATION, HYPHEN_MINUS, HYPHEN_MINUS]
    end

    if is_cdo
      consume_cdo_token
    else
      consume_delim_token
    end
  when chr == COMMERCIAL_AT
    # Look past "@" without committing to see whether an ident follows.
    is_at_keyword = isolated do
      advance # consume COMMERCIAL_AT
      ident_sequence_start?
    end

    loc = mark_pos

    if is_at_keyword
      advance # consume COMMERCIAL_AT
      @tokens << {
        type: :at_keyword,
        value: consume_ident_sequence,
        position: loc.finish
      }
    else
      consume_delim_token
    end

  when chr == REVERSE_SOLIDUS
    if valid_escape?
      consume_ident_token
    else
      loc = mark_pos
      @tokens << {
        type: :delim,
        value: advance,
        position: loc.finish
      }
    end

  when chr.digit?
    consume_numeric

  when chr.ident_start?
    consume_ident_token

  else
    consume_delim_token
  end
end
145
+
146
# Emits a :cdo token after advancing over the four characters of "<!--".
# Nothing has been consumed yet when this method is called.
def consume_cdo_token
  start = mark_pos
  4.times { advance }
  @tokens << { type: :cdo, position: start.finish }
end
155
+
156
# Emits a :cdc token for "-->". The leading hyphen has not been consumed
# yet, so all three characters are advanced over here.
def consume_cdc_token
  start = mark_pos
  3.times { advance }
  @tokens << { type: :cdc, position: start.finish }
end
166
+
167
# Consumes an ident sequence and emits one of:
#   * :function - the ident is immediately followed by "(" (which is consumed);
#   * :url / :bad_url - via consume_url_token, for an unquoted `url(...)`;
#   * :ident - anything else.
#
# A `url(` whose argument is a quoted string is emitted as a plain :function
# token; the string (and one leading whitespace, if any) is left in the
# input for the following consume_token calls.
def consume_ident_token
  loc = mark_pos
  string = consume_ident_sequence
  if string.casecmp?("url") && peek == LEFT_PARENS
    advance # consume LEFT_PARENS
    # Collapse leading whitespace down to a single character.
    advance while peek.whitespace? && peek1.whitespace?
    quotes = [DOUBLE_QUOTE, SINGLE_QUOTE]
    # Either a quote comes next, or one whitespace followed by a quote.
    if quotes.include?(peek) || (peek.whitespace? && quotes.include?(peek1))
      @tokens << {
        type: :function,
        value: string,
        position: loc.finish
      }
    else
      # LEFT_PARENS was already consumed at this point, just consume the
      # url token and get the result.
      consume_url_token(loc)
    end
  elsif peek == LEFT_PARENS
    # After the ident sequence the paren is the current character, not the
    # one after it.
    advance # consume LEFT_PARENS
    @tokens << {
      type: :function,
      value: string,
      position: loc.finish
    }
  else
    @tokens << {
      type: :ident,
      value: string,
      position: loc.finish
    }
  end
end
202
+
203
# Consumes the body of an unquoted `url(` and emits a :url token, or a
# :bad_url token when the body is malformed. "url(" itself has already been
# consumed by the caller.
#
# loc - position marker for the start of the token; a fresh one is taken
#       when omitted.
#
# The URL ends at ")" (consumed) or at the end of input. Whitespace is only
# allowed right before the closing ")" or the end of input. A quote, "(",
# non-printable character, stray whitespace or invalid escape makes it bad.
def consume_url_token(loc = nil)
  loc ||= mark_pos
  # Consume as much whitespace as possible
  advance while peek.whitespace?
  value = []
  malformed = false

  loop do
    chr = peek
    case
    when chr == RIGHT_PARENS
      advance
      break

    when eof?
      break

    when chr.whitespace?
      advance while peek.whitespace?
      # Trailing whitespace is fine; anything else after it is an error.
      unless eof? || peek == RIGHT_PARENS
        malformed = true
        break
      end

    when [DOUBLE_QUOTE, SINGLE_QUOTE, LEFT_PARENS].include?(chr), chr.non_printable?
      malformed = true
      break

    when chr == REVERSE_SOLIDUS
      unless valid_escape?
        malformed = true
        break
      end

      # consume_escaped_codepoint expects the backslash to be gone already.
      advance
      value << consume_escaped_codepoint

    else
      value << advance
    end
  end

  if malformed
    # Parse error. Consume what's left of the url and emit BAD_URL.
    consume_bad_url
    @tokens << {
      type: :bad_url,
      position: loc.finish
    }
  else
    @tokens << {
      type: :url,
      value: value.join,
      position: loc.finish
    }
  end
end
251
+
252
# Discards the rest of a malformed URL so tokenizing can resume after it.
# Consumes up to and including the closing ")" or up to the end of input.
# An escaped ")" does not end the URL.
def consume_bad_url
  until eof?
    if peek == RIGHT_PARENS
      advance # the closing paren belongs to the bad url
      return
    elsif valid_escape?
      # consume_escaped_codepoint expects the backslash to be gone already;
      # skipping both keeps "\)" from terminating the URL.
      advance
      consume_escaped_codepoint
    else
      advance
    end
  end
end
266
+
267
# Packs the run of whitespace at the current position into one :whitespace
# token, continuing for as long as the current character is whitespace.
def consume_whitespace
  pack_while(:whitespace) do
    peek.whitespace?
  end
end
270
+
271
# Consumes a "/* ... */" comment at the current position and emits a
# :comment token holding the text between the markers. Does nothing when no
# comment starts here. An unterminated comment is consumed up to the end of
# input without emitting a token.
def consume_comment
  return unless peek == SOLIDUS && peek1 == ASTERISK

  start = mark_pos
  advance # "/"
  advance # "*"
  body = scoped do
    advance until eof? || (peek == ASTERISK && peek1 == SOLIDUS)
  end

  return if eof? # Malformed sheet?

  advance # "*"
  advance # "/"

  @tokens << { type: :comment, value: body.join, position: start.finish }
end
294
+
295
# Consumes a quoted string starting at the opening quote and emits a
# :string token, or :bad_string when an unescaped line feed is reached
# before the closing quote.
#
# The token's value is the raw text between the quotes (escapes are kept as
# written) and :delimiter records which quote character opened the string.
# A string cut short by the end of input is still emitted as :string.
def consume_string
  loc = mark_pos
  ending_point = advance
  type = :string

  value = scoped do
    until eof?
      break if peek == ending_point

      if peek == LINE_FEED
        # Do not advance. Only create bad-string and stop.
        type = :bad_string
        break
      end

      if peek == REVERSE_SOLIDUS
        if peek1.nil?
          # Backslash is the last character of the input: consume it and
          # fall through so the token is still emitted.
          advance
          break
        end

        2.times { advance } # backslash plus the escaped character
        next
      end

      advance
    end
  end

  # Only the closing quote is consumed here; the line feed that ends a bad
  # string stays in the input for the next token.
  advance if peek == ending_point

  @tokens << {
    type:,
    delimiter: ending_point,
    value: value.join,
    position: loc.finish
  }
end
330
+
331
# Decodes the escape at the current position and returns the resulting
# character. Assumes REVERSE_SOLIDUS was already consumed by the caller.
#
# A hex escape reads one to six hex digits plus one optional trailing
# whitespace character. It yields REPLACEMENT_CHARACTER when the value is
# zero, a surrogate, or above MAXIMUM_ALLOWED_CODEPOINT. At the end of input
# REPLACEMENT_CHARACTER is returned; otherwise the next character is
# consumed and returned as is.
def consume_escaped_codepoint
  unless peek.hex?
    return eof? ? REPLACEMENT_CHARACTER : advance
  end

  digits = scoped do
    advance # first hex digit, already known to be valid

    extra = 0
    while !eof? && peek.hex?
      advance
      extra += 1
      break if extra == 5
    end
  end
  advance if peek.whitespace?

  codepoint = digits.join.to_i(16)
  char = [codepoint].pack("U")
  invalid = codepoint.zero? || char.surrogate? || codepoint > MAXIMUM_ALLOWED_CODEPOINT
  invalid ? REPLACEMENT_CHARACTER : char
end
358
+
359
# Consumes ident characters and valid escapes for as long as they continue
# and returns them joined into a string. Escapes are decoded through
# consume_escaped_codepoint after their backslash has been consumed.
def consume_ident_sequence
  chars = []

  loop do
    break if eof?

    if peek.ident_char?
      chars << advance
    elsif valid_escape?
      advance # drop the backslash before decoding the escape
      chars << consume_escaped_codepoint
    else
      break
    end
  end

  chars.join
end
376
+
377
# Emits a :hash token for "#" followed by an ident sequence. :flag is :id
# when the text after "#" starts an ident sequence and nil otherwise.
# :literal is the slice of @contents from the token's start index to @idx.
def consume_hash_token
  origin = mark_pos
  advance # consume "#"
  flag = (:id if ident_sequence_start?)
  value = consume_ident_sequence
  literal = @contents[origin.start[:idx]..@idx]

  @tokens << { type: :hash, literal:, flag:, value:, position: origin.finish }
end
390
+
391
# Packs the current character into a :delim token.
def consume_delim_token = pack_one(:delim)
394
+
395
# Consumes a number and emits one of:
#   * :dimension  - the number is followed by an ident sequence (the :unit);
#   * :percentage - the number is followed by "%" (which is consumed);
#   * :numeric    - anything else.
# :flag carries the number type reported by consume_number; :percentage
# tokens have no flag.
def consume_numeric
  start = mark_pos
  value, flag = consume_number.values_at(:value, :type)

  token =
    if ident_sequence_start?
      { type: :dimension, value:, flag:, unit: consume_ident_sequence }
    elsif peek == PERCENTAGE
      advance # consume "%"
      { type: :percentage, value: }
    else
      { type: :numeric, value:, flag: }
    end

  # The position is taken last so it spans the unit or "%" as well.
  token[:position] = start.finish
  @tokens << token
end
423
+
424
# Consumes the textual representation of a number: optional sign, integer
# digits, optional fraction and optional exponent.
#
# Returns a hash with :type (:integer, or :number when a fraction or
# exponent is present) and :value (Integer for :integer, Float otherwise).
def consume_number
  signs = [PLUS_SIGN, HYPHEN_MINUS]
  kind = :integer
  chars = []

  chars << advance if signs.include?(peek)
  chars << advance while peek.digit?

  # Fraction: "." must be followed by at least one digit.
  if peek == FULL_STOP && peek1.digit?
    chars << advance # consume "."
    chars << advance while peek.digit?
    kind = :number
  end

  # Exponent: "e"/"E", optional sign, then at least one digit.
  marker = peek
  after = peek1
  after2 = peek2
  signed_exponent = signs.include?(after)
  if %w[E e].include?(marker) && (after.digit? || (signed_exponent && after2.digit?))
    kind = :number
    chars << advance # consume "e" or "E"
    chars << advance if signed_exponent # consume optional sign
    chars << advance while peek.digit?
  end

  text = chars.join

  { type: kind, value: kind == :integer ? text.to_i : text.to_f }
end
454
+
455
# Renders a token list as a compact, space-separated debug string. Each
# token becomes `type(value.inspect)`, or `type()` when the token has no
# :value key.
def self.stringify(tokens)
  rendered = tokens.map do |token|
    payload = token.key?(:value) ? token[:value].inspect : ""
    "#{token[:type]}(#{payload})"
  end

  rendered.join(" ")
end
463
+ end
464
+ end
@@ -0,0 +1,5 @@
1
+ # frozen_string_literal: true
2
+
3
module Caps
  # Version string of the gem.
  VERSION = "0.1.0"
end
data/lib/caps.rb ADDED
@@ -0,0 +1,9 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "caps/version"
4
+ require_relative "caps/errors"
5
+ require_relative "caps/tokenizer"
6
+ require_relative "caps/parser"
7
+
8
# Top-level namespace of the gem; its contents come from the files
# required above.
module Caps; end