minicss 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (46) hide show
  1. checksums.yaml +7 -0
  2. data/.editorconfig +10 -0
  3. data/.rspec +3 -0
  4. data/.rubocop.yml +66 -0
  5. data/ACKNOWLEDGMENTS.md +47 -0
  6. data/CODE_OF_CONDUCT.md +132 -0
  7. data/LICENSE +21 -0
  8. data/README.md +178 -0
  9. data/Rakefile +12 -0
  10. data/lib/minicss/ast/at_rule.rb +17 -0
  11. data/lib/minicss/ast/bad_token.rb +14 -0
  12. data/lib/minicss/ast/block.rb +29 -0
  13. data/lib/minicss/ast/decl.rb +17 -0
  14. data/lib/minicss/ast/decl_list.rb +18 -0
  15. data/lib/minicss/ast/dimension.rb +14 -0
  16. data/lib/minicss/ast/function.rb +15 -0
  17. data/lib/minicss/ast/number.rb +14 -0
  18. data/lib/minicss/ast/percentage.rb +8 -0
  19. data/lib/minicss/ast/rule.rb +28 -0
  20. data/lib/minicss/ast/string_token.rb +14 -0
  21. data/lib/minicss/ast/syntax_error.rb +13 -0
  22. data/lib/minicss/ast/unicode_range.rb +13 -0
  23. data/lib/minicss/ast/url.rb +13 -0
  24. data/lib/minicss/ast.rb +72 -0
  25. data/lib/minicss/css/ast/at_rule.rb +19 -0
  26. data/lib/minicss/css/ast/declaration.rb +21 -0
  27. data/lib/minicss/css/ast/declaration_list.rb +11 -0
  28. data/lib/minicss/css/ast/function.rb +20 -0
  29. data/lib/minicss/css/ast/qualified_rule.rb +19 -0
  30. data/lib/minicss/css/ast/simple_block.rb +37 -0
  31. data/lib/minicss/css/ast/stylesheet.rb +17 -0
  32. data/lib/minicss/css/ast.rb +9 -0
  33. data/lib/minicss/css/errors.rb +8 -0
  34. data/lib/minicss/css/parser.rb +360 -0
  35. data/lib/minicss/css/position.rb +15 -0
  36. data/lib/minicss/css/refinements.rb +78 -0
  37. data/lib/minicss/css/token.rb +28 -0
  38. data/lib/minicss/css/token_stream.rb +56 -0
  39. data/lib/minicss/css/tokenizer.rb +572 -0
  40. data/lib/minicss/css.rb +10 -0
  41. data/lib/minicss/errors.rb +6 -0
  42. data/lib/minicss/sel.rb +382 -0
  43. data/lib/minicss/serializer.rb +59 -0
  44. data/lib/minicss/version.rb +5 -0
  45. data/lib/minicss.rb +53 -0
  46. metadata +87 -0
@@ -0,0 +1,572 @@
1
+ # frozen_string_literal: true
2
+
3
+ module MiniCSS
4
+ module CSS
5
+ class Tokenizer
6
# Named code points used by the tokenizer. Every constant is a
# single-character string; the trailing comment shows the character
# it stands for.

# Control characters and sentinels.
NULL = "\u0000"
LINE_FEED = "\u000A"
FORM_FEED = "\u000C"
CARRIAGE_RETURN = "\u000D"
REPLACEMENT_CHARACTER = "\uFFFD"
MAXIMUM_ALLOWED_CODEPOINT = "\u{10FFFF}"

# Quotes.
QUOTATION_MARK = "\u0022" # "
APOSTROPHE = "\u0027" # '

# Brackets.
LEFT_PARENTHESIS = "\u0028" # (
RIGHT_PARENTHESIS = "\u0029" # )
LEFT_SQUARE_BRACKET = "\u005B" # [
RIGHT_SQUARE_BRACKET = "\u005D" # ]
LEFT_CURLY = "\u007B" # {
RIGHT_CURLY = "\u007D" # }

# Punctuation and operators, in code point order.
NUMBER_SIGN = "\u0023" # #
PERCENTAGE_SIGN = "\u0025" # %
ASTERISK = "\u002A" # *
PLUS_SIGN = "\u002B" # +
COMMA = "\u002C" # ,
HYPHEN_MINUS = "\u002D" # -
FULL_STOP = "\u002E" # .
SOLIDUS = "\u002F" # /
COLON = "\u003A" # :
SEMICOLON = "\u003B" # ;
LESS_THAN = "\u003C" # <
GREATER_THAN = "\u003E" # >
QUESTION_MARK = "\u003F" # ?
COMMERCIAL_AT = "\u0040" # @
REVERSE_SOLIDUS = "\u005C" # \
35
+
36
# Tokens pushed so far.
attr_reader :tokens
# Value of the `allow_unicode_ranges:` constructor argument.
attr_reader :unicode_ranges_allowed

# Predicate-style reader for the same flag.
alias_method :unicode_ranges_allowed?, :unicode_ranges_allowed

# Activates StringRefinements for the rest of this file.
# NOTE(review): presumably the source of the character predicates used
# below (`newline?`, `hex?`, `digit?`, ...); it is defined in another file.
using StringRefinements
40
+
41
+ def initialize(input, allow_unicode_ranges: false)
42
+ @input = (input.is_a?(IO) ? input.read : input.to_s)
43
+ .encode("UTF-8", invalid: :replace, undef: :replace, replace: "\uFFFD")
44
+ .gsub("\r\n", "\n")
45
+ .gsub("\u000C", "\n")
46
+ .gsub("\u0000", "\uFFFD")
47
+ .chars
48
+ @len = @input.length
49
+ @unicode_ranges_allowed = allow_unicode_ranges
50
+ @line = 1
51
+ @column = 1
52
+ @idx = 0
53
+ @tokens = []
54
+ end
55
+
56
# True once the cursor has reached or passed the end of the input.
def eof?
  @idx >= @len
end

# Character under the cursor, or the EOF sentinel when the input is
# exhausted.
def peek
  return EOF.instance if eof?

  @input[@idx]
end

# Character right after the cursor, or the EOF sentinel when there is none.
def peek1
  lookahead = @idx + 1
  lookahead >= @len ? EOF.instance : @input[lookahead]
end

# Character immediately before the cursor.
# NOTE(review): at offset 0 this reads @input[-1], i.e. the final
# character of the input - confirm no caller relies on it there.
def last
  @input[@idx - 1]
end

# Number of characters consumed since the current token was started.
def consumed_len
  @idx - @token_start[:offset]
end
61
+
62
# Returns the character under the cursor and advances past it, keeping the
# line and column counters in step. At end of input nothing advances and
# the value of +peek+ (the EOF sentinel) is returned.
def consume
  current = peek
  return current if eof?

  @idx += 1
  if current.newline?
    # A newline starts a fresh line at column 1.
    @line += 1
    @column = 1
  else
    @column += 1
  end

  current
end
75
+
76
# Records the current cursor position as the start of the token that is
# about to be read.
def start_token!
  @token_start = { offset: @idx, line: @line, column: @column }
end

# Snapshot of the cursor as a hash with :offset, :line and :column keys,
# in that order.
def pos
  { offset: @idx, line: @line, column: @column }
end
81
+
82
# Appends a token of the given type to the token list. The token spans
# from the position recorded by start_token! to the current cursor
# position; any extra keyword arguments are forwarded to Token.new.
def push_token(type, **)
  span_start, span_end = [@token_start, pos].map do |snapshot|
    Position.new(*snapshot.values)
  end

  @tokens << Token.new(type, span_start, span_end, **)
end
89
+
90
# Tells whether two characters form a valid escape: a reverse solidus
# followed by anything other than a newline. Without an argument the two
# characters at the cursor are checked; otherwise the first two elements
# of +value+ are used.
def valid_escape?(value = nil)
  first, second = value || [peek, peek1]
  return false unless first == REVERSE_SOLIDUS

  !second&.newline?
end
94
+
95
# Reads what follows an already consumed reverse solidus and returns the
# character the escape stands for.
#
# * End of input: U+FFFD, nothing consumed.
# * Hex escape: up to six hex digits plus one optional trailing whitespace
#   character are consumed. Zero, surrogates and values beyond the maximum
#   code point yield U+FFFD.
# * Newline: U+FFFD, nothing consumed.
# * Anything else: that character is consumed and returned as is.
def consume_escaped_code_point
  lead = peek
  return REPLACEMENT_CHARACTER if lead.eof?

  unless lead.hex?
    return lead.newline? ? REPLACEMENT_CHARACTER : consume
  end

  digits = []
  digits << consume while peek.hex? && digits.length < 6
  consume if peek.whitespace?

  code_point = digits.join.hex
  unusable = code_point.zero? || code_point.surrogate? || code_point.overflows_maximum_codepoint?
  unusable ? REPLACEMENT_CHARACTER : [code_point].pack("U")
end
122
+
123
# Checks whether the next (up to) three characters at the cursor would
# start an identifier sequence. Nothing is consumed.
def ident_sequence_start?
  window = @input[@idx, 3]
  head, second = window

  if head == HYPHEN_MINUS
    # "-" must be followed by an ident-start character, another "-",
    # or a valid escape.
    return second&.ident_start? || second == HYPHEN_MINUS || valid_escape?(window[1...])
  end
  return true if head&.ident_start?
  return valid_escape?(window) if head == REVERSE_SOLIDUS

  false
end
135
+
136
# Consumes identifier characters and escapes for as long as they continue
# and returns them joined into one string, with escapes already decoded.
def consume_ident_sequence
  collected = []

  loop do
    break if eof?

    if peek.ident_point?
      collected << consume
    elsif valid_escape?
      consume # drop the reverse solidus
      collected << consume_escaped_code_point
    else
      break
    end
  end

  collected.join
end
151
+
152
# Checks whether the three characters at the cursor start a unicode-range
# token: "u" or "U", then "+", then either "?" or a hex digit. Nothing is
# consumed.
#
# All three conditions must hold. A hex digit in third position alone
# (as in "uab") does not qualify.
def unicode_range_start?
  prefix, plus, first = @input[@idx, 3]
  return false unless prefix&.downcase == "u" && plus == PLUS_SIGN
  return false if first.nil?

  first == QUESTION_MARK || first.hex?
end
159
+
160
# Gives every token that has no literal yet the slice of the input
# between its start and end offsets.
def hydrate_tokens
  @tokens.each do |token|
    next if token.literal

    span = token.pos_start.offset...token.pos_end.offset
    token.literal = @input[span].join
  end
end
165
+
166
+ # ------- Parser starts here.
167
+
168
# Tokenizes the whole input. Results are collected in +tokens+; the
# method itself always returns nil.
def tokenize
  loop do
    break if eof?

    consume_token
  end

  hydrate_tokens
  nil
end
173
+
174
# Reads the next token at the cursor and pushes it onto the token list.
#
# Leading comments and whitespace are handled first (whitespace produces
# its own token). The character then found at the cursor selects the kind
# of token to read. Characters with no dedicated rule become :delim
# tokens. Nothing is pushed at end of input.
def consume_token
  consume_comments
  consume_whitespace

  case peek
  when QUOTATION_MARK, APOSTROPHE
    consume_string_token

  when NUMBER_SIGN
    start_token!

    if peek1.ident_point? || valid_escape?(@input[(@idx + 1)..(@idx + 3)])
      consume
      # The flag is :id when what follows "#" would start an identifier
      # sequence. That needs the three-character check, so that names
      # such as "#-foo" or ones starting with an escape also qualify.
      flag = ident_sequence_start? ? :id : :unrestricted
      value = consume_ident_sequence
      push_token(:hash, literal: "##{value}", flag:)
      return
    end

    consume
    push_token(:delim)

  when LEFT_PARENTHESIS
    start_token!
    consume
    push_token(:left_parenthesis)

  when RIGHT_PARENTHESIS
    start_token!
    consume
    push_token(:right_parenthesis)

  when PLUS_SIGN
    # "+1" or "+.5" is a number. The character two ahead may not exist
    # when "+." ends the input, hence the safe navigation.
    if peek1.digit? || (peek1 == FULL_STOP && @input[@idx + 2]&.digit?)
      consume_numeric_token
    else
      start_token!
      consume
      push_token(:delim)
    end

  when COMMA
    start_token!
    consume
    push_token(:comma)

  when HYPHEN_MINUS
    # Same lookahead as for "+", with the same end-of-input caveat.
    if peek1.digit? || (peek1 == FULL_STOP && @input[@idx + 2]&.digit?)
      consume_numeric_token
      return
    end

    nexts = @input[@idx...(@idx + 3)]
    if nexts[1] == HYPHEN_MINUS && nexts[2] == GREATER_THAN
      # "-->" closes an HTML-style comment.
      start_token!
      3.times { consume }
      push_token(:cdc)
      return
    end

    if ident_sequence_start?
      consume_ident_like_token
      return
    end

    start_token!
    consume
    push_token(:delim)

  when FULL_STOP
    return consume_numeric_token if peek1.digit?

    start_token!
    consume
    push_token(:delim)

  when COLON
    start_token!
    consume
    push_token(:colon)

  when SEMICOLON
    start_token!
    consume
    push_token(:semicolon)

  when LESS_THAN
    start_token!
    if @input[@idx...(@idx + 4)].join == "<!--"
      # "<!--" opens an HTML-style comment.
      4.times { consume }
      push_token(:cdo)
      return
    end

    consume
    push_token(:delim)

  when COMMERCIAL_AT
    start_token!
    consume

    if ident_sequence_start?
      val = consume_ident_sequence
      push_token(:at_keyword, literal: "@#{val}")
      return
    end

    push_token(:delim)

  when LEFT_SQUARE_BRACKET
    start_token!
    consume
    push_token(:left_square_bracket)

  when REVERSE_SOLIDUS
    return consume_ident_like_token if valid_escape?

    start_token!
    consume
    push_token(:delim)

  when RIGHT_SQUARE_BRACKET
    start_token!
    consume
    push_token(:right_square_bracket)

  when LEFT_CURLY
    start_token!
    consume
    push_token(:left_curly)

  when RIGHT_CURLY
    start_token!
    consume
    push_token(:right_curly)

  when "u", "U"
    return consume_unicode_range if unicode_ranges_allowed? && unicode_range_start?

    consume_ident_like_token

  else
    return if peek.eof?
    return consume_whitespace if peek.whitespace?
    return consume_numeric_token if peek.digit?
    return consume_ident_like_token if peek.ident_start?

    start_token!
    consume
    push_token(:delim)
  end
end
325
+
326
# Reads a unicode-range token and pushes it with integer +start+ and
# +end+ values.
#
# After the two-character prefix, up to six characters are read: hex
# digits first, then "?" wildcards. With wildcards the range runs from
# the value with every "?" read as 0 to the value with every "?" read
# as F. Without wildcards an optional "-" followed by up to six hex
# digits gives the end; otherwise the range is a single code point.
def consume_unicode_range
  start_token!
  consume
  consume

  digits = []
  digits << consume while peek.hex? && digits.length < 6
  digits << consume while peek == QUESTION_MARK && digits.length < 6

  if digits.include?(QUESTION_MARK)
    pattern = digits.join
    low = pattern.tr(QUESTION_MARK, "0").hex
    high = pattern.tr(QUESTION_MARK, "F").hex
    return push_token(:unicode_range, start: low, end: high)
  end

  low = digits.join.hex
  high = low

  if peek == HYPHEN_MINUS && peek1.hex?
    consume # the "-" between the two bounds
    upper = []
    upper << consume while peek.hex? && upper.length < 6
    high = upper.join.hex
  end

  push_token(:unicode_range, start: low, end: high)
end
352
+
353
# Skips every comment that starts at the cursor, one after another, and
# stops at the first thing that is not a comment. An unterminated comment
# runs to the end of the input. No token is produced.
def consume_comments
  while peek == SOLIDUS && peek1 == ASTERISK
    # Opening "/*".
    consume
    consume

    loop do
      break if peek == ASTERISK && peek1 == SOLIDUS
      return if peek.eof?

      consume
    end

    # Closing "*/".
    consume
    consume
  end
end
370
+
371
# Consumes a run of whitespace characters and comments (comments count as
# whitespace here) and pushes one :whitespace token covering the whole
# run. Pushes nothing when the cursor is not on whitespace or a comment.
def consume_whitespace
  started = false

  loop do
    at_comment = peek == SOLIDUS && peek1 == ASTERISK
    break unless at_comment || peek.whitespace?

    unless started
      start_token!
      started = true
    end

    at_comment ? consume_comments : consume
  end

  push_token(:whitespace) if started
end
393
+
394
# Reads a quoted string and pushes a :string token, or a :bad_string
# token when an unescaped newline is hit before the closing quote.
#
# When +closing_token+ is not given, the character at the cursor is
# consumed and used as the quote. The token starts after that quote and
# is pushed before the closing quote is consumed, so its positions cover
# the string contents only. Reaching end of input ends the string.
def consume_string_token(closing_token = nil)
  closing_token ||= consume
  start_token!
  buffer = []

  until eof?
    current = peek

    case current
    when closing_token
      break
    when REVERSE_SOLIDUS
      following = peek1
      if following.eof?
        # Trailing backslash: drop it.
        consume
      elsif following.newline?
        # Escaped newline: both characters are dropped.
        consume
        consume
      elsif valid_escape?
        consume
        decoded = consume_escaped_code_point
        buffer << decoded if decoded
      end
    else
      if current.newline?
        # The newline itself is left unconsumed.
        push_token(:bad_string)
        return
      end

      buffer << consume
    end
  end

  push_token(:string, literal: buffer.join, quoting: closing_token)
  consume unless eof?
end
432
+
433
# Reads a number and pushes the matching token: :dimension when a unit
# identifier follows, :percentage when "%" follows, :number otherwise.
# The fields returned by consume_number are passed along to the token.
def consume_numeric_token
  start_token!
  parsed = consume_number

  return push_token(:dimension, unit: consume_ident_sequence, **parsed) if ident_sequence_start?

  if peek == PERCENTAGE_SIGN
    consume # the "%" sign
    push_token(:percentage, **parsed)
  else
    push_token(:number, **parsed)
  end
end
447
+
448
# Reads a numeric literal at the cursor: optional sign, digits, optional
# fraction, optional exponent.
#
# Returns a hash with:
#   value:          the number as a Float, scaled by the exponent if any
#   type:           :integer, or :number when a fraction or exponent was read
#   sign_character: the leading "+" or "-" if present, otherwise nil
def consume_number
  signs = [PLUS_SIGN, HYPHEN_MINUS]
  type = :integer

  sign_character = signs.include?(peek) ? consume : nil
  mantissa = [sign_character].compact
  mantissa << consume while peek.digit?

  if peek == FULL_STOP && peek1.digit?
    type = :number
    mantissa << consume # the decimal point
    mantissa << consume while peek.digit?
  end

  exponent_chars = []
  marker, after_marker, third = @input[@idx..(@idx + 3)]
  # An exponent is "e"/"E" followed by a digit, or by a sign and a digit.
  if marker&.downcase == "e" &&
     ((signs.include?(after_marker) && third&.digit?) || after_marker&.digit?)
    type = :number
    consume # the "e" / "E" marker
    exponent_chars << consume if signs.include?(peek)
    exponent_chars << consume while peek.digit?
  end

  value = mantissa.join.to_f
  value *= 10**exponent_chars.join.to_i(10) unless exponent_chars.empty?

  { value:, type:, sign_character: }
end
486
+
487
# Reads an identifier and pushes the token it leads to:
#
# * :ident when no "(" follows;
# * :function when "(" follows, or for "url(" whose argument is quoted;
# * otherwise, for "url(", whatever consume_url_token produces.
#
# In the quoted "url(" case all whitespace except the last character
# before the quote is consumed as part of the function token.
#
# NOTE(review): the quoted "url(" branch passes `value:` to the token
# while other functions pass `literal:` and `name:` - confirm that
# consumers of :function tokens expect this difference.
def consume_ident_like_token
  start_token!
  name = consume_ident_sequence

  return push_token(:ident, literal: name) unless peek == LEFT_PARENTHESIS

  consume # the "("

  unless name.downcase == "url"
    push_token(:function, literal: "#{name}(", name:)
    return
  end

  consume while peek.whitespace? && peek1.whitespace?

  quotes = [QUOTATION_MARK, APOSTROPHE]
  quoted = peek.one_of?(*quotes) || (peek.whitespace? && peek1.one_of?(*quotes))
  return consume_url_token unless quoted

  push_token(:function, value: name)
  nil
end
509
+
510
# Reads the unquoted argument of "url(" (the "(" is already consumed) and
# pushes a :url token carrying the decoded text, or a :bad_url token when
# the argument is malformed.
#
# Surrounding whitespace is skipped. The token ends at ")" or at end of
# input. Whitespace inside the value, a quote, a "(", a non-printable
# character or an invalid escape makes the rest up to the closing ")" a
# bad URL.
def consume_url_token
  pieces = []
  consume while peek.whitespace?

  loop do
    current = peek

    if current == RIGHT_PARENTHESIS
      consume
      push_token(:url, value: pieces.join)
      return
    elsif current.eof?
      return push_token(:url, value: pieces.join)
    elsif current.whitespace?
      consume while peek.whitespace?

      # Only ")" or end of input may follow trailing whitespace.
      unless peek == RIGHT_PARENTHESIS || eof?
        consume_bad_url
        return push_token(:bad_url)
      end

      consume if peek == RIGHT_PARENTHESIS
      return push_token(:url, value: pieces.join)
    elsif current.one_of?(QUOTATION_MARK, APOSTROPHE, LEFT_PARENTHESIS) || current.non_printable?
      consume_bad_url
      return push_token(:bad_url)
    elsif current == REVERSE_SOLIDUS
      unless valid_escape?
        consume_bad_url
        return push_token(:bad_url)
      end

      consume # the reverse solidus
      pieces << consume_escaped_code_point
    else
      pieces << consume
    end
  end
end
556
+
557
# Discards the remainder of a malformed URL: everything up to and
# including the next ")" or the end of input. Escapes are consumed as a
# unit, so an escaped ")" does not end the URL. Returns nil.
def consume_bad_url
  loop do
    return if eof?

    if peek == RIGHT_PARENTHESIS
      consume
      return
    end

    escaped = valid_escape?
    consume
    consume_escaped_code_point if escaped
  end
end
570
+ end
571
+ end
572
+ end
@@ -0,0 +1,10 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "css/errors"
4
+ require_relative "css/refinements"
5
+ require_relative "css/tokenizer"
6
+ require_relative "css/position"
7
+ require_relative "css/token"
8
+ require_relative "css/token_stream"
9
+ require_relative "css/parser"
10
+ require_relative "css/ast"
@@ -0,0 +1,6 @@
1
+ # frozen_string_literal: true
2
+
3
module MiniCSS
  # Root of the MiniCSS error hierarchy.
  class Error < StandardError
  end

  # Error subclass for invalid rules.
  class InvalidRuleError < Error
  end
end