cataract 0.1.2 → 0.1.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (53) hide show
  1. checksums.yaml +4 -4
  2. data/.github/workflows/ci-manual-rubies.yml +27 -0
  3. data/.overcommit.yml +1 -1
  4. data/.rubocop.yml +62 -0
  5. data/.rubocop_todo.yml +186 -0
  6. data/BENCHMARKS.md +60 -139
  7. data/CHANGELOG.md +14 -0
  8. data/README.md +30 -2
  9. data/Rakefile +49 -22
  10. data/cataract.gemspec +4 -1
  11. data/ext/cataract/cataract.c +47 -47
  12. data/ext/cataract/css_parser.c +17 -33
  13. data/ext/cataract/merge.c +58 -2
  14. data/lib/cataract/at_rule.rb +8 -9
  15. data/lib/cataract/declaration.rb +18 -0
  16. data/lib/cataract/import_resolver.rb +3 -4
  17. data/lib/cataract/pure/byte_constants.rb +69 -0
  18. data/lib/cataract/pure/helpers.rb +35 -0
  19. data/lib/cataract/pure/imports.rb +255 -0
  20. data/lib/cataract/pure/merge.rb +1146 -0
  21. data/lib/cataract/pure/parser.rb +1236 -0
  22. data/lib/cataract/pure/serializer.rb +590 -0
  23. data/lib/cataract/pure/specificity.rb +206 -0
  24. data/lib/cataract/pure.rb +130 -0
  25. data/lib/cataract/rule.rb +22 -13
  26. data/lib/cataract/stylesheet.rb +14 -9
  27. data/lib/cataract/version.rb +1 -1
  28. data/lib/cataract.rb +18 -5
  29. metadata +12 -25
  30. data/benchmarks/benchmark_harness.rb +0 -193
  31. data/benchmarks/benchmark_merging.rb +0 -121
  32. data/benchmarks/benchmark_optimization_comparison.rb +0 -168
  33. data/benchmarks/benchmark_parsing.rb +0 -153
  34. data/benchmarks/benchmark_ragel_removal.rb +0 -56
  35. data/benchmarks/benchmark_runner.rb +0 -70
  36. data/benchmarks/benchmark_serialization.rb +0 -180
  37. data/benchmarks/benchmark_shorthand.rb +0 -109
  38. data/benchmarks/benchmark_shorthand_expansion.rb +0 -176
  39. data/benchmarks/benchmark_specificity.rb +0 -124
  40. data/benchmarks/benchmark_string_allocation.rb +0 -151
  41. data/benchmarks/benchmark_stylesheet_to_s.rb +0 -62
  42. data/benchmarks/benchmark_to_s_cached.rb +0 -55
  43. data/benchmarks/benchmark_value_splitter.rb +0 -54
  44. data/benchmarks/benchmark_yjit.rb +0 -158
  45. data/benchmarks/benchmark_yjit_workers.rb +0 -61
  46. data/benchmarks/profile_to_s.rb +0 -23
  47. data/benchmarks/speedup_calculator.rb +0 -83
  48. data/benchmarks/system_metadata.rb +0 -81
  49. data/benchmarks/templates/benchmarks.md.erb +0 -221
  50. data/benchmarks/yjit_tests.rb +0 -141
  51. data/scripts/fuzzer/run.rb +0 -828
  52. data/scripts/fuzzer/worker.rb +0 -99
  53. data/scripts/generate_benchmarks_md.rb +0 -155
@@ -0,0 +1,590 @@
1
+ # frozen_string_literal: true
2
+
3
+ # Pure Ruby CSS parser - Serialization methods
4
+ # NO REGEXP ALLOWED - char-by-char parsing only
5
+
6
+ module Cataract
7
# Render a stylesheet as compact CSS text.
#
# Emits an optional @charset line, then every top-level rule in insertion
# order. Consecutive rules that map to the same media query are wrapped in a
# single `@media ... { }` block. Rules that have a parent are not emitted
# here; they are handed to serialize_rule_with_nesting through the
# parent => children table.
#
# @param rules [Array<Rule>] rules in insertion order
# @param media_index [Hash] media query symbol => array of rule IDs
# @param charset [String, nil] value for the leading @charset, if any
# @param has_nesting [Boolean] when false the flat serializer is used instead
# @return [String] compact CSS string
def self._stylesheet_to_s(rules, media_index, charset, has_nesting)
  css = +''
  css << "@charset \"#{charset}\";\n" unless charset.nil?

  # Nothing nested: the flat algorithm is enough.
  return stylesheet_to_s_original(rules, media_index, css) unless has_nesting

  # parent rule id (coerced to Integer) => child rules, insertion order kept
  children_by_parent = {}
  rules.each do |candidate|
    raw_parent = candidate.parent_rule_id
    next unless raw_parent

    key = raw_parent.is_a?(Integer) ? raw_parent : raw_parent.to_i
    (children_by_parent[key] ||= []) << candidate
  end

  # rule id => media symbol (a later media_index entry overwrites an earlier one)
  media_for = {}
  media_index.each do |media_sym, ids|
    ids.each { |id| media_for[id] = media_sym }
  end

  # open_media is nil exactly when no @media block is currently open
  open_media = nil
  rules.each do |top_rule|
    next if top_rule.parent_rule_id

    wanted = media_for[top_rule.id]
    if open_media != wanted
      css << "}\n" unless open_media.nil?
      css << "@media #{wanted} {\n" unless wanted.nil?
      open_media = wanted
    end

    serialize_rule_with_nesting(css, top_rule, children_by_parent, media_for)
  end

  css << "}\n" unless open_media.nil?
  css
end
83
+
84
# Helper: compact serialization for stylesheets without nested rules.
#
# Walks the rules in insertion order and appends each one to +result+ via
# serialize_rule. Runs of consecutive rules that share a media query are
# grouped inside one `@media ... { }` block.
#
# @param rules [Array<Rule>] rules in insertion order
# @param media_index [Hash] media query symbol => array of rule IDs
# @param result [String] mutable buffer to append to
# @return [String] the same buffer
def self.stylesheet_to_s_original(rules, media_index, result)
  # rule id => media symbol
  media_for = {}
  media_index.each do |media_sym, ids|
    ids.each { |id| media_for[id] = media_sym }
  end

  # nil exactly when no @media block is open
  open_media = nil

  rules.each do |entry|
    wanted = media_for[entry.id]

    if open_media != wanted
      # Leaving the previous block (if any), entering the next one (if any).
      result << "}\n" unless open_media.nil?
      result << "@media #{wanted} {\n" unless wanted.nil?
      open_media = wanted
    end

    serialize_rule(result, entry)
  end

  # A block may still be open after the last rule.
  result << "}\n" unless open_media.nil?
  result
end
134
+
135
# Helper: serialize a rule together with all of its nested descendants
# (compact, single line terminated by a newline).
#
# Descendants are emitted recursively to any depth. Earlier code stopped at
# grandchildren, which dropped deeper rules and rendered a nested @media at
# grandchild level as a plain selector.
#
# NOTE(review): unlike serialize_rule, this path does not special-case
# AtRule - confirm at-rules never reach it when nesting is present.
#
# @param result [String] mutable buffer to append to
# @param rule [Rule] rule to serialize
# @param rule_children [Hash] parent rule id => array of child rules
# @param rule_to_media [Hash] rule id => media query symbol
# @return [String] the same buffer
def self.serialize_rule_with_nesting(result, rule, rule_children, rule_to_media)
  result << "#{rule.selector} { "
  serialize_declarations(result, rule.declarations) unless rule.declarations.empty?
  serialize_nested_children(result, rule, rule_children, rule_to_media)
  result << " }\n"
end

# Helper: append every child of +parent+ (and, recursively, their children)
# in compact form. A single space separates a child from preceding content:
# it is written before every child except a first child whose parent has no
# declarations.
def self.serialize_nested_children(result, parent, rule_children, rule_to_media)
  has_declarations = !parent.declarations.empty?

  (rule_children[parent.id] || []).each_with_index do |child, index|
    result << ' ' if has_declarations || index > 0

    # A child without a nesting style that is listed in the media map is a
    # nested @media block; anything else is a nested selector.
    media_sym = child.nesting_style.nil? ? rule_to_media[child.id] : nil
    header = if media_sym
               "@media #{media_sym}"
             else
               reconstruct_nested_selector(parent.selector, child.selector, child.nesting_style)
             end

    result << "#{header} { "
    serialize_declarations(result, child.declarations)
    serialize_nested_children(result, child, rule_children, rule_to_media)
    result << ' }'
  end
end
211
+
212
# Turn a fully resolved child selector back into the form it takes when
# written inside its parent's block.
#
# - nesting_style nil: the child selector is returned unchanged.
# - nesting_style 1 (explicit): the parent part becomes `&`
#     ".parent .child" under ".parent" => "& .child"
#     ".parent:hover"  under ".parent" => "&:hover"
#   When the child does not start with the parent, the first occurrence of
#   the parent selector anywhere in the child is replaced by `&`.
# - any other value (implicit): a leading parent selector and the whitespace
#   after it are removed
#     ".parent .child" under ".parent" => ".child"
#   When the child does not start with the parent it is returned unchanged.
#
# @param parent_selector [String] selector of the enclosing rule
# @param child_selector [String] resolved selector of the nested rule
# @param nesting_style [Integer, nil] 1 = explicit, 0 = implicit, nil = none
# @return [String] selector text to write inside the parent block
def self.reconstruct_nested_selector(parent_selector, child_selector, nesting_style)
  return child_selector if nesting_style.nil?

  explicit = (nesting_style == 1) # NESTING_STYLE_EXPLICIT

  unless child_selector.start_with?(parent_selector)
    # Parent is not a prefix: explicit style still tries an in-place `&`.
    return explicit ? child_selector.sub(parent_selector, '&') : child_selector
  end

  tail = child_selector[parent_selector.length..-1]
  explicit ? "&#{tail}" : tail.lstrip
end
238
+
239
# Helper: append one rule in compact form.
#
# AtRule instances are delegated to serialize_at_rule; every other rule is
# written as `selector { declarations }` followed by a newline.
#
# @param result [String] mutable buffer to append to
# @param rule [Rule, AtRule] rule to serialize
# @return [String, nil] the buffer for a regular rule, nil for an AtRule
def self.serialize_rule(result, rule)
  case rule
  when AtRule
    serialize_at_rule(result, rule)
    nil
  else
    result << "#{rule.selector} { "
    serialize_declarations(result, rule.declarations)
    result << " }\n"
  end
end
252
+
253
# Helper: append declarations in compact form on a single line.
#
# Each declaration is written as `property: value;` (with ` !important`
# before the semicolon when flagged). Declarations are separated by one
# space; nothing follows the last one.
#
# @param result [String] mutable buffer to append to
# @param declarations [Array] objects responding to property, value, important
# @return [Array] the declarations that were passed in
def self.serialize_declarations(result, declarations)
  final_index = declarations.length - 1

  declarations.each_with_index do |declaration, position|
    terminator = declaration.important ? ' !important;' : ';'
    result << "#{declaration.property}: #{declaration.value}#{terminator}"
    result << ' ' unless position == final_index
  end
end
261
+
262
# Helper: append declarations in formatted form, one per line.
#
# Every line is `<indent>property: value;` with ` !important` inserted before
# the semicolon for flagged declarations, and ends with a newline.
#
# @param result [String] mutable buffer to append to
# @param declarations [Array] objects responding to property, value, important
# @param indent [String] prefix written in front of every declaration
# @return [Array] the declarations that were passed in
def self.serialize_declarations_formatted(result, declarations, indent)
  declarations.each do |declaration|
    result << indent << declaration.property << ': ' << declaration.value
    result << (declaration.important ? " !important;\n" : ";\n")
  end
end
277
+
278
# Helper: append an at-rule (@keyframes, @font-face, etc) in compact form.
#
# The at-rule opens with `selector {` and closes with `}`, each on its own
# line. The type of the first content entry decides the body:
# - a Rule: every entry is written as an indented `selector { ... }` line
#   (e.g. @keyframes steps)
# - anything else: the content is treated as a declaration list and written
#   on one indented line (e.g. @font-face)
# Empty content produces only the opening and closing lines.
#
# NOTE(review): body lines are indented with a single space here while the
# formatted serializer's comments describe 2-space steps - confirm intended.
#
# @param result [String] mutable buffer to append to
# @param at_rule [AtRule] at-rule whose content is rules or declarations
# @return [String] the same buffer
def self.serialize_at_rule(result, at_rule)
  result << "#{at_rule.selector} {\n"

  body = at_rule.content
  if body.length > 0
    if body[0].is_a?(Rule)
      body.each do |inner_rule|
        result << " #{inner_rule.selector} { "
        serialize_declarations(result, inner_rule.declarations)
        result << " }\n"
      end
    else
      result << ' '
      serialize_declarations(result, body)
      result << "\n"
    end
  end

  result << "}\n"
end
303
+
304
# Serialize stylesheet to formatted CSS string (with indentation)
#
# Emits an optional @charset line, then every top-level rule in insertion
# order via serialize_rule_with_nesting_formatted. Consecutive rules mapped
# to the same media query share one `@media ... { }` block and are indented
# by two spaces inside it, matching the two-spaces-per-level convention the
# other formatted helpers document. A blank line precedes a media block that
# follows other output; no blank line is written between two adjacent media
# blocks or between plain rules on this path.
#
# @param rules [Array<Rule>] Array of rules
# @param media_index [Hash] Media query symbol => array of rule IDs
# @param charset [String, nil] @charset value
# @param has_nesting [Boolean] Whether any nested rules exist
# @return [String] Formatted CSS string
def self._stylesheet_to_formatted_s(rules, media_index, charset, has_nesting)
  result = +''
  result << "@charset \"#{charset}\";\n" unless charset.nil?

  # Fast path: no nesting - use simple algorithm
  return stylesheet_to_formatted_s_original(rules, media_index, result) unless has_nesting

  # parent rule id (coerced to Integer) => child rules
  rule_children = {}
  rules.each do |rule|
    parent = rule.parent_rule_id
    next unless parent

    parent_id = parent.is_a?(Integer) ? parent : parent.to_i
    (rule_children[parent_id] ||= []) << rule
  end

  # rule id => media symbol
  rule_to_media = {}
  media_index.each do |media_sym, rule_ids|
    rule_ids.each { |rule_id| rule_to_media[rule_id] = media_sym }
  end

  # nil exactly when no @media block is open
  current_media = nil

  rules.each do |rule|
    # Children are emitted by their parent.
    next if rule.parent_rule_id

    rule_media = rule_to_media[rule.id]

    if rule_media.nil?
      unless current_media.nil?
        result << "}\n"
        current_media = nil
      end

      serialize_rule_with_nesting_formatted(result, rule, rule_children, rule_to_media, '')
    else
      if current_media != rule_media
        if !current_media.nil?
          result << "}\n"
        elsif !result.empty?
          result << "\n"
        end
        current_media = rule_media
        result << "@media #{current_media} {\n"
      end

      # One nesting level inside the media block => two spaces.
      serialize_rule_with_nesting_formatted(result, rule, rule_children, rule_to_media, '  ')
    end
  end

  result << "}\n" unless current_media.nil?
  result
end
381
+
382
# Helper: formatted serialization without nesting support
#
# Walks the rules in insertion order. Consecutive rules sharing a media query
# are grouped into one `@media ... { }` block and serialized with a 2-space
# indent; rules outside any media query are serialized without indentation.
# A blank line is written before every top-level rule and before every newly
# opened media block, except when it is the first rule of the list.
#
# @param rules [Array<Rule>] rules in insertion order
# @param media_index [Hash] media query symbol => array of rule IDs
# @param result [String] mutable buffer to append to
# @return [String] the same buffer
def self.stylesheet_to_formatted_s_original(rules, media_index, result)
  # rule id => media symbol
  rule_to_media = {}
  media_index.each do |media_sym, rule_ids|
    rule_ids.each { |rule_id| rule_to_media[rule_id] = media_sym }
  end

  # nil exactly when no @media block is open
  current_media = nil

  rules.each_with_index do |rule, rule_index|
    rule_media = rule_to_media[rule.id]
    first_rule = rule_index.zero?

    if rule_media.nil?
      # Not in any media query - close any open media block first
      unless current_media.nil?
        result << "}\n"
        current_media = nil
      end

      result << "\n" unless first_rule

      # Always passes is_last_rule=true so the rule ends with a single newline
      serialize_rule_formatted(result, rule, '', true)
    else
      if current_media != rule_media
        result << "}\n" unless current_media.nil?
        result << "\n" unless first_rule

        current_media = rule_media
        result << "@media #{current_media} {\n"
      end

      # Rules inside a media block are indented by two spaces and always
      # end with a single newline (is_last_rule=true)
      serialize_rule_formatted(result, rule, '  ', true)
    end
  end

  # Close final media block if still open
  result << "}\n" unless current_media.nil?
  result
end
446
+
447
# Helper: serialize a rule with nested children (formatted)
#
# Writes `<indent>selector {`, the rule's declarations one per line, every
# nested descendant, and the closing brace. Each nesting level is indented
# two spaces deeper than its parent. Descendants are handled recursively to
# any depth; earlier code stopped at grandchildren and rendered a nested
# @media at grandchild level as a plain selector.
#
# @param result [String] mutable buffer to append to
# @param rule [Rule] rule to serialize
# @param rule_children [Hash] parent rule id => array of child rules
# @param rule_to_media [Hash] rule id => media query symbol
# @param indent [String] prefix for the selector line and closing brace
# @return [String] the same buffer
def self.serialize_rule_with_nesting_formatted(result, rule, rule_children, rule_to_media, indent)
  inner = "#{indent}  "

  result << indent << rule.selector << " {\n"
  serialize_declarations_formatted(result, rule.declarations, inner) unless rule.declarations.empty?
  serialize_nested_children_formatted(result, rule, rule_children, rule_to_media, inner)
  result << indent << "}\n"
end

# Helper: append every child of +parent+ (and, recursively, their children)
# as formatted blocks whose header and closing brace start at +indent+.
def self.serialize_nested_children_formatted(result, parent, rule_children, rule_to_media, indent)
  inner = "#{indent}  "

  (rule_children[parent.id] || []).each do |child|
    # A child without a nesting style that is listed in the media map is a
    # nested @media block; anything else is a nested selector.
    media_sym = child.nesting_style.nil? ? rule_to_media[child.id] : nil
    header = if media_sym
               "@media #{media_sym}"
             else
               reconstruct_nested_selector(parent.selector, child.selector, child.nesting_style)
             end

    result << indent << header << " {\n"
    serialize_declarations_formatted(result, child.declarations, inner) unless child.declarations.empty?
    serialize_nested_children_formatted(result, child, rule_children, rule_to_media, inner)
    result << indent << "}\n"
  end
end
532
+
533
# Helper: serialize a single rule with formatting
#
# AtRule instances are delegated to serialize_at_rule_formatted. Any other
# rule is written as a `<indent>selector {` line, its declarations one per
# line indented two spaces deeper than the selector (the same per-level step
# serialize_at_rule_formatted documents), and a closing brace at +indent+.
#
# @param result [String] mutable buffer to append to
# @param rule [Rule, AtRule] rule to serialize
# @param indent [String] prefix for the selector line and closing brace
# @param is_last_rule [Boolean] true => closing brace is followed by a single
#   newline; false => by a blank line as well (ignored for an AtRule)
# @return [String, nil] the buffer for a regular rule, nil for an AtRule
def self.serialize_rule_formatted(result, rule, indent, is_last_rule = false)
  if rule.is_a?(AtRule)
    serialize_at_rule_formatted(result, rule, indent)
    return
  end

  # Selector line with opening brace
  result << indent << rule.selector << " {\n"

  # Declarations, one per line, one level (two spaces) deeper
  serialize_declarations_formatted(result, rule.declarations, "#{indent}  ")

  # Closing brace - double newline for all except last rule
  result << indent << (is_last_rule ? "}\n" : "}\n\n")
end
554
+
555
# Helper: serialize an at-rule with formatting
#
# Writes `<indent>selector {`, the body, and a closing brace at +indent+.
# The type of the first content entry decides the body:
# - a Rule: every entry becomes a nested block (e.g. @keyframes) whose
#   selector and closing brace are indented 2 spaces past +indent+ and whose
#   declarations are indented 4 spaces past +indent+
# - anything else: the content is treated as a declaration list
#   (e.g. @font-face), one per line, indented 2 spaces past +indent+
# Empty content produces only the opening and closing lines.
#
# @param result [String] mutable buffer to append to
# @param at_rule [AtRule] at-rule whose content is rules or declarations
# @param indent [String] prefix for the at-rule's own lines
# @return [String] the same buffer
def self.serialize_at_rule_formatted(result, at_rule, indent)
  result << indent << at_rule.selector << " {\n"

  content = at_rule.content
  inner = "#{indent}  "

  unless content.empty?
    if content[0].is_a?(Rule)
      # Serialize as nested rules (e.g., @keyframes) with formatting
      deeper = "#{inner}  "
      content.each do |nested_rule|
        result << inner << nested_rule.selector << " {\n"
        serialize_declarations_formatted(result, nested_rule.declarations, deeper)
        result << inner << "}\n"
      end
    else
      # Serialize as declarations (e.g., @font-face, one per line)
      serialize_declarations_formatted(result, content, inner)
    end
  end

  result << indent << "}\n"
end
590
+ end