cataract 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (90) hide show
  1. checksums.yaml +7 -0
  2. data/.clang-tidy +30 -0
  3. data/.github/workflows/ci-macos.yml +12 -0
  4. data/.github/workflows/ci.yml +77 -0
  5. data/.github/workflows/test.yml +76 -0
  6. data/.gitignore +45 -0
  7. data/.overcommit.yml +38 -0
  8. data/.rubocop.yml +83 -0
  9. data/BENCHMARKS.md +201 -0
  10. data/CHANGELOG.md +1 -0
  11. data/Gemfile +27 -0
  12. data/LICENSE +21 -0
  13. data/RAGEL_MIGRATION.md +60 -0
  14. data/README.md +292 -0
  15. data/Rakefile +209 -0
  16. data/benchmarks/benchmark_harness.rb +193 -0
  17. data/benchmarks/benchmark_merging.rb +121 -0
  18. data/benchmarks/benchmark_optimization_comparison.rb +168 -0
  19. data/benchmarks/benchmark_parsing.rb +153 -0
  20. data/benchmarks/benchmark_ragel_removal.rb +56 -0
  21. data/benchmarks/benchmark_runner.rb +70 -0
  22. data/benchmarks/benchmark_serialization.rb +180 -0
  23. data/benchmarks/benchmark_shorthand.rb +109 -0
  24. data/benchmarks/benchmark_shorthand_expansion.rb +176 -0
  25. data/benchmarks/benchmark_specificity.rb +124 -0
  26. data/benchmarks/benchmark_string_allocation.rb +151 -0
  27. data/benchmarks/benchmark_stylesheet_to_s.rb +62 -0
  28. data/benchmarks/benchmark_to_s_cached.rb +55 -0
  29. data/benchmarks/benchmark_value_splitter.rb +54 -0
  30. data/benchmarks/benchmark_yjit.rb +158 -0
  31. data/benchmarks/benchmark_yjit_workers.rb +61 -0
  32. data/benchmarks/profile_to_s.rb +23 -0
  33. data/benchmarks/speedup_calculator.rb +83 -0
  34. data/benchmarks/system_metadata.rb +81 -0
  35. data/benchmarks/templates/benchmarks.md.erb +221 -0
  36. data/benchmarks/yjit_tests.rb +141 -0
  37. data/cataract.gemspec +34 -0
  38. data/cliff.toml +92 -0
  39. data/examples/color_conversion_visual_test/color_conversion_test.html +3603 -0
  40. data/examples/color_conversion_visual_test/generate.rb +202 -0
  41. data/examples/color_conversion_visual_test/template.html.erb +259 -0
  42. data/examples/css_analyzer/analyzer.rb +164 -0
  43. data/examples/css_analyzer/analyzers/base.rb +33 -0
  44. data/examples/css_analyzer/analyzers/colors.rb +133 -0
  45. data/examples/css_analyzer/analyzers/important.rb +88 -0
  46. data/examples/css_analyzer/analyzers/properties.rb +61 -0
  47. data/examples/css_analyzer/analyzers/specificity.rb +68 -0
  48. data/examples/css_analyzer/templates/report.html.erb +575 -0
  49. data/examples/css_analyzer.rb +69 -0
  50. data/examples/github_analysis.html +5343 -0
  51. data/ext/cataract/cataract.c +1086 -0
  52. data/ext/cataract/cataract.h +174 -0
  53. data/ext/cataract/css_parser.c +1435 -0
  54. data/ext/cataract/extconf.rb +48 -0
  55. data/ext/cataract/import_scanner.c +174 -0
  56. data/ext/cataract/merge.c +973 -0
  57. data/ext/cataract/shorthand_expander.c +902 -0
  58. data/ext/cataract/specificity.c +213 -0
  59. data/ext/cataract/value_splitter.c +116 -0
  60. data/ext/cataract_color/cataract_color.c +16 -0
  61. data/ext/cataract_color/color_conversion.c +1687 -0
  62. data/ext/cataract_color/color_conversion.h +136 -0
  63. data/ext/cataract_color/color_conversion_lab.c +571 -0
  64. data/ext/cataract_color/color_conversion_named.c +259 -0
  65. data/ext/cataract_color/color_conversion_oklab.c +547 -0
  66. data/ext/cataract_color/extconf.rb +23 -0
  67. data/ext/cataract_old/cataract.c +393 -0
  68. data/ext/cataract_old/cataract.h +250 -0
  69. data/ext/cataract_old/css_parser.c +933 -0
  70. data/ext/cataract_old/extconf.rb +67 -0
  71. data/ext/cataract_old/import_scanner.c +174 -0
  72. data/ext/cataract_old/merge.c +776 -0
  73. data/ext/cataract_old/shorthand_expander.c +902 -0
  74. data/ext/cataract_old/specificity.c +213 -0
  75. data/ext/cataract_old/stylesheet.c +290 -0
  76. data/ext/cataract_old/value_splitter.c +116 -0
  77. data/lib/cataract/at_rule.rb +97 -0
  78. data/lib/cataract/color_conversion.rb +18 -0
  79. data/lib/cataract/declarations.rb +332 -0
  80. data/lib/cataract/import_resolver.rb +210 -0
  81. data/lib/cataract/rule.rb +131 -0
  82. data/lib/cataract/stylesheet.rb +716 -0
  83. data/lib/cataract/stylesheet_scope.rb +257 -0
  84. data/lib/cataract/version.rb +5 -0
  85. data/lib/cataract.rb +107 -0
  86. data/lib/tasks/gem.rake +158 -0
  87. data/scripts/fuzzer/run.rb +828 -0
  88. data/scripts/fuzzer/worker.rb +99 -0
  89. data/scripts/generate_benchmarks_md.rb +155 -0
  90. metadata +135 -0
@@ -0,0 +1,1435 @@
1
+ /*
2
+ * css_parser.c - New CSS parser implementation with flat rule array
3
+ *
4
+ * Key differences from original:
5
+ * - Flat @rules array with rule IDs (0-indexed)
6
+ * - Separate @media_index hash mapping media queries to rule ID arrays
7
+ * - Handles nested @media queries by combining conditions
8
+ *
9
+ * TODO: Unify !important detection into a macro/helper function
10
+ * Currently duplicated in parse_declarations() and parse_mixed_block()
11
+ */
12
+
13
+ #include "cataract.h"
14
+ #include <string.h>
15
+
16
+ // Parser context passed through recursive calls
17
+ typedef struct {
18
+ VALUE rules_array; // Array of Rule structs
19
+ VALUE media_index; // Hash: Symbol => Array of rule IDs
20
+ int rule_id_counter; // Next rule ID (0-indexed)
21
+ int media_query_count; // Safety limit for media queries
22
+ st_table *media_cache; // Parse-time cache: string => parsed media types
23
+ int has_nesting; // Set to 1 if any nested rules are created
24
+ int depth; // Current recursion depth (safety limit)
25
+ } ParserContext;
26
+
27
/*
 * SKIP_COMMENT(ptr, limit) - step over one CSS comment.
 *
 * If ptr is positioned on the two-character comment opener (slash, star) the
 * macro advances ptr to just after the matching closer (star, slash) and then
 * executes `continue`, so it may only be used directly inside a loop body.
 * When the comment is never closed, ptr is left where the scan stopped
 * (limit - 1) before the `continue`.
 * If ptr is not on a comment opener the macro does nothing.
 *
 * The test is wrapped in RB_UNLIKELY because comments are expected to be rare.
 */
#define SKIP_COMMENT(ptr, limit)                                                  \
    if (RB_UNLIKELY((ptr) + 1 < (limit) && (ptr)[0] == '/' && (ptr)[1] == '*')) { \
        for ((ptr) += 2;                                                          \
             (ptr) + 1 < (limit) && ((ptr)[0] != '*' || (ptr)[1] != '/');         \
             (ptr)++) {                                                           \
        }                                                                         \
        if ((ptr) + 1 < (limit)) {                                                \
            (ptr) += 2;                                                           \
        }                                                                         \
        continue;                                                                 \
    }
38
+
39
/*
 * Locate the '}' that closes an already-opened block.
 *
 * start - first character after the opening '{'
 * end   - one past the last character that may be examined
 *
 * Nested '{' ... '}' pairs are balanced while scanning.
 * Returns a pointer to the closing '}', or the position where scanning
 * stopped (end, or start when start >= end) if no closer exists.
 */
static inline const char* find_matching_brace(const char *start, const char *end) {
    const char *cursor = start;
    int open_blocks = 1;

    for (; cursor < end; cursor++) {
        if (*cursor == '{') {
            open_blocks++;
        } else if (*cursor == '}' && --open_blocks == 0) {
            return cursor;
        }
    }

    return cursor;
}
53
+
54
/*
 * Locate the ')' that closes an already-opened parenthesis.
 *
 * start - first character after the opening '('
 * end   - one past the last character that may be examined
 *
 * Nested '(' ... ')' pairs are balanced while scanning.
 * Returns a pointer to the closing ')', or the position where scanning
 * stopped (end, or start when start >= end) if no closer exists.
 */
static inline const char* find_matching_paren(const char *start, const char *end) {
    const char *cursor = start;
    int pending = 1;

    while (cursor < end) {
        switch (*cursor) {
        case '(':
            pending++;
            break;
        case ')':
            if (--pending == 0) {
                return cursor;
            }
            break;
        default:
            break;
        }
        cursor++;
    }

    return cursor;
}
68
+
69
+ // Lowercase property name (CSS property names are ASCII-only)
70
+ // Non-static so merge_new.c can use it
71
+ VALUE lowercase_property(VALUE property_str) {
72
+ Check_Type(property_str, T_STRING);
73
+
74
+ long len = RSTRING_LEN(property_str);
75
+ const char *src = RSTRING_PTR(property_str);
76
+
77
+ VALUE result = rb_str_buf_new(len);
78
+ rb_enc_associate(result, rb_usascii_encoding());
79
+
80
+ for (long i = 0; i < len; i++) {
81
+ char c = src[i];
82
+ if (c >= 'A' && c <= 'Z') {
83
+ c += 32; // Lowercase
84
+ }
85
+ rb_str_buf_cat(result, &c, 1);
86
+ }
87
+
88
+ return result;
89
+ }
90
+
91
/*
 * Heuristically decide whether a rule body holds nested rules in addition
 * to (or instead of) plain declarations.
 *
 * The body is examined one item at a time, where an item ends at ';' or at
 * a newline. For each item, after skipping leading whitespace and comments:
 *   - a leading '@' counts as a nested at-rule                      -> 1
 *   - a leading & . # [ : * > + ~ counts as a nested selector only if a '{'
 *     is reached before the next ';' or newline                     -> 1
 *   - anything else is treated as a declaration and skipped
 *
 * Examples:
 *   "color: red; font-size: 14px;"      -> 0
 *   "color: red; & .child { ... }"      -> 1
 *   "color: red; @media (...) { ... }"  -> 1
 *
 * Returns: 1 if something that looks like a nested rule is found, else 0
 */
static int has_nested_selectors(const char *start, const char *end) {
    const char *cur = start;

    while (cur < end) {
        trim_leading(&cur, end);
        if (cur >= end) {
            break;
        }

        SKIP_COMMENT(cur, end);

        const char lead = *cur;

        switch (lead) {
        case '@':
            // Any at-rule inside a rule body is treated as nesting
            return 1;

        case '&': case '.': case '#': case '[': case ':':
        case '*': case '>': case '+': case '~': {
            // Candidate selector: confirmed only if '{' comes before the item ends
            const char *scan = cur + 1;
            while (scan < end && *scan != '{' && *scan != ';' && *scan != '\n') {
                scan++;
            }
            if (scan < end && *scan == '{') {
                return 1;
            }
            break;
        }

        default:
            break;
        }

        // Move past the current item (up to and including ';' or newline)
        while (cur < end && *cur != ';' && *cur != '\n') {
            cur++;
        }
        if (cur < end) {
            cur++;
        }
    }

    return 0;
}
149
+
150
+ /*
151
+ * Resolve nested selector against parent selector
152
+ *
153
+ * Examples:
154
+ * resolve_nested_selector(".parent", "& .child") => ".parent .child" (explicit)
155
+ * resolve_nested_selector(".parent", "&:hover") => ".parent:hover" (explicit)
156
+ * resolve_nested_selector(".parent", "&.active") => ".parent.active" (explicit)
157
+ * resolve_nested_selector(".parent", ".child") => ".parent .child" (implicit)
158
+ * resolve_nested_selector(".parent", "> .child") => ".parent > .child" (implicit combinator)
159
+ *
160
+ * Returns: [resolved_selector (String), nesting_style (Fixnum)]
161
+ * nesting_style: 0 = NESTING_STYLE_IMPLICIT, 1 = NESTING_STYLE_EXPLICIT
162
+ */
163
+ static VALUE resolve_nested_selector(VALUE parent_selector, const char *nested_sel, long nested_len) {
164
+ const char *parent = RSTRING_PTR(parent_selector);
165
+ long parent_len = RSTRING_LEN(parent_selector);
166
+
167
+ // Check if nested selector contains &
168
+ int has_ampersand = 0;
169
+ for (long i = 0; i < nested_len; i++) {
170
+ if (nested_sel[i] == '&') {
171
+ has_ampersand = 1;
172
+ break;
173
+ }
174
+ }
175
+
176
+ VALUE resolved;
177
+ int nesting_style;
178
+
179
+ if (has_ampersand) {
180
+ // Explicit nesting - replace & with parent
181
+ // Example: parent=".button", nested="&:hover" => ".button:hover"
182
+ // &:hover
183
+ // ^ - Replace & with ".button"
184
+ // ^^^^^^ - Copy rest as-is
185
+ nesting_style = NESTING_STYLE_EXPLICIT;
186
+
187
+ // Check if selector starts with a combinator (relative selector)
188
+ // Example: "+ .bar + &" should become ".foo + .bar + .foo"
189
+ const char *nested_trimmed = nested_sel;
190
+ const char *nested_trimmed_end = nested_sel + nested_len;
191
+ trim_leading(&nested_trimmed, nested_trimmed_end);
192
+
193
+ int starts_with_combinator = 0;
194
+ if (nested_trimmed < nested_trimmed_end) {
195
+ char first_char = *nested_trimmed;
196
+ if (first_char == '+' || first_char == '>' || first_char == '~') {
197
+ starts_with_combinator = 1;
198
+ }
199
+ }
200
+
201
+ // Build result by replacing & with parent (add extra space if starts with combinator)
202
+ VALUE result = rb_str_buf_new(parent_len + nested_len + (starts_with_combinator ? parent_len + 2 : 0));
203
+ rb_enc_associate(result, rb_utf8_encoding());
204
+
205
+ // If starts with combinator, prepend parent first with space
206
+ // Example: "+ .bar + &" => ".foo + .bar + .foo"
207
+ if (starts_with_combinator) {
208
+ rb_str_buf_cat(result, parent, parent_len);
209
+ rb_str_buf_cat(result, " ", 1);
210
+ }
211
+
212
+ long i = 0;
213
+ while (i < nested_len) {
214
+ if (nested_sel[i] == '&') { // At: '&'
215
+ // Replace & with parent selector
216
+ rb_str_buf_cat(result, parent, parent_len); // Output: ".button"
217
+ i++; // Move to: ':'
218
+ } else {
219
+ // Copy character as-is
220
+ rb_str_buf_cat(result, &nested_sel[i], 1); // Output: ':hover'
221
+ i++;
222
+ }
223
+ }
224
+
225
+ resolved = result;
226
+ } else {
227
+ // Implicit nesting - prepend parent with appropriate spacing
228
+ // Example: parent=".parent", nested=".child" => ".parent .child"
229
+ // .child
230
+ // - Prepend ".parent " before ".child"
231
+ // Example: parent=".parent", nested="> .child" => ".parent > .child"
232
+ // > .child
233
+ // - Prepend ".parent " before "> .child"
234
+ nesting_style = NESTING_STYLE_IMPLICIT;
235
+
236
+ const char *nested_trimmed = nested_sel;
237
+ const char *nested_end = nested_sel + nested_len;
238
+
239
+ // Trim leading whitespace from nested selector
240
+ trim_leading(&nested_trimmed, nested_end);
241
+ long trimmed_len = nested_end - nested_trimmed;
242
+
243
+ VALUE result = rb_str_buf_new(parent_len + 1 + trimmed_len);
244
+ rb_enc_associate(result, rb_utf8_encoding());
245
+
246
+ // Add parent // Output: ".parent"
247
+ rb_str_buf_cat(result, parent, parent_len);
248
+
249
+ // Add separator space (before combinator or for implicit descendant) // Output: " "
250
+ rb_str_buf_cat(result, " ", 1);
251
+
252
+ // Add nested selector (trimmed) // Output: ".child"
253
+ rb_str_buf_cat(result, nested_trimmed, trimmed_len);
254
+
255
+ resolved = result;
256
+ }
257
+
258
+ // Return array [resolved_selector, nesting_style]
259
+ VALUE result_array = rb_ary_new_from_args(2, resolved, INT2FIX(nesting_style));
260
+
261
+ // Guard parent_selector since we extracted C pointer and did allocations
262
+ RB_GC_GUARD(parent_selector);
263
+
264
+ return result_array;
265
+ }
266
+
267
+ /*
268
+ * Extract media types from a media query string
269
+ * Examples:
270
+ * "screen" => [:screen]
271
+ * "screen, print" => [:screen, :print]
272
+ * "screen and (min-width: 768px)" => [:screen]
273
+ * "(min-width: 768px)" => [] // No media type, just condition
274
+ *
275
+ * Returns: Ruby array of symbols
276
+ */
277
+ static VALUE extract_media_types(const char *query, long query_len) {
278
+ VALUE types = rb_ary_new();
279
+
280
+ const char *p = query;
281
+ const char *end = query + query_len;
282
+
283
+ while (p < end) {
284
+ // Skip whitespace
285
+ while (p < end && IS_WHITESPACE(*p)) p++;
286
+ if (p >= end) break;
287
+
288
+ // Check for opening paren (skip conditions like "(min-width: 768px)")
289
+ if (*p == '(') {
290
+ // Skip to matching closing paren
291
+ const char *closing = find_matching_paren(p, end);
292
+ p = (closing < end) ? closing + 1 : closing;
293
+ continue;
294
+ }
295
+
296
+ // Find end of word (media type or keyword)
297
+ const char *word_start = p;
298
+ while (p < end && !IS_WHITESPACE(*p) && *p != ',' && *p != '(' && *p != ':') {
299
+ p++;
300
+ }
301
+
302
+ if (p > word_start) {
303
+ long word_len = p - word_start;
304
+
305
+ // Check if this is a media feature (followed by ':')
306
+ // Example: "orientation" in "orientation: landscape" is not a media type
307
+ int is_media_feature = (p < end && *p == ':');
308
+
309
+ // Check if it's a keyword (and, or, not, only)
310
+ int is_keyword = (word_len == 3 && strncmp(word_start, "and", 3) == 0) ||
311
+ (word_len == 2 && strncmp(word_start, "or", 2) == 0) ||
312
+ (word_len == 3 && strncmp(word_start, "not", 3) == 0) ||
313
+ (word_len == 4 && strncmp(word_start, "only", 4) == 0);
314
+
315
+ if (!is_keyword && !is_media_feature) {
316
+ // This is a media type - add it as symbol
317
+ VALUE type_sym = ID2SYM(rb_intern2(word_start, word_len));
318
+ rb_ary_push(types, type_sym);
319
+ }
320
+ }
321
+
322
+ // Skip to comma or end
323
+ while (p < end && *p != ',') {
324
+ if (*p == '(') {
325
+ // Skip condition
326
+ const char *closing = find_matching_paren(p, end);
327
+ p = (closing < end) ? closing + 1 : closing;
328
+ } else {
329
+ p++;
330
+ }
331
+ }
332
+
333
+ if (p < end && *p == ',') p++; // Skip comma
334
+ }
335
+
336
+ return types;
337
+ }
338
+
339
+ /*
340
+ * Add rule ID to media index for a given media query symbol
341
+ * Creates array if it doesn't exist yet
342
+ */
343
+ static void add_to_media_index(VALUE media_index, VALUE media_sym, int rule_id) {
344
+ VALUE rule_ids = rb_hash_aref(media_index, media_sym);
345
+
346
+ if (NIL_P(rule_ids)) {
347
+ rule_ids = rb_ary_new();
348
+ rb_hash_aset(media_index, media_sym, rule_ids);
349
+ }
350
+
351
+ rb_ary_push(rule_ids, INT2FIX(rule_id));
352
+ }
353
+
354
+ /*
355
+ * Update media index with rule ID for given media query
356
+ * Extracts media types and adds rule to each type's array
357
+ * Also adds to the full query symbol
358
+ */
359
+ static void update_media_index(ParserContext *ctx, VALUE media_sym, int rule_id) {
360
+ if (NIL_P(media_sym)) {
361
+ return; // No media query - rule applies to all media
362
+ }
363
+
364
+ // Add to full query symbol
365
+ add_to_media_index(ctx->media_index, media_sym, rule_id);
366
+
367
+ // Extract media types and add to each (if different from full query)
368
+ VALUE media_str = rb_sym2str(media_sym);
369
+ const char *query = RSTRING_PTR(media_str);
370
+ long query_len = RSTRING_LEN(media_str);
371
+
372
+ VALUE media_types = extract_media_types(query, query_len);
373
+ long types_len = RARRAY_LEN(media_types);
374
+
375
+ for (long i = 0; i < types_len; i++) {
376
+ VALUE type_sym = rb_ary_entry(media_types, i);
377
+ // Only add if different from full query (avoid duplicates)
378
+ if (type_sym != media_sym) {
379
+ add_to_media_index(ctx->media_index, type_sym, rule_id);
380
+ }
381
+ }
382
+
383
+ // Guard media_str since we extracted C pointer and called extract_media_types (which allocates)
384
+ RB_GC_GUARD(media_str);
385
+ }
386
+
387
+ /*
388
+ * Parse declaration block into array of Declaration structs
389
+ *
390
+ * Example input: "color: red; background: url(image.png); font-size: 14px !important"
391
+ * Example output: [Declaration("color", "red", false),
392
+ * Declaration("background", "url(image.png)", false),
393
+ * Declaration("font-size", "14px", true)]
394
+ *
395
+ * Handles:
396
+ * - Multiple declarations separated by semicolons
397
+ * - Values containing parentheses (e.g., url(...), rgba(...))
398
+ * - !important flag
399
+ */
400
+ static VALUE parse_declarations(const char *start, const char *end) {
401
+ VALUE declarations = rb_ary_new();
402
+
403
+ const char *pos = start;
404
+ while (pos < end) {
405
+ // Skip whitespace and semicolons
406
+ while (pos < end && (IS_WHITESPACE(*pos) || *pos == ';')) {
407
+ pos++;
408
+ }
409
+ if (pos >= end) break;
410
+
411
+ // Find property (up to colon)
412
+ // Example: "color: red; ..."
413
+ // ^pos ^pos (at :)
414
+ const char *prop_start = pos;
415
+ while (pos < end && *pos != ':') pos++;
416
+ if (pos >= end) break; // No colon found
417
+
418
+ const char *prop_end = pos;
419
+ // Trim whitespace from property
420
+ trim_trailing(prop_start, &prop_end);
421
+ trim_leading(&prop_start, prop_end);
422
+
423
+ pos++; // Skip colon
424
+
425
+ // Skip whitespace after colon
426
+ while (pos < end && IS_WHITESPACE(*pos)) {
427
+ pos++;
428
+ }
429
+
430
+ // Find value (up to semicolon or end)
431
+ // Must track paren depth to avoid breaking on semicolons inside url() or rgba()
432
+ // Example: "url(data:image/svg+xml;base64,...); next-prop: ..."
433
+ // ^val_start ^pos (at ; outside parens)
434
+ const char *val_start = pos;
435
+ int paren_depth = 0;
436
+ while (pos < end) {
437
+ if (*pos == '(') { // At: '('
438
+ paren_depth++; // Depth: 1
439
+ } else if (*pos == ')') { // At: ')'
440
+ paren_depth--; // Depth: 0
441
+ } else if (*pos == ';' && paren_depth == 0) { // At: ';' (outside parens)
442
+ break; // Found terminating semicolon
443
+ }
444
+ pos++;
445
+ }
446
+ const char *val_end = pos;
447
+
448
+ // Trim trailing whitespace from value
449
+ trim_trailing(val_start, &val_end);
450
+
451
+ // Check for !important
452
+ int is_important = 0;
453
+ if (val_end - val_start >= 10) { // strlen("!important") = 10
454
+ const char *check = val_end - 10;
455
+ while (check < val_end && IS_WHITESPACE(*check)) check++;
456
+ if (check < val_end && *check == '!') {
457
+ check++;
458
+ while (check < val_end && IS_WHITESPACE(*check)) check++;
459
+ // strncmp safely handles remaining length check
460
+ if (check + 9 <= val_end && strncmp(check, "important", 9) == 0) {
461
+ is_important = 1;
462
+ const char *important_pos = check - 1;
463
+ while (important_pos > val_start && (IS_WHITESPACE(*(important_pos-1)) || *(important_pos-1) == '!')) {
464
+ important_pos--;
465
+ }
466
+ val_end = important_pos;
467
+ }
468
+ }
469
+ }
470
+
471
+ // Final trim
472
+ trim_trailing(val_start, &val_end);
473
+
474
+ // Skip if value is empty
475
+ if (val_end > val_start) {
476
+ long prop_len = prop_end - prop_start;
477
+ long val_len = val_end - val_start;
478
+
479
+ // Check property name length
480
+ if (prop_len > MAX_PROPERTY_NAME_LENGTH) {
481
+ rb_raise(eSizeError,
482
+ "Property name too long: %ld bytes (max %d)",
483
+ prop_len, MAX_PROPERTY_NAME_LENGTH);
484
+ }
485
+
486
+ // Check property value length
487
+ if (val_len > MAX_PROPERTY_VALUE_LENGTH) {
488
+ rb_raise(eSizeError,
489
+ "Property value too long: %ld bytes (max %d)",
490
+ val_len, MAX_PROPERTY_VALUE_LENGTH);
491
+ }
492
+
493
+ // Create property string and lowercase it
494
+ VALUE property_raw = rb_usascii_str_new(prop_start, prop_len);
495
+ VALUE property = lowercase_property(property_raw);
496
+ VALUE value = rb_utf8_str_new(val_start, val_len);
497
+
498
+ // Create Declaration struct
499
+ VALUE decl = rb_struct_new(cDeclaration,
500
+ property,
501
+ value,
502
+ is_important ? Qtrue : Qfalse
503
+ );
504
+
505
+ rb_ary_push(declarations, decl);
506
+ }
507
+
508
+ if (pos < end && *pos == ';') pos++; // Skip semicolon if present
509
+ }
510
+
511
+ return declarations;
512
+ }
513
+
514
+ // Forward declarations
515
+ static void parse_css_recursive(ParserContext *ctx, const char *css, const char *pe,
516
+ VALUE parent_media_sym, VALUE parent_selector, VALUE parent_rule_id);
517
+ static VALUE combine_media_queries(VALUE parent, VALUE child);
518
+
519
+ /*
520
+ * Combine parent and child media queries
521
+ * Examples:
522
+ * parent="screen", child="min-width: 500px" => "screen and (min-width: 500px)"
523
+ * parent=nil, child="print" => "print"
524
+ * Note: child may have had outer parens stripped, so we re-add them for conditions
525
+ */
526
+ static VALUE combine_media_queries(VALUE parent, VALUE child) {
527
+ if (NIL_P(parent)) {
528
+ return child;
529
+ }
530
+ if (NIL_P(child)) {
531
+ return parent;
532
+ }
533
+
534
+ // Combine: "parent and child"
535
+ VALUE parent_str = rb_sym2str(parent);
536
+ VALUE child_str = rb_sym2str(child);
537
+
538
+ VALUE combined = rb_str_dup(parent_str);
539
+ rb_str_cat2(combined, " and ");
540
+
541
+ // If child is a condition (contains ':'), wrap it in parentheses
542
+ // Example: "min-width: 500px" => "(min-width: 500px)"
543
+ const char *child_ptr = RSTRING_PTR(child_str);
544
+ long child_len = RSTRING_LEN(child_str);
545
+ int has_colon = 0;
546
+ int already_wrapped = (child_len >= 2 && child_ptr[0] == '(' && child_ptr[child_len - 1] == ')');
547
+
548
+ for (long i = 0; i < child_len && !has_colon; i++) {
549
+ if (child_ptr[i] == ':') {
550
+ has_colon = 1;
551
+ }
552
+ }
553
+
554
+ if (has_colon && !already_wrapped) {
555
+ rb_str_cat2(combined, "(");
556
+ rb_str_append(combined, child_str);
557
+ rb_str_cat2(combined, ")");
558
+ } else {
559
+ rb_str_append(combined, child_str);
560
+ }
561
+
562
+ return ID2SYM(rb_intern_str(combined));
563
+ }
564
+
565
+ /*
566
+ * Intern media query string to symbol with safety check
567
+ * Strips outer parentheses from standalone conditions like "(orientation: landscape)"
568
+ */
569
+ static VALUE intern_media_query_safe(ParserContext *ctx, const char *query_str, long query_len) {
570
+ if (query_len == 0) {
571
+ return Qnil;
572
+ }
573
+
574
+ // Safety check
575
+ if (ctx->media_query_count >= MAX_MEDIA_QUERIES) {
576
+ rb_raise(eSizeError,
577
+ "Exceeded maximum unique media queries (%d)",
578
+ MAX_MEDIA_QUERIES);
579
+ }
580
+
581
+ // Strip outer parentheses from standalone conditions
582
+ // Example: "(orientation: landscape)" => "orientation: landscape"
583
+ // But keep: "screen and (min-width: 500px)" as-is
584
+ const char *start = query_str;
585
+ const char *end = query_str + query_len;
586
+
587
+ // Trim whitespace
588
+ while (start < end && IS_WHITESPACE(*start)) start++;
589
+ while (end > start && IS_WHITESPACE(*(end - 1))) end--;
590
+
591
+ if (end > start && *start == '(' && *(end - 1) == ')') {
592
+ // Check if this is a simple wrapped condition (no other parens/operators)
593
+ int depth = 0;
594
+ int has_and_or = 0;
595
+ for (const char *p = start; p < end; p++) {
596
+ if (*p == '(') depth++;
597
+ else if (*p == ')') depth--;
598
+ // Check for "and" or "or" at depth 0 (outside our outer parens)
599
+ if (depth == 0 && p + 3 < end &&
600
+ (strncmp(p, " and ", 5) == 0 || strncmp(p, " or ", 4) == 0)) {
601
+ has_and_or = 1;
602
+ break;
603
+ }
604
+ }
605
+
606
+ // Strip outer parens if depth stays >= 1 (no operators outside) and no and/or
607
+ if (!has_and_or && depth == 0) {
608
+ start++; // Skip opening (
609
+ end--; // Skip closing )
610
+ }
611
+ }
612
+
613
+ long final_len = end - start;
614
+ VALUE query_string = rb_usascii_str_new(start, final_len);
615
+ VALUE sym = ID2SYM(rb_intern_str(query_string));
616
+ ctx->media_query_count++;
617
+
618
+ return sym;
619
+ }
620
+
621
+ /*
622
+ * Parse mixed declarations and nested selectors from a block
623
+ * Used when a CSS rule block contains both declarations and nested rules
624
+ *
625
+ * Example CSS block being parsed:
626
+ * .parent {
627
+ * color: red; <- declaration
628
+ * & .child { <- nested selector
629
+ * font-size: 14px;
630
+ * }
631
+ * @media (min-width: 768px) { <- nested @media
632
+ * padding: 10px;
633
+ * }
634
+ * }
635
+ *
636
+ * Returns: Array of declarations (only the declarations, not nested rules)
637
+ */
638
+ static VALUE parse_mixed_block(ParserContext *ctx, const char *start, const char *end,
639
+ VALUE parent_selector, VALUE parent_rule_id, VALUE parent_media_sym) {
640
+ // Check recursion depth to prevent stack overflow
641
+ if (ctx->depth > MAX_PARSE_DEPTH) {
642
+ rb_raise(eDepthError,
643
+ "CSS nesting too deep: exceeded maximum depth of %d",
644
+ MAX_PARSE_DEPTH);
645
+ }
646
+
647
+ VALUE declarations = rb_ary_new();
648
+ const char *p = start;
649
+
650
+ while (p < end) {
651
+ trim_leading(&p, end);
652
+ if (p >= end) break;
653
+
654
+ SKIP_COMMENT(p, end);
655
+
656
+ // Check if this is a nested @media query
657
+ if (*p == '@' && p + 6 < end && strncmp(p, "@media", 6) == 0 &&
658
+ (p + 6 == end || IS_WHITESPACE(p[6]))) {
659
+ // Nested @media - parse with parent selector as context
660
+ const char *media_start = p + 6;
661
+ trim_leading(&media_start, end);
662
+
663
+ // Find opening brace
664
+ const char *media_query_end = media_start;
665
+ while (media_query_end < end && *media_query_end != '{') {
666
+ media_query_end++;
667
+ }
668
+ if (media_query_end >= end) break;
669
+
670
+ // Extract media query
671
+ const char *media_query_start = media_start;
672
+ const char *media_query_end_trimmed = media_query_end;
673
+ trim_trailing(media_query_start, &media_query_end_trimmed);
674
+ VALUE media_sym = intern_media_query_safe(ctx, media_query_start, media_query_end_trimmed - media_query_start);
675
+
676
+ p = media_query_end + 1; // Skip {
677
+
678
+ // Find matching closing brace
679
+ const char *media_block_start = p;
680
+ const char *media_block_end = find_matching_brace(p, end);
681
+ p = media_block_end;
682
+
683
+ if (p < end) p++; // Skip }
684
+
685
+ // Combine media queries: parent + child
686
+ VALUE combined_media_sym = combine_media_queries(parent_media_sym, media_sym);
687
+
688
+ // Parse the block with parse_mixed_block to support further nesting
689
+ // Create a rule ID for this media rule
690
+ int media_rule_id = ctx->rule_id_counter++;
691
+
692
+ // Reserve position for parent rule
693
+ long parent_pos = RARRAY_LEN(ctx->rules_array);
694
+ rb_ary_push(ctx->rules_array, Qnil);
695
+
696
+ // Parse mixed block (may contain declarations and/or nested @media)
697
+ ctx->depth++;
698
+ VALUE media_declarations = parse_mixed_block(ctx, media_block_start, media_block_end,
699
+ parent_selector, INT2FIX(media_rule_id), combined_media_sym);
700
+ ctx->depth--;
701
+
702
+ // Create rule with the parent selector and declarations, associated with combined media query
703
+ VALUE rule = rb_struct_new(cRule,
704
+ INT2FIX(media_rule_id),
705
+ parent_selector,
706
+ media_declarations,
707
+ Qnil, // specificity
708
+ parent_rule_id, // Link to parent for nested @media serialization
709
+ Qnil // nesting_style (nil for @media nesting)
710
+ );
711
+
712
+ // Mark that we have nesting (only set once)
713
+ if (!ctx->has_nesting && !NIL_P(parent_rule_id)) {
714
+ ctx->has_nesting = 1;
715
+ }
716
+
717
+ // Replace placeholder with actual rule
718
+ rb_ary_store(ctx->rules_array, parent_pos, rule);
719
+ update_media_index(ctx, combined_media_sym, media_rule_id);
720
+
721
+ continue;
722
+ }
723
+
724
+ // Check if this is a nested selector (starts with nesting indicators)
725
+ // Example within parse_mixed_block:
726
+ // Input block: "color: red; & .child { font: 14px; }"
727
+ // ^p (at &) - nested selector detected
728
+ char c = *p;
729
+ if (c == '&' || c == '.' || c == '#' || c == '[' || c == ':' ||
730
+ c == '*' || c == '>' || c == '+' || c == '~' || c == '@') {
731
+ // This is likely a nested selector - find the opening brace
732
+ // Example: "& .child { font: 14px; }"
733
+ // ^nested_sel_start ^p (at {)
734
+ const char *nested_sel_start = p;
735
+ while (p < end && *p != '{') p++;
736
+ if (p >= end) break;
737
+
738
+ const char *nested_sel_end = p;
739
+ trim_trailing(nested_sel_start, &nested_sel_end);
740
+
741
+ p++; // Skip {
742
+
743
+ // Find matching closing brace
744
+ // Example: "& .child { font: 14px; }"
745
+ // ^nested_block_start ^nested_block_end (at })
746
+ const char *nested_block_start = p;
747
+ const char *nested_block_end = find_matching_brace(p, end);
748
+ p = nested_block_end;
749
+
750
+ if (p < end) p++; // Skip }
751
+
752
+ // Split nested selector on commas and create a rule for each
753
+ // Example: "& .child, & .sibling { ... }" creates 2 nested rules
754
+ const char *seg_start = nested_sel_start;
755
+ const char *seg = nested_sel_start;
756
+
757
+ while (seg <= nested_sel_end) {
758
+ if (seg == nested_sel_end || *seg == ',') { // At: ',' or end
759
+ // Trim segment
760
+ while (seg_start < seg && IS_WHITESPACE(*seg_start)) {
761
+ seg_start++;
762
+ }
763
+
764
+ const char *seg_end_ptr = seg;
765
+ while (seg_end_ptr > seg_start && IS_WHITESPACE(*(seg_end_ptr - 1))) {
766
+ seg_end_ptr--;
767
+ }
768
+
769
+ if (seg_end_ptr > seg_start) {
770
+ // Resolve nested selector
771
+ VALUE result = resolve_nested_selector(parent_selector, seg_start, seg_end_ptr - seg_start);
772
+ VALUE resolved_selector = rb_ary_entry(result, 0);
773
+ VALUE nesting_style = rb_ary_entry(result, 1);
774
+
775
+ // Get rule ID
776
+ int rule_id = ctx->rule_id_counter++;
777
+
778
+ // Recursively parse nested block
779
+ ctx->depth++;
780
+ VALUE nested_declarations = parse_mixed_block(ctx, nested_block_start, nested_block_end,
781
+ resolved_selector, INT2FIX(rule_id), parent_media_sym);
782
+ ctx->depth--;
783
+
784
+ // Create rule for nested selector
785
+ VALUE rule = rb_struct_new(cRule,
786
+ INT2FIX(rule_id),
787
+ resolved_selector,
788
+ nested_declarations,
789
+ Qnil, // specificity
790
+ parent_rule_id,
791
+ nesting_style
792
+ );
793
+
794
+ // Mark that we have nesting (only set once)
795
+ if (!ctx->has_nesting && !NIL_P(parent_rule_id)) {
796
+ ctx->has_nesting = 1;
797
+ }
798
+
799
+ rb_ary_push(ctx->rules_array, rule);
800
+ update_media_index(ctx, parent_media_sym, rule_id);
801
+ }
802
+
803
+ seg_start = seg + 1;
804
+ }
805
+ seg++;
806
+ }
807
+
808
+ continue;
809
+ }
810
+
811
+ // This is a declaration - parse it
812
+ const char *prop_start = p;
813
+ while (p < end && *p != ':' && *p != ';' && *p != '{') p++;
814
+ if (p >= end || *p != ':') {
815
+ // Malformed - skip to semicolon
816
+ while (p < end && *p != ';') p++;
817
+ if (p < end) p++;
818
+ continue;
819
+ }
820
+
821
+ const char *prop_end = p;
822
+ trim_trailing(prop_start, &prop_end);
823
+
824
+ p++; // Skip :
825
+ trim_leading(&p, end);
826
+
827
+ const char *val_start = p;
828
+ int important = 0;
829
+
830
+ // Find end of value (semicolon or closing brace or end)
831
+ while (p < end && *p != ';' && *p != '}') p++;
832
+ const char *val_end = p;
833
+
834
+ // Check for !important
835
+ const char *important_check = val_end - 10; // " !important"
836
+ if (important_check >= val_start) {
837
+ trim_trailing(val_start, &val_end);
838
+ if (val_end - val_start >= 10) {
839
+ if (strncmp(val_end - 10, "!important", 10) == 0) {
840
+ important = 1;
841
+ val_end -= 10;
842
+ trim_trailing(val_start, &val_end);
843
+ }
844
+ }
845
+ } else {
846
+ trim_trailing(val_start, &val_end);
847
+ }
848
+
849
+ if (p < end && *p == ';') p++;
850
+
851
+ // Create declaration
852
+ if (prop_end > prop_start && val_end > val_start) {
853
+ long prop_len = prop_end - prop_start;
854
+ long val_len = val_end - val_start;
855
+
856
+ // Check property name length
857
+ if (prop_len > MAX_PROPERTY_NAME_LENGTH) {
858
+ rb_raise(eSizeError,
859
+ "Property name too long: %ld bytes (max %d)",
860
+ prop_len, MAX_PROPERTY_NAME_LENGTH);
861
+ }
862
+
863
+ // Check property value length
864
+ if (val_len > MAX_PROPERTY_VALUE_LENGTH) {
865
+ rb_raise(eSizeError,
866
+ "Property value too long: %ld bytes (max %d)",
867
+ val_len, MAX_PROPERTY_VALUE_LENGTH);
868
+ }
869
+
870
+ VALUE property_raw = rb_usascii_str_new(prop_start, prop_len);
871
+ VALUE property = lowercase_property(property_raw);
872
+ VALUE value = rb_utf8_str_new(val_start, val_len);
873
+
874
+ VALUE decl = rb_struct_new(cDeclaration,
875
+ property,
876
+ value,
877
+ important ? Qtrue : Qfalse
878
+ );
879
+
880
+ rb_ary_push(declarations, decl);
881
+ }
882
+ }
883
+
884
+ return declarations;
885
+ }
886
+
887
+ /*
888
+ * Parse CSS recursively with media query context and optional parent selector for nesting
889
+ *
890
+ * parent_media_sym: Parent media query symbol (or Qnil for no media context)
891
+ * parent_selector: Parent selector string for nested rules (or Qnil for top-level)
892
+ * parent_rule_id: Parent rule ID (Fixnum) for nested rules (or Qnil for top-level)
893
+ */
894
+ static void parse_css_recursive(ParserContext *ctx, const char *css, const char *pe,
895
+ VALUE parent_media_sym, VALUE parent_selector, VALUE parent_rule_id) {
896
+ // Check recursion depth to prevent stack overflow
897
+ if (ctx->depth > MAX_PARSE_DEPTH) {
898
+ rb_raise(eDepthError,
899
+ "CSS nesting too deep: exceeded maximum depth of %d",
900
+ MAX_PARSE_DEPTH);
901
+ }
902
+
903
+ const char *p = css;
904
+
905
+ const char *selector_start = NULL;
906
+ const char *decl_start = NULL;
907
+ int brace_depth = 0;
908
+
909
+ while (p < pe) {
910
+ // Skip whitespace
911
+ while (p < pe && IS_WHITESPACE(*p)) p++;
912
+ if (p >= pe) break;
913
+
914
+ // Skip comments (rare in typical CSS)
915
+ SKIP_COMMENT(p, pe);
916
+
917
+ // Check for @media at-rule (only at depth 0)
918
+ if (RB_UNLIKELY(brace_depth == 0 && p + 6 < pe && *p == '@' &&
919
+ strncmp(p + 1, "media", 5) == 0 && IS_WHITESPACE(p[6]))) {
920
+ p += 6; // Skip "@media"
921
+
922
+ // Skip whitespace
923
+ while (p < pe && IS_WHITESPACE(*p)) p++;
924
+
925
+ // Find media query (up to opening brace)
926
+ const char *mq_start = p;
927
+ while (p < pe && *p != '{') p++;
928
+ const char *mq_end = p;
929
+
930
+ // Trim
931
+ trim_trailing(mq_start, &mq_end);
932
+
933
+ if (p >= pe || *p != '{') {
934
+ continue; // Malformed
935
+ }
936
+
937
+ // Intern media query
938
+ VALUE child_media_sym = intern_media_query_safe(ctx, mq_start, mq_end - mq_start);
939
+
940
+ // Combine with parent
941
+ VALUE combined_media_sym = combine_media_queries(parent_media_sym, child_media_sym);
942
+
943
+ p++; // Skip opening {
944
+
945
+ // Find matching closing brace
946
+ const char *block_start = p;
947
+ const char *block_end = find_matching_brace(p, pe);
948
+ p = block_end;
949
+
950
+ // Recursively parse @media block with combined media context
951
+ ctx->depth++;
952
+ parse_css_recursive(ctx, block_start, block_end, combined_media_sym, NO_PARENT_SELECTOR, NO_PARENT_RULE_ID);
953
+ ctx->depth--;
954
+
955
+ if (p < pe && *p == '}') p++;
956
+ continue;
957
+ }
958
+
959
+ // Check for conditional group at-rules: @supports, @layer, @container, @scope
960
+ // AND nested block at-rules: @keyframes, @font-face, @page
961
+ // These behave like @media but don't affect media context
962
+ if (RB_UNLIKELY(brace_depth == 0 && *p == '@')) {
963
+ const char *at_start = p + 1;
964
+ const char *at_name_end = at_start;
965
+
966
+ // Find end of at-rule name (stop at whitespace or opening brace)
967
+ while (at_name_end < pe && !IS_WHITESPACE(*at_name_end) && *at_name_end != '{') {
968
+ at_name_end++;
969
+ }
970
+
971
+ long at_name_len = at_name_end - at_start;
972
+
973
+ // Check if this is a conditional group rule
974
+ int is_conditional_group =
975
+ (at_name_len == 8 && strncmp(at_start, "supports", 8) == 0) ||
976
+ (at_name_len == 5 && strncmp(at_start, "layer", 5) == 0) ||
977
+ (at_name_len == 9 && strncmp(at_start, "container", 9) == 0) ||
978
+ (at_name_len == 5 && strncmp(at_start, "scope", 5) == 0);
979
+
980
+ if (is_conditional_group) {
981
+ // Skip to opening brace
982
+ p = at_name_end;
983
+ while (p < pe && *p != '{') p++;
984
+
985
+ if (p >= pe || *p != '{') {
986
+ continue; // Malformed
987
+ }
988
+
989
+ p++; // Skip opening {
990
+
991
+ // Find matching closing brace
992
+ const char *block_start = p;
993
+ const char *block_end = find_matching_brace(p, pe);
994
+ p = block_end;
995
+
996
+ // Recursively parse block content (preserve parent media context)
997
+ ctx->depth++;
998
+ parse_css_recursive(ctx, block_start, block_end, parent_media_sym, parent_selector, parent_rule_id);
999
+ ctx->depth--;
1000
+
1001
+ if (p < pe && *p == '}') p++;
1002
+ continue;
1003
+ }
1004
+
1005
+ // Check for @keyframes (contains <rule-list>)
1006
+ // TODO: Test perf gains by using RB_UNLIKELY(is_keyframes) wrapper
1007
+ int is_keyframes =
1008
+ (at_name_len == 9 && strncmp(at_start, "keyframes", 9) == 0) ||
1009
+ (at_name_len == 17 && strncmp(at_start, "-webkit-keyframes", 17) == 0) ||
1010
+ (at_name_len == 13 && strncmp(at_start, "-moz-keyframes", 13) == 0);
1011
+
1012
+ if (is_keyframes) {
1013
+ // Build full selector string: "@keyframes fade"
1014
+ const char *selector_start = p; // Points to '@'
1015
+ p = at_name_end;
1016
+ while (p < pe && *p != '{') p++;
1017
+
1018
+ if (p >= pe || *p != '{') {
1019
+ continue; // Malformed
1020
+ }
1021
+
1022
+ const char *selector_end = p;
1023
+ while (selector_end > selector_start && IS_WHITESPACE(*(selector_end - 1))) {
1024
+ selector_end--;
1025
+ }
1026
+ VALUE selector = rb_utf8_str_new(selector_start, selector_end - selector_start);
1027
+
1028
+ p++; // Skip opening {
1029
+
1030
+ // Find matching closing brace
1031
+ const char *block_start = p;
1032
+ const char *block_end = find_matching_brace(p, pe);
1033
+ p = block_end;
1034
+
1035
+ // Parse keyframe blocks as rules (from/to/0%/50% etc)
1036
+ ParserContext nested_ctx = {
1037
+ .rules_array = rb_ary_new(),
1038
+ .media_index = rb_hash_new(),
1039
+ .rule_id_counter = 0,
1040
+ .media_query_count = 0,
1041
+ .media_cache = NULL,
1042
+ .has_nesting = 0,
1043
+ .depth = 0
1044
+ };
1045
+ parse_css_recursive(&nested_ctx, block_start, block_end, NO_PARENT_MEDIA, NO_PARENT_SELECTOR, NO_PARENT_RULE_ID);
1046
+
1047
+ // Get rule ID and increment
1048
+ int rule_id = ctx->rule_id_counter++;
1049
+
1050
+ // Create AtRule with nested rules
1051
+ VALUE at_rule = rb_struct_new(cAtRule,
1052
+ INT2FIX(rule_id),
1053
+ selector,
1054
+ nested_ctx.rules_array, // Array of Rule (keyframe blocks)
1055
+ Qnil);
1056
+
1057
+ // Add to rules array
1058
+ rb_ary_push(ctx->rules_array, at_rule);
1059
+
1060
+ // Add to media index if in media query
1061
+ if (!NIL_P(parent_media_sym)) {
1062
+ VALUE rule_ids = rb_hash_aref(ctx->media_index, parent_media_sym);
1063
+ if (NIL_P(rule_ids)) {
1064
+ rule_ids = rb_ary_new();
1065
+ rb_hash_aset(ctx->media_index, parent_media_sym, rule_ids);
1066
+ }
1067
+ rb_ary_push(rule_ids, INT2FIX(rule_id));
1068
+ }
1069
+
1070
+ if (p < pe && *p == '}') p++;
1071
+ continue;
1072
+ }
1073
+
1074
+ // Check for @font-face (contains <declaration-list>)
1075
+ int is_font_face = (at_name_len == 9 && strncmp(at_start, "font-face", 9) == 0);
1076
+
1077
+ if (is_font_face) {
1078
+ // Build selector string: "@font-face"
1079
+ const char *selector_start = p; // Points to '@'
1080
+ p = at_name_end;
1081
+ while (p < pe && *p != '{') p++;
1082
+
1083
+ if (p >= pe || *p != '{') {
1084
+ continue; // Malformed
1085
+ }
1086
+
1087
+ const char *selector_end = p;
1088
+ while (selector_end > selector_start && IS_WHITESPACE(*(selector_end - 1))) {
1089
+ selector_end--;
1090
+ }
1091
+ VALUE selector = rb_utf8_str_new(selector_start, selector_end - selector_start);
1092
+
1093
+ p++; // Skip opening {
1094
+
1095
+ // Find matching closing brace
1096
+ const char *decl_start = p;
1097
+ const char *decl_end = find_matching_brace(p, pe);
1098
+ p = decl_end;
1099
+
1100
+ // Parse declarations
1101
+ VALUE declarations = parse_declarations(decl_start, decl_end);
1102
+
1103
+ // Get rule ID and increment
1104
+ int rule_id = ctx->rule_id_counter++;
1105
+
1106
+ // Create AtRule with declarations
1107
+ VALUE at_rule = rb_struct_new(cAtRule,
1108
+ INT2FIX(rule_id),
1109
+ selector,
1110
+ declarations, // Array of Declaration
1111
+ Qnil);
1112
+
1113
+ // Add to rules array
1114
+ rb_ary_push(ctx->rules_array, at_rule);
1115
+
1116
+ // Add to media index if in media query
1117
+ if (!NIL_P(parent_media_sym)) {
1118
+ VALUE rule_ids = rb_hash_aref(ctx->media_index, parent_media_sym);
1119
+ if (NIL_P(rule_ids)) {
1120
+ rule_ids = rb_ary_new();
1121
+ rb_hash_aset(ctx->media_index, parent_media_sym, rule_ids);
1122
+ }
1123
+ rb_ary_push(rule_ids, INT2FIX(rule_id));
1124
+ }
1125
+
1126
+ if (p < pe && *p == '}') p++;
1127
+ continue;
1128
+ }
1129
+ }
1130
+
1131
+ // Opening brace
1132
+ if (*p == '{') {
1133
+ if (brace_depth == 0 && selector_start != NULL) {
1134
+ decl_start = p + 1;
1135
+ }
1136
+ brace_depth++;
1137
+ p++;
1138
+ continue;
1139
+ }
1140
+
1141
+ // Closing brace
1142
+ if (*p == '}') {
1143
+ brace_depth--;
1144
+ if (brace_depth == 0 && selector_start != NULL && decl_start != NULL) {
1145
+ // We've found a complete CSS rule block - now determine if it has nesting
1146
+ // Example: .parent { color: red; & .child { font-size: 14px; } }
1147
+ // ^selector_start ^decl_start ^p (at })
1148
+ int has_nesting = has_nested_selectors(decl_start, p);
1149
+
1150
+ // Get selector string
1151
+ const char *sel_end = decl_start - 1;
1152
+ while (sel_end > selector_start && IS_WHITESPACE(*(sel_end - 1))) {
1153
+ sel_end--;
1154
+ }
1155
+
1156
+ if (!has_nesting) {
1157
+ // FAST PATH: No nesting - parse as pure declarations
1158
+ VALUE declarations = parse_declarations(decl_start, p);
1159
+
1160
+ // Split on commas to handle multi-selector rules
1161
+ // Example: ".a, .b, .c { color: red; }" creates 3 separate rules
1162
+ // ^selector_start ^sel_end
1163
+ // ^seg_start=seg (scanning for commas)
1164
+ const char *seg_start = selector_start;
1165
+ const char *seg = selector_start;
1166
+
1167
+ while (seg <= sel_end) {
1168
+ if (seg == sel_end || *seg == ',') { // At: ',' or end
1169
+ // Trim segment
1170
+ while (seg_start < seg && IS_WHITESPACE(*seg_start)) {
1171
+ seg_start++;
1172
+ }
1173
+
1174
+ const char *seg_end_ptr = seg;
1175
+ while (seg_end_ptr > seg_start && IS_WHITESPACE(*(seg_end_ptr - 1))) {
1176
+ seg_end_ptr--;
1177
+ }
1178
+
1179
+ if (seg_end_ptr > seg_start) {
1180
+ VALUE selector = rb_utf8_str_new(seg_start, seg_end_ptr - seg_start);
1181
+
1182
+ // Resolve against parent if nested
1183
+ VALUE resolved_selector;
1184
+ VALUE nesting_style_val;
1185
+ VALUE parent_id_val;
1186
+
1187
+ if (!NIL_P(parent_selector)) {
1188
+ // This is a nested rule - resolve selector
1189
+ VALUE result = resolve_nested_selector(parent_selector, RSTRING_PTR(selector), RSTRING_LEN(selector));
1190
+ resolved_selector = rb_ary_entry(result, 0);
1191
+ nesting_style_val = rb_ary_entry(result, 1);
1192
+ parent_id_val = parent_rule_id;
1193
+ } else {
1194
+ // Top-level rule
1195
+ resolved_selector = selector;
1196
+ nesting_style_val = Qnil;
1197
+ parent_id_val = Qnil;
1198
+ }
1199
+
1200
+ // Get rule ID and increment
1201
+ int rule_id = ctx->rule_id_counter++;
1202
+
1203
+ // Create Rule
1204
+ VALUE rule = rb_struct_new(cRule,
1205
+ INT2FIX(rule_id),
1206
+ resolved_selector,
1207
+ rb_ary_dup(declarations),
1208
+ Qnil, // specificity
1209
+ parent_id_val,
1210
+ nesting_style_val
1211
+ );
1212
+
1213
+ // Mark that we have nesting (only set once)
1214
+ if (!ctx->has_nesting && !NIL_P(parent_id_val)) {
1215
+ ctx->has_nesting = 1;
1216
+ }
1217
+
1218
+ rb_ary_push(ctx->rules_array, rule);
1219
+
1220
+ // Update media index
1221
+ update_media_index(ctx, parent_media_sym, rule_id);
1222
+ }
1223
+
1224
+ seg_start = seg + 1;
1225
+ }
1226
+ seg++;
1227
+ }
1228
+ } else {
1229
+ // NESTED PATH: Parse mixed declarations + nested rules
1230
+ // For each comma-separated parent selector, parse the block with that parent
1231
+ //
1232
+ // Example: ".a, .b { color: red; & .child { font: 14px; } }"
1233
+ // ^selector_start ^sel_end
1234
+ // Creates:
1235
+ // - .a with declarations [color: red]
1236
+ // - .a .child with declarations [font: 14px]
1237
+ // - .b with declarations [color: red]
1238
+ // - .b .child with declarations [font: 14px]
1239
+ const char *seg_start = selector_start;
1240
+ const char *seg = selector_start;
1241
+
1242
+ while (seg <= sel_end) {
1243
+ if (seg == sel_end || *seg == ',') { // At: ',' or end
1244
+ // Trim segment
1245
+ while (seg_start < seg && IS_WHITESPACE(*seg_start)) {
1246
+ seg_start++;
1247
+ }
1248
+
1249
+ const char *seg_end_ptr = seg;
1250
+ while (seg_end_ptr > seg_start && IS_WHITESPACE(*(seg_end_ptr - 1))) {
1251
+ seg_end_ptr--;
1252
+ }
1253
+
1254
+ if (seg_end_ptr > seg_start) {
1255
+ VALUE current_selector = rb_utf8_str_new(seg_start, seg_end_ptr - seg_start);
1256
+
1257
+ // Resolve against parent if we're already nested
1258
+ VALUE resolved_current;
1259
+ VALUE current_nesting_style;
1260
+ VALUE current_parent_id;
1261
+
1262
+ if (!NIL_P(parent_selector)) {
1263
+ VALUE result = resolve_nested_selector(parent_selector, RSTRING_PTR(current_selector), RSTRING_LEN(current_selector));
1264
+ resolved_current = rb_ary_entry(result, 0);
1265
+ current_nesting_style = rb_ary_entry(result, 1);
1266
+ current_parent_id = parent_rule_id;
1267
+ } else {
1268
+ resolved_current = current_selector;
1269
+ current_nesting_style = Qnil;
1270
+ current_parent_id = Qnil;
1271
+ }
1272
+
1273
+ // Get rule ID for current selector (increment to reserve it)
1274
+ int current_rule_id = ctx->rule_id_counter++;
1275
+
1276
+ // Reserve parent's position in rules array with placeholder
1277
+ // This ensures parent comes before nested rules in array order (per W3C spec)
1278
+ long parent_position = RARRAY_LEN(ctx->rules_array);
1279
+ rb_ary_push(ctx->rules_array, Qnil);
1280
+
1281
+ // Parse mixed block (declarations + nested selectors)
1282
+ // Nested rules will be added AFTER the placeholder
1283
+ ctx->depth++;
1284
+ VALUE parent_declarations = parse_mixed_block(ctx, decl_start, p,
1285
+ resolved_current, INT2FIX(current_rule_id), parent_media_sym);
1286
+ ctx->depth--;
1287
+
1288
+ // Create parent rule and replace placeholder
1289
+ // Always create the rule (even if empty) to avoid edge cases
1290
+ VALUE rule = rb_struct_new(cRule,
1291
+ INT2FIX(current_rule_id),
1292
+ resolved_current,
1293
+ parent_declarations,
1294
+ Qnil, // specificity
1295
+ current_parent_id,
1296
+ current_nesting_style
1297
+ );
1298
+
1299
+ // Mark that we have nesting (only set once)
1300
+ if (!ctx->has_nesting && !NIL_P(current_parent_id)) {
1301
+ ctx->has_nesting = 1;
1302
+ }
1303
+
1304
+ // Replace placeholder with actual rule - just pointer assignment, fast!
1305
+ rb_ary_store(ctx->rules_array, parent_position, rule);
1306
+ update_media_index(ctx, parent_media_sym, current_rule_id);
1307
+ }
1308
+
1309
+ seg_start = seg + 1;
1310
+ }
1311
+ seg++;
1312
+ }
1313
+ }
1314
+
1315
+ selector_start = NULL;
1316
+ decl_start = NULL;
1317
+ }
1318
+ p++;
1319
+ continue;
1320
+ }
1321
+
1322
+ // Start of selector
1323
+ if (brace_depth == 0 && selector_start == NULL) {
1324
+ selector_start = p;
1325
+ }
1326
+
1327
+ p++;
1328
+ }
1329
+ }
1330
+
1331
+ /*
1332
+ * Parse media query string and extract media types (Ruby-facing function)
1333
+ * Example: "screen, print" => [:screen, :print]
1334
+ * Example: "screen and (min-width: 768px)" => [:screen]
1335
+ *
1336
+ * @param media_query_sym [Symbol] Media query as symbol
1337
+ * @return [Array<Symbol>] Array of media type symbols
1338
+ */
1339
+ VALUE parse_media_types(VALUE self, VALUE media_query_sym) {
1340
+ Check_Type(media_query_sym, T_SYMBOL);
1341
+
1342
+ VALUE query_string = rb_sym2str(media_query_sym);
1343
+ const char *query_str = RSTRING_PTR(query_string);
1344
+ long query_len = RSTRING_LEN(query_string);
1345
+
1346
+ return extract_media_types(query_str, query_len);
1347
+ }
1348
+
1349
+ /*
1350
+ * Main parse entry point
1351
+ * Returns: { rules: [...], media_index: {...}, charset: "..." | nil, last_rule_id: N }
1352
+ */
1353
+ VALUE parse_css_new_impl(VALUE css_string, int rule_id_offset) {
1354
+ Check_Type(css_string, T_STRING);
1355
+
1356
+ const char *css = RSTRING_PTR(css_string);
1357
+ const char *pe = css + RSTRING_LEN(css_string);
1358
+ const char *p = css;
1359
+
1360
+ VALUE charset = Qnil;
1361
+
1362
+ // Extract @charset
1363
+ if (RSTRING_LEN(css_string) > 10 && strncmp(css, "@charset ", 9) == 0) {
1364
+ char *quote_start = strchr(css + 9, '"');
1365
+ if (quote_start != NULL) {
1366
+ char *quote_end = strchr(quote_start + 1, '"');
1367
+ if (quote_end != NULL) {
1368
+ charset = rb_str_new(quote_start + 1, quote_end - quote_start - 1);
1369
+ char *semicolon = quote_end + 1;
1370
+ while (semicolon < pe && IS_WHITESPACE(*semicolon)) {
1371
+ semicolon++;
1372
+ }
1373
+ if (semicolon < pe && *semicolon == ';') {
1374
+ p = semicolon + 1;
1375
+ }
1376
+ }
1377
+ }
1378
+ }
1379
+
1380
+ // Skip @import statements - they should be handled by ImportResolver at Ruby level
1381
+ // Per CSS spec, @import must come before all rules (except @charset)
1382
+ while (p < pe) {
1383
+ // Skip whitespace
1384
+ while (p < pe && IS_WHITESPACE(*p)) p++;
1385
+ if (p >= pe) break;
1386
+
1387
+ // Skip comments
1388
+ if (p + 1 < pe && p[0] == '/' && p[1] == '*') {
1389
+ p += 2;
1390
+ while (p + 1 < pe) {
1391
+ if (p[0] == '*' && p[1] == '/') {
1392
+ p += 2;
1393
+ break;
1394
+ }
1395
+ p++;
1396
+ }
1397
+ continue;
1398
+ }
1399
+
1400
+ // Check for @import
1401
+ if (p + 7 <= pe && *p == '@' && strncasecmp(p + 1, "import", 6) == 0 &&
1402
+ (p + 7 >= pe || IS_WHITESPACE(p[7]) || p[7] == '\'' || p[7] == '"')) {
1403
+ // Skip to semicolon
1404
+ while (p < pe && *p != ';') p++;
1405
+ if (p < pe) p++; // Skip semicolon
1406
+ continue;
1407
+ }
1408
+
1409
+ // Hit non-@import content, stop skipping
1410
+ break;
1411
+ }
1412
+
1413
+ // Initialize parser context with offset
1414
+ ParserContext ctx;
1415
+ ctx.rules_array = rb_ary_new();
1416
+ ctx.media_index = rb_hash_new();
1417
+ ctx.rule_id_counter = rule_id_offset; // Start from offset
1418
+ ctx.media_query_count = 0;
1419
+ ctx.media_cache = NULL; // Removed - no perf benefit
1420
+ ctx.has_nesting = 0; // Will be set to 1 if any nested rules are created
1421
+ ctx.depth = 0; // Start at depth 0
1422
+
1423
+ // Parse CSS (top-level, no parent context)
1424
+ parse_css_recursive(&ctx, p, pe, NO_PARENT_MEDIA, NO_PARENT_SELECTOR, NO_PARENT_RULE_ID);
1425
+
1426
+ // Build result hash
1427
+ VALUE result = rb_hash_new();
1428
+ rb_hash_aset(result, ID2SYM(rb_intern("rules")), ctx.rules_array);
1429
+ rb_hash_aset(result, ID2SYM(rb_intern("_media_index")), ctx.media_index);
1430
+ rb_hash_aset(result, ID2SYM(rb_intern("charset")), charset);
1431
+ rb_hash_aset(result, ID2SYM(rb_intern("last_rule_id")), INT2FIX(ctx.rule_id_counter));
1432
+ rb_hash_aset(result, ID2SYM(rb_intern("_has_nesting")), ctx.has_nesting ? Qtrue : Qfalse);
1433
+
1434
+ return result;
1435
+ }