RubyGems - cataract - Versions diffs - 0.1.0 - Mend

cataract 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (90) hide show

checksums.yaml +7 -0
data/.clang-tidy +30 -0
data/.github/workflows/ci-macos.yml +12 -0
data/.github/workflows/ci.yml +77 -0
data/.github/workflows/test.yml +76 -0
data/.gitignore +45 -0
data/.overcommit.yml +38 -0
data/.rubocop.yml +83 -0
data/BENCHMARKS.md +201 -0
data/CHANGELOG.md +1 -0
data/Gemfile +27 -0
data/LICENSE +21 -0
data/RAGEL_MIGRATION.md +60 -0
data/README.md +292 -0
data/Rakefile +209 -0
data/benchmarks/benchmark_harness.rb +193 -0
data/benchmarks/benchmark_merging.rb +121 -0
data/benchmarks/benchmark_optimization_comparison.rb +168 -0
data/benchmarks/benchmark_parsing.rb +153 -0
data/benchmarks/benchmark_ragel_removal.rb +56 -0
data/benchmarks/benchmark_runner.rb +70 -0
data/benchmarks/benchmark_serialization.rb +180 -0
data/benchmarks/benchmark_shorthand.rb +109 -0
data/benchmarks/benchmark_shorthand_expansion.rb +176 -0
data/benchmarks/benchmark_specificity.rb +124 -0
data/benchmarks/benchmark_string_allocation.rb +151 -0
data/benchmarks/benchmark_stylesheet_to_s.rb +62 -0
data/benchmarks/benchmark_to_s_cached.rb +55 -0
data/benchmarks/benchmark_value_splitter.rb +54 -0
data/benchmarks/benchmark_yjit.rb +158 -0
data/benchmarks/benchmark_yjit_workers.rb +61 -0
data/benchmarks/profile_to_s.rb +23 -0
data/benchmarks/speedup_calculator.rb +83 -0
data/benchmarks/system_metadata.rb +81 -0
data/benchmarks/templates/benchmarks.md.erb +221 -0
data/benchmarks/yjit_tests.rb +141 -0
data/cataract.gemspec +34 -0
data/cliff.toml +92 -0
data/examples/color_conversion_visual_test/color_conversion_test.html +3603 -0
data/examples/color_conversion_visual_test/generate.rb +202 -0
data/examples/color_conversion_visual_test/template.html.erb +259 -0
data/examples/css_analyzer/analyzer.rb +164 -0
data/examples/css_analyzer/analyzers/base.rb +33 -0
data/examples/css_analyzer/analyzers/colors.rb +133 -0
data/examples/css_analyzer/analyzers/important.rb +88 -0
data/examples/css_analyzer/analyzers/properties.rb +61 -0
data/examples/css_analyzer/analyzers/specificity.rb +68 -0
data/examples/css_analyzer/templates/report.html.erb +575 -0
data/examples/css_analyzer.rb +69 -0
data/examples/github_analysis.html +5343 -0
data/ext/cataract/cataract.c +1086 -0
data/ext/cataract/cataract.h +174 -0
data/ext/cataract/css_parser.c +1435 -0
data/ext/cataract/extconf.rb +48 -0
data/ext/cataract/import_scanner.c +174 -0
data/ext/cataract/merge.c +973 -0
data/ext/cataract/shorthand_expander.c +902 -0
data/ext/cataract/specificity.c +213 -0
data/ext/cataract/value_splitter.c +116 -0
data/ext/cataract_color/cataract_color.c +16 -0
data/ext/cataract_color/color_conversion.c +1687 -0
data/ext/cataract_color/color_conversion.h +136 -0
data/ext/cataract_color/color_conversion_lab.c +571 -0
data/ext/cataract_color/color_conversion_named.c +259 -0
data/ext/cataract_color/color_conversion_oklab.c +547 -0
data/ext/cataract_color/extconf.rb +23 -0
data/ext/cataract_old/cataract.c +393 -0
data/ext/cataract_old/cataract.h +250 -0
data/ext/cataract_old/css_parser.c +933 -0
data/ext/cataract_old/extconf.rb +67 -0
data/ext/cataract_old/import_scanner.c +174 -0
data/ext/cataract_old/merge.c +776 -0
data/ext/cataract_old/shorthand_expander.c +902 -0
data/ext/cataract_old/specificity.c +213 -0
data/ext/cataract_old/stylesheet.c +290 -0
data/ext/cataract_old/value_splitter.c +116 -0
data/lib/cataract/at_rule.rb +97 -0
data/lib/cataract/color_conversion.rb +18 -0
data/lib/cataract/declarations.rb +332 -0
data/lib/cataract/import_resolver.rb +210 -0
data/lib/cataract/rule.rb +131 -0
data/lib/cataract/stylesheet.rb +716 -0
data/lib/cataract/stylesheet_scope.rb +257 -0
data/lib/cataract/version.rb +5 -0
data/lib/cataract.rb +107 -0
data/lib/tasks/gem.rake +158 -0
data/scripts/fuzzer/run.rb +828 -0
data/scripts/fuzzer/worker.rb +99 -0
data/scripts/generate_benchmarks_md.rb +155 -0
metadata +135 -0

data/ext/cataract_old/css_parser.c ADDED Viewed

@@ -0,0 +1,933 @@
+/*
+ * css_parser.c - CSS parser implementation
+ *
+ * Handles: selectors, declaration blocks, @media, @supports, @keyframes, @font-face, etc.
+ *
+ * This is a character-by-character state machine parser.
+ */
+#include "cataract.h"
+#include <string.h>
+// Parser states
+// Tracks where the character-by-character state machine in this file currently is.
+typedef enum {
+    STATE_INITIAL,       // Start of parsing or after closing }
+    STATE_SELECTOR,      // Parsing selector
+    STATE_DECLARATIONS   // Inside { } parsing declarations
+} ParserState;
+// Forward declarations
+// parse_css_impl: main parser; calls itself on the body of nested @rule blocks.
+VALUE parse_css_impl(VALUE css_string, int depth, VALUE parent_media_query);
+// parse_media_query: returns a hash {query_string: "...", media_types: [...]}.
+VALUE parse_media_query(const char *query_str, long query_len);
+// parse_declarations_string: returns a Ruby array of Declaration structs for [start, end).
+VALUE parse_declarations_string(const char *start, const char *end);
+// copy_without_comments: returns a newly allocated copy of [start, end) with /* */ comments
+// removed; the callers in this file release it with xfree().
+static char* copy_without_comments(const char *start, const char *end, long *out_len);
+// Context for merging hash callbacks
+// A pointer to this struct is cast to VALUE and passed as the user argument of
+// rb_hash_foreach, which hands it to merge_hash_callback.
+struct merge_hash_ctx {
+    VALUE target_hash;  // Hash that receives the merged groups
+};
+// rb_hash_foreach callback: fold one {query_string => group} entry into the target hash.
+// Source and target share the layout {query_string => {media_types: [...], rules: [...]}}.
+//
+// key         - query string key of the entry being visited
+// inner_group - group hash stored under that key in the source hash
+// arg         - pointer to a struct merge_hash_ctx, cast to VALUE
+//
+// Always returns ST_CONTINUE so iteration visits every entry.
+static int merge_hash_callback(VALUE key, VALUE inner_group, VALUE arg) {
+    VALUE target = ((struct merge_hash_ctx *)arg)->target_hash;
+    VALUE existing_group = rb_hash_aref(target, key);
+
+    // First time this query string is seen: adopt the incoming group as-is
+    if (NIL_P(existing_group)) {
+        rb_hash_aset(target, key, inner_group);
+        return ST_CONTINUE;
+    }
+
+    // Otherwise append the incoming rules to the rules already collected
+    VALUE rules_key = ID2SYM(rb_intern("rules"));
+    VALUE dest_rules = rb_hash_aref(existing_group, rules_key);
+    VALUE src_rules = rb_hash_aref(inner_group, rules_key);
+    const long src_count = RARRAY_LEN(src_rules);
+    for (long idx = 0; idx < src_count; idx++) {
+        rb_ary_push(dest_rules, RARRAY_AREF(src_rules, idx));
+    }
+    return ST_CONTINUE;
+}
+// ============================================================================
+// CSS Parsing Helper Functions
+// ============================================================================
+// Parse declaration block and extract Declaration structs
+//
+// Reads the text between *decl_start_ptr and p as "property: value;" pairs and appends
+// one Declaration struct (property, value, important flag) per pair to
+// *current_declarations, creating the array first if it is nil. Comments are stripped
+// before parsing. A trailing "!important" is removed from the value and recorded in the
+// struct's third member. Declarations with an empty value, or whose name/value exceed
+// MAX_PROPERTY_NAME_LENGTH / MAX_PROPERTY_VALUE_LENGTH, are skipped.
+//
+// decl_start_ptr       - in/out: start of the block; set to NULL before returning, so a
+//                        second call for the same block does nothing
+// p                    - one past the last byte of the block
+// current_declarations - in/out: Ruby array (or Qnil) that receives the structs
+// css_string_base      - start of the whole CSS string; only used for debug offsets
+void capture_declarations_fn(
+    const char **decl_start_ptr,
+    const char *p,
+    VALUE *current_declarations,
+    const char *css_string_base
+) {
+    // Guard against multiple firings - only process if decl_start is set
+    if (*decl_start_ptr == NULL) {
+        DEBUG_PRINTF("[capture_declarations] SKIPPED: decl_start is NULL\n");
+        return;
+    }
+    const char *decl_start = *decl_start_ptr;
+    // Initialize declarations array if needed
+    if (NIL_P(*current_declarations)) {
+        *current_declarations = rb_ary_new();
+    }
+    const char *start = decl_start;
+    const char *end = p;
+    DEBUG_PRINTF("[capture_declarations] Parsing declarations from %td to %td: '%.*s'\n",
+                 (ptrdiff_t)(decl_start - css_string_base), (ptrdiff_t)(p - css_string_base),
+                 (int)(end - start), start);
+    // Fast path: check if there are any comments in the declaration block
+    int has_comments = 0;
+    for (const char *check = start; check + 1 < end; check++) {
+        if (*check == '/' && *(check + 1) == '*') {
+            has_comments = 1;
+            break;
+        }
+    }
+    // If there are comments, strip them first (rare case).
+    // From here on `start`/`clean_end` delimit the text that is actually parsed.
+    char *clean_buffer = NULL;
+    const char *clean_end = end;
+    if (has_comments) {
+        long clean_len;
+        clean_buffer = copy_without_comments(start, end, &clean_len);
+        start = clean_buffer;
+        clean_end = clean_buffer + clean_len;
+    }
+    // Simple C-level parser for declarations
+    // Input: "color: red; background: blue !important"
+    // Output: Array of Declaration structs
+    const char *pos = start;
+    while (pos < clean_end) {
+        // Skip whitespace and semicolons
+        while (pos < clean_end && (IS_WHITESPACE(*pos) || *pos == ';')) {
+            pos++;
+        }
+        if (pos >= clean_end) break;
+        // Find property (up to colon)
+        const char *prop_start = pos;
+        while (pos < clean_end && *pos != ':') pos++;
+        if (pos >= clean_end) break;  // No colon found
+        const char *prop_end = pos;
+        // Trim whitespace from property
+        trim_trailing(prop_start, &prop_end);
+        trim_leading(&prop_start, prop_end);
+        pos++;  // Skip colon
+        // Skip whitespace after colon
+        while (pos < clean_end && IS_WHITESPACE(*pos)) {
+            pos++;
+        }
+        // Find value (up to semicolon or end)
+        // Handle parentheses: semicolons inside () don't terminate the value
+        const char *val_start = pos;
+        int paren_depth = 0;
+        while (pos < clean_end) {
+            if (*pos == '(') {
+                paren_depth++;
+            } else if (*pos == ')') {
+                // Never go below zero: a stray ')' must not stop later ';' from
+                // terminating the value (otherwise the rest of the block is swallowed)
+                if (paren_depth > 0) paren_depth--;
+            } else if (*pos == ';' && paren_depth == 0) {
+                break;  // Found terminating semicolon
+            }
+            pos++;
+        }
+        const char *val_end = pos;
+        // Trim trailing whitespace from value
+        trim_trailing(val_start, &val_end);
+        // Check for !important
+        int is_important = 0;
+        // Look backwards for "!important" within the last 10 bytes of the value
+        if (val_end - val_start >= 10) {  // strlen("!important") = 10
+            const char *check = val_end - 10;
+            while (check < val_end && IS_WHITESPACE(*check)) check++;
+            if (check < val_end && *check == '!') {
+                check++;
+                while (check < val_end && IS_WHITESPACE(*check)) check++;
+                if ((val_end - check) >= 9 && strncmp(check, "important", 9) == 0) {
+                    is_important = 1;
+                    // Walk back over the '!' and any whitespace/'!' before it so the
+                    // value ends where the real value text ends
+                    const char *important_pos = check - 1;
+                    while (important_pos > val_start && (IS_WHITESPACE(*(important_pos-1)) || *(important_pos-1) == '!')) {
+                        important_pos--;
+                    }
+                    val_end = important_pos;
+                }
+            }
+        }
+        // Final trim of trailing whitespace/newlines from value (after !important removal)
+        trim_trailing(val_start, &val_end);
+        // Skip if value is empty (e.g., "color: !important" with no actual value)
+        if (val_end > val_start) {
+            // Sanity check: property name length
+            // (`continue` is safe: pos sits on ';' or at clean_end, and the loop top skips ';')
+            long prop_len = prop_end - prop_start;
+            if (prop_len > MAX_PROPERTY_NAME_LENGTH) {
+                DEBUG_PRINTF("[capture_declarations] Skipping property: name too long (%ld > %d)\n",
+                             prop_len, MAX_PROPERTY_NAME_LENGTH);
+                continue;
+            }
+            // Sanity check: value length
+            long val_len = val_end - val_start;
+            if (val_len > MAX_PROPERTY_VALUE_LENGTH) {
+                DEBUG_PRINTF("[capture_declarations] Skipping property: value too long (%ld > %d)\n",
+                             val_len, MAX_PROPERTY_VALUE_LENGTH);
+                continue;
+            }
+            // Create property string and lowercase it (CSS property names are ASCII-only)
+            VALUE property_raw = rb_usascii_str_new(prop_start, prop_len);
+            VALUE property = lowercase_property(property_raw);
+            VALUE value = rb_utf8_str_new(val_start, val_end - val_start);
+            DEBUG_PRINTF("[capture_declarations] Found: property='%s' value='%s' important=%d\n",
+                         RSTRING_PTR(property), RSTRING_PTR(value), is_important);
+            // Create Declaration struct
+            VALUE decl = rb_struct_new(
+                cDeclaration,
+                property,
+                value,
+                is_important ? Qtrue : Qfalse
+            );
+            rb_ary_push(*current_declarations, decl);
+            // Protect temporaries from GC (in case compiler optimizes them to registers)
+            RB_GC_GUARD(property);
+            RB_GC_GUARD(value);
+            RB_GC_GUARD(decl);
+        } else {
+            DEBUG_PRINTF("[capture_declarations] Skipping empty value for property at pos %td\n",
+                         (ptrdiff_t)(prop_start - css_string_base));
+        }
+        if (pos < clean_end && *pos == ';') pos++;  // Skip semicolon if present
+    }
+    // Free temporary buffer if allocated
+    if (clean_buffer) {
+        xfree(clean_buffer);
+    }
+    // Reset for next rule
+    *decl_start_ptr = NULL;
+}
+// Emit one Rule struct per pending selector and file it under its media-query group.
+//
+// Nothing is emitted while scanning at-rule block content (that content is parsed by a
+// recursive call) or when either the selector list or the declaration list is nil.
+// In every case the pending selectors, declarations and the mark are cleared on exit.
+//
+// inside_at_rule_block - non-zero while scanning the body of an at-rule block
+// current_selectors    - in/out: Ruby array of selector strings (or Qnil); reset to Qnil
+// current_declarations - in/out: Ruby array of declarations (or Qnil); reset to Qnil
+// current_media_types  - hash {query_string:, media_types:} for the active media
+//                        context, or Qnil for rules outside any media query
+// rules_by_media       - destination hash, see parameter comment below
+// mark_ptr             - in/out: parser mark; reset to NULL
+void finish_rule_fn(
+    int inside_at_rule_block,
+    VALUE *current_selectors,
+    VALUE *current_declarations,
+    VALUE *current_media_types,
+    VALUE rules_by_media,  // Hash: {query_string => {media_types: [...], rules: [...]}}
+    const char **mark_ptr
+) {
+    if (inside_at_rule_block) {
+        // At-rule block content is handled by the recursive parse, not here
+        DEBUG_PRINTF("[finish_rule] SKIPPED (inside media block)\n");
+    } else if (!NIL_P(*current_selectors) && !NIL_P(*current_declarations)) {
+        const long selector_count = RARRAY_LEN(*current_selectors);
+        DEBUG_PRINTF("[finish_rule] Creating %ld rule(s)\n", selector_count);
+        for (long idx = 0; idx < selector_count; idx++) {
+            VALUE selector_str = RARRAY_AREF(*current_selectors, idx);
+            DEBUG_PRINTF("[finish_rule] Rule %ld: selector='%s'\n", idx, RSTRING_PTR(selector_str));
+
+            // Grouping key and media types; both stay nil for non-media rules
+            VALUE group_key = Qnil;
+            VALUE group_media_types = Qnil;
+            if (NIL_P(*current_media_types)) {
+                DEBUG_PRINTF("[finish_rule] No media types (default/all)\n");
+            } else {
+                group_key = rb_hash_aref(*current_media_types, ID2SYM(rb_intern("query_string")));
+                group_media_types = rb_hash_aref(*current_media_types, ID2SYM(rb_intern("media_types")));
+                DEBUG_PRINTF("[finish_rule] current_media_types present, query_string=%s\n",
+                            NIL_P(group_key) ? "nil" : RSTRING_PTR(group_key));
+            }
+
+            // Each rule gets its own copy of the declarations; specificity starts as nil.
+            // Media info lives on the group, not on the rule.
+            VALUE new_rule = rb_struct_new(cRule,
+                selector_str,
+                rb_ary_dup(*current_declarations),
+                Qnil
+            );
+
+            // Look up the group for this media query, creating it on first use
+            VALUE bucket = rb_hash_aref(rules_by_media, group_key);
+            if (NIL_P(bucket)) {
+                bucket = rb_hash_new();
+                // Non-media rules default to [:all] (css_parser gem compatibility)
+                if (NIL_P(group_media_types)) {
+                    group_media_types = rb_ary_new_from_args(1, ID2SYM(rb_intern("all")));
+                }
+                rb_hash_aset(bucket, ID2SYM(rb_intern("media_types")), group_media_types);
+                rb_hash_aset(bucket, ID2SYM(rb_intern("rules")), rb_ary_new());
+                rb_hash_aset(rules_by_media, group_key, bucket);
+            }
+
+            VALUE bucket_rules = rb_hash_aref(bucket, ID2SYM(rb_intern("rules")));
+            rb_ary_push(bucket_rules, new_rule);
+        }
+    }
+
+    // Always clear pending state so the next rule starts clean
+    *current_selectors = Qnil;
+    *current_declarations = Qnil;
+    // Reset mark for next rule (in case it wasn't reset by capture action)
+    *mark_ptr = NULL;
+}
+// Split a media query into its raw text and the media types it names.
+// Returns: {query_string: "...", media_types: [...]}
+// Example: "screen and (min-width: 768px)" -> {query_string: "screen and (min-width: 768px)", media_types: [:screen]}
+// Example: "screen, print" -> {query_string: "screen, print", media_types: [:screen, :print]}
+//
+// Algorithm: walk the text once; everything inside parentheses is ignored, and every
+// identifier (letters, digits, dashes; starting with a letter or dash) found outside
+// parentheses becomes a media type symbol unless it is exactly "and", "not", "or" or "only".
+//
+// query_str - start of the query text (need not be NUL-terminated)
+// query_len - number of bytes to read from query_str
+VALUE parse_media_query(const char *query_str, long query_len) {
+    VALUE mq_types = rb_ary_new();
+    const char *cursor = query_str;
+    const char *const limit = query_str + query_len;
+    int paren_level = 0;
+
+    for (;;) {
+        while (cursor < limit && IS_WHITESPACE(*cursor)) cursor++;
+        if (cursor >= limit) break;
+
+        const char ch = *cursor;
+
+        // Parentheses only adjust the nesting level (never below zero)
+        if (ch == '(') {
+            paren_level++;
+            cursor++;
+            continue;
+        }
+        if (ch == ')') {
+            if (paren_level > 0) paren_level--;
+            cursor++;
+            continue;
+        }
+
+        // Anything inside parens, and any byte that cannot start an identifier,
+        // is skipped one character at a time
+        const int starts_ident =
+            (ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z') || ch == '-';
+        if (paren_level != 0 || !starts_ident) {
+            cursor++;
+            continue;
+        }
+
+        // Consume the identifier: letters, digits, dashes
+        const char *ident_start = cursor;
+        do {
+            cursor++;
+        } while (cursor < limit &&
+                 ((*cursor >= 'a' && *cursor <= 'z') || (*cursor >= 'A' && *cursor <= 'Z') ||
+                  (*cursor >= '0' && *cursor <= '9') || *cursor == '-'));
+        const long ident_len = cursor - ident_start;
+
+        // Query keywords are not media types
+        int is_keyword = 0;
+        switch (ident_len) {
+            case 2:
+                is_keyword = (memcmp(ident_start, "or", 2) == 0);
+                break;
+            case 3:
+                is_keyword = (memcmp(ident_start, "and", 3) == 0) ||
+                             (memcmp(ident_start, "not", 3) == 0);
+                break;
+            case 4:
+                is_keyword = (memcmp(ident_start, "only", 4) == 0);
+                break;
+            default:
+                break;
+        }
+
+        if (is_keyword) {
+            DEBUG_PRINTF("[mq_parser] skipped keyword: %.*s\n", (int)ident_len, ident_start);
+        } else {
+            rb_ary_push(mq_types, ID2SYM(rb_intern2(ident_start, ident_len)));
+            DEBUG_PRINTF("[mq_parser] captured media type: %.*s\n", (int)ident_len, ident_start);
+        }
+    }
+
+    // Package the original text together with the collected media types
+    VALUE result = rb_hash_new();
+    VALUE query_string = rb_utf8_str_new(query_str, query_len);
+    rb_hash_aset(result, ID2SYM(rb_intern("query_string")), query_string);
+    rb_hash_aset(result, ID2SYM(rb_intern("media_types")), mq_types);
+    return result;
+}
+// Helper: Copy string segment skipping comments
+// Allocates new buffer and returns it with new length
+//
+// start   - first byte of the segment
+// end     - one past the last byte of the segment
+// out_len - receives the number of bytes written to the returned buffer
+//
+// The buffer is allocated with ALLOC_N and sized for the whole input, so the output
+// always fits. It is NOT NUL-terminated; the callers in this file release it with
+// xfree(). An unterminated comment ("/* ..." with no closing "*/") is discarded through
+// the end of the segment, so none of its bytes reach the output.
+static char* copy_without_comments(const char *start, const char *end, long *out_len) {
+    long max_len = end - start;
+    char *buffer = ALLOC_N(char, max_len);
+    char *write_pos = buffer;
+    const char *read_pos = start;
+    while (read_pos < end) {
+        // Check for comment start
+        if (read_pos + 1 < end && *read_pos == '/' && *(read_pos + 1) == '*') {
+            // Skip past comment
+            int terminated = 0;
+            read_pos += 2;
+            while (read_pos + 1 < end) {
+                if (*read_pos == '*' && *(read_pos + 1) == '/') {
+                    read_pos += 2;
+                    terminated = 1;
+                    break;
+                }
+                read_pos++;
+            }
+            // No closing "*/": the comment runs to the end of the segment. Without this
+            // the scan stops one byte short and that last comment byte would be copied.
+            if (!terminated) {
+                read_pos = end;
+            }
+        } else {
+            // Copy character
+            *write_pos++ = *read_pos++;
+        }
+    }
+    *out_len = write_pos - buffer;
+    return buffer;
+}
+// Parse declarations string into array of Declaration structs
+// Used by parse_declarations Ruby wrapper
+//
+// start - first byte of the declarations text, e.g. "color: red; margin: 0 !important"
+// end   - one past the last byte of the text
+//
+// Returns a new Ruby array holding one Declaration struct (property, value, important
+// flag) per "property: value" pair. Comments are stripped before parsing. A trailing
+// "!important" is removed from the value and recorded in the flag. Pairs with an empty
+// value, or whose name/value exceed MAX_PROPERTY_NAME_LENGTH / MAX_PROPERTY_VALUE_LENGTH,
+// are skipped.
+VALUE parse_declarations_string(const char *start, const char *end) {
+    VALUE declarations = rb_ary_new();
+    // Fast path: check if there are any comments
+    int has_comments = 0;
+    for (const char *check = start; check + 1 < end; check++) {
+        if (*check == '/' && *(check + 1) == '*') {
+            has_comments = 1;
+            break;
+        }
+    }
+    // If there are comments, strip them first (rare case).
+    // From here on `start`/`clean_end` delimit the text that is actually parsed.
+    char *clean_buffer = NULL;
+    const char *clean_end = end;
+    if (has_comments) {
+        long clean_len;
+        clean_buffer = copy_without_comments(start, end, &clean_len);
+        start = clean_buffer;
+        clean_end = clean_buffer + clean_len;
+    }
+    const char *pos = start;
+    while (pos < clean_end) {
+        // Skip whitespace and semicolons
+        while (pos < clean_end && (IS_WHITESPACE(*pos) || *pos == ';')) pos++;
+        if (pos >= clean_end) break;
+        // Find property (up to colon)
+        const char *prop_start = pos;
+        while (pos < clean_end && *pos != ':') pos++;
+        if (pos >= clean_end) break;  // No colon found
+        const char *prop_end = pos;
+        trim_trailing(prop_start, &prop_end);
+        trim_leading(&prop_start, prop_end);
+        pos++;  // Skip colon
+        trim_leading(&pos, clean_end);
+        // Find value (up to semicolon or end), handling parentheses:
+        // semicolons inside () don't terminate the value
+        const char *val_start = pos;
+        int paren_depth = 0;
+        while (pos < clean_end) {
+            if (*pos == '(') {
+                paren_depth++;
+            } else if (*pos == ')') {
+                // Never go below zero: a stray ')' must not stop later ';' from
+                // terminating the value (otherwise the rest of the text is swallowed)
+                if (paren_depth > 0) paren_depth--;
+            } else if (*pos == ';' && paren_depth == 0) {
+                break;
+            }
+            pos++;
+        }
+        const char *val_end = pos;
+        trim_trailing(val_start, &val_end);
+        // Check for !important within the last 10 bytes of the value
+        int is_important = 0;
+        if (val_end - val_start >= 10) {  // strlen("!important") = 10
+            const char *check = val_end - 10;
+            while (check < val_end && IS_WHITESPACE(*check)) check++;
+            if (check < val_end && *check == '!') {
+                check++;
+                while (check < val_end && IS_WHITESPACE(*check)) check++;
+                if ((val_end - check) >= 9 && strncmp(check, "important", 9) == 0) {
+                    is_important = 1;
+                    // Walk back over the '!' and any whitespace/'!' before it so the
+                    // value ends where the real value text ends
+                    const char *important_pos = check - 1;
+                    while (important_pos > val_start && (IS_WHITESPACE(*(important_pos-1)) || *(important_pos-1) == '!')) {
+                        important_pos--;
+                    }
+                    val_end = important_pos;
+                    trim_trailing(val_start, &val_end);
+                }
+            }
+        }
+        // Skip if value is empty
+        if (val_end > val_start) {
+            // `continue` is safe here: pos sits on ';' or at clean_end, and the loop
+            // top skips semicolons, so the scan always advances
+            long prop_len = prop_end - prop_start;
+            if (prop_len > MAX_PROPERTY_NAME_LENGTH) continue;
+            long val_len = val_end - val_start;
+            if (val_len > MAX_PROPERTY_VALUE_LENGTH) continue;
+            // Create property string and lowercase it
+            VALUE property_raw = rb_usascii_str_new(prop_start, prop_len);
+            VALUE property = lowercase_property(property_raw);
+            VALUE value = rb_utf8_str_new(val_start, val_len);
+            // Create Declaration struct
+            VALUE decl = rb_struct_new(cDeclaration,
+                property, value, is_important ? Qtrue : Qfalse);
+            rb_ary_push(declarations, decl);
+        }
+    }
+    // Free temporary buffer if allocated
+    if (clean_buffer) {
+        xfree(clean_buffer);
+    }
+    return declarations;
+}
+// ============================================================================
+// Main CSS Parser
+// ============================================================================
+/*
+ * CSS parser implementation
+ *
+ * Parses selectors, declarations, and @rules. Creates Rule structs.
+ *
+ * @param css_string [String] CSS to parse
+ * @param depth [Integer] Recursion depth (for error handling)
+ * @param parent_media_query [VALUE] Parent media query hash (for nested @media), or Qnil
+ * @return [Hash] {query_string => {media_types: [...], rules: [Rule, ...]}} grouped by media query
+ */
+VALUE parse_css_impl(VALUE css_string, int depth, VALUE parent_media_query) {
+    Check_Type(css_string, T_STRING);
+    const char *p = RSTRING_PTR(css_string);
+    const char *pe = p + RSTRING_LEN(css_string);
+    const char *css_string_base = p;
+    // State variables
+    ParserState state = STATE_INITIAL;
+    const char *mark = NULL;
+    const char *decl_start = NULL;
+    const char *selector_start = NULL;
+    // Ruby objects
+    VALUE rules_by_media = rb_hash_new();  // Hash: {query_string => {media_types: [...], rules: [...]}}
+    VALUE current_selectors = Qnil;
+    VALUE current_declarations = Qnil;
+    VALUE selector = Qnil;
+    VALUE current_media_types = parent_media_query;  // Inherit parent's media context
+    while (p < pe) {
+        char c = *p;
+        // Skip whitespace in most states
+        if (IS_WHITESPACE(c) && state != STATE_DECLARATIONS && state != STATE_SELECTOR) {
+            p++;
+            continue;
+        }
+        // Skip comments everywhere
+        if (c == '/' && p + 1 < pe && *(p + 1) == '*') {
+            // Find end of comment
+            p += 2;
+            while (p + 1 < pe) {
+                if (*p == '*' && *(p + 1) == '/') {
+                    p += 2;
+                    break;
+                }
+                p++;
+            }
+            continue;
+        }
+        switch (state) {
+            case STATE_INITIAL:
+                if (c == '@') {
+                    // @rule detected - parse it
+                    const char *at_start = p + 1;  // Skip @
+                    const char *at_end = at_start;
+                    // Find end of @rule name (until space or {)
+                    while (at_end < pe && !IS_WHITESPACE(*at_end) && *at_end != '{' && *at_end != ';') {
+                        at_end++;
+                    }
+                    long name_len = at_end - at_start;
+                    char at_name[256];
+                    if (name_len > 255) name_len = 255;
+                    strncpy(at_name, at_start, name_len);
+                    at_name[name_len] = '\0';
+                    DEBUG_PRINTF("[pure_c] @rule detected: @%s at pos %td\n", at_name, (ptrdiff_t)(p - css_string_base));
+                    // Skip to prelude start (after name, before {)
+                    p = at_end;
+                    while (p < pe && IS_WHITESPACE(*p)) p++;
+                    const char *prelude_start = p;
+                    // Check for statement-style @rule (ends with ;)
+                    const char *check = p;
+                    while (check < pe && *check != '{' && *check != ';') check++;
+                    if (check >= pe) {
+                        // Incomplete - skip
+                        p = pe;
+                        break;
+                    }
+                    if (*check == ';') {
+                        // Statement-style @rule (@charset, @import, etc.) - skip it
+                        p = check + 1;
+                        DEBUG_PRINTF("[pure_c] Skipped statement @rule @%s\n", at_name);
+                        break;
+                    }
+                    // Block-style @rule - find prelude end (the {)
+                    while (p < pe && *p != '{') p++;
+                    if (p >= pe) break;  // Incomplete
+                    const char *prelude_end = p;
+                    // Trim whitespace from prelude
+                    while (prelude_end > prelude_start && IS_WHITESPACE(*(prelude_end - 1))) {
+                        prelude_end--;
+                    }
+                    long prelude_len = prelude_end - prelude_start;
+                    p++;  // Skip opening {
+                    // Find matching closing brace
+                    int brace_depth = 1;
+                    const char *block_start = p;
+                    while (p < pe && brace_depth > 0) {
+                        if (*p == '{') {
+                            brace_depth++;
+                        } else if (*p == '}') {
+                            brace_depth--;
+                        } else if (*p == '/' && p + 1 < pe && *(p + 1) == '*') {
+                            // Skip comments when counting braces
+                            p += 2;
+                            while (p + 1 < pe && !(*p == '*' && *(p + 1) == '/')) p++;
+                            if (p + 1 < pe) p += 2;
+                            continue;
+                        }
+                        p++;
+                    }
+                    const char *block_end = p - 1;  // Before closing }
+                    long block_len = block_end - block_start;
+                    DEBUG_PRINTF("[pure_c] @%s block: %ld bytes\n", at_name, block_len);
+                    // Process based on @rule type
+                    if (strcmp(at_name, "media") == 0) {
+                        // Parse media query for this block
+                        VALUE media_query = parse_media_query(prelude_start, prelude_len);
+                        // Combine with parent media query if nested (per W3C spec)
+                        VALUE combined_media_query = media_query;
+                        if (!NIL_P(parent_media_query)) {
+                            VALUE parent_qs = rb_hash_aref(parent_media_query, ID2SYM(rb_intern("query_string")));
+                            VALUE current_qs = rb_hash_aref(media_query, ID2SYM(rb_intern("query_string")));
+                            // Combine: "screen" + " and " + "(min-width: 768px)" = "screen and (min-width: 768px)"
+                            VALUE combined_qs = rb_str_new_cstr("");
+                            if (!NIL_P(parent_qs)) rb_str_append(combined_qs, parent_qs);
+                            if (!NIL_P(parent_qs) && !NIL_P(current_qs)) rb_str_cat2(combined_qs, " and ");
+                            if (!NIL_P(current_qs)) rb_str_append(combined_qs, current_qs);
+                            // Combine media_types arrays (union of parent and current)
+                            VALUE parent_media_types = rb_hash_aref(parent_media_query, ID2SYM(rb_intern("media_types")));
+                            VALUE current_media_types = rb_hash_aref(media_query, ID2SYM(rb_intern("media_types")));
+                            VALUE combined_media_types = rb_ary_dup(parent_media_types);
+                            // Add current media types if they're not already in the array
+                            long current_len = RARRAY_LEN(current_media_types);
+                            for (long i = 0; i < current_len; i++) {
+                                VALUE media_type = RARRAY_AREF(current_media_types, i);
+                                if (!rb_ary_includes(combined_media_types, media_type)) {
+                                    rb_ary_push(combined_media_types, media_type);
+                                }
+                            }
+                            combined_media_query = rb_hash_new();
+                            rb_hash_aset(combined_media_query, ID2SYM(rb_intern("query_string")), combined_qs);
+                            rb_hash_aset(combined_media_query, ID2SYM(rb_intern("media_types")), combined_media_types);
+                        }
+                        // Recursively parse block content with combined media context
+                        VALUE block_content = rb_str_new(block_start, block_len);
+                        VALUE inner_hash = parse_css_impl(block_content, depth + 1, combined_media_query);
+                        // Merge inner_hash into our rules_by_media using rb_hash_foreach
+                        struct merge_hash_ctx merge_ctx = { rules_by_media };
+                        rb_hash_foreach(inner_hash, merge_hash_callback, (VALUE)&merge_ctx);
+                        RB_GC_GUARD(media_query);
+                        RB_GC_GUARD(combined_media_query);
+                        RB_GC_GUARD(block_content);
+                        RB_GC_GUARD(inner_hash);
+                    } else if (strcmp(at_name, "supports") == 0 || strcmp(at_name, "layer") == 0 ||
+                               strcmp(at_name, "container") == 0 || strcmp(at_name, "scope") == 0) {
+                        // Conditional group rules - recursively parse and merge
+                        VALUE block_content = rb_str_new(block_start, block_len);
+                        VALUE inner_hash = parse_css_impl(block_content, depth + 1, parent_media_query);
+                        // Merge inner_hash into rules_by_media using rb_hash_foreach
+                        struct merge_hash_ctx merge_ctx = { rules_by_media };
+                        rb_hash_foreach(inner_hash, merge_hash_callback, (VALUE)&merge_ctx);
+                        RB_GC_GUARD(block_content);
+                        RB_GC_GUARD(inner_hash);
+                    } else if (strstr(at_name, "keyframes") != NULL) {
+                        // @keyframes - create dummy rule with animation name
+                        // Strip whitespace without rb_funcall
+                        VALUE animation_name = strip_string(prelude_start, prelude_len);
+                        // Build selector: "@keyframes " + name
+                        VALUE sel = UTF8_STR("@");
+                        rb_str_cat(sel, at_name, strlen(at_name));
+                        rb_str_cat2(sel, " ");
+                        rb_str_append(sel, animation_name);
+                        VALUE rule = rb_struct_new(cRule,
+                            sel,                                    // selector
+                            rb_ary_new(),                          // declarations (empty)
+                            Qnil                                    // specificity
+                        );
+                        // Add to rules_by_media under current media context
+                        VALUE query_string = NIL_P(parent_media_query) ? Qnil :
+                                            rb_hash_aref(parent_media_query, ID2SYM(rb_intern("query_string")));
+                        VALUE media_types_array = NIL_P(parent_media_query) ?
+                                                 rb_ary_new_from_args(1, ID2SYM(rb_intern("all"))) :
+                                                 rb_hash_aref(parent_media_query, ID2SYM(rb_intern("media_types")));
+                        // Get or create group
+                        VALUE group = rb_hash_aref(rules_by_media, query_string);
+                        if (NIL_P(group)) {
+                            group = rb_hash_new();
+                            rb_hash_aset(group, ID2SYM(rb_intern("media_types")), media_types_array);
+                            rb_hash_aset(group, ID2SYM(rb_intern("rules")), rb_ary_new());
+                            rb_hash_aset(rules_by_media, query_string, group);
+                        }
+                        VALUE rules_array = rb_hash_aref(group, ID2SYM(rb_intern("rules")));
+                        rb_ary_push(rules_array, rule);
+                        RB_GC_GUARD(animation_name);
+                        RB_GC_GUARD(sel);
+                        RB_GC_GUARD(rule);
+                    } else if (strcmp(at_name, "font-face") == 0 || strcmp(at_name, "property") == 0 ||
+                               strcmp(at_name, "page") == 0 || strcmp(at_name, "counter-style") == 0) {
+                        // Descriptor-based @rules - parse block as declarations
+                        // Wrap in dummy selector for parsing
+                        VALUE wrapped = UTF8_STR("* { ");
+                        rb_str_cat(wrapped, block_start, block_len);
+                        rb_str_cat2(wrapped, " }");
+                        VALUE dummy_hash = parse_css_impl(wrapped, depth + 1, parent_media_query);
+                        VALUE declarations = Qnil;
+                        // Extract first rule from the dummy parse (should be under nil key)
+                        // dummy_hash structure: {query_string => {media_types: [...], rules: [...]}}
+                        VALUE dummy_group = rb_hash_aref(dummy_hash, Qnil);
+                        if (!NIL_P(dummy_group)) {
+                            VALUE dummy_rules = rb_hash_aref(dummy_group, ID2SYM(rb_intern("rules")));
+                            if (!NIL_P(dummy_rules) && RARRAY_LEN(dummy_rules) > 0) {
+                                VALUE first_rule = RARRAY_AREF(dummy_rules, 0);
+                                declarations = rb_struct_aref(first_rule, INT2FIX(RULE_DECLARATIONS));
+                                // Build selector: "@" + name + [" " + prelude]
+                                VALUE sel = UTF8_STR("@");
+                                rb_str_cat(sel, at_name, strlen(at_name));
+                                if (prelude_len > 0) {
+                                    // Strip whitespace without rb_funcall
+                                    VALUE prelude_val = strip_string(prelude_start, prelude_len);
+                                    if (RSTRING_LEN(prelude_val) > 0) {
+                                        rb_str_cat2(sel, " ");
+                                        rb_str_append(sel, prelude_val);
+                                    }
+                                    RB_GC_GUARD(prelude_val);
+                                }
+                                VALUE rule = rb_struct_new(cRule,
+                                    sel,                                    // selector
+                                    declarations,                           // declarations
+                                    Qnil                                    // specificity
+                                );
+                                // Add to rules_by_media under current media context
+                                VALUE query_string = NIL_P(parent_media_query) ? Qnil :
+                                                    rb_hash_aref(parent_media_query, ID2SYM(rb_intern("query_string")));
+                                VALUE media_types_array = NIL_P(parent_media_query) ?
+                                                         rb_ary_new_from_args(1, ID2SYM(rb_intern("all"))) :
+                                                         rb_hash_aref(parent_media_query, ID2SYM(rb_intern("media_types")));
+                                // Get or create group
+                                VALUE group = rb_hash_aref(rules_by_media, query_string);
+                                if (NIL_P(group)) {
+                                    group = rb_hash_new();
+                                    rb_hash_aset(group, ID2SYM(rb_intern("media_types")), media_types_array);
+                                    rb_hash_aset(group, ID2SYM(rb_intern("rules")), rb_ary_new());
+                                    rb_hash_aset(rules_by_media, query_string, group);
+                                }
+                                VALUE rules_array = rb_hash_aref(group, ID2SYM(rb_intern("rules")));
+                                rb_ary_push(rules_array, rule);
+                                RB_GC_GUARD(sel);
+                                RB_GC_GUARD(rule);
+                            }
+                        }
+                        RB_GC_GUARD(wrapped);
+                        RB_GC_GUARD(dummy_hash);
+                        RB_GC_GUARD(dummy_group);
+                        RB_GC_GUARD(declarations);
+                    } else {
+                        // Unknown @rule - skip it
+                        DEBUG_PRINTF("[pure_c] Skipping unknown @rule: @%s\n", at_name);
+                    }
+                } else if (c == '}') {
+                    // Stray closing brace - ignore
+                    p++;
+                } else if (!IS_WHITESPACE(c)) {
+                    // Start of selector
+                    selector_start = p;
+                    state = STATE_SELECTOR;
+                    DEBUG_PRINTF("[pure_c] Starting selector at pos %td\n", (ptrdiff_t)(p - css_string_base));
+                }
+                break;
+            case STATE_SELECTOR:
+                if (c == '{') {
+                    // End of selector, start of declarations
+                    if (selector_start != NULL) {
+                        const char *selector_end = p;
+                        // Trim trailing whitespace
+                        while (selector_end > selector_start && IS_WHITESPACE(*(selector_end - 1))) {
+                            selector_end--;
+                        }
+                        // Split on comma and capture each selector
+                        const char *seg_start = selector_start;
+                        const char *seg = selector_start;
+                        if (NIL_P(current_selectors)) {
+                            current_selectors = rb_ary_new();
+                        }
+                        while (seg <= selector_end) {
+                            if (seg == selector_end || *seg == ',') {
+                                // Capture segment
+                                const char *seg_end = seg;
+                                // Trim whitespace from segment
+                                while (seg_end > seg_start && IS_WHITESPACE(*(seg_end - 1))) {
+                                    seg_end--;
+                                }
+                                while (seg_start < seg_end && IS_WHITESPACE(*seg_start)) {
+                                    seg_start++;
+                                }
+                                if (seg_end > seg_start) {
+                                    VALUE sel = rb_utf8_str_new(seg_start, seg_end - seg_start);
+                                    rb_ary_push(current_selectors, sel);
+                                    DEBUG_PRINTF("[pure_c] Captured selector: '%s'\n", RSTRING_PTR(sel));
+                                }
+                                seg_start = seg + 1;  // Skip comma
+                            }
+                            seg++;
+                        }
+                        selector_start = NULL;
+                    }
+                    p++;  // Skip {
+                    decl_start = p;
+                    state = STATE_DECLARATIONS;
+                    DEBUG_PRINTF("[pure_c] Starting declarations at pos %td\n", (ptrdiff_t)(p - css_string_base));
+                } else {
+                    // Continue parsing selector
+                    p++;
+                }
+                break;
+            case STATE_DECLARATIONS:
+                if (c == '}') {
+                    // End of declaration block
+                    // Capture declarations
+                    capture_declarations_fn(&decl_start, p, &current_declarations, css_string_base);
+                    // Create rule(s)
+                    finish_rule_fn(0, &current_selectors, &current_declarations,
+                                   &current_media_types, rules_by_media, &mark);
+                    p++;  // Skip }
+                    state = STATE_INITIAL;
+                    DEBUG_PRINTF("[pure_c] Finished rule, back to initial at pos %td\n", (ptrdiff_t)(p - css_string_base));
+                } else {
+                    // Continue parsing declarations
+                    p++;
+                }
+                break;
+        }
+    }
+    // Cleanup: if we ended in the middle of parsing, try to finish
+    if (state == STATE_DECLARATIONS && decl_start != NULL) {
+        capture_declarations_fn(&decl_start, p, &current_declarations, css_string_base);
+        finish_rule_fn(0, &current_selectors, &current_declarations,
+                       &current_media_types, rules_by_media, &mark);
+    }
+    return rules_by_media;
+}