cataract 0.2.3 → 0.2.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -12,6 +12,11 @@
12
12
 
13
13
  #include "cataract.h"
14
14
  #include <string.h>
15
+ #include <stdint.h>
16
+
17
+ // Use uint8_t for boolean flags to reduce struct size and improve cache efficiency.
18
+ // (int is 4 bytes, uint8_t is 1 byte; real savings depend on struct padding. Holds 0..255 only: use for 0/1 flags, not counters.)
19
+ #define BOOLEAN uint8_t
15
20
 
16
21
  // Parser context passed through recursive calls
17
22
  typedef struct {
@@ -19,13 +24,29 @@ typedef struct {
19
24
  VALUE media_index; // Hash: Symbol => Array of rule IDs
20
25
  VALUE selector_lists; // Hash: list_id => Array of rule IDs
21
26
  VALUE imports_array; // Array of ImportStatement structs
27
+ VALUE media_queries; // Array of MediaQuery structs
28
+ VALUE media_query_lists; // Hash: list_id => Array of MediaQuery IDs
22
29
  int rule_id_counter; // Next rule ID (0-indexed)
23
30
  int next_selector_list_id; // Next selector list ID (0-indexed)
31
+ int media_query_id_counter; // Next MediaQuery ID (0-indexed)
32
+ int next_media_query_list_id; // Next media query list ID (0-indexed)
24
33
  int media_query_count; // Safety limit for media queries
25
34
  st_table *media_cache; // Parse-time cache: string => parsed media types
26
- int has_nesting; // Set to 1 if any nested rules are created
27
- int selector_lists_enabled; // Parser option: track selector lists (1=enabled, 0=disabled)
28
- int depth; // Current recursion depth (safety limit)
35
+ BOOLEAN has_nesting; // Set to 1 if any nested rules are created
36
+ BOOLEAN selector_lists_enabled; // Parser option: track selector lists (1=enabled, 0=disabled)
37
+ BOOLEAN depth; // Current recursion depth (safety limit)
38
+ // URL conversion options
39
+ VALUE base_uri; // Base URI for resolving relative URLs (Qnil if disabled)
40
+ VALUE uri_resolver; // Proc to call for URL resolution (Qnil for default)
41
+ BOOLEAN absolute_paths; // Whether to convert relative URLs to absolute
42
+ // Parse error checking options
43
+ VALUE css_string; // Full CSS string for error position calculation
44
+ BOOLEAN check_empty_values; // Raise error on empty declaration values
45
+ BOOLEAN check_malformed_declarations; // Raise error on declarations without colons
46
+ BOOLEAN check_invalid_selectors; // Raise error on empty/malformed selectors
47
+ BOOLEAN check_invalid_selector_syntax; // Raise error on syntax violations (.. ## etc)
48
+ BOOLEAN check_malformed_at_rules; // Raise error on @media/@supports without conditions
49
+ BOOLEAN check_unclosed_blocks; // Raise error on missing closing braces
29
50
  } ParserContext;
30
51
 
31
52
  // Macro to skip CSS comments /* ... */
@@ -55,6 +76,20 @@ static inline const char* find_matching_brace(const char *start, const char *end
55
76
  return p;
56
77
  }
57
78
 
79
+ // Find matching closing brace with strict error checking
80
+ // Input: start = position after opening '{', end = limit, check_unclosed = whether to raise error
81
+ // Returns: pointer to matching '}' (raises error if not found and check_unclosed is true)
82
+ static inline const char* find_matching_brace_strict(const char *start, const char *end, int check_unclosed) {
83
+ const char *closing_brace = find_matching_brace(start, end);
84
+
85
+ // Check if we found the closing brace
86
+ if (check_unclosed && closing_brace >= end) {
87
+ rb_raise(eParseError, "Unclosed block: missing closing brace");
88
+ }
89
+
90
+ return closing_brace;
91
+ }
92
+
58
93
  // Find matching closing paren
59
94
  // Input: start = position after opening '(', end = limit
60
95
  // Returns: pointer to matching ')' (or end if not found)
@@ -70,6 +105,99 @@ static inline const char* find_matching_paren(const char *start, const char *end
70
105
  return p;
71
106
  }
72
107
 
108
+ // Helper function to raise ParseError with automatic position calculation
109
+ // Does not return - raises error and exits
110
+ __attribute__((noreturn))
111
+ static void raise_parse_error_at(ParserContext *ctx, const char *error_pos, const char *message, const char *error_type) {
112
+ const char *css = RSTRING_PTR(ctx->css_string);
113
+ long pos = error_pos - css;
114
+
115
+ // Build keyword args hash
116
+ VALUE kwargs = rb_hash_new();
117
+ rb_hash_aset(kwargs, ID2SYM(rb_intern("css")), ctx->css_string);
118
+ rb_hash_aset(kwargs, ID2SYM(rb_intern("pos")), LONG2NUM(pos));
119
+ rb_hash_aset(kwargs, ID2SYM(rb_intern("type")), ID2SYM(rb_intern(error_type)));
120
+
121
+ // Create ParseError instance
122
+ VALUE msg_str = rb_str_new_cstr(message);
123
+ VALUE argv[2] = {msg_str, kwargs};
124
+ VALUE error = rb_funcallv_kw(eParseError, rb_intern("new"), 2, argv, RB_PASS_KEYWORDS);
125
+
126
+ // Raise the error
127
+ rb_exc_raise(error);
128
+ }
129
+
130
// Check whether a selector consists only of characters and sequences that are
// acceptable in a CSS selector.
//
// Input:   [start, end) = selector text (not required to be NUL-terminated)
// Returns: 1 if valid, 0 if invalid
//
// Accepted:
//   - ASCII letters and digits, space, tab, CR, LF
//   - - _ . # [ ] : * > + ~ ( ) ' " = ^ $ | \ & % / ! ,
//   - any byte >= 0x80 (non-ASCII code points are legal in CSS identifiers)
//   - any single byte directly following a backslash (CSS escape)
//   - any bytes between a quote and its matching closing quote
//     (attribute-selector string values such as [href="../x"])
// Rejected: ".." and "##" outside of strings/escapes, and any other byte.
static inline int is_valid_selector(const char *start, const char *end) {
    const char *p = start;

    while (p < end) {
        unsigned char c = (unsigned char)*p;

        // Non-ASCII (UTF-8 lead or continuation byte): valid identifier content
        if (c >= 0x80) {
            p++;
            continue;
        }

        // Backslash escape: the escaped byte is literal and must not be validated
        if (c == '\\') {
            p += (end - p >= 2) ? 2 : 1;
            continue;
        }

        // Quoted string: contents are opaque when the closing quote exists
        if (c == '"' || c == '\'') {
            const char *q = p + 1;
            while (q < end && (unsigned char)*q != c) {
                q += (*q == '\\' && end - q >= 2) ? 2 : 1;
            }
            if (q < end) {
                p = q + 1; // resume after the closing quote
                continue;
            }
            // Unterminated string: accept the quote itself and keep validating
            // the remaining bytes one by one
            p++;
            continue;
        }

        // Doubled '.' or '#' is never valid outside strings/escapes
        if ((c == '.' || c == '#') && p + 1 < end && (unsigned char)p[1] == c) {
            return 0;
        }

        // Alphanumeric
        if ((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || (c >= '0' && c <= '9')) {
            p++;
            continue;
        }

        // Whitespace
        if (c == ' ' || c == '\t' || c == '\n' || c == '\r') {
            p++;
            continue;
        }

        // Remaining valid selector punctuation
        switch (c) {
            case '-':  // Hyphen (identifiers, attribute selectors)
            case '_':  // Underscore (identifiers)
            case '.':  // Class selector
            case '#':  // ID selector
            case '[':  // Attribute selector start
            case ']':  // Attribute selector end
            case ':':  // Pseudo-class / pseudo-element (:: is valid)
            case '*':  // Universal selector, attribute operator *=
            case '>':  // Child combinator
            case '+':  // Adjacent sibling combinator
            case '~':  // General sibling combinator, attribute operator ~=
            case '(':  // Functional pseudo-class start
            case ')':  // Functional pseudo-class end
            case '=':  // Attribute operator
            case '^':  // Attribute operator ^=
            case '$':  // Attribute operator $=
            case '|':  // Attribute operator |=, namespace separator
            case '&':  // Nesting selector
            case '%':  // Sometimes used in selectors (e.g. keyframe percentages)
            case '/':  // Sometimes used in selectors
            case '!':  // Negation (rare)
            case ',':  // List separator (normally split off earlier, but allowed)
                p++;
                break;

            default:
                // Invalid character found
                return 0;
        }
    }

    return 1;
}
200
+
73
201
  // Lowercase property name (CSS property names are ASCII-only)
74
202
  // Non-static so merge_new.c can use it
75
203
  VALUE lowercase_property(VALUE property_str) {
@@ -169,7 +297,7 @@ static VALUE resolve_nested_selector(VALUE parent_selector, const char *nested_s
169
297
  long parent_len = RSTRING_LEN(parent_selector);
170
298
 
171
299
  // Check if nested selector contains &
172
- int has_ampersand = 0;
300
+ BOOLEAN has_ampersand = 0;
173
301
  for (long i = 0; i < nested_len; i++) {
174
302
  if (nested_sel[i] == '&') {
175
303
  has_ampersand = 1;
@@ -384,12 +512,188 @@ static void update_media_index(ParserContext *ctx, VALUE media_sym, int rule_id)
384
512
  }
385
513
 
386
514
  // Add to full query symbol (after media types for insertion order)
387
- add_to_media_index(ctx->media_index, media_sym, rule_id);
515
+ // BUT: skip if it contains a comma (comma-separated list like "screen, print")
516
+ // because we already added each individual type above
517
+ int has_comma = 0;
518
+ for (long i = 0; i < query_len; i++) {
519
+ if (query[i] == ',') {
520
+ has_comma = 1;
521
+ break;
522
+ }
523
+ }
524
+ if (!has_comma) {
525
+ add_to_media_index(ctx->media_index, media_sym, rule_id);
526
+ }
388
527
 
389
528
  // Guard media_str since we extracted C pointer and called extract_media_types (which allocates)
390
529
  RB_GC_GUARD(media_str);
391
530
  }
392
531
 
532
+ // Helper struct for passing arguments to resolver callback
533
+ typedef struct {
534
+ VALUE uri_resolver;
535
+ VALUE base_uri;
536
+ VALUE url_str;
537
+ } ResolverArgs;
538
+
539
+ // Callback for rb_protect to call the resolver proc
540
+ static VALUE call_resolver(VALUE arg) {
541
+ ResolverArgs *args = (ResolverArgs *)arg;
542
+ return rb_funcall(args->uri_resolver, rb_intern("call"), 2, args->base_uri, args->url_str);
543
+ }
544
+
545
+ /*
546
+ * Convert relative URLs in a CSS value to absolute URLs
547
+ *
548
+ * Scans for url() patterns and resolves relative URLs using the resolver proc.
549
+ * Returns a new Ruby string with resolved URLs, or the original if no conversion needed.
550
+ */
551
+ static VALUE convert_urls_in_value(VALUE value_str, VALUE base_uri, VALUE uri_resolver) {
552
+ const char *val = RSTRING_PTR(value_str);
553
+ long len = RSTRING_LEN(value_str);
554
+
555
+ // Quick check: does value contain 'url('?
556
+ const char *url_check = val;
557
+ int has_url = 0;
558
+ while (url_check < val + len - 3) {
559
+ if ((*url_check == 'u' || *url_check == 'U') &&
560
+ (*(url_check + 1) == 'r' || *(url_check + 1) == 'R') &&
561
+ (*(url_check + 2) == 'l' || *(url_check + 2) == 'L') &&
562
+ *(url_check + 3) == '(') {
563
+ has_url = 1;
564
+ break;
565
+ }
566
+ url_check++;
567
+ }
568
+ if (!has_url) return value_str;
569
+
570
+ // Build result string
571
+ VALUE result = rb_str_new("", 0);
572
+ const char *pos = val;
573
+
574
+ while (pos < val + len) {
575
+ // Look for 'url(' - case insensitive
576
+ if (pos + 3 < val + len &&
577
+ (*pos == 'u' || *pos == 'U') &&
578
+ (*(pos + 1) == 'r' || *(pos + 1) == 'R') &&
579
+ (*(pos + 2) == 'l' || *(pos + 2) == 'L') &&
580
+ *(pos + 3) == '(') {
581
+
582
+ // Append 'url('
583
+ rb_str_cat(result, "url(", 4);
584
+ pos += 4;
585
+
586
+ // Skip whitespace after (
587
+ while (pos < val + len && IS_WHITESPACE(*pos)) pos++;
588
+
589
+ // Determine quote character (if any)
590
+ char quote = 0;
591
+ if (pos < val + len && (*pos == '\'' || *pos == '"')) {
592
+ quote = *pos;
593
+ pos++;
594
+ }
595
+
596
+ // Find end of URL
597
+ const char *url_start = pos;
598
+ if (quote) {
599
+ // Quoted URL - find closing quote
600
+ while (pos < val + len && *pos != quote) {
601
+ if (*pos == '\\' && pos + 1 < val + len) {
602
+ pos += 2; // Skip escaped char
603
+ } else {
604
+ pos++;
605
+ }
606
+ }
607
+ } else {
608
+ // Unquoted URL - find ) or whitespace
609
+ while (pos < val + len && *pos != ')' && !IS_WHITESPACE(*pos)) {
610
+ pos++;
611
+ }
612
+ }
613
+ const char *url_end = pos;
614
+
615
+ // Extract URL string
616
+ long url_len = url_end - url_start;
617
+ VALUE url_str = rb_str_new(url_start, url_len);
618
+
619
+ // Check if URL needs resolution (is relative)
620
+ int needs_resolution = 0;
621
+ if (url_len > 0) {
622
+ // Check for absolute URLs or data URIs that don't need resolution
623
+ const char *u = url_start;
624
+ if ((url_len >= 5 && strncmp(u, "data:", 5) == 0) ||
625
+ (url_len >= 7 && strncmp(u, "http://", 7) == 0) ||
626
+ (url_len >= 8 && strncmp(u, "https://", 8) == 0) ||
627
+ (url_len >= 2 && strncmp(u, "//", 2) == 0) ||
628
+ (url_len >= 1 && *u == '#')) { // Fragment reference
629
+ needs_resolution = 0;
630
+ } else {
631
+ needs_resolution = 1;
632
+ }
633
+ }
634
+
635
+ if (needs_resolution) {
636
+ // Resolve using the resolver proc (always provided by Ruby side)
637
+ // Wrap in rb_protect to catch exceptions
638
+ ResolverArgs args = { uri_resolver, base_uri, url_str };
639
+ int state = 0;
640
+ VALUE resolved = rb_protect(call_resolver, (VALUE)&args, &state);
641
+
642
+ if (state) {
643
+ // Exception occurred - preserve original URL
644
+ rb_set_errinfo(Qnil); // Clear exception
645
+ if (quote) {
646
+ rb_str_cat(result, &quote, 1);
647
+ rb_str_append(result, url_str);
648
+ rb_str_cat(result, &quote, 1);
649
+ } else {
650
+ rb_str_append(result, url_str);
651
+ }
652
+ } else {
653
+ // Output with single quotes (canonical format)
654
+ rb_str_cat(result, "'", 1);
655
+ rb_str_append(result, resolved);
656
+ rb_str_cat(result, "'", 1);
657
+ }
658
+
659
+ RB_GC_GUARD(resolved);
660
+ } else {
661
+ // Keep original URL with original quoting
662
+ if (quote) {
663
+ rb_str_cat(result, &quote, 1);
664
+ rb_str_append(result, url_str);
665
+ rb_str_cat(result, &quote, 1);
666
+ } else {
667
+ rb_str_append(result, url_str);
668
+ }
669
+ }
670
+
671
+ RB_GC_GUARD(url_str);
672
+
673
+ // Skip closing quote if present
674
+ if (quote && pos < val + len && *pos == quote) {
675
+ pos++;
676
+ }
677
+
678
+ // Skip whitespace before )
679
+ while (pos < val + len && IS_WHITESPACE(*pos)) pos++;
680
+
681
+ // Skip closing )
682
+ if (pos < val + len && *pos == ')') {
683
+ rb_str_cat(result, ")", 1);
684
+ pos++;
685
+ }
686
+ } else {
687
+ // Regular character - append to result
688
+ rb_str_cat(result, pos, 1);
689
+ pos++;
690
+ }
691
+ }
692
+
693
+ RB_GC_GUARD(result);
694
+ return result;
695
+ }
696
+
393
697
  /*
394
698
  * Parse declaration block into array of Declaration structs
395
699
  *
@@ -403,7 +707,7 @@ static void update_media_index(ParserContext *ctx, VALUE media_sym, int rule_id)
403
707
  * - Values containing parentheses (e.g., url(...), rgba(...))
404
708
  * - !important flag
405
709
  */
406
- static VALUE parse_declarations(const char *start, const char *end) {
710
+ static VALUE parse_declarations(const char *start, const char *end, ParserContext *ctx) {
407
711
  VALUE declarations = rb_ary_new();
408
712
 
409
713
  const char *pos = start;
@@ -422,6 +726,46 @@ static VALUE parse_declarations(const char *start, const char *end) {
422
726
 
423
727
  // Malformed declaration - skip to next semicolon to recover
424
728
  if (pos >= end || *pos != ':') {
729
+ if (ctx->check_malformed_declarations) {
730
+ // Extract property text for error message
731
+ const char *prop_text_end = pos;
732
+ trim_trailing(prop_start, &prop_text_end);
733
+ long prop_text_len = prop_text_end - prop_start;
734
+
735
+ const char *css = RSTRING_PTR(ctx->css_string);
736
+ long error_pos = prop_start - css;
737
+
738
+ if (prop_text_len == 0) {
739
+ // Build keyword args hash
740
+ VALUE kwargs = rb_hash_new();
741
+ rb_hash_aset(kwargs, ID2SYM(rb_intern("css")), ctx->css_string);
742
+ rb_hash_aset(kwargs, ID2SYM(rb_intern("pos")), LONG2NUM(error_pos));
743
+ rb_hash_aset(kwargs, ID2SYM(rb_intern("type")), ID2SYM(rb_intern("malformed_declaration")));
744
+
745
+ VALUE msg_str = rb_str_new_cstr("Malformed declaration: missing property name");
746
+ VALUE argv[2] = {msg_str, kwargs};
747
+ VALUE error = rb_funcallv_kw(eParseError, rb_intern("new"), 2, argv, RB_PASS_KEYWORDS);
748
+ rb_exc_raise(error);
749
+ } else {
750
+ // Limit property name to 200 chars in error message
751
+ int display_len = (prop_text_len > 200) ? 200 : (int)prop_text_len;
752
+ char error_msg[256];
753
+ snprintf(error_msg, sizeof(error_msg),
754
+ "Malformed declaration: missing colon after '%.*s'",
755
+ display_len, prop_start);
756
+
757
+ // Build keyword args hash
758
+ VALUE kwargs = rb_hash_new();
759
+ rb_hash_aset(kwargs, ID2SYM(rb_intern("css")), ctx->css_string);
760
+ rb_hash_aset(kwargs, ID2SYM(rb_intern("pos")), LONG2NUM(error_pos));
761
+ rb_hash_aset(kwargs, ID2SYM(rb_intern("type")), ID2SYM(rb_intern("malformed_declaration")));
762
+
763
+ VALUE msg_str = rb_str_new_cstr(error_msg);
764
+ VALUE argv[2] = {msg_str, kwargs};
765
+ VALUE error = rb_funcallv_kw(eParseError, rb_intern("new"), 2, argv, RB_PASS_KEYWORDS);
766
+ rb_exc_raise(error);
767
+ }
768
+ }
425
769
  while (pos < end && *pos != ';') pos++;
426
770
  if (pos < end) pos++; // Skip the semicolon
427
771
  continue;
@@ -461,7 +805,7 @@ static VALUE parse_declarations(const char *start, const char *end) {
461
805
  trim_trailing(val_start, &val_end);
462
806
 
463
807
  // Check for !important
464
- int is_important = 0;
808
+ BOOLEAN is_important = 0;
465
809
  if (val_end - val_start >= 10) { // strlen("!important") = 10
466
810
  const char *check = val_end - 10;
467
811
  while (check < val_end && IS_WHITESPACE(*check)) check++;
@@ -483,6 +827,34 @@ static VALUE parse_declarations(const char *start, const char *end) {
483
827
  // Final trim
484
828
  trim_trailing(val_start, &val_end);
485
829
 
830
+ // Check for empty value
831
+ if (val_end <= val_start && ctx->check_empty_values) {
832
+ long prop_len = prop_end - prop_start;
833
+ const char *css = RSTRING_PTR(ctx->css_string);
834
+ long error_pos = val_start - css;
835
+
836
+ // Build error message
837
+ int display_len = (prop_len > 200) ? 200 : (int)prop_len;
838
+ char error_msg[256];
839
+ snprintf(error_msg, sizeof(error_msg),
840
+ "Empty value for property '%.*s'",
841
+ display_len, prop_start);
842
+
843
+ // Build keyword args hash
844
+ VALUE kwargs = rb_hash_new();
845
+ rb_hash_aset(kwargs, ID2SYM(rb_intern("css")), ctx->css_string);
846
+ rb_hash_aset(kwargs, ID2SYM(rb_intern("pos")), LONG2NUM(error_pos));
847
+ rb_hash_aset(kwargs, ID2SYM(rb_intern("type")), ID2SYM(rb_intern("empty_value")));
848
+
849
+ // Create ParseError instance: ParseError.new(message, **kwargs)
850
+ VALUE msg_str = rb_str_new_cstr(error_msg);
851
+ VALUE argv[2] = {msg_str, kwargs};
852
+ VALUE error = rb_funcallv_kw(eParseError, rb_intern("new"), 2, argv, RB_PASS_KEYWORDS);
853
+
854
+ // Raise the error
855
+ rb_exc_raise(error);
856
+ }
857
+
486
858
  // Skip if value is empty
487
859
  if (val_end > val_start) {
488
860
  long prop_len = prop_end - prop_start;
@@ -513,6 +885,11 @@ static VALUE parse_declarations(const char *start, const char *end) {
513
885
  }
514
886
  VALUE value = rb_utf8_str_new(val_start, val_len);
515
887
 
888
+ // Convert relative URLs to absolute if enabled
889
+ if (ctx && ctx->absolute_paths && !NIL_P(ctx->base_uri)) {
890
+ value = convert_urls_in_value(value, ctx->base_uri, ctx->uri_resolver);
891
+ }
892
+
516
893
  // Create Declaration struct
517
894
  VALUE decl = rb_struct_new(cDeclaration,
518
895
  property,
@@ -531,7 +908,7 @@ static VALUE parse_declarations(const char *start, const char *end) {
531
908
 
532
909
  // Forward declarations
533
910
  static void parse_css_recursive(ParserContext *ctx, const char *css, const char *pe,
534
- VALUE parent_media_sym, VALUE parent_selector, VALUE parent_rule_id);
911
+ VALUE parent_media_sym, VALUE parent_selector, VALUE parent_rule_id, int parent_media_query_id);
535
912
  static VALUE combine_media_queries(VALUE parent, VALUE child);
536
913
 
537
914
  /*
@@ -630,7 +1007,7 @@ static VALUE intern_media_query_safe(ParserContext *ctx, const char *query_str,
630
1007
  * Returns: Array of declarations (only the declarations, not nested rules)
631
1008
  */
632
1009
  static VALUE parse_mixed_block(ParserContext *ctx, const char *start, const char *end,
633
- VALUE parent_selector, VALUE parent_rule_id, VALUE parent_media_sym) {
1010
+ VALUE parent_selector, VALUE parent_rule_id, VALUE parent_media_sym, int parent_media_query_id) {
634
1011
  // Check recursion depth to prevent stack overflow
635
1012
  if (ctx->depth > MAX_PARSE_DEPTH) {
636
1013
  rb_raise(eDepthError,
@@ -661,23 +1038,109 @@ static VALUE parse_mixed_block(ParserContext *ctx, const char *start, const char
661
1038
  }
662
1039
  if (media_query_end >= end) break;
663
1040
 
664
- // Extract media query
1041
+ // Extract media query string
665
1042
  const char *media_query_start = media_start;
666
1043
  const char *media_query_end_trimmed = media_query_end;
667
1044
  trim_trailing(media_query_start, &media_query_end_trimmed);
668
- VALUE media_sym = intern_media_query_safe(ctx, media_query_start, media_query_end_trimmed - media_query_start);
1045
+
1046
+ // Parse media query and create MediaQuery object
1047
+ const char *mq_ptr = media_query_start;
1048
+ VALUE media_type;
1049
+ VALUE media_conditions = Qnil;
1050
+
1051
+ if (*mq_ptr == '(') {
1052
+ // Starts with '(' - just conditions, type defaults to :all
1053
+ media_type = ID2SYM(rb_intern("all"));
1054
+ media_conditions = rb_utf8_str_new(mq_ptr, media_query_end_trimmed - mq_ptr);
1055
+ } else {
1056
+ // Extract media type (first word)
1057
+ const char *type_start = mq_ptr;
1058
+ while (mq_ptr < media_query_end_trimmed && !IS_WHITESPACE(*mq_ptr) && *mq_ptr != '(') mq_ptr++;
1059
+ VALUE type_str = rb_utf8_str_new(type_start, mq_ptr - type_start);
1060
+ media_type = ID2SYM(rb_intern_str(type_str));
1061
+
1062
+ // Skip "and" keyword if present
1063
+ while (mq_ptr < media_query_end_trimmed && IS_WHITESPACE(*mq_ptr)) mq_ptr++;
1064
+ if (mq_ptr + 3 <= media_query_end_trimmed && strncmp(mq_ptr, "and", 3) == 0) {
1065
+ mq_ptr += 3;
1066
+ while (mq_ptr < media_query_end_trimmed && IS_WHITESPACE(*mq_ptr)) mq_ptr++;
1067
+ }
1068
+ if (mq_ptr < media_query_end_trimmed) {
1069
+ media_conditions = rb_utf8_str_new(mq_ptr, media_query_end_trimmed - mq_ptr);
1070
+ }
1071
+ }
1072
+
1073
+ // Create MediaQuery object
1074
+ VALUE media_query = rb_struct_new(cMediaQuery,
1075
+ INT2FIX(ctx->media_query_id_counter),
1076
+ media_type,
1077
+ media_conditions
1078
+ );
1079
+ rb_ary_push(ctx->media_queries, media_query);
1080
+ int nested_media_query_id = ctx->media_query_id_counter;
1081
+ ctx->media_query_id_counter++;
669
1082
 
670
1083
  p = media_query_end + 1; // Skip {
671
1084
 
672
1085
  // Find matching closing brace
673
1086
  const char *media_block_start = p;
674
- const char *media_block_end = find_matching_brace(p, end);
1087
+ const char *media_block_end = find_matching_brace_strict(p, end, ctx->check_unclosed_blocks);
675
1088
  p = media_block_end;
676
1089
 
677
1090
  if (p < end) p++; // Skip }
678
1091
 
679
- // Combine media queries: parent + child
680
- VALUE combined_media_sym = combine_media_queries(parent_media_sym, media_sym);
1092
+ // Handle combining media queries when parent has media too
1093
+ int combined_media_query_id = nested_media_query_id;
1094
+ if (parent_media_query_id >= 0) {
1095
+ // Get parent MediaQuery
1096
+ VALUE parent_mq = rb_ary_entry(ctx->media_queries, parent_media_query_id);
1097
+
1098
+ // This should never happen - parent_media_query_id should always be valid
1099
+ if (NIL_P(parent_mq)) {
1100
+ rb_raise(eParseError,
1101
+ "Invalid parent_media_query_id: %d (not found in media_queries array)",
1102
+ parent_media_query_id);
1103
+ }
1104
+
1105
+ VALUE parent_type = rb_struct_aref(parent_mq, INT2FIX(1)); // type field
1106
+ VALUE parent_conditions = rb_struct_aref(parent_mq, INT2FIX(2)); // conditions field
1107
+
1108
+ // Combine: parent conditions + " and " + child conditions
1109
+ VALUE combined_conditions;
1110
+ if (!NIL_P(parent_conditions) && !NIL_P(media_conditions)) {
1111
+ combined_conditions = rb_str_new_cstr("");
1112
+ rb_str_append(combined_conditions, parent_conditions);
1113
+ rb_str_cat2(combined_conditions, " and ");
1114
+ rb_str_append(combined_conditions, media_conditions);
1115
+ } else if (!NIL_P(parent_conditions)) {
1116
+ combined_conditions = parent_conditions;
1117
+ } else {
1118
+ combined_conditions = media_conditions;
1119
+ }
1120
+
1121
+ // Determine combined type (if parent is :all, use child type; if child is :all, use parent type; if both have types, use parent type)
1122
+ VALUE combined_type;
1123
+ ID all_id = rb_intern("all");
1124
+ if (SYM2ID(parent_type) == all_id) {
1125
+ combined_type = media_type;
1126
+ } else {
1127
+ combined_type = parent_type;
1128
+ }
1129
+
1130
+ // Create combined MediaQuery
1131
+ VALUE combined_mq = rb_struct_new(cMediaQuery,
1132
+ INT2FIX(ctx->media_query_id_counter),
1133
+ combined_type,
1134
+ combined_conditions
1135
+ );
1136
+ rb_ary_push(ctx->media_queries, combined_mq);
1137
+ combined_media_query_id = ctx->media_query_id_counter;
1138
+ ctx->media_query_id_counter++;
1139
+
1140
+ // Guard combined_conditions since we built it with rb_str_new_cstr/rb_str_append
1141
+ // and it's used in rb_struct_new above (rb_ary_push could trigger GC)
1142
+ RB_GC_GUARD(combined_conditions);
1143
+ }
681
1144
 
682
1145
  // Parse the block with parse_mixed_block to support further nesting
683
1146
  // Create a rule ID for this media rule
@@ -690,10 +1153,11 @@ static VALUE parse_mixed_block(ParserContext *ctx, const char *start, const char
690
1153
  // Parse mixed block (may contain declarations and/or nested @media)
691
1154
  ctx->depth++;
692
1155
  VALUE media_declarations = parse_mixed_block(ctx, media_block_start, media_block_end,
693
- parent_selector, INT2FIX(media_rule_id), combined_media_sym);
1156
+ parent_selector, INT2FIX(media_rule_id), Qnil, combined_media_query_id);
694
1157
  ctx->depth--;
695
1158
 
696
1159
  // Create rule with the parent selector and declarations, associated with combined media query
1160
+ VALUE media_query_id_val = INT2FIX(combined_media_query_id);
697
1161
  VALUE rule = rb_struct_new(cRule,
698
1162
  INT2FIX(media_rule_id),
699
1163
  parent_selector,
@@ -701,7 +1165,8 @@ static VALUE parse_mixed_block(ParserContext *ctx, const char *start, const char
701
1165
  Qnil, // specificity
702
1166
  parent_rule_id, // Link to parent for nested @media serialization
703
1167
  Qnil, // nesting_style (nil for @media nesting)
704
- Qnil // selector_list_id
1168
+ Qnil, // selector_list_id
1169
+ media_query_id_val // media_query_id from parent context
705
1170
  );
706
1171
 
707
1172
  // Mark that we have nesting (only set once)
@@ -711,7 +1176,13 @@ static VALUE parse_mixed_block(ParserContext *ctx, const char *start, const char
711
1176
 
712
1177
  // Replace placeholder with actual rule
713
1178
  rb_ary_store(ctx->rules_array, parent_pos, rule);
714
- update_media_index(ctx, combined_media_sym, media_rule_id);
1179
+
1180
+ // Update media_index using the MediaQuery's type symbol
1181
+ VALUE combined_mq = rb_ary_entry(ctx->media_queries, combined_media_query_id);
1182
+ if (!NIL_P(combined_mq)) {
1183
+ VALUE mq_type = rb_struct_aref(combined_mq, INT2FIX(1)); // type field
1184
+ update_media_index(ctx, mq_type, media_rule_id);
1185
+ }
715
1186
 
716
1187
  continue;
717
1188
  }
@@ -739,7 +1210,7 @@ static VALUE parse_mixed_block(ParserContext *ctx, const char *start, const char
739
1210
  // Example: "& .child { font: 14px; }"
740
1211
  // ^nested_block_start ^nested_block_end (at })
741
1212
  const char *nested_block_start = p;
742
- const char *nested_block_end = find_matching_brace(p, end);
1213
+ const char *nested_block_end = find_matching_brace_strict(p, end, ctx->check_unclosed_blocks);
743
1214
  p = nested_block_end;
744
1215
 
745
1216
  if (p < end) p++; // Skip }
@@ -770,13 +1241,18 @@ static VALUE parse_mixed_block(ParserContext *ctx, const char *start, const char
770
1241
  // Get rule ID
771
1242
  int rule_id = ctx->rule_id_counter++;
772
1243
 
1244
+ // Reserve position in rules array (ensures sequential IDs match array indices)
1245
+ long rule_position = RARRAY_LEN(ctx->rules_array);
1246
+ rb_ary_push(ctx->rules_array, Qnil); // Placeholder
1247
+
773
1248
  // Recursively parse nested block
774
1249
  ctx->depth++;
775
1250
  VALUE nested_declarations = parse_mixed_block(ctx, nested_block_start, nested_block_end,
776
- resolved_selector, INT2FIX(rule_id), parent_media_sym);
1251
+ resolved_selector, INT2FIX(rule_id), parent_media_sym, parent_media_query_id);
777
1252
  ctx->depth--;
778
1253
 
779
1254
  // Create rule for nested selector
1255
+ VALUE media_query_id_val = (parent_media_query_id >= 0) ? INT2FIX(parent_media_query_id) : Qnil;
780
1256
  VALUE rule = rb_struct_new(cRule,
781
1257
  INT2FIX(rule_id),
782
1258
  resolved_selector,
@@ -784,7 +1260,8 @@ static VALUE parse_mixed_block(ParserContext *ctx, const char *start, const char
784
1260
  Qnil, // specificity
785
1261
  parent_rule_id,
786
1262
  nesting_style,
787
- Qnil // selector_list_id
1263
+ Qnil, // selector_list_id
1264
+ media_query_id_val // media_query_id from parent context
788
1265
  );
789
1266
 
790
1267
  // Mark that we have nesting (only set once)
@@ -792,7 +1269,8 @@ static VALUE parse_mixed_block(ParserContext *ctx, const char *start, const char
792
1269
  ctx->has_nesting = 1;
793
1270
  }
794
1271
 
795
- rb_ary_push(ctx->rules_array, rule);
1272
+ // Replace placeholder with actual rule
1273
+ rb_ary_store(ctx->rules_array, rule_position, rule);
796
1274
  update_media_index(ctx, parent_media_sym, rule_id);
797
1275
  }
798
1276
 
@@ -821,7 +1299,7 @@ static VALUE parse_mixed_block(ParserContext *ctx, const char *start, const char
821
1299
  trim_leading(&p, end);
822
1300
 
823
1301
  const char *val_start = p;
824
- int important = 0;
1302
+ BOOLEAN important = 0;
825
1303
 
826
1304
  // Find end of value (semicolon or closing brace or end)
827
1305
  while (p < end && *p != ';' && *p != '}') p++;
@@ -874,6 +1352,11 @@ static VALUE parse_mixed_block(ParserContext *ctx, const char *start, const char
874
1352
  }
875
1353
  VALUE value = rb_utf8_str_new(val_start, val_len);
876
1354
 
1355
+ // Convert relative URLs to absolute if enabled
1356
+ if (ctx->absolute_paths && !NIL_P(ctx->base_uri)) {
1357
+ value = convert_urls_in_value(value, ctx->base_uri, ctx->uri_resolver);
1358
+ }
1359
+
877
1360
  VALUE decl = rb_struct_new(cDeclaration,
878
1361
  property,
879
1362
  value,
@@ -956,6 +1439,7 @@ static void parse_import_statement(ParserContext *ctx, const char **p_ptr, const
956
1439
 
957
1440
  // Check for optional media query (everything until semicolon)
958
1441
  VALUE media = Qnil;
1442
+ VALUE media_query_id_val = Qnil;
959
1443
  if (p < pe && *p != ';') {
960
1444
  const char *media_start = p;
961
1445
 
@@ -970,8 +1454,73 @@ static void parse_import_statement(ParserContext *ctx, const char **p_ptr, const
970
1454
  }
971
1455
 
972
1456
  if (media_end > media_start) {
973
- VALUE media_str = rb_utf8_str_new(media_start, media_end - media_start);
974
- media = ID2SYM(rb_intern_str(media_str));
1457
+ // media field should be a String, not a Symbol
1458
+ media = rb_utf8_str_new(media_start, media_end - media_start);
1459
+
1460
+ // Split comma-separated media queries (same as @media blocks)
1461
+ VALUE media_query_ids = rb_ary_new();
1462
+
1463
+ const char *query_start = media_start;
1464
+ for (const char *p_comma = media_start; p_comma <= media_end; p_comma++) {
1465
+ if (p_comma == media_end || *p_comma == ',') {
1466
+ const char *query_end = p_comma;
1467
+
1468
+ // Trim whitespace from this query
1469
+ while (query_start < query_end && IS_WHITESPACE(*query_start)) query_start++;
1470
+ while (query_end > query_start && IS_WHITESPACE(*(query_end - 1))) query_end--;
1471
+
1472
+ if (query_start < query_end) {
1473
+ // Parse this individual media query
1474
+ const char *mq_ptr = query_start;
1475
+ VALUE media_type;
1476
+ VALUE media_conditions = Qnil;
1477
+
1478
+ if (*mq_ptr == '(') {
1479
+ // Starts with '(' - just conditions, type defaults to :all
1480
+ media_type = ID2SYM(rb_intern("all"));
1481
+ media_conditions = rb_utf8_str_new(mq_ptr, query_end - mq_ptr);
1482
+ } else {
1483
+ // Extract media type (first word)
1484
+ const char *type_start = mq_ptr;
1485
+ while (mq_ptr < query_end && !IS_WHITESPACE(*mq_ptr) && *mq_ptr != '(') mq_ptr++;
1486
+ VALUE type_str = rb_utf8_str_new(type_start, mq_ptr - type_start);
1487
+ media_type = ID2SYM(rb_intern_str(type_str));
1488
+
1489
+ // Skip whitespace
1490
+ while (mq_ptr < query_end && IS_WHITESPACE(*mq_ptr)) mq_ptr++;
1491
+
1492
+ // Check if there are conditions (rest of string)
1493
+ if (mq_ptr < query_end) {
1494
+ media_conditions = rb_utf8_str_new(mq_ptr, query_end - mq_ptr);
1495
+ }
1496
+ }
1497
+
1498
+ // Create MediaQuery struct
1499
+ VALUE media_query = rb_struct_new(cMediaQuery,
1500
+ INT2FIX(ctx->media_query_id_counter),
1501
+ media_type,
1502
+ media_conditions
1503
+ );
1504
+
1505
+ rb_ary_push(ctx->media_queries, media_query);
1506
+ rb_ary_push(media_query_ids, INT2FIX(ctx->media_query_id_counter));
1507
+ ctx->media_query_id_counter++;
1508
+ }
1509
+
1510
+ // Move to start of next query
1511
+ query_start = p_comma + 1;
1512
+ }
1513
+ }
1514
+
1515
+ // If multiple queries, track them as a list
1516
+ if (RARRAY_LEN(media_query_ids) > 1) {
1517
+ int media_query_list_id = ctx->next_media_query_list_id;
1518
+ rb_hash_aset(ctx->media_query_lists, INT2FIX(media_query_list_id), media_query_ids);
1519
+ ctx->next_media_query_list_id++;
1520
+ }
1521
+
1522
+ // Use first query ID for the import statement
1523
+ media_query_id_val = rb_ary_entry(media_query_ids, 0);
975
1524
  }
976
1525
  }
977
1526
 
@@ -983,12 +1532,13 @@ static void parse_import_statement(ParserContext *ctx, const char **p_ptr, const
983
1532
  INT2FIX(ctx->rule_id_counter),
984
1533
  url,
985
1534
  media,
1535
+ media_query_id_val,
986
1536
  Qfalse);
987
1537
 
988
1538
  DEBUG_PRINTF("[IMPORT_STMT] Created import: id=%d, url=%s, media=%s\n",
989
1539
  ctx->rule_id_counter,
990
1540
  RSTRING_PTR(url),
991
- NIL_P(media) ? "nil" : RSTRING_PTR(rb_sym2str(media)));
1541
+ NIL_P(media) ? "nil" : RSTRING_PTR(media));
992
1542
 
993
1543
  rb_ary_push(ctx->imports_array, import_stmt);
994
1544
  ctx->rule_id_counter++;
@@ -1008,7 +1558,7 @@ static void parse_import_statement(ParserContext *ctx, const char **p_ptr, const
1008
1558
  * parent_rule_id: Parent rule ID (Fixnum) for nested rules (or Qnil for top-level)
1009
1559
  */
1010
1560
  static void parse_css_recursive(ParserContext *ctx, const char *css, const char *pe,
1011
- VALUE parent_media_sym, VALUE parent_selector, VALUE parent_rule_id) {
1561
+ VALUE parent_media_sym, VALUE parent_selector, VALUE parent_rule_id, int parent_media_query_id) {
1012
1562
  // Check recursion depth to prevent stack overflow
1013
1563
  if (ctx->depth > MAX_PARSE_DEPTH) {
1014
1564
  rb_raise(eDepthError,
@@ -1070,26 +1620,150 @@ static void parse_css_recursive(ParserContext *ctx, const char *css, const char
1070
1620
  // Trim
1071
1621
  trim_trailing(mq_start, &mq_end);
1072
1622
 
1623
+ // Check for empty media query
1624
+ if (mq_end <= mq_start) {
1625
+ if (ctx->check_malformed_at_rules) {
1626
+ raise_parse_error_at(ctx, mq_start, "Malformed @media: missing media query", "malformed_at_rule");
1627
+ } else {
1628
+ // Empty media query with check disabled - skip @media wrapper and parse contents as regular rules
1629
+ if (p >= pe || *p != '{') {
1630
+ continue; // Malformed structure
1631
+ }
1632
+ p++; // Skip opening {
1633
+ const char *block_start = p;
1634
+ const char *block_end = find_matching_brace_strict(p, pe, ctx->check_unclosed_blocks);
1635
+ p = block_end;
1636
+
1637
+ // Parse block contents with NO media query context
1638
+ ctx->depth++;
1639
+ parse_css_recursive(ctx, block_start, block_end, parent_media_sym, NO_PARENT_SELECTOR, NO_PARENT_RULE_ID, parent_media_query_id);
1640
+ ctx->depth--;
1641
+
1642
+ if (p < pe && *p == '}') p++;
1643
+ continue;
1644
+ }
1645
+ }
1646
+
1073
1647
  if (p >= pe || *p != '{') {
1074
1648
  continue; // Malformed
1075
1649
  }
1076
1650
 
1077
- // Intern media query
1078
- VALUE child_media_sym = intern_media_query_safe(ctx, mq_start, mq_end - mq_start);
1651
+ // Split comma-separated media queries (e.g., "screen, print" -> ["screen", "print"])
1652
+ // Per W3C spec, comma acts as logical OR - each query is independent
1653
+ VALUE media_query_ids = rb_ary_new();
1654
+
1655
+ const char *query_start = mq_start;
1656
+ for (const char *p_comma = mq_start; p_comma <= mq_end; p_comma++) {
1657
+ if (p_comma == mq_end || *p_comma == ',') {
1658
+ const char *query_end = p_comma;
1659
+
1660
+ // Trim whitespace from this query
1661
+ while (query_start < query_end && IS_WHITESPACE(*query_start)) query_start++;
1662
+ while (query_end > query_start && IS_WHITESPACE(*(query_end - 1))) query_end--;
1663
+
1664
+ if (query_start < query_end) {
1665
+ // Parse this individual media query
1666
+ const char *mq_ptr = query_start;
1667
+ VALUE media_type;
1668
+ VALUE media_conditions = Qnil;
1669
+
1670
+ if (*mq_ptr == '(') {
1671
+ // Starts with '(' - just conditions, type defaults to :all
1672
+ media_type = ID2SYM(rb_intern("all"));
1673
+ media_conditions = rb_utf8_str_new(mq_ptr, query_end - mq_ptr);
1674
+ } else {
1675
+ // Extract media type (first word, stopping at whitespace, comma, or '(')
1676
+ const char *type_start = mq_ptr;
1677
+ while (mq_ptr < query_end && !IS_WHITESPACE(*mq_ptr) && *mq_ptr != '(') mq_ptr++;
1678
+ VALUE type_str = rb_utf8_str_new(type_start, mq_ptr - type_start);
1679
+ media_type = ID2SYM(rb_intern_str(type_str));
1680
+
1681
+ // Skip whitespace and "and" keyword if present
1682
+ while (mq_ptr < query_end && IS_WHITESPACE(*mq_ptr)) mq_ptr++;
1683
+ if (mq_ptr + 3 <= query_end && strncmp(mq_ptr, "and", 3) == 0) {
1684
+ mq_ptr += 3;
1685
+ while (mq_ptr < query_end && IS_WHITESPACE(*mq_ptr)) mq_ptr++;
1686
+ }
1687
+
1688
+ // Rest is conditions
1689
+ if (mq_ptr < query_end) {
1690
+ media_conditions = rb_utf8_str_new(mq_ptr, query_end - mq_ptr);
1691
+ }
1692
+ }
1693
+
1694
+ // Create MediaQuery object for this query
1695
+ VALUE media_query = rb_struct_new(cMediaQuery,
1696
+ INT2FIX(ctx->media_query_id_counter),
1697
+ media_type,
1698
+ media_conditions
1699
+ );
1700
+ rb_ary_push(ctx->media_queries, media_query);
1701
+ rb_ary_push(media_query_ids, INT2FIX(ctx->media_query_id_counter));
1702
+ ctx->media_query_id_counter++;
1703
+ }
1704
+
1705
+ // Move to start of next query
1706
+ query_start = p_comma + 1;
1707
+ }
1708
+ }
1079
1709
 
1080
- // Combine with parent
1710
+ // If multiple queries, track them as a list for serialization
1711
+ int media_query_list_id = -1;
1712
+ if (RARRAY_LEN(media_query_ids) > 1) {
1713
+ media_query_list_id = ctx->next_media_query_list_id;
1714
+ rb_hash_aset(ctx->media_query_lists, INT2FIX(media_query_list_id), media_query_ids);
1715
+ ctx->next_media_query_list_id++;
1716
+ }
1717
+
1718
+ // Use first query ID as the primary one for rules in this block
1719
+ int current_media_query_id = FIX2INT(rb_ary_entry(media_query_ids, 0));
1720
+
1721
+ // Handle nested @media by combining with parent
1722
+ if (parent_media_query_id >= 0) {
1723
+ VALUE parent_mq = rb_ary_entry(ctx->media_queries, parent_media_query_id);
1724
+ VALUE parent_type = rb_struct_aref(parent_mq, INT2FIX(1)); // type field
1725
+ VALUE parent_conditions = rb_struct_aref(parent_mq, INT2FIX(2)); // conditions field
1726
+
1727
+ // Get child media query (first one in the list)
1728
+ VALUE child_mq = rb_ary_entry(ctx->media_queries, current_media_query_id);
1729
+ VALUE child_conditions = rb_struct_aref(child_mq, INT2FIX(2)); // conditions field
1730
+
1731
+ // Combined type is parent's type (outermost wins, child type ignored)
1732
+ VALUE combined_type = parent_type;
1733
+ VALUE combined_conditions;
1734
+
1735
+ if (!NIL_P(parent_conditions) && !NIL_P(child_conditions)) {
1736
+ combined_conditions = rb_sprintf("%"PRIsVALUE" and %"PRIsVALUE, parent_conditions, child_conditions);
1737
+ } else if (!NIL_P(parent_conditions)) {
1738
+ combined_conditions = parent_conditions;
1739
+ } else {
1740
+ combined_conditions = child_conditions;
1741
+ }
1742
+
1743
+ VALUE combined_mq = rb_struct_new(cMediaQuery,
1744
+ INT2FIX(ctx->media_query_id_counter),
1745
+ combined_type,
1746
+ combined_conditions
1747
+ );
1748
+ rb_ary_push(ctx->media_queries, combined_mq);
1749
+ current_media_query_id = ctx->media_query_id_counter;
1750
+ ctx->media_query_id_counter++;
1751
+ }
1752
+
1753
+ // For backwards compat, also create symbol (will be removed later)
1754
+ VALUE child_media_sym = intern_media_query_safe(ctx, mq_start, mq_end - mq_start);
1081
1755
  VALUE combined_media_sym = combine_media_queries(parent_media_sym, child_media_sym);
1082
1756
 
1083
1757
  p++; // Skip opening {
1084
1758
 
1085
1759
  // Find matching closing brace
1086
1760
  const char *block_start = p;
1087
- const char *block_end = find_matching_brace(p, pe);
1761
+ const char *block_end = find_matching_brace_strict(p, pe, ctx->check_unclosed_blocks);
1088
1762
  p = block_end;
1089
1763
 
1090
- // Recursively parse @media block with combined media context
1764
+ // Recursively parse @media block with new media query context
1091
1765
  ctx->depth++;
1092
- parse_css_recursive(ctx, block_start, block_end, combined_media_sym, NO_PARENT_SELECTOR, NO_PARENT_RULE_ID);
1766
+ parse_css_recursive(ctx, block_start, block_end, combined_media_sym, NO_PARENT_SELECTOR, NO_PARENT_RULE_ID, current_media_query_id);
1093
1767
  ctx->depth--;
1094
1768
 
1095
1769
  if (p < pe && *p == '}') p++;
@@ -1111,13 +1785,22 @@ static void parse_css_recursive(ParserContext *ctx, const char *css, const char
1111
1785
  long at_name_len = at_name_end - at_start;
1112
1786
 
1113
1787
  // Check if this is a conditional group rule
1114
- int is_conditional_group =
1788
+ BOOLEAN is_conditional_group =
1115
1789
  (at_name_len == 8 && strncmp(at_start, "supports", 8) == 0) ||
1116
1790
  (at_name_len == 5 && strncmp(at_start, "layer", 5) == 0) ||
1117
1791
  (at_name_len == 9 && strncmp(at_start, "container", 9) == 0) ||
1118
1792
  (at_name_len == 5 && strncmp(at_start, "scope", 5) == 0);
1119
1793
 
1120
1794
  if (is_conditional_group) {
1795
+ // Check if this rule requires a condition
1796
+ BOOLEAN requires_condition =
1797
+ (at_name_len == 8 && strncmp(at_start, "supports", 8) == 0) ||
1798
+ (at_name_len == 9 && strncmp(at_start, "container", 9) == 0);
1799
+
1800
+ // Extract condition (between at-rule name and opening brace)
1801
+ const char *cond_start = at_name_end;
1802
+ while (cond_start < pe && IS_WHITESPACE(*cond_start)) cond_start++;
1803
+
1121
1804
  // Skip to opening brace
1122
1805
  p = at_name_end;
1123
1806
  while (p < pe && *p != '{') p++;
@@ -1126,16 +1809,27 @@ static void parse_css_recursive(ParserContext *ctx, const char *css, const char
1126
1809
  continue; // Malformed
1127
1810
  }
1128
1811
 
1812
+ // Trim condition
1813
+ const char *cond_end = p;
1814
+ while (cond_end > cond_start && IS_WHITESPACE(*(cond_end - 1))) cond_end--;
1815
+
1816
+ // Check for missing condition
1817
+ if (requires_condition && cond_end <= cond_start && ctx->check_malformed_at_rules) {
1818
+ char error_msg[100];
1819
+ snprintf(error_msg, sizeof(error_msg), "Malformed @%.*s: missing condition", (int)at_name_len, at_start);
1820
+ raise_parse_error_at(ctx, at_start - 1, error_msg, "malformed_at_rule");
1821
+ }
1822
+
1129
1823
  p++; // Skip opening {
1130
1824
 
1131
1825
  // Find matching closing brace
1132
1826
  const char *block_start = p;
1133
- const char *block_end = find_matching_brace(p, pe);
1827
+ const char *block_end = find_matching_brace_strict(p, pe, ctx->check_unclosed_blocks);
1134
1828
  p = block_end;
1135
1829
 
1136
1830
  // Recursively parse block content (preserve parent media context)
1137
1831
  ctx->depth++;
1138
- parse_css_recursive(ctx, block_start, block_end, parent_media_sym, parent_selector, parent_rule_id);
1832
+ parse_css_recursive(ctx, block_start, block_end, parent_media_sym, parent_selector, parent_rule_id, parent_media_query_id);
1139
1833
  ctx->depth--;
1140
1834
 
1141
1835
  if (p < pe && *p == '}') p++;
@@ -1144,7 +1838,7 @@ static void parse_css_recursive(ParserContext *ctx, const char *css, const char
1144
1838
 
1145
1839
  // Check for @keyframes (contains <rule-list>)
1146
1840
  // TODO: Test perf gains by using RB_UNLIKELY(is_keyframes) wrapper
1147
- int is_keyframes =
1841
+ BOOLEAN is_keyframes =
1148
1842
  (at_name_len == 9 && strncmp(at_start, "keyframes", 9) == 0) ||
1149
1843
  (at_name_len == 17 && strncmp(at_start, "-webkit-keyframes", 17) == 0) ||
1150
1844
  (at_name_len == 13 && strncmp(at_start, "-moz-keyframes", 13) == 0);
@@ -1169,7 +1863,7 @@ static void parse_css_recursive(ParserContext *ctx, const char *css, const char
1169
1863
 
1170
1864
  // Find matching closing brace
1171
1865
  const char *block_start = p;
1172
- const char *block_end = find_matching_brace(p, pe);
1866
+ const char *block_end = find_matching_brace_strict(p, pe, ctx->check_unclosed_blocks);
1173
1867
  p = block_end;
1174
1868
 
1175
1869
  // Parse keyframe blocks as rules (from/to/0%/50% etc)
@@ -1186,7 +1880,7 @@ static void parse_css_recursive(ParserContext *ctx, const char *css, const char
1186
1880
  .selector_lists_enabled = ctx->selector_lists_enabled,
1187
1881
  .depth = 0
1188
1882
  };
1189
- parse_css_recursive(&nested_ctx, block_start, block_end, NO_PARENT_MEDIA, NO_PARENT_SELECTOR, NO_PARENT_RULE_ID);
1883
+ parse_css_recursive(&nested_ctx, block_start, block_end, NO_PARENT_MEDIA, NO_PARENT_SELECTOR, NO_PARENT_RULE_ID, NO_MEDIA_QUERY_ID);
1190
1884
 
1191
1885
  // Get rule ID and increment
1192
1886
  int rule_id = ctx->rule_id_counter++;
@@ -1196,7 +1890,9 @@ static void parse_css_recursive(ParserContext *ctx, const char *css, const char
1196
1890
  INT2FIX(rule_id),
1197
1891
  selector,
1198
1892
  nested_ctx.rules_array, // Array of Rule (keyframe blocks)
1199
- Qnil);
1893
+ Qnil, // specificity
1894
+ Qnil // media_query_id
1895
+ );
1200
1896
 
1201
1897
  // Add to rules array
1202
1898
  rb_ary_push(ctx->rules_array, at_rule);
@@ -1216,7 +1912,7 @@ static void parse_css_recursive(ParserContext *ctx, const char *css, const char
1216
1912
  }
1217
1913
 
1218
1914
  // Check for @font-face (contains <declaration-list>)
1219
- int is_font_face = (at_name_len == 9 && strncmp(at_start, "font-face", 9) == 0);
1915
+ BOOLEAN is_font_face = (at_name_len == 9 && strncmp(at_start, "font-face", 9) == 0);
1220
1916
 
1221
1917
  if (is_font_face) {
1222
1918
  // Build selector string: "@font-face"
@@ -1238,11 +1934,11 @@ static void parse_css_recursive(ParserContext *ctx, const char *css, const char
1238
1934
 
1239
1935
  // Find matching closing brace
1240
1936
  const char *decl_start = p;
1241
- const char *decl_end = find_matching_brace(p, pe);
1937
+ const char *decl_end = find_matching_brace_strict(p, pe, ctx->check_unclosed_blocks);
1242
1938
  p = decl_end;
1243
1939
 
1244
1940
  // Parse declarations
1245
- VALUE declarations = parse_declarations(decl_start, decl_end);
1941
+ VALUE declarations = parse_declarations(decl_start, decl_end, ctx);
1246
1942
 
1247
1943
  // Get rule ID and increment
1248
1944
  int rule_id = ctx->rule_id_counter++;
@@ -1252,7 +1948,9 @@ static void parse_css_recursive(ParserContext *ctx, const char *css, const char
1252
1948
  INT2FIX(rule_id),
1253
1949
  selector,
1254
1950
  declarations, // Array of Declaration
1255
- Qnil);
1951
+ Qnil, // specificity
1952
+ Qnil // media_query_id
1953
+ );
1256
1954
 
1257
1955
  // Add to rules array
1258
1956
  rb_ary_push(ctx->rules_array, at_rule);
@@ -1274,6 +1972,10 @@ static void parse_css_recursive(ParserContext *ctx, const char *css, const char
1274
1972
 
1275
1973
  // Opening brace
1276
1974
  if (*p == '{') {
1975
+ // Check for empty selector (opening brace with no selector before it)
1976
+ if (ctx->check_invalid_selectors && brace_depth == 0 && selector_start == NULL) {
1977
+ raise_parse_error_at(ctx, p, "Invalid selector: empty selector", "invalid_selector");
1978
+ }
1277
1979
  if (brace_depth == 0 && selector_start != NULL) {
1278
1980
  decl_start = p + 1;
1279
1981
  }
@@ -1289,7 +1991,7 @@ static void parse_css_recursive(ParserContext *ctx, const char *css, const char
1289
1991
  // We've found a complete CSS rule block - now determine if it has nesting
1290
1992
  // Example: .parent { color: red; & .child { font-size: 14px; } }
1291
1993
  // ^selector_start ^decl_start ^p (at })
1292
- int has_nesting = has_nested_selectors(decl_start, p);
1994
+ BOOLEAN has_nesting = has_nested_selectors(decl_start, p);
1293
1995
 
1294
1996
  // Get selector string
1295
1997
  const char *sel_end = decl_start - 1;
@@ -1297,9 +1999,26 @@ static void parse_css_recursive(ParserContext *ctx, const char *css, const char
1297
1999
  sel_end--;
1298
2000
  }
1299
2001
 
2002
+ // Check for empty selector
2003
+ if (ctx->check_invalid_selectors && sel_end <= selector_start) {
2004
+ const char *css = RSTRING_PTR(ctx->css_string);
2005
+ long error_pos = selector_start - css;
2006
+
2007
+ // Build keyword args hash
2008
+ VALUE kwargs = rb_hash_new();
2009
+ rb_hash_aset(kwargs, ID2SYM(rb_intern("css")), ctx->css_string);
2010
+ rb_hash_aset(kwargs, ID2SYM(rb_intern("pos")), LONG2NUM(error_pos));
2011
+ rb_hash_aset(kwargs, ID2SYM(rb_intern("type")), ID2SYM(rb_intern("invalid_selector")));
2012
+
2013
+ VALUE msg_str = rb_str_new_cstr("Invalid selector: empty selector");
2014
+ VALUE argv[2] = {msg_str, kwargs};
2015
+ VALUE error = rb_funcallv_kw(eParseError, rb_intern("new"), 2, argv, RB_PASS_KEYWORDS);
2016
+ rb_exc_raise(error);
2017
+ }
2018
+
1300
2019
  if (!has_nesting) {
1301
2020
  // FAST PATH: No nesting - parse as pure declarations
1302
- VALUE declarations = parse_declarations(decl_start, p);
2021
+ VALUE declarations = parse_declarations(decl_start, p, ctx);
1303
2022
 
1304
2023
  // Split on commas to handle multi-selector rules
1305
2024
  // Example: ".a, .b, .c { color: red; }" creates 3 separate rules
@@ -1343,6 +2062,37 @@ static void parse_css_recursive(ParserContext *ctx, const char *css, const char
1343
2062
  }
1344
2063
 
1345
2064
  if (seg_end_ptr > seg_start) {
2065
+ // Check for invalid selectors
2066
+ if (ctx->check_invalid_selectors) {
2067
+ // Check if selector starts with combinator
2068
+ char first_char = *seg_start;
2069
+ if (first_char == '>' || first_char == '+' || first_char == '~') {
2070
+ const char *css = RSTRING_PTR(ctx->css_string);
2071
+ long error_pos = seg_start - css;
2072
+
2073
+ char error_msg[256];
2074
+ snprintf(error_msg, sizeof(error_msg),
2075
+ "Invalid selector: selector cannot start with combinator '%c'",
2076
+ first_char);
2077
+
2078
+ // Build keyword args hash
2079
+ VALUE kwargs = rb_hash_new();
2080
+ rb_hash_aset(kwargs, ID2SYM(rb_intern("css")), ctx->css_string);
2081
+ rb_hash_aset(kwargs, ID2SYM(rb_intern("pos")), LONG2NUM(error_pos));
2082
+ rb_hash_aset(kwargs, ID2SYM(rb_intern("type")), ID2SYM(rb_intern("invalid_selector")));
2083
+
2084
+ VALUE msg_str = rb_str_new_cstr(error_msg);
2085
+ VALUE argv[2] = {msg_str, kwargs};
2086
+ VALUE error = rb_funcallv_kw(eParseError, rb_intern("new"), 2, argv, RB_PASS_KEYWORDS);
2087
+ rb_exc_raise(error);
2088
+ }
2089
+ }
2090
+
2091
+ // Check for invalid selector syntax (whitelist validation)
2092
+ if (ctx->check_invalid_selector_syntax && !is_valid_selector(seg_start, seg_end_ptr)) {
2093
+ raise_parse_error_at(ctx, seg_start, "Invalid selector syntax: selector contains invalid characters", "invalid_selector_syntax");
2094
+ }
2095
+
1346
2096
  VALUE selector = rb_utf8_str_new(seg_start, seg_end_ptr - seg_start);
1347
2097
 
1348
2098
  // Resolve against parent if nested
@@ -1390,6 +2140,7 @@ static void parse_css_recursive(ParserContext *ctx, const char *css, const char
1390
2140
  }
1391
2141
 
1392
2142
  // Create Rule
2143
+ VALUE media_query_id_val = (parent_media_query_id >= 0) ? INT2FIX(parent_media_query_id) : Qnil;
1393
2144
  VALUE rule = rb_struct_new(cRule,
1394
2145
  INT2FIX(rule_id),
1395
2146
  resolved_selector,
@@ -1397,7 +2148,8 @@ static void parse_css_recursive(ParserContext *ctx, const char *css, const char
1397
2148
  Qnil, // specificity
1398
2149
  parent_id_val,
1399
2150
  nesting_style_val,
1400
- selector_list_id_val
2151
+ selector_list_id_val,
2152
+ media_query_id_val // media_query_id from parent context
1401
2153
  );
1402
2154
 
1403
2155
  // Track rule in selector list if applicable
@@ -1414,6 +2166,9 @@ static void parse_css_recursive(ParserContext *ctx, const char *css, const char
1414
2166
 
1415
2167
  // Update media index
1416
2168
  update_media_index(ctx, parent_media_sym, rule_id);
2169
+ } else if (ctx->check_invalid_selector_syntax && selector_count > 1) {
2170
+ // Empty selector in comma-separated list (e.g., "h1, , h3")
2171
+ raise_parse_error_at(ctx, seg_start, "Invalid selector syntax: empty selector in comma-separated list", "invalid_selector_syntax");
1417
2172
  }
1418
2173
 
1419
2174
  seg_start = seg + 1;
@@ -1499,7 +2254,7 @@ static void parse_css_recursive(ParserContext *ctx, const char *css, const char
1499
2254
  // Nested rules will be added AFTER the placeholder
1500
2255
  ctx->depth++;
1501
2256
  VALUE parent_declarations = parse_mixed_block(ctx, decl_start, p,
1502
- resolved_current, INT2FIX(current_rule_id), parent_media_sym);
2257
+ resolved_current, INT2FIX(current_rule_id), parent_media_sym, parent_media_query_id);
1503
2258
  ctx->depth--;
1504
2259
 
1505
2260
  // Determine selector_list_id value
@@ -1507,6 +2262,7 @@ static void parse_css_recursive(ParserContext *ctx, const char *css, const char
1507
2262
 
1508
2263
  // Create parent rule and replace placeholder
1509
2264
  // Always create the rule (even if empty) to avoid edge cases
2265
+ VALUE media_query_id_val = (parent_media_query_id >= 0) ? INT2FIX(parent_media_query_id) : Qnil;
1510
2266
  VALUE rule = rb_struct_new(cRule,
1511
2267
  INT2FIX(current_rule_id),
1512
2268
  resolved_current,
@@ -1514,7 +2270,8 @@ static void parse_css_recursive(ParserContext *ctx, const char *css, const char
1514
2270
  Qnil, // specificity
1515
2271
  current_parent_id,
1516
2272
  current_nesting_style,
1517
- selector_list_id_val
2273
+ selector_list_id_val,
2274
+ media_query_id_val // media_query_id from parent context
1518
2275
  );
1519
2276
 
1520
2277
  // Track rule in selector list if applicable
@@ -1553,6 +2310,11 @@ static void parse_css_recursive(ParserContext *ctx, const char *css, const char
1553
2310
 
1554
2311
  p++;
1555
2312
  }
2313
+
2314
+ // Check for unclosed blocks at end of parsing
2315
+ if (ctx->check_unclosed_blocks && brace_depth > 0) {
2316
+ rb_raise(eParseError, "Unclosed block: missing closing brace");
2317
+ }
1556
2318
  }
1557
2319
 
1558
2320
  /*
@@ -1586,7 +2348,48 @@ VALUE parse_css_new_impl(VALUE css_string, VALUE parser_options, int rule_id_off
1586
2348
 
1587
2349
  // Read parser options
1588
2350
  VALUE selector_lists_opt = rb_hash_aref(parser_options, ID2SYM(rb_intern("selector_lists")));
1589
- int selector_lists_enabled = (NIL_P(selector_lists_opt) || RTEST(selector_lists_opt)) ? 1 : 0;
2351
+ BOOLEAN selector_lists_enabled = (NIL_P(selector_lists_opt) || RTEST(selector_lists_opt)) ? 1 : 0;
2352
+
2353
+ // URL conversion options
2354
+ VALUE base_uri = rb_hash_aref(parser_options, ID2SYM(rb_intern("base_uri")));
2355
+ VALUE absolute_paths_opt = rb_hash_aref(parser_options, ID2SYM(rb_intern("absolute_paths")));
2356
+ VALUE uri_resolver = rb_hash_aref(parser_options, ID2SYM(rb_intern("uri_resolver")));
2357
+ BOOLEAN absolute_paths = RTEST(absolute_paths_opt) ? 1 : 0;
2358
+
2359
+ // Parse error options
2360
+ VALUE raise_parse_errors_opt = rb_hash_aref(parser_options, ID2SYM(rb_intern("raise_parse_errors")));
2361
+ BOOLEAN check_empty_values = 0;
2362
+ BOOLEAN check_malformed_declarations = 0;
2363
+ BOOLEAN check_invalid_selectors = 0;
2364
+ BOOLEAN check_invalid_selector_syntax = 0;
2365
+ BOOLEAN check_malformed_at_rules = 0;
2366
+ BOOLEAN check_unclosed_blocks = 0;
2367
+
2368
+ if (RTEST(raise_parse_errors_opt)) {
2369
+ if (TYPE(raise_parse_errors_opt) == T_HASH) {
2370
+ // Hash of specific error types
2371
+ VALUE empty_values_opt = rb_hash_aref(raise_parse_errors_opt, ID2SYM(rb_intern("empty_values")));
2372
+ VALUE malformed_declarations_opt = rb_hash_aref(raise_parse_errors_opt, ID2SYM(rb_intern("malformed_declarations")));
2373
+ VALUE invalid_selectors_opt = rb_hash_aref(raise_parse_errors_opt, ID2SYM(rb_intern("invalid_selectors")));
2374
+ VALUE invalid_selector_syntax_opt = rb_hash_aref(raise_parse_errors_opt, ID2SYM(rb_intern("invalid_selector_syntax")));
2375
+ VALUE malformed_at_rules_opt = rb_hash_aref(raise_parse_errors_opt, ID2SYM(rb_intern("malformed_at_rules")));
2376
+ VALUE unclosed_blocks_opt = rb_hash_aref(raise_parse_errors_opt, ID2SYM(rb_intern("unclosed_blocks")));
2377
+ check_empty_values = RTEST(empty_values_opt) ? 1 : 0;
2378
+ check_malformed_declarations = RTEST(malformed_declarations_opt) ? 1 : 0;
2379
+ check_invalid_selectors = RTEST(invalid_selectors_opt) ? 1 : 0;
2380
+ check_invalid_selector_syntax = RTEST(invalid_selector_syntax_opt) ? 1 : 0;
2381
+ check_malformed_at_rules = RTEST(malformed_at_rules_opt) ? 1 : 0;
2382
+ check_unclosed_blocks = RTEST(unclosed_blocks_opt) ? 1 : 0;
2383
+ } else {
2384
+ // true - enable all checks
2385
+ check_empty_values = 1;
2386
+ check_malformed_declarations = 1;
2387
+ check_invalid_selectors = 1;
2388
+ check_invalid_selector_syntax = 1;
2389
+ check_malformed_at_rules = 1;
2390
+ check_unclosed_blocks = 1;
2391
+ }
2392
+ }
1590
2393
 
1591
2394
  const char *css = RSTRING_PTR(css_string);
1592
2395
  const char *pe = css + RSTRING_LEN(css_string);
@@ -1624,23 +2427,41 @@ VALUE parse_css_new_impl(VALUE css_string, VALUE parser_options, int rule_id_off
1624
2427
  ctx.media_index = rb_hash_new();
1625
2428
  ctx.selector_lists = rb_hash_new();
1626
2429
  ctx.imports_array = rb_ary_new();
2430
+ ctx.media_queries = rb_ary_new();
2431
+ ctx.media_query_lists = rb_hash_new();
1627
2432
  ctx.rule_id_counter = rule_id_offset; // Start from offset
1628
2433
  ctx.next_selector_list_id = 0; // Start from 0
2434
+ ctx.media_query_id_counter = 0; // Start from 0
2435
+ ctx.next_media_query_list_id = 0; // Start from 0
1629
2436
  ctx.media_query_count = 0;
1630
2437
  ctx.media_cache = NULL; // Removed - no perf benefit
1631
2438
  ctx.has_nesting = 0; // Will be set to 1 if any nested rules are created
1632
2439
  ctx.selector_lists_enabled = selector_lists_enabled;
1633
2440
  ctx.depth = 0; // Start at depth 0
2441
+ // URL conversion options
2442
+ ctx.base_uri = base_uri;
2443
+ ctx.uri_resolver = uri_resolver;
2444
+ ctx.absolute_paths = absolute_paths;
2445
+ // Parse error options
2446
+ ctx.css_string = css_string;
2447
+ ctx.check_empty_values = check_empty_values;
2448
+ ctx.check_malformed_declarations = check_malformed_declarations;
2449
+ ctx.check_invalid_selectors = check_invalid_selectors;
2450
+ ctx.check_invalid_selector_syntax = check_invalid_selector_syntax;
2451
+ ctx.check_malformed_at_rules = check_malformed_at_rules;
2452
+ ctx.check_unclosed_blocks = check_unclosed_blocks;
1634
2453
 
1635
2454
  // Parse CSS (top-level, no parent context)
1636
2455
  DEBUG_PRINTF("[PARSE] Starting parse_css_recursive from: %.80s\n", p);
1637
- parse_css_recursive(&ctx, p, pe, NO_PARENT_MEDIA, NO_PARENT_SELECTOR, NO_PARENT_RULE_ID);
2456
+ parse_css_recursive(&ctx, p, pe, NO_PARENT_MEDIA, NO_PARENT_SELECTOR, NO_PARENT_RULE_ID, NO_MEDIA_QUERY_ID);
1638
2457
 
1639
2458
  // Build result hash
1640
2459
  VALUE result = rb_hash_new();
1641
2460
  rb_hash_aset(result, ID2SYM(rb_intern("rules")), ctx.rules_array);
1642
2461
  rb_hash_aset(result, ID2SYM(rb_intern("_media_index")), ctx.media_index);
2462
+ rb_hash_aset(result, ID2SYM(rb_intern("media_queries")), ctx.media_queries);
1643
2463
  rb_hash_aset(result, ID2SYM(rb_intern("_selector_lists")), ctx.selector_lists);
2464
+ rb_hash_aset(result, ID2SYM(rb_intern("_media_query_lists")), ctx.media_query_lists);
1644
2465
  rb_hash_aset(result, ID2SYM(rb_intern("imports")), ctx.imports_array);
1645
2466
  rb_hash_aset(result, ID2SYM(rb_intern("charset")), charset);
1646
2467
  rb_hash_aset(result, ID2SYM(rb_intern("last_rule_id")), INT2FIX(ctx.rule_id_counter));
@@ -1649,8 +2470,12 @@ VALUE parse_css_new_impl(VALUE css_string, VALUE parser_options, int rule_id_off
1649
2470
  RB_GC_GUARD(charset);
1650
2471
  RB_GC_GUARD(ctx.rules_array);
1651
2472
  RB_GC_GUARD(ctx.media_index);
2473
+ RB_GC_GUARD(ctx.media_queries);
1652
2474
  RB_GC_GUARD(ctx.selector_lists);
2475
+ RB_GC_GUARD(ctx.media_query_lists);
1653
2476
  RB_GC_GUARD(ctx.imports_array);
2477
+ RB_GC_GUARD(ctx.base_uri);
2478
+ RB_GC_GUARD(ctx.uri_resolver);
1654
2479
  RB_GC_GUARD(result);
1655
2480
 
1656
2481
  return result;