cataract 0.2.2 → 0.2.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -19,13 +19,21 @@ typedef struct {
19
19
  VALUE media_index; // Hash: Symbol => Array of rule IDs
20
20
  VALUE selector_lists; // Hash: list_id => Array of rule IDs
21
21
  VALUE imports_array; // Array of ImportStatement structs
22
+ VALUE media_queries; // Array of MediaQuery structs
23
+ VALUE media_query_lists; // Hash: list_id => Array of MediaQuery IDs
22
24
  int rule_id_counter; // Next rule ID (0-indexed)
23
25
  int next_selector_list_id; // Next selector list ID (0-indexed)
26
+ int media_query_id_counter; // Next MediaQuery ID (0-indexed)
27
+ int next_media_query_list_id; // Next media query list ID (0-indexed)
24
28
  int media_query_count; // Safety limit for media queries
25
29
  st_table *media_cache; // Parse-time cache: string => parsed media types
26
30
  int has_nesting; // Set to 1 if any nested rules are created
27
31
  int selector_lists_enabled; // Parser option: track selector lists (1=enabled, 0=disabled)
28
32
  int depth; // Current recursion depth (safety limit)
33
+ // URL conversion options
34
+ VALUE base_uri; // Base URI for resolving relative URLs (Qnil if disabled)
35
+ VALUE uri_resolver; // Proc to call for URL resolution (Qnil for default)
36
+ int absolute_paths; // Whether to convert relative URLs to absolute
29
37
  } ParserContext;
30
38
 
31
39
  // Macro to skip CSS comments /* ... */
@@ -384,12 +392,188 @@ static void update_media_index(ParserContext *ctx, VALUE media_sym, int rule_id)
384
392
  }
385
393
 
386
394
  // Add to full query symbol (after media types for insertion order)
387
- add_to_media_index(ctx->media_index, media_sym, rule_id);
395
+ // BUT: skip if it contains a comma (comma-separated list like "screen, print")
396
+ // because we already added each individual type above
397
+ int has_comma = 0;
398
+ for (long i = 0; i < query_len; i++) {
399
+ if (query[i] == ',') {
400
+ has_comma = 1;
401
+ break;
402
+ }
403
+ }
404
+ if (!has_comma) {
405
+ add_to_media_index(ctx->media_index, media_sym, rule_id);
406
+ }
388
407
 
389
408
  // Guard media_str since we extracted C pointer and called extract_media_types (which allocates)
390
409
  RB_GC_GUARD(media_str);
391
410
  }
392
411
 
412
+ // Helper struct for passing arguments to resolver callback
413
+ typedef struct {
414
+ VALUE uri_resolver;
415
+ VALUE base_uri;
416
+ VALUE url_str;
417
+ } ResolverArgs;
418
+
419
+ // Callback for rb_protect to call the resolver proc
420
+ static VALUE call_resolver(VALUE arg) {
421
+ ResolverArgs *args = (ResolverArgs *)arg;
422
+ return rb_funcall(args->uri_resolver, rb_intern("call"), 2, args->base_uri, args->url_str);
423
+ }
424
+
425
+ /*
426
+ * Convert relative URLs in a CSS value to absolute URLs
427
+ *
428
+ * Scans for url() patterns and resolves relative URLs using the resolver proc.
429
+ * Returns a new Ruby string with resolved URLs, or the original if no conversion needed.
430
+ */
431
+ static VALUE convert_urls_in_value(VALUE value_str, VALUE base_uri, VALUE uri_resolver) {
432
+ const char *val = RSTRING_PTR(value_str);
433
+ long len = RSTRING_LEN(value_str);
434
+
435
+ // Quick check: does value contain 'url('?
436
+ const char *url_check = val;
437
+ int has_url = 0;
438
+ while (url_check < val + len - 3) {
439
+ if ((*url_check == 'u' || *url_check == 'U') &&
440
+ (*(url_check + 1) == 'r' || *(url_check + 1) == 'R') &&
441
+ (*(url_check + 2) == 'l' || *(url_check + 2) == 'L') &&
442
+ *(url_check + 3) == '(') {
443
+ has_url = 1;
444
+ break;
445
+ }
446
+ url_check++;
447
+ }
448
+ if (!has_url) return value_str;
449
+
450
+ // Build result string
451
+ VALUE result = rb_str_new("", 0);
452
+ const char *pos = val;
453
+
454
+ while (pos < val + len) {
455
+ // Look for 'url(' - case insensitive
456
+ if (pos + 3 < val + len &&
457
+ (*pos == 'u' || *pos == 'U') &&
458
+ (*(pos + 1) == 'r' || *(pos + 1) == 'R') &&
459
+ (*(pos + 2) == 'l' || *(pos + 2) == 'L') &&
460
+ *(pos + 3) == '(') {
461
+
462
+ // Append 'url('
463
+ rb_str_cat(result, "url(", 4);
464
+ pos += 4;
465
+
466
+ // Skip whitespace after (
467
+ while (pos < val + len && IS_WHITESPACE(*pos)) pos++;
468
+
469
+ // Determine quote character (if any)
470
+ char quote = 0;
471
+ if (pos < val + len && (*pos == '\'' || *pos == '"')) {
472
+ quote = *pos;
473
+ pos++;
474
+ }
475
+
476
+ // Find end of URL
477
+ const char *url_start = pos;
478
+ if (quote) {
479
+ // Quoted URL - find closing quote
480
+ while (pos < val + len && *pos != quote) {
481
+ if (*pos == '\\' && pos + 1 < val + len) {
482
+ pos += 2; // Skip escaped char
483
+ } else {
484
+ pos++;
485
+ }
486
+ }
487
+ } else {
488
+ // Unquoted URL - find ) or whitespace
489
+ while (pos < val + len && *pos != ')' && !IS_WHITESPACE(*pos)) {
490
+ pos++;
491
+ }
492
+ }
493
+ const char *url_end = pos;
494
+
495
+ // Extract URL string
496
+ long url_len = url_end - url_start;
497
+ VALUE url_str = rb_str_new(url_start, url_len);
498
+
499
+ // Check if URL needs resolution (is relative)
500
+ int needs_resolution = 0;
501
+ if (url_len > 0) {
502
+ // Check for absolute URLs or data URIs that don't need resolution
503
+ const char *u = url_start;
504
+ if ((url_len >= 5 && strncmp(u, "data:", 5) == 0) ||
505
+ (url_len >= 7 && strncmp(u, "http://", 7) == 0) ||
506
+ (url_len >= 8 && strncmp(u, "https://", 8) == 0) ||
507
+ (url_len >= 2 && strncmp(u, "//", 2) == 0) ||
508
+ (url_len >= 1 && *u == '#')) { // Fragment reference
509
+ needs_resolution = 0;
510
+ } else {
511
+ needs_resolution = 1;
512
+ }
513
+ }
514
+
515
+ if (needs_resolution) {
516
+ // Resolve using the resolver proc (always provided by Ruby side)
517
+ // Wrap in rb_protect to catch exceptions
518
+ ResolverArgs args = { uri_resolver, base_uri, url_str };
519
+ int state = 0;
520
+ VALUE resolved = rb_protect(call_resolver, (VALUE)&args, &state);
521
+
522
+ if (state) {
523
+ // Exception occurred - preserve original URL
524
+ rb_set_errinfo(Qnil); // Clear exception
525
+ if (quote) {
526
+ rb_str_cat(result, &quote, 1);
527
+ rb_str_append(result, url_str);
528
+ rb_str_cat(result, &quote, 1);
529
+ } else {
530
+ rb_str_append(result, url_str);
531
+ }
532
+ } else {
533
+ // Output with single quotes (canonical format)
534
+ rb_str_cat(result, "'", 1);
535
+ rb_str_append(result, resolved);
536
+ rb_str_cat(result, "'", 1);
537
+ }
538
+
539
+ RB_GC_GUARD(resolved);
540
+ } else {
541
+ // Keep original URL with original quoting
542
+ if (quote) {
543
+ rb_str_cat(result, &quote, 1);
544
+ rb_str_append(result, url_str);
545
+ rb_str_cat(result, &quote, 1);
546
+ } else {
547
+ rb_str_append(result, url_str);
548
+ }
549
+ }
550
+
551
+ RB_GC_GUARD(url_str);
552
+
553
+ // Skip closing quote if present
554
+ if (quote && pos < val + len && *pos == quote) {
555
+ pos++;
556
+ }
557
+
558
+ // Skip whitespace before )
559
+ while (pos < val + len && IS_WHITESPACE(*pos)) pos++;
560
+
561
+ // Skip closing )
562
+ if (pos < val + len && *pos == ')') {
563
+ rb_str_cat(result, ")", 1);
564
+ pos++;
565
+ }
566
+ } else {
567
+ // Regular character - append to result
568
+ rb_str_cat(result, pos, 1);
569
+ pos++;
570
+ }
571
+ }
572
+
573
+ RB_GC_GUARD(result);
574
+ return result;
575
+ }
576
+
393
577
  /*
394
578
  * Parse declaration block into array of Declaration structs
395
579
  *
@@ -403,7 +587,7 @@ static void update_media_index(ParserContext *ctx, VALUE media_sym, int rule_id)
403
587
  * - Values containing parentheses (e.g., url(...), rgba(...))
404
588
  * - !important flag
405
589
  */
406
- static VALUE parse_declarations(const char *start, const char *end) {
590
+ static VALUE parse_declarations(const char *start, const char *end, ParserContext *ctx) {
407
591
  VALUE declarations = rb_ary_new();
408
592
 
409
593
  const char *pos = start;
@@ -513,6 +697,11 @@ static VALUE parse_declarations(const char *start, const char *end) {
513
697
  }
514
698
  VALUE value = rb_utf8_str_new(val_start, val_len);
515
699
 
700
+ // Convert relative URLs to absolute if enabled
701
+ if (ctx && ctx->absolute_paths && !NIL_P(ctx->base_uri)) {
702
+ value = convert_urls_in_value(value, ctx->base_uri, ctx->uri_resolver);
703
+ }
704
+
516
705
  // Create Declaration struct
517
706
  VALUE decl = rb_struct_new(cDeclaration,
518
707
  property,
@@ -531,7 +720,7 @@ static VALUE parse_declarations(const char *start, const char *end) {
531
720
 
532
721
  // Forward declarations
533
722
  static void parse_css_recursive(ParserContext *ctx, const char *css, const char *pe,
534
- VALUE parent_media_sym, VALUE parent_selector, VALUE parent_rule_id);
723
+ VALUE parent_media_sym, VALUE parent_selector, VALUE parent_rule_id, int parent_media_query_id);
535
724
  static VALUE combine_media_queries(VALUE parent, VALUE child);
536
725
 
537
726
  /*
@@ -630,7 +819,7 @@ static VALUE intern_media_query_safe(ParserContext *ctx, const char *query_str,
630
819
  * Returns: Array of declarations (only the declarations, not nested rules)
631
820
  */
632
821
  static VALUE parse_mixed_block(ParserContext *ctx, const char *start, const char *end,
633
- VALUE parent_selector, VALUE parent_rule_id, VALUE parent_media_sym) {
822
+ VALUE parent_selector, VALUE parent_rule_id, VALUE parent_media_sym, int parent_media_query_id) {
634
823
  // Check recursion depth to prevent stack overflow
635
824
  if (ctx->depth > MAX_PARSE_DEPTH) {
636
825
  rb_raise(eDepthError,
@@ -661,11 +850,47 @@ static VALUE parse_mixed_block(ParserContext *ctx, const char *start, const char
661
850
  }
662
851
  if (media_query_end >= end) break;
663
852
 
664
- // Extract media query
853
+ // Extract media query string
665
854
  const char *media_query_start = media_start;
666
855
  const char *media_query_end_trimmed = media_query_end;
667
856
  trim_trailing(media_query_start, &media_query_end_trimmed);
668
- VALUE media_sym = intern_media_query_safe(ctx, media_query_start, media_query_end_trimmed - media_query_start);
857
+
858
+ // Parse media query and create MediaQuery object
859
+ const char *mq_ptr = media_query_start;
860
+ VALUE media_type;
861
+ VALUE media_conditions = Qnil;
862
+
863
+ if (*mq_ptr == '(') {
864
+ // Starts with '(' - just conditions, type defaults to :all
865
+ media_type = ID2SYM(rb_intern("all"));
866
+ media_conditions = rb_utf8_str_new(mq_ptr, media_query_end_trimmed - mq_ptr);
867
+ } else {
868
+ // Extract media type (first word)
869
+ const char *type_start = mq_ptr;
870
+ while (mq_ptr < media_query_end_trimmed && !IS_WHITESPACE(*mq_ptr) && *mq_ptr != '(') mq_ptr++;
871
+ VALUE type_str = rb_utf8_str_new(type_start, mq_ptr - type_start);
872
+ media_type = ID2SYM(rb_intern_str(type_str));
873
+
874
+ // Skip "and" keyword if present
875
+ while (mq_ptr < media_query_end_trimmed && IS_WHITESPACE(*mq_ptr)) mq_ptr++;
876
+ if (mq_ptr + 3 <= media_query_end_trimmed && strncmp(mq_ptr, "and", 3) == 0) {
877
+ mq_ptr += 3;
878
+ while (mq_ptr < media_query_end_trimmed && IS_WHITESPACE(*mq_ptr)) mq_ptr++;
879
+ }
880
+ if (mq_ptr < media_query_end_trimmed) {
881
+ media_conditions = rb_utf8_str_new(mq_ptr, media_query_end_trimmed - mq_ptr);
882
+ }
883
+ }
884
+
885
+ // Create MediaQuery object
886
+ VALUE media_query = rb_struct_new(cMediaQuery,
887
+ INT2FIX(ctx->media_query_id_counter),
888
+ media_type,
889
+ media_conditions
890
+ );
891
+ rb_ary_push(ctx->media_queries, media_query);
892
+ int nested_media_query_id = ctx->media_query_id_counter;
893
+ ctx->media_query_id_counter++;
669
894
 
670
895
  p = media_query_end + 1; // Skip {
671
896
 
@@ -676,8 +901,58 @@ static VALUE parse_mixed_block(ParserContext *ctx, const char *start, const char
676
901
 
677
902
  if (p < end) p++; // Skip }
678
903
 
679
- // Combine media queries: parent + child
680
- VALUE combined_media_sym = combine_media_queries(parent_media_sym, media_sym);
904
+ // Handle combining media queries when parent has media too
905
+ int combined_media_query_id = nested_media_query_id;
906
+ if (parent_media_query_id >= 0) {
907
+ // Get parent MediaQuery
908
+ VALUE parent_mq = rb_ary_entry(ctx->media_queries, parent_media_query_id);
909
+
910
+ // This should never happen - parent_media_query_id should always be valid
911
+ if (NIL_P(parent_mq)) {
912
+ rb_raise(eParserError,
913
+ "Invalid parent_media_query_id: %d (not found in media_queries array)",
914
+ parent_media_query_id);
915
+ }
916
+
917
+ VALUE parent_type = rb_struct_aref(parent_mq, INT2FIX(1)); // type field
918
+ VALUE parent_conditions = rb_struct_aref(parent_mq, INT2FIX(2)); // conditions field
919
+
920
+ // Combine: parent conditions + " and " + child conditions
921
+ VALUE combined_conditions;
922
+ if (!NIL_P(parent_conditions) && !NIL_P(media_conditions)) {
923
+ combined_conditions = rb_str_new_cstr("");
924
+ rb_str_append(combined_conditions, parent_conditions);
925
+ rb_str_cat2(combined_conditions, " and ");
926
+ rb_str_append(combined_conditions, media_conditions);
927
+ } else if (!NIL_P(parent_conditions)) {
928
+ combined_conditions = parent_conditions;
929
+ } else {
930
+ combined_conditions = media_conditions;
931
+ }
932
+
933
+ // Determine combined type (if parent is :all, use child type; if child is :all, use parent type; if both have types, use parent type)
934
+ VALUE combined_type;
935
+ ID all_id = rb_intern("all");
936
+ if (SYM2ID(parent_type) == all_id) {
937
+ combined_type = media_type;
938
+ } else {
939
+ combined_type = parent_type;
940
+ }
941
+
942
+ // Create combined MediaQuery
943
+ VALUE combined_mq = rb_struct_new(cMediaQuery,
944
+ INT2FIX(ctx->media_query_id_counter),
945
+ combined_type,
946
+ combined_conditions
947
+ );
948
+ rb_ary_push(ctx->media_queries, combined_mq);
949
+ combined_media_query_id = ctx->media_query_id_counter;
950
+ ctx->media_query_id_counter++;
951
+
952
+ // Guard combined_conditions since we built it with rb_str_new_cstr/rb_str_append
953
+ // and it's used in rb_struct_new above (rb_ary_push could trigger GC)
954
+ RB_GC_GUARD(combined_conditions);
955
+ }
681
956
 
682
957
  // Parse the block with parse_mixed_block to support further nesting
683
958
  // Create a rule ID for this media rule
@@ -690,10 +965,11 @@ static VALUE parse_mixed_block(ParserContext *ctx, const char *start, const char
690
965
  // Parse mixed block (may contain declarations and/or nested @media)
691
966
  ctx->depth++;
692
967
  VALUE media_declarations = parse_mixed_block(ctx, media_block_start, media_block_end,
693
- parent_selector, INT2FIX(media_rule_id), combined_media_sym);
968
+ parent_selector, INT2FIX(media_rule_id), Qnil, combined_media_query_id);
694
969
  ctx->depth--;
695
970
 
696
971
  // Create rule with the parent selector and declarations, associated with combined media query
972
+ VALUE media_query_id_val = INT2FIX(combined_media_query_id);
697
973
  VALUE rule = rb_struct_new(cRule,
698
974
  INT2FIX(media_rule_id),
699
975
  parent_selector,
@@ -701,7 +977,8 @@ static VALUE parse_mixed_block(ParserContext *ctx, const char *start, const char
701
977
  Qnil, // specificity
702
978
  parent_rule_id, // Link to parent for nested @media serialization
703
979
  Qnil, // nesting_style (nil for @media nesting)
704
- Qnil // selector_list_id
980
+ Qnil, // selector_list_id
981
+ media_query_id_val // media_query_id from parent context
705
982
  );
706
983
 
707
984
  // Mark that we have nesting (only set once)
@@ -711,7 +988,13 @@ static VALUE parse_mixed_block(ParserContext *ctx, const char *start, const char
711
988
 
712
989
  // Replace placeholder with actual rule
713
990
  rb_ary_store(ctx->rules_array, parent_pos, rule);
714
- update_media_index(ctx, combined_media_sym, media_rule_id);
991
+
992
+ // Update media_index using the MediaQuery's type symbol
993
+ VALUE combined_mq = rb_ary_entry(ctx->media_queries, combined_media_query_id);
994
+ if (!NIL_P(combined_mq)) {
995
+ VALUE mq_type = rb_struct_aref(combined_mq, INT2FIX(1)); // type field
996
+ update_media_index(ctx, mq_type, media_rule_id);
997
+ }
715
998
 
716
999
  continue;
717
1000
  }
@@ -770,13 +1053,18 @@ static VALUE parse_mixed_block(ParserContext *ctx, const char *start, const char
770
1053
  // Get rule ID
771
1054
  int rule_id = ctx->rule_id_counter++;
772
1055
 
1056
+ // Reserve position in rules array (ensures sequential IDs match array indices)
1057
+ long rule_position = RARRAY_LEN(ctx->rules_array);
1058
+ rb_ary_push(ctx->rules_array, Qnil); // Placeholder
1059
+
773
1060
  // Recursively parse nested block
774
1061
  ctx->depth++;
775
1062
  VALUE nested_declarations = parse_mixed_block(ctx, nested_block_start, nested_block_end,
776
- resolved_selector, INT2FIX(rule_id), parent_media_sym);
1063
+ resolved_selector, INT2FIX(rule_id), parent_media_sym, parent_media_query_id);
777
1064
  ctx->depth--;
778
1065
 
779
1066
  // Create rule for nested selector
1067
+ VALUE media_query_id_val = (parent_media_query_id >= 0) ? INT2FIX(parent_media_query_id) : Qnil;
780
1068
  VALUE rule = rb_struct_new(cRule,
781
1069
  INT2FIX(rule_id),
782
1070
  resolved_selector,
@@ -784,7 +1072,8 @@ static VALUE parse_mixed_block(ParserContext *ctx, const char *start, const char
784
1072
  Qnil, // specificity
785
1073
  parent_rule_id,
786
1074
  nesting_style,
787
- Qnil // selector_list_id
1075
+ Qnil, // selector_list_id
1076
+ media_query_id_val // media_query_id from parent context
788
1077
  );
789
1078
 
790
1079
  // Mark that we have nesting (only set once)
@@ -792,7 +1081,8 @@ static VALUE parse_mixed_block(ParserContext *ctx, const char *start, const char
792
1081
  ctx->has_nesting = 1;
793
1082
  }
794
1083
 
795
- rb_ary_push(ctx->rules_array, rule);
1084
+ // Replace placeholder with actual rule
1085
+ rb_ary_store(ctx->rules_array, rule_position, rule);
796
1086
  update_media_index(ctx, parent_media_sym, rule_id);
797
1087
  }
798
1088
 
@@ -874,6 +1164,11 @@ static VALUE parse_mixed_block(ParserContext *ctx, const char *start, const char
874
1164
  }
875
1165
  VALUE value = rb_utf8_str_new(val_start, val_len);
876
1166
 
1167
+ // Convert relative URLs to absolute if enabled
1168
+ if (ctx->absolute_paths && !NIL_P(ctx->base_uri)) {
1169
+ value = convert_urls_in_value(value, ctx->base_uri, ctx->uri_resolver);
1170
+ }
1171
+
877
1172
  VALUE decl = rb_struct_new(cDeclaration,
878
1173
  property,
879
1174
  value,
@@ -956,6 +1251,7 @@ static void parse_import_statement(ParserContext *ctx, const char **p_ptr, const
956
1251
 
957
1252
  // Check for optional media query (everything until semicolon)
958
1253
  VALUE media = Qnil;
1254
+ VALUE media_query_id_val = Qnil;
959
1255
  if (p < pe && *p != ';') {
960
1256
  const char *media_start = p;
961
1257
 
@@ -970,8 +1266,73 @@ static void parse_import_statement(ParserContext *ctx, const char **p_ptr, const
970
1266
  }
971
1267
 
972
1268
  if (media_end > media_start) {
973
- VALUE media_str = rb_utf8_str_new(media_start, media_end - media_start);
974
- media = ID2SYM(rb_intern_str(media_str));
1269
+ // media field should be a String, not a Symbol
1270
+ media = rb_utf8_str_new(media_start, media_end - media_start);
1271
+
1272
+ // Split comma-separated media queries (same as @media blocks)
1273
+ VALUE media_query_ids = rb_ary_new();
1274
+
1275
+ const char *query_start = media_start;
1276
+ for (const char *p_comma = media_start; p_comma <= media_end; p_comma++) {
1277
+ if (p_comma == media_end || *p_comma == ',') {
1278
+ const char *query_end = p_comma;
1279
+
1280
+ // Trim whitespace from this query
1281
+ while (query_start < query_end && IS_WHITESPACE(*query_start)) query_start++;
1282
+ while (query_end > query_start && IS_WHITESPACE(*(query_end - 1))) query_end--;
1283
+
1284
+ if (query_start < query_end) {
1285
+ // Parse this individual media query
1286
+ const char *mq_ptr = query_start;
1287
+ VALUE media_type;
1288
+ VALUE media_conditions = Qnil;
1289
+
1290
+ if (*mq_ptr == '(') {
1291
+ // Starts with '(' - just conditions, type defaults to :all
1292
+ media_type = ID2SYM(rb_intern("all"));
1293
+ media_conditions = rb_utf8_str_new(mq_ptr, query_end - mq_ptr);
1294
+ } else {
1295
+ // Extract media type (first word)
1296
+ const char *type_start = mq_ptr;
1297
+ while (mq_ptr < query_end && !IS_WHITESPACE(*mq_ptr) && *mq_ptr != '(') mq_ptr++;
1298
+ VALUE type_str = rb_utf8_str_new(type_start, mq_ptr - type_start);
1299
+ media_type = ID2SYM(rb_intern_str(type_str));
1300
+
1301
+ // Skip whitespace
1302
+ while (mq_ptr < query_end && IS_WHITESPACE(*mq_ptr)) mq_ptr++;
1303
+
1304
+ // Check if there are conditions (rest of string)
1305
+ if (mq_ptr < query_end) {
1306
+ media_conditions = rb_utf8_str_new(mq_ptr, query_end - mq_ptr);
1307
+ }
1308
+ }
1309
+
1310
+ // Create MediaQuery struct
1311
+ VALUE media_query = rb_struct_new(cMediaQuery,
1312
+ INT2FIX(ctx->media_query_id_counter),
1313
+ media_type,
1314
+ media_conditions
1315
+ );
1316
+
1317
+ rb_ary_push(ctx->media_queries, media_query);
1318
+ rb_ary_push(media_query_ids, INT2FIX(ctx->media_query_id_counter));
1319
+ ctx->media_query_id_counter++;
1320
+ }
1321
+
1322
+ // Move to start of next query
1323
+ query_start = p_comma + 1;
1324
+ }
1325
+ }
1326
+
1327
+ // If multiple queries, track them as a list
1328
+ if (RARRAY_LEN(media_query_ids) > 1) {
1329
+ int media_query_list_id = ctx->next_media_query_list_id;
1330
+ rb_hash_aset(ctx->media_query_lists, INT2FIX(media_query_list_id), media_query_ids);
1331
+ ctx->next_media_query_list_id++;
1332
+ }
1333
+
1334
+ // Use first query ID for the import statement
1335
+ media_query_id_val = rb_ary_entry(media_query_ids, 0);
975
1336
  }
976
1337
  }
977
1338
 
@@ -983,12 +1344,13 @@ static void parse_import_statement(ParserContext *ctx, const char **p_ptr, const
983
1344
  INT2FIX(ctx->rule_id_counter),
984
1345
  url,
985
1346
  media,
1347
+ media_query_id_val,
986
1348
  Qfalse);
987
1349
 
988
1350
  DEBUG_PRINTF("[IMPORT_STMT] Created import: id=%d, url=%s, media=%s\n",
989
1351
  ctx->rule_id_counter,
990
1352
  RSTRING_PTR(url),
991
- NIL_P(media) ? "nil" : RSTRING_PTR(rb_sym2str(media)));
1353
+ NIL_P(media) ? "nil" : RSTRING_PTR(media));
992
1354
 
993
1355
  rb_ary_push(ctx->imports_array, import_stmt);
994
1356
  ctx->rule_id_counter++;
@@ -1008,7 +1370,7 @@ static void parse_import_statement(ParserContext *ctx, const char **p_ptr, const
1008
1370
  * parent_rule_id: Parent rule ID (Fixnum) for nested rules (or Qnil for top-level)
1009
1371
  */
1010
1372
  static void parse_css_recursive(ParserContext *ctx, const char *css, const char *pe,
1011
- VALUE parent_media_sym, VALUE parent_selector, VALUE parent_rule_id) {
1373
+ VALUE parent_media_sym, VALUE parent_selector, VALUE parent_rule_id, int parent_media_query_id) {
1012
1374
  // Check recursion depth to prevent stack overflow
1013
1375
  if (ctx->depth > MAX_PARSE_DEPTH) {
1014
1376
  rb_raise(eDepthError,
@@ -1074,10 +1436,110 @@ static void parse_css_recursive(ParserContext *ctx, const char *css, const char
1074
1436
  continue; // Malformed
1075
1437
  }
1076
1438
 
1077
- // Intern media query
1078
- VALUE child_media_sym = intern_media_query_safe(ctx, mq_start, mq_end - mq_start);
1439
+ // Split comma-separated media queries (e.g., "screen, print" -> ["screen", "print"])
1440
+ // Per W3C spec, comma acts as logical OR - each query is independent
1441
+ VALUE media_query_ids = rb_ary_new();
1442
+
1443
+ const char *query_start = mq_start;
1444
+ for (const char *p_comma = mq_start; p_comma <= mq_end; p_comma++) {
1445
+ if (p_comma == mq_end || *p_comma == ',') {
1446
+ const char *query_end = p_comma;
1447
+
1448
+ // Trim whitespace from this query
1449
+ while (query_start < query_end && IS_WHITESPACE(*query_start)) query_start++;
1450
+ while (query_end > query_start && IS_WHITESPACE(*(query_end - 1))) query_end--;
1451
+
1452
+ if (query_start < query_end) {
1453
+ // Parse this individual media query
1454
+ const char *mq_ptr = query_start;
1455
+ VALUE media_type;
1456
+ VALUE media_conditions = Qnil;
1457
+
1458
+ if (*mq_ptr == '(') {
1459
+ // Starts with '(' - just conditions, type defaults to :all
1460
+ media_type = ID2SYM(rb_intern("all"));
1461
+ media_conditions = rb_utf8_str_new(mq_ptr, query_end - mq_ptr);
1462
+ } else {
1463
+ // Extract media type (first word, stopping at whitespace, comma, or '(')
1464
+ const char *type_start = mq_ptr;
1465
+ while (mq_ptr < query_end && !IS_WHITESPACE(*mq_ptr) && *mq_ptr != '(') mq_ptr++;
1466
+ VALUE type_str = rb_utf8_str_new(type_start, mq_ptr - type_start);
1467
+ media_type = ID2SYM(rb_intern_str(type_str));
1468
+
1469
+ // Skip whitespace and "and" keyword if present
1470
+ while (mq_ptr < query_end && IS_WHITESPACE(*mq_ptr)) mq_ptr++;
1471
+ if (mq_ptr + 3 <= query_end && strncmp(mq_ptr, "and", 3) == 0) {
1472
+ mq_ptr += 3;
1473
+ while (mq_ptr < query_end && IS_WHITESPACE(*mq_ptr)) mq_ptr++;
1474
+ }
1475
+
1476
+ // Rest is conditions
1477
+ if (mq_ptr < query_end) {
1478
+ media_conditions = rb_utf8_str_new(mq_ptr, query_end - mq_ptr);
1479
+ }
1480
+ }
1481
+
1482
+ // Create MediaQuery object for this query
1483
+ VALUE media_query = rb_struct_new(cMediaQuery,
1484
+ INT2FIX(ctx->media_query_id_counter),
1485
+ media_type,
1486
+ media_conditions
1487
+ );
1488
+ rb_ary_push(ctx->media_queries, media_query);
1489
+ rb_ary_push(media_query_ids, INT2FIX(ctx->media_query_id_counter));
1490
+ ctx->media_query_id_counter++;
1491
+ }
1492
+
1493
+ // Move to start of next query
1494
+ query_start = p_comma + 1;
1495
+ }
1496
+ }
1079
1497
 
1080
- // Combine with parent
1498
+ // If multiple queries, track them as a list for serialization
1499
+ int media_query_list_id = -1;
1500
+ if (RARRAY_LEN(media_query_ids) > 1) {
1501
+ media_query_list_id = ctx->next_media_query_list_id;
1502
+ rb_hash_aset(ctx->media_query_lists, INT2FIX(media_query_list_id), media_query_ids);
1503
+ ctx->next_media_query_list_id++;
1504
+ }
1505
+
1506
+ // Use first query ID as the primary one for rules in this block
1507
+ int current_media_query_id = FIX2INT(rb_ary_entry(media_query_ids, 0));
1508
+
1509
+ // Handle nested @media by combining with parent
1510
+ if (parent_media_query_id >= 0) {
1511
+ VALUE parent_mq = rb_ary_entry(ctx->media_queries, parent_media_query_id);
1512
+ VALUE parent_type = rb_struct_aref(parent_mq, INT2FIX(1)); // type field
1513
+ VALUE parent_conditions = rb_struct_aref(parent_mq, INT2FIX(2)); // conditions field
1514
+
1515
+ // Get child media query (first one in the list)
1516
+ VALUE child_mq = rb_ary_entry(ctx->media_queries, current_media_query_id);
1517
+ VALUE child_conditions = rb_struct_aref(child_mq, INT2FIX(2)); // conditions field
1518
+
1519
+ // Combined type is parent's type (outermost wins, child type ignored)
1520
+ VALUE combined_type = parent_type;
1521
+ VALUE combined_conditions;
1522
+
1523
+ if (!NIL_P(parent_conditions) && !NIL_P(child_conditions)) {
1524
+ combined_conditions = rb_sprintf("%"PRIsVALUE" and %"PRIsVALUE, parent_conditions, child_conditions);
1525
+ } else if (!NIL_P(parent_conditions)) {
1526
+ combined_conditions = parent_conditions;
1527
+ } else {
1528
+ combined_conditions = child_conditions;
1529
+ }
1530
+
1531
+ VALUE combined_mq = rb_struct_new(cMediaQuery,
1532
+ INT2FIX(ctx->media_query_id_counter),
1533
+ combined_type,
1534
+ combined_conditions
1535
+ );
1536
+ rb_ary_push(ctx->media_queries, combined_mq);
1537
+ current_media_query_id = ctx->media_query_id_counter;
1538
+ ctx->media_query_id_counter++;
1539
+ }
1540
+
1541
+ // For backwards compat, also create symbol (will be removed later)
1542
+ VALUE child_media_sym = intern_media_query_safe(ctx, mq_start, mq_end - mq_start);
1081
1543
  VALUE combined_media_sym = combine_media_queries(parent_media_sym, child_media_sym);
1082
1544
 
1083
1545
  p++; // Skip opening {
@@ -1087,9 +1549,9 @@ static void parse_css_recursive(ParserContext *ctx, const char *css, const char
1087
1549
  const char *block_end = find_matching_brace(p, pe);
1088
1550
  p = block_end;
1089
1551
 
1090
- // Recursively parse @media block with combined media context
1552
+ // Recursively parse @media block with new media query context
1091
1553
  ctx->depth++;
1092
- parse_css_recursive(ctx, block_start, block_end, combined_media_sym, NO_PARENT_SELECTOR, NO_PARENT_RULE_ID);
1554
+ parse_css_recursive(ctx, block_start, block_end, combined_media_sym, NO_PARENT_SELECTOR, NO_PARENT_RULE_ID, current_media_query_id);
1093
1555
  ctx->depth--;
1094
1556
 
1095
1557
  if (p < pe && *p == '}') p++;
@@ -1135,7 +1597,7 @@ static void parse_css_recursive(ParserContext *ctx, const char *css, const char
1135
1597
 
1136
1598
  // Recursively parse block content (preserve parent media context)
1137
1599
  ctx->depth++;
1138
- parse_css_recursive(ctx, block_start, block_end, parent_media_sym, parent_selector, parent_rule_id);
1600
+ parse_css_recursive(ctx, block_start, block_end, parent_media_sym, parent_selector, parent_rule_id, parent_media_query_id);
1139
1601
  ctx->depth--;
1140
1602
 
1141
1603
  if (p < pe && *p == '}') p++;
@@ -1186,7 +1648,7 @@ static void parse_css_recursive(ParserContext *ctx, const char *css, const char
1186
1648
  .selector_lists_enabled = ctx->selector_lists_enabled,
1187
1649
  .depth = 0
1188
1650
  };
1189
- parse_css_recursive(&nested_ctx, block_start, block_end, NO_PARENT_MEDIA, NO_PARENT_SELECTOR, NO_PARENT_RULE_ID);
1651
+ parse_css_recursive(&nested_ctx, block_start, block_end, NO_PARENT_MEDIA, NO_PARENT_SELECTOR, NO_PARENT_RULE_ID, NO_MEDIA_QUERY_ID);
1190
1652
 
1191
1653
  // Get rule ID and increment
1192
1654
  int rule_id = ctx->rule_id_counter++;
@@ -1196,7 +1658,9 @@ static void parse_css_recursive(ParserContext *ctx, const char *css, const char
1196
1658
  INT2FIX(rule_id),
1197
1659
  selector,
1198
1660
  nested_ctx.rules_array, // Array of Rule (keyframe blocks)
1199
- Qnil);
1661
+ Qnil, // specificity
1662
+ Qnil // media_query_id
1663
+ );
1200
1664
 
1201
1665
  // Add to rules array
1202
1666
  rb_ary_push(ctx->rules_array, at_rule);
@@ -1242,7 +1706,7 @@ static void parse_css_recursive(ParserContext *ctx, const char *css, const char
1242
1706
  p = decl_end;
1243
1707
 
1244
1708
  // Parse declarations
1245
- VALUE declarations = parse_declarations(decl_start, decl_end);
1709
+ VALUE declarations = parse_declarations(decl_start, decl_end, ctx);
1246
1710
 
1247
1711
  // Get rule ID and increment
1248
1712
  int rule_id = ctx->rule_id_counter++;
@@ -1252,7 +1716,9 @@ static void parse_css_recursive(ParserContext *ctx, const char *css, const char
1252
1716
  INT2FIX(rule_id),
1253
1717
  selector,
1254
1718
  declarations, // Array of Declaration
1255
- Qnil);
1719
+ Qnil, // specificity
1720
+ Qnil // media_query_id
1721
+ );
1256
1722
 
1257
1723
  // Add to rules array
1258
1724
  rb_ary_push(ctx->rules_array, at_rule);
@@ -1299,7 +1765,7 @@ static void parse_css_recursive(ParserContext *ctx, const char *css, const char
1299
1765
 
1300
1766
  if (!has_nesting) {
1301
1767
  // FAST PATH: No nesting - parse as pure declarations
1302
- VALUE declarations = parse_declarations(decl_start, p);
1768
+ VALUE declarations = parse_declarations(decl_start, p, ctx);
1303
1769
 
1304
1770
  // Split on commas to handle multi-selector rules
1305
1771
  // Example: ".a, .b, .c { color: red; }" creates 3 separate rules
@@ -1390,6 +1856,7 @@ static void parse_css_recursive(ParserContext *ctx, const char *css, const char
1390
1856
  }
1391
1857
 
1392
1858
  // Create Rule
1859
+ VALUE media_query_id_val = (parent_media_query_id >= 0) ? INT2FIX(parent_media_query_id) : Qnil;
1393
1860
  VALUE rule = rb_struct_new(cRule,
1394
1861
  INT2FIX(rule_id),
1395
1862
  resolved_selector,
@@ -1397,7 +1864,8 @@ static void parse_css_recursive(ParserContext *ctx, const char *css, const char
1397
1864
  Qnil, // specificity
1398
1865
  parent_id_val,
1399
1866
  nesting_style_val,
1400
- selector_list_id_val
1867
+ selector_list_id_val,
1868
+ media_query_id_val // media_query_id from parent context
1401
1869
  );
1402
1870
 
1403
1871
  // Track rule in selector list if applicable
@@ -1499,7 +1967,7 @@ static void parse_css_recursive(ParserContext *ctx, const char *css, const char
1499
1967
  // Nested rules will be added AFTER the placeholder
1500
1968
  ctx->depth++;
1501
1969
  VALUE parent_declarations = parse_mixed_block(ctx, decl_start, p,
1502
- resolved_current, INT2FIX(current_rule_id), parent_media_sym);
1970
+ resolved_current, INT2FIX(current_rule_id), parent_media_sym, parent_media_query_id);
1503
1971
  ctx->depth--;
1504
1972
 
1505
1973
  // Determine selector_list_id value
@@ -1507,6 +1975,7 @@ static void parse_css_recursive(ParserContext *ctx, const char *css, const char
1507
1975
 
1508
1976
  // Create parent rule and replace placeholder
1509
1977
  // Always create the rule (even if empty) to avoid edge cases
1978
+ VALUE media_query_id_val = (parent_media_query_id >= 0) ? INT2FIX(parent_media_query_id) : Qnil;
1510
1979
  VALUE rule = rb_struct_new(cRule,
1511
1980
  INT2FIX(current_rule_id),
1512
1981
  resolved_current,
@@ -1514,7 +1983,8 @@ static void parse_css_recursive(ParserContext *ctx, const char *css, const char
1514
1983
  Qnil, // specificity
1515
1984
  current_parent_id,
1516
1985
  current_nesting_style,
1517
- selector_list_id_val
1986
+ selector_list_id_val,
1987
+ media_query_id_val // media_query_id from parent context
1518
1988
  );
1519
1989
 
1520
1990
  // Track rule in selector list if applicable
@@ -1588,6 +2058,12 @@ VALUE parse_css_new_impl(VALUE css_string, VALUE parser_options, int rule_id_off
1588
2058
  VALUE selector_lists_opt = rb_hash_aref(parser_options, ID2SYM(rb_intern("selector_lists")));
1589
2059
  int selector_lists_enabled = (NIL_P(selector_lists_opt) || RTEST(selector_lists_opt)) ? 1 : 0;
1590
2060
 
2061
+ // URL conversion options
2062
+ VALUE base_uri = rb_hash_aref(parser_options, ID2SYM(rb_intern("base_uri")));
2063
+ VALUE absolute_paths_opt = rb_hash_aref(parser_options, ID2SYM(rb_intern("absolute_paths")));
2064
+ VALUE uri_resolver = rb_hash_aref(parser_options, ID2SYM(rb_intern("uri_resolver")));
2065
+ int absolute_paths = RTEST(absolute_paths_opt) ? 1 : 0;
2066
+
1591
2067
  const char *css = RSTRING_PTR(css_string);
1592
2068
  const char *pe = css + RSTRING_LEN(css_string);
1593
2069
  const char *p = css;
@@ -1624,23 +2100,33 @@ VALUE parse_css_new_impl(VALUE css_string, VALUE parser_options, int rule_id_off
1624
2100
  ctx.media_index = rb_hash_new();
1625
2101
  ctx.selector_lists = rb_hash_new();
1626
2102
  ctx.imports_array = rb_ary_new();
2103
+ ctx.media_queries = rb_ary_new();
2104
+ ctx.media_query_lists = rb_hash_new();
1627
2105
  ctx.rule_id_counter = rule_id_offset; // Start from offset
1628
2106
  ctx.next_selector_list_id = 0; // Start from 0
2107
+ ctx.media_query_id_counter = 0; // Start from 0
2108
+ ctx.next_media_query_list_id = 0; // Start from 0
1629
2109
  ctx.media_query_count = 0;
1630
2110
  ctx.media_cache = NULL; // Removed - no perf benefit
1631
2111
  ctx.has_nesting = 0; // Will be set to 1 if any nested rules are created
1632
2112
  ctx.selector_lists_enabled = selector_lists_enabled;
1633
2113
  ctx.depth = 0; // Start at depth 0
2114
+ // URL conversion options
2115
+ ctx.base_uri = base_uri;
2116
+ ctx.uri_resolver = uri_resolver;
2117
+ ctx.absolute_paths = absolute_paths;
1634
2118
 
1635
2119
  // Parse CSS (top-level, no parent context)
1636
2120
  DEBUG_PRINTF("[PARSE] Starting parse_css_recursive from: %.80s\n", p);
1637
- parse_css_recursive(&ctx, p, pe, NO_PARENT_MEDIA, NO_PARENT_SELECTOR, NO_PARENT_RULE_ID);
2121
+ parse_css_recursive(&ctx, p, pe, NO_PARENT_MEDIA, NO_PARENT_SELECTOR, NO_PARENT_RULE_ID, NO_MEDIA_QUERY_ID);
1638
2122
 
1639
2123
  // Build result hash
1640
2124
  VALUE result = rb_hash_new();
1641
2125
  rb_hash_aset(result, ID2SYM(rb_intern("rules")), ctx.rules_array);
1642
2126
  rb_hash_aset(result, ID2SYM(rb_intern("_media_index")), ctx.media_index);
2127
+ rb_hash_aset(result, ID2SYM(rb_intern("media_queries")), ctx.media_queries);
1643
2128
  rb_hash_aset(result, ID2SYM(rb_intern("_selector_lists")), ctx.selector_lists);
2129
+ rb_hash_aset(result, ID2SYM(rb_intern("_media_query_lists")), ctx.media_query_lists);
1644
2130
  rb_hash_aset(result, ID2SYM(rb_intern("imports")), ctx.imports_array);
1645
2131
  rb_hash_aset(result, ID2SYM(rb_intern("charset")), charset);
1646
2132
  rb_hash_aset(result, ID2SYM(rb_intern("last_rule_id")), INT2FIX(ctx.rule_id_counter));
@@ -1649,8 +2135,12 @@ VALUE parse_css_new_impl(VALUE css_string, VALUE parser_options, int rule_id_off
1649
2135
  RB_GC_GUARD(charset);
1650
2136
  RB_GC_GUARD(ctx.rules_array);
1651
2137
  RB_GC_GUARD(ctx.media_index);
2138
+ RB_GC_GUARD(ctx.media_queries);
1652
2139
  RB_GC_GUARD(ctx.selector_lists);
2140
+ RB_GC_GUARD(ctx.media_query_lists);
1653
2141
  RB_GC_GUARD(ctx.imports_array);
2142
+ RB_GC_GUARD(ctx.base_uri);
2143
+ RB_GC_GUARD(ctx.uri_resolver);
1654
2144
  RB_GC_GUARD(result);
1655
2145
 
1656
2146
  return result;