cataract 0.1.4 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -17,6 +17,7 @@
17
17
  typedef struct {
18
18
  VALUE rules_array; // Array of Rule structs
19
19
  VALUE media_index; // Hash: Symbol => Array of rule IDs
20
+ VALUE imports_array; // Array of ImportStatement structs
20
21
  int rule_id_counter; // Next rule ID (0-indexed)
21
22
  int media_query_count; // Safety limit for media queries
22
23
  st_table *media_cache; // Parse-time cache: string => parsed media types
@@ -868,6 +869,119 @@ static VALUE parse_mixed_block(ParserContext *ctx, const char *start, const char
868
869
  return declarations;
869
870
  }
870
871
 
872
+ /*
873
+ * Parse @import statement
874
+ * @import "url" [media-query];
875
+ * @import url("url") [media-query];
876
+ *
877
+ * Modifies ctx->imports_array and ctx->rule_id_counter
878
+ */
879
+ static void parse_import_statement(ParserContext *ctx, const char **p_ptr, const char *pe) {
880
+ const char *p = *p_ptr;
881
+
882
+ DEBUG_PRINTF("[IMPORT_STMT] Starting parse, input: %.50s\n", p);
883
+
884
+ // Skip whitespace
885
+ while (p < pe && IS_WHITESPACE(*p)) p++;
886
+
887
+ // Check for optional url(
888
+ int has_url_function = 0;
889
+ if (p + 4 <= pe && strncmp(p, "url(", 4) == 0) {
890
+ has_url_function = 1;
891
+ p += 4;
892
+
893
+ // Skip whitespace after url(
894
+ while (p < pe && IS_WHITESPACE(*p)) p++;
895
+ }
896
+
897
+ // Find opening quote
898
+ if (p >= pe || (*p != '"' && *p != '\'')) {
899
+ // Invalid @import, skip to semicolon
900
+ while (p < pe && *p != ';') p++;
901
+ if (p < pe) p++;
902
+ *p_ptr = p;
903
+ return;
904
+ }
905
+
906
+ char quote_char = *p;
907
+ p++; // Skip opening quote
908
+
909
+ const char *url_start = p;
910
+
911
+ // Find closing quote (handle escaped quotes)
912
+ while (p < pe && *p != quote_char) {
913
+ if (*p == '\\' && p + 1 < pe) {
914
+ p += 2; // Skip escaped character
915
+ } else {
916
+ p++;
917
+ }
918
+ }
919
+
920
+ if (p >= pe) {
921
+ // Unterminated string
922
+ *p_ptr = p;
923
+ return;
924
+ }
925
+
926
+ long url_len = p - url_start;
927
+ VALUE url = rb_utf8_str_new(url_start, url_len);
928
+ p++; // Skip closing quote
929
+
930
+ // Skip closing paren if we had url(
931
+ if (has_url_function) {
932
+ while (p < pe && IS_WHITESPACE(*p)) p++;
933
+ if (p < pe && *p == ')') p++;
934
+ }
935
+
936
+ // Skip whitespace
937
+ while (p < pe && IS_WHITESPACE(*p)) p++;
938
+
939
+ // Check for optional media query (everything until semicolon)
940
+ VALUE media = Qnil;
941
+ if (p < pe && *p != ';') {
942
+ const char *media_start = p;
943
+
944
+ // Find semicolon
945
+ while (p < pe && *p != ';') p++;
946
+
947
+ const char *media_end = p;
948
+
949
+ // Trim trailing whitespace from media query
950
+ while (media_end > media_start && IS_WHITESPACE(*(media_end - 1))) {
951
+ media_end--;
952
+ }
953
+
954
+ if (media_end > media_start) {
955
+ VALUE media_str = rb_utf8_str_new(media_start, media_end - media_start);
956
+ media = ID2SYM(rb_intern_str(media_str));
957
+ }
958
+ }
959
+
960
+ // Skip semicolon
961
+ if (p < pe && *p == ';') p++;
962
+
963
+ // Create ImportStatement (resolved: false by default)
964
+ VALUE import_stmt = rb_struct_new(cImportStatement,
965
+ INT2FIX(ctx->rule_id_counter),
966
+ url,
967
+ media,
968
+ Qfalse);
969
+
970
+ DEBUG_PRINTF("[IMPORT_STMT] Created import: id=%d, url=%s, media=%s\n",
971
+ ctx->rule_id_counter,
972
+ RSTRING_PTR(url),
973
+ NIL_P(media) ? "nil" : RSTRING_PTR(rb_sym2str(media)));
974
+
975
+ rb_ary_push(ctx->imports_array, import_stmt);
976
+ ctx->rule_id_counter++;
977
+
978
+ *p_ptr = p;
979
+
980
+ RB_GC_GUARD(url);
981
+ RB_GC_GUARD(media);
982
+ RB_GC_GUARD(import_stmt);
983
+ }
984
+
871
985
  /*
872
986
  * Parse CSS recursively with media query context and optional parent selector for nesting
873
987
  *
@@ -898,6 +1012,30 @@ static void parse_css_recursive(ParserContext *ctx, const char *css, const char
898
1012
  // Skip comments (rare in typical CSS)
899
1013
  SKIP_COMMENT(p, pe);
900
1014
 
1015
+ // Hail mary ...
1016
+ // DEBUG_PRINTF("[LOOP] At position, char='%c' (0x%02x), brace_depth=%d, next 20 chars: %.20s\n",
1017
+ // *p >= 32 && *p <= 126 ? *p : '?', (unsigned char)*p, brace_depth, p);
1018
+
1019
+ // Check for @import at-rule (only at top level, before any rules)
1020
+ if (RB_UNLIKELY(brace_depth == 0 && p + 7 < pe && *p == '@' &&
1021
+ strncmp(p + 1, "import", 6) == 0 && IS_WHITESPACE(p[7]))) {
1022
+ DEBUG_PRINTF("[IMPORT] Found @import at position, rules_count=%ld\n", RARRAY_LEN(ctx->rules_array));
1023
+ // Check if we've already seen a rule
1024
+ if (RARRAY_LEN(ctx->rules_array) > 0) {
1025
+ // Warn and skip - @import must come before rules
1026
+ rb_warn("CSS @import ignored: @import must appear before all rules (found import after rules)");
1027
+ // Skip to semicolon
1028
+ while (p < pe && *p != ';') p++;
1029
+ if (p < pe) p++;
1030
+ continue;
1031
+ }
1032
+
1033
+ p += 7; // Skip "@import "
1034
+ parse_import_statement(ctx, &p, pe);
1035
+ DEBUG_PRINTF("[IMPORT] After parsing, imports_count=%ld\n", RARRAY_LEN(ctx->imports_array));
1036
+ continue;
1037
+ }
1038
+
901
1039
  // Check for @media at-rule (only at depth 0)
902
1040
  if (RB_UNLIKELY(brace_depth == 0 && p + 6 < pe && *p == '@' &&
903
1041
  strncmp(p + 1, "media", 5) == 0 && IS_WHITESPACE(p[6]))) {
@@ -1306,6 +1444,7 @@ static void parse_css_recursive(ParserContext *ctx, const char *css, const char
1306
1444
  // Start of selector
1307
1445
  if (brace_depth == 0 && selector_start == NULL) {
1308
1446
  selector_start = p;
1447
+ DEBUG_PRINTF("[SELECTOR] Starting selector at: %.50s\n", selector_start);
1309
1448
  }
1310
1449
 
1311
1450
  p++;
@@ -1337,6 +1476,9 @@ VALUE parse_media_types(VALUE self, VALUE media_query_sym) {
1337
1476
  VALUE parse_css_new_impl(VALUE css_string, int rule_id_offset) {
1338
1477
  Check_Type(css_string, T_STRING);
1339
1478
 
1479
+ DEBUG_PRINTF("\n[PARSE_NEW] ========== NEW PARSE CALL ==========\n");
1480
+ DEBUG_PRINTF("[PARSE_NEW] Input CSS (first 100 chars): %.100s\n", RSTRING_PTR(css_string));
1481
+
1340
1482
  const char *css = RSTRING_PTR(css_string);
1341
1483
  const char *pe = css + RSTRING_LEN(css_string);
1342
1484
  const char *p = css;
@@ -1345,59 +1487,33 @@ VALUE parse_css_new_impl(VALUE css_string, int rule_id_offset) {
1345
1487
 
1346
1488
  // Extract @charset
1347
1489
  if (RSTRING_LEN(css_string) > 10 && strncmp(css, "@charset ", 9) == 0) {
1490
+ DEBUG_PRINTF("[CHARSET] Found @charset at start\n");
1348
1491
  char *quote_start = strchr(css + 9, '"');
1349
1492
  if (quote_start != NULL) {
1350
1493
  char *quote_end = strchr(quote_start + 1, '"');
1351
1494
  if (quote_end != NULL) {
1352
1495
  charset = rb_str_new(quote_start + 1, quote_end - quote_start - 1);
1496
+ DEBUG_PRINTF("[CHARSET] Extracted charset: %s\n", RSTRING_PTR(charset));
1353
1497
  char *semicolon = quote_end + 1;
1354
1498
  while (semicolon < pe && IS_WHITESPACE(*semicolon)) {
1355
1499
  semicolon++;
1356
1500
  }
1357
1501
  if (semicolon < pe && *semicolon == ';') {
1358
1502
  p = semicolon + 1;
1503
+ DEBUG_PRINTF("[CHARSET] Advanced past semicolon, remaining: %.50s\n", p);
1359
1504
  }
1360
1505
  }
1361
1506
  }
1362
1507
  }
1363
1508
 
1364
- // Skip @import statements - they should be handled by ImportResolver at Ruby level
1365
- // Per CSS spec, @import must come before all rules (except @charset)
1366
- while (p < pe) {
1367
- // Skip whitespace
1368
- while (p < pe && IS_WHITESPACE(*p)) p++;
1369
- if (p >= pe) break;
1370
-
1371
- // Skip comments
1372
- if (p + 1 < pe && p[0] == '/' && p[1] == '*') {
1373
- p += 2;
1374
- while (p + 1 < pe) {
1375
- if (p[0] == '*' && p[1] == '/') {
1376
- p += 2;
1377
- break;
1378
- }
1379
- p++;
1380
- }
1381
- continue;
1382
- }
1383
-
1384
- // Check for @import
1385
- if (p + 7 <= pe && *p == '@' && strncasecmp(p + 1, "import", 6) == 0 &&
1386
- (p + 7 >= pe || IS_WHITESPACE(p[7]) || p[7] == '\'' || p[7] == '"')) {
1387
- // Skip to semicolon
1388
- while (p < pe && *p != ';') p++;
1389
- if (p < pe) p++; // Skip semicolon
1390
- continue;
1391
- }
1392
-
1393
- // Hit non-@import content, stop skipping
1394
- break;
1395
- }
1509
+ // @import statements are now handled in parse_css_recursive
1510
+ // They must come before all rules (except @charset) per CSS spec
1396
1511
 
1397
1512
  // Initialize parser context with offset
1398
1513
  ParserContext ctx;
1399
1514
  ctx.rules_array = rb_ary_new();
1400
1515
  ctx.media_index = rb_hash_new();
1516
+ ctx.imports_array = rb_ary_new();
1401
1517
  ctx.rule_id_counter = rule_id_offset; // Start from offset
1402
1518
  ctx.media_query_count = 0;
1403
1519
  ctx.media_cache = NULL; // Removed - no perf benefit
@@ -1405,15 +1521,23 @@ VALUE parse_css_new_impl(VALUE css_string, int rule_id_offset) {
1405
1521
  ctx.depth = 0; // Start at depth 0
1406
1522
 
1407
1523
  // Parse CSS (top-level, no parent context)
1524
+ DEBUG_PRINTF("[PARSE] Starting parse_css_recursive from: %.80s\n", p);
1408
1525
  parse_css_recursive(&ctx, p, pe, NO_PARENT_MEDIA, NO_PARENT_SELECTOR, NO_PARENT_RULE_ID);
1409
1526
 
1410
1527
  // Build result hash
1411
1528
  VALUE result = rb_hash_new();
1412
1529
  rb_hash_aset(result, ID2SYM(rb_intern("rules")), ctx.rules_array);
1413
1530
  rb_hash_aset(result, ID2SYM(rb_intern("_media_index")), ctx.media_index);
1531
+ rb_hash_aset(result, ID2SYM(rb_intern("imports")), ctx.imports_array);
1414
1532
  rb_hash_aset(result, ID2SYM(rb_intern("charset")), charset);
1415
1533
  rb_hash_aset(result, ID2SYM(rb_intern("last_rule_id")), INT2FIX(ctx.rule_id_counter));
1416
1534
  rb_hash_aset(result, ID2SYM(rb_intern("_has_nesting")), ctx.has_nesting ? Qtrue : Qfalse);
1417
1535
 
1536
+ RB_GC_GUARD(charset);
1537
+ RB_GC_GUARD(ctx.rules_array);
1538
+ RB_GC_GUARD(ctx.media_index);
1539
+ RB_GC_GUARD(ctx.imports_array);
1540
+ RB_GC_GUARD(result);
1541
+
1418
1542
  return result;
1419
1543
  }
@@ -21,8 +21,8 @@ def config_str_buf_optimization?
21
21
  enable_config('str-buf-optimization', true)
22
22
  end
23
23
 
24
- # Compile main file, parser, merge, and supporting files
25
- $objs = ['cataract.o', 'css_parser.o', 'merge.o', 'shorthand_expander.o', 'specificity.o', 'value_splitter.o',
24
+ # Compile main file, parser, flatten, and supporting files
25
+ $objs = ['cataract.o', 'css_parser.o', 'flatten.o', 'shorthand_expander.o', 'specificity.o', 'value_splitter.o',
26
26
  'import_scanner.o']
27
27
 
28
28
  # Suppress warnings