cataract 0.1.4 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.github/workflows/ci-manual-rubies.yml +18 -1
- data/.rubocop.yml +36 -6
- data/.rubocop_todo.yml +7 -7
- data/BENCHMARKS.md +30 -30
- data/CHANGELOG.md +10 -0
- data/RAGEL_MIGRATION.md +2 -2
- data/README.md +7 -2
- data/Rakefile +24 -11
- data/cataract.gemspec +1 -1
- data/ext/cataract/cataract.c +12 -3
- data/ext/cataract/cataract.h +5 -3
- data/ext/cataract/css_parser.c +156 -32
- data/ext/cataract/extconf.rb +2 -2
- data/ext/cataract/{merge.c → flatten.c} +520 -468
- data/ext/cataract/shorthand_expander.c +164 -115
- data/lib/cataract/import_resolver.rb +60 -39
- data/lib/cataract/import_statement.rb +49 -0
- data/lib/cataract/pure/{merge.rb → flatten.rb} +39 -40
- data/lib/cataract/pure/imports.rb +13 -0
- data/lib/cataract/pure/parser.rb +108 -4
- data/lib/cataract/pure.rb +32 -9
- data/lib/cataract/rule.rb +51 -6
- data/lib/cataract/stylesheet.rb +343 -41
- data/lib/cataract/version.rb +1 -1
- data/lib/cataract.rb +28 -24
- metadata +4 -3
data/ext/cataract/css_parser.c
CHANGED
|
@@ -17,6 +17,7 @@
|
|
|
17
17
|
typedef struct {
|
|
18
18
|
VALUE rules_array; // Array of Rule structs
|
|
19
19
|
VALUE media_index; // Hash: Symbol => Array of rule IDs
|
|
20
|
+
VALUE imports_array; // Array of ImportStatement structs
|
|
20
21
|
int rule_id_counter; // Next rule ID (0-indexed)
|
|
21
22
|
int media_query_count; // Safety limit for media queries
|
|
22
23
|
st_table *media_cache; // Parse-time cache: string => parsed media types
|
|
@@ -868,6 +869,119 @@ static VALUE parse_mixed_block(ParserContext *ctx, const char *start, const char
|
|
|
868
869
|
return declarations;
|
|
869
870
|
}
|
|
870
871
|
|
|
872
|
+
/*
|
|
873
|
+
* Parse @import statement
|
|
874
|
+
* @import "url" [media-query];
|
|
875
|
+
* @import url("url") [media-query];
|
|
876
|
+
*
|
|
877
|
+
* Modifies ctx->imports_array and ctx->rule_id_counter
|
|
878
|
+
*/
|
|
879
|
+
static void parse_import_statement(ParserContext *ctx, const char **p_ptr, const char *pe) {
|
|
880
|
+
const char *p = *p_ptr;
|
|
881
|
+
|
|
882
|
+
DEBUG_PRINTF("[IMPORT_STMT] Starting parse, input: %.50s\n", p);
|
|
883
|
+
|
|
884
|
+
// Skip whitespace
|
|
885
|
+
while (p < pe && IS_WHITESPACE(*p)) p++;
|
|
886
|
+
|
|
887
|
+
// Check for optional url(
|
|
888
|
+
int has_url_function = 0;
|
|
889
|
+
if (p + 4 <= pe && strncmp(p, "url(", 4) == 0) {
|
|
890
|
+
has_url_function = 1;
|
|
891
|
+
p += 4;
|
|
892
|
+
|
|
893
|
+
// Skip whitespace after url(
|
|
894
|
+
while (p < pe && IS_WHITESPACE(*p)) p++;
|
|
895
|
+
}
|
|
896
|
+
|
|
897
|
+
// Find opening quote
|
|
898
|
+
if (p >= pe || (*p != '"' && *p != '\'')) {
|
|
899
|
+
// Invalid @import, skip to semicolon
|
|
900
|
+
while (p < pe && *p != ';') p++;
|
|
901
|
+
if (p < pe) p++;
|
|
902
|
+
*p_ptr = p;
|
|
903
|
+
return;
|
|
904
|
+
}
|
|
905
|
+
|
|
906
|
+
char quote_char = *p;
|
|
907
|
+
p++; // Skip opening quote
|
|
908
|
+
|
|
909
|
+
const char *url_start = p;
|
|
910
|
+
|
|
911
|
+
// Find closing quote (handle escaped quotes)
|
|
912
|
+
while (p < pe && *p != quote_char) {
|
|
913
|
+
if (*p == '\\' && p + 1 < pe) {
|
|
914
|
+
p += 2; // Skip escaped character
|
|
915
|
+
} else {
|
|
916
|
+
p++;
|
|
917
|
+
}
|
|
918
|
+
}
|
|
919
|
+
|
|
920
|
+
if (p >= pe) {
|
|
921
|
+
// Unterminated string
|
|
922
|
+
*p_ptr = p;
|
|
923
|
+
return;
|
|
924
|
+
}
|
|
925
|
+
|
|
926
|
+
long url_len = p - url_start;
|
|
927
|
+
VALUE url = rb_utf8_str_new(url_start, url_len);
|
|
928
|
+
p++; // Skip closing quote
|
|
929
|
+
|
|
930
|
+
// Skip closing paren if we had url(
|
|
931
|
+
if (has_url_function) {
|
|
932
|
+
while (p < pe && IS_WHITESPACE(*p)) p++;
|
|
933
|
+
if (p < pe && *p == ')') p++;
|
|
934
|
+
}
|
|
935
|
+
|
|
936
|
+
// Skip whitespace
|
|
937
|
+
while (p < pe && IS_WHITESPACE(*p)) p++;
|
|
938
|
+
|
|
939
|
+
// Check for optional media query (everything until semicolon)
|
|
940
|
+
VALUE media = Qnil;
|
|
941
|
+
if (p < pe && *p != ';') {
|
|
942
|
+
const char *media_start = p;
|
|
943
|
+
|
|
944
|
+
// Find semicolon
|
|
945
|
+
while (p < pe && *p != ';') p++;
|
|
946
|
+
|
|
947
|
+
const char *media_end = p;
|
|
948
|
+
|
|
949
|
+
// Trim trailing whitespace from media query
|
|
950
|
+
while (media_end > media_start && IS_WHITESPACE(*(media_end - 1))) {
|
|
951
|
+
media_end--;
|
|
952
|
+
}
|
|
953
|
+
|
|
954
|
+
if (media_end > media_start) {
|
|
955
|
+
VALUE media_str = rb_utf8_str_new(media_start, media_end - media_start);
|
|
956
|
+
media = ID2SYM(rb_intern_str(media_str));
|
|
957
|
+
}
|
|
958
|
+
}
|
|
959
|
+
|
|
960
|
+
// Skip semicolon
|
|
961
|
+
if (p < pe && *p == ';') p++;
|
|
962
|
+
|
|
963
|
+
// Create ImportStatement (resolved: false by default)
|
|
964
|
+
VALUE import_stmt = rb_struct_new(cImportStatement,
|
|
965
|
+
INT2FIX(ctx->rule_id_counter),
|
|
966
|
+
url,
|
|
967
|
+
media,
|
|
968
|
+
Qfalse);
|
|
969
|
+
|
|
970
|
+
DEBUG_PRINTF("[IMPORT_STMT] Created import: id=%d, url=%s, media=%s\n",
|
|
971
|
+
ctx->rule_id_counter,
|
|
972
|
+
RSTRING_PTR(url),
|
|
973
|
+
NIL_P(media) ? "nil" : RSTRING_PTR(rb_sym2str(media)));
|
|
974
|
+
|
|
975
|
+
rb_ary_push(ctx->imports_array, import_stmt);
|
|
976
|
+
ctx->rule_id_counter++;
|
|
977
|
+
|
|
978
|
+
*p_ptr = p;
|
|
979
|
+
|
|
980
|
+
RB_GC_GUARD(url);
|
|
981
|
+
RB_GC_GUARD(media);
|
|
982
|
+
RB_GC_GUARD(import_stmt);
|
|
983
|
+
}
|
|
984
|
+
|
|
871
985
|
/*
|
|
872
986
|
* Parse CSS recursively with media query context and optional parent selector for nesting
|
|
873
987
|
*
|
|
@@ -898,6 +1012,30 @@ static void parse_css_recursive(ParserContext *ctx, const char *css, const char
|
|
|
898
1012
|
// Skip comments (rare in typical CSS)
|
|
899
1013
|
SKIP_COMMENT(p, pe);
|
|
900
1014
|
|
|
1015
|
+
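// Last-resort debug trace (left disabled): on each loop iteration, print the current character, its hex value, brace_depth, and the next 20 characters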
|
|
1016
|
+
// DEBUG_PRINTF("[LOOP] At position, char='%c' (0x%02x), brace_depth=%d, next 20 chars: %.20s\n",
|
|
1017
|
+
// *p >= 32 && *p <= 126 ? *p : '?', (unsigned char)*p, brace_depth, p);
|
|
1018
|
+
|
|
1019
|
+
// Check for @import at-rule (only at top level, before any rules)
|
|
1020
|
+
if (RB_UNLIKELY(brace_depth == 0 && p + 7 < pe && *p == '@' &&
|
|
1021
|
+
strncmp(p + 1, "import", 6) == 0 && IS_WHITESPACE(p[7]))) {
|
|
1022
|
+
DEBUG_PRINTF("[IMPORT] Found @import at position, rules_count=%ld\n", RARRAY_LEN(ctx->rules_array));
|
|
1023
|
+
// Check if we've already seen a rule
|
|
1024
|
+
if (RARRAY_LEN(ctx->rules_array) > 0) {
|
|
1025
|
+
// Warn and skip - @import must come before rules
|
|
1026
|
+
rb_warn("CSS @import ignored: @import must appear before all rules (found import after rules)");
|
|
1027
|
+
// Skip to semicolon
|
|
1028
|
+
while (p < pe && *p != ';') p++;
|
|
1029
|
+
if (p < pe) p++;
|
|
1030
|
+
continue;
|
|
1031
|
+
}
|
|
1032
|
+
|
|
1033
|
+
p += 7; // Skip "@import" (7 chars); parse_import_statement skips the whitespace that follows
|
|
1034
|
+
parse_import_statement(ctx, &p, pe);
|
|
1035
|
+
DEBUG_PRINTF("[IMPORT] After parsing, imports_count=%ld\n", RARRAY_LEN(ctx->imports_array));
|
|
1036
|
+
continue;
|
|
1037
|
+
}
|
|
1038
|
+
|
|
901
1039
|
// Check for @media at-rule (only at depth 0)
|
|
902
1040
|
if (RB_UNLIKELY(brace_depth == 0 && p + 6 < pe && *p == '@' &&
|
|
903
1041
|
strncmp(p + 1, "media", 5) == 0 && IS_WHITESPACE(p[6]))) {
|
|
@@ -1306,6 +1444,7 @@ static void parse_css_recursive(ParserContext *ctx, const char *css, const char
|
|
|
1306
1444
|
// Start of selector
|
|
1307
1445
|
if (brace_depth == 0 && selector_start == NULL) {
|
|
1308
1446
|
selector_start = p;
|
|
1447
|
+
DEBUG_PRINTF("[SELECTOR] Starting selector at: %.50s\n", selector_start);
|
|
1309
1448
|
}
|
|
1310
1449
|
|
|
1311
1450
|
p++;
|
|
@@ -1337,6 +1476,9 @@ VALUE parse_media_types(VALUE self, VALUE media_query_sym) {
|
|
|
1337
1476
|
VALUE parse_css_new_impl(VALUE css_string, int rule_id_offset) {
|
|
1338
1477
|
Check_Type(css_string, T_STRING);
|
|
1339
1478
|
|
|
1479
|
+
DEBUG_PRINTF("\n[PARSE_NEW] ========== NEW PARSE CALL ==========\n");
|
|
1480
|
+
DEBUG_PRINTF("[PARSE_NEW] Input CSS (first 100 chars): %.100s\n", RSTRING_PTR(css_string));
|
|
1481
|
+
|
|
1340
1482
|
const char *css = RSTRING_PTR(css_string);
|
|
1341
1483
|
const char *pe = css + RSTRING_LEN(css_string);
|
|
1342
1484
|
const char *p = css;
|
|
@@ -1345,59 +1487,33 @@ VALUE parse_css_new_impl(VALUE css_string, int rule_id_offset) {
|
|
|
1345
1487
|
|
|
1346
1488
|
// Extract @charset
|
|
1347
1489
|
if (RSTRING_LEN(css_string) > 10 && strncmp(css, "@charset ", 9) == 0) {
|
|
1490
|
+
DEBUG_PRINTF("[CHARSET] Found @charset at start\n");
|
|
1348
1491
|
char *quote_start = strchr(css + 9, '"');
|
|
1349
1492
|
if (quote_start != NULL) {
|
|
1350
1493
|
char *quote_end = strchr(quote_start + 1, '"');
|
|
1351
1494
|
if (quote_end != NULL) {
|
|
1352
1495
|
charset = rb_str_new(quote_start + 1, quote_end - quote_start - 1);
|
|
1496
|
+
DEBUG_PRINTF("[CHARSET] Extracted charset: %s\n", RSTRING_PTR(charset));
|
|
1353
1497
|
char *semicolon = quote_end + 1;
|
|
1354
1498
|
while (semicolon < pe && IS_WHITESPACE(*semicolon)) {
|
|
1355
1499
|
semicolon++;
|
|
1356
1500
|
}
|
|
1357
1501
|
if (semicolon < pe && *semicolon == ';') {
|
|
1358
1502
|
p = semicolon + 1;
|
|
1503
|
+
DEBUG_PRINTF("[CHARSET] Advanced past semicolon, remaining: %.50s\n", p);
|
|
1359
1504
|
}
|
|
1360
1505
|
}
|
|
1361
1506
|
}
|
|
1362
1507
|
}
|
|
1363
1508
|
|
|
1364
|
-
//
|
|
1365
|
-
//
|
|
1366
|
-
while (p < pe) {
|
|
1367
|
-
// Skip whitespace
|
|
1368
|
-
while (p < pe && IS_WHITESPACE(*p)) p++;
|
|
1369
|
-
if (p >= pe) break;
|
|
1370
|
-
|
|
1371
|
-
// Skip comments
|
|
1372
|
-
if (p + 1 < pe && p[0] == '/' && p[1] == '*') {
|
|
1373
|
-
p += 2;
|
|
1374
|
-
while (p + 1 < pe) {
|
|
1375
|
-
if (p[0] == '*' && p[1] == '/') {
|
|
1376
|
-
p += 2;
|
|
1377
|
-
break;
|
|
1378
|
-
}
|
|
1379
|
-
p++;
|
|
1380
|
-
}
|
|
1381
|
-
continue;
|
|
1382
|
-
}
|
|
1383
|
-
|
|
1384
|
-
// Check for @import
|
|
1385
|
-
if (p + 7 <= pe && *p == '@' && strncasecmp(p + 1, "import", 6) == 0 &&
|
|
1386
|
-
(p + 7 >= pe || IS_WHITESPACE(p[7]) || p[7] == '\'' || p[7] == '"')) {
|
|
1387
|
-
// Skip to semicolon
|
|
1388
|
-
while (p < pe && *p != ';') p++;
|
|
1389
|
-
if (p < pe) p++; // Skip semicolon
|
|
1390
|
-
continue;
|
|
1391
|
-
}
|
|
1392
|
-
|
|
1393
|
-
// Hit non-@import content, stop skipping
|
|
1394
|
-
break;
|
|
1395
|
-
}
|
|
1509
|
+
// @import statements are now handled in parse_css_recursive
|
|
1510
|
+
// They must come before all rules (except @charset) per CSS spec
|
|
1396
1511
|
|
|
1397
1512
|
// Initialize parser context with offset
|
|
1398
1513
|
ParserContext ctx;
|
|
1399
1514
|
ctx.rules_array = rb_ary_new();
|
|
1400
1515
|
ctx.media_index = rb_hash_new();
|
|
1516
|
+
ctx.imports_array = rb_ary_new();
|
|
1401
1517
|
ctx.rule_id_counter = rule_id_offset; // Start from offset
|
|
1402
1518
|
ctx.media_query_count = 0;
|
|
1403
1519
|
ctx.media_cache = NULL; // Removed - no perf benefit
|
|
@@ -1405,15 +1521,23 @@ VALUE parse_css_new_impl(VALUE css_string, int rule_id_offset) {
|
|
|
1405
1521
|
ctx.depth = 0; // Start at depth 0
|
|
1406
1522
|
|
|
1407
1523
|
// Parse CSS (top-level, no parent context)
|
|
1524
|
+
DEBUG_PRINTF("[PARSE] Starting parse_css_recursive from: %.80s\n", p);
|
|
1408
1525
|
parse_css_recursive(&ctx, p, pe, NO_PARENT_MEDIA, NO_PARENT_SELECTOR, NO_PARENT_RULE_ID);
|
|
1409
1526
|
|
|
1410
1527
|
// Build result hash
|
|
1411
1528
|
VALUE result = rb_hash_new();
|
|
1412
1529
|
rb_hash_aset(result, ID2SYM(rb_intern("rules")), ctx.rules_array);
|
|
1413
1530
|
rb_hash_aset(result, ID2SYM(rb_intern("_media_index")), ctx.media_index);
|
|
1531
|
+
rb_hash_aset(result, ID2SYM(rb_intern("imports")), ctx.imports_array);
|
|
1414
1532
|
rb_hash_aset(result, ID2SYM(rb_intern("charset")), charset);
|
|
1415
1533
|
rb_hash_aset(result, ID2SYM(rb_intern("last_rule_id")), INT2FIX(ctx.rule_id_counter));
|
|
1416
1534
|
rb_hash_aset(result, ID2SYM(rb_intern("_has_nesting")), ctx.has_nesting ? Qtrue : Qfalse);
|
|
1417
1535
|
|
|
1536
|
+
RB_GC_GUARD(charset);
|
|
1537
|
+
RB_GC_GUARD(ctx.rules_array);
|
|
1538
|
+
RB_GC_GUARD(ctx.media_index);
|
|
1539
|
+
RB_GC_GUARD(ctx.imports_array);
|
|
1540
|
+
RB_GC_GUARD(result);
|
|
1541
|
+
|
|
1418
1542
|
return result;
|
|
1419
1543
|
}
|
data/ext/cataract/extconf.rb
CHANGED
|
@@ -21,8 +21,8 @@ def config_str_buf_optimization?
|
|
|
21
21
|
enable_config('str-buf-optimization', true)
|
|
22
22
|
end
|
|
23
23
|
|
|
24
|
-
# Compile main file, parser, merge, and supporting files
|
|
25
|
-
$objs = ['cataract.o', 'css_parser.o', '
|
|
24
|
+
# Compile main file, parser, flatten, and supporting files
|
|
25
|
+
$objs = ['cataract.o', 'css_parser.o', 'flatten.o', 'shorthand_expander.o', 'specificity.o', 'value_splitter.o',
|
|
26
26
|
'import_scanner.o']
|
|
27
27
|
|
|
28
28
|
# Suppress warnings
|