cataract 0.2.4 → 0.2.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 90e5adcec05553f73e5829b769d010ecda9ca5bf5eb1b2cc621b74707758c913
4
- data.tar.gz: 4a82a843a72f2cc3c944798af3b7e7a8271b5ad696c1ed53247854d5ecdda7bb
3
+ metadata.gz: 898883ee4a30ccd34c034b4799e7a43eccae7f79693a4309d7ac7ef130f21840
4
+ data.tar.gz: 511eaf6d4a9fd1f1a601a999f496bc3493900daa2696d20bc1a6b97eaff60eae
5
5
  SHA512:
6
- metadata.gz: afb9718d653c7bc0c905eb3d273afec8a364ae37de2952ea3285ecf3ef7f5ff29b40887348ef3646adedfab0baeff873211a883750e7c9b992f3e6a25b14cbb0
7
- data.tar.gz: c40ba4cefa95024f34c7d9306d93ad07b8f448e847c51231381fa83833f2b97e314e5fbab9d7fb2a8edd1fe45fea0760c6db514c79225829e397c339fd5951a6
6
+ metadata.gz: 59f87ff83a95fecb4e508a71e3055d8ebc53c23ad2d1316358e4a9e53ff8532c66b5686611e1706d6d95f1309428eda2e2df3b1884cbcb8d318dc866a256920b
7
+ data.tar.gz: 85eb22a3be35e25d7cfd01fcbb2bd9148411e3cb4ee49ccfd808f57ec17a6a8c95c1dc56b29cb563cc9cc7523fbacfc4083a5953dcf3010c6cb14c872a48872c
data/.rubocop.yml CHANGED
@@ -178,4 +178,5 @@ Cataract/BanAssertIncludes:
178
178
  - 'test/color/**/*.rb'
179
179
  - 'test/test_benchmark_doc_generator.rb'
180
180
  - 'test/test_speedup_calculator.rb'
181
+ - 'test/test_parse_errors.rb' # Parse error tests check error messages with assert_match
181
182
  - 'test/support/**/*' # Support files define assert_contains which uses assert_includes internally
data/BENCHMARKS.md CHANGED
@@ -20,17 +20,35 @@ Time to parse CSS into internal data structures
20
20
 
21
21
  | Test Case | Native | Pure (no YJIT) | Pure (YJIT) |
22
22
  |-----------|--------|----------------|-------------|
23
- | Small CSS (64 lines, 1.0KB) | 37.4K i/s | 3.25K i/s | 13.43K i/s |
24
- | Medium CSS with @media (139 lines, 1.6KB) | 34.71K i/s | 2.04K i/s | 9.15K i/s |
25
- | Selector lists (3500 lines, 62.5KB, 500 lists) | 471.8 i/s | 56.7 i/s | 214.1 i/s |
23
+ | Small CSS (64 lines, 1.0KB) | 37.27K i/s | 3.32K i/s | 13.48K i/s |
24
+ | Medium CSS with @media (139 lines, 1.6KB) | 34.66K i/s | 2.06K i/s | 8.94K i/s |
25
+ | Selector lists (3500 lines, 62.5KB, 500 lists) | 485.7 i/s | 55.7 i/s | 213.6 i/s |
26
26
 
27
27
  ### Speedups
28
28
 
29
29
  | Comparison | Speedup |
30
30
  |------------|---------|
31
- | Native vs Pure (no YJIT) | 12.76x faster (avg) |
32
- | Native vs Pure (YJIT) | 2.99x faster (avg) |
33
- | YJIT impact on Pure Ruby | 4.22x faster (avg) |
31
+ | Native vs Pure (no YJIT) | 14.4x faster (avg) |
32
+ | Native vs Pure (YJIT) | 3.38x faster (avg) |
33
+ | YJIT impact on Pure Ruby | 4.23x faster (avg) |
34
+
35
+ ### Parse Error Checking Overhead
36
+
37
+ Parse error detection can be enabled with `raise_parse_errors: true`. The table below compares parse throughput with and without error checking:
38
+
39
+ | Configuration | Native | Pure (no YJIT) | Pure (YJIT) |
40
+ |---------------|--------|----------------|-------------|
41
+ | Medium CSS (139 lines) - no error checking | 35.18K i/s | 2.07K i/s | 9.05K i/s |
42
+ | Medium CSS (139 lines) - with error checking | 34.84K i/s | 1.9K i/s | 8.19K i/s |
43
+
44
+ **Overhead Analysis:**
45
+
46
+
47
+ | Implementation | Overhead |
48
+ |----------------|----------|
49
+ | Native | ~0% (within noise) |
50
+ | Pure (no YJIT) | 8.9% slower |
51
+ | Pure (YJIT) | 10.4% slower |
34
52
 
35
53
  ---
36
54
 
data/CHANGELOG.md CHANGED
@@ -1,4 +1,14 @@
1
- ## [ Unreleased ]
1
+ ## [Unreleased]
2
+
3
+ ## [0.2.5 - 2025-11-25]
4
+
5
+ - Feature: Parse error detection with `raise_parse_errors` option - validates CSS structure and raises `ParseError` exceptions for malformed input with line/column tracking
6
+ - Feature: Granular error control - enable specific checks (empty values, malformed declarations, invalid selectors, invalid selector syntax, malformed at-rules, unclosed blocks)
7
+ - Feature: Type safety validation for C extension - `Stylesheet.parse` and `Stylesheet.new` now validate argument types and raise clear `TypeError` instead of segfaulting
8
+ - Feature: Selector syntax validation using whitelist approach - catches invalid characters and sequences like `..class`, `##id`, `???`
9
+ - Fix: `add_block` with multiple `@import` statements now correctly tracks media type for each import instead of reusing the first import's media context
10
+ - Performance: Parse error checking adds minimal overhead for the C extension (within noise); pure Ruby is roughly 9% slower without YJIT and roughly 10% slower with YJIT (see BENCHMARKS.md)
11
+ - Testing: Fuzzer corpus enhanced with invalid CSS patterns for crash testing
2
12
 
3
13
  ## [0.2.4 - 2025-11-23]
4
14
  - MediaQuery first-class objects: Refactored media queries from simple symbols to proper structs with id, type, and conditions, enabling accurate
@@ -14,7 +14,7 @@ VALUE cMediaQuery;
14
14
  VALUE eCataractError;
15
15
  VALUE eDepthError;
16
16
  VALUE eSizeError;
17
- VALUE eParserError;
17
+ VALUE eParseError;
18
18
 
19
19
  // ============================================================================
20
20
  // Helper Functions
@@ -1408,10 +1408,10 @@ void Init_native_extension(void) {
1408
1408
  eSizeError = rb_define_class_under(mCataract, "SizeError", eCataractError);
1409
1409
  }
1410
1410
 
1411
- if (rb_const_defined(mCataract, rb_intern("ParserError"))) {
1412
- eParserError = rb_const_get(mCataract, rb_intern("ParserError"));
1411
+ if (rb_const_defined(mCataract, rb_intern("ParseError"))) {
1412
+ eParseError = rb_const_get(mCataract, rb_intern("ParseError"));
1413
1413
  } else {
1414
- eParserError = rb_define_class_under(mCataract, "ParserError", eCataractError);
1414
+ eParseError = rb_define_class_under(mCataract, "ParseError", eCataractError);
1415
1415
  }
1416
1416
 
1417
1417
  // Reuse Ruby-defined structs (they must be defined before loading this extension)
@@ -19,7 +19,7 @@ extern VALUE cMediaQuery;
19
19
  extern VALUE eCataractError;
20
20
  extern VALUE eDepthError;
21
21
  extern VALUE eSizeError;
22
- extern VALUE eParserError;
22
+ extern VALUE eParseError;
23
23
 
24
24
  // ============================================================================
25
25
  // Struct field indices
@@ -12,6 +12,11 @@
12
12
 
13
13
  #include "cataract.h"
14
14
  #include <string.h>
15
+ #include <stdint.h>
16
+
17
+ // Use uint8_t for boolean flags to reduce struct size and improve cache efficiency
18
+ // (int is 4 bytes, uint8_t is 1 byte - saves 27 bytes across 9 flags)
19
+ #define BOOLEAN uint8_t
15
20
 
16
21
  // Parser context passed through recursive calls
17
22
  typedef struct {
@@ -27,13 +32,21 @@ typedef struct {
27
32
  int next_media_query_list_id; // Next media query list ID (0-indexed)
28
33
  int media_query_count; // Safety limit for media queries
29
34
  st_table *media_cache; // Parse-time cache: string => parsed media types
30
- int has_nesting; // Set to 1 if any nested rules are created
31
- int selector_lists_enabled; // Parser option: track selector lists (1=enabled, 0=disabled)
32
- int depth; // Current recursion depth (safety limit)
35
+ BOOLEAN has_nesting; // Set to 1 if any nested rules are created
36
+ BOOLEAN selector_lists_enabled; // Parser option: track selector lists (1=enabled, 0=disabled)
37
+ BOOLEAN depth; // Current recursion depth (safety limit)
33
38
  // URL conversion options
34
39
  VALUE base_uri; // Base URI for resolving relative URLs (Qnil if disabled)
35
40
  VALUE uri_resolver; // Proc to call for URL resolution (Qnil for default)
36
- int absolute_paths; // Whether to convert relative URLs to absolute
41
+ BOOLEAN absolute_paths; // Whether to convert relative URLs to absolute
42
+ // Parse error checking options
43
+ VALUE css_string; // Full CSS string for error position calculation
44
+ BOOLEAN check_empty_values; // Raise error on empty declaration values
45
+ BOOLEAN check_malformed_declarations; // Raise error on declarations without colons
46
+ BOOLEAN check_invalid_selectors; // Raise error on empty/malformed selectors
47
+ BOOLEAN check_invalid_selector_syntax; // Raise error on syntax violations (.. ## etc)
48
+ BOOLEAN check_malformed_at_rules; // Raise error on @media/@supports without conditions
49
+ BOOLEAN check_unclosed_blocks; // Raise error on missing closing braces
37
50
  } ParserContext;
38
51
 
39
52
  // Macro to skip CSS comments /* ... */
@@ -63,6 +76,20 @@ static inline const char* find_matching_brace(const char *start, const char *end
63
76
  return p;
64
77
  }
65
78
 
79
+ // Strict wrapper around find_matching_brace: same lookup, but can raise when no closing brace is found
80
+ // Input: start = position after opening '{', end = limit, check_unclosed = non-zero to raise when the lookup result reaches `end`
81
+ // Returns: the pointer returned by find_matching_brace(start, end); raises eParseError if that pointer is >= end and check_unclosed is set
82
+ static inline const char* find_matching_brace_strict(const char *start, const char *end, int check_unclosed) {
83
+ const char *closing_brace = find_matching_brace(start, end);
84
+
85
+ // A result at or past `end` is treated as "no matching '}'" (presumably find_matching_brace returns `end` then - confirm against its definition)
86
+ if (check_unclosed && closing_brace >= end) {
87
+ rb_raise(eParseError, "Unclosed block: missing closing brace"); // plain message only: no css/pos/type keywords are passed here
88
+ }
89
+
90
+ return closing_brace;
91
+ }
92
+
66
93
  // Find matching closing paren
67
94
  // Input: start = position after opening '(', end = limit
68
95
  // Returns: pointer to matching ')' (or end if not found)
@@ -78,6 +105,99 @@ static inline const char* find_matching_paren(const char *start, const char *end
78
105
  return p;
79
106
  }
80
107
 
108
+ // Raise eParseError built as ParseError.new(message, css:, pos:, type:), with pos derived from error_pos
109
+ // ctx: supplies css_string; error_pos: location of the error; message: C string used as the error message; error_type: C string turned into a Symbol. Never returns.
110
+ __attribute__((noreturn))
111
+ static void raise_parse_error_at(ParserContext *ctx, const char *error_pos, const char *message, const char *error_type) {
112
+ const char *css = RSTRING_PTR(ctx->css_string);
113
+ long pos = error_pos - css; // byte offset from the start of css_string; assumes error_pos points into that string's buffer - TODO confirm at call sites
114
+
115
+ // Build keyword args hash with the keys :css, :pos and :type
116
+ VALUE kwargs = rb_hash_new();
117
+ rb_hash_aset(kwargs, ID2SYM(rb_intern("css")), ctx->css_string);
118
+ rb_hash_aset(kwargs, ID2SYM(rb_intern("pos")), LONG2NUM(pos));
119
+ rb_hash_aset(kwargs, ID2SYM(rb_intern("type")), ID2SYM(rb_intern(error_type)));
120
+
121
+ // Create ParseError instance: message is the positional argument, the trailing hash is passed as keywords (RB_PASS_KEYWORDS)
122
+ VALUE msg_str = rb_str_new_cstr(message);
123
+ VALUE argv[2] = {msg_str, kwargs};
124
+ VALUE error = rb_funcallv_kw(eParseError, rb_intern("new"), 2, argv, RB_PASS_KEYWORDS);
125
+
126
+ // Raise the constructed exception object
127
+ rb_exc_raise(error);
128
+ }
129
+
130
+ // Whitelist check: scan the bytes in [start, end) and reject the selector on the first disallowed byte or on an adjacent ".." or "##" pair
131
+ // Returns 1 if valid, 0 if invalid (an empty range is reported as valid because the loop body never runs)
132
+ // Valid characters: a-z A-Z 0-9 - _ . # [ ] : * > + ~ ( ) ' " = ^ $ | \ & % / ! , and whitespace (space, tab, newline, carriage return)
133
+ static inline int is_valid_selector(const char *start, const char *end) {
134
+ const char *p = start;
135
+ while (p < end) {
136
+ unsigned char c = (unsigned char)*p;
137
+
138
+ // Check for invalid character sequences (only when a following byte exists inside the range)
139
+ if (p + 1 < end) {
140
+ // Double dot (..) is invalid
141
+ if (c == '.' && *(p + 1) == '.') {
142
+ return 0;
143
+ }
144
+ // Double hash (##) is invalid
145
+ if (c == '#' && *(p + 1) == '#') {
146
+ return 0;
147
+ }
148
+ }
149
+
150
+ // ASCII letters and digits
151
+ if ((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || (c >= '0' && c <= '9')) {
152
+ p++;
153
+ continue;
154
+ }
155
+
156
+ // Whitespace: space, tab, newline, carriage return
157
+ if (c == ' ' || c == '\t' || c == '\n' || c == '\r') {
158
+ p++;
159
+ continue;
160
+ }
161
+
162
+ // Valid CSS selector special characters (every case falls through to the shared p++ below)
163
+ switch (c) {
164
+ case '-': // Hyphen (in identifiers, attribute selectors)
165
+ case '_': // Underscore (in identifiers)
166
+ case '.': // Class selector
167
+ case '#': // ID selector
168
+ case '[': // Attribute selector start
169
+ case ']': // Attribute selector end
170
+ case ':': // Pseudo-class/element (:: is valid for pseudo-elements)
171
+ case '*': // Universal selector, attribute operator
172
+ case '>': // Child combinator
173
+ case '+': // Adjacent sibling combinator
174
+ case '~': // General sibling combinator
175
+ case '(': // Pseudo-class function
176
+ case ')': // Pseudo-class function end
177
+ case '\'': // String in attribute selector
178
+ case '"': // String in attribute selector
179
+ case '=': // Attribute operator
180
+ case '^': // Attribute operator ^=
181
+ case '$': // Attribute operator $=
182
+ case '|': // Attribute operator |=, namespace separator
183
+ case '\\': // Escape character
184
+ case '&': // Nesting selector
185
+ case '%': // Sometimes used in selectors
186
+ case '/': // Sometimes used in selectors
187
+ case '!': // Negation (though rare)
188
+ case ',': // List separator (shouldn't be here after splitting, but allow it)
189
+ p++;
190
+ break;
191
+
192
+ default:
193
+ // Invalid byte. NOTE(review): this also rejects every byte >= 0x80 (non-ASCII identifiers) and characters such as '?' inside quoted attribute values, since no quote state is tracked - confirm this is intended
194
+ return 0;
195
+ }
196
+ }
197
+
198
+ return 1;
199
+ }
200
+
81
201
  // Lowercase property name (CSS property names are ASCII-only)
82
202
  // Non-static so merge_new.c can use it
83
203
  VALUE lowercase_property(VALUE property_str) {
@@ -177,7 +297,7 @@ static VALUE resolve_nested_selector(VALUE parent_selector, const char *nested_s
177
297
  long parent_len = RSTRING_LEN(parent_selector);
178
298
 
179
299
  // Check if nested selector contains &
180
- int has_ampersand = 0;
300
+ BOOLEAN has_ampersand = 0;
181
301
  for (long i = 0; i < nested_len; i++) {
182
302
  if (nested_sel[i] == '&') {
183
303
  has_ampersand = 1;
@@ -606,6 +726,46 @@ static VALUE parse_declarations(const char *start, const char *end, ParserContex
606
726
 
607
727
  // Malformed declaration - skip to next semicolon to recover
608
728
  if (pos >= end || *pos != ':') {
729
+ if (ctx->check_malformed_declarations) {
730
+ // Extract property text for error message
731
+ const char *prop_text_end = pos;
732
+ trim_trailing(prop_start, &prop_text_end);
733
+ long prop_text_len = prop_text_end - prop_start;
734
+
735
+ const char *css = RSTRING_PTR(ctx->css_string);
736
+ long error_pos = prop_start - css;
737
+
738
+ if (prop_text_len == 0) {
739
+ // Build keyword args hash
740
+ VALUE kwargs = rb_hash_new();
741
+ rb_hash_aset(kwargs, ID2SYM(rb_intern("css")), ctx->css_string);
742
+ rb_hash_aset(kwargs, ID2SYM(rb_intern("pos")), LONG2NUM(error_pos));
743
+ rb_hash_aset(kwargs, ID2SYM(rb_intern("type")), ID2SYM(rb_intern("malformed_declaration")));
744
+
745
+ VALUE msg_str = rb_str_new_cstr("Malformed declaration: missing property name");
746
+ VALUE argv[2] = {msg_str, kwargs};
747
+ VALUE error = rb_funcallv_kw(eParseError, rb_intern("new"), 2, argv, RB_PASS_KEYWORDS);
748
+ rb_exc_raise(error);
749
+ } else {
750
+ // Limit property name to 200 chars in error message
751
+ int display_len = (prop_text_len > 200) ? 200 : (int)prop_text_len;
752
+ char error_msg[256];
753
+ snprintf(error_msg, sizeof(error_msg),
754
+ "Malformed declaration: missing colon after '%.*s'",
755
+ display_len, prop_start);
756
+
757
+ // Build keyword args hash
758
+ VALUE kwargs = rb_hash_new();
759
+ rb_hash_aset(kwargs, ID2SYM(rb_intern("css")), ctx->css_string);
760
+ rb_hash_aset(kwargs, ID2SYM(rb_intern("pos")), LONG2NUM(error_pos));
761
+ rb_hash_aset(kwargs, ID2SYM(rb_intern("type")), ID2SYM(rb_intern("malformed_declaration")));
762
+
763
+ VALUE msg_str = rb_str_new_cstr(error_msg);
764
+ VALUE argv[2] = {msg_str, kwargs};
765
+ VALUE error = rb_funcallv_kw(eParseError, rb_intern("new"), 2, argv, RB_PASS_KEYWORDS);
766
+ rb_exc_raise(error);
767
+ }
768
+ }
609
769
  while (pos < end && *pos != ';') pos++;
610
770
  if (pos < end) pos++; // Skip the semicolon
611
771
  continue;
@@ -645,7 +805,7 @@ static VALUE parse_declarations(const char *start, const char *end, ParserContex
645
805
  trim_trailing(val_start, &val_end);
646
806
 
647
807
  // Check for !important
648
- int is_important = 0;
808
+ BOOLEAN is_important = 0;
649
809
  if (val_end - val_start >= 10) { // strlen("!important") = 10
650
810
  const char *check = val_end - 10;
651
811
  while (check < val_end && IS_WHITESPACE(*check)) check++;
@@ -667,6 +827,34 @@ static VALUE parse_declarations(const char *start, const char *end, ParserContex
667
827
  // Final trim
668
828
  trim_trailing(val_start, &val_end);
669
829
 
830
+ // Check for empty value
831
+ if (val_end <= val_start && ctx->check_empty_values) {
832
+ long prop_len = prop_end - prop_start;
833
+ const char *css = RSTRING_PTR(ctx->css_string);
834
+ long error_pos = val_start - css;
835
+
836
+ // Build error message
837
+ int display_len = (prop_len > 200) ? 200 : (int)prop_len;
838
+ char error_msg[256];
839
+ snprintf(error_msg, sizeof(error_msg),
840
+ "Empty value for property '%.*s'",
841
+ display_len, prop_start);
842
+
843
+ // Build keyword args hash
844
+ VALUE kwargs = rb_hash_new();
845
+ rb_hash_aset(kwargs, ID2SYM(rb_intern("css")), ctx->css_string);
846
+ rb_hash_aset(kwargs, ID2SYM(rb_intern("pos")), LONG2NUM(error_pos));
847
+ rb_hash_aset(kwargs, ID2SYM(rb_intern("type")), ID2SYM(rb_intern("empty_value")));
848
+
849
+ // Create ParseError instance: ParseError.new(message, **kwargs)
850
+ VALUE msg_str = rb_str_new_cstr(error_msg);
851
+ VALUE argv[2] = {msg_str, kwargs};
852
+ VALUE error = rb_funcallv_kw(eParseError, rb_intern("new"), 2, argv, RB_PASS_KEYWORDS);
853
+
854
+ // Raise the error
855
+ rb_exc_raise(error);
856
+ }
857
+
670
858
  // Skip if value is empty
671
859
  if (val_end > val_start) {
672
860
  long prop_len = prop_end - prop_start;
@@ -896,7 +1084,7 @@ static VALUE parse_mixed_block(ParserContext *ctx, const char *start, const char
896
1084
 
897
1085
  // Find matching closing brace
898
1086
  const char *media_block_start = p;
899
- const char *media_block_end = find_matching_brace(p, end);
1087
+ const char *media_block_end = find_matching_brace_strict(p, end, ctx->check_unclosed_blocks);
900
1088
  p = media_block_end;
901
1089
 
902
1090
  if (p < end) p++; // Skip }
@@ -909,7 +1097,7 @@ static VALUE parse_mixed_block(ParserContext *ctx, const char *start, const char
909
1097
 
910
1098
  // This should never happen - parent_media_query_id should always be valid
911
1099
  if (NIL_P(parent_mq)) {
912
- rb_raise(eParserError,
1100
+ rb_raise(eParseError,
913
1101
  "Invalid parent_media_query_id: %d (not found in media_queries array)",
914
1102
  parent_media_query_id);
915
1103
  }
@@ -1022,7 +1210,7 @@ static VALUE parse_mixed_block(ParserContext *ctx, const char *start, const char
1022
1210
  // Example: "& .child { font: 14px; }"
1023
1211
  // ^nested_block_start ^nested_block_end (at })
1024
1212
  const char *nested_block_start = p;
1025
- const char *nested_block_end = find_matching_brace(p, end);
1213
+ const char *nested_block_end = find_matching_brace_strict(p, end, ctx->check_unclosed_blocks);
1026
1214
  p = nested_block_end;
1027
1215
 
1028
1216
  if (p < end) p++; // Skip }
@@ -1111,7 +1299,7 @@ static VALUE parse_mixed_block(ParserContext *ctx, const char *start, const char
1111
1299
  trim_leading(&p, end);
1112
1300
 
1113
1301
  const char *val_start = p;
1114
- int important = 0;
1302
+ BOOLEAN important = 0;
1115
1303
 
1116
1304
  // Find end of value (semicolon or closing brace or end)
1117
1305
  while (p < end && *p != ';' && *p != '}') p++;
@@ -1432,6 +1620,30 @@ static void parse_css_recursive(ParserContext *ctx, const char *css, const char
1432
1620
  // Trim
1433
1621
  trim_trailing(mq_start, &mq_end);
1434
1622
 
1623
+ // Check for empty media query
1624
+ if (mq_end <= mq_start) {
1625
+ if (ctx->check_malformed_at_rules) {
1626
+ raise_parse_error_at(ctx, mq_start, "Malformed @media: missing media query", "malformed_at_rule");
1627
+ } else {
1628
+ // Empty media query with check disabled - skip @media wrapper and parse contents as regular rules
1629
+ if (p >= pe || *p != '{') {
1630
+ continue; // Malformed structure
1631
+ }
1632
+ p++; // Skip opening {
1633
+ const char *block_start = p;
1634
+ const char *block_end = find_matching_brace_strict(p, pe, ctx->check_unclosed_blocks);
1635
+ p = block_end;
1636
+
1637
+ // Parse block contents with NO media query context
1638
+ ctx->depth++;
1639
+ parse_css_recursive(ctx, block_start, block_end, parent_media_sym, NO_PARENT_SELECTOR, NO_PARENT_RULE_ID, parent_media_query_id);
1640
+ ctx->depth--;
1641
+
1642
+ if (p < pe && *p == '}') p++;
1643
+ continue;
1644
+ }
1645
+ }
1646
+
1435
1647
  if (p >= pe || *p != '{') {
1436
1648
  continue; // Malformed
1437
1649
  }
@@ -1546,7 +1758,7 @@ static void parse_css_recursive(ParserContext *ctx, const char *css, const char
1546
1758
 
1547
1759
  // Find matching closing brace
1548
1760
  const char *block_start = p;
1549
- const char *block_end = find_matching_brace(p, pe);
1761
+ const char *block_end = find_matching_brace_strict(p, pe, ctx->check_unclosed_blocks);
1550
1762
  p = block_end;
1551
1763
 
1552
1764
  // Recursively parse @media block with new media query context
@@ -1573,13 +1785,22 @@ static void parse_css_recursive(ParserContext *ctx, const char *css, const char
1573
1785
  long at_name_len = at_name_end - at_start;
1574
1786
 
1575
1787
  // Check if this is a conditional group rule
1576
- int is_conditional_group =
1788
+ BOOLEAN is_conditional_group =
1577
1789
  (at_name_len == 8 && strncmp(at_start, "supports", 8) == 0) ||
1578
1790
  (at_name_len == 5 && strncmp(at_start, "layer", 5) == 0) ||
1579
1791
  (at_name_len == 9 && strncmp(at_start, "container", 9) == 0) ||
1580
1792
  (at_name_len == 5 && strncmp(at_start, "scope", 5) == 0);
1581
1793
 
1582
1794
  if (is_conditional_group) {
1795
+ // Check if this rule requires a condition
1796
+ BOOLEAN requires_condition =
1797
+ (at_name_len == 8 && strncmp(at_start, "supports", 8) == 0) ||
1798
+ (at_name_len == 9 && strncmp(at_start, "container", 9) == 0);
1799
+
1800
+ // Extract condition (between at-rule name and opening brace)
1801
+ const char *cond_start = at_name_end;
1802
+ while (cond_start < pe && IS_WHITESPACE(*cond_start)) cond_start++;
1803
+
1583
1804
  // Skip to opening brace
1584
1805
  p = at_name_end;
1585
1806
  while (p < pe && *p != '{') p++;
@@ -1588,11 +1809,22 @@ static void parse_css_recursive(ParserContext *ctx, const char *css, const char
1588
1809
  continue; // Malformed
1589
1810
  }
1590
1811
 
1812
+ // Trim condition
1813
+ const char *cond_end = p;
1814
+ while (cond_end > cond_start && IS_WHITESPACE(*(cond_end - 1))) cond_end--;
1815
+
1816
+ // Check for missing condition
1817
+ if (requires_condition && cond_end <= cond_start && ctx->check_malformed_at_rules) {
1818
+ char error_msg[100];
1819
+ snprintf(error_msg, sizeof(error_msg), "Malformed @%.*s: missing condition", (int)at_name_len, at_start);
1820
+ raise_parse_error_at(ctx, at_start - 1, error_msg, "malformed_at_rule");
1821
+ }
1822
+
1591
1823
  p++; // Skip opening {
1592
1824
 
1593
1825
  // Find matching closing brace
1594
1826
  const char *block_start = p;
1595
- const char *block_end = find_matching_brace(p, pe);
1827
+ const char *block_end = find_matching_brace_strict(p, pe, ctx->check_unclosed_blocks);
1596
1828
  p = block_end;
1597
1829
 
1598
1830
  // Recursively parse block content (preserve parent media context)
@@ -1606,7 +1838,7 @@ static void parse_css_recursive(ParserContext *ctx, const char *css, const char
1606
1838
 
1607
1839
  // Check for @keyframes (contains <rule-list>)
1608
1840
  // TODO: Test perf gains by using RB_UNLIKELY(is_keyframes) wrapper
1609
- int is_keyframes =
1841
+ BOOLEAN is_keyframes =
1610
1842
  (at_name_len == 9 && strncmp(at_start, "keyframes", 9) == 0) ||
1611
1843
  (at_name_len == 17 && strncmp(at_start, "-webkit-keyframes", 17) == 0) ||
1612
1844
  (at_name_len == 13 && strncmp(at_start, "-moz-keyframes", 13) == 0);
@@ -1631,7 +1863,7 @@ static void parse_css_recursive(ParserContext *ctx, const char *css, const char
1631
1863
 
1632
1864
  // Find matching closing brace
1633
1865
  const char *block_start = p;
1634
- const char *block_end = find_matching_brace(p, pe);
1866
+ const char *block_end = find_matching_brace_strict(p, pe, ctx->check_unclosed_blocks);
1635
1867
  p = block_end;
1636
1868
 
1637
1869
  // Parse keyframe blocks as rules (from/to/0%/50% etc)
@@ -1680,7 +1912,7 @@ static void parse_css_recursive(ParserContext *ctx, const char *css, const char
1680
1912
  }
1681
1913
 
1682
1914
  // Check for @font-face (contains <declaration-list>)
1683
- int is_font_face = (at_name_len == 9 && strncmp(at_start, "font-face", 9) == 0);
1915
+ BOOLEAN is_font_face = (at_name_len == 9 && strncmp(at_start, "font-face", 9) == 0);
1684
1916
 
1685
1917
  if (is_font_face) {
1686
1918
  // Build selector string: "@font-face"
@@ -1702,7 +1934,7 @@ static void parse_css_recursive(ParserContext *ctx, const char *css, const char
1702
1934
 
1703
1935
  // Find matching closing brace
1704
1936
  const char *decl_start = p;
1705
- const char *decl_end = find_matching_brace(p, pe);
1937
+ const char *decl_end = find_matching_brace_strict(p, pe, ctx->check_unclosed_blocks);
1706
1938
  p = decl_end;
1707
1939
 
1708
1940
  // Parse declarations
@@ -1740,6 +1972,10 @@ static void parse_css_recursive(ParserContext *ctx, const char *css, const char
1740
1972
 
1741
1973
  // Opening brace
1742
1974
  if (*p == '{') {
1975
+ // Check for empty selector (opening brace with no selector before it)
1976
+ if (ctx->check_invalid_selectors && brace_depth == 0 && selector_start == NULL) {
1977
+ raise_parse_error_at(ctx, p, "Invalid selector: empty selector", "invalid_selector");
1978
+ }
1743
1979
  if (brace_depth == 0 && selector_start != NULL) {
1744
1980
  decl_start = p + 1;
1745
1981
  }
@@ -1755,7 +1991,7 @@ static void parse_css_recursive(ParserContext *ctx, const char *css, const char
1755
1991
  // We've found a complete CSS rule block - now determine if it has nesting
1756
1992
  // Example: .parent { color: red; & .child { font-size: 14px; } }
1757
1993
  // ^selector_start ^decl_start ^p (at })
1758
- int has_nesting = has_nested_selectors(decl_start, p);
1994
+ BOOLEAN has_nesting = has_nested_selectors(decl_start, p);
1759
1995
 
1760
1996
  // Get selector string
1761
1997
  const char *sel_end = decl_start - 1;
@@ -1763,6 +1999,23 @@ static void parse_css_recursive(ParserContext *ctx, const char *css, const char
1763
1999
  sel_end--;
1764
2000
  }
1765
2001
 
2002
+ // Check for empty selector
2003
+ if (ctx->check_invalid_selectors && sel_end <= selector_start) {
2004
+ const char *css = RSTRING_PTR(ctx->css_string);
2005
+ long error_pos = selector_start - css;
2006
+
2007
+ // Build keyword args hash
2008
+ VALUE kwargs = rb_hash_new();
2009
+ rb_hash_aset(kwargs, ID2SYM(rb_intern("css")), ctx->css_string);
2010
+ rb_hash_aset(kwargs, ID2SYM(rb_intern("pos")), LONG2NUM(error_pos));
2011
+ rb_hash_aset(kwargs, ID2SYM(rb_intern("type")), ID2SYM(rb_intern("invalid_selector")));
2012
+
2013
+ VALUE msg_str = rb_str_new_cstr("Invalid selector: empty selector");
2014
+ VALUE argv[2] = {msg_str, kwargs};
2015
+ VALUE error = rb_funcallv_kw(eParseError, rb_intern("new"), 2, argv, RB_PASS_KEYWORDS);
2016
+ rb_exc_raise(error);
2017
+ }
2018
+
1766
2019
  if (!has_nesting) {
1767
2020
  // FAST PATH: No nesting - parse as pure declarations
1768
2021
  VALUE declarations = parse_declarations(decl_start, p, ctx);
@@ -1809,6 +2062,37 @@ static void parse_css_recursive(ParserContext *ctx, const char *css, const char
1809
2062
  }
1810
2063
 
1811
2064
  if (seg_end_ptr > seg_start) {
2065
+ // Check for invalid selectors
2066
+ if (ctx->check_invalid_selectors) {
2067
+ // Check if selector starts with combinator
2068
+ char first_char = *seg_start;
2069
+ if (first_char == '>' || first_char == '+' || first_char == '~') {
2070
+ const char *css = RSTRING_PTR(ctx->css_string);
2071
+ long error_pos = seg_start - css;
2072
+
2073
+ char error_msg[256];
2074
+ snprintf(error_msg, sizeof(error_msg),
2075
+ "Invalid selector: selector cannot start with combinator '%c'",
2076
+ first_char);
2077
+
2078
+ // Build keyword args hash
2079
+ VALUE kwargs = rb_hash_new();
2080
+ rb_hash_aset(kwargs, ID2SYM(rb_intern("css")), ctx->css_string);
2081
+ rb_hash_aset(kwargs, ID2SYM(rb_intern("pos")), LONG2NUM(error_pos));
2082
+ rb_hash_aset(kwargs, ID2SYM(rb_intern("type")), ID2SYM(rb_intern("invalid_selector")));
2083
+
2084
+ VALUE msg_str = rb_str_new_cstr(error_msg);
2085
+ VALUE argv[2] = {msg_str, kwargs};
2086
+ VALUE error = rb_funcallv_kw(eParseError, rb_intern("new"), 2, argv, RB_PASS_KEYWORDS);
2087
+ rb_exc_raise(error);
2088
+ }
2089
+ }
2090
+
2091
+ // Check for invalid selector syntax (whitelist validation)
2092
+ if (ctx->check_invalid_selector_syntax && !is_valid_selector(seg_start, seg_end_ptr)) {
2093
+ raise_parse_error_at(ctx, seg_start, "Invalid selector syntax: selector contains invalid characters", "invalid_selector_syntax");
2094
+ }
2095
+
1812
2096
  VALUE selector = rb_utf8_str_new(seg_start, seg_end_ptr - seg_start);
1813
2097
 
1814
2098
  // Resolve against parent if nested
@@ -1882,6 +2166,9 @@ static void parse_css_recursive(ParserContext *ctx, const char *css, const char
1882
2166
 
1883
2167
  // Update media index
1884
2168
  update_media_index(ctx, parent_media_sym, rule_id);
2169
+ } else if (ctx->check_invalid_selector_syntax && selector_count > 1) {
2170
+ // Empty selector in comma-separated list (e.g., "h1, , h3")
2171
+ raise_parse_error_at(ctx, seg_start, "Invalid selector syntax: empty selector in comma-separated list", "invalid_selector_syntax");
1885
2172
  }
1886
2173
 
1887
2174
  seg_start = seg + 1;
@@ -2023,6 +2310,11 @@ static void parse_css_recursive(ParserContext *ctx, const char *css, const char
2023
2310
 
2024
2311
  p++;
2025
2312
  }
2313
+
2314
+ // Check for unclosed blocks at end of parsing
2315
+ if (ctx->check_unclosed_blocks && brace_depth > 0) {
2316
+ rb_raise(eParseError, "Unclosed block: missing closing brace");
2317
+ }
2026
2318
  }
2027
2319
 
2028
2320
  /*
@@ -2056,13 +2348,48 @@ VALUE parse_css_new_impl(VALUE css_string, VALUE parser_options, int rule_id_off
2056
2348
 
2057
2349
  // Read parser options
2058
2350
  VALUE selector_lists_opt = rb_hash_aref(parser_options, ID2SYM(rb_intern("selector_lists")));
2059
- int selector_lists_enabled = (NIL_P(selector_lists_opt) || RTEST(selector_lists_opt)) ? 1 : 0;
2351
+ BOOLEAN selector_lists_enabled = (NIL_P(selector_lists_opt) || RTEST(selector_lists_opt)) ? 1 : 0;
2060
2352
 
2061
2353
  // URL conversion options
2062
2354
  VALUE base_uri = rb_hash_aref(parser_options, ID2SYM(rb_intern("base_uri")));
2063
2355
  VALUE absolute_paths_opt = rb_hash_aref(parser_options, ID2SYM(rb_intern("absolute_paths")));
2064
2356
  VALUE uri_resolver = rb_hash_aref(parser_options, ID2SYM(rb_intern("uri_resolver")));
2065
- int absolute_paths = RTEST(absolute_paths_opt) ? 1 : 0;
2357
+ BOOLEAN absolute_paths = RTEST(absolute_paths_opt) ? 1 : 0;
2358
+
2359
+ // Parse error options
2360
+ VALUE raise_parse_errors_opt = rb_hash_aref(parser_options, ID2SYM(rb_intern("raise_parse_errors")));
2361
+ BOOLEAN check_empty_values = 0;
2362
+ BOOLEAN check_malformed_declarations = 0;
2363
+ BOOLEAN check_invalid_selectors = 0;
2364
+ BOOLEAN check_invalid_selector_syntax = 0;
2365
+ BOOLEAN check_malformed_at_rules = 0;
2366
+ BOOLEAN check_unclosed_blocks = 0;
2367
+
2368
+ if (RTEST(raise_parse_errors_opt)) {
2369
+ if (TYPE(raise_parse_errors_opt) == T_HASH) {
2370
+ // Hash of specific error types
2371
+ VALUE empty_values_opt = rb_hash_aref(raise_parse_errors_opt, ID2SYM(rb_intern("empty_values")));
2372
+ VALUE malformed_declarations_opt = rb_hash_aref(raise_parse_errors_opt, ID2SYM(rb_intern("malformed_declarations")));
2373
+ VALUE invalid_selectors_opt = rb_hash_aref(raise_parse_errors_opt, ID2SYM(rb_intern("invalid_selectors")));
2374
+ VALUE invalid_selector_syntax_opt = rb_hash_aref(raise_parse_errors_opt, ID2SYM(rb_intern("invalid_selector_syntax")));
2375
+ VALUE malformed_at_rules_opt = rb_hash_aref(raise_parse_errors_opt, ID2SYM(rb_intern("malformed_at_rules")));
2376
+ VALUE unclosed_blocks_opt = rb_hash_aref(raise_parse_errors_opt, ID2SYM(rb_intern("unclosed_blocks")));
2377
+ check_empty_values = RTEST(empty_values_opt) ? 1 : 0;
2378
+ check_malformed_declarations = RTEST(malformed_declarations_opt) ? 1 : 0;
2379
+ check_invalid_selectors = RTEST(invalid_selectors_opt) ? 1 : 0;
2380
+ check_invalid_selector_syntax = RTEST(invalid_selector_syntax_opt) ? 1 : 0;
2381
+ check_malformed_at_rules = RTEST(malformed_at_rules_opt) ? 1 : 0;
2382
+ check_unclosed_blocks = RTEST(unclosed_blocks_opt) ? 1 : 0;
2383
+ } else {
2384
+ // true - enable all checks
2385
+ check_empty_values = 1;
2386
+ check_malformed_declarations = 1;
2387
+ check_invalid_selectors = 1;
2388
+ check_invalid_selector_syntax = 1;
2389
+ check_malformed_at_rules = 1;
2390
+ check_unclosed_blocks = 1;
2391
+ }
2392
+ }
2066
2393
 
2067
2394
  const char *css = RSTRING_PTR(css_string);
2068
2395
  const char *pe = css + RSTRING_LEN(css_string);
@@ -2115,6 +2442,14 @@ VALUE parse_css_new_impl(VALUE css_string, VALUE parser_options, int rule_id_off
2115
2442
  ctx.base_uri = base_uri;
2116
2443
  ctx.uri_resolver = uri_resolver;
2117
2444
  ctx.absolute_paths = absolute_paths;
2445
+ // Parse error options
2446
+ ctx.css_string = css_string;
2447
+ ctx.check_empty_values = check_empty_values;
2448
+ ctx.check_malformed_declarations = check_malformed_declarations;
2449
+ ctx.check_invalid_selectors = check_invalid_selectors;
2450
+ ctx.check_invalid_selector_syntax = check_invalid_selector_syntax;
2451
+ ctx.check_malformed_at_rules = check_malformed_at_rules;
2452
+ ctx.check_unclosed_blocks = check_unclosed_blocks;
2118
2453
 
2119
2454
  // Parse CSS (top-level, no parent context)
2120
2455
  DEBUG_PRINTF("[PARSE] Starting parse_css_recursive from: %.80s\n", p);
@@ -0,0 +1,49 @@
1
+ # frozen_string_literal: true
2
+
3
module Cataract
  # Base class for Cataract-specific errors.
  class Error < StandardError; end

  # Error raised during import resolution
  class ImportError < Error; end

  # Parsing errors
  class DepthError < Error; end
  class SizeError < Error; end

  # Error raised when invalid CSS is encountered in strict mode.
  #
  # Carries an optional source position (+line+ / +column+, both 1-based) and
  # an optional symbolic +error_type+. The position is appended to the message
  # when known. Raising it with a bare message (no position keywords) is also
  # supported and leaves +line+, +column+ and +error_type+ as nil.
  class ParseError < Error
    attr_reader :line, :column, :error_type

    # @param message [String] Error message (without position info)
    # @param css [String, nil] Full CSS string for calculating position
    # @param pos [Integer, nil] Byte position in CSS where error occurred
    # @param line [Integer, nil] Line number (if already calculated)
    # @param column [Integer, nil] Column number (if already calculated)
    # @param type [Symbol, nil] Type of parse error (:empty_value, :malformed_declaration, etc.)
    def initialize(message, css: nil, pos: nil, line: nil, column: nil, type: nil)
      if css && pos
        # css + pos take precedence over explicitly supplied line/column
        @line, @column = position_for(css, pos)
      else
        @line = line
        @column = column
      end

      @error_type = type

      super(message_with_position(message))
    end

    private

    # Translate a byte offset into a 1-based [line, column] pair.
    #
    # All arithmetic is done on bytes: +pos+ is a byte offset, so the text
    # before it is taken with byteslice and searched as a binary string.
    # Mixing in character-indexed searches would misplace the column for CSS
    # containing multibyte characters, and searching from index -1 (pos == 0)
    # would scan from the end of the string.
    #
    # @param css [String] Full CSS source
    # @param pos [Integer] Byte offset of the error; clamped to 0..css.bytesize
    # @return [Array(Integer, Integer)] line and column (column counted in bytes)
    def position_for(css, pos)
      offset = pos.clamp(0, css.bytesize)
      preceding = css.byteslice(0, offset).b
      newline_at = preceding.rindex("\n")

      line_number = preceding.count("\n") + 1
      column_number = newline_at ? offset - newline_at : offset + 1
      [line_number, column_number]
    end

    # Append whatever position info is available to the caller's message.
    #
    # @param message [String] Message without position info
    # @return [String] Message, suffixed with line (and column) when known
    def message_with_position(message)
      if @line && @column
        "#{message} at line #{@line}, column #{@column}"
      elsif @line
        "#{message} at line #{@line}"
      else
        message
      end
    end
  end
end
@@ -46,6 +46,10 @@ module Cataract
46
46
  BYTE_BANG = 33 # '!'
47
47
  BYTE_PERCENT = 37 # '%'
48
48
  BYTE_SLASH_FWD = 47 # '/' (also defined as BYTE_SLASH above)
49
+ BYTE_EQUALS = 61 # '='
50
+ BYTE_CARET = 94 # '^'
51
+ BYTE_DOLLAR = 36 # '$'
52
+ BYTE_PIPE = 124 # '|'
49
53
 
50
54
  # Specific lowercase letters (for keyword matching)
51
55
  BYTE_LOWER_U = 117 # 'u'
@@ -64,6 +64,9 @@ module Cataract
64
64
  end
65
65
 
66
66
  def initialize(css_string, parser_options: {}, parent_media_sym: nil, parent_media_query_id: nil, depth: 0)
67
+ # Type validation
68
+ raise TypeError, "css_string must be a String, got #{css_string.class}" unless css_string.is_a?(String)
69
+
67
70
  # Private: Internal parsing state
68
71
  @_css = css_string.dup.freeze
69
72
  @_pos = 0
@@ -77,7 +80,8 @@ module Cataract
77
80
  selector_lists: true,
78
81
  base_uri: nil,
79
82
  absolute_paths: false,
80
- uri_resolver: nil
83
+ uri_resolver: nil,
84
+ raise_parse_errors: false
81
85
  }.merge(parser_options)
82
86
 
83
87
  # Private: Extract options to ivars to avoid repeated hash lookups in hot path
@@ -86,6 +90,34 @@ module Cataract
86
90
  @_absolute_paths = @_parser_options[:absolute_paths]
87
91
  @_uri_resolver = @_parser_options[:uri_resolver] || Cataract::DEFAULT_URI_RESOLVER
88
92
 
93
+ # Parse error handling options - extract to ivars for hot path performance
94
+ @_raise_parse_errors = @_parser_options[:raise_parse_errors]
95
+ if @_raise_parse_errors.is_a?(Hash)
96
+ # Granular control - default all to false (opt-in)
97
+ @_check_empty_values = @_raise_parse_errors[:empty_values] || false
98
+ @_check_malformed_declarations = @_raise_parse_errors[:malformed_declarations] || false
99
+ @_check_invalid_selectors = @_raise_parse_errors[:invalid_selectors] || false
100
+ @_check_invalid_selector_syntax = @_raise_parse_errors[:invalid_selector_syntax] || false
101
+ @_check_malformed_at_rules = @_raise_parse_errors[:malformed_at_rules] || false
102
+ @_check_unclosed_blocks = @_raise_parse_errors[:unclosed_blocks] || false
103
+ elsif @_raise_parse_errors == true
104
+ # Enable all error checks
105
+ @_check_empty_values = true
106
+ @_check_malformed_declarations = true
107
+ @_check_invalid_selectors = true
108
+ @_check_invalid_selector_syntax = true
109
+ @_check_malformed_at_rules = true
110
+ @_check_unclosed_blocks = true
111
+ else
112
+ # Disabled
113
+ @_check_empty_values = false
114
+ @_check_malformed_declarations = false
115
+ @_check_invalid_selectors = false
116
+ @_check_invalid_selector_syntax = false
117
+ @_check_malformed_at_rules = false
118
+ @_check_unclosed_blocks = false
119
+ end
120
+
89
121
  # Private: Internal counters
90
122
  @_media_query_id_counter = 0 # Next MediaQuery ID (0-indexed)
91
123
  @_next_selector_list_id = 0 # Counter for selector list IDs
@@ -143,6 +175,13 @@ module Cataract
143
175
 
144
176
  selectors.each do |individual_selector|
145
177
  individual_selector.strip!
178
+
179
+ # Check for empty selector in comma-separated list
180
+ if @_check_invalid_selector_syntax && individual_selector.empty? && selectors.size > 1
181
+ raise ParseError.new('Invalid selector syntax: empty selector in comma-separated list',
182
+ css: @_css, pos: decl_start, type: :invalid_selector_syntax)
183
+ end
184
+
146
185
  next if individual_selector.empty?
147
186
 
148
187
  # Get rule ID for this selector
@@ -194,6 +233,13 @@ module Cataract
194
233
 
195
234
  selectors.each do |individual_selector|
196
235
  individual_selector.strip!
236
+
237
+ # Check for empty selector in comma-separated list
238
+ if @_check_invalid_selector_syntax && individual_selector.empty? && selectors.size > 1
239
+ raise ParseError.new('Invalid selector syntax: empty selector in comma-separated list',
240
+ css: @_css, pos: decl_start, type: :invalid_selector_syntax)
241
+ end
242
+
197
243
  next if individual_selector.empty?
198
244
 
199
245
  rule_id = @_rule_id_counter
@@ -303,6 +349,54 @@ module Cataract
303
349
  end until @_pos == old_pos # No progress made # rubocop:disable Lint/Loop
304
350
  end
305
351
 
352
# Check if a selector contains only valid CSS selector characters and sequences.
#
# This is a byte-level whitelist scan; it does not parse the selector.
#
# Accepted bytes:
#   a-z A-Z 0-9 - _ . # [ ] : * > + ~ ( ) ' " = ^ $ | \ & % / ! ,
#   whitespace (space, tab, LF, CR)
#   any non-ASCII byte (>= 0x80), so UTF-8 encoded identifiers such as
#   ".café" are not reported as invalid
# Rejected sequences: ".." and "##" anywhere in the text.
#
# NOTE(review): if the native extension implements the same whitelist, keep the
# two in sync - confirm against the C parser.
#
# @param selector_text [String] Selector text to validate
# @return [Boolean] true if valid, false if an invalid byte or sequence is found
def valid_selector_syntax?(selector_text)
  i = 0
  len = selector_text.bytesize

  while i < len
    byte = selector_text.getbyte(i)

    # Bytes of multibyte UTF-8 characters are all >= 0x80 and can never be
    # confused with the ASCII punctuation checked below.
    if byte >= 0x80
      i += 1
      next
    end

    # Check for invalid character sequences
    if i + 1 < len
      next_byte = selector_text.getbyte(i + 1)
      # Double dot (..) is invalid
      return false if byte == BYTE_DOT && next_byte == BYTE_DOT
      # Double hash (##) is invalid
      return false if byte == BYTE_HASH && next_byte == BYTE_HASH
    end

    # Alphanumeric
    if (byte >= BYTE_LOWER_A && byte <= BYTE_LOWER_Z) ||
       (byte >= BYTE_UPPER_A && byte <= BYTE_UPPER_Z) ||
       (byte >= BYTE_DIGIT_0 && byte <= BYTE_DIGIT_9)
      i += 1
      next
    end

    # Whitespace
    if byte == BYTE_SPACE || byte == BYTE_TAB || byte == BYTE_NEWLINE || byte == BYTE_CR
      i += 1
      next
    end

    # Valid CSS selector special characters
    case byte
    when BYTE_HYPHEN, BYTE_UNDERSCORE, BYTE_DOT, BYTE_HASH, BYTE_LBRACKET, BYTE_RBRACKET,
         BYTE_COLON, BYTE_ASTERISK, BYTE_GT, BYTE_PLUS, BYTE_TILDE, BYTE_LPAREN, BYTE_RPAREN,
         BYTE_SQUOTE, BYTE_DQUOTE, BYTE_EQUALS, BYTE_CARET, BYTE_DOLLAR,
         BYTE_PIPE, BYTE_BACKSLASH, BYTE_AMPERSAND, BYTE_PERCENT, BYTE_SLASH, BYTE_BANG,
         BYTE_COMMA
      i += 1
    else
      # Invalid character found
      return false
    end
  end

  true
end
399
+
306
400
  # Parse a single CSS declaration (property: value)
307
401
  #
308
402
  # Performance-critical helper that parses one declaration.
@@ -410,6 +504,12 @@ module Cataract
410
504
  pos += 1
411
505
  end
412
506
 
507
+ # Reached EOF without finding matching closing brace
508
+ if @_check_unclosed_blocks && depth > 0
509
+ raise ParseError.new('Unclosed block: missing closing brace',
510
+ css: @_css, pos: start_pos - 1, type: :unclosed_block)
511
+ end
512
+
413
513
  pos
414
514
  end
415
515
 
@@ -433,6 +533,29 @@ module Cataract
433
533
 
434
534
  # Trim whitespace from selector (in-place to avoid allocation)
435
535
  selector_text.strip!
536
+
537
+ # Validate selector (strict mode) - only if enabled to avoid overhead
538
+ if @_check_invalid_selectors
539
+ # Check for empty selector
540
+ if selector_text.empty?
541
+ raise ParseError.new('Invalid selector: empty selector',
542
+ css: @_css, pos: start_pos, type: :invalid_selector)
543
+ end
544
+
545
+ # Check if selector starts with a combinator (>, +, ~)
546
+ first_char = selector_text.getbyte(0)
547
+ if first_char == BYTE_GT || first_char == BYTE_PLUS || first_char == BYTE_TILDE
548
+ raise ParseError.new("Invalid selector: selector cannot start with combinator '#{selector_text[0]}'",
549
+ css: @_css, pos: start_pos, type: :invalid_selector)
550
+ end
551
+ end
552
+
553
+ # Check selector syntax (whitelist validation for invalid characters/sequences)
554
+ if @_check_invalid_selector_syntax && !valid_selector_syntax?(selector_text)
555
+ raise ParseError.new('Invalid selector syntax: selector contains invalid characters',
556
+ css: @_css, pos: start_pos, type: :invalid_selector_syntax)
557
+ end
558
+
436
559
  selector_text
437
560
  end
438
561
 
@@ -514,7 +637,7 @@ module Cataract
514
637
 
515
638
  # This should never happen - parent_media_query_id should always be valid
516
639
  if parent_mq.nil?
517
- raise ParserError, "Invalid parent_media_query_id: #{parent_media_query_id} (not found in @media_queries)"
640
+ raise ParseError, "Invalid parent_media_query_id: #{parent_media_query_id} (not found in @media_queries)"
518
641
  end
519
642
 
520
643
  # Combine parent media query with child
@@ -675,6 +798,18 @@ module Cataract
675
798
 
676
799
  # Skip if no colon found (malformed)
677
800
  if eof? || peek_byte != BYTE_COLON
801
+ # Check for malformed declaration (strict mode)
802
+ if @_check_malformed_declarations
803
+ property_text = byteslice_encoded(property_start, @_pos - property_start).strip
804
+ if property_text.empty?
805
+ raise ParseError.new('Malformed declaration: missing property name',
806
+ css: @_css, pos: property_start, type: :malformed_declaration)
807
+ else
808
+ raise ParseError.new("Malformed declaration: missing colon after property '#{property_text}'",
809
+ css: @_css, pos: property_start, type: :malformed_declaration)
810
+ end
811
+ end
812
+
678
813
  # Try to recover by finding next ; or }
679
814
  skip_to_semicolon_or_brace
680
815
  next
@@ -726,7 +861,7 @@ module Cataract
726
861
  value.strip!
727
862
 
728
863
  # Check for !important (byte-by-byte, no regexp)
729
- if value.bytesize > 10
864
+ if value.bytesize >= 10
730
865
  # Scan backwards to find !important
731
866
  i = value.bytesize - 1
732
867
  # Skip trailing whitespace
@@ -757,6 +892,12 @@ module Cataract
757
892
  end
758
893
  end
759
894
 
895
+ # Check for empty value (strict mode) - only if enabled to avoid overhead
896
+ if @_check_empty_values && value.empty?
897
+ raise ParseError.new("Empty value for property '#{property}'",
898
+ css: @_css, pos: property_start, type: :empty_value)
899
+ end
900
+
760
901
  # Skip semicolon if present
761
902
  @_pos += 1 if peek_byte == BYTE_SEMICOLON
762
903
 
@@ -827,13 +968,27 @@ module Cataract
827
968
  if AT_RULE_TYPES.include?(at_rule_name)
828
969
  skip_ws_and_comments
829
970
 
971
+ # Remember start of condition for error reporting
972
+ condition_start = @_pos
973
+
830
974
  # Skip to opening brace
975
+ condition_end = @_pos
831
976
  while !eof? && peek_byte != BYTE_LBRACE
977
+ condition_end = @_pos
832
978
  @_pos += 1
833
979
  end
834
980
 
835
981
  return if eof? || peek_byte != BYTE_LBRACE
836
982
 
983
+ # Validate condition (strict mode) - @supports, @container, @scope require conditions
984
+ if @_check_malformed_at_rules && (at_rule_name == 'supports' || at_rule_name == 'container' || at_rule_name == 'scope')
985
+ condition_str = byteslice_encoded(condition_start, condition_end - condition_start).strip
986
+ if condition_str.empty?
987
+ raise ParseError.new("Malformed @#{at_rule_name}: missing condition",
988
+ css: @_css, pos: condition_start, type: :malformed_at_rule)
989
+ end
990
+ end
991
+
837
992
  @_pos += 1 # skip '{'
838
993
 
839
994
  # Find matching closing brace
@@ -908,6 +1063,13 @@ module Cataract
908
1063
  child_media_string = byteslice_encoded(mq_start, mq_end - mq_start)
909
1064
  # Keep media query exactly as written - parentheses are required per CSS spec
910
1065
  child_media_string.strip!
1066
+
1067
+ # Validate @media has a query (strict mode)
1068
+ if @_check_malformed_at_rules && child_media_string.empty?
1069
+ raise ParseError.new('Malformed @media: missing media query or condition',
1070
+ css: @_css, pos: mq_start, type: :malformed_at_rule)
1071
+ end
1072
+
911
1073
  child_media_sym = child_media_string.to_sym
912
1074
 
913
1075
  # Split comma-separated media queries (e.g., "screen, print" -> ["screen", "print"])
data/lib/cataract/pure.rb CHANGED
@@ -25,6 +25,8 @@ module Cataract
25
25
  class SizeError < Error; end
26
26
  end
27
27
 
28
+ require_relative 'error'
29
+
28
30
  require_relative 'version'
29
31
  require_relative 'constants'
30
32
 
@@ -109,6 +109,9 @@ module Cataract
109
109
  # @option options [Hash] :parser ({}) Parser configuration options
110
110
  # - :selector_lists [Boolean] (true) Track selector lists for W3C-compliant serialization
111
111
  def initialize(options = {})
112
+ # Type validation
113
+ raise TypeError, "options must be a Hash, got #{options.class}" unless options.is_a?(Hash)
114
+
112
115
  # Support :imports as alias for :import (backwards compatibility)
113
116
  options[:import] = options.delete(:imports) if options.key?(:imports) && !options.key?(:import)
114
117
 
@@ -119,12 +122,27 @@ module Cataract
119
122
  base_dir: nil,
120
123
  absolute_paths: false,
121
124
  uri_resolver: nil,
122
- parser: {}
125
+ parser: {},
126
+ raise_parse_errors: false
123
127
  }.merge(options)
124
128
 
129
+ # Type validation for specific options
130
+ if @options[:import_fetcher] && !@options[:import_fetcher].respond_to?(:call)
131
+ raise TypeError, "import_fetcher must be a Proc or callable, got #{@options[:import_fetcher].class}"
132
+ end
133
+
134
+ if @options[:base_uri] && !@options[:base_uri].is_a?(String)
135
+ raise TypeError, "base_uri must be a String, got #{@options[:base_uri].class}"
136
+ end
137
+
138
+ if @options[:uri_resolver] && !@options[:uri_resolver].respond_to?(:call)
139
+ raise TypeError, "uri_resolver must be a Proc or callable, got #{@options[:uri_resolver].class}"
140
+ end
141
+
125
142
  # Parser options with defaults (stored for passing to parser)
126
143
  @parser_options = {
127
- selector_lists: true
144
+ selector_lists: true,
145
+ raise_parse_errors: @options[:raise_parse_errors]
128
146
  }.merge(@options[:parser] || {})
129
147
 
130
148
  @rules = [] # Flat array of Rule structs
@@ -822,6 +840,10 @@ module Cataract
822
840
  new_imports = result[:imports]
823
841
  new_imports.each do |import|
824
842
  import.id += offset
843
+ # Update media_query_id to point to offsetted MediaQuery
844
+ if import.media_query_id
845
+ import.media_query_id += media_query_id_offset
846
+ end
825
847
  @imports << import
826
848
  end
827
849
 
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Cataract
4
- VERSION = '0.2.4'
4
+ VERSION = '0.2.5'
5
5
  end
data/lib/cataract.rb CHANGED
@@ -1,6 +1,7 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  require_relative 'cataract/version'
4
+ require_relative 'cataract/error'
4
5
  require_relative 'cataract/constants'
5
6
 
6
7
  # Load struct definitions first (before C extension or pure Ruby)
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: cataract
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.4
4
+ version: 0.2.5
5
5
  platform: ruby
6
6
  authors:
7
7
  - James Cook
@@ -83,6 +83,7 @@ files:
83
83
  - lib/cataract/constants.rb
84
84
  - lib/cataract/declaration.rb
85
85
  - lib/cataract/declarations.rb
86
+ - lib/cataract/error.rb
86
87
  - lib/cataract/import_resolver.rb
87
88
  - lib/cataract/import_statement.rb
88
89
  - lib/cataract/media_query.rb