cataract 0.2.4 → 0.2.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.rubocop.yml +1 -0
- data/BENCHMARKS.md +24 -6
- data/CHANGELOG.md +11 -1
- data/ext/cataract/cataract.c +4 -4
- data/ext/cataract/cataract.h +1 -1
- data/ext/cataract/css_parser.c +355 -20
- data/lib/cataract/error.rb +49 -0
- data/lib/cataract/pure/byte_constants.rb +4 -0
- data/lib/cataract/pure/parser.rb +165 -3
- data/lib/cataract/pure.rb +2 -0
- data/lib/cataract/stylesheet.rb +24 -2
- data/lib/cataract/version.rb +1 -1
- data/lib/cataract.rb +1 -0
- metadata +2 -1
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 898883ee4a30ccd34c034b4799e7a43eccae7f79693a4309d7ac7ef130f21840
|
|
4
|
+
data.tar.gz: 511eaf6d4a9fd1f1a601a999f496bc3493900daa2696d20bc1a6b97eaff60eae
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 59f87ff83a95fecb4e508a71e3055d8ebc53c23ad2d1316358e4a9e53ff8532c66b5686611e1706d6d95f1309428eda2e2df3b1884cbcb8d318dc866a256920b
|
|
7
|
+
data.tar.gz: 85eb22a3be35e25d7cfd01fcbb2bd9148411e3cb4ee49ccfd808f57ec17a6a8c95c1dc56b29cb563cc9cc7523fbacfc4083a5953dcf3010c6cb14c872a48872c
|
data/.rubocop.yml
CHANGED
|
@@ -178,4 +178,5 @@ Cataract/BanAssertIncludes:
|
|
|
178
178
|
- 'test/color/**/*.rb'
|
|
179
179
|
- 'test/test_benchmark_doc_generator.rb'
|
|
180
180
|
- 'test/test_speedup_calculator.rb'
|
|
181
|
+
- 'test/test_parse_errors.rb' # Parse error tests check error messages with assert_match
|
|
181
182
|
- 'test/support/**/*' # Support files define assert_contains which uses assert_includes internally
|
data/BENCHMARKS.md
CHANGED
|
@@ -20,17 +20,35 @@ Time to parse CSS into internal data structures
|
|
|
20
20
|
|
|
21
21
|
| Test Case | Native | Pure (no YJIT) | Pure (YJIT) |
|
|
22
22
|
|-----------|--------|----------------|-------------|
|
|
23
|
-
| Small CSS (64 lines, 1.0KB) | 37.
|
|
24
|
-
| Medium CSS with @media (139 lines, 1.6KB) | 34.
|
|
25
|
-
| Selector lists (3500 lines, 62.5KB, 500 lists) |
|
|
23
|
+
| Small CSS (64 lines, 1.0KB) | 37.27K i/s | 3.32K i/s | 13.48K i/s |
|
|
24
|
+
| Medium CSS with @media (139 lines, 1.6KB) | 34.66K i/s | 2.06K i/s | 8.94K i/s |
|
|
25
|
+
| Selector lists (3500 lines, 62.5KB, 500 lists) | 485.7 i/s | 55.7 i/s | 213.6 i/s |
|
|
26
26
|
|
|
27
27
|
### Speedups
|
|
28
28
|
|
|
29
29
|
| Comparison | Speedup |
|
|
30
30
|
|------------|---------|
|
|
31
|
-
| Native vs Pure (no YJIT) |
|
|
32
|
-
| Native vs Pure (YJIT) |
|
|
33
|
-
| YJIT impact on Pure Ruby | 4.
|
|
31
|
+
| Native vs Pure (no YJIT) | 14.4x faster (avg) |
|
|
32
|
+
| Native vs Pure (YJIT) | 3.38x faster (avg) |
|
|
33
|
+
| YJIT impact on Pure Ruby | 4.23x faster (avg) |
|
|
34
|
+
|
|
35
|
+
### Parse Error Checking Overhead
|
|
36
|
+
|
|
37
|
+
Parse error detection can be enabled with `raise_parse_errors: true`. The table below compares parsing throughput with and without it:
|
|
38
|
+
|
|
39
|
+
| Configuration | Native | Pure (no YJIT) | Pure (YJIT) |
|
|
40
|
+
|---------------|--------|----------------|-------------|
|
|
41
|
+
| Medium CSS (139 lines) - no error checking | 35.18K i/s | 2.07K i/s | 9.05K i/s |
|
|
42
|
+
| Medium CSS (139 lines) - with error checking | 34.84K i/s | 1.9K i/s | 8.19K i/s |
|
|
43
|
+
|
|
44
|
+
**Overhead Analysis:**
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
| Implementation | Overhead |
|
|
48
|
+
|----------------|----------|
|
|
49
|
+
| Native | ~0% (within noise) |
|
|
50
|
+
| Pure (no YJIT) | 8.9% slower |
|
|
51
|
+
| Pure (YJIT) | 10.4% slower |
|
|
34
52
|
|
|
35
53
|
---
|
|
36
54
|
|
data/CHANGELOG.md
CHANGED
|
@@ -1,4 +1,14 @@
|
|
|
1
|
-
## [
|
|
1
|
+
## [Unreleased]
|
|
2
|
+
|
|
3
|
+
## [0.2.5 - 2025-11-25]
|
|
4
|
+
|
|
5
|
+
- Feature: Parse error detection with `raise_parse_errors` option - validates CSS structure and raises `ParseError` exceptions for malformed input with line/column tracking
|
|
6
|
+
- Feature: Granular error control - enable specific checks (empty values, malformed declarations, invalid selectors, invalid selector syntax, malformed at-rules, unclosed blocks)
|
|
7
|
+
- Feature: Type safety validation for C extension - `Stylesheet.parse` and `Stylesheet.new` now validate argument types and raise clear `TypeError` instead of segfaulting
|
|
8
|
+
- Feature: Selector syntax validation using whitelist approach - catches invalid characters and sequences like `..class`, `##id`, `???`
|
|
9
|
+
- Fix: `add_block` with multiple `@import` statements now correctly tracks media type for each import instead of reusing the first import's media context
|
|
10
|
+
- Performance: Parse error checking adds minimal overhead (effectively zero for the C extension, roughly 9-10% slower for Pure Ruby with or without YJIT; see BENCHMARKS.md)
|
|
11
|
+
- Testing: Fuzzer corpus enhanced with invalid CSS patterns for crash testing
|
|
2
12
|
|
|
3
13
|
## [0.2.4 - 2025-11-23]
|
|
4
14
|
- MediaQuery first-class objects: Refactored media queries from simple symbols to proper structs with id, type, and conditions, enabling accurate
|
data/ext/cataract/cataract.c
CHANGED
|
@@ -14,7 +14,7 @@ VALUE cMediaQuery;
|
|
|
14
14
|
VALUE eCataractError;
|
|
15
15
|
VALUE eDepthError;
|
|
16
16
|
VALUE eSizeError;
|
|
17
|
-
VALUE
|
|
17
|
+
VALUE eParseError;
|
|
18
18
|
|
|
19
19
|
// ============================================================================
|
|
20
20
|
// Helper Functions
|
|
@@ -1408,10 +1408,10 @@ void Init_native_extension(void) {
|
|
|
1408
1408
|
eSizeError = rb_define_class_under(mCataract, "SizeError", eCataractError);
|
|
1409
1409
|
}
|
|
1410
1410
|
|
|
1411
|
-
if (rb_const_defined(mCataract, rb_intern("
|
|
1412
|
-
|
|
1411
|
+
if (rb_const_defined(mCataract, rb_intern("ParseError"))) {
|
|
1412
|
+
eParseError = rb_const_get(mCataract, rb_intern("ParseError"));
|
|
1413
1413
|
} else {
|
|
1414
|
-
|
|
1414
|
+
eParseError = rb_define_class_under(mCataract, "ParseError", eCataractError);
|
|
1415
1415
|
}
|
|
1416
1416
|
|
|
1417
1417
|
// Reuse Ruby-defined structs (they must be defined before loading this extension)
|
data/ext/cataract/cataract.h
CHANGED
|
@@ -19,7 +19,7 @@ extern VALUE cMediaQuery;
|
|
|
19
19
|
extern VALUE eCataractError;
|
|
20
20
|
extern VALUE eDepthError;
|
|
21
21
|
extern VALUE eSizeError;
|
|
22
|
-
extern VALUE
|
|
22
|
+
extern VALUE eParseError;
|
|
23
23
|
|
|
24
24
|
// ============================================================================
|
|
25
25
|
// Struct field indices
|
data/ext/cataract/css_parser.c
CHANGED
|
@@ -12,6 +12,11 @@
|
|
|
12
12
|
|
|
13
13
|
#include "cataract.h"
|
|
14
14
|
#include <string.h>
|
|
15
|
+
#include <stdint.h>
|
|
16
|
+
|
|
17
|
+
// Use uint8_t for boolean flags to reduce struct size and improve cache efficiency
|
|
18
|
+
// (int is 4 bytes, uint8_t is 1 byte - saves 27 bytes across 9 flags)
|
|
19
|
+
#define BOOLEAN uint8_t
|
|
15
20
|
|
|
16
21
|
// Parser context passed through recursive calls
|
|
17
22
|
typedef struct {
|
|
@@ -27,13 +32,21 @@ typedef struct {
|
|
|
27
32
|
int next_media_query_list_id; // Next media query list ID (0-indexed)
|
|
28
33
|
int media_query_count; // Safety limit for media queries
|
|
29
34
|
st_table *media_cache; // Parse-time cache: string => parsed media types
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
35
|
+
BOOLEAN has_nesting; // Set to 1 if any nested rules are created
|
|
36
|
+
BOOLEAN selector_lists_enabled; // Parser option: track selector lists (1=enabled, 0=disabled)
|
|
37
|
+
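BOOLEAN depth; // Current recursion depth (safety limit). NOTE(review): BOOLEAN is uint8_t, so this counter wraps at 255 - confirm the depth limit stays below that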
|
|
33
38
|
// URL conversion options
|
|
34
39
|
VALUE base_uri; // Base URI for resolving relative URLs (Qnil if disabled)
|
|
35
40
|
VALUE uri_resolver; // Proc to call for URL resolution (Qnil for default)
|
|
36
|
-
|
|
41
|
+
BOOLEAN absolute_paths; // Whether to convert relative URLs to absolute
|
|
42
|
+
// Parse error checking options
|
|
43
|
+
VALUE css_string; // Full CSS string for error position calculation
|
|
44
|
+
BOOLEAN check_empty_values; // Raise error on empty declaration values
|
|
45
|
+
BOOLEAN check_malformed_declarations; // Raise error on declarations without colons
|
|
46
|
+
BOOLEAN check_invalid_selectors; // Raise error on empty/malformed selectors
|
|
47
|
+
BOOLEAN check_invalid_selector_syntax; // Raise error on syntax violations (.. ## etc)
|
|
48
|
+
BOOLEAN check_malformed_at_rules; // Raise error on @media/@supports without conditions
|
|
49
|
+
BOOLEAN check_unclosed_blocks; // Raise error on missing closing braces
|
|
37
50
|
} ParserContext;
|
|
38
51
|
|
|
39
52
|
// Macro to skip CSS comments /* ... */
|
|
@@ -63,6 +76,20 @@ static inline const char* find_matching_brace(const char *start, const char *end
|
|
|
63
76
|
return p;
|
|
64
77
|
}
|
|
65
78
|
|
|
79
|
+
// Find matching closing brace with strict error checking
|
|
80
|
+
// Input: start = position after opening '{', end = limit, check_unclosed = whether to raise error
|
|
81
|
+
// Returns: pointer to matching '}'. Raises ParseError if check_unclosed is set and the result is at or past end; otherwise the find_matching_brace result is returned unchanged
|
|
82
|
+
static inline const char* find_matching_brace_strict(const char *start, const char *end, int check_unclosed) {
|
|
83
|
+
const char *closing_brace = find_matching_brace(start, end);
|
|
84
|
+
|
|
85
|
+
// Check if we found the closing brace
|
|
86
|
+
if (check_unclosed && closing_brace >= end) {
|
|
87
|
+
rb_raise(eParseError, "Unclosed block: missing closing brace");
|
|
88
|
+
}
|
|
89
|
+
|
|
90
|
+
return closing_brace;
|
|
91
|
+
}
|
|
92
|
+
|
|
66
93
|
// Find matching closing paren
|
|
67
94
|
// Input: start = position after opening '(', end = limit
|
|
68
95
|
// Returns: pointer to matching ')' (or end if not found)
|
|
@@ -78,6 +105,99 @@ static inline const char* find_matching_paren(const char *start, const char *end
|
|
|
78
105
|
return p;
|
|
79
106
|
}
|
|
80
107
|
|
|
108
|
+
// Helper function to raise ParseError with automatic position calculation
|
|
109
|
+
// Does not return - always raises the constructed ParseError via rb_exc_raise
|
|
110
|
+
__attribute__((noreturn))
|
|
111
|
+
static void raise_parse_error_at(ParserContext *ctx, const char *error_pos, const char *message, const char *error_type) {
|
|
112
|
+
const char *css = RSTRING_PTR(ctx->css_string);
|
|
113
|
+
long pos = error_pos - css;
|
|
114
|
+
|
|
115
|
+
// Build keyword args hash
|
|
116
|
+
VALUE kwargs = rb_hash_new();
|
|
117
|
+
rb_hash_aset(kwargs, ID2SYM(rb_intern("css")), ctx->css_string);
|
|
118
|
+
rb_hash_aset(kwargs, ID2SYM(rb_intern("pos")), LONG2NUM(pos));
|
|
119
|
+
rb_hash_aset(kwargs, ID2SYM(rb_intern("type")), ID2SYM(rb_intern(error_type)));
|
|
120
|
+
|
|
121
|
+
// Create ParseError instance
|
|
122
|
+
VALUE msg_str = rb_str_new_cstr(message);
|
|
123
|
+
VALUE argv[2] = {msg_str, kwargs};
|
|
124
|
+
VALUE error = rb_funcallv_kw(eParseError, rb_intern("new"), 2, argv, RB_PASS_KEYWORDS);
|
|
125
|
+
|
|
126
|
+
// Raise the error
|
|
127
|
+
rb_exc_raise(error);
|
|
128
|
+
}
|
|
129
|
+
|
|
130
|
+
// Check if a selector contains only valid CSS selector characters and sequences
|
|
131
|
+
// Returns 1 if valid, 0 if invalid
|
|
132
|
+
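// Valid characters: a-z A-Z 0-9 - _ . # [ ] : * > + ~ ( ) ' " = ^ $ | \ & % / ! , whitespace
// Any other byte (including non-ASCII bytes) is rejected; ".." and "##" are rejected wherever they occur, even inside quoted strings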
|
|
133
|
+
static inline int is_valid_selector(const char *start, const char *end) {
|
|
134
|
+
const char *p = start;
|
|
135
|
+
while (p < end) {
|
|
136
|
+
unsigned char c = (unsigned char)*p;
|
|
137
|
+
|
|
138
|
+
// Check for invalid character sequences
|
|
139
|
+
if (p + 1 < end) {
|
|
140
|
+
// Double dot (..) is invalid
|
|
141
|
+
if (c == '.' && *(p + 1) == '.') {
|
|
142
|
+
return 0;
|
|
143
|
+
}
|
|
144
|
+
// Double hash (##) is invalid
|
|
145
|
+
if (c == '#' && *(p + 1) == '#') {
|
|
146
|
+
return 0;
|
|
147
|
+
}
|
|
148
|
+
}
|
|
149
|
+
|
|
150
|
+
// Alphanumeric
|
|
151
|
+
if ((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || (c >= '0' && c <= '9')) {
|
|
152
|
+
p++;
|
|
153
|
+
continue;
|
|
154
|
+
}
|
|
155
|
+
|
|
156
|
+
// Whitespace
|
|
157
|
+
if (c == ' ' || c == '\t' || c == '\n' || c == '\r') {
|
|
158
|
+
p++;
|
|
159
|
+
continue;
|
|
160
|
+
}
|
|
161
|
+
|
|
162
|
+
// Valid CSS selector special characters
|
|
163
|
+
switch (c) {
|
|
164
|
+
case '-': // Hyphen (in identifiers, attribute selectors)
|
|
165
|
+
case '_': // Underscore (in identifiers)
|
|
166
|
+
case '.': // Class selector
|
|
167
|
+
case '#': // ID selector
|
|
168
|
+
case '[': // Attribute selector start
|
|
169
|
+
case ']': // Attribute selector end
|
|
170
|
+
case ':': // Pseudo-class/element (:: is valid for pseudo-elements)
|
|
171
|
+
case '*': // Universal selector, attribute operator
|
|
172
|
+
case '>': // Child combinator
|
|
173
|
+
case '+': // Adjacent sibling combinator
|
|
174
|
+
case '~': // General sibling combinator
|
|
175
|
+
case '(': // Pseudo-class function
|
|
176
|
+
case ')': // Pseudo-class function end
|
|
177
|
+
case '\'': // String in attribute selector
|
|
178
|
+
case '"': // String in attribute selector
|
|
179
|
+
case '=': // Attribute operator
|
|
180
|
+
case '^': // Attribute operator ^=
|
|
181
|
+
case '$': // Attribute operator $=
|
|
182
|
+
case '|': // Attribute operator |=, namespace separator
|
|
183
|
+
case '\\': // Escape character
|
|
184
|
+
case '&': // Nesting selector
|
|
185
|
+
case '%': // Sometimes used in selectors
|
|
186
|
+
case '/': // Sometimes used in selectors
|
|
187
|
+
case '!': // Negation (though rare)
|
|
188
|
+
case ',': // List separator (shouldn't be here after splitting, but allow it)
|
|
189
|
+
p++;
|
|
190
|
+
break;
|
|
191
|
+
|
|
192
|
+
default:
|
|
193
|
+
// Invalid character found
|
|
194
|
+
return 0;
|
|
195
|
+
}
|
|
196
|
+
}
|
|
197
|
+
|
|
198
|
+
return 1;
|
|
199
|
+
}
|
|
200
|
+
|
|
81
201
|
// Lowercase property name (CSS property names are ASCII-only)
|
|
82
202
|
// Non-static so merge_new.c can use it
|
|
83
203
|
VALUE lowercase_property(VALUE property_str) {
|
|
@@ -177,7 +297,7 @@ static VALUE resolve_nested_selector(VALUE parent_selector, const char *nested_s
|
|
|
177
297
|
long parent_len = RSTRING_LEN(parent_selector);
|
|
178
298
|
|
|
179
299
|
// Check if nested selector contains &
|
|
180
|
-
|
|
300
|
+
BOOLEAN has_ampersand = 0;
|
|
181
301
|
for (long i = 0; i < nested_len; i++) {
|
|
182
302
|
if (nested_sel[i] == '&') {
|
|
183
303
|
has_ampersand = 1;
|
|
@@ -606,6 +726,46 @@ static VALUE parse_declarations(const char *start, const char *end, ParserContex
|
|
|
606
726
|
|
|
607
727
|
// Malformed declaration - skip to next semicolon to recover
|
|
608
728
|
if (pos >= end || *pos != ':') {
|
|
729
|
+
if (ctx->check_malformed_declarations) {
|
|
730
|
+
// Extract property text for error message
|
|
731
|
+
const char *prop_text_end = pos;
|
|
732
|
+
trim_trailing(prop_start, &prop_text_end);
|
|
733
|
+
long prop_text_len = prop_text_end - prop_start;
|
|
734
|
+
|
|
735
|
+
const char *css = RSTRING_PTR(ctx->css_string);
|
|
736
|
+
long error_pos = prop_start - css;
|
|
737
|
+
|
|
738
|
+
if (prop_text_len == 0) {
|
|
739
|
+
// Build keyword args hash
|
|
740
|
+
VALUE kwargs = rb_hash_new();
|
|
741
|
+
rb_hash_aset(kwargs, ID2SYM(rb_intern("css")), ctx->css_string);
|
|
742
|
+
rb_hash_aset(kwargs, ID2SYM(rb_intern("pos")), LONG2NUM(error_pos));
|
|
743
|
+
rb_hash_aset(kwargs, ID2SYM(rb_intern("type")), ID2SYM(rb_intern("malformed_declaration")));
|
|
744
|
+
|
|
745
|
+
VALUE msg_str = rb_str_new_cstr("Malformed declaration: missing property name");
|
|
746
|
+
VALUE argv[2] = {msg_str, kwargs};
|
|
747
|
+
VALUE error = rb_funcallv_kw(eParseError, rb_intern("new"), 2, argv, RB_PASS_KEYWORDS);
|
|
748
|
+
rb_exc_raise(error);
|
|
749
|
+
} else {
|
|
750
|
+
// Limit property name to 200 chars in error message
|
|
751
|
+
int display_len = (prop_text_len > 200) ? 200 : (int)prop_text_len;
|
|
752
|
+
char error_msg[256];
|
|
753
|
+
snprintf(error_msg, sizeof(error_msg),
|
|
754
|
+
"Malformed declaration: missing colon after '%.*s'",
|
|
755
|
+
display_len, prop_start);
|
|
756
|
+
|
|
757
|
+
// Build keyword args hash
|
|
758
|
+
VALUE kwargs = rb_hash_new();
|
|
759
|
+
rb_hash_aset(kwargs, ID2SYM(rb_intern("css")), ctx->css_string);
|
|
760
|
+
rb_hash_aset(kwargs, ID2SYM(rb_intern("pos")), LONG2NUM(error_pos));
|
|
761
|
+
rb_hash_aset(kwargs, ID2SYM(rb_intern("type")), ID2SYM(rb_intern("malformed_declaration")));
|
|
762
|
+
|
|
763
|
+
VALUE msg_str = rb_str_new_cstr(error_msg);
|
|
764
|
+
VALUE argv[2] = {msg_str, kwargs};
|
|
765
|
+
VALUE error = rb_funcallv_kw(eParseError, rb_intern("new"), 2, argv, RB_PASS_KEYWORDS);
|
|
766
|
+
rb_exc_raise(error);
|
|
767
|
+
}
|
|
768
|
+
}
|
|
609
769
|
while (pos < end && *pos != ';') pos++;
|
|
610
770
|
if (pos < end) pos++; // Skip the semicolon
|
|
611
771
|
continue;
|
|
@@ -645,7 +805,7 @@ static VALUE parse_declarations(const char *start, const char *end, ParserContex
|
|
|
645
805
|
trim_trailing(val_start, &val_end);
|
|
646
806
|
|
|
647
807
|
// Check for !important
|
|
648
|
-
|
|
808
|
+
BOOLEAN is_important = 0;
|
|
649
809
|
if (val_end - val_start >= 10) { // strlen("!important") = 10
|
|
650
810
|
const char *check = val_end - 10;
|
|
651
811
|
while (check < val_end && IS_WHITESPACE(*check)) check++;
|
|
@@ -667,6 +827,34 @@ static VALUE parse_declarations(const char *start, const char *end, ParserContex
|
|
|
667
827
|
// Final trim
|
|
668
828
|
trim_trailing(val_start, &val_end);
|
|
669
829
|
|
|
830
|
+
// Check for empty value
|
|
831
|
+
if (val_end <= val_start && ctx->check_empty_values) {
|
|
832
|
+
long prop_len = prop_end - prop_start;
|
|
833
|
+
const char *css = RSTRING_PTR(ctx->css_string);
|
|
834
|
+
long error_pos = val_start - css;
|
|
835
|
+
|
|
836
|
+
// Build error message
|
|
837
|
+
int display_len = (prop_len > 200) ? 200 : (int)prop_len;
|
|
838
|
+
char error_msg[256];
|
|
839
|
+
snprintf(error_msg, sizeof(error_msg),
|
|
840
|
+
"Empty value for property '%.*s'",
|
|
841
|
+
display_len, prop_start);
|
|
842
|
+
|
|
843
|
+
// Build keyword args hash
|
|
844
|
+
VALUE kwargs = rb_hash_new();
|
|
845
|
+
rb_hash_aset(kwargs, ID2SYM(rb_intern("css")), ctx->css_string);
|
|
846
|
+
rb_hash_aset(kwargs, ID2SYM(rb_intern("pos")), LONG2NUM(error_pos));
|
|
847
|
+
rb_hash_aset(kwargs, ID2SYM(rb_intern("type")), ID2SYM(rb_intern("empty_value")));
|
|
848
|
+
|
|
849
|
+
// Create ParseError instance: ParseError.new(message, **kwargs)
|
|
850
|
+
VALUE msg_str = rb_str_new_cstr(error_msg);
|
|
851
|
+
VALUE argv[2] = {msg_str, kwargs};
|
|
852
|
+
VALUE error = rb_funcallv_kw(eParseError, rb_intern("new"), 2, argv, RB_PASS_KEYWORDS);
|
|
853
|
+
|
|
854
|
+
// Raise the error
|
|
855
|
+
rb_exc_raise(error);
|
|
856
|
+
}
|
|
857
|
+
|
|
670
858
|
// Skip if value is empty
|
|
671
859
|
if (val_end > val_start) {
|
|
672
860
|
long prop_len = prop_end - prop_start;
|
|
@@ -896,7 +1084,7 @@ static VALUE parse_mixed_block(ParserContext *ctx, const char *start, const char
|
|
|
896
1084
|
|
|
897
1085
|
// Find matching closing brace
|
|
898
1086
|
const char *media_block_start = p;
|
|
899
|
-
const char *media_block_end =
|
|
1087
|
+
const char *media_block_end = find_matching_brace_strict(p, end, ctx->check_unclosed_blocks);
|
|
900
1088
|
p = media_block_end;
|
|
901
1089
|
|
|
902
1090
|
if (p < end) p++; // Skip }
|
|
@@ -909,7 +1097,7 @@ static VALUE parse_mixed_block(ParserContext *ctx, const char *start, const char
|
|
|
909
1097
|
|
|
910
1098
|
// This should never happen - parent_media_query_id should always be valid
|
|
911
1099
|
if (NIL_P(parent_mq)) {
|
|
912
|
-
rb_raise(
|
|
1100
|
+
rb_raise(eParseError,
|
|
913
1101
|
"Invalid parent_media_query_id: %d (not found in media_queries array)",
|
|
914
1102
|
parent_media_query_id);
|
|
915
1103
|
}
|
|
@@ -1022,7 +1210,7 @@ static VALUE parse_mixed_block(ParserContext *ctx, const char *start, const char
|
|
|
1022
1210
|
// Example: "& .child { font: 14px; }"
|
|
1023
1211
|
// ^nested_block_start ^nested_block_end (at })
|
|
1024
1212
|
const char *nested_block_start = p;
|
|
1025
|
-
const char *nested_block_end =
|
|
1213
|
+
const char *nested_block_end = find_matching_brace_strict(p, end, ctx->check_unclosed_blocks);
|
|
1026
1214
|
p = nested_block_end;
|
|
1027
1215
|
|
|
1028
1216
|
if (p < end) p++; // Skip }
|
|
@@ -1111,7 +1299,7 @@ static VALUE parse_mixed_block(ParserContext *ctx, const char *start, const char
|
|
|
1111
1299
|
trim_leading(&p, end);
|
|
1112
1300
|
|
|
1113
1301
|
const char *val_start = p;
|
|
1114
|
-
|
|
1302
|
+
BOOLEAN important = 0;
|
|
1115
1303
|
|
|
1116
1304
|
// Find end of value (semicolon or closing brace or end)
|
|
1117
1305
|
while (p < end && *p != ';' && *p != '}') p++;
|
|
@@ -1432,6 +1620,30 @@ static void parse_css_recursive(ParserContext *ctx, const char *css, const char
|
|
|
1432
1620
|
// Trim
|
|
1433
1621
|
trim_trailing(mq_start, &mq_end);
|
|
1434
1622
|
|
|
1623
|
+
// Check for empty media query
|
|
1624
|
+
if (mq_end <= mq_start) {
|
|
1625
|
+
if (ctx->check_malformed_at_rules) {
|
|
1626
|
+
raise_parse_error_at(ctx, mq_start, "Malformed @media: missing media query", "malformed_at_rule");
|
|
1627
|
+
} else {
|
|
1628
|
+
// Empty media query with check disabled - skip @media wrapper and parse contents as regular rules
|
|
1629
|
+
if (p >= pe || *p != '{') {
|
|
1630
|
+
continue; // Malformed structure
|
|
1631
|
+
}
|
|
1632
|
+
p++; // Skip opening {
|
|
1633
|
+
const char *block_start = p;
|
|
1634
|
+
const char *block_end = find_matching_brace_strict(p, pe, ctx->check_unclosed_blocks);
|
|
1635
|
+
p = block_end;
|
|
1636
|
+
|
|
1637
|
+
// Parse block contents without creating a new media query; the parent's media context is passed through unchanged
|
|
1638
|
+
ctx->depth++;
|
|
1639
|
+
parse_css_recursive(ctx, block_start, block_end, parent_media_sym, NO_PARENT_SELECTOR, NO_PARENT_RULE_ID, parent_media_query_id);
|
|
1640
|
+
ctx->depth--;
|
|
1641
|
+
|
|
1642
|
+
if (p < pe && *p == '}') p++;
|
|
1643
|
+
continue;
|
|
1644
|
+
}
|
|
1645
|
+
}
|
|
1646
|
+
|
|
1435
1647
|
if (p >= pe || *p != '{') {
|
|
1436
1648
|
continue; // Malformed
|
|
1437
1649
|
}
|
|
@@ -1546,7 +1758,7 @@ static void parse_css_recursive(ParserContext *ctx, const char *css, const char
|
|
|
1546
1758
|
|
|
1547
1759
|
// Find matching closing brace
|
|
1548
1760
|
const char *block_start = p;
|
|
1549
|
-
const char *block_end =
|
|
1761
|
+
const char *block_end = find_matching_brace_strict(p, pe, ctx->check_unclosed_blocks);
|
|
1550
1762
|
p = block_end;
|
|
1551
1763
|
|
|
1552
1764
|
// Recursively parse @media block with new media query context
|
|
@@ -1573,13 +1785,22 @@ static void parse_css_recursive(ParserContext *ctx, const char *css, const char
|
|
|
1573
1785
|
long at_name_len = at_name_end - at_start;
|
|
1574
1786
|
|
|
1575
1787
|
// Check if this is a conditional group rule
|
|
1576
|
-
|
|
1788
|
+
BOOLEAN is_conditional_group =
|
|
1577
1789
|
(at_name_len == 8 && strncmp(at_start, "supports", 8) == 0) ||
|
|
1578
1790
|
(at_name_len == 5 && strncmp(at_start, "layer", 5) == 0) ||
|
|
1579
1791
|
(at_name_len == 9 && strncmp(at_start, "container", 9) == 0) ||
|
|
1580
1792
|
(at_name_len == 5 && strncmp(at_start, "scope", 5) == 0);
|
|
1581
1793
|
|
|
1582
1794
|
if (is_conditional_group) {
|
|
1795
|
+
// Check if this rule requires a condition
|
|
1796
|
+
BOOLEAN requires_condition =
|
|
1797
|
+
(at_name_len == 8 && strncmp(at_start, "supports", 8) == 0) ||
|
|
1798
|
+
(at_name_len == 9 && strncmp(at_start, "container", 9) == 0);
|
|
1799
|
+
|
|
1800
|
+
// Extract condition (between at-rule name and opening brace)
|
|
1801
|
+
const char *cond_start = at_name_end;
|
|
1802
|
+
while (cond_start < pe && IS_WHITESPACE(*cond_start)) cond_start++;
|
|
1803
|
+
|
|
1583
1804
|
// Skip to opening brace
|
|
1584
1805
|
p = at_name_end;
|
|
1585
1806
|
while (p < pe && *p != '{') p++;
|
|
@@ -1588,11 +1809,22 @@ static void parse_css_recursive(ParserContext *ctx, const char *css, const char
|
|
|
1588
1809
|
continue; // Malformed
|
|
1589
1810
|
}
|
|
1590
1811
|
|
|
1812
|
+
// Trim condition
|
|
1813
|
+
const char *cond_end = p;
|
|
1814
|
+
while (cond_end > cond_start && IS_WHITESPACE(*(cond_end - 1))) cond_end--;
|
|
1815
|
+
|
|
1816
|
+
// Check for missing condition
|
|
1817
|
+
if (requires_condition && cond_end <= cond_start && ctx->check_malformed_at_rules) {
|
|
1818
|
+
char error_msg[100];
|
|
1819
|
+
snprintf(error_msg, sizeof(error_msg), "Malformed @%.*s: missing condition", (int)at_name_len, at_start);
|
|
1820
|
+
raise_parse_error_at(ctx, at_start - 1, error_msg, "malformed_at_rule");
|
|
1821
|
+
}
|
|
1822
|
+
|
|
1591
1823
|
p++; // Skip opening {
|
|
1592
1824
|
|
|
1593
1825
|
// Find matching closing brace
|
|
1594
1826
|
const char *block_start = p;
|
|
1595
|
-
const char *block_end =
|
|
1827
|
+
const char *block_end = find_matching_brace_strict(p, pe, ctx->check_unclosed_blocks);
|
|
1596
1828
|
p = block_end;
|
|
1597
1829
|
|
|
1598
1830
|
// Recursively parse block content (preserve parent media context)
|
|
@@ -1606,7 +1838,7 @@ static void parse_css_recursive(ParserContext *ctx, const char *css, const char
|
|
|
1606
1838
|
|
|
1607
1839
|
// Check for @keyframes (contains <rule-list>)
|
|
1608
1840
|
// TODO: Test perf gains by using RB_UNLIKELY(is_keyframes) wrapper
|
|
1609
|
-
|
|
1841
|
+
BOOLEAN is_keyframes =
|
|
1610
1842
|
(at_name_len == 9 && strncmp(at_start, "keyframes", 9) == 0) ||
|
|
1611
1843
|
(at_name_len == 17 && strncmp(at_start, "-webkit-keyframes", 17) == 0) ||
|
|
1612
1844
|
(at_name_len == 13 && strncmp(at_start, "-moz-keyframes", 13) == 0);
|
|
@@ -1631,7 +1863,7 @@ static void parse_css_recursive(ParserContext *ctx, const char *css, const char
|
|
|
1631
1863
|
|
|
1632
1864
|
// Find matching closing brace
|
|
1633
1865
|
const char *block_start = p;
|
|
1634
|
-
const char *block_end =
|
|
1866
|
+
const char *block_end = find_matching_brace_strict(p, pe, ctx->check_unclosed_blocks);
|
|
1635
1867
|
p = block_end;
|
|
1636
1868
|
|
|
1637
1869
|
// Parse keyframe blocks as rules (from/to/0%/50% etc)
|
|
@@ -1680,7 +1912,7 @@ static void parse_css_recursive(ParserContext *ctx, const char *css, const char
|
|
|
1680
1912
|
}
|
|
1681
1913
|
|
|
1682
1914
|
// Check for @font-face (contains <declaration-list>)
|
|
1683
|
-
|
|
1915
|
+
BOOLEAN is_font_face = (at_name_len == 9 && strncmp(at_start, "font-face", 9) == 0);
|
|
1684
1916
|
|
|
1685
1917
|
if (is_font_face) {
|
|
1686
1918
|
// Build selector string: "@font-face"
|
|
@@ -1702,7 +1934,7 @@ static void parse_css_recursive(ParserContext *ctx, const char *css, const char
|
|
|
1702
1934
|
|
|
1703
1935
|
// Find matching closing brace
|
|
1704
1936
|
const char *decl_start = p;
|
|
1705
|
-
const char *decl_end =
|
|
1937
|
+
const char *decl_end = find_matching_brace_strict(p, pe, ctx->check_unclosed_blocks);
|
|
1706
1938
|
p = decl_end;
|
|
1707
1939
|
|
|
1708
1940
|
// Parse declarations
|
|
@@ -1740,6 +1972,10 @@ static void parse_css_recursive(ParserContext *ctx, const char *css, const char
|
|
|
1740
1972
|
|
|
1741
1973
|
// Opening brace
|
|
1742
1974
|
if (*p == '{') {
|
|
1975
|
+
// Check for empty selector (opening brace with no selector before it)
|
|
1976
|
+
if (ctx->check_invalid_selectors && brace_depth == 0 && selector_start == NULL) {
|
|
1977
|
+
raise_parse_error_at(ctx, p, "Invalid selector: empty selector", "invalid_selector");
|
|
1978
|
+
}
|
|
1743
1979
|
if (brace_depth == 0 && selector_start != NULL) {
|
|
1744
1980
|
decl_start = p + 1;
|
|
1745
1981
|
}
|
|
@@ -1755,7 +1991,7 @@ static void parse_css_recursive(ParserContext *ctx, const char *css, const char
|
|
|
1755
1991
|
// We've found a complete CSS rule block - now determine if it has nesting
|
|
1756
1992
|
// Example: .parent { color: red; & .child { font-size: 14px; } }
|
|
1757
1993
|
// ^selector_start ^decl_start ^p (at })
|
|
1758
|
-
|
|
1994
|
+
BOOLEAN has_nesting = has_nested_selectors(decl_start, p);
|
|
1759
1995
|
|
|
1760
1996
|
// Get selector string
|
|
1761
1997
|
const char *sel_end = decl_start - 1;
|
|
@@ -1763,6 +1999,23 @@ static void parse_css_recursive(ParserContext *ctx, const char *css, const char
|
|
|
1763
1999
|
sel_end--;
|
|
1764
2000
|
}
|
|
1765
2001
|
|
|
2002
|
+
// Check for empty selector
|
|
2003
|
+
if (ctx->check_invalid_selectors && sel_end <= selector_start) {
|
|
2004
|
+
const char *css = RSTRING_PTR(ctx->css_string);
|
|
2005
|
+
long error_pos = selector_start - css;
|
|
2006
|
+
|
|
2007
|
+
// Build keyword args hash
|
|
2008
|
+
VALUE kwargs = rb_hash_new();
|
|
2009
|
+
rb_hash_aset(kwargs, ID2SYM(rb_intern("css")), ctx->css_string);
|
|
2010
|
+
rb_hash_aset(kwargs, ID2SYM(rb_intern("pos")), LONG2NUM(error_pos));
|
|
2011
|
+
rb_hash_aset(kwargs, ID2SYM(rb_intern("type")), ID2SYM(rb_intern("invalid_selector")));
|
|
2012
|
+
|
|
2013
|
+
VALUE msg_str = rb_str_new_cstr("Invalid selector: empty selector");
|
|
2014
|
+
VALUE argv[2] = {msg_str, kwargs};
|
|
2015
|
+
VALUE error = rb_funcallv_kw(eParseError, rb_intern("new"), 2, argv, RB_PASS_KEYWORDS);
|
|
2016
|
+
rb_exc_raise(error);
|
|
2017
|
+
}
|
|
2018
|
+
|
|
1766
2019
|
if (!has_nesting) {
|
|
1767
2020
|
// FAST PATH: No nesting - parse as pure declarations
|
|
1768
2021
|
VALUE declarations = parse_declarations(decl_start, p, ctx);
|
|
@@ -1809,6 +2062,37 @@ static void parse_css_recursive(ParserContext *ctx, const char *css, const char
|
|
|
1809
2062
|
}
|
|
1810
2063
|
|
|
1811
2064
|
if (seg_end_ptr > seg_start) {
|
|
2065
|
+
// Check for invalid selectors
|
|
2066
|
+
if (ctx->check_invalid_selectors) {
|
|
2067
|
+
// Check if selector starts with combinator
|
|
2068
|
+
char first_char = *seg_start;
|
|
2069
|
+
if (first_char == '>' || first_char == '+' || first_char == '~') {
|
|
2070
|
+
const char *css = RSTRING_PTR(ctx->css_string);
|
|
2071
|
+
long error_pos = seg_start - css;
|
|
2072
|
+
|
|
2073
|
+
char error_msg[256];
|
|
2074
|
+
snprintf(error_msg, sizeof(error_msg),
|
|
2075
|
+
"Invalid selector: selector cannot start with combinator '%c'",
|
|
2076
|
+
first_char);
|
|
2077
|
+
|
|
2078
|
+
// Build keyword args hash
|
|
2079
|
+
VALUE kwargs = rb_hash_new();
|
|
2080
|
+
rb_hash_aset(kwargs, ID2SYM(rb_intern("css")), ctx->css_string);
|
|
2081
|
+
rb_hash_aset(kwargs, ID2SYM(rb_intern("pos")), LONG2NUM(error_pos));
|
|
2082
|
+
rb_hash_aset(kwargs, ID2SYM(rb_intern("type")), ID2SYM(rb_intern("invalid_selector")));
|
|
2083
|
+
|
|
2084
|
+
VALUE msg_str = rb_str_new_cstr(error_msg);
|
|
2085
|
+
VALUE argv[2] = {msg_str, kwargs};
|
|
2086
|
+
VALUE error = rb_funcallv_kw(eParseError, rb_intern("new"), 2, argv, RB_PASS_KEYWORDS);
|
|
2087
|
+
rb_exc_raise(error);
|
|
2088
|
+
}
|
|
2089
|
+
}
|
|
2090
|
+
|
|
2091
|
+
// Check for invalid selector syntax (whitelist validation)
|
|
2092
|
+
if (ctx->check_invalid_selector_syntax && !is_valid_selector(seg_start, seg_end_ptr)) {
|
|
2093
|
+
raise_parse_error_at(ctx, seg_start, "Invalid selector syntax: selector contains invalid characters", "invalid_selector_syntax");
|
|
2094
|
+
}
|
|
2095
|
+
|
|
1812
2096
|
VALUE selector = rb_utf8_str_new(seg_start, seg_end_ptr - seg_start);
|
|
1813
2097
|
|
|
1814
2098
|
// Resolve against parent if nested
|
|
@@ -1882,6 +2166,9 @@ static void parse_css_recursive(ParserContext *ctx, const char *css, const char
|
|
|
1882
2166
|
|
|
1883
2167
|
// Update media index
|
|
1884
2168
|
update_media_index(ctx, parent_media_sym, rule_id);
|
|
2169
|
+
} else if (ctx->check_invalid_selector_syntax && selector_count > 1) {
|
|
2170
|
+
// Empty selector in comma-separated list (e.g., "h1, , h3")
|
|
2171
|
+
raise_parse_error_at(ctx, seg_start, "Invalid selector syntax: empty selector in comma-separated list", "invalid_selector_syntax");
|
|
1885
2172
|
}
|
|
1886
2173
|
|
|
1887
2174
|
seg_start = seg + 1;
|
|
@@ -2023,6 +2310,11 @@ static void parse_css_recursive(ParserContext *ctx, const char *css, const char
|
|
|
2023
2310
|
|
|
2024
2311
|
p++;
|
|
2025
2312
|
}
|
|
2313
|
+
|
|
2314
|
+
// Check for unclosed blocks at end of parsing
|
|
2315
|
+
if (ctx->check_unclosed_blocks && brace_depth > 0) {
|
|
2316
|
+
rb_raise(eParseError, "Unclosed block: missing closing brace");
|
|
2317
|
+
}
|
|
2026
2318
|
}
|
|
2027
2319
|
|
|
2028
2320
|
/*
|
|
@@ -2056,13 +2348,48 @@ VALUE parse_css_new_impl(VALUE css_string, VALUE parser_options, int rule_id_off
|
|
|
2056
2348
|
|
|
2057
2349
|
// Read parser options
|
|
2058
2350
|
VALUE selector_lists_opt = rb_hash_aref(parser_options, ID2SYM(rb_intern("selector_lists")));
|
|
2059
|
-
|
|
2351
|
+
BOOLEAN selector_lists_enabled = (NIL_P(selector_lists_opt) || RTEST(selector_lists_opt)) ? 1 : 0;
|
|
2060
2352
|
|
|
2061
2353
|
// URL conversion options
|
|
2062
2354
|
VALUE base_uri = rb_hash_aref(parser_options, ID2SYM(rb_intern("base_uri")));
|
|
2063
2355
|
VALUE absolute_paths_opt = rb_hash_aref(parser_options, ID2SYM(rb_intern("absolute_paths")));
|
|
2064
2356
|
VALUE uri_resolver = rb_hash_aref(parser_options, ID2SYM(rb_intern("uri_resolver")));
|
|
2065
|
-
|
|
2357
|
+
BOOLEAN absolute_paths = RTEST(absolute_paths_opt) ? 1 : 0;
|
|
2358
|
+
|
|
2359
|
+
// Parse error options
|
|
2360
|
+
VALUE raise_parse_errors_opt = rb_hash_aref(parser_options, ID2SYM(rb_intern("raise_parse_errors")));
|
|
2361
|
+
BOOLEAN check_empty_values = 0;
|
|
2362
|
+
BOOLEAN check_malformed_declarations = 0;
|
|
2363
|
+
BOOLEAN check_invalid_selectors = 0;
|
|
2364
|
+
BOOLEAN check_invalid_selector_syntax = 0;
|
|
2365
|
+
BOOLEAN check_malformed_at_rules = 0;
|
|
2366
|
+
BOOLEAN check_unclosed_blocks = 0;
|
|
2367
|
+
|
|
2368
|
+
if (RTEST(raise_parse_errors_opt)) {
|
|
2369
|
+
if (TYPE(raise_parse_errors_opt) == T_HASH) {
|
|
2370
|
+
// Hash of specific error types
|
|
2371
|
+
VALUE empty_values_opt = rb_hash_aref(raise_parse_errors_opt, ID2SYM(rb_intern("empty_values")));
|
|
2372
|
+
VALUE malformed_declarations_opt = rb_hash_aref(raise_parse_errors_opt, ID2SYM(rb_intern("malformed_declarations")));
|
|
2373
|
+
VALUE invalid_selectors_opt = rb_hash_aref(raise_parse_errors_opt, ID2SYM(rb_intern("invalid_selectors")));
|
|
2374
|
+
VALUE invalid_selector_syntax_opt = rb_hash_aref(raise_parse_errors_opt, ID2SYM(rb_intern("invalid_selector_syntax")));
|
|
2375
|
+
VALUE malformed_at_rules_opt = rb_hash_aref(raise_parse_errors_opt, ID2SYM(rb_intern("malformed_at_rules")));
|
|
2376
|
+
VALUE unclosed_blocks_opt = rb_hash_aref(raise_parse_errors_opt, ID2SYM(rb_intern("unclosed_blocks")));
|
|
2377
|
+
check_empty_values = RTEST(empty_values_opt) ? 1 : 0;
|
|
2378
|
+
check_malformed_declarations = RTEST(malformed_declarations_opt) ? 1 : 0;
|
|
2379
|
+
check_invalid_selectors = RTEST(invalid_selectors_opt) ? 1 : 0;
|
|
2380
|
+
check_invalid_selector_syntax = RTEST(invalid_selector_syntax_opt) ? 1 : 0;
|
|
2381
|
+
check_malformed_at_rules = RTEST(malformed_at_rules_opt) ? 1 : 0;
|
|
2382
|
+
check_unclosed_blocks = RTEST(unclosed_blocks_opt) ? 1 : 0;
|
|
2383
|
+
} else {
|
|
2384
|
+
// true - enable all checks
|
|
2385
|
+
check_empty_values = 1;
|
|
2386
|
+
check_malformed_declarations = 1;
|
|
2387
|
+
check_invalid_selectors = 1;
|
|
2388
|
+
check_invalid_selector_syntax = 1;
|
|
2389
|
+
check_malformed_at_rules = 1;
|
|
2390
|
+
check_unclosed_blocks = 1;
|
|
2391
|
+
}
|
|
2392
|
+
}
|
|
2066
2393
|
|
|
2067
2394
|
const char *css = RSTRING_PTR(css_string);
|
|
2068
2395
|
const char *pe = css + RSTRING_LEN(css_string);
|
|
@@ -2115,6 +2442,14 @@ VALUE parse_css_new_impl(VALUE css_string, VALUE parser_options, int rule_id_off
|
|
|
2115
2442
|
ctx.base_uri = base_uri;
|
|
2116
2443
|
ctx.uri_resolver = uri_resolver;
|
|
2117
2444
|
ctx.absolute_paths = absolute_paths;
|
|
2445
|
+
// Parse error options
|
|
2446
|
+
ctx.css_string = css_string;
|
|
2447
|
+
ctx.check_empty_values = check_empty_values;
|
|
2448
|
+
ctx.check_malformed_declarations = check_malformed_declarations;
|
|
2449
|
+
ctx.check_invalid_selectors = check_invalid_selectors;
|
|
2450
|
+
ctx.check_invalid_selector_syntax = check_invalid_selector_syntax;
|
|
2451
|
+
ctx.check_malformed_at_rules = check_malformed_at_rules;
|
|
2452
|
+
ctx.check_unclosed_blocks = check_unclosed_blocks;
|
|
2118
2453
|
|
|
2119
2454
|
// Parse CSS (top-level, no parent context)
|
|
2120
2455
|
DEBUG_PRINTF("[PARSE] Starting parse_css_recursive from: %.80s\n", p);
|
|
@@ -0,0 +1,49 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Cataract
  # Common ancestor of the error classes defined in this file.
  class Error < StandardError; end

  # Error raised during import resolution
  class ImportError < Error; end

  # Parsing errors
  class DepthError < Error; end
  class SizeError < Error; end

  # Error raised when invalid CSS is encountered in strict mode.
  #
  # When the position of the error is known it is exposed through #line and
  # #column and appended to the exception message. Lines and columns are
  # 1-based; columns are counted in bytes because +pos+ is a byte offset.
  class ParseError < Error
    attr_reader :line, :column, :error_type

    # @param message [String] Error message (without position info)
    # @param css [String, nil] Full CSS string for calculating position
    # @param pos [Integer, nil] Byte position in CSS where error occurred
    # @param line [Integer, nil] Line number (if already calculated)
    # @param column [Integer, nil] Column number (if already calculated)
    # @param type [Symbol, nil] Type of parse error (:empty_value, :malformed_declaration, etc.)
    def initialize(message, css: nil, pos: nil, line: nil, column: nil, type: nil)
      # A css/pos pair takes precedence over an explicitly supplied line/column.
      if css && pos
        @line, @column = position_for(css, pos)
      else
        @line = line
        @column = column
      end

      @error_type = type

      super(message_with_position(message))
    end

    private

    # Translate a byte offset into a 1-based [line, column] pair.
    #
    # The prefix is handled as binary data: +pos+ is a byte offset, so it
    # must not be used as a character index (multi-byte characters would skew
    # the result) and it may even fall in the middle of a character.
    # Out-of-range offsets are clamped to the bounds of the string.
    def position_for(css, pos)
      prefix = css.byteslice(0, pos.clamp(0, css.bytesize)).b
      last_newline = prefix.rindex("\n")

      line_number = prefix.count("\n") + 1
      # With no preceding newline (including pos == 0) the column is pos + 1.
      column_number = last_newline ? prefix.bytesize - last_newline : prefix.bytesize + 1

      [line_number, column_number]
    end

    # Append whatever position information is available to the message.
    def message_with_position(message)
      if @line && @column
        "#{message} at line #{@line}, column #{@column}"
      elsif @line
        "#{message} at line #{@line}"
      else
        message
      end
    end
  end
end
|
|
@@ -46,6 +46,10 @@ module Cataract
|
|
|
46
46
|
BYTE_BANG = 33 # '!'
|
|
47
47
|
BYTE_PERCENT = 37 # '%'
|
|
48
48
|
BYTE_SLASH_FWD = 47 # '/' (also defined as BYTE_SLASH above)
|
|
49
|
+
BYTE_EQUALS = 61 # '='
|
|
50
|
+
BYTE_CARET = 94 # '^'
|
|
51
|
+
BYTE_DOLLAR = 36 # '$'
|
|
52
|
+
BYTE_PIPE = 124 # '|'
|
|
49
53
|
|
|
50
54
|
# Specific lowercase letters (for keyword matching)
|
|
51
55
|
BYTE_LOWER_U = 117 # 'u'
|
data/lib/cataract/pure/parser.rb
CHANGED
|
@@ -64,6 +64,9 @@ module Cataract
|
|
|
64
64
|
end
|
|
65
65
|
|
|
66
66
|
def initialize(css_string, parser_options: {}, parent_media_sym: nil, parent_media_query_id: nil, depth: 0)
|
|
67
|
+
# Type validation
|
|
68
|
+
raise TypeError, "css_string must be a String, got #{css_string.class}" unless css_string.is_a?(String)
|
|
69
|
+
|
|
67
70
|
# Private: Internal parsing state
|
|
68
71
|
@_css = css_string.dup.freeze
|
|
69
72
|
@_pos = 0
|
|
@@ -77,7 +80,8 @@ module Cataract
|
|
|
77
80
|
selector_lists: true,
|
|
78
81
|
base_uri: nil,
|
|
79
82
|
absolute_paths: false,
|
|
80
|
-
uri_resolver: nil
|
|
83
|
+
uri_resolver: nil,
|
|
84
|
+
raise_parse_errors: false
|
|
81
85
|
}.merge(parser_options)
|
|
82
86
|
|
|
83
87
|
# Private: Extract options to ivars to avoid repeated hash lookups in hot path
|
|
@@ -86,6 +90,34 @@ module Cataract
|
|
|
86
90
|
@_absolute_paths = @_parser_options[:absolute_paths]
|
|
87
91
|
@_uri_resolver = @_parser_options[:uri_resolver] || Cataract::DEFAULT_URI_RESOLVER
|
|
88
92
|
|
|
93
|
+
# Parse error handling options - extract to ivars for hot path performance
|
|
94
|
+
@_raise_parse_errors = @_parser_options[:raise_parse_errors]
|
|
95
|
+
if @_raise_parse_errors.is_a?(Hash)
|
|
96
|
+
# Granular control - default all to false (opt-in)
|
|
97
|
+
@_check_empty_values = @_raise_parse_errors[:empty_values] || false
|
|
98
|
+
@_check_malformed_declarations = @_raise_parse_errors[:malformed_declarations] || false
|
|
99
|
+
@_check_invalid_selectors = @_raise_parse_errors[:invalid_selectors] || false
|
|
100
|
+
@_check_invalid_selector_syntax = @_raise_parse_errors[:invalid_selector_syntax] || false
|
|
101
|
+
@_check_malformed_at_rules = @_raise_parse_errors[:malformed_at_rules] || false
|
|
102
|
+
@_check_unclosed_blocks = @_raise_parse_errors[:unclosed_blocks] || false
|
|
103
|
+
elsif @_raise_parse_errors == true
|
|
104
|
+
# Enable all error checks
|
|
105
|
+
@_check_empty_values = true
|
|
106
|
+
@_check_malformed_declarations = true
|
|
107
|
+
@_check_invalid_selectors = true
|
|
108
|
+
@_check_invalid_selector_syntax = true
|
|
109
|
+
@_check_malformed_at_rules = true
|
|
110
|
+
@_check_unclosed_blocks = true
|
|
111
|
+
else
|
|
112
|
+
# Disabled
|
|
113
|
+
@_check_empty_values = false
|
|
114
|
+
@_check_malformed_declarations = false
|
|
115
|
+
@_check_invalid_selectors = false
|
|
116
|
+
@_check_invalid_selector_syntax = false
|
|
117
|
+
@_check_malformed_at_rules = false
|
|
118
|
+
@_check_unclosed_blocks = false
|
|
119
|
+
end
|
|
120
|
+
|
|
89
121
|
# Private: Internal counters
|
|
90
122
|
@_media_query_id_counter = 0 # Next MediaQuery ID (0-indexed)
|
|
91
123
|
@_next_selector_list_id = 0 # Counter for selector list IDs
|
|
@@ -143,6 +175,13 @@ module Cataract
|
|
|
143
175
|
|
|
144
176
|
selectors.each do |individual_selector|
|
|
145
177
|
individual_selector.strip!
|
|
178
|
+
|
|
179
|
+
# Check for empty selector in comma-separated list
|
|
180
|
+
if @_check_invalid_selector_syntax && individual_selector.empty? && selectors.size > 1
|
|
181
|
+
raise ParseError.new('Invalid selector syntax: empty selector in comma-separated list',
|
|
182
|
+
css: @_css, pos: decl_start, type: :invalid_selector_syntax)
|
|
183
|
+
end
|
|
184
|
+
|
|
146
185
|
next if individual_selector.empty?
|
|
147
186
|
|
|
148
187
|
# Get rule ID for this selector
|
|
@@ -194,6 +233,13 @@ module Cataract
|
|
|
194
233
|
|
|
195
234
|
selectors.each do |individual_selector|
|
|
196
235
|
individual_selector.strip!
|
|
236
|
+
|
|
237
|
+
# Check for empty selector in comma-separated list
|
|
238
|
+
if @_check_invalid_selector_syntax && individual_selector.empty? && selectors.size > 1
|
|
239
|
+
raise ParseError.new('Invalid selector syntax: empty selector in comma-separated list',
|
|
240
|
+
css: @_css, pos: decl_start, type: :invalid_selector_syntax)
|
|
241
|
+
end
|
|
242
|
+
|
|
197
243
|
next if individual_selector.empty?
|
|
198
244
|
|
|
199
245
|
rule_id = @_rule_id_counter
|
|
@@ -303,6 +349,54 @@ module Cataract
|
|
|
303
349
|
end until @_pos == old_pos # No progress made # rubocop:disable Lint/Loop
|
|
304
350
|
end
|
|
305
351
|
|
|
352
|
+
# Check if a selector contains only valid CSS selector characters and sequences
# Returns true if valid, false if invalid
# Valid characters: a-z A-Z 0-9 - _ . # [ ] : * > + ~ ( ) ' " = ^ $ | \ & % / ! , whitespace
#
# Two adjacent dots (..) or hashes (##) are rejected wherever they occur. No
# quote tracking is done, so they are also rejected inside quoted attribute
# values. Any byte that is not on the whitelist above (which holds ASCII
# characters only) makes the selector invalid. An empty string is valid.
#
# @param selector_text [String] selector to check
# @return [Boolean]
def valid_selector_syntax?(selector_text)
  previous = nil

  selector_text.each_byte do |byte|
    # Double dot (..) and double hash (##) are invalid
    return false if byte == previous && (byte == BYTE_DOT || byte == BYTE_HASH)

    previous = byte

    # Alphanumeric
    next if (byte >= BYTE_LOWER_A && byte <= BYTE_LOWER_Z) ||
            (byte >= BYTE_UPPER_A && byte <= BYTE_UPPER_Z) ||
            (byte >= BYTE_DIGIT_0 && byte <= BYTE_DIGIT_9)

    # Whitespace
    next if byte == BYTE_SPACE || byte == BYTE_TAB || byte == BYTE_NEWLINE || byte == BYTE_CR

    # Valid CSS selector special characters
    case byte
    when BYTE_HYPHEN, BYTE_UNDERSCORE, BYTE_DOT, BYTE_HASH, BYTE_LBRACKET, BYTE_RBRACKET,
         BYTE_COLON, BYTE_ASTERISK, BYTE_GT, BYTE_PLUS, BYTE_TILDE, BYTE_LPAREN, BYTE_RPAREN,
         BYTE_SQUOTE, BYTE_DQUOTE, BYTE_EQUALS, BYTE_CARET, BYTE_DOLLAR,
         BYTE_PIPE, BYTE_BACKSLASH, BYTE_AMPERSAND, BYTE_PERCENT, BYTE_SLASH, BYTE_BANG,
         BYTE_COMMA
      next
    else
      # Invalid character found
      return false
    end
  end

  true
end
|
|
399
|
+
|
|
306
400
|
# Parse a single CSS declaration (property: value)
|
|
307
401
|
#
|
|
308
402
|
# Performance-critical helper that parses one declaration.
|
|
@@ -410,6 +504,12 @@ module Cataract
|
|
|
410
504
|
pos += 1
|
|
411
505
|
end
|
|
412
506
|
|
|
507
|
+
# Reached EOF without finding matching closing brace
|
|
508
|
+
if @_check_unclosed_blocks && depth > 0
|
|
509
|
+
raise ParseError.new('Unclosed block: missing closing brace',
|
|
510
|
+
css: @_css, pos: start_pos - 1, type: :unclosed_block)
|
|
511
|
+
end
|
|
512
|
+
|
|
413
513
|
pos
|
|
414
514
|
end
|
|
415
515
|
|
|
@@ -433,6 +533,29 @@ module Cataract
|
|
|
433
533
|
|
|
434
534
|
# Trim whitespace from selector (in-place to avoid allocation)
|
|
435
535
|
selector_text.strip!
|
|
536
|
+
|
|
537
|
+
# Validate selector (strict mode) - only if enabled to avoid overhead
|
|
538
|
+
if @_check_invalid_selectors
|
|
539
|
+
# Check for empty selector
|
|
540
|
+
if selector_text.empty?
|
|
541
|
+
raise ParseError.new('Invalid selector: empty selector',
|
|
542
|
+
css: @_css, pos: start_pos, type: :invalid_selector)
|
|
543
|
+
end
|
|
544
|
+
|
|
545
|
+
# Check if selector starts with a combinator (>, +, ~)
|
|
546
|
+
first_char = selector_text.getbyte(0)
|
|
547
|
+
if first_char == BYTE_GT || first_char == BYTE_PLUS || first_char == BYTE_TILDE
|
|
548
|
+
raise ParseError.new("Invalid selector: selector cannot start with combinator '#{selector_text[0]}'",
|
|
549
|
+
css: @_css, pos: start_pos, type: :invalid_selector)
|
|
550
|
+
end
|
|
551
|
+
end
|
|
552
|
+
|
|
553
|
+
# Check selector syntax (whitelist validation for invalid characters/sequences)
|
|
554
|
+
if @_check_invalid_selector_syntax && !valid_selector_syntax?(selector_text)
|
|
555
|
+
raise ParseError.new('Invalid selector syntax: selector contains invalid characters',
|
|
556
|
+
css: @_css, pos: start_pos, type: :invalid_selector_syntax)
|
|
557
|
+
end
|
|
558
|
+
|
|
436
559
|
selector_text
|
|
437
560
|
end
|
|
438
561
|
|
|
@@ -514,7 +637,7 @@ module Cataract
|
|
|
514
637
|
|
|
515
638
|
# This should never happen - parent_media_query_id should always be valid
|
|
516
639
|
if parent_mq.nil?
|
|
517
|
-
raise
|
|
640
|
+
raise ParseError, "Invalid parent_media_query_id: #{parent_media_query_id} (not found in @media_queries)"
|
|
518
641
|
end
|
|
519
642
|
|
|
520
643
|
# Combine parent media query with child
|
|
@@ -675,6 +798,18 @@ module Cataract
|
|
|
675
798
|
|
|
676
799
|
# Skip if no colon found (malformed)
|
|
677
800
|
if eof? || peek_byte != BYTE_COLON
|
|
801
|
+
# Check for malformed declaration (strict mode)
|
|
802
|
+
if @_check_malformed_declarations
|
|
803
|
+
property_text = byteslice_encoded(property_start, @_pos - property_start).strip
|
|
804
|
+
if property_text.empty?
|
|
805
|
+
raise ParseError.new('Malformed declaration: missing property name',
|
|
806
|
+
css: @_css, pos: property_start, type: :malformed_declaration)
|
|
807
|
+
else
|
|
808
|
+
raise ParseError.new("Malformed declaration: missing colon after property '#{property_text}'",
|
|
809
|
+
css: @_css, pos: property_start, type: :malformed_declaration)
|
|
810
|
+
end
|
|
811
|
+
end
|
|
812
|
+
|
|
678
813
|
# Try to recover by finding next ; or }
|
|
679
814
|
skip_to_semicolon_or_brace
|
|
680
815
|
next
|
|
@@ -726,7 +861,7 @@ module Cataract
|
|
|
726
861
|
value.strip!
|
|
727
862
|
|
|
728
863
|
# Check for !important (byte-by-byte, no regexp)
|
|
729
|
-
if value.bytesize
|
|
864
|
+
if value.bytesize >= 10
|
|
730
865
|
# Scan backwards to find !important
|
|
731
866
|
i = value.bytesize - 1
|
|
732
867
|
# Skip trailing whitespace
|
|
@@ -757,6 +892,12 @@ module Cataract
|
|
|
757
892
|
end
|
|
758
893
|
end
|
|
759
894
|
|
|
895
|
+
# Check for empty value (strict mode) - only if enabled to avoid overhead
|
|
896
|
+
if @_check_empty_values && value.empty?
|
|
897
|
+
raise ParseError.new("Empty value for property '#{property}'",
|
|
898
|
+
css: @_css, pos: property_start, type: :empty_value)
|
|
899
|
+
end
|
|
900
|
+
|
|
760
901
|
# Skip semicolon if present
|
|
761
902
|
@_pos += 1 if peek_byte == BYTE_SEMICOLON
|
|
762
903
|
|
|
@@ -827,13 +968,27 @@ module Cataract
|
|
|
827
968
|
if AT_RULE_TYPES.include?(at_rule_name)
|
|
828
969
|
skip_ws_and_comments
|
|
829
970
|
|
|
971
|
+
# Remember start of condition for error reporting
|
|
972
|
+
condition_start = @_pos
|
|
973
|
+
|
|
830
974
|
# Skip to opening brace
|
|
975
|
+
condition_end = @_pos
|
|
831
976
|
while !eof? && peek_byte != BYTE_LBRACE
|
|
977
|
+
condition_end = @_pos
|
|
832
978
|
@_pos += 1
|
|
833
979
|
end
|
|
834
980
|
|
|
835
981
|
return if eof? || peek_byte != BYTE_LBRACE
|
|
836
982
|
|
|
983
|
+
# Validate condition (strict mode) - @supports, @container, @scope require conditions
|
|
984
|
+
if @_check_malformed_at_rules && (at_rule_name == 'supports' || at_rule_name == 'container' || at_rule_name == 'scope')
|
|
985
|
+
condition_str = byteslice_encoded(condition_start, condition_end - condition_start).strip
|
|
986
|
+
if condition_str.empty?
|
|
987
|
+
raise ParseError.new("Malformed @#{at_rule_name}: missing condition",
|
|
988
|
+
css: @_css, pos: condition_start, type: :malformed_at_rule)
|
|
989
|
+
end
|
|
990
|
+
end
|
|
991
|
+
|
|
837
992
|
@_pos += 1 # skip '{'
|
|
838
993
|
|
|
839
994
|
# Find matching closing brace
|
|
@@ -908,6 +1063,13 @@ module Cataract
|
|
|
908
1063
|
child_media_string = byteslice_encoded(mq_start, mq_end - mq_start)
|
|
909
1064
|
# Keep media query exactly as written - parentheses are required per CSS spec
|
|
910
1065
|
child_media_string.strip!
|
|
1066
|
+
|
|
1067
|
+
# Validate @media has a query (strict mode)
|
|
1068
|
+
if @_check_malformed_at_rules && child_media_string.empty?
|
|
1069
|
+
raise ParseError.new('Malformed @media: missing media query or condition',
|
|
1070
|
+
css: @_css, pos: mq_start, type: :malformed_at_rule)
|
|
1071
|
+
end
|
|
1072
|
+
|
|
911
1073
|
child_media_sym = child_media_string.to_sym
|
|
912
1074
|
|
|
913
1075
|
# Split comma-separated media queries (e.g., "screen, print" -> ["screen", "print"])
|
data/lib/cataract/pure.rb
CHANGED
data/lib/cataract/stylesheet.rb
CHANGED
|
@@ -109,6 +109,9 @@ module Cataract
|
|
|
109
109
|
# @option options [Hash] :parser ({}) Parser configuration options
|
|
110
110
|
# - :selector_lists [Boolean] (true) Track selector lists for W3C-compliant serialization
|
|
111
111
|
def initialize(options = {})
|
|
112
|
+
# Type validation
|
|
113
|
+
raise TypeError, "options must be a Hash, got #{options.class}" unless options.is_a?(Hash)
|
|
114
|
+
|
|
112
115
|
# Support :imports as alias for :import (backwards compatibility)
|
|
113
116
|
options[:import] = options.delete(:imports) if options.key?(:imports) && !options.key?(:import)
|
|
114
117
|
|
|
@@ -119,12 +122,27 @@ module Cataract
|
|
|
119
122
|
base_dir: nil,
|
|
120
123
|
absolute_paths: false,
|
|
121
124
|
uri_resolver: nil,
|
|
122
|
-
parser: {}
|
|
125
|
+
parser: {},
|
|
126
|
+
raise_parse_errors: false
|
|
123
127
|
}.merge(options)
|
|
124
128
|
|
|
129
|
+
# Type validation for specific options
|
|
130
|
+
if @options[:import_fetcher] && !@options[:import_fetcher].respond_to?(:call)
|
|
131
|
+
raise TypeError, "import_fetcher must be a Proc or callable, got #{@options[:import_fetcher].class}"
|
|
132
|
+
end
|
|
133
|
+
|
|
134
|
+
if @options[:base_uri] && !@options[:base_uri].is_a?(String)
|
|
135
|
+
raise TypeError, "base_uri must be a String, got #{@options[:base_uri].class}"
|
|
136
|
+
end
|
|
137
|
+
|
|
138
|
+
if @options[:uri_resolver] && !@options[:uri_resolver].respond_to?(:call)
|
|
139
|
+
raise TypeError, "uri_resolver must be a Proc or callable, got #{@options[:uri_resolver].class}"
|
|
140
|
+
end
|
|
141
|
+
|
|
125
142
|
# Parser options with defaults (stored for passing to parser)
|
|
126
143
|
@parser_options = {
|
|
127
|
-
selector_lists: true
|
|
144
|
+
selector_lists: true,
|
|
145
|
+
raise_parse_errors: @options[:raise_parse_errors]
|
|
128
146
|
}.merge(@options[:parser] || {})
|
|
129
147
|
|
|
130
148
|
@rules = [] # Flat array of Rule structs
|
|
@@ -822,6 +840,10 @@ module Cataract
|
|
|
822
840
|
new_imports = result[:imports]
|
|
823
841
|
new_imports.each do |import|
|
|
824
842
|
import.id += offset
|
|
843
|
+
# Update media_query_id to point to offsetted MediaQuery
|
|
844
|
+
if import.media_query_id
|
|
845
|
+
import.media_query_id += media_query_id_offset
|
|
846
|
+
end
|
|
825
847
|
@imports << import
|
|
826
848
|
end
|
|
827
849
|
|
data/lib/cataract/version.rb
CHANGED
data/lib/cataract.rb
CHANGED
metadata
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: cataract
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.2.4
|
|
4
|
+
version: 0.2.5
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- James Cook
|
|
@@ -83,6 +83,7 @@ files:
|
|
|
83
83
|
- lib/cataract/constants.rb
|
|
84
84
|
- lib/cataract/declaration.rb
|
|
85
85
|
- lib/cataract/declarations.rb
|
|
86
|
+
- lib/cataract/error.rb
|
|
86
87
|
- lib/cataract/import_resolver.rb
|
|
87
88
|
- lib/cataract/import_statement.rb
|
|
88
89
|
- lib/cataract/media_query.rb
|