cataract 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.clang-tidy +30 -0
- data/.github/workflows/ci-macos.yml +12 -0
- data/.github/workflows/ci.yml +77 -0
- data/.github/workflows/test.yml +76 -0
- data/.gitignore +45 -0
- data/.overcommit.yml +38 -0
- data/.rubocop.yml +83 -0
- data/BENCHMARKS.md +201 -0
- data/CHANGELOG.md +1 -0
- data/Gemfile +27 -0
- data/LICENSE +21 -0
- data/RAGEL_MIGRATION.md +60 -0
- data/README.md +292 -0
- data/Rakefile +209 -0
- data/benchmarks/benchmark_harness.rb +193 -0
- data/benchmarks/benchmark_merging.rb +121 -0
- data/benchmarks/benchmark_optimization_comparison.rb +168 -0
- data/benchmarks/benchmark_parsing.rb +153 -0
- data/benchmarks/benchmark_ragel_removal.rb +56 -0
- data/benchmarks/benchmark_runner.rb +70 -0
- data/benchmarks/benchmark_serialization.rb +180 -0
- data/benchmarks/benchmark_shorthand.rb +109 -0
- data/benchmarks/benchmark_shorthand_expansion.rb +176 -0
- data/benchmarks/benchmark_specificity.rb +124 -0
- data/benchmarks/benchmark_string_allocation.rb +151 -0
- data/benchmarks/benchmark_stylesheet_to_s.rb +62 -0
- data/benchmarks/benchmark_to_s_cached.rb +55 -0
- data/benchmarks/benchmark_value_splitter.rb +54 -0
- data/benchmarks/benchmark_yjit.rb +158 -0
- data/benchmarks/benchmark_yjit_workers.rb +61 -0
- data/benchmarks/profile_to_s.rb +23 -0
- data/benchmarks/speedup_calculator.rb +83 -0
- data/benchmarks/system_metadata.rb +81 -0
- data/benchmarks/templates/benchmarks.md.erb +221 -0
- data/benchmarks/yjit_tests.rb +141 -0
- data/cataract.gemspec +34 -0
- data/cliff.toml +92 -0
- data/examples/color_conversion_visual_test/color_conversion_test.html +3603 -0
- data/examples/color_conversion_visual_test/generate.rb +202 -0
- data/examples/color_conversion_visual_test/template.html.erb +259 -0
- data/examples/css_analyzer/analyzer.rb +164 -0
- data/examples/css_analyzer/analyzers/base.rb +33 -0
- data/examples/css_analyzer/analyzers/colors.rb +133 -0
- data/examples/css_analyzer/analyzers/important.rb +88 -0
- data/examples/css_analyzer/analyzers/properties.rb +61 -0
- data/examples/css_analyzer/analyzers/specificity.rb +68 -0
- data/examples/css_analyzer/templates/report.html.erb +575 -0
- data/examples/css_analyzer.rb +69 -0
- data/examples/github_analysis.html +5343 -0
- data/ext/cataract/cataract.c +1086 -0
- data/ext/cataract/cataract.h +174 -0
- data/ext/cataract/css_parser.c +1435 -0
- data/ext/cataract/extconf.rb +48 -0
- data/ext/cataract/import_scanner.c +174 -0
- data/ext/cataract/merge.c +973 -0
- data/ext/cataract/shorthand_expander.c +902 -0
- data/ext/cataract/specificity.c +213 -0
- data/ext/cataract/value_splitter.c +116 -0
- data/ext/cataract_color/cataract_color.c +16 -0
- data/ext/cataract_color/color_conversion.c +1687 -0
- data/ext/cataract_color/color_conversion.h +136 -0
- data/ext/cataract_color/color_conversion_lab.c +571 -0
- data/ext/cataract_color/color_conversion_named.c +259 -0
- data/ext/cataract_color/color_conversion_oklab.c +547 -0
- data/ext/cataract_color/extconf.rb +23 -0
- data/ext/cataract_old/cataract.c +393 -0
- data/ext/cataract_old/cataract.h +250 -0
- data/ext/cataract_old/css_parser.c +933 -0
- data/ext/cataract_old/extconf.rb +67 -0
- data/ext/cataract_old/import_scanner.c +174 -0
- data/ext/cataract_old/merge.c +776 -0
- data/ext/cataract_old/shorthand_expander.c +902 -0
- data/ext/cataract_old/specificity.c +213 -0
- data/ext/cataract_old/stylesheet.c +290 -0
- data/ext/cataract_old/value_splitter.c +116 -0
- data/lib/cataract/at_rule.rb +97 -0
- data/lib/cataract/color_conversion.rb +18 -0
- data/lib/cataract/declarations.rb +332 -0
- data/lib/cataract/import_resolver.rb +210 -0
- data/lib/cataract/rule.rb +131 -0
- data/lib/cataract/stylesheet.rb +716 -0
- data/lib/cataract/stylesheet_scope.rb +257 -0
- data/lib/cataract/version.rb +5 -0
- data/lib/cataract.rb +107 -0
- data/lib/tasks/gem.rake +158 -0
- data/scripts/fuzzer/run.rb +828 -0
- data/scripts/fuzzer/worker.rb +99 -0
- data/scripts/generate_benchmarks_md.rb +155 -0
- metadata +135 -0
|
@@ -0,0 +1,1435 @@
|
|
|
1
|
+
/*
|
|
2
|
+
 * css_parser.c - CSS parser implementation with flat rule array
|
|
3
|
+
*
|
|
4
|
+
* Key differences from original:
|
|
5
|
+
* - Flat @rules array with rule IDs (0-indexed)
|
|
6
|
+
* - Separate @media_index hash mapping media queries to rule ID arrays
|
|
7
|
+
* - Handles nested @media queries by combining conditions
|
|
8
|
+
*
|
|
9
|
+
* TODO: Unify !important detection into a macro/helper function
|
|
10
|
+
* Currently duplicated in parse_declarations() and parse_mixed_block()
|
|
11
|
+
*/
|
|
12
|
+
|
|
13
|
+
#include "cataract.h"
|
|
14
|
+
#include <string.h>
|
|
15
|
+
|
|
16
|
+
// Parser context passed through recursive calls.
// A single instance is shared (by pointer) by every parsing helper in this
// file, so the counters below accumulate across the whole parse.
typedef struct {
    VALUE rules_array;        // Array of Rule structs (placeholders may be pushed first and filled later)
    VALUE media_index;        // Hash: Symbol => Array of rule IDs
    int rule_id_counter;      // Next rule ID (0-indexed); post-incremented each time a rule is created
    int media_query_count;    // Safety limit for media queries; compared against MAX_MEDIA_QUERIES
                              // and bumped on every successful intern_media_query_safe() call
    st_table *media_cache;    // Parse-time cache: string => parsed media types
                              // NOTE(review): not referenced by the helpers visible here - confirm ownership/lifetime at the call site
    int has_nesting;          // Set to 1 if any nested rules are created
    int depth;                // Current recursion depth (safety limit); compared against MAX_PARSE_DEPTH
} ParserContext;
|
|
26
|
+
|
|
27
|
+
// Macro to skip CSS comments /* ... */
// Usage: SKIP_COMMENT(p, end) where p is current position, end is limit
// Side effect: advances p past the comment and continues to next iteration
// Note: Uses RB_UNLIKELY since comments are rare in typical CSS
//
// Constraints:
//   - Expands to a bare `if` containing `continue`, so it is only valid
//     directly inside a loop body (and must not be followed by `else`).
//   - `ptr` must be a modifiable lvalue; both arguments are evaluated
//     several times, so do not pass expressions with side effects.
//   - If the comment is never closed, `ptr` is left within one byte of
//     `limit` (it is not advanced past `limit`) and `continue` still runs.
#define SKIP_COMMENT(ptr, limit) \
    if (RB_UNLIKELY((ptr) + 1 < (limit) && *(ptr) == '/' && *((ptr) + 1) == '*')) { \
        (ptr) += 2; \
        while ((ptr) + 1 < (limit) && !(*(ptr) == '*' && *((ptr) + 1) == '/')) (ptr)++; \
        if ((ptr) + 1 < (limit)) (ptr) += 2; \
        continue; \
    }
|
|
38
|
+
|
|
39
|
+
// Locate the '}' that closes an already-opened block.
//
// start: first byte AFTER the opening '{' (nesting level is assumed to be 1)
// end:   one-past-the-last byte that may be inspected
//
// Returns a pointer to the matching '}', or `end` when the block is never
// closed. Nested '{' ... '}' pairs are balanced while scanning; string
// literals and comments are not given any special treatment.
static inline const char* find_matching_brace(const char *start, const char *end) {
    int open_count = 1;
    const char *cursor;

    for (cursor = start; cursor < end; cursor++) {
        switch (*cursor) {
        case '{':
            open_count++;
            break;
        case '}':
            open_count--;
            break;
        default:
            break;
        }

        // Stop ON the closing brace rather than after it
        if (open_count == 0) {
            return cursor;
        }
    }

    return cursor;
}
|
|
53
|
+
|
|
54
|
+
// Locate the ')' that closes an already-opened parenthesis.
//
// start: first byte AFTER the opening '(' (nesting level is assumed to be 1)
// end:   one-past-the-last byte that may be inspected
//
// Returns a pointer to the matching ')', or `end` when it is never closed.
// Nested '(' ... ')' pairs are balanced while scanning.
static inline const char* find_matching_paren(const char *start, const char *end) {
    int open_count = 1;
    const char *cursor;

    for (cursor = start; cursor < end; cursor++) {
        switch (*cursor) {
        case '(':
            open_count++;
            break;
        case ')':
            open_count--;
            break;
        default:
            break;
        }

        // Stop ON the closing paren rather than after it
        if (open_count == 0) {
            return cursor;
        }
    }

    return cursor;
}
|
|
68
|
+
|
|
69
|
+
// Lowercase property name (CSS property names are ASCII-only)
|
|
70
|
+
// Non-static so merge_new.c can use it
|
|
71
|
+
VALUE lowercase_property(VALUE property_str) {
|
|
72
|
+
Check_Type(property_str, T_STRING);
|
|
73
|
+
|
|
74
|
+
long len = RSTRING_LEN(property_str);
|
|
75
|
+
const char *src = RSTRING_PTR(property_str);
|
|
76
|
+
|
|
77
|
+
VALUE result = rb_str_buf_new(len);
|
|
78
|
+
rb_enc_associate(result, rb_usascii_encoding());
|
|
79
|
+
|
|
80
|
+
for (long i = 0; i < len; i++) {
|
|
81
|
+
char c = src[i];
|
|
82
|
+
if (c >= 'A' && c <= 'Z') {
|
|
83
|
+
c += 32; // Lowercase
|
|
84
|
+
}
|
|
85
|
+
rb_str_buf_cat(result, &c, 1);
|
|
86
|
+
}
|
|
87
|
+
|
|
88
|
+
return result;
|
|
89
|
+
}
|
|
90
|
+
|
|
91
|
+
/*
 * Decide whether a rule body holds nested rules in addition to declarations.
 *
 * The body is scanned one "statement" at a time, where a statement ends at
 * ';' or at a newline. Only the first non-blank byte of each statement is
 * examined:
 *   - '@'                         -> nested at-rule, report immediately
 *   - & . # [ : * > + ~           -> candidate selector; confirmed when a '{'
 *                                    appears before the next ';' or newline
 *   - anything else               -> treated as a declaration and skipped
 *
 * Examples:
 *   "color: red; font-size: 14px;"      -> 0
 *   "color: red; & .child { ... }"      -> 1
 *   "color: red; @media (...) { ... }"  -> 1
 *
 * start/end delimit the body (end is exclusive).
 * Returns: 1 if a nested selector or at-rule was found, 0 otherwise.
 */
static int has_nested_selectors(const char *start, const char *end) {
    for (const char *cur = start; cur < end; ) {
        // Move to the first significant byte of the statement
        trim_leading(&cur, end);
        if (cur >= end) break;

        // Comments restart the loop via the macro's embedded `continue`
        SKIP_COMMENT(cur, end);

        switch (*cur) {
        case '@':
            return 1;  // Nested at-rule

        case '&': case '.': case '#': case '[': case ':':
        case '*': case '>': case '+': case '~': {
            // Candidate selector: is there a '{' before this statement ends?
            const char *scan = cur + 1;
            while (scan < end) {
                const char ch = *scan;
                if (ch == '{') return 1;  // Found nested selector
                if (ch == ';' || ch == '\n') break;
                scan++;
            }
            break;
        }

        default:
            break;
        }

        // Not nested: advance past the terminating ';' or newline
        while (cur < end && *cur != ';' && *cur != '\n') cur++;
        if (cur < end) cur++;
    }

    return 0;  // No nested selectors found
}
|
|
149
|
+
|
|
150
|
+
/*
|
|
151
|
+
* Resolve nested selector against parent selector
|
|
152
|
+
*
|
|
153
|
+
* Examples:
|
|
154
|
+
* resolve_nested_selector(".parent", "& .child") => ".parent .child" (explicit)
|
|
155
|
+
* resolve_nested_selector(".parent", "&:hover") => ".parent:hover" (explicit)
|
|
156
|
+
* resolve_nested_selector(".parent", "&.active") => ".parent.active" (explicit)
|
|
157
|
+
* resolve_nested_selector(".parent", ".child") => ".parent .child" (implicit)
|
|
158
|
+
* resolve_nested_selector(".parent", "> .child") => ".parent > .child" (implicit combinator)
|
|
159
|
+
*
|
|
160
|
+
* Returns: [resolved_selector (String), nesting_style (Fixnum)]
|
|
161
|
+
* nesting_style: 0 = NESTING_STYLE_IMPLICIT, 1 = NESTING_STYLE_EXPLICIT
|
|
162
|
+
*/
|
|
163
|
+
static VALUE resolve_nested_selector(VALUE parent_selector, const char *nested_sel, long nested_len) {
|
|
164
|
+
const char *parent = RSTRING_PTR(parent_selector);
|
|
165
|
+
long parent_len = RSTRING_LEN(parent_selector);
|
|
166
|
+
|
|
167
|
+
// Check if nested selector contains &
|
|
168
|
+
int has_ampersand = 0;
|
|
169
|
+
for (long i = 0; i < nested_len; i++) {
|
|
170
|
+
if (nested_sel[i] == '&') {
|
|
171
|
+
has_ampersand = 1;
|
|
172
|
+
break;
|
|
173
|
+
}
|
|
174
|
+
}
|
|
175
|
+
|
|
176
|
+
VALUE resolved;
|
|
177
|
+
int nesting_style;
|
|
178
|
+
|
|
179
|
+
if (has_ampersand) {
|
|
180
|
+
// Explicit nesting - replace & with parent
|
|
181
|
+
// Example: parent=".button", nested="&:hover" => ".button:hover"
|
|
182
|
+
// &:hover
|
|
183
|
+
// ^ - Replace & with ".button"
|
|
184
|
+
// ^^^^^^ - Copy rest as-is
|
|
185
|
+
nesting_style = NESTING_STYLE_EXPLICIT;
|
|
186
|
+
|
|
187
|
+
// Check if selector starts with a combinator (relative selector)
|
|
188
|
+
// Example: "+ .bar + &" should become ".foo + .bar + .foo"
|
|
189
|
+
const char *nested_trimmed = nested_sel;
|
|
190
|
+
const char *nested_trimmed_end = nested_sel + nested_len;
|
|
191
|
+
trim_leading(&nested_trimmed, nested_trimmed_end);
|
|
192
|
+
|
|
193
|
+
int starts_with_combinator = 0;
|
|
194
|
+
if (nested_trimmed < nested_trimmed_end) {
|
|
195
|
+
char first_char = *nested_trimmed;
|
|
196
|
+
if (first_char == '+' || first_char == '>' || first_char == '~') {
|
|
197
|
+
starts_with_combinator = 1;
|
|
198
|
+
}
|
|
199
|
+
}
|
|
200
|
+
|
|
201
|
+
// Build result by replacing & with parent (add extra space if starts with combinator)
|
|
202
|
+
VALUE result = rb_str_buf_new(parent_len + nested_len + (starts_with_combinator ? parent_len + 2 : 0));
|
|
203
|
+
rb_enc_associate(result, rb_utf8_encoding());
|
|
204
|
+
|
|
205
|
+
// If starts with combinator, prepend parent first with space
|
|
206
|
+
// Example: "+ .bar + &" => ".foo + .bar + .foo"
|
|
207
|
+
if (starts_with_combinator) {
|
|
208
|
+
rb_str_buf_cat(result, parent, parent_len);
|
|
209
|
+
rb_str_buf_cat(result, " ", 1);
|
|
210
|
+
}
|
|
211
|
+
|
|
212
|
+
long i = 0;
|
|
213
|
+
while (i < nested_len) {
|
|
214
|
+
if (nested_sel[i] == '&') { // At: '&'
|
|
215
|
+
// Replace & with parent selector
|
|
216
|
+
rb_str_buf_cat(result, parent, parent_len); // Output: ".button"
|
|
217
|
+
i++; // Move to: ':'
|
|
218
|
+
} else {
|
|
219
|
+
// Copy character as-is
|
|
220
|
+
rb_str_buf_cat(result, &nested_sel[i], 1); // Output: ':hover'
|
|
221
|
+
i++;
|
|
222
|
+
}
|
|
223
|
+
}
|
|
224
|
+
|
|
225
|
+
resolved = result;
|
|
226
|
+
} else {
|
|
227
|
+
// Implicit nesting - prepend parent with appropriate spacing
|
|
228
|
+
// Example: parent=".parent", nested=".child" => ".parent .child"
|
|
229
|
+
// .child
|
|
230
|
+
// - Prepend ".parent " before ".child"
|
|
231
|
+
// Example: parent=".parent", nested="> .child" => ".parent > .child"
|
|
232
|
+
// > .child
|
|
233
|
+
// - Prepend ".parent " before "> .child"
|
|
234
|
+
nesting_style = NESTING_STYLE_IMPLICIT;
|
|
235
|
+
|
|
236
|
+
const char *nested_trimmed = nested_sel;
|
|
237
|
+
const char *nested_end = nested_sel + nested_len;
|
|
238
|
+
|
|
239
|
+
// Trim leading whitespace from nested selector
|
|
240
|
+
trim_leading(&nested_trimmed, nested_end);
|
|
241
|
+
long trimmed_len = nested_end - nested_trimmed;
|
|
242
|
+
|
|
243
|
+
VALUE result = rb_str_buf_new(parent_len + 1 + trimmed_len);
|
|
244
|
+
rb_enc_associate(result, rb_utf8_encoding());
|
|
245
|
+
|
|
246
|
+
// Add parent // Output: ".parent"
|
|
247
|
+
rb_str_buf_cat(result, parent, parent_len);
|
|
248
|
+
|
|
249
|
+
// Add separator space (before combinator or for implicit descendant) // Output: " "
|
|
250
|
+
rb_str_buf_cat(result, " ", 1);
|
|
251
|
+
|
|
252
|
+
// Add nested selector (trimmed) // Output: ".child"
|
|
253
|
+
rb_str_buf_cat(result, nested_trimmed, trimmed_len);
|
|
254
|
+
|
|
255
|
+
resolved = result;
|
|
256
|
+
}
|
|
257
|
+
|
|
258
|
+
// Return array [resolved_selector, nesting_style]
|
|
259
|
+
VALUE result_array = rb_ary_new_from_args(2, resolved, INT2FIX(nesting_style));
|
|
260
|
+
|
|
261
|
+
// Guard parent_selector since we extracted C pointer and did allocations
|
|
262
|
+
RB_GC_GUARD(parent_selector);
|
|
263
|
+
|
|
264
|
+
return result_array;
|
|
265
|
+
}
|
|
266
|
+
|
|
267
|
+
/*
|
|
268
|
+
* Extract media types from a media query string
|
|
269
|
+
* Examples:
|
|
270
|
+
* "screen" => [:screen]
|
|
271
|
+
* "screen, print" => [:screen, :print]
|
|
272
|
+
* "screen and (min-width: 768px)" => [:screen]
|
|
273
|
+
* "(min-width: 768px)" => [] // No media type, just condition
|
|
274
|
+
*
|
|
275
|
+
* Returns: Ruby array of symbols
|
|
276
|
+
*/
|
|
277
|
+
static VALUE extract_media_types(const char *query, long query_len) {
|
|
278
|
+
VALUE types = rb_ary_new();
|
|
279
|
+
|
|
280
|
+
const char *p = query;
|
|
281
|
+
const char *end = query + query_len;
|
|
282
|
+
|
|
283
|
+
while (p < end) {
|
|
284
|
+
// Skip whitespace
|
|
285
|
+
while (p < end && IS_WHITESPACE(*p)) p++;
|
|
286
|
+
if (p >= end) break;
|
|
287
|
+
|
|
288
|
+
// Check for opening paren (skip conditions like "(min-width: 768px)")
|
|
289
|
+
if (*p == '(') {
|
|
290
|
+
// Skip to matching closing paren
|
|
291
|
+
const char *closing = find_matching_paren(p, end);
|
|
292
|
+
p = (closing < end) ? closing + 1 : closing;
|
|
293
|
+
continue;
|
|
294
|
+
}
|
|
295
|
+
|
|
296
|
+
// Find end of word (media type or keyword)
|
|
297
|
+
const char *word_start = p;
|
|
298
|
+
while (p < end && !IS_WHITESPACE(*p) && *p != ',' && *p != '(' && *p != ':') {
|
|
299
|
+
p++;
|
|
300
|
+
}
|
|
301
|
+
|
|
302
|
+
if (p > word_start) {
|
|
303
|
+
long word_len = p - word_start;
|
|
304
|
+
|
|
305
|
+
// Check if this is a media feature (followed by ':')
|
|
306
|
+
// Example: "orientation" in "orientation: landscape" is not a media type
|
|
307
|
+
int is_media_feature = (p < end && *p == ':');
|
|
308
|
+
|
|
309
|
+
// Check if it's a keyword (and, or, not, only)
|
|
310
|
+
int is_keyword = (word_len == 3 && strncmp(word_start, "and", 3) == 0) ||
|
|
311
|
+
(word_len == 2 && strncmp(word_start, "or", 2) == 0) ||
|
|
312
|
+
(word_len == 3 && strncmp(word_start, "not", 3) == 0) ||
|
|
313
|
+
(word_len == 4 && strncmp(word_start, "only", 4) == 0);
|
|
314
|
+
|
|
315
|
+
if (!is_keyword && !is_media_feature) {
|
|
316
|
+
// This is a media type - add it as symbol
|
|
317
|
+
VALUE type_sym = ID2SYM(rb_intern2(word_start, word_len));
|
|
318
|
+
rb_ary_push(types, type_sym);
|
|
319
|
+
}
|
|
320
|
+
}
|
|
321
|
+
|
|
322
|
+
// Skip to comma or end
|
|
323
|
+
while (p < end && *p != ',') {
|
|
324
|
+
if (*p == '(') {
|
|
325
|
+
// Skip condition
|
|
326
|
+
const char *closing = find_matching_paren(p, end);
|
|
327
|
+
p = (closing < end) ? closing + 1 : closing;
|
|
328
|
+
} else {
|
|
329
|
+
p++;
|
|
330
|
+
}
|
|
331
|
+
}
|
|
332
|
+
|
|
333
|
+
if (p < end && *p == ',') p++; // Skip comma
|
|
334
|
+
}
|
|
335
|
+
|
|
336
|
+
return types;
|
|
337
|
+
}
|
|
338
|
+
|
|
339
|
+
/*
|
|
340
|
+
* Add rule ID to media index for a given media query symbol
|
|
341
|
+
* Creates array if it doesn't exist yet
|
|
342
|
+
*/
|
|
343
|
+
static void add_to_media_index(VALUE media_index, VALUE media_sym, int rule_id) {
|
|
344
|
+
VALUE rule_ids = rb_hash_aref(media_index, media_sym);
|
|
345
|
+
|
|
346
|
+
if (NIL_P(rule_ids)) {
|
|
347
|
+
rule_ids = rb_ary_new();
|
|
348
|
+
rb_hash_aset(media_index, media_sym, rule_ids);
|
|
349
|
+
}
|
|
350
|
+
|
|
351
|
+
rb_ary_push(rule_ids, INT2FIX(rule_id));
|
|
352
|
+
}
|
|
353
|
+
|
|
354
|
+
/*
|
|
355
|
+
* Update media index with rule ID for given media query
|
|
356
|
+
* Extracts media types and adds rule to each type's array
|
|
357
|
+
* Also adds to the full query symbol
|
|
358
|
+
*/
|
|
359
|
+
static void update_media_index(ParserContext *ctx, VALUE media_sym, int rule_id) {
|
|
360
|
+
if (NIL_P(media_sym)) {
|
|
361
|
+
return; // No media query - rule applies to all media
|
|
362
|
+
}
|
|
363
|
+
|
|
364
|
+
// Add to full query symbol
|
|
365
|
+
add_to_media_index(ctx->media_index, media_sym, rule_id);
|
|
366
|
+
|
|
367
|
+
// Extract media types and add to each (if different from full query)
|
|
368
|
+
VALUE media_str = rb_sym2str(media_sym);
|
|
369
|
+
const char *query = RSTRING_PTR(media_str);
|
|
370
|
+
long query_len = RSTRING_LEN(media_str);
|
|
371
|
+
|
|
372
|
+
VALUE media_types = extract_media_types(query, query_len);
|
|
373
|
+
long types_len = RARRAY_LEN(media_types);
|
|
374
|
+
|
|
375
|
+
for (long i = 0; i < types_len; i++) {
|
|
376
|
+
VALUE type_sym = rb_ary_entry(media_types, i);
|
|
377
|
+
// Only add if different from full query (avoid duplicates)
|
|
378
|
+
if (type_sym != media_sym) {
|
|
379
|
+
add_to_media_index(ctx->media_index, type_sym, rule_id);
|
|
380
|
+
}
|
|
381
|
+
}
|
|
382
|
+
|
|
383
|
+
// Guard media_str since we extracted C pointer and called extract_media_types (which allocates)
|
|
384
|
+
RB_GC_GUARD(media_str);
|
|
385
|
+
}
|
|
386
|
+
|
|
387
|
+
/*
|
|
388
|
+
* Parse declaration block into array of Declaration structs
|
|
389
|
+
*
|
|
390
|
+
* Example input: "color: red; background: url(image.png); font-size: 14px !important"
|
|
391
|
+
* Example output: [Declaration("color", "red", false),
|
|
392
|
+
* Declaration("background", "url(image.png)", false),
|
|
393
|
+
* Declaration("font-size", "14px", true)]
|
|
394
|
+
*
|
|
395
|
+
* Handles:
|
|
396
|
+
* - Multiple declarations separated by semicolons
|
|
397
|
+
* - Values containing parentheses (e.g., url(...), rgba(...))
|
|
398
|
+
* - !important flag
|
|
399
|
+
*/
|
|
400
|
+
static VALUE parse_declarations(const char *start, const char *end) {
|
|
401
|
+
VALUE declarations = rb_ary_new();
|
|
402
|
+
|
|
403
|
+
const char *pos = start;
|
|
404
|
+
while (pos < end) {
|
|
405
|
+
// Skip whitespace and semicolons
|
|
406
|
+
while (pos < end && (IS_WHITESPACE(*pos) || *pos == ';')) {
|
|
407
|
+
pos++;
|
|
408
|
+
}
|
|
409
|
+
if (pos >= end) break;
|
|
410
|
+
|
|
411
|
+
// Find property (up to colon)
|
|
412
|
+
// Example: "color: red; ..."
|
|
413
|
+
// ^pos ^pos (at :)
|
|
414
|
+
const char *prop_start = pos;
|
|
415
|
+
while (pos < end && *pos != ':') pos++;
|
|
416
|
+
if (pos >= end) break; // No colon found
|
|
417
|
+
|
|
418
|
+
const char *prop_end = pos;
|
|
419
|
+
// Trim whitespace from property
|
|
420
|
+
trim_trailing(prop_start, &prop_end);
|
|
421
|
+
trim_leading(&prop_start, prop_end);
|
|
422
|
+
|
|
423
|
+
pos++; // Skip colon
|
|
424
|
+
|
|
425
|
+
// Skip whitespace after colon
|
|
426
|
+
while (pos < end && IS_WHITESPACE(*pos)) {
|
|
427
|
+
pos++;
|
|
428
|
+
}
|
|
429
|
+
|
|
430
|
+
// Find value (up to semicolon or end)
|
|
431
|
+
// Must track paren depth to avoid breaking on semicolons inside url() or rgba()
|
|
432
|
+
// Example: "url(data:image/svg+xml;base64,...); next-prop: ..."
|
|
433
|
+
// ^val_start ^pos (at ; outside parens)
|
|
434
|
+
const char *val_start = pos;
|
|
435
|
+
int paren_depth = 0;
|
|
436
|
+
while (pos < end) {
|
|
437
|
+
if (*pos == '(') { // At: '('
|
|
438
|
+
paren_depth++; // Depth: 1
|
|
439
|
+
} else if (*pos == ')') { // At: ')'
|
|
440
|
+
paren_depth--; // Depth: 0
|
|
441
|
+
} else if (*pos == ';' && paren_depth == 0) { // At: ';' (outside parens)
|
|
442
|
+
break; // Found terminating semicolon
|
|
443
|
+
}
|
|
444
|
+
pos++;
|
|
445
|
+
}
|
|
446
|
+
const char *val_end = pos;
|
|
447
|
+
|
|
448
|
+
// Trim trailing whitespace from value
|
|
449
|
+
trim_trailing(val_start, &val_end);
|
|
450
|
+
|
|
451
|
+
// Check for !important
|
|
452
|
+
int is_important = 0;
|
|
453
|
+
if (val_end - val_start >= 10) { // strlen("!important") = 10
|
|
454
|
+
const char *check = val_end - 10;
|
|
455
|
+
while (check < val_end && IS_WHITESPACE(*check)) check++;
|
|
456
|
+
if (check < val_end && *check == '!') {
|
|
457
|
+
check++;
|
|
458
|
+
while (check < val_end && IS_WHITESPACE(*check)) check++;
|
|
459
|
+
// strncmp safely handles remaining length check
|
|
460
|
+
if (check + 9 <= val_end && strncmp(check, "important", 9) == 0) {
|
|
461
|
+
is_important = 1;
|
|
462
|
+
const char *important_pos = check - 1;
|
|
463
|
+
while (important_pos > val_start && (IS_WHITESPACE(*(important_pos-1)) || *(important_pos-1) == '!')) {
|
|
464
|
+
important_pos--;
|
|
465
|
+
}
|
|
466
|
+
val_end = important_pos;
|
|
467
|
+
}
|
|
468
|
+
}
|
|
469
|
+
}
|
|
470
|
+
|
|
471
|
+
// Final trim
|
|
472
|
+
trim_trailing(val_start, &val_end);
|
|
473
|
+
|
|
474
|
+
// Skip if value is empty
|
|
475
|
+
if (val_end > val_start) {
|
|
476
|
+
long prop_len = prop_end - prop_start;
|
|
477
|
+
long val_len = val_end - val_start;
|
|
478
|
+
|
|
479
|
+
// Check property name length
|
|
480
|
+
if (prop_len > MAX_PROPERTY_NAME_LENGTH) {
|
|
481
|
+
rb_raise(eSizeError,
|
|
482
|
+
"Property name too long: %ld bytes (max %d)",
|
|
483
|
+
prop_len, MAX_PROPERTY_NAME_LENGTH);
|
|
484
|
+
}
|
|
485
|
+
|
|
486
|
+
// Check property value length
|
|
487
|
+
if (val_len > MAX_PROPERTY_VALUE_LENGTH) {
|
|
488
|
+
rb_raise(eSizeError,
|
|
489
|
+
"Property value too long: %ld bytes (max %d)",
|
|
490
|
+
val_len, MAX_PROPERTY_VALUE_LENGTH);
|
|
491
|
+
}
|
|
492
|
+
|
|
493
|
+
// Create property string and lowercase it
|
|
494
|
+
VALUE property_raw = rb_usascii_str_new(prop_start, prop_len);
|
|
495
|
+
VALUE property = lowercase_property(property_raw);
|
|
496
|
+
VALUE value = rb_utf8_str_new(val_start, val_len);
|
|
497
|
+
|
|
498
|
+
// Create Declaration struct
|
|
499
|
+
VALUE decl = rb_struct_new(cDeclaration,
|
|
500
|
+
property,
|
|
501
|
+
value,
|
|
502
|
+
is_important ? Qtrue : Qfalse
|
|
503
|
+
);
|
|
504
|
+
|
|
505
|
+
rb_ary_push(declarations, decl);
|
|
506
|
+
}
|
|
507
|
+
|
|
508
|
+
if (pos < end && *pos == ';') pos++; // Skip semicolon if present
|
|
509
|
+
}
|
|
510
|
+
|
|
511
|
+
return declarations;
|
|
512
|
+
}
|
|
513
|
+
|
|
514
|
+
// Forward declarations
// These two are prototyped up front so they can be referenced before their
// definitions appear later in this file.

// Recursive parser entry point; css..pe delimit the input (pe exclusive).
// NOTE(review): the meaning of the parent_* arguments is inferred from their
// names only - confirm against the definition.
static void parse_css_recursive(ParserContext *ctx, const char *css, const char *pe,
                                VALUE parent_media_sym, VALUE parent_selector, VALUE parent_rule_id);

// Joins a parent and a child media query Symbol into one Symbol.
static VALUE combine_media_queries(VALUE parent, VALUE child);
|
|
518
|
+
|
|
519
|
+
/*
|
|
520
|
+
* Combine parent and child media queries
|
|
521
|
+
* Examples:
|
|
522
|
+
* parent="screen", child="min-width: 500px" => "screen and (min-width: 500px)"
|
|
523
|
+
* parent=nil, child="print" => "print"
|
|
524
|
+
* Note: child may have had outer parens stripped, so we re-add them for conditions
|
|
525
|
+
*/
|
|
526
|
+
static VALUE combine_media_queries(VALUE parent, VALUE child) {
|
|
527
|
+
if (NIL_P(parent)) {
|
|
528
|
+
return child;
|
|
529
|
+
}
|
|
530
|
+
if (NIL_P(child)) {
|
|
531
|
+
return parent;
|
|
532
|
+
}
|
|
533
|
+
|
|
534
|
+
// Combine: "parent and child"
|
|
535
|
+
VALUE parent_str = rb_sym2str(parent);
|
|
536
|
+
VALUE child_str = rb_sym2str(child);
|
|
537
|
+
|
|
538
|
+
VALUE combined = rb_str_dup(parent_str);
|
|
539
|
+
rb_str_cat2(combined, " and ");
|
|
540
|
+
|
|
541
|
+
// If child is a condition (contains ':'), wrap it in parentheses
|
|
542
|
+
// Example: "min-width: 500px" => "(min-width: 500px)"
|
|
543
|
+
const char *child_ptr = RSTRING_PTR(child_str);
|
|
544
|
+
long child_len = RSTRING_LEN(child_str);
|
|
545
|
+
int has_colon = 0;
|
|
546
|
+
int already_wrapped = (child_len >= 2 && child_ptr[0] == '(' && child_ptr[child_len - 1] == ')');
|
|
547
|
+
|
|
548
|
+
for (long i = 0; i < child_len && !has_colon; i++) {
|
|
549
|
+
if (child_ptr[i] == ':') {
|
|
550
|
+
has_colon = 1;
|
|
551
|
+
}
|
|
552
|
+
}
|
|
553
|
+
|
|
554
|
+
if (has_colon && !already_wrapped) {
|
|
555
|
+
rb_str_cat2(combined, "(");
|
|
556
|
+
rb_str_append(combined, child_str);
|
|
557
|
+
rb_str_cat2(combined, ")");
|
|
558
|
+
} else {
|
|
559
|
+
rb_str_append(combined, child_str);
|
|
560
|
+
}
|
|
561
|
+
|
|
562
|
+
return ID2SYM(rb_intern_str(combined));
|
|
563
|
+
}
|
|
564
|
+
|
|
565
|
+
/*
|
|
566
|
+
* Intern media query string to symbol with safety check
|
|
567
|
+
* Strips outer parentheses from standalone conditions like "(orientation: landscape)"
|
|
568
|
+
*/
|
|
569
|
+
static VALUE intern_media_query_safe(ParserContext *ctx, const char *query_str, long query_len) {
|
|
570
|
+
if (query_len == 0) {
|
|
571
|
+
return Qnil;
|
|
572
|
+
}
|
|
573
|
+
|
|
574
|
+
// Safety check
|
|
575
|
+
if (ctx->media_query_count >= MAX_MEDIA_QUERIES) {
|
|
576
|
+
rb_raise(eSizeError,
|
|
577
|
+
"Exceeded maximum unique media queries (%d)",
|
|
578
|
+
MAX_MEDIA_QUERIES);
|
|
579
|
+
}
|
|
580
|
+
|
|
581
|
+
// Strip outer parentheses from standalone conditions
|
|
582
|
+
// Example: "(orientation: landscape)" => "orientation: landscape"
|
|
583
|
+
// But keep: "screen and (min-width: 500px)" as-is
|
|
584
|
+
const char *start = query_str;
|
|
585
|
+
const char *end = query_str + query_len;
|
|
586
|
+
|
|
587
|
+
// Trim whitespace
|
|
588
|
+
while (start < end && IS_WHITESPACE(*start)) start++;
|
|
589
|
+
while (end > start && IS_WHITESPACE(*(end - 1))) end--;
|
|
590
|
+
|
|
591
|
+
if (end > start && *start == '(' && *(end - 1) == ')') {
|
|
592
|
+
// Check if this is a simple wrapped condition (no other parens/operators)
|
|
593
|
+
int depth = 0;
|
|
594
|
+
int has_and_or = 0;
|
|
595
|
+
for (const char *p = start; p < end; p++) {
|
|
596
|
+
if (*p == '(') depth++;
|
|
597
|
+
else if (*p == ')') depth--;
|
|
598
|
+
// Check for "and" or "or" at depth 0 (outside our outer parens)
|
|
599
|
+
if (depth == 0 && p + 3 < end &&
|
|
600
|
+
(strncmp(p, " and ", 5) == 0 || strncmp(p, " or ", 4) == 0)) {
|
|
601
|
+
has_and_or = 1;
|
|
602
|
+
break;
|
|
603
|
+
}
|
|
604
|
+
}
|
|
605
|
+
|
|
606
|
+
// Strip outer parens if depth stays >= 1 (no operators outside) and no and/or
|
|
607
|
+
if (!has_and_or && depth == 0) {
|
|
608
|
+
start++; // Skip opening (
|
|
609
|
+
end--; // Skip closing )
|
|
610
|
+
}
|
|
611
|
+
}
|
|
612
|
+
|
|
613
|
+
long final_len = end - start;
|
|
614
|
+
VALUE query_string = rb_usascii_str_new(start, final_len);
|
|
615
|
+
VALUE sym = ID2SYM(rb_intern_str(query_string));
|
|
616
|
+
ctx->media_query_count++;
|
|
617
|
+
|
|
618
|
+
return sym;
|
|
619
|
+
}
|
|
620
|
+
|
|
621
|
+
/*
|
|
622
|
+
* Parse mixed declarations and nested selectors from a block
|
|
623
|
+
* Used when a CSS rule block contains both declarations and nested rules
|
|
624
|
+
*
|
|
625
|
+
* Example CSS block being parsed:
|
|
626
|
+
* .parent {
|
|
627
|
+
* color: red; <- declaration
|
|
628
|
+
* & .child { <- nested selector
|
|
629
|
+
* font-size: 14px;
|
|
630
|
+
* }
|
|
631
|
+
* @media (min-width: 768px) { <- nested @media
|
|
632
|
+
* padding: 10px;
|
|
633
|
+
* }
|
|
634
|
+
* }
|
|
635
|
+
*
|
|
636
|
+
* Returns: Array of declarations (only the declarations, not nested rules)
|
|
637
|
+
*/
|
|
638
|
+
// Parameters:
//   ctx              - parser state; rules are appended to ctx->rules_array and
//                      ids are drawn from ctx->rule_id_counter
//   start, end       - half-open byte range [start, end) holding the block body
//   parent_selector  - selector that nested selectors are resolved against
//   parent_rule_id   - id stored as the parent link on every rule created here
//   parent_media_sym - media context passed to update_media_index / combined
//                      with nested @media queries
// Raises eDepthError when ctx->depth exceeds MAX_PARSE_DEPTH and eSizeError when
// a property name or value exceeds its configured maximum length.
static VALUE parse_mixed_block(ParserContext *ctx, const char *start, const char *end,
                               VALUE parent_selector, VALUE parent_rule_id, VALUE parent_media_sym) {
    // Check recursion depth to prevent stack overflow
    if (ctx->depth > MAX_PARSE_DEPTH) {
        rb_raise(eDepthError,
                 "CSS nesting too deep: exceeded maximum depth of %d",
                 MAX_PARSE_DEPTH);
    }

    VALUE declarations = rb_ary_new();
    const char *p = start;

    while (p < end) {
        trim_leading(&p, end);
        if (p >= end) break;

        // SKIP_COMMENT is defined elsewhere in the file; it must stay directly
        // inside this loop body (NOTE(review): presumably it may `continue`).
        SKIP_COMMENT(p, end);

        // Nested @media query: "@media" followed by whitespace.
        // `p + 6 < end` guarantees p[6] is readable.
        if (*p == '@' && p + 6 < end && strncmp(p, "@media", 6) == 0 &&
            IS_WHITESPACE(p[6])) {
            // Nested @media - parse with parent selector as context
            const char *media_start = p + 6;
            trim_leading(&media_start, end);

            // Find opening brace
            const char *media_query_end = media_start;
            while (media_query_end < end && *media_query_end != '{') {
                media_query_end++;
            }
            if (media_query_end >= end) break; // No '{' - stop parsing this block

            // Extract media query text (trailing whitespace removed)
            const char *media_query_start = media_start;
            const char *media_query_end_trimmed = media_query_end;
            trim_trailing(media_query_start, &media_query_end_trimmed);
            VALUE media_sym = intern_media_query_safe(ctx, media_query_start, media_query_end_trimmed - media_query_start);

            p = media_query_end + 1; // Skip {

            // Find matching closing brace; the result is treated as the position
            // of '}' (or `end` when there is none).
            const char *media_block_start = p;
            const char *media_block_end = find_matching_brace(p, end);
            p = media_block_end;

            if (p < end) p++; // Skip }

            // Combine media queries: parent + child
            VALUE combined_media_sym = combine_media_queries(parent_media_sym, media_sym);

            // Create a rule ID for this media rule
            int media_rule_id = ctx->rule_id_counter++;

            // Reserve this rule's slot before recursing so that it precedes any
            // rules the recursive call appends.
            long parent_pos = RARRAY_LEN(ctx->rules_array);
            rb_ary_push(ctx->rules_array, Qnil);

            // Parse mixed block (may contain declarations and/or nested @media)
            ctx->depth++;
            VALUE media_declarations = parse_mixed_block(ctx, media_block_start, media_block_end,
                                                         parent_selector, INT2FIX(media_rule_id), combined_media_sym);
            ctx->depth--;

            // Create rule with the parent selector and declarations, associated with combined media query
            VALUE rule = rb_struct_new(cRule,
                                       INT2FIX(media_rule_id),
                                       parent_selector,
                                       media_declarations,
                                       Qnil,            // specificity
                                       parent_rule_id,  // Link to parent for nested @media serialization
                                       Qnil             // nesting_style (nil for @media nesting)
                                       );

            // Mark that we have nesting (only set once)
            if (!ctx->has_nesting && !NIL_P(parent_rule_id)) {
                ctx->has_nesting = 1;
            }

            // Replace placeholder with actual rule
            rb_ary_store(ctx->rules_array, parent_pos, rule);
            update_media_index(ctx, combined_media_sym, media_rule_id);

            continue;
        }

        // Check if this is a nested selector (starts with nesting indicators)
        // Example within parse_mixed_block:
        //   Input block: "color: red; & .child { font: 14px; }"
        //                             ^p (at &) - nested selector detected
        char c = *p;
        if (c == '&' || c == '.' || c == '#' || c == '[' || c == ':' ||
            c == '*' || c == '>' || c == '+' || c == '~' || c == '@') {
            // This is likely a nested selector - find the opening brace
            const char *nested_sel_start = p;
            while (p < end && *p != '{') p++;
            if (p >= end) break;

            const char *nested_sel_end = p;
            trim_trailing(nested_sel_start, &nested_sel_end);

            p++; // Skip {

            // Find matching closing brace
            const char *nested_block_start = p;
            const char *nested_block_end = find_matching_brace(p, end);
            p = nested_block_end;

            if (p < end) p++; // Skip }

            // Split nested selector on commas and create a rule for each
            // Example: "& .child, & .sibling { ... }" creates 2 nested rules
            const char *seg_start = nested_sel_start;
            const char *seg = nested_sel_start;

            while (seg <= nested_sel_end) {
                if (seg == nested_sel_end || *seg == ',') { // At: ',' or end
                    // Trim segment
                    while (seg_start < seg && IS_WHITESPACE(*seg_start)) {
                        seg_start++;
                    }

                    const char *seg_end_ptr = seg;
                    while (seg_end_ptr > seg_start && IS_WHITESPACE(*(seg_end_ptr - 1))) {
                        seg_end_ptr--;
                    }

                    if (seg_end_ptr > seg_start) {
                        // Resolve nested selector; the result is read as
                        // [resolved selector, nesting style].
                        VALUE result = resolve_nested_selector(parent_selector, seg_start, seg_end_ptr - seg_start);
                        VALUE resolved_selector = rb_ary_entry(result, 0);
                        VALUE nesting_style = rb_ary_entry(result, 1);

                        // Get rule ID
                        int rule_id = ctx->rule_id_counter++;

                        // Recursively parse nested block (re-parsed once per segment)
                        ctx->depth++;
                        VALUE nested_declarations = parse_mixed_block(ctx, nested_block_start, nested_block_end,
                                                                      resolved_selector, INT2FIX(rule_id), parent_media_sym);
                        ctx->depth--;

                        // Create rule for nested selector
                        VALUE rule = rb_struct_new(cRule,
                                                   INT2FIX(rule_id),
                                                   resolved_selector,
                                                   nested_declarations,
                                                   Qnil, // specificity
                                                   parent_rule_id,
                                                   nesting_style
                                                   );

                        // Mark that we have nesting (only set once)
                        if (!ctx->has_nesting && !NIL_P(parent_rule_id)) {
                            ctx->has_nesting = 1;
                        }

                        rb_ary_push(ctx->rules_array, rule);
                        update_media_index(ctx, parent_media_sym, rule_id);
                    }

                    seg_start = seg + 1;
                }
                seg++;
            }

            continue;
        }

        // This is a declaration - parse it
        const char *prop_start = p;
        while (p < end && *p != ':' && *p != ';' && *p != '{') p++;
        if (p >= end || *p != ':') {
            // Malformed - skip to semicolon
            while (p < end && *p != ';') p++;
            if (p < end) p++;
            continue;
        }

        const char *prop_end = p;
        trim_trailing(prop_start, &prop_end);

        p++; // Skip :
        trim_leading(&p, end);

        const char *val_start = p;
        int important = 0;

        // Find end of value (semicolon or closing brace or end)
        while (p < end && *p != ';' && *p != '}') p++;
        const char *val_end = p;

        // Trim first, then test for a trailing "!important" (10 bytes).
        // The length is compared before any pointer is formed so that
        // `val_end - 10` is never computed for values shorter than 10 bytes.
        trim_trailing(val_start, &val_end);
        if (val_end - val_start >= 10 &&
            strncmp(val_end - 10, "!important", 10) == 0) {
            important = 1;
            val_end -= 10;
            trim_trailing(val_start, &val_end);
        }

        if (p < end && *p == ';') p++;

        // Create declaration (skipped when the property or value is empty)
        if (prop_end > prop_start && val_end > val_start) {
            long prop_len = prop_end - prop_start;
            long val_len = val_end - val_start;

            // Check property name length
            if (prop_len > MAX_PROPERTY_NAME_LENGTH) {
                rb_raise(eSizeError,
                         "Property name too long: %ld bytes (max %d)",
                         prop_len, MAX_PROPERTY_NAME_LENGTH);
            }

            // Check property value length
            if (val_len > MAX_PROPERTY_VALUE_LENGTH) {
                rb_raise(eSizeError,
                         "Property value too long: %ld bytes (max %d)",
                         val_len, MAX_PROPERTY_VALUE_LENGTH);
            }

            VALUE property_raw = rb_usascii_str_new(prop_start, prop_len);
            VALUE property = lowercase_property(property_raw);
            VALUE value = rb_utf8_str_new(val_start, val_len);

            VALUE decl = rb_struct_new(cDeclaration,
                                       property,
                                       value,
                                       important ? Qtrue : Qfalse
                                       );

            rb_ary_push(declarations, decl);
        }
    }

    return declarations;
}
|
|
886
|
+
|
|
887
|
+
/*
|
|
888
|
+
* Parse CSS recursively with media query context and optional parent selector for nesting
|
|
889
|
+
*
|
|
890
|
+
* parent_media_sym: Parent media query symbol (or Qnil for no media context)
|
|
891
|
+
* parent_selector: Parent selector string for nested rules (or Qnil for top-level)
|
|
892
|
+
* parent_rule_id: Parent rule ID (Fixnum) for nested rules (or Qnil for top-level)
|
|
893
|
+
*/
|
|
894
|
+
static void parse_css_recursive(ParserContext *ctx, const char *css, const char *pe,
|
|
895
|
+
VALUE parent_media_sym, VALUE parent_selector, VALUE parent_rule_id) {
|
|
896
|
+
// Check recursion depth to prevent stack overflow
|
|
897
|
+
if (ctx->depth > MAX_PARSE_DEPTH) {
|
|
898
|
+
rb_raise(eDepthError,
|
|
899
|
+
"CSS nesting too deep: exceeded maximum depth of %d",
|
|
900
|
+
MAX_PARSE_DEPTH);
|
|
901
|
+
}
|
|
902
|
+
|
|
903
|
+
const char *p = css;
|
|
904
|
+
|
|
905
|
+
const char *selector_start = NULL;
|
|
906
|
+
const char *decl_start = NULL;
|
|
907
|
+
int brace_depth = 0;
|
|
908
|
+
|
|
909
|
+
while (p < pe) {
|
|
910
|
+
// Skip whitespace
|
|
911
|
+
while (p < pe && IS_WHITESPACE(*p)) p++;
|
|
912
|
+
if (p >= pe) break;
|
|
913
|
+
|
|
914
|
+
// Skip comments (rare in typical CSS)
|
|
915
|
+
SKIP_COMMENT(p, pe);
|
|
916
|
+
|
|
917
|
+
// Check for @media at-rule (only at depth 0)
|
|
918
|
+
if (RB_UNLIKELY(brace_depth == 0 && p + 6 < pe && *p == '@' &&
|
|
919
|
+
strncmp(p + 1, "media", 5) == 0 && IS_WHITESPACE(p[6]))) {
|
|
920
|
+
p += 6; // Skip "@media"
|
|
921
|
+
|
|
922
|
+
// Skip whitespace
|
|
923
|
+
while (p < pe && IS_WHITESPACE(*p)) p++;
|
|
924
|
+
|
|
925
|
+
// Find media query (up to opening brace)
|
|
926
|
+
const char *mq_start = p;
|
|
927
|
+
while (p < pe && *p != '{') p++;
|
|
928
|
+
const char *mq_end = p;
|
|
929
|
+
|
|
930
|
+
// Trim
|
|
931
|
+
trim_trailing(mq_start, &mq_end);
|
|
932
|
+
|
|
933
|
+
if (p >= pe || *p != '{') {
|
|
934
|
+
continue; // Malformed
|
|
935
|
+
}
|
|
936
|
+
|
|
937
|
+
// Intern media query
|
|
938
|
+
VALUE child_media_sym = intern_media_query_safe(ctx, mq_start, mq_end - mq_start);
|
|
939
|
+
|
|
940
|
+
// Combine with parent
|
|
941
|
+
VALUE combined_media_sym = combine_media_queries(parent_media_sym, child_media_sym);
|
|
942
|
+
|
|
943
|
+
p++; // Skip opening {
|
|
944
|
+
|
|
945
|
+
// Find matching closing brace
|
|
946
|
+
const char *block_start = p;
|
|
947
|
+
const char *block_end = find_matching_brace(p, pe);
|
|
948
|
+
p = block_end;
|
|
949
|
+
|
|
950
|
+
// Recursively parse @media block with combined media context
|
|
951
|
+
ctx->depth++;
|
|
952
|
+
parse_css_recursive(ctx, block_start, block_end, combined_media_sym, NO_PARENT_SELECTOR, NO_PARENT_RULE_ID);
|
|
953
|
+
ctx->depth--;
|
|
954
|
+
|
|
955
|
+
if (p < pe && *p == '}') p++;
|
|
956
|
+
continue;
|
|
957
|
+
}
|
|
958
|
+
|
|
959
|
+
// Check for conditional group at-rules: @supports, @layer, @container, @scope
|
|
960
|
+
// AND nested block at-rules: @keyframes, @font-face, @page
|
|
961
|
+
// These behave like @media but don't affect media context
|
|
962
|
+
if (RB_UNLIKELY(brace_depth == 0 && *p == '@')) {
|
|
963
|
+
const char *at_start = p + 1;
|
|
964
|
+
const char *at_name_end = at_start;
|
|
965
|
+
|
|
966
|
+
// Find end of at-rule name (stop at whitespace or opening brace)
|
|
967
|
+
while (at_name_end < pe && !IS_WHITESPACE(*at_name_end) && *at_name_end != '{') {
|
|
968
|
+
at_name_end++;
|
|
969
|
+
}
|
|
970
|
+
|
|
971
|
+
long at_name_len = at_name_end - at_start;
|
|
972
|
+
|
|
973
|
+
// Check if this is a conditional group rule
|
|
974
|
+
int is_conditional_group =
|
|
975
|
+
(at_name_len == 8 && strncmp(at_start, "supports", 8) == 0) ||
|
|
976
|
+
(at_name_len == 5 && strncmp(at_start, "layer", 5) == 0) ||
|
|
977
|
+
(at_name_len == 9 && strncmp(at_start, "container", 9) == 0) ||
|
|
978
|
+
(at_name_len == 5 && strncmp(at_start, "scope", 5) == 0);
|
|
979
|
+
|
|
980
|
+
if (is_conditional_group) {
|
|
981
|
+
// Skip to opening brace
|
|
982
|
+
p = at_name_end;
|
|
983
|
+
while (p < pe && *p != '{') p++;
|
|
984
|
+
|
|
985
|
+
if (p >= pe || *p != '{') {
|
|
986
|
+
continue; // Malformed
|
|
987
|
+
}
|
|
988
|
+
|
|
989
|
+
p++; // Skip opening {
|
|
990
|
+
|
|
991
|
+
// Find matching closing brace
|
|
992
|
+
const char *block_start = p;
|
|
993
|
+
const char *block_end = find_matching_brace(p, pe);
|
|
994
|
+
p = block_end;
|
|
995
|
+
|
|
996
|
+
// Recursively parse block content (preserve parent media context)
|
|
997
|
+
ctx->depth++;
|
|
998
|
+
parse_css_recursive(ctx, block_start, block_end, parent_media_sym, parent_selector, parent_rule_id);
|
|
999
|
+
ctx->depth--;
|
|
1000
|
+
|
|
1001
|
+
if (p < pe && *p == '}') p++;
|
|
1002
|
+
continue;
|
|
1003
|
+
}
|
|
1004
|
+
|
|
1005
|
+
// Check for @keyframes (contains <rule-list>)
|
|
1006
|
+
// TODO: Test perf gains by using RB_UNLIKELY(is_keyframes) wrapper
|
|
1007
|
+
int is_keyframes =
|
|
1008
|
+
(at_name_len == 9 && strncmp(at_start, "keyframes", 9) == 0) ||
|
|
1009
|
+
(at_name_len == 17 && strncmp(at_start, "-webkit-keyframes", 17) == 0) ||
|
|
1010
|
+
(at_name_len == 13 && strncmp(at_start, "-moz-keyframes", 13) == 0);
|
|
1011
|
+
|
|
1012
|
+
if (is_keyframes) {
|
|
1013
|
+
// Build full selector string: "@keyframes fade"
|
|
1014
|
+
const char *selector_start = p; // Points to '@'
|
|
1015
|
+
p = at_name_end;
|
|
1016
|
+
while (p < pe && *p != '{') p++;
|
|
1017
|
+
|
|
1018
|
+
if (p >= pe || *p != '{') {
|
|
1019
|
+
continue; // Malformed
|
|
1020
|
+
}
|
|
1021
|
+
|
|
1022
|
+
const char *selector_end = p;
|
|
1023
|
+
while (selector_end > selector_start && IS_WHITESPACE(*(selector_end - 1))) {
|
|
1024
|
+
selector_end--;
|
|
1025
|
+
}
|
|
1026
|
+
VALUE selector = rb_utf8_str_new(selector_start, selector_end - selector_start);
|
|
1027
|
+
|
|
1028
|
+
p++; // Skip opening {
|
|
1029
|
+
|
|
1030
|
+
// Find matching closing brace
|
|
1031
|
+
const char *block_start = p;
|
|
1032
|
+
const char *block_end = find_matching_brace(p, pe);
|
|
1033
|
+
p = block_end;
|
|
1034
|
+
|
|
1035
|
+
// Parse keyframe blocks as rules (from/to/0%/50% etc)
|
|
1036
|
+
ParserContext nested_ctx = {
|
|
1037
|
+
.rules_array = rb_ary_new(),
|
|
1038
|
+
.media_index = rb_hash_new(),
|
|
1039
|
+
.rule_id_counter = 0,
|
|
1040
|
+
.media_query_count = 0,
|
|
1041
|
+
.media_cache = NULL,
|
|
1042
|
+
.has_nesting = 0,
|
|
1043
|
+
.depth = 0
|
|
1044
|
+
};
|
|
1045
|
+
parse_css_recursive(&nested_ctx, block_start, block_end, NO_PARENT_MEDIA, NO_PARENT_SELECTOR, NO_PARENT_RULE_ID);
|
|
1046
|
+
|
|
1047
|
+
// Get rule ID and increment
|
|
1048
|
+
int rule_id = ctx->rule_id_counter++;
|
|
1049
|
+
|
|
1050
|
+
// Create AtRule with nested rules
|
|
1051
|
+
VALUE at_rule = rb_struct_new(cAtRule,
|
|
1052
|
+
INT2FIX(rule_id),
|
|
1053
|
+
selector,
|
|
1054
|
+
nested_ctx.rules_array, // Array of Rule (keyframe blocks)
|
|
1055
|
+
Qnil);
|
|
1056
|
+
|
|
1057
|
+
// Add to rules array
|
|
1058
|
+
rb_ary_push(ctx->rules_array, at_rule);
|
|
1059
|
+
|
|
1060
|
+
// Add to media index if in media query
|
|
1061
|
+
if (!NIL_P(parent_media_sym)) {
|
|
1062
|
+
VALUE rule_ids = rb_hash_aref(ctx->media_index, parent_media_sym);
|
|
1063
|
+
if (NIL_P(rule_ids)) {
|
|
1064
|
+
rule_ids = rb_ary_new();
|
|
1065
|
+
rb_hash_aset(ctx->media_index, parent_media_sym, rule_ids);
|
|
1066
|
+
}
|
|
1067
|
+
rb_ary_push(rule_ids, INT2FIX(rule_id));
|
|
1068
|
+
}
|
|
1069
|
+
|
|
1070
|
+
if (p < pe && *p == '}') p++;
|
|
1071
|
+
continue;
|
|
1072
|
+
}
|
|
1073
|
+
|
|
1074
|
+
// Check for @font-face (contains <declaration-list>)
|
|
1075
|
+
int is_font_face = (at_name_len == 9 && strncmp(at_start, "font-face", 9) == 0);
|
|
1076
|
+
|
|
1077
|
+
if (is_font_face) {
|
|
1078
|
+
// Build selector string: "@font-face"
|
|
1079
|
+
const char *selector_start = p; // Points to '@'
|
|
1080
|
+
p = at_name_end;
|
|
1081
|
+
while (p < pe && *p != '{') p++;
|
|
1082
|
+
|
|
1083
|
+
if (p >= pe || *p != '{') {
|
|
1084
|
+
continue; // Malformed
|
|
1085
|
+
}
|
|
1086
|
+
|
|
1087
|
+
const char *selector_end = p;
|
|
1088
|
+
while (selector_end > selector_start && IS_WHITESPACE(*(selector_end - 1))) {
|
|
1089
|
+
selector_end--;
|
|
1090
|
+
}
|
|
1091
|
+
VALUE selector = rb_utf8_str_new(selector_start, selector_end - selector_start);
|
|
1092
|
+
|
|
1093
|
+
p++; // Skip opening {
|
|
1094
|
+
|
|
1095
|
+
// Find matching closing brace
|
|
1096
|
+
const char *decl_start = p;
|
|
1097
|
+
const char *decl_end = find_matching_brace(p, pe);
|
|
1098
|
+
p = decl_end;
|
|
1099
|
+
|
|
1100
|
+
// Parse declarations
|
|
1101
|
+
VALUE declarations = parse_declarations(decl_start, decl_end);
|
|
1102
|
+
|
|
1103
|
+
// Get rule ID and increment
|
|
1104
|
+
int rule_id = ctx->rule_id_counter++;
|
|
1105
|
+
|
|
1106
|
+
// Create AtRule with declarations
|
|
1107
|
+
VALUE at_rule = rb_struct_new(cAtRule,
|
|
1108
|
+
INT2FIX(rule_id),
|
|
1109
|
+
selector,
|
|
1110
|
+
declarations, // Array of Declaration
|
|
1111
|
+
Qnil);
|
|
1112
|
+
|
|
1113
|
+
// Add to rules array
|
|
1114
|
+
rb_ary_push(ctx->rules_array, at_rule);
|
|
1115
|
+
|
|
1116
|
+
// Add to media index if in media query
|
|
1117
|
+
if (!NIL_P(parent_media_sym)) {
|
|
1118
|
+
VALUE rule_ids = rb_hash_aref(ctx->media_index, parent_media_sym);
|
|
1119
|
+
if (NIL_P(rule_ids)) {
|
|
1120
|
+
rule_ids = rb_ary_new();
|
|
1121
|
+
rb_hash_aset(ctx->media_index, parent_media_sym, rule_ids);
|
|
1122
|
+
}
|
|
1123
|
+
rb_ary_push(rule_ids, INT2FIX(rule_id));
|
|
1124
|
+
}
|
|
1125
|
+
|
|
1126
|
+
if (p < pe && *p == '}') p++;
|
|
1127
|
+
continue;
|
|
1128
|
+
}
|
|
1129
|
+
}
|
|
1130
|
+
|
|
1131
|
+
// Opening brace
|
|
1132
|
+
if (*p == '{') {
|
|
1133
|
+
if (brace_depth == 0 && selector_start != NULL) {
|
|
1134
|
+
decl_start = p + 1;
|
|
1135
|
+
}
|
|
1136
|
+
brace_depth++;
|
|
1137
|
+
p++;
|
|
1138
|
+
continue;
|
|
1139
|
+
}
|
|
1140
|
+
|
|
1141
|
+
// Closing brace
|
|
1142
|
+
if (*p == '}') {
|
|
1143
|
+
brace_depth--;
|
|
1144
|
+
if (brace_depth == 0 && selector_start != NULL && decl_start != NULL) {
|
|
1145
|
+
// We've found a complete CSS rule block - now determine if it has nesting
|
|
1146
|
+
// Example: .parent { color: red; & .child { font-size: 14px; } }
|
|
1147
|
+
// ^selector_start ^decl_start ^p (at })
|
|
1148
|
+
int has_nesting = has_nested_selectors(decl_start, p);
|
|
1149
|
+
|
|
1150
|
+
// Get selector string
|
|
1151
|
+
const char *sel_end = decl_start - 1;
|
|
1152
|
+
while (sel_end > selector_start && IS_WHITESPACE(*(sel_end - 1))) {
|
|
1153
|
+
sel_end--;
|
|
1154
|
+
}
|
|
1155
|
+
|
|
1156
|
+
if (!has_nesting) {
|
|
1157
|
+
// FAST PATH: No nesting - parse as pure declarations
|
|
1158
|
+
VALUE declarations = parse_declarations(decl_start, p);
|
|
1159
|
+
|
|
1160
|
+
// Split on commas to handle multi-selector rules
|
|
1161
|
+
// Example: ".a, .b, .c { color: red; }" creates 3 separate rules
|
|
1162
|
+
// ^selector_start ^sel_end
|
|
1163
|
+
// ^seg_start=seg (scanning for commas)
|
|
1164
|
+
const char *seg_start = selector_start;
|
|
1165
|
+
const char *seg = selector_start;
|
|
1166
|
+
|
|
1167
|
+
while (seg <= sel_end) {
|
|
1168
|
+
if (seg == sel_end || *seg == ',') { // At: ',' or end
|
|
1169
|
+
// Trim segment
|
|
1170
|
+
while (seg_start < seg && IS_WHITESPACE(*seg_start)) {
|
|
1171
|
+
seg_start++;
|
|
1172
|
+
}
|
|
1173
|
+
|
|
1174
|
+
const char *seg_end_ptr = seg;
|
|
1175
|
+
while (seg_end_ptr > seg_start && IS_WHITESPACE(*(seg_end_ptr - 1))) {
|
|
1176
|
+
seg_end_ptr--;
|
|
1177
|
+
}
|
|
1178
|
+
|
|
1179
|
+
if (seg_end_ptr > seg_start) {
|
|
1180
|
+
VALUE selector = rb_utf8_str_new(seg_start, seg_end_ptr - seg_start);
|
|
1181
|
+
|
|
1182
|
+
// Resolve against parent if nested
|
|
1183
|
+
VALUE resolved_selector;
|
|
1184
|
+
VALUE nesting_style_val;
|
|
1185
|
+
VALUE parent_id_val;
|
|
1186
|
+
|
|
1187
|
+
if (!NIL_P(parent_selector)) {
|
|
1188
|
+
// This is a nested rule - resolve selector
|
|
1189
|
+
VALUE result = resolve_nested_selector(parent_selector, RSTRING_PTR(selector), RSTRING_LEN(selector));
|
|
1190
|
+
resolved_selector = rb_ary_entry(result, 0);
|
|
1191
|
+
nesting_style_val = rb_ary_entry(result, 1);
|
|
1192
|
+
parent_id_val = parent_rule_id;
|
|
1193
|
+
} else {
|
|
1194
|
+
// Top-level rule
|
|
1195
|
+
resolved_selector = selector;
|
|
1196
|
+
nesting_style_val = Qnil;
|
|
1197
|
+
parent_id_val = Qnil;
|
|
1198
|
+
}
|
|
1199
|
+
|
|
1200
|
+
// Get rule ID and increment
|
|
1201
|
+
int rule_id = ctx->rule_id_counter++;
|
|
1202
|
+
|
|
1203
|
+
// Create Rule
|
|
1204
|
+
VALUE rule = rb_struct_new(cRule,
|
|
1205
|
+
INT2FIX(rule_id),
|
|
1206
|
+
resolved_selector,
|
|
1207
|
+
rb_ary_dup(declarations),
|
|
1208
|
+
Qnil, // specificity
|
|
1209
|
+
parent_id_val,
|
|
1210
|
+
nesting_style_val
|
|
1211
|
+
);
|
|
1212
|
+
|
|
1213
|
+
// Mark that we have nesting (only set once)
|
|
1214
|
+
if (!ctx->has_nesting && !NIL_P(parent_id_val)) {
|
|
1215
|
+
ctx->has_nesting = 1;
|
|
1216
|
+
}
|
|
1217
|
+
|
|
1218
|
+
rb_ary_push(ctx->rules_array, rule);
|
|
1219
|
+
|
|
1220
|
+
// Update media index
|
|
1221
|
+
update_media_index(ctx, parent_media_sym, rule_id);
|
|
1222
|
+
}
|
|
1223
|
+
|
|
1224
|
+
seg_start = seg + 1;
|
|
1225
|
+
}
|
|
1226
|
+
seg++;
|
|
1227
|
+
}
|
|
1228
|
+
} else {
|
|
1229
|
+
// NESTED PATH: Parse mixed declarations + nested rules
|
|
1230
|
+
// For each comma-separated parent selector, parse the block with that parent
|
|
1231
|
+
//
|
|
1232
|
+
// Example: ".a, .b { color: red; & .child { font: 14px; } }"
|
|
1233
|
+
// ^selector_start ^sel_end
|
|
1234
|
+
// Creates:
|
|
1235
|
+
// - .a with declarations [color: red]
|
|
1236
|
+
// - .a .child with declarations [font: 14px]
|
|
1237
|
+
// - .b with declarations [color: red]
|
|
1238
|
+
// - .b .child with declarations [font: 14px]
|
|
1239
|
+
const char *seg_start = selector_start;
|
|
1240
|
+
const char *seg = selector_start;
|
|
1241
|
+
|
|
1242
|
+
while (seg <= sel_end) {
|
|
1243
|
+
if (seg == sel_end || *seg == ',') { // At: ',' or end
|
|
1244
|
+
// Trim segment
|
|
1245
|
+
while (seg_start < seg && IS_WHITESPACE(*seg_start)) {
|
|
1246
|
+
seg_start++;
|
|
1247
|
+
}
|
|
1248
|
+
|
|
1249
|
+
const char *seg_end_ptr = seg;
|
|
1250
|
+
while (seg_end_ptr > seg_start && IS_WHITESPACE(*(seg_end_ptr - 1))) {
|
|
1251
|
+
seg_end_ptr--;
|
|
1252
|
+
}
|
|
1253
|
+
|
|
1254
|
+
if (seg_end_ptr > seg_start) {
|
|
1255
|
+
VALUE current_selector = rb_utf8_str_new(seg_start, seg_end_ptr - seg_start);
|
|
1256
|
+
|
|
1257
|
+
// Resolve against parent if we're already nested
|
|
1258
|
+
VALUE resolved_current;
|
|
1259
|
+
VALUE current_nesting_style;
|
|
1260
|
+
VALUE current_parent_id;
|
|
1261
|
+
|
|
1262
|
+
if (!NIL_P(parent_selector)) {
|
|
1263
|
+
VALUE result = resolve_nested_selector(parent_selector, RSTRING_PTR(current_selector), RSTRING_LEN(current_selector));
|
|
1264
|
+
resolved_current = rb_ary_entry(result, 0);
|
|
1265
|
+
current_nesting_style = rb_ary_entry(result, 1);
|
|
1266
|
+
current_parent_id = parent_rule_id;
|
|
1267
|
+
} else {
|
|
1268
|
+
resolved_current = current_selector;
|
|
1269
|
+
current_nesting_style = Qnil;
|
|
1270
|
+
current_parent_id = Qnil;
|
|
1271
|
+
}
|
|
1272
|
+
|
|
1273
|
+
// Get rule ID for current selector (increment to reserve it)
|
|
1274
|
+
int current_rule_id = ctx->rule_id_counter++;
|
|
1275
|
+
|
|
1276
|
+
// Reserve parent's position in rules array with placeholder
|
|
1277
|
+
// This ensures parent comes before nested rules in array order (per W3C spec)
|
|
1278
|
+
long parent_position = RARRAY_LEN(ctx->rules_array);
|
|
1279
|
+
rb_ary_push(ctx->rules_array, Qnil);
|
|
1280
|
+
|
|
1281
|
+
// Parse mixed block (declarations + nested selectors)
|
|
1282
|
+
// Nested rules will be added AFTER the placeholder
|
|
1283
|
+
ctx->depth++;
|
|
1284
|
+
VALUE parent_declarations = parse_mixed_block(ctx, decl_start, p,
|
|
1285
|
+
resolved_current, INT2FIX(current_rule_id), parent_media_sym);
|
|
1286
|
+
ctx->depth--;
|
|
1287
|
+
|
|
1288
|
+
// Create parent rule and replace placeholder
|
|
1289
|
+
// Always create the rule (even if empty) to avoid edge cases
|
|
1290
|
+
VALUE rule = rb_struct_new(cRule,
|
|
1291
|
+
INT2FIX(current_rule_id),
|
|
1292
|
+
resolved_current,
|
|
1293
|
+
parent_declarations,
|
|
1294
|
+
Qnil, // specificity
|
|
1295
|
+
current_parent_id,
|
|
1296
|
+
current_nesting_style
|
|
1297
|
+
);
|
|
1298
|
+
|
|
1299
|
+
// Mark that we have nesting (only set once)
|
|
1300
|
+
if (!ctx->has_nesting && !NIL_P(current_parent_id)) {
|
|
1301
|
+
ctx->has_nesting = 1;
|
|
1302
|
+
}
|
|
1303
|
+
|
|
1304
|
+
// Replace placeholder with actual rule - just pointer assignment, fast!
|
|
1305
|
+
rb_ary_store(ctx->rules_array, parent_position, rule);
|
|
1306
|
+
update_media_index(ctx, parent_media_sym, current_rule_id);
|
|
1307
|
+
}
|
|
1308
|
+
|
|
1309
|
+
seg_start = seg + 1;
|
|
1310
|
+
}
|
|
1311
|
+
seg++;
|
|
1312
|
+
}
|
|
1313
|
+
}
|
|
1314
|
+
|
|
1315
|
+
selector_start = NULL;
|
|
1316
|
+
decl_start = NULL;
|
|
1317
|
+
}
|
|
1318
|
+
p++;
|
|
1319
|
+
continue;
|
|
1320
|
+
}
|
|
1321
|
+
|
|
1322
|
+
// Start of selector
|
|
1323
|
+
if (brace_depth == 0 && selector_start == NULL) {
|
|
1324
|
+
selector_start = p;
|
|
1325
|
+
}
|
|
1326
|
+
|
|
1327
|
+
p++;
|
|
1328
|
+
}
|
|
1329
|
+
}
|
|
1330
|
+
|
|
1331
|
+
/*
|
|
1332
|
+
* Parse media query string and extract media types (Ruby-facing function)
|
|
1333
|
+
* Example: "screen, print" => [:screen, :print]
|
|
1334
|
+
* Example: "screen and (min-width: 768px)" => [:screen]
|
|
1335
|
+
*
|
|
1336
|
+
* @param media_query_sym [Symbol] Media query as symbol
|
|
1337
|
+
* @return [Array<Symbol>] Array of media type symbols
|
|
1338
|
+
*/
|
|
1339
|
+
VALUE parse_media_types(VALUE self, VALUE media_query_sym) {
|
|
1340
|
+
Check_Type(media_query_sym, T_SYMBOL);
|
|
1341
|
+
|
|
1342
|
+
VALUE query_string = rb_sym2str(media_query_sym);
|
|
1343
|
+
const char *query_str = RSTRING_PTR(query_string);
|
|
1344
|
+
long query_len = RSTRING_LEN(query_string);
|
|
1345
|
+
|
|
1346
|
+
return extract_media_types(query_str, query_len);
|
|
1347
|
+
}
|
|
1348
|
+
|
|
1349
|
+
/*
|
|
1350
|
+
* Main parse entry point
|
|
1351
|
+
* Returns: { rules: [...], media_index: {...}, charset: "..." | nil, last_rule_id: N }
|
|
1352
|
+
*/
|
|
1353
|
+
VALUE parse_css_new_impl(VALUE css_string, int rule_id_offset) {
|
|
1354
|
+
Check_Type(css_string, T_STRING);
|
|
1355
|
+
|
|
1356
|
+
const char *css = RSTRING_PTR(css_string);
|
|
1357
|
+
const char *pe = css + RSTRING_LEN(css_string);
|
|
1358
|
+
const char *p = css;
|
|
1359
|
+
|
|
1360
|
+
VALUE charset = Qnil;
|
|
1361
|
+
|
|
1362
|
+
// Extract @charset
|
|
1363
|
+
if (RSTRING_LEN(css_string) > 10 && strncmp(css, "@charset ", 9) == 0) {
|
|
1364
|
+
char *quote_start = strchr(css + 9, '"');
|
|
1365
|
+
if (quote_start != NULL) {
|
|
1366
|
+
char *quote_end = strchr(quote_start + 1, '"');
|
|
1367
|
+
if (quote_end != NULL) {
|
|
1368
|
+
charset = rb_str_new(quote_start + 1, quote_end - quote_start - 1);
|
|
1369
|
+
char *semicolon = quote_end + 1;
|
|
1370
|
+
while (semicolon < pe && IS_WHITESPACE(*semicolon)) {
|
|
1371
|
+
semicolon++;
|
|
1372
|
+
}
|
|
1373
|
+
if (semicolon < pe && *semicolon == ';') {
|
|
1374
|
+
p = semicolon + 1;
|
|
1375
|
+
}
|
|
1376
|
+
}
|
|
1377
|
+
}
|
|
1378
|
+
}
|
|
1379
|
+
|
|
1380
|
+
// Skip @import statements - they should be handled by ImportResolver at Ruby level
|
|
1381
|
+
// Per CSS spec, @import must come before all rules (except @charset)
|
|
1382
|
+
while (p < pe) {
|
|
1383
|
+
// Skip whitespace
|
|
1384
|
+
while (p < pe && IS_WHITESPACE(*p)) p++;
|
|
1385
|
+
if (p >= pe) break;
|
|
1386
|
+
|
|
1387
|
+
// Skip comments
|
|
1388
|
+
if (p + 1 < pe && p[0] == '/' && p[1] == '*') {
|
|
1389
|
+
p += 2;
|
|
1390
|
+
while (p + 1 < pe) {
|
|
1391
|
+
if (p[0] == '*' && p[1] == '/') {
|
|
1392
|
+
p += 2;
|
|
1393
|
+
break;
|
|
1394
|
+
}
|
|
1395
|
+
p++;
|
|
1396
|
+
}
|
|
1397
|
+
continue;
|
|
1398
|
+
}
|
|
1399
|
+
|
|
1400
|
+
// Check for @import
|
|
1401
|
+
if (p + 7 <= pe && *p == '@' && strncasecmp(p + 1, "import", 6) == 0 &&
|
|
1402
|
+
(p + 7 >= pe || IS_WHITESPACE(p[7]) || p[7] == '\'' || p[7] == '"')) {
|
|
1403
|
+
// Skip to semicolon
|
|
1404
|
+
while (p < pe && *p != ';') p++;
|
|
1405
|
+
if (p < pe) p++; // Skip semicolon
|
|
1406
|
+
continue;
|
|
1407
|
+
}
|
|
1408
|
+
|
|
1409
|
+
// Hit non-@import content, stop skipping
|
|
1410
|
+
break;
|
|
1411
|
+
}
|
|
1412
|
+
|
|
1413
|
+
// Initialize parser context with offset
|
|
1414
|
+
ParserContext ctx;
|
|
1415
|
+
ctx.rules_array = rb_ary_new();
|
|
1416
|
+
ctx.media_index = rb_hash_new();
|
|
1417
|
+
ctx.rule_id_counter = rule_id_offset; // Start from offset
|
|
1418
|
+
ctx.media_query_count = 0;
|
|
1419
|
+
ctx.media_cache = NULL; // Removed - no perf benefit
|
|
1420
|
+
ctx.has_nesting = 0; // Will be set to 1 if any nested rules are created
|
|
1421
|
+
ctx.depth = 0; // Start at depth 0
|
|
1422
|
+
|
|
1423
|
+
// Parse CSS (top-level, no parent context)
|
|
1424
|
+
parse_css_recursive(&ctx, p, pe, NO_PARENT_MEDIA, NO_PARENT_SELECTOR, NO_PARENT_RULE_ID);
|
|
1425
|
+
|
|
1426
|
+
// Build result hash
|
|
1427
|
+
VALUE result = rb_hash_new();
|
|
1428
|
+
rb_hash_aset(result, ID2SYM(rb_intern("rules")), ctx.rules_array);
|
|
1429
|
+
rb_hash_aset(result, ID2SYM(rb_intern("_media_index")), ctx.media_index);
|
|
1430
|
+
rb_hash_aset(result, ID2SYM(rb_intern("charset")), charset);
|
|
1431
|
+
rb_hash_aset(result, ID2SYM(rb_intern("last_rule_id")), INT2FIX(ctx.rule_id_counter));
|
|
1432
|
+
rb_hash_aset(result, ID2SYM(rb_intern("_has_nesting")), ctx.has_nesting ? Qtrue : Qfalse);
|
|
1433
|
+
|
|
1434
|
+
return result;
|
|
1435
|
+
}
|