cataract 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.clang-tidy +30 -0
- data/.github/workflows/ci-macos.yml +12 -0
- data/.github/workflows/ci.yml +77 -0
- data/.github/workflows/test.yml +76 -0
- data/.gitignore +45 -0
- data/.overcommit.yml +38 -0
- data/.rubocop.yml +83 -0
- data/BENCHMARKS.md +201 -0
- data/CHANGELOG.md +1 -0
- data/Gemfile +27 -0
- data/LICENSE +21 -0
- data/RAGEL_MIGRATION.md +60 -0
- data/README.md +292 -0
- data/Rakefile +209 -0
- data/benchmarks/benchmark_harness.rb +193 -0
- data/benchmarks/benchmark_merging.rb +121 -0
- data/benchmarks/benchmark_optimization_comparison.rb +168 -0
- data/benchmarks/benchmark_parsing.rb +153 -0
- data/benchmarks/benchmark_ragel_removal.rb +56 -0
- data/benchmarks/benchmark_runner.rb +70 -0
- data/benchmarks/benchmark_serialization.rb +180 -0
- data/benchmarks/benchmark_shorthand.rb +109 -0
- data/benchmarks/benchmark_shorthand_expansion.rb +176 -0
- data/benchmarks/benchmark_specificity.rb +124 -0
- data/benchmarks/benchmark_string_allocation.rb +151 -0
- data/benchmarks/benchmark_stylesheet_to_s.rb +62 -0
- data/benchmarks/benchmark_to_s_cached.rb +55 -0
- data/benchmarks/benchmark_value_splitter.rb +54 -0
- data/benchmarks/benchmark_yjit.rb +158 -0
- data/benchmarks/benchmark_yjit_workers.rb +61 -0
- data/benchmarks/profile_to_s.rb +23 -0
- data/benchmarks/speedup_calculator.rb +83 -0
- data/benchmarks/system_metadata.rb +81 -0
- data/benchmarks/templates/benchmarks.md.erb +221 -0
- data/benchmarks/yjit_tests.rb +141 -0
- data/cataract.gemspec +34 -0
- data/cliff.toml +92 -0
- data/examples/color_conversion_visual_test/color_conversion_test.html +3603 -0
- data/examples/color_conversion_visual_test/generate.rb +202 -0
- data/examples/color_conversion_visual_test/template.html.erb +259 -0
- data/examples/css_analyzer/analyzer.rb +164 -0
- data/examples/css_analyzer/analyzers/base.rb +33 -0
- data/examples/css_analyzer/analyzers/colors.rb +133 -0
- data/examples/css_analyzer/analyzers/important.rb +88 -0
- data/examples/css_analyzer/analyzers/properties.rb +61 -0
- data/examples/css_analyzer/analyzers/specificity.rb +68 -0
- data/examples/css_analyzer/templates/report.html.erb +575 -0
- data/examples/css_analyzer.rb +69 -0
- data/examples/github_analysis.html +5343 -0
- data/ext/cataract/cataract.c +1086 -0
- data/ext/cataract/cataract.h +174 -0
- data/ext/cataract/css_parser.c +1435 -0
- data/ext/cataract/extconf.rb +48 -0
- data/ext/cataract/import_scanner.c +174 -0
- data/ext/cataract/merge.c +973 -0
- data/ext/cataract/shorthand_expander.c +902 -0
- data/ext/cataract/specificity.c +213 -0
- data/ext/cataract/value_splitter.c +116 -0
- data/ext/cataract_color/cataract_color.c +16 -0
- data/ext/cataract_color/color_conversion.c +1687 -0
- data/ext/cataract_color/color_conversion.h +136 -0
- data/ext/cataract_color/color_conversion_lab.c +571 -0
- data/ext/cataract_color/color_conversion_named.c +259 -0
- data/ext/cataract_color/color_conversion_oklab.c +547 -0
- data/ext/cataract_color/extconf.rb +23 -0
- data/ext/cataract_old/cataract.c +393 -0
- data/ext/cataract_old/cataract.h +250 -0
- data/ext/cataract_old/css_parser.c +933 -0
- data/ext/cataract_old/extconf.rb +67 -0
- data/ext/cataract_old/import_scanner.c +174 -0
- data/ext/cataract_old/merge.c +776 -0
- data/ext/cataract_old/shorthand_expander.c +902 -0
- data/ext/cataract_old/specificity.c +213 -0
- data/ext/cataract_old/stylesheet.c +290 -0
- data/ext/cataract_old/value_splitter.c +116 -0
- data/lib/cataract/at_rule.rb +97 -0
- data/lib/cataract/color_conversion.rb +18 -0
- data/lib/cataract/declarations.rb +332 -0
- data/lib/cataract/import_resolver.rb +210 -0
- data/lib/cataract/rule.rb +131 -0
- data/lib/cataract/stylesheet.rb +716 -0
- data/lib/cataract/stylesheet_scope.rb +257 -0
- data/lib/cataract/version.rb +5 -0
- data/lib/cataract.rb +107 -0
- data/lib/tasks/gem.rake +158 -0
- data/scripts/fuzzer/run.rb +828 -0
- data/scripts/fuzzer/worker.rb +99 -0
- data/scripts/generate_benchmarks_md.rb +155 -0
- metadata +135 -0
|
@@ -0,0 +1,213 @@
|
|
|
1
|
+
/*
|
|
2
|
+
* specificity.c - CSS selector specificity calculator
|
|
3
|
+
*
|
|
4
|
+
* Calculates CSS selector specificity according to W3C spec:
|
|
5
|
+
* https://www.w3.org/TR/selectors/#specificity
|
|
6
|
+
*
|
|
7
|
+
* Specificity = a*100 + b*10 + c*1 where:
|
|
8
|
+
* a = count of ID selectors (#id)
|
|
9
|
+
* b = count of class selectors (.class), attributes ([attr]), and pseudo-classes (:hover)
|
|
10
|
+
* c = count of type selectors (div) and pseudo-elements (::before)
|
|
11
|
+
*
|
|
12
|
+
* Special handling:
|
|
13
|
+
* - :not() doesn't count itself, but its content does
|
|
14
|
+
* - Legacy pseudo-elements with single colon (:before) count as pseudo-elements
|
|
15
|
+
* - Universal selector (*) has zero specificity
|
|
16
|
+
*/
|
|
17
|
+
|
|
18
|
+
#include "cataract.h"
|
|
19
|
+
#include <string.h>
|
|
20
|
+
|
|
21
|
+
// Calculate specificity for a CSS selector string
|
|
22
|
+
VALUE calculate_specificity(VALUE self, VALUE selector_string) {
|
|
23
|
+
Check_Type(selector_string, T_STRING);
|
|
24
|
+
|
|
25
|
+
const char *p = RSTRING_PTR(selector_string);
|
|
26
|
+
const char *pe = p + RSTRING_LEN(selector_string);
|
|
27
|
+
|
|
28
|
+
// Counters for specificity components
|
|
29
|
+
int id_count = 0;
|
|
30
|
+
int class_count = 0;
|
|
31
|
+
int attr_count = 0;
|
|
32
|
+
int pseudo_class_count = 0;
|
|
33
|
+
int pseudo_element_count = 0;
|
|
34
|
+
int element_count = 0;
|
|
35
|
+
|
|
36
|
+
while (p < pe) {
|
|
37
|
+
char c = *p;
|
|
38
|
+
|
|
39
|
+
// Skip whitespace and combinators
|
|
40
|
+
if (IS_WHITESPACE(c) || c == '>' || c == '+' || c == '~' || c == ',') {
|
|
41
|
+
p++;
|
|
42
|
+
continue;
|
|
43
|
+
}
|
|
44
|
+
|
|
45
|
+
// ID selector: #id
|
|
46
|
+
if (c == '#') {
|
|
47
|
+
id_count++;
|
|
48
|
+
p++;
|
|
49
|
+
// Skip the identifier
|
|
50
|
+
while (p < pe && ((*p >= 'a' && *p <= 'z') || (*p >= 'A' && *p <= 'Z') ||
|
|
51
|
+
(*p >= '0' && *p <= '9') || *p == '-' || *p == '_')) {
|
|
52
|
+
p++;
|
|
53
|
+
}
|
|
54
|
+
continue;
|
|
55
|
+
}
|
|
56
|
+
|
|
57
|
+
// Class selector: .class
|
|
58
|
+
if (c == '.') {
|
|
59
|
+
class_count++;
|
|
60
|
+
p++;
|
|
61
|
+
// Skip the identifier
|
|
62
|
+
while (p < pe && ((*p >= 'a' && *p <= 'z') || (*p >= 'A' && *p <= 'Z') ||
|
|
63
|
+
(*p >= '0' && *p <= '9') || *p == '-' || *p == '_')) {
|
|
64
|
+
p++;
|
|
65
|
+
}
|
|
66
|
+
continue;
|
|
67
|
+
}
|
|
68
|
+
|
|
69
|
+
// Attribute selector: [attr] or [attr=value]
|
|
70
|
+
if (c == '[') {
|
|
71
|
+
attr_count++;
|
|
72
|
+
p++;
|
|
73
|
+
// Skip to closing bracket
|
|
74
|
+
int bracket_depth = 1;
|
|
75
|
+
while (p < pe && bracket_depth > 0) {
|
|
76
|
+
if (*p == '[') bracket_depth++;
|
|
77
|
+
else if (*p == ']') bracket_depth--;
|
|
78
|
+
p++;
|
|
79
|
+
}
|
|
80
|
+
continue;
|
|
81
|
+
}
|
|
82
|
+
|
|
83
|
+
// Pseudo-element (::) or pseudo-class (:)
|
|
84
|
+
if (c == ':') {
|
|
85
|
+
p++;
|
|
86
|
+
int is_pseudo_element = 0;
|
|
87
|
+
|
|
88
|
+
// Check for double colon (::)
|
|
89
|
+
if (p < pe && *p == ':') {
|
|
90
|
+
is_pseudo_element = 1;
|
|
91
|
+
p++;
|
|
92
|
+
}
|
|
93
|
+
|
|
94
|
+
// Extract pseudo name
|
|
95
|
+
const char *pseudo_start = p;
|
|
96
|
+
while (p < pe && ((*p >= 'a' && *p <= 'z') || (*p >= 'A' && *p <= 'Z') ||
|
|
97
|
+
(*p >= '0' && *p <= '9') || *p == '-')) {
|
|
98
|
+
p++;
|
|
99
|
+
}
|
|
100
|
+
long pseudo_len = p - pseudo_start;
|
|
101
|
+
|
|
102
|
+
// Check for legacy pseudo-elements (single colon but should be double)
|
|
103
|
+
// :before, :after, :first-line, :first-letter, :selection
|
|
104
|
+
int is_legacy_pseudo_element = 0;
|
|
105
|
+
if (!is_pseudo_element && pseudo_len > 0) {
|
|
106
|
+
is_legacy_pseudo_element =
|
|
107
|
+
(pseudo_len == 6 && strncmp(pseudo_start, "before", 6) == 0) ||
|
|
108
|
+
(pseudo_len == 5 && strncmp(pseudo_start, "after", 5) == 0) ||
|
|
109
|
+
(pseudo_len == 10 && strncmp(pseudo_start, "first-line", 10) == 0) ||
|
|
110
|
+
(pseudo_len == 12 && strncmp(pseudo_start, "first-letter", 12) == 0) ||
|
|
111
|
+
(pseudo_len == 9 && strncmp(pseudo_start, "selection", 9) == 0);
|
|
112
|
+
}
|
|
113
|
+
|
|
114
|
+
// Check for :not() - it doesn't count itself, but its content does
|
|
115
|
+
int is_not = (pseudo_len == 3 && strncmp(pseudo_start, "not", 3) == 0);
|
|
116
|
+
|
|
117
|
+
// Skip function arguments if present
|
|
118
|
+
if (p < pe && *p == '(') {
|
|
119
|
+
p++;
|
|
120
|
+
int paren_depth = 1;
|
|
121
|
+
|
|
122
|
+
// If it's :not(), we need to calculate specificity of the content
|
|
123
|
+
if (is_not) {
|
|
124
|
+
const char *not_content_start = p;
|
|
125
|
+
|
|
126
|
+
// Find closing paren
|
|
127
|
+
while (p < pe && paren_depth > 0) {
|
|
128
|
+
if (*p == '(') paren_depth++;
|
|
129
|
+
else if (*p == ')') paren_depth--;
|
|
130
|
+
if (paren_depth > 0) p++;
|
|
131
|
+
}
|
|
132
|
+
|
|
133
|
+
const char *not_content_end = p;
|
|
134
|
+
long not_content_len = not_content_end - not_content_start;
|
|
135
|
+
|
|
136
|
+
// Recursively calculate specificity of :not() content
|
|
137
|
+
if (not_content_len > 0) {
|
|
138
|
+
VALUE not_content = rb_str_new(not_content_start, not_content_len);
|
|
139
|
+
VALUE not_spec = calculate_specificity(self, not_content);
|
|
140
|
+
int not_specificity = NUM2INT(not_spec);
|
|
141
|
+
|
|
142
|
+
// Add :not() content's specificity to our counts
|
|
143
|
+
int additional_a = not_specificity / 100;
|
|
144
|
+
int additional_b = (not_specificity % 100) / 10;
|
|
145
|
+
int additional_c = not_specificity % 10;
|
|
146
|
+
|
|
147
|
+
id_count += additional_a;
|
|
148
|
+
class_count += additional_b;
|
|
149
|
+
element_count += additional_c;
|
|
150
|
+
|
|
151
|
+
RB_GC_GUARD(not_content);
|
|
152
|
+
RB_GC_GUARD(not_spec);
|
|
153
|
+
}
|
|
154
|
+
|
|
155
|
+
p++; // Skip closing paren
|
|
156
|
+
} else {
|
|
157
|
+
// Skip other function arguments
|
|
158
|
+
while (p < pe && paren_depth > 0) {
|
|
159
|
+
if (*p == '(') paren_depth++;
|
|
160
|
+
else if (*p == ')') paren_depth--;
|
|
161
|
+
p++;
|
|
162
|
+
}
|
|
163
|
+
|
|
164
|
+
// Count the pseudo-class/element
|
|
165
|
+
if (is_pseudo_element || is_legacy_pseudo_element) {
|
|
166
|
+
pseudo_element_count++;
|
|
167
|
+
} else {
|
|
168
|
+
pseudo_class_count++;
|
|
169
|
+
}
|
|
170
|
+
}
|
|
171
|
+
} else {
|
|
172
|
+
// No function arguments - count the pseudo-class/element
|
|
173
|
+
if (is_not) {
|
|
174
|
+
// :not without parens is invalid, but don't count it
|
|
175
|
+
} else if (is_pseudo_element || is_legacy_pseudo_element) {
|
|
176
|
+
pseudo_element_count++;
|
|
177
|
+
} else {
|
|
178
|
+
pseudo_class_count++;
|
|
179
|
+
}
|
|
180
|
+
}
|
|
181
|
+
continue;
|
|
182
|
+
}
|
|
183
|
+
|
|
184
|
+
// Universal selector: *
|
|
185
|
+
if (c == '*') {
|
|
186
|
+
// Universal selector has specificity 0, don't count
|
|
187
|
+
p++;
|
|
188
|
+
continue;
|
|
189
|
+
}
|
|
190
|
+
|
|
191
|
+
// Type selector (element name): div, span, etc.
|
|
192
|
+
if ((*p >= 'a' && *p <= 'z') || (*p >= 'A' && *p <= 'Z')) {
|
|
193
|
+
element_count++;
|
|
194
|
+
// Skip the identifier
|
|
195
|
+
while (p < pe && ((*p >= 'a' && *p <= 'z') || (*p >= 'A' && *p <= 'Z') ||
|
|
196
|
+
(*p >= '0' && *p <= '9') || *p == '-' || *p == '_')) {
|
|
197
|
+
p++;
|
|
198
|
+
}
|
|
199
|
+
continue;
|
|
200
|
+
}
|
|
201
|
+
|
|
202
|
+
// Unknown character, skip it
|
|
203
|
+
p++;
|
|
204
|
+
}
|
|
205
|
+
|
|
206
|
+
// Calculate specificity using W3C formula:
|
|
207
|
+
// IDs * 100 + (classes + attributes + pseudo-classes) * 10 + (elements + pseudo-elements) * 1
|
|
208
|
+
int specificity = (id_count * 100) +
|
|
209
|
+
((class_count + attr_count + pseudo_class_count) * 10) +
|
|
210
|
+
((element_count + pseudo_element_count) * 1);
|
|
211
|
+
|
|
212
|
+
return INT2NUM(specificity);
|
|
213
|
+
}
|
|
@@ -0,0 +1,290 @@
|
|
|
1
|
+
#include "cataract.h"
|
|
2
|
+
#include <stdio.h>
|
|
3
|
+
|
|
4
|
+
/*
|
|
5
|
+
* C implementation of Stylesheet#to_s with no rb_funcall
|
|
6
|
+
* Optimized for new hash structure: {query_string => {media_types: [...], rules: [...]}}
|
|
7
|
+
*
|
|
8
|
+
* This provides ~36% speedup over the Ruby implementation for serialization,
|
|
9
|
+
* which is important since to_s is a hot path in the premailer use case.
|
|
10
|
+
*/
|
|
11
|
+
|
|
12
|
+
// Context handed (cast to VALUE) to merge_selector_callback while iterating
// the selector => rules hash of one group.
struct merge_selector_ctx {
    VALUE merged_rules;  // Array the callback pushes one rule per selector onto
    VALUE self;          // Receiver forwarded as first argument to cataract_merge
};
|
|
17
|
+
|
|
18
|
+
// Callback for merging rules with the same selector within a media group
|
|
19
|
+
static int merge_selector_callback(VALUE selector, VALUE selector_rules, VALUE arg) {
|
|
20
|
+
struct merge_selector_ctx *ctx = (struct merge_selector_ctx *)arg;
|
|
21
|
+
|
|
22
|
+
// If only one rule, use it directly
|
|
23
|
+
if (RARRAY_LEN(selector_rules) == 1) {
|
|
24
|
+
rb_ary_push(ctx->merged_rules, RARRAY_AREF(selector_rules, 0));
|
|
25
|
+
return ST_CONTINUE;
|
|
26
|
+
}
|
|
27
|
+
|
|
28
|
+
// Multiple rules with same selector - merge them
|
|
29
|
+
VALUE first_rule = RARRAY_AREF(selector_rules, 0);
|
|
30
|
+
VALUE specificity = rb_struct_aref(first_rule, INT2FIX(RULE_SPECIFICITY));
|
|
31
|
+
|
|
32
|
+
// Merge declarations for this selector (C function, no rb_funcall)
|
|
33
|
+
VALUE merged_declarations = cataract_merge(ctx->self, selector_rules);
|
|
34
|
+
|
|
35
|
+
// Create new merged Rule struct
|
|
36
|
+
VALUE merged_rule = rb_struct_new(cRule, selector, merged_declarations, specificity);
|
|
37
|
+
rb_ary_push(ctx->merged_rules, merged_rule);
|
|
38
|
+
|
|
39
|
+
return ST_CONTINUE;
|
|
40
|
+
}
|
|
41
|
+
|
|
42
|
+
// Context handed (cast to VALUE) to process_group_callback for each
// query_string => group entry.
struct process_group_ctx {
    VALUE result;  // String buffer the callback appends serialized CSS to
    VALUE self;    // Receiver copied into the merge_selector_ctx used for merging
};
|
|
47
|
+
|
|
48
|
+
// Callback for processing each media query group
|
|
49
|
+
static int process_group_callback(VALUE query_string, VALUE group_hash, VALUE arg) {
|
|
50
|
+
struct process_group_ctx *ctx = (struct process_group_ctx *)arg;
|
|
51
|
+
|
|
52
|
+
// Extract rules array from group hash
|
|
53
|
+
VALUE rules_array = rb_hash_aref(group_hash, ID2SYM(rb_intern("rules")));
|
|
54
|
+
if (NIL_P(rules_array) || RARRAY_LEN(rules_array) == 0) {
|
|
55
|
+
return ST_CONTINUE; // Skip empty groups
|
|
56
|
+
}
|
|
57
|
+
|
|
58
|
+
// Group rules by selector for merging
|
|
59
|
+
VALUE rules_by_selector = rb_hash_new();
|
|
60
|
+
long rules_len = RARRAY_LEN(rules_array);
|
|
61
|
+
|
|
62
|
+
for (long i = 0; i < rules_len; i++) {
|
|
63
|
+
VALUE rule = RARRAY_AREF(rules_array, i);
|
|
64
|
+
VALUE selector = rb_struct_aref(rule, INT2FIX(RULE_SELECTOR));
|
|
65
|
+
|
|
66
|
+
VALUE selector_group = rb_hash_aref(rules_by_selector, selector);
|
|
67
|
+
if (NIL_P(selector_group)) {
|
|
68
|
+
selector_group = rb_ary_new();
|
|
69
|
+
rb_hash_aset(rules_by_selector, selector, selector_group);
|
|
70
|
+
}
|
|
71
|
+
rb_ary_push(selector_group, rule);
|
|
72
|
+
}
|
|
73
|
+
|
|
74
|
+
// Merge rules with same selector
|
|
75
|
+
VALUE merged_rules = rb_ary_new();
|
|
76
|
+
struct merge_selector_ctx merge_ctx = { merged_rules, ctx->self };
|
|
77
|
+
rb_hash_foreach(rules_by_selector, merge_selector_callback, (VALUE)&merge_ctx);
|
|
78
|
+
|
|
79
|
+
// Check if this is a media query or not
|
|
80
|
+
int has_media_query = !NIL_P(query_string);
|
|
81
|
+
|
|
82
|
+
if (has_media_query) {
|
|
83
|
+
// Output @media wrapper
|
|
84
|
+
rb_str_buf_cat2(ctx->result, "@media ");
|
|
85
|
+
rb_str_buf_append(ctx->result, query_string);
|
|
86
|
+
rb_str_buf_cat2(ctx->result, " {\n");
|
|
87
|
+
}
|
|
88
|
+
|
|
89
|
+
// Output each merged rule
|
|
90
|
+
long merged_len = RARRAY_LEN(merged_rules);
|
|
91
|
+
for (long j = 0; j < merged_len; j++) {
|
|
92
|
+
VALUE rule = RARRAY_AREF(merged_rules, j);
|
|
93
|
+
VALUE selector = rb_struct_aref(rule, INT2FIX(RULE_SELECTOR));
|
|
94
|
+
VALUE declarations = rb_struct_aref(rule, INT2FIX(RULE_DECLARATIONS));
|
|
95
|
+
|
|
96
|
+
if (has_media_query) {
|
|
97
|
+
rb_str_buf_cat2(ctx->result, " ");
|
|
98
|
+
}
|
|
99
|
+
|
|
100
|
+
rb_str_buf_append(ctx->result, selector);
|
|
101
|
+
rb_str_buf_cat2(ctx->result, " { ");
|
|
102
|
+
|
|
103
|
+
// C function, no rb_funcall
|
|
104
|
+
VALUE decls_str = declarations_array_to_s(declarations);
|
|
105
|
+
rb_str_buf_append(ctx->result, decls_str);
|
|
106
|
+
|
|
107
|
+
rb_str_buf_cat2(ctx->result, " }\n");
|
|
108
|
+
}
|
|
109
|
+
|
|
110
|
+
if (has_media_query) {
|
|
111
|
+
rb_str_buf_cat2(ctx->result, "}\n");
|
|
112
|
+
}
|
|
113
|
+
|
|
114
|
+
RB_GC_GUARD(rules_array);
|
|
115
|
+
RB_GC_GUARD(rules_by_selector);
|
|
116
|
+
RB_GC_GUARD(merged_rules);
|
|
117
|
+
|
|
118
|
+
return ST_CONTINUE;
|
|
119
|
+
}
|
|
120
|
+
|
|
121
|
+
// Main function: stylesheet_to_s_c(rule_groups_hash, charset)
|
|
122
|
+
// New signature: takes hash structure {query_string => {media_types: [...], rules: [...]}}
|
|
123
|
+
VALUE stylesheet_to_s_c(VALUE self, VALUE rule_groups, VALUE charset) {
|
|
124
|
+
Check_Type(rule_groups, T_HASH);
|
|
125
|
+
|
|
126
|
+
long num_groups = RHASH_SIZE(rule_groups);
|
|
127
|
+
|
|
128
|
+
// Handle empty stylesheet
|
|
129
|
+
if (num_groups == 0) {
|
|
130
|
+
if (!NIL_P(charset)) {
|
|
131
|
+
// Even empty stylesheet should emit @charset if present
|
|
132
|
+
VALUE result = UTF8_STR("@charset \"");
|
|
133
|
+
rb_str_buf_append(result, charset);
|
|
134
|
+
rb_str_buf_cat2(result, "\";\n");
|
|
135
|
+
return result;
|
|
136
|
+
}
|
|
137
|
+
return UTF8_STR("");
|
|
138
|
+
}
|
|
139
|
+
|
|
140
|
+
// Allocate result string with reasonable capacity
|
|
141
|
+
VALUE result = rb_str_buf_new(num_groups * 100);
|
|
142
|
+
|
|
143
|
+
// Emit @charset first if present (must be first per W3C spec)
|
|
144
|
+
if (!NIL_P(charset)) {
|
|
145
|
+
rb_str_buf_cat2(result, "@charset \"");
|
|
146
|
+
rb_str_buf_append(result, charset);
|
|
147
|
+
rb_str_buf_cat2(result, "\";\n");
|
|
148
|
+
}
|
|
149
|
+
|
|
150
|
+
// Process each media query group
|
|
151
|
+
struct process_group_ctx ctx = { result, self };
|
|
152
|
+
rb_hash_foreach(rule_groups, process_group_callback, (VALUE)&ctx);
|
|
153
|
+
|
|
154
|
+
RB_GC_GUARD(result);
|
|
155
|
+
|
|
156
|
+
return result;
|
|
157
|
+
}
|
|
158
|
+
|
|
159
|
+
// ============================================================================
|
|
160
|
+
// Formatted output (to_formatted_s)
|
|
161
|
+
// ============================================================================
|
|
162
|
+
|
|
163
|
+
// Context handed (cast to VALUE) to format_group_callback for each
// query_string => group entry.
struct format_group_ctx {
    VALUE result;  // String buffer the callback appends formatted CSS to
    VALUE self;    // Receiver copied into the merge_selector_ctx used for merging
};
|
|
168
|
+
|
|
169
|
+
// Callback for formatted output with newlines and 2-space indentation
|
|
170
|
+
static int format_group_callback(VALUE query_string, VALUE group_hash, VALUE arg) {
|
|
171
|
+
struct format_group_ctx *ctx = (struct format_group_ctx *)arg;
|
|
172
|
+
|
|
173
|
+
// Extract rules array from group hash
|
|
174
|
+
VALUE rules_array = rb_hash_aref(group_hash, ID2SYM(rb_intern("rules")));
|
|
175
|
+
if (NIL_P(rules_array) || RARRAY_LEN(rules_array) == 0) {
|
|
176
|
+
return ST_CONTINUE; // Skip empty groups
|
|
177
|
+
}
|
|
178
|
+
|
|
179
|
+
// Group rules by selector for merging
|
|
180
|
+
VALUE rules_by_selector = rb_hash_new();
|
|
181
|
+
long rules_len = RARRAY_LEN(rules_array);
|
|
182
|
+
|
|
183
|
+
for (long i = 0; i < rules_len; i++) {
|
|
184
|
+
VALUE rule = RARRAY_AREF(rules_array, i);
|
|
185
|
+
VALUE selector = rb_struct_aref(rule, INT2FIX(RULE_SELECTOR));
|
|
186
|
+
|
|
187
|
+
VALUE selector_group = rb_hash_aref(rules_by_selector, selector);
|
|
188
|
+
if (NIL_P(selector_group)) {
|
|
189
|
+
selector_group = rb_ary_new();
|
|
190
|
+
rb_hash_aset(rules_by_selector, selector, selector_group);
|
|
191
|
+
}
|
|
192
|
+
rb_ary_push(selector_group, rule);
|
|
193
|
+
}
|
|
194
|
+
|
|
195
|
+
// Merge rules with same selector
|
|
196
|
+
VALUE merged_rules = rb_ary_new();
|
|
197
|
+
struct merge_selector_ctx merge_ctx = { merged_rules, ctx->self };
|
|
198
|
+
rb_hash_foreach(rules_by_selector, merge_selector_callback, (VALUE)&merge_ctx);
|
|
199
|
+
|
|
200
|
+
// Check if this is a media query or not
|
|
201
|
+
int has_media_query = !NIL_P(query_string);
|
|
202
|
+
|
|
203
|
+
if (has_media_query) {
|
|
204
|
+
// Output @media wrapper
|
|
205
|
+
rb_str_buf_cat2(ctx->result, "@media ");
|
|
206
|
+
rb_str_buf_append(ctx->result, query_string);
|
|
207
|
+
rb_str_buf_cat2(ctx->result, " {\n");
|
|
208
|
+
}
|
|
209
|
+
|
|
210
|
+
// Output each merged rule with formatting
|
|
211
|
+
long merged_len = RARRAY_LEN(merged_rules);
|
|
212
|
+
for (long j = 0; j < merged_len; j++) {
|
|
213
|
+
VALUE rule = RARRAY_AREF(merged_rules, j);
|
|
214
|
+
VALUE selector = rb_struct_aref(rule, INT2FIX(RULE_SELECTOR));
|
|
215
|
+
VALUE declarations = rb_struct_aref(rule, INT2FIX(RULE_DECLARATIONS));
|
|
216
|
+
|
|
217
|
+
// Indent selector if inside media query
|
|
218
|
+
if (has_media_query) {
|
|
219
|
+
rb_str_buf_cat2(ctx->result, " ");
|
|
220
|
+
}
|
|
221
|
+
|
|
222
|
+
// Selector on its own line
|
|
223
|
+
rb_str_buf_append(ctx->result, selector);
|
|
224
|
+
rb_str_buf_cat2(ctx->result, " {\n");
|
|
225
|
+
|
|
226
|
+
// Declarations indented with 2 spaces (or 4 if inside media query)
|
|
227
|
+
const char *indent = has_media_query ? " " : " ";
|
|
228
|
+
rb_str_buf_cat2(ctx->result, indent);
|
|
229
|
+
|
|
230
|
+
// Get declarations string
|
|
231
|
+
VALUE decls_str = declarations_array_to_s(declarations);
|
|
232
|
+
rb_str_buf_append(ctx->result, decls_str);
|
|
233
|
+
|
|
234
|
+
rb_str_buf_cat2(ctx->result, "\n");
|
|
235
|
+
|
|
236
|
+
// Closing brace
|
|
237
|
+
if (has_media_query) {
|
|
238
|
+
rb_str_buf_cat2(ctx->result, " ");
|
|
239
|
+
}
|
|
240
|
+
rb_str_buf_cat2(ctx->result, "}\n");
|
|
241
|
+
}
|
|
242
|
+
|
|
243
|
+
if (has_media_query) {
|
|
244
|
+
rb_str_buf_cat2(ctx->result, "}\n");
|
|
245
|
+
}
|
|
246
|
+
|
|
247
|
+
RB_GC_GUARD(rules_array);
|
|
248
|
+
RB_GC_GUARD(rules_by_selector);
|
|
249
|
+
RB_GC_GUARD(merged_rules);
|
|
250
|
+
|
|
251
|
+
return ST_CONTINUE;
|
|
252
|
+
}
|
|
253
|
+
|
|
254
|
+
// stylesheet_to_formatted_s_c(rule_groups_hash, charset)
|
|
255
|
+
// Returns formatted multi-line output with 2-space indentation
|
|
256
|
+
// Not optimized for performance since it's not in the hot path
|
|
257
|
+
VALUE stylesheet_to_formatted_s_c(VALUE self, VALUE rule_groups, VALUE charset) {
|
|
258
|
+
Check_Type(rule_groups, T_HASH);
|
|
259
|
+
|
|
260
|
+
long num_groups = RHASH_SIZE(rule_groups);
|
|
261
|
+
|
|
262
|
+
// Handle empty stylesheet
|
|
263
|
+
if (num_groups == 0) {
|
|
264
|
+
if (!NIL_P(charset)) {
|
|
265
|
+
VALUE result = UTF8_STR("@charset \"");
|
|
266
|
+
rb_str_buf_append(result, charset);
|
|
267
|
+
rb_str_buf_cat2(result, "\";\n");
|
|
268
|
+
return result;
|
|
269
|
+
}
|
|
270
|
+
return UTF8_STR("");
|
|
271
|
+
}
|
|
272
|
+
|
|
273
|
+
// Simple allocation - let Ruby resize as needed (not in hot path)
|
|
274
|
+
VALUE result = UTF8_STR("");
|
|
275
|
+
|
|
276
|
+
// Emit @charset first if present
|
|
277
|
+
if (!NIL_P(charset)) {
|
|
278
|
+
rb_str_buf_cat2(result, "@charset \"");
|
|
279
|
+
rb_str_buf_append(result, charset);
|
|
280
|
+
rb_str_buf_cat2(result, "\";\n");
|
|
281
|
+
}
|
|
282
|
+
|
|
283
|
+
// Process each media query group with formatting
|
|
284
|
+
struct format_group_ctx ctx = { result, self };
|
|
285
|
+
rb_hash_foreach(rule_groups, format_group_callback, (VALUE)&ctx);
|
|
286
|
+
|
|
287
|
+
RB_GC_GUARD(result);
|
|
288
|
+
|
|
289
|
+
return result;
|
|
290
|
+
}
|
|
@@ -0,0 +1,116 @@
|
|
|
1
|
+
/*
|
|
2
|
+
* value_splitter.c - CSS value splitting utility
|
|
3
|
+
*
|
|
4
|
+
* Purpose: Split CSS declaration values on whitespace while preserving content
|
|
5
|
+
* inside functions and quoted strings.
|
|
6
|
+
*
|
|
7
|
+
* Examples:
|
|
8
|
+
* "1px 2px 3px 4px" => ["1px", "2px", "3px", "4px"]
|
|
9
|
+
* "10px calc(100% - 20px)" => ["10px", "calc(100% - 20px)"]
|
|
10
|
+
* "rgb(255, 0, 0) blue" => ["rgb(255, 0, 0)", "blue"]
|
|
11
|
+
* "'Helvetica Neue', sans-serif" => ["'Helvetica Neue',", "sans-serif"]
|
|
12
|
+
*/
|
|
13
|
+
|
|
14
|
+
#include "cataract.h"
|
|
15
|
+
|
|
16
|
+
/*
|
|
17
|
+
* Split a CSS declaration value on whitespace while preserving content
|
|
18
|
+
* inside functions and quoted strings.
|
|
19
|
+
*
|
|
20
|
+
* Algorithm:
|
|
21
|
+
* - Track parenthesis depth for functions like calc(), rgb()
|
|
22
|
+
* - Track quote state for strings like 'Helvetica Neue'
|
|
23
|
+
* - Split on whitespace only when depth=0 and not in quotes
|
|
24
|
+
*
|
|
25
|
+
* @param value [String] Pre-parsed CSS declaration value (assumed well-formed)
|
|
26
|
+
* @return [Array<String>] Array of value tokens
|
|
27
|
+
*/
|
|
28
|
+
VALUE cataract_split_value(VALUE self, VALUE value) {
|
|
29
|
+
Check_Type(value, T_STRING);
|
|
30
|
+
const char *str = RSTRING_PTR(value);
|
|
31
|
+
long len = RSTRING_LEN(value);
|
|
32
|
+
|
|
33
|
+
// Sanity check: reject unreasonably long values (DoS protection)
|
|
34
|
+
if (len > 65536) {
|
|
35
|
+
rb_raise(rb_eArgError, "CSS value too long (max 64KB)");
|
|
36
|
+
}
|
|
37
|
+
|
|
38
|
+
// Result array
|
|
39
|
+
VALUE result = rb_ary_new();
|
|
40
|
+
|
|
41
|
+
// State tracking
|
|
42
|
+
int paren_depth = 0;
|
|
43
|
+
int in_quotes = 0;
|
|
44
|
+
char quote_char = '\0';
|
|
45
|
+
const char *token_start = NULL;
|
|
46
|
+
const char *p = str;
|
|
47
|
+
const char *pe = str + len;
|
|
48
|
+
|
|
49
|
+
while (p < pe) {
|
|
50
|
+
char c = *p;
|
|
51
|
+
|
|
52
|
+
// Handle quotes
|
|
53
|
+
if ((c == '"' || c == '\'') && !in_quotes) {
|
|
54
|
+
// Opening quote
|
|
55
|
+
in_quotes = 1;
|
|
56
|
+
quote_char = c;
|
|
57
|
+
if (token_start == NULL) token_start = p;
|
|
58
|
+
p++;
|
|
59
|
+
continue;
|
|
60
|
+
}
|
|
61
|
+
|
|
62
|
+
if (in_quotes && c == quote_char) {
|
|
63
|
+
// Closing quote
|
|
64
|
+
in_quotes = 0;
|
|
65
|
+
p++;
|
|
66
|
+
continue;
|
|
67
|
+
}
|
|
68
|
+
|
|
69
|
+
// Handle parentheses (only when not in quotes)
|
|
70
|
+
if (!in_quotes) {
|
|
71
|
+
if (c == '(') {
|
|
72
|
+
paren_depth++;
|
|
73
|
+
if (token_start == NULL) token_start = p;
|
|
74
|
+
p++;
|
|
75
|
+
continue;
|
|
76
|
+
}
|
|
77
|
+
|
|
78
|
+
if (c == ')') {
|
|
79
|
+
paren_depth--;
|
|
80
|
+
p++;
|
|
81
|
+
continue;
|
|
82
|
+
}
|
|
83
|
+
|
|
84
|
+
// Handle whitespace (delimiter when depth=0 and not quoted)
|
|
85
|
+
if (IS_WHITESPACE(c)) {
|
|
86
|
+
if (paren_depth == 0 && !in_quotes) {
|
|
87
|
+
// Emit token if we have one
|
|
88
|
+
if (token_start != NULL) {
|
|
89
|
+
size_t token_len = p - token_start;
|
|
90
|
+
VALUE token = rb_str_new(token_start, token_len);
|
|
91
|
+
rb_ary_push(result, token);
|
|
92
|
+
token_start = NULL;
|
|
93
|
+
}
|
|
94
|
+
p++;
|
|
95
|
+
continue;
|
|
96
|
+
}
|
|
97
|
+
// else: whitespace inside function/quotes, part of token
|
|
98
|
+
}
|
|
99
|
+
}
|
|
100
|
+
|
|
101
|
+
// Regular character - mark start if needed
|
|
102
|
+
if (token_start == NULL) {
|
|
103
|
+
token_start = p;
|
|
104
|
+
}
|
|
105
|
+
p++;
|
|
106
|
+
}
|
|
107
|
+
|
|
108
|
+
// Emit final token if any
|
|
109
|
+
if (token_start != NULL) {
|
|
110
|
+
size_t token_len = pe - token_start;
|
|
111
|
+
VALUE token = rb_str_new(token_start, token_len);
|
|
112
|
+
rb_ary_push(result, token);
|
|
113
|
+
}
|
|
114
|
+
|
|
115
|
+
return result;
|
|
116
|
+
}
|