cataract 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.clang-tidy +30 -0
- data/.github/workflows/ci-macos.yml +12 -0
- data/.github/workflows/ci.yml +77 -0
- data/.github/workflows/test.yml +76 -0
- data/.gitignore +45 -0
- data/.overcommit.yml +38 -0
- data/.rubocop.yml +83 -0
- data/BENCHMARKS.md +201 -0
- data/CHANGELOG.md +1 -0
- data/Gemfile +27 -0
- data/LICENSE +21 -0
- data/RAGEL_MIGRATION.md +60 -0
- data/README.md +292 -0
- data/Rakefile +209 -0
- data/benchmarks/benchmark_harness.rb +193 -0
- data/benchmarks/benchmark_merging.rb +121 -0
- data/benchmarks/benchmark_optimization_comparison.rb +168 -0
- data/benchmarks/benchmark_parsing.rb +153 -0
- data/benchmarks/benchmark_ragel_removal.rb +56 -0
- data/benchmarks/benchmark_runner.rb +70 -0
- data/benchmarks/benchmark_serialization.rb +180 -0
- data/benchmarks/benchmark_shorthand.rb +109 -0
- data/benchmarks/benchmark_shorthand_expansion.rb +176 -0
- data/benchmarks/benchmark_specificity.rb +124 -0
- data/benchmarks/benchmark_string_allocation.rb +151 -0
- data/benchmarks/benchmark_stylesheet_to_s.rb +62 -0
- data/benchmarks/benchmark_to_s_cached.rb +55 -0
- data/benchmarks/benchmark_value_splitter.rb +54 -0
- data/benchmarks/benchmark_yjit.rb +158 -0
- data/benchmarks/benchmark_yjit_workers.rb +61 -0
- data/benchmarks/profile_to_s.rb +23 -0
- data/benchmarks/speedup_calculator.rb +83 -0
- data/benchmarks/system_metadata.rb +81 -0
- data/benchmarks/templates/benchmarks.md.erb +221 -0
- data/benchmarks/yjit_tests.rb +141 -0
- data/cataract.gemspec +34 -0
- data/cliff.toml +92 -0
- data/examples/color_conversion_visual_test/color_conversion_test.html +3603 -0
- data/examples/color_conversion_visual_test/generate.rb +202 -0
- data/examples/color_conversion_visual_test/template.html.erb +259 -0
- data/examples/css_analyzer/analyzer.rb +164 -0
- data/examples/css_analyzer/analyzers/base.rb +33 -0
- data/examples/css_analyzer/analyzers/colors.rb +133 -0
- data/examples/css_analyzer/analyzers/important.rb +88 -0
- data/examples/css_analyzer/analyzers/properties.rb +61 -0
- data/examples/css_analyzer/analyzers/specificity.rb +68 -0
- data/examples/css_analyzer/templates/report.html.erb +575 -0
- data/examples/css_analyzer.rb +69 -0
- data/examples/github_analysis.html +5343 -0
- data/ext/cataract/cataract.c +1086 -0
- data/ext/cataract/cataract.h +174 -0
- data/ext/cataract/css_parser.c +1435 -0
- data/ext/cataract/extconf.rb +48 -0
- data/ext/cataract/import_scanner.c +174 -0
- data/ext/cataract/merge.c +973 -0
- data/ext/cataract/shorthand_expander.c +902 -0
- data/ext/cataract/specificity.c +213 -0
- data/ext/cataract/value_splitter.c +116 -0
- data/ext/cataract_color/cataract_color.c +16 -0
- data/ext/cataract_color/color_conversion.c +1687 -0
- data/ext/cataract_color/color_conversion.h +136 -0
- data/ext/cataract_color/color_conversion_lab.c +571 -0
- data/ext/cataract_color/color_conversion_named.c +259 -0
- data/ext/cataract_color/color_conversion_oklab.c +547 -0
- data/ext/cataract_color/extconf.rb +23 -0
- data/ext/cataract_old/cataract.c +393 -0
- data/ext/cataract_old/cataract.h +250 -0
- data/ext/cataract_old/css_parser.c +933 -0
- data/ext/cataract_old/extconf.rb +67 -0
- data/ext/cataract_old/import_scanner.c +174 -0
- data/ext/cataract_old/merge.c +776 -0
- data/ext/cataract_old/shorthand_expander.c +902 -0
- data/ext/cataract_old/specificity.c +213 -0
- data/ext/cataract_old/stylesheet.c +290 -0
- data/ext/cataract_old/value_splitter.c +116 -0
- data/lib/cataract/at_rule.rb +97 -0
- data/lib/cataract/color_conversion.rb +18 -0
- data/lib/cataract/declarations.rb +332 -0
- data/lib/cataract/import_resolver.rb +210 -0
- data/lib/cataract/rule.rb +131 -0
- data/lib/cataract/stylesheet.rb +716 -0
- data/lib/cataract/stylesheet_scope.rb +257 -0
- data/lib/cataract/version.rb +5 -0
- data/lib/cataract.rb +107 -0
- data/lib/tasks/gem.rake +158 -0
- data/scripts/fuzzer/run.rb +828 -0
- data/scripts/fuzzer/worker.rb +99 -0
- data/scripts/generate_benchmarks_md.rb +155 -0
- metadata +135 -0
|
@@ -0,0 +1,393 @@
|
|
|
1
|
+
#include <ruby.h>
|
|
2
|
+
#include <stdio.h>
|
|
3
|
+
#include "cataract.h"
|
|
4
|
+
|
|
5
|
+
// Global struct class definitions (declared extern in cataract.h)
|
|
6
|
+
VALUE cDeclaration;
|
|
7
|
+
VALUE cRule;
|
|
8
|
+
|
|
9
|
+
// Error class definitions (declared extern in cataract.h)
|
|
10
|
+
VALUE eCataractError;
|
|
11
|
+
VALUE eParseError;
|
|
12
|
+
VALUE eDepthError;
|
|
13
|
+
VALUE eSizeError;
|
|
14
|
+
|
|
15
|
+
// ============================================================================
|
|
16
|
+
// Ruby Bindings and Public API
|
|
17
|
+
// ============================================================================
|
|
18
|
+
|
|
19
|
+
/*
 * Parse a CSS string: pull out a leading @charset rule, then hand the whole
 * string to parse_css_impl.
 *
 * @param self        receiver (forwarded by the Ruby-facing wrapper, unused here)
 * @param css_string  [String] CSS source text
 * @param depth       recursion depth; 0 for a top-level parse
 * @return at depth 0, a Hash { rules: <parse_css_impl result>, charset: String or nil };
 *         at depth > 0, the parse_css_impl result itself
 * @raise Cataract::DepthError if depth exceeds MAX_PARSE_DEPTH
 * @raise TypeError if css_string is not a String
 */
static VALUE parse_css_internal(VALUE self, VALUE css_string, int depth) {
    // Check recursion depth to prevent stack overflow and memory exhaustion
    if (depth > MAX_PARSE_DEPTH) {
        rb_raise(eDepthError,
                 "CSS nesting too deep: exceeded maximum depth of %d",
                 MAX_PARSE_DEPTH);
    }

    Check_Type(css_string, T_STRING);

    VALUE charset = Qnil;
    const char *css_start = RSTRING_PTR(css_string);
    long css_len = RSTRING_LEN(css_string);
    const char *css_end = css_start + css_len;

    // @charset is only recognised at the very start: @charset "UTF-8";
    // Every scan below is bounded by css_end instead of relying on a NUL
    // terminator, and the opening quote must be the first non-whitespace byte
    // after the keyword so a quoted string later in the stylesheet is never
    // mistaken for the charset name.
    if (css_len > 10 && strncmp(css_start, "@charset ", 9) == 0) {
        const char *cursor = css_start + 9;
        while (cursor < css_end && IS_WHITESPACE(*cursor)) {
            cursor++;
        }

        if (cursor < css_end && *cursor == '"') {
            const char *name_start = cursor + 1;
            const char *name_end = memchr(name_start, '"', (size_t)(css_end - name_start));

            if (name_end != NULL) {
                // Skip whitespace between the closing quote and the semicolon
                const char *semicolon = name_end + 1;
                while (semicolon < css_end && IS_WHITESPACE(*semicolon)) {
                    semicolon++;
                }

                if (semicolon < css_end && *semicolon == ';') {
                    // Valid @charset rule found
                    charset = rb_str_new(name_start, name_end - name_start);
                    DEBUG_PRINTF("[@charset] Extracted: '%s'\n", RSTRING_PTR(charset));
                }
            }
        }
    }

    // Parse CSS using our C parser implementation
    // Returns hash: {query_string => [rules]} already grouped
    VALUE rules_by_media = parse_css_impl(css_string, depth, Qnil);

    // Keep the Ruby objects visible to the GC for as long as raw pointers
    // derived from them were in use above
    RB_GC_GUARD(css_string);
    RB_GC_GUARD(rules_by_media);
    RB_GC_GUARD(charset);

    // Nested parses (depth > 0) return the hash directly
    if (depth != 0) {
        return rules_by_media;
    }

    // Top-level parse: wrap rules and charset in a result hash
    VALUE result = rb_hash_new();
    rb_hash_aset(result, ID2SYM(rb_intern("rules")), rules_by_media);
    rb_hash_aset(result, ID2SYM(rb_intern("charset")), charset);
    return result;
}
|
|
78
|
+
|
|
79
|
+
/*
|
|
80
|
+
* Ruby-facing wrapper for parse_declarations
|
|
81
|
+
*
|
|
82
|
+
* @param declarations_string [String] CSS declarations like "color: red; margin: 10px"
|
|
83
|
+
* @return [Array<Declaration>] Array of parsed declaration structs
|
|
84
|
+
*/
|
|
85
|
+
static VALUE parse_declarations(VALUE self, VALUE declarations_string) {
|
|
86
|
+
Check_Type(declarations_string, T_STRING);
|
|
87
|
+
|
|
88
|
+
const char *input = RSTRING_PTR(declarations_string);
|
|
89
|
+
long input_len = RSTRING_LEN(declarations_string);
|
|
90
|
+
|
|
91
|
+
// Strip outer braces and whitespace (css_parser compatibility)
|
|
92
|
+
const char *start = input;
|
|
93
|
+
const char *end = input + input_len;
|
|
94
|
+
|
|
95
|
+
while (start < end && (IS_WHITESPACE(*start) || *start == '{')) start++;
|
|
96
|
+
while (end > start && (IS_WHITESPACE(*(end-1)) || *(end-1) == '}')) end--;
|
|
97
|
+
|
|
98
|
+
VALUE result = parse_declarations_string(start, end);
|
|
99
|
+
|
|
100
|
+
RB_GC_GUARD(result);
|
|
101
|
+
return result;
|
|
102
|
+
}
|
|
103
|
+
|
|
104
|
+
// Public wrapper for Ruby - starts at depth 0
|
|
105
|
+
static VALUE parse_css(VALUE self, VALUE css_string) {
|
|
106
|
+
// Verify that cRule was initialized in Init_cataract
|
|
107
|
+
if (cRule == Qnil || cRule == 0) {
|
|
108
|
+
rb_raise(rb_eRuntimeError, "cRule struct class not initialized - Init_cataract may have failed");
|
|
109
|
+
}
|
|
110
|
+
return parse_css_internal(self, css_string, 0);
|
|
111
|
+
}
|
|
112
|
+
|
|
113
|
+
/*
|
|
114
|
+
* Convert array of Rule structs to full CSS string
|
|
115
|
+
* Format: "selector { prop: value; }\nselector2 { prop: value; }"
|
|
116
|
+
*/
|
|
117
|
+
static VALUE rules_to_s(VALUE self, VALUE rules_array) {
|
|
118
|
+
Check_Type(rules_array, T_ARRAY);
|
|
119
|
+
|
|
120
|
+
long len = RARRAY_LEN(rules_array);
|
|
121
|
+
if (len == 0) {
|
|
122
|
+
return rb_str_new_cstr("");
|
|
123
|
+
}
|
|
124
|
+
|
|
125
|
+
// Estimate: ~100 chars per rule (selector + declarations)
|
|
126
|
+
VALUE result = rb_str_buf_new(len * 100);
|
|
127
|
+
|
|
128
|
+
for (long i = 0; i < len; i++) {
|
|
129
|
+
VALUE rule = rb_ary_entry(rules_array, i);
|
|
130
|
+
|
|
131
|
+
// Validate this is a Rule struct
|
|
132
|
+
if (!RB_TYPE_P(rule, T_STRUCT)) {
|
|
133
|
+
rb_raise(rb_eTypeError,
|
|
134
|
+
"Expected array of Rule structs, got %s at index %ld",
|
|
135
|
+
rb_obj_classname(rule), i);
|
|
136
|
+
}
|
|
137
|
+
|
|
138
|
+
// Extract: selector, declarations, specificity, media_query
|
|
139
|
+
VALUE selector = rb_struct_aref(rule, INT2FIX(RULE_SELECTOR));
|
|
140
|
+
VALUE declarations = rb_struct_aref(rule, INT2FIX(RULE_DECLARATIONS));
|
|
141
|
+
|
|
142
|
+
// Append selector
|
|
143
|
+
rb_str_buf_append(result, selector);
|
|
144
|
+
rb_str_buf_cat2(result, " { ");
|
|
145
|
+
|
|
146
|
+
// Serialize each declaration
|
|
147
|
+
long decl_len = RARRAY_LEN(declarations);
|
|
148
|
+
for (long j = 0; j < decl_len; j++) {
|
|
149
|
+
VALUE decl = rb_ary_entry(declarations, j);
|
|
150
|
+
|
|
151
|
+
VALUE property = rb_struct_aref(decl, INT2FIX(DECL_PROPERTY));
|
|
152
|
+
VALUE value = rb_struct_aref(decl, INT2FIX(DECL_VALUE));
|
|
153
|
+
VALUE important = rb_struct_aref(decl, INT2FIX(DECL_IMPORTANT));
|
|
154
|
+
|
|
155
|
+
rb_str_buf_append(result, property);
|
|
156
|
+
rb_str_buf_cat2(result, ": ");
|
|
157
|
+
rb_str_buf_append(result, value);
|
|
158
|
+
|
|
159
|
+
if (RTEST(important)) {
|
|
160
|
+
rb_str_buf_cat2(result, " !important");
|
|
161
|
+
}
|
|
162
|
+
|
|
163
|
+
rb_str_buf_cat2(result, "; ");
|
|
164
|
+
}
|
|
165
|
+
|
|
166
|
+
rb_str_buf_cat2(result, "}\n");
|
|
167
|
+
|
|
168
|
+
RB_GC_GUARD(rule);
|
|
169
|
+
RB_GC_GUARD(selector);
|
|
170
|
+
RB_GC_GUARD(declarations);
|
|
171
|
+
}
|
|
172
|
+
|
|
173
|
+
RB_GC_GUARD(result);
|
|
174
|
+
return result;
|
|
175
|
+
}
|
|
176
|
+
|
|
177
|
+
/*
|
|
178
|
+
* Convert array of Declaration structs to CSS string
|
|
179
|
+
* Format: "prop: value; prop2: value2 !important; "
|
|
180
|
+
*
|
|
181
|
+
* This is the core serialization logic used by both:
|
|
182
|
+
* - Declarations#to_s (instance method)
|
|
183
|
+
* - Internal C serialization (stylesheet.c)
|
|
184
|
+
*
|
|
185
|
+
* Exported (non-static) so stylesheet.c can call it
|
|
186
|
+
*/
|
|
187
|
+
VALUE declarations_array_to_s(VALUE declarations_array) {
|
|
188
|
+
Check_Type(declarations_array, T_ARRAY);
|
|
189
|
+
|
|
190
|
+
long len = RARRAY_LEN(declarations_array);
|
|
191
|
+
if (len == 0) {
|
|
192
|
+
return rb_str_new_cstr("");
|
|
193
|
+
}
|
|
194
|
+
|
|
195
|
+
// Use rb_str_buf_new for efficient string building
|
|
196
|
+
VALUE result = rb_str_buf_new(len * 32); // Estimate 32 chars per declaration
|
|
197
|
+
|
|
198
|
+
for (long i = 0; i < len; i++) {
|
|
199
|
+
VALUE decl = rb_ary_entry(declarations_array, i);
|
|
200
|
+
|
|
201
|
+
// Validate this is a Declaration struct
|
|
202
|
+
if (!RB_TYPE_P(decl, T_STRUCT) || rb_obj_class(decl) != cDeclaration) {
|
|
203
|
+
rb_raise(rb_eTypeError,
|
|
204
|
+
"Expected array of Declaration structs, got %s at index %ld",
|
|
205
|
+
rb_obj_classname(decl), i);
|
|
206
|
+
}
|
|
207
|
+
|
|
208
|
+
// Extract struct fields
|
|
209
|
+
VALUE property = rb_struct_aref(decl, INT2FIX(DECL_PROPERTY));
|
|
210
|
+
VALUE value = rb_struct_aref(decl, INT2FIX(DECL_VALUE));
|
|
211
|
+
VALUE important = rb_struct_aref(decl, INT2FIX(DECL_IMPORTANT));
|
|
212
|
+
|
|
213
|
+
// Append: "property: value"
|
|
214
|
+
rb_str_buf_append(result, property);
|
|
215
|
+
rb_str_buf_cat2(result, ": ");
|
|
216
|
+
rb_str_buf_append(result, value);
|
|
217
|
+
|
|
218
|
+
// Append " !important" if needed
|
|
219
|
+
if (RTEST(important)) {
|
|
220
|
+
rb_str_buf_cat2(result, " !important");
|
|
221
|
+
}
|
|
222
|
+
|
|
223
|
+
rb_str_buf_cat2(result, "; ");
|
|
224
|
+
|
|
225
|
+
RB_GC_GUARD(decl);
|
|
226
|
+
RB_GC_GUARD(property);
|
|
227
|
+
RB_GC_GUARD(value);
|
|
228
|
+
RB_GC_GUARD(important);
|
|
229
|
+
}
|
|
230
|
+
|
|
231
|
+
// Strip trailing space
|
|
232
|
+
rb_str_set_len(result, RSTRING_LEN(result) - 1);
|
|
233
|
+
|
|
234
|
+
RB_GC_GUARD(result);
|
|
235
|
+
return result;
|
|
236
|
+
}
|
|
237
|
+
|
|
238
|
+
/*
|
|
239
|
+
* Instance method: Declarations#to_s
|
|
240
|
+
* Converts declarations to CSS string
|
|
241
|
+
*
|
|
242
|
+
* @return [String] CSS declarations like "color: red; margin: 10px !important;"
|
|
243
|
+
*/
|
|
244
|
+
static VALUE declarations_to_s_method(VALUE self) {
|
|
245
|
+
// Get @values instance variable (array of Declaration structs)
|
|
246
|
+
VALUE values = rb_ivar_get(self, rb_intern("@values"));
|
|
247
|
+
|
|
248
|
+
// Call core serialization function
|
|
249
|
+
return declarations_array_to_s(values);
|
|
250
|
+
}
|
|
251
|
+
|
|
252
|
+
void Init_cataract() {
|
|
253
|
+
VALUE module = rb_define_module("Cataract");
|
|
254
|
+
|
|
255
|
+
// Initialize merge constants (cached strings and symbol IDs)
|
|
256
|
+
init_merge_constants();
|
|
257
|
+
|
|
258
|
+
// Define error class hierarchy
|
|
259
|
+
eCataractError = rb_define_class_under(module, "Error", rb_eStandardError);
|
|
260
|
+
eParseError = rb_define_class_under(module, "ParseError", eCataractError);
|
|
261
|
+
eDepthError = rb_define_class_under(module, "DepthError", eCataractError);
|
|
262
|
+
eSizeError = rb_define_class_under(module, "SizeError", eCataractError);
|
|
263
|
+
|
|
264
|
+
// Define Cataract::Declarations class (Ruby side will add methods)
|
|
265
|
+
VALUE cDeclarations = rb_define_class_under(module, "Declarations", rb_cObject);
|
|
266
|
+
|
|
267
|
+
// Define Cataract::Declaration = Struct.new(:property, :value, :important)
|
|
268
|
+
cDeclaration = rb_struct_define_under(
|
|
269
|
+
module,
|
|
270
|
+
"Declaration",
|
|
271
|
+
"property",
|
|
272
|
+
"value",
|
|
273
|
+
"important",
|
|
274
|
+
NULL
|
|
275
|
+
);
|
|
276
|
+
|
|
277
|
+
// Add methods to Declarations class
|
|
278
|
+
rb_define_method(cDeclarations, "to_s", declarations_to_s_method, 0);
|
|
279
|
+
|
|
280
|
+
// Define Cataract::Rule = Struct.new(:selector, :declarations, :specificity)
|
|
281
|
+
// Note: media_query removed - media info now stored at group level in hash structure
|
|
282
|
+
cRule = rb_struct_define_under(
|
|
283
|
+
module,
|
|
284
|
+
"Rule",
|
|
285
|
+
"selector",
|
|
286
|
+
"declarations",
|
|
287
|
+
"specificity",
|
|
288
|
+
NULL
|
|
289
|
+
);
|
|
290
|
+
|
|
291
|
+
// Define Cataract::Stylesheet class (Ruby side will reopen and add methods)
|
|
292
|
+
rb_define_class_under(module, "Stylesheet", rb_cObject);
|
|
293
|
+
|
|
294
|
+
rb_define_module_function(module, "parse_css", parse_css, 1);
|
|
295
|
+
rb_define_module_function(module, "parse_declarations", parse_declarations, 1);
|
|
296
|
+
rb_define_module_function(module, "calculate_specificity", calculate_specificity, 1);
|
|
297
|
+
rb_define_module_function(module, "merge_rules", cataract_merge_wrapper, 1);
|
|
298
|
+
rb_define_module_function(module, "apply_cascade", cataract_merge_wrapper, 1); // Alias with better name
|
|
299
|
+
/* @api private */
|
|
300
|
+
rb_define_module_function(module, "_rules_to_s", rules_to_s, 1);
|
|
301
|
+
|
|
302
|
+
/* @api private */
|
|
303
|
+
rb_define_module_function(module, "_split_value", cataract_split_value, 1);
|
|
304
|
+
/* @api private */
|
|
305
|
+
rb_define_module_function(module, "_expand_margin", cataract_expand_margin, 1);
|
|
306
|
+
/* @api private */
|
|
307
|
+
rb_define_module_function(module, "_expand_padding", cataract_expand_padding, 1);
|
|
308
|
+
/* @api private */
|
|
309
|
+
rb_define_module_function(module, "_expand_border_color", cataract_expand_border_color, 1);
|
|
310
|
+
/* @api private */
|
|
311
|
+
rb_define_module_function(module, "_expand_border_style", cataract_expand_border_style, 1);
|
|
312
|
+
/* @api private */
|
|
313
|
+
rb_define_module_function(module, "_expand_border_width", cataract_expand_border_width, 1);
|
|
314
|
+
/* @api private */
|
|
315
|
+
rb_define_module_function(module, "_expand_border", cataract_expand_border, 1);
|
|
316
|
+
/* @api private */
|
|
317
|
+
rb_define_module_function(module, "_expand_border_side", cataract_expand_border_side, 2);
|
|
318
|
+
/* @api private */
|
|
319
|
+
rb_define_module_function(module, "_expand_font", cataract_expand_font, 1);
|
|
320
|
+
/* @api private */
|
|
321
|
+
rb_define_module_function(module, "_expand_list_style", cataract_expand_list_style, 1);
|
|
322
|
+
/* @api private */
|
|
323
|
+
rb_define_module_function(module, "_expand_background", cataract_expand_background, 1);
|
|
324
|
+
|
|
325
|
+
// Shorthand creation (inverse of expansion)
|
|
326
|
+
/* @api private */
|
|
327
|
+
rb_define_module_function(module, "_create_margin_shorthand", cataract_create_margin_shorthand, 1);
|
|
328
|
+
/* @api private */
|
|
329
|
+
rb_define_module_function(module, "_create_padding_shorthand", cataract_create_padding_shorthand, 1);
|
|
330
|
+
/* @api private */
|
|
331
|
+
rb_define_module_function(module, "_create_border_width_shorthand", cataract_create_border_width_shorthand, 1);
|
|
332
|
+
/* @api private */
|
|
333
|
+
rb_define_module_function(module, "_create_border_style_shorthand", cataract_create_border_style_shorthand, 1);
|
|
334
|
+
/* @api private */
|
|
335
|
+
rb_define_module_function(module, "_create_border_color_shorthand", cataract_create_border_color_shorthand, 1);
|
|
336
|
+
/* @api private */
|
|
337
|
+
rb_define_module_function(module, "_create_border_shorthand", cataract_create_border_shorthand, 1);
|
|
338
|
+
/* @api private */
|
|
339
|
+
rb_define_module_function(module, "_create_background_shorthand", cataract_create_background_shorthand, 1);
|
|
340
|
+
/* @api private */
|
|
341
|
+
rb_define_module_function(module, "_create_font_shorthand", cataract_create_font_shorthand, 1);
|
|
342
|
+
/* @api private */
|
|
343
|
+
rb_define_module_function(module, "_create_list_style_shorthand", cataract_create_list_style_shorthand, 1);
|
|
344
|
+
|
|
345
|
+
// Serialization
|
|
346
|
+
/* @api private */
|
|
347
|
+
rb_define_module_function(module, "_stylesheet_to_s_c", stylesheet_to_s_c, 2);
|
|
348
|
+
/* @api private */
|
|
349
|
+
rb_define_module_function(module, "_stylesheet_to_formatted_s_c", stylesheet_to_formatted_s_c, 2);
|
|
350
|
+
|
|
351
|
+
// Import scanning
|
|
352
|
+
rb_define_module_function(module, "extract_imports", extract_imports, 1);
|
|
353
|
+
|
|
354
|
+
// Export string allocation mode as a constant for verification in benchmarks
|
|
355
|
+
#ifdef DISABLE_STR_BUF_OPTIMIZATION
|
|
356
|
+
rb_define_const(module, "STRING_ALLOC_MODE", ID2SYM(rb_intern("dynamic")));
|
|
357
|
+
#else
|
|
358
|
+
rb_define_const(module, "STRING_ALLOC_MODE", ID2SYM(rb_intern("buffer")));
|
|
359
|
+
#endif
|
|
360
|
+
|
|
361
|
+
// Export compile-time optimization flags as a hash for runtime introspection
|
|
362
|
+
VALUE compile_flags = rb_hash_new();
|
|
363
|
+
|
|
364
|
+
#ifdef DISABLE_STR_BUF_OPTIMIZATION
|
|
365
|
+
rb_hash_aset(compile_flags, ID2SYM(rb_intern("str_buf_optimization")), Qfalse);
|
|
366
|
+
#else
|
|
367
|
+
rb_hash_aset(compile_flags, ID2SYM(rb_intern("str_buf_optimization")), Qtrue);
|
|
368
|
+
#endif
|
|
369
|
+
|
|
370
|
+
#ifdef CATARACT_DEBUG
|
|
371
|
+
rb_hash_aset(compile_flags, ID2SYM(rb_intern("debug")), Qtrue);
|
|
372
|
+
#else
|
|
373
|
+
rb_hash_aset(compile_flags, ID2SYM(rb_intern("debug")), Qfalse);
|
|
374
|
+
#endif
|
|
375
|
+
|
|
376
|
+
#ifdef DISABLE_LOOP_UNROLL
|
|
377
|
+
rb_hash_aset(compile_flags, ID2SYM(rb_intern("loop_unroll")), Qfalse);
|
|
378
|
+
#else
|
|
379
|
+
rb_hash_aset(compile_flags, ID2SYM(rb_intern("loop_unroll")), Qtrue);
|
|
380
|
+
#endif
|
|
381
|
+
|
|
382
|
+
// Note: Compiler flags like -O3, -march=native, -funroll-loops don't have
|
|
383
|
+
// preprocessor defines, so we can't detect them at runtime. They're purely
|
|
384
|
+
// compiler optimizations that affect the generated code.
|
|
385
|
+
|
|
386
|
+
rb_define_const(module, "COMPILE_FLAGS", compile_flags);
|
|
387
|
+
|
|
388
|
+
// NOTE: Color conversion is now a separate extension (cataract_color)
|
|
389
|
+
// It's initialized when you require 'cataract/color_conversion'
|
|
390
|
+
}
|
|
391
|
+
|
|
392
|
+
// NOTE: shorthand_expander.c and value_splitter.c are now compiled separately (not included)
|
|
393
|
+
|
|
@@ -0,0 +1,250 @@
|
|
|
1
|
+
#ifndef CATARACT_H
|
|
2
|
+
#define CATARACT_H
|
|
3
|
+
|
|
4
|
+
#include <ruby.h>
|
|
5
|
+
#include <ruby/encoding.h>
|
|
6
|
+
|
|
7
|
+
// ============================================================================
|
|
8
|
+
// Global struct class references (defined in cataract.c, declared extern here)
|
|
9
|
+
// ============================================================================
|
|
10
|
+
|
|
11
|
+
extern VALUE cDeclaration;
|
|
12
|
+
extern VALUE cRule;
|
|
13
|
+
|
|
14
|
+
// Error class references
|
|
15
|
+
extern VALUE eCataractError;
|
|
16
|
+
extern VALUE eParseError;
|
|
17
|
+
extern VALUE eDepthError;
|
|
18
|
+
extern VALUE eSizeError;
|
|
19
|
+
|
|
20
|
+
// ============================================================================
|
|
21
|
+
// Struct field indices
|
|
22
|
+
// ============================================================================
|
|
23
|
+
|
|
24
|
+
// Rule struct field indices (selector, declarations, specificity)
|
|
25
|
+
#define RULE_SELECTOR 0
|
|
26
|
+
#define RULE_DECLARATIONS 1
|
|
27
|
+
#define RULE_SPECIFICITY 2
|
|
28
|
+
|
|
29
|
+
// Declaration struct field indices (property, value, important)
|
|
30
|
+
#define DECL_PROPERTY 0
|
|
31
|
+
#define DECL_VALUE 1
|
|
32
|
+
#define DECL_IMPORTANT 2
|
|
33
|
+
|
|
34
|
+
// ============================================================================
|
|
35
|
+
// Macros
|
|
36
|
+
// ============================================================================
|
|
37
|
+
|
|
38
|
+
// Whitespace detection
|
|
39
|
+
#define IS_WHITESPACE(c) ((c) == ' ' || (c) == '\t' || (c) == '\n' || (c) == '\r')
|
|
40
|
+
|
|
41
|
+
// US-ASCII string literal creation (compile-time length for efficiency)
|
|
42
|
+
// Use this for string literals like "margin-top" to avoid strlen() at runtime
|
|
43
|
+
// Example: USASCII_STR("margin-top") expands to rb_usascii_str_new("margin-top", 10)
|
|
44
|
+
#define USASCII_STR(str) rb_usascii_str_new((str), sizeof(str) - 1)
|
|
45
|
+
|
|
46
|
+
// UTF-8 string literal creation (compile-time length for efficiency)
|
|
47
|
+
// Use this for string literals that may be concatenated with UTF-8 content
|
|
48
|
+
// Example: UTF8_STR("@") expands to rb_utf8_str_new("@", 1)
|
|
49
|
+
#define UTF8_STR(str) rb_utf8_str_new((str), sizeof(str) - 1)
|
|
50
|
+
|
|
51
|
+
// Debug output (disabled by default)
|
|
52
|
+
// #define CATARACT_DEBUG 1
|
|
53
|
+
|
|
54
|
+
#ifdef CATARACT_DEBUG
|
|
55
|
+
#define DEBUG_PRINTF(...) printf(__VA_ARGS__)
|
|
56
|
+
#else
|
|
57
|
+
#define DEBUG_PRINTF(...) ((void)0)
|
|
58
|
+
#endif
|
|
59
|
+
|
|
60
|
+
// String allocation optimization (enabled by default)
|
|
61
|
+
// Uses rb_str_buf_new for pre-allocation when building selector strings
|
|
62
|
+
//
|
|
63
|
+
// Disable for benchmarking baseline:
|
|
64
|
+
// Development: DISABLE_STR_BUF_OPTIMIZATION=1 rake compile
|
|
65
|
+
// Gem install: gem install cataract -- --disable-str-buf-optimization
|
|
66
|
+
//
|
|
67
|
+
#ifndef DISABLE_STR_BUF_OPTIMIZATION
|
|
68
|
+
#define STR_NEW_WITH_CAPACITY(capacity) rb_str_buf_new(capacity)
|
|
69
|
+
#define STR_NEW_CSTR(str) rb_str_new_cstr(str)
|
|
70
|
+
#else
|
|
71
|
+
#define STR_NEW_WITH_CAPACITY(capacity) rb_str_new_cstr("")
|
|
72
|
+
#define STR_NEW_CSTR(str) rb_str_new_cstr(str)
|
|
73
|
+
#endif
|
|
74
|
+
|
|
75
|
+
// Sanity limits for CSS properties and values
|
|
76
|
+
// These prevent crashes from pathological inputs (fuzzer-found edge cases)
|
|
77
|
+
// Override at compile time if needed: -DMAX_PROPERTY_NAME_LENGTH=512
|
|
78
|
+
#ifndef MAX_PROPERTY_NAME_LENGTH
|
|
79
|
+
#define MAX_PROPERTY_NAME_LENGTH 256 // Reasonable max for property names (e.g., "background-position-x")
|
|
80
|
+
#endif
|
|
81
|
+
|
|
82
|
+
#ifndef MAX_PROPERTY_VALUE_LENGTH
|
|
83
|
+
#define MAX_PROPERTY_VALUE_LENGTH 32768 // 32KB - handles large data URLs and complex values
|
|
84
|
+
#endif
|
|
85
|
+
|
|
86
|
+
#ifndef MAX_AT_RULE_BLOCK_LENGTH
|
|
87
|
+
#define MAX_AT_RULE_BLOCK_LENGTH 1048576 // 1MB - max size for @media, @supports, etc. block content
|
|
88
|
+
#endif
|
|
89
|
+
|
|
90
|
+
#ifndef MAX_PARSE_DEPTH
|
|
91
|
+
#define MAX_PARSE_DEPTH 10 // Max recursion depth for nested @media/@supports blocks
|
|
92
|
+
#endif
|
|
93
|
+
|
|
94
|
+
// ============================================================================
|
|
95
|
+
// Inline helper functions
|
|
96
|
+
// ============================================================================
|
|
97
|
+
|
|
98
|
+
// Advance *start past leading whitespace, stopping at end at the latest
static inline void trim_leading(const char **start, const char *end) {
    const char *cursor = *start;

    while (cursor < end && IS_WHITESPACE(*cursor)) {
        cursor++;
    }

    *start = cursor;
}
|
|
104
|
+
|
|
105
|
+
// Pull *end back over trailing whitespace, stopping at start at the latest
static inline void trim_trailing(const char *start, const char **end) {
    const char *cursor = *end;

    while (cursor > start && IS_WHITESPACE(cursor[-1])) {
        cursor--;
    }

    *end = cursor;
}
|
|
111
|
+
|
|
112
|
+
// Strip whitespace from both ends and return new string
|
|
113
|
+
static inline VALUE strip_string(const char *str, long len) {
|
|
114
|
+
const char *start = str;
|
|
115
|
+
const char *end = str + len;
|
|
116
|
+
trim_leading(&start, end);
|
|
117
|
+
trim_trailing(start, &end);
|
|
118
|
+
return rb_str_new(start, end - start);
|
|
119
|
+
}
|
|
120
|
+
|
|
121
|
+
// Lowercase property name (CSS property names are ASCII-only)
|
|
122
|
+
//
|
|
123
|
+
// Performance: Manual loop unrolling (USE_LOOP_UNROLL) provides ~6.6% speedup
|
|
124
|
+
// on Apple Silicon M1 (tested with bootstrap.css parsing benchmark).
|
|
125
|
+
static inline VALUE lowercase_property(VALUE property_str) {
|
|
126
|
+
Check_Type(property_str, T_STRING);
|
|
127
|
+
|
|
128
|
+
long len = RSTRING_LEN(property_str);
|
|
129
|
+
const char *src = RSTRING_PTR(property_str);
|
|
130
|
+
|
|
131
|
+
// Create new US-ASCII string with same length (CSS property names are ASCII-only)
|
|
132
|
+
VALUE result = rb_str_buf_new(len);
|
|
133
|
+
rb_enc_associate(result, rb_usascii_encoding());
|
|
134
|
+
|
|
135
|
+
#ifndef DISABLE_LOOP_UNROLL
|
|
136
|
+
// Manual loop unrolling: process 4 chars at a time (default, ~6.6% faster on M1)
|
|
137
|
+
// Benefits: Fewer loop iterations, better ILP, fewer rb_str_buf_cat calls
|
|
138
|
+
long i = 0;
|
|
139
|
+
|
|
140
|
+
// Process 4 characters at a time
|
|
141
|
+
for (; i + 3 < len; i += 4) {
|
|
142
|
+
char c0 = src[i];
|
|
143
|
+
char c1 = src[i+1];
|
|
144
|
+
char c2 = src[i+2];
|
|
145
|
+
char c3 = src[i+3];
|
|
146
|
+
|
|
147
|
+
// Lowercase each character
|
|
148
|
+
if (c0 >= 'A' && c0 <= 'Z') c0 += 32;
|
|
149
|
+
if (c1 >= 'A' && c1 <= 'Z') c1 += 32;
|
|
150
|
+
if (c2 >= 'A' && c2 <= 'Z') c2 += 32;
|
|
151
|
+
if (c3 >= 'A' && c3 <= 'Z') c3 += 32;
|
|
152
|
+
|
|
153
|
+
char buf[4] = {c0, c1, c2, c3};
|
|
154
|
+
rb_str_buf_cat(result, buf, 4);
|
|
155
|
+
}
|
|
156
|
+
|
|
157
|
+
// Handle remaining characters (0-3)
|
|
158
|
+
for (; i < len; i++) {
|
|
159
|
+
char c = src[i];
|
|
160
|
+
if (c >= 'A' && c <= 'Z') {
|
|
161
|
+
c += 32;
|
|
162
|
+
}
|
|
163
|
+
rb_str_buf_cat(result, &c, 1);
|
|
164
|
+
}
|
|
165
|
+
#else
|
|
166
|
+
// Unrolling disabled: process one character at a time
|
|
167
|
+
for (long i = 0; i < len; i++) {
|
|
168
|
+
char c = src[i];
|
|
169
|
+
// Lowercase ASCII letters only (CSS properties are ASCII)
|
|
170
|
+
if (c >= 'A' && c <= 'Z') {
|
|
171
|
+
c = c + ('a' - 'A');
|
|
172
|
+
}
|
|
173
|
+
rb_str_buf_cat(result, &c, 1);
|
|
174
|
+
}
|
|
175
|
+
#endif
|
|
176
|
+
|
|
177
|
+
return result;
|
|
178
|
+
}
|
|
179
|
+
|
|
180
|
+
// ============================================================================
|
|
181
|
+
// Function declarations (implemented in the extension's other .c files)
|
|
182
|
+
// ============================================================================
|
|
183
|
+
|
|
184
|
+
// Serialization functions (cataract.c)
|
|
185
|
+
// Helper function for internal use (called by stylesheet.c)
|
|
186
|
+
VALUE declarations_array_to_s(VALUE declarations_array);
|
|
187
|
+
|
|
188
|
+
// Stylesheet serialization (stylesheet.c)
|
|
189
|
+
VALUE stylesheet_to_s_c(VALUE self, VALUE rules_array, VALUE charset);
|
|
190
|
+
VALUE stylesheet_to_formatted_s_c(VALUE self, VALUE rules_array, VALUE charset);
|
|
191
|
+
|
|
192
|
+
// Import scanning (import_scanner.c)
|
|
193
|
+
VALUE extract_imports(VALUE self, VALUE css_string);
|
|
194
|
+
|
|
195
|
+
// Merge/cascade functions (merge.c)
|
|
196
|
+
void init_merge_constants(void);
|
|
197
|
+
VALUE cataract_merge(VALUE self, VALUE rules_array);
|
|
198
|
+
VALUE cataract_merge_wrapper(VALUE self, VALUE input);
|
|
199
|
+
|
|
200
|
+
// Shorthand expansion (shorthand_expander.c)
|
|
201
|
+
VALUE cataract_split_value(VALUE self, VALUE value);
|
|
202
|
+
VALUE cataract_expand_margin(VALUE self, VALUE value);
|
|
203
|
+
VALUE cataract_expand_padding(VALUE self, VALUE value);
|
|
204
|
+
VALUE cataract_expand_border_color(VALUE self, VALUE value);
|
|
205
|
+
VALUE cataract_expand_border_style(VALUE self, VALUE value);
|
|
206
|
+
VALUE cataract_expand_border_width(VALUE self, VALUE value);
|
|
207
|
+
VALUE cataract_expand_border(VALUE self, VALUE value);
|
|
208
|
+
VALUE cataract_expand_border_side(VALUE self, VALUE side, VALUE value);
|
|
209
|
+
VALUE cataract_expand_font(VALUE self, VALUE value);
|
|
210
|
+
VALUE cataract_expand_list_style(VALUE self, VALUE value);
|
|
211
|
+
VALUE cataract_expand_background(VALUE self, VALUE value);
|
|
212
|
+
|
|
213
|
+
// Shorthand creation (shorthand_expander.c)
|
|
214
|
+
VALUE cataract_create_margin_shorthand(VALUE self, VALUE properties);
|
|
215
|
+
VALUE cataract_create_padding_shorthand(VALUE self, VALUE properties);
|
|
216
|
+
VALUE cataract_create_border_width_shorthand(VALUE self, VALUE properties);
|
|
217
|
+
VALUE cataract_create_border_style_shorthand(VALUE self, VALUE properties);
|
|
218
|
+
VALUE cataract_create_border_color_shorthand(VALUE self, VALUE properties);
|
|
219
|
+
VALUE cataract_create_border_shorthand(VALUE self, VALUE properties);
|
|
220
|
+
VALUE cataract_create_background_shorthand(VALUE self, VALUE properties);
|
|
221
|
+
VALUE cataract_create_font_shorthand(VALUE self, VALUE properties);
|
|
222
|
+
VALUE cataract_create_list_style_shorthand(VALUE self, VALUE properties);
|
|
223
|
+
|
|
224
|
+
// CSS parser implementation (css_parser.c)
|
|
225
|
+
VALUE parse_css_impl(VALUE css_string, int depth, VALUE parent_media_query);
|
|
226
|
+
|
|
227
|
+
// CSS parsing helper functions (css_parser.c)
|
|
228
|
+
VALUE parse_media_query(const char *query_str, long query_len);
|
|
229
|
+
VALUE parse_declarations_string(const char *start, const char *end);
|
|
230
|
+
void capture_declarations_fn(
|
|
231
|
+
const char **decl_start_ptr,
|
|
232
|
+
const char *p,
|
|
233
|
+
VALUE *current_declarations,
|
|
234
|
+
const char *css_string_base
|
|
235
|
+
);
|
|
236
|
+
void finish_rule_fn(
|
|
237
|
+
int inside_at_rule_block,
|
|
238
|
+
VALUE *current_selectors,
|
|
239
|
+
VALUE *current_declarations,
|
|
240
|
+
VALUE *current_media_types,
|
|
241
|
+
VALUE rules_array,
|
|
242
|
+
const char **mark_ptr
|
|
243
|
+
);
|
|
244
|
+
|
|
245
|
+
// Specificity calculator (specificity.c)
|
|
246
|
+
VALUE calculate_specificity(VALUE self, VALUE selector_string);
|
|
247
|
+
|
|
248
|
+
// NOTE: Color conversion moved to separate extension (ext/cataract_color/)
|
|
249
|
+
|
|
250
|
+
#endif // CATARACT_H
|