cataract 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.clang-tidy +30 -0
- data/.github/workflows/ci-macos.yml +12 -0
- data/.github/workflows/ci.yml +77 -0
- data/.github/workflows/test.yml +76 -0
- data/.gitignore +45 -0
- data/.overcommit.yml +38 -0
- data/.rubocop.yml +83 -0
- data/BENCHMARKS.md +201 -0
- data/CHANGELOG.md +1 -0
- data/Gemfile +27 -0
- data/LICENSE +21 -0
- data/RAGEL_MIGRATION.md +60 -0
- data/README.md +292 -0
- data/Rakefile +209 -0
- data/benchmarks/benchmark_harness.rb +193 -0
- data/benchmarks/benchmark_merging.rb +121 -0
- data/benchmarks/benchmark_optimization_comparison.rb +168 -0
- data/benchmarks/benchmark_parsing.rb +153 -0
- data/benchmarks/benchmark_ragel_removal.rb +56 -0
- data/benchmarks/benchmark_runner.rb +70 -0
- data/benchmarks/benchmark_serialization.rb +180 -0
- data/benchmarks/benchmark_shorthand.rb +109 -0
- data/benchmarks/benchmark_shorthand_expansion.rb +176 -0
- data/benchmarks/benchmark_specificity.rb +124 -0
- data/benchmarks/benchmark_string_allocation.rb +151 -0
- data/benchmarks/benchmark_stylesheet_to_s.rb +62 -0
- data/benchmarks/benchmark_to_s_cached.rb +55 -0
- data/benchmarks/benchmark_value_splitter.rb +54 -0
- data/benchmarks/benchmark_yjit.rb +158 -0
- data/benchmarks/benchmark_yjit_workers.rb +61 -0
- data/benchmarks/profile_to_s.rb +23 -0
- data/benchmarks/speedup_calculator.rb +83 -0
- data/benchmarks/system_metadata.rb +81 -0
- data/benchmarks/templates/benchmarks.md.erb +221 -0
- data/benchmarks/yjit_tests.rb +141 -0
- data/cataract.gemspec +34 -0
- data/cliff.toml +92 -0
- data/examples/color_conversion_visual_test/color_conversion_test.html +3603 -0
- data/examples/color_conversion_visual_test/generate.rb +202 -0
- data/examples/color_conversion_visual_test/template.html.erb +259 -0
- data/examples/css_analyzer/analyzer.rb +164 -0
- data/examples/css_analyzer/analyzers/base.rb +33 -0
- data/examples/css_analyzer/analyzers/colors.rb +133 -0
- data/examples/css_analyzer/analyzers/important.rb +88 -0
- data/examples/css_analyzer/analyzers/properties.rb +61 -0
- data/examples/css_analyzer/analyzers/specificity.rb +68 -0
- data/examples/css_analyzer/templates/report.html.erb +575 -0
- data/examples/css_analyzer.rb +69 -0
- data/examples/github_analysis.html +5343 -0
- data/ext/cataract/cataract.c +1086 -0
- data/ext/cataract/cataract.h +174 -0
- data/ext/cataract/css_parser.c +1435 -0
- data/ext/cataract/extconf.rb +48 -0
- data/ext/cataract/import_scanner.c +174 -0
- data/ext/cataract/merge.c +973 -0
- data/ext/cataract/shorthand_expander.c +902 -0
- data/ext/cataract/specificity.c +213 -0
- data/ext/cataract/value_splitter.c +116 -0
- data/ext/cataract_color/cataract_color.c +16 -0
- data/ext/cataract_color/color_conversion.c +1687 -0
- data/ext/cataract_color/color_conversion.h +136 -0
- data/ext/cataract_color/color_conversion_lab.c +571 -0
- data/ext/cataract_color/color_conversion_named.c +259 -0
- data/ext/cataract_color/color_conversion_oklab.c +547 -0
- data/ext/cataract_color/extconf.rb +23 -0
- data/ext/cataract_old/cataract.c +393 -0
- data/ext/cataract_old/cataract.h +250 -0
- data/ext/cataract_old/css_parser.c +933 -0
- data/ext/cataract_old/extconf.rb +67 -0
- data/ext/cataract_old/import_scanner.c +174 -0
- data/ext/cataract_old/merge.c +776 -0
- data/ext/cataract_old/shorthand_expander.c +902 -0
- data/ext/cataract_old/specificity.c +213 -0
- data/ext/cataract_old/stylesheet.c +290 -0
- data/ext/cataract_old/value_splitter.c +116 -0
- data/lib/cataract/at_rule.rb +97 -0
- data/lib/cataract/color_conversion.rb +18 -0
- data/lib/cataract/declarations.rb +332 -0
- data/lib/cataract/import_resolver.rb +210 -0
- data/lib/cataract/rule.rb +131 -0
- data/lib/cataract/stylesheet.rb +716 -0
- data/lib/cataract/stylesheet_scope.rb +257 -0
- data/lib/cataract/version.rb +5 -0
- data/lib/cataract.rb +107 -0
- data/lib/tasks/gem.rake +158 -0
- data/scripts/fuzzer/run.rb +828 -0
- data/scripts/fuzzer/worker.rb +99 -0
- data/scripts/generate_benchmarks_md.rb +155 -0
- metadata +135 -0
|
@@ -0,0 +1,213 @@
|
|
|
1
|
+
/*
|
|
2
|
+
* specificity.c - CSS selector specificity calculator
|
|
3
|
+
*
|
|
4
|
+
* Calculates CSS selector specificity according to W3C spec:
|
|
5
|
+
* https://www.w3.org/TR/selectors/#specificity
|
|
6
|
+
*
|
|
7
|
+
* Specificity = a*100 + b*10 + c*1 where:
|
|
8
|
+
* a = count of ID selectors (#id)
|
|
9
|
+
* b = count of class selectors (.class), attributes ([attr]), and pseudo-classes (:hover)
|
|
10
|
+
* c = count of type selectors (div) and pseudo-elements (::before)
|
|
11
|
+
*
|
|
12
|
+
* Special handling:
|
|
13
|
+
* - :not() doesn't count itself, but its content does
|
|
14
|
+
* - Legacy pseudo-elements with single colon (:before) count as pseudo-elements
|
|
15
|
+
* - Universal selector (*) has zero specificity
|
|
16
|
+
*/
|
|
17
|
+
|
|
18
|
+
#include "cataract.h"
|
|
19
|
+
#include <string.h>
|
|
20
|
+
|
|
21
|
+
// Calculate specificity for a CSS selector string
|
|
22
|
+
VALUE calculate_specificity(VALUE self, VALUE selector_string) {
|
|
23
|
+
Check_Type(selector_string, T_STRING);
|
|
24
|
+
|
|
25
|
+
const char *p = RSTRING_PTR(selector_string);
|
|
26
|
+
const char *pe = p + RSTRING_LEN(selector_string);
|
|
27
|
+
|
|
28
|
+
// Counters for specificity components
|
|
29
|
+
int id_count = 0;
|
|
30
|
+
int class_count = 0;
|
|
31
|
+
int attr_count = 0;
|
|
32
|
+
int pseudo_class_count = 0;
|
|
33
|
+
int pseudo_element_count = 0;
|
|
34
|
+
int element_count = 0;
|
|
35
|
+
|
|
36
|
+
while (p < pe) {
|
|
37
|
+
char c = *p;
|
|
38
|
+
|
|
39
|
+
// Skip whitespace and combinators
|
|
40
|
+
if (IS_WHITESPACE(c) || c == '>' || c == '+' || c == '~' || c == ',') {
|
|
41
|
+
p++;
|
|
42
|
+
continue;
|
|
43
|
+
}
|
|
44
|
+
|
|
45
|
+
// ID selector: #id
|
|
46
|
+
if (c == '#') {
|
|
47
|
+
id_count++;
|
|
48
|
+
p++;
|
|
49
|
+
// Skip the identifier
|
|
50
|
+
while (p < pe && ((*p >= 'a' && *p <= 'z') || (*p >= 'A' && *p <= 'Z') ||
|
|
51
|
+
(*p >= '0' && *p <= '9') || *p == '-' || *p == '_')) {
|
|
52
|
+
p++;
|
|
53
|
+
}
|
|
54
|
+
continue;
|
|
55
|
+
}
|
|
56
|
+
|
|
57
|
+
// Class selector: .class
|
|
58
|
+
if (c == '.') {
|
|
59
|
+
class_count++;
|
|
60
|
+
p++;
|
|
61
|
+
// Skip the identifier
|
|
62
|
+
while (p < pe && ((*p >= 'a' && *p <= 'z') || (*p >= 'A' && *p <= 'Z') ||
|
|
63
|
+
(*p >= '0' && *p <= '9') || *p == '-' || *p == '_')) {
|
|
64
|
+
p++;
|
|
65
|
+
}
|
|
66
|
+
continue;
|
|
67
|
+
}
|
|
68
|
+
|
|
69
|
+
// Attribute selector: [attr] or [attr=value]
|
|
70
|
+
if (c == '[') {
|
|
71
|
+
attr_count++;
|
|
72
|
+
p++;
|
|
73
|
+
// Skip to closing bracket
|
|
74
|
+
int bracket_depth = 1;
|
|
75
|
+
while (p < pe && bracket_depth > 0) {
|
|
76
|
+
if (*p == '[') bracket_depth++;
|
|
77
|
+
else if (*p == ']') bracket_depth--;
|
|
78
|
+
p++;
|
|
79
|
+
}
|
|
80
|
+
continue;
|
|
81
|
+
}
|
|
82
|
+
|
|
83
|
+
// Pseudo-element (::) or pseudo-class (:)
|
|
84
|
+
if (c == ':') {
|
|
85
|
+
p++;
|
|
86
|
+
int is_pseudo_element = 0;
|
|
87
|
+
|
|
88
|
+
// Check for double colon (::)
|
|
89
|
+
if (p < pe && *p == ':') {
|
|
90
|
+
is_pseudo_element = 1;
|
|
91
|
+
p++;
|
|
92
|
+
}
|
|
93
|
+
|
|
94
|
+
// Extract pseudo name
|
|
95
|
+
const char *pseudo_start = p;
|
|
96
|
+
while (p < pe && ((*p >= 'a' && *p <= 'z') || (*p >= 'A' && *p <= 'Z') ||
|
|
97
|
+
(*p >= '0' && *p <= '9') || *p == '-')) {
|
|
98
|
+
p++;
|
|
99
|
+
}
|
|
100
|
+
long pseudo_len = p - pseudo_start;
|
|
101
|
+
|
|
102
|
+
// Check for legacy pseudo-elements (single colon but should be double)
|
|
103
|
+
// :before, :after, :first-line, :first-letter, :selection
|
|
104
|
+
int is_legacy_pseudo_element = 0;
|
|
105
|
+
if (!is_pseudo_element && pseudo_len > 0) {
|
|
106
|
+
is_legacy_pseudo_element =
|
|
107
|
+
(pseudo_len == 6 && strncmp(pseudo_start, "before", 6) == 0) ||
|
|
108
|
+
(pseudo_len == 5 && strncmp(pseudo_start, "after", 5) == 0) ||
|
|
109
|
+
(pseudo_len == 10 && strncmp(pseudo_start, "first-line", 10) == 0) ||
|
|
110
|
+
(pseudo_len == 12 && strncmp(pseudo_start, "first-letter", 12) == 0) ||
|
|
111
|
+
(pseudo_len == 9 && strncmp(pseudo_start, "selection", 9) == 0);
|
|
112
|
+
}
|
|
113
|
+
|
|
114
|
+
// Check for :not() - it doesn't count itself, but its content does
|
|
115
|
+
int is_not = (pseudo_len == 3 && strncmp(pseudo_start, "not", 3) == 0);
|
|
116
|
+
|
|
117
|
+
// Skip function arguments if present
|
|
118
|
+
if (p < pe && *p == '(') {
|
|
119
|
+
p++;
|
|
120
|
+
int paren_depth = 1;
|
|
121
|
+
|
|
122
|
+
// If it's :not(), we need to calculate specificity of the content
|
|
123
|
+
if (is_not) {
|
|
124
|
+
const char *not_content_start = p;
|
|
125
|
+
|
|
126
|
+
// Find closing paren
|
|
127
|
+
while (p < pe && paren_depth > 0) {
|
|
128
|
+
if (*p == '(') paren_depth++;
|
|
129
|
+
else if (*p == ')') paren_depth--;
|
|
130
|
+
if (paren_depth > 0) p++;
|
|
131
|
+
}
|
|
132
|
+
|
|
133
|
+
const char *not_content_end = p;
|
|
134
|
+
long not_content_len = not_content_end - not_content_start;
|
|
135
|
+
|
|
136
|
+
// Recursively calculate specificity of :not() content
|
|
137
|
+
if (not_content_len > 0) {
|
|
138
|
+
VALUE not_content = rb_str_new(not_content_start, not_content_len);
|
|
139
|
+
VALUE not_spec = calculate_specificity(self, not_content);
|
|
140
|
+
int not_specificity = NUM2INT(not_spec);
|
|
141
|
+
|
|
142
|
+
// Add :not() content's specificity to our counts
|
|
143
|
+
int additional_a = not_specificity / 100;
|
|
144
|
+
int additional_b = (not_specificity % 100) / 10;
|
|
145
|
+
int additional_c = not_specificity % 10;
|
|
146
|
+
|
|
147
|
+
id_count += additional_a;
|
|
148
|
+
class_count += additional_b;
|
|
149
|
+
element_count += additional_c;
|
|
150
|
+
|
|
151
|
+
RB_GC_GUARD(not_content);
|
|
152
|
+
RB_GC_GUARD(not_spec);
|
|
153
|
+
}
|
|
154
|
+
|
|
155
|
+
p++; // Skip closing paren
|
|
156
|
+
} else {
|
|
157
|
+
// Skip other function arguments
|
|
158
|
+
while (p < pe && paren_depth > 0) {
|
|
159
|
+
if (*p == '(') paren_depth++;
|
|
160
|
+
else if (*p == ')') paren_depth--;
|
|
161
|
+
p++;
|
|
162
|
+
}
|
|
163
|
+
|
|
164
|
+
// Count the pseudo-class/element
|
|
165
|
+
if (is_pseudo_element || is_legacy_pseudo_element) {
|
|
166
|
+
pseudo_element_count++;
|
|
167
|
+
} else {
|
|
168
|
+
pseudo_class_count++;
|
|
169
|
+
}
|
|
170
|
+
}
|
|
171
|
+
} else {
|
|
172
|
+
// No function arguments - count the pseudo-class/element
|
|
173
|
+
if (is_not) {
|
|
174
|
+
// :not without parens is invalid, but don't count it
|
|
175
|
+
} else if (is_pseudo_element || is_legacy_pseudo_element) {
|
|
176
|
+
pseudo_element_count++;
|
|
177
|
+
} else {
|
|
178
|
+
pseudo_class_count++;
|
|
179
|
+
}
|
|
180
|
+
}
|
|
181
|
+
continue;
|
|
182
|
+
}
|
|
183
|
+
|
|
184
|
+
// Universal selector: *
|
|
185
|
+
if (c == '*') {
|
|
186
|
+
// Universal selector has specificity 0, don't count
|
|
187
|
+
p++;
|
|
188
|
+
continue;
|
|
189
|
+
}
|
|
190
|
+
|
|
191
|
+
// Type selector (element name): div, span, etc.
|
|
192
|
+
if ((*p >= 'a' && *p <= 'z') || (*p >= 'A' && *p <= 'Z')) {
|
|
193
|
+
element_count++;
|
|
194
|
+
// Skip the identifier
|
|
195
|
+
while (p < pe && ((*p >= 'a' && *p <= 'z') || (*p >= 'A' && *p <= 'Z') ||
|
|
196
|
+
(*p >= '0' && *p <= '9') || *p == '-' || *p == '_')) {
|
|
197
|
+
p++;
|
|
198
|
+
}
|
|
199
|
+
continue;
|
|
200
|
+
}
|
|
201
|
+
|
|
202
|
+
// Unknown character, skip it
|
|
203
|
+
p++;
|
|
204
|
+
}
|
|
205
|
+
|
|
206
|
+
// Calculate specificity using W3C formula:
|
|
207
|
+
// IDs * 100 + (classes + attributes + pseudo-classes) * 10 + (elements + pseudo-elements) * 1
|
|
208
|
+
int specificity = (id_count * 100) +
|
|
209
|
+
((class_count + attr_count + pseudo_class_count) * 10) +
|
|
210
|
+
((element_count + pseudo_element_count) * 1);
|
|
211
|
+
|
|
212
|
+
return INT2NUM(specificity);
|
|
213
|
+
}
|
|
@@ -0,0 +1,116 @@
|
|
|
1
|
+
/*
|
|
2
|
+
* value_splitter.c - CSS value splitting utility
|
|
3
|
+
*
|
|
4
|
+
* Purpose: Split CSS declaration values on whitespace while preserving content
|
|
5
|
+
* inside functions and quoted strings.
|
|
6
|
+
*
|
|
7
|
+
* Examples:
|
|
8
|
+
* "1px 2px 3px 4px" => ["1px", "2px", "3px", "4px"]
|
|
9
|
+
* "10px calc(100% - 20px)" => ["10px", "calc(100% - 20px)"]
|
|
10
|
+
* "rgb(255, 0, 0) blue" => ["rgb(255, 0, 0)", "blue"]
|
|
11
|
+
* "'Helvetica Neue', sans-serif" => ["'Helvetica Neue',", "sans-serif"]
|
|
12
|
+
*/
|
|
13
|
+
|
|
14
|
+
#include "cataract.h"
|
|
15
|
+
|
|
16
|
+
/*
|
|
17
|
+
* Split a CSS declaration value on whitespace while preserving content
|
|
18
|
+
* inside functions and quoted strings.
|
|
19
|
+
*
|
|
20
|
+
* Algorithm:
|
|
21
|
+
* - Track parenthesis depth for functions like calc(), rgb()
|
|
22
|
+
* - Track quote state for strings like 'Helvetica Neue'
|
|
23
|
+
* - Split on whitespace only when depth=0 and not in quotes
|
|
24
|
+
*
|
|
25
|
+
* @param value [String] Pre-parsed CSS declaration value (assumed well-formed)
|
|
26
|
+
* @return [Array<String>] Array of value tokens
|
|
27
|
+
*/
|
|
28
|
+
VALUE cataract_split_value(VALUE self, VALUE value) {
|
|
29
|
+
Check_Type(value, T_STRING);
|
|
30
|
+
const char *str = RSTRING_PTR(value);
|
|
31
|
+
long len = RSTRING_LEN(value);
|
|
32
|
+
|
|
33
|
+
// Sanity check: reject unreasonably long values (DoS protection)
|
|
34
|
+
if (len > 65536) {
|
|
35
|
+
rb_raise(rb_eArgError, "CSS value too long (max 64KB)");
|
|
36
|
+
}
|
|
37
|
+
|
|
38
|
+
// Result array
|
|
39
|
+
VALUE result = rb_ary_new();
|
|
40
|
+
|
|
41
|
+
// State tracking
|
|
42
|
+
int paren_depth = 0;
|
|
43
|
+
int in_quotes = 0;
|
|
44
|
+
char quote_char = '\0';
|
|
45
|
+
const char *token_start = NULL;
|
|
46
|
+
const char *p = str;
|
|
47
|
+
const char *pe = str + len;
|
|
48
|
+
|
|
49
|
+
while (p < pe) {
|
|
50
|
+
char c = *p;
|
|
51
|
+
|
|
52
|
+
// Handle quotes
|
|
53
|
+
if ((c == '"' || c == '\'') && !in_quotes) {
|
|
54
|
+
// Opening quote
|
|
55
|
+
in_quotes = 1;
|
|
56
|
+
quote_char = c;
|
|
57
|
+
if (token_start == NULL) token_start = p;
|
|
58
|
+
p++;
|
|
59
|
+
continue;
|
|
60
|
+
}
|
|
61
|
+
|
|
62
|
+
if (in_quotes && c == quote_char) {
|
|
63
|
+
// Closing quote
|
|
64
|
+
in_quotes = 0;
|
|
65
|
+
p++;
|
|
66
|
+
continue;
|
|
67
|
+
}
|
|
68
|
+
|
|
69
|
+
// Handle parentheses (only when not in quotes)
|
|
70
|
+
if (!in_quotes) {
|
|
71
|
+
if (c == '(') {
|
|
72
|
+
paren_depth++;
|
|
73
|
+
if (token_start == NULL) token_start = p;
|
|
74
|
+
p++;
|
|
75
|
+
continue;
|
|
76
|
+
}
|
|
77
|
+
|
|
78
|
+
if (c == ')') {
|
|
79
|
+
paren_depth--;
|
|
80
|
+
p++;
|
|
81
|
+
continue;
|
|
82
|
+
}
|
|
83
|
+
|
|
84
|
+
// Handle whitespace (delimiter when depth=0 and not quoted)
|
|
85
|
+
if (IS_WHITESPACE(c)) {
|
|
86
|
+
if (paren_depth == 0 && !in_quotes) {
|
|
87
|
+
// Emit token if we have one
|
|
88
|
+
if (token_start != NULL) {
|
|
89
|
+
size_t token_len = p - token_start;
|
|
90
|
+
VALUE token = rb_str_new(token_start, token_len);
|
|
91
|
+
rb_ary_push(result, token);
|
|
92
|
+
token_start = NULL;
|
|
93
|
+
}
|
|
94
|
+
p++;
|
|
95
|
+
continue;
|
|
96
|
+
}
|
|
97
|
+
// else: whitespace inside function/quotes, part of token
|
|
98
|
+
}
|
|
99
|
+
}
|
|
100
|
+
|
|
101
|
+
// Regular character - mark start if needed
|
|
102
|
+
if (token_start == NULL) {
|
|
103
|
+
token_start = p;
|
|
104
|
+
}
|
|
105
|
+
p++;
|
|
106
|
+
}
|
|
107
|
+
|
|
108
|
+
// Emit final token if any
|
|
109
|
+
if (token_start != NULL) {
|
|
110
|
+
size_t token_len = pe - token_start;
|
|
111
|
+
VALUE token = rb_str_new(token_start, token_len);
|
|
112
|
+
rb_ary_push(result, token);
|
|
113
|
+
}
|
|
114
|
+
|
|
115
|
+
return result;
|
|
116
|
+
}
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
// cataract_color.c - Color conversion extension entry point
|
|
2
|
+
// This is a separate extension loaded on-demand via require 'cataract/color_conversion'
|
|
3
|
+
|
|
4
|
+
#include <ruby.h>
|
|
5
|
+
|
|
6
|
+
// Forward declaration from color_conversion.c
|
|
7
|
+
void Init_color_conversion(VALUE mCataract);
|
|
8
|
+
|
|
9
|
+
// Extension initialization - called when the .so is loaded
|
|
10
|
+
void Init_cataract_color(void) {
|
|
11
|
+
// Get the Cataract module (must already be loaded by main extension)
|
|
12
|
+
VALUE mCataract = rb_const_get(rb_cObject, rb_intern("Cataract"));
|
|
13
|
+
|
|
14
|
+
// Initialize color conversion methods on Cataract::Stylesheet
|
|
15
|
+
Init_color_conversion(mCataract);
|
|
16
|
+
}
|