cataract 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (90) hide show
  1. checksums.yaml +7 -0
  2. data/.clang-tidy +30 -0
  3. data/.github/workflows/ci-macos.yml +12 -0
  4. data/.github/workflows/ci.yml +77 -0
  5. data/.github/workflows/test.yml +76 -0
  6. data/.gitignore +45 -0
  7. data/.overcommit.yml +38 -0
  8. data/.rubocop.yml +83 -0
  9. data/BENCHMARKS.md +201 -0
  10. data/CHANGELOG.md +1 -0
  11. data/Gemfile +27 -0
  12. data/LICENSE +21 -0
  13. data/RAGEL_MIGRATION.md +60 -0
  14. data/README.md +292 -0
  15. data/Rakefile +209 -0
  16. data/benchmarks/benchmark_harness.rb +193 -0
  17. data/benchmarks/benchmark_merging.rb +121 -0
  18. data/benchmarks/benchmark_optimization_comparison.rb +168 -0
  19. data/benchmarks/benchmark_parsing.rb +153 -0
  20. data/benchmarks/benchmark_ragel_removal.rb +56 -0
  21. data/benchmarks/benchmark_runner.rb +70 -0
  22. data/benchmarks/benchmark_serialization.rb +180 -0
  23. data/benchmarks/benchmark_shorthand.rb +109 -0
  24. data/benchmarks/benchmark_shorthand_expansion.rb +176 -0
  25. data/benchmarks/benchmark_specificity.rb +124 -0
  26. data/benchmarks/benchmark_string_allocation.rb +151 -0
  27. data/benchmarks/benchmark_stylesheet_to_s.rb +62 -0
  28. data/benchmarks/benchmark_to_s_cached.rb +55 -0
  29. data/benchmarks/benchmark_value_splitter.rb +54 -0
  30. data/benchmarks/benchmark_yjit.rb +158 -0
  31. data/benchmarks/benchmark_yjit_workers.rb +61 -0
  32. data/benchmarks/profile_to_s.rb +23 -0
  33. data/benchmarks/speedup_calculator.rb +83 -0
  34. data/benchmarks/system_metadata.rb +81 -0
  35. data/benchmarks/templates/benchmarks.md.erb +221 -0
  36. data/benchmarks/yjit_tests.rb +141 -0
  37. data/cataract.gemspec +34 -0
  38. data/cliff.toml +92 -0
  39. data/examples/color_conversion_visual_test/color_conversion_test.html +3603 -0
  40. data/examples/color_conversion_visual_test/generate.rb +202 -0
  41. data/examples/color_conversion_visual_test/template.html.erb +259 -0
  42. data/examples/css_analyzer/analyzer.rb +164 -0
  43. data/examples/css_analyzer/analyzers/base.rb +33 -0
  44. data/examples/css_analyzer/analyzers/colors.rb +133 -0
  45. data/examples/css_analyzer/analyzers/important.rb +88 -0
  46. data/examples/css_analyzer/analyzers/properties.rb +61 -0
  47. data/examples/css_analyzer/analyzers/specificity.rb +68 -0
  48. data/examples/css_analyzer/templates/report.html.erb +575 -0
  49. data/examples/css_analyzer.rb +69 -0
  50. data/examples/github_analysis.html +5343 -0
  51. data/ext/cataract/cataract.c +1086 -0
  52. data/ext/cataract/cataract.h +174 -0
  53. data/ext/cataract/css_parser.c +1435 -0
  54. data/ext/cataract/extconf.rb +48 -0
  55. data/ext/cataract/import_scanner.c +174 -0
  56. data/ext/cataract/merge.c +973 -0
  57. data/ext/cataract/shorthand_expander.c +902 -0
  58. data/ext/cataract/specificity.c +213 -0
  59. data/ext/cataract/value_splitter.c +116 -0
  60. data/ext/cataract_color/cataract_color.c +16 -0
  61. data/ext/cataract_color/color_conversion.c +1687 -0
  62. data/ext/cataract_color/color_conversion.h +136 -0
  63. data/ext/cataract_color/color_conversion_lab.c +571 -0
  64. data/ext/cataract_color/color_conversion_named.c +259 -0
  65. data/ext/cataract_color/color_conversion_oklab.c +547 -0
  66. data/ext/cataract_color/extconf.rb +23 -0
  67. data/ext/cataract_old/cataract.c +393 -0
  68. data/ext/cataract_old/cataract.h +250 -0
  69. data/ext/cataract_old/css_parser.c +933 -0
  70. data/ext/cataract_old/extconf.rb +67 -0
  71. data/ext/cataract_old/import_scanner.c +174 -0
  72. data/ext/cataract_old/merge.c +776 -0
  73. data/ext/cataract_old/shorthand_expander.c +902 -0
  74. data/ext/cataract_old/specificity.c +213 -0
  75. data/ext/cataract_old/stylesheet.c +290 -0
  76. data/ext/cataract_old/value_splitter.c +116 -0
  77. data/lib/cataract/at_rule.rb +97 -0
  78. data/lib/cataract/color_conversion.rb +18 -0
  79. data/lib/cataract/declarations.rb +332 -0
  80. data/lib/cataract/import_resolver.rb +210 -0
  81. data/lib/cataract/rule.rb +131 -0
  82. data/lib/cataract/stylesheet.rb +716 -0
  83. data/lib/cataract/stylesheet_scope.rb +257 -0
  84. data/lib/cataract/version.rb +5 -0
  85. data/lib/cataract.rb +107 -0
  86. data/lib/tasks/gem.rake +158 -0
  87. data/scripts/fuzzer/run.rb +828 -0
  88. data/scripts/fuzzer/worker.rb +99 -0
  89. data/scripts/generate_benchmarks_md.rb +155 -0
  90. metadata +135 -0
@@ -0,0 +1,213 @@
1
+ /*
2
+ * specificity.c - CSS selector specificity calculator
3
+ *
4
+ * Calculates CSS selector specificity according to W3C spec:
5
+ * https://www.w3.org/TR/selectors/#specificity
6
+ *
7
+ * Specificity = a*100 + b*10 + c*1 where:
8
+ * a = count of ID selectors (#id)
9
+ * b = count of class selectors (.class), attributes ([attr]), and pseudo-classes (:hover)
10
+ * c = count of type selectors (div) and pseudo-elements (::before)
11
+ *
12
+ * Special handling:
13
+ * - :not() doesn't count itself, but its content does
14
+ * - Legacy pseudo-elements with single colon (:before) count as pseudo-elements
15
+ * - Universal selector (*) has zero specificity
16
+ */
17
+
18
+ #include "cataract.h"
19
+ #include <string.h>
20
+
21
+ // Calculate specificity for a CSS selector string
22
+ VALUE calculate_specificity(VALUE self, VALUE selector_string) {
23
+ Check_Type(selector_string, T_STRING);
24
+
25
+ const char *p = RSTRING_PTR(selector_string);
26
+ const char *pe = p + RSTRING_LEN(selector_string);
27
+
28
+ // Counters for specificity components
29
+ int id_count = 0;
30
+ int class_count = 0;
31
+ int attr_count = 0;
32
+ int pseudo_class_count = 0;
33
+ int pseudo_element_count = 0;
34
+ int element_count = 0;
35
+
36
+ while (p < pe) {
37
+ char c = *p;
38
+
39
+ // Skip whitespace and combinators
40
+ if (IS_WHITESPACE(c) || c == '>' || c == '+' || c == '~' || c == ',') {
41
+ p++;
42
+ continue;
43
+ }
44
+
45
+ // ID selector: #id
46
+ if (c == '#') {
47
+ id_count++;
48
+ p++;
49
+ // Skip the identifier
50
+ while (p < pe && ((*p >= 'a' && *p <= 'z') || (*p >= 'A' && *p <= 'Z') ||
51
+ (*p >= '0' && *p <= '9') || *p == '-' || *p == '_')) {
52
+ p++;
53
+ }
54
+ continue;
55
+ }
56
+
57
+ // Class selector: .class
58
+ if (c == '.') {
59
+ class_count++;
60
+ p++;
61
+ // Skip the identifier
62
+ while (p < pe && ((*p >= 'a' && *p <= 'z') || (*p >= 'A' && *p <= 'Z') ||
63
+ (*p >= '0' && *p <= '9') || *p == '-' || *p == '_')) {
64
+ p++;
65
+ }
66
+ continue;
67
+ }
68
+
69
+ // Attribute selector: [attr] or [attr=value]
70
+ if (c == '[') {
71
+ attr_count++;
72
+ p++;
73
+ // Skip to closing bracket
74
+ int bracket_depth = 1;
75
+ while (p < pe && bracket_depth > 0) {
76
+ if (*p == '[') bracket_depth++;
77
+ else if (*p == ']') bracket_depth--;
78
+ p++;
79
+ }
80
+ continue;
81
+ }
82
+
83
+ // Pseudo-element (::) or pseudo-class (:)
84
+ if (c == ':') {
85
+ p++;
86
+ int is_pseudo_element = 0;
87
+
88
+ // Check for double colon (::)
89
+ if (p < pe && *p == ':') {
90
+ is_pseudo_element = 1;
91
+ p++;
92
+ }
93
+
94
+ // Extract pseudo name
95
+ const char *pseudo_start = p;
96
+ while (p < pe && ((*p >= 'a' && *p <= 'z') || (*p >= 'A' && *p <= 'Z') ||
97
+ (*p >= '0' && *p <= '9') || *p == '-')) {
98
+ p++;
99
+ }
100
+ long pseudo_len = p - pseudo_start;
101
+
102
+ // Check for legacy pseudo-elements (single colon but should be double)
103
+ // :before, :after, :first-line, :first-letter, :selection
104
+ int is_legacy_pseudo_element = 0;
105
+ if (!is_pseudo_element && pseudo_len > 0) {
106
+ is_legacy_pseudo_element =
107
+ (pseudo_len == 6 && strncmp(pseudo_start, "before", 6) == 0) ||
108
+ (pseudo_len == 5 && strncmp(pseudo_start, "after", 5) == 0) ||
109
+ (pseudo_len == 10 && strncmp(pseudo_start, "first-line", 10) == 0) ||
110
+ (pseudo_len == 12 && strncmp(pseudo_start, "first-letter", 12) == 0) ||
111
+ (pseudo_len == 9 && strncmp(pseudo_start, "selection", 9) == 0);
112
+ }
113
+
114
+ // Check for :not() - it doesn't count itself, but its content does
115
+ int is_not = (pseudo_len == 3 && strncmp(pseudo_start, "not", 3) == 0);
116
+
117
+ // Skip function arguments if present
118
+ if (p < pe && *p == '(') {
119
+ p++;
120
+ int paren_depth = 1;
121
+
122
+ // If it's :not(), we need to calculate specificity of the content
123
+ if (is_not) {
124
+ const char *not_content_start = p;
125
+
126
+ // Find closing paren
127
+ while (p < pe && paren_depth > 0) {
128
+ if (*p == '(') paren_depth++;
129
+ else if (*p == ')') paren_depth--;
130
+ if (paren_depth > 0) p++;
131
+ }
132
+
133
+ const char *not_content_end = p;
134
+ long not_content_len = not_content_end - not_content_start;
135
+
136
+ // Recursively calculate specificity of :not() content
137
+ if (not_content_len > 0) {
138
+ VALUE not_content = rb_str_new(not_content_start, not_content_len);
139
+ VALUE not_spec = calculate_specificity(self, not_content);
140
+ int not_specificity = NUM2INT(not_spec);
141
+
142
+ // Add :not() content's specificity to our counts
143
+ int additional_a = not_specificity / 100;
144
+ int additional_b = (not_specificity % 100) / 10;
145
+ int additional_c = not_specificity % 10;
146
+
147
+ id_count += additional_a;
148
+ class_count += additional_b;
149
+ element_count += additional_c;
150
+
151
+ RB_GC_GUARD(not_content);
152
+ RB_GC_GUARD(not_spec);
153
+ }
154
+
155
+ p++; // Skip closing paren
156
+ } else {
157
+ // Skip other function arguments
158
+ while (p < pe && paren_depth > 0) {
159
+ if (*p == '(') paren_depth++;
160
+ else if (*p == ')') paren_depth--;
161
+ p++;
162
+ }
163
+
164
+ // Count the pseudo-class/element
165
+ if (is_pseudo_element || is_legacy_pseudo_element) {
166
+ pseudo_element_count++;
167
+ } else {
168
+ pseudo_class_count++;
169
+ }
170
+ }
171
+ } else {
172
+ // No function arguments - count the pseudo-class/element
173
+ if (is_not) {
174
+ // :not without parens is invalid, but don't count it
175
+ } else if (is_pseudo_element || is_legacy_pseudo_element) {
176
+ pseudo_element_count++;
177
+ } else {
178
+ pseudo_class_count++;
179
+ }
180
+ }
181
+ continue;
182
+ }
183
+
184
+ // Universal selector: *
185
+ if (c == '*') {
186
+ // Universal selector has specificity 0, don't count
187
+ p++;
188
+ continue;
189
+ }
190
+
191
+ // Type selector (element name): div, span, etc.
192
+ if ((*p >= 'a' && *p <= 'z') || (*p >= 'A' && *p <= 'Z')) {
193
+ element_count++;
194
+ // Skip the identifier
195
+ while (p < pe && ((*p >= 'a' && *p <= 'z') || (*p >= 'A' && *p <= 'Z') ||
196
+ (*p >= '0' && *p <= '9') || *p == '-' || *p == '_')) {
197
+ p++;
198
+ }
199
+ continue;
200
+ }
201
+
202
+ // Unknown character, skip it
203
+ p++;
204
+ }
205
+
206
+ // Calculate specificity using W3C formula:
207
+ // IDs * 100 + (classes + attributes + pseudo-classes) * 10 + (elements + pseudo-elements) * 1
208
+ int specificity = (id_count * 100) +
209
+ ((class_count + attr_count + pseudo_class_count) * 10) +
210
+ ((element_count + pseudo_element_count) * 1);
211
+
212
+ return INT2NUM(specificity);
213
+ }
@@ -0,0 +1,116 @@
1
+ /*
2
+ * value_splitter.c - CSS value splitting utility
3
+ *
4
+ * Purpose: Split CSS declaration values on whitespace while preserving content
5
+ * inside functions and quoted strings.
6
+ *
7
+ * Examples:
8
+ * "1px 2px 3px 4px" => ["1px", "2px", "3px", "4px"]
9
+ * "10px calc(100% - 20px)" => ["10px", "calc(100% - 20px)"]
10
+ * "rgb(255, 0, 0) blue" => ["rgb(255, 0, 0)", "blue"]
11
+ * "'Helvetica Neue', sans-serif" => ["'Helvetica Neue',", "sans-serif"]
12
+ */
13
+
14
+ #include "cataract.h"
15
+
16
+ /*
17
+ * Split a CSS declaration value on whitespace while preserving content
18
+ * inside functions and quoted strings.
19
+ *
20
+ * Algorithm:
21
+ * - Track parenthesis depth for functions like calc(), rgb()
22
+ * - Track quote state for strings like 'Helvetica Neue'
23
+ * - Split on whitespace only when depth=0 and not in quotes
24
+ *
25
+ * @param value [String] Pre-parsed CSS declaration value (assumed well-formed)
26
+ * @return [Array<String>] Array of value tokens
27
+ */
28
+ VALUE cataract_split_value(VALUE self, VALUE value) {
29
+ Check_Type(value, T_STRING);
30
+ const char *str = RSTRING_PTR(value);
31
+ long len = RSTRING_LEN(value);
32
+
33
+ // Sanity check: reject unreasonably long values (DoS protection)
34
+ if (len > 65536) {
35
+ rb_raise(rb_eArgError, "CSS value too long (max 64KB)");
36
+ }
37
+
38
+ // Result array
39
+ VALUE result = rb_ary_new();
40
+
41
+ // State tracking
42
+ int paren_depth = 0;
43
+ int in_quotes = 0;
44
+ char quote_char = '\0';
45
+ const char *token_start = NULL;
46
+ const char *p = str;
47
+ const char *pe = str + len;
48
+
49
+ while (p < pe) {
50
+ char c = *p;
51
+
52
+ // Handle quotes
53
+ if ((c == '"' || c == '\'') && !in_quotes) {
54
+ // Opening quote
55
+ in_quotes = 1;
56
+ quote_char = c;
57
+ if (token_start == NULL) token_start = p;
58
+ p++;
59
+ continue;
60
+ }
61
+
62
+ if (in_quotes && c == quote_char) {
63
+ // Closing quote
64
+ in_quotes = 0;
65
+ p++;
66
+ continue;
67
+ }
68
+
69
+ // Handle parentheses (only when not in quotes)
70
+ if (!in_quotes) {
71
+ if (c == '(') {
72
+ paren_depth++;
73
+ if (token_start == NULL) token_start = p;
74
+ p++;
75
+ continue;
76
+ }
77
+
78
+ if (c == ')') {
79
+ paren_depth--;
80
+ p++;
81
+ continue;
82
+ }
83
+
84
+ // Handle whitespace (delimiter when depth=0 and not quoted)
85
+ if (IS_WHITESPACE(c)) {
86
+ if (paren_depth == 0 && !in_quotes) {
87
+ // Emit token if we have one
88
+ if (token_start != NULL) {
89
+ size_t token_len = p - token_start;
90
+ VALUE token = rb_str_new(token_start, token_len);
91
+ rb_ary_push(result, token);
92
+ token_start = NULL;
93
+ }
94
+ p++;
95
+ continue;
96
+ }
97
+ // else: whitespace inside function/quotes, part of token
98
+ }
99
+ }
100
+
101
+ // Regular character - mark start if needed
102
+ if (token_start == NULL) {
103
+ token_start = p;
104
+ }
105
+ p++;
106
+ }
107
+
108
+ // Emit final token if any
109
+ if (token_start != NULL) {
110
+ size_t token_len = pe - token_start;
111
+ VALUE token = rb_str_new(token_start, token_len);
112
+ rb_ary_push(result, token);
113
+ }
114
+
115
+ return result;
116
+ }
@@ -0,0 +1,16 @@
1
+ // cataract_color.c - Color conversion extension entry point
2
+ // This is a separate extension loaded on-demand via require 'cataract/color_conversion'
3
+
4
+ #include <ruby.h>
5
+
6
+ // Forward declaration from color_conversion.c
7
+ void Init_color_conversion(VALUE mCataract);
8
+
9
+ // Extension initialization - called when the .so is loaded
10
+ void Init_cataract_color(void) {
11
+ // Get the Cataract module (must already be loaded by main extension)
12
+ VALUE mCataract = rb_const_get(rb_cObject, rb_intern("Cataract"));
13
+
14
+ // Initialize color conversion methods on Cataract::Stylesheet
15
+ Init_color_conversion(mCataract);
16
+ }