cataract 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (90) hide show
  1. checksums.yaml +7 -0
  2. data/.clang-tidy +30 -0
  3. data/.github/workflows/ci-macos.yml +12 -0
  4. data/.github/workflows/ci.yml +77 -0
  5. data/.github/workflows/test.yml +76 -0
  6. data/.gitignore +45 -0
  7. data/.overcommit.yml +38 -0
  8. data/.rubocop.yml +83 -0
  9. data/BENCHMARKS.md +201 -0
  10. data/CHANGELOG.md +1 -0
  11. data/Gemfile +27 -0
  12. data/LICENSE +21 -0
  13. data/RAGEL_MIGRATION.md +60 -0
  14. data/README.md +292 -0
  15. data/Rakefile +209 -0
  16. data/benchmarks/benchmark_harness.rb +193 -0
  17. data/benchmarks/benchmark_merging.rb +121 -0
  18. data/benchmarks/benchmark_optimization_comparison.rb +168 -0
  19. data/benchmarks/benchmark_parsing.rb +153 -0
  20. data/benchmarks/benchmark_ragel_removal.rb +56 -0
  21. data/benchmarks/benchmark_runner.rb +70 -0
  22. data/benchmarks/benchmark_serialization.rb +180 -0
  23. data/benchmarks/benchmark_shorthand.rb +109 -0
  24. data/benchmarks/benchmark_shorthand_expansion.rb +176 -0
  25. data/benchmarks/benchmark_specificity.rb +124 -0
  26. data/benchmarks/benchmark_string_allocation.rb +151 -0
  27. data/benchmarks/benchmark_stylesheet_to_s.rb +62 -0
  28. data/benchmarks/benchmark_to_s_cached.rb +55 -0
  29. data/benchmarks/benchmark_value_splitter.rb +54 -0
  30. data/benchmarks/benchmark_yjit.rb +158 -0
  31. data/benchmarks/benchmark_yjit_workers.rb +61 -0
  32. data/benchmarks/profile_to_s.rb +23 -0
  33. data/benchmarks/speedup_calculator.rb +83 -0
  34. data/benchmarks/system_metadata.rb +81 -0
  35. data/benchmarks/templates/benchmarks.md.erb +221 -0
  36. data/benchmarks/yjit_tests.rb +141 -0
  37. data/cataract.gemspec +34 -0
  38. data/cliff.toml +92 -0
  39. data/examples/color_conversion_visual_test/color_conversion_test.html +3603 -0
  40. data/examples/color_conversion_visual_test/generate.rb +202 -0
  41. data/examples/color_conversion_visual_test/template.html.erb +259 -0
  42. data/examples/css_analyzer/analyzer.rb +164 -0
  43. data/examples/css_analyzer/analyzers/base.rb +33 -0
  44. data/examples/css_analyzer/analyzers/colors.rb +133 -0
  45. data/examples/css_analyzer/analyzers/important.rb +88 -0
  46. data/examples/css_analyzer/analyzers/properties.rb +61 -0
  47. data/examples/css_analyzer/analyzers/specificity.rb +68 -0
  48. data/examples/css_analyzer/templates/report.html.erb +575 -0
  49. data/examples/css_analyzer.rb +69 -0
  50. data/examples/github_analysis.html +5343 -0
  51. data/ext/cataract/cataract.c +1086 -0
  52. data/ext/cataract/cataract.h +174 -0
  53. data/ext/cataract/css_parser.c +1435 -0
  54. data/ext/cataract/extconf.rb +48 -0
  55. data/ext/cataract/import_scanner.c +174 -0
  56. data/ext/cataract/merge.c +973 -0
  57. data/ext/cataract/shorthand_expander.c +902 -0
  58. data/ext/cataract/specificity.c +213 -0
  59. data/ext/cataract/value_splitter.c +116 -0
  60. data/ext/cataract_color/cataract_color.c +16 -0
  61. data/ext/cataract_color/color_conversion.c +1687 -0
  62. data/ext/cataract_color/color_conversion.h +136 -0
  63. data/ext/cataract_color/color_conversion_lab.c +571 -0
  64. data/ext/cataract_color/color_conversion_named.c +259 -0
  65. data/ext/cataract_color/color_conversion_oklab.c +547 -0
  66. data/ext/cataract_color/extconf.rb +23 -0
  67. data/ext/cataract_old/cataract.c +393 -0
  68. data/ext/cataract_old/cataract.h +250 -0
  69. data/ext/cataract_old/css_parser.c +933 -0
  70. data/ext/cataract_old/extconf.rb +67 -0
  71. data/ext/cataract_old/import_scanner.c +174 -0
  72. data/ext/cataract_old/merge.c +776 -0
  73. data/ext/cataract_old/shorthand_expander.c +902 -0
  74. data/ext/cataract_old/specificity.c +213 -0
  75. data/ext/cataract_old/stylesheet.c +290 -0
  76. data/ext/cataract_old/value_splitter.c +116 -0
  77. data/lib/cataract/at_rule.rb +97 -0
  78. data/lib/cataract/color_conversion.rb +18 -0
  79. data/lib/cataract/declarations.rb +332 -0
  80. data/lib/cataract/import_resolver.rb +210 -0
  81. data/lib/cataract/rule.rb +131 -0
  82. data/lib/cataract/stylesheet.rb +716 -0
  83. data/lib/cataract/stylesheet_scope.rb +257 -0
  84. data/lib/cataract/version.rb +5 -0
  85. data/lib/cataract.rb +107 -0
  86. data/lib/tasks/gem.rake +158 -0
  87. data/scripts/fuzzer/run.rb +828 -0
  88. data/scripts/fuzzer/worker.rb +99 -0
  89. data/scripts/generate_benchmarks_md.rb +155 -0
  90. metadata +135 -0
@@ -0,0 +1,213 @@
1
+ /*
2
+ * specificity.c - CSS selector specificity calculator
3
+ *
4
+ * Calculates CSS selector specificity according to W3C spec:
5
+ * https://www.w3.org/TR/selectors/#specificity
6
+ *
7
+ * Specificity = a*100 + b*10 + c*1 (packed base-10, so a count of 10 or more in b or c carries into the next column) where:
8
+ * a = count of ID selectors (#id)
9
+ * b = count of class selectors (.class), attributes ([attr]), and pseudo-classes (:hover)
10
+ * c = count of type selectors (div) and pseudo-elements (::before)
11
+ *
12
+ * Special handling:
13
+ * - :not() doesn't count itself, but its content does
14
+ * - Legacy pseudo-elements with single colon (:before) count as pseudo-elements
15
+ * - Universal selector (*) has zero specificity
16
+ */
17
+
18
+ #include "cataract.h"
19
+ #include <string.h>
20
+
21
+ // Calculate specificity for a CSS selector string
22
+ VALUE calculate_specificity(VALUE self, VALUE selector_string) {
23
+ Check_Type(selector_string, T_STRING);
24
+
25
+ const char *p = RSTRING_PTR(selector_string);
26
+ const char *pe = p + RSTRING_LEN(selector_string);
27
+
28
+ // Counters for specificity components
29
+ int id_count = 0;
30
+ int class_count = 0;
31
+ int attr_count = 0;
32
+ int pseudo_class_count = 0;
33
+ int pseudo_element_count = 0;
34
+ int element_count = 0;
35
+
36
+ while (p < pe) {
37
+ char c = *p;
38
+
39
+ // Skip whitespace and combinators
40
+ if (IS_WHITESPACE(c) || c == '>' || c == '+' || c == '~' || c == ',') {
41
+ p++;
42
+ continue;
43
+ }
44
+
45
+ // ID selector: #id
46
+ if (c == '#') {
47
+ id_count++;
48
+ p++;
49
+ // Skip the identifier
50
+ while (p < pe && ((*p >= 'a' && *p <= 'z') || (*p >= 'A' && *p <= 'Z') ||
51
+ (*p >= '0' && *p <= '9') || *p == '-' || *p == '_')) {
52
+ p++;
53
+ }
54
+ continue;
55
+ }
56
+
57
+ // Class selector: .class
58
+ if (c == '.') {
59
+ class_count++;
60
+ p++;
61
+ // Skip the identifier
62
+ while (p < pe && ((*p >= 'a' && *p <= 'z') || (*p >= 'A' && *p <= 'Z') ||
63
+ (*p >= '0' && *p <= '9') || *p == '-' || *p == '_')) {
64
+ p++;
65
+ }
66
+ continue;
67
+ }
68
+
69
+ // Attribute selector: [attr] or [attr=value]
70
+ if (c == '[') {
71
+ attr_count++;
72
+ p++;
73
+ // Skip to closing bracket
74
+ int bracket_depth = 1;
75
+ while (p < pe && bracket_depth > 0) {
76
+ if (*p == '[') bracket_depth++;
77
+ else if (*p == ']') bracket_depth--;
78
+ p++;
79
+ }
80
+ continue;
81
+ }
82
+
83
+ // Pseudo-element (::) or pseudo-class (:)
84
+ if (c == ':') {
85
+ p++;
86
+ int is_pseudo_element = 0;
87
+
88
+ // Check for double colon (::)
89
+ if (p < pe && *p == ':') {
90
+ is_pseudo_element = 1;
91
+ p++;
92
+ }
93
+
94
+ // Extract pseudo name
95
+ const char *pseudo_start = p;
96
+ while (p < pe && ((*p >= 'a' && *p <= 'z') || (*p >= 'A' && *p <= 'Z') ||
97
+ (*p >= '0' && *p <= '9') || *p == '-')) {
98
+ p++;
99
+ }
100
+ long pseudo_len = p - pseudo_start;
101
+
102
+ // Check for legacy pseudo-elements (single colon but should be double)
103
+ // :before, :after, :first-line, :first-letter, :selection
104
+ int is_legacy_pseudo_element = 0;
105
+ if (!is_pseudo_element && pseudo_len > 0) {
106
+ is_legacy_pseudo_element =
107
+ (pseudo_len == 6 && strncmp(pseudo_start, "before", 6) == 0) ||
108
+ (pseudo_len == 5 && strncmp(pseudo_start, "after", 5) == 0) ||
109
+ (pseudo_len == 10 && strncmp(pseudo_start, "first-line", 10) == 0) ||
110
+ (pseudo_len == 12 && strncmp(pseudo_start, "first-letter", 12) == 0) ||
111
+ (pseudo_len == 9 && strncmp(pseudo_start, "selection", 9) == 0);
112
+ }
113
+
114
+ // Check for :not() - it doesn't count itself, but its content does
115
+ int is_not = (pseudo_len == 3 && strncmp(pseudo_start, "not", 3) == 0);
116
+
117
+ // Skip function arguments if present
118
+ if (p < pe && *p == '(') {
119
+ p++;
120
+ int paren_depth = 1;
121
+
122
+ // If it's :not(), we need to calculate specificity of the content
123
+ if (is_not) {
124
+ const char *not_content_start = p;
125
+
126
+ // Find closing paren
127
+ while (p < pe && paren_depth > 0) {
128
+ if (*p == '(') paren_depth++;
129
+ else if (*p == ')') paren_depth--;
130
+ if (paren_depth > 0) p++;
131
+ }
132
+
133
+ const char *not_content_end = p;
134
+ long not_content_len = not_content_end - not_content_start;
135
+
136
+ // Recursively calculate specificity of :not() content
137
+ if (not_content_len > 0) {
138
+ VALUE not_content = rb_str_new(not_content_start, not_content_len);
139
+ VALUE not_spec = calculate_specificity(self, not_content);
140
+ int not_specificity = NUM2INT(not_spec);
141
+
142
+ // Add :not() content's specificity to our counts
143
+ int additional_a = not_specificity / 100;
144
+ int additional_b = (not_specificity % 100) / 10;
145
+ int additional_c = not_specificity % 10;
146
+
147
+ id_count += additional_a;
148
+ class_count += additional_b;
149
+ element_count += additional_c;
150
+
151
+ RB_GC_GUARD(not_content);
152
+ RB_GC_GUARD(not_spec);
153
+ }
154
+
155
+ p++; // Skip closing paren
156
+ } else {
157
+ // Skip other function arguments
158
+ while (p < pe && paren_depth > 0) {
159
+ if (*p == '(') paren_depth++;
160
+ else if (*p == ')') paren_depth--;
161
+ p++;
162
+ }
163
+
164
+ // Count the pseudo-class/element
165
+ if (is_pseudo_element || is_legacy_pseudo_element) {
166
+ pseudo_element_count++;
167
+ } else {
168
+ pseudo_class_count++;
169
+ }
170
+ }
171
+ } else {
172
+ // No function arguments - count the pseudo-class/element
173
+ if (is_not) {
174
+ // :not without parens is invalid, but don't count it
175
+ } else if (is_pseudo_element || is_legacy_pseudo_element) {
176
+ pseudo_element_count++;
177
+ } else {
178
+ pseudo_class_count++;
179
+ }
180
+ }
181
+ continue;
182
+ }
183
+
184
+ // Universal selector: *
185
+ if (c == '*') {
186
+ // Universal selector has specificity 0, don't count
187
+ p++;
188
+ continue;
189
+ }
190
+
191
+ // Type selector (element name): div, span, etc.
192
+ if ((*p >= 'a' && *p <= 'z') || (*p >= 'A' && *p <= 'Z')) {
193
+ element_count++;
194
+ // Skip the identifier
195
+ while (p < pe && ((*p >= 'a' && *p <= 'z') || (*p >= 'A' && *p <= 'Z') ||
196
+ (*p >= '0' && *p <= '9') || *p == '-' || *p == '_')) {
197
+ p++;
198
+ }
199
+ continue;
200
+ }
201
+
202
+ // Unknown character, skip it
203
+ p++;
204
+ }
205
+
206
+ // Calculate specificity using W3C formula:
207
+ // IDs * 100 + (classes + attributes + pseudo-classes) * 10 + (elements + pseudo-elements) * 1
208
+ int specificity = (id_count * 100) +
209
+ ((class_count + attr_count + pseudo_class_count) * 10) +
210
+ ((element_count + pseudo_element_count) * 1);
211
+
212
+ return INT2NUM(specificity);
213
+ }
@@ -0,0 +1,290 @@
1
+ #include "cataract.h"
2
+ #include <stdio.h>
3
+
4
+ /*
5
+ * C implementation of Stylesheet#to_s with no rb_funcall
6
+ * Optimized for new hash structure: {query_string => {media_types: [...], rules: [...]}}
7
+ *
8
+ * This provides ~36% speedup over the Ruby implementation for serialization,
9
+ * which is important since to_s is a hot path in the premailer use case.
10
+ */
11
+
12
+ // Context for merge callback within a group
13
+ struct merge_selector_ctx {
14
+ VALUE merged_rules;
15
+ VALUE self;
16
+ };
17
+
18
+ // Callback for merging rules with the same selector within a media group
19
+ static int merge_selector_callback(VALUE selector, VALUE selector_rules, VALUE arg) {
20
+ struct merge_selector_ctx *ctx = (struct merge_selector_ctx *)arg;
21
+
22
+ // If only one rule, use it directly
23
+ if (RARRAY_LEN(selector_rules) == 1) {
24
+ rb_ary_push(ctx->merged_rules, RARRAY_AREF(selector_rules, 0));
25
+ return ST_CONTINUE;
26
+ }
27
+
28
+ // Multiple rules with same selector - merge them
29
+ VALUE first_rule = RARRAY_AREF(selector_rules, 0);
30
+ VALUE specificity = rb_struct_aref(first_rule, INT2FIX(RULE_SPECIFICITY));
31
+
32
+ // Merge declarations for this selector (C function, no rb_funcall)
33
+ VALUE merged_declarations = cataract_merge(ctx->self, selector_rules);
34
+
35
+ // Create new merged Rule struct
36
+ VALUE merged_rule = rb_struct_new(cRule, selector, merged_declarations, specificity);
37
+ rb_ary_push(ctx->merged_rules, merged_rule);
38
+
39
+ return ST_CONTINUE;
40
+ }
41
+
42
+ // Context for processing each media group
43
+ struct process_group_ctx {
44
+ VALUE result;
45
+ VALUE self;
46
+ };
47
+
48
+ // Callback for processing each media query group
49
+ static int process_group_callback(VALUE query_string, VALUE group_hash, VALUE arg) {
50
+ struct process_group_ctx *ctx = (struct process_group_ctx *)arg;
51
+
52
+ // Extract rules array from group hash
53
+ VALUE rules_array = rb_hash_aref(group_hash, ID2SYM(rb_intern("rules")));
54
+ if (NIL_P(rules_array) || RARRAY_LEN(rules_array) == 0) {
55
+ return ST_CONTINUE; // Skip empty groups
56
+ }
57
+
58
+ // Group rules by selector for merging
59
+ VALUE rules_by_selector = rb_hash_new();
60
+ long rules_len = RARRAY_LEN(rules_array);
61
+
62
+ for (long i = 0; i < rules_len; i++) {
63
+ VALUE rule = RARRAY_AREF(rules_array, i);
64
+ VALUE selector = rb_struct_aref(rule, INT2FIX(RULE_SELECTOR));
65
+
66
+ VALUE selector_group = rb_hash_aref(rules_by_selector, selector);
67
+ if (NIL_P(selector_group)) {
68
+ selector_group = rb_ary_new();
69
+ rb_hash_aset(rules_by_selector, selector, selector_group);
70
+ }
71
+ rb_ary_push(selector_group, rule);
72
+ }
73
+
74
+ // Merge rules with same selector
75
+ VALUE merged_rules = rb_ary_new();
76
+ struct merge_selector_ctx merge_ctx = { merged_rules, ctx->self };
77
+ rb_hash_foreach(rules_by_selector, merge_selector_callback, (VALUE)&merge_ctx);
78
+
79
+ // Check if this is a media query or not
80
+ int has_media_query = !NIL_P(query_string);
81
+
82
+ if (has_media_query) {
83
+ // Output @media wrapper
84
+ rb_str_buf_cat2(ctx->result, "@media ");
85
+ rb_str_buf_append(ctx->result, query_string);
86
+ rb_str_buf_cat2(ctx->result, " {\n");
87
+ }
88
+
89
+ // Output each merged rule
90
+ long merged_len = RARRAY_LEN(merged_rules);
91
+ for (long j = 0; j < merged_len; j++) {
92
+ VALUE rule = RARRAY_AREF(merged_rules, j);
93
+ VALUE selector = rb_struct_aref(rule, INT2FIX(RULE_SELECTOR));
94
+ VALUE declarations = rb_struct_aref(rule, INT2FIX(RULE_DECLARATIONS));
95
+
96
+ if (has_media_query) {
97
+ rb_str_buf_cat2(ctx->result, " ");
98
+ }
99
+
100
+ rb_str_buf_append(ctx->result, selector);
101
+ rb_str_buf_cat2(ctx->result, " { ");
102
+
103
+ // C function, no rb_funcall
104
+ VALUE decls_str = declarations_array_to_s(declarations);
105
+ rb_str_buf_append(ctx->result, decls_str);
106
+
107
+ rb_str_buf_cat2(ctx->result, " }\n");
108
+ }
109
+
110
+ if (has_media_query) {
111
+ rb_str_buf_cat2(ctx->result, "}\n");
112
+ }
113
+
114
+ RB_GC_GUARD(rules_array);
115
+ RB_GC_GUARD(rules_by_selector);
116
+ RB_GC_GUARD(merged_rules);
117
+
118
+ return ST_CONTINUE;
119
+ }
120
+
121
+ // Main function: stylesheet_to_s_c(rule_groups_hash, charset)
122
+ // New signature: takes hash structure {query_string => {media_types: [...], rules: [...]}}
123
+ VALUE stylesheet_to_s_c(VALUE self, VALUE rule_groups, VALUE charset) {
124
+ Check_Type(rule_groups, T_HASH);
125
+
126
+ long num_groups = RHASH_SIZE(rule_groups);
127
+
128
+ // Handle empty stylesheet
129
+ if (num_groups == 0) {
130
+ if (!NIL_P(charset)) {
131
+ // Even empty stylesheet should emit @charset if present
132
+ VALUE result = UTF8_STR("@charset \"");
133
+ rb_str_buf_append(result, charset);
134
+ rb_str_buf_cat2(result, "\";\n");
135
+ return result;
136
+ }
137
+ return UTF8_STR("");
138
+ }
139
+
140
+ // Allocate result string with reasonable capacity
141
+ VALUE result = rb_str_buf_new(num_groups * 100);
142
+
143
+ // Emit @charset first if present (must be first per W3C spec)
144
+ if (!NIL_P(charset)) {
145
+ rb_str_buf_cat2(result, "@charset \"");
146
+ rb_str_buf_append(result, charset);
147
+ rb_str_buf_cat2(result, "\";\n");
148
+ }
149
+
150
+ // Process each media query group
151
+ struct process_group_ctx ctx = { result, self };
152
+ rb_hash_foreach(rule_groups, process_group_callback, (VALUE)&ctx);
153
+
154
+ RB_GC_GUARD(result);
155
+
156
+ return result;
157
+ }
158
+
159
+ // ============================================================================
160
+ // Formatted output (to_formatted_s)
161
+ // ============================================================================
162
+
163
+ // Context for formatted processing
164
+ struct format_group_ctx {
165
+ VALUE result;
166
+ VALUE self;
167
+ };
168
+
169
+ // Callback for formatted output with newlines and 2-space indentation
170
+ static int format_group_callback(VALUE query_string, VALUE group_hash, VALUE arg) {
171
+ struct format_group_ctx *ctx = (struct format_group_ctx *)arg;
172
+
173
+ // Extract rules array from group hash
174
+ VALUE rules_array = rb_hash_aref(group_hash, ID2SYM(rb_intern("rules")));
175
+ if (NIL_P(rules_array) || RARRAY_LEN(rules_array) == 0) {
176
+ return ST_CONTINUE; // Skip empty groups
177
+ }
178
+
179
+ // Group rules by selector for merging
180
+ VALUE rules_by_selector = rb_hash_new();
181
+ long rules_len = RARRAY_LEN(rules_array);
182
+
183
+ for (long i = 0; i < rules_len; i++) {
184
+ VALUE rule = RARRAY_AREF(rules_array, i);
185
+ VALUE selector = rb_struct_aref(rule, INT2FIX(RULE_SELECTOR));
186
+
187
+ VALUE selector_group = rb_hash_aref(rules_by_selector, selector);
188
+ if (NIL_P(selector_group)) {
189
+ selector_group = rb_ary_new();
190
+ rb_hash_aset(rules_by_selector, selector, selector_group);
191
+ }
192
+ rb_ary_push(selector_group, rule);
193
+ }
194
+
195
+ // Merge rules with same selector
196
+ VALUE merged_rules = rb_ary_new();
197
+ struct merge_selector_ctx merge_ctx = { merged_rules, ctx->self };
198
+ rb_hash_foreach(rules_by_selector, merge_selector_callback, (VALUE)&merge_ctx);
199
+
200
+ // Check if this is a media query or not
201
+ int has_media_query = !NIL_P(query_string);
202
+
203
+ if (has_media_query) {
204
+ // Output @media wrapper
205
+ rb_str_buf_cat2(ctx->result, "@media ");
206
+ rb_str_buf_append(ctx->result, query_string);
207
+ rb_str_buf_cat2(ctx->result, " {\n");
208
+ }
209
+
210
+ // Output each merged rule with formatting
211
+ long merged_len = RARRAY_LEN(merged_rules);
212
+ for (long j = 0; j < merged_len; j++) {
213
+ VALUE rule = RARRAY_AREF(merged_rules, j);
214
+ VALUE selector = rb_struct_aref(rule, INT2FIX(RULE_SELECTOR));
215
+ VALUE declarations = rb_struct_aref(rule, INT2FIX(RULE_DECLARATIONS));
216
+
217
+ // Indent selector if inside media query
218
+ if (has_media_query) {
219
+ rb_str_buf_cat2(ctx->result, " ");
220
+ }
221
+
222
+ // Selector on its own line
223
+ rb_str_buf_append(ctx->result, selector);
224
+ rb_str_buf_cat2(ctx->result, " {\n");
225
+
226
+ // Declarations indented with 2 spaces (or 4 if inside media query)
227
+ const char *indent = has_media_query ? " " : " ";
228
+ rb_str_buf_cat2(ctx->result, indent);
229
+
230
+ // Get declarations string
231
+ VALUE decls_str = declarations_array_to_s(declarations);
232
+ rb_str_buf_append(ctx->result, decls_str);
233
+
234
+ rb_str_buf_cat2(ctx->result, "\n");
235
+
236
+ // Closing brace
237
+ if (has_media_query) {
238
+ rb_str_buf_cat2(ctx->result, " ");
239
+ }
240
+ rb_str_buf_cat2(ctx->result, "}\n");
241
+ }
242
+
243
+ if (has_media_query) {
244
+ rb_str_buf_cat2(ctx->result, "}\n");
245
+ }
246
+
247
+ RB_GC_GUARD(rules_array);
248
+ RB_GC_GUARD(rules_by_selector);
249
+ RB_GC_GUARD(merged_rules);
250
+
251
+ return ST_CONTINUE;
252
+ }
253
+
254
+ // stylesheet_to_formatted_s_c(rule_groups_hash, charset)
255
+ // Returns formatted multi-line output with 2-space indentation
256
+ // Not optimized for performance since it's not in the hot path
257
+ VALUE stylesheet_to_formatted_s_c(VALUE self, VALUE rule_groups, VALUE charset) {
258
+ Check_Type(rule_groups, T_HASH);
259
+
260
+ long num_groups = RHASH_SIZE(rule_groups);
261
+
262
+ // Handle empty stylesheet
263
+ if (num_groups == 0) {
264
+ if (!NIL_P(charset)) {
265
+ VALUE result = UTF8_STR("@charset \"");
266
+ rb_str_buf_append(result, charset);
267
+ rb_str_buf_cat2(result, "\";\n");
268
+ return result;
269
+ }
270
+ return UTF8_STR("");
271
+ }
272
+
273
+ // Simple allocation - let Ruby resize as needed (not in hot path)
274
+ VALUE result = UTF8_STR("");
275
+
276
+ // Emit @charset first if present
277
+ if (!NIL_P(charset)) {
278
+ rb_str_buf_cat2(result, "@charset \"");
279
+ rb_str_buf_append(result, charset);
280
+ rb_str_buf_cat2(result, "\";\n");
281
+ }
282
+
283
+ // Process each media query group with formatting
284
+ struct format_group_ctx ctx = { result, self };
285
+ rb_hash_foreach(rule_groups, format_group_callback, (VALUE)&ctx);
286
+
287
+ RB_GC_GUARD(result);
288
+
289
+ return result;
290
+ }
@@ -0,0 +1,116 @@
1
+ /*
2
+ * value_splitter.c - CSS value splitting utility
3
+ *
4
+ * Purpose: Split CSS declaration values on whitespace while preserving content
5
+ * inside functions and quoted strings.
6
+ *
7
+ * Examples:
8
+ * "1px 2px 3px 4px" => ["1px", "2px", "3px", "4px"]
9
+ * "10px calc(100% - 20px)" => ["10px", "calc(100% - 20px)"]
10
+ * "rgb(255, 0, 0) blue" => ["rgb(255, 0, 0)", "blue"]
11
+ * "'Helvetica Neue', sans-serif" => ["'Helvetica Neue',", "sans-serif"]
12
+ */
13
+
14
+ #include "cataract.h"
15
+
16
+ /*
17
+ * Split a CSS declaration value on whitespace while preserving content
18
+ * inside functions and quoted strings.
19
+ *
20
+ * Algorithm:
21
+ * - Track parenthesis depth for functions like calc(), rgb()
22
+ * - Track quote state for strings like 'Helvetica Neue'
23
+ * - Split on whitespace only when depth=0 and not in quotes
24
+ *
25
+ * @param value [String] Pre-parsed CSS declaration value (assumed well-formed)
26
+ * @return [Array<String>] Array of value tokens
27
+ */
28
+ VALUE cataract_split_value(VALUE self, VALUE value) {
29
+ Check_Type(value, T_STRING);
30
+ const char *str = RSTRING_PTR(value);
31
+ long len = RSTRING_LEN(value);
32
+
33
+ // Sanity check: reject unreasonably long values (DoS protection)
34
+ if (len > 65536) {
35
+ rb_raise(rb_eArgError, "CSS value too long (max 64KB)");
36
+ }
37
+
38
+ // Result array
39
+ VALUE result = rb_ary_new();
40
+
41
+ // State tracking
42
+ int paren_depth = 0;
43
+ int in_quotes = 0;
44
+ char quote_char = '\0';
45
+ const char *token_start = NULL;
46
+ const char *p = str;
47
+ const char *pe = str + len;
48
+
49
+ while (p < pe) {
50
+ char c = *p;
51
+
52
+ // Handle quotes
53
+ if ((c == '"' || c == '\'') && !in_quotes) {
54
+ // Opening quote
55
+ in_quotes = 1;
56
+ quote_char = c;
57
+ if (token_start == NULL) token_start = p;
58
+ p++;
59
+ continue;
60
+ }
61
+
62
+ if (in_quotes && c == quote_char) {
63
+ // Closing quote
64
+ in_quotes = 0;
65
+ p++;
66
+ continue;
67
+ }
68
+
69
+ // Handle parentheses (only when not in quotes)
70
+ if (!in_quotes) {
71
+ if (c == '(') {
72
+ paren_depth++;
73
+ if (token_start == NULL) token_start = p;
74
+ p++;
75
+ continue;
76
+ }
77
+
78
+ if (c == ')') {
79
+ paren_depth--;
80
+ p++;
81
+ continue;
82
+ }
83
+
84
+ // Handle whitespace (delimiter when depth=0 and not quoted)
85
+ if (IS_WHITESPACE(c)) {
86
+ if (paren_depth == 0 && !in_quotes) {
87
+ // Emit token if we have one
88
+ if (token_start != NULL) {
89
+ size_t token_len = p - token_start;
90
+ VALUE token = rb_str_new(token_start, token_len);
91
+ rb_ary_push(result, token);
92
+ token_start = NULL;
93
+ }
94
+ p++;
95
+ continue;
96
+ }
97
+ // else: whitespace inside function/quotes, part of token
98
+ }
99
+ }
100
+
101
+ // Regular character - mark start if needed
102
+ if (token_start == NULL) {
103
+ token_start = p;
104
+ }
105
+ p++;
106
+ }
107
+
108
+ // Emit final token if any
109
+ if (token_start != NULL) {
110
+ size_t token_len = pe - token_start;
111
+ VALUE token = rb_str_new(token_start, token_len);
112
+ rb_ary_push(result, token);
113
+ }
114
+
115
+ return result;
116
+ }