cataract 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (90)
  1. checksums.yaml +7 -0
  2. data/.clang-tidy +30 -0
  3. data/.github/workflows/ci-macos.yml +12 -0
  4. data/.github/workflows/ci.yml +77 -0
  5. data/.github/workflows/test.yml +76 -0
  6. data/.gitignore +45 -0
  7. data/.overcommit.yml +38 -0
  8. data/.rubocop.yml +83 -0
  9. data/BENCHMARKS.md +201 -0
  10. data/CHANGELOG.md +1 -0
  11. data/Gemfile +27 -0
  12. data/LICENSE +21 -0
  13. data/RAGEL_MIGRATION.md +60 -0
  14. data/README.md +292 -0
  15. data/Rakefile +209 -0
  16. data/benchmarks/benchmark_harness.rb +193 -0
  17. data/benchmarks/benchmark_merging.rb +121 -0
  18. data/benchmarks/benchmark_optimization_comparison.rb +168 -0
  19. data/benchmarks/benchmark_parsing.rb +153 -0
  20. data/benchmarks/benchmark_ragel_removal.rb +56 -0
  21. data/benchmarks/benchmark_runner.rb +70 -0
  22. data/benchmarks/benchmark_serialization.rb +180 -0
  23. data/benchmarks/benchmark_shorthand.rb +109 -0
  24. data/benchmarks/benchmark_shorthand_expansion.rb +176 -0
  25. data/benchmarks/benchmark_specificity.rb +124 -0
  26. data/benchmarks/benchmark_string_allocation.rb +151 -0
  27. data/benchmarks/benchmark_stylesheet_to_s.rb +62 -0
  28. data/benchmarks/benchmark_to_s_cached.rb +55 -0
  29. data/benchmarks/benchmark_value_splitter.rb +54 -0
  30. data/benchmarks/benchmark_yjit.rb +158 -0
  31. data/benchmarks/benchmark_yjit_workers.rb +61 -0
  32. data/benchmarks/profile_to_s.rb +23 -0
  33. data/benchmarks/speedup_calculator.rb +83 -0
  34. data/benchmarks/system_metadata.rb +81 -0
  35. data/benchmarks/templates/benchmarks.md.erb +221 -0
  36. data/benchmarks/yjit_tests.rb +141 -0
  37. data/cataract.gemspec +34 -0
  38. data/cliff.toml +92 -0
  39. data/examples/color_conversion_visual_test/color_conversion_test.html +3603 -0
  40. data/examples/color_conversion_visual_test/generate.rb +202 -0
  41. data/examples/color_conversion_visual_test/template.html.erb +259 -0
  42. data/examples/css_analyzer/analyzer.rb +164 -0
  43. data/examples/css_analyzer/analyzers/base.rb +33 -0
  44. data/examples/css_analyzer/analyzers/colors.rb +133 -0
  45. data/examples/css_analyzer/analyzers/important.rb +88 -0
  46. data/examples/css_analyzer/analyzers/properties.rb +61 -0
  47. data/examples/css_analyzer/analyzers/specificity.rb +68 -0
  48. data/examples/css_analyzer/templates/report.html.erb +575 -0
  49. data/examples/css_analyzer.rb +69 -0
  50. data/examples/github_analysis.html +5343 -0
  51. data/ext/cataract/cataract.c +1086 -0
  52. data/ext/cataract/cataract.h +174 -0
  53. data/ext/cataract/css_parser.c +1435 -0
  54. data/ext/cataract/extconf.rb +48 -0
  55. data/ext/cataract/import_scanner.c +174 -0
  56. data/ext/cataract/merge.c +973 -0
  57. data/ext/cataract/shorthand_expander.c +902 -0
  58. data/ext/cataract/specificity.c +213 -0
  59. data/ext/cataract/value_splitter.c +116 -0
  60. data/ext/cataract_color/cataract_color.c +16 -0
  61. data/ext/cataract_color/color_conversion.c +1687 -0
  62. data/ext/cataract_color/color_conversion.h +136 -0
  63. data/ext/cataract_color/color_conversion_lab.c +571 -0
  64. data/ext/cataract_color/color_conversion_named.c +259 -0
  65. data/ext/cataract_color/color_conversion_oklab.c +547 -0
  66. data/ext/cataract_color/extconf.rb +23 -0
  67. data/ext/cataract_old/cataract.c +393 -0
  68. data/ext/cataract_old/cataract.h +250 -0
  69. data/ext/cataract_old/css_parser.c +933 -0
  70. data/ext/cataract_old/extconf.rb +67 -0
  71. data/ext/cataract_old/import_scanner.c +174 -0
  72. data/ext/cataract_old/merge.c +776 -0
  73. data/ext/cataract_old/shorthand_expander.c +902 -0
  74. data/ext/cataract_old/specificity.c +213 -0
  75. data/ext/cataract_old/stylesheet.c +290 -0
  76. data/ext/cataract_old/value_splitter.c +116 -0
  77. data/lib/cataract/at_rule.rb +97 -0
  78. data/lib/cataract/color_conversion.rb +18 -0
  79. data/lib/cataract/declarations.rb +332 -0
  80. data/lib/cataract/import_resolver.rb +210 -0
  81. data/lib/cataract/rule.rb +131 -0
  82. data/lib/cataract/stylesheet.rb +716 -0
  83. data/lib/cataract/stylesheet_scope.rb +257 -0
  84. data/lib/cataract/version.rb +5 -0
  85. data/lib/cataract.rb +107 -0
  86. data/lib/tasks/gem.rake +158 -0
  87. data/scripts/fuzzer/run.rb +828 -0
  88. data/scripts/fuzzer/worker.rb +99 -0
  89. data/scripts/generate_benchmarks_md.rb +155 -0
  90. metadata +135 -0
@@ -0,0 +1,67 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'mkmf'
4
+
5
# Build configuration for the cataract C extension.
#
# NOTE: The Ragel dependency has been removed; all parsers are now pure C.
# ragel_generator.rb is no longer used, but is kept for historical reference.
#
# Object files linked into the extension:
#   cataract.o            Ruby bindings and initialization
#   shorthand_expander.o  shorthand property expansion/creation
#   value_splitter.o      CSS value splitting utility
#   stylesheet.o          CSS serialization
#   css_parser.o          main CSS parser
#   specificity.o         selector specificity calculator
#   merge.o               CSS cascade and merge logic
#   import_scanner.o      @import statement extraction
#
# Color conversion lives in a separate extension (ext/cataract_color/).
$objs = %w[
  cataract.o shorthand_expander.o value_splitter.o stylesheet.o
  css_parser.o specificity.o merge.o import_scanner.o
]

# CATARACT_DEBUG: compile with debug printf statements enabled (used by CI).
if ENV.key?('CATARACT_DEBUG')
  puts 'Enabling debug mode (DEBUG_PRINTF enabled)'
  $CFLAGS << ' -DCATARACT_DEBUG'
end

# String buffer pre-allocation is on by default. It can be switched off for
# baseline benchmarking either through the environment (development) or via a
# command-line flag (gem install).
str_buf_disabled = ENV.key?('DISABLE_STR_BUF_OPTIMIZATION') ||
                   arg_config('--disable-str-buf-optimization')
if str_buf_disabled
  puts 'Disabling string buffer pre-allocation optimization (baseline mode for benchmarking)'
  $CFLAGS << ' -DDISABLE_STR_BUF_OPTIMIZATION'
else
  puts 'Using string buffer pre-allocation optimization (rb_str_buf_new)'
end

# Opt-in compiler optimization flags, intended to be tested one at a time.
# Each entry: environment variable, message to print, flag appended to CFLAGS.
[
  ['USE_O3',            'Using -O3 optimization level',                     ' -O3'],
  ['USE_MARCH_NATIVE',  'Using -march=native (CPU-specific optimizations)', ' -march=native'],
  ['USE_FUNROLL_LOOPS', 'Using -funroll-loops (automatic loop unrolling)',  ' -funroll-loops']
].each do |env_name, message, flag|
  next unless ENV.key?(env_name)

  puts message
  $CFLAGS << flag
end

# Manual loop unrolling in lowercase_property is on by default.
# Benchmark: ~6.6% faster on Apple Silicon M1 (bootstrap.css parsing).
if ENV.key?('DISABLE_LOOP_UNROLL')
  puts 'Disabling manual loop unrolling in lowercase_property (baseline mode)'
  $CFLAGS << ' -DDISABLE_LOOP_UNROLL'
else
  puts 'Using manual loop unrolling in lowercase_property (default, ~6.6% faster)'
end

# Warning suppressions. These were introduced for Ragel-generated code.
# NOTE(review): Ragel has since been removed (see the note at the top), so
# these may no longer be needed -- confirm before dropping them.
on_darwin = RUBY_PLATFORM.include?('darwin')
$CFLAGS << ' -Wno-unused-const-variable' if on_darwin || RUBY_PLATFORM.include?('linux')
$CFLAGS << ' -Wno-shorten-64-to-32' if on_darwin
$CFLAGS << ' -Wno-unused-variable'

create_makefile('cataract/cataract')
@@ -0,0 +1,174 @@
1
+ #include <ruby.h>
2
+ #include <ctype.h>
3
+ #include <string.h>
4
+ #include "cataract.h"
5
+
6
+ /*
7
+ * Scan CSS for @import statements
8
+ *
9
+ * Matches patterns:
10
+ * @import url("path");
11
+ * @import url('path');
12
+ * @import "path";
13
+ * @import 'path';
14
+ * @import url("path") print; (with media query)
15
+ *
16
+ * Returns array of hashes: [{url: "...", media: "...", full_match: "..."}]
17
+ */
18
+ VALUE extract_imports(VALUE self, VALUE css_string) {
19
+ Check_Type(css_string, T_STRING);
20
+
21
+ const char *css = RSTRING_PTR(css_string);
22
+ long css_len = RSTRING_LEN(css_string);
23
+
24
+ VALUE imports = rb_ary_new();
25
+
26
+ const char *p = css;
27
+ const char *end = css + css_len;
28
+
29
+ while (p < end) {
30
+ // Skip whitespace and comments
31
+ while (p < end) {
32
+ if (IS_WHITESPACE(*p)) {
33
+ p++;
34
+ } else if (p + 2 <= end && p[0] == '/' && p[1] == '*') {
35
+ // Skip /* */ comment
36
+ p += 2;
37
+ while (p + 1 < end && !(p[0] == '*' && p[1] == '/')) {
38
+ p++;
39
+ }
40
+ if (p + 1 < end) p += 2; // Skip */
41
+ } else {
42
+ break;
43
+ }
44
+ }
45
+
46
+ // Check for @import
47
+ if (p + 7 <= end && strncasecmp(p, "@import", 7) == 0) {
48
+ const char *import_start = p;
49
+ p += 7;
50
+
51
+ // Skip whitespace after @import
52
+ while (p < end && IS_WHITESPACE(*p)) p++;
53
+
54
+ // Check for optional url(
55
+ int has_url_function = 0;
56
+ if (p + 4 <= end && strncasecmp(p, "url(", 4) == 0) {
57
+ has_url_function = 1;
58
+ p += 4;
59
+ while (p < end && IS_WHITESPACE(*p)) p++;
60
+ }
61
+
62
+ // Find opening quote
63
+ if (p >= end || (*p != '"' && *p != '\'')) {
64
+ // Invalid @import, skip to next semicolon
65
+ while (p < end && *p != ';') p++;
66
+ if (p < end) p++; // Skip semicolon
67
+ continue;
68
+ }
69
+
70
+ char quote_char = *p;
71
+ p++; // Skip opening quote
72
+
73
+ const char *url_start = p;
74
+
75
+ // Find closing quote (handle escaped quotes)
76
+ while (p < end && *p != quote_char) {
77
+ if (*p == '\\' && p + 1 < end) {
78
+ p += 2; // Skip escaped character
79
+ } else {
80
+ p++;
81
+ }
82
+ }
83
+
84
+ if (p >= end) {
85
+ // Unterminated string
86
+ break;
87
+ }
88
+
89
+ const char *url_end = p;
90
+ p++; // Skip closing quote
91
+
92
+ // Skip closing paren if we had url(
93
+ if (has_url_function) {
94
+ while (p < end && IS_WHITESPACE(*p)) p++;
95
+ if (p < end && *p == ')') {
96
+ p++;
97
+ }
98
+ }
99
+
100
+ // Skip whitespace before optional media query or semicolon
101
+ while (p < end && IS_WHITESPACE(*p)) p++;
102
+
103
+ // Check for optional media query (everything until semicolon)
104
+ const char *media_start = NULL;
105
+ const char *media_end = NULL;
106
+
107
+ if (p < end && *p != ';') {
108
+ media_start = p;
109
+
110
+ // Find semicolon
111
+ while (p < end && *p != ';') p++;
112
+
113
+ media_end = p;
114
+
115
+ // Trim trailing whitespace from media query
116
+ while (media_end > media_start && IS_WHITESPACE(*(media_end - 1))) {
117
+ media_end--;
118
+ }
119
+ }
120
+
121
+ // Skip semicolon
122
+ if (p < end && *p == ';') p++;
123
+
124
+ const char *import_end = p;
125
+
126
+ // Build result hash
127
+ VALUE import_hash = rb_hash_new();
128
+
129
+ // Extract URL
130
+ VALUE url = rb_str_new(url_start, url_end - url_start);
131
+ rb_hash_aset(import_hash, ID2SYM(rb_intern("url")), url);
132
+
133
+ // Extract media query (or nil)
134
+ VALUE media = Qnil;
135
+ if (media_start && media_end > media_start) {
136
+ media = rb_str_new(media_start, media_end - media_start);
137
+ }
138
+ rb_hash_aset(import_hash, ID2SYM(rb_intern("media")), media);
139
+
140
+ // Extract full match
141
+ VALUE full_match = rb_str_new(import_start, import_end - import_start);
142
+ rb_hash_aset(import_hash, ID2SYM(rb_intern("full_match")), full_match);
143
+
144
+ rb_ary_push(imports, import_hash);
145
+
146
+ RB_GC_GUARD(url);
147
+ RB_GC_GUARD(media);
148
+ RB_GC_GUARD(full_match);
149
+ RB_GC_GUARD(import_hash);
150
+ } else {
151
+ // Not an @import, skip to next line or rule
152
+ // Once we hit a non-@import rule (except @charset), stop looking
153
+ // Per CSS spec, @import must be at the top
154
+
155
+ // Skip @charset if present
156
+ if (p + 8 <= end && strncasecmp(p, "@charset", 8) == 0) {
157
+ // Skip to semicolon
158
+ while (p < end && *p != ';') p++;
159
+ if (p < end) p++; // Skip semicolon
160
+ continue;
161
+ }
162
+
163
+ // If we hit any other content, stop scanning for imports
164
+ if (p < end && !IS_WHITESPACE(*p)) {
165
+ break;
166
+ }
167
+
168
+ p++;
169
+ }
170
+ }
171
+
172
+ RB_GC_GUARD(imports);
173
+ return imports;
174
+ }