cataract 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.clang-tidy +30 -0
- data/.github/workflows/ci-macos.yml +12 -0
- data/.github/workflows/ci.yml +77 -0
- data/.github/workflows/test.yml +76 -0
- data/.gitignore +45 -0
- data/.overcommit.yml +38 -0
- data/.rubocop.yml +83 -0
- data/BENCHMARKS.md +201 -0
- data/CHANGELOG.md +1 -0
- data/Gemfile +27 -0
- data/LICENSE +21 -0
- data/RAGEL_MIGRATION.md +60 -0
- data/README.md +292 -0
- data/Rakefile +209 -0
- data/benchmarks/benchmark_harness.rb +193 -0
- data/benchmarks/benchmark_merging.rb +121 -0
- data/benchmarks/benchmark_optimization_comparison.rb +168 -0
- data/benchmarks/benchmark_parsing.rb +153 -0
- data/benchmarks/benchmark_ragel_removal.rb +56 -0
- data/benchmarks/benchmark_runner.rb +70 -0
- data/benchmarks/benchmark_serialization.rb +180 -0
- data/benchmarks/benchmark_shorthand.rb +109 -0
- data/benchmarks/benchmark_shorthand_expansion.rb +176 -0
- data/benchmarks/benchmark_specificity.rb +124 -0
- data/benchmarks/benchmark_string_allocation.rb +151 -0
- data/benchmarks/benchmark_stylesheet_to_s.rb +62 -0
- data/benchmarks/benchmark_to_s_cached.rb +55 -0
- data/benchmarks/benchmark_value_splitter.rb +54 -0
- data/benchmarks/benchmark_yjit.rb +158 -0
- data/benchmarks/benchmark_yjit_workers.rb +61 -0
- data/benchmarks/profile_to_s.rb +23 -0
- data/benchmarks/speedup_calculator.rb +83 -0
- data/benchmarks/system_metadata.rb +81 -0
- data/benchmarks/templates/benchmarks.md.erb +221 -0
- data/benchmarks/yjit_tests.rb +141 -0
- data/cataract.gemspec +34 -0
- data/cliff.toml +92 -0
- data/examples/color_conversion_visual_test/color_conversion_test.html +3603 -0
- data/examples/color_conversion_visual_test/generate.rb +202 -0
- data/examples/color_conversion_visual_test/template.html.erb +259 -0
- data/examples/css_analyzer/analyzer.rb +164 -0
- data/examples/css_analyzer/analyzers/base.rb +33 -0
- data/examples/css_analyzer/analyzers/colors.rb +133 -0
- data/examples/css_analyzer/analyzers/important.rb +88 -0
- data/examples/css_analyzer/analyzers/properties.rb +61 -0
- data/examples/css_analyzer/analyzers/specificity.rb +68 -0
- data/examples/css_analyzer/templates/report.html.erb +575 -0
- data/examples/css_analyzer.rb +69 -0
- data/examples/github_analysis.html +5343 -0
- data/ext/cataract/cataract.c +1086 -0
- data/ext/cataract/cataract.h +174 -0
- data/ext/cataract/css_parser.c +1435 -0
- data/ext/cataract/extconf.rb +48 -0
- data/ext/cataract/import_scanner.c +174 -0
- data/ext/cataract/merge.c +973 -0
- data/ext/cataract/shorthand_expander.c +902 -0
- data/ext/cataract/specificity.c +213 -0
- data/ext/cataract/value_splitter.c +116 -0
- data/ext/cataract_color/cataract_color.c +16 -0
- data/ext/cataract_color/color_conversion.c +1687 -0
- data/ext/cataract_color/color_conversion.h +136 -0
- data/ext/cataract_color/color_conversion_lab.c +571 -0
- data/ext/cataract_color/color_conversion_named.c +259 -0
- data/ext/cataract_color/color_conversion_oklab.c +547 -0
- data/ext/cataract_color/extconf.rb +23 -0
- data/ext/cataract_old/cataract.c +393 -0
- data/ext/cataract_old/cataract.h +250 -0
- data/ext/cataract_old/css_parser.c +933 -0
- data/ext/cataract_old/extconf.rb +67 -0
- data/ext/cataract_old/import_scanner.c +174 -0
- data/ext/cataract_old/merge.c +776 -0
- data/ext/cataract_old/shorthand_expander.c +902 -0
- data/ext/cataract_old/specificity.c +213 -0
- data/ext/cataract_old/stylesheet.c +290 -0
- data/ext/cataract_old/value_splitter.c +116 -0
- data/lib/cataract/at_rule.rb +97 -0
- data/lib/cataract/color_conversion.rb +18 -0
- data/lib/cataract/declarations.rb +332 -0
- data/lib/cataract/import_resolver.rb +210 -0
- data/lib/cataract/rule.rb +131 -0
- data/lib/cataract/stylesheet.rb +716 -0
- data/lib/cataract/stylesheet_scope.rb +257 -0
- data/lib/cataract/version.rb +5 -0
- data/lib/cataract.rb +107 -0
- data/lib/tasks/gem.rake +158 -0
- data/scripts/fuzzer/run.rb +828 -0
- data/scripts/fuzzer/worker.rb +99 -0
- data/scripts/generate_benchmarks_md.rb +155 -0
- metadata +135 -0
|
@@ -0,0 +1,67 @@
|
|
|
1
|
+
# frozen_string_literal: true

require 'mkmf'

# NOTE: Ragel is no longer a dependency; every parser is now plain C.
# ragel_generator.rb is unused and is kept only for historical reference.

# Object files that make up this extension:
#   cataract.o            - Ruby bindings and initialization
#   shorthand_expander.o  - shorthand property expansion/creation
#   value_splitter.o      - CSS value splitting utility
#   stylesheet.o          - CSS serialization
#   css_parser.o          - main CSS parser
#   specificity.o         - selector specificity calculator
#   merge.o               - CSS cascade and merge logic
#   import_scanner.o      - @import statement extraction
# NOTE: Color conversion is a separate extension (ext/cataract_color/).
$objs = [
  'cataract.o',
  'shorthand_expander.o',
  'value_splitter.o',
  'stylesheet.o',
  'css_parser.o',
  'specificity.o',
  'merge.o',
  'import_scanner.o'
]

# Prints +message+ to stdout, then appends +cflag+ (which carries its own
# leading space) to the compiler flags.
announce_and_add = lambda do |message, cflag|
  puts message
  $CFLAGS << cflag
end

# Debug build, used by CI to exercise the debug printf statements.
announce_and_add.call('Enabling debug mode (DEBUG_PRINTF enabled)', ' -DCATARACT_DEBUG') if ENV['CATARACT_DEBUG']

# String buffer pre-allocation is on by default; it can be switched off for
# baseline benchmarking either through the environment (development) or with
# a command-line flag (gem install).
if ENV['DISABLE_STR_BUF_OPTIMIZATION'] || arg_config('--disable-str-buf-optimization')
  announce_and_add.call(
    'Disabling string buffer pre-allocation optimization (baseline mode for benchmarking)',
    ' -DDISABLE_STR_BUF_OPTIMIZATION'
  )
else
  puts 'Using string buffer pre-allocation optimization (rb_str_buf_new)'
end

# Opt-in compiler optimization flags, each controlled by its own environment
# variable (intended to be tested one at a time).
[
  ['USE_O3', 'Using -O3 optimization level', ' -O3'],
  ['USE_MARCH_NATIVE', 'Using -march=native (CPU-specific optimizations)', ' -march=native'],
  ['USE_FUNROLL_LOOPS', 'Using -funroll-loops (automatic loop unrolling)', ' -funroll-loops']
].each do |env_name, message, cflag|
  announce_and_add.call(message, cflag) if ENV[env_name]
end

# Manual loop unrolling in lowercase_property is on by default.
# Benchmark: ~6.6% faster on Apple Silicon M1 (bootstrap.css parsing).
if ENV['DISABLE_LOOP_UNROLL']
  announce_and_add.call(
    'Disabling manual loop unrolling in lowercase_property (baseline mode)',
    ' -DDISABLE_LOOP_UNROLL'
  )
else
  puts 'Using manual loop unrolling in lowercase_property (default, ~6.6% faster)'
end

# Warning suppressions. These were introduced for Ragel-generated code, which
# per the note at the top of this file is no longer used.
# NOTE(review): confirm whether the hand-written C still needs them.
on_darwin = RUBY_PLATFORM.include?('darwin')
$CFLAGS << ' -Wno-unused-const-variable' if on_darwin || RUBY_PLATFORM.include?('linux')
$CFLAGS << ' -Wno-shorten-64-to-32' if on_darwin
$CFLAGS << ' -Wno-unused-variable'

create_makefile('cataract/cataract')
|
|
@@ -0,0 +1,174 @@
|
|
|
1
|
+
#include <ruby.h>
|
|
2
|
+
#include <ctype.h>
|
|
3
|
+
#include <string.h>
|
|
4
|
+
#include "cataract.h"
|
|
5
|
+
|
|
6
|
+
/*
|
|
7
|
+
* Scan CSS for @import statements
|
|
8
|
+
*
|
|
9
|
+
* Matches patterns:
|
|
10
|
+
* @import url("path");
|
|
11
|
+
* @import url('path');
|
|
12
|
+
* @import "path";
|
|
13
|
+
* @import 'path';
|
|
14
|
+
* @import url("path") print; (with media query)
|
|
15
|
+
*
|
|
16
|
+
* Returns array of hashes: [{url: "...", media: "...", full_match: "..."}]
|
|
17
|
+
*/
|
|
18
|
+
VALUE extract_imports(VALUE self, VALUE css_string) {
|
|
19
|
+
Check_Type(css_string, T_STRING);
|
|
20
|
+
|
|
21
|
+
const char *css = RSTRING_PTR(css_string);
|
|
22
|
+
long css_len = RSTRING_LEN(css_string);
|
|
23
|
+
|
|
24
|
+
VALUE imports = rb_ary_new();
|
|
25
|
+
|
|
26
|
+
const char *p = css;
|
|
27
|
+
const char *end = css + css_len;
|
|
28
|
+
|
|
29
|
+
while (p < end) {
|
|
30
|
+
// Skip whitespace and comments
|
|
31
|
+
while (p < end) {
|
|
32
|
+
if (IS_WHITESPACE(*p)) {
|
|
33
|
+
p++;
|
|
34
|
+
} else if (p + 2 <= end && p[0] == '/' && p[1] == '*') {
|
|
35
|
+
// Skip /* */ comment
|
|
36
|
+
p += 2;
|
|
37
|
+
while (p + 1 < end && !(p[0] == '*' && p[1] == '/')) {
|
|
38
|
+
p++;
|
|
39
|
+
}
|
|
40
|
+
if (p + 1 < end) p += 2; // Skip */
|
|
41
|
+
} else {
|
|
42
|
+
break;
|
|
43
|
+
}
|
|
44
|
+
}
|
|
45
|
+
|
|
46
|
+
// Check for @import
|
|
47
|
+
if (p + 7 <= end && strncasecmp(p, "@import", 7) == 0) {
|
|
48
|
+
const char *import_start = p;
|
|
49
|
+
p += 7;
|
|
50
|
+
|
|
51
|
+
// Skip whitespace after @import
|
|
52
|
+
while (p < end && IS_WHITESPACE(*p)) p++;
|
|
53
|
+
|
|
54
|
+
// Check for optional url(
|
|
55
|
+
int has_url_function = 0;
|
|
56
|
+
if (p + 4 <= end && strncasecmp(p, "url(", 4) == 0) {
|
|
57
|
+
has_url_function = 1;
|
|
58
|
+
p += 4;
|
|
59
|
+
while (p < end && IS_WHITESPACE(*p)) p++;
|
|
60
|
+
}
|
|
61
|
+
|
|
62
|
+
// Find opening quote
|
|
63
|
+
if (p >= end || (*p != '"' && *p != '\'')) {
|
|
64
|
+
// Invalid @import, skip to next semicolon
|
|
65
|
+
while (p < end && *p != ';') p++;
|
|
66
|
+
if (p < end) p++; // Skip semicolon
|
|
67
|
+
continue;
|
|
68
|
+
}
|
|
69
|
+
|
|
70
|
+
char quote_char = *p;
|
|
71
|
+
p++; // Skip opening quote
|
|
72
|
+
|
|
73
|
+
const char *url_start = p;
|
|
74
|
+
|
|
75
|
+
// Find closing quote (handle escaped quotes)
|
|
76
|
+
while (p < end && *p != quote_char) {
|
|
77
|
+
if (*p == '\\' && p + 1 < end) {
|
|
78
|
+
p += 2; // Skip escaped character
|
|
79
|
+
} else {
|
|
80
|
+
p++;
|
|
81
|
+
}
|
|
82
|
+
}
|
|
83
|
+
|
|
84
|
+
if (p >= end) {
|
|
85
|
+
// Unterminated string
|
|
86
|
+
break;
|
|
87
|
+
}
|
|
88
|
+
|
|
89
|
+
const char *url_end = p;
|
|
90
|
+
p++; // Skip closing quote
|
|
91
|
+
|
|
92
|
+
// Skip closing paren if we had url(
|
|
93
|
+
if (has_url_function) {
|
|
94
|
+
while (p < end && IS_WHITESPACE(*p)) p++;
|
|
95
|
+
if (p < end && *p == ')') {
|
|
96
|
+
p++;
|
|
97
|
+
}
|
|
98
|
+
}
|
|
99
|
+
|
|
100
|
+
// Skip whitespace before optional media query or semicolon
|
|
101
|
+
while (p < end && IS_WHITESPACE(*p)) p++;
|
|
102
|
+
|
|
103
|
+
// Check for optional media query (everything until semicolon)
|
|
104
|
+
const char *media_start = NULL;
|
|
105
|
+
const char *media_end = NULL;
|
|
106
|
+
|
|
107
|
+
if (p < end && *p != ';') {
|
|
108
|
+
media_start = p;
|
|
109
|
+
|
|
110
|
+
// Find semicolon
|
|
111
|
+
while (p < end && *p != ';') p++;
|
|
112
|
+
|
|
113
|
+
media_end = p;
|
|
114
|
+
|
|
115
|
+
// Trim trailing whitespace from media query
|
|
116
|
+
while (media_end > media_start && IS_WHITESPACE(*(media_end - 1))) {
|
|
117
|
+
media_end--;
|
|
118
|
+
}
|
|
119
|
+
}
|
|
120
|
+
|
|
121
|
+
// Skip semicolon
|
|
122
|
+
if (p < end && *p == ';') p++;
|
|
123
|
+
|
|
124
|
+
const char *import_end = p;
|
|
125
|
+
|
|
126
|
+
// Build result hash
|
|
127
|
+
VALUE import_hash = rb_hash_new();
|
|
128
|
+
|
|
129
|
+
// Extract URL
|
|
130
|
+
VALUE url = rb_str_new(url_start, url_end - url_start);
|
|
131
|
+
rb_hash_aset(import_hash, ID2SYM(rb_intern("url")), url);
|
|
132
|
+
|
|
133
|
+
// Extract media query (or nil)
|
|
134
|
+
VALUE media = Qnil;
|
|
135
|
+
if (media_start && media_end > media_start) {
|
|
136
|
+
media = rb_str_new(media_start, media_end - media_start);
|
|
137
|
+
}
|
|
138
|
+
rb_hash_aset(import_hash, ID2SYM(rb_intern("media")), media);
|
|
139
|
+
|
|
140
|
+
// Extract full match
|
|
141
|
+
VALUE full_match = rb_str_new(import_start, import_end - import_start);
|
|
142
|
+
rb_hash_aset(import_hash, ID2SYM(rb_intern("full_match")), full_match);
|
|
143
|
+
|
|
144
|
+
rb_ary_push(imports, import_hash);
|
|
145
|
+
|
|
146
|
+
RB_GC_GUARD(url);
|
|
147
|
+
RB_GC_GUARD(media);
|
|
148
|
+
RB_GC_GUARD(full_match);
|
|
149
|
+
RB_GC_GUARD(import_hash);
|
|
150
|
+
} else {
|
|
151
|
+
// Not an @import, skip to next line or rule
|
|
152
|
+
// Once we hit a non-@import rule (except @charset), stop looking
|
|
153
|
+
// Per CSS spec, @import must be at the top
|
|
154
|
+
|
|
155
|
+
// Skip @charset if present
|
|
156
|
+
if (p + 8 <= end && strncasecmp(p, "@charset", 8) == 0) {
|
|
157
|
+
// Skip to semicolon
|
|
158
|
+
while (p < end && *p != ';') p++;
|
|
159
|
+
if (p < end) p++; // Skip semicolon
|
|
160
|
+
continue;
|
|
161
|
+
}
|
|
162
|
+
|
|
163
|
+
// If we hit any other content, stop scanning for imports
|
|
164
|
+
if (p < end && !IS_WHITESPACE(*p)) {
|
|
165
|
+
break;
|
|
166
|
+
}
|
|
167
|
+
|
|
168
|
+
p++;
|
|
169
|
+
}
|
|
170
|
+
}
|
|
171
|
+
|
|
172
|
+
RB_GC_GUARD(imports);
|
|
173
|
+
return imports;
|
|
174
|
+
}
|