cataract 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.clang-tidy +30 -0
- data/.github/workflows/ci-macos.yml +12 -0
- data/.github/workflows/ci.yml +77 -0
- data/.github/workflows/test.yml +76 -0
- data/.gitignore +45 -0
- data/.overcommit.yml +38 -0
- data/.rubocop.yml +83 -0
- data/BENCHMARKS.md +201 -0
- data/CHANGELOG.md +1 -0
- data/Gemfile +27 -0
- data/LICENSE +21 -0
- data/RAGEL_MIGRATION.md +60 -0
- data/README.md +292 -0
- data/Rakefile +209 -0
- data/benchmarks/benchmark_harness.rb +193 -0
- data/benchmarks/benchmark_merging.rb +121 -0
- data/benchmarks/benchmark_optimization_comparison.rb +168 -0
- data/benchmarks/benchmark_parsing.rb +153 -0
- data/benchmarks/benchmark_ragel_removal.rb +56 -0
- data/benchmarks/benchmark_runner.rb +70 -0
- data/benchmarks/benchmark_serialization.rb +180 -0
- data/benchmarks/benchmark_shorthand.rb +109 -0
- data/benchmarks/benchmark_shorthand_expansion.rb +176 -0
- data/benchmarks/benchmark_specificity.rb +124 -0
- data/benchmarks/benchmark_string_allocation.rb +151 -0
- data/benchmarks/benchmark_stylesheet_to_s.rb +62 -0
- data/benchmarks/benchmark_to_s_cached.rb +55 -0
- data/benchmarks/benchmark_value_splitter.rb +54 -0
- data/benchmarks/benchmark_yjit.rb +158 -0
- data/benchmarks/benchmark_yjit_workers.rb +61 -0
- data/benchmarks/profile_to_s.rb +23 -0
- data/benchmarks/speedup_calculator.rb +83 -0
- data/benchmarks/system_metadata.rb +81 -0
- data/benchmarks/templates/benchmarks.md.erb +221 -0
- data/benchmarks/yjit_tests.rb +141 -0
- data/cataract.gemspec +34 -0
- data/cliff.toml +92 -0
- data/examples/color_conversion_visual_test/color_conversion_test.html +3603 -0
- data/examples/color_conversion_visual_test/generate.rb +202 -0
- data/examples/color_conversion_visual_test/template.html.erb +259 -0
- data/examples/css_analyzer/analyzer.rb +164 -0
- data/examples/css_analyzer/analyzers/base.rb +33 -0
- data/examples/css_analyzer/analyzers/colors.rb +133 -0
- data/examples/css_analyzer/analyzers/important.rb +88 -0
- data/examples/css_analyzer/analyzers/properties.rb +61 -0
- data/examples/css_analyzer/analyzers/specificity.rb +68 -0
- data/examples/css_analyzer/templates/report.html.erb +575 -0
- data/examples/css_analyzer.rb +69 -0
- data/examples/github_analysis.html +5343 -0
- data/ext/cataract/cataract.c +1086 -0
- data/ext/cataract/cataract.h +174 -0
- data/ext/cataract/css_parser.c +1435 -0
- data/ext/cataract/extconf.rb +48 -0
- data/ext/cataract/import_scanner.c +174 -0
- data/ext/cataract/merge.c +973 -0
- data/ext/cataract/shorthand_expander.c +902 -0
- data/ext/cataract/specificity.c +213 -0
- data/ext/cataract/value_splitter.c +116 -0
- data/ext/cataract_color/cataract_color.c +16 -0
- data/ext/cataract_color/color_conversion.c +1687 -0
- data/ext/cataract_color/color_conversion.h +136 -0
- data/ext/cataract_color/color_conversion_lab.c +571 -0
- data/ext/cataract_color/color_conversion_named.c +259 -0
- data/ext/cataract_color/color_conversion_oklab.c +547 -0
- data/ext/cataract_color/extconf.rb +23 -0
- data/ext/cataract_old/cataract.c +393 -0
- data/ext/cataract_old/cataract.h +250 -0
- data/ext/cataract_old/css_parser.c +933 -0
- data/ext/cataract_old/extconf.rb +67 -0
- data/ext/cataract_old/import_scanner.c +174 -0
- data/ext/cataract_old/merge.c +776 -0
- data/ext/cataract_old/shorthand_expander.c +902 -0
- data/ext/cataract_old/specificity.c +213 -0
- data/ext/cataract_old/stylesheet.c +290 -0
- data/ext/cataract_old/value_splitter.c +116 -0
- data/lib/cataract/at_rule.rb +97 -0
- data/lib/cataract/color_conversion.rb +18 -0
- data/lib/cataract/declarations.rb +332 -0
- data/lib/cataract/import_resolver.rb +210 -0
- data/lib/cataract/rule.rb +131 -0
- data/lib/cataract/stylesheet.rb +716 -0
- data/lib/cataract/stylesheet_scope.rb +257 -0
- data/lib/cataract/version.rb +5 -0
- data/lib/cataract.rb +107 -0
- data/lib/tasks/gem.rake +158 -0
- data/scripts/fuzzer/run.rb +828 -0
- data/scripts/fuzzer/worker.rb +99 -0
- data/scripts/generate_benchmarks_md.rb +155 -0
- metadata +135 -0
data/README.md
ADDED
|
@@ -0,0 +1,292 @@
|
|
|
1
|
+
# Cataract
|
|
2
|
+
|
|
3
|
+
A performant CSS parser for accurate parsing of complex CSS structures.
|
|
4
|
+
|
|
5
|
+
[Code coverage (Codecov)](https://codecov.io/github/jamescook/cataract)
|
|
6
|
+
|
|
7
|
+
**[API Documentation](https://jamescook.github.io/cataract/)**
|
|
8
|
+
|
|
9
|
+
## Features
|
|
10
|
+
|
|
11
|
+
- **C Extension**: Performance-focused C implementation for parsing and serialization
|
|
12
|
+
- **CSS2 Support**: Selectors, combinators, pseudo-classes, pseudo-elements, @media queries
|
|
13
|
+
- **CSS3 Support**: Attribute selectors (`^=`, `$=`, `*=`)
|
|
14
|
+
- **CSS Color Level 4**: Supports hex, rgb, hsl, hwb, oklab, oklch, and named colors with high precision
|
|
15
|
+
- **Specificity Calculation**: Automatic CSS specificity computation
|
|
16
|
+
- **Media Query Filtering**: Query rules by media type
|
|
17
|
+
- **Zero Runtime Dependencies**: Pure C extension with no runtime gem dependencies
|
|
18
|
+
|
|
19
|
+
## Installation
|
|
20
|
+
|
|
21
|
+
Add this line to your Gemfile:
|
|
22
|
+
|
|
23
|
+
```ruby
|
|
24
|
+
gem 'cataract'
|
|
25
|
+
```
|
|
26
|
+
|
|
27
|
+
Or install directly:
|
|
28
|
+
|
|
29
|
+
```bash
|
|
30
|
+
gem install cataract
|
|
31
|
+
```
|
|
32
|
+
|
|
33
|
+
### Requirements
|
|
34
|
+
|
|
35
|
+
- Ruby >= 3.1.0
|
|
36
|
+
|
|
37
|
+
## Usage
|
|
38
|
+
|
|
39
|
+
### Basic Parsing
|
|
40
|
+
|
|
41
|
+
```ruby
|
|
42
|
+
require 'cataract'
|
|
43
|
+
|
|
44
|
+
# Parse CSS
|
|
45
|
+
sheet = Cataract::Stylesheet.parse(<<~CSS)
|
|
46
|
+
body { margin: 0; padding: 0 }
|
|
47
|
+
|
|
48
|
+
@media screen and (min-width: 768px) {
|
|
49
|
+
.container { width: 750px }
|
|
50
|
+
}
|
|
51
|
+
|
|
52
|
+
div.header > h1:hover { color: blue }
|
|
53
|
+
CSS
|
|
54
|
+
|
|
55
|
+
# Get all selectors
|
|
56
|
+
sheet.selectors
|
|
57
|
+
# => ["body", ".container", "div.header > h1:hover"]
|
|
58
|
+
|
|
59
|
+
# Get all rules
|
|
60
|
+
sheet.rules.each do |rule|
|
|
61
|
+
puts "#{rule.selector}: #{rule.declarations.length} declarations"
|
|
62
|
+
end
|
|
63
|
+
|
|
64
|
+
# Access specific rule
|
|
65
|
+
body_rule = sheet.rules.first
|
|
66
|
+
body_rule.selector # => "body"
|
|
67
|
+
body_rule.specificity # => 1
|
|
68
|
+
body_rule.declarations # => [#<Declaration property="margin" value="0">, ...]
|
|
69
|
+
|
|
70
|
+
# Count rules
|
|
71
|
+
sheet.rules_count
|
|
72
|
+
# => 3
|
|
73
|
+
|
|
74
|
+
# Serialize back to CSS
|
|
75
|
+
sheet.to_s
|
|
76
|
+
# => "body { margin: 0; padding: 0; } @media screen and (min-width: 768px) { .container { width: 750px; } } ..."
|
|
77
|
+
```
|
|
78
|
+
|
|
79
|
+
### Advanced Filtering with Enumerable
|
|
80
|
+
|
|
81
|
+
`Cataract::Stylesheet` implements `Enumerable`, providing standard Ruby collection methods plus chainable scopes:
|
|
82
|
+
|
|
83
|
+
```ruby
|
|
84
|
+
sheet = Cataract::Stylesheet.parse(css)
|
|
85
|
+
|
|
86
|
+
# Basic Enumerable methods work
|
|
87
|
+
sheet.map(&:selector) # => ["body", ".container", "div.header > h1:hover"]
|
|
88
|
+
sheet.select(&:selector?).count # => Count only selector-based rules (excludes @keyframes, etc.)
|
|
89
|
+
sheet.find { |r| r.selector == 'body' } # => First rule matching selector
|
|
90
|
+
|
|
91
|
+
# Filter to selector-based rules only (excludes at-rules like @keyframes, @font-face)
|
|
92
|
+
sheet.select(&:selector?).each do |rule|
|
|
93
|
+
puts "#{rule.selector}: specificity #{rule.specificity}"
|
|
94
|
+
end
|
|
95
|
+
|
|
96
|
+
# Filter by media query (returns chainable scope)
|
|
97
|
+
sheet.with_media(:print).each do |rule|
|
|
98
|
+
puts "Print rule: #{rule.selector}"
|
|
99
|
+
end
|
|
100
|
+
|
|
101
|
+
# Filter by selector (returns chainable scope)
|
|
102
|
+
sheet.with_selector('body').each do |rule|
|
|
103
|
+
puts "Body rule has #{rule.declarations.length} declarations"
|
|
104
|
+
end
|
|
105
|
+
|
|
106
|
+
# Filter by specificity (returns chainable scope)
|
|
107
|
+
sheet.with_specificity(100..).each do |rule|
|
|
108
|
+
puts "High specificity: #{rule.selector} (#{rule.specificity})"
|
|
109
|
+
end
|
|
110
|
+
|
|
111
|
+
# Chain filters together
|
|
112
|
+
sheet.with_media(:screen)
|
|
113
|
+
.with_specificity(50..200)
|
|
114
|
+
.select(&:selector?)
|
|
115
|
+
.map(&:selector)
|
|
116
|
+
# => ["#header .nav", ".sidebar > ul li"]
|
|
117
|
+
|
|
118
|
+
# Find all rules with a specific property
|
|
119
|
+
sheet.select(&:selector?).select do |rule|
|
|
120
|
+
rule.declarations.any? { |d| d.property == 'color' }
|
|
121
|
+
end
|
|
122
|
+
|
|
123
|
+
# Find high-specificity selectors (potential refactoring targets)
|
|
124
|
+
sheet.with_specificity(100..).select(&:selector?).each do |rule|
|
|
125
|
+
puts "Refactor candidate: #{rule.selector} (specificity: #{rule.specificity})"
|
|
126
|
+
end
|
|
127
|
+
|
|
128
|
+
# Find positioned elements in screen media
|
|
129
|
+
sheet.with_media(:screen).select do |rule|
|
|
130
|
+
rule.selector? && rule.declarations.any? do |d|
|
|
131
|
+
d.property == 'position' && d.value == 'relative'
|
|
132
|
+
end
|
|
133
|
+
end
|
|
134
|
+
|
|
135
|
+
# Terminal operations force evaluation
|
|
136
|
+
sheet.with_media(:print).to_a # => Array of rules
|
|
137
|
+
sheet.with_selector('.header').size # => 3
|
|
138
|
+
sheet.with_specificity(10..50).empty? # => false
|
|
139
|
+
```
|
|
140
|
+
|
|
141
|
+
See [BENCHMARKS.md](BENCHMARKS.md) for detailed performance comparisons.
|
|
142
|
+
|
|
143
|
+
## CSS Support
|
|
144
|
+
|
|
145
|
+
Cataract aims to support all CSS specifications including:
|
|
146
|
+
- **Selectors**: All CSS2/CSS3 selectors (type, class, ID, attribute, pseudo-classes, pseudo-elements, combinators)
|
|
147
|
+
- **At-rules**: `@media`, `@font-face`, `@keyframes`, `@supports`, `@page`, `@layer`, `@container`, `@property`, `@scope`, `@counter-style`
|
|
148
|
+
- **Media Queries**: Full support including nested queries and media features
|
|
149
|
+
- **Special syntax**: Data URIs, `calc()`, `url()`, CSS functions with parentheses
|
|
150
|
+
- **!important**: Full support with correct cascade behavior
|
|
151
|
+
|
|
152
|
+
### Color Conversion
|
|
153
|
+
|
|
154
|
+
Cataract supports converting colors between multiple CSS color formats with high precision.
|
|
155
|
+
|
|
156
|
+
**Note:** Color conversion is an optional extension. Load it explicitly to reduce memory footprint:
|
|
157
|
+
|
|
158
|
+
```ruby
|
|
159
|
+
require 'cataract'
|
|
160
|
+
require 'cataract/color_conversion'
|
|
161
|
+
|
|
162
|
+
# Convert hex to RGB
|
|
163
|
+
sheet = Cataract::Stylesheet.parse('.button { color: #ff0000; background: #00ff00; }')
|
|
164
|
+
sheet.convert_colors!(from: :hex, to: :rgb)
|
|
165
|
+
sheet.to_s
|
|
166
|
+
# => ".button { color: rgb(255 0 0); background: rgb(0 255 0); }"
|
|
167
|
+
|
|
168
|
+
# Convert RGB to HSL for easier color manipulation
|
|
169
|
+
sheet = Cataract::Stylesheet.parse('.card { color: rgb(255, 128, 0); }')
|
|
170
|
+
sheet.convert_colors!(from: :rgb, to: :hsl)
|
|
171
|
+
sheet.to_s
|
|
172
|
+
# => ".card { color: hsl(30, 100%, 50%); }"
|
|
173
|
+
|
|
174
|
+
# Convert to Oklab for perceptually uniform colors
|
|
175
|
+
sheet = Cataract::Stylesheet.parse('.gradient { background: linear-gradient(#ff0000, #0000ff); }')
|
|
176
|
+
sheet.convert_colors!(to: :oklab)
|
|
177
|
+
sheet.to_s
|
|
178
|
+
# => ".gradient { background: linear-gradient(oklab(0.6280 0.2249 0.1258), oklab(0.4520 -0.0325 -0.3115)); }"
|
|
179
|
+
|
|
180
|
+
# Auto-detect source format and convert all colors
|
|
181
|
+
sheet = Cataract::Stylesheet.parse(<<~CSS)
|
|
182
|
+
.mixed {
|
|
183
|
+
color: #ff0000;
|
|
184
|
+
background: rgb(0, 255, 0);
|
|
185
|
+
border-color: hsl(240, 100%, 50%);
|
|
186
|
+
}
|
|
187
|
+
CSS
|
|
188
|
+
sheet.convert_colors!(to: :hex) # Converts all formats to hex
|
|
189
|
+
```
|
|
190
|
+
|
|
191
|
+
#### Supported Color Formats
|
|
192
|
+
|
|
193
|
+
| Format | From | To | Alpha | Example | Notes |
|
|
194
|
+
|--------|------|-----|-------|---------|-------|
|
|
195
|
+
| **hex** | ✓ | ✓ | ✓ | `#ff0000`, `#f00`, `#ff000080` | 3, 6, or 8 digit hex |
|
|
196
|
+
| **rgb** | ✓ | ✓ | ✓ | `rgb(255 0 0)`, `rgb(255, 0, 0)` | Modern & legacy syntax |
|
|
197
|
+
| **hsl** | ✓ | ✓ | ✓ | `hsl(0, 100%, 50%)` | Hue, saturation, lightness |
|
|
198
|
+
| **hwb** | ✓ | ✓ | ✓ | `hwb(0 0% 0%)` | Hue, whiteness, blackness |
|
|
199
|
+
| **oklab** | ✓ | ✓ | ✓ | `oklab(0.628 0.225 0.126)` | Perceptually uniform color space |
|
|
200
|
+
| **oklch** | ✓ | ✓ | ✓ | `oklch(0.628 0.258 29.2)` | Cylindrical Oklab (LCh) |
|
|
201
|
+
| **lab** | ✓ | ✓ | ✓ | `lab(53.2% 80.1 67.2)` | CIE L\*a\*b\* color space (D50) |
|
|
202
|
+
| **lch** | ✓ | ✓ | ✓ | `lch(53.2% 104.5 40)` | Cylindrical Lab (polar coordinates) |
|
|
203
|
+
| **named** | ✓ | ✓ | – | `red`, `blue`, `rebeccapurple` | 147 CSS named colors |
|
|
204
|
+
| **color()** | – | – | – | `color(display-p3 1 0 0)` | Absolute color spaces (planned) |
|
|
205
|
+
|
|
206
|
+
**Format aliases:**
|
|
207
|
+
- `:rgba` → uses `rgb()` syntax with alpha
|
|
208
|
+
- `:hsla` → uses `hsl()` syntax with alpha
|
|
209
|
+
- `:hwba` → uses `hwb()` syntax with alpha
|
|
210
|
+
|
|
211
|
+
**Limitations:**
|
|
212
|
+
- Math functions (`calc()`, `min()`, `max()`, `clamp()`) are not evaluated and will be preserved unchanged
|
|
213
|
+
- CSS Color Level 5 features (`none`, `infinity`, relative color syntax with `from`) are preserved but not converted
|
|
214
|
+
- Unknown or future color functions are passed through unchanged
|
|
215
|
+
|
|
216
|
+
### `@import` Support
|
|
217
|
+
|
|
218
|
+
`@import` statements can be resolved with security controls:
|
|
219
|
+
|
|
220
|
+
```ruby
|
|
221
|
+
# Disabled by default
|
|
222
|
+
sheet = Cataract::Stylesheet.parse(css) # @import statements are ignored
|
|
223
|
+
|
|
224
|
+
# Enable with safe defaults (HTTPS only, .css files only, max depth 5)
|
|
225
|
+
sheet = Cataract::Stylesheet.parse(css, import: true)
|
|
226
|
+
|
|
227
|
+
# Custom options for full control
|
|
228
|
+
sheet = Cataract::Stylesheet.parse(css, import: {
|
|
229
|
+
allowed_schemes: ['https', 'file'], # Default: ['https']
|
|
230
|
+
extensions: ['css'], # Default: ['css']
|
|
231
|
+
max_depth: 3, # Default: 5
|
|
232
|
+
timeout: 10, # Default: 10 seconds
|
|
233
|
+
follow_redirects: true # Default: true
|
|
234
|
+
})
|
|
235
|
+
```
|
|
236
|
+
|
|
237
|
+
**Security note**: Import resolution includes protections against:
|
|
238
|
+
- Unauthorized schemes (file://, data://, etc.)
|
|
239
|
+
- Non-CSS file extensions
|
|
240
|
+
- Circular references
|
|
241
|
+
- Excessive nesting depth
|
|
242
|
+
|
|
243
|
+
## Development
|
|
244
|
+
|
|
245
|
+
```bash
|
|
246
|
+
# Install dependencies
|
|
247
|
+
bundle install
|
|
248
|
+
|
|
249
|
+
# Compile the C extension
|
|
250
|
+
rake compile
|
|
251
|
+
|
|
252
|
+
# Run tests
|
|
253
|
+
rake test
|
|
254
|
+
|
|
255
|
+
# Run benchmarks
|
|
256
|
+
rake benchmark
|
|
257
|
+
|
|
258
|
+
# Run fuzzer to test parser robustness
|
|
259
|
+
rake fuzz # 10,000 iterations (default)
|
|
260
|
+
rake fuzz ITERATIONS=100000 # Custom iteration count
|
|
261
|
+
```
|
|
262
|
+
|
|
263
|
+
**Fuzzer**: Generates random CSS input to test parser robustness against malformed or edge-case CSS. Helps catch crashes, memory leaks, and parsing edge cases.
|
|
264
|
+
|
|
265
|
+
## How It Works
|
|
266
|
+
|
|
267
|
+
Cataract uses a high-performance C implementation for CSS parsing and serialization.
|
|
268
|
+
|
|
269
|
+
Each `Rule` is a struct containing:
|
|
270
|
+
- `id`: Integer ID (position in rules array)
|
|
271
|
+
- `selector`: The CSS selector string
|
|
272
|
+
- `declarations`: Array of `Declaration` structs (property, value, important flag)
|
|
273
|
+
- `specificity`: Calculated CSS specificity (cached)
|
|
274
|
+
|
|
275
|
+
Implementation details:
|
|
276
|
+
- **C implementation**: Critical paths implemented in C (parsing, merging, serialization)
|
|
277
|
+
- **Flat rule array**: All rules stored in a single array, preserving source order
|
|
278
|
+
- **Efficient media query handling**: O(1) lookup via internal media index
|
|
279
|
+
- **Memory efficient**: Minimal allocations, reuses string buffers where possible
|
|
280
|
+
- **Comprehensive parsing**: Preserves complex CSS structures including nested media queries, nested selectors, data URIs, CSS functions (calc(), var(), etc.)
|
|
281
|
+
|
|
282
|
+
## Development Notes
|
|
283
|
+
|
|
284
|
+
Significant portions of this codebase were generated with assistance from [Claude Code](https://claude.com/claude-code), including the benchmark infrastructure, test suite, and documentation generation system.
|
|
285
|
+
|
|
286
|
+
## License
|
|
287
|
+
|
|
288
|
+
MIT
|
|
289
|
+
|
|
290
|
+
## Contributing
|
|
291
|
+
|
|
292
|
+
Bug reports and pull requests are welcome on GitHub at https://github.com/jamescook/cataract.
|
data/Rakefile
ADDED
|
@@ -0,0 +1,209 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require 'bundler/gem_tasks'
|
|
4
|
+
require 'rake/testtask'
|
|
5
|
+
require 'rake/clean'
|
|
6
|
+
|
|
7
|
+
# Load tasks from lib/tasks/
|
|
8
|
+
Dir.glob('lib/tasks/**/*.rake').each { |r| load r }
|
|
9
|
+
|
|
10
|
+
# Only load extension task if rake-compiler is available
|
|
11
|
+
begin
  require 'rake/extensiontask'

  # Main parser extension: sources in ext/cataract, compiled library placed
  # in lib/cataract.
  Rake::ExtensionTask.new('cataract') do |ext|
    ext.lib_dir = 'lib/cataract'
    ext.ext_dir = 'ext/cataract'
  end

  # Color conversion extension (optional, loaded on-demand): sources in
  # ext/cataract_color, compiled library placed in lib/cataract.
  Rake::ExtensionTask.new('cataract_color') do |ext|
    ext.lib_dir = 'lib/cataract'
    ext.ext_dir = 'ext/cataract_color'
  end
rescue LoadError
  # rake-compiler not available - skip extension tasks so the remaining
  # tasks in this Rakefile can still be loaded.
end
|
|
26
|
+
|
|
27
|
+
# Configure CLEAN to run before compilation
|
|
28
|
+
# rake-compiler already adds: tmp/, lib/**/*.{so,bundle}, etc.
|
|
29
|
+
# All C files are now hand-written (Ragel removed), so only clean build artifacts
|
|
30
|
+
CLEAN.include('ext/**/Makefile', 'ext/**/*.o')
|
|
31
|
+
|
|
32
|
+
# `rake test`: runs every test/**/test_*.rb file with test/ and lib/ on the
# load path.
Rake::TestTask.new(:test) do |test_task|
  test_task.libs.push('test', 'lib')

  # Load test_helper before running tests (handles SimpleCov setup)
  test_task.ruby_opts << '-rtest_helper'

  # Exclude css_parser_compat directory (reference tests only, not run)
  candidates = FileList['test/**/test_*.rb']
  test_task.test_files = candidates.exclude('test/css_parser_compat/**/*')
end
|
|
40
|
+
|
|
41
|
+
desc 'Run all benchmarks'
task :benchmark do
  # Compile first, then run each suite in a fixed order.
  %w[
    compile
    benchmark:parsing
    benchmark:serialization
    benchmark:specificity
    benchmark:merging
    benchmark:yjit
  ].each { |task_name| Rake::Task[task_name].invoke }

  divider = '-' * 80
  puts "\n#{divider}"
  puts 'All benchmarks complete!'
  puts 'Generate documentation with: rake benchmark:generate_docs'
  puts divider
end
|
|
54
|
+
|
|
55
|
+
namespace :benchmark do
  # Suites that just run benchmarks/benchmark_<task>.rb:
  # [task name, label printed in the progress line, task description]
  [
    [:parsing, 'parsing', 'Benchmark CSS parsing performance'],
    [:serialization, 'serialization', 'Benchmark CSS serialization (to_s) performance'],
    [:specificity, 'specificity', 'Benchmark specificity calculation performance'],
    [:merging, 'merging', 'Benchmark CSS merging performance'],
    [:yjit, 'YJIT', 'Benchmark Ruby-side operations with YJIT on vs off']
  ].each do |suite, label, summary|
    desc summary
    task suite do
      puts "Running #{label} benchmark..."
      ruby "benchmarks/benchmark_#{suite}.rb"
    end
  end

  desc 'Benchmark string allocation optimization (buffer vs dynamic)'
  task :string_allocation do
    banner = '=' * 80
    bench_script = 'benchmarks/benchmark_string_allocation.rb'

    # Clean up any existing benchmark results. Globbing a directory that does
    # not exist yields no matches, so no separate existence check is needed.
    # NOTE(review): BenchmarkHarness writes to benchmarks/.results; confirm
    # that .benchmark_results is still where this benchmark stores its output.
    stale_results = Dir.glob(File.join('benchmarks/.benchmark_results', 'string_allocation_*.json'))
    stale_results.each do |stale|
      puts "Removing old benchmark results: #{stale}"
      FileUtils.rm_f(stale)
    end

    # Pass 1: build with the string-buffer optimization disabled, then measure.
    puts "\n#{banner}"
    puts 'Compiling with DYNAMIC allocation (rb_str_new_cstr)'
    puts banner
    system({ 'CONFIGURE_ARGS' => '--disable-str-buf-optimization' }, 'rake', 'compile')
    system({}, RbConfig.ruby, bench_script)

    # Pass 2: rebuild with the default configuration and measure again.
    puts "\n\n#{banner}"
    puts 'Compiling with BUFFER allocation (rb_str_buf_new, production default)'
    puts banner
    system({}, 'rake', 'compile')
    system({}, RbConfig.ruby, bench_script)
  end

  desc 'Generate BENCHMARKS.md from benchmark results'
  task :generate_docs do
    ruby 'scripts/generate_benchmarks_md.rb'
  end
end
|
|
115
|
+
|
|
116
|
+
# `compile` depends on `clean`, so the CLEAN file list is removed before
# every build.
task compile: :clean

# Running bare `rake` runs the test task.
task default: :test
|
|
119
|
+
|
|
120
|
+
# Lint task - runs clang-tidy on C code
|
|
121
|
+
desc 'Run clang-tidy on C code'
task :lint do
  # Locate the clang-tidy binary: system PATH first (Linux, or llvm already in
  # PATH), then Homebrew's keg-only LLVM on macOS. Stays nil when not found.
  clang_tidy =
    if system('which clang-tidy > /dev/null 2>&1')
      'clang-tidy'
    elsif system('which brew > /dev/null 2>&1')
      llvm_prefix = `brew --prefix llvm 2>/dev/null`.strip
      brewed = "#{llvm_prefix}/bin/clang-tidy"
      brewed if !llvm_prefix.empty? && File.exist?(brewed)
    end

  unless clang_tidy
    abort("clang-tidy not installed.\n " \
          "macOS: brew install llvm\n " \
          "Ubuntu/Debian: apt-get install clang-tidy\n " \
          'Fedora/RHEL: dnf install clang-tools-extra')
  end

  puts 'Running clang-tidy on C code...'

  # All .c files of both extensions
  sources = Dir.glob('ext/cataract/*.c') + Dir.glob('ext/cataract_color/*.c')

  # clang-tidy picks up the .clang-tidy config file automatically; the Ruby
  # header directories are passed so it can find ruby.h.
  include_flags = [
    "-I#{RbConfig::CONFIG['rubyhdrdir']}",
    "-I#{RbConfig::CONFIG['rubyarchhdrdir']}",
    '-Iext/cataract',
    '-Iext/cataract_color'
  ]

  # `all?` stops at the first file that clang-tidy rejects.
  passed = sources.all? do |source|
    puts " Checking #{source}..."
    system(clang_tidy, '--quiet', source, '--', *include_flags)
  end

  abort('clang-tidy found issues!') unless passed

  puts '✓ clang-tidy passed'
end
|
|
168
|
+
|
|
169
|
+
# Fuzz testing
|
|
170
|
+
desc 'Run fuzzer to test parser robustness (including color conversion)'
task fuzz: :compile do
  # Iteration count comes from the ITERATIONS environment variable
  # (default: 10000) and is forwarded to the fuzzer as its only argument.
  iterations = ENV.fetch('ITERATIONS', '10000')
  puts "Running CSS parser fuzzer (#{iterations} iterations)..."

  # Use system with ENV.to_h to preserve environment variables like FUZZ_GC_STRESS
  succeeded = system(ENV.to_h, RbConfig.ruby, '-Ilib', 'scripts/fuzzer/run.rb', iterations)

  # system returns false on a non-zero exit and nil when the command could not
  # be started; either way the task must fail instead of reporting success.
  abort('Fuzzer failed!') unless succeeded
end
|
|
177
|
+
|
|
178
|
+
# Documentation generation with YARD
|
|
179
|
+
begin
  require 'yard'

  desc 'Generate example CSS analysis for documentation'
  task :generate_example do
    puts 'Generating GitHub CSS analysis example...'
    # Output file carries a "file." prefix for YARD compatibility
    system('ruby examples/css_analyzer.rb https://github.com -o docs/file.github_analysis.html')
  end

  desc 'Generate documentation and open in browser'
  task docs: :generate_example do
    # Build the YARD documentation into docs/
    yardoc_args = [
      '--output-dir', 'docs',
      '--readme', 'README.md',
      '--title', 'Cataract - Fast CSS Parser',
      'lib/**/*.rb', 'ext/**/*.c', '-', 'docs/files/EXAMPLE.md'
    ]
    YARD::CLI::Yardoc.run(*yardoc_args)

    # Open the result with the platform's opener command (skipped in CI)
    unless ENV['CI']
      { 'darwin' => 'open', 'linux' => 'xdg-open' }.each do |platform, opener|
        system("#{opener} docs/index.html") if RUBY_PLATFORM.include?(platform)
      end
    end
  end

  desc 'List undocumented code'
  task :undoc do
    system('yard stats --list-undoc')
  end
rescue LoadError
  # YARD not available - skip doc tasks
end
|
data/benchmarks/benchmark_harness.rb
ADDED
|
@@ -0,0 +1,193 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require 'benchmark/ips'
require 'json'
require 'fileutils'
require 'time'
require_relative 'system_metadata'
require_relative 'speedup_calculator'
|
|
8
|
+
|
|
9
|
+
# Base class for all benchmarks. Provides structure and automatic JSON output.
#
# Subclasses describe themselves with class methods (benchmark_name,
# description and, optionally, metadata / speedup_config) and implement the
# actual work as public instance methods: .run instantiates the subclass and
# calls #sanity_checks and #call on that instance.
#
# Usage:
#   class MyBenchmark < BenchmarkHarness
#     def self.benchmark_name
#       'my_benchmark'
#     end
#
#     def self.description
#       'What this benchmark measures'
#     end
#
#     def self.metadata
#       { 'key' => 'value' } # Optional metadata for docs
#     end
#
#     def sanity_checks
#       # Optional: verify code works before benchmarking
#       raise "Sanity check failed!" unless something_works
#     end
#
#     def call
#       run_test_case_1
#       run_test_case_2
#     end
#
#     private
#
#     def run_test_case_1
#       benchmark('test_case_1') do |x|
#         x.config(time: 5, warmup: 2)
#         x.report('label') { ... }
#         x.compare!
#       end
#     end
#   end
#
#   MyBenchmark.run
class BenchmarkHarness
  # Directory (next to this file) where per-test-case and combined JSON
  # results are written.
  RESULTS_DIR = File.expand_path('.results', __dir__)

  class << self
    # Abstract methods - must be implemented by subclasses

    # @return [String] identifier used in the header and in result file names
    # @raise [NotImplementedError] unless overridden
    def benchmark_name
      raise NotImplementedError, "#{self} must implement .benchmark_name"
    end

    # @return [String] one-line summary printed in the header and stored in the JSON
    # @raise [NotImplementedError] unless overridden
    def description
      raise NotImplementedError, "#{self} must implement .description"
    end

    # @return [Hash] extra data stored under 'metadata' in the combined JSON;
    #   its 'test_cases' entry is handed to SpeedupCalculator
    def metadata
      {} # Optional, can be overridden
    end

    # Class-level placeholder. Note that .run invokes the *instance* method
    # #sanity_checks (when the instance responds to it), not this one.
    def sanity_checks
      # Optional, can be overridden
    end

    # Class-level placeholder. Note that .run invokes the *instance* method
    # #call, not this one.
    #
    # @raise [NotImplementedError] always, unless overridden
    def call
      raise NotImplementedError, "#{self} must implement .call"
    end

    # Optional: Define how to calculate speedups for this benchmark
    # Override this to customize speedup calculation; return nil to skip
    # speedup calculation entirely.
    #
    # IMPORTANT: Result names must follow convention "tool_name: test_case_id"
    #
    # @return [Hash] Configuration for SpeedupCalculator
    #   {
    #     baseline_matcher: Proc,     # Returns true for baseline results
    #     comparison_matcher: Proc,   # Returns true for comparison results
    #     test_case_key: Symbol       # Key in test_cases metadata matching test_case_id
    #   }
    def speedup_config
      # Default: compare css_parser (baseline) vs cataract (comparison)
      # Match to test_cases by 'fixture' key
      {
        baseline_matcher: SpeedupCalculator::Matchers.css_parser,
        comparison_matcher: SpeedupCalculator::Matchers.cataract,
        test_case_key: :fixture
      }
    end

    # Main entry point - handles setup, execution, and cleanup.
    #
    # Any StandardError raised along the way is reported (message plus the
    # first five backtrace lines) and the process exits with status 1.
    def run
      instance = new
      setup
      instance.sanity_checks if instance.respond_to?(:sanity_checks, true)
      instance.call
      finalize(instance)
    rescue StandardError => e
      puts "❌ Benchmark failed: #{e.message}"
      puts e.backtrace.first(5).join("\n")
      exit 1
    end

    private

    # Creates the results directory, collects system metadata when no
    # metadata.json exists yet, and prints the benchmark header.
    def setup
      FileUtils.mkdir_p(RESULTS_DIR)

      # Collect system metadata once per run
      SystemMetadata.collect unless File.exist?(File.join(RESULTS_DIR, 'metadata.json'))

      # Print header
      puts "\n\n"
      puts '=' * 80
      puts "#{benchmark_name.upcase.tr('_', ' ')} BENCHMARK"
      puts "Measures: #{description}"
      puts '=' * 80
      puts
    end

    # Combines the per-test-case JSON files recorded by #benchmark into
    # "<benchmark_name>.json", adds speedup statistics when speedup_config
    # returns a configuration, and removes the per-test-case files.
    #
    # Does nothing when the instance recorded no JSON files.
    def finalize(instance)
      # An instance variable that was never set reads back as nil.
      json_files = instance.instance_variable_get(:@json_files)
      return unless json_files&.any?

      combined_data = {
        'name' => benchmark_name,
        'description' => description,
        'metadata' => metadata,
        'timestamp' => Time.now.iso8601,
        'results' => []
      }

      # Read all the individual JSON files; files that were never written are
      # skipped, and only array payloads are merged.
      json_files.each do |filename|
        path = File.join(RESULTS_DIR, filename)
        next unless File.exist?(path)

        data = JSON.parse(File.read(path))
        combined_data['results'].concat(data) if data.is_a?(Array)
      end

      # Calculate speedups using configured strategy
      config = speedup_config
      if config
        calculator = SpeedupCalculator.new(
          results: combined_data['results'],
          test_cases: combined_data['metadata']['test_cases'],
          baseline_matcher: config[:baseline_matcher],
          comparison_matcher: config[:comparison_matcher],
          test_case_key: config[:test_case_key]
        )

        speedup_stats = calculator.calculate
        combined_data['metadata']['speedups'] = speedup_stats if speedup_stats
      end

      # Write combined file
      combined_path = File.join(RESULTS_DIR, "#{benchmark_name}.json")
      File.write(combined_path, JSON.pretty_generate(combined_data))

      # Clean up individual files. rm_f tolerates files that do not exist,
      # matching the File.exist? guard used while reading them above.
      json_files.each do |filename|
        FileUtils.rm_f(File.join(RESULTS_DIR, filename))
      end

      puts "\n✓ Results saved to #{combined_path}"
    end
  end

  # Instance methods
  protected

  # Runs one Benchmark.ips job whose JSON output goes to
  # "<benchmark_name>_<test_case_name>.json" in RESULTS_DIR, yielding the job
  # so the caller can configure it and add reports. The file name is
  # remembered in @json_files so the class-level finalize step can merge it.
  #
  # @param test_case_name [String] suffix identifying this test case
  # @yield [x] the Benchmark.ips job
  def benchmark(test_case_name)
    json_filename = "#{self.class.benchmark_name}_#{test_case_name}.json"
    json_path = File.join(RESULTS_DIR, json_filename)

    Benchmark.ips do |x|
      # Automatically enable JSON output
      x.json!(json_path)

      # Let the benchmark configure and run
      yield x
    end

    # Track that we created this file
    @json_files ||= []
    @json_files << json_filename
  end
end
|