cataract 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (90) hide show
  1. checksums.yaml +7 -0
  2. data/.clang-tidy +30 -0
  3. data/.github/workflows/ci-macos.yml +12 -0
  4. data/.github/workflows/ci.yml +77 -0
  5. data/.github/workflows/test.yml +76 -0
  6. data/.gitignore +45 -0
  7. data/.overcommit.yml +38 -0
  8. data/.rubocop.yml +83 -0
  9. data/BENCHMARKS.md +201 -0
  10. data/CHANGELOG.md +1 -0
  11. data/Gemfile +27 -0
  12. data/LICENSE +21 -0
  13. data/RAGEL_MIGRATION.md +60 -0
  14. data/README.md +292 -0
  15. data/Rakefile +209 -0
  16. data/benchmarks/benchmark_harness.rb +193 -0
  17. data/benchmarks/benchmark_merging.rb +121 -0
  18. data/benchmarks/benchmark_optimization_comparison.rb +168 -0
  19. data/benchmarks/benchmark_parsing.rb +153 -0
  20. data/benchmarks/benchmark_ragel_removal.rb +56 -0
  21. data/benchmarks/benchmark_runner.rb +70 -0
  22. data/benchmarks/benchmark_serialization.rb +180 -0
  23. data/benchmarks/benchmark_shorthand.rb +109 -0
  24. data/benchmarks/benchmark_shorthand_expansion.rb +176 -0
  25. data/benchmarks/benchmark_specificity.rb +124 -0
  26. data/benchmarks/benchmark_string_allocation.rb +151 -0
  27. data/benchmarks/benchmark_stylesheet_to_s.rb +62 -0
  28. data/benchmarks/benchmark_to_s_cached.rb +55 -0
  29. data/benchmarks/benchmark_value_splitter.rb +54 -0
  30. data/benchmarks/benchmark_yjit.rb +158 -0
  31. data/benchmarks/benchmark_yjit_workers.rb +61 -0
  32. data/benchmarks/profile_to_s.rb +23 -0
  33. data/benchmarks/speedup_calculator.rb +83 -0
  34. data/benchmarks/system_metadata.rb +81 -0
  35. data/benchmarks/templates/benchmarks.md.erb +221 -0
  36. data/benchmarks/yjit_tests.rb +141 -0
  37. data/cataract.gemspec +34 -0
  38. data/cliff.toml +92 -0
  39. data/examples/color_conversion_visual_test/color_conversion_test.html +3603 -0
  40. data/examples/color_conversion_visual_test/generate.rb +202 -0
  41. data/examples/color_conversion_visual_test/template.html.erb +259 -0
  42. data/examples/css_analyzer/analyzer.rb +164 -0
  43. data/examples/css_analyzer/analyzers/base.rb +33 -0
  44. data/examples/css_analyzer/analyzers/colors.rb +133 -0
  45. data/examples/css_analyzer/analyzers/important.rb +88 -0
  46. data/examples/css_analyzer/analyzers/properties.rb +61 -0
  47. data/examples/css_analyzer/analyzers/specificity.rb +68 -0
  48. data/examples/css_analyzer/templates/report.html.erb +575 -0
  49. data/examples/css_analyzer.rb +69 -0
  50. data/examples/github_analysis.html +5343 -0
  51. data/ext/cataract/cataract.c +1086 -0
  52. data/ext/cataract/cataract.h +174 -0
  53. data/ext/cataract/css_parser.c +1435 -0
  54. data/ext/cataract/extconf.rb +48 -0
  55. data/ext/cataract/import_scanner.c +174 -0
  56. data/ext/cataract/merge.c +973 -0
  57. data/ext/cataract/shorthand_expander.c +902 -0
  58. data/ext/cataract/specificity.c +213 -0
  59. data/ext/cataract/value_splitter.c +116 -0
  60. data/ext/cataract_color/cataract_color.c +16 -0
  61. data/ext/cataract_color/color_conversion.c +1687 -0
  62. data/ext/cataract_color/color_conversion.h +136 -0
  63. data/ext/cataract_color/color_conversion_lab.c +571 -0
  64. data/ext/cataract_color/color_conversion_named.c +259 -0
  65. data/ext/cataract_color/color_conversion_oklab.c +547 -0
  66. data/ext/cataract_color/extconf.rb +23 -0
  67. data/ext/cataract_old/cataract.c +393 -0
  68. data/ext/cataract_old/cataract.h +250 -0
  69. data/ext/cataract_old/css_parser.c +933 -0
  70. data/ext/cataract_old/extconf.rb +67 -0
  71. data/ext/cataract_old/import_scanner.c +174 -0
  72. data/ext/cataract_old/merge.c +776 -0
  73. data/ext/cataract_old/shorthand_expander.c +902 -0
  74. data/ext/cataract_old/specificity.c +213 -0
  75. data/ext/cataract_old/stylesheet.c +290 -0
  76. data/ext/cataract_old/value_splitter.c +116 -0
  77. data/lib/cataract/at_rule.rb +97 -0
  78. data/lib/cataract/color_conversion.rb +18 -0
  79. data/lib/cataract/declarations.rb +332 -0
  80. data/lib/cataract/import_resolver.rb +210 -0
  81. data/lib/cataract/rule.rb +131 -0
  82. data/lib/cataract/stylesheet.rb +716 -0
  83. data/lib/cataract/stylesheet_scope.rb +257 -0
  84. data/lib/cataract/version.rb +5 -0
  85. data/lib/cataract.rb +107 -0
  86. data/lib/tasks/gem.rake +158 -0
  87. data/scripts/fuzzer/run.rb +828 -0
  88. data/scripts/fuzzer/worker.rb +99 -0
  89. data/scripts/generate_benchmarks_md.rb +155 -0
  90. metadata +135 -0
data/README.md ADDED
@@ -0,0 +1,292 @@
1
+ # Cataract
2
+
3
+ A performant CSS parser for accurate parsing of complex CSS structures.
4
+
5
+ [![codecov](https://codecov.io/github/jamescook/cataract/graph/badge.svg?token=1PTVV1QTV5)](https://codecov.io/github/jamescook/cataract)
6
+
7
+ **[API Documentation](https://jamescook.github.io/cataract/)**
8
+
9
+ ## Features
10
+
11
+ - **C Extension**: Performance-focused C implementation for parsing and serialization
12
+ - **CSS2 Support**: Selectors, combinators, pseudo-classes, pseudo-elements, @media queries
13
+ - **CSS3 Support**: Attribute selectors (`^=`, `$=`, `*=`)
14
+ - **CSS Color Level 4**: Supports hex, rgb, hsl, hwb, oklab, oklch, and named colors with high precision
15
+ - **Specificity Calculation**: Automatic CSS specificity computation
16
+ - **Media Query Filtering**: Query rules by media type
17
+ - **Zero Runtime Dependencies**: Pure C extension with no runtime gem dependencies
18
+
19
+ ## Installation
20
+
21
+ Add this line to your Gemfile:
22
+
23
+ ```ruby
24
+ gem 'cataract'
25
+ ```
26
+
27
+ Or install directly:
28
+
29
+ ```bash
30
+ gem install cataract
31
+ ```
32
+
33
+ ### Requirements
34
+
35
+ - Ruby >= 3.1.0
36
+
37
+ ## Usage
38
+
39
+ ### Basic Parsing
40
+
41
+ ```ruby
42
+ require 'cataract'
43
+
44
+ # Parse CSS
45
+ sheet = Cataract::Stylesheet.parse(<<~CSS)
46
+ body { margin: 0; padding: 0 }
47
+
48
+ @media screen and (min-width: 768px) {
49
+ .container { width: 750px }
50
+ }
51
+
52
+ div.header > h1:hover { color: blue }
53
+ CSS
54
+
55
+ # Get all selectors
56
+ sheet.selectors
57
+ # => ["body", ".container", "div.header > h1:hover"]
58
+
59
+ # Get all rules
60
+ sheet.rules.each do |rule|
61
+ puts "#{rule.selector}: #{rule.declarations.length} declarations"
62
+ end
63
+
64
+ # Access specific rule
65
+ body_rule = sheet.rules.first
66
+ body_rule.selector # => "body"
67
+ body_rule.specificity # => 1
68
+ body_rule.declarations # => [#<Declaration property="margin" value="0">, ...]
69
+
70
+ # Count rules
71
+ sheet.rules_count
72
+ # => 3
73
+
74
+ # Serialize back to CSS
75
+ sheet.to_s
76
+ # => "body { margin: 0; padding: 0; } @media screen and (min-width: 768px) { .container { width: 750px; } } ..."
77
+ ```
78
+
79
+ ### Advanced Filtering with Enumerable
80
+
81
+ `Cataract::Stylesheet` implements `Enumerable`, providing standard Ruby collection methods plus chainable scopes:
82
+
83
+ ```ruby
84
+ sheet = Cataract::Stylesheet.parse(css)
85
+
86
+ # Basic Enumerable methods work
87
+ sheet.map(&:selector) # => ["body", ".container", "div.header > h1:hover"]
88
+ sheet.select(&:selector?).count # => Count only selector-based rules (excludes @keyframes, etc.)
89
+ sheet.find { |r| r.selector == 'body' } # => First rule matching selector
90
+
91
+ # Filter to selector-based rules only (excludes at-rules like @keyframes, @font-face)
92
+ sheet.select(&:selector?).each do |rule|
93
+ puts "#{rule.selector}: specificity #{rule.specificity}"
94
+ end
95
+
96
+ # Filter by media query (returns chainable scope)
97
+ sheet.with_media(:print).each do |rule|
98
+ puts "Print rule: #{rule.selector}"
99
+ end
100
+
101
+ # Filter by selector (returns chainable scope)
102
+ sheet.with_selector('body').each do |rule|
103
+ puts "Body rule has #{rule.declarations.length} declarations"
104
+ end
105
+
106
+ # Filter by specificity (returns chainable scope)
107
+ sheet.with_specificity(100..).each do |rule|
108
+ puts "High specificity: #{rule.selector} (#{rule.specificity})"
109
+ end
110
+
111
+ # Chain filters together
112
+ sheet.with_media(:screen)
113
+ .with_specificity(50..200)
114
+ .select(&:selector?)
115
+ .map(&:selector)
116
+ # => ["#header .nav", ".sidebar > ul li"]
117
+
118
+ # Find all rules with a specific property
119
+ sheet.select(&:selector?).select do |rule|
120
+ rule.declarations.any? { |d| d.property == 'color' }
121
+ end
122
+
123
+ # Find high-specificity selectors (potential refactoring targets)
124
+ sheet.with_specificity(100..).select(&:selector?).each do |rule|
125
+ puts "Refactor candidate: #{rule.selector} (specificity: #{rule.specificity})"
126
+ end
127
+
128
+ # Find positioned elements in screen media
129
+ sheet.with_media(:screen).select do |rule|
130
+ rule.selector? && rule.declarations.any? do |d|
131
+ d.property == 'position' && d.value == 'relative'
132
+ end
133
+ end
134
+
135
+ # Terminal operations force evaluation
136
+ sheet.with_media(:print).to_a # => Array of rules
137
+ sheet.with_selector('.header').size # => 3
138
+ sheet.with_specificity(10..50).empty? # => false
139
+ ```
140
+
141
+ See [BENCHMARKS.md](BENCHMARKS.md) for detailed performance comparisons.
142
+
143
+ ## CSS Support
144
+
145
+ Cataract aims to support all CSS specifications including:
146
+ - **Selectors**: All CSS2/CSS3 selectors (type, class, ID, attribute, pseudo-classes, pseudo-elements, combinators)
147
+ - **At-rules**: `@media`, `@font-face`, `@keyframes`, `@supports`, `@page`, `@layer`, `@container`, `@property`, `@scope`, `@counter-style`
148
+ - **Media Queries**: Full support including nested queries and media features
149
+ - **Special syntax**: Data URIs, `calc()`, `url()`, CSS functions with parentheses
150
+ - **!important**: Full support with correct cascade behavior
151
+
152
+ ### Color Conversion
153
+
154
+ Cataract supports converting colors between multiple CSS color formats with high precision.
155
+
156
+ **Note:** Color conversion is an optional extension that is not loaded by default, which keeps the memory footprint small. Require it explicitly when you need it:
157
+
158
+ ```ruby
159
+ require 'cataract'
160
+ require 'cataract/color_conversion'
161
+
162
+ # Convert hex to RGB
163
+ sheet = Cataract::Stylesheet.parse('.button { color: #ff0000; background: #00ff00; }')
164
+ sheet.convert_colors!(from: :hex, to: :rgb)
165
+ sheet.to_s
166
+ # => ".button { color: rgb(255 0 0); background: rgb(0 255 0); }"
167
+
168
+ # Convert RGB to HSL for easier color manipulation
169
+ sheet = Cataract::Stylesheet.parse('.card { color: rgb(255, 128, 0); }')
170
+ sheet.convert_colors!(from: :rgb, to: :hsl)
171
+ sheet.to_s
172
+ # => ".card { color: hsl(30, 100%, 50%); }"
173
+
174
+ # Convert to Oklab for perceptually uniform colors
175
+ sheet = Cataract::Stylesheet.parse('.gradient { background: linear-gradient(#ff0000, #0000ff); }')
176
+ sheet.convert_colors!(to: :oklab)
177
+ sheet.to_s
178
+ # => ".gradient { background: linear-gradient(oklab(0.6280 0.2249 0.1258), oklab(0.4520 -0.0325 -0.3115)); }"
179
+
180
+ # Auto-detect source format and convert all colors
181
+ sheet = Cataract::Stylesheet.parse(<<~CSS)
182
+ .mixed {
183
+ color: #ff0000;
184
+ background: rgb(0, 255, 0);
185
+ border-color: hsl(240, 100%, 50%);
186
+ }
187
+ CSS
188
+ sheet.convert_colors!(to: :hex) # Converts all formats to hex
189
+ ```
190
+
191
+ #### Supported Color Formats
192
+
193
+ | Format | From | To | Alpha | Example | Notes |
194
+ |--------|------|-----|-------|---------|-------|
195
+ | **hex** | ✓ | ✓ | ✓ | `#ff0000`, `#f00`, `#ff000080` | 3, 6, or 8 digit hex |
196
+ | **rgb** | ✓ | ✓ | ✓ | `rgb(255 0 0)`, `rgb(255, 0, 0)` | Modern & legacy syntax |
197
+ | **hsl** | ✓ | ✓ | ✓ | `hsl(0, 100%, 50%)` | Hue, saturation, lightness |
198
+ | **hwb** | ✓ | ✓ | ✓ | `hwb(0 0% 0%)` | Hue, whiteness, blackness |
199
+ | **oklab** | ✓ | ✓ | ✓ | `oklab(0.628 0.225 0.126)` | Perceptually uniform color space |
200
+ | **oklch** | ✓ | ✓ | ✓ | `oklch(0.628 0.258 29.2)` | Cylindrical Oklab (LCh) |
201
+ | **lab** | ✓ | ✓ | ✓ | `lab(53.2% 80.1 67.2)` | CIE L\*a\*b\* color space (D50) |
202
+ | **lch** | ✓ | ✓ | ✓ | `lch(53.2% 104.5 40)` | Cylindrical Lab (polar coordinates) |
203
+ | **named** | ✓ | ✓ | – | `red`, `blue`, `rebeccapurple` | 147 CSS named colors |
204
+ | **color()** | – | – | – | `color(display-p3 1 0 0)` | Absolute color spaces (planned) |
205
+
206
+ **Format aliases:**
207
+ - `:rgba` → uses `rgb()` syntax with alpha
208
+ - `:hsla` → uses `hsl()` syntax with alpha
209
+ - `:hwba` → uses `hwb()` syntax with alpha
210
+
211
+ **Limitations:**
212
+ - Math functions (`calc()`, `min()`, `max()`, `clamp()`) are not evaluated and will be preserved unchanged
213
+ - CSS Color Level 5 features (`none`, `infinity`, relative color syntax with `from`) are preserved but not converted
214
+ - Unknown or future color functions are passed through unchanged
215
+
216
+ ### `@import` Support
217
+
218
+ `@import` statements can be resolved with security controls:
219
+
220
+ ```ruby
221
+ # Disabled by default
222
+ sheet = Cataract::Stylesheet.parse(css) # @import statements are ignored
223
+
224
+ # Enable with safe defaults (HTTPS only, .css files only, max depth 5)
225
+ sheet = Cataract::Stylesheet.parse(css, import: true)
226
+
227
+ # Custom options for full control
228
+ sheet = Cataract::Stylesheet.parse(css, import: {
229
+ allowed_schemes: ['https', 'file'], # Default: ['https']
230
+ extensions: ['css'], # Default: ['css']
231
+ max_depth: 3, # Default: 5
232
+ timeout: 10, # Default: 10 seconds
233
+ follow_redirects: true # Default: true
234
+ })
235
+ ```
236
+
237
+ **Security note**: Import resolution includes protections against:
238
+ - Unauthorized schemes (file://, data://, etc.)
239
+ - Non-CSS file extensions
240
+ - Circular references
241
+ - Excessive nesting depth
242
+
243
+ ## Development
244
+
245
+ ```bash
246
+ # Install dependencies
247
+ bundle install
248
+
249
+ # Compile the C extension
250
+ rake compile
251
+
252
+ # Run tests
253
+ rake test
254
+
255
+ # Run benchmarks
256
+ rake benchmark
257
+
258
+ # Run fuzzer to test parser robustness
259
+ rake fuzz # 10,000 iterations (default)
260
+ rake fuzz ITERATIONS=100000 # Custom iteration count
261
+ ```
262
+
263
+ **Fuzzer**: Generates random CSS input to test parser robustness against malformed or edge-case CSS. Helps catch crashes, memory leaks, and parsing edge cases.
264
+
265
+ ## How It Works
266
+
267
+ Cataract uses a high-performance C implementation for CSS parsing and serialization.
268
+
269
+ Each `Rule` is a struct containing:
270
+ - `id`: Integer ID (position in rules array)
271
+ - `selector`: The CSS selector string
272
+ - `declarations`: Array of `Declaration` structs (property, value, important flag)
273
+ - `specificity`: Calculated CSS specificity (cached)
274
+
275
+ Implementation details:
276
+ - **C implementation**: Critical paths implemented in C (parsing, merging, serialization)
277
+ - **Flat rule array**: All rules stored in a single array, preserving source order
278
+ - **Efficient media query handling**: O(1) lookup via internal media index
279
+ - **Memory efficient**: Minimal allocations, reuses string buffers where possible
280
+ - **Comprehensive parsing**: Preserves complex CSS structures including nested media queries, nested selectors, data URIs, CSS functions (calc(), var(), etc.)
281
+
282
+ ## Development Notes
283
+
284
+ Significant portions of this codebase were generated with assistance from [Claude Code](https://claude.com/claude-code), including the benchmark infrastructure, test suite, and documentation generation system.
285
+
286
+ ## License
287
+
288
+ MIT
289
+
290
+ ## Contributing
291
+
292
+ Bug reports and pull requests are welcome on GitHub at https://github.com/jamescook/cataract.
data/Rakefile ADDED
@@ -0,0 +1,209 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'bundler/gem_tasks'
4
+ require 'rake/testtask'
5
+ require 'rake/clean'
6
+
7
+ # Load tasks from lib/tasks/
8
+ Dir.glob('lib/tasks/**/*.rake').each { |r| load r }
9
+
10
# Only load extension tasks if rake-compiler is available.
# rake-compiler is a development dependency; when it is missing we skip the
# compile tasks (with a warning) instead of aborting every rake invocation
# with an unhandled LoadError.
begin
  require 'rake/extensiontask'

  # Configure the main extension
  Rake::ExtensionTask.new('cataract') do |ext|
    ext.lib_dir = 'lib/cataract'
    ext.ext_dir = 'ext/cataract'
  end

  # Configure the color conversion extension (optional, loaded on-demand)
  Rake::ExtensionTask.new('cataract_color') do |ext|
    ext.lib_dir = 'lib/cataract'
    ext.ext_dir = 'ext/cataract_color'
  end
rescue LoadError
  # Mirrors the YARD block at the bottom of this file: optional tooling that is
  # not installed must not break unrelated tasks.
  warn 'rake-compiler not available - skipping extension compile tasks'
end
26
+
27
+ # Configure CLEAN to run before compilation
28
+ # rake-compiler already adds: tmp/, lib/**/*.{so,bundle}, etc.
29
+ # All C files are now hand-written (Ragel removed), so only clean build artifacts
30
+ CLEAN.include('ext/**/Makefile', 'ext/**/*.o')
31
+
32
# Defines `rake test`: runs every test/**/test_*.rb file except the
# css_parser_compat reference tests, which are kept for reference and not run.
Rake::TestTask.new(:test) do |test_task|
  test_task.libs.push('test', 'lib')
  # Load test_helper before running tests (handles SimpleCov setup)
  test_task.ruby_opts.push('-rtest_helper')
  reference_only = 'test/css_parser_compat/**/*'
  test_task.test_files = FileList['test/**/test_*.rb'].exclude(reference_only)
end
40
+
41
desc 'Run all benchmarks'
task :benchmark do
  # Build the extensions first, then run each benchmark suite in a fixed order.
  Rake::Task[:compile].invoke
  %w[parsing serialization specificity merging yjit].each do |suite|
    Rake::Task["benchmark:#{suite}"].invoke
  end

  divider = '-' * 80
  puts "\n#{divider}"
  puts 'All benchmarks complete!'
  puts 'Generate documentation with: rake benchmark:generate_docs'
  puts divider
end
54
+
55
# Individual benchmark tasks. Each simple task runs one script under
# benchmarks/ with the current Ruby; `string_allocation` rebuilds the extension
# twice (dynamic vs. buffer allocation) and benchmarks each build.
namespace :benchmark do
  desc 'Benchmark CSS parsing performance'
  task :parsing do
    puts 'Running parsing benchmark...'
    ruby 'benchmarks/benchmark_parsing.rb'
  end

  desc 'Benchmark CSS serialization (to_s) performance'
  task :serialization do
    puts 'Running serialization benchmark...'
    ruby 'benchmarks/benchmark_serialization.rb'
  end

  desc 'Benchmark specificity calculation performance'
  task :specificity do
    puts 'Running specificity benchmark...'
    ruby 'benchmarks/benchmark_specificity.rb'
  end

  desc 'Benchmark CSS merging performance'
  task :merging do
    puts 'Running merging benchmark...'
    ruby 'benchmarks/benchmark_merging.rb'
  end

  desc 'Benchmark Ruby-side operations with YJIT on vs off'
  task :yjit do
    puts 'Running YJIT benchmark...'
    ruby 'benchmarks/benchmark_yjit.rb'
  end

  desc 'Benchmark string allocation optimization (buffer vs dynamic)'
  task :string_allocation do
    # Clean up any existing benchmark results
    results_dir = 'benchmarks/.benchmark_results'
    if Dir.exist?(results_dir)
      Dir.glob(File.join(results_dir, 'string_allocation_*.json')).each do |file|
        puts "Removing old benchmark results: #{file}"
        FileUtils.rm_f(file)
      end
    end

    # Rebuild with the given environment, then benchmark that build.
    # Kernel#system returns false/nil on failure; stop immediately so a stale
    # extension from a previous build is never measured under the wrong label.
    build_and_measure = lambda do |compile_env|
      system(compile_env, 'rake', 'compile') ||
        abort('Compilation failed - aborting string allocation benchmark')
      system({}, RbConfig.ruby, 'benchmarks/benchmark_string_allocation.rb') ||
        abort('String allocation benchmark failed')
    end

    puts "\n#{'=' * 80}"
    puts 'Compiling with DYNAMIC allocation (rb_str_new_cstr)'
    puts '=' * 80
    build_and_measure.call({ 'CONFIGURE_ARGS' => '--disable-str-buf-optimization' })

    puts "\n\n#{'=' * 80}"
    puts 'Compiling with BUFFER allocation (rb_str_buf_new, production default)'
    puts '=' * 80
    build_and_measure.call({})
  end

  desc 'Generate BENCHMARKS.md from benchmark results'
  task :generate_docs do
    ruby 'scripts/generate_benchmarks_md.rb'
  end
end
115
+
116
+ task compile: :clean
117
+
118
+ task default: :test
119
+
120
# Lint task - runs clang-tidy on C code
#
# Locates clang-tidy (PATH first, then Homebrew's keg-only LLVM), runs it on
# every C file of both extensions, and aborts if any file fails. All files are
# always checked so a single run reports every failing file.
desc 'Run clang-tidy on C code'
task :lint do
  # Find clang-tidy binary
  clang_tidy = nil

  # Try system PATH first (Linux, or if user has llvm in PATH)
  if system('which clang-tidy > /dev/null 2>&1')
    clang_tidy = 'clang-tidy'
  # On macOS, check Homebrew LLVM (keg-only, not in PATH by default)
  elsif system('which brew > /dev/null 2>&1')
    llvm_prefix = `brew --prefix llvm 2>/dev/null`.strip
    clang_tidy = "#{llvm_prefix}/bin/clang-tidy" if !llvm_prefix.empty? && File.exist?("#{llvm_prefix}/bin/clang-tidy")
  end

  unless clang_tidy
    abort("clang-tidy not installed.\n " \
          "macOS: brew install llvm\n " \
          "Ubuntu/Debian: apt-get install clang-tidy\n " \
          'Fedora/RHEL: dnf install clang-tools-extra')
  end

  puts 'Running clang-tidy on C code...'

  # Find all .c files in ext/cataract/ and ext/cataract_color/
  c_files = Dir.glob('ext/cataract/*.c') + Dir.glob('ext/cataract_color/*.c')

  # Run clang-tidy on each file
  # Note: clang-tidy uses the .clang-tidy config file automatically
  # We pass Ruby include path so it can find ruby.h
  ruby_include = RbConfig::CONFIG['rubyhdrdir']
  ruby_arch_include = RbConfig::CONFIG['rubyarchhdrdir']

  # `reject` (unlike `all?`) does not short-circuit, so every file is linted
  # even after an earlier one fails. system returns false/nil on failure.
  failed_files = c_files.reject do |file|
    puts " Checking #{file}..."
    system(clang_tidy, '--quiet', file, '--',
           "-I#{ruby_include}",
           "-I#{ruby_arch_include}",
           '-Iext/cataract',
           '-Iext/cataract_color')
  end

  if failed_files.empty?
    puts '✓ clang-tidy passed'
  else
    abort("clang-tidy found issues!\n #{failed_files.join("\n ")}")
  end
end
168
+
169
# Fuzz testing
#
# Runs scripts/fuzzer/run.rb with the current Ruby after compiling.
# ITERATIONS (environment variable) overrides the default of 10,000 iterations.
# The task fails when the fuzzer exits non-zero or cannot be started, so that
# CI and scripted runs do not report success for a failed fuzz run.
desc 'Run fuzzer to test parser robustness (including color conversion)'
task fuzz: :compile do
  iterations = ENV.fetch('ITERATIONS', '10000')
  puts "Running CSS parser fuzzer (#{iterations} iterations)..."
  # Use system with ENV.to_h to preserve environment variables like FUZZ_GC_STRESS
  fuzzer_passed = system(ENV.to_h, RbConfig.ruby, '-Ilib', 'scripts/fuzzer/run.rb', iterations)
  abort('Fuzzer failed (non-zero exit status or could not be started)') unless fuzzer_passed
end
177
+
178
# Documentation generation with YARD
# The doc tasks are only registered when the yard gem can be loaded.
begin
  require 'yard'

  desc 'Generate example CSS analysis for documentation'
  task :generate_example do
    puts 'Generating GitHub CSS analysis example...'
    # Generate with file. prefix for YARD compatibility
    system('ruby examples/css_analyzer.rb https://github.com -o docs/file.github_analysis.html')
  end

  desc 'Generate documentation and open in browser'
  task docs: :generate_example do
    # Generate YARD documentation
    yardoc_args = [
      '--output-dir', 'docs',
      '--readme', 'README.md',
      '--title', 'Cataract - Fast CSS Parser',
      'lib/**/*.rb', 'ext/**/*.c', '-', 'docs/files/EXAMPLE.md'
    ]
    YARD::CLI::Yardoc.run(*yardoc_args)

    # Open in browser (skip in CI)
    next if ENV['CI']

    index_page = 'docs/index.html'
    system("open #{index_page}") if RUBY_PLATFORM.include?('darwin')
    system("xdg-open #{index_page}") if RUBY_PLATFORM.include?('linux')
  end

  desc 'List undocumented code'
  task :undoc do
    system('yard stats --list-undoc')
  end
rescue LoadError
  # YARD not available - skip doc tasks
end
data/benchmarks/benchmark_harness.rb ADDED
@@ -0,0 +1,193 @@
1
+ # frozen_string_literal: true
2
+
3
require 'benchmark/ips'
require 'json'
require 'fileutils'
require 'time'
require_relative 'system_metadata'
require_relative 'speedup_calculator'
8
+
9
# Base class for all benchmarks. Provides structure and automatic JSON output.
#
# Class-level hooks (.benchmark_name, .description, .metadata, .speedup_config)
# describe the benchmark. `.run` instantiates the subclass and calls the
# INSTANCE methods #sanity_checks (optional) and #call; those use the
# instance-level #benchmark helper, which records one JSON file per test case.
#
# Usage:
#   class MyBenchmark < BenchmarkHarness
#     def self.benchmark_name
#       'my_benchmark'
#     end
#
#     def self.description
#       'What this benchmark measures'
#     end
#
#     def self.metadata
#       { 'key' => 'value' } # Optional metadata for docs
#     end
#
#     def sanity_checks
#       # Optional: verify code works before benchmarking
#       raise "Sanity check failed!" unless something_works
#     end
#
#     def call
#       run_test_case_1
#       run_test_case_2
#     end
#
#     private
#
#     def run_test_case_1
#       benchmark('test_case_1') do |x|
#         x.config(time: 5, warmup: 2)
#         x.report('label') { ... }
#         x.compare!
#       end
#     end
#   end
#
#   MyBenchmark.run
class BenchmarkHarness
  # Directory (next to this file) where per-test-case and combined JSON results are written.
  RESULTS_DIR = File.expand_path('.results', __dir__)

  class << self
    # Abstract methods - must be implemented by subclasses

    # @return [String] identifier used in the header and in result file names
    def benchmark_name
      raise NotImplementedError, "#{self} must implement .benchmark_name"
    end

    # @return [String] human-readable summary printed in the header and stored in the results
    def description
      raise NotImplementedError, "#{self} must implement .description"
    end

    # @return [Hash] optional metadata stored with the results; the 'test_cases'
    #   entry (if any) is passed to SpeedupCalculator
    def metadata
      {} # Optional, can be overridden
    end

    # Class-level placeholder kept for backward compatibility.
    # NOTE: .run invokes the instance method #sanity_checks, not this one.
    def sanity_checks
      # Optional, can be overridden
    end

    # Class-level placeholder kept for backward compatibility.
    # NOTE: .run invokes the instance method #call, not this one.
    def call
      raise NotImplementedError, "#{self} must implement .call"
    end

    # Optional: Define how to calculate speedups for this benchmark
    # Override this to customize speedup calculation; return nil to skip it.
    #
    # IMPORTANT: Result names must follow convention "tool_name: test_case_id"
    #
    # @return [Hash, nil] Configuration for SpeedupCalculator
    #   {
    #     baseline_matcher: Proc,    # Returns true for baseline results
    #     comparison_matcher: Proc,  # Returns true for comparison results
    #     test_case_key: Symbol      # Key in test_cases metadata matching test_case_id
    #   }
    def speedup_config
      # Default: compare css_parser (baseline) vs cataract (comparison)
      # Match to test_cases by 'fixture' key
      {
        baseline_matcher: SpeedupCalculator::Matchers.css_parser,
        comparison_matcher: SpeedupCalculator::Matchers.cataract,
        test_case_key: :fixture
      }
    end

    # Main entry point - handles setup, execution, and cleanup.
    # Any StandardError is reported (message plus the first five backtrace
    # lines) and the process exits with status 1.
    def run
      instance = new
      setup
      instance.sanity_checks if instance.respond_to?(:sanity_checks, true)
      instance.call
      finalize(instance)
    rescue StandardError => e
      puts "❌ Benchmark failed: #{e.message}"
      puts Array(e.backtrace).first(5).join("\n")
      exit 1
    end

    private

    # Creates the results directory, collects system metadata when no
    # metadata.json exists yet, and prints the benchmark header.
    def setup
      FileUtils.mkdir_p(RESULTS_DIR)

      # Collect system metadata once per run
      SystemMetadata.collect unless File.exist?(File.join(RESULTS_DIR, 'metadata.json'))

      # Print header
      puts "\n\n"
      puts '=' * 80
      puts "#{benchmark_name.upcase.tr('_', ' ')} BENCHMARK"
      puts "Measures: #{description}"
      puts '=' * 80
      puts
    end

    # Merges the per-test-case JSON files recorded by #benchmark into
    # "<benchmark_name>.json", adds speedup statistics when configured, and
    # removes the per-test-case files.
    #
    # @param instance [BenchmarkHarness] the benchmark instance that was run
    def finalize(instance)
      # Combine all JSON files for this benchmark into one
      json_files = instance.instance_variable_get(:@json_files)
      return if json_files.nil? || json_files.empty?

      combined_data = {
        'name' => benchmark_name,
        'description' => description,
        # Copy: 'speedups' is added below and must not leak into (or fail on)
        # a shared or frozen hash returned by the subclass.
        'metadata' => (metadata || {}).dup,
        'timestamp' => Time.now.iso8601,
        'results' => []
      }

      # Read all the individual JSON files
      json_files.each do |filename|
        path = File.join(RESULTS_DIR, filename)
        next unless File.exist?(path)

        data = JSON.parse(File.read(path))
        combined_data['results'].concat(data) if data.is_a?(Array)
      end

      # Calculate speedups using configured strategy
      config = speedup_config
      if config
        calculator = SpeedupCalculator.new(
          results: combined_data['results'],
          test_cases: combined_data['metadata']['test_cases'],
          baseline_matcher: config[:baseline_matcher],
          comparison_matcher: config[:comparison_matcher],
          test_case_key: config[:test_case_key]
        )

        speedup_stats = calculator.calculate
        combined_data['metadata']['speedups'] = speedup_stats if speedup_stats
      end

      # Write combined file
      combined_path = File.join(RESULTS_DIR, "#{benchmark_name}.json")
      File.write(combined_path, JSON.pretty_generate(combined_data))

      # Clean up individual files. rm_f ignores files that are already gone,
      # matching the read loop above, which also tolerates missing files;
      # File.delete would raise and turn a saved run into a reported failure.
      json_files.each do |filename|
        FileUtils.rm_f(File.join(RESULTS_DIR, filename))
      end

      puts "\n✓ Results saved to #{combined_path}"
    end
  end

  # Instance methods
  protected

  # Runs one Benchmark.ips session with JSON output enabled and records the
  # output file so the class-level finalize step can merge it.
  #
  # @param test_case_name [String] suffix for the per-test-case JSON file name
  # @yield [x] the Benchmark.ips job to configure and populate with reports
  def benchmark(test_case_name)
    json_filename = "#{self.class.benchmark_name}_#{test_case_name}.json"
    json_path = File.join(RESULTS_DIR, json_filename)

    Benchmark.ips do |x|
      # Automatically enable JSON output
      x.json!(json_path)

      # Let the benchmark configure and run
      yield x
    end

    # Track that we created this file
    @json_files ||= []
    @json_files << json_filename
  end
end