vfcsv 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA256:
3
+ metadata.gz: 30c8959e89b63c211bb93b9a3ba56a290688a2fefdd8b8644c57d95b55d2ccfb
4
+ data.tar.gz: 2e1ff95060b5c3a9ef9edc6187d6546c72a5bb4b5811e480bc542e48ef4540b7
5
+ SHA512:
6
+ metadata.gz: '0695ec5e9e3e1b92489eb46ef250a62d939d671f113357809cbf88444e3198aa6f244488240cabc941c83f15be8d5578d8be64a5fb99e7ab182dd359ffff274f'
7
+ data.tar.gz: 5e41903584067f08c8597d5c5358e9ad866b93f2dd11530b870e5db375ebb097f1c207b2b614ce2011339c89785119b7c27c6c46b01737bcff7b3bdc6d65e0e9
data/.tool-versions ADDED
@@ -0,0 +1 @@
1
+ ruby 4.0.0
data/Gemfile ADDED
@@ -0,0 +1,14 @@
1
+ # frozen_string_literal: true
2
+
3
+ source "https://rubygems.org"
4
+
5
+ gemspec
6
+
7
+ gem "csv" # Bundled gem, no longer a default gem as of Ruby 3.4
8
+
9
+ group :development, :test do
10
+ gem "rake-compiler"
11
+ gem "rspec"
12
+ gem "benchmark-ips"
13
+ gem "minitest"
14
+ end
data/Gemfile.lock ADDED
@@ -0,0 +1,65 @@
1
+ PATH
2
+ remote: .
3
+ specs:
4
+ vfcsv (0.1.0)
5
+ rb_sys (~> 0.9)
6
+
7
+ GEM
8
+ remote: https://rubygems.org/
9
+ specs:
10
+ benchmark-ips (2.14.0)
11
+ csv (3.3.5)
12
+ diff-lcs (1.6.2)
13
+ minitest (5.27.0)
14
+ rake (13.3.1)
15
+ rake-compiler (1.3.1)
16
+ rake
17
+ rake-compiler-dock (1.11.0)
18
+ rb_sys (0.9.124)
19
+ rake-compiler-dock (= 1.11.0)
20
+ rspec (3.13.2)
21
+ rspec-core (~> 3.13.0)
22
+ rspec-expectations (~> 3.13.0)
23
+ rspec-mocks (~> 3.13.0)
24
+ rspec-core (3.13.6)
25
+ rspec-support (~> 3.13.0)
26
+ rspec-expectations (3.13.5)
27
+ diff-lcs (>= 1.2.0, < 2.0)
28
+ rspec-support (~> 3.13.0)
29
+ rspec-mocks (3.13.7)
30
+ diff-lcs (>= 1.2.0, < 2.0)
31
+ rspec-support (~> 3.13.0)
32
+ rspec-support (3.13.6)
33
+
34
+ PLATFORMS
35
+ arm64-darwin-25
36
+ ruby
37
+
38
+ DEPENDENCIES
39
+ benchmark-ips (~> 2.0, >= 0)
40
+ csv
41
+ minitest (~> 5.0, >= 0)
42
+ rake (~> 13.0)
43
+ rake-compiler (~> 1.2, >= 0)
44
+ rb_sys (~> 0.9)
45
+ rspec
46
+ vfcsv!
47
+
48
+ CHECKSUMS
49
+ benchmark-ips (2.14.0) sha256=b72bc8a65d525d5906f8cd94270dccf73452ee3257a32b89fbd6684d3e8a9b1d
50
+ csv (3.3.5) sha256=6e5134ac3383ef728b7f02725d9872934f523cb40b961479f69cf3afa6c8e73f
51
+ diff-lcs (1.6.2) sha256=9ae0d2cba7d4df3075fe8cd8602a8604993efc0dfa934cff568969efb1909962
52
+ minitest (5.27.0)
53
+ rake (13.3.1) sha256=8c9e89d09f66a26a01264e7e3480ec0607f0c497a861ef16063604b1b08eb19c
54
+ rake-compiler (1.3.1) sha256=6b351612b6e2d73ddd5563ee799bb58685176e05363db6758504bd11573d670a
55
+ rake-compiler-dock (1.11.0) sha256=eab51f2cd533eb35cea6b624a75281f047123e70a64c58b607471bb49428f8c2
56
+ rb_sys (0.9.124) sha256=513476557b12eaf73764b3da9f8746024558fe8699bda785fb548c9aa3877ae7
57
+ rspec (3.13.2) sha256=206284a08ad798e61f86d7ca3e376718d52c0bc944626b2349266f239f820587
58
+ rspec-core (3.13.6) sha256=a8823c6411667b60a8bca135364351dda34cd55e44ff94c4be4633b37d828b2d
59
+ rspec-expectations (3.13.5) sha256=33a4d3a1d95060aea4c94e9f237030a8f9eae5615e9bd85718fe3a09e4b58836
60
+ rspec-mocks (3.13.7) sha256=0979034e64b1d7a838aaaddf12bf065ea4dc40ef3d4c39f01f93ae2c66c62b1c
61
+ rspec-support (3.13.6) sha256=2e8de3702427eab064c9352fe74488cc12a1bfae887ad8b91cba480ec9f8afb2
62
+ vfcsv (0.1.0)
63
+
64
+ BUNDLED WITH
65
+ 4.0.3
data/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2025 Chris Hasinski
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,268 @@
1
+ # VFCSV - Very Fast CSV
2
+
3
+ **The only SIMD-accelerated CSV parser for Ruby with full stdlib API compatibility.**
4
+
5
+ [![Gem Version](https://badge.fury.io/rb/vfcsv.svg)](https://badge.fury.io/rb/vfcsv)
6
+
7
+ VFCSV is a drop-in replacement for Ruby's CSV library that delivers 2-6x faster parsing through SIMD acceleration (NEON on ARM64, AVX2 on x86_64), while maintaining 100% API compatibility with Ruby's CSV—including `Row`, `Table`, converters, and all standard options.
8
+
9
+ ## Why VFCSV?
10
+
11
+ | Library | Speed | Drop-in? | Row/Table | Converters | SIMD | Dependencies |
12
+ |---------|-------|----------|-----------|------------|------|--------------|
13
+ | **VFCSV** | **2-6x** | **✓** | **✓** | **✓** | **✓** | Pure Rust |
14
+ | zsv-ruby | 5-6x | Partial | ✗ | ✗ | ✓ | C (zsv) |
15
+ | OSV | 8x | ✗ | ✗ | ✗ | ✗ | Rust |
16
+ | FastCSV | 1.5x | ✓ | ✓ | ✓ | ✗ | C (Ragel) |
17
+ | FastestCSV | 3x | ✗ | ✗ | ✗ | ✗ | C |
18
+ | CSV (stdlib) | 1x | N/A | ✓ | ✓ | ✗ | None |
19
+
20
+ **VFCSV is the only library that combines SIMD acceleration with full API compatibility.**
21
+
22
+ ## Installation
23
+
24
+ Add to your Gemfile:
25
+
26
+ ```ruby
27
+ gem 'vfcsv'
28
+ ```
29
+
30
+ Or install directly:
31
+
32
+ ```bash
33
+ gem install vfcsv
34
+ ```
35
+
36
+ Requires a Rust toolchain for compilation. Works on Ruby 3.0+ (optimized for Ruby 4.0).
37
+
38
+ ## Quick Start
39
+
40
+ ```ruby
41
+ # Just replace your require
42
+ require 'vfcsv' # instead of require 'csv'
43
+
44
+ # Use exactly like Ruby's CSV
45
+ data = VFCSV.parse("name,age\nAlice,30\nBob,25", headers: true)
46
+ data[0]["name"] # => "Alice"
47
+ data["age"] # => ["30", "25"]
48
+
49
+ # All the same methods work
50
+ VFCSV.read("data.csv")
51
+ VFCSV.foreach("data.csv") { |row| puts row }
52
+ VFCSV.generate { |csv| csv << [1, 2, 3] }
53
+ ```
54
+
55
+ ## Performance
56
+
57
+ Benchmarks on Apple M1 (Ruby 4.0, no YJIT):
58
+
59
+ | Data Type | CSV stdlib | VFCSV | Speedup |
60
+ |-----------|------------|-------|---------|
61
+ | Simple CSV | 40 MB/s | 90 MB/s | **2.2x** |
62
+ | Quoted CSV | 21 MB/s | 120 MB/s | **5.6x** |
63
+ | With Headers | 10.7 i/s | 27.0 i/s | **2.5x** |
64
+
65
+ SIMD excels at quote detection, making quoted CSV parsing significantly faster.
66
+
67
+ ```ruby
68
+ # Check your SIMD capabilities
69
+ VFCSV.simd_info
70
+ # => {neon: true, arch: "aarch64", backend: "vfcsv-simd"}
71
+ ```
72
+
73
+ ## Full API Compatibility
74
+
75
+ ### Parsing
76
+
77
+ ```ruby
78
+ # Basic parsing
79
+ VFCSV.parse("a,b,c\n1,2,3")
80
+ # => [["a", "b", "c"], ["1", "2", "3"]]
81
+
82
+ # With headers (returns Table with Row objects)
83
+ table = VFCSV.parse("name,age\nAlice,30", headers: true)
84
+ table.class # => VFCSV::Table
85
+ table[0].class # => VFCSV::Row
86
+ table[0]["name"] # => "Alice"
87
+ table[0][0] # => "Alice"
88
+ table["name"] # => ["Alice"]
89
+
90
+ # Parse single line
91
+ VFCSV.parse_line("a,b,c") # => ["a", "b", "c"]
92
+
93
+ # File operations
94
+ VFCSV.read("file.csv")
95
+ VFCSV.foreach("file.csv") { |row| process(row) }
96
+ VFCSV.table("file.csv") # Shortcut for read with headers
97
+ ```
98
+
99
+ ### Converters
100
+
101
+ ```ruby
102
+ # Built-in converters
103
+ VFCSV.parse("a,b\n1,2.5", headers: true, converters: :numeric)
104
+ # => a: 1 (Integer), b: 2.5 (Float)
105
+
106
+ # Available: :integer, :float, :numeric, :date, :date_time, :all
107
+
108
+ # Custom converters
109
+ upcase = ->(val) { val.upcase rescue val }
110
+ VFCSV.parse("a\nhello", headers: true, converters: [upcase])
111
+ # => a: "HELLO"
112
+ ```
113
+
114
+ ### Header Converters
115
+
116
+ ```ruby
117
+ # Downcase headers
118
+ VFCSV.parse("NAME,AGE\na,1", headers: true, header_converters: :downcase)
119
+ # headers: ["name", "age"]
120
+
121
+ # Symbol headers
122
+ VFCSV.parse("Name,Age\na,1", headers: true, header_converters: :symbol)
123
+ # headers: [:name, :age]
124
+ # Access: row[:name]
125
+ ```
126
+
127
+ ### Row Class
128
+
129
+ Full `CSV::Row` compatibility:
130
+
131
+ ```ruby
132
+ row = table[0]
133
+ row.headers # => ["name", "age"]
134
+ row.fields # => ["Alice", "30"]
135
+ row["name"] # => "Alice"
136
+ row[0] # => "Alice"
137
+ row.to_h # => {"name" => "Alice", "age" => "30"}
138
+ row.to_csv # => "Alice,30\n"
139
+ row.header?("name") # => true
140
+ row.field?("Alice") # => true
141
+
142
+ # Mutation
143
+ row["city"] = "NYC"
144
+ row << ["country", "USA"]
145
+ row.delete("country")
146
+ ```
147
+
148
+ ### Table Class
149
+
150
+ Full `CSV::Table` compatibility:
151
+
152
+ ```ruby
153
+ table.headers # => ["name", "age"]
154
+ table.size # => 2
155
+ table[0] # => Row
156
+ table["name"] # => ["Alice", "Bob"] (column)
157
+
158
+ # Access modes
159
+ table.by_col["name"] # Column access
160
+ table.by_row[0] # Row access
161
+
162
+ # Mutation
163
+ table << ["Carol", "35"]
164
+ table.delete(0)
165
+
166
+ # Output
167
+ table.to_csv # Full CSV string with headers
168
+ table.to_a # Array of arrays
169
+ ```
170
+
171
+ ### Generation
172
+
173
+ ```ruby
174
+ # Generate CSV string
175
+ csv = VFCSV.generate do |out|
176
+ out << ["name", "age"]
177
+ out << ["Alice", 30]
178
+ end
179
+ # => "name,age\nAlice,30\n"
180
+
181
+ # Generate single line
182
+ VFCSV.generate_line([1, 2, 3]) # => "1,2,3\n"
183
+ VFCSV.generate_line([1, 2], col_sep: "|") # => "1|2\n"
184
+
185
+ # Force quotes
186
+ VFCSV.generate_line([1, 2], force_quotes: true) # => "\"1\",\"2\"\n"
187
+
188
+ # Write to file
189
+ VFCSV.open("out.csv", "w") do |csv|
190
+ csv << [1, 2, 3]
191
+ end
192
+ ```
193
+
194
+ ### Options
195
+
196
+ All standard CSV options are supported:
197
+
198
+ ```ruby
199
+ VFCSV.parse(data,
200
+ col_sep: ",", # Column separator
201
+ row_sep: :auto, # Row separator (:auto, "\n", "\r\n")
202
+ quote_char: '"', # Quote character
203
+ headers: false, # First row as headers
204
+ converters: nil, # Value converters
205
+ header_converters: nil, # Header converters
206
+ skip_blanks: false, # Skip empty rows
207
+ skip_lines: nil, # Regexp to skip lines
208
+ force_quotes: false, # Quote all fields on output
209
+ liberal_parsing: false # Lenient parsing
210
+ )
211
+ ```
212
+
213
+ ## Architecture
214
+
215
+ VFCSV uses a two-stage SIMD-accelerated parsing approach inspired by simdjson:
216
+
217
+ 1. **Stage 1: Structural Detection** - SIMD instructions process 16 bytes at a time to identify commas, quotes, and newlines
218
+ 2. **Stage 2: Field Extraction** - Extract fields based on structural indices with optimized quote handling
219
+
220
+ The Rust core is wrapped with Magnus for zero-copy Ruby string handling.
221
+
222
+ ```
223
+ ┌───────────────────────────────────────────────┐
224
+ │ Ruby API │
225
+ │ VFCSV.parse / Row / Table / Generator │
226
+ ├───────────────────────────────────────────────┤
227
+ │ Magnus FFI │
228
+ ├───────────────────────────────────────────────┤
229
+ │ Rust SIMD Parser │
230
+ │ ┌─────────────┐ ┌───────────────────┐ │
231
+ │ │ NEON (ARM64)│ │ Portable Fallback │ │
232
+ │ └─────────────┘ └───────────────────┘ │
233
+ └───────────────────────────────────────────────┘
234
+ ```
235
+
236
+ ## When to Use VFCSV
237
+
238
+ **Use VFCSV when:**
239
+ - You need faster CSV parsing without changing your code
240
+ - You're processing large CSV files
241
+ - You need full CSV API compatibility (Row, Table, converters)
242
+ - You want SIMD acceleration with zero C dependencies
243
+
244
+ **Consider alternatives when:**
245
+ - You only need hash output (OSV might be faster)
246
+ - You don't need Row/Table classes (zsv-ruby is comparable speed)
247
+ - You need streaming for files larger than memory
248
+
249
+ ## Running Tests
250
+
251
+ ```bash
252
+ bundle exec rake test # Run all tests (136 tests)
253
+ bundle exec rake bench # Run benchmarks
254
+ ```
255
+
256
+ ## Contributing
257
+
258
+ Bug reports and pull requests are welcome at https://github.com/khasinski/vfcsv.
259
+
260
+ ## License
261
+
262
+ MIT License. See [LICENSE](LICENSE) for details.
263
+
264
+ ## Acknowledgments
265
+
266
+ - Inspired by [simdjson](https://github.com/simdjson/simdjson)'s SIMD parsing techniques
267
+ - Built with [Magnus](https://github.com/matsadler/magnus) for Ruby bindings
268
+ - Benchmarked against [zsv-ruby](https://github.com/sebyx07/zsv-ruby), [OSV](https://github.com/njaremko/osv), and [FastCSV](https://github.com/jpmckinney/fastcsv)
data/Rakefile ADDED
@@ -0,0 +1,37 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "bundler/gem_tasks"
4
+ require "rspec/core/rake_task"
5
+ require "rake/extensiontask"
6
+ require "rake/testtask"
7
+
8
+ RSpec::Core::RakeTask.new(:spec)
9
+
10
+ # Minitest task for CSV compatibility tests
11
+ Rake::TestTask.new(:test) do |t|
12
+ t.libs << "test"
13
+ t.libs << "lib"
14
+ t.test_files = FileList["test/test_*.rb"]
15
+ t.verbose = true
16
+ end
17
+
18
+ # Rust extension (via rb_sys)
19
+ Rake::ExtensionTask.new("vfcsv_rust") do |ext|
20
+ ext.lib_dir = "lib/vfcsv"
21
+ end
22
+
23
+ task default: %i[compile test]
24
+
25
+ namespace :bench do
26
+ desc "Benchmark against stdlib CSV"
27
+ task :stdlib do
28
+ ruby "bench/vs_stdlib.rb"
29
+ end
30
+
31
+ desc "Benchmark against all CSV libraries"
32
+ task :all do
33
+ ruby "bench/vs_all.rb"
34
+ end
35
+ end
36
+
37
+ task bench: "bench:stdlib"
@@ -0,0 +1,20 @@
1
+ #!/bin/bash
2
+ set -e
3
+
4
+ cd "$(dirname "$0")/.."
5
+
6
+ echo "=================================================="
7
+ echo "VFCSV vs CSV - All JIT Modes Comparison"
8
+ echo "=================================================="
9
+
10
+ echo ""
11
+ echo "==================== NO JIT ===================="
12
+ bundle exec ruby bench/vs_stdlib.rb
13
+
14
+ echo ""
15
+ echo "==================== YJIT ===================="
16
+ RUBY_YJIT_ENABLE=1 bundle exec ruby bench/vs_stdlib.rb
17
+
18
+ echo ""
19
+ echo "==================== ZJIT ===================="
20
+ RUBY_ZJIT_ENABLE=1 bundle exec ruby bench/vs_stdlib.rb
@@ -0,0 +1,253 @@
1
+ #!/usr/bin/env ruby
2
+ # frozen_string_literal: true
3
+
4
+ # Comprehensive benchmark comparing VFCSV against all major CSV libraries
5
+ #
6
+ # Competitors:
7
+ # - csv (stdlib) - Ruby's built-in CSV
8
+ # - fastcsv - Ragel-based, drop-in replacement
9
+ # - fastest-csv - C-based, fastest pure parsing
10
+ # - smarter_csv - Feature-rich, hash output
11
+ # - zsv-ruby - SIMD-accelerated (wraps C zsv library)
12
+ # - osv - Rust-based (wraps csv-rs)
13
+
14
+ require "bundler/setup"
15
+ require "benchmark/ips"
16
+ require "csv"
17
+ require_relative "../lib/vfcsv"
18
+
19
+ # Try to load optional competitors
20
# Availability of each optional competitor gem, keyed by the symbol the rest
# of this script looks up (e.g. COMPETITORS[:zsv]). The value on the left of
# each pair below is the feature name passed to `require`. A gem that cannot
# be loaded is recorded as false instead of aborting the benchmark.
COMPETITORS = {
  fastcsv: "fastcsv",
  fastest_csv: "fastest-csv",
  smarter_csv: "smarter_csv",
  zsv: "zsv",
  osv: "osv"
}.to_h do |key, feature|
  require feature
  [key, true]
rescue LoadError
  # Only a missing gem is tolerated; any other failure still propagates.
  [key, false]
end
56
+
57
+ puts "=" * 70
58
+ puts "VFCSV vs All Competitors - Comprehensive Benchmark"
59
+ puts "=" * 70
60
+ puts
61
+ puts "Ruby: #{RUBY_VERSION}"
62
+ puts "YJIT: #{defined?(RubyVM::YJIT) && RubyVM::YJIT.enabled? ? 'enabled' : 'disabled'}"
63
+ puts "VFCSV SIMD: #{VFCSV.simd_info.inspect}"
64
+ puts
65
+ puts "Competitors available:"
66
+ puts " - csv (stdlib): ✓"
67
+ COMPETITORS.each do |name, available|
68
+ puts " - #{name}: #{available ? '✓' : '✗ (not installed)'}"
69
+ end
70
+ puts
71
+
72
+ # Generate test data
73
# Builds an unquoted CSV document used as benchmark input.
#
# The first line is the header row "col1,...,colN"; each data line holds
# cells of the form "value<row>_<col>". Every line, including the last one,
# is terminated by "\n".
#
# @param rows [Integer] number of data rows (0 yields a header-only document)
# @param cols [Integer] number of columns
# @return [String] the generated CSV text
def generate_csv(rows, cols)
  header = (1..cols).map { |i| "col#{i}" }.join(",")
  lines = (1..rows).map do |r|
    (1..cols).map { |c| "value#{r}_#{c}" }.join(",")
  end
  # Terminate each line individually so that rows == 0 does not leave a
  # stray blank line after the header.
  [header, *lines].map { |line| "#{line}\n" }.join
end
80
+
81
# Builds a CSV document in which every field is wrapped in double quotes.
#
# Header cells are "col<N>"; data cells are "value <row>, col <col>", so each
# quoted field contains an embedded comma. Every line, including the last
# one, is terminated by "\n".
#
# @param rows [Integer] number of data rows (0 yields a header-only document)
# @param cols [Integer] number of columns
# @return [String] the generated CSV text
def generate_quoted_csv(rows, cols)
  header = (1..cols).map { |i| "\"col#{i}\"" }.join(",")
  lines = (1..rows).map do |r|
    (1..cols).map { |c| "\"value #{r}, col #{c}\"" }.join(",")
  end
  # Terminate each line individually so that rows == 0 does not leave a
  # stray blank line after the header.
  [header, *lines].map { |line| "#{line}\n" }.join
end
88
+
89
# Builds a CSV document whose data cells are floating-point numbers.
#
# Header cells are "col<N>"; the cell at (row, col) is the string form of
# row * col * 1.5. Every line, including the last one, is terminated by "\n".
#
# @param rows [Integer] number of data rows (0 yields a header-only document)
# @param cols [Integer] number of columns
# @return [String] the generated CSV text
def generate_numeric_csv(rows, cols)
  header = (1..cols).map { |i| "col#{i}" }.join(",")
  lines = (1..rows).map do |r|
    (1..cols).map { |c| (r * c * 1.5).to_s }.join(",")
  end
  # Terminate each line individually so that rows == 0 does not leave a
  # stray blank line after the header.
  [header, *lines].map { |line| "#{line}\n" }.join
end
96
+
97
+ # Test datasets
98
+ SMALL_CSV = generate_csv(1000, 10)
99
+ MEDIUM_CSV = generate_csv(10_000, 10)
100
+ LARGE_CSV = generate_csv(50_000, 10)
101
+ QUOTED_CSV = generate_quoted_csv(5000, 10)
102
+ NUMERIC_CSV = generate_numeric_csv(5000, 10)
103
+
104
+ puts "Test data sizes:"
105
+ puts " Small: #{SMALL_CSV.bytesize / 1024}KB (1,000 rows x 10 cols)"
106
+ puts " Medium: #{MEDIUM_CSV.bytesize / 1024}KB (10,000 rows x 10 cols)"
107
+ puts " Large: #{LARGE_CSV.bytesize / 1024}KB (50,000 rows x 10 cols)"
108
+ puts " Quoted: #{QUOTED_CSV.bytesize / 1024}KB (5,000 rows x 10 cols, quoted)"
109
+ puts " Numeric: #{NUMERIC_CSV.bytesize / 1024}KB (5,000 rows x 10 cols, numeric)"
110
+ puts
111
+
112
+ # Verify correctness first
113
+ puts "-" * 70
114
+ puts "Correctness Check (comparing output to CSV stdlib)"
115
+ puts "-" * 70
116
+
117
+ csv_result = CSV.parse(SMALL_CSV)
118
+ vfcsv_result = VFCSV.parse(SMALL_CSV)
119
+ puts "VFCSV: #{csv_result == vfcsv_result ? '✓ PASS' : '✗ FAIL'}"
120
+
121
+ if COMPETITORS[:fastcsv]
122
+ fastcsv_result = FastCSV.parse(SMALL_CSV)
123
+ puts "FastCSV: #{csv_result == fastcsv_result ? '✓ PASS' : '✗ FAIL'}"
124
+ end
125
+
126
+ if COMPETITORS[:fastest_csv]
127
+ fastest_result = FastestCSV.parse(SMALL_CSV)
128
+ puts "FastestCSV: #{csv_result == fastest_result ? '✓ PASS' : '✗ FAIL (known - multiline issues)'}"
129
+ end
130
+
131
+ puts
132
+
133
+ # Run benchmarks
134
+ [
135
+ ["Small (1K rows)", SMALL_CSV],
136
+ ["Medium (10K rows)", MEDIUM_CSV],
137
+ ["Large (50K rows)", LARGE_CSV],
138
+ ["Quoted (5K rows)", QUOTED_CSV],
139
+ ].each do |name, csv_data|
140
+ puts "-" * 70
141
+ puts "Benchmark: #{name}"
142
+ puts "-" * 70
143
+
144
+ Benchmark.ips do |x|
145
+ x.config(time: 3, warmup: 1)
146
+
147
+ x.report("CSV (stdlib)") { CSV.parse(csv_data) }
148
+ x.report("VFCSV (SIMD)") { VFCSV.parse(csv_data) }
149
+
150
+ if COMPETITORS[:fastcsv]
151
+ x.report("FastCSV") { FastCSV.parse(csv_data) }
152
+ end
153
+
154
+ if COMPETITORS[:fastest_csv]
155
+ x.report("FastestCSV") { FastestCSV.parse(csv_data) }
156
+ end
157
+
158
+ if COMPETITORS[:zsv]
159
+ x.report("ZSV") { ZSV.parse(csv_data) }
160
+ end
161
+
162
+ if COMPETITORS[:osv]
163
+ x.report("OSV") { OSV.for_each(StringIO.new(csv_data), result_type: :array).to_a }
164
+ end
165
+
166
+ x.compare!
167
+ end
168
+ puts
169
+ end
170
+
171
+ # Headers benchmark
172
+ puts "-" * 70
173
+ puts "Benchmark: With Headers (returns hash/Row objects)"
174
+ puts "-" * 70
175
+
176
+ Benchmark.ips do |x|
177
+ x.config(time: 3, warmup: 1)
178
+
179
+ x.report("CSV (headers)") { CSV.parse(MEDIUM_CSV, headers: true).map(&:to_h) }
180
+ x.report("VFCSV (headers)") { VFCSV.parse(MEDIUM_CSV, headers: true).map(&:to_h) }
181
+
182
+ if COMPETITORS[:fastcsv]
183
+ x.report("FastCSV (headers)") { FastCSV.parse(MEDIUM_CSV, headers: true).map(&:to_h) }
184
+ end
185
+
186
+ if COMPETITORS[:smarter_csv]
187
+ x.report("SmarterCSV") { SmarterCSV.parse(MEDIUM_CSV) }
188
+ end
189
+
190
+ if COMPETITORS[:osv]
191
+ x.report("OSV (hash)") { OSV.for_each(StringIO.new(MEDIUM_CSV), result_type: :hash).to_a }
192
+ end
193
+
194
+ x.compare!
195
+ end
196
+
197
+ # Throughput summary
198
+ puts
199
+ puts "=" * 70
200
+ puts "THROUGHPUT SUMMARY (MB/s)"
201
+ puts "=" * 70
202
+
203
# Calls the given block +iterations+ times with +csv_data+ and returns the
# achieved throughput in MB/s, where 1 MB is counted as 1,000,000 bytes.
# Elapsed time is read from Process::CLOCK_MONOTONIC.
def throughput_mbs(csv_data, iterations)
  size_mb = csv_data.bytesize / 1_000_000.0
  started = Process.clock_gettime(Process::CLOCK_MONOTONIC)
  iterations.times { yield csv_data }
  finished = Process.clock_gettime(Process::CLOCK_MONOTONIC)
  (size_mb * iterations) / (finished - started)
end

# Dataset label => [CSV text, iteration count]. Larger inputs are given fewer
# iterations. Keeping label, data and count together in one table avoids the
# index-coupled parallel arrays the loop previously relied on.
results = {
  "Small" => [SMALL_CSV, 1000],
  "Medium" => [MEDIUM_CSV, 100],
  "Large" => [LARGE_CSV, 20]
}.transform_values do |(csv_data, iterations)|
  # stdlib CSV is measured first, then VFCSV, matching the original order.
  csv_mbs = throughput_mbs(csv_data, iterations) { |data| CSV.parse(data) }
  vfcsv_mbs = throughput_mbs(csv_data, iterations) { |data| VFCSV.parse(data) }

  {
    csv: csv_mbs,
    vfcsv: vfcsv_mbs,
    speedup: vfcsv_mbs / csv_mbs
  }
end
227
+
228
+ puts
229
+ puts "| Dataset | CSV (MB/s) | VFCSV (MB/s) | Speedup |"
230
+ puts "|---------|------------|--------------|---------|"
231
+ results.each do |name, data|
232
+ puts "| #{name.ljust(7)} | #{data[:csv].round(1).to_s.rjust(10)} | #{data[:vfcsv].round(1).to_s.rjust(12)} | #{data[:speedup].round(1)}x |"
233
+ end
234
+
235
+ puts
236
+ puts "=" * 70
237
+ puts "FEATURE COMPARISON"
238
+ puts "=" * 70
239
+ puts
240
+ puts "| Feature | CSV | VFCSV | FastCSV | FastestCSV | ZSV | OSV |"
241
+ puts "|----------------------------|-----|-------|---------|------------|-----|-----|"
242
+ puts "| Drop-in replacement | N/A | ✓ | ✓ | ✗ | ~ | ✗ |"
243
+ puts "| SIMD acceleration | ✗ | ✓ | ✗ | ✗ | ✓ | ✗ |"
244
+ puts "| Row/Table classes | ✓ | ✓ | ✓ | ✗ | ✗ | ✗ |"
245
+ puts "| Converters (:integer, etc) | ✓ | ✓ | ✓ | ✗ | ✗ | ✗ |"
246
+ puts "| Header converters | ✓ | ✓ | ✓ | ✗ | ✗ | ✗ |"
247
+ puts "| Multiline fields | ✓ | ✓ | ✓ | ✗ | ✓ | ✓ |"
248
+ puts "| Quoted fields | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ |"
249
+ puts "| Custom col_sep | ✓ | ✓ | ✓ | ✗ | ✓ | ✓ |"
250
+ puts "| Ruby 4.0 compatible | ✓ | ✓ | ? | ? | ? | ✓ |"
251
+ puts "| Pure Rust/no C deps | N/A | ✓ | ✗ | ✗ | ✗ | ✓ |"
252
+ puts
253
+ puts "Legend: ✓ = supported, ✗ = not supported, ~ = partial, ? = unknown"