vfcsv 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,137 @@
1
+ #!/usr/bin/env ruby
2
+ # frozen_string_literal: true
3
+
4
+ require "bundler/setup"
5
+ require "benchmark/ips"
6
+ require "csv"
7
+ require_relative "../lib/vfcsv"
8
+
9
# Print the runtime configuration up front so the benchmark figures below can
# be read in context: interpreter version, JIT state, and what VFCSV reports
# about its SIMD support.
yjit_state = defined?(RubyVM::YJIT) && RubyVM::YJIT.enabled? ? 'enabled' : 'disabled'
zjit_state = defined?(RubyVM::ZJIT) && RubyVM::ZJIT.enabled? ? 'enabled' : 'disabled'

puts "Ruby: #{RUBY_VERSION}"
puts "YJIT: #{yjit_state}"
puts "ZJIT: #{zjit_state}"
puts "VFCSV SIMD: #{VFCSV.simd_info.inspect}"
puts
14
+
15
+ # Generate test CSV data of various sizes
16
# Builds an unquoted CSV document used as benchmark input.
#
# The first line is a header ("col1,col2,..."); each following line holds
# +cols+ cells of the form "value<row>_<col>". Every line, including the last
# one, is terminated by "\n".
#
# @param rows [Integer] number of data rows (0 yields a header-only document)
# @param cols [Integer] number of columns in the header and in every row
# @return [String] the generated CSV text
def generate_csv(rows, cols)
  header = (1..cols).map { |i| "col#{i}" }.join(",")
  records = (1..rows).map do |r|
    (1..cols).map { |c| "value#{r}_#{c}" }.join(",")
  end

  # Terminate each line on its own so that rows == 0 produces "header\n"
  # instead of "header\n\n" (a stray blank line that parses as an extra row).
  [header, *records].map { |line| "#{line}\n" }.join
end
23
+
24
# Builds a CSV document in which every field is wrapped in double quotes and
# every data cell contains an embedded comma, so the parser's quoted-field
# handling is exercised.
#
# The header cells are "col<n>" (quoted); data cells are
# "value <row>, col <col>" (quoted). Every line, including the last one, is
# terminated by "\n".
#
# @param rows [Integer] number of data rows (0 yields a header-only document)
# @param cols [Integer] number of columns in the header and in every row
# @return [String] the generated CSV text
def generate_quoted_csv(rows, cols)
  header = (1..cols).map { |i| "\"col#{i}\"" }.join(",")
  records = (1..rows).map do |r|
    (1..cols).map { |c| "\"value #{r}, col #{c}\"" }.join(",")
  end

  # Terminate each line on its own so that rows == 0 produces just the header
  # line rather than the header followed by a stray blank line.
  [header, *records].map { |line| "#{line}\n" }.join
end
31
+
32
# Builds a CSV document whose data cells are floating-point numbers.
#
# The header cells are "col<n>"; the cell at row r, column c (both 1-based)
# is the string form of r * c * 1.5. Every line, including the last one, is
# terminated by "\n".
#
# @param rows [Integer] number of data rows (0 yields a header-only document)
# @param cols [Integer] number of columns in the header and in every row
# @return [String] the generated CSV text
def generate_numeric_csv(rows, cols)
  header = (1..cols).map { |i| "col#{i}" }.join(",")
  records = (1..rows).map do |r|
    (1..cols).map { |c| (r * c * 1.5).to_s }.join(",")
  end

  # Terminate each line on its own so that rows == 0 produces just the header
  # line rather than the header followed by a stray blank line.
  [header, *records].map { |line| "#{line}\n" }.join
end
39
+
40
# Benchmark inputs: one hand-written document plus generated ones of
# increasing size and differing cell content.
TINY_CSV = "a,b,c\n1,2,3\n"
SMALL_CSV = generate_csv(100, 10)
MEDIUM_CSV = generate_csv(1000, 20)
LARGE_CSV = generate_csv(10000, 20)
QUOTED_CSV = generate_quoted_csv(1000, 10)
NUMERIC_CSV = generate_numeric_csv(1000, 10)

title_rule = "=" * 60
puts title_rule
puts "VFCSV (SIMD) vs Ruby stdlib CSV"
puts title_rule

# Before timing anything, compare VFCSV's return values with stdlib CSV's for
# each entry point used here. Each check prints "<label>: ✓" on a match and
# "<label>: ✗" otherwise; a mismatch is reported but does not stop the script.
puts "\n--- API Compatibility Check ---"
print_verdict = lambda do |label, expected, actual|
  puts "#{label}: #{expected == actual ? '✓' : '✗'}"
end

sample = "name,age,city\nAlice,30,NYC\nBob,25,LA"

print_verdict.call("parse", CSV.parse(sample), VFCSV.parse(sample))

print_verdict.call(
  "parse(headers)",
  CSV.parse(sample, headers: true).map(&:to_h),
  VFCSV.parse(sample, headers: true).map(&:to_h)
)

print_verdict.call("parse_line", CSV.parse_line("a,b,c"), VFCSV.parse_line("a,b,c"))

print_verdict.call(
  "generate",
  CSV.generate { |out| out << [1, 2, 3]; out << [4, 5, 6] },
  VFCSV.generate { |out| out << [1, 2, 3]; out << [4, 5, 6] }
)

print_verdict.call("generate_line", CSV.generate_line([1, 2, 3]), VFCSV.generate_line([1, 2, 3]))
75
+
76
# For every dataset: first check that VFCSV and stdlib CSV parse it to equal
# results (printing a warning if they do not), then race the two parsers
# with benchmark-ips.
{
  "Tiny" => TINY_CSV,
  "Small" => SMALL_CSV,
  "Medium" => MEDIUM_CSV,
  "Large" => LARGE_CSV,
  "Quoted" => QUOTED_CSV,
  "Numeric" => NUMERIC_CSV
}.each do |label, csv|
  puts "\n--- #{label} (#{csv.bytesize}B) ---"

  # Parse once with each library; a mismatch is reported but the benchmark
  # still runs so the timing numbers remain available.
  expected = CSV.parse(csv)
  actual = VFCSV.parse(csv)

  unless expected == actual
    puts "WARNING: Results differ!"
    puts "stdlib rows: #{expected.length}, VFCSV rows: #{actual.length}"
    # Show the first row from each side only when both produced at least one.
    if expected.length > 0 && actual.length > 0
      puts "stdlib[0]: #{expected[0].inspect}"
      puts "VFCSV[0]: #{actual[0].inspect}"
    end
  end

  Benchmark.ips do |bench|
    bench.config(time: 3, warmup: 1)

    bench.report("CSV.parse") { CSV.parse(csv) }
    bench.report("VFCSV.parse") { VFCSV.parse(csv) }

    bench.compare!
  end
end
108
+
109
# Throughput summary: parse each generated dataset a fixed number of times
# with both libraries and print the resulting MB/s side by side.
puts "\n#{'=' * 60}"
puts "THROUGHPUT SUMMARY (MB/s)"
puts "=" * 60

passes = 1000

[
  ["Small", SMALL_CSV],
  ["Medium", MEDIUM_CSV],
  ["Large", LARGE_CSV]
].each do |name, csv|
  megabytes = csv.bytesize / 1_000_000.0

  # Time stdlib CSV first, then VFCSV, each on the monotonic clock; the rate
  # is total megabytes parsed divided by elapsed seconds.
  stdlib_mbs, vfcsv_mbs = [CSV, VFCSV].map do |parser|
    started = Process.clock_gettime(Process::CLOCK_MONOTONIC)
    passes.times { parser.parse(csv) }
    finished = Process.clock_gettime(Process::CLOCK_MONOTONIC)
    (megabytes * passes) / (finished - started)
  end

  speedup = vfcsv_mbs / stdlib_mbs
  verdict =
    if speedup > 1
      "#{speedup.round(1)}x faster"
    else
      "#{(1 / speedup).round(1)}x slower"
    end

  puts "#{name.ljust(10)} CSV=#{stdlib_mbs.round(1)} MB/s VFCSV=#{vfcsv_mbs.round(1)} MB/s (#{verdict})"
end
@@ -0,0 +1,289 @@
1
+ # This file is automatically @generated by Cargo.
2
+ # It is not intended for manual editing.
3
+ version = 4
4
+
5
+ [[package]]
6
+ name = "aho-corasick"
7
+ version = "1.1.4"
8
+ source = "registry+https://github.com/rust-lang/crates.io-index"
9
+ checksum = "ddd31a130427c27518df266943a5308ed92d4b226cc639f5a8f1002816174301"
10
+ dependencies = [
11
+ "memchr",
12
+ ]
13
+
14
+ [[package]]
15
+ name = "bindgen"
16
+ version = "0.69.5"
17
+ source = "registry+https://github.com/rust-lang/crates.io-index"
18
+ checksum = "271383c67ccabffb7381723dea0672a673f292304fcb45c01cc648c7a8d58088"
19
+ dependencies = [
20
+ "bitflags",
21
+ "cexpr",
22
+ "clang-sys",
23
+ "itertools",
24
+ "lazy_static",
25
+ "lazycell",
26
+ "proc-macro2",
27
+ "quote",
28
+ "regex",
29
+ "rustc-hash",
30
+ "shlex",
31
+ "syn",
32
+ ]
33
+
34
+ [[package]]
35
+ name = "bitflags"
36
+ version = "2.10.0"
37
+ source = "registry+https://github.com/rust-lang/crates.io-index"
38
+ checksum = "812e12b5285cc515a9c72a5c1d3b6d46a19dac5acfef5265968c166106e31dd3"
39
+
40
+ [[package]]
41
+ name = "cexpr"
42
+ version = "0.6.0"
43
+ source = "registry+https://github.com/rust-lang/crates.io-index"
44
+ checksum = "6fac387a98bb7c37292057cffc56d62ecb629900026402633ae9160df93a8766"
45
+ dependencies = [
46
+ "nom",
47
+ ]
48
+
49
+ [[package]]
50
+ name = "cfg-if"
51
+ version = "1.0.4"
52
+ source = "registry+https://github.com/rust-lang/crates.io-index"
53
+ checksum = "9330f8b2ff13f34540b44e946ef35111825727b38d33286ef986142615121801"
54
+
55
+ [[package]]
56
+ name = "clang-sys"
57
+ version = "1.8.1"
58
+ source = "registry+https://github.com/rust-lang/crates.io-index"
59
+ checksum = "0b023947811758c97c59bf9d1c188fd619ad4718dcaa767947df1cadb14f39f4"
60
+ dependencies = [
61
+ "glob",
62
+ "libc",
63
+ "libloading",
64
+ ]
65
+
66
+ [[package]]
67
+ name = "either"
68
+ version = "1.15.0"
69
+ source = "registry+https://github.com/rust-lang/crates.io-index"
70
+ checksum = "48c757948c5ede0e46177b7add2e67155f70e33c07fea8284df6576da70b3719"
71
+
72
+ [[package]]
73
+ name = "glob"
74
+ version = "0.3.3"
75
+ source = "registry+https://github.com/rust-lang/crates.io-index"
76
+ checksum = "0cc23270f6e1808e30a928bdc84dea0b9b4136a8bc82338574f23baf47bbd280"
77
+
78
+ [[package]]
79
+ name = "itertools"
80
+ version = "0.12.1"
81
+ source = "registry+https://github.com/rust-lang/crates.io-index"
82
+ checksum = "ba291022dbbd398a455acf126c1e341954079855bc60dfdda641363bd6922569"
83
+ dependencies = [
84
+ "either",
85
+ ]
86
+
87
+ [[package]]
88
+ name = "lazy_static"
89
+ version = "1.5.0"
90
+ source = "registry+https://github.com/rust-lang/crates.io-index"
91
+ checksum = "bbd2bcb4c963f2ddae06a2efc7e9f3591312473c50c6685e1f298068316e66fe"
92
+
93
+ [[package]]
94
+ name = "lazycell"
95
+ version = "1.3.0"
96
+ source = "registry+https://github.com/rust-lang/crates.io-index"
97
+ checksum = "830d08ce1d1d941e6b30645f1a0eb5643013d835ce3779a5fc208261dbe10f55"
98
+
99
+ [[package]]
100
+ name = "libc"
101
+ version = "0.2.180"
102
+ source = "registry+https://github.com/rust-lang/crates.io-index"
103
+ checksum = "bcc35a38544a891a5f7c865aca548a982ccb3b8650a5b06d0fd33a10283c56fc"
104
+
105
+ [[package]]
106
+ name = "libloading"
107
+ version = "0.8.9"
108
+ source = "registry+https://github.com/rust-lang/crates.io-index"
109
+ checksum = "d7c4b02199fee7c5d21a5ae7d8cfa79a6ef5bb2fc834d6e9058e89c825efdc55"
110
+ dependencies = [
111
+ "cfg-if",
112
+ "windows-link",
113
+ ]
114
+
115
+ [[package]]
116
+ name = "magnus"
117
+ version = "0.9.0"
118
+ source = "git+https://github.com/matsadler/magnus#2370a35261db7476f59e165752791fd60fddc26b"
119
+ dependencies = [
120
+ "magnus-macros",
121
+ "rb-sys",
122
+ "rb-sys-env",
123
+ "seq-macro",
124
+ ]
125
+
126
+ [[package]]
127
+ name = "magnus-macros"
128
+ version = "0.9.0"
129
+ source = "git+https://github.com/matsadler/magnus#2370a35261db7476f59e165752791fd60fddc26b"
130
+ dependencies = [
131
+ "proc-macro2",
132
+ "quote",
133
+ "syn",
134
+ ]
135
+
136
+ [[package]]
137
+ name = "memchr"
138
+ version = "2.7.6"
139
+ source = "registry+https://github.com/rust-lang/crates.io-index"
140
+ checksum = "f52b00d39961fc5b2736ea853c9cc86238e165017a493d1d5c8eac6bdc4cc273"
141
+
142
+ [[package]]
143
+ name = "minimal-lexical"
144
+ version = "0.2.1"
145
+ source = "registry+https://github.com/rust-lang/crates.io-index"
146
+ checksum = "68354c5c6bd36d73ff3feceb05efa59b6acb7626617f4962be322a825e61f79a"
147
+
148
+ [[package]]
149
+ name = "nom"
150
+ version = "7.1.3"
151
+ source = "registry+https://github.com/rust-lang/crates.io-index"
152
+ checksum = "d273983c5a657a70a3e8f2a01329822f3b8c8172b73826411a55751e404a0a4a"
153
+ dependencies = [
154
+ "memchr",
155
+ "minimal-lexical",
156
+ ]
157
+
158
+ [[package]]
159
+ name = "proc-macro2"
160
+ version = "1.0.105"
161
+ source = "registry+https://github.com/rust-lang/crates.io-index"
162
+ checksum = "535d180e0ecab6268a3e718bb9fd44db66bbbc256257165fc699dadf70d16fe7"
163
+ dependencies = [
164
+ "unicode-ident",
165
+ ]
166
+
167
+ [[package]]
168
+ name = "quote"
169
+ version = "1.0.43"
170
+ source = "registry+https://github.com/rust-lang/crates.io-index"
171
+ checksum = "dc74d9a594b72ae6656596548f56f667211f8a97b3d4c3d467150794690dc40a"
172
+ dependencies = [
173
+ "proc-macro2",
174
+ ]
175
+
176
+ [[package]]
177
+ name = "rb-sys"
178
+ version = "0.9.124"
179
+ source = "registry+https://github.com/rust-lang/crates.io-index"
180
+ checksum = "c85c4188462601e2aa1469def389c17228566f82ea72f137ed096f21591bc489"
181
+ dependencies = [
182
+ "rb-sys-build",
183
+ ]
184
+
185
+ [[package]]
186
+ name = "rb-sys-build"
187
+ version = "0.9.124"
188
+ source = "registry+https://github.com/rust-lang/crates.io-index"
189
+ checksum = "568068db4102230882e6d4ae8de6632e224ca75fe5970f6e026a04e91ed635d3"
190
+ dependencies = [
191
+ "bindgen",
192
+ "lazy_static",
193
+ "proc-macro2",
194
+ "quote",
195
+ "regex",
196
+ "shell-words",
197
+ "syn",
198
+ ]
199
+
200
+ [[package]]
201
+ name = "rb-sys-env"
202
+ version = "0.2.3"
203
+ source = "registry+https://github.com/rust-lang/crates.io-index"
204
+ checksum = "cca7ad6a7e21e72151d56fe2495a259b5670e204c3adac41ee7ef676ea08117a"
205
+
206
+ [[package]]
207
+ name = "regex"
208
+ version = "1.12.2"
209
+ source = "registry+https://github.com/rust-lang/crates.io-index"
210
+ checksum = "843bc0191f75f3e22651ae5f1e72939ab2f72a4bc30fa80a066bd66edefc24d4"
211
+ dependencies = [
212
+ "aho-corasick",
213
+ "memchr",
214
+ "regex-automata",
215
+ "regex-syntax",
216
+ ]
217
+
218
+ [[package]]
219
+ name = "regex-automata"
220
+ version = "0.4.13"
221
+ source = "registry+https://github.com/rust-lang/crates.io-index"
222
+ checksum = "5276caf25ac86c8d810222b3dbb938e512c55c6831a10f3e6ed1c93b84041f1c"
223
+ dependencies = [
224
+ "aho-corasick",
225
+ "memchr",
226
+ "regex-syntax",
227
+ ]
228
+
229
+ [[package]]
230
+ name = "regex-syntax"
231
+ version = "0.8.8"
232
+ source = "registry+https://github.com/rust-lang/crates.io-index"
233
+ checksum = "7a2d987857b319362043e95f5353c0535c1f58eec5336fdfcf626430af7def58"
234
+
235
+ [[package]]
236
+ name = "rustc-hash"
237
+ version = "1.1.0"
238
+ source = "registry+https://github.com/rust-lang/crates.io-index"
239
+ checksum = "08d43f7aa6b08d49f382cde6a7982047c3426db949b1424bc4b7ec9ae12c6ce2"
240
+
241
+ [[package]]
242
+ name = "seq-macro"
243
+ version = "0.3.6"
244
+ source = "registry+https://github.com/rust-lang/crates.io-index"
245
+ checksum = "1bc711410fbe7399f390ca1c3b60ad0f53f80e95c5eb935e52268a0e2cd49acc"
246
+
247
+ [[package]]
248
+ name = "shell-words"
249
+ version = "1.1.1"
250
+ source = "registry+https://github.com/rust-lang/crates.io-index"
251
+ checksum = "dc6fe69c597f9c37bfeeeeeb33da3530379845f10be461a66d16d03eca2ded77"
252
+
253
+ [[package]]
254
+ name = "shlex"
255
+ version = "1.3.0"
256
+ source = "registry+https://github.com/rust-lang/crates.io-index"
257
+ checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64"
258
+
259
+ [[package]]
260
+ name = "syn"
261
+ version = "2.0.114"
262
+ source = "registry+https://github.com/rust-lang/crates.io-index"
263
+ checksum = "d4d107df263a3013ef9b1879b0df87d706ff80f65a86ea879bd9c31f9b307c2a"
264
+ dependencies = [
265
+ "proc-macro2",
266
+ "quote",
267
+ "unicode-ident",
268
+ ]
269
+
270
+ [[package]]
271
+ name = "unicode-ident"
272
+ version = "1.0.22"
273
+ source = "registry+https://github.com/rust-lang/crates.io-index"
274
+ checksum = "9312f7c4f6ff9069b165498234ce8be658059c6728633667c526e27dc2cf1df5"
275
+
276
+ [[package]]
277
+ name = "vfcsv_rust"
278
+ version = "0.1.0"
279
+ dependencies = [
280
+ "magnus",
281
+ "memchr",
282
+ "rb-sys",
283
+ ]
284
+
285
+ [[package]]
286
+ name = "windows-link"
287
+ version = "0.2.1"
288
+ source = "registry+https://github.com/rust-lang/crates.io-index"
289
+ checksum = "f0805222e57f7521d6a62e36fa9163bc891acd422f971defe97d64e70d0a4fe5"
@@ -0,0 +1,27 @@
1
+ [package]
2
+ name = "vfcsv_rust"
3
+ version = "0.1.0"
4
+ edition = "2021"
5
+ publish = false
6
+
7
+ [lib]
8
+ crate-type = ["cdylib"]
9
+
10
+ [dependencies]
11
+ # Ruby bindings - use git for Ruby 4.0 compatibility
12
+ magnus = { git = "https://github.com/matsadler/magnus", features = ["rb-sys"] }
13
+ rb-sys = { version = "0.9", features = ["stable-api-compiled-fallback"] }
14
+
15
+ # Fast memory operations
16
+ memchr = "2.7"
17
+
18
+ [profile.release]
19
+ lto = "fat"
20
+ codegen-units = 1
21
+ opt-level = 3
22
+ panic = "abort"
23
+ debug = false
24
+ strip = "symbols"
25
+
26
+ [profile.release.build-override]
27
+ opt-level = 3
@@ -0,0 +1,6 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "mkmf"
4
+ require "rb_sys/mkmf"
5
+
6
+ create_rust_makefile("vfcsv/vfcsv_rust") # NOTE(review): presumably emits the Makefile that builds the Rust crate as the "vfcsv/vfcsv_rust" extension — confirm against the rb_sys/mkmf docs