osv 0.3.13 → 0.3.15

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 3e44a6bcd337573fcfd8586b5a6989940a8e0e5497f7376a85177eed59223d62
4
- data.tar.gz: a577f31562aa3d5806222f2deb4b86f6594ace07bcb7079d80b812449437abbe
3
+ metadata.gz: 872cf06d1389f45f77b4eefc178cc8462ab165b833ab2c5bf4dc7f92e1c8308e
4
+ data.tar.gz: 84e6c5d0e03389966b8882a5a73f1698ddee3ed0edae24f2fd5b7f257935a98e
5
5
  SHA512:
6
- metadata.gz: 346984e4dfd2a01943f874848920aaa0f63d2e59db72d0307e192e3bc2679a60bba1749020b357021ba4285cccba86480b6a1a13f8d2723803df765009126c10
7
- data.tar.gz: 71b5a79edf20b0377748ff78ebc9f6d11b077f7f8cdc70248d917df108e4e57ef9e5e1f611524b533ec300db5009a02fde3631ab6eaa739bf45aa705e131ebde
6
+ metadata.gz: 445581447e8f5ec336da7843af715a5f5fbc298232a24f303a22eebb844f83f65ecc2e85d877a448119adae9e6a5529e377d87399a36e6f070562fa4ce0a11b7
7
+ data.tar.gz: '08f417b19b0549aa4a3db1538e4be413c5ec8faa3bd18e4c101a6fc3ea3e9496d04c30e39ea8eec9cc0cc3a38f8f83f7c2274e09c75259a26f3609620cf07a80'
data/Cargo.lock CHANGED
@@ -1,6 +1,6 @@
1
1
  # This file is automatically @generated by Cargo.
2
2
  # It is not intended for manual editing.
3
- version = 3
3
+ version = 4
4
4
 
5
5
  [[package]]
6
6
  name = "adler2"
@@ -8,6 +8,19 @@ version = "2.0.0"
8
8
  source = "registry+https://github.com/rust-lang/crates.io-index"
9
9
  checksum = "512761e0bb2578dd7380c6baaa0f4ce03e84f95e960231d1dec8bf4d7d6e2627"
10
10
 
11
+ [[package]]
12
+ name = "ahash"
13
+ version = "0.8.11"
14
+ source = "registry+https://github.com/rust-lang/crates.io-index"
15
+ checksum = "e89da841a80418a9b391ebaea17f5c112ffaaa96f621d2c285b5174da76b9011"
16
+ dependencies = [
17
+ "cfg-if",
18
+ "getrandom",
19
+ "once_cell",
20
+ "version_check",
21
+ "zerocopy",
22
+ ]
23
+
11
24
  [[package]]
12
25
  name = "aho-corasick"
13
26
  version = "1.1.3"
@@ -49,6 +62,15 @@ version = "2.6.0"
49
62
  source = "registry+https://github.com/rust-lang/crates.io-index"
50
63
  checksum = "b048fb63fd8b5923fc5aa7b340d8e156aec7ec02f0c78fa8a6ddc2613f6f71de"
51
64
 
65
+ [[package]]
66
+ name = "cc"
67
+ version = "1.2.7"
68
+ source = "registry+https://github.com/rust-lang/crates.io-index"
69
+ checksum = "a012a0df96dd6d06ba9a1b29d6402d1a5d77c6befd2566afdc26e10603dc93d7"
70
+ dependencies = [
71
+ "shlex",
72
+ ]
73
+
52
74
  [[package]]
53
75
  name = "cexpr"
54
76
  version = "0.6.0"
@@ -127,6 +149,17 @@ version = "0.3.31"
127
149
  source = "registry+https://github.com/rust-lang/crates.io-index"
128
150
  checksum = "05f29059c0c2090612e8d742178b0580d2dc940c837851ad723096f87af6663e"
129
151
 
152
+ [[package]]
153
+ name = "getrandom"
154
+ version = "0.2.15"
155
+ source = "registry+https://github.com/rust-lang/crates.io-index"
156
+ checksum = "c4567c8db10ae91089c99af84c68c38da3ec2f087c3f82960bcdbf3656b6f4d7"
157
+ dependencies = [
158
+ "cfg-if",
159
+ "libc",
160
+ "wasi",
161
+ ]
162
+
130
163
  [[package]]
131
164
  name = "glob"
132
165
  version = "0.3.1"
@@ -148,6 +181,26 @@ version = "1.0.14"
148
181
  source = "registry+https://github.com/rust-lang/crates.io-index"
149
182
  checksum = "d75a2a4b1b190afb6f5425f10f6a8f959d2ea0b9c2b1d79553551850539e4674"
150
183
 
184
+ [[package]]
185
+ name = "jemalloc-sys"
186
+ version = "0.5.4+5.3.0-patched"
187
+ source = "registry+https://github.com/rust-lang/crates.io-index"
188
+ checksum = "ac6c1946e1cea1788cbfde01c993b52a10e2da07f4bac608228d1bed20bfebf2"
189
+ dependencies = [
190
+ "cc",
191
+ "libc",
192
+ ]
193
+
194
+ [[package]]
195
+ name = "jemallocator"
196
+ version = "0.5.4"
197
+ source = "registry+https://github.com/rust-lang/crates.io-index"
198
+ checksum = "a0de374a9f8e63150e6f5e8a60cc14c668226d7a347d8aee1a45766e3c4dd3bc"
199
+ dependencies = [
200
+ "jemalloc-sys",
201
+ "libc",
202
+ ]
203
+
151
204
  [[package]]
152
205
  name = "kanal"
153
206
  version = "0.1.0-pre8"
@@ -186,6 +239,16 @@ dependencies = [
186
239
  "windows-targets",
187
240
  ]
188
241
 
242
+ [[package]]
243
+ name = "libmimalloc-sys"
244
+ version = "0.1.39"
245
+ source = "registry+https://github.com/rust-lang/crates.io-index"
246
+ checksum = "23aa6811d3bd4deb8a84dde645f943476d13b248d818edcf8ce0b2f37f036b44"
247
+ dependencies = [
248
+ "cc",
249
+ "libc",
250
+ ]
251
+
189
252
  [[package]]
190
253
  name = "lock_api"
191
254
  version = "0.4.12"
@@ -237,6 +300,15 @@ version = "2.7.4"
237
300
  source = "registry+https://github.com/rust-lang/crates.io-index"
238
301
  checksum = "78ca9ab1a0babb1e7d5695e3530886289c18cf2f87ec19a575a0abdce112e3a3"
239
302
 
303
+ [[package]]
304
+ name = "mimalloc"
305
+ version = "0.1.43"
306
+ source = "registry+https://github.com/rust-lang/crates.io-index"
307
+ checksum = "68914350ae34959d83f732418d51e2427a794055d0b9529f48259ac07af65633"
308
+ dependencies = [
309
+ "libmimalloc-sys",
310
+ ]
311
+
240
312
  [[package]]
241
313
  name = "minimal-lexical"
242
314
  version = "0.2.1"
@@ -262,19 +334,27 @@ dependencies = [
262
334
  "minimal-lexical",
263
335
  ]
264
336
 
337
+ [[package]]
338
+ name = "once_cell"
339
+ version = "1.20.2"
340
+ source = "registry+https://github.com/rust-lang/crates.io-index"
341
+ checksum = "1261fe7e33c73b354eab43b1273a57c8f967d0391e80353e51f764ac02cf6775"
342
+
265
343
  [[package]]
266
344
  name = "osv"
267
345
  version = "0.1.0"
268
346
  dependencies = [
347
+ "ahash",
269
348
  "csv",
270
349
  "flate2",
350
+ "jemallocator",
271
351
  "kanal",
272
352
  "magnus 0.7.1",
353
+ "mimalloc",
273
354
  "rb-sys",
274
355
  "serde",
275
356
  "serde_magnus",
276
357
  "thiserror",
277
- "xxhash-rust",
278
358
  ]
279
359
 
280
360
  [[package]]
@@ -464,6 +544,18 @@ version = "1.0.14"
464
544
  source = "registry+https://github.com/rust-lang/crates.io-index"
465
545
  checksum = "adb9e6ca4f869e1180728b7950e35922a7fc6397f7b641499e8f3ef06e50dc83"
466
546
 
547
+ [[package]]
548
+ name = "version_check"
549
+ version = "0.9.5"
550
+ source = "registry+https://github.com/rust-lang/crates.io-index"
551
+ checksum = "0b928f33d975fc6ad9f86c8f283853ad26bdd5b10b7f1542aa2fa15e2289105a"
552
+
553
+ [[package]]
554
+ name = "wasi"
555
+ version = "0.11.0+wasi-snapshot-preview1"
556
+ source = "registry+https://github.com/rust-lang/crates.io-index"
557
+ checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423"
558
+
467
559
  [[package]]
468
560
  name = "windows-targets"
469
561
  version = "0.52.6"
@@ -529,7 +621,21 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
529
621
  checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec"
530
622
 
531
623
  [[package]]
532
- name = "xxhash-rust"
533
- version = "0.8.14"
624
+ name = "zerocopy"
625
+ version = "0.7.35"
534
626
  source = "registry+https://github.com/rust-lang/crates.io-index"
535
- checksum = "d7d48f1b18be023c95e7b75f481cac649d74be7c507ff4a407c55cfb957f7934"
627
+ checksum = "1b9b4fd18abc82b8136838da5d50bae7bdea537c574d8dc1a34ed098d6c166f0"
628
+ dependencies = [
629
+ "zerocopy-derive",
630
+ ]
631
+
632
+ [[package]]
633
+ name = "zerocopy-derive"
634
+ version = "0.7.35"
635
+ source = "registry+https://github.com/rust-lang/crates.io-index"
636
+ checksum = "fa4f8080344d4671fb4e831a13ad1e68092748387dfc4f55e356242fae12ce3e"
637
+ dependencies = [
638
+ "proc-macro2",
639
+ "quote",
640
+ "syn",
641
+ ]
data/Gemfile CHANGED
@@ -2,12 +2,12 @@ source "https://rubygems.org"
2
2
 
3
3
  gem "rb_sys", "~> 0.9.56"
4
4
  gem "rake"
5
- gem "csv"
6
5
 
7
6
  # Use local version of osv
8
7
  gemspec
9
8
 
10
9
  group :development, :test do
10
+ gem "csv"
11
11
  gem "minitest", "~> 5.0"
12
12
  gem "benchmark-ips", "~> 2.12"
13
13
  gem "fastcsv", "~> 0.0.7"
data/README.md CHANGED
@@ -114,104 +114,62 @@ When `has_headers` is false, hash keys will be generated as `"c0"`, `"c1"`, etc.
114
114
 
115
115
  ## Performance
116
116
 
117
- This library is faster than the standard Ruby CSV library, and is comparable to the fastest CSV parser gems I've used.
117
+ This library is faster than the standard Ruby CSV library. It's also faster than any other CSV gem I've been able to find.
118
118
 
119
119
  Here's some unscientific benchmarks. You can find the code in the [benchmark/comparison_benchmark.rb](benchmark/comparison_benchmark.rb) file.
120
120
 
121
- ### 10,000 lines
121
+ ### 1,000,000 records
122
122
 
123
123
  ```
124
- Benchmarking with 100001 lines of data
124
+ 🏃 Running benchmarks...
125
+ Benchmarking with 3000001 lines of data
125
126
 
126
- ruby 3.3.6 (2024-11-05 revision 75015d4c1f) [arm64-darwin24]
127
+ ruby 3.3.6 (2024-11-05 revision 75015d4c1f) +YJIT [arm64-darwin24]
127
128
  Warming up --------------------------------------
128
- OSV - Hash output 1.000 i/100ms
129
- CSV - Hash output 1.000 i/100ms
130
- OSV - Array output 1.000 i/100ms
131
- OSV - Direct Open Array output
132
- 12.719M i/100ms
133
- CSV - Array output 1.000 i/100ms
134
- FastCSV - Array output
135
- 1.000 i/100ms
136
- OSV - StringIO 1.000 i/100ms
137
129
  CSV - StringIO 1.000 i/100ms
138
130
  FastCSV - StringIO 1.000 i/100ms
139
- OSV - Gzipped 1.000 i/100ms
140
- CSV - Gzipped 1.000 i/100ms
141
- Calculating -------------------------------------
142
- OSV - Hash output 6.722 (±14.9%) i/s (148.77 ms/i) - 59.000 in 10.074753s
143
- CSV - Hash output 1.223 (± 0.0%) i/s (817.62 ms/i) - 13.000 in 10.788284s
144
- OSV - Array output 17.284 (±11.6%) i/s (57.86 ms/i) - 171.000 in 10.007321s
145
- OSV - Direct Open Array output
146
- 213.629M (±13.5%) i/s (4.68 ns/i) - 1.921B in 10.005506s
147
- CSV - Array output 2.193 (± 0.0%) i/s (455.93 ms/i) - 22.000 in 10.052607s
148
- FastCSV - Array output
149
- 7.993 (± 0.0%) i/s (125.11 ms/i) - 80.000 in 10.053729s
150
- OSV - StringIO 6.626 (±15.1%) i/s (150.91 ms/i) - 66.000 in 10.103646s
151
- CSV - StringIO 1.478 (± 0.0%) i/s (676.78 ms/i) - 15.000 in 10.158640s
152
- FastCSV - StringIO 17.074 (± 5.9%) i/s (58.57 ms/i) - 171.000 in 10.059266s
153
- OSV - Gzipped 5.639 (± 0.0%) i/s (177.32 ms/i) - 57.000 in 10.152487s
154
- CSV - Gzipped 1.176 (± 0.0%) i/s (850.19 ms/i) - 12.000 in 10.233398s
155
-
156
- Comparison:
157
- OSV - Direct Open Array output: 213629268.6 i/s
158
- OSV - Array output: 17.3 i/s - 12360250.79x slower
159
- FastCSV - StringIO: 17.1 i/s - 12511956.50x slower
160
- FastCSV - Array output: 8.0 i/s - 26727225.72x slower
161
- OSV - Hash output: 6.7 i/s - 31780615.83x slower
162
- OSV - StringIO: 6.6 i/s - 32239620.60x slower
163
- OSV - Gzipped: 5.6 i/s - 37881517.48x slower
164
- CSV - Array output: 2.2 i/s - 97400427.87x slower
165
- CSV - StringIO: 1.5 i/s - 144580048.04x slower
166
- CSV - Hash output: 1.2 i/s - 174666591.31x slower
167
- CSV - Gzipped: 1.2 i/s - 181626018.23x slower
168
- ```
169
-
170
- ### 1,000,000 lines
171
-
172
- ```
173
- Benchmarking with 1000001 lines of data
174
-
175
- ruby 3.3.6 (2024-11-05 revision 75015d4c1f) [arm64-darwin24]
176
- Warming up --------------------------------------
177
- OSV - Hash output 1.000 i/100ms
131
+ OSV - StringIO 1.000 i/100ms
178
132
  CSV - Hash output 1.000 i/100ms
179
- OSV - Array output 1.000 i/100ms
180
- OSV - Direct Open Array output
181
- 1.000 i/100ms
133
+ OSV - Hash output 1.000 i/100ms
182
134
  CSV - Array output 1.000 i/100ms
135
+ OSV - Array output 1.000 i/100ms
183
136
  FastCSV - Array output
184
137
  1.000 i/100ms
185
- OSV - StringIO 1.000 i/100ms
186
- CSV - StringIO 1.000 i/100ms
187
- FastCSV - StringIO 1.000 i/100ms
138
+ OSV - Direct Open Array output
139
+ 1.000 i/100ms
188
140
  OSV - Gzipped 1.000 i/100ms
141
+ OSV - Gzipped Direct 1.000 i/100ms
142
+ FastCSV - Gzipped 1.000 i/100ms
189
143
  CSV - Gzipped 1.000 i/100ms
190
144
  Calculating -------------------------------------
191
- OSV - Hash output 0.492 (± 0.0%) i/s (2.03 s/i) - 5.000 in 10.463278s
192
- CSV - Hash output 0.114 (± 0.0%) i/s (8.75 s/i) - 2.000 in 17.573877s
193
- OSV - Array output 1.502 (± 0.0%) i/s (665.58 ms/i) - 14.000 in 10.217551s
194
- OSV - Direct Open Array output
195
- 1.626 (± 0.0%) i/s (614.90 ms/i) - 16.000 in 10.190323s
196
- CSV - Array output 0.183 (± 0.0%) i/s (5.46 s/i) - 2.000 in 10.951943s
145
+ CSV - StringIO 0.079 (± 0.0%) i/s (12.69 s/i) - 3.000 in 38.139709s
146
+ FastCSV - StringIO 0.370 (± 0.0%) i/s (2.71 s/i) - 12.000 in 32.474164s
147
+ OSV - StringIO 0.635 (± 0.0%) i/s (1.58 s/i) - 19.000 in 30.772490s
148
+ CSV - Hash output 0.058 (± 0.0%) i/s (17.11 s/i) - 2.000 in 34.212335s
149
+ OSV - Hash output 0.249 (± 0.0%) i/s (4.01 s/i) - 8.000 in 32.124319s
150
+ CSV - Array output 0.066 (± 0.0%) i/s (15.11 s/i) - 2.000 in 30.212137s
151
+ OSV - Array output 0.665 (± 0.0%) i/s (1.50 s/i) - 20.000 in 30.813986s
197
152
  FastCSV - Array output
198
- 0.326 (± 0.0%) i/s (3.07 s/i) - 4.000 in 12.340605s
199
- OSV - StringIO 0.567 (± 0.0%) i/s (1.76 s/i) - 6.000 in 10.698027s
200
- CSV - StringIO 0.141 (± 0.0%) i/s (7.10 s/i) - 2.000 in 14.237144s
201
- FastCSV - StringIO 0.923 (± 0.0%) i/s (1.08 s/i) - 10.000 in 11.567775s
202
- OSV - Gzipped 0.437 (± 0.0%) i/s (2.29 s/i) - 5.000 in 11.452764s
203
- CSV - Gzipped 0.104 (± 0.0%) i/s (9.64 s/i) - 2.000 in 19.373423s
153
+ 0.351 (± 0.0%) i/s (2.85 s/i) - 11.000 in 31.418786s
154
+ OSV - Direct Open Array output
155
+ 0.713 (± 0.0%) i/s (1.40 s/i) - 22.000 in 30.938525s
156
+ OSV - Gzipped 0.506 (± 0.0%) i/s (1.98 s/i) - 16.000 in 31.709708s
157
+ OSV - Gzipped Direct 0.685 (± 0.0%) i/s (1.46 s/i) - 21.000 in 31.145435s
158
+ FastCSV - Gzipped 0.324 (± 0.0%) i/s (3.09 s/i) - 10.000 in 30.983582s
159
+ CSV - Gzipped 0.057 (± 0.0%) i/s (17.69 s/i) - 2.000 in 35.379009s
204
160
 
205
161
  Comparison:
206
- OSV - Direct Open Array output: 1.6 i/s
207
- OSV - Array output: 1.5 i/s - 1.08x slower
208
- FastCSV - StringIO: 0.9 i/s - 1.76x slower
209
- OSV - StringIO: 0.6 i/s - 2.87x slower
210
- OSV - Hash output: 0.5 i/s - 3.30x slower
211
- OSV - Gzipped: 0.4 i/s - 3.72x slower
212
- FastCSV - Array output: 0.3 i/s - 4.99x slower
213
- CSV - Array output: 0.2 i/s - 8.88x slower
214
- CSV - StringIO: 0.1 i/s - 11.55x slower
215
- CSV - Hash output: 0.1 i/s - 14.24x slower
216
- CSV - Gzipped: 0.1 i/s - 15.68x slower
162
+ OSV - Direct Open Array output: 0.7 i/s
163
+ OSV - Gzipped Direct: 0.7 i/s - 1.04x slower
164
+ OSV - Array output: 0.7 i/s - 1.07x slower
165
+ OSV - StringIO: 0.6 i/s - 1.12x slower
166
+ OSV - Gzipped: 0.5 i/s - 1.41x slower
167
+ FastCSV - StringIO: 0.4 i/s - 1.93x slower
168
+ FastCSV - Array output: 0.4 i/s - 2.03x slower
169
+ FastCSV - Gzipped: 0.3 i/s - 2.20x slower
170
+ OSV - Hash output: 0.2 i/s - 2.86x slower
171
+ CSV - StringIO: 0.1 i/s - 9.05x slower
172
+ CSV - Array output: 0.1 i/s - 10.77x slower
173
+ CSV - Hash output: 0.1 i/s - 12.20x slower
174
+ CSV - Gzipped: 0.1 i/s - 12.61x slower
217
175
  ```
data/Rakefile CHANGED
@@ -1,21 +1,19 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  require "rake/testtask"
4
- require "rake/extensiontask"
4
+ require "rb_sys/extensiontask"
5
5
 
6
6
  task default: :test
7
7
 
8
- Rake::ExtensionTask.new("osv") do |c|
9
- c.lib_dir = "lib/osv"
10
- c.ext_dir = "ext/osv"
11
- end
8
+ GEMSPEC = Gem::Specification.load("osv.gemspec")
12
9
 
13
- task :dev do
14
- ENV["RB_SYS_CARGO_PROFILE"] = "release"
10
+ RbSys::ExtensionTask.new("osv", GEMSPEC) do |ext|
11
+ ext.lib_dir = "lib/osv"
12
+ ext.ext_dir = "ext/osv"
15
13
  end
16
14
 
17
15
  Rake::TestTask.new do |t|
18
- t.deps << :dev << :compile
16
+ t.deps << :compile
19
17
  t.test_files = FileList[File.expand_path("test/*_test.rb", __dir__)]
20
18
  t.libs << "lib"
21
19
  t.libs << "test"
data/ext/osv/Cargo.toml CHANGED
@@ -7,6 +7,7 @@ edition = "2021"
7
7
  crate-type = ["cdylib"]
8
8
 
9
9
  [dependencies]
10
+ ahash = "0.8"
10
11
  csv = "^1.3"
11
12
  flate2 = "1.0.35"
12
13
  kanal = "0.1.0-pre8"
@@ -15,4 +16,9 @@ rb-sys = "^0.9"
15
16
  serde = { version = "1.0", features = ["derive"] }
16
17
  serde_magnus = "0.8.1"
17
18
  thiserror = "2.0"
18
- xxhash-rust = { version = "0.8.12", features = ["xxh3"] }
19
+
20
+ [target.'cfg(target_os = "linux")'.dependencies]
21
+ jemallocator = { version = "0.5", features = ["disable_initial_exec_tls"] }
22
+
23
+ [target.'cfg(not(any(target_os = "linux", target_os = "windows")))'.dependencies]
24
+ mimalloc = { version = "0.1", default-features = false }
@@ -0,0 +1,13 @@
1
+ #[cfg(target_os = "linux")]
2
+ use jemallocator::Jemalloc;
3
+
4
+ #[cfg(not(any(target_os = "linux", target_os = "windows")))]
5
+ use mimalloc::MiMalloc;
6
+
7
+ #[global_allocator]
8
+ #[cfg(target_os = "linux")]
9
+ static ALLOC: Jemalloc = Jemalloc;
10
+
11
+ #[global_allocator]
12
+ #[cfg(not(any(target_os = "linux", target_os = "windows")))]
13
+ static ALLOC: MiMalloc = MiMalloc;