osv 0.4.4 → 0.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Cargo.lock +87 -37
- data/README.md +26 -26
- data/ext/osv/Cargo.toml +1 -1
- data/ext/osv/src/csv/builder.rs +4 -49
- data/ext/osv/src/csv/mod.rs +0 -2
- data/ext/osv/src/csv/parser.rs +1 -1
- data/ext/osv/src/csv/record_reader.rs +3 -3
- data/ext/osv/src/csv/ruby_reader.rs +98 -148
- data/ext/osv/src/reader.rs +23 -20
- data/lib/osv/version.rb +1 -1
- metadata +2 -3
- data/ext/osv/src/csv/ruby_integration.rs +0 -19
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: a791a08d1a1c44fcc4e69df668e1c04e9e099e8388d048f7e058aea2a7ee3305
|
4
|
+
data.tar.gz: 2616a3ab845191c29fb3f852f8a1a3a1cc51c33d39e5f4bc7b0da3ae733bf9f3
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 4d1a816e49ffbf62dcd33dd8a2d3c49f32bac34ddfe2681e58a55839e33afadf4487c7e216e67bf878071272d0fc129370a598657fb3fb0bc8ec6c7934e6de3d
|
7
|
+
data.tar.gz: 67f8f1416e8f11cac750e60b46437293bd41421a2ea3de9b5557a78ea78c34e2be8251a62c9650f16af449c0f534d807c7aabd838c274486d3cfe0eaef7eecde
|
data/Cargo.lock
CHANGED
@@ -15,7 +15,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
15
15
|
checksum = "e89da841a80418a9b391ebaea17f5c112ffaaa96f621d2c285b5174da76b9011"
|
16
16
|
dependencies = [
|
17
17
|
"cfg-if",
|
18
|
-
"getrandom",
|
18
|
+
"getrandom 0.2.15",
|
19
19
|
"once_cell",
|
20
20
|
"version_check",
|
21
21
|
"zerocopy",
|
@@ -30,12 +30,6 @@ dependencies = [
|
|
30
30
|
"memchr",
|
31
31
|
]
|
32
32
|
|
33
|
-
[[package]]
|
34
|
-
name = "autocfg"
|
35
|
-
version = "1.4.0"
|
36
|
-
source = "registry+https://github.com/rust-lang/crates.io-index"
|
37
|
-
checksum = "ace50bade8e6234aa140d9a2f552bbee1db4d353f69b8217bc503490fc1a9f26"
|
38
|
-
|
39
33
|
[[package]]
|
40
34
|
name = "bindgen"
|
41
35
|
version = "0.69.5"
|
@@ -133,6 +127,22 @@ version = "1.13.0"
|
|
133
127
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
134
128
|
checksum = "60b1af1c220855b6ceac025d3f6ecdd2b7c4894bfe9cd9bda4fbb4bc7c0d4cf0"
|
135
129
|
|
130
|
+
[[package]]
|
131
|
+
name = "errno"
|
132
|
+
version = "0.3.10"
|
133
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
134
|
+
checksum = "33d852cb9b869c2a9b3df2f71a3074817f01e1844f839a144f5fcef059a4eb5d"
|
135
|
+
dependencies = [
|
136
|
+
"libc",
|
137
|
+
"windows-sys",
|
138
|
+
]
|
139
|
+
|
140
|
+
[[package]]
|
141
|
+
name = "fastrand"
|
142
|
+
version = "2.3.0"
|
143
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
144
|
+
checksum = "37909eebbb50d72f9059c3b6d82c0463f2ff062c9e95845c43a6c9c0355411be"
|
145
|
+
|
136
146
|
[[package]]
|
137
147
|
name = "flate2"
|
138
148
|
version = "1.0.35"
|
@@ -144,20 +154,26 @@ dependencies = [
|
|
144
154
|
]
|
145
155
|
|
146
156
|
[[package]]
|
147
|
-
name = "
|
148
|
-
version = "0.
|
157
|
+
name = "getrandom"
|
158
|
+
version = "0.2.15"
|
149
159
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
150
|
-
checksum = "
|
160
|
+
checksum = "c4567c8db10ae91089c99af84c68c38da3ec2f087c3f82960bcdbf3656b6f4d7"
|
161
|
+
dependencies = [
|
162
|
+
"cfg-if",
|
163
|
+
"libc",
|
164
|
+
"wasi 0.11.0+wasi-snapshot-preview1",
|
165
|
+
]
|
151
166
|
|
152
167
|
[[package]]
|
153
168
|
name = "getrandom"
|
154
|
-
version = "0.
|
169
|
+
version = "0.3.1"
|
155
170
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
156
|
-
checksum = "
|
171
|
+
checksum = "43a49c392881ce6d5c3b8cb70f98717b7c07aabbdff06687b9030dbfbe2725f8"
|
157
172
|
dependencies = [
|
158
173
|
"cfg-if",
|
159
174
|
"libc",
|
160
|
-
"wasi",
|
175
|
+
"wasi 0.13.3+wasi-0.2.2",
|
176
|
+
"windows-targets",
|
161
177
|
]
|
162
178
|
|
163
179
|
[[package]]
|
@@ -210,16 +226,6 @@ dependencies = [
|
|
210
226
|
"libc",
|
211
227
|
]
|
212
228
|
|
213
|
-
[[package]]
|
214
|
-
name = "kanal"
|
215
|
-
version = "0.1.0-pre8"
|
216
|
-
source = "registry+https://github.com/rust-lang/crates.io-index"
|
217
|
-
checksum = "b05d55519627edaf7fd0f29981f6dc03fb52df3f5b257130eb8d0bf2801ea1d7"
|
218
|
-
dependencies = [
|
219
|
-
"futures-core",
|
220
|
-
"lock_api",
|
221
|
-
]
|
222
|
-
|
223
229
|
[[package]]
|
224
230
|
name = "lazy_static"
|
225
231
|
version = "1.5.0"
|
@@ -259,14 +265,10 @@ dependencies = [
|
|
259
265
|
]
|
260
266
|
|
261
267
|
[[package]]
|
262
|
-
name = "
|
263
|
-
version = "0.4.
|
268
|
+
name = "linux-raw-sys"
|
269
|
+
version = "0.4.15"
|
264
270
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
265
|
-
checksum = "
|
266
|
-
dependencies = [
|
267
|
-
"autocfg",
|
268
|
-
"scopeguard",
|
269
|
-
]
|
271
|
+
checksum = "d26c52dbd32dccf2d10cac7725f8eae5296885fb5703b261f7d0a0739ec807ab"
|
270
272
|
|
271
273
|
[[package]]
|
272
274
|
name = "magnus"
|
@@ -358,12 +360,12 @@ dependencies = [
|
|
358
360
|
"flate2",
|
359
361
|
"itertools 0.14.0",
|
360
362
|
"jemallocator",
|
361
|
-
"kanal",
|
362
363
|
"magnus 0.7.1",
|
363
364
|
"mimalloc",
|
364
365
|
"rb-sys",
|
365
366
|
"serde",
|
366
367
|
"serde_magnus",
|
368
|
+
"tempfile",
|
367
369
|
"thiserror",
|
368
370
|
]
|
369
371
|
|
@@ -451,16 +453,23 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
451
453
|
checksum = "08d43f7aa6b08d49f382cde6a7982047c3426db949b1424bc4b7ec9ae12c6ce2"
|
452
454
|
|
453
455
|
[[package]]
|
454
|
-
name = "
|
455
|
-
version = "
|
456
|
+
name = "rustix"
|
457
|
+
version = "0.38.44"
|
456
458
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
457
|
-
checksum = "
|
459
|
+
checksum = "fdb5bc1ae2baa591800df16c9ca78619bf65c0488b41b96ccec5d11220d8c154"
|
460
|
+
dependencies = [
|
461
|
+
"bitflags",
|
462
|
+
"errno",
|
463
|
+
"libc",
|
464
|
+
"linux-raw-sys",
|
465
|
+
"windows-sys",
|
466
|
+
]
|
458
467
|
|
459
468
|
[[package]]
|
460
|
-
name = "
|
461
|
-
version = "1.
|
469
|
+
name = "ryu"
|
470
|
+
version = "1.0.18"
|
462
471
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
463
|
-
checksum = "
|
472
|
+
checksum = "f3cb5ba0dc43242ce17de99c180e96db90b235b8a9fdc9543c96d2209116bd9f"
|
464
473
|
|
465
474
|
[[package]]
|
466
475
|
name = "seq-macro"
|
@@ -528,6 +537,20 @@ version = "1.0.1"
|
|
528
537
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
529
538
|
checksum = "55937e1799185b12863d447f42597ed69d9928686b8d88a1df17376a097d8369"
|
530
539
|
|
540
|
+
[[package]]
|
541
|
+
name = "tempfile"
|
542
|
+
version = "3.17.1"
|
543
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
544
|
+
checksum = "22e5a0acb1f3f55f65cc4a866c361b2fb2a0ff6366785ae6fbb5f85df07ba230"
|
545
|
+
dependencies = [
|
546
|
+
"cfg-if",
|
547
|
+
"fastrand",
|
548
|
+
"getrandom 0.3.1",
|
549
|
+
"once_cell",
|
550
|
+
"rustix",
|
551
|
+
"windows-sys",
|
552
|
+
]
|
553
|
+
|
531
554
|
[[package]]
|
532
555
|
name = "thiserror"
|
533
556
|
version = "2.0.9"
|
@@ -566,6 +589,24 @@ version = "0.11.0+wasi-snapshot-preview1"
|
|
566
589
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
567
590
|
checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423"
|
568
591
|
|
592
|
+
[[package]]
|
593
|
+
name = "wasi"
|
594
|
+
version = "0.13.3+wasi-0.2.2"
|
595
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
596
|
+
checksum = "26816d2e1a4a36a2940b96c5296ce403917633dff8f3440e9b236ed6f6bacad2"
|
597
|
+
dependencies = [
|
598
|
+
"wit-bindgen-rt",
|
599
|
+
]
|
600
|
+
|
601
|
+
[[package]]
|
602
|
+
name = "windows-sys"
|
603
|
+
version = "0.59.0"
|
604
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
605
|
+
checksum = "1e38bc4d79ed67fd075bcc251a1c39b32a1776bbe92e5bef1f0bf1f8c531853b"
|
606
|
+
dependencies = [
|
607
|
+
"windows-targets",
|
608
|
+
]
|
609
|
+
|
569
610
|
[[package]]
|
570
611
|
name = "windows-targets"
|
571
612
|
version = "0.52.6"
|
@@ -630,6 +671,15 @@ version = "0.52.6"
|
|
630
671
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
631
672
|
checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec"
|
632
673
|
|
674
|
+
[[package]]
|
675
|
+
name = "wit-bindgen-rt"
|
676
|
+
version = "0.33.0"
|
677
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
678
|
+
checksum = "3268f3d866458b787f390cf61f4bbb563b922d091359f9608842999eaee3943c"
|
679
|
+
dependencies = [
|
680
|
+
"bitflags",
|
681
|
+
]
|
682
|
+
|
633
683
|
[[package]]
|
634
684
|
name = "zerocopy"
|
635
685
|
version = "0.7.35"
|
data/README.md
CHANGED
@@ -143,34 +143,34 @@ OSV - Gzipped Direct 1.000 i/100ms
|
|
143
143
|
FastCSV - Gzipped 1.000 i/100ms
|
144
144
|
CSV - Gzipped 1.000 i/100ms
|
145
145
|
Calculating -------------------------------------
|
146
|
-
CSV - StringIO 0.
|
147
|
-
FastCSV - StringIO 0.
|
148
|
-
OSV - StringIO 0.
|
149
|
-
CSV - Hash output 0.
|
150
|
-
OSV - Hash output 0.
|
151
|
-
CSV - Array output 0.
|
152
|
-
OSV - Array output 0.
|
146
|
+
CSV - StringIO 0.081 (± 0.0%) i/s (12.36 s/i) - 3.000 in 37.155983s
|
147
|
+
FastCSV - StringIO 0.367 (± 0.0%) i/s (2.73 s/i) - 11.000 in 30.182262s
|
148
|
+
OSV - StringIO 0.673 (± 0.0%) i/s (1.49 s/i) - 20.000 in 30.247575s
|
149
|
+
CSV - Hash output 0.056 (± 0.0%) i/s (17.73 s/i) - 2.000 in 35.464673s
|
150
|
+
OSV - Hash output 0.266 (± 0.0%) i/s (3.77 s/i) - 8.000 in 30.511406s
|
151
|
+
CSV - Array output 0.068 (± 0.0%) i/s (14.76 s/i) - 3.000 in 44.371496s
|
152
|
+
OSV - Array output 0.631 (± 0.0%) i/s (1.59 s/i) - 19.000 in 30.896566s
|
153
153
|
FastCSV - Array output
|
154
|
-
0.
|
154
|
+
0.369 (± 0.0%) i/s (2.71 s/i) - 12.000 in 32.518984s
|
155
155
|
OSV - Direct Open Array output
|
156
|
-
0.
|
157
|
-
OSV - Gzipped 0.
|
158
|
-
OSV - Gzipped Direct 0.
|
159
|
-
FastCSV - Gzipped 0.
|
160
|
-
CSV - Gzipped 0.
|
156
|
+
0.642 (± 0.0%) i/s (1.56 s/i) - 19.000 in 30.162703s
|
157
|
+
OSV - Gzipped 0.519 (± 0.0%) i/s (1.93 s/i) - 16.000 in 31.551051s
|
158
|
+
OSV - Gzipped Direct 0.512 (± 0.0%) i/s (1.95 s/i) - 16.000 in 31.630035s
|
159
|
+
FastCSV - Gzipped 0.321 (± 0.0%) i/s (3.12 s/i) - 10.000 in 31.795400s
|
160
|
+
CSV - Gzipped 0.058 (± 0.0%) i/s (17.34 s/i) - 2.000 in 34.686451s
|
161
161
|
|
162
162
|
Comparison:
|
163
|
-
|
164
|
-
OSV - Direct Open Array output:
|
165
|
-
|
166
|
-
OSV - Gzipped
|
167
|
-
|
168
|
-
|
169
|
-
|
170
|
-
FastCSV -
|
171
|
-
|
172
|
-
|
173
|
-
|
174
|
-
|
175
|
-
|
163
|
+
OSV - StringIO: 0.7 i/s
|
164
|
+
OSV - Direct Open Array output: 0.6 i/s - 1.05x slower
|
165
|
+
OSV - Array output: 0.6 i/s - 1.07x slower
|
166
|
+
OSV - Gzipped: 0.5 i/s - 1.30x slower
|
167
|
+
OSV - Gzipped Direct: 0.5 i/s - 1.31x slower
|
168
|
+
FastCSV - Array output: 0.4 i/s - 1.82x slower
|
169
|
+
FastCSV - StringIO: 0.4 i/s - 1.83x slower
|
170
|
+
FastCSV - Gzipped: 0.3 i/s - 2.10x slower
|
171
|
+
OSV - Hash output: 0.3 i/s - 2.53x slower
|
172
|
+
CSV - StringIO: 0.1 i/s - 8.31x slower
|
173
|
+
CSV - Array output: 0.1 i/s - 9.93x slower
|
174
|
+
CSV - Gzipped: 0.1 i/s - 11.66x slower
|
175
|
+
CSV - Hash output: 0.1 i/s - 11.92x slower
|
176
176
|
```
|
data/ext/osv/Cargo.toml
CHANGED
@@ -10,13 +10,13 @@ crate-type = ["cdylib"]
|
|
10
10
|
ahash = "0.8"
|
11
11
|
csv = "^1.3"
|
12
12
|
flate2 = "1.0.35"
|
13
|
-
kanal = "0.1.0-pre8"
|
14
13
|
magnus = { version = "0.7", features = ["rb-sys"] }
|
15
14
|
rb-sys = "^0.9"
|
16
15
|
serde = { version = "1.0", features = ["derive"] }
|
17
16
|
serde_magnus = "0.8.1"
|
18
17
|
thiserror = "2.0"
|
19
18
|
itertools = "^0.14"
|
19
|
+
tempfile = "3.17.1"
|
20
20
|
|
21
21
|
[target.'cfg(target_os = "linux")'.dependencies]
|
22
22
|
jemallocator = { version = "0.5", features = ["disable_initial_exec_tls"] }
|
data/ext/osv/src/csv/builder.rs
CHANGED
@@ -2,19 +2,13 @@ use super::{
|
|
2
2
|
header_cache::{CacheError, StringCache},
|
3
3
|
parser::RecordParser,
|
4
4
|
record_reader::{RecordReader, READ_BUFFER_SIZE},
|
5
|
-
ruby_reader::
|
6
|
-
ForgottenFileHandle,
|
5
|
+
ruby_reader::RubyReader,
|
7
6
|
};
|
8
|
-
use
|
9
|
-
use magnus::{rb_sys::AsRawValue, value::ReprValue, Error as MagnusError, RString, Ruby, Value};
|
7
|
+
use magnus::{Error as MagnusError, RString, Ruby, Value};
|
10
8
|
use std::{
|
11
9
|
borrow::Cow,
|
12
|
-
|
13
|
-
fs::File,
|
14
|
-
io::{self, BufReader, Read},
|
10
|
+
io::{self, BufReader},
|
15
11
|
marker::PhantomData,
|
16
|
-
mem::ManuallyDrop,
|
17
|
-
os::fd::FromRawFd,
|
18
12
|
};
|
19
13
|
|
20
14
|
use thiserror::Error;
|
@@ -159,48 +153,9 @@ impl<'a, T: RecordParser<'a>> RecordReaderBuilder<'a, T> {
|
|
159
153
|
self
|
160
154
|
}
|
161
155
|
|
162
|
-
/// Handles reading from a file descriptor.
|
163
|
-
fn handle_file_descriptor(&self) -> Result<Box<dyn SeekableRead>, ReaderError> {
|
164
|
-
let raw_value = self.to_read.as_raw();
|
165
|
-
let fd = std::panic::catch_unwind(|| unsafe { rb_sys::rb_io_descriptor(raw_value) })
|
166
|
-
.map_err(|e| ReaderError::FileDescriptor(format!("{:?}", e)))?;
|
167
|
-
|
168
|
-
if fd < 0 {
|
169
|
-
return Err(ReaderError::InvalidFileDescriptor(fd));
|
170
|
-
}
|
171
|
-
|
172
|
-
let file = std::panic::catch_unwind(|| unsafe { File::from_raw_fd(fd) })
|
173
|
-
.map_err(|e| ReaderError::FileDescriptor(format!("{:?}", e)))?;
|
174
|
-
let forgotten = ForgottenFileHandle(ManuallyDrop::new(file));
|
175
|
-
Ok(Box::new(forgotten))
|
176
|
-
}
|
177
|
-
|
178
|
-
/// Handles reading from a file path.
|
179
|
-
fn handle_file_path(&self) -> Result<Box<dyn SeekableRead>, ReaderError> {
|
180
|
-
let path = self.to_read.to_r_string()?.to_string()?;
|
181
|
-
let file = File::open(&path)?;
|
182
|
-
|
183
|
-
if path.ends_with(".gz") {
|
184
|
-
// For gzipped files, we need to decompress them into memory first
|
185
|
-
// since GzDecoder doesn't support seeking
|
186
|
-
let mut decoder = GzDecoder::new(BufReader::with_capacity(READ_BUFFER_SIZE, file));
|
187
|
-
let mut contents = Vec::new();
|
188
|
-
decoder.read_to_end(&mut contents)?;
|
189
|
-
Ok(Box::new(std::io::Cursor::new(contents)))
|
190
|
-
} else {
|
191
|
-
Ok(Box::new(file))
|
192
|
-
}
|
193
|
-
}
|
194
|
-
|
195
156
|
/// Builds the RecordReader with the configured options.
|
196
157
|
pub fn build(self) -> Result<RecordReader<'a, T>, ReaderError> {
|
197
|
-
let readable =
|
198
|
-
self.handle_file_descriptor()?
|
199
|
-
} else if self.to_read.is_kind_of(self.ruby.class_string()) {
|
200
|
-
self.handle_file_path()?
|
201
|
-
} else {
|
202
|
-
build_ruby_reader(&self.ruby, self.to_read)?
|
203
|
-
};
|
158
|
+
let readable = RubyReader::try_from(self.to_read)?;
|
204
159
|
|
205
160
|
let flexible = self.flexible;
|
206
161
|
let reader = BufReader::with_capacity(READ_BUFFER_SIZE, readable);
|
data/ext/osv/src/csv/mod.rs
CHANGED
@@ -3,11 +3,9 @@ mod header_cache;
|
|
3
3
|
mod parser;
|
4
4
|
mod record;
|
5
5
|
mod record_reader;
|
6
|
-
mod ruby_integration;
|
7
6
|
mod ruby_reader;
|
8
7
|
|
9
8
|
pub use builder::RecordReaderBuilder;
|
10
9
|
pub use header_cache::StringCacheKey;
|
11
10
|
pub use record::CowStr;
|
12
11
|
pub use record::CsvRecord;
|
13
|
-
pub use ruby_integration::*;
|
data/ext/osv/src/csv/parser.rs
CHANGED
data/ext/osv/src/csv/record_reader.rs
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
use super::builder::ReaderError;
|
2
2
|
use super::header_cache::StringCacheKey;
|
3
3
|
use super::parser::{CsvRecordType, RecordParser};
|
4
|
-
use super::ruby_reader::
|
4
|
+
use super::ruby_reader::RubyReader;
|
5
5
|
use magnus::{Error, Ruby};
|
6
6
|
use std::borrow::Cow;
|
7
7
|
use std::io::{BufReader, Read};
|
@@ -13,7 +13,7 @@ pub(crate) const READ_BUFFER_SIZE: usize = 16384;
|
|
13
13
|
///
|
14
14
|
/// This struct implements Iterator to provide a streaming interface for CSV records.
|
15
15
|
pub struct RecordReader<'a, T: RecordParser<'a>> {
|
16
|
-
reader: csv::Reader<BufReader<
|
16
|
+
reader: csv::Reader<BufReader<RubyReader>>,
|
17
17
|
headers: Vec<StringCacheKey>,
|
18
18
|
null_string: Option<Cow<'a, str>>,
|
19
19
|
string_record: CsvRecordType,
|
@@ -73,7 +73,7 @@ impl<'a, T: RecordParser<'a>> RecordReader<'a, T> {
|
|
73
73
|
|
74
74
|
/// Creates a new RecordReader instance.
|
75
75
|
pub(crate) fn new(
|
76
|
-
reader: csv::Reader<BufReader<
|
76
|
+
reader: csv::Reader<BufReader<RubyReader>>,
|
77
77
|
headers: Vec<StringCacheKey>,
|
78
78
|
null_string: Option<Cow<'a, str>>,
|
79
79
|
ignore_null_bytes: bool,
|
data/ext/osv/src/csv/ruby_reader.rs
CHANGED
@@ -1,178 +1,128 @@
|
|
1
|
+
use flate2::bufread::GzDecoder;
|
1
2
|
use magnus::{
|
2
|
-
error::Error as MagnusError,
|
3
3
|
value::{Opaque, ReprValue},
|
4
4
|
RClass, RString, Ruby, Value,
|
5
5
|
};
|
6
|
-
use std::
|
7
|
-
|
8
|
-
|
6
|
+
use std::{
|
7
|
+
fs::File,
|
8
|
+
io::{self, BufReader, Read, Write},
|
9
|
+
sync::OnceLock,
|
10
|
+
};
|
9
11
|
|
10
|
-
use super::{builder::ReaderError,
|
12
|
+
use super::{builder::ReaderError, record_reader::READ_BUFFER_SIZE};
|
11
13
|
|
12
14
|
static STRING_IO_CLASS: OnceLock<Opaque<RClass>> = OnceLock::new();
|
13
15
|
|
14
16
|
/// A reader that can handle various Ruby input types (String, StringIO, IO-like objects)
|
15
17
|
/// and provide a standard Read implementation for them.
|
16
|
-
pub
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
impl SeekableRead for ForgottenFileHandle {}
|
28
|
-
|
29
|
-
pub fn build_ruby_reader(ruby: &Ruby, input: Value) -> Result<Box<dyn SeekableRead>, ReaderError> {
|
30
|
-
if RubyReader::is_string_io(ruby, &input) {
|
31
|
-
RubyReader::from_string_io(ruby, input)
|
32
|
-
} else if RubyReader::is_io_like(&input) {
|
33
|
-
RubyReader::from_io(input)
|
34
|
-
} else {
|
35
|
-
RubyReader::from_string_like(input)
|
36
|
-
}
|
18
|
+
pub enum RubyReader {
|
19
|
+
String {
|
20
|
+
inner: Opaque<RString>,
|
21
|
+
offset: usize,
|
22
|
+
},
|
23
|
+
RubyIoLike {
|
24
|
+
inner: Opaque<Value>,
|
25
|
+
},
|
26
|
+
NativeProxyIoLike {
|
27
|
+
proxy_file: Box<dyn Read>,
|
28
|
+
},
|
37
29
|
}
|
38
30
|
|
39
|
-
impl
|
40
|
-
fn
|
41
|
-
let
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
};
|
46
|
-
|
47
|
-
let new_position = self
|
48
|
-
.inner
|
49
|
-
.funcall("seek", (offset, whence))
|
50
|
-
.map_err(|e| io::Error::new(io::ErrorKind::Other, e.to_string()))?;
|
51
|
-
|
52
|
-
Ok(new_position)
|
53
|
-
}
|
54
|
-
}
|
55
|
-
|
56
|
-
impl Write for RubyReader<Value> {
|
57
|
-
fn write(&mut self, buf: &[u8]) -> Result<usize, io::Error> {
|
58
|
-
let ruby_bytes = RString::from_slice(buf);
|
59
|
-
|
60
|
-
let bytes_written = self
|
61
|
-
.inner
|
62
|
-
.funcall::<_, _, usize>("write", (ruby_bytes,))
|
63
|
-
.map_err(|e| io::Error::new(io::ErrorKind::Other, e.to_string()))?;
|
64
|
-
|
65
|
-
Ok(bytes_written)
|
66
|
-
}
|
67
|
-
|
68
|
-
fn flush(&mut self) -> Result<(), io::Error> {
|
69
|
-
self.inner
|
70
|
-
.funcall::<_, _, Value>("flush", ())
|
71
|
-
.map_err(|e| io::Error::new(io::ErrorKind::Other, e.to_string()))?;
|
72
|
-
|
73
|
-
Ok(())
|
31
|
+
impl RubyReader {
|
32
|
+
fn is_string_io(ruby: &Ruby, value: &Value) -> bool {
|
33
|
+
let string_io_class = STRING_IO_CLASS.get_or_init(|| {
|
34
|
+
let class = RClass::from_value(ruby.eval("StringIO").expect("Failed to find StringIO"))
|
35
|
+
.expect("Failed to get StringIO class");
|
36
|
+
Opaque::from(class)
|
37
|
+
});
|
38
|
+
value.is_kind_of(ruby.get_inner(*string_io_class))
|
74
39
|
}
|
75
|
-
}
|
76
40
|
|
77
|
-
|
78
|
-
|
79
|
-
match pos {
|
80
|
-
io::SeekFrom::Start(offset) => self.offset = offset as usize,
|
81
|
-
io::SeekFrom::Current(offset) => self.offset = (self.offset as i64 + offset) as usize,
|
82
|
-
io::SeekFrom::End(offset) => self.offset = self.inner.len() - offset as usize,
|
83
|
-
}
|
84
|
-
Ok(self.offset as u64)
|
41
|
+
fn is_io_like(value: &Value) -> bool {
|
42
|
+
value.respond_to("read", false).unwrap_or(false)
|
85
43
|
}
|
86
44
|
}
|
87
45
|
|
88
|
-
impl
|
89
|
-
|
90
|
-
|
91
|
-
|
46
|
+
impl TryFrom<Value> for RubyReader {
|
47
|
+
type Error = ReaderError;
|
48
|
+
|
49
|
+
fn try_from(value: Value) -> Result<Self, Self::Error> {
|
50
|
+
let ruby = unsafe { Ruby::get_unchecked() };
|
51
|
+
if RubyReader::is_string_io(&ruby, &value) {
|
52
|
+
let string_content = value.funcall::<_, _, RString>("string", ())?;
|
53
|
+
Ok(RubyReader::String {
|
54
|
+
inner: Opaque::from(string_content),
|
55
|
+
offset: 0,
|
56
|
+
})
|
57
|
+
} else if RubyReader::is_io_like(&value) {
|
58
|
+
Ok(RubyReader::RubyIoLike {
|
59
|
+
inner: Opaque::from(value),
|
60
|
+
})
|
61
|
+
} else if value.is_kind_of(ruby.class_string()) {
|
62
|
+
let ruby_string = value.to_r_string()?;
|
63
|
+
let file_path = unsafe { ruby_string.as_str()? };
|
64
|
+
let file = File::open(&file_path)?;
|
65
|
+
|
66
|
+
let x: Box<dyn Read> = if file_path.ends_with(".gz") {
|
67
|
+
let decoder = GzDecoder::new(BufReader::with_capacity(READ_BUFFER_SIZE, file));
|
68
|
+
Box::new(decoder)
|
69
|
+
} else {
|
70
|
+
Box::new(file)
|
71
|
+
};
|
72
|
+
|
73
|
+
Ok(RubyReader::NativeProxyIoLike { proxy_file: x })
|
92
74
|
} else {
|
93
|
-
|
94
|
-
|
95
|
-
|
96
|
-
|
97
|
-
|
98
|
-
|
99
|
-
|
100
|
-
|
101
|
-
input.respond_to("read", false).unwrap_or(false)
|
102
|
-
}
|
103
|
-
|
104
|
-
fn from_io_like(input: Value) -> Self {
|
105
|
-
Self {
|
106
|
-
inner: input,
|
107
|
-
offset: 0,
|
75
|
+
// Try calling `to_str`, and if that fails, try `to_s`
|
76
|
+
let string_content = value
|
77
|
+
.funcall::<_, _, RString>("to_str", ())
|
78
|
+
.or_else(|_| value.funcall::<_, _, RString>("to_s", ()))?;
|
79
|
+
Ok(RubyReader::String {
|
80
|
+
inner: Opaque::from(string_content),
|
81
|
+
offset: 0,
|
82
|
+
})
|
108
83
|
}
|
109
84
|
}
|
110
85
|
}
|
111
86
|
|
112
|
-
impl RubyReader
|
113
|
-
|
114
|
-
|
115
|
-
|
116
|
-
|
117
|
-
|
118
|
-
|
119
|
-
}
|
120
|
-
|
121
|
-
let string_content = input.funcall::<_, _, RString>("string", ()).unwrap();
|
122
|
-
Ok(Box::new(Self {
|
123
|
-
inner: string_content,
|
124
|
-
offset: 0,
|
125
|
-
}))
|
126
|
-
}
|
87
|
+
impl Read for RubyReader {
|
88
|
+
fn read(&mut self, mut buf: &mut [u8]) -> io::Result<usize> {
|
89
|
+
let ruby = unsafe { Ruby::get_unchecked() };
|
90
|
+
match self {
|
91
|
+
RubyReader::NativeProxyIoLike { proxy_file } => proxy_file.read(buf),
|
92
|
+
RubyReader::String { inner, offset } => {
|
93
|
+
let unwrapped_inner = ruby.get_inner(*inner);
|
127
94
|
|
128
|
-
|
129
|
-
|
130
|
-
|
131
|
-
|
132
|
-
});
|
133
|
-
input.is_kind_of(ruby.get_inner(*string_io_class))
|
134
|
-
}
|
95
|
+
let string_buffer = unsafe { unwrapped_inner.as_slice() };
|
96
|
+
if *offset >= string_buffer.len() {
|
97
|
+
return Ok(0); // EOF
|
98
|
+
}
|
135
99
|
|
136
|
-
|
137
|
-
|
138
|
-
|
139
|
-
.or_else(|_| input.funcall::<_, _, RString>("to_s", ()))?;
|
100
|
+
let remaining = string_buffer.len() - *offset;
|
101
|
+
let copy_size = remaining.min(buf.len());
|
102
|
+
buf[..copy_size].copy_from_slice(&string_buffer[*offset..*offset + copy_size]);
|
140
103
|
|
141
|
-
|
142
|
-
inner: string_content,
|
143
|
-
offset: 0,
|
144
|
-
}))
|
145
|
-
}
|
146
|
-
}
|
104
|
+
*offset += copy_size;
|
147
105
|
|
148
|
-
|
149
|
-
|
150
|
-
|
151
|
-
|
152
|
-
|
153
|
-
|
154
|
-
|
155
|
-
|
156
|
-
|
157
|
-
|
158
|
-
|
106
|
+
Ok(copy_size)
|
107
|
+
}
|
108
|
+
RubyReader::RubyIoLike { inner } => {
|
109
|
+
let unwrapped_inner = ruby.get_inner(*inner);
|
110
|
+
|
111
|
+
let bytes = unwrapped_inner
|
112
|
+
.funcall::<_, _, Option<RString>>("read", (buf.len(),))
|
113
|
+
.map_err(|e| io::Error::new(io::ErrorKind::Other, e.to_string()))?;
|
114
|
+
|
115
|
+
match bytes {
|
116
|
+
Some(bytes) => {
|
117
|
+
let string_buffer = unsafe { bytes.as_slice() };
|
118
|
+
buf.write_all(string_buffer)?;
|
119
|
+
Ok(string_buffer.len())
|
120
|
+
}
|
121
|
+
None => {
|
122
|
+
return Ok(0);
|
123
|
+
}
|
124
|
+
}
|
159
125
|
}
|
160
|
-
None => Ok(0), // EOF
|
161
|
-
}
|
162
|
-
}
|
163
|
-
}
|
164
|
-
|
165
|
-
impl Read for RubyReader<RString> {
|
166
|
-
fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
|
167
|
-
let string_buffer = unsafe { self.inner.as_slice() };
|
168
|
-
if self.offset >= string_buffer.len() {
|
169
|
-
return Ok(0); // EOF
|
170
126
|
}
|
171
|
-
|
172
|
-
let remaining = string_buffer.len() - self.offset;
|
173
|
-
let copy_size = remaining.min(buf.len());
|
174
|
-
buf[..copy_size].copy_from_slice(&string_buffer[self.offset..self.offset + copy_size]);
|
175
|
-
self.offset += copy_size;
|
176
|
-
Ok(copy_size)
|
177
127
|
}
|
178
128
|
}
|
data/ext/osv/src/reader.rs
CHANGED
@@ -3,7 +3,7 @@ use crate::utils::*;
|
|
3
3
|
use ahash::RandomState;
|
4
4
|
use csv::Trim;
|
5
5
|
use magnus::value::ReprValue;
|
6
|
-
use magnus::{Error, IntoValue, KwArgs,
|
6
|
+
use magnus::{Error, IntoValue, KwArgs, Ruby, Symbol, Value};
|
7
7
|
use std::collections::HashMap;
|
8
8
|
|
9
9
|
/// Valid result types for CSV parsing
|
@@ -62,24 +62,27 @@ pub fn parse_csv(rb_self: Value, args: &[Value]) -> Result<Value, Error> {
|
|
62
62
|
} = parse_read_csv_args(&ruby, args)?;
|
63
63
|
|
64
64
|
if !ruby.block_given() {
|
65
|
-
return create_enumerator(
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
65
|
+
return create_enumerator(
|
66
|
+
&ruby,
|
67
|
+
EnumeratorArgs {
|
68
|
+
rb_self,
|
69
|
+
to_read,
|
70
|
+
has_headers,
|
71
|
+
delimiter,
|
72
|
+
quote_char,
|
73
|
+
null_string,
|
74
|
+
result_type,
|
75
|
+
flexible,
|
76
|
+
trim: match trim {
|
77
|
+
Trim::All => Some("all".to_string()),
|
78
|
+
Trim::Headers => Some("headers".to_string()),
|
79
|
+
Trim::Fields => Some("fields".to_string()),
|
80
|
+
_ => None,
|
81
|
+
},
|
82
|
+
ignore_null_bytes,
|
83
|
+
lossy,
|
79
84
|
},
|
80
|
-
|
81
|
-
lossy,
|
82
|
-
})
|
85
|
+
)
|
83
86
|
.map(|yield_enum| yield_enum.into_value_with(&ruby));
|
84
87
|
}
|
85
88
|
|
@@ -136,8 +139,8 @@ pub fn parse_csv(rb_self: Value, args: &[Value]) -> Result<Value, Error> {
|
|
136
139
|
}
|
137
140
|
|
138
141
|
/// Creates an enumerator for lazy CSV parsing
|
139
|
-
fn create_enumerator(args: EnumeratorArgs) -> Result<magnus::Enumerator, Error> {
|
140
|
-
let kwargs =
|
142
|
+
fn create_enumerator(ruby: &Ruby, args: EnumeratorArgs) -> Result<magnus::Enumerator, Error> {
|
143
|
+
let kwargs = ruby.hash_new();
|
141
144
|
kwargs.aset(Symbol::new("has_headers"), args.has_headers)?;
|
142
145
|
kwargs.aset(
|
143
146
|
Symbol::new("col_sep"),
|
data/lib/osv/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: osv
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.4.4
|
4
|
+
version: 0.5.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Nathan Jaremko
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2025-02-
|
11
|
+
date: 2025-02-19 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rb_sys
|
@@ -66,7 +66,6 @@ files:
|
|
66
66
|
- ext/osv/src/csv/parser.rs
|
67
67
|
- ext/osv/src/csv/record.rs
|
68
68
|
- ext/osv/src/csv/record_reader.rs
|
69
|
-
- ext/osv/src/csv/ruby_integration.rs
|
70
69
|
- ext/osv/src/csv/ruby_reader.rs
|
71
70
|
- ext/osv/src/lib.rs
|
72
71
|
- ext/osv/src/reader.rs
|
data/ext/osv/src/csv/ruby_integration.rs
DELETED
@@ -1,19 +0,0 @@
|
|
1
|
-
use std::{
|
2
|
-
fs::File,
|
3
|
-
io::{self, Read, Seek, SeekFrom},
|
4
|
-
mem::ManuallyDrop,
|
5
|
-
};
|
6
|
-
|
7
|
-
pub struct ForgottenFileHandle(pub ManuallyDrop<File>);
|
8
|
-
|
9
|
-
impl Read for ForgottenFileHandle {
|
10
|
-
fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
|
11
|
-
self.0.read(buf)
|
12
|
-
}
|
13
|
-
}
|
14
|
-
|
15
|
-
impl Seek for ForgottenFileHandle {
|
16
|
-
fn seek(&mut self, pos: SeekFrom) -> io::Result<u64> {
|
17
|
-
self.0.seek(pos)
|
18
|
-
}
|
19
|
-
}
|