osv 0.4.3 → 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: cae389fff24c3109f17a1c450022771da964e1b7dced4ed2f34f93753c213dc8
4
- data.tar.gz: 1e3736fb0b84003f62a5038a3d7e71e7d6e31581f943452c2acb08b04a21ff64
3
+ metadata.gz: a791a08d1a1c44fcc4e69df668e1c04e9e099e8388d048f7e058aea2a7ee3305
4
+ data.tar.gz: 2616a3ab845191c29fb3f852f8a1a3a1cc51c33d39e5f4bc7b0da3ae733bf9f3
5
5
  SHA512:
6
- metadata.gz: ddae565f1b208de7fc18fa1cdaff7b3d1ed02ec22bf85d0477a9b152d9238049893a5ef40bad95ac4b2d8f8cb0cd59f14fd4e365aa69f3ef109fa6f6701d2499
7
- data.tar.gz: aa81197d39f7e3dcc5732bfb7d71545cd9303d888ee7da1a34b0244ade287a8b25db16f7428d4f09689dfe1d874d3663bebb6d546e6b363f7496137339b15150
6
+ metadata.gz: 4d1a816e49ffbf62dcd33dd8a2d3c49f32bac34ddfe2681e58a55839e33afadf4487c7e216e67bf878071272d0fc129370a598657fb3fb0bc8ec6c7934e6de3d
7
+ data.tar.gz: 67f8f1416e8f11cac750e60b46437293bd41421a2ea3de9b5557a78ea78c34e2be8251a62c9650f16af449c0f534d807c7aabd838c274486d3cfe0eaef7eecde
data/Cargo.lock CHANGED
@@ -15,7 +15,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
15
15
  checksum = "e89da841a80418a9b391ebaea17f5c112ffaaa96f621d2c285b5174da76b9011"
16
16
  dependencies = [
17
17
  "cfg-if",
18
- "getrandom",
18
+ "getrandom 0.2.15",
19
19
  "once_cell",
20
20
  "version_check",
21
21
  "zerocopy",
@@ -30,12 +30,6 @@ dependencies = [
30
30
  "memchr",
31
31
  ]
32
32
 
33
- [[package]]
34
- name = "autocfg"
35
- version = "1.4.0"
36
- source = "registry+https://github.com/rust-lang/crates.io-index"
37
- checksum = "ace50bade8e6234aa140d9a2f552bbee1db4d353f69b8217bc503490fc1a9f26"
38
-
39
33
  [[package]]
40
34
  name = "bindgen"
41
35
  version = "0.69.5"
@@ -133,6 +127,22 @@ version = "1.13.0"
133
127
  source = "registry+https://github.com/rust-lang/crates.io-index"
134
128
  checksum = "60b1af1c220855b6ceac025d3f6ecdd2b7c4894bfe9cd9bda4fbb4bc7c0d4cf0"
135
129
 
130
+ [[package]]
131
+ name = "errno"
132
+ version = "0.3.10"
133
+ source = "registry+https://github.com/rust-lang/crates.io-index"
134
+ checksum = "33d852cb9b869c2a9b3df2f71a3074817f01e1844f839a144f5fcef059a4eb5d"
135
+ dependencies = [
136
+ "libc",
137
+ "windows-sys",
138
+ ]
139
+
140
+ [[package]]
141
+ name = "fastrand"
142
+ version = "2.3.0"
143
+ source = "registry+https://github.com/rust-lang/crates.io-index"
144
+ checksum = "37909eebbb50d72f9059c3b6d82c0463f2ff062c9e95845c43a6c9c0355411be"
145
+
136
146
  [[package]]
137
147
  name = "flate2"
138
148
  version = "1.0.35"
@@ -144,20 +154,26 @@ dependencies = [
144
154
  ]
145
155
 
146
156
  [[package]]
147
- name = "futures-core"
148
- version = "0.3.31"
157
+ name = "getrandom"
158
+ version = "0.2.15"
149
159
  source = "registry+https://github.com/rust-lang/crates.io-index"
150
- checksum = "05f29059c0c2090612e8d742178b0580d2dc940c837851ad723096f87af6663e"
160
+ checksum = "c4567c8db10ae91089c99af84c68c38da3ec2f087c3f82960bcdbf3656b6f4d7"
161
+ dependencies = [
162
+ "cfg-if",
163
+ "libc",
164
+ "wasi 0.11.0+wasi-snapshot-preview1",
165
+ ]
151
166
 
152
167
  [[package]]
153
168
  name = "getrandom"
154
- version = "0.2.15"
169
+ version = "0.3.1"
155
170
  source = "registry+https://github.com/rust-lang/crates.io-index"
156
- checksum = "c4567c8db10ae91089c99af84c68c38da3ec2f087c3f82960bcdbf3656b6f4d7"
171
+ checksum = "43a49c392881ce6d5c3b8cb70f98717b7c07aabbdff06687b9030dbfbe2725f8"
157
172
  dependencies = [
158
173
  "cfg-if",
159
174
  "libc",
160
- "wasi",
175
+ "wasi 0.13.3+wasi-0.2.2",
176
+ "windows-targets",
161
177
  ]
162
178
 
163
179
  [[package]]
@@ -210,16 +226,6 @@ dependencies = [
210
226
  "libc",
211
227
  ]
212
228
 
213
- [[package]]
214
- name = "kanal"
215
- version = "0.1.0-pre8"
216
- source = "registry+https://github.com/rust-lang/crates.io-index"
217
- checksum = "b05d55519627edaf7fd0f29981f6dc03fb52df3f5b257130eb8d0bf2801ea1d7"
218
- dependencies = [
219
- "futures-core",
220
- "lock_api",
221
- ]
222
-
223
229
  [[package]]
224
230
  name = "lazy_static"
225
231
  version = "1.5.0"
@@ -259,14 +265,10 @@ dependencies = [
259
265
  ]
260
266
 
261
267
  [[package]]
262
- name = "lock_api"
263
- version = "0.4.12"
268
+ name = "linux-raw-sys"
269
+ version = "0.4.15"
264
270
  source = "registry+https://github.com/rust-lang/crates.io-index"
265
- checksum = "07af8b9cdd281b7915f413fa73f29ebd5d55d0d3f0155584dade1ff18cea1b17"
266
- dependencies = [
267
- "autocfg",
268
- "scopeguard",
269
- ]
271
+ checksum = "d26c52dbd32dccf2d10cac7725f8eae5296885fb5703b261f7d0a0739ec807ab"
270
272
 
271
273
  [[package]]
272
274
  name = "magnus"
@@ -358,12 +360,12 @@ dependencies = [
358
360
  "flate2",
359
361
  "itertools 0.14.0",
360
362
  "jemallocator",
361
- "kanal",
362
363
  "magnus 0.7.1",
363
364
  "mimalloc",
364
365
  "rb-sys",
365
366
  "serde",
366
367
  "serde_magnus",
368
+ "tempfile",
367
369
  "thiserror",
368
370
  ]
369
371
 
@@ -451,16 +453,23 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
451
453
  checksum = "08d43f7aa6b08d49f382cde6a7982047c3426db949b1424bc4b7ec9ae12c6ce2"
452
454
 
453
455
  [[package]]
454
- name = "ryu"
455
- version = "1.0.18"
456
+ name = "rustix"
457
+ version = "0.38.44"
456
458
  source = "registry+https://github.com/rust-lang/crates.io-index"
457
- checksum = "f3cb5ba0dc43242ce17de99c180e96db90b235b8a9fdc9543c96d2209116bd9f"
459
+ checksum = "fdb5bc1ae2baa591800df16c9ca78619bf65c0488b41b96ccec5d11220d8c154"
460
+ dependencies = [
461
+ "bitflags",
462
+ "errno",
463
+ "libc",
464
+ "linux-raw-sys",
465
+ "windows-sys",
466
+ ]
458
467
 
459
468
  [[package]]
460
- name = "scopeguard"
461
- version = "1.2.0"
469
+ name = "ryu"
470
+ version = "1.0.18"
462
471
  source = "registry+https://github.com/rust-lang/crates.io-index"
463
- checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49"
472
+ checksum = "f3cb5ba0dc43242ce17de99c180e96db90b235b8a9fdc9543c96d2209116bd9f"
464
473
 
465
474
  [[package]]
466
475
  name = "seq-macro"
@@ -528,6 +537,20 @@ version = "1.0.1"
528
537
  source = "registry+https://github.com/rust-lang/crates.io-index"
529
538
  checksum = "55937e1799185b12863d447f42597ed69d9928686b8d88a1df17376a097d8369"
530
539
 
540
+ [[package]]
541
+ name = "tempfile"
542
+ version = "3.17.1"
543
+ source = "registry+https://github.com/rust-lang/crates.io-index"
544
+ checksum = "22e5a0acb1f3f55f65cc4a866c361b2fb2a0ff6366785ae6fbb5f85df07ba230"
545
+ dependencies = [
546
+ "cfg-if",
547
+ "fastrand",
548
+ "getrandom 0.3.1",
549
+ "once_cell",
550
+ "rustix",
551
+ "windows-sys",
552
+ ]
553
+
531
554
  [[package]]
532
555
  name = "thiserror"
533
556
  version = "2.0.9"
@@ -566,6 +589,24 @@ version = "0.11.0+wasi-snapshot-preview1"
566
589
  source = "registry+https://github.com/rust-lang/crates.io-index"
567
590
  checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423"
568
591
 
592
+ [[package]]
593
+ name = "wasi"
594
+ version = "0.13.3+wasi-0.2.2"
595
+ source = "registry+https://github.com/rust-lang/crates.io-index"
596
+ checksum = "26816d2e1a4a36a2940b96c5296ce403917633dff8f3440e9b236ed6f6bacad2"
597
+ dependencies = [
598
+ "wit-bindgen-rt",
599
+ ]
600
+
601
+ [[package]]
602
+ name = "windows-sys"
603
+ version = "0.59.0"
604
+ source = "registry+https://github.com/rust-lang/crates.io-index"
605
+ checksum = "1e38bc4d79ed67fd075bcc251a1c39b32a1776bbe92e5bef1f0bf1f8c531853b"
606
+ dependencies = [
607
+ "windows-targets",
608
+ ]
609
+
569
610
  [[package]]
570
611
  name = "windows-targets"
571
612
  version = "0.52.6"
@@ -630,6 +671,15 @@ version = "0.52.6"
630
671
  source = "registry+https://github.com/rust-lang/crates.io-index"
631
672
  checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec"
632
673
 
674
+ [[package]]
675
+ name = "wit-bindgen-rt"
676
+ version = "0.33.0"
677
+ source = "registry+https://github.com/rust-lang/crates.io-index"
678
+ checksum = "3268f3d866458b787f390cf61f4bbb563b922d091359f9608842999eaee3943c"
679
+ dependencies = [
680
+ "bitflags",
681
+ ]
682
+
633
683
  [[package]]
634
684
  name = "zerocopy"
635
685
  version = "0.7.35"
data/README.md CHANGED
@@ -143,34 +143,34 @@ OSV - Gzipped Direct 1.000 i/100ms
143
143
  FastCSV - Gzipped 1.000 i/100ms
144
144
  CSV - Gzipped 1.000 i/100ms
145
145
  Calculating -------------------------------------
146
- CSV - StringIO 0.083 (± 0.0%) i/s (12.06 s/i) - 3.000 in 36.304469s
147
- FastCSV - StringIO 0.335 (± 0.0%) i/s (2.98 s/i) - 10.000 in 31.019521s
148
- OSV - StringIO 0.705 (± 0.0%) i/s (1.42 s/i) - 21.000 in 30.629511s
149
- CSV - Hash output 0.060 (± 0.0%) i/s (16.74 s/i) - 2.000 in 33.475977s
150
- OSV - Hash output 0.434 (± 0.0%) i/s (2.30 s/i) - 13.000 in 30.071679s
151
- CSV - Array output 0.063 (± 0.0%) i/s (15.88 s/i) - 2.000 in 32.229906s
152
- OSV - Array output 0.406 (± 0.0%) i/s (2.47 s/i) - 12.000 in 31.072600s
146
+ CSV - StringIO 0.081 (± 0.0%) i/s (12.36 s/i) - 3.000 in 37.155983s
147
+ FastCSV - StringIO 0.367 (± 0.0%) i/s (2.73 s/i) - 11.000 in 30.182262s
148
+ OSV - StringIO 0.673 (± 0.0%) i/s (1.49 s/i) - 20.000 in 30.247575s
149
+ CSV - Hash output 0.056 (± 0.0%) i/s (17.73 s/i) - 2.000 in 35.464673s
150
+ OSV - Hash output 0.266 (± 0.0%) i/s (3.77 s/i) - 8.000 in 30.511406s
151
+ CSV - Array output 0.068 (± 0.0%) i/s (14.76 s/i) - 3.000 in 44.371496s
152
+ OSV - Array output 0.631 (± 0.0%) i/s (1.59 s/i) - 19.000 in 30.896566s
153
153
  FastCSV - Array output
154
- 0.321 (± 0.0%) i/s (3.11 s/i) - 10.000 in 31.458966s
154
+ 0.369 (± 0.0%) i/s (2.71 s/i) - 12.000 in 32.518984s
155
155
  OSV - Direct Open Array output
156
- 0.686 (± 0.0%) i/s (1.46 s/i) - 21.000 in 30.639715s
157
- OSV - Gzipped 0.524 (± 0.0%) i/s (1.91 s/i) - 16.000 in 30.695259s
158
- OSV - Gzipped Direct 0.519 (± 0.0%) i/s (1.93 s/i) - 16.000 in 30.830005s
159
- FastCSV - Gzipped 0.313 (± 0.0%) i/s (3.20 s/i) - 10.000 in 32.031002s
160
- CSV - Gzipped 0.057 (± 0.0%) i/s (17.55 s/i) - 2.000 in 35.107808s
156
+ 0.642 (± 0.0%) i/s (1.56 s/i) - 19.000 in 30.162703s
157
+ OSV - Gzipped 0.519 (± 0.0%) i/s (1.93 s/i) - 16.000 in 31.551051s
158
+ OSV - Gzipped Direct 0.512 (± 0.0%) i/s (1.95 s/i) - 16.000 in 31.630035s
159
+ FastCSV - Gzipped 0.321 (± 0.0%) i/s (3.12 s/i) - 10.000 in 31.795400s
160
+ CSV - Gzipped 0.058 (± 0.0%) i/s (17.34 s/i) - 2.000 in 34.686451s
161
161
 
162
162
  Comparison:
163
- OSV - StringIO : 0.7 i/s
164
- OSV - Direct Open Array output: 0.7 i/s - 1.03x slower
165
- OSV - Gzipped : 0.5 i/s - 1.34x slower
166
- OSV - Gzipped Direct : 0.5 i/s - 1.36x slower
167
- OSV - Hash output : 0.4 i/s - 1.62x slower
168
- OSV - Array output : 0.4 i/s - 1.74x slower
169
- FastCSV - StringIO : 0.3 i/s - 2.10x slower
170
- FastCSV - Array output : 0.3 i/s - 2.20x slower
171
- FastCSV - Gzipped : 0.3 i/s - 2.26x slower
172
- CSV - StringIO : 0.1 i/s - 8.50x slower
173
- CSV - Array output : 0.1 i/s - 11.20x slower
174
- CSV - Hash output : 0.1 i/s - 11.80x slower
175
- CSV - Gzipped : 0.1 i/s - 12.37x slower
163
+ OSV - StringIO: 0.7 i/s
164
+ OSV - Direct Open Array output: 0.6 i/s - 1.05x slower
165
+ OSV - Array output: 0.6 i/s - 1.07x slower
166
+ OSV - Gzipped: 0.5 i/s - 1.30x slower
167
+ OSV - Gzipped Direct: 0.5 i/s - 1.31x slower
168
+ FastCSV - Array output: 0.4 i/s - 1.82x slower
169
+ FastCSV - StringIO: 0.4 i/s - 1.83x slower
170
+ FastCSV - Gzipped: 0.3 i/s - 2.10x slower
171
+ OSV - Hash output: 0.3 i/s - 2.53x slower
172
+ CSV - StringIO: 0.1 i/s - 8.31x slower
173
+ CSV - Array output: 0.1 i/s - 9.93x slower
174
+ CSV - Gzipped: 0.1 i/s - 11.66x slower
175
+ CSV - Hash output: 0.1 i/s - 11.92x slower
176
176
  ```
data/Rakefile CHANGED
@@ -7,9 +7,24 @@ task default: :test
7
7
 
8
8
  GEMSPEC = Gem::Specification.load("osv.gemspec")
9
9
 
10
+ platforms = [
11
+ "x86_64-linux",
12
+ "x86_64-linux-musl",
13
+ "aarch64-linux",
14
+ "aarch64-linux-musl",
15
+ "x86_64-darwin",
16
+ "arm64-darwin"
17
+ ]
18
+
10
19
  RbSys::ExtensionTask.new("osv", GEMSPEC) do |ext|
11
20
  ext.lib_dir = "lib/osv"
12
21
  ext.ext_dir = "ext/osv"
22
+ ext.cross_compile = true
23
+ ext.cross_platform = platforms
24
+ ext.cross_compiling do |spec|
25
+ spec.dependencies.reject! { |dep| dep.name == "rb_sys" }
26
+ spec.files.reject! { |file| File.fnmatch?("ext/*", file, File::FNM_EXTGLOB) }
27
+ end
13
28
  end
14
29
 
15
30
  Rake::TestTask.new do |t|
data/ext/osv/Cargo.toml CHANGED
@@ -10,13 +10,13 @@ crate-type = ["cdylib"]
10
10
  ahash = "0.8"
11
11
  csv = "^1.3"
12
12
  flate2 = "1.0.35"
13
- kanal = "0.1.0-pre8"
14
13
  magnus = { version = "0.7", features = ["rb-sys"] }
15
14
  rb-sys = "^0.9"
16
15
  serde = { version = "1.0", features = ["derive"] }
17
16
  serde_magnus = "0.8.1"
18
17
  thiserror = "2.0"
19
18
  itertools = "^0.14"
19
+ tempfile = "3.17.1"
20
20
 
21
21
  [target.'cfg(target_os = "linux")'.dependencies]
22
22
  jemallocator = { version = "0.5", features = ["disable_initial_exec_tls"] }
@@ -2,19 +2,13 @@ use super::{
2
2
  header_cache::{CacheError, StringCache},
3
3
  parser::RecordParser,
4
4
  record_reader::{RecordReader, READ_BUFFER_SIZE},
5
- ruby_reader::{build_ruby_reader, SeekableRead},
6
- ForgottenFileHandle,
5
+ ruby_reader::RubyReader,
7
6
  };
8
- use flate2::read::GzDecoder;
9
- use magnus::{rb_sys::AsRawValue, value::ReprValue, Error as MagnusError, RString, Ruby, Value};
7
+ use magnus::{Error as MagnusError, RString, Ruby, Value};
10
8
  use std::{
11
9
  borrow::Cow,
12
- fmt::Debug,
13
- fs::File,
14
- io::{self, BufReader, Read},
10
+ io::{self, BufReader},
15
11
  marker::PhantomData,
16
- mem::ManuallyDrop,
17
- os::fd::FromRawFd,
18
12
  };
19
13
 
20
14
  use thiserror::Error;
@@ -159,48 +153,9 @@ impl<'a, T: RecordParser<'a>> RecordReaderBuilder<'a, T> {
159
153
  self
160
154
  }
161
155
 
162
- /// Handles reading from a file descriptor.
163
- fn handle_file_descriptor(&self) -> Result<Box<dyn SeekableRead>, ReaderError> {
164
- let raw_value = self.to_read.as_raw();
165
- let fd = std::panic::catch_unwind(|| unsafe { rb_sys::rb_io_descriptor(raw_value) })
166
- .map_err(|e| ReaderError::FileDescriptor(format!("{:?}", e)))?;
167
-
168
- if fd < 0 {
169
- return Err(ReaderError::InvalidFileDescriptor(fd));
170
- }
171
-
172
- let file = std::panic::catch_unwind(|| unsafe { File::from_raw_fd(fd) })
173
- .map_err(|e| ReaderError::FileDescriptor(format!("{:?}", e)))?;
174
- let forgotten = ForgottenFileHandle(ManuallyDrop::new(file));
175
- Ok(Box::new(forgotten))
176
- }
177
-
178
- /// Handles reading from a file path.
179
- fn handle_file_path(&self) -> Result<Box<dyn SeekableRead>, ReaderError> {
180
- let path = self.to_read.to_r_string()?.to_string()?;
181
- let file = File::open(&path)?;
182
-
183
- if path.ends_with(".gz") {
184
- // For gzipped files, we need to decompress them into memory first
185
- // since GzDecoder doesn't support seeking
186
- let mut decoder = GzDecoder::new(BufReader::with_capacity(READ_BUFFER_SIZE, file));
187
- let mut contents = Vec::new();
188
- decoder.read_to_end(&mut contents)?;
189
- Ok(Box::new(std::io::Cursor::new(contents)))
190
- } else {
191
- Ok(Box::new(file))
192
- }
193
- }
194
-
195
156
  /// Builds the RecordReader with the configured options.
196
157
  pub fn build(self) -> Result<RecordReader<'a, T>, ReaderError> {
197
- let readable = if self.to_read.is_kind_of(self.ruby.class_io()) {
198
- self.handle_file_descriptor()?
199
- } else if self.to_read.is_kind_of(self.ruby.class_string()) {
200
- self.handle_file_path()?
201
- } else {
202
- build_ruby_reader(&self.ruby, self.to_read)?
203
- };
158
+ let readable = RubyReader::try_from(self.to_read)?;
204
159
 
205
160
  let flexible = self.flexible;
206
161
  let reader = BufReader::with_capacity(READ_BUFFER_SIZE, readable);
@@ -3,11 +3,9 @@ mod header_cache;
3
3
  mod parser;
4
4
  mod record;
5
5
  mod record_reader;
6
- mod ruby_integration;
7
6
  mod ruby_reader;
8
7
 
9
8
  pub use builder::RecordReaderBuilder;
10
9
  pub use header_cache::StringCacheKey;
11
10
  pub use record::CowStr;
12
11
  pub use record::CsvRecord;
13
- pub use ruby_integration::*;
@@ -57,7 +57,7 @@ impl<'a, S: BuildHasher + Default> RecordParser<'a>
57
57
  }),
58
58
  };
59
59
 
60
- map.insert(header.clone(), value);
60
+ map.insert(*header, value);
61
61
  });
62
62
  map
63
63
  }
@@ -1,7 +1,7 @@
1
1
  use super::builder::ReaderError;
2
2
  use super::header_cache::StringCacheKey;
3
3
  use super::parser::{CsvRecordType, RecordParser};
4
- use super::ruby_reader::SeekableRead;
4
+ use super::ruby_reader::RubyReader;
5
5
  use magnus::{Error, Ruby};
6
6
  use std::borrow::Cow;
7
7
  use std::io::{BufReader, Read};
@@ -13,7 +13,7 @@ pub(crate) const READ_BUFFER_SIZE: usize = 16384;
13
13
  ///
14
14
  /// This struct implements Iterator to provide a streaming interface for CSV records.
15
15
  pub struct RecordReader<'a, T: RecordParser<'a>> {
16
- reader: csv::Reader<BufReader<Box<dyn SeekableRead>>>,
16
+ reader: csv::Reader<BufReader<RubyReader>>,
17
17
  headers: Vec<StringCacheKey>,
18
18
  null_string: Option<Cow<'a, str>>,
19
19
  string_record: CsvRecordType,
@@ -73,7 +73,7 @@ impl<'a, T: RecordParser<'a>> RecordReader<'a, T> {
73
73
 
74
74
  /// Creates a new RecordReader instance.
75
75
  pub(crate) fn new(
76
- reader: csv::Reader<BufReader<Box<dyn SeekableRead>>>,
76
+ reader: csv::Reader<BufReader<RubyReader>>,
77
77
  headers: Vec<StringCacheKey>,
78
78
  null_string: Option<Cow<'a, str>>,
79
79
  ignore_null_bytes: bool,
@@ -1,178 +1,128 @@
1
+ use flate2::bufread::GzDecoder;
1
2
  use magnus::{
2
- error::Error as MagnusError,
3
3
  value::{Opaque, ReprValue},
4
4
  RClass, RString, Ruby, Value,
5
5
  };
6
- use std::fs::File;
7
- use std::io::{self, BufReader, Read, Seek, SeekFrom, Write};
8
- use std::sync::OnceLock;
6
+ use std::{
7
+ fs::File,
8
+ io::{self, BufReader, Read, Write},
9
+ sync::OnceLock,
10
+ };
9
11
 
10
- use super::{builder::ReaderError, ForgottenFileHandle};
12
+ use super::{builder::ReaderError, record_reader::READ_BUFFER_SIZE};
11
13
 
12
14
  static STRING_IO_CLASS: OnceLock<Opaque<RClass>> = OnceLock::new();
13
15
 
14
16
  /// A reader that can handle various Ruby input types (String, StringIO, IO-like objects)
15
17
  /// and provide a standard Read implementation for them.
16
- pub struct RubyReader<T> {
17
- inner: T,
18
- offset: usize,
19
- }
20
-
21
- pub trait SeekableRead: std::io::Read + Seek {}
22
- impl SeekableRead for RubyReader<Value> {}
23
- impl SeekableRead for RubyReader<RString> {}
24
- impl SeekableRead for File {}
25
- impl<T: Read + Seek> SeekableRead for BufReader<T> {}
26
- impl SeekableRead for std::io::Cursor<Vec<u8>> {}
27
- impl SeekableRead for ForgottenFileHandle {}
28
-
29
- pub fn build_ruby_reader(ruby: &Ruby, input: Value) -> Result<Box<dyn SeekableRead>, ReaderError> {
30
- if RubyReader::is_string_io(ruby, &input) {
31
- RubyReader::from_string_io(ruby, input)
32
- } else if RubyReader::is_io_like(&input) {
33
- RubyReader::from_io(input)
34
- } else {
35
- RubyReader::from_string_like(input)
36
- }
18
+ pub enum RubyReader {
19
+ String {
20
+ inner: Opaque<RString>,
21
+ offset: usize,
22
+ },
23
+ RubyIoLike {
24
+ inner: Opaque<Value>,
25
+ },
26
+ NativeProxyIoLike {
27
+ proxy_file: Box<dyn Read>,
28
+ },
37
29
  }
38
30
 
39
- impl Seek for RubyReader<Value> {
40
- fn seek(&mut self, pos: io::SeekFrom) -> io::Result<u64> {
41
- let (whence, offset) = match pos {
42
- SeekFrom::Start(i) => (0, i as i64),
43
- SeekFrom::Current(i) => (1, i),
44
- SeekFrom::End(i) => (2, i),
45
- };
46
-
47
- let new_position = self
48
- .inner
49
- .funcall("seek", (offset, whence))
50
- .map_err(|e| io::Error::new(io::ErrorKind::Other, e.to_string()))?;
51
-
52
- Ok(new_position)
53
- }
54
- }
55
-
56
- impl Write for RubyReader<Value> {
57
- fn write(&mut self, buf: &[u8]) -> Result<usize, io::Error> {
58
- let ruby_bytes = RString::from_slice(buf);
59
-
60
- let bytes_written = self
61
- .inner
62
- .funcall::<_, _, usize>("write", (ruby_bytes,))
63
- .map_err(|e| io::Error::new(io::ErrorKind::Other, e.to_string()))?;
64
-
65
- Ok(bytes_written)
66
- }
67
-
68
- fn flush(&mut self) -> Result<(), io::Error> {
69
- self.inner
70
- .funcall::<_, _, Value>("flush", ())
71
- .map_err(|e| io::Error::new(io::ErrorKind::Other, e.to_string()))?;
72
-
73
- Ok(())
31
+ impl RubyReader {
32
+ fn is_string_io(ruby: &Ruby, value: &Value) -> bool {
33
+ let string_io_class = STRING_IO_CLASS.get_or_init(|| {
34
+ let class = RClass::from_value(ruby.eval("StringIO").expect("Failed to find StringIO"))
35
+ .expect("Failed to get StringIO class");
36
+ Opaque::from(class)
37
+ });
38
+ value.is_kind_of(ruby.get_inner(*string_io_class))
74
39
  }
75
- }
76
40
 
77
- impl Seek for RubyReader<RString> {
78
- fn seek(&mut self, pos: io::SeekFrom) -> io::Result<u64> {
79
- match pos {
80
- io::SeekFrom::Start(offset) => self.offset = offset as usize,
81
- io::SeekFrom::Current(offset) => self.offset = (self.offset as i64 + offset) as usize,
82
- io::SeekFrom::End(offset) => self.offset = self.inner.len() - offset as usize,
83
- }
84
- Ok(self.offset as u64)
41
+ fn is_io_like(value: &Value) -> bool {
42
+ value.respond_to("read", false).unwrap_or(false)
85
43
  }
86
44
  }
87
45
 
88
- impl RubyReader<Value> {
89
- fn from_io(input: Value) -> Result<Box<dyn SeekableRead>, ReaderError> {
90
- if Self::is_io_like(&input) {
91
- Ok(Box::new(Self::from_io_like(input)))
46
+ impl TryFrom<Value> for RubyReader {
47
+ type Error = ReaderError;
48
+
49
+ fn try_from(value: Value) -> Result<Self, Self::Error> {
50
+ let ruby = unsafe { Ruby::get_unchecked() };
51
+ if RubyReader::is_string_io(&ruby, &value) {
52
+ let string_content = value.funcall::<_, _, RString>("string", ())?;
53
+ Ok(RubyReader::String {
54
+ inner: Opaque::from(string_content),
55
+ offset: 0,
56
+ })
57
+ } else if RubyReader::is_io_like(&value) {
58
+ Ok(RubyReader::RubyIoLike {
59
+ inner: Opaque::from(value),
60
+ })
61
+ } else if value.is_kind_of(ruby.class_string()) {
62
+ let ruby_string = value.to_r_string()?;
63
+ let file_path = unsafe { ruby_string.as_str()? };
64
+ let file = File::open(&file_path)?;
65
+
66
+ let x: Box<dyn Read> = if file_path.ends_with(".gz") {
67
+ let decoder = GzDecoder::new(BufReader::with_capacity(READ_BUFFER_SIZE, file));
68
+ Box::new(decoder)
69
+ } else {
70
+ Box::new(file)
71
+ };
72
+
73
+ Ok(RubyReader::NativeProxyIoLike { proxy_file: x })
92
74
  } else {
93
- Err(MagnusError::new(
94
- magnus::exception::type_error(),
95
- "Input is not an IO-like object",
96
- ))?
97
- }
98
- }
99
-
100
- fn is_io_like(input: &Value) -> bool {
101
- input.respond_to("read", false).unwrap_or(false)
102
- }
103
-
104
- fn from_io_like(input: Value) -> Self {
105
- Self {
106
- inner: input,
107
- offset: 0,
75
+ // Try calling `to_str`, and if that fails, try `to_s`
76
+ let string_content = value
77
+ .funcall::<_, _, RString>("to_str", ())
78
+ .or_else(|_| value.funcall::<_, _, RString>("to_s", ()))?;
79
+ Ok(RubyReader::String {
80
+ inner: Opaque::from(string_content),
81
+ offset: 0,
82
+ })
108
83
  }
109
84
  }
110
85
  }
111
86
 
112
- impl RubyReader<RString> {
113
- pub fn from_string_io(ruby: &Ruby, input: Value) -> Result<Box<dyn SeekableRead>, ReaderError> {
114
- if !Self::is_string_io(ruby, &input) {
115
- return Err(MagnusError::new(
116
- magnus::exception::type_error(),
117
- "Input is not a StringIO",
118
- ))?;
119
- }
120
-
121
- let string_content = input.funcall::<_, _, RString>("string", ()).unwrap();
122
- Ok(Box::new(Self {
123
- inner: string_content,
124
- offset: 0,
125
- }))
126
- }
87
+ impl Read for RubyReader {
88
+ fn read(&mut self, mut buf: &mut [u8]) -> io::Result<usize> {
89
+ let ruby = unsafe { Ruby::get_unchecked() };
90
+ match self {
91
+ RubyReader::NativeProxyIoLike { proxy_file } => proxy_file.read(buf),
92
+ RubyReader::String { inner, offset } => {
93
+ let unwrapped_inner = ruby.get_inner(*inner);
127
94
 
128
- fn is_string_io(ruby: &Ruby, input: &Value) -> bool {
129
- let string_io_class = STRING_IO_CLASS.get_or_init(|| {
130
- let class = RClass::from_value(ruby.eval("StringIO").unwrap()).unwrap();
131
- Opaque::from(class)
132
- });
133
- input.is_kind_of(ruby.get_inner(*string_io_class))
134
- }
95
+ let string_buffer = unsafe { unwrapped_inner.as_slice() };
96
+ if *offset >= string_buffer.len() {
97
+ return Ok(0); // EOF
98
+ }
135
99
 
136
- fn from_string_like(input: Value) -> Result<Box<dyn SeekableRead>, ReaderError> {
137
- let string_content = input
138
- .funcall::<_, _, RString>("to_str", ())
139
- .or_else(|_| input.funcall::<_, _, RString>("to_s", ()))?;
100
+ let remaining = string_buffer.len() - *offset;
101
+ let copy_size = remaining.min(buf.len());
102
+ buf[..copy_size].copy_from_slice(&string_buffer[*offset..*offset + copy_size]);
140
103
 
141
- Ok(Box::new(Self {
142
- inner: string_content,
143
- offset: 0,
144
- }))
145
- }
146
- }
104
+ *offset += copy_size;
147
105
 
148
- impl Read for RubyReader<Value> {
149
- fn read(&mut self, mut buf: &mut [u8]) -> io::Result<usize> {
150
- let bytes = self
151
- .inner
152
- .funcall::<_, _, Option<RString>>("read", (buf.len(),))
153
- .map_err(|e| io::Error::new(io::ErrorKind::Other, e.to_string()))?;
154
-
155
- match bytes {
156
- Some(bytes) => {
157
- buf.write_all(unsafe { bytes.as_slice() })?;
158
- Ok(bytes.len())
106
+ Ok(copy_size)
107
+ }
108
+ RubyReader::RubyIoLike { inner } => {
109
+ let unwrapped_inner = ruby.get_inner(*inner);
110
+
111
+ let bytes = unwrapped_inner
112
+ .funcall::<_, _, Option<RString>>("read", (buf.len(),))
113
+ .map_err(|e| io::Error::new(io::ErrorKind::Other, e.to_string()))?;
114
+
115
+ match bytes {
116
+ Some(bytes) => {
117
+ let string_buffer = unsafe { bytes.as_slice() };
118
+ buf.write_all(string_buffer)?;
119
+ Ok(string_buffer.len())
120
+ }
121
+ None => {
122
+ return Ok(0);
123
+ }
124
+ }
159
125
  }
160
- None => Ok(0), // EOF
161
- }
162
- }
163
- }
164
-
165
- impl Read for RubyReader<RString> {
166
- fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
167
- let string_buffer = unsafe { self.inner.as_slice() };
168
- if self.offset >= string_buffer.len() {
169
- return Ok(0); // EOF
170
126
  }
171
-
172
- let remaining = string_buffer.len() - self.offset;
173
- let copy_size = remaining.min(buf.len());
174
- buf[..copy_size].copy_from_slice(&string_buffer[self.offset..self.offset + copy_size]);
175
- self.offset += copy_size;
176
- Ok(copy_size)
177
127
  }
178
128
  }
@@ -3,7 +3,7 @@ use crate::utils::*;
3
3
  use ahash::RandomState;
4
4
  use csv::Trim;
5
5
  use magnus::value::ReprValue;
6
- use magnus::{Error, IntoValue, KwArgs, RHash, Ruby, Symbol, Value};
6
+ use magnus::{Error, IntoValue, KwArgs, Ruby, Symbol, Value};
7
7
  use std::collections::HashMap;
8
8
 
9
9
  /// Valid result types for CSV parsing
@@ -62,24 +62,27 @@ pub fn parse_csv(rb_self: Value, args: &[Value]) -> Result<Value, Error> {
62
62
  } = parse_read_csv_args(&ruby, args)?;
63
63
 
64
64
  if !ruby.block_given() {
65
- return create_enumerator(EnumeratorArgs {
66
- rb_self,
67
- to_read,
68
- has_headers,
69
- delimiter,
70
- quote_char,
71
- null_string,
72
- result_type,
73
- flexible,
74
- trim: match trim {
75
- Trim::All => Some("all".to_string()),
76
- Trim::Headers => Some("headers".to_string()),
77
- Trim::Fields => Some("fields".to_string()),
78
- _ => None,
65
+ return create_enumerator(
66
+ &ruby,
67
+ EnumeratorArgs {
68
+ rb_self,
69
+ to_read,
70
+ has_headers,
71
+ delimiter,
72
+ quote_char,
73
+ null_string,
74
+ result_type,
75
+ flexible,
76
+ trim: match trim {
77
+ Trim::All => Some("all".to_string()),
78
+ Trim::Headers => Some("headers".to_string()),
79
+ Trim::Fields => Some("fields".to_string()),
80
+ _ => None,
81
+ },
82
+ ignore_null_bytes,
83
+ lossy,
79
84
  },
80
- ignore_null_bytes,
81
- lossy,
82
- })
85
+ )
83
86
  .map(|yield_enum| yield_enum.into_value_with(&ruby));
84
87
  }
85
88
 
@@ -136,8 +139,8 @@ pub fn parse_csv(rb_self: Value, args: &[Value]) -> Result<Value, Error> {
136
139
  }
137
140
 
138
141
  /// Creates an enumerator for lazy CSV parsing
139
- fn create_enumerator(args: EnumeratorArgs) -> Result<magnus::Enumerator, Error> {
140
- let kwargs = RHash::new();
142
+ fn create_enumerator(ruby: &Ruby, args: EnumeratorArgs) -> Result<magnus::Enumerator, Error> {
143
+ let kwargs = ruby.hash_new();
141
144
  kwargs.aset(Symbol::new("has_headers"), args.has_headers)?;
142
145
  kwargs.aset(
143
146
  Symbol::new("col_sep"),
data/lib/osv/version.rb CHANGED
@@ -1,3 +1,3 @@
1
1
  module OSV
2
- VERSION = "0.4.3"
2
+ VERSION = "0.5.0"
3
3
  end
data/lib/osv.rb CHANGED
@@ -1,5 +1,10 @@
1
1
  require_relative "osv/version"
2
- require_relative "osv/osv"
2
+
3
+ begin
4
+ require "osv/#{RUBY_VERSION.to_f}/osv"
5
+ rescue LoadError
6
+ require "osv/osv"
7
+ end
3
8
 
4
9
  module OSV
5
10
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: osv
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.4.3
4
+ version: 0.5.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Nathan Jaremko
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2025-01-31 00:00:00.000000000 Z
11
+ date: 2025-02-19 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rb_sys
@@ -66,7 +66,6 @@ files:
66
66
  - ext/osv/src/csv/parser.rs
67
67
  - ext/osv/src/csv/record.rs
68
68
  - ext/osv/src/csv/record_reader.rs
69
- - ext/osv/src/csv/ruby_integration.rs
70
69
  - ext/osv/src/csv/ruby_reader.rs
71
70
  - ext/osv/src/lib.rs
72
71
  - ext/osv/src/reader.rs
@@ -1,19 +0,0 @@
1
- use std::{
2
- fs::File,
3
- io::{self, Read, Seek, SeekFrom},
4
- mem::ManuallyDrop,
5
- };
6
-
7
- pub struct ForgottenFileHandle(pub ManuallyDrop<File>);
8
-
9
- impl Read for ForgottenFileHandle {
10
- fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
11
- self.0.read(buf)
12
- }
13
- }
14
-
15
- impl Seek for ForgottenFileHandle {
16
- fn seek(&mut self, pos: SeekFrom) -> io::Result<u64> {
17
- self.0.seek(pos)
18
- }
19
- }