osv 0.3.12 → 0.3.14

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 3158fed2ced03b0fd6bdc792198529e675a10656ec32d5d375a0fee88481d5e4
4
- data.tar.gz: 0e2203377d114a6ee12cc7aa4abdb80baeea5adba3bd253825a6fa975cdd51aa
3
+ metadata.gz: 689f28c935746890aa680fd2f649076a36d6ce233d4cbf2717dc129174b593dc
4
+ data.tar.gz: 45ddaa6774a9a4e9391d000b30b6e92afb8560b81821d8fec363d54283bac6d9
5
5
  SHA512:
6
- metadata.gz: 82ae4170b55db12c767ccff522f0415e6ff586455a9c10e09d603323a8d177e0d532fae45d20e5eef57de59a6e75e0e711930b2a5c881af8e8192f9c804fedc1
7
- data.tar.gz: 3e00897d9e6ae42756e22e55a0d3e2450b9375a63af71618f9d866bf256783b1315f18d27632a7f94f0d4ef7cefd20210efef85f3a5bddc47b216f77e51c69f3
6
+ metadata.gz: 74c2052ea9cbc61ddef5d1c46abdd5e4cdf7c60c946c421e4b8da7c160ba3f3eb761842279cd9f066aa6a1aa2214d0ef9ba9ff11c46294e8e1d4ebbb95161d70
7
+ data.tar.gz: 5a795e5fa6d84b39082c2754dea655cd5b4f8a00558627fb64f661a14ec32daa8ea7b31a13724291e11531d03c8c5fa1bdb928c6d9422b87fcbb5b5aba7daad5
data/Gemfile CHANGED
@@ -2,12 +2,12 @@ source "https://rubygems.org"
2
2
 
3
3
  gem "rb_sys", "~> 0.9.56"
4
4
  gem "rake"
5
- gem "csv"
6
5
 
7
6
  # Use local version of osv
8
7
  gemspec
9
8
 
10
9
  group :development, :test do
10
+ gem "csv"
11
11
  gem "minitest", "~> 5.0"
12
12
  gem "benchmark-ips", "~> 2.12"
13
13
  gem "fastcsv", "~> 0.0.7"
data/README.md CHANGED
@@ -114,104 +114,62 @@ When `has_headers` is false, hash keys will be generated as `"c0"`, `"c1"`, etc.
114
114
 
115
115
  ## Performance
116
116
 
117
- This library is faster than the standard Ruby CSV library, and is comparable to the fastest CSV parser gems I've used.
117
+ This library is faster than the standard Ruby CSV library. It's also faster than any other CSV gem I've been able to find.
118
118
 
119
119
  Here's some unscientific benchmarks. You can find the code in the [benchmark/comparison_benchmark.rb](benchmark/comparison_benchmark.rb) file.
120
120
 
121
- ### 10,000 lines
121
+ ### 3,000,000 lines
122
122
 
123
123
  ```
124
- Benchmarking with 100001 lines of data
124
+ 🏃 Running benchmarks...
125
+ Benchmarking with 3000001 lines of data
125
126
 
126
- ruby 3.3.6 (2024-11-05 revision 75015d4c1f) [arm64-darwin24]
127
+ ruby 3.3.6 (2024-11-05 revision 75015d4c1f) +YJIT [arm64-darwin24]
127
128
  Warming up --------------------------------------
128
- OSV - Hash output 1.000 i/100ms
129
- CSV - Hash output 1.000 i/100ms
130
- OSV - Array output 1.000 i/100ms
131
- OSV - Direct Open Array output
132
- 12.719M i/100ms
133
- CSV - Array output 1.000 i/100ms
134
- FastCSV - Array output
135
- 1.000 i/100ms
136
- OSV - StringIO 1.000 i/100ms
137
129
  CSV - StringIO 1.000 i/100ms
138
130
  FastCSV - StringIO 1.000 i/100ms
139
- OSV - Gzipped 1.000 i/100ms
140
- CSV - Gzipped 1.000 i/100ms
141
- Calculating -------------------------------------
142
- OSV - Hash output 6.722 (±14.9%) i/s (148.77 ms/i) - 59.000 in 10.074753s
143
- CSV - Hash output 1.223 (± 0.0%) i/s (817.62 ms/i) - 13.000 in 10.788284s
144
- OSV - Array output 17.284 (±11.6%) i/s (57.86 ms/i) - 171.000 in 10.007321s
145
- OSV - Direct Open Array output
146
- 213.629M (±13.5%) i/s (4.68 ns/i) - 1.921B in 10.005506s
147
- CSV - Array output 2.193 (± 0.0%) i/s (455.93 ms/i) - 22.000 in 10.052607s
148
- FastCSV - Array output
149
- 7.993 (± 0.0%) i/s (125.11 ms/i) - 80.000 in 10.053729s
150
- OSV - StringIO 6.626 (±15.1%) i/s (150.91 ms/i) - 66.000 in 10.103646s
151
- CSV - StringIO 1.478 (± 0.0%) i/s (676.78 ms/i) - 15.000 in 10.158640s
152
- FastCSV - StringIO 17.074 (± 5.9%) i/s (58.57 ms/i) - 171.000 in 10.059266s
153
- OSV - Gzipped 5.639 (± 0.0%) i/s (177.32 ms/i) - 57.000 in 10.152487s
154
- CSV - Gzipped 1.176 (± 0.0%) i/s (850.19 ms/i) - 12.000 in 10.233398s
155
-
156
- Comparison:
157
- OSV - Direct Open Array output: 213629268.6 i/s
158
- OSV - Array output: 17.3 i/s - 12360250.79x slower
159
- FastCSV - StringIO: 17.1 i/s - 12511956.50x slower
160
- FastCSV - Array output: 8.0 i/s - 26727225.72x slower
161
- OSV - Hash output: 6.7 i/s - 31780615.83x slower
162
- OSV - StringIO: 6.6 i/s - 32239620.60x slower
163
- OSV - Gzipped: 5.6 i/s - 37881517.48x slower
164
- CSV - Array output: 2.2 i/s - 97400427.87x slower
165
- CSV - StringIO: 1.5 i/s - 144580048.04x slower
166
- CSV - Hash output: 1.2 i/s - 174666591.31x slower
167
- CSV - Gzipped: 1.2 i/s - 181626018.23x slower
168
- ```
169
-
170
- ### 1,000,000 lines
171
-
172
- ```
173
- Benchmarking with 1000001 lines of data
174
-
175
- ruby 3.3.6 (2024-11-05 revision 75015d4c1f) [arm64-darwin24]
176
- Warming up --------------------------------------
177
- OSV - Hash output 1.000 i/100ms
131
+ OSV - StringIO 1.000 i/100ms
178
132
  CSV - Hash output 1.000 i/100ms
179
- OSV - Array output 1.000 i/100ms
180
- OSV - Direct Open Array output
181
- 1.000 i/100ms
133
+ OSV - Hash output 1.000 i/100ms
182
134
  CSV - Array output 1.000 i/100ms
135
+ OSV - Array output 1.000 i/100ms
183
136
  FastCSV - Array output
184
137
  1.000 i/100ms
185
- OSV - StringIO 1.000 i/100ms
186
- CSV - StringIO 1.000 i/100ms
187
- FastCSV - StringIO 1.000 i/100ms
138
+ OSV - Direct Open Array output
139
+ 1.000 i/100ms
188
140
  OSV - Gzipped 1.000 i/100ms
141
+ OSV - Gzipped Direct 1.000 i/100ms
142
+ FastCSV - Gzipped 1.000 i/100ms
189
143
  CSV - Gzipped 1.000 i/100ms
190
144
  Calculating -------------------------------------
191
- OSV - Hash output 0.492 (± 0.0%) i/s (2.03 s/i) - 5.000 in 10.463278s
192
- CSV - Hash output 0.114 (± 0.0%) i/s (8.75 s/i) - 2.000 in 17.573877s
193
- OSV - Array output 1.502 (± 0.0%) i/s (665.58 ms/i) - 14.000 in 10.217551s
194
- OSV - Direct Open Array output
195
- 1.626 (± 0.0%) i/s (614.90 ms/i) - 16.000 in 10.190323s
196
- CSV - Array output 0.183 (± 0.0%) i/s (5.46 s/i) - 2.000 in 10.951943s
145
+ CSV - StringIO 0.083 (± 0.0%) i/s (12.01 s/i) - 3.000 in 36.028672s
146
+ FastCSV - StringIO 0.366 (± 0.0%) i/s (2.73 s/i) - 11.000 in 30.032350s
147
+ OSV - StringIO 0.522 (± 0.0%) i/s (1.92 s/i) - 16.000 in 30.655768s
148
+ CSV - Hash output 0.062 (± 0.0%) i/s (16.16 s/i) - 2.000 in 32.311990s
149
+ OSV - Hash output 0.273 (± 0.0%) i/s (3.66 s/i) - 9.000 in 32.924970s
150
+ CSV - Array output 0.069 (± 0.0%) i/s (14.50 s/i) - 3.000 in 43.488185s
151
+ OSV - Array output 0.601 (± 0.0%) i/s (1.66 s/i) - 19.000 in 31.636782s
197
152
  FastCSV - Array output
198
- 0.326 (± 0.0%) i/s (3.07 s/i) - 4.000 in 12.340605s
199
- OSV - StringIO 0.567 (± 0.0%) i/s (1.76 s/i) - 6.000 in 10.698027s
200
- CSV - StringIO 0.141 (± 0.0%) i/s (7.10 s/i) - 2.000 in 14.237144s
201
- FastCSV - StringIO 0.923 (± 0.0%) i/s (1.08 s/i) - 10.000 in 11.567775s
202
- OSV - Gzipped 0.437 (± 0.0%) i/s (2.29 s/i) - 5.000 in 11.452764s
203
- CSV - Gzipped 0.104 (± 0.0%) i/s (9.64 s/i) - 2.000 in 19.373423s
153
+ 0.356 (± 0.0%) i/s (2.81 s/i) - 11.000 in 30.871931s
154
+ OSV - Direct Open Array output
155
+ 0.604 (± 0.0%) i/s (1.66 s/i) - 19.000 in 31.469190s
156
+ OSV - Gzipped 0.424 (± 0.0%) i/s (2.36 s/i) - 13.000 in 30.642322s
157
+ OSV - Gzipped Direct 0.636 (± 0.0%) i/s (1.57 s/i) - 20.000 in 31.424083s
158
+ FastCSV - Gzipped 0.323 (± 0.0%) i/s (3.10 s/i) - 10.000 in 30.990648s
159
+ CSV - Gzipped 0.058 (± 0.0%) i/s (17.11 s/i) - 2.000 in 34.228691s
204
160
 
205
161
  Comparison:
206
- OSV - Direct Open Array output: 1.6 i/s
207
- OSV - Array output: 1.5 i/s - 1.08x slower
208
- FastCSV - StringIO: 0.9 i/s - 1.76x slower
209
- OSV - StringIO: 0.6 i/s - 2.87x slower
210
- OSV - Hash output: 0.5 i/s - 3.30x slower
211
- OSV - Gzipped: 0.4 i/s - 3.72x slower
212
- FastCSV - Array output: 0.3 i/s - 4.99x slower
213
- CSV - Array output: 0.2 i/s - 8.88x slower
214
- CSV - StringIO: 0.1 i/s - 11.55x slower
215
- CSV - Hash output: 0.1 i/s - 14.24x slower
216
- CSV - Gzipped: 0.1 i/s - 15.68x slower
162
+ OSV - Gzipped Direct: 0.6 i/s
163
+ OSV - Direct Open Array output: 0.6 i/s - 1.05x slower
164
+ OSV - Array output: 0.6 i/s - 1.06x slower
165
+ OSV - StringIO: 0.5 i/s - 1.22x slower
166
+ OSV - Gzipped: 0.4 i/s - 1.50x slower
167
+ FastCSV - StringIO: 0.4 i/s - 1.74x slower
168
+ FastCSV - Array output: 0.4 i/s - 1.79x slower
169
+ FastCSV - Gzipped: 0.3 i/s - 1.97x slower
170
+ OSV - Hash output: 0.3 i/s - 2.33x slower
171
+ CSV - StringIO: 0.1 i/s - 7.64x slower
172
+ CSV - Array output: 0.1 i/s - 9.23x slower
173
+ CSV - Hash output: 0.1 i/s - 10.28x slower
174
+ CSV - Gzipped: 0.1 i/s - 10.89x slower
217
175
  ```
data/Rakefile CHANGED
@@ -1,21 +1,19 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  require "rake/testtask"
4
- require "rake/extensiontask"
4
+ require "rb_sys/extensiontask"
5
5
 
6
6
  task default: :test
7
7
 
8
- Rake::ExtensionTask.new("osv") do |c|
9
- c.lib_dir = "lib/osv"
10
- c.ext_dir = "ext/osv"
11
- end
8
+ GEMSPEC = Gem::Specification.load("osv.gemspec")
12
9
 
13
- task :dev do
14
- ENV["RB_SYS_CARGO_PROFILE"] = "release"
10
+ RbSys::ExtensionTask.new("osv", GEMSPEC) do |ext|
11
+ ext.lib_dir = "lib/osv"
12
+ ext.ext_dir = "ext/osv"
15
13
  end
16
14
 
17
15
  Rake::TestTask.new do |t|
18
- t.deps << :dev << :compile
16
+ t.deps << :compile
19
17
  t.test_files = FileList[File.expand_path("test/*_test.rb", __dir__)]
20
18
  t.libs << "lib"
21
19
  t.libs << "test"
@@ -1,19 +1,18 @@
1
1
  use super::{
2
2
  header_cache::{CacheError, StringCache},
3
3
  parser::RecordParser,
4
- read_impl::ReadImpl,
5
- reader::RecordReader,
6
- READ_BUFFER_SIZE,
4
+ record_reader::{RecordReader, READ_BUFFER_SIZE},
5
+ ruby_reader::build_ruby_reader,
7
6
  };
8
7
  use flate2::read::GzDecoder;
9
- use magnus::{rb_sys::AsRawValue, value::ReprValue, Error as MagnusError, RString, Ruby, Value};
8
+ use magnus::{rb_sys::AsRawValue, value::ReprValue, Error as MagnusError, Ruby, Value};
10
9
  use std::{
11
10
  fs::File,
12
11
  io::{self, BufReader, Read},
13
12
  marker::PhantomData,
14
13
  os::fd::FromRawFd,
15
- thread,
16
14
  };
15
+
17
16
  use thiserror::Error;
18
17
 
19
18
  pub(crate) static BUFFER_CHANNEL_SIZE: usize = 1024;
@@ -28,8 +27,6 @@ pub enum ReaderError {
28
27
  FileOpen(#[from] io::Error),
29
28
  #[error("Failed to intern headers: {0}")]
30
29
  HeaderIntern(#[from] CacheError),
31
- #[error("Unsupported GzipReader")]
32
- UnsupportedGzipReader,
33
30
  #[error("Ruby error: {0}")]
34
31
  Ruby(String),
35
32
  }
@@ -49,7 +46,7 @@ impl From<ReaderError> for MagnusError {
49
46
  }
50
47
  }
51
48
 
52
- pub struct RecordReaderBuilder<'a, T: RecordParser + Send + 'static> {
49
+ pub struct RecordReaderBuilder<'a, T: RecordParser<'a> + Send> {
53
50
  ruby: &'a Ruby,
54
51
  to_read: Value,
55
52
  has_headers: bool,
@@ -58,12 +55,55 @@ pub struct RecordReaderBuilder<'a, T: RecordParser + Send + 'static> {
58
55
  null_string: Option<String>,
59
56
  buffer: usize,
60
57
  flexible: bool,
61
- flexible_default: Option<String>,
58
+ flexible_default: Option<&'a str>,
62
59
  trim: csv::Trim,
63
60
  _phantom: PhantomData<T>,
64
61
  }
65
62
 
66
- impl<'a, T: RecordParser + Send + 'static> RecordReaderBuilder<'a, T> {
63
+ impl<T: RecordParser<'static> + Send + 'static> RecordReaderBuilder<'static, T> {
64
+ fn build_multi_threaded(
65
+ self,
66
+ readable: Box<dyn Read + Send + 'static>,
67
+ should_forget: bool,
68
+ ) -> Result<RecordReader<'static, T>, ReaderError> {
69
+ let flexible = self.flexible || self.flexible_default.is_some();
70
+ let mut reader = csv::ReaderBuilder::new()
71
+ .has_headers(self.has_headers)
72
+ .delimiter(self.delimiter)
73
+ .quote(self.quote_char)
74
+ .flexible(flexible)
75
+ .trim(self.trim)
76
+ .from_reader(readable);
77
+
78
+ let headers = RecordReader::<T>::get_headers(self.ruby, &mut reader, self.has_headers)?;
79
+ let static_headers = StringCache::intern_many(&headers)?;
80
+
81
+ Ok(RecordReader::new_multi_threaded(
82
+ reader,
83
+ static_headers,
84
+ self.buffer,
85
+ self.null_string,
86
+ self.flexible_default,
87
+ should_forget,
88
+ ))
89
+ }
90
+
91
+ pub fn build_threaded(self) -> Result<RecordReader<'static, T>, ReaderError> {
92
+ if self.to_read.is_kind_of(self.ruby.class_io()) {
93
+ let readable = self.handle_file_descriptor()?;
94
+ self.build_multi_threaded(readable, true)
95
+ } else if self.to_read.is_kind_of(self.ruby.class_string()) {
96
+ let readable = self.handle_file_path()?;
97
+ self.build_multi_threaded(readable, false)
98
+ } else {
99
+ let readable = build_ruby_reader(self.ruby, self.to_read)?;
100
+
101
+ self.build_single_threaded(readable)
102
+ }
103
+ }
104
+ }
105
+
106
+ impl<'a, T: RecordParser<'a> + Send> RecordReaderBuilder<'a, T> {
67
107
  pub fn new(ruby: &'a Ruby, to_read: Value) -> Self {
68
108
  Self {
69
109
  ruby,
@@ -110,7 +150,7 @@ impl<'a, T: RecordParser + Send + 'static> RecordReaderBuilder<'a, T> {
110
150
  self
111
151
  }
112
152
 
113
- pub fn flexible_default(mut self, flexible_default: Option<String>) -> Self {
153
+ pub fn flexible_default(mut self, flexible_default: Option<&'a str>) -> Self {
114
154
  self.flexible_default = flexible_default;
115
155
  self
116
156
  }
@@ -120,12 +160,6 @@ impl<'a, T: RecordParser + Send + 'static> RecordReaderBuilder<'a, T> {
120
160
  self
121
161
  }
122
162
 
123
- fn handle_string_io(&self) -> Result<Box<dyn Read + Send + 'static>, ReaderError> {
124
- let string: RString = self.to_read.funcall("string", ())?;
125
- let content = string.to_string()?;
126
- Ok(Box::new(std::io::Cursor::new(content)))
127
- }
128
-
129
163
  fn handle_file_descriptor(&self) -> Result<Box<dyn Read + Send + 'static>, ReaderError> {
130
164
  let raw_value = self.to_read.as_raw();
131
165
  let fd = std::panic::catch_unwind(|| unsafe { rb_sys::rb_io_descriptor(raw_value) })
@@ -155,101 +189,10 @@ impl<'a, T: RecordParser + Send + 'static> RecordReaderBuilder<'a, T> {
155
189
  })
156
190
  }
157
191
 
158
- fn get_reader(&self) -> Result<(Box<dyn Read + Send + 'static>, bool), ReaderError> {
159
- let string_io: magnus::RClass = self.ruby.eval("StringIO")?;
160
- let gzip_reader_class: magnus::RClass = self.ruby.eval("Zlib::GzipReader")?;
161
-
162
- if self.to_read.is_kind_of(string_io) {
163
- self.handle_string_io().map(|r| (r, false))
164
- } else if self.to_read.is_kind_of(gzip_reader_class) {
165
- Err(ReaderError::UnsupportedGzipReader)
166
- } else if self.to_read.is_kind_of(self.ruby.class_io()) {
167
- self.handle_file_descriptor().map(|r| (r, true))
168
- } else {
169
- self.handle_file_path().map(|r| (r, false))
170
- }
171
- }
172
-
173
- fn get_single_threaded_reader(&self) -> Result<Box<dyn Read>, ReaderError> {
174
- let string_io: magnus::RClass = self.ruby.eval("StringIO")?;
175
- let gzip_reader_class: magnus::RClass = self.ruby.eval("Zlib::GzipReader")?;
176
-
177
- if self.to_read.is_kind_of(string_io) {
178
- self.handle_string_io().map(|r| -> Box<dyn Read> { r })
179
- } else if self.to_read.is_kind_of(gzip_reader_class) {
180
- Ok(Box::new(RubyReader::new(self.to_read)))
181
- } else if self.to_read.is_kind_of(self.ruby.class_io()) {
182
- self.handle_file_descriptor()
183
- .map(|r| -> Box<dyn Read> { r })
184
- } else {
185
- self.handle_file_path().map(|r| -> Box<dyn Read> { r })
186
- }
187
- }
188
-
189
- pub fn build(self) -> Result<RecordReader<T>, ReaderError> {
190
- match self.get_reader() {
191
- Ok((readable, should_forget)) => self.build_multi_threaded(readable, should_forget),
192
- Err(_) => {
193
- let readable = self.get_single_threaded_reader()?;
194
- self.build_single_threaded(readable)
195
- }
196
- }
197
- }
198
-
199
- fn build_multi_threaded(
200
- self,
201
- readable: Box<dyn Read + Send + 'static>,
202
- should_forget: bool,
203
- ) -> Result<RecordReader<T>, ReaderError> {
204
- let flexible = self.flexible || self.flexible_default.is_some();
205
- let mut reader = csv::ReaderBuilder::new()
206
- .has_headers(self.has_headers)
207
- .delimiter(self.delimiter)
208
- .quote(self.quote_char)
209
- .flexible(flexible)
210
- .trim(self.trim)
211
- .from_reader(readable);
212
-
213
- let headers = RecordReader::<T>::get_headers(self.ruby, &mut reader, self.has_headers)?;
214
- let static_headers = StringCache::intern_many(&headers)?;
215
- let headers_for_cleanup = static_headers.clone();
216
-
217
- let (sender, receiver) = kanal::bounded(self.buffer);
218
- let null_string = self.null_string.clone();
219
-
220
- let flexible_default = self.flexible_default.clone();
221
- let handle = thread::spawn(move || {
222
- let mut record = csv::StringRecord::with_capacity(READ_BUFFER_SIZE, headers.len());
223
- while let Ok(true) = reader.read_record(&mut record) {
224
- let row = T::parse(
225
- &static_headers,
226
- &record,
227
- null_string.as_deref(),
228
- flexible_default.as_deref(),
229
- );
230
- if sender.send(row).is_err() {
231
- break;
232
- }
233
- }
234
- if should_forget {
235
- let file_to_forget = reader.into_inner();
236
- std::mem::forget(file_to_forget);
237
- }
238
- });
239
-
240
- Ok(RecordReader {
241
- reader: ReadImpl::MultiThreaded {
242
- headers: headers_for_cleanup,
243
- receiver,
244
- handle: Some(handle),
245
- },
246
- })
247
- }
248
-
249
192
  fn build_single_threaded(
250
193
  self,
251
- readable: Box<dyn Read>,
252
- ) -> Result<RecordReader<T>, ReaderError> {
194
+ readable: Box<dyn Read + 'a>,
195
+ ) -> Result<RecordReader<'a, T>, ReaderError> {
253
196
  let flexible = self.flexible || self.flexible_default.is_some();
254
197
  let mut reader = csv::ReaderBuilder::new()
255
198
  .has_headers(self.has_headers)
@@ -262,70 +205,11 @@ impl<'a, T: RecordParser + Send + 'static> RecordReaderBuilder<'a, T> {
262
205
  let headers = RecordReader::<T>::get_headers(self.ruby, &mut reader, self.has_headers)?;
263
206
  let static_headers = StringCache::intern_many(&headers)?;
264
207
 
265
- Ok(RecordReader {
266
- reader: ReadImpl::SingleThreaded {
267
- reader,
268
- headers: static_headers,
269
- null_string: self.null_string,
270
- flexible_default: self.flexible_default,
271
- },
272
- })
273
- }
274
- }
275
-
276
- struct RubyReader {
277
- inner: Value,
278
- buffer: Option<Vec<u8>>,
279
- offset: usize,
280
- }
281
-
282
- impl RubyReader {
283
- fn new(inner: Value) -> Self {
284
- Self {
285
- inner,
286
- buffer: None,
287
- offset: 0,
288
- }
289
- }
290
- }
291
-
292
- // Read the entire inner into a vector and then read future reads from that vector with offset
293
- impl Read for RubyReader {
294
- fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
295
- // If we have an existing buffer, read from it
296
- if let Some(buffer) = self.buffer.as_ref() {
297
- let remaining = buffer.len() - self.offset;
298
- let copy_size = remaining.min(buf.len());
299
- buf[..copy_size].copy_from_slice(&buffer[self.offset..self.offset + copy_size]);
300
- self.offset += copy_size;
301
- return Ok(copy_size);
302
- }
303
-
304
- // No buffer yet - read the entire content from Ruby
305
- let result = self.inner.funcall::<_, _, Value>("read", ());
306
- match result {
307
- Ok(data) => {
308
- if data.is_nil() {
309
- return Ok(0); // EOF
310
- }
311
-
312
- let string = RString::from_value(data).ok_or_else(|| {
313
- io::Error::new(io::ErrorKind::Other, "Failed to convert to RString")
314
- })?;
315
- let bytes = unsafe { string.as_slice() };
316
-
317
- // Store the entire content in the buffer
318
- self.buffer = Some(bytes.to_vec());
319
- self.offset = 0;
320
-
321
- // Read initial chunk
322
- let copy_size = bytes.len().min(buf.len());
323
- buf[..copy_size].copy_from_slice(&bytes[..copy_size]);
324
- self.offset = copy_size;
325
-
326
- Ok(copy_size)
327
- }
328
- Err(e) => Err(io::Error::new(io::ErrorKind::Other, e.to_string())),
329
- }
208
+ Ok(RecordReader::new_single_threaded(
209
+ reader,
210
+ static_headers,
211
+ self.null_string,
212
+ self.flexible_default,
213
+ ))
330
214
  }
331
215
  }
@@ -1,11 +1,12 @@
1
1
  mod builder;
2
2
  mod header_cache;
3
3
  mod parser;
4
- pub mod read_impl;
5
- mod reader;
6
4
  mod record;
5
+ mod record_reader;
6
+ mod ruby_reader;
7
7
 
8
8
  pub use builder::RecordReaderBuilder;
9
9
  pub(crate) use builder::BUFFER_CHANNEL_SIZE;
10
- pub(crate) use read_impl::READ_BUFFER_SIZE;
10
+ pub use record::CowValue;
11
11
  pub use record::CsvRecord;
12
+ pub(crate) use record_reader::READ_BUFFER_SIZE;