osv 0.3.16 → 0.3.18

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 91401989a8532162a9731fed3cb07661c0676105f77465da23f9a267773e7651
4
- data.tar.gz: aeba48f1338a4160044e8c7264f80eb065d950567288bded39acf5d9bc593d7b
3
+ metadata.gz: da944a5af1cc88630fe0952e6e710d2acb8ac420ae8708a107064f5ecf444dec
4
+ data.tar.gz: bd6de3860ff2f47eb03b9019d307d647fa8c2e8f366543fbe95604f284871b62
5
5
  SHA512:
6
- metadata.gz: 8d2ea3f724a6f7af317bb1ae865513c15f2ef0e475b070e7f9ae2e1b4155b2d82090387beb0c6a2e5cb8664b1f6dd0cf61e6ad9545957bc3ada1a3e87758b1ee
7
- data.tar.gz: 0eaa86241092c14f4c2973d74e65877b7f3f87487a2681b9a094054f98db759772bcf012ec2f4fa073bd16f2b02927212b13afec484f84daf764d3b3e0811b6b
6
+ metadata.gz: 8a130687fb25aaae3734f2e69c2258ccf893c584cd0c2893b751282b393ee4d52b2317a338f1ef68a864222e4947614ffdca7e6b98d8d37dc934dfede61f7bc1
7
+ data.tar.gz: 332a5dc1c6ce6df721b22f9e66b54d48426da3a0148917f9ec13036edd63e1fb70a950a2971964d289e076536af47d090c89fd95961d8ca4b51f1f1b8a221a98
data/README.md CHANGED
@@ -142,34 +142,34 @@ OSV - Gzipped Direct 1.000 i/100ms
142
142
  FastCSV - Gzipped 1.000 i/100ms
143
143
  CSV - Gzipped 1.000 i/100ms
144
144
  Calculating -------------------------------------
145
- CSV - StringIO 0.080 (± 0.0%) i/s (12.43 s/i) - 3.000 in 37.301114s
146
- FastCSV - StringIO 0.368 (± 0.0%) i/s (2.72 s/i) - 12.000 in 32.619020s
147
- OSV - StringIO 0.699 (± 0.0%) i/s (1.43 s/i) - 21.000 in 30.091225s
148
- CSV - Hash output 0.059 (± 0.0%) i/s (16.95 s/i) - 2.000 in 33.908533s
149
- OSV - Hash output 0.329 (± 0.0%) i/s (3.04 s/i) - 10.000 in 30.551275s
150
- CSV - Array output 0.066 (± 0.0%) i/s (15.18 s/i) - 2.000 in 30.357327s
151
- OSV - Array output 0.632 (± 0.0%) i/s (1.58 s/i) - 19.000 in 30.150113s
145
+ CSV - StringIO 0.083 (± 0.0%) i/s (12.06 s/i) - 3.000 in 36.304469s
146
+ FastCSV - StringIO 0.335 (± 0.0%) i/s (2.98 s/i) - 10.000 in 31.019521s
147
+ OSV - StringIO 0.705 (± 0.0%) i/s (1.42 s/i) - 21.000 in 30.629511s
148
+ CSV - Hash output 0.060 (± 0.0%) i/s (16.74 s/i) - 2.000 in 33.475977s
149
+ OSV - Hash output 0.434 (± 0.0%) i/s (2.30 s/i) - 13.000 in 30.071679s
150
+ CSV - Array output 0.063 (± 0.0%) i/s (15.88 s/i) - 2.000 in 32.229906s
151
+ OSV - Array output 0.406 (± 0.0%) i/s (2.47 s/i) - 12.000 in 31.072600s
152
152
  FastCSV - Array output
153
- 0.350 (± 0.0%) i/s (2.86 s/i) - 11.000 in 31.477268s
153
+ 0.321 (± 0.0%) i/s (3.11 s/i) - 10.000 in 31.458966s
154
154
  OSV - Direct Open Array output
155
- 0.641 (± 0.0%) i/s (1.56 s/i) - 20.000 in 31.275201s
156
- OSV - Gzipped 0.530 (± 0.0%) i/s (1.89 s/i) - 16.000 in 30.183753s
157
- OSV - Gzipped Direct 0.727 (± 0.0%) i/s (1.37 s/i) - 22.000 in 30.283991s
158
- FastCSV - Gzipped 0.323 (± 0.0%) i/s (3.09 s/i) - 10.000 in 30.949600s
159
- CSV - Gzipped 0.056 (± 0.0%) i/s (17.72 s/i) - 2.000 in 35.440473s
155
+ 0.686 (± 0.0%) i/s (1.46 s/i) - 21.000 in 30.639715s
156
+ OSV - Gzipped 0.524 (± 0.0%) i/s (1.91 s/i) - 16.000 in 30.695259s
157
+ OSV - Gzipped Direct 0.519 (± 0.0%) i/s (1.93 s/i) - 16.000 in 30.830005s
158
+ FastCSV - Gzipped 0.313 (± 0.0%) i/s (3.20 s/i) - 10.000 in 32.031002s
159
+ CSV - Gzipped 0.057 (± 0.0%) i/s (17.55 s/i) - 2.000 in 35.107808s
160
160
 
161
161
  Comparison:
162
- OSV - Gzipped Direct: 0.7 i/s
163
- OSV - StringIO: 0.7 i/s - 1.04x slower
164
- OSV - Direct Open Array output: 0.6 i/s - 1.14x slower
165
- OSV - Array output: 0.6 i/s - 1.15x slower
166
- OSV - Gzipped: 0.5 i/s - 1.37x slower
167
- FastCSV - StringIO: 0.4 i/s - 1.98x slower
168
- FastCSV - Array output: 0.3 i/s - 2.08x slower
169
- OSV - Hash output: 0.3 i/s - 2.21x slower
170
- FastCSV - Gzipped: 0.3 i/s - 2.25x slower
171
- CSV - StringIO: 0.1 i/s - 9.04x slower
172
- CSV - Array output: 0.1 i/s - 11.04x slower
173
- CSV - Hash output: 0.1 i/s - 12.33x slower
174
- CSV - Gzipped: 0.1 i/s - 12.89x slower
162
+ OSV - StringIO : 0.7 i/s
163
+ OSV - Direct Open Array output: 0.7 i/s - 1.03x slower
164
+ OSV - Gzipped : 0.5 i/s - 1.34x slower
165
+ OSV - Gzipped Direct : 0.5 i/s - 1.36x slower
166
+ OSV - Hash output : 0.4 i/s - 1.62x slower
167
+ OSV - Array output : 0.4 i/s - 1.74x slower
168
+ FastCSV - StringIO : 0.3 i/s - 2.10x slower
169
+ FastCSV - Array output : 0.3 i/s - 2.20x slower
170
+ FastCSV - Gzipped : 0.3 i/s - 2.26x slower
171
+ CSV - StringIO : 0.1 i/s - 8.50x slower
172
+ CSV - Array output : 0.1 i/s - 11.20x slower
173
+ CSV - Hash output : 0.1 i/s - 11.80x slower
174
+ CSV - Gzipped : 0.1 i/s - 12.37x slower
175
175
  ```
@@ -6,8 +6,10 @@ use super::{
6
6
  ForgottenFileHandle,
7
7
  };
8
8
  use flate2::read::GzDecoder;
9
- use magnus::{rb_sys::AsRawValue, value::ReprValue, Error as MagnusError, Ruby, Value};
9
+ use magnus::{rb_sys::AsRawValue, value::ReprValue, Error as MagnusError, RString, Ruby, Value};
10
10
  use std::{
11
+ borrow::Cow,
12
+ fmt::Debug,
11
13
  fs::File,
12
14
  io::{self, BufReader, Read},
13
15
  marker::PhantomData,
@@ -17,18 +19,21 @@ use std::{
17
19
 
18
20
  use thiserror::Error;
19
21
 
20
- pub(crate) static BUFFER_CHANNEL_SIZE: usize = 1024;
21
-
22
+ /// Errors that can occur when building a RecordReader
22
23
  #[derive(Error, Debug)]
23
24
  pub enum ReaderError {
24
25
  #[error("Failed to get file descriptor: {0}")]
25
26
  FileDescriptor(String),
26
- #[error("Invalid file descriptor")]
27
- InvalidFileDescriptor,
27
+ #[error("Invalid file descriptor: {0}")]
28
+ InvalidFileDescriptor(i32),
28
29
  #[error("Failed to open file: {0}")]
29
30
  FileOpen(#[from] io::Error),
30
31
  #[error("Failed to intern headers: {0}")]
31
32
  HeaderIntern(#[from] CacheError),
33
+ #[error("Invalid flexible default value: {0}")]
34
+ InvalidFlexibleDefault(String),
35
+ #[error("Invalid null string value: {0}")]
36
+ InvalidNullString(String),
32
37
  #[error("Ruby error: {0}")]
33
38
  Ruby(String),
34
39
  }
@@ -48,63 +53,27 @@ impl From<ReaderError> for MagnusError {
48
53
  }
49
54
  }
50
55
 
51
- pub struct RecordReaderBuilder<'a, T: RecordParser<'a> + Send> {
52
- ruby: &'a Ruby,
56
+ /// Builder for configuring and creating a RecordReader instance.
57
+ ///
58
+ /// This struct provides a fluent interface for setting up CSV parsing options
59
+ /// and creating a RecordReader with the specified configuration.
60
+ pub struct RecordReaderBuilder<'a, T: RecordParser<'a>> {
61
+ ruby: Ruby,
53
62
  to_read: Value,
54
63
  has_headers: bool,
55
64
  delimiter: u8,
56
65
  quote_char: u8,
57
66
  null_string: Option<String>,
58
- buffer: usize,
59
67
  flexible: bool,
60
- flexible_default: Option<&'a str>,
68
+ flexible_default: Option<String>,
61
69
  trim: csv::Trim,
62
70
  _phantom: PhantomData<T>,
71
+ _phantom_a: PhantomData<&'a ()>,
63
72
  }
64
73
 
65
- impl<T: RecordParser<'static> + Send + 'static> RecordReaderBuilder<'static, T> {
66
- fn build_multi_threaded(
67
- self,
68
- readable: Box<dyn Read + Send + 'static>,
69
- ) -> Result<RecordReader<'static, T>, ReaderError> {
70
- let flexible = self.flexible || self.flexible_default.is_some();
71
- let mut reader = csv::ReaderBuilder::new()
72
- .has_headers(self.has_headers)
73
- .delimiter(self.delimiter)
74
- .quote(self.quote_char)
75
- .flexible(flexible)
76
- .trim(self.trim)
77
- .from_reader(readable);
78
-
79
- let headers = RecordReader::<T>::get_headers(self.ruby, &mut reader, self.has_headers)?;
80
- let static_headers = StringCache::intern_many(&headers)?;
81
-
82
- Ok(RecordReader::new_multi_threaded(
83
- reader,
84
- static_headers,
85
- self.buffer,
86
- self.null_string,
87
- self.flexible_default,
88
- ))
89
- }
90
-
91
- pub fn build_threaded(self) -> Result<RecordReader<'static, T>, ReaderError> {
92
- if self.to_read.is_kind_of(self.ruby.class_io()) {
93
- let readable = self.handle_file_descriptor()?;
94
- self.build_multi_threaded(readable)
95
- } else if self.to_read.is_kind_of(self.ruby.class_string()) {
96
- let readable = self.handle_file_path()?;
97
- self.build_multi_threaded(readable)
98
- } else {
99
- let readable = build_ruby_reader(self.ruby, self.to_read)?;
100
- let buffered_reader = BufReader::with_capacity(READ_BUFFER_SIZE, readable);
101
- self.build_single_threaded(buffered_reader)
102
- }
103
- }
104
- }
105
-
106
- impl<'a, T: RecordParser<'a> + Send> RecordReaderBuilder<'a, T> {
107
- pub fn new(ruby: &'a Ruby, to_read: Value) -> Self {
74
+ impl<'a, T: RecordParser<'a>> RecordReaderBuilder<'a, T> {
75
+ /// Creates a new builder instance with default settings.
76
+ pub fn new(ruby: Ruby, to_read: Value) -> Self {
108
77
  Self {
109
78
  ruby,
110
79
  to_read,
@@ -112,92 +81,107 @@ impl<'a, T: RecordParser<'a> + Send> RecordReaderBuilder<'a, T> {
112
81
  delimiter: b',',
113
82
  quote_char: b'"',
114
83
  null_string: None,
115
- buffer: BUFFER_CHANNEL_SIZE,
116
84
  flexible: false,
117
85
  flexible_default: None,
118
86
  trim: csv::Trim::None,
119
87
  _phantom: PhantomData,
88
+ _phantom_a: PhantomData,
120
89
  }
121
90
  }
122
91
 
92
+ /// Sets whether the CSV file has headers.
93
+ #[must_use]
123
94
  pub fn has_headers(mut self, has_headers: bool) -> Self {
124
95
  self.has_headers = has_headers;
125
96
  self
126
97
  }
127
98
 
99
+ /// Sets the delimiter character for the CSV.
100
+ #[must_use]
128
101
  pub fn delimiter(mut self, delimiter: u8) -> Self {
129
102
  self.delimiter = delimiter;
130
103
  self
131
104
  }
132
105
 
106
+ /// Sets the quote character for the CSV.
107
+ #[must_use]
133
108
  pub fn quote_char(mut self, quote_char: u8) -> Self {
134
109
  self.quote_char = quote_char;
135
110
  self
136
111
  }
137
112
 
113
+ /// Sets the string that should be interpreted as null.
114
+ #[must_use]
138
115
  pub fn null_string(mut self, null_string: Option<String>) -> Self {
139
116
  self.null_string = null_string;
140
117
  self
141
118
  }
142
119
 
143
- pub fn buffer(mut self, buffer: usize) -> Self {
144
- self.buffer = buffer;
145
- self
146
- }
147
-
120
+ /// Sets whether the reader should be flexible with field counts.
121
+ #[must_use]
148
122
  pub fn flexible(mut self, flexible: bool) -> Self {
149
123
  self.flexible = flexible;
150
124
  self
151
125
  }
152
126
 
153
- pub fn flexible_default(mut self, flexible_default: Option<&'a str>) -> Self {
127
+ /// Sets the default value for missing fields when in flexible mode.
128
+ #[must_use]
129
+ pub fn flexible_default(mut self, flexible_default: Option<String>) -> Self {
154
130
  self.flexible_default = flexible_default;
155
131
  self
156
132
  }
157
133
 
134
+ /// Sets the trimming mode for fields.
135
+ #[must_use]
158
136
  pub fn trim(mut self, trim: csv::Trim) -> Self {
159
137
  self.trim = trim;
160
138
  self
161
139
  }
162
140
 
163
- fn handle_file_descriptor(&self) -> Result<Box<dyn Read + Send + 'static>, ReaderError> {
141
+ /// Handles reading from a file descriptor.
142
+ fn handle_file_descriptor(&self) -> Result<Box<dyn SeekableRead>, ReaderError> {
164
143
  let raw_value = self.to_read.as_raw();
165
144
  let fd = std::panic::catch_unwind(|| unsafe { rb_sys::rb_io_descriptor(raw_value) })
166
- .map_err(|_| {
167
- ReaderError::FileDescriptor("Failed to get file descriptor".to_string())
168
- })?;
145
+ .map_err(|e| ReaderError::FileDescriptor(format!("{:?}", e)))?;
169
146
 
170
147
  if fd < 0 {
171
- return Err(ReaderError::InvalidFileDescriptor);
148
+ return Err(ReaderError::InvalidFileDescriptor(fd));
172
149
  }
173
150
 
174
151
  let file = unsafe { File::from_raw_fd(fd) };
175
152
  let forgotten = ForgottenFileHandle(ManuallyDrop::new(file));
176
- Ok(Box::new(BufReader::with_capacity(
177
- READ_BUFFER_SIZE,
178
- forgotten,
179
- )))
153
+ Ok(Box::new(forgotten))
180
154
  }
181
155
 
182
- fn handle_file_path(&self) -> Result<Box<dyn Read + Send + 'static>, ReaderError> {
156
+ /// Handles reading from a file path.
157
+ fn handle_file_path(&self) -> Result<Box<dyn SeekableRead>, ReaderError> {
183
158
  let path = self.to_read.to_r_string()?.to_string()?;
184
159
  let file = File::open(&path)?;
185
160
 
186
- Ok(if path.ends_with(".gz") {
187
- Box::new(GzDecoder::new(BufReader::with_capacity(
188
- READ_BUFFER_SIZE,
189
- file,
190
- )))
161
+ if path.ends_with(".gz") {
162
+ // For gzipped files, we need to decompress them into memory first
163
+ // since GzDecoder doesn't support seeking
164
+ let mut decoder = GzDecoder::new(BufReader::with_capacity(READ_BUFFER_SIZE, file));
165
+ let mut contents = Vec::new();
166
+ decoder.read_to_end(&mut contents)?;
167
+ Ok(Box::new(std::io::Cursor::new(contents)))
191
168
  } else {
192
- Box::new(BufReader::with_capacity(READ_BUFFER_SIZE, file))
193
- })
169
+ Ok(Box::new(file))
170
+ }
194
171
  }
195
172
 
196
- fn build_single_threaded(
197
- self,
198
- readable: BufReader<Box<dyn SeekableRead>>,
199
- ) -> Result<RecordReader<'a, T>, ReaderError> {
173
+ /// Builds the RecordReader with the configured options.
174
+ pub fn build(self) -> Result<RecordReader<'a, T>, ReaderError> {
175
+ let readable = if self.to_read.is_kind_of(self.ruby.class_io()) {
176
+ self.handle_file_descriptor()?
177
+ } else if self.to_read.is_kind_of(self.ruby.class_string()) {
178
+ self.handle_file_path()?
179
+ } else {
180
+ build_ruby_reader(&self.ruby, self.to_read)?
181
+ };
182
+
200
183
  let flexible = self.flexible || self.flexible_default.is_some();
184
+ let reader = BufReader::with_capacity(READ_BUFFER_SIZE, readable);
201
185
 
202
186
  let mut reader = csv::ReaderBuilder::new()
203
187
  .has_headers(self.has_headers)
@@ -205,16 +189,39 @@ impl<'a, T: RecordParser<'a> + Send> RecordReaderBuilder<'a, T> {
205
189
  .quote(self.quote_char)
206
190
  .flexible(flexible)
207
191
  .trim(self.trim)
208
- .from_reader(readable);
192
+ .from_reader(reader);
209
193
 
210
- let headers = RecordReader::<T>::get_headers(self.ruby, &mut reader, self.has_headers)?;
194
+ let headers = RecordReader::<T>::get_headers(&self.ruby, &mut reader, self.has_headers)?;
211
195
  let static_headers = StringCache::intern_many(&headers)?;
212
196
 
213
- Ok(RecordReader::new_single_threaded(
197
+ // We intern both of these to get static string references we can reuse throughout the parser.
198
+ let flexible_default = self
199
+ .flexible_default
200
+ .map(|s| {
201
+ RString::new(&s)
202
+ .to_interned_str()
203
+ .as_str()
204
+ .map_err(|e| ReaderError::InvalidFlexibleDefault(format!("{:?}", e)))
205
+ })
206
+ .transpose()?
207
+ .map(|s| Cow::Borrowed(s));
208
+
209
+ let null_string = self
210
+ .null_string
211
+ .map(|s| {
212
+ RString::new(&s)
213
+ .to_interned_str()
214
+ .as_str()
215
+ .map_err(|e| ReaderError::InvalidNullString(format!("{:?}", e)))
216
+ })
217
+ .transpose()?
218
+ .map(|s| Cow::Borrowed(s));
219
+
220
+ Ok(RecordReader::new(
214
221
  reader,
215
222
  static_headers,
216
- self.null_string,
217
- self.flexible_default,
223
+ null_string,
224
+ flexible_default,
218
225
  ))
219
226
  }
220
227
  }
@@ -1,4 +1,3 @@
1
- use magnus::{r_string::FString, value::Opaque, IntoValue, RString, Ruby, Value};
2
1
  /// This module exists to avoid cloning header keys in returned HashMaps.
3
2
  /// Since the underlying RString creation already involves cloning,
4
3
  /// this caching layer aims to reduce redundant allocations.
@@ -7,8 +6,14 @@ use magnus::{r_string::FString, value::Opaque, IntoValue, RString, Ruby, Value};
7
6
  /// so this optimization could be removed if any issues arise.
8
7
  use std::{
9
8
  collections::HashMap,
10
- sync::{atomic::AtomicU32, atomic::Ordering, LazyLock, Mutex},
9
+ sync::{
10
+ atomic::{AtomicU32, Ordering},
11
+ LazyLock, Mutex, OnceLock,
12
+ },
11
13
  };
14
+
15
+ use magnus::{r_string::FString, value::Opaque, IntoValue, RString, Ruby, Value};
16
+
12
17
  use thiserror::Error;
13
18
 
14
19
  #[derive(Debug, Error)]
@@ -132,3 +137,24 @@ impl StringCache {
132
137
  Ok(())
133
138
  }
134
139
  }
140
+
141
+ pub struct HeaderCacheCleanupIter<I> {
142
+ pub inner: I,
143
+ pub headers: OnceLock<Vec<StringCacheKey>>,
144
+ }
145
+
146
+ impl<I: Iterator> Iterator for HeaderCacheCleanupIter<I> {
147
+ type Item = I::Item;
148
+
149
+ fn next(&mut self) -> Option<Self::Item> {
150
+ self.inner.next()
151
+ }
152
+ }
153
+
154
+ impl<I> Drop for HeaderCacheCleanupIter<I> {
155
+ fn drop(&mut self) {
156
+ if let Some(headers) = self.headers.get() {
157
+ StringCache::clear(&headers).unwrap();
158
+ }
159
+ }
160
+ }
@@ -7,8 +7,7 @@ mod ruby_integration;
7
7
  mod ruby_reader;
8
8
 
9
9
  pub use builder::RecordReaderBuilder;
10
- pub(crate) use builder::BUFFER_CHANNEL_SIZE;
11
10
  pub use header_cache::StringCacheKey;
12
- pub use record::CowValue;
11
+ pub use record::CowStr;
13
12
  pub use record::CsvRecord;
14
13
  pub use ruby_integration::*;
@@ -3,21 +3,21 @@ use std::collections::HashMap;
3
3
  use std::hash::BuildHasher;
4
4
 
5
5
  use super::header_cache::StringCacheKey;
6
- use super::CowValue;
6
+ use super::CowStr;
7
7
 
8
8
  pub trait RecordParser<'a> {
9
- type Output: 'a;
9
+ type Output;
10
10
 
11
11
  fn parse(
12
12
  headers: &[StringCacheKey],
13
13
  record: &csv::StringRecord,
14
- null_string: Option<&str>,
14
+ null_string: Option<Cow<'a, str>>,
15
15
  flexible_default: Option<Cow<'a, str>>,
16
16
  ) -> Self::Output;
17
17
  }
18
18
 
19
- impl<'a, S: BuildHasher + Default + 'a> RecordParser<'a>
20
- for HashMap<StringCacheKey, Option<CowValue<'a>>, S>
19
+ impl<'a, S: BuildHasher + Default> RecordParser<'a>
20
+ for HashMap<StringCacheKey, Option<CowStr<'a>>, S>
21
21
  {
22
22
  type Output = Self;
23
23
 
@@ -25,23 +25,23 @@ impl<'a, S: BuildHasher + Default + 'a> RecordParser<'a>
25
25
  fn parse(
26
26
  headers: &[StringCacheKey],
27
27
  record: &csv::StringRecord,
28
- null_string: Option<&str>,
28
+ null_string: Option<Cow<'a, str>>,
29
29
  flexible_default: Option<Cow<'a, str>>,
30
30
  ) -> Self::Output {
31
31
  let mut map = HashMap::with_capacity_and_hasher(headers.len(), S::default());
32
32
 
33
33
  let shared_empty = Cow::Borrowed("");
34
- let shared_default = flexible_default.map(CowValue);
34
+ let shared_default = flexible_default.map(CowStr);
35
35
  headers.iter().enumerate().for_each(|(i, ref header)| {
36
36
  let value = record.get(i).map_or_else(
37
37
  || shared_default.clone(),
38
38
  |field| {
39
- if null_string == Some(field) {
39
+ if null_string.as_deref() == Some(field) {
40
40
  None
41
41
  } else if field.is_empty() {
42
- Some(CowValue(shared_empty.clone()))
42
+ Some(CowStr(shared_empty.clone()))
43
43
  } else {
44
- Some(CowValue(Cow::Owned(field.to_string())))
44
+ Some(CowStr(Cow::Owned(field.to_string())))
45
45
  }
46
46
  },
47
47
  );
@@ -51,29 +51,29 @@ impl<'a, S: BuildHasher + Default + 'a> RecordParser<'a>
51
51
  }
52
52
  }
53
53
 
54
- impl<'a> RecordParser<'a> for Vec<Option<CowValue<'a>>> {
54
+ impl<'a> RecordParser<'a> for Vec<Option<CowStr<'a>>> {
55
55
  type Output = Self;
56
56
 
57
57
  #[inline]
58
58
  fn parse(
59
59
  headers: &[StringCacheKey],
60
60
  record: &csv::StringRecord,
61
- null_string: Option<&str>,
61
+ null_string: Option<Cow<'a, str>>,
62
62
  flexible_default: Option<Cow<'a, str>>,
63
63
  ) -> Self::Output {
64
64
  let target_len = headers.len();
65
65
  let mut vec = Vec::with_capacity(target_len);
66
66
 
67
67
  let shared_empty = Cow::Borrowed("");
68
- let shared_default = flexible_default.map(CowValue);
68
+ let shared_default = flexible_default.map(CowStr);
69
69
 
70
70
  for field in record.iter() {
71
- let value = if Some(field) == null_string {
71
+ let value = if Some(field) == null_string.as_deref() {
72
72
  None
73
73
  } else if field.is_empty() {
74
- Some(CowValue(shared_empty.clone()))
74
+ Some(CowStr(shared_empty.clone()))
75
75
  } else {
76
- Some(CowValue(Cow::Owned(field.to_string())))
76
+ Some(CowStr(Cow::Owned(field.to_string())))
77
77
  };
78
78
  vec.push(value);
79
79
  }
@@ -86,67 +86,3 @@ impl<'a> RecordParser<'a> for Vec<Option<CowValue<'a>>> {
86
86
  vec
87
87
  }
88
88
  }
89
-
90
- // impl<'a, S: BuildHasher + Default + 'a> RecordParser<'a>
91
- // for HashMap<&'static str, Option<String>, S>
92
- // {
93
- // type Output = Self;
94
-
95
- // #[inline]
96
- // fn parse(
97
- // headers: &[&'static str],
98
- // record: &csv::StringRecord,
99
- // null_string: Option<&str>,
100
- // flexible_default: Option<Cow<'a, str>>,
101
- // ) -> Self::Output {
102
- // let mut map = HashMap::with_capacity_and_hasher(headers.len(), S::default());
103
- // headers.iter().enumerate().for_each(|(i, &header)| {
104
- // let value = record.get(i).map_or_else(
105
- // || flexible_default.clone(),
106
- // |field| {
107
- // if null_string == Some(field) {
108
- // None
109
- // } else if field.is_empty() {
110
- // Some(String::new())
111
- // } else {
112
- // Some(field.into())
113
- // }
114
- // },
115
- // );
116
- // map.insert(header, value);
117
- // });
118
- // map
119
- // }
120
- // }
121
-
122
- // impl<'a> RecordParser<'a> for Vec<Option<String>> {
123
- // type Output = Self;
124
-
125
- // #[inline]
126
- // fn parse(
127
- // headers: &[&'static str],
128
- // record: &csv::StringRecord,
129
- // null_string: Option<&str>,
130
- // flexible_default: Option<Cow<'a, str>>,
131
- // ) -> Self::Output {
132
- // let target_len = headers.len();
133
- // let mut vec = Vec::with_capacity(target_len);
134
- // for field in record.iter() {
135
- // let value = if Some(field) == null_string {
136
- // None
137
- // } else if field.is_empty() {
138
- // Some(String::new())
139
- // } else {
140
- // Some(field.into())
141
- // };
142
- // vec.push(value);
143
- // }
144
-
145
- // if vec.len() < target_len {
146
- // if let Some(default) = flexible_default {
147
- // vec.resize_with(target_len, || Some(default.to_string()));
148
- // }
149
- // }
150
- // vec
151
- // }
152
- // }
@@ -6,8 +6,8 @@ use super::StringCacheKey;
6
6
 
7
7
  #[derive(Debug)]
8
8
  pub enum CsvRecord<'a, S: BuildHasher + Default> {
9
- Vec(Vec<Option<CowValue<'a>>>),
10
- Map(HashMap<StringCacheKey, Option<CowValue<'a>>, S>),
9
+ Vec(Vec<Option<CowStr<'a>>>),
10
+ Map(HashMap<StringCacheKey, Option<CowStr<'a>>, S>),
11
11
  }
12
12
 
13
13
  impl<S: BuildHasher + Default> IntoValue for CsvRecord<'_, S> {
@@ -46,9 +46,9 @@ impl<S: BuildHasher + Default> IntoValue for CsvRecord<'_, S> {
46
46
  }
47
47
 
48
48
  #[derive(Debug, Clone)]
49
- pub struct CowValue<'a>(pub Cow<'a, str>);
49
+ pub struct CowStr<'a>(pub Cow<'a, str>);
50
50
 
51
- impl IntoValue for CowValue<'_> {
51
+ impl IntoValue for CowStr<'_> {
52
52
  fn into_value_with(self, handle: &Ruby) -> Value {
53
53
  self.0.into_value_with(handle)
54
54
  }
@@ -2,32 +2,34 @@ use super::header_cache::StringCacheKey;
2
2
  use super::parser::RecordParser;
3
3
  use super::{header_cache::StringCache, ruby_reader::SeekableRead};
4
4
  use magnus::{Error, Ruby};
5
- use std::io::BufReader;
6
- use std::{borrow::Cow, io::Read, thread};
5
+ use std::borrow::Cow;
6
+ use std::io::{BufReader, Read};
7
7
 
8
+ /// Size of the internal buffer used for reading CSV records
8
9
  pub(crate) const READ_BUFFER_SIZE: usize = 16384;
9
10
 
11
+ /// A reader that processes CSV records using a specified parser.
12
+ ///
13
+ /// This struct implements Iterator to provide a streaming interface for CSV records.
10
14
  pub struct RecordReader<'a, T: RecordParser<'a>> {
11
- inner: ReaderImpl<'a, T>,
12
- }
13
-
14
- #[allow(clippy::large_enum_variant)]
15
- enum ReaderImpl<'a, T: RecordParser<'a>> {
16
- SingleThreaded {
17
- reader: csv::Reader<BufReader<Box<dyn SeekableRead>>>,
18
- headers: Vec<StringCacheKey>,
19
- null_string: Option<String>,
20
- flexible_default: Option<Cow<'a, str>>,
21
- string_record: csv::StringRecord,
22
- },
23
- MultiThreaded {
24
- headers: Vec<StringCacheKey>,
25
- receiver: kanal::Receiver<T::Output>,
26
- handle: Option<thread::JoinHandle<()>>,
27
- },
15
+ reader: csv::Reader<BufReader<Box<dyn SeekableRead>>>,
16
+ headers: Vec<StringCacheKey>,
17
+ null_string: Option<Cow<'a, str>>,
18
+ flexible_default: Option<Cow<'a, str>>,
19
+ string_record: csv::StringRecord,
20
+ parser: std::marker::PhantomData<T>,
28
21
  }
29
22
 
30
23
  impl<'a, T: RecordParser<'a>> RecordReader<'a, T> {
24
+ /// Reads and processes headers from a CSV reader.
25
+ ///
26
+ /// # Arguments
27
+ /// * `ruby` - Ruby VM context for error handling
28
+ /// * `reader` - CSV reader instance
29
+ /// * `has_headers` - Whether the CSV file contains headers
30
+ ///
31
+ /// # Returns
32
+ /// A vector of header strings or generated column names if `has_headers` is false
31
33
  #[inline]
32
34
  pub(crate) fn get_headers(
33
35
  ruby: &Ruby,
@@ -41,67 +43,41 @@ impl<'a, T: RecordParser<'a>> RecordReader<'a, T> {
41
43
  )
42
44
  })?;
43
45
 
44
- let mut headers = Vec::with_capacity(first_row.len());
45
- if has_headers {
46
- headers.extend(first_row.iter().map(String::from));
46
+ Ok(if has_headers {
47
+ first_row.iter().map(String::from).collect()
47
48
  } else {
48
- headers.extend((0..first_row.len()).map(|i| format!("c{i}")));
49
- }
50
- Ok(headers)
49
+ (0..first_row.len()).map(|i| format!("c{i}")).collect()
50
+ })
51
51
  }
52
52
 
53
- pub(crate) fn new_single_threaded(
53
+ /// Creates a new RecordReader instance.
54
+ pub(crate) fn new(
54
55
  reader: csv::Reader<BufReader<Box<dyn SeekableRead>>>,
55
56
  headers: Vec<StringCacheKey>,
56
- null_string: Option<String>,
57
- flexible_default: Option<&'a str>,
57
+ null_string: Option<Cow<'a, str>>,
58
+ flexible_default: Option<Cow<'a, str>>,
58
59
  ) -> Self {
59
60
  let headers_len = headers.len();
60
61
  Self {
61
- inner: ReaderImpl::SingleThreaded {
62
- reader,
63
- headers,
64
- null_string,
65
- flexible_default: flexible_default.map(Cow::Borrowed),
66
- string_record: csv::StringRecord::with_capacity(READ_BUFFER_SIZE, headers_len),
67
- },
62
+ reader,
63
+ headers,
64
+ null_string,
65
+ flexible_default,
66
+ string_record: csv::StringRecord::with_capacity(READ_BUFFER_SIZE, headers_len),
67
+ parser: std::marker::PhantomData,
68
68
  }
69
69
  }
70
- }
71
-
72
- impl<T: RecordParser<'static> + Send> RecordReader<'static, T> {
73
- pub(crate) fn new_multi_threaded(
74
- mut reader: csv::Reader<Box<dyn Read + Send + 'static>>,
75
- headers: Vec<StringCacheKey>,
76
- buffer_size: usize,
77
- null_string: Option<String>,
78
- flexible_default: Option<&'static str>,
79
- ) -> Self {
80
- let (sender, receiver) = kanal::bounded(buffer_size);
81
- let headers_for_thread = headers.clone();
82
70
 
83
- let handle = thread::spawn(move || {
84
- let mut record =
85
- csv::StringRecord::with_capacity(READ_BUFFER_SIZE, headers_for_thread.len());
86
- while let Ok(true) = reader.read_record(&mut record) {
87
- let row = T::parse(
88
- &headers_for_thread,
89
- &record,
90
- null_string.as_deref(),
91
- flexible_default.map(Cow::Borrowed),
92
- );
93
- if sender.send(row).is_err() {
94
- break;
95
- }
96
- }
97
- });
98
-
99
- Self {
100
- inner: ReaderImpl::MultiThreaded {
101
- headers,
102
- receiver,
103
- handle: Some(handle),
104
- },
71
+ /// Attempts to read the next record, returning any errors encountered.
72
+ fn try_next(&mut self) -> csv::Result<Option<T::Output>> {
73
+ match self.reader.read_record(&mut self.string_record)? {
74
+ true => Ok(Some(T::parse(
75
+ &self.headers,
76
+ &self.string_record,
77
+ self.null_string.clone(),
78
+ self.flexible_default.clone(),
79
+ ))),
80
+ false => Ok(None),
105
81
  }
106
82
  }
107
83
  }
@@ -111,63 +87,21 @@ impl<'a, T: RecordParser<'a>> Iterator for RecordReader<'a, T> {
111
87
 
112
88
  #[inline]
113
89
  fn next(&mut self) -> Option<Self::Item> {
114
- match &mut self.inner {
115
- ReaderImpl::MultiThreaded {
116
- receiver, handle, ..
117
- } => match receiver.recv() {
118
- Ok(record) => Some(record),
119
- Err(_) => {
120
- if let Some(handle) = handle.take() {
121
- let _ = handle.join();
122
- }
123
- None
124
- }
125
- },
126
- ReaderImpl::SingleThreaded {
127
- reader,
128
- headers,
129
- null_string,
130
- flexible_default,
131
- ref mut string_record,
132
- } => match reader.read_record(string_record) {
133
- Ok(true) => Some(T::parse(
134
- headers,
135
- string_record,
136
- null_string.as_deref(),
137
- flexible_default.clone(),
138
- )),
139
- Ok(false) => None,
140
- Err(_e) => None,
141
- },
142
- }
90
+ // Note: We intentionally swallow errors here to maintain Iterator contract.
91
+ // Errors can be handled by using try_next() directly if needed.
92
+ self.try_next().ok().flatten()
143
93
  }
144
94
 
145
95
  #[inline]
146
96
  fn size_hint(&self) -> (usize, Option<usize>) {
147
- // We can't know the exact size without reading the whole file
148
- (0, None)
97
+ (0, None) // Cannot determine size without reading entire file
149
98
  }
150
99
  }
151
100
 
152
101
  impl<'a, T: RecordParser<'a>> Drop for RecordReader<'a, T> {
153
102
  #[inline]
154
103
  fn drop(&mut self) {
155
- match &mut self.inner {
156
- ReaderImpl::MultiThreaded {
157
- receiver,
158
- handle,
159
- headers,
160
- ..
161
- } => {
162
- receiver.close();
163
- if let Some(handle) = handle.take() {
164
- let _ = handle.join();
165
- }
166
- let _ = StringCache::clear(&headers);
167
- }
168
- ReaderImpl::SingleThreaded { headers, .. } => {
169
- let _ = StringCache::clear(&headers);
170
- }
171
- }
104
+ // Intentionally ignore errors during cleanup as there's no meaningful way to handle them
105
+ let _ = StringCache::clear(&self.headers);
172
106
  }
173
107
  }
@@ -1,30 +1,19 @@
1
- use std::{fs::File, io, mem::ManuallyDrop};
1
+ use std::{
2
+ fs::File,
3
+ io::{self, Read, Seek, SeekFrom},
4
+ mem::ManuallyDrop,
5
+ };
2
6
 
3
7
  pub struct ForgottenFileHandle(pub ManuallyDrop<File>);
4
8
 
5
- impl std::io::Read for ForgottenFileHandle {
9
+ impl Read for ForgottenFileHandle {
6
10
  fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
7
11
  self.0.read(buf)
8
12
  }
13
+ }
9
14
 
10
- fn read_vectored(&mut self, bufs: &mut [std::io::IoSliceMut<'_>]) -> io::Result<usize> {
11
- self.0.read_vectored(bufs)
12
- }
13
-
14
- // fn read_buf(&mut self, cursor: BorrowedCursor<'_>) -> io::Result<()> {
15
- // self.0.read_buf(cursor)
16
- // }
17
-
18
- // #[inline]
19
- // fn is_read_vectored(&self) -> bool {
20
- // self.0.is_read_vectored()
21
- // }
22
-
23
- fn read_to_end(&mut self, buf: &mut Vec<u8>) -> io::Result<usize> {
24
- self.0.read_to_end(buf)
25
- }
26
-
27
- fn read_to_string(&mut self, buf: &mut String) -> io::Result<usize> {
28
- self.0.read_to_string(buf)
15
+ impl Seek for ForgottenFileHandle {
16
+ fn seek(&mut self, pos: SeekFrom) -> io::Result<u64> {
17
+ self.0.seek(pos)
29
18
  }
30
19
  }
@@ -2,9 +2,12 @@ use magnus::{
2
2
  value::{Opaque, ReprValue},
3
3
  RClass, RString, Ruby, Value,
4
4
  };
5
- use std::io::{self, Read, Seek, SeekFrom, Write};
5
+ use std::fs::File;
6
+ use std::io::{self, BufReader, Read, Seek, SeekFrom, Write};
6
7
  use std::sync::OnceLock;
7
8
 
9
+ use super::ForgottenFileHandle;
10
+
8
11
  static STRING_IO_CLASS: OnceLock<Opaque<RClass>> = OnceLock::new();
9
12
 
10
13
  /// A reader that can handle various Ruby input types (String, StringIO, IO-like objects)
@@ -17,6 +20,10 @@ pub struct RubyReader<T> {
17
20
  pub trait SeekableRead: std::io::Read + Seek {}
18
21
  impl SeekableRead for RubyReader<Value> {}
19
22
  impl SeekableRead for RubyReader<RString> {}
23
+ impl SeekableRead for File {}
24
+ impl<T: Read + Seek> SeekableRead for BufReader<T> {}
25
+ impl SeekableRead for std::io::Cursor<Vec<u8>> {}
26
+ impl SeekableRead for ForgottenFileHandle {}
20
27
 
21
28
  pub fn build_ruby_reader(
22
29
  ruby: &Ruby,
@@ -1,4 +1,4 @@
1
- use crate::csv::{CowValue, CsvRecord, RecordReaderBuilder, StringCacheKey};
1
+ use crate::csv::{CowStr, CsvRecord, RecordReaderBuilder, StringCacheKey};
2
2
  use crate::utils::*;
3
3
  use ahash::RandomState;
4
4
  use csv::Trim;
@@ -6,12 +6,49 @@ use magnus::value::ReprValue;
6
6
  use magnus::{block::Yield, Error, KwArgs, RHash, Ruby, Symbol, Value};
7
7
  use std::collections::HashMap;
8
8
 
9
+ /// Valid result types for CSV parsing
10
+ #[derive(Debug, Clone, Copy, PartialEq, Eq)]
11
+ enum ResultType {
12
+ Hash,
13
+ Array,
14
+ }
15
+
16
+ impl ResultType {
17
+ fn from_str(s: &str) -> Option<Self> {
18
+ match s {
19
+ "hash" => Some(Self::Hash),
20
+ "array" => Some(Self::Array),
21
+ _ => None,
22
+ }
23
+ }
24
+ }
25
+
26
+ /// Arguments for creating an enumerator
27
+ #[derive(Debug)]
28
+ struct EnumeratorArgs {
29
+ rb_self: Value,
30
+ to_read: Value,
31
+ has_headers: bool,
32
+ delimiter: u8,
33
+ quote_char: u8,
34
+ null_string: Option<String>,
35
+ result_type: String,
36
+ flexible: bool,
37
+ flexible_default: Option<String>,
38
+ trim: Option<String>,
39
+ }
40
+
41
+ /// Parses a CSV file with the given configuration.
42
+ ///
43
+ /// # Safety
44
+ /// This function uses unsafe code to get the Ruby runtime and leak memory for static references.
45
+ /// This is necessary for Ruby integration but should be used with caution.
9
46
  pub fn parse_csv(
10
47
  rb_self: Value,
11
48
  args: &[Value],
12
49
  ) -> Result<Yield<Box<dyn Iterator<Item = CsvRecord<'static, RandomState>>>>, Error> {
13
- let original = unsafe { Ruby::get_unchecked() };
14
- let ruby: &'static Ruby = Box::leak(Box::new(original));
50
+ // SAFETY: We're in a Ruby callback, so Ruby runtime is guaranteed to be initialized
51
+ let ruby = unsafe { Ruby::get_unchecked() };
15
52
 
16
53
  let ReadCsvArgs {
17
54
  to_read,
@@ -19,16 +56,11 @@ pub fn parse_csv(
19
56
  delimiter,
20
57
  quote_char,
21
58
  null_string,
22
- buffer_size,
23
59
  result_type,
24
60
  flexible,
25
61
  flexible_default,
26
62
  trim,
27
- } = parse_read_csv_args(ruby, args)?;
28
-
29
- let flexible_default: &'static Option<String> = Box::leak(Box::new(flexible_default));
30
- let leaked_flexible_default: &'static Option<&str> =
31
- Box::leak(Box::new(flexible_default.as_deref()));
63
+ } = parse_read_csv_args(&ruby, args)?;
32
64
 
33
65
  if !ruby.block_given() {
34
66
  return create_enumerator(EnumeratorArgs {
@@ -38,10 +70,9 @@ pub fn parse_csv(
38
70
  delimiter,
39
71
  quote_char,
40
72
  null_string,
41
- buffer_size,
42
- result_type,
73
+ result_type: result_type,
43
74
  flexible,
44
- flexible_default: leaked_flexible_default.as_deref(),
75
+ flexible_default: flexible_default,
45
76
  trim: match trim {
46
77
  Trim::All => Some("all".to_string()),
47
78
  Trim::Headers => Some("headers".to_string()),
@@ -51,60 +82,47 @@ pub fn parse_csv(
51
82
  });
52
83
  }
53
84
 
54
- let iter: Box<dyn Iterator<Item = CsvRecord<RandomState>>> = match result_type.as_str() {
55
- "hash" => {
85
+ let result_type = ResultType::from_str(&result_type).ok_or_else(|| {
86
+ Error::new(
87
+ ruby.exception_runtime_error(),
88
+ "Invalid result type, expected 'hash' or 'array'",
89
+ )
90
+ })?;
91
+
92
+ let iter: Box<dyn Iterator<Item = CsvRecord<RandomState>>> = match result_type {
93
+ ResultType::Hash => {
56
94
  let builder = RecordReaderBuilder::<
57
- HashMap<StringCacheKey, Option<CowValue<'static>>, RandomState>,
95
+ HashMap<StringCacheKey, Option<CowStr<'static>>, RandomState>,
58
96
  >::new(ruby, to_read)
59
97
  .has_headers(has_headers)
60
98
  .flexible(flexible)
61
- .flexible_default(flexible_default.as_deref())
99
+ .flexible_default(flexible_default)
62
100
  .trim(trim)
63
101
  .delimiter(delimiter)
64
102
  .quote_char(quote_char)
65
- .null_string(null_string)
66
- .buffer(buffer_size);
103
+ .null_string(null_string);
67
104
 
68
- Box::new(builder.build_threaded()?.map(CsvRecord::Map))
105
+ Box::new(builder.build()?.map(CsvRecord::Map))
69
106
  }
70
- "array" => Box::new(
71
- RecordReaderBuilder::<Vec<Option<CowValue<'static>>>>::new(ruby, to_read)
107
+ ResultType::Array => {
108
+ let builder = RecordReaderBuilder::<Vec<Option<CowStr<'static>>>>::new(ruby, to_read)
72
109
  .has_headers(has_headers)
73
110
  .flexible(flexible)
74
- .flexible_default(flexible_default.as_deref())
111
+ .flexible_default(flexible_default)
75
112
  .trim(trim)
76
113
  .delimiter(delimiter)
77
114
  .quote_char(quote_char)
78
115
  .null_string(null_string)
79
- .buffer(buffer_size)
80
- .build_threaded()?
81
- .map(CsvRecord::Vec),
82
- ),
83
- _ => {
84
- return Err(Error::new(
85
- ruby.exception_runtime_error(),
86
- "Invalid result type",
87
- ))
116
+ .build()?;
117
+
118
+ Box::new(builder.map(CsvRecord::Vec))
88
119
  }
89
120
  };
90
121
 
91
122
  Ok(Yield::Iter(iter))
92
123
  }
93
124
 
94
- struct EnumeratorArgs {
95
- rb_self: Value,
96
- to_read: Value,
97
- has_headers: bool,
98
- delimiter: u8,
99
- quote_char: u8,
100
- null_string: Option<String>,
101
- buffer_size: usize,
102
- result_type: String,
103
- flexible: bool,
104
- flexible_default: Option<&'static str>,
105
- trim: Option<String>,
106
- }
107
-
125
+ /// Creates an enumerator for lazy CSV parsing
108
126
  fn create_enumerator(
109
127
  args: EnumeratorArgs,
110
128
  ) -> Result<Yield<Box<dyn Iterator<Item = CsvRecord<'static, RandomState>>>>, Error> {
@@ -119,11 +137,11 @@ fn create_enumerator(
119
137
  String::from_utf8(vec![args.quote_char]).unwrap(),
120
138
  )?;
121
139
  kwargs.aset(Symbol::new("nil_string"), args.null_string)?;
122
- kwargs.aset(Symbol::new("buffer_size"), args.buffer_size)?;
123
140
  kwargs.aset(Symbol::new("result_type"), Symbol::new(args.result_type))?;
124
141
  kwargs.aset(Symbol::new("flexible"), args.flexible)?;
125
142
  kwargs.aset(Symbol::new("flexible_default"), args.flexible_default)?;
126
143
  kwargs.aset(Symbol::new("trim"), args.trim.map(Symbol::new))?;
144
+
127
145
  let enumerator = args
128
146
  .rb_self
129
147
  .enumeratorize("for_each", (args.to_read, KwArgs(kwargs)));
data/ext/osv/src/utils.rs CHANGED
@@ -4,8 +4,6 @@ use magnus::{
4
4
  Error, RString, Ruby, Symbol, Value,
5
5
  };
6
6
 
7
- use crate::csv::BUFFER_CHANNEL_SIZE;
8
-
9
7
  fn parse_string_or_symbol(ruby: &Ruby, value: Value) -> Result<Option<String>, Error> {
10
8
  if value.is_nil() {
11
9
  Ok(None)
@@ -34,7 +32,6 @@ pub struct ReadCsvArgs {
34
32
  pub delimiter: u8,
35
33
  pub quote_char: u8,
36
34
  pub null_string: Option<String>,
37
- pub buffer_size: usize,
38
35
  pub result_type: String,
39
36
  pub flexible: bool,
40
37
  pub flexible_default: Option<String>,
@@ -50,15 +47,14 @@ pub fn parse_read_csv_args(ruby: &Ruby, args: &[Value]) -> Result<ReadCsvArgs, E
50
47
  _,
51
48
  (),
52
49
  (
53
- Option<bool>,
54
- Option<String>,
55
- Option<String>,
50
+ Option<Option<bool>>,
51
+ Option<Option<String>>,
56
52
  Option<Option<String>>,
57
- Option<usize>,
58
- Option<Value>,
59
- Option<bool>,
60
53
  Option<Option<String>>,
61
- Option<Value>,
54
+ Option<Option<Value>>,
55
+ Option<Option<bool>>,
56
+ Option<Option<Option<String>>>,
57
+ Option<Option<Value>>,
62
58
  ),
63
59
  (),
64
60
  >(
@@ -69,7 +65,6 @@ pub fn parse_read_csv_args(ruby: &Ruby, args: &[Value]) -> Result<ReadCsvArgs, E
69
65
  "col_sep",
70
66
  "quote_char",
71
67
  "nil_string",
72
- "buffer_size",
73
68
  "result_type",
74
69
  "flexible",
75
70
  "flexible_default",
@@ -77,11 +72,12 @@ pub fn parse_read_csv_args(ruby: &Ruby, args: &[Value]) -> Result<ReadCsvArgs, E
77
72
  ],
78
73
  )?;
79
74
 
80
- let has_headers = kwargs.optional.0.unwrap_or(true);
75
+ let has_headers = kwargs.optional.0.flatten().unwrap_or(true);
81
76
 
82
77
  let delimiter = *kwargs
83
78
  .optional
84
79
  .1
80
+ .flatten()
85
81
  .unwrap_or_else(|| ",".to_string())
86
82
  .as_bytes()
87
83
  .first()
@@ -95,6 +91,7 @@ pub fn parse_read_csv_args(ruby: &Ruby, args: &[Value]) -> Result<ReadCsvArgs, E
95
91
  let quote_char = *kwargs
96
92
  .optional
97
93
  .2
94
+ .flatten()
98
95
  .unwrap_or_else(|| "\"".to_string())
99
96
  .as_bytes()
100
97
  .first()
@@ -107,11 +104,10 @@ pub fn parse_read_csv_args(ruby: &Ruby, args: &[Value]) -> Result<ReadCsvArgs, E
107
104
 
108
105
  let null_string = kwargs.optional.3.unwrap_or_default();
109
106
 
110
- let buffer_size = kwargs.optional.4.unwrap_or(BUFFER_CHANNEL_SIZE);
111
-
112
107
  let result_type = match kwargs
113
108
  .optional
114
- .5
109
+ .4
110
+ .flatten()
115
111
  .map(|value| parse_string_or_symbol(ruby, value))
116
112
  {
117
113
  Some(Ok(Some(parsed))) => match parsed.as_str() {
@@ -133,13 +129,14 @@ pub fn parse_read_csv_args(ruby: &Ruby, args: &[Value]) -> Result<ReadCsvArgs, E
133
129
  None => String::from("hash"),
134
130
  };
135
131
 
136
- let flexible = kwargs.optional.6.unwrap_or_default();
132
+ let flexible = kwargs.optional.5.flatten().unwrap_or_default();
137
133
 
138
- let flexible_default = kwargs.optional.7.unwrap_or_default();
134
+ let flexible_default = kwargs.optional.6.flatten().unwrap_or_default();
139
135
 
140
136
  let trim = match kwargs
141
137
  .optional
142
- .8
138
+ .7
139
+ .flatten()
143
140
  .map(|value| parse_string_or_symbol(ruby, value))
144
141
  {
145
142
  Some(Ok(Some(parsed))) => match parsed.as_str() {
@@ -172,7 +169,6 @@ pub fn parse_read_csv_args(ruby: &Ruby, args: &[Value]) -> Result<ReadCsvArgs, E
172
169
  delimiter,
173
170
  quote_char,
174
171
  null_string,
175
- buffer_size,
176
172
  result_type,
177
173
  flexible,
178
174
  flexible_default,
data/lib/osv/version.rb CHANGED
@@ -1,3 +1,3 @@
1
1
  module OSV
2
- VERSION = "0.3.16"
2
+ VERSION = "0.3.18"
3
3
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: osv
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.16
4
+ version: 0.3.18
5
5
  platform: ruby
6
6
  authors:
7
7
  - Nathan Jaremko
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2025-01-03 00:00:00.000000000 Z
11
+ date: 2025-01-13 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rb_sys