osv 0.3.13 → 0.3.15
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Cargo.lock +111 -5
- data/Gemfile +1 -1
- data/README.md +39 -81
- data/Rakefile +6 -8
- data/ext/osv/Cargo.toml +7 -1
- data/ext/osv/src/allocator.rs +13 -0
- data/ext/osv/src/csv/builder.rs +65 -176
- data/ext/osv/src/csv/mod.rs +5 -3
- data/ext/osv/src/csv/parser.rs +90 -14
- data/ext/osv/src/csv/record.rs +19 -6
- data/ext/osv/src/csv/record_reader.rs +172 -0
- data/ext/osv/src/csv/ruby_integration.rs +30 -0
- data/ext/osv/src/csv/ruby_reader.rs +174 -0
- data/ext/osv/src/lib.rs +1 -0
- data/ext/osv/src/reader.rs +27 -22
- data/ext/osv/src/utils.rs +5 -5
- data/lib/osv/version.rb +1 -1
- metadata +13 -15
- data/ext/osv/src/csv/read_impl.rs +0 -75
- data/ext/osv/src/csv/reader.rs +0 -57
data/ext/osv/src/csv/builder.rs
CHANGED
@@ -1,19 +1,20 @@
|
|
1
1
|
use super::{
|
2
2
|
header_cache::{CacheError, StringCache},
|
3
3
|
parser::RecordParser,
|
4
|
-
|
5
|
-
|
6
|
-
|
4
|
+
record_reader::{RecordReader, READ_BUFFER_SIZE},
|
5
|
+
ruby_reader::{build_ruby_reader, SeekableRead},
|
6
|
+
ForgottenFileHandle,
|
7
7
|
};
|
8
8
|
use flate2::read::GzDecoder;
|
9
|
-
use magnus::{rb_sys::AsRawValue, value::ReprValue, Error as MagnusError,
|
9
|
+
use magnus::{rb_sys::AsRawValue, value::ReprValue, Error as MagnusError, Ruby, Value};
|
10
10
|
use std::{
|
11
11
|
fs::File,
|
12
12
|
io::{self, BufReader, Read},
|
13
13
|
marker::PhantomData,
|
14
|
+
mem::ManuallyDrop,
|
14
15
|
os::fd::FromRawFd,
|
15
|
-
thread,
|
16
16
|
};
|
17
|
+
|
17
18
|
use thiserror::Error;
|
18
19
|
|
19
20
|
pub(crate) static BUFFER_CHANNEL_SIZE: usize = 1024;
|
@@ -28,8 +29,6 @@ pub enum ReaderError {
|
|
28
29
|
FileOpen(#[from] io::Error),
|
29
30
|
#[error("Failed to intern headers: {0}")]
|
30
31
|
HeaderIntern(#[from] CacheError),
|
31
|
-
#[error("Unsupported GzipReader")]
|
32
|
-
UnsupportedGzipReader,
|
33
32
|
#[error("Ruby error: {0}")]
|
34
33
|
Ruby(String),
|
35
34
|
}
|
@@ -49,7 +48,7 @@ impl From<ReaderError> for MagnusError {
|
|
49
48
|
}
|
50
49
|
}
|
51
50
|
|
52
|
-
pub struct RecordReaderBuilder<'a, T: RecordParser + Send + 'static> {
|
51
|
+
pub struct RecordReaderBuilder<'a, T: RecordParser<'a> + Send> {
|
53
52
|
ruby: &'a Ruby,
|
54
53
|
to_read: Value,
|
55
54
|
has_headers: bool,
|
@@ -58,12 +57,53 @@ pub struct RecordReaderBuilder<'a, T: RecordParser + Send + 'static> {
|
|
58
57
|
null_string: Option<String>,
|
59
58
|
buffer: usize,
|
60
59
|
flexible: bool,
|
61
|
-
flexible_default: Option<String>,
|
60
|
+
flexible_default: Option<&'a str>,
|
62
61
|
trim: csv::Trim,
|
63
62
|
_phantom: PhantomData<T>,
|
64
63
|
}
|
65
64
|
|
66
|
-
impl<'a, T: RecordParser + Send + 'static> RecordReaderBuilder<'a, T> {
|
65
|
+
impl<T: RecordParser<'static> + Send + 'static> RecordReaderBuilder<'static, T> {
|
66
|
+
fn build_multi_threaded(
|
67
|
+
self,
|
68
|
+
readable: Box<dyn Read + Send + 'static>,
|
69
|
+
) -> Result<RecordReader<'static, T>, ReaderError> {
|
70
|
+
let flexible = self.flexible || self.flexible_default.is_some();
|
71
|
+
let mut reader = csv::ReaderBuilder::new()
|
72
|
+
.has_headers(self.has_headers)
|
73
|
+
.delimiter(self.delimiter)
|
74
|
+
.quote(self.quote_char)
|
75
|
+
.flexible(flexible)
|
76
|
+
.trim(self.trim)
|
77
|
+
.from_reader(readable);
|
78
|
+
|
79
|
+
let headers = RecordReader::<T>::get_headers(self.ruby, &mut reader, self.has_headers)?;
|
80
|
+
let static_headers = StringCache::intern_many(&headers)?;
|
81
|
+
|
82
|
+
Ok(RecordReader::new_multi_threaded(
|
83
|
+
reader,
|
84
|
+
static_headers,
|
85
|
+
self.buffer,
|
86
|
+
self.null_string,
|
87
|
+
self.flexible_default,
|
88
|
+
))
|
89
|
+
}
|
90
|
+
|
91
|
+
pub fn build_threaded(self) -> Result<RecordReader<'static, T>, ReaderError> {
|
92
|
+
if self.to_read.is_kind_of(self.ruby.class_io()) {
|
93
|
+
let readable = self.handle_file_descriptor()?;
|
94
|
+
self.build_multi_threaded(readable)
|
95
|
+
} else if self.to_read.is_kind_of(self.ruby.class_string()) {
|
96
|
+
let readable = self.handle_file_path()?;
|
97
|
+
self.build_multi_threaded(readable)
|
98
|
+
} else {
|
99
|
+
let readable = build_ruby_reader(self.ruby, self.to_read)?;
|
100
|
+
let buffered_reader = BufReader::with_capacity(READ_BUFFER_SIZE, readable);
|
101
|
+
self.build_single_threaded(buffered_reader)
|
102
|
+
}
|
103
|
+
}
|
104
|
+
}
|
105
|
+
|
106
|
+
impl<'a, T: RecordParser<'a> + Send> RecordReaderBuilder<'a, T> {
|
67
107
|
pub fn new(ruby: &'a Ruby, to_read: Value) -> Self {
|
68
108
|
Self {
|
69
109
|
ruby,
|
@@ -110,7 +150,7 @@ impl<'a, T: RecordParser + Send + 'static> RecordReaderBuilder<'a, T> {
|
|
110
150
|
self
|
111
151
|
}
|
112
152
|
|
113
|
-
pub fn flexible_default(mut self, flexible_default: Option<String>) -> Self {
|
153
|
+
pub fn flexible_default(mut self, flexible_default: Option<&'a str>) -> Self {
|
114
154
|
self.flexible_default = flexible_default;
|
115
155
|
self
|
116
156
|
}
|
@@ -120,12 +160,6 @@ impl<'a, T: RecordParser + Send + 'static> RecordReaderBuilder<'a, T> {
|
|
120
160
|
self
|
121
161
|
}
|
122
162
|
|
123
|
-
fn handle_string_io(&self) -> Result<Box<dyn Read + Send + 'static>, ReaderError> {
|
124
|
-
let string: RString = self.to_read.funcall("string", ())?;
|
125
|
-
let content = string.to_string()?;
|
126
|
-
Ok(Box::new(std::io::Cursor::new(content)))
|
127
|
-
}
|
128
|
-
|
129
163
|
fn handle_file_descriptor(&self) -> Result<Box<dyn Read + Send + 'static>, ReaderError> {
|
130
164
|
let raw_value = self.to_read.as_raw();
|
131
165
|
let fd = std::panic::catch_unwind(|| unsafe { rb_sys::rb_io_descriptor(raw_value) })
|
@@ -138,7 +172,11 @@ impl<'a, T: RecordParser + Send + 'static> RecordReaderBuilder<'a, T> {
|
|
138
172
|
}
|
139
173
|
|
140
174
|
let file = unsafe { File::from_raw_fd(fd) };
|
141
|
-
|
175
|
+
let forgotten = ForgottenFileHandle(ManuallyDrop::new(file));
|
176
|
+
Ok(Box::new(BufReader::with_capacity(
|
177
|
+
READ_BUFFER_SIZE,
|
178
|
+
forgotten,
|
179
|
+
)))
|
142
180
|
}
|
143
181
|
|
144
182
|
fn handle_file_path(&self) -> Result<Box<dyn Read + Send + 'static>, ReaderError> {
|
@@ -155,102 +193,12 @@ impl<'a, T: RecordParser + Send + 'static> RecordReaderBuilder<'a, T> {
|
|
155
193
|
})
|
156
194
|
}
|
157
195
|
|
158
|
-
fn get_reader(&self) -> Result<(Box<dyn Read + Send + 'static>, bool), ReaderError> {
|
159
|
-
let string_io: magnus::RClass = self.ruby.eval("StringIO")?;
|
160
|
-
let gzip_reader_class: magnus::RClass = self.ruby.eval("Zlib::GzipReader")?;
|
161
|
-
|
162
|
-
if self.to_read.is_kind_of(string_io) {
|
163
|
-
self.handle_string_io().map(|r| (r, false))
|
164
|
-
} else if self.to_read.is_kind_of(gzip_reader_class) {
|
165
|
-
Err(ReaderError::UnsupportedGzipReader)
|
166
|
-
} else if self.to_read.is_kind_of(self.ruby.class_io()) {
|
167
|
-
self.handle_file_descriptor().map(|r| (r, true))
|
168
|
-
} else {
|
169
|
-
self.handle_file_path().map(|r| (r, false))
|
170
|
-
}
|
171
|
-
}
|
172
|
-
|
173
|
-
fn get_single_threaded_reader(&self) -> Result<Box<dyn Read>, ReaderError> {
|
174
|
-
let string_io: magnus::RClass = self.ruby.eval("StringIO")?;
|
175
|
-
let gzip_reader_class: magnus::RClass = self.ruby.eval("Zlib::GzipReader")?;
|
176
|
-
|
177
|
-
if self.to_read.is_kind_of(string_io) {
|
178
|
-
self.handle_string_io().map(|r| -> Box<dyn Read> { r })
|
179
|
-
} else if self.to_read.is_kind_of(gzip_reader_class) {
|
180
|
-
Ok(Box::new(RubyReader::new(self.to_read)))
|
181
|
-
} else if self.to_read.is_kind_of(self.ruby.class_io()) {
|
182
|
-
self.handle_file_descriptor()
|
183
|
-
.map(|r| -> Box<dyn Read> { r })
|
184
|
-
} else {
|
185
|
-
self.handle_file_path().map(|r| -> Box<dyn Read> { r })
|
186
|
-
}
|
187
|
-
}
|
188
|
-
|
189
|
-
pub fn build(self) -> Result<RecordReader<T>, ReaderError> {
|
190
|
-
match self.get_reader() {
|
191
|
-
Ok((readable, should_forget)) => self.build_multi_threaded(readable, should_forget),
|
192
|
-
Err(_) => {
|
193
|
-
let readable = self.get_single_threaded_reader()?;
|
194
|
-
self.build_single_threaded(readable)
|
195
|
-
}
|
196
|
-
}
|
197
|
-
}
|
198
|
-
|
199
|
-
fn build_multi_threaded(
|
200
|
-
self,
|
201
|
-
readable: Box<dyn Read + Send + 'static>,
|
202
|
-
should_forget: bool,
|
203
|
-
) -> Result<RecordReader<T>, ReaderError> {
|
204
|
-
let flexible = self.flexible || self.flexible_default.is_some();
|
205
|
-
let mut reader = csv::ReaderBuilder::new()
|
206
|
-
.has_headers(self.has_headers)
|
207
|
-
.delimiter(self.delimiter)
|
208
|
-
.quote(self.quote_char)
|
209
|
-
.flexible(flexible)
|
210
|
-
.trim(self.trim)
|
211
|
-
.from_reader(readable);
|
212
|
-
|
213
|
-
let headers = RecordReader::<T>::get_headers(self.ruby, &mut reader, self.has_headers)?;
|
214
|
-
let static_headers = StringCache::intern_many(&headers)?;
|
215
|
-
let headers_for_cleanup = static_headers.clone();
|
216
|
-
|
217
|
-
let (sender, receiver) = kanal::bounded(self.buffer);
|
218
|
-
let null_string = self.null_string.clone();
|
219
|
-
|
220
|
-
let flexible_default = self.flexible_default.clone();
|
221
|
-
let handle = thread::spawn(move || {
|
222
|
-
let mut record = csv::StringRecord::with_capacity(READ_BUFFER_SIZE, headers.len());
|
223
|
-
while let Ok(true) = reader.read_record(&mut record) {
|
224
|
-
let row = T::parse(
|
225
|
-
&static_headers,
|
226
|
-
&record,
|
227
|
-
null_string.as_deref(),
|
228
|
-
flexible_default.as_deref(),
|
229
|
-
);
|
230
|
-
if sender.send(row).is_err() {
|
231
|
-
break;
|
232
|
-
}
|
233
|
-
}
|
234
|
-
if should_forget {
|
235
|
-
let file_to_forget = reader.into_inner();
|
236
|
-
std::mem::forget(file_to_forget);
|
237
|
-
}
|
238
|
-
});
|
239
|
-
|
240
|
-
Ok(RecordReader {
|
241
|
-
reader: ReadImpl::MultiThreaded {
|
242
|
-
headers: headers_for_cleanup,
|
243
|
-
receiver,
|
244
|
-
handle: Some(handle),
|
245
|
-
},
|
246
|
-
})
|
247
|
-
}
|
248
|
-
|
249
196
|
fn build_single_threaded(
|
250
197
|
self,
|
251
|
-
readable: Box<dyn Read>,
|
252
|
-
) -> Result<RecordReader<T>, ReaderError> {
|
198
|
+
readable: BufReader<Box<dyn SeekableRead>>,
|
199
|
+
) -> Result<RecordReader<'a, T>, ReaderError> {
|
253
200
|
let flexible = self.flexible || self.flexible_default.is_some();
|
201
|
+
|
254
202
|
let mut reader = csv::ReaderBuilder::new()
|
255
203
|
.has_headers(self.has_headers)
|
256
204
|
.delimiter(self.delimiter)
|
@@ -262,70 +210,11 @@ impl<'a, T: RecordParser + Send + 'static> RecordReaderBuilder<'a, T> {
|
|
262
210
|
let headers = RecordReader::<T>::get_headers(self.ruby, &mut reader, self.has_headers)?;
|
263
211
|
let static_headers = StringCache::intern_many(&headers)?;
|
264
212
|
|
265
|
-
Ok(RecordReader
|
266
|
-
reader
|
267
|
-
|
268
|
-
|
269
|
-
|
270
|
-
|
271
|
-
},
|
272
|
-
})
|
273
|
-
}
|
274
|
-
}
|
275
|
-
|
276
|
-
struct RubyReader {
|
277
|
-
inner: Value,
|
278
|
-
buffer: Option<Vec<u8>>,
|
279
|
-
offset: usize,
|
280
|
-
}
|
281
|
-
|
282
|
-
impl RubyReader {
|
283
|
-
fn new(inner: Value) -> Self {
|
284
|
-
Self {
|
285
|
-
inner,
|
286
|
-
buffer: None,
|
287
|
-
offset: 0,
|
288
|
-
}
|
289
|
-
}
|
290
|
-
}
|
291
|
-
|
292
|
-
// Read the entire inner into a vector and then read future reads from that vector with offset
|
293
|
-
impl Read for RubyReader {
|
294
|
-
fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
|
295
|
-
// If we have an existing buffer, read from it
|
296
|
-
if let Some(buffer) = self.buffer.as_ref() {
|
297
|
-
let remaining = buffer.len() - self.offset;
|
298
|
-
let copy_size = remaining.min(buf.len());
|
299
|
-
buf[..copy_size].copy_from_slice(&buffer[self.offset..self.offset + copy_size]);
|
300
|
-
self.offset += copy_size;
|
301
|
-
return Ok(copy_size);
|
302
|
-
}
|
303
|
-
|
304
|
-
// No buffer yet - read the entire content from Ruby
|
305
|
-
let result = self.inner.funcall::<_, _, Value>("read", ());
|
306
|
-
match result {
|
307
|
-
Ok(data) => {
|
308
|
-
if data.is_nil() {
|
309
|
-
return Ok(0); // EOF
|
310
|
-
}
|
311
|
-
|
312
|
-
let string = RString::from_value(data).ok_or_else(|| {
|
313
|
-
io::Error::new(io::ErrorKind::Other, "Failed to convert to RString")
|
314
|
-
})?;
|
315
|
-
let bytes = unsafe { string.as_slice() };
|
316
|
-
|
317
|
-
// Store the entire content in the buffer
|
318
|
-
self.buffer = Some(bytes.to_vec());
|
319
|
-
self.offset = 0;
|
320
|
-
|
321
|
-
// Read initial chunk
|
322
|
-
let copy_size = bytes.len().min(buf.len());
|
323
|
-
buf[..copy_size].copy_from_slice(&bytes[..copy_size]);
|
324
|
-
self.offset = copy_size;
|
325
|
-
|
326
|
-
Ok(copy_size)
|
327
|
-
}
|
328
|
-
Err(e) => Err(io::Error::new(io::ErrorKind::Other, e.to_string())),
|
329
|
-
}
|
213
|
+
Ok(RecordReader::new_single_threaded(
|
214
|
+
reader,
|
215
|
+
static_headers,
|
216
|
+
self.null_string,
|
217
|
+
self.flexible_default,
|
218
|
+
))
|
330
219
|
}
|
331
220
|
}
|
data/ext/osv/src/csv/mod.rs
CHANGED
@@ -1,11 +1,13 @@
|
|
1
1
|
mod builder;
|
2
2
|
mod header_cache;
|
3
3
|
mod parser;
|
4
|
-
pub mod read_impl;
|
5
|
-
mod reader;
|
6
4
|
mod record;
|
5
|
+
mod record_reader;
|
6
|
+
mod ruby_integration;
|
7
|
+
mod ruby_reader;
|
7
8
|
|
8
9
|
pub use builder::RecordReaderBuilder;
|
9
10
|
pub(crate) use builder::BUFFER_CHANNEL_SIZE;
|
10
|
-
pub
|
11
|
+
pub use record::CowValue;
|
11
12
|
pub use record::CsvRecord;
|
13
|
+
pub use ruby_integration::*;
|
data/ext/osv/src/csv/parser.rs
CHANGED
@@ -1,18 +1,23 @@
|
|
1
|
+
use std::borrow::Cow;
|
1
2
|
use std::collections::HashMap;
|
2
3
|
use std::hash::BuildHasher;
|
3
4
|
|
4
|
-
|
5
|
-
|
5
|
+
use super::CowValue;
|
6
|
+
|
7
|
+
pub trait RecordParser<'a> {
|
8
|
+
type Output: 'a;
|
6
9
|
|
7
10
|
fn parse(
|
8
11
|
headers: &[&'static str],
|
9
12
|
record: &csv::StringRecord,
|
10
13
|
null_string: Option<&str>,
|
11
|
-
flexible_default: Option
|
14
|
+
flexible_default: Option<Cow<'a, str>>,
|
12
15
|
) -> Self::Output;
|
13
16
|
}
|
14
17
|
|
15
|
-
impl<S: BuildHasher + Default> RecordParser for HashMap<&'static str, Option<String>, S> {
|
18
|
+
impl<'a, S: BuildHasher + Default + 'a> RecordParser<'a>
|
19
|
+
for HashMap<&'static str, Option<CowValue<'a>>, S>
|
20
|
+
{
|
16
21
|
type Output = Self;
|
17
22
|
|
18
23
|
#[inline]
|
@@ -20,19 +25,22 @@ impl<S: BuildHasher + Default> RecordParser for HashMap<&'static str, Option<Str
|
|
20
25
|
headers: &[&'static str],
|
21
26
|
record: &csv::StringRecord,
|
22
27
|
null_string: Option<&str>,
|
23
|
-
flexible_default: Option
|
28
|
+
flexible_default: Option<Cow<'a, str>>,
|
24
29
|
) -> Self::Output {
|
25
30
|
let mut map = HashMap::with_capacity_and_hasher(headers.len(), S::default());
|
31
|
+
|
32
|
+
let shared_empty = Cow::Borrowed("");
|
33
|
+
let shared_default = flexible_default.map(CowValue);
|
26
34
|
headers.iter().enumerate().for_each(|(i, &header)| {
|
27
35
|
let value = record.get(i).map_or_else(
|
28
|
-
||
|
36
|
+
|| shared_default.clone(),
|
29
37
|
|field| {
|
30
38
|
if null_string == Some(field) {
|
31
39
|
None
|
32
40
|
} else if field.is_empty() {
|
33
|
-
Some(
|
41
|
+
Some(CowValue(shared_empty.clone()))
|
34
42
|
} else {
|
35
|
-
Some(field.
|
43
|
+
Some(CowValue(Cow::Owned(field.to_string())))
|
36
44
|
}
|
37
45
|
},
|
38
46
|
);
|
@@ -42,7 +50,7 @@ impl<S: BuildHasher + Default> RecordParser for HashMap<&'static str, Option<Str
|
|
42
50
|
}
|
43
51
|
}
|
44
52
|
|
45
|
-
impl RecordParser for Vec<Option<String>> {
|
53
|
+
impl<'a> RecordParser<'a> for Vec<Option<CowValue<'a>>> {
|
46
54
|
type Output = Self;
|
47
55
|
|
48
56
|
#[inline]
|
@@ -50,26 +58,94 @@ impl RecordParser for Vec<Option<String>> {
|
|
50
58
|
headers: &[&'static str],
|
51
59
|
record: &csv::StringRecord,
|
52
60
|
null_string: Option<&str>,
|
53
|
-
flexible_default: Option
|
61
|
+
flexible_default: Option<Cow<'a, str>>,
|
54
62
|
) -> Self::Output {
|
55
63
|
let target_len = headers.len();
|
56
64
|
let mut vec = Vec::with_capacity(target_len);
|
65
|
+
|
66
|
+
let shared_empty = Cow::Borrowed("");
|
67
|
+
let shared_default = flexible_default.map(CowValue);
|
68
|
+
|
57
69
|
for field in record.iter() {
|
58
70
|
let value = if Some(field) == null_string {
|
59
71
|
None
|
60
72
|
} else if field.is_empty() {
|
61
|
-
Some(
|
73
|
+
Some(CowValue(shared_empty.clone()))
|
62
74
|
} else {
|
63
|
-
Some(field.
|
75
|
+
Some(CowValue(Cow::Owned(field.to_string())))
|
64
76
|
};
|
65
77
|
vec.push(value);
|
66
78
|
}
|
67
79
|
|
68
80
|
if vec.len() < target_len {
|
69
|
-
if let Some(default) =
|
70
|
-
vec.resize_with(target_len, || Some(default.
|
81
|
+
if let Some(default) = shared_default {
|
82
|
+
vec.resize_with(target_len, || Some(default.clone()));
|
71
83
|
}
|
72
84
|
}
|
73
85
|
vec
|
74
86
|
}
|
75
87
|
}
|
88
|
+
|
89
|
+
// impl<'a, S: BuildHasher + Default + 'a> RecordParser<'a>
|
90
|
+
// for HashMap<&'static str, Option<String>, S>
|
91
|
+
// {
|
92
|
+
// type Output = Self;
|
93
|
+
|
94
|
+
// #[inline]
|
95
|
+
// fn parse(
|
96
|
+
// headers: &[&'static str],
|
97
|
+
// record: &csv::StringRecord,
|
98
|
+
// null_string: Option<&str>,
|
99
|
+
// flexible_default: Option<Cow<'a, str>>,
|
100
|
+
// ) -> Self::Output {
|
101
|
+
// let mut map = HashMap::with_capacity_and_hasher(headers.len(), S::default());
|
102
|
+
// headers.iter().enumerate().for_each(|(i, &header)| {
|
103
|
+
// let value = record.get(i).map_or_else(
|
104
|
+
// || flexible_default.clone(),
|
105
|
+
// |field| {
|
106
|
+
// if null_string == Some(field) {
|
107
|
+
// None
|
108
|
+
// } else if field.is_empty() {
|
109
|
+
// Some(String::new())
|
110
|
+
// } else {
|
111
|
+
// Some(field.into())
|
112
|
+
// }
|
113
|
+
// },
|
114
|
+
// );
|
115
|
+
// map.insert(header, value);
|
116
|
+
// });
|
117
|
+
// map
|
118
|
+
// }
|
119
|
+
// }
|
120
|
+
|
121
|
+
// impl<'a> RecordParser<'a> for Vec<Option<String>> {
|
122
|
+
// type Output = Self;
|
123
|
+
|
124
|
+
// #[inline]
|
125
|
+
// fn parse(
|
126
|
+
// headers: &[&'static str],
|
127
|
+
// record: &csv::StringRecord,
|
128
|
+
// null_string: Option<&str>,
|
129
|
+
// flexible_default: Option<Cow<'a, str>>,
|
130
|
+
// ) -> Self::Output {
|
131
|
+
// let target_len = headers.len();
|
132
|
+
// let mut vec = Vec::with_capacity(target_len);
|
133
|
+
// for field in record.iter() {
|
134
|
+
// let value = if Some(field) == null_string {
|
135
|
+
// None
|
136
|
+
// } else if field.is_empty() {
|
137
|
+
// Some(String::new())
|
138
|
+
// } else {
|
139
|
+
// Some(field.into())
|
140
|
+
// };
|
141
|
+
// vec.push(value);
|
142
|
+
// }
|
143
|
+
|
144
|
+
// if vec.len() < target_len {
|
145
|
+
// if let Some(default) = flexible_default {
|
146
|
+
// vec.resize_with(target_len, || Some(default.to_string()));
|
147
|
+
// }
|
148
|
+
// }
|
149
|
+
// vec
|
150
|
+
// }
|
151
|
+
// }
|
data/ext/osv/src/csv/record.rs
CHANGED
@@ -1,17 +1,21 @@
|
|
1
1
|
use magnus::{IntoValue, Ruby, Value};
|
2
|
-
use std::{collections::HashMap, hash::BuildHasher};
|
2
|
+
use std::{borrow::Cow, collections::HashMap, hash::BuildHasher};
|
3
3
|
|
4
4
|
#[derive(Debug)]
|
5
|
-
pub enum CsvRecord<S: BuildHasher + Default> {
|
6
|
-
Vec(Vec<Option<String>>),
|
7
|
-
Map(HashMap<&'static str, Option<String>, S>),
|
5
|
+
pub enum CsvRecord<'a, S: BuildHasher + Default> {
|
6
|
+
Vec(Vec<Option<CowValue<'a>>>),
|
7
|
+
Map(HashMap<&'static str, Option<CowValue<'a>>, S>),
|
8
8
|
}
|
9
9
|
|
10
|
-
impl<S: BuildHasher + Default> IntoValue for CsvRecord<S> {
|
10
|
+
impl<S: BuildHasher + Default> IntoValue for CsvRecord<'_, S> {
|
11
11
|
#[inline]
|
12
12
|
fn into_value_with(self, handle: &Ruby) -> Value {
|
13
13
|
match self {
|
14
|
-
CsvRecord::Vec(vec) =>
|
14
|
+
CsvRecord::Vec(vec) => {
|
15
|
+
let ary = handle.ary_new_capa(vec.len());
|
16
|
+
vec.into_iter().try_for_each(|v| ary.push(v)).unwrap();
|
17
|
+
ary.into_value_with(handle)
|
18
|
+
}
|
15
19
|
CsvRecord::Map(map) => {
|
16
20
|
// Pre-allocate the hash with the known size
|
17
21
|
let hash = handle.hash_new_capa(map.len());
|
@@ -23,3 +27,12 @@ impl<S: BuildHasher + Default> IntoValue for CsvRecord<S> {
|
|
23
27
|
}
|
24
28
|
}
|
25
29
|
}
|
30
|
+
|
31
|
+
#[derive(Debug, Clone)]
|
32
|
+
pub struct CowValue<'a>(pub Cow<'a, str>);
|
33
|
+
|
34
|
+
impl IntoValue for CowValue<'_> {
|
35
|
+
fn into_value_with(self, handle: &Ruby) -> Value {
|
36
|
+
self.0.into_value_with(handle)
|
37
|
+
}
|
38
|
+
}
|