osv 0.3.13 → 0.3.15
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Cargo.lock +111 -5
- data/Gemfile +1 -1
- data/README.md +39 -81
- data/Rakefile +6 -8
- data/ext/osv/Cargo.toml +7 -1
- data/ext/osv/src/allocator.rs +13 -0
- data/ext/osv/src/csv/builder.rs +65 -176
- data/ext/osv/src/csv/mod.rs +5 -3
- data/ext/osv/src/csv/parser.rs +90 -14
- data/ext/osv/src/csv/record.rs +19 -6
- data/ext/osv/src/csv/record_reader.rs +172 -0
- data/ext/osv/src/csv/ruby_integration.rs +30 -0
- data/ext/osv/src/csv/ruby_reader.rs +174 -0
- data/ext/osv/src/lib.rs +1 -0
- data/ext/osv/src/reader.rs +27 -22
- data/ext/osv/src/utils.rs +5 -5
- data/lib/osv/version.rb +1 -1
- metadata +13 -15
- data/ext/osv/src/csv/read_impl.rs +0 -75
- data/ext/osv/src/csv/reader.rs +0 -57
@@ -0,0 +1,172 @@
|
|
1
|
+
use super::parser::RecordParser;
|
2
|
+
use super::{header_cache::StringCache, ruby_reader::SeekableRead};
|
3
|
+
use magnus::{Error, Ruby};
|
4
|
+
use std::io::BufReader;
|
5
|
+
use std::{borrow::Cow, io::Read, thread};
|
6
|
+
|
7
|
+
/// Initial byte capacity passed to `csv::StringRecord::with_capacity` for the reusable record
/// buffers created by the readers in this file.
pub(crate) const READ_BUFFER_SIZE: usize = 16384;
|
8
|
+
|
9
|
+
pub struct RecordReader<'a, T: RecordParser<'a>> {
|
10
|
+
inner: ReaderImpl<'a, T>,
|
11
|
+
}
|
12
|
+
|
13
|
+
#[allow(clippy::large_enum_variant)]
|
14
|
+
enum ReaderImpl<'a, T: RecordParser<'a>> {
|
15
|
+
SingleThreaded {
|
16
|
+
reader: csv::Reader<BufReader<Box<dyn SeekableRead>>>,
|
17
|
+
headers: Vec<&'static str>,
|
18
|
+
null_string: Option<String>,
|
19
|
+
flexible_default: Option<Cow<'a, str>>,
|
20
|
+
string_record: csv::StringRecord,
|
21
|
+
},
|
22
|
+
MultiThreaded {
|
23
|
+
headers: Vec<&'static str>,
|
24
|
+
receiver: kanal::Receiver<T::Output>,
|
25
|
+
handle: Option<thread::JoinHandle<()>>,
|
26
|
+
},
|
27
|
+
}
|
28
|
+
|
29
|
+
impl<'a, T: RecordParser<'a>> RecordReader<'a, T> {
|
30
|
+
#[inline]
|
31
|
+
pub(crate) fn get_headers(
|
32
|
+
ruby: &Ruby,
|
33
|
+
reader: &mut csv::Reader<impl Read>,
|
34
|
+
has_headers: bool,
|
35
|
+
) -> Result<Vec<String>, Error> {
|
36
|
+
let first_row = reader.headers().map_err(|e| {
|
37
|
+
Error::new(
|
38
|
+
ruby.exception_runtime_error(),
|
39
|
+
format!("Failed to read headers: {e}"),
|
40
|
+
)
|
41
|
+
})?;
|
42
|
+
|
43
|
+
let mut headers = Vec::with_capacity(first_row.len());
|
44
|
+
if has_headers {
|
45
|
+
headers.extend(first_row.iter().map(String::from));
|
46
|
+
} else {
|
47
|
+
headers.extend((0..first_row.len()).map(|i| format!("c{i}")));
|
48
|
+
}
|
49
|
+
Ok(headers)
|
50
|
+
}
|
51
|
+
|
52
|
+
pub(crate) fn new_single_threaded(
|
53
|
+
reader: csv::Reader<BufReader<Box<dyn SeekableRead>>>,
|
54
|
+
headers: Vec<&'static str>,
|
55
|
+
null_string: Option<String>,
|
56
|
+
flexible_default: Option<&'a str>,
|
57
|
+
) -> Self {
|
58
|
+
let headers_len = headers.len();
|
59
|
+
Self {
|
60
|
+
inner: ReaderImpl::SingleThreaded {
|
61
|
+
reader,
|
62
|
+
headers,
|
63
|
+
null_string,
|
64
|
+
flexible_default: flexible_default.map(Cow::Borrowed),
|
65
|
+
string_record: csv::StringRecord::with_capacity(READ_BUFFER_SIZE, headers_len),
|
66
|
+
},
|
67
|
+
}
|
68
|
+
}
|
69
|
+
}
|
70
|
+
|
71
|
+
impl<T: RecordParser<'static> + Send> RecordReader<'static, T> {
|
72
|
+
pub(crate) fn new_multi_threaded(
|
73
|
+
mut reader: csv::Reader<Box<dyn Read + Send + 'static>>,
|
74
|
+
headers: Vec<&'static str>,
|
75
|
+
buffer_size: usize,
|
76
|
+
null_string: Option<String>,
|
77
|
+
flexible_default: Option<&'static str>,
|
78
|
+
) -> Self {
|
79
|
+
let (sender, receiver) = kanal::bounded(buffer_size);
|
80
|
+
let headers_for_thread = headers.clone();
|
81
|
+
|
82
|
+
let handle = thread::spawn(move || {
|
83
|
+
let mut record =
|
84
|
+
csv::StringRecord::with_capacity(READ_BUFFER_SIZE, headers_for_thread.len());
|
85
|
+
while let Ok(true) = reader.read_record(&mut record) {
|
86
|
+
let row = T::parse(
|
87
|
+
&headers_for_thread,
|
88
|
+
&record,
|
89
|
+
null_string.as_deref(),
|
90
|
+
flexible_default.map(Cow::Borrowed),
|
91
|
+
);
|
92
|
+
if sender.send(row).is_err() {
|
93
|
+
break;
|
94
|
+
}
|
95
|
+
}
|
96
|
+
});
|
97
|
+
|
98
|
+
Self {
|
99
|
+
inner: ReaderImpl::MultiThreaded {
|
100
|
+
headers,
|
101
|
+
receiver,
|
102
|
+
handle: Some(handle),
|
103
|
+
},
|
104
|
+
}
|
105
|
+
}
|
106
|
+
}
|
107
|
+
|
108
|
+
impl<'a, T: RecordParser<'a>> Iterator for RecordReader<'a, T> {
|
109
|
+
type Item = T::Output;
|
110
|
+
|
111
|
+
#[inline]
|
112
|
+
fn next(&mut self) -> Option<Self::Item> {
|
113
|
+
match &mut self.inner {
|
114
|
+
ReaderImpl::MultiThreaded {
|
115
|
+
receiver, handle, ..
|
116
|
+
} => match receiver.recv() {
|
117
|
+
Ok(record) => Some(record),
|
118
|
+
Err(_) => {
|
119
|
+
if let Some(handle) = handle.take() {
|
120
|
+
let _ = handle.join();
|
121
|
+
}
|
122
|
+
None
|
123
|
+
}
|
124
|
+
},
|
125
|
+
ReaderImpl::SingleThreaded {
|
126
|
+
reader,
|
127
|
+
headers,
|
128
|
+
null_string,
|
129
|
+
flexible_default,
|
130
|
+
ref mut string_record,
|
131
|
+
} => match reader.read_record(string_record) {
|
132
|
+
Ok(true) => Some(T::parse(
|
133
|
+
headers,
|
134
|
+
string_record,
|
135
|
+
null_string.as_deref(),
|
136
|
+
flexible_default.clone(),
|
137
|
+
)),
|
138
|
+
Ok(false) => None,
|
139
|
+
Err(_e) => None,
|
140
|
+
},
|
141
|
+
}
|
142
|
+
}
|
143
|
+
|
144
|
+
#[inline]
|
145
|
+
fn size_hint(&self) -> (usize, Option<usize>) {
|
146
|
+
// We can't know the exact size without reading the whole file
|
147
|
+
(0, None)
|
148
|
+
}
|
149
|
+
}
|
150
|
+
|
151
|
+
impl<'a, T: RecordParser<'a>> Drop for RecordReader<'a, T> {
|
152
|
+
#[inline]
|
153
|
+
fn drop(&mut self) {
|
154
|
+
match &mut self.inner {
|
155
|
+
ReaderImpl::MultiThreaded {
|
156
|
+
receiver,
|
157
|
+
handle,
|
158
|
+
headers,
|
159
|
+
..
|
160
|
+
} => {
|
161
|
+
receiver.close();
|
162
|
+
if let Some(handle) = handle.take() {
|
163
|
+
let _ = handle.join();
|
164
|
+
}
|
165
|
+
let _ = StringCache::clear(headers);
|
166
|
+
}
|
167
|
+
ReaderImpl::SingleThreaded { headers, .. } => {
|
168
|
+
let _ = StringCache::clear(headers);
|
169
|
+
}
|
170
|
+
}
|
171
|
+
}
|
172
|
+
}
|
@@ -0,0 +1,30 @@
|
|
1
|
+
use std::{fs::File, io, mem::ManuallyDrop};
|
2
|
+
|
3
|
+
/// A `File` wrapper whose handle is never closed by Rust.
///
/// The file is held in `ManuallyDrop`, so dropping a `ForgottenFileHandle` does not run `File`'s
/// destructor and the underlying descriptor stays open.
/// NOTE(review): presumably the descriptor is owned by a Ruby IO object that closes it — confirm
/// against the call site.
pub struct ForgottenFileHandle(pub ManuallyDrop<File>);

/// Forwards the `Read` methods to the wrapped `File` so its specialised implementations are used
/// instead of the trait defaults.
impl std::io::Read for ForgottenFileHandle {
    fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
        self.0.read(buf)
    }

    fn read_vectored(&mut self, bufs: &mut [std::io::IoSliceMut<'_>]) -> io::Result<usize> {
        self.0.read_vectored(bufs)
    }

    fn read_to_end(&mut self, buf: &mut Vec<u8>) -> io::Result<usize> {
        self.0.read_to_end(buf)
    }

    fn read_to_string(&mut self, buf: &mut String) -> io::Result<usize> {
        self.0.read_to_string(buf)
    }
}
|
@@ -0,0 +1,174 @@
|
|
1
|
+
use magnus::{
|
2
|
+
value::{Opaque, ReprValue},
|
3
|
+
RClass, RString, Ruby, Value,
|
4
|
+
};
|
5
|
+
use std::io::{self, Read, Seek, SeekFrom, Write};
|
6
|
+
use std::sync::OnceLock;
|
7
|
+
|
8
|
+
/// Process-wide cache of Ruby's `StringIO` class object, populated by `is_string_io` so the
/// constant is only looked up once.
static STRING_IO_CLASS: OnceLock<Opaque<RClass>> = OnceLock::new();
|
9
|
+
|
10
|
+
/// Adapter that exposes a Ruby input (String, StringIO contents, or an IO-like object) through
/// Rust's standard `Read`/`Seek` traits.
///
/// NOTE(review): the constructors in this file store the Ruby value inside a heap `Box`; confirm
/// the caller keeps the Ruby object reachable for as long as the reader lives.
pub struct RubyReader<T> {
    /// The wrapped Ruby object: an `RString` for in-memory input, a `Value` for IO-like objects.
    inner: T,
    /// Current read position in bytes; only consulted by the `RString`-backed implementations.
    offset: usize,
}
|
16
|
+
|
17
|
+
pub trait SeekableRead: std::io::Read + Seek {}
|
18
|
+
impl SeekableRead for RubyReader<Value> {}
|
19
|
+
impl SeekableRead for RubyReader<RString> {}
|
20
|
+
|
21
|
+
pub fn build_ruby_reader(
|
22
|
+
ruby: &Ruby,
|
23
|
+
input: Value,
|
24
|
+
) -> Result<Box<dyn SeekableRead>, magnus::Error> {
|
25
|
+
if RubyReader::is_string_io(ruby, &input) {
|
26
|
+
RubyReader::from_string_io(ruby, input)
|
27
|
+
} else if RubyReader::is_io_like(&input) {
|
28
|
+
RubyReader::from_io(input)
|
29
|
+
} else {
|
30
|
+
RubyReader::from_string_like(input)
|
31
|
+
}
|
32
|
+
}
|
33
|
+
|
34
|
+
impl Seek for RubyReader<Value> {
|
35
|
+
fn seek(&mut self, pos: io::SeekFrom) -> io::Result<u64> {
|
36
|
+
let (whence, offset) = match pos {
|
37
|
+
SeekFrom::Start(i) => (0, i as i64),
|
38
|
+
SeekFrom::Current(i) => (1, i),
|
39
|
+
SeekFrom::End(i) => (2, i),
|
40
|
+
};
|
41
|
+
|
42
|
+
let new_position = self
|
43
|
+
.inner
|
44
|
+
.funcall("seek", (offset, whence))
|
45
|
+
.map_err(|e| io::Error::new(io::ErrorKind::Other, e.to_string()))?;
|
46
|
+
|
47
|
+
Ok(new_position)
|
48
|
+
}
|
49
|
+
}
|
50
|
+
|
51
|
+
impl Write for RubyReader<Value> {
|
52
|
+
fn write(&mut self, buf: &[u8]) -> Result<usize, io::Error> {
|
53
|
+
let ruby_bytes = RString::from_slice(buf);
|
54
|
+
|
55
|
+
let bytes_written = self
|
56
|
+
.inner
|
57
|
+
.funcall::<_, _, usize>("write", (ruby_bytes,))
|
58
|
+
.map_err(|e| io::Error::new(io::ErrorKind::Other, e.to_string()))?;
|
59
|
+
|
60
|
+
Ok(bytes_written)
|
61
|
+
}
|
62
|
+
|
63
|
+
fn flush(&mut self) -> Result<(), io::Error> {
|
64
|
+
self.inner
|
65
|
+
.funcall::<_, _, Value>("flush", ())
|
66
|
+
.map_err(|e| io::Error::new(io::ErrorKind::Other, e.to_string()))?;
|
67
|
+
|
68
|
+
Ok(())
|
69
|
+
}
|
70
|
+
}
|
71
|
+
|
72
|
+
impl Seek for RubyReader<RString> {
|
73
|
+
fn seek(&mut self, pos: io::SeekFrom) -> io::Result<u64> {
|
74
|
+
match pos {
|
75
|
+
io::SeekFrom::Start(offset) => self.offset = offset as usize,
|
76
|
+
io::SeekFrom::Current(offset) => self.offset = (self.offset as i64 + offset) as usize,
|
77
|
+
io::SeekFrom::End(offset) => {
|
78
|
+
self.offset = self.inner.len() - offset as usize
|
79
|
+
}
|
80
|
+
}
|
81
|
+
Ok(self.offset as u64)
|
82
|
+
}
|
83
|
+
}
|
84
|
+
|
85
|
+
impl RubyReader<Value> {
|
86
|
+
fn from_io(input: Value) -> Result<Box<dyn SeekableRead>, magnus::Error> {
|
87
|
+
if Self::is_io_like(&input) {
|
88
|
+
Ok(Box::new(Self::from_io_like(input)))
|
89
|
+
} else {
|
90
|
+
Err(magnus::Error::new(
|
91
|
+
magnus::exception::type_error(),
|
92
|
+
"Input is not an IO-like object",
|
93
|
+
))
|
94
|
+
}
|
95
|
+
}
|
96
|
+
|
97
|
+
fn is_io_like(input: &Value) -> bool {
|
98
|
+
input.respond_to("read", false).unwrap_or(false)
|
99
|
+
}
|
100
|
+
|
101
|
+
fn from_io_like(input: Value) -> Self {
|
102
|
+
Self {
|
103
|
+
inner: input,
|
104
|
+
offset: 0,
|
105
|
+
}
|
106
|
+
}
|
107
|
+
}
|
108
|
+
|
109
|
+
impl RubyReader<RString> {
|
110
|
+
pub fn from_string_io(
|
111
|
+
ruby: &Ruby,
|
112
|
+
input: Value,
|
113
|
+
) -> Result<Box<dyn SeekableRead>, magnus::Error> {
|
114
|
+
if !Self::is_string_io(ruby, &input) {
|
115
|
+
return Err(magnus::Error::new(
|
116
|
+
magnus::exception::type_error(),
|
117
|
+
"Input is not a StringIO",
|
118
|
+
));
|
119
|
+
}
|
120
|
+
|
121
|
+
let string_content = input.funcall::<_, _, RString>("string", ()).unwrap();
|
122
|
+
Ok(Box::new(Self {
|
123
|
+
inner: string_content,
|
124
|
+
offset: 0,
|
125
|
+
}))
|
126
|
+
}
|
127
|
+
|
128
|
+
fn is_string_io(ruby: &Ruby, input: &Value) -> bool {
|
129
|
+
let string_io_class = STRING_IO_CLASS.get_or_init(|| {
|
130
|
+
let class = RClass::from_value(ruby.eval("StringIO").unwrap()).unwrap();
|
131
|
+
Opaque::from(class)
|
132
|
+
});
|
133
|
+
input.is_kind_of(ruby.get_inner(*string_io_class))
|
134
|
+
}
|
135
|
+
|
136
|
+
fn from_string_like(input: Value) -> Result<Box<dyn SeekableRead>, magnus::Error> {
|
137
|
+
// Try calling `to_str`, and if that fails, try `to_s`
|
138
|
+
let string_content = input
|
139
|
+
.funcall::<_, _, RString>("to_str", ())
|
140
|
+
.or_else(|_| input.funcall::<_, _, RString>("to_s", ()))?;
|
141
|
+
Ok(Box::new(Self {
|
142
|
+
inner: string_content,
|
143
|
+
offset: 0,
|
144
|
+
}))
|
145
|
+
}
|
146
|
+
}
|
147
|
+
|
148
|
+
impl Read for RubyReader<Value> {
|
149
|
+
fn read(&mut self, mut buf: &mut [u8]) -> io::Result<usize> {
|
150
|
+
let bytes = self
|
151
|
+
.inner
|
152
|
+
.funcall::<_, _, RString>("read", (buf.len(),))
|
153
|
+
.map_err(|e| io::Error::new(io::ErrorKind::Other, e.to_string()))?;
|
154
|
+
|
155
|
+
buf.write_all(unsafe { bytes.as_slice() })?;
|
156
|
+
|
157
|
+
Ok(bytes.len())
|
158
|
+
}
|
159
|
+
}
|
160
|
+
|
161
|
+
impl Read for RubyReader<RString> {
|
162
|
+
fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
|
163
|
+
let string_buffer = unsafe { self.inner.as_slice() };
|
164
|
+
if self.offset >= string_buffer.len() {
|
165
|
+
return Ok(0); // EOF
|
166
|
+
}
|
167
|
+
|
168
|
+
let remaining = string_buffer.len() - self.offset;
|
169
|
+
let copy_size = remaining.min(buf.len());
|
170
|
+
buf[..copy_size].copy_from_slice(&string_buffer[self.offset..self.offset + copy_size]);
|
171
|
+
self.offset += copy_size;
|
172
|
+
Ok(copy_size)
|
173
|
+
}
|
174
|
+
}
|
data/ext/osv/src/lib.rs
CHANGED
data/ext/osv/src/reader.rs
CHANGED
@@ -1,18 +1,19 @@
|
|
1
|
-
use crate::csv::{CsvRecord, RecordReaderBuilder};
|
1
|
+
use crate::csv::{CowValue, CsvRecord, RecordReaderBuilder};
|
2
2
|
use crate::utils::*;
|
3
|
+
use ahash::RandomState;
|
3
4
|
use csv::Trim;
|
4
5
|
use magnus::value::ReprValue;
|
5
6
|
use magnus::{block::Yield, Error, KwArgs, RHash, Ruby, Symbol, Value};
|
6
7
|
use std::collections::HashMap;
|
7
|
-
use xxhash_rust::xxh3::Xxh3Builder;
|
8
8
|
|
9
9
|
pub fn parse_csv(
|
10
10
|
rb_self: Value,
|
11
11
|
args: &[Value],
|
12
|
-
) -> Result<Yield<Box<dyn Iterator<Item = CsvRecord<
|
13
|
-
let
|
12
|
+
) -> Result<Yield<Box<dyn Iterator<Item = CsvRecord<'static, RandomState>>>>, Error> {
|
13
|
+
let original = unsafe { Ruby::get_unchecked() };
|
14
|
+
let ruby: &'static Ruby = Box::leak(Box::new(original));
|
14
15
|
|
15
|
-
let
|
16
|
+
let ReadCsvArgs {
|
16
17
|
to_read,
|
17
18
|
has_headers,
|
18
19
|
delimiter,
|
@@ -23,7 +24,11 @@ pub fn parse_csv(
|
|
23
24
|
flexible,
|
24
25
|
flexible_default,
|
25
26
|
trim,
|
26
|
-
} =
|
27
|
+
} = parse_read_csv_args(ruby, args)?;
|
28
|
+
|
29
|
+
let flexible_default: &'static Option<String> = Box::leak(Box::new(flexible_default));
|
30
|
+
let leaked_flexible_default: &'static Option<&str> =
|
31
|
+
Box::leak(Box::new(flexible_default.as_deref()));
|
27
32
|
|
28
33
|
if !ruby.block_given() {
|
29
34
|
return create_enumerator(EnumeratorArgs {
|
@@ -36,7 +41,7 @@ pub fn parse_csv(
|
|
36
41
|
buffer_size,
|
37
42
|
result_type,
|
38
43
|
flexible,
|
39
|
-
flexible_default,
|
44
|
+
flexible_default: leaked_flexible_default.as_deref(),
|
40
45
|
trim: match trim {
|
41
46
|
Trim::All => Some("all".to_string()),
|
42
47
|
Trim::Headers => Some("headers".to_string()),
|
@@ -46,33 +51,33 @@ pub fn parse_csv(
|
|
46
51
|
});
|
47
52
|
}
|
48
53
|
|
49
|
-
let iter: Box<dyn Iterator<Item = CsvRecord<
|
50
|
-
"hash" =>
|
51
|
-
|
52
|
-
|
53
|
-
)
|
54
|
+
let iter: Box<dyn Iterator<Item = CsvRecord<RandomState>>> = match result_type.as_str() {
|
55
|
+
"hash" => {
|
56
|
+
let builder = RecordReaderBuilder::<
|
57
|
+
HashMap<&'static str, Option<CowValue<'static>>, RandomState>,
|
58
|
+
>::new(ruby, to_read)
|
54
59
|
.has_headers(has_headers)
|
55
60
|
.flexible(flexible)
|
56
|
-
.flexible_default(flexible_default)
|
61
|
+
.flexible_default(flexible_default.as_deref())
|
57
62
|
.trim(trim)
|
58
63
|
.delimiter(delimiter)
|
59
64
|
.quote_char(quote_char)
|
60
65
|
.null_string(null_string)
|
61
|
-
.buffer(buffer_size)
|
62
|
-
|
63
|
-
.map(CsvRecord::Map)
|
64
|
-
|
66
|
+
.buffer(buffer_size);
|
67
|
+
|
68
|
+
Box::new(builder.build_threaded()?.map(CsvRecord::Map))
|
69
|
+
}
|
65
70
|
"array" => Box::new(
|
66
|
-
RecordReaderBuilder::<Vec<Option<
|
71
|
+
RecordReaderBuilder::<Vec<Option<CowValue<'static>>>>::new(ruby, to_read)
|
67
72
|
.has_headers(has_headers)
|
68
73
|
.flexible(flexible)
|
69
|
-
.flexible_default(flexible_default)
|
74
|
+
.flexible_default(flexible_default.as_deref())
|
70
75
|
.trim(trim)
|
71
76
|
.delimiter(delimiter)
|
72
77
|
.quote_char(quote_char)
|
73
78
|
.null_string(null_string)
|
74
79
|
.buffer(buffer_size)
|
75
|
-
.
|
80
|
+
.build_threaded()?
|
76
81
|
.map(CsvRecord::Vec),
|
77
82
|
),
|
78
83
|
_ => {
|
@@ -96,13 +101,13 @@ struct EnumeratorArgs {
|
|
96
101
|
buffer_size: usize,
|
97
102
|
result_type: String,
|
98
103
|
flexible: bool,
|
99
|
-
flexible_default: Option
|
104
|
+
flexible_default: Option<&'static str>,
|
100
105
|
trim: Option<String>,
|
101
106
|
}
|
102
107
|
|
103
108
|
fn create_enumerator(
|
104
109
|
args: EnumeratorArgs,
|
105
|
-
) -> Result<Yield<Box<dyn Iterator<Item = CsvRecord<
|
110
|
+
) -> Result<Yield<Box<dyn Iterator<Item = CsvRecord<'static, RandomState>>>>, Error> {
|
106
111
|
let kwargs = RHash::new();
|
107
112
|
kwargs.aset(Symbol::new("has_headers"), args.has_headers)?;
|
108
113
|
kwargs.aset(
|
data/ext/osv/src/utils.rs
CHANGED
@@ -13,12 +13,12 @@ fn parse_string_or_symbol(ruby: &Ruby, value: Value) -> Result<Option<String>, E
|
|
13
13
|
RString::from_value(value)
|
14
14
|
.ok_or_else(|| Error::new(magnus::exception::type_error(), "Invalid string value"))?
|
15
15
|
.to_string()
|
16
|
-
.map(
|
16
|
+
.map(Some)
|
17
17
|
} else if value.is_kind_of(ruby.class_symbol()) {
|
18
18
|
Symbol::from_value(value)
|
19
19
|
.ok_or_else(|| Error::new(magnus::exception::type_error(), "Invalid symbol value"))?
|
20
20
|
.funcall("to_s", ())
|
21
|
-
.map(
|
21
|
+
.map(Some)
|
22
22
|
} else {
|
23
23
|
Err(Error::new(
|
24
24
|
magnus::exception::type_error(),
|
@@ -28,7 +28,7 @@ fn parse_string_or_symbol(ruby: &Ruby, value: Value) -> Result<Option<String>, E
|
|
28
28
|
}
|
29
29
|
|
30
30
|
#[derive(Debug)]
|
31
|
-
pub struct
|
31
|
+
pub struct ReadCsvArgs {
|
32
32
|
pub to_read: Value,
|
33
33
|
pub has_headers: bool,
|
34
34
|
pub delimiter: u8,
|
@@ -42,7 +42,7 @@ pub struct CsvArgs {
|
|
42
42
|
}
|
43
43
|
|
44
44
|
/// Parse common arguments for CSV parsing
|
45
|
-
pub fn
|
45
|
+
pub fn parse_read_csv_args(ruby: &Ruby, args: &[Value]) -> Result<ReadCsvArgs, Error> {
|
46
46
|
let parsed_args = scan_args::<(Value,), (), (), (), _, ()>(args)?;
|
47
47
|
let (to_read,) = parsed_args.required;
|
48
48
|
|
@@ -166,7 +166,7 @@ pub fn parse_csv_args(ruby: &Ruby, args: &[Value]) -> Result<CsvArgs, Error> {
|
|
166
166
|
None => csv::Trim::None,
|
167
167
|
};
|
168
168
|
|
169
|
-
Ok(
|
169
|
+
Ok(ReadCsvArgs {
|
170
170
|
to_read,
|
171
171
|
has_headers,
|
172
172
|
delimiter,
|
data/lib/osv/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: osv
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.3.
|
4
|
+
version: 0.3.15
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Nathan Jaremko
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2025-01-03 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rb_sys
|
@@ -38,16 +38,12 @@ dependencies:
|
|
38
38
|
- - "~>"
|
39
39
|
- !ruby/object:Gem::Version
|
40
40
|
version: 1.2.0
|
41
|
-
description: |
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
- Support for both hash and array output formats
|
48
|
-
- Whitespace trimming options
|
49
|
-
- Strict or flexible parsing modes
|
50
|
-
- Significantly faster than Ruby's standard CSV library
|
41
|
+
description: |2
|
42
|
+
OSV is a high-performance CSV parser for Ruby, implemented in Rust.
|
43
|
+
It wraps BurntSushi's csv-rs crate to provide fast CSV parsing with support for both hash-based and array-based row formats.
|
44
|
+
Features include: Flexible input sources (file paths, gzipped files, IO objects, strings),
|
45
|
+
configurable parsing options (headers, separators, quote chars), support for both hash and array output formats,
|
46
|
+
whitespace trimming options, strict or flexible parsing modes, and is significantly faster than Ruby's standard CSV library.
|
51
47
|
email:
|
52
48
|
- nathan@jaremko.ca
|
53
49
|
executables: []
|
@@ -63,13 +59,15 @@ files:
|
|
63
59
|
- Rakefile
|
64
60
|
- ext/osv/Cargo.toml
|
65
61
|
- ext/osv/extconf.rb
|
62
|
+
- ext/osv/src/allocator.rs
|
66
63
|
- ext/osv/src/csv/builder.rs
|
67
64
|
- ext/osv/src/csv/header_cache.rs
|
68
65
|
- ext/osv/src/csv/mod.rs
|
69
66
|
- ext/osv/src/csv/parser.rs
|
70
|
-
- ext/osv/src/csv/read_impl.rs
|
71
|
-
- ext/osv/src/csv/reader.rs
|
72
67
|
- ext/osv/src/csv/record.rs
|
68
|
+
- ext/osv/src/csv/record_reader.rs
|
69
|
+
- ext/osv/src/csv/ruby_integration.rs
|
70
|
+
- ext/osv/src/csv/ruby_reader.rs
|
73
71
|
- ext/osv/src/lib.rs
|
74
72
|
- ext/osv/src/reader.rs
|
75
73
|
- ext/osv/src/utils.rs
|
@@ -84,8 +82,8 @@ metadata:
|
|
84
82
|
source_code_uri: https://github.com/njaremko/osv
|
85
83
|
readme_uri: https://github.com/njaremko/osv/blob/main/README.md
|
86
84
|
changelog_uri: https://github.com/njaremko/osv/blob/main/CHANGELOG.md
|
87
|
-
rubygems_mfa_required: 'true'
|
88
85
|
documentation_uri: https://www.rubydoc.info/gems/osv
|
86
|
+
funding_uri: https://github.com/sponsors/njaremko
|
89
87
|
post_install_message:
|
90
88
|
rdoc_options: []
|
91
89
|
require_paths:
|
@@ -1,75 +0,0 @@
|
|
1
|
-
use super::{header_cache::StringCache, parser::RecordParser};
|
2
|
-
use std::{io::Read, thread};
|
3
|
-
|
4
|
-
/// Initial byte capacity of the `csv::StringRecord` allocated for each single-threaded read.
pub(crate) const READ_BUFFER_SIZE: usize = 8192;
|
5
|
-
|
6
|
-
pub enum ReadImpl<T: RecordParser> {
|
7
|
-
SingleThreaded {
|
8
|
-
reader: csv::Reader<Box<dyn Read>>,
|
9
|
-
headers: Vec<&'static str>,
|
10
|
-
null_string: Option<String>,
|
11
|
-
flexible_default: Option<String>,
|
12
|
-
},
|
13
|
-
MultiThreaded {
|
14
|
-
headers: Vec<&'static str>,
|
15
|
-
receiver: kanal::Receiver<T::Output>,
|
16
|
-
handle: Option<thread::JoinHandle<()>>,
|
17
|
-
},
|
18
|
-
}
|
19
|
-
|
20
|
-
impl<T: RecordParser> ReadImpl<T> {
|
21
|
-
#[inline]
|
22
|
-
pub fn next(&mut self) -> Option<T::Output> {
|
23
|
-
match self {
|
24
|
-
Self::MultiThreaded {
|
25
|
-
receiver, handle, ..
|
26
|
-
} => match receiver.recv() {
|
27
|
-
Ok(record) => Some(record),
|
28
|
-
Err(_) => {
|
29
|
-
if let Some(handle) = handle.take() {
|
30
|
-
let _ = handle.join();
|
31
|
-
}
|
32
|
-
None
|
33
|
-
}
|
34
|
-
},
|
35
|
-
Self::SingleThreaded {
|
36
|
-
reader,
|
37
|
-
headers,
|
38
|
-
null_string,
|
39
|
-
flexible_default,
|
40
|
-
} => {
|
41
|
-
let mut record = csv::StringRecord::with_capacity(READ_BUFFER_SIZE, headers.len());
|
42
|
-
match reader.read_record(&mut record) {
|
43
|
-
Ok(true) => Some(T::parse(
|
44
|
-
headers,
|
45
|
-
&record,
|
46
|
-
null_string.as_deref(),
|
47
|
-
flexible_default.as_deref(),
|
48
|
-
)),
|
49
|
-
_ => None,
|
50
|
-
}
|
51
|
-
}
|
52
|
-
}
|
53
|
-
}
|
54
|
-
|
55
|
-
#[inline]
|
56
|
-
pub fn cleanup(&mut self) {
|
57
|
-
match self {
|
58
|
-
Self::MultiThreaded {
|
59
|
-
receiver,
|
60
|
-
handle,
|
61
|
-
headers,
|
62
|
-
..
|
63
|
-
} => {
|
64
|
-
receiver.close();
|
65
|
-
if let Some(handle) = handle.take() {
|
66
|
-
let _ = handle.join();
|
67
|
-
}
|
68
|
-
let _ = StringCache::clear(headers);
|
69
|
-
}
|
70
|
-
Self::SingleThreaded { headers, .. } => {
|
71
|
-
let _ = StringCache::clear(headers);
|
72
|
-
}
|
73
|
-
}
|
74
|
-
}
|
75
|
-
}
|