osv 0.3.13 → 0.3.14
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Gemfile +1 -1
- data/README.md +39 -81
- data/Rakefile +6 -8
- data/ext/osv/src/csv/builder.rs +59 -175
- data/ext/osv/src/csv/mod.rs +4 -3
- data/ext/osv/src/csv/parser.rs +90 -14
- data/ext/osv/src/csv/record.rs +19 -6
- data/ext/osv/src/csv/record_reader.rs +175 -0
- data/ext/osv/src/csv/ruby_reader.rs +181 -0
- data/ext/osv/src/reader.rs +24 -19
- data/lib/osv/version.rb +1 -1
- metadata +11 -15
- data/ext/osv/src/csv/read_impl.rs +0 -75
- data/ext/osv/src/csv/reader.rs +0 -57
data/ext/osv/src/csv/parser.rs
CHANGED
@@ -1,18 +1,23 @@
|
|
1
|
+
use std::borrow::Cow;
|
1
2
|
use std::collections::HashMap;
|
2
3
|
use std::hash::BuildHasher;
|
3
4
|
|
4
|
-
|
5
|
-
|
5
|
+
use super::CowValue;
|
6
|
+
|
7
|
+
pub trait RecordParser<'a> {
|
8
|
+
type Output: 'a;
|
6
9
|
|
7
10
|
fn parse(
|
8
11
|
headers: &[&'static str],
|
9
12
|
record: &csv::StringRecord,
|
10
13
|
null_string: Option<&str>,
|
11
|
-
flexible_default: Option
|
14
|
+
flexible_default: Option<Cow<'a, str>>,
|
12
15
|
) -> Self::Output;
|
13
16
|
}
|
14
17
|
|
15
|
-
impl<S: BuildHasher + Default
|
18
|
+
impl<'a, S: BuildHasher + Default + 'a> RecordParser<'a>
|
19
|
+
for HashMap<&'static str, Option<CowValue<'a>>, S>
|
20
|
+
{
|
16
21
|
type Output = Self;
|
17
22
|
|
18
23
|
#[inline]
|
@@ -20,19 +25,22 @@ impl<S: BuildHasher + Default> RecordParser for HashMap<&'static str, Option<Str
|
|
20
25
|
headers: &[&'static str],
|
21
26
|
record: &csv::StringRecord,
|
22
27
|
null_string: Option<&str>,
|
23
|
-
flexible_default: Option
|
28
|
+
flexible_default: Option<Cow<'a, str>>,
|
24
29
|
) -> Self::Output {
|
25
30
|
let mut map = HashMap::with_capacity_and_hasher(headers.len(), S::default());
|
31
|
+
|
32
|
+
let shared_empty = Cow::Borrowed("");
|
33
|
+
let shared_default = flexible_default.map(|f| CowValue(f));
|
26
34
|
headers.iter().enumerate().for_each(|(i, &header)| {
|
27
35
|
let value = record.get(i).map_or_else(
|
28
|
-
||
|
36
|
+
|| shared_default.clone(),
|
29
37
|
|field| {
|
30
38
|
if null_string == Some(field) {
|
31
39
|
None
|
32
40
|
} else if field.is_empty() {
|
33
|
-
Some(
|
41
|
+
Some(CowValue(shared_empty.clone()))
|
34
42
|
} else {
|
35
|
-
Some(field.
|
43
|
+
Some(CowValue(Cow::Owned(field.to_string())))
|
36
44
|
}
|
37
45
|
},
|
38
46
|
);
|
@@ -42,7 +50,7 @@ impl<S: BuildHasher + Default> RecordParser for HashMap<&'static str, Option<Str
|
|
42
50
|
}
|
43
51
|
}
|
44
52
|
|
45
|
-
impl RecordParser for Vec<Option<
|
53
|
+
impl<'a> RecordParser<'a> for Vec<Option<CowValue<'a>>> {
|
46
54
|
type Output = Self;
|
47
55
|
|
48
56
|
#[inline]
|
@@ -50,26 +58,94 @@ impl RecordParser for Vec<Option<String>> {
|
|
50
58
|
headers: &[&'static str],
|
51
59
|
record: &csv::StringRecord,
|
52
60
|
null_string: Option<&str>,
|
53
|
-
flexible_default: Option
|
61
|
+
flexible_default: Option<Cow<'a, str>>,
|
54
62
|
) -> Self::Output {
|
55
63
|
let target_len = headers.len();
|
56
64
|
let mut vec = Vec::with_capacity(target_len);
|
65
|
+
|
66
|
+
let shared_empty = Cow::Borrowed("");
|
67
|
+
let shared_default = flexible_default.map(|f| CowValue(f));
|
68
|
+
|
57
69
|
for field in record.iter() {
|
58
70
|
let value = if Some(field) == null_string {
|
59
71
|
None
|
60
72
|
} else if field.is_empty() {
|
61
|
-
Some(
|
73
|
+
Some(CowValue(shared_empty.clone()))
|
62
74
|
} else {
|
63
|
-
Some(field.
|
75
|
+
Some(CowValue(Cow::Owned(field.to_string())))
|
64
76
|
};
|
65
77
|
vec.push(value);
|
66
78
|
}
|
67
79
|
|
68
80
|
if vec.len() < target_len {
|
69
|
-
if let Some(default) =
|
70
|
-
vec.resize_with(target_len, || Some(default.
|
81
|
+
if let Some(default) = shared_default {
|
82
|
+
vec.resize_with(target_len, || Some(default.clone()));
|
71
83
|
}
|
72
84
|
}
|
73
85
|
vec
|
74
86
|
}
|
75
87
|
}
|
88
|
+
|
89
|
+
// impl<'a, S: BuildHasher + Default + 'a> RecordParser<'a>
|
90
|
+
// for HashMap<&'static str, Option<String>, S>
|
91
|
+
// {
|
92
|
+
// type Output = Self;
|
93
|
+
|
94
|
+
// #[inline]
|
95
|
+
// fn parse(
|
96
|
+
// headers: &[&'static str],
|
97
|
+
// record: &csv::StringRecord,
|
98
|
+
// null_string: Option<&str>,
|
99
|
+
// flexible_default: Option<Cow<'a, str>>,
|
100
|
+
// ) -> Self::Output {
|
101
|
+
// let mut map = HashMap::with_capacity_and_hasher(headers.len(), S::default());
|
102
|
+
// headers.iter().enumerate().for_each(|(i, &header)| {
|
103
|
+
// let value = record.get(i).map_or_else(
|
104
|
+
// || flexible_default.clone(),
|
105
|
+
// |field| {
|
106
|
+
// if null_string == Some(field) {
|
107
|
+
// None
|
108
|
+
// } else if field.is_empty() {
|
109
|
+
// Some(String::new())
|
110
|
+
// } else {
|
111
|
+
// Some(field.into())
|
112
|
+
// }
|
113
|
+
// },
|
114
|
+
// );
|
115
|
+
// map.insert(header, value);
|
116
|
+
// });
|
117
|
+
// map
|
118
|
+
// }
|
119
|
+
// }
|
120
|
+
|
121
|
+
// impl<'a> RecordParser<'a> for Vec<Option<String>> {
|
122
|
+
// type Output = Self;
|
123
|
+
|
124
|
+
// #[inline]
|
125
|
+
// fn parse(
|
126
|
+
// headers: &[&'static str],
|
127
|
+
// record: &csv::StringRecord,
|
128
|
+
// null_string: Option<&str>,
|
129
|
+
// flexible_default: Option<Cow<'a, str>>,
|
130
|
+
// ) -> Self::Output {
|
131
|
+
// let target_len = headers.len();
|
132
|
+
// let mut vec = Vec::with_capacity(target_len);
|
133
|
+
// for field in record.iter() {
|
134
|
+
// let value = if Some(field) == null_string {
|
135
|
+
// None
|
136
|
+
// } else if field.is_empty() {
|
137
|
+
// Some(String::new())
|
138
|
+
// } else {
|
139
|
+
// Some(field.into())
|
140
|
+
// };
|
141
|
+
// vec.push(value);
|
142
|
+
// }
|
143
|
+
|
144
|
+
// if vec.len() < target_len {
|
145
|
+
// if let Some(default) = flexible_default {
|
146
|
+
// vec.resize_with(target_len, || Some(default.to_string()));
|
147
|
+
// }
|
148
|
+
// }
|
149
|
+
// vec
|
150
|
+
// }
|
151
|
+
// }
|
data/ext/osv/src/csv/record.rs
CHANGED
@@ -1,17 +1,21 @@
|
|
1
1
|
use magnus::{IntoValue, Ruby, Value};
|
2
|
-
use std::{collections::HashMap, hash::BuildHasher};
|
2
|
+
use std::{borrow::Cow, collections::HashMap, hash::BuildHasher};
|
3
3
|
|
4
4
|
#[derive(Debug)]
|
5
|
-
pub enum CsvRecord<S: BuildHasher + Default> {
|
6
|
-
Vec(Vec<Option<
|
7
|
-
Map(HashMap<&'static str, Option<
|
5
|
+
pub enum CsvRecord<'a, S: BuildHasher + Default> {
|
6
|
+
Vec(Vec<Option<CowValue<'a>>>),
|
7
|
+
Map(HashMap<&'static str, Option<CowValue<'a>>, S>),
|
8
8
|
}
|
9
9
|
|
10
|
-
impl<S: BuildHasher + Default> IntoValue for CsvRecord<S> {
|
10
|
+
impl<'a, S: BuildHasher + Default> IntoValue for CsvRecord<'a, S> {
|
11
11
|
#[inline]
|
12
12
|
fn into_value_with(self, handle: &Ruby) -> Value {
|
13
13
|
match self {
|
14
|
-
CsvRecord::Vec(vec) =>
|
14
|
+
CsvRecord::Vec(vec) => {
|
15
|
+
let ary = handle.ary_new_capa(vec.len());
|
16
|
+
vec.into_iter().try_for_each(|v| ary.push(v)).unwrap();
|
17
|
+
ary.into_value_with(handle)
|
18
|
+
}
|
15
19
|
CsvRecord::Map(map) => {
|
16
20
|
// Pre-allocate the hash with the known size
|
17
21
|
let hash = handle.hash_new_capa(map.len());
|
@@ -23,3 +27,12 @@ impl<S: BuildHasher + Default> IntoValue for CsvRecord<S> {
|
|
23
27
|
}
|
24
28
|
}
|
25
29
|
}
|
30
|
+
|
31
|
+
/// Newtype around `Cow<'a, str>` holding the text of a single field.
///
/// Cloning a borrowed value copies only the reference; cloning an owned value
/// copies the string.
#[derive(Debug, Clone)]
pub struct CowValue<'a>(pub Cow<'a, str>);
|
33
|
+
|
34
|
+
impl<'a> IntoValue for CowValue<'a> {
|
35
|
+
fn into_value_with(self, handle: &Ruby) -> Value {
|
36
|
+
self.0.into_value_with(handle)
|
37
|
+
}
|
38
|
+
}
|
@@ -0,0 +1,175 @@
|
|
1
|
+
use super::header_cache::StringCache;
|
2
|
+
use super::parser::RecordParser;
|
3
|
+
use magnus::{Error, Ruby};
|
4
|
+
use std::{borrow::Cow, io::Read, thread};
|
5
|
+
|
6
|
+
/// Byte capacity (16 KiB) used when pre-allocating the reusable
/// `csv::StringRecord` buffers in this module.
pub(crate) const READ_BUFFER_SIZE: usize = 16384;
|
7
|
+
|
8
|
+
pub struct RecordReader<'a, T: RecordParser<'a>> {
|
9
|
+
inner: ReaderImpl<'a, T>,
|
10
|
+
}
|
11
|
+
|
12
|
+
enum ReaderImpl<'a, T: RecordParser<'a>> {
|
13
|
+
SingleThreaded {
|
14
|
+
reader: csv::Reader<Box<dyn Read + 'a>>,
|
15
|
+
headers: Vec<&'static str>,
|
16
|
+
null_string: Option<String>,
|
17
|
+
flexible_default: Option<Cow<'a, str>>,
|
18
|
+
string_record: csv::StringRecord,
|
19
|
+
},
|
20
|
+
MultiThreaded {
|
21
|
+
headers: Vec<&'static str>,
|
22
|
+
receiver: kanal::Receiver<T::Output>,
|
23
|
+
handle: Option<thread::JoinHandle<()>>,
|
24
|
+
},
|
25
|
+
}
|
26
|
+
|
27
|
+
impl<'a, T: RecordParser<'a>> RecordReader<'a, T> {
|
28
|
+
#[inline]
|
29
|
+
pub(crate) fn get_headers(
|
30
|
+
ruby: &Ruby,
|
31
|
+
reader: &mut csv::Reader<impl Read>,
|
32
|
+
has_headers: bool,
|
33
|
+
) -> Result<Vec<String>, Error> {
|
34
|
+
let first_row = reader.headers().map_err(|e| {
|
35
|
+
Error::new(
|
36
|
+
ruby.exception_runtime_error(),
|
37
|
+
format!("Failed to read headers: {e}"),
|
38
|
+
)
|
39
|
+
})?;
|
40
|
+
|
41
|
+
let mut headers = Vec::with_capacity(first_row.len());
|
42
|
+
if has_headers {
|
43
|
+
headers.extend(first_row.iter().map(String::from));
|
44
|
+
} else {
|
45
|
+
headers.extend((0..first_row.len()).map(|i| format!("c{i}")));
|
46
|
+
}
|
47
|
+
Ok(headers)
|
48
|
+
}
|
49
|
+
|
50
|
+
pub(crate) fn new_single_threaded(
|
51
|
+
reader: csv::Reader<Box<dyn Read + 'a>>,
|
52
|
+
headers: Vec<&'static str>,
|
53
|
+
null_string: Option<String>,
|
54
|
+
flexible_default: Option<&'a str>,
|
55
|
+
) -> Self {
|
56
|
+
let headers_len = headers.len();
|
57
|
+
Self {
|
58
|
+
inner: ReaderImpl::SingleThreaded {
|
59
|
+
reader,
|
60
|
+
headers,
|
61
|
+
null_string,
|
62
|
+
flexible_default: flexible_default.map(|s| Cow::Borrowed(s)),
|
63
|
+
string_record: csv::StringRecord::with_capacity(READ_BUFFER_SIZE, headers_len),
|
64
|
+
},
|
65
|
+
}
|
66
|
+
}
|
67
|
+
}
|
68
|
+
|
69
|
+
impl<T: RecordParser<'static> + Send> RecordReader<'static, T> {
|
70
|
+
pub(crate) fn new_multi_threaded(
|
71
|
+
mut reader: csv::Reader<Box<dyn Read + Send + 'static>>,
|
72
|
+
headers: Vec<&'static str>,
|
73
|
+
buffer_size: usize,
|
74
|
+
null_string: Option<String>,
|
75
|
+
flexible_default: Option<&'static str>,
|
76
|
+
should_forget: bool,
|
77
|
+
) -> Self {
|
78
|
+
let (sender, receiver) = kanal::bounded(buffer_size);
|
79
|
+
let headers_for_thread = headers.clone();
|
80
|
+
|
81
|
+
let handle = thread::spawn(move || {
|
82
|
+
let mut record =
|
83
|
+
csv::StringRecord::with_capacity(READ_BUFFER_SIZE, headers_for_thread.len());
|
84
|
+
while let Ok(true) = reader.read_record(&mut record) {
|
85
|
+
let row = T::parse(
|
86
|
+
&headers_for_thread,
|
87
|
+
&record,
|
88
|
+
null_string.as_deref(),
|
89
|
+
flexible_default.map(|s| Cow::Borrowed(s)),
|
90
|
+
);
|
91
|
+
if sender.send(row).is_err() {
|
92
|
+
break;
|
93
|
+
}
|
94
|
+
}
|
95
|
+
if should_forget {
|
96
|
+
let file_to_forget = reader.into_inner();
|
97
|
+
std::mem::forget(file_to_forget);
|
98
|
+
}
|
99
|
+
});
|
100
|
+
|
101
|
+
Self {
|
102
|
+
inner: ReaderImpl::MultiThreaded {
|
103
|
+
headers,
|
104
|
+
receiver,
|
105
|
+
handle: Some(handle),
|
106
|
+
},
|
107
|
+
}
|
108
|
+
}
|
109
|
+
}
|
110
|
+
|
111
|
+
impl<'a, T: RecordParser<'a>> Iterator for RecordReader<'a, T> {
|
112
|
+
type Item = T::Output;
|
113
|
+
|
114
|
+
#[inline]
|
115
|
+
fn next(&mut self) -> Option<Self::Item> {
|
116
|
+
match &mut self.inner {
|
117
|
+
ReaderImpl::MultiThreaded {
|
118
|
+
receiver, handle, ..
|
119
|
+
} => match receiver.recv() {
|
120
|
+
Ok(record) => Some(record),
|
121
|
+
Err(_) => {
|
122
|
+
if let Some(handle) = handle.take() {
|
123
|
+
let _ = handle.join();
|
124
|
+
}
|
125
|
+
None
|
126
|
+
}
|
127
|
+
},
|
128
|
+
ReaderImpl::SingleThreaded {
|
129
|
+
reader,
|
130
|
+
headers,
|
131
|
+
null_string,
|
132
|
+
flexible_default,
|
133
|
+
ref mut string_record,
|
134
|
+
} => match reader.read_record(string_record) {
|
135
|
+
Ok(true) => Some(T::parse(
|
136
|
+
headers,
|
137
|
+
&string_record,
|
138
|
+
null_string.as_deref(),
|
139
|
+
flexible_default.clone(),
|
140
|
+
)),
|
141
|
+
Ok(false) => None,
|
142
|
+
Err(_e) => None,
|
143
|
+
},
|
144
|
+
}
|
145
|
+
}
|
146
|
+
|
147
|
+
#[inline]
|
148
|
+
fn size_hint(&self) -> (usize, Option<usize>) {
|
149
|
+
// We can't know the exact size without reading the whole file
|
150
|
+
(0, None)
|
151
|
+
}
|
152
|
+
}
|
153
|
+
|
154
|
+
impl<'a, T: RecordParser<'a>> Drop for RecordReader<'a, T> {
|
155
|
+
#[inline]
|
156
|
+
fn drop(&mut self) {
|
157
|
+
match &mut self.inner {
|
158
|
+
ReaderImpl::MultiThreaded {
|
159
|
+
receiver,
|
160
|
+
handle,
|
161
|
+
headers,
|
162
|
+
..
|
163
|
+
} => {
|
164
|
+
receiver.close();
|
165
|
+
if let Some(handle) = handle.take() {
|
166
|
+
let _ = handle.join();
|
167
|
+
}
|
168
|
+
let _ = StringCache::clear(headers);
|
169
|
+
}
|
170
|
+
ReaderImpl::SingleThreaded { headers, .. } => {
|
171
|
+
let _ = StringCache::clear(headers);
|
172
|
+
}
|
173
|
+
}
|
174
|
+
}
|
175
|
+
}
|
@@ -0,0 +1,181 @@
|
|
1
|
+
use super::READ_BUFFER_SIZE;
|
2
|
+
use magnus::{
|
3
|
+
value::{Opaque, ReprValue},
|
4
|
+
RClass, RString, Ruby, Value,
|
5
|
+
};
|
6
|
+
use std::io::{self, Read};
|
7
|
+
use std::sync::OnceLock;
|
8
|
+
|
9
|
+
/// Cached handle to Ruby's `StringIO` class, initialised on first use by
/// `is_string_io`.
static STRING_IO_CLASS: OnceLock<Opaque<RClass>> = OnceLock::new();
|
10
|
+
|
11
|
+
/// A reader that can handle various Ruby input types (String, StringIO, IO-like objects)
|
12
|
+
/// and provide a standard Read implementation for them.
|
13
|
+
pub struct RubyReader<'a, T> {
|
14
|
+
#[allow(unused)]
|
15
|
+
ruby: &'a Ruby,
|
16
|
+
inner: T,
|
17
|
+
buffer: Option<Vec<u8>>,
|
18
|
+
offset: usize,
|
19
|
+
// Number of bytes that have been read into the buffer
|
20
|
+
// Used as an upper bound for offset
|
21
|
+
buffered_bytes: usize,
|
22
|
+
}
|
23
|
+
|
24
|
+
pub fn build_ruby_reader<'a>(
|
25
|
+
ruby: &'a Ruby,
|
26
|
+
input: Value,
|
27
|
+
) -> Result<Box<dyn Read + 'a>, magnus::Error> {
|
28
|
+
if RubyReader::is_string_io(ruby, &input) {
|
29
|
+
RubyReader::from_string_io(ruby, input)
|
30
|
+
} else if RubyReader::is_io_like(&input) {
|
31
|
+
RubyReader::from_io(ruby, input)
|
32
|
+
} else {
|
33
|
+
RubyReader::from_string_like(ruby, input)
|
34
|
+
}
|
35
|
+
}
|
36
|
+
|
37
|
+
impl<'a> RubyReader<'a, Value> {
|
38
|
+
fn from_io(ruby: &'a Ruby, input: Value) -> Result<Box<dyn Read + 'a>, magnus::Error> {
|
39
|
+
if Self::is_io_like(&input) {
|
40
|
+
Ok(Box::new(Self::from_io_like(ruby, input)))
|
41
|
+
} else {
|
42
|
+
Err(magnus::Error::new(
|
43
|
+
magnus::exception::type_error(),
|
44
|
+
"Input is not an IO-like object",
|
45
|
+
))
|
46
|
+
}
|
47
|
+
}
|
48
|
+
|
49
|
+
fn is_io_like(input: &Value) -> bool {
|
50
|
+
input.respond_to("read", false).unwrap_or(false)
|
51
|
+
}
|
52
|
+
|
53
|
+
fn from_io_like(ruby: &'a Ruby, input: Value) -> Self {
|
54
|
+
Self {
|
55
|
+
ruby,
|
56
|
+
inner: input,
|
57
|
+
buffer: Some(vec![0; READ_BUFFER_SIZE]),
|
58
|
+
offset: 0,
|
59
|
+
buffered_bytes: 0,
|
60
|
+
}
|
61
|
+
}
|
62
|
+
|
63
|
+
fn read_from_buffer(&mut self, to_buf: &mut [u8]) -> Option<io::Result<usize>> {
|
64
|
+
if let Some(from_buf) = &self.buffer {
|
65
|
+
// If the offset is within the buffered bytes, copy the remaining bytes to the output buffer
|
66
|
+
if self.offset < self.buffered_bytes {
|
67
|
+
let remaining = self.buffered_bytes - self.offset;
|
68
|
+
let copy_size = remaining.min(to_buf.len());
|
69
|
+
to_buf[..copy_size]
|
70
|
+
.copy_from_slice(&from_buf[self.offset..self.offset + copy_size]);
|
71
|
+
self.offset += copy_size;
|
72
|
+
Some(Ok(copy_size))
|
73
|
+
} else {
|
74
|
+
None
|
75
|
+
}
|
76
|
+
} else {
|
77
|
+
None
|
78
|
+
}
|
79
|
+
}
|
80
|
+
|
81
|
+
fn read_from_ruby(&mut self, buf: &mut [u8]) -> io::Result<usize> {
|
82
|
+
let buffer = self.buffer.as_mut().unwrap();
|
83
|
+
let result = self
|
84
|
+
.inner
|
85
|
+
.funcall::<_, _, RString>("read", (buffer.capacity(),))
|
86
|
+
.map_err(|e| io::Error::new(io::ErrorKind::Other, e.to_string()))?;
|
87
|
+
|
88
|
+
if result.is_nil() {
|
89
|
+
return Ok(0); // EOF
|
90
|
+
}
|
91
|
+
|
92
|
+
let bytes = unsafe { result.as_slice() };
|
93
|
+
|
94
|
+
// Update internal buffer
|
95
|
+
let bytes_len = bytes.len();
|
96
|
+
if bytes_len == 0 {
|
97
|
+
return Ok(0);
|
98
|
+
}
|
99
|
+
|
100
|
+
// Only copy what we actually read
|
101
|
+
buffer[..bytes_len].copy_from_slice(bytes);
|
102
|
+
self.buffered_bytes = bytes_len;
|
103
|
+
|
104
|
+
// Copy to output buffer
|
105
|
+
let copy_size = bytes_len.min(buf.len());
|
106
|
+
buf[..copy_size].copy_from_slice(&buffer[..copy_size]);
|
107
|
+
self.offset = copy_size;
|
108
|
+
Ok(copy_size)
|
109
|
+
}
|
110
|
+
}
|
111
|
+
|
112
|
+
impl<'a> RubyReader<'a, RString> {
|
113
|
+
pub fn from_string_io(
|
114
|
+
ruby: &'a Ruby,
|
115
|
+
input: Value,
|
116
|
+
) -> Result<Box<dyn Read + 'a>, magnus::Error> {
|
117
|
+
if !Self::is_string_io(ruby, &input) {
|
118
|
+
return Err(magnus::Error::new(
|
119
|
+
magnus::exception::type_error(),
|
120
|
+
"Input is not a StringIO",
|
121
|
+
));
|
122
|
+
}
|
123
|
+
|
124
|
+
let string_content = input.funcall::<_, _, RString>("string", ()).unwrap();
|
125
|
+
Ok(Box::new(Self {
|
126
|
+
ruby,
|
127
|
+
inner: string_content,
|
128
|
+
buffer: None,
|
129
|
+
offset: 0,
|
130
|
+
buffered_bytes: 0,
|
131
|
+
}))
|
132
|
+
}
|
133
|
+
|
134
|
+
fn is_string_io(ruby: &Ruby, input: &Value) -> bool {
|
135
|
+
let string_io_class = STRING_IO_CLASS.get_or_init(|| {
|
136
|
+
let class = RClass::from_value(ruby.eval("StringIO").unwrap()).unwrap();
|
137
|
+
Opaque::from(class)
|
138
|
+
});
|
139
|
+
input.is_kind_of(ruby.get_inner(*string_io_class))
|
140
|
+
}
|
141
|
+
|
142
|
+
fn from_string_like(ruby: &'a Ruby, input: Value) -> Result<Box<dyn Read + 'a>, magnus::Error> {
|
143
|
+
// Try calling `to_str`, and if that fails, try `to_s`
|
144
|
+
let string_content = input
|
145
|
+
.funcall::<_, _, RString>("to_str", ())
|
146
|
+
.or_else(|_| input.funcall::<_, _, RString>("to_s", ()))?;
|
147
|
+
Ok(Box::new(Self {
|
148
|
+
ruby,
|
149
|
+
inner: string_content,
|
150
|
+
buffer: None,
|
151
|
+
offset: 0,
|
152
|
+
buffered_bytes: 0,
|
153
|
+
}))
|
154
|
+
}
|
155
|
+
}
|
156
|
+
|
157
|
+
impl<'a> Read for RubyReader<'a, Value> {
|
158
|
+
fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
|
159
|
+
if let Some(result) = self.read_from_buffer(buf) {
|
160
|
+
result
|
161
|
+
} else {
|
162
|
+
// If the buffer is empty, read from Ruby
|
163
|
+
self.read_from_ruby(buf)
|
164
|
+
}
|
165
|
+
}
|
166
|
+
}
|
167
|
+
|
168
|
+
impl<'a> Read for RubyReader<'a, RString> {
|
169
|
+
fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
|
170
|
+
let string_buffer = unsafe { self.inner.as_slice() };
|
171
|
+
if self.offset >= string_buffer.len() {
|
172
|
+
return Ok(0); // EOF
|
173
|
+
}
|
174
|
+
|
175
|
+
let remaining = string_buffer.len() - self.offset;
|
176
|
+
let copy_size = remaining.min(buf.len());
|
177
|
+
buf[..copy_size].copy_from_slice(&string_buffer[self.offset..self.offset + copy_size]);
|
178
|
+
self.offset += copy_size;
|
179
|
+
Ok(copy_size)
|
180
|
+
}
|
181
|
+
}
|
data/ext/osv/src/reader.rs
CHANGED
@@ -1,4 +1,4 @@
|
|
1
|
-
use crate::csv::{CsvRecord, RecordReaderBuilder};
|
1
|
+
use crate::csv::{CowValue, CsvRecord, RecordReaderBuilder};
|
2
2
|
use crate::utils::*;
|
3
3
|
use csv::Trim;
|
4
4
|
use magnus::value::ReprValue;
|
@@ -6,11 +6,12 @@ use magnus::{block::Yield, Error, KwArgs, RHash, Ruby, Symbol, Value};
|
|
6
6
|
use std::collections::HashMap;
|
7
7
|
use xxhash_rust::xxh3::Xxh3Builder;
|
8
8
|
|
9
|
-
pub fn parse_csv(
|
9
|
+
pub fn parse_csv<'a>(
|
10
10
|
rb_self: Value,
|
11
11
|
args: &[Value],
|
12
|
-
) -> Result<Yield<Box<dyn Iterator<Item = CsvRecord<Xxh3Builder>>>>, Error> {
|
13
|
-
let
|
12
|
+
) -> Result<Yield<Box<dyn Iterator<Item = CsvRecord<'static, Xxh3Builder>>>>, Error> {
|
13
|
+
let original = unsafe { Ruby::get_unchecked() };
|
14
|
+
let ruby: &'static Ruby = Box::leak(Box::new(original));
|
14
15
|
|
15
16
|
let CsvArgs {
|
16
17
|
to_read,
|
@@ -25,6 +26,10 @@ pub fn parse_csv(
|
|
25
26
|
trim,
|
26
27
|
} = parse_csv_args(&ruby, args)?;
|
27
28
|
|
29
|
+
let flexible_default: &'static Option<String> = Box::leak(Box::new(flexible_default));
|
30
|
+
let leaked_flexible_default: &'static Option<&str> =
|
31
|
+
Box::leak(Box::new(flexible_default.as_deref()));
|
32
|
+
|
28
33
|
if !ruby.block_given() {
|
29
34
|
return create_enumerator(EnumeratorArgs {
|
30
35
|
rb_self,
|
@@ -36,7 +41,7 @@ pub fn parse_csv(
|
|
36
41
|
buffer_size,
|
37
42
|
result_type,
|
38
43
|
flexible,
|
39
|
-
flexible_default,
|
44
|
+
flexible_default: leaked_flexible_default.as_deref(),
|
40
45
|
trim: match trim {
|
41
46
|
Trim::All => Some("all".to_string()),
|
42
47
|
Trim::Headers => Some("headers".to_string()),
|
@@ -47,32 +52,32 @@ pub fn parse_csv(
|
|
47
52
|
}
|
48
53
|
|
49
54
|
let iter: Box<dyn Iterator<Item = CsvRecord<Xxh3Builder>>> = match result_type.as_str() {
|
50
|
-
"hash" =>
|
51
|
-
|
52
|
-
|
53
|
-
)
|
55
|
+
"hash" => {
|
56
|
+
let builder = RecordReaderBuilder::<
|
57
|
+
HashMap<&'static str, Option<CowValue<'static>>, Xxh3Builder>,
|
58
|
+
>::new(&ruby, to_read)
|
54
59
|
.has_headers(has_headers)
|
55
60
|
.flexible(flexible)
|
56
|
-
.flexible_default(flexible_default)
|
61
|
+
.flexible_default(flexible_default.as_deref())
|
57
62
|
.trim(trim)
|
58
63
|
.delimiter(delimiter)
|
59
64
|
.quote_char(quote_char)
|
60
65
|
.null_string(null_string)
|
61
|
-
.buffer(buffer_size)
|
62
|
-
|
63
|
-
.map(CsvRecord::Map)
|
64
|
-
|
66
|
+
.buffer(buffer_size);
|
67
|
+
|
68
|
+
Box::new(builder.build_threaded()?.map(CsvRecord::Map))
|
69
|
+
}
|
65
70
|
"array" => Box::new(
|
66
|
-
RecordReaderBuilder::<Vec<Option<
|
71
|
+
RecordReaderBuilder::<Vec<Option<CowValue<'static>>>>::new(&ruby, to_read)
|
67
72
|
.has_headers(has_headers)
|
68
73
|
.flexible(flexible)
|
69
|
-
.flexible_default(flexible_default)
|
74
|
+
.flexible_default(flexible_default.as_deref())
|
70
75
|
.trim(trim)
|
71
76
|
.delimiter(delimiter)
|
72
77
|
.quote_char(quote_char)
|
73
78
|
.null_string(null_string)
|
74
79
|
.buffer(buffer_size)
|
75
|
-
.
|
80
|
+
.build_threaded()?
|
76
81
|
.map(CsvRecord::Vec),
|
77
82
|
),
|
78
83
|
_ => {
|
@@ -96,13 +101,13 @@ struct EnumeratorArgs {
|
|
96
101
|
buffer_size: usize,
|
97
102
|
result_type: String,
|
98
103
|
flexible: bool,
|
99
|
-
flexible_default: Option
|
104
|
+
flexible_default: Option<&'static str>,
|
100
105
|
trim: Option<String>,
|
101
106
|
}
|
102
107
|
|
103
108
|
fn create_enumerator(
|
104
109
|
args: EnumeratorArgs,
|
105
|
-
) -> Result<Yield<Box<dyn Iterator<Item = CsvRecord<Xxh3Builder>>>>, Error> {
|
110
|
+
) -> Result<Yield<Box<dyn Iterator<Item = CsvRecord<'static, Xxh3Builder>>>>, Error> {
|
106
111
|
let kwargs = RHash::new();
|
107
112
|
kwargs.aset(Symbol::new("has_headers"), args.has_headers)?;
|
108
113
|
kwargs.aset(
|
data/lib/osv/version.rb
CHANGED