osv 0.1.1 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Cargo.lock +39 -0
- data/ext/osv/Cargo.toml +1 -0
- data/ext/osv/src/lib.rs +6 -168
- data/ext/osv/src/reader.rs +230 -0
- data/ext/osv/src/utils.rs +20 -0
- data/lib/osv/version.rb +1 -1
- data/lib/osv.rbi +29 -0
- metadata +5 -2
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: a1b0c347b0bab5c9d31069c56f47999bfa51e85dfc1e127d1c4474a84ac19c53
|
|
4
|
+
data.tar.gz: 847b199da27b7c1329c1fa64fc8636592f004e93a12fc2ddd8db6127298ac23d
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 62fa77c1ca98031f483569a4dba7cf9e4eca52a4b5fae293d274d5f89c48003e301eab01d95116cfc9cc6a2642e742d16046231a21d25e4a5143bd6ec3b40dac
|
|
7
|
+
data.tar.gz: 3832cbb6ebadfc718a8a5d1963de960ed3abf09d4559d2f0ffe031c642a6c2581dc6ad6edf5d65f22248812585ca464916375503753831024fc355fe4cd04455
|
data/Cargo.lock
CHANGED
|
@@ -11,6 +11,12 @@ dependencies = [
|
|
|
11
11
|
"memchr",
|
|
12
12
|
]
|
|
13
13
|
|
|
14
|
+
[[package]]
|
|
15
|
+
name = "autocfg"
|
|
16
|
+
version = "1.4.0"
|
|
17
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
18
|
+
checksum = "ace50bade8e6234aa140d9a2f552bbee1db4d353f69b8217bc503490fc1a9f26"
|
|
19
|
+
|
|
14
20
|
[[package]]
|
|
15
21
|
name = "bindgen"
|
|
16
22
|
version = "0.69.5"
|
|
@@ -90,6 +96,12 @@ version = "1.13.0"
|
|
|
90
96
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
91
97
|
checksum = "60b1af1c220855b6ceac025d3f6ecdd2b7c4894bfe9cd9bda4fbb4bc7c0d4cf0"
|
|
92
98
|
|
|
99
|
+
[[package]]
|
|
100
|
+
name = "futures-core"
|
|
101
|
+
version = "0.3.31"
|
|
102
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
103
|
+
checksum = "05f29059c0c2090612e8d742178b0580d2dc940c837851ad723096f87af6663e"
|
|
104
|
+
|
|
93
105
|
[[package]]
|
|
94
106
|
name = "glob"
|
|
95
107
|
version = "0.3.1"
|
|
@@ -111,6 +123,16 @@ version = "1.0.14"
|
|
|
111
123
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
112
124
|
checksum = "d75a2a4b1b190afb6f5425f10f6a8f959d2ea0b9c2b1d79553551850539e4674"
|
|
113
125
|
|
|
126
|
+
[[package]]
|
|
127
|
+
name = "kanal"
|
|
128
|
+
version = "0.1.0-pre8"
|
|
129
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
130
|
+
checksum = "b05d55519627edaf7fd0f29981f6dc03fb52df3f5b257130eb8d0bf2801ea1d7"
|
|
131
|
+
dependencies = [
|
|
132
|
+
"futures-core",
|
|
133
|
+
"lock_api",
|
|
134
|
+
]
|
|
135
|
+
|
|
114
136
|
[[package]]
|
|
115
137
|
name = "lazy_static"
|
|
116
138
|
version = "1.5.0"
|
|
@@ -139,6 +161,16 @@ dependencies = [
|
|
|
139
161
|
"windows-targets",
|
|
140
162
|
]
|
|
141
163
|
|
|
164
|
+
[[package]]
|
|
165
|
+
name = "lock_api"
|
|
166
|
+
version = "0.4.12"
|
|
167
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
168
|
+
checksum = "07af8b9cdd281b7915f413fa73f29ebd5d55d0d3f0155584dade1ff18cea1b17"
|
|
169
|
+
dependencies = [
|
|
170
|
+
"autocfg",
|
|
171
|
+
"scopeguard",
|
|
172
|
+
]
|
|
173
|
+
|
|
142
174
|
[[package]]
|
|
143
175
|
name = "magnus"
|
|
144
176
|
version = "0.6.4"
|
|
@@ -201,6 +233,7 @@ name = "osv"
|
|
|
201
233
|
version = "0.1.0"
|
|
202
234
|
dependencies = [
|
|
203
235
|
"csv",
|
|
236
|
+
"kanal",
|
|
204
237
|
"magnus 0.7.1",
|
|
205
238
|
"rb-sys",
|
|
206
239
|
"serde",
|
|
@@ -296,6 +329,12 @@ version = "1.0.18"
|
|
|
296
329
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
297
330
|
checksum = "f3cb5ba0dc43242ce17de99c180e96db90b235b8a9fdc9543c96d2209116bd9f"
|
|
298
331
|
|
|
332
|
+
[[package]]
|
|
333
|
+
name = "scopeguard"
|
|
334
|
+
version = "1.2.0"
|
|
335
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
336
|
+
checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49"
|
|
337
|
+
|
|
299
338
|
[[package]]
|
|
300
339
|
name = "seq-macro"
|
|
301
340
|
version = "0.3.5"
|
data/ext/osv/Cargo.toml
CHANGED
data/ext/osv/src/lib.rs
CHANGED
|
@@ -1,9 +1,9 @@
|
|
|
1
|
-
|
|
2
|
-
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
};
|
|
1
|
+
mod reader;
|
|
2
|
+
mod utils;
|
|
3
|
+
|
|
4
|
+
use crate::reader::*;
|
|
5
|
+
|
|
6
|
+
use magnus::{Error, Ruby};
|
|
7
7
|
|
|
8
8
|
/// Initializes the Ruby extension and defines methods.
|
|
9
9
|
#[magnus::init]
|
|
@@ -13,165 +13,3 @@ fn init(ruby: &Ruby) -> Result<(), Error> {
|
|
|
13
13
|
module.define_module_function("for_each_compat", magnus::method!(parse_compat, -1))?;
|
|
14
14
|
Ok(())
|
|
15
15
|
}
|
|
16
|
-
|
|
17
|
-
/// Helper function to get a readable from either an IO object or a file path
|
|
18
|
-
fn get_readable(ruby: &Ruby, to_read: Value) -> Result<Box<dyn Read>, Error> {
|
|
19
|
-
if to_read.is_kind_of(ruby.class_io()) {
|
|
20
|
-
let reader = RubyIOReader::new(ruby, to_read)?;
|
|
21
|
-
Ok(Box::new(reader))
|
|
22
|
-
} else {
|
|
23
|
-
let path = to_read.to_r_string()?.to_string()?;
|
|
24
|
-
let file = std::fs::File::open(&path).map_err(|e| {
|
|
25
|
-
Error::new(
|
|
26
|
-
ruby.exception_runtime_error(),
|
|
27
|
-
format!("Failed to open file: {}", e),
|
|
28
|
-
)
|
|
29
|
-
})?;
|
|
30
|
-
Ok(Box::new(file))
|
|
31
|
-
}
|
|
32
|
-
}
|
|
33
|
-
|
|
34
|
-
/// Helper function to create a CSV reader with the given configuration
|
|
35
|
-
fn create_csv_reader(
|
|
36
|
-
ruby: &Ruby,
|
|
37
|
-
to_read: Value,
|
|
38
|
-
has_headers: bool,
|
|
39
|
-
delimiter: Option<String>,
|
|
40
|
-
) -> Result<csv::Reader<Box<dyn Read>>, Error> {
|
|
41
|
-
let readable = get_readable(ruby, to_read)?;
|
|
42
|
-
let delimiter = delimiter.unwrap_or_else(|| ",".to_string());
|
|
43
|
-
|
|
44
|
-
let rdr = csv::ReaderBuilder::new()
|
|
45
|
-
.has_headers(has_headers)
|
|
46
|
-
.delimiter(delimiter.as_bytes()[0])
|
|
47
|
-
.from_reader(readable);
|
|
48
|
-
|
|
49
|
-
Ok(rdr)
|
|
50
|
-
}
|
|
51
|
-
|
|
52
|
-
/// Common setup for CSV parsing, returns the reader and headers
|
|
53
|
-
fn setup_csv_parser(
|
|
54
|
-
ruby: &Ruby,
|
|
55
|
-
to_read: Value,
|
|
56
|
-
has_headers: bool,
|
|
57
|
-
delimiter: Option<String>,
|
|
58
|
-
) -> Result<(csv::Reader<Box<dyn Read>>, Vec<String>), Error> {
|
|
59
|
-
let mut rdr = create_csv_reader(ruby, to_read, has_headers, delimiter)?;
|
|
60
|
-
|
|
61
|
-
let first_row = rdr.headers().unwrap().clone();
|
|
62
|
-
let num_fields = first_row.len();
|
|
63
|
-
|
|
64
|
-
let headers = if has_headers {
|
|
65
|
-
first_row.iter().map(|h| h.to_string()).collect()
|
|
66
|
-
} else {
|
|
67
|
-
(0..num_fields).map(|i| format!("c{}", i)).collect()
|
|
68
|
-
};
|
|
69
|
-
|
|
70
|
-
Ok((rdr, headers))
|
|
71
|
-
}
|
|
72
|
-
|
|
73
|
-
/// Parse common arguments for CSV parsing
|
|
74
|
-
fn parse_csv_args(args: &[Value]) -> Result<(Value, bool, Option<String>), Error> {
|
|
75
|
-
let parsed_args = scan_args::<(Value,), (), (), (), _, ()>(args)?;
|
|
76
|
-
let (to_read,) = parsed_args.required;
|
|
77
|
-
|
|
78
|
-
let kwargs = get_kwargs::<_, (), (Option<bool>, Option<String>), ()>(
|
|
79
|
-
parsed_args.keywords,
|
|
80
|
-
&[],
|
|
81
|
-
&["has_headers", "delimiter"],
|
|
82
|
-
)?;
|
|
83
|
-
|
|
84
|
-
let has_headers = kwargs.optional.0.unwrap_or(true);
|
|
85
|
-
|
|
86
|
-
Ok((to_read, has_headers, kwargs.optional.1))
|
|
87
|
-
}
|
|
88
|
-
|
|
89
|
-
/// Parses CSV data from a file and yields each row as a hash to the block.
|
|
90
|
-
fn parse_csv(
|
|
91
|
-
ruby: &Ruby,
|
|
92
|
-
rb_self: Value,
|
|
93
|
-
args: &[Value],
|
|
94
|
-
) -> Result<Yield<impl Iterator<Item = std::collections::HashMap<String, String>>>, Error> {
|
|
95
|
-
if !ruby.block_given() {
|
|
96
|
-
return Ok(Yield::Enumerator(rb_self.enumeratorize("for_each", args)));
|
|
97
|
-
}
|
|
98
|
-
|
|
99
|
-
let (to_read, has_headers, delimiter) = parse_csv_args(args)?;
|
|
100
|
-
let (rdr, headers) = setup_csv_parser(ruby, to_read, has_headers, delimiter)?;
|
|
101
|
-
|
|
102
|
-
let iter = rdr.into_records().filter_map(move |result| {
|
|
103
|
-
let record = result.ok()?;
|
|
104
|
-
let mut hash = std::collections::HashMap::new();
|
|
105
|
-
for (header, field) in headers.iter().zip(record.iter()) {
|
|
106
|
-
hash.insert(header.to_string(), field.to_string());
|
|
107
|
-
}
|
|
108
|
-
Some(hash)
|
|
109
|
-
});
|
|
110
|
-
|
|
111
|
-
Ok(Yield::Iter(iter))
|
|
112
|
-
}
|
|
113
|
-
|
|
114
|
-
fn parse_compat(
|
|
115
|
-
ruby: &Ruby,
|
|
116
|
-
rb_self: Value,
|
|
117
|
-
args: &[Value],
|
|
118
|
-
) -> Result<Yield<impl Iterator<Item = Vec<String>>>, Error> {
|
|
119
|
-
if !ruby.block_given() {
|
|
120
|
-
return Ok(Yield::Enumerator(
|
|
121
|
-
rb_self.enumeratorize("for_each_compat", args),
|
|
122
|
-
));
|
|
123
|
-
}
|
|
124
|
-
|
|
125
|
-
let (to_read, has_headers, delimiter) = parse_csv_args(args)?;
|
|
126
|
-
let (rdr, _) = setup_csv_parser(ruby, to_read, has_headers, delimiter)?;
|
|
127
|
-
|
|
128
|
-
let iter = rdr.into_records().filter_map(|result| {
|
|
129
|
-
result
|
|
130
|
-
.ok()
|
|
131
|
-
.map(|record| record.iter().map(|field| field.to_string()).collect())
|
|
132
|
-
});
|
|
133
|
-
|
|
134
|
-
Ok(Yield::Iter(iter))
|
|
135
|
-
}
|
|
136
|
-
|
|
137
|
-
use std::io::Read;
|
|
138
|
-
|
|
139
|
-
struct RubyIOReader {
|
|
140
|
-
io_obj: Value,
|
|
141
|
-
}
|
|
142
|
-
|
|
143
|
-
impl Read for RubyIOReader {
|
|
144
|
-
fn read(&mut self, buf: &mut [u8]) -> std::io::Result<usize> {
|
|
145
|
-
let result: RString = self.io_obj.funcall("read", (buf.len(),)).map_err(|_| {
|
|
146
|
-
std::io::Error::new(std::io::ErrorKind::Other, "Failed to read from IO")
|
|
147
|
-
})?;
|
|
148
|
-
|
|
149
|
-
// Handle EOF case
|
|
150
|
-
if result.is_nil() {
|
|
151
|
-
return Ok(0);
|
|
152
|
-
}
|
|
153
|
-
|
|
154
|
-
let rust_string = result.to_string().map_err(|_| {
|
|
155
|
-
std::io::Error::new(std::io::ErrorKind::Other, "Failed to convert to string")
|
|
156
|
-
})?;
|
|
157
|
-
let bytes = rust_string.as_bytes();
|
|
158
|
-
|
|
159
|
-
let bytes_to_copy = rust_string.len().min(buf.len());
|
|
160
|
-
buf[..bytes_to_copy].copy_from_slice(&bytes[..bytes_to_copy]);
|
|
161
|
-
|
|
162
|
-
Ok(bytes_to_copy)
|
|
163
|
-
}
|
|
164
|
-
}
|
|
165
|
-
|
|
166
|
-
impl RubyIOReader {
|
|
167
|
-
fn new(ruby: &Ruby, value: Value) -> Result<Self, Error> {
|
|
168
|
-
if value.is_kind_of(ruby.class_io()) {
|
|
169
|
-
Ok(RubyIOReader { io_obj: value })
|
|
170
|
-
} else {
|
|
171
|
-
Err(Error::new(
|
|
172
|
-
ruby.exception_runtime_error(),
|
|
173
|
-
"IO object is not a valid IO object",
|
|
174
|
-
))
|
|
175
|
-
}
|
|
176
|
-
}
|
|
177
|
-
}
|
|
@@ -0,0 +1,230 @@
|
|
|
1
|
+
use crate::utils::*;
|
|
2
|
+
use magnus::{
|
|
3
|
+
block::Yield, rb_sys::AsRawValue, value::ReprValue, Error, RClass, RString, Ruby, Value,
|
|
4
|
+
};
|
|
5
|
+
use std::{fs::File, io::Read, os::fd::FromRawFd, thread};
|
|
6
|
+
|
|
7
|
+
/// Parses CSV data from a file and yields each row as a hash to the block.
|
|
8
|
+
pub fn parse_csv(
|
|
9
|
+
ruby: &Ruby,
|
|
10
|
+
rb_self: Value,
|
|
11
|
+
args: &[Value],
|
|
12
|
+
) -> Result<Yield<impl Iterator<Item = std::collections::HashMap<String, String>>>, Error> {
|
|
13
|
+
if !ruby.block_given() {
|
|
14
|
+
return Ok(Yield::Enumerator(rb_self.enumeratorize("for_each", args)));
|
|
15
|
+
}
|
|
16
|
+
let (to_read, has_headers, delimiter) = parse_csv_args(args)?;
|
|
17
|
+
|
|
18
|
+
let iter = RecordReader::<std::collections::HashMap<String, String>>::new(
|
|
19
|
+
ruby,
|
|
20
|
+
to_read,
|
|
21
|
+
has_headers,
|
|
22
|
+
delimiter.unwrap_or_else(|| ",".to_string()).as_bytes()[0],
|
|
23
|
+
1000,
|
|
24
|
+
)?;
|
|
25
|
+
|
|
26
|
+
Ok(Yield::Iter(iter))
|
|
27
|
+
}
|
|
28
|
+
|
|
29
|
+
pub fn parse_compat(
|
|
30
|
+
ruby: &Ruby,
|
|
31
|
+
rb_self: Value,
|
|
32
|
+
args: &[Value],
|
|
33
|
+
) -> Result<Yield<impl Iterator<Item = Vec<String>>>, Error> {
|
|
34
|
+
if !ruby.block_given() {
|
|
35
|
+
return Ok(Yield::Enumerator(
|
|
36
|
+
rb_self.enumeratorize("for_each_compat", args),
|
|
37
|
+
));
|
|
38
|
+
}
|
|
39
|
+
let (to_read, has_headers, delimiter) = parse_csv_args(args)?;
|
|
40
|
+
|
|
41
|
+
let iter = RecordReader::<Vec<String>>::new(
|
|
42
|
+
ruby,
|
|
43
|
+
to_read,
|
|
44
|
+
has_headers,
|
|
45
|
+
delimiter.unwrap_or_else(|| ",".to_string()).as_bytes()[0],
|
|
46
|
+
1000,
|
|
47
|
+
)?;
|
|
48
|
+
|
|
49
|
+
Ok(Yield::Iter(iter))
|
|
50
|
+
}
|
|
51
|
+
|
|
52
|
+
pub trait RecordParser {
|
|
53
|
+
type Output;
|
|
54
|
+
|
|
55
|
+
fn parse(headers: &[String], record: &csv::StringRecord) -> Self::Output;
|
|
56
|
+
}
|
|
57
|
+
|
|
58
|
+
impl RecordParser for std::collections::HashMap<String, String> {
|
|
59
|
+
type Output = Self;
|
|
60
|
+
|
|
61
|
+
fn parse(headers: &[String], record: &csv::StringRecord) -> Self::Output {
|
|
62
|
+
record
|
|
63
|
+
.iter()
|
|
64
|
+
.enumerate()
|
|
65
|
+
.map(|(i, field)| (headers[i].clone(), field.to_string()))
|
|
66
|
+
.collect()
|
|
67
|
+
}
|
|
68
|
+
}
|
|
69
|
+
|
|
70
|
+
impl RecordParser for Vec<String> {
|
|
71
|
+
type Output = Self;
|
|
72
|
+
|
|
73
|
+
fn parse(_headers: &[String], record: &csv::StringRecord) -> Self::Output {
|
|
74
|
+
record.iter().map(|field| field.to_string()).collect()
|
|
75
|
+
}
|
|
76
|
+
}
|
|
77
|
+
|
|
78
|
+
struct RecordReader<T: RecordParser> {
|
|
79
|
+
reader: ReadImpl<T>,
|
|
80
|
+
}
|
|
81
|
+
|
|
82
|
+
#[allow(dead_code)]
|
|
83
|
+
enum ReadImpl<T: RecordParser> {
|
|
84
|
+
SingleThreaded {
|
|
85
|
+
reader: csv::Reader<Box<dyn Read + Send + 'static>>,
|
|
86
|
+
headers: Vec<String>,
|
|
87
|
+
},
|
|
88
|
+
MultiThreaded {
|
|
89
|
+
receiver: kanal::Receiver<T::Output>,
|
|
90
|
+
handle: Option<thread::JoinHandle<()>>,
|
|
91
|
+
},
|
|
92
|
+
}
|
|
93
|
+
|
|
94
|
+
impl<T: RecordParser + Send + 'static> RecordReader<T> {
|
|
95
|
+
fn new(
|
|
96
|
+
ruby: &Ruby,
|
|
97
|
+
to_read: Value,
|
|
98
|
+
has_headers: bool,
|
|
99
|
+
delimiter: u8,
|
|
100
|
+
buffer: usize,
|
|
101
|
+
) -> Result<Self, Error> {
|
|
102
|
+
let string_io = RClass::from(ruby.eval("StringIO").map_err(|e| {
|
|
103
|
+
Error::new(
|
|
104
|
+
ruby.exception_runtime_error(),
|
|
105
|
+
format!("Failed to get StringIO class: {}", e),
|
|
106
|
+
)
|
|
107
|
+
})?);
|
|
108
|
+
|
|
109
|
+
let readable: Box<dyn Read + Send + 'static> = if to_read.is_kind_of(string_io) {
|
|
110
|
+
let string: RString = to_read.funcall("string", ()).map_err(|e| {
|
|
111
|
+
Error::new(
|
|
112
|
+
ruby.exception_runtime_error(),
|
|
113
|
+
format!("Failed to get string from StringIO: {}", e),
|
|
114
|
+
)
|
|
115
|
+
})?;
|
|
116
|
+
let content = string.to_string().map_err(|e| {
|
|
117
|
+
Error::new(
|
|
118
|
+
ruby.exception_runtime_error(),
|
|
119
|
+
format!("Failed to convert string to Rust String: {}", e),
|
|
120
|
+
)
|
|
121
|
+
})?;
|
|
122
|
+
Box::new(std::io::Cursor::new(content))
|
|
123
|
+
} else if to_read.is_kind_of(ruby.class_io()) {
|
|
124
|
+
let fd = unsafe { rb_sys::rb_io_descriptor(to_read.as_raw()) };
|
|
125
|
+
let file = unsafe { File::from_raw_fd(fd) };
|
|
126
|
+
Box::new(file)
|
|
127
|
+
} else {
|
|
128
|
+
let path = to_read
|
|
129
|
+
.to_r_string()
|
|
130
|
+
.map_err(|e| {
|
|
131
|
+
Error::new(
|
|
132
|
+
ruby.exception_runtime_error(),
|
|
133
|
+
format!("Failed to convert path to string: {}", e),
|
|
134
|
+
)
|
|
135
|
+
})?
|
|
136
|
+
.to_string()
|
|
137
|
+
.map_err(|e| {
|
|
138
|
+
Error::new(
|
|
139
|
+
ruby.exception_runtime_error(),
|
|
140
|
+
format!("Failed to convert RString to Rust String: {}", e),
|
|
141
|
+
)
|
|
142
|
+
})?;
|
|
143
|
+
let file = std::fs::File::open(&path).map_err(|e| {
|
|
144
|
+
Error::new(
|
|
145
|
+
ruby.exception_runtime_error(),
|
|
146
|
+
format!("Failed to open file: {}", e),
|
|
147
|
+
)
|
|
148
|
+
})?;
|
|
149
|
+
Box::new(file)
|
|
150
|
+
};
|
|
151
|
+
|
|
152
|
+
let mut reader = csv::ReaderBuilder::new()
|
|
153
|
+
.has_headers(has_headers)
|
|
154
|
+
.delimiter(delimiter)
|
|
155
|
+
.from_reader(readable);
|
|
156
|
+
|
|
157
|
+
let headers = Self::get_headers(&mut reader, has_headers)?;
|
|
158
|
+
let headers_clone = headers.clone();
|
|
159
|
+
|
|
160
|
+
let (sender, receiver) = kanal::bounded(buffer);
|
|
161
|
+
let handle = thread::spawn(move || {
|
|
162
|
+
let mut record = csv::StringRecord::new();
|
|
163
|
+
while let Ok(read) = reader.read_record(&mut record) {
|
|
164
|
+
if !read {
|
|
165
|
+
let file_to_forget = reader.into_inner();
|
|
166
|
+
std::mem::forget(file_to_forget);
|
|
167
|
+
break;
|
|
168
|
+
}
|
|
169
|
+
let row = T::parse(&headers_clone, &record);
|
|
170
|
+
if sender.send(row).is_err() {
|
|
171
|
+
break;
|
|
172
|
+
}
|
|
173
|
+
}
|
|
174
|
+
});
|
|
175
|
+
|
|
176
|
+
let read_impl = ReadImpl::MultiThreaded {
|
|
177
|
+
receiver,
|
|
178
|
+
handle: Some(handle),
|
|
179
|
+
};
|
|
180
|
+
|
|
181
|
+
Ok(Self { reader: read_impl })
|
|
182
|
+
}
|
|
183
|
+
|
|
184
|
+
fn get_headers(
|
|
185
|
+
reader: &mut csv::Reader<impl Read>,
|
|
186
|
+
has_headers: bool,
|
|
187
|
+
) -> Result<Vec<String>, Error> {
|
|
188
|
+
let first_row = reader
|
|
189
|
+
.headers()
|
|
190
|
+
.map_err(|e| {
|
|
191
|
+
Error::new(
|
|
192
|
+
magnus::exception::runtime_error(),
|
|
193
|
+
format!("Failed to read headers: {}", e),
|
|
194
|
+
)
|
|
195
|
+
})?
|
|
196
|
+
.clone();
|
|
197
|
+
let num_fields = first_row.len();
|
|
198
|
+
|
|
199
|
+
Ok(if has_headers {
|
|
200
|
+
first_row.iter().map(|h| h.to_string()).collect()
|
|
201
|
+
} else {
|
|
202
|
+
(0..num_fields).map(|i| format!("c{}", i)).collect()
|
|
203
|
+
})
|
|
204
|
+
}
|
|
205
|
+
}
|
|
206
|
+
|
|
207
|
+
impl<T: RecordParser> Iterator for RecordReader<T> {
|
|
208
|
+
type Item = T::Output;
|
|
209
|
+
|
|
210
|
+
fn next(&mut self) -> Option<Self::Item> {
|
|
211
|
+
match &mut self.reader {
|
|
212
|
+
ReadImpl::MultiThreaded { receiver, handle } => match receiver.recv() {
|
|
213
|
+
Ok(record) => Some(record),
|
|
214
|
+
Err(_) => {
|
|
215
|
+
if let Some(handle) = handle.take() {
|
|
216
|
+
let _ = handle.join();
|
|
217
|
+
}
|
|
218
|
+
None
|
|
219
|
+
}
|
|
220
|
+
},
|
|
221
|
+
ReadImpl::SingleThreaded { reader, headers } => {
|
|
222
|
+
let mut record = csv::StringRecord::new();
|
|
223
|
+
match reader.read_record(&mut record) {
|
|
224
|
+
Ok(true) => Some(T::parse(headers, &record)),
|
|
225
|
+
_ => None,
|
|
226
|
+
}
|
|
227
|
+
}
|
|
228
|
+
}
|
|
229
|
+
}
|
|
230
|
+
}
|
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
use magnus::{
|
|
2
|
+
scan_args::{get_kwargs, scan_args},
|
|
3
|
+
Error, Value,
|
|
4
|
+
};
|
|
5
|
+
|
|
6
|
+
/// Parse common arguments for CSV parsing
|
|
7
|
+
pub fn parse_csv_args(args: &[Value]) -> Result<(Value, bool, Option<String>), Error> {
|
|
8
|
+
let parsed_args = scan_args::<(Value,), (), (), (), _, ()>(args)?;
|
|
9
|
+
let (to_read,) = parsed_args.required;
|
|
10
|
+
|
|
11
|
+
let kwargs = get_kwargs::<_, (), (Option<bool>, Option<String>), ()>(
|
|
12
|
+
parsed_args.keywords,
|
|
13
|
+
&[],
|
|
14
|
+
&["has_headers", "delimiter"],
|
|
15
|
+
)?;
|
|
16
|
+
|
|
17
|
+
let has_headers = kwargs.optional.0.unwrap_or(true);
|
|
18
|
+
|
|
19
|
+
Ok((to_read, has_headers, kwargs.optional.1))
|
|
20
|
+
}
|
data/lib/osv/version.rb
CHANGED
data/lib/osv.rbi
ADDED
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
# typed: strict
|
|
2
|
+
|
|
3
|
+
module OSV
|
|
4
|
+
sig do
|
|
5
|
+
type_parameters(:T)
|
|
6
|
+
.params(
|
|
7
|
+
input: T.any(String, StringIO, IO),
|
|
8
|
+
has_headers: T.nilable(T::Boolean),
|
|
9
|
+
delimiter: T.nilable(String),
|
|
10
|
+
blk: T.proc.params(row: T::Hash[String, String]).void
|
|
11
|
+
)
|
|
12
|
+
.returns(T.untyped)
|
|
13
|
+
end
|
|
14
|
+
def self.for_each(input, has_headers: true, delimiter: nil, &blk)
|
|
15
|
+
end
|
|
16
|
+
|
|
17
|
+
sig do
|
|
18
|
+
type_parameters(:T)
|
|
19
|
+
.params(
|
|
20
|
+
input: T.any(String, StringIO, IO),
|
|
21
|
+
has_headers: T.nilable(T::Boolean),
|
|
22
|
+
delimiter: T.nilable(String),
|
|
23
|
+
blk: T.proc.params(row: T::Array[String]).void
|
|
24
|
+
)
|
|
25
|
+
.returns(T.untyped)
|
|
26
|
+
end
|
|
27
|
+
def self.for_each_compat(input, has_headers: true, delimiter: nil, &blk)
|
|
28
|
+
end
|
|
29
|
+
end
|
metadata
CHANGED
|
@@ -1,14 +1,14 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: osv
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.1.1
|
|
4
|
+
version: 0.2.0
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Nathan Jaremko
|
|
8
8
|
autorequire:
|
|
9
9
|
bindir: bin
|
|
10
10
|
cert_chain: []
|
|
11
|
-
date: 2024-12-
|
|
11
|
+
date: 2024-12-23 00:00:00.000000000 Z
|
|
12
12
|
dependencies:
|
|
13
13
|
- !ruby/object:Gem::Dependency
|
|
14
14
|
name: rb_sys
|
|
@@ -54,7 +54,10 @@ files:
|
|
|
54
54
|
- ext/osv/Cargo.toml
|
|
55
55
|
- ext/osv/extconf.rb
|
|
56
56
|
- ext/osv/src/lib.rs
|
|
57
|
+
- ext/osv/src/reader.rs
|
|
58
|
+
- ext/osv/src/utils.rs
|
|
57
59
|
- lib/osv.rb
|
|
60
|
+
- lib/osv.rbi
|
|
58
61
|
- lib/osv/version.rb
|
|
59
62
|
homepage: https://github.com/njaremko/osv
|
|
60
63
|
licenses:
|