osv 0.1.1 → 0.2.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: c67f4e48abcf76bbbe0fce6513b67464ed550287451f5fa2b4c6e8f83256fef5
4
- data.tar.gz: b38342f6e6c43050b45c99ea7f3fba4334423cb10d9df39b23ed054264b35e84
3
+ metadata.gz: a1b0c347b0bab5c9d31069c56f47999bfa51e85dfc1e127d1c4474a84ac19c53
4
+ data.tar.gz: 847b199da27b7c1329c1fa64fc8636592f004e93a12fc2ddd8db6127298ac23d
5
5
  SHA512:
6
- metadata.gz: 190d1cfafc93554e5096682f1ce3ec6e9d8049b2669c471db5b27d9190a36500465d798a07e279d73d6e8569c47eaa259f48644a8f1df70138c6f5fc7a826348
7
- data.tar.gz: f3c4ed3eac64aaf95e14bf7f5e60b84f2d698db586a2b8c5c6328389e0c067df298b070d4ac78eb91df444ebf23bad2e8ec74f5e7f3314f6ff0c8a248e52b263
6
+ metadata.gz: 62fa77c1ca98031f483569a4dba7cf9e4eca52a4b5fae293d274d5f89c48003e301eab01d95116cfc9cc6a2642e742d16046231a21d25e4a5143bd6ec3b40dac
7
+ data.tar.gz: 3832cbb6ebadfc718a8a5d1963de960ed3abf09d4559d2f0ffe031c642a6c2581dc6ad6edf5d65f22248812585ca464916375503753831024fc355fe4cd04455
data/Cargo.lock CHANGED
@@ -11,6 +11,12 @@ dependencies = [
11
11
  "memchr",
12
12
  ]
13
13
 
14
+ [[package]]
15
+ name = "autocfg"
16
+ version = "1.4.0"
17
+ source = "registry+https://github.com/rust-lang/crates.io-index"
18
+ checksum = "ace50bade8e6234aa140d9a2f552bbee1db4d353f69b8217bc503490fc1a9f26"
19
+
14
20
  [[package]]
15
21
  name = "bindgen"
16
22
  version = "0.69.5"
@@ -90,6 +96,12 @@ version = "1.13.0"
90
96
  source = "registry+https://github.com/rust-lang/crates.io-index"
91
97
  checksum = "60b1af1c220855b6ceac025d3f6ecdd2b7c4894bfe9cd9bda4fbb4bc7c0d4cf0"
92
98
 
99
+ [[package]]
100
+ name = "futures-core"
101
+ version = "0.3.31"
102
+ source = "registry+https://github.com/rust-lang/crates.io-index"
103
+ checksum = "05f29059c0c2090612e8d742178b0580d2dc940c837851ad723096f87af6663e"
104
+
93
105
  [[package]]
94
106
  name = "glob"
95
107
  version = "0.3.1"
@@ -111,6 +123,16 @@ version = "1.0.14"
111
123
  source = "registry+https://github.com/rust-lang/crates.io-index"
112
124
  checksum = "d75a2a4b1b190afb6f5425f10f6a8f959d2ea0b9c2b1d79553551850539e4674"
113
125
 
126
+ [[package]]
127
+ name = "kanal"
128
+ version = "0.1.0-pre8"
129
+ source = "registry+https://github.com/rust-lang/crates.io-index"
130
+ checksum = "b05d55519627edaf7fd0f29981f6dc03fb52df3f5b257130eb8d0bf2801ea1d7"
131
+ dependencies = [
132
+ "futures-core",
133
+ "lock_api",
134
+ ]
135
+
114
136
  [[package]]
115
137
  name = "lazy_static"
116
138
  version = "1.5.0"
@@ -139,6 +161,16 @@ dependencies = [
139
161
  "windows-targets",
140
162
  ]
141
163
 
164
+ [[package]]
165
+ name = "lock_api"
166
+ version = "0.4.12"
167
+ source = "registry+https://github.com/rust-lang/crates.io-index"
168
+ checksum = "07af8b9cdd281b7915f413fa73f29ebd5d55d0d3f0155584dade1ff18cea1b17"
169
+ dependencies = [
170
+ "autocfg",
171
+ "scopeguard",
172
+ ]
173
+
142
174
  [[package]]
143
175
  name = "magnus"
144
176
  version = "0.6.4"
@@ -201,6 +233,7 @@ name = "osv"
201
233
  version = "0.1.0"
202
234
  dependencies = [
203
235
  "csv",
236
+ "kanal",
204
237
  "magnus 0.7.1",
205
238
  "rb-sys",
206
239
  "serde",
@@ -296,6 +329,12 @@ version = "1.0.18"
296
329
  source = "registry+https://github.com/rust-lang/crates.io-index"
297
330
  checksum = "f3cb5ba0dc43242ce17de99c180e96db90b235b8a9fdc9543c96d2209116bd9f"
298
331
 
332
+ [[package]]
333
+ name = "scopeguard"
334
+ version = "1.2.0"
335
+ source = "registry+https://github.com/rust-lang/crates.io-index"
336
+ checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49"
337
+
299
338
  [[package]]
300
339
  name = "seq-macro"
301
340
  version = "0.3.5"
data/ext/osv/Cargo.toml CHANGED
@@ -12,3 +12,4 @@ magnus = { version = "0.7", features = ["rb-sys"] }
12
12
  rb-sys = "0.9"
13
13
  serde = { version = "1.0", features = ["derive"] }
14
14
  serde_magnus = "0.8.1"
15
+ kanal = "0.1.0-pre8"
data/ext/osv/src/lib.rs CHANGED
@@ -1,9 +1,9 @@
1
- use magnus::{
2
- block::Yield,
3
- prelude::*,
4
- scan_args::{get_kwargs, scan_args},
5
- Error, RString, Ruby, Value,
6
- };
1
+ mod reader;
2
+ mod utils;
3
+
4
+ use crate::reader::*;
5
+
6
+ use magnus::{Error, Ruby};
7
7
 
8
8
  /// Initializes the Ruby extension and defines methods.
9
9
  #[magnus::init]
@@ -13,165 +13,3 @@ fn init(ruby: &Ruby) -> Result<(), Error> {
13
13
  module.define_module_function("for_each_compat", magnus::method!(parse_compat, -1))?;
14
14
  Ok(())
15
15
  }
16
-
17
- /// Helper function to get a readable from either an IO object or a file path
18
- fn get_readable(ruby: &Ruby, to_read: Value) -> Result<Box<dyn Read>, Error> {
19
- if to_read.is_kind_of(ruby.class_io()) {
20
- let reader = RubyIOReader::new(ruby, to_read)?;
21
- Ok(Box::new(reader))
22
- } else {
23
- let path = to_read.to_r_string()?.to_string()?;
24
- let file = std::fs::File::open(&path).map_err(|e| {
25
- Error::new(
26
- ruby.exception_runtime_error(),
27
- format!("Failed to open file: {}", e),
28
- )
29
- })?;
30
- Ok(Box::new(file))
31
- }
32
- }
33
-
34
- /// Helper function to create a CSV reader with the given configuration
35
- fn create_csv_reader(
36
- ruby: &Ruby,
37
- to_read: Value,
38
- has_headers: bool,
39
- delimiter: Option<String>,
40
- ) -> Result<csv::Reader<Box<dyn Read>>, Error> {
41
- let readable = get_readable(ruby, to_read)?;
42
- let delimiter = delimiter.unwrap_or_else(|| ",".to_string());
43
-
44
- let rdr = csv::ReaderBuilder::new()
45
- .has_headers(has_headers)
46
- .delimiter(delimiter.as_bytes()[0])
47
- .from_reader(readable);
48
-
49
- Ok(rdr)
50
- }
51
-
52
- /// Common setup for CSV parsing, returns the reader and headers
53
- fn setup_csv_parser(
54
- ruby: &Ruby,
55
- to_read: Value,
56
- has_headers: bool,
57
- delimiter: Option<String>,
58
- ) -> Result<(csv::Reader<Box<dyn Read>>, Vec<String>), Error> {
59
- let mut rdr = create_csv_reader(ruby, to_read, has_headers, delimiter)?;
60
-
61
- let first_row = rdr.headers().unwrap().clone();
62
- let num_fields = first_row.len();
63
-
64
- let headers = if has_headers {
65
- first_row.iter().map(|h| h.to_string()).collect()
66
- } else {
67
- (0..num_fields).map(|i| format!("c{}", i)).collect()
68
- };
69
-
70
- Ok((rdr, headers))
71
- }
72
-
73
- /// Parse common arguments for CSV parsing
74
- fn parse_csv_args(args: &[Value]) -> Result<(Value, bool, Option<String>), Error> {
75
- let parsed_args = scan_args::<(Value,), (), (), (), _, ()>(args)?;
76
- let (to_read,) = parsed_args.required;
77
-
78
- let kwargs = get_kwargs::<_, (), (Option<bool>, Option<String>), ()>(
79
- parsed_args.keywords,
80
- &[],
81
- &["has_headers", "delimiter"],
82
- )?;
83
-
84
- let has_headers = kwargs.optional.0.unwrap_or(true);
85
-
86
- Ok((to_read, has_headers, kwargs.optional.1))
87
- }
88
-
89
- /// Parses CSV data from a file and yields each row as a hash to the block.
90
- fn parse_csv(
91
- ruby: &Ruby,
92
- rb_self: Value,
93
- args: &[Value],
94
- ) -> Result<Yield<impl Iterator<Item = std::collections::HashMap<String, String>>>, Error> {
95
- if !ruby.block_given() {
96
- return Ok(Yield::Enumerator(rb_self.enumeratorize("for_each", args)));
97
- }
98
-
99
- let (to_read, has_headers, delimiter) = parse_csv_args(args)?;
100
- let (rdr, headers) = setup_csv_parser(ruby, to_read, has_headers, delimiter)?;
101
-
102
- let iter = rdr.into_records().filter_map(move |result| {
103
- let record = result.ok()?;
104
- let mut hash = std::collections::HashMap::new();
105
- for (header, field) in headers.iter().zip(record.iter()) {
106
- hash.insert(header.to_string(), field.to_string());
107
- }
108
- Some(hash)
109
- });
110
-
111
- Ok(Yield::Iter(iter))
112
- }
113
-
114
- fn parse_compat(
115
- ruby: &Ruby,
116
- rb_self: Value,
117
- args: &[Value],
118
- ) -> Result<Yield<impl Iterator<Item = Vec<String>>>, Error> {
119
- if !ruby.block_given() {
120
- return Ok(Yield::Enumerator(
121
- rb_self.enumeratorize("for_each_compat", args),
122
- ));
123
- }
124
-
125
- let (to_read, has_headers, delimiter) = parse_csv_args(args)?;
126
- let (rdr, _) = setup_csv_parser(ruby, to_read, has_headers, delimiter)?;
127
-
128
- let iter = rdr.into_records().filter_map(|result| {
129
- result
130
- .ok()
131
- .map(|record| record.iter().map(|field| field.to_string()).collect())
132
- });
133
-
134
- Ok(Yield::Iter(iter))
135
- }
136
-
137
- use std::io::Read;
138
-
139
- struct RubyIOReader {
140
- io_obj: Value,
141
- }
142
-
143
- impl Read for RubyIOReader {
144
- fn read(&mut self, buf: &mut [u8]) -> std::io::Result<usize> {
145
- let result: RString = self.io_obj.funcall("read", (buf.len(),)).map_err(|_| {
146
- std::io::Error::new(std::io::ErrorKind::Other, "Failed to read from IO")
147
- })?;
148
-
149
- // Handle EOF case
150
- if result.is_nil() {
151
- return Ok(0);
152
- }
153
-
154
- let rust_string = result.to_string().map_err(|_| {
155
- std::io::Error::new(std::io::ErrorKind::Other, "Failed to convert to string")
156
- })?;
157
- let bytes = rust_string.as_bytes();
158
-
159
- let bytes_to_copy = rust_string.len().min(buf.len());
160
- buf[..bytes_to_copy].copy_from_slice(&bytes[..bytes_to_copy]);
161
-
162
- Ok(bytes_to_copy)
163
- }
164
- }
165
-
166
- impl RubyIOReader {
167
- fn new(ruby: &Ruby, value: Value) -> Result<Self, Error> {
168
- if value.is_kind_of(ruby.class_io()) {
169
- Ok(RubyIOReader { io_obj: value })
170
- } else {
171
- Err(Error::new(
172
- ruby.exception_runtime_error(),
173
- "IO object is not a valid IO object",
174
- ))
175
- }
176
- }
177
- }
@@ -0,0 +1,230 @@
1
+ use crate::utils::*;
2
+ use magnus::{
3
+ block::Yield, rb_sys::AsRawValue, value::ReprValue, Error, RClass, RString, Ruby, Value,
4
+ };
5
+ use std::{fs::File, io::Read, os::fd::FromRawFd, thread};
6
+
7
+ /// Parses CSV data from a file and yields each row as a hash to the block.
8
+ pub fn parse_csv(
9
+ ruby: &Ruby,
10
+ rb_self: Value,
11
+ args: &[Value],
12
+ ) -> Result<Yield<impl Iterator<Item = std::collections::HashMap<String, String>>>, Error> {
13
+ if !ruby.block_given() {
14
+ return Ok(Yield::Enumerator(rb_self.enumeratorize("for_each", args)));
15
+ }
16
+ let (to_read, has_headers, delimiter) = parse_csv_args(args)?;
17
+
18
+ let iter = RecordReader::<std::collections::HashMap<String, String>>::new(
19
+ ruby,
20
+ to_read,
21
+ has_headers,
22
+ delimiter.unwrap_or_else(|| ",".to_string()).as_bytes()[0],
23
+ 1000,
24
+ )?;
25
+
26
+ Ok(Yield::Iter(iter))
27
+ }
28
+
29
+ pub fn parse_compat(
30
+ ruby: &Ruby,
31
+ rb_self: Value,
32
+ args: &[Value],
33
+ ) -> Result<Yield<impl Iterator<Item = Vec<String>>>, Error> {
34
+ if !ruby.block_given() {
35
+ return Ok(Yield::Enumerator(
36
+ rb_self.enumeratorize("for_each_compat", args),
37
+ ));
38
+ }
39
+ let (to_read, has_headers, delimiter) = parse_csv_args(args)?;
40
+
41
+ let iter = RecordReader::<Vec<String>>::new(
42
+ ruby,
43
+ to_read,
44
+ has_headers,
45
+ delimiter.unwrap_or_else(|| ",".to_string()).as_bytes()[0],
46
+ 1000,
47
+ )?;
48
+
49
+ Ok(Yield::Iter(iter))
50
+ }
51
+
52
+ pub trait RecordParser {
53
+ type Output;
54
+
55
+ fn parse(headers: &[String], record: &csv::StringRecord) -> Self::Output;
56
+ }
57
+
58
+ impl RecordParser for std::collections::HashMap<String, String> {
59
+ type Output = Self;
60
+
61
+ fn parse(headers: &[String], record: &csv::StringRecord) -> Self::Output {
62
+ record
63
+ .iter()
64
+ .enumerate()
65
+ .map(|(i, field)| (headers[i].clone(), field.to_string()))
66
+ .collect()
67
+ }
68
+ }
69
+
70
+ impl RecordParser for Vec<String> {
71
+ type Output = Self;
72
+
73
+ fn parse(_headers: &[String], record: &csv::StringRecord) -> Self::Output {
74
+ record.iter().map(|field| field.to_string()).collect()
75
+ }
76
+ }
77
+
78
+ struct RecordReader<T: RecordParser> {
79
+ reader: ReadImpl<T>,
80
+ }
81
+
82
+ #[allow(dead_code)]
83
+ enum ReadImpl<T: RecordParser> {
84
+ SingleThreaded {
85
+ reader: csv::Reader<Box<dyn Read + Send + 'static>>,
86
+ headers: Vec<String>,
87
+ },
88
+ MultiThreaded {
89
+ receiver: kanal::Receiver<T::Output>,
90
+ handle: Option<thread::JoinHandle<()>>,
91
+ },
92
+ }
93
+
94
+ impl<T: RecordParser + Send + 'static> RecordReader<T> {
95
+ fn new(
96
+ ruby: &Ruby,
97
+ to_read: Value,
98
+ has_headers: bool,
99
+ delimiter: u8,
100
+ buffer: usize,
101
+ ) -> Result<Self, Error> {
102
+ let string_io = RClass::from(ruby.eval("StringIO").map_err(|e| {
103
+ Error::new(
104
+ ruby.exception_runtime_error(),
105
+ format!("Failed to get StringIO class: {}", e),
106
+ )
107
+ })?);
108
+
109
+ let readable: Box<dyn Read + Send + 'static> = if to_read.is_kind_of(string_io) {
110
+ let string: RString = to_read.funcall("string", ()).map_err(|e| {
111
+ Error::new(
112
+ ruby.exception_runtime_error(),
113
+ format!("Failed to get string from StringIO: {}", e),
114
+ )
115
+ })?;
116
+ let content = string.to_string().map_err(|e| {
117
+ Error::new(
118
+ ruby.exception_runtime_error(),
119
+ format!("Failed to convert string to Rust String: {}", e),
120
+ )
121
+ })?;
122
+ Box::new(std::io::Cursor::new(content))
123
+ } else if to_read.is_kind_of(ruby.class_io()) {
124
+ let fd = unsafe { rb_sys::rb_io_descriptor(to_read.as_raw()) };
125
+ let file = unsafe { File::from_raw_fd(fd) };
126
+ Box::new(file)
127
+ } else {
128
+ let path = to_read
129
+ .to_r_string()
130
+ .map_err(|e| {
131
+ Error::new(
132
+ ruby.exception_runtime_error(),
133
+ format!("Failed to convert path to string: {}", e),
134
+ )
135
+ })?
136
+ .to_string()
137
+ .map_err(|e| {
138
+ Error::new(
139
+ ruby.exception_runtime_error(),
140
+ format!("Failed to convert RString to Rust String: {}", e),
141
+ )
142
+ })?;
143
+ let file = std::fs::File::open(&path).map_err(|e| {
144
+ Error::new(
145
+ ruby.exception_runtime_error(),
146
+ format!("Failed to open file: {}", e),
147
+ )
148
+ })?;
149
+ Box::new(file)
150
+ };
151
+
152
+ let mut reader = csv::ReaderBuilder::new()
153
+ .has_headers(has_headers)
154
+ .delimiter(delimiter)
155
+ .from_reader(readable);
156
+
157
+ let headers = Self::get_headers(&mut reader, has_headers)?;
158
+ let headers_clone = headers.clone();
159
+
160
+ let (sender, receiver) = kanal::bounded(buffer);
161
+ let handle = thread::spawn(move || {
162
+ let mut record = csv::StringRecord::new();
163
+ while let Ok(read) = reader.read_record(&mut record) {
164
+ if !read {
165
+ let file_to_forget = reader.into_inner();
166
+ std::mem::forget(file_to_forget);
167
+ break;
168
+ }
169
+ let row = T::parse(&headers_clone, &record);
170
+ if sender.send(row).is_err() {
171
+ break;
172
+ }
173
+ }
174
+ });
175
+
176
+ let read_impl = ReadImpl::MultiThreaded {
177
+ receiver,
178
+ handle: Some(handle),
179
+ };
180
+
181
+ Ok(Self { reader: read_impl })
182
+ }
183
+
184
+ fn get_headers(
185
+ reader: &mut csv::Reader<impl Read>,
186
+ has_headers: bool,
187
+ ) -> Result<Vec<String>, Error> {
188
+ let first_row = reader
189
+ .headers()
190
+ .map_err(|e| {
191
+ Error::new(
192
+ magnus::exception::runtime_error(),
193
+ format!("Failed to read headers: {}", e),
194
+ )
195
+ })?
196
+ .clone();
197
+ let num_fields = first_row.len();
198
+
199
+ Ok(if has_headers {
200
+ first_row.iter().map(|h| h.to_string()).collect()
201
+ } else {
202
+ (0..num_fields).map(|i| format!("c{}", i)).collect()
203
+ })
204
+ }
205
+ }
206
+
207
+ impl<T: RecordParser> Iterator for RecordReader<T> {
208
+ type Item = T::Output;
209
+
210
+ fn next(&mut self) -> Option<Self::Item> {
211
+ match &mut self.reader {
212
+ ReadImpl::MultiThreaded { receiver, handle } => match receiver.recv() {
213
+ Ok(record) => Some(record),
214
+ Err(_) => {
215
+ if let Some(handle) = handle.take() {
216
+ let _ = handle.join();
217
+ }
218
+ None
219
+ }
220
+ },
221
+ ReadImpl::SingleThreaded { reader, headers } => {
222
+ let mut record = csv::StringRecord::new();
223
+ match reader.read_record(&mut record) {
224
+ Ok(true) => Some(T::parse(headers, &record)),
225
+ _ => None,
226
+ }
227
+ }
228
+ }
229
+ }
230
+ }
@@ -0,0 +1,20 @@
1
+ use magnus::{
2
+ scan_args::{get_kwargs, scan_args},
3
+ Error, Value,
4
+ };
5
+
6
+ /// Parse common arguments for CSV parsing
7
+ pub fn parse_csv_args(args: &[Value]) -> Result<(Value, bool, Option<String>), Error> {
8
+ let parsed_args = scan_args::<(Value,), (), (), (), _, ()>(args)?;
9
+ let (to_read,) = parsed_args.required;
10
+
11
+ let kwargs = get_kwargs::<_, (), (Option<bool>, Option<String>), ()>(
12
+ parsed_args.keywords,
13
+ &[],
14
+ &["has_headers", "delimiter"],
15
+ )?;
16
+
17
+ let has_headers = kwargs.optional.0.unwrap_or(true);
18
+
19
+ Ok((to_read, has_headers, kwargs.optional.1))
20
+ }
data/lib/osv/version.rb CHANGED
@@ -1,3 +1,3 @@
1
1
  module OSV
2
- VERSION = "0.1.1"
2
+ VERSION = "0.2.0"
3
3
  end
data/lib/osv.rbi ADDED
@@ -0,0 +1,29 @@
1
+ # typed: strict
2
+
3
module OSV
  # Reads CSV from `input` and yields each row to the block as a Hash of
  # header => field. The block is optional: the native implementation returns an
  # enumerator when none is given.
  sig do
    params(
      input: T.any(String, StringIO, IO),
      has_headers: T.nilable(T::Boolean),
      delimiter: T.nilable(String),
      blk: T.nilable(T.proc.params(row: T::Hash[String, String]).void)
    ).returns(T.untyped)
  end
  def self.for_each(input, has_headers: true, delimiter: nil, &blk)
  end

  # Reads CSV from `input` and yields each row to the block as an Array of field
  # strings. The block is optional: the native implementation returns an
  # enumerator when none is given.
  sig do
    params(
      input: T.any(String, StringIO, IO),
      has_headers: T.nilable(T::Boolean),
      delimiter: T.nilable(String),
      blk: T.nilable(T.proc.params(row: T::Array[String]).void)
    ).returns(T.untyped)
  end
  def self.for_each_compat(input, has_headers: true, delimiter: nil, &blk)
  end
end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: osv
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.1
4
+ version: 0.2.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Nathan Jaremko
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2024-12-21 00:00:00.000000000 Z
11
+ date: 2024-12-23 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rb_sys
@@ -54,7 +54,10 @@ files:
54
54
  - ext/osv/Cargo.toml
55
55
  - ext/osv/extconf.rb
56
56
  - ext/osv/src/lib.rs
57
+ - ext/osv/src/reader.rs
58
+ - ext/osv/src/utils.rs
57
59
  - lib/osv.rb
60
+ - lib/osv.rbi
58
61
  - lib/osv/version.rb
59
62
  homepage: https://github.com/njaremko/osv
60
63
  licenses: