osv 0.1.1 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: c67f4e48abcf76bbbe0fce6513b67464ed550287451f5fa2b4c6e8f83256fef5
4
- data.tar.gz: b38342f6e6c43050b45c99ea7f3fba4334423cb10d9df39b23ed054264b35e84
3
+ metadata.gz: a1b0c347b0bab5c9d31069c56f47999bfa51e85dfc1e127d1c4474a84ac19c53
4
+ data.tar.gz: 847b199da27b7c1329c1fa64fc8636592f004e93a12fc2ddd8db6127298ac23d
5
5
  SHA512:
6
- metadata.gz: 190d1cfafc93554e5096682f1ce3ec6e9d8049b2669c471db5b27d9190a36500465d798a07e279d73d6e8569c47eaa259f48644a8f1df70138c6f5fc7a826348
7
- data.tar.gz: f3c4ed3eac64aaf95e14bf7f5e60b84f2d698db586a2b8c5c6328389e0c067df298b070d4ac78eb91df444ebf23bad2e8ec74f5e7f3314f6ff0c8a248e52b263
6
+ metadata.gz: 62fa77c1ca98031f483569a4dba7cf9e4eca52a4b5fae293d274d5f89c48003e301eab01d95116cfc9cc6a2642e742d16046231a21d25e4a5143bd6ec3b40dac
7
+ data.tar.gz: 3832cbb6ebadfc718a8a5d1963de960ed3abf09d4559d2f0ffe031c642a6c2581dc6ad6edf5d65f22248812585ca464916375503753831024fc355fe4cd04455
data/Cargo.lock CHANGED
@@ -11,6 +11,12 @@ dependencies = [
11
11
  "memchr",
12
12
  ]
13
13
 
14
+ [[package]]
15
+ name = "autocfg"
16
+ version = "1.4.0"
17
+ source = "registry+https://github.com/rust-lang/crates.io-index"
18
+ checksum = "ace50bade8e6234aa140d9a2f552bbee1db4d353f69b8217bc503490fc1a9f26"
19
+
14
20
  [[package]]
15
21
  name = "bindgen"
16
22
  version = "0.69.5"
@@ -90,6 +96,12 @@ version = "1.13.0"
90
96
  source = "registry+https://github.com/rust-lang/crates.io-index"
91
97
  checksum = "60b1af1c220855b6ceac025d3f6ecdd2b7c4894bfe9cd9bda4fbb4bc7c0d4cf0"
92
98
 
99
+ [[package]]
100
+ name = "futures-core"
101
+ version = "0.3.31"
102
+ source = "registry+https://github.com/rust-lang/crates.io-index"
103
+ checksum = "05f29059c0c2090612e8d742178b0580d2dc940c837851ad723096f87af6663e"
104
+
93
105
  [[package]]
94
106
  name = "glob"
95
107
  version = "0.3.1"
@@ -111,6 +123,16 @@ version = "1.0.14"
111
123
  source = "registry+https://github.com/rust-lang/crates.io-index"
112
124
  checksum = "d75a2a4b1b190afb6f5425f10f6a8f959d2ea0b9c2b1d79553551850539e4674"
113
125
 
126
+ [[package]]
127
+ name = "kanal"
128
+ version = "0.1.0-pre8"
129
+ source = "registry+https://github.com/rust-lang/crates.io-index"
130
+ checksum = "b05d55519627edaf7fd0f29981f6dc03fb52df3f5b257130eb8d0bf2801ea1d7"
131
+ dependencies = [
132
+ "futures-core",
133
+ "lock_api",
134
+ ]
135
+
114
136
  [[package]]
115
137
  name = "lazy_static"
116
138
  version = "1.5.0"
@@ -139,6 +161,16 @@ dependencies = [
139
161
  "windows-targets",
140
162
  ]
141
163
 
164
+ [[package]]
165
+ name = "lock_api"
166
+ version = "0.4.12"
167
+ source = "registry+https://github.com/rust-lang/crates.io-index"
168
+ checksum = "07af8b9cdd281b7915f413fa73f29ebd5d55d0d3f0155584dade1ff18cea1b17"
169
+ dependencies = [
170
+ "autocfg",
171
+ "scopeguard",
172
+ ]
173
+
142
174
  [[package]]
143
175
  name = "magnus"
144
176
  version = "0.6.4"
@@ -201,6 +233,7 @@ name = "osv"
201
233
  version = "0.1.0"
202
234
  dependencies = [
203
235
  "csv",
236
+ "kanal",
204
237
  "magnus 0.7.1",
205
238
  "rb-sys",
206
239
  "serde",
@@ -296,6 +329,12 @@ version = "1.0.18"
296
329
  source = "registry+https://github.com/rust-lang/crates.io-index"
297
330
  checksum = "f3cb5ba0dc43242ce17de99c180e96db90b235b8a9fdc9543c96d2209116bd9f"
298
331
 
332
+ [[package]]
333
+ name = "scopeguard"
334
+ version = "1.2.0"
335
+ source = "registry+https://github.com/rust-lang/crates.io-index"
336
+ checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49"
337
+
299
338
  [[package]]
300
339
  name = "seq-macro"
301
340
  version = "0.3.5"
data/ext/osv/Cargo.toml CHANGED
@@ -12,3 +12,4 @@ magnus = { version = "0.7", features = ["rb-sys"] }
12
12
  rb-sys = "0.9"
13
13
  serde = { version = "1.0", features = ["derive"] }
14
14
  serde_magnus = "0.8.1"
15
+ kanal = "0.1.0-pre8"
data/ext/osv/src/lib.rs CHANGED
@@ -1,9 +1,9 @@
1
- use magnus::{
2
- block::Yield,
3
- prelude::*,
4
- scan_args::{get_kwargs, scan_args},
5
- Error, RString, Ruby, Value,
6
- };
1
+ mod reader;
2
+ mod utils;
3
+
4
+ use crate::reader::*;
5
+
6
+ use magnus::{Error, Ruby};
7
7
 
8
8
  /// Initializes the Ruby extension and defines methods.
9
9
  #[magnus::init]
@@ -13,165 +13,3 @@ fn init(ruby: &Ruby) -> Result<(), Error> {
13
13
  module.define_module_function("for_each_compat", magnus::method!(parse_compat, -1))?;
14
14
  Ok(())
15
15
  }
16
-
17
- /// Helper function to get a readable from either an IO object or a file path
18
- fn get_readable(ruby: &Ruby, to_read: Value) -> Result<Box<dyn Read>, Error> {
19
- if to_read.is_kind_of(ruby.class_io()) {
20
- let reader = RubyIOReader::new(ruby, to_read)?;
21
- Ok(Box::new(reader))
22
- } else {
23
- let path = to_read.to_r_string()?.to_string()?;
24
- let file = std::fs::File::open(&path).map_err(|e| {
25
- Error::new(
26
- ruby.exception_runtime_error(),
27
- format!("Failed to open file: {}", e),
28
- )
29
- })?;
30
- Ok(Box::new(file))
31
- }
32
- }
33
-
34
- /// Helper function to create a CSV reader with the given configuration
35
- fn create_csv_reader(
36
- ruby: &Ruby,
37
- to_read: Value,
38
- has_headers: bool,
39
- delimiter: Option<String>,
40
- ) -> Result<csv::Reader<Box<dyn Read>>, Error> {
41
- let readable = get_readable(ruby, to_read)?;
42
- let delimiter = delimiter.unwrap_or_else(|| ",".to_string());
43
-
44
- let rdr = csv::ReaderBuilder::new()
45
- .has_headers(has_headers)
46
- .delimiter(delimiter.as_bytes()[0])
47
- .from_reader(readable);
48
-
49
- Ok(rdr)
50
- }
51
-
52
- /// Common setup for CSV parsing, returns the reader and headers
53
- fn setup_csv_parser(
54
- ruby: &Ruby,
55
- to_read: Value,
56
- has_headers: bool,
57
- delimiter: Option<String>,
58
- ) -> Result<(csv::Reader<Box<dyn Read>>, Vec<String>), Error> {
59
- let mut rdr = create_csv_reader(ruby, to_read, has_headers, delimiter)?;
60
-
61
- let first_row = rdr.headers().unwrap().clone();
62
- let num_fields = first_row.len();
63
-
64
- let headers = if has_headers {
65
- first_row.iter().map(|h| h.to_string()).collect()
66
- } else {
67
- (0..num_fields).map(|i| format!("c{}", i)).collect()
68
- };
69
-
70
- Ok((rdr, headers))
71
- }
72
-
73
- /// Parse common arguments for CSV parsing
74
- fn parse_csv_args(args: &[Value]) -> Result<(Value, bool, Option<String>), Error> {
75
- let parsed_args = scan_args::<(Value,), (), (), (), _, ()>(args)?;
76
- let (to_read,) = parsed_args.required;
77
-
78
- let kwargs = get_kwargs::<_, (), (Option<bool>, Option<String>), ()>(
79
- parsed_args.keywords,
80
- &[],
81
- &["has_headers", "delimiter"],
82
- )?;
83
-
84
- let has_headers = kwargs.optional.0.unwrap_or(true);
85
-
86
- Ok((to_read, has_headers, kwargs.optional.1))
87
- }
88
-
89
- /// Parses CSV data from a file and yields each row as a hash to the block.
90
- fn parse_csv(
91
- ruby: &Ruby,
92
- rb_self: Value,
93
- args: &[Value],
94
- ) -> Result<Yield<impl Iterator<Item = std::collections::HashMap<String, String>>>, Error> {
95
- if !ruby.block_given() {
96
- return Ok(Yield::Enumerator(rb_self.enumeratorize("for_each", args)));
97
- }
98
-
99
- let (to_read, has_headers, delimiter) = parse_csv_args(args)?;
100
- let (rdr, headers) = setup_csv_parser(ruby, to_read, has_headers, delimiter)?;
101
-
102
- let iter = rdr.into_records().filter_map(move |result| {
103
- let record = result.ok()?;
104
- let mut hash = std::collections::HashMap::new();
105
- for (header, field) in headers.iter().zip(record.iter()) {
106
- hash.insert(header.to_string(), field.to_string());
107
- }
108
- Some(hash)
109
- });
110
-
111
- Ok(Yield::Iter(iter))
112
- }
113
-
114
- fn parse_compat(
115
- ruby: &Ruby,
116
- rb_self: Value,
117
- args: &[Value],
118
- ) -> Result<Yield<impl Iterator<Item = Vec<String>>>, Error> {
119
- if !ruby.block_given() {
120
- return Ok(Yield::Enumerator(
121
- rb_self.enumeratorize("for_each_compat", args),
122
- ));
123
- }
124
-
125
- let (to_read, has_headers, delimiter) = parse_csv_args(args)?;
126
- let (rdr, _) = setup_csv_parser(ruby, to_read, has_headers, delimiter)?;
127
-
128
- let iter = rdr.into_records().filter_map(|result| {
129
- result
130
- .ok()
131
- .map(|record| record.iter().map(|field| field.to_string()).collect())
132
- });
133
-
134
- Ok(Yield::Iter(iter))
135
- }
136
-
137
- use std::io::Read;
138
-
139
- struct RubyIOReader {
140
- io_obj: Value,
141
- }
142
-
143
- impl Read for RubyIOReader {
144
- fn read(&mut self, buf: &mut [u8]) -> std::io::Result<usize> {
145
- let result: RString = self.io_obj.funcall("read", (buf.len(),)).map_err(|_| {
146
- std::io::Error::new(std::io::ErrorKind::Other, "Failed to read from IO")
147
- })?;
148
-
149
- // Handle EOF case
150
- if result.is_nil() {
151
- return Ok(0);
152
- }
153
-
154
- let rust_string = result.to_string().map_err(|_| {
155
- std::io::Error::new(std::io::ErrorKind::Other, "Failed to convert to string")
156
- })?;
157
- let bytes = rust_string.as_bytes();
158
-
159
- let bytes_to_copy = rust_string.len().min(buf.len());
160
- buf[..bytes_to_copy].copy_from_slice(&bytes[..bytes_to_copy]);
161
-
162
- Ok(bytes_to_copy)
163
- }
164
- }
165
-
166
- impl RubyIOReader {
167
- fn new(ruby: &Ruby, value: Value) -> Result<Self, Error> {
168
- if value.is_kind_of(ruby.class_io()) {
169
- Ok(RubyIOReader { io_obj: value })
170
- } else {
171
- Err(Error::new(
172
- ruby.exception_runtime_error(),
173
- "IO object is not a valid IO object",
174
- ))
175
- }
176
- }
177
- }
@@ -0,0 +1,230 @@
1
+ use crate::utils::*;
2
+ use magnus::{
3
+ block::Yield, rb_sys::AsRawValue, value::ReprValue, Error, RClass, RString, Ruby, Value,
4
+ };
5
+ use std::{fs::File, io::Read, os::fd::FromRawFd, thread};
6
+
7
+ /// Parses CSV data from a file and yields each row as a hash to the block.
8
+ pub fn parse_csv(
9
+ ruby: &Ruby,
10
+ rb_self: Value,
11
+ args: &[Value],
12
+ ) -> Result<Yield<impl Iterator<Item = std::collections::HashMap<String, String>>>, Error> {
13
+ if !ruby.block_given() {
14
+ return Ok(Yield::Enumerator(rb_self.enumeratorize("for_each", args)));
15
+ }
16
+ let (to_read, has_headers, delimiter) = parse_csv_args(args)?;
17
+
18
+ let iter = RecordReader::<std::collections::HashMap<String, String>>::new(
19
+ ruby,
20
+ to_read,
21
+ has_headers,
22
+ delimiter.unwrap_or_else(|| ",".to_string()).as_bytes()[0],
23
+ 1000,
24
+ )?;
25
+
26
+ Ok(Yield::Iter(iter))
27
+ }
28
+
29
+ pub fn parse_compat(
30
+ ruby: &Ruby,
31
+ rb_self: Value,
32
+ args: &[Value],
33
+ ) -> Result<Yield<impl Iterator<Item = Vec<String>>>, Error> {
34
+ if !ruby.block_given() {
35
+ return Ok(Yield::Enumerator(
36
+ rb_self.enumeratorize("for_each_compat", args),
37
+ ));
38
+ }
39
+ let (to_read, has_headers, delimiter) = parse_csv_args(args)?;
40
+
41
+ let iter = RecordReader::<Vec<String>>::new(
42
+ ruby,
43
+ to_read,
44
+ has_headers,
45
+ delimiter.unwrap_or_else(|| ",".to_string()).as_bytes()[0],
46
+ 1000,
47
+ )?;
48
+
49
+ Ok(Yield::Iter(iter))
50
+ }
51
+
52
+ pub trait RecordParser {
53
+ type Output;
54
+
55
+ fn parse(headers: &[String], record: &csv::StringRecord) -> Self::Output;
56
+ }
57
+
58
+ impl RecordParser for std::collections::HashMap<String, String> {
59
+ type Output = Self;
60
+
61
+ fn parse(headers: &[String], record: &csv::StringRecord) -> Self::Output {
62
+ record
63
+ .iter()
64
+ .enumerate()
65
+ .map(|(i, field)| (headers[i].clone(), field.to_string()))
66
+ .collect()
67
+ }
68
+ }
69
+
70
+ impl RecordParser for Vec<String> {
71
+ type Output = Self;
72
+
73
+ fn parse(_headers: &[String], record: &csv::StringRecord) -> Self::Output {
74
+ record.iter().map(|field| field.to_string()).collect()
75
+ }
76
+ }
77
+
78
+ struct RecordReader<T: RecordParser> {
79
+ reader: ReadImpl<T>,
80
+ }
81
+
82
+ #[allow(dead_code)]
83
+ enum ReadImpl<T: RecordParser> {
84
+ SingleThreaded {
85
+ reader: csv::Reader<Box<dyn Read + Send + 'static>>,
86
+ headers: Vec<String>,
87
+ },
88
+ MultiThreaded {
89
+ receiver: kanal::Receiver<T::Output>,
90
+ handle: Option<thread::JoinHandle<()>>,
91
+ },
92
+ }
93
+
94
+ impl<T: RecordParser + Send + 'static> RecordReader<T> {
95
+ fn new(
96
+ ruby: &Ruby,
97
+ to_read: Value,
98
+ has_headers: bool,
99
+ delimiter: u8,
100
+ buffer: usize,
101
+ ) -> Result<Self, Error> {
102
+ let string_io = RClass::from(ruby.eval("StringIO").map_err(|e| {
103
+ Error::new(
104
+ ruby.exception_runtime_error(),
105
+ format!("Failed to get StringIO class: {}", e),
106
+ )
107
+ })?);
108
+
109
+ let readable: Box<dyn Read + Send + 'static> = if to_read.is_kind_of(string_io) {
110
+ let string: RString = to_read.funcall("string", ()).map_err(|e| {
111
+ Error::new(
112
+ ruby.exception_runtime_error(),
113
+ format!("Failed to get string from StringIO: {}", e),
114
+ )
115
+ })?;
116
+ let content = string.to_string().map_err(|e| {
117
+ Error::new(
118
+ ruby.exception_runtime_error(),
119
+ format!("Failed to convert string to Rust String: {}", e),
120
+ )
121
+ })?;
122
+ Box::new(std::io::Cursor::new(content))
123
+ } else if to_read.is_kind_of(ruby.class_io()) {
124
+ let fd = unsafe { rb_sys::rb_io_descriptor(to_read.as_raw()) };
125
+ let file = unsafe { File::from_raw_fd(fd) };
126
+ Box::new(file)
127
+ } else {
128
+ let path = to_read
129
+ .to_r_string()
130
+ .map_err(|e| {
131
+ Error::new(
132
+ ruby.exception_runtime_error(),
133
+ format!("Failed to convert path to string: {}", e),
134
+ )
135
+ })?
136
+ .to_string()
137
+ .map_err(|e| {
138
+ Error::new(
139
+ ruby.exception_runtime_error(),
140
+ format!("Failed to convert RString to Rust String: {}", e),
141
+ )
142
+ })?;
143
+ let file = std::fs::File::open(&path).map_err(|e| {
144
+ Error::new(
145
+ ruby.exception_runtime_error(),
146
+ format!("Failed to open file: {}", e),
147
+ )
148
+ })?;
149
+ Box::new(file)
150
+ };
151
+
152
+ let mut reader = csv::ReaderBuilder::new()
153
+ .has_headers(has_headers)
154
+ .delimiter(delimiter)
155
+ .from_reader(readable);
156
+
157
+ let headers = Self::get_headers(&mut reader, has_headers)?;
158
+ let headers_clone = headers.clone();
159
+
160
+ let (sender, receiver) = kanal::bounded(buffer);
161
+ let handle = thread::spawn(move || {
162
+ let mut record = csv::StringRecord::new();
163
+ while let Ok(read) = reader.read_record(&mut record) {
164
+ if !read {
165
+ let file_to_forget = reader.into_inner();
166
+ std::mem::forget(file_to_forget);
167
+ break;
168
+ }
169
+ let row = T::parse(&headers_clone, &record);
170
+ if sender.send(row).is_err() {
171
+ break;
172
+ }
173
+ }
174
+ });
175
+
176
+ let read_impl = ReadImpl::MultiThreaded {
177
+ receiver,
178
+ handle: Some(handle),
179
+ };
180
+
181
+ Ok(Self { reader: read_impl })
182
+ }
183
+
184
+ fn get_headers(
185
+ reader: &mut csv::Reader<impl Read>,
186
+ has_headers: bool,
187
+ ) -> Result<Vec<String>, Error> {
188
+ let first_row = reader
189
+ .headers()
190
+ .map_err(|e| {
191
+ Error::new(
192
+ magnus::exception::runtime_error(),
193
+ format!("Failed to read headers: {}", e),
194
+ )
195
+ })?
196
+ .clone();
197
+ let num_fields = first_row.len();
198
+
199
+ Ok(if has_headers {
200
+ first_row.iter().map(|h| h.to_string()).collect()
201
+ } else {
202
+ (0..num_fields).map(|i| format!("c{}", i)).collect()
203
+ })
204
+ }
205
+ }
206
+
207
+ impl<T: RecordParser> Iterator for RecordReader<T> {
208
+ type Item = T::Output;
209
+
210
+ fn next(&mut self) -> Option<Self::Item> {
211
+ match &mut self.reader {
212
+ ReadImpl::MultiThreaded { receiver, handle } => match receiver.recv() {
213
+ Ok(record) => Some(record),
214
+ Err(_) => {
215
+ if let Some(handle) = handle.take() {
216
+ let _ = handle.join();
217
+ }
218
+ None
219
+ }
220
+ },
221
+ ReadImpl::SingleThreaded { reader, headers } => {
222
+ let mut record = csv::StringRecord::new();
223
+ match reader.read_record(&mut record) {
224
+ Ok(true) => Some(T::parse(headers, &record)),
225
+ _ => None,
226
+ }
227
+ }
228
+ }
229
+ }
230
+ }
@@ -0,0 +1,20 @@
1
+ use magnus::{
2
+ scan_args::{get_kwargs, scan_args},
3
+ Error, Value,
4
+ };
5
+
6
+ /// Parse common arguments for CSV parsing
7
+ pub fn parse_csv_args(args: &[Value]) -> Result<(Value, bool, Option<String>), Error> {
8
+ let parsed_args = scan_args::<(Value,), (), (), (), _, ()>(args)?;
9
+ let (to_read,) = parsed_args.required;
10
+
11
+ let kwargs = get_kwargs::<_, (), (Option<bool>, Option<String>), ()>(
12
+ parsed_args.keywords,
13
+ &[],
14
+ &["has_headers", "delimiter"],
15
+ )?;
16
+
17
+ let has_headers = kwargs.optional.0.unwrap_or(true);
18
+
19
+ Ok((to_read, has_headers, kwargs.optional.1))
20
+ }
data/lib/osv/version.rb CHANGED
@@ -1,3 +1,3 @@
1
1
  module OSV
2
- VERSION = "0.1.1"
2
+ VERSION = "0.2.0"
3
3
  end
data/lib/osv.rbi ADDED
@@ -0,0 +1,29 @@
1
+ # typed: strict
2
+
3
+ module OSV
4
+ sig do
5
+ type_parameters(:T)
6
+ .params(
7
+ input: T.any(String, StringIO, IO),
8
+ has_headers: T.nilable(T::Boolean),
9
+ delimiter: T.nilable(String),
10
+ blk: T.proc.params(row: T::Hash[String, String]).void
11
+ )
12
+ .returns(T.untyped)
13
+ end
14
+ def self.for_each(input, has_headers: true, delimiter: nil, &blk)
15
+ end
16
+
17
+ sig do
18
+ type_parameters(:T)
19
+ .params(
20
+ input: T.any(String, StringIO, IO),
21
+ has_headers: T.nilable(T::Boolean),
22
+ delimiter: T.nilable(String),
23
+ blk: T.proc.params(row: T::Array[String]).void
24
+ )
25
+ .returns(T.untyped)
26
+ end
27
+ def self.for_each_compat(input, has_headers: true, delimiter: nil, &blk)
28
+ end
29
+ end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: osv
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.1
4
+ version: 0.2.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Nathan Jaremko
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2024-12-21 00:00:00.000000000 Z
11
+ date: 2024-12-23 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rb_sys
@@ -54,7 +54,10 @@ files:
54
54
  - ext/osv/Cargo.toml
55
55
  - ext/osv/extconf.rb
56
56
  - ext/osv/src/lib.rs
57
+ - ext/osv/src/reader.rs
58
+ - ext/osv/src/utils.rs
57
59
  - lib/osv.rb
60
+ - lib/osv.rbi
58
61
  - lib/osv/version.rb
59
62
  homepage: https://github.com/njaremko/osv
60
63
  licenses: