osv 0.1.1 → 0.2.1

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: c67f4e48abcf76bbbe0fce6513b67464ed550287451f5fa2b4c6e8f83256fef5
4
- data.tar.gz: b38342f6e6c43050b45c99ea7f3fba4334423cb10d9df39b23ed054264b35e84
3
+ metadata.gz: 17db543fc59ce3ec7b4ea87a0d403b82a628860ca01ba03eaea39438790b7863
4
+ data.tar.gz: 3d54507b6097b7b9e0a771f5a3c72d7605b27c5307528bf0707cd0f7ba29b474
5
5
  SHA512:
6
- metadata.gz: 190d1cfafc93554e5096682f1ce3ec6e9d8049b2669c471db5b27d9190a36500465d798a07e279d73d6e8569c47eaa259f48644a8f1df70138c6f5fc7a826348
7
- data.tar.gz: f3c4ed3eac64aaf95e14bf7f5e60b84f2d698db586a2b8c5c6328389e0c067df298b070d4ac78eb91df444ebf23bad2e8ec74f5e7f3314f6ff0c8a248e52b263
6
+ metadata.gz: 975e4e16a32d1c2d5678f1c4ede658165fcf247f563c166fa167bff7d9bfd95c34937f894207693df2e6716b61fce8c315b6bb4dad7a29d68161ab842768eca1
7
+ data.tar.gz: 9176674f894855098875df2c3287b4370b42193f84d7bdf20d13fa8ea9de9330a431c3070a00652458bbb0a8061866e69b4849a6ee71c1b90718e811d1ed7172
data/Cargo.lock CHANGED
@@ -11,6 +11,12 @@ dependencies = [
11
11
  "memchr",
12
12
  ]
13
13
 
14
+ [[package]]
15
+ name = "autocfg"
16
+ version = "1.4.0"
17
+ source = "registry+https://github.com/rust-lang/crates.io-index"
18
+ checksum = "ace50bade8e6234aa140d9a2f552bbee1db4d353f69b8217bc503490fc1a9f26"
19
+
14
20
  [[package]]
15
21
  name = "bindgen"
16
22
  version = "0.69.5"
@@ -90,6 +96,12 @@ version = "1.13.0"
90
96
  source = "registry+https://github.com/rust-lang/crates.io-index"
91
97
  checksum = "60b1af1c220855b6ceac025d3f6ecdd2b7c4894bfe9cd9bda4fbb4bc7c0d4cf0"
92
98
 
99
+ [[package]]
100
+ name = "futures-core"
101
+ version = "0.3.31"
102
+ source = "registry+https://github.com/rust-lang/crates.io-index"
103
+ checksum = "05f29059c0c2090612e8d742178b0580d2dc940c837851ad723096f87af6663e"
104
+
93
105
  [[package]]
94
106
  name = "glob"
95
107
  version = "0.3.1"
@@ -111,6 +123,16 @@ version = "1.0.14"
111
123
  source = "registry+https://github.com/rust-lang/crates.io-index"
112
124
  checksum = "d75a2a4b1b190afb6f5425f10f6a8f959d2ea0b9c2b1d79553551850539e4674"
113
125
 
126
+ [[package]]
127
+ name = "kanal"
128
+ version = "0.1.0-pre8"
129
+ source = "registry+https://github.com/rust-lang/crates.io-index"
130
+ checksum = "b05d55519627edaf7fd0f29981f6dc03fb52df3f5b257130eb8d0bf2801ea1d7"
131
+ dependencies = [
132
+ "futures-core",
133
+ "lock_api",
134
+ ]
135
+
114
136
  [[package]]
115
137
  name = "lazy_static"
116
138
  version = "1.5.0"
@@ -139,6 +161,16 @@ dependencies = [
139
161
  "windows-targets",
140
162
  ]
141
163
 
164
+ [[package]]
165
+ name = "lock_api"
166
+ version = "0.4.12"
167
+ source = "registry+https://github.com/rust-lang/crates.io-index"
168
+ checksum = "07af8b9cdd281b7915f413fa73f29ebd5d55d0d3f0155584dade1ff18cea1b17"
169
+ dependencies = [
170
+ "autocfg",
171
+ "scopeguard",
172
+ ]
173
+
142
174
  [[package]]
143
175
  name = "magnus"
144
176
  version = "0.6.4"
@@ -201,6 +233,7 @@ name = "osv"
201
233
  version = "0.1.0"
202
234
  dependencies = [
203
235
  "csv",
236
+ "kanal",
204
237
  "magnus 0.7.1",
205
238
  "rb-sys",
206
239
  "serde",
@@ -296,6 +329,12 @@ version = "1.0.18"
296
329
  source = "registry+https://github.com/rust-lang/crates.io-index"
297
330
  checksum = "f3cb5ba0dc43242ce17de99c180e96db90b235b8a9fdc9543c96d2209116bd9f"
298
331
 
332
+ [[package]]
333
+ name = "scopeguard"
334
+ version = "1.2.0"
335
+ source = "registry+https://github.com/rust-lang/crates.io-index"
336
+ checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49"
337
+
299
338
  [[package]]
300
339
  name = "seq-macro"
301
340
  version = "0.3.5"
data/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2024 Nathan Jaremko
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,117 @@
1
+ # OSV
2
+
3
+ OSV is a high-performance CSV parser for Ruby, implemented in Rust. It wraps BurntSushi's excellent [csv-rs](https://github.com/BurntSushi/rust-csv) crate.
4
+
5
+ It provides a simple interface for reading CSV files with support for both hash-based and array-based row formats.
6
+
7
+ The array-based mode is faster than the hash-based mode, so if you don't need the hash keys, use the array-based mode.
8
+
9
+ I have yet to figure out how to get Rust to accept a single method with different return types, so there are two separate methods: one for hashes and one for arrays.
10
+
11
+ ## Installation
12
+
13
+ Add this line to your application's Gemfile:
14
+
15
+ ```ruby
16
+ gem 'osv'
17
+ ```
18
+
19
+ And then execute:
20
+
21
+ ```bash
22
+ bundle install
23
+ ```
24
+
25
+ Or install it directly:
26
+
27
+ ```bash
28
+ gem install osv
29
+ ```
30
+
31
+ ## Usage
32
+
33
+ ### Basic Usage with Hash Output
34
+
35
+ Each row is returned as a hash where the keys are the column headers:
36
+
37
+ ```ruby
38
+ require 'osv'
39
+
40
+ # Read from a file
41
+ OSV.for_each("path/to/file.csv") do |row|
42
+ # row is a Hash like {"name" => "John", "age" => "25"}
43
+ puts row["name"]
44
+ end
45
+
46
+ # Without a block, returns an Enumerator
47
+ rows = OSV.for_each("path/to/file.csv")
48
+ rows.each { |row| puts row["name"] }
49
+ ```
50
+
51
+ ### Array Output Mode
52
+
53
+ If you prefer working with arrays instead of hashes, use `for_each_compat`:
54
+
55
+ ```ruby
56
+ OSV.for_each_compat("path/to/file.csv") do |row|
57
+ # row is an Array like ["John", "25"]
58
+ puts row[0]
59
+ end
60
+ ```
61
+
62
+ ### Options
63
+
64
+ Both methods support the following options:
65
+
66
+ - `has_headers`: Boolean indicating if the first row contains headers (default: true)
67
+ - `delimiter`: String specifying the field separator (default: ",")
68
+
69
+ ```ruby
70
+ # Reading TSV files
71
+ OSV.for_each("path/to/file.tsv", delimiter: "\t") do |row|
72
+ puts row["name"]
73
+ end
74
+
75
+ # Reading without headers
76
+ OSV.for_each("path/to/file.csv", has_headers: false) do |row|
77
+ # Headers will be automatically generated as "c0", "c1", etc.
78
+ puts row["c0"]
79
+ end
80
+ ```
81
+
82
+ ### Input Sources
83
+
84
+ OSV supports reading from:
85
+
86
+ - File paths (as strings)
87
+ - IO objects
88
+ - Important caveat: the IO object must be backed by a real file descriptor, which OSV obtains via `rb_io_descriptor`.
89
+ - StringIO objects
90
+ - Note: the StringIO's contents are read (in full) into a Rust string up front, and parsed from there.
91
+
92
+ ```ruby
93
+ # From file path
94
+ OSV.for_each("path/to/file.csv") { |row| puts row["name"] }
95
+
96
+ # From IO object
97
+ File.open("path/to/file.csv") do |file|
98
+ OSV.for_each(file) { |row| puts row["name"] }
99
+ end
100
+
101
+ # From StringIO
102
+ data = StringIO.new("name,age\nJohn,25")
103
+ OSV.for_each(data) { |row| puts row["name"] }
104
+ ```
105
+
106
+ ## Requirements
107
+
108
+ - Ruby >= 3.1.0
109
+ - Rust toolchain (for installation from source)
110
+
111
+ ## Performance
112
+
113
+ This library is faster than the standard Ruby CSV library, and is comparable to the fastest CSV parser gems I've used.
114
+
115
+ ## License
116
+
117
+ This gem is not currently licensed for public use.
data/ext/osv/Cargo.toml CHANGED
@@ -12,3 +12,4 @@ magnus = { version = "0.7", features = ["rb-sys"] }
12
12
  rb-sys = "0.9"
13
13
  serde = { version = "1.0", features = ["derive"] }
14
14
  serde_magnus = "0.8.1"
15
+ kanal = "0.1.0-pre8"
data/ext/osv/src/lib.rs CHANGED
@@ -1,9 +1,9 @@
1
- use magnus::{
2
- block::Yield,
3
- prelude::*,
4
- scan_args::{get_kwargs, scan_args},
5
- Error, RString, Ruby, Value,
6
- };
1
+ mod reader;
2
+ mod utils;
3
+
4
+ use crate::reader::*;
5
+
6
+ use magnus::{Error, Ruby};
7
7
 
8
8
  /// Initializes the Ruby extension and defines methods.
9
9
  #[magnus::init]
@@ -13,165 +13,3 @@ fn init(ruby: &Ruby) -> Result<(), Error> {
13
13
  module.define_module_function("for_each_compat", magnus::method!(parse_compat, -1))?;
14
14
  Ok(())
15
15
  }
16
-
17
- /// Helper function to get a readable from either an IO object or a file path
18
- fn get_readable(ruby: &Ruby, to_read: Value) -> Result<Box<dyn Read>, Error> {
19
- if to_read.is_kind_of(ruby.class_io()) {
20
- let reader = RubyIOReader::new(ruby, to_read)?;
21
- Ok(Box::new(reader))
22
- } else {
23
- let path = to_read.to_r_string()?.to_string()?;
24
- let file = std::fs::File::open(&path).map_err(|e| {
25
- Error::new(
26
- ruby.exception_runtime_error(),
27
- format!("Failed to open file: {}", e),
28
- )
29
- })?;
30
- Ok(Box::new(file))
31
- }
32
- }
33
-
34
- /// Helper function to create a CSV reader with the given configuration
35
- fn create_csv_reader(
36
- ruby: &Ruby,
37
- to_read: Value,
38
- has_headers: bool,
39
- delimiter: Option<String>,
40
- ) -> Result<csv::Reader<Box<dyn Read>>, Error> {
41
- let readable = get_readable(ruby, to_read)?;
42
- let delimiter = delimiter.unwrap_or_else(|| ",".to_string());
43
-
44
- let rdr = csv::ReaderBuilder::new()
45
- .has_headers(has_headers)
46
- .delimiter(delimiter.as_bytes()[0])
47
- .from_reader(readable);
48
-
49
- Ok(rdr)
50
- }
51
-
52
- /// Common setup for CSV parsing, returns the reader and headers
53
- fn setup_csv_parser(
54
- ruby: &Ruby,
55
- to_read: Value,
56
- has_headers: bool,
57
- delimiter: Option<String>,
58
- ) -> Result<(csv::Reader<Box<dyn Read>>, Vec<String>), Error> {
59
- let mut rdr = create_csv_reader(ruby, to_read, has_headers, delimiter)?;
60
-
61
- let first_row = rdr.headers().unwrap().clone();
62
- let num_fields = first_row.len();
63
-
64
- let headers = if has_headers {
65
- first_row.iter().map(|h| h.to_string()).collect()
66
- } else {
67
- (0..num_fields).map(|i| format!("c{}", i)).collect()
68
- };
69
-
70
- Ok((rdr, headers))
71
- }
72
-
73
- /// Parse common arguments for CSV parsing
74
- fn parse_csv_args(args: &[Value]) -> Result<(Value, bool, Option<String>), Error> {
75
- let parsed_args = scan_args::<(Value,), (), (), (), _, ()>(args)?;
76
- let (to_read,) = parsed_args.required;
77
-
78
- let kwargs = get_kwargs::<_, (), (Option<bool>, Option<String>), ()>(
79
- parsed_args.keywords,
80
- &[],
81
- &["has_headers", "delimiter"],
82
- )?;
83
-
84
- let has_headers = kwargs.optional.0.unwrap_or(true);
85
-
86
- Ok((to_read, has_headers, kwargs.optional.1))
87
- }
88
-
89
- /// Parses CSV data from a file and yields each row as a hash to the block.
90
- fn parse_csv(
91
- ruby: &Ruby,
92
- rb_self: Value,
93
- args: &[Value],
94
- ) -> Result<Yield<impl Iterator<Item = std::collections::HashMap<String, String>>>, Error> {
95
- if !ruby.block_given() {
96
- return Ok(Yield::Enumerator(rb_self.enumeratorize("for_each", args)));
97
- }
98
-
99
- let (to_read, has_headers, delimiter) = parse_csv_args(args)?;
100
- let (rdr, headers) = setup_csv_parser(ruby, to_read, has_headers, delimiter)?;
101
-
102
- let iter = rdr.into_records().filter_map(move |result| {
103
- let record = result.ok()?;
104
- let mut hash = std::collections::HashMap::new();
105
- for (header, field) in headers.iter().zip(record.iter()) {
106
- hash.insert(header.to_string(), field.to_string());
107
- }
108
- Some(hash)
109
- });
110
-
111
- Ok(Yield::Iter(iter))
112
- }
113
-
114
- fn parse_compat(
115
- ruby: &Ruby,
116
- rb_self: Value,
117
- args: &[Value],
118
- ) -> Result<Yield<impl Iterator<Item = Vec<String>>>, Error> {
119
- if !ruby.block_given() {
120
- return Ok(Yield::Enumerator(
121
- rb_self.enumeratorize("for_each_compat", args),
122
- ));
123
- }
124
-
125
- let (to_read, has_headers, delimiter) = parse_csv_args(args)?;
126
- let (rdr, _) = setup_csv_parser(ruby, to_read, has_headers, delimiter)?;
127
-
128
- let iter = rdr.into_records().filter_map(|result| {
129
- result
130
- .ok()
131
- .map(|record| record.iter().map(|field| field.to_string()).collect())
132
- });
133
-
134
- Ok(Yield::Iter(iter))
135
- }
136
-
137
- use std::io::Read;
138
-
139
- struct RubyIOReader {
140
- io_obj: Value,
141
- }
142
-
143
- impl Read for RubyIOReader {
144
- fn read(&mut self, buf: &mut [u8]) -> std::io::Result<usize> {
145
- let result: RString = self.io_obj.funcall("read", (buf.len(),)).map_err(|_| {
146
- std::io::Error::new(std::io::ErrorKind::Other, "Failed to read from IO")
147
- })?;
148
-
149
- // Handle EOF case
150
- if result.is_nil() {
151
- return Ok(0);
152
- }
153
-
154
- let rust_string = result.to_string().map_err(|_| {
155
- std::io::Error::new(std::io::ErrorKind::Other, "Failed to convert to string")
156
- })?;
157
- let bytes = rust_string.as_bytes();
158
-
159
- let bytes_to_copy = rust_string.len().min(buf.len());
160
- buf[..bytes_to_copy].copy_from_slice(&bytes[..bytes_to_copy]);
161
-
162
- Ok(bytes_to_copy)
163
- }
164
- }
165
-
166
- impl RubyIOReader {
167
- fn new(ruby: &Ruby, value: Value) -> Result<Self, Error> {
168
- if value.is_kind_of(ruby.class_io()) {
169
- Ok(RubyIOReader { io_obj: value })
170
- } else {
171
- Err(Error::new(
172
- ruby.exception_runtime_error(),
173
- "IO object is not a valid IO object",
174
- ))
175
- }
176
- }
177
- }
@@ -0,0 +1,235 @@
1
+ use crate::utils::*;
2
+ use magnus::{
3
+ block::Yield, rb_sys::AsRawValue, value::ReprValue, Error, RClass, RString, Ruby, Value,
4
+ };
5
+ use std::{collections::HashMap, fs::File, io::Read, os::fd::FromRawFd, thread};
6
+
7
+ /// Parses CSV data from a file and yields each row as a hash to the block.
8
+ pub fn parse_csv(
9
+ ruby: &Ruby,
10
+ rb_self: Value,
11
+ args: &[Value],
12
+ ) -> Result<Yield<impl Iterator<Item = HashMap<String, String>>>, Error> {
13
+ if !ruby.block_given() {
14
+ return Ok(Yield::Enumerator(rb_self.enumeratorize("for_each", args)));
15
+ }
16
+ let (to_read, has_headers, delimiter) = parse_csv_args(args)?;
17
+
18
+ let iter = RecordReader::<HashMap<String, String>>::new(
19
+ ruby,
20
+ to_read,
21
+ has_headers,
22
+ delimiter.unwrap_or_else(|| ",".to_string()).as_bytes()[0],
23
+ 1000,
24
+ )?;
25
+
26
+ Ok(Yield::Iter(iter))
27
+ }
28
+
29
+ pub fn parse_compat(
30
+ ruby: &Ruby,
31
+ rb_self: Value,
32
+ args: &[Value],
33
+ ) -> Result<Yield<impl Iterator<Item = Vec<String>>>, Error> {
34
+ if !ruby.block_given() {
35
+ return Ok(Yield::Enumerator(
36
+ rb_self.enumeratorize("for_each_compat", args),
37
+ ));
38
+ }
39
+ let (to_read, has_headers, delimiter) = parse_csv_args(args)?;
40
+
41
+ let iter = RecordReader::<Vec<String>>::new(
42
+ ruby,
43
+ to_read,
44
+ has_headers,
45
+ delimiter.unwrap_or_else(|| ",".to_string()).as_bytes()[0],
46
+ 1000,
47
+ )?;
48
+
49
+ Ok(Yield::Iter(iter))
50
+ }
51
+
52
+ pub trait RecordParser {
53
+ type Output;
54
+
55
+ fn parse(headers: &[String], record: &csv::StringRecord) -> Self::Output;
56
+ }
57
+
58
+ impl RecordParser for HashMap<String, String> {
59
+ type Output = Self;
60
+
61
+ fn parse(headers: &[String], record: &csv::StringRecord) -> Self::Output {
62
+ let capacity = headers.len();
63
+ let mut map = HashMap::with_capacity(capacity);
64
+ for (i, field) in record.iter().enumerate() {
65
+ map.insert(headers[i].to_owned(), field.to_string());
66
+ }
67
+ map
68
+ }
69
+ }
70
+
71
+ impl RecordParser for Vec<String> {
72
+ type Output = Self;
73
+
74
+ fn parse(_headers: &[String], record: &csv::StringRecord) -> Self::Output {
75
+ let mut output = Vec::with_capacity(record.len());
76
+ for field in record.iter() {
77
+ output.push(field.to_string());
78
+ }
79
+ output
80
+ }
81
+ }
82
+
83
+ struct RecordReader<T: RecordParser> {
84
+ reader: ReadImpl<T>,
85
+ }
86
+
87
+ #[allow(dead_code)]
88
+ enum ReadImpl<T: RecordParser> {
89
+ SingleThreaded {
90
+ reader: csv::Reader<Box<dyn Read + Send + 'static>>,
91
+ headers: Vec<String>,
92
+ },
93
+ MultiThreaded {
94
+ receiver: kanal::Receiver<T::Output>,
95
+ handle: Option<thread::JoinHandle<()>>,
96
+ },
97
+ }
98
+
99
+ impl<T: RecordParser + Send + 'static> RecordReader<T> {
100
+ fn new(
101
+ ruby: &Ruby,
102
+ to_read: Value,
103
+ has_headers: bool,
104
+ delimiter: u8,
105
+ buffer: usize,
106
+ ) -> Result<Self, Error> {
107
+ let string_io: RClass = ruby.eval("StringIO").map_err(|e| {
108
+ Error::new(
109
+ ruby.exception_runtime_error(),
110
+ format!("Failed to get StringIO class: {}", e),
111
+ )
112
+ })?;
113
+
114
+ let readable: Box<dyn Read + Send + 'static> = if to_read.is_kind_of(string_io) {
115
+ let string: RString = to_read.funcall("string", ()).map_err(|e| {
116
+ Error::new(
117
+ ruby.exception_runtime_error(),
118
+ format!("Failed to get string from StringIO: {}", e),
119
+ )
120
+ })?;
121
+ let content = string.to_string().map_err(|e| {
122
+ Error::new(
123
+ ruby.exception_runtime_error(),
124
+ format!("Failed to convert string to Rust String: {}", e),
125
+ )
126
+ })?;
127
+ Box::new(std::io::Cursor::new(content))
128
+ } else if to_read.is_kind_of(ruby.class_io()) {
129
+ let fd = unsafe { rb_sys::rb_io_descriptor(to_read.as_raw()) };
130
+ let file = unsafe { File::from_raw_fd(fd) };
131
+ Box::new(file)
132
+ } else {
133
+ let path = to_read
134
+ .to_r_string()
135
+ .map_err(|e| {
136
+ Error::new(
137
+ ruby.exception_runtime_error(),
138
+ format!("Failed to convert path to string: {}", e),
139
+ )
140
+ })?
141
+ .to_string()
142
+ .map_err(|e| {
143
+ Error::new(
144
+ ruby.exception_runtime_error(),
145
+ format!("Failed to convert RString to Rust String: {}", e),
146
+ )
147
+ })?;
148
+ let file = std::fs::File::open(&path).map_err(|e| {
149
+ Error::new(
150
+ ruby.exception_runtime_error(),
151
+ format!("Failed to open file: {}", e),
152
+ )
153
+ })?;
154
+ Box::new(file)
155
+ };
156
+
157
+ let mut reader = csv::ReaderBuilder::new()
158
+ .has_headers(has_headers)
159
+ .delimiter(delimiter)
160
+ .from_reader(readable);
161
+
162
+ let headers = Self::get_headers(&mut reader, has_headers)?;
163
+ let headers_clone = headers.clone();
164
+
165
+ let (sender, receiver) = kanal::bounded(buffer);
166
+ let handle = thread::spawn(move || {
167
+ let mut record = csv::StringRecord::new();
168
+ while let Ok(read) = reader.read_record(&mut record) {
169
+ if !read {
170
+ let file_to_forget = reader.into_inner();
171
+ std::mem::forget(file_to_forget);
172
+ break;
173
+ }
174
+ let row = T::parse(&headers_clone, &record);
175
+ if sender.send(row).is_err() {
176
+ break;
177
+ }
178
+ }
179
+ });
180
+
181
+ let read_impl = ReadImpl::MultiThreaded {
182
+ receiver,
183
+ handle: Some(handle),
184
+ };
185
+
186
+ Ok(Self { reader: read_impl })
187
+ }
188
+
189
+ fn get_headers(
190
+ reader: &mut csv::Reader<impl Read>,
191
+ has_headers: bool,
192
+ ) -> Result<Vec<String>, Error> {
193
+ let first_row = reader
194
+ .headers()
195
+ .map_err(|e| {
196
+ Error::new(
197
+ magnus::exception::runtime_error(),
198
+ format!("Failed to read headers: {}", e),
199
+ )
200
+ })?
201
+ .clone();
202
+ let num_fields = first_row.len();
203
+
204
+ Ok(if has_headers {
205
+ first_row.iter().map(|h| h.to_string()).collect()
206
+ } else {
207
+ (0..num_fields).map(|i| format!("c{}", i)).collect()
208
+ })
209
+ }
210
+ }
211
+
212
+ impl<T: RecordParser> Iterator for RecordReader<T> {
213
+ type Item = T::Output;
214
+
215
+ fn next(&mut self) -> Option<Self::Item> {
216
+ match &mut self.reader {
217
+ ReadImpl::MultiThreaded { receiver, handle } => match receiver.recv() {
218
+ Ok(record) => Some(record),
219
+ Err(_) => {
220
+ if let Some(handle) = handle.take() {
221
+ let _ = handle.join();
222
+ }
223
+ None
224
+ }
225
+ },
226
+ ReadImpl::SingleThreaded { reader, headers } => {
227
+ let mut record = csv::StringRecord::new();
228
+ match reader.read_record(&mut record) {
229
+ Ok(true) => Some(T::parse(headers, &record)),
230
+ _ => None,
231
+ }
232
+ }
233
+ }
234
+ }
235
+ }
@@ -0,0 +1,20 @@
1
+ use magnus::{
2
+ scan_args::{get_kwargs, scan_args},
3
+ Error, Value,
4
+ };
5
+
6
+ /// Parse common arguments for CSV parsing
7
+ pub fn parse_csv_args(args: &[Value]) -> Result<(Value, bool, Option<String>), Error> {
8
+ let parsed_args = scan_args::<(Value,), (), (), (), _, ()>(args)?;
9
+ let (to_read,) = parsed_args.required;
10
+
11
+ let kwargs = get_kwargs::<_, (), (Option<bool>, Option<String>), ()>(
12
+ parsed_args.keywords,
13
+ &[],
14
+ &["has_headers", "delimiter"],
15
+ )?;
16
+
17
+ let has_headers = kwargs.optional.0.unwrap_or(true);
18
+
19
+ Ok((to_read, has_headers, kwargs.optional.1))
20
+ }
data/lib/osv/version.rb CHANGED
@@ -1,3 +1,3 @@
1
1
  module OSV
2
- VERSION = "0.1.1"
2
+ VERSION = "0.2.1"
3
3
  end
data/lib/osv.rbi ADDED
@@ -0,0 +1,29 @@
1
+ # typed: strict
2
+
3
+ module OSV
4
+ sig do
5
+ type_parameters(:T)
6
+ .params(
7
+ input: T.any(String, StringIO, IO),
8
+ has_headers: T.nilable(T::Boolean),
9
+ delimiter: T.nilable(String),
10
+ blk: T.proc.params(row: T::Hash[String, String]).void
11
+ )
12
+ .returns(T.untyped)
13
+ end
14
+ def self.for_each(input, has_headers: true, delimiter: nil, &blk)
15
+ end
16
+
17
+ sig do
18
+ type_parameters(:T)
19
+ .params(
20
+ input: T.any(String, StringIO, IO),
21
+ has_headers: T.nilable(T::Boolean),
22
+ delimiter: T.nilable(String),
23
+ blk: T.proc.params(row: T::Array[String]).void
24
+ )
25
+ .returns(T.untyped)
26
+ end
27
+ def self.for_each_compat(input, has_headers: true, delimiter: nil, &blk)
28
+ end
29
+ end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: osv
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.1
4
+ version: 0.2.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Nathan Jaremko
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2024-12-21 00:00:00.000000000 Z
11
+ date: 2024-12-23 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rb_sys
@@ -49,12 +49,17 @@ files:
49
49
  - Cargo.lock
50
50
  - Cargo.toml
51
51
  - Gemfile
52
+ - LICENSE
53
+ - README.md
52
54
  - Rakefile
53
55
  - ext/osv/Cargo.lock
54
56
  - ext/osv/Cargo.toml
55
57
  - ext/osv/extconf.rb
56
58
  - ext/osv/src/lib.rs
59
+ - ext/osv/src/reader.rs
60
+ - ext/osv/src/utils.rs
57
61
  - lib/osv.rb
62
+ - lib/osv.rbi
58
63
  - lib/osv/version.rb
59
64
  homepage: https://github.com/njaremko/osv
60
65
  licenses: