osv 0.1.1 → 0.2.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/Cargo.lock +39 -0
- data/LICENSE +21 -0
- data/README.md +117 -0
- data/ext/osv/Cargo.toml +1 -0
- data/ext/osv/src/lib.rs +6 -168
- data/ext/osv/src/reader.rs +235 -0
- data/ext/osv/src/utils.rs +20 -0
- data/lib/osv/version.rb +1 -1
- data/lib/osv.rbi +29 -0
- metadata +7 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 17db543fc59ce3ec7b4ea87a0d403b82a628860ca01ba03eaea39438790b7863
|
4
|
+
data.tar.gz: 3d54507b6097b7b9e0a771f5a3c72d7605b27c5307528bf0707cd0f7ba29b474
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 975e4e16a32d1c2d5678f1c4ede658165fcf247f563c166fa167bff7d9bfd95c34937f894207693df2e6716b61fce8c315b6bb4dad7a29d68161ab842768eca1
|
7
|
+
data.tar.gz: 9176674f894855098875df2c3287b4370b42193f84d7bdf20d13fa8ea9de9330a431c3070a00652458bbb0a8061866e69b4849a6ee71c1b90718e811d1ed7172
|
data/Cargo.lock
CHANGED
@@ -11,6 +11,12 @@ dependencies = [
|
|
11
11
|
"memchr",
|
12
12
|
]
|
13
13
|
|
14
|
+
[[package]]
|
15
|
+
name = "autocfg"
|
16
|
+
version = "1.4.0"
|
17
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
18
|
+
checksum = "ace50bade8e6234aa140d9a2f552bbee1db4d353f69b8217bc503490fc1a9f26"
|
19
|
+
|
14
20
|
[[package]]
|
15
21
|
name = "bindgen"
|
16
22
|
version = "0.69.5"
|
@@ -90,6 +96,12 @@ version = "1.13.0"
|
|
90
96
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
91
97
|
checksum = "60b1af1c220855b6ceac025d3f6ecdd2b7c4894bfe9cd9bda4fbb4bc7c0d4cf0"
|
92
98
|
|
99
|
+
[[package]]
|
100
|
+
name = "futures-core"
|
101
|
+
version = "0.3.31"
|
102
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
103
|
+
checksum = "05f29059c0c2090612e8d742178b0580d2dc940c837851ad723096f87af6663e"
|
104
|
+
|
93
105
|
[[package]]
|
94
106
|
name = "glob"
|
95
107
|
version = "0.3.1"
|
@@ -111,6 +123,16 @@ version = "1.0.14"
|
|
111
123
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
112
124
|
checksum = "d75a2a4b1b190afb6f5425f10f6a8f959d2ea0b9c2b1d79553551850539e4674"
|
113
125
|
|
126
|
+
[[package]]
|
127
|
+
name = "kanal"
|
128
|
+
version = "0.1.0-pre8"
|
129
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
130
|
+
checksum = "b05d55519627edaf7fd0f29981f6dc03fb52df3f5b257130eb8d0bf2801ea1d7"
|
131
|
+
dependencies = [
|
132
|
+
"futures-core",
|
133
|
+
"lock_api",
|
134
|
+
]
|
135
|
+
|
114
136
|
[[package]]
|
115
137
|
name = "lazy_static"
|
116
138
|
version = "1.5.0"
|
@@ -139,6 +161,16 @@ dependencies = [
|
|
139
161
|
"windows-targets",
|
140
162
|
]
|
141
163
|
|
164
|
+
[[package]]
|
165
|
+
name = "lock_api"
|
166
|
+
version = "0.4.12"
|
167
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
168
|
+
checksum = "07af8b9cdd281b7915f413fa73f29ebd5d55d0d3f0155584dade1ff18cea1b17"
|
169
|
+
dependencies = [
|
170
|
+
"autocfg",
|
171
|
+
"scopeguard",
|
172
|
+
]
|
173
|
+
|
142
174
|
[[package]]
|
143
175
|
name = "magnus"
|
144
176
|
version = "0.6.4"
|
@@ -201,6 +233,7 @@ name = "osv"
|
|
201
233
|
version = "0.1.0"
|
202
234
|
dependencies = [
|
203
235
|
"csv",
|
236
|
+
"kanal",
|
204
237
|
"magnus 0.7.1",
|
205
238
|
"rb-sys",
|
206
239
|
"serde",
|
@@ -296,6 +329,12 @@ version = "1.0.18"
|
|
296
329
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
297
330
|
checksum = "f3cb5ba0dc43242ce17de99c180e96db90b235b8a9fdc9543c96d2209116bd9f"
|
298
331
|
|
332
|
+
[[package]]
|
333
|
+
name = "scopeguard"
|
334
|
+
version = "1.2.0"
|
335
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
336
|
+
checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49"
|
337
|
+
|
299
338
|
[[package]]
|
300
339
|
name = "seq-macro"
|
301
340
|
version = "0.3.5"
|
data/LICENSE
ADDED
@@ -0,0 +1,21 @@
|
|
1
|
+
MIT License
|
2
|
+
|
3
|
+
Copyright (c) 2024 Nathan Jaremko
|
4
|
+
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
7
|
+
in the Software without restriction, including without limitation the rights
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
10
|
+
furnished to do so, subject to the following conditions:
|
11
|
+
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
13
|
+
copies or substantial portions of the Software.
|
14
|
+
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
21
|
+
SOFTWARE.
|
data/README.md
ADDED
@@ -0,0 +1,117 @@
|
|
1
|
+
# OSV
|
2
|
+
|
3
|
+
OSV is a high-performance CSV parser for Ruby, implemented in Rust. It wraps BurntSushi's excellent [csv-rs](https://github.com/BurntSushi/rust-csv) crate.
|
4
|
+
|
5
|
+
It provides a simple interface for reading CSV files with support for both hash-based and array-based row formats.
|
6
|
+
|
7
|
+
The array-based mode is faster than the hash-based mode, so if you don't need the hash keys, use the array-based mode.
|
8
|
+
|
9
|
+
I have yet to figure out how to get Rust to accept an implementation of this as one method with different return types, so I've had to implement two methods.
|
10
|
+
|
11
|
+
## Installation
|
12
|
+
|
13
|
+
Add this line to your application's Gemfile:
|
14
|
+
|
15
|
+
```ruby
|
16
|
+
gem 'osv'
|
17
|
+
```
|
18
|
+
|
19
|
+
And then execute:
|
20
|
+
|
21
|
+
```bash
|
22
|
+
bundle install
|
23
|
+
```
|
24
|
+
|
25
|
+
Or install it directly:
|
26
|
+
|
27
|
+
```bash
|
28
|
+
gem install osv
|
29
|
+
```
|
30
|
+
|
31
|
+
## Usage
|
32
|
+
|
33
|
+
### Basic Usage with Hash Output
|
34
|
+
|
35
|
+
Each row is returned as a hash where the keys are the column headers:
|
36
|
+
|
37
|
+
```ruby
|
38
|
+
require 'osv'
|
39
|
+
|
40
|
+
# Read from a file
|
41
|
+
OSV.for_each("path/to/file.csv") do |row|
|
42
|
+
# row is a Hash like {"name" => "John", "age" => "25"}
|
43
|
+
puts row["name"]
|
44
|
+
end
|
45
|
+
|
46
|
+
# Without a block, returns an Enumerator
|
47
|
+
rows = OSV.for_each("path/to/file.csv")
|
48
|
+
rows.each { |row| puts row["name"] }
|
49
|
+
```
|
50
|
+
|
51
|
+
### Array Output Mode
|
52
|
+
|
53
|
+
If you prefer working with arrays instead of hashes, use `for_each_compat`:
|
54
|
+
|
55
|
+
```ruby
|
56
|
+
OSV.for_each_compat("path/to/file.csv") do |row|
|
57
|
+
# row is an Array like ["John", "25"]
|
58
|
+
puts row[0]
|
59
|
+
end
|
60
|
+
```
|
61
|
+
|
62
|
+
### Options
|
63
|
+
|
64
|
+
Both methods support the following options:
|
65
|
+
|
66
|
+
- `has_headers`: Boolean indicating if the first row contains headers (default: true)
|
67
|
+
- `delimiter`: String specifying the field separator (default: ",")
|
68
|
+
|
69
|
+
```ruby
|
70
|
+
# Reading TSV files
|
71
|
+
OSV.for_each("path/to/file.tsv", delimiter: "\t") do |row|
|
72
|
+
puts row["name"]
|
73
|
+
end
|
74
|
+
|
75
|
+
# Reading without headers
|
76
|
+
OSV.for_each("path/to/file.csv", has_headers: false) do |row|
|
77
|
+
# Headers will be automatically generated as "c0", "c1", etc.
|
78
|
+
puts row["c0"]
|
79
|
+
end
|
80
|
+
```
|
81
|
+
|
82
|
+
### Input Sources
|
83
|
+
|
84
|
+
OSV supports reading from:
|
85
|
+
|
86
|
+
- File paths (as strings)
|
87
|
+
- IO objects
|
88
|
+
- Important caveat: the IO object must be backed by a real file descriptor, which is obtained via `rb_io_descriptor`.
|
89
|
+
- StringIO objects
|
90
|
+
- Note: when you do this, the string is read (in full) into a Rust string, and we parse it there.
|
91
|
+
|
92
|
+
```ruby
|
93
|
+
# From file path
|
94
|
+
OSV.for_each("path/to/file.csv") { |row| puts row["name"] }
|
95
|
+
|
96
|
+
# From IO object
|
97
|
+
File.open("path/to/file.csv") do |file|
|
98
|
+
OSV.for_each(file) { |row| puts row["name"] }
|
99
|
+
end
|
100
|
+
|
101
|
+
# From StringIO
|
102
|
+
data = StringIO.new("name,age\nJohn,25")
|
103
|
+
OSV.for_each(data) { |row| puts row["name"] }
|
104
|
+
```
|
105
|
+
|
106
|
+
## Requirements
|
107
|
+
|
108
|
+
- Ruby >= 3.1.0
|
109
|
+
- Rust toolchain (for installation from source)
|
110
|
+
|
111
|
+
## Performance
|
112
|
+
|
113
|
+
This library is faster than the standard Ruby CSV library, and is comparable to the fastest CSV parser gems I've used.
|
114
|
+
|
115
|
+
## License
|
116
|
+
|
117
|
+
This gem is not currently licensed for public use.
|
data/ext/osv/Cargo.toml
CHANGED
data/ext/osv/src/lib.rs
CHANGED
@@ -1,9 +1,9 @@
|
|
1
|
-
|
2
|
-
|
3
|
-
|
4
|
-
|
5
|
-
|
6
|
-
};
|
1
|
+
mod reader;
|
2
|
+
mod utils;
|
3
|
+
|
4
|
+
use crate::reader::*;
|
5
|
+
|
6
|
+
use magnus::{Error, Ruby};
|
7
7
|
|
8
8
|
/// Initializes the Ruby extension and defines methods.
|
9
9
|
#[magnus::init]
|
@@ -13,165 +13,3 @@ fn init(ruby: &Ruby) -> Result<(), Error> {
|
|
13
13
|
module.define_module_function("for_each_compat", magnus::method!(parse_compat, -1))?;
|
14
14
|
Ok(())
|
15
15
|
}
|
16
|
-
|
17
|
-
/// Helper function to get a readable from either an IO object or a file path
|
18
|
-
fn get_readable(ruby: &Ruby, to_read: Value) -> Result<Box<dyn Read>, Error> {
|
19
|
-
if to_read.is_kind_of(ruby.class_io()) {
|
20
|
-
let reader = RubyIOReader::new(ruby, to_read)?;
|
21
|
-
Ok(Box::new(reader))
|
22
|
-
} else {
|
23
|
-
let path = to_read.to_r_string()?.to_string()?;
|
24
|
-
let file = std::fs::File::open(&path).map_err(|e| {
|
25
|
-
Error::new(
|
26
|
-
ruby.exception_runtime_error(),
|
27
|
-
format!("Failed to open file: {}", e),
|
28
|
-
)
|
29
|
-
})?;
|
30
|
-
Ok(Box::new(file))
|
31
|
-
}
|
32
|
-
}
|
33
|
-
|
34
|
-
/// Helper function to create a CSV reader with the given configuration
|
35
|
-
fn create_csv_reader(
|
36
|
-
ruby: &Ruby,
|
37
|
-
to_read: Value,
|
38
|
-
has_headers: bool,
|
39
|
-
delimiter: Option<String>,
|
40
|
-
) -> Result<csv::Reader<Box<dyn Read>>, Error> {
|
41
|
-
let readable = get_readable(ruby, to_read)?;
|
42
|
-
let delimiter = delimiter.unwrap_or_else(|| ",".to_string());
|
43
|
-
|
44
|
-
let rdr = csv::ReaderBuilder::new()
|
45
|
-
.has_headers(has_headers)
|
46
|
-
.delimiter(delimiter.as_bytes()[0])
|
47
|
-
.from_reader(readable);
|
48
|
-
|
49
|
-
Ok(rdr)
|
50
|
-
}
|
51
|
-
|
52
|
-
/// Common setup for CSV parsing, returns the reader and headers
|
53
|
-
fn setup_csv_parser(
|
54
|
-
ruby: &Ruby,
|
55
|
-
to_read: Value,
|
56
|
-
has_headers: bool,
|
57
|
-
delimiter: Option<String>,
|
58
|
-
) -> Result<(csv::Reader<Box<dyn Read>>, Vec<String>), Error> {
|
59
|
-
let mut rdr = create_csv_reader(ruby, to_read, has_headers, delimiter)?;
|
60
|
-
|
61
|
-
let first_row = rdr.headers().unwrap().clone();
|
62
|
-
let num_fields = first_row.len();
|
63
|
-
|
64
|
-
let headers = if has_headers {
|
65
|
-
first_row.iter().map(|h| h.to_string()).collect()
|
66
|
-
} else {
|
67
|
-
(0..num_fields).map(|i| format!("c{}", i)).collect()
|
68
|
-
};
|
69
|
-
|
70
|
-
Ok((rdr, headers))
|
71
|
-
}
|
72
|
-
|
73
|
-
/// Parse common arguments for CSV parsing
|
74
|
-
fn parse_csv_args(args: &[Value]) -> Result<(Value, bool, Option<String>), Error> {
|
75
|
-
let parsed_args = scan_args::<(Value,), (), (), (), _, ()>(args)?;
|
76
|
-
let (to_read,) = parsed_args.required;
|
77
|
-
|
78
|
-
let kwargs = get_kwargs::<_, (), (Option<bool>, Option<String>), ()>(
|
79
|
-
parsed_args.keywords,
|
80
|
-
&[],
|
81
|
-
&["has_headers", "delimiter"],
|
82
|
-
)?;
|
83
|
-
|
84
|
-
let has_headers = kwargs.optional.0.unwrap_or(true);
|
85
|
-
|
86
|
-
Ok((to_read, has_headers, kwargs.optional.1))
|
87
|
-
}
|
88
|
-
|
89
|
-
/// Parses CSV data from a file and yields each row as a hash to the block.
|
90
|
-
fn parse_csv(
|
91
|
-
ruby: &Ruby,
|
92
|
-
rb_self: Value,
|
93
|
-
args: &[Value],
|
94
|
-
) -> Result<Yield<impl Iterator<Item = std::collections::HashMap<String, String>>>, Error> {
|
95
|
-
if !ruby.block_given() {
|
96
|
-
return Ok(Yield::Enumerator(rb_self.enumeratorize("for_each", args)));
|
97
|
-
}
|
98
|
-
|
99
|
-
let (to_read, has_headers, delimiter) = parse_csv_args(args)?;
|
100
|
-
let (rdr, headers) = setup_csv_parser(ruby, to_read, has_headers, delimiter)?;
|
101
|
-
|
102
|
-
let iter = rdr.into_records().filter_map(move |result| {
|
103
|
-
let record = result.ok()?;
|
104
|
-
let mut hash = std::collections::HashMap::new();
|
105
|
-
for (header, field) in headers.iter().zip(record.iter()) {
|
106
|
-
hash.insert(header.to_string(), field.to_string());
|
107
|
-
}
|
108
|
-
Some(hash)
|
109
|
-
});
|
110
|
-
|
111
|
-
Ok(Yield::Iter(iter))
|
112
|
-
}
|
113
|
-
|
114
|
-
fn parse_compat(
|
115
|
-
ruby: &Ruby,
|
116
|
-
rb_self: Value,
|
117
|
-
args: &[Value],
|
118
|
-
) -> Result<Yield<impl Iterator<Item = Vec<String>>>, Error> {
|
119
|
-
if !ruby.block_given() {
|
120
|
-
return Ok(Yield::Enumerator(
|
121
|
-
rb_self.enumeratorize("for_each_compat", args),
|
122
|
-
));
|
123
|
-
}
|
124
|
-
|
125
|
-
let (to_read, has_headers, delimiter) = parse_csv_args(args)?;
|
126
|
-
let (rdr, _) = setup_csv_parser(ruby, to_read, has_headers, delimiter)?;
|
127
|
-
|
128
|
-
let iter = rdr.into_records().filter_map(|result| {
|
129
|
-
result
|
130
|
-
.ok()
|
131
|
-
.map(|record| record.iter().map(|field| field.to_string()).collect())
|
132
|
-
});
|
133
|
-
|
134
|
-
Ok(Yield::Iter(iter))
|
135
|
-
}
|
136
|
-
|
137
|
-
use std::io::Read;
|
138
|
-
|
139
|
-
struct RubyIOReader {
|
140
|
-
io_obj: Value,
|
141
|
-
}
|
142
|
-
|
143
|
-
impl Read for RubyIOReader {
|
144
|
-
fn read(&mut self, buf: &mut [u8]) -> std::io::Result<usize> {
|
145
|
-
let result: RString = self.io_obj.funcall("read", (buf.len(),)).map_err(|_| {
|
146
|
-
std::io::Error::new(std::io::ErrorKind::Other, "Failed to read from IO")
|
147
|
-
})?;
|
148
|
-
|
149
|
-
// Handle EOF case
|
150
|
-
if result.is_nil() {
|
151
|
-
return Ok(0);
|
152
|
-
}
|
153
|
-
|
154
|
-
let rust_string = result.to_string().map_err(|_| {
|
155
|
-
std::io::Error::new(std::io::ErrorKind::Other, "Failed to convert to string")
|
156
|
-
})?;
|
157
|
-
let bytes = rust_string.as_bytes();
|
158
|
-
|
159
|
-
let bytes_to_copy = rust_string.len().min(buf.len());
|
160
|
-
buf[..bytes_to_copy].copy_from_slice(&bytes[..bytes_to_copy]);
|
161
|
-
|
162
|
-
Ok(bytes_to_copy)
|
163
|
-
}
|
164
|
-
}
|
165
|
-
|
166
|
-
impl RubyIOReader {
|
167
|
-
fn new(ruby: &Ruby, value: Value) -> Result<Self, Error> {
|
168
|
-
if value.is_kind_of(ruby.class_io()) {
|
169
|
-
Ok(RubyIOReader { io_obj: value })
|
170
|
-
} else {
|
171
|
-
Err(Error::new(
|
172
|
-
ruby.exception_runtime_error(),
|
173
|
-
"IO object is not a valid IO object",
|
174
|
-
))
|
175
|
-
}
|
176
|
-
}
|
177
|
-
}
|
@@ -0,0 +1,235 @@
|
|
1
|
+
use crate::utils::*;
|
2
|
+
use magnus::{
|
3
|
+
block::Yield, rb_sys::AsRawValue, value::ReprValue, Error, RClass, RString, Ruby, Value,
|
4
|
+
};
|
5
|
+
use std::{collections::HashMap, fs::File, io::Read, os::fd::FromRawFd, thread};
|
6
|
+
|
7
|
+
/// Parses CSV data from a file and yields each row as a hash to the block.
|
8
|
+
pub fn parse_csv(
|
9
|
+
ruby: &Ruby,
|
10
|
+
rb_self: Value,
|
11
|
+
args: &[Value],
|
12
|
+
) -> Result<Yield<impl Iterator<Item = HashMap<String, String>>>, Error> {
|
13
|
+
if !ruby.block_given() {
|
14
|
+
return Ok(Yield::Enumerator(rb_self.enumeratorize("for_each", args)));
|
15
|
+
}
|
16
|
+
let (to_read, has_headers, delimiter) = parse_csv_args(args)?;
|
17
|
+
|
18
|
+
let iter = RecordReader::<HashMap<String, String>>::new(
|
19
|
+
ruby,
|
20
|
+
to_read,
|
21
|
+
has_headers,
|
22
|
+
delimiter.unwrap_or_else(|| ",".to_string()).as_bytes()[0],
|
23
|
+
1000,
|
24
|
+
)?;
|
25
|
+
|
26
|
+
Ok(Yield::Iter(iter))
|
27
|
+
}
|
28
|
+
|
29
|
+
pub fn parse_compat(
|
30
|
+
ruby: &Ruby,
|
31
|
+
rb_self: Value,
|
32
|
+
args: &[Value],
|
33
|
+
) -> Result<Yield<impl Iterator<Item = Vec<String>>>, Error> {
|
34
|
+
if !ruby.block_given() {
|
35
|
+
return Ok(Yield::Enumerator(
|
36
|
+
rb_self.enumeratorize("for_each_compat", args),
|
37
|
+
));
|
38
|
+
}
|
39
|
+
let (to_read, has_headers, delimiter) = parse_csv_args(args)?;
|
40
|
+
|
41
|
+
let iter = RecordReader::<Vec<String>>::new(
|
42
|
+
ruby,
|
43
|
+
to_read,
|
44
|
+
has_headers,
|
45
|
+
delimiter.unwrap_or_else(|| ",".to_string()).as_bytes()[0],
|
46
|
+
1000,
|
47
|
+
)?;
|
48
|
+
|
49
|
+
Ok(Yield::Iter(iter))
|
50
|
+
}
|
51
|
+
|
52
|
+
pub trait RecordParser {
|
53
|
+
type Output;
|
54
|
+
|
55
|
+
fn parse(headers: &[String], record: &csv::StringRecord) -> Self::Output;
|
56
|
+
}
|
57
|
+
|
58
|
+
impl RecordParser for HashMap<String, String> {
|
59
|
+
type Output = Self;
|
60
|
+
|
61
|
+
fn parse(headers: &[String], record: &csv::StringRecord) -> Self::Output {
|
62
|
+
let capacity = headers.len();
|
63
|
+
let mut map = HashMap::with_capacity(capacity);
|
64
|
+
for (i, field) in record.iter().enumerate() {
|
65
|
+
map.insert(headers[i].to_owned(), field.to_string());
|
66
|
+
}
|
67
|
+
map
|
68
|
+
}
|
69
|
+
}
|
70
|
+
|
71
|
+
impl RecordParser for Vec<String> {
|
72
|
+
type Output = Self;
|
73
|
+
|
74
|
+
fn parse(_headers: &[String], record: &csv::StringRecord) -> Self::Output {
|
75
|
+
let mut output = Vec::with_capacity(record.len());
|
76
|
+
for field in record.iter() {
|
77
|
+
output.push(field.to_string());
|
78
|
+
}
|
79
|
+
output
|
80
|
+
}
|
81
|
+
}
|
82
|
+
|
83
|
+
struct RecordReader<T: RecordParser> {
|
84
|
+
reader: ReadImpl<T>,
|
85
|
+
}
|
86
|
+
|
87
|
+
#[allow(dead_code)]
|
88
|
+
enum ReadImpl<T: RecordParser> {
|
89
|
+
SingleThreaded {
|
90
|
+
reader: csv::Reader<Box<dyn Read + Send + 'static>>,
|
91
|
+
headers: Vec<String>,
|
92
|
+
},
|
93
|
+
MultiThreaded {
|
94
|
+
receiver: kanal::Receiver<T::Output>,
|
95
|
+
handle: Option<thread::JoinHandle<()>>,
|
96
|
+
},
|
97
|
+
}
|
98
|
+
|
99
|
+
impl<T: RecordParser + Send + 'static> RecordReader<T> {
|
100
|
+
fn new(
|
101
|
+
ruby: &Ruby,
|
102
|
+
to_read: Value,
|
103
|
+
has_headers: bool,
|
104
|
+
delimiter: u8,
|
105
|
+
buffer: usize,
|
106
|
+
) -> Result<Self, Error> {
|
107
|
+
let string_io: RClass = ruby.eval("StringIO").map_err(|e| {
|
108
|
+
Error::new(
|
109
|
+
ruby.exception_runtime_error(),
|
110
|
+
format!("Failed to get StringIO class: {}", e),
|
111
|
+
)
|
112
|
+
})?;
|
113
|
+
|
114
|
+
let readable: Box<dyn Read + Send + 'static> = if to_read.is_kind_of(string_io) {
|
115
|
+
let string: RString = to_read.funcall("string", ()).map_err(|e| {
|
116
|
+
Error::new(
|
117
|
+
ruby.exception_runtime_error(),
|
118
|
+
format!("Failed to get string from StringIO: {}", e),
|
119
|
+
)
|
120
|
+
})?;
|
121
|
+
let content = string.to_string().map_err(|e| {
|
122
|
+
Error::new(
|
123
|
+
ruby.exception_runtime_error(),
|
124
|
+
format!("Failed to convert string to Rust String: {}", e),
|
125
|
+
)
|
126
|
+
})?;
|
127
|
+
Box::new(std::io::Cursor::new(content))
|
128
|
+
} else if to_read.is_kind_of(ruby.class_io()) {
|
129
|
+
let fd = unsafe { rb_sys::rb_io_descriptor(to_read.as_raw()) };
|
130
|
+
let file = unsafe { File::from_raw_fd(fd) };
|
131
|
+
Box::new(file)
|
132
|
+
} else {
|
133
|
+
let path = to_read
|
134
|
+
.to_r_string()
|
135
|
+
.map_err(|e| {
|
136
|
+
Error::new(
|
137
|
+
ruby.exception_runtime_error(),
|
138
|
+
format!("Failed to convert path to string: {}", e),
|
139
|
+
)
|
140
|
+
})?
|
141
|
+
.to_string()
|
142
|
+
.map_err(|e| {
|
143
|
+
Error::new(
|
144
|
+
ruby.exception_runtime_error(),
|
145
|
+
format!("Failed to convert RString to Rust String: {}", e),
|
146
|
+
)
|
147
|
+
})?;
|
148
|
+
let file = std::fs::File::open(&path).map_err(|e| {
|
149
|
+
Error::new(
|
150
|
+
ruby.exception_runtime_error(),
|
151
|
+
format!("Failed to open file: {}", e),
|
152
|
+
)
|
153
|
+
})?;
|
154
|
+
Box::new(file)
|
155
|
+
};
|
156
|
+
|
157
|
+
let mut reader = csv::ReaderBuilder::new()
|
158
|
+
.has_headers(has_headers)
|
159
|
+
.delimiter(delimiter)
|
160
|
+
.from_reader(readable);
|
161
|
+
|
162
|
+
let headers = Self::get_headers(&mut reader, has_headers)?;
|
163
|
+
let headers_clone = headers.clone();
|
164
|
+
|
165
|
+
let (sender, receiver) = kanal::bounded(buffer);
|
166
|
+
let handle = thread::spawn(move || {
|
167
|
+
let mut record = csv::StringRecord::new();
|
168
|
+
while let Ok(read) = reader.read_record(&mut record) {
|
169
|
+
if !read {
|
170
|
+
let file_to_forget = reader.into_inner();
|
171
|
+
std::mem::forget(file_to_forget);
|
172
|
+
break;
|
173
|
+
}
|
174
|
+
let row = T::parse(&headers_clone, &record);
|
175
|
+
if sender.send(row).is_err() {
|
176
|
+
break;
|
177
|
+
}
|
178
|
+
}
|
179
|
+
});
|
180
|
+
|
181
|
+
let read_impl = ReadImpl::MultiThreaded {
|
182
|
+
receiver,
|
183
|
+
handle: Some(handle),
|
184
|
+
};
|
185
|
+
|
186
|
+
Ok(Self { reader: read_impl })
|
187
|
+
}
|
188
|
+
|
189
|
+
fn get_headers(
|
190
|
+
reader: &mut csv::Reader<impl Read>,
|
191
|
+
has_headers: bool,
|
192
|
+
) -> Result<Vec<String>, Error> {
|
193
|
+
let first_row = reader
|
194
|
+
.headers()
|
195
|
+
.map_err(|e| {
|
196
|
+
Error::new(
|
197
|
+
magnus::exception::runtime_error(),
|
198
|
+
format!("Failed to read headers: {}", e),
|
199
|
+
)
|
200
|
+
})?
|
201
|
+
.clone();
|
202
|
+
let num_fields = first_row.len();
|
203
|
+
|
204
|
+
Ok(if has_headers {
|
205
|
+
first_row.iter().map(|h| h.to_string()).collect()
|
206
|
+
} else {
|
207
|
+
(0..num_fields).map(|i| format!("c{}", i)).collect()
|
208
|
+
})
|
209
|
+
}
|
210
|
+
}
|
211
|
+
|
212
|
+
impl<T: RecordParser> Iterator for RecordReader<T> {
|
213
|
+
type Item = T::Output;
|
214
|
+
|
215
|
+
fn next(&mut self) -> Option<Self::Item> {
|
216
|
+
match &mut self.reader {
|
217
|
+
ReadImpl::MultiThreaded { receiver, handle } => match receiver.recv() {
|
218
|
+
Ok(record) => Some(record),
|
219
|
+
Err(_) => {
|
220
|
+
if let Some(handle) = handle.take() {
|
221
|
+
let _ = handle.join();
|
222
|
+
}
|
223
|
+
None
|
224
|
+
}
|
225
|
+
},
|
226
|
+
ReadImpl::SingleThreaded { reader, headers } => {
|
227
|
+
let mut record = csv::StringRecord::new();
|
228
|
+
match reader.read_record(&mut record) {
|
229
|
+
Ok(true) => Some(T::parse(headers, &record)),
|
230
|
+
_ => None,
|
231
|
+
}
|
232
|
+
}
|
233
|
+
}
|
234
|
+
}
|
235
|
+
}
|
@@ -0,0 +1,20 @@
|
|
1
|
+
use magnus::{
|
2
|
+
scan_args::{get_kwargs, scan_args},
|
3
|
+
Error, Value,
|
4
|
+
};
|
5
|
+
|
6
|
+
/// Parse common arguments for CSV parsing
|
7
|
+
pub fn parse_csv_args(args: &[Value]) -> Result<(Value, bool, Option<String>), Error> {
|
8
|
+
let parsed_args = scan_args::<(Value,), (), (), (), _, ()>(args)?;
|
9
|
+
let (to_read,) = parsed_args.required;
|
10
|
+
|
11
|
+
let kwargs = get_kwargs::<_, (), (Option<bool>, Option<String>), ()>(
|
12
|
+
parsed_args.keywords,
|
13
|
+
&[],
|
14
|
+
&["has_headers", "delimiter"],
|
15
|
+
)?;
|
16
|
+
|
17
|
+
let has_headers = kwargs.optional.0.unwrap_or(true);
|
18
|
+
|
19
|
+
Ok((to_read, has_headers, kwargs.optional.1))
|
20
|
+
}
|
data/lib/osv/version.rb
CHANGED
data/lib/osv.rbi
ADDED
@@ -0,0 +1,29 @@
|
|
1
|
+
# typed: strict
|
2
|
+
|
3
|
+
module OSV
|
4
|
+
sig do
|
5
|
+
type_parameters(:T)
|
6
|
+
.params(
|
7
|
+
input: T.any(String, StringIO, IO),
|
8
|
+
has_headers: T.nilable(T::Boolean),
|
9
|
+
delimiter: T.nilable(String),
|
10
|
+
blk: T.proc.params(row: T::Hash[String, String]).void
|
11
|
+
)
|
12
|
+
.returns(T.untyped)
|
13
|
+
end
|
14
|
+
def self.for_each(input, has_headers: true, delimiter: nil, &blk)
|
15
|
+
end
|
16
|
+
|
17
|
+
sig do
|
18
|
+
type_parameters(:T)
|
19
|
+
.params(
|
20
|
+
input: T.any(String, StringIO, IO),
|
21
|
+
has_headers: T.nilable(T::Boolean),
|
22
|
+
delimiter: T.nilable(String),
|
23
|
+
blk: T.proc.params(row: T::Array[String]).void
|
24
|
+
)
|
25
|
+
.returns(T.untyped)
|
26
|
+
end
|
27
|
+
def self.for_each_compat(input, has_headers: true, delimiter: nil, &blk)
|
28
|
+
end
|
29
|
+
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: osv
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.1
|
4
|
+
version: 0.2.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Nathan Jaremko
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2024-12-
|
11
|
+
date: 2024-12-23 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rb_sys
|
@@ -49,12 +49,17 @@ files:
|
|
49
49
|
- Cargo.lock
|
50
50
|
- Cargo.toml
|
51
51
|
- Gemfile
|
52
|
+
- LICENSE
|
53
|
+
- README.md
|
52
54
|
- Rakefile
|
53
55
|
- ext/osv/Cargo.lock
|
54
56
|
- ext/osv/Cargo.toml
|
55
57
|
- ext/osv/extconf.rb
|
56
58
|
- ext/osv/src/lib.rs
|
59
|
+
- ext/osv/src/reader.rs
|
60
|
+
- ext/osv/src/utils.rs
|
57
61
|
- lib/osv.rb
|
62
|
+
- lib/osv.rbi
|
58
63
|
- lib/osv/version.rb
|
59
64
|
homepage: https://github.com/njaremko/osv
|
60
65
|
licenses:
|