osv 0.2.0 → 0.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/LICENSE +21 -0
- data/README.md +117 -0
- data/ext/osv/src/reader.rs +17 -12
- data/lib/osv/version.rb +1 -1
- metadata +3 -1
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 17db543fc59ce3ec7b4ea87a0d403b82a628860ca01ba03eaea39438790b7863
|
4
|
+
data.tar.gz: 3d54507b6097b7b9e0a771f5a3c72d7605b27c5307528bf0707cd0f7ba29b474
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 975e4e16a32d1c2d5678f1c4ede658165fcf247f563c166fa167bff7d9bfd95c34937f894207693df2e6716b61fce8c315b6bb4dad7a29d68161ab842768eca1
|
7
|
+
data.tar.gz: 9176674f894855098875df2c3287b4370b42193f84d7bdf20d13fa8ea9de9330a431c3070a00652458bbb0a8061866e69b4849a6ee71c1b90718e811d1ed7172
|
data/LICENSE
ADDED
@@ -0,0 +1,21 @@
|
|
1
|
+
MIT License
|
2
|
+
|
3
|
+
Copyright (c) 2024 Nathan Jaremko
|
4
|
+
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
7
|
+
in the Software without restriction, including without limitation the rights
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
10
|
+
furnished to do so, subject to the following conditions:
|
11
|
+
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
13
|
+
copies or substantial portions of the Software.
|
14
|
+
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
21
|
+
SOFTWARE.
|
data/README.md
ADDED
@@ -0,0 +1,117 @@
|
|
1
|
+
# OSV
|
2
|
+
|
3
|
+
OSV is a high-performance CSV parser for Ruby, implemented in Rust. It wraps BurntSushi's excellent [rust-csv](https://github.com/BurntSushi/rust-csv) crate.
|
4
|
+
|
5
|
+
It provides a simple interface for reading CSV files with support for both hash-based and array-based row formats.
|
6
|
+
|
7
|
+
The array-based mode is faster than the hash-based mode, so if you don't need the hash keys, use the array-based mode.
|
8
|
+
|
9
|
+
I have yet to figure out how to get Rust to accept a single method with different return types, so there are two methods: `for_each` (rows as hashes) and `for_each_compat` (rows as arrays).
|
10
|
+
|
11
|
+
## Installation
|
12
|
+
|
13
|
+
Add this line to your application's Gemfile:
|
14
|
+
|
15
|
+
```ruby
|
16
|
+
gem 'osv'
|
17
|
+
```
|
18
|
+
|
19
|
+
And then execute:
|
20
|
+
|
21
|
+
```bash
|
22
|
+
bundle install
|
23
|
+
```
|
24
|
+
|
25
|
+
Or install it directly:
|
26
|
+
|
27
|
+
```bash
|
28
|
+
gem install osv
|
29
|
+
```
|
30
|
+
|
31
|
+
## Usage
|
32
|
+
|
33
|
+
### Basic Usage with Hash Output
|
34
|
+
|
35
|
+
Each row is returned as a hash where the keys are the column headers:
|
36
|
+
|
37
|
+
```ruby
|
38
|
+
require 'osv'
|
39
|
+
|
40
|
+
# Read from a file
|
41
|
+
OSV.for_each("path/to/file.csv") do |row|
|
42
|
+
# row is a Hash like {"name" => "John", "age" => "25"}
|
43
|
+
puts row["name"]
|
44
|
+
end
|
45
|
+
|
46
|
+
# Without a block, returns an Enumerator
|
47
|
+
rows = OSV.for_each("path/to/file.csv")
|
48
|
+
rows.each { |row| puts row["name"] }
|
49
|
+
```
|
50
|
+
|
51
|
+
### Array Output Mode
|
52
|
+
|
53
|
+
If you prefer working with arrays instead of hashes, use `for_each_compat`:
|
54
|
+
|
55
|
+
```ruby
|
56
|
+
OSV.for_each_compat("path/to/file.csv") do |row|
|
57
|
+
# row is an Array like ["John", "25"]
|
58
|
+
puts row[0]
|
59
|
+
end
|
60
|
+
```
|
61
|
+
|
62
|
+
### Options
|
63
|
+
|
64
|
+
Both methods support the following options:
|
65
|
+
|
66
|
+
- `has_headers`: Boolean indicating if the first row contains headers (default: true)
|
67
|
+
- `delimiter`: String specifying the field separator (default: ",")
|
68
|
+
|
69
|
+
```ruby
|
70
|
+
# Reading TSV files
|
71
|
+
OSV.for_each("path/to/file.tsv", delimiter: "\t") do |row|
|
72
|
+
puts row["name"]
|
73
|
+
end
|
74
|
+
|
75
|
+
# Reading without headers
|
76
|
+
OSV.for_each("path/to/file.csv", has_headers: false) do |row|
|
77
|
+
# Headers will be automatically generated as "c0", "c1", etc.
|
78
|
+
puts row["c0"]
|
79
|
+
end
|
80
|
+
```
|
81
|
+
|
82
|
+
### Input Sources
|
83
|
+
|
84
|
+
OSV supports reading from:
|
85
|
+
|
86
|
+
- File paths (as strings)
|
87
|
+
- IO objects
|
88
|
+
- Important caveat: the IO object must be backed by a real file descriptor (that is, `rb_io_descriptor` must be able to return one for it).
|
89
|
+
- StringIO objects
|
90
|
+
- Note: when you do this, the string is read (in full) into a Rust string, and we parse it there.
|
91
|
+
|
92
|
+
```ruby
|
93
|
+
# From file path
|
94
|
+
OSV.for_each("path/to/file.csv") { |row| puts row["name"] }
|
95
|
+
|
96
|
+
# From IO object
|
97
|
+
File.open("path/to/file.csv") do |file|
|
98
|
+
OSV.for_each(file) { |row| puts row["name"] }
|
99
|
+
end
|
100
|
+
|
101
|
+
# From StringIO
|
102
|
+
data = StringIO.new("name,age\nJohn,25")
|
103
|
+
OSV.for_each(data) { |row| puts row["name"] }
|
104
|
+
```
|
105
|
+
|
106
|
+
## Requirements
|
107
|
+
|
108
|
+
- Ruby >= 3.1.0
|
109
|
+
- Rust toolchain (for installation from source)
|
110
|
+
|
111
|
+
## Performance
|
112
|
+
|
113
|
+
This library is faster than the standard Ruby CSV library, and is comparable to the fastest CSV parser gems I've used.
|
114
|
+
|
115
|
+
## License
|
116
|
+
|
117
|
+
This gem is available under the terms of the MIT License; see the `LICENSE` file.
|
data/ext/osv/src/reader.rs
CHANGED
@@ -2,20 +2,20 @@ use crate::utils::*;
|
|
2
2
|
use magnus::{
|
3
3
|
block::Yield, rb_sys::AsRawValue, value::ReprValue, Error, RClass, RString, Ruby, Value,
|
4
4
|
};
|
5
|
-
use std::{fs::File, io::Read, os::fd::FromRawFd, thread};
|
5
|
+
use std::{collections::HashMap, fs::File, io::Read, os::fd::FromRawFd, thread};
|
6
6
|
|
7
7
|
/// Parses CSV data from a file and yields each row as a hash to the block.
|
8
8
|
pub fn parse_csv(
|
9
9
|
ruby: &Ruby,
|
10
10
|
rb_self: Value,
|
11
11
|
args: &[Value],
|
12
|
-
) -> Result<Yield<impl Iterator<Item =
|
12
|
+
) -> Result<Yield<impl Iterator<Item = HashMap<String, String>>>, Error> {
|
13
13
|
if !ruby.block_given() {
|
14
14
|
return Ok(Yield::Enumerator(rb_self.enumeratorize("for_each", args)));
|
15
15
|
}
|
16
16
|
let (to_read, has_headers, delimiter) = parse_csv_args(args)?;
|
17
17
|
|
18
|
-
let iter = RecordReader::<
|
18
|
+
let iter = RecordReader::<HashMap<String, String>>::new(
|
19
19
|
ruby,
|
20
20
|
to_read,
|
21
21
|
has_headers,
|
@@ -55,15 +55,16 @@ pub trait RecordParser {
|
|
55
55
|
fn parse(headers: &[String], record: &csv::StringRecord) -> Self::Output;
|
56
56
|
}
|
57
57
|
|
58
|
-
impl RecordParser for
|
58
|
+
impl RecordParser for HashMap<String, String> {
|
59
59
|
type Output = Self;
|
60
60
|
|
61
61
|
fn parse(headers: &[String], record: &csv::StringRecord) -> Self::Output {
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
.
|
66
|
-
|
62
|
+
let capacity = headers.len();
|
63
|
+
let mut map = HashMap::with_capacity(capacity);
|
64
|
+
for (i, field) in record.iter().enumerate() {
|
65
|
+
map.insert(headers[i].to_owned(), field.to_string());
|
66
|
+
}
|
67
|
+
map
|
67
68
|
}
|
68
69
|
}
|
69
70
|
|
@@ -71,7 +72,11 @@ impl RecordParser for Vec<String> {
|
|
71
72
|
type Output = Self;
|
72
73
|
|
73
74
|
fn parse(_headers: &[String], record: &csv::StringRecord) -> Self::Output {
|
74
|
-
record.
|
75
|
+
let mut output = Vec::with_capacity(record.len());
|
76
|
+
for field in record.iter() {
|
77
|
+
output.push(field.to_string());
|
78
|
+
}
|
79
|
+
output
|
75
80
|
}
|
76
81
|
}
|
77
82
|
|
@@ -99,12 +104,12 @@ impl<T: RecordParser + Send + 'static> RecordReader<T> {
|
|
99
104
|
delimiter: u8,
|
100
105
|
buffer: usize,
|
101
106
|
) -> Result<Self, Error> {
|
102
|
-
let string_io =
|
107
|
+
let string_io: RClass = ruby.eval("StringIO").map_err(|e| {
|
103
108
|
Error::new(
|
104
109
|
ruby.exception_runtime_error(),
|
105
110
|
format!("Failed to get StringIO class: {}", e),
|
106
111
|
)
|
107
|
-
})
|
112
|
+
})?;
|
108
113
|
|
109
114
|
let readable: Box<dyn Read + Send + 'static> = if to_read.is_kind_of(string_io) {
|
110
115
|
let string: RString = to_read.funcall("string", ()).map_err(|e| {
|
data/lib/osv/version.rb
CHANGED
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: osv
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2.0
|
4
|
+
version: 0.2.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Nathan Jaremko
|
@@ -49,6 +49,8 @@ files:
|
|
49
49
|
- Cargo.lock
|
50
50
|
- Cargo.toml
|
51
51
|
- Gemfile
|
52
|
+
- LICENSE
|
53
|
+
- README.md
|
52
54
|
- Rakefile
|
53
55
|
- ext/osv/Cargo.lock
|
54
56
|
- ext/osv/Cargo.toml
|