osv 0.2.0 → 0.2.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/LICENSE +21 -0
- data/README.md +117 -0
- data/ext/osv/src/reader.rs +17 -12
- data/lib/osv/version.rb +1 -1
- metadata +3 -1
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 17db543fc59ce3ec7b4ea87a0d403b82a628860ca01ba03eaea39438790b7863
|
4
|
+
data.tar.gz: 3d54507b6097b7b9e0a771f5a3c72d7605b27c5307528bf0707cd0f7ba29b474
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 975e4e16a32d1c2d5678f1c4ede658165fcf247f563c166fa167bff7d9bfd95c34937f894207693df2e6716b61fce8c315b6bb4dad7a29d68161ab842768eca1
|
7
|
+
data.tar.gz: 9176674f894855098875df2c3287b4370b42193f84d7bdf20d13fa8ea9de9330a431c3070a00652458bbb0a8061866e69b4849a6ee71c1b90718e811d1ed7172
|
data/LICENSE
ADDED
@@ -0,0 +1,21 @@
|
|
1
|
+
MIT License
|
2
|
+
|
3
|
+
Copyright (c) 2024 Nathan Jaremko
|
4
|
+
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
7
|
+
in the Software without restriction, including without limitation the rights
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
10
|
+
furnished to do so, subject to the following conditions:
|
11
|
+
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
13
|
+
copies or substantial portions of the Software.
|
14
|
+
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
21
|
+
SOFTWARE.
|
data/README.md
ADDED
@@ -0,0 +1,117 @@
|
|
1
|
+
# OSV
|
2
|
+
|
3
|
+
OSV is a high-performance CSV parser for Ruby, implemented in Rust. It wraps BurntSushi's excellent [csv](https://github.com/BurntSushi/rust-csv) crate.
|
4
|
+
|
5
|
+
It provides a simple interface for reading CSV files with support for both hash-based and array-based row formats.
|
6
|
+
|
7
|
+
The array-based mode is faster than the hash-based mode, so if you don't need the hash keys, use the array-based mode.
|
8
|
+
|
9
|
+
I have yet to figure out how to get Rust to accept a single method with different return types, so the two output formats are implemented as two separate methods.
|
10
|
+
|
11
|
+
## Installation
|
12
|
+
|
13
|
+
Add this line to your application's Gemfile:
|
14
|
+
|
15
|
+
```ruby
|
16
|
+
gem 'osv'
|
17
|
+
```
|
18
|
+
|
19
|
+
And then execute:
|
20
|
+
|
21
|
+
```bash
|
22
|
+
bundle install
|
23
|
+
```
|
24
|
+
|
25
|
+
Or install it directly:
|
26
|
+
|
27
|
+
```bash
|
28
|
+
gem install osv
|
29
|
+
```
|
30
|
+
|
31
|
+
## Usage
|
32
|
+
|
33
|
+
### Basic Usage with Hash Output
|
34
|
+
|
35
|
+
Each row is returned as a hash where the keys are the column headers:
|
36
|
+
|
37
|
+
```ruby
|
38
|
+
require 'osv'
|
39
|
+
|
40
|
+
# Read from a file
|
41
|
+
OSV.for_each("path/to/file.csv") do |row|
|
42
|
+
# row is a Hash like {"name" => "John", "age" => "25"}
|
43
|
+
puts row["name"]
|
44
|
+
end
|
45
|
+
|
46
|
+
# Without a block, returns an Enumerator
|
47
|
+
rows = OSV.for_each("path/to/file.csv")
|
48
|
+
rows.each { |row| puts row["name"] }
|
49
|
+
```
|
50
|
+
|
51
|
+
### Array Output Mode
|
52
|
+
|
53
|
+
If you prefer working with arrays instead of hashes, use `for_each_compat`:
|
54
|
+
|
55
|
+
```ruby
|
56
|
+
OSV.for_each_compat("path/to/file.csv") do |row|
|
57
|
+
# row is an Array like ["John", "25"]
|
58
|
+
puts row[0]
|
59
|
+
end
|
60
|
+
```
|
61
|
+
|
62
|
+
### Options
|
63
|
+
|
64
|
+
Both methods support the following options:
|
65
|
+
|
66
|
+
- `has_headers`: Boolean indicating if the first row contains headers (default: true)
|
67
|
+
- `delimiter`: String specifying the field separator (default: ",")
|
68
|
+
|
69
|
+
```ruby
|
70
|
+
# Reading TSV files
|
71
|
+
OSV.for_each("path/to/file.tsv", delimiter: "\t") do |row|
|
72
|
+
puts row["name"]
|
73
|
+
end
|
74
|
+
|
75
|
+
# Reading without headers
|
76
|
+
OSV.for_each("path/to/file.csv", has_headers: false) do |row|
|
77
|
+
# Headers will be automatically generated as "c0", "c1", etc.
|
78
|
+
puts row["c0"]
|
79
|
+
end
|
80
|
+
```
|
81
|
+
|
82
|
+
### Input Sources
|
83
|
+
|
84
|
+
OSV supports reading from:
|
85
|
+
|
86
|
+
- File paths (as strings)
|
87
|
+
- IO objects
|
88
|
+
- Important caveat: the IO object must be backed by a real file descriptor (one that `rb_io_descriptor` can return).
|
89
|
+
- StringIO objects
|
90
|
+
- Note: when you do this, the string is read (in full) into a Rust string, and we parse it there.
|
91
|
+
|
92
|
+
```ruby
|
93
|
+
# From file path
|
94
|
+
OSV.for_each("path/to/file.csv") { |row| puts row["name"] }
|
95
|
+
|
96
|
+
# From IO object
|
97
|
+
File.open("path/to/file.csv") do |file|
|
98
|
+
OSV.for_each(file) { |row| puts row["name"] }
|
99
|
+
end
|
100
|
+
|
101
|
+
# From StringIO
|
102
|
+
data = StringIO.new("name,age\nJohn,25")
|
103
|
+
OSV.for_each(data) { |row| puts row["name"] }
|
104
|
+
```
|
105
|
+
|
106
|
+
## Requirements
|
107
|
+
|
108
|
+
- Ruby >= 3.1.0
|
109
|
+
- Rust toolchain (for installation from source)
|
110
|
+
|
111
|
+
## Performance
|
112
|
+
|
113
|
+
This library is faster than the standard Ruby CSV library, and is comparable to the fastest CSV parser gems I've used.
|
114
|
+
|
115
|
+
## License
|
116
|
+
|
117
|
+
This gem is released under the MIT License; see the `LICENSE` file for details.
|
data/ext/osv/src/reader.rs
CHANGED
@@ -2,20 +2,20 @@ use crate::utils::*;
|
|
2
2
|
use magnus::{
|
3
3
|
block::Yield, rb_sys::AsRawValue, value::ReprValue, Error, RClass, RString, Ruby, Value,
|
4
4
|
};
|
5
|
-
use std::{fs::File, io::Read, os::fd::FromRawFd, thread};
|
5
|
+
use std::{collections::HashMap, fs::File, io::Read, os::fd::FromRawFd, thread};
|
6
6
|
|
7
7
|
/// Parses CSV data from a file and yields each row as a hash to the block.
|
8
8
|
pub fn parse_csv(
|
9
9
|
ruby: &Ruby,
|
10
10
|
rb_self: Value,
|
11
11
|
args: &[Value],
|
12
|
-
) -> Result<Yield<impl Iterator<Item =
|
12
|
+
) -> Result<Yield<impl Iterator<Item = HashMap<String, String>>>, Error> {
|
13
13
|
if !ruby.block_given() {
|
14
14
|
return Ok(Yield::Enumerator(rb_self.enumeratorize("for_each", args)));
|
15
15
|
}
|
16
16
|
let (to_read, has_headers, delimiter) = parse_csv_args(args)?;
|
17
17
|
|
18
|
-
let iter = RecordReader::<
|
18
|
+
let iter = RecordReader::<HashMap<String, String>>::new(
|
19
19
|
ruby,
|
20
20
|
to_read,
|
21
21
|
has_headers,
|
@@ -55,15 +55,16 @@ pub trait RecordParser {
|
|
55
55
|
fn parse(headers: &[String], record: &csv::StringRecord) -> Self::Output;
|
56
56
|
}
|
57
57
|
|
58
|
-
impl RecordParser for
|
58
|
+
impl RecordParser for HashMap<String, String> {
|
59
59
|
type Output = Self;
|
60
60
|
|
61
61
|
fn parse(headers: &[String], record: &csv::StringRecord) -> Self::Output {
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
.
|
66
|
-
|
62
|
+
let capacity = headers.len();
|
63
|
+
let mut map = HashMap::with_capacity(capacity);
|
64
|
+
for (i, field) in record.iter().enumerate() {
|
65
|
+
map.insert(headers[i].to_owned(), field.to_string());
|
66
|
+
}
|
67
|
+
map
|
67
68
|
}
|
68
69
|
}
|
69
70
|
|
@@ -71,7 +72,11 @@ impl RecordParser for Vec<String> {
|
|
71
72
|
type Output = Self;
|
72
73
|
|
73
74
|
fn parse(_headers: &[String], record: &csv::StringRecord) -> Self::Output {
|
74
|
-
record.
|
75
|
+
let mut output = Vec::with_capacity(record.len());
|
76
|
+
for field in record.iter() {
|
77
|
+
output.push(field.to_string());
|
78
|
+
}
|
79
|
+
output
|
75
80
|
}
|
76
81
|
}
|
77
82
|
|
@@ -99,12 +104,12 @@ impl<T: RecordParser + Send + 'static> RecordReader<T> {
|
|
99
104
|
delimiter: u8,
|
100
105
|
buffer: usize,
|
101
106
|
) -> Result<Self, Error> {
|
102
|
-
let string_io =
|
107
|
+
let string_io: RClass = ruby.eval("StringIO").map_err(|e| {
|
103
108
|
Error::new(
|
104
109
|
ruby.exception_runtime_error(),
|
105
110
|
format!("Failed to get StringIO class: {}", e),
|
106
111
|
)
|
107
|
-
})
|
112
|
+
})?;
|
108
113
|
|
109
114
|
let readable: Box<dyn Read + Send + 'static> = if to_read.is_kind_of(string_io) {
|
110
115
|
let string: RString = to_read.funcall("string", ()).map_err(|e| {
|
data/lib/osv/version.rb
CHANGED
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: osv
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2.0
|
4
|
+
version: 0.2.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Nathan Jaremko
|
@@ -49,6 +49,8 @@ files:
|
|
49
49
|
- Cargo.lock
|
50
50
|
- Cargo.toml
|
51
51
|
- Gemfile
|
52
|
+
- LICENSE
|
53
|
+
- README.md
|
52
54
|
- Rakefile
|
53
55
|
- ext/osv/Cargo.lock
|
54
56
|
- ext/osv/Cargo.toml
|