osv 0.2.0 → 0.2.1

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: a1b0c347b0bab5c9d31069c56f47999bfa51e85dfc1e127d1c4474a84ac19c53
4
- data.tar.gz: 847b199da27b7c1329c1fa64fc8636592f004e93a12fc2ddd8db6127298ac23d
3
+ metadata.gz: 17db543fc59ce3ec7b4ea87a0d403b82a628860ca01ba03eaea39438790b7863
4
+ data.tar.gz: 3d54507b6097b7b9e0a771f5a3c72d7605b27c5307528bf0707cd0f7ba29b474
5
5
  SHA512:
6
- metadata.gz: 62fa77c1ca98031f483569a4dba7cf9e4eca52a4b5fae293d274d5f89c48003e301eab01d95116cfc9cc6a2642e742d16046231a21d25e4a5143bd6ec3b40dac
7
- data.tar.gz: 3832cbb6ebadfc718a8a5d1963de960ed3abf09d4559d2f0ffe031c642a6c2581dc6ad6edf5d65f22248812585ca464916375503753831024fc355fe4cd04455
6
+ metadata.gz: 975e4e16a32d1c2d5678f1c4ede658165fcf247f563c166fa167bff7d9bfd95c34937f894207693df2e6716b61fce8c315b6bb4dad7a29d68161ab842768eca1
7
+ data.tar.gz: 9176674f894855098875df2c3287b4370b42193f84d7bdf20d13fa8ea9de9330a431c3070a00652458bbb0a8061866e69b4849a6ee71c1b90718e811d1ed7172
data/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2024 Nathan Jaremko
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,117 @@
1
+ # OSV
2
+
3
+ OSV is a high-performance CSV parser for Ruby, implemented in Rust. It wraps BurntSushi's excellent [csv](https://github.com/BurntSushi/rust-csv) crate.
4
+
5
+ It provides a simple interface for reading CSV files with support for both hash-based and array-based row formats.
6
+
7
+ The array-based mode is faster than the hash-based mode, so if you don't need the hash keys, use the array-based mode.
8
+
9
+ I have yet to figure out how to get Rust to accept an implementation of this as a single method with different return types, so there are two separate methods: `for_each` (hash rows) and `for_each_compat` (array rows).
10
+
11
+ ## Installation
12
+
13
+ Add this line to your application's Gemfile:
14
+
15
+ ```ruby
16
+ gem 'osv'
17
+ ```
18
+
19
+ And then execute:
20
+
21
+ ```bash
22
+ bundle install
23
+ ```
24
+
25
+ Or install it directly:
26
+
27
+ ```bash
28
+ gem install osv
29
+ ```
30
+
31
+ ## Usage
32
+
33
+ ### Basic Usage with Hash Output
34
+
35
+ Each row is returned as a hash where the keys are the column headers:
36
+
37
+ ```ruby
38
+ require 'osv'
39
+
40
+ # Read from a file
41
+ OSV.for_each("path/to/file.csv") do |row|
42
+ # row is a Hash like {"name" => "John", "age" => "25"}
43
+ puts row["name"]
44
+ end
45
+
46
+ # Without a block, returns an Enumerator
47
+ rows = OSV.for_each("path/to/file.csv")
48
+ rows.each { |row| puts row["name"] }
49
+ ```
50
+
51
+ ### Array Output Mode
52
+
53
+ If you prefer working with arrays instead of hashes, use `for_each_compat`:
54
+
55
+ ```ruby
56
+ OSV.for_each_compat("path/to/file.csv") do |row|
57
+ # row is an Array like ["John", "25"]
58
+ puts row[0]
59
+ end
60
+ ```
61
+
62
+ ### Options
63
+
64
+ Both methods support the following options:
65
+
66
+ - `has_headers`: Boolean indicating if the first row contains headers (default: true)
67
+ - `delimiter`: String specifying the field separator (default: ",")
68
+
69
+ ```ruby
70
+ # Reading TSV files
71
+ OSV.for_each("path/to/file.tsv", delimiter: "\t") do |row|
72
+ puts row["name"]
73
+ end
74
+
75
+ # Reading without headers
76
+ OSV.for_each("path/to/file.csv", has_headers: false) do |row|
77
+ # Headers will be automatically generated as "c0", "c1", etc.
78
+ puts row["c0"]
79
+ end
80
+ ```
81
+
82
+ ### Input Sources
83
+
84
+ OSV supports reading from:
85
+
86
+ - File paths (as strings)
87
+ - IO objects
88
+ - Important caveat: the IO object must be backed by a real file descriptor (the extension obtains it via `rb_io_descriptor`).
89
+ - StringIO objects
90
+ - Note: when you do this, the string is read (in full) into a Rust string, and we parse it there.
91
+
92
+ ```ruby
93
+ # From file path
94
+ OSV.for_each("path/to/file.csv") { |row| puts row["name"] }
95
+
96
+ # From IO object
97
+ File.open("path/to/file.csv") do |file|
98
+ OSV.for_each(file) { |row| puts row["name"] }
99
+ end
100
+
101
+ # From StringIO
102
+ data = StringIO.new("name,age\nJohn,25")
103
+ OSV.for_each(data) { |row| puts row["name"] }
104
+ ```
105
+
106
+ ## Requirements
107
+
108
+ - Ruby >= 3.1.0
109
+ - Rust toolchain (for installation from source)
110
+
111
+ ## Performance
112
+
113
+ This library is faster than the standard Ruby CSV library, and is comparable to the fastest CSV parser gems I've used.
114
+
115
+ ## License
116
+
117
+ This gem is licensed under the MIT License. See the [LICENSE](LICENSE) file for details.
@@ -2,20 +2,20 @@ use crate::utils::*;
2
2
  use magnus::{
3
3
  block::Yield, rb_sys::AsRawValue, value::ReprValue, Error, RClass, RString, Ruby, Value,
4
4
  };
5
- use std::{fs::File, io::Read, os::fd::FromRawFd, thread};
5
+ use std::{collections::HashMap, fs::File, io::Read, os::fd::FromRawFd, thread};
6
6
 
7
7
  /// Parses CSV data from a file and yields each row as a hash to the block.
8
8
  pub fn parse_csv(
9
9
  ruby: &Ruby,
10
10
  rb_self: Value,
11
11
  args: &[Value],
12
- ) -> Result<Yield<impl Iterator<Item = std::collections::HashMap<String, String>>>, Error> {
12
+ ) -> Result<Yield<impl Iterator<Item = HashMap<String, String>>>, Error> {
13
13
  if !ruby.block_given() {
14
14
  return Ok(Yield::Enumerator(rb_self.enumeratorize("for_each", args)));
15
15
  }
16
16
  let (to_read, has_headers, delimiter) = parse_csv_args(args)?;
17
17
 
18
- let iter = RecordReader::<std::collections::HashMap<String, String>>::new(
18
+ let iter = RecordReader::<HashMap<String, String>>::new(
19
19
  ruby,
20
20
  to_read,
21
21
  has_headers,
@@ -55,15 +55,16 @@ pub trait RecordParser {
55
55
  fn parse(headers: &[String], record: &csv::StringRecord) -> Self::Output;
56
56
  }
57
57
 
58
- impl RecordParser for std::collections::HashMap<String, String> {
58
+ impl RecordParser for HashMap<String, String> {
59
59
  type Output = Self;
60
60
 
61
61
  fn parse(headers: &[String], record: &csv::StringRecord) -> Self::Output {
62
- record
63
- .iter()
64
- .enumerate()
65
- .map(|(i, field)| (headers[i].clone(), field.to_string()))
66
- .collect()
62
+ let capacity = headers.len();
63
+ let mut map = HashMap::with_capacity(capacity);
64
+ for (i, field) in record.iter().enumerate() {
65
+ map.insert(headers[i].to_owned(), field.to_string());
66
+ }
67
+ map
67
68
  }
68
69
  }
69
70
 
@@ -71,7 +72,11 @@ impl RecordParser for Vec<String> {
71
72
  type Output = Self;
72
73
 
73
74
  fn parse(_headers: &[String], record: &csv::StringRecord) -> Self::Output {
74
- record.iter().map(|field| field.to_string()).collect()
75
+ let mut output = Vec::with_capacity(record.len());
76
+ for field in record.iter() {
77
+ output.push(field.to_string());
78
+ }
79
+ output
75
80
  }
76
81
  }
77
82
 
@@ -99,12 +104,12 @@ impl<T: RecordParser + Send + 'static> RecordReader<T> {
99
104
  delimiter: u8,
100
105
  buffer: usize,
101
106
  ) -> Result<Self, Error> {
102
- let string_io = RClass::from(ruby.eval("StringIO").map_err(|e| {
107
+ let string_io: RClass = ruby.eval("StringIO").map_err(|e| {
103
108
  Error::new(
104
109
  ruby.exception_runtime_error(),
105
110
  format!("Failed to get StringIO class: {}", e),
106
111
  )
107
- })?);
112
+ })?;
108
113
 
109
114
  let readable: Box<dyn Read + Send + 'static> = if to_read.is_kind_of(string_io) {
110
115
  let string: RString = to_read.funcall("string", ()).map_err(|e| {
data/lib/osv/version.rb CHANGED
@@ -1,3 +1,3 @@
1
1
  module OSV
2
- VERSION = "0.2.0"
2
+ VERSION = "0.2.1"
3
3
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: osv
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.0
4
+ version: 0.2.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Nathan Jaremko
@@ -49,6 +49,8 @@ files:
49
49
  - Cargo.lock
50
50
  - Cargo.toml
51
51
  - Gemfile
52
+ - LICENSE
53
+ - README.md
52
54
  - Rakefile
53
55
  - ext/osv/Cargo.lock
54
56
  - ext/osv/Cargo.toml