osv 0.2.1 → 0.3.0

checksums.yaml CHANGED
@@ -1,7 +1,7 @@
  ---
  SHA256:
-   metadata.gz: 17db543fc59ce3ec7b4ea87a0d403b82a628860ca01ba03eaea39438790b7863
-   data.tar.gz: 3d54507b6097b7b9e0a771f5a3c72d7605b27c5307528bf0707cd0f7ba29b474
+   metadata.gz: f22d1d56b0eba1e23ca192db2c70e68689486e2f0032672285017e1f98a530d2
+   data.tar.gz: 6288dce70b95faf312e8aa244ba56a60c3d59b85bdd16a4a951060df78b97e1e
  SHA512:
-   metadata.gz: 975e4e16a32d1c2d5678f1c4ede658165fcf247f563c166fa167bff7d9bfd95c34937f894207693df2e6716b61fce8c315b6bb4dad7a29d68161ab842768eca1
-   data.tar.gz: 9176674f894855098875df2c3287b4370b42193f84d7bdf20d13fa8ea9de9330a431c3070a00652458bbb0a8061866e69b4849a6ee71c1b90718e811d1ed7172
+   metadata.gz: 5399b43ecd3987c73daf09341d51a1ee8e5d060f1085e9a7aac9b823ab723ccbbc4084c5c0c9abbd36e7e17becfcfa1757af6af3666e01ef3992a27e35b5b983
+   data.tar.gz: 2a6d98b645af40ab08a5a01a2bcf5b67ce9ebff18a993e602f6b00fa2f3f80d65c63605a7f5a715286ac20751db0607c515686e4439156a4039ae24f49e95e10
data/README.md CHANGED
@@ -1,13 +1,13 @@
  # OSV
 
+ [![Gem Version](https://badge.fury.io/rb/osv.svg)](https://badge.fury.io/rb/osv)
+
  OSV is a high-performance CSV parser for Ruby, implemented in Rust. It wraps BurntSushi's excellent [csv-rs](https://github.com/BurntSushi/rust-csv) crate.
 
  It provides a simple interface for reading CSV files with support for both hash-based and array-based row formats.
 
  The array-based mode is faster than the hash-based mode, so if you don't need the hash keys, use the array-based mode.
 
- I have yet to figure out how to get rust to accept an implementation of this as one method with different return types, so I've had to implement two methods.
-
  ## Installation
 
  Add this line to your application's Gemfile:
@@ -53,7 +53,7 @@ rows.each { |row| puts row["name"] }
  If you prefer working with arrays instead of hashes, use `for_each_compat`:
 
  ```ruby
- OSV.for_each_compat("path/to/file.csv") do |row|
+ OSV.for_each("path/to/file.csv", result_type: :array) do |row|
    # row is an Array like ["John", "25"]
    puts row[0]
  end
@@ -64,11 +64,11 @@ end
  Both methods support the following options:
 
  - `has_headers`: Boolean indicating if the first row contains headers (default: true)
- - `delimiter`: String specifying the field separator (default: ",")
+ - `col_sep`: String specifying the field separator (default: ",")
 
  ```ruby
  # Reading TSV files
- OSV.for_each("path/to/file.tsv", delimiter: "\t") do |row|
+ OSV.for_each("path/to/file.tsv", col_sep: "\t") do |row|
    puts row["name"]
  end
 
@@ -111,7 +111,3 @@ OSV.for_each(data) { |row| puts row["name"] }
  ## Performance
 
  This library is faster than the standard Ruby CSV library, and is comparable to the fastest CSV parser gems I've used.
-
- ## License
-
- This gem is not currently licensed for public use.
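Taken together, the README changes above collapse `for_each_compat` into a single `for_each` that accepts a `result_type` keyword. A minimal usage sketch of the 0.3.0 interface, assuming an illustrative `data.csv` with `name` and `age` columns:

```ruby
require "osv"

# Default: each row is a Hash keyed by the header row (result_type "hash").
OSV.for_each("data.csv") do |row|
  puts row["name"]
end

# result_type: :array replaces the old for_each_compat and yields plain arrays.
OSV.for_each("data.csv", result_type: :array) do |row|
  puts row[0]
end
```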
data/ext/osv/src/csv/builder.rs ADDED
@@ -0,0 +1,114 @@
+ use super::{
+     parser::RecordParser,
+     reader::{ReadImpl, RecordReader},
+ };
+ use magnus::{rb_sys::AsRawValue, value::ReprValue, Error, RString, Ruby, Value};
+ use std::{fs::File, io::Read, marker::PhantomData, os::fd::FromRawFd, thread};
+
+ pub struct RecordReaderBuilder<'a, T: RecordParser + Send + 'static> {
+     ruby: &'a Ruby,
+     to_read: Value,
+     has_headers: bool,
+     delimiter: u8,
+     quote_char: u8,
+     null_string: String,
+     buffer: usize,
+     _phantom: PhantomData<T>,
+ }
+
+ impl<'a, T: RecordParser + Send + 'static> RecordReaderBuilder<'a, T> {
+     pub fn new(ruby: &'a Ruby, to_read: Value) -> Self {
+         Self {
+             ruby,
+             to_read,
+             has_headers: true,
+             delimiter: b',',
+             quote_char: b'"',
+             null_string: String::new(),
+             buffer: 1000,
+             _phantom: PhantomData,
+         }
+     }
+
+     pub fn has_headers(mut self, has_headers: bool) -> Self {
+         self.has_headers = has_headers;
+         self
+     }
+
+     pub fn delimiter(mut self, delimiter: u8) -> Self {
+         self.delimiter = delimiter;
+         self
+     }
+
+     pub fn quote_char(mut self, quote_char: u8) -> Self {
+         self.quote_char = quote_char;
+         self
+     }
+
+     pub fn null_string(mut self, null_string: String) -> Self {
+         self.null_string = null_string;
+         self
+     }
+
+     pub fn buffer(mut self, buffer: usize) -> Self {
+         self.buffer = buffer;
+         self
+     }
+
+     fn get_reader(&self) -> Result<Box<dyn Read + Send + 'static>, Error> {
+         let string_io: magnus::RClass = self.ruby.eval("StringIO")?;
+
+         if self.to_read.is_kind_of(string_io) {
+             let string: RString = self.to_read.funcall("string", ())?;
+             let content = string.to_string()?;
+             Ok(Box::new(std::io::Cursor::new(content)))
+         } else if self.to_read.is_kind_of(self.ruby.class_io()) {
+             let fd = unsafe { rb_sys::rb_io_descriptor(self.to_read.as_raw()) };
+             let file = unsafe { File::from_raw_fd(fd) };
+             Ok(Box::new(file))
+         } else {
+             let path = self.to_read.to_r_string()?.to_string()?;
+             let file = std::fs::File::open(&path).map_err(|e| {
+                 Error::new(
+                     self.ruby.exception_runtime_error(),
+                     format!("Failed to open file: {e}"),
+                 )
+             })?;
+             Ok(Box::new(file))
+         }
+     }
+
+     pub fn build(self) -> Result<RecordReader<T>, Error> {
+         let readable = self.get_reader()?;
+
+         let mut reader = csv::ReaderBuilder::new()
+             .has_headers(self.has_headers)
+             .delimiter(self.delimiter)
+             .quote(self.quote_char)
+             .from_reader(readable);
+
+         let headers = RecordReader::<T>::get_headers(self.ruby, &mut reader, self.has_headers)?;
+         let headers_clone = headers.clone();
+         let null_string = self.null_string;
+
+         let (sender, receiver) = kanal::bounded(self.buffer);
+         let handle = thread::spawn(move || {
+             let mut record = csv::StringRecord::new();
+             while let Ok(true) = reader.read_record(&mut record) {
+                 let row = T::parse(&headers_clone, &record, &null_string);
+                 if sender.send(row).is_err() {
+                     break;
+                 }
+             }
+             let file_to_forget = reader.into_inner();
+             std::mem::forget(file_to_forget);
+         });
+
+         Ok(RecordReader {
+             reader: ReadImpl::MultiThreaded {
+                 receiver,
+                 handle: Some(handle),
+             },
+         })
+     }
+ }
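The builder above parses rows on a background thread and hands them to Ruby through a bounded `kanal` channel whose capacity is the `buffer` field (default 1000). On the Ruby side that capacity is exposed as the `buffer_size` keyword; a hedged sketch, where the file name and `process` helper are placeholders:

```ruby
# Cap how many parsed rows the reader thread may queue ahead of the block.
OSV.for_each("large.csv", buffer_size: 100) do |row|
  process(row) # placeholder for your own per-row handling
end
```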
data/ext/osv/src/csv/mod.rs ADDED
@@ -0,0 +1,8 @@
+ mod builder;
+ mod parser;
+ mod reader;
+ mod record;
+
+ pub use builder::RecordReaderBuilder;
+ pub use record::CsvRecord;
+
data/ext/osv/src/csv/parser.rs ADDED
@@ -0,0 +1,43 @@
+ use std::collections::HashMap;
+
+ pub trait RecordParser {
+     type Output;
+
+     fn parse(headers: &[String], record: &csv::StringRecord, null_string: &str) -> Self::Output;
+ }
+
+ impl RecordParser for HashMap<String, Option<String>> {
+     type Output = Self;
+
+     fn parse(headers: &[String], record: &csv::StringRecord, null_string: &str) -> Self::Output {
+         headers
+             .iter()
+             .zip(record.iter())
+             .map(|(header, field)| {
+                 let value = if field == null_string {
+                     None
+                 } else {
+                     Some(field.to_string())
+                 };
+                 (header.clone(), value)
+             })
+             .collect()
+     }
+ }
+
+ impl RecordParser for Vec<Option<String>> {
+     type Output = Self;
+
+     fn parse(_headers: &[String], record: &csv::StringRecord, null_string: &str) -> Self::Output {
+         record
+             .iter()
+             .map(|field| {
+                 if field == null_string {
+                     None
+                 } else {
+                     Some(field.to_string())
+                 }
+             })
+             .collect()
+     }
+ }
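Both `RecordParser` implementations above turn any field equal to `null_string` into `None`, which reaches Ruby as `nil`. That string is configured with the `nil_string` keyword; a small sketch with made-up CSV content:

```ruby
# people.csv (illustrative):
#   name,nickname
#   John,NULL
OSV.for_each("people.csv", nil_string: "NULL") do |row|
  row["nickname"] # => nil rather than the literal string "NULL"
end
```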
data/ext/osv/src/csv/reader.rs ADDED
@@ -0,0 +1,73 @@
+ use super::parser::RecordParser;
+ use magnus::{Error, Ruby};
+ use std::{io::Read, thread};
+
+ pub struct RecordReader<T: RecordParser> {
+     pub(crate) reader: ReadImpl<T>,
+ }
+
+ #[allow(dead_code)]
+ pub enum ReadImpl<T: RecordParser> {
+     SingleThreaded {
+         reader: csv::Reader<Box<dyn Read + Send + 'static>>,
+         headers: Vec<String>,
+         null_string: String,
+     },
+     MultiThreaded {
+         receiver: kanal::Receiver<T::Output>,
+         handle: Option<thread::JoinHandle<()>>,
+     },
+ }
+
+ impl<T: RecordParser> RecordReader<T> {
+     pub(crate) fn get_headers(
+         ruby: &Ruby,
+         reader: &mut csv::Reader<impl Read>,
+         has_headers: bool,
+     ) -> Result<Vec<String>, Error> {
+         let first_row = reader
+             .headers()
+             .map_err(|e| {
+                 Error::new(
+                     ruby.exception_runtime_error(),
+                     format!("Failed to read headers: {e}"),
+                 )
+             })?
+             .clone();
+
+         Ok(if has_headers {
+             first_row.iter().map(String::from).collect()
+         } else {
+             (0..first_row.len()).map(|i| format!("c{i}")).collect()
+         })
+     }
+ }
+
+ impl<T: RecordParser> Iterator for RecordReader<T> {
+     type Item = T::Output;
+
+     fn next(&mut self) -> Option<Self::Item> {
+         match &mut self.reader {
+             ReadImpl::MultiThreaded { receiver, handle } => match receiver.recv() {
+                 Ok(record) => Some(record),
+                 Err(_) => {
+                     if let Some(handle) = handle.take() {
+                         let _ = handle.join();
+                     }
+                     None
+                 }
+             },
+             ReadImpl::SingleThreaded {
+                 reader,
+                 headers,
+                 null_string,
+             } => {
+                 let mut record = csv::StringRecord::new();
+                 match reader.read_record(&mut record) {
+                     Ok(true) => Some(T::parse(headers, &record, null_string)),
+                     _ => None,
+                 }
+             }
+         }
+     }
+ }
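`get_headers` above synthesizes keys `c0`, `c1`, … when the file has no header row, so hash rows stay usable. Roughly, from Ruby (file name illustrative):

```ruby
# First row is treated as data; keys are generated as "c0", "c1", ...
OSV.for_each("no_header.csv", has_headers: false) do |row|
  puts row["c0"]
end
```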
data/ext/osv/src/csv/record.rs ADDED
@@ -0,0 +1,17 @@
+ use magnus::{IntoValue, Ruby, Value};
+ use std::collections::HashMap;
+
+ #[derive(Debug)]
+ pub enum CsvRecord {
+     Vec(Vec<Option<String>>),
+     Map(HashMap<String, Option<String>>),
+ }
+
+ impl IntoValue for CsvRecord {
+     fn into_value_with(self, handle: &Ruby) -> Value {
+         match self {
+             CsvRecord::Vec(vec) => vec.into_value_with(handle),
+             CsvRecord::Map(map) => map.into_value_with(handle),
+         }
+     }
+ }
data/ext/osv/src/lib.rs CHANGED
@@ -1,3 +1,4 @@
+ mod csv;
  mod reader;
  mod utils;
 
@@ -10,6 +11,5 @@ use magnus::{Error, Ruby};
  fn init(ruby: &Ruby) -> Result<(), Error> {
      let module = ruby.define_module("OSV")?;
      module.define_module_function("for_each", magnus::method!(parse_csv, -1))?;
-     module.define_module_function("for_each_compat", magnus::method!(parse_compat, -1))?;
      Ok(())
  }
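With `for_each_compat` no longer registered here, callers presumably hit a `NoMethodError` on 0.3.0 unless a Ruby-side shim exists outside this diff; the migration path is the `result_type` keyword:

```ruby
# 0.2.x: OSV.for_each_compat("data.csv") { |row| ... }
# 0.3.0:
OSV.for_each("data.csv", result_type: :array) { |row| p row }
```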
data/ext/osv/src/reader.rs CHANGED
@@ -1,235 +1,100 @@
+ use std::collections::HashMap;
+
+ use crate::csv::{CsvRecord, RecordReaderBuilder};
  use crate::utils::*;
- use magnus::{
-     block::Yield, rb_sys::AsRawValue, value::ReprValue, Error, RClass, RString, Ruby, Value,
- };
- use std::{collections::HashMap, fs::File, io::Read, os::fd::FromRawFd, thread};
+ use magnus::value::ReprValue;
+ use magnus::{block::Yield, Error, KwArgs, RHash, Ruby, Symbol, Value};
 
- /// Parses CSV data from a file and yields each row as a hash to the block.
  pub fn parse_csv(
      ruby: &Ruby,
      rb_self: Value,
      args: &[Value],
- ) -> Result<Yield<impl Iterator<Item = HashMap<String, String>>>, Error> {
-     if !ruby.block_given() {
-         return Ok(Yield::Enumerator(rb_self.enumeratorize("for_each", args)));
-     }
-     let (to_read, has_headers, delimiter) = parse_csv_args(args)?;
-
-     let iter = RecordReader::<HashMap<String, String>>::new(
-         ruby,
+ ) -> Result<Yield<Box<dyn Iterator<Item = CsvRecord>>>, Error> {
+     let CsvArgs {
          to_read,
          has_headers,
-         delimiter.unwrap_or_else(|| ",".to_string()).as_bytes()[0],
-         1000,
-     )?;
+         delimiter,
+         quote_char,
+         null_string,
+         buffer_size,
+         result_type,
+     } = parse_csv_args(ruby, args)?;
 
-     Ok(Yield::Iter(iter))
- }
-
- pub fn parse_compat(
-     ruby: &Ruby,
-     rb_self: Value,
-     args: &[Value],
- ) -> Result<Yield<impl Iterator<Item = Vec<String>>>, Error> {
      if !ruby.block_given() {
-         return Ok(Yield::Enumerator(
-             rb_self.enumeratorize("for_each_compat", args),
-         ));
-     }
-     let (to_read, has_headers, delimiter) = parse_csv_args(args)?;
-
-     let iter = RecordReader::<Vec<String>>::new(
-         ruby,
-         to_read,
-         has_headers,
-         delimiter.unwrap_or_else(|| ",".to_string()).as_bytes()[0],
-         1000,
-     )?;
-
-     Ok(Yield::Iter(iter))
- }
-
- pub trait RecordParser {
-     type Output;
-
-     fn parse(headers: &[String], record: &csv::StringRecord) -> Self::Output;
- }
-
- impl RecordParser for HashMap<String, String> {
-     type Output = Self;
-
-     fn parse(headers: &[String], record: &csv::StringRecord) -> Self::Output {
-         let capacity = headers.len();
-         let mut map = HashMap::with_capacity(capacity);
-         for (i, field) in record.iter().enumerate() {
-             map.insert(headers[i].to_owned(), field.to_string());
-         }
-         map
+         return create_enumerator(EnumeratorArgs {
+             rb_self,
+             to_read,
+             has_headers,
+             delimiter,
+             quote_char,
+             null_string,
+             buffer_size,
+             result_type,
+         });
      }
- }
-
- impl RecordParser for Vec<String> {
-     type Output = Self;
 
-     fn parse(_headers: &[String], record: &csv::StringRecord) -> Self::Output {
-         let mut output = Vec::with_capacity(record.len());
-         for field in record.iter() {
-             output.push(field.to_string());
+     let iter: Box<dyn Iterator<Item = CsvRecord>> = match result_type.as_str() {
+         "hash" => Box::new(
+             RecordReaderBuilder::<HashMap<String, Option<String>>>::new(ruby, to_read)
+                 .has_headers(has_headers)
+                 .delimiter(delimiter)
+                 .quote_char(quote_char)
+                 .null_string(null_string)
+                 .buffer(buffer_size)
+                 .build()?
+                 .map(CsvRecord::Map),
+         ),
+         "array" => Box::new(
+             RecordReaderBuilder::<Vec<Option<String>>>::new(ruby, to_read)
+                 .has_headers(has_headers)
+                 .delimiter(delimiter)
+                 .quote_char(quote_char)
+                 .null_string(null_string)
+                 .buffer(buffer_size)
+                 .build()?
+                 .map(CsvRecord::Vec),
+         ),
+         _ => {
+             return Err(Error::new(
+                 ruby.exception_runtime_error(),
+                 "Invalid result type",
+             ))
          }
-         output
-     }
- }
-
- struct RecordReader<T: RecordParser> {
-     reader: ReadImpl<T>,
- }
+     };
 
- #[allow(dead_code)]
- enum ReadImpl<T: RecordParser> {
-     SingleThreaded {
-         reader: csv::Reader<Box<dyn Read + Send + 'static>>,
-         headers: Vec<String>,
-     },
-     MultiThreaded {
-         receiver: kanal::Receiver<T::Output>,
-         handle: Option<thread::JoinHandle<()>>,
-     },
+     Ok(Yield::Iter(iter))
  }
 
- impl<T: RecordParser + Send + 'static> RecordReader<T> {
-     fn new(
-         ruby: &Ruby,
-         to_read: Value,
-         has_headers: bool,
-         delimiter: u8,
-         buffer: usize,
-     ) -> Result<Self, Error> {
-         let string_io: RClass = ruby.eval("StringIO").map_err(|e| {
-             Error::new(
-                 ruby.exception_runtime_error(),
-                 format!("Failed to get StringIO class: {}", e),
-             )
-         })?;
-
-         let readable: Box<dyn Read + Send + 'static> = if to_read.is_kind_of(string_io) {
-             let string: RString = to_read.funcall("string", ()).map_err(|e| {
-                 Error::new(
-                     ruby.exception_runtime_error(),
-                     format!("Failed to get string from StringIO: {}", e),
-                 )
-             })?;
-             let content = string.to_string().map_err(|e| {
-                 Error::new(
-                     ruby.exception_runtime_error(),
-                     format!("Failed to convert string to Rust String: {}", e),
-                 )
-             })?;
-             Box::new(std::io::Cursor::new(content))
-         } else if to_read.is_kind_of(ruby.class_io()) {
-             let fd = unsafe { rb_sys::rb_io_descriptor(to_read.as_raw()) };
-             let file = unsafe { File::from_raw_fd(fd) };
-             Box::new(file)
-         } else {
-             let path = to_read
-                 .to_r_string()
-                 .map_err(|e| {
-                     Error::new(
-                         ruby.exception_runtime_error(),
-                         format!("Failed to convert path to string: {}", e),
-                     )
-                 })?
-                 .to_string()
-                 .map_err(|e| {
-                     Error::new(
-                         ruby.exception_runtime_error(),
-                         format!("Failed to convert RString to Rust String: {}", e),
-                     )
-                 })?;
-             let file = std::fs::File::open(&path).map_err(|e| {
-                 Error::new(
-                     ruby.exception_runtime_error(),
-                     format!("Failed to open file: {}", e),
-                 )
-             })?;
-             Box::new(file)
-         };
-
-         let mut reader = csv::ReaderBuilder::new()
-             .has_headers(has_headers)
-             .delimiter(delimiter)
-             .from_reader(readable);
-
-         let headers = Self::get_headers(&mut reader, has_headers)?;
-         let headers_clone = headers.clone();
-
-         let (sender, receiver) = kanal::bounded(buffer);
-         let handle = thread::spawn(move || {
-             let mut record = csv::StringRecord::new();
-             while let Ok(read) = reader.read_record(&mut record) {
-                 if !read {
-                     let file_to_forget = reader.into_inner();
-                     std::mem::forget(file_to_forget);
-                     break;
-                 }
-                 let row = T::parse(&headers_clone, &record);
-                 if sender.send(row).is_err() {
-                     break;
-                 }
-             }
-         });
-
-         let read_impl = ReadImpl::MultiThreaded {
-             receiver,
-             handle: Some(handle),
-         };
-
-         Ok(Self { reader: read_impl })
-     }
-
-     fn get_headers(
-         reader: &mut csv::Reader<impl Read>,
-         has_headers: bool,
-     ) -> Result<Vec<String>, Error> {
-         let first_row = reader
-             .headers()
-             .map_err(|e| {
-                 Error::new(
-                     magnus::exception::runtime_error(),
-                     format!("Failed to read headers: {}", e),
-                 )
-             })?
-             .clone();
-         let num_fields = first_row.len();
-
-         Ok(if has_headers {
-             first_row.iter().map(|h| h.to_string()).collect()
-         } else {
-             (0..num_fields).map(|i| format!("c{}", i)).collect()
-         })
-     }
+ struct EnumeratorArgs {
+     rb_self: Value,
+     to_read: Value,
+     has_headers: bool,
+     delimiter: u8,
+     quote_char: u8,
+     null_string: String,
+     buffer_size: usize,
+     result_type: String,
  }
 
- impl<T: RecordParser> Iterator for RecordReader<T> {
-     type Item = T::Output;
-
-     fn next(&mut self) -> Option<Self::Item> {
-         match &mut self.reader {
-             ReadImpl::MultiThreaded { receiver, handle } => match receiver.recv() {
-                 Ok(record) => Some(record),
-                 Err(_) => {
-                     if let Some(handle) = handle.take() {
-                         let _ = handle.join();
-                     }
-                     None
-                 }
-             },
-             ReadImpl::SingleThreaded { reader, headers } => {
-                 let mut record = csv::StringRecord::new();
-                 match reader.read_record(&mut record) {
-                     Ok(true) => Some(T::parse(headers, &record)),
-                     _ => None,
-                 }
-             }
-         }
-     }
+ fn create_enumerator(
+     args: EnumeratorArgs,
+ ) -> Result<Yield<Box<dyn Iterator<Item = CsvRecord>>>, Error> {
+     let kwargs = RHash::new();
+     kwargs.aset(Symbol::new("has_headers"), args.has_headers)?;
+     kwargs.aset(
+         Symbol::new("col_sep"),
+         String::from_utf8(vec![args.delimiter]).unwrap(),
+     )?;
+     kwargs.aset(
+         Symbol::new("quote_char"),
+         String::from_utf8(vec![args.quote_char]).unwrap(),
+     )?;
+     kwargs.aset(Symbol::new("nil_string"), args.null_string)?;
+     kwargs.aset(Symbol::new("buffer_size"), args.buffer_size)?;
+     kwargs.aset(Symbol::new("result_type"), Symbol::new(args.result_type))?;
+
+     let enumerator = args
+         .rb_self
+         .enumeratorize("for_each", (args.to_read, KwArgs(kwargs)));
+     Ok(Yield::Enumerator(enumerator))
  }
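When no block is given, `parse_csv` now rebuilds the keyword arguments and returns an `Enumerator` via `create_enumerator`, so lazy iteration keeps working. An assumed usage sketch:

```ruby
rows = OSV.for_each("data.csv", col_sep: ";", result_type: :hash) # no block: Enumerator
rows.take(10).each { |row| puts row["name"] }
```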
data/ext/osv/src/utils.rs CHANGED
@@ -1,20 +1,123 @@
  use magnus::{
      scan_args::{get_kwargs, scan_args},
-     Error, Value,
+     value::ReprValue,
+     Error, RString, Ruby, Symbol, Value,
  };
 
+ #[derive(Debug)]
+ pub struct CsvArgs {
+     pub to_read: Value,
+     pub has_headers: bool,
+     pub delimiter: u8,
+     pub quote_char: u8,
+     pub null_string: String,
+     pub buffer_size: usize,
+     pub result_type: String,
+ }
+
  /// Parse common arguments for CSV parsing
- pub fn parse_csv_args(args: &[Value]) -> Result<(Value, bool, Option<String>), Error> {
+ pub fn parse_csv_args(ruby: &Ruby, args: &[Value]) -> Result<CsvArgs, Error> {
      let parsed_args = scan_args::<(Value,), (), (), (), _, ()>(args)?;
      let (to_read,) = parsed_args.required;
 
-     let kwargs = get_kwargs::<_, (), (Option<bool>, Option<String>), ()>(
+     let kwargs = get_kwargs::<
+         _,
+         (),
+         (
+             Option<bool>,
+             Option<String>,
+             Option<String>,
+             Option<String>,
+             Option<usize>,
+             Option<Value>,
+         ),
+         (),
+     >(
          parsed_args.keywords,
          &[],
-         &["has_headers", "delimiter"],
+         &[
+             "has_headers",
+             "col_sep",
+             "quote_char",
+             "nil_string",
+             "buffer_size",
+             "result_type",
+         ],
      )?;
 
      let has_headers = kwargs.optional.0.unwrap_or(true);
 
-     Ok((to_read, has_headers, kwargs.optional.1))
+     let delimiter = *kwargs
+         .optional
+         .1
+         .unwrap_or_else(|| ",".to_string())
+         .as_bytes()
+         .first()
+         .ok_or_else(|| {
+             Error::new(
+                 magnus::exception::runtime_error(),
+                 "Delimiter cannot be empty",
+             )
+         })?;
+
+     let quote_char = *kwargs
+         .optional
+         .2
+         .unwrap_or_else(|| "\"".to_string())
+         .as_bytes()
+         .first()
+         .ok_or_else(|| {
+             Error::new(
+                 magnus::exception::runtime_error(),
+                 "Quote character cannot be empty",
+             )
+         })?;
+
+     let null_string = kwargs.optional.3.unwrap_or_else(|| "".to_string());
+
+     let buffer_size = kwargs.optional.4.unwrap_or(1000);
+
+     let result_type = match kwargs.optional.5 {
+         Some(value) => {
+             let parsed = if value.is_kind_of(ruby.class_string()) {
+                 RString::from_value(value)
+                     .ok_or_else(|| {
+                         Error::new(magnus::exception::type_error(), "Invalid string value")
+                     })?
+                     .to_string()?
+             } else if value.is_kind_of(ruby.class_symbol()) {
+                 Symbol::from_value(value)
+                     .ok_or_else(|| {
+                         Error::new(magnus::exception::type_error(), "Invalid symbol value")
+                     })?
+                     .funcall("to_s", ())?
+             } else {
+                 return Err(Error::new(
+                     magnus::exception::type_error(),
+                     "result_type must be a String or Symbol",
+                 ));
+             };
+
+             match parsed.as_str() {
+                 "hash" | "array" => parsed,
+                 _ => {
+                     return Err(Error::new(
+                         magnus::exception::runtime_error(),
+                         "result_type must be either 'hash' or 'array'",
+                     ))
+                 }
+             }
+         }
+         None => String::from("hash"),
+     };
+
+     Ok(CsvArgs {
+         to_read,
+         has_headers,
+         delimiter,
+         quote_char,
+         null_string,
+         buffer_size,
+         result_type,
+     })
  }
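The argument parsing above only reads the first byte of `col_sep` and `quote_char` (and rejects empty strings with a `RuntimeError`), while `result_type` may be a String or a Symbol but must name `"hash"` or `"array"`. A sketch of the accepted and rejected forms:

```ruby
OSV.for_each("data.csv", result_type: "array") { |row| p row } # String form
OSV.for_each("data.csv", result_type: :hash)  { |row| p row } # Symbol form

OSV.for_each("data.csv", col_sep: "") { |row| p row }
# => RuntimeError: Delimiter cannot be empty
```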
data/lib/osv/version.rb CHANGED
@@ -1,3 +1,3 @@
  module OSV
-   VERSION = "0.2.1"
+   VERSION = "0.3.0"
  end
data/lib/osv.rbi CHANGED
@@ -2,28 +2,26 @@
 
  module OSV
    sig do
-     type_parameters(:T)
-       .params(
-         input: T.any(String, StringIO, IO),
-         has_headers: T.nilable(T::Boolean),
-         delimiter: T.nilable(String),
-         blk: T.proc.params(row: T::Hash[String, String]).void
-       )
-       .returns(T.untyped)
+     params(
+       input: T.any(String, StringIO, IO),
+       has_headers: T.nilable(T::Boolean),
+       col_sep: T.nilable(String),
+       quote_char: T.nilable(String),
+       nil_string: T.nilable(String),
+       buffer_size: T.nilable(Integer),
+       result_type: T.nilable(String),
+       blk: T.nilable(T.proc.params(row: T.any(T::Hash[String, T.nilable(String)], T::Array[T.nilable(String)])).void)
+     ).returns(T.any(Enumerator, T.untyped))
    end
-   def self.for_each(input, has_headers: true, delimiter: nil, &blk)
-   end
-
-   sig do
-     type_parameters(:T)
-       .params(
-         input: T.any(String, StringIO, IO),
-         has_headers: T.nilable(T::Boolean),
-         delimiter: T.nilable(String),
-         blk: T.proc.params(row: T::Array[String]).void
-       )
-       .returns(T.untyped)
-   end
-   def self.for_each_compat(input, has_headers: true, delimiter: nil, &blk)
+   def self.for_each(
+     input,
+     has_headers: true,
+     col_sep: nil,
+     quote_char: nil,
+     nil_string: nil,
+     buffer_size: nil,
+     result_type: nil,
+     &blk
+   )
    end
  end
metadata CHANGED
@@ -1,7 +1,7 @@
  --- !ruby/object:Gem::Specification
  name: osv
  version: !ruby/object:Gem::Version
-   version: 0.2.1
+   version: 0.3.0
  platform: ruby
  authors:
  - Nathan Jaremko
@@ -55,6 +55,11 @@ files:
  - ext/osv/Cargo.lock
  - ext/osv/Cargo.toml
  - ext/osv/extconf.rb
+ - ext/osv/src/csv/builder.rs
+ - ext/osv/src/csv/mod.rs
+ - ext/osv/src/csv/parser.rs
+ - ext/osv/src/csv/reader.rs
+ - ext/osv/src/csv/record.rs
  - ext/osv/src/lib.rs
  - ext/osv/src/reader.rs
  - ext/osv/src/utils.rs