osv 0.3.9 → 0.3.11
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +52 -43
- data/Rakefile +7 -0
- data/ext/osv/src/csv/builder.rs +9 -0
- data/ext/osv/src/reader.rs +12 -0
- data/ext/osv/src/utils.rs +72 -28
- data/lib/osv/version.rb +1 -1
- data/lib/osv.rbi +7 -2
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 593a3c94b6ec0366399485444f4d5d6c45b04393cad30686e04b43f289b93d88
|
4
|
+
data.tar.gz: 9ae3ce1ff2655d65c726680a412336e853f3fa0328756b5dea69186b2c82654b
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: a24c9b58b291d934ae6526e88baddebd768ad4ad431bfab8353b84d4e9f284e954aa2dd80b11138c041613702d56927f3fc4819b9fdcda2a22b5a590ba5459fa
|
7
|
+
data.tar.gz: 0a85ef04fc18b4ef680d669c4fb06310b0cd3a0eb2c6c3c5a0a9161bc5af8b8851bf7ca3c1cb08f3fc6fa2d96449e112b86d0bc47871cbd10cae5ebd332edfb4
|
data/README.md
CHANGED
@@ -30,38 +30,68 @@ gem install osv
|
|
30
30
|
|
31
31
|
## Usage
|
32
32
|
|
33
|
-
###
|
34
|
-
|
35
|
-
Each row is returned as a hash where the keys are the column headers:
|
33
|
+
### Reading CSV Files
|
36
34
|
|
37
35
|
```ruby
|
38
36
|
require 'osv'
|
39
37
|
|
40
|
-
#
|
41
|
-
OSV.for_each("
|
42
|
-
|
43
|
-
puts row["
|
38
|
+
# Basic usage - each row as a hash
|
39
|
+
OSV.for_each("data.csv") do |row|
|
40
|
+
puts row["name"] # => "John"
|
41
|
+
puts row["age"] # => "25"
|
44
42
|
end
|
45
43
|
|
46
|
-
#
|
47
|
-
rows = OSV.for_each("
|
44
|
+
# Return an enumerator instead of using a block
|
45
|
+
rows = OSV.for_each("data.csv")
|
48
46
|
rows.each { |row| puts row["name"] }
|
49
|
-
```
|
50
47
|
|
51
|
-
|
48
|
+
# High-performance array mode
|
49
|
+
OSV.for_each("data.csv", result_type: :array) do |row|
|
50
|
+
puts row[0] # First column
|
51
|
+
puts row[1] # Second column
|
52
|
+
end
|
53
|
+
```
|
52
54
|
|
53
|
-
|
55
|
+
### Input Sources
|
54
56
|
|
55
57
|
```ruby
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
58
|
+
# From a file path
|
59
|
+
OSV.for_each("data.csv") { |row| puts row["name"] }
|
60
|
+
|
61
|
+
# From a gzipped file path
|
62
|
+
OSV.for_each("data.csv.gz") { |row| puts row["name"] }
|
63
|
+
|
64
|
+
# From an IO object
|
65
|
+
File.open("data.csv") { |file| OSV.for_each(file) { |row| puts row["name"] } }
|
66
|
+
|
67
|
+
# From a string
|
68
|
+
data = StringIO.new("name,age\nJohn,25")
|
69
|
+
OSV.for_each(data) { |row| puts row["name"] }
|
60
70
|
```
|
61
71
|
|
62
|
-
### Options
|
72
|
+
### Configuration Options
|
63
73
|
|
64
|
-
|
74
|
+
```ruby
|
75
|
+
OSV.for_each("data.csv",
|
76
|
+
# Input formatting
|
77
|
+
has_headers: true, # First row contains headers (default: true)
|
78
|
+
col_sep: ",", # Column separator (default: ",")
|
79
|
+
quote_char: '"', # Quote character (default: '"')
|
80
|
+
|
81
|
+
# Output formatting
|
82
|
+
result_type: :hash, # :hash or :array (hash is default)
|
83
|
+
nil_string: nil, # String to interpret as nil when parsing (default: nil)
|
84
|
+
|
85
|
+
# Parsing behavior
|
86
|
+
flexible: false, # Allow varying number of fields (default: false)
|
87
|
+
flexible_default: nil, # Default value for missing fields. If unset, we ignore missing fields.
|
88
|
+
# Implicitly enables flexible mode if set.
|
89
|
+
trim: :all, # Whether to trim whitespace. Options are :all, :headers, or :fields (default: nil)
|
90
|
+
buffer_size: 1024, # Number of rows to buffer in memory (default: 1024)
|
91
|
+
)
|
92
|
+
```
|
93
|
+
|
94
|
+
#### Available Options
|
65
95
|
|
66
96
|
- `has_headers`: Boolean indicating if the first row contains headers (default: true)
|
67
97
|
- `col_sep`: String specifying the field separator (default: ",")
|
@@ -69,34 +99,13 @@ Both methods support the following options:
|
|
69
99
|
- `nil_string`: String that should be interpreted as nil
|
70
100
|
- by default, empty strings are interpreted as empty strings
|
71
101
|
- if you want to interpret empty strings as nil, set this to an empty string
|
72
|
-
- `buffer_size`: Integer specifying the
|
73
|
-
- `result_type`: String specifying the output format ("hash" or "array")
|
102
|
+
- `buffer_size`: Integer specifying the number of rows to buffer in memory (default: 1024)
|
103
|
+
- `result_type`: String or Symbol specifying the output format ("hash", "array", :hash, or :array; default: "hash")
|
74
104
|
- `flexible`: Boolean specifying if the parser should be flexible (default: false)
|
75
105
|
- `flexible_default`: String specifying the default value for missing fields. Implicitly enables flexible mode if set. (default: `nil`)
|
106
|
+
- `trim`: String or Symbol specifying the trim mode ("all", "headers", "fields", :all, :headers, or :fields; default: `nil`, meaning no trimming)
|
76
107
|
|
77
|
-
|
78
|
-
|
79
|
-
OSV supports reading from:
|
80
|
-
|
81
|
-
- File paths (as strings)
|
82
|
-
- IO objects
|
83
|
-
- Important caveat: the IO object must respond to `rb_io_descriptor` with a file descriptor.
|
84
|
-
- StringIO objects
|
85
|
-
- Note: when you do this, the string is read (in full) into a Rust string, and we parse it there.
|
86
|
-
|
87
|
-
```ruby
|
88
|
-
# From file path
|
89
|
-
OSV.for_each("path/to/file.csv") { |row| puts row["name"] }
|
90
|
-
|
91
|
-
# From IO object
|
92
|
-
File.open("path/to/file.csv") do |file|
|
93
|
-
OSV.for_each(file) { |row| puts row["name"] }
|
94
|
-
end
|
95
|
-
|
96
|
-
# From StringIO
|
97
|
-
data = StringIO.new("name,age\nJohn,25")
|
98
|
-
OSV.for_each(data) { |row| puts row["name"] }
|
99
|
-
```
|
108
|
+
When `has_headers` is false, hash keys will be generated as `"c0"`, `"c1"`, etc.
|
100
109
|
|
101
110
|
## Requirements
|
102
111
|
|
data/Rakefile
CHANGED
@@ -20,3 +20,10 @@ Rake::TestTask.new do |t|
|
|
20
20
|
t.libs << "lib"
|
21
21
|
t.libs << "test"
|
22
22
|
end
|
23
|
+
|
24
|
+
task :release do
|
25
|
+
sh "bundle exec rake test"
|
26
|
+
sh "mkdir -p pkg"
|
27
|
+
sh "gem build osv.gemspec -o pkg/osv-#{OSV::VERSION}.gem"
|
28
|
+
sh "gem push pkg/osv-#{OSV::VERSION}.gem"
|
29
|
+
end
|
data/ext/osv/src/csv/builder.rs
CHANGED
@@ -59,6 +59,7 @@ pub struct RecordReaderBuilder<'a, T: RecordParser + Send + 'static> {
|
|
59
59
|
buffer: usize,
|
60
60
|
flexible: bool,
|
61
61
|
flexible_default: Option<String>,
|
62
|
+
trim: csv::Trim,
|
62
63
|
_phantom: PhantomData<T>,
|
63
64
|
}
|
64
65
|
|
@@ -74,6 +75,7 @@ impl<'a, T: RecordParser + Send + 'static> RecordReaderBuilder<'a, T> {
|
|
74
75
|
buffer: BUFFER_CHANNEL_SIZE,
|
75
76
|
flexible: false,
|
76
77
|
flexible_default: None,
|
78
|
+
trim: csv::Trim::None,
|
77
79
|
_phantom: PhantomData,
|
78
80
|
}
|
79
81
|
}
|
@@ -113,6 +115,11 @@ impl<'a, T: RecordParser + Send + 'static> RecordReaderBuilder<'a, T> {
|
|
113
115
|
self
|
114
116
|
}
|
115
117
|
|
118
|
+
pub fn trim(mut self, trim: csv::Trim) -> Self {
|
119
|
+
self.trim = trim;
|
120
|
+
self
|
121
|
+
}
|
122
|
+
|
116
123
|
fn handle_string_io(&self) -> Result<Box<dyn Read + Send + 'static>, ReaderError> {
|
117
124
|
let string: RString = self.to_read.funcall("string", ())?;
|
118
125
|
let content = string.to_string()?;
|
@@ -200,6 +207,7 @@ impl<'a, T: RecordParser + Send + 'static> RecordReaderBuilder<'a, T> {
|
|
200
207
|
.delimiter(self.delimiter)
|
201
208
|
.quote(self.quote_char)
|
202
209
|
.flexible(flexible)
|
210
|
+
.trim(self.trim)
|
203
211
|
.from_reader(readable);
|
204
212
|
|
205
213
|
let headers = RecordReader::<T>::get_headers(self.ruby, &mut reader, self.has_headers)?;
|
@@ -248,6 +256,7 @@ impl<'a, T: RecordParser + Send + 'static> RecordReaderBuilder<'a, T> {
|
|
248
256
|
.delimiter(self.delimiter)
|
249
257
|
.quote(self.quote_char)
|
250
258
|
.flexible(flexible)
|
259
|
+
.trim(self.trim)
|
251
260
|
.from_reader(readable);
|
252
261
|
|
253
262
|
let headers = RecordReader::<T>::get_headers(self.ruby, &mut reader, self.has_headers)?;
|
data/ext/osv/src/reader.rs
CHANGED
@@ -1,5 +1,6 @@
|
|
1
1
|
use crate::csv::{CsvRecord, RecordReaderBuilder};
|
2
2
|
use crate::utils::*;
|
3
|
+
use csv::Trim;
|
3
4
|
use magnus::value::ReprValue;
|
4
5
|
use magnus::{block::Yield, Error, KwArgs, RHash, Ruby, Symbol, Value};
|
5
6
|
use std::collections::HashMap;
|
@@ -21,6 +22,7 @@ pub fn parse_csv(
|
|
21
22
|
result_type,
|
22
23
|
flexible,
|
23
24
|
flexible_default,
|
25
|
+
trim,
|
24
26
|
} = parse_csv_args(&ruby, args)?;
|
25
27
|
|
26
28
|
if !ruby.block_given() {
|
@@ -35,6 +37,12 @@ pub fn parse_csv(
|
|
35
37
|
result_type,
|
36
38
|
flexible,
|
37
39
|
flexible_default,
|
40
|
+
trim: match trim {
|
41
|
+
Trim::All => Some("all".to_string()),
|
42
|
+
Trim::Headers => Some("headers".to_string()),
|
43
|
+
Trim::Fields => Some("fields".to_string()),
|
44
|
+
_ => None,
|
45
|
+
},
|
38
46
|
});
|
39
47
|
}
|
40
48
|
|
@@ -46,6 +54,7 @@ pub fn parse_csv(
|
|
46
54
|
.has_headers(has_headers)
|
47
55
|
.flexible(flexible)
|
48
56
|
.flexible_default(flexible_default)
|
57
|
+
.trim(trim)
|
49
58
|
.delimiter(delimiter)
|
50
59
|
.quote_char(quote_char)
|
51
60
|
.null_string(null_string)
|
@@ -58,6 +67,7 @@ pub fn parse_csv(
|
|
58
67
|
.has_headers(has_headers)
|
59
68
|
.flexible(flexible)
|
60
69
|
.flexible_default(flexible_default)
|
70
|
+
.trim(trim)
|
61
71
|
.delimiter(delimiter)
|
62
72
|
.quote_char(quote_char)
|
63
73
|
.null_string(null_string)
|
@@ -87,6 +97,7 @@ struct EnumeratorArgs {
|
|
87
97
|
result_type: String,
|
88
98
|
flexible: bool,
|
89
99
|
flexible_default: Option<String>,
|
100
|
+
trim: Option<String>,
|
90
101
|
}
|
91
102
|
|
92
103
|
fn create_enumerator(
|
@@ -107,6 +118,7 @@ fn create_enumerator(
|
|
107
118
|
kwargs.aset(Symbol::new("result_type"), Symbol::new(args.result_type))?;
|
108
119
|
kwargs.aset(Symbol::new("flexible"), args.flexible)?;
|
109
120
|
kwargs.aset(Symbol::new("flexible_default"), args.flexible_default)?;
|
121
|
+
kwargs.aset(Symbol::new("trim"), args.trim.map(Symbol::new))?;
|
110
122
|
let enumerator = args
|
111
123
|
.rb_self
|
112
124
|
.enumeratorize("for_each", (args.to_read, KwArgs(kwargs)));
|
data/ext/osv/src/utils.rs
CHANGED
@@ -6,6 +6,27 @@ use magnus::{
|
|
6
6
|
|
7
7
|
use crate::csv::BUFFER_CHANNEL_SIZE;
|
8
8
|
|
9
|
+
fn parse_string_or_symbol(ruby: &Ruby, value: Value) -> Result<Option<String>, Error> {
|
10
|
+
if value.is_nil() {
|
11
|
+
Ok(None)
|
12
|
+
} else if value.is_kind_of(ruby.class_string()) {
|
13
|
+
RString::from_value(value)
|
14
|
+
.ok_or_else(|| Error::new(magnus::exception::type_error(), "Invalid string value"))?
|
15
|
+
.to_string()
|
16
|
+
.map(|s| Some(s))
|
17
|
+
} else if value.is_kind_of(ruby.class_symbol()) {
|
18
|
+
Symbol::from_value(value)
|
19
|
+
.ok_or_else(|| Error::new(magnus::exception::type_error(), "Invalid symbol value"))?
|
20
|
+
.funcall("to_s", ())
|
21
|
+
.map(|s| Some(s))
|
22
|
+
} else {
|
23
|
+
Err(Error::new(
|
24
|
+
magnus::exception::type_error(),
|
25
|
+
"Value must be a String or Symbol",
|
26
|
+
))
|
27
|
+
}
|
28
|
+
}
|
29
|
+
|
9
30
|
#[derive(Debug)]
|
10
31
|
pub struct CsvArgs {
|
11
32
|
pub to_read: Value,
|
@@ -17,6 +38,7 @@ pub struct CsvArgs {
|
|
17
38
|
pub result_type: String,
|
18
39
|
pub flexible: bool,
|
19
40
|
pub flexible_default: Option<String>,
|
41
|
+
pub trim: csv::Trim,
|
20
42
|
}
|
21
43
|
|
22
44
|
/// Parse common arguments for CSV parsing
|
@@ -36,6 +58,7 @@ pub fn parse_csv_args(ruby: &Ruby, args: &[Value]) -> Result<CsvArgs, Error> {
|
|
36
58
|
Option<Value>,
|
37
59
|
Option<bool>,
|
38
60
|
Option<Option<String>>,
|
61
|
+
Option<Value>,
|
39
62
|
),
|
40
63
|
(),
|
41
64
|
>(
|
@@ -50,6 +73,7 @@ pub fn parse_csv_args(ruby: &Ruby, args: &[Value]) -> Result<CsvArgs, Error> {
|
|
50
73
|
"result_type",
|
51
74
|
"flexible",
|
52
75
|
"flexible_default",
|
76
|
+
"trim",
|
53
77
|
],
|
54
78
|
)?;
|
55
79
|
|
@@ -85,36 +109,26 @@ pub fn parse_csv_args(ruby: &Ruby, args: &[Value]) -> Result<CsvArgs, Error> {
|
|
85
109
|
|
86
110
|
let buffer_size = kwargs.optional.4.unwrap_or(BUFFER_CHANNEL_SIZE);
|
87
111
|
|
88
|
-
let result_type = match kwargs
|
89
|
-
|
90
|
-
|
91
|
-
|
92
|
-
|
93
|
-
|
94
|
-
|
95
|
-
|
96
|
-
} else if value.is_kind_of(ruby.class_symbol()) {
|
97
|
-
Symbol::from_value(value)
|
98
|
-
.ok_or_else(|| {
|
99
|
-
Error::new(magnus::exception::type_error(), "Invalid symbol value")
|
100
|
-
})?
|
101
|
-
.funcall("to_s", ())?
|
102
|
-
} else {
|
112
|
+
let result_type = match kwargs
|
113
|
+
.optional
|
114
|
+
.5
|
115
|
+
.map(|value| parse_string_or_symbol(ruby, value))
|
116
|
+
{
|
117
|
+
Some(Ok(Some(parsed))) => match parsed.as_str() {
|
118
|
+
"hash" | "array" => parsed,
|
119
|
+
_ => {
|
103
120
|
return Err(Error::new(
|
104
|
-
magnus::exception::
|
105
|
-
"result_type must be
|
106
|
-
))
|
107
|
-
};
|
108
|
-
|
109
|
-
match parsed.as_str() {
|
110
|
-
"hash" | "array" => parsed,
|
111
|
-
_ => {
|
112
|
-
return Err(Error::new(
|
113
|
-
magnus::exception::runtime_error(),
|
114
|
-
"result_type must be either 'hash' or 'array'",
|
115
|
-
))
|
116
|
-
}
|
121
|
+
magnus::exception::runtime_error(),
|
122
|
+
"result_type must be either 'hash' or 'array'",
|
123
|
+
))
|
117
124
|
}
|
125
|
+
},
|
126
|
+
Some(Ok(None)) => String::from("hash"),
|
127
|
+
Some(Err(_)) => {
|
128
|
+
return Err(Error::new(
|
129
|
+
magnus::exception::type_error(),
|
130
|
+
"result_type must be a String or Symbol",
|
131
|
+
))
|
118
132
|
}
|
119
133
|
None => String::from("hash"),
|
120
134
|
};
|
@@ -123,6 +137,35 @@ pub fn parse_csv_args(ruby: &Ruby, args: &[Value]) -> Result<CsvArgs, Error> {
|
|
123
137
|
|
124
138
|
let flexible_default = kwargs.optional.7.unwrap_or_default();
|
125
139
|
|
140
|
+
let trim = match kwargs
|
141
|
+
.optional
|
142
|
+
.8
|
143
|
+
.map(|value| parse_string_or_symbol(ruby, value))
|
144
|
+
{
|
145
|
+
Some(Ok(Some(parsed))) => match parsed.as_str() {
|
146
|
+
"all" => csv::Trim::All,
|
147
|
+
"headers" => csv::Trim::Headers,
|
148
|
+
"fields" => csv::Trim::Fields,
|
149
|
+
invalid => {
|
150
|
+
return Err(Error::new(
|
151
|
+
magnus::exception::runtime_error(),
|
152
|
+
format!(
|
153
|
+
"trim must be either 'all', 'headers', or 'fields' but got '{}'",
|
154
|
+
invalid
|
155
|
+
),
|
156
|
+
))
|
157
|
+
}
|
158
|
+
},
|
159
|
+
Some(Ok(None)) => csv::Trim::None,
|
160
|
+
Some(Err(_)) => {
|
161
|
+
return Err(Error::new(
|
162
|
+
magnus::exception::type_error(),
|
163
|
+
"trim must be a String or Symbol",
|
164
|
+
))
|
165
|
+
}
|
166
|
+
None => csv::Trim::None,
|
167
|
+
};
|
168
|
+
|
126
169
|
Ok(CsvArgs {
|
127
170
|
to_read,
|
128
171
|
has_headers,
|
@@ -133,5 +176,6 @@ pub fn parse_csv_args(ruby: &Ruby, args: &[Value]) -> Result<CsvArgs, Error> {
|
|
133
176
|
result_type,
|
134
177
|
flexible,
|
135
178
|
flexible_default,
|
179
|
+
trim,
|
136
180
|
})
|
137
181
|
}
|
data/lib/osv/version.rb
CHANGED
data/lib/osv.rbi
CHANGED
@@ -14,12 +14,15 @@ module OSV
|
|
14
14
|
# an empty string.
|
15
15
|
# - `buffer_size`: Integer specifying the read buffer size
|
16
16
|
# - `result_type`: String or Symbol specifying the output format
|
17
|
-
# ("hash" or "array")
|
17
|
+
# ("hash" or "array" or :hash or :array)
|
18
18
|
# - `flexible`: Boolean specifying if the parser should be flexible
|
19
19
|
# (default: false)
|
20
20
|
# - `flexible_default`: String specifying the default value for missing fields.
|
21
21
|
# Implicitly enables flexible mode if set.
|
22
22
|
# (default: `nil`)
|
23
|
+
# - `trim`: String or Symbol specifying the trim mode
|
24
|
+
# ("all" or "headers" or "fields" or :all or :headers or :fields)
|
25
|
+
# (default: `nil`)
|
23
26
|
sig do
|
24
27
|
params(
|
25
28
|
input: T.any(String, StringIO, IO),
|
@@ -28,9 +31,10 @@ module OSV
|
|
28
31
|
quote_char: T.nilable(String),
|
29
32
|
nil_string: T.nilable(String),
|
30
33
|
buffer_size: T.nilable(Integer),
|
31
|
-
result_type: T.nilable(String),
|
34
|
+
result_type: T.nilable(T.any(String, Symbol)),
|
32
35
|
flexible: T.nilable(T::Boolean),
|
33
36
|
flexible_default: T.nilable(String),
|
37
|
+
trim: T.nilable(T.any(String, Symbol)),
|
34
38
|
blk: T.nilable(T.proc.params(row: T.any(T::Hash[String, T.nilable(String)], T::Array[T.nilable(String)])).void)
|
35
39
|
).returns(T.any(Enumerator, T.untyped))
|
36
40
|
end
|
@@ -44,6 +48,7 @@ module OSV
|
|
44
48
|
result_type: nil,
|
45
49
|
flexible: nil,
|
46
50
|
flexible_default: nil,
|
51
|
+
trim: nil,
|
47
52
|
&blk
|
48
53
|
)
|
49
54
|
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: osv
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.3.
|
4
|
+
version: 0.3.11
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Nathan Jaremko
|
@@ -69,7 +69,7 @@ files:
|
|
69
69
|
- lib/osv/version.rb
|
70
70
|
homepage: https://github.com/njaremko/osv
|
71
71
|
licenses:
|
72
|
-
-
|
72
|
+
- MIT
|
73
73
|
metadata:
|
74
74
|
homepage_uri: https://github.com/njaremko/osv
|
75
75
|
source_code_uri: https://github.com/njaremko/osv
|