osv 0.3.6 → 0.3.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +2 -0
- data/ext/osv/src/csv/builder.rs +26 -1
- data/ext/osv/src/csv/parser.rs +28 -23
- data/ext/osv/src/csv/read_impl.rs +9 -1
- data/ext/osv/src/reader.rs +12 -1
- data/ext/osv/src/utils.rs +12 -0
- data/lib/osv/version.rb +1 -1
- data/lib/osv.rbi +23 -0
- metadata +1 -1
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 02205de8cef4d5f7633c06720a9e925a2b608116354da4a1678d4746d2197d23
|
4
|
+
data.tar.gz: 3e1d63323fdaad1b6a60e0a0a63801f98710615d6616c882f0cdce00e36c6e2e
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: df6a4a4b86c41010ea671ac0e98c2ee6307e62ceff35dab125868f0ee7edb6d14984348ecd4ac9f913489e5a6be0b364240b461334554385aabe5b3374fe798d
|
7
|
+
data.tar.gz: d931b888ce9d0ad1cdb1fa3d0be8cd0e526292206742f5adde718f414e9feca97eff3af6d4139d144c18a50e4807650ea9f7582153bcee80cea1e6ed4ce4ef49
|
data/README.md
CHANGED
@@ -71,6 +71,8 @@ Both methods support the following options:
|
|
71
71
|
- if you want to interpret empty strings as nil, set this to an empty string
|
72
72
|
- `buffer_size`: Integer specifying the read buffer size
|
73
73
|
- `result_type`: String specifying the output format ("hash" or "array")
|
74
|
+
- `flexible`: Boolean specifying if the parser should be flexible (default: false)
|
75
|
+
- `flexible_default`: String specifying the default value for missing fields. Implicitly enables flexible mode if set. (default: `nil`)
|
74
76
|
|
75
77
|
### Input Sources
|
76
78
|
|
data/ext/osv/src/csv/builder.rs
CHANGED
@@ -54,6 +54,8 @@ pub struct RecordReaderBuilder<'a, T: RecordParser + Send + 'static> {
|
|
54
54
|
quote_char: u8,
|
55
55
|
null_string: Option<String>,
|
56
56
|
buffer: usize,
|
57
|
+
flexible: bool,
|
58
|
+
flexible_default: Option<String>,
|
57
59
|
_phantom: PhantomData<T>,
|
58
60
|
}
|
59
61
|
|
@@ -67,6 +69,8 @@ impl<'a, T: RecordParser + Send + 'static> RecordReaderBuilder<'a, T> {
|
|
67
69
|
quote_char: b'"',
|
68
70
|
null_string: None,
|
69
71
|
buffer: 1000,
|
72
|
+
flexible: false,
|
73
|
+
flexible_default: None,
|
70
74
|
_phantom: PhantomData,
|
71
75
|
}
|
72
76
|
}
|
@@ -96,6 +100,16 @@ impl<'a, T: RecordParser + Send + 'static> RecordReaderBuilder<'a, T> {
|
|
96
100
|
self
|
97
101
|
}
|
98
102
|
|
103
|
+
pub fn flexible(mut self, flexible: bool) -> Self {
|
104
|
+
self.flexible = flexible;
|
105
|
+
self
|
106
|
+
}
|
107
|
+
|
108
|
+
pub fn flexible_default(mut self, flexible_default: Option<String>) -> Self {
|
109
|
+
self.flexible_default = flexible_default;
|
110
|
+
self
|
111
|
+
}
|
112
|
+
|
99
113
|
fn handle_string_io(&self) -> Result<Box<dyn Read + Send + 'static>, ReaderError> {
|
100
114
|
let string: RString = self.to_read.funcall("string", ())?;
|
101
115
|
let content = string.to_string()?;
|
@@ -173,10 +187,12 @@ impl<'a, T: RecordParser + Send + 'static> RecordReaderBuilder<'a, T> {
|
|
173
187
|
self,
|
174
188
|
readable: Box<dyn Read + Send + 'static>,
|
175
189
|
) -> Result<RecordReader<T>, ReaderError> {
|
190
|
+
let flexible = self.flexible || self.flexible_default.is_some();
|
176
191
|
let mut reader = csv::ReaderBuilder::new()
|
177
192
|
.has_headers(self.has_headers)
|
178
193
|
.delimiter(self.delimiter)
|
179
194
|
.quote(self.quote_char)
|
195
|
+
.flexible(flexible)
|
180
196
|
.from_reader(readable);
|
181
197
|
|
182
198
|
let headers = RecordReader::<T>::get_headers(self.ruby, &mut reader, self.has_headers)?;
|
@@ -186,10 +202,16 @@ impl<'a, T: RecordParser + Send + 'static> RecordReaderBuilder<'a, T> {
|
|
186
202
|
let (sender, receiver) = kanal::bounded(self.buffer);
|
187
203
|
let null_string = self.null_string.clone();
|
188
204
|
|
205
|
+
let flexible_default = self.flexible_default.clone();
|
189
206
|
let handle = thread::spawn(move || {
|
190
207
|
let mut record = csv::StringRecord::new();
|
191
208
|
while let Ok(true) = reader.read_record(&mut record) {
|
192
|
-
let row = T::parse(
|
209
|
+
let row = T::parse(
|
210
|
+
&static_headers,
|
211
|
+
&record,
|
212
|
+
null_string.as_deref(),
|
213
|
+
flexible_default.as_deref(),
|
214
|
+
);
|
193
215
|
if sender.send(row).is_err() {
|
194
216
|
break;
|
195
217
|
}
|
@@ -211,10 +233,12 @@ impl<'a, T: RecordParser + Send + 'static> RecordReaderBuilder<'a, T> {
|
|
211
233
|
self,
|
212
234
|
readable: Box<dyn Read>,
|
213
235
|
) -> Result<RecordReader<T>, ReaderError> {
|
236
|
+
let flexible = self.flexible || self.flexible_default.is_some();
|
214
237
|
let mut reader = csv::ReaderBuilder::new()
|
215
238
|
.has_headers(self.has_headers)
|
216
239
|
.delimiter(self.delimiter)
|
217
240
|
.quote(self.quote_char)
|
241
|
+
.flexible(flexible)
|
218
242
|
.from_reader(readable);
|
219
243
|
|
220
244
|
let headers = RecordReader::<T>::get_headers(self.ruby, &mut reader, self.has_headers)?;
|
@@ -225,6 +249,7 @@ impl<'a, T: RecordParser + Send + 'static> RecordReaderBuilder<'a, T> {
|
|
225
249
|
reader,
|
226
250
|
headers: static_headers,
|
227
251
|
null_string: self.null_string,
|
252
|
+
flexible_default: self.flexible_default,
|
228
253
|
},
|
229
254
|
})
|
230
255
|
}
|
data/ext/osv/src/csv/parser.rs
CHANGED
@@ -7,6 +7,7 @@ pub trait RecordParser {
|
|
7
7
|
headers: &[&'static str],
|
8
8
|
record: &csv::StringRecord,
|
9
9
|
null_string: Option<&str>,
|
10
|
+
flexible_default: Option<&str>,
|
10
11
|
) -> Self::Output;
|
11
12
|
}
|
12
13
|
|
@@ -18,26 +19,24 @@ impl RecordParser for HashMap<&'static str, Option<String>> {
|
|
18
19
|
headers: &[&'static str],
|
19
20
|
record: &csv::StringRecord,
|
20
21
|
null_string: Option<&str>,
|
22
|
+
flexible_default: Option<&str>,
|
21
23
|
) -> Self::Output {
|
22
24
|
let mut map = HashMap::with_capacity(headers.len());
|
23
|
-
headers
|
24
|
-
.
|
25
|
-
|
26
|
-
|
27
|
-
map.insert(
|
28
|
-
*header,
|
25
|
+
headers.iter().enumerate().for_each(|(i, header)| {
|
26
|
+
let value = record.get(i).map_or_else(
|
27
|
+
|| flexible_default.map(|s| s.to_string()),
|
28
|
+
|field| {
|
29
29
|
if null_string == Some(field) {
|
30
30
|
None
|
31
|
+
} else if field.is_empty() {
|
32
|
+
Some(String::new())
|
31
33
|
} else {
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
},
|
39
|
-
);
|
40
|
-
});
|
34
|
+
Some(field.to_string())
|
35
|
+
}
|
36
|
+
},
|
37
|
+
);
|
38
|
+
map.insert(*header, value);
|
39
|
+
});
|
41
40
|
map
|
42
41
|
}
|
43
42
|
}
|
@@ -47,23 +46,29 @@ impl RecordParser for Vec<Option<String>> {
|
|
47
46
|
|
48
47
|
#[inline]
|
49
48
|
fn parse(
|
50
|
-
|
49
|
+
headers: &[&'static str],
|
51
50
|
record: &csv::StringRecord,
|
52
51
|
null_string: Option<&str>,
|
52
|
+
flexible_default: Option<&str>,
|
53
53
|
) -> Self::Output {
|
54
|
-
let
|
54
|
+
let target_len = headers.len();
|
55
|
+
let mut vec = Vec::with_capacity(target_len);
|
55
56
|
vec.extend(record.iter().map(|field| {
|
56
57
|
if null_string == Some(field) {
|
57
58
|
None
|
59
|
+
} else if field.is_empty() {
|
60
|
+
Some(String::new())
|
58
61
|
} else {
|
59
|
-
|
60
|
-
if field.is_empty() {
|
61
|
-
Some(String::new())
|
62
|
-
} else {
|
63
|
-
Some(field.to_string())
|
64
|
-
}
|
62
|
+
Some(field.to_string())
|
65
63
|
}
|
66
64
|
}));
|
65
|
+
|
66
|
+
// Fill remaining slots with flexible_default if needed
|
67
|
+
if let Some(default) = flexible_default {
|
68
|
+
while vec.len() < target_len {
|
69
|
+
vec.push(Some(default.to_string()));
|
70
|
+
}
|
71
|
+
}
|
67
72
|
vec
|
68
73
|
}
|
69
74
|
}
|
data/ext/osv/src/csv/read_impl.rs
CHANGED
@@ -6,6 +6,7 @@ pub enum ReadImpl<T: RecordParser> {
|
|
6
6
|
reader: csv::Reader<Box<dyn Read>>,
|
7
7
|
headers: Vec<&'static str>,
|
8
8
|
null_string: Option<String>,
|
9
|
+
flexible_default: Option<String>,
|
9
10
|
},
|
10
11
|
MultiThreaded {
|
11
12
|
headers: Vec<&'static str>,
|
@@ -33,10 +34,16 @@ impl<T: RecordParser> ReadImpl<T> {
|
|
33
34
|
reader,
|
34
35
|
headers,
|
35
36
|
null_string,
|
37
|
+
flexible_default,
|
36
38
|
} => {
|
37
39
|
let mut record = csv::StringRecord::new();
|
38
40
|
match reader.read_record(&mut record) {
|
39
|
-
Ok(true) => Some(T::parse(
|
41
|
+
Ok(true) => Some(T::parse(
|
42
|
+
headers,
|
43
|
+
&record,
|
44
|
+
null_string.as_deref(),
|
45
|
+
flexible_default.as_deref(),
|
46
|
+
)),
|
40
47
|
_ => None,
|
41
48
|
}
|
42
49
|
}
|
@@ -50,6 +57,7 @@ impl<T: RecordParser> ReadImpl<T> {
|
|
50
57
|
receiver,
|
51
58
|
handle,
|
52
59
|
headers,
|
60
|
+
..
|
53
61
|
} => {
|
54
62
|
receiver.close();
|
55
63
|
if let Some(handle) = handle.take() {
|
data/ext/osv/src/reader.rs
CHANGED
@@ -18,6 +18,8 @@ pub fn parse_csv(
|
|
18
18
|
null_string,
|
19
19
|
buffer_size,
|
20
20
|
result_type,
|
21
|
+
flexible,
|
22
|
+
flexible_default,
|
21
23
|
} = parse_csv_args(&ruby, args)?;
|
22
24
|
|
23
25
|
if !ruby.block_given() {
|
@@ -30,6 +32,8 @@ pub fn parse_csv(
|
|
30
32
|
null_string,
|
31
33
|
buffer_size,
|
32
34
|
result_type,
|
35
|
+
flexible,
|
36
|
+
flexible_default,
|
33
37
|
});
|
34
38
|
}
|
35
39
|
|
@@ -37,6 +41,8 @@ pub fn parse_csv(
|
|
37
41
|
"hash" => Box::new(
|
38
42
|
RecordReaderBuilder::<HashMap<&'static str, Option<String>>>::new(&ruby, to_read)
|
39
43
|
.has_headers(has_headers)
|
44
|
+
.flexible(flexible)
|
45
|
+
.flexible_default(flexible_default)
|
40
46
|
.delimiter(delimiter)
|
41
47
|
.quote_char(quote_char)
|
42
48
|
.null_string(null_string)
|
@@ -47,6 +53,8 @@ pub fn parse_csv(
|
|
47
53
|
"array" => Box::new(
|
48
54
|
RecordReaderBuilder::<Vec<Option<String>>>::new(&ruby, to_read)
|
49
55
|
.has_headers(has_headers)
|
56
|
+
.flexible(flexible)
|
57
|
+
.flexible_default(flexible_default)
|
50
58
|
.delimiter(delimiter)
|
51
59
|
.quote_char(quote_char)
|
52
60
|
.null_string(null_string)
|
@@ -74,6 +82,8 @@ struct EnumeratorArgs {
|
|
74
82
|
null_string: Option<String>,
|
75
83
|
buffer_size: usize,
|
76
84
|
result_type: String,
|
85
|
+
flexible: bool,
|
86
|
+
flexible_default: Option<String>,
|
77
87
|
}
|
78
88
|
|
79
89
|
fn create_enumerator(
|
@@ -92,7 +102,8 @@ fn create_enumerator(
|
|
92
102
|
kwargs.aset(Symbol::new("nil_string"), args.null_string)?;
|
93
103
|
kwargs.aset(Symbol::new("buffer_size"), args.buffer_size)?;
|
94
104
|
kwargs.aset(Symbol::new("result_type"), Symbol::new(args.result_type))?;
|
95
|
-
|
105
|
+
kwargs.aset(Symbol::new("flexible"), args.flexible)?;
|
106
|
+
kwargs.aset(Symbol::new("flexible_default"), args.flexible_default)?;
|
96
107
|
let enumerator = args
|
97
108
|
.rb_self
|
98
109
|
.enumeratorize("for_each", (args.to_read, KwArgs(kwargs)));
|
data/ext/osv/src/utils.rs
CHANGED
@@ -13,6 +13,8 @@ pub struct CsvArgs {
|
|
13
13
|
pub null_string: Option<String>,
|
14
14
|
pub buffer_size: usize,
|
15
15
|
pub result_type: String,
|
16
|
+
pub flexible: bool,
|
17
|
+
pub flexible_default: Option<String>,
|
16
18
|
}
|
17
19
|
|
18
20
|
/// Parse common arguments for CSV parsing
|
@@ -30,6 +32,8 @@ pub fn parse_csv_args(ruby: &Ruby, args: &[Value]) -> Result<CsvArgs, Error> {
|
|
30
32
|
Option<Option<String>>,
|
31
33
|
Option<usize>,
|
32
34
|
Option<Value>,
|
35
|
+
Option<bool>,
|
36
|
+
Option<Option<String>>,
|
33
37
|
),
|
34
38
|
(),
|
35
39
|
>(
|
@@ -42,6 +46,8 @@ pub fn parse_csv_args(ruby: &Ruby, args: &[Value]) -> Result<CsvArgs, Error> {
|
|
42
46
|
"nil_string",
|
43
47
|
"buffer_size",
|
44
48
|
"result_type",
|
49
|
+
"flexible",
|
50
|
+
"flexible_default",
|
45
51
|
],
|
46
52
|
)?;
|
47
53
|
|
@@ -111,6 +117,10 @@ pub fn parse_csv_args(ruby: &Ruby, args: &[Value]) -> Result<CsvArgs, Error> {
|
|
111
117
|
None => String::from("hash"),
|
112
118
|
};
|
113
119
|
|
120
|
+
let flexible = kwargs.optional.6.unwrap_or_default();
|
121
|
+
|
122
|
+
let flexible_default = kwargs.optional.7.unwrap_or_default();
|
123
|
+
|
114
124
|
Ok(CsvArgs {
|
115
125
|
to_read,
|
116
126
|
has_headers,
|
@@ -119,5 +129,7 @@ pub fn parse_csv_args(ruby: &Ruby, args: &[Value]) -> Result<CsvArgs, Error> {
|
|
119
129
|
null_string,
|
120
130
|
buffer_size,
|
121
131
|
result_type,
|
132
|
+
flexible,
|
133
|
+
flexible_default,
|
122
134
|
})
|
123
135
|
}
|
data/lib/osv/version.rb
CHANGED
data/lib/osv.rbi
CHANGED
@@ -1,6 +1,25 @@
|
|
1
1
|
# typed: strict
|
2
2
|
|
3
3
|
module OSV
|
4
|
+
# Options:
|
5
|
+
# - `has_headers`: Boolean indicating if the first row contains headers
|
6
|
+
# (default: true)
|
7
|
+
# - `col_sep`: String specifying the field separator
|
8
|
+
# (default: ",")
|
9
|
+
# - `quote_char`: String specifying the quote character
|
10
|
+
# (default: "\"")
|
11
|
+
# - `nil_string`: String that should be interpreted as nil
|
12
|
+
# By default, empty strings are interpreted as empty strings.
|
13
|
+
# If you want to interpret empty strings as nil, set this to
|
14
|
+
# an empty string.
|
15
|
+
# - `buffer_size`: Integer specifying the read buffer size
|
16
|
+
# - `result_type`: String specifying the output format
|
17
|
+
# ("hash" or "array")
|
18
|
+
# - `flexible`: Boolean specifying if the parser should be flexible
|
19
|
+
# (default: false)
|
20
|
+
# - `flexible_default`: String specifying the default value for missing fields.
|
21
|
+
# Implicitly enables flexible mode if set.
|
22
|
+
# (default: `nil`)
|
4
23
|
sig do
|
5
24
|
params(
|
6
25
|
input: T.any(String, StringIO, IO),
|
@@ -10,6 +29,8 @@ module OSV
|
|
10
29
|
nil_string: T.nilable(String),
|
11
30
|
buffer_size: T.nilable(Integer),
|
12
31
|
result_type: T.nilable(String),
|
32
|
+
flexible: T.nilable(T::Boolean),
|
33
|
+
flexible_default: T.nilable(String),
|
13
34
|
blk: T.nilable(T.proc.params(row: T.any(T::Hash[String, T.nilable(String)], T::Array[T.nilable(String)])).void)
|
14
35
|
).returns(T.any(Enumerator, T.untyped))
|
15
36
|
end
|
@@ -21,6 +42,8 @@ module OSV
|
|
21
42
|
nil_string: nil,
|
22
43
|
buffer_size: nil,
|
23
44
|
result_type: nil,
|
45
|
+
flexible: nil,
|
46
|
+
flexible_default: nil,
|
24
47
|
&blk
|
25
48
|
)
|
26
49
|
end
|