osv 0.3.6 → 0.3.8
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +2 -0
- data/ext/osv/src/csv/builder.rs +26 -1
- data/ext/osv/src/csv/parser.rs +28 -23
- data/ext/osv/src/csv/read_impl.rs +9 -1
- data/ext/osv/src/reader.rs +12 -1
- data/ext/osv/src/utils.rs +12 -0
- data/lib/osv/version.rb +1 -1
- data/lib/osv.rbi +23 -0
- metadata +1 -1
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 02205de8cef4d5f7633c06720a9e925a2b608116354da4a1678d4746d2197d23
|
4
|
+
data.tar.gz: 3e1d63323fdaad1b6a60e0a0a63801f98710615d6616c882f0cdce00e36c6e2e
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: df6a4a4b86c41010ea671ac0e98c2ee6307e62ceff35dab125868f0ee7edb6d14984348ecd4ac9f913489e5a6be0b364240b461334554385aabe5b3374fe798d
|
7
|
+
data.tar.gz: d931b888ce9d0ad1cdb1fa3d0be8cd0e526292206742f5adde718f414e9feca97eff3af6d4139d144c18a50e4807650ea9f7582153bcee80cea1e6ed4ce4ef49
|
data/README.md
CHANGED
@@ -71,6 +71,8 @@ Both methods support the following options:
|
|
71
71
|
- if you want to interpret empty strings as nil, set this to an empty string
|
72
72
|
- `buffer_size`: Integer specifying the read buffer size
|
73
73
|
- `result_type`: String specifying the output format ("hash" or "array")
|
74
|
+
- `flexible`: Boolean specifying if the parser should be flexible (default: false)
|
75
|
+
- `flexible_default`: String specifying the default value for missing fields. Implicitly enables flexible mode if set. (default: `nil`)
|
74
76
|
|
75
77
|
### Input Sources
|
76
78
|
|
data/ext/osv/src/csv/builder.rs
CHANGED
@@ -54,6 +54,8 @@ pub struct RecordReaderBuilder<'a, T: RecordParser + Send + 'static> {
|
|
54
54
|
quote_char: u8,
|
55
55
|
null_string: Option<String>,
|
56
56
|
buffer: usize,
|
57
|
+
flexible: bool,
|
58
|
+
flexible_default: Option<String>,
|
57
59
|
_phantom: PhantomData<T>,
|
58
60
|
}
|
59
61
|
|
@@ -67,6 +69,8 @@ impl<'a, T: RecordParser + Send + 'static> RecordReaderBuilder<'a, T> {
|
|
67
69
|
quote_char: b'"',
|
68
70
|
null_string: None,
|
69
71
|
buffer: 1000,
|
72
|
+
flexible: false,
|
73
|
+
flexible_default: None,
|
70
74
|
_phantom: PhantomData,
|
71
75
|
}
|
72
76
|
}
|
@@ -96,6 +100,16 @@ impl<'a, T: RecordParser + Send + 'static> RecordReaderBuilder<'a, T> {
|
|
96
100
|
self
|
97
101
|
}
|
98
102
|
|
103
|
+
pub fn flexible(mut self, flexible: bool) -> Self {
|
104
|
+
self.flexible = flexible;
|
105
|
+
self
|
106
|
+
}
|
107
|
+
|
108
|
+
pub fn flexible_default(mut self, flexible_default: Option<String>) -> Self {
|
109
|
+
self.flexible_default = flexible_default;
|
110
|
+
self
|
111
|
+
}
|
112
|
+
|
99
113
|
fn handle_string_io(&self) -> Result<Box<dyn Read + Send + 'static>, ReaderError> {
|
100
114
|
let string: RString = self.to_read.funcall("string", ())?;
|
101
115
|
let content = string.to_string()?;
|
@@ -173,10 +187,12 @@ impl<'a, T: RecordParser + Send + 'static> RecordReaderBuilder<'a, T> {
|
|
173
187
|
self,
|
174
188
|
readable: Box<dyn Read + Send + 'static>,
|
175
189
|
) -> Result<RecordReader<T>, ReaderError> {
|
190
|
+
let flexible = self.flexible || self.flexible_default.is_some();
|
176
191
|
let mut reader = csv::ReaderBuilder::new()
|
177
192
|
.has_headers(self.has_headers)
|
178
193
|
.delimiter(self.delimiter)
|
179
194
|
.quote(self.quote_char)
|
195
|
+
.flexible(flexible)
|
180
196
|
.from_reader(readable);
|
181
197
|
|
182
198
|
let headers = RecordReader::<T>::get_headers(self.ruby, &mut reader, self.has_headers)?;
|
@@ -186,10 +202,16 @@ impl<'a, T: RecordParser + Send + 'static> RecordReaderBuilder<'a, T> {
|
|
186
202
|
let (sender, receiver) = kanal::bounded(self.buffer);
|
187
203
|
let null_string = self.null_string.clone();
|
188
204
|
|
205
|
+
let flexible_default = self.flexible_default.clone();
|
189
206
|
let handle = thread::spawn(move || {
|
190
207
|
let mut record = csv::StringRecord::new();
|
191
208
|
while let Ok(true) = reader.read_record(&mut record) {
|
192
|
-
let row = T::parse(
|
209
|
+
let row = T::parse(
|
210
|
+
&static_headers,
|
211
|
+
&record,
|
212
|
+
null_string.as_deref(),
|
213
|
+
flexible_default.as_deref(),
|
214
|
+
);
|
193
215
|
if sender.send(row).is_err() {
|
194
216
|
break;
|
195
217
|
}
|
@@ -211,10 +233,12 @@ impl<'a, T: RecordParser + Send + 'static> RecordReaderBuilder<'a, T> {
|
|
211
233
|
self,
|
212
234
|
readable: Box<dyn Read>,
|
213
235
|
) -> Result<RecordReader<T>, ReaderError> {
|
236
|
+
let flexible = self.flexible || self.flexible_default.is_some();
|
214
237
|
let mut reader = csv::ReaderBuilder::new()
|
215
238
|
.has_headers(self.has_headers)
|
216
239
|
.delimiter(self.delimiter)
|
217
240
|
.quote(self.quote_char)
|
241
|
+
.flexible(flexible)
|
218
242
|
.from_reader(readable);
|
219
243
|
|
220
244
|
let headers = RecordReader::<T>::get_headers(self.ruby, &mut reader, self.has_headers)?;
|
@@ -225,6 +249,7 @@ impl<'a, T: RecordParser + Send + 'static> RecordReaderBuilder<'a, T> {
|
|
225
249
|
reader,
|
226
250
|
headers: static_headers,
|
227
251
|
null_string: self.null_string,
|
252
|
+
flexible_default: self.flexible_default,
|
228
253
|
},
|
229
254
|
})
|
230
255
|
}
|
data/ext/osv/src/csv/parser.rs
CHANGED
@@ -7,6 +7,7 @@ pub trait RecordParser {
|
|
7
7
|
headers: &[&'static str],
|
8
8
|
record: &csv::StringRecord,
|
9
9
|
null_string: Option<&str>,
|
10
|
+
flexible_default: Option<&str>,
|
10
11
|
) -> Self::Output;
|
11
12
|
}
|
12
13
|
|
@@ -18,26 +19,24 @@ impl RecordParser for HashMap<&'static str, Option<String>> {
|
|
18
19
|
headers: &[&'static str],
|
19
20
|
record: &csv::StringRecord,
|
20
21
|
null_string: Option<&str>,
|
22
|
+
flexible_default: Option<&str>,
|
21
23
|
) -> Self::Output {
|
22
24
|
let mut map = HashMap::with_capacity(headers.len());
|
23
|
-
headers
|
24
|
-
.
|
25
|
-
|
26
|
-
|
27
|
-
map.insert(
|
28
|
-
*header,
|
25
|
+
headers.iter().enumerate().for_each(|(i, header)| {
|
26
|
+
let value = record.get(i).map_or_else(
|
27
|
+
|| flexible_default.map(|s| s.to_string()),
|
28
|
+
|field| {
|
29
29
|
if null_string == Some(field) {
|
30
30
|
None
|
31
|
+
} else if field.is_empty() {
|
32
|
+
Some(String::new())
|
31
33
|
} else {
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
},
|
39
|
-
);
|
40
|
-
});
|
34
|
+
Some(field.to_string())
|
35
|
+
}
|
36
|
+
},
|
37
|
+
);
|
38
|
+
map.insert(*header, value);
|
39
|
+
});
|
41
40
|
map
|
42
41
|
}
|
43
42
|
}
|
@@ -47,23 +46,29 @@ impl RecordParser for Vec<Option<String>> {
|
|
47
46
|
|
48
47
|
#[inline]
|
49
48
|
fn parse(
|
50
|
-
|
49
|
+
headers: &[&'static str],
|
51
50
|
record: &csv::StringRecord,
|
52
51
|
null_string: Option<&str>,
|
52
|
+
flexible_default: Option<&str>,
|
53
53
|
) -> Self::Output {
|
54
|
-
let
|
54
|
+
let target_len = headers.len();
|
55
|
+
let mut vec = Vec::with_capacity(target_len);
|
55
56
|
vec.extend(record.iter().map(|field| {
|
56
57
|
if null_string == Some(field) {
|
57
58
|
None
|
59
|
+
} else if field.is_empty() {
|
60
|
+
Some(String::new())
|
58
61
|
} else {
|
59
|
-
|
60
|
-
if field.is_empty() {
|
61
|
-
Some(String::new())
|
62
|
-
} else {
|
63
|
-
Some(field.to_string())
|
64
|
-
}
|
62
|
+
Some(field.to_string())
|
65
63
|
}
|
66
64
|
}));
|
65
|
+
|
66
|
+
// Fill remaining slots with flexible_default if needed
|
67
|
+
if let Some(default) = flexible_default {
|
68
|
+
while vec.len() < target_len {
|
69
|
+
vec.push(Some(default.to_string()));
|
70
|
+
}
|
71
|
+
}
|
67
72
|
vec
|
68
73
|
}
|
69
74
|
}
|
@@ -6,6 +6,7 @@ pub enum ReadImpl<T: RecordParser> {
|
|
6
6
|
reader: csv::Reader<Box<dyn Read>>,
|
7
7
|
headers: Vec<&'static str>,
|
8
8
|
null_string: Option<String>,
|
9
|
+
flexible_default: Option<String>,
|
9
10
|
},
|
10
11
|
MultiThreaded {
|
11
12
|
headers: Vec<&'static str>,
|
@@ -33,10 +34,16 @@ impl<T: RecordParser> ReadImpl<T> {
|
|
33
34
|
reader,
|
34
35
|
headers,
|
35
36
|
null_string,
|
37
|
+
flexible_default,
|
36
38
|
} => {
|
37
39
|
let mut record = csv::StringRecord::new();
|
38
40
|
match reader.read_record(&mut record) {
|
39
|
-
Ok(true) => Some(T::parse(
|
41
|
+
Ok(true) => Some(T::parse(
|
42
|
+
headers,
|
43
|
+
&record,
|
44
|
+
null_string.as_deref(),
|
45
|
+
flexible_default.as_deref(),
|
46
|
+
)),
|
40
47
|
_ => None,
|
41
48
|
}
|
42
49
|
}
|
@@ -50,6 +57,7 @@ impl<T: RecordParser> ReadImpl<T> {
|
|
50
57
|
receiver,
|
51
58
|
handle,
|
52
59
|
headers,
|
60
|
+
..
|
53
61
|
} => {
|
54
62
|
receiver.close();
|
55
63
|
if let Some(handle) = handle.take() {
|
data/ext/osv/src/reader.rs
CHANGED
@@ -18,6 +18,8 @@ pub fn parse_csv(
|
|
18
18
|
null_string,
|
19
19
|
buffer_size,
|
20
20
|
result_type,
|
21
|
+
flexible,
|
22
|
+
flexible_default,
|
21
23
|
} = parse_csv_args(&ruby, args)?;
|
22
24
|
|
23
25
|
if !ruby.block_given() {
|
@@ -30,6 +32,8 @@ pub fn parse_csv(
|
|
30
32
|
null_string,
|
31
33
|
buffer_size,
|
32
34
|
result_type,
|
35
|
+
flexible,
|
36
|
+
flexible_default,
|
33
37
|
});
|
34
38
|
}
|
35
39
|
|
@@ -37,6 +41,8 @@ pub fn parse_csv(
|
|
37
41
|
"hash" => Box::new(
|
38
42
|
RecordReaderBuilder::<HashMap<&'static str, Option<String>>>::new(&ruby, to_read)
|
39
43
|
.has_headers(has_headers)
|
44
|
+
.flexible(flexible)
|
45
|
+
.flexible_default(flexible_default)
|
40
46
|
.delimiter(delimiter)
|
41
47
|
.quote_char(quote_char)
|
42
48
|
.null_string(null_string)
|
@@ -47,6 +53,8 @@ pub fn parse_csv(
|
|
47
53
|
"array" => Box::new(
|
48
54
|
RecordReaderBuilder::<Vec<Option<String>>>::new(&ruby, to_read)
|
49
55
|
.has_headers(has_headers)
|
56
|
+
.flexible(flexible)
|
57
|
+
.flexible_default(flexible_default)
|
50
58
|
.delimiter(delimiter)
|
51
59
|
.quote_char(quote_char)
|
52
60
|
.null_string(null_string)
|
@@ -74,6 +82,8 @@ struct EnumeratorArgs {
|
|
74
82
|
null_string: Option<String>,
|
75
83
|
buffer_size: usize,
|
76
84
|
result_type: String,
|
85
|
+
flexible: bool,
|
86
|
+
flexible_default: Option<String>,
|
77
87
|
}
|
78
88
|
|
79
89
|
fn create_enumerator(
|
@@ -92,7 +102,8 @@ fn create_enumerator(
|
|
92
102
|
kwargs.aset(Symbol::new("nil_string"), args.null_string)?;
|
93
103
|
kwargs.aset(Symbol::new("buffer_size"), args.buffer_size)?;
|
94
104
|
kwargs.aset(Symbol::new("result_type"), Symbol::new(args.result_type))?;
|
95
|
-
|
105
|
+
kwargs.aset(Symbol::new("flexible"), args.flexible)?;
|
106
|
+
kwargs.aset(Symbol::new("flexible_default"), args.flexible_default)?;
|
96
107
|
let enumerator = args
|
97
108
|
.rb_self
|
98
109
|
.enumeratorize("for_each", (args.to_read, KwArgs(kwargs)));
|
data/ext/osv/src/utils.rs
CHANGED
@@ -13,6 +13,8 @@ pub struct CsvArgs {
|
|
13
13
|
pub null_string: Option<String>,
|
14
14
|
pub buffer_size: usize,
|
15
15
|
pub result_type: String,
|
16
|
+
pub flexible: bool,
|
17
|
+
pub flexible_default: Option<String>,
|
16
18
|
}
|
17
19
|
|
18
20
|
/// Parse common arguments for CSV parsing
|
@@ -30,6 +32,8 @@ pub fn parse_csv_args(ruby: &Ruby, args: &[Value]) -> Result<CsvArgs, Error> {
|
|
30
32
|
Option<Option<String>>,
|
31
33
|
Option<usize>,
|
32
34
|
Option<Value>,
|
35
|
+
Option<bool>,
|
36
|
+
Option<Option<String>>,
|
33
37
|
),
|
34
38
|
(),
|
35
39
|
>(
|
@@ -42,6 +46,8 @@ pub fn parse_csv_args(ruby: &Ruby, args: &[Value]) -> Result<CsvArgs, Error> {
|
|
42
46
|
"nil_string",
|
43
47
|
"buffer_size",
|
44
48
|
"result_type",
|
49
|
+
"flexible",
|
50
|
+
"flexible_default",
|
45
51
|
],
|
46
52
|
)?;
|
47
53
|
|
@@ -111,6 +117,10 @@ pub fn parse_csv_args(ruby: &Ruby, args: &[Value]) -> Result<CsvArgs, Error> {
|
|
111
117
|
None => String::from("hash"),
|
112
118
|
};
|
113
119
|
|
120
|
+
let flexible = kwargs.optional.6.unwrap_or_default();
|
121
|
+
|
122
|
+
let flexible_default = kwargs.optional.7.unwrap_or_default();
|
123
|
+
|
114
124
|
Ok(CsvArgs {
|
115
125
|
to_read,
|
116
126
|
has_headers,
|
@@ -119,5 +129,7 @@ pub fn parse_csv_args(ruby: &Ruby, args: &[Value]) -> Result<CsvArgs, Error> {
|
|
119
129
|
null_string,
|
120
130
|
buffer_size,
|
121
131
|
result_type,
|
132
|
+
flexible,
|
133
|
+
flexible_default,
|
122
134
|
})
|
123
135
|
}
|
data/lib/osv/version.rb
CHANGED
data/lib/osv.rbi
CHANGED
@@ -1,6 +1,25 @@
|
|
1
1
|
# typed: strict
|
2
2
|
|
3
3
|
module OSV
|
4
|
+
# Options:
|
5
|
+
# - `has_headers`: Boolean indicating if the first row contains headers
|
6
|
+
# (default: true)
|
7
|
+
# - `col_sep`: String specifying the field separator
|
8
|
+
# (default: ",")
|
9
|
+
# - `quote_char`: String specifying the quote character
|
10
|
+
# (default: "\"")
|
11
|
+
# - `nil_string`: String that should be interpreted as nil
|
12
|
+
# By default, empty strings are interpreted as empty strings.
|
13
|
+
# If you want to interpret empty strings as nil, set this to
|
14
|
+
# an empty string.
|
15
|
+
# - `buffer_size`: Integer specifying the read buffer size
|
16
|
+
# - `result_type`: String specifying the output format
|
17
|
+
# ("hash" or "array")
|
18
|
+
# - `flexible`: Boolean specifying if the parser should be flexible
|
19
|
+
# (default: false)
|
20
|
+
# - `flexible_default`: String specifying the default value for missing fields.
|
21
|
+
# Implicitly enables flexible mode if set.
|
22
|
+
# (default: `nil`)
|
4
23
|
sig do
|
5
24
|
params(
|
6
25
|
input: T.any(String, StringIO, IO),
|
@@ -10,6 +29,8 @@ module OSV
|
|
10
29
|
nil_string: T.nilable(String),
|
11
30
|
buffer_size: T.nilable(Integer),
|
12
31
|
result_type: T.nilable(String),
|
32
|
+
flexible: T.nilable(T::Boolean),
|
33
|
+
flexible_default: T.nilable(String),
|
13
34
|
blk: T.nilable(T.proc.params(row: T.any(T::Hash[String, T.nilable(String)], T::Array[T.nilable(String)])).void)
|
14
35
|
).returns(T.any(Enumerator, T.untyped))
|
15
36
|
end
|
@@ -21,6 +42,8 @@ module OSV
|
|
21
42
|
nil_string: nil,
|
22
43
|
buffer_size: nil,
|
23
44
|
result_type: nil,
|
45
|
+
flexible: nil,
|
46
|
+
flexible_default: nil,
|
24
47
|
&blk
|
25
48
|
)
|
26
49
|
end
|