osv 0.3.6 → 0.3.7
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +1 -0
- data/ext/osv/src/csv/builder.rs +17 -1
- data/ext/osv/src/csv/parser.rs +28 -23
- data/ext/osv/src/csv/read_impl.rs +9 -1
- data/ext/osv/src/reader.rs +6 -1
- data/ext/osv/src/utils.rs +6 -0
- data/lib/osv/version.rb +1 -1
- metadata +1 -1
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 37c201207d5ad93aa949fa34b461d9c067e6d574ad18449c2dd1f748420ed85b
|
4
|
+
data.tar.gz: c2487c21f79ed3a126764d0dffed42b9b7436980d66c148fb371478b6291789d
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: acb84f26530b896607572be2aaa9e8eb97f3dac01bada8fe4308e2b5bf624c6e7c3f6c0be04bb1ae35e44acf29dde5b146b100be5c67f459d443026ff83c696f
|
7
|
+
data.tar.gz: d4ede0f67eaaf930b3a27f96260167b694c827434b744d38aa71c12d2f4a784cc55e1d8a4318dab30c4d64ecb17b8894cb85f0474b88d4488f9be167f8aceb09
|
data/README.md
CHANGED
@@ -71,6 +71,7 @@ Both methods support the following options:
|
|
71
71
|
- if you want to interpret empty strings as nil, set this to an empty string
|
72
72
|
- `buffer_size`: Integer specifying the read buffer size
|
73
73
|
- `result_type`: String specifying the output format ("hash" or "array")
|
74
|
+
- `flexible_default`: String specifying the default value for missing fields (default: `nil`)
|
74
75
|
|
75
76
|
### Input Sources
|
76
77
|
|
data/ext/osv/src/csv/builder.rs
CHANGED
@@ -54,6 +54,7 @@ pub struct RecordReaderBuilder<'a, T: RecordParser + Send + 'static> {
|
|
54
54
|
quote_char: u8,
|
55
55
|
null_string: Option<String>,
|
56
56
|
buffer: usize,
|
57
|
+
flexible_default: Option<String>,
|
57
58
|
_phantom: PhantomData<T>,
|
58
59
|
}
|
59
60
|
|
@@ -67,6 +68,7 @@ impl<'a, T: RecordParser + Send + 'static> RecordReaderBuilder<'a, T> {
|
|
67
68
|
quote_char: b'"',
|
68
69
|
null_string: None,
|
69
70
|
buffer: 1000,
|
71
|
+
flexible_default: None,
|
70
72
|
_phantom: PhantomData,
|
71
73
|
}
|
72
74
|
}
|
@@ -96,6 +98,11 @@ impl<'a, T: RecordParser + Send + 'static> RecordReaderBuilder<'a, T> {
|
|
96
98
|
self
|
97
99
|
}
|
98
100
|
|
101
|
+
pub fn flexible_default(mut self, flexible_default: Option<String>) -> Self {
|
102
|
+
self.flexible_default = flexible_default;
|
103
|
+
self
|
104
|
+
}
|
105
|
+
|
99
106
|
fn handle_string_io(&self) -> Result<Box<dyn Read + Send + 'static>, ReaderError> {
|
100
107
|
let string: RString = self.to_read.funcall("string", ())?;
|
101
108
|
let content = string.to_string()?;
|
@@ -177,6 +184,7 @@ impl<'a, T: RecordParser + Send + 'static> RecordReaderBuilder<'a, T> {
|
|
177
184
|
.has_headers(self.has_headers)
|
178
185
|
.delimiter(self.delimiter)
|
179
186
|
.quote(self.quote_char)
|
187
|
+
.flexible(self.flexible_default.is_some())
|
180
188
|
.from_reader(readable);
|
181
189
|
|
182
190
|
let headers = RecordReader::<T>::get_headers(self.ruby, &mut reader, self.has_headers)?;
|
@@ -186,10 +194,16 @@ impl<'a, T: RecordParser + Send + 'static> RecordReaderBuilder<'a, T> {
|
|
186
194
|
let (sender, receiver) = kanal::bounded(self.buffer);
|
187
195
|
let null_string = self.null_string.clone();
|
188
196
|
|
197
|
+
let flexible_default = self.flexible_default.clone();
|
189
198
|
let handle = thread::spawn(move || {
|
190
199
|
let mut record = csv::StringRecord::new();
|
191
200
|
while let Ok(true) = reader.read_record(&mut record) {
|
192
|
-
let row = T::parse(
|
201
|
+
let row = T::parse(
|
202
|
+
&static_headers,
|
203
|
+
&record,
|
204
|
+
null_string.as_deref(),
|
205
|
+
flexible_default.as_deref(),
|
206
|
+
);
|
193
207
|
if sender.send(row).is_err() {
|
194
208
|
break;
|
195
209
|
}
|
@@ -215,6 +229,7 @@ impl<'a, T: RecordParser + Send + 'static> RecordReaderBuilder<'a, T> {
|
|
215
229
|
.has_headers(self.has_headers)
|
216
230
|
.delimiter(self.delimiter)
|
217
231
|
.quote(self.quote_char)
|
232
|
+
.flexible(self.flexible_default.is_some())
|
218
233
|
.from_reader(readable);
|
219
234
|
|
220
235
|
let headers = RecordReader::<T>::get_headers(self.ruby, &mut reader, self.has_headers)?;
|
@@ -225,6 +240,7 @@ impl<'a, T: RecordParser + Send + 'static> RecordReaderBuilder<'a, T> {
|
|
225
240
|
reader,
|
226
241
|
headers: static_headers,
|
227
242
|
null_string: self.null_string,
|
243
|
+
flexible_default: self.flexible_default,
|
228
244
|
},
|
229
245
|
})
|
230
246
|
}
|
data/ext/osv/src/csv/parser.rs
CHANGED
@@ -7,6 +7,7 @@ pub trait RecordParser {
|
|
7
7
|
headers: &[&'static str],
|
8
8
|
record: &csv::StringRecord,
|
9
9
|
null_string: Option<&str>,
|
10
|
+
flexible_default: Option<&str>,
|
10
11
|
) -> Self::Output;
|
11
12
|
}
|
12
13
|
|
@@ -18,26 +19,24 @@ impl RecordParser for HashMap<&'static str, Option<String>> {
|
|
18
19
|
headers: &[&'static str],
|
19
20
|
record: &csv::StringRecord,
|
20
21
|
null_string: Option<&str>,
|
22
|
+
flexible_default: Option<&str>,
|
21
23
|
) -> Self::Output {
|
22
24
|
let mut map = HashMap::with_capacity(headers.len());
|
23
|
-
headers
|
24
|
-
.
|
25
|
-
|
26
|
-
|
27
|
-
map.insert(
|
28
|
-
*header,
|
25
|
+
headers.iter().enumerate().for_each(|(i, header)| {
|
26
|
+
let value = record.get(i).map_or_else(
|
27
|
+
|| flexible_default.map(|s| s.to_string()),
|
28
|
+
|field| {
|
29
29
|
if null_string == Some(field) {
|
30
30
|
None
|
31
|
+
} else if field.is_empty() {
|
32
|
+
Some(String::new())
|
31
33
|
} else {
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
},
|
39
|
-
);
|
40
|
-
});
|
34
|
+
Some(field.to_string())
|
35
|
+
}
|
36
|
+
},
|
37
|
+
);
|
38
|
+
map.insert(*header, value);
|
39
|
+
});
|
41
40
|
map
|
42
41
|
}
|
43
42
|
}
|
@@ -47,23 +46,29 @@ impl RecordParser for Vec<Option<String>> {
|
|
47
46
|
|
48
47
|
#[inline]
|
49
48
|
fn parse(
|
50
|
-
|
49
|
+
headers: &[&'static str],
|
51
50
|
record: &csv::StringRecord,
|
52
51
|
null_string: Option<&str>,
|
52
|
+
flexible_default: Option<&str>,
|
53
53
|
) -> Self::Output {
|
54
|
-
let
|
54
|
+
let target_len = headers.len();
|
55
|
+
let mut vec = Vec::with_capacity(target_len);
|
55
56
|
vec.extend(record.iter().map(|field| {
|
56
57
|
if null_string == Some(field) {
|
57
58
|
None
|
59
|
+
} else if field.is_empty() {
|
60
|
+
Some(String::new())
|
58
61
|
} else {
|
59
|
-
|
60
|
-
if field.is_empty() {
|
61
|
-
Some(String::new())
|
62
|
-
} else {
|
63
|
-
Some(field.to_string())
|
64
|
-
}
|
62
|
+
Some(field.to_string())
|
65
63
|
}
|
66
64
|
}));
|
65
|
+
|
66
|
+
// Fill remaining slots with flexible_default if needed
|
67
|
+
if let Some(default) = flexible_default {
|
68
|
+
while vec.len() < target_len {
|
69
|
+
vec.push(Some(default.to_string()));
|
70
|
+
}
|
71
|
+
}
|
67
72
|
vec
|
68
73
|
}
|
69
74
|
}
|
data/ext/osv/src/csv/read_impl.rs
CHANGED
@@ -6,6 +6,7 @@ pub enum ReadImpl<T: RecordParser> {
|
|
6
6
|
reader: csv::Reader<Box<dyn Read>>,
|
7
7
|
headers: Vec<&'static str>,
|
8
8
|
null_string: Option<String>,
|
9
|
+
flexible_default: Option<String>,
|
9
10
|
},
|
10
11
|
MultiThreaded {
|
11
12
|
headers: Vec<&'static str>,
|
@@ -33,10 +34,16 @@ impl<T: RecordParser> ReadImpl<T> {
|
|
33
34
|
reader,
|
34
35
|
headers,
|
35
36
|
null_string,
|
37
|
+
flexible_default,
|
36
38
|
} => {
|
37
39
|
let mut record = csv::StringRecord::new();
|
38
40
|
match reader.read_record(&mut record) {
|
39
|
-
Ok(true) => Some(T::parse(
|
41
|
+
Ok(true) => Some(T::parse(
|
42
|
+
headers,
|
43
|
+
&record,
|
44
|
+
null_string.as_deref(),
|
45
|
+
flexible_default.as_deref(),
|
46
|
+
)),
|
40
47
|
_ => None,
|
41
48
|
}
|
42
49
|
}
|
@@ -50,6 +57,7 @@ impl<T: RecordParser> ReadImpl<T> {
|
|
50
57
|
receiver,
|
51
58
|
handle,
|
52
59
|
headers,
|
60
|
+
..
|
53
61
|
} => {
|
54
62
|
receiver.close();
|
55
63
|
if let Some(handle) = handle.take() {
|
data/ext/osv/src/reader.rs
CHANGED
@@ -18,6 +18,7 @@ pub fn parse_csv(
|
|
18
18
|
null_string,
|
19
19
|
buffer_size,
|
20
20
|
result_type,
|
21
|
+
flexible_default,
|
21
22
|
} = parse_csv_args(&ruby, args)?;
|
22
23
|
|
23
24
|
if !ruby.block_given() {
|
@@ -30,6 +31,7 @@ pub fn parse_csv(
|
|
30
31
|
null_string,
|
31
32
|
buffer_size,
|
32
33
|
result_type,
|
34
|
+
flexible_default,
|
33
35
|
});
|
34
36
|
}
|
35
37
|
|
@@ -37,6 +39,7 @@ pub fn parse_csv(
|
|
37
39
|
"hash" => Box::new(
|
38
40
|
RecordReaderBuilder::<HashMap<&'static str, Option<String>>>::new(&ruby, to_read)
|
39
41
|
.has_headers(has_headers)
|
42
|
+
.flexible_default(flexible_default)
|
40
43
|
.delimiter(delimiter)
|
41
44
|
.quote_char(quote_char)
|
42
45
|
.null_string(null_string)
|
@@ -47,6 +50,7 @@ pub fn parse_csv(
|
|
47
50
|
"array" => Box::new(
|
48
51
|
RecordReaderBuilder::<Vec<Option<String>>>::new(&ruby, to_read)
|
49
52
|
.has_headers(has_headers)
|
53
|
+
.flexible_default(flexible_default)
|
50
54
|
.delimiter(delimiter)
|
51
55
|
.quote_char(quote_char)
|
52
56
|
.null_string(null_string)
|
@@ -74,6 +78,7 @@ struct EnumeratorArgs {
|
|
74
78
|
null_string: Option<String>,
|
75
79
|
buffer_size: usize,
|
76
80
|
result_type: String,
|
81
|
+
flexible_default: Option<String>,
|
77
82
|
}
|
78
83
|
|
79
84
|
fn create_enumerator(
|
@@ -92,7 +97,7 @@ fn create_enumerator(
|
|
92
97
|
kwargs.aset(Symbol::new("nil_string"), args.null_string)?;
|
93
98
|
kwargs.aset(Symbol::new("buffer_size"), args.buffer_size)?;
|
94
99
|
kwargs.aset(Symbol::new("result_type"), Symbol::new(args.result_type))?;
|
95
|
-
|
100
|
+
kwargs.aset(Symbol::new("flexible_default"), args.flexible_default)?;
|
96
101
|
let enumerator = args
|
97
102
|
.rb_self
|
98
103
|
.enumeratorize("for_each", (args.to_read, KwArgs(kwargs)));
|
data/ext/osv/src/utils.rs
CHANGED
@@ -13,6 +13,7 @@ pub struct CsvArgs {
|
|
13
13
|
pub null_string: Option<String>,
|
14
14
|
pub buffer_size: usize,
|
15
15
|
pub result_type: String,
|
16
|
+
pub flexible_default: Option<String>,
|
16
17
|
}
|
17
18
|
|
18
19
|
/// Parse common arguments for CSV parsing
|
@@ -30,6 +31,7 @@ pub fn parse_csv_args(ruby: &Ruby, args: &[Value]) -> Result<CsvArgs, Error> {
|
|
30
31
|
Option<Option<String>>,
|
31
32
|
Option<usize>,
|
32
33
|
Option<Value>,
|
34
|
+
Option<Option<String>>,
|
33
35
|
),
|
34
36
|
(),
|
35
37
|
>(
|
@@ -42,6 +44,7 @@ pub fn parse_csv_args(ruby: &Ruby, args: &[Value]) -> Result<CsvArgs, Error> {
|
|
42
44
|
"nil_string",
|
43
45
|
"buffer_size",
|
44
46
|
"result_type",
|
47
|
+
"flexible_default",
|
45
48
|
],
|
46
49
|
)?;
|
47
50
|
|
@@ -111,6 +114,8 @@ pub fn parse_csv_args(ruby: &Ruby, args: &[Value]) -> Result<CsvArgs, Error> {
|
|
111
114
|
None => String::from("hash"),
|
112
115
|
};
|
113
116
|
|
117
|
+
let flexible_default = kwargs.optional.6.unwrap_or_default();
|
118
|
+
|
114
119
|
Ok(CsvArgs {
|
115
120
|
to_read,
|
116
121
|
has_headers,
|
@@ -119,5 +124,6 @@ pub fn parse_csv_args(ruby: &Ruby, args: &[Value]) -> Result<CsvArgs, Error> {
|
|
119
124
|
null_string,
|
120
125
|
buffer_size,
|
121
126
|
result_type,
|
127
|
+
flexible_default,
|
122
128
|
})
|
123
129
|
}
|
data/lib/osv/version.rb
CHANGED