osv 0.3.6 → 0.3.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +1 -0
- data/ext/osv/src/csv/builder.rs +17 -1
- data/ext/osv/src/csv/parser.rs +28 -23
- data/ext/osv/src/csv/read_impl.rs +9 -1
- data/ext/osv/src/reader.rs +6 -1
- data/ext/osv/src/utils.rs +6 -0
- data/lib/osv/version.rb +1 -1
- metadata +1 -1
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 37c201207d5ad93aa949fa34b461d9c067e6d574ad18449c2dd1f748420ed85b
|
4
|
+
data.tar.gz: c2487c21f79ed3a126764d0dffed42b9b7436980d66c148fb371478b6291789d
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: acb84f26530b896607572be2aaa9e8eb97f3dac01bada8fe4308e2b5bf624c6e7c3f6c0be04bb1ae35e44acf29dde5b146b100be5c67f459d443026ff83c696f
|
7
|
+
data.tar.gz: d4ede0f67eaaf930b3a27f96260167b694c827434b744d38aa71c12d2f4a784cc55e1d8a4318dab30c4d64ecb17b8894cb85f0474b88d4488f9be167f8aceb09
|
data/README.md
CHANGED
@@ -71,6 +71,7 @@ Both methods support the following options:
|
|
71
71
|
- if you want to interpret empty strings as nil, set this to an empty string
|
72
72
|
- `buffer_size`: Integer specifying the read buffer size
|
73
73
|
- `result_type`: String specifying the output format ("hash" or "array")
|
74
|
+
- `flexible_default`: String specifying the default value for missing fields (default: `nil`)
|
74
75
|
|
75
76
|
### Input Sources
|
76
77
|
|
data/ext/osv/src/csv/builder.rs
CHANGED
@@ -54,6 +54,7 @@ pub struct RecordReaderBuilder<'a, T: RecordParser + Send + 'static> {
|
|
54
54
|
quote_char: u8,
|
55
55
|
null_string: Option<String>,
|
56
56
|
buffer: usize,
|
57
|
+
flexible_default: Option<String>,
|
57
58
|
_phantom: PhantomData<T>,
|
58
59
|
}
|
59
60
|
|
@@ -67,6 +68,7 @@ impl<'a, T: RecordParser + Send + 'static> RecordReaderBuilder<'a, T> {
|
|
67
68
|
quote_char: b'"',
|
68
69
|
null_string: None,
|
69
70
|
buffer: 1000,
|
71
|
+
flexible_default: None,
|
70
72
|
_phantom: PhantomData,
|
71
73
|
}
|
72
74
|
}
|
@@ -96,6 +98,11 @@ impl<'a, T: RecordParser + Send + 'static> RecordReaderBuilder<'a, T> {
|
|
96
98
|
self
|
97
99
|
}
|
98
100
|
|
101
|
+
pub fn flexible_default(mut self, flexible_default: Option<String>) -> Self {
|
102
|
+
self.flexible_default = flexible_default;
|
103
|
+
self
|
104
|
+
}
|
105
|
+
|
99
106
|
fn handle_string_io(&self) -> Result<Box<dyn Read + Send + 'static>, ReaderError> {
|
100
107
|
let string: RString = self.to_read.funcall("string", ())?;
|
101
108
|
let content = string.to_string()?;
|
@@ -177,6 +184,7 @@ impl<'a, T: RecordParser + Send + 'static> RecordReaderBuilder<'a, T> {
|
|
177
184
|
.has_headers(self.has_headers)
|
178
185
|
.delimiter(self.delimiter)
|
179
186
|
.quote(self.quote_char)
|
187
|
+
.flexible(self.flexible_default.is_some())
|
180
188
|
.from_reader(readable);
|
181
189
|
|
182
190
|
let headers = RecordReader::<T>::get_headers(self.ruby, &mut reader, self.has_headers)?;
|
@@ -186,10 +194,16 @@ impl<'a, T: RecordParser + Send + 'static> RecordReaderBuilder<'a, T> {
|
|
186
194
|
let (sender, receiver) = kanal::bounded(self.buffer);
|
187
195
|
let null_string = self.null_string.clone();
|
188
196
|
|
197
|
+
let flexible_default = self.flexible_default.clone();
|
189
198
|
let handle = thread::spawn(move || {
|
190
199
|
let mut record = csv::StringRecord::new();
|
191
200
|
while let Ok(true) = reader.read_record(&mut record) {
|
192
|
-
let row = T::parse(
|
201
|
+
let row = T::parse(
|
202
|
+
&static_headers,
|
203
|
+
&record,
|
204
|
+
null_string.as_deref(),
|
205
|
+
flexible_default.as_deref(),
|
206
|
+
);
|
193
207
|
if sender.send(row).is_err() {
|
194
208
|
break;
|
195
209
|
}
|
@@ -215,6 +229,7 @@ impl<'a, T: RecordParser + Send + 'static> RecordReaderBuilder<'a, T> {
|
|
215
229
|
.has_headers(self.has_headers)
|
216
230
|
.delimiter(self.delimiter)
|
217
231
|
.quote(self.quote_char)
|
232
|
+
.flexible(self.flexible_default.is_some())
|
218
233
|
.from_reader(readable);
|
219
234
|
|
220
235
|
let headers = RecordReader::<T>::get_headers(self.ruby, &mut reader, self.has_headers)?;
|
@@ -225,6 +240,7 @@ impl<'a, T: RecordParser + Send + 'static> RecordReaderBuilder<'a, T> {
|
|
225
240
|
reader,
|
226
241
|
headers: static_headers,
|
227
242
|
null_string: self.null_string,
|
243
|
+
flexible_default: self.flexible_default,
|
228
244
|
},
|
229
245
|
})
|
230
246
|
}
|
data/ext/osv/src/csv/parser.rs
CHANGED
@@ -7,6 +7,7 @@ pub trait RecordParser {
|
|
7
7
|
headers: &[&'static str],
|
8
8
|
record: &csv::StringRecord,
|
9
9
|
null_string: Option<&str>,
|
10
|
+
flexible_default: Option<&str>,
|
10
11
|
) -> Self::Output;
|
11
12
|
}
|
12
13
|
|
@@ -18,26 +19,24 @@ impl RecordParser for HashMap<&'static str, Option<String>> {
|
|
18
19
|
headers: &[&'static str],
|
19
20
|
record: &csv::StringRecord,
|
20
21
|
null_string: Option<&str>,
|
22
|
+
flexible_default: Option<&str>,
|
21
23
|
) -> Self::Output {
|
22
24
|
let mut map = HashMap::with_capacity(headers.len());
|
23
|
-
headers
|
24
|
-
.
|
25
|
-
|
26
|
-
|
27
|
-
map.insert(
|
28
|
-
*header,
|
25
|
+
headers.iter().enumerate().for_each(|(i, header)| {
|
26
|
+
let value = record.get(i).map_or_else(
|
27
|
+
|| flexible_default.map(|s| s.to_string()),
|
28
|
+
|field| {
|
29
29
|
if null_string == Some(field) {
|
30
30
|
None
|
31
|
+
} else if field.is_empty() {
|
32
|
+
Some(String::new())
|
31
33
|
} else {
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
},
|
39
|
-
);
|
40
|
-
});
|
34
|
+
Some(field.to_string())
|
35
|
+
}
|
36
|
+
},
|
37
|
+
);
|
38
|
+
map.insert(*header, value);
|
39
|
+
});
|
41
40
|
map
|
42
41
|
}
|
43
42
|
}
|
@@ -47,23 +46,29 @@ impl RecordParser for Vec<Option<String>> {
|
|
47
46
|
|
48
47
|
#[inline]
|
49
48
|
fn parse(
|
50
|
-
|
49
|
+
headers: &[&'static str],
|
51
50
|
record: &csv::StringRecord,
|
52
51
|
null_string: Option<&str>,
|
52
|
+
flexible_default: Option<&str>,
|
53
53
|
) -> Self::Output {
|
54
|
-
let
|
54
|
+
let target_len = headers.len();
|
55
|
+
let mut vec = Vec::with_capacity(target_len);
|
55
56
|
vec.extend(record.iter().map(|field| {
|
56
57
|
if null_string == Some(field) {
|
57
58
|
None
|
59
|
+
} else if field.is_empty() {
|
60
|
+
Some(String::new())
|
58
61
|
} else {
|
59
|
-
|
60
|
-
if field.is_empty() {
|
61
|
-
Some(String::new())
|
62
|
-
} else {
|
63
|
-
Some(field.to_string())
|
64
|
-
}
|
62
|
+
Some(field.to_string())
|
65
63
|
}
|
66
64
|
}));
|
65
|
+
|
66
|
+
// Fill remaining slots with flexible_default if needed
|
67
|
+
if let Some(default) = flexible_default {
|
68
|
+
while vec.len() < target_len {
|
69
|
+
vec.push(Some(default.to_string()));
|
70
|
+
}
|
71
|
+
}
|
67
72
|
vec
|
68
73
|
}
|
69
74
|
}
|
data/ext/osv/src/csv/read_impl.rs
CHANGED
@@ -6,6 +6,7 @@ pub enum ReadImpl<T: RecordParser> {
|
|
6
6
|
reader: csv::Reader<Box<dyn Read>>,
|
7
7
|
headers: Vec<&'static str>,
|
8
8
|
null_string: Option<String>,
|
9
|
+
flexible_default: Option<String>,
|
9
10
|
},
|
10
11
|
MultiThreaded {
|
11
12
|
headers: Vec<&'static str>,
|
@@ -33,10 +34,16 @@ impl<T: RecordParser> ReadImpl<T> {
|
|
33
34
|
reader,
|
34
35
|
headers,
|
35
36
|
null_string,
|
37
|
+
flexible_default,
|
36
38
|
} => {
|
37
39
|
let mut record = csv::StringRecord::new();
|
38
40
|
match reader.read_record(&mut record) {
|
39
|
-
Ok(true) => Some(T::parse(
|
41
|
+
Ok(true) => Some(T::parse(
|
42
|
+
headers,
|
43
|
+
&record,
|
44
|
+
null_string.as_deref(),
|
45
|
+
flexible_default.as_deref(),
|
46
|
+
)),
|
40
47
|
_ => None,
|
41
48
|
}
|
42
49
|
}
|
@@ -50,6 +57,7 @@ impl<T: RecordParser> ReadImpl<T> {
|
|
50
57
|
receiver,
|
51
58
|
handle,
|
52
59
|
headers,
|
60
|
+
..
|
53
61
|
} => {
|
54
62
|
receiver.close();
|
55
63
|
if let Some(handle) = handle.take() {
|
data/ext/osv/src/reader.rs
CHANGED
@@ -18,6 +18,7 @@ pub fn parse_csv(
|
|
18
18
|
null_string,
|
19
19
|
buffer_size,
|
20
20
|
result_type,
|
21
|
+
flexible_default,
|
21
22
|
} = parse_csv_args(&ruby, args)?;
|
22
23
|
|
23
24
|
if !ruby.block_given() {
|
@@ -30,6 +31,7 @@ pub fn parse_csv(
|
|
30
31
|
null_string,
|
31
32
|
buffer_size,
|
32
33
|
result_type,
|
34
|
+
flexible_default,
|
33
35
|
});
|
34
36
|
}
|
35
37
|
|
@@ -37,6 +39,7 @@ pub fn parse_csv(
|
|
37
39
|
"hash" => Box::new(
|
38
40
|
RecordReaderBuilder::<HashMap<&'static str, Option<String>>>::new(&ruby, to_read)
|
39
41
|
.has_headers(has_headers)
|
42
|
+
.flexible_default(flexible_default)
|
40
43
|
.delimiter(delimiter)
|
41
44
|
.quote_char(quote_char)
|
42
45
|
.null_string(null_string)
|
@@ -47,6 +50,7 @@ pub fn parse_csv(
|
|
47
50
|
"array" => Box::new(
|
48
51
|
RecordReaderBuilder::<Vec<Option<String>>>::new(&ruby, to_read)
|
49
52
|
.has_headers(has_headers)
|
53
|
+
.flexible_default(flexible_default)
|
50
54
|
.delimiter(delimiter)
|
51
55
|
.quote_char(quote_char)
|
52
56
|
.null_string(null_string)
|
@@ -74,6 +78,7 @@ struct EnumeratorArgs {
|
|
74
78
|
null_string: Option<String>,
|
75
79
|
buffer_size: usize,
|
76
80
|
result_type: String,
|
81
|
+
flexible_default: Option<String>,
|
77
82
|
}
|
78
83
|
|
79
84
|
fn create_enumerator(
|
@@ -92,7 +97,7 @@ fn create_enumerator(
|
|
92
97
|
kwargs.aset(Symbol::new("nil_string"), args.null_string)?;
|
93
98
|
kwargs.aset(Symbol::new("buffer_size"), args.buffer_size)?;
|
94
99
|
kwargs.aset(Symbol::new("result_type"), Symbol::new(args.result_type))?;
|
95
|
-
|
100
|
+
kwargs.aset(Symbol::new("flexible_default"), args.flexible_default)?;
|
96
101
|
let enumerator = args
|
97
102
|
.rb_self
|
98
103
|
.enumeratorize("for_each", (args.to_read, KwArgs(kwargs)));
|
data/ext/osv/src/utils.rs
CHANGED
@@ -13,6 +13,7 @@ pub struct CsvArgs {
|
|
13
13
|
pub null_string: Option<String>,
|
14
14
|
pub buffer_size: usize,
|
15
15
|
pub result_type: String,
|
16
|
+
pub flexible_default: Option<String>,
|
16
17
|
}
|
17
18
|
|
18
19
|
/// Parse common arguments for CSV parsing
|
@@ -30,6 +31,7 @@ pub fn parse_csv_args(ruby: &Ruby, args: &[Value]) -> Result<CsvArgs, Error> {
|
|
30
31
|
Option<Option<String>>,
|
31
32
|
Option<usize>,
|
32
33
|
Option<Value>,
|
34
|
+
Option<Option<String>>,
|
33
35
|
),
|
34
36
|
(),
|
35
37
|
>(
|
@@ -42,6 +44,7 @@ pub fn parse_csv_args(ruby: &Ruby, args: &[Value]) -> Result<CsvArgs, Error> {
|
|
42
44
|
"nil_string",
|
43
45
|
"buffer_size",
|
44
46
|
"result_type",
|
47
|
+
"flexible_default",
|
45
48
|
],
|
46
49
|
)?;
|
47
50
|
|
@@ -111,6 +114,8 @@ pub fn parse_csv_args(ruby: &Ruby, args: &[Value]) -> Result<CsvArgs, Error> {
|
|
111
114
|
None => String::from("hash"),
|
112
115
|
};
|
113
116
|
|
117
|
+
let flexible_default = kwargs.optional.6.unwrap_or_default();
|
118
|
+
|
114
119
|
Ok(CsvArgs {
|
115
120
|
to_read,
|
116
121
|
has_headers,
|
@@ -119,5 +124,6 @@ pub fn parse_csv_args(ruby: &Ruby, args: &[Value]) -> Result<CsvArgs, Error> {
|
|
119
124
|
null_string,
|
120
125
|
buffer_size,
|
121
126
|
result_type,
|
127
|
+
flexible_default,
|
122
128
|
})
|
123
129
|
}
|
data/lib/osv/version.rb
CHANGED