osv 0.3.18 → 0.3.19
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/ext/osv/src/csv/builder.rs +15 -3
- data/ext/osv/src/csv/header_cache.rs +2 -2
- data/ext/osv/src/csv/parser.rs +13 -4
- data/ext/osv/src/csv/record_reader.rs +4 -0
- data/ext/osv/src/reader.rs +8 -4
- data/ext/osv/src/utils.rs +6 -0
- data/lib/osv/version.rb +1 -1
- data/lib/osv.rbi +4 -0
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 87b1080bdb84d6db09a077d9eacc6c4792cfe93851671e4ba0438fe27c3d3218
|
4
|
+
data.tar.gz: f1dc704d251d906e9d6011c288c37844af8ccc976eb90611c629b7bc9cc9353e
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: aa7a083aebc9f480528de32fea2890189857c2381811f1cd90a8115724c75640a6bd11424e186492ace41a5fed28ee74dd17172f983801b48b31b81a750e24e9
|
7
|
+
data.tar.gz: 214c20e5011660e2250364ca79bd91a6c812053b401e9a1266abd500cf1e9e7c045f02054144f46190e45b459480aae70bbfab4d19ab246d1e1f5a8556f616a1
|
data/ext/osv/src/csv/builder.rs
CHANGED
@@ -67,6 +67,7 @@ pub struct RecordReaderBuilder<'a, T: RecordParser<'a>> {
|
|
67
67
|
flexible: bool,
|
68
68
|
flexible_default: Option<String>,
|
69
69
|
trim: csv::Trim,
|
70
|
+
ignore_null_bytes: bool,
|
70
71
|
_phantom: PhantomData<T>,
|
71
72
|
_phantom_a: PhantomData<&'a ()>,
|
72
73
|
}
|
@@ -84,6 +85,7 @@ impl<'a, T: RecordParser<'a>> RecordReaderBuilder<'a, T> {
|
|
84
85
|
flexible: false,
|
85
86
|
flexible_default: None,
|
86
87
|
trim: csv::Trim::None,
|
88
|
+
ignore_null_bytes: false,
|
87
89
|
_phantom: PhantomData,
|
88
90
|
_phantom_a: PhantomData,
|
89
91
|
}
|
@@ -138,6 +140,12 @@ impl<'a, T: RecordParser<'a>> RecordReaderBuilder<'a, T> {
|
|
138
140
|
self
|
139
141
|
}
|
140
142
|
|
143
|
+
#[must_use]
|
144
|
+
pub fn ignore_null_bytes(mut self, ignore_null_bytes: bool) -> Self {
|
145
|
+
self.ignore_null_bytes = ignore_null_bytes;
|
146
|
+
self
|
147
|
+
}
|
148
|
+
|
141
149
|
/// Handles reading from a file descriptor.
|
142
150
|
fn handle_file_descriptor(&self) -> Result<Box<dyn SeekableRead>, ReaderError> {
|
143
151
|
let raw_value = self.to_read.as_raw();
|
@@ -191,7 +199,10 @@ impl<'a, T: RecordParser<'a>> RecordReaderBuilder<'a, T> {
|
|
191
199
|
.trim(self.trim)
|
192
200
|
.from_reader(reader);
|
193
201
|
|
194
|
-
let headers = RecordReader::<T>::get_headers(&self.ruby, &mut reader, self.has_headers)?;
|
202
|
+
let mut headers = RecordReader::<T>::get_headers(&self.ruby, &mut reader, self.has_headers)?;
|
203
|
+
if self.ignore_null_bytes {
|
204
|
+
headers = headers.iter().map(|h| h.replace("\0", "")).collect();
|
205
|
+
}
|
195
206
|
let static_headers = StringCache::intern_many(&headers)?;
|
196
207
|
|
197
208
|
// We intern both of these to get static string references we can reuse throughout the parser.
|
@@ -204,7 +215,7 @@ impl<'a, T: RecordParser<'a>> RecordReaderBuilder<'a, T> {
|
|
204
215
|
.map_err(|e| ReaderError::InvalidFlexibleDefault(format!("{:?}", e)))
|
205
216
|
})
|
206
217
|
.transpose()?
|
207
|
-
.map(
|
218
|
+
.map(Cow::Borrowed);
|
208
219
|
|
209
220
|
let null_string = self
|
210
221
|
.null_string
|
@@ -215,13 +226,14 @@ impl<'a, T: RecordParser<'a>> RecordReaderBuilder<'a, T> {
|
|
215
226
|
.map_err(|e| ReaderError::InvalidNullString(format!("{:?}", e)))
|
216
227
|
})
|
217
228
|
.transpose()?
|
218
|
-
.map(
|
229
|
+
.map(Cow::Borrowed);
|
219
230
|
|
220
231
|
Ok(RecordReader::new(
|
221
232
|
reader,
|
222
233
|
static_headers,
|
223
234
|
null_string,
|
224
235
|
flexible_default,
|
236
|
+
self.ignore_null_bytes,
|
225
237
|
))
|
226
238
|
}
|
227
239
|
}
|
@@ -99,7 +99,7 @@ impl StringCache {
|
|
99
99
|
counter.fetch_add(1, Ordering::Relaxed);
|
100
100
|
result.push(*interned_string);
|
101
101
|
} else {
|
102
|
-
let interned = StringCacheKey::new(
|
102
|
+
let interned = StringCacheKey::new(string);
|
103
103
|
let leaked = Box::leak(string.clone().into_boxed_str());
|
104
104
|
cache.insert(leaked, (interned, AtomicU32::new(1)));
|
105
105
|
result.push(interned);
|
@@ -154,7 +154,7 @@ impl<I: Iterator> Iterator for HeaderCacheCleanupIter<I> {
|
|
154
154
|
impl<I> Drop for HeaderCacheCleanupIter<I> {
|
155
155
|
fn drop(&mut self) {
|
156
156
|
if let Some(headers) = self.headers.get() {
|
157
|
-
StringCache::clear(
|
157
|
+
StringCache::clear(headers).unwrap();
|
158
158
|
}
|
159
159
|
}
|
160
160
|
}
|
data/ext/osv/src/csv/parser.rs
CHANGED
@@ -13,6 +13,7 @@ pub trait RecordParser<'a> {
|
|
13
13
|
record: &csv::StringRecord,
|
14
14
|
null_string: Option<Cow<'a, str>>,
|
15
15
|
flexible_default: Option<Cow<'a, str>>,
|
16
|
+
ignore_null_bytes: bool,
|
16
17
|
) -> Self::Output;
|
17
18
|
}
|
18
19
|
|
@@ -27,12 +28,13 @@ impl<'a, S: BuildHasher + Default> RecordParser<'a>
|
|
27
28
|
record: &csv::StringRecord,
|
28
29
|
null_string: Option<Cow<'a, str>>,
|
29
30
|
flexible_default: Option<Cow<'a, str>>,
|
31
|
+
ignore_null_bytes: bool,
|
30
32
|
) -> Self::Output {
|
31
33
|
let mut map = HashMap::with_capacity_and_hasher(headers.len(), S::default());
|
32
34
|
|
33
35
|
let shared_empty = Cow::Borrowed("");
|
34
36
|
let shared_default = flexible_default.map(CowStr);
|
35
|
-
headers.iter().enumerate().for_each(|(i,
|
37
|
+
headers.iter().enumerate().for_each(|(i, header)| {
|
36
38
|
let value = record.get(i).map_or_else(
|
37
39
|
|| shared_default.clone(),
|
38
40
|
|field| {
|
@@ -40,12 +42,15 @@ impl<'a, S: BuildHasher + Default> RecordParser<'a>
|
|
40
42
|
None
|
41
43
|
} else if field.is_empty() {
|
42
44
|
Some(CowStr(shared_empty.clone()))
|
43
|
-
} else {
|
45
|
+
} else if ignore_null_bytes {
|
46
|
+
Some(CowStr(Cow::Owned(field.replace("\0", "").to_string())))
|
47
|
+
}
|
48
|
+
else {
|
44
49
|
Some(CowStr(Cow::Owned(field.to_string())))
|
45
50
|
}
|
46
51
|
},
|
47
52
|
);
|
48
|
-
map.insert(
|
53
|
+
map.insert(*header, value);
|
49
54
|
});
|
50
55
|
map
|
51
56
|
}
|
@@ -60,6 +65,7 @@ impl<'a> RecordParser<'a> for Vec<Option<CowStr<'a>>> {
|
|
60
65
|
record: &csv::StringRecord,
|
61
66
|
null_string: Option<Cow<'a, str>>,
|
62
67
|
flexible_default: Option<Cow<'a, str>>,
|
68
|
+
ignore_null_bytes: bool,
|
63
69
|
) -> Self::Output {
|
64
70
|
let target_len = headers.len();
|
65
71
|
let mut vec = Vec::with_capacity(target_len);
|
@@ -72,7 +78,10 @@ impl<'a> RecordParser<'a> for Vec<Option<CowStr<'a>>> {
|
|
72
78
|
None
|
73
79
|
} else if field.is_empty() {
|
74
80
|
Some(CowStr(shared_empty.clone()))
|
75
|
-
} else {
|
81
|
+
} else if ignore_null_bytes {
|
82
|
+
Some(CowStr(Cow::Owned(field.replace("\0", "").to_string())))
|
83
|
+
}
|
84
|
+
else {
|
76
85
|
Some(CowStr(Cow::Owned(field.to_string())))
|
77
86
|
};
|
78
87
|
vec.push(value);
|
@@ -18,6 +18,7 @@ pub struct RecordReader<'a, T: RecordParser<'a>> {
|
|
18
18
|
flexible_default: Option<Cow<'a, str>>,
|
19
19
|
string_record: csv::StringRecord,
|
20
20
|
parser: std::marker::PhantomData<T>,
|
21
|
+
ignore_null_bytes: bool,
|
21
22
|
}
|
22
23
|
|
23
24
|
impl<'a, T: RecordParser<'a>> RecordReader<'a, T> {
|
@@ -56,6 +57,7 @@ impl<'a, T: RecordParser<'a>> RecordReader<'a, T> {
|
|
56
57
|
headers: Vec<StringCacheKey>,
|
57
58
|
null_string: Option<Cow<'a, str>>,
|
58
59
|
flexible_default: Option<Cow<'a, str>>,
|
60
|
+
ignore_null_bytes: bool,
|
59
61
|
) -> Self {
|
60
62
|
let headers_len = headers.len();
|
61
63
|
Self {
|
@@ -65,6 +67,7 @@ impl<'a, T: RecordParser<'a>> RecordReader<'a, T> {
|
|
65
67
|
flexible_default,
|
66
68
|
string_record: csv::StringRecord::with_capacity(READ_BUFFER_SIZE, headers_len),
|
67
69
|
parser: std::marker::PhantomData,
|
70
|
+
ignore_null_bytes,
|
68
71
|
}
|
69
72
|
}
|
70
73
|
|
@@ -76,6 +79,7 @@ impl<'a, T: RecordParser<'a>> RecordReader<'a, T> {
|
|
76
79
|
&self.string_record,
|
77
80
|
self.null_string.clone(),
|
78
81
|
self.flexible_default.clone(),
|
82
|
+
self.ignore_null_bytes
|
79
83
|
))),
|
80
84
|
false => Ok(None),
|
81
85
|
}
|
data/ext/osv/src/reader.rs
CHANGED
@@ -60,6 +60,7 @@ pub fn parse_csv(
|
|
60
60
|
flexible,
|
61
61
|
flexible_default,
|
62
62
|
trim,
|
63
|
+
ignore_null_bytes,
|
63
64
|
} = parse_read_csv_args(&ruby, args)?;
|
64
65
|
|
65
66
|
if !ruby.block_given() {
|
@@ -70,9 +71,9 @@ pub fn parse_csv(
|
|
70
71
|
delimiter,
|
71
72
|
quote_char,
|
72
73
|
null_string,
|
73
|
-
result_type
|
74
|
+
result_type,
|
74
75
|
flexible,
|
75
|
-
flexible_default
|
76
|
+
flexible_default,
|
76
77
|
trim: match trim {
|
77
78
|
Trim::All => Some("all".to_string()),
|
78
79
|
Trim::Headers => Some("headers".to_string()),
|
@@ -100,9 +101,11 @@ pub fn parse_csv(
|
|
100
101
|
.trim(trim)
|
101
102
|
.delimiter(delimiter)
|
102
103
|
.quote_char(quote_char)
|
103
|
-
.null_string(null_string)
|
104
|
+
.null_string(null_string)
|
105
|
+
.ignore_null_bytes(ignore_null_bytes)
|
106
|
+
.build()?;
|
104
107
|
|
105
|
-
Box::new(builder.
|
108
|
+
Box::new(builder.map(CsvRecord::Map))
|
106
109
|
}
|
107
110
|
ResultType::Array => {
|
108
111
|
let builder = RecordReaderBuilder::<Vec<Option<CowStr<'static>>>>::new(ruby, to_read)
|
@@ -113,6 +116,7 @@ pub fn parse_csv(
|
|
113
116
|
.delimiter(delimiter)
|
114
117
|
.quote_char(quote_char)
|
115
118
|
.null_string(null_string)
|
119
|
+
.ignore_null_bytes(ignore_null_bytes)
|
116
120
|
.build()?;
|
117
121
|
|
118
122
|
Box::new(builder.map(CsvRecord::Vec))
|
data/ext/osv/src/utils.rs
CHANGED
@@ -36,6 +36,7 @@ pub struct ReadCsvArgs {
|
|
36
36
|
pub flexible: bool,
|
37
37
|
pub flexible_default: Option<String>,
|
38
38
|
pub trim: csv::Trim,
|
39
|
+
pub ignore_null_bytes: bool,
|
39
40
|
}
|
40
41
|
|
41
42
|
/// Parse common arguments for CSV parsing
|
@@ -55,6 +56,7 @@ pub fn parse_read_csv_args(ruby: &Ruby, args: &[Value]) -> Result<ReadCsvArgs, E
|
|
55
56
|
Option<Option<bool>>,
|
56
57
|
Option<Option<Option<String>>>,
|
57
58
|
Option<Option<Value>>,
|
59
|
+
Option<Option<bool>>,
|
58
60
|
),
|
59
61
|
(),
|
60
62
|
>(
|
@@ -69,6 +71,7 @@ pub fn parse_read_csv_args(ruby: &Ruby, args: &[Value]) -> Result<ReadCsvArgs, E
|
|
69
71
|
"flexible",
|
70
72
|
"flexible_default",
|
71
73
|
"trim",
|
74
|
+
"ignore_null_bytes",
|
72
75
|
],
|
73
76
|
)?;
|
74
77
|
|
@@ -163,6 +166,8 @@ pub fn parse_read_csv_args(ruby: &Ruby, args: &[Value]) -> Result<ReadCsvArgs, E
|
|
163
166
|
None => csv::Trim::None,
|
164
167
|
};
|
165
168
|
|
169
|
+
let ignore_null_bytes = kwargs.optional.8.flatten().unwrap_or_default();
|
170
|
+
|
166
171
|
Ok(ReadCsvArgs {
|
167
172
|
to_read,
|
168
173
|
has_headers,
|
@@ -173,5 +178,6 @@ pub fn parse_read_csv_args(ruby: &Ruby, args: &[Value]) -> Result<ReadCsvArgs, E
|
|
173
178
|
flexible,
|
174
179
|
flexible_default,
|
175
180
|
trim,
|
181
|
+
ignore_null_bytes,
|
176
182
|
})
|
177
183
|
}
|
data/lib/osv/version.rb
CHANGED
data/lib/osv.rbi
CHANGED
@@ -23,6 +23,8 @@ module OSV
|
|
23
23
|
# - `trim`: String specifying the trim mode
|
24
24
|
# ("all" or "headers" or "fields" or :all or :headers or :fields)
|
25
25
|
# (default: `nil`)
|
26
|
+
# - `ignore_null_bytes`: Boolean specifying if null bytes should be ignored
|
27
|
+
# (default: false)
|
26
28
|
sig do
|
27
29
|
params(
|
28
30
|
input: T.any(String, StringIO, IO),
|
@@ -34,6 +36,7 @@ module OSV
|
|
34
36
|
result_type: T.nilable(T.any(String, Symbol)),
|
35
37
|
flexible: T.nilable(T::Boolean),
|
36
38
|
flexible_default: T.nilable(String),
|
39
|
+
ignore_null_bytes: T.nilable(T::Boolean),
|
37
40
|
trim: T.nilable(T.any(String, Symbol)),
|
38
41
|
blk: T.nilable(T.proc.params(row: T.any(T::Hash[String, T.nilable(String)], T::Array[T.nilable(String)])).void)
|
39
42
|
).returns(T.any(Enumerator, T.untyped))
|
@@ -48,6 +51,7 @@ module OSV
|
|
48
51
|
result_type: nil,
|
49
52
|
flexible: nil,
|
50
53
|
flexible_default: nil,
|
54
|
+
ignore_null_bytes: nil,
|
51
55
|
trim: nil,
|
52
56
|
&blk
|
53
57
|
)
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: osv
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.3.18
|
4
|
+
version: 0.3.19
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Nathan Jaremko
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2025-01-
|
11
|
+
date: 2025-01-24 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rb_sys
|