osv 0.3.22 → 0.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +2 -3
- data/ext/osv/src/csv/builder.rs +10 -23
- data/ext/osv/src/csv/parser.rs +58 -30
- data/ext/osv/src/csv/record_reader.rs +19 -8
- data/ext/osv/src/reader.rs +6 -7
- data/ext/osv/src/utils.rs +8 -8
- data/lib/osv/version.rb +1 -1
- data/lib/osv.rbi +2 -5
- metadata +1 -1
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 51e4a387f1ed43bddc9f1f7a118637953d04239b5324ef131b9c860577ed4d41
|
4
|
+
data.tar.gz: e42928a09656216bbadcc2458953a8c5f28401ddf27095fc05038e0960471854
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 4100c50a629ba5803db883532cfbe547eb3091e421b0876595d91791d8952a7b0169477c9c6f31063eafa5b91d0a9b1a9f0a5ae016d70cdd101e284beebfaf22
|
7
|
+
data.tar.gz: 90a822c644fcb37dc1892ede85a54395bc9e62a4b0b0a1af838182d390702d0ee4253151faafcedbf734b0a381fe2acf5c1ab23b842059fbdd4d51570fe33e58
|
data/README.md
CHANGED
@@ -84,11 +84,10 @@ OSV.for_each("data.csv",
|
|
84
84
|
|
85
85
|
# Parsing behavior
|
86
86
|
flexible: false, # Allow varying number of fields (default: false)
|
87
|
-
flexible_default: nil, # Default value for missing fields. If unset, we ignore missing fields.
|
88
|
-
# Implicitly enables flexible mode if set.
|
89
87
|
trim: :all, # Whether to trim whitespace. Options are :all, :headers, or :fields (default: nil)
|
90
88
|
buffer_size: 1024, # Number of rows to buffer in memory (default: 1024)
|
91
89
|
ignore_null_bytes: false, # Boolean specifying if null bytes should be ignored (default: false)
|
90
|
+
lossy: false, # Boolean specifying if invalid UTF-8 characters should be replaced with a replacement character (default: false)
|
92
91
|
)
|
93
92
|
```
|
94
93
|
|
@@ -103,9 +102,9 @@ OSV.for_each("data.csv",
|
|
103
102
|
- `buffer_size`: Integer specifying the number of rows to buffer in memory (default: 1024)
|
104
103
|
- `result_type`: String specifying the output format ("hash" or "array" or :hash or :array)
|
105
104
|
- `flexible`: Boolean specifying if the parser should be flexible (default: false)
|
106
|
-
- `flexible_default`: String specifying the default value for missing fields. Implicitly enables flexible mode if set. (default: `nil`)
|
107
105
|
- `trim`: String specifying the trim mode ("all" or "headers" or "fields" or :all or :headers or :fields)
|
108
106
|
- `ignore_null_bytes`: Boolean specifying if null bytes should be ignored (default: false)
|
107
|
+
- `lossy`: Boolean specifying if invalid UTF-8 characters should be replaced with a replacement character (default: false)
|
109
108
|
|
110
109
|
When `has_headers` is false, hash keys will be generated as `"c0"`, `"c1"`, etc.
|
111
110
|
|
data/ext/osv/src/csv/builder.rs
CHANGED
@@ -79,9 +79,9 @@ pub struct RecordReaderBuilder<'a, T: RecordParser<'a>> {
|
|
79
79
|
quote_char: u8,
|
80
80
|
null_string: Option<String>,
|
81
81
|
flexible: bool,
|
82
|
-
flexible_default: Option<String>,
|
83
82
|
trim: csv::Trim,
|
84
83
|
ignore_null_bytes: bool,
|
84
|
+
lossy: bool,
|
85
85
|
_phantom: PhantomData<T>,
|
86
86
|
_phantom_a: PhantomData<&'a ()>,
|
87
87
|
}
|
@@ -97,9 +97,9 @@ impl<'a, T: RecordParser<'a>> RecordReaderBuilder<'a, T> {
|
|
97
97
|
quote_char: b'"',
|
98
98
|
null_string: None,
|
99
99
|
flexible: false,
|
100
|
-
flexible_default: None,
|
101
100
|
trim: csv::Trim::None,
|
102
101
|
ignore_null_bytes: false,
|
102
|
+
lossy: false,
|
103
103
|
_phantom: PhantomData,
|
104
104
|
_phantom_a: PhantomData,
|
105
105
|
}
|
@@ -140,13 +140,6 @@ impl<'a, T: RecordParser<'a>> RecordReaderBuilder<'a, T> {
|
|
140
140
|
self
|
141
141
|
}
|
142
142
|
|
143
|
-
/// Sets the default value for missing fields when in flexible mode.
|
144
|
-
#[must_use]
|
145
|
-
pub fn flexible_default(mut self, flexible_default: Option<String>) -> Self {
|
146
|
-
self.flexible_default = flexible_default;
|
147
|
-
self
|
148
|
-
}
|
149
|
-
|
150
143
|
/// Sets the trimming mode for fields.
|
151
144
|
#[must_use]
|
152
145
|
pub fn trim(mut self, trim: csv::Trim) -> Self {
|
@@ -160,6 +153,12 @@ impl<'a, T: RecordParser<'a>> RecordReaderBuilder<'a, T> {
|
|
160
153
|
self
|
161
154
|
}
|
162
155
|
|
156
|
+
#[must_use]
|
157
|
+
pub fn lossy(mut self, lossy: bool) -> Self {
|
158
|
+
self.lossy = lossy;
|
159
|
+
self
|
160
|
+
}
|
161
|
+
|
163
162
|
/// Handles reading from a file descriptor.
|
164
163
|
fn handle_file_descriptor(&self) -> Result<Box<dyn SeekableRead>, ReaderError> {
|
165
164
|
let raw_value = self.to_read.as_raw();
|
@@ -202,7 +201,7 @@ impl<'a, T: RecordParser<'a>> RecordReaderBuilder<'a, T> {
|
|
202
201
|
build_ruby_reader(&self.ruby, self.to_read)?
|
203
202
|
};
|
204
203
|
|
205
|
-
let flexible = self.flexible || self.flexible_default.is_some();
|
204
|
+
let flexible = self.flexible;
|
206
205
|
let reader = BufReader::with_capacity(READ_BUFFER_SIZE, readable);
|
207
206
|
|
208
207
|
let mut reader = csv::ReaderBuilder::new()
|
@@ -220,18 +219,6 @@ impl<'a, T: RecordParser<'a>> RecordReaderBuilder<'a, T> {
|
|
220
219
|
}
|
221
220
|
let static_headers = StringCache::intern_many(&headers)?;
|
222
221
|
|
223
|
-
// We intern both of these to get static string references we can reuse throughout the parser.
|
224
|
-
let flexible_default = self
|
225
|
-
.flexible_default
|
226
|
-
.map(|s| {
|
227
|
-
RString::new(&s)
|
228
|
-
.to_interned_str()
|
229
|
-
.as_str()
|
230
|
-
.map_err(|e| ReaderError::InvalidFlexibleDefault(format!("{:?}", e)))
|
231
|
-
})
|
232
|
-
.transpose()?
|
233
|
-
.map(Cow::Borrowed);
|
234
|
-
|
235
222
|
let null_string = self
|
236
223
|
.null_string
|
237
224
|
.map(|s| {
|
@@ -247,8 +234,8 @@ impl<'a, T: RecordParser<'a>> RecordReaderBuilder<'a, T> {
|
|
247
234
|
reader,
|
248
235
|
static_headers,
|
249
236
|
null_string,
|
250
|
-
flexible_default,
|
251
237
|
self.ignore_null_bytes,
|
238
|
+
self.lossy,
|
252
239
|
))
|
253
240
|
}
|
254
241
|
}
|
data/ext/osv/src/csv/parser.rs
CHANGED
@@ -5,14 +5,18 @@ use std::hash::BuildHasher;
|
|
5
5
|
use super::header_cache::StringCacheKey;
|
6
6
|
use super::CowStr;
|
7
7
|
|
8
|
+
pub enum CsvRecordType {
|
9
|
+
String(csv::StringRecord),
|
10
|
+
Byte(csv::ByteRecord),
|
11
|
+
}
|
12
|
+
|
8
13
|
pub trait RecordParser<'a> {
|
9
14
|
type Output;
|
10
15
|
|
11
16
|
fn parse(
|
12
17
|
headers: &[StringCacheKey],
|
13
|
-
record: &csv::StringRecord,
|
18
|
+
record: &CsvRecordType,
|
14
19
|
null_string: Option<Cow<'a, str>>,
|
15
|
-
flexible_default: Option<Cow<'a, str>>,
|
16
20
|
ignore_null_bytes: bool,
|
17
21
|
) -> Self::Output;
|
18
22
|
}
|
@@ -25,20 +29,18 @@ impl<'a, S: BuildHasher + Default> RecordParser<'a>
|
|
25
29
|
#[inline]
|
26
30
|
fn parse(
|
27
31
|
headers: &[StringCacheKey],
|
28
|
-
record: &csv::StringRecord,
|
32
|
+
record: &CsvRecordType,
|
29
33
|
null_string: Option<Cow<'a, str>>,
|
30
|
-
flexible_default: Option<Cow<'a, str>>,
|
31
34
|
ignore_null_bytes: bool,
|
32
35
|
) -> Self::Output {
|
33
36
|
let mut map = HashMap::with_capacity_and_hasher(headers.len(), S::default());
|
34
37
|
|
35
38
|
let shared_empty = Cow::Borrowed("");
|
36
|
-
|
39
|
+
|
37
40
|
headers.iter().enumerate().for_each(|(i, header)| {
|
38
|
-
let value = record
|
39
|
-
|
40
|
-
|
41
|
-
if null_string.as_deref() == Some(field) {
|
41
|
+
let value = match record {
|
42
|
+
CsvRecordType::String(s) => s.get(i).and_then(|field| {
|
43
|
+
if null_string.as_deref() == Some(field.as_ref()) {
|
42
44
|
None
|
43
45
|
} else if field.is_empty() {
|
44
46
|
Some(CowStr(shared_empty.clone()))
|
@@ -47,8 +49,22 @@ impl<'a, S: BuildHasher + Default> RecordParser<'a>
|
|
47
49
|
} else {
|
48
50
|
Some(CowStr(Cow::Owned(field.to_string())))
|
49
51
|
}
|
50
|
-
},
|
51
|
-
|
52
|
+
}),
|
53
|
+
|
54
|
+
CsvRecordType::Byte(b) => b.get(i).and_then(|field| {
|
55
|
+
let field = String::from_utf8_lossy(field);
|
56
|
+
if null_string.as_deref() == Some(field.as_ref()) {
|
57
|
+
None
|
58
|
+
} else if field.is_empty() {
|
59
|
+
Some(CowStr(shared_empty.clone()))
|
60
|
+
} else if ignore_null_bytes {
|
61
|
+
Some(CowStr(Cow::Owned(field.replace("\0", ""))))
|
62
|
+
} else {
|
63
|
+
Some(CowStr(Cow::Owned(field.to_string())))
|
64
|
+
}
|
65
|
+
}),
|
66
|
+
};
|
67
|
+
|
52
68
|
map.insert(*header, value);
|
53
69
|
});
|
54
70
|
map
|
@@ -61,35 +77,47 @@ impl<'a> RecordParser<'a> for Vec<Option<CowStr<'a>>> {
|
|
61
77
|
#[inline]
|
62
78
|
fn parse(
|
63
79
|
headers: &[StringCacheKey],
|
64
|
-
record: &csv::StringRecord,
|
80
|
+
record: &CsvRecordType,
|
65
81
|
null_string: Option<Cow<'a, str>>,
|
66
|
-
flexible_default: Option<Cow<'a, str>>,
|
67
82
|
ignore_null_bytes: bool,
|
68
83
|
) -> Self::Output {
|
69
84
|
let target_len = headers.len();
|
70
85
|
let mut vec = Vec::with_capacity(target_len);
|
71
86
|
|
72
87
|
let shared_empty = Cow::Borrowed("");
|
73
|
-
let shared_default = flexible_default.map(CowStr);
|
74
|
-
|
75
|
-
for field in record.iter() {
|
76
|
-
let value = if Some(field) == null_string.as_deref() {
|
77
|
-
None
|
78
|
-
} else if field.is_empty() {
|
79
|
-
Some(CowStr(shared_empty.clone()))
|
80
|
-
} else if ignore_null_bytes {
|
81
|
-
Some(CowStr(Cow::Owned(field.replace("\0", ""))))
|
82
|
-
} else {
|
83
|
-
Some(CowStr(Cow::Owned(field.to_string())))
|
84
|
-
};
|
85
|
-
vec.push(value);
|
86
|
-
}
|
87
88
|
|
88
|
-
|
89
|
-
|
90
|
-
|
89
|
+
match record {
|
90
|
+
CsvRecordType::String(record) => {
|
91
|
+
for field in record.iter() {
|
92
|
+
let value = if Some(field.as_ref()) == null_string.as_deref() {
|
93
|
+
None
|
94
|
+
} else if field.is_empty() {
|
95
|
+
Some(CowStr(shared_empty.clone()))
|
96
|
+
} else if ignore_null_bytes {
|
97
|
+
Some(CowStr(Cow::Owned(field.replace("\0", ""))))
|
98
|
+
} else {
|
99
|
+
Some(CowStr(Cow::Owned(field.to_string())))
|
100
|
+
};
|
101
|
+
vec.push(value);
|
102
|
+
}
|
103
|
+
}
|
104
|
+
CsvRecordType::Byte(record) => {
|
105
|
+
for field in record.iter() {
|
106
|
+
let field = String::from_utf8_lossy(field);
|
107
|
+
let value = if Some(field.as_ref()) == null_string.as_deref() {
|
108
|
+
None
|
109
|
+
} else if field.is_empty() {
|
110
|
+
Some(CowStr(shared_empty.clone()))
|
111
|
+
} else if ignore_null_bytes {
|
112
|
+
Some(CowStr(Cow::Owned(field.replace("\0", ""))))
|
113
|
+
} else {
|
114
|
+
Some(CowStr(Cow::Owned(field.to_string())))
|
115
|
+
};
|
116
|
+
vec.push(value);
|
117
|
+
}
|
91
118
|
}
|
92
119
|
}
|
120
|
+
|
93
121
|
vec
|
94
122
|
}
|
95
123
|
}
|
data/ext/osv/src/csv/record_reader.rs
CHANGED
@@ -1,6 +1,6 @@
|
|
1
1
|
use super::builder::ReaderError;
|
2
2
|
use super::header_cache::StringCacheKey;
|
3
|
-
use super::parser::RecordParser;
|
3
|
+
use super::parser::{CsvRecordType, RecordParser};
|
4
4
|
use super::{header_cache::StringCache, ruby_reader::SeekableRead};
|
5
5
|
use magnus::{Error, Ruby};
|
6
6
|
use std::borrow::Cow;
|
@@ -16,8 +16,7 @@ pub struct RecordReader<'a, T: RecordParser<'a>> {
|
|
16
16
|
reader: csv::Reader<BufReader<Box<dyn SeekableRead>>>,
|
17
17
|
headers: Vec<StringCacheKey>,
|
18
18
|
null_string: Option<Cow<'a, str>>,
|
19
|
-
|
20
|
-
string_record: csv::StringRecord,
|
19
|
+
string_record: CsvRecordType,
|
21
20
|
parser: std::marker::PhantomData<T>,
|
22
21
|
ignore_null_bytes: bool,
|
23
22
|
}
|
@@ -57,16 +56,25 @@ impl<'a, T: RecordParser<'a>> RecordReader<'a, T> {
|
|
57
56
|
reader: csv::Reader<BufReader<Box<dyn SeekableRead>>>,
|
58
57
|
headers: Vec<StringCacheKey>,
|
59
58
|
null_string: Option<Cow<'a, str>>,
|
60
|
-
flexible_default: Option<Cow<'a, str>>,
|
61
59
|
ignore_null_bytes: bool,
|
60
|
+
lossy: bool,
|
62
61
|
) -> Self {
|
63
62
|
let headers_len = headers.len();
|
64
63
|
Self {
|
65
64
|
reader,
|
66
65
|
headers,
|
67
66
|
null_string,
|
68
|
-
|
69
|
-
|
67
|
+
string_record: if lossy {
|
68
|
+
CsvRecordType::Byte(csv::ByteRecord::with_capacity(
|
69
|
+
READ_BUFFER_SIZE,
|
70
|
+
headers_len,
|
71
|
+
))
|
72
|
+
} else {
|
73
|
+
CsvRecordType::String(csv::StringRecord::with_capacity(
|
74
|
+
READ_BUFFER_SIZE,
|
75
|
+
headers_len,
|
76
|
+
))
|
77
|
+
},
|
70
78
|
parser: std::marker::PhantomData,
|
71
79
|
ignore_null_bytes,
|
72
80
|
}
|
@@ -74,12 +82,15 @@ impl<'a, T: RecordParser<'a>> RecordReader<'a, T> {
|
|
74
82
|
|
75
83
|
/// Attempts to read the next record, returning any errors encountered.
|
76
84
|
fn try_next(&mut self) -> Result<Option<T::Output>, ReaderError> {
|
77
|
-
|
85
|
+
let record = match self.string_record {
|
86
|
+
CsvRecordType::String(ref mut record) => self.reader.read_record(record),
|
87
|
+
CsvRecordType::Byte(ref mut record) => self.reader.read_byte_record(record),
|
88
|
+
}?;
|
89
|
+
if record {
|
78
90
|
Ok(Some(T::parse(
|
79
91
|
&self.headers,
|
80
92
|
&self.string_record,
|
81
93
|
self.null_string.clone(),
|
82
|
-
self.flexible_default.clone(),
|
83
94
|
self.ignore_null_bytes,
|
84
95
|
)))
|
85
96
|
} else {
|
data/ext/osv/src/reader.rs
CHANGED
@@ -34,9 +34,9 @@ struct EnumeratorArgs {
|
|
34
34
|
null_string: Option<String>,
|
35
35
|
result_type: String,
|
36
36
|
flexible: bool,
|
37
|
-
flexible_default: Option<String>,
|
38
37
|
trim: Option<String>,
|
39
38
|
ignore_null_bytes: bool,
|
39
|
+
lossy: bool,
|
40
40
|
}
|
41
41
|
|
42
42
|
/// Parses a CSV file with the given configuration.
|
@@ -56,9 +56,9 @@ pub fn parse_csv(rb_self: Value, args: &[Value]) -> Result<Value, Error> {
|
|
56
56
|
null_string,
|
57
57
|
result_type,
|
58
58
|
flexible,
|
59
|
-
flexible_default,
|
60
59
|
trim,
|
61
60
|
ignore_null_bytes,
|
61
|
+
lossy,
|
62
62
|
} = parse_read_csv_args(&ruby, args)?;
|
63
63
|
|
64
64
|
if !ruby.block_given() {
|
@@ -71,7 +71,6 @@ pub fn parse_csv(rb_self: Value, args: &[Value]) -> Result<Value, Error> {
|
|
71
71
|
null_string,
|
72
72
|
result_type,
|
73
73
|
flexible,
|
74
|
-
flexible_default,
|
75
74
|
trim: match trim {
|
76
75
|
Trim::All => Some("all".to_string()),
|
77
76
|
Trim::Headers => Some("headers".to_string()),
|
@@ -79,6 +78,7 @@ pub fn parse_csv(rb_self: Value, args: &[Value]) -> Result<Value, Error> {
|
|
79
78
|
_ => None,
|
80
79
|
},
|
81
80
|
ignore_null_bytes,
|
81
|
+
lossy,
|
82
82
|
})
|
83
83
|
.map(|yield_enum| yield_enum.into_value_with(&ruby));
|
84
84
|
}
|
@@ -97,12 +97,12 @@ pub fn parse_csv(rb_self: Value, args: &[Value]) -> Result<Value, Error> {
|
|
97
97
|
>::new(ruby, to_read)
|
98
98
|
.has_headers(has_headers)
|
99
99
|
.flexible(flexible)
|
100
|
-
.flexible_default(flexible_default)
|
101
100
|
.trim(trim)
|
102
101
|
.delimiter(delimiter)
|
103
102
|
.quote_char(quote_char)
|
104
103
|
.null_string(null_string)
|
105
104
|
.ignore_null_bytes(ignore_null_bytes)
|
105
|
+
.lossy(lossy)
|
106
106
|
.build()?;
|
107
107
|
|
108
108
|
let ruby = unsafe { Ruby::get_unchecked() };
|
@@ -115,12 +115,12 @@ pub fn parse_csv(rb_self: Value, args: &[Value]) -> Result<Value, Error> {
|
|
115
115
|
let builder = RecordReaderBuilder::<Vec<Option<CowStr<'static>>>>::new(ruby, to_read)
|
116
116
|
.has_headers(has_headers)
|
117
117
|
.flexible(flexible)
|
118
|
-
.flexible_default(flexible_default)
|
119
118
|
.trim(trim)
|
120
119
|
.delimiter(delimiter)
|
121
120
|
.quote_char(quote_char)
|
122
121
|
.null_string(null_string)
|
123
122
|
.ignore_null_bytes(ignore_null_bytes)
|
123
|
+
.lossy(lossy)
|
124
124
|
.build()?;
|
125
125
|
|
126
126
|
let ruby = unsafe { Ruby::get_unchecked() };
|
@@ -150,10 +150,9 @@ fn create_enumerator(args: EnumeratorArgs) -> Result<magnus::Enumerator, Error>
|
|
150
150
|
kwargs.aset(Symbol::new("nil_string"), args.null_string)?;
|
151
151
|
kwargs.aset(Symbol::new("result_type"), Symbol::new(args.result_type))?;
|
152
152
|
kwargs.aset(Symbol::new("flexible"), args.flexible)?;
|
153
|
-
kwargs.aset(Symbol::new("flexible_default"), args.flexible_default)?;
|
154
153
|
kwargs.aset(Symbol::new("trim"), args.trim.map(Symbol::new))?;
|
155
154
|
kwargs.aset(Symbol::new("ignore_null_bytes"), args.ignore_null_bytes)?;
|
156
|
-
|
155
|
+
kwargs.aset(Symbol::new("lossy"), args.lossy)?;
|
157
156
|
Ok(args
|
158
157
|
.rb_self
|
159
158
|
.enumeratorize("for_each", (args.to_read, KwArgs(kwargs))))
|
data/ext/osv/src/utils.rs
CHANGED
@@ -34,9 +34,9 @@ pub struct ReadCsvArgs {
|
|
34
34
|
pub null_string: Option<String>,
|
35
35
|
pub result_type: String,
|
36
36
|
pub flexible: bool,
|
37
|
-
pub flexible_default: Option<String>,
|
38
37
|
pub trim: csv::Trim,
|
39
38
|
pub ignore_null_bytes: bool,
|
39
|
+
pub lossy: bool,
|
40
40
|
}
|
41
41
|
|
42
42
|
/// Parse common arguments for CSV parsing
|
@@ -54,9 +54,9 @@ pub fn parse_read_csv_args(ruby: &Ruby, args: &[Value]) -> Result<ReadCsvArgs, E
|
|
54
54
|
Option<Option<String>>,
|
55
55
|
Option<Option<Value>>,
|
56
56
|
Option<Option<bool>>,
|
57
|
-
Option<Option<Option<String>>>,
|
58
57
|
Option<Option<Value>>,
|
59
58
|
Option<Option<bool>>,
|
59
|
+
Option<Option<bool>>,
|
60
60
|
),
|
61
61
|
(),
|
62
62
|
>(
|
@@ -69,9 +69,9 @@ pub fn parse_read_csv_args(ruby: &Ruby, args: &[Value]) -> Result<ReadCsvArgs, E
|
|
69
69
|
"nil_string",
|
70
70
|
"result_type",
|
71
71
|
"flexible",
|
72
|
-
"flexible_default",
|
73
72
|
"trim",
|
74
73
|
"ignore_null_bytes",
|
74
|
+
"lossy",
|
75
75
|
],
|
76
76
|
)?;
|
77
77
|
|
@@ -134,11 +134,9 @@ pub fn parse_read_csv_args(ruby: &Ruby, args: &[Value]) -> Result<ReadCsvArgs, E
|
|
134
134
|
|
135
135
|
let flexible = kwargs.optional.5.flatten().unwrap_or_default();
|
136
136
|
|
137
|
-
let flexible_default = kwargs.optional.6.flatten().unwrap_or_default();
|
138
|
-
|
139
137
|
let trim = match kwargs
|
140
138
|
.optional
|
141
|
-
.7
|
139
|
+
.6
|
142
140
|
.flatten()
|
143
141
|
.map(|value| parse_string_or_symbol(ruby, value))
|
144
142
|
{
|
@@ -166,7 +164,9 @@ pub fn parse_read_csv_args(ruby: &Ruby, args: &[Value]) -> Result<ReadCsvArgs, E
|
|
166
164
|
None => csv::Trim::None,
|
167
165
|
};
|
168
166
|
|
169
|
-
let ignore_null_bytes = kwargs.optional.8.flatten().unwrap_or_default();
|
167
|
+
let ignore_null_bytes = kwargs.optional.7.flatten().unwrap_or_default();
|
168
|
+
|
169
|
+
let lossy = kwargs.optional.8.flatten().unwrap_or_default();
|
170
170
|
|
171
171
|
Ok(ReadCsvArgs {
|
172
172
|
to_read,
|
@@ -176,8 +176,8 @@ pub fn parse_read_csv_args(ruby: &Ruby, args: &[Value]) -> Result<ReadCsvArgs, E
|
|
176
176
|
null_string,
|
177
177
|
result_type,
|
178
178
|
flexible,
|
179
|
-
flexible_default,
|
180
179
|
trim,
|
181
180
|
ignore_null_bytes,
|
181
|
+
lossy,
|
182
182
|
})
|
183
183
|
}
|
data/lib/osv/version.rb
CHANGED
data/lib/osv.rbi
CHANGED
@@ -17,14 +17,12 @@ module OSV
|
|
17
17
|
# ("hash" or "array" or :hash or :array)
|
18
18
|
# - `flexible`: Boolean specifying if the parser should be flexible
|
19
19
|
# (default: false)
|
20
|
-
# - `flexible_default`: String specifying the default value for missing fields.
|
21
|
-
# Implicitly enables flexible mode if set.
|
22
|
-
# (default: `nil`)
|
23
20
|
# - `trim`: String specifying the trim mode
|
24
21
|
# ("all" or "headers" or "fields" or :all or :headers or :fields)
|
25
22
|
# (default: `nil`)
|
26
23
|
# - `ignore_null_bytes`: Boolean specifying if null bytes should be ignored
|
27
24
|
# (default: false)
|
25
|
+
# - `lossy`: Boolean specifying if invalid UTF-8 characters should be replaced with a replacement character
|
28
26
|
sig do
|
29
27
|
params(
|
30
28
|
input: T.any(String, StringIO, IO),
|
@@ -35,7 +33,6 @@ module OSV
|
|
35
33
|
buffer_size: T.nilable(Integer),
|
36
34
|
result_type: T.nilable(T.any(String, Symbol)),
|
37
35
|
flexible: T.nilable(T::Boolean),
|
38
|
-
flexible_default: T.nilable(String),
|
39
36
|
ignore_null_bytes: T.nilable(T::Boolean),
|
40
37
|
trim: T.nilable(T.any(String, Symbol)),
|
41
38
|
blk: T.nilable(T.proc.params(row: T.any(T::Hash[String, T.nilable(String)], T::Array[T.nilable(String)])).void)
|
@@ -50,9 +47,9 @@ module OSV
|
|
50
47
|
buffer_size: nil,
|
51
48
|
result_type: nil,
|
52
49
|
flexible: nil,
|
53
|
-
flexible_default: nil,
|
54
50
|
ignore_null_bytes: nil,
|
55
51
|
trim: nil,
|
52
|
+
lossy: nil,
|
56
53
|
&blk
|
57
54
|
)
|
58
55
|
end
|