osv 0.3.6 → 0.3.8

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 6d51b65a56cd4a514b4e7746bc87d78be5abe033b390251d0e111ad4ad30813c
4
- data.tar.gz: 59f4377530f2e5e54649b4c35a0ca78448f3a965279b358810ea9fcc17ddc5a8
3
+ metadata.gz: 02205de8cef4d5f7633c06720a9e925a2b608116354da4a1678d4746d2197d23
4
+ data.tar.gz: 3e1d63323fdaad1b6a60e0a0a63801f98710615d6616c882f0cdce00e36c6e2e
5
5
  SHA512:
6
- metadata.gz: 397350596a09659e20fc9db840006649687db469cefcd5be09965ba147cc688957316cb9675d434594c3a19aa66efd2c734a41c71235dcc8d0b445c24e42f1a2
7
- data.tar.gz: c225c5d016026a21afe8e90bf5a89e0465304236899d367b6484757073beefb1b75bb7381c37834bc2e344d1ee259d05e9ff82bba3d8214de098b3cf53eb571c
6
+ metadata.gz: df6a4a4b86c41010ea671ac0e98c2ee6307e62ceff35dab125868f0ee7edb6d14984348ecd4ac9f913489e5a6be0b364240b461334554385aabe5b3374fe798d
7
+ data.tar.gz: d931b888ce9d0ad1cdb1fa3d0be8cd0e526292206742f5adde718f414e9feca97eff3af6d4139d144c18a50e4807650ea9f7582153bcee80cea1e6ed4ce4ef49
data/README.md CHANGED
@@ -71,6 +71,8 @@ Both methods support the following options:
71
71
  - if you want to interpret empty strings as nil, set this to an empty string
72
72
  - `buffer_size`: Integer specifying the read buffer size
73
73
  - `result_type`: String specifying the output format ("hash" or "array")
74
+ - `flexible`: Boolean specifying if the parser should accept rows with a varying number of fields instead of stopping at the first row whose length differs (default: false)
75
+ - `flexible_default`: String specifying the default value for missing fields. Implicitly enables flexible mode if set. (default: `nil`)
74
76
 
75
77
  ### Input Sources
76
78
 
@@ -54,6 +54,8 @@ pub struct RecordReaderBuilder<'a, T: RecordParser + Send + 'static> {
54
54
  quote_char: u8,
55
55
  null_string: Option<String>,
56
56
  buffer: usize,
57
+ flexible: bool,
58
+ flexible_default: Option<String>,
57
59
  _phantom: PhantomData<T>,
58
60
  }
59
61
 
@@ -67,6 +69,8 @@ impl<'a, T: RecordParser + Send + 'static> RecordReaderBuilder<'a, T> {
67
69
  quote_char: b'"',
68
70
  null_string: None,
69
71
  buffer: 1000,
72
+ flexible: false,
73
+ flexible_default: None,
70
74
  _phantom: PhantomData,
71
75
  }
72
76
  }
@@ -96,6 +100,16 @@ impl<'a, T: RecordParser + Send + 'static> RecordReaderBuilder<'a, T> {
96
100
  self
97
101
  }
98
102
 
103
+ pub fn flexible(mut self, flexible: bool) -> Self {
104
+ self.flexible = flexible;
105
+ self
106
+ }
107
+
108
+ pub fn flexible_default(mut self, flexible_default: Option<String>) -> Self {
109
+ self.flexible_default = flexible_default;
110
+ self
111
+ }
112
+
99
113
  fn handle_string_io(&self) -> Result<Box<dyn Read + Send + 'static>, ReaderError> {
100
114
  let string: RString = self.to_read.funcall("string", ())?;
101
115
  let content = string.to_string()?;
@@ -173,10 +187,12 @@ impl<'a, T: RecordParser + Send + 'static> RecordReaderBuilder<'a, T> {
173
187
  self,
174
188
  readable: Box<dyn Read + Send + 'static>,
175
189
  ) -> Result<RecordReader<T>, ReaderError> {
190
+ let flexible = self.flexible || self.flexible_default.is_some();
176
191
  let mut reader = csv::ReaderBuilder::new()
177
192
  .has_headers(self.has_headers)
178
193
  .delimiter(self.delimiter)
179
194
  .quote(self.quote_char)
195
+ .flexible(flexible)
180
196
  .from_reader(readable);
181
197
 
182
198
  let headers = RecordReader::<T>::get_headers(self.ruby, &mut reader, self.has_headers)?;
@@ -186,10 +202,16 @@ impl<'a, T: RecordParser + Send + 'static> RecordReaderBuilder<'a, T> {
186
202
  let (sender, receiver) = kanal::bounded(self.buffer);
187
203
  let null_string = self.null_string.clone();
188
204
 
205
+ let flexible_default = self.flexible_default.clone();
189
206
  let handle = thread::spawn(move || {
190
207
  let mut record = csv::StringRecord::new();
191
208
  while let Ok(true) = reader.read_record(&mut record) {
192
- let row = T::parse(&static_headers, &record, null_string.as_deref());
209
+ let row = T::parse(
210
+ &static_headers,
211
+ &record,
212
+ null_string.as_deref(),
213
+ flexible_default.as_deref(),
214
+ );
193
215
  if sender.send(row).is_err() {
194
216
  break;
195
217
  }
@@ -211,10 +233,12 @@ impl<'a, T: RecordParser + Send + 'static> RecordReaderBuilder<'a, T> {
211
233
  self,
212
234
  readable: Box<dyn Read>,
213
235
  ) -> Result<RecordReader<T>, ReaderError> {
236
+ let flexible = self.flexible || self.flexible_default.is_some();
214
237
  let mut reader = csv::ReaderBuilder::new()
215
238
  .has_headers(self.has_headers)
216
239
  .delimiter(self.delimiter)
217
240
  .quote(self.quote_char)
241
+ .flexible(flexible)
218
242
  .from_reader(readable);
219
243
 
220
244
  let headers = RecordReader::<T>::get_headers(self.ruby, &mut reader, self.has_headers)?;
@@ -225,6 +249,7 @@ impl<'a, T: RecordParser + Send + 'static> RecordReaderBuilder<'a, T> {
225
249
  reader,
226
250
  headers: static_headers,
227
251
  null_string: self.null_string,
252
+ flexible_default: self.flexible_default,
228
253
  },
229
254
  })
230
255
  }
@@ -7,6 +7,7 @@ pub trait RecordParser {
7
7
  headers: &[&'static str],
8
8
  record: &csv::StringRecord,
9
9
  null_string: Option<&str>,
10
+ flexible_default: Option<&str>,
10
11
  ) -> Self::Output;
11
12
  }
12
13
 
@@ -18,26 +19,24 @@ impl RecordParser for HashMap<&'static str, Option<String>> {
18
19
  headers: &[&'static str],
19
20
  record: &csv::StringRecord,
20
21
  null_string: Option<&str>,
22
+ flexible_default: Option<&str>,
21
23
  ) -> Self::Output {
22
24
  let mut map = HashMap::with_capacity(headers.len());
23
- headers
24
- .iter()
25
- .zip(record.iter())
26
- .for_each(|(header, field)| {
27
- map.insert(
28
- *header,
25
+ headers.iter().enumerate().for_each(|(i, header)| {
26
+ let value = record.get(i).map_or_else(
27
+ || flexible_default.map(|s| s.to_string()),
28
+ |field| {
29
29
  if null_string == Some(field) {
30
30
  None
31
+ } else if field.is_empty() {
32
+ Some(String::new())
31
33
  } else {
32
- // Avoid allocating for empty strings
33
- if field.is_empty() {
34
- Some(String::new())
35
- } else {
36
- Some(field.to_string())
37
- }
38
- },
39
- );
40
- });
34
+ Some(field.to_string())
35
+ }
36
+ },
37
+ );
38
+ map.insert(*header, value);
39
+ });
41
40
  map
42
41
  }
43
42
  }
@@ -47,23 +46,29 @@ impl RecordParser for Vec<Option<String>> {
47
46
 
48
47
  #[inline]
49
48
  fn parse(
50
- _headers: &[&'static str],
49
+ headers: &[&'static str],
51
50
  record: &csv::StringRecord,
52
51
  null_string: Option<&str>,
52
+ flexible_default: Option<&str>,
53
53
  ) -> Self::Output {
54
- let mut vec = Vec::with_capacity(record.len());
54
+ let target_len = headers.len();
55
+ let mut vec = Vec::with_capacity(target_len);
55
56
  vec.extend(record.iter().map(|field| {
56
57
  if null_string == Some(field) {
57
58
  None
59
+ } else if field.is_empty() {
60
+ Some(String::new())
58
61
  } else {
59
- // Avoid allocating for empty strings
60
- if field.is_empty() {
61
- Some(String::new())
62
- } else {
63
- Some(field.to_string())
64
- }
62
+ Some(field.to_string())
65
63
  }
66
64
  }));
65
+
66
+ // Fill remaining slots with flexible_default if needed
67
+ if let Some(default) = flexible_default {
68
+ while vec.len() < target_len {
69
+ vec.push(Some(default.to_string()));
70
+ }
71
+ }
67
72
  vec
68
73
  }
69
74
  }
@@ -6,6 +6,7 @@ pub enum ReadImpl<T: RecordParser> {
6
6
  reader: csv::Reader<Box<dyn Read>>,
7
7
  headers: Vec<&'static str>,
8
8
  null_string: Option<String>,
9
+ flexible_default: Option<String>,
9
10
  },
10
11
  MultiThreaded {
11
12
  headers: Vec<&'static str>,
@@ -33,10 +34,16 @@ impl<T: RecordParser> ReadImpl<T> {
33
34
  reader,
34
35
  headers,
35
36
  null_string,
37
+ flexible_default,
36
38
  } => {
37
39
  let mut record = csv::StringRecord::new();
38
40
  match reader.read_record(&mut record) {
39
- Ok(true) => Some(T::parse(headers, &record, null_string.as_deref())),
41
+ Ok(true) => Some(T::parse(
42
+ headers,
43
+ &record,
44
+ null_string.as_deref(),
45
+ flexible_default.as_deref(),
46
+ )),
40
47
  _ => None,
41
48
  }
42
49
  }
@@ -50,6 +57,7 @@ impl<T: RecordParser> ReadImpl<T> {
50
57
  receiver,
51
58
  handle,
52
59
  headers,
60
+ ..
53
61
  } => {
54
62
  receiver.close();
55
63
  if let Some(handle) = handle.take() {
@@ -18,6 +18,8 @@ pub fn parse_csv(
18
18
  null_string,
19
19
  buffer_size,
20
20
  result_type,
21
+ flexible,
22
+ flexible_default,
21
23
  } = parse_csv_args(&ruby, args)?;
22
24
 
23
25
  if !ruby.block_given() {
@@ -30,6 +32,8 @@ pub fn parse_csv(
30
32
  null_string,
31
33
  buffer_size,
32
34
  result_type,
35
+ flexible,
36
+ flexible_default,
33
37
  });
34
38
  }
35
39
 
@@ -37,6 +41,8 @@ pub fn parse_csv(
37
41
  "hash" => Box::new(
38
42
  RecordReaderBuilder::<HashMap<&'static str, Option<String>>>::new(&ruby, to_read)
39
43
  .has_headers(has_headers)
44
+ .flexible(flexible)
45
+ .flexible_default(flexible_default)
40
46
  .delimiter(delimiter)
41
47
  .quote_char(quote_char)
42
48
  .null_string(null_string)
@@ -47,6 +53,8 @@ pub fn parse_csv(
47
53
  "array" => Box::new(
48
54
  RecordReaderBuilder::<Vec<Option<String>>>::new(&ruby, to_read)
49
55
  .has_headers(has_headers)
56
+ .flexible(flexible)
57
+ .flexible_default(flexible_default)
50
58
  .delimiter(delimiter)
51
59
  .quote_char(quote_char)
52
60
  .null_string(null_string)
@@ -74,6 +82,8 @@ struct EnumeratorArgs {
74
82
  null_string: Option<String>,
75
83
  buffer_size: usize,
76
84
  result_type: String,
85
+ flexible: bool,
86
+ flexible_default: Option<String>,
77
87
  }
78
88
 
79
89
  fn create_enumerator(
@@ -92,7 +102,8 @@ fn create_enumerator(
92
102
  kwargs.aset(Symbol::new("nil_string"), args.null_string)?;
93
103
  kwargs.aset(Symbol::new("buffer_size"), args.buffer_size)?;
94
104
  kwargs.aset(Symbol::new("result_type"), Symbol::new(args.result_type))?;
95
-
105
+ kwargs.aset(Symbol::new("flexible"), args.flexible)?;
106
+ kwargs.aset(Symbol::new("flexible_default"), args.flexible_default)?;
96
107
  let enumerator = args
97
108
  .rb_self
98
109
  .enumeratorize("for_each", (args.to_read, KwArgs(kwargs)));
data/ext/osv/src/utils.rs CHANGED
@@ -13,6 +13,8 @@ pub struct CsvArgs {
13
13
  pub null_string: Option<String>,
14
14
  pub buffer_size: usize,
15
15
  pub result_type: String,
16
+ pub flexible: bool,
17
+ pub flexible_default: Option<String>,
16
18
  }
17
19
 
18
20
  /// Parse common arguments for CSV parsing
@@ -30,6 +32,8 @@ pub fn parse_csv_args(ruby: &Ruby, args: &[Value]) -> Result<CsvArgs, Error> {
30
32
  Option<Option<String>>,
31
33
  Option<usize>,
32
34
  Option<Value>,
35
+ Option<bool>,
36
+ Option<Option<String>>,
33
37
  ),
34
38
  (),
35
39
  >(
@@ -42,6 +46,8 @@ pub fn parse_csv_args(ruby: &Ruby, args: &[Value]) -> Result<CsvArgs, Error> {
42
46
  "nil_string",
43
47
  "buffer_size",
44
48
  "result_type",
49
+ "flexible",
50
+ "flexible_default",
45
51
  ],
46
52
  )?;
47
53
 
@@ -111,6 +117,10 @@ pub fn parse_csv_args(ruby: &Ruby, args: &[Value]) -> Result<CsvArgs, Error> {
111
117
  None => String::from("hash"),
112
118
  };
113
119
 
120
+ let flexible = kwargs.optional.6.unwrap_or_default();
121
+
122
+ let flexible_default = kwargs.optional.7.unwrap_or_default();
123
+
114
124
  Ok(CsvArgs {
115
125
  to_read,
116
126
  has_headers,
@@ -119,5 +129,7 @@ pub fn parse_csv_args(ruby: &Ruby, args: &[Value]) -> Result<CsvArgs, Error> {
119
129
  null_string,
120
130
  buffer_size,
121
131
  result_type,
132
+ flexible,
133
+ flexible_default,
122
134
  })
123
135
  }
data/lib/osv/version.rb CHANGED
@@ -1,3 +1,3 @@
1
1
  module OSV
2
- VERSION = "0.3.6"
2
+ VERSION = "0.3.8"
3
3
  end
data/lib/osv.rbi CHANGED
@@ -1,6 +1,25 @@
1
1
  # typed: strict
2
2
 
3
3
  module OSV
4
+ # Options:
5
+ # - `has_headers`: Boolean indicating if the first row contains headers
6
+ # (default: true)
7
+ # - `col_sep`: String specifying the field separator
8
+ # (default: ",")
9
+ # - `quote_char`: String specifying the quote character
10
+ # (default: "\"")
11
+ # - `nil_string`: String that should be interpreted as nil
12
+ # By default, empty strings are interpreted as empty strings.
13
+ # If you want to interpret empty strings as nil, set this to
14
+ # an empty string.
15
+ # - `buffer_size`: Integer specifying the read buffer size
16
+ # - `result_type`: String specifying the output format
17
+ # ("hash" or "array")
18
+ # - `flexible`: Boolean specifying if the parser should accept rows with a varying number of fields
19
+ # (default: false)
20
+ # - `flexible_default`: String specifying the default value for missing fields.
21
+ # Implicitly enables flexible mode if set.
22
+ # (default: `nil`)
4
23
  sig do
5
24
  params(
6
25
  input: T.any(String, StringIO, IO),
@@ -10,6 +29,8 @@ module OSV
10
29
  nil_string: T.nilable(String),
11
30
  buffer_size: T.nilable(Integer),
12
31
  result_type: T.nilable(String),
32
+ flexible: T.nilable(T::Boolean),
33
+ flexible_default: T.nilable(String),
13
34
  blk: T.nilable(T.proc.params(row: T.any(T::Hash[String, T.nilable(String)], T::Array[T.nilable(String)])).void)
14
35
  ).returns(T.any(Enumerator, T.untyped))
15
36
  end
@@ -21,6 +42,8 @@ module OSV
21
42
  nil_string: nil,
22
43
  buffer_size: nil,
23
44
  result_type: nil,
45
+ flexible: nil,
46
+ flexible_default: nil,
24
47
  &blk
25
48
  )
26
49
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: osv
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.6
4
+ version: 0.3.8
5
5
  platform: ruby
6
6
  authors:
7
7
  - Nathan Jaremko