osv 0.3.6 → 0.3.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 6d51b65a56cd4a514b4e7746bc87d78be5abe033b390251d0e111ad4ad30813c
4
- data.tar.gz: 59f4377530f2e5e54649b4c35a0ca78448f3a965279b358810ea9fcc17ddc5a8
3
+ metadata.gz: 02205de8cef4d5f7633c06720a9e925a2b608116354da4a1678d4746d2197d23
4
+ data.tar.gz: 3e1d63323fdaad1b6a60e0a0a63801f98710615d6616c882f0cdce00e36c6e2e
5
5
  SHA512:
6
- metadata.gz: 397350596a09659e20fc9db840006649687db469cefcd5be09965ba147cc688957316cb9675d434594c3a19aa66efd2c734a41c71235dcc8d0b445c24e42f1a2
7
- data.tar.gz: c225c5d016026a21afe8e90bf5a89e0465304236899d367b6484757073beefb1b75bb7381c37834bc2e344d1ee259d05e9ff82bba3d8214de098b3cf53eb571c
6
+ metadata.gz: df6a4a4b86c41010ea671ac0e98c2ee6307e62ceff35dab125868f0ee7edb6d14984348ecd4ac9f913489e5a6be0b364240b461334554385aabe5b3374fe798d
7
+ data.tar.gz: d931b888ce9d0ad1cdb1fa3d0be8cd0e526292206742f5adde718f414e9feca97eff3af6d4139d144c18a50e4807650ea9f7582153bcee80cea1e6ed4ce4ef49
data/README.md CHANGED
@@ -71,6 +71,8 @@ Both methods support the following options:
71
71
  - if you want to interpret empty strings as nil, set this to an empty string
72
72
  - `buffer_size`: Integer specifying the read buffer size
73
73
  - `result_type`: String specifying the output format ("hash" or "array")
74
+ - `flexible`: Boolean specifying whether rows with a varying number of fields are accepted (default: false)
75
+ - `flexible_default`: String specifying the default value for missing fields. Implicitly enables flexible mode if set. (default: `nil`)
74
76
 
75
77
  ### Input Sources
76
78
 
@@ -54,6 +54,8 @@ pub struct RecordReaderBuilder<'a, T: RecordParser + Send + 'static> {
54
54
  quote_char: u8,
55
55
  null_string: Option<String>,
56
56
  buffer: usize,
57
+ flexible: bool,
58
+ flexible_default: Option<String>,
57
59
  _phantom: PhantomData<T>,
58
60
  }
59
61
 
@@ -67,6 +69,8 @@ impl<'a, T: RecordParser + Send + 'static> RecordReaderBuilder<'a, T> {
67
69
  quote_char: b'"',
68
70
  null_string: None,
69
71
  buffer: 1000,
72
+ flexible: false,
73
+ flexible_default: None,
70
74
  _phantom: PhantomData,
71
75
  }
72
76
  }
@@ -96,6 +100,16 @@ impl<'a, T: RecordParser + Send + 'static> RecordReaderBuilder<'a, T> {
96
100
  self
97
101
  }
98
102
 
103
+ pub fn flexible(mut self, flexible: bool) -> Self {
104
+ self.flexible = flexible;
105
+ self
106
+ }
107
+
108
+ pub fn flexible_default(mut self, flexible_default: Option<String>) -> Self {
109
+ self.flexible_default = flexible_default;
110
+ self
111
+ }
112
+
99
113
  fn handle_string_io(&self) -> Result<Box<dyn Read + Send + 'static>, ReaderError> {
100
114
  let string: RString = self.to_read.funcall("string", ())?;
101
115
  let content = string.to_string()?;
@@ -173,10 +187,12 @@ impl<'a, T: RecordParser + Send + 'static> RecordReaderBuilder<'a, T> {
173
187
  self,
174
188
  readable: Box<dyn Read + Send + 'static>,
175
189
  ) -> Result<RecordReader<T>, ReaderError> {
190
+ let flexible = self.flexible || self.flexible_default.is_some();
176
191
  let mut reader = csv::ReaderBuilder::new()
177
192
  .has_headers(self.has_headers)
178
193
  .delimiter(self.delimiter)
179
194
  .quote(self.quote_char)
195
+ .flexible(flexible)
180
196
  .from_reader(readable);
181
197
 
182
198
  let headers = RecordReader::<T>::get_headers(self.ruby, &mut reader, self.has_headers)?;
@@ -186,10 +202,16 @@ impl<'a, T: RecordParser + Send + 'static> RecordReaderBuilder<'a, T> {
186
202
  let (sender, receiver) = kanal::bounded(self.buffer);
187
203
  let null_string = self.null_string.clone();
188
204
 
205
+ let flexible_default = self.flexible_default.clone();
189
206
  let handle = thread::spawn(move || {
190
207
  let mut record = csv::StringRecord::new();
191
208
  while let Ok(true) = reader.read_record(&mut record) {
192
- let row = T::parse(&static_headers, &record, null_string.as_deref());
209
+ let row = T::parse(
210
+ &static_headers,
211
+ &record,
212
+ null_string.as_deref(),
213
+ flexible_default.as_deref(),
214
+ );
193
215
  if sender.send(row).is_err() {
194
216
  break;
195
217
  }
@@ -211,10 +233,12 @@ impl<'a, T: RecordParser + Send + 'static> RecordReaderBuilder<'a, T> {
211
233
  self,
212
234
  readable: Box<dyn Read>,
213
235
  ) -> Result<RecordReader<T>, ReaderError> {
236
+ let flexible = self.flexible || self.flexible_default.is_some();
214
237
  let mut reader = csv::ReaderBuilder::new()
215
238
  .has_headers(self.has_headers)
216
239
  .delimiter(self.delimiter)
217
240
  .quote(self.quote_char)
241
+ .flexible(flexible)
218
242
  .from_reader(readable);
219
243
 
220
244
  let headers = RecordReader::<T>::get_headers(self.ruby, &mut reader, self.has_headers)?;
@@ -225,6 +249,7 @@ impl<'a, T: RecordParser + Send + 'static> RecordReaderBuilder<'a, T> {
225
249
  reader,
226
250
  headers: static_headers,
227
251
  null_string: self.null_string,
252
+ flexible_default: self.flexible_default,
228
253
  },
229
254
  })
230
255
  }
@@ -7,6 +7,7 @@ pub trait RecordParser {
7
7
  headers: &[&'static str],
8
8
  record: &csv::StringRecord,
9
9
  null_string: Option<&str>,
10
+ flexible_default: Option<&str>,
10
11
  ) -> Self::Output;
11
12
  }
12
13
 
@@ -18,26 +19,24 @@ impl RecordParser for HashMap<&'static str, Option<String>> {
18
19
  headers: &[&'static str],
19
20
  record: &csv::StringRecord,
20
21
  null_string: Option<&str>,
22
+ flexible_default: Option<&str>,
21
23
  ) -> Self::Output {
22
24
  let mut map = HashMap::with_capacity(headers.len());
23
- headers
24
- .iter()
25
- .zip(record.iter())
26
- .for_each(|(header, field)| {
27
- map.insert(
28
- *header,
25
+ headers.iter().enumerate().for_each(|(i, header)| {
26
+ let value = record.get(i).map_or_else(
27
+ || flexible_default.map(|s| s.to_string()),
28
+ |field| {
29
29
  if null_string == Some(field) {
30
30
  None
31
+ } else if field.is_empty() {
32
+ Some(String::new())
31
33
  } else {
32
- // Avoid allocating for empty strings
33
- if field.is_empty() {
34
- Some(String::new())
35
- } else {
36
- Some(field.to_string())
37
- }
38
- },
39
- );
40
- });
34
+ Some(field.to_string())
35
+ }
36
+ },
37
+ );
38
+ map.insert(*header, value);
39
+ });
41
40
  map
42
41
  }
43
42
  }
@@ -47,23 +46,29 @@ impl RecordParser for Vec<Option<String>> {
47
46
 
48
47
  #[inline]
49
48
  fn parse(
50
- _headers: &[&'static str],
49
+ headers: &[&'static str],
51
50
  record: &csv::StringRecord,
52
51
  null_string: Option<&str>,
52
+ flexible_default: Option<&str>,
53
53
  ) -> Self::Output {
54
- let mut vec = Vec::with_capacity(record.len());
54
+ let target_len = headers.len();
55
+ let mut vec = Vec::with_capacity(target_len);
55
56
  vec.extend(record.iter().map(|field| {
56
57
  if null_string == Some(field) {
57
58
  None
59
+ } else if field.is_empty() {
60
+ Some(String::new())
58
61
  } else {
59
- // Avoid allocating for empty strings
60
- if field.is_empty() {
61
- Some(String::new())
62
- } else {
63
- Some(field.to_string())
64
- }
62
+ Some(field.to_string())
65
63
  }
66
64
  }));
65
+
66
+ // Fill remaining slots with flexible_default if needed
67
+ if let Some(default) = flexible_default {
68
+ while vec.len() < target_len {
69
+ vec.push(Some(default.to_string()));
70
+ }
71
+ }
67
72
  vec
68
73
  }
69
74
  }
@@ -6,6 +6,7 @@ pub enum ReadImpl<T: RecordParser> {
6
6
  reader: csv::Reader<Box<dyn Read>>,
7
7
  headers: Vec<&'static str>,
8
8
  null_string: Option<String>,
9
+ flexible_default: Option<String>,
9
10
  },
10
11
  MultiThreaded {
11
12
  headers: Vec<&'static str>,
@@ -33,10 +34,16 @@ impl<T: RecordParser> ReadImpl<T> {
33
34
  reader,
34
35
  headers,
35
36
  null_string,
37
+ flexible_default,
36
38
  } => {
37
39
  let mut record = csv::StringRecord::new();
38
40
  match reader.read_record(&mut record) {
39
- Ok(true) => Some(T::parse(headers, &record, null_string.as_deref())),
41
+ Ok(true) => Some(T::parse(
42
+ headers,
43
+ &record,
44
+ null_string.as_deref(),
45
+ flexible_default.as_deref(),
46
+ )),
40
47
  _ => None,
41
48
  }
42
49
  }
@@ -50,6 +57,7 @@ impl<T: RecordParser> ReadImpl<T> {
50
57
  receiver,
51
58
  handle,
52
59
  headers,
60
+ ..
53
61
  } => {
54
62
  receiver.close();
55
63
  if let Some(handle) = handle.take() {
@@ -18,6 +18,8 @@ pub fn parse_csv(
18
18
  null_string,
19
19
  buffer_size,
20
20
  result_type,
21
+ flexible,
22
+ flexible_default,
21
23
  } = parse_csv_args(&ruby, args)?;
22
24
 
23
25
  if !ruby.block_given() {
@@ -30,6 +32,8 @@ pub fn parse_csv(
30
32
  null_string,
31
33
  buffer_size,
32
34
  result_type,
35
+ flexible,
36
+ flexible_default,
33
37
  });
34
38
  }
35
39
 
@@ -37,6 +41,8 @@ pub fn parse_csv(
37
41
  "hash" => Box::new(
38
42
  RecordReaderBuilder::<HashMap<&'static str, Option<String>>>::new(&ruby, to_read)
39
43
  .has_headers(has_headers)
44
+ .flexible(flexible)
45
+ .flexible_default(flexible_default)
40
46
  .delimiter(delimiter)
41
47
  .quote_char(quote_char)
42
48
  .null_string(null_string)
@@ -47,6 +53,8 @@ pub fn parse_csv(
47
53
  "array" => Box::new(
48
54
  RecordReaderBuilder::<Vec<Option<String>>>::new(&ruby, to_read)
49
55
  .has_headers(has_headers)
56
+ .flexible(flexible)
57
+ .flexible_default(flexible_default)
50
58
  .delimiter(delimiter)
51
59
  .quote_char(quote_char)
52
60
  .null_string(null_string)
@@ -74,6 +82,8 @@ struct EnumeratorArgs {
74
82
  null_string: Option<String>,
75
83
  buffer_size: usize,
76
84
  result_type: String,
85
+ flexible: bool,
86
+ flexible_default: Option<String>,
77
87
  }
78
88
 
79
89
  fn create_enumerator(
@@ -92,7 +102,8 @@ fn create_enumerator(
92
102
  kwargs.aset(Symbol::new("nil_string"), args.null_string)?;
93
103
  kwargs.aset(Symbol::new("buffer_size"), args.buffer_size)?;
94
104
  kwargs.aset(Symbol::new("result_type"), Symbol::new(args.result_type))?;
95
-
105
+ kwargs.aset(Symbol::new("flexible"), args.flexible)?;
106
+ kwargs.aset(Symbol::new("flexible_default"), args.flexible_default)?;
96
107
  let enumerator = args
97
108
  .rb_self
98
109
  .enumeratorize("for_each", (args.to_read, KwArgs(kwargs)));
data/ext/osv/src/utils.rs CHANGED
@@ -13,6 +13,8 @@ pub struct CsvArgs {
13
13
  pub null_string: Option<String>,
14
14
  pub buffer_size: usize,
15
15
  pub result_type: String,
16
+ pub flexible: bool,
17
+ pub flexible_default: Option<String>,
16
18
  }
17
19
 
18
20
  /// Parse common arguments for CSV parsing
@@ -30,6 +32,8 @@ pub fn parse_csv_args(ruby: &Ruby, args: &[Value]) -> Result<CsvArgs, Error> {
30
32
  Option<Option<String>>,
31
33
  Option<usize>,
32
34
  Option<Value>,
35
+ Option<bool>,
36
+ Option<Option<String>>,
33
37
  ),
34
38
  (),
35
39
  >(
@@ -42,6 +46,8 @@ pub fn parse_csv_args(ruby: &Ruby, args: &[Value]) -> Result<CsvArgs, Error> {
42
46
  "nil_string",
43
47
  "buffer_size",
44
48
  "result_type",
49
+ "flexible",
50
+ "flexible_default",
45
51
  ],
46
52
  )?;
47
53
 
@@ -111,6 +117,10 @@ pub fn parse_csv_args(ruby: &Ruby, args: &[Value]) -> Result<CsvArgs, Error> {
111
117
  None => String::from("hash"),
112
118
  };
113
119
 
120
+ let flexible = kwargs.optional.6.unwrap_or_default();
121
+
122
+ let flexible_default = kwargs.optional.7.unwrap_or_default();
123
+
114
124
  Ok(CsvArgs {
115
125
  to_read,
116
126
  has_headers,
@@ -119,5 +129,7 @@ pub fn parse_csv_args(ruby: &Ruby, args: &[Value]) -> Result<CsvArgs, Error> {
119
129
  null_string,
120
130
  buffer_size,
121
131
  result_type,
132
+ flexible,
133
+ flexible_default,
122
134
  })
123
135
  }
data/lib/osv/version.rb CHANGED
@@ -1,3 +1,3 @@
1
1
  module OSV
2
- VERSION = "0.3.6"
2
+ VERSION = "0.3.8"
3
3
  end
data/lib/osv.rbi CHANGED
@@ -1,6 +1,25 @@
1
1
  # typed: strict
2
2
 
3
3
  module OSV
4
+ # Options:
5
+ # - `has_headers`: Boolean indicating if the first row contains headers
6
+ # (default: true)
7
+ # - `col_sep`: String specifying the field separator
8
+ # (default: ",")
9
+ # - `quote_char`: String specifying the quote character
10
+ # (default: "\"")
11
+ # - `nil_string`: String that should be interpreted as nil
12
+ # By default, empty strings are interpreted as empty strings.
13
+ # If you want to interpret empty strings as nil, set this to
14
+ # an empty string.
15
+ # - `buffer_size`: Integer specifying the read buffer size
16
+ # - `result_type`: String specifying the output format
17
+ # ("hash" or "array")
18
+ # - `flexible`: Boolean specifying if rows may vary in field count
19
+ # (default: false)
20
+ # - `flexible_default`: String specifying the default value for missing fields.
21
+ # Implicitly enables flexible mode if set.
22
+ # (default: `nil`)
4
23
  sig do
5
24
  params(
6
25
  input: T.any(String, StringIO, IO),
@@ -10,6 +29,8 @@ module OSV
10
29
  nil_string: T.nilable(String),
11
30
  buffer_size: T.nilable(Integer),
12
31
  result_type: T.nilable(String),
32
+ flexible: T.nilable(T::Boolean),
33
+ flexible_default: T.nilable(String),
13
34
  blk: T.nilable(T.proc.params(row: T.any(T::Hash[String, T.nilable(String)], T::Array[T.nilable(String)])).void)
14
35
  ).returns(T.any(Enumerator, T.untyped))
15
36
  end
@@ -21,6 +42,8 @@ module OSV
21
42
  nil_string: nil,
22
43
  buffer_size: nil,
23
44
  result_type: nil,
45
+ flexible: nil,
46
+ flexible_default: nil,
24
47
  &blk
25
48
  )
26
49
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: osv
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.6
4
+ version: 0.3.8
5
5
  platform: ruby
6
6
  authors:
7
7
  - Nathan Jaremko