osv 0.3.18 → 0.3.20

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: da944a5af1cc88630fe0952e6e710d2acb8ac420ae8708a107064f5ecf444dec
4
- data.tar.gz: bd6de3860ff2f47eb03b9019d307d647fa8c2e8f366543fbe95604f284871b62
3
+ metadata.gz: e5d6f70d75ffe07595c68bf9c1d4c26d3c6f8fcb3dd1f6bcde44de036558584d
4
+ data.tar.gz: 70e7d68fe4e42755ed54fd900ef8eb871741ff9c17e683ae7aa7d3f20801c7da
5
5
  SHA512:
6
- metadata.gz: 8a130687fb25aaae3734f2e69c2258ccf893c584cd0c2893b751282b393ee4d52b2317a338f1ef68a864222e4947614ffdca7e6b98d8d37dc934dfede61f7bc1
7
- data.tar.gz: 332a5dc1c6ce6df721b22f9e66b54d48426da3a0148917f9ec13036edd63e1fb70a950a2971964d289e076536af47d090c89fd95961d8ca4b51f1f1b8a221a98
6
+ metadata.gz: 1b2817f12cef251ee9006ba93483b7c7a529e3302579148ef998513b5401fa8eeef924ba3553b1a95515437d905239ce08e5ab4117160d2aabd37ebf4c70fbd8
7
+ data.tar.gz: 5beb1952d332923bc4703dfdd7b8e234d785f548c0c5724706aad78302234f5f76b4c0094968354ff29eee5d03d5ed2e6ff633649c7a0e6092291b9824f77eac
data/README.md CHANGED
@@ -88,6 +88,7 @@ OSV.for_each("data.csv",
88
88
  # Implicitly enables flexible mode if set.
89
89
  trim: :all, # Whether to trim whitespace. Options are :all, :headers, or :fields (default: nil)
90
90
  buffer_size: 1024, # Number of rows to buffer in memory (default: 1024)
91
+ ignore_null_bytes: false, # Whether to strip null bytes ("\0") from headers and field values (default: false)
91
92
  )
92
93
  ```
93
94
 
@@ -104,6 +105,7 @@ OSV.for_each("data.csv",
104
105
  - `flexible`: Boolean specifying if the parser should be flexible (default: false)
105
106
  - `flexible_default`: String specifying the default value for missing fields. Implicitly enables flexible mode if set. (default: `nil`)
106
107
  - `trim`: String specifying the trim mode ("all" or "headers" or "fields" or :all or :headers or :fields)
108
+ - `ignore_null_bytes`: Boolean specifying whether null bytes (`"\0"`) are stripped from headers and field values (default: false)
107
109
 
108
110
  When `has_headers` is false, hash keys will be generated as `"c0"`, `"c1"`, etc.
109
111
 
@@ -67,6 +67,7 @@ pub struct RecordReaderBuilder<'a, T: RecordParser<'a>> {
67
67
  flexible: bool,
68
68
  flexible_default: Option<String>,
69
69
  trim: csv::Trim,
70
+ ignore_null_bytes: bool,
70
71
  _phantom: PhantomData<T>,
71
72
  _phantom_a: PhantomData<&'a ()>,
72
73
  }
@@ -84,6 +85,7 @@ impl<'a, T: RecordParser<'a>> RecordReaderBuilder<'a, T> {
84
85
  flexible: false,
85
86
  flexible_default: None,
86
87
  trim: csv::Trim::None,
88
+ ignore_null_bytes: false,
87
89
  _phantom: PhantomData,
88
90
  _phantom_a: PhantomData,
89
91
  }
@@ -138,6 +140,12 @@ impl<'a, T: RecordParser<'a>> RecordReaderBuilder<'a, T> {
138
140
  self
139
141
  }
140
142
 
143
+ #[must_use]
144
+ pub fn ignore_null_bytes(mut self, ignore_null_bytes: bool) -> Self {
145
+ self.ignore_null_bytes = ignore_null_bytes;
146
+ self
147
+ }
148
+
141
149
  /// Handles reading from a file descriptor.
142
150
  fn handle_file_descriptor(&self) -> Result<Box<dyn SeekableRead>, ReaderError> {
143
151
  let raw_value = self.to_read.as_raw();
@@ -191,7 +199,10 @@ impl<'a, T: RecordParser<'a>> RecordReaderBuilder<'a, T> {
191
199
  .trim(self.trim)
192
200
  .from_reader(reader);
193
201
 
194
- let headers = RecordReader::<T>::get_headers(&self.ruby, &mut reader, self.has_headers)?;
202
+ let mut headers = RecordReader::<T>::get_headers(&self.ruby, &mut reader, self.has_headers)?;
203
+ if self.ignore_null_bytes {
204
+ headers = headers.iter().map(|h| h.replace("\0", "")).collect();
205
+ }
195
206
  let static_headers = StringCache::intern_many(&headers)?;
196
207
 
197
208
  // We intern both of these to get static string references we can reuse throughout the parser.
@@ -204,7 +215,7 @@ impl<'a, T: RecordParser<'a>> RecordReaderBuilder<'a, T> {
204
215
  .map_err(|e| ReaderError::InvalidFlexibleDefault(format!("{:?}", e)))
205
216
  })
206
217
  .transpose()?
207
- .map(|s| Cow::Borrowed(s));
218
+ .map(Cow::Borrowed);
208
219
 
209
220
  let null_string = self
210
221
  .null_string
@@ -215,13 +226,14 @@ impl<'a, T: RecordParser<'a>> RecordReaderBuilder<'a, T> {
215
226
  .map_err(|e| ReaderError::InvalidNullString(format!("{:?}", e)))
216
227
  })
217
228
  .transpose()?
218
- .map(|s| Cow::Borrowed(s));
229
+ .map(Cow::Borrowed);
219
230
 
220
231
  Ok(RecordReader::new(
221
232
  reader,
222
233
  static_headers,
223
234
  null_string,
224
235
  flexible_default,
236
+ self.ignore_null_bytes,
225
237
  ))
226
238
  }
227
239
  }
@@ -99,7 +99,7 @@ impl StringCache {
99
99
  counter.fetch_add(1, Ordering::Relaxed);
100
100
  result.push(*interned_string);
101
101
  } else {
102
- let interned = StringCacheKey::new(&string);
102
+ let interned = StringCacheKey::new(string);
103
103
  let leaked = Box::leak(string.clone().into_boxed_str());
104
104
  cache.insert(leaked, (interned, AtomicU32::new(1)));
105
105
  result.push(interned);
@@ -154,7 +154,7 @@ impl<I: Iterator> Iterator for HeaderCacheCleanupIter<I> {
154
154
  impl<I> Drop for HeaderCacheCleanupIter<I> {
155
155
  fn drop(&mut self) {
156
156
  if let Some(headers) = self.headers.get() {
157
- StringCache::clear(&headers).unwrap();
157
+ StringCache::clear(headers).unwrap();
158
158
  }
159
159
  }
160
160
  }
@@ -13,6 +13,7 @@ pub trait RecordParser<'a> {
13
13
  record: &csv::StringRecord,
14
14
  null_string: Option<Cow<'a, str>>,
15
15
  flexible_default: Option<Cow<'a, str>>,
16
+ ignore_null_bytes: bool,
16
17
  ) -> Self::Output;
17
18
  }
18
19
 
@@ -27,12 +28,13 @@ impl<'a, S: BuildHasher + Default> RecordParser<'a>
27
28
  record: &csv::StringRecord,
28
29
  null_string: Option<Cow<'a, str>>,
29
30
  flexible_default: Option<Cow<'a, str>>,
31
+ ignore_null_bytes: bool,
30
32
  ) -> Self::Output {
31
33
  let mut map = HashMap::with_capacity_and_hasher(headers.len(), S::default());
32
34
 
33
35
  let shared_empty = Cow::Borrowed("");
34
36
  let shared_default = flexible_default.map(CowStr);
35
- headers.iter().enumerate().for_each(|(i, ref header)| {
37
+ headers.iter().enumerate().for_each(|(i, header)| {
36
38
  let value = record.get(i).map_or_else(
37
39
  || shared_default.clone(),
38
40
  |field| {
@@ -40,12 +42,15 @@ impl<'a, S: BuildHasher + Default> RecordParser<'a>
40
42
  None
41
43
  } else if field.is_empty() {
42
44
  Some(CowStr(shared_empty.clone()))
43
- } else {
45
+ } else if ignore_null_bytes {
46
+ Some(CowStr(Cow::Owned(field.replace("\0", ""))))
47
+ }
48
+ else {
44
49
  Some(CowStr(Cow::Owned(field.to_string())))
45
50
  }
46
51
  },
47
52
  );
48
- map.insert((*header).clone(), value);
53
+ map.insert(*header, value);
49
54
  });
50
55
  map
51
56
  }
@@ -60,6 +65,7 @@ impl<'a> RecordParser<'a> for Vec<Option<CowStr<'a>>> {
60
65
  record: &csv::StringRecord,
61
66
  null_string: Option<Cow<'a, str>>,
62
67
  flexible_default: Option<Cow<'a, str>>,
68
+ ignore_null_bytes: bool,
63
69
  ) -> Self::Output {
64
70
  let target_len = headers.len();
65
71
  let mut vec = Vec::with_capacity(target_len);
@@ -72,7 +78,10 @@ impl<'a> RecordParser<'a> for Vec<Option<CowStr<'a>>> {
72
78
  None
73
79
  } else if field.is_empty() {
74
80
  Some(CowStr(shared_empty.clone()))
75
- } else {
81
+ } else if ignore_null_bytes {
82
+ Some(CowStr(Cow::Owned(field.replace("\0", ""))))
83
+ }
84
+ else {
76
85
  Some(CowStr(Cow::Owned(field.to_string())))
77
86
  };
78
87
  vec.push(value);
@@ -18,6 +18,7 @@ pub struct RecordReader<'a, T: RecordParser<'a>> {
18
18
  flexible_default: Option<Cow<'a, str>>,
19
19
  string_record: csv::StringRecord,
20
20
  parser: std::marker::PhantomData<T>,
21
+ ignore_null_bytes: bool,
21
22
  }
22
23
 
23
24
  impl<'a, T: RecordParser<'a>> RecordReader<'a, T> {
@@ -56,6 +57,7 @@ impl<'a, T: RecordParser<'a>> RecordReader<'a, T> {
56
57
  headers: Vec<StringCacheKey>,
57
58
  null_string: Option<Cow<'a, str>>,
58
59
  flexible_default: Option<Cow<'a, str>>,
60
+ ignore_null_bytes: bool,
59
61
  ) -> Self {
60
62
  let headers_len = headers.len();
61
63
  Self {
@@ -65,6 +67,7 @@ impl<'a, T: RecordParser<'a>> RecordReader<'a, T> {
65
67
  flexible_default,
66
68
  string_record: csv::StringRecord::with_capacity(READ_BUFFER_SIZE, headers_len),
67
69
  parser: std::marker::PhantomData,
70
+ ignore_null_bytes,
68
71
  }
69
72
  }
70
73
 
@@ -76,6 +79,7 @@ impl<'a, T: RecordParser<'a>> RecordReader<'a, T> {
76
79
  &self.string_record,
77
80
  self.null_string.clone(),
78
81
  self.flexible_default.clone(),
82
+ self.ignore_null_bytes
79
83
  ))),
80
84
  false => Ok(None),
81
85
  }
@@ -60,6 +60,7 @@ pub fn parse_csv(
60
60
  flexible,
61
61
  flexible_default,
62
62
  trim,
63
+ ignore_null_bytes,
63
64
  } = parse_read_csv_args(&ruby, args)?;
64
65
 
65
66
  if !ruby.block_given() {
@@ -70,9 +71,9 @@ pub fn parse_csv(
70
71
  delimiter,
71
72
  quote_char,
72
73
  null_string,
73
- result_type: result_type,
74
+ result_type,
74
75
  flexible,
75
- flexible_default: flexible_default,
76
+ flexible_default,
76
77
  trim: match trim {
77
78
  Trim::All => Some("all".to_string()),
78
79
  Trim::Headers => Some("headers".to_string()),
@@ -100,9 +101,11 @@ pub fn parse_csv(
100
101
  .trim(trim)
101
102
  .delimiter(delimiter)
102
103
  .quote_char(quote_char)
103
- .null_string(null_string);
104
+ .null_string(null_string)
105
+ .ignore_null_bytes(ignore_null_bytes)
106
+ .build()?;
104
107
 
105
- Box::new(builder.build()?.map(CsvRecord::Map))
108
+ Box::new(builder.map(CsvRecord::Map))
106
109
  }
107
110
  ResultType::Array => {
108
111
  let builder = RecordReaderBuilder::<Vec<Option<CowStr<'static>>>>::new(ruby, to_read)
@@ -113,6 +116,7 @@ pub fn parse_csv(
113
116
  .delimiter(delimiter)
114
117
  .quote_char(quote_char)
115
118
  .null_string(null_string)
119
+ .ignore_null_bytes(ignore_null_bytes)
116
120
  .build()?;
117
121
 
118
122
  Box::new(builder.map(CsvRecord::Vec))
data/ext/osv/src/utils.rs CHANGED
@@ -36,6 +36,7 @@ pub struct ReadCsvArgs {
36
36
  pub flexible: bool,
37
37
  pub flexible_default: Option<String>,
38
38
  pub trim: csv::Trim,
39
+ pub ignore_null_bytes: bool,
39
40
  }
40
41
 
41
42
  /// Parse common arguments for CSV parsing
@@ -55,6 +56,7 @@ pub fn parse_read_csv_args(ruby: &Ruby, args: &[Value]) -> Result<ReadCsvArgs, E
55
56
  Option<Option<bool>>,
56
57
  Option<Option<Option<String>>>,
57
58
  Option<Option<Value>>,
59
+ Option<Option<bool>>,
58
60
  ),
59
61
  (),
60
62
  >(
@@ -69,6 +71,7 @@ pub fn parse_read_csv_args(ruby: &Ruby, args: &[Value]) -> Result<ReadCsvArgs, E
69
71
  "flexible",
70
72
  "flexible_default",
71
73
  "trim",
74
+ "ignore_null_bytes",
72
75
  ],
73
76
  )?;
74
77
 
@@ -163,6 +166,8 @@ pub fn parse_read_csv_args(ruby: &Ruby, args: &[Value]) -> Result<ReadCsvArgs, E
163
166
  None => csv::Trim::None,
164
167
  };
165
168
 
169
+ let ignore_null_bytes = kwargs.optional.8.flatten().unwrap_or_default();
170
+
166
171
  Ok(ReadCsvArgs {
167
172
  to_read,
168
173
  has_headers,
@@ -173,5 +178,6 @@ pub fn parse_read_csv_args(ruby: &Ruby, args: &[Value]) -> Result<ReadCsvArgs, E
173
178
  flexible,
174
179
  flexible_default,
175
180
  trim,
181
+ ignore_null_bytes,
176
182
  })
177
183
  }
data/lib/osv/version.rb CHANGED
@@ -1,3 +1,3 @@
1
1
  module OSV
2
- VERSION = "0.3.18"
2
+ VERSION = "0.3.20"
3
3
  end
data/lib/osv.rbi CHANGED
@@ -23,6 +23,8 @@ module OSV
23
23
  # - `trim`: String specifying the trim mode
24
24
  # ("all" or "headers" or "fields" or :all or :headers or :fields)
25
25
  # (default: `nil`)
26
+ # - `ignore_null_bytes`: Boolean specifying whether null bytes are stripped from headers and field values
27
+ # (default: false)
26
28
  sig do
27
29
  params(
28
30
  input: T.any(String, StringIO, IO),
@@ -34,6 +36,7 @@ module OSV
34
36
  result_type: T.nilable(T.any(String, Symbol)),
35
37
  flexible: T.nilable(T::Boolean),
36
38
  flexible_default: T.nilable(String),
39
+ ignore_null_bytes: T.nilable(T::Boolean),
37
40
  trim: T.nilable(T.any(String, Symbol)),
38
41
  blk: T.nilable(T.proc.params(row: T.any(T::Hash[String, T.nilable(String)], T::Array[T.nilable(String)])).void)
39
42
  ).returns(T.any(Enumerator, T.untyped))
@@ -48,6 +51,7 @@ module OSV
48
51
  result_type: nil,
49
52
  flexible: nil,
50
53
  flexible_default: nil,
54
+ ignore_null_bytes: nil,
51
55
  trim: nil,
52
56
  &blk
53
57
  )
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: osv
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.18
4
+ version: 0.3.20
5
5
  platform: ruby
6
6
  authors:
7
7
  - Nathan Jaremko
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2025-01-13 00:00:00.000000000 Z
11
+ date: 2025-01-24 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rb_sys