osv 0.3.20 → 0.3.22

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: e5d6f70d75ffe07595c68bf9c1d4c26d3c6f8fcb3dd1f6bcde44de036558584d
4
- data.tar.gz: 70e7d68fe4e42755ed54fd900ef8eb871741ff9c17e683ae7aa7d3f20801c7da
3
+ metadata.gz: 26bda7b8aed144013156dea4f4f68c322b0a2042d6478c225edde0c44f54452f
4
+ data.tar.gz: cd63b6b71c158d8a09196a4fff496c5c7e7a9ac2c9a64724bbf31c56ff9ee0c7
5
5
  SHA512:
6
- metadata.gz: 1b2817f12cef251ee9006ba93483b7c7a529e3302579148ef998513b5401fa8eeef924ba3553b1a95515437d905239ce08e5ab4117160d2aabd37ebf4c70fbd8
7
- data.tar.gz: 5beb1952d332923bc4703dfdd7b8e234d785f548c0c5724706aad78302234f5f76b4c0094968354ff29eee5d03d5ed2e6ff633649c7a0e6092291b9824f77eac
6
+ metadata.gz: 947a7cc0d9f644977d157d0424893daf10c62efee5b4d544f81731ca7f04056cc75bab464560f4aea0b793b4b4e6e63a964fed8adace0c232cd388182a709a3a
7
+ data.tar.gz: 78536bdbba174b441792e39dc3b1c2ca08d85bafe4dce7763156160abcc611bb38c0743e6cadfd5d3466410b42fa0e598dc32e4865393109b8c88ec9673bf44b
@@ -34,6 +34,10 @@ pub enum ReaderError {
34
34
  InvalidFlexibleDefault(String),
35
35
  #[error("Invalid null string value: {0}")]
36
36
  InvalidNullString(String),
37
+ #[error("Failed to parse CSV record: {0}")]
38
+ CsvParse(#[from] csv::Error),
39
+ #[error("Invalid UTF-8: {0}")]
40
+ InvalidUtf8(String),
37
41
  #[error("Ruby error: {0}")]
38
42
  Ruby(String),
39
43
  }
@@ -46,10 +50,20 @@ impl From<MagnusError> for ReaderError {
46
50
 
47
51
  impl From<ReaderError> for MagnusError {
48
52
  fn from(err: ReaderError) -> Self {
49
- MagnusError::new(
50
- Ruby::get().unwrap().exception_runtime_error(),
51
- err.to_string(),
52
- )
53
+ let ruby = Ruby::get().unwrap();
54
+ match err {
55
+ ReaderError::CsvParse(csv_err) => {
56
+ if csv_err.to_string().contains("invalid utf-8") {
57
+ MagnusError::new(ruby.exception_encoding_error(), csv_err.to_string())
58
+ } else {
59
+ MagnusError::new(ruby.exception_runtime_error(), csv_err.to_string())
60
+ }
61
+ }
62
+ ReaderError::InvalidUtf8(utf8_err) => {
63
+ MagnusError::new(ruby.exception_encoding_error(), utf8_err.to_string())
64
+ }
65
+ _ => MagnusError::new(ruby.exception_runtime_error(), err.to_string()),
66
+ }
53
67
  }
54
68
  }
55
69
 
@@ -199,7 +213,8 @@ impl<'a, T: RecordParser<'a>> RecordReaderBuilder<'a, T> {
199
213
  .trim(self.trim)
200
214
  .from_reader(reader);
201
215
 
202
- let mut headers = RecordReader::<T>::get_headers(&self.ruby, &mut reader, self.has_headers)?;
216
+ let mut headers =
217
+ RecordReader::<T>::get_headers(&self.ruby, &mut reader, self.has_headers)?;
203
218
  if self.ignore_null_bytes {
204
219
  headers = headers.iter().map(|h| h.replace("\0", "")).collect();
205
220
  }
@@ -42,10 +42,9 @@ impl<'a, S: BuildHasher + Default> RecordParser<'a>
42
42
  None
43
43
  } else if field.is_empty() {
44
44
  Some(CowStr(shared_empty.clone()))
45
- } else if ignore_null_bytes {
45
+ } else if ignore_null_bytes {
46
46
  Some(CowStr(Cow::Owned(field.replace("\0", ""))))
47
- }
48
- else {
47
+ } else {
49
48
  Some(CowStr(Cow::Owned(field.to_string())))
50
49
  }
51
50
  },
@@ -78,10 +77,9 @@ impl<'a> RecordParser<'a> for Vec<Option<CowStr<'a>>> {
78
77
  None
79
78
  } else if field.is_empty() {
80
79
  Some(CowStr(shared_empty.clone()))
81
- } else if ignore_null_bytes {
80
+ } else if ignore_null_bytes {
82
81
  Some(CowStr(Cow::Owned(field.replace("\0", ""))))
83
- }
84
- else {
82
+ } else {
85
83
  Some(CowStr(Cow::Owned(field.to_string())))
86
84
  };
87
85
  vec.push(value);
@@ -1,3 +1,4 @@
1
+ use super::builder::ReaderError;
1
2
  use super::header_cache::StringCacheKey;
2
3
  use super::parser::RecordParser;
3
4
  use super::{header_cache::StringCache, ruby_reader::SeekableRead};
@@ -72,28 +73,31 @@ impl<'a, T: RecordParser<'a>> RecordReader<'a, T> {
72
73
  }
73
74
 
74
75
  /// Attempts to read the next record, returning any errors encountered.
75
- fn try_next(&mut self) -> csv::Result<Option<T::Output>> {
76
- match self.reader.read_record(&mut self.string_record)? {
77
- true => Ok(Some(T::parse(
76
+ fn try_next(&mut self) -> Result<Option<T::Output>, ReaderError> {
77
+ if self.reader.read_record(&mut self.string_record)? {
78
+ Ok(Some(T::parse(
78
79
  &self.headers,
79
80
  &self.string_record,
80
81
  self.null_string.clone(),
81
82
  self.flexible_default.clone(),
82
- self.ignore_null_bytes
83
- ))),
84
- false => Ok(None),
83
+ self.ignore_null_bytes,
84
+ )))
85
+ } else {
86
+ Ok(None)
85
87
  }
86
88
  }
87
89
  }
88
90
 
89
91
  impl<'a, T: RecordParser<'a>> Iterator for RecordReader<'a, T> {
90
- type Item = T::Output;
92
+ type Item = Result<T::Output, ReaderError>;
91
93
 
92
94
  #[inline]
93
95
  fn next(&mut self) -> Option<Self::Item> {
94
- // Note: We intentionally swallow errors here to maintain Iterator contract.
95
- // Errors can be handled by using try_next() directly if needed.
96
- self.try_next().ok().flatten()
96
+ match self.try_next() {
97
+ Ok(Some(record)) => Some(Ok(record)),
98
+ Ok(None) => None,
99
+ Err(e) => Some(Err(e)),
100
+ }
97
101
  }
98
102
 
99
103
  #[inline]
@@ -1,4 +1,5 @@
1
1
  use magnus::{
2
+ error::Error as MagnusError,
2
3
  value::{Opaque, ReprValue},
3
4
  RClass, RString, Ruby, Value,
4
5
  };
@@ -6,7 +7,7 @@ use std::fs::File;
6
7
  use std::io::{self, BufReader, Read, Seek, SeekFrom, Write};
7
8
  use std::sync::OnceLock;
8
9
 
9
- use super::ForgottenFileHandle;
10
+ use super::{builder::ReaderError, ForgottenFileHandle};
10
11
 
11
12
  static STRING_IO_CLASS: OnceLock<Opaque<RClass>> = OnceLock::new();
12
13
 
@@ -25,10 +26,7 @@ impl<T: Read + Seek> SeekableRead for BufReader<T> {}
25
26
  impl SeekableRead for std::io::Cursor<Vec<u8>> {}
26
27
  impl SeekableRead for ForgottenFileHandle {}
27
28
 
28
- pub fn build_ruby_reader(
29
- ruby: &Ruby,
30
- input: Value,
31
- ) -> Result<Box<dyn SeekableRead>, magnus::Error> {
29
+ pub fn build_ruby_reader(ruby: &Ruby, input: Value) -> Result<Box<dyn SeekableRead>, ReaderError> {
32
30
  if RubyReader::is_string_io(ruby, &input) {
33
31
  RubyReader::from_string_io(ruby, input)
34
32
  } else if RubyReader::is_io_like(&input) {
@@ -88,14 +86,14 @@ impl Seek for RubyReader<RString> {
88
86
  }
89
87
 
90
88
  impl RubyReader<Value> {
91
- fn from_io(input: Value) -> Result<Box<dyn SeekableRead>, magnus::Error> {
89
+ fn from_io(input: Value) -> Result<Box<dyn SeekableRead>, ReaderError> {
92
90
  if Self::is_io_like(&input) {
93
91
  Ok(Box::new(Self::from_io_like(input)))
94
92
  } else {
95
- Err(magnus::Error::new(
93
+ Err(MagnusError::new(
96
94
  magnus::exception::type_error(),
97
95
  "Input is not an IO-like object",
98
- ))
96
+ ))?
99
97
  }
100
98
  }
101
99
 
@@ -112,15 +110,12 @@ impl RubyReader<Value> {
112
110
  }
113
111
 
114
112
  impl RubyReader<RString> {
115
- pub fn from_string_io(
116
- ruby: &Ruby,
117
- input: Value,
118
- ) -> Result<Box<dyn SeekableRead>, magnus::Error> {
113
+ pub fn from_string_io(ruby: &Ruby, input: Value) -> Result<Box<dyn SeekableRead>, ReaderError> {
119
114
  if !Self::is_string_io(ruby, &input) {
120
- return Err(magnus::Error::new(
115
+ return Err(MagnusError::new(
121
116
  magnus::exception::type_error(),
122
117
  "Input is not a StringIO",
123
- ));
118
+ ))?;
124
119
  }
125
120
 
126
121
  let string_content = input.funcall::<_, _, RString>("string", ()).unwrap();
@@ -138,11 +133,11 @@ impl RubyReader<RString> {
138
133
  input.is_kind_of(ruby.get_inner(*string_io_class))
139
134
  }
140
135
 
141
- fn from_string_like(input: Value) -> Result<Box<dyn SeekableRead>, magnus::Error> {
142
- // Try calling `to_str`, and if that fails, try `to_s`
136
+ fn from_string_like(input: Value) -> Result<Box<dyn SeekableRead>, ReaderError> {
143
137
  let string_content = input
144
138
  .funcall::<_, _, RString>("to_str", ())
145
139
  .or_else(|_| input.funcall::<_, _, RString>("to_s", ()))?;
140
+
146
141
  Ok(Box::new(Self {
147
142
  inner: string_content,
148
143
  offset: 0,
@@ -154,12 +149,16 @@ impl Read for RubyReader<Value> {
154
149
  fn read(&mut self, mut buf: &mut [u8]) -> io::Result<usize> {
155
150
  let bytes = self
156
151
  .inner
157
- .funcall::<_, _, RString>("read", (buf.len(),))
152
+ .funcall::<_, _, Option<RString>>("read", (buf.len(),))
158
153
  .map_err(|e| io::Error::new(io::ErrorKind::Other, e.to_string()))?;
159
154
 
160
- buf.write_all(unsafe { bytes.as_slice() })?;
161
-
162
- Ok(bytes.len())
155
+ match bytes {
156
+ Some(bytes) => {
157
+ buf.write_all(unsafe { bytes.as_slice() })?;
158
+ Ok(bytes.len())
159
+ }
160
+ None => Ok(0), // EOF
161
+ }
163
162
  }
164
163
  }
165
164
 
@@ -3,7 +3,7 @@ use crate::utils::*;
3
3
  use ahash::RandomState;
4
4
  use csv::Trim;
5
5
  use magnus::value::ReprValue;
6
- use magnus::{block::Yield, Error, KwArgs, RHash, Ruby, Symbol, Value};
6
+ use magnus::{Error, IntoValue, KwArgs, RHash, Ruby, Symbol, Value};
7
7
  use std::collections::HashMap;
8
8
 
9
9
  /// Valid result types for CSV parsing
@@ -36,6 +36,7 @@ struct EnumeratorArgs {
36
36
  flexible: bool,
37
37
  flexible_default: Option<String>,
38
38
  trim: Option<String>,
39
+ ignore_null_bytes: bool,
39
40
  }
40
41
 
41
42
  /// Parses a CSV file with the given configuration.
@@ -43,10 +44,7 @@ struct EnumeratorArgs {
43
44
  /// # Safety
44
45
  /// This function uses unsafe code to get the Ruby runtime and leak memory for static references.
45
46
  /// This is necessary for Ruby integration but should be used with caution.
46
- pub fn parse_csv(
47
- rb_self: Value,
48
- args: &[Value],
49
- ) -> Result<Yield<Box<dyn Iterator<Item = CsvRecord<'static, RandomState>>>>, Error> {
47
+ pub fn parse_csv(rb_self: Value, args: &[Value]) -> Result<Value, Error> {
50
48
  // SAFETY: We're in a Ruby callback, so Ruby runtime is guaranteed to be initialized
51
49
  let ruby = unsafe { Ruby::get_unchecked() };
52
50
 
@@ -80,7 +78,9 @@ pub fn parse_csv(
80
78
  Trim::Fields => Some("fields".to_string()),
81
79
  _ => None,
82
80
  },
83
- });
81
+ ignore_null_bytes,
82
+ })
83
+ .map(|yield_enum| yield_enum.into_value_with(&ruby));
84
84
  }
85
85
 
86
86
  let result_type = ResultType::from_str(&result_type).ok_or_else(|| {
@@ -90,7 +90,7 @@ pub fn parse_csv(
90
90
  )
91
91
  })?;
92
92
 
93
- let iter: Box<dyn Iterator<Item = CsvRecord<RandomState>>> = match result_type {
93
+ match result_type {
94
94
  ResultType::Hash => {
95
95
  let builder = RecordReaderBuilder::<
96
96
  HashMap<StringCacheKey, Option<CowStr<'static>>, RandomState>,
@@ -105,7 +105,11 @@ pub fn parse_csv(
105
105
  .ignore_null_bytes(ignore_null_bytes)
106
106
  .build()?;
107
107
 
108
- Box::new(builder.map(CsvRecord::Map))
108
+ let ruby = unsafe { Ruby::get_unchecked() };
109
+ for result in builder {
110
+ let record = result?;
111
+ let _: Value = ruby.yield_value(CsvRecord::Map(record))?;
112
+ }
109
113
  }
110
114
  ResultType::Array => {
111
115
  let builder = RecordReaderBuilder::<Vec<Option<CowStr<'static>>>>::new(ruby, to_read)
@@ -119,17 +123,20 @@ pub fn parse_csv(
119
123
  .ignore_null_bytes(ignore_null_bytes)
120
124
  .build()?;
121
125
 
122
- Box::new(builder.map(CsvRecord::Vec))
126
+ let ruby = unsafe { Ruby::get_unchecked() };
127
+ for result in builder {
128
+ let record = result?;
129
+ let _: Value = ruby.yield_value(CsvRecord::<ahash::RandomState>::Vec(record))?;
130
+ }
123
131
  }
124
- };
132
+ }
125
133
 
126
- Ok(Yield::Iter(iter))
134
+ let ruby = unsafe { Ruby::get_unchecked() };
135
+ Ok(ruby.qnil().into_value_with(&ruby))
127
136
  }
128
137
 
129
138
  /// Creates an enumerator for lazy CSV parsing
130
- fn create_enumerator(
131
- args: EnumeratorArgs,
132
- ) -> Result<Yield<Box<dyn Iterator<Item = CsvRecord<'static, RandomState>>>>, Error> {
139
+ fn create_enumerator(args: EnumeratorArgs) -> Result<magnus::Enumerator, Error> {
133
140
  let kwargs = RHash::new();
134
141
  kwargs.aset(Symbol::new("has_headers"), args.has_headers)?;
135
142
  kwargs.aset(
@@ -145,9 +152,9 @@ fn create_enumerator(
145
152
  kwargs.aset(Symbol::new("flexible"), args.flexible)?;
146
153
  kwargs.aset(Symbol::new("flexible_default"), args.flexible_default)?;
147
154
  kwargs.aset(Symbol::new("trim"), args.trim.map(Symbol::new))?;
155
+ kwargs.aset(Symbol::new("ignore_null_bytes"), args.ignore_null_bytes)?;
148
156
 
149
- let enumerator = args
157
+ Ok(args
150
158
  .rb_self
151
- .enumeratorize("for_each", (args.to_read, KwArgs(kwargs)));
152
- Ok(Yield::Enumerator(enumerator))
159
+ .enumeratorize("for_each", (args.to_read, KwArgs(kwargs))))
153
160
  }
data/lib/osv/version.rb CHANGED
@@ -1,3 +1,3 @@
1
1
  module OSV
2
- VERSION = "0.3.20"
2
+ VERSION = "0.3.22"
3
3
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: osv
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.20
4
+ version: 0.3.22
5
5
  platform: ruby
6
6
  authors:
7
7
  - Nathan Jaremko
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2025-01-24 00:00:00.000000000 Z
11
+ date: 2025-01-30 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rb_sys