RubyGems - osv - Versions diffs - 0.3.21 → 0.4.0 - Mend

osv 0.3.21 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (11) hide show

checksums.yaml +4 -4
data/README.md +2 -3
data/ext/osv/src/csv/builder.rs +30 -28
data/ext/osv/src/csv/parser.rs +58 -32
data/ext/osv/src/csv/record_reader.rs +32 -17
data/ext/osv/src/csv/ruby_reader.rs +19 -20
data/ext/osv/src/reader.rs +27 -26
data/ext/osv/src/utils.rs +8 -8
data/lib/osv/version.rb +1 -1
data/lib/osv.rbi +2 -5
metadata +2 -2

checksums.yaml CHANGED Viewed

@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz: dba77f8e6d2a8d19d969871594f1c2b18557125d3eee4b5c2759a0398d21da01
-  data.tar.gz: da35996064fd954c99e6241c045444ec5dc1dc52204db956291401d0c3dfe805
+  metadata.gz: 51e4a387f1ed43bddc9f1f7a118637953d04239b5324ef131b9c860577ed4d41
+  data.tar.gz: e42928a09656216bbadcc2458953a8c5f28401ddf27095fc05038e0960471854
 SHA512:
-  metadata.gz: 26398c526e829d7a6c3f943e175d10f8f626a42cec81da9bb65b4fc30dd29078629d29602f4fe3461fbb2cc86ca6d1b91f7f5f6ba2e7de362c2d44f5a79fcf5a
-  data.tar.gz: 39cd4dd78ed38559302c9735b3514648fbc668b415be65870b4deecdea8b6fb41dc7053d347ea5c8fe8004cc653b933ec201eed98582d12cbb26d84302396372
+  metadata.gz: 4100c50a629ba5803db883532cfbe547eb3091e421b0876595d91791d8952a7b0169477c9c6f31063eafa5b91d0a9b1a9f0a5ae016d70cdd101e284beebfaf22
+  data.tar.gz: 90a822c644fcb37dc1892ede85a54395bc9e62a4b0b0a1af838182d390702d0ee4253151faafcedbf734b0a381fe2acf5c1ab23b842059fbdd4d51570fe33e58

data/README.md CHANGED Viewed

@@ -84,11 +84,10 @@ OSV.for_each("data.csv",
   # Parsing behavior
   flexible: false,       # Allow varying number of fields (default: false)
-  flexible_default: nil, # Default value for missing fields. If unset, we ignore missing fields.
-                         # Implicitly enables flexible mode if set.
   trim: :all,            # Whether to trim whitespace. Options are :all, :headers, or :fields (default: nil)
   buffer_size: 1024,     # Number of rows to buffer in memory (default: 1024)
   ignore_null_bytes: false, # Boolean specifying if null bytes should be ignored (default: false)
+  lossy: false,             # Boolean specifying if invalid UTF-8 characters should be replaced with a replacement character (default: false)
 )
 ```
@@ -103,9 +102,9 @@ OSV.for_each("data.csv",
 - `buffer_size`: Integer specifying the number of rows to buffer in memory (default: 1024)
 - `result_type`: String specifying the output format ("hash" or "array" or :hash or :array)
 - `flexible`: Boolean specifying if the parser should be flexible (default: false)
-- `flexible_default`: String specifying the default value for missing fields. Implicitly enables flexible mode if set. (default: `nil`)
 - `trim`: String specifying the trim mode ("all" or "headers" or "fields" or :all or :headers or :fields)
 - `ignore_null_bytes`: Boolean specifying if null bytes should be ignored (default: false)
+- `lossy`: Boolean specifying if invalid UTF-8 characters should be replaced with a replacement character (default: false)
 When `has_headers` is false, hash keys will be generated as `"c0"`, `"c1"`, etc.

data/ext/osv/src/csv/builder.rs CHANGED Viewed

@@ -34,6 +34,10 @@ pub enum ReaderError {
     InvalidFlexibleDefault(String),
     #[error("Invalid null string value: {0}")]
     InvalidNullString(String),
+    #[error("Failed to parse CSV record: {0}")]
+    CsvParse(#[from] csv::Error),
+    #[error("Invalid UTF-8: {0}")]
+    InvalidUtf8(String),
     #[error("Ruby error: {0}")]
     Ruby(String),
 }
@@ -46,10 +50,20 @@ impl From<MagnusError> for ReaderError {
 impl From<ReaderError> for MagnusError {
     fn from(err: ReaderError) -> Self {
-        MagnusError::new(
-            Ruby::get().unwrap().exception_runtime_error(),
-            err.to_string(),
-        )
+        let ruby = Ruby::get().unwrap();
+        match err {
+            ReaderError::CsvParse(csv_err) => {
+                if csv_err.to_string().contains("invalid utf-8") {
+                    MagnusError::new(ruby.exception_encoding_error(), csv_err.to_string())
+                } else {
+                    MagnusError::new(ruby.exception_runtime_error(), csv_err.to_string())
+                }
+            }
+            ReaderError::InvalidUtf8(utf8_err) => {
+                MagnusError::new(ruby.exception_encoding_error(), utf8_err.to_string())
+            }
+            _ => MagnusError::new(ruby.exception_runtime_error(), err.to_string()),
+        }
     }
 }
@@ -65,9 +79,9 @@ pub struct RecordReaderBuilder<'a, T: RecordParser<'a>> {
     quote_char: u8,
     null_string: Option<String>,
     flexible: bool,
-    flexible_default: Option<String>,
     trim: csv::Trim,
     ignore_null_bytes: bool,
+    lossy: bool,
     _phantom: PhantomData<T>,
     _phantom_a: PhantomData<&'a ()>,
 }
@@ -83,9 +97,9 @@ impl<'a, T: RecordParser<'a>> RecordReaderBuilder<'a, T> {
             quote_char: b'"',
             null_string: None,
             flexible: false,
-            flexible_default: None,
             trim: csv::Trim::None,
             ignore_null_bytes: false,
+            lossy: false,
             _phantom: PhantomData,
             _phantom_a: PhantomData,
         }
@@ -126,13 +140,6 @@ impl<'a, T: RecordParser<'a>> RecordReaderBuilder<'a, T> {
         self
     }
-    /// Sets the default value for missing fields when in flexible mode.
-    #[must_use]
-    pub fn flexible_default(mut self, flexible_default: Option<String>) -> Self {
-        self.flexible_default = flexible_default;
-        self
-    }
     /// Sets the trimming mode for fields.
     #[must_use]
     pub fn trim(mut self, trim: csv::Trim) -> Self {
@@ -146,6 +153,12 @@ impl<'a, T: RecordParser<'a>> RecordReaderBuilder<'a, T> {
         self
     }
+    #[must_use]
+    pub fn lossy(mut self, lossy: bool) -> Self {
+        self.lossy = lossy;
+        self
+    }
     /// Handles reading from a file descriptor.
     fn handle_file_descriptor(&self) -> Result<Box<dyn SeekableRead>, ReaderError> {
         let raw_value = self.to_read.as_raw();
@@ -188,7 +201,7 @@ impl<'a, T: RecordParser<'a>> RecordReaderBuilder<'a, T> {
             build_ruby_reader(&self.ruby, self.to_read)?
         };
-        let flexible = self.flexible || self.flexible_default.is_some();
+        let flexible = self.flexible;
         let reader = BufReader::with_capacity(READ_BUFFER_SIZE, readable);
         let mut reader = csv::ReaderBuilder::new()
@@ -199,24 +212,13 @@ impl<'a, T: RecordParser<'a>> RecordReaderBuilder<'a, T> {
             .trim(self.trim)
             .from_reader(reader);
-        let mut headers = RecordReader::<T>::get_headers(&self.ruby, &mut reader, self.has_headers)?;
+        let mut headers =
+            RecordReader::<T>::get_headers(&self.ruby, &mut reader, self.has_headers)?;
         if self.ignore_null_bytes {
             headers = headers.iter().map(|h| h.replace("\0", "")).collect();
         }
         let static_headers = StringCache::intern_many(&headers)?;
-        // We intern both of these to get static string references we can reuse throughout the parser.
-        let flexible_default = self
-            .flexible_default
-            .map(|s| {
-                RString::new(&s)
-                    .to_interned_str()
-                    .as_str()
-                    .map_err(|e| ReaderError::InvalidFlexibleDefault(format!("{:?}", e)))
-            })
-            .transpose()?
-            .map(Cow::Borrowed);
         let null_string = self
             .null_string
             .map(|s| {
@@ -232,8 +234,8 @@ impl<'a, T: RecordParser<'a>> RecordReaderBuilder<'a, T> {
             reader,
             static_headers,
             null_string,
-            flexible_default,
             self.ignore_null_bytes,
+            self.lossy,
         ))
     }
 }

data/ext/osv/src/csv/parser.rs CHANGED Viewed

@@ -5,14 +5,18 @@ use std::hash::BuildHasher;
 use super::header_cache::StringCacheKey;
 use super::CowStr;
+pub enum CsvRecordType {
+    String(csv::StringRecord),
+    Byte(csv::ByteRecord),
+}
 pub trait RecordParser<'a> {
     type Output;
     fn parse(
         headers: &[StringCacheKey],
-        record: &csv::StringRecord,
+        record: &CsvRecordType,
         null_string: Option<Cow<'a, str>>,
-        flexible_default: Option<Cow<'a, str>>,
         ignore_null_bytes: bool,
     ) -> Self::Output;
 }
@@ -25,31 +29,42 @@ impl<'a, S: BuildHasher + Default> RecordParser<'a>
     #[inline]
     fn parse(
         headers: &[StringCacheKey],
-        record: &csv::StringRecord,
+        record: &CsvRecordType,
         null_string: Option<Cow<'a, str>>,
-        flexible_default: Option<Cow<'a, str>>,
         ignore_null_bytes: bool,
     ) -> Self::Output {
         let mut map = HashMap::with_capacity_and_hasher(headers.len(), S::default());
         let shared_empty = Cow::Borrowed("");
-        let shared_default = flexible_default.map(CowStr);
         headers.iter().enumerate().for_each(|(i, header)| {
-            let value = record.get(i).map_or_else(
-                || shared_default.clone(),
-                |field| {
-                    if null_string.as_deref() == Some(field) {
+            let value = match record {
+                CsvRecordType::String(s) => s.get(i).and_then(|field| {
+                    if null_string.as_deref() == Some(field.as_ref()) {
                         None
                     } else if field.is_empty() {
                         Some(CowStr(shared_empty.clone()))
-                    } else if ignore_null_bytes  {
+                    } else if ignore_null_bytes {
                         Some(CowStr(Cow::Owned(field.replace("\0", ""))))
+                    } else {
+                        Some(CowStr(Cow::Owned(field.to_string())))
                     }
-                    else {
+                }),
+                CsvRecordType::Byte(b) => b.get(i).and_then(|field| {
+                    let field = String::from_utf8_lossy(field);
+                    if null_string.as_deref() == Some(field.as_ref()) {
+                        None
+                    } else if field.is_empty() {
+                        Some(CowStr(shared_empty.clone()))
+                    } else if ignore_null_bytes {
+                        Some(CowStr(Cow::Owned(field.replace("\0", ""))))
+                    } else {
                         Some(CowStr(Cow::Owned(field.to_string())))
                     }
-                },
-            );
+                }),
+            };
             map.insert(*header, value);
         });
         map
@@ -62,36 +77,47 @@ impl<'a> RecordParser<'a> for Vec<Option<CowStr<'a>>> {
     #[inline]
     fn parse(
         headers: &[StringCacheKey],
-        record: &csv::StringRecord,
+        record: &CsvRecordType,
         null_string: Option<Cow<'a, str>>,
-        flexible_default: Option<Cow<'a, str>>,
         ignore_null_bytes: bool,
     ) -> Self::Output {
         let target_len = headers.len();
         let mut vec = Vec::with_capacity(target_len);
         let shared_empty = Cow::Borrowed("");
-        let shared_default = flexible_default.map(CowStr);
-        for field in record.iter() {
-            let value = if Some(field) == null_string.as_deref() {
-                None
-            } else if field.is_empty() {
-                Some(CowStr(shared_empty.clone()))
-            } else if ignore_null_bytes  {
-                Some(CowStr(Cow::Owned(field.replace("\0", ""))))
+        match record {
+            CsvRecordType::String(record) => {
+                for field in record.iter() {
+                    let value = if Some(field.as_ref()) == null_string.as_deref() {
+                        None
+                    } else if field.is_empty() {
+                        Some(CowStr(shared_empty.clone()))
+                    } else if ignore_null_bytes {
+                        Some(CowStr(Cow::Owned(field.replace("\0", ""))))
+                    } else {
+                        Some(CowStr(Cow::Owned(field.to_string())))
+                    };
+                    vec.push(value);
+                }
             }
-            else {
-                Some(CowStr(Cow::Owned(field.to_string())))
-            };
-            vec.push(value);
-        }
-        if vec.len() < target_len {
-            if let Some(default) = shared_default {
-                vec.resize_with(target_len, || Some(default.clone()));
+            CsvRecordType::Byte(record) => {
+                for field in record.iter() {
+                    let field = String::from_utf8_lossy(field);
+                    let value = if Some(field.as_ref()) == null_string.as_deref() {
+                        None
+                    } else if field.is_empty() {
+                        Some(CowStr(shared_empty.clone()))
+                    } else if ignore_null_bytes {
+                        Some(CowStr(Cow::Owned(field.replace("\0", ""))))
+                    } else {
+                        Some(CowStr(Cow::Owned(field.to_string())))
+                    };
+                    vec.push(value);
+                }
             }
         }
         vec
     }
 }

data/ext/osv/src/csv/record_reader.rs CHANGED Viewed

@@ -1,5 +1,6 @@
+use super::builder::ReaderError;
 use super::header_cache::StringCacheKey;
-use super::parser::RecordParser;
+use super::parser::{CsvRecordType, RecordParser};
 use super::{header_cache::StringCache, ruby_reader::SeekableRead};
 use magnus::{Error, Ruby};
 use std::borrow::Cow;
@@ -15,8 +16,7 @@ pub struct RecordReader<'a, T: RecordParser<'a>> {
     reader: csv::Reader<BufReader<Box<dyn SeekableRead>>>,
     headers: Vec<StringCacheKey>,
     null_string: Option<Cow<'a, str>>,
-    flexible_default: Option<Cow<'a, str>>,
-    string_record: csv::StringRecord,
+    string_record: CsvRecordType,
     parser: std::marker::PhantomData<T>,
     ignore_null_bytes: bool,
 }
@@ -56,44 +56,59 @@ impl<'a, T: RecordParser<'a>> RecordReader<'a, T> {
         reader: csv::Reader<BufReader<Box<dyn SeekableRead>>>,
         headers: Vec<StringCacheKey>,
         null_string: Option<Cow<'a, str>>,
-        flexible_default: Option<Cow<'a, str>>,
         ignore_null_bytes: bool,
+        lossy: bool,
     ) -> Self {
         let headers_len = headers.len();
         Self {
             reader,
             headers,
             null_string,
-            flexible_default,
-            string_record: csv::StringRecord::with_capacity(READ_BUFFER_SIZE, headers_len),
+            string_record: if lossy {
+                CsvRecordType::Byte(csv::ByteRecord::with_capacity(
+                    READ_BUFFER_SIZE,
+                    headers_len,
+                ))
+            } else {
+                CsvRecordType::String(csv::StringRecord::with_capacity(
+                    READ_BUFFER_SIZE,
+                    headers_len,
+                ))
+            },
             parser: std::marker::PhantomData,
             ignore_null_bytes,
         }
     }
     /// Attempts to read the next record, returning any errors encountered.
-    fn try_next(&mut self) -> csv::Result<Option<T::Output>> {
-        match self.reader.read_record(&mut self.string_record)? {
-            true => Ok(Some(T::parse(
+    fn try_next(&mut self) -> Result<Option<T::Output>, ReaderError> {
+        let record = match self.string_record {
+            CsvRecordType::String(ref mut record) => self.reader.read_record(record),
+            CsvRecordType::Byte(ref mut record) => self.reader.read_byte_record(record),
+        }?;
+        if record {
+            Ok(Some(T::parse(
                 &self.headers,
                 &self.string_record,
                 self.null_string.clone(),
-                self.flexible_default.clone(),
-                self.ignore_null_bytes
-            ))),
-            false => Ok(None),
+                self.ignore_null_bytes,
+            )))
+        } else {
+            Ok(None)
         }
     }
 }
 impl<'a, T: RecordParser<'a>> Iterator for RecordReader<'a, T> {
-    type Item = T::Output;
+    type Item = Result<T::Output, ReaderError>;
     #[inline]
     fn next(&mut self) -> Option<Self::Item> {
-        // Note: We intentionally swallow errors here to maintain Iterator contract.
-        // Errors can be handled by using try_next() directly if needed.
-        self.try_next().ok().flatten()
+        match self.try_next() {
+            Ok(Some(record)) => Some(Ok(record)),
+            Ok(None) => None,
+            Err(e) => Some(Err(e)),
+        }
     }
     #[inline]

data/ext/osv/src/csv/ruby_reader.rs CHANGED Viewed

@@ -1,4 +1,5 @@
 use magnus::{
+    error::Error as MagnusError,
     value::{Opaque, ReprValue},
     RClass, RString, Ruby, Value,
 };
@@ -6,7 +7,7 @@ use std::fs::File;
 use std::io::{self, BufReader, Read, Seek, SeekFrom, Write};
 use std::sync::OnceLock;
-use super::ForgottenFileHandle;
+use super::{builder::ReaderError, ForgottenFileHandle};
 static STRING_IO_CLASS: OnceLock<Opaque<RClass>> = OnceLock::new();
@@ -25,10 +26,7 @@ impl<T: Read + Seek> SeekableRead for BufReader<T> {}
 impl SeekableRead for std::io::Cursor<Vec<u8>> {}
 impl SeekableRead for ForgottenFileHandle {}
-pub fn build_ruby_reader(
-    ruby: &Ruby,
-    input: Value,
-) -> Result<Box<dyn SeekableRead>, magnus::Error> {
+pub fn build_ruby_reader(ruby: &Ruby, input: Value) -> Result<Box<dyn SeekableRead>, ReaderError> {
     if RubyReader::is_string_io(ruby, &input) {
         RubyReader::from_string_io(ruby, input)
     } else if RubyReader::is_io_like(&input) {
@@ -88,14 +86,14 @@ impl Seek for RubyReader<RString> {
 }
 impl RubyReader<Value> {
-    fn from_io(input: Value) -> Result<Box<dyn SeekableRead>, magnus::Error> {
+    fn from_io(input: Value) -> Result<Box<dyn SeekableRead>, ReaderError> {
         if Self::is_io_like(&input) {
             Ok(Box::new(Self::from_io_like(input)))
         } else {
-            Err(magnus::Error::new(
+            Err(MagnusError::new(
                 magnus::exception::type_error(),
                 "Input is not an IO-like object",
-            ))
+            ))?
         }
     }
@@ -112,15 +110,12 @@ impl RubyReader<Value> {
 }
 impl RubyReader<RString> {
-    pub fn from_string_io(
-        ruby: &Ruby,
-        input: Value,
-    ) -> Result<Box<dyn SeekableRead>, magnus::Error> {
+    pub fn from_string_io(ruby: &Ruby, input: Value) -> Result<Box<dyn SeekableRead>, ReaderError> {
         if !Self::is_string_io(ruby, &input) {
-            return Err(magnus::Error::new(
+            return Err(MagnusError::new(
                 magnus::exception::type_error(),
                 "Input is not a StringIO",
-            ));
+            ))?;
         }
         let string_content = input.funcall::<_, _, RString>("string", ()).unwrap();
@@ -138,11 +133,11 @@ impl RubyReader<RString> {
         input.is_kind_of(ruby.get_inner(*string_io_class))
     }
-    fn from_string_like(input: Value) -> Result<Box<dyn SeekableRead>, magnus::Error> {
-        // Try calling `to_str`, and if that fails, try `to_s`
+    fn from_string_like(input: Value) -> Result<Box<dyn SeekableRead>, ReaderError> {
         let string_content = input
             .funcall::<_, _, RString>("to_str", ())
             .or_else(|_| input.funcall::<_, _, RString>("to_s", ()))?;
         Ok(Box::new(Self {
             inner: string_content,
             offset: 0,
@@ -154,12 +149,16 @@ impl Read for RubyReader<Value> {
     fn read(&mut self, mut buf: &mut [u8]) -> io::Result<usize> {
         let bytes = self
             .inner
-            .funcall::<_, _, RString>("read", (buf.len(),))
+            .funcall::<_, _, Option<RString>>("read", (buf.len(),))
             .map_err(|e| io::Error::new(io::ErrorKind::Other, e.to_string()))?;
-        buf.write_all(unsafe { bytes.as_slice() })?;
-        Ok(bytes.len())
+        match bytes {
+            Some(bytes) => {
+                buf.write_all(unsafe { bytes.as_slice() })?;
+                Ok(bytes.len())
+            }
+            None => Ok(0), // EOF
+        }
     }
 }

data/ext/osv/src/reader.rs CHANGED Viewed

@@ -3,7 +3,7 @@ use crate::utils::*;
 use ahash::RandomState;
 use csv::Trim;
 use magnus::value::ReprValue;
-use magnus::{block::Yield, Error, KwArgs, RHash, Ruby, Symbol, Value};
+use magnus::{Error, IntoValue, KwArgs, RHash, Ruby, Symbol, Value};
 use std::collections::HashMap;
 /// Valid result types for CSV parsing
@@ -34,9 +34,9 @@ struct EnumeratorArgs {
     null_string: Option<String>,
     result_type: String,
     flexible: bool,
-    flexible_default: Option<String>,
     trim: Option<String>,
     ignore_null_bytes: bool,
+    lossy: bool,
 }
 /// Parses a CSV file with the given configuration.
@@ -44,10 +44,7 @@ struct EnumeratorArgs {
 /// # Safety
 /// This function uses unsafe code to get the Ruby runtime and leak memory for static references.
 /// This is necessary for Ruby integration but should be used with caution.
-pub fn parse_csv(
-    rb_self: Value,
-    args: &[Value],
-) -> Result<Yield<Box<dyn Iterator<Item = CsvRecord<'static, RandomState>>>>, Error> {
+pub fn parse_csv(rb_self: Value, args: &[Value]) -> Result<Value, Error> {
     //  SAFETY: We're in a Ruby callback, so Ruby runtime is guaranteed to be initialized
     let ruby = unsafe { Ruby::get_unchecked() };
@@ -59,9 +56,9 @@ pub fn parse_csv(
         null_string,
         result_type,
         flexible,
-        flexible_default,
         trim,
         ignore_null_bytes,
+        lossy,
     } = parse_read_csv_args(&ruby, args)?;
     if !ruby.block_given() {
@@ -74,7 +71,6 @@ pub fn parse_csv(
             null_string,
             result_type,
             flexible,
-            flexible_default,
             trim: match trim {
                 Trim::All => Some("all".to_string()),
                 Trim::Headers => Some("headers".to_string()),
@@ -82,7 +78,9 @@ pub fn parse_csv(
                 _ => None,
             },
             ignore_null_bytes,
-        });
+            lossy,
+        })
+        .map(|yield_enum| yield_enum.into_value_with(&ruby));
     }
     let result_type = ResultType::from_str(&result_type).ok_or_else(|| {
@@ -92,46 +90,53 @@ pub fn parse_csv(
         )
     })?;
-    let iter: Box<dyn Iterator<Item = CsvRecord<RandomState>>> = match result_type {
+    match result_type {
         ResultType::Hash => {
             let builder = RecordReaderBuilder::<
                 HashMap<StringCacheKey, Option<CowStr<'static>>, RandomState>,
             >::new(ruby, to_read)
             .has_headers(has_headers)
             .flexible(flexible)
-            .flexible_default(flexible_default)
             .trim(trim)
             .delimiter(delimiter)
             .quote_char(quote_char)
             .null_string(null_string)
             .ignore_null_bytes(ignore_null_bytes)
+            .lossy(lossy)
             .build()?;
-            Box::new(builder.map(CsvRecord::Map))
+            let ruby = unsafe { Ruby::get_unchecked() };
+            for result in builder {
+                let record = result?;
+                let _: Value = ruby.yield_value(CsvRecord::Map(record))?;
+            }
         }
         ResultType::Array => {
             let builder = RecordReaderBuilder::<Vec<Option<CowStr<'static>>>>::new(ruby, to_read)
                 .has_headers(has_headers)
                 .flexible(flexible)
-                .flexible_default(flexible_default)
                 .trim(trim)
                 .delimiter(delimiter)
                 .quote_char(quote_char)
                 .null_string(null_string)
                 .ignore_null_bytes(ignore_null_bytes)
+                .lossy(lossy)
                 .build()?;
-            Box::new(builder.map(CsvRecord::Vec))
+            let ruby = unsafe { Ruby::get_unchecked() };
+            for result in builder {
+                let record = result?;
+                let _: Value = ruby.yield_value(CsvRecord::<ahash::RandomState>::Vec(record))?;
+            }
         }
-    };
+    }
-    Ok(Yield::Iter(iter))
+    let ruby = unsafe { Ruby::get_unchecked() };
+    Ok(ruby.qnil().into_value_with(&ruby))
 }
 /// Creates an enumerator for lazy CSV parsing
-fn create_enumerator(
-    args: EnumeratorArgs,
-) -> Result<Yield<Box<dyn Iterator<Item = CsvRecord<'static, RandomState>>>>, Error> {
+fn create_enumerator(args: EnumeratorArgs) -> Result<magnus::Enumerator, Error> {
     let kwargs = RHash::new();
     kwargs.aset(Symbol::new("has_headers"), args.has_headers)?;
     kwargs.aset(
@@ -145,14 +150,10 @@ fn create_enumerator(
     kwargs.aset(Symbol::new("nil_string"), args.null_string)?;
     kwargs.aset(Symbol::new("result_type"), Symbol::new(args.result_type))?;
     kwargs.aset(Symbol::new("flexible"), args.flexible)?;
-    kwargs.aset(Symbol::new("flexible_default"), args.flexible_default)?;
     kwargs.aset(Symbol::new("trim"), args.trim.map(Symbol::new))?;
     kwargs.aset(Symbol::new("ignore_null_bytes"), args.ignore_null_bytes)?;
-    let enumerator = args
+    kwargs.aset(Symbol::new("lossy"), args.lossy)?;
+    Ok(args
         .rb_self
-        .enumeratorize("for_each", (args.to_read, KwArgs(kwargs)));
-    Ok(Yield::Enumerator(enumerator))
+        .enumeratorize("for_each", (args.to_read, KwArgs(kwargs))))
 }

data/ext/osv/src/utils.rs CHANGED Viewed

@@ -34,9 +34,9 @@ pub struct ReadCsvArgs {
     pub null_string: Option<String>,
     pub result_type: String,
     pub flexible: bool,
-    pub flexible_default: Option<String>,
     pub trim: csv::Trim,
     pub ignore_null_bytes: bool,
+    pub lossy: bool,
 }
 /// Parse common arguments for CSV parsing
@@ -54,9 +54,9 @@ pub fn parse_read_csv_args(ruby: &Ruby, args: &[Value]) -> Result<ReadCsvArgs, E
             Option<Option<String>>,
             Option<Option<Value>>,
             Option<Option<bool>>,
-            Option<Option<Option<String>>>,
             Option<Option<Value>>,
             Option<Option<bool>>,
+            Option<Option<bool>>,
         ),
         (),
     >(
@@ -69,9 +69,9 @@ pub fn parse_read_csv_args(ruby: &Ruby, args: &[Value]) -> Result<ReadCsvArgs, E
             "nil_string",
             "result_type",
             "flexible",
-            "flexible_default",
             "trim",
             "ignore_null_bytes",
+            "lossy",
         ],
     )?;
@@ -134,11 +134,9 @@ pub fn parse_read_csv_args(ruby: &Ruby, args: &[Value]) -> Result<ReadCsvArgs, E
     let flexible = kwargs.optional.5.flatten().unwrap_or_default();
-    let flexible_default = kwargs.optional.6.flatten().unwrap_or_default();
     let trim = match kwargs
         .optional
-        .7
+        .6
         .flatten()
         .map(|value| parse_string_or_symbol(ruby, value))
     {
@@ -166,7 +164,9 @@ pub fn parse_read_csv_args(ruby: &Ruby, args: &[Value]) -> Result<ReadCsvArgs, E
         None => csv::Trim::None,
     };
-    let ignore_null_bytes = kwargs.optional.8.flatten().unwrap_or_default();
+    let ignore_null_bytes = kwargs.optional.7.flatten().unwrap_or_default();
+    let lossy = kwargs.optional.8.flatten().unwrap_or_default();
     Ok(ReadCsvArgs {
         to_read,
@@ -176,8 +176,8 @@ pub fn parse_read_csv_args(ruby: &Ruby, args: &[Value]) -> Result<ReadCsvArgs, E
         null_string,
         result_type,
         flexible,
-        flexible_default,
         trim,
         ignore_null_bytes,
+        lossy,
     })
 }

data/lib/osv/version.rb CHANGED Viewed

@@ -1,3 +1,3 @@
 module OSV
-  VERSION = "0.3.21"
+  VERSION = "0.4.0"
 end

data/lib/osv.rbi CHANGED Viewed

@@ -17,14 +17,12 @@ module OSV
   #                    ("hash" or "array" or :hash or :array)
   #   - `flexible`: Boolean specifying if the parser should be flexible
   #                 (default: false)
-  #   - `flexible_default`: String specifying the default value for missing fields.
-  #                         Implicitly enables flexible mode if set.
-  #                         (default: `nil`)
   #   - `trim`: String specifying the trim mode
   #             ("all" or "headers" or "fields" or :all or :headers or :fields)
   #             (default: `nil`)
   #   - `ignore_null_bytes`: Boolean specifying if null bytes should be ignored
   #                         (default: false)
+  #   - `lossy`: Boolean specifying if invalid UTF-8 characters should be replaced with a replacement character
   sig do
     params(
       input: T.any(String, StringIO, IO),
@@ -35,7 +33,6 @@ module OSV
       buffer_size: T.nilable(Integer),
       result_type: T.nilable(T.any(String, Symbol)),
       flexible: T.nilable(T::Boolean),
-      flexible_default: T.nilable(String),
       ignore_null_bytes: T.nilable(T::Boolean),
       trim: T.nilable(T.any(String, Symbol)),
       blk: T.nilable(T.proc.params(row: T.any(T::Hash[String, T.nilable(String)], T::Array[T.nilable(String)])).void)
@@ -50,9 +47,9 @@ module OSV
     buffer_size: nil,
     result_type: nil,
     flexible: nil,
-    flexible_default: nil,
     ignore_null_bytes: nil,
     trim: nil,
+    lossy: nil,
     &blk
   )
   end

metadata CHANGED Viewed

@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: osv
 version: !ruby/object:Gem::Version
-  version: 0.3.21
+  version: 0.4.0
 platform: ruby
 authors:
 - Nathan Jaremko
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2025-01-24 00:00:00.000000000 Z
+date: 2025-01-30 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: rb_sys