osv 0.3.20 → 0.3.22
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/ext/osv/src/csv/builder.rs +20 -5
- data/ext/osv/src/csv/parser.rs +4 -6
- data/ext/osv/src/csv/record_reader.rs +14 -10
- data/ext/osv/src/csv/ruby_reader.rs +19 -20
- data/ext/osv/src/reader.rs +24 -17
- data/lib/osv/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 26bda7b8aed144013156dea4f4f68c322b0a2042d6478c225edde0c44f54452f
|
4
|
+
data.tar.gz: cd63b6b71c158d8a09196a4fff496c5c7e7a9ac2c9a64724bbf31c56ff9ee0c7
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 947a7cc0d9f644977d157d0424893daf10c62efee5b4d544f81731ca7f04056cc75bab464560f4aea0b793b4b4e6e63a964fed8adace0c232cd388182a709a3a
|
7
|
+
data.tar.gz: 78536bdbba174b441792e39dc3b1c2ca08d85bafe4dce7763156160abcc611bb38c0743e6cadfd5d3466410b42fa0e598dc32e4865393109b8c88ec9673bf44b
|
data/ext/osv/src/csv/builder.rs
CHANGED
@@ -34,6 +34,10 @@ pub enum ReaderError {
|
|
34
34
|
InvalidFlexibleDefault(String),
|
35
35
|
#[error("Invalid null string value: {0}")]
|
36
36
|
InvalidNullString(String),
|
37
|
+
#[error("Failed to parse CSV record: {0}")]
|
38
|
+
CsvParse(#[from] csv::Error),
|
39
|
+
#[error("Invalid UTF-8: {0}")]
|
40
|
+
InvalidUtf8(String),
|
37
41
|
#[error("Ruby error: {0}")]
|
38
42
|
Ruby(String),
|
39
43
|
}
|
@@ -46,10 +50,20 @@ impl From<MagnusError> for ReaderError {
|
|
46
50
|
|
47
51
|
impl From<ReaderError> for MagnusError {
|
48
52
|
fn from(err: ReaderError) -> Self {
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
+
let ruby = Ruby::get().unwrap();
|
54
|
+
match err {
|
55
|
+
ReaderError::CsvParse(csv_err) => {
|
56
|
+
if csv_err.to_string().contains("invalid utf-8") {
|
57
|
+
MagnusError::new(ruby.exception_encoding_error(), csv_err.to_string())
|
58
|
+
} else {
|
59
|
+
MagnusError::new(ruby.exception_runtime_error(), csv_err.to_string())
|
60
|
+
}
|
61
|
+
}
|
62
|
+
ReaderError::InvalidUtf8(utf8_err) => {
|
63
|
+
MagnusError::new(ruby.exception_encoding_error(), utf8_err.to_string())
|
64
|
+
}
|
65
|
+
_ => MagnusError::new(ruby.exception_runtime_error(), err.to_string()),
|
66
|
+
}
|
53
67
|
}
|
54
68
|
}
|
55
69
|
|
@@ -199,7 +213,8 @@ impl<'a, T: RecordParser<'a>> RecordReaderBuilder<'a, T> {
|
|
199
213
|
.trim(self.trim)
|
200
214
|
.from_reader(reader);
|
201
215
|
|
202
|
-
let mut headers =
|
216
|
+
let mut headers =
|
217
|
+
RecordReader::<T>::get_headers(&self.ruby, &mut reader, self.has_headers)?;
|
203
218
|
if self.ignore_null_bytes {
|
204
219
|
headers = headers.iter().map(|h| h.replace("\0", "")).collect();
|
205
220
|
}
|
data/ext/osv/src/csv/parser.rs
CHANGED
@@ -42,10 +42,9 @@ impl<'a, S: BuildHasher + Default> RecordParser<'a>
|
|
42
42
|
None
|
43
43
|
} else if field.is_empty() {
|
44
44
|
Some(CowStr(shared_empty.clone()))
|
45
|
-
} else if ignore_null_bytes
|
45
|
+
} else if ignore_null_bytes {
|
46
46
|
Some(CowStr(Cow::Owned(field.replace("\0", ""))))
|
47
|
-
}
|
48
|
-
else {
|
47
|
+
} else {
|
49
48
|
Some(CowStr(Cow::Owned(field.to_string())))
|
50
49
|
}
|
51
50
|
},
|
@@ -78,10 +77,9 @@ impl<'a> RecordParser<'a> for Vec<Option<CowStr<'a>>> {
|
|
78
77
|
None
|
79
78
|
} else if field.is_empty() {
|
80
79
|
Some(CowStr(shared_empty.clone()))
|
81
|
-
} else if ignore_null_bytes
|
80
|
+
} else if ignore_null_bytes {
|
82
81
|
Some(CowStr(Cow::Owned(field.replace("\0", ""))))
|
83
|
-
}
|
84
|
-
else {
|
82
|
+
} else {
|
85
83
|
Some(CowStr(Cow::Owned(field.to_string())))
|
86
84
|
};
|
87
85
|
vec.push(value);
|
@@ -1,3 +1,4 @@
|
|
1
|
+
use super::builder::ReaderError;
|
1
2
|
use super::header_cache::StringCacheKey;
|
2
3
|
use super::parser::RecordParser;
|
3
4
|
use super::{header_cache::StringCache, ruby_reader::SeekableRead};
|
@@ -72,28 +73,31 @@ impl<'a, T: RecordParser<'a>> RecordReader<'a, T> {
|
|
72
73
|
}
|
73
74
|
|
74
75
|
/// Attempts to read the next record, returning any errors encountered.
|
75
|
-
fn try_next(&mut self) ->
|
76
|
-
|
77
|
-
|
76
|
+
fn try_next(&mut self) -> Result<Option<T::Output>, ReaderError> {
|
77
|
+
if self.reader.read_record(&mut self.string_record)? {
|
78
|
+
Ok(Some(T::parse(
|
78
79
|
&self.headers,
|
79
80
|
&self.string_record,
|
80
81
|
self.null_string.clone(),
|
81
82
|
self.flexible_default.clone(),
|
82
|
-
self.ignore_null_bytes
|
83
|
-
)))
|
84
|
-
|
83
|
+
self.ignore_null_bytes,
|
84
|
+
)))
|
85
|
+
} else {
|
86
|
+
Ok(None)
|
85
87
|
}
|
86
88
|
}
|
87
89
|
}
|
88
90
|
|
89
91
|
impl<'a, T: RecordParser<'a>> Iterator for RecordReader<'a, T> {
|
90
|
-
type Item = T::Output
|
92
|
+
type Item = Result<T::Output, ReaderError>;
|
91
93
|
|
92
94
|
#[inline]
|
93
95
|
fn next(&mut self) -> Option<Self::Item> {
|
94
|
-
|
95
|
-
|
96
|
-
|
96
|
+
match self.try_next() {
|
97
|
+
Ok(Some(record)) => Some(Ok(record)),
|
98
|
+
Ok(None) => None,
|
99
|
+
Err(e) => Some(Err(e)),
|
100
|
+
}
|
97
101
|
}
|
98
102
|
|
99
103
|
#[inline]
|
@@ -1,4 +1,5 @@
|
|
1
1
|
use magnus::{
|
2
|
+
error::Error as MagnusError,
|
2
3
|
value::{Opaque, ReprValue},
|
3
4
|
RClass, RString, Ruby, Value,
|
4
5
|
};
|
@@ -6,7 +7,7 @@ use std::fs::File;
|
|
6
7
|
use std::io::{self, BufReader, Read, Seek, SeekFrom, Write};
|
7
8
|
use std::sync::OnceLock;
|
8
9
|
|
9
|
-
use super::ForgottenFileHandle;
|
10
|
+
use super::{builder::ReaderError, ForgottenFileHandle};
|
10
11
|
|
11
12
|
static STRING_IO_CLASS: OnceLock<Opaque<RClass>> = OnceLock::new();
|
12
13
|
|
@@ -25,10 +26,7 @@ impl<T: Read + Seek> SeekableRead for BufReader<T> {}
|
|
25
26
|
impl SeekableRead for std::io::Cursor<Vec<u8>> {}
|
26
27
|
impl SeekableRead for ForgottenFileHandle {}
|
27
28
|
|
28
|
-
pub fn build_ruby_reader(
|
29
|
-
ruby: &Ruby,
|
30
|
-
input: Value,
|
31
|
-
) -> Result<Box<dyn SeekableRead>, magnus::Error> {
|
29
|
+
pub fn build_ruby_reader(ruby: &Ruby, input: Value) -> Result<Box<dyn SeekableRead>, ReaderError> {
|
32
30
|
if RubyReader::is_string_io(ruby, &input) {
|
33
31
|
RubyReader::from_string_io(ruby, input)
|
34
32
|
} else if RubyReader::is_io_like(&input) {
|
@@ -88,14 +86,14 @@ impl Seek for RubyReader<RString> {
|
|
88
86
|
}
|
89
87
|
|
90
88
|
impl RubyReader<Value> {
|
91
|
-
fn from_io(input: Value) -> Result<Box<dyn SeekableRead>,
|
89
|
+
fn from_io(input: Value) -> Result<Box<dyn SeekableRead>, ReaderError> {
|
92
90
|
if Self::is_io_like(&input) {
|
93
91
|
Ok(Box::new(Self::from_io_like(input)))
|
94
92
|
} else {
|
95
|
-
Err(
|
93
|
+
Err(MagnusError::new(
|
96
94
|
magnus::exception::type_error(),
|
97
95
|
"Input is not an IO-like object",
|
98
|
-
))
|
96
|
+
))?
|
99
97
|
}
|
100
98
|
}
|
101
99
|
|
@@ -112,15 +110,12 @@ impl RubyReader<Value> {
|
|
112
110
|
}
|
113
111
|
|
114
112
|
impl RubyReader<RString> {
|
115
|
-
pub fn from_string_io(
|
116
|
-
ruby: &Ruby,
|
117
|
-
input: Value,
|
118
|
-
) -> Result<Box<dyn SeekableRead>, magnus::Error> {
|
113
|
+
pub fn from_string_io(ruby: &Ruby, input: Value) -> Result<Box<dyn SeekableRead>, ReaderError> {
|
119
114
|
if !Self::is_string_io(ruby, &input) {
|
120
|
-
return Err(
|
115
|
+
return Err(MagnusError::new(
|
121
116
|
magnus::exception::type_error(),
|
122
117
|
"Input is not a StringIO",
|
123
|
-
))
|
118
|
+
))?;
|
124
119
|
}
|
125
120
|
|
126
121
|
let string_content = input.funcall::<_, _, RString>("string", ()).unwrap();
|
@@ -138,11 +133,11 @@ impl RubyReader<RString> {
|
|
138
133
|
input.is_kind_of(ruby.get_inner(*string_io_class))
|
139
134
|
}
|
140
135
|
|
141
|
-
fn from_string_like(input: Value) -> Result<Box<dyn SeekableRead>,
|
142
|
-
// Try calling `to_str`, and if that fails, try `to_s`
|
136
|
+
fn from_string_like(input: Value) -> Result<Box<dyn SeekableRead>, ReaderError> {
|
143
137
|
let string_content = input
|
144
138
|
.funcall::<_, _, RString>("to_str", ())
|
145
139
|
.or_else(|_| input.funcall::<_, _, RString>("to_s", ()))?;
|
140
|
+
|
146
141
|
Ok(Box::new(Self {
|
147
142
|
inner: string_content,
|
148
143
|
offset: 0,
|
@@ -154,12 +149,16 @@ impl Read for RubyReader<Value> {
|
|
154
149
|
fn read(&mut self, mut buf: &mut [u8]) -> io::Result<usize> {
|
155
150
|
let bytes = self
|
156
151
|
.inner
|
157
|
-
.funcall::<_, _, RString
|
152
|
+
.funcall::<_, _, Option<RString>>("read", (buf.len(),))
|
158
153
|
.map_err(|e| io::Error::new(io::ErrorKind::Other, e.to_string()))?;
|
159
154
|
|
160
|
-
|
161
|
-
|
162
|
-
|
155
|
+
match bytes {
|
156
|
+
Some(bytes) => {
|
157
|
+
buf.write_all(unsafe { bytes.as_slice() })?;
|
158
|
+
Ok(bytes.len())
|
159
|
+
}
|
160
|
+
None => Ok(0), // EOF
|
161
|
+
}
|
163
162
|
}
|
164
163
|
}
|
165
164
|
|
data/ext/osv/src/reader.rs
CHANGED
@@ -3,7 +3,7 @@ use crate::utils::*;
|
|
3
3
|
use ahash::RandomState;
|
4
4
|
use csv::Trim;
|
5
5
|
use magnus::value::ReprValue;
|
6
|
-
use magnus::{
|
6
|
+
use magnus::{Error, IntoValue, KwArgs, RHash, Ruby, Symbol, Value};
|
7
7
|
use std::collections::HashMap;
|
8
8
|
|
9
9
|
/// Valid result types for CSV parsing
|
@@ -36,6 +36,7 @@ struct EnumeratorArgs {
|
|
36
36
|
flexible: bool,
|
37
37
|
flexible_default: Option<String>,
|
38
38
|
trim: Option<String>,
|
39
|
+
ignore_null_bytes: bool,
|
39
40
|
}
|
40
41
|
|
41
42
|
/// Parses a CSV file with the given configuration.
|
@@ -43,10 +44,7 @@ struct EnumeratorArgs {
|
|
43
44
|
/// # Safety
|
44
45
|
/// This function uses unsafe code to get the Ruby runtime and leak memory for static references.
|
45
46
|
/// This is necessary for Ruby integration but should be used with caution.
|
46
|
-
pub fn parse_csv(
|
47
|
-
rb_self: Value,
|
48
|
-
args: &[Value],
|
49
|
-
) -> Result<Yield<Box<dyn Iterator<Item = CsvRecord<'static, RandomState>>>>, Error> {
|
47
|
+
pub fn parse_csv(rb_self: Value, args: &[Value]) -> Result<Value, Error> {
|
50
48
|
// SAFETY: We're in a Ruby callback, so Ruby runtime is guaranteed to be initialized
|
51
49
|
let ruby = unsafe { Ruby::get_unchecked() };
|
52
50
|
|
@@ -80,7 +78,9 @@ pub fn parse_csv(
|
|
80
78
|
Trim::Fields => Some("fields".to_string()),
|
81
79
|
_ => None,
|
82
80
|
},
|
83
|
-
|
81
|
+
ignore_null_bytes,
|
82
|
+
})
|
83
|
+
.map(|yield_enum| yield_enum.into_value_with(&ruby));
|
84
84
|
}
|
85
85
|
|
86
86
|
let result_type = ResultType::from_str(&result_type).ok_or_else(|| {
|
@@ -90,7 +90,7 @@ pub fn parse_csv(
|
|
90
90
|
)
|
91
91
|
})?;
|
92
92
|
|
93
|
-
|
93
|
+
match result_type {
|
94
94
|
ResultType::Hash => {
|
95
95
|
let builder = RecordReaderBuilder::<
|
96
96
|
HashMap<StringCacheKey, Option<CowStr<'static>>, RandomState>,
|
@@ -105,7 +105,11 @@ pub fn parse_csv(
|
|
105
105
|
.ignore_null_bytes(ignore_null_bytes)
|
106
106
|
.build()?;
|
107
107
|
|
108
|
-
|
108
|
+
let ruby = unsafe { Ruby::get_unchecked() };
|
109
|
+
for result in builder {
|
110
|
+
let record = result?;
|
111
|
+
let _: Value = ruby.yield_value(CsvRecord::Map(record))?;
|
112
|
+
}
|
109
113
|
}
|
110
114
|
ResultType::Array => {
|
111
115
|
let builder = RecordReaderBuilder::<Vec<Option<CowStr<'static>>>>::new(ruby, to_read)
|
@@ -119,17 +123,20 @@ pub fn parse_csv(
|
|
119
123
|
.ignore_null_bytes(ignore_null_bytes)
|
120
124
|
.build()?;
|
121
125
|
|
122
|
-
|
126
|
+
let ruby = unsafe { Ruby::get_unchecked() };
|
127
|
+
for result in builder {
|
128
|
+
let record = result?;
|
129
|
+
let _: Value = ruby.yield_value(CsvRecord::<ahash::RandomState>::Vec(record))?;
|
130
|
+
}
|
123
131
|
}
|
124
|
-
}
|
132
|
+
}
|
125
133
|
|
126
|
-
|
134
|
+
let ruby = unsafe { Ruby::get_unchecked() };
|
135
|
+
Ok(ruby.qnil().into_value_with(&ruby))
|
127
136
|
}
|
128
137
|
|
129
138
|
/// Creates an enumerator for lazy CSV parsing
|
130
|
-
fn create_enumerator(
|
131
|
-
args: EnumeratorArgs,
|
132
|
-
) -> Result<Yield<Box<dyn Iterator<Item = CsvRecord<'static, RandomState>>>>, Error> {
|
139
|
+
fn create_enumerator(args: EnumeratorArgs) -> Result<magnus::Enumerator, Error> {
|
133
140
|
let kwargs = RHash::new();
|
134
141
|
kwargs.aset(Symbol::new("has_headers"), args.has_headers)?;
|
135
142
|
kwargs.aset(
|
@@ -145,9 +152,9 @@ fn create_enumerator(
|
|
145
152
|
kwargs.aset(Symbol::new("flexible"), args.flexible)?;
|
146
153
|
kwargs.aset(Symbol::new("flexible_default"), args.flexible_default)?;
|
147
154
|
kwargs.aset(Symbol::new("trim"), args.trim.map(Symbol::new))?;
|
155
|
+
kwargs.aset(Symbol::new("ignore_null_bytes"), args.ignore_null_bytes)?;
|
148
156
|
|
149
|
-
|
157
|
+
Ok(args
|
150
158
|
.rb_self
|
151
|
-
.enumeratorize("for_each", (args.to_read, KwArgs(kwargs)))
|
152
|
-
Ok(Yield::Enumerator(enumerator))
|
159
|
+
.enumeratorize("for_each", (args.to_read, KwArgs(kwargs))))
|
153
160
|
}
|
data/lib/osv/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: osv
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.3.
|
4
|
+
version: 0.3.22
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Nathan Jaremko
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2025-01-
|
11
|
+
date: 2025-01-30 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rb_sys
|