osv 0.3.21 → 0.3.22
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/ext/osv/src/csv/builder.rs +20 -5
- data/ext/osv/src/csv/parser.rs +4 -6
- data/ext/osv/src/csv/record_reader.rs +14 -10
- data/ext/osv/src/csv/ruby_reader.rs +19 -20
- data/ext/osv/src/reader.rs +21 -19
- data/lib/osv/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 26bda7b8aed144013156dea4f4f68c322b0a2042d6478c225edde0c44f54452f
|
4
|
+
data.tar.gz: cd63b6b71c158d8a09196a4fff496c5c7e7a9ac2c9a64724bbf31c56ff9ee0c7
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 947a7cc0d9f644977d157d0424893daf10c62efee5b4d544f81731ca7f04056cc75bab464560f4aea0b793b4b4e6e63a964fed8adace0c232cd388182a709a3a
|
7
|
+
data.tar.gz: 78536bdbba174b441792e39dc3b1c2ca08d85bafe4dce7763156160abcc611bb38c0743e6cadfd5d3466410b42fa0e598dc32e4865393109b8c88ec9673bf44b
|
data/ext/osv/src/csv/builder.rs
CHANGED
@@ -34,6 +34,10 @@ pub enum ReaderError {
|
|
34
34
|
InvalidFlexibleDefault(String),
|
35
35
|
#[error("Invalid null string value: {0}")]
|
36
36
|
InvalidNullString(String),
|
37
|
+
#[error("Failed to parse CSV record: {0}")]
|
38
|
+
CsvParse(#[from] csv::Error),
|
39
|
+
#[error("Invalid UTF-8: {0}")]
|
40
|
+
InvalidUtf8(String),
|
37
41
|
#[error("Ruby error: {0}")]
|
38
42
|
Ruby(String),
|
39
43
|
}
|
@@ -46,10 +50,20 @@ impl From<MagnusError> for ReaderError {
|
|
46
50
|
|
47
51
|
impl From<ReaderError> for MagnusError {
|
48
52
|
fn from(err: ReaderError) -> Self {
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
+
let ruby = Ruby::get().unwrap();
|
54
|
+
match err {
|
55
|
+
ReaderError::CsvParse(csv_err) => {
|
56
|
+
if csv_err.to_string().contains("invalid utf-8") {
|
57
|
+
MagnusError::new(ruby.exception_encoding_error(), csv_err.to_string())
|
58
|
+
} else {
|
59
|
+
MagnusError::new(ruby.exception_runtime_error(), csv_err.to_string())
|
60
|
+
}
|
61
|
+
}
|
62
|
+
ReaderError::InvalidUtf8(utf8_err) => {
|
63
|
+
MagnusError::new(ruby.exception_encoding_error(), utf8_err.to_string())
|
64
|
+
}
|
65
|
+
_ => MagnusError::new(ruby.exception_runtime_error(), err.to_string()),
|
66
|
+
}
|
53
67
|
}
|
54
68
|
}
|
55
69
|
|
@@ -199,7 +213,8 @@ impl<'a, T: RecordParser<'a>> RecordReaderBuilder<'a, T> {
|
|
199
213
|
.trim(self.trim)
|
200
214
|
.from_reader(reader);
|
201
215
|
|
202
|
-
let mut headers =
|
216
|
+
let mut headers =
|
217
|
+
RecordReader::<T>::get_headers(&self.ruby, &mut reader, self.has_headers)?;
|
203
218
|
if self.ignore_null_bytes {
|
204
219
|
headers = headers.iter().map(|h| h.replace("\0", "")).collect();
|
205
220
|
}
|
data/ext/osv/src/csv/parser.rs
CHANGED
@@ -42,10 +42,9 @@ impl<'a, S: BuildHasher + Default> RecordParser<'a>
|
|
42
42
|
None
|
43
43
|
} else if field.is_empty() {
|
44
44
|
Some(CowStr(shared_empty.clone()))
|
45
|
-
} else if ignore_null_bytes
|
45
|
+
} else if ignore_null_bytes {
|
46
46
|
Some(CowStr(Cow::Owned(field.replace("\0", ""))))
|
47
|
-
}
|
48
|
-
else {
|
47
|
+
} else {
|
49
48
|
Some(CowStr(Cow::Owned(field.to_string())))
|
50
49
|
}
|
51
50
|
},
|
@@ -78,10 +77,9 @@ impl<'a> RecordParser<'a> for Vec<Option<CowStr<'a>>> {
|
|
78
77
|
None
|
79
78
|
} else if field.is_empty() {
|
80
79
|
Some(CowStr(shared_empty.clone()))
|
81
|
-
} else if ignore_null_bytes
|
80
|
+
} else if ignore_null_bytes {
|
82
81
|
Some(CowStr(Cow::Owned(field.replace("\0", ""))))
|
83
|
-
}
|
84
|
-
else {
|
82
|
+
} else {
|
85
83
|
Some(CowStr(Cow::Owned(field.to_string())))
|
86
84
|
};
|
87
85
|
vec.push(value);
|
data/ext/osv/src/csv/record_reader.rs
CHANGED
@@ -1,3 +1,4 @@
|
|
1
|
+
use super::builder::ReaderError;
|
1
2
|
use super::header_cache::StringCacheKey;
|
2
3
|
use super::parser::RecordParser;
|
3
4
|
use super::{header_cache::StringCache, ruby_reader::SeekableRead};
|
@@ -72,28 +73,31 @@ impl<'a, T: RecordParser<'a>> RecordReader<'a, T> {
|
|
72
73
|
}
|
73
74
|
|
74
75
|
/// Attempts to read the next record, returning any errors encountered.
|
75
|
-
fn try_next(&mut self) ->
|
76
|
-
|
77
|
-
|
76
|
+
fn try_next(&mut self) -> Result<Option<T::Output>, ReaderError> {
|
77
|
+
if self.reader.read_record(&mut self.string_record)? {
|
78
|
+
Ok(Some(T::parse(
|
78
79
|
&self.headers,
|
79
80
|
&self.string_record,
|
80
81
|
self.null_string.clone(),
|
81
82
|
self.flexible_default.clone(),
|
82
|
-
self.ignore_null_bytes
|
83
|
-
)))
|
84
|
-
|
83
|
+
self.ignore_null_bytes,
|
84
|
+
)))
|
85
|
+
} else {
|
86
|
+
Ok(None)
|
85
87
|
}
|
86
88
|
}
|
87
89
|
}
|
88
90
|
|
89
91
|
impl<'a, T: RecordParser<'a>> Iterator for RecordReader<'a, T> {
|
90
|
-
type Item = T::Output
|
92
|
+
type Item = Result<T::Output, ReaderError>;
|
91
93
|
|
92
94
|
#[inline]
|
93
95
|
fn next(&mut self) -> Option<Self::Item> {
|
94
|
-
|
95
|
-
|
96
|
-
|
96
|
+
match self.try_next() {
|
97
|
+
Ok(Some(record)) => Some(Ok(record)),
|
98
|
+
Ok(None) => None,
|
99
|
+
Err(e) => Some(Err(e)),
|
100
|
+
}
|
97
101
|
}
|
98
102
|
|
99
103
|
#[inline]
|
data/ext/osv/src/csv/ruby_reader.rs
CHANGED
@@ -1,4 +1,5 @@
|
|
1
1
|
use magnus::{
|
2
|
+
error::Error as MagnusError,
|
2
3
|
value::{Opaque, ReprValue},
|
3
4
|
RClass, RString, Ruby, Value,
|
4
5
|
};
|
@@ -6,7 +7,7 @@ use std::fs::File;
|
|
6
7
|
use std::io::{self, BufReader, Read, Seek, SeekFrom, Write};
|
7
8
|
use std::sync::OnceLock;
|
8
9
|
|
9
|
-
use super::ForgottenFileHandle;
|
10
|
+
use super::{builder::ReaderError, ForgottenFileHandle};
|
10
11
|
|
11
12
|
static STRING_IO_CLASS: OnceLock<Opaque<RClass>> = OnceLock::new();
|
12
13
|
|
@@ -25,10 +26,7 @@ impl<T: Read + Seek> SeekableRead for BufReader<T> {}
|
|
25
26
|
impl SeekableRead for std::io::Cursor<Vec<u8>> {}
|
26
27
|
impl SeekableRead for ForgottenFileHandle {}
|
27
28
|
|
28
|
-
pub fn build_ruby_reader(
|
29
|
-
ruby: &Ruby,
|
30
|
-
input: Value,
|
31
|
-
) -> Result<Box<dyn SeekableRead>, magnus::Error> {
|
29
|
+
pub fn build_ruby_reader(ruby: &Ruby, input: Value) -> Result<Box<dyn SeekableRead>, ReaderError> {
|
32
30
|
if RubyReader::is_string_io(ruby, &input) {
|
33
31
|
RubyReader::from_string_io(ruby, input)
|
34
32
|
} else if RubyReader::is_io_like(&input) {
|
@@ -88,14 +86,14 @@ impl Seek for RubyReader<RString> {
|
|
88
86
|
}
|
89
87
|
|
90
88
|
impl RubyReader<Value> {
|
91
|
-
fn from_io(input: Value) -> Result<Box<dyn SeekableRead>,
|
89
|
+
fn from_io(input: Value) -> Result<Box<dyn SeekableRead>, ReaderError> {
|
92
90
|
if Self::is_io_like(&input) {
|
93
91
|
Ok(Box::new(Self::from_io_like(input)))
|
94
92
|
} else {
|
95
|
-
Err(
|
93
|
+
Err(MagnusError::new(
|
96
94
|
magnus::exception::type_error(),
|
97
95
|
"Input is not an IO-like object",
|
98
|
-
))
|
96
|
+
))?
|
99
97
|
}
|
100
98
|
}
|
101
99
|
|
@@ -112,15 +110,12 @@ impl RubyReader<Value> {
|
|
112
110
|
}
|
113
111
|
|
114
112
|
impl RubyReader<RString> {
|
115
|
-
pub fn from_string_io(
|
116
|
-
ruby: &Ruby,
|
117
|
-
input: Value,
|
118
|
-
) -> Result<Box<dyn SeekableRead>, magnus::Error> {
|
113
|
+
pub fn from_string_io(ruby: &Ruby, input: Value) -> Result<Box<dyn SeekableRead>, ReaderError> {
|
119
114
|
if !Self::is_string_io(ruby, &input) {
|
120
|
-
return Err(
|
115
|
+
return Err(MagnusError::new(
|
121
116
|
magnus::exception::type_error(),
|
122
117
|
"Input is not a StringIO",
|
123
|
-
))
|
118
|
+
))?;
|
124
119
|
}
|
125
120
|
|
126
121
|
let string_content = input.funcall::<_, _, RString>("string", ()).unwrap();
|
@@ -138,11 +133,11 @@ impl RubyReader<RString> {
|
|
138
133
|
input.is_kind_of(ruby.get_inner(*string_io_class))
|
139
134
|
}
|
140
135
|
|
141
|
-
fn from_string_like(input: Value) -> Result<Box<dyn SeekableRead>,
|
142
|
-
// Try calling `to_str`, and if that fails, try `to_s`
|
136
|
+
fn from_string_like(input: Value) -> Result<Box<dyn SeekableRead>, ReaderError> {
|
143
137
|
let string_content = input
|
144
138
|
.funcall::<_, _, RString>("to_str", ())
|
145
139
|
.or_else(|_| input.funcall::<_, _, RString>("to_s", ()))?;
|
140
|
+
|
146
141
|
Ok(Box::new(Self {
|
147
142
|
inner: string_content,
|
148
143
|
offset: 0,
|
@@ -154,12 +149,16 @@ impl Read for RubyReader<Value> {
|
|
154
149
|
fn read(&mut self, mut buf: &mut [u8]) -> io::Result<usize> {
|
155
150
|
let bytes = self
|
156
151
|
.inner
|
157
|
-
.funcall::<_, _, RString
|
152
|
+
.funcall::<_, _, Option<RString>>("read", (buf.len(),))
|
158
153
|
.map_err(|e| io::Error::new(io::ErrorKind::Other, e.to_string()))?;
|
159
154
|
|
160
|
-
|
161
|
-
|
162
|
-
|
155
|
+
match bytes {
|
156
|
+
Some(bytes) => {
|
157
|
+
buf.write_all(unsafe { bytes.as_slice() })?;
|
158
|
+
Ok(bytes.len())
|
159
|
+
}
|
160
|
+
None => Ok(0), // EOF
|
161
|
+
}
|
163
162
|
}
|
164
163
|
}
|
165
164
|
|
data/ext/osv/src/reader.rs
CHANGED
@@ -3,7 +3,7 @@ use crate::utils::*;
|
|
3
3
|
use ahash::RandomState;
|
4
4
|
use csv::Trim;
|
5
5
|
use magnus::value::ReprValue;
|
6
|
-
use magnus::{
|
6
|
+
use magnus::{Error, IntoValue, KwArgs, RHash, Ruby, Symbol, Value};
|
7
7
|
use std::collections::HashMap;
|
8
8
|
|
9
9
|
/// Valid result types for CSV parsing
|
@@ -44,10 +44,7 @@ struct EnumeratorArgs {
|
|
44
44
|
/// # Safety
|
45
45
|
/// This function uses unsafe code to get the Ruby runtime and leak memory for static references.
|
46
46
|
/// This is necessary for Ruby integration but should be used with caution.
|
47
|
-
pub fn parse_csv(
|
48
|
-
rb_self: Value,
|
49
|
-
args: &[Value],
|
50
|
-
) -> Result<Yield<Box<dyn Iterator<Item = CsvRecord<'static, RandomState>>>>, Error> {
|
47
|
+
pub fn parse_csv(rb_self: Value, args: &[Value]) -> Result<Value, Error> {
|
51
48
|
// SAFETY: We're in a Ruby callback, so Ruby runtime is guaranteed to be initialized
|
52
49
|
let ruby = unsafe { Ruby::get_unchecked() };
|
53
50
|
|
@@ -82,7 +79,8 @@ pub fn parse_csv(
|
|
82
79
|
_ => None,
|
83
80
|
},
|
84
81
|
ignore_null_bytes,
|
85
|
-
})
|
82
|
+
})
|
83
|
+
.map(|yield_enum| yield_enum.into_value_with(&ruby));
|
86
84
|
}
|
87
85
|
|
88
86
|
let result_type = ResultType::from_str(&result_type).ok_or_else(|| {
|
@@ -92,7 +90,7 @@ pub fn parse_csv(
|
|
92
90
|
)
|
93
91
|
})?;
|
94
92
|
|
95
|
-
|
93
|
+
match result_type {
|
96
94
|
ResultType::Hash => {
|
97
95
|
let builder = RecordReaderBuilder::<
|
98
96
|
HashMap<StringCacheKey, Option<CowStr<'static>>, RandomState>,
|
@@ -107,7 +105,11 @@ pub fn parse_csv(
|
|
107
105
|
.ignore_null_bytes(ignore_null_bytes)
|
108
106
|
.build()?;
|
109
107
|
|
110
|
-
|
108
|
+
let ruby = unsafe { Ruby::get_unchecked() };
|
109
|
+
for result in builder {
|
110
|
+
let record = result?;
|
111
|
+
let _: Value = ruby.yield_value(CsvRecord::Map(record))?;
|
112
|
+
}
|
111
113
|
}
|
112
114
|
ResultType::Array => {
|
113
115
|
let builder = RecordReaderBuilder::<Vec<Option<CowStr<'static>>>>::new(ruby, to_read)
|
@@ -121,17 +123,20 @@ pub fn parse_csv(
|
|
121
123
|
.ignore_null_bytes(ignore_null_bytes)
|
122
124
|
.build()?;
|
123
125
|
|
124
|
-
|
126
|
+
let ruby = unsafe { Ruby::get_unchecked() };
|
127
|
+
for result in builder {
|
128
|
+
let record = result?;
|
129
|
+
let _: Value = ruby.yield_value(CsvRecord::<ahash::RandomState>::Vec(record))?;
|
130
|
+
}
|
125
131
|
}
|
126
|
-
}
|
132
|
+
}
|
127
133
|
|
128
|
-
|
134
|
+
let ruby = unsafe { Ruby::get_unchecked() };
|
135
|
+
Ok(ruby.qnil().into_value_with(&ruby))
|
129
136
|
}
|
130
137
|
|
131
138
|
/// Creates an enumerator for lazy CSV parsing
|
132
|
-
fn create_enumerator(
|
133
|
-
args: EnumeratorArgs,
|
134
|
-
) -> Result<Yield<Box<dyn Iterator<Item = CsvRecord<'static, RandomState>>>>, Error> {
|
139
|
+
fn create_enumerator(args: EnumeratorArgs) -> Result<magnus::Enumerator, Error> {
|
135
140
|
let kwargs = RHash::new();
|
136
141
|
kwargs.aset(Symbol::new("has_headers"), args.has_headers)?;
|
137
142
|
kwargs.aset(
|
@@ -147,12 +152,9 @@ fn create_enumerator(
|
|
147
152
|
kwargs.aset(Symbol::new("flexible"), args.flexible)?;
|
148
153
|
kwargs.aset(Symbol::new("flexible_default"), args.flexible_default)?;
|
149
154
|
kwargs.aset(Symbol::new("trim"), args.trim.map(Symbol::new))?;
|
150
|
-
|
151
155
|
kwargs.aset(Symbol::new("ignore_null_bytes"), args.ignore_null_bytes)?;
|
152
156
|
|
153
|
-
|
154
|
-
let enumerator = args
|
157
|
+
Ok(args
|
155
158
|
.rb_self
|
156
|
-
.enumeratorize("for_each", (args.to_read, KwArgs(kwargs)))
|
157
|
-
Ok(Yield::Enumerator(enumerator))
|
159
|
+
.enumeratorize("for_each", (args.to_read, KwArgs(kwargs))))
|
158
160
|
}
|
data/lib/osv/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: osv
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.3.21
|
4
|
+
version: 0.3.22
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Nathan Jaremko
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2025-01-
|
11
|
+
date: 2025-01-30 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rb_sys
|