osv 0.3.15 → 0.3.17
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Cargo.lock +11 -1
- data/README.md +27 -27
- data/ext/osv/Cargo.toml +1 -0
- data/ext/osv/src/csv/builder.rs +92 -85
- data/ext/osv/src/csv/header_cache.rs +105 -26
- data/ext/osv/src/csv/mod.rs +2 -2
- data/ext/osv/src/csv/parser.rs +22 -85
- data/ext/osv/src/csv/record.rs +25 -8
- data/ext/osv/src/csv/record_reader.rs +53 -118
- data/ext/osv/src/csv/ruby_integration.rs +10 -21
- data/ext/osv/src/csv/ruby_reader.rs +9 -4
- data/ext/osv/src/reader.rs +64 -46
- data/ext/osv/src/utils.rs +4 -12
- data/lib/osv/version.rb +1 -1
- metadata +2 -2
data/ext/osv/src/reader.rs
CHANGED
@@ -1,4 +1,4 @@
|
|
1
|
-
use crate::csv::{
|
1
|
+
use crate::csv::{CowStr, CsvRecord, RecordReaderBuilder, StringCacheKey};
|
2
2
|
use crate::utils::*;
|
3
3
|
use ahash::RandomState;
|
4
4
|
use csv::Trim;
|
@@ -6,12 +6,49 @@ use magnus::value::ReprValue;
|
|
6
6
|
use magnus::{block::Yield, Error, KwArgs, RHash, Ruby, Symbol, Value};
|
7
7
|
use std::collections::HashMap;
|
8
8
|
|
9
|
+
/// Valid result types for CSV parsing
|
10
|
+
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
|
11
|
+
enum ResultType {
|
12
|
+
Hash,
|
13
|
+
Array,
|
14
|
+
}
|
15
|
+
|
16
|
+
impl ResultType {
|
17
|
+
fn from_str(s: &str) -> Option<Self> {
|
18
|
+
match s {
|
19
|
+
"hash" => Some(Self::Hash),
|
20
|
+
"array" => Some(Self::Array),
|
21
|
+
_ => None,
|
22
|
+
}
|
23
|
+
}
|
24
|
+
}
|
25
|
+
|
26
|
+
/// Arguments for creating an enumerator
|
27
|
+
#[derive(Debug)]
|
28
|
+
struct EnumeratorArgs {
|
29
|
+
rb_self: Value,
|
30
|
+
to_read: Value,
|
31
|
+
has_headers: bool,
|
32
|
+
delimiter: u8,
|
33
|
+
quote_char: u8,
|
34
|
+
null_string: Option<String>,
|
35
|
+
result_type: String,
|
36
|
+
flexible: bool,
|
37
|
+
flexible_default: Option<String>,
|
38
|
+
trim: Option<String>,
|
39
|
+
}
|
40
|
+
|
41
|
+
/// Parses a CSV file with the given configuration.
|
42
|
+
///
|
43
|
+
/// # Safety
|
44
|
+
/// This function uses unsafe code to get the Ruby runtime and leak memory for static references.
|
45
|
+
/// This is necessary for Ruby integration but should be used with caution.
|
9
46
|
pub fn parse_csv(
|
10
47
|
rb_self: Value,
|
11
48
|
args: &[Value],
|
12
49
|
) -> Result<Yield<Box<dyn Iterator<Item = CsvRecord<'static, RandomState>>>>, Error> {
|
13
|
-
|
14
|
-
let ruby
|
50
|
+
// SAFETY: We're in a Ruby callback, so Ruby runtime is guaranteed to be initialized
|
51
|
+
let ruby = unsafe { Ruby::get_unchecked() };
|
15
52
|
|
16
53
|
let ReadCsvArgs {
|
17
54
|
to_read,
|
@@ -19,16 +56,11 @@ pub fn parse_csv(
|
|
19
56
|
delimiter,
|
20
57
|
quote_char,
|
21
58
|
null_string,
|
22
|
-
buffer_size,
|
23
59
|
result_type,
|
24
60
|
flexible,
|
25
61
|
flexible_default,
|
26
62
|
trim,
|
27
|
-
} = parse_read_csv_args(ruby, args)?;
|
28
|
-
|
29
|
-
let flexible_default: &'static Option<String> = Box::leak(Box::new(flexible_default));
|
30
|
-
let leaked_flexible_default: &'static Option<&str> =
|
31
|
-
Box::leak(Box::new(flexible_default.as_deref()));
|
63
|
+
} = parse_read_csv_args(&ruby, args)?;
|
32
64
|
|
33
65
|
if !ruby.block_given() {
|
34
66
|
return create_enumerator(EnumeratorArgs {
|
@@ -38,10 +70,9 @@ pub fn parse_csv(
|
|
38
70
|
delimiter,
|
39
71
|
quote_char,
|
40
72
|
null_string,
|
41
|
-
|
42
|
-
result_type,
|
73
|
+
result_type: result_type,
|
43
74
|
flexible,
|
44
|
-
flexible_default:
|
75
|
+
flexible_default: flexible_default,
|
45
76
|
trim: match trim {
|
46
77
|
Trim::All => Some("all".to_string()),
|
47
78
|
Trim::Headers => Some("headers".to_string()),
|
@@ -51,60 +82,47 @@ pub fn parse_csv(
|
|
51
82
|
});
|
52
83
|
}
|
53
84
|
|
54
|
-
let
|
55
|
-
|
85
|
+
let result_type = ResultType::from_str(&result_type).ok_or_else(|| {
|
86
|
+
Error::new(
|
87
|
+
ruby.exception_runtime_error(),
|
88
|
+
"Invalid result type, expected 'hash' or 'array'",
|
89
|
+
)
|
90
|
+
})?;
|
91
|
+
|
92
|
+
let iter: Box<dyn Iterator<Item = CsvRecord<RandomState>>> = match result_type {
|
93
|
+
ResultType::Hash => {
|
56
94
|
let builder = RecordReaderBuilder::<
|
57
|
-
HashMap
|
95
|
+
HashMap<StringCacheKey, Option<CowStr<'static>>, RandomState>,
|
58
96
|
>::new(ruby, to_read)
|
59
97
|
.has_headers(has_headers)
|
60
98
|
.flexible(flexible)
|
61
|
-
.flexible_default(flexible_default
|
99
|
+
.flexible_default(flexible_default)
|
62
100
|
.trim(trim)
|
63
101
|
.delimiter(delimiter)
|
64
102
|
.quote_char(quote_char)
|
65
|
-
.null_string(null_string)
|
66
|
-
.buffer(buffer_size);
|
103
|
+
.null_string(null_string);
|
67
104
|
|
68
|
-
Box::new(builder.
|
105
|
+
Box::new(builder.build()?.map(CsvRecord::Map))
|
69
106
|
}
|
70
|
-
|
71
|
-
RecordReaderBuilder::<Vec<Option<
|
107
|
+
ResultType::Array => {
|
108
|
+
let builder = RecordReaderBuilder::<Vec<Option<CowStr<'static>>>>::new(ruby, to_read)
|
72
109
|
.has_headers(has_headers)
|
73
110
|
.flexible(flexible)
|
74
|
-
.flexible_default(flexible_default
|
111
|
+
.flexible_default(flexible_default)
|
75
112
|
.trim(trim)
|
76
113
|
.delimiter(delimiter)
|
77
114
|
.quote_char(quote_char)
|
78
115
|
.null_string(null_string)
|
79
|
-
.
|
80
|
-
|
81
|
-
|
82
|
-
),
|
83
|
-
_ => {
|
84
|
-
return Err(Error::new(
|
85
|
-
ruby.exception_runtime_error(),
|
86
|
-
"Invalid result type",
|
87
|
-
))
|
116
|
+
.build()?;
|
117
|
+
|
118
|
+
Box::new(builder.map(CsvRecord::Vec))
|
88
119
|
}
|
89
120
|
};
|
90
121
|
|
91
122
|
Ok(Yield::Iter(iter))
|
92
123
|
}
|
93
124
|
|
94
|
-
|
95
|
-
rb_self: Value,
|
96
|
-
to_read: Value,
|
97
|
-
has_headers: bool,
|
98
|
-
delimiter: u8,
|
99
|
-
quote_char: u8,
|
100
|
-
null_string: Option<String>,
|
101
|
-
buffer_size: usize,
|
102
|
-
result_type: String,
|
103
|
-
flexible: bool,
|
104
|
-
flexible_default: Option<&'static str>,
|
105
|
-
trim: Option<String>,
|
106
|
-
}
|
107
|
-
|
125
|
+
/// Creates an enumerator for lazy CSV parsing
|
108
126
|
fn create_enumerator(
|
109
127
|
args: EnumeratorArgs,
|
110
128
|
) -> Result<Yield<Box<dyn Iterator<Item = CsvRecord<'static, RandomState>>>>, Error> {
|
@@ -119,11 +137,11 @@ fn create_enumerator(
|
|
119
137
|
String::from_utf8(vec![args.quote_char]).unwrap(),
|
120
138
|
)?;
|
121
139
|
kwargs.aset(Symbol::new("nil_string"), args.null_string)?;
|
122
|
-
kwargs.aset(Symbol::new("buffer_size"), args.buffer_size)?;
|
123
140
|
kwargs.aset(Symbol::new("result_type"), Symbol::new(args.result_type))?;
|
124
141
|
kwargs.aset(Symbol::new("flexible"), args.flexible)?;
|
125
142
|
kwargs.aset(Symbol::new("flexible_default"), args.flexible_default)?;
|
126
143
|
kwargs.aset(Symbol::new("trim"), args.trim.map(Symbol::new))?;
|
144
|
+
|
127
145
|
let enumerator = args
|
128
146
|
.rb_self
|
129
147
|
.enumeratorize("for_each", (args.to_read, KwArgs(kwargs)));
|
data/ext/osv/src/utils.rs
CHANGED
@@ -4,8 +4,6 @@ use magnus::{
|
|
4
4
|
Error, RString, Ruby, Symbol, Value,
|
5
5
|
};
|
6
6
|
|
7
|
-
use crate::csv::BUFFER_CHANNEL_SIZE;
|
8
|
-
|
9
7
|
fn parse_string_or_symbol(ruby: &Ruby, value: Value) -> Result<Option<String>, Error> {
|
10
8
|
if value.is_nil() {
|
11
9
|
Ok(None)
|
@@ -34,7 +32,6 @@ pub struct ReadCsvArgs {
|
|
34
32
|
pub delimiter: u8,
|
35
33
|
pub quote_char: u8,
|
36
34
|
pub null_string: Option<String>,
|
37
|
-
pub buffer_size: usize,
|
38
35
|
pub result_type: String,
|
39
36
|
pub flexible: bool,
|
40
37
|
pub flexible_default: Option<String>,
|
@@ -54,7 +51,6 @@ pub fn parse_read_csv_args(ruby: &Ruby, args: &[Value]) -> Result<ReadCsvArgs, E
|
|
54
51
|
Option<String>,
|
55
52
|
Option<String>,
|
56
53
|
Option<Option<String>>,
|
57
|
-
Option<usize>,
|
58
54
|
Option<Value>,
|
59
55
|
Option<bool>,
|
60
56
|
Option<Option<String>>,
|
@@ -69,7 +65,6 @@ pub fn parse_read_csv_args(ruby: &Ruby, args: &[Value]) -> Result<ReadCsvArgs, E
|
|
69
65
|
"col_sep",
|
70
66
|
"quote_char",
|
71
67
|
"nil_string",
|
72
|
-
"buffer_size",
|
73
68
|
"result_type",
|
74
69
|
"flexible",
|
75
70
|
"flexible_default",
|
@@ -107,11 +102,9 @@ pub fn parse_read_csv_args(ruby: &Ruby, args: &[Value]) -> Result<ReadCsvArgs, E
|
|
107
102
|
|
108
103
|
let null_string = kwargs.optional.3.unwrap_or_default();
|
109
104
|
|
110
|
-
let buffer_size = kwargs.optional.4.unwrap_or(BUFFER_CHANNEL_SIZE);
|
111
|
-
|
112
105
|
let result_type = match kwargs
|
113
106
|
.optional
|
114
|
-
.5
|
107
|
+
.4
|
115
108
|
.map(|value| parse_string_or_symbol(ruby, value))
|
116
109
|
{
|
117
110
|
Some(Ok(Some(parsed))) => match parsed.as_str() {
|
@@ -133,13 +126,13 @@ pub fn parse_read_csv_args(ruby: &Ruby, args: &[Value]) -> Result<ReadCsvArgs, E
|
|
133
126
|
None => String::from("hash"),
|
134
127
|
};
|
135
128
|
|
136
|
-
let flexible = kwargs.optional.6.unwrap_or_default();
|
129
|
+
let flexible = kwargs.optional.5.unwrap_or_default();
|
137
130
|
|
138
|
-
let flexible_default = kwargs.optional.7.unwrap_or_default();
|
131
|
+
let flexible_default = kwargs.optional.6.unwrap_or_default();
|
139
132
|
|
140
133
|
let trim = match kwargs
|
141
134
|
.optional
|
142
|
-
.8
|
135
|
+
.7
|
143
136
|
.map(|value| parse_string_or_symbol(ruby, value))
|
144
137
|
{
|
145
138
|
Some(Ok(Some(parsed))) => match parsed.as_str() {
|
@@ -172,7 +165,6 @@ pub fn parse_read_csv_args(ruby: &Ruby, args: &[Value]) -> Result<ReadCsvArgs, E
|
|
172
165
|
delimiter,
|
173
166
|
quote_char,
|
174
167
|
null_string,
|
175
|
-
buffer_size,
|
176
168
|
result_type,
|
177
169
|
flexible,
|
178
170
|
flexible_default,
|
data/lib/osv/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: osv
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.3.15
|
4
|
+
version: 0.3.17
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Nathan Jaremko
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2025-01-
|
11
|
+
date: 2025-01-06 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rb_sys
|