osv 0.3.3 → 0.3.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Cargo.lock +23 -2
- data/Gemfile +9 -2
- data/README.md +96 -13
- data/ext/osv/Cargo.toml +1 -0
- data/ext/osv/src/csv/builder.rs +172 -37
- data/ext/osv/src/csv/header_cache.rs +33 -23
- data/ext/osv/src/csv/mod.rs +1 -0
- data/ext/osv/src/csv/parser.rs +36 -18
- data/ext/osv/src/csv/read_impl.rs +65 -0
- data/ext/osv/src/csv/reader.rs +32 -72
- data/ext/osv/src/csv/record.rs +7 -5
- data/ext/osv/src/reader.rs +1 -1
- data/ext/osv/src/utils.rs +2 -2
- data/lib/osv/version.rb +1 -1
- metadata +3 -3
- data/ext/osv/Cargo.lock +0 -402
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 9a28169673f9693d3985c700f79490ae9057b43d86161a4354e9cb7196b92425
|
4
|
+
data.tar.gz: 3871878a45c4e564eeacbc4af911fc1e91c95f7ca0ca93fcaf8f06426ce0b3a5
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 2038c7f8a0f56460e2bb6cb11b1bf6342841306b19b0847d7f57e3be6fc96687ba5743f1f5326251110bc6b41f049d24e5b1045543a71b9aa9b7364053f9a3a7
|
7
|
+
data.tar.gz: a99622513a55f4ec6e3c19ac73d29ab0f7e85191bacc68de324e6e12c8cfff06355b881d46380735fa1625420744d6dcd652ab0cdca08c92b7ecdd3f82f43170
|
data/Cargo.lock
CHANGED
@@ -273,6 +273,7 @@ dependencies = [
|
|
273
273
|
"rb-sys",
|
274
274
|
"serde",
|
275
275
|
"serde_magnus",
|
276
|
+
"thiserror",
|
276
277
|
]
|
277
278
|
|
278
279
|
[[package]]
|
@@ -421,9 +422,9 @@ checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64"
|
|
421
422
|
|
422
423
|
[[package]]
|
423
424
|
name = "syn"
|
424
|
-
version = "2.0.
|
425
|
+
version = "2.0.91"
|
425
426
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
426
|
-
checksum = "
|
427
|
+
checksum = "d53cbcb5a243bd33b7858b1d7f4aca2153490815872d86d955d6ea29f743c035"
|
427
428
|
dependencies = [
|
428
429
|
"proc-macro2",
|
429
430
|
"quote",
|
@@ -436,6 +437,26 @@ version = "1.0.1"
|
|
436
437
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
437
438
|
checksum = "55937e1799185b12863d447f42597ed69d9928686b8d88a1df17376a097d8369"
|
438
439
|
|
440
|
+
[[package]]
|
441
|
+
name = "thiserror"
|
442
|
+
version = "2.0.9"
|
443
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
444
|
+
checksum = "f072643fd0190df67a8bab670c20ef5d8737177d6ac6b2e9a236cb096206b2cc"
|
445
|
+
dependencies = [
|
446
|
+
"thiserror-impl",
|
447
|
+
]
|
448
|
+
|
449
|
+
[[package]]
|
450
|
+
name = "thiserror-impl"
|
451
|
+
version = "2.0.9"
|
452
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
453
|
+
checksum = "7b50fa271071aae2e6ee85f842e2e28ba8cd2c5fb67f11fcb1fd70b276f9e7d4"
|
454
|
+
dependencies = [
|
455
|
+
"proc-macro2",
|
456
|
+
"quote",
|
457
|
+
"syn",
|
458
|
+
]
|
459
|
+
|
439
460
|
[[package]]
|
440
461
|
name = "unicode-ident"
|
441
462
|
version = "1.0.14"
|
data/Gemfile
CHANGED
@@ -2,6 +2,13 @@ source "https://rubygems.org"
|
|
2
2
|
|
3
3
|
gem "rb_sys", "~> 0.9.56"
|
4
4
|
gem "rake"
|
5
|
-
gem "
|
5
|
+
gem "csv"
|
6
6
|
|
7
|
-
|
7
|
+
# Use local version of osv
|
8
|
+
gemspec
|
9
|
+
|
10
|
+
group :development, :test do
|
11
|
+
gem "minitest", "~> 5.0"
|
12
|
+
gem "benchmark-ips", "~> 2.12"
|
13
|
+
gem "fastcsv", "~> 0.0.7"
|
14
|
+
end
|
data/README.md
CHANGED
@@ -65,19 +65,12 @@ Both methods support the following options:
|
|
65
65
|
|
66
66
|
- `has_headers`: Boolean indicating if the first row contains headers (default: true)
|
67
67
|
- `col_sep`: String specifying the field separator (default: ",")
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
# Reading without headers
|
76
|
-
OSV.for_each("path/to/file.csv", has_headers: false) do |row|
|
77
|
-
# Headers will be automatically generated as "c0", "c1", etc.
|
78
|
-
puts row["c0"]
|
79
|
-
end
|
80
|
-
```
|
68
|
+
- `quote_char`: String specifying the quote character (default: "\"")
|
69
|
+
- `nil_string`: String that should be interpreted as nil
|
70
|
+
- by default, empty strings are interpreted as empty strings
|
71
|
+
- if you want to interpret empty strings as nil, set this to an empty string
|
72
|
+
- `buffer_size`: Integer specifying the read buffer size
|
73
|
+
- `result_type`: String specifying the output format ("hash" or "array")
|
81
74
|
|
82
75
|
### Input Sources
|
83
76
|
|
@@ -111,3 +104,93 @@ OSV.for_each(data) { |row| puts row["name"] }
|
|
111
104
|
## Performance
|
112
105
|
|
113
106
|
This library is faster than the standard Ruby CSV library, and is comparable to the fastest CSV parser gems I've used.
|
107
|
+
|
108
|
+
Here are some unscientific benchmarks. You can find the code in the [benchmark/comparison_benchmark.rb](benchmark/comparison_benchmark.rb) file.
|
109
|
+
|
110
|
+
### 10,000 lines
|
111
|
+
|
112
|
+
```
|
113
|
+
Benchmarking with 10001 lines of data
|
114
|
+
|
115
|
+
ruby 3.3.3 (2024-06-12 revision f1c7b6f435) [arm64-darwin23]
|
116
|
+
Warming up --------------------------------------
|
117
|
+
OSV - Hash output 6.000 i/100ms
|
118
|
+
CSV - Hash output 1.000 i/100ms
|
119
|
+
OSV - Array output 18.000 i/100ms
|
120
|
+
CSV - Array output 2.000 i/100ms
|
121
|
+
FastCSV - Array output
|
122
|
+
9.000 i/100ms
|
123
|
+
OSV - StringIO 7.000 i/100ms
|
124
|
+
CSV - StringIO 1.000 i/100ms
|
125
|
+
FastCSV - StringIO 20.000 i/100ms
|
126
|
+
OSV - Gzipped 6.000 i/100ms
|
127
|
+
CSV - Gzipped 1.000 i/100ms
|
128
|
+
Calculating -------------------------------------
|
129
|
+
OSV - Hash output 73.360 (± 4.1%) i/s (13.63 ms/i) - 366.000 in 5.000390s
|
130
|
+
CSV - Hash output 11.937 (±25.1%) i/s (83.78 ms/i) - 52.000 in 5.036297s
|
131
|
+
OSV - Array output 189.738 (± 8.4%) i/s (5.27 ms/i) - 954.000 in 5.071018s
|
132
|
+
CSV - Array output 25.471 (±11.8%) i/s (39.26 ms/i) - 120.000 in 5.015289s
|
133
|
+
FastCSV - Array output
|
134
|
+
97.867 (± 2.0%) i/s (10.22 ms/i) - 495.000 in 5.060957s
|
135
|
+
OSV - StringIO 80.784 (± 6.2%) i/s (12.38 ms/i) - 406.000 in 5.046696s
|
136
|
+
CSV - StringIO 15.872 (± 0.0%) i/s (63.01 ms/i) - 80.000 in 5.043361s
|
137
|
+
FastCSV - StringIO 200.511 (± 2.0%) i/s (4.99 ms/i) - 1.020k in 5.088592s
|
138
|
+
OSV - Gzipped 55.220 (±12.7%) i/s (18.11 ms/i) - 258.000 in 5.030928s
|
139
|
+
CSV - Gzipped 12.591 (±15.9%) i/s (79.42 ms/i) - 59.000 in 5.039709s
|
140
|
+
|
141
|
+
Comparison:
|
142
|
+
FastCSV - StringIO: 200.5 i/s
|
143
|
+
OSV - Array output: 189.7 i/s - same-ish: difference falls within error
|
144
|
+
FastCSV - Array output: 97.9 i/s - 2.05x slower
|
145
|
+
OSV - StringIO: 80.8 i/s - 2.48x slower
|
146
|
+
OSV - Hash output: 73.4 i/s - 2.73x slower
|
147
|
+
OSV - Gzipped: 55.2 i/s - 3.63x slower
|
148
|
+
CSV - Array output: 25.5 i/s - 7.87x slower
|
149
|
+
CSV - StringIO: 15.9 i/s - 12.63x slower
|
150
|
+
CSV - Gzipped: 12.6 i/s - 15.92x slower
|
151
|
+
CSV - Hash output: 11.9 i/s - 16.80x slower
|
152
|
+
```
|
153
|
+
|
154
|
+
### 1,000,000 lines
|
155
|
+
|
156
|
+
```
|
157
|
+
Benchmarking with 1000001 lines of data
|
158
|
+
|
159
|
+
ruby 3.3.3 (2024-06-12 revision f1c7b6f435) [arm64-darwin23]
|
160
|
+
Warming up --------------------------------------
|
161
|
+
OSV - Hash output 1.000 i/100ms
|
162
|
+
CSV - Hash output 1.000 i/100ms
|
163
|
+
OSV - Array output 1.000 i/100ms
|
164
|
+
CSV - Array output 1.000 i/100ms
|
165
|
+
FastCSV - Array output
|
166
|
+
1.000 i/100ms
|
167
|
+
OSV - StringIO 1.000 i/100ms
|
168
|
+
CSV - StringIO 1.000 i/100ms
|
169
|
+
FastCSV - StringIO 1.000 i/100ms
|
170
|
+
OSV - Gzipped 1.000 i/100ms
|
171
|
+
CSV - Gzipped 1.000 i/100ms
|
172
|
+
Calculating -------------------------------------
|
173
|
+
OSV - Hash output 0.578 (± 0.0%) i/s (1.73 s/i) - 3.000 in 5.287845s
|
174
|
+
CSV - Hash output 0.117 (± 0.0%) i/s (8.57 s/i) - 1.000 in 8.571770s
|
175
|
+
OSV - Array output 1.142 (± 0.0%) i/s (875.97 ms/i) - 5.000 in 5.234694s
|
176
|
+
CSV - Array output 0.235 (± 0.0%) i/s (4.25 s/i) - 2.000 in 8.561144s
|
177
|
+
FastCSV - Array output
|
178
|
+
0.768 (± 0.0%) i/s (1.30 s/i) - 4.000 in 6.924574s
|
179
|
+
OSV - StringIO 0.522 (± 0.0%) i/s (1.91 s/i) - 3.000 in 5.803969s
|
180
|
+
CSV - StringIO 0.132 (± 0.0%) i/s (7.59 s/i) - 1.000 in 7.593243s
|
181
|
+
FastCSV - StringIO 1.039 (± 0.0%) i/s (962.53 ms/i) - 6.000 in 5.806644s
|
182
|
+
OSV - Gzipped 0.437 (± 0.0%) i/s (2.29 s/i) - 3.000 in 6.885125s
|
183
|
+
CSV - Gzipped 0.115 (± 0.0%) i/s (8.68 s/i) - 1.000 in 8.684069s
|
184
|
+
|
185
|
+
Comparison:
|
186
|
+
OSV - Array output: 1.1 i/s
|
187
|
+
FastCSV - StringIO: 1.0 i/s - 1.10x slower
|
188
|
+
FastCSV - Array output: 0.8 i/s - 1.49x slower
|
189
|
+
OSV - Hash output: 0.6 i/s - 1.98x slower
|
190
|
+
OSV - StringIO: 0.5 i/s - 2.19x slower
|
191
|
+
OSV - Gzipped: 0.4 i/s - 2.61x slower
|
192
|
+
CSV - Array output: 0.2 i/s - 4.86x slower
|
193
|
+
CSV - StringIO: 0.1 i/s - 8.67x slower
|
194
|
+
CSV - Hash output: 0.1 i/s - 9.79x slower
|
195
|
+
CSV - Gzipped: 0.1 i/s - 9.91x slower
|
196
|
+
```
|
data/ext/osv/Cargo.toml
CHANGED
data/ext/osv/src/csv/builder.rs
CHANGED
@@ -1,11 +1,50 @@
|
|
1
1
|
use super::{
|
2
|
-
header_cache::StringCache,
|
2
|
+
header_cache::{CacheError, StringCache},
|
3
3
|
parser::RecordParser,
|
4
|
-
|
4
|
+
read_impl::ReadImpl,
|
5
|
+
reader::RecordReader,
|
5
6
|
};
|
6
7
|
use flate2::read::GzDecoder;
|
7
|
-
use magnus::{rb_sys::AsRawValue, value::ReprValue, Error, RString, Ruby, Value};
|
8
|
-
use std::{
|
8
|
+
use magnus::{rb_sys::AsRawValue, value::ReprValue, Error as MagnusError, RString, Ruby, Value};
|
9
|
+
use std::{
|
10
|
+
fs::File,
|
11
|
+
io::{self, Read},
|
12
|
+
marker::PhantomData,
|
13
|
+
os::fd::FromRawFd,
|
14
|
+
thread,
|
15
|
+
};
|
16
|
+
use thiserror::Error;
|
17
|
+
|
18
|
+
#[derive(Error, Debug)]
|
19
|
+
pub enum ReaderError {
|
20
|
+
#[error("Failed to get file descriptor: {0}")]
|
21
|
+
FileDescriptor(String),
|
22
|
+
#[error("Invalid file descriptor")]
|
23
|
+
InvalidFileDescriptor,
|
24
|
+
#[error("Failed to open file: {0}")]
|
25
|
+
FileOpen(#[from] io::Error),
|
26
|
+
#[error("Failed to intern headers: {0}")]
|
27
|
+
HeaderIntern(#[from] CacheError),
|
28
|
+
#[error("Unsupported GzipReader")]
|
29
|
+
UnsupportedGzipReader,
|
30
|
+
#[error("Ruby error: {0}")]
|
31
|
+
Ruby(String),
|
32
|
+
}
|
33
|
+
|
34
|
+
impl From<MagnusError> for ReaderError {
|
35
|
+
fn from(err: MagnusError) -> Self {
|
36
|
+
Self::Ruby(err.to_string())
|
37
|
+
}
|
38
|
+
}
|
39
|
+
|
40
|
+
impl From<ReaderError> for MagnusError {
|
41
|
+
fn from(err: ReaderError) -> Self {
|
42
|
+
MagnusError::new(
|
43
|
+
Ruby::get().unwrap().exception_runtime_error(),
|
44
|
+
err.to_string(),
|
45
|
+
)
|
46
|
+
}
|
47
|
+
}
|
9
48
|
|
10
49
|
pub struct RecordReaderBuilder<'a, T: RecordParser + Send + 'static> {
|
11
50
|
ruby: &'a Ruby,
|
@@ -13,7 +52,7 @@ pub struct RecordReaderBuilder<'a, T: RecordParser + Send + 'static> {
|
|
13
52
|
has_headers: bool,
|
14
53
|
delimiter: u8,
|
15
54
|
quote_char: u8,
|
16
|
-
null_string: String
|
55
|
+
null_string: Option<String>,
|
17
56
|
buffer: usize,
|
18
57
|
_phantom: PhantomData<T>,
|
19
58
|
}
|
@@ -26,7 +65,7 @@ impl<'a, T: RecordParser + Send + 'static> RecordReaderBuilder<'a, T> {
|
|
26
65
|
has_headers: true,
|
27
66
|
delimiter: b',',
|
28
67
|
quote_char: b'"',
|
29
|
-
null_string:
|
68
|
+
null_string: None,
|
30
69
|
buffer: 1000,
|
31
70
|
_phantom: PhantomData,
|
32
71
|
}
|
@@ -47,7 +86,7 @@ impl<'a, T: RecordParser + Send + 'static> RecordReaderBuilder<'a, T> {
|
|
47
86
|
self
|
48
87
|
}
|
49
88
|
|
50
|
-
pub fn null_string(mut self, null_string: String) -> Self {
|
89
|
+
pub fn null_string(mut self, null_string: Option<String>) -> Self {
|
51
90
|
self.null_string = null_string;
|
52
91
|
self
|
53
92
|
}
|
@@ -57,36 +96,83 @@ impl<'a, T: RecordParser + Send + 'static> RecordReaderBuilder<'a, T> {
|
|
57
96
|
self
|
58
97
|
}
|
59
98
|
|
60
|
-
fn
|
99
|
+
fn handle_string_io(&self) -> Result<Box<dyn Read + Send + 'static>, ReaderError> {
|
100
|
+
let string: RString = self.to_read.funcall("string", ())?;
|
101
|
+
let content = string.to_string()?;
|
102
|
+
Ok(Box::new(std::io::Cursor::new(content)))
|
103
|
+
}
|
104
|
+
|
105
|
+
fn handle_file_descriptor(&self) -> Result<Box<dyn Read + Send + 'static>, ReaderError> {
|
106
|
+
let raw_value = self.to_read.as_raw();
|
107
|
+
let fd = std::panic::catch_unwind(|| unsafe { rb_sys::rb_io_descriptor(raw_value) })
|
108
|
+
.map_err(|_| {
|
109
|
+
ReaderError::FileDescriptor("Failed to get file descriptor".to_string())
|
110
|
+
})?;
|
111
|
+
|
112
|
+
if fd < 0 {
|
113
|
+
return Err(ReaderError::InvalidFileDescriptor);
|
114
|
+
}
|
115
|
+
|
116
|
+
let file = unsafe { File::from_raw_fd(fd) };
|
117
|
+
Ok(Box::new(file))
|
118
|
+
}
|
119
|
+
|
120
|
+
fn handle_file_path(&self) -> Result<Box<dyn Read + Send + 'static>, ReaderError> {
|
121
|
+
let path = self.to_read.to_r_string()?.to_string()?;
|
122
|
+
let file = File::open(&path)?;
|
123
|
+
|
124
|
+
Ok(if path.ends_with(".gz") {
|
125
|
+
Box::new(GzDecoder::new(file))
|
126
|
+
} else {
|
127
|
+
Box::new(file)
|
128
|
+
})
|
129
|
+
}
|
130
|
+
|
131
|
+
fn get_reader(&self) -> Result<Box<dyn Read + Send + 'static>, ReaderError> {
|
61
132
|
let string_io: magnus::RClass = self.ruby.eval("StringIO")?;
|
133
|
+
let gzip_reader_class: magnus::RClass = self.ruby.eval("Zlib::GzipReader")?;
|
62
134
|
|
63
135
|
if self.to_read.is_kind_of(string_io) {
|
64
|
-
|
65
|
-
|
66
|
-
|
136
|
+
self.handle_string_io()
|
137
|
+
} else if self.to_read.is_kind_of(gzip_reader_class) {
|
138
|
+
Err(ReaderError::UnsupportedGzipReader)
|
67
139
|
} else if self.to_read.is_kind_of(self.ruby.class_io()) {
|
68
|
-
|
69
|
-
let file = unsafe { File::from_raw_fd(fd) };
|
70
|
-
Ok(Box::new(file))
|
140
|
+
self.handle_file_descriptor()
|
71
141
|
} else {
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
|
142
|
+
self.handle_file_path()
|
143
|
+
}
|
144
|
+
}
|
145
|
+
|
146
|
+
fn get_single_threaded_reader(&self) -> Result<Box<dyn Read>, ReaderError> {
|
147
|
+
let string_io: magnus::RClass = self.ruby.eval("StringIO")?;
|
148
|
+
let gzip_reader_class: magnus::RClass = self.ruby.eval("Zlib::GzipReader")?;
|
149
|
+
|
150
|
+
if self.to_read.is_kind_of(string_io) {
|
151
|
+
self.handle_string_io().map(|r| -> Box<dyn Read> { r })
|
152
|
+
} else if self.to_read.is_kind_of(gzip_reader_class) {
|
153
|
+
Ok(Box::new(RubyReader::new(self.to_read)))
|
154
|
+
} else if self.to_read.is_kind_of(self.ruby.class_io()) {
|
155
|
+
self.handle_file_descriptor()
|
156
|
+
.map(|r| -> Box<dyn Read> { r })
|
157
|
+
} else {
|
158
|
+
self.handle_file_path().map(|r| -> Box<dyn Read> { r })
|
159
|
+
}
|
160
|
+
}
|
161
|
+
|
162
|
+
pub fn build(self) -> Result<RecordReader<T>, ReaderError> {
|
163
|
+
match self.get_reader() {
|
164
|
+
Ok(readable) => self.build_multi_threaded(readable),
|
165
|
+
Err(_) => {
|
166
|
+
let readable = self.get_single_threaded_reader()?;
|
167
|
+
self.build_single_threaded(readable)
|
84
168
|
}
|
85
169
|
}
|
86
170
|
}
|
87
171
|
|
88
|
-
|
89
|
-
|
172
|
+
fn build_multi_threaded(
|
173
|
+
self,
|
174
|
+
readable: Box<dyn Read + Send + 'static>,
|
175
|
+
) -> Result<RecordReader<T>, ReaderError> {
|
90
176
|
let mut reader = csv::ReaderBuilder::new()
|
91
177
|
.has_headers(self.has_headers)
|
92
178
|
.delimiter(self.delimiter)
|
@@ -94,21 +180,16 @@ impl<'a, T: RecordParser + Send + 'static> RecordReaderBuilder<'a, T> {
|
|
94
180
|
.from_reader(readable);
|
95
181
|
|
96
182
|
let headers = RecordReader::<T>::get_headers(self.ruby, &mut reader, self.has_headers)?;
|
97
|
-
let
|
98
|
-
|
99
|
-
let static_headers = StringCache::intern_many(&headers).map_err(|e| {
|
100
|
-
Error::new(
|
101
|
-
self.ruby.exception_runtime_error(),
|
102
|
-
format!("Failed to intern headers: {e}"),
|
103
|
-
)
|
104
|
-
})?;
|
183
|
+
let static_headers = StringCache::intern_many(&headers)?;
|
105
184
|
let headers_for_cleanup = static_headers.clone();
|
106
185
|
|
107
186
|
let (sender, receiver) = kanal::bounded(self.buffer);
|
187
|
+
let null_string = self.null_string.clone();
|
188
|
+
|
108
189
|
let handle = thread::spawn(move || {
|
109
190
|
let mut record = csv::StringRecord::new();
|
110
191
|
while let Ok(true) = reader.read_record(&mut record) {
|
111
|
-
let row = T::parse(&static_headers, &record,
|
192
|
+
let row = T::parse(&static_headers, &record, null_string.as_deref());
|
112
193
|
if sender.send(row).is_err() {
|
113
194
|
break;
|
114
195
|
}
|
@@ -125,4 +206,58 @@ impl<'a, T: RecordParser + Send + 'static> RecordReaderBuilder<'a, T> {
|
|
125
206
|
},
|
126
207
|
})
|
127
208
|
}
|
209
|
+
|
210
|
+
fn build_single_threaded(
|
211
|
+
self,
|
212
|
+
readable: Box<dyn Read>,
|
213
|
+
) -> Result<RecordReader<T>, ReaderError> {
|
214
|
+
let mut reader = csv::ReaderBuilder::new()
|
215
|
+
.has_headers(self.has_headers)
|
216
|
+
.delimiter(self.delimiter)
|
217
|
+
.quote(self.quote_char)
|
218
|
+
.from_reader(readable);
|
219
|
+
|
220
|
+
let headers = RecordReader::<T>::get_headers(self.ruby, &mut reader, self.has_headers)?;
|
221
|
+
let static_headers = StringCache::intern_many(&headers)?;
|
222
|
+
|
223
|
+
Ok(RecordReader {
|
224
|
+
reader: ReadImpl::SingleThreaded {
|
225
|
+
reader,
|
226
|
+
headers: static_headers,
|
227
|
+
null_string: self.null_string,
|
228
|
+
},
|
229
|
+
})
|
230
|
+
}
|
231
|
+
}
|
232
|
+
|
233
|
+
struct RubyReader {
|
234
|
+
inner: Value,
|
235
|
+
}
|
236
|
+
|
237
|
+
impl RubyReader {
|
238
|
+
fn new(inner: Value) -> Self {
|
239
|
+
Self { inner }
|
240
|
+
}
|
241
|
+
}
|
242
|
+
|
243
|
+
impl Read for RubyReader {
|
244
|
+
fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
|
245
|
+
let result = self.inner.funcall::<_, _, Value>("read", (buf.len(),));
|
246
|
+
match result {
|
247
|
+
Ok(data) => {
|
248
|
+
if data.is_nil() {
|
249
|
+
return Ok(0);
|
250
|
+
}
|
251
|
+
|
252
|
+
let string = RString::from_value(data).ok_or_else(|| {
|
253
|
+
io::Error::new(io::ErrorKind::Other, "Failed to convert to RString")
|
254
|
+
})?;
|
255
|
+
let bytes = unsafe { string.as_slice() };
|
256
|
+
let len = bytes.len().min(buf.len());
|
257
|
+
buf[..len].copy_from_slice(&bytes[..len]);
|
258
|
+
Ok(len)
|
259
|
+
}
|
260
|
+
Err(e) => Err(io::Error::new(io::ErrorKind::Other, e.to_string())),
|
261
|
+
}
|
262
|
+
}
|
128
263
|
}
|
@@ -4,22 +4,29 @@
|
|
4
4
|
///
|
5
5
|
/// Note: Performance testing on macOS showed minimal speed improvements,
|
6
6
|
/// so this optimization could be removed if any issues arise.
|
7
|
-
|
8
|
-
|
9
7
|
use std::{
|
10
8
|
collections::HashMap,
|
11
9
|
sync::{atomic::AtomicU32, LazyLock, Mutex},
|
12
10
|
};
|
11
|
+
use thiserror::Error;
|
12
|
+
|
13
|
+
#[derive(Debug, Error)]
|
14
|
+
pub enum CacheError {
|
15
|
+
#[error("Failed to acquire lock: {0}")]
|
16
|
+
LockError(String),
|
17
|
+
}
|
13
18
|
|
14
19
|
static STRING_CACHE: LazyLock<Mutex<HashMap<&'static str, AtomicU32>>> =
|
15
20
|
LazyLock::new(|| Mutex::new(HashMap::with_capacity(100)));
|
16
21
|
|
17
|
-
pub struct StringCache
|
22
|
+
pub struct StringCache;
|
18
23
|
|
19
24
|
impl StringCache {
|
20
25
|
#[allow(dead_code)]
|
21
|
-
pub fn intern(string: String) -> Result<&'static str,
|
22
|
-
let mut cache = STRING_CACHE
|
26
|
+
pub fn intern(string: String) -> Result<&'static str, CacheError> {
|
27
|
+
let mut cache = STRING_CACHE
|
28
|
+
.lock()
|
29
|
+
.map_err(|e| CacheError::LockError(e.to_string()))?;
|
23
30
|
|
24
31
|
if let Some((&existing, count)) = cache.get_key_value(string.as_str()) {
|
25
32
|
count.fetch_add(1, std::sync::atomic::Ordering::Relaxed);
|
@@ -31,33 +38,36 @@ impl StringCache {
|
|
31
38
|
}
|
32
39
|
}
|
33
40
|
|
34
|
-
pub fn intern_many(strings: &[String]) -> Result<Vec<&'static str>,
|
35
|
-
let mut cache = STRING_CACHE
|
36
|
-
|
41
|
+
pub fn intern_many(strings: &[String]) -> Result<Vec<&'static str>, CacheError> {
|
42
|
+
let mut cache = STRING_CACHE
|
43
|
+
.lock()
|
44
|
+
.map_err(|e| CacheError::LockError(e.to_string()))?;
|
37
45
|
|
46
|
+
let mut result = Vec::with_capacity(strings.len());
|
38
47
|
for string in strings {
|
39
|
-
let
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
};
|
48
|
-
result.push(static_str);
|
48
|
+
if let Some((&existing, count)) = cache.get_key_value(string.as_str()) {
|
49
|
+
count.fetch_add(1, std::sync::atomic::Ordering::Relaxed);
|
50
|
+
result.push(existing);
|
51
|
+
} else {
|
52
|
+
let leaked = Box::leak(string.clone().into_boxed_str());
|
53
|
+
cache.insert(leaked, AtomicU32::new(1));
|
54
|
+
result.push(leaked);
|
55
|
+
}
|
49
56
|
}
|
50
|
-
|
51
57
|
Ok(result)
|
52
58
|
}
|
53
59
|
|
54
|
-
pub fn clear(headers: &[&'static str]) -> Result<(),
|
55
|
-
let cache = STRING_CACHE
|
60
|
+
pub fn clear(headers: &[&'static str]) -> Result<(), CacheError> {
|
61
|
+
let mut cache = STRING_CACHE
|
62
|
+
.lock()
|
63
|
+
.map_err(|e| CacheError::LockError(e.to_string()))?;
|
56
64
|
|
57
65
|
for header in headers {
|
58
66
|
if let Some(count) = cache.get(header) {
|
59
|
-
|
60
|
-
|
67
|
+
// Returns the previous value of the counter
|
68
|
+
let was = count.fetch_sub(1, std::sync::atomic::Ordering::Relaxed);
|
69
|
+
if was == 1 {
|
70
|
+
cache.remove(header);
|
61
71
|
let ptr = *header as *const str as *mut str;
|
62
72
|
unsafe {
|
63
73
|
let _ = Box::from_raw(ptr);
|
data/ext/osv/src/csv/mod.rs
CHANGED
data/ext/osv/src/csv/parser.rs
CHANGED
@@ -2,50 +2,68 @@ use std::collections::HashMap;
|
|
2
2
|
|
3
3
|
pub trait RecordParser {
|
4
4
|
type Output;
|
5
|
+
|
5
6
|
fn parse(
|
6
7
|
headers: &[&'static str],
|
7
8
|
record: &csv::StringRecord,
|
8
|
-
null_string:
|
9
|
+
null_string: Option<&str>,
|
9
10
|
) -> Self::Output;
|
10
11
|
}
|
11
12
|
|
12
13
|
impl RecordParser for HashMap<&'static str, Option<String>> {
|
13
14
|
type Output = Self;
|
15
|
+
|
16
|
+
#[inline]
|
14
17
|
fn parse(
|
15
18
|
headers: &[&'static str],
|
16
19
|
record: &csv::StringRecord,
|
17
|
-
null_string:
|
20
|
+
null_string: Option<&str>,
|
18
21
|
) -> Self::Output {
|
19
22
|
let mut map = HashMap::with_capacity(headers.len());
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
Some(field
|
27
|
-
|
28
|
-
|
29
|
-
|
23
|
+
headers
|
24
|
+
.iter()
|
25
|
+
.zip(record.iter())
|
26
|
+
.for_each(|(header, field)| {
|
27
|
+
map.insert(
|
28
|
+
*header,
|
29
|
+
if null_string == Some(field) {
|
30
|
+
None
|
31
|
+
} else {
|
32
|
+
// Avoid allocating for empty strings
|
33
|
+
if field.is_empty() {
|
34
|
+
Some(String::new())
|
35
|
+
} else {
|
36
|
+
Some(field.to_string())
|
37
|
+
}
|
38
|
+
},
|
39
|
+
);
|
40
|
+
});
|
30
41
|
map
|
31
42
|
}
|
32
43
|
}
|
33
44
|
|
34
45
|
impl RecordParser for Vec<Option<String>> {
|
35
46
|
type Output = Self;
|
47
|
+
|
48
|
+
#[inline]
|
36
49
|
fn parse(
|
37
50
|
_headers: &[&'static str],
|
38
51
|
record: &csv::StringRecord,
|
39
|
-
null_string:
|
52
|
+
null_string: Option<&str>,
|
40
53
|
) -> Self::Output {
|
41
54
|
let mut vec = Vec::with_capacity(record.len());
|
42
|
-
|
43
|
-
|
55
|
+
vec.extend(record.iter().map(|field| {
|
56
|
+
if null_string == Some(field) {
|
44
57
|
None
|
45
58
|
} else {
|
46
|
-
|
47
|
-
|
48
|
-
|
59
|
+
// Avoid allocating for empty strings
|
60
|
+
if field.is_empty() {
|
61
|
+
Some(String::new())
|
62
|
+
} else {
|
63
|
+
Some(field.to_string())
|
64
|
+
}
|
65
|
+
}
|
66
|
+
}));
|
49
67
|
vec
|
50
68
|
}
|
51
69
|
}
|
@@ -0,0 +1,65 @@
|
|
1
|
+
use super::{header_cache::StringCache, parser::RecordParser};
|
2
|
+
use std::{io::Read, thread};
|
3
|
+
|
4
|
+
pub enum ReadImpl<T: RecordParser> {
|
5
|
+
SingleThreaded {
|
6
|
+
reader: csv::Reader<Box<dyn Read>>,
|
7
|
+
headers: Vec<&'static str>,
|
8
|
+
null_string: Option<String>,
|
9
|
+
},
|
10
|
+
MultiThreaded {
|
11
|
+
headers: Vec<&'static str>,
|
12
|
+
receiver: kanal::Receiver<T::Output>,
|
13
|
+
handle: Option<thread::JoinHandle<()>>,
|
14
|
+
},
|
15
|
+
}
|
16
|
+
|
17
|
+
impl<T: RecordParser> ReadImpl<T> {
|
18
|
+
#[inline]
|
19
|
+
pub fn next(&mut self) -> Option<T::Output> {
|
20
|
+
match self {
|
21
|
+
Self::MultiThreaded {
|
22
|
+
receiver, handle, ..
|
23
|
+
} => match receiver.recv() {
|
24
|
+
Ok(record) => Some(record),
|
25
|
+
Err(_) => {
|
26
|
+
if let Some(handle) = handle.take() {
|
27
|
+
let _ = handle.join();
|
28
|
+
}
|
29
|
+
None
|
30
|
+
}
|
31
|
+
},
|
32
|
+
Self::SingleThreaded {
|
33
|
+
reader,
|
34
|
+
headers,
|
35
|
+
null_string,
|
36
|
+
} => {
|
37
|
+
let mut record = csv::StringRecord::new();
|
38
|
+
match reader.read_record(&mut record) {
|
39
|
+
Ok(true) => Some(T::parse(headers, &record, null_string.as_deref())),
|
40
|
+
_ => None,
|
41
|
+
}
|
42
|
+
}
|
43
|
+
}
|
44
|
+
}
|
45
|
+
|
46
|
+
#[inline]
|
47
|
+
pub fn cleanup(&mut self) {
|
48
|
+
match self {
|
49
|
+
Self::MultiThreaded {
|
50
|
+
receiver,
|
51
|
+
handle,
|
52
|
+
headers,
|
53
|
+
} => {
|
54
|
+
receiver.close();
|
55
|
+
if let Some(handle) = handle.take() {
|
56
|
+
let _ = handle.join();
|
57
|
+
}
|
58
|
+
let _ = StringCache::clear(headers);
|
59
|
+
}
|
60
|
+
Self::SingleThreaded { headers, .. } => {
|
61
|
+
let _ = StringCache::clear(headers);
|
62
|
+
}
|
63
|
+
}
|
64
|
+
}
|
65
|
+
}
|
data/ext/osv/src/csv/reader.rs
CHANGED
@@ -1,66 +1,35 @@
|
|
1
|
-
use super::{
|
1
|
+
use super::{parser::RecordParser, read_impl::ReadImpl};
|
2
2
|
use magnus::{Error, Ruby};
|
3
|
-
use std::{io::Read
|
3
|
+
use std::{borrow::Cow, io::Read};
|
4
4
|
|
5
5
|
pub struct RecordReader<T: RecordParser> {
|
6
6
|
pub(crate) reader: ReadImpl<T>,
|
7
7
|
}
|
8
8
|
|
9
|
-
impl<T: RecordParser> Drop for RecordReader<T> {
|
10
|
-
fn drop(&mut self) {
|
11
|
-
match &mut self.reader {
|
12
|
-
ReadImpl::MultiThreaded {
|
13
|
-
receiver,
|
14
|
-
handle,
|
15
|
-
headers,
|
16
|
-
} => {
|
17
|
-
receiver.close();
|
18
|
-
if let Some(handle) = handle.take() {
|
19
|
-
let _ = handle.join();
|
20
|
-
}
|
21
|
-
StringCache::clear(headers).unwrap();
|
22
|
-
}
|
23
|
-
ReadImpl::SingleThreaded { headers, .. } => {
|
24
|
-
StringCache::clear(headers).unwrap();
|
25
|
-
}
|
26
|
-
}
|
27
|
-
}
|
28
|
-
}
|
29
|
-
|
30
|
-
#[allow(dead_code)]
|
31
|
-
pub enum ReadImpl<T: RecordParser> {
|
32
|
-
SingleThreaded {
|
33
|
-
reader: csv::Reader<Box<dyn Read + Send + 'static>>,
|
34
|
-
headers: Vec<&'static str>,
|
35
|
-
null_string: String,
|
36
|
-
},
|
37
|
-
MultiThreaded {
|
38
|
-
headers: Vec<&'static str>,
|
39
|
-
receiver: kanal::Receiver<T::Output>,
|
40
|
-
handle: Option<thread::JoinHandle<()>>,
|
41
|
-
},
|
42
|
-
}
|
43
|
-
|
44
9
|
impl<T: RecordParser> RecordReader<T> {
|
10
|
+
#[inline]
|
45
11
|
pub(crate) fn get_headers(
|
46
12
|
ruby: &Ruby,
|
47
13
|
reader: &mut csv::Reader<impl Read>,
|
48
14
|
has_headers: bool,
|
49
15
|
) -> Result<Vec<String>, Error> {
|
50
|
-
let first_row = reader
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
)
|
57
|
-
})?
|
58
|
-
.clone();
|
16
|
+
let first_row = reader.headers().map_err(|e| {
|
17
|
+
Error::new(
|
18
|
+
ruby.exception_runtime_error(),
|
19
|
+
Cow::Owned(format!("Failed to read headers: {e}")),
|
20
|
+
)
|
21
|
+
})?;
|
59
22
|
|
60
23
|
Ok(if has_headers {
|
61
|
-
|
24
|
+
// Pre-allocate the vector with exact capacity
|
25
|
+
let mut headers = Vec::with_capacity(first_row.len());
|
26
|
+
headers.extend(first_row.iter().map(String::from));
|
27
|
+
headers
|
62
28
|
} else {
|
63
|
-
|
29
|
+
// Pre-allocate the vector with exact capacity
|
30
|
+
let mut headers = Vec::with_capacity(first_row.len());
|
31
|
+
headers.extend((0..first_row.len()).map(|i| format!("c{i}")));
|
32
|
+
headers
|
64
33
|
})
|
65
34
|
}
|
66
35
|
}
|
@@ -68,30 +37,21 @@ impl<T: RecordParser> RecordReader<T> {
|
|
68
37
|
impl<T: RecordParser> Iterator for RecordReader<T> {
|
69
38
|
type Item = T::Output;
|
70
39
|
|
40
|
+
#[inline]
|
71
41
|
fn next(&mut self) -> Option<Self::Item> {
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
|
86
|
-
headers,
|
87
|
-
null_string,
|
88
|
-
} => {
|
89
|
-
let mut record = csv::StringRecord::new();
|
90
|
-
match reader.read_record(&mut record) {
|
91
|
-
Ok(true) => Some(T::parse(headers, &record, null_string)),
|
92
|
-
_ => None,
|
93
|
-
}
|
94
|
-
}
|
95
|
-
}
|
42
|
+
self.reader.next()
|
43
|
+
}
|
44
|
+
|
45
|
+
#[inline]
|
46
|
+
fn size_hint(&self) -> (usize, Option<usize>) {
|
47
|
+
// We can't know the exact size without reading the whole file
|
48
|
+
(0, None)
|
49
|
+
}
|
50
|
+
}
|
51
|
+
|
52
|
+
impl<T: RecordParser> Drop for RecordReader<T> {
|
53
|
+
#[inline]
|
54
|
+
fn drop(&mut self) {
|
55
|
+
self.reader.cleanup();
|
96
56
|
}
|
97
57
|
}
|
data/ext/osv/src/csv/record.rs
CHANGED
@@ -1,4 +1,4 @@
|
|
1
|
-
use magnus::{IntoValue,
|
1
|
+
use magnus::{IntoValue, Ruby, Value};
|
2
2
|
use std::collections::HashMap;
|
3
3
|
|
4
4
|
#[derive(Debug)]
|
@@ -8,14 +8,16 @@ pub enum CsvRecord {
|
|
8
8
|
}
|
9
9
|
|
10
10
|
impl IntoValue for CsvRecord {
|
11
|
+
#[inline]
|
11
12
|
fn into_value_with(self, handle: &Ruby) -> Value {
|
12
13
|
match self {
|
13
14
|
CsvRecord::Vec(vec) => vec.into_value_with(handle),
|
14
15
|
CsvRecord::Map(map) => {
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
16
|
+
// Pre-allocate the hash with the known size
|
17
|
+
let hash = handle.hash_new_capa(map.len());
|
18
|
+
map.into_iter()
|
19
|
+
.try_for_each(|(k, v)| hash.aset(k, v))
|
20
|
+
.unwrap();
|
19
21
|
hash.into_value_with(handle)
|
20
22
|
}
|
21
23
|
}
|
data/ext/osv/src/reader.rs
CHANGED
data/ext/osv/src/utils.rs
CHANGED
@@ -10,7 +10,7 @@ pub struct CsvArgs {
|
|
10
10
|
pub has_headers: bool,
|
11
11
|
pub delimiter: u8,
|
12
12
|
pub quote_char: u8,
|
13
|
-
pub null_string: String
|
13
|
+
pub null_string: Option<String>,
|
14
14
|
pub buffer_size: usize,
|
15
15
|
pub result_type: String,
|
16
16
|
}
|
@@ -73,7 +73,7 @@ pub fn parse_csv_args(ruby: &Ruby, args: &[Value]) -> Result<CsvArgs, Error> {
|
|
73
73
|
)
|
74
74
|
})?;
|
75
75
|
|
76
|
-
let null_string = kwargs.optional.3
|
76
|
+
let null_string = kwargs.optional.3;
|
77
77
|
|
78
78
|
let buffer_size = kwargs.optional.4.unwrap_or(1000);
|
79
79
|
|
data/lib/osv/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: osv
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.3.
|
4
|
+
version: 0.3.5
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Nathan Jaremko
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2024-12-
|
11
|
+
date: 2024-12-24 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rb_sys
|
@@ -52,13 +52,13 @@ files:
|
|
52
52
|
- LICENSE
|
53
53
|
- README.md
|
54
54
|
- Rakefile
|
55
|
-
- ext/osv/Cargo.lock
|
56
55
|
- ext/osv/Cargo.toml
|
57
56
|
- ext/osv/extconf.rb
|
58
57
|
- ext/osv/src/csv/builder.rs
|
59
58
|
- ext/osv/src/csv/header_cache.rs
|
60
59
|
- ext/osv/src/csv/mod.rs
|
61
60
|
- ext/osv/src/csv/parser.rs
|
61
|
+
- ext/osv/src/csv/read_impl.rs
|
62
62
|
- ext/osv/src/csv/reader.rs
|
63
63
|
- ext/osv/src/csv/record.rs
|
64
64
|
- ext/osv/src/lib.rs
|
data/ext/osv/Cargo.lock
DELETED
@@ -1,402 +0,0 @@
|
|
1
|
-
# This file is automatically @generated by Cargo.
|
2
|
-
# It is not intended for manual editing.
|
3
|
-
version = 3
|
4
|
-
|
5
|
-
[[package]]
|
6
|
-
name = "aho-corasick"
|
7
|
-
version = "1.1.3"
|
8
|
-
source = "registry+https://github.com/rust-lang/crates.io-index"
|
9
|
-
checksum = "8e60d3430d3a69478ad0993f19238d2df97c507009a52b3c10addcd7f6bcb916"
|
10
|
-
dependencies = [
|
11
|
-
"memchr",
|
12
|
-
]
|
13
|
-
|
14
|
-
[[package]]
|
15
|
-
name = "bindgen"
|
16
|
-
version = "0.69.5"
|
17
|
-
source = "registry+https://github.com/rust-lang/crates.io-index"
|
18
|
-
checksum = "271383c67ccabffb7381723dea0672a673f292304fcb45c01cc648c7a8d58088"
|
19
|
-
dependencies = [
|
20
|
-
"bitflags",
|
21
|
-
"cexpr",
|
22
|
-
"clang-sys",
|
23
|
-
"itertools",
|
24
|
-
"lazy_static",
|
25
|
-
"lazycell",
|
26
|
-
"proc-macro2",
|
27
|
-
"quote",
|
28
|
-
"regex",
|
29
|
-
"rustc-hash",
|
30
|
-
"shlex",
|
31
|
-
"syn",
|
32
|
-
]
|
33
|
-
|
34
|
-
[[package]]
|
35
|
-
name = "bitflags"
|
36
|
-
version = "2.6.0"
|
37
|
-
source = "registry+https://github.com/rust-lang/crates.io-index"
|
38
|
-
checksum = "b048fb63fd8b5923fc5aa7b340d8e156aec7ec02f0c78fa8a6ddc2613f6f71de"
|
39
|
-
|
40
|
-
[[package]]
|
41
|
-
name = "cexpr"
|
42
|
-
version = "0.6.0"
|
43
|
-
source = "registry+https://github.com/rust-lang/crates.io-index"
|
44
|
-
checksum = "6fac387a98bb7c37292057cffc56d62ecb629900026402633ae9160df93a8766"
|
45
|
-
dependencies = [
|
46
|
-
"nom",
|
47
|
-
]
|
48
|
-
|
49
|
-
[[package]]
|
50
|
-
name = "cfg-if"
|
51
|
-
version = "1.0.0"
|
52
|
-
source = "registry+https://github.com/rust-lang/crates.io-index"
|
53
|
-
checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd"
|
54
|
-
|
55
|
-
[[package]]
|
56
|
-
name = "clang-sys"
|
57
|
-
version = "1.8.1"
|
58
|
-
source = "registry+https://github.com/rust-lang/crates.io-index"
|
59
|
-
checksum = "0b023947811758c97c59bf9d1c188fd619ad4718dcaa767947df1cadb14f39f4"
|
60
|
-
dependencies = [
|
61
|
-
"glob",
|
62
|
-
"libc",
|
63
|
-
"libloading",
|
64
|
-
]
|
65
|
-
|
66
|
-
[[package]]
|
67
|
-
name = "csv"
|
68
|
-
version = "1.3.1"
|
69
|
-
source = "registry+https://github.com/rust-lang/crates.io-index"
|
70
|
-
checksum = "acdc4883a9c96732e4733212c01447ebd805833b7275a73ca3ee080fd77afdaf"
|
71
|
-
dependencies = [
|
72
|
-
"csv-core",
|
73
|
-
"itoa",
|
74
|
-
"ryu",
|
75
|
-
"serde",
|
76
|
-
]
|
77
|
-
|
78
|
-
[[package]]
|
79
|
-
name = "csv-core"
|
80
|
-
version = "0.1.11"
|
81
|
-
source = "registry+https://github.com/rust-lang/crates.io-index"
|
82
|
-
checksum = "5efa2b3d7902f4b634a20cae3c9c4e6209dc4779feb6863329607560143efa70"
|
83
|
-
dependencies = [
|
84
|
-
"memchr",
|
85
|
-
]
|
86
|
-
|
87
|
-
[[package]]
|
88
|
-
name = "either"
|
89
|
-
version = "1.13.0"
|
90
|
-
source = "registry+https://github.com/rust-lang/crates.io-index"
|
91
|
-
checksum = "60b1af1c220855b6ceac025d3f6ecdd2b7c4894bfe9cd9bda4fbb4bc7c0d4cf0"
|
92
|
-
|
93
|
-
[[package]]
|
94
|
-
name = "glob"
|
95
|
-
version = "0.3.1"
|
96
|
-
source = "registry+https://github.com/rust-lang/crates.io-index"
|
97
|
-
checksum = "d2fabcfbdc87f4758337ca535fb41a6d701b65693ce38287d856d1674551ec9b"
|
98
|
-
|
99
|
-
[[package]]
|
100
|
-
name = "itertools"
|
101
|
-
version = "0.12.1"
|
102
|
-
source = "registry+https://github.com/rust-lang/crates.io-index"
|
103
|
-
checksum = "ba291022dbbd398a455acf126c1e341954079855bc60dfdda641363bd6922569"
|
104
|
-
dependencies = [
|
105
|
-
"either",
|
106
|
-
]
|
107
|
-
|
108
|
-
[[package]]
|
109
|
-
name = "itoa"
|
110
|
-
version = "1.0.14"
|
111
|
-
source = "registry+https://github.com/rust-lang/crates.io-index"
|
112
|
-
checksum = "d75a2a4b1b190afb6f5425f10f6a8f959d2ea0b9c2b1d79553551850539e4674"
|
113
|
-
|
114
|
-
[[package]]
|
115
|
-
name = "lazy_static"
|
116
|
-
version = "1.5.0"
|
117
|
-
source = "registry+https://github.com/rust-lang/crates.io-index"
|
118
|
-
checksum = "bbd2bcb4c963f2ddae06a2efc7e9f3591312473c50c6685e1f298068316e66fe"
|
119
|
-
|
120
|
-
[[package]]
|
121
|
-
name = "lazycell"
|
122
|
-
version = "1.3.0"
|
123
|
-
source = "registry+https://github.com/rust-lang/crates.io-index"
|
124
|
-
checksum = "830d08ce1d1d941e6b30645f1a0eb5643013d835ce3779a5fc208261dbe10f55"
|
125
|
-
|
126
|
-
[[package]]
|
127
|
-
name = "libc"
|
128
|
-
version = "0.2.169"
|
129
|
-
source = "registry+https://github.com/rust-lang/crates.io-index"
|
130
|
-
checksum = "b5aba8db14291edd000dfcc4d620c7ebfb122c613afb886ca8803fa4e128a20a"
|
131
|
-
|
132
|
-
[[package]]
|
133
|
-
name = "libloading"
|
134
|
-
version = "0.8.6"
|
135
|
-
source = "registry+https://github.com/rust-lang/crates.io-index"
|
136
|
-
checksum = "fc2f4eb4bc735547cfed7c0a4922cbd04a4655978c09b54f1f7b228750664c34"
|
137
|
-
dependencies = [
|
138
|
-
"cfg-if",
|
139
|
-
"windows-targets",
|
140
|
-
]
|
141
|
-
|
142
|
-
[[package]]
|
143
|
-
name = "magnus"
|
144
|
-
version = "0.7.1"
|
145
|
-
source = "registry+https://github.com/rust-lang/crates.io-index"
|
146
|
-
checksum = "3d87ae53030f3a22e83879e666cb94e58a7bdf31706878a0ba48752994146dab"
|
147
|
-
dependencies = [
|
148
|
-
"magnus-macros",
|
149
|
-
"rb-sys",
|
150
|
-
"rb-sys-env",
|
151
|
-
"seq-macro",
|
152
|
-
]
|
153
|
-
|
154
|
-
[[package]]
|
155
|
-
name = "magnus-macros"
|
156
|
-
version = "0.6.0"
|
157
|
-
source = "registry+https://github.com/rust-lang/crates.io-index"
|
158
|
-
checksum = "5968c820e2960565f647819f5928a42d6e874551cab9d88d75e3e0660d7f71e3"
|
159
|
-
dependencies = [
|
160
|
-
"proc-macro2",
|
161
|
-
"quote",
|
162
|
-
"syn",
|
163
|
-
]
|
164
|
-
|
165
|
-
[[package]]
|
166
|
-
name = "memchr"
|
167
|
-
version = "2.7.4"
|
168
|
-
source = "registry+https://github.com/rust-lang/crates.io-index"
|
169
|
-
checksum = "78ca9ab1a0babb1e7d5695e3530886289c18cf2f87ec19a575a0abdce112e3a3"
|
170
|
-
|
171
|
-
[[package]]
|
172
|
-
name = "minimal-lexical"
|
173
|
-
version = "0.2.1"
|
174
|
-
source = "registry+https://github.com/rust-lang/crates.io-index"
|
175
|
-
checksum = "68354c5c6bd36d73ff3feceb05efa59b6acb7626617f4962be322a825e61f79a"
|
176
|
-
|
177
|
-
[[package]]
|
178
|
-
name = "nom"
|
179
|
-
version = "7.1.3"
|
180
|
-
source = "registry+https://github.com/rust-lang/crates.io-index"
|
181
|
-
checksum = "d273983c5a657a70a3e8f2a01329822f3b8c8172b73826411a55751e404a0a4a"
|
182
|
-
dependencies = [
|
183
|
-
"memchr",
|
184
|
-
"minimal-lexical",
|
185
|
-
]
|
186
|
-
|
187
|
-
[[package]]
|
188
|
-
name = "osv"
|
189
|
-
version = "0.1.0"
|
190
|
-
dependencies = [
|
191
|
-
"csv",
|
192
|
-
"magnus",
|
193
|
-
"rb-sys",
|
194
|
-
]
|
195
|
-
|
196
|
-
[[package]]
|
197
|
-
name = "proc-macro2"
|
198
|
-
version = "1.0.92"
|
199
|
-
source = "registry+https://github.com/rust-lang/crates.io-index"
|
200
|
-
checksum = "37d3544b3f2748c54e147655edb5025752e2303145b5aefb3c3ea2c78b973bb0"
|
201
|
-
dependencies = [
|
202
|
-
"unicode-ident",
|
203
|
-
]
|
204
|
-
|
205
|
-
[[package]]
|
206
|
-
name = "quote"
|
207
|
-
version = "1.0.37"
|
208
|
-
source = "registry+https://github.com/rust-lang/crates.io-index"
|
209
|
-
checksum = "b5b9d34b8991d19d98081b46eacdd8eb58c6f2b201139f7c5f643cc155a633af"
|
210
|
-
dependencies = [
|
211
|
-
"proc-macro2",
|
212
|
-
]
|
213
|
-
|
214
|
-
[[package]]
|
215
|
-
name = "rb-sys"
|
216
|
-
version = "0.9.103"
|
217
|
-
source = "registry+https://github.com/rust-lang/crates.io-index"
|
218
|
-
checksum = "91dbe37ab6ac2fba187480fb6544b92445e41e5c6f553bf0c33743f3c450a1df"
|
219
|
-
dependencies = [
|
220
|
-
"rb-sys-build",
|
221
|
-
]
|
222
|
-
|
223
|
-
[[package]]
|
224
|
-
name = "rb-sys-build"
|
225
|
-
version = "0.9.103"
|
226
|
-
source = "registry+https://github.com/rust-lang/crates.io-index"
|
227
|
-
checksum = "c4d56a49dcb646b70b758789c0d16c055a386a4f2a3346333abb69850fa860ce"
|
228
|
-
dependencies = [
|
229
|
-
"bindgen",
|
230
|
-
"lazy_static",
|
231
|
-
"proc-macro2",
|
232
|
-
"quote",
|
233
|
-
"regex",
|
234
|
-
"shell-words",
|
235
|
-
"syn",
|
236
|
-
]
|
237
|
-
|
238
|
-
[[package]]
|
239
|
-
name = "rb-sys-env"
|
240
|
-
version = "0.1.2"
|
241
|
-
source = "registry+https://github.com/rust-lang/crates.io-index"
|
242
|
-
checksum = "a35802679f07360454b418a5d1735c89716bde01d35b1560fc953c1415a0b3bb"
|
243
|
-
|
244
|
-
[[package]]
|
245
|
-
name = "regex"
|
246
|
-
version = "1.11.1"
|
247
|
-
source = "registry+https://github.com/rust-lang/crates.io-index"
|
248
|
-
checksum = "b544ef1b4eac5dc2db33ea63606ae9ffcfac26c1416a2806ae0bf5f56b201191"
|
249
|
-
dependencies = [
|
250
|
-
"aho-corasick",
|
251
|
-
"memchr",
|
252
|
-
"regex-automata",
|
253
|
-
"regex-syntax",
|
254
|
-
]
|
255
|
-
|
256
|
-
[[package]]
|
257
|
-
name = "regex-automata"
|
258
|
-
version = "0.4.9"
|
259
|
-
source = "registry+https://github.com/rust-lang/crates.io-index"
|
260
|
-
checksum = "809e8dc61f6de73b46c85f4c96486310fe304c434cfa43669d7b40f711150908"
|
261
|
-
dependencies = [
|
262
|
-
"aho-corasick",
|
263
|
-
"memchr",
|
264
|
-
"regex-syntax",
|
265
|
-
]
|
266
|
-
|
267
|
-
[[package]]
|
268
|
-
name = "regex-syntax"
|
269
|
-
version = "0.8.5"
|
270
|
-
source = "registry+https://github.com/rust-lang/crates.io-index"
|
271
|
-
checksum = "2b15c43186be67a4fd63bee50d0303afffcef381492ebe2c5d87f324e1b8815c"
|
272
|
-
|
273
|
-
[[package]]
|
274
|
-
name = "rustc-hash"
|
275
|
-
version = "1.1.0"
|
276
|
-
source = "registry+https://github.com/rust-lang/crates.io-index"
|
277
|
-
checksum = "08d43f7aa6b08d49f382cde6a7982047c3426db949b1424bc4b7ec9ae12c6ce2"
|
278
|
-
|
279
|
-
[[package]]
|
280
|
-
name = "ryu"
|
281
|
-
version = "1.0.18"
|
282
|
-
source = "registry+https://github.com/rust-lang/crates.io-index"
|
283
|
-
checksum = "f3cb5ba0dc43242ce17de99c180e96db90b235b8a9fdc9543c96d2209116bd9f"
|
284
|
-
|
285
|
-
[[package]]
|
286
|
-
name = "seq-macro"
|
287
|
-
version = "0.3.5"
|
288
|
-
source = "registry+https://github.com/rust-lang/crates.io-index"
|
289
|
-
checksum = "a3f0bf26fd526d2a95683cd0f87bf103b8539e2ca1ef48ce002d67aad59aa0b4"
|
290
|
-
|
291
|
-
[[package]]
|
292
|
-
name = "serde"
|
293
|
-
version = "1.0.216"
|
294
|
-
source = "registry+https://github.com/rust-lang/crates.io-index"
|
295
|
-
checksum = "0b9781016e935a97e8beecf0c933758c97a5520d32930e460142b4cd80c6338e"
|
296
|
-
dependencies = [
|
297
|
-
"serde_derive",
|
298
|
-
]
|
299
|
-
|
300
|
-
[[package]]
|
301
|
-
name = "serde_derive"
|
302
|
-
version = "1.0.216"
|
303
|
-
source = "registry+https://github.com/rust-lang/crates.io-index"
|
304
|
-
checksum = "46f859dbbf73865c6627ed570e78961cd3ac92407a2d117204c49232485da55e"
|
305
|
-
dependencies = [
|
306
|
-
"proc-macro2",
|
307
|
-
"quote",
|
308
|
-
"syn",
|
309
|
-
]
|
310
|
-
|
311
|
-
[[package]]
|
312
|
-
name = "shell-words"
|
313
|
-
version = "1.1.0"
|
314
|
-
source = "registry+https://github.com/rust-lang/crates.io-index"
|
315
|
-
checksum = "24188a676b6ae68c3b2cb3a01be17fbf7240ce009799bb56d5b1409051e78fde"
|
316
|
-
|
317
|
-
[[package]]
|
318
|
-
name = "shlex"
|
319
|
-
version = "1.3.0"
|
320
|
-
source = "registry+https://github.com/rust-lang/crates.io-index"
|
321
|
-
checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64"
|
322
|
-
|
323
|
-
[[package]]
|
324
|
-
name = "syn"
|
325
|
-
version = "2.0.90"
|
326
|
-
source = "registry+https://github.com/rust-lang/crates.io-index"
|
327
|
-
checksum = "919d3b74a5dd0ccd15aeb8f93e7006bd9e14c295087c9896a110f490752bcf31"
|
328
|
-
dependencies = [
|
329
|
-
"proc-macro2",
|
330
|
-
"quote",
|
331
|
-
"unicode-ident",
|
332
|
-
]
|
333
|
-
|
334
|
-
[[package]]
|
335
|
-
name = "unicode-ident"
|
336
|
-
version = "1.0.14"
|
337
|
-
source = "registry+https://github.com/rust-lang/crates.io-index"
|
338
|
-
checksum = "adb9e6ca4f869e1180728b7950e35922a7fc6397f7b641499e8f3ef06e50dc83"
|
339
|
-
|
340
|
-
[[package]]
|
341
|
-
name = "windows-targets"
|
342
|
-
version = "0.52.6"
|
343
|
-
source = "registry+https://github.com/rust-lang/crates.io-index"
|
344
|
-
checksum = "9b724f72796e036ab90c1021d4780d4d3d648aca59e491e6b98e725b84e99973"
|
345
|
-
dependencies = [
|
346
|
-
"windows_aarch64_gnullvm",
|
347
|
-
"windows_aarch64_msvc",
|
348
|
-
"windows_i686_gnu",
|
349
|
-
"windows_i686_gnullvm",
|
350
|
-
"windows_i686_msvc",
|
351
|
-
"windows_x86_64_gnu",
|
352
|
-
"windows_x86_64_gnullvm",
|
353
|
-
"windows_x86_64_msvc",
|
354
|
-
]
|
355
|
-
|
356
|
-
[[package]]
|
357
|
-
name = "windows_aarch64_gnullvm"
|
358
|
-
version = "0.52.6"
|
359
|
-
source = "registry+https://github.com/rust-lang/crates.io-index"
|
360
|
-
checksum = "32a4622180e7a0ec044bb555404c800bc9fd9ec262ec147edd5989ccd0c02cd3"
|
361
|
-
|
362
|
-
[[package]]
|
363
|
-
name = "windows_aarch64_msvc"
|
364
|
-
version = "0.52.6"
|
365
|
-
source = "registry+https://github.com/rust-lang/crates.io-index"
|
366
|
-
checksum = "09ec2a7bb152e2252b53fa7803150007879548bc709c039df7627cabbd05d469"
|
367
|
-
|
368
|
-
[[package]]
|
369
|
-
name = "windows_i686_gnu"
|
370
|
-
version = "0.52.6"
|
371
|
-
source = "registry+https://github.com/rust-lang/crates.io-index"
|
372
|
-
checksum = "8e9b5ad5ab802e97eb8e295ac6720e509ee4c243f69d781394014ebfe8bbfa0b"
|
373
|
-
|
374
|
-
[[package]]
|
375
|
-
name = "windows_i686_gnullvm"
|
376
|
-
version = "0.52.6"
|
377
|
-
source = "registry+https://github.com/rust-lang/crates.io-index"
|
378
|
-
checksum = "0eee52d38c090b3caa76c563b86c3a4bd71ef1a819287c19d586d7334ae8ed66"
|
379
|
-
|
380
|
-
[[package]]
|
381
|
-
name = "windows_i686_msvc"
|
382
|
-
version = "0.52.6"
|
383
|
-
source = "registry+https://github.com/rust-lang/crates.io-index"
|
384
|
-
checksum = "240948bc05c5e7c6dabba28bf89d89ffce3e303022809e73deaefe4f6ec56c66"
|
385
|
-
|
386
|
-
[[package]]
|
387
|
-
name = "windows_x86_64_gnu"
|
388
|
-
version = "0.52.6"
|
389
|
-
source = "registry+https://github.com/rust-lang/crates.io-index"
|
390
|
-
checksum = "147a5c80aabfbf0c7d901cb5895d1de30ef2907eb21fbbab29ca94c5b08b1a78"
|
391
|
-
|
392
|
-
[[package]]
|
393
|
-
name = "windows_x86_64_gnullvm"
|
394
|
-
version = "0.52.6"
|
395
|
-
source = "registry+https://github.com/rust-lang/crates.io-index"
|
396
|
-
checksum = "24d5b23dc417412679681396f2b49f3de8c1473deb516bd34410872eff51ed0d"
|
397
|
-
|
398
|
-
[[package]]
|
399
|
-
name = "windows_x86_64_msvc"
|
400
|
-
version = "0.52.6"
|
401
|
-
source = "registry+https://github.com/rust-lang/crates.io-index"
|
402
|
-
checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec"
|