osv 0.3.13 → 0.3.14
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Gemfile +1 -1
- data/README.md +39 -81
- data/Rakefile +6 -8
- data/ext/osv/src/csv/builder.rs +59 -175
- data/ext/osv/src/csv/mod.rs +4 -3
- data/ext/osv/src/csv/parser.rs +90 -14
- data/ext/osv/src/csv/record.rs +19 -6
- data/ext/osv/src/csv/record_reader.rs +175 -0
- data/ext/osv/src/csv/ruby_reader.rs +181 -0
- data/ext/osv/src/reader.rs +24 -19
- data/lib/osv/version.rb +1 -1
- metadata +11 -15
- data/ext/osv/src/csv/read_impl.rs +0 -75
- data/ext/osv/src/csv/reader.rs +0 -57
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: osv
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.3.13
|
4
|
+
version: 0.3.14
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Nathan Jaremko
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2025-01-02 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rb_sys
|
@@ -38,16 +38,12 @@ dependencies:
|
|
38
38
|
- - "~>"
|
39
39
|
- !ruby/object:Gem::Version
|
40
40
|
version: 1.2.0
|
41
|
-
description: |
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
- Support for both hash and array output formats
|
48
|
-
- Whitespace trimming options
|
49
|
-
- Strict or flexible parsing modes
|
50
|
-
- Significantly faster than Ruby's standard CSV library
|
41
|
+
description: |2
|
42
|
+
OSV is a high-performance CSV parser for Ruby, implemented in Rust.
|
43
|
+
It wraps BurntSushi's csv-rs crate to provide fast CSV parsing with support for both hash-based and array-based row formats.
|
44
|
+
Features include: Flexible input sources (file paths, gzipped files, IO objects, strings),
|
45
|
+
configurable parsing options (headers, separators, quote chars), support for both hash and array output formats,
|
46
|
+
whitespace trimming options, strict or flexible parsing modes, and is significantly faster than Ruby's standard CSV library.
|
51
47
|
email:
|
52
48
|
- nathan@jaremko.ca
|
53
49
|
executables: []
|
@@ -67,9 +63,9 @@ files:
|
|
67
63
|
- ext/osv/src/csv/header_cache.rs
|
68
64
|
- ext/osv/src/csv/mod.rs
|
69
65
|
- ext/osv/src/csv/parser.rs
|
70
|
-
- ext/osv/src/csv/read_impl.rs
|
71
|
-
- ext/osv/src/csv/reader.rs
|
72
66
|
- ext/osv/src/csv/record.rs
|
67
|
+
- ext/osv/src/csv/record_reader.rs
|
68
|
+
- ext/osv/src/csv/ruby_reader.rs
|
73
69
|
- ext/osv/src/lib.rs
|
74
70
|
- ext/osv/src/reader.rs
|
75
71
|
- ext/osv/src/utils.rs
|
@@ -84,8 +80,8 @@ metadata:
|
|
84
80
|
source_code_uri: https://github.com/njaremko/osv
|
85
81
|
readme_uri: https://github.com/njaremko/osv/blob/main/README.md
|
86
82
|
changelog_uri: https://github.com/njaremko/osv/blob/main/CHANGELOG.md
|
87
|
-
rubygems_mfa_required: 'true'
|
88
83
|
documentation_uri: https://www.rubydoc.info/gems/osv
|
84
|
+
funding_uri: https://github.com/sponsors/njaremko
|
89
85
|
post_install_message:
|
90
86
|
rdoc_options: []
|
91
87
|
require_paths:
|
data/ext/osv/src/csv/read_impl.rs
DELETED
@@ -1,75 +0,0 @@
|
|
1
|
-
use super::{header_cache::StringCache, parser::RecordParser};
|
2
|
-
use std::{io::Read, thread};
|
3
|
-
|
4
|
-
pub(crate) const READ_BUFFER_SIZE: usize = 8192;
|
5
|
-
|
6
|
-
pub enum ReadImpl<T: RecordParser> {
|
7
|
-
SingleThreaded {
|
8
|
-
reader: csv::Reader<Box<dyn Read>>,
|
9
|
-
headers: Vec<&'static str>,
|
10
|
-
null_string: Option<String>,
|
11
|
-
flexible_default: Option<String>,
|
12
|
-
},
|
13
|
-
MultiThreaded {
|
14
|
-
headers: Vec<&'static str>,
|
15
|
-
receiver: kanal::Receiver<T::Output>,
|
16
|
-
handle: Option<thread::JoinHandle<()>>,
|
17
|
-
},
|
18
|
-
}
|
19
|
-
|
20
|
-
impl<T: RecordParser> ReadImpl<T> {
|
21
|
-
#[inline]
|
22
|
-
pub fn next(&mut self) -> Option<T::Output> {
|
23
|
-
match self {
|
24
|
-
Self::MultiThreaded {
|
25
|
-
receiver, handle, ..
|
26
|
-
} => match receiver.recv() {
|
27
|
-
Ok(record) => Some(record),
|
28
|
-
Err(_) => {
|
29
|
-
if let Some(handle) = handle.take() {
|
30
|
-
let _ = handle.join();
|
31
|
-
}
|
32
|
-
None
|
33
|
-
}
|
34
|
-
},
|
35
|
-
Self::SingleThreaded {
|
36
|
-
reader,
|
37
|
-
headers,
|
38
|
-
null_string,
|
39
|
-
flexible_default,
|
40
|
-
} => {
|
41
|
-
let mut record = csv::StringRecord::with_capacity(READ_BUFFER_SIZE, headers.len());
|
42
|
-
match reader.read_record(&mut record) {
|
43
|
-
Ok(true) => Some(T::parse(
|
44
|
-
headers,
|
45
|
-
&record,
|
46
|
-
null_string.as_deref(),
|
47
|
-
flexible_default.as_deref(),
|
48
|
-
)),
|
49
|
-
_ => None,
|
50
|
-
}
|
51
|
-
}
|
52
|
-
}
|
53
|
-
}
|
54
|
-
|
55
|
-
#[inline]
|
56
|
-
pub fn cleanup(&mut self) {
|
57
|
-
match self {
|
58
|
-
Self::MultiThreaded {
|
59
|
-
receiver,
|
60
|
-
handle,
|
61
|
-
headers,
|
62
|
-
..
|
63
|
-
} => {
|
64
|
-
receiver.close();
|
65
|
-
if let Some(handle) = handle.take() {
|
66
|
-
let _ = handle.join();
|
67
|
-
}
|
68
|
-
let _ = StringCache::clear(headers);
|
69
|
-
}
|
70
|
-
Self::SingleThreaded { headers, .. } => {
|
71
|
-
let _ = StringCache::clear(headers);
|
72
|
-
}
|
73
|
-
}
|
74
|
-
}
|
75
|
-
}
|
data/ext/osv/src/csv/reader.rs
DELETED
@@ -1,57 +0,0 @@
|
|
1
|
-
use super::{parser::RecordParser, read_impl::ReadImpl};
|
2
|
-
use magnus::{Error, Ruby};
|
3
|
-
use std::{borrow::Cow, io::Read};
|
4
|
-
|
5
|
-
pub struct RecordReader<T: RecordParser> {
|
6
|
-
pub(crate) reader: ReadImpl<T>,
|
7
|
-
}
|
8
|
-
|
9
|
-
impl<T: RecordParser> RecordReader<T> {
|
10
|
-
#[inline]
|
11
|
-
pub(crate) fn get_headers(
|
12
|
-
ruby: &Ruby,
|
13
|
-
reader: &mut csv::Reader<impl Read>,
|
14
|
-
has_headers: bool,
|
15
|
-
) -> Result<Vec<String>, Error> {
|
16
|
-
let first_row = reader.headers().map_err(|e| {
|
17
|
-
Error::new(
|
18
|
-
ruby.exception_runtime_error(),
|
19
|
-
Cow::Owned(format!("Failed to read headers: {e}")),
|
20
|
-
)
|
21
|
-
})?;
|
22
|
-
|
23
|
-
Ok(if has_headers {
|
24
|
-
// Pre-allocate the vector with exact capacity
|
25
|
-
let mut headers = Vec::with_capacity(first_row.len());
|
26
|
-
headers.extend(first_row.iter().map(String::from));
|
27
|
-
headers
|
28
|
-
} else {
|
29
|
-
// Pre-allocate the vector with exact capacity
|
30
|
-
let mut headers = Vec::with_capacity(first_row.len());
|
31
|
-
headers.extend((0..first_row.len()).map(|i| format!("c{i}")));
|
32
|
-
headers
|
33
|
-
})
|
34
|
-
}
|
35
|
-
}
|
36
|
-
|
37
|
-
impl<T: RecordParser> Iterator for RecordReader<T> {
|
38
|
-
type Item = T::Output;
|
39
|
-
|
40
|
-
#[inline]
|
41
|
-
fn next(&mut self) -> Option<Self::Item> {
|
42
|
-
self.reader.next()
|
43
|
-
}
|
44
|
-
|
45
|
-
#[inline]
|
46
|
-
fn size_hint(&self) -> (usize, Option<usize>) {
|
47
|
-
// We can't know the exact size without reading the whole file
|
48
|
-
(0, None)
|
49
|
-
}
|
50
|
-
}
|
51
|
-
|
52
|
-
impl<T: RecordParser> Drop for RecordReader<T> {
|
53
|
-
#[inline]
|
54
|
-
fn drop(&mut self) {
|
55
|
-
self.reader.cleanup();
|
56
|
-
}
|
57
|
-
}
|