osv 0.3.0 → 0.3.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Cargo.lock +11 -4
- data/ext/osv/Cargo.toml +3 -2
- data/ext/osv/src/csv/builder.rs +11 -3
- data/ext/osv/src/csv/header_cache.rs +63 -0
- data/ext/osv/src/csv/mod.rs +1 -1
- data/ext/osv/src/csv/parser.rs +34 -26
- data/ext/osv/src/csv/reader.rs +27 -3
- data/ext/osv/src/csv/record.rs +9 -3
- data/ext/osv/src/reader.rs +6 -6
- data/lib/osv/version.rb +1 -1
- metadata +2 -1
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 687f5a3426558ea4d4d342f153dae1f11da93807daddcc6944888d92cda7502b
|
4
|
+
data.tar.gz: e9f766b94e4c7f806e81eec859a0146b6a4758c5a69c759b084f64e01fd7a888
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: bb14e59bca79613064a07cbcfa6c2531f08c68eb131e512b9822c7295bcacfd051de8dcd49100d12c8377d90a438d528dc5b20bcf6a1aa7c4e44f8e2aaae7111
|
7
|
+
data.tar.gz: a3a83f9653ef5bc41c908de14c96b756c719fd2750c8739b4a2165ff91ed82d34fdfde184c853185a8ff516e324d01816c65764f05adf58db6b2159ae66b0c74
|
data/Cargo.lock
CHANGED
@@ -43,6 +43,12 @@ version = "2.6.0"
|
|
43
43
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
44
44
|
checksum = "b048fb63fd8b5923fc5aa7b340d8e156aec7ec02f0c78fa8a6ddc2613f6f71de"
|
45
45
|
|
46
|
+
[[package]]
|
47
|
+
name = "bumpalo"
|
48
|
+
version = "3.16.0"
|
49
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
50
|
+
checksum = "79296716171880943b8470b5f8d03aa55eb2e645a4874bdbb28adb49162e012c"
|
51
|
+
|
46
52
|
[[package]]
|
47
53
|
name = "cexpr"
|
48
54
|
version = "0.6.0"
|
@@ -232,6 +238,7 @@ dependencies = [
|
|
232
238
|
name = "osv"
|
233
239
|
version = "0.1.0"
|
234
240
|
dependencies = [
|
241
|
+
"bumpalo",
|
235
242
|
"csv",
|
236
243
|
"kanal",
|
237
244
|
"magnus 0.7.1",
|
@@ -260,18 +267,18 @@ dependencies = [
|
|
260
267
|
|
261
268
|
[[package]]
|
262
269
|
name = "rb-sys"
|
263
|
-
version = "0.9.
|
270
|
+
version = "0.9.104"
|
264
271
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
265
|
-
checksum = "
|
272
|
+
checksum = "e2e26425f064a90404ed5e33fee2137b02a9c6d1c83e19394f4d8a476b9d76a2"
|
266
273
|
dependencies = [
|
267
274
|
"rb-sys-build",
|
268
275
|
]
|
269
276
|
|
270
277
|
[[package]]
|
271
278
|
name = "rb-sys-build"
|
272
|
-
version = "0.9.
|
279
|
+
version = "0.9.104"
|
273
280
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
274
|
-
checksum = "
|
281
|
+
checksum = "c9802c9003c5648ee0a067e9aa8960d402d5f764f682f93c1ed49eec72f6d7fc"
|
275
282
|
dependencies = [
|
276
283
|
"bindgen",
|
277
284
|
"lazy_static",
|
data/ext/osv/Cargo.toml
CHANGED
@@ -7,9 +7,10 @@ edition = "2021"
|
|
7
7
|
crate-type = ["cdylib"]
|
8
8
|
|
9
9
|
[dependencies]
|
10
|
-
csv = "1.3
|
10
|
+
csv = "^1.3"
|
11
11
|
magnus = { version = "0.7", features = ["rb-sys"] }
|
12
|
-
rb-sys = "0.9"
|
12
|
+
rb-sys = "^0.9"
|
13
13
|
serde = { version = "1.0", features = ["derive"] }
|
14
14
|
serde_magnus = "0.8.1"
|
15
15
|
kanal = "0.1.0-pre8"
|
16
|
+
bumpalo = { version = "3.16.0", features = ["collections"] }
|
data/ext/osv/src/csv/builder.rs
CHANGED
@@ -1,4 +1,5 @@
|
|
1
1
|
use super::{
|
2
|
+
header_cache::StringCache,
|
2
3
|
parser::RecordParser,
|
3
4
|
reader::{ReadImpl, RecordReader},
|
4
5
|
};
|
@@ -80,7 +81,6 @@ impl<'a, T: RecordParser + Send + 'static> RecordReaderBuilder<'a, T> {
|
|
80
81
|
|
81
82
|
pub fn build(self) -> Result<RecordReader<T>, Error> {
|
82
83
|
let readable = self.get_reader()?;
|
83
|
-
|
84
84
|
let mut reader = csv::ReaderBuilder::new()
|
85
85
|
.has_headers(self.has_headers)
|
86
86
|
.delimiter(self.delimiter)
|
@@ -88,14 +88,21 @@ impl<'a, T: RecordParser + Send + 'static> RecordReaderBuilder<'a, T> {
|
|
88
88
|
.from_reader(readable);
|
89
89
|
|
90
90
|
let headers = RecordReader::<T>::get_headers(self.ruby, &mut reader, self.has_headers)?;
|
91
|
-
let headers_clone = headers.clone();
|
92
91
|
let null_string = self.null_string;
|
93
92
|
|
93
|
+
let static_headers = StringCache::intern_many(&headers).map_err(|e| {
|
94
|
+
Error::new(
|
95
|
+
self.ruby.exception_runtime_error(),
|
96
|
+
format!("Failed to intern headers: {e}"),
|
97
|
+
)
|
98
|
+
})?;
|
99
|
+
let headers_for_cleanup = static_headers.clone();
|
100
|
+
|
94
101
|
let (sender, receiver) = kanal::bounded(self.buffer);
|
95
102
|
let handle = thread::spawn(move || {
|
96
103
|
let mut record = csv::StringRecord::new();
|
97
104
|
while let Ok(true) = reader.read_record(&mut record) {
|
98
|
-
let row = T::parse(&headers_clone, &record, &null_string);
|
105
|
+
let row = T::parse(&static_headers, &record, &null_string);
|
99
106
|
if sender.send(row).is_err() {
|
100
107
|
break;
|
101
108
|
}
|
@@ -106,6 +113,7 @@ impl<'a, T: RecordParser + Send + 'static> RecordReaderBuilder<'a, T> {
|
|
106
113
|
|
107
114
|
Ok(RecordReader {
|
108
115
|
reader: ReadImpl::MultiThreaded {
|
116
|
+
headers: headers_for_cleanup,
|
109
117
|
receiver,
|
110
118
|
handle: Some(handle),
|
111
119
|
},
|
@@ -0,0 +1,63 @@
|
|
1
|
+
use std::{
|
2
|
+
collections::HashMap,
|
3
|
+
sync::{atomic::AtomicU32, LazyLock, Mutex},
|
4
|
+
};
|
5
|
+
|
6
|
+
static STRING_CACHE: LazyLock<Mutex<HashMap<&'static str, AtomicU32>>> =
|
7
|
+
LazyLock::new(|| Mutex::new(HashMap::with_capacity(100)));
|
8
|
+
|
9
|
+
pub struct StringCache {}
|
10
|
+
|
11
|
+
impl StringCache {
|
12
|
+
#[allow(dead_code)]
|
13
|
+
pub fn intern(string: String) -> Result<&'static str, String> {
|
14
|
+
let mut cache = STRING_CACHE.lock().map_err(|e| e.to_string())?;
|
15
|
+
|
16
|
+
if let Some((&existing, count)) = cache.get_key_value(string.as_str()) {
|
17
|
+
count.fetch_add(1, std::sync::atomic::Ordering::Relaxed);
|
18
|
+
Ok(existing)
|
19
|
+
} else {
|
20
|
+
let leaked = Box::leak(string.into_boxed_str());
|
21
|
+
cache.insert(leaked, AtomicU32::new(1));
|
22
|
+
Ok(leaked)
|
23
|
+
}
|
24
|
+
}
|
25
|
+
|
26
|
+
pub fn intern_many(strings: &[String]) -> Result<Vec<&'static str>, String> {
|
27
|
+
let mut cache = STRING_CACHE.lock().map_err(|e| e.to_string())?;
|
28
|
+
let mut result = Vec::with_capacity(strings.len());
|
29
|
+
|
30
|
+
for string in strings {
|
31
|
+
let static_str: &'static str =
|
32
|
+
if let Some((&existing, count)) = cache.get_key_value(string.as_str()) {
|
33
|
+
count.fetch_add(1, std::sync::atomic::Ordering::Relaxed);
|
34
|
+
existing
|
35
|
+
} else {
|
36
|
+
let leaked = Box::leak(string.clone().into_boxed_str());
|
37
|
+
cache.insert(leaked, AtomicU32::new(1));
|
38
|
+
leaked
|
39
|
+
};
|
40
|
+
result.push(static_str);
|
41
|
+
}
|
42
|
+
|
43
|
+
Ok(result)
|
44
|
+
}
|
45
|
+
|
46
|
+
pub fn clear(headers: &[&'static str]) -> Result<(), String> {
|
47
|
+
let cache = STRING_CACHE.lock().map_err(|e| e.to_string())?;
|
48
|
+
|
49
|
+
for header in headers {
|
50
|
+
if let Some(count) = cache.get(header) {
|
51
|
+
let remaining = count.fetch_sub(1, std::sync::atomic::Ordering::Relaxed);
|
52
|
+
if remaining == 0 {
|
53
|
+
let ptr = *header as *const str as *mut str;
|
54
|
+
unsafe {
|
55
|
+
let _ = Box::from_raw(ptr);
|
56
|
+
}
|
57
|
+
}
|
58
|
+
}
|
59
|
+
}
|
60
|
+
|
61
|
+
Ok(())
|
62
|
+
}
|
63
|
+
}
|
data/ext/osv/src/csv/mod.rs
CHANGED
data/ext/osv/src/csv/parser.rs
CHANGED
@@ -2,42 +2,50 @@ use std::collections::HashMap;
|
|
2
2
|
|
3
3
|
pub trait RecordParser {
|
4
4
|
type Output;
|
5
|
-
|
6
|
-
|
5
|
+
fn parse(
|
6
|
+
headers: &[&'static str],
|
7
|
+
record: &csv::StringRecord,
|
8
|
+
null_string: &str,
|
9
|
+
) -> Self::Output;
|
7
10
|
}
|
8
11
|
|
9
|
-
impl RecordParser for HashMap<String, Option<String>> {
|
12
|
+
impl RecordParser for HashMap<&'static str, Option<String>> {
|
10
13
|
type Output = Self;
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
14
|
+
fn parse(
|
15
|
+
headers: &[&'static str],
|
16
|
+
record: &csv::StringRecord,
|
17
|
+
null_string: &str,
|
18
|
+
) -> Self::Output {
|
19
|
+
let mut map = HashMap::with_capacity(headers.len());
|
20
|
+
for (header, field) in headers.iter().zip(record.iter()) {
|
21
|
+
map.insert(
|
22
|
+
*header,
|
23
|
+
if field == null_string {
|
18
24
|
None
|
19
25
|
} else {
|
20
26
|
Some(field.to_string())
|
21
|
-
}
|
22
|
-
|
23
|
-
|
24
|
-
|
27
|
+
},
|
28
|
+
);
|
29
|
+
}
|
30
|
+
map
|
25
31
|
}
|
26
32
|
}
|
27
33
|
|
28
34
|
impl RecordParser for Vec<Option<String>> {
|
29
35
|
type Output = Self;
|
30
|
-
|
31
|
-
|
32
|
-
record
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
36
|
+
fn parse(
|
37
|
+
_headers: &[&'static str],
|
38
|
+
record: &csv::StringRecord,
|
39
|
+
null_string: &str,
|
40
|
+
) -> Self::Output {
|
41
|
+
let mut vec = Vec::with_capacity(record.len());
|
42
|
+
for field in record.iter() {
|
43
|
+
vec.push(if field == null_string {
|
44
|
+
None
|
45
|
+
} else {
|
46
|
+
Some(field.to_string())
|
47
|
+
});
|
48
|
+
}
|
49
|
+
vec
|
42
50
|
}
|
43
51
|
}
|
data/ext/osv/src/csv/reader.rs
CHANGED
@@ -1,4 +1,4 @@
|
|
1
|
-
use super::parser::RecordParser;
|
1
|
+
use super::{header_cache::StringCache, parser::RecordParser};
|
2
2
|
use magnus::{Error, Ruby};
|
3
3
|
use std::{io::Read, thread};
|
4
4
|
|
@@ -6,14 +6,36 @@ pub struct RecordReader<T: RecordParser> {
|
|
6
6
|
pub(crate) reader: ReadImpl<T>,
|
7
7
|
}
|
8
8
|
|
9
|
+
impl<T: RecordParser> Drop for RecordReader<T> {
|
10
|
+
fn drop(&mut self) {
|
11
|
+
match &mut self.reader {
|
12
|
+
ReadImpl::MultiThreaded {
|
13
|
+
receiver,
|
14
|
+
handle,
|
15
|
+
headers,
|
16
|
+
} => {
|
17
|
+
receiver.close();
|
18
|
+
if let Some(handle) = handle.take() {
|
19
|
+
let _ = handle.join();
|
20
|
+
}
|
21
|
+
StringCache::clear(headers).unwrap();
|
22
|
+
}
|
23
|
+
ReadImpl::SingleThreaded { headers, .. } => {
|
24
|
+
StringCache::clear(headers).unwrap();
|
25
|
+
}
|
26
|
+
}
|
27
|
+
}
|
28
|
+
}
|
29
|
+
|
9
30
|
#[allow(dead_code)]
|
10
31
|
pub enum ReadImpl<T: RecordParser> {
|
11
32
|
SingleThreaded {
|
12
33
|
reader: csv::Reader<Box<dyn Read + Send + 'static>>,
|
13
|
-
headers: Vec<String>,
|
34
|
+
headers: Vec<&'static str>,
|
14
35
|
null_string: String,
|
15
36
|
},
|
16
37
|
MultiThreaded {
|
38
|
+
headers: Vec<&'static str>,
|
17
39
|
receiver: kanal::Receiver<T::Output>,
|
18
40
|
handle: Option<thread::JoinHandle<()>>,
|
19
41
|
},
|
@@ -48,7 +70,9 @@ impl<T: RecordParser> Iterator for RecordReader<T> {
|
|
48
70
|
|
49
71
|
fn next(&mut self) -> Option<Self::Item> {
|
50
72
|
match &mut self.reader {
|
51
|
-
ReadImpl::MultiThreaded {
|
73
|
+
ReadImpl::MultiThreaded {
|
74
|
+
receiver, handle, ..
|
75
|
+
} => match receiver.recv() {
|
52
76
|
Ok(record) => Some(record),
|
53
77
|
Err(_) => {
|
54
78
|
if let Some(handle) = handle.take() {
|
data/ext/osv/src/csv/record.rs
CHANGED
@@ -1,17 +1,23 @@
|
|
1
|
-
use magnus::{IntoValue, Ruby, Value};
|
1
|
+
use magnus::{IntoValue, RHash, Ruby, Value};
|
2
2
|
use std::collections::HashMap;
|
3
3
|
|
4
4
|
#[derive(Debug)]
|
5
5
|
pub enum CsvRecord {
|
6
6
|
Vec(Vec<Option<String>>),
|
7
|
-
Map(HashMap<String, Option<String>>),
|
7
|
+
Map(HashMap<&'static str, Option<String>>),
|
8
8
|
}
|
9
9
|
|
10
10
|
impl IntoValue for CsvRecord {
|
11
11
|
fn into_value_with(self, handle: &Ruby) -> Value {
|
12
12
|
match self {
|
13
13
|
CsvRecord::Vec(vec) => vec.into_value_with(handle),
|
14
|
-
CsvRecord::Map(map) => map.into_value_with(handle),
|
14
|
+
CsvRecord::Map(map) => {
|
15
|
+
let hash = RHash::new();
|
16
|
+
for (k, v) in map {
|
17
|
+
hash.aset(k, v).unwrap();
|
18
|
+
}
|
19
|
+
hash.into_value_with(handle)
|
20
|
+
}
|
15
21
|
}
|
16
22
|
}
|
17
23
|
}
|
data/ext/osv/src/reader.rs
CHANGED
@@ -1,15 +1,15 @@
|
|
1
|
-
use std::collections::HashMap;
|
2
|
-
|
3
1
|
use crate::csv::{CsvRecord, RecordReaderBuilder};
|
4
2
|
use crate::utils::*;
|
5
3
|
use magnus::value::ReprValue;
|
6
4
|
use magnus::{block::Yield, Error, KwArgs, RHash, Ruby, Symbol, Value};
|
5
|
+
use std::collections::HashMap;
|
7
6
|
|
8
7
|
pub fn parse_csv(
|
9
|
-
ruby: &Ruby,
|
10
8
|
rb_self: Value,
|
11
9
|
args: &[Value],
|
12
10
|
) -> Result<Yield<Box<dyn Iterator<Item = CsvRecord>>>, Error> {
|
11
|
+
let ruby = unsafe { Ruby::get_unchecked() };
|
12
|
+
|
13
13
|
let CsvArgs {
|
14
14
|
to_read,
|
15
15
|
has_headers,
|
@@ -18,7 +18,7 @@ pub fn parse_csv(
|
|
18
18
|
null_string,
|
19
19
|
buffer_size,
|
20
20
|
result_type,
|
21
|
-
} = parse_csv_args(ruby, args)?;
|
21
|
+
} = parse_csv_args(&ruby, args)?;
|
22
22
|
|
23
23
|
if !ruby.block_given() {
|
24
24
|
return create_enumerator(EnumeratorArgs {
|
@@ -35,7 +35,7 @@ pub fn parse_csv(
|
|
35
35
|
|
36
36
|
let iter: Box<dyn Iterator<Item = CsvRecord>> = match result_type.as_str() {
|
37
37
|
"hash" => Box::new(
|
38
|
-
RecordReaderBuilder::<HashMap<String, Option<String>>>::new(ruby, to_read)
|
38
|
+
RecordReaderBuilder::<HashMap<&'static str, Option<String>>>::new(&ruby, to_read)
|
39
39
|
.has_headers(has_headers)
|
40
40
|
.delimiter(delimiter)
|
41
41
|
.quote_char(quote_char)
|
@@ -45,7 +45,7 @@ pub fn parse_csv(
|
|
45
45
|
.map(CsvRecord::Map),
|
46
46
|
),
|
47
47
|
"array" => Box::new(
|
48
|
-
RecordReaderBuilder::<Vec<Option<String>>>::new(ruby, to_read)
|
48
|
+
RecordReaderBuilder::<Vec<Option<String>>>::new(&ruby, to_read)
|
49
49
|
.has_headers(has_headers)
|
50
50
|
.delimiter(delimiter)
|
51
51
|
.quote_char(quote_char)
|
data/lib/osv/version.rb
CHANGED
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: osv
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.3.0
|
4
|
+
version: 0.3.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Nathan Jaremko
|
@@ -56,6 +56,7 @@ files:
|
|
56
56
|
- ext/osv/Cargo.toml
|
57
57
|
- ext/osv/extconf.rb
|
58
58
|
- ext/osv/src/csv/builder.rs
|
59
|
+
- ext/osv/src/csv/header_cache.rs
|
59
60
|
- ext/osv/src/csv/mod.rs
|
60
61
|
- ext/osv/src/csv/parser.rs
|
61
62
|
- ext/osv/src/csv/reader.rs
|