osv 0.4.2 → 0.4.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/ext/osv/src/csv/builder.rs +2 -1
- data/ext/osv/src/csv/header_cache.rs +18 -33
- data/ext/osv/src/csv/parser.rs +46 -44
- data/ext/osv/src/csv/record.rs +2 -2
- data/ext/osv/src/csv/record_reader.rs +2 -3
- data/ext/osv/src/reader.rs +2 -3
- data/lib/osv/version.rb +1 -1
- metadata +1 -1
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: cae389fff24c3109f17a1c450022771da964e1b7dced4ed2f34f93753c213dc8
|
4
|
+
data.tar.gz: 1e3736fb0b84003f62a5038a3d7e71e7d6e31581f943452c2acb08b04a21ff64
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: ddae565f1b208de7fc18fa1cdaff7b3d1ed02ec22bf85d0477a9b152d9238049893a5ef40bad95ac4b2d8f8cb0cd59f14fd4e365aa69f3ef109fa6f6701d2499
|
7
|
+
data.tar.gz: aa81197d39f7e3dcc5732bfb7d71545cd9303d888ee7da1a34b0244ade287a8b25db16f7428d4f09689dfe1d874d3663bebb6d546e6b363f7496137339b15150
|
data/ext/osv/src/csv/builder.rs
CHANGED
@@ -169,7 +169,8 @@ impl<'a, T: RecordParser<'a>> RecordReaderBuilder<'a, T> {
|
|
169
169
|
return Err(ReaderError::InvalidFileDescriptor(fd));
|
170
170
|
}
|
171
171
|
|
172
|
-
let file = unsafe { File::from_raw_fd(fd) }
|
172
|
+
let file = std::panic::catch_unwind(|| unsafe { File::from_raw_fd(fd) })
|
173
|
+
.map_err(|e| ReaderError::FileDescriptor(format!("{:?}", e)))?;
|
173
174
|
let forgotten = ForgottenFileHandle(ManuallyDrop::new(file));
|
174
175
|
Ok(Box::new(forgotten))
|
175
176
|
}
|
data/ext/osv/src/csv/header_cache.rs
CHANGED
@@ -8,11 +8,11 @@ use std::{
|
|
8
8
|
collections::HashMap,
|
9
9
|
sync::{
|
10
10
|
atomic::{AtomicU32, Ordering},
|
11
|
-
|
11
|
+
LazyLock, Mutex,
|
12
12
|
},
|
13
13
|
};
|
14
14
|
|
15
|
-
use magnus::{
|
15
|
+
use magnus::{IntoValue, RString, Ruby, Value};
|
16
16
|
|
17
17
|
use thiserror::Error;
|
18
18
|
|
@@ -22,24 +22,25 @@ pub enum CacheError {
|
|
22
22
|
LockError(String),
|
23
23
|
}
|
24
24
|
|
25
|
-
static STRING_CACHE: LazyLock<Mutex<HashMap<&'static str, (
|
25
|
+
static STRING_CACHE: LazyLock<Mutex<HashMap<&'static str, (StringCacheKey, AtomicU32)>>> =
|
26
26
|
LazyLock::new(|| Mutex::new(HashMap::with_capacity(100)));
|
27
27
|
|
28
28
|
pub struct StringCache;
|
29
29
|
|
30
|
-
|
30
|
+
#[derive(Copy, Clone)]
|
31
|
+
pub struct StringCacheKey(&'static str);
|
31
32
|
|
32
33
|
impl StringCacheKey {
|
33
34
|
pub fn new(string: &str) -> Self {
|
34
35
|
let rstr = RString::new(string);
|
35
36
|
let fstr = rstr.to_interned_str();
|
36
|
-
Self(
|
37
|
+
Self(fstr.as_str().unwrap())
|
37
38
|
}
|
38
39
|
}
|
39
40
|
|
40
41
|
impl AsRef<str> for StringCacheKey {
|
41
42
|
fn as_ref(&self) -> &'static str {
|
42
|
-
self.
|
43
|
+
self.0
|
43
44
|
}
|
44
45
|
}
|
45
46
|
|
@@ -57,13 +58,13 @@ impl IntoValue for &StringCacheKey {
|
|
57
58
|
|
58
59
|
impl std::fmt::Debug for StringCacheKey {
|
59
60
|
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
60
|
-
self.
|
61
|
+
self.0.fmt(f)
|
61
62
|
}
|
62
63
|
}
|
63
64
|
|
64
65
|
impl PartialEq for StringCacheKey {
|
65
66
|
fn eq(&self, other: &Self) -> bool {
|
66
|
-
self.
|
67
|
+
self.0 == other.0
|
67
68
|
}
|
68
69
|
}
|
69
70
|
|
@@ -71,42 +72,26 @@ impl std::cmp::Eq for StringCacheKey {}
|
|
71
72
|
|
72
73
|
impl std::hash::Hash for StringCacheKey {
|
73
74
|
fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
|
74
|
-
self.
|
75
|
+
self.0.hash(state);
|
75
76
|
}
|
76
77
|
}
|
77
78
|
|
78
79
|
impl StringCache {
|
79
|
-
|
80
|
-
|
80
|
+
pub fn intern_many<AsStr: AsRef<str>>(
|
81
|
+
strings: &[AsStr],
|
82
|
+
) -> Result<Vec<StringCacheKey>, CacheError> {
|
81
83
|
let mut cache = STRING_CACHE
|
82
84
|
.lock()
|
83
85
|
.map_err(|e| CacheError::LockError(e.to_string()))?;
|
84
86
|
|
85
|
-
|
86
|
-
counter.fetch_add(1, Ordering::Relaxed);
|
87
|
-
Ok(interned_string.clone())
|
88
|
-
} else {
|
89
|
-
let interned = Arc::new(StringCacheKey::new(string.as_str()));
|
90
|
-
let leaked = Box::leak(string.into_boxed_str());
|
91
|
-
cache.insert(leaked, (interned.clone(), AtomicU32::new(1)));
|
92
|
-
Ok(interned)
|
93
|
-
}
|
94
|
-
}
|
95
|
-
|
96
|
-
pub fn intern_many(strings: &[String]) -> Result<Vec<Arc<StringCacheKey>>, CacheError> {
|
97
|
-
let mut cache = STRING_CACHE
|
98
|
-
.lock()
|
99
|
-
.map_err(|e| CacheError::LockError(e.to_string()))?;
|
100
|
-
|
101
|
-
let mut result: Vec<Arc<StringCacheKey>> = Vec::with_capacity(strings.len());
|
87
|
+
let mut result: Vec<StringCacheKey> = Vec::with_capacity(strings.len());
|
102
88
|
for string in strings {
|
103
|
-
if let Some((_, (interned_string, counter))) = cache.get_key_value(string.
|
89
|
+
if let Some((_, (interned_string, counter))) = cache.get_key_value(string.as_ref()) {
|
104
90
|
counter.fetch_add(1, Ordering::Relaxed);
|
105
|
-
result.push(interned_string
|
91
|
+
result.push(*interned_string);
|
106
92
|
} else {
|
107
|
-
let interned =
|
108
|
-
|
109
|
-
cache.insert(leaked, (interned.clone(), AtomicU32::new(1)));
|
93
|
+
let interned = StringCacheKey::new(string.as_ref());
|
94
|
+
cache.insert(interned.0, (interned, AtomicU32::new(1)));
|
110
95
|
result.push(interned);
|
111
96
|
}
|
112
97
|
}
|
data/ext/osv/src/csv/parser.rs
CHANGED
@@ -1,7 +1,6 @@
|
|
1
1
|
use std::borrow::Cow;
|
2
2
|
use std::collections::HashMap;
|
3
3
|
use std::hash::BuildHasher;
|
4
|
-
use std::sync::Arc;
|
5
4
|
|
6
5
|
use super::header_cache::StringCacheKey;
|
7
6
|
use super::CowStr;
|
@@ -15,7 +14,7 @@ pub trait RecordParser<'a> {
|
|
15
14
|
type Output;
|
16
15
|
|
17
16
|
fn parse(
|
18
|
-
headers: &[
|
17
|
+
headers: &[StringCacheKey],
|
19
18
|
record: &CsvRecordType,
|
20
19
|
null_string: Option<Cow<'a, str>>,
|
21
20
|
ignore_null_bytes: bool,
|
@@ -23,46 +22,38 @@ pub trait RecordParser<'a> {
|
|
23
22
|
}
|
24
23
|
|
25
24
|
impl<'a, S: BuildHasher + Default> RecordParser<'a>
|
26
|
-
for HashMap<
|
25
|
+
for HashMap<StringCacheKey, Option<CowStr<'a>>, S>
|
27
26
|
{
|
28
27
|
type Output = Self;
|
29
28
|
|
30
29
|
#[inline]
|
31
30
|
fn parse(
|
32
|
-
headers: &[
|
31
|
+
headers: &[StringCacheKey],
|
33
32
|
record: &CsvRecordType,
|
34
33
|
null_string: Option<Cow<'a, str>>,
|
35
34
|
ignore_null_bytes: bool,
|
36
35
|
) -> Self::Output {
|
37
36
|
let mut map = HashMap::with_capacity_and_hasher(headers.len(), S::default());
|
38
|
-
|
39
37
|
let shared_empty = Cow::Borrowed("");
|
40
38
|
|
41
39
|
headers.iter().enumerate().for_each(|(i, header)| {
|
42
40
|
let value = match record {
|
43
41
|
CsvRecordType::String(s) => s.get(i).and_then(|field| {
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
} else {
|
51
|
-
Some(CowStr(Cow::Owned(field.to_string())))
|
52
|
-
}
|
42
|
+
convert_field_to_cow_str(
|
43
|
+
field,
|
44
|
+
null_string.as_deref(),
|
45
|
+
ignore_null_bytes,
|
46
|
+
&shared_empty,
|
47
|
+
)
|
53
48
|
}),
|
54
|
-
|
55
49
|
CsvRecordType::Byte(b) => b.get(i).and_then(|field| {
|
56
50
|
let field = String::from_utf8_lossy(field);
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
} else {
|
64
|
-
Some(CowStr(Cow::Owned(field.to_string())))
|
65
|
-
}
|
51
|
+
convert_field_to_cow_str(
|
52
|
+
&field,
|
53
|
+
null_string.as_deref(),
|
54
|
+
ignore_null_bytes,
|
55
|
+
&shared_empty,
|
56
|
+
)
|
66
57
|
}),
|
67
58
|
};
|
68
59
|
|
@@ -77,43 +68,36 @@ impl<'a> RecordParser<'a> for Vec<Option<CowStr<'a>>> {
|
|
77
68
|
|
78
69
|
#[inline]
|
79
70
|
fn parse(
|
80
|
-
headers: &[
|
71
|
+
headers: &[StringCacheKey],
|
81
72
|
record: &CsvRecordType,
|
82
73
|
null_string: Option<Cow<'a, str>>,
|
83
74
|
ignore_null_bytes: bool,
|
84
75
|
) -> Self::Output {
|
85
76
|
let target_len = headers.len();
|
86
77
|
let mut vec = Vec::with_capacity(target_len);
|
87
|
-
|
88
78
|
let shared_empty = Cow::Borrowed("");
|
89
79
|
|
90
80
|
match record {
|
91
81
|
CsvRecordType::String(record) => {
|
92
82
|
for field in record.iter() {
|
93
|
-
let value =
|
94
|
-
|
95
|
-
|
96
|
-
|
97
|
-
|
98
|
-
|
99
|
-
} else {
|
100
|
-
Some(CowStr(Cow::Owned(field.to_string())))
|
101
|
-
};
|
83
|
+
let value = convert_field_to_cow_str(
|
84
|
+
field,
|
85
|
+
null_string.as_deref(),
|
86
|
+
ignore_null_bytes,
|
87
|
+
&shared_empty,
|
88
|
+
);
|
102
89
|
vec.push(value);
|
103
90
|
}
|
104
91
|
}
|
105
92
|
CsvRecordType::Byte(record) => {
|
106
93
|
for field in record.iter() {
|
107
94
|
let field = String::from_utf8_lossy(field);
|
108
|
-
let value =
|
109
|
-
|
110
|
-
|
111
|
-
|
112
|
-
|
113
|
-
|
114
|
-
} else {
|
115
|
-
Some(CowStr(Cow::Owned(field.to_string())))
|
116
|
-
};
|
95
|
+
let value = convert_field_to_cow_str(
|
96
|
+
&field,
|
97
|
+
null_string.as_deref(),
|
98
|
+
ignore_null_bytes,
|
99
|
+
&shared_empty,
|
100
|
+
);
|
117
101
|
vec.push(value);
|
118
102
|
}
|
119
103
|
}
|
@@ -122,3 +106,21 @@ impl<'a> RecordParser<'a> for Vec<Option<CowStr<'a>>> {
|
|
122
106
|
vec
|
123
107
|
}
|
124
108
|
}
|
109
|
+
|
110
|
+
#[inline]
|
111
|
+
fn convert_field_to_cow_str<'a>(
|
112
|
+
field: &str,
|
113
|
+
null_string: Option<&str>,
|
114
|
+
ignore_null_bytes: bool,
|
115
|
+
shared_empty: &Cow<'a, str>,
|
116
|
+
) -> Option<CowStr<'a>> {
|
117
|
+
if Some(field) == null_string {
|
118
|
+
None
|
119
|
+
} else if field.is_empty() {
|
120
|
+
Some(CowStr(shared_empty.clone()))
|
121
|
+
} else if ignore_null_bytes {
|
122
|
+
Some(CowStr(Cow::Owned(field.replace("\0", ""))))
|
123
|
+
} else {
|
124
|
+
Some(CowStr(Cow::Owned(field.to_string())))
|
125
|
+
}
|
126
|
+
}
|
data/ext/osv/src/csv/record.rs
CHANGED
@@ -1,13 +1,13 @@
|
|
1
1
|
use itertools::Itertools;
|
2
2
|
use magnus::{value::ReprValue, IntoValue, Ruby, Value};
|
3
|
-
use std::{borrow::Cow, collections::HashMap, hash::BuildHasher
|
3
|
+
use std::{borrow::Cow, collections::HashMap, hash::BuildHasher};
|
4
4
|
|
5
5
|
use super::StringCacheKey;
|
6
6
|
|
7
7
|
#[derive(Debug)]
|
8
8
|
pub enum CsvRecord<'a, S: BuildHasher + Default> {
|
9
9
|
Vec(Vec<Option<CowStr<'a>>>),
|
10
|
-
Map(HashMap<
|
10
|
+
Map(HashMap<StringCacheKey, Option<CowStr<'a>>, S>),
|
11
11
|
}
|
12
12
|
|
13
13
|
impl<S: BuildHasher + Default> IntoValue for CsvRecord<'_, S> {
|
data/ext/osv/src/csv/record_reader.rs
CHANGED
@@ -5,7 +5,6 @@ use super::ruby_reader::SeekableRead;
|
|
5
5
|
use magnus::{Error, Ruby};
|
6
6
|
use std::borrow::Cow;
|
7
7
|
use std::io::{BufReader, Read};
|
8
|
-
use std::sync::Arc;
|
9
8
|
|
10
9
|
/// Size of the internal buffer used for reading CSV records
|
11
10
|
pub(crate) const READ_BUFFER_SIZE: usize = 16384;
|
@@ -15,7 +14,7 @@ pub(crate) const READ_BUFFER_SIZE: usize = 16384;
|
|
15
14
|
/// This struct implements Iterator to provide a streaming interface for CSV records.
|
16
15
|
pub struct RecordReader<'a, T: RecordParser<'a>> {
|
17
16
|
reader: csv::Reader<BufReader<Box<dyn SeekableRead>>>,
|
18
|
-
headers: Vec<
|
17
|
+
headers: Vec<StringCacheKey>,
|
19
18
|
null_string: Option<Cow<'a, str>>,
|
20
19
|
string_record: CsvRecordType,
|
21
20
|
parser: std::marker::PhantomData<T>,
|
@@ -75,7 +74,7 @@ impl<'a, T: RecordParser<'a>> RecordReader<'a, T> {
|
|
75
74
|
/// Creates a new RecordReader instance.
|
76
75
|
pub(crate) fn new(
|
77
76
|
reader: csv::Reader<BufReader<Box<dyn SeekableRead>>>,
|
78
|
-
headers: Vec<
|
77
|
+
headers: Vec<StringCacheKey>,
|
79
78
|
null_string: Option<Cow<'a, str>>,
|
80
79
|
ignore_null_bytes: bool,
|
81
80
|
lossy: bool,
|
data/ext/osv/src/reader.rs
CHANGED
@@ -5,7 +5,6 @@ use csv::Trim;
|
|
5
5
|
use magnus::value::ReprValue;
|
6
6
|
use magnus::{Error, IntoValue, KwArgs, RHash, Ruby, Symbol, Value};
|
7
7
|
use std::collections::HashMap;
|
8
|
-
use std::sync::Arc;
|
9
8
|
|
10
9
|
/// Valid result types for CSV parsing
|
11
10
|
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
|
@@ -94,7 +93,7 @@ pub fn parse_csv(rb_self: Value, args: &[Value]) -> Result<Value, Error> {
|
|
94
93
|
match result_type {
|
95
94
|
ResultType::Hash => {
|
96
95
|
let builder = RecordReaderBuilder::<
|
97
|
-
HashMap<
|
96
|
+
HashMap<StringCacheKey, Option<CowStr<'_>>, RandomState>,
|
98
97
|
>::new(ruby, to_read)
|
99
98
|
.has_headers(has_headers)
|
100
99
|
.flexible(flexible)
|
@@ -109,7 +108,7 @@ pub fn parse_csv(rb_self: Value, args: &[Value]) -> Result<Value, Error> {
|
|
109
108
|
let ruby = unsafe { Ruby::get_unchecked() };
|
110
109
|
for result in builder {
|
111
110
|
let record = result?;
|
112
|
-
let _: Value = ruby.yield_value(CsvRecord::Map(record))?;
|
111
|
+
let _: Value = ruby.yield_value(CsvRecord::<ahash::RandomState>::Map(record))?;
|
113
112
|
}
|
114
113
|
}
|
115
114
|
ResultType::Array => {
|
data/lib/osv/version.rb
CHANGED