osv 0.5.0 → 0.5.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Cargo.lock +4 -16
- data/ext/osv/Cargo.toml +1 -1
- data/ext/osv/src/csv/builder.rs +6 -1
- data/ext/osv/src/csv/header_cache.rs +12 -13
- data/ext/osv/src/csv/parser.rs +12 -0
- data/ext/osv/src/csv/record.rs +1 -1
- data/ext/osv/src/csv/ruby_reader.rs +3 -21
- data/lib/osv/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: ec1e35e2e9f14666ea236823b6433cd7910d3a384de97ade622aaf40bb320114
|
4
|
+
data.tar.gz: a4b61200cdf3350e153d93f3269695d771f7f985ed0bc9b2d55ec0b7da98086e
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: a612087b5d63ebb1d30b215e346f351df1fc7e6d2a05ddc2860f3be9ddda3cba8311259a5673591f576dc7e2ac6fcfd3c8eeac5a8a3f2d376f7898cfcf2953ab
|
7
|
+
data.tar.gz: '02898ed010a8fb358f22d1caf7bbdc302733e2d80935fc4d03a6a0be0204513c63c58cfa875ea6e3326b78d664b9a2b7f3a303ba284396fb62820b8138c97306'
|
data/Cargo.lock
CHANGED
@@ -270,18 +270,6 @@ version = "0.4.15"
|
|
270
270
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
271
271
|
checksum = "d26c52dbd32dccf2d10cac7725f8eae5296885fb5703b261f7d0a0739ec807ab"
|
272
272
|
|
273
|
-
[[package]]
|
274
|
-
name = "magnus"
|
275
|
-
version = "0.6.4"
|
276
|
-
source = "registry+https://github.com/rust-lang/crates.io-index"
|
277
|
-
checksum = "b1597ef40aa8c36be098249e82c9a20cf7199278ac1c1a1a995eeead6a184479"
|
278
|
-
dependencies = [
|
279
|
-
"magnus-macros",
|
280
|
-
"rb-sys",
|
281
|
-
"rb-sys-env",
|
282
|
-
"seq-macro",
|
283
|
-
]
|
284
|
-
|
285
273
|
[[package]]
|
286
274
|
name = "magnus"
|
287
275
|
version = "0.7.1"
|
@@ -360,7 +348,7 @@ dependencies = [
|
|
360
348
|
"flate2",
|
361
349
|
"itertools 0.14.0",
|
362
350
|
"jemallocator",
|
363
|
-
"magnus 0.7.1",
|
351
|
+
"magnus",
|
364
352
|
"mimalloc",
|
365
353
|
"rb-sys",
|
366
354
|
"serde",
|
@@ -499,11 +487,11 @@ dependencies = [
|
|
499
487
|
|
500
488
|
[[package]]
|
501
489
|
name = "serde_magnus"
|
502
|
-
version = "0.
|
490
|
+
version = "0.9.0"
|
503
491
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
504
|
-
checksum = "
|
492
|
+
checksum = "51b8b945a2dadb221f1c5490cfb411cab6c3821446b8eca50ee07e5a3893ec51"
|
505
493
|
dependencies = [
|
506
|
-
"magnus 0.6.4",
|
494
|
+
"magnus",
|
507
495
|
"serde",
|
508
496
|
"tap",
|
509
497
|
]
|
data/ext/osv/Cargo.toml
CHANGED
@@ -13,7 +13,7 @@ flate2 = "1.0.35"
|
|
13
13
|
magnus = { version = "0.7", features = ["rb-sys"] }
|
14
14
|
rb-sys = "^0.9"
|
15
15
|
serde = { version = "1.0", features = ["derive"] }
|
16
|
-
serde_magnus = "0.
|
16
|
+
serde_magnus = "0.9.0"
|
17
17
|
thiserror = "2.0"
|
18
18
|
itertools = "^0.14"
|
19
19
|
tempfile = "3.17.1"
|
data/ext/osv/src/csv/builder.rs
CHANGED
@@ -174,7 +174,12 @@ impl<'a, T: RecordParser<'a>> RecordReaderBuilder<'a, T> {
|
|
174
174
|
if self.ignore_null_bytes {
|
175
175
|
headers = headers.iter().map(|h| h.replace("\0", "")).collect();
|
176
176
|
}
|
177
|
-
|
177
|
+
|
178
|
+
let static_headers = if T::uses_headers() {
|
179
|
+
StringCache::intern_many(&headers)?
|
180
|
+
} else {
|
181
|
+
Vec::new()
|
182
|
+
};
|
178
183
|
|
179
184
|
let null_string = self
|
180
185
|
.null_string
|
data/ext/osv/src/csv/header_cache.rs
CHANGED
@@ -6,23 +6,22 @@
|
|
6
6
|
/// so this optimization could be removed if any issues arise.
|
7
7
|
use std::{
|
8
8
|
collections::HashMap,
|
9
|
-
sync::{
|
10
|
-
atomic::{AtomicU32, Ordering},
|
11
|
-
LazyLock, Mutex,
|
12
|
-
},
|
9
|
+
sync::{LazyLock, Mutex},
|
13
10
|
};
|
14
11
|
|
15
12
|
use magnus::{IntoValue, RString, Ruby, Value};
|
16
13
|
|
17
14
|
use thiserror::Error;
|
18
15
|
|
19
|
-
#[derive(Debug, Error)]
|
16
|
+
#[derive(Debug, Clone, Error)]
|
20
17
|
pub enum CacheError {
|
21
18
|
#[error("Failed to acquire lock: {0}")]
|
22
19
|
LockError(String),
|
20
|
+
#[error("Failed to convert Ruby String to interned string: {0}")]
|
21
|
+
RStringConversion(String),
|
23
22
|
}
|
24
23
|
|
25
|
-
static STRING_CACHE: LazyLock<Mutex<HashMap<&'static str,
|
24
|
+
static STRING_CACHE: LazyLock<Mutex<HashMap<&'static str, StringCacheKey>>> =
|
26
25
|
LazyLock::new(|| Mutex::new(HashMap::with_capacity(100)));
|
27
26
|
|
28
27
|
pub struct StringCache;
|
@@ -31,10 +30,12 @@ pub struct StringCache;
|
|
31
30
|
pub struct StringCacheKey(&'static str);
|
32
31
|
|
33
32
|
impl StringCacheKey {
|
34
|
-
pub fn new(string: &str) -> Self {
|
33
|
+
pub fn new(string: &str) -> Result<Self, CacheError> {
|
35
34
|
let rstr = RString::new(string);
|
36
35
|
let fstr = rstr.to_interned_str();
|
37
|
-
Self(fstr.as_str().
|
36
|
+
Ok(Self(fstr.as_str().map_err(|e| {
|
37
|
+
CacheError::RStringConversion(e.to_string())
|
38
|
+
})?))
|
38
39
|
}
|
39
40
|
}
|
40
41
|
|
@@ -80,18 +81,16 @@ impl StringCache {
|
|
80
81
|
pub fn intern_many<AsStr: AsRef<str>>(
|
81
82
|
strings: &[AsStr],
|
82
83
|
) -> Result<Vec<StringCacheKey>, CacheError> {
|
83
|
-
let
|
84
|
+
let cache = STRING_CACHE
|
84
85
|
.lock()
|
85
86
|
.map_err(|e| CacheError::LockError(e.to_string()))?;
|
86
87
|
|
87
88
|
let mut result: Vec<StringCacheKey> = Vec::with_capacity(strings.len());
|
88
89
|
for string in strings {
|
89
|
-
if let Some((_,
|
90
|
-
counter.fetch_add(1, Ordering::Relaxed);
|
90
|
+
if let Some((_, interned_string)) = cache.get_key_value(string.as_ref()) {
|
91
91
|
result.push(*interned_string);
|
92
92
|
} else {
|
93
|
-
let interned = StringCacheKey::new(string.as_ref())
|
94
|
-
cache.insert(interned.0, (interned, AtomicU32::new(1)));
|
93
|
+
let interned = StringCacheKey::new(string.as_ref())?;
|
95
94
|
result.push(interned);
|
96
95
|
}
|
97
96
|
}
|
data/ext/osv/src/csv/parser.rs
CHANGED
@@ -19,6 +19,8 @@ pub trait RecordParser<'a> {
|
|
19
19
|
null_string: Option<Cow<'a, str>>,
|
20
20
|
ignore_null_bytes: bool,
|
21
21
|
) -> Self::Output;
|
22
|
+
|
23
|
+
fn uses_headers() -> bool;
|
22
24
|
}
|
23
25
|
|
24
26
|
impl<'a, S: BuildHasher + Default> RecordParser<'a>
|
@@ -26,6 +28,11 @@ impl<'a, S: BuildHasher + Default> RecordParser<'a>
|
|
26
28
|
{
|
27
29
|
type Output = Self;
|
28
30
|
|
31
|
+
#[inline]
|
32
|
+
fn uses_headers() -> bool {
|
33
|
+
true
|
34
|
+
}
|
35
|
+
|
29
36
|
#[inline]
|
30
37
|
fn parse(
|
31
38
|
headers: &[StringCacheKey],
|
@@ -66,6 +73,11 @@ impl<'a, S: BuildHasher + Default> RecordParser<'a>
|
|
66
73
|
impl<'a> RecordParser<'a> for Vec<Option<CowStr<'a>>> {
|
67
74
|
type Output = Self;
|
68
75
|
|
76
|
+
#[inline]
|
77
|
+
fn uses_headers() -> bool {
|
78
|
+
false
|
79
|
+
}
|
80
|
+
|
69
81
|
#[inline]
|
70
82
|
fn parse(
|
71
83
|
headers: &[StringCacheKey],
|
data/ext/osv/src/csv/record.rs
CHANGED
@@ -26,7 +26,7 @@ impl<S: BuildHasher + Default> IntoValue for CsvRecord<'_, S> {
|
|
26
26
|
let mut values: [Value; 128] = [handle.qnil().as_value(); 128];
|
27
27
|
let mut i = 0;
|
28
28
|
|
29
|
-
for chunk in &map.into_iter().chunks(
|
29
|
+
for chunk in &map.into_iter().chunks(64) {
|
30
30
|
for (k, v) in chunk {
|
31
31
|
values[i] = handle.into_value(k.as_ref());
|
32
32
|
values[i + 1] = handle.into_value(v);
|
data/ext/osv/src/csv/ruby_reader.rs
CHANGED
@@ -1,18 +1,15 @@
|
|
1
1
|
use flate2::bufread::GzDecoder;
|
2
2
|
use magnus::{
|
3
3
|
value::{Opaque, ReprValue},
|
4
|
-
|
4
|
+
RString, Ruby, Value,
|
5
5
|
};
|
6
6
|
use std::{
|
7
7
|
fs::File,
|
8
8
|
io::{self, BufReader, Read, Write},
|
9
|
-
sync::OnceLock,
|
10
9
|
};
|
11
10
|
|
12
11
|
use super::{builder::ReaderError, record_reader::READ_BUFFER_SIZE};
|
13
12
|
|
14
|
-
static STRING_IO_CLASS: OnceLock<Opaque<RClass>> = OnceLock::new();
|
15
|
-
|
16
13
|
/// A reader that can handle various Ruby input types (String, StringIO, IO-like objects)
|
17
14
|
/// and provide a standard Read implementation for them.
|
18
15
|
pub enum RubyReader {
|
@@ -29,15 +26,6 @@ pub enum RubyReader {
|
|
29
26
|
}
|
30
27
|
|
31
28
|
impl RubyReader {
|
32
|
-
fn is_string_io(ruby: &Ruby, value: &Value) -> bool {
|
33
|
-
let string_io_class = STRING_IO_CLASS.get_or_init(|| {
|
34
|
-
let class = RClass::from_value(ruby.eval("StringIO").expect("Failed to find StringIO"))
|
35
|
-
.expect("Failed to get StringIO class");
|
36
|
-
Opaque::from(class)
|
37
|
-
});
|
38
|
-
value.is_kind_of(ruby.get_inner(*string_io_class))
|
39
|
-
}
|
40
|
-
|
41
29
|
fn is_io_like(value: &Value) -> bool {
|
42
30
|
value.respond_to("read", false).unwrap_or(false)
|
43
31
|
}
|
@@ -48,19 +36,13 @@ impl TryFrom<Value> for RubyReader {
|
|
48
36
|
|
49
37
|
fn try_from(value: Value) -> Result<Self, Self::Error> {
|
50
38
|
let ruby = unsafe { Ruby::get_unchecked() };
|
51
|
-
if RubyReader::is_string_io(&ruby, &value) {
|
52
|
-
let string_content = value.funcall::<_, _, RString>("string", ())?;
|
53
|
-
Ok(RubyReader::String {
|
54
|
-
inner: Opaque::from(string_content),
|
55
|
-
offset: 0,
|
56
|
-
})
|
57
|
-
} else if RubyReader::is_io_like(&value) {
|
39
|
+
if RubyReader::is_io_like(&value) {
|
58
40
|
Ok(RubyReader::RubyIoLike {
|
59
41
|
inner: Opaque::from(value),
|
60
42
|
})
|
61
43
|
} else if value.is_kind_of(ruby.class_string()) {
|
62
44
|
let ruby_string = value.to_r_string()?;
|
63
|
-
let file_path =
|
45
|
+
let file_path = ruby_string.to_string()?;
|
64
46
|
let file = File::open(&file_path)?;
|
65
47
|
|
66
48
|
let x: Box<dyn Read> = if file_path.ends_with(".gz") {
|
data/lib/osv/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: osv
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.5.0
|
4
|
+
version: 0.5.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Nathan Jaremko
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2025-
|
11
|
+
date: 2025-03-20 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rb_sys
|