osv 0.4.1 → 0.4.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 137ae556685639f7d13234e3061d9b310757ce02f75a713753d175f1bc71b628
4
- data.tar.gz: 5892494ad08d783955d2b932150d65433a4d3593376fadbaf54e54780e7a350f
3
+ metadata.gz: cae389fff24c3109f17a1c450022771da964e1b7dced4ed2f34f93753c213dc8
4
+ data.tar.gz: 1e3736fb0b84003f62a5038a3d7e71e7d6e31581f943452c2acb08b04a21ff64
5
5
  SHA512:
6
- metadata.gz: 6efbc2ee65a8e79379722ae977ee7dbec6131b78968d080f9feb86a3310368c387da54dd8c073e9b4008cb80d906293ea9115982d00d5ff637cf5ab51179b53c
7
- data.tar.gz: 7b4ab3199f90654cd831dfbb52a9d22b70237e7120bd5308a1b7698268fa981abefd7ee47d53424d0c7bff46956256db8f1e139d17e381fd5570a16ca183e376
6
+ metadata.gz: ddae565f1b208de7fc18fa1cdaff7b3d1ed02ec22bf85d0477a9b152d9238049893a5ef40bad95ac4b2d8f8cb0cd59f14fd4e365aa69f3ef109fa6f6701d2499
7
+ data.tar.gz: aa81197d39f7e3dcc5732bfb7d71545cd9303d888ee7da1a34b0244ade287a8b25db16f7428d4f09689dfe1d874d3663bebb6d546e6b363f7496137339b15150
@@ -169,7 +169,8 @@ impl<'a, T: RecordParser<'a>> RecordReaderBuilder<'a, T> {
169
169
  return Err(ReaderError::InvalidFileDescriptor(fd));
170
170
  }
171
171
 
172
- let file = unsafe { File::from_raw_fd(fd) };
172
+ let file = std::panic::catch_unwind(|| unsafe { File::from_raw_fd(fd) })
173
+ .map_err(|e| ReaderError::FileDescriptor(format!("{:?}", e)))?;
173
174
  let forgotten = ForgottenFileHandle(ManuallyDrop::new(file));
174
175
  Ok(Box::new(forgotten))
175
176
  }
@@ -8,11 +8,11 @@ use std::{
8
8
  collections::HashMap,
9
9
  sync::{
10
10
  atomic::{AtomicU32, Ordering},
11
- Arc, LazyLock, Mutex, OnceLock,
11
+ LazyLock, Mutex,
12
12
  },
13
13
  };
14
14
 
15
- use magnus::{r_string::FString, value::Opaque, IntoValue, RString, Ruby, Value};
15
+ use magnus::{IntoValue, RString, Ruby, Value};
16
16
 
17
17
  use thiserror::Error;
18
18
 
@@ -22,24 +22,25 @@ pub enum CacheError {
22
22
  LockError(String),
23
23
  }
24
24
 
25
- static STRING_CACHE: LazyLock<Mutex<HashMap<&'static str, (Arc<StringCacheKey>, AtomicU32)>>> =
25
+ static STRING_CACHE: LazyLock<Mutex<HashMap<&'static str, (StringCacheKey, AtomicU32)>>> =
26
26
  LazyLock::new(|| Mutex::new(HashMap::with_capacity(100)));
27
27
 
28
28
  pub struct StringCache;
29
29
 
30
- pub struct StringCacheKey(Opaque<FString>, &'static str);
30
+ #[derive(Copy, Clone)]
31
+ pub struct StringCacheKey(&'static str);
31
32
 
32
33
  impl StringCacheKey {
33
34
  pub fn new(string: &str) -> Self {
34
35
  let rstr = RString::new(string);
35
36
  let fstr = rstr.to_interned_str();
36
- Self(Opaque::from(fstr), fstr.as_str().unwrap())
37
+ Self(fstr.as_str().unwrap())
37
38
  }
38
39
  }
39
40
 
40
41
  impl AsRef<str> for StringCacheKey {
41
42
  fn as_ref(&self) -> &'static str {
42
- self.1
43
+ self.0
43
44
  }
44
45
  }
45
46
 
@@ -57,13 +58,13 @@ impl IntoValue for &StringCacheKey {
57
58
 
58
59
  impl std::fmt::Debug for StringCacheKey {
59
60
  fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
60
- self.1.fmt(f)
61
+ self.0.fmt(f)
61
62
  }
62
63
  }
63
64
 
64
65
  impl PartialEq for StringCacheKey {
65
66
  fn eq(&self, other: &Self) -> bool {
66
- self.1 == other.1
67
+ self.0 == other.0
67
68
  }
68
69
  }
69
70
 
@@ -71,95 +72,29 @@ impl std::cmp::Eq for StringCacheKey {}
71
72
 
72
73
  impl std::hash::Hash for StringCacheKey {
73
74
  fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
74
- self.1.hash(state);
75
+ self.0.hash(state);
75
76
  }
76
77
  }
77
78
 
78
79
  impl StringCache {
79
- #[allow(dead_code)]
80
- pub fn intern(string: String) -> Result<Arc<StringCacheKey>, CacheError> {
80
+ pub fn intern_many<AsStr: AsRef<str>>(
81
+ strings: &[AsStr],
82
+ ) -> Result<Vec<StringCacheKey>, CacheError> {
81
83
  let mut cache = STRING_CACHE
82
84
  .lock()
83
85
  .map_err(|e| CacheError::LockError(e.to_string()))?;
84
86
 
85
- if let Some((_, (interned_string, counter))) = cache.get_key_value(string.as_str()) {
86
- counter.fetch_add(1, Ordering::Relaxed);
87
- Ok(interned_string.clone())
88
- } else {
89
- let interned = Arc::new(StringCacheKey::new(string.as_str()));
90
- let leaked = Box::leak(string.into_boxed_str());
91
- cache.insert(leaked, (interned.clone(), AtomicU32::new(1)));
92
- Ok(interned)
93
- }
94
- }
95
-
96
- pub fn intern_many(strings: &[String]) -> Result<Vec<Arc<StringCacheKey>>, CacheError> {
97
- let mut cache = STRING_CACHE
98
- .lock()
99
- .map_err(|e| CacheError::LockError(e.to_string()))?;
100
-
101
- let mut result: Vec<Arc<StringCacheKey>> = Vec::with_capacity(strings.len());
87
+ let mut result: Vec<StringCacheKey> = Vec::with_capacity(strings.len());
102
88
  for string in strings {
103
- if let Some((_, (interned_string, counter))) = cache.get_key_value(string.as_str()) {
89
+ if let Some((_, (interned_string, counter))) = cache.get_key_value(string.as_ref()) {
104
90
  counter.fetch_add(1, Ordering::Relaxed);
105
- result.push(interned_string.clone());
91
+ result.push(*interned_string);
106
92
  } else {
107
- let interned = Arc::new(StringCacheKey::new(string));
108
- let leaked = Box::leak(string.clone().into_boxed_str());
109
- cache.insert(leaked, (interned.clone(), AtomicU32::new(1)));
93
+ let interned = StringCacheKey::new(string.as_ref());
94
+ cache.insert(interned.0, (interned, AtomicU32::new(1)));
110
95
  result.push(interned);
111
96
  }
112
97
  }
113
98
  Ok(result)
114
99
  }
115
-
116
- pub fn clear(headers: &[Arc<StringCacheKey>]) -> Result<(), CacheError> {
117
- let mut cache = STRING_CACHE
118
- .lock()
119
- .map_err(|e| CacheError::LockError(e.to_string()))?;
120
-
121
- let to_remove: Vec<_> = headers
122
- .iter()
123
- .filter_map(|header| {
124
- let key = header.as_ref().as_ref();
125
- if let Some((_, (_, counter))) = cache.get_key_value(key) {
126
- let prev_count = counter.fetch_sub(1, Ordering::Relaxed);
127
- if prev_count == 1 {
128
- Some(key)
129
- } else {
130
- None
131
- }
132
- } else {
133
- None
134
- }
135
- })
136
- .collect();
137
-
138
- for key in to_remove {
139
- cache.remove(key);
140
- }
141
-
142
- Ok(())
143
- }
144
- }
145
-
146
- pub struct HeaderCacheCleanupIter<I> {
147
- pub inner: I,
148
- pub headers: OnceLock<Vec<Arc<StringCacheKey>>>,
149
- }
150
-
151
- impl<I: Iterator> Iterator for HeaderCacheCleanupIter<I> {
152
- type Item = I::Item;
153
-
154
- fn next(&mut self) -> Option<Self::Item> {
155
- self.inner.next()
156
- }
157
- }
158
-
159
- impl<I> Drop for HeaderCacheCleanupIter<I> {
160
- fn drop(&mut self) {
161
- if let Some(headers) = self.headers.get() {
162
- StringCache::clear(headers).unwrap();
163
- }
164
- }
165
100
  }
@@ -1,7 +1,6 @@
1
1
  use std::borrow::Cow;
2
2
  use std::collections::HashMap;
3
3
  use std::hash::BuildHasher;
4
- use std::sync::Arc;
5
4
 
6
5
  use super::header_cache::StringCacheKey;
7
6
  use super::CowStr;
@@ -15,7 +14,7 @@ pub trait RecordParser<'a> {
15
14
  type Output;
16
15
 
17
16
  fn parse(
18
- headers: &[Arc<StringCacheKey>],
17
+ headers: &[StringCacheKey],
19
18
  record: &CsvRecordType,
20
19
  null_string: Option<Cow<'a, str>>,
21
20
  ignore_null_bytes: bool,
@@ -23,46 +22,38 @@ pub trait RecordParser<'a> {
23
22
  }
24
23
 
25
24
  impl<'a, S: BuildHasher + Default> RecordParser<'a>
26
- for HashMap<Arc<StringCacheKey>, Option<CowStr<'a>>, S>
25
+ for HashMap<StringCacheKey, Option<CowStr<'a>>, S>
27
26
  {
28
27
  type Output = Self;
29
28
 
30
29
  #[inline]
31
30
  fn parse(
32
- headers: &[Arc<StringCacheKey>],
31
+ headers: &[StringCacheKey],
33
32
  record: &CsvRecordType,
34
33
  null_string: Option<Cow<'a, str>>,
35
34
  ignore_null_bytes: bool,
36
35
  ) -> Self::Output {
37
36
  let mut map = HashMap::with_capacity_and_hasher(headers.len(), S::default());
38
-
39
37
  let shared_empty = Cow::Borrowed("");
40
38
 
41
39
  headers.iter().enumerate().for_each(|(i, header)| {
42
40
  let value = match record {
43
41
  CsvRecordType::String(s) => s.get(i).and_then(|field| {
44
- if null_string.as_deref() == Some(field.as_ref()) {
45
- None
46
- } else if field.is_empty() {
47
- Some(CowStr(shared_empty.clone()))
48
- } else if ignore_null_bytes {
49
- Some(CowStr(Cow::Owned(field.replace("\0", ""))))
50
- } else {
51
- Some(CowStr(Cow::Owned(field.to_string())))
52
- }
42
+ convert_field_to_cow_str(
43
+ field,
44
+ null_string.as_deref(),
45
+ ignore_null_bytes,
46
+ &shared_empty,
47
+ )
53
48
  }),
54
-
55
49
  CsvRecordType::Byte(b) => b.get(i).and_then(|field| {
56
50
  let field = String::from_utf8_lossy(field);
57
- if null_string.as_deref() == Some(field.as_ref()) {
58
- None
59
- } else if field.is_empty() {
60
- Some(CowStr(shared_empty.clone()))
61
- } else if ignore_null_bytes {
62
- Some(CowStr(Cow::Owned(field.replace("\0", ""))))
63
- } else {
64
- Some(CowStr(Cow::Owned(field.to_string())))
65
- }
51
+ convert_field_to_cow_str(
52
+ &field,
53
+ null_string.as_deref(),
54
+ ignore_null_bytes,
55
+ &shared_empty,
56
+ )
66
57
  }),
67
58
  };
68
59
 
@@ -77,43 +68,36 @@ impl<'a> RecordParser<'a> for Vec<Option<CowStr<'a>>> {
77
68
 
78
69
  #[inline]
79
70
  fn parse(
80
- headers: &[Arc<StringCacheKey>],
71
+ headers: &[StringCacheKey],
81
72
  record: &CsvRecordType,
82
73
  null_string: Option<Cow<'a, str>>,
83
74
  ignore_null_bytes: bool,
84
75
  ) -> Self::Output {
85
76
  let target_len = headers.len();
86
77
  let mut vec = Vec::with_capacity(target_len);
87
-
88
78
  let shared_empty = Cow::Borrowed("");
89
79
 
90
80
  match record {
91
81
  CsvRecordType::String(record) => {
92
82
  for field in record.iter() {
93
- let value = if Some(field.as_ref()) == null_string.as_deref() {
94
- None
95
- } else if field.is_empty() {
96
- Some(CowStr(shared_empty.clone()))
97
- } else if ignore_null_bytes {
98
- Some(CowStr(Cow::Owned(field.replace("\0", ""))))
99
- } else {
100
- Some(CowStr(Cow::Owned(field.to_string())))
101
- };
83
+ let value = convert_field_to_cow_str(
84
+ field,
85
+ null_string.as_deref(),
86
+ ignore_null_bytes,
87
+ &shared_empty,
88
+ );
102
89
  vec.push(value);
103
90
  }
104
91
  }
105
92
  CsvRecordType::Byte(record) => {
106
93
  for field in record.iter() {
107
94
  let field = String::from_utf8_lossy(field);
108
- let value = if Some(field.as_ref()) == null_string.as_deref() {
109
- None
110
- } else if field.is_empty() {
111
- Some(CowStr(shared_empty.clone()))
112
- } else if ignore_null_bytes {
113
- Some(CowStr(Cow::Owned(field.replace("\0", ""))))
114
- } else {
115
- Some(CowStr(Cow::Owned(field.to_string())))
116
- };
95
+ let value = convert_field_to_cow_str(
96
+ &field,
97
+ null_string.as_deref(),
98
+ ignore_null_bytes,
99
+ &shared_empty,
100
+ );
117
101
  vec.push(value);
118
102
  }
119
103
  }
@@ -122,3 +106,21 @@ impl<'a> RecordParser<'a> for Vec<Option<CowStr<'a>>> {
122
106
  vec
123
107
  }
124
108
  }
109
+
110
+ #[inline]
111
+ fn convert_field_to_cow_str<'a>(
112
+ field: &str,
113
+ null_string: Option<&str>,
114
+ ignore_null_bytes: bool,
115
+ shared_empty: &Cow<'a, str>,
116
+ ) -> Option<CowStr<'a>> {
117
+ if Some(field) == null_string {
118
+ None
119
+ } else if field.is_empty() {
120
+ Some(CowStr(shared_empty.clone()))
121
+ } else if ignore_null_bytes {
122
+ Some(CowStr(Cow::Owned(field.replace("\0", ""))))
123
+ } else {
124
+ Some(CowStr(Cow::Owned(field.to_string())))
125
+ }
126
+ }
@@ -1,13 +1,13 @@
1
1
  use itertools::Itertools;
2
2
  use magnus::{value::ReprValue, IntoValue, Ruby, Value};
3
- use std::{borrow::Cow, collections::HashMap, hash::BuildHasher, sync::Arc};
3
+ use std::{borrow::Cow, collections::HashMap, hash::BuildHasher};
4
4
 
5
5
  use super::StringCacheKey;
6
6
 
7
7
  #[derive(Debug)]
8
8
  pub enum CsvRecord<'a, S: BuildHasher + Default> {
9
9
  Vec(Vec<Option<CowStr<'a>>>),
10
- Map(HashMap<Arc<StringCacheKey>, Option<CowStr<'a>>, S>),
10
+ Map(HashMap<StringCacheKey, Option<CowStr<'a>>, S>),
11
11
  }
12
12
 
13
13
  impl<S: BuildHasher + Default> IntoValue for CsvRecord<'_, S> {
@@ -1,11 +1,10 @@
1
1
  use super::builder::ReaderError;
2
2
  use super::header_cache::StringCacheKey;
3
3
  use super::parser::{CsvRecordType, RecordParser};
4
- use super::{header_cache::StringCache, ruby_reader::SeekableRead};
4
+ use super::ruby_reader::SeekableRead;
5
5
  use magnus::{Error, Ruby};
6
6
  use std::borrow::Cow;
7
7
  use std::io::{BufReader, Read};
8
- use std::sync::Arc;
9
8
 
10
9
  /// Size of the internal buffer used for reading CSV records
11
10
  pub(crate) const READ_BUFFER_SIZE: usize = 16384;
@@ -15,7 +14,7 @@ pub(crate) const READ_BUFFER_SIZE: usize = 16384;
15
14
  /// This struct implements Iterator to provide a streaming interface for CSV records.
16
15
  pub struct RecordReader<'a, T: RecordParser<'a>> {
17
16
  reader: csv::Reader<BufReader<Box<dyn SeekableRead>>>,
18
- headers: Vec<Arc<StringCacheKey>>,
17
+ headers: Vec<StringCacheKey>,
19
18
  null_string: Option<Cow<'a, str>>,
20
19
  string_record: CsvRecordType,
21
20
  parser: std::marker::PhantomData<T>,
@@ -75,7 +74,7 @@ impl<'a, T: RecordParser<'a>> RecordReader<'a, T> {
75
74
  /// Creates a new RecordReader instance.
76
75
  pub(crate) fn new(
77
76
  reader: csv::Reader<BufReader<Box<dyn SeekableRead>>>,
78
- headers: Vec<Arc<StringCacheKey>>,
77
+ headers: Vec<StringCacheKey>,
79
78
  null_string: Option<Cow<'a, str>>,
80
79
  ignore_null_bytes: bool,
81
80
  lossy: bool,
@@ -137,11 +136,3 @@ impl<'a, T: RecordParser<'a>> Iterator for RecordReader<'a, T> {
137
136
  (0, None) // Cannot determine size without reading entire file
138
137
  }
139
138
  }
140
-
141
- impl<'a, T: RecordParser<'a>> Drop for RecordReader<'a, T> {
142
- #[inline]
143
- fn drop(&mut self) {
144
- // Intentionally ignore errors during cleanup as there's no meaningful way to handle them
145
- let _ = StringCache::clear(&self.headers);
146
- }
147
- }
@@ -5,7 +5,6 @@ use csv::Trim;
5
5
  use magnus::value::ReprValue;
6
6
  use magnus::{Error, IntoValue, KwArgs, RHash, Ruby, Symbol, Value};
7
7
  use std::collections::HashMap;
8
- use std::sync::Arc;
9
8
 
10
9
  /// Valid result types for CSV parsing
11
10
  #[derive(Debug, Clone, Copy, PartialEq, Eq)]
@@ -94,7 +93,7 @@ pub fn parse_csv(rb_self: Value, args: &[Value]) -> Result<Value, Error> {
94
93
  match result_type {
95
94
  ResultType::Hash => {
96
95
  let builder = RecordReaderBuilder::<
97
- HashMap<Arc<StringCacheKey>, Option<CowStr<'static>>, RandomState>,
96
+ HashMap<StringCacheKey, Option<CowStr<'_>>, RandomState>,
98
97
  >::new(ruby, to_read)
99
98
  .has_headers(has_headers)
100
99
  .flexible(flexible)
@@ -109,11 +108,11 @@ pub fn parse_csv(rb_self: Value, args: &[Value]) -> Result<Value, Error> {
109
108
  let ruby = unsafe { Ruby::get_unchecked() };
110
109
  for result in builder {
111
110
  let record = result?;
112
- let _: Value = ruby.yield_value(CsvRecord::Map(record))?;
111
+ let _: Value = ruby.yield_value(CsvRecord::<ahash::RandomState>::Map(record))?;
113
112
  }
114
113
  }
115
114
  ResultType::Array => {
116
- let builder = RecordReaderBuilder::<Vec<Option<CowStr<'static>>>>::new(ruby, to_read)
115
+ let builder = RecordReaderBuilder::<Vec<Option<CowStr<'_>>>>::new(ruby, to_read)
117
116
  .has_headers(has_headers)
118
117
  .flexible(flexible)
119
118
  .trim(trim)
data/lib/osv/version.rb CHANGED
@@ -1,3 +1,3 @@
1
1
  module OSV
2
- VERSION = "0.4.1"
2
+ VERSION = "0.4.3"
3
3
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: osv
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.4.1
4
+ version: 0.4.3
5
5
  platform: ruby
6
6
  authors:
7
7
  - Nathan Jaremko