osv 0.4.2 → 0.4.3

This diff compares the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: c05364b766bdf78065ac6da9553bfa7992d61691b5ca111609e385ae05ded4e8
4
- data.tar.gz: 6eb41be77ba5dfd567f5014ac13232f93231edba7777ea63039e836840b2673e
3
+ metadata.gz: cae389fff24c3109f17a1c450022771da964e1b7dced4ed2f34f93753c213dc8
4
+ data.tar.gz: 1e3736fb0b84003f62a5038a3d7e71e7d6e31581f943452c2acb08b04a21ff64
5
5
  SHA512:
6
- metadata.gz: 65ede075b21b4a9f172343002943799765a599ed5462384ce21199eae451dd8b7a66fa1eb1809bcaec9e44ef48654a6212b6c765015e338435aa792790931051
7
- data.tar.gz: 4edbfc0d1c152c13e0b679ba035426cbea9fbac7d4a4f5a07ef3c4a9797724aa43ef6ee1424671568770e1e6fb121a3501a32f86fc6ee1aacaccdd85610644ee
6
+ metadata.gz: ddae565f1b208de7fc18fa1cdaff7b3d1ed02ec22bf85d0477a9b152d9238049893a5ef40bad95ac4b2d8f8cb0cd59f14fd4e365aa69f3ef109fa6f6701d2499
7
+ data.tar.gz: aa81197d39f7e3dcc5732bfb7d71545cd9303d888ee7da1a34b0244ade287a8b25db16f7428d4f09689dfe1d874d3663bebb6d546e6b363f7496137339b15150
@@ -169,7 +169,8 @@ impl<'a, T: RecordParser<'a>> RecordReaderBuilder<'a, T> {
169
169
  return Err(ReaderError::InvalidFileDescriptor(fd));
170
170
  }
171
171
 
172
- let file = unsafe { File::from_raw_fd(fd) };
172
+ let file = std::panic::catch_unwind(|| unsafe { File::from_raw_fd(fd) })
173
+ .map_err(|e| ReaderError::FileDescriptor(format!("{:?}", e)))?;
173
174
  let forgotten = ForgottenFileHandle(ManuallyDrop::new(file));
174
175
  Ok(Box::new(forgotten))
175
176
  }
@@ -8,11 +8,11 @@ use std::{
8
8
  collections::HashMap,
9
9
  sync::{
10
10
  atomic::{AtomicU32, Ordering},
11
- Arc, LazyLock, Mutex,
11
+ LazyLock, Mutex,
12
12
  },
13
13
  };
14
14
 
15
- use magnus::{r_string::FString, value::Opaque, IntoValue, RString, Ruby, Value};
15
+ use magnus::{IntoValue, RString, Ruby, Value};
16
16
 
17
17
  use thiserror::Error;
18
18
 
@@ -22,24 +22,25 @@ pub enum CacheError {
22
22
  LockError(String),
23
23
  }
24
24
 
25
- static STRING_CACHE: LazyLock<Mutex<HashMap<&'static str, (Arc<StringCacheKey>, AtomicU32)>>> =
25
+ static STRING_CACHE: LazyLock<Mutex<HashMap<&'static str, (StringCacheKey, AtomicU32)>>> =
26
26
  LazyLock::new(|| Mutex::new(HashMap::with_capacity(100)));
27
27
 
28
28
  pub struct StringCache;
29
29
 
30
- pub struct StringCacheKey(Opaque<FString>, &'static str);
30
+ #[derive(Copy, Clone)]
31
+ pub struct StringCacheKey(&'static str);
31
32
 
32
33
  impl StringCacheKey {
33
34
  pub fn new(string: &str) -> Self {
34
35
  let rstr = RString::new(string);
35
36
  let fstr = rstr.to_interned_str();
36
- Self(Opaque::from(fstr), fstr.as_str().unwrap())
37
+ Self(fstr.as_str().unwrap())
37
38
  }
38
39
  }
39
40
 
40
41
  impl AsRef<str> for StringCacheKey {
41
42
  fn as_ref(&self) -> &'static str {
42
- self.1
43
+ self.0
43
44
  }
44
45
  }
45
46
 
@@ -57,13 +58,13 @@ impl IntoValue for &StringCacheKey {
57
58
 
58
59
  impl std::fmt::Debug for StringCacheKey {
59
60
  fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
60
- self.1.fmt(f)
61
+ self.0.fmt(f)
61
62
  }
62
63
  }
63
64
 
64
65
  impl PartialEq for StringCacheKey {
65
66
  fn eq(&self, other: &Self) -> bool {
66
- self.1 == other.1
67
+ self.0 == other.0
67
68
  }
68
69
  }
69
70
 
@@ -71,42 +72,26 @@ impl std::cmp::Eq for StringCacheKey {}
71
72
 
72
73
  impl std::hash::Hash for StringCacheKey {
73
74
  fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
74
- self.1.hash(state);
75
+ self.0.hash(state);
75
76
  }
76
77
  }
77
78
 
78
79
  impl StringCache {
79
- #[allow(dead_code)]
80
- pub fn intern(string: String) -> Result<Arc<StringCacheKey>, CacheError> {
80
+ pub fn intern_many<AsStr: AsRef<str>>(
81
+ strings: &[AsStr],
82
+ ) -> Result<Vec<StringCacheKey>, CacheError> {
81
83
  let mut cache = STRING_CACHE
82
84
  .lock()
83
85
  .map_err(|e| CacheError::LockError(e.to_string()))?;
84
86
 
85
- if let Some((_, (interned_string, counter))) = cache.get_key_value(string.as_str()) {
86
- counter.fetch_add(1, Ordering::Relaxed);
87
- Ok(interned_string.clone())
88
- } else {
89
- let interned = Arc::new(StringCacheKey::new(string.as_str()));
90
- let leaked = Box::leak(string.into_boxed_str());
91
- cache.insert(leaked, (interned.clone(), AtomicU32::new(1)));
92
- Ok(interned)
93
- }
94
- }
95
-
96
- pub fn intern_many(strings: &[String]) -> Result<Vec<Arc<StringCacheKey>>, CacheError> {
97
- let mut cache = STRING_CACHE
98
- .lock()
99
- .map_err(|e| CacheError::LockError(e.to_string()))?;
100
-
101
- let mut result: Vec<Arc<StringCacheKey>> = Vec::with_capacity(strings.len());
87
+ let mut result: Vec<StringCacheKey> = Vec::with_capacity(strings.len());
102
88
  for string in strings {
103
- if let Some((_, (interned_string, counter))) = cache.get_key_value(string.as_str()) {
89
+ if let Some((_, (interned_string, counter))) = cache.get_key_value(string.as_ref()) {
104
90
  counter.fetch_add(1, Ordering::Relaxed);
105
- result.push(interned_string.clone());
91
+ result.push(*interned_string);
106
92
  } else {
107
- let interned = Arc::new(StringCacheKey::new(string));
108
- let leaked = Box::leak(string.clone().into_boxed_str());
109
- cache.insert(leaked, (interned.clone(), AtomicU32::new(1)));
93
+ let interned = StringCacheKey::new(string.as_ref());
94
+ cache.insert(interned.0, (interned, AtomicU32::new(1)));
110
95
  result.push(interned);
111
96
  }
112
97
  }
@@ -1,7 +1,6 @@
1
1
  use std::borrow::Cow;
2
2
  use std::collections::HashMap;
3
3
  use std::hash::BuildHasher;
4
- use std::sync::Arc;
5
4
 
6
5
  use super::header_cache::StringCacheKey;
7
6
  use super::CowStr;
@@ -15,7 +14,7 @@ pub trait RecordParser<'a> {
15
14
  type Output;
16
15
 
17
16
  fn parse(
18
- headers: &[Arc<StringCacheKey>],
17
+ headers: &[StringCacheKey],
19
18
  record: &CsvRecordType,
20
19
  null_string: Option<Cow<'a, str>>,
21
20
  ignore_null_bytes: bool,
@@ -23,46 +22,38 @@ pub trait RecordParser<'a> {
23
22
  }
24
23
 
25
24
  impl<'a, S: BuildHasher + Default> RecordParser<'a>
26
- for HashMap<Arc<StringCacheKey>, Option<CowStr<'a>>, S>
25
+ for HashMap<StringCacheKey, Option<CowStr<'a>>, S>
27
26
  {
28
27
  type Output = Self;
29
28
 
30
29
  #[inline]
31
30
  fn parse(
32
- headers: &[Arc<StringCacheKey>],
31
+ headers: &[StringCacheKey],
33
32
  record: &CsvRecordType,
34
33
  null_string: Option<Cow<'a, str>>,
35
34
  ignore_null_bytes: bool,
36
35
  ) -> Self::Output {
37
36
  let mut map = HashMap::with_capacity_and_hasher(headers.len(), S::default());
38
-
39
37
  let shared_empty = Cow::Borrowed("");
40
38
 
41
39
  headers.iter().enumerate().for_each(|(i, header)| {
42
40
  let value = match record {
43
41
  CsvRecordType::String(s) => s.get(i).and_then(|field| {
44
- if null_string.as_deref() == Some(field.as_ref()) {
45
- None
46
- } else if field.is_empty() {
47
- Some(CowStr(shared_empty.clone()))
48
- } else if ignore_null_bytes {
49
- Some(CowStr(Cow::Owned(field.replace("\0", ""))))
50
- } else {
51
- Some(CowStr(Cow::Owned(field.to_string())))
52
- }
42
+ convert_field_to_cow_str(
43
+ field,
44
+ null_string.as_deref(),
45
+ ignore_null_bytes,
46
+ &shared_empty,
47
+ )
53
48
  }),
54
-
55
49
  CsvRecordType::Byte(b) => b.get(i).and_then(|field| {
56
50
  let field = String::from_utf8_lossy(field);
57
- if null_string.as_deref() == Some(field.as_ref()) {
58
- None
59
- } else if field.is_empty() {
60
- Some(CowStr(shared_empty.clone()))
61
- } else if ignore_null_bytes {
62
- Some(CowStr(Cow::Owned(field.replace("\0", ""))))
63
- } else {
64
- Some(CowStr(Cow::Owned(field.to_string())))
65
- }
51
+ convert_field_to_cow_str(
52
+ &field,
53
+ null_string.as_deref(),
54
+ ignore_null_bytes,
55
+ &shared_empty,
56
+ )
66
57
  }),
67
58
  };
68
59
 
@@ -77,43 +68,36 @@ impl<'a> RecordParser<'a> for Vec<Option<CowStr<'a>>> {
77
68
 
78
69
  #[inline]
79
70
  fn parse(
80
- headers: &[Arc<StringCacheKey>],
71
+ headers: &[StringCacheKey],
81
72
  record: &CsvRecordType,
82
73
  null_string: Option<Cow<'a, str>>,
83
74
  ignore_null_bytes: bool,
84
75
  ) -> Self::Output {
85
76
  let target_len = headers.len();
86
77
  let mut vec = Vec::with_capacity(target_len);
87
-
88
78
  let shared_empty = Cow::Borrowed("");
89
79
 
90
80
  match record {
91
81
  CsvRecordType::String(record) => {
92
82
  for field in record.iter() {
93
- let value = if Some(field.as_ref()) == null_string.as_deref() {
94
- None
95
- } else if field.is_empty() {
96
- Some(CowStr(shared_empty.clone()))
97
- } else if ignore_null_bytes {
98
- Some(CowStr(Cow::Owned(field.replace("\0", ""))))
99
- } else {
100
- Some(CowStr(Cow::Owned(field.to_string())))
101
- };
83
+ let value = convert_field_to_cow_str(
84
+ field,
85
+ null_string.as_deref(),
86
+ ignore_null_bytes,
87
+ &shared_empty,
88
+ );
102
89
  vec.push(value);
103
90
  }
104
91
  }
105
92
  CsvRecordType::Byte(record) => {
106
93
  for field in record.iter() {
107
94
  let field = String::from_utf8_lossy(field);
108
- let value = if Some(field.as_ref()) == null_string.as_deref() {
109
- None
110
- } else if field.is_empty() {
111
- Some(CowStr(shared_empty.clone()))
112
- } else if ignore_null_bytes {
113
- Some(CowStr(Cow::Owned(field.replace("\0", ""))))
114
- } else {
115
- Some(CowStr(Cow::Owned(field.to_string())))
116
- };
95
+ let value = convert_field_to_cow_str(
96
+ &field,
97
+ null_string.as_deref(),
98
+ ignore_null_bytes,
99
+ &shared_empty,
100
+ );
117
101
  vec.push(value);
118
102
  }
119
103
  }
@@ -122,3 +106,21 @@ impl<'a> RecordParser<'a> for Vec<Option<CowStr<'a>>> {
122
106
  vec
123
107
  }
124
108
  }
109
+
110
+ #[inline]
111
+ fn convert_field_to_cow_str<'a>(
112
+ field: &str,
113
+ null_string: Option<&str>,
114
+ ignore_null_bytes: bool,
115
+ shared_empty: &Cow<'a, str>,
116
+ ) -> Option<CowStr<'a>> {
117
+ if Some(field) == null_string {
118
+ None
119
+ } else if field.is_empty() {
120
+ Some(CowStr(shared_empty.clone()))
121
+ } else if ignore_null_bytes {
122
+ Some(CowStr(Cow::Owned(field.replace("\0", ""))))
123
+ } else {
124
+ Some(CowStr(Cow::Owned(field.to_string())))
125
+ }
126
+ }
@@ -1,13 +1,13 @@
1
1
  use itertools::Itertools;
2
2
  use magnus::{value::ReprValue, IntoValue, Ruby, Value};
3
- use std::{borrow::Cow, collections::HashMap, hash::BuildHasher, sync::Arc};
3
+ use std::{borrow::Cow, collections::HashMap, hash::BuildHasher};
4
4
 
5
5
  use super::StringCacheKey;
6
6
 
7
7
  #[derive(Debug)]
8
8
  pub enum CsvRecord<'a, S: BuildHasher + Default> {
9
9
  Vec(Vec<Option<CowStr<'a>>>),
10
- Map(HashMap<Arc<StringCacheKey>, Option<CowStr<'a>>, S>),
10
+ Map(HashMap<StringCacheKey, Option<CowStr<'a>>, S>),
11
11
  }
12
12
 
13
13
  impl<S: BuildHasher + Default> IntoValue for CsvRecord<'_, S> {
@@ -5,7 +5,6 @@ use super::ruby_reader::SeekableRead;
5
5
  use magnus::{Error, Ruby};
6
6
  use std::borrow::Cow;
7
7
  use std::io::{BufReader, Read};
8
- use std::sync::Arc;
9
8
 
10
9
  /// Size of the internal buffer used for reading CSV records
11
10
  pub(crate) const READ_BUFFER_SIZE: usize = 16384;
@@ -15,7 +14,7 @@ pub(crate) const READ_BUFFER_SIZE: usize = 16384;
15
14
  /// This struct implements Iterator to provide a streaming interface for CSV records.
16
15
  pub struct RecordReader<'a, T: RecordParser<'a>> {
17
16
  reader: csv::Reader<BufReader<Box<dyn SeekableRead>>>,
18
- headers: Vec<Arc<StringCacheKey>>,
17
+ headers: Vec<StringCacheKey>,
19
18
  null_string: Option<Cow<'a, str>>,
20
19
  string_record: CsvRecordType,
21
20
  parser: std::marker::PhantomData<T>,
@@ -75,7 +74,7 @@ impl<'a, T: RecordParser<'a>> RecordReader<'a, T> {
75
74
  /// Creates a new RecordReader instance.
76
75
  pub(crate) fn new(
77
76
  reader: csv::Reader<BufReader<Box<dyn SeekableRead>>>,
78
- headers: Vec<Arc<StringCacheKey>>,
77
+ headers: Vec<StringCacheKey>,
79
78
  null_string: Option<Cow<'a, str>>,
80
79
  ignore_null_bytes: bool,
81
80
  lossy: bool,
@@ -5,7 +5,6 @@ use csv::Trim;
5
5
  use magnus::value::ReprValue;
6
6
  use magnus::{Error, IntoValue, KwArgs, RHash, Ruby, Symbol, Value};
7
7
  use std::collections::HashMap;
8
- use std::sync::Arc;
9
8
 
10
9
  /// Valid result types for CSV parsing
11
10
  #[derive(Debug, Clone, Copy, PartialEq, Eq)]
@@ -94,7 +93,7 @@ pub fn parse_csv(rb_self: Value, args: &[Value]) -> Result<Value, Error> {
94
93
  match result_type {
95
94
  ResultType::Hash => {
96
95
  let builder = RecordReaderBuilder::<
97
- HashMap<Arc<StringCacheKey>, Option<CowStr<'_>>, RandomState>,
96
+ HashMap<StringCacheKey, Option<CowStr<'_>>, RandomState>,
98
97
  >::new(ruby, to_read)
99
98
  .has_headers(has_headers)
100
99
  .flexible(flexible)
@@ -109,7 +108,7 @@ pub fn parse_csv(rb_self: Value, args: &[Value]) -> Result<Value, Error> {
109
108
  let ruby = unsafe { Ruby::get_unchecked() };
110
109
  for result in builder {
111
110
  let record = result?;
112
- let _: Value = ruby.yield_value(CsvRecord::Map(record))?;
111
+ let _: Value = ruby.yield_value(CsvRecord::<ahash::RandomState>::Map(record))?;
113
112
  }
114
113
  }
115
114
  ResultType::Array => {
data/lib/osv/version.rb CHANGED
@@ -1,3 +1,3 @@
1
1
  module OSV
2
- VERSION = "0.4.2"
2
+ VERSION = "0.4.3"
3
3
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: osv
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.4.2
4
+ version: 0.4.3
5
5
  platform: ruby
6
6
  authors:
7
7
  - Nathan Jaremko