osv 0.3.15 → 0.3.17

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,4 +1,4 @@
1
- use crate::csv::{CowValue, CsvRecord, RecordReaderBuilder};
1
+ use crate::csv::{CowStr, CsvRecord, RecordReaderBuilder, StringCacheKey};
2
2
  use crate::utils::*;
3
3
  use ahash::RandomState;
4
4
  use csv::Trim;
@@ -6,12 +6,49 @@ use magnus::value::ReprValue;
6
6
  use magnus::{block::Yield, Error, KwArgs, RHash, Ruby, Symbol, Value};
7
7
  use std::collections::HashMap;
8
8
 
9
+ /// Valid result types for CSV parsing
10
+ #[derive(Debug, Clone, Copy, PartialEq, Eq)]
11
+ enum ResultType {
12
+ Hash,
13
+ Array,
14
+ }
15
+
16
+ impl ResultType {
17
+ fn from_str(s: &str) -> Option<Self> {
18
+ match s {
19
+ "hash" => Some(Self::Hash),
20
+ "array" => Some(Self::Array),
21
+ _ => None,
22
+ }
23
+ }
24
+ }
25
+
26
+ /// Arguments for creating an enumerator
27
+ #[derive(Debug)]
28
+ struct EnumeratorArgs {
29
+ rb_self: Value,
30
+ to_read: Value,
31
+ has_headers: bool,
32
+ delimiter: u8,
33
+ quote_char: u8,
34
+ null_string: Option<String>,
35
+ result_type: String,
36
+ flexible: bool,
37
+ flexible_default: Option<String>,
38
+ trim: Option<String>,
39
+ }
40
+
41
+ /// Parses a CSV file with the given configuration.
42
+ ///
43
+ /// # Safety
44
+ /// This function uses unsafe code to get the Ruby runtime and leak memory for static references.
45
+ /// This is necessary for Ruby integration but should be used with caution.
9
46
  pub fn parse_csv(
10
47
  rb_self: Value,
11
48
  args: &[Value],
12
49
  ) -> Result<Yield<Box<dyn Iterator<Item = CsvRecord<'static, RandomState>>>>, Error> {
13
- let original = unsafe { Ruby::get_unchecked() };
14
- let ruby: &'static Ruby = Box::leak(Box::new(original));
50
+ // SAFETY: We're in a Ruby callback, so Ruby runtime is guaranteed to be initialized
51
+ let ruby = unsafe { Ruby::get_unchecked() };
15
52
 
16
53
  let ReadCsvArgs {
17
54
  to_read,
@@ -19,16 +56,11 @@ pub fn parse_csv(
19
56
  delimiter,
20
57
  quote_char,
21
58
  null_string,
22
- buffer_size,
23
59
  result_type,
24
60
  flexible,
25
61
  flexible_default,
26
62
  trim,
27
- } = parse_read_csv_args(ruby, args)?;
28
-
29
- let flexible_default: &'static Option<String> = Box::leak(Box::new(flexible_default));
30
- let leaked_flexible_default: &'static Option<&str> =
31
- Box::leak(Box::new(flexible_default.as_deref()));
63
+ } = parse_read_csv_args(&ruby, args)?;
32
64
 
33
65
  if !ruby.block_given() {
34
66
  return create_enumerator(EnumeratorArgs {
@@ -38,10 +70,9 @@ pub fn parse_csv(
38
70
  delimiter,
39
71
  quote_char,
40
72
  null_string,
41
- buffer_size,
42
- result_type,
73
+ result_type: result_type,
43
74
  flexible,
44
- flexible_default: leaked_flexible_default.as_deref(),
75
+ flexible_default: flexible_default,
45
76
  trim: match trim {
46
77
  Trim::All => Some("all".to_string()),
47
78
  Trim::Headers => Some("headers".to_string()),
@@ -51,60 +82,47 @@ pub fn parse_csv(
51
82
  });
52
83
  }
53
84
 
54
- let iter: Box<dyn Iterator<Item = CsvRecord<RandomState>>> = match result_type.as_str() {
55
- "hash" => {
85
+ let result_type = ResultType::from_str(&result_type).ok_or_else(|| {
86
+ Error::new(
87
+ ruby.exception_runtime_error(),
88
+ "Invalid result type, expected 'hash' or 'array'",
89
+ )
90
+ })?;
91
+
92
+ let iter: Box<dyn Iterator<Item = CsvRecord<RandomState>>> = match result_type {
93
+ ResultType::Hash => {
56
94
  let builder = RecordReaderBuilder::<
57
- HashMap<&'static str, Option<CowValue<'static>>, RandomState>,
95
+ HashMap<StringCacheKey, Option<CowStr<'static>>, RandomState>,
58
96
  >::new(ruby, to_read)
59
97
  .has_headers(has_headers)
60
98
  .flexible(flexible)
61
- .flexible_default(flexible_default.as_deref())
99
+ .flexible_default(flexible_default)
62
100
  .trim(trim)
63
101
  .delimiter(delimiter)
64
102
  .quote_char(quote_char)
65
- .null_string(null_string)
66
- .buffer(buffer_size);
103
+ .null_string(null_string);
67
104
 
68
- Box::new(builder.build_threaded()?.map(CsvRecord::Map))
105
+ Box::new(builder.build()?.map(CsvRecord::Map))
69
106
  }
70
- "array" => Box::new(
71
- RecordReaderBuilder::<Vec<Option<CowValue<'static>>>>::new(ruby, to_read)
107
+ ResultType::Array => {
108
+ let builder = RecordReaderBuilder::<Vec<Option<CowStr<'static>>>>::new(ruby, to_read)
72
109
  .has_headers(has_headers)
73
110
  .flexible(flexible)
74
- .flexible_default(flexible_default.as_deref())
111
+ .flexible_default(flexible_default)
75
112
  .trim(trim)
76
113
  .delimiter(delimiter)
77
114
  .quote_char(quote_char)
78
115
  .null_string(null_string)
79
- .buffer(buffer_size)
80
- .build_threaded()?
81
- .map(CsvRecord::Vec),
82
- ),
83
- _ => {
84
- return Err(Error::new(
85
- ruby.exception_runtime_error(),
86
- "Invalid result type",
87
- ))
116
+ .build()?;
117
+
118
+ Box::new(builder.map(CsvRecord::Vec))
88
119
  }
89
120
  };
90
121
 
91
122
  Ok(Yield::Iter(iter))
92
123
  }
93
124
 
94
- struct EnumeratorArgs {
95
- rb_self: Value,
96
- to_read: Value,
97
- has_headers: bool,
98
- delimiter: u8,
99
- quote_char: u8,
100
- null_string: Option<String>,
101
- buffer_size: usize,
102
- result_type: String,
103
- flexible: bool,
104
- flexible_default: Option<&'static str>,
105
- trim: Option<String>,
106
- }
107
-
125
+ /// Creates an enumerator for lazy CSV parsing
108
126
  fn create_enumerator(
109
127
  args: EnumeratorArgs,
110
128
  ) -> Result<Yield<Box<dyn Iterator<Item = CsvRecord<'static, RandomState>>>>, Error> {
@@ -119,11 +137,11 @@ fn create_enumerator(
119
137
  String::from_utf8(vec![args.quote_char]).unwrap(),
120
138
  )?;
121
139
  kwargs.aset(Symbol::new("nil_string"), args.null_string)?;
122
- kwargs.aset(Symbol::new("buffer_size"), args.buffer_size)?;
123
140
  kwargs.aset(Symbol::new("result_type"), Symbol::new(args.result_type))?;
124
141
  kwargs.aset(Symbol::new("flexible"), args.flexible)?;
125
142
  kwargs.aset(Symbol::new("flexible_default"), args.flexible_default)?;
126
143
  kwargs.aset(Symbol::new("trim"), args.trim.map(Symbol::new))?;
144
+
127
145
  let enumerator = args
128
146
  .rb_self
129
147
  .enumeratorize("for_each", (args.to_read, KwArgs(kwargs)));
data/ext/osv/src/utils.rs CHANGED
@@ -4,8 +4,6 @@ use magnus::{
4
4
  Error, RString, Ruby, Symbol, Value,
5
5
  };
6
6
 
7
- use crate::csv::BUFFER_CHANNEL_SIZE;
8
-
9
7
  fn parse_string_or_symbol(ruby: &Ruby, value: Value) -> Result<Option<String>, Error> {
10
8
  if value.is_nil() {
11
9
  Ok(None)
@@ -34,7 +32,6 @@ pub struct ReadCsvArgs {
34
32
  pub delimiter: u8,
35
33
  pub quote_char: u8,
36
34
  pub null_string: Option<String>,
37
- pub buffer_size: usize,
38
35
  pub result_type: String,
39
36
  pub flexible: bool,
40
37
  pub flexible_default: Option<String>,
@@ -54,7 +51,6 @@ pub fn parse_read_csv_args(ruby: &Ruby, args: &[Value]) -> Result<ReadCsvArgs, E
54
51
  Option<String>,
55
52
  Option<String>,
56
53
  Option<Option<String>>,
57
- Option<usize>,
58
54
  Option<Value>,
59
55
  Option<bool>,
60
56
  Option<Option<String>>,
@@ -69,7 +65,6 @@ pub fn parse_read_csv_args(ruby: &Ruby, args: &[Value]) -> Result<ReadCsvArgs, E
69
65
  "col_sep",
70
66
  "quote_char",
71
67
  "nil_string",
72
- "buffer_size",
73
68
  "result_type",
74
69
  "flexible",
75
70
  "flexible_default",
@@ -107,11 +102,9 @@ pub fn parse_read_csv_args(ruby: &Ruby, args: &[Value]) -> Result<ReadCsvArgs, E
107
102
 
108
103
  let null_string = kwargs.optional.3.unwrap_or_default();
109
104
 
110
- let buffer_size = kwargs.optional.4.unwrap_or(BUFFER_CHANNEL_SIZE);
111
-
112
105
  let result_type = match kwargs
113
106
  .optional
114
- .5
107
+ .4
115
108
  .map(|value| parse_string_or_symbol(ruby, value))
116
109
  {
117
110
  Some(Ok(Some(parsed))) => match parsed.as_str() {
@@ -133,13 +126,13 @@ pub fn parse_read_csv_args(ruby: &Ruby, args: &[Value]) -> Result<ReadCsvArgs, E
133
126
  None => String::from("hash"),
134
127
  };
135
128
 
136
- let flexible = kwargs.optional.6.unwrap_or_default();
129
+ let flexible = kwargs.optional.5.unwrap_or_default();
137
130
 
138
- let flexible_default = kwargs.optional.7.unwrap_or_default();
131
+ let flexible_default = kwargs.optional.6.unwrap_or_default();
139
132
 
140
133
  let trim = match kwargs
141
134
  .optional
142
- .8
135
+ .7
143
136
  .map(|value| parse_string_or_symbol(ruby, value))
144
137
  {
145
138
  Some(Ok(Some(parsed))) => match parsed.as_str() {
@@ -172,7 +165,6 @@ pub fn parse_read_csv_args(ruby: &Ruby, args: &[Value]) -> Result<ReadCsvArgs, E
172
165
  delimiter,
173
166
  quote_char,
174
167
  null_string,
175
- buffer_size,
176
168
  result_type,
177
169
  flexible,
178
170
  flexible_default,
data/lib/osv/version.rb CHANGED
@@ -1,3 +1,3 @@
1
1
  module OSV
2
- VERSION = "0.3.15"
2
+ VERSION = "0.3.17"
3
3
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: osv
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.15
4
+ version: 0.3.17
5
5
  platform: ruby
6
6
  authors:
7
7
  - Nathan Jaremko
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2025-01-03 00:00:00.000000000 Z
11
+ date: 2025-01-06 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rb_sys