osv 0.3.13 → 0.3.15

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,172 @@
1
+ use super::parser::RecordParser;
2
+ use super::{header_cache::StringCache, ruby_reader::SeekableRead};
3
+ use magnus::{Error, Ruby};
4
+ use std::io::BufReader;
5
+ use std::{borrow::Cow, io::Read, thread};
6
+
7
+ pub(crate) const READ_BUFFER_SIZE: usize = 16384;
8
+
9
+ pub struct RecordReader<'a, T: RecordParser<'a>> {
10
+ inner: ReaderImpl<'a, T>,
11
+ }
12
+
13
+ #[allow(clippy::large_enum_variant)]
14
+ enum ReaderImpl<'a, T: RecordParser<'a>> {
15
+ SingleThreaded {
16
+ reader: csv::Reader<BufReader<Box<dyn SeekableRead>>>,
17
+ headers: Vec<&'static str>,
18
+ null_string: Option<String>,
19
+ flexible_default: Option<Cow<'a, str>>,
20
+ string_record: csv::StringRecord,
21
+ },
22
+ MultiThreaded {
23
+ headers: Vec<&'static str>,
24
+ receiver: kanal::Receiver<T::Output>,
25
+ handle: Option<thread::JoinHandle<()>>,
26
+ },
27
+ }
28
+
29
+ impl<'a, T: RecordParser<'a>> RecordReader<'a, T> {
30
+ #[inline]
31
+ pub(crate) fn get_headers(
32
+ ruby: &Ruby,
33
+ reader: &mut csv::Reader<impl Read>,
34
+ has_headers: bool,
35
+ ) -> Result<Vec<String>, Error> {
36
+ let first_row = reader.headers().map_err(|e| {
37
+ Error::new(
38
+ ruby.exception_runtime_error(),
39
+ format!("Failed to read headers: {e}"),
40
+ )
41
+ })?;
42
+
43
+ let mut headers = Vec::with_capacity(first_row.len());
44
+ if has_headers {
45
+ headers.extend(first_row.iter().map(String::from));
46
+ } else {
47
+ headers.extend((0..first_row.len()).map(|i| format!("c{i}")));
48
+ }
49
+ Ok(headers)
50
+ }
51
+
52
+ pub(crate) fn new_single_threaded(
53
+ reader: csv::Reader<BufReader<Box<dyn SeekableRead>>>,
54
+ headers: Vec<&'static str>,
55
+ null_string: Option<String>,
56
+ flexible_default: Option<&'a str>,
57
+ ) -> Self {
58
+ let headers_len = headers.len();
59
+ Self {
60
+ inner: ReaderImpl::SingleThreaded {
61
+ reader,
62
+ headers,
63
+ null_string,
64
+ flexible_default: flexible_default.map(Cow::Borrowed),
65
+ string_record: csv::StringRecord::with_capacity(READ_BUFFER_SIZE, headers_len),
66
+ },
67
+ }
68
+ }
69
+ }
70
+
71
+ impl<T: RecordParser<'static> + Send> RecordReader<'static, T> {
72
+ pub(crate) fn new_multi_threaded(
73
+ mut reader: csv::Reader<Box<dyn Read + Send + 'static>>,
74
+ headers: Vec<&'static str>,
75
+ buffer_size: usize,
76
+ null_string: Option<String>,
77
+ flexible_default: Option<&'static str>,
78
+ ) -> Self {
79
+ let (sender, receiver) = kanal::bounded(buffer_size);
80
+ let headers_for_thread = headers.clone();
81
+
82
+ let handle = thread::spawn(move || {
83
+ let mut record =
84
+ csv::StringRecord::with_capacity(READ_BUFFER_SIZE, headers_for_thread.len());
85
+ while let Ok(true) = reader.read_record(&mut record) {
86
+ let row = T::parse(
87
+ &headers_for_thread,
88
+ &record,
89
+ null_string.as_deref(),
90
+ flexible_default.map(Cow::Borrowed),
91
+ );
92
+ if sender.send(row).is_err() {
93
+ break;
94
+ }
95
+ }
96
+ });
97
+
98
+ Self {
99
+ inner: ReaderImpl::MultiThreaded {
100
+ headers,
101
+ receiver,
102
+ handle: Some(handle),
103
+ },
104
+ }
105
+ }
106
+ }
107
+
108
+ impl<'a, T: RecordParser<'a>> Iterator for RecordReader<'a, T> {
109
+ type Item = T::Output;
110
+
111
+ #[inline]
112
+ fn next(&mut self) -> Option<Self::Item> {
113
+ match &mut self.inner {
114
+ ReaderImpl::MultiThreaded {
115
+ receiver, handle, ..
116
+ } => match receiver.recv() {
117
+ Ok(record) => Some(record),
118
+ Err(_) => {
119
+ if let Some(handle) = handle.take() {
120
+ let _ = handle.join();
121
+ }
122
+ None
123
+ }
124
+ },
125
+ ReaderImpl::SingleThreaded {
126
+ reader,
127
+ headers,
128
+ null_string,
129
+ flexible_default,
130
+ ref mut string_record,
131
+ } => match reader.read_record(string_record) {
132
+ Ok(true) => Some(T::parse(
133
+ headers,
134
+ string_record,
135
+ null_string.as_deref(),
136
+ flexible_default.clone(),
137
+ )),
138
+ Ok(false) => None,
139
+ Err(_e) => None,
140
+ },
141
+ }
142
+ }
143
+
144
+ #[inline]
145
+ fn size_hint(&self) -> (usize, Option<usize>) {
146
+ // We can't know the exact size without reading the whole file
147
+ (0, None)
148
+ }
149
+ }
150
+
151
+ impl<'a, T: RecordParser<'a>> Drop for RecordReader<'a, T> {
152
+ #[inline]
153
+ fn drop(&mut self) {
154
+ match &mut self.inner {
155
+ ReaderImpl::MultiThreaded {
156
+ receiver,
157
+ handle,
158
+ headers,
159
+ ..
160
+ } => {
161
+ receiver.close();
162
+ if let Some(handle) = handle.take() {
163
+ let _ = handle.join();
164
+ }
165
+ let _ = StringCache::clear(headers);
166
+ }
167
+ ReaderImpl::SingleThreaded { headers, .. } => {
168
+ let _ = StringCache::clear(headers);
169
+ }
170
+ }
171
+ }
172
+ }
@@ -0,0 +1,30 @@
1
+ use std::{fs::File, io, mem::ManuallyDrop};
2
+
3
/// A `File` wrapped in `ManuallyDrop`, so dropping this value does not run the
/// file's destructor (the underlying descriptor is left open).
pub struct ForgottenFileHandle(pub ManuallyDrop<File>);

/// Forwards every implemented `Read` method to the wrapped `File`, so the
/// file's own specialised implementations are used instead of the trait
/// defaults.
impl io::Read for ForgottenFileHandle {
    fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
        io::Read::read(&mut *self.0, buf)
    }

    fn read_vectored(&mut self, bufs: &mut [io::IoSliceMut<'_>]) -> io::Result<usize> {
        io::Read::read_vectored(&mut *self.0, bufs)
    }

    // `read_buf` and `is_read_vectored` are intentionally not forwarded
    // (nightly-only std APIs at the time of writing).

    fn read_to_end(&mut self, buf: &mut Vec<u8>) -> io::Result<usize> {
        io::Read::read_to_end(&mut *self.0, buf)
    }

    fn read_to_string(&mut self, buf: &mut String) -> io::Result<usize> {
        io::Read::read_to_string(&mut *self.0, buf)
    }
}
@@ -0,0 +1,174 @@
1
+ use magnus::{
2
+ value::{Opaque, ReprValue},
3
+ RClass, RString, Ruby, Value,
4
+ };
5
+ use std::io::{self, Read, Seek, SeekFrom, Write};
6
+ use std::sync::OnceLock;
7
+
8
+ static STRING_IO_CLASS: OnceLock<Opaque<RClass>> = OnceLock::new();
9
+
10
/// Adapter that exposes a Ruby object through Rust's standard I/O traits.
///
/// `T` is the wrapped Ruby handle: `Value` for IO-like objects that are read
/// by calling back into Ruby, or `RString` when the whole input is available
/// as a Ruby string (StringIO contents, or anything converted through
/// `to_str`/`to_s`).
pub struct RubyReader<T> {
    /// The wrapped Ruby object.
    inner: T,
    /// Byte position of the next read; maintained by the `RString` impls.
    offset: usize,
}
16
+
17
+ pub trait SeekableRead: std::io::Read + Seek {}
18
+ impl SeekableRead for RubyReader<Value> {}
19
+ impl SeekableRead for RubyReader<RString> {}
20
+
21
+ pub fn build_ruby_reader(
22
+ ruby: &Ruby,
23
+ input: Value,
24
+ ) -> Result<Box<dyn SeekableRead>, magnus::Error> {
25
+ if RubyReader::is_string_io(ruby, &input) {
26
+ RubyReader::from_string_io(ruby, input)
27
+ } else if RubyReader::is_io_like(&input) {
28
+ RubyReader::from_io(input)
29
+ } else {
30
+ RubyReader::from_string_like(input)
31
+ }
32
+ }
33
+
34
+ impl Seek for RubyReader<Value> {
35
+ fn seek(&mut self, pos: io::SeekFrom) -> io::Result<u64> {
36
+ let (whence, offset) = match pos {
37
+ SeekFrom::Start(i) => (0, i as i64),
38
+ SeekFrom::Current(i) => (1, i),
39
+ SeekFrom::End(i) => (2, i),
40
+ };
41
+
42
+ let new_position = self
43
+ .inner
44
+ .funcall("seek", (offset, whence))
45
+ .map_err(|e| io::Error::new(io::ErrorKind::Other, e.to_string()))?;
46
+
47
+ Ok(new_position)
48
+ }
49
+ }
50
+
51
+ impl Write for RubyReader<Value> {
52
+ fn write(&mut self, buf: &[u8]) -> Result<usize, io::Error> {
53
+ let ruby_bytes = RString::from_slice(buf);
54
+
55
+ let bytes_written = self
56
+ .inner
57
+ .funcall::<_, _, usize>("write", (ruby_bytes,))
58
+ .map_err(|e| io::Error::new(io::ErrorKind::Other, e.to_string()))?;
59
+
60
+ Ok(bytes_written)
61
+ }
62
+
63
+ fn flush(&mut self) -> Result<(), io::Error> {
64
+ self.inner
65
+ .funcall::<_, _, Value>("flush", ())
66
+ .map_err(|e| io::Error::new(io::ErrorKind::Other, e.to_string()))?;
67
+
68
+ Ok(())
69
+ }
70
+ }
71
+
72
+ impl Seek for RubyReader<RString> {
73
+ fn seek(&mut self, pos: io::SeekFrom) -> io::Result<u64> {
74
+ match pos {
75
+ io::SeekFrom::Start(offset) => self.offset = offset as usize,
76
+ io::SeekFrom::Current(offset) => self.offset = (self.offset as i64 + offset) as usize,
77
+ io::SeekFrom::End(offset) => {
78
+ self.offset = self.inner.len() - offset as usize
79
+ }
80
+ }
81
+ Ok(self.offset as u64)
82
+ }
83
+ }
84
+
85
+ impl RubyReader<Value> {
86
+ fn from_io(input: Value) -> Result<Box<dyn SeekableRead>, magnus::Error> {
87
+ if Self::is_io_like(&input) {
88
+ Ok(Box::new(Self::from_io_like(input)))
89
+ } else {
90
+ Err(magnus::Error::new(
91
+ magnus::exception::type_error(),
92
+ "Input is not an IO-like object",
93
+ ))
94
+ }
95
+ }
96
+
97
+ fn is_io_like(input: &Value) -> bool {
98
+ input.respond_to("read", false).unwrap_or(false)
99
+ }
100
+
101
+ fn from_io_like(input: Value) -> Self {
102
+ Self {
103
+ inner: input,
104
+ offset: 0,
105
+ }
106
+ }
107
+ }
108
+
109
+ impl RubyReader<RString> {
110
+ pub fn from_string_io(
111
+ ruby: &Ruby,
112
+ input: Value,
113
+ ) -> Result<Box<dyn SeekableRead>, magnus::Error> {
114
+ if !Self::is_string_io(ruby, &input) {
115
+ return Err(magnus::Error::new(
116
+ magnus::exception::type_error(),
117
+ "Input is not a StringIO",
118
+ ));
119
+ }
120
+
121
+ let string_content = input.funcall::<_, _, RString>("string", ()).unwrap();
122
+ Ok(Box::new(Self {
123
+ inner: string_content,
124
+ offset: 0,
125
+ }))
126
+ }
127
+
128
+ fn is_string_io(ruby: &Ruby, input: &Value) -> bool {
129
+ let string_io_class = STRING_IO_CLASS.get_or_init(|| {
130
+ let class = RClass::from_value(ruby.eval("StringIO").unwrap()).unwrap();
131
+ Opaque::from(class)
132
+ });
133
+ input.is_kind_of(ruby.get_inner(*string_io_class))
134
+ }
135
+
136
+ fn from_string_like(input: Value) -> Result<Box<dyn SeekableRead>, magnus::Error> {
137
+ // Try calling `to_str`, and if that fails, try `to_s`
138
+ let string_content = input
139
+ .funcall::<_, _, RString>("to_str", ())
140
+ .or_else(|_| input.funcall::<_, _, RString>("to_s", ()))?;
141
+ Ok(Box::new(Self {
142
+ inner: string_content,
143
+ offset: 0,
144
+ }))
145
+ }
146
+ }
147
+
148
+ impl Read for RubyReader<Value> {
149
+ fn read(&mut self, mut buf: &mut [u8]) -> io::Result<usize> {
150
+ let bytes = self
151
+ .inner
152
+ .funcall::<_, _, RString>("read", (buf.len(),))
153
+ .map_err(|e| io::Error::new(io::ErrorKind::Other, e.to_string()))?;
154
+
155
+ buf.write_all(unsafe { bytes.as_slice() })?;
156
+
157
+ Ok(bytes.len())
158
+ }
159
+ }
160
+
161
+ impl Read for RubyReader<RString> {
162
+ fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
163
+ let string_buffer = unsafe { self.inner.as_slice() };
164
+ if self.offset >= string_buffer.len() {
165
+ return Ok(0); // EOF
166
+ }
167
+
168
+ let remaining = string_buffer.len() - self.offset;
169
+ let copy_size = remaining.min(buf.len());
170
+ buf[..copy_size].copy_from_slice(&string_buffer[self.offset..self.offset + copy_size]);
171
+ self.offset += copy_size;
172
+ Ok(copy_size)
173
+ }
174
+ }
data/ext/osv/src/lib.rs CHANGED
@@ -1,3 +1,4 @@
1
+ mod allocator;
1
2
  mod csv;
2
3
  mod reader;
3
4
  mod utils;
@@ -1,18 +1,19 @@
1
- use crate::csv::{CsvRecord, RecordReaderBuilder};
1
+ use crate::csv::{CowValue, CsvRecord, RecordReaderBuilder};
2
2
  use crate::utils::*;
3
+ use ahash::RandomState;
3
4
  use csv::Trim;
4
5
  use magnus::value::ReprValue;
5
6
  use magnus::{block::Yield, Error, KwArgs, RHash, Ruby, Symbol, Value};
6
7
  use std::collections::HashMap;
7
- use xxhash_rust::xxh3::Xxh3Builder;
8
8
 
9
9
  pub fn parse_csv(
10
10
  rb_self: Value,
11
11
  args: &[Value],
12
- ) -> Result<Yield<Box<dyn Iterator<Item = CsvRecord<Xxh3Builder>>>>, Error> {
13
- let ruby = unsafe { Ruby::get_unchecked() };
12
+ ) -> Result<Yield<Box<dyn Iterator<Item = CsvRecord<'static, RandomState>>>>, Error> {
13
+ let original = unsafe { Ruby::get_unchecked() };
14
+ let ruby: &'static Ruby = Box::leak(Box::new(original));
14
15
 
15
- let CsvArgs {
16
+ let ReadCsvArgs {
16
17
  to_read,
17
18
  has_headers,
18
19
  delimiter,
@@ -23,7 +24,11 @@ pub fn parse_csv(
23
24
  flexible,
24
25
  flexible_default,
25
26
  trim,
26
- } = parse_csv_args(&ruby, args)?;
27
+ } = parse_read_csv_args(ruby, args)?;
28
+
29
+ let flexible_default: &'static Option<String> = Box::leak(Box::new(flexible_default));
30
+ let leaked_flexible_default: &'static Option<&str> =
31
+ Box::leak(Box::new(flexible_default.as_deref()));
27
32
 
28
33
  if !ruby.block_given() {
29
34
  return create_enumerator(EnumeratorArgs {
@@ -36,7 +41,7 @@ pub fn parse_csv(
36
41
  buffer_size,
37
42
  result_type,
38
43
  flexible,
39
- flexible_default,
44
+ flexible_default: leaked_flexible_default.as_deref(),
40
45
  trim: match trim {
41
46
  Trim::All => Some("all".to_string()),
42
47
  Trim::Headers => Some("headers".to_string()),
@@ -46,33 +51,33 @@ pub fn parse_csv(
46
51
  });
47
52
  }
48
53
 
49
- let iter: Box<dyn Iterator<Item = CsvRecord<Xxh3Builder>>> = match result_type.as_str() {
50
- "hash" => Box::new(
51
- RecordReaderBuilder::<HashMap<&'static str, Option<String>, Xxh3Builder>>::new(
52
- &ruby, to_read,
53
- )
54
+ let iter: Box<dyn Iterator<Item = CsvRecord<RandomState>>> = match result_type.as_str() {
55
+ "hash" => {
56
+ let builder = RecordReaderBuilder::<
57
+ HashMap<&'static str, Option<CowValue<'static>>, RandomState>,
58
+ >::new(ruby, to_read)
54
59
  .has_headers(has_headers)
55
60
  .flexible(flexible)
56
- .flexible_default(flexible_default)
61
+ .flexible_default(flexible_default.as_deref())
57
62
  .trim(trim)
58
63
  .delimiter(delimiter)
59
64
  .quote_char(quote_char)
60
65
  .null_string(null_string)
61
- .buffer(buffer_size)
62
- .build()?
63
- .map(CsvRecord::Map),
64
- ),
66
+ .buffer(buffer_size);
67
+
68
+ Box::new(builder.build_threaded()?.map(CsvRecord::Map))
69
+ }
65
70
  "array" => Box::new(
66
- RecordReaderBuilder::<Vec<Option<String>>>::new(&ruby, to_read)
71
+ RecordReaderBuilder::<Vec<Option<CowValue<'static>>>>::new(ruby, to_read)
67
72
  .has_headers(has_headers)
68
73
  .flexible(flexible)
69
- .flexible_default(flexible_default)
74
+ .flexible_default(flexible_default.as_deref())
70
75
  .trim(trim)
71
76
  .delimiter(delimiter)
72
77
  .quote_char(quote_char)
73
78
  .null_string(null_string)
74
79
  .buffer(buffer_size)
75
- .build()?
80
+ .build_threaded()?
76
81
  .map(CsvRecord::Vec),
77
82
  ),
78
83
  _ => {
@@ -96,13 +101,13 @@ struct EnumeratorArgs {
96
101
  buffer_size: usize,
97
102
  result_type: String,
98
103
  flexible: bool,
99
- flexible_default: Option<String>,
104
+ flexible_default: Option<&'static str>,
100
105
  trim: Option<String>,
101
106
  }
102
107
 
103
108
  fn create_enumerator(
104
109
  args: EnumeratorArgs,
105
- ) -> Result<Yield<Box<dyn Iterator<Item = CsvRecord<Xxh3Builder>>>>, Error> {
110
+ ) -> Result<Yield<Box<dyn Iterator<Item = CsvRecord<'static, RandomState>>>>, Error> {
106
111
  let kwargs = RHash::new();
107
112
  kwargs.aset(Symbol::new("has_headers"), args.has_headers)?;
108
113
  kwargs.aset(
data/ext/osv/src/utils.rs CHANGED
@@ -13,12 +13,12 @@ fn parse_string_or_symbol(ruby: &Ruby, value: Value) -> Result<Option<String>, E
13
13
  RString::from_value(value)
14
14
  .ok_or_else(|| Error::new(magnus::exception::type_error(), "Invalid string value"))?
15
15
  .to_string()
16
- .map(|s| Some(s))
16
+ .map(Some)
17
17
  } else if value.is_kind_of(ruby.class_symbol()) {
18
18
  Symbol::from_value(value)
19
19
  .ok_or_else(|| Error::new(magnus::exception::type_error(), "Invalid symbol value"))?
20
20
  .funcall("to_s", ())
21
- .map(|s| Some(s))
21
+ .map(Some)
22
22
  } else {
23
23
  Err(Error::new(
24
24
  magnus::exception::type_error(),
@@ -28,7 +28,7 @@ fn parse_string_or_symbol(ruby: &Ruby, value: Value) -> Result<Option<String>, E
28
28
  }
29
29
 
30
30
  #[derive(Debug)]
31
- pub struct CsvArgs {
31
+ pub struct ReadCsvArgs {
32
32
  pub to_read: Value,
33
33
  pub has_headers: bool,
34
34
  pub delimiter: u8,
@@ -42,7 +42,7 @@ pub struct CsvArgs {
42
42
  }
43
43
 
44
44
  /// Parse common arguments for CSV parsing
45
- pub fn parse_csv_args(ruby: &Ruby, args: &[Value]) -> Result<CsvArgs, Error> {
45
+ pub fn parse_read_csv_args(ruby: &Ruby, args: &[Value]) -> Result<ReadCsvArgs, Error> {
46
46
  let parsed_args = scan_args::<(Value,), (), (), (), _, ()>(args)?;
47
47
  let (to_read,) = parsed_args.required;
48
48
 
@@ -166,7 +166,7 @@ pub fn parse_csv_args(ruby: &Ruby, args: &[Value]) -> Result<CsvArgs, Error> {
166
166
  None => csv::Trim::None,
167
167
  };
168
168
 
169
- Ok(CsvArgs {
169
+ Ok(ReadCsvArgs {
170
170
  to_read,
171
171
  has_headers,
172
172
  delimiter,
data/lib/osv/version.rb CHANGED
@@ -1,3 +1,3 @@
1
1
  module OSV
2
- VERSION = "0.3.13"
2
+ VERSION = "0.3.15"
3
3
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: osv
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.13
4
+ version: 0.3.15
5
5
  platform: ruby
6
6
  authors:
7
7
  - Nathan Jaremko
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2024-12-24 00:00:00.000000000 Z
11
+ date: 2025-01-03 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rb_sys
@@ -38,16 +38,12 @@ dependencies:
38
38
  - - "~>"
39
39
  - !ruby/object:Gem::Version
40
40
  version: 1.2.0
41
- description: |
42
- OSV is a high-performance CSV parser for Ruby, implemented in Rust. It wraps BurntSushi's csv-rs crate to provide fast CSV parsing with support for both hash-based and array-based row formats.
43
-
44
- Features include:
45
- - Flexible input sources (file paths, gzipped files, IO objects, strings)
46
- - Configurable parsing options (headers, separators, quote chars)
47
- - Support for both hash and array output formats
48
- - Whitespace trimming options
49
- - Strict or flexible parsing modes
50
- - Significantly faster than Ruby's standard CSV library
41
+ description: |2
42
+ OSV is a high-performance CSV parser for Ruby, implemented in Rust.
43
+ It wraps BurntSushi's csv-rs crate to provide fast CSV parsing with support for both hash-based and array-based row formats.
44
+ Features include: Flexible input sources (file paths, gzipped files, IO objects, strings),
45
+ configurable parsing options (headers, separators, quote chars), support for both hash and array output formats,
46
+ whitespace trimming options, strict or flexible parsing modes, and is significantly faster than Ruby's standard CSV library.
51
47
  email:
52
48
  - nathan@jaremko.ca
53
49
  executables: []
@@ -63,13 +59,15 @@ files:
63
59
  - Rakefile
64
60
  - ext/osv/Cargo.toml
65
61
  - ext/osv/extconf.rb
62
+ - ext/osv/src/allocator.rs
66
63
  - ext/osv/src/csv/builder.rs
67
64
  - ext/osv/src/csv/header_cache.rs
68
65
  - ext/osv/src/csv/mod.rs
69
66
  - ext/osv/src/csv/parser.rs
70
- - ext/osv/src/csv/read_impl.rs
71
- - ext/osv/src/csv/reader.rs
72
67
  - ext/osv/src/csv/record.rs
68
+ - ext/osv/src/csv/record_reader.rs
69
+ - ext/osv/src/csv/ruby_integration.rs
70
+ - ext/osv/src/csv/ruby_reader.rs
73
71
  - ext/osv/src/lib.rs
74
72
  - ext/osv/src/reader.rs
75
73
  - ext/osv/src/utils.rs
@@ -84,8 +82,8 @@ metadata:
84
82
  source_code_uri: https://github.com/njaremko/osv
85
83
  readme_uri: https://github.com/njaremko/osv/blob/main/README.md
86
84
  changelog_uri: https://github.com/njaremko/osv/blob/main/CHANGELOG.md
87
- rubygems_mfa_required: 'true'
88
85
  documentation_uri: https://www.rubydoc.info/gems/osv
86
+ funding_uri: https://github.com/sponsors/njaremko
89
87
  post_install_message:
90
88
  rdoc_options: []
91
89
  require_paths:
@@ -1,75 +0,0 @@
1
- use super::{header_cache::StringCache, parser::RecordParser};
2
- use std::{io::Read, thread};
3
-
4
- pub(crate) const READ_BUFFER_SIZE: usize = 8192;
5
-
6
- pub enum ReadImpl<T: RecordParser> {
7
- SingleThreaded {
8
- reader: csv::Reader<Box<dyn Read>>,
9
- headers: Vec<&'static str>,
10
- null_string: Option<String>,
11
- flexible_default: Option<String>,
12
- },
13
- MultiThreaded {
14
- headers: Vec<&'static str>,
15
- receiver: kanal::Receiver<T::Output>,
16
- handle: Option<thread::JoinHandle<()>>,
17
- },
18
- }
19
-
20
- impl<T: RecordParser> ReadImpl<T> {
21
- #[inline]
22
- pub fn next(&mut self) -> Option<T::Output> {
23
- match self {
24
- Self::MultiThreaded {
25
- receiver, handle, ..
26
- } => match receiver.recv() {
27
- Ok(record) => Some(record),
28
- Err(_) => {
29
- if let Some(handle) = handle.take() {
30
- let _ = handle.join();
31
- }
32
- None
33
- }
34
- },
35
- Self::SingleThreaded {
36
- reader,
37
- headers,
38
- null_string,
39
- flexible_default,
40
- } => {
41
- let mut record = csv::StringRecord::with_capacity(READ_BUFFER_SIZE, headers.len());
42
- match reader.read_record(&mut record) {
43
- Ok(true) => Some(T::parse(
44
- headers,
45
- &record,
46
- null_string.as_deref(),
47
- flexible_default.as_deref(),
48
- )),
49
- _ => None,
50
- }
51
- }
52
- }
53
- }
54
-
55
- #[inline]
56
- pub fn cleanup(&mut self) {
57
- match self {
58
- Self::MultiThreaded {
59
- receiver,
60
- handle,
61
- headers,
62
- ..
63
- } => {
64
- receiver.close();
65
- if let Some(handle) = handle.take() {
66
- let _ = handle.join();
67
- }
68
- let _ = StringCache::clear(headers);
69
- }
70
- Self::SingleThreaded { headers, .. } => {
71
- let _ = StringCache::clear(headers);
72
- }
73
- }
74
- }
75
- }