rscsv 0.2.1 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 49902e25dff6b9904a52ad232077add5d62f6fa7
4
- data.tar.gz: 11c0c12ca451c8e8d67da7bc4868e914bbf62436
3
+ metadata.gz: cb17bc325379d4ebf7d2c7e088b84495253c62d4
4
+ data.tar.gz: e379a8d05d9a39d551e8a118ace70c394598e187
5
5
  SHA512:
6
- metadata.gz: 8f371fd7e2301db02e51516e648f5abc0add03d2a459a0da8600488f7a8a8ba39206fe8dae9ac644288c3504aa9d6d945f792c99cb4fc6bb9e9a3a236ea7dd56
7
- data.tar.gz: 9051861c418781ea1e4aa862f070cb3c80201ba0fb2239c280957cabef721807c4b5a858621a2fa648e15b971ce2b6fc02727c2c6329c1b7642cee3fbf4b3b59
6
+ metadata.gz: 3fb8a756221d7b8e293c11ca885e2d935d90ea3abda2b451e0cbd29dd25d63cf2602e0d0109a01239935363fcc822c741b74750727eb29acbe458a95a7e85787
7
+ data.tar.gz: 13dfae6dee17a62a51cd64dcb2503f2472066db729d05fd925b01ddb304bb61a551a591cf2813c92b9739b358eb3bd28459e70f8f16411a8ff1f7f734248c367
data/README.md CHANGED
@@ -24,8 +24,13 @@ Rscsv::Writer.generate_lines([['1', '2', '3'], ['3', '4', '5']])
24
24
  Rscsv::Writer.generate_line(['1', '2', '3'])
25
25
  # => 1,2,3\n
26
26
 
27
- Rscsv::Reader.parse("1,2,3\n4,5,6")
27
+ Rscsv::Reader.parse("1,2,3\n4,5,6\n")
28
28
  # => [["1", "2", "3"], ["4", "5", "6"]]
29
+
30
+ # Streaming from Enumerator
31
+ Rscsv::Reader.each(["1,2,3\n","4,5,6\n"].each) do |row|
32
+ # yields ["1", "2", "3"] and ["4", "5", "6"]
33
+ end
29
34
  ```
30
35
 
31
36
  This is ~3x faster than using native Ruby `CSV.generate` or `CSV.parse`.
@@ -4,5 +4,13 @@ require 'rscsv/version'
4
4
 
5
5
  module Rscsv
6
6
  Reader = RscsvReader
7
+
8
+ class Reader
9
+ def self.each(input, &block)
10
+ each_internal(input, &block)
11
+ rescue StopIteration
12
+ nil
13
+ end
14
+ end
7
15
  Writer = RscsvWriter
8
16
  end
@@ -1,3 +1,3 @@
1
1
  module Rscsv
2
- VERSION = '0.2.1'.freeze
2
+ VERSION = '0.3.0'.freeze
3
3
  end
data/src/lib.rs CHANGED
@@ -3,9 +3,12 @@ extern crate helix;
3
3
  extern crate csv;
4
4
 
5
5
  use std::error::Error;
6
+ use std::io::Read;
7
+ use std::slice::from_raw_parts;
6
8
  use helix::sys;
7
- use helix::sys::VALUE;
9
+ use helix::sys::{VALUE, ID};
8
10
  use helix::{UncheckedValue, CheckResult, CheckedValue, ToRust, ToRuby};
11
+ use helix::libc::c_int;
9
12
 
10
13
  struct VecWrap<T>(Vec<T>);
11
14
 
@@ -14,10 +17,8 @@ impl<T> UncheckedValue<VecWrap<T>> for VALUE
14
17
  {
15
18
  fn to_checked(self) -> CheckResult<VecWrap<T>> {
16
19
  if unsafe { sys::RB_TYPE_P(self, sys::T_ARRAY) } {
17
- let len = unsafe { sys::RARRAY_LEN(self) };
18
- let ptr = unsafe { sys::RARRAY_PTR(self) };
19
- for i in 0..len {
20
- let val = unsafe { *ptr.offset(i) };
20
+ let slice = ruby_array_to_slice(self);
21
+ for val in slice.iter() {
21
22
  if let Err(error) = val.to_checked() {
22
23
  return Err(format!("Failed to convert value for Vec<T>: {}", error));
23
24
  }
@@ -31,20 +32,23 @@ impl<T> UncheckedValue<VecWrap<T>> for VALUE
31
32
  }
32
33
  }
33
34
 
35
+ fn ruby_array_to_slice<'a>(array: VALUE) -> &'a [VALUE] {
36
+ let length = unsafe { sys::RARRAY_LEN(array) } as usize;
37
+ unsafe { from_raw_parts(sys::RARRAY_CONST_PTR(array), length) }
38
+ }
39
+
34
40
  impl ToRust<VecWrap<String>> for CheckedValue<VecWrap<String>>
35
41
  where VALUE: UncheckedValue<String>,
36
42
  CheckedValue<String>: ToRust<String>
37
43
  {
38
44
  fn to_rust(self) -> VecWrap<String> {
39
- let len = unsafe { sys::RARRAY_LEN(self.inner) };
40
- let ptr = unsafe { sys::RARRAY_PTR(self.inner) };
41
- let mut vec: Vec<String> = Vec::with_capacity(len as usize);
42
- for i in 0..len {
43
- let val = unsafe { *ptr.offset(i) };
45
+ let slice = ruby_array_to_slice(self.inner);
46
+ let mut vec: Vec<String> = Vec::with_capacity(slice.len());
47
+ for val in slice.iter() {
44
48
  let checked = val.to_checked().unwrap();
45
49
  vec.push(checked.to_rust());
46
50
  }
47
- return VecWrap(vec);
51
+ VecWrap(vec)
48
52
  }
49
53
  }
50
54
 
@@ -53,15 +57,13 @@ impl ToRust<VecWrap<VecWrap<String>>> for CheckedValue<VecWrap<VecWrap<String>>>
53
57
  CheckedValue<VecWrap<String>>: ToRust<VecWrap<String>>
54
58
  {
55
59
  fn to_rust(self) -> VecWrap<VecWrap<String>> {
56
- let len = unsafe { sys::RARRAY_LEN(self.inner) };
57
- let ptr = unsafe { sys::RARRAY_PTR(self.inner) };
58
- let mut vec: Vec<VecWrap<String>> = Vec::with_capacity(len as usize);
59
- for i in 0..len {
60
- let val = unsafe { *ptr.offset(i) };
60
+ let slice = ruby_array_to_slice(self.inner);
61
+ let mut vec: Vec<VecWrap<String>> = Vec::with_capacity(slice.len());
62
+ for val in slice.iter() {
61
63
  let checked = val.to_checked().unwrap();
62
64
  vec.push(checked.to_rust());
63
65
  }
64
- return VecWrap(vec);
66
+ VecWrap(vec)
65
67
  }
66
68
  }
67
69
 
@@ -70,6 +72,9 @@ extern "C" {
70
72
  pub fn rb_ary_new_capa(capa: isize) -> VALUE;
71
73
  pub fn rb_ary_entry(ary: VALUE, offset: isize) -> VALUE;
72
74
  pub fn rb_ary_push(ary: VALUE, item: VALUE) -> VALUE;
75
+ pub fn rb_block_given_p() -> c_int;
76
+ pub fn rb_yield(value: VALUE);
77
+ pub fn rb_funcall(value: VALUE, name: ID, nargs: c_int, ...) -> VALUE;
73
78
  }
74
79
 
75
80
  impl ToRuby for VecWrap<csv::StringRecord> {
@@ -96,26 +101,137 @@ fn generate_lines(rows: VecWrap<VecWrap<String>>) -> Result<String, Box<Error>>
96
101
  wtr.write_record(&(row.0))?;
97
102
  }
98
103
 
99
- return Ok(String::from_utf8(wtr.into_inner()?)?);
104
+ Ok(String::from_utf8(wtr.into_inner()?)?)
100
105
  }
101
106
 
102
- fn parse_csv(data: String) -> Result<Vec<csv::StringRecord>, csv::Error> {
103
- let mut reader = csv::ReaderBuilder::new()
107
+ fn record_to_ruby(record: &csv::ByteRecord) -> VALUE {
108
+ let inner_array = unsafe { rb_ary_new_capa(record.len() as isize) };
109
+ for column in record.iter() {
110
+ unsafe {
111
+ let column_value = sys::rb_utf8_str_new(column.as_ptr() as *const i8,
112
+ column.len() as i64);
113
+ rb_ary_push(inner_array, column_value);
114
+ }
115
+ }
116
+ inner_array
117
+ }
118
+
119
+
120
+ impl UncheckedValue<Enumerator> for VALUE {
121
+ fn to_checked(self) -> CheckResult<Enumerator> {
122
+ Ok(unsafe { CheckedValue::new(self) })
123
+ }
124
+ }
125
+
126
+ impl ToRust<Enumerator> for CheckedValue<Enumerator> {
127
+ fn to_rust(self) -> Enumerator {
128
+ Enumerator { value: self.inner }
129
+ }
130
+ }
131
+
132
+ struct Enumerator {
133
+ value: VALUE,
134
+ }
135
+
136
+ struct EnumeratorRead {
137
+ value: VALUE,
138
+ next: Option<Vec<u8>>,
139
+ }
140
+
141
+ impl EnumeratorRead {
142
+ fn new(value: VALUE) -> EnumeratorRead {
143
+ EnumeratorRead {
144
+ value: value,
145
+ next: None,
146
+ }
147
+ }
148
+
149
+ fn read_and_store_overflow(&mut self, buf: &mut [u8], value: &[u8]) -> std::io::Result<usize> {
150
+ if value.len() > buf.len() {
151
+ match value.split_at(buf.len()) {
152
+ (current, next) => {
153
+ for (index, c) in current.iter().enumerate() {
154
+ buf[index] = *c;
155
+ }
156
+ self.next = Some(next.to_vec());
157
+ Ok(current.len())
158
+ }
159
+ }
160
+
161
+ } else {
162
+ for (index, value) in value.iter().enumerate() {
163
+ buf[index] = *value;
164
+ }
165
+ self.next = None;
166
+ Ok(value.len() as usize)
167
+ }
168
+ }
169
+
170
+ fn read_from_external(&mut self, buf: &mut [u8]) -> std::io::Result<usize> {
171
+ let next = unsafe {
172
+ rb_funcall(self.value,
173
+ sys::rb_intern("next\0".as_ptr() as *const i8),
174
+ 0)
175
+ };
176
+ let slice = unsafe {
177
+ from_raw_parts(sys::RSTRING_PTR(next) as *const u8,
178
+ sys::RSTRING_LEN(next) as usize)
179
+ };
180
+
181
+ self.read_and_store_overflow(buf, slice)
182
+ }
183
+ }
184
+
185
+ impl Read for EnumeratorRead {
186
+ fn read(&mut self, buf: &mut [u8]) -> std::io::Result<usize> {
187
+ match self.next.clone() {
188
+ Some(inner) => self.read_and_store_overflow(buf, &inner),
189
+ None => self.read_from_external(buf),
190
+ }
191
+ }
192
+ }
193
+
194
+ fn csv_reader<R: Read>(reader: R) -> csv::Reader<R> {
195
+ csv::ReaderBuilder::new()
196
+ .buffer_capacity(16 * 1024)
104
197
  .has_headers(false)
105
- .from_reader(data.as_bytes());
106
- let records = reader
198
+ .from_reader(reader)
199
+ }
200
+
201
+ fn yield_csv(data: Enumerator) -> Result<(), csv::Error> {
202
+ let mut reader = csv_reader(EnumeratorRead::new(data.value));
203
+ let mut record = csv::ByteRecord::new();
204
+
205
+ while reader.read_byte_record(&mut record)? {
206
+ let inner_array = record_to_ruby(&record);
207
+ unsafe {
208
+ rb_yield(inner_array);
209
+ }
210
+ }
211
+
212
+ Ok(())
213
+ }
214
+
215
+ fn parse_csv(data: String) -> Result<Vec<csv::StringRecord>, csv::Error> {
216
+ let mut reader = csv_reader(data.as_bytes());
217
+ reader
107
218
  .records()
108
- .collect::<Result<Vec<csv::StringRecord>, csv::Error>>();
109
- return records;
219
+ .collect::<Result<Vec<csv::StringRecord>, csv::Error>>()
110
220
  }
111
221
 
112
222
  ruby! {
113
223
  class RscsvReader {
224
+ def each_internal(data: Enumerator) {
225
+ match yield_csv(data) {
226
+ Err(_) => throw!("Error parsing CSV"),
227
+ Ok(_) => ()
228
+ }
229
+ }
114
230
  def parse(data: String) -> VecWrap<csv::StringRecord> {
115
231
  match parse_csv(data) {
116
232
  Err(_) => throw!("Error parsing CSV"),
117
- Ok(result) => return VecWrap(result)
118
- };
233
+ Ok(result) => VecWrap(result)
234
+ }
119
235
  }
120
236
  }
121
237
  class RscsvWriter {
@@ -124,8 +240,8 @@ ruby! {
124
240
  let result = wtr.write_record(&(row.0));
125
241
  match result {
126
242
  Err(_) => throw!("Error generating csv"),
127
- Ok(_) => return String::from_utf8(wtr.into_inner().unwrap()).unwrap(),
128
- };
243
+ Ok(_) => String::from_utf8(wtr.into_inner().unwrap()).unwrap(),
244
+ }
129
245
  }
130
246
 
131
247
  def generate_lines(rows: VecWrap<VecWrap<String>>) -> String {
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: rscsv
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.1
4
+ version: 0.3.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Ville Lautanala
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2017-05-25 00:00:00.000000000 Z
11
+ date: 2017-05-30 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: helix_runtime