rscsv 0.2.1 → 0.3.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 49902e25dff6b9904a52ad232077add5d62f6fa7
4
- data.tar.gz: 11c0c12ca451c8e8d67da7bc4868e914bbf62436
3
+ metadata.gz: cb17bc325379d4ebf7d2c7e088b84495253c62d4
4
+ data.tar.gz: e379a8d05d9a39d551e8a118ace70c394598e187
5
5
  SHA512:
6
- metadata.gz: 8f371fd7e2301db02e51516e648f5abc0add03d2a459a0da8600488f7a8a8ba39206fe8dae9ac644288c3504aa9d6d945f792c99cb4fc6bb9e9a3a236ea7dd56
7
- data.tar.gz: 9051861c418781ea1e4aa862f070cb3c80201ba0fb2239c280957cabef721807c4b5a858621a2fa648e15b971ce2b6fc02727c2c6329c1b7642cee3fbf4b3b59
6
+ metadata.gz: 3fb8a756221d7b8e293c11ca885e2d935d90ea3abda2b451e0cbd29dd25d63cf2602e0d0109a01239935363fcc822c741b74750727eb29acbe458a95a7e85787
7
+ data.tar.gz: 13dfae6dee17a62a51cd64dcb2503f2472066db729d05fd925b01ddb304bb61a551a591cf2813c92b9739b358eb3bd28459e70f8f16411a8ff1f7f734248c367
data/README.md CHANGED
@@ -24,8 +24,13 @@ Rscsv::Writer.generate_lines([['1', '2', '3'], ['3', '4', '5']])
24
24
  Rscsv::Writer.generate_line(['1', '2', '3'])
25
25
  # => 1,2,3\n
26
26
 
27
- Rscsv::Reader.parse("1,2,3\n4,5,6")
27
+ Rscsv::Reader.parse("1,2,3\n4,5,6\n")
28
28
  # => [["1", "2", "3"], ["4", "5", "6"]]
29
+
30
+ # Streaming from Enumerator
31
+ Rscsv::Reader.each(["1,2,3\n","4,5,6\n"].each) do |row|
32
+ # yields ["1", "2", "3"] and ["4", "5", "6"]
33
+ end
29
34
  ```
30
35
 
31
36
  This is ~3x faster than using native Ruby `CSV.generate` or `CSV.parse`.
@@ -4,5 +4,13 @@ require 'rscsv/version'
4
4
 
5
5
  module Rscsv
6
6
  Reader = RscsvReader
7
+
8
+ class Reader
9
+ def self.each(input, &block)
10
+ each_internal(input, &block)
11
+ rescue StopIteration
12
+ nil
13
+ end
14
+ end
7
15
  Writer = RscsvWriter
8
16
  end
@@ -1,3 +1,3 @@
1
1
  module Rscsv
2
- VERSION = '0.2.1'.freeze
2
+ VERSION = '0.3.0'.freeze
3
3
  end
data/src/lib.rs CHANGED
@@ -3,9 +3,12 @@ extern crate helix;
3
3
  extern crate csv;
4
4
 
5
5
  use std::error::Error;
6
+ use std::io::Read;
7
+ use std::slice::from_raw_parts;
6
8
  use helix::sys;
7
- use helix::sys::VALUE;
9
+ use helix::sys::{VALUE, ID};
8
10
  use helix::{UncheckedValue, CheckResult, CheckedValue, ToRust, ToRuby};
11
+ use helix::libc::c_int;
9
12
 
10
13
  struct VecWrap<T>(Vec<T>);
11
14
 
@@ -14,10 +17,8 @@ impl<T> UncheckedValue<VecWrap<T>> for VALUE
14
17
  {
15
18
  fn to_checked(self) -> CheckResult<VecWrap<T>> {
16
19
  if unsafe { sys::RB_TYPE_P(self, sys::T_ARRAY) } {
17
- let len = unsafe { sys::RARRAY_LEN(self) };
18
- let ptr = unsafe { sys::RARRAY_PTR(self) };
19
- for i in 0..len {
20
- let val = unsafe { *ptr.offset(i) };
20
+ let slice = ruby_array_to_slice(self);
21
+ for val in slice.iter() {
21
22
  if let Err(error) = val.to_checked() {
22
23
  return Err(format!("Failed to convert value for Vec<T>: {}", error));
23
24
  }
@@ -31,20 +32,23 @@ impl<T> UncheckedValue<VecWrap<T>> for VALUE
31
32
  }
32
33
  }
33
34
 
35
+ fn ruby_array_to_slice<'a>(array: VALUE) -> &'a [VALUE] {
36
+ let length = unsafe { sys::RARRAY_LEN(array) } as usize;
37
+ unsafe { from_raw_parts(sys::RARRAY_CONST_PTR(array), length) }
38
+ }
39
+
34
40
  impl ToRust<VecWrap<String>> for CheckedValue<VecWrap<String>>
35
41
  where VALUE: UncheckedValue<String>,
36
42
  CheckedValue<String>: ToRust<String>
37
43
  {
38
44
  fn to_rust(self) -> VecWrap<String> {
39
- let len = unsafe { sys::RARRAY_LEN(self.inner) };
40
- let ptr = unsafe { sys::RARRAY_PTR(self.inner) };
41
- let mut vec: Vec<String> = Vec::with_capacity(len as usize);
42
- for i in 0..len {
43
- let val = unsafe { *ptr.offset(i) };
45
+ let slice = ruby_array_to_slice(self.inner);
46
+ let mut vec: Vec<String> = Vec::with_capacity(slice.len());
47
+ for val in slice.iter() {
44
48
  let checked = val.to_checked().unwrap();
45
49
  vec.push(checked.to_rust());
46
50
  }
47
- return VecWrap(vec);
51
+ VecWrap(vec)
48
52
  }
49
53
  }
50
54
 
@@ -53,15 +57,13 @@ impl ToRust<VecWrap<VecWrap<String>>> for CheckedValue<VecWrap<VecWrap<String>>>
53
57
  CheckedValue<VecWrap<String>>: ToRust<VecWrap<String>>
54
58
  {
55
59
  fn to_rust(self) -> VecWrap<VecWrap<String>> {
56
- let len = unsafe { sys::RARRAY_LEN(self.inner) };
57
- let ptr = unsafe { sys::RARRAY_PTR(self.inner) };
58
- let mut vec: Vec<VecWrap<String>> = Vec::with_capacity(len as usize);
59
- for i in 0..len {
60
- let val = unsafe { *ptr.offset(i) };
60
+ let slice = ruby_array_to_slice(self.inner);
61
+ let mut vec: Vec<VecWrap<String>> = Vec::with_capacity(slice.len());
62
+ for val in slice.iter() {
61
63
  let checked = val.to_checked().unwrap();
62
64
  vec.push(checked.to_rust());
63
65
  }
64
- return VecWrap(vec);
66
+ VecWrap(vec)
65
67
  }
66
68
  }
67
69
 
@@ -70,6 +72,9 @@ extern "C" {
70
72
  pub fn rb_ary_new_capa(capa: isize) -> VALUE;
71
73
  pub fn rb_ary_entry(ary: VALUE, offset: isize) -> VALUE;
72
74
  pub fn rb_ary_push(ary: VALUE, item: VALUE) -> VALUE;
75
+ pub fn rb_block_given_p() -> c_int;
76
+ pub fn rb_yield(value: VALUE);
77
+ pub fn rb_funcall(value: VALUE, name: ID, nargs: c_int, ...) -> VALUE;
73
78
  }
74
79
 
75
80
  impl ToRuby for VecWrap<csv::StringRecord> {
@@ -96,26 +101,137 @@ fn generate_lines(rows: VecWrap<VecWrap<String>>) -> Result<String, Box<Error>>
96
101
  wtr.write_record(&(row.0))?;
97
102
  }
98
103
 
99
- return Ok(String::from_utf8(wtr.into_inner()?)?);
104
+ Ok(String::from_utf8(wtr.into_inner()?)?)
100
105
  }
101
106
 
102
- fn parse_csv(data: String) -> Result<Vec<csv::StringRecord>, csv::Error> {
103
- let mut reader = csv::ReaderBuilder::new()
107
+ fn record_to_ruby(record: &csv::ByteRecord) -> VALUE {
108
+ let inner_array = unsafe { rb_ary_new_capa(record.len() as isize) };
109
+ for column in record.iter() {
110
+ unsafe {
111
+ let column_value = sys::rb_utf8_str_new(column.as_ptr() as *const i8,
112
+ column.len() as i64);
113
+ rb_ary_push(inner_array, column_value);
114
+ }
115
+ }
116
+ inner_array
117
+ }
118
+
119
+
120
+ impl UncheckedValue<Enumerator> for VALUE {
121
+ fn to_checked(self) -> CheckResult<Enumerator> {
122
+ Ok(unsafe { CheckedValue::new(self) })
123
+ }
124
+ }
125
+
126
+ impl ToRust<Enumerator> for CheckedValue<Enumerator> {
127
+ fn to_rust(self) -> Enumerator {
128
+ Enumerator { value: self.inner }
129
+ }
130
+ }
131
+
132
+ struct Enumerator {
133
+ value: VALUE,
134
+ }
135
+
136
+ struct EnumeratorRead {
137
+ value: VALUE,
138
+ next: Option<Vec<u8>>,
139
+ }
140
+
141
+ impl EnumeratorRead {
142
+ fn new(value: VALUE) -> EnumeratorRead {
143
+ EnumeratorRead {
144
+ value: value,
145
+ next: None,
146
+ }
147
+ }
148
+
149
+ fn read_and_store_overflow(&mut self, buf: &mut [u8], value: &[u8]) -> std::io::Result<usize> {
150
+ if value.len() > buf.len() {
151
+ match value.split_at(buf.len()) {
152
+ (current, next) => {
153
+ for (index, c) in current.iter().enumerate() {
154
+ buf[index] = *c;
155
+ }
156
+ self.next = Some(next.to_vec());
157
+ Ok(current.len())
158
+ }
159
+ }
160
+
161
+ } else {
162
+ for (index, value) in value.iter().enumerate() {
163
+ buf[index] = *value;
164
+ }
165
+ self.next = None;
166
+ Ok(value.len() as usize)
167
+ }
168
+ }
169
+
170
+ fn read_from_external(&mut self, buf: &mut [u8]) -> std::io::Result<usize> {
171
+ let next = unsafe {
172
+ rb_funcall(self.value,
173
+ sys::rb_intern("next\0".as_ptr() as *const i8),
174
+ 0)
175
+ };
176
+ let slice = unsafe {
177
+ from_raw_parts(sys::RSTRING_PTR(next) as *const u8,
178
+ sys::RSTRING_LEN(next) as usize)
179
+ };
180
+
181
+ self.read_and_store_overflow(buf, slice)
182
+ }
183
+ }
184
+
185
+ impl Read for EnumeratorRead {
186
+ fn read(&mut self, buf: &mut [u8]) -> std::io::Result<usize> {
187
+ match self.next.clone() {
188
+ Some(inner) => self.read_and_store_overflow(buf, &inner),
189
+ None => self.read_from_external(buf),
190
+ }
191
+ }
192
+ }
193
+
194
+ fn csv_reader<R: Read>(reader: R) -> csv::Reader<R> {
195
+ csv::ReaderBuilder::new()
196
+ .buffer_capacity(16 * 1024)
104
197
  .has_headers(false)
105
- .from_reader(data.as_bytes());
106
- let records = reader
198
+ .from_reader(reader)
199
+ }
200
+
201
+ fn yield_csv(data: Enumerator) -> Result<(), csv::Error> {
202
+ let mut reader = csv_reader(EnumeratorRead::new(data.value));
203
+ let mut record = csv::ByteRecord::new();
204
+
205
+ while reader.read_byte_record(&mut record)? {
206
+ let inner_array = record_to_ruby(&record);
207
+ unsafe {
208
+ rb_yield(inner_array);
209
+ }
210
+ }
211
+
212
+ Ok(())
213
+ }
214
+
215
+ fn parse_csv(data: String) -> Result<Vec<csv::StringRecord>, csv::Error> {
216
+ let mut reader = csv_reader(data.as_bytes());
217
+ reader
107
218
  .records()
108
- .collect::<Result<Vec<csv::StringRecord>, csv::Error>>();
109
- return records;
219
+ .collect::<Result<Vec<csv::StringRecord>, csv::Error>>()
110
220
  }
111
221
 
112
222
  ruby! {
113
223
  class RscsvReader {
224
+ def each_internal(data: Enumerator) {
225
+ match yield_csv(data) {
226
+ Err(_) => throw!("Error parsing CSV"),
227
+ Ok(_) => ()
228
+ }
229
+ }
114
230
  def parse(data: String) -> VecWrap<csv::StringRecord> {
115
231
  match parse_csv(data) {
116
232
  Err(_) => throw!("Error parsing CSV"),
117
- Ok(result) => return VecWrap(result)
118
- };
233
+ Ok(result) => VecWrap(result)
234
+ }
119
235
  }
120
236
  }
121
237
  class RscsvWriter {
@@ -124,8 +240,8 @@ ruby! {
124
240
  let result = wtr.write_record(&(row.0));
125
241
  match result {
126
242
  Err(_) => throw!("Error generating csv"),
127
- Ok(_) => return String::from_utf8(wtr.into_inner().unwrap()).unwrap(),
128
- };
243
+ Ok(_) => String::from_utf8(wtr.into_inner().unwrap()).unwrap(),
244
+ }
129
245
  }
130
246
 
131
247
  def generate_lines(rows: VecWrap<VecWrap<String>>) -> String {
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: rscsv
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.1
4
+ version: 0.3.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Ville Lautanala
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2017-05-25 00:00:00.000000000 Z
11
+ date: 2017-05-30 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: helix_runtime