rscsv 0.2.1 → 0.3.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +6 -1
- data/lib/rscsv.rb +8 -0
- data/lib/rscsv/version.rb +1 -1
- data/src/lib.rs +144 -28
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: cb17bc325379d4ebf7d2c7e088b84495253c62d4
|
4
|
+
data.tar.gz: e379a8d05d9a39d551e8a118ace70c394598e187
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 3fb8a756221d7b8e293c11ca885e2d935d90ea3abda2b451e0cbd29dd25d63cf2602e0d0109a01239935363fcc822c741b74750727eb29acbe458a95a7e85787
|
7
|
+
data.tar.gz: 13dfae6dee17a62a51cd64dcb2503f2472066db729d05fd925b01ddb304bb61a551a591cf2813c92b9739b358eb3bd28459e70f8f16411a8ff1f7f734248c367
|
data/README.md
CHANGED
@@ -24,8 +24,13 @@ Rscsv::Writer.generate_lines([['1', '2', '3'], ['3', '4', '5']])
|
|
24
24
|
Rscsv::Writer.generate_line(['1', '2', '3'])
|
25
25
|
# => 1,2,3\n
|
26
26
|
|
27
|
-
Rscsv::Reader.parse("1,2,3\n4,5,6")
|
27
|
+
Rscsv::Reader.parse("1,2,3\n4,5,6\n")
|
28
28
|
# => [["1", "2", "3"], ["4", "5", "6"]]
|
29
|
+
|
30
|
+
# Streaming from Enumerator
|
31
|
+
Rscsv::Reader.each(["1,2,3\n","4,5,6\n"].each) do |row|
|
32
|
+
# yields ["1", "2", "3"] and ["4", "5", "6"]
|
33
|
+
end
|
29
34
|
```
|
30
35
|
|
31
36
|
This is ~3x faster than using native Ruby `CSV.generate` or `CSV.parse`.
|
data/lib/rscsv.rb
CHANGED
data/lib/rscsv/version.rb
CHANGED
data/src/lib.rs
CHANGED
@@ -3,9 +3,12 @@ extern crate helix;
|
|
3
3
|
extern crate csv;
|
4
4
|
|
5
5
|
use std::error::Error;
|
6
|
+
use std::io::Read;
|
7
|
+
use std::slice::from_raw_parts;
|
6
8
|
use helix::sys;
|
7
|
-
use helix::sys::VALUE;
|
9
|
+
use helix::sys::{VALUE, ID};
|
8
10
|
use helix::{UncheckedValue, CheckResult, CheckedValue, ToRust, ToRuby};
|
11
|
+
use helix::libc::c_int;
|
9
12
|
|
10
13
|
struct VecWrap<T>(Vec<T>);
|
11
14
|
|
@@ -14,10 +17,8 @@ impl<T> UncheckedValue<VecWrap<T>> for VALUE
|
|
14
17
|
{
|
15
18
|
fn to_checked(self) -> CheckResult<VecWrap<T>> {
|
16
19
|
if unsafe { sys::RB_TYPE_P(self, sys::T_ARRAY) } {
|
17
|
-
let
|
18
|
-
|
19
|
-
for i in 0..len {
|
20
|
-
let val = unsafe { *ptr.offset(i) };
|
20
|
+
let slice = ruby_array_to_slice(self);
|
21
|
+
for val in slice.iter() {
|
21
22
|
if let Err(error) = val.to_checked() {
|
22
23
|
return Err(format!("Failed to convert value for Vec<T>: {}", error));
|
23
24
|
}
|
@@ -31,20 +32,23 @@ impl<T> UncheckedValue<VecWrap<T>> for VALUE
|
|
31
32
|
}
|
32
33
|
}
|
33
34
|
|
35
|
+
fn ruby_array_to_slice<'a>(array: VALUE) -> &'a [VALUE] {
|
36
|
+
let length = unsafe { sys::RARRAY_LEN(array) } as usize;
|
37
|
+
unsafe { from_raw_parts(sys::RARRAY_CONST_PTR(array), length) }
|
38
|
+
}
|
39
|
+
|
34
40
|
impl ToRust<VecWrap<String>> for CheckedValue<VecWrap<String>>
|
35
41
|
where VALUE: UncheckedValue<String>,
|
36
42
|
CheckedValue<String>: ToRust<String>
|
37
43
|
{
|
38
44
|
fn to_rust(self) -> VecWrap<String> {
|
39
|
-
let
|
40
|
-
let
|
41
|
-
|
42
|
-
for i in 0..len {
|
43
|
-
let val = unsafe { *ptr.offset(i) };
|
45
|
+
let slice = ruby_array_to_slice(self.inner);
|
46
|
+
let mut vec: Vec<String> = Vec::with_capacity(slice.len());
|
47
|
+
for val in slice.iter() {
|
44
48
|
let checked = val.to_checked().unwrap();
|
45
49
|
vec.push(checked.to_rust());
|
46
50
|
}
|
47
|
-
|
51
|
+
VecWrap(vec)
|
48
52
|
}
|
49
53
|
}
|
50
54
|
|
@@ -53,15 +57,13 @@ impl ToRust<VecWrap<VecWrap<String>>> for CheckedValue<VecWrap<VecWrap<String>>>
|
|
53
57
|
CheckedValue<VecWrap<String>>: ToRust<VecWrap<String>>
|
54
58
|
{
|
55
59
|
fn to_rust(self) -> VecWrap<VecWrap<String>> {
|
56
|
-
let
|
57
|
-
let
|
58
|
-
|
59
|
-
for i in 0..len {
|
60
|
-
let val = unsafe { *ptr.offset(i) };
|
60
|
+
let slice = ruby_array_to_slice(self.inner);
|
61
|
+
let mut vec: Vec<VecWrap<String>> = Vec::with_capacity(slice.len());
|
62
|
+
for val in slice.iter() {
|
61
63
|
let checked = val.to_checked().unwrap();
|
62
64
|
vec.push(checked.to_rust());
|
63
65
|
}
|
64
|
-
|
66
|
+
VecWrap(vec)
|
65
67
|
}
|
66
68
|
}
|
67
69
|
|
@@ -70,6 +72,9 @@ extern "C" {
|
|
70
72
|
pub fn rb_ary_new_capa(capa: isize) -> VALUE;
|
71
73
|
pub fn rb_ary_entry(ary: VALUE, offset: isize) -> VALUE;
|
72
74
|
pub fn rb_ary_push(ary: VALUE, item: VALUE) -> VALUE;
|
75
|
+
pub fn rb_block_given_p() -> c_int;
|
76
|
+
pub fn rb_yield(value: VALUE);
|
77
|
+
pub fn rb_funcall(value: VALUE, name: ID, nargs: c_int, ...) -> VALUE;
|
73
78
|
}
|
74
79
|
|
75
80
|
impl ToRuby for VecWrap<csv::StringRecord> {
|
@@ -96,26 +101,137 @@ fn generate_lines(rows: VecWrap<VecWrap<String>>) -> Result<String, Box<Error>>
|
|
96
101
|
wtr.write_record(&(row.0))?;
|
97
102
|
}
|
98
103
|
|
99
|
-
|
104
|
+
Ok(String::from_utf8(wtr.into_inner()?)?)
|
100
105
|
}
|
101
106
|
|
102
|
-
fn
|
103
|
-
let
|
107
|
+
fn record_to_ruby(record: &csv::ByteRecord) -> VALUE {
|
108
|
+
let inner_array = unsafe { rb_ary_new_capa(record.len() as isize) };
|
109
|
+
for column in record.iter() {
|
110
|
+
unsafe {
|
111
|
+
let column_value = sys::rb_utf8_str_new(column.as_ptr() as *const i8,
|
112
|
+
column.len() as i64);
|
113
|
+
rb_ary_push(inner_array, column_value);
|
114
|
+
}
|
115
|
+
}
|
116
|
+
inner_array
|
117
|
+
}
|
118
|
+
|
119
|
+
|
120
|
+
impl UncheckedValue<Enumerator> for VALUE {
|
121
|
+
fn to_checked(self) -> CheckResult<Enumerator> {
|
122
|
+
Ok(unsafe { CheckedValue::new(self) })
|
123
|
+
}
|
124
|
+
}
|
125
|
+
|
126
|
+
impl ToRust<Enumerator> for CheckedValue<Enumerator> {
|
127
|
+
fn to_rust(self) -> Enumerator {
|
128
|
+
Enumerator { value: self.inner }
|
129
|
+
}
|
130
|
+
}
|
131
|
+
|
132
|
+
struct Enumerator {
|
133
|
+
value: VALUE,
|
134
|
+
}
|
135
|
+
|
136
|
+
struct EnumeratorRead {
|
137
|
+
value: VALUE,
|
138
|
+
next: Option<Vec<u8>>,
|
139
|
+
}
|
140
|
+
|
141
|
+
impl EnumeratorRead {
|
142
|
+
fn new(value: VALUE) -> EnumeratorRead {
|
143
|
+
EnumeratorRead {
|
144
|
+
value: value,
|
145
|
+
next: None,
|
146
|
+
}
|
147
|
+
}
|
148
|
+
|
149
|
+
fn read_and_store_overflow(&mut self, buf: &mut [u8], value: &[u8]) -> std::io::Result<usize> {
|
150
|
+
if value.len() > buf.len() {
|
151
|
+
match value.split_at(buf.len()) {
|
152
|
+
(current, next) => {
|
153
|
+
for (index, c) in current.iter().enumerate() {
|
154
|
+
buf[index] = *c;
|
155
|
+
}
|
156
|
+
self.next = Some(next.to_vec());
|
157
|
+
Ok(current.len())
|
158
|
+
}
|
159
|
+
}
|
160
|
+
|
161
|
+
} else {
|
162
|
+
for (index, value) in value.iter().enumerate() {
|
163
|
+
buf[index] = *value;
|
164
|
+
}
|
165
|
+
self.next = None;
|
166
|
+
Ok(value.len() as usize)
|
167
|
+
}
|
168
|
+
}
|
169
|
+
|
170
|
+
fn read_from_external(&mut self, buf: &mut [u8]) -> std::io::Result<usize> {
|
171
|
+
let next = unsafe {
|
172
|
+
rb_funcall(self.value,
|
173
|
+
sys::rb_intern("next\0".as_ptr() as *const i8),
|
174
|
+
0)
|
175
|
+
};
|
176
|
+
let slice = unsafe {
|
177
|
+
from_raw_parts(sys::RSTRING_PTR(next) as *const u8,
|
178
|
+
sys::RSTRING_LEN(next) as usize)
|
179
|
+
};
|
180
|
+
|
181
|
+
self.read_and_store_overflow(buf, slice)
|
182
|
+
}
|
183
|
+
}
|
184
|
+
|
185
|
+
impl Read for EnumeratorRead {
|
186
|
+
fn read(&mut self, buf: &mut [u8]) -> std::io::Result<usize> {
|
187
|
+
match self.next.clone() {
|
188
|
+
Some(inner) => self.read_and_store_overflow(buf, &inner),
|
189
|
+
None => self.read_from_external(buf),
|
190
|
+
}
|
191
|
+
}
|
192
|
+
}
|
193
|
+
|
194
|
+
fn csv_reader<R: Read>(reader: R) -> csv::Reader<R> {
|
195
|
+
csv::ReaderBuilder::new()
|
196
|
+
.buffer_capacity(16 * 1024)
|
104
197
|
.has_headers(false)
|
105
|
-
.from_reader(
|
106
|
-
|
198
|
+
.from_reader(reader)
|
199
|
+
}
|
200
|
+
|
201
|
+
fn yield_csv(data: Enumerator) -> Result<(), csv::Error> {
|
202
|
+
let mut reader = csv_reader(EnumeratorRead::new(data.value));
|
203
|
+
let mut record = csv::ByteRecord::new();
|
204
|
+
|
205
|
+
while reader.read_byte_record(&mut record)? {
|
206
|
+
let inner_array = record_to_ruby(&record);
|
207
|
+
unsafe {
|
208
|
+
rb_yield(inner_array);
|
209
|
+
}
|
210
|
+
}
|
211
|
+
|
212
|
+
Ok(())
|
213
|
+
}
|
214
|
+
|
215
|
+
fn parse_csv(data: String) -> Result<Vec<csv::StringRecord>, csv::Error> {
|
216
|
+
let mut reader = csv_reader(data.as_bytes());
|
217
|
+
reader
|
107
218
|
.records()
|
108
|
-
.collect::<Result<Vec<csv::StringRecord>, csv::Error>>()
|
109
|
-
return records;
|
219
|
+
.collect::<Result<Vec<csv::StringRecord>, csv::Error>>()
|
110
220
|
}
|
111
221
|
|
112
222
|
ruby! {
|
113
223
|
class RscsvReader {
|
224
|
+
def each_internal(data: Enumerator) {
|
225
|
+
match yield_csv(data) {
|
226
|
+
Err(_) => throw!("Error parsing CSV"),
|
227
|
+
Ok(_) => ()
|
228
|
+
}
|
229
|
+
}
|
114
230
|
def parse(data: String) -> VecWrap<csv::StringRecord> {
|
115
231
|
match parse_csv(data) {
|
116
232
|
Err(_) => throw!("Error parsing CSV"),
|
117
|
-
Ok(result) =>
|
118
|
-
}
|
233
|
+
Ok(result) => VecWrap(result)
|
234
|
+
}
|
119
235
|
}
|
120
236
|
}
|
121
237
|
class RscsvWriter {
|
@@ -124,8 +240,8 @@ ruby! {
|
|
124
240
|
let result = wtr.write_record(&(row.0));
|
125
241
|
match result {
|
126
242
|
Err(_) => throw!("Error generating csv"),
|
127
|
-
Ok(_) =>
|
128
|
-
}
|
243
|
+
Ok(_) => String::from_utf8(wtr.into_inner().unwrap()).unwrap(),
|
244
|
+
}
|
129
245
|
}
|
130
246
|
|
131
247
|
def generate_lines(rows: VecWrap<VecWrap<String>>) -> String {
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: rscsv
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2.1
|
4
|
+
version: 0.3.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Ville Lautanala
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2017-05-
|
11
|
+
date: 2017-05-30 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: helix_runtime
|