rscsv 0.2.1 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +6 -1
- data/lib/rscsv.rb +8 -0
- data/lib/rscsv/version.rb +1 -1
- data/src/lib.rs +144 -28
- metadata +2 -2
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA1:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: cb17bc325379d4ebf7d2c7e088b84495253c62d4
|
|
4
|
+
data.tar.gz: e379a8d05d9a39d551e8a118ace70c394598e187
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 3fb8a756221d7b8e293c11ca885e2d935d90ea3abda2b451e0cbd29dd25d63cf2602e0d0109a01239935363fcc822c741b74750727eb29acbe458a95a7e85787
|
|
7
|
+
data.tar.gz: 13dfae6dee17a62a51cd64dcb2503f2472066db729d05fd925b01ddb304bb61a551a591cf2813c92b9739b358eb3bd28459e70f8f16411a8ff1f7f734248c367
|
data/README.md
CHANGED
|
@@ -24,8 +24,13 @@ Rscsv::Writer.generate_lines([['1', '2', '3'], ['3', '4', '5']])
|
|
|
24
24
|
Rscsv::Writer.generate_line(['1', '2', '3'])
|
|
25
25
|
# => 1,2,3\n
|
|
26
26
|
|
|
27
|
-
Rscsv::Reader.parse("1,2,3\n4,5,6")
|
|
27
|
+
Rscsv::Reader.parse("1,2,3\n4,5,6\n")
|
|
28
28
|
# => [["1", "2", "3"], ["4", "5", "6"]]
|
|
29
|
+
|
|
30
|
+
# Streaming from Enumerator
|
|
31
|
+
Rscsv::Reader.each(["1,2,3\n","4,5,6\n"].each) do |row|
|
|
32
|
+
# yields ["1", "2", "3"] and ["4", "5", "6"]
|
|
33
|
+
end
|
|
29
34
|
```
|
|
30
35
|
|
|
31
36
|
This is ~3x faster than using native Ruby `CSV.generate` or `CSV.parse`.
|
data/lib/rscsv.rb
CHANGED
data/lib/rscsv/version.rb
CHANGED
data/src/lib.rs
CHANGED
|
@@ -3,9 +3,12 @@ extern crate helix;
|
|
|
3
3
|
extern crate csv;
|
|
4
4
|
|
|
5
5
|
use std::error::Error;
|
|
6
|
+
use std::io::Read;
|
|
7
|
+
use std::slice::from_raw_parts;
|
|
6
8
|
use helix::sys;
|
|
7
|
-
use helix::sys::VALUE;
|
|
9
|
+
use helix::sys::{VALUE, ID};
|
|
8
10
|
use helix::{UncheckedValue, CheckResult, CheckedValue, ToRust, ToRuby};
|
|
11
|
+
use helix::libc::c_int;
|
|
9
12
|
|
|
10
13
|
struct VecWrap<T>(Vec<T>);
|
|
11
14
|
|
|
@@ -14,10 +17,8 @@ impl<T> UncheckedValue<VecWrap<T>> for VALUE
|
|
|
14
17
|
{
|
|
15
18
|
fn to_checked(self) -> CheckResult<VecWrap<T>> {
|
|
16
19
|
if unsafe { sys::RB_TYPE_P(self, sys::T_ARRAY) } {
|
|
17
|
-
let
|
|
18
|
-
|
|
19
|
-
for i in 0..len {
|
|
20
|
-
let val = unsafe { *ptr.offset(i) };
|
|
20
|
+
let slice = ruby_array_to_slice(self);
|
|
21
|
+
for val in slice.iter() {
|
|
21
22
|
if let Err(error) = val.to_checked() {
|
|
22
23
|
return Err(format!("Failed to convert value for Vec<T>: {}", error));
|
|
23
24
|
}
|
|
@@ -31,20 +32,23 @@ impl<T> UncheckedValue<VecWrap<T>> for VALUE
|
|
|
31
32
|
}
|
|
32
33
|
}
|
|
33
34
|
|
|
35
|
+
fn ruby_array_to_slice<'a>(array: VALUE) -> &'a [VALUE] {
|
|
36
|
+
let length = unsafe { sys::RARRAY_LEN(array) } as usize;
|
|
37
|
+
unsafe { from_raw_parts(sys::RARRAY_CONST_PTR(array), length) }
|
|
38
|
+
}
|
|
39
|
+
|
|
34
40
|
impl ToRust<VecWrap<String>> for CheckedValue<VecWrap<String>>
|
|
35
41
|
where VALUE: UncheckedValue<String>,
|
|
36
42
|
CheckedValue<String>: ToRust<String>
|
|
37
43
|
{
|
|
38
44
|
fn to_rust(self) -> VecWrap<String> {
|
|
39
|
-
let
|
|
40
|
-
let
|
|
41
|
-
|
|
42
|
-
for i in 0..len {
|
|
43
|
-
let val = unsafe { *ptr.offset(i) };
|
|
45
|
+
let slice = ruby_array_to_slice(self.inner);
|
|
46
|
+
let mut vec: Vec<String> = Vec::with_capacity(slice.len());
|
|
47
|
+
for val in slice.iter() {
|
|
44
48
|
let checked = val.to_checked().unwrap();
|
|
45
49
|
vec.push(checked.to_rust());
|
|
46
50
|
}
|
|
47
|
-
|
|
51
|
+
VecWrap(vec)
|
|
48
52
|
}
|
|
49
53
|
}
|
|
50
54
|
|
|
@@ -53,15 +57,13 @@ impl ToRust<VecWrap<VecWrap<String>>> for CheckedValue<VecWrap<VecWrap<String>>>
|
|
|
53
57
|
CheckedValue<VecWrap<String>>: ToRust<VecWrap<String>>
|
|
54
58
|
{
|
|
55
59
|
fn to_rust(self) -> VecWrap<VecWrap<String>> {
|
|
56
|
-
let
|
|
57
|
-
let
|
|
58
|
-
|
|
59
|
-
for i in 0..len {
|
|
60
|
-
let val = unsafe { *ptr.offset(i) };
|
|
60
|
+
let slice = ruby_array_to_slice(self.inner);
|
|
61
|
+
let mut vec: Vec<VecWrap<String>> = Vec::with_capacity(slice.len());
|
|
62
|
+
for val in slice.iter() {
|
|
61
63
|
let checked = val.to_checked().unwrap();
|
|
62
64
|
vec.push(checked.to_rust());
|
|
63
65
|
}
|
|
64
|
-
|
|
66
|
+
VecWrap(vec)
|
|
65
67
|
}
|
|
66
68
|
}
|
|
67
69
|
|
|
@@ -70,6 +72,9 @@ extern "C" {
|
|
|
70
72
|
pub fn rb_ary_new_capa(capa: isize) -> VALUE;
|
|
71
73
|
pub fn rb_ary_entry(ary: VALUE, offset: isize) -> VALUE;
|
|
72
74
|
pub fn rb_ary_push(ary: VALUE, item: VALUE) -> VALUE;
|
|
75
|
+
pub fn rb_block_given_p() -> c_int;
|
|
76
|
+
pub fn rb_yield(value: VALUE);
|
|
77
|
+
pub fn rb_funcall(value: VALUE, name: ID, nargs: c_int, ...) -> VALUE;
|
|
73
78
|
}
|
|
74
79
|
|
|
75
80
|
impl ToRuby for VecWrap<csv::StringRecord> {
|
|
@@ -96,26 +101,137 @@ fn generate_lines(rows: VecWrap<VecWrap<String>>) -> Result<String, Box<Error>>
|
|
|
96
101
|
wtr.write_record(&(row.0))?;
|
|
97
102
|
}
|
|
98
103
|
|
|
99
|
-
|
|
104
|
+
Ok(String::from_utf8(wtr.into_inner()?)?)
|
|
100
105
|
}
|
|
101
106
|
|
|
102
|
-
fn
|
|
103
|
-
let
|
|
107
|
+
fn record_to_ruby(record: &csv::ByteRecord) -> VALUE {
|
|
108
|
+
let inner_array = unsafe { rb_ary_new_capa(record.len() as isize) };
|
|
109
|
+
for column in record.iter() {
|
|
110
|
+
unsafe {
|
|
111
|
+
let column_value = sys::rb_utf8_str_new(column.as_ptr() as *const i8,
|
|
112
|
+
column.len() as i64);
|
|
113
|
+
rb_ary_push(inner_array, column_value);
|
|
114
|
+
}
|
|
115
|
+
}
|
|
116
|
+
inner_array
|
|
117
|
+
}
|
|
118
|
+
|
|
119
|
+
|
|
120
|
+
impl UncheckedValue<Enumerator> for VALUE {
|
|
121
|
+
fn to_checked(self) -> CheckResult<Enumerator> {
|
|
122
|
+
Ok(unsafe { CheckedValue::new(self) })
|
|
123
|
+
}
|
|
124
|
+
}
|
|
125
|
+
|
|
126
|
+
impl ToRust<Enumerator> for CheckedValue<Enumerator> {
|
|
127
|
+
fn to_rust(self) -> Enumerator {
|
|
128
|
+
Enumerator { value: self.inner }
|
|
129
|
+
}
|
|
130
|
+
}
|
|
131
|
+
|
|
132
|
+
struct Enumerator {
|
|
133
|
+
value: VALUE,
|
|
134
|
+
}
|
|
135
|
+
|
|
136
|
+
struct EnumeratorRead {
|
|
137
|
+
value: VALUE,
|
|
138
|
+
next: Option<Vec<u8>>,
|
|
139
|
+
}
|
|
140
|
+
|
|
141
|
+
impl EnumeratorRead {
|
|
142
|
+
fn new(value: VALUE) -> EnumeratorRead {
|
|
143
|
+
EnumeratorRead {
|
|
144
|
+
value: value,
|
|
145
|
+
next: None,
|
|
146
|
+
}
|
|
147
|
+
}
|
|
148
|
+
|
|
149
|
+
fn read_and_store_overflow(&mut self, buf: &mut [u8], value: &[u8]) -> std::io::Result<usize> {
|
|
150
|
+
if value.len() > buf.len() {
|
|
151
|
+
match value.split_at(buf.len()) {
|
|
152
|
+
(current, next) => {
|
|
153
|
+
for (index, c) in current.iter().enumerate() {
|
|
154
|
+
buf[index] = *c;
|
|
155
|
+
}
|
|
156
|
+
self.next = Some(next.to_vec());
|
|
157
|
+
Ok(current.len())
|
|
158
|
+
}
|
|
159
|
+
}
|
|
160
|
+
|
|
161
|
+
} else {
|
|
162
|
+
for (index, value) in value.iter().enumerate() {
|
|
163
|
+
buf[index] = *value;
|
|
164
|
+
}
|
|
165
|
+
self.next = None;
|
|
166
|
+
Ok(value.len() as usize)
|
|
167
|
+
}
|
|
168
|
+
}
|
|
169
|
+
|
|
170
|
+
fn read_from_external(&mut self, buf: &mut [u8]) -> std::io::Result<usize> {
|
|
171
|
+
let next = unsafe {
|
|
172
|
+
rb_funcall(self.value,
|
|
173
|
+
sys::rb_intern("next\0".as_ptr() as *const i8),
|
|
174
|
+
0)
|
|
175
|
+
};
|
|
176
|
+
let slice = unsafe {
|
|
177
|
+
from_raw_parts(sys::RSTRING_PTR(next) as *const u8,
|
|
178
|
+
sys::RSTRING_LEN(next) as usize)
|
|
179
|
+
};
|
|
180
|
+
|
|
181
|
+
self.read_and_store_overflow(buf, slice)
|
|
182
|
+
}
|
|
183
|
+
}
|
|
184
|
+
|
|
185
|
+
impl Read for EnumeratorRead {
|
|
186
|
+
fn read(&mut self, buf: &mut [u8]) -> std::io::Result<usize> {
|
|
187
|
+
match self.next.clone() {
|
|
188
|
+
Some(inner) => self.read_and_store_overflow(buf, &inner),
|
|
189
|
+
None => self.read_from_external(buf),
|
|
190
|
+
}
|
|
191
|
+
}
|
|
192
|
+
}
|
|
193
|
+
|
|
194
|
+
fn csv_reader<R: Read>(reader: R) -> csv::Reader<R> {
|
|
195
|
+
csv::ReaderBuilder::new()
|
|
196
|
+
.buffer_capacity(16 * 1024)
|
|
104
197
|
.has_headers(false)
|
|
105
|
-
.from_reader(
|
|
106
|
-
|
|
198
|
+
.from_reader(reader)
|
|
199
|
+
}
|
|
200
|
+
|
|
201
|
+
fn yield_csv(data: Enumerator) -> Result<(), csv::Error> {
|
|
202
|
+
let mut reader = csv_reader(EnumeratorRead::new(data.value));
|
|
203
|
+
let mut record = csv::ByteRecord::new();
|
|
204
|
+
|
|
205
|
+
while reader.read_byte_record(&mut record)? {
|
|
206
|
+
let inner_array = record_to_ruby(&record);
|
|
207
|
+
unsafe {
|
|
208
|
+
rb_yield(inner_array);
|
|
209
|
+
}
|
|
210
|
+
}
|
|
211
|
+
|
|
212
|
+
Ok(())
|
|
213
|
+
}
|
|
214
|
+
|
|
215
|
+
fn parse_csv(data: String) -> Result<Vec<csv::StringRecord>, csv::Error> {
|
|
216
|
+
let mut reader = csv_reader(data.as_bytes());
|
|
217
|
+
reader
|
|
107
218
|
.records()
|
|
108
|
-
.collect::<Result<Vec<csv::StringRecord>, csv::Error>>()
|
|
109
|
-
return records;
|
|
219
|
+
.collect::<Result<Vec<csv::StringRecord>, csv::Error>>()
|
|
110
220
|
}
|
|
111
221
|
|
|
112
222
|
ruby! {
|
|
113
223
|
class RscsvReader {
|
|
224
|
+
def each_internal(data: Enumerator) {
|
|
225
|
+
match yield_csv(data) {
|
|
226
|
+
Err(_) => throw!("Error parsing CSV"),
|
|
227
|
+
Ok(_) => ()
|
|
228
|
+
}
|
|
229
|
+
}
|
|
114
230
|
def parse(data: String) -> VecWrap<csv::StringRecord> {
|
|
115
231
|
match parse_csv(data) {
|
|
116
232
|
Err(_) => throw!("Error parsing CSV"),
|
|
117
|
-
Ok(result) =>
|
|
118
|
-
}
|
|
233
|
+
Ok(result) => VecWrap(result)
|
|
234
|
+
}
|
|
119
235
|
}
|
|
120
236
|
}
|
|
121
237
|
class RscsvWriter {
|
|
@@ -124,8 +240,8 @@ ruby! {
|
|
|
124
240
|
let result = wtr.write_record(&(row.0));
|
|
125
241
|
match result {
|
|
126
242
|
Err(_) => throw!("Error generating csv"),
|
|
127
|
-
Ok(_) =>
|
|
128
|
-
}
|
|
243
|
+
Ok(_) => String::from_utf8(wtr.into_inner().unwrap()).unwrap(),
|
|
244
|
+
}
|
|
129
245
|
}
|
|
130
246
|
|
|
131
247
|
def generate_lines(rows: VecWrap<VecWrap<String>>) -> String {
|
metadata
CHANGED
|
@@ -1,14 +1,14 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: rscsv
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.2.1
|
|
4
|
+
version: 0.3.0
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Ville Lautanala
|
|
8
8
|
autorequire:
|
|
9
9
|
bindir: exe
|
|
10
10
|
cert_chain: []
|
|
11
|
-
date: 2017-05-
|
|
11
|
+
date: 2017-05-30 00:00:00.000000000 Z
|
|
12
12
|
dependencies:
|
|
13
13
|
- !ruby/object:Gem::Dependency
|
|
14
14
|
name: helix_runtime
|