toastyapps-excelsior 0.0.4 → 0.0.5
Sign up to get free protection for your applications and to get access to all the features.
- data/Rakefile +1 -1
- data/ext/excelsior_reader/excelsior_reader.c +99 -44
- data/ext/excelsior_reader/excelsior_reader.rl +82 -27
- metadata +2 -2
data/Rakefile
CHANGED
@@ -8,7 +8,7 @@ PKG_FILES = %w(Rakefile) +
|
|
8
8
|
|
9
9
|
gem_spec = Gem::Specification.new do |gem_spec|
|
10
10
|
gem_spec.name = 'excelsior'
|
11
|
-
gem_spec.version = '0.0.4'
|
11
|
+
gem_spec.version = '0.0.5'
|
12
12
|
gem_spec.summary = 'A Ruby gem that uses C bindings to read CSV files superfast. I\'m totally serial!'
|
13
13
|
gem_spec.description = 'A Ruby gem that uses C bindings to read CSV files superfast. I\'m totally serial!'
|
14
14
|
gem_spec.email = 'matt@toastyapps.com'
|
@@ -1,26 +1,18 @@
|
|
1
1
|
|
2
2
|
#line 1 "excelsior_reader.rl"
|
3
|
-
#include
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
VALUE arr;
|
11
|
-
rows = rb_ary_new();
|
12
|
-
arr = rb_ary_new();
|
13
|
-
len = RSTRING(data)->len;
|
14
|
-
p = RSTRING(data)->ptr;
|
15
|
-
pe = p + len;
|
16
|
-
eof = pe;
|
17
|
-
int has_found = 0;
|
18
|
-
|
19
|
-
#line 31 "excelsior_reader.rl"
|
3
|
+
#include <ruby.h>
|
4
|
+
|
5
|
+
static ID s_read;
|
6
|
+
VALUE arr;
|
7
|
+
int has_found = 0;
|
8
|
+
#define BUFSIZE 16384
|
9
|
+
|
20
10
|
|
11
|
+
#line 23 "excelsior_reader.rl"
|
21
12
|
|
22
13
|
|
23
|
-
|
14
|
+
|
15
|
+
#line 16 "excelsior_reader.c"
|
24
16
|
static const char _excelsior_scan_actions[] = {
|
25
17
|
0, 1, 2, 1, 7, 1, 8, 1,
|
26
18
|
9, 1, 10, 1, 11, 2, 0, 1,
|
@@ -79,9 +71,28 @@ static const int excelsior_scan_error = 0;
|
|
79
71
|
static const int excelsior_scan_en_main = 2;
|
80
72
|
|
81
73
|
|
82
|
-
#line
|
83
|
-
|
84
|
-
|
74
|
+
#line 26 "excelsior_reader.rl"
|
75
|
+
|
76
|
+
|
77
|
+
VALUE e_rows(int argc, VALUE *argv, VALUE self) {
|
78
|
+
|
79
|
+
int cs, act, have = 0, nread = 0, curline = 1, text = 0;
|
80
|
+
char *ts = 0, *te = 0, *buf = NULL, *eof = NULL;
|
81
|
+
int buffer_size = BUFSIZE;
|
82
|
+
|
83
|
+
has_found = 0;
|
84
|
+
VALUE io;
|
85
|
+
int is_io = 0;
|
86
|
+
int done = 0;
|
87
|
+
|
88
|
+
arr = rb_ary_new();
|
89
|
+
rb_scan_args(argc, argv, "1", &io);
|
90
|
+
|
91
|
+
is_io = rb_respond_to(io, s_read);
|
92
|
+
buf = ALLOC_N(char, buffer_size);
|
93
|
+
|
94
|
+
|
95
|
+
#line 96 "excelsior_reader.c"
|
85
96
|
{
|
86
97
|
cs = excelsior_scan_start;
|
87
98
|
ts = 0;
|
@@ -89,9 +100,41 @@ static const int excelsior_scan_en_main = 2;
|
|
89
100
|
act = 0;
|
90
101
|
}
|
91
102
|
|
92
|
-
#line
|
93
|
-
|
94
|
-
|
103
|
+
#line 46 "excelsior_reader.rl"
|
104
|
+
|
105
|
+
while(!done) {
|
106
|
+
|
107
|
+
int len, space = buffer_size - have;
|
108
|
+
VALUE str;
|
109
|
+
char *p, *pe;
|
110
|
+
p = buf + have;
|
111
|
+
|
112
|
+
if(is_io) {
|
113
|
+
str = rb_funcall(io, s_read, 1, INT2FIX(space));
|
114
|
+
len = RSTRING_LEN(str);
|
115
|
+
memcpy(p, StringValuePtr(str), len);
|
116
|
+
} else {
|
117
|
+
// Going to assume it's a string and already in memory
|
118
|
+
//str = io;
|
119
|
+
len = buffer_size = RSTRING_LEN(io);
|
120
|
+
memcpy(p, StringValuePtr(io), len);
|
121
|
+
space = buffer_size - have;
|
122
|
+
pe = p + buffer_size;
|
123
|
+
eof = pe;
|
124
|
+
done = 1;
|
125
|
+
}
|
126
|
+
|
127
|
+
if(len < space) {
|
128
|
+
done = 1;
|
129
|
+
//p[len++] = 0; can't seem to get it to work with this
|
130
|
+
pe = p + len;
|
131
|
+
eof = pe;
|
132
|
+
} else {
|
133
|
+
pe = p + len;
|
134
|
+
}
|
135
|
+
|
136
|
+
|
137
|
+
#line 138 "excelsior_reader.c"
|
95
138
|
{
|
96
139
|
int _klen;
|
97
140
|
unsigned int _trans;
|
@@ -112,7 +155,7 @@ _resume:
|
|
112
155
|
#line 1 "excelsior_reader.rl"
|
113
156
|
{ts = p;}
|
114
157
|
break;
|
115
|
-
#line
|
158
|
+
#line 159 "excelsior_reader.c"
|
116
159
|
}
|
117
160
|
}
|
118
161
|
|
@@ -182,31 +225,31 @@ _eof_trans:
|
|
182
225
|
{te = p+1;}
|
183
226
|
break;
|
184
227
|
case 4:
|
185
|
-
#line
|
228
|
+
#line 18 "excelsior_reader.rl"
|
186
229
|
{act = 2;}
|
187
230
|
break;
|
188
231
|
case 5:
|
189
|
-
#line
|
232
|
+
#line 19 "excelsior_reader.rl"
|
190
233
|
{act = 3;}
|
191
234
|
break;
|
192
235
|
case 6:
|
193
|
-
#line
|
236
|
+
#line 20 "excelsior_reader.rl"
|
194
237
|
{act = 4;}
|
195
238
|
break;
|
196
239
|
case 7:
|
197
|
-
#line
|
198
|
-
{te = p+1;{
|
240
|
+
#line 17 "excelsior_reader.rl"
|
241
|
+
{te = p+1;{ rb_yield(arr); arr = rb_ary_new(); has_found = 0; }}
|
199
242
|
break;
|
200
243
|
case 8:
|
201
|
-
#line
|
244
|
+
#line 21 "excelsior_reader.rl"
|
202
245
|
{te = p+1;{ if(has_found == 0) rb_ary_push(arr, Qnil); has_found = 0;}}
|
203
246
|
break;
|
204
247
|
case 9:
|
205
|
-
#line
|
206
|
-
{te = p;p--;{
|
248
|
+
#line 17 "excelsior_reader.rl"
|
249
|
+
{te = p;p--;{ rb_yield(arr); arr = rb_ary_new(); has_found = 0; }}
|
207
250
|
break;
|
208
251
|
case 10:
|
209
|
-
#line
|
252
|
+
#line 20 "excelsior_reader.rl"
|
210
253
|
{te = p;p--;{ rb_ary_push(arr, rb_str_new(ts + 1, te - ts - 2)); has_found = 1;}}
|
211
254
|
break;
|
212
255
|
case 11:
|
@@ -227,7 +270,7 @@ _eof_trans:
|
|
227
270
|
}
|
228
271
|
}
|
229
272
|
break;
|
230
|
-
#line
|
273
|
+
#line 274 "excelsior_reader.c"
|
231
274
|
}
|
232
275
|
}
|
233
276
|
|
@@ -244,7 +287,7 @@ _again:
|
|
244
287
|
#line 1 "excelsior_reader.rl"
|
245
288
|
{act = 0;}
|
246
289
|
break;
|
247
|
-
#line
|
290
|
+
#line 291 "excelsior_reader.c"
|
248
291
|
}
|
249
292
|
}
|
250
293
|
|
@@ -264,18 +307,30 @@ _again:
|
|
264
307
|
_out: {}
|
265
308
|
}
|
266
309
|
|
267
|
-
#line
|
268
|
-
|
269
|
-
|
270
|
-
|
271
|
-
|
310
|
+
#line 79 "excelsior_reader.rl"
|
311
|
+
|
312
|
+
if(ts != 0) { // we are not at the end
|
313
|
+
have = pe - ts; //so copy stuff back in
|
314
|
+
memmove(buf, ts, have);
|
315
|
+
te = buf + (te - ts);
|
316
|
+
ts = buf;
|
317
|
+
}
|
318
|
+
|
319
|
+
}
|
320
|
+
|
321
|
+
if(RARRAY_LEN(arr) > 0) { // have a last array to yield
|
322
|
+
rb_yield(arr);
|
323
|
+
}
|
324
|
+
|
325
|
+
return Qnil;
|
272
326
|
}
|
273
327
|
|
274
328
|
VALUE mExcelsior;
|
275
329
|
VALUE cReader;
|
276
330
|
|
277
331
|
void Init_excelsior_reader() {
|
278
|
-
|
279
|
-
|
280
|
-
|
332
|
+
s_read = rb_intern("read");
|
333
|
+
mExcelsior = rb_define_module("Excelsior");
|
334
|
+
cReader = rb_define_class_under(mExcelsior, "Reader", rb_cObject);
|
335
|
+
rb_define_singleton_method(cReader, "rows", e_rows, -1);
|
281
336
|
}
|
@@ -1,19 +1,11 @@
|
|
1
|
-
#include
|
1
|
+
#include <ruby.h>
|
2
2
|
|
3
|
-
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
rows = rb_ary_new();
|
10
|
-
arr = rb_ary_new();
|
11
|
-
len = RSTRING(data)->len;
|
12
|
-
p = RSTRING(data)->ptr;
|
13
|
-
pe = p + len;
|
14
|
-
eof = pe;
|
15
|
-
int has_found = 0;
|
16
|
-
%%{
|
3
|
+
static ID s_read;
|
4
|
+
VALUE arr;
|
5
|
+
int has_found = 0;
|
6
|
+
#define BUFSIZE 16384
|
7
|
+
|
8
|
+
%%{
|
17
9
|
machine excelsior_scan;
|
18
10
|
delimeter = ",";
|
19
11
|
newline = "\r"? "\n" | "\r" | "\n";
|
@@ -22,28 +14,91 @@ VALUE e_parse(VALUE self, VALUE data) {
|
|
22
14
|
string = '"' (schar | '""')* '"' ;
|
23
15
|
value = letter+;
|
24
16
|
main := |*
|
25
|
-
newline {
|
17
|
+
newline { rb_yield(arr); arr = rb_ary_new(); has_found = 0; };
|
26
18
|
space;
|
27
19
|
value { rb_ary_push(arr, rb_str_new(ts, te - ts)); has_found = 1;};
|
28
20
|
string { rb_ary_push(arr, rb_str_new(ts + 1, te - ts - 2)); has_found = 1;};
|
29
21
|
delimeter { if(has_found == 0) rb_ary_push(arr, Qnil); has_found = 0;};
|
30
22
|
*|;
|
31
|
-
|
23
|
+
}%%
|
24
|
+
|
25
|
+
%% write data nofinal;
|
26
|
+
|
32
27
|
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
28
|
+
VALUE e_rows(int argc, VALUE *argv, VALUE self) {
|
29
|
+
|
30
|
+
int cs, act, have = 0, nread = 0, curline = 1, text = 0;
|
31
|
+
char *ts = 0, *te = 0, *buf = NULL, *eof = NULL;
|
32
|
+
int buffer_size = BUFSIZE;
|
33
|
+
|
34
|
+
has_found = 0;
|
35
|
+
VALUE io;
|
36
|
+
int is_io = 0;
|
37
|
+
int done = 0;
|
38
|
+
|
39
|
+
arr = rb_ary_new();
|
40
|
+
rb_scan_args(argc, argv, "1", &io);
|
41
|
+
|
42
|
+
is_io = rb_respond_to(io, s_read);
|
43
|
+
buf = ALLOC_N(char, buffer_size);
|
44
|
+
|
45
|
+
%% write init;
|
46
|
+
|
47
|
+
while(!done) {
|
48
|
+
|
49
|
+
int len, space = buffer_size - have;
|
50
|
+
VALUE str;
|
51
|
+
char *p, *pe;
|
52
|
+
p = buf + have;
|
53
|
+
|
54
|
+
if(is_io) {
|
55
|
+
str = rb_funcall(io, s_read, 1, INT2FIX(space));
|
56
|
+
len = RSTRING_LEN(str);
|
57
|
+
memcpy(p, StringValuePtr(str), len);
|
58
|
+
} else {
|
59
|
+
// Going to assume it's a string and already in memory
|
60
|
+
//str = io;
|
61
|
+
len = buffer_size = RSTRING_LEN(io);
|
62
|
+
memcpy(p, StringValuePtr(io), len);
|
63
|
+
space = buffer_size - have;
|
64
|
+
pe = p + buffer_size;
|
65
|
+
eof = pe;
|
66
|
+
done = 1;
|
67
|
+
}
|
68
|
+
|
69
|
+
if(len < space) {
|
70
|
+
done = 1;
|
71
|
+
//p[len++] = 0; can't seem to get it to work with this
|
72
|
+
pe = p + len;
|
73
|
+
eof = pe;
|
74
|
+
} else {
|
75
|
+
pe = p + len;
|
76
|
+
}
|
77
|
+
|
78
|
+
%% write exec;
|
79
|
+
|
80
|
+
if(ts != 0) { // we are not at the end
|
81
|
+
have = pe - ts; //so copy stuff back in
|
82
|
+
memmove(buf, ts, have);
|
83
|
+
te = buf + (te - ts);
|
84
|
+
ts = buf;
|
85
|
+
}
|
86
|
+
|
87
|
+
}
|
88
|
+
|
89
|
+
if(RARRAY_LEN(arr) > 0) { // have a last array to yield
|
90
|
+
rb_yield(arr);
|
91
|
+
}
|
92
|
+
|
93
|
+
return Qnil;
|
40
94
|
}
|
41
95
|
|
42
96
|
VALUE mExcelsior;
|
43
97
|
VALUE cReader;
|
44
98
|
|
45
99
|
void Init_excelsior_reader() {
|
46
|
-
|
47
|
-
|
48
|
-
|
100
|
+
s_read = rb_intern("read");
|
101
|
+
mExcelsior = rb_define_module("Excelsior");
|
102
|
+
cReader = rb_define_class_under(mExcelsior, "Reader", rb_cObject);
|
103
|
+
rb_define_singleton_method(cReader, "rows", e_rows, -1);
|
49
104
|
}
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: toastyapps-excelsior
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.4
|
4
|
+
version: 0.0.5
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Matthew Mongeau
|
@@ -9,7 +9,7 @@ autorequire:
|
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
11
|
|
12
|
-
date: 2009-08-
|
12
|
+
date: 2009-08-20 21:00:00 -07:00
|
13
13
|
default_executable:
|
14
14
|
dependencies: []
|
15
15
|
|