excelsior 0.0.9 → 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- data/Rakefile +1 -1
- data/ext/excelsior_reader/excelsior_reader.c +92 -32
- data/ext/excelsior_reader/excelsior_reader.rl +48 -7
- data/lib/excelsior.rb +1 -1
- metadata +2 -2
data/Rakefile
CHANGED
@@ -8,7 +8,7 @@ PKG_FILES = %w(Rakefile) +
|
|
8
8
|
|
9
9
|
gem_spec = Gem::Specification.new do |gem_spec|
|
10
10
|
gem_spec.name = 'excelsior'
|
11
|
-
gem_spec.version = '0.0.9'
|
11
|
+
gem_spec.version = '0.1.0'
|
12
12
|
gem_spec.summary = 'A Ruby gem that uses C bindings to read CSV files superfast. I\'m totally serial!'
|
13
13
|
gem_spec.description = 'A Ruby gem that uses C bindings to read CSV files superfast. I\'m totally serial!'
|
14
14
|
gem_spec.email = 'matt@toastyapps.com'
|
data/ext/excelsior_reader/excelsior_reader.c
CHANGED
@@ -1,18 +1,22 @@
|
|
1
1
|
|
2
|
-
#line 1 "
|
2
|
+
#line 1 "excelsior_reader.rl"
|
3
3
|
#include <ruby.h>
|
4
4
|
|
5
5
|
static ID s_read;
|
6
6
|
VALUE arr;
|
7
|
+
VALUE header_row;
|
8
|
+
int row_index = 0;
|
9
|
+
int header = 0;
|
10
|
+
int is_header_row = 0;
|
7
11
|
int has_found = 0;
|
8
12
|
#define BUFSIZE 16384
|
9
13
|
|
10
14
|
|
11
|
-
#line
|
15
|
+
#line 46 "excelsior_reader.rl"
|
12
16
|
|
13
17
|
|
14
18
|
|
15
|
-
#line
|
19
|
+
#line 20 "excelsior_reader.c"
|
16
20
|
static const char _excelsior_scan_actions[] = {
|
17
21
|
0, 1, 2, 1, 7, 1, 8, 1,
|
18
22
|
9, 1, 10, 1, 11, 2, 0, 1,
|
@@ -71,7 +75,7 @@ static const int excelsior_scan_error = 0;
|
|
71
75
|
static const int excelsior_scan_en_main = 2;
|
72
76
|
|
73
77
|
|
74
|
-
#line
|
78
|
+
#line 49 "excelsior_reader.rl"
|
75
79
|
|
76
80
|
|
77
81
|
VALUE e_rows(int argc, VALUE *argv, VALUE self) {
|
@@ -82,17 +86,24 @@ VALUE e_rows(int argc, VALUE *argv, VALUE self) {
|
|
82
86
|
|
83
87
|
has_found = 0;
|
84
88
|
VALUE io;
|
89
|
+
VALUE options;
|
85
90
|
int is_io = 0;
|
86
91
|
int done = 0;
|
87
92
|
|
88
93
|
arr = rb_ary_new();
|
89
|
-
rb_scan_args(argc, argv, "
|
90
|
-
|
94
|
+
rb_scan_args(argc, argv, "11", &io, &options);
|
95
|
+
if(options != Qnil) {
|
96
|
+
header = rb_hash_aref(options, ID2SYM(rb_intern("header"))) == 2;
|
97
|
+
}
|
98
|
+
if(header == 1) {
|
99
|
+
is_header_row = 1;
|
100
|
+
header_row = rb_ary_new();
|
101
|
+
}
|
91
102
|
is_io = rb_respond_to(io, s_read);
|
92
103
|
buf = (char *) malloc(buffer_size); //ALLOC_N(char, buffer_size); <= This caused problems
|
93
104
|
|
94
105
|
|
95
|
-
#line
|
106
|
+
#line 107 "excelsior_reader.c"
|
96
107
|
{
|
97
108
|
cs = excelsior_scan_start;
|
98
109
|
ts = 0;
|
@@ -100,7 +111,7 @@ VALUE e_rows(int argc, VALUE *argv, VALUE self) {
|
|
100
111
|
act = 0;
|
101
112
|
}
|
102
113
|
|
103
|
-
#line
|
114
|
+
#line 76 "excelsior_reader.rl"
|
104
115
|
|
105
116
|
while(!done) {
|
106
117
|
|
@@ -133,7 +144,7 @@ VALUE e_rows(int argc, VALUE *argv, VALUE self) {
|
|
133
144
|
}
|
134
145
|
|
135
146
|
|
136
|
-
#line
|
147
|
+
#line 148 "excelsior_reader.c"
|
137
148
|
{
|
138
149
|
int _klen;
|
139
150
|
unsigned int _trans;
|
@@ -151,10 +162,10 @@ _resume:
|
|
151
162
|
while ( _nacts-- > 0 ) {
|
152
163
|
switch ( *_acts++ ) {
|
153
164
|
case 2:
|
154
|
-
#line 1 "
|
165
|
+
#line 1 "excelsior_reader.rl"
|
155
166
|
{ts = p;}
|
156
167
|
break;
|
157
|
-
#line
|
168
|
+
#line 169 "excelsior_reader.c"
|
158
169
|
}
|
159
170
|
}
|
160
171
|
|
@@ -220,48 +231,86 @@ _eof_trans:
|
|
220
231
|
switch ( *_acts++ )
|
221
232
|
{
|
222
233
|
case 3:
|
223
|
-
#line 1 "
|
234
|
+
#line 1 "excelsior_reader.rl"
|
224
235
|
{te = p+1;}
|
225
236
|
break;
|
226
237
|
case 4:
|
227
|
-
#line
|
238
|
+
#line 41 "excelsior_reader.rl"
|
228
239
|
{act = 2;}
|
229
240
|
break;
|
230
241
|
case 5:
|
231
|
-
#line
|
242
|
+
#line 42 "excelsior_reader.rl"
|
232
243
|
{act = 3;}
|
233
244
|
break;
|
234
245
|
case 6:
|
235
|
-
#line
|
246
|
+
#line 43 "excelsior_reader.rl"
|
236
247
|
{act = 4;}
|
237
248
|
break;
|
238
249
|
case 7:
|
239
|
-
#line
|
240
|
-
{te = p+1;{
|
250
|
+
#line 21 "excelsior_reader.rl"
|
251
|
+
{te = p+1;{
|
252
|
+
if(has_found ==0) {
|
253
|
+
rb_ary_push((is_header_row ? header_row : arr), Qnil);
|
254
|
+
}
|
255
|
+
if(!is_header_row) {
|
256
|
+
if(header == 1) {
|
257
|
+
VALUE hash = rb_hash_new();
|
258
|
+
int i = 0;
|
259
|
+
for(i = 0; i < RARRAY_LEN(header_row); i++) {
|
260
|
+
rb_hash_aset(hash, rb_ary_entry(header_row, i), rb_ary_entry(arr, i));
|
261
|
+
}
|
262
|
+
rb_yield(hash);
|
263
|
+
} else {
|
264
|
+
rb_yield(arr);
|
265
|
+
}
|
266
|
+
}
|
267
|
+
arr = rb_ary_new();
|
268
|
+
has_found = 0;
|
269
|
+
is_header_row = 0;
|
270
|
+
}}
|
241
271
|
break;
|
242
272
|
case 8:
|
243
|
-
#line
|
244
|
-
{te = p+1;{ if(has_found == 0) rb_ary_push(arr, Qnil); has_found = 0;}}
|
273
|
+
#line 44 "excelsior_reader.rl"
|
274
|
+
{te = p+1;{ if(has_found == 0) rb_ary_push((is_header_row ? header_row : arr), Qnil); has_found = 0;}}
|
245
275
|
break;
|
246
276
|
case 9:
|
247
|
-
#line
|
248
|
-
{te = p;p--;{
|
277
|
+
#line 21 "excelsior_reader.rl"
|
278
|
+
{te = p;p--;{
|
279
|
+
if(has_found ==0) {
|
280
|
+
rb_ary_push((is_header_row ? header_row : arr), Qnil);
|
281
|
+
}
|
282
|
+
if(!is_header_row) {
|
283
|
+
if(header == 1) {
|
284
|
+
VALUE hash = rb_hash_new();
|
285
|
+
int i = 0;
|
286
|
+
for(i = 0; i < RARRAY_LEN(header_row); i++) {
|
287
|
+
rb_hash_aset(hash, rb_ary_entry(header_row, i), rb_ary_entry(arr, i));
|
288
|
+
}
|
289
|
+
rb_yield(hash);
|
290
|
+
} else {
|
291
|
+
rb_yield(arr);
|
292
|
+
}
|
293
|
+
}
|
294
|
+
arr = rb_ary_new();
|
295
|
+
has_found = 0;
|
296
|
+
is_header_row = 0;
|
297
|
+
}}
|
249
298
|
break;
|
250
299
|
case 10:
|
251
|
-
#line
|
252
|
-
{te = p;p--;{ rb_ary_push(arr, rb_str_new(ts + 1, te - ts - 2)); has_found = 1;}}
|
300
|
+
#line 43 "excelsior_reader.rl"
|
301
|
+
{te = p;p--;{ rb_ary_push((is_header_row ? header_row : arr), rb_str_new(ts + 1, te - ts - 2)); has_found = 1;}}
|
253
302
|
break;
|
254
303
|
case 11:
|
255
|
-
#line 1 "
|
304
|
+
#line 1 "excelsior_reader.rl"
|
256
305
|
{ switch( act ) {
|
257
306
|
case 0:
|
258
307
|
{{cs = 0; goto _again;}}
|
259
308
|
break;
|
260
309
|
case 3:
|
261
|
-
{{p = ((te))-1;} rb_ary_push(arr, rb_str_new(ts, te - ts)); has_found = 1;}
|
310
|
+
{{p = ((te))-1;} rb_ary_push((is_header_row ? header_row : arr), rb_str_new(ts, te - ts)); has_found = 1;}
|
262
311
|
break;
|
263
312
|
case 4:
|
264
|
-
{{p = ((te))-1;} rb_ary_push(arr, rb_str_new(ts + 1, te - ts - 2)); has_found = 1;}
|
313
|
+
{{p = ((te))-1;} rb_ary_push((is_header_row ? header_row : arr), rb_str_new(ts + 1, te - ts - 2)); has_found = 1;}
|
265
314
|
break;
|
266
315
|
default:
|
267
316
|
{{p = ((te))-1;}}
|
@@ -269,7 +318,7 @@ _eof_trans:
|
|
269
318
|
}
|
270
319
|
}
|
271
320
|
break;
|
272
|
-
#line
|
321
|
+
#line 322 "excelsior_reader.c"
|
273
322
|
}
|
274
323
|
}
|
275
324
|
|
@@ -279,14 +328,14 @@ _again:
|
|
279
328
|
while ( _nacts-- > 0 ) {
|
280
329
|
switch ( *_acts++ ) {
|
281
330
|
case 0:
|
282
|
-
#line 1 "
|
331
|
+
#line 1 "excelsior_reader.rl"
|
283
332
|
{ts = 0;}
|
284
333
|
break;
|
285
334
|
case 1:
|
286
|
-
#line 1 "
|
335
|
+
#line 1 "excelsior_reader.rl"
|
287
336
|
{act = 0;}
|
288
337
|
break;
|
289
|
-
#line
|
338
|
+
#line 339 "excelsior_reader.c"
|
290
339
|
}
|
291
340
|
}
|
292
341
|
|
@@ -306,7 +355,7 @@ _again:
|
|
306
355
|
_out: {}
|
307
356
|
}
|
308
357
|
|
309
|
-
#line
|
358
|
+
#line 108 "excelsior_reader.rl"
|
310
359
|
|
311
360
|
if(ts != 0) { // we are not at the end
|
312
361
|
have = pe - ts; //so copy stuff back in
|
@@ -318,7 +367,18 @@ _again:
|
|
318
367
|
}
|
319
368
|
|
320
369
|
if(RARRAY_LEN(arr) > 0) { // have a last array to yield
|
321
|
-
|
370
|
+
if(!is_header_row) {
|
371
|
+
if(header == 1) {
|
372
|
+
VALUE hash = rb_hash_new();
|
373
|
+
int i = 0;
|
374
|
+
for(i = 0; i < RARRAY_LEN(header_row); i++) {
|
375
|
+
rb_hash_aset(hash, rb_ary_entry(header_row, i), rb_ary_entry(arr, i));
|
376
|
+
}
|
377
|
+
rb_yield(hash);
|
378
|
+
} else {
|
379
|
+
rb_yield(arr);
|
380
|
+
}
|
381
|
+
}
|
322
382
|
}
|
323
383
|
|
324
384
|
return Qnil;
|
data/ext/excelsior_reader/excelsior_reader.rl
CHANGED
@@ -2,6 +2,10 @@
|
|
2
2
|
|
3
3
|
static ID s_read;
|
4
4
|
VALUE arr;
|
5
|
+
VALUE header_row;
|
6
|
+
int row_index = 0;
|
7
|
+
int header = 0;
|
8
|
+
int is_header_row = 0;
|
5
9
|
int has_found = 0;
|
6
10
|
#define BUFSIZE 16384
|
7
11
|
|
@@ -14,11 +18,30 @@ int has_found = 0;
|
|
14
18
|
string = '"' (string_character | '""')* '"' ;
|
15
19
|
value = letter+;
|
16
20
|
main := |*
|
17
|
-
newline {
|
21
|
+
newline {
|
22
|
+
if(has_found ==0) {
|
23
|
+
rb_ary_push((is_header_row ? header_row : arr), Qnil);
|
24
|
+
}
|
25
|
+
if(!is_header_row) {
|
26
|
+
if(header == 1) {
|
27
|
+
VALUE hash = rb_hash_new();
|
28
|
+
int i = 0;
|
29
|
+
for(i = 0; i < RARRAY_LEN(header_row); i++) {
|
30
|
+
rb_hash_aset(hash, rb_ary_entry(header_row, i), rb_ary_entry(arr, i));
|
31
|
+
}
|
32
|
+
rb_yield(hash);
|
33
|
+
} else {
|
34
|
+
rb_yield(arr);
|
35
|
+
}
|
36
|
+
}
|
37
|
+
arr = rb_ary_new();
|
38
|
+
has_found = 0;
|
39
|
+
is_header_row = 0;
|
40
|
+
};
|
18
41
|
space;
|
19
|
-
value { rb_ary_push(arr, rb_str_new(ts, te - ts)); has_found = 1;};
|
20
|
-
string { rb_ary_push(arr, rb_str_new(ts + 1, te - ts - 2)); has_found = 1;};
|
21
|
-
delimiter { if(has_found == 0) rb_ary_push(arr, Qnil); has_found = 0;};
|
42
|
+
value { rb_ary_push((is_header_row ? header_row : arr), rb_str_new(ts, te - ts)); has_found = 1;};
|
43
|
+
string { rb_ary_push((is_header_row ? header_row : arr), rb_str_new(ts + 1, te - ts - 2)); has_found = 1;};
|
44
|
+
delimiter { if(has_found == 0) rb_ary_push((is_header_row ? header_row : arr), Qnil); has_found = 0;};
|
22
45
|
*|;
|
23
46
|
}%%
|
24
47
|
|
@@ -33,12 +56,19 @@ VALUE e_rows(int argc, VALUE *argv, VALUE self) {
|
|
33
56
|
|
34
57
|
has_found = 0;
|
35
58
|
VALUE io;
|
59
|
+
VALUE options;
|
36
60
|
int is_io = 0;
|
37
61
|
int done = 0;
|
38
62
|
|
39
63
|
arr = rb_ary_new();
|
40
|
-
rb_scan_args(argc, argv, "
|
41
|
-
|
64
|
+
rb_scan_args(argc, argv, "11", &io, &options);
|
65
|
+
if(options != Qnil) {
|
66
|
+
header = rb_hash_aref(options, ID2SYM(rb_intern("header"))) == 2;
|
67
|
+
}
|
68
|
+
if(header == 1) {
|
69
|
+
is_header_row = 1;
|
70
|
+
header_row = rb_ary_new();
|
71
|
+
}
|
42
72
|
is_io = rb_respond_to(io, s_read);
|
43
73
|
buf = (char *) malloc(buffer_size); //ALLOC_N(char, buffer_size); <= This caused problems
|
44
74
|
|
@@ -86,7 +116,18 @@ VALUE e_rows(int argc, VALUE *argv, VALUE self) {
|
|
86
116
|
}
|
87
117
|
|
88
118
|
if(RARRAY_LEN(arr) > 0) { // have a last array to yield
|
89
|
-
|
119
|
+
if(!is_header_row) {
|
120
|
+
if(header == 1) {
|
121
|
+
VALUE hash = rb_hash_new();
|
122
|
+
int i = 0;
|
123
|
+
for(i = 0; i < RARRAY_LEN(header_row); i++) {
|
124
|
+
rb_hash_aset(hash, rb_ary_entry(header_row, i), rb_ary_entry(arr, i));
|
125
|
+
}
|
126
|
+
rb_yield(hash);
|
127
|
+
} else {
|
128
|
+
rb_yield(arr);
|
129
|
+
}
|
130
|
+
}
|
90
131
|
}
|
91
132
|
|
92
133
|
return Qnil;
|
data/lib/excelsior.rb
CHANGED
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: excelsior
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.9
|
4
|
+
version: 0.1.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Matthew Mongeau
|
@@ -9,7 +9,7 @@ autorequire:
|
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
11
|
|
12
|
-
date:
|
12
|
+
date: 2010-01-04 00:00:00 -05:00
|
13
13
|
default_executable:
|
14
14
|
dependencies: []
|
15
15
|
|