excelsior 0.0.9 → 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/Rakefile +1 -1
- data/ext/excelsior_reader/excelsior_reader.c +92 -32
- data/ext/excelsior_reader/excelsior_reader.rl +48 -7
- data/lib/excelsior.rb +1 -1
- metadata +2 -2
data/Rakefile
CHANGED
@@ -8,7 +8,7 @@ PKG_FILES = %w(Rakefile) +
|
|
8
8
|
|
9
9
|
gem_spec = Gem::Specification.new do |gem_spec|
|
10
10
|
gem_spec.name = 'excelsior'
|
11
|
-
gem_spec.version = '0.0.9'
|
11
|
+
gem_spec.version = '0.1.0'
|
12
12
|
gem_spec.summary = 'A Ruby gem that uses C bindings to read CSV files superfast. I\'m totally serial!'
|
13
13
|
gem_spec.description = 'A Ruby gem that uses C bindings to read CSV files superfast. I\'m totally serial!'
|
14
14
|
gem_spec.email = 'matt@toastyapps.com'
|
data/ext/excelsior_reader/excelsior_reader.c
CHANGED
@@ -1,18 +1,22 @@
|
|
1
1
|
|
2
|
-
#line 1 "
|
2
|
+
#line 1 "excelsior_reader.rl"
|
3
3
|
#include <ruby.h>
|
4
4
|
|
5
5
|
static ID s_read;
|
6
6
|
VALUE arr;
|
7
|
+
VALUE header_row;
|
8
|
+
int row_index = 0;
|
9
|
+
int header = 0;
|
10
|
+
int is_header_row = 0;
|
7
11
|
int has_found = 0;
|
8
12
|
#define BUFSIZE 16384
|
9
13
|
|
10
14
|
|
11
|
-
#line
|
15
|
+
#line 46 "excelsior_reader.rl"
|
12
16
|
|
13
17
|
|
14
18
|
|
15
|
-
#line
|
19
|
+
#line 20 "excelsior_reader.c"
|
16
20
|
static const char _excelsior_scan_actions[] = {
|
17
21
|
0, 1, 2, 1, 7, 1, 8, 1,
|
18
22
|
9, 1, 10, 1, 11, 2, 0, 1,
|
@@ -71,7 +75,7 @@ static const int excelsior_scan_error = 0;
|
|
71
75
|
static const int excelsior_scan_en_main = 2;
|
72
76
|
|
73
77
|
|
74
|
-
#line
|
78
|
+
#line 49 "excelsior_reader.rl"
|
75
79
|
|
76
80
|
|
77
81
|
VALUE e_rows(int argc, VALUE *argv, VALUE self) {
|
@@ -82,17 +86,24 @@ VALUE e_rows(int argc, VALUE *argv, VALUE self) {
|
|
82
86
|
|
83
87
|
has_found = 0;
|
84
88
|
VALUE io;
|
89
|
+
VALUE options;
|
85
90
|
int is_io = 0;
|
86
91
|
int done = 0;
|
87
92
|
|
88
93
|
arr = rb_ary_new();
|
89
|
-
rb_scan_args(argc, argv, "
|
90
|
-
|
94
|
+
rb_scan_args(argc, argv, "11", &io, &options);
|
95
|
+
if(options != Qnil) {
|
96
|
+
header = rb_hash_aref(options, ID2SYM(rb_intern("header"))) == 2;
|
97
|
+
}
|
98
|
+
if(header == 1) {
|
99
|
+
is_header_row = 1;
|
100
|
+
header_row = rb_ary_new();
|
101
|
+
}
|
91
102
|
is_io = rb_respond_to(io, s_read);
|
92
103
|
buf = (char *) malloc(buffer_size); //ALLOC_N(char, buffer_size); <= This caused problems
|
93
104
|
|
94
105
|
|
95
|
-
#line
|
106
|
+
#line 107 "excelsior_reader.c"
|
96
107
|
{
|
97
108
|
cs = excelsior_scan_start;
|
98
109
|
ts = 0;
|
@@ -100,7 +111,7 @@ VALUE e_rows(int argc, VALUE *argv, VALUE self) {
|
|
100
111
|
act = 0;
|
101
112
|
}
|
102
113
|
|
103
|
-
#line
|
114
|
+
#line 76 "excelsior_reader.rl"
|
104
115
|
|
105
116
|
while(!done) {
|
106
117
|
|
@@ -133,7 +144,7 @@ VALUE e_rows(int argc, VALUE *argv, VALUE self) {
|
|
133
144
|
}
|
134
145
|
|
135
146
|
|
136
|
-
#line
|
147
|
+
#line 148 "excelsior_reader.c"
|
137
148
|
{
|
138
149
|
int _klen;
|
139
150
|
unsigned int _trans;
|
@@ -151,10 +162,10 @@ _resume:
|
|
151
162
|
while ( _nacts-- > 0 ) {
|
152
163
|
switch ( *_acts++ ) {
|
153
164
|
case 2:
|
154
|
-
#line 1 "
|
165
|
+
#line 1 "excelsior_reader.rl"
|
155
166
|
{ts = p;}
|
156
167
|
break;
|
157
|
-
#line
|
168
|
+
#line 169 "excelsior_reader.c"
|
158
169
|
}
|
159
170
|
}
|
160
171
|
|
@@ -220,48 +231,86 @@ _eof_trans:
|
|
220
231
|
switch ( *_acts++ )
|
221
232
|
{
|
222
233
|
case 3:
|
223
|
-
#line 1 "
|
234
|
+
#line 1 "excelsior_reader.rl"
|
224
235
|
{te = p+1;}
|
225
236
|
break;
|
226
237
|
case 4:
|
227
|
-
#line
|
238
|
+
#line 41 "excelsior_reader.rl"
|
228
239
|
{act = 2;}
|
229
240
|
break;
|
230
241
|
case 5:
|
231
|
-
#line
|
242
|
+
#line 42 "excelsior_reader.rl"
|
232
243
|
{act = 3;}
|
233
244
|
break;
|
234
245
|
case 6:
|
235
|
-
#line
|
246
|
+
#line 43 "excelsior_reader.rl"
|
236
247
|
{act = 4;}
|
237
248
|
break;
|
238
249
|
case 7:
|
239
|
-
#line
|
240
|
-
{te = p+1;{
|
250
|
+
#line 21 "excelsior_reader.rl"
|
251
|
+
{te = p+1;{
|
252
|
+
if(has_found ==0) {
|
253
|
+
rb_ary_push((is_header_row ? header_row : arr), Qnil);
|
254
|
+
}
|
255
|
+
if(!is_header_row) {
|
256
|
+
if(header == 1) {
|
257
|
+
VALUE hash = rb_hash_new();
|
258
|
+
int i = 0;
|
259
|
+
for(i = 0; i < RARRAY_LEN(header_row); i++) {
|
260
|
+
rb_hash_aset(hash, rb_ary_entry(header_row, i), rb_ary_entry(arr, i));
|
261
|
+
}
|
262
|
+
rb_yield(hash);
|
263
|
+
} else {
|
264
|
+
rb_yield(arr);
|
265
|
+
}
|
266
|
+
}
|
267
|
+
arr = rb_ary_new();
|
268
|
+
has_found = 0;
|
269
|
+
is_header_row = 0;
|
270
|
+
}}
|
241
271
|
break;
|
242
272
|
case 8:
|
243
|
-
#line
|
244
|
-
{te = p+1;{ if(has_found == 0) rb_ary_push(arr, Qnil); has_found = 0;}}
|
273
|
+
#line 44 "excelsior_reader.rl"
|
274
|
+
{te = p+1;{ if(has_found == 0) rb_ary_push((is_header_row ? header_row : arr), Qnil); has_found = 0;}}
|
245
275
|
break;
|
246
276
|
case 9:
|
247
|
-
#line
|
248
|
-
{te = p;p--;{
|
277
|
+
#line 21 "excelsior_reader.rl"
|
278
|
+
{te = p;p--;{
|
279
|
+
if(has_found ==0) {
|
280
|
+
rb_ary_push((is_header_row ? header_row : arr), Qnil);
|
281
|
+
}
|
282
|
+
if(!is_header_row) {
|
283
|
+
if(header == 1) {
|
284
|
+
VALUE hash = rb_hash_new();
|
285
|
+
int i = 0;
|
286
|
+
for(i = 0; i < RARRAY_LEN(header_row); i++) {
|
287
|
+
rb_hash_aset(hash, rb_ary_entry(header_row, i), rb_ary_entry(arr, i));
|
288
|
+
}
|
289
|
+
rb_yield(hash);
|
290
|
+
} else {
|
291
|
+
rb_yield(arr);
|
292
|
+
}
|
293
|
+
}
|
294
|
+
arr = rb_ary_new();
|
295
|
+
has_found = 0;
|
296
|
+
is_header_row = 0;
|
297
|
+
}}
|
249
298
|
break;
|
250
299
|
case 10:
|
251
|
-
#line
|
252
|
-
{te = p;p--;{ rb_ary_push(arr, rb_str_new(ts + 1, te - ts - 2)); has_found = 1;}}
|
300
|
+
#line 43 "excelsior_reader.rl"
|
301
|
+
{te = p;p--;{ rb_ary_push((is_header_row ? header_row : arr), rb_str_new(ts + 1, te - ts - 2)); has_found = 1;}}
|
253
302
|
break;
|
254
303
|
case 11:
|
255
|
-
#line 1 "
|
304
|
+
#line 1 "excelsior_reader.rl"
|
256
305
|
{ switch( act ) {
|
257
306
|
case 0:
|
258
307
|
{{cs = 0; goto _again;}}
|
259
308
|
break;
|
260
309
|
case 3:
|
261
|
-
{{p = ((te))-1;} rb_ary_push(arr, rb_str_new(ts, te - ts)); has_found = 1;}
|
310
|
+
{{p = ((te))-1;} rb_ary_push((is_header_row ? header_row : arr), rb_str_new(ts, te - ts)); has_found = 1;}
|
262
311
|
break;
|
263
312
|
case 4:
|
264
|
-
{{p = ((te))-1;} rb_ary_push(arr, rb_str_new(ts + 1, te - ts - 2)); has_found = 1;}
|
313
|
+
{{p = ((te))-1;} rb_ary_push((is_header_row ? header_row : arr), rb_str_new(ts + 1, te - ts - 2)); has_found = 1;}
|
265
314
|
break;
|
266
315
|
default:
|
267
316
|
{{p = ((te))-1;}}
|
@@ -269,7 +318,7 @@ _eof_trans:
|
|
269
318
|
}
|
270
319
|
}
|
271
320
|
break;
|
272
|
-
#line
|
321
|
+
#line 322 "excelsior_reader.c"
|
273
322
|
}
|
274
323
|
}
|
275
324
|
|
@@ -279,14 +328,14 @@ _again:
|
|
279
328
|
while ( _nacts-- > 0 ) {
|
280
329
|
switch ( *_acts++ ) {
|
281
330
|
case 0:
|
282
|
-
#line 1 "
|
331
|
+
#line 1 "excelsior_reader.rl"
|
283
332
|
{ts = 0;}
|
284
333
|
break;
|
285
334
|
case 1:
|
286
|
-
#line 1 "
|
335
|
+
#line 1 "excelsior_reader.rl"
|
287
336
|
{act = 0;}
|
288
337
|
break;
|
289
|
-
#line
|
338
|
+
#line 339 "excelsior_reader.c"
|
290
339
|
}
|
291
340
|
}
|
292
341
|
|
@@ -306,7 +355,7 @@ _again:
|
|
306
355
|
_out: {}
|
307
356
|
}
|
308
357
|
|
309
|
-
#line
|
358
|
+
#line 108 "excelsior_reader.rl"
|
310
359
|
|
311
360
|
if(ts != 0) { // we are not at the end
|
312
361
|
have = pe - ts; //so copy stuff back in
|
@@ -318,7 +367,18 @@ _again:
|
|
318
367
|
}
|
319
368
|
|
320
369
|
if(RARRAY_LEN(arr) > 0) { // have a last array to yield
|
321
|
-
|
370
|
+
if(!is_header_row) {
|
371
|
+
if(header == 1) {
|
372
|
+
VALUE hash = rb_hash_new();
|
373
|
+
int i = 0;
|
374
|
+
for(i = 0; i < RARRAY_LEN(header_row); i++) {
|
375
|
+
rb_hash_aset(hash, rb_ary_entry(header_row, i), rb_ary_entry(arr, i));
|
376
|
+
}
|
377
|
+
rb_yield(hash);
|
378
|
+
} else {
|
379
|
+
rb_yield(arr);
|
380
|
+
}
|
381
|
+
}
|
322
382
|
}
|
323
383
|
|
324
384
|
return Qnil;
|
data/ext/excelsior_reader/excelsior_reader.rl
CHANGED
@@ -2,6 +2,10 @@
|
|
2
2
|
|
3
3
|
static ID s_read;
|
4
4
|
VALUE arr;
|
5
|
+
VALUE header_row;
|
6
|
+
int row_index = 0;
|
7
|
+
int header = 0;
|
8
|
+
int is_header_row = 0;
|
5
9
|
int has_found = 0;
|
6
10
|
#define BUFSIZE 16384
|
7
11
|
|
@@ -14,11 +18,30 @@ int has_found = 0;
|
|
14
18
|
string = '"' (string_character | '""')* '"' ;
|
15
19
|
value = letter+;
|
16
20
|
main := |*
|
17
|
-
newline {
|
21
|
+
newline {
|
22
|
+
if(has_found ==0) {
|
23
|
+
rb_ary_push((is_header_row ? header_row : arr), Qnil);
|
24
|
+
}
|
25
|
+
if(!is_header_row) {
|
26
|
+
if(header == 1) {
|
27
|
+
VALUE hash = rb_hash_new();
|
28
|
+
int i = 0;
|
29
|
+
for(i = 0; i < RARRAY_LEN(header_row); i++) {
|
30
|
+
rb_hash_aset(hash, rb_ary_entry(header_row, i), rb_ary_entry(arr, i));
|
31
|
+
}
|
32
|
+
rb_yield(hash);
|
33
|
+
} else {
|
34
|
+
rb_yield(arr);
|
35
|
+
}
|
36
|
+
}
|
37
|
+
arr = rb_ary_new();
|
38
|
+
has_found = 0;
|
39
|
+
is_header_row = 0;
|
40
|
+
};
|
18
41
|
space;
|
19
|
-
value { rb_ary_push(arr, rb_str_new(ts, te - ts)); has_found = 1;};
|
20
|
-
string { rb_ary_push(arr, rb_str_new(ts + 1, te - ts - 2)); has_found = 1;};
|
21
|
-
delimiter { if(has_found == 0) rb_ary_push(arr, Qnil); has_found = 0;};
|
42
|
+
value { rb_ary_push((is_header_row ? header_row : arr), rb_str_new(ts, te - ts)); has_found = 1;};
|
43
|
+
string { rb_ary_push((is_header_row ? header_row : arr), rb_str_new(ts + 1, te - ts - 2)); has_found = 1;};
|
44
|
+
delimiter { if(has_found == 0) rb_ary_push((is_header_row ? header_row : arr), Qnil); has_found = 0;};
|
22
45
|
*|;
|
23
46
|
}%%
|
24
47
|
|
@@ -33,12 +56,19 @@ VALUE e_rows(int argc, VALUE *argv, VALUE self) {
|
|
33
56
|
|
34
57
|
has_found = 0;
|
35
58
|
VALUE io;
|
59
|
+
VALUE options;
|
36
60
|
int is_io = 0;
|
37
61
|
int done = 0;
|
38
62
|
|
39
63
|
arr = rb_ary_new();
|
40
|
-
rb_scan_args(argc, argv, "
|
41
|
-
|
64
|
+
rb_scan_args(argc, argv, "11", &io, &options);
|
65
|
+
if(options != Qnil) {
|
66
|
+
header = rb_hash_aref(options, ID2SYM(rb_intern("header"))) == 2;
|
67
|
+
}
|
68
|
+
if(header == 1) {
|
69
|
+
is_header_row = 1;
|
70
|
+
header_row = rb_ary_new();
|
71
|
+
}
|
42
72
|
is_io = rb_respond_to(io, s_read);
|
43
73
|
buf = (char *) malloc(buffer_size); //ALLOC_N(char, buffer_size); <= This caused problems
|
44
74
|
|
@@ -86,7 +116,18 @@ VALUE e_rows(int argc, VALUE *argv, VALUE self) {
|
|
86
116
|
}
|
87
117
|
|
88
118
|
if(RARRAY_LEN(arr) > 0) { // have a last array to yield
|
89
|
-
|
119
|
+
if(!is_header_row) {
|
120
|
+
if(header == 1) {
|
121
|
+
VALUE hash = rb_hash_new();
|
122
|
+
int i = 0;
|
123
|
+
for(i = 0; i < RARRAY_LEN(header_row); i++) {
|
124
|
+
rb_hash_aset(hash, rb_ary_entry(header_row, i), rb_ary_entry(arr, i));
|
125
|
+
}
|
126
|
+
rb_yield(hash);
|
127
|
+
} else {
|
128
|
+
rb_yield(arr);
|
129
|
+
}
|
130
|
+
}
|
90
131
|
}
|
91
132
|
|
92
133
|
return Qnil;
|
data/lib/excelsior.rb
CHANGED
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: excelsior
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.9
|
4
|
+
version: 0.1.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Matthew Mongeau
|
@@ -9,7 +9,7 @@ autorequire:
|
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
11
|
|
12
|
-
date:
|
12
|
+
date: 2010-01-04 00:00:00 -05:00
|
13
13
|
default_executable:
|
14
14
|
dependencies: []
|
15
15
|
|