excelsior-formats 0.0.9

Sign up to get free protection for your applications and to get access to all the features.
data/Rakefile ADDED
@@ -0,0 +1,26 @@
1
require 'rake'
require 'rake/testtask'

# Build script for the excelsior gem: collects the files to package, describes
# the gem, and defines a task that writes the gemspec to disk.

# Every file shipped in the gem.
# * Directories matched by the lib glob are dropped, RubyGems expects plain files.
# * The generated C source is listed explicitly because it may not exist yet
#   when this file is loaded.
# * Once it does exist the ext glob matches it too, hence the uniq.
PKG_FILES = (
  %w[Rakefile] +
  Dir.glob('lib/**/*').select { |path| File.file?(path) } +
  Dir.glob('ext/**/*.{c,rb,rl}') +
  %w[ext/excelsior_reader/excelsior_reader.c]
).uniq

# NOTE(review): the packaged gem metadata names this gem "excelsior-formats"
# while this file builds "excelsior" - confirm which name is intended.
gem_spec = Gem::Specification.new do |spec|
  blurb = "A Ruby gem that uses C bindings to read CSV files superfast. I'm totally serial!"

  spec.name = 'excelsior'
  spec.version = '0.0.9'
  spec.summary = blurb
  spec.description = blurb
  spec.email = 'matt@toastyapps.com'
  spec.homepage = 'http://github.com/toastyapps/excelsior'
  spec.authors = ['Matthew Mongeau']
  spec.files = PKG_FILES
  spec.extensions = FileList['ext/**/extconf.rb'].to_a
end

desc 'Generate a gemspec file'
task :gemspec do
  # The spec is serialised as YAML into <gem name>.gemspec in the current directory.
  File.open("#{gem_spec.name}.gemspec", 'w') do |file|
    file.write(gem_spec.to_yaml)
  end
end
@@ -0,0 +1,547 @@
1
+
2
+ #line 1 "excelsior_reader.rl"
3
+ #include <ruby.h>
4
+
5
+ static ID s_read; /* method id for "read"; assigned by rb_intern in Init_excelsior_reader */
6
+
7
+ int has_found = 0; /* scanner actions set this to 1 after pushing a field value and back to 0 at a delimiter or newline; when it is still 0 there, nil is pushed for the empty field */
8
+ #define BUFSIZE 16384 /* initial size in bytes of the malloc'ed read buffer */
9
+
10
+
11
+ #line 67 "excelsior_reader.rl"
12
+
13
+
14
+ /* The tables and constants below are generated from excelsior_reader.rl (see the #line directives) and are read by the generated scanner code later in this file. */
15
+ #line 16 "excelsior_reader.c"
16
+ static const char _excelsior_scan_actions[] = { /* action lists: a count followed by that many action ids; the other tables store offsets into this array */
17
+ 0, 1, 0, 1, 2, 1, 4, 1,
18
+ 5, 1, 6, 1, 7, 1, 11, 1,
19
+ 12, 1, 13, 1, 14, 1, 15, 1,
20
+ 19, 1, 20, 1, 21, 1, 22, 1,
21
+ 23, 1, 27, 1, 28, 1, 29, 1,
22
+ 30, 1, 31, 1, 35, 1, 36, 1,
23
+ 37, 1, 38, 1, 39, 2, 0, 1,
24
+ 2, 3, 8, 2, 3, 9, 2, 3,
25
+ 10, 2, 3, 16, 2, 3, 17, 2,
26
+ 3, 18, 2, 3, 24, 2, 3, 25,
27
+ 2, 3, 26, 2, 3, 32, 2, 3,
28
+ 33, 2, 3, 34
29
+ };
30
+ /* Indexed by state (cs): offset of the state's first key in _excelsior_scan_trans_keys. */
31
+ static const char _excelsior_scan_key_offsets[] = {
32
+ 0, 0, 1, 2, 3, 4, 8, 15,
33
+ 19, 20, 21, 28, 32, 33, 34, 41,
34
+ 45, 46, 47, 54, 58, 59
35
+ };
36
+ /* Keys compared against *p: for each state its single keys come first, followed by (low, high) pairs for its ranges. */
37
+ static const char _excelsior_scan_trans_keys[] = {
38
+ 34, 34, 34, 34, 9, 44, 59, 124,
39
+ 10, 13, 32, 34, 44, 9, 12, 10,
40
+ 13, 34, 44, 10, 34, 9, 10, 13,
41
+ 32, 34, 11, 12, 13, 34, 9, 10,
42
+ 10, 34, 10, 13, 32, 34, 124, 9,
43
+ 12, 10, 13, 34, 124, 10, 34, 10,
44
+ 13, 32, 34, 59, 9, 12, 10, 13,
45
+ 34, 59, 10, 34, 0
46
+ };
47
+ /* Indexed by state: number of single keys the state has. */
48
+ static const char _excelsior_scan_single_lengths[] = {
49
+ 0, 1, 1, 1, 1, 4, 5, 4,
50
+ 1, 1, 5, 2, 1, 1, 5, 4,
51
+ 1, 1, 5, 4, 1, 1
52
+ };
53
+ /* Indexed by state: number of key ranges the state has. */
54
+ static const char _excelsior_scan_range_lengths[] = {
55
+ 0, 0, 0, 0, 0, 0, 1, 0,
56
+ 0, 0, 1, 1, 0, 0, 1, 0,
57
+ 0, 0, 1, 0, 0, 0
58
+ };
59
+ /* Indexed by state: index of the state's first transition, used as the base value of _trans. */
60
+ static const char _excelsior_scan_index_offsets[] = {
61
+ 0, 0, 2, 4, 6, 8, 13, 20,
62
+ 25, 27, 29, 36, 40, 42, 44, 51,
63
+ 56, 58, 60, 67, 72, 74
64
+ };
65
+ /* Indexed by transition (_trans): the state to move to. */
66
+ static const char _excelsior_scan_trans_targs[] = {
67
+ 9, 1, 13, 2, 17, 3, 21, 4,
68
+ 5, 5, 5, 5, 0, 6, 8, 7,
69
+ 1, 6, 7, 7, 6, 6, 6, 6,
70
+ 7, 6, 6, 1, 6, 10, 10, 12,
71
+ 11, 2, 11, 11, 10, 10, 10, 11,
72
+ 10, 10, 2, 10, 14, 16, 15, 3,
73
+ 14, 15, 15, 14, 14, 14, 14, 15,
74
+ 14, 14, 3, 14, 18, 20, 19, 4,
75
+ 18, 19, 19, 18, 18, 18, 18, 19,
76
+ 18, 18, 4, 18, 6, 10, 14, 18,
77
+ 6, 6, 6, 10, 10, 10, 14, 14,
78
+ 14, 18, 18, 18, 0
79
+ };
80
+ /* Indexed by transition: offset of its action list in _excelsior_scan_actions; 0 means the transition has no actions. */
81
+ static const char _excelsior_scan_trans_actions[] = {
82
+ 62, 0, 71, 0, 80, 0, 89, 0,
83
+ 7, 5, 11, 9, 0, 13, 0, 56,
84
+ 0, 15, 56, 59, 21, 21, 21, 21,
85
+ 59, 13, 17, 0, 19, 25, 23, 0,
86
+ 65, 0, 65, 68, 31, 31, 31, 68,
87
+ 23, 27, 0, 29, 33, 0, 74, 0,
88
+ 35, 74, 77, 41, 41, 41, 41, 77,
89
+ 33, 37, 0, 39, 43, 0, 83, 0,
90
+ 45, 83, 86, 51, 51, 51, 51, 86,
91
+ 43, 47, 0, 49, 21, 31, 41, 51,
92
+ 21, 17, 19, 31, 27, 29, 41, 37,
93
+ 39, 51, 47, 49, 0
94
+ };
95
+ /* Indexed by state: offset of the action list run at _again, after cs has been updated (these actions clear ts and act). */
96
+ static const char _excelsior_scan_to_state_actions[] = {
97
+ 0, 0, 0, 0, 0, 1, 53, 0,
98
+ 0, 0, 53, 0, 0, 0, 53, 0,
99
+ 0, 0, 53, 0, 0, 0
100
+ };
101
+ /* Indexed by state: offset of the action list run at _resume, before the current character is matched (this action sets ts = p). */
102
+ static const char _excelsior_scan_from_state_actions[] = {
103
+ 0, 0, 0, 0, 0, 3, 3, 0,
104
+ 0, 0, 3, 0, 0, 0, 3, 0,
105
+ 0, 0, 3, 0, 0, 0
106
+ };
107
+ /* Indexed by state: transition index plus one to take when p == eof; 0 means the state has no end-of-input transition. */
108
+ static const char _excelsior_scan_eof_trans[] = {
109
+ 0, 81, 84, 87, 90, 0, 0, 81,
110
+ 82, 83, 0, 84, 85, 86, 0, 87,
111
+ 88, 89, 0, 90, 91, 92
112
+ };
113
+ /* cs is initialised to the start state; the scanner stops as soon as cs equals the error state. */
114
+ static const int excelsior_scan_start = 5;
115
+ static const int excelsior_scan_error = 0;
116
+ /* Entry states of the scanners; the actions run from the main state set cs to 6, 10, 14 or 18 depending on the first input byte. */
117
+ static const int excelsior_scan_en_main = 5;
118
+ static const int excelsior_scan_en_csv = 6;
119
+ static const int excelsior_scan_en_tsv = 10;
120
+ static const int excelsior_scan_en_psv = 14;
121
+ static const int excelsior_scan_en_ssv = 18;
122
+
123
+
124
+ #line 70 "excelsior_reader.rl"
125
+
126
+
127
+ VALUE e_rows(int argc, VALUE *argv, VALUE self) {
128
+
129
+ int cs, act, have = 0, nread = 0, curline = 1, text = 0;
130
+ char *ts = 0, *te = 0, *buf = NULL, *eof = NULL;
131
+ char *p, *pe;
132
+ int buffer_size = BUFSIZE;
133
+
134
+ has_found = 0;
135
+ VALUE arr;
136
+ VALUE io;
137
+ VALUE format;
138
+ int is_io = 0;
139
+ int done = 0;
140
+ int first_run = 1;
141
+ arr = rb_ary_new();
142
+ rb_scan_args(argc, argv, "11", &io, &format);
143
+ if(NIL_P(format)) format = rb_str_new2(",");
144
+
145
+
146
+ #line 147 "excelsior_reader.c"
147
+ {
148
+ cs = excelsior_scan_start;
149
+ ts = 0;
150
+ te = 0;
151
+ act = 0;
152
+ }
153
+
154
+ #line 91 "excelsior_reader.rl"
155
+
156
+ is_io = rb_respond_to(io, s_read);
157
+ buf = (char *) malloc(buffer_size); //ALLOC_N(char, buffer_size); <= This caused problems
158
+
159
+ while(!done) {
160
+
161
+ int len, space = buffer_size - have;
162
+ VALUE str;
163
+ p = buf + have;
164
+
165
+ if(is_io) {
166
+ str = rb_funcall(io, s_read, 1, INT2FIX(space));
167
+ if(first_run) {
168
+ str = rb_str_append(format, str);
169
+ first_run = 0;
170
+ }
171
+ len = RSTRING_LEN(str);
172
+ p = memcpy(p, StringValuePtr(str), len);
173
+ } else {
174
+ // Going to assume it's a string and already in memory
175
+ //str = io;
176
+ io = rb_str_append(format, io);
177
+ p = RSTRING_PTR(io);
178
+ len = RSTRING_LEN(io);
179
+ pe = p + len;
180
+ eof = pe;
181
+ done = 1;
182
+ }
183
+ if(len < space) {
184
+ done = 1;
185
+ //p[len++] = 0; can't seem to get it to work with this
186
+ pe = p + len;
187
+ eof = pe;
188
+ } else {
189
+ pe = p + len;
190
+ }
191
+
192
+
193
+ #line 194 "excelsior_reader.c"
194
+ {
195
+ int _klen;
196
+ unsigned int _trans;
197
+ const char *_acts;
198
+ unsigned int _nacts;
199
+ const char *_keys;
200
+
201
+ if ( p == pe )
202
+ goto _test_eof;
203
+ if ( cs == 0 )
204
+ goto _out;
205
+ _resume:
206
+ _acts = _excelsior_scan_actions + _excelsior_scan_from_state_actions[cs];
207
+ _nacts = (unsigned int) *_acts++;
208
+ while ( _nacts-- > 0 ) {
209
+ switch ( *_acts++ ) {
210
+ case 2:
211
+ #line 1 "excelsior_reader.rl"
212
+ {ts = p;}
213
+ break;
214
+ #line 215 "excelsior_reader.c"
215
+ }
216
+ }
217
+
218
+ _keys = _excelsior_scan_trans_keys + _excelsior_scan_key_offsets[cs];
219
+ _trans = _excelsior_scan_index_offsets[cs];
220
+
221
+ _klen = _excelsior_scan_single_lengths[cs];
222
+ if ( _klen > 0 ) {
223
+ const char *_lower = _keys;
224
+ const char *_mid;
225
+ const char *_upper = _keys + _klen - 1;
226
+ while (1) {
227
+ if ( _upper < _lower )
228
+ break;
229
+
230
+ _mid = _lower + ((_upper-_lower) >> 1);
231
+ if ( (*p) < *_mid )
232
+ _upper = _mid - 1;
233
+ else if ( (*p) > *_mid )
234
+ _lower = _mid + 1;
235
+ else {
236
+ _trans += (_mid - _keys);
237
+ goto _match;
238
+ }
239
+ }
240
+ _keys += _klen;
241
+ _trans += _klen;
242
+ }
243
+
244
+ _klen = _excelsior_scan_range_lengths[cs];
245
+ if ( _klen > 0 ) {
246
+ const char *_lower = _keys;
247
+ const char *_mid;
248
+ const char *_upper = _keys + (_klen<<1) - 2;
249
+ while (1) {
250
+ if ( _upper < _lower )
251
+ break;
252
+
253
+ _mid = _lower + (((_upper-_lower) >> 1) & ~1);
254
+ if ( (*p) < _mid[0] )
255
+ _upper = _mid - 2;
256
+ else if ( (*p) > _mid[1] )
257
+ _lower = _mid + 2;
258
+ else {
259
+ _trans += ((_mid - _keys)>>1);
260
+ goto _match;
261
+ }
262
+ }
263
+ _trans += _klen;
264
+ }
265
+
266
+ _match:
267
+ _eof_trans:
268
+ cs = _excelsior_scan_trans_targs[_trans];
269
+
270
+ if ( _excelsior_scan_trans_actions[_trans] == 0 )
271
+ goto _again;
272
+
273
+ _acts = _excelsior_scan_actions + _excelsior_scan_trans_actions[_trans];
274
+ _nacts = (unsigned int) *_acts++;
275
+ while ( _nacts-- > 0 )
276
+ {
277
+ switch ( *_acts++ )
278
+ {
279
+ case 3:
280
+ #line 1 "excelsior_reader.rl"
281
+ {te = p+1;}
282
+ break;
283
+ case 4:
284
+ #line 32 "excelsior_reader.rl"
285
+ {te = p+1;{ {cs = 6; goto _again;} }}
286
+ break;
287
+ case 5:
288
+ #line 33 "excelsior_reader.rl"
289
+ {te = p+1;{ {cs = 10; goto _again;} }}
290
+ break;
291
+ case 6:
292
+ #line 34 "excelsior_reader.rl"
293
+ {te = p+1;{ {cs = 14; goto _again;} }}
294
+ break;
295
+ case 7:
296
+ #line 35 "excelsior_reader.rl"
297
+ {te = p+1;{ {cs = 18; goto _again;} }}
298
+ break;
299
+ case 8:
300
+ #line 40 "excelsior_reader.rl"
301
+ {act = 6;}
302
+ break;
303
+ case 9:
304
+ #line 41 "excelsior_reader.rl"
305
+ {act = 7;}
306
+ break;
307
+ case 10:
308
+ #line 42 "excelsior_reader.rl"
309
+ {act = 8;}
310
+ break;
311
+ case 11:
312
+ #line 39 "excelsior_reader.rl"
313
+ {te = p+1;{ if(has_found ==0) rb_ary_push(arr, Qnil); rb_yield(arr); arr = rb_ary_new(); has_found = 0; }}
314
+ break;
315
+ case 12:
316
+ #line 43 "excelsior_reader.rl"
317
+ {te = p+1;{ if(has_found == 0) rb_ary_push(arr, Qnil); has_found = 0;}}
318
+ break;
319
+ case 13:
320
+ #line 39 "excelsior_reader.rl"
321
+ {te = p;p--;{ if(has_found ==0) rb_ary_push(arr, Qnil); rb_yield(arr); arr = rb_ary_new(); has_found = 0; }}
322
+ break;
323
+ case 14:
324
+ #line 42 "excelsior_reader.rl"
325
+ {te = p;p--;{ rb_ary_push(arr, rb_str_new(ts + 1, te - ts - 2)); has_found = 1;}}
326
+ break;
327
+ case 15:
328
+ #line 1 "excelsior_reader.rl"
329
+ { switch( act ) {
330
+ case 0:
331
+ {{cs = 0; goto _again;}}
332
+ break;
333
+ case 7:
334
+ {{p = ((te))-1;} rb_ary_push(arr, rb_str_new(ts, te - ts)); has_found = 1;}
335
+ break;
336
+ case 8:
337
+ {{p = ((te))-1;} rb_ary_push(arr, rb_str_new(ts + 1, te - ts - 2)); has_found = 1;}
338
+ break;
339
+ default:
340
+ {{p = ((te))-1;}}
341
+ break;
342
+ }
343
+ }
344
+ break;
345
+ case 16:
346
+ #line 47 "excelsior_reader.rl"
347
+ {act = 11;}
348
+ break;
349
+ case 17:
350
+ #line 48 "excelsior_reader.rl"
351
+ {act = 12;}
352
+ break;
353
+ case 18:
354
+ #line 49 "excelsior_reader.rl"
355
+ {act = 13;}
356
+ break;
357
+ case 19:
358
+ #line 46 "excelsior_reader.rl"
359
+ {te = p+1;{ if(has_found ==0) rb_ary_push(arr, Qnil); rb_yield(arr); arr = rb_ary_new(); has_found = 0; }}
360
+ break;
361
+ case 20:
362
+ #line 47 "excelsior_reader.rl"
363
+ {te = p+1;}
364
+ break;
365
+ case 21:
366
+ #line 46 "excelsior_reader.rl"
367
+ {te = p;p--;{ if(has_found ==0) rb_ary_push(arr, Qnil); rb_yield(arr); arr = rb_ary_new(); has_found = 0; }}
368
+ break;
369
+ case 22:
370
+ #line 49 "excelsior_reader.rl"
371
+ {te = p;p--;{ rb_ary_push(arr, rb_str_new(ts + 1, te - ts - 2)); has_found = 1;}}
372
+ break;
373
+ case 23:
374
+ #line 1 "excelsior_reader.rl"
375
+ { switch( act ) {
376
+ case 0:
377
+ {{cs = 0; goto _again;}}
378
+ break;
379
+ case 12:
380
+ {{p = ((te))-1;} rb_ary_push(arr, rb_str_new(ts, te - ts)); has_found = 1;}
381
+ break;
382
+ case 13:
383
+ {{p = ((te))-1;} rb_ary_push(arr, rb_str_new(ts + 1, te - ts - 2)); has_found = 1;}
384
+ break;
385
+ default:
386
+ {{p = ((te))-1;}}
387
+ break;
388
+ }
389
+ }
390
+ break;
391
+ case 24:
392
+ #line 54 "excelsior_reader.rl"
393
+ {act = 16;}
394
+ break;
395
+ case 25:
396
+ #line 55 "excelsior_reader.rl"
397
+ {act = 17;}
398
+ break;
399
+ case 26:
400
+ #line 56 "excelsior_reader.rl"
401
+ {act = 18;}
402
+ break;
403
+ case 27:
404
+ #line 53 "excelsior_reader.rl"
405
+ {te = p+1;{ if(has_found ==0) rb_ary_push(arr, Qnil); rb_yield(arr); arr = rb_ary_new(); has_found = 0; }}
406
+ break;
407
+ case 28:
408
+ #line 57 "excelsior_reader.rl"
409
+ {te = p+1;{ if(has_found == 0) rb_ary_push(arr, Qnil); has_found = 0;}}
410
+ break;
411
+ case 29:
412
+ #line 53 "excelsior_reader.rl"
413
+ {te = p;p--;{ if(has_found ==0) rb_ary_push(arr, Qnil); rb_yield(arr); arr = rb_ary_new(); has_found = 0; }}
414
+ break;
415
+ case 30:
416
+ #line 56 "excelsior_reader.rl"
417
+ {te = p;p--;{ rb_ary_push(arr, rb_str_new(ts + 1, te - ts - 2)); has_found = 1;}}
418
+ break;
419
+ case 31:
420
+ #line 1 "excelsior_reader.rl"
421
+ { switch( act ) {
422
+ case 0:
423
+ {{cs = 0; goto _again;}}
424
+ break;
425
+ case 17:
426
+ {{p = ((te))-1;} rb_ary_push(arr, rb_str_new(ts, te - ts)); has_found = 1;}
427
+ break;
428
+ case 18:
429
+ {{p = ((te))-1;} rb_ary_push(arr, rb_str_new(ts + 1, te - ts - 2)); has_found = 1;}
430
+ break;
431
+ default:
432
+ {{p = ((te))-1;}}
433
+ break;
434
+ }
435
+ }
436
+ break;
437
+ case 32:
438
+ #line 61 "excelsior_reader.rl"
439
+ {act = 21;}
440
+ break;
441
+ case 33:
442
+ #line 62 "excelsior_reader.rl"
443
+ {act = 22;}
444
+ break;
445
+ case 34:
446
+ #line 63 "excelsior_reader.rl"
447
+ {act = 23;}
448
+ break;
449
+ case 35:
450
+ #line 60 "excelsior_reader.rl"
451
+ {te = p+1;{ if(has_found ==0) rb_ary_push(arr, Qnil); rb_yield(arr); arr = rb_ary_new(); has_found = 0; }}
452
+ break;
453
+ case 36:
454
+ #line 64 "excelsior_reader.rl"
455
+ {te = p+1;{ if(has_found == 0) rb_ary_push(arr, Qnil); has_found = 0;}}
456
+ break;
457
+ case 37:
458
+ #line 60 "excelsior_reader.rl"
459
+ {te = p;p--;{ if(has_found ==0) rb_ary_push(arr, Qnil); rb_yield(arr); arr = rb_ary_new(); has_found = 0; }}
460
+ break;
461
+ case 38:
462
+ #line 63 "excelsior_reader.rl"
463
+ {te = p;p--;{ rb_ary_push(arr, rb_str_new(ts + 1, te - ts - 2)); has_found = 1;}}
464
+ break;
465
+ case 39:
466
+ #line 1 "excelsior_reader.rl"
467
+ { switch( act ) {
468
+ case 0:
469
+ {{cs = 0; goto _again;}}
470
+ break;
471
+ case 22:
472
+ {{p = ((te))-1;} rb_ary_push(arr, rb_str_new(ts, te - ts)); has_found = 1;}
473
+ break;
474
+ case 23:
475
+ {{p = ((te))-1;} rb_ary_push(arr, rb_str_new(ts + 1, te - ts - 2)); has_found = 1;}
476
+ break;
477
+ default:
478
+ {{p = ((te))-1;}}
479
+ break;
480
+ }
481
+ }
482
+ break;
483
+ #line 484 "excelsior_reader.c"
484
+ }
485
+ }
486
+
487
+ _again:
488
+ _acts = _excelsior_scan_actions + _excelsior_scan_to_state_actions[cs];
489
+ _nacts = (unsigned int) *_acts++;
490
+ while ( _nacts-- > 0 ) {
491
+ switch ( *_acts++ ) {
492
+ case 0:
493
+ #line 1 "excelsior_reader.rl"
494
+ {ts = 0;}
495
+ break;
496
+ case 1:
497
+ #line 1 "excelsior_reader.rl"
498
+ {act = 0;}
499
+ break;
500
+ #line 501 "excelsior_reader.c"
501
+ }
502
+ }
503
+
504
+ if ( cs == 0 )
505
+ goto _out;
506
+ if ( ++p != pe )
507
+ goto _resume;
508
+ _test_eof: {}
509
+ if ( p == eof )
510
+ {
511
+ if ( _excelsior_scan_eof_trans[cs] > 0 ) {
512
+ _trans = _excelsior_scan_eof_trans[cs] - 1;
513
+ goto _eof_trans;
514
+ }
515
+ }
516
+
517
+ _out: {}
518
+ }
519
+
520
+ #line 129 "excelsior_reader.rl"
521
+
522
+
523
+ if(ts != 0) { // we are not at the end
524
+ have = pe - ts; //so copy stuff back in
525
+ memmove(buf, ts, have);
526
+ te = buf + (te - ts);
527
+ ts = buf;
528
+ }
529
+
530
+ }
531
+
532
+ if(RARRAY_LEN(arr) > 0) { // have a last array to yield
533
+ rb_yield(arr);
534
+ }
535
+
536
+ return Qnil;
537
+ }
538
+
539
+ VALUE mExcelsior;
540
+ VALUE cReader;
541
+
542
+ void Init_excelsior_reader() {
543
+ s_read = rb_intern("read");
544
+ mExcelsior = rb_define_module("Excelsior");
545
+ cReader = rb_define_class_under(mExcelsior, "Reader", rb_cObject);
546
+ rb_define_singleton_method(cReader, "rows", e_rows, -1);
547
+ }
@@ -0,0 +1,155 @@
1
+ #include <ruby.h>
2
+
3
+ static ID s_read; /* method id for "read"; assigned by rb_intern in Init_excelsior_reader */
4
+
5
+ int has_found = 0; /* scanner actions set this to 1 after pushing a field value and back to 0 at a delimiter or newline; when it is still 0 there, nil is pushed for the empty field */
6
+ #define BUFSIZE 16384 /* initial size in bytes of the malloc'ed read buffer */
7
+
8
%%{
  machine excelsior_scan;
  # Field separators. The format byte scanned ahead of the data selects the dialect (see main).
  csv_delimiter = ',';
  tsv_delimiter = '\t';
  psv_delimiter = '|';
  ssv_delimiter = ';';

  # Line ends are LF, CR LF or a bare CR. Inside a quoted string a quote may only appear doubled.
  newline = "\r"? "\n" | "\r";
  string_character = any - '"';
  string = '"' (string_character | '""')* '"' ;
  # Unquoted values: runs of characters that are not a quote, the delimiter of the dialect or a line end.
  csv_letter = string_character - csv_delimiter - newline;
  tsv_letter = string_character - tsv_delimiter - newline;
  psv_letter = string_character - psv_delimiter - newline;
  ssv_letter = string_character - ssv_delimiter - newline;

  csv_value = csv_letter+;
  tsv_value = tsv_letter+;
  psv_value = psv_letter+;
  ssv_value = ssv_letter+;
  # main consumes the leading format byte and jumps to the matching dialect scanner.
  main := |*
    csv_delimiter { fgoto csv; };
    tsv_delimiter { fgoto tsv; };
    psv_delimiter { fgoto psv; };
    ssv_delimiter { fgoto ssv; };
  *|;
  # Dialect scanners. has_found records whether the current field produced a value; if not, newline and delimiter push nil.
  csv := |*
    newline { if(has_found ==0) rb_ary_push(arr, Qnil); rb_yield(arr); arr = rb_ary_new(); has_found = 0; };
    space;
    csv_value { rb_ary_push(arr, rb_str_new(ts, te - ts)); has_found = 1;};
    string { rb_ary_push(arr, rb_str_new(ts + 1, te - ts - 2)); has_found = 1;};
    csv_delimiter { if(has_found == 0) rb_ary_push(arr, Qnil); has_found = 0;};
  *|;
  tsv := |*
    newline { if(has_found ==0) rb_ary_push(arr, Qnil); rb_yield(arr); arr = rb_ary_new(); has_found = 0; };
    space - tsv_delimiter; # plain space also matches the tab and, being listed first, would win over tsv_delimiter
    tsv_value { rb_ary_push(arr, rb_str_new(ts, te - ts)); has_found = 1;};
    string { rb_ary_push(arr, rb_str_new(ts + 1, te - ts - 2)); has_found = 1;};
    tsv_delimiter { if(has_found == 0) rb_ary_push(arr, Qnil); has_found = 0;};
  *|;
  psv := |*
    newline { if(has_found ==0) rb_ary_push(arr, Qnil); rb_yield(arr); arr = rb_ary_new(); has_found = 0; };
    space;
    psv_value { rb_ary_push(arr, rb_str_new(ts, te - ts)); has_found = 1;};
    string { rb_ary_push(arr, rb_str_new(ts + 1, te - ts - 2)); has_found = 1;};
    psv_delimiter { if(has_found == 0) rb_ary_push(arr, Qnil); has_found = 0;};
  *|;
  ssv := |*
    newline { if(has_found ==0) rb_ary_push(arr, Qnil); rb_yield(arr); arr = rb_ary_new(); has_found = 0; };
    space;
    ssv_value { rb_ary_push(arr, rb_str_new(ts, te - ts)); has_found = 1;};
    string { rb_ary_push(arr, rb_str_new(ts + 1, te - ts - 2)); has_found = 1;};
    ssv_delimiter { if(has_found == 0) rb_ary_push(arr, Qnil); has_found = 0;};
  *|;
  # excelsior_reader.c is generated from this file; regenerate it with ragel after changing the grammar.
}%%
68
+
69
+ %% write data nofinal;
70
+
71
+
72
/* Scratch state shared by e_rows_scan and its rb_ensure cleanup handler. */
struct e_rows_state {
  VALUE io;      /* input: an object responding to #read, otherwise treated as a String */
  VALUE format;  /* delimiter String, scanned first so the main scanner can pick the dialect */
  char *buf;     /* malloc'ed read buffer used on the #read path, NULL when none is held */
};

/* rb_ensure cleanup handler: frees the read buffer however scanning ended. */
static VALUE e_rows_release(VALUE arg) {
  struct e_rows_state *state = (struct e_rows_state *) arg;

  free(state->buf);
  state->buf = NULL;
  return Qnil;
}

/*
 * Scans the whole input and yields one Array per row to the caller's block;
 * empty fields are pushed as nil. Called through rb_ensure by e_rows so that
 * the read buffer is released even when the block raises or breaks.
 */
static VALUE e_rows_scan(VALUE arg) {
  struct e_rows_state *state = (struct e_rows_state *) arg;
  int cs, act;
  char *ts = 0, *te = 0, *eof = NULL;
  char *p = NULL, *pe = NULL;
  long have = 0; /* bytes of an unfinished token kept at the start of the buffer */
  long buffer_size = BUFSIZE;
  long prefix_len = RSTRING_LEN(state->format);
  VALUE arr = rb_ary_new();
  int is_io = rb_respond_to(state->io, s_read);
  int done = 0;
  int first_run = 1;

  has_found = 0;

  %% write init;

  if(is_io) {
    /* The delimiter has to fit in the buffer together with at least one byte of data. */
    if(prefix_len >= buffer_size) rb_raise(rb_eArgError, "format is too long");
    state->buf = (char *) malloc(buffer_size);
    if(state->buf == NULL) rb_memerror();
  }

  while(!done) {
    if(is_io) {
      VALUE str;
      long len, offset = 0;
      long space = buffer_size - have;

      if(space == 0) {
        /* One token fills the whole buffer: double it so reading can make progress. */
        long te_offset = (te != 0) ? (long) (te - ts) : -1;
        char *grown = (char *) realloc(state->buf, buffer_size * 2);

        if(grown == NULL) rb_memerror();
        state->buf = grown;
        buffer_size *= 2;
        ts = grown; /* the carried-over token starts at the beginning of the buffer */
        if(te_offset >= 0) te = ts + te_offset;
        space = buffer_size - have;
      }
      p = state->buf + have;

      if(first_run) {
        /* Put the delimiter in front of the data without modifying the caller's String. */
        memcpy(p, RSTRING_PTR(state->format), prefix_len);
        offset = prefix_len;
        first_run = 0;
      }

      /* #read may return nil at end of input; that counts as an empty chunk. */
      str = rb_funcall(state->io, s_read, 1, INT2FIX(space - offset));
      len = offset;
      if(!NIL_P(str)) {
        StringValue(str);
        if(RSTRING_LEN(str) > space - offset) rb_raise(rb_eIOError, "read returned more data than requested");
        memcpy(p + offset, RSTRING_PTR(str), RSTRING_LEN(str));
        len += RSTRING_LEN(str);
      }

      pe = p + len;
      if(len < space) { /* short read: this was the last chunk */
        done = 1;
        eof = pe;
      }
    } else {
      /* Not readable: assume a String already in memory and scan a new delimiter + data String. */
      state->io = rb_str_plus(state->format, state->io);
      p = RSTRING_PTR(state->io);
      pe = p + RSTRING_LEN(state->io);
      eof = pe;
      done = 1;
    }

    %% write exec;

    /* Stop when the input is exhausted or the scanner is in its error state. */
    if(done || cs == excelsior_scan_error) break;

    if(ts != 0) {
      /* An unfinished token spans the chunk boundary: move it to the buffer start. */
      have = pe - ts;
      memmove(state->buf, ts, have);
      if(te != 0) te = state->buf + (te - ts);
      ts = state->buf;
    } else {
      have = 0;
    }
  }

  if(RARRAY_LEN(arr) > 0) { /* a final row is still pending */
    rb_yield(arr);
  }

  return Qnil;
}

/*
 * Excelsior::Reader.rows(source, format = ",") { |row| ... } -> nil
 *
 * source is either an object that responds to #read, which is consumed in
 * chunks, or a String. format is the delimiter String; it is scanned ahead of
 * the data and its first byte picks the comma, tab, pipe or semicolon dialect.
 * The format String itself is not modified.
 */
VALUE e_rows(int argc, VALUE *argv, VALUE self) {
  struct e_rows_state state;

  rb_scan_args(argc, argv, "11", &state.io, &state.format);
  if(NIL_P(state.format)) state.format = rb_str_new2(",");
  StringValue(state.format);
  state.buf = NULL;

  return rb_ensure(e_rows_scan, (VALUE) &state, e_rows_release, (VALUE) &state);
}
146
+
147
/* Handles for the Excelsior module and its Reader class, set when the extension is loaded. */
VALUE mExcelsior;
VALUE cReader;

/*
 * Extension entry point: defines Excelsior::Reader, interns the "read" method
 * id used by e_rows, and registers e_rows as the singleton method "rows" with
 * a variable argument count (-1).
 */
void Init_excelsior_reader() {
  mExcelsior = rb_define_module("Excelsior");
  cReader = rb_define_class_under(mExcelsior, "Reader", rb_cObject);

  s_read = rb_intern("read");
  rb_define_singleton_method(cReader, "rows", e_rows, -1);
}
@@ -0,0 +1,5 @@
1
require 'mkmf'

# mkmf configuration for the excelsior_reader extension. The three calls run in
# this order: register the "excelsior" directory configuration, probe library
# "c" for the symbol "main", then write the Makefile for excelsior_reader.
dir_config 'excelsior'
have_library 'c', 'main'
create_makefile 'excelsior_reader'
data/lib/excelsior.rb ADDED
@@ -0,0 +1,7 @@
1
+ require 'excelsior_reader'
2
+
3
# Namespace of the Excelsior delimited-text reader.
module Excelsior
  # Version of the gem this library is released with.
  VERSION = '0.0.9'.freeze

  # @return [String] the library version, identical to the gem version
  def self.version
    VERSION
  end
end
metadata ADDED
@@ -0,0 +1,67 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: excelsior-formats
3
+ version: !ruby/object:Gem::Version
4
+ segments:
5
+ - 0
6
+ - 0
7
+ - 9
8
+ version: 0.0.9
9
+ platform: ruby
10
+ authors:
11
+ - Matthew Mongeau
12
+ autorequire:
13
+ bindir: bin
14
+ cert_chain: []
15
+
16
+ date: 2009-12-10 06:00:00 +01:00
17
+ default_executable:
18
+ dependencies: []
19
+
20
+ description: A Ruby gem that uses C bindings to read CSV files superfast. I'm totally serial!
21
+ email: matt@toastyapps.com
22
+ executables: []
23
+
24
+ extensions:
25
+ - ext/excelsior_reader/extconf.rb
26
+ extra_rdoc_files: []
27
+
28
+ files:
29
+ - Rakefile
30
+ - lib/excelsior.rb
31
+ - ext/excelsior_reader/excelsior_reader.c
32
+ - ext/excelsior_reader/extconf.rb
33
+ - ext/excelsior_reader/excelsior_reader.rl
34
+ has_rdoc: true
35
+ homepage: http://github.com/toastyapps/excelsior
36
+ licenses: []
37
+
38
+ post_install_message:
39
+ rdoc_options: []
40
+
41
+ require_paths:
42
+ - lib
43
+ required_ruby_version: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - ">="
46
+ - !ruby/object:Gem::Version
47
+ segments:
48
+ - 0
49
+ version: "0"
50
+ version:
51
+ required_rubygems_version: !ruby/object:Gem::Requirement
52
+ requirements:
53
+ - - ">="
54
+ - !ruby/object:Gem::Version
55
+ segments:
56
+ - 0
57
+ version: "0"
58
+ version:
59
+ requirements: []
60
+
61
+ rubyforge_project:
62
+ rubygems_version: 1.3.6
63
+ signing_key:
64
+ specification_version: 3
65
+ summary: A Ruby gem that uses C bindings to read CSV files superfast. I'm totally serial!
66
+ test_files: []
67
+