excelsior-formats 0.0.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/Rakefile ADDED
@@ -0,0 +1,26 @@
1
+ require 'rake'
2
+ require 'rake/testtask'
3
+
4
+ PKG_FILES = %w(Rakefile) +
5
+ Dir.glob("{lib}/**/*") +
6
+ Dir.glob("ext/**/*.{c,rb,rl}") +
7
+ %w[ext/excelsior_reader/excelsior_reader.c] # listed explicitly because this file is generated later (presumably from the .rl source, so the glob may not see it yet)
8
+ # Gem specification; the :gemspec task below serializes it to YAML.
9
+ gem_spec = Gem::Specification.new do |gem_spec|
10
+ gem_spec.name = 'excelsior'
11
+ gem_spec.version = '0.0.9'
12
+ gem_spec.summary = 'A Ruby gem that uses C bindings to read CSV files superfast. I\'m totally serial!'
13
+ gem_spec.description = 'A Ruby gem that uses C bindings to read CSV files superfast. I\'m totally serial!'
14
+ gem_spec.email = 'matt@toastyapps.com'
15
+ gem_spec.homepage = 'http://github.com/toastyapps/excelsior'
16
+ gem_spec.authors = ["Matthew Mongeau"]
17
+ gem_spec.files = PKG_FILES
18
+ gem_spec.extensions = FileList["ext/**/extconf.rb"].to_a # every extconf.rb found under ext/ is registered as an extension
19
+ end
20
+
21
+ desc "Generate a gemspec file"
22
+ task :gemspec do
23
+ File.open("#{gem_spec.name}.gemspec", "w") do |f| # file name is derived from gem_spec.name
24
+ f.write gem_spec.to_yaml
25
+ end
26
+ end
@@ -0,0 +1,547 @@
1
+
2
+ #line 1 "excelsior_reader.rl"
3
+ #include <ruby.h>
4
+
5
+ static ID s_read; // ID of the method name "read"; interned in Init_excelsior_reader and used to detect and call io.read
6
+
7
+ int has_found = 0; // 1 once the current field produced a value; a delimiter or newline seen while it is 0 pushes nil
8
+ #define BUFSIZE 16384 // initial value of buffer_size in e_rows
9
+
10
+
11
+ #line 67 "excelsior_reader.rl"
12
+
13
+
14
+
15
+ #line 16 "excelsior_reader.c"
16
+ static const char _excelsior_scan_actions[] = { // action lists: at each referenced offset, a count followed by that many action ids
17
+ 0, 1, 0, 1, 2, 1, 4, 1,
18
+ 5, 1, 6, 1, 7, 1, 11, 1,
19
+ 12, 1, 13, 1, 14, 1, 15, 1,
20
+ 19, 1, 20, 1, 21, 1, 22, 1,
21
+ 23, 1, 27, 1, 28, 1, 29, 1,
22
+ 30, 1, 31, 1, 35, 1, 36, 1,
23
+ 37, 1, 38, 1, 39, 2, 0, 1,
24
+ 2, 3, 8, 2, 3, 9, 2, 3,
25
+ 10, 2, 3, 16, 2, 3, 17, 2,
26
+ 3, 18, 2, 3, 24, 2, 3, 25,
27
+ 2, 3, 26, 2, 3, 32, 2, 3,
28
+ 33, 2, 3, 34
29
+ };
30
+
31
+ static const char _excelsior_scan_key_offsets[] = { // per state: offset of its first key in _excelsior_scan_trans_keys
32
+ 0, 0, 1, 2, 3, 4, 8, 15,
33
+ 19, 20, 21, 28, 32, 33, 34, 41,
34
+ 45, 46, 47, 54, 58, 59
35
+ };
36
+
37
+ static const char _excelsior_scan_trans_keys[] = { // per state: its single-character keys, then its (low, high) range pairs; both are binary-searched
38
+ 34, 34, 34, 34, 9, 44, 59, 124,
39
+ 10, 13, 32, 34, 44, 9, 12, 10,
40
+ 13, 34, 44, 10, 34, 9, 10, 13,
41
+ 32, 34, 11, 12, 13, 34, 9, 10,
42
+ 10, 34, 10, 13, 32, 34, 124, 9,
43
+ 12, 10, 13, 34, 124, 10, 34, 10,
44
+ 13, 32, 34, 59, 9, 12, 10, 13,
45
+ 34, 59, 10, 34, 0
46
+ };
47
+
48
+ static const char _excelsior_scan_single_lengths[] = { // per state: number of single-character keys
49
+ 0, 1, 1, 1, 1, 4, 5, 4,
50
+ 1, 1, 5, 2, 1, 1, 5, 4,
51
+ 1, 1, 5, 4, 1, 1
52
+ };
53
+
54
+ static const char _excelsior_scan_range_lengths[] = { // per state: number of (low, high) key ranges
55
+ 0, 0, 0, 0, 0, 0, 1, 0,
56
+ 0, 0, 1, 1, 0, 0, 1, 0,
57
+ 0, 0, 1, 0, 0, 0
58
+ };
59
+
60
+ static const char _excelsior_scan_index_offsets[] = { // per state: index of its first transition in the trans_targs / trans_actions tables
61
+ 0, 0, 2, 4, 6, 8, 13, 20,
62
+ 25, 27, 29, 36, 40, 42, 44, 51,
63
+ 56, 58, 60, 67, 72, 74
64
+ };
65
+
66
+ static const char _excelsior_scan_trans_targs[] = { // per transition: the state entered when it is taken
67
+ 9, 1, 13, 2, 17, 3, 21, 4,
68
+ 5, 5, 5, 5, 0, 6, 8, 7,
69
+ 1, 6, 7, 7, 6, 6, 6, 6,
70
+ 7, 6, 6, 1, 6, 10, 10, 12,
71
+ 11, 2, 11, 11, 10, 10, 10, 11,
72
+ 10, 10, 2, 10, 14, 16, 15, 3,
73
+ 14, 15, 15, 14, 14, 14, 14, 15,
74
+ 14, 14, 3, 14, 18, 20, 19, 4,
75
+ 18, 19, 19, 18, 18, 18, 18, 19,
76
+ 18, 18, 4, 18, 6, 10, 14, 18,
77
+ 6, 6, 6, 10, 10, 10, 14, 14,
78
+ 14, 18, 18, 18, 0
79
+ };
80
+
81
+ static const char _excelsior_scan_trans_actions[] = { // per transition: offset of its action list in _excelsior_scan_actions (0 = no action)
82
+ 62, 0, 71, 0, 80, 0, 89, 0,
83
+ 7, 5, 11, 9, 0, 13, 0, 56,
84
+ 0, 15, 56, 59, 21, 21, 21, 21,
85
+ 59, 13, 17, 0, 19, 15, 23, 0, // hand-patched: entry 29 (tab in the tsv scanner) was 25 (whitespace skip); 15 runs the delimiter action so empty TSV fields yield nil
86
+ 65, 0, 65, 68, 31, 31, 31, 68,
87
+ 23, 27, 0, 29, 33, 0, 74, 0,
88
+ 35, 74, 77, 41, 41, 41, 41, 77,
89
+ 33, 37, 0, 39, 43, 0, 83, 0,
90
+ 45, 83, 86, 51, 51, 51, 51, 86,
91
+ 43, 47, 0, 49, 21, 31, 41, 51,
92
+ 21, 17, 19, 31, 27, 29, 41, 37,
93
+ 39, 51, 47, 49, 0
94
+ };
95
+
96
+ static const char _excelsior_scan_to_state_actions[] = { // per state: offset of the action list run after the state is entered (0 = none)
97
+ 0, 0, 0, 0, 0, 1, 53, 0,
98
+ 0, 0, 53, 0, 0, 0, 53, 0,
99
+ 0, 0, 53, 0, 0, 0
100
+ };
101
+
102
+ static const char _excelsior_scan_from_state_actions[] = { // per state: offset of the action list run before a transition out of the state is looked up (0 = none)
103
+ 0, 0, 0, 0, 0, 3, 3, 0,
104
+ 0, 0, 3, 0, 0, 0, 3, 0,
105
+ 0, 0, 3, 0, 0, 0
106
+ };
107
+
108
+ static const char _excelsior_scan_eof_trans[] = { // per state: 1-based index of the transition taken when p reaches eof (0 = none)
109
+ 0, 81, 84, 87, 90, 0, 0, 81,
110
+ 82, 83, 0, 84, 85, 86, 0, 87,
111
+ 88, 89, 0, 90, 91, 92
112
+ };
113
+
114
+ static const int excelsior_scan_start = 5; // initial value of cs
115
+ static const int excelsior_scan_error = 0; // the exec loop stops as soon as cs becomes 0
116
+
117
+ static const int excelsior_scan_en_main = 5; // entry state of `main`, which jumps to one of the scanners below on the first character
118
+ static const int excelsior_scan_en_csv = 6;
119
+ static const int excelsior_scan_en_tsv = 10;
120
+ static const int excelsior_scan_en_psv = 14;
121
+ static const int excelsior_scan_en_ssv = 18;
122
+
123
+
124
+ #line 70 "excelsior_reader.rl"
125
+
126
+
127
+ VALUE e_rows(int argc, VALUE *argv, VALUE self) {
128
+ // Excelsior::Reader.rows(io_or_string, format = ","): yields each row as an Array of Strings (nil for an empty field) and returns nil.
129
+ // `format` is the delimiter; it is scanned ahead of the data so the `main` scanner can select the csv, tsv, psv or ssv dialect.
130
+ // Input that responds to `read` is consumed in chunks; anything else must be a String and is scanned in place.
131
+ int cs, act;
132
+ char *ts = 0, *te = 0, *eof = NULL;
133
+ char *p = NULL, *pe = NULL, *buf = NULL;
134
+ long buffer_size = BUFSIZE, have = 0;
135
+ VALUE arr, io, format, str, bufstr = Qnil;
136
+ int is_io, done = 0, first_run = 1;
137
+
138
+ has_found = 0;
139
+ arr = rb_ary_new();
140
+ rb_scan_args(argc, argv, "11", &io, &format);
141
+ if(NIL_P(format)) format = rb_str_new2(",");
142
+ StringValue(format); // a non-String delimiter raises TypeError instead of being dereferenced as a String
143
+ is_io = rb_respond_to(io, s_read);
144
+
145
+
146
+ #line 147 "excelsior_reader.c"
147
+ {
148
+ cs = excelsior_scan_start;
149
+ ts = 0;
150
+ te = 0;
151
+ act = 0;
152
+ }
153
+
154
+ #line 91 "excelsior_reader.rl"
155
+ if(is_io) { // scratch buffer is a GC-managed String: unlike malloc it is reclaimed even if read or the block raises
156
+ buffer_size += RSTRING_LEN(format); // room for the delimiter in front of the first chunk
157
+ bufstr = rb_str_new(NULL, buffer_size);
158
+ buf = RSTRING_PTR(bufstr);
159
+ } else {
160
+ io = rb_str_plus(format, io); // new String; rb_str_append would have modified the caller's format
161
+ }
162
+ while(!done) {
163
+ long len, space = 0;
164
+ if(is_io) {
165
+ if(have == buffer_size) { // one token fills the whole buffer: grow it, otherwise read(0) would spin forever
166
+ long te_off = te - ts;
167
+ buffer_size *= 2;
168
+ rb_str_resize(bufstr, buffer_size);
169
+ buf = ts = RSTRING_PTR(bufstr);
170
+ te = buf + te_off;
171
+ }
172
+ space = buffer_size - have;
173
+ p = buf + have;
174
+ len = first_run ? RSTRING_LEN(format) : 0; // the delimiter precedes the first chunk only
175
+ memcpy(p, RSTRING_PTR(format), len);
176
+ first_run = 0;
177
+ str = rb_funcall(io, s_read, 1, LONG2NUM(space - len));
178
+ if(!NIL_P(str)) { // read(n) returns nil at end of stream
179
+ StringValue(str);
180
+ if(RSTRING_LEN(str) > space - len) rb_raise(rb_eIOError, "read returned more bytes than requested");
181
+ memcpy(p + len, RSTRING_PTR(str), RSTRING_LEN(str));
182
+ len += RSTRING_LEN(str);
183
+ }
184
+ if(len < space) done = 1; // short read: nothing more to fetch
185
+ } else { // not readable: scan the String in place
186
+ p = RSTRING_PTR(io);
187
+ len = RSTRING_LEN(io);
188
+ done = 1;
189
+ }
190
+ pe = p + len;
191
+ if(done) eof = pe; // lets the scanner flush a final token that has no trailing newline
192
+
193
+ #line 194 "excelsior_reader.c"
194
+ {
195
+ int _klen;
196
+ unsigned int _trans;
197
+ const char *_acts;
198
+ unsigned int _nacts;
199
+ const char *_keys;
200
+
201
+ if ( p == pe )
202
+ goto _test_eof;
203
+ if ( cs == 0 )
204
+ goto _out;
205
+ _resume:
206
+ _acts = _excelsior_scan_actions + _excelsior_scan_from_state_actions[cs];
207
+ _nacts = (unsigned int) *_acts++;
208
+ while ( _nacts-- > 0 ) {
209
+ switch ( *_acts++ ) {
210
+ case 2:
211
+ #line 1 "excelsior_reader.rl"
212
+ {ts = p;}
213
+ break;
214
+ #line 215 "excelsior_reader.c"
215
+ }
216
+ }
217
+
218
+ _keys = _excelsior_scan_trans_keys + _excelsior_scan_key_offsets[cs];
219
+ _trans = _excelsior_scan_index_offsets[cs];
220
+
221
+ _klen = _excelsior_scan_single_lengths[cs];
222
+ if ( _klen > 0 ) {
223
+ const char *_lower = _keys;
224
+ const char *_mid;
225
+ const char *_upper = _keys + _klen - 1;
226
+ while (1) {
227
+ if ( _upper < _lower )
228
+ break;
229
+
230
+ _mid = _lower + ((_upper-_lower) >> 1);
231
+ if ( (*p) < *_mid )
232
+ _upper = _mid - 1;
233
+ else if ( (*p) > *_mid )
234
+ _lower = _mid + 1;
235
+ else {
236
+ _trans += (_mid - _keys);
237
+ goto _match;
238
+ }
239
+ }
240
+ _keys += _klen;
241
+ _trans += _klen;
242
+ }
243
+
244
+ _klen = _excelsior_scan_range_lengths[cs];
245
+ if ( _klen > 0 ) {
246
+ const char *_lower = _keys;
247
+ const char *_mid;
248
+ const char *_upper = _keys + (_klen<<1) - 2;
249
+ while (1) {
250
+ if ( _upper < _lower )
251
+ break;
252
+
253
+ _mid = _lower + (((_upper-_lower) >> 1) & ~1);
254
+ if ( (*p) < _mid[0] )
255
+ _upper = _mid - 2;
256
+ else if ( (*p) > _mid[1] )
257
+ _lower = _mid + 2;
258
+ else {
259
+ _trans += ((_mid - _keys)>>1);
260
+ goto _match;
261
+ }
262
+ }
263
+ _trans += _klen;
264
+ }
265
+
266
+ _match:
267
+ _eof_trans:
268
+ cs = _excelsior_scan_trans_targs[_trans];
269
+
270
+ if ( _excelsior_scan_trans_actions[_trans] == 0 )
271
+ goto _again;
272
+
273
+ _acts = _excelsior_scan_actions + _excelsior_scan_trans_actions[_trans];
274
+ _nacts = (unsigned int) *_acts++;
275
+ while ( _nacts-- > 0 )
276
+ {
277
+ switch ( *_acts++ )
278
+ {
279
+ case 3:
280
+ #line 1 "excelsior_reader.rl"
281
+ {te = p+1;}
282
+ break;
283
+ case 4:
284
+ #line 32 "excelsior_reader.rl"
285
+ {te = p+1;{ {cs = 6; goto _again;} }}
286
+ break;
287
+ case 5:
288
+ #line 33 "excelsior_reader.rl"
289
+ {te = p+1;{ {cs = 10; goto _again;} }}
290
+ break;
291
+ case 6:
292
+ #line 34 "excelsior_reader.rl"
293
+ {te = p+1;{ {cs = 14; goto _again;} }}
294
+ break;
295
+ case 7:
296
+ #line 35 "excelsior_reader.rl"
297
+ {te = p+1;{ {cs = 18; goto _again;} }}
298
+ break;
299
+ case 8:
300
+ #line 40 "excelsior_reader.rl"
301
+ {act = 6;}
302
+ break;
303
+ case 9:
304
+ #line 41 "excelsior_reader.rl"
305
+ {act = 7;}
306
+ break;
307
+ case 10:
308
+ #line 42 "excelsior_reader.rl"
309
+ {act = 8;}
310
+ break;
311
+ case 11:
312
+ #line 39 "excelsior_reader.rl"
313
+ {te = p+1;{ if(has_found ==0) rb_ary_push(arr, Qnil); rb_yield(arr); arr = rb_ary_new(); has_found = 0; }}
314
+ break;
315
+ case 12:
316
+ #line 43 "excelsior_reader.rl"
317
+ {te = p+1;{ if(has_found == 0) rb_ary_push(arr, Qnil); has_found = 0;}}
318
+ break;
319
+ case 13:
320
+ #line 39 "excelsior_reader.rl"
321
+ {te = p;p--;{ if(has_found ==0) rb_ary_push(arr, Qnil); rb_yield(arr); arr = rb_ary_new(); has_found = 0; }}
322
+ break;
323
+ case 14:
324
+ #line 42 "excelsior_reader.rl"
325
+ {te = p;p--;{ rb_ary_push(arr, rb_str_new(ts + 1, te - ts - 2)); has_found = 1;}}
326
+ break;
327
+ case 15:
328
+ #line 1 "excelsior_reader.rl"
329
+ { switch( act ) {
330
+ case 0:
331
+ {{cs = 0; goto _again;}}
332
+ break;
333
+ case 7:
334
+ {{p = ((te))-1;} rb_ary_push(arr, rb_str_new(ts, te - ts)); has_found = 1;}
335
+ break;
336
+ case 8:
337
+ {{p = ((te))-1;} rb_ary_push(arr, rb_str_new(ts + 1, te - ts - 2)); has_found = 1;}
338
+ break;
339
+ default:
340
+ {{p = ((te))-1;}}
341
+ break;
342
+ }
343
+ }
344
+ break;
345
+ case 16:
346
+ #line 47 "excelsior_reader.rl"
347
+ {act = 11;}
348
+ break;
349
+ case 17:
350
+ #line 48 "excelsior_reader.rl"
351
+ {act = 12;}
352
+ break;
353
+ case 18:
354
+ #line 49 "excelsior_reader.rl"
355
+ {act = 13;}
356
+ break;
357
+ case 19:
358
+ #line 46 "excelsior_reader.rl"
359
+ {te = p+1;{ if(has_found ==0) rb_ary_push(arr, Qnil); rb_yield(arr); arr = rb_ary_new(); has_found = 0; }}
360
+ break;
361
+ case 20:
362
+ #line 47 "excelsior_reader.rl"
363
+ {te = p+1;}
364
+ break;
365
+ case 21:
366
+ #line 46 "excelsior_reader.rl"
367
+ {te = p;p--;{ if(has_found ==0) rb_ary_push(arr, Qnil); rb_yield(arr); arr = rb_ary_new(); has_found = 0; }}
368
+ break;
369
+ case 22:
370
+ #line 49 "excelsior_reader.rl"
371
+ {te = p;p--;{ rb_ary_push(arr, rb_str_new(ts + 1, te - ts - 2)); has_found = 1;}}
372
+ break;
373
+ case 23:
374
+ #line 1 "excelsior_reader.rl"
375
+ { switch( act ) {
376
+ case 0:
377
+ {{cs = 0; goto _again;}}
378
+ break;
379
+ case 12:
380
+ {{p = ((te))-1;} rb_ary_push(arr, rb_str_new(ts, te - ts)); has_found = 1;}
381
+ break;
382
+ case 13:
383
+ {{p = ((te))-1;} rb_ary_push(arr, rb_str_new(ts + 1, te - ts - 2)); has_found = 1;}
384
+ break;
385
+ default:
386
+ {{p = ((te))-1;}}
387
+ break;
388
+ }
389
+ }
390
+ break;
391
+ case 24:
392
+ #line 54 "excelsior_reader.rl"
393
+ {act = 16;}
394
+ break;
395
+ case 25:
396
+ #line 55 "excelsior_reader.rl"
397
+ {act = 17;}
398
+ break;
399
+ case 26:
400
+ #line 56 "excelsior_reader.rl"
401
+ {act = 18;}
402
+ break;
403
+ case 27:
404
+ #line 53 "excelsior_reader.rl"
405
+ {te = p+1;{ if(has_found ==0) rb_ary_push(arr, Qnil); rb_yield(arr); arr = rb_ary_new(); has_found = 0; }}
406
+ break;
407
+ case 28:
408
+ #line 57 "excelsior_reader.rl"
409
+ {te = p+1;{ if(has_found == 0) rb_ary_push(arr, Qnil); has_found = 0;}}
410
+ break;
411
+ case 29:
412
+ #line 53 "excelsior_reader.rl"
413
+ {te = p;p--;{ if(has_found ==0) rb_ary_push(arr, Qnil); rb_yield(arr); arr = rb_ary_new(); has_found = 0; }}
414
+ break;
415
+ case 30:
416
+ #line 56 "excelsior_reader.rl"
417
+ {te = p;p--;{ rb_ary_push(arr, rb_str_new(ts + 1, te - ts - 2)); has_found = 1;}}
418
+ break;
419
+ case 31:
420
+ #line 1 "excelsior_reader.rl"
421
+ { switch( act ) {
422
+ case 0:
423
+ {{cs = 0; goto _again;}}
424
+ break;
425
+ case 17:
426
+ {{p = ((te))-1;} rb_ary_push(arr, rb_str_new(ts, te - ts)); has_found = 1;}
427
+ break;
428
+ case 18:
429
+ {{p = ((te))-1;} rb_ary_push(arr, rb_str_new(ts + 1, te - ts - 2)); has_found = 1;}
430
+ break;
431
+ default:
432
+ {{p = ((te))-1;}}
433
+ break;
434
+ }
435
+ }
436
+ break;
437
+ case 32:
438
+ #line 61 "excelsior_reader.rl"
439
+ {act = 21;}
440
+ break;
441
+ case 33:
442
+ #line 62 "excelsior_reader.rl"
443
+ {act = 22;}
444
+ break;
445
+ case 34:
446
+ #line 63 "excelsior_reader.rl"
447
+ {act = 23;}
448
+ break;
449
+ case 35:
450
+ #line 60 "excelsior_reader.rl"
451
+ {te = p+1;{ if(has_found ==0) rb_ary_push(arr, Qnil); rb_yield(arr); arr = rb_ary_new(); has_found = 0; }}
452
+ break;
453
+ case 36:
454
+ #line 64 "excelsior_reader.rl"
455
+ {te = p+1;{ if(has_found == 0) rb_ary_push(arr, Qnil); has_found = 0;}}
456
+ break;
457
+ case 37:
458
+ #line 60 "excelsior_reader.rl"
459
+ {te = p;p--;{ if(has_found ==0) rb_ary_push(arr, Qnil); rb_yield(arr); arr = rb_ary_new(); has_found = 0; }}
460
+ break;
461
+ case 38:
462
+ #line 63 "excelsior_reader.rl"
463
+ {te = p;p--;{ rb_ary_push(arr, rb_str_new(ts + 1, te - ts - 2)); has_found = 1;}}
464
+ break;
465
+ case 39:
466
+ #line 1 "excelsior_reader.rl"
467
+ { switch( act ) {
468
+ case 0:
469
+ {{cs = 0; goto _again;}}
470
+ break;
471
+ case 22:
472
+ {{p = ((te))-1;} rb_ary_push(arr, rb_str_new(ts, te - ts)); has_found = 1;}
473
+ break;
474
+ case 23:
475
+ {{p = ((te))-1;} rb_ary_push(arr, rb_str_new(ts + 1, te - ts - 2)); has_found = 1;}
476
+ break;
477
+ default:
478
+ {{p = ((te))-1;}}
479
+ break;
480
+ }
481
+ }
482
+ break;
483
+ #line 484 "excelsior_reader.c"
484
+ }
485
+ }
486
+
487
+ _again:
488
+ _acts = _excelsior_scan_actions + _excelsior_scan_to_state_actions[cs];
489
+ _nacts = (unsigned int) *_acts++;
490
+ while ( _nacts-- > 0 ) {
491
+ switch ( *_acts++ ) {
492
+ case 0:
493
+ #line 1 "excelsior_reader.rl"
494
+ {ts = 0;}
495
+ break;
496
+ case 1:
497
+ #line 1 "excelsior_reader.rl"
498
+ {act = 0;}
499
+ break;
500
+ #line 501 "excelsior_reader.c"
501
+ }
502
+ }
503
+
504
+ if ( cs == 0 )
505
+ goto _out;
506
+ if ( ++p != pe )
507
+ goto _resume;
508
+ _test_eof: {}
509
+ if ( p == eof )
510
+ {
511
+ if ( _excelsior_scan_eof_trans[cs] > 0 ) {
512
+ _trans = _excelsior_scan_eof_trans[cs] - 1;
513
+ goto _eof_trans;
514
+ }
515
+ }
516
+
517
+ _out: {}
518
+ }
519
+
520
+ #line 129 "excelsior_reader.rl"
521
+
522
+ if(cs == excelsior_scan_error) break; // unsupported delimiter or unterminated quote: stop instead of copying out of bounds
523
+ have = 0;
524
+ if(is_io && ts != 0) { // a token straddles the chunk boundary: keep it at the front of the buffer
525
+ have = pe - ts;
526
+ memmove(buf, ts, have);
527
+ te = buf + (te - ts);
528
+ ts = buf;
529
+ }
530
+ }
531
+
532
+ if(RARRAY_LEN(arr) > 0) { // the input ended without a final newline
533
+ rb_yield(arr);
534
+ }
535
+ RB_GC_GUARD(io); RB_GC_GUARD(bufstr); // p/ts/te point into these Strings while the block runs
536
+ return Qnil;
537
+ }
538
+
539
+ VALUE mExcelsior; // the Excelsior module
540
+ VALUE cReader; // the Excelsior::Reader class
541
+
542
+ void Init_excelsior_reader(void) { // extension entry point: defines Excelsior::Reader.rows
543
+ s_read = rb_intern("read");
544
+ mExcelsior = rb_define_module("Excelsior");
545
+ cReader = rb_define_class_under(mExcelsior, "Reader", rb_cObject);
546
+ rb_define_singleton_method(cReader, "rows", e_rows, -1); // -1: e_rows receives (argc, argv, self)
547
+ }
@@ -0,0 +1,155 @@
1
+ #include <ruby.h>
2
+
3
+ static ID s_read; // ID of the method name "read"; interned in Init_excelsior_reader and used to detect and call io.read
4
+
5
+ int has_found = 0; // 1 once the current field produced a value; a delimiter or newline seen while it is 0 pushes nil
6
+ #define BUFSIZE 16384 // initial value of buffer_size in e_rows
7
+
8
+ %%{
9
+ machine excelsior_scan;
10
+ # The first input character is the delimiter; `main` uses it to jump to the matching dialect scanner.
11
+ csv_delimiter = ',';
12
+ tsv_delimiter = '\t';
13
+ psv_delimiter = '|';
14
+ ssv_delimiter = ';';
15
+
16
+
17
+ newline = "\r"? "\n" | "\r";
18
+ string_character = any - '"';
19
+ string = '"' (string_character | '""')* '"' ; # quoted field; a doubled quote stays inside the field
20
+
21
+ csv_letter = string_character - csv_delimiter - newline;
22
+ tsv_letter = string_character - tsv_delimiter - newline;
23
+ psv_letter = string_character - psv_delimiter - newline;
24
+ ssv_letter = string_character - ssv_delimiter - newline;
25
+
26
+ csv_value = csv_letter+;
27
+ tsv_value = tsv_letter+;
28
+ psv_value = psv_letter+;
29
+ ssv_value = ssv_letter+;
30
+
31
+ main := |*
32
+ csv_delimiter { fgoto csv; };
33
+ tsv_delimiter { fgoto tsv; };
34
+ psv_delimiter { fgoto psv; };
35
+ ssv_delimiter { fgoto ssv; };
36
+ *|;
37
+ # Each scanner: newline yields the row, a value or string appends a field, a delimiter appends nil when the field was empty.
38
+ csv := |*
39
+ newline { if(has_found ==0) rb_ary_push(arr, Qnil); rb_yield(arr); arr = rb_ary_new(); has_found = 0; };
40
+ space;
41
+ csv_value { rb_ary_push(arr, rb_str_new(ts, te - ts)); has_found = 1;};
42
+ string { rb_ary_push(arr, rb_str_new(ts + 1, te - ts - 2)); has_found = 1;};
43
+ csv_delimiter { if(has_found == 0) rb_ary_push(arr, Qnil); has_found = 0;};
44
+ *|;
45
+ tsv := |*
46
+ newline { if(has_found ==0) rb_ary_push(arr, Qnil); rb_yield(arr); arr = rb_ary_new(); has_found = 0; };
47
+ (space - tsv_delimiter); # tab is the separator here: plain `space` matched it first, so the delimiter rule never fired
48
+ tsv_value { rb_ary_push(arr, rb_str_new(ts, te - ts)); has_found = 1;};
49
+ string { rb_ary_push(arr, rb_str_new(ts + 1, te - ts - 2)); has_found = 1;};
50
+ tsv_delimiter { if(has_found == 0) rb_ary_push(arr, Qnil); has_found = 0;};
51
+ *|;
52
+ psv := |*
53
+ newline { if(has_found ==0) rb_ary_push(arr, Qnil); rb_yield(arr); arr = rb_ary_new(); has_found = 0; };
54
+ space;
55
+ psv_value { rb_ary_push(arr, rb_str_new(ts, te - ts)); has_found = 1;};
56
+ string { rb_ary_push(arr, rb_str_new(ts + 1, te - ts - 2)); has_found = 1;};
57
+ psv_delimiter { if(has_found == 0) rb_ary_push(arr, Qnil); has_found = 0;};
58
+ *|;
59
+ ssv := |*
60
+ newline { if(has_found ==0) rb_ary_push(arr, Qnil); rb_yield(arr); arr = rb_ary_new(); has_found = 0; };
61
+ space;
62
+ ssv_value { rb_ary_push(arr, rb_str_new(ts, te - ts)); has_found = 1;};
63
+ string { rb_ary_push(arr, rb_str_new(ts + 1, te - ts - 2)); has_found = 1;};
64
+ ssv_delimiter { if(has_found == 0) rb_ary_push(arr, Qnil); has_found = 0;};
65
+ *|;
66
+
67
+ }%%
68
+
69
+ %% write data nofinal;
70
+
71
+
72
+ VALUE e_rows(int argc, VALUE *argv, VALUE self) {
73
+ // Excelsior::Reader.rows(io_or_string, format = ","): yields each row as an Array of Strings (nil for an empty field) and returns nil.
74
+ // `format` is the delimiter; it is scanned ahead of the data so the `main` scanner can select the csv, tsv, psv or ssv dialect.
75
+ // Input that responds to `read` is consumed in chunks; anything else must be a String and is scanned in place.
76
+ int cs, act;
77
+ char *ts = 0, *te = 0, *eof = NULL;
78
+ char *p = NULL, *pe = NULL, *buf = NULL;
79
+ long buffer_size = BUFSIZE, have = 0;
80
+ VALUE arr, io, format, str, bufstr = Qnil;
81
+ int is_io, done = 0, first_run = 1;
82
+
83
+ has_found = 0;
84
+ arr = rb_ary_new();
85
+ rb_scan_args(argc, argv, "11", &io, &format);
86
+ if(NIL_P(format)) format = rb_str_new2(",");
87
+ StringValue(format); // a non-String delimiter raises TypeError instead of being dereferenced as a String
88
+ is_io = rb_respond_to(io, s_read);
89
+
90
+ %% write init;
91
+ if(is_io) { // scratch buffer is a GC-managed String: unlike malloc it is reclaimed even if read or the block raises
92
+ buffer_size += RSTRING_LEN(format); // room for the delimiter in front of the first chunk
93
+ bufstr = rb_str_new(NULL, buffer_size);
94
+ buf = RSTRING_PTR(bufstr);
95
+ } else {
96
+ io = rb_str_plus(format, io); // new String; rb_str_append would have modified the caller's format
97
+ }
98
+ while(!done) {
99
+ long len, space = 0;
100
+ if(is_io) {
101
+ if(have == buffer_size) { // one token fills the whole buffer: grow it, otherwise read(0) would spin forever
102
+ long te_off = te - ts;
103
+ buffer_size *= 2;
104
+ rb_str_resize(bufstr, buffer_size);
105
+ buf = ts = RSTRING_PTR(bufstr);
106
+ te = buf + te_off;
107
+ }
108
+ space = buffer_size - have;
109
+ p = buf + have;
110
+ len = first_run ? RSTRING_LEN(format) : 0; // the delimiter precedes the first chunk only
111
+ memcpy(p, RSTRING_PTR(format), len);
112
+ first_run = 0;
113
+ str = rb_funcall(io, s_read, 1, LONG2NUM(space - len));
114
+ if(!NIL_P(str)) { // read(n) returns nil at end of stream
115
+ StringValue(str);
116
+ if(RSTRING_LEN(str) > space - len) rb_raise(rb_eIOError, "read returned more bytes than requested");
117
+ memcpy(p + len, RSTRING_PTR(str), RSTRING_LEN(str));
118
+ len += RSTRING_LEN(str);
119
+ }
120
+ if(len < space) done = 1; // short read: nothing more to fetch
121
+ } else { // not readable: scan the String in place
122
+ p = RSTRING_PTR(io);
123
+ len = RSTRING_LEN(io);
124
+ done = 1;
125
+ }
126
+ pe = p + len;
127
+ if(done) eof = pe; // lets the scanner flush a final token that has no trailing newline
128
+ %% write exec;
129
+
130
+ if(cs == excelsior_scan_error) break; // unsupported delimiter or unterminated quote: stop instead of copying out of bounds
131
+ have = 0;
132
+ if(is_io && ts != 0) { // a token straddles the chunk boundary: keep it at the front of the buffer
133
+ have = pe - ts;
134
+ memmove(buf, ts, have);
135
+ te = buf + (te - ts);
136
+ ts = buf;
137
+ }
138
+ }
139
+
140
+ if(RARRAY_LEN(arr) > 0) { // the input ended without a final newline
141
+ rb_yield(arr);
142
+ }
143
+ RB_GC_GUARD(io); RB_GC_GUARD(bufstr); // p/ts/te point into these Strings while the block runs
144
+ return Qnil;
145
+ }
146
+
147
+ VALUE mExcelsior; // the Excelsior module
148
+ VALUE cReader; // the Excelsior::Reader class
149
+
150
+ void Init_excelsior_reader(void) { // extension entry point: defines Excelsior::Reader.rows
151
+ s_read = rb_intern("read");
152
+ mExcelsior = rb_define_module("Excelsior");
153
+ cReader = rb_define_class_under(mExcelsior, "Reader", rb_cObject);
154
+ rb_define_singleton_method(cReader, "rows", e_rows, -1); // -1: e_rows receives (argc, argv, self)
155
+ }
@@ -0,0 +1,5 @@
1
+ require 'mkmf'
2
+
3
+ dir_config('excelsior') # presumably enables the --with-excelsior-dir style options of mkmf; confirm against the mkmf documentation
4
+ have_library("c", "main") # the result of this check is not used anywhere in this script
5
+ create_makefile('excelsior_reader') # same name as the `require 'excelsior_reader'` in lib/excelsior.rb and as Init_excelsior_reader
data/lib/excelsior.rb ADDED
@@ -0,0 +1,7 @@
1
+ require 'excelsior_reader'
2
+
3
+ module Excelsior
4
+ def self.version
5
+ "0.0.1"
6
+ end
7
+ end
metadata ADDED
@@ -0,0 +1,67 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: excelsior-formats
3
+ version: !ruby/object:Gem::Version
4
+ segments:
5
+ - 0
6
+ - 0
7
+ - 9
8
+ version: 0.0.9
9
+ platform: ruby
10
+ authors:
11
+ - Matthew Mongeau
12
+ autorequire:
13
+ bindir: bin
14
+ cert_chain: []
15
+
16
+ date: 2009-12-10 06:00:00 +01:00
17
+ default_executable:
18
+ dependencies: []
19
+
20
+ description: A Ruby gem that uses C bindings to read CSV files superfast. I'm totally serial!
21
+ email: matt@toastyapps.com
22
+ executables: []
23
+
24
+ extensions:
25
+ - ext/excelsior_reader/extconf.rb
26
+ extra_rdoc_files: []
27
+
28
+ files:
29
+ - Rakefile
30
+ - lib/excelsior.rb
31
+ - ext/excelsior_reader/excelsior_reader.c
32
+ - ext/excelsior_reader/extconf.rb
33
+ - ext/excelsior_reader/excelsior_reader.rl
34
+ has_rdoc: true
35
+ homepage: http://github.com/toastyapps/excelsior
36
+ licenses: []
37
+
38
+ post_install_message:
39
+ rdoc_options: []
40
+
41
+ require_paths:
42
+ - lib
43
+ required_ruby_version: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - ">="
46
+ - !ruby/object:Gem::Version
47
+ segments:
48
+ - 0
49
+ version: "0"
50
+ version:
51
+ required_rubygems_version: !ruby/object:Gem::Requirement
52
+ requirements:
53
+ - - ">="
54
+ - !ruby/object:Gem::Version
55
+ segments:
56
+ - 0
57
+ version: "0"
58
+ version:
59
+ requirements: []
60
+
61
+ rubyforge_project:
62
+ rubygems_version: 1.3.6
63
+ signing_key:
64
+ specification_version: 3
65
+ summary: A Ruby gem that uses C bindings to read CSV files superfast. I'm totally serial!
66
+ test_files: []
67
+