excelsior 0.0.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/Rakefile +26 -0
- data/ext/excelsior_reader/excelsior_reader.c +336 -0
- data/ext/excelsior_reader/excelsior_reader.rl +104 -0
- data/ext/excelsior_reader/extconf.rb +5 -0
- data/lib/excelsior.rb +7 -0
- metadata +59 -0
data/Rakefile
ADDED
@@ -0,0 +1,26 @@
|
|
1
|
+
require 'rake'
|
2
|
+
require 'rake/testtask'
|
3
|
+
|
4
|
+
# Files packaged into the gem. The C scanner is named explicitly as well as
# being covered by the glob: it is needed because it is generated later.
PKG_FILES = [
  'Rakefile',
  Dir.glob("{lib}/**/*"),
  Dir.glob("ext/**/*.{c,rb,rl}"),
  'ext/excelsior_reader/excelsior_reader.c'
].flatten

# Gem metadata. The extension list is every extconf.rb found under ext/.
gem_spec = Gem::Specification.new do |s|
  s.name        = 'excelsior'
  s.version     = '0.0.6'
  s.summary     = 'A Ruby gem that uses C bindings to read CSV files superfast. I\'m totally serial!'
  s.description = 'A Ruby gem that uses C bindings to read CSV files superfast. I\'m totally serial!'
  s.email       = 'matt@toastyapps.com'
  s.homepage    = 'http://github.com/toastyapps/excelsior'
  s.authors     = ["Matthew Mongeau"]
  s.files       = PKG_FILES
  s.extensions  = FileList["ext/**/extconf.rb"].to_a
end

# Serialises the specification above as YAML into "<gem name>.gemspec".
desc "Generate a gemspec file"
task :gemspec do
  File.open("#{gem_spec.name}.gemspec", "w") { |out| out.write(gem_spec.to_yaml) }
end
|
@@ -0,0 +1,336 @@
|
|
1
|
+
|
2
|
+
#line 1 "excelsior_reader.rl"
|
3
|
+
#include <ruby.h>
|
4
|
+
|
5
|
+
/* Interned ID of the method name "read"; assigned in Init_excelsior_reader. */
static ID s_read;
|
6
|
+
/* Row currently being assembled: fields are pushed onto it, and at each
 * newline it is yielded and replaced with a fresh array. File-level state
 * shared by every call to e_rows. */
VALUE arr;
|
7
|
+
/* Set to 1 when a value or quoted string is pushed, and back to 0 at each
 * delimiter or newline; a delimiter seen while it is 0 pushes nil. */
int has_found = 0;
|
8
|
+
/* Default size, in bytes, that e_rows uses for its scan buffer. */
#define BUFSIZE 16384
|
9
|
+
|
10
|
+
|
11
|
+
#line 23 "excelsior_reader.rl"
|
12
|
+
|
13
|
+
|
14
|
+
|
15
|
+
#line 16 "excelsior_reader.c"
|
16
|
+
/* Flattened action lists. Each list is a count followed by that many action
 * ids; the *_actions tables below hold offsets into this array, and offset 0
 * is the empty list. */
static const char _excelsior_scan_actions[] = {
|
17
|
+
0, 1, 2, 1, 7, 1, 8, 1,
|
18
|
+
9, 1, 10, 1, 11, 2, 0, 1,
|
19
|
+
2, 3, 4, 2, 3, 5, 2, 3,
|
20
|
+
6
|
21
|
+
};
|
22
|
+
|
23
|
+
/* Per-state offset into _excelsior_scan_trans_keys where the state's keys begin. */
static const char _excelsior_scan_key_offsets[] = {
|
24
|
+
0, 0, 1, 8, 12, 13
|
25
|
+
};
|
26
|
+
|
27
|
+
/* Input byte values each state is matched against: the state's single keys
 * first, followed by its (low, high) range pairs. */
static const char _excelsior_scan_trans_keys[] = {
|
28
|
+
34, 10, 13, 32, 34, 44, 9, 12,
|
29
|
+
10, 13, 34, 44, 10, 34, 0
|
30
|
+
};
|
31
|
+
|
32
|
+
/* Per-state number of single keys (binary-searched by the exec loop). */
static const char _excelsior_scan_single_lengths[] = {
|
33
|
+
0, 1, 5, 4, 1, 1
|
34
|
+
};
|
35
|
+
|
36
|
+
/* Per-state number of key ranges (each range occupies two key slots). */
static const char _excelsior_scan_range_lengths[] = {
|
37
|
+
0, 0, 1, 0, 0, 0
|
38
|
+
};
|
39
|
+
|
40
|
+
/* Per-state index of the state's first transition in the trans_targs and
 * trans_actions tables. */
static const char _excelsior_scan_index_offsets[] = {
|
41
|
+
0, 0, 2, 9, 14, 16
|
42
|
+
};
|
43
|
+
|
44
|
+
/* Target state of each transition. */
static const char _excelsior_scan_trans_targs[] = {
|
45
|
+
5, 1, 2, 4, 3, 1, 2, 3,
|
46
|
+
3, 2, 2, 2, 2, 3, 2, 2,
|
47
|
+
1, 2, 2, 2, 2, 2, 0
|
48
|
+
};
|
49
|
+
|
50
|
+
/* Per-transition offset into _excelsior_scan_actions; 0 means the transition
 * runs no actions. */
static const char _excelsior_scan_trans_actions[] = {
|
51
|
+
22, 0, 3, 0, 16, 0, 5, 16,
|
52
|
+
19, 11, 11, 11, 11, 19, 3, 7,
|
53
|
+
0, 9, 11, 11, 7, 9, 0
|
54
|
+
};
|
55
|
+
|
56
|
+
/* Per-state offset into _excelsior_scan_actions for the actions run after a
 * transition has moved the scanner into that state. */
static const char _excelsior_scan_to_state_actions[] = {
|
57
|
+
0, 0, 13, 0, 0, 0
|
58
|
+
};
|
59
|
+
|
60
|
+
/* Per-state offset into _excelsior_scan_actions for the actions run before a
 * transition out of that state is looked up. */
static const char _excelsior_scan_from_state_actions[] = {
|
61
|
+
0, 0, 1, 0, 0, 0
|
62
|
+
};
|
63
|
+
|
64
|
+
/* Per-state transition to take when the end of input is reached, stored as
 * transition index + 1; 0 means the state has no end-of-input transition. */
static const char _excelsior_scan_eof_trans[] = {
|
65
|
+
0, 20, 0, 20, 21, 22
|
66
|
+
};
|
67
|
+
|
68
|
+
/* State the scanner is initialised to. */
static const int excelsior_scan_start = 2;
|
69
|
+
/* Dead-end state: the exec loop stops as soon as cs equals this value. */
static const int excelsior_scan_error = 0;
|
70
|
+
|
71
|
+
/* Ragel-emitted entry-state constant; NOTE(review): presumably the entry
 * point of the `main` machine. Not referenced by the code in this file. */
static const int excelsior_scan_en_main = 2;
|
72
|
+
|
73
|
+
|
74
|
+
#line 26 "excelsior_reader.rl"
|
75
|
+
|
76
|
+
|
77
|
+
/* Per-call resources that must be released even when the caller's block
 * raises or breaks out of the iteration. */
struct e_rows_ctx {
  VALUE io;   /* the single argument passed to Reader.rows */
  char *buf;  /* malloc'd scan buffer; NULL until it has been allocated */
};

/* rb_ensure cleanup handler: releases the scan buffer. */
static VALUE e_rows_release(VALUE ctx_ptr) {
  struct e_rows_ctx *ctx = (struct e_rows_ctx *) ctx_ptr;

  free(ctx->buf);
  ctx->buf = NULL;
  return Qnil;
}

/*
 * Scanning body of e_rows; runs under rb_ensure so ctx->buf is always freed.
 *
 * Input is taken from ctx->io: if it responds to `read` it is consumed in
 * chunks, otherwise it is converted with StringValue (raising TypeError when
 * that is impossible) and scanned in a single pass.
 *
 * NOTE: this file appears to be Ragel output (see the #line directive at the
 * top of the file naming excelsior_reader.rl). The block between the
 * "scanner" marker comments is the generated exec code, reproduced unchanged
 * apart from dropping its #line directives; the same hand edits belong in the
 * .rl source.
 */
static VALUE e_rows_scan(VALUE ctx_ptr) {
  struct e_rows_ctx *ctx = (struct e_rows_ctx *) ctx_ptr;
  VALUE io = ctx->io;
  int cs, act;
  int is_io, done = 0;
  long have = 0, buffer_size = BUFSIZE;
  char *ts = 0, *te = 0, *buf, *eof = NULL;

  has_found = 0;
  arr = rb_ary_new();

  is_io = rb_respond_to(io, s_read);
  if (!is_io) {
    /* The whole string is scanned at once, so size the buffer to fit it
     * instead of copying into a fixed BUFSIZE allocation. */
    StringValue(io);
    buffer_size = RSTRING_LEN(io);
  }

  buf = malloc(buffer_size > 0 ? (size_t) buffer_size : 1);
  if (buf == NULL) {
    rb_memerror();
  }
  ctx->buf = buf;

  /* scanner init */
  {
    cs = excelsior_scan_start;
    ts = 0;
    te = 0;
    act = 0;
  }

  while (!done) {
    long len, space;
    char *p, *pe;

    /* A single unfinished token fills the whole buffer (so ts == buf):
     * enlarge it, otherwise read would be asked for 0 bytes forever. */
    if (is_io && have == buffer_size) {
      long te_off = (te != 0) ? (long) (te - buf) : -1;
      char *grown;

      if (buffer_size > LONG_MAX / 2) {
        rb_memerror();
      }
      grown = realloc(buf, (size_t) buffer_size * 2);
      if (grown == NULL) {
        rb_memerror();  /* ctx->buf is still valid and freed by the handler */
      }
      ctx->buf = buf = grown;
      buffer_size *= 2;
      ts = buf;
      te = (te_off >= 0) ? buf + te_off : 0;
    }

    space = buffer_size - have;
    p = buf + have;

    if (is_io) {
      VALUE str = rb_funcall(io, s_read, 1, LONG2NUM(space));

      if (NIL_P(str)) {
        len = 0;  /* read(n) reports end of input with nil */
      } else {
        StringValue(str);
        len = RSTRING_LEN(str);
        if (len > space) {
          rb_raise(rb_eRuntimeError, "read returned more data than requested");
        }
        memcpy(p, RSTRING_PTR(str), len);
      }
    } else {
      len = buffer_size;
      memcpy(p, RSTRING_PTR(io), len);
      done = 1;
    }

    pe = p + len;
    if (done || len < space) {
      /* Short read (or in-memory string): this is the final chunk. */
      done = 1;
      eof = pe;
    }

    /* ---- scanner exec: begin generated code ---- */
    {
    int _klen;
    unsigned int _trans;
    const char *_acts;
    unsigned int _nacts;
    const char *_keys;

    if ( p == pe )
      goto _test_eof;
    if ( cs == 0 )
      goto _out;
_resume:
    _acts = _excelsior_scan_actions + _excelsior_scan_from_state_actions[cs];
    _nacts = (unsigned int) *_acts++;
    while ( _nacts-- > 0 ) {
      switch ( *_acts++ ) {
    case 2:
    {ts = p;}
    break;
      }
    }

    _keys = _excelsior_scan_trans_keys + _excelsior_scan_key_offsets[cs];
    _trans = _excelsior_scan_index_offsets[cs];

    _klen = _excelsior_scan_single_lengths[cs];
    if ( _klen > 0 ) {
      const char *_lower = _keys;
      const char *_mid;
      const char *_upper = _keys + _klen - 1;
      while (1) {
        if ( _upper < _lower )
          break;

        _mid = _lower + ((_upper-_lower) >> 1);
        if ( (*p) < *_mid )
          _upper = _mid - 1;
        else if ( (*p) > *_mid )
          _lower = _mid + 1;
        else {
          _trans += (_mid - _keys);
          goto _match;
        }
      }
      _keys += _klen;
      _trans += _klen;
    }

    _klen = _excelsior_scan_range_lengths[cs];
    if ( _klen > 0 ) {
      const char *_lower = _keys;
      const char *_mid;
      const char *_upper = _keys + (_klen<<1) - 2;
      while (1) {
        if ( _upper < _lower )
          break;

        _mid = _lower + (((_upper-_lower) >> 1) & ~1);
        if ( (*p) < _mid[0] )
          _upper = _mid - 2;
        else if ( (*p) > _mid[1] )
          _lower = _mid + 2;
        else {
          _trans += ((_mid - _keys)>>1);
          goto _match;
        }
      }
      _trans += _klen;
    }

_match:
_eof_trans:
    cs = _excelsior_scan_trans_targs[_trans];

    if ( _excelsior_scan_trans_actions[_trans] == 0 )
      goto _again;

    _acts = _excelsior_scan_actions + _excelsior_scan_trans_actions[_trans];
    _nacts = (unsigned int) *_acts++;
    while ( _nacts-- > 0 )
    {
      switch ( *_acts++ )
      {
    case 3:
    {te = p+1;}
    break;
    case 4:
    {act = 2;}
    break;
    case 5:
    {act = 3;}
    break;
    case 6:
    {act = 4;}
    break;
    case 7:
    {te = p+1;{ rb_yield(arr); arr = rb_ary_new(); has_found = 0; }}
    break;
    case 8:
    {te = p+1;{ if(has_found == 0) rb_ary_push(arr, Qnil); has_found = 0;}}
    break;
    case 9:
    {te = p;p--;{ rb_yield(arr); arr = rb_ary_new(); has_found = 0; }}
    break;
    case 10:
    {te = p;p--;{ rb_ary_push(arr, rb_str_new(ts + 1, te - ts - 2)); has_found = 1;}}
    break;
    case 11:
    { switch( act ) {
    case 0:
    {{cs = 0; goto _again;}}
    break;
    case 3:
    {{p = ((te))-1;} rb_ary_push(arr, rb_str_new(ts, te - ts)); has_found = 1;}
    break;
    case 4:
    {{p = ((te))-1;} rb_ary_push(arr, rb_str_new(ts + 1, te - ts - 2)); has_found = 1;}
    break;
    default:
    {{p = ((te))-1;}}
    break;
    }
    }
    break;
      }
    }

_again:
    _acts = _excelsior_scan_actions + _excelsior_scan_to_state_actions[cs];
    _nacts = (unsigned int) *_acts++;
    while ( _nacts-- > 0 ) {
      switch ( *_acts++ ) {
    case 0:
    {ts = 0;}
    break;
    case 1:
    {act = 0;}
    break;
      }
    }

    if ( cs == 0 )
      goto _out;
    if ( ++p != pe )
      goto _resume;
    _test_eof: {}
    if ( p == eof )
    {
    if ( _excelsior_scan_eof_trans[cs] > 0 ) {
      _trans = _excelsior_scan_eof_trans[cs] - 1;
      goto _eof_trans;
    }
    }

    _out: {}
    }
    /* ---- scanner exec: end generated code ---- */

    if (cs == excelsior_scan_error) {
      break;  /* dead-end state: no further input can be matched */
    }

    if (ts == 0) {
      have = 0;  /* nothing to carry over; reuse the whole buffer */
    } else {
      /* Mid-token: move the partial token to the front of the buffer so the
       * next chunk is appended to it. */
      have = pe - ts;
      memmove(buf, ts, have);
      te = (te != 0 && te >= ts) ? buf + (te - ts) : 0;
      ts = buf;
    }
  }

  if (RARRAY_LEN(arr) > 0) {  /* have a last array to yield */
    rb_yield(arr);
  }

  return Qnil;
}

/*
 * Excelsior::Reader.rows(source) { |row| ... }  ->  nil
 *
 * Scans `source` as delimiter-separated text and yields one array per line.
 * `source` is either an object responding to `read` or a String. Takes exactly
 * one argument (enforced by rb_scan_args). The scan buffer is released even
 * if the block raises or breaks.
 */
VALUE e_rows(int argc, VALUE *argv, VALUE self) {
  struct e_rows_ctx ctx;

  ctx.buf = NULL;
  rb_scan_args(argc, argv, "1", &ctx.io);

  return rb_ensure(e_rows_scan, (VALUE) &ctx, e_rows_release, (VALUE) &ctx);
}
|
327
|
+
|
328
|
+
VALUE mExcelsior;
|
329
|
+
VALUE cReader;
|
330
|
+
|
331
|
+
void Init_excelsior_reader() {
|
332
|
+
s_read = rb_intern("read");
|
333
|
+
mExcelsior = rb_define_module("Excelsior");
|
334
|
+
cReader = rb_define_class_under(mExcelsior, "Reader", rb_cObject);
|
335
|
+
rb_define_singleton_method(cReader, "rows", e_rows, -1);
|
336
|
+
}
|
@@ -0,0 +1,104 @@
|
|
1
|
+
#include <ruby.h>
|
2
|
+
|
3
|
+
/* Interned ID of the method name "read"; assigned in Init_excelsior_reader. */
static ID s_read;
|
4
|
+
/* Row currently being assembled: fields are pushed onto it, and at each
 * newline it is yielded and replaced with a fresh array. File-level state
 * shared by every call to e_rows. */
VALUE arr;
|
5
|
+
/* Set to 1 when a value or quoted string is pushed, and back to 0 at each
 * delimiter or newline; a delimiter seen while it is 0 pushes nil. */
int has_found = 0;
|
6
|
+
/* Default size, in bytes, that e_rows uses for its scan buffer. */
#define BUFSIZE 16384
|
7
|
+
|
8
|
+
/*
 * Ragel scanner definition for the row reader, followed by the directive
 * that emits its static data.
 *
 * Tokens recognised by `main` and what each one does:
 *   newline    ("\r\n", "\r" or "\n")  yields the current row, starts a new
 *              array and clears has_found
 *   space      matched with no action
 *   value      a run of characters other than the delimiter, a double quote
 *              or a newline; pushed onto the row as a string
 *   string     a double-quoted field, inside which a quote is written as two
 *              quotes; the bytes between the outer quotes are pushed verbatim
 *              (doubled quotes are not collapsed)
 *   delimeter  (",") pushes nil when no value or string has been pushed since
 *              the previous delimiter or newline, then clears has_found
 */
%%{
|
9
|
+
machine excelsior_scan;
|
10
|
+
delimeter = ",";
|
11
|
+
newline = "\r"? "\n" | "\r" | "\n";
|
12
|
+
schar = any - '"';
|
13
|
+
letter = any - delimeter - '"' - newline;
|
14
|
+
string = '"' (schar | '""')* '"' ;
|
15
|
+
value = letter+;
|
16
|
+
main := |*
|
17
|
+
newline { rb_yield(arr); arr = rb_ary_new(); has_found = 0; };
|
18
|
+
space;
|
19
|
+
value { rb_ary_push(arr, rb_str_new(ts, te - ts)); has_found = 1;};
|
20
|
+
string { rb_ary_push(arr, rb_str_new(ts + 1, te - ts - 2)); has_found = 1;};
|
21
|
+
delimeter { if(has_found == 0) rb_ary_push(arr, Qnil); has_found = 0;};
|
22
|
+
*|;
|
23
|
+
}%%
|
24
|
+
|
25
|
+
%% write data nofinal;
|
26
|
+
|
27
|
+
|
28
|
+
/* Per-call resources that must be released even when the caller's block
 * raises or breaks out of the iteration. */
struct e_rows_ctx {
  VALUE io;   /* the single argument passed to Reader.rows */
  char *buf;  /* malloc'd scan buffer; NULL until it has been allocated */
};

/* rb_ensure cleanup handler: releases the scan buffer. */
static VALUE e_rows_release(VALUE ctx_ptr) {
  struct e_rows_ctx *ctx = (struct e_rows_ctx *) ctx_ptr;

  free(ctx->buf);
  ctx->buf = NULL;
  return Qnil;
}

/*
 * Scanning body of e_rows; runs under rb_ensure so ctx->buf is always freed.
 *
 * Input is taken from ctx->io: if it responds to `read` it is consumed in
 * chunks, otherwise it is converted with StringValue (raising TypeError when
 * that is impossible) and scanned in a single pass.
 */
static VALUE e_rows_scan(VALUE ctx_ptr) {
  struct e_rows_ctx *ctx = (struct e_rows_ctx *) ctx_ptr;
  VALUE io = ctx->io;
  int cs, act;
  int is_io, done = 0;
  long have = 0, buffer_size = BUFSIZE;
  char *ts = 0, *te = 0, *buf, *eof = NULL;

  has_found = 0;
  arr = rb_ary_new();

  is_io = rb_respond_to(io, s_read);
  if (!is_io) {
    /* The whole string is scanned at once, so size the buffer to fit it
     * instead of copying into a fixed BUFSIZE allocation. */
    StringValue(io);
    buffer_size = RSTRING_LEN(io);
  }

  buf = malloc(buffer_size > 0 ? (size_t) buffer_size : 1);
  if (buf == NULL) {
    rb_memerror();
  }
  ctx->buf = buf;

  %% write init;

  while (!done) {
    long len, space;
    char *p, *pe;

    /* A single unfinished token fills the whole buffer (so ts == buf):
     * enlarge it, otherwise read would be asked for 0 bytes forever. */
    if (is_io && have == buffer_size) {
      long te_off = (te != 0) ? (long) (te - buf) : -1;
      char *grown;

      if (buffer_size > LONG_MAX / 2) {
        rb_memerror();
      }
      grown = realloc(buf, (size_t) buffer_size * 2);
      if (grown == NULL) {
        rb_memerror();  /* ctx->buf is still valid and freed by the handler */
      }
      ctx->buf = buf = grown;
      buffer_size *= 2;
      ts = buf;
      te = (te_off >= 0) ? buf + te_off : 0;
    }

    space = buffer_size - have;
    p = buf + have;

    if (is_io) {
      VALUE str = rb_funcall(io, s_read, 1, LONG2NUM(space));

      if (NIL_P(str)) {
        len = 0;  /* read(n) reports end of input with nil */
      } else {
        StringValue(str);
        len = RSTRING_LEN(str);
        if (len > space) {
          rb_raise(rb_eRuntimeError, "read returned more data than requested");
        }
        memcpy(p, RSTRING_PTR(str), len);
      }
    } else {
      len = buffer_size;
      memcpy(p, RSTRING_PTR(io), len);
      done = 1;
    }

    pe = p + len;
    if (done || len < space) {
      /* Short read (or in-memory string): this is the final chunk. */
      done = 1;
      eof = pe;
    }

    %% write exec;

    if (cs == excelsior_scan_error) {
      break;  /* dead-end state: no further input can be matched */
    }

    if (ts == 0) {
      have = 0;  /* nothing to carry over; reuse the whole buffer */
    } else {
      /* Mid-token: move the partial token to the front of the buffer so the
       * next chunk is appended to it. */
      have = pe - ts;
      memmove(buf, ts, have);
      te = (te != 0 && te >= ts) ? buf + (te - ts) : 0;
      ts = buf;
    }
  }

  if (RARRAY_LEN(arr) > 0) {  /* have a last array to yield */
    rb_yield(arr);
  }

  return Qnil;
}

/*
 * Excelsior::Reader.rows(source) { |row| ... }  ->  nil
 *
 * Scans `source` as delimiter-separated text and yields one array per line.
 * `source` is either an object responding to `read` or a String. Takes exactly
 * one argument (enforced by rb_scan_args). The scan buffer is released even
 * if the block raises or breaks.
 */
VALUE e_rows(int argc, VALUE *argv, VALUE self) {
  struct e_rows_ctx ctx;

  ctx.buf = NULL;
  rb_scan_args(argc, argv, "1", &ctx.io);

  return rb_ensure(e_rows_scan, (VALUE) &ctx, e_rows_release, (VALUE) &ctx);
}
|
95
|
+
|
96
|
+
VALUE mExcelsior;
VALUE cReader;

/*
 * Extension initialiser: interns the "read" method name, defines the
 * Excelsior module and the Excelsior::Reader class, and exposes e_rows as the
 * singleton method Excelsior::Reader.rows (variable-argument C signature).
 */
void Init_excelsior_reader(void) {
  s_read = rb_intern("read");

  /* `arr` is a plain C global holding a Ruby array. Register its address
   * with the garbage collector so the row being assembled cannot be
   * collected while fields are still being allocated and pushed onto it. */
  rb_global_variable(&arr);

  mExcelsior = rb_define_module("Excelsior");
  cReader = rb_define_class_under(mExcelsior, "Reader", rb_cObject);
  rb_define_singleton_method(cReader, "rows", e_rows, -1);
}
|
data/lib/excelsior.rb
ADDED
metadata
ADDED
@@ -0,0 +1,59 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: excelsior
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.0.6
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Matthew Mongeau
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
|
12
|
+
date: 2009-08-24 00:00:00 -04:00
|
13
|
+
default_executable:
|
14
|
+
dependencies: []
|
15
|
+
|
16
|
+
description: A Ruby gem that uses C bindings to read CSV files superfast. I'm totally serial!
|
17
|
+
email: matt@toastyapps.com
|
18
|
+
executables: []
|
19
|
+
|
20
|
+
extensions:
|
21
|
+
- ext/excelsior_reader/extconf.rb
|
22
|
+
extra_rdoc_files: []
|
23
|
+
|
24
|
+
files:
|
25
|
+
- Rakefile
|
26
|
+
- lib/excelsior.rb
|
27
|
+
- ext/excelsior_reader/excelsior_reader.c
|
28
|
+
- ext/excelsior_reader/extconf.rb
|
29
|
+
- ext/excelsior_reader/excelsior_reader.rl
|
30
|
+
has_rdoc: true
|
31
|
+
homepage: http://github.com/toastyapps/excelsior
|
32
|
+
licenses: []
|
33
|
+
|
34
|
+
post_install_message:
|
35
|
+
rdoc_options: []
|
36
|
+
|
37
|
+
require_paths:
|
38
|
+
- lib
|
39
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
40
|
+
requirements:
|
41
|
+
- - ">="
|
42
|
+
- !ruby/object:Gem::Version
|
43
|
+
version: "0"
|
44
|
+
version:
|
45
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
46
|
+
requirements:
|
47
|
+
- - ">="
|
48
|
+
- !ruby/object:Gem::Version
|
49
|
+
version: "0"
|
50
|
+
version:
|
51
|
+
requirements: []
|
52
|
+
|
53
|
+
rubyforge_project:
|
54
|
+
rubygems_version: 1.3.4
|
55
|
+
signing_key:
|
56
|
+
specification_version: 3
|
57
|
+
summary: A Ruby gem that uses C bindings to read CSV files superfast. I'm totally serial!
|
58
|
+
test_files: []
|
59
|
+
|