excelsior 0.0.6

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,26 @@
1
+ require 'rake'
2
+ require 'rake/testtask'
3
+
4
+ PKG_FILES = %w(Rakefile) +
5
+ Dir.glob("{lib}/**/*") +
6
+ Dir.glob("ext/**/*.{c,rb,rl}") +
7
+ %w[ext/excelsior_reader/excelsior_reader.c] # needed because they are generated later
8
+
9
+ gem_spec = Gem::Specification.new do |gem_spec|
10
+ gem_spec.name = 'excelsior'
11
+ gem_spec.version = '0.0.6'
12
+ gem_spec.summary = 'A Ruby gem that uses C bindings to read CSV files superfast. I\'m totally serial!'
13
+ gem_spec.description = 'A Ruby gem that uses C bindings to read CSV files superfast. I\'m totally serial!'
14
+ gem_spec.email = 'matt@toastyapps.com'
15
+ gem_spec.homepage = 'http://github.com/toastyapps/excelsior'
16
+ gem_spec.authors = ["Matthew Mongeau"]
17
+ gem_spec.files = PKG_FILES
18
+ gem_spec.extensions = FileList["ext/**/extconf.rb"].to_a
19
+ end
20
+
21
+ desc "Generate a gemspec file"
22
+ task :gemspec do
23
+ File.open("#{gem_spec.name}.gemspec", "w") do |f|
24
+ f.write gem_spec.to_yaml
25
+ end
26
+ end
@@ -0,0 +1,336 @@
1
+
2
+ #line 1 "excelsior_reader.rl"
3
+ #include <ruby.h>
4
+
5
+ static ID s_read;
6
+ VALUE arr;
7
+ int has_found = 0;
8
+ #define BUFSIZE 16384
9
+
10
+
11
+ #line 23 "excelsior_reader.rl"
12
+
13
+
14
+
15
+ #line 16 "excelsior_reader.c"
16
+ static const char _excelsior_scan_actions[] = {
17
+ 0, 1, 2, 1, 7, 1, 8, 1,
18
+ 9, 1, 10, 1, 11, 2, 0, 1,
19
+ 2, 3, 4, 2, 3, 5, 2, 3,
20
+ 6
21
+ };
22
+
23
+ static const char _excelsior_scan_key_offsets[] = {
24
+ 0, 0, 1, 8, 12, 13
25
+ };
26
+
27
+ static const char _excelsior_scan_trans_keys[] = {
28
+ 34, 10, 13, 32, 34, 44, 9, 12,
29
+ 10, 13, 34, 44, 10, 34, 0
30
+ };
31
+
32
+ static const char _excelsior_scan_single_lengths[] = {
33
+ 0, 1, 5, 4, 1, 1
34
+ };
35
+
36
+ static const char _excelsior_scan_range_lengths[] = {
37
+ 0, 0, 1, 0, 0, 0
38
+ };
39
+
40
+ static const char _excelsior_scan_index_offsets[] = {
41
+ 0, 0, 2, 9, 14, 16
42
+ };
43
+
44
+ static const char _excelsior_scan_trans_targs[] = {
45
+ 5, 1, 2, 4, 3, 1, 2, 3,
46
+ 3, 2, 2, 2, 2, 3, 2, 2,
47
+ 1, 2, 2, 2, 2, 2, 0
48
+ };
49
+
50
+ static const char _excelsior_scan_trans_actions[] = {
51
+ 22, 0, 3, 0, 16, 0, 5, 16,
52
+ 19, 11, 11, 11, 11, 19, 3, 7,
53
+ 0, 9, 11, 11, 7, 9, 0
54
+ };
55
+
56
+ static const char _excelsior_scan_to_state_actions[] = {
57
+ 0, 0, 13, 0, 0, 0
58
+ };
59
+
60
+ static const char _excelsior_scan_from_state_actions[] = {
61
+ 0, 0, 1, 0, 0, 0
62
+ };
63
+
64
+ static const char _excelsior_scan_eof_trans[] = {
65
+ 0, 20, 0, 20, 21, 22
66
+ };
67
+
68
+ static const int excelsior_scan_start = 2;
69
+ static const int excelsior_scan_error = 0;
70
+
71
+ static const int excelsior_scan_en_main = 2;
72
+
73
+
74
+ #line 26 "excelsior_reader.rl"
75
+
76
+
77
+ VALUE e_rows(int argc, VALUE *argv, VALUE self) {
78
+ /* Reader.rows(io_or_string): scans CSV input and yields each row as an Array (nil for empty fields); returns nil. */
79
+ int cs, act, have = 0;
80
+ char *ts = 0, *te = 0, *buf = NULL, *eof = NULL;
81
+ int buffer_size = BUFSIZE;
82
+
83
+ has_found = 0;
84
+ VALUE io;
85
+ int is_io = 0;
86
+ int done = 0;
87
+
88
+ arr = rb_ary_new();
89
+ rb_scan_args(argc, argv, "1", &io);
90
+
91
+ is_io = rb_respond_to(io, s_read);
92
+ buf = (char *) malloc(buffer_size); //ALLOC_N(char, buffer_size); <= This caused problems
93
+ if(buf == NULL) rb_memerror(); // do not scan into a NULL buffer when malloc fails
94
+
95
+ #line 96 "excelsior_reader.c"
96
+ {
97
+ cs = excelsior_scan_start;
98
+ ts = 0;
99
+ te = 0;
100
+ act = 0;
101
+ }
102
+
103
+ #line 46 "excelsior_reader.rl"
104
+
105
+ while(!done) {
106
+
107
+ int len, space = buffer_size - have;
108
+ VALUE str;
109
+ char *p, *pe;
110
+ p = buf + have;
111
+ if(space == 0) { free(buf); rb_raise(rb_eRuntimeError, "excelsior: a single token does not fit in the %d byte read buffer", buffer_size); } // read(0) would otherwise loop forever
112
+ if(is_io) {
113
+ str = rb_funcall(io, s_read, 1, INT2FIX(space));
114
+ len = NIL_P(str) ? 0 : RSTRING_LEN(str); // IO#read(n) returns nil at end of file
115
+ if(len > 0) memcpy(p, StringValuePtr(str), len);
116
+ } else {
117
+ StringValue(io); // Not an IO: must be a String already in memory (raises TypeError otherwise)
118
+ len = buffer_size = RSTRING_LEN(io);
119
+ if((p = buf = (char *) realloc(buf, buffer_size + 1)) == NULL) rb_memerror(); // grow to fit: input may exceed BUFSIZE (have is 0 here)
120
+ memcpy(p, StringValuePtr(io), len);
121
+ space = buffer_size - have;
122
+ pe = p + buffer_size;
123
+ eof = pe;
124
+ done = 1;
125
+ }
126
+
127
+ if(len < space) {
128
+ done = 1;
129
+ //p[len++] = 0; can't seem to get it to work with this
130
+ pe = p + len;
131
+ eof = pe;
132
+ } else {
133
+ pe = p + len;
134
+ }
135
+
136
+
137
+ #line 138 "excelsior_reader.c"
138
+ {
139
+ int _klen;
140
+ unsigned int _trans;
141
+ const char *_acts;
142
+ unsigned int _nacts;
143
+ const char *_keys;
144
+
145
+ if ( p == pe )
146
+ goto _test_eof;
147
+ if ( cs == 0 )
148
+ goto _out;
149
+ _resume:
150
+ _acts = _excelsior_scan_actions + _excelsior_scan_from_state_actions[cs];
151
+ _nacts = (unsigned int) *_acts++;
152
+ while ( _nacts-- > 0 ) {
153
+ switch ( *_acts++ ) {
154
+ case 2:
155
+ #line 1 "excelsior_reader.rl"
156
+ {ts = p;}
157
+ break;
158
+ #line 159 "excelsior_reader.c"
159
+ }
160
+ }
161
+
162
+ _keys = _excelsior_scan_trans_keys + _excelsior_scan_key_offsets[cs];
163
+ _trans = _excelsior_scan_index_offsets[cs];
164
+
165
+ _klen = _excelsior_scan_single_lengths[cs];
166
+ if ( _klen > 0 ) {
167
+ const char *_lower = _keys;
168
+ const char *_mid;
169
+ const char *_upper = _keys + _klen - 1;
170
+ while (1) {
171
+ if ( _upper < _lower )
172
+ break;
173
+
174
+ _mid = _lower + ((_upper-_lower) >> 1);
175
+ if ( (*p) < *_mid )
176
+ _upper = _mid - 1;
177
+ else if ( (*p) > *_mid )
178
+ _lower = _mid + 1;
179
+ else {
180
+ _trans += (_mid - _keys);
181
+ goto _match;
182
+ }
183
+ }
184
+ _keys += _klen;
185
+ _trans += _klen;
186
+ }
187
+
188
+ _klen = _excelsior_scan_range_lengths[cs];
189
+ if ( _klen > 0 ) {
190
+ const char *_lower = _keys;
191
+ const char *_mid;
192
+ const char *_upper = _keys + (_klen<<1) - 2;
193
+ while (1) {
194
+ if ( _upper < _lower )
195
+ break;
196
+
197
+ _mid = _lower + (((_upper-_lower) >> 1) & ~1);
198
+ if ( (*p) < _mid[0] )
199
+ _upper = _mid - 2;
200
+ else if ( (*p) > _mid[1] )
201
+ _lower = _mid + 2;
202
+ else {
203
+ _trans += ((_mid - _keys)>>1);
204
+ goto _match;
205
+ }
206
+ }
207
+ _trans += _klen;
208
+ }
209
+
210
+ _match:
211
+ _eof_trans:
212
+ cs = _excelsior_scan_trans_targs[_trans];
213
+
214
+ if ( _excelsior_scan_trans_actions[_trans] == 0 )
215
+ goto _again;
216
+
217
+ _acts = _excelsior_scan_actions + _excelsior_scan_trans_actions[_trans];
218
+ _nacts = (unsigned int) *_acts++;
219
+ while ( _nacts-- > 0 )
220
+ {
221
+ switch ( *_acts++ )
222
+ {
223
+ case 3:
224
+ #line 1 "excelsior_reader.rl"
225
+ {te = p+1;}
226
+ break;
227
+ case 4:
228
+ #line 18 "excelsior_reader.rl"
229
+ {act = 2;}
230
+ break;
231
+ case 5:
232
+ #line 19 "excelsior_reader.rl"
233
+ {act = 3;}
234
+ break;
235
+ case 6:
236
+ #line 20 "excelsior_reader.rl"
237
+ {act = 4;}
238
+ break;
239
+ case 7:
240
+ #line 17 "excelsior_reader.rl"
241
+ {te = p+1;{ rb_yield(arr); arr = rb_ary_new(); has_found = 0; }}
242
+ break;
243
+ case 8:
244
+ #line 21 "excelsior_reader.rl"
245
+ {te = p+1;{ if(has_found == 0) rb_ary_push(arr, Qnil); has_found = 0;}}
246
+ break;
247
+ case 9:
248
+ #line 17 "excelsior_reader.rl"
249
+ {te = p;p--;{ rb_yield(arr); arr = rb_ary_new(); has_found = 0; }}
250
+ break;
251
+ case 10:
252
+ #line 20 "excelsior_reader.rl"
253
+ {te = p;p--;{ rb_ary_push(arr, rb_str_new(ts + 1, te - ts - 2)); has_found = 1;}}
254
+ break;
255
+ case 11:
256
+ #line 1 "excelsior_reader.rl"
257
+ { switch( act ) {
258
+ case 0:
259
+ {{cs = 0; goto _again;}}
260
+ break;
261
+ case 3:
262
+ {{p = ((te))-1;} rb_ary_push(arr, rb_str_new(ts, te - ts)); has_found = 1;}
263
+ break;
264
+ case 4:
265
+ {{p = ((te))-1;} rb_ary_push(arr, rb_str_new(ts + 1, te - ts - 2)); has_found = 1;}
266
+ break;
267
+ default:
268
+ {{p = ((te))-1;}}
269
+ break;
270
+ }
271
+ }
272
+ break;
273
+ #line 274 "excelsior_reader.c"
274
+ }
275
+ }
276
+
277
+ _again:
278
+ _acts = _excelsior_scan_actions + _excelsior_scan_to_state_actions[cs];
279
+ _nacts = (unsigned int) *_acts++;
280
+ while ( _nacts-- > 0 ) {
281
+ switch ( *_acts++ ) {
282
+ case 0:
283
+ #line 1 "excelsior_reader.rl"
284
+ {ts = 0;}
285
+ break;
286
+ case 1:
287
+ #line 1 "excelsior_reader.rl"
288
+ {act = 0;}
289
+ break;
290
+ #line 291 "excelsior_reader.c"
291
+ }
292
+ }
293
+
294
+ if ( cs == 0 )
295
+ goto _out;
296
+ if ( ++p != pe )
297
+ goto _resume;
298
+ _test_eof: {}
299
+ if ( p == eof )
300
+ {
301
+ if ( _excelsior_scan_eof_trans[cs] > 0 ) {
302
+ _trans = _excelsior_scan_eof_trans[cs] - 1;
303
+ goto _eof_trans;
304
+ }
305
+ }
306
+
307
+ _out: {}
308
+ }
309
+
310
+ #line 79 "excelsior_reader.rl"
311
+
312
+ if(ts != 0) { // we are not at the end
313
+ have = pe - ts; //so copy stuff back in
314
+ memmove(buf, ts, have);
315
+ te = buf + (te - ts);
316
+ ts = buf;
317
+ }
318
+
319
+ }
320
+
321
+ if(RARRAY_LEN(arr) > 0) { // have a last array to yield
322
+ rb_yield(arr);
323
+ }
324
+ free(buf); // release the scan buffer; NOTE(review): still leaked if the block raises or breaks out of rb_yield
325
+ return Qnil;
326
+ }
327
+
328
+ VALUE mExcelsior;
329
+ VALUE cReader;
330
+
331
+ void Init_excelsior_reader() {
332
+ s_read = rb_intern("read");
333
+ rb_global_variable(&arr); // arr lives in a C global: register it so the GC marks the row being built
334
+ mExcelsior = rb_define_module("Excelsior");
335
+ cReader = rb_define_class_under(mExcelsior, "Reader", rb_cObject);
336
+ rb_define_singleton_method(cReader, "rows", e_rows, -1); // -1: e_rows receives (argc, argv, self)
337
+ }
@@ -0,0 +1,104 @@
1
+ #include <ruby.h>
2
+
3
+ static ID s_read;
4
+ VALUE arr;
5
+ int has_found = 0;
6
+ #define BUFSIZE 16384
7
+
8
+ %%{
9
+ machine excelsior_scan; # scanner that splits CSV text into rows and fields
10
+ delimeter = ","; # field separator
11
+ newline = "\r"? "\n" | "\r" | "\n"; # CRLF, lone CR or lone LF
12
+ schar = any - '"'; # any character except a double quote
13
+ letter = any - delimeter - '"' - newline; # any character allowed in an unquoted field
14
+ string = '"' (schar | '""')* '"' ; # quoted field; a doubled quote is accepted inside it
15
+ value = letter+; # unquoted field
16
+ main := |*
17
+ newline { rb_yield(arr); arr = rb_ary_new(); has_found = 0; }; # yield the finished row and start a new one
18
+ space; # consumed with no action
19
+ value { rb_ary_push(arr, rb_str_new(ts, te - ts)); has_found = 1;}; # push the matched text as-is
20
+ string { rb_ary_push(arr, rb_str_new(ts + 1, te - ts - 2)); has_found = 1;}; # push without the outer quotes; inner "" is left untouched
21
+ delimeter { if(has_found == 0) rb_ary_push(arr, Qnil); has_found = 0;}; # no field since the previous separator: push nil
22
+ *|;
23
+ }%%
24
+
25
+ %% write data nofinal;
26
+
27
+
28
+ VALUE e_rows(int argc, VALUE *argv, VALUE self) {
29
+ /* Reader.rows(io_or_string): scans CSV input and yields each row as an Array (nil for empty fields); returns nil. */
30
+ int cs, act, have = 0;
31
+ char *ts = 0, *te = 0, *buf = NULL, *eof = NULL;
32
+ int buffer_size = BUFSIZE;
33
+
34
+ has_found = 0;
35
+ VALUE io;
36
+ int is_io = 0;
37
+ int done = 0;
38
+
39
+ arr = rb_ary_new();
40
+ rb_scan_args(argc, argv, "1", &io);
41
+
42
+ is_io = rb_respond_to(io, s_read);
43
+ buf = (char *) malloc(buffer_size); //ALLOC_N(char, buffer_size); <= This caused problems
44
+ if(buf == NULL) rb_memerror(); // do not scan into a NULL buffer when malloc fails
45
+ %% write init;
46
+
47
+ while(!done) {
48
+
49
+ int len, space = buffer_size - have;
50
+ VALUE str;
51
+ char *p, *pe;
52
+ p = buf + have;
53
+ if(space == 0) { free(buf); rb_raise(rb_eRuntimeError, "excelsior: a single token does not fit in the %d byte read buffer", buffer_size); } // read(0) would otherwise loop forever
54
+ if(is_io) {
55
+ str = rb_funcall(io, s_read, 1, INT2FIX(space));
56
+ len = NIL_P(str) ? 0 : RSTRING_LEN(str); // IO#read(n) returns nil at end of file
57
+ if(len > 0) memcpy(p, StringValuePtr(str), len);
58
+ } else {
59
+ StringValue(io); // Not an IO: must be a String already in memory (raises TypeError otherwise)
60
+ len = buffer_size = RSTRING_LEN(io);
61
+ if((p = buf = (char *) realloc(buf, buffer_size + 1)) == NULL) rb_memerror(); // grow to fit: input may exceed BUFSIZE (have is 0 here)
62
+ memcpy(p, StringValuePtr(io), len);
63
+ space = buffer_size - have;
64
+ pe = p + buffer_size;
65
+ eof = pe;
66
+ done = 1;
67
+ }
68
+
69
+ if(len < space) {
70
+ done = 1;
71
+ //p[len++] = 0; can't seem to get it to work with this
72
+ pe = p + len;
73
+ eof = pe;
74
+ } else {
75
+ pe = p + len;
76
+ }
77
+
78
+ %% write exec;
79
+
80
+ if(ts != 0) { // we are not at the end
81
+ have = pe - ts; //so copy stuff back in
82
+ memmove(buf, ts, have);
83
+ te = buf + (te - ts);
84
+ ts = buf;
85
+ }
86
+
87
+ }
88
+
89
+ if(RARRAY_LEN(arr) > 0) { // have a last array to yield
90
+ rb_yield(arr);
91
+ }
92
+ free(buf); // release the scan buffer; NOTE(review): still leaked if the block raises or breaks out of rb_yield
93
+ return Qnil;
94
+ }
95
+
96
+ VALUE mExcelsior;
97
+ VALUE cReader;
98
+
99
+ void Init_excelsior_reader() {
100
+ s_read = rb_intern("read");
101
+ rb_global_variable(&arr); // arr lives in a C global: register it so the GC marks the row being built
102
+ mExcelsior = rb_define_module("Excelsior");
103
+ cReader = rb_define_class_under(mExcelsior, "Reader", rb_cObject);
104
+ rb_define_singleton_method(cReader, "rows", e_rows, -1); // -1: e_rows receives (argc, argv, self)
105
+ }
@@ -0,0 +1,5 @@
1
+ require 'mkmf'
2
+
3
+ dir_config('excelsior') # accept --with-excelsior-dir / -include / -lib options
4
+ have_library("c", "main") # probe for libc; the result is not checked
5
+ create_makefile('excelsior_reader') # generate the Makefile for the excelsior_reader extension
@@ -0,0 +1,7 @@
1
+ require 'excelsior_reader'
2
+
3
+ module Excelsior
4
+ def self.version
5
+ "0.0.1"
6
+ end
7
+ end
metadata ADDED
@@ -0,0 +1,59 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: excelsior
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.6
5
+ platform: ruby
6
+ authors:
7
+ - Matthew Mongeau
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+
12
+ date: 2009-08-24 00:00:00 -04:00
13
+ default_executable:
14
+ dependencies: []
15
+
16
+ description: A Ruby gem that uses C bindings to read CSV files superfast. I'm totally serial!
17
+ email: matt@toastyapps.com
18
+ executables: []
19
+
20
+ extensions:
21
+ - ext/excelsior_reader/extconf.rb
22
+ extra_rdoc_files: []
23
+
24
+ files:
25
+ - Rakefile
26
+ - lib/excelsior.rb
27
+ - ext/excelsior_reader/excelsior_reader.c
28
+ - ext/excelsior_reader/extconf.rb
29
+ - ext/excelsior_reader/excelsior_reader.rl
30
+ has_rdoc: true
31
+ homepage: http://github.com/toastyapps/excelsior
32
+ licenses: []
33
+
34
+ post_install_message:
35
+ rdoc_options: []
36
+
37
+ require_paths:
38
+ - lib
39
+ required_ruby_version: !ruby/object:Gem::Requirement
40
+ requirements:
41
+ - - ">="
42
+ - !ruby/object:Gem::Version
43
+ version: "0"
44
+ version:
45
+ required_rubygems_version: !ruby/object:Gem::Requirement
46
+ requirements:
47
+ - - ">="
48
+ - !ruby/object:Gem::Version
49
+ version: "0"
50
+ version:
51
+ requirements: []
52
+
53
+ rubyforge_project:
54
+ rubygems_version: 1.3.4
55
+ signing_key:
56
+ specification_version: 3
57
+ summary: A Ruby gem that uses C bindings to read CSV files superfast. I'm totally serial!
58
+ test_files: []
59
+