excelsior 0.0.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,26 @@
1
+ require 'rake'
2
+ require 'rake/testtask'
3
+
4
# Files shipped in the gem. The generated C scanner is named explicitly because
# it may not exist yet when this file is evaluated; `uniq` keeps it from being
# listed twice when the glob has already picked it up.
PKG_FILES = (
  %w[Rakefile] +
  Dir.glob("{lib}/**/*") +
  Dir.glob("ext/**/*.{c,rb,rl}") +
  %w[ext/excelsior_reader/excelsior_reader.c]
).uniq

# Gem metadata. The block parameter is named `spec` so it does not shadow the
# outer `gem_spec` local that the :gemspec task reads below.
gem_spec = Gem::Specification.new do |spec|
  spec.name = 'excelsior'
  spec.version = '0.0.6'
  spec.summary = 'A Ruby gem that uses C bindings to read CSV files superfast. I\'m totally serial!'
  spec.description = 'A Ruby gem that uses C bindings to read CSV files superfast. I\'m totally serial!'
  spec.email = 'matt@toastyapps.com'
  spec.homepage = 'http://github.com/toastyapps/excelsior'
  spec.authors = ["Matthew Mongeau"]
  spec.files = PKG_FILES
  # Every extconf.rb under ext/ is built as a native extension at install time.
  spec.extensions = FileList["ext/**/extconf.rb"].to_a
end

desc "Generate a gemspec file"
task :gemspec do
  # Writes <name>.gemspec in the current directory as a YAML dump of the spec.
  File.open("#{gem_spec.name}.gemspec", "w") do |f|
    f.write gem_spec.to_yaml
  end
end
@@ -0,0 +1,336 @@
1
+
2
+ #line 1 "excelsior_reader.rl"
3
+ #include <ruby.h>
4
+
5
+ static ID s_read;
6
+ VALUE arr;
7
+ int has_found = 0;
8
+ #define BUFSIZE 16384
9
+
10
+
11
+ #line 23 "excelsior_reader.rl"
12
+
13
+
14
+
15
+ #line 16 "excelsior_reader.c"
16
+ static const char _excelsior_scan_actions[] = { /* Ragel-generated (regenerate from excelsior_reader.rl, do not hand-edit). Packed action lists: each list is a count followed by that many action ids; the other tables store offsets into this array. */
17
+ 0, 1, 2, 1, 7, 1, 8, 1,
18
+ 9, 1, 10, 1, 11, 2, 0, 1,
19
+ 2, 3, 4, 2, 3, 5, 2, 3,
20
+ 6
21
+ };
22
+
23
+ static const char _excelsior_scan_key_offsets[] = { /* per state: offset of that state's first key in _excelsior_scan_trans_keys */
24
+ 0, 0, 1, 8, 12, 13
25
+ };
26
+
27
+ static const char _excelsior_scan_trans_keys[] = { /* per state: sorted single-character keys followed by (low, high) range pairs, as byte values */
28
+ 34, 10, 13, 32, 34, 44, 9, 12,
29
+ 10, 13, 34, 44, 10, 34, 0
30
+ };
31
+
32
+ static const char _excelsior_scan_single_lengths[] = { /* per state: number of single-character keys (binary-searched first) */
33
+ 0, 1, 5, 4, 1, 1
34
+ };
35
+
36
+ static const char _excelsior_scan_range_lengths[] = { /* per state: number of (low, high) range pairs (searched after the single keys) */
37
+ 0, 0, 1, 0, 0, 0
38
+ };
39
+
40
+ static const char _excelsior_scan_index_offsets[] = { /* per state: index of that state's first transition in the trans_targs / trans_actions tables */
41
+ 0, 0, 2, 9, 14, 16
42
+ };
43
+
44
+ static const char _excelsior_scan_trans_targs[] = { /* per transition: the state to move to */
45
+ 5, 1, 2, 4, 3, 1, 2, 3,
46
+ 3, 2, 2, 2, 2, 3, 2, 2,
47
+ 1, 2, 2, 2, 2, 2, 0
48
+ };
49
+
50
+ static const char _excelsior_scan_trans_actions[] = { /* per transition: offset into _excelsior_scan_actions; 0 means the transition runs no action */
51
+ 22, 0, 3, 0, 16, 0, 5, 16,
52
+ 19, 11, 11, 11, 11, 19, 3, 7,
53
+ 0, 9, 11, 11, 7, 9, 0
54
+ };
55
+
56
+ static const char _excelsior_scan_to_state_actions[] = { /* per state: offset into _excelsior_scan_actions of the list run after entering the state */
57
+ 0, 0, 13, 0, 0, 0
58
+ };
59
+
60
+ static const char _excelsior_scan_from_state_actions[] = { /* per state: offset into _excelsior_scan_actions of the list run before leaving the state on the next character */
61
+ 0, 0, 1, 0, 0, 0
62
+ };
63
+
64
+ static const char _excelsior_scan_eof_trans[] = { /* per state: 1-based transition index taken when input ends in that state; 0 means none */
65
+ 0, 20, 0, 20, 21, 22
66
+ };
67
+
68
+ static const int excelsior_scan_start = 2; /* initial value of cs */
69
+ static const int excelsior_scan_error = 0; /* the driver stops scanning once cs reaches this state */
70
+
71
+ static const int excelsior_scan_en_main = 2; /* entry state of the `main` scanner */
72
+
73
+
74
+ #line 26 "excelsior_reader.rl"
75
+
76
+
77
+ VALUE e_rows(int argc, VALUE *argv, VALUE self) {
78
+
79
+ int cs, act, have = 0, nread = 0, curline = 1, text = 0;
80
+ char *ts = 0, *te = 0, *buf = NULL, *eof = NULL;
81
+ int buffer_size = BUFSIZE;
82
+
83
+ has_found = 0;
84
+ VALUE io;
85
+ int is_io = 0;
86
+ int done = 0;
87
+
88
+ arr = rb_ary_new();
89
+ rb_scan_args(argc, argv, "1", &io);
90
+
91
+ is_io = rb_respond_to(io, s_read);
92
+ buf = (char *) malloc(buffer_size); //ALLOC_N(char, buffer_size); <= This caused problems
93
+
94
+
95
+ #line 96 "excelsior_reader.c"
96
+ {
97
+ cs = excelsior_scan_start;
98
+ ts = 0;
99
+ te = 0;
100
+ act = 0;
101
+ }
102
+
103
+ #line 46 "excelsior_reader.rl"
104
+
105
+ while(!done) {
106
+
107
+ int len, space = buffer_size - have;
108
+ VALUE str;
109
+ char *p, *pe;
110
+ p = buf + have;
111
+
112
+ if(is_io) {
113
+ str = rb_funcall(io, s_read, 1, INT2FIX(space));
114
+ len = RSTRING_LEN(str);
115
+ memcpy(p, StringValuePtr(str), len);
116
+ } else {
117
+ // Going to assume it's a string and already in memory
118
+ //str = io;
119
+ len = buffer_size = RSTRING_LEN(io);
120
+ memcpy(p, StringValuePtr(io), len);
121
+ space = buffer_size - have;
122
+ pe = p + buffer_size;
123
+ eof = pe;
124
+ done = 1;
125
+ }
126
+
127
+ if(len < space) {
128
+ done = 1;
129
+ //p[len++] = 0; can't seem to get it to work with this
130
+ pe = p + len;
131
+ eof = pe;
132
+ } else {
133
+ pe = p + len;
134
+ }
135
+
136
+
137
+ #line 138 "excelsior_reader.c"
138
+ {
139
+ int _klen;
140
+ unsigned int _trans;
141
+ const char *_acts;
142
+ unsigned int _nacts;
143
+ const char *_keys;
144
+
145
+ if ( p == pe )
146
+ goto _test_eof;
147
+ if ( cs == 0 )
148
+ goto _out;
149
+ _resume:
150
+ _acts = _excelsior_scan_actions + _excelsior_scan_from_state_actions[cs];
151
+ _nacts = (unsigned int) *_acts++;
152
+ while ( _nacts-- > 0 ) {
153
+ switch ( *_acts++ ) {
154
+ case 2:
155
+ #line 1 "excelsior_reader.rl"
156
+ {ts = p;}
157
+ break;
158
+ #line 159 "excelsior_reader.c"
159
+ }
160
+ }
161
+
162
+ _keys = _excelsior_scan_trans_keys + _excelsior_scan_key_offsets[cs];
163
+ _trans = _excelsior_scan_index_offsets[cs];
164
+
165
+ _klen = _excelsior_scan_single_lengths[cs];
166
+ if ( _klen > 0 ) {
167
+ const char *_lower = _keys;
168
+ const char *_mid;
169
+ const char *_upper = _keys + _klen - 1;
170
+ while (1) {
171
+ if ( _upper < _lower )
172
+ break;
173
+
174
+ _mid = _lower + ((_upper-_lower) >> 1);
175
+ if ( (*p) < *_mid )
176
+ _upper = _mid - 1;
177
+ else if ( (*p) > *_mid )
178
+ _lower = _mid + 1;
179
+ else {
180
+ _trans += (_mid - _keys);
181
+ goto _match;
182
+ }
183
+ }
184
+ _keys += _klen;
185
+ _trans += _klen;
186
+ }
187
+
188
+ _klen = _excelsior_scan_range_lengths[cs];
189
+ if ( _klen > 0 ) {
190
+ const char *_lower = _keys;
191
+ const char *_mid;
192
+ const char *_upper = _keys + (_klen<<1) - 2;
193
+ while (1) {
194
+ if ( _upper < _lower )
195
+ break;
196
+
197
+ _mid = _lower + (((_upper-_lower) >> 1) & ~1);
198
+ if ( (*p) < _mid[0] )
199
+ _upper = _mid - 2;
200
+ else if ( (*p) > _mid[1] )
201
+ _lower = _mid + 2;
202
+ else {
203
+ _trans += ((_mid - _keys)>>1);
204
+ goto _match;
205
+ }
206
+ }
207
+ _trans += _klen;
208
+ }
209
+
210
+ _match:
211
+ _eof_trans:
212
+ cs = _excelsior_scan_trans_targs[_trans];
213
+
214
+ if ( _excelsior_scan_trans_actions[_trans] == 0 )
215
+ goto _again;
216
+
217
+ _acts = _excelsior_scan_actions + _excelsior_scan_trans_actions[_trans];
218
+ _nacts = (unsigned int) *_acts++;
219
+ while ( _nacts-- > 0 )
220
+ {
221
+ switch ( *_acts++ )
222
+ {
223
+ case 3:
224
+ #line 1 "excelsior_reader.rl"
225
+ {te = p+1;}
226
+ break;
227
+ case 4:
228
+ #line 18 "excelsior_reader.rl"
229
+ {act = 2;}
230
+ break;
231
+ case 5:
232
+ #line 19 "excelsior_reader.rl"
233
+ {act = 3;}
234
+ break;
235
+ case 6:
236
+ #line 20 "excelsior_reader.rl"
237
+ {act = 4;}
238
+ break;
239
+ case 7:
240
+ #line 17 "excelsior_reader.rl"
241
+ {te = p+1;{ rb_yield(arr); arr = rb_ary_new(); has_found = 0; }}
242
+ break;
243
+ case 8:
244
+ #line 21 "excelsior_reader.rl"
245
+ {te = p+1;{ if(has_found == 0) rb_ary_push(arr, Qnil); has_found = 0;}}
246
+ break;
247
+ case 9:
248
+ #line 17 "excelsior_reader.rl"
249
+ {te = p;p--;{ rb_yield(arr); arr = rb_ary_new(); has_found = 0; }}
250
+ break;
251
+ case 10:
252
+ #line 20 "excelsior_reader.rl"
253
+ {te = p;p--;{ rb_ary_push(arr, rb_str_new(ts + 1, te - ts - 2)); has_found = 1;}}
254
+ break;
255
+ case 11:
256
+ #line 1 "excelsior_reader.rl"
257
+ { switch( act ) {
258
+ case 0:
259
+ {{cs = 0; goto _again;}}
260
+ break;
261
+ case 3:
262
+ {{p = ((te))-1;} rb_ary_push(arr, rb_str_new(ts, te - ts)); has_found = 1;}
263
+ break;
264
+ case 4:
265
+ {{p = ((te))-1;} rb_ary_push(arr, rb_str_new(ts + 1, te - ts - 2)); has_found = 1;}
266
+ break;
267
+ default:
268
+ {{p = ((te))-1;}}
269
+ break;
270
+ }
271
+ }
272
+ break;
273
+ #line 274 "excelsior_reader.c"
274
+ }
275
+ }
276
+
277
+ _again:
278
+ _acts = _excelsior_scan_actions + _excelsior_scan_to_state_actions[cs];
279
+ _nacts = (unsigned int) *_acts++;
280
+ while ( _nacts-- > 0 ) {
281
+ switch ( *_acts++ ) {
282
+ case 0:
283
+ #line 1 "excelsior_reader.rl"
284
+ {ts = 0;}
285
+ break;
286
+ case 1:
287
+ #line 1 "excelsior_reader.rl"
288
+ {act = 0;}
289
+ break;
290
+ #line 291 "excelsior_reader.c"
291
+ }
292
+ }
293
+
294
+ if ( cs == 0 )
295
+ goto _out;
296
+ if ( ++p != pe )
297
+ goto _resume;
298
+ _test_eof: {}
299
+ if ( p == eof )
300
+ {
301
+ if ( _excelsior_scan_eof_trans[cs] > 0 ) {
302
+ _trans = _excelsior_scan_eof_trans[cs] - 1;
303
+ goto _eof_trans;
304
+ }
305
+ }
306
+
307
+ _out: {}
308
+ }
309
+
310
+ #line 79 "excelsior_reader.rl"
311
+
312
+ if(ts != 0) { // we are not at the end
313
+ have = pe - ts; //so copy stuff back in
314
+ memmove(buf, ts, have);
315
+ te = buf + (te - ts);
316
+ ts = buf;
317
+ }
318
+
319
+ }
320
+
321
+ if(RARRAY_LEN(arr) > 0) { // have a last array to yield
322
+ rb_yield(arr);
323
+ }
324
+
325
+ return Qnil;
326
+ }
327
+
328
+ VALUE mExcelsior;
329
+ VALUE cReader;
330
+
331
+ void Init_excelsior_reader() {
332
+ s_read = rb_intern("read");
333
+ mExcelsior = rb_define_module("Excelsior");
334
+ cReader = rb_define_class_under(mExcelsior, "Reader", rb_cObject);
335
+ rb_define_singleton_method(cReader, "rows", e_rows, -1);
336
+ }
@@ -0,0 +1,104 @@
1
+ #include <ruby.h>
2
+
3
+ static ID s_read;
4
+ VALUE arr;
5
+ int has_found = 0;
6
+ #define BUFSIZE 16384
7
+
8
+ %%{
9
+ machine excelsior_scan;
10
+ delimeter = ","; # field separator
11
+ newline = "\r"? "\n" | "\r" | "\n"; # row terminator: CRLF, a lone CR, or a lone LF
12
+ schar = any - '"'; # any character that may appear unescaped inside a quoted field
13
+ letter = any - delimeter - '"' - newline; # any character that may appear in an unquoted field
14
+ string = '"' (schar | '""')* '"' ; # quoted field; a doubled quote is accepted inside it
15
+ value = letter+; # unquoted field
16
+ main := |*
17
+ newline { rb_yield(arr); arr = rb_ary_new(); has_found = 0; }; # end of row: yield it and start a new one
18
+ space; # whitespace matched on its own produces no field
19
+ value { rb_ary_push(arr, rb_str_new(ts, te - ts)); has_found = 1;}; # push the raw token text
20
+ string { rb_ary_push(arr, rb_str_new(ts + 1, te - ts - 2)); has_found = 1;}; # push the text between the outer quotes; doubled quotes are copied as-is
21
+ delimeter { if(has_found == 0) rb_ary_push(arr, Qnil); has_found = 0;}; # no value since the last delimiter: record nil
22
+ *|;
23
+ }%%
24
+
25
+ %% write data nofinal;
26
+
27
+
28
/* State shared between e_rows_scan and e_rows_cleanup through rb_ensure. */
struct e_rows_ctx {
  VALUE io;   /* the single argument given to Reader.rows */
  char *buf;  /* malloc'd scan buffer; released by e_rows_cleanup */
};

/* rb_ensure cleanup: frees the scan buffer whether the scan finished normally
 * or the caller's block / io.read raised. */
static VALUE e_rows_cleanup(VALUE arg) {
  struct e_rows_ctx *ctx = (struct e_rows_ctx *) arg;

  free(ctx->buf);
  ctx->buf = NULL;
  return Qnil;
}

/* Scans the input and yields one Array per CSV row. Runs under rb_ensure so
 * that the buffer recorded in ctx->buf is always released. */
static VALUE e_rows_scan(VALUE arg) {
  struct e_rows_ctx *ctx = (struct e_rows_ctx *) arg;
  VALUE io = ctx->io;
  /* These locals deliberately shadow the file-level `arr` / `has_found` that
   * the scanner actions name: a VALUE on the C stack is visible to the GC,
   * and per-call state keeps a nested Reader.rows call inside the block from
   * clobbering the row being built here. */
  VALUE arr = rb_ary_new();
  int has_found = 0;
  int cs, act;
  long have = 0;                 /* bytes of an unfinished token kept at buf[0..have) */
  long buffer_size = BUFSIZE;
  char *ts = 0, *te = 0, *buf = NULL, *eof = NULL;
  int is_io = rb_respond_to(io, s_read);
  int done = 0;

  if (!is_io) {
    /* Anything without #read must be (convertible to) a String; raises
     * TypeError otherwise instead of reading through a non-String VALUE. */
    StringValue(io);
    /* The whole string is scanned in one pass, so the buffer must hold it. */
    if (RSTRING_LEN(io) > buffer_size)
      buffer_size = RSTRING_LEN(io);
  }

  buf = (char *) malloc(buffer_size);
  if (buf == NULL)
    rb_memerror();
  ctx->buf = buf;

  %% write init;

  while (!done) {
    long len;
    char *p, *pe;

    if (is_io) {
      long space = buffer_size - have;
      /* A pending token that fills the buffer leaves no room; still request a
       * full chunk and grow the buffer below rather than spinning on read(0). */
      long want = space > 0 ? space : BUFSIZE;
      VALUE str = rb_funcall(io, s_read, 1, LONG2NUM(want));

      if (NIL_P(str)) {
        len = 0;                 /* read(n) returns nil at end of input */
      } else {
        StringValue(str);
        len = RSTRING_LEN(str);
      }

      if (len > space) {
        long needed = have + len;
        long new_size = buffer_size * 2;
        /* At the top of the loop a pending token always starts at buf, so
         * only te's offset from ts has to survive the reallocation. */
        long te_off = (ts != 0) ? (long) (te - ts) : 0;
        char *grown;

        if (new_size < needed)
          new_size = needed;
        grown = (char *) realloc(buf, new_size);
        if (grown == NULL)
          rb_memerror();         /* ctx->buf still owns the old block */
        ctx->buf = buf = grown;
        buffer_size = new_size;
        if (ts != 0) {
          ts = buf;
          te = buf + te_off;
        }
      }

      if (len > 0)
        memcpy(buf + have, RSTRING_PTR(str), len);
      if (len < want)
        done = 1;                /* short read: the input is exhausted */
    } else {
      len = RSTRING_LEN(io);
      memcpy(buf, RSTRING_PTR(io), len);
      done = 1;
    }

    p = buf + have;
    pe = p + len;
    if (done)
      eof = pe;                  /* lets the scanner flush its final token */

    %% write exec;

    if (ts == 0) {
      have = 0;                  /* no token in progress: reuse the whole buffer */
    } else {
      /* A token straddles the chunk boundary: move it to the front so the
       * next read appends to it. */
      have = pe - ts;
      memmove(buf, ts, have);
      te = buf + (te - ts);
      ts = buf;
    }
  }

  if (RARRAY_LEN(arr) > 0) {     /* final row without a trailing newline */
    rb_yield(arr);
  }

  return Qnil;
}

/*
 * Excelsior::Reader.rows(source) { |row| ... } -> nil
 *
 * source is either an object responding to #read(length) or a String.
 * Each row is yielded as an Array of Strings; a delimiter that is not
 * preceded by a value contributes nil. Quoted fields are yielded without
 * their surrounding quotes.
 */
VALUE e_rows(int argc, VALUE *argv, VALUE self) {
  struct e_rows_ctx ctx;

  ctx.buf = NULL;
  rb_scan_args(argc, argv, "1", &ctx.io);

  return rb_ensure(e_rows_scan, (VALUE) &ctx, e_rows_cleanup, (VALUE) &ctx);
}
95
+
96
VALUE mExcelsior;
VALUE cReader;

/* Extension entry point: defines Excelsior::Reader and its singleton method
 * `rows`, implemented by e_rows (variable arity, hence -1). */
void Init_excelsior_reader(void) {
  s_read = rb_intern("read");

  /* Register the file-level `arr` with the GC: a VALUE held only in a C
   * global is not otherwise visible to the collector. */
  arr = Qnil;
  rb_global_variable(&arr);

  mExcelsior = rb_define_module("Excelsior");
  cReader = rb_define_class_under(mExcelsior, "Reader", rb_cObject);
  rb_define_singleton_method(cReader, "rows", e_rows, -1);
}
@@ -0,0 +1,5 @@
1
+ require 'mkmf'
2
+
3
# Build configuration for the excelsior_reader extension.
# Registers the 'excelsior' target name with mkmf.
dir_config 'excelsior'
# Probe for libc before generating the Makefile.
have_library 'c', 'main'
# Emit the Makefile that builds the extension named excelsior_reader.
create_makefile 'excelsior_reader'
@@ -0,0 +1,7 @@
1
+ require 'excelsior_reader'
2
+
3
# Namespace of the Excelsior CSV reader. Excelsior::Reader itself is defined by
# the compiled excelsior_reader extension.
module Excelsior
  # Version of the gem these sources ship in; keep in sync with the gemspec.
  VERSION = '0.0.6'

  # @return [String] the gem version
  def self.version
    VERSION
  end
end
metadata ADDED
@@ -0,0 +1,59 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: excelsior
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.6
5
+ platform: ruby
6
+ authors:
7
+ - Matthew Mongeau
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+
12
+ date: 2009-08-24 00:00:00 -04:00
13
+ default_executable:
14
+ dependencies: []
15
+
16
+ description: A Ruby gem that uses C bindings to read CSV files superfast. I'm totally serial!
17
+ email: matt@toastyapps.com
18
+ executables: []
19
+
20
+ extensions:
21
+ - ext/excelsior_reader/extconf.rb
22
+ extra_rdoc_files: []
23
+
24
+ files:
25
+ - Rakefile
26
+ - lib/excelsior.rb
27
+ - ext/excelsior_reader/excelsior_reader.c
28
+ - ext/excelsior_reader/extconf.rb
29
+ - ext/excelsior_reader/excelsior_reader.rl
30
+ has_rdoc: true
31
+ homepage: http://github.com/toastyapps/excelsior
32
+ licenses: []
33
+
34
+ post_install_message:
35
+ rdoc_options: []
36
+
37
+ require_paths:
38
+ - lib
39
+ required_ruby_version: !ruby/object:Gem::Requirement
40
+ requirements:
41
+ - - ">="
42
+ - !ruby/object:Gem::Version
43
+ version: "0"
44
+ version:
45
+ required_rubygems_version: !ruby/object:Gem::Requirement
46
+ requirements:
47
+ - - ">="
48
+ - !ruby/object:Gem::Version
49
+ version: "0"
50
+ version:
51
+ requirements: []
52
+
53
+ rubyforge_project:
54
+ rubygems_version: 1.3.4
55
+ signing_key:
56
+ specification_version: 3
57
+ summary: A Ruby gem that uses C bindings to read CSV files superfast. I'm totally serial!
58
+ test_files: []
59
+