toastyapps-excelsior 0.0.4 → 0.0.5

Sign up to get free protection for your applications and to get access to all the features.
data/Rakefile CHANGED
@@ -8,7 +8,7 @@ PKG_FILES = %w(Rakefile) +
8
8
 
9
9
  gem_spec = Gem::Specification.new do |gem_spec|
10
10
  gem_spec.name = 'excelsior'
11
- gem_spec.version = '0.0.4'
11
+ gem_spec.version = '0.0.5'
12
12
  gem_spec.summary = 'A Ruby gem that uses C bindings to read CSV files superfast. I\'m totally serial!'
13
13
  gem_spec.description = 'A Ruby gem that uses C bindings to read CSV files superfast. I\'m totally serial!'
14
14
  gem_spec.email = 'matt@toastyapps.com'
@@ -1,26 +1,18 @@
1
1
 
2
2
  #line 1 "excelsior_reader.rl"
3
- #include "ruby.h"
4
-
5
- VALUE e_parse(VALUE self, VALUE data) {
6
- int cs, act, curline = 1, len = 0;
7
- char *ts = 0, *te = 0, *buf = NULL, *eof = NULL;
8
- char *p, *pe;
9
- VALUE rows;
10
- VALUE arr;
11
- rows = rb_ary_new();
12
- arr = rb_ary_new();
13
- len = RSTRING(data)->len;
14
- p = RSTRING(data)->ptr;
15
- pe = p + len;
16
- eof = pe;
17
- int has_found = 0;
18
-
19
- #line 31 "excelsior_reader.rl"
3
+ #include <ruby.h>
4
+
5
+ static ID s_read;
6
+ VALUE arr;
7
+ int has_found = 0;
8
+ #define BUFSIZE 16384
9
+
20
10
 
11
+ #line 23 "excelsior_reader.rl"
21
12
 
22
13
 
23
- #line 24 "excelsior_reader.c"
14
+
15
+ #line 16 "excelsior_reader.c"
24
16
  static const char _excelsior_scan_actions[] = {
25
17
  0, 1, 2, 1, 7, 1, 8, 1,
26
18
  9, 1, 10, 1, 11, 2, 0, 1,
@@ -79,9 +71,28 @@ static const int excelsior_scan_error = 0;
79
71
  static const int excelsior_scan_en_main = 2;
80
72
 
81
73
 
82
- #line 34 "excelsior_reader.rl"
83
-
84
- #line 85 "excelsior_reader.c"
74
+ #line 26 "excelsior_reader.rl"
75
+
76
+
77
+ VALUE e_rows(int argc, VALUE *argv, VALUE self) {
78
+
79
+ int cs, act, have = 0, nread = 0, curline = 1, text = 0;
80
+ char *ts = 0, *te = 0, *buf = NULL, *eof = NULL;
81
+ int buffer_size = BUFSIZE;
82
+
83
+ has_found = 0;
84
+ VALUE io;
85
+ int is_io = 0;
86
+ int done = 0;
87
+
88
+ arr = rb_ary_new();
89
+ rb_scan_args(argc, argv, "1", &io);
90
+
91
+ is_io = rb_respond_to(io, s_read);
92
+ buf = ALLOC_N(char, buffer_size);
93
+
94
+
95
+ #line 96 "excelsior_reader.c"
85
96
  {
86
97
  cs = excelsior_scan_start;
87
98
  ts = 0;
@@ -89,9 +100,41 @@ static const int excelsior_scan_en_main = 2;
89
100
  act = 0;
90
101
  }
91
102
 
92
- #line 35 "excelsior_reader.rl"
93
-
94
- #line 95 "excelsior_reader.c"
103
+ #line 46 "excelsior_reader.rl"
104
+
105
+ while(!done) {
106
+
107
+ int len, space = buffer_size - have;
108
+ VALUE str;
109
+ char *p, *pe;
110
+ p = buf + have;
111
+
112
+ if(is_io) {
113
+ str = rb_funcall(io, s_read, 1, INT2FIX(space));
114
+ len = RSTRING_LEN(str);
115
+ memcpy(p, StringValuePtr(str), len);
116
+ } else {
117
+ // Going to assume it's a string and already in memory
118
+ //str = io;
119
+ len = buffer_size = RSTRING_LEN(io);
120
+ memcpy(p, StringValuePtr(io), len);
121
+ space = buffer_size - have;
122
+ pe = p + buffer_size;
123
+ eof = pe;
124
+ done = 1;
125
+ }
126
+
127
+ if(len < space) {
128
+ done = 1;
129
+ //p[len++] = 0; can't seem to get it to work with this
130
+ pe = p + len;
131
+ eof = pe;
132
+ } else {
133
+ pe = p + len;
134
+ }
135
+
136
+
137
+ #line 138 "excelsior_reader.c"
95
138
  {
96
139
  int _klen;
97
140
  unsigned int _trans;
@@ -112,7 +155,7 @@ _resume:
112
155
  #line 1 "excelsior_reader.rl"
113
156
  {ts = p;}
114
157
  break;
115
- #line 116 "excelsior_reader.c"
158
+ #line 159 "excelsior_reader.c"
116
159
  }
117
160
  }
118
161
 
@@ -182,31 +225,31 @@ _eof_trans:
182
225
  {te = p+1;}
183
226
  break;
184
227
  case 4:
185
- #line 26 "excelsior_reader.rl"
228
+ #line 18 "excelsior_reader.rl"
186
229
  {act = 2;}
187
230
  break;
188
231
  case 5:
189
- #line 27 "excelsior_reader.rl"
232
+ #line 19 "excelsior_reader.rl"
190
233
  {act = 3;}
191
234
  break;
192
235
  case 6:
193
- #line 28 "excelsior_reader.rl"
236
+ #line 20 "excelsior_reader.rl"
194
237
  {act = 4;}
195
238
  break;
196
239
  case 7:
197
- #line 25 "excelsior_reader.rl"
198
- {te = p+1;{ rb_ary_push(rows, arr); arr = rb_ary_new(); has_found = 0; }}
240
+ #line 17 "excelsior_reader.rl"
241
+ {te = p+1;{ rb_yield(arr); arr = rb_ary_new(); has_found = 0; }}
199
242
  break;
200
243
  case 8:
201
- #line 29 "excelsior_reader.rl"
244
+ #line 21 "excelsior_reader.rl"
202
245
  {te = p+1;{ if(has_found == 0) rb_ary_push(arr, Qnil); has_found = 0;}}
203
246
  break;
204
247
  case 9:
205
- #line 25 "excelsior_reader.rl"
206
- {te = p;p--;{ rb_ary_push(rows, arr); arr = rb_ary_new(); has_found = 0; }}
248
+ #line 17 "excelsior_reader.rl"
249
+ {te = p;p--;{ rb_yield(arr); arr = rb_ary_new(); has_found = 0; }}
207
250
  break;
208
251
  case 10:
209
- #line 28 "excelsior_reader.rl"
252
+ #line 20 "excelsior_reader.rl"
210
253
  {te = p;p--;{ rb_ary_push(arr, rb_str_new(ts + 1, te - ts - 2)); has_found = 1;}}
211
254
  break;
212
255
  case 11:
@@ -227,7 +270,7 @@ _eof_trans:
227
270
  }
228
271
  }
229
272
  break;
230
- #line 231 "excelsior_reader.c"
273
+ #line 274 "excelsior_reader.c"
231
274
  }
232
275
  }
233
276
 
@@ -244,7 +287,7 @@ _again:
244
287
  #line 1 "excelsior_reader.rl"
245
288
  {act = 0;}
246
289
  break;
247
- #line 248 "excelsior_reader.c"
290
+ #line 291 "excelsior_reader.c"
248
291
  }
249
292
  }
250
293
 
@@ -264,18 +307,30 @@ _again:
264
307
  _out: {}
265
308
  }
266
309
 
267
- #line 36 "excelsior_reader.rl"
268
- if(RARRAY_LEN(arr) > 0) {
269
- rb_ary_push(rows, arr);
270
- }
271
- return rows;
310
+ #line 79 "excelsior_reader.rl"
311
+
312
+ if(ts != 0) { // we are not at the end
313
+ have = pe - ts; //so copy stuff back in
314
+ memmove(buf, ts, have);
315
+ te = buf + (te - ts);
316
+ ts = buf;
317
+ }
318
+
319
+ }
320
+
321
+ if(RARRAY_LEN(arr) > 0) { // have a last array to yield
322
+ rb_yield(arr);
323
+ }
324
+
325
+ return Qnil;
272
326
  }
273
327
 
274
328
  VALUE mExcelsior;
275
329
  VALUE cReader;
276
330
 
277
331
  void Init_excelsior_reader() {
278
- mExcelsior = rb_define_module("Excelsior");
279
- cReader = rb_define_class_under(mExcelsior, "Reader", rb_cObject);
280
- rb_define_singleton_method(cReader, "parse", e_parse, 1);
332
+ s_read = rb_intern("read");
333
+ mExcelsior = rb_define_module("Excelsior");
334
+ cReader = rb_define_class_under(mExcelsior, "Reader", rb_cObject);
335
+ rb_define_singleton_method(cReader, "rows", e_rows, -1);
281
336
  }
@@ -1,19 +1,11 @@
1
- #include "ruby.h"
1
+ #include <ruby.h>
2
2
 
3
- VALUE e_parse(VALUE self, VALUE data) {
4
- int cs, act, curline = 1, len = 0;
5
- char *ts = 0, *te = 0, *buf = NULL, *eof = NULL;
6
- char *p, *pe;
7
- VALUE rows;
8
- VALUE arr;
9
- rows = rb_ary_new();
10
- arr = rb_ary_new();
11
- len = RSTRING(data)->len;
12
- p = RSTRING(data)->ptr;
13
- pe = p + len;
14
- eof = pe;
15
- int has_found = 0;
16
- %%{
3
+ static ID s_read;
4
+ VALUE arr;
5
+ int has_found = 0;
6
+ #define BUFSIZE 16384
7
+
8
+ %%{
17
9
  machine excelsior_scan;
18
10
  delimeter = ",";
19
11
  newline = "\r"? "\n" | "\r" | "\n";
@@ -22,28 +14,91 @@ VALUE e_parse(VALUE self, VALUE data) {
22
14
  string = '"' (schar | '""')* '"' ;
23
15
  value = letter+;
24
16
  main := |*
25
- newline { rb_ary_push(rows, arr); arr = rb_ary_new(); has_found = 0; };
17
+ newline { rb_yield(arr); arr = rb_ary_new(); has_found = 0; };
26
18
  space;
27
19
  value { rb_ary_push(arr, rb_str_new(ts, te - ts)); has_found = 1;};
28
20
  string { rb_ary_push(arr, rb_str_new(ts + 1, te - ts - 2)); has_found = 1;};
29
21
  delimeter { if(has_found == 0) rb_ary_push(arr, Qnil); has_found = 0;};
30
22
  *|;
31
- }%%
23
+ }%%
24
+
25
+ %% write data nofinal;
26
+
32
27
 
33
- %% write data nofinal;
34
- %% write init;
35
- %% write exec;
36
- if(RARRAY_LEN(arr) > 0) {
37
- rb_ary_push(rows, arr);
38
- }
39
- return rows;
28
+ VALUE e_rows(int argc, VALUE *argv, VALUE self) {
29
+
30
+ int cs, act, have = 0, nread = 0, curline = 1, text = 0;
31
+ char *ts = 0, *te = 0, *buf = NULL, *eof = NULL;
32
+ int buffer_size = BUFSIZE;
33
+
34
+ has_found = 0;
35
+ VALUE io;
36
+ int is_io = 0;
37
+ int done = 0;
38
+
39
+ arr = rb_ary_new();
40
+ rb_scan_args(argc, argv, "1", &io);
41
+
42
+ is_io = rb_respond_to(io, s_read);
43
+ buf = ALLOC_N(char, buffer_size);
44
+
45
+ %% write init;
46
+
47
+ while(!done) {
48
+
49
+ int len, space = buffer_size - have;
50
+ VALUE str;
51
+ char *p, *pe;
52
+ p = buf + have;
53
+
54
+ if(is_io) {
55
+ str = rb_funcall(io, s_read, 1, INT2FIX(space));
56
+ len = RSTRING_LEN(str);
57
+ memcpy(p, StringValuePtr(str), len);
58
+ } else {
59
+ // Going to assume it's a string and already in memory
60
+ //str = io;
61
+ len = buffer_size = RSTRING_LEN(io);
62
+ memcpy(p, StringValuePtr(io), len);
63
+ space = buffer_size - have;
64
+ pe = p + buffer_size;
65
+ eof = pe;
66
+ done = 1;
67
+ }
68
+
69
+ if(len < space) {
70
+ done = 1;
71
+ //p[len++] = 0; can't seem to get it to work with this
72
+ pe = p + len;
73
+ eof = pe;
74
+ } else {
75
+ pe = p + len;
76
+ }
77
+
78
+ %% write exec;
79
+
80
+ if(ts != 0) { // we are not at the end
81
+ have = pe - ts; //so copy stuff back in
82
+ memmove(buf, ts, have);
83
+ te = buf + (te - ts);
84
+ ts = buf;
85
+ }
86
+
87
+ }
88
+
89
+ if(RARRAY_LEN(arr) > 0) { // have a last array to yield
90
+ rb_yield(arr);
91
+ }
92
+
93
+ return Qnil;
40
94
  }
41
95
 
42
96
  VALUE mExcelsior;
43
97
  VALUE cReader;
44
98
 
45
99
  void Init_excelsior_reader() {
46
- mExcelsior = rb_define_module("Excelsior");
47
- cReader = rb_define_class_under(mExcelsior, "Reader", rb_cObject);
48
- rb_define_singleton_method(cReader, "parse", e_parse, 1);
100
+ s_read = rb_intern("read");
101
+ mExcelsior = rb_define_module("Excelsior");
102
+ cReader = rb_define_class_under(mExcelsior, "Reader", rb_cObject);
103
+ rb_define_singleton_method(cReader, "rows", e_rows, -1);
49
104
  }
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: toastyapps-excelsior
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.4
4
+ version: 0.0.5
5
5
  platform: ruby
6
6
  authors:
7
7
  - Matthew Mongeau
@@ -9,7 +9,7 @@ autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
11
 
12
- date: 2009-08-17 21:00:00 -07:00
12
+ date: 2009-08-20 21:00:00 -07:00
13
13
  default_executable:
14
14
  dependencies: []
15
15