toastyapps-excelsior 0.0.4 → 0.0.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/Rakefile CHANGED
@@ -8,7 +8,7 @@ PKG_FILES = %w(Rakefile) +
8
8
 
9
9
  gem_spec = Gem::Specification.new do |gem_spec|
10
10
  gem_spec.name = 'excelsior'
11
- gem_spec.version = '0.0.4'
11
+ gem_spec.version = '0.0.5'
12
12
  gem_spec.summary = 'A Ruby gem that uses C bindings to read CSV files superfast. I\'m totally serial!'
13
13
  gem_spec.description = 'A Ruby gem that uses C bindings to read CSV files superfast. I\'m totally serial!'
14
14
  gem_spec.email = 'matt@toastyapps.com'
@@ -1,26 +1,18 @@
1
1
 
2
2
  #line 1 "excelsior_reader.rl"
3
- #include "ruby.h"
4
-
5
- VALUE e_parse(VALUE self, VALUE data) {
6
- int cs, act, curline = 1, len = 0;
7
- char *ts = 0, *te = 0, *buf = NULL, *eof = NULL;
8
- char *p, *pe;
9
- VALUE rows;
10
- VALUE arr;
11
- rows = rb_ary_new();
12
- arr = rb_ary_new();
13
- len = RSTRING(data)->len;
14
- p = RSTRING(data)->ptr;
15
- pe = p + len;
16
- eof = pe;
17
- int has_found = 0;
18
-
19
- #line 31 "excelsior_reader.rl"
3
+ #include <ruby.h>
4
+
5
+ static ID s_read;
6
+ VALUE arr;
7
+ int has_found = 0;
8
+ #define BUFSIZE 16384
9
+
20
10
 
11
+ #line 23 "excelsior_reader.rl"
21
12
 
22
13
 
23
- #line 24 "excelsior_reader.c"
14
+
15
+ #line 16 "excelsior_reader.c"
24
16
  static const char _excelsior_scan_actions[] = {
25
17
  0, 1, 2, 1, 7, 1, 8, 1,
26
18
  9, 1, 10, 1, 11, 2, 0, 1,
@@ -79,9 +71,28 @@ static const int excelsior_scan_error = 0;
79
71
  static const int excelsior_scan_en_main = 2;
80
72
 
81
73
 
82
- #line 34 "excelsior_reader.rl"
83
-
84
- #line 85 "excelsior_reader.c"
74
+ #line 26 "excelsior_reader.rl"
75
+
76
+
77
+ VALUE e_rows(int argc, VALUE *argv, VALUE self) {
78
+
79
+ int cs, act, have = 0, nread = 0, curline = 1, text = 0;
80
+ char *ts = 0, *te = 0, *buf = NULL, *eof = NULL;
81
+ int buffer_size = BUFSIZE;
82
+
83
+ has_found = 0;
84
+ VALUE io;
85
+ int is_io = 0;
86
+ int done = 0;
87
+
88
+ arr = rb_ary_new();
89
+ rb_scan_args(argc, argv, "1", &io);
90
+
91
+ is_io = rb_respond_to(io, s_read);
92
+ buf = ALLOC_N(char, buffer_size);
93
+
94
+
95
+ #line 96 "excelsior_reader.c"
85
96
  {
86
97
  cs = excelsior_scan_start;
87
98
  ts = 0;
@@ -89,9 +100,41 @@ static const int excelsior_scan_en_main = 2;
89
100
  act = 0;
90
101
  }
91
102
 
92
- #line 35 "excelsior_reader.rl"
93
-
94
- #line 95 "excelsior_reader.c"
103
+ #line 46 "excelsior_reader.rl"
104
+
105
+ while(!done) {
106
+
107
+ int len, space = buffer_size - have;
108
+ VALUE str;
109
+ char *p, *pe;
110
+ p = buf + have;
111
+
112
+ if(is_io) {
113
+ str = rb_funcall(io, s_read, 1, INT2FIX(space));
114
+ len = RSTRING_LEN(str);
115
+ memcpy(p, StringValuePtr(str), len);
116
+ } else {
117
+ // Going to assume it's a string and already in memory
118
+ //str = io;
119
+ len = buffer_size = RSTRING_LEN(io);
120
+ memcpy(p, StringValuePtr(io), len);
121
+ space = buffer_size - have;
122
+ pe = p + buffer_size;
123
+ eof = pe;
124
+ done = 1;
125
+ }
126
+
127
+ if(len < space) {
128
+ done = 1;
129
+ //p[len++] = 0; can't seem to get it to work with this
130
+ pe = p + len;
131
+ eof = pe;
132
+ } else {
133
+ pe = p + len;
134
+ }
135
+
136
+
137
+ #line 138 "excelsior_reader.c"
95
138
  {
96
139
  int _klen;
97
140
  unsigned int _trans;
@@ -112,7 +155,7 @@ _resume:
112
155
  #line 1 "excelsior_reader.rl"
113
156
  {ts = p;}
114
157
  break;
115
- #line 116 "excelsior_reader.c"
158
+ #line 159 "excelsior_reader.c"
116
159
  }
117
160
  }
118
161
 
@@ -182,31 +225,31 @@ _eof_trans:
182
225
  {te = p+1;}
183
226
  break;
184
227
  case 4:
185
- #line 26 "excelsior_reader.rl"
228
+ #line 18 "excelsior_reader.rl"
186
229
  {act = 2;}
187
230
  break;
188
231
  case 5:
189
- #line 27 "excelsior_reader.rl"
232
+ #line 19 "excelsior_reader.rl"
190
233
  {act = 3;}
191
234
  break;
192
235
  case 6:
193
- #line 28 "excelsior_reader.rl"
236
+ #line 20 "excelsior_reader.rl"
194
237
  {act = 4;}
195
238
  break;
196
239
  case 7:
197
- #line 25 "excelsior_reader.rl"
198
- {te = p+1;{ rb_ary_push(rows, arr); arr = rb_ary_new(); has_found = 0; }}
240
+ #line 17 "excelsior_reader.rl"
241
+ {te = p+1;{ rb_yield(arr); arr = rb_ary_new(); has_found = 0; }}
199
242
  break;
200
243
  case 8:
201
- #line 29 "excelsior_reader.rl"
244
+ #line 21 "excelsior_reader.rl"
202
245
  {te = p+1;{ if(has_found == 0) rb_ary_push(arr, Qnil); has_found = 0;}}
203
246
  break;
204
247
  case 9:
205
- #line 25 "excelsior_reader.rl"
206
- {te = p;p--;{ rb_ary_push(rows, arr); arr = rb_ary_new(); has_found = 0; }}
248
+ #line 17 "excelsior_reader.rl"
249
+ {te = p;p--;{ rb_yield(arr); arr = rb_ary_new(); has_found = 0; }}
207
250
  break;
208
251
  case 10:
209
- #line 28 "excelsior_reader.rl"
252
+ #line 20 "excelsior_reader.rl"
210
253
  {te = p;p--;{ rb_ary_push(arr, rb_str_new(ts + 1, te - ts - 2)); has_found = 1;}}
211
254
  break;
212
255
  case 11:
@@ -227,7 +270,7 @@ _eof_trans:
227
270
  }
228
271
  }
229
272
  break;
230
- #line 231 "excelsior_reader.c"
273
+ #line 274 "excelsior_reader.c"
231
274
  }
232
275
  }
233
276
 
@@ -244,7 +287,7 @@ _again:
244
287
  #line 1 "excelsior_reader.rl"
245
288
  {act = 0;}
246
289
  break;
247
- #line 248 "excelsior_reader.c"
290
+ #line 291 "excelsior_reader.c"
248
291
  }
249
292
  }
250
293
 
@@ -264,18 +307,30 @@ _again:
264
307
  _out: {}
265
308
  }
266
309
 
267
- #line 36 "excelsior_reader.rl"
268
- if(RARRAY_LEN(arr) > 0) {
269
- rb_ary_push(rows, arr);
270
- }
271
- return rows;
310
+ #line 79 "excelsior_reader.rl"
311
+
312
+ if(ts != 0) { // we are not at the end
313
+ have = pe - ts; //so copy stuff back in
314
+ memmove(buf, ts, have);
315
+ te = buf + (te - ts);
316
+ ts = buf;
317
+ }
318
+
319
+ }
320
+
321
+ if(RARRAY_LEN(arr) > 0) { // have a last array to yield
322
+ rb_yield(arr);
323
+ }
324
+
325
+ return Qnil;
272
326
  }
273
327
 
274
328
  VALUE mExcelsior;
275
329
  VALUE cReader;
276
330
 
277
331
  void Init_excelsior_reader() {
278
- mExcelsior = rb_define_module("Excelsior");
279
- cReader = rb_define_class_under(mExcelsior, "Reader", rb_cObject);
280
- rb_define_singleton_method(cReader, "parse", e_parse, 1);
332
+ s_read = rb_intern("read");
333
+ mExcelsior = rb_define_module("Excelsior");
334
+ cReader = rb_define_class_under(mExcelsior, "Reader", rb_cObject);
335
+ rb_define_singleton_method(cReader, "rows", e_rows, -1);
281
336
  }
@@ -1,19 +1,11 @@
1
- #include "ruby.h"
1
+ #include <ruby.h>
2
2
 
3
- VALUE e_parse(VALUE self, VALUE data) {
4
- int cs, act, curline = 1, len = 0;
5
- char *ts = 0, *te = 0, *buf = NULL, *eof = NULL;
6
- char *p, *pe;
7
- VALUE rows;
8
- VALUE arr;
9
- rows = rb_ary_new();
10
- arr = rb_ary_new();
11
- len = RSTRING(data)->len;
12
- p = RSTRING(data)->ptr;
13
- pe = p + len;
14
- eof = pe;
15
- int has_found = 0;
16
- %%{
3
+ static ID s_read;
4
+ VALUE arr;
5
+ int has_found = 0;
6
+ #define BUFSIZE 16384
7
+
8
+ %%{
17
9
  machine excelsior_scan;
18
10
  delimeter = ",";
19
11
  newline = "\r"? "\n" | "\r" | "\n";
@@ -22,28 +14,91 @@ VALUE e_parse(VALUE self, VALUE data) {
22
14
  string = '"' (schar | '""')* '"' ;
23
15
  value = letter+;
24
16
  main := |*
25
- newline { rb_ary_push(rows, arr); arr = rb_ary_new(); has_found = 0; };
17
+ newline { rb_yield(arr); arr = rb_ary_new(); has_found = 0; };
26
18
  space;
27
19
  value { rb_ary_push(arr, rb_str_new(ts, te - ts)); has_found = 1;};
28
20
  string { rb_ary_push(arr, rb_str_new(ts + 1, te - ts - 2)); has_found = 1;};
29
21
  delimeter { if(has_found == 0) rb_ary_push(arr, Qnil); has_found = 0;};
30
22
  *|;
31
- }%%
23
+ }%%
24
+
25
+ %% write data nofinal;
26
+
32
27
 
33
- %% write data nofinal;
34
- %% write init;
35
- %% write exec;
36
- if(RARRAY_LEN(arr) > 0) {
37
- rb_ary_push(rows, arr);
38
- }
39
- return rows;
28
+ VALUE e_rows(int argc, VALUE *argv, VALUE self) {
29
+
30
+ int cs, act, have = 0, nread = 0, curline = 1, text = 0;
31
+ char *ts = 0, *te = 0, *buf = NULL, *eof = NULL;
32
+ int buffer_size = BUFSIZE;
33
+
34
+ has_found = 0;
35
+ VALUE io;
36
+ int is_io = 0;
37
+ int done = 0;
38
+
39
+ arr = rb_ary_new();
40
+ rb_scan_args(argc, argv, "1", &io);
41
+
42
+ is_io = rb_respond_to(io, s_read);
43
+ buf = ALLOC_N(char, buffer_size);
44
+
45
+ %% write init;
46
+
47
+ while(!done) {
48
+
49
+ int len, space = buffer_size - have;
50
+ VALUE str;
51
+ char *p, *pe;
52
+ p = buf + have;
53
+
54
+ if(is_io) {
55
+ str = rb_funcall(io, s_read, 1, INT2FIX(space));
56
+ len = RSTRING_LEN(str);
57
+ memcpy(p, StringValuePtr(str), len);
58
+ } else {
59
+ // Going to assume it's a string and already in memory
60
+ //str = io;
61
+ len = buffer_size = RSTRING_LEN(io);
62
+ memcpy(p, StringValuePtr(io), len);
63
+ space = buffer_size - have;
64
+ pe = p + buffer_size;
65
+ eof = pe;
66
+ done = 1;
67
+ }
68
+
69
+ if(len < space) {
70
+ done = 1;
71
+ //p[len++] = 0; can't seem to get it to work with this
72
+ pe = p + len;
73
+ eof = pe;
74
+ } else {
75
+ pe = p + len;
76
+ }
77
+
78
+ %% write exec;
79
+
80
+ if(ts != 0) { // we are not at the end
81
+ have = pe - ts; //so copy stuff back in
82
+ memmove(buf, ts, have);
83
+ te = buf + (te - ts);
84
+ ts = buf;
85
+ }
86
+
87
+ }
88
+
89
+ if(RARRAY_LEN(arr) > 0) { // have a last array to yield
90
+ rb_yield(arr);
91
+ }
92
+
93
+ return Qnil;
40
94
  }
41
95
 
42
96
  VALUE mExcelsior;
43
97
  VALUE cReader;
44
98
 
45
99
  void Init_excelsior_reader() {
46
- mExcelsior = rb_define_module("Excelsior");
47
- cReader = rb_define_class_under(mExcelsior, "Reader", rb_cObject);
48
- rb_define_singleton_method(cReader, "parse", e_parse, 1);
100
+ s_read = rb_intern("read");
101
+ mExcelsior = rb_define_module("Excelsior");
102
+ cReader = rb_define_class_under(mExcelsior, "Reader", rb_cObject);
103
+ rb_define_singleton_method(cReader, "rows", e_rows, -1);
49
104
  }
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: toastyapps-excelsior
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.4
4
+ version: 0.0.5
5
5
  platform: ruby
6
6
  authors:
7
7
  - Matthew Mongeau
@@ -9,7 +9,7 @@ autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
11
 
12
- date: 2009-08-17 21:00:00 -07:00
12
+ date: 2009-08-20 21:00:00 -07:00
13
13
  default_executable:
14
14
  dependencies: []
15
15