toastyapps-excelsior 0.0.4 → 0.0.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/Rakefile +1 -1
- data/ext/excelsior_reader/excelsior_reader.c +99 -44
- data/ext/excelsior_reader/excelsior_reader.rl +82 -27
- metadata +2 -2
data/Rakefile
CHANGED
@@ -8,7 +8,7 @@ PKG_FILES = %w(Rakefile) +
|
|
8
8
|
|
9
9
|
gem_spec = Gem::Specification.new do |gem_spec|
|
10
10
|
gem_spec.name = 'excelsior'
|
11
|
-
gem_spec.version = '0.0.4'
|
11
|
+
gem_spec.version = '0.0.5'
|
12
12
|
gem_spec.summary = 'A Ruby gem that uses C bindings to read CSV files superfast. I\'m totally serial!'
|
13
13
|
gem_spec.description = 'A Ruby gem that uses C bindings to read CSV files superfast. I\'m totally serial!'
|
14
14
|
gem_spec.email = 'matt@toastyapps.com'
|
@@ -1,26 +1,18 @@
|
|
1
1
|
|
2
2
|
#line 1 "excelsior_reader.rl"
|
3
|
-
#include
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
VALUE arr;
|
11
|
-
rows = rb_ary_new();
|
12
|
-
arr = rb_ary_new();
|
13
|
-
len = RSTRING(data)->len;
|
14
|
-
p = RSTRING(data)->ptr;
|
15
|
-
pe = p + len;
|
16
|
-
eof = pe;
|
17
|
-
int has_found = 0;
|
18
|
-
|
19
|
-
#line 31 "excelsior_reader.rl"
|
3
|
+
#include <ruby.h>
|
4
|
+
|
5
|
+
static ID s_read;
|
6
|
+
VALUE arr;
|
7
|
+
int has_found = 0;
|
8
|
+
#define BUFSIZE 16384
|
9
|
+
|
20
10
|
|
11
|
+
#line 23 "excelsior_reader.rl"
|
21
12
|
|
22
13
|
|
23
|
-
|
14
|
+
|
15
|
+
#line 16 "excelsior_reader.c"
|
24
16
|
static const char _excelsior_scan_actions[] = {
|
25
17
|
0, 1, 2, 1, 7, 1, 8, 1,
|
26
18
|
9, 1, 10, 1, 11, 2, 0, 1,
|
@@ -79,9 +71,28 @@ static const int excelsior_scan_error = 0;
|
|
79
71
|
static const int excelsior_scan_en_main = 2;
|
80
72
|
|
81
73
|
|
82
|
-
#line
|
83
|
-
|
84
|
-
|
74
|
+
#line 26 "excelsior_reader.rl"
|
75
|
+
|
76
|
+
|
77
|
+
VALUE e_rows(int argc, VALUE *argv, VALUE self) {
|
78
|
+
|
79
|
+
int cs, act, have = 0, nread = 0, curline = 1, text = 0;
|
80
|
+
char *ts = 0, *te = 0, *buf = NULL, *eof = NULL;
|
81
|
+
int buffer_size = BUFSIZE;
|
82
|
+
|
83
|
+
has_found = 0;
|
84
|
+
VALUE io;
|
85
|
+
int is_io = 0;
|
86
|
+
int done = 0;
|
87
|
+
|
88
|
+
arr = rb_ary_new();
|
89
|
+
rb_scan_args(argc, argv, "1", &io);
|
90
|
+
|
91
|
+
is_io = rb_respond_to(io, s_read);
|
92
|
+
buf = ALLOC_N(char, buffer_size);
|
93
|
+
|
94
|
+
|
95
|
+
#line 96 "excelsior_reader.c"
|
85
96
|
{
|
86
97
|
cs = excelsior_scan_start;
|
87
98
|
ts = 0;
|
@@ -89,9 +100,41 @@ static const int excelsior_scan_en_main = 2;
|
|
89
100
|
act = 0;
|
90
101
|
}
|
91
102
|
|
92
|
-
#line
|
93
|
-
|
94
|
-
|
103
|
+
#line 46 "excelsior_reader.rl"
|
104
|
+
|
105
|
+
while(!done) {
|
106
|
+
|
107
|
+
int len, space = buffer_size - have;
|
108
|
+
VALUE str;
|
109
|
+
char *p, *pe;
|
110
|
+
p = buf + have;
|
111
|
+
|
112
|
+
if(is_io) {
|
113
|
+
str = rb_funcall(io, s_read, 1, INT2FIX(space));
|
114
|
+
len = RSTRING_LEN(str);
|
115
|
+
memcpy(p, StringValuePtr(str), len);
|
116
|
+
} else {
|
117
|
+
// Going to assume it's a string and already in memory
|
118
|
+
//str = io;
|
119
|
+
len = buffer_size = RSTRING_LEN(io);
|
120
|
+
memcpy(p, StringValuePtr(io), len);
|
121
|
+
space = buffer_size - have;
|
122
|
+
pe = p + buffer_size;
|
123
|
+
eof = pe;
|
124
|
+
done = 1;
|
125
|
+
}
|
126
|
+
|
127
|
+
if(len < space) {
|
128
|
+
done = 1;
|
129
|
+
//p[len++] = 0; can't seem to get it to work with this
|
130
|
+
pe = p + len;
|
131
|
+
eof = pe;
|
132
|
+
} else {
|
133
|
+
pe = p + len;
|
134
|
+
}
|
135
|
+
|
136
|
+
|
137
|
+
#line 138 "excelsior_reader.c"
|
95
138
|
{
|
96
139
|
int _klen;
|
97
140
|
unsigned int _trans;
|
@@ -112,7 +155,7 @@ _resume:
|
|
112
155
|
#line 1 "excelsior_reader.rl"
|
113
156
|
{ts = p;}
|
114
157
|
break;
|
115
|
-
#line
|
158
|
+
#line 159 "excelsior_reader.c"
|
116
159
|
}
|
117
160
|
}
|
118
161
|
|
@@ -182,31 +225,31 @@ _eof_trans:
|
|
182
225
|
{te = p+1;}
|
183
226
|
break;
|
184
227
|
case 4:
|
185
|
-
#line
|
228
|
+
#line 18 "excelsior_reader.rl"
|
186
229
|
{act = 2;}
|
187
230
|
break;
|
188
231
|
case 5:
|
189
|
-
#line
|
232
|
+
#line 19 "excelsior_reader.rl"
|
190
233
|
{act = 3;}
|
191
234
|
break;
|
192
235
|
case 6:
|
193
|
-
#line
|
236
|
+
#line 20 "excelsior_reader.rl"
|
194
237
|
{act = 4;}
|
195
238
|
break;
|
196
239
|
case 7:
|
197
|
-
#line
|
198
|
-
{te = p+1;{
|
240
|
+
#line 17 "excelsior_reader.rl"
|
241
|
+
{te = p+1;{ rb_yield(arr); arr = rb_ary_new(); has_found = 0; }}
|
199
242
|
break;
|
200
243
|
case 8:
|
201
|
-
#line
|
244
|
+
#line 21 "excelsior_reader.rl"
|
202
245
|
{te = p+1;{ if(has_found == 0) rb_ary_push(arr, Qnil); has_found = 0;}}
|
203
246
|
break;
|
204
247
|
case 9:
|
205
|
-
#line
|
206
|
-
{te = p;p--;{
|
248
|
+
#line 17 "excelsior_reader.rl"
|
249
|
+
{te = p;p--;{ rb_yield(arr); arr = rb_ary_new(); has_found = 0; }}
|
207
250
|
break;
|
208
251
|
case 10:
|
209
|
-
#line
|
252
|
+
#line 20 "excelsior_reader.rl"
|
210
253
|
{te = p;p--;{ rb_ary_push(arr, rb_str_new(ts + 1, te - ts - 2)); has_found = 1;}}
|
211
254
|
break;
|
212
255
|
case 11:
|
@@ -227,7 +270,7 @@ _eof_trans:
|
|
227
270
|
}
|
228
271
|
}
|
229
272
|
break;
|
230
|
-
#line
|
273
|
+
#line 274 "excelsior_reader.c"
|
231
274
|
}
|
232
275
|
}
|
233
276
|
|
@@ -244,7 +287,7 @@ _again:
|
|
244
287
|
#line 1 "excelsior_reader.rl"
|
245
288
|
{act = 0;}
|
246
289
|
break;
|
247
|
-
#line
|
290
|
+
#line 291 "excelsior_reader.c"
|
248
291
|
}
|
249
292
|
}
|
250
293
|
|
@@ -264,18 +307,30 @@ _again:
|
|
264
307
|
_out: {}
|
265
308
|
}
|
266
309
|
|
267
|
-
#line
|
268
|
-
|
269
|
-
|
270
|
-
|
271
|
-
|
310
|
+
#line 79 "excelsior_reader.rl"
|
311
|
+
|
312
|
+
if(ts != 0) { // we are not at the end
|
313
|
+
have = pe - ts; //so copy stuff back in
|
314
|
+
memmove(buf, ts, have);
|
315
|
+
te = buf + (te - ts);
|
316
|
+
ts = buf;
|
317
|
+
}
|
318
|
+
|
319
|
+
}
|
320
|
+
|
321
|
+
if(RARRAY_LEN(arr) > 0) { // have a last array to yield
|
322
|
+
rb_yield(arr);
|
323
|
+
}
|
324
|
+
|
325
|
+
return Qnil;
|
272
326
|
}
|
273
327
|
|
274
328
|
VALUE mExcelsior;
|
275
329
|
VALUE cReader;
|
276
330
|
|
277
331
|
void Init_excelsior_reader() {
|
278
|
-
|
279
|
-
|
280
|
-
|
332
|
+
s_read = rb_intern("read");
|
333
|
+
mExcelsior = rb_define_module("Excelsior");
|
334
|
+
cReader = rb_define_class_under(mExcelsior, "Reader", rb_cObject);
|
335
|
+
rb_define_singleton_method(cReader, "rows", e_rows, -1);
|
281
336
|
}
|
@@ -1,19 +1,11 @@
|
|
1
|
-
#include
|
1
|
+
#include <ruby.h>
|
2
2
|
|
3
|
-
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
rows = rb_ary_new();
|
10
|
-
arr = rb_ary_new();
|
11
|
-
len = RSTRING(data)->len;
|
12
|
-
p = RSTRING(data)->ptr;
|
13
|
-
pe = p + len;
|
14
|
-
eof = pe;
|
15
|
-
int has_found = 0;
|
16
|
-
%%{
|
3
|
+
static ID s_read;
|
4
|
+
VALUE arr;
|
5
|
+
int has_found = 0;
|
6
|
+
#define BUFSIZE 16384
|
7
|
+
|
8
|
+
%%{
|
17
9
|
machine excelsior_scan;
|
18
10
|
delimeter = ",";
|
19
11
|
newline = "\r"? "\n" | "\r" | "\n";
|
@@ -22,28 +14,91 @@ VALUE e_parse(VALUE self, VALUE data) {
|
|
22
14
|
string = '"' (schar | '""')* '"' ;
|
23
15
|
value = letter+;
|
24
16
|
main := |*
|
25
|
-
newline {
|
17
|
+
newline { rb_yield(arr); arr = rb_ary_new(); has_found = 0; };
|
26
18
|
space;
|
27
19
|
value { rb_ary_push(arr, rb_str_new(ts, te - ts)); has_found = 1;};
|
28
20
|
string { rb_ary_push(arr, rb_str_new(ts + 1, te - ts - 2)); has_found = 1;};
|
29
21
|
delimeter { if(has_found == 0) rb_ary_push(arr, Qnil); has_found = 0;};
|
30
22
|
*|;
|
31
|
-
|
23
|
+
}%%
|
24
|
+
|
25
|
+
%% write data nofinal;
|
26
|
+
|
32
27
|
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
28
|
+
VALUE e_rows(int argc, VALUE *argv, VALUE self) {
|
29
|
+
|
30
|
+
int cs, act, have = 0, nread = 0, curline = 1, text = 0;
|
31
|
+
char *ts = 0, *te = 0, *buf = NULL, *eof = NULL;
|
32
|
+
int buffer_size = BUFSIZE;
|
33
|
+
|
34
|
+
has_found = 0;
|
35
|
+
VALUE io;
|
36
|
+
int is_io = 0;
|
37
|
+
int done = 0;
|
38
|
+
|
39
|
+
arr = rb_ary_new();
|
40
|
+
rb_scan_args(argc, argv, "1", &io);
|
41
|
+
|
42
|
+
is_io = rb_respond_to(io, s_read);
|
43
|
+
buf = ALLOC_N(char, buffer_size);
|
44
|
+
|
45
|
+
%% write init;
|
46
|
+
|
47
|
+
while(!done) {
|
48
|
+
|
49
|
+
int len, space = buffer_size - have;
|
50
|
+
VALUE str;
|
51
|
+
char *p, *pe;
|
52
|
+
p = buf + have;
|
53
|
+
|
54
|
+
if(is_io) {
|
55
|
+
str = rb_funcall(io, s_read, 1, INT2FIX(space));
|
56
|
+
len = RSTRING_LEN(str);
|
57
|
+
memcpy(p, StringValuePtr(str), len);
|
58
|
+
} else {
|
59
|
+
// Going to assume it's a string and already in memory
|
60
|
+
//str = io;
|
61
|
+
len = buffer_size = RSTRING_LEN(io);
|
62
|
+
memcpy(p, StringValuePtr(io), len);
|
63
|
+
space = buffer_size - have;
|
64
|
+
pe = p + buffer_size;
|
65
|
+
eof = pe;
|
66
|
+
done = 1;
|
67
|
+
}
|
68
|
+
|
69
|
+
if(len < space) {
|
70
|
+
done = 1;
|
71
|
+
//p[len++] = 0; can't seem to get it to work with this
|
72
|
+
pe = p + len;
|
73
|
+
eof = pe;
|
74
|
+
} else {
|
75
|
+
pe = p + len;
|
76
|
+
}
|
77
|
+
|
78
|
+
%% write exec;
|
79
|
+
|
80
|
+
if(ts != 0) { // we are not at the end
|
81
|
+
have = pe - ts; //so copy stuff back in
|
82
|
+
memmove(buf, ts, have);
|
83
|
+
te = buf + (te - ts);
|
84
|
+
ts = buf;
|
85
|
+
}
|
86
|
+
|
87
|
+
}
|
88
|
+
|
89
|
+
if(RARRAY_LEN(arr) > 0) { // have a last array to yield
|
90
|
+
rb_yield(arr);
|
91
|
+
}
|
92
|
+
|
93
|
+
return Qnil;
|
40
94
|
}
|
41
95
|
|
42
96
|
VALUE mExcelsior;
|
43
97
|
VALUE cReader;
|
44
98
|
|
45
99
|
void Init_excelsior_reader() {
|
46
|
-
|
47
|
-
|
48
|
-
|
100
|
+
s_read = rb_intern("read");
|
101
|
+
mExcelsior = rb_define_module("Excelsior");
|
102
|
+
cReader = rb_define_class_under(mExcelsior, "Reader", rb_cObject);
|
103
|
+
rb_define_singleton_method(cReader, "rows", e_rows, -1);
|
49
104
|
}
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: toastyapps-excelsior
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.4
|
4
|
+
version: 0.0.5
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Matthew Mongeau
|
@@ -9,7 +9,7 @@ autorequire:
|
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
11
|
|
12
|
-
date: 2009-08-
|
12
|
+
date: 2009-08-20 21:00:00 -07:00
|
13
13
|
default_executable:
|
14
14
|
dependencies: []
|
15
15
|
|