csvscan 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,6 @@
1
+ === 0.1.0 / 2009-08-11
2
+
3
+ * 1 major enhancement
4
+
5
+ * Birthday!
6
+
@@ -0,0 +1,11 @@
1
+ README.ja
2
+ README.txt
3
+ ext/csvscan/MANIFEST
4
+ ext/csvscan/csvscan.c
5
+ ext/csvscan/csvscan.rl
6
+ ext/csvscan/extconf.rb
7
+ setup.rb
8
+ History.txt
9
+ Manifest.txt
10
+ Rakefile
11
+ test/test_csvscan.rb
@@ -0,0 +1,33 @@
1
+ CSVScan
2
+
3
+ CSVScan はCSVを高速にパースするためのライブラリです。
4
+
5
+
6
+ 1. 必要環境
7
+
8
+ * ruby 1.8
9
+ * C コンパイラ
10
+
11
+
12
+ 2. インストール方法
13
+
14
+ コマンドラインで以下のように入力してください。
15
+ UNIX 系 OS ではおそらく root 権限が必要になります。
16
+
17
+ # ruby setup.rb
18
+
19
+ 3. 使い方
20
+
21
+ require "csvscan" # ライブラリのロード
22
+
23
+ open(ARGV.shift) {|io|
24
+ CSVScan.scan(io) {|row|
25
+ p row
26
+ }
27
+ }
28
+
29
+ 4. ライセンス
30
+
31
+ ライセンスはRubyのライセンスに従います。
32
+
33
+ MoonWolf <moonwolf@moonwolf.com>
@@ -0,0 +1,46 @@
1
+ = csvscan
2
+
3
+ http://github.com/sandofsky/csvscan
4
+
5
+ == DESCRIPTION:
6
+
7
+ This is a packaged version of CSVScan, written by MoonWolf. If you can read Japanese, check out README.ja for the original author's documentation.
8
+
9
+ On a 10,000 line file:
10
+
11
+ time cat example.csv | ruby fastercsv_benchmark.rb
12
+
13
+ real 0m8.804s
14
+ user 0m8.502s
15
+ sys 0m0.304s
16
+
17
+ time cat example.csv | ruby csvscan_benchmark.rb
18
+
19
+ real 0m0.860s
20
+ user 0m0.782s
21
+ sys 0m0.088s
22
+
23
+
24
+ == FEATURES/PROBLEMS:
25
+
26
+ * First version.
27
+ * I have not tested this on Windows, and have no intention to.
28
+
29
+ == SYNOPSIS:
30
+
31
+ require 'csvscan'
32
+ CSVScan.scan(STDIN) do |row|
33
+ puts row.inspect
34
+ end
35
+
36
+ == REQUIREMENTS:
37
+
38
+ * Ruby 1.8 and a C compiler (as listed in README.ja)
39
+
40
+ == INSTALL:
41
+
42
+ * gem install sandofsky-csvscan --source http://gems.github.com
43
+
44
+ == LICENSE:
45
+
46
+ Looks like the original source was LGPL, so I'm stuck with that.
@@ -0,0 +1,16 @@
1
+ # -*- ruby -*-
2
+
3
+ require 'rubygems'
4
+ require 'hoe'
5
+
6
# Holds the gem's version string for the Rakefile.
# NOTE(review): the C extension registers CSVScan as a *module*
# (rb_define_module in Init_csvscan); this class stub would clash with it if
# both were ever loaded into the same process -- confirm that never happens.
class CSVScan
  VERSION = '0.1.0'
end
9
+
10
# Gem specification: declares the developer, registers the C extension's
# extconf.rb so it is built at install time, and lists build products to be
# removed by the clean task.
Hoe.spec 'csvscan' do
  developer('Ben Sandofsky', 'sandofsky@gmail.com')
  spec_extras[:extensions] = "ext/csvscan/extconf.rb"

  # Only build products belong here. A catch-all "ext/csvscan/csvscan.*" glob
  # must not be used: it also matches csvscan.c and csvscan.rl, the sources
  # shipped in Manifest.txt, so cleaning would delete them.
  clean_globs << "ext/csvscan/csvscan.{so,bundle,dll}" \
              << "ext/csvscan/*.o" \
              << "ext/csvscan/Makefile" \
              << "ext/csvscan/mkmf.log" \
              << "ext/Makefile"
end
15
+
16
+ # vim: syntax=ruby
@@ -0,0 +1,4 @@
1
+ MANIFEST
2
+ csvscan.c
3
+ csvscan.rl
4
+ extconf.rb
@@ -0,0 +1,332 @@
1
+ #line 1 "csvscan.rl"
2
+ #include <ruby.h>
3
+
4
+ static VALUE rb_eCSVParseError;
5
+ static ID s_read, s_to_str;
6
+
7
+ #line 70 "csvscan.rl"
8
+
9
+
10
+
11
+ #line 12 "csvscan.c"
12
+ static const int csv_scan_start = 2;
13
+
14
+ static const int csv_scan_error = 1;
15
+
16
+ #line 73 "csvscan.rl"
17
+
18
+ #define BUFSIZE 131072
19
+
20
+ VALUE csv_scan(VALUE self, VALUE port) {
21
+ int cs, act, have = 0, nread = 0, curline = 1;
22
+ unsigned char *tokstart = NULL, *tokend = NULL, *buf;
23
+ VALUE row, coldata;
24
+ VALUE bufsize = Qnil;
25
+ int done=0, buffer_size;
26
+
27
+ if ( !rb_respond_to( port, s_read ) ) {
28
+ if ( rb_respond_to( port, s_to_str ) ) {
29
+ port = rb_funcall( port, s_to_str, 0 );
30
+ StringValue(port);
31
+ } else {
32
+ rb_raise( rb_eArgError, "bad argument, String or IO only please." );
33
+ }
34
+ }
35
+
36
+ buffer_size = BUFSIZE;
37
+ if (rb_ivar_defined(self, rb_intern("@buffer_size")) == Qtrue) {
38
+ bufsize = rb_ivar_get(self, rb_intern("@buffer_size"));
39
+ if (!NIL_P(bufsize)) {
40
+ buffer_size = NUM2INT(bufsize);
41
+ }
42
+ }
43
+ buf = ALLOC_N(unsigned char, buffer_size);
44
+
45
+
46
+ #line 47 "csvscan.c"
47
+ {
48
+ cs = csv_scan_start;
49
+ tokstart = 0;
50
+ tokend = 0;
51
+ act = 0;
52
+ }
53
+ #line 102 "csvscan.rl"
54
+
55
+ row = rb_ary_new();
56
+ coldata = Qnil;
57
+
58
+ while( !done ) {
59
+ VALUE str;
60
+ unsigned char *p = buf + have, *pe;
61
+ int len, space = buffer_size - have;
62
+
63
+ if ( space == 0 ) {
64
+ rb_raise(rb_eCSVParseError, "ran out of buffer on line %d.", curline);
65
+ }
66
+
67
+ if ( rb_respond_to( port, s_read ) ) {
68
+ str = rb_funcall( port, s_read, 1, INT2FIX(space) );
69
+ } else {
70
+ str = rb_str_substr( port, nread, space );
71
+ }
72
+
73
+ StringValue(str);
74
+ memcpy( p, RSTRING(str)->ptr, RSTRING(str)->len );
75
+ len = RSTRING(str)->len;
76
+ nread += len;
77
+
78
+ /* If this is the last buffer, tack on an EOF. */
79
+ if ( len < space ) {
80
+ p[len++] = 0;
81
+ done = 1;
82
+ }
83
+
84
+ pe = p + len;
85
+
86
+ #line 87 "csvscan.c"
87
+ {
88
+ if ( p == pe )
89
+ goto _out;
90
+ switch ( cs )
91
+ {
92
+ tr0:
93
+ #line 19 "csvscan.rl"
94
+ {tokend = p;{p = ((tokend))-1;}}
95
+ goto st2;
96
+ tr1:
97
+ #line 10 "csvscan.rl"
98
+ {
99
+ curline += 1;
100
+ }
101
+ #line 20 "csvscan.rl"
102
+ {
103
+ rb_ary_push(row, coldata);
104
+ rb_yield(row);
105
+ coldata = Qnil;
106
+ row = rb_ary_new();
107
+ }
108
+ #line 20 "csvscan.rl"
109
+ {tokend = p+1;{p = ((tokend))-1;}}
110
+ goto st2;
111
+ tr2:
112
+ #line 49 "csvscan.rl"
113
+ {tokend = p;{
114
+ unsigned char ch, *start_p, *wptr, *rptr;
115
+ int rest, datalen;
116
+ start_p = wptr = tokstart;
117
+ rptr = tokstart + 1;
118
+ rest = tokend - tokstart - 2;
119
+ datalen = 0;
120
+ while(rest>0) {
121
+ ch = *rptr++;
122
+ if (ch=='"') {
123
+ rptr++;
124
+ rest--;
125
+ }
126
+ *wptr++ = ch;
127
+ datalen++;
128
+ rest--;
129
+ }
130
+ coldata = rb_str_new( start_p, datalen );
131
+ }{p = ((tokend))-1;}}
132
+ goto st2;
133
+ tr5:
134
+ #line 1 "csvscan.rl"
135
+ { switch( act ) {
136
+ case 0: tokend = tokstart; {goto st1;}
137
+ case 4:
138
+ {
139
+ unsigned char ch, *endp;
140
+ int datalen;
141
+ datalen = tokend - tokstart;
142
+ endp = tokend - 1;
143
+ while(datalen>0) {
144
+ ch = *endp--;
145
+ if (ch==' ' || ch=='\t') {
146
+ datalen--;
147
+ } else {
148
+ break;
149
+ }
150
+ }
151
+ if (datalen==0) {
152
+ coldata = Qnil;
153
+ } else {
154
+ coldata = rb_str_new(tokstart, datalen);
155
+ }
156
+ }
157
+ break;
158
+ case 5:
159
+ {
160
+ unsigned char ch, *start_p, *wptr, *rptr;
161
+ int rest, datalen;
162
+ start_p = wptr = tokstart;
163
+ rptr = tokstart + 1;
164
+ rest = tokend - tokstart - 2;
165
+ datalen = 0;
166
+ while(rest>0) {
167
+ ch = *rptr++;
168
+ if (ch=='"') {
169
+ rptr++;
170
+ rest--;
171
+ }
172
+ *wptr++ = ch;
173
+ datalen++;
174
+ rest--;
175
+ }
176
+ coldata = rb_str_new( start_p, datalen );
177
+ }
178
+ break;
179
+ default: break;
180
+ }
181
+ {p = ((tokend))-1;}}
182
+ goto st2;
183
+ tr6:
184
+ #line 19 "csvscan.rl"
185
+ {tokend = p+1;{p = ((tokend))-1;}}
186
+ goto st2;
187
+ tr7:
188
+ #line 19 "csvscan.rl"
189
+ {tokend = p+1;{p = ((tokend))-1;}}
190
+ #line 10 "csvscan.rl"
191
+ {
192
+ curline += 1;
193
+ }
194
+ #line 20 "csvscan.rl"
195
+ {
196
+ rb_ary_push(row, coldata);
197
+ rb_yield(row);
198
+ coldata = Qnil;
199
+ row = rb_ary_new();
200
+ }
201
+ goto st2;
202
+ tr10:
203
+ #line 26 "csvscan.rl"
204
+ {tokend = p+1;{
205
+ rb_ary_push(row, coldata);
206
+ coldata = Qnil;
207
+ }{p = ((tokend))-1;}}
208
+ goto st2;
209
+ st2:
210
+ #line 1 "csvscan.rl"
211
+ {tokstart = 0;}
212
+ #line 1 "csvscan.rl"
213
+ {act = 0;}
214
+ if ( ++p == pe )
215
+ goto _out2;
216
+ case 2:
217
+ #line 1 "csvscan.rl"
218
+ {tokstart = p;}
219
+ #line 220 "csvscan.c"
220
+ switch( (*p) ) {
221
+ case 9u: goto tr6;
222
+ case 10u: goto tr7;
223
+ case 13u: goto st4;
224
+ case 32u: goto tr6;
225
+ case 34u: goto st0;
226
+ case 44u: goto tr10;
227
+ }
228
+ if ( 11u <= (*p) && (*p) <= 12u )
229
+ goto tr8;
230
+ goto tr4;
231
+ tr4:
232
+ #line 1 "csvscan.rl"
233
+ {tokend = p+1;}
234
+ #line 30 "csvscan.rl"
235
+ {act = 4;}
236
+ goto st3;
237
+ tr8:
238
+ #line 1 "csvscan.rl"
239
+ {tokend = p+1;}
240
+ #line 19 "csvscan.rl"
241
+ {act = 1;}
242
+ goto st3;
243
+ st3:
244
+ if ( ++p == pe )
245
+ goto _out3;
246
+ case 3:
247
+ #line 248 "csvscan.c"
248
+ switch( (*p) ) {
249
+ case 10u: goto tr5;
250
+ case 13u: goto tr5;
251
+ case 34u: goto tr5;
252
+ case 44u: goto tr5;
253
+ }
254
+ goto tr4;
255
+ st4:
256
+ if ( ++p == pe )
257
+ goto _out4;
258
+ case 4:
259
+ if ( (*p) == 10u )
260
+ goto tr1;
261
+ goto tr0;
262
+ tr11:
263
+ #line 10 "csvscan.rl"
264
+ {
265
+ curline += 1;
266
+ }
267
+ goto st0;
268
+ st0:
269
+ if ( ++p == pe )
270
+ goto _out0;
271
+ case 0:
272
+ #line 273 "csvscan.c"
273
+ switch( (*p) ) {
274
+ case 10u: goto tr11;
275
+ case 34u: goto tr12;
276
+ }
277
+ goto st0;
278
+ tr12:
279
+ #line 1 "csvscan.rl"
280
+ {tokend = p+1;}
281
+ #line 49 "csvscan.rl"
282
+ {act = 5;}
283
+ goto st5;
284
+ st5:
285
+ if ( ++p == pe )
286
+ goto _out5;
287
+ case 5:
288
+ #line 289 "csvscan.c"
289
+ if ( (*p) == 34u )
290
+ goto st0;
291
+ goto tr2;
292
+ st1:
293
+ goto _out1;
294
+ }
295
+ _out2: cs = 2; goto _out;
296
+ _out3: cs = 3; goto _out;
297
+ _out4: cs = 4; goto _out;
298
+ _out0: cs = 0; goto _out;
299
+ _out5: cs = 5; goto _out;
300
+ _out1: cs = 1; goto _out;
301
+
302
+ _out: {}
303
+ }
304
+ #line 134 "csvscan.rl"
305
+
306
+ if ( cs == csv_scan_error ) {
307
+ free(buf);
308
+ rb_raise(rb_eCSVParseError, "parse error on line %d.", curline);
309
+ }
310
+
311
+ if ( tokstart == 0 ) {
312
+ have = 0;
313
+ } else {
314
+ have = pe - tokstart;
315
+ memmove( buf, tokstart, have );
316
+ tokend = buf + (tokend - tokstart);
317
+ tokstart = buf;
318
+ }
319
+ }
320
+ free(buf);
321
+ return Qnil;
322
+ }
323
+
324
+ void Init_csvscan() {
325
+ VALUE mCSVScan = rb_define_module("CSVScan");
326
+ rb_define_attr(rb_singleton_class(mCSVScan), "buffer_size", 1, 1);
327
+ rb_define_singleton_method(mCSVScan, "scan", csv_scan, 1);
328
+ rb_eCSVParseError = rb_define_class_under(mCSVScan, "ParseError", rb_eException);
329
+
330
+ s_read = rb_intern("read");
331
+ s_to_str = rb_intern("to_str");
332
+ }