sandofsky-csvscan 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,6 @@
1
+ === 0.1.0 / 2009-08-11
2
+
3
+ * 1 major enhancement
4
+
5
+ * Birthday!
6
+
@@ -0,0 +1,11 @@
1
+ README.ja
2
+ README.txt
3
+ ext/csvscan/MANIFEST
4
+ ext/csvscan/csvscan.c
5
+ ext/csvscan/csvscan.rl
6
+ ext/csvscan/extconf.rb
7
+ setup.rb
8
+ History.txt
9
+ Manifest.txt
10
+ Rakefile
11
+ test/test_csvscan.rb
@@ -0,0 +1,33 @@
1
+ CSVScan
2
+
3
+ CSVScan はCSVを高速にパースするためのライブラリです。
4
+
5
+
6
+ 1. 必要環境
7
+
8
+ * ruby 1.8
9
+ * C コンパイラ
10
+
11
+
12
+ 2. インストール方法
13
+
14
+ コマンドラインで以下のように入力してください。
15
+ UNIX 系 OS ではおそらく root 権限が必要になります。
16
+
17
+ # ruby setup.rb
18
+
19
+ 3. 使い方
20
+
21
+ require "csvscan" # ライブラリのロード
22
+
23
+ open(ARGV.shift) {|io|
24
+ CSVScan.scan(io) {|row|
25
+ p row
26
+ }
27
+ }
28
+
29
+ 4. ライセンス
30
+
31
+ ライセンスはRubyのライセンスに従います。
32
+
33
+ MoonWolf <moonwolf@moonwolf.com>
@@ -0,0 +1,46 @@
1
+ = csvscan
2
+
3
+ http://github.com/sandofsky/csvscan
4
+
5
+ == DESCRIPTION:
6
+
7
+ This is a packaged version of CSVScan, written by MoonWolf. If you can read Japanese, check out README.ja for the original author's documentation.
8
+
9
+ On a 10,000 line file:
10
+
11
+ time cat example.csv | ruby fastercsv_benchmark.rb
12
+
13
+ real 0m8.804s
14
+ user 0m8.502s
15
+ sys 0m0.304s
16
+
17
+ time cat example.csv | ruby csvscan_benchmark.rb
18
+
19
+ real 0m0.860s
20
+ user 0m0.782s
21
+ sys 0m0.088s
22
+
23
+
24
+ == FEATURES/PROBLEMS:
25
+
26
+ * First version.
27
+ * I have not tested this on Windows, and have no intention to.
28
+
29
+ == SYNOPSIS:
30
+
31
+ require 'csvscan'
32
+ CSVScan.scan(STDIN) do |row|
33
+ puts row.inspect
34
+ end
35
+
36
+ == REQUIREMENTS:
37
+
38
+ * Ruby 1.8 and a C compiler (see README.ja)
39
+
40
+ == INSTALL:
41
+
42
+ * gem install sandofsky-csvscan --source http://gems.github.com
43
+
44
+ == LICENSE:
45
+
46
+ Looks like the original source was LGPL, so I'm stuck with that.
@@ -0,0 +1,16 @@
1
+ # -*- ruby -*-
2
+
3
+ require 'rubygems'
4
+ require 'hoe'
5
+
6
+ class CSVScan # Holds the gem version string for this Rakefile.
7
+ VERSION = "0.1.0" # NOTE(review): ext/csvscan/csvscan.c defines CSVScan as a module (rb_define_module), so this class cannot be loaded together with the extension - confirm it is only used here.
8
+ end
9
+
10
+ Hoe.spec 'csvscan' do
11
+ developer('Ben Sandofsky', 'sandofsky@gmail.com')
12
+ spec_extras[:extensions] = "ext/csvscan/extconf.rb"
13
+ clean_globs << "ext/csvscan/csvscan.*" << "ext/csvscan/*.o" << "ext/Makefile"
14
+ end
15
+
16
+ # vim: syntax=ruby
@@ -0,0 +1,4 @@
1
+ MANIFEST
2
+ csvscan.c
3
+ csvscan.rl
4
+ extconf.rb
@@ -0,0 +1,332 @@
1
+ #line 1 "csvscan.rl"
2
+ #include <ruby.h>
3
+
4
+ static VALUE rb_eCSVParseError;
5
+ static ID s_read, s_to_str;
6
+
7
+ #line 70 "csvscan.rl"
8
+
9
+
10
+
11
+ #line 12 "csvscan.c"
12
+ static const int csv_scan_start = 2;
13
+
14
+ static const int csv_scan_error = 1;
15
+
16
+ #line 73 "csvscan.rl"
17
+
18
+ #define BUFSIZE 131072
19
+
20
+ VALUE csv_scan(VALUE self, VALUE port) { /* CSVScan.scan(port): reads CSV text from port (anything responding to read, else converted with to_str), yields each row as an Array of String/nil cells, returns nil. Raises ArgumentError for unsupported ports and CSVScan::ParseError on scanner errors. */
21
+ int cs, act, have = 0, nread = 0, curline = 1; /* cs/act: scanner state; have: bytes of an unfinished token kept at the start of buf; nread: bytes consumed from port so far; curline: line number reported in errors */
22
+ unsigned char *tokstart = NULL, *tokend = NULL, *buf;
23
+ VALUE row, coldata;
24
+ VALUE bufsize = Qnil;
25
+ int done=0, buffer_size;
26
+
27
+ if ( !rb_respond_to( port, s_read ) ) { /* not readable: accept anything that converts to a String */
28
+ if ( rb_respond_to( port, s_to_str ) ) {
29
+ port = rb_funcall( port, s_to_str, 0 );
30
+ StringValue(port);
31
+ } else {
32
+ rb_raise( rb_eArgError, "bad argument, String or IO only please." );
33
+ }
34
+ }
35
+
36
+ buffer_size = BUFSIZE; /* default, overridden by the receiver's @buffer_size when that is set and non-nil */
37
+ if (rb_ivar_defined(self, rb_intern("@buffer_size")) == Qtrue) {
38
+ bufsize = rb_ivar_get(self, rb_intern("@buffer_size"));
39
+ if (!NIL_P(bufsize)) {
40
+ buffer_size = NUM2INT(bufsize);
41
+ }
42
+ }
43
+ buf = ALLOC_N(unsigned char, buffer_size); /* allocated with Ruby's allocator, so it is released with xfree below. NOTE(review): an exception or break raised from the yielded block still skips the release - confirm whether rb_ensure is wanted. */
44
+
45
+
46
+ #line 47 "csvscan.c"
47
+ {
48
+ cs = csv_scan_start;
49
+ tokstart = 0;
50
+ tokend = 0;
51
+ act = 0;
52
+ }
53
+ #line 102 "csvscan.rl"
54
+
55
+ row = rb_ary_new();
56
+ coldata = Qnil;
57
+
58
+ while( !done ) { /* one iteration per chunk read from port */
59
+ VALUE str;
60
+ unsigned char *p = buf + have, *pe; /* new data is appended after the carried-over token bytes */
61
+ int len, space = buffer_size - have;
62
+
63
+ if ( space == 0 ) { /* a single token filled the whole buffer */
64
+ xfree(buf); rb_raise(rb_eCSVParseError, "ran out of buffer on line %d.", curline); /* release buf first: rb_raise does not return */
65
+ }
66
+
67
+ if ( rb_respond_to( port, s_read ) ) {
68
+ str = rb_funcall( port, s_read, 1, INT2FIX(space) );
69
+ } else {
70
+ str = rb_str_substr( port, nread, space );
71
+ }
72
+
73
+ if ( !NIL_P(str) ) StringValue(str); /* read(n) answers nil at end of input; nil is treated as an empty chunk instead of raising TypeError */
74
+ len = NIL_P(str) ? 0 : RSTRING(str)->len;
75
+ if ( len > space ) { xfree(buf); rb_raise(rb_eCSVParseError, "read returned too much data on line %d.", curline); } else if ( len > 0 ) { memcpy( p, RSTRING(str)->ptr, len ); } /* never copy past the end of buf */
76
+ nread += len;
77
+
78
+ /* If this is the last buffer, tack on an EOF. */
79
+ if ( len < space ) {
80
+ p[len++] = 0;
81
+ done = 1;
82
+ }
83
+
84
+ pe = p + len;
85
+
86
+ #line 87 "csvscan.c"
87
+ {
88
+ if ( p == pe )
89
+ goto _out;
90
+ switch ( cs ) /* resume the generated state machine in the state saved by the previous chunk */
91
+ {
92
+ tr0: /* CR not followed by LF: the CR alone is dropped and the current byte is rescanned */
93
+ #line 19 "csvscan.rl"
94
+ {tokend = p;{p = ((tokend))-1;}}
95
+ goto st2;
96
+ tr1: /* CR LF: end of row */
97
+ #line 10 "csvscan.rl"
98
+ {
99
+ curline += 1;
100
+ }
101
+ #line 20 "csvscan.rl"
102
+ {
103
+ rb_ary_push(row, coldata);
104
+ rb_yield(row);
105
+ coldata = Qnil;
106
+ row = rb_ary_new();
107
+ }
108
+ #line 20 "csvscan.rl"
109
+ {tokend = p+1;{p = ((tokend))-1;}}
110
+ goto st2;
111
+ tr2: /* quoted field finished: copy the bytes between the outer quotes down to tokstart, keeping one quote per doubled pair */
112
+ #line 49 "csvscan.rl"
113
+ {tokend = p;{
114
+ unsigned char ch, *start_p, *wptr, *rptr;
115
+ int rest, datalen;
116
+ start_p = wptr = tokstart;
117
+ rptr = tokstart + 1;
118
+ rest = tokend - tokstart - 2;
119
+ datalen = 0;
120
+ while(rest>0) {
121
+ ch = *rptr++;
122
+ if (ch=='"') {
123
+ rptr++;
124
+ rest--;
125
+ }
126
+ *wptr++ = ch;
127
+ datalen++;
128
+ rest--;
129
+ }
130
+ coldata = rb_str_new( start_p, datalen );
131
+ }{p = ((tokend))-1;}}
132
+ goto st2;
133
+ tr5: /* a delimiter ended the token started in state 3; act selects which action to run */
134
+ #line 1 "csvscan.rl"
135
+ { switch( act ) {
136
+ case 0: tokend = tokstart; {goto st1;}
137
+ case 4: /* unquoted field: strip trailing spaces and tabs; an empty result becomes nil */
138
+ {
139
+ unsigned char ch, *endp;
140
+ int datalen;
141
+ datalen = tokend - tokstart;
142
+ endp = tokend - 1;
143
+ while(datalen>0) {
144
+ ch = *endp--;
145
+ if (ch==' ' || ch=='\t') {
146
+ datalen--;
147
+ } else {
148
+ break;
149
+ }
150
+ }
151
+ if (datalen==0) {
152
+ coldata = Qnil;
153
+ } else {
154
+ coldata = rb_str_new(tokstart, datalen);
155
+ }
156
+ }
157
+ break;
158
+ case 5: /* quoted field: same in-place unquoting as tr2 */
159
+ {
160
+ unsigned char ch, *start_p, *wptr, *rptr;
161
+ int rest, datalen;
162
+ start_p = wptr = tokstart;
163
+ rptr = tokstart + 1;
164
+ rest = tokend - tokstart - 2;
165
+ datalen = 0;
166
+ while(rest>0) {
167
+ ch = *rptr++;
168
+ if (ch=='"') {
169
+ rptr++;
170
+ rest--;
171
+ }
172
+ *wptr++ = ch;
173
+ datalen++;
174
+ rest--;
175
+ }
176
+ coldata = rb_str_new( start_p, datalen );
177
+ }
178
+ break;
179
+ default: break;
180
+ }
181
+ {p = ((tokend))-1;}}
182
+ goto st2;
183
+ tr6: /* tab or space at the start of a token: skipped */
184
+ #line 19 "csvscan.rl"
185
+ {tokend = p+1;{p = ((tokend))-1;}}
186
+ goto st2;
187
+ tr7: /* bare LF: end of row */
188
+ #line 19 "csvscan.rl"
189
+ {tokend = p+1;{p = ((tokend))-1;}}
190
+ #line 10 "csvscan.rl"
191
+ {
192
+ curline += 1;
193
+ }
194
+ #line 20 "csvscan.rl"
195
+ {
196
+ rb_ary_push(row, coldata);
197
+ rb_yield(row);
198
+ coldata = Qnil;
199
+ row = rb_ary_new();
200
+ }
201
+ goto st2;
202
+ tr10: /* comma: the pending cell (possibly nil) is appended to the row */
203
+ #line 26 "csvscan.rl"
204
+ {tokend = p+1;{
205
+ rb_ary_push(row, coldata);
206
+ coldata = Qnil;
207
+ }{p = ((tokend))-1;}}
208
+ goto st2;
209
+ st2: /* between tokens: tokstart is cleared so nothing is carried over if the chunk ends here */
210
+ #line 1 "csvscan.rl"
211
+ {tokstart = 0;}
212
+ #line 1 "csvscan.rl"
213
+ {act = 0;}
214
+ if ( ++p == pe )
215
+ goto _out2;
216
+ case 2:
217
+ #line 1 "csvscan.rl"
218
+ {tokstart = p;}
219
+ #line 220 "csvscan.c"
220
+ switch( (*p) ) {
221
+ case 9u: goto tr6;
222
+ case 10u: goto tr7;
223
+ case 13u: goto st4;
224
+ case 32u: goto tr6;
225
+ case 34u: goto st0;
226
+ case 44u: goto tr10;
227
+ }
228
+ if ( 11u <= (*p) && (*p) <= 12u )
229
+ goto tr8;
230
+ goto tr4;
231
+ tr4:
232
+ #line 1 "csvscan.rl"
233
+ {tokend = p+1;}
234
+ #line 30 "csvscan.rl"
235
+ {act = 4;}
236
+ goto st3;
237
+ tr8:
238
+ #line 1 "csvscan.rl"
239
+ {tokend = p+1;}
240
+ #line 19 "csvscan.rl"
241
+ {act = 1;}
242
+ goto st3;
243
+ st3: /* inside an unquoted field: runs until LF, CR, quote or comma */
244
+ if ( ++p == pe )
245
+ goto _out3;
246
+ case 3:
247
+ #line 248 "csvscan.c"
248
+ switch( (*p) ) {
249
+ case 10u: goto tr5;
250
+ case 13u: goto tr5;
251
+ case 34u: goto tr5;
252
+ case 44u: goto tr5;
253
+ }
254
+ goto tr4;
255
+ st4: /* just saw CR: LF completes a row, anything else drops the CR */
256
+ if ( ++p == pe )
257
+ goto _out4;
258
+ case 4:
259
+ if ( (*p) == 10u )
260
+ goto tr1;
261
+ goto tr0;
262
+ tr11: /* LF inside a quoted field still counts as a line for error reporting */
263
+ #line 10 "csvscan.rl"
264
+ {
265
+ curline += 1;
266
+ }
267
+ goto st0;
268
+ st0: /* inside a quoted field */
269
+ if ( ++p == pe )
270
+ goto _out0;
271
+ case 0:
272
+ #line 273 "csvscan.c"
273
+ switch( (*p) ) {
274
+ case 10u: goto tr11;
275
+ case 34u: goto tr12;
276
+ }
277
+ goto st0;
278
+ tr12:
279
+ #line 1 "csvscan.rl"
280
+ {tokend = p+1;}
281
+ #line 49 "csvscan.rl"
282
+ {act = 5;}
283
+ goto st5;
284
+ st5: /* after a quote inside a quoted field: a second quote is an escaped quote, anything else closes the field */
285
+ if ( ++p == pe )
286
+ goto _out5;
287
+ case 5:
288
+ #line 289 "csvscan.c"
289
+ if ( (*p) == 34u )
290
+ goto st0;
291
+ goto tr2;
292
+ st1:
293
+ goto _out1;
294
+ }
295
+ _out2: cs = 2; goto _out;
296
+ _out3: cs = 3; goto _out;
297
+ _out4: cs = 4; goto _out;
298
+ _out0: cs = 0; goto _out;
299
+ _out5: cs = 5; goto _out;
300
+ _out1: cs = 1; goto _out;
301
+
302
+ _out: {}
303
+ }
304
+ #line 134 "csvscan.rl"
305
+
306
+ if ( cs == csv_scan_error ) {
307
+ xfree(buf);
308
+ rb_raise(rb_eCSVParseError, "parse error on line %d.", curline);
309
+ }
310
+
311
+ if ( tokstart == 0 ) { /* chunk ended between tokens: nothing to keep */
312
+ have = 0;
313
+ } else { /* chunk ended inside a token: move the partial token to the front of buf and re-base the token pointers */
314
+ have = pe - tokstart;
315
+ memmove( buf, tokstart, have );
316
+ tokend = buf + (tokend - tokstart);
317
+ tokstart = buf;
318
+ }
319
+ } /* NOTE(review): cells pending when the input ends without a final newline appear never to be yielded - confirm against the tests. */
320
+ xfree(buf);
321
+ return Qnil;
322
+ }
323
+
324
+ void Init_csvscan(void) { /* Extension entry point: defines module CSVScan with a buffer_size accessor, the scan singleton method and CSVScan::ParseError, and caches the method IDs used by csv_scan. */
325
+ VALUE mCSVScan = rb_define_module("CSVScan");
326
+ rb_define_attr(rb_singleton_class(mCSVScan), "buffer_size", 1, 1); /* reader and writer on the module itself; csv_scan reads the backing @buffer_size */
327
+ rb_define_singleton_method(mCSVScan, "scan", csv_scan, 1);
328
+ rb_eCSVParseError = rb_define_class_under(mCSVScan, "ParseError", rb_eStandardError); /* StandardError rather than Exception so a plain `rescue` catches parse errors; rescuing ParseError or Exception keeps working */
329
+
330
+ s_read = rb_intern("read");
331
+ s_to_str = rb_intern("to_str");
332
+ }