fastcsv 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: e5d991d026c76068b9e646ba62cefdad823f01e1
4
+ data.tar.gz: 8815f0bb3b00e01593f2a46320cf58c88926099c
5
+ SHA512:
6
+ metadata.gz: 8762ce01e3e5af4cd0395bf541879db46f677f79201e2f44dc5f35dd30514c53fa2c2c5808ec61149898d64bff908a74fe2153c690a04e75c82ba7306794fa15
7
+ data.tar.gz: e6dd0a3f89f9d330428fbc8d6f1b469d9f3a8ca255561c6fa972d1ad2147422bd090f3c7a373d08d29b07748f781f9b8d70fae27a092a075d183de897094eea5
data/.gitignore ADDED
@@ -0,0 +1,8 @@
1
+ *.gem
2
+ .bundle
3
+ .yardoc
4
+ Gemfile.lock
5
+ doc/*
6
+ pkg/*
7
+ tmp/*
8
+ lib/fastcsv/fastcsv.bundle
data/.rspec ADDED
@@ -0,0 +1,2 @@
1
+ --color
2
+ --require spec_helper
data/Gemfile ADDED
@@ -0,0 +1,4 @@
1
+ source "https://rubygems.org" # HTTPS so gem downloads are encrypted and the server is authenticated
2
+
3
+ # Specify your gem's dependencies in the gemspec
4
+ gemspec
data/LICENSE ADDED
@@ -0,0 +1,20 @@
1
+ Copyright (c) 2014 Open North Inc.
2
+
3
+ Permission is hereby granted, free of charge, to any person obtaining
4
+ a copy of this software and associated documentation files (the
5
+ "Software"), to deal in the Software without restriction, including
6
+ without limitation the rights to use, copy, modify, merge, publish,
7
+ distribute, sublicense, and/or sell copies of the Software, and to
8
+ permit persons to whom the Software is furnished to do so, subject to
9
+ the following conditions:
10
+
11
+ The above copyright notice and this permission notice shall be
12
+ included in all copies or substantial portions of the Software.
13
+
14
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
17
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
18
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
19
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
20
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,56 @@
1
+ # FastCSV
2
+
3
+ [![Gem Version](https://badge.fury.io/rb/fastcsv.svg)](http://badge.fury.io/rb/fastcsv)
4
+ [![Dependency Status](https://gemnasium.com/opennorth/fastcsv.png)](https://gemnasium.com/opennorth/fastcsv)
5
+
6
+ A fast [Ragel](http://www.colm.net/open-source/ragel/)-based CSV parser.
7
+
8
+ ## Usage
9
+
10
+ ```ruby
11
+ require 'fastcsv'
12
+
13
+ # Read from file.
14
+ File.open(filename) do |f|
15
+ FastCSV.raw_parse(f) do |row|
16
+ # do stuff
17
+ end
18
+ end
19
+
20
+ # Read from an IO object.
21
+ FastCSV.raw_parse(StringIO.new("foo,bar\n")) do |row|
22
+ # do stuff
23
+ end
24
+
25
+ # Read from a string.
26
+ FastCSV.raw_parse("foo,bar\n") do |row|
27
+ # do stuff
28
+ end
29
+
30
+ # Transcode like with the CSV module.
31
+ FastCSV.raw_parse("\xF1\n", encoding: 'iso-8859-1:utf-8') do |row|
32
+ # ["ñ"]
33
+ end
34
+ ```
35
+
36
+ ## Development
37
+
38
+ ragel -G2 ext/fastcsv/fastcsv.rl
39
+ ragel -Vp ext/fastcsv/fastcsv.rl | dot -Tpng -o machine.png
40
+ rake compile
41
+ gem uninstall fastcsv
42
+ rake install
43
+
44
+ ## Why?
45
+
46
+ We evaluated [many CSV Ruby gems](https://github.com/jpmckinney/csv-benchmark#benchmark), and they were either too slow or had implementation errors. [rcsv](https://github.com/fiksu/rcsv) is fast and [libcsv](http://sourceforge.net/projects/libcsv/)-based, but it skips blank rows (Ruby's CSV module returns an empty array) and silently fails on input with an unclosed quote; nonetheless, it's an excellent alternative if you find errors in FastCSV! We looked for Ragel-based CSV parsers to copy, but they either had implementation errors or could not handle large inputs. [commas](https://github.com/aklt/commas/blob/master/csv.rl) looks good, but it performs a memory check on each character, which is overkill.
47
+
48
+ ## Bugs? Questions?
49
+
50
+ This project's main repository is on GitHub: [http://github.com/opennorth/fastcsv](http://github.com/opennorth/fastcsv). Contributions, forks, bug reports, feature requests, and feedback are all welcome there.
51
+
52
+ ## Acknowledgements
53
+
54
+ FastCSV started as a Ruby 2.1 fork of MoonWolf <moonwolf@moonwolf.com>'s CSVScan, found in [this commit](https://github.com/nickstenning/csvscan/commit/11ec30f71a27cc673bca09738ee8a63942f416f0.patch). CSVScan in turn uses Ragel code from [HPricot](https://github.com/hpricot/hpricot/blob/master/ext/hpricot_scan/hpricot_scan.rl), as of [this commit](https://github.com/hpricot/hpricot/blob/908a4ae64bc8b935c4415c47ca6aea6492c6ce0a/ext/hpricot_scan/hpricot_scan.rl).
55
+
56
+ Copyright (c) 2014 Open North Inc., released under the MIT license
data/Rakefile ADDED
@@ -0,0 +1,21 @@
1
+ require 'bundler' # Rake tasks for this gem: packaging, compiling the C extension, specs and docs.
2
+ Bundler::GemHelper.install_tasks # Bundler's gem tasks (presumably the `rake install` used in the README)
3
+
4
+ require 'rake/extensiontask'
5
+ Rake::ExtensionTask.new('fastcsv') do |ext| # extension task for the 'fastcsv' C extension
6
+ ext.lib_dir = 'lib/fastcsv' # compiled library is placed under lib/fastcsv (matches the .gitignore entry)
7
+ end
8
+
9
+ require 'rspec/core/rake_task'
10
+ RSpec::Core::RakeTask.new(:spec)
11
+
12
+ task :default => :spec # plain `rake` runs the specs
13
+
14
+ begin
15
+ require 'yard'
16
+ YARD::Rake::YardocTask.new
17
+ rescue LoadError # YARD is optional: define a :yard task that explains how to install it
18
+ task :yard do
19
+ abort 'YARD is not available. In order to run yard, you must: gem install yard'
20
+ end
21
+ end
data/USAGE ADDED
@@ -0,0 +1 @@
1
+ See README.md for full usage details.
@@ -0,0 +1,3 @@
1
+ require 'mkmf' # mkmf supplies create_makefile
2
+
3
+ create_makefile('fastcsv/fastcsv') # generate the Makefile for the 'fastcsv/fastcsv' extension target
@@ -0,0 +1,697 @@
1
+
2
+ #line 1 "ext/fastcsv/fastcsv.rl"
3
+ #include <ruby.h>
4
+ #include <ruby/encoding.h>
5
+ // CSV specifications.
6
+ // http://tools.ietf.org/html/rfc4180
7
+ // http://w3c.github.io/csvw/syntax/#ebnf
8
+
9
+ // CSV implementation.
10
+ // https://github.com/ruby/ruby/blob/master/lib/csv.rb
11
+
12
+ // Ruby C extensions help.
13
+ // https://github.com/ruby/ruby/blob/trunk/README.EXT
14
+ // http://rxr.whitequark.org/mri/source
15
+
16
+ // Ragel help.
17
+ // https://www.mail-archive.com/ragel-users@complang.org/
18
+
19
+ # define ASSOCIATE_INDEX \
20
+ if (internal_index >= 0) { \
21
+ rb_enc_associate_index(field, internal_index); \
22
+ field = rb_str_encode(field, rb_enc_from_encoding(external_encoding), 0, Qnil); \
23
+ } \
24
+ else { \
25
+ rb_enc_associate_index(field, rb_enc_to_index(external_encoding)); \
26
+ }
27
+
28
+ static VALUE mModule, rb_eParseError; // the FastCSV module and FastCSV::ParseError, both set in Init_fastcsv
29
+ static ID s_read, s_to_str; // interned method names "read" and "to_str"
30
+
31
+
32
+ #line 139 "ext/fastcsv/fastcsv.rl"
33
+
34
+
35
+
36
+ #line 37 "ext/fastcsv/fastcsv.c"
37
+ static const int fastcsv_start = 4; // state the scanner is initialised to
38
+ static const int fastcsv_first_final = 4; // ending in a state below this one is reported as a parse error
39
+ static const int fastcsv_error = 0;
40
+
41
+ static const int fastcsv_en_main = 4;
42
+
43
+
44
+ #line 142 "ext/fastcsv/fastcsv.rl"
45
+
46
+ #define BUFSIZE 16384 // default read-buffer size, and the amount the buffer grows by when full
47
+
48
+ VALUE fastcsv(int argc, VALUE *argv, VALUE self) { // FastCSV.raw_parse(data, options = nil) { |row| ... }: parses CSV from a String-like (#to_str) or #read-able object, yields each row (Array of String/nil) and returns nil. Raises ArgumentError for bad arguments and FastCSV::ParseError for malformed quoting.
49
+ int cs, act, have = 0, curline = 1, io = 0;
50
+ char *ts = 0, *te = 0, *buf = 0, *eof = 0;
51
+
52
+ VALUE port, opts;
53
+ VALUE row = rb_ary_new(), field = Qnil, bufsize = Qnil;
54
+ int done = 0, unclosed_line = 0, buffer_size = 0, taint = 0;
55
+ int internal_index = 0, external_index = rb_enc_to_index(rb_default_external_encoding()); // NOTE(review): internal_index starts at 0, so ASSOCIATE_INDEX takes its transcoding branch even when no :encoding is given — confirm intended
56
+ rb_encoding *external_encoding = rb_default_external_encoding();
57
+
58
+ VALUE option;
59
+ char quote_char = '"'; //, *col_sep = ",", *row_sep = "\r\n";
60
+
61
+ rb_scan_args(argc, argv, "11", &port, &opts); // "11": one required argument (data), one optional (options)
62
+ taint = OBJ_TAINTED(port);
63
+ io = rb_respond_to(port, s_read); // non-zero: input is pulled in chunks through #read
64
+ if (!io) {
65
+ if (rb_respond_to(port, s_to_str)) {
66
+ port = rb_funcall(port, s_to_str, 0);
67
+ StringValue(port);
68
+ }
69
+ else {
70
+ rb_raise(rb_eArgError, "data has to respond to #read or #to_str");
71
+ }
72
+ }
73
+
74
+ if (NIL_P(opts)) {
75
+ opts = rb_hash_new();
76
+ }
77
+ else if (TYPE(opts) != T_HASH) {
78
+ rb_raise(rb_eArgError, "options has to be a Hash or nil");
79
+ }
80
+
81
+ // @note Add machines for common CSV dialects, or see if we can use "when"
82
+ // from Chapter 6 to compare the character to the host program's variable.
83
+ // option = rb_hash_aref(opts, ID2SYM(rb_intern("quote_char")));
84
+ // if (TYPE(option) == T_STRING && RSTRING_LEN(option) == 1) {
85
+ // quote_char = *StringValueCStr(option);
86
+ // }
87
+ // else if (!NIL_P(option)) {
88
+ // rb_raise(rb_eArgError, ":quote_char has to be a single character String");
89
+ // }
90
+
91
+ // option = rb_hash_aref(opts, ID2SYM(rb_intern("col_sep")));
92
+ // if (TYPE(option) == T_STRING) {
93
+ // col_sep = StringValueCStr(option);
94
+ // }
95
+ // else if (!NIL_P(option)) {
96
+ // rb_raise(rb_eArgError, ":col_sep has to be a String");
97
+ // }
98
+
99
+ // option = rb_hash_aref(opts, ID2SYM(rb_intern("row_sep")));
100
+ // if (TYPE(option) == T_STRING) {
101
+ // row_sep = StringValueCStr(option);
102
+ // }
103
+ // else if (!NIL_P(option)) {
104
+ // rb_raise(rb_eArgError, ":row_sep has to be a String");
105
+ // }
106
+
107
+ option = rb_hash_aref(opts, ID2SYM(rb_intern("encoding"))); // "name" or "from:to": the part before ':' tags the data, the part after is the encoding rows are transcoded to
108
+ if (TYPE(option) == T_STRING) {
109
+ // @see parse_mode_enc in Ruby's io.c
110
+ const char *string = StringValueCStr(option), *pointer;
111
+ char internal_encoding_name[ENCODING_MAXNAMELEN + 1];
112
+
113
+ pointer = strrchr(string, ':');
114
+ if (pointer) {
115
+ long len = (pointer++) - string;
116
+ if (len == 0 || len > ENCODING_MAXNAMELEN) {
117
+ internal_index = -1;
118
+ }
119
+ else {
120
+ memcpy(internal_encoding_name, string, len);
121
+ internal_encoding_name[len] = '\0';
122
+ string = internal_encoding_name;
123
+ internal_index = rb_enc_find_index(internal_encoding_name);
124
+ }
125
+ }
126
+ else {
127
+ internal_index = rb_enc_find_index(string);
128
+ }
129
+
130
+ if (internal_index < 0 && internal_index != -2) {
131
+ rb_warn("Unsupported encoding %s ignored", string);
132
+ }
133
+
134
+ if (pointer) {
135
+ external_index = rb_enc_find_index(pointer);
136
+ if (external_index >= 0) {
137
+ external_encoding = rb_enc_from_index(external_index);
138
+ }
139
+ else {
140
+ rb_warn("Unsupported encoding %s ignored", pointer); // report the name that was actually rejected (the part after ':')
141
+ }
142
+ }
143
+ else if (internal_index >= 0) {
144
+ external_encoding = rb_enc_from_index(internal_index);
145
+ }
146
+ }
147
+ else if (!NIL_P(option)) {
148
+ rb_raise(rb_eArgError, ":encoding has to be a String");
149
+ }
150
+
151
+ buffer_size = BUFSIZE;
152
+ if (rb_ivar_defined(self, rb_intern("@buffer_size")) == Qtrue) { // optional override set through FastCSV.buffer_size=
153
+ bufsize = rb_ivar_get(self, rb_intern("@buffer_size"));
154
+ if (!NIL_P(bufsize)) {
155
+ buffer_size = NUM2INT(bufsize);
156
+ }
157
+ }
158
+
159
+ if (io) {
160
+ buf = ALLOC_N(char, buffer_size); // scan buffer for #read input; released with xfree
161
+ }
162
+
163
+
164
+ #line 165 "ext/fastcsv/fastcsv.c"
165
+ {
166
+ cs = fastcsv_start;
167
+ ts = 0;
168
+ te = 0;
169
+ act = 0;
170
+ }
171
+
172
+ #line 261 "ext/fastcsv/fastcsv.rl"
173
+
174
+ while (!done) {
175
+ VALUE str;
176
+ char *p, *pe;
177
+ int len, space = buffer_size - have, tokstart_diff, tokend_diff;
178
+
179
+ if (io) {
180
+ if (space == 0) { // buffer is full: grow it by BUFSIZE and re-point ts/te into the reallocated block
181
+ tokstart_diff = ts - buf;
182
+ tokend_diff = te - buf;
183
+
184
+ buffer_size += BUFSIZE;
185
+ REALLOC_N(buf, char, buffer_size);
186
+
187
+ space = buffer_size - have;
188
+
189
+ ts = buf + tokstart_diff;
190
+ te = buf + tokend_diff;
191
+ }
192
+ p = buf + have;
193
+
194
+ str = rb_funcall(port, s_read, 1, INT2FIX(space));
195
+ if (NIL_P(str)) {
196
+ // StringIO#read returns nil for empty string.
197
+ len = 0;
198
+ }
199
+ else {
200
+ len = RSTRING_LEN(str); // NOTE(review): assumes #read returns a String of at most `space` bytes — confirm for custom IO-like objects
201
+ memcpy(p, StringValuePtr(str), len);
202
+ }
203
+
204
+ if (len < space) {
205
+ // EOF actions don't work in scanners, so we add a sentinel value.
206
+ // @see http://www.complang.org/pipermail/ragel-users/2007-May/001516.html
207
+ // @see https://github.com/leeonix/lua-csv-ragel/blob/master/src/csv.rl
208
+ p[len++] = 0;
209
+ done = 1;
210
+ }
211
+ }
212
+ else {
213
+ p = RSTRING_PTR(port);
214
+ len = RSTRING_LEN(port);
215
+ p[len++] = 0; // NOTE(review): stores the sentinel at index len of the caller's String buffer — confirm that byte is always writable
216
+ done = 1;
217
+ }
218
+
219
+ pe = p + len;
220
+ // if (done) {
221
+ // // This triggers the eof action in the non-scanner version.
222
+ // eof = pe;
223
+ // }
224
+
225
+ #line 226 "ext/fastcsv/fastcsv.c"
226
+ {
227
+ if ( p == pe )
228
+ goto _test_eof;
229
+ switch ( cs )
230
+ {
231
+ tr0:
232
+ #line 1 "NONE"
233
+ { switch( act ) {
234
+ case 0:
235
+ {{goto st0;}}
236
+ break;
237
+ default:
238
+ {{p = ((te))-1;}}
239
+ break;
240
+ }
241
+ }
242
+ goto st4;
243
+ tr10:
244
+ #line 105 "ext/fastcsv/fastcsv.rl"
245
+ {
246
+ if (!NIL_P(field) || RARRAY_LEN(row)) {
247
+ rb_ary_push(row, field);
248
+ }
249
+ if (RARRAY_LEN(row)) {
250
+ rb_yield(row);
251
+ }
252
+ }
253
+ #line 129 "ext/fastcsv/fastcsv.rl"
254
+ {te = p+1;}
255
+ goto st4;
256
+ tr16:
257
+ #line 129 "ext/fastcsv/fastcsv.rl"
258
+ {te = p;p--;}
259
+ goto st4;
260
+ tr17:
261
+ #line 128 "ext/fastcsv/fastcsv.rl"
262
+ {te = p;p--;}
263
+ goto st4;
264
+ tr18:
265
+ #line 105 "ext/fastcsv/fastcsv.rl"
266
+ {
267
+ if (!NIL_P(field) || RARRAY_LEN(row)) {
268
+ rb_ary_push(row, field);
269
+ }
270
+ if (RARRAY_LEN(row)) {
271
+ rb_yield(row);
272
+ }
273
+ }
274
+ #line 128 "ext/fastcsv/fastcsv.rl"
275
+ {te = p+1;}
276
+ goto st4;
277
+ tr20:
278
+ #line 127 "ext/fastcsv/fastcsv.rl"
279
+ {te = p;p--;}
280
+ goto st4;
281
+ tr21:
282
+ #line 105 "ext/fastcsv/fastcsv.rl"
283
+ {
284
+ if (!NIL_P(field) || RARRAY_LEN(row)) {
285
+ rb_ary_push(row, field);
286
+ }
287
+ if (RARRAY_LEN(row)) {
288
+ rb_yield(row);
289
+ }
290
+ }
291
+ #line 127 "ext/fastcsv/fastcsv.rl"
292
+ {te = p+1;}
293
+ goto st4;
294
+ st4:
295
+ #line 1 "NONE"
296
+ {ts = 0;}
297
+ #line 1 "NONE"
298
+ {act = 0;}
299
+ if ( ++p == pe )
300
+ goto _test_eof4;
301
+ case 4:
302
+ #line 1 "NONE"
303
+ {ts = p;}
304
+ #line 305 "ext/fastcsv/fastcsv.c"
305
+ switch( (*p) ) {
306
+ case 0: goto tr14;
307
+ case 10: goto tr3;
308
+ case 13: goto tr4;
309
+ case 34: goto tr15;
310
+ case 44: goto tr5;
311
+ }
312
+ goto st1;
313
+ st1:
314
+ if ( ++p == pe )
315
+ goto _test_eof1;
316
+ case 1:
317
+ switch( (*p) ) {
318
+ case 0: goto tr2;
319
+ case 10: goto tr3;
320
+ case 13: goto tr4;
321
+ case 34: goto tr0;
322
+ case 44: goto tr5;
323
+ }
324
+ goto st1;
325
+ tr2:
326
+ #line 1 "NONE"
327
+ {te = p+1;}
328
+ #line 44 "ext/fastcsv/fastcsv.rl"
329
+ {
330
+ if (p == ts) {
331
+ // Unquoted empty fields are nil, not "", in Ruby.
332
+ field = Qnil;
333
+ }
334
+ else if (p > ts) {
335
+ field = rb_str_new(ts, p - ts);
336
+ ASSOCIATE_INDEX;
337
+ }
338
+ }
339
+ #line 105 "ext/fastcsv/fastcsv.rl"
340
+ {
341
+ if (!NIL_P(field) || RARRAY_LEN(row)) {
342
+ rb_ary_push(row, field);
343
+ }
344
+ if (RARRAY_LEN(row)) {
345
+ rb_yield(row);
346
+ }
347
+ }
348
+ #line 129 "ext/fastcsv/fastcsv.rl"
349
+ {act = 3;}
350
+ goto st5;
351
+ st5:
352
+ if ( ++p == pe )
353
+ goto _test_eof5;
354
+ case 5:
355
+ #line 356 "ext/fastcsv/fastcsv.c"
356
+ switch( (*p) ) {
357
+ case 0: goto tr2;
358
+ case 10: goto tr3;
359
+ case 13: goto tr4;
360
+ case 34: goto tr16;
361
+ case 44: goto tr5;
362
+ }
363
+ goto st1;
364
+ tr3:
365
+ #line 44 "ext/fastcsv/fastcsv.rl"
366
+ {
367
+ if (p == ts) {
368
+ // Unquoted empty fields are nil, not "", in Ruby.
369
+ field = Qnil;
370
+ }
371
+ else if (p > ts) {
372
+ field = rb_str_new(ts, p - ts);
373
+ ASSOCIATE_INDEX;
374
+ }
375
+ }
376
+ #line 95 "ext/fastcsv/fastcsv.rl"
377
+ {
378
+ if (!NIL_P(field) || RARRAY_LEN(row)) { // same as new_field
379
+ rb_ary_push(row, field);
380
+ field = Qnil;
381
+ }
382
+
383
+ rb_yield(row);
384
+ row = rb_ary_new();
385
+ }
386
+ #line 32 "ext/fastcsv/fastcsv.rl"
387
+ {
388
+ curline++;
389
+ }
390
+ goto st6;
391
+ tr19:
392
+ #line 32 "ext/fastcsv/fastcsv.rl"
393
+ {
394
+ curline++;
395
+ }
396
+ goto st6;
397
+ tr11:
398
+ #line 95 "ext/fastcsv/fastcsv.rl"
399
+ {
400
+ if (!NIL_P(field) || RARRAY_LEN(row)) { // same as new_field
401
+ rb_ary_push(row, field);
402
+ field = Qnil;
403
+ }
404
+
405
+ rb_yield(row);
406
+ row = rb_ary_new();
407
+ }
408
+ #line 32 "ext/fastcsv/fastcsv.rl"
409
+ {
410
+ curline++;
411
+ }
412
+ goto st6;
413
+ st6:
414
+ if ( ++p == pe )
415
+ goto _test_eof6;
416
+ case 6:
417
+ #line 418 "ext/fastcsv/fastcsv.c"
418
+ if ( (*p) == 0 )
419
+ goto tr18;
420
+ goto tr17;
421
+ tr4:
422
+ #line 44 "ext/fastcsv/fastcsv.rl"
423
+ {
424
+ if (p == ts) {
425
+ // Unquoted empty fields are nil, not "", in Ruby.
426
+ field = Qnil;
427
+ }
428
+ else if (p > ts) {
429
+ field = rb_str_new(ts, p - ts);
430
+ ASSOCIATE_INDEX;
431
+ }
432
+ }
433
+ #line 95 "ext/fastcsv/fastcsv.rl"
434
+ {
435
+ if (!NIL_P(field) || RARRAY_LEN(row)) { // same as new_field
436
+ rb_ary_push(row, field);
437
+ field = Qnil;
438
+ }
439
+
440
+ rb_yield(row);
441
+ row = rb_ary_new();
442
+ }
443
+ #line 32 "ext/fastcsv/fastcsv.rl"
444
+ {
445
+ curline++;
446
+ }
447
+ goto st7;
448
+ tr12:
449
+ #line 95 "ext/fastcsv/fastcsv.rl"
450
+ {
451
+ if (!NIL_P(field) || RARRAY_LEN(row)) { // same as new_field
452
+ rb_ary_push(row, field);
453
+ field = Qnil;
454
+ }
455
+
456
+ rb_yield(row);
457
+ row = rb_ary_new();
458
+ }
459
+ #line 32 "ext/fastcsv/fastcsv.rl"
460
+ {
461
+ curline++;
462
+ }
463
+ goto st7;
464
+ st7:
465
+ if ( ++p == pe )
466
+ goto _test_eof7;
467
+ case 7:
468
+ #line 469 "ext/fastcsv/fastcsv.c"
469
+ switch( (*p) ) {
470
+ case 0: goto tr18;
471
+ case 10: goto tr19;
472
+ }
473
+ goto tr17;
474
+ tr5:
475
+ #line 44 "ext/fastcsv/fastcsv.rl"
476
+ {
477
+ if (p == ts) {
478
+ // Unquoted empty fields are nil, not "", in Ruby.
479
+ field = Qnil;
480
+ }
481
+ else if (p > ts) {
482
+ field = rb_str_new(ts, p - ts);
483
+ ASSOCIATE_INDEX;
484
+ }
485
+ }
486
+ #line 90 "ext/fastcsv/fastcsv.rl"
487
+ {
488
+ rb_ary_push(row, field);
489
+ field = Qnil;
490
+ }
491
+ goto st8;
492
+ tr13:
493
+ #line 90 "ext/fastcsv/fastcsv.rl"
494
+ {
495
+ rb_ary_push(row, field);
496
+ field = Qnil;
497
+ }
498
+ goto st8;
499
+ st8:
500
+ if ( ++p == pe )
501
+ goto _test_eof8;
502
+ case 8:
503
+ #line 504 "ext/fastcsv/fastcsv.c"
504
+ if ( (*p) == 0 )
505
+ goto tr21;
506
+ goto tr20;
507
+ tr14:
508
+ #line 1 "NONE"
509
+ {te = p+1;}
510
+ #line 105 "ext/fastcsv/fastcsv.rl"
511
+ {
512
+ if (!NIL_P(field) || RARRAY_LEN(row)) {
513
+ rb_ary_push(row, field);
514
+ }
515
+ if (RARRAY_LEN(row)) {
516
+ rb_yield(row);
517
+ }
518
+ }
519
+ #line 44 "ext/fastcsv/fastcsv.rl"
520
+ {
521
+ if (p == ts) {
522
+ // Unquoted empty fields are nil, not "", in Ruby.
523
+ field = Qnil;
524
+ }
525
+ else if (p > ts) {
526
+ field = rb_str_new(ts, p - ts);
527
+ ASSOCIATE_INDEX;
528
+ }
529
+ }
530
+ #line 129 "ext/fastcsv/fastcsv.rl"
531
+ {act = 3;}
532
+ goto st9;
533
+ st9:
534
+ if ( ++p == pe )
535
+ goto _test_eof9;
536
+ case 9:
537
+ #line 538 "ext/fastcsv/fastcsv.c"
538
+ switch( (*p) ) {
539
+ case 10: goto tr16;
540
+ case 13: goto tr16;
541
+ case 34: goto tr16;
542
+ case 44: goto tr16;
543
+ }
544
+ goto st1;
545
+ tr8:
546
+ #line 32 "ext/fastcsv/fastcsv.rl"
547
+ {
548
+ curline++;
549
+ }
550
+ goto st2;
551
+ tr15:
552
+ #line 36 "ext/fastcsv/fastcsv.rl"
553
+ {
554
+ unclosed_line = curline;
555
+ }
556
+ goto st2;
557
+ st2:
558
+ if ( ++p == pe )
559
+ goto _test_eof2;
560
+ case 2:
561
+ #line 562 "ext/fastcsv/fastcsv.c"
562
+ switch( (*p) ) {
563
+ case 0: goto st0;
564
+ case 10: goto tr8;
565
+ case 13: goto tr8;
566
+ case 34: goto tr9;
567
+ }
568
+ goto st2;
569
+ st0:
570
+ cs = 0;
571
+ goto _out;
572
+ tr9:
573
+ #line 55 "ext/fastcsv/fastcsv.rl"
574
+ {
575
+ if (p == ts) {
576
+ field = rb_str_new2("");
577
+ ASSOCIATE_INDEX;
578
+ }
579
+ // @note If we add an action on '""', we can skip some steps if no '""' is found.
580
+ else if (p > ts) {
581
+ // Operating on ts in-place produces odd behavior, FYI.
582
+ char *copy = ALLOC_N(char, p - ts);
583
+ memcpy(copy, ts, p - ts);
584
+
585
+ char *reader = ts, *writer = copy;
586
+ int escaped = 0;
587
+
588
+ while (p > reader) {
589
+ if (*reader == quote_char && !escaped) {
590
+ // Skip the escaping character.
591
+ escaped = 1;
592
+ }
593
+ else {
594
+ escaped = 0;
595
+ *writer++ = *reader;
596
+ }
597
+ reader++;
598
+ }
599
+
600
+ field = rb_str_new(copy, writer - copy);
601
+ ASSOCIATE_INDEX;
602
+
603
+ if (copy != NULL) {
604
+ xfree(copy); // pairs with ALLOC_N above
605
+ }
606
+ }
607
+ }
608
+ #line 40 "ext/fastcsv/fastcsv.rl"
609
+ {
610
+ unclosed_line = 0;
611
+ }
612
+ goto st3;
613
+ st3:
614
+ if ( ++p == pe )
615
+ goto _test_eof3;
616
+ case 3:
617
+ #line 618 "ext/fastcsv/fastcsv.c"
618
+ switch( (*p) ) {
619
+ case 0: goto tr10;
620
+ case 10: goto tr11;
621
+ case 13: goto tr12;
622
+ case 34: goto st2;
623
+ case 44: goto tr13;
624
+ }
625
+ goto st0;
626
+ }
627
+ _test_eof4: cs = 4; goto _test_eof;
628
+ _test_eof1: cs = 1; goto _test_eof;
629
+ _test_eof5: cs = 5; goto _test_eof;
630
+ _test_eof6: cs = 6; goto _test_eof;
631
+ _test_eof7: cs = 7; goto _test_eof;
632
+ _test_eof8: cs = 8; goto _test_eof;
633
+ _test_eof9: cs = 9; goto _test_eof;
634
+ _test_eof2: cs = 2; goto _test_eof;
635
+ _test_eof3: cs = 3; goto _test_eof;
636
+
637
+ _test_eof: {}
638
+ if ( p == eof )
639
+ {
640
+ switch ( cs ) {
641
+ case 1: goto tr0;
642
+ case 5: goto tr16;
643
+ case 6: goto tr17;
644
+ case 7: goto tr17;
645
+ case 8: goto tr20;
646
+ case 9: goto tr16;
647
+ }
648
+ }
649
+
650
+ _out: {}
651
+ }
652
+
653
+ #line 313 "ext/fastcsv/fastcsv.rl"
654
+
655
+ if (done && cs < fastcsv_first_final) { // all input consumed but the machine is in a non-final state: malformed CSV
656
+ if (buf != NULL) {
657
+ xfree(buf); // release before rb_raise, which does not return
658
+ }
659
+ if (unclosed_line) {
660
+ rb_raise(rb_eParseError, "Unclosed quoted field on line %d.", unclosed_line);
661
+ }
662
+ // Ruby raises different errors for illegal quoting, depending on whether
663
+ // a quoted string is followed by a string ("Unclosed quoted field on line
664
+ // %d.") or by a string ending in a quote ("Missing or stray quote in line
665
+ // %d"). These precisions are kind of bogus, but we can try using $!.
666
+ else {
667
+ rb_raise(rb_eParseError, "Illegal quoting in line %d.", curline);
668
+ }
669
+ }
670
+
671
+ if (ts == 0) { // no token in progress: nothing to carry into the next read
672
+ have = 0;
673
+ }
674
+ else if (io) { // move the unfinished token to the front of the buffer so the next read appends to it
675
+ have = pe - ts;
676
+ memmove(buf, ts, have);
677
+ te = buf + (te - ts);
678
+ ts = buf;
679
+ }
680
+ }
681
+
682
+ if (buf != NULL) {
683
+ xfree(buf); // buf came from ALLOC_N/REALLOC_N
684
+ }
685
+
686
+ return Qnil;
687
+ }
688
+
689
+ void Init_fastcsv(void) { // Extension entry point (Ruby's Init_<name> convention): interns the method names used by the parser and defines FastCSV, FastCSV.raw_parse and FastCSV::ParseError.
690
+ s_read = rb_intern("read");
691
+ s_to_str = rb_intern("to_str");
692
+
693
+ mModule = rb_define_module("FastCSV");
694
+ rb_define_attr(rb_singleton_class(mModule), "buffer_size", 1, 1); // FastCSV.buffer_size reader and writer; raw_parse reads @buffer_size
695
+ rb_define_singleton_method(mModule, "raw_parse", fastcsv, -1); // -1: arguments are passed as argc/argv
696
+ rb_eParseError = rb_define_class_under(mModule, "ParseError", rb_eStandardError);
697
+ }