fastcsv 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: e5d991d026c76068b9e646ba62cefdad823f01e1
4
+ data.tar.gz: 8815f0bb3b00e01593f2a46320cf58c88926099c
5
+ SHA512:
6
+ metadata.gz: 8762ce01e3e5af4cd0395bf541879db46f677f79201e2f44dc5f35dd30514c53fa2c2c5808ec61149898d64bff908a74fe2153c690a04e75c82ba7306794fa15
7
+ data.tar.gz: e6dd0a3f89f9d330428fbc8d6f1b469d9f3a8ca255561c6fa972d1ad2147422bd090f3c7a373d08d29b07748f781f9b8d70fae27a092a075d183de897094eea5
data/.gitignore ADDED
@@ -0,0 +1,8 @@
1
+ *.gem
2
+ .bundle
3
+ .yardoc
4
+ Gemfile.lock
5
+ doc/*
6
+ pkg/*
7
+ tmp/*
8
+ lib/fastcsv/fastcsv.bundle
data/.rspec ADDED
@@ -0,0 +1,2 @@
1
+ --color
2
+ --require spec_helper
data/Gemfile ADDED
@@ -0,0 +1,4 @@
1
+ source "https://rubygems.org"
2
+ # Fetch gems over HTTPS so downloads cannot be tampered with in transit.
3
+ # Specify your gem's dependencies in the gemspec
4
+ gemspec
data/LICENSE ADDED
@@ -0,0 +1,20 @@
1
+ Copyright (c) 2014 Open North Inc.
2
+
3
+ Permission is hereby granted, free of charge, to any person obtaining
4
+ a copy of this software and associated documentation files (the
5
+ "Software"), to deal in the Software without restriction, including
6
+ without limitation the rights to use, copy, modify, merge, publish,
7
+ distribute, sublicense, and/or sell copies of the Software, and to
8
+ permit persons to whom the Software is furnished to do so, subject to
9
+ the following conditions:
10
+
11
+ The above copyright notice and this permission notice shall be
12
+ included in all copies or substantial portions of the Software.
13
+
14
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
17
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
18
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
19
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
20
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,56 @@
1
+ # FastCSV
2
+
3
+ [![Gem Version](https://badge.fury.io/rb/fastcsv.svg)](http://badge.fury.io/rb/fastcsv)
4
+ [![Dependency Status](https://gemnasium.com/opennorth/fastcsv.png)](https://gemnasium.com/opennorth/fastcsv)
5
+
6
+ A fast [Ragel](http://www.colm.net/open-source/ragel/)-based CSV parser.
7
+
8
+ ## Usage
9
+
10
+ ```ruby
11
+ require 'fastcsv'
12
+
13
+ # Read from file.
14
+ File.open(filename) do |f|
15
+ FastCSV.raw_parse(f) do |row|
16
+ # do stuff
17
+ end
18
+ end
19
+
20
+ # Read from an IO object.
21
+ FastCSV.raw_parse(StringIO.new("foo,bar\n")) do |row|
22
+ # do stuff
23
+ end
24
+
25
+ # Read from a string.
26
+ FastCSV.raw_parse("foo,bar\n") do |row|
27
+ # do stuff
28
+ end
29
+
30
+ # Transcode like with the CSV module.
31
+ FastCSV.raw_parse("\xF1\n", encoding: 'iso-8859-1:utf-8') do |row|
32
+ # ["ñ"]
33
+ end
34
+ ```
35
+
36
+ ## Development
37
+
38
+ ragel -G2 ext/fastcsv/fastcsv.rl
39
+ ragel -Vp ext/fastcsv/fastcsv.rl | dot -Tpng -o machine.png
40
+ rake compile
41
+ gem uninstall fastcsv
42
+ rake install
43
+
44
+ ## Why?
45
+
46
+ We evaluated [many Ruby CSV gems](https://github.com/jpmckinney/csv-benchmark#benchmark), and they were either too slow or had implementation errors. [rcsv](https://github.com/fiksu/rcsv) is fast and [libcsv](http://sourceforge.net/projects/libcsv/)-based, but it skips blank rows (Ruby's CSV module returns an empty array for each) and silently fails on input with an unclosed quote. Nonetheless, it's an excellent alternative if you find errors in FastCSV! We looked for Ragel-based CSV parsers to copy, but they either had implementation errors or could not handle large inputs. [commas](https://github.com/aklt/commas/blob/master/csv.rl) looks good, but it performs a memory check on each character, which is overkill.
47
+
48
+ ## Bugs? Questions?
49
+
50
+ This project's main repository is on GitHub: [http://github.com/opennorth/fastcsv](http://github.com/opennorth/fastcsv), where your contributions, forks, bug reports, feature requests, and feedback are greatly welcomed.
51
+
52
+ ## Acknowledgements
53
+
54
+ FastCSV started as a Ruby 2.1 fork of MoonWolf <moonwolf@moonwolf.com>'s CSVScan, found in [this commit](https://github.com/nickstenning/csvscan/commit/11ec30f71a27cc673bca09738ee8a63942f416f0.patch). CSVScan uses Ragel code from [HPricot](https://github.com/hpricot/hpricot/blob/master/ext/hpricot_scan/hpricot_scan.rl), taken from [this commit](https://github.com/hpricot/hpricot/blob/908a4ae64bc8b935c4415c47ca6aea6492c6ce0a/ext/hpricot_scan/hpricot_scan.rl).
55
+
56
+ Copyright (c) 2014 Open North Inc., released under the MIT license
data/Rakefile ADDED
@@ -0,0 +1,21 @@
1
+ require 'bundler'
2
+ Bundler::GemHelper.install_tasks
3
+
4
+ require 'rake/extensiontask'
5
+ Rake::ExtensionTask.new('fastcsv') do |ext|
6
+ ext.lib_dir = 'lib/fastcsv'
7
+ end
8
+
9
+ require 'rspec/core/rake_task'
10
+ RSpec::Core::RakeTask.new(:spec)
11
+
12
+ task :default => :spec
13
+
14
+ begin
15
+ require 'yard'
16
+ YARD::Rake::YardocTask.new
17
+ rescue LoadError
18
+ task :yard do
19
+ abort 'YARD is not available. In order to run yard, you must: gem install yard'
20
+ end
21
+ end
data/USAGE ADDED
@@ -0,0 +1 @@
1
+ See README.md for full usage details.
@@ -0,0 +1,3 @@
1
+ require 'mkmf'
2
+ # Generate the Makefile for the C extension, with fastcsv/fastcsv as its target.
3
+ create_makefile('fastcsv/fastcsv')
@@ -0,0 +1,697 @@
1
+
2
+ #line 1 "ext/fastcsv/fastcsv.rl"
3
+ #include <ruby.h>
4
+ #include <ruby/encoding.h>
5
+ // CSV specifications.
6
+ // http://tools.ietf.org/html/rfc4180
7
+ // http://w3c.github.io/csvw/syntax/#ebnf
8
+
9
+ // CSV implementation.
10
+ // https://github.com/ruby/ruby/blob/master/lib/csv.rb
11
+
12
+ // Ruby C extensions help.
13
+ // https://github.com/ruby/ruby/blob/trunk/README.EXT
14
+ // http://rxr.whitequark.org/mri/source
15
+
16
+ // Ragel help.
17
+ // https://www.mail-archive.com/ragel-users@complang.org/
18
+ // ASSOCIATE_INDEX: when internal_index >= 0, tag `field` with that encoding and transcode it to external_encoding; otherwise only tag it with external_encoding. Expects `field`, `internal_index` and `external_encoding` to be in scope at the point of use.
19
+ # define ASSOCIATE_INDEX \
20
+ if (internal_index >= 0) { \
21
+ rb_enc_associate_index(field, internal_index); \
22
+ field = rb_str_encode(field, rb_enc_from_encoding(external_encoding), 0, Qnil); \
23
+ } \
24
+ else { \
25
+ rb_enc_associate_index(field, rb_enc_to_index(external_encoding)); \
26
+ }
27
+
28
+ static VALUE mModule, rb_eParseError;
29
+ static ID s_read, s_to_str;
30
+
31
+
32
+ #line 139 "ext/fastcsv/fastcsv.rl"
33
+
34
+
35
+
36
+ #line 37 "ext/fastcsv/fastcsv.c"
37
+ static const int fastcsv_start = 4;
38
+ static const int fastcsv_first_final = 4;
39
+ static const int fastcsv_error = 0;
40
+
41
+ static const int fastcsv_en_main = 4;
42
+
43
+
44
+ #line 142 "ext/fastcsv/fastcsv.rl"
45
+
46
+ #define BUFSIZE 16384
47
+
48
+ VALUE fastcsv(int argc, VALUE *argv, VALUE self) {
49
+ int cs, act, have = 0, curline = 1, io = 0;
50
+ char *ts = 0, *te = 0, *buf = 0, *eof = 0;
51
+
52
+ VALUE port, opts;
53
+ VALUE row = rb_ary_new(), field = Qnil, bufsize = Qnil;
54
+ int done = 0, unclosed_line = 0, buffer_size = 0, taint = 0;
55
+ int internal_index = 0, external_index = rb_enc_to_index(rb_default_external_encoding());
56
+ rb_encoding *external_encoding = rb_default_external_encoding();
57
+
58
+ VALUE option;
59
+ char quote_char = '"'; //, *col_sep = ",", *row_sep = "\r\n";
60
+
61
+ rb_scan_args(argc, argv, "11", &port, &opts);
62
+ taint = OBJ_TAINTED(port);
63
+ io = rb_respond_to(port, s_read);
64
+ if (!io) {
65
+ if (rb_respond_to(port, s_to_str)) {
66
+ port = rb_funcall(port, s_to_str, 0);
67
+ StringValue(port);
68
+ }
69
+ else {
70
+ rb_raise(rb_eArgError, "data has to respond to #read or #to_str");
71
+ }
72
+ }
73
+
74
+ if (NIL_P(opts)) {
75
+ opts = rb_hash_new();
76
+ }
77
+ else if (TYPE(opts) != T_HASH) {
78
+ rb_raise(rb_eArgError, "options has to be a Hash or nil");
79
+ }
80
+
81
+ // @note Add machines for common CSV dialects, or see if we can use "when"
82
+ // from Chapter 6 to compare the character to the host program's variable.
83
+ // option = rb_hash_aref(opts, ID2SYM(rb_intern("quote_char")));
84
+ // if (TYPE(option) == T_STRING && RSTRING_LEN(option) == 1) {
85
+ // quote_char = *StringValueCStr(option);
86
+ // }
87
+ // else if (!NIL_P(option)) {
88
+ // rb_raise(rb_eArgError, ":quote_char has to be a single character String");
89
+ // }
90
+
91
+ // option = rb_hash_aref(opts, ID2SYM(rb_intern("col_sep")));
92
+ // if (TYPE(option) == T_STRING) {
93
+ // col_sep = StringValueCStr(option);
94
+ // }
95
+ // else if (!NIL_P(option)) {
96
+ // rb_raise(rb_eArgError, ":col_sep has to be a String");
97
+ // }
98
+
99
+ // option = rb_hash_aref(opts, ID2SYM(rb_intern("row_sep")));
100
+ // if (TYPE(option) == T_STRING) {
101
+ // row_sep = StringValueCStr(option);
102
+ // }
103
+ // else if (!NIL_P(option)) {
104
+ // rb_raise(rb_eArgError, ":row_sep has to be a String");
105
+ // }
106
+
107
+ option = rb_hash_aref(opts, ID2SYM(rb_intern("encoding"))); // :encoding is either "NAME" or "FROM:TO"; anything but a String or nil raises ArgumentError below
108
+ if (TYPE(option) == T_STRING) {
109
+ // @see parse_mode_enc in Ruby's io.c
110
+ const char *string = StringValueCStr(option), *pointer;
111
+ char internal_encoding_name[ENCODING_MAXNAMELEN + 1];
112
+ // Split "FROM:TO" at the last colon: FROM is the encoding fields are first tagged with, TO is the encoding they are transcoded to.
113
+ pointer = strrchr(string, ':');
114
+ if (pointer) {
115
+ long len = (pointer++) - string;
116
+ if (len == 0 || len > ENCODING_MAXNAMELEN) {
117
+ internal_index = -1;
118
+ }
119
+ else {
120
+ memcpy(internal_encoding_name, string, len);
121
+ internal_encoding_name[len] = '\0';
122
+ string = internal_encoding_name;
123
+ internal_index = rb_enc_find_index(internal_encoding_name);
124
+ }
125
+ }
126
+ else {
127
+ internal_index = rb_enc_find_index(string);
128
+ }
129
+ // Warn about an unknown FROM encoding; fields are then only tagged with the target encoding, not transcoded.
130
+ if (internal_index < 0 && internal_index != -2) {
131
+ rb_warn("Unsupported encoding %s ignored", string);
132
+ }
133
+ // Resolve the target encoding; with no colon, the single named encoding serves as both source and target.
134
+ if (pointer) {
135
+ external_index = rb_enc_find_index(pointer);
136
+ if (external_index >= 0) {
137
+ external_encoding = rb_enc_from_index(external_index);
138
+ }
139
+ else {
140
+ rb_warn("Unsupported encoding %s ignored", pointer); // name the encoding whose lookup failed (the part after the colon), not the FROM name
141
+ }
142
+ }
143
+ else if (internal_index >= 0) {
144
+ external_encoding = rb_enc_from_index(internal_index);
145
+ }
146
+ }
147
+ else if (!NIL_P(option)) {
148
+ rb_raise(rb_eArgError, ":encoding has to be a String");
149
+ }
150
+
151
+ buffer_size = BUFSIZE;
152
+ if (rb_ivar_defined(self, rb_intern("@buffer_size")) == Qtrue) {
153
+ bufsize = rb_ivar_get(self, rb_intern("@buffer_size"));
154
+ if (!NIL_P(bufsize)) {
155
+ buffer_size = NUM2INT(bufsize);
156
+ }
157
+ }
158
+
159
+ if (io) {
160
+ buf = ALLOC_N(char, buffer_size);
161
+ }
162
+
163
+
164
+ #line 165 "ext/fastcsv/fastcsv.c"
165
+ {
166
+ cs = fastcsv_start;
167
+ ts = 0;
168
+ te = 0;
169
+ act = 0;
170
+ }
171
+
172
+ #line 261 "ext/fastcsv/fastcsv.rl"
173
+
174
+ while (!done) {
175
+ VALUE str;
176
+ char *p, *pe;
177
+ int len, space = buffer_size - have, tokstart_diff, tokend_diff;
178
+
179
+ if (io) {
180
+ if (space == 0) {
181
+ tokstart_diff = ts - buf;
182
+ tokend_diff = te - buf;
183
+
184
+ buffer_size += BUFSIZE;
185
+ REALLOC_N(buf, char, buffer_size);
186
+
187
+ space = buffer_size - have;
188
+
189
+ ts = buf + tokstart_diff;
190
+ te = buf + tokend_diff;
191
+ }
192
+ p = buf + have;
193
+
194
+ str = rb_funcall(port, s_read, 1, INT2FIX(space));
195
+ if (NIL_P(str)) {
196
+ // StringIO#read returns nil for empty string.
197
+ len = 0;
198
+ }
199
+ else {
200
+ len = RSTRING_LEN(str);
201
+ memcpy(p, StringValuePtr(str), len);
202
+ }
203
+
204
+ if (len < space) {
205
+ // EOF actions don't work in scanners, so we add a sentinel value.
206
+ // @see http://www.complang.org/pipermail/ragel-users/2007-May/001516.html
207
+ // @see https://github.com/leeonix/lua-csv-ragel/blob/master/src/csv.rl
208
+ p[len++] = 0;
209
+ done = 1;
210
+ }
211
+ }
212
+ else {
213
+ p = RSTRING_PTR(port);
214
+ len = RSTRING_LEN(port);
215
+ p[len++] = 0;
216
+ done = 1;
217
+ }
218
+
219
+ pe = p + len;
220
+ // if (done) {
221
+ // // This triggers the eof action in the non-scanner version.
222
+ // eof = pe;
223
+ // }
224
+
225
+ #line 226 "ext/fastcsv/fastcsv.c"
226
+ {
227
+ if ( p == pe )
228
+ goto _test_eof;
229
+ switch ( cs )
230
+ {
231
+ tr0:
232
+ #line 1 "NONE"
233
+ { switch( act ) {
234
+ case 0:
235
+ {{goto st0;}}
236
+ break;
237
+ default:
238
+ {{p = ((te))-1;}}
239
+ break;
240
+ }
241
+ }
242
+ goto st4;
243
+ tr10:
244
+ #line 105 "ext/fastcsv/fastcsv.rl"
245
+ {
246
+ if (!NIL_P(field) || RARRAY_LEN(row)) {
247
+ rb_ary_push(row, field);
248
+ }
249
+ if (RARRAY_LEN(row)) {
250
+ rb_yield(row);
251
+ }
252
+ }
253
+ #line 129 "ext/fastcsv/fastcsv.rl"
254
+ {te = p+1;}
255
+ goto st4;
256
+ tr16:
257
+ #line 129 "ext/fastcsv/fastcsv.rl"
258
+ {te = p;p--;}
259
+ goto st4;
260
+ tr17:
261
+ #line 128 "ext/fastcsv/fastcsv.rl"
262
+ {te = p;p--;}
263
+ goto st4;
264
+ tr18:
265
+ #line 105 "ext/fastcsv/fastcsv.rl"
266
+ {
267
+ if (!NIL_P(field) || RARRAY_LEN(row)) {
268
+ rb_ary_push(row, field);
269
+ }
270
+ if (RARRAY_LEN(row)) {
271
+ rb_yield(row);
272
+ }
273
+ }
274
+ #line 128 "ext/fastcsv/fastcsv.rl"
275
+ {te = p+1;}
276
+ goto st4;
277
+ tr20:
278
+ #line 127 "ext/fastcsv/fastcsv.rl"
279
+ {te = p;p--;}
280
+ goto st4;
281
+ tr21:
282
+ #line 105 "ext/fastcsv/fastcsv.rl"
283
+ {
284
+ if (!NIL_P(field) || RARRAY_LEN(row)) {
285
+ rb_ary_push(row, field);
286
+ }
287
+ if (RARRAY_LEN(row)) {
288
+ rb_yield(row);
289
+ }
290
+ }
291
+ #line 127 "ext/fastcsv/fastcsv.rl"
292
+ {te = p+1;}
293
+ goto st4;
294
+ st4:
295
+ #line 1 "NONE"
296
+ {ts = 0;}
297
+ #line 1 "NONE"
298
+ {act = 0;}
299
+ if ( ++p == pe )
300
+ goto _test_eof4;
301
+ case 4:
302
+ #line 1 "NONE"
303
+ {ts = p;}
304
+ #line 305 "ext/fastcsv/fastcsv.c"
305
+ switch( (*p) ) {
306
+ case 0: goto tr14;
307
+ case 10: goto tr3;
308
+ case 13: goto tr4;
309
+ case 34: goto tr15;
310
+ case 44: goto tr5;
311
+ }
312
+ goto st1;
313
+ st1:
314
+ if ( ++p == pe )
315
+ goto _test_eof1;
316
+ case 1:
317
+ switch( (*p) ) {
318
+ case 0: goto tr2;
319
+ case 10: goto tr3;
320
+ case 13: goto tr4;
321
+ case 34: goto tr0;
322
+ case 44: goto tr5;
323
+ }
324
+ goto st1;
325
+ tr2:
326
+ #line 1 "NONE"
327
+ {te = p+1;}
328
+ #line 44 "ext/fastcsv/fastcsv.rl"
329
+ {
330
+ if (p == ts) {
331
+ // Unquoted empty fields are nil, not "", in Ruby.
332
+ field = Qnil;
333
+ }
334
+ else if (p > ts) {
335
+ field = rb_str_new(ts, p - ts);
336
+ ASSOCIATE_INDEX;
337
+ }
338
+ }
339
+ #line 105 "ext/fastcsv/fastcsv.rl"
340
+ {
341
+ if (!NIL_P(field) || RARRAY_LEN(row)) {
342
+ rb_ary_push(row, field);
343
+ }
344
+ if (RARRAY_LEN(row)) {
345
+ rb_yield(row);
346
+ }
347
+ }
348
+ #line 129 "ext/fastcsv/fastcsv.rl"
349
+ {act = 3;}
350
+ goto st5;
351
+ st5:
352
+ if ( ++p == pe )
353
+ goto _test_eof5;
354
+ case 5:
355
+ #line 356 "ext/fastcsv/fastcsv.c"
356
+ switch( (*p) ) {
357
+ case 0: goto tr2;
358
+ case 10: goto tr3;
359
+ case 13: goto tr4;
360
+ case 34: goto tr16;
361
+ case 44: goto tr5;
362
+ }
363
+ goto st1;
364
+ tr3:
365
+ #line 44 "ext/fastcsv/fastcsv.rl"
366
+ {
367
+ if (p == ts) {
368
+ // Unquoted empty fields are nil, not "", in Ruby.
369
+ field = Qnil;
370
+ }
371
+ else if (p > ts) {
372
+ field = rb_str_new(ts, p - ts);
373
+ ASSOCIATE_INDEX;
374
+ }
375
+ }
376
+ #line 95 "ext/fastcsv/fastcsv.rl"
377
+ {
378
+ if (!NIL_P(field) || RARRAY_LEN(row)) { // same as new_field
379
+ rb_ary_push(row, field);
380
+ field = Qnil;
381
+ }
382
+
383
+ rb_yield(row);
384
+ row = rb_ary_new();
385
+ }
386
+ #line 32 "ext/fastcsv/fastcsv.rl"
387
+ {
388
+ curline++;
389
+ }
390
+ goto st6;
391
+ tr19:
392
+ #line 32 "ext/fastcsv/fastcsv.rl"
393
+ {
394
+ curline++;
395
+ }
396
+ goto st6;
397
+ tr11:
398
+ #line 95 "ext/fastcsv/fastcsv.rl"
399
+ {
400
+ if (!NIL_P(field) || RARRAY_LEN(row)) { // same as new_field
401
+ rb_ary_push(row, field);
402
+ field = Qnil;
403
+ }
404
+
405
+ rb_yield(row);
406
+ row = rb_ary_new();
407
+ }
408
+ #line 32 "ext/fastcsv/fastcsv.rl"
409
+ {
410
+ curline++;
411
+ }
412
+ goto st6;
413
+ st6:
414
+ if ( ++p == pe )
415
+ goto _test_eof6;
416
+ case 6:
417
+ #line 418 "ext/fastcsv/fastcsv.c"
418
+ if ( (*p) == 0 )
419
+ goto tr18;
420
+ goto tr17;
421
+ tr4:
422
+ #line 44 "ext/fastcsv/fastcsv.rl"
423
+ {
424
+ if (p == ts) {
425
+ // Unquoted empty fields are nil, not "", in Ruby.
426
+ field = Qnil;
427
+ }
428
+ else if (p > ts) {
429
+ field = rb_str_new(ts, p - ts);
430
+ ASSOCIATE_INDEX;
431
+ }
432
+ }
433
+ #line 95 "ext/fastcsv/fastcsv.rl"
434
+ {
435
+ if (!NIL_P(field) || RARRAY_LEN(row)) { // same as new_field
436
+ rb_ary_push(row, field);
437
+ field = Qnil;
438
+ }
439
+
440
+ rb_yield(row);
441
+ row = rb_ary_new();
442
+ }
443
+ #line 32 "ext/fastcsv/fastcsv.rl"
444
+ {
445
+ curline++;
446
+ }
447
+ goto st7;
448
+ tr12:
449
+ #line 95 "ext/fastcsv/fastcsv.rl"
450
+ {
451
+ if (!NIL_P(field) || RARRAY_LEN(row)) { // same as new_field
452
+ rb_ary_push(row, field);
453
+ field = Qnil;
454
+ }
455
+
456
+ rb_yield(row);
457
+ row = rb_ary_new();
458
+ }
459
+ #line 32 "ext/fastcsv/fastcsv.rl"
460
+ {
461
+ curline++;
462
+ }
463
+ goto st7;
464
+ st7:
465
+ if ( ++p == pe )
466
+ goto _test_eof7;
467
+ case 7:
468
+ #line 469 "ext/fastcsv/fastcsv.c"
469
+ switch( (*p) ) {
470
+ case 0: goto tr18;
471
+ case 10: goto tr19;
472
+ }
473
+ goto tr17;
474
+ tr5:
475
+ #line 44 "ext/fastcsv/fastcsv.rl"
476
+ {
477
+ if (p == ts) {
478
+ // Unquoted empty fields are nil, not "", in Ruby.
479
+ field = Qnil;
480
+ }
481
+ else if (p > ts) {
482
+ field = rb_str_new(ts, p - ts);
483
+ ASSOCIATE_INDEX;
484
+ }
485
+ }
486
+ #line 90 "ext/fastcsv/fastcsv.rl"
487
+ {
488
+ rb_ary_push(row, field);
489
+ field = Qnil;
490
+ }
491
+ goto st8;
492
+ tr13:
493
+ #line 90 "ext/fastcsv/fastcsv.rl"
494
+ {
495
+ rb_ary_push(row, field);
496
+ field = Qnil;
497
+ }
498
+ goto st8;
499
+ st8:
500
+ if ( ++p == pe )
501
+ goto _test_eof8;
502
+ case 8:
503
+ #line 504 "ext/fastcsv/fastcsv.c"
504
+ if ( (*p) == 0 )
505
+ goto tr21;
506
+ goto tr20;
507
+ tr14:
508
+ #line 1 "NONE"
509
+ {te = p+1;}
510
+ #line 105 "ext/fastcsv/fastcsv.rl"
511
+ {
512
+ if (!NIL_P(field) || RARRAY_LEN(row)) {
513
+ rb_ary_push(row, field);
514
+ }
515
+ if (RARRAY_LEN(row)) {
516
+ rb_yield(row);
517
+ }
518
+ }
519
+ #line 44 "ext/fastcsv/fastcsv.rl"
520
+ {
521
+ if (p == ts) {
522
+ // Unquoted empty fields are nil, not "", in Ruby.
523
+ field = Qnil;
524
+ }
525
+ else if (p > ts) {
526
+ field = rb_str_new(ts, p - ts);
527
+ ASSOCIATE_INDEX;
528
+ }
529
+ }
530
+ #line 129 "ext/fastcsv/fastcsv.rl"
531
+ {act = 3;}
532
+ goto st9;
533
+ st9:
534
+ if ( ++p == pe )
535
+ goto _test_eof9;
536
+ case 9:
537
+ #line 538 "ext/fastcsv/fastcsv.c"
538
+ switch( (*p) ) {
539
+ case 10: goto tr16;
540
+ case 13: goto tr16;
541
+ case 34: goto tr16;
542
+ case 44: goto tr16;
543
+ }
544
+ goto st1;
545
+ tr8:
546
+ #line 32 "ext/fastcsv/fastcsv.rl"
547
+ {
548
+ curline++;
549
+ }
550
+ goto st2;
551
+ tr15:
552
+ #line 36 "ext/fastcsv/fastcsv.rl"
553
+ {
554
+ unclosed_line = curline;
555
+ }
556
+ goto st2;
557
+ st2:
558
+ if ( ++p == pe )
559
+ goto _test_eof2;
560
+ case 2:
561
+ #line 562 "ext/fastcsv/fastcsv.c"
562
+ switch( (*p) ) {
563
+ case 0: goto st0;
564
+ case 10: goto tr8;
565
+ case 13: goto tr8;
566
+ case 34: goto tr9;
567
+ }
568
+ goto st2;
569
+ st0:
570
+ cs = 0;
571
+ goto _out;
572
+ tr9:
573
+ #line 55 "ext/fastcsv/fastcsv.rl"
574
+ {
575
+ if (p == ts) {
576
+ field = rb_str_new2("");
577
+ ASSOCIATE_INDEX;
578
+ }
579
+ // @note If we add an action on '""', we can skip some steps if no '""' is found.
580
+ else if (p > ts) {
581
+ // Operating on ts in-place produces odd behavior, FYI.
582
+ char *copy = ALLOC_N(char, p - ts);
583
+ memcpy(copy, ts, p - ts);
584
+
585
+ char *reader = ts, *writer = copy;
586
+ int escaped = 0;
587
+
588
+ while (p > reader) {
589
+ if (*reader == quote_char && !escaped) {
590
+ // Skip the escaping character.
591
+ escaped = 1;
592
+ }
593
+ else {
594
+ escaped = 0;
595
+ *writer++ = *reader;
596
+ }
597
+ reader++;
598
+ }
599
+
600
+ field = rb_str_new(copy, writer - copy);
601
+ ASSOCIATE_INDEX;
602
+
603
+ if (copy != NULL) {
604
+ free(copy);
605
+ }
606
+ }
607
+ }
608
+ #line 40 "ext/fastcsv/fastcsv.rl"
609
+ {
610
+ unclosed_line = 0;
611
+ }
612
+ goto st3;
613
+ st3:
614
+ if ( ++p == pe )
615
+ goto _test_eof3;
616
+ case 3:
617
+ #line 618 "ext/fastcsv/fastcsv.c"
618
+ switch( (*p) ) {
619
+ case 0: goto tr10;
620
+ case 10: goto tr11;
621
+ case 13: goto tr12;
622
+ case 34: goto st2;
623
+ case 44: goto tr13;
624
+ }
625
+ goto st0;
626
+ }
627
+ _test_eof4: cs = 4; goto _test_eof;
628
+ _test_eof1: cs = 1; goto _test_eof;
629
+ _test_eof5: cs = 5; goto _test_eof;
630
+ _test_eof6: cs = 6; goto _test_eof;
631
+ _test_eof7: cs = 7; goto _test_eof;
632
+ _test_eof8: cs = 8; goto _test_eof;
633
+ _test_eof9: cs = 9; goto _test_eof;
634
+ _test_eof2: cs = 2; goto _test_eof;
635
+ _test_eof3: cs = 3; goto _test_eof;
636
+
637
+ _test_eof: {}
638
+ if ( p == eof )
639
+ {
640
+ switch ( cs ) {
641
+ case 1: goto tr0;
642
+ case 5: goto tr16;
643
+ case 6: goto tr17;
644
+ case 7: goto tr17;
645
+ case 8: goto tr20;
646
+ case 9: goto tr16;
647
+ }
648
+ }
649
+
650
+ _out: {}
651
+ }
652
+
653
+ #line 313 "ext/fastcsv/fastcsv.rl"
654
+
655
+ if (done && cs < fastcsv_first_final) {
656
+ if (buf != NULL) {
657
+ free(buf);
658
+ }
659
+ if (unclosed_line) {
660
+ rb_raise(rb_eParseError, "Unclosed quoted field on line %d.", unclosed_line);
661
+ }
662
+ // Ruby raises different errors for illegal quoting, depending on whether
663
+ // a quoted string is followed by a string ("Unclosed quoted field on line
664
+ // %d.") or by a string ending in a quote ("Missing or stray quote in line
665
+ // %d"). These precisions are kind of bogus, but we can try using $!.
666
+ else {
667
+ rb_raise(rb_eParseError, "Illegal quoting in line %d.", curline);
668
+ }
669
+ }
670
+
671
+ if (ts == 0) {
672
+ have = 0;
673
+ }
674
+ else if (io) {
675
+ have = pe - ts;
676
+ memmove(buf, ts, have);
677
+ te = buf + (te - ts);
678
+ ts = buf;
679
+ }
680
+ }
681
+
682
+ if (buf != NULL) {
683
+ free(buf);
684
+ }
685
+
686
+ return Qnil;
687
+ }
688
+ // Sets up the extension: caches the method IDs used by fastcsv() and defines the FastCSV module, its buffer_size accessor, raw_parse and ParseError.
689
+ void Init_fastcsv() {
690
+ s_read = rb_intern("read");
691
+ s_to_str = rb_intern("to_str");
692
+ // raw_parse is registered with arity -1, so its arguments reach fastcsv() as argc/argv.
693
+ mModule = rb_define_module("FastCSV");
694
+ rb_define_attr(rb_singleton_class(mModule), "buffer_size", 1, 1); // reader and writer for FastCSV.buffer_size
695
+ rb_define_singleton_method(mModule, "raw_parse", fastcsv, -1);
696
+ rb_eParseError = rb_define_class_under(mModule, "ParseError", rb_eStandardError);
697
+ }