fastcsv 0.0.2 → 0.0.3

Sign up to get free protection for your applications and to get access to all the features.
@@ -7,7 +7,7 @@
7
7
  // http://w3c.github.io/csvw/syntax/#ebnf
8
8
 
9
9
  // CSV implementation.
10
- // https://github.com/ruby/ruby/blob/master/lib/csv.rb
10
+ // https://github.com/ruby/ruby/blob/trunk/lib/csv.rb
11
11
 
12
12
  // Ruby C extensions help.
13
13
  // https://github.com/ruby/ruby/blob/trunk/README.EXT
@@ -21,31 +21,42 @@ if (enc2 != NULL) { \
21
21
  field = rb_str_encode(field, rb_enc_from_encoding(enc), 0, Qnil); \
22
22
  }
23
23
 
24
- static VALUE mModule, rb_eParseError;
25
- static ID s_read, s_to_str, s_internal_encoding, s_external_encoding, s_string, s_encoding;
24
+ #define FREE \
25
+ if (buf != NULL) { \
26
+ free(buf); \
27
+ } \
28
+ if (row_sep != NULL) { \
29
+ free(row_sep); \
30
+ }
31
+
32
+ static VALUE cClass, cParser, eError;
33
+ static ID s_read, s_row;
34
+
35
+ // @see https://github.com/nofxx/georuby_c/blob/b3b91fd90980d7c295ac8f6012d89878ea7cd569/ext/types.h#L22
36
+ typedef struct {
37
+ char *start;
38
+ } Data;
26
39
 
27
40
 
28
- #line 125 "ext/fastcsv/fastcsv.rl"
41
+ #line 170 "ext/fastcsv/fastcsv.rl"
29
42
 
30
43
 
31
44
 
32
- #line 33 "ext/fastcsv/fastcsv.c"
33
- static const int fastcsv_start = 4;
34
- static const int fastcsv_first_final = 4;
35
- static const int fastcsv_error = 0;
45
+ #line 46 "ext/fastcsv/fastcsv.c"
46
+ static const int raw_parse_start = 4;
47
+ static const int raw_parse_first_final = 4;
48
+ static const int raw_parse_error = 0;
36
49
 
37
- static const int fastcsv_en_main = 4;
50
+ static const int raw_parse_en_main = 4;
38
51
 
39
52
 
40
- #line 128 "ext/fastcsv/fastcsv.rl"
53
+ #line 173 "ext/fastcsv/fastcsv.rl"
41
54
 
42
55
  // 16 kB
43
56
  #define BUFSIZE 16384
44
57
 
45
58
  // @see http://rxr.whitequark.org/mri/source/io.c#4845
46
- static void
47
- rb_io_ext_int_to_encs(rb_encoding *ext, rb_encoding *intern, rb_encoding **enc, rb_encoding **enc2, int fmode)
48
- {
59
+ static void rb_io_ext_int_to_encs(rb_encoding *ext, rb_encoding *intern, rb_encoding **enc, rb_encoding **enc2, int fmode) {
49
60
  int default_ext = 0;
50
61
 
51
62
  if (ext == NULL) {
@@ -70,15 +81,17 @@ rb_io_ext_int_to_encs(rb_encoding *ext, rb_encoding *intern, rb_encoding **enc,
70
81
  }
71
82
  }
72
83
 
73
- VALUE fastcsv(int argc, VALUE *argv, VALUE self) {
84
+ static VALUE raw_parse(int argc, VALUE *argv, VALUE self) {
74
85
  int cs, act, have = 0, curline = 1, io = 0;
75
- char *ts = 0, *te = 0, *buf = 0, *eof = 0;
86
+ char *ts = 0, *te = 0, *buf = 0, *eof = 0, *mark_row_sep = 0, *row_sep = NULL;
76
87
 
77
- VALUE port, opts;
88
+ VALUE port, opts, r_encoding;
78
89
  VALUE row = rb_ary_new(), field = Qnil, bufsize = Qnil;
79
- int done = 0, unclosed_line = 0, buffer_size = 0, taint = 0;
90
+ int done = 0, unclosed_line = 0, len_row_sep = 0, buffer_size = 0, taint = 0;
80
91
  rb_encoding *enc = NULL, *enc2 = NULL, *encoding = NULL;
81
- VALUE r_encoding;
92
+
93
+ Data *d;
94
+ Data_Get_Struct(self, Data, d);
82
95
 
83
96
  VALUE option;
84
97
  char quote_char = '"';
@@ -87,8 +100,8 @@ VALUE fastcsv(int argc, VALUE *argv, VALUE self) {
87
100
  taint = OBJ_TAINTED(port);
88
101
  io = rb_respond_to(port, s_read);
89
102
  if (!io) {
90
- if (rb_respond_to(port, s_to_str)) {
91
- port = rb_funcall(port, s_to_str, 0);
103
+ if (rb_respond_to(port, rb_intern("to_str"))) {
104
+ port = rb_funcall(port, rb_intern("to_str"), 0);
92
105
  StringValue(port);
93
106
  }
94
107
  else {
@@ -112,7 +125,7 @@ VALUE fastcsv(int argc, VALUE *argv, VALUE self) {
112
125
  // @see http://ruby-doc.org/core-2.1.1/IO.html#method-c-new-label-Open+Mode
113
126
  option = rb_hash_aref(opts, ID2SYM(rb_intern("encoding")));
114
127
  if (TYPE(option) == T_STRING) {
115
- // parse_mode_enc is not in header file.
128
+ // `parse_mode_enc` is not in header file.
116
129
  const char *estr = StringValueCStr(option), *ptr;
117
130
  char encname[ENCODING_MAXNAMELEN+1];
118
131
  int idx, idx2;
@@ -123,17 +136,17 @@ VALUE fastcsv(int argc, VALUE *argv, VALUE self) {
123
136
  ptr = strrchr(estr, ':');
124
137
  if (ptr) {
125
138
  long len = (ptr++) - estr;
126
- if (len == 0 || len > ENCODING_MAXNAMELEN) {
139
+ if (len == 0 || len > ENCODING_MAXNAMELEN) { // ":enc"
127
140
  idx = -1;
128
141
  }
129
- else {
142
+ else { // "enc2:enc" or "enc:-"
130
143
  memcpy(encname, estr, len);
131
144
  encname[len] = '\0';
132
145
  estr = encname;
133
146
  idx = rb_enc_find_index(encname);
134
147
  }
135
148
  }
136
- else {
149
+ else { // "enc"
137
150
  idx = rb_enc_find_index(estr);
138
151
  }
139
152
 
@@ -141,7 +154,7 @@ VALUE fastcsv(int argc, VALUE *argv, VALUE self) {
141
154
  ext_enc = rb_enc_from_index(idx);
142
155
  }
143
156
  else {
144
- if (idx != -2) {
157
+ if (idx != -2) { // ":enc"
145
158
  // `unsupported_encoding` is not in header file.
146
159
  rb_warn("Unsupported encoding %s ignored", estr);
147
160
  }
@@ -150,11 +163,11 @@ VALUE fastcsv(int argc, VALUE *argv, VALUE self) {
150
163
 
151
164
  int_enc = NULL;
152
165
  if (ptr) {
153
- if (*ptr == '-' && *(ptr+1) == '\0') {
166
+ if (*ptr == '-' && *(ptr+1) == '\0') { // "enc:-"
154
167
  /* Special case - "-" => no transcoding */
155
168
  int_enc = (rb_encoding *)Qnil;
156
169
  }
157
- else {
170
+ else { // "enc2:enc"
158
171
  idx2 = rb_enc_find_index(ptr);
159
172
  if (idx2 < 0) {
160
173
  // `unsupported_encoding` is not in header file.
@@ -175,29 +188,33 @@ VALUE fastcsv(int argc, VALUE *argv, VALUE self) {
175
188
  rb_raise(rb_eArgError, ":encoding has to be a String");
176
189
  }
177
190
 
178
- // @see https://github.com/ruby/ruby/blob/70510d026f8d86693dccaba07417488eed09b41d/lib/csv.rb#L1567
179
- // @see https://github.com/ruby/ruby/blob/70510d026f8d86693dccaba07417488eed09b41d/lib/csv.rb#L2300
180
- if (rb_respond_to(port, s_internal_encoding)) {
181
- r_encoding = rb_funcall(port, s_internal_encoding, 0);
191
+ // @see CSV#raw_encoding
192
+ // @see https://github.com/ruby/ruby/blob/ab337e61ecb5f42384ba7d710c36faf96a454e5c/lib/csv.rb#L2290
193
+ if (rb_respond_to(port, rb_intern("internal_encoding"))) {
194
+ r_encoding = rb_funcall(port, rb_intern("internal_encoding"), 0);
182
195
  if (NIL_P(r_encoding)) {
183
- r_encoding = rb_funcall(port, s_external_encoding, 0);
196
+ r_encoding = rb_funcall(port, rb_intern("external_encoding"), 0);
184
197
  }
185
198
  }
186
- else if (rb_respond_to(port, s_string)) {
187
- r_encoding = rb_funcall(rb_funcall(port, s_string, 0), s_encoding, 0);
199
+ else if (rb_respond_to(port, rb_intern("string"))) {
200
+ r_encoding = rb_funcall(rb_funcall(port, rb_intern("string"), 0), rb_intern("encoding"), 0);
188
201
  }
189
- else if (rb_respond_to(port, s_encoding)) {
190
- r_encoding = rb_funcall(port, s_encoding, 0);
202
+ else if (rb_respond_to(port, rb_intern("encoding"))) {
203
+ r_encoding = rb_funcall(port, rb_intern("encoding"), 0);
191
204
  }
192
205
  else {
193
206
  r_encoding = rb_enc_from_encoding(rb_ascii8bit_encoding());
194
207
  }
208
+
209
+ // @see CSV#initialize
210
+ // @see https://github.com/ruby/ruby/blob/ab337e61ecb5f42384ba7d710c36faf96a454e5c/lib/csv.rb#L1510
195
211
  if (NIL_P(r_encoding)) {
196
212
  r_encoding = rb_enc_from_encoding(rb_default_internal_encoding());
197
213
  }
198
214
  if (NIL_P(r_encoding)) {
199
215
  r_encoding = rb_enc_from_encoding(rb_default_external_encoding());
200
216
  }
217
+
201
218
  if (enc2 != NULL) {
202
219
  encoding = enc2;
203
220
  }
@@ -208,11 +225,17 @@ VALUE fastcsv(int argc, VALUE *argv, VALUE self) {
208
225
  encoding = rb_enc_get(r_encoding);
209
226
  }
210
227
 
228
+ rb_ivar_set(self, s_row, Qnil);
229
+
211
230
  buffer_size = BUFSIZE;
212
231
  if (rb_ivar_defined(self, rb_intern("@buffer_size")) == Qtrue) {
213
232
  bufsize = rb_ivar_get(self, rb_intern("@buffer_size"));
214
233
  if (!NIL_P(bufsize)) {
215
234
  buffer_size = NUM2INT(bufsize);
235
+ // buffer_size = 0 can cause segmentation faults.
236
+ if (buffer_size == 0) {
237
+ buffer_size = BUFSIZE;
238
+ }
216
239
  }
217
240
  }
218
241
 
@@ -221,39 +244,47 @@ VALUE fastcsv(int argc, VALUE *argv, VALUE self) {
221
244
  }
222
245
 
223
246
 
224
- #line 225 "ext/fastcsv/fastcsv.c"
247
+ #line 248 "ext/fastcsv/fastcsv.c"
225
248
  {
226
- cs = fastcsv_start;
249
+ cs = raw_parse_start;
227
250
  ts = 0;
228
251
  te = 0;
229
252
  act = 0;
230
253
  }
231
254
 
232
- #line 311 "ext/fastcsv/fastcsv.rl"
255
+ #line 366 "ext/fastcsv/fastcsv.rl"
233
256
 
234
257
  while (!done) {
235
258
  VALUE str;
236
259
  char *p, *pe;
237
- int len, space = buffer_size - have, tokstart_diff, tokend_diff;
260
+ int len, space = buffer_size - have, tokstart_diff, tokend_diff, start_diff, mark_row_sep_diff;
238
261
 
239
262
  if (io) {
240
263
  if (space == 0) {
241
- tokstart_diff = ts - buf;
242
- tokend_diff = te - buf;
264
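+ // REALLOC_N may relocate buf, so every pointer into it (ts, te, d->start, mark_row_sep) is saved as an offset and rebased afterwards. Not moving d->start will cause intermittent segmentation faults.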
265
+ tokstart_diff = ts - buf;
266
+ tokend_diff = te - buf;
267
+ start_diff = d->start - buf;
268
+ mark_row_sep_diff = mark_row_sep - buf;
243
269
 
244
- buffer_size += BUFSIZE;
245
- REALLOC_N(buf, char, buffer_size);
270
+ buffer_size += BUFSIZE;
271
+ REALLOC_N(buf, char, buffer_size);
246
272
 
247
- space = buffer_size - have;
273
+ space = buffer_size - have;
248
274
 
249
- ts = buf + tokstart_diff;
250
- te = buf + tokend_diff;
275
+ ts = buf + tokstart_diff;
276
+ te = buf + tokend_diff;
277
+ d->start = buf + start_diff;
278
+ mark_row_sep = buf + mark_row_sep_diff;
251
279
  }
252
280
  p = buf + have;
253
281
 
282
+ // Reads "`length` bytes without any conversion (binary mode)."
283
+ // "The resulted string is always ASCII-8BIT encoding."
284
+ // @see http://www.ruby-doc.org/core-2.1.4/IO.html#method-i-read
254
285
  str = rb_funcall(port, s_read, 1, INT2FIX(space));
255
286
  if (NIL_P(str)) {
256
- // StringIO#read returns nil for empty string.
287
+ // "`nil` means it met EOF at beginning," e.g. for `StringIO.new("")`.
257
288
  len = 0;
258
289
  }
259
290
  else {
@@ -261,6 +292,7 @@ VALUE fastcsv(int argc, VALUE *argv, VALUE self) {
261
292
  memcpy(p, StringValuePtr(str), len);
262
293
  }
263
294
 
295
+ // "The 1 to `length`-1 bytes string means it met EOF after reading the result."
264
296
  if (len < space) {
265
297
  // EOF actions don't work in scanners, so we add a sentinel value.
266
298
  // @see http://www.complang.org/pipermail/ragel-users/2007-May/001516.html
@@ -276,9 +308,13 @@ VALUE fastcsv(int argc, VALUE *argv, VALUE self) {
276
308
  done = 1;
277
309
  }
278
310
 
311
+ if (d->start == 0) {
312
+ d->start = p;
313
+ }
314
+
279
315
  pe = p + len;
280
316
 
281
- #line 282 "ext/fastcsv/fastcsv.c"
317
+ #line 318 "ext/fastcsv/fastcsv.c"
282
318
  {
283
319
  if ( p == pe )
284
320
  goto _test_eof;
@@ -296,56 +332,80 @@ tr0:
296
332
  }
297
333
  }
298
334
  goto st4;
299
- tr10:
300
- #line 101 "ext/fastcsv/fastcsv.rl"
335
+ tr5:
336
+ #line 49 "ext/fastcsv/fastcsv.rl"
301
337
  {
302
- if (!NIL_P(field) || RARRAY_LEN(row)) {
303
- rb_ary_push(row, field);
338
+ if (p == ts) {
339
+ // Unquoted empty fields are nil, not "", in Ruby.
340
+ field = Qnil;
304
341
  }
305
- if (RARRAY_LEN(row)) {
306
- rb_yield(row);
342
+ else if (p > ts) {
343
+ field = rb_enc_str_new(ts, p - ts, encoding);
344
+ ENCODE;
307
345
  }
308
346
  }
309
- #line 123 "ext/fastcsv/fastcsv.rl"
347
+ #line 95 "ext/fastcsv/fastcsv.rl"
348
+ {
349
+ rb_ary_push(row, field);
350
+ field = Qnil;
351
+ }
352
+ #line 166 "ext/fastcsv/fastcsv.rl"
310
353
  {te = p+1;}
311
354
  goto st4;
312
- tr16:
313
- #line 123 "ext/fastcsv/fastcsv.rl"
314
- {te = p;p--;}
315
- goto st4;
316
- tr17:
317
- #line 122 "ext/fastcsv/fastcsv.rl"
318
- {te = p;p--;}
319
- goto st4;
320
- tr18:
321
- #line 101 "ext/fastcsv/fastcsv.rl"
355
+ tr9:
356
+ #line 138 "ext/fastcsv/fastcsv.rl"
322
357
  {
358
+ if (d->start == 0 || p == d->start) {
359
+ rb_ivar_set(self, s_row, rb_str_new2(""));
360
+ }
361
+ else if (p > d->start) {
362
+ rb_ivar_set(self, s_row, rb_str_new(d->start, p - d->start));
363
+ }
364
+
323
365
  if (!NIL_P(field) || RARRAY_LEN(row)) {
324
366
  rb_ary_push(row, field);
325
367
  }
368
+
326
369
  if (RARRAY_LEN(row)) {
327
370
  rb_yield(row);
328
371
  }
329
372
  }
330
- #line 122 "ext/fastcsv/fastcsv.rl"
373
+ #line 168 "ext/fastcsv/fastcsv.rl"
374
+ {te = p+1;}
375
+ goto st4;
376
+ tr12:
377
+ #line 95 "ext/fastcsv/fastcsv.rl"
378
+ {
379
+ rb_ary_push(row, field);
380
+ field = Qnil;
381
+ }
382
+ #line 166 "ext/fastcsv/fastcsv.rl"
331
383
  {te = p+1;}
332
384
  goto st4;
333
- tr20:
334
- #line 121 "ext/fastcsv/fastcsv.rl"
385
+ tr15:
386
+ #line 168 "ext/fastcsv/fastcsv.rl"
335
387
  {te = p;p--;}
336
388
  goto st4;
337
- tr21:
338
- #line 101 "ext/fastcsv/fastcsv.rl"
389
+ tr16:
390
+ #line 100 "ext/fastcsv/fastcsv.rl"
339
391
  {
340
- if (!NIL_P(field) || RARRAY_LEN(row)) {
341
- rb_ary_push(row, field);
392
+ d->start = p;
393
+
394
+ if (len_row_sep) {
395
+ if (p - mark_row_sep != len_row_sep || row_sep[0] != *mark_row_sep || len_row_sep == 2 && row_sep[1] != *(mark_row_sep + 1)) {
396
+ FREE;
397
+
398
+ rb_raise(eError, "Unquoted fields do not allow \\r or \\n (line %d).", curline - 1);
399
+ }
342
400
  }
343
- if (RARRAY_LEN(row)) {
344
- rb_yield(row);
401
+ else {
402
+ len_row_sep = p - mark_row_sep;
403
+ row_sep = ALLOC_N(char, p - mark_row_sep);
404
+ memcpy(row_sep, mark_row_sep, p - mark_row_sep);
345
405
  }
346
406
  }
347
- #line 121 "ext/fastcsv/fastcsv.rl"
348
- {te = p+1;}
407
+ #line 167 "ext/fastcsv/fastcsv.rl"
408
+ {te = p;p--;}
349
409
  goto st4;
350
410
  st4:
351
411
  #line 1 "NONE"
@@ -357,12 +417,12 @@ st4:
357
417
  case 4:
358
418
  #line 1 "NONE"
359
419
  {ts = p;}
360
- #line 361 "ext/fastcsv/fastcsv.c"
420
+ #line 421 "ext/fastcsv/fastcsv.c"
361
421
  switch( (*p) ) {
362
- case 0: goto tr14;
422
+ case 0: goto tr13;
363
423
  case 10: goto tr3;
364
424
  case 13: goto tr4;
365
- case 34: goto tr15;
425
+ case 34: goto tr14;
366
426
  case 44: goto tr5;
367
427
  }
368
428
  goto st1;
@@ -381,7 +441,7 @@ case 1:
381
441
  tr2:
382
442
  #line 1 "NONE"
383
443
  {te = p+1;}
384
- #line 40 "ext/fastcsv/fastcsv.rl"
444
+ #line 49 "ext/fastcsv/fastcsv.rl"
385
445
  {
386
446
  if (p == ts) {
387
447
  // Unquoted empty fields are nil, not "", in Ruby.
@@ -392,33 +452,41 @@ tr2:
392
452
  ENCODE;
393
453
  }
394
454
  }
395
- #line 101 "ext/fastcsv/fastcsv.rl"
455
+ #line 138 "ext/fastcsv/fastcsv.rl"
396
456
  {
457
+ if (d->start == 0 || p == d->start) {
458
+ rb_ivar_set(self, s_row, rb_str_new2(""));
459
+ }
460
+ else if (p > d->start) {
461
+ rb_ivar_set(self, s_row, rb_str_new(d->start, p - d->start));
462
+ }
463
+
397
464
  if (!NIL_P(field) || RARRAY_LEN(row)) {
398
465
  rb_ary_push(row, field);
399
466
  }
467
+
400
468
  if (RARRAY_LEN(row)) {
401
469
  rb_yield(row);
402
470
  }
403
471
  }
404
- #line 123 "ext/fastcsv/fastcsv.rl"
472
+ #line 168 "ext/fastcsv/fastcsv.rl"
405
473
  {act = 3;}
406
474
  goto st5;
407
475
  st5:
408
476
  if ( ++p == pe )
409
477
  goto _test_eof5;
410
478
  case 5:
411
- #line 412 "ext/fastcsv/fastcsv.c"
479
+ #line 480 "ext/fastcsv/fastcsv.c"
412
480
  switch( (*p) ) {
413
481
  case 0: goto tr2;
414
482
  case 10: goto tr3;
415
483
  case 13: goto tr4;
416
- case 34: goto tr16;
484
+ case 34: goto tr15;
417
485
  case 44: goto tr5;
418
486
  }
419
487
  goto st1;
420
488
  tr3:
421
- #line 40 "ext/fastcsv/fastcsv.rl"
489
+ #line 49 "ext/fastcsv/fastcsv.rl"
422
490
  {
423
491
  if (p == ts) {
424
492
  // Unquoted empty fields are nil, not "", in Ruby.
@@ -429,8 +497,19 @@ tr3:
429
497
  ENCODE;
430
498
  }
431
499
  }
432
- #line 91 "ext/fastcsv/fastcsv.rl"
500
+ #line 117 "ext/fastcsv/fastcsv.rl"
433
501
  {
502
+ mark_row_sep = p;
503
+
504
+ curline++;
505
+
506
+ if (d->start == 0 || p == d->start) {
507
+ rb_ivar_set(self, s_row, rb_str_new2(""));
508
+ }
509
+ else if (p > d->start) {
510
+ rb_ivar_set(self, s_row, rb_str_new(d->start, p - d->start));
511
+ }
512
+
434
513
  if (!NIL_P(field) || RARRAY_LEN(row)) { // same as new_field
435
514
  rb_ary_push(row, field);
436
515
  field = Qnil;
@@ -438,21 +517,22 @@ tr3:
438
517
 
439
518
  rb_yield(row);
440
519
  row = rb_ary_new();
441
- }
442
- #line 28 "ext/fastcsv/fastcsv.rl"
443
- {
444
- curline++;
445
520
  }
446
521
  goto st6;
447
- tr19:
448
- #line 28 "ext/fastcsv/fastcsv.rl"
522
+ tr10:
523
+ #line 117 "ext/fastcsv/fastcsv.rl"
449
524
  {
525
+ mark_row_sep = p;
526
+
450
527
  curline++;
451
- }
452
- goto st6;
453
- tr11:
454
- #line 91 "ext/fastcsv/fastcsv.rl"
455
- {
528
+
529
+ if (d->start == 0 || p == d->start) {
530
+ rb_ivar_set(self, s_row, rb_str_new2(""));
531
+ }
532
+ else if (p > d->start) {
533
+ rb_ivar_set(self, s_row, rb_str_new(d->start, p - d->start));
534
+ }
535
+
456
536
  if (!NIL_P(field) || RARRAY_LEN(row)) { // same as new_field
457
537
  rb_ary_push(row, field);
458
538
  field = Qnil;
@@ -460,22 +540,16 @@ tr11:
460
540
 
461
541
  rb_yield(row);
462
542
  row = rb_ary_new();
463
- }
464
- #line 28 "ext/fastcsv/fastcsv.rl"
465
- {
466
- curline++;
467
543
  }
468
544
  goto st6;
469
545
  st6:
470
546
  if ( ++p == pe )
471
547
  goto _test_eof6;
472
548
  case 6:
473
- #line 474 "ext/fastcsv/fastcsv.c"
474
- if ( (*p) == 0 )
475
- goto tr18;
476
- goto tr17;
549
+ #line 550 "ext/fastcsv/fastcsv.c"
550
+ goto tr16;
477
551
  tr4:
478
- #line 40 "ext/fastcsv/fastcsv.rl"
552
+ #line 49 "ext/fastcsv/fastcsv.rl"
479
553
  {
480
554
  if (p == ts) {
481
555
  // Unquoted empty fields are nil, not "", in Ruby.
@@ -486,8 +560,19 @@ tr4:
486
560
  ENCODE;
487
561
  }
488
562
  }
489
- #line 91 "ext/fastcsv/fastcsv.rl"
563
+ #line 117 "ext/fastcsv/fastcsv.rl"
490
564
  {
565
+ mark_row_sep = p;
566
+
567
+ curline++;
568
+
569
+ if (d->start == 0 || p == d->start) {
570
+ rb_ivar_set(self, s_row, rb_str_new2(""));
571
+ }
572
+ else if (p > d->start) {
573
+ rb_ivar_set(self, s_row, rb_str_new(d->start, p - d->start));
574
+ }
575
+
491
576
  if (!NIL_P(field) || RARRAY_LEN(row)) { // same as new_field
492
577
  rb_ary_push(row, field);
493
578
  field = Qnil;
@@ -495,15 +580,22 @@ tr4:
495
580
 
496
581
  rb_yield(row);
497
582
  row = rb_ary_new();
498
- }
499
- #line 28 "ext/fastcsv/fastcsv.rl"
500
- {
501
- curline++;
502
583
  }
503
584
  goto st7;
504
- tr12:
505
- #line 91 "ext/fastcsv/fastcsv.rl"
585
+ tr11:
586
+ #line 117 "ext/fastcsv/fastcsv.rl"
506
587
  {
588
+ mark_row_sep = p;
589
+
590
+ curline++;
591
+
592
+ if (d->start == 0 || p == d->start) {
593
+ rb_ivar_set(self, s_row, rb_str_new2(""));
594
+ }
595
+ else if (p > d->start) {
596
+ rb_ivar_set(self, s_row, rb_str_new(d->start, p - d->start));
597
+ }
598
+
507
599
  if (!NIL_P(field) || RARRAY_LEN(row)) { // same as new_field
508
600
  rb_ary_push(row, field);
509
601
  field = Qnil;
@@ -511,24 +603,20 @@ tr12:
511
603
 
512
604
  rb_yield(row);
513
605
  row = rb_ary_new();
514
- }
515
- #line 28 "ext/fastcsv/fastcsv.rl"
516
- {
517
- curline++;
518
606
  }
519
607
  goto st7;
520
608
  st7:
521
609
  if ( ++p == pe )
522
610
  goto _test_eof7;
523
611
  case 7:
524
- #line 525 "ext/fastcsv/fastcsv.c"
525
- switch( (*p) ) {
526
- case 0: goto tr18;
527
- case 10: goto tr19;
528
- }
529
- goto tr17;
530
- tr5:
531
- #line 40 "ext/fastcsv/fastcsv.rl"
612
+ #line 613 "ext/fastcsv/fastcsv.c"
613
+ if ( (*p) == 10 )
614
+ goto st6;
615
+ goto tr16;
616
+ tr13:
617
+ #line 1 "NONE"
618
+ {te = p+1;}
619
+ #line 49 "ext/fastcsv/fastcsv.rl"
532
620
  {
533
621
  if (p == ts) {
534
622
  // Unquoted empty fields are nil, not "", in Ruby.
@@ -539,73 +627,40 @@ tr5:
539
627
  ENCODE;
540
628
  }
541
629
  }
542
- #line 86 "ext/fastcsv/fastcsv.rl"
543
- {
544
- rb_ary_push(row, field);
545
- field = Qnil;
546
- }
547
- goto st8;
548
- tr13:
549
- #line 86 "ext/fastcsv/fastcsv.rl"
550
- {
551
- rb_ary_push(row, field);
552
- field = Qnil;
553
- }
554
- goto st8;
555
- st8:
556
- if ( ++p == pe )
557
- goto _test_eof8;
558
- case 8:
559
- #line 560 "ext/fastcsv/fastcsv.c"
560
- if ( (*p) == 0 )
561
- goto tr21;
562
- goto tr20;
563
- tr14:
564
- #line 1 "NONE"
565
- {te = p+1;}
566
- #line 101 "ext/fastcsv/fastcsv.rl"
630
+ #line 138 "ext/fastcsv/fastcsv.rl"
567
631
  {
632
+ if (d->start == 0 || p == d->start) {
633
+ rb_ivar_set(self, s_row, rb_str_new2(""));
634
+ }
635
+ else if (p > d->start) {
636
+ rb_ivar_set(self, s_row, rb_str_new(d->start, p - d->start));
637
+ }
638
+
568
639
  if (!NIL_P(field) || RARRAY_LEN(row)) {
569
640
  rb_ary_push(row, field);
570
641
  }
642
+
571
643
  if (RARRAY_LEN(row)) {
572
644
  rb_yield(row);
573
645
  }
574
646
  }
575
- #line 40 "ext/fastcsv/fastcsv.rl"
576
- {
577
- if (p == ts) {
578
- // Unquoted empty fields are nil, not "", in Ruby.
579
- field = Qnil;
580
- }
581
- else if (p > ts) {
582
- field = rb_enc_str_new(ts, p - ts, encoding);
583
- ENCODE;
584
- }
585
- }
586
- #line 123 "ext/fastcsv/fastcsv.rl"
647
+ #line 168 "ext/fastcsv/fastcsv.rl"
587
648
  {act = 3;}
588
- goto st9;
589
- st9:
649
+ goto st8;
650
+ st8:
590
651
  if ( ++p == pe )
591
- goto _test_eof9;
592
- case 9:
593
- #line 594 "ext/fastcsv/fastcsv.c"
652
+ goto _test_eof8;
653
+ case 8:
654
+ #line 655 "ext/fastcsv/fastcsv.c"
594
655
  switch( (*p) ) {
595
- case 10: goto tr16;
596
- case 13: goto tr16;
597
- case 34: goto tr16;
598
- case 44: goto tr16;
656
+ case 10: goto tr15;
657
+ case 13: goto tr15;
658
+ case 34: goto tr15;
659
+ case 44: goto tr15;
599
660
  }
600
661
  goto st1;
601
- tr8:
602
- #line 28 "ext/fastcsv/fastcsv.rl"
603
- {
604
- curline++;
605
- }
606
- goto st2;
607
- tr15:
608
- #line 32 "ext/fastcsv/fastcsv.rl"
662
+ tr14:
663
+ #line 41 "ext/fastcsv/fastcsv.rl"
609
664
  {
610
665
  unclosed_line = curline;
611
666
  }
@@ -614,19 +669,17 @@ st2:
614
669
  if ( ++p == pe )
615
670
  goto _test_eof2;
616
671
  case 2:
617
- #line 618 "ext/fastcsv/fastcsv.c"
672
+ #line 673 "ext/fastcsv/fastcsv.c"
618
673
  switch( (*p) ) {
619
674
  case 0: goto st0;
620
- case 10: goto tr8;
621
- case 13: goto tr8;
622
- case 34: goto tr9;
675
+ case 34: goto tr8;
623
676
  }
624
677
  goto st2;
625
678
  st0:
626
679
  cs = 0;
627
680
  goto _out;
628
- tr9:
629
- #line 51 "ext/fastcsv/fastcsv.rl"
681
+ tr8:
682
+ #line 60 "ext/fastcsv/fastcsv.rl"
630
683
  {
631
684
  if (p == ts) {
632
685
  field = rb_enc_str_new("", 0, encoding);
@@ -653,7 +706,7 @@ tr9:
653
706
  reader++;
654
707
  }
655
708
 
656
- field = rb_enc_str_new(copy, writer - copy, enc);
709
+ field = rb_enc_str_new(copy, writer - copy, encoding);
657
710
  ENCODE;
658
711
 
659
712
  if (copy != NULL) {
@@ -661,7 +714,7 @@ tr9:
661
714
  }
662
715
  }
663
716
  }
664
- #line 36 "ext/fastcsv/fastcsv.rl"
717
+ #line 45 "ext/fastcsv/fastcsv.rl"
665
718
  {
666
719
  unclosed_line = 0;
667
720
  }
@@ -670,13 +723,13 @@ st3:
670
723
  if ( ++p == pe )
671
724
  goto _test_eof3;
672
725
  case 3:
673
- #line 674 "ext/fastcsv/fastcsv.c"
726
+ #line 727 "ext/fastcsv/fastcsv.c"
674
727
  switch( (*p) ) {
675
- case 0: goto tr10;
676
- case 10: goto tr11;
677
- case 13: goto tr12;
728
+ case 0: goto tr9;
729
+ case 10: goto tr10;
730
+ case 13: goto tr11;
678
731
  case 34: goto st2;
679
- case 44: goto tr13;
732
+ case 44: goto tr12;
680
733
  }
681
734
  goto st0;
682
735
  }
@@ -686,7 +739,6 @@ case 3:
686
739
  _test_eof6: cs = 6; goto _test_eof;
687
740
  _test_eof7: cs = 7; goto _test_eof;
688
741
  _test_eof8: cs = 8; goto _test_eof;
689
- _test_eof9: cs = 9; goto _test_eof;
690
742
  _test_eof2: cs = 2; goto _test_eof;
691
743
  _test_eof3: cs = 3; goto _test_eof;
692
744
 
@@ -695,32 +747,26 @@ case 3:
695
747
  {
696
748
  switch ( cs ) {
697
749
  case 1: goto tr0;
698
- case 5: goto tr16;
699
- case 6: goto tr17;
700
- case 7: goto tr17;
701
- case 8: goto tr20;
702
- case 9: goto tr16;
750
+ case 5: goto tr15;
751
+ case 6: goto tr16;
752
+ case 7: goto tr16;
753
+ case 8: goto tr15;
703
754
  }
704
755
  }
705
756
 
706
757
  _out: {}
707
758
  }
708
759
 
709
- #line 359 "ext/fastcsv/fastcsv.rl"
760
+ #line 427 "ext/fastcsv/fastcsv.rl"
761
+
762
+ if (done && cs < raw_parse_first_final) {
763
+ FREE;
710
764
 
711
- if (done && cs < fastcsv_first_final) {
712
- if (buf != NULL) {
713
- free(buf);
714
- }
715
765
  if (unclosed_line) {
716
- rb_raise(rb_eParseError, "Unclosed quoted field on line %d.", unclosed_line);
766
+ rb_raise(eError, "Unclosed quoted field on line %d.", unclosed_line);
717
767
  }
718
- // Ruby raises different errors for illegal quoting, depending on whether
719
- // a quoted string is followed by a string ("Unclosed quoted field on line
720
- // %d.") or by a string ending in a quote ("Missing or stray quote in line
721
- // %d"). These precisions are kind of bogus, but we can try using $!.
722
768
  else {
723
- rb_raise(rb_eParseError, "Illegal quoting in line %d.", curline);
769
+ rb_raise(eError, "Illegal quoting in line %d.", curline);
724
770
  }
725
771
  }
726
772
 
@@ -735,23 +781,35 @@ case 3:
735
781
  }
736
782
  }
737
783
 
738
- if (buf != NULL) {
739
- free(buf);
740
- }
784
+ FREE;
741
785
 
742
786
  return Qnil;
743
787
  }
744
788
 
789
+ // @see https://github.com/ruby/ruby/blob/trunk/README.EXT#L616
790
+ static VALUE allocate(VALUE class) {
791
+ // @see https://github.com/nofxx/georuby_c/blob/b3b91fd90980d7c295ac8f6012d89878ea7cd569/ext/line.c#L66
792
+ Data *d = ALLOC(Data);
793
+ d->start = 0;
794
+ // @see https://github.com/nofxx/georuby_c/blob/b3b91fd90980d7c295ac8f6012d89878ea7cd569/ext/point.h#L26
795
+ // Calling rb_gc_mark(d->start) or rb_gc_mark(d) causes the warning "passing argument 1 of ‘rb_gc_mark’ makes integer from pointer without a cast"
796
+ // free(d->start) causes error "pointer being freed was not allocated"
797
+ return Data_Wrap_Struct(class, NULL, free, d);
798
+ }
799
+
800
+ // @see http://tenderlovemaking.com/2009/12/18/writing-ruby-c-extensions-part-1.html
801
+ // @see http://tenderlovemaking.com/2010/12/11/writing-ruby-c-extensions-part-2.html
745
802
  void Init_fastcsv() {
746
803
  s_read = rb_intern("read");
747
- s_to_str = rb_intern("to_str");
748
- s_internal_encoding = rb_intern("internal_encoding");
749
- s_external_encoding = rb_intern("external_encoding");
750
- s_string = rb_intern("string");
751
- s_encoding = rb_intern("encoding");
752
-
753
- mModule = rb_define_module("FastCSV");
754
- rb_define_attr(rb_singleton_class(mModule), "buffer_size", 1, 1);
755
- rb_define_singleton_method(mModule, "raw_parse", fastcsv, -1);
756
- rb_eParseError = rb_define_class_under(mModule, "ParseError", rb_eStandardError);
804
+ s_row = rb_intern("@row");
805
+
806
+ cClass = rb_define_class("FastCSV", rb_const_get(rb_cObject, rb_intern("CSV"))); // class FastCSV < CSV
807
+ cParser = rb_define_class_under(cClass, "Parser", rb_cObject); // class Parser
808
+ rb_define_alloc_func(cParser, allocate); //
809
+ rb_define_method(cParser, "raw_parse", raw_parse, -1); // def raw_parse(port, opts = nil); end
810
+ rb_define_attr(cParser, "row", 1, 0); // attr_reader :row
811
+ rb_define_attr(cParser, "buffer_size", 1, 1); // attr_accessor :buffer_size
812
+ // end
813
+ eError = rb_define_class_under(cClass, "MalformedCSVError", rb_eRuntimeError); // class MalformedCSVError < RuntimeError
814
+ // end
757
815
  }