fastcsv 0.0.2 → 0.0.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -7,7 +7,7 @@
7
7
  // http://w3c.github.io/csvw/syntax/#ebnf
8
8
 
9
9
  // CSV implementation.
10
- // https://github.com/ruby/ruby/blob/master/lib/csv.rb
10
+ // https://github.com/ruby/ruby/blob/trunk/lib/csv.rb
11
11
 
12
12
  // Ruby C extensions help.
13
13
  // https://github.com/ruby/ruby/blob/trunk/README.EXT
@@ -21,31 +21,42 @@ if (enc2 != NULL) { \
21
21
  field = rb_str_encode(field, rb_enc_from_encoding(enc), 0, Qnil); \
22
22
  }
23
23
 
24
- static VALUE mModule, rb_eParseError;
25
- static ID s_read, s_to_str, s_internal_encoding, s_external_encoding, s_string, s_encoding;
24
+ #define FREE \
25
+ if (buf != NULL) { \
26
+ free(buf); \
27
+ } \
28
+ if (row_sep != NULL) { \
29
+ free(row_sep); \
30
+ }
31
+
32
+ static VALUE cClass, cParser, eError;
33
+ static ID s_read, s_row;
34
+
35
+ // @see https://github.com/nofxx/georuby_c/blob/b3b91fd90980d7c295ac8f6012d89878ea7cd569/ext/types.h#L22
36
+ typedef struct {
37
+ char *start;
38
+ } Data;
26
39
 
27
40
 
28
- #line 125 "ext/fastcsv/fastcsv.rl"
41
+ #line 170 "ext/fastcsv/fastcsv.rl"
29
42
 
30
43
 
31
44
 
32
- #line 33 "ext/fastcsv/fastcsv.c"
33
- static const int fastcsv_start = 4;
34
- static const int fastcsv_first_final = 4;
35
- static const int fastcsv_error = 0;
45
+ #line 46 "ext/fastcsv/fastcsv.c"
46
+ static const int raw_parse_start = 4;
47
+ static const int raw_parse_first_final = 4;
48
+ static const int raw_parse_error = 0;
36
49
 
37
- static const int fastcsv_en_main = 4;
50
+ static const int raw_parse_en_main = 4;
38
51
 
39
52
 
40
- #line 128 "ext/fastcsv/fastcsv.rl"
53
+ #line 173 "ext/fastcsv/fastcsv.rl"
41
54
 
42
55
  // 16 kB
43
56
  #define BUFSIZE 16384
44
57
 
45
58
  // @see http://rxr.whitequark.org/mri/source/io.c#4845
46
- static void
47
- rb_io_ext_int_to_encs(rb_encoding *ext, rb_encoding *intern, rb_encoding **enc, rb_encoding **enc2, int fmode)
48
- {
59
+ static void rb_io_ext_int_to_encs(rb_encoding *ext, rb_encoding *intern, rb_encoding **enc, rb_encoding **enc2, int fmode) {
49
60
  int default_ext = 0;
50
61
 
51
62
  if (ext == NULL) {
@@ -70,15 +81,17 @@ rb_io_ext_int_to_encs(rb_encoding *ext, rb_encoding *intern, rb_encoding **enc,
70
81
  }
71
82
  }
72
83
 
73
- VALUE fastcsv(int argc, VALUE *argv, VALUE self) {
84
+ static VALUE raw_parse(int argc, VALUE *argv, VALUE self) {
74
85
  int cs, act, have = 0, curline = 1, io = 0;
75
- char *ts = 0, *te = 0, *buf = 0, *eof = 0;
86
+ char *ts = 0, *te = 0, *buf = 0, *eof = 0, *mark_row_sep = 0, *row_sep = NULL;
76
87
 
77
- VALUE port, opts;
88
+ VALUE port, opts, r_encoding;
78
89
  VALUE row = rb_ary_new(), field = Qnil, bufsize = Qnil;
79
- int done = 0, unclosed_line = 0, buffer_size = 0, taint = 0;
90
+ int done = 0, unclosed_line = 0, len_row_sep = 0, buffer_size = 0, taint = 0;
80
91
  rb_encoding *enc = NULL, *enc2 = NULL, *encoding = NULL;
81
- VALUE r_encoding;
92
+
93
+ Data *d;
94
+ Data_Get_Struct(self, Data, d);
82
95
 
83
96
  VALUE option;
84
97
  char quote_char = '"';
@@ -87,8 +100,8 @@ VALUE fastcsv(int argc, VALUE *argv, VALUE self) {
87
100
  taint = OBJ_TAINTED(port);
88
101
  io = rb_respond_to(port, s_read);
89
102
  if (!io) {
90
- if (rb_respond_to(port, s_to_str)) {
91
- port = rb_funcall(port, s_to_str, 0);
103
+ if (rb_respond_to(port, rb_intern("to_str"))) {
104
+ port = rb_funcall(port, rb_intern("to_str"), 0);
92
105
  StringValue(port);
93
106
  }
94
107
  else {
@@ -112,7 +125,7 @@ VALUE fastcsv(int argc, VALUE *argv, VALUE self) {
112
125
  // @see http://ruby-doc.org/core-2.1.1/IO.html#method-c-new-label-Open+Mode
113
126
  option = rb_hash_aref(opts, ID2SYM(rb_intern("encoding")));
114
127
  if (TYPE(option) == T_STRING) {
115
- // parse_mode_enc is not in header file.
128
+ // `parse_mode_enc` is not in header file.
116
129
  const char *estr = StringValueCStr(option), *ptr;
117
130
  char encname[ENCODING_MAXNAMELEN+1];
118
131
  int idx, idx2;
@@ -123,17 +136,17 @@ VALUE fastcsv(int argc, VALUE *argv, VALUE self) {
123
136
  ptr = strrchr(estr, ':');
124
137
  if (ptr) {
125
138
  long len = (ptr++) - estr;
126
- if (len == 0 || len > ENCODING_MAXNAMELEN) {
139
+ if (len == 0 || len > ENCODING_MAXNAMELEN) { // ":enc"
127
140
  idx = -1;
128
141
  }
129
- else {
142
+ else { // "enc2:enc" or "enc:-"
130
143
  memcpy(encname, estr, len);
131
144
  encname[len] = '\0';
132
145
  estr = encname;
133
146
  idx = rb_enc_find_index(encname);
134
147
  }
135
148
  }
136
- else {
149
+ else { // "enc"
137
150
  idx = rb_enc_find_index(estr);
138
151
  }
139
152
 
@@ -141,7 +154,7 @@ VALUE fastcsv(int argc, VALUE *argv, VALUE self) {
141
154
  ext_enc = rb_enc_from_index(idx);
142
155
  }
143
156
  else {
144
- if (idx != -2) {
157
+ if (idx != -2) { // ":enc"
145
158
  // `unsupported_encoding` is not in header file.
146
159
  rb_warn("Unsupported encoding %s ignored", estr);
147
160
  }
@@ -150,11 +163,11 @@ VALUE fastcsv(int argc, VALUE *argv, VALUE self) {
150
163
 
151
164
  int_enc = NULL;
152
165
  if (ptr) {
153
- if (*ptr == '-' && *(ptr+1) == '\0') {
166
+ if (*ptr == '-' && *(ptr+1) == '\0') { // "enc:-"
154
167
  /* Special case - "-" => no transcoding */
155
168
  int_enc = (rb_encoding *)Qnil;
156
169
  }
157
- else {
170
+ else { // "enc2:enc"
158
171
  idx2 = rb_enc_find_index(ptr);
159
172
  if (idx2 < 0) {
160
173
  // `unsupported_encoding` is not in header file.
@@ -175,29 +188,33 @@ VALUE fastcsv(int argc, VALUE *argv, VALUE self) {
175
188
  rb_raise(rb_eArgError, ":encoding has to be a String");
176
189
  }
177
190
 
178
- // @see https://github.com/ruby/ruby/blob/70510d026f8d86693dccaba07417488eed09b41d/lib/csv.rb#L1567
179
- // @see https://github.com/ruby/ruby/blob/70510d026f8d86693dccaba07417488eed09b41d/lib/csv.rb#L2300
180
- if (rb_respond_to(port, s_internal_encoding)) {
181
- r_encoding = rb_funcall(port, s_internal_encoding, 0);
191
+ // @see CSV#raw_encoding
192
+ // @see https://github.com/ruby/ruby/blob/ab337e61ecb5f42384ba7d710c36faf96a454e5c/lib/csv.rb#L2290
193
+ if (rb_respond_to(port, rb_intern("internal_encoding"))) {
194
+ r_encoding = rb_funcall(port, rb_intern("internal_encoding"), 0);
182
195
  if (NIL_P(r_encoding)) {
183
- r_encoding = rb_funcall(port, s_external_encoding, 0);
196
+ r_encoding = rb_funcall(port, rb_intern("external_encoding"), 0);
184
197
  }
185
198
  }
186
- else if (rb_respond_to(port, s_string)) {
187
- r_encoding = rb_funcall(rb_funcall(port, s_string, 0), s_encoding, 0);
199
+ else if (rb_respond_to(port, rb_intern("string"))) {
200
+ r_encoding = rb_funcall(rb_funcall(port, rb_intern("string"), 0), rb_intern("encoding"), 0);
188
201
  }
189
- else if (rb_respond_to(port, s_encoding)) {
190
- r_encoding = rb_funcall(port, s_encoding, 0);
202
+ else if (rb_respond_to(port, rb_intern("encoding"))) {
203
+ r_encoding = rb_funcall(port, rb_intern("encoding"), 0);
191
204
  }
192
205
  else {
193
206
  r_encoding = rb_enc_from_encoding(rb_ascii8bit_encoding());
194
207
  }
208
+
209
+ // @see CSV#initialize
210
+ // @see https://github.com/ruby/ruby/blob/ab337e61ecb5f42384ba7d710c36faf96a454e5c/lib/csv.rb#L1510
195
211
  if (NIL_P(r_encoding)) {
196
212
  r_encoding = rb_enc_from_encoding(rb_default_internal_encoding());
197
213
  }
198
214
  if (NIL_P(r_encoding)) {
199
215
  r_encoding = rb_enc_from_encoding(rb_default_external_encoding());
200
216
  }
217
+
201
218
  if (enc2 != NULL) {
202
219
  encoding = enc2;
203
220
  }
@@ -208,11 +225,17 @@ VALUE fastcsv(int argc, VALUE *argv, VALUE self) {
208
225
  encoding = rb_enc_get(r_encoding);
209
226
  }
210
227
 
228
+ rb_ivar_set(self, s_row, Qnil);
229
+
211
230
  buffer_size = BUFSIZE;
212
231
  if (rb_ivar_defined(self, rb_intern("@buffer_size")) == Qtrue) {
213
232
  bufsize = rb_ivar_get(self, rb_intern("@buffer_size"));
214
233
  if (!NIL_P(bufsize)) {
215
234
  buffer_size = NUM2INT(bufsize);
235
+ // buffer_size = 0 can cause segmentation faults.
236
+ if (buffer_size == 0) {
237
+ buffer_size = BUFSIZE;
238
+ }
216
239
  }
217
240
  }
218
241
 
@@ -221,39 +244,47 @@ VALUE fastcsv(int argc, VALUE *argv, VALUE self) {
221
244
  }
222
245
 
223
246
 
224
- #line 225 "ext/fastcsv/fastcsv.c"
247
+ #line 248 "ext/fastcsv/fastcsv.c"
225
248
  {
226
- cs = fastcsv_start;
249
+ cs = raw_parse_start;
227
250
  ts = 0;
228
251
  te = 0;
229
252
  act = 0;
230
253
  }
231
254
 
232
- #line 311 "ext/fastcsv/fastcsv.rl"
255
+ #line 366 "ext/fastcsv/fastcsv.rl"
233
256
 
234
257
  while (!done) {
235
258
  VALUE str;
236
259
  char *p, *pe;
237
- int len, space = buffer_size - have, tokstart_diff, tokend_diff;
260
+ int len, space = buffer_size - have, tokstart_diff, tokend_diff, start_diff, mark_row_sep_diff;
238
261
 
239
262
  if (io) {
240
263
  if (space == 0) {
241
- tokstart_diff = ts - buf;
242
- tokend_diff = te - buf;
264
+ // Not moving d->start will cause intermittent segmentation faults.
265
+ tokstart_diff = ts - buf;
266
+ tokend_diff = te - buf;
267
+ start_diff = d->start - buf;
268
+ mark_row_sep_diff = mark_row_sep - buf;
243
269
 
244
- buffer_size += BUFSIZE;
245
- REALLOC_N(buf, char, buffer_size);
270
+ buffer_size += BUFSIZE;
271
+ REALLOC_N(buf, char, buffer_size);
246
272
 
247
- space = buffer_size - have;
273
+ space = buffer_size - have;
248
274
 
249
- ts = buf + tokstart_diff;
250
- te = buf + tokend_diff;
275
+ ts = buf + tokstart_diff;
276
+ te = buf + tokend_diff;
277
+ d->start = buf + start_diff;
278
+ mark_row_sep = buf + mark_row_sep_diff;
251
279
  }
252
280
  p = buf + have;
253
281
 
282
+ // Reads "`length` bytes without any conversion (binary mode)."
283
+ // "The resulted string is always ASCII-8BIT encoding."
284
+ // @see http://www.ruby-doc.org/core-2.1.4/IO.html#method-i-read
254
285
  str = rb_funcall(port, s_read, 1, INT2FIX(space));
255
286
  if (NIL_P(str)) {
256
- // StringIO#read returns nil for empty string.
287
+ // "`nil` means it met EOF at beginning," e.g. for `StringIO.new("")`.
257
288
  len = 0;
258
289
  }
259
290
  else {
@@ -261,6 +292,7 @@ VALUE fastcsv(int argc, VALUE *argv, VALUE self) {
261
292
  memcpy(p, StringValuePtr(str), len);
262
293
  }
263
294
 
295
+ // "The 1 to `length`-1 bytes string means it met EOF after reading the result."
264
296
  if (len < space) {
265
297
  // EOF actions don't work in scanners, so we add a sentinel value.
266
298
  // @see http://www.complang.org/pipermail/ragel-users/2007-May/001516.html
@@ -276,9 +308,13 @@ VALUE fastcsv(int argc, VALUE *argv, VALUE self) {
276
308
  done = 1;
277
309
  }
278
310
 
311
+ if (d->start == 0) {
312
+ d->start = p;
313
+ }
314
+
279
315
  pe = p + len;
280
316
 
281
- #line 282 "ext/fastcsv/fastcsv.c"
317
+ #line 318 "ext/fastcsv/fastcsv.c"
282
318
  {
283
319
  if ( p == pe )
284
320
  goto _test_eof;
@@ -296,56 +332,80 @@ tr0:
296
332
  }
297
333
  }
298
334
  goto st4;
299
- tr10:
300
- #line 101 "ext/fastcsv/fastcsv.rl"
335
+ tr5:
336
+ #line 49 "ext/fastcsv/fastcsv.rl"
301
337
  {
302
- if (!NIL_P(field) || RARRAY_LEN(row)) {
303
- rb_ary_push(row, field);
338
+ if (p == ts) {
339
+ // Unquoted empty fields are nil, not "", in Ruby.
340
+ field = Qnil;
304
341
  }
305
- if (RARRAY_LEN(row)) {
306
- rb_yield(row);
342
+ else if (p > ts) {
343
+ field = rb_enc_str_new(ts, p - ts, encoding);
344
+ ENCODE;
307
345
  }
308
346
  }
309
- #line 123 "ext/fastcsv/fastcsv.rl"
347
+ #line 95 "ext/fastcsv/fastcsv.rl"
348
+ {
349
+ rb_ary_push(row, field);
350
+ field = Qnil;
351
+ }
352
+ #line 166 "ext/fastcsv/fastcsv.rl"
310
353
  {te = p+1;}
311
354
  goto st4;
312
- tr16:
313
- #line 123 "ext/fastcsv/fastcsv.rl"
314
- {te = p;p--;}
315
- goto st4;
316
- tr17:
317
- #line 122 "ext/fastcsv/fastcsv.rl"
318
- {te = p;p--;}
319
- goto st4;
320
- tr18:
321
- #line 101 "ext/fastcsv/fastcsv.rl"
355
+ tr9:
356
+ #line 138 "ext/fastcsv/fastcsv.rl"
322
357
  {
358
+ if (d->start == 0 || p == d->start) {
359
+ rb_ivar_set(self, s_row, rb_str_new2(""));
360
+ }
361
+ else if (p > d->start) {
362
+ rb_ivar_set(self, s_row, rb_str_new(d->start, p - d->start));
363
+ }
364
+
323
365
  if (!NIL_P(field) || RARRAY_LEN(row)) {
324
366
  rb_ary_push(row, field);
325
367
  }
368
+
326
369
  if (RARRAY_LEN(row)) {
327
370
  rb_yield(row);
328
371
  }
329
372
  }
330
- #line 122 "ext/fastcsv/fastcsv.rl"
373
+ #line 168 "ext/fastcsv/fastcsv.rl"
374
+ {te = p+1;}
375
+ goto st4;
376
+ tr12:
377
+ #line 95 "ext/fastcsv/fastcsv.rl"
378
+ {
379
+ rb_ary_push(row, field);
380
+ field = Qnil;
381
+ }
382
+ #line 166 "ext/fastcsv/fastcsv.rl"
331
383
  {te = p+1;}
332
384
  goto st4;
333
- tr20:
334
- #line 121 "ext/fastcsv/fastcsv.rl"
385
+ tr15:
386
+ #line 168 "ext/fastcsv/fastcsv.rl"
335
387
  {te = p;p--;}
336
388
  goto st4;
337
- tr21:
338
- #line 101 "ext/fastcsv/fastcsv.rl"
389
+ tr16:
390
+ #line 100 "ext/fastcsv/fastcsv.rl"
339
391
  {
340
- if (!NIL_P(field) || RARRAY_LEN(row)) {
341
- rb_ary_push(row, field);
392
+ d->start = p;
393
+
394
+ if (len_row_sep) {
395
+ if (p - mark_row_sep != len_row_sep || row_sep[0] != *mark_row_sep || len_row_sep == 2 && row_sep[1] != *(mark_row_sep + 1)) {
396
+ FREE;
397
+
398
+ rb_raise(eError, "Unquoted fields do not allow \\r or \\n (line %d).", curline - 1);
399
+ }
342
400
  }
343
- if (RARRAY_LEN(row)) {
344
- rb_yield(row);
401
+ else {
402
+ len_row_sep = p - mark_row_sep;
403
+ row_sep = ALLOC_N(char, p - mark_row_sep);
404
+ memcpy(row_sep, mark_row_sep, p - mark_row_sep);
345
405
  }
346
406
  }
347
- #line 121 "ext/fastcsv/fastcsv.rl"
348
- {te = p+1;}
407
+ #line 167 "ext/fastcsv/fastcsv.rl"
408
+ {te = p;p--;}
349
409
  goto st4;
350
410
  st4:
351
411
  #line 1 "NONE"
@@ -357,12 +417,12 @@ st4:
357
417
  case 4:
358
418
  #line 1 "NONE"
359
419
  {ts = p;}
360
- #line 361 "ext/fastcsv/fastcsv.c"
420
+ #line 421 "ext/fastcsv/fastcsv.c"
361
421
  switch( (*p) ) {
362
- case 0: goto tr14;
422
+ case 0: goto tr13;
363
423
  case 10: goto tr3;
364
424
  case 13: goto tr4;
365
- case 34: goto tr15;
425
+ case 34: goto tr14;
366
426
  case 44: goto tr5;
367
427
  }
368
428
  goto st1;
@@ -381,7 +441,7 @@ case 1:
381
441
  tr2:
382
442
  #line 1 "NONE"
383
443
  {te = p+1;}
384
- #line 40 "ext/fastcsv/fastcsv.rl"
444
+ #line 49 "ext/fastcsv/fastcsv.rl"
385
445
  {
386
446
  if (p == ts) {
387
447
  // Unquoted empty fields are nil, not "", in Ruby.
@@ -392,33 +452,41 @@ tr2:
392
452
  ENCODE;
393
453
  }
394
454
  }
395
- #line 101 "ext/fastcsv/fastcsv.rl"
455
+ #line 138 "ext/fastcsv/fastcsv.rl"
396
456
  {
457
+ if (d->start == 0 || p == d->start) {
458
+ rb_ivar_set(self, s_row, rb_str_new2(""));
459
+ }
460
+ else if (p > d->start) {
461
+ rb_ivar_set(self, s_row, rb_str_new(d->start, p - d->start));
462
+ }
463
+
397
464
  if (!NIL_P(field) || RARRAY_LEN(row)) {
398
465
  rb_ary_push(row, field);
399
466
  }
467
+
400
468
  if (RARRAY_LEN(row)) {
401
469
  rb_yield(row);
402
470
  }
403
471
  }
404
- #line 123 "ext/fastcsv/fastcsv.rl"
472
+ #line 168 "ext/fastcsv/fastcsv.rl"
405
473
  {act = 3;}
406
474
  goto st5;
407
475
  st5:
408
476
  if ( ++p == pe )
409
477
  goto _test_eof5;
410
478
  case 5:
411
- #line 412 "ext/fastcsv/fastcsv.c"
479
+ #line 480 "ext/fastcsv/fastcsv.c"
412
480
  switch( (*p) ) {
413
481
  case 0: goto tr2;
414
482
  case 10: goto tr3;
415
483
  case 13: goto tr4;
416
- case 34: goto tr16;
484
+ case 34: goto tr15;
417
485
  case 44: goto tr5;
418
486
  }
419
487
  goto st1;
420
488
  tr3:
421
- #line 40 "ext/fastcsv/fastcsv.rl"
489
+ #line 49 "ext/fastcsv/fastcsv.rl"
422
490
  {
423
491
  if (p == ts) {
424
492
  // Unquoted empty fields are nil, not "", in Ruby.
@@ -429,8 +497,19 @@ tr3:
429
497
  ENCODE;
430
498
  }
431
499
  }
432
- #line 91 "ext/fastcsv/fastcsv.rl"
500
+ #line 117 "ext/fastcsv/fastcsv.rl"
433
501
  {
502
+ mark_row_sep = p;
503
+
504
+ curline++;
505
+
506
+ if (d->start == 0 || p == d->start) {
507
+ rb_ivar_set(self, s_row, rb_str_new2(""));
508
+ }
509
+ else if (p > d->start) {
510
+ rb_ivar_set(self, s_row, rb_str_new(d->start, p - d->start));
511
+ }
512
+
434
513
  if (!NIL_P(field) || RARRAY_LEN(row)) { // same as new_field
435
514
  rb_ary_push(row, field);
436
515
  field = Qnil;
@@ -438,21 +517,22 @@ tr3:
438
517
 
439
518
  rb_yield(row);
440
519
  row = rb_ary_new();
441
- }
442
- #line 28 "ext/fastcsv/fastcsv.rl"
443
- {
444
- curline++;
445
520
  }
446
521
  goto st6;
447
- tr19:
448
- #line 28 "ext/fastcsv/fastcsv.rl"
522
+ tr10:
523
+ #line 117 "ext/fastcsv/fastcsv.rl"
449
524
  {
525
+ mark_row_sep = p;
526
+
450
527
  curline++;
451
- }
452
- goto st6;
453
- tr11:
454
- #line 91 "ext/fastcsv/fastcsv.rl"
455
- {
528
+
529
+ if (d->start == 0 || p == d->start) {
530
+ rb_ivar_set(self, s_row, rb_str_new2(""));
531
+ }
532
+ else if (p > d->start) {
533
+ rb_ivar_set(self, s_row, rb_str_new(d->start, p - d->start));
534
+ }
535
+
456
536
  if (!NIL_P(field) || RARRAY_LEN(row)) { // same as new_field
457
537
  rb_ary_push(row, field);
458
538
  field = Qnil;
@@ -460,22 +540,16 @@ tr11:
460
540
 
461
541
  rb_yield(row);
462
542
  row = rb_ary_new();
463
- }
464
- #line 28 "ext/fastcsv/fastcsv.rl"
465
- {
466
- curline++;
467
543
  }
468
544
  goto st6;
469
545
  st6:
470
546
  if ( ++p == pe )
471
547
  goto _test_eof6;
472
548
  case 6:
473
- #line 474 "ext/fastcsv/fastcsv.c"
474
- if ( (*p) == 0 )
475
- goto tr18;
476
- goto tr17;
549
+ #line 550 "ext/fastcsv/fastcsv.c"
550
+ goto tr16;
477
551
  tr4:
478
- #line 40 "ext/fastcsv/fastcsv.rl"
552
+ #line 49 "ext/fastcsv/fastcsv.rl"
479
553
  {
480
554
  if (p == ts) {
481
555
  // Unquoted empty fields are nil, not "", in Ruby.
@@ -486,8 +560,19 @@ tr4:
486
560
  ENCODE;
487
561
  }
488
562
  }
489
- #line 91 "ext/fastcsv/fastcsv.rl"
563
+ #line 117 "ext/fastcsv/fastcsv.rl"
490
564
  {
565
+ mark_row_sep = p;
566
+
567
+ curline++;
568
+
569
+ if (d->start == 0 || p == d->start) {
570
+ rb_ivar_set(self, s_row, rb_str_new2(""));
571
+ }
572
+ else if (p > d->start) {
573
+ rb_ivar_set(self, s_row, rb_str_new(d->start, p - d->start));
574
+ }
575
+
491
576
  if (!NIL_P(field) || RARRAY_LEN(row)) { // same as new_field
492
577
  rb_ary_push(row, field);
493
578
  field = Qnil;
@@ -495,15 +580,22 @@ tr4:
495
580
 
496
581
  rb_yield(row);
497
582
  row = rb_ary_new();
498
- }
499
- #line 28 "ext/fastcsv/fastcsv.rl"
500
- {
501
- curline++;
502
583
  }
503
584
  goto st7;
504
- tr12:
505
- #line 91 "ext/fastcsv/fastcsv.rl"
585
+ tr11:
586
+ #line 117 "ext/fastcsv/fastcsv.rl"
506
587
  {
588
+ mark_row_sep = p;
589
+
590
+ curline++;
591
+
592
+ if (d->start == 0 || p == d->start) {
593
+ rb_ivar_set(self, s_row, rb_str_new2(""));
594
+ }
595
+ else if (p > d->start) {
596
+ rb_ivar_set(self, s_row, rb_str_new(d->start, p - d->start));
597
+ }
598
+
507
599
  if (!NIL_P(field) || RARRAY_LEN(row)) { // same as new_field
508
600
  rb_ary_push(row, field);
509
601
  field = Qnil;
@@ -511,24 +603,20 @@ tr12:
511
603
 
512
604
  rb_yield(row);
513
605
  row = rb_ary_new();
514
- }
515
- #line 28 "ext/fastcsv/fastcsv.rl"
516
- {
517
- curline++;
518
606
  }
519
607
  goto st7;
520
608
  st7:
521
609
  if ( ++p == pe )
522
610
  goto _test_eof7;
523
611
  case 7:
524
- #line 525 "ext/fastcsv/fastcsv.c"
525
- switch( (*p) ) {
526
- case 0: goto tr18;
527
- case 10: goto tr19;
528
- }
529
- goto tr17;
530
- tr5:
531
- #line 40 "ext/fastcsv/fastcsv.rl"
612
+ #line 613 "ext/fastcsv/fastcsv.c"
613
+ if ( (*p) == 10 )
614
+ goto st6;
615
+ goto tr16;
616
+ tr13:
617
+ #line 1 "NONE"
618
+ {te = p+1;}
619
+ #line 49 "ext/fastcsv/fastcsv.rl"
532
620
  {
533
621
  if (p == ts) {
534
622
  // Unquoted empty fields are nil, not "", in Ruby.
@@ -539,73 +627,40 @@ tr5:
539
627
  ENCODE;
540
628
  }
541
629
  }
542
- #line 86 "ext/fastcsv/fastcsv.rl"
543
- {
544
- rb_ary_push(row, field);
545
- field = Qnil;
546
- }
547
- goto st8;
548
- tr13:
549
- #line 86 "ext/fastcsv/fastcsv.rl"
550
- {
551
- rb_ary_push(row, field);
552
- field = Qnil;
553
- }
554
- goto st8;
555
- st8:
556
- if ( ++p == pe )
557
- goto _test_eof8;
558
- case 8:
559
- #line 560 "ext/fastcsv/fastcsv.c"
560
- if ( (*p) == 0 )
561
- goto tr21;
562
- goto tr20;
563
- tr14:
564
- #line 1 "NONE"
565
- {te = p+1;}
566
- #line 101 "ext/fastcsv/fastcsv.rl"
630
+ #line 138 "ext/fastcsv/fastcsv.rl"
567
631
  {
632
+ if (d->start == 0 || p == d->start) {
633
+ rb_ivar_set(self, s_row, rb_str_new2(""));
634
+ }
635
+ else if (p > d->start) {
636
+ rb_ivar_set(self, s_row, rb_str_new(d->start, p - d->start));
637
+ }
638
+
568
639
  if (!NIL_P(field) || RARRAY_LEN(row)) {
569
640
  rb_ary_push(row, field);
570
641
  }
642
+
571
643
  if (RARRAY_LEN(row)) {
572
644
  rb_yield(row);
573
645
  }
574
646
  }
575
- #line 40 "ext/fastcsv/fastcsv.rl"
576
- {
577
- if (p == ts) {
578
- // Unquoted empty fields are nil, not "", in Ruby.
579
- field = Qnil;
580
- }
581
- else if (p > ts) {
582
- field = rb_enc_str_new(ts, p - ts, encoding);
583
- ENCODE;
584
- }
585
- }
586
- #line 123 "ext/fastcsv/fastcsv.rl"
647
+ #line 168 "ext/fastcsv/fastcsv.rl"
587
648
  {act = 3;}
588
- goto st9;
589
- st9:
649
+ goto st8;
650
+ st8:
590
651
  if ( ++p == pe )
591
- goto _test_eof9;
592
- case 9:
593
- #line 594 "ext/fastcsv/fastcsv.c"
652
+ goto _test_eof8;
653
+ case 8:
654
+ #line 655 "ext/fastcsv/fastcsv.c"
594
655
  switch( (*p) ) {
595
- case 10: goto tr16;
596
- case 13: goto tr16;
597
- case 34: goto tr16;
598
- case 44: goto tr16;
656
+ case 10: goto tr15;
657
+ case 13: goto tr15;
658
+ case 34: goto tr15;
659
+ case 44: goto tr15;
599
660
  }
600
661
  goto st1;
601
- tr8:
602
- #line 28 "ext/fastcsv/fastcsv.rl"
603
- {
604
- curline++;
605
- }
606
- goto st2;
607
- tr15:
608
- #line 32 "ext/fastcsv/fastcsv.rl"
662
+ tr14:
663
+ #line 41 "ext/fastcsv/fastcsv.rl"
609
664
  {
610
665
  unclosed_line = curline;
611
666
  }
@@ -614,19 +669,17 @@ st2:
614
669
  if ( ++p == pe )
615
670
  goto _test_eof2;
616
671
  case 2:
617
- #line 618 "ext/fastcsv/fastcsv.c"
672
+ #line 673 "ext/fastcsv/fastcsv.c"
618
673
  switch( (*p) ) {
619
674
  case 0: goto st0;
620
- case 10: goto tr8;
621
- case 13: goto tr8;
622
- case 34: goto tr9;
675
+ case 34: goto tr8;
623
676
  }
624
677
  goto st2;
625
678
  st0:
626
679
  cs = 0;
627
680
  goto _out;
628
- tr9:
629
- #line 51 "ext/fastcsv/fastcsv.rl"
681
+ tr8:
682
+ #line 60 "ext/fastcsv/fastcsv.rl"
630
683
  {
631
684
  if (p == ts) {
632
685
  field = rb_enc_str_new("", 0, encoding);
@@ -653,7 +706,7 @@ tr9:
653
706
  reader++;
654
707
  }
655
708
 
656
- field = rb_enc_str_new(copy, writer - copy, enc);
709
+ field = rb_enc_str_new(copy, writer - copy, encoding);
657
710
  ENCODE;
658
711
 
659
712
  if (copy != NULL) {
@@ -661,7 +714,7 @@ tr9:
661
714
  }
662
715
  }
663
716
  }
664
- #line 36 "ext/fastcsv/fastcsv.rl"
717
+ #line 45 "ext/fastcsv/fastcsv.rl"
665
718
  {
666
719
  unclosed_line = 0;
667
720
  }
@@ -670,13 +723,13 @@ st3:
670
723
  if ( ++p == pe )
671
724
  goto _test_eof3;
672
725
  case 3:
673
- #line 674 "ext/fastcsv/fastcsv.c"
726
+ #line 727 "ext/fastcsv/fastcsv.c"
674
727
  switch( (*p) ) {
675
- case 0: goto tr10;
676
- case 10: goto tr11;
677
- case 13: goto tr12;
728
+ case 0: goto tr9;
729
+ case 10: goto tr10;
730
+ case 13: goto tr11;
678
731
  case 34: goto st2;
679
- case 44: goto tr13;
732
+ case 44: goto tr12;
680
733
  }
681
734
  goto st0;
682
735
  }
@@ -686,7 +739,6 @@ case 3:
686
739
  _test_eof6: cs = 6; goto _test_eof;
687
740
  _test_eof7: cs = 7; goto _test_eof;
688
741
  _test_eof8: cs = 8; goto _test_eof;
689
- _test_eof9: cs = 9; goto _test_eof;
690
742
  _test_eof2: cs = 2; goto _test_eof;
691
743
  _test_eof3: cs = 3; goto _test_eof;
692
744
 
@@ -695,32 +747,26 @@ case 3:
695
747
  {
696
748
  switch ( cs ) {
697
749
  case 1: goto tr0;
698
- case 5: goto tr16;
699
- case 6: goto tr17;
700
- case 7: goto tr17;
701
- case 8: goto tr20;
702
- case 9: goto tr16;
750
+ case 5: goto tr15;
751
+ case 6: goto tr16;
752
+ case 7: goto tr16;
753
+ case 8: goto tr15;
703
754
  }
704
755
  }
705
756
 
706
757
  _out: {}
707
758
  }
708
759
 
709
- #line 359 "ext/fastcsv/fastcsv.rl"
760
+ #line 427 "ext/fastcsv/fastcsv.rl"
761
+
762
+ if (done && cs < raw_parse_first_final) {
763
+ FREE;
710
764
 
711
- if (done && cs < fastcsv_first_final) {
712
- if (buf != NULL) {
713
- free(buf);
714
- }
715
765
  if (unclosed_line) {
716
- rb_raise(rb_eParseError, "Unclosed quoted field on line %d.", unclosed_line);
766
+ rb_raise(eError, "Unclosed quoted field on line %d.", unclosed_line);
717
767
  }
718
- // Ruby raises different errors for illegal quoting, depending on whether
719
- // a quoted string is followed by a string ("Unclosed quoted field on line
720
- // %d.") or by a string ending in a quote ("Missing or stray quote in line
721
- // %d"). These precisions are kind of bogus, but we can try using $!.
722
768
  else {
723
- rb_raise(rb_eParseError, "Illegal quoting in line %d.", curline);
769
+ rb_raise(eError, "Illegal quoting in line %d.", curline);
724
770
  }
725
771
  }
726
772
 
@@ -735,23 +781,35 @@ case 3:
735
781
  }
736
782
  }
737
783
 
738
- if (buf != NULL) {
739
- free(buf);
740
- }
784
+ FREE;
741
785
 
742
786
  return Qnil;
743
787
  }
744
788
 
789
+ // @see https://github.com/ruby/ruby/blob/trunk/README.EXT#L616
790
+ static VALUE allocate(VALUE class) {
791
+ // @see https://github.com/nofxx/georuby_c/blob/b3b91fd90980d7c295ac8f6012d89878ea7cd569/ext/line.c#L66
792
+ Data *d = ALLOC(Data);
793
+ d->start = 0;
794
+ // @see https://github.com/nofxx/georuby_c/blob/b3b91fd90980d7c295ac8f6012d89878ea7cd569/ext/point.h#L26
795
+ // rb_gc_mark(d->start) or rb_gc_mark(d) cause warning "passing argument 1 of ‘rb_gc_mark’ makes integer from pointer without a cast"
796
+ // free(d->start) causes error "pointer being freed was not allocated"
797
+ return Data_Wrap_Struct(class, NULL, free, d);
798
+ }
799
+
800
+ // @see http://tenderlovemaking.com/2009/12/18/writing-ruby-c-extensions-part-1.html
801
+ // @see http://tenderlovemaking.com/2010/12/11/writing-ruby-c-extensions-part-2.html
745
802
  void Init_fastcsv() {
746
803
  s_read = rb_intern("read");
747
- s_to_str = rb_intern("to_str");
748
- s_internal_encoding = rb_intern("internal_encoding");
749
- s_external_encoding = rb_intern("external_encoding");
750
- s_string = rb_intern("string");
751
- s_encoding = rb_intern("encoding");
752
-
753
- mModule = rb_define_module("FastCSV");
754
- rb_define_attr(rb_singleton_class(mModule), "buffer_size", 1, 1);
755
- rb_define_singleton_method(mModule, "raw_parse", fastcsv, -1);
756
- rb_eParseError = rb_define_class_under(mModule, "ParseError", rb_eStandardError);
804
+ s_row = rb_intern("@row");
805
+
806
+ cClass = rb_define_class("FastCSV", rb_const_get(rb_cObject, rb_intern("CSV"))); // class FastCSV < CSV
807
+ cParser = rb_define_class_under(cClass, "Parser", rb_cObject); // class Parser
808
+ rb_define_alloc_func(cParser, allocate); //
809
+ rb_define_method(cParser, "raw_parse", raw_parse, -1); // def raw_parse(port, opts = nil); end
810
+ rb_define_attr(cParser, "row", 1, 0); // attr_reader :row
811
+ rb_define_attr(cParser, "buffer_size", 1, 1); // attr_accessor :buffer_size
812
+ // end
813
+ eError = rb_define_class_under(cClass, "MalformedCSVError", rb_eRuntimeError); // class MalformedCSVError < RuntimeError
814
+ // end
757
815
  }