fastcsv 0.0.2 → 0.0.3
This diff shows the differences between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.travis.yml +11 -0
- data/README.md +37 -2
- data/TESTS.md +42 -0
- data/ext/fastcsv/fastcsv.c +281 -223
- data/ext/fastcsv/fastcsv.rl +149 -72
- data/fastcsv.gemspec +1 -1
- data/lib/fastcsv.rb +130 -0
- data/spec/fastcsv_spec.rb +189 -57
- data/spec/fixtures/csv.csv +3 -0
- data/spec/fixtures/iso-8859-1-quoted.csv +1 -0
- data/spec/fixtures/utf-8-quoted.csv +1 -0
- data/spec/spec_helper.rb +5 -0
- data/test/csv/base.rb +8 -0
- data/test/csv/line_endings.gz +0 -0
- data/test/csv/test_csv_parsing.rb +221 -0
- data/test/csv/test_csv_writing.rb +97 -0
- data/test/csv/test_data_converters.rb +263 -0
- data/test/csv/test_encodings.rb +339 -0
- data/test/csv/test_features.rb +317 -0
- data/test/csv/test_headers.rb +289 -0
- data/test/csv/test_interface.rb +362 -0
- data/test/csv/test_row.rb +349 -0
- data/test/csv/test_table.rb +420 -0
- data/test/csv/ts_all.rb +20 -0
- data/test/runner.rb +36 -0
- data/test/with_different_ofs.rb +17 -0
- metadata +38 -2
data/ext/fastcsv/fastcsv.c
CHANGED
@@ -7,7 +7,7 @@
|
|
7
7
|
// http://w3c.github.io/csvw/syntax/#ebnf
|
8
8
|
|
9
9
|
// CSV implementation.
|
10
|
-
// https://github.com/ruby/ruby/blob/
|
10
|
+
// https://github.com/ruby/ruby/blob/trunk/lib/csv.rb
|
11
11
|
|
12
12
|
// Ruby C extensions help.
|
13
13
|
// https://github.com/ruby/ruby/blob/trunk/README.EXT
|
@@ -21,31 +21,42 @@ if (enc2 != NULL) { \
|
|
21
21
|
field = rb_str_encode(field, rb_enc_from_encoding(enc), 0, Qnil); \
|
22
22
|
}
|
23
23
|
|
24
|
-
|
25
|
-
|
24
|
+
#define FREE \
|
25
|
+
if (buf != NULL) { \
|
26
|
+
free(buf); \
|
27
|
+
} \
|
28
|
+
if (row_sep != NULL) { \
|
29
|
+
free(row_sep); \
|
30
|
+
}
|
31
|
+
|
32
|
+
static VALUE cClass, cParser, eError;
|
33
|
+
static ID s_read, s_row;
|
34
|
+
|
35
|
+
// @see https://github.com/nofxx/georuby_c/blob/b3b91fd90980d7c295ac8f6012d89878ea7cd569/ext/types.h#L22
|
36
|
+
typedef struct {
|
37
|
+
char *start;
|
38
|
+
} Data;
|
26
39
|
|
27
40
|
|
28
|
-
#line
|
41
|
+
#line 170 "ext/fastcsv/fastcsv.rl"
|
29
42
|
|
30
43
|
|
31
44
|
|
32
|
-
#line
|
33
|
-
static const int
|
34
|
-
static const int
|
35
|
-
static const int
|
45
|
+
#line 46 "ext/fastcsv/fastcsv.c"
|
46
|
+
static const int raw_parse_start = 4;
|
47
|
+
static const int raw_parse_first_final = 4;
|
48
|
+
static const int raw_parse_error = 0;
|
36
49
|
|
37
|
-
static const int
|
50
|
+
static const int raw_parse_en_main = 4;
|
38
51
|
|
39
52
|
|
40
|
-
#line
|
53
|
+
#line 173 "ext/fastcsv/fastcsv.rl"
|
41
54
|
|
42
55
|
// 16 kB
|
43
56
|
#define BUFSIZE 16384
|
44
57
|
|
45
58
|
// @see http://rxr.whitequark.org/mri/source/io.c#4845
|
46
|
-
static void
|
47
|
-
rb_io_ext_int_to_encs(rb_encoding *ext, rb_encoding *intern, rb_encoding **enc, rb_encoding **enc2, int fmode)
|
48
|
-
{
|
59
|
+
static void rb_io_ext_int_to_encs(rb_encoding *ext, rb_encoding *intern, rb_encoding **enc, rb_encoding **enc2, int fmode) {
|
49
60
|
int default_ext = 0;
|
50
61
|
|
51
62
|
if (ext == NULL) {
|
@@ -70,15 +81,17 @@ rb_io_ext_int_to_encs(rb_encoding *ext, rb_encoding *intern, rb_encoding **enc,
|
|
70
81
|
}
|
71
82
|
}
|
72
83
|
|
73
|
-
VALUE
|
84
|
+
static VALUE raw_parse(int argc, VALUE *argv, VALUE self) {
|
74
85
|
int cs, act, have = 0, curline = 1, io = 0;
|
75
|
-
char *ts = 0, *te = 0, *buf = 0, *eof = 0;
|
86
|
+
char *ts = 0, *te = 0, *buf = 0, *eof = 0, *mark_row_sep = 0, *row_sep = NULL;
|
76
87
|
|
77
|
-
VALUE port, opts;
|
88
|
+
VALUE port, opts, r_encoding;
|
78
89
|
VALUE row = rb_ary_new(), field = Qnil, bufsize = Qnil;
|
79
|
-
int done = 0, unclosed_line = 0, buffer_size = 0, taint = 0;
|
90
|
+
int done = 0, unclosed_line = 0, len_row_sep = 0, buffer_size = 0, taint = 0;
|
80
91
|
rb_encoding *enc = NULL, *enc2 = NULL, *encoding = NULL;
|
81
|
-
|
92
|
+
|
93
|
+
Data *d;
|
94
|
+
Data_Get_Struct(self, Data, d);
|
82
95
|
|
83
96
|
VALUE option;
|
84
97
|
char quote_char = '"';
|
@@ -87,8 +100,8 @@ VALUE fastcsv(int argc, VALUE *argv, VALUE self) {
|
|
87
100
|
taint = OBJ_TAINTED(port);
|
88
101
|
io = rb_respond_to(port, s_read);
|
89
102
|
if (!io) {
|
90
|
-
if (rb_respond_to(port,
|
91
|
-
port = rb_funcall(port,
|
103
|
+
if (rb_respond_to(port, rb_intern("to_str"))) {
|
104
|
+
port = rb_funcall(port, rb_intern("to_str"), 0);
|
92
105
|
StringValue(port);
|
93
106
|
}
|
94
107
|
else {
|
@@ -112,7 +125,7 @@ VALUE fastcsv(int argc, VALUE *argv, VALUE self) {
|
|
112
125
|
// @see http://ruby-doc.org/core-2.1.1/IO.html#method-c-new-label-Open+Mode
|
113
126
|
option = rb_hash_aref(opts, ID2SYM(rb_intern("encoding")));
|
114
127
|
if (TYPE(option) == T_STRING) {
|
115
|
-
// parse_mode_enc is not in header file.
|
128
|
+
// `parse_mode_enc` is not in header file.
|
116
129
|
const char *estr = StringValueCStr(option), *ptr;
|
117
130
|
char encname[ENCODING_MAXNAMELEN+1];
|
118
131
|
int idx, idx2;
|
@@ -123,17 +136,17 @@ VALUE fastcsv(int argc, VALUE *argv, VALUE self) {
|
|
123
136
|
ptr = strrchr(estr, ':');
|
124
137
|
if (ptr) {
|
125
138
|
long len = (ptr++) - estr;
|
126
|
-
if (len == 0 || len > ENCODING_MAXNAMELEN) {
|
139
|
+
if (len == 0 || len > ENCODING_MAXNAMELEN) { // ":enc"
|
127
140
|
idx = -1;
|
128
141
|
}
|
129
|
-
else {
|
142
|
+
else { // "enc2:enc" or "enc:-"
|
130
143
|
memcpy(encname, estr, len);
|
131
144
|
encname[len] = '\0';
|
132
145
|
estr = encname;
|
133
146
|
idx = rb_enc_find_index(encname);
|
134
147
|
}
|
135
148
|
}
|
136
|
-
else {
|
149
|
+
else { // "enc"
|
137
150
|
idx = rb_enc_find_index(estr);
|
138
151
|
}
|
139
152
|
|
@@ -141,7 +154,7 @@ VALUE fastcsv(int argc, VALUE *argv, VALUE self) {
|
|
141
154
|
ext_enc = rb_enc_from_index(idx);
|
142
155
|
}
|
143
156
|
else {
|
144
|
-
if (idx != -2) {
|
157
|
+
if (idx != -2) { // ":enc"
|
145
158
|
// `unsupported_encoding` is not in header file.
|
146
159
|
rb_warn("Unsupported encoding %s ignored", estr);
|
147
160
|
}
|
@@ -150,11 +163,11 @@ VALUE fastcsv(int argc, VALUE *argv, VALUE self) {
|
|
150
163
|
|
151
164
|
int_enc = NULL;
|
152
165
|
if (ptr) {
|
153
|
-
if (*ptr == '-' && *(ptr+1) == '\0') {
|
166
|
+
if (*ptr == '-' && *(ptr+1) == '\0') { // "enc:-"
|
154
167
|
/* Special case - "-" => no transcoding */
|
155
168
|
int_enc = (rb_encoding *)Qnil;
|
156
169
|
}
|
157
|
-
else {
|
170
|
+
else { // "enc2:enc"
|
158
171
|
idx2 = rb_enc_find_index(ptr);
|
159
172
|
if (idx2 < 0) {
|
160
173
|
// `unsupported_encoding` is not in header file.
|
@@ -175,29 +188,33 @@ VALUE fastcsv(int argc, VALUE *argv, VALUE self) {
|
|
175
188
|
rb_raise(rb_eArgError, ":encoding has to be a String");
|
176
189
|
}
|
177
190
|
|
178
|
-
// @see
|
179
|
-
// @see https://github.com/ruby/ruby/blob/
|
180
|
-
if (rb_respond_to(port,
|
181
|
-
r_encoding = rb_funcall(port,
|
191
|
+
// @see CSV#raw_encoding
|
192
|
+
// @see https://github.com/ruby/ruby/blob/ab337e61ecb5f42384ba7d710c36faf96a454e5c/lib/csv.rb#L2290
|
193
|
+
if (rb_respond_to(port, rb_intern("internal_encoding"))) {
|
194
|
+
r_encoding = rb_funcall(port, rb_intern("internal_encoding"), 0);
|
182
195
|
if (NIL_P(r_encoding)) {
|
183
|
-
r_encoding = rb_funcall(port,
|
196
|
+
r_encoding = rb_funcall(port, rb_intern("external_encoding"), 0);
|
184
197
|
}
|
185
198
|
}
|
186
|
-
else if (rb_respond_to(port,
|
187
|
-
r_encoding = rb_funcall(rb_funcall(port,
|
199
|
+
else if (rb_respond_to(port, rb_intern("string"))) {
|
200
|
+
r_encoding = rb_funcall(rb_funcall(port, rb_intern("string"), 0), rb_intern("encoding"), 0);
|
188
201
|
}
|
189
|
-
else if (rb_respond_to(port,
|
190
|
-
r_encoding = rb_funcall(port,
|
202
|
+
else if (rb_respond_to(port, rb_intern("encoding"))) {
|
203
|
+
r_encoding = rb_funcall(port, rb_intern("encoding"), 0);
|
191
204
|
}
|
192
205
|
else {
|
193
206
|
r_encoding = rb_enc_from_encoding(rb_ascii8bit_encoding());
|
194
207
|
}
|
208
|
+
|
209
|
+
// @see CSV#initialize
|
210
|
+
// @see https://github.com/ruby/ruby/blob/ab337e61ecb5f42384ba7d710c36faf96a454e5c/lib/csv.rb#L1510
|
195
211
|
if (NIL_P(r_encoding)) {
|
196
212
|
r_encoding = rb_enc_from_encoding(rb_default_internal_encoding());
|
197
213
|
}
|
198
214
|
if (NIL_P(r_encoding)) {
|
199
215
|
r_encoding = rb_enc_from_encoding(rb_default_external_encoding());
|
200
216
|
}
|
217
|
+
|
201
218
|
if (enc2 != NULL) {
|
202
219
|
encoding = enc2;
|
203
220
|
}
|
@@ -208,11 +225,17 @@ VALUE fastcsv(int argc, VALUE *argv, VALUE self) {
|
|
208
225
|
encoding = rb_enc_get(r_encoding);
|
209
226
|
}
|
210
227
|
|
228
|
+
rb_ivar_set(self, s_row, Qnil);
|
229
|
+
|
211
230
|
buffer_size = BUFSIZE;
|
212
231
|
if (rb_ivar_defined(self, rb_intern("@buffer_size")) == Qtrue) {
|
213
232
|
bufsize = rb_ivar_get(self, rb_intern("@buffer_size"));
|
214
233
|
if (!NIL_P(bufsize)) {
|
215
234
|
buffer_size = NUM2INT(bufsize);
|
235
|
+
// buffer_size = 0 can cause segmentation faults.
|
236
|
+
if (buffer_size == 0) {
|
237
|
+
buffer_size = BUFSIZE;
|
238
|
+
}
|
216
239
|
}
|
217
240
|
}
|
218
241
|
|
@@ -221,39 +244,47 @@ VALUE fastcsv(int argc, VALUE *argv, VALUE self) {
|
|
221
244
|
}
|
222
245
|
|
223
246
|
|
224
|
-
#line
|
247
|
+
#line 248 "ext/fastcsv/fastcsv.c"
|
225
248
|
{
|
226
|
-
cs =
|
249
|
+
cs = raw_parse_start;
|
227
250
|
ts = 0;
|
228
251
|
te = 0;
|
229
252
|
act = 0;
|
230
253
|
}
|
231
254
|
|
232
|
-
#line
|
255
|
+
#line 366 "ext/fastcsv/fastcsv.rl"
|
233
256
|
|
234
257
|
while (!done) {
|
235
258
|
VALUE str;
|
236
259
|
char *p, *pe;
|
237
|
-
int len, space = buffer_size - have, tokstart_diff, tokend_diff;
|
260
|
+
int len, space = buffer_size - have, tokstart_diff, tokend_diff, start_diff, mark_row_sep_diff;
|
238
261
|
|
239
262
|
if (io) {
|
240
263
|
if (space == 0) {
|
241
|
-
|
242
|
-
|
264
|
+
// Not moving d->start will cause intermittent segmentation faults.
|
265
|
+
tokstart_diff = ts - buf;
|
266
|
+
tokend_diff = te - buf;
|
267
|
+
start_diff = d->start - buf;
|
268
|
+
mark_row_sep_diff = mark_row_sep - buf;
|
243
269
|
|
244
|
-
|
245
|
-
|
270
|
+
buffer_size += BUFSIZE;
|
271
|
+
REALLOC_N(buf, char, buffer_size);
|
246
272
|
|
247
|
-
|
273
|
+
space = buffer_size - have;
|
248
274
|
|
249
|
-
|
250
|
-
|
275
|
+
ts = buf + tokstart_diff;
|
276
|
+
te = buf + tokend_diff;
|
277
|
+
d->start = buf + start_diff;
|
278
|
+
mark_row_sep = buf + mark_row_sep_diff;
|
251
279
|
}
|
252
280
|
p = buf + have;
|
253
281
|
|
282
|
+
// Reads "`length` bytes without any conversion (binary mode)."
|
283
|
+
// "The resulted string is always ASCII-8BIT encoding."
|
284
|
+
// @see http://www.ruby-doc.org/core-2.1.4/IO.html#method-i-read
|
254
285
|
str = rb_funcall(port, s_read, 1, INT2FIX(space));
|
255
286
|
if (NIL_P(str)) {
|
256
|
-
//
|
287
|
+
// "`nil` means it met EOF at beginning," e.g. for `StringIO.new("")`.
|
257
288
|
len = 0;
|
258
289
|
}
|
259
290
|
else {
|
@@ -261,6 +292,7 @@ VALUE fastcsv(int argc, VALUE *argv, VALUE self) {
|
|
261
292
|
memcpy(p, StringValuePtr(str), len);
|
262
293
|
}
|
263
294
|
|
295
|
+
// "The 1 to `length`-1 bytes string means it met EOF after reading the result."
|
264
296
|
if (len < space) {
|
265
297
|
// EOF actions don't work in scanners, so we add a sentinel value.
|
266
298
|
// @see http://www.complang.org/pipermail/ragel-users/2007-May/001516.html
|
@@ -276,9 +308,13 @@ VALUE fastcsv(int argc, VALUE *argv, VALUE self) {
|
|
276
308
|
done = 1;
|
277
309
|
}
|
278
310
|
|
311
|
+
if (d->start == 0) {
|
312
|
+
d->start = p;
|
313
|
+
}
|
314
|
+
|
279
315
|
pe = p + len;
|
280
316
|
|
281
|
-
#line
|
317
|
+
#line 318 "ext/fastcsv/fastcsv.c"
|
282
318
|
{
|
283
319
|
if ( p == pe )
|
284
320
|
goto _test_eof;
|
@@ -296,56 +332,80 @@ tr0:
|
|
296
332
|
}
|
297
333
|
}
|
298
334
|
goto st4;
|
299
|
-
|
300
|
-
#line
|
335
|
+
tr5:
|
336
|
+
#line 49 "ext/fastcsv/fastcsv.rl"
|
301
337
|
{
|
302
|
-
if (
|
303
|
-
|
338
|
+
if (p == ts) {
|
339
|
+
// Unquoted empty fields are nil, not "", in Ruby.
|
340
|
+
field = Qnil;
|
304
341
|
}
|
305
|
-
if (
|
306
|
-
|
342
|
+
else if (p > ts) {
|
343
|
+
field = rb_enc_str_new(ts, p - ts, encoding);
|
344
|
+
ENCODE;
|
307
345
|
}
|
308
346
|
}
|
309
|
-
#line
|
347
|
+
#line 95 "ext/fastcsv/fastcsv.rl"
|
348
|
+
{
|
349
|
+
rb_ary_push(row, field);
|
350
|
+
field = Qnil;
|
351
|
+
}
|
352
|
+
#line 166 "ext/fastcsv/fastcsv.rl"
|
310
353
|
{te = p+1;}
|
311
354
|
goto st4;
|
312
|
-
|
313
|
-
#line
|
314
|
-
{te = p;p--;}
|
315
|
-
goto st4;
|
316
|
-
tr17:
|
317
|
-
#line 122 "ext/fastcsv/fastcsv.rl"
|
318
|
-
{te = p;p--;}
|
319
|
-
goto st4;
|
320
|
-
tr18:
|
321
|
-
#line 101 "ext/fastcsv/fastcsv.rl"
|
355
|
+
tr9:
|
356
|
+
#line 138 "ext/fastcsv/fastcsv.rl"
|
322
357
|
{
|
358
|
+
if (d->start == 0 || p == d->start) {
|
359
|
+
rb_ivar_set(self, s_row, rb_str_new2(""));
|
360
|
+
}
|
361
|
+
else if (p > d->start) {
|
362
|
+
rb_ivar_set(self, s_row, rb_str_new(d->start, p - d->start));
|
363
|
+
}
|
364
|
+
|
323
365
|
if (!NIL_P(field) || RARRAY_LEN(row)) {
|
324
366
|
rb_ary_push(row, field);
|
325
367
|
}
|
368
|
+
|
326
369
|
if (RARRAY_LEN(row)) {
|
327
370
|
rb_yield(row);
|
328
371
|
}
|
329
372
|
}
|
330
|
-
#line
|
373
|
+
#line 168 "ext/fastcsv/fastcsv.rl"
|
374
|
+
{te = p+1;}
|
375
|
+
goto st4;
|
376
|
+
tr12:
|
377
|
+
#line 95 "ext/fastcsv/fastcsv.rl"
|
378
|
+
{
|
379
|
+
rb_ary_push(row, field);
|
380
|
+
field = Qnil;
|
381
|
+
}
|
382
|
+
#line 166 "ext/fastcsv/fastcsv.rl"
|
331
383
|
{te = p+1;}
|
332
384
|
goto st4;
|
333
|
-
|
334
|
-
#line
|
385
|
+
tr15:
|
386
|
+
#line 168 "ext/fastcsv/fastcsv.rl"
|
335
387
|
{te = p;p--;}
|
336
388
|
goto st4;
|
337
|
-
|
338
|
-
#line
|
389
|
+
tr16:
|
390
|
+
#line 100 "ext/fastcsv/fastcsv.rl"
|
339
391
|
{
|
340
|
-
|
341
|
-
|
392
|
+
d->start = p;
|
393
|
+
|
394
|
+
if (len_row_sep) {
|
395
|
+
if (p - mark_row_sep != len_row_sep || row_sep[0] != *mark_row_sep || len_row_sep == 2 && row_sep[1] != *(mark_row_sep + 1)) {
|
396
|
+
FREE;
|
397
|
+
|
398
|
+
rb_raise(eError, "Unquoted fields do not allow \\r or \\n (line %d).", curline - 1);
|
399
|
+
}
|
342
400
|
}
|
343
|
-
|
344
|
-
|
401
|
+
else {
|
402
|
+
len_row_sep = p - mark_row_sep;
|
403
|
+
row_sep = ALLOC_N(char, p - mark_row_sep);
|
404
|
+
memcpy(row_sep, mark_row_sep, p - mark_row_sep);
|
345
405
|
}
|
346
406
|
}
|
347
|
-
#line
|
348
|
-
{te = p
|
407
|
+
#line 167 "ext/fastcsv/fastcsv.rl"
|
408
|
+
{te = p;p--;}
|
349
409
|
goto st4;
|
350
410
|
st4:
|
351
411
|
#line 1 "NONE"
|
@@ -357,12 +417,12 @@ st4:
|
|
357
417
|
case 4:
|
358
418
|
#line 1 "NONE"
|
359
419
|
{ts = p;}
|
360
|
-
#line
|
420
|
+
#line 421 "ext/fastcsv/fastcsv.c"
|
361
421
|
switch( (*p) ) {
|
362
|
-
case 0: goto
|
422
|
+
case 0: goto tr13;
|
363
423
|
case 10: goto tr3;
|
364
424
|
case 13: goto tr4;
|
365
|
-
case 34: goto
|
425
|
+
case 34: goto tr14;
|
366
426
|
case 44: goto tr5;
|
367
427
|
}
|
368
428
|
goto st1;
|
@@ -381,7 +441,7 @@ case 1:
|
|
381
441
|
tr2:
|
382
442
|
#line 1 "NONE"
|
383
443
|
{te = p+1;}
|
384
|
-
#line
|
444
|
+
#line 49 "ext/fastcsv/fastcsv.rl"
|
385
445
|
{
|
386
446
|
if (p == ts) {
|
387
447
|
// Unquoted empty fields are nil, not "", in Ruby.
|
@@ -392,33 +452,41 @@ tr2:
|
|
392
452
|
ENCODE;
|
393
453
|
}
|
394
454
|
}
|
395
|
-
#line
|
455
|
+
#line 138 "ext/fastcsv/fastcsv.rl"
|
396
456
|
{
|
457
|
+
if (d->start == 0 || p == d->start) {
|
458
|
+
rb_ivar_set(self, s_row, rb_str_new2(""));
|
459
|
+
}
|
460
|
+
else if (p > d->start) {
|
461
|
+
rb_ivar_set(self, s_row, rb_str_new(d->start, p - d->start));
|
462
|
+
}
|
463
|
+
|
397
464
|
if (!NIL_P(field) || RARRAY_LEN(row)) {
|
398
465
|
rb_ary_push(row, field);
|
399
466
|
}
|
467
|
+
|
400
468
|
if (RARRAY_LEN(row)) {
|
401
469
|
rb_yield(row);
|
402
470
|
}
|
403
471
|
}
|
404
|
-
#line
|
472
|
+
#line 168 "ext/fastcsv/fastcsv.rl"
|
405
473
|
{act = 3;}
|
406
474
|
goto st5;
|
407
475
|
st5:
|
408
476
|
if ( ++p == pe )
|
409
477
|
goto _test_eof5;
|
410
478
|
case 5:
|
411
|
-
#line
|
479
|
+
#line 480 "ext/fastcsv/fastcsv.c"
|
412
480
|
switch( (*p) ) {
|
413
481
|
case 0: goto tr2;
|
414
482
|
case 10: goto tr3;
|
415
483
|
case 13: goto tr4;
|
416
|
-
case 34: goto
|
484
|
+
case 34: goto tr15;
|
417
485
|
case 44: goto tr5;
|
418
486
|
}
|
419
487
|
goto st1;
|
420
488
|
tr3:
|
421
|
-
#line
|
489
|
+
#line 49 "ext/fastcsv/fastcsv.rl"
|
422
490
|
{
|
423
491
|
if (p == ts) {
|
424
492
|
// Unquoted empty fields are nil, not "", in Ruby.
|
@@ -429,8 +497,19 @@ tr3:
|
|
429
497
|
ENCODE;
|
430
498
|
}
|
431
499
|
}
|
432
|
-
#line
|
500
|
+
#line 117 "ext/fastcsv/fastcsv.rl"
|
433
501
|
{
|
502
|
+
mark_row_sep = p;
|
503
|
+
|
504
|
+
curline++;
|
505
|
+
|
506
|
+
if (d->start == 0 || p == d->start) {
|
507
|
+
rb_ivar_set(self, s_row, rb_str_new2(""));
|
508
|
+
}
|
509
|
+
else if (p > d->start) {
|
510
|
+
rb_ivar_set(self, s_row, rb_str_new(d->start, p - d->start));
|
511
|
+
}
|
512
|
+
|
434
513
|
if (!NIL_P(field) || RARRAY_LEN(row)) { // same as new_field
|
435
514
|
rb_ary_push(row, field);
|
436
515
|
field = Qnil;
|
@@ -438,21 +517,22 @@ tr3:
|
|
438
517
|
|
439
518
|
rb_yield(row);
|
440
519
|
row = rb_ary_new();
|
441
|
-
}
|
442
|
-
#line 28 "ext/fastcsv/fastcsv.rl"
|
443
|
-
{
|
444
|
-
curline++;
|
445
520
|
}
|
446
521
|
goto st6;
|
447
|
-
|
448
|
-
#line
|
522
|
+
tr10:
|
523
|
+
#line 117 "ext/fastcsv/fastcsv.rl"
|
449
524
|
{
|
525
|
+
mark_row_sep = p;
|
526
|
+
|
450
527
|
curline++;
|
451
|
-
|
452
|
-
|
453
|
-
|
454
|
-
|
455
|
-
|
528
|
+
|
529
|
+
if (d->start == 0 || p == d->start) {
|
530
|
+
rb_ivar_set(self, s_row, rb_str_new2(""));
|
531
|
+
}
|
532
|
+
else if (p > d->start) {
|
533
|
+
rb_ivar_set(self, s_row, rb_str_new(d->start, p - d->start));
|
534
|
+
}
|
535
|
+
|
456
536
|
if (!NIL_P(field) || RARRAY_LEN(row)) { // same as new_field
|
457
537
|
rb_ary_push(row, field);
|
458
538
|
field = Qnil;
|
@@ -460,22 +540,16 @@ tr11:
|
|
460
540
|
|
461
541
|
rb_yield(row);
|
462
542
|
row = rb_ary_new();
|
463
|
-
}
|
464
|
-
#line 28 "ext/fastcsv/fastcsv.rl"
|
465
|
-
{
|
466
|
-
curline++;
|
467
543
|
}
|
468
544
|
goto st6;
|
469
545
|
st6:
|
470
546
|
if ( ++p == pe )
|
471
547
|
goto _test_eof6;
|
472
548
|
case 6:
|
473
|
-
#line
|
474
|
-
|
475
|
-
goto tr18;
|
476
|
-
goto tr17;
|
549
|
+
#line 550 "ext/fastcsv/fastcsv.c"
|
550
|
+
goto tr16;
|
477
551
|
tr4:
|
478
|
-
#line
|
552
|
+
#line 49 "ext/fastcsv/fastcsv.rl"
|
479
553
|
{
|
480
554
|
if (p == ts) {
|
481
555
|
// Unquoted empty fields are nil, not "", in Ruby.
|
@@ -486,8 +560,19 @@ tr4:
|
|
486
560
|
ENCODE;
|
487
561
|
}
|
488
562
|
}
|
489
|
-
#line
|
563
|
+
#line 117 "ext/fastcsv/fastcsv.rl"
|
490
564
|
{
|
565
|
+
mark_row_sep = p;
|
566
|
+
|
567
|
+
curline++;
|
568
|
+
|
569
|
+
if (d->start == 0 || p == d->start) {
|
570
|
+
rb_ivar_set(self, s_row, rb_str_new2(""));
|
571
|
+
}
|
572
|
+
else if (p > d->start) {
|
573
|
+
rb_ivar_set(self, s_row, rb_str_new(d->start, p - d->start));
|
574
|
+
}
|
575
|
+
|
491
576
|
if (!NIL_P(field) || RARRAY_LEN(row)) { // same as new_field
|
492
577
|
rb_ary_push(row, field);
|
493
578
|
field = Qnil;
|
@@ -495,15 +580,22 @@ tr4:
|
|
495
580
|
|
496
581
|
rb_yield(row);
|
497
582
|
row = rb_ary_new();
|
498
|
-
}
|
499
|
-
#line 28 "ext/fastcsv/fastcsv.rl"
|
500
|
-
{
|
501
|
-
curline++;
|
502
583
|
}
|
503
584
|
goto st7;
|
504
|
-
|
505
|
-
#line
|
585
|
+
tr11:
|
586
|
+
#line 117 "ext/fastcsv/fastcsv.rl"
|
506
587
|
{
|
588
|
+
mark_row_sep = p;
|
589
|
+
|
590
|
+
curline++;
|
591
|
+
|
592
|
+
if (d->start == 0 || p == d->start) {
|
593
|
+
rb_ivar_set(self, s_row, rb_str_new2(""));
|
594
|
+
}
|
595
|
+
else if (p > d->start) {
|
596
|
+
rb_ivar_set(self, s_row, rb_str_new(d->start, p - d->start));
|
597
|
+
}
|
598
|
+
|
507
599
|
if (!NIL_P(field) || RARRAY_LEN(row)) { // same as new_field
|
508
600
|
rb_ary_push(row, field);
|
509
601
|
field = Qnil;
|
@@ -511,24 +603,20 @@ tr12:
|
|
511
603
|
|
512
604
|
rb_yield(row);
|
513
605
|
row = rb_ary_new();
|
514
|
-
}
|
515
|
-
#line 28 "ext/fastcsv/fastcsv.rl"
|
516
|
-
{
|
517
|
-
curline++;
|
518
606
|
}
|
519
607
|
goto st7;
|
520
608
|
st7:
|
521
609
|
if ( ++p == pe )
|
522
610
|
goto _test_eof7;
|
523
611
|
case 7:
|
524
|
-
#line
|
525
|
-
|
526
|
-
|
527
|
-
|
528
|
-
|
529
|
-
|
530
|
-
|
531
|
-
#line
|
612
|
+
#line 613 "ext/fastcsv/fastcsv.c"
|
613
|
+
if ( (*p) == 10 )
|
614
|
+
goto st6;
|
615
|
+
goto tr16;
|
616
|
+
tr13:
|
617
|
+
#line 1 "NONE"
|
618
|
+
{te = p+1;}
|
619
|
+
#line 49 "ext/fastcsv/fastcsv.rl"
|
532
620
|
{
|
533
621
|
if (p == ts) {
|
534
622
|
// Unquoted empty fields are nil, not "", in Ruby.
|
@@ -539,73 +627,40 @@ tr5:
|
|
539
627
|
ENCODE;
|
540
628
|
}
|
541
629
|
}
|
542
|
-
#line
|
543
|
-
{
|
544
|
-
rb_ary_push(row, field);
|
545
|
-
field = Qnil;
|
546
|
-
}
|
547
|
-
goto st8;
|
548
|
-
tr13:
|
549
|
-
#line 86 "ext/fastcsv/fastcsv.rl"
|
550
|
-
{
|
551
|
-
rb_ary_push(row, field);
|
552
|
-
field = Qnil;
|
553
|
-
}
|
554
|
-
goto st8;
|
555
|
-
st8:
|
556
|
-
if ( ++p == pe )
|
557
|
-
goto _test_eof8;
|
558
|
-
case 8:
|
559
|
-
#line 560 "ext/fastcsv/fastcsv.c"
|
560
|
-
if ( (*p) == 0 )
|
561
|
-
goto tr21;
|
562
|
-
goto tr20;
|
563
|
-
tr14:
|
564
|
-
#line 1 "NONE"
|
565
|
-
{te = p+1;}
|
566
|
-
#line 101 "ext/fastcsv/fastcsv.rl"
|
630
|
+
#line 138 "ext/fastcsv/fastcsv.rl"
|
567
631
|
{
|
632
|
+
if (d->start == 0 || p == d->start) {
|
633
|
+
rb_ivar_set(self, s_row, rb_str_new2(""));
|
634
|
+
}
|
635
|
+
else if (p > d->start) {
|
636
|
+
rb_ivar_set(self, s_row, rb_str_new(d->start, p - d->start));
|
637
|
+
}
|
638
|
+
|
568
639
|
if (!NIL_P(field) || RARRAY_LEN(row)) {
|
569
640
|
rb_ary_push(row, field);
|
570
641
|
}
|
642
|
+
|
571
643
|
if (RARRAY_LEN(row)) {
|
572
644
|
rb_yield(row);
|
573
645
|
}
|
574
646
|
}
|
575
|
-
#line
|
576
|
-
{
|
577
|
-
if (p == ts) {
|
578
|
-
// Unquoted empty fields are nil, not "", in Ruby.
|
579
|
-
field = Qnil;
|
580
|
-
}
|
581
|
-
else if (p > ts) {
|
582
|
-
field = rb_enc_str_new(ts, p - ts, encoding);
|
583
|
-
ENCODE;
|
584
|
-
}
|
585
|
-
}
|
586
|
-
#line 123 "ext/fastcsv/fastcsv.rl"
|
647
|
+
#line 168 "ext/fastcsv/fastcsv.rl"
|
587
648
|
{act = 3;}
|
588
|
-
goto
|
589
|
-
|
649
|
+
goto st8;
|
650
|
+
st8:
|
590
651
|
if ( ++p == pe )
|
591
|
-
goto
|
592
|
-
case
|
593
|
-
#line
|
652
|
+
goto _test_eof8;
|
653
|
+
case 8:
|
654
|
+
#line 655 "ext/fastcsv/fastcsv.c"
|
594
655
|
switch( (*p) ) {
|
595
|
-
case 10: goto
|
596
|
-
case 13: goto
|
597
|
-
case 34: goto
|
598
|
-
case 44: goto
|
656
|
+
case 10: goto tr15;
|
657
|
+
case 13: goto tr15;
|
658
|
+
case 34: goto tr15;
|
659
|
+
case 44: goto tr15;
|
599
660
|
}
|
600
661
|
goto st1;
|
601
|
-
|
602
|
-
#line
|
603
|
-
{
|
604
|
-
curline++;
|
605
|
-
}
|
606
|
-
goto st2;
|
607
|
-
tr15:
|
608
|
-
#line 32 "ext/fastcsv/fastcsv.rl"
|
662
|
+
tr14:
|
663
|
+
#line 41 "ext/fastcsv/fastcsv.rl"
|
609
664
|
{
|
610
665
|
unclosed_line = curline;
|
611
666
|
}
|
@@ -614,19 +669,17 @@ st2:
|
|
614
669
|
if ( ++p == pe )
|
615
670
|
goto _test_eof2;
|
616
671
|
case 2:
|
617
|
-
#line
|
672
|
+
#line 673 "ext/fastcsv/fastcsv.c"
|
618
673
|
switch( (*p) ) {
|
619
674
|
case 0: goto st0;
|
620
|
-
case
|
621
|
-
case 13: goto tr8;
|
622
|
-
case 34: goto tr9;
|
675
|
+
case 34: goto tr8;
|
623
676
|
}
|
624
677
|
goto st2;
|
625
678
|
st0:
|
626
679
|
cs = 0;
|
627
680
|
goto _out;
|
628
|
-
|
629
|
-
#line
|
681
|
+
tr8:
|
682
|
+
#line 60 "ext/fastcsv/fastcsv.rl"
|
630
683
|
{
|
631
684
|
if (p == ts) {
|
632
685
|
field = rb_enc_str_new("", 0, encoding);
|
@@ -653,7 +706,7 @@ tr9:
|
|
653
706
|
reader++;
|
654
707
|
}
|
655
708
|
|
656
|
-
field = rb_enc_str_new(copy, writer - copy,
|
709
|
+
field = rb_enc_str_new(copy, writer - copy, encoding);
|
657
710
|
ENCODE;
|
658
711
|
|
659
712
|
if (copy != NULL) {
|
@@ -661,7 +714,7 @@ tr9:
|
|
661
714
|
}
|
662
715
|
}
|
663
716
|
}
|
664
|
-
#line
|
717
|
+
#line 45 "ext/fastcsv/fastcsv.rl"
|
665
718
|
{
|
666
719
|
unclosed_line = 0;
|
667
720
|
}
|
@@ -670,13 +723,13 @@ st3:
|
|
670
723
|
if ( ++p == pe )
|
671
724
|
goto _test_eof3;
|
672
725
|
case 3:
|
673
|
-
#line
|
726
|
+
#line 727 "ext/fastcsv/fastcsv.c"
|
674
727
|
switch( (*p) ) {
|
675
|
-
case 0: goto
|
676
|
-
case 10: goto
|
677
|
-
case 13: goto
|
728
|
+
case 0: goto tr9;
|
729
|
+
case 10: goto tr10;
|
730
|
+
case 13: goto tr11;
|
678
731
|
case 34: goto st2;
|
679
|
-
case 44: goto
|
732
|
+
case 44: goto tr12;
|
680
733
|
}
|
681
734
|
goto st0;
|
682
735
|
}
|
@@ -686,7 +739,6 @@ case 3:
|
|
686
739
|
_test_eof6: cs = 6; goto _test_eof;
|
687
740
|
_test_eof7: cs = 7; goto _test_eof;
|
688
741
|
_test_eof8: cs = 8; goto _test_eof;
|
689
|
-
_test_eof9: cs = 9; goto _test_eof;
|
690
742
|
_test_eof2: cs = 2; goto _test_eof;
|
691
743
|
_test_eof3: cs = 3; goto _test_eof;
|
692
744
|
|
@@ -695,32 +747,26 @@ case 3:
|
|
695
747
|
{
|
696
748
|
switch ( cs ) {
|
697
749
|
case 1: goto tr0;
|
698
|
-
case 5: goto
|
699
|
-
case 6: goto
|
700
|
-
case 7: goto
|
701
|
-
case 8: goto
|
702
|
-
case 9: goto tr16;
|
750
|
+
case 5: goto tr15;
|
751
|
+
case 6: goto tr16;
|
752
|
+
case 7: goto tr16;
|
753
|
+
case 8: goto tr15;
|
703
754
|
}
|
704
755
|
}
|
705
756
|
|
706
757
|
_out: {}
|
707
758
|
}
|
708
759
|
|
709
|
-
#line
|
760
|
+
#line 427 "ext/fastcsv/fastcsv.rl"
|
761
|
+
|
762
|
+
if (done && cs < raw_parse_first_final) {
|
763
|
+
FREE;
|
710
764
|
|
711
|
-
if (done && cs < fastcsv_first_final) {
|
712
|
-
if (buf != NULL) {
|
713
|
-
free(buf);
|
714
|
-
}
|
715
765
|
if (unclosed_line) {
|
716
|
-
rb_raise(
|
766
|
+
rb_raise(eError, "Unclosed quoted field on line %d.", unclosed_line);
|
717
767
|
}
|
718
|
-
// Ruby raises different errors for illegal quoting, depending on whether
|
719
|
-
// a quoted string is followed by a string ("Unclosed quoted field on line
|
720
|
-
// %d.") or by a string ending in a quote ("Missing or stray quote in line
|
721
|
-
// %d"). These precisions are kind of bogus, but we can try using $!.
|
722
768
|
else {
|
723
|
-
rb_raise(
|
769
|
+
rb_raise(eError, "Illegal quoting in line %d.", curline);
|
724
770
|
}
|
725
771
|
}
|
726
772
|
|
@@ -735,23 +781,35 @@ case 3:
|
|
735
781
|
}
|
736
782
|
}
|
737
783
|
|
738
|
-
|
739
|
-
free(buf);
|
740
|
-
}
|
784
|
+
FREE;
|
741
785
|
|
742
786
|
return Qnil;
|
743
787
|
}
|
744
788
|
|
789
|
+
// @see https://github.com/ruby/ruby/blob/trunk/README.EXT#L616
|
790
|
+
static VALUE allocate(VALUE class) {
|
791
|
+
// @see https://github.com/nofxx/georuby_c/blob/b3b91fd90980d7c295ac8f6012d89878ea7cd569/ext/line.c#L66
|
792
|
+
Data *d = ALLOC(Data);
|
793
|
+
d->start = 0;
|
794
|
+
// @see https://github.com/nofxx/georuby_c/blob/b3b91fd90980d7c295ac8f6012d89878ea7cd569/ext/point.h#L26
|
795
|
+
// rb_gc_mark(d->start) or rb_gc_mark(d) cause warning "passing argument 1 of ‘rb_gc_mark’ makes integer from pointer without a cast"
|
796
|
+
// free(d->start) causes error "pointer being freed was not allocated"
|
797
|
+
return Data_Wrap_Struct(class, NULL, free, d);
|
798
|
+
}
|
799
|
+
|
800
|
+
// @see http://tenderlovemaking.com/2009/12/18/writing-ruby-c-extensions-part-1.html
|
801
|
+
// @see http://tenderlovemaking.com/2010/12/11/writing-ruby-c-extensions-part-2.html
|
745
802
|
void Init_fastcsv() {
|
746
803
|
s_read = rb_intern("read");
|
747
|
-
|
748
|
-
|
749
|
-
|
750
|
-
|
751
|
-
|
752
|
-
|
753
|
-
|
754
|
-
rb_define_attr(
|
755
|
-
|
756
|
-
|
804
|
+
s_row = rb_intern("@row");
|
805
|
+
|
806
|
+
cClass = rb_define_class("FastCSV", rb_const_get(rb_cObject, rb_intern("CSV"))); // class FastCSV < CSV
|
807
|
+
cParser = rb_define_class_under(cClass, "Parser", rb_cObject); // class Parser
|
808
|
+
rb_define_alloc_func(cParser, allocate); //
|
809
|
+
rb_define_method(cParser, "raw_parse", raw_parse, -1); // def raw_parse(port, opts = nil); end
|
810
|
+
rb_define_attr(cParser, "row", 1, 0); // attr_reader :row
|
811
|
+
rb_define_attr(cParser, "buffer_size", 1, 1); // attr_accessor :buffer_size
|
812
|
+
// end
|
813
|
+
eError = rb_define_class_under(cClass, "MalformedCSVError", rb_eRuntimeError); // class MalformedCSVError < RuntimeError
|
814
|
+
// end
|
757
815
|
}
|