fastcsv 0.0.2 → 0.0.3
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.travis.yml +11 -0
- data/README.md +37 -2
- data/TESTS.md +42 -0
- data/ext/fastcsv/fastcsv.c +281 -223
- data/ext/fastcsv/fastcsv.rl +149 -72
- data/fastcsv.gemspec +1 -1
- data/lib/fastcsv.rb +130 -0
- data/spec/fastcsv_spec.rb +189 -57
- data/spec/fixtures/csv.csv +3 -0
- data/spec/fixtures/iso-8859-1-quoted.csv +1 -0
- data/spec/fixtures/utf-8-quoted.csv +1 -0
- data/spec/spec_helper.rb +5 -0
- data/test/csv/base.rb +8 -0
- data/test/csv/line_endings.gz +0 -0
- data/test/csv/test_csv_parsing.rb +221 -0
- data/test/csv/test_csv_writing.rb +97 -0
- data/test/csv/test_data_converters.rb +263 -0
- data/test/csv/test_encodings.rb +339 -0
- data/test/csv/test_features.rb +317 -0
- data/test/csv/test_headers.rb +289 -0
- data/test/csv/test_interface.rb +362 -0
- data/test/csv/test_row.rb +349 -0
- data/test/csv/test_table.rb +420 -0
- data/test/csv/ts_all.rb +20 -0
- data/test/runner.rb +36 -0
- data/test/with_different_ofs.rb +17 -0
- metadata +38 -2
data/ext/fastcsv/fastcsv.c
CHANGED
@@ -7,7 +7,7 @@
|
|
7
7
|
// http://w3c.github.io/csvw/syntax/#ebnf
|
8
8
|
|
9
9
|
// CSV implementation.
|
10
|
-
// https://github.com/ruby/ruby/blob/
|
10
|
+
// https://github.com/ruby/ruby/blob/trunk/lib/csv.rb
|
11
11
|
|
12
12
|
// Ruby C extensions help.
|
13
13
|
// https://github.com/ruby/ruby/blob/trunk/README.EXT
|
@@ -21,31 +21,42 @@ if (enc2 != NULL) { \
|
|
21
21
|
field = rb_str_encode(field, rb_enc_from_encoding(enc), 0, Qnil); \
|
22
22
|
}
|
23
23
|
|
24
|
-
|
25
|
-
|
24
|
+
#define FREE \
|
25
|
+
if (buf != NULL) { \
|
26
|
+
free(buf); \
|
27
|
+
} \
|
28
|
+
if (row_sep != NULL) { \
|
29
|
+
free(row_sep); \
|
30
|
+
}
|
31
|
+
|
32
|
+
static VALUE cClass, cParser, eError;
|
33
|
+
static ID s_read, s_row;
|
34
|
+
|
35
|
+
// @see https://github.com/nofxx/georuby_c/blob/b3b91fd90980d7c295ac8f6012d89878ea7cd569/ext/types.h#L22
|
36
|
+
typedef struct {
|
37
|
+
char *start;
|
38
|
+
} Data;
|
26
39
|
|
27
40
|
|
28
|
-
#line
|
41
|
+
#line 170 "ext/fastcsv/fastcsv.rl"
|
29
42
|
|
30
43
|
|
31
44
|
|
32
|
-
#line
|
33
|
-
static const int
|
34
|
-
static const int
|
35
|
-
static const int
|
45
|
+
#line 46 "ext/fastcsv/fastcsv.c"
|
46
|
+
static const int raw_parse_start = 4;
|
47
|
+
static const int raw_parse_first_final = 4;
|
48
|
+
static const int raw_parse_error = 0;
|
36
49
|
|
37
|
-
static const int
|
50
|
+
static const int raw_parse_en_main = 4;
|
38
51
|
|
39
52
|
|
40
|
-
#line
|
53
|
+
#line 173 "ext/fastcsv/fastcsv.rl"
|
41
54
|
|
42
55
|
// 16 kB
|
43
56
|
#define BUFSIZE 16384
|
44
57
|
|
45
58
|
// @see http://rxr.whitequark.org/mri/source/io.c#4845
|
46
|
-
static void
|
47
|
-
rb_io_ext_int_to_encs(rb_encoding *ext, rb_encoding *intern, rb_encoding **enc, rb_encoding **enc2, int fmode)
|
48
|
-
{
|
59
|
+
static void rb_io_ext_int_to_encs(rb_encoding *ext, rb_encoding *intern, rb_encoding **enc, rb_encoding **enc2, int fmode) {
|
49
60
|
int default_ext = 0;
|
50
61
|
|
51
62
|
if (ext == NULL) {
|
@@ -70,15 +81,17 @@ rb_io_ext_int_to_encs(rb_encoding *ext, rb_encoding *intern, rb_encoding **enc,
|
|
70
81
|
}
|
71
82
|
}
|
72
83
|
|
73
|
-
VALUE
|
84
|
+
static VALUE raw_parse(int argc, VALUE *argv, VALUE self) {
|
74
85
|
int cs, act, have = 0, curline = 1, io = 0;
|
75
|
-
char *ts = 0, *te = 0, *buf = 0, *eof = 0;
|
86
|
+
char *ts = 0, *te = 0, *buf = 0, *eof = 0, *mark_row_sep = 0, *row_sep = NULL;
|
76
87
|
|
77
|
-
VALUE port, opts;
|
88
|
+
VALUE port, opts, r_encoding;
|
78
89
|
VALUE row = rb_ary_new(), field = Qnil, bufsize = Qnil;
|
79
|
-
int done = 0, unclosed_line = 0, buffer_size = 0, taint = 0;
|
90
|
+
int done = 0, unclosed_line = 0, len_row_sep = 0, buffer_size = 0, taint = 0;
|
80
91
|
rb_encoding *enc = NULL, *enc2 = NULL, *encoding = NULL;
|
81
|
-
|
92
|
+
|
93
|
+
Data *d;
|
94
|
+
Data_Get_Struct(self, Data, d);
|
82
95
|
|
83
96
|
VALUE option;
|
84
97
|
char quote_char = '"';
|
@@ -87,8 +100,8 @@ VALUE fastcsv(int argc, VALUE *argv, VALUE self) {
|
|
87
100
|
taint = OBJ_TAINTED(port);
|
88
101
|
io = rb_respond_to(port, s_read);
|
89
102
|
if (!io) {
|
90
|
-
if (rb_respond_to(port,
|
91
|
-
port = rb_funcall(port,
|
103
|
+
if (rb_respond_to(port, rb_intern("to_str"))) {
|
104
|
+
port = rb_funcall(port, rb_intern("to_str"), 0);
|
92
105
|
StringValue(port);
|
93
106
|
}
|
94
107
|
else {
|
@@ -112,7 +125,7 @@ VALUE fastcsv(int argc, VALUE *argv, VALUE self) {
|
|
112
125
|
// @see http://ruby-doc.org/core-2.1.1/IO.html#method-c-new-label-Open+Mode
|
113
126
|
option = rb_hash_aref(opts, ID2SYM(rb_intern("encoding")));
|
114
127
|
if (TYPE(option) == T_STRING) {
|
115
|
-
// parse_mode_enc is not in header file.
|
128
|
+
// `parse_mode_enc` is not in header file.
|
116
129
|
const char *estr = StringValueCStr(option), *ptr;
|
117
130
|
char encname[ENCODING_MAXNAMELEN+1];
|
118
131
|
int idx, idx2;
|
@@ -123,17 +136,17 @@ VALUE fastcsv(int argc, VALUE *argv, VALUE self) {
|
|
123
136
|
ptr = strrchr(estr, ':');
|
124
137
|
if (ptr) {
|
125
138
|
long len = (ptr++) - estr;
|
126
|
-
if (len == 0 || len > ENCODING_MAXNAMELEN) {
|
139
|
+
if (len == 0 || len > ENCODING_MAXNAMELEN) { // ":enc"
|
127
140
|
idx = -1;
|
128
141
|
}
|
129
|
-
else {
|
142
|
+
else { // "enc2:enc" or "enc:-"
|
130
143
|
memcpy(encname, estr, len);
|
131
144
|
encname[len] = '\0';
|
132
145
|
estr = encname;
|
133
146
|
idx = rb_enc_find_index(encname);
|
134
147
|
}
|
135
148
|
}
|
136
|
-
else {
|
149
|
+
else { // "enc"
|
137
150
|
idx = rb_enc_find_index(estr);
|
138
151
|
}
|
139
152
|
|
@@ -141,7 +154,7 @@ VALUE fastcsv(int argc, VALUE *argv, VALUE self) {
|
|
141
154
|
ext_enc = rb_enc_from_index(idx);
|
142
155
|
}
|
143
156
|
else {
|
144
|
-
if (idx != -2) {
|
157
|
+
if (idx != -2) { // ":enc"
|
145
158
|
// `unsupported_encoding` is not in header file.
|
146
159
|
rb_warn("Unsupported encoding %s ignored", estr);
|
147
160
|
}
|
@@ -150,11 +163,11 @@ VALUE fastcsv(int argc, VALUE *argv, VALUE self) {
|
|
150
163
|
|
151
164
|
int_enc = NULL;
|
152
165
|
if (ptr) {
|
153
|
-
if (*ptr == '-' && *(ptr+1) == '\0') {
|
166
|
+
if (*ptr == '-' && *(ptr+1) == '\0') { // "enc:-"
|
154
167
|
/* Special case - "-" => no transcoding */
|
155
168
|
int_enc = (rb_encoding *)Qnil;
|
156
169
|
}
|
157
|
-
else {
|
170
|
+
else { // "enc2:enc"
|
158
171
|
idx2 = rb_enc_find_index(ptr);
|
159
172
|
if (idx2 < 0) {
|
160
173
|
// `unsupported_encoding` is not in header file.
|
@@ -175,29 +188,33 @@ VALUE fastcsv(int argc, VALUE *argv, VALUE self) {
|
|
175
188
|
rb_raise(rb_eArgError, ":encoding has to be a String");
|
176
189
|
}
|
177
190
|
|
178
|
-
// @see
|
179
|
-
// @see https://github.com/ruby/ruby/blob/
|
180
|
-
if (rb_respond_to(port,
|
181
|
-
r_encoding = rb_funcall(port,
|
191
|
+
// @see CSV#raw_encoding
|
192
|
+
// @see https://github.com/ruby/ruby/blob/ab337e61ecb5f42384ba7d710c36faf96a454e5c/lib/csv.rb#L2290
|
193
|
+
if (rb_respond_to(port, rb_intern("internal_encoding"))) {
|
194
|
+
r_encoding = rb_funcall(port, rb_intern("internal_encoding"), 0);
|
182
195
|
if (NIL_P(r_encoding)) {
|
183
|
-
r_encoding = rb_funcall(port,
|
196
|
+
r_encoding = rb_funcall(port, rb_intern("external_encoding"), 0);
|
184
197
|
}
|
185
198
|
}
|
186
|
-
else if (rb_respond_to(port,
|
187
|
-
r_encoding = rb_funcall(rb_funcall(port,
|
199
|
+
else if (rb_respond_to(port, rb_intern("string"))) {
|
200
|
+
r_encoding = rb_funcall(rb_funcall(port, rb_intern("string"), 0), rb_intern("encoding"), 0);
|
188
201
|
}
|
189
|
-
else if (rb_respond_to(port,
|
190
|
-
r_encoding = rb_funcall(port,
|
202
|
+
else if (rb_respond_to(port, rb_intern("encoding"))) {
|
203
|
+
r_encoding = rb_funcall(port, rb_intern("encoding"), 0);
|
191
204
|
}
|
192
205
|
else {
|
193
206
|
r_encoding = rb_enc_from_encoding(rb_ascii8bit_encoding());
|
194
207
|
}
|
208
|
+
|
209
|
+
// @see CSV#initialize
|
210
|
+
// @see https://github.com/ruby/ruby/blob/ab337e61ecb5f42384ba7d710c36faf96a454e5c/lib/csv.rb#L1510
|
195
211
|
if (NIL_P(r_encoding)) {
|
196
212
|
r_encoding = rb_enc_from_encoding(rb_default_internal_encoding());
|
197
213
|
}
|
198
214
|
if (NIL_P(r_encoding)) {
|
199
215
|
r_encoding = rb_enc_from_encoding(rb_default_external_encoding());
|
200
216
|
}
|
217
|
+
|
201
218
|
if (enc2 != NULL) {
|
202
219
|
encoding = enc2;
|
203
220
|
}
|
@@ -208,11 +225,17 @@ VALUE fastcsv(int argc, VALUE *argv, VALUE self) {
|
|
208
225
|
encoding = rb_enc_get(r_encoding);
|
209
226
|
}
|
210
227
|
|
228
|
+
rb_ivar_set(self, s_row, Qnil);
|
229
|
+
|
211
230
|
buffer_size = BUFSIZE;
|
212
231
|
if (rb_ivar_defined(self, rb_intern("@buffer_size")) == Qtrue) {
|
213
232
|
bufsize = rb_ivar_get(self, rb_intern("@buffer_size"));
|
214
233
|
if (!NIL_P(bufsize)) {
|
215
234
|
buffer_size = NUM2INT(bufsize);
|
235
|
+
// buffer_size = 0 can cause segmentation faults.
|
236
|
+
if (buffer_size == 0) {
|
237
|
+
buffer_size = BUFSIZE;
|
238
|
+
}
|
216
239
|
}
|
217
240
|
}
|
218
241
|
|
@@ -221,39 +244,47 @@ VALUE fastcsv(int argc, VALUE *argv, VALUE self) {
|
|
221
244
|
}
|
222
245
|
|
223
246
|
|
224
|
-
#line
|
247
|
+
#line 248 "ext/fastcsv/fastcsv.c"
|
225
248
|
{
|
226
|
-
cs =
|
249
|
+
cs = raw_parse_start;
|
227
250
|
ts = 0;
|
228
251
|
te = 0;
|
229
252
|
act = 0;
|
230
253
|
}
|
231
254
|
|
232
|
-
#line
|
255
|
+
#line 366 "ext/fastcsv/fastcsv.rl"
|
233
256
|
|
234
257
|
while (!done) {
|
235
258
|
VALUE str;
|
236
259
|
char *p, *pe;
|
237
|
-
int len, space = buffer_size - have, tokstart_diff, tokend_diff;
|
260
|
+
int len, space = buffer_size - have, tokstart_diff, tokend_diff, start_diff, mark_row_sep_diff;
|
238
261
|
|
239
262
|
if (io) {
|
240
263
|
if (space == 0) {
|
241
|
-
|
242
|
-
|
264
|
+
// Not moving d->start will cause intermittent segmentation faults.
|
265
|
+
tokstart_diff = ts - buf;
|
266
|
+
tokend_diff = te - buf;
|
267
|
+
start_diff = d->start - buf;
|
268
|
+
mark_row_sep_diff = mark_row_sep - buf;
|
243
269
|
|
244
|
-
|
245
|
-
|
270
|
+
buffer_size += BUFSIZE;
|
271
|
+
REALLOC_N(buf, char, buffer_size);
|
246
272
|
|
247
|
-
|
273
|
+
space = buffer_size - have;
|
248
274
|
|
249
|
-
|
250
|
-
|
275
|
+
ts = buf + tokstart_diff;
|
276
|
+
te = buf + tokend_diff;
|
277
|
+
d->start = buf + start_diff;
|
278
|
+
mark_row_sep = buf + mark_row_sep_diff;
|
251
279
|
}
|
252
280
|
p = buf + have;
|
253
281
|
|
282
|
+
// Reads "`length` bytes without any conversion (binary mode)."
|
283
|
+
// "The resulted string is always ASCII-8BIT encoding."
|
284
|
+
// @see http://www.ruby-doc.org/core-2.1.4/IO.html#method-i-read
|
254
285
|
str = rb_funcall(port, s_read, 1, INT2FIX(space));
|
255
286
|
if (NIL_P(str)) {
|
256
|
-
//
|
287
|
+
// "`nil` means it met EOF at beginning," e.g. for `StringIO.new("")`.
|
257
288
|
len = 0;
|
258
289
|
}
|
259
290
|
else {
|
@@ -261,6 +292,7 @@ VALUE fastcsv(int argc, VALUE *argv, VALUE self) {
|
|
261
292
|
memcpy(p, StringValuePtr(str), len);
|
262
293
|
}
|
263
294
|
|
295
|
+
// "The 1 to `length`-1 bytes string means it met EOF after reading the result."
|
264
296
|
if (len < space) {
|
265
297
|
// EOF actions don't work in scanners, so we add a sentinel value.
|
266
298
|
// @see http://www.complang.org/pipermail/ragel-users/2007-May/001516.html
|
@@ -276,9 +308,13 @@ VALUE fastcsv(int argc, VALUE *argv, VALUE self) {
|
|
276
308
|
done = 1;
|
277
309
|
}
|
278
310
|
|
311
|
+
if (d->start == 0) {
|
312
|
+
d->start = p;
|
313
|
+
}
|
314
|
+
|
279
315
|
pe = p + len;
|
280
316
|
|
281
|
-
#line
|
317
|
+
#line 318 "ext/fastcsv/fastcsv.c"
|
282
318
|
{
|
283
319
|
if ( p == pe )
|
284
320
|
goto _test_eof;
|
@@ -296,56 +332,80 @@ tr0:
|
|
296
332
|
}
|
297
333
|
}
|
298
334
|
goto st4;
|
299
|
-
|
300
|
-
#line
|
335
|
+
tr5:
|
336
|
+
#line 49 "ext/fastcsv/fastcsv.rl"
|
301
337
|
{
|
302
|
-
if (
|
303
|
-
|
338
|
+
if (p == ts) {
|
339
|
+
// Unquoted empty fields are nil, not "", in Ruby.
|
340
|
+
field = Qnil;
|
304
341
|
}
|
305
|
-
if (
|
306
|
-
|
342
|
+
else if (p > ts) {
|
343
|
+
field = rb_enc_str_new(ts, p - ts, encoding);
|
344
|
+
ENCODE;
|
307
345
|
}
|
308
346
|
}
|
309
|
-
#line
|
347
|
+
#line 95 "ext/fastcsv/fastcsv.rl"
|
348
|
+
{
|
349
|
+
rb_ary_push(row, field);
|
350
|
+
field = Qnil;
|
351
|
+
}
|
352
|
+
#line 166 "ext/fastcsv/fastcsv.rl"
|
310
353
|
{te = p+1;}
|
311
354
|
goto st4;
|
312
|
-
|
313
|
-
#line
|
314
|
-
{te = p;p--;}
|
315
|
-
goto st4;
|
316
|
-
tr17:
|
317
|
-
#line 122 "ext/fastcsv/fastcsv.rl"
|
318
|
-
{te = p;p--;}
|
319
|
-
goto st4;
|
320
|
-
tr18:
|
321
|
-
#line 101 "ext/fastcsv/fastcsv.rl"
|
355
|
+
tr9:
|
356
|
+
#line 138 "ext/fastcsv/fastcsv.rl"
|
322
357
|
{
|
358
|
+
if (d->start == 0 || p == d->start) {
|
359
|
+
rb_ivar_set(self, s_row, rb_str_new2(""));
|
360
|
+
}
|
361
|
+
else if (p > d->start) {
|
362
|
+
rb_ivar_set(self, s_row, rb_str_new(d->start, p - d->start));
|
363
|
+
}
|
364
|
+
|
323
365
|
if (!NIL_P(field) || RARRAY_LEN(row)) {
|
324
366
|
rb_ary_push(row, field);
|
325
367
|
}
|
368
|
+
|
326
369
|
if (RARRAY_LEN(row)) {
|
327
370
|
rb_yield(row);
|
328
371
|
}
|
329
372
|
}
|
330
|
-
#line
|
373
|
+
#line 168 "ext/fastcsv/fastcsv.rl"
|
374
|
+
{te = p+1;}
|
375
|
+
goto st4;
|
376
|
+
tr12:
|
377
|
+
#line 95 "ext/fastcsv/fastcsv.rl"
|
378
|
+
{
|
379
|
+
rb_ary_push(row, field);
|
380
|
+
field = Qnil;
|
381
|
+
}
|
382
|
+
#line 166 "ext/fastcsv/fastcsv.rl"
|
331
383
|
{te = p+1;}
|
332
384
|
goto st4;
|
333
|
-
|
334
|
-
#line
|
385
|
+
tr15:
|
386
|
+
#line 168 "ext/fastcsv/fastcsv.rl"
|
335
387
|
{te = p;p--;}
|
336
388
|
goto st4;
|
337
|
-
|
338
|
-
#line
|
389
|
+
tr16:
|
390
|
+
#line 100 "ext/fastcsv/fastcsv.rl"
|
339
391
|
{
|
340
|
-
|
341
|
-
|
392
|
+
d->start = p;
|
393
|
+
|
394
|
+
if (len_row_sep) {
|
395
|
+
if (p - mark_row_sep != len_row_sep || row_sep[0] != *mark_row_sep || len_row_sep == 2 && row_sep[1] != *(mark_row_sep + 1)) {
|
396
|
+
FREE;
|
397
|
+
|
398
|
+
rb_raise(eError, "Unquoted fields do not allow \\r or \\n (line %d).", curline - 1);
|
399
|
+
}
|
342
400
|
}
|
343
|
-
|
344
|
-
|
401
|
+
else {
|
402
|
+
len_row_sep = p - mark_row_sep;
|
403
|
+
row_sep = ALLOC_N(char, p - mark_row_sep);
|
404
|
+
memcpy(row_sep, mark_row_sep, p - mark_row_sep);
|
345
405
|
}
|
346
406
|
}
|
347
|
-
#line
|
348
|
-
{te = p
|
407
|
+
#line 167 "ext/fastcsv/fastcsv.rl"
|
408
|
+
{te = p;p--;}
|
349
409
|
goto st4;
|
350
410
|
st4:
|
351
411
|
#line 1 "NONE"
|
@@ -357,12 +417,12 @@ st4:
|
|
357
417
|
case 4:
|
358
418
|
#line 1 "NONE"
|
359
419
|
{ts = p;}
|
360
|
-
#line
|
420
|
+
#line 421 "ext/fastcsv/fastcsv.c"
|
361
421
|
switch( (*p) ) {
|
362
|
-
case 0: goto
|
422
|
+
case 0: goto tr13;
|
363
423
|
case 10: goto tr3;
|
364
424
|
case 13: goto tr4;
|
365
|
-
case 34: goto
|
425
|
+
case 34: goto tr14;
|
366
426
|
case 44: goto tr5;
|
367
427
|
}
|
368
428
|
goto st1;
|
@@ -381,7 +441,7 @@ case 1:
|
|
381
441
|
tr2:
|
382
442
|
#line 1 "NONE"
|
383
443
|
{te = p+1;}
|
384
|
-
#line
|
444
|
+
#line 49 "ext/fastcsv/fastcsv.rl"
|
385
445
|
{
|
386
446
|
if (p == ts) {
|
387
447
|
// Unquoted empty fields are nil, not "", in Ruby.
|
@@ -392,33 +452,41 @@ tr2:
|
|
392
452
|
ENCODE;
|
393
453
|
}
|
394
454
|
}
|
395
|
-
#line
|
455
|
+
#line 138 "ext/fastcsv/fastcsv.rl"
|
396
456
|
{
|
457
|
+
if (d->start == 0 || p == d->start) {
|
458
|
+
rb_ivar_set(self, s_row, rb_str_new2(""));
|
459
|
+
}
|
460
|
+
else if (p > d->start) {
|
461
|
+
rb_ivar_set(self, s_row, rb_str_new(d->start, p - d->start));
|
462
|
+
}
|
463
|
+
|
397
464
|
if (!NIL_P(field) || RARRAY_LEN(row)) {
|
398
465
|
rb_ary_push(row, field);
|
399
466
|
}
|
467
|
+
|
400
468
|
if (RARRAY_LEN(row)) {
|
401
469
|
rb_yield(row);
|
402
470
|
}
|
403
471
|
}
|
404
|
-
#line
|
472
|
+
#line 168 "ext/fastcsv/fastcsv.rl"
|
405
473
|
{act = 3;}
|
406
474
|
goto st5;
|
407
475
|
st5:
|
408
476
|
if ( ++p == pe )
|
409
477
|
goto _test_eof5;
|
410
478
|
case 5:
|
411
|
-
#line
|
479
|
+
#line 480 "ext/fastcsv/fastcsv.c"
|
412
480
|
switch( (*p) ) {
|
413
481
|
case 0: goto tr2;
|
414
482
|
case 10: goto tr3;
|
415
483
|
case 13: goto tr4;
|
416
|
-
case 34: goto
|
484
|
+
case 34: goto tr15;
|
417
485
|
case 44: goto tr5;
|
418
486
|
}
|
419
487
|
goto st1;
|
420
488
|
tr3:
|
421
|
-
#line
|
489
|
+
#line 49 "ext/fastcsv/fastcsv.rl"
|
422
490
|
{
|
423
491
|
if (p == ts) {
|
424
492
|
// Unquoted empty fields are nil, not "", in Ruby.
|
@@ -429,8 +497,19 @@ tr3:
|
|
429
497
|
ENCODE;
|
430
498
|
}
|
431
499
|
}
|
432
|
-
#line
|
500
|
+
#line 117 "ext/fastcsv/fastcsv.rl"
|
433
501
|
{
|
502
|
+
mark_row_sep = p;
|
503
|
+
|
504
|
+
curline++;
|
505
|
+
|
506
|
+
if (d->start == 0 || p == d->start) {
|
507
|
+
rb_ivar_set(self, s_row, rb_str_new2(""));
|
508
|
+
}
|
509
|
+
else if (p > d->start) {
|
510
|
+
rb_ivar_set(self, s_row, rb_str_new(d->start, p - d->start));
|
511
|
+
}
|
512
|
+
|
434
513
|
if (!NIL_P(field) || RARRAY_LEN(row)) { // same as new_field
|
435
514
|
rb_ary_push(row, field);
|
436
515
|
field = Qnil;
|
@@ -438,21 +517,22 @@ tr3:
|
|
438
517
|
|
439
518
|
rb_yield(row);
|
440
519
|
row = rb_ary_new();
|
441
|
-
}
|
442
|
-
#line 28 "ext/fastcsv/fastcsv.rl"
|
443
|
-
{
|
444
|
-
curline++;
|
445
520
|
}
|
446
521
|
goto st6;
|
447
|
-
|
448
|
-
#line
|
522
|
+
tr10:
|
523
|
+
#line 117 "ext/fastcsv/fastcsv.rl"
|
449
524
|
{
|
525
|
+
mark_row_sep = p;
|
526
|
+
|
450
527
|
curline++;
|
451
|
-
|
452
|
-
|
453
|
-
|
454
|
-
|
455
|
-
|
528
|
+
|
529
|
+
if (d->start == 0 || p == d->start) {
|
530
|
+
rb_ivar_set(self, s_row, rb_str_new2(""));
|
531
|
+
}
|
532
|
+
else if (p > d->start) {
|
533
|
+
rb_ivar_set(self, s_row, rb_str_new(d->start, p - d->start));
|
534
|
+
}
|
535
|
+
|
456
536
|
if (!NIL_P(field) || RARRAY_LEN(row)) { // same as new_field
|
457
537
|
rb_ary_push(row, field);
|
458
538
|
field = Qnil;
|
@@ -460,22 +540,16 @@ tr11:
|
|
460
540
|
|
461
541
|
rb_yield(row);
|
462
542
|
row = rb_ary_new();
|
463
|
-
}
|
464
|
-
#line 28 "ext/fastcsv/fastcsv.rl"
|
465
|
-
{
|
466
|
-
curline++;
|
467
543
|
}
|
468
544
|
goto st6;
|
469
545
|
st6:
|
470
546
|
if ( ++p == pe )
|
471
547
|
goto _test_eof6;
|
472
548
|
case 6:
|
473
|
-
#line
|
474
|
-
|
475
|
-
goto tr18;
|
476
|
-
goto tr17;
|
549
|
+
#line 550 "ext/fastcsv/fastcsv.c"
|
550
|
+
goto tr16;
|
477
551
|
tr4:
|
478
|
-
#line
|
552
|
+
#line 49 "ext/fastcsv/fastcsv.rl"
|
479
553
|
{
|
480
554
|
if (p == ts) {
|
481
555
|
// Unquoted empty fields are nil, not "", in Ruby.
|
@@ -486,8 +560,19 @@ tr4:
|
|
486
560
|
ENCODE;
|
487
561
|
}
|
488
562
|
}
|
489
|
-
#line
|
563
|
+
#line 117 "ext/fastcsv/fastcsv.rl"
|
490
564
|
{
|
565
|
+
mark_row_sep = p;
|
566
|
+
|
567
|
+
curline++;
|
568
|
+
|
569
|
+
if (d->start == 0 || p == d->start) {
|
570
|
+
rb_ivar_set(self, s_row, rb_str_new2(""));
|
571
|
+
}
|
572
|
+
else if (p > d->start) {
|
573
|
+
rb_ivar_set(self, s_row, rb_str_new(d->start, p - d->start));
|
574
|
+
}
|
575
|
+
|
491
576
|
if (!NIL_P(field) || RARRAY_LEN(row)) { // same as new_field
|
492
577
|
rb_ary_push(row, field);
|
493
578
|
field = Qnil;
|
@@ -495,15 +580,22 @@ tr4:
|
|
495
580
|
|
496
581
|
rb_yield(row);
|
497
582
|
row = rb_ary_new();
|
498
|
-
}
|
499
|
-
#line 28 "ext/fastcsv/fastcsv.rl"
|
500
|
-
{
|
501
|
-
curline++;
|
502
583
|
}
|
503
584
|
goto st7;
|
504
|
-
|
505
|
-
#line
|
585
|
+
tr11:
|
586
|
+
#line 117 "ext/fastcsv/fastcsv.rl"
|
506
587
|
{
|
588
|
+
mark_row_sep = p;
|
589
|
+
|
590
|
+
curline++;
|
591
|
+
|
592
|
+
if (d->start == 0 || p == d->start) {
|
593
|
+
rb_ivar_set(self, s_row, rb_str_new2(""));
|
594
|
+
}
|
595
|
+
else if (p > d->start) {
|
596
|
+
rb_ivar_set(self, s_row, rb_str_new(d->start, p - d->start));
|
597
|
+
}
|
598
|
+
|
507
599
|
if (!NIL_P(field) || RARRAY_LEN(row)) { // same as new_field
|
508
600
|
rb_ary_push(row, field);
|
509
601
|
field = Qnil;
|
@@ -511,24 +603,20 @@ tr12:
|
|
511
603
|
|
512
604
|
rb_yield(row);
|
513
605
|
row = rb_ary_new();
|
514
|
-
}
|
515
|
-
#line 28 "ext/fastcsv/fastcsv.rl"
|
516
|
-
{
|
517
|
-
curline++;
|
518
606
|
}
|
519
607
|
goto st7;
|
520
608
|
st7:
|
521
609
|
if ( ++p == pe )
|
522
610
|
goto _test_eof7;
|
523
611
|
case 7:
|
524
|
-
#line
|
525
|
-
|
526
|
-
|
527
|
-
|
528
|
-
|
529
|
-
|
530
|
-
|
531
|
-
#line
|
612
|
+
#line 613 "ext/fastcsv/fastcsv.c"
|
613
|
+
if ( (*p) == 10 )
|
614
|
+
goto st6;
|
615
|
+
goto tr16;
|
616
|
+
tr13:
|
617
|
+
#line 1 "NONE"
|
618
|
+
{te = p+1;}
|
619
|
+
#line 49 "ext/fastcsv/fastcsv.rl"
|
532
620
|
{
|
533
621
|
if (p == ts) {
|
534
622
|
// Unquoted empty fields are nil, not "", in Ruby.
|
@@ -539,73 +627,40 @@ tr5:
|
|
539
627
|
ENCODE;
|
540
628
|
}
|
541
629
|
}
|
542
|
-
#line
|
543
|
-
{
|
544
|
-
rb_ary_push(row, field);
|
545
|
-
field = Qnil;
|
546
|
-
}
|
547
|
-
goto st8;
|
548
|
-
tr13:
|
549
|
-
#line 86 "ext/fastcsv/fastcsv.rl"
|
550
|
-
{
|
551
|
-
rb_ary_push(row, field);
|
552
|
-
field = Qnil;
|
553
|
-
}
|
554
|
-
goto st8;
|
555
|
-
st8:
|
556
|
-
if ( ++p == pe )
|
557
|
-
goto _test_eof8;
|
558
|
-
case 8:
|
559
|
-
#line 560 "ext/fastcsv/fastcsv.c"
|
560
|
-
if ( (*p) == 0 )
|
561
|
-
goto tr21;
|
562
|
-
goto tr20;
|
563
|
-
tr14:
|
564
|
-
#line 1 "NONE"
|
565
|
-
{te = p+1;}
|
566
|
-
#line 101 "ext/fastcsv/fastcsv.rl"
|
630
|
+
#line 138 "ext/fastcsv/fastcsv.rl"
|
567
631
|
{
|
632
|
+
if (d->start == 0 || p == d->start) {
|
633
|
+
rb_ivar_set(self, s_row, rb_str_new2(""));
|
634
|
+
}
|
635
|
+
else if (p > d->start) {
|
636
|
+
rb_ivar_set(self, s_row, rb_str_new(d->start, p - d->start));
|
637
|
+
}
|
638
|
+
|
568
639
|
if (!NIL_P(field) || RARRAY_LEN(row)) {
|
569
640
|
rb_ary_push(row, field);
|
570
641
|
}
|
642
|
+
|
571
643
|
if (RARRAY_LEN(row)) {
|
572
644
|
rb_yield(row);
|
573
645
|
}
|
574
646
|
}
|
575
|
-
#line
|
576
|
-
{
|
577
|
-
if (p == ts) {
|
578
|
-
// Unquoted empty fields are nil, not "", in Ruby.
|
579
|
-
field = Qnil;
|
580
|
-
}
|
581
|
-
else if (p > ts) {
|
582
|
-
field = rb_enc_str_new(ts, p - ts, encoding);
|
583
|
-
ENCODE;
|
584
|
-
}
|
585
|
-
}
|
586
|
-
#line 123 "ext/fastcsv/fastcsv.rl"
|
647
|
+
#line 168 "ext/fastcsv/fastcsv.rl"
|
587
648
|
{act = 3;}
|
588
|
-
goto
|
589
|
-
|
649
|
+
goto st8;
|
650
|
+
st8:
|
590
651
|
if ( ++p == pe )
|
591
|
-
goto
|
592
|
-
case
|
593
|
-
#line
|
652
|
+
goto _test_eof8;
|
653
|
+
case 8:
|
654
|
+
#line 655 "ext/fastcsv/fastcsv.c"
|
594
655
|
switch( (*p) ) {
|
595
|
-
case 10: goto
|
596
|
-
case 13: goto
|
597
|
-
case 34: goto
|
598
|
-
case 44: goto
|
656
|
+
case 10: goto tr15;
|
657
|
+
case 13: goto tr15;
|
658
|
+
case 34: goto tr15;
|
659
|
+
case 44: goto tr15;
|
599
660
|
}
|
600
661
|
goto st1;
|
601
|
-
|
602
|
-
#line
|
603
|
-
{
|
604
|
-
curline++;
|
605
|
-
}
|
606
|
-
goto st2;
|
607
|
-
tr15:
|
608
|
-
#line 32 "ext/fastcsv/fastcsv.rl"
|
662
|
+
tr14:
|
663
|
+
#line 41 "ext/fastcsv/fastcsv.rl"
|
609
664
|
{
|
610
665
|
unclosed_line = curline;
|
611
666
|
}
|
@@ -614,19 +669,17 @@ st2:
|
|
614
669
|
if ( ++p == pe )
|
615
670
|
goto _test_eof2;
|
616
671
|
case 2:
|
617
|
-
#line
|
672
|
+
#line 673 "ext/fastcsv/fastcsv.c"
|
618
673
|
switch( (*p) ) {
|
619
674
|
case 0: goto st0;
|
620
|
-
case
|
621
|
-
case 13: goto tr8;
|
622
|
-
case 34: goto tr9;
|
675
|
+
case 34: goto tr8;
|
623
676
|
}
|
624
677
|
goto st2;
|
625
678
|
st0:
|
626
679
|
cs = 0;
|
627
680
|
goto _out;
|
628
|
-
|
629
|
-
#line
|
681
|
+
tr8:
|
682
|
+
#line 60 "ext/fastcsv/fastcsv.rl"
|
630
683
|
{
|
631
684
|
if (p == ts) {
|
632
685
|
field = rb_enc_str_new("", 0, encoding);
|
@@ -653,7 +706,7 @@ tr9:
|
|
653
706
|
reader++;
|
654
707
|
}
|
655
708
|
|
656
|
-
field = rb_enc_str_new(copy, writer - copy,
|
709
|
+
field = rb_enc_str_new(copy, writer - copy, encoding);
|
657
710
|
ENCODE;
|
658
711
|
|
659
712
|
if (copy != NULL) {
|
@@ -661,7 +714,7 @@ tr9:
|
|
661
714
|
}
|
662
715
|
}
|
663
716
|
}
|
664
|
-
#line
|
717
|
+
#line 45 "ext/fastcsv/fastcsv.rl"
|
665
718
|
{
|
666
719
|
unclosed_line = 0;
|
667
720
|
}
|
@@ -670,13 +723,13 @@ st3:
|
|
670
723
|
if ( ++p == pe )
|
671
724
|
goto _test_eof3;
|
672
725
|
case 3:
|
673
|
-
#line
|
726
|
+
#line 727 "ext/fastcsv/fastcsv.c"
|
674
727
|
switch( (*p) ) {
|
675
|
-
case 0: goto
|
676
|
-
case 10: goto
|
677
|
-
case 13: goto
|
728
|
+
case 0: goto tr9;
|
729
|
+
case 10: goto tr10;
|
730
|
+
case 13: goto tr11;
|
678
731
|
case 34: goto st2;
|
679
|
-
case 44: goto
|
732
|
+
case 44: goto tr12;
|
680
733
|
}
|
681
734
|
goto st0;
|
682
735
|
}
|
@@ -686,7 +739,6 @@ case 3:
|
|
686
739
|
_test_eof6: cs = 6; goto _test_eof;
|
687
740
|
_test_eof7: cs = 7; goto _test_eof;
|
688
741
|
_test_eof8: cs = 8; goto _test_eof;
|
689
|
-
_test_eof9: cs = 9; goto _test_eof;
|
690
742
|
_test_eof2: cs = 2; goto _test_eof;
|
691
743
|
_test_eof3: cs = 3; goto _test_eof;
|
692
744
|
|
@@ -695,32 +747,26 @@ case 3:
|
|
695
747
|
{
|
696
748
|
switch ( cs ) {
|
697
749
|
case 1: goto tr0;
|
698
|
-
case 5: goto
|
699
|
-
case 6: goto
|
700
|
-
case 7: goto
|
701
|
-
case 8: goto
|
702
|
-
case 9: goto tr16;
|
750
|
+
case 5: goto tr15;
|
751
|
+
case 6: goto tr16;
|
752
|
+
case 7: goto tr16;
|
753
|
+
case 8: goto tr15;
|
703
754
|
}
|
704
755
|
}
|
705
756
|
|
706
757
|
_out: {}
|
707
758
|
}
|
708
759
|
|
709
|
-
#line
|
760
|
+
#line 427 "ext/fastcsv/fastcsv.rl"
|
761
|
+
|
762
|
+
if (done && cs < raw_parse_first_final) {
|
763
|
+
FREE;
|
710
764
|
|
711
|
-
if (done && cs < fastcsv_first_final) {
|
712
|
-
if (buf != NULL) {
|
713
|
-
free(buf);
|
714
|
-
}
|
715
765
|
if (unclosed_line) {
|
716
|
-
rb_raise(
|
766
|
+
rb_raise(eError, "Unclosed quoted field on line %d.", unclosed_line);
|
717
767
|
}
|
718
|
-
// Ruby raises different errors for illegal quoting, depending on whether
|
719
|
-
// a quoted string is followed by a string ("Unclosed quoted field on line
|
720
|
-
// %d.") or by a string ending in a quote ("Missing or stray quote in line
|
721
|
-
// %d"). These precisions are kind of bogus, but we can try using $!.
|
722
768
|
else {
|
723
|
-
rb_raise(
|
769
|
+
rb_raise(eError, "Illegal quoting in line %d.", curline);
|
724
770
|
}
|
725
771
|
}
|
726
772
|
|
@@ -735,23 +781,35 @@ case 3:
|
|
735
781
|
}
|
736
782
|
}
|
737
783
|
|
738
|
-
|
739
|
-
free(buf);
|
740
|
-
}
|
784
|
+
FREE;
|
741
785
|
|
742
786
|
return Qnil;
|
743
787
|
}
|
744
788
|
|
789
|
+
// @see https://github.com/ruby/ruby/blob/trunk/README.EXT#L616
|
790
|
+
static VALUE allocate(VALUE class) {
|
791
|
+
// @see https://github.com/nofxx/georuby_c/blob/b3b91fd90980d7c295ac8f6012d89878ea7cd569/ext/line.c#L66
|
792
|
+
Data *d = ALLOC(Data);
|
793
|
+
d->start = 0;
|
794
|
+
// @see https://github.com/nofxx/georuby_c/blob/b3b91fd90980d7c295ac8f6012d89878ea7cd569/ext/point.h#L26
|
795
|
+
// rb_gc_mark(d->start) or rb_gc_mark(d) cause warning "passing argument 1 of ‘rb_gc_mark’ makes integer from pointer without a cast"
|
796
|
+
// free(d->start) causes error "pointer being freed was not allocated"
|
797
|
+
return Data_Wrap_Struct(class, NULL, free, d);
|
798
|
+
}
|
799
|
+
|
800
|
+
// @see http://tenderlovemaking.com/2009/12/18/writing-ruby-c-extensions-part-1.html
|
801
|
+
// @see http://tenderlovemaking.com/2010/12/11/writing-ruby-c-extensions-part-2.html
|
745
802
|
void Init_fastcsv() {
|
746
803
|
s_read = rb_intern("read");
|
747
|
-
|
748
|
-
|
749
|
-
|
750
|
-
|
751
|
-
|
752
|
-
|
753
|
-
|
754
|
-
rb_define_attr(
|
755
|
-
|
756
|
-
|
804
|
+
s_row = rb_intern("@row");
|
805
|
+
|
806
|
+
cClass = rb_define_class("FastCSV", rb_const_get(rb_cObject, rb_intern("CSV"))); // class FastCSV < CSV
|
807
|
+
cParser = rb_define_class_under(cClass, "Parser", rb_cObject); // class Parser
|
808
|
+
rb_define_alloc_func(cParser, allocate); //
|
809
|
+
rb_define_method(cParser, "raw_parse", raw_parse, -1); // def raw_parse(port, opts = nil); end
|
810
|
+
rb_define_attr(cParser, "row", 1, 0); // attr_reader :row
|
811
|
+
rb_define_attr(cParser, "buffer_size", 1, 1); // attr_accessor :buffer_size
|
812
|
+
// end
|
813
|
+
eError = rb_define_class_under(cClass, "MalformedCSVError", rb_eRuntimeError); // class MalformedCSVError < RuntimeError
|
814
|
+
// end
|
757
815
|
}
|