fastcsv 0.0.1 → 0.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/ext/fastcsv/fastcsv.c +185 -125
- data/ext/fastcsv/fastcsv.rl +130 -80
- data/fastcsv.gemspec +1 -1
- data/spec/fastcsv_spec.rb +69 -43
- metadata +1 -1
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 129c6ed1d3b30a44456108f280a582ccdaac96e9
|
4
|
+
data.tar.gz: 4d819f3bb6e637cb5fb3e130c378583202f8d3ee
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 8a960b458260e864346755a7b00afca9735e8851f70b9ebe3c4d95e1c5300c016fda9b1db5ff7c39cfcc288fdfa51ec038b5796eb12d01c8a7a7cb0d24ae1fe3
|
7
|
+
data.tar.gz: 76612ddd0aedef55ca914a5de6b141d9d274c395ec3b9fcc28897e4ca2762ade22759e078446f6ef8ee673ff96954f328e9d23a645e7ba89fe0168ae75e6e7dc
|
data/ext/fastcsv/fastcsv.c
CHANGED
@@ -16,24 +16,20 @@
|
|
16
16
|
// Ragel help.
|
17
17
|
// https://www.mail-archive.com/ragel-users@complang.org/
|
18
18
|
|
19
|
-
#
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
} \
|
24
|
-
else { \
|
25
|
-
rb_enc_associate_index(field, rb_enc_to_index(external_encoding)); \
|
26
|
-
}
|
19
|
+
#define ENCODE \
|
20
|
+
if (enc2 != NULL) { \
|
21
|
+
field = rb_str_encode(field, rb_enc_from_encoding(enc), 0, Qnil); \
|
22
|
+
}
|
27
23
|
|
28
24
|
static VALUE mModule, rb_eParseError;
|
29
|
-
static ID s_read, s_to_str;
|
25
|
+
static ID s_read, s_to_str, s_internal_encoding, s_external_encoding, s_string, s_encoding;
|
30
26
|
|
31
27
|
|
32
|
-
#line
|
28
|
+
#line 125 "ext/fastcsv/fastcsv.rl"
|
33
29
|
|
34
30
|
|
35
31
|
|
36
|
-
#line
|
32
|
+
#line 33 "ext/fastcsv/fastcsv.c"
|
37
33
|
static const int fastcsv_start = 4;
|
38
34
|
static const int fastcsv_first_final = 4;
|
39
35
|
static const int fastcsv_error = 0;
|
@@ -41,10 +37,39 @@ static const int fastcsv_error = 0;
|
|
41
37
|
static const int fastcsv_en_main = 4;
|
42
38
|
|
43
39
|
|
44
|
-
#line
|
40
|
+
#line 128 "ext/fastcsv/fastcsv.rl"
|
45
41
|
|
42
|
+
// 16 kB
|
46
43
|
#define BUFSIZE 16384
|
47
44
|
|
45
|
+
// @see http://rxr.whitequark.org/mri/source/io.c#4845
|
46
|
+
static void
|
47
|
+
rb_io_ext_int_to_encs(rb_encoding *ext, rb_encoding *intern, rb_encoding **enc, rb_encoding **enc2, int fmode)
|
48
|
+
{
|
49
|
+
int default_ext = 0;
|
50
|
+
|
51
|
+
if (ext == NULL) {
|
52
|
+
ext = rb_default_external_encoding();
|
53
|
+
default_ext = 1;
|
54
|
+
}
|
55
|
+
if (ext == rb_ascii8bit_encoding()) {
|
56
|
+
/* If external is ASCII-8BIT, no transcoding */
|
57
|
+
intern = NULL;
|
58
|
+
}
|
59
|
+
else if (intern == NULL) {
|
60
|
+
intern = rb_default_internal_encoding();
|
61
|
+
}
|
62
|
+
if (intern == NULL || intern == (rb_encoding *)Qnil || intern == ext) {
|
63
|
+
/* No internal encoding => use external + no transcoding */
|
64
|
+
*enc = (default_ext && intern != ext) ? NULL : ext;
|
65
|
+
*enc2 = NULL;
|
66
|
+
}
|
67
|
+
else {
|
68
|
+
*enc = intern;
|
69
|
+
*enc2 = ext;
|
70
|
+
}
|
71
|
+
}
|
72
|
+
|
48
73
|
VALUE fastcsv(int argc, VALUE *argv, VALUE self) {
|
49
74
|
int cs, act, have = 0, curline = 1, io = 0;
|
50
75
|
char *ts = 0, *te = 0, *buf = 0, *eof = 0;
|
@@ -52,11 +77,11 @@ VALUE fastcsv(int argc, VALUE *argv, VALUE self) {
|
|
52
77
|
VALUE port, opts;
|
53
78
|
VALUE row = rb_ary_new(), field = Qnil, bufsize = Qnil;
|
54
79
|
int done = 0, unclosed_line = 0, buffer_size = 0, taint = 0;
|
55
|
-
|
56
|
-
|
80
|
+
rb_encoding *enc = NULL, *enc2 = NULL, *encoding = NULL;
|
81
|
+
VALUE r_encoding;
|
57
82
|
|
58
83
|
VALUE option;
|
59
|
-
char quote_char = '"';
|
84
|
+
char quote_char = '"';
|
60
85
|
|
61
86
|
rb_scan_args(argc, argv, "11", &port, &opts);
|
62
87
|
taint = OBJ_TAINTED(port);
|
@@ -78,76 +103,111 @@ VALUE fastcsv(int argc, VALUE *argv, VALUE self) {
|
|
78
103
|
rb_raise(rb_eArgError, "options has to be a Hash or nil");
|
79
104
|
}
|
80
105
|
|
81
|
-
// @
|
82
|
-
|
83
|
-
|
84
|
-
// if (TYPE(option) == T_STRING && RSTRING_LEN(option) == 1) {
|
85
|
-
// quote_char = *StringValueCStr(option);
|
86
|
-
// }
|
87
|
-
// else if (!NIL_P(option)) {
|
88
|
-
// rb_raise(rb_eArgError, ":quote_char has to be a single character String");
|
89
|
-
// }
|
90
|
-
|
91
|
-
// option = rb_hash_aref(opts, ID2SYM(rb_intern("col_sep")));
|
92
|
-
// if (TYPE(option) == T_STRING) {
|
93
|
-
// col_sep = StringValueCStr(option);
|
94
|
-
// }
|
95
|
-
// else if (!NIL_P(option)) {
|
96
|
-
// rb_raise(rb_eArgError, ":col_sep has to be a String");
|
97
|
-
// }
|
98
|
-
|
99
|
-
// option = rb_hash_aref(opts, ID2SYM(rb_intern("row_sep")));
|
100
|
-
// if (TYPE(option) == T_STRING) {
|
101
|
-
// row_sep = StringValueCStr(option);
|
102
|
-
// }
|
103
|
-
// else if (!NIL_P(option)) {
|
104
|
-
// rb_raise(rb_eArgError, ":row_sep has to be a String");
|
105
|
-
// }
|
106
|
+
// @see rb_io_extract_modeenc
|
107
|
+
/* Set to defaults */
|
108
|
+
rb_io_ext_int_to_encs(NULL, NULL, &enc, &enc2, 0);
|
106
109
|
|
110
|
+
// "enc" (internal) or "enc2:enc" (external:internal) or "enc:-" (external).
|
111
|
+
// We don't support binmode, which would force "ASCII-8BIT", or "BOM|UTF-*".
|
112
|
+
// @see http://ruby-doc.org/core-2.1.1/IO.html#method-c-new-label-Open+Mode
|
107
113
|
option = rb_hash_aref(opts, ID2SYM(rb_intern("encoding")));
|
108
114
|
if (TYPE(option) == T_STRING) {
|
109
|
-
//
|
110
|
-
const char *
|
111
|
-
char
|
115
|
+
// parse_mode_enc is not in header file.
|
116
|
+
const char *estr = StringValueCStr(option), *ptr;
|
117
|
+
char encname[ENCODING_MAXNAMELEN+1];
|
118
|
+
int idx, idx2;
|
119
|
+
rb_encoding *ext_enc, *int_enc;
|
120
|
+
|
121
|
+
/* parse estr as "enc" or "enc2:enc" or "enc:-" */
|
112
122
|
|
113
|
-
|
114
|
-
if (
|
115
|
-
long len = (
|
123
|
+
ptr = strrchr(estr, ':');
|
124
|
+
if (ptr) {
|
125
|
+
long len = (ptr++) - estr;
|
116
126
|
if (len == 0 || len > ENCODING_MAXNAMELEN) {
|
117
|
-
|
127
|
+
idx = -1;
|
118
128
|
}
|
119
129
|
else {
|
120
|
-
memcpy(
|
121
|
-
|
122
|
-
|
123
|
-
|
130
|
+
memcpy(encname, estr, len);
|
131
|
+
encname[len] = '\0';
|
132
|
+
estr = encname;
|
133
|
+
idx = rb_enc_find_index(encname);
|
124
134
|
}
|
125
135
|
}
|
126
136
|
else {
|
127
|
-
|
137
|
+
idx = rb_enc_find_index(estr);
|
128
138
|
}
|
129
139
|
|
130
|
-
if (
|
131
|
-
|
140
|
+
if (idx >= 0) {
|
141
|
+
ext_enc = rb_enc_from_index(idx);
|
142
|
+
}
|
143
|
+
else {
|
144
|
+
if (idx != -2) {
|
145
|
+
// `unsupported_encoding` is not in header file.
|
146
|
+
rb_warn("Unsupported encoding %s ignored", estr);
|
147
|
+
}
|
148
|
+
ext_enc = NULL;
|
132
149
|
}
|
133
150
|
|
134
|
-
|
135
|
-
|
136
|
-
if (
|
137
|
-
|
151
|
+
int_enc = NULL;
|
152
|
+
if (ptr) {
|
153
|
+
if (*ptr == '-' && *(ptr+1) == '\0') {
|
154
|
+
/* Special case - "-" => no transcoding */
|
155
|
+
int_enc = (rb_encoding *)Qnil;
|
138
156
|
}
|
139
157
|
else {
|
140
|
-
|
158
|
+
idx2 = rb_enc_find_index(ptr);
|
159
|
+
if (idx2 < 0) {
|
160
|
+
// `unsupported_encoding` is not in header file.
|
161
|
+
rb_warn("Unsupported encoding %s ignored", ptr);
|
162
|
+
}
|
163
|
+
else if (idx2 == idx) {
|
164
|
+
int_enc = (rb_encoding *)Qnil;
|
165
|
+
}
|
166
|
+
else {
|
167
|
+
int_enc = rb_enc_from_index(idx2);
|
168
|
+
}
|
141
169
|
}
|
142
170
|
}
|
143
|
-
|
144
|
-
|
145
|
-
}
|
171
|
+
|
172
|
+
rb_io_ext_int_to_encs(ext_enc, int_enc, &enc, &enc2, 0);
|
146
173
|
}
|
147
174
|
else if (!NIL_P(option)) {
|
148
175
|
rb_raise(rb_eArgError, ":encoding has to be a String");
|
149
176
|
}
|
150
177
|
|
178
|
+
// @see https://github.com/ruby/ruby/blob/70510d026f8d86693dccaba07417488eed09b41d/lib/csv.rb#L1567
|
179
|
+
// @see https://github.com/ruby/ruby/blob/70510d026f8d86693dccaba07417488eed09b41d/lib/csv.rb#L2300
|
180
|
+
if (rb_respond_to(port, s_internal_encoding)) {
|
181
|
+
r_encoding = rb_funcall(port, s_internal_encoding, 0);
|
182
|
+
if (NIL_P(r_encoding)) {
|
183
|
+
r_encoding = rb_funcall(port, s_external_encoding, 0);
|
184
|
+
}
|
185
|
+
}
|
186
|
+
else if (rb_respond_to(port, s_string)) {
|
187
|
+
r_encoding = rb_funcall(rb_funcall(port, s_string, 0), s_encoding, 0);
|
188
|
+
}
|
189
|
+
else if (rb_respond_to(port, s_encoding)) {
|
190
|
+
r_encoding = rb_funcall(port, s_encoding, 0);
|
191
|
+
}
|
192
|
+
else {
|
193
|
+
r_encoding = rb_enc_from_encoding(rb_ascii8bit_encoding());
|
194
|
+
}
|
195
|
+
if (NIL_P(r_encoding)) {
|
196
|
+
r_encoding = rb_enc_from_encoding(rb_default_internal_encoding());
|
197
|
+
}
|
198
|
+
if (NIL_P(r_encoding)) {
|
199
|
+
r_encoding = rb_enc_from_encoding(rb_default_external_encoding());
|
200
|
+
}
|
201
|
+
if (enc2 != NULL) {
|
202
|
+
encoding = enc2;
|
203
|
+
}
|
204
|
+
else if (enc != NULL) {
|
205
|
+
encoding = enc;
|
206
|
+
}
|
207
|
+
else if (!NIL_P(r_encoding)) {
|
208
|
+
encoding = rb_enc_get(r_encoding);
|
209
|
+
}
|
210
|
+
|
151
211
|
buffer_size = BUFSIZE;
|
152
212
|
if (rb_ivar_defined(self, rb_intern("@buffer_size")) == Qtrue) {
|
153
213
|
bufsize = rb_ivar_get(self, rb_intern("@buffer_size"));
|
@@ -161,7 +221,7 @@ VALUE fastcsv(int argc, VALUE *argv, VALUE self) {
|
|
161
221
|
}
|
162
222
|
|
163
223
|
|
164
|
-
#line
|
224
|
+
#line 225 "ext/fastcsv/fastcsv.c"
|
165
225
|
{
|
166
226
|
cs = fastcsv_start;
|
167
227
|
ts = 0;
|
@@ -169,7 +229,7 @@ VALUE fastcsv(int argc, VALUE *argv, VALUE self) {
|
|
169
229
|
act = 0;
|
170
230
|
}
|
171
231
|
|
172
|
-
#line
|
232
|
+
#line 311 "ext/fastcsv/fastcsv.rl"
|
173
233
|
|
174
234
|
while (!done) {
|
175
235
|
VALUE str;
|
@@ -217,12 +277,8 @@ VALUE fastcsv(int argc, VALUE *argv, VALUE self) {
|
|
217
277
|
}
|
218
278
|
|
219
279
|
pe = p + len;
|
220
|
-
// if (done) {
|
221
|
-
// // This triggers the eof action in the non-scanner version.
|
222
|
-
// eof = pe;
|
223
|
-
// }
|
224
280
|
|
225
|
-
#line
|
281
|
+
#line 282 "ext/fastcsv/fastcsv.c"
|
226
282
|
{
|
227
283
|
if ( p == pe )
|
228
284
|
goto _test_eof;
|
@@ -241,7 +297,7 @@ tr0:
|
|
241
297
|
}
|
242
298
|
goto st4;
|
243
299
|
tr10:
|
244
|
-
#line
|
300
|
+
#line 101 "ext/fastcsv/fastcsv.rl"
|
245
301
|
{
|
246
302
|
if (!NIL_P(field) || RARRAY_LEN(row)) {
|
247
303
|
rb_ary_push(row, field);
|
@@ -250,19 +306,19 @@ tr10:
|
|
250
306
|
rb_yield(row);
|
251
307
|
}
|
252
308
|
}
|
253
|
-
#line
|
309
|
+
#line 123 "ext/fastcsv/fastcsv.rl"
|
254
310
|
{te = p+1;}
|
255
311
|
goto st4;
|
256
312
|
tr16:
|
257
|
-
#line
|
313
|
+
#line 123 "ext/fastcsv/fastcsv.rl"
|
258
314
|
{te = p;p--;}
|
259
315
|
goto st4;
|
260
316
|
tr17:
|
261
|
-
#line
|
317
|
+
#line 122 "ext/fastcsv/fastcsv.rl"
|
262
318
|
{te = p;p--;}
|
263
319
|
goto st4;
|
264
320
|
tr18:
|
265
|
-
#line
|
321
|
+
#line 101 "ext/fastcsv/fastcsv.rl"
|
266
322
|
{
|
267
323
|
if (!NIL_P(field) || RARRAY_LEN(row)) {
|
268
324
|
rb_ary_push(row, field);
|
@@ -271,15 +327,15 @@ tr18:
|
|
271
327
|
rb_yield(row);
|
272
328
|
}
|
273
329
|
}
|
274
|
-
#line
|
330
|
+
#line 122 "ext/fastcsv/fastcsv.rl"
|
275
331
|
{te = p+1;}
|
276
332
|
goto st4;
|
277
333
|
tr20:
|
278
|
-
#line
|
334
|
+
#line 121 "ext/fastcsv/fastcsv.rl"
|
279
335
|
{te = p;p--;}
|
280
336
|
goto st4;
|
281
337
|
tr21:
|
282
|
-
#line
|
338
|
+
#line 101 "ext/fastcsv/fastcsv.rl"
|
283
339
|
{
|
284
340
|
if (!NIL_P(field) || RARRAY_LEN(row)) {
|
285
341
|
rb_ary_push(row, field);
|
@@ -288,7 +344,7 @@ tr21:
|
|
288
344
|
rb_yield(row);
|
289
345
|
}
|
290
346
|
}
|
291
|
-
#line
|
347
|
+
#line 121 "ext/fastcsv/fastcsv.rl"
|
292
348
|
{te = p+1;}
|
293
349
|
goto st4;
|
294
350
|
st4:
|
@@ -301,7 +357,7 @@ st4:
|
|
301
357
|
case 4:
|
302
358
|
#line 1 "NONE"
|
303
359
|
{ts = p;}
|
304
|
-
#line
|
360
|
+
#line 361 "ext/fastcsv/fastcsv.c"
|
305
361
|
switch( (*p) ) {
|
306
362
|
case 0: goto tr14;
|
307
363
|
case 10: goto tr3;
|
@@ -325,18 +381,18 @@ case 1:
|
|
325
381
|
tr2:
|
326
382
|
#line 1 "NONE"
|
327
383
|
{te = p+1;}
|
328
|
-
#line
|
384
|
+
#line 40 "ext/fastcsv/fastcsv.rl"
|
329
385
|
{
|
330
386
|
if (p == ts) {
|
331
387
|
// Unquoted empty fields are nil, not "", in Ruby.
|
332
388
|
field = Qnil;
|
333
389
|
}
|
334
390
|
else if (p > ts) {
|
335
|
-
field =
|
336
|
-
|
391
|
+
field = rb_enc_str_new(ts, p - ts, encoding);
|
392
|
+
ENCODE;
|
337
393
|
}
|
338
394
|
}
|
339
|
-
#line
|
395
|
+
#line 101 "ext/fastcsv/fastcsv.rl"
|
340
396
|
{
|
341
397
|
if (!NIL_P(field) || RARRAY_LEN(row)) {
|
342
398
|
rb_ary_push(row, field);
|
@@ -345,14 +401,14 @@ tr2:
|
|
345
401
|
rb_yield(row);
|
346
402
|
}
|
347
403
|
}
|
348
|
-
#line
|
404
|
+
#line 123 "ext/fastcsv/fastcsv.rl"
|
349
405
|
{act = 3;}
|
350
406
|
goto st5;
|
351
407
|
st5:
|
352
408
|
if ( ++p == pe )
|
353
409
|
goto _test_eof5;
|
354
410
|
case 5:
|
355
|
-
#line
|
411
|
+
#line 412 "ext/fastcsv/fastcsv.c"
|
356
412
|
switch( (*p) ) {
|
357
413
|
case 0: goto tr2;
|
358
414
|
case 10: goto tr3;
|
@@ -362,18 +418,18 @@ case 5:
|
|
362
418
|
}
|
363
419
|
goto st1;
|
364
420
|
tr3:
|
365
|
-
#line
|
421
|
+
#line 40 "ext/fastcsv/fastcsv.rl"
|
366
422
|
{
|
367
423
|
if (p == ts) {
|
368
424
|
// Unquoted empty fields are nil, not "", in Ruby.
|
369
425
|
field = Qnil;
|
370
426
|
}
|
371
427
|
else if (p > ts) {
|
372
|
-
field =
|
373
|
-
|
428
|
+
field = rb_enc_str_new(ts, p - ts, encoding);
|
429
|
+
ENCODE;
|
374
430
|
}
|
375
431
|
}
|
376
|
-
#line
|
432
|
+
#line 91 "ext/fastcsv/fastcsv.rl"
|
377
433
|
{
|
378
434
|
if (!NIL_P(field) || RARRAY_LEN(row)) { // same as new_field
|
379
435
|
rb_ary_push(row, field);
|
@@ -383,19 +439,19 @@ tr3:
|
|
383
439
|
rb_yield(row);
|
384
440
|
row = rb_ary_new();
|
385
441
|
}
|
386
|
-
#line
|
442
|
+
#line 28 "ext/fastcsv/fastcsv.rl"
|
387
443
|
{
|
388
444
|
curline++;
|
389
445
|
}
|
390
446
|
goto st6;
|
391
447
|
tr19:
|
392
|
-
#line
|
448
|
+
#line 28 "ext/fastcsv/fastcsv.rl"
|
393
449
|
{
|
394
450
|
curline++;
|
395
451
|
}
|
396
452
|
goto st6;
|
397
453
|
tr11:
|
398
|
-
#line
|
454
|
+
#line 91 "ext/fastcsv/fastcsv.rl"
|
399
455
|
{
|
400
456
|
if (!NIL_P(field) || RARRAY_LEN(row)) { // same as new_field
|
401
457
|
rb_ary_push(row, field);
|
@@ -405,7 +461,7 @@ tr11:
|
|
405
461
|
rb_yield(row);
|
406
462
|
row = rb_ary_new();
|
407
463
|
}
|
408
|
-
#line
|
464
|
+
#line 28 "ext/fastcsv/fastcsv.rl"
|
409
465
|
{
|
410
466
|
curline++;
|
411
467
|
}
|
@@ -414,23 +470,23 @@ st6:
|
|
414
470
|
if ( ++p == pe )
|
415
471
|
goto _test_eof6;
|
416
472
|
case 6:
|
417
|
-
#line
|
473
|
+
#line 474 "ext/fastcsv/fastcsv.c"
|
418
474
|
if ( (*p) == 0 )
|
419
475
|
goto tr18;
|
420
476
|
goto tr17;
|
421
477
|
tr4:
|
422
|
-
#line
|
478
|
+
#line 40 "ext/fastcsv/fastcsv.rl"
|
423
479
|
{
|
424
480
|
if (p == ts) {
|
425
481
|
// Unquoted empty fields are nil, not "", in Ruby.
|
426
482
|
field = Qnil;
|
427
483
|
}
|
428
484
|
else if (p > ts) {
|
429
|
-
field =
|
430
|
-
|
485
|
+
field = rb_enc_str_new(ts, p - ts, encoding);
|
486
|
+
ENCODE;
|
431
487
|
}
|
432
488
|
}
|
433
|
-
#line
|
489
|
+
#line 91 "ext/fastcsv/fastcsv.rl"
|
434
490
|
{
|
435
491
|
if (!NIL_P(field) || RARRAY_LEN(row)) { // same as new_field
|
436
492
|
rb_ary_push(row, field);
|
@@ -440,13 +496,13 @@ tr4:
|
|
440
496
|
rb_yield(row);
|
441
497
|
row = rb_ary_new();
|
442
498
|
}
|
443
|
-
#line
|
499
|
+
#line 28 "ext/fastcsv/fastcsv.rl"
|
444
500
|
{
|
445
501
|
curline++;
|
446
502
|
}
|
447
503
|
goto st7;
|
448
504
|
tr12:
|
449
|
-
#line
|
505
|
+
#line 91 "ext/fastcsv/fastcsv.rl"
|
450
506
|
{
|
451
507
|
if (!NIL_P(field) || RARRAY_LEN(row)) { // same as new_field
|
452
508
|
rb_ary_push(row, field);
|
@@ -456,7 +512,7 @@ tr12:
|
|
456
512
|
rb_yield(row);
|
457
513
|
row = rb_ary_new();
|
458
514
|
}
|
459
|
-
#line
|
515
|
+
#line 28 "ext/fastcsv/fastcsv.rl"
|
460
516
|
{
|
461
517
|
curline++;
|
462
518
|
}
|
@@ -465,32 +521,32 @@ st7:
|
|
465
521
|
if ( ++p == pe )
|
466
522
|
goto _test_eof7;
|
467
523
|
case 7:
|
468
|
-
#line
|
524
|
+
#line 525 "ext/fastcsv/fastcsv.c"
|
469
525
|
switch( (*p) ) {
|
470
526
|
case 0: goto tr18;
|
471
527
|
case 10: goto tr19;
|
472
528
|
}
|
473
529
|
goto tr17;
|
474
530
|
tr5:
|
475
|
-
#line
|
531
|
+
#line 40 "ext/fastcsv/fastcsv.rl"
|
476
532
|
{
|
477
533
|
if (p == ts) {
|
478
534
|
// Unquoted empty fields are nil, not "", in Ruby.
|
479
535
|
field = Qnil;
|
480
536
|
}
|
481
537
|
else if (p > ts) {
|
482
|
-
field =
|
483
|
-
|
538
|
+
field = rb_enc_str_new(ts, p - ts, encoding);
|
539
|
+
ENCODE;
|
484
540
|
}
|
485
541
|
}
|
486
|
-
#line
|
542
|
+
#line 86 "ext/fastcsv/fastcsv.rl"
|
487
543
|
{
|
488
544
|
rb_ary_push(row, field);
|
489
545
|
field = Qnil;
|
490
546
|
}
|
491
547
|
goto st8;
|
492
548
|
tr13:
|
493
|
-
#line
|
549
|
+
#line 86 "ext/fastcsv/fastcsv.rl"
|
494
550
|
{
|
495
551
|
rb_ary_push(row, field);
|
496
552
|
field = Qnil;
|
@@ -500,14 +556,14 @@ st8:
|
|
500
556
|
if ( ++p == pe )
|
501
557
|
goto _test_eof8;
|
502
558
|
case 8:
|
503
|
-
#line
|
559
|
+
#line 560 "ext/fastcsv/fastcsv.c"
|
504
560
|
if ( (*p) == 0 )
|
505
561
|
goto tr21;
|
506
562
|
goto tr20;
|
507
563
|
tr14:
|
508
564
|
#line 1 "NONE"
|
509
565
|
{te = p+1;}
|
510
|
-
#line
|
566
|
+
#line 101 "ext/fastcsv/fastcsv.rl"
|
511
567
|
{
|
512
568
|
if (!NIL_P(field) || RARRAY_LEN(row)) {
|
513
569
|
rb_ary_push(row, field);
|
@@ -516,25 +572,25 @@ tr14:
|
|
516
572
|
rb_yield(row);
|
517
573
|
}
|
518
574
|
}
|
519
|
-
#line
|
575
|
+
#line 40 "ext/fastcsv/fastcsv.rl"
|
520
576
|
{
|
521
577
|
if (p == ts) {
|
522
578
|
// Unquoted empty fields are nil, not "", in Ruby.
|
523
579
|
field = Qnil;
|
524
580
|
}
|
525
581
|
else if (p > ts) {
|
526
|
-
field =
|
527
|
-
|
582
|
+
field = rb_enc_str_new(ts, p - ts, encoding);
|
583
|
+
ENCODE;
|
528
584
|
}
|
529
585
|
}
|
530
|
-
#line
|
586
|
+
#line 123 "ext/fastcsv/fastcsv.rl"
|
531
587
|
{act = 3;}
|
532
588
|
goto st9;
|
533
589
|
st9:
|
534
590
|
if ( ++p == pe )
|
535
591
|
goto _test_eof9;
|
536
592
|
case 9:
|
537
|
-
#line
|
593
|
+
#line 594 "ext/fastcsv/fastcsv.c"
|
538
594
|
switch( (*p) ) {
|
539
595
|
case 10: goto tr16;
|
540
596
|
case 13: goto tr16;
|
@@ -543,13 +599,13 @@ case 9:
|
|
543
599
|
}
|
544
600
|
goto st1;
|
545
601
|
tr8:
|
546
|
-
#line
|
602
|
+
#line 28 "ext/fastcsv/fastcsv.rl"
|
547
603
|
{
|
548
604
|
curline++;
|
549
605
|
}
|
550
606
|
goto st2;
|
551
607
|
tr15:
|
552
|
-
#line
|
608
|
+
#line 32 "ext/fastcsv/fastcsv.rl"
|
553
609
|
{
|
554
610
|
unclosed_line = curline;
|
555
611
|
}
|
@@ -558,7 +614,7 @@ st2:
|
|
558
614
|
if ( ++p == pe )
|
559
615
|
goto _test_eof2;
|
560
616
|
case 2:
|
561
|
-
#line
|
617
|
+
#line 618 "ext/fastcsv/fastcsv.c"
|
562
618
|
switch( (*p) ) {
|
563
619
|
case 0: goto st0;
|
564
620
|
case 10: goto tr8;
|
@@ -570,11 +626,11 @@ st0:
|
|
570
626
|
cs = 0;
|
571
627
|
goto _out;
|
572
628
|
tr9:
|
573
|
-
#line
|
629
|
+
#line 51 "ext/fastcsv/fastcsv.rl"
|
574
630
|
{
|
575
631
|
if (p == ts) {
|
576
|
-
field =
|
577
|
-
|
632
|
+
field = rb_enc_str_new("", 0, encoding);
|
633
|
+
ENCODE;
|
578
634
|
}
|
579
635
|
// @note If we add an action on '""', we can skip some steps if no '""' is found.
|
580
636
|
else if (p > ts) {
|
@@ -597,15 +653,15 @@ tr9:
|
|
597
653
|
reader++;
|
598
654
|
}
|
599
655
|
|
600
|
-
field =
|
601
|
-
|
656
|
+
field = rb_enc_str_new(copy, writer - copy, enc);
|
657
|
+
ENCODE;
|
602
658
|
|
603
659
|
if (copy != NULL) {
|
604
660
|
free(copy);
|
605
661
|
}
|
606
662
|
}
|
607
663
|
}
|
608
|
-
#line
|
664
|
+
#line 36 "ext/fastcsv/fastcsv.rl"
|
609
665
|
{
|
610
666
|
unclosed_line = 0;
|
611
667
|
}
|
@@ -614,7 +670,7 @@ st3:
|
|
614
670
|
if ( ++p == pe )
|
615
671
|
goto _test_eof3;
|
616
672
|
case 3:
|
617
|
-
#line
|
673
|
+
#line 674 "ext/fastcsv/fastcsv.c"
|
618
674
|
switch( (*p) ) {
|
619
675
|
case 0: goto tr10;
|
620
676
|
case 10: goto tr11;
|
@@ -650,7 +706,7 @@ case 3:
|
|
650
706
|
_out: {}
|
651
707
|
}
|
652
708
|
|
653
|
-
#line
|
709
|
+
#line 359 "ext/fastcsv/fastcsv.rl"
|
654
710
|
|
655
711
|
if (done && cs < fastcsv_first_final) {
|
656
712
|
if (buf != NULL) {
|
@@ -689,6 +745,10 @@ case 3:
|
|
689
745
|
void Init_fastcsv() {
|
690
746
|
s_read = rb_intern("read");
|
691
747
|
s_to_str = rb_intern("to_str");
|
748
|
+
s_internal_encoding = rb_intern("internal_encoding");
|
749
|
+
s_external_encoding = rb_intern("external_encoding");
|
750
|
+
s_string = rb_intern("string");
|
751
|
+
s_encoding = rb_intern("encoding");
|
692
752
|
|
693
753
|
mModule = rb_define_module("FastCSV");
|
694
754
|
rb_define_attr(rb_singleton_class(mModule), "buffer_size", 1, 1);
|
data/ext/fastcsv/fastcsv.rl
CHANGED
@@ -14,17 +14,13 @@
|
|
14
14
|
// Ragel help.
|
15
15
|
// https://www.mail-archive.com/ragel-users@complang.org/
|
16
16
|
|
17
|
-
#
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
} \
|
22
|
-
else { \
|
23
|
-
rb_enc_associate_index(field, rb_enc_to_index(external_encoding)); \
|
24
|
-
}
|
17
|
+
#define ENCODE \
|
18
|
+
if (enc2 != NULL) { \
|
19
|
+
field = rb_str_encode(field, rb_enc_from_encoding(enc), 0, Qnil); \
|
20
|
+
}
|
25
21
|
|
26
22
|
static VALUE mModule, rb_eParseError;
|
27
|
-
static ID s_read, s_to_str;
|
23
|
+
static ID s_read, s_to_str, s_internal_encoding, s_external_encoding, s_string, s_encoding;
|
28
24
|
|
29
25
|
%%{
|
30
26
|
machine fastcsv;
|
@@ -47,15 +43,15 @@ static ID s_read, s_to_str;
|
|
47
43
|
field = Qnil;
|
48
44
|
}
|
49
45
|
else if (p > ts) {
|
50
|
-
field =
|
51
|
-
|
46
|
+
field = rb_enc_str_new(ts, p - ts, encoding);
|
47
|
+
ENCODE;
|
52
48
|
}
|
53
49
|
}
|
54
50
|
|
55
51
|
action read_quoted {
|
56
52
|
if (p == ts) {
|
57
|
-
field =
|
58
|
-
|
53
|
+
field = rb_enc_str_new("", 0, encoding);
|
54
|
+
ENCODE;
|
59
55
|
}
|
60
56
|
// @note If we add an action on '""', we can skip some steps if no '""' is found.
|
61
57
|
else if (p > ts) {
|
@@ -78,8 +74,8 @@ static ID s_read, s_to_str;
|
|
78
74
|
reader++;
|
79
75
|
}
|
80
76
|
|
81
|
-
field =
|
82
|
-
|
77
|
+
field = rb_enc_str_new(copy, writer - copy, enc);
|
78
|
+
ENCODE;
|
83
79
|
|
84
80
|
if (copy != NULL) {
|
85
81
|
free(copy);
|
@@ -118,30 +114,49 @@ static ID s_read, s_to_str;
|
|
118
114
|
unquoted = (any* -- quote_char -- col_sep -- row_sep - EOF) %read_unquoted;
|
119
115
|
quoted = quote_char >open_quote (any - quote_char - EOF | quote_char quote_char | row_sep)* %read_quoted quote_char >close_quote;
|
120
116
|
field = unquoted | quoted;
|
121
|
-
# fields = (field col_sep)* field?;
|
122
|
-
# file = (fields row_sep >new_row)* fields?;
|
123
117
|
|
124
118
|
# @see Ragel Guide: 6.3 Scanners
|
125
|
-
#
|
119
|
+
# An unquoted field can be zero-length.
|
126
120
|
main := |*
|
127
121
|
field col_sep EOF?;
|
128
122
|
field row_sep >new_row EOF?;
|
129
123
|
field EOF;
|
130
124
|
*|;
|
131
|
-
|
132
|
-
# Non-scanner version requires very large buffer.
|
133
|
-
# main := file $/{
|
134
|
-
# if (!NIL_P(field) || RARRAY_LEN(row)) {
|
135
|
-
# rb_ary_push(row, field);
|
136
|
-
# rb_yield(row);
|
137
|
-
# }
|
138
|
-
# };
|
139
125
|
}%%
|
140
126
|
|
141
127
|
%% write data;
|
142
128
|
|
129
|
+
// 16 kB
|
143
130
|
#define BUFSIZE 16384
|
144
131
|
|
132
|
+
// @see http://rxr.whitequark.org/mri/source/io.c#4845
|
133
|
+
static void
|
134
|
+
rb_io_ext_int_to_encs(rb_encoding *ext, rb_encoding *intern, rb_encoding **enc, rb_encoding **enc2, int fmode)
|
135
|
+
{
|
136
|
+
int default_ext = 0;
|
137
|
+
|
138
|
+
if (ext == NULL) {
|
139
|
+
ext = rb_default_external_encoding();
|
140
|
+
default_ext = 1;
|
141
|
+
}
|
142
|
+
if (ext == rb_ascii8bit_encoding()) {
|
143
|
+
/* If external is ASCII-8BIT, no transcoding */
|
144
|
+
intern = NULL;
|
145
|
+
}
|
146
|
+
else if (intern == NULL) {
|
147
|
+
intern = rb_default_internal_encoding();
|
148
|
+
}
|
149
|
+
if (intern == NULL || intern == (rb_encoding *)Qnil || intern == ext) {
|
150
|
+
/* No internal encoding => use external + no transcoding */
|
151
|
+
*enc = (default_ext && intern != ext) ? NULL : ext;
|
152
|
+
*enc2 = NULL;
|
153
|
+
}
|
154
|
+
else {
|
155
|
+
*enc = intern;
|
156
|
+
*enc2 = ext;
|
157
|
+
}
|
158
|
+
}
|
159
|
+
|
145
160
|
VALUE fastcsv(int argc, VALUE *argv, VALUE self) {
|
146
161
|
int cs, act, have = 0, curline = 1, io = 0;
|
147
162
|
char *ts = 0, *te = 0, *buf = 0, *eof = 0;
|
@@ -149,11 +164,11 @@ VALUE fastcsv(int argc, VALUE *argv, VALUE self) {
|
|
149
164
|
VALUE port, opts;
|
150
165
|
VALUE row = rb_ary_new(), field = Qnil, bufsize = Qnil;
|
151
166
|
int done = 0, unclosed_line = 0, buffer_size = 0, taint = 0;
|
152
|
-
|
153
|
-
|
167
|
+
rb_encoding *enc = NULL, *enc2 = NULL, *encoding = NULL;
|
168
|
+
VALUE r_encoding;
|
154
169
|
|
155
170
|
VALUE option;
|
156
|
-
char quote_char = '"';
|
171
|
+
char quote_char = '"';
|
157
172
|
|
158
173
|
rb_scan_args(argc, argv, "11", &port, &opts);
|
159
174
|
taint = OBJ_TAINTED(port);
|
@@ -175,76 +190,111 @@ VALUE fastcsv(int argc, VALUE *argv, VALUE self) {
|
|
175
190
|
rb_raise(rb_eArgError, "options has to be a Hash or nil");
|
176
191
|
}
|
177
192
|
|
178
|
-
// @
|
179
|
-
|
180
|
-
|
181
|
-
// if (TYPE(option) == T_STRING && RSTRING_LEN(option) == 1) {
|
182
|
-
// quote_char = *StringValueCStr(option);
|
183
|
-
// }
|
184
|
-
// else if (!NIL_P(option)) {
|
185
|
-
// rb_raise(rb_eArgError, ":quote_char has to be a single character String");
|
186
|
-
// }
|
187
|
-
|
188
|
-
// option = rb_hash_aref(opts, ID2SYM(rb_intern("col_sep")));
|
189
|
-
// if (TYPE(option) == T_STRING) {
|
190
|
-
// col_sep = StringValueCStr(option);
|
191
|
-
// }
|
192
|
-
// else if (!NIL_P(option)) {
|
193
|
-
// rb_raise(rb_eArgError, ":col_sep has to be a String");
|
194
|
-
// }
|
195
|
-
|
196
|
-
// option = rb_hash_aref(opts, ID2SYM(rb_intern("row_sep")));
|
197
|
-
// if (TYPE(option) == T_STRING) {
|
198
|
-
// row_sep = StringValueCStr(option);
|
199
|
-
// }
|
200
|
-
// else if (!NIL_P(option)) {
|
201
|
-
// rb_raise(rb_eArgError, ":row_sep has to be a String");
|
202
|
-
// }
|
193
|
+
// @see rb_io_extract_modeenc
|
194
|
+
/* Set to defaults */
|
195
|
+
rb_io_ext_int_to_encs(NULL, NULL, &enc, &enc2, 0);
|
203
196
|
|
197
|
+
// "enc" (internal) or "enc2:enc" (external:internal) or "enc:-" (external).
|
198
|
+
// We don't support binmode, which would force "ASCII-8BIT", or "BOM|UTF-*".
|
199
|
+
// @see http://ruby-doc.org/core-2.1.1/IO.html#method-c-new-label-Open+Mode
|
204
200
|
option = rb_hash_aref(opts, ID2SYM(rb_intern("encoding")));
|
205
201
|
if (TYPE(option) == T_STRING) {
|
206
|
-
//
|
207
|
-
const char *
|
208
|
-
char
|
202
|
+
// parse_mode_enc is not in header file.
|
203
|
+
const char *estr = StringValueCStr(option), *ptr;
|
204
|
+
char encname[ENCODING_MAXNAMELEN+1];
|
205
|
+
int idx, idx2;
|
206
|
+
rb_encoding *ext_enc, *int_enc;
|
207
|
+
|
208
|
+
/* parse estr as "enc" or "enc2:enc" or "enc:-" */
|
209
209
|
|
210
|
-
|
211
|
-
if (
|
212
|
-
long len = (
|
210
|
+
ptr = strrchr(estr, ':');
|
211
|
+
if (ptr) {
|
212
|
+
long len = (ptr++) - estr;
|
213
213
|
if (len == 0 || len > ENCODING_MAXNAMELEN) {
|
214
|
-
|
214
|
+
idx = -1;
|
215
215
|
}
|
216
216
|
else {
|
217
|
-
memcpy(
|
218
|
-
|
219
|
-
|
220
|
-
|
217
|
+
memcpy(encname, estr, len);
|
218
|
+
encname[len] = '\0';
|
219
|
+
estr = encname;
|
220
|
+
idx = rb_enc_find_index(encname);
|
221
221
|
}
|
222
222
|
}
|
223
223
|
else {
|
224
|
-
|
224
|
+
idx = rb_enc_find_index(estr);
|
225
225
|
}
|
226
226
|
|
227
|
-
if (
|
228
|
-
|
227
|
+
if (idx >= 0) {
|
228
|
+
ext_enc = rb_enc_from_index(idx);
|
229
|
+
}
|
230
|
+
else {
|
231
|
+
if (idx != -2) {
|
232
|
+
// `unsupported_encoding` is not in header file.
|
233
|
+
rb_warn("Unsupported encoding %s ignored", estr);
|
234
|
+
}
|
235
|
+
ext_enc = NULL;
|
229
236
|
}
|
230
237
|
|
231
|
-
|
232
|
-
|
233
|
-
if (
|
234
|
-
|
238
|
+
int_enc = NULL;
|
239
|
+
if (ptr) {
|
240
|
+
if (*ptr == '-' && *(ptr+1) == '\0') {
|
241
|
+
/* Special case - "-" => no transcoding */
|
242
|
+
int_enc = (rb_encoding *)Qnil;
|
235
243
|
}
|
236
244
|
else {
|
237
|
-
|
245
|
+
idx2 = rb_enc_find_index(ptr);
|
246
|
+
if (idx2 < 0) {
|
247
|
+
// `unsupported_encoding` is not in header file.
|
248
|
+
rb_warn("Unsupported encoding %s ignored", ptr);
|
249
|
+
}
|
250
|
+
else if (idx2 == idx) {
|
251
|
+
int_enc = (rb_encoding *)Qnil;
|
252
|
+
}
|
253
|
+
else {
|
254
|
+
int_enc = rb_enc_from_index(idx2);
|
255
|
+
}
|
238
256
|
}
|
239
257
|
}
|
240
|
-
|
241
|
-
|
242
|
-
}
|
258
|
+
|
259
|
+
rb_io_ext_int_to_encs(ext_enc, int_enc, &enc, &enc2, 0);
|
243
260
|
}
|
244
261
|
else if (!NIL_P(option)) {
|
245
262
|
rb_raise(rb_eArgError, ":encoding has to be a String");
|
246
263
|
}
|
247
264
|
|
265
|
+
// @see https://github.com/ruby/ruby/blob/70510d026f8d86693dccaba07417488eed09b41d/lib/csv.rb#L1567
|
266
|
+
// @see https://github.com/ruby/ruby/blob/70510d026f8d86693dccaba07417488eed09b41d/lib/csv.rb#L2300
|
267
|
+
if (rb_respond_to(port, s_internal_encoding)) {
|
268
|
+
r_encoding = rb_funcall(port, s_internal_encoding, 0);
|
269
|
+
if (NIL_P(r_encoding)) {
|
270
|
+
r_encoding = rb_funcall(port, s_external_encoding, 0);
|
271
|
+
}
|
272
|
+
}
|
273
|
+
else if (rb_respond_to(port, s_string)) {
|
274
|
+
r_encoding = rb_funcall(rb_funcall(port, s_string, 0), s_encoding, 0);
|
275
|
+
}
|
276
|
+
else if (rb_respond_to(port, s_encoding)) {
|
277
|
+
r_encoding = rb_funcall(port, s_encoding, 0);
|
278
|
+
}
|
279
|
+
else {
|
280
|
+
r_encoding = rb_enc_from_encoding(rb_ascii8bit_encoding());
|
281
|
+
}
|
282
|
+
if (NIL_P(r_encoding)) {
|
283
|
+
r_encoding = rb_enc_from_encoding(rb_default_internal_encoding());
|
284
|
+
}
|
285
|
+
if (NIL_P(r_encoding)) {
|
286
|
+
r_encoding = rb_enc_from_encoding(rb_default_external_encoding());
|
287
|
+
}
|
288
|
+
if (enc2 != NULL) {
|
289
|
+
encoding = enc2;
|
290
|
+
}
|
291
|
+
else if (enc != NULL) {
|
292
|
+
encoding = enc;
|
293
|
+
}
|
294
|
+
else if (!NIL_P(r_encoding)) {
|
295
|
+
encoding = rb_enc_get(r_encoding);
|
296
|
+
}
|
297
|
+
|
248
298
|
buffer_size = BUFSIZE;
|
249
299
|
if (rb_ivar_defined(self, rb_intern("@buffer_size")) == Qtrue) {
|
250
300
|
bufsize = rb_ivar_get(self, rb_intern("@buffer_size"));
|
@@ -305,10 +355,6 @@ VALUE fastcsv(int argc, VALUE *argv, VALUE self) {
|
|
305
355
|
}
|
306
356
|
|
307
357
|
pe = p + len;
|
308
|
-
// if (done) {
|
309
|
-
// // This triggers the eof action in the non-scanner version.
|
310
|
-
// eof = pe;
|
311
|
-
// }
|
312
358
|
%% write exec;
|
313
359
|
|
314
360
|
if (done && cs < fastcsv_first_final) {
|
@@ -348,6 +394,10 @@ VALUE fastcsv(int argc, VALUE *argv, VALUE self) {
|
|
348
394
|
void Init_fastcsv() {
|
349
395
|
s_read = rb_intern("read");
|
350
396
|
s_to_str = rb_intern("to_str");
|
397
|
+
s_internal_encoding = rb_intern("internal_encoding");
|
398
|
+
s_external_encoding = rb_intern("external_encoding");
|
399
|
+
s_string = rb_intern("string");
|
400
|
+
s_encoding = rb_intern("encoding");
|
351
401
|
|
352
402
|
mModule = rb_define_module("FastCSV");
|
353
403
|
rb_define_attr(rb_singleton_class(mModule), "buffer_size", 1, 1);
|
data/fastcsv.gemspec
CHANGED
data/spec/fastcsv_spec.rb
CHANGED
@@ -57,6 +57,9 @@ RSpec.shared_examples 'a CSV parser' do
|
|
57
57
|
%(foo,"bar\nbaz",bzz),
|
58
58
|
%(foo,"""bar""baz""bzz""",zzz),
|
59
59
|
|
60
|
+
# Single quotes.
|
61
|
+
%('foo','bar','baz'),
|
62
|
+
|
60
63
|
# Buffers.
|
61
64
|
"01234567890" * 2_000, # 20,000 > BUFSIZE
|
62
65
|
"0123456789," * 2_000,
|
@@ -68,7 +71,7 @@ RSpec.shared_examples 'a CSV parser' do
|
|
68
71
|
# Uneven data types.
|
69
72
|
"2000-01-01,2,x\nx,2000-01-01,2",
|
70
73
|
].each do |csv|
|
71
|
-
it "should parse: #{csv}" do
|
74
|
+
it "should parse: #{csv.inspect.gsub('\"', '"')}" do
|
72
75
|
expect(parse(csv)).to eq(CSV.parse(csv))
|
73
76
|
end
|
74
77
|
end
|
@@ -112,34 +115,45 @@ RSpec.shared_examples 'a CSV parser' do
|
|
112
115
|
end
|
113
116
|
end
|
114
117
|
|
118
|
+
it "should parse an encoded string" do
|
119
|
+
csv = "ß"
|
120
|
+
actual = parse(csv)
|
121
|
+
expected = CSV.parse(csv)
|
122
|
+
expect(actual[0][0].encoding).to eq(expected[0][0].encoding)
|
123
|
+
expect(actual).to eq(expected)
|
124
|
+
end
|
125
|
+
|
115
126
|
it 'should raise an error on mixed row separators are' do
|
116
|
-
|
117
|
-
expect{CSV.parse(csv)}.to raise_error(CSV::MalformedCSVError, 'Unquoted fields do not allow \r or \n (line 2).')
|
127
|
+
expect{CSV.parse("foo\rbar\nbaz\r\n")}.to raise_error(CSV::MalformedCSVError, 'Unquoted fields do not allow \r or \n (line 2).')
|
118
128
|
skip
|
119
129
|
end
|
120
130
|
|
121
|
-
|
122
|
-
|
123
|
-
|
131
|
+
context 'when initializing' do
|
132
|
+
it 'should raise an error if no block is given' do
|
133
|
+
expect{parse_without_block('x')}.to raise_error(LocalJumpError, 'no block given')
|
134
|
+
end
|
124
135
|
|
125
|
-
|
126
|
-
|
127
|
-
|
136
|
+
it 'should not raise an error if no block and empty input' do
|
137
|
+
expect{parse_without_block('')}.to_not raise_error
|
138
|
+
end
|
128
139
|
|
129
|
-
|
130
|
-
|
140
|
+
it 'should raise an error if the options are not a Hash or nil' do
|
141
|
+
expect{parse('', '')}.to raise_error(ArgumentError, 'options has to be a Hash or nil')
|
142
|
+
end
|
131
143
|
end
|
132
144
|
|
133
|
-
|
134
|
-
|
135
|
-
|
136
|
-
|
137
|
-
|
145
|
+
context 'when setting a buffer size' do
|
146
|
+
it 'should allow nil' do
|
147
|
+
FastCSV.buffer_size = nil
|
148
|
+
expect(parse(simple)).to eq(CSV.parse(simple))
|
149
|
+
FastCSV.buffer_size = nil
|
150
|
+
end
|
138
151
|
|
139
|
-
|
140
|
-
|
141
|
-
|
142
|
-
|
152
|
+
it 'should allow zero' do
|
153
|
+
FastCSV.buffer_size = 0
|
154
|
+
expect(parse(simple)).to eq(CSV.parse(simple))
|
155
|
+
FastCSV.buffer_size = nil
|
156
|
+
end
|
143
157
|
end
|
144
158
|
end
|
145
159
|
|
@@ -184,35 +198,47 @@ RSpec.describe FastCSV do
|
|
184
198
|
end
|
185
199
|
end
|
186
200
|
|
187
|
-
|
188
|
-
|
189
|
-
|
190
|
-
|
191
|
-
|
192
|
-
|
193
|
-
|
194
|
-
|
195
|
-
|
201
|
+
context 'with encoded strings' do
|
202
|
+
def parse_with_encoding(basename, encoding)
|
203
|
+
filename = File.expand_path(File.join('..', 'fixtures', basename), __FILE__)
|
204
|
+
options = {encoding: encoding}
|
205
|
+
File.open(filename) do |io|
|
206
|
+
rows = []
|
207
|
+
FastCSV.raw_parse(io, options){|row| rows << row}
|
208
|
+
expected = CSV.read(filename, options)
|
209
|
+
expect(rows[0][0].encoding).to eq(expected[0][0].encoding)
|
210
|
+
expect(rows).to eq(expected)
|
211
|
+
end
|
196
212
|
end
|
197
|
-
end
|
198
213
|
|
199
|
-
|
200
|
-
|
201
|
-
|
214
|
+
it 'should encode' do
|
215
|
+
parse_with_encoding('iso-8859-1.csv', 'iso-8859-1')
|
216
|
+
end
|
202
217
|
|
203
|
-
|
204
|
-
|
205
|
-
|
218
|
+
it 'should transcode' do
|
219
|
+
parse_with_encoding('iso-8859-1.csv', 'iso-8859-1:utf-8')
|
220
|
+
end
|
206
221
|
|
207
|
-
|
208
|
-
|
209
|
-
|
222
|
+
it 'should recover from blank external encoding' do
|
223
|
+
parse_with_encoding('utf-8.csv', ':utf-8')
|
224
|
+
end
|
225
|
+
|
226
|
+
it 'should recover from invalid internal encoding' do
|
227
|
+
parse_with_encoding('utf-8.csv', 'invalid')
|
228
|
+
end
|
210
229
|
|
211
|
-
|
212
|
-
|
230
|
+
it 'should recover from invalid external encoding' do
|
231
|
+
parse_with_encoding('utf-8.csv', 'invalid:-')
|
232
|
+
end
|
233
|
+
|
234
|
+
it 'should recover from invalid encodings' do
|
235
|
+
parse_with_encoding('utf-8.csv', 'invalid:invalid')
|
236
|
+
end
|
213
237
|
end
|
214
238
|
|
215
|
-
|
216
|
-
|
239
|
+
context 'when initializing' do
|
240
|
+
it 'should raise an error if the input is not a String or IO' do
|
241
|
+
expect{FastCSV.raw_parse(nil)}.to raise_error(ArgumentError, 'data has to respond to #read or #to_str')
|
242
|
+
end
|
217
243
|
end
|
218
244
|
end
|