fastcsv 0.0.1 → 0.0.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/ext/fastcsv/fastcsv.c +185 -125
- data/ext/fastcsv/fastcsv.rl +130 -80
- data/fastcsv.gemspec +1 -1
- data/spec/fastcsv_spec.rb +69 -43
- metadata +1 -1
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 129c6ed1d3b30a44456108f280a582ccdaac96e9
|
4
|
+
data.tar.gz: 4d819f3bb6e637cb5fb3e130c378583202f8d3ee
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 8a960b458260e864346755a7b00afca9735e8851f70b9ebe3c4d95e1c5300c016fda9b1db5ff7c39cfcc288fdfa51ec038b5796eb12d01c8a7a7cb0d24ae1fe3
|
7
|
+
data.tar.gz: 76612ddd0aedef55ca914a5de6b141d9d274c395ec3b9fcc28897e4ca2762ade22759e078446f6ef8ee673ff96954f328e9d23a645e7ba89fe0168ae75e6e7dc
|
data/ext/fastcsv/fastcsv.c
CHANGED
@@ -16,24 +16,20 @@
|
|
16
16
|
// Ragel help.
|
17
17
|
// https://www.mail-archive.com/ragel-users@complang.org/
|
18
18
|
|
19
|
-
#
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
} \
|
24
|
-
else { \
|
25
|
-
rb_enc_associate_index(field, rb_enc_to_index(external_encoding)); \
|
26
|
-
}
|
19
|
+
#define ENCODE \
|
20
|
+
if (enc2 != NULL) { \
|
21
|
+
field = rb_str_encode(field, rb_enc_from_encoding(enc), 0, Qnil); \
|
22
|
+
}
|
27
23
|
|
28
24
|
static VALUE mModule, rb_eParseError;
|
29
|
-
static ID s_read, s_to_str;
|
25
|
+
static ID s_read, s_to_str, s_internal_encoding, s_external_encoding, s_string, s_encoding;
|
30
26
|
|
31
27
|
|
32
|
-
#line
|
28
|
+
#line 125 "ext/fastcsv/fastcsv.rl"
|
33
29
|
|
34
30
|
|
35
31
|
|
36
|
-
#line
|
32
|
+
#line 33 "ext/fastcsv/fastcsv.c"
|
37
33
|
static const int fastcsv_start = 4;
|
38
34
|
static const int fastcsv_first_final = 4;
|
39
35
|
static const int fastcsv_error = 0;
|
@@ -41,10 +37,39 @@ static const int fastcsv_error = 0;
|
|
41
37
|
static const int fastcsv_en_main = 4;
|
42
38
|
|
43
39
|
|
44
|
-
#line
|
40
|
+
#line 128 "ext/fastcsv/fastcsv.rl"
|
45
41
|
|
42
|
+
// 16 kB
|
46
43
|
#define BUFSIZE 16384
|
47
44
|
|
45
|
+
// @see http://rxr.whitequark.org/mri/source/io.c#4845
|
46
|
+
static void
|
47
|
+
rb_io_ext_int_to_encs(rb_encoding *ext, rb_encoding *intern, rb_encoding **enc, rb_encoding **enc2, int fmode)
|
48
|
+
{
|
49
|
+
int default_ext = 0;
|
50
|
+
|
51
|
+
if (ext == NULL) {
|
52
|
+
ext = rb_default_external_encoding();
|
53
|
+
default_ext = 1;
|
54
|
+
}
|
55
|
+
if (ext == rb_ascii8bit_encoding()) {
|
56
|
+
/* If external is ASCII-8BIT, no transcoding */
|
57
|
+
intern = NULL;
|
58
|
+
}
|
59
|
+
else if (intern == NULL) {
|
60
|
+
intern = rb_default_internal_encoding();
|
61
|
+
}
|
62
|
+
if (intern == NULL || intern == (rb_encoding *)Qnil || intern == ext) {
|
63
|
+
/* No internal encoding => use external + no transcoding */
|
64
|
+
*enc = (default_ext && intern != ext) ? NULL : ext;
|
65
|
+
*enc2 = NULL;
|
66
|
+
}
|
67
|
+
else {
|
68
|
+
*enc = intern;
|
69
|
+
*enc2 = ext;
|
70
|
+
}
|
71
|
+
}
|
72
|
+
|
48
73
|
VALUE fastcsv(int argc, VALUE *argv, VALUE self) {
|
49
74
|
int cs, act, have = 0, curline = 1, io = 0;
|
50
75
|
char *ts = 0, *te = 0, *buf = 0, *eof = 0;
|
@@ -52,11 +77,11 @@ VALUE fastcsv(int argc, VALUE *argv, VALUE self) {
|
|
52
77
|
VALUE port, opts;
|
53
78
|
VALUE row = rb_ary_new(), field = Qnil, bufsize = Qnil;
|
54
79
|
int done = 0, unclosed_line = 0, buffer_size = 0, taint = 0;
|
55
|
-
|
56
|
-
|
80
|
+
rb_encoding *enc = NULL, *enc2 = NULL, *encoding = NULL;
|
81
|
+
VALUE r_encoding;
|
57
82
|
|
58
83
|
VALUE option;
|
59
|
-
char quote_char = '"';
|
84
|
+
char quote_char = '"';
|
60
85
|
|
61
86
|
rb_scan_args(argc, argv, "11", &port, &opts);
|
62
87
|
taint = OBJ_TAINTED(port);
|
@@ -78,76 +103,111 @@ VALUE fastcsv(int argc, VALUE *argv, VALUE self) {
|
|
78
103
|
rb_raise(rb_eArgError, "options has to be a Hash or nil");
|
79
104
|
}
|
80
105
|
|
81
|
-
// @
|
82
|
-
|
83
|
-
|
84
|
-
// if (TYPE(option) == T_STRING && RSTRING_LEN(option) == 1) {
|
85
|
-
// quote_char = *StringValueCStr(option);
|
86
|
-
// }
|
87
|
-
// else if (!NIL_P(option)) {
|
88
|
-
// rb_raise(rb_eArgError, ":quote_char has to be a single character String");
|
89
|
-
// }
|
90
|
-
|
91
|
-
// option = rb_hash_aref(opts, ID2SYM(rb_intern("col_sep")));
|
92
|
-
// if (TYPE(option) == T_STRING) {
|
93
|
-
// col_sep = StringValueCStr(option);
|
94
|
-
// }
|
95
|
-
// else if (!NIL_P(option)) {
|
96
|
-
// rb_raise(rb_eArgError, ":col_sep has to be a String");
|
97
|
-
// }
|
98
|
-
|
99
|
-
// option = rb_hash_aref(opts, ID2SYM(rb_intern("row_sep")));
|
100
|
-
// if (TYPE(option) == T_STRING) {
|
101
|
-
// row_sep = StringValueCStr(option);
|
102
|
-
// }
|
103
|
-
// else if (!NIL_P(option)) {
|
104
|
-
// rb_raise(rb_eArgError, ":row_sep has to be a String");
|
105
|
-
// }
|
106
|
+
// @see rb_io_extract_modeenc
|
107
|
+
/* Set to defaults */
|
108
|
+
rb_io_ext_int_to_encs(NULL, NULL, &enc, &enc2, 0);
|
106
109
|
|
110
|
+
// "enc" (internal) or "enc2:enc" (external:internal) or "enc:-" (external).
|
111
|
+
// We don't support binmode, which would force "ASCII-8BIT", or "BOM|UTF-*".
|
112
|
+
// @see http://ruby-doc.org/core-2.1.1/IO.html#method-c-new-label-Open+Mode
|
107
113
|
option = rb_hash_aref(opts, ID2SYM(rb_intern("encoding")));
|
108
114
|
if (TYPE(option) == T_STRING) {
|
109
|
-
//
|
110
|
-
const char *
|
111
|
-
char
|
115
|
+
// parse_mode_enc is not in header file.
|
116
|
+
const char *estr = StringValueCStr(option), *ptr;
|
117
|
+
char encname[ENCODING_MAXNAMELEN+1];
|
118
|
+
int idx, idx2;
|
119
|
+
rb_encoding *ext_enc, *int_enc;
|
120
|
+
|
121
|
+
/* parse estr as "enc" or "enc2:enc" or "enc:-" */
|
112
122
|
|
113
|
-
|
114
|
-
if (
|
115
|
-
long len = (
|
123
|
+
ptr = strrchr(estr, ':');
|
124
|
+
if (ptr) {
|
125
|
+
long len = (ptr++) - estr;
|
116
126
|
if (len == 0 || len > ENCODING_MAXNAMELEN) {
|
117
|
-
|
127
|
+
idx = -1;
|
118
128
|
}
|
119
129
|
else {
|
120
|
-
memcpy(
|
121
|
-
|
122
|
-
|
123
|
-
|
130
|
+
memcpy(encname, estr, len);
|
131
|
+
encname[len] = '\0';
|
132
|
+
estr = encname;
|
133
|
+
idx = rb_enc_find_index(encname);
|
124
134
|
}
|
125
135
|
}
|
126
136
|
else {
|
127
|
-
|
137
|
+
idx = rb_enc_find_index(estr);
|
128
138
|
}
|
129
139
|
|
130
|
-
if (
|
131
|
-
|
140
|
+
if (idx >= 0) {
|
141
|
+
ext_enc = rb_enc_from_index(idx);
|
142
|
+
}
|
143
|
+
else {
|
144
|
+
if (idx != -2) {
|
145
|
+
// `unsupported_encoding` is not in header file.
|
146
|
+
rb_warn("Unsupported encoding %s ignored", estr);
|
147
|
+
}
|
148
|
+
ext_enc = NULL;
|
132
149
|
}
|
133
150
|
|
134
|
-
|
135
|
-
|
136
|
-
if (
|
137
|
-
|
151
|
+
int_enc = NULL;
|
152
|
+
if (ptr) {
|
153
|
+
if (*ptr == '-' && *(ptr+1) == '\0') {
|
154
|
+
/* Special case - "-" => no transcoding */
|
155
|
+
int_enc = (rb_encoding *)Qnil;
|
138
156
|
}
|
139
157
|
else {
|
140
|
-
|
158
|
+
idx2 = rb_enc_find_index(ptr);
|
159
|
+
if (idx2 < 0) {
|
160
|
+
// `unsupported_encoding` is not in header file.
|
161
|
+
rb_warn("Unsupported encoding %s ignored", ptr);
|
162
|
+
}
|
163
|
+
else if (idx2 == idx) {
|
164
|
+
int_enc = (rb_encoding *)Qnil;
|
165
|
+
}
|
166
|
+
else {
|
167
|
+
int_enc = rb_enc_from_index(idx2);
|
168
|
+
}
|
141
169
|
}
|
142
170
|
}
|
143
|
-
|
144
|
-
|
145
|
-
}
|
171
|
+
|
172
|
+
rb_io_ext_int_to_encs(ext_enc, int_enc, &enc, &enc2, 0);
|
146
173
|
}
|
147
174
|
else if (!NIL_P(option)) {
|
148
175
|
rb_raise(rb_eArgError, ":encoding has to be a String");
|
149
176
|
}
|
150
177
|
|
178
|
+
// @see https://github.com/ruby/ruby/blob/70510d026f8d86693dccaba07417488eed09b41d/lib/csv.rb#L1567
|
179
|
+
// @see https://github.com/ruby/ruby/blob/70510d026f8d86693dccaba07417488eed09b41d/lib/csv.rb#L2300
|
180
|
+
if (rb_respond_to(port, s_internal_encoding)) {
|
181
|
+
r_encoding = rb_funcall(port, s_internal_encoding, 0);
|
182
|
+
if (NIL_P(r_encoding)) {
|
183
|
+
r_encoding = rb_funcall(port, s_external_encoding, 0);
|
184
|
+
}
|
185
|
+
}
|
186
|
+
else if (rb_respond_to(port, s_string)) {
|
187
|
+
r_encoding = rb_funcall(rb_funcall(port, s_string, 0), s_encoding, 0);
|
188
|
+
}
|
189
|
+
else if (rb_respond_to(port, s_encoding)) {
|
190
|
+
r_encoding = rb_funcall(port, s_encoding, 0);
|
191
|
+
}
|
192
|
+
else {
|
193
|
+
r_encoding = rb_enc_from_encoding(rb_ascii8bit_encoding());
|
194
|
+
}
|
195
|
+
if (NIL_P(r_encoding)) {
|
196
|
+
r_encoding = rb_enc_from_encoding(rb_default_internal_encoding());
|
197
|
+
}
|
198
|
+
if (NIL_P(r_encoding)) {
|
199
|
+
r_encoding = rb_enc_from_encoding(rb_default_external_encoding());
|
200
|
+
}
|
201
|
+
if (enc2 != NULL) {
|
202
|
+
encoding = enc2;
|
203
|
+
}
|
204
|
+
else if (enc != NULL) {
|
205
|
+
encoding = enc;
|
206
|
+
}
|
207
|
+
else if (!NIL_P(r_encoding)) {
|
208
|
+
encoding = rb_enc_get(r_encoding);
|
209
|
+
}
|
210
|
+
|
151
211
|
buffer_size = BUFSIZE;
|
152
212
|
if (rb_ivar_defined(self, rb_intern("@buffer_size")) == Qtrue) {
|
153
213
|
bufsize = rb_ivar_get(self, rb_intern("@buffer_size"));
|
@@ -161,7 +221,7 @@ VALUE fastcsv(int argc, VALUE *argv, VALUE self) {
|
|
161
221
|
}
|
162
222
|
|
163
223
|
|
164
|
-
#line
|
224
|
+
#line 225 "ext/fastcsv/fastcsv.c"
|
165
225
|
{
|
166
226
|
cs = fastcsv_start;
|
167
227
|
ts = 0;
|
@@ -169,7 +229,7 @@ VALUE fastcsv(int argc, VALUE *argv, VALUE self) {
|
|
169
229
|
act = 0;
|
170
230
|
}
|
171
231
|
|
172
|
-
#line
|
232
|
+
#line 311 "ext/fastcsv/fastcsv.rl"
|
173
233
|
|
174
234
|
while (!done) {
|
175
235
|
VALUE str;
|
@@ -217,12 +277,8 @@ VALUE fastcsv(int argc, VALUE *argv, VALUE self) {
|
|
217
277
|
}
|
218
278
|
|
219
279
|
pe = p + len;
|
220
|
-
// if (done) {
|
221
|
-
// // This triggers the eof action in the non-scanner version.
|
222
|
-
// eof = pe;
|
223
|
-
// }
|
224
280
|
|
225
|
-
#line
|
281
|
+
#line 282 "ext/fastcsv/fastcsv.c"
|
226
282
|
{
|
227
283
|
if ( p == pe )
|
228
284
|
goto _test_eof;
|
@@ -241,7 +297,7 @@ tr0:
|
|
241
297
|
}
|
242
298
|
goto st4;
|
243
299
|
tr10:
|
244
|
-
#line
|
300
|
+
#line 101 "ext/fastcsv/fastcsv.rl"
|
245
301
|
{
|
246
302
|
if (!NIL_P(field) || RARRAY_LEN(row)) {
|
247
303
|
rb_ary_push(row, field);
|
@@ -250,19 +306,19 @@ tr10:
|
|
250
306
|
rb_yield(row);
|
251
307
|
}
|
252
308
|
}
|
253
|
-
#line
|
309
|
+
#line 123 "ext/fastcsv/fastcsv.rl"
|
254
310
|
{te = p+1;}
|
255
311
|
goto st4;
|
256
312
|
tr16:
|
257
|
-
#line
|
313
|
+
#line 123 "ext/fastcsv/fastcsv.rl"
|
258
314
|
{te = p;p--;}
|
259
315
|
goto st4;
|
260
316
|
tr17:
|
261
|
-
#line
|
317
|
+
#line 122 "ext/fastcsv/fastcsv.rl"
|
262
318
|
{te = p;p--;}
|
263
319
|
goto st4;
|
264
320
|
tr18:
|
265
|
-
#line
|
321
|
+
#line 101 "ext/fastcsv/fastcsv.rl"
|
266
322
|
{
|
267
323
|
if (!NIL_P(field) || RARRAY_LEN(row)) {
|
268
324
|
rb_ary_push(row, field);
|
@@ -271,15 +327,15 @@ tr18:
|
|
271
327
|
rb_yield(row);
|
272
328
|
}
|
273
329
|
}
|
274
|
-
#line
|
330
|
+
#line 122 "ext/fastcsv/fastcsv.rl"
|
275
331
|
{te = p+1;}
|
276
332
|
goto st4;
|
277
333
|
tr20:
|
278
|
-
#line
|
334
|
+
#line 121 "ext/fastcsv/fastcsv.rl"
|
279
335
|
{te = p;p--;}
|
280
336
|
goto st4;
|
281
337
|
tr21:
|
282
|
-
#line
|
338
|
+
#line 101 "ext/fastcsv/fastcsv.rl"
|
283
339
|
{
|
284
340
|
if (!NIL_P(field) || RARRAY_LEN(row)) {
|
285
341
|
rb_ary_push(row, field);
|
@@ -288,7 +344,7 @@ tr21:
|
|
288
344
|
rb_yield(row);
|
289
345
|
}
|
290
346
|
}
|
291
|
-
#line
|
347
|
+
#line 121 "ext/fastcsv/fastcsv.rl"
|
292
348
|
{te = p+1;}
|
293
349
|
goto st4;
|
294
350
|
st4:
|
@@ -301,7 +357,7 @@ st4:
|
|
301
357
|
case 4:
|
302
358
|
#line 1 "NONE"
|
303
359
|
{ts = p;}
|
304
|
-
#line
|
360
|
+
#line 361 "ext/fastcsv/fastcsv.c"
|
305
361
|
switch( (*p) ) {
|
306
362
|
case 0: goto tr14;
|
307
363
|
case 10: goto tr3;
|
@@ -325,18 +381,18 @@ case 1:
|
|
325
381
|
tr2:
|
326
382
|
#line 1 "NONE"
|
327
383
|
{te = p+1;}
|
328
|
-
#line
|
384
|
+
#line 40 "ext/fastcsv/fastcsv.rl"
|
329
385
|
{
|
330
386
|
if (p == ts) {
|
331
387
|
// Unquoted empty fields are nil, not "", in Ruby.
|
332
388
|
field = Qnil;
|
333
389
|
}
|
334
390
|
else if (p > ts) {
|
335
|
-
field =
|
336
|
-
|
391
|
+
field = rb_enc_str_new(ts, p - ts, encoding);
|
392
|
+
ENCODE;
|
337
393
|
}
|
338
394
|
}
|
339
|
-
#line
|
395
|
+
#line 101 "ext/fastcsv/fastcsv.rl"
|
340
396
|
{
|
341
397
|
if (!NIL_P(field) || RARRAY_LEN(row)) {
|
342
398
|
rb_ary_push(row, field);
|
@@ -345,14 +401,14 @@ tr2:
|
|
345
401
|
rb_yield(row);
|
346
402
|
}
|
347
403
|
}
|
348
|
-
#line
|
404
|
+
#line 123 "ext/fastcsv/fastcsv.rl"
|
349
405
|
{act = 3;}
|
350
406
|
goto st5;
|
351
407
|
st5:
|
352
408
|
if ( ++p == pe )
|
353
409
|
goto _test_eof5;
|
354
410
|
case 5:
|
355
|
-
#line
|
411
|
+
#line 412 "ext/fastcsv/fastcsv.c"
|
356
412
|
switch( (*p) ) {
|
357
413
|
case 0: goto tr2;
|
358
414
|
case 10: goto tr3;
|
@@ -362,18 +418,18 @@ case 5:
|
|
362
418
|
}
|
363
419
|
goto st1;
|
364
420
|
tr3:
|
365
|
-
#line
|
421
|
+
#line 40 "ext/fastcsv/fastcsv.rl"
|
366
422
|
{
|
367
423
|
if (p == ts) {
|
368
424
|
// Unquoted empty fields are nil, not "", in Ruby.
|
369
425
|
field = Qnil;
|
370
426
|
}
|
371
427
|
else if (p > ts) {
|
372
|
-
field =
|
373
|
-
|
428
|
+
field = rb_enc_str_new(ts, p - ts, encoding);
|
429
|
+
ENCODE;
|
374
430
|
}
|
375
431
|
}
|
376
|
-
#line
|
432
|
+
#line 91 "ext/fastcsv/fastcsv.rl"
|
377
433
|
{
|
378
434
|
if (!NIL_P(field) || RARRAY_LEN(row)) { // same as new_field
|
379
435
|
rb_ary_push(row, field);
|
@@ -383,19 +439,19 @@ tr3:
|
|
383
439
|
rb_yield(row);
|
384
440
|
row = rb_ary_new();
|
385
441
|
}
|
386
|
-
#line
|
442
|
+
#line 28 "ext/fastcsv/fastcsv.rl"
|
387
443
|
{
|
388
444
|
curline++;
|
389
445
|
}
|
390
446
|
goto st6;
|
391
447
|
tr19:
|
392
|
-
#line
|
448
|
+
#line 28 "ext/fastcsv/fastcsv.rl"
|
393
449
|
{
|
394
450
|
curline++;
|
395
451
|
}
|
396
452
|
goto st6;
|
397
453
|
tr11:
|
398
|
-
#line
|
454
|
+
#line 91 "ext/fastcsv/fastcsv.rl"
|
399
455
|
{
|
400
456
|
if (!NIL_P(field) || RARRAY_LEN(row)) { // same as new_field
|
401
457
|
rb_ary_push(row, field);
|
@@ -405,7 +461,7 @@ tr11:
|
|
405
461
|
rb_yield(row);
|
406
462
|
row = rb_ary_new();
|
407
463
|
}
|
408
|
-
#line
|
464
|
+
#line 28 "ext/fastcsv/fastcsv.rl"
|
409
465
|
{
|
410
466
|
curline++;
|
411
467
|
}
|
@@ -414,23 +470,23 @@ st6:
|
|
414
470
|
if ( ++p == pe )
|
415
471
|
goto _test_eof6;
|
416
472
|
case 6:
|
417
|
-
#line
|
473
|
+
#line 474 "ext/fastcsv/fastcsv.c"
|
418
474
|
if ( (*p) == 0 )
|
419
475
|
goto tr18;
|
420
476
|
goto tr17;
|
421
477
|
tr4:
|
422
|
-
#line
|
478
|
+
#line 40 "ext/fastcsv/fastcsv.rl"
|
423
479
|
{
|
424
480
|
if (p == ts) {
|
425
481
|
// Unquoted empty fields are nil, not "", in Ruby.
|
426
482
|
field = Qnil;
|
427
483
|
}
|
428
484
|
else if (p > ts) {
|
429
|
-
field =
|
430
|
-
|
485
|
+
field = rb_enc_str_new(ts, p - ts, encoding);
|
486
|
+
ENCODE;
|
431
487
|
}
|
432
488
|
}
|
433
|
-
#line
|
489
|
+
#line 91 "ext/fastcsv/fastcsv.rl"
|
434
490
|
{
|
435
491
|
if (!NIL_P(field) || RARRAY_LEN(row)) { // same as new_field
|
436
492
|
rb_ary_push(row, field);
|
@@ -440,13 +496,13 @@ tr4:
|
|
440
496
|
rb_yield(row);
|
441
497
|
row = rb_ary_new();
|
442
498
|
}
|
443
|
-
#line
|
499
|
+
#line 28 "ext/fastcsv/fastcsv.rl"
|
444
500
|
{
|
445
501
|
curline++;
|
446
502
|
}
|
447
503
|
goto st7;
|
448
504
|
tr12:
|
449
|
-
#line
|
505
|
+
#line 91 "ext/fastcsv/fastcsv.rl"
|
450
506
|
{
|
451
507
|
if (!NIL_P(field) || RARRAY_LEN(row)) { // same as new_field
|
452
508
|
rb_ary_push(row, field);
|
@@ -456,7 +512,7 @@ tr12:
|
|
456
512
|
rb_yield(row);
|
457
513
|
row = rb_ary_new();
|
458
514
|
}
|
459
|
-
#line
|
515
|
+
#line 28 "ext/fastcsv/fastcsv.rl"
|
460
516
|
{
|
461
517
|
curline++;
|
462
518
|
}
|
@@ -465,32 +521,32 @@ st7:
|
|
465
521
|
if ( ++p == pe )
|
466
522
|
goto _test_eof7;
|
467
523
|
case 7:
|
468
|
-
#line
|
524
|
+
#line 525 "ext/fastcsv/fastcsv.c"
|
469
525
|
switch( (*p) ) {
|
470
526
|
case 0: goto tr18;
|
471
527
|
case 10: goto tr19;
|
472
528
|
}
|
473
529
|
goto tr17;
|
474
530
|
tr5:
|
475
|
-
#line
|
531
|
+
#line 40 "ext/fastcsv/fastcsv.rl"
|
476
532
|
{
|
477
533
|
if (p == ts) {
|
478
534
|
// Unquoted empty fields are nil, not "", in Ruby.
|
479
535
|
field = Qnil;
|
480
536
|
}
|
481
537
|
else if (p > ts) {
|
482
|
-
field =
|
483
|
-
|
538
|
+
field = rb_enc_str_new(ts, p - ts, encoding);
|
539
|
+
ENCODE;
|
484
540
|
}
|
485
541
|
}
|
486
|
-
#line
|
542
|
+
#line 86 "ext/fastcsv/fastcsv.rl"
|
487
543
|
{
|
488
544
|
rb_ary_push(row, field);
|
489
545
|
field = Qnil;
|
490
546
|
}
|
491
547
|
goto st8;
|
492
548
|
tr13:
|
493
|
-
#line
|
549
|
+
#line 86 "ext/fastcsv/fastcsv.rl"
|
494
550
|
{
|
495
551
|
rb_ary_push(row, field);
|
496
552
|
field = Qnil;
|
@@ -500,14 +556,14 @@ st8:
|
|
500
556
|
if ( ++p == pe )
|
501
557
|
goto _test_eof8;
|
502
558
|
case 8:
|
503
|
-
#line
|
559
|
+
#line 560 "ext/fastcsv/fastcsv.c"
|
504
560
|
if ( (*p) == 0 )
|
505
561
|
goto tr21;
|
506
562
|
goto tr20;
|
507
563
|
tr14:
|
508
564
|
#line 1 "NONE"
|
509
565
|
{te = p+1;}
|
510
|
-
#line
|
566
|
+
#line 101 "ext/fastcsv/fastcsv.rl"
|
511
567
|
{
|
512
568
|
if (!NIL_P(field) || RARRAY_LEN(row)) {
|
513
569
|
rb_ary_push(row, field);
|
@@ -516,25 +572,25 @@ tr14:
|
|
516
572
|
rb_yield(row);
|
517
573
|
}
|
518
574
|
}
|
519
|
-
#line
|
575
|
+
#line 40 "ext/fastcsv/fastcsv.rl"
|
520
576
|
{
|
521
577
|
if (p == ts) {
|
522
578
|
// Unquoted empty fields are nil, not "", in Ruby.
|
523
579
|
field = Qnil;
|
524
580
|
}
|
525
581
|
else if (p > ts) {
|
526
|
-
field =
|
527
|
-
|
582
|
+
field = rb_enc_str_new(ts, p - ts, encoding);
|
583
|
+
ENCODE;
|
528
584
|
}
|
529
585
|
}
|
530
|
-
#line
|
586
|
+
#line 123 "ext/fastcsv/fastcsv.rl"
|
531
587
|
{act = 3;}
|
532
588
|
goto st9;
|
533
589
|
st9:
|
534
590
|
if ( ++p == pe )
|
535
591
|
goto _test_eof9;
|
536
592
|
case 9:
|
537
|
-
#line
|
593
|
+
#line 594 "ext/fastcsv/fastcsv.c"
|
538
594
|
switch( (*p) ) {
|
539
595
|
case 10: goto tr16;
|
540
596
|
case 13: goto tr16;
|
@@ -543,13 +599,13 @@ case 9:
|
|
543
599
|
}
|
544
600
|
goto st1;
|
545
601
|
tr8:
|
546
|
-
#line
|
602
|
+
#line 28 "ext/fastcsv/fastcsv.rl"
|
547
603
|
{
|
548
604
|
curline++;
|
549
605
|
}
|
550
606
|
goto st2;
|
551
607
|
tr15:
|
552
|
-
#line
|
608
|
+
#line 32 "ext/fastcsv/fastcsv.rl"
|
553
609
|
{
|
554
610
|
unclosed_line = curline;
|
555
611
|
}
|
@@ -558,7 +614,7 @@ st2:
|
|
558
614
|
if ( ++p == pe )
|
559
615
|
goto _test_eof2;
|
560
616
|
case 2:
|
561
|
-
#line
|
617
|
+
#line 618 "ext/fastcsv/fastcsv.c"
|
562
618
|
switch( (*p) ) {
|
563
619
|
case 0: goto st0;
|
564
620
|
case 10: goto tr8;
|
@@ -570,11 +626,11 @@ st0:
|
|
570
626
|
cs = 0;
|
571
627
|
goto _out;
|
572
628
|
tr9:
|
573
|
-
#line
|
629
|
+
#line 51 "ext/fastcsv/fastcsv.rl"
|
574
630
|
{
|
575
631
|
if (p == ts) {
|
576
|
-
field =
|
577
|
-
|
632
|
+
field = rb_enc_str_new("", 0, encoding);
|
633
|
+
ENCODE;
|
578
634
|
}
|
579
635
|
// @note If we add an action on '""', we can skip some steps if no '""' is found.
|
580
636
|
else if (p > ts) {
|
@@ -597,15 +653,15 @@ tr9:
|
|
597
653
|
reader++;
|
598
654
|
}
|
599
655
|
|
600
|
-
field =
|
601
|
-
|
656
|
+
field = rb_enc_str_new(copy, writer - copy, enc);
|
657
|
+
ENCODE;
|
602
658
|
|
603
659
|
if (copy != NULL) {
|
604
660
|
free(copy);
|
605
661
|
}
|
606
662
|
}
|
607
663
|
}
|
608
|
-
#line
|
664
|
+
#line 36 "ext/fastcsv/fastcsv.rl"
|
609
665
|
{
|
610
666
|
unclosed_line = 0;
|
611
667
|
}
|
@@ -614,7 +670,7 @@ st3:
|
|
614
670
|
if ( ++p == pe )
|
615
671
|
goto _test_eof3;
|
616
672
|
case 3:
|
617
|
-
#line
|
673
|
+
#line 674 "ext/fastcsv/fastcsv.c"
|
618
674
|
switch( (*p) ) {
|
619
675
|
case 0: goto tr10;
|
620
676
|
case 10: goto tr11;
|
@@ -650,7 +706,7 @@ case 3:
|
|
650
706
|
_out: {}
|
651
707
|
}
|
652
708
|
|
653
|
-
#line
|
709
|
+
#line 359 "ext/fastcsv/fastcsv.rl"
|
654
710
|
|
655
711
|
if (done && cs < fastcsv_first_final) {
|
656
712
|
if (buf != NULL) {
|
@@ -689,6 +745,10 @@ case 3:
|
|
689
745
|
void Init_fastcsv() {
|
690
746
|
s_read = rb_intern("read");
|
691
747
|
s_to_str = rb_intern("to_str");
|
748
|
+
s_internal_encoding = rb_intern("internal_encoding");
|
749
|
+
s_external_encoding = rb_intern("external_encoding");
|
750
|
+
s_string = rb_intern("string");
|
751
|
+
s_encoding = rb_intern("encoding");
|
692
752
|
|
693
753
|
mModule = rb_define_module("FastCSV");
|
694
754
|
rb_define_attr(rb_singleton_class(mModule), "buffer_size", 1, 1);
|
data/ext/fastcsv/fastcsv.rl
CHANGED
@@ -14,17 +14,13 @@
|
|
14
14
|
// Ragel help.
|
15
15
|
// https://www.mail-archive.com/ragel-users@complang.org/
|
16
16
|
|
17
|
-
#
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
} \
|
22
|
-
else { \
|
23
|
-
rb_enc_associate_index(field, rb_enc_to_index(external_encoding)); \
|
24
|
-
}
|
17
|
+
#define ENCODE \
|
18
|
+
if (enc2 != NULL) { \
|
19
|
+
field = rb_str_encode(field, rb_enc_from_encoding(enc), 0, Qnil); \
|
20
|
+
}
|
25
21
|
|
26
22
|
static VALUE mModule, rb_eParseError;
|
27
|
-
static ID s_read, s_to_str;
|
23
|
+
static ID s_read, s_to_str, s_internal_encoding, s_external_encoding, s_string, s_encoding;
|
28
24
|
|
29
25
|
%%{
|
30
26
|
machine fastcsv;
|
@@ -47,15 +43,15 @@ static ID s_read, s_to_str;
|
|
47
43
|
field = Qnil;
|
48
44
|
}
|
49
45
|
else if (p > ts) {
|
50
|
-
field =
|
51
|
-
|
46
|
+
field = rb_enc_str_new(ts, p - ts, encoding);
|
47
|
+
ENCODE;
|
52
48
|
}
|
53
49
|
}
|
54
50
|
|
55
51
|
action read_quoted {
|
56
52
|
if (p == ts) {
|
57
|
-
field =
|
58
|
-
|
53
|
+
field = rb_enc_str_new("", 0, encoding);
|
54
|
+
ENCODE;
|
59
55
|
}
|
60
56
|
// @note If we add an action on '""', we can skip some steps if no '""' is found.
|
61
57
|
else if (p > ts) {
|
@@ -78,8 +74,8 @@ static ID s_read, s_to_str;
|
|
78
74
|
reader++;
|
79
75
|
}
|
80
76
|
|
81
|
-
field =
|
82
|
-
|
77
|
+
field = rb_enc_str_new(copy, writer - copy, enc);
|
78
|
+
ENCODE;
|
83
79
|
|
84
80
|
if (copy != NULL) {
|
85
81
|
free(copy);
|
@@ -118,30 +114,49 @@ static ID s_read, s_to_str;
|
|
118
114
|
unquoted = (any* -- quote_char -- col_sep -- row_sep - EOF) %read_unquoted;
|
119
115
|
quoted = quote_char >open_quote (any - quote_char - EOF | quote_char quote_char | row_sep)* %read_quoted quote_char >close_quote;
|
120
116
|
field = unquoted | quoted;
|
121
|
-
# fields = (field col_sep)* field?;
|
122
|
-
# file = (fields row_sep >new_row)* fields?;
|
123
117
|
|
124
118
|
# @see Ragel Guide: 6.3 Scanners
|
125
|
-
#
|
119
|
+
# An unquoted field can be zero-length.
|
126
120
|
main := |*
|
127
121
|
field col_sep EOF?;
|
128
122
|
field row_sep >new_row EOF?;
|
129
123
|
field EOF;
|
130
124
|
*|;
|
131
|
-
|
132
|
-
# Non-scanner version requires very large buffer.
|
133
|
-
# main := file $/{
|
134
|
-
# if (!NIL_P(field) || RARRAY_LEN(row)) {
|
135
|
-
# rb_ary_push(row, field);
|
136
|
-
# rb_yield(row);
|
137
|
-
# }
|
138
|
-
# };
|
139
125
|
}%%
|
140
126
|
|
141
127
|
%% write data;
|
142
128
|
|
129
|
+
// 16 kB
|
143
130
|
#define BUFSIZE 16384
|
144
131
|
|
132
|
+
// @see http://rxr.whitequark.org/mri/source/io.c#4845
|
133
|
+
static void
|
134
|
+
rb_io_ext_int_to_encs(rb_encoding *ext, rb_encoding *intern, rb_encoding **enc, rb_encoding **enc2, int fmode)
|
135
|
+
{
|
136
|
+
int default_ext = 0;
|
137
|
+
|
138
|
+
if (ext == NULL) {
|
139
|
+
ext = rb_default_external_encoding();
|
140
|
+
default_ext = 1;
|
141
|
+
}
|
142
|
+
if (ext == rb_ascii8bit_encoding()) {
|
143
|
+
/* If external is ASCII-8BIT, no transcoding */
|
144
|
+
intern = NULL;
|
145
|
+
}
|
146
|
+
else if (intern == NULL) {
|
147
|
+
intern = rb_default_internal_encoding();
|
148
|
+
}
|
149
|
+
if (intern == NULL || intern == (rb_encoding *)Qnil || intern == ext) {
|
150
|
+
/* No internal encoding => use external + no transcoding */
|
151
|
+
*enc = (default_ext && intern != ext) ? NULL : ext;
|
152
|
+
*enc2 = NULL;
|
153
|
+
}
|
154
|
+
else {
|
155
|
+
*enc = intern;
|
156
|
+
*enc2 = ext;
|
157
|
+
}
|
158
|
+
}
|
159
|
+
|
145
160
|
VALUE fastcsv(int argc, VALUE *argv, VALUE self) {
|
146
161
|
int cs, act, have = 0, curline = 1, io = 0;
|
147
162
|
char *ts = 0, *te = 0, *buf = 0, *eof = 0;
|
@@ -149,11 +164,11 @@ VALUE fastcsv(int argc, VALUE *argv, VALUE self) {
|
|
149
164
|
VALUE port, opts;
|
150
165
|
VALUE row = rb_ary_new(), field = Qnil, bufsize = Qnil;
|
151
166
|
int done = 0, unclosed_line = 0, buffer_size = 0, taint = 0;
|
152
|
-
|
153
|
-
|
167
|
+
rb_encoding *enc = NULL, *enc2 = NULL, *encoding = NULL;
|
168
|
+
VALUE r_encoding;
|
154
169
|
|
155
170
|
VALUE option;
|
156
|
-
char quote_char = '"';
|
171
|
+
char quote_char = '"';
|
157
172
|
|
158
173
|
rb_scan_args(argc, argv, "11", &port, &opts);
|
159
174
|
taint = OBJ_TAINTED(port);
|
@@ -175,76 +190,111 @@ VALUE fastcsv(int argc, VALUE *argv, VALUE self) {
|
|
175
190
|
rb_raise(rb_eArgError, "options has to be a Hash or nil");
|
176
191
|
}
|
177
192
|
|
178
|
-
// @
|
179
|
-
|
180
|
-
|
181
|
-
// if (TYPE(option) == T_STRING && RSTRING_LEN(option) == 1) {
|
182
|
-
// quote_char = *StringValueCStr(option);
|
183
|
-
// }
|
184
|
-
// else if (!NIL_P(option)) {
|
185
|
-
// rb_raise(rb_eArgError, ":quote_char has to be a single character String");
|
186
|
-
// }
|
187
|
-
|
188
|
-
// option = rb_hash_aref(opts, ID2SYM(rb_intern("col_sep")));
|
189
|
-
// if (TYPE(option) == T_STRING) {
|
190
|
-
// col_sep = StringValueCStr(option);
|
191
|
-
// }
|
192
|
-
// else if (!NIL_P(option)) {
|
193
|
-
// rb_raise(rb_eArgError, ":col_sep has to be a String");
|
194
|
-
// }
|
195
|
-
|
196
|
-
// option = rb_hash_aref(opts, ID2SYM(rb_intern("row_sep")));
|
197
|
-
// if (TYPE(option) == T_STRING) {
|
198
|
-
// row_sep = StringValueCStr(option);
|
199
|
-
// }
|
200
|
-
// else if (!NIL_P(option)) {
|
201
|
-
// rb_raise(rb_eArgError, ":row_sep has to be a String");
|
202
|
-
// }
|
193
|
+
// @see rb_io_extract_modeenc
|
194
|
+
/* Set to defaults */
|
195
|
+
rb_io_ext_int_to_encs(NULL, NULL, &enc, &enc2, 0);
|
203
196
|
|
197
|
+
// "enc" (internal) or "enc2:enc" (external:internal) or "enc:-" (external).
|
198
|
+
// We don't support binmode, which would force "ASCII-8BIT", or "BOM|UTF-*".
|
199
|
+
// @see http://ruby-doc.org/core-2.1.1/IO.html#method-c-new-label-Open+Mode
|
204
200
|
option = rb_hash_aref(opts, ID2SYM(rb_intern("encoding")));
|
205
201
|
if (TYPE(option) == T_STRING) {
|
206
|
-
//
|
207
|
-
const char *
|
208
|
-
char
|
202
|
+
// parse_mode_enc is not in header file.
|
203
|
+
const char *estr = StringValueCStr(option), *ptr;
|
204
|
+
char encname[ENCODING_MAXNAMELEN+1];
|
205
|
+
int idx, idx2;
|
206
|
+
rb_encoding *ext_enc, *int_enc;
|
207
|
+
|
208
|
+
/* parse estr as "enc" or "enc2:enc" or "enc:-" */
|
209
209
|
|
210
|
-
|
211
|
-
if (
|
212
|
-
long len = (
|
210
|
+
ptr = strrchr(estr, ':');
|
211
|
+
if (ptr) {
|
212
|
+
long len = (ptr++) - estr;
|
213
213
|
if (len == 0 || len > ENCODING_MAXNAMELEN) {
|
214
|
-
|
214
|
+
idx = -1;
|
215
215
|
}
|
216
216
|
else {
|
217
|
-
memcpy(
|
218
|
-
|
219
|
-
|
220
|
-
|
217
|
+
memcpy(encname, estr, len);
|
218
|
+
encname[len] = '\0';
|
219
|
+
estr = encname;
|
220
|
+
idx = rb_enc_find_index(encname);
|
221
221
|
}
|
222
222
|
}
|
223
223
|
else {
|
224
|
-
|
224
|
+
idx = rb_enc_find_index(estr);
|
225
225
|
}
|
226
226
|
|
227
|
-
if (
|
228
|
-
|
227
|
+
if (idx >= 0) {
|
228
|
+
ext_enc = rb_enc_from_index(idx);
|
229
|
+
}
|
230
|
+
else {
|
231
|
+
if (idx != -2) {
|
232
|
+
// `unsupported_encoding` is not in header file.
|
233
|
+
rb_warn("Unsupported encoding %s ignored", estr);
|
234
|
+
}
|
235
|
+
ext_enc = NULL;
|
229
236
|
}
|
230
237
|
|
231
|
-
|
232
|
-
|
233
|
-
if (
|
234
|
-
|
238
|
+
int_enc = NULL;
|
239
|
+
if (ptr) {
|
240
|
+
if (*ptr == '-' && *(ptr+1) == '\0') {
|
241
|
+
/* Special case - "-" => no transcoding */
|
242
|
+
int_enc = (rb_encoding *)Qnil;
|
235
243
|
}
|
236
244
|
else {
|
237
|
-
|
245
|
+
idx2 = rb_enc_find_index(ptr);
|
246
|
+
if (idx2 < 0) {
|
247
|
+
// `unsupported_encoding` is not in header file.
|
248
|
+
rb_warn("Unsupported encoding %s ignored", ptr);
|
249
|
+
}
|
250
|
+
else if (idx2 == idx) {
|
251
|
+
int_enc = (rb_encoding *)Qnil;
|
252
|
+
}
|
253
|
+
else {
|
254
|
+
int_enc = rb_enc_from_index(idx2);
|
255
|
+
}
|
238
256
|
}
|
239
257
|
}
|
240
|
-
|
241
|
-
|
242
|
-
}
|
258
|
+
|
259
|
+
rb_io_ext_int_to_encs(ext_enc, int_enc, &enc, &enc2, 0);
|
243
260
|
}
|
244
261
|
else if (!NIL_P(option)) {
|
245
262
|
rb_raise(rb_eArgError, ":encoding has to be a String");
|
246
263
|
}
|
247
264
|
|
265
|
+
// @see https://github.com/ruby/ruby/blob/70510d026f8d86693dccaba07417488eed09b41d/lib/csv.rb#L1567
|
266
|
+
// @see https://github.com/ruby/ruby/blob/70510d026f8d86693dccaba07417488eed09b41d/lib/csv.rb#L2300
|
267
|
+
if (rb_respond_to(port, s_internal_encoding)) {
|
268
|
+
r_encoding = rb_funcall(port, s_internal_encoding, 0);
|
269
|
+
if (NIL_P(r_encoding)) {
|
270
|
+
r_encoding = rb_funcall(port, s_external_encoding, 0);
|
271
|
+
}
|
272
|
+
}
|
273
|
+
else if (rb_respond_to(port, s_string)) {
|
274
|
+
r_encoding = rb_funcall(rb_funcall(port, s_string, 0), s_encoding, 0);
|
275
|
+
}
|
276
|
+
else if (rb_respond_to(port, s_encoding)) {
|
277
|
+
r_encoding = rb_funcall(port, s_encoding, 0);
|
278
|
+
}
|
279
|
+
else {
|
280
|
+
r_encoding = rb_enc_from_encoding(rb_ascii8bit_encoding());
|
281
|
+
}
|
282
|
+
if (NIL_P(r_encoding)) {
|
283
|
+
r_encoding = rb_enc_from_encoding(rb_default_internal_encoding());
|
284
|
+
}
|
285
|
+
if (NIL_P(r_encoding)) {
|
286
|
+
r_encoding = rb_enc_from_encoding(rb_default_external_encoding());
|
287
|
+
}
|
288
|
+
if (enc2 != NULL) {
|
289
|
+
encoding = enc2;
|
290
|
+
}
|
291
|
+
else if (enc != NULL) {
|
292
|
+
encoding = enc;
|
293
|
+
}
|
294
|
+
else if (!NIL_P(r_encoding)) {
|
295
|
+
encoding = rb_enc_get(r_encoding);
|
296
|
+
}
|
297
|
+
|
248
298
|
buffer_size = BUFSIZE;
|
249
299
|
if (rb_ivar_defined(self, rb_intern("@buffer_size")) == Qtrue) {
|
250
300
|
bufsize = rb_ivar_get(self, rb_intern("@buffer_size"));
|
@@ -305,10 +355,6 @@ VALUE fastcsv(int argc, VALUE *argv, VALUE self) {
|
|
305
355
|
}
|
306
356
|
|
307
357
|
pe = p + len;
|
308
|
-
// if (done) {
|
309
|
-
// // This triggers the eof action in the non-scanner version.
|
310
|
-
// eof = pe;
|
311
|
-
// }
|
312
358
|
%% write exec;
|
313
359
|
|
314
360
|
if (done && cs < fastcsv_first_final) {
|
@@ -348,6 +394,10 @@ VALUE fastcsv(int argc, VALUE *argv, VALUE self) {
|
|
348
394
|
void Init_fastcsv() {
|
349
395
|
s_read = rb_intern("read");
|
350
396
|
s_to_str = rb_intern("to_str");
|
397
|
+
s_internal_encoding = rb_intern("internal_encoding");
|
398
|
+
s_external_encoding = rb_intern("external_encoding");
|
399
|
+
s_string = rb_intern("string");
|
400
|
+
s_encoding = rb_intern("encoding");
|
351
401
|
|
352
402
|
mModule = rb_define_module("FastCSV");
|
353
403
|
rb_define_attr(rb_singleton_class(mModule), "buffer_size", 1, 1);
|
data/fastcsv.gemspec
CHANGED
data/spec/fastcsv_spec.rb
CHANGED
@@ -57,6 +57,9 @@ RSpec.shared_examples 'a CSV parser' do
|
|
57
57
|
%(foo,"bar\nbaz",bzz),
|
58
58
|
%(foo,"""bar""baz""bzz""",zzz),
|
59
59
|
|
60
|
+
# Single quotes.
|
61
|
+
%('foo','bar','baz'),
|
62
|
+
|
60
63
|
# Buffers.
|
61
64
|
"01234567890" * 2_000, # 20,000 > BUFSIZE
|
62
65
|
"0123456789," * 2_000,
|
@@ -68,7 +71,7 @@ RSpec.shared_examples 'a CSV parser' do
|
|
68
71
|
# Uneven data types.
|
69
72
|
"2000-01-01,2,x\nx,2000-01-01,2",
|
70
73
|
].each do |csv|
|
71
|
-
it "should parse: #{csv}" do
|
74
|
+
it "should parse: #{csv.inspect.gsub('\"', '"')}" do
|
72
75
|
expect(parse(csv)).to eq(CSV.parse(csv))
|
73
76
|
end
|
74
77
|
end
|
@@ -112,34 +115,45 @@ RSpec.shared_examples 'a CSV parser' do
|
|
112
115
|
end
|
113
116
|
end
|
114
117
|
|
118
|
+
it "should parse an encoded string" do
|
119
|
+
csv = "ß"
|
120
|
+
actual = parse(csv)
|
121
|
+
expected = CSV.parse(csv)
|
122
|
+
expect(actual[0][0].encoding).to eq(expected[0][0].encoding)
|
123
|
+
expect(actual).to eq(expected)
|
124
|
+
end
|
125
|
+
|
115
126
|
it 'should raise an error on mixed row separators are' do
|
116
|
-
|
117
|
-
expect{CSV.parse(csv)}.to raise_error(CSV::MalformedCSVError, 'Unquoted fields do not allow \r or \n (line 2).')
|
127
|
+
expect{CSV.parse("foo\rbar\nbaz\r\n")}.to raise_error(CSV::MalformedCSVError, 'Unquoted fields do not allow \r or \n (line 2).')
|
118
128
|
skip
|
119
129
|
end
|
120
130
|
|
121
|
-
|
122
|
-
|
123
|
-
|
131
|
+
context 'when initializing' do
|
132
|
+
it 'should raise an error if no block is given' do
|
133
|
+
expect{parse_without_block('x')}.to raise_error(LocalJumpError, 'no block given')
|
134
|
+
end
|
124
135
|
|
125
|
-
|
126
|
-
|
127
|
-
|
136
|
+
it 'should not raise an error if no block and empty input' do
|
137
|
+
expect{parse_without_block('')}.to_not raise_error
|
138
|
+
end
|
128
139
|
|
129
|
-
|
130
|
-
|
140
|
+
it 'should raise an error if the options are not a Hash or nil' do
|
141
|
+
expect{parse('', '')}.to raise_error(ArgumentError, 'options has to be a Hash or nil')
|
142
|
+
end
|
131
143
|
end
|
132
144
|
|
133
|
-
|
134
|
-
|
135
|
-
|
136
|
-
|
137
|
-
|
145
|
+
context 'when setting a buffer size' do
|
146
|
+
it 'should allow nil' do
|
147
|
+
FastCSV.buffer_size = nil
|
148
|
+
expect(parse(simple)).to eq(CSV.parse(simple))
|
149
|
+
FastCSV.buffer_size = nil
|
150
|
+
end
|
138
151
|
|
139
|
-
|
140
|
-
|
141
|
-
|
142
|
-
|
152
|
+
it 'should allow zero' do
|
153
|
+
FastCSV.buffer_size = 0
|
154
|
+
expect(parse(simple)).to eq(CSV.parse(simple))
|
155
|
+
FastCSV.buffer_size = nil
|
156
|
+
end
|
143
157
|
end
|
144
158
|
end
|
145
159
|
|
@@ -184,35 +198,47 @@ RSpec.describe FastCSV do
|
|
184
198
|
end
|
185
199
|
end
|
186
200
|
|
187
|
-
|
188
|
-
|
189
|
-
|
190
|
-
|
191
|
-
|
192
|
-
|
193
|
-
|
194
|
-
|
195
|
-
|
201
|
+
context 'with encoded strings' do
|
202
|
+
def parse_with_encoding(basename, encoding)
|
203
|
+
filename = File.expand_path(File.join('..', 'fixtures', basename), __FILE__)
|
204
|
+
options = {encoding: encoding}
|
205
|
+
File.open(filename) do |io|
|
206
|
+
rows = []
|
207
|
+
FastCSV.raw_parse(io, options){|row| rows << row}
|
208
|
+
expected = CSV.read(filename, options)
|
209
|
+
expect(rows[0][0].encoding).to eq(expected[0][0].encoding)
|
210
|
+
expect(rows).to eq(expected)
|
211
|
+
end
|
196
212
|
end
|
197
|
-
end
|
198
213
|
|
199
|
-
|
200
|
-
|
201
|
-
|
214
|
+
it 'should encode' do
|
215
|
+
parse_with_encoding('iso-8859-1.csv', 'iso-8859-1')
|
216
|
+
end
|
202
217
|
|
203
|
-
|
204
|
-
|
205
|
-
|
218
|
+
it 'should transcode' do
|
219
|
+
parse_with_encoding('iso-8859-1.csv', 'iso-8859-1:utf-8')
|
220
|
+
end
|
206
221
|
|
207
|
-
|
208
|
-
|
209
|
-
|
222
|
+
it 'should recover from blank external encoding' do
|
223
|
+
parse_with_encoding('utf-8.csv', ':utf-8')
|
224
|
+
end
|
225
|
+
|
226
|
+
it 'should recover from invalid internal encoding' do
|
227
|
+
parse_with_encoding('utf-8.csv', 'invalid')
|
228
|
+
end
|
210
229
|
|
211
|
-
|
212
|
-
|
230
|
+
it 'should recover from invalid external encoding' do
|
231
|
+
parse_with_encoding('utf-8.csv', 'invalid:-')
|
232
|
+
end
|
233
|
+
|
234
|
+
it 'should recover from invalid encodings' do
|
235
|
+
parse_with_encoding('utf-8.csv', 'invalid:invalid')
|
236
|
+
end
|
213
237
|
end
|
214
238
|
|
215
|
-
|
216
|
-
|
239
|
+
context 'when initializing' do
|
240
|
+
it 'should raise an error if the input is not a String or IO' do
|
241
|
+
expect{FastCSV.raw_parse(nil)}.to raise_error(ArgumentError, 'data has to respond to #read or #to_str')
|
242
|
+
end
|
217
243
|
end
|
218
244
|
end
|