fastcsv 0.0.1 → 0.0.2

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: e5d991d026c76068b9e646ba62cefdad823f01e1
4
- data.tar.gz: 8815f0bb3b00e01593f2a46320cf58c88926099c
3
+ metadata.gz: 129c6ed1d3b30a44456108f280a582ccdaac96e9
4
+ data.tar.gz: 4d819f3bb6e637cb5fb3e130c378583202f8d3ee
5
5
  SHA512:
6
- metadata.gz: 8762ce01e3e5af4cd0395bf541879db46f677f79201e2f44dc5f35dd30514c53fa2c2c5808ec61149898d64bff908a74fe2153c690a04e75c82ba7306794fa15
7
- data.tar.gz: e6dd0a3f89f9d330428fbc8d6f1b469d9f3a8ca255561c6fa972d1ad2147422bd090f3c7a373d08d29b07748f781f9b8d70fae27a092a075d183de897094eea5
6
+ metadata.gz: 8a960b458260e864346755a7b00afca9735e8851f70b9ebe3c4d95e1c5300c016fda9b1db5ff7c39cfcc288fdfa51ec038b5796eb12d01c8a7a7cb0d24ae1fe3
7
+ data.tar.gz: 76612ddd0aedef55ca914a5de6b141d9d274c395ec3b9fcc28897e4ca2762ade22759e078446f6ef8ee673ff96954f328e9d23a645e7ba89fe0168ae75e6e7dc
@@ -16,24 +16,20 @@
16
16
  // Ragel help.
17
17
  // https://www.mail-archive.com/ragel-users@complang.org/
18
18
 
19
- # define ASSOCIATE_INDEX \
20
- if (internal_index >= 0) { \
21
- rb_enc_associate_index(field, internal_index); \
22
- field = rb_str_encode(field, rb_enc_from_encoding(external_encoding), 0, Qnil); \
23
- } \
24
- else { \
25
- rb_enc_associate_index(field, rb_enc_to_index(external_encoding)); \
26
- }
19
+ #define ENCODE \
20
+ if (enc2 != NULL) { \
21
+ field = rb_str_encode(field, rb_enc_from_encoding(enc), 0, Qnil); \
22
+ }
27
23
 
28
24
  static VALUE mModule, rb_eParseError;
29
- static ID s_read, s_to_str;
25
+ static ID s_read, s_to_str, s_internal_encoding, s_external_encoding, s_string, s_encoding;
30
26
 
31
27
 
32
- #line 139 "ext/fastcsv/fastcsv.rl"
28
+ #line 125 "ext/fastcsv/fastcsv.rl"
33
29
 
34
30
 
35
31
 
36
- #line 37 "ext/fastcsv/fastcsv.c"
32
+ #line 33 "ext/fastcsv/fastcsv.c"
37
33
  static const int fastcsv_start = 4;
38
34
  static const int fastcsv_first_final = 4;
39
35
  static const int fastcsv_error = 0;
@@ -41,10 +37,39 @@ static const int fastcsv_error = 0;
41
37
  static const int fastcsv_en_main = 4;
42
38
 
43
39
 
44
- #line 142 "ext/fastcsv/fastcsv.rl"
40
+ #line 128 "ext/fastcsv/fastcsv.rl"
45
41
 
42
+ // 16 kB
46
43
  #define BUFSIZE 16384
47
44
 
45
+ // @see http://rxr.whitequark.org/mri/source/io.c#4845
46
+ static void
47
+ rb_io_ext_int_to_encs(rb_encoding *ext, rb_encoding *intern, rb_encoding **enc, rb_encoding **enc2, int fmode)
48
+ {
49
+ int default_ext = 0;
50
+
51
+ if (ext == NULL) {
52
+ ext = rb_default_external_encoding();
53
+ default_ext = 1;
54
+ }
55
+ if (ext == rb_ascii8bit_encoding()) {
56
+ /* If external is ASCII-8BIT, no transcoding */
57
+ intern = NULL;
58
+ }
59
+ else if (intern == NULL) {
60
+ intern = rb_default_internal_encoding();
61
+ }
62
+ if (intern == NULL || intern == (rb_encoding *)Qnil || intern == ext) {
63
+ /* No internal encoding => use external + no transcoding */
64
+ *enc = (default_ext && intern != ext) ? NULL : ext;
65
+ *enc2 = NULL;
66
+ }
67
+ else {
68
+ *enc = intern;
69
+ *enc2 = ext;
70
+ }
71
+ }
72
+
48
73
  VALUE fastcsv(int argc, VALUE *argv, VALUE self) {
49
74
  int cs, act, have = 0, curline = 1, io = 0;
50
75
  char *ts = 0, *te = 0, *buf = 0, *eof = 0;
@@ -52,11 +77,11 @@ VALUE fastcsv(int argc, VALUE *argv, VALUE self) {
52
77
  VALUE port, opts;
53
78
  VALUE row = rb_ary_new(), field = Qnil, bufsize = Qnil;
54
79
  int done = 0, unclosed_line = 0, buffer_size = 0, taint = 0;
55
- int internal_index = 0, external_index = rb_enc_to_index(rb_default_external_encoding());
56
- rb_encoding *external_encoding = rb_default_external_encoding();
80
+ rb_encoding *enc = NULL, *enc2 = NULL, *encoding = NULL;
81
+ VALUE r_encoding;
57
82
 
58
83
  VALUE option;
59
- char quote_char = '"'; //, *col_sep = ",", *row_sep = "\r\n";
84
+ char quote_char = '"';
60
85
 
61
86
  rb_scan_args(argc, argv, "11", &port, &opts);
62
87
  taint = OBJ_TAINTED(port);
@@ -78,76 +103,111 @@ VALUE fastcsv(int argc, VALUE *argv, VALUE self) {
78
103
  rb_raise(rb_eArgError, "options has to be a Hash or nil");
79
104
  }
80
105
 
81
- // @note Add machines for common CSV dialects, or see if we can use "when"
82
- // from Chapter 6 to compare the character to the host program's variable.
83
- // option = rb_hash_aref(opts, ID2SYM(rb_intern("quote_char")));
84
- // if (TYPE(option) == T_STRING && RSTRING_LEN(option) == 1) {
85
- // quote_char = *StringValueCStr(option);
86
- // }
87
- // else if (!NIL_P(option)) {
88
- // rb_raise(rb_eArgError, ":quote_char has to be a single character String");
89
- // }
90
-
91
- // option = rb_hash_aref(opts, ID2SYM(rb_intern("col_sep")));
92
- // if (TYPE(option) == T_STRING) {
93
- // col_sep = StringValueCStr(option);
94
- // }
95
- // else if (!NIL_P(option)) {
96
- // rb_raise(rb_eArgError, ":col_sep has to be a String");
97
- // }
98
-
99
- // option = rb_hash_aref(opts, ID2SYM(rb_intern("row_sep")));
100
- // if (TYPE(option) == T_STRING) {
101
- // row_sep = StringValueCStr(option);
102
- // }
103
- // else if (!NIL_P(option)) {
104
- // rb_raise(rb_eArgError, ":row_sep has to be a String");
105
- // }
106
+ // @see rb_io_extract_modeenc
107
+ /* Set to defaults */
108
+ rb_io_ext_int_to_encs(NULL, NULL, &enc, &enc2, 0);
106
109
 
110
+ // "enc" (internal) or "enc2:enc" (external:internal) or "enc:-" (external).
111
+ // We don't support binmode, which would force "ASCII-8BIT", or "BOM|UTF-*".
112
+ // @see http://ruby-doc.org/core-2.1.1/IO.html#method-c-new-label-Open+Mode
107
113
  option = rb_hash_aref(opts, ID2SYM(rb_intern("encoding")));
108
114
  if (TYPE(option) == T_STRING) {
109
- // @see parse_mode_enc in Ruby's io.c
110
- const char *string = StringValueCStr(option), *pointer;
111
- char internal_encoding_name[ENCODING_MAXNAMELEN + 1];
115
+ // parse_mode_enc is not in header file.
116
+ const char *estr = StringValueCStr(option), *ptr;
117
+ char encname[ENCODING_MAXNAMELEN+1];
118
+ int idx, idx2;
119
+ rb_encoding *ext_enc, *int_enc;
120
+
121
+ /* parse estr as "enc" or "enc2:enc" or "enc:-" */
112
122
 
113
- pointer = strrchr(string, ':');
114
- if (pointer) {
115
- long len = (pointer++) - string;
123
+ ptr = strrchr(estr, ':');
124
+ if (ptr) {
125
+ long len = (ptr++) - estr;
116
126
  if (len == 0 || len > ENCODING_MAXNAMELEN) {
117
- internal_index = -1;
127
+ idx = -1;
118
128
  }
119
129
  else {
120
- memcpy(internal_encoding_name, string, len);
121
- internal_encoding_name[len] = '\0';
122
- string = internal_encoding_name;
123
- internal_index = rb_enc_find_index(internal_encoding_name);
130
+ memcpy(encname, estr, len);
131
+ encname[len] = '\0';
132
+ estr = encname;
133
+ idx = rb_enc_find_index(encname);
124
134
  }
125
135
  }
126
136
  else {
127
- internal_index = rb_enc_find_index(string);
137
+ idx = rb_enc_find_index(estr);
128
138
  }
129
139
 
130
- if (internal_index < 0 && internal_index != -2) {
131
- rb_warn("Unsupported encoding %s ignored", string);
140
+ if (idx >= 0) {
141
+ ext_enc = rb_enc_from_index(idx);
142
+ }
143
+ else {
144
+ if (idx != -2) {
145
+ // `unsupported_encoding` is not in header file.
146
+ rb_warn("Unsupported encoding %s ignored", estr);
147
+ }
148
+ ext_enc = NULL;
132
149
  }
133
150
 
134
- if (pointer) {
135
- external_index = rb_enc_find_index(pointer);
136
- if (external_index >= 0) {
137
- external_encoding = rb_enc_from_index(external_index);
151
+ int_enc = NULL;
152
+ if (ptr) {
153
+ if (*ptr == '-' && *(ptr+1) == '\0') {
154
+ /* Special case - "-" => no transcoding */
155
+ int_enc = (rb_encoding *)Qnil;
138
156
  }
139
157
  else {
140
- rb_warn("Unsupported encoding %s ignored", string);
158
+ idx2 = rb_enc_find_index(ptr);
159
+ if (idx2 < 0) {
160
+ // `unsupported_encoding` is not in header file.
161
+ rb_warn("Unsupported encoding %s ignored", ptr);
162
+ }
163
+ else if (idx2 == idx) {
164
+ int_enc = (rb_encoding *)Qnil;
165
+ }
166
+ else {
167
+ int_enc = rb_enc_from_index(idx2);
168
+ }
141
169
  }
142
170
  }
143
- else if (internal_index >= 0) {
144
- external_encoding = rb_enc_from_index(internal_index);
145
- }
171
+
172
+ rb_io_ext_int_to_encs(ext_enc, int_enc, &enc, &enc2, 0);
146
173
  }
147
174
  else if (!NIL_P(option)) {
148
175
  rb_raise(rb_eArgError, ":encoding has to be a String");
149
176
  }
150
177
 
178
+ // @see https://github.com/ruby/ruby/blob/70510d026f8d86693dccaba07417488eed09b41d/lib/csv.rb#L1567
179
+ // @see https://github.com/ruby/ruby/blob/70510d026f8d86693dccaba07417488eed09b41d/lib/csv.rb#L2300
180
+ if (rb_respond_to(port, s_internal_encoding)) {
181
+ r_encoding = rb_funcall(port, s_internal_encoding, 0);
182
+ if (NIL_P(r_encoding)) {
183
+ r_encoding = rb_funcall(port, s_external_encoding, 0);
184
+ }
185
+ }
186
+ else if (rb_respond_to(port, s_string)) {
187
+ r_encoding = rb_funcall(rb_funcall(port, s_string, 0), s_encoding, 0);
188
+ }
189
+ else if (rb_respond_to(port, s_encoding)) {
190
+ r_encoding = rb_funcall(port, s_encoding, 0);
191
+ }
192
+ else {
193
+ r_encoding = rb_enc_from_encoding(rb_ascii8bit_encoding());
194
+ }
195
+ if (NIL_P(r_encoding)) {
196
+ r_encoding = rb_enc_from_encoding(rb_default_internal_encoding());
197
+ }
198
+ if (NIL_P(r_encoding)) {
199
+ r_encoding = rb_enc_from_encoding(rb_default_external_encoding());
200
+ }
201
+ if (enc2 != NULL) {
202
+ encoding = enc2;
203
+ }
204
+ else if (enc != NULL) {
205
+ encoding = enc;
206
+ }
207
+ else if (!NIL_P(r_encoding)) {
208
+ encoding = rb_enc_get(r_encoding);
209
+ }
210
+
151
211
  buffer_size = BUFSIZE;
152
212
  if (rb_ivar_defined(self, rb_intern("@buffer_size")) == Qtrue) {
153
213
  bufsize = rb_ivar_get(self, rb_intern("@buffer_size"));
@@ -161,7 +221,7 @@ VALUE fastcsv(int argc, VALUE *argv, VALUE self) {
161
221
  }
162
222
 
163
223
 
164
- #line 165 "ext/fastcsv/fastcsv.c"
224
+ #line 225 "ext/fastcsv/fastcsv.c"
165
225
  {
166
226
  cs = fastcsv_start;
167
227
  ts = 0;
@@ -169,7 +229,7 @@ VALUE fastcsv(int argc, VALUE *argv, VALUE self) {
169
229
  act = 0;
170
230
  }
171
231
 
172
- #line 261 "ext/fastcsv/fastcsv.rl"
232
+ #line 311 "ext/fastcsv/fastcsv.rl"
173
233
 
174
234
  while (!done) {
175
235
  VALUE str;
@@ -217,12 +277,8 @@ VALUE fastcsv(int argc, VALUE *argv, VALUE self) {
217
277
  }
218
278
 
219
279
  pe = p + len;
220
- // if (done) {
221
- // // This triggers the eof action in the non-scanner version.
222
- // eof = pe;
223
- // }
224
280
 
225
- #line 226 "ext/fastcsv/fastcsv.c"
281
+ #line 282 "ext/fastcsv/fastcsv.c"
226
282
  {
227
283
  if ( p == pe )
228
284
  goto _test_eof;
@@ -241,7 +297,7 @@ tr0:
241
297
  }
242
298
  goto st4;
243
299
  tr10:
244
- #line 105 "ext/fastcsv/fastcsv.rl"
300
+ #line 101 "ext/fastcsv/fastcsv.rl"
245
301
  {
246
302
  if (!NIL_P(field) || RARRAY_LEN(row)) {
247
303
  rb_ary_push(row, field);
@@ -250,19 +306,19 @@ tr10:
250
306
  rb_yield(row);
251
307
  }
252
308
  }
253
- #line 129 "ext/fastcsv/fastcsv.rl"
309
+ #line 123 "ext/fastcsv/fastcsv.rl"
254
310
  {te = p+1;}
255
311
  goto st4;
256
312
  tr16:
257
- #line 129 "ext/fastcsv/fastcsv.rl"
313
+ #line 123 "ext/fastcsv/fastcsv.rl"
258
314
  {te = p;p--;}
259
315
  goto st4;
260
316
  tr17:
261
- #line 128 "ext/fastcsv/fastcsv.rl"
317
+ #line 122 "ext/fastcsv/fastcsv.rl"
262
318
  {te = p;p--;}
263
319
  goto st4;
264
320
  tr18:
265
- #line 105 "ext/fastcsv/fastcsv.rl"
321
+ #line 101 "ext/fastcsv/fastcsv.rl"
266
322
  {
267
323
  if (!NIL_P(field) || RARRAY_LEN(row)) {
268
324
  rb_ary_push(row, field);
@@ -271,15 +327,15 @@ tr18:
271
327
  rb_yield(row);
272
328
  }
273
329
  }
274
- #line 128 "ext/fastcsv/fastcsv.rl"
330
+ #line 122 "ext/fastcsv/fastcsv.rl"
275
331
  {te = p+1;}
276
332
  goto st4;
277
333
  tr20:
278
- #line 127 "ext/fastcsv/fastcsv.rl"
334
+ #line 121 "ext/fastcsv/fastcsv.rl"
279
335
  {te = p;p--;}
280
336
  goto st4;
281
337
  tr21:
282
- #line 105 "ext/fastcsv/fastcsv.rl"
338
+ #line 101 "ext/fastcsv/fastcsv.rl"
283
339
  {
284
340
  if (!NIL_P(field) || RARRAY_LEN(row)) {
285
341
  rb_ary_push(row, field);
@@ -288,7 +344,7 @@ tr21:
288
344
  rb_yield(row);
289
345
  }
290
346
  }
291
- #line 127 "ext/fastcsv/fastcsv.rl"
347
+ #line 121 "ext/fastcsv/fastcsv.rl"
292
348
  {te = p+1;}
293
349
  goto st4;
294
350
  st4:
@@ -301,7 +357,7 @@ st4:
301
357
  case 4:
302
358
  #line 1 "NONE"
303
359
  {ts = p;}
304
- #line 305 "ext/fastcsv/fastcsv.c"
360
+ #line 361 "ext/fastcsv/fastcsv.c"
305
361
  switch( (*p) ) {
306
362
  case 0: goto tr14;
307
363
  case 10: goto tr3;
@@ -325,18 +381,18 @@ case 1:
325
381
  tr2:
326
382
  #line 1 "NONE"
327
383
  {te = p+1;}
328
- #line 44 "ext/fastcsv/fastcsv.rl"
384
+ #line 40 "ext/fastcsv/fastcsv.rl"
329
385
  {
330
386
  if (p == ts) {
331
387
  // Unquoted empty fields are nil, not "", in Ruby.
332
388
  field = Qnil;
333
389
  }
334
390
  else if (p > ts) {
335
- field = rb_str_new(ts, p - ts);
336
- ASSOCIATE_INDEX;
391
+ field = rb_enc_str_new(ts, p - ts, encoding);
392
+ ENCODE;
337
393
  }
338
394
  }
339
- #line 105 "ext/fastcsv/fastcsv.rl"
395
+ #line 101 "ext/fastcsv/fastcsv.rl"
340
396
  {
341
397
  if (!NIL_P(field) || RARRAY_LEN(row)) {
342
398
  rb_ary_push(row, field);
@@ -345,14 +401,14 @@ tr2:
345
401
  rb_yield(row);
346
402
  }
347
403
  }
348
- #line 129 "ext/fastcsv/fastcsv.rl"
404
+ #line 123 "ext/fastcsv/fastcsv.rl"
349
405
  {act = 3;}
350
406
  goto st5;
351
407
  st5:
352
408
  if ( ++p == pe )
353
409
  goto _test_eof5;
354
410
  case 5:
355
- #line 356 "ext/fastcsv/fastcsv.c"
411
+ #line 412 "ext/fastcsv/fastcsv.c"
356
412
  switch( (*p) ) {
357
413
  case 0: goto tr2;
358
414
  case 10: goto tr3;
@@ -362,18 +418,18 @@ case 5:
362
418
  }
363
419
  goto st1;
364
420
  tr3:
365
- #line 44 "ext/fastcsv/fastcsv.rl"
421
+ #line 40 "ext/fastcsv/fastcsv.rl"
366
422
  {
367
423
  if (p == ts) {
368
424
  // Unquoted empty fields are nil, not "", in Ruby.
369
425
  field = Qnil;
370
426
  }
371
427
  else if (p > ts) {
372
- field = rb_str_new(ts, p - ts);
373
- ASSOCIATE_INDEX;
428
+ field = rb_enc_str_new(ts, p - ts, encoding);
429
+ ENCODE;
374
430
  }
375
431
  }
376
- #line 95 "ext/fastcsv/fastcsv.rl"
432
+ #line 91 "ext/fastcsv/fastcsv.rl"
377
433
  {
378
434
  if (!NIL_P(field) || RARRAY_LEN(row)) { // same as new_field
379
435
  rb_ary_push(row, field);
@@ -383,19 +439,19 @@ tr3:
383
439
  rb_yield(row);
384
440
  row = rb_ary_new();
385
441
  }
386
- #line 32 "ext/fastcsv/fastcsv.rl"
442
+ #line 28 "ext/fastcsv/fastcsv.rl"
387
443
  {
388
444
  curline++;
389
445
  }
390
446
  goto st6;
391
447
  tr19:
392
- #line 32 "ext/fastcsv/fastcsv.rl"
448
+ #line 28 "ext/fastcsv/fastcsv.rl"
393
449
  {
394
450
  curline++;
395
451
  }
396
452
  goto st6;
397
453
  tr11:
398
- #line 95 "ext/fastcsv/fastcsv.rl"
454
+ #line 91 "ext/fastcsv/fastcsv.rl"
399
455
  {
400
456
  if (!NIL_P(field) || RARRAY_LEN(row)) { // same as new_field
401
457
  rb_ary_push(row, field);
@@ -405,7 +461,7 @@ tr11:
405
461
  rb_yield(row);
406
462
  row = rb_ary_new();
407
463
  }
408
- #line 32 "ext/fastcsv/fastcsv.rl"
464
+ #line 28 "ext/fastcsv/fastcsv.rl"
409
465
  {
410
466
  curline++;
411
467
  }
@@ -414,23 +470,23 @@ st6:
414
470
  if ( ++p == pe )
415
471
  goto _test_eof6;
416
472
  case 6:
417
- #line 418 "ext/fastcsv/fastcsv.c"
473
+ #line 474 "ext/fastcsv/fastcsv.c"
418
474
  if ( (*p) == 0 )
419
475
  goto tr18;
420
476
  goto tr17;
421
477
  tr4:
422
- #line 44 "ext/fastcsv/fastcsv.rl"
478
+ #line 40 "ext/fastcsv/fastcsv.rl"
423
479
  {
424
480
  if (p == ts) {
425
481
  // Unquoted empty fields are nil, not "", in Ruby.
426
482
  field = Qnil;
427
483
  }
428
484
  else if (p > ts) {
429
- field = rb_str_new(ts, p - ts);
430
- ASSOCIATE_INDEX;
485
+ field = rb_enc_str_new(ts, p - ts, encoding);
486
+ ENCODE;
431
487
  }
432
488
  }
433
- #line 95 "ext/fastcsv/fastcsv.rl"
489
+ #line 91 "ext/fastcsv/fastcsv.rl"
434
490
  {
435
491
  if (!NIL_P(field) || RARRAY_LEN(row)) { // same as new_field
436
492
  rb_ary_push(row, field);
@@ -440,13 +496,13 @@ tr4:
440
496
  rb_yield(row);
441
497
  row = rb_ary_new();
442
498
  }
443
- #line 32 "ext/fastcsv/fastcsv.rl"
499
+ #line 28 "ext/fastcsv/fastcsv.rl"
444
500
  {
445
501
  curline++;
446
502
  }
447
503
  goto st7;
448
504
  tr12:
449
- #line 95 "ext/fastcsv/fastcsv.rl"
505
+ #line 91 "ext/fastcsv/fastcsv.rl"
450
506
  {
451
507
  if (!NIL_P(field) || RARRAY_LEN(row)) { // same as new_field
452
508
  rb_ary_push(row, field);
@@ -456,7 +512,7 @@ tr12:
456
512
  rb_yield(row);
457
513
  row = rb_ary_new();
458
514
  }
459
- #line 32 "ext/fastcsv/fastcsv.rl"
515
+ #line 28 "ext/fastcsv/fastcsv.rl"
460
516
  {
461
517
  curline++;
462
518
  }
@@ -465,32 +521,32 @@ st7:
465
521
  if ( ++p == pe )
466
522
  goto _test_eof7;
467
523
  case 7:
468
- #line 469 "ext/fastcsv/fastcsv.c"
524
+ #line 525 "ext/fastcsv/fastcsv.c"
469
525
  switch( (*p) ) {
470
526
  case 0: goto tr18;
471
527
  case 10: goto tr19;
472
528
  }
473
529
  goto tr17;
474
530
  tr5:
475
- #line 44 "ext/fastcsv/fastcsv.rl"
531
+ #line 40 "ext/fastcsv/fastcsv.rl"
476
532
  {
477
533
  if (p == ts) {
478
534
  // Unquoted empty fields are nil, not "", in Ruby.
479
535
  field = Qnil;
480
536
  }
481
537
  else if (p > ts) {
482
- field = rb_str_new(ts, p - ts);
483
- ASSOCIATE_INDEX;
538
+ field = rb_enc_str_new(ts, p - ts, encoding);
539
+ ENCODE;
484
540
  }
485
541
  }
486
- #line 90 "ext/fastcsv/fastcsv.rl"
542
+ #line 86 "ext/fastcsv/fastcsv.rl"
487
543
  {
488
544
  rb_ary_push(row, field);
489
545
  field = Qnil;
490
546
  }
491
547
  goto st8;
492
548
  tr13:
493
- #line 90 "ext/fastcsv/fastcsv.rl"
549
+ #line 86 "ext/fastcsv/fastcsv.rl"
494
550
  {
495
551
  rb_ary_push(row, field);
496
552
  field = Qnil;
@@ -500,14 +556,14 @@ st8:
500
556
  if ( ++p == pe )
501
557
  goto _test_eof8;
502
558
  case 8:
503
- #line 504 "ext/fastcsv/fastcsv.c"
559
+ #line 560 "ext/fastcsv/fastcsv.c"
504
560
  if ( (*p) == 0 )
505
561
  goto tr21;
506
562
  goto tr20;
507
563
  tr14:
508
564
  #line 1 "NONE"
509
565
  {te = p+1;}
510
- #line 105 "ext/fastcsv/fastcsv.rl"
566
+ #line 101 "ext/fastcsv/fastcsv.rl"
511
567
  {
512
568
  if (!NIL_P(field) || RARRAY_LEN(row)) {
513
569
  rb_ary_push(row, field);
@@ -516,25 +572,25 @@ tr14:
516
572
  rb_yield(row);
517
573
  }
518
574
  }
519
- #line 44 "ext/fastcsv/fastcsv.rl"
575
+ #line 40 "ext/fastcsv/fastcsv.rl"
520
576
  {
521
577
  if (p == ts) {
522
578
  // Unquoted empty fields are nil, not "", in Ruby.
523
579
  field = Qnil;
524
580
  }
525
581
  else if (p > ts) {
526
- field = rb_str_new(ts, p - ts);
527
- ASSOCIATE_INDEX;
582
+ field = rb_enc_str_new(ts, p - ts, encoding);
583
+ ENCODE;
528
584
  }
529
585
  }
530
- #line 129 "ext/fastcsv/fastcsv.rl"
586
+ #line 123 "ext/fastcsv/fastcsv.rl"
531
587
  {act = 3;}
532
588
  goto st9;
533
589
  st9:
534
590
  if ( ++p == pe )
535
591
  goto _test_eof9;
536
592
  case 9:
537
- #line 538 "ext/fastcsv/fastcsv.c"
593
+ #line 594 "ext/fastcsv/fastcsv.c"
538
594
  switch( (*p) ) {
539
595
  case 10: goto tr16;
540
596
  case 13: goto tr16;
@@ -543,13 +599,13 @@ case 9:
543
599
  }
544
600
  goto st1;
545
601
  tr8:
546
- #line 32 "ext/fastcsv/fastcsv.rl"
602
+ #line 28 "ext/fastcsv/fastcsv.rl"
547
603
  {
548
604
  curline++;
549
605
  }
550
606
  goto st2;
551
607
  tr15:
552
- #line 36 "ext/fastcsv/fastcsv.rl"
608
+ #line 32 "ext/fastcsv/fastcsv.rl"
553
609
  {
554
610
  unclosed_line = curline;
555
611
  }
@@ -558,7 +614,7 @@ st2:
558
614
  if ( ++p == pe )
559
615
  goto _test_eof2;
560
616
  case 2:
561
- #line 562 "ext/fastcsv/fastcsv.c"
617
+ #line 618 "ext/fastcsv/fastcsv.c"
562
618
  switch( (*p) ) {
563
619
  case 0: goto st0;
564
620
  case 10: goto tr8;
@@ -570,11 +626,11 @@ st0:
570
626
  cs = 0;
571
627
  goto _out;
572
628
  tr9:
573
- #line 55 "ext/fastcsv/fastcsv.rl"
629
+ #line 51 "ext/fastcsv/fastcsv.rl"
574
630
  {
575
631
  if (p == ts) {
576
- field = rb_str_new2("");
577
- ASSOCIATE_INDEX;
632
+ field = rb_enc_str_new("", 0, encoding);
633
+ ENCODE;
578
634
  }
579
635
  // @note If we add an action on '""', we can skip some steps if no '""' is found.
580
636
  else if (p > ts) {
@@ -597,15 +653,15 @@ tr9:
597
653
  reader++;
598
654
  }
599
655
 
600
- field = rb_str_new(copy, writer - copy);
601
- ASSOCIATE_INDEX;
656
+ field = rb_enc_str_new(copy, writer - copy, enc);
657
+ ENCODE;
602
658
 
603
659
  if (copy != NULL) {
604
660
  free(copy);
605
661
  }
606
662
  }
607
663
  }
608
- #line 40 "ext/fastcsv/fastcsv.rl"
664
+ #line 36 "ext/fastcsv/fastcsv.rl"
609
665
  {
610
666
  unclosed_line = 0;
611
667
  }
@@ -614,7 +670,7 @@ st3:
614
670
  if ( ++p == pe )
615
671
  goto _test_eof3;
616
672
  case 3:
617
- #line 618 "ext/fastcsv/fastcsv.c"
673
+ #line 674 "ext/fastcsv/fastcsv.c"
618
674
  switch( (*p) ) {
619
675
  case 0: goto tr10;
620
676
  case 10: goto tr11;
@@ -650,7 +706,7 @@ case 3:
650
706
  _out: {}
651
707
  }
652
708
 
653
- #line 313 "ext/fastcsv/fastcsv.rl"
709
+ #line 359 "ext/fastcsv/fastcsv.rl"
654
710
 
655
711
  if (done && cs < fastcsv_first_final) {
656
712
  if (buf != NULL) {
@@ -689,6 +745,10 @@ case 3:
689
745
  void Init_fastcsv() {
690
746
  s_read = rb_intern("read");
691
747
  s_to_str = rb_intern("to_str");
748
+ s_internal_encoding = rb_intern("internal_encoding");
749
+ s_external_encoding = rb_intern("external_encoding");
750
+ s_string = rb_intern("string");
751
+ s_encoding = rb_intern("encoding");
692
752
 
693
753
  mModule = rb_define_module("FastCSV");
694
754
  rb_define_attr(rb_singleton_class(mModule), "buffer_size", 1, 1);
@@ -14,17 +14,13 @@
14
14
  // Ragel help.
15
15
  // https://www.mail-archive.com/ragel-users@complang.org/
16
16
 
17
- # define ASSOCIATE_INDEX \
18
- if (internal_index >= 0) { \
19
- rb_enc_associate_index(field, internal_index); \
20
- field = rb_str_encode(field, rb_enc_from_encoding(external_encoding), 0, Qnil); \
21
- } \
22
- else { \
23
- rb_enc_associate_index(field, rb_enc_to_index(external_encoding)); \
24
- }
17
+ #define ENCODE \
18
+ if (enc2 != NULL) { \
19
+ field = rb_str_encode(field, rb_enc_from_encoding(enc), 0, Qnil); \
20
+ }
25
21
 
26
22
  static VALUE mModule, rb_eParseError;
27
- static ID s_read, s_to_str;
23
+ static ID s_read, s_to_str, s_internal_encoding, s_external_encoding, s_string, s_encoding;
28
24
 
29
25
  %%{
30
26
  machine fastcsv;
@@ -47,15 +43,15 @@ static ID s_read, s_to_str;
47
43
  field = Qnil;
48
44
  }
49
45
  else if (p > ts) {
50
- field = rb_str_new(ts, p - ts);
51
- ASSOCIATE_INDEX;
46
+ field = rb_enc_str_new(ts, p - ts, encoding);
47
+ ENCODE;
52
48
  }
53
49
  }
54
50
 
55
51
  action read_quoted {
56
52
  if (p == ts) {
57
- field = rb_str_new2("");
58
- ASSOCIATE_INDEX;
53
+ field = rb_enc_str_new("", 0, encoding);
54
+ ENCODE;
59
55
  }
60
56
  // @note If we add an action on '""', we can skip some steps if no '""' is found.
61
57
  else if (p > ts) {
@@ -78,8 +74,8 @@ static ID s_read, s_to_str;
78
74
  reader++;
79
75
  }
80
76
 
81
- field = rb_str_new(copy, writer - copy);
82
- ASSOCIATE_INDEX;
77
+ field = rb_enc_str_new(copy, writer - copy, enc);
78
+ ENCODE;
83
79
 
84
80
  if (copy != NULL) {
85
81
  free(copy);
@@ -118,30 +114,49 @@ static ID s_read, s_to_str;
118
114
  unquoted = (any* -- quote_char -- col_sep -- row_sep - EOF) %read_unquoted;
119
115
  quoted = quote_char >open_quote (any - quote_char - EOF | quote_char quote_char | row_sep)* %read_quoted quote_char >close_quote;
120
116
  field = unquoted | quoted;
121
- # fields = (field col_sep)* field?;
122
- # file = (fields row_sep >new_row)* fields?;
123
117
 
124
118
  # @see Ragel Guide: 6.3 Scanners
125
- # Remember that an unquoted field can be zero-length.
119
+ # An unquoted field can be zero-length.
126
120
  main := |*
127
121
  field col_sep EOF?;
128
122
  field row_sep >new_row EOF?;
129
123
  field EOF;
130
124
  *|;
131
-
132
- # Non-scanner version requires very large buffer.
133
- # main := file $/{
134
- # if (!NIL_P(field) || RARRAY_LEN(row)) {
135
- # rb_ary_push(row, field);
136
- # rb_yield(row);
137
- # }
138
- # };
139
125
  }%%
140
126
 
141
127
  %% write data;
142
128
 
129
+ // 16 kB
143
130
  #define BUFSIZE 16384
144
131
 
132
+ // @see http://rxr.whitequark.org/mri/source/io.c#4845
133
+ static void
134
+ rb_io_ext_int_to_encs(rb_encoding *ext, rb_encoding *intern, rb_encoding **enc, rb_encoding **enc2, int fmode)
135
+ {
136
+ int default_ext = 0;
137
+
138
+ if (ext == NULL) {
139
+ ext = rb_default_external_encoding();
140
+ default_ext = 1;
141
+ }
142
+ if (ext == rb_ascii8bit_encoding()) {
143
+ /* If external is ASCII-8BIT, no transcoding */
144
+ intern = NULL;
145
+ }
146
+ else if (intern == NULL) {
147
+ intern = rb_default_internal_encoding();
148
+ }
149
+ if (intern == NULL || intern == (rb_encoding *)Qnil || intern == ext) {
150
+ /* No internal encoding => use external + no transcoding */
151
+ *enc = (default_ext && intern != ext) ? NULL : ext;
152
+ *enc2 = NULL;
153
+ }
154
+ else {
155
+ *enc = intern;
156
+ *enc2 = ext;
157
+ }
158
+ }
159
+
145
160
  VALUE fastcsv(int argc, VALUE *argv, VALUE self) {
146
161
  int cs, act, have = 0, curline = 1, io = 0;
147
162
  char *ts = 0, *te = 0, *buf = 0, *eof = 0;
@@ -149,11 +164,11 @@ VALUE fastcsv(int argc, VALUE *argv, VALUE self) {
149
164
  VALUE port, opts;
150
165
  VALUE row = rb_ary_new(), field = Qnil, bufsize = Qnil;
151
166
  int done = 0, unclosed_line = 0, buffer_size = 0, taint = 0;
152
- int internal_index = 0, external_index = rb_enc_to_index(rb_default_external_encoding());
153
- rb_encoding *external_encoding = rb_default_external_encoding();
167
+ rb_encoding *enc = NULL, *enc2 = NULL, *encoding = NULL;
168
+ VALUE r_encoding;
154
169
 
155
170
  VALUE option;
156
- char quote_char = '"'; //, *col_sep = ",", *row_sep = "\r\n";
171
+ char quote_char = '"';
157
172
 
158
173
  rb_scan_args(argc, argv, "11", &port, &opts);
159
174
  taint = OBJ_TAINTED(port);
@@ -175,76 +190,111 @@ VALUE fastcsv(int argc, VALUE *argv, VALUE self) {
175
190
  rb_raise(rb_eArgError, "options has to be a Hash or nil");
176
191
  }
177
192
 
178
- // @note Add machines for common CSV dialects, or see if we can use "when"
179
- // from Chapter 6 to compare the character to the host program's variable.
180
- // option = rb_hash_aref(opts, ID2SYM(rb_intern("quote_char")));
181
- // if (TYPE(option) == T_STRING && RSTRING_LEN(option) == 1) {
182
- // quote_char = *StringValueCStr(option);
183
- // }
184
- // else if (!NIL_P(option)) {
185
- // rb_raise(rb_eArgError, ":quote_char has to be a single character String");
186
- // }
187
-
188
- // option = rb_hash_aref(opts, ID2SYM(rb_intern("col_sep")));
189
- // if (TYPE(option) == T_STRING) {
190
- // col_sep = StringValueCStr(option);
191
- // }
192
- // else if (!NIL_P(option)) {
193
- // rb_raise(rb_eArgError, ":col_sep has to be a String");
194
- // }
195
-
196
- // option = rb_hash_aref(opts, ID2SYM(rb_intern("row_sep")));
197
- // if (TYPE(option) == T_STRING) {
198
- // row_sep = StringValueCStr(option);
199
- // }
200
- // else if (!NIL_P(option)) {
201
- // rb_raise(rb_eArgError, ":row_sep has to be a String");
202
- // }
193
+ // @see rb_io_extract_modeenc
194
+ /* Set to defaults */
195
+ rb_io_ext_int_to_encs(NULL, NULL, &enc, &enc2, 0);
203
196
 
197
+ // "enc" (internal) or "enc2:enc" (external:internal) or "enc:-" (external).
198
+ // We don't support binmode, which would force "ASCII-8BIT", or "BOM|UTF-*".
199
+ // @see http://ruby-doc.org/core-2.1.1/IO.html#method-c-new-label-Open+Mode
204
200
  option = rb_hash_aref(opts, ID2SYM(rb_intern("encoding")));
205
201
  if (TYPE(option) == T_STRING) {
206
- // @see parse_mode_enc in Ruby's io.c
207
- const char *string = StringValueCStr(option), *pointer;
208
- char internal_encoding_name[ENCODING_MAXNAMELEN + 1];
202
+ // parse_mode_enc is not in header file.
203
+ const char *estr = StringValueCStr(option), *ptr;
204
+ char encname[ENCODING_MAXNAMELEN+1];
205
+ int idx, idx2;
206
+ rb_encoding *ext_enc, *int_enc;
207
+
208
+ /* parse estr as "enc" or "enc2:enc" or "enc:-" */
209
209
 
210
- pointer = strrchr(string, ':');
211
- if (pointer) {
212
- long len = (pointer++) - string;
210
+ ptr = strrchr(estr, ':');
211
+ if (ptr) {
212
+ long len = (ptr++) - estr;
213
213
  if (len == 0 || len > ENCODING_MAXNAMELEN) {
214
- internal_index = -1;
214
+ idx = -1;
215
215
  }
216
216
  else {
217
- memcpy(internal_encoding_name, string, len);
218
- internal_encoding_name[len] = '\0';
219
- string = internal_encoding_name;
220
- internal_index = rb_enc_find_index(internal_encoding_name);
217
+ memcpy(encname, estr, len);
218
+ encname[len] = '\0';
219
+ estr = encname;
220
+ idx = rb_enc_find_index(encname);
221
221
  }
222
222
  }
223
223
  else {
224
- internal_index = rb_enc_find_index(string);
224
+ idx = rb_enc_find_index(estr);
225
225
  }
226
226
 
227
- if (internal_index < 0 && internal_index != -2) {
228
- rb_warn("Unsupported encoding %s ignored", string);
227
+ if (idx >= 0) {
228
+ ext_enc = rb_enc_from_index(idx);
229
+ }
230
+ else {
231
+ if (idx != -2) {
232
+ // `unsupported_encoding` is not in header file.
233
+ rb_warn("Unsupported encoding %s ignored", estr);
234
+ }
235
+ ext_enc = NULL;
229
236
  }
230
237
 
231
- if (pointer) {
232
- external_index = rb_enc_find_index(pointer);
233
- if (external_index >= 0) {
234
- external_encoding = rb_enc_from_index(external_index);
238
+ int_enc = NULL;
239
+ if (ptr) {
240
+ if (*ptr == '-' && *(ptr+1) == '\0') {
241
+ /* Special case - "-" => no transcoding */
242
+ int_enc = (rb_encoding *)Qnil;
235
243
  }
236
244
  else {
237
- rb_warn("Unsupported encoding %s ignored", string);
245
+ idx2 = rb_enc_find_index(ptr);
246
+ if (idx2 < 0) {
247
+ // `unsupported_encoding` is not in header file.
248
+ rb_warn("Unsupported encoding %s ignored", ptr);
249
+ }
250
+ else if (idx2 == idx) {
251
+ int_enc = (rb_encoding *)Qnil;
252
+ }
253
+ else {
254
+ int_enc = rb_enc_from_index(idx2);
255
+ }
238
256
  }
239
257
  }
240
- else if (internal_index >= 0) {
241
- external_encoding = rb_enc_from_index(internal_index);
242
- }
258
+
259
+ rb_io_ext_int_to_encs(ext_enc, int_enc, &enc, &enc2, 0);
243
260
  }
244
261
  else if (!NIL_P(option)) {
245
262
  rb_raise(rb_eArgError, ":encoding has to be a String");
246
263
  }
247
264
 
265
+ // @see https://github.com/ruby/ruby/blob/70510d026f8d86693dccaba07417488eed09b41d/lib/csv.rb#L1567
266
+ // @see https://github.com/ruby/ruby/blob/70510d026f8d86693dccaba07417488eed09b41d/lib/csv.rb#L2300
267
+ if (rb_respond_to(port, s_internal_encoding)) {
268
+ r_encoding = rb_funcall(port, s_internal_encoding, 0);
269
+ if (NIL_P(r_encoding)) {
270
+ r_encoding = rb_funcall(port, s_external_encoding, 0);
271
+ }
272
+ }
273
+ else if (rb_respond_to(port, s_string)) {
274
+ r_encoding = rb_funcall(rb_funcall(port, s_string, 0), s_encoding, 0);
275
+ }
276
+ else if (rb_respond_to(port, s_encoding)) {
277
+ r_encoding = rb_funcall(port, s_encoding, 0);
278
+ }
279
+ else {
280
+ r_encoding = rb_enc_from_encoding(rb_ascii8bit_encoding());
281
+ }
282
+ if (NIL_P(r_encoding)) {
283
+ r_encoding = rb_enc_from_encoding(rb_default_internal_encoding());
284
+ }
285
+ if (NIL_P(r_encoding)) {
286
+ r_encoding = rb_enc_from_encoding(rb_default_external_encoding());
287
+ }
288
+ if (enc2 != NULL) {
289
+ encoding = enc2;
290
+ }
291
+ else if (enc != NULL) {
292
+ encoding = enc;
293
+ }
294
+ else if (!NIL_P(r_encoding)) {
295
+ encoding = rb_enc_get(r_encoding);
296
+ }
297
+
248
298
  buffer_size = BUFSIZE;
249
299
  if (rb_ivar_defined(self, rb_intern("@buffer_size")) == Qtrue) {
250
300
  bufsize = rb_ivar_get(self, rb_intern("@buffer_size"));
@@ -305,10 +355,6 @@ VALUE fastcsv(int argc, VALUE *argv, VALUE self) {
305
355
  }
306
356
 
307
357
  pe = p + len;
308
- // if (done) {
309
- // // This triggers the eof action in the non-scanner version.
310
- // eof = pe;
311
- // }
312
358
  %% write exec;
313
359
 
314
360
  if (done && cs < fastcsv_first_final) {
@@ -348,6 +394,10 @@ VALUE fastcsv(int argc, VALUE *argv, VALUE self) {
348
394
  void Init_fastcsv() {
349
395
  s_read = rb_intern("read");
350
396
  s_to_str = rb_intern("to_str");
397
+ s_internal_encoding = rb_intern("internal_encoding");
398
+ s_external_encoding = rb_intern("external_encoding");
399
+ s_string = rb_intern("string");
400
+ s_encoding = rb_intern("encoding");
351
401
 
352
402
  mModule = rb_define_module("FastCSV");
353
403
  rb_define_attr(rb_singleton_class(mModule), "buffer_size", 1, 1);
@@ -2,7 +2,7 @@
2
2
 
3
3
  Gem::Specification.new do |s|
4
4
  s.name = "fastcsv"
5
- s.version = '0.0.1'
5
+ s.version = '0.0.2'
6
6
  s.platform = Gem::Platform::RUBY
7
7
  s.authors = ["Open North"]
8
8
  s.email = ["info@opennorth.ca"]
@@ -57,6 +57,9 @@ RSpec.shared_examples 'a CSV parser' do
57
57
  %(foo,"bar\nbaz",bzz),
58
58
  %(foo,"""bar""baz""bzz""",zzz),
59
59
 
60
+ # Single quotes.
61
+ %('foo','bar','baz'),
62
+
60
63
  # Buffers.
61
64
  "01234567890" * 2_000, # 20,000 > BUFSIZE
62
65
  "0123456789," * 2_000,
@@ -68,7 +71,7 @@ RSpec.shared_examples 'a CSV parser' do
68
71
  # Uneven data types.
69
72
  "2000-01-01,2,x\nx,2000-01-01,2",
70
73
  ].each do |csv|
71
- it "should parse: #{csv}" do
74
+ it "should parse: #{csv.inspect.gsub('\"', '"')}" do
72
75
  expect(parse(csv)).to eq(CSV.parse(csv))
73
76
  end
74
77
  end
@@ -112,34 +115,45 @@ RSpec.shared_examples 'a CSV parser' do
112
115
  end
113
116
  end
114
117
 
118
+ it "should parse an encoded string" do
119
+ csv = "ß"
120
+ actual = parse(csv)
121
+ expected = CSV.parse(csv)
122
+ expect(actual[0][0].encoding).to eq(expected[0][0].encoding)
123
+ expect(actual).to eq(expected)
124
+ end
125
+
115
126
  it 'should raise an error on mixed row separators are' do
116
- csv = "foo\rbar\nbaz\r\n"
117
- expect{CSV.parse(csv)}.to raise_error(CSV::MalformedCSVError, 'Unquoted fields do not allow \r or \n (line 2).')
127
+ expect{CSV.parse("foo\rbar\nbaz\r\n")}.to raise_error(CSV::MalformedCSVError, 'Unquoted fields do not allow \r or \n (line 2).')
118
128
  skip
119
129
  end
120
130
 
121
- it 'should raise an error if no block is given' do
122
- expect{parse_without_block('x')}.to raise_error(LocalJumpError, 'no block given')
123
- end
131
+ context 'when initializing' do
132
+ it 'should raise an error if no block is given' do
133
+ expect{parse_without_block('x')}.to raise_error(LocalJumpError, 'no block given')
134
+ end
124
135
 
125
- it 'should not raise an error if no block and empty input' do
126
- expect{parse_without_block('')}.to_not raise_error
127
- end
136
+ it 'should not raise an error if no block and empty input' do
137
+ expect{parse_without_block('')}.to_not raise_error
138
+ end
128
139
 
129
- it 'should raise an error if the options are not a Hash or nil' do
130
- expect{parse('', '')}.to raise_error(ArgumentError, 'options has to be a Hash or nil')
140
+ it 'should raise an error if the options are not a Hash or nil' do
141
+ expect{parse('', '')}.to raise_error(ArgumentError, 'options has to be a Hash or nil')
142
+ end
131
143
  end
132
144
 
133
- it 'should allow nil buffer size' do
134
- FastCSV.buffer_size = nil
135
- expect(parse(simple)).to eq(CSV.parse(simple))
136
- FastCSV.buffer_size = nil
137
- end
145
+ context 'when setting a buffer size' do
146
+ it 'should allow nil' do
147
+ FastCSV.buffer_size = nil
148
+ expect(parse(simple)).to eq(CSV.parse(simple))
149
+ FastCSV.buffer_size = nil
150
+ end
138
151
 
139
- it 'should recover from a zero buffer size' do
140
- FastCSV.buffer_size = 0
141
- expect(parse(simple)).to eq(CSV.parse(simple))
142
- FastCSV.buffer_size = nil
152
+ it 'should allow zero' do
153
+ FastCSV.buffer_size = 0
154
+ expect(parse(simple)).to eq(CSV.parse(simple))
155
+ FastCSV.buffer_size = nil
156
+ end
143
157
  end
144
158
  end
145
159
 
@@ -184,35 +198,47 @@ RSpec.describe FastCSV do
184
198
  end
185
199
  end
186
200
 
187
- def parse_with_encoding(basename, encoding)
188
- filename = File.expand_path(File.join('..', 'fixtures', basename), __FILE__)
189
- options = {encoding: encoding}
190
- File.open(filename) do |io|
191
- rows = []
192
- FastCSV.raw_parse(io, options){|row| rows << row}
193
- expected = CSV.read(filename, options)
194
- expect(rows).to eq(expected)
195
- expect(rows[0][0].encoding).to eq(expected[0][0].encoding)
201
+ context 'with encoded strings' do
202
+ def parse_with_encoding(basename, encoding)
203
+ filename = File.expand_path(File.join('..', 'fixtures', basename), __FILE__)
204
+ options = {encoding: encoding}
205
+ File.open(filename) do |io|
206
+ rows = []
207
+ FastCSV.raw_parse(io, options){|row| rows << row}
208
+ expected = CSV.read(filename, options)
209
+ expect(rows[0][0].encoding).to eq(expected[0][0].encoding)
210
+ expect(rows).to eq(expected)
211
+ end
196
212
  end
197
- end
198
213
 
199
- it 'should encode the input' do
200
- parse_with_encoding('iso-8859-1.csv', 'iso-8859-1')
201
- end
214
+ it 'should encode' do
215
+ parse_with_encoding('iso-8859-1.csv', 'iso-8859-1')
216
+ end
202
217
 
203
- it 'should encode the input with a blank internal encoding' do
204
- parse_with_encoding('utf-8.csv', ':utf-8')
205
- end
218
+ it 'should transcode' do
219
+ parse_with_encoding('iso-8859-1.csv', 'iso-8859-1:utf-8')
220
+ end
206
221
 
207
- it 'should transcode the input' do
208
- parse_with_encoding('iso-8859-1.csv', 'iso-8859-1:utf-8')
209
- end
222
+ it 'should recover from blank external encoding' do
223
+ parse_with_encoding('utf-8.csv', ':utf-8')
224
+ end
225
+
226
+ it 'should recover from invalid internal encoding' do
227
+ parse_with_encoding('utf-8.csv', 'invalid')
228
+ end
210
229
 
211
- it 'should invalid encoding' do
212
- parse_with_encoding('utf-8.csv', 'invalid')
230
+ it 'should recover from invalid external encoding' do
231
+ parse_with_encoding('utf-8.csv', 'invalid:-')
232
+ end
233
+
234
+ it 'should recover from invalid encodings' do
235
+ parse_with_encoding('utf-8.csv', 'invalid:invalid')
236
+ end
213
237
  end
214
238
 
215
- it 'should raise an error if the input is not a String or IO' do
216
- expect{FastCSV.raw_parse(nil)}.to raise_error(ArgumentError, 'data has to respond to #read or #to_str')
239
+ context 'when initializing' do
240
+ it 'should raise an error if the input is not a String or IO' do
241
+ expect{FastCSV.raw_parse(nil)}.to raise_error(ArgumentError, 'data has to respond to #read or #to_str')
242
+ end
217
243
  end
218
244
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: fastcsv
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.1
4
+ version: 0.0.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Open North