fastcsv 0.0.1 → 0.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: e5d991d026c76068b9e646ba62cefdad823f01e1
4
- data.tar.gz: 8815f0bb3b00e01593f2a46320cf58c88926099c
3
+ metadata.gz: 129c6ed1d3b30a44456108f280a582ccdaac96e9
4
+ data.tar.gz: 4d819f3bb6e637cb5fb3e130c378583202f8d3ee
5
5
  SHA512:
6
- metadata.gz: 8762ce01e3e5af4cd0395bf541879db46f677f79201e2f44dc5f35dd30514c53fa2c2c5808ec61149898d64bff908a74fe2153c690a04e75c82ba7306794fa15
7
- data.tar.gz: e6dd0a3f89f9d330428fbc8d6f1b469d9f3a8ca255561c6fa972d1ad2147422bd090f3c7a373d08d29b07748f781f9b8d70fae27a092a075d183de897094eea5
6
+ metadata.gz: 8a960b458260e864346755a7b00afca9735e8851f70b9ebe3c4d95e1c5300c016fda9b1db5ff7c39cfcc288fdfa51ec038b5796eb12d01c8a7a7cb0d24ae1fe3
7
+ data.tar.gz: 76612ddd0aedef55ca914a5de6b141d9d274c395ec3b9fcc28897e4ca2762ade22759e078446f6ef8ee673ff96954f328e9d23a645e7ba89fe0168ae75e6e7dc
@@ -16,24 +16,20 @@
16
16
  // Ragel help.
17
17
  // https://www.mail-archive.com/ragel-users@complang.org/
18
18
 
19
- # define ASSOCIATE_INDEX \
20
- if (internal_index >= 0) { \
21
- rb_enc_associate_index(field, internal_index); \
22
- field = rb_str_encode(field, rb_enc_from_encoding(external_encoding), 0, Qnil); \
23
- } \
24
- else { \
25
- rb_enc_associate_index(field, rb_enc_to_index(external_encoding)); \
26
- }
19
+ #define ENCODE \
20
+ if (enc2 != NULL) { \
21
+ field = rb_str_encode(field, rb_enc_from_encoding(enc), 0, Qnil); \
22
+ }
27
23
 
28
24
  static VALUE mModule, rb_eParseError;
29
- static ID s_read, s_to_str;
25
+ static ID s_read, s_to_str, s_internal_encoding, s_external_encoding, s_string, s_encoding;
30
26
 
31
27
 
32
- #line 139 "ext/fastcsv/fastcsv.rl"
28
+ #line 125 "ext/fastcsv/fastcsv.rl"
33
29
 
34
30
 
35
31
 
36
- #line 37 "ext/fastcsv/fastcsv.c"
32
+ #line 33 "ext/fastcsv/fastcsv.c"
37
33
  static const int fastcsv_start = 4;
38
34
  static const int fastcsv_first_final = 4;
39
35
  static const int fastcsv_error = 0;
@@ -41,10 +37,39 @@ static const int fastcsv_error = 0;
41
37
  static const int fastcsv_en_main = 4;
42
38
 
43
39
 
44
- #line 142 "ext/fastcsv/fastcsv.rl"
40
+ #line 128 "ext/fastcsv/fastcsv.rl"
45
41
 
42
+ // 16 kB
46
43
  #define BUFSIZE 16384
47
44
 
45
+ // @see http://rxr.whitequark.org/mri/source/io.c#4845
46
+ static void
47
+ rb_io_ext_int_to_encs(rb_encoding *ext, rb_encoding *intern, rb_encoding **enc, rb_encoding **enc2, int fmode)
48
+ {
49
+ int default_ext = 0;
50
+
51
+ if (ext == NULL) {
52
+ ext = rb_default_external_encoding();
53
+ default_ext = 1;
54
+ }
55
+ if (ext == rb_ascii8bit_encoding()) {
56
+ /* If external is ASCII-8BIT, no transcoding */
57
+ intern = NULL;
58
+ }
59
+ else if (intern == NULL) {
60
+ intern = rb_default_internal_encoding();
61
+ }
62
+ if (intern == NULL || intern == (rb_encoding *)Qnil || intern == ext) {
63
+ /* No internal encoding => use external + no transcoding */
64
+ *enc = (default_ext && intern != ext) ? NULL : ext;
65
+ *enc2 = NULL;
66
+ }
67
+ else {
68
+ *enc = intern;
69
+ *enc2 = ext;
70
+ }
71
+ }
72
+
48
73
  VALUE fastcsv(int argc, VALUE *argv, VALUE self) {
49
74
  int cs, act, have = 0, curline = 1, io = 0;
50
75
  char *ts = 0, *te = 0, *buf = 0, *eof = 0;
@@ -52,11 +77,11 @@ VALUE fastcsv(int argc, VALUE *argv, VALUE self) {
52
77
  VALUE port, opts;
53
78
  VALUE row = rb_ary_new(), field = Qnil, bufsize = Qnil;
54
79
  int done = 0, unclosed_line = 0, buffer_size = 0, taint = 0;
55
- int internal_index = 0, external_index = rb_enc_to_index(rb_default_external_encoding());
56
- rb_encoding *external_encoding = rb_default_external_encoding();
80
+ rb_encoding *enc = NULL, *enc2 = NULL, *encoding = NULL;
81
+ VALUE r_encoding;
57
82
 
58
83
  VALUE option;
59
- char quote_char = '"'; //, *col_sep = ",", *row_sep = "\r\n";
84
+ char quote_char = '"';
60
85
 
61
86
  rb_scan_args(argc, argv, "11", &port, &opts);
62
87
  taint = OBJ_TAINTED(port);
@@ -78,76 +103,111 @@ VALUE fastcsv(int argc, VALUE *argv, VALUE self) {
78
103
  rb_raise(rb_eArgError, "options has to be a Hash or nil");
79
104
  }
80
105
 
81
- // @note Add machines for common CSV dialects, or see if we can use "when"
82
- // from Chapter 6 to compare the character to the host program's variable.
83
- // option = rb_hash_aref(opts, ID2SYM(rb_intern("quote_char")));
84
- // if (TYPE(option) == T_STRING && RSTRING_LEN(option) == 1) {
85
- // quote_char = *StringValueCStr(option);
86
- // }
87
- // else if (!NIL_P(option)) {
88
- // rb_raise(rb_eArgError, ":quote_char has to be a single character String");
89
- // }
90
-
91
- // option = rb_hash_aref(opts, ID2SYM(rb_intern("col_sep")));
92
- // if (TYPE(option) == T_STRING) {
93
- // col_sep = StringValueCStr(option);
94
- // }
95
- // else if (!NIL_P(option)) {
96
- // rb_raise(rb_eArgError, ":col_sep has to be a String");
97
- // }
98
-
99
- // option = rb_hash_aref(opts, ID2SYM(rb_intern("row_sep")));
100
- // if (TYPE(option) == T_STRING) {
101
- // row_sep = StringValueCStr(option);
102
- // }
103
- // else if (!NIL_P(option)) {
104
- // rb_raise(rb_eArgError, ":row_sep has to be a String");
105
- // }
106
+ // @see rb_io_extract_modeenc
107
+ /* Set to defaults */
108
+ rb_io_ext_int_to_encs(NULL, NULL, &enc, &enc2, 0);
106
109
 
110
+ // "enc" (internal) or "enc2:enc" (external:internal) or "enc:-" (external).
111
+ // We don't support binmode, which would force "ASCII-8BIT", or "BOM|UTF-*".
112
+ // @see http://ruby-doc.org/core-2.1.1/IO.html#method-c-new-label-Open+Mode
107
113
  option = rb_hash_aref(opts, ID2SYM(rb_intern("encoding")));
108
114
  if (TYPE(option) == T_STRING) {
109
- // @see parse_mode_enc in Ruby's io.c
110
- const char *string = StringValueCStr(option), *pointer;
111
- char internal_encoding_name[ENCODING_MAXNAMELEN + 1];
115
+ // parse_mode_enc is not in header file.
116
+ const char *estr = StringValueCStr(option), *ptr;
117
+ char encname[ENCODING_MAXNAMELEN+1];
118
+ int idx, idx2;
119
+ rb_encoding *ext_enc, *int_enc;
120
+
121
+ /* parse estr as "enc" or "enc2:enc" or "enc:-" */
112
122
 
113
- pointer = strrchr(string, ':');
114
- if (pointer) {
115
- long len = (pointer++) - string;
123
+ ptr = strrchr(estr, ':');
124
+ if (ptr) {
125
+ long len = (ptr++) - estr;
116
126
  if (len == 0 || len > ENCODING_MAXNAMELEN) {
117
- internal_index = -1;
127
+ idx = -1;
118
128
  }
119
129
  else {
120
- memcpy(internal_encoding_name, string, len);
121
- internal_encoding_name[len] = '\0';
122
- string = internal_encoding_name;
123
- internal_index = rb_enc_find_index(internal_encoding_name);
130
+ memcpy(encname, estr, len);
131
+ encname[len] = '\0';
132
+ estr = encname;
133
+ idx = rb_enc_find_index(encname);
124
134
  }
125
135
  }
126
136
  else {
127
- internal_index = rb_enc_find_index(string);
137
+ idx = rb_enc_find_index(estr);
128
138
  }
129
139
 
130
- if (internal_index < 0 && internal_index != -2) {
131
- rb_warn("Unsupported encoding %s ignored", string);
140
+ if (idx >= 0) {
141
+ ext_enc = rb_enc_from_index(idx);
142
+ }
143
+ else {
144
+ if (idx != -2) {
145
+ // `unsupported_encoding` is not in header file.
146
+ rb_warn("Unsupported encoding %s ignored", estr);
147
+ }
148
+ ext_enc = NULL;
132
149
  }
133
150
 
134
- if (pointer) {
135
- external_index = rb_enc_find_index(pointer);
136
- if (external_index >= 0) {
137
- external_encoding = rb_enc_from_index(external_index);
151
+ int_enc = NULL;
152
+ if (ptr) {
153
+ if (*ptr == '-' && *(ptr+1) == '\0') {
154
+ /* Special case - "-" => no transcoding */
155
+ int_enc = (rb_encoding *)Qnil;
138
156
  }
139
157
  else {
140
- rb_warn("Unsupported encoding %s ignored", string);
158
+ idx2 = rb_enc_find_index(ptr);
159
+ if (idx2 < 0) {
160
+ // `unsupported_encoding` is not in header file.
161
+ rb_warn("Unsupported encoding %s ignored", ptr);
162
+ }
163
+ else if (idx2 == idx) {
164
+ int_enc = (rb_encoding *)Qnil;
165
+ }
166
+ else {
167
+ int_enc = rb_enc_from_index(idx2);
168
+ }
141
169
  }
142
170
  }
143
- else if (internal_index >= 0) {
144
- external_encoding = rb_enc_from_index(internal_index);
145
- }
171
+
172
+ rb_io_ext_int_to_encs(ext_enc, int_enc, &enc, &enc2, 0);
146
173
  }
147
174
  else if (!NIL_P(option)) {
148
175
  rb_raise(rb_eArgError, ":encoding has to be a String");
149
176
  }
150
177
 
178
+ // @see https://github.com/ruby/ruby/blob/70510d026f8d86693dccaba07417488eed09b41d/lib/csv.rb#L1567
179
+ // @see https://github.com/ruby/ruby/blob/70510d026f8d86693dccaba07417488eed09b41d/lib/csv.rb#L2300
180
+ if (rb_respond_to(port, s_internal_encoding)) {
181
+ r_encoding = rb_funcall(port, s_internal_encoding, 0);
182
+ if (NIL_P(r_encoding)) {
183
+ r_encoding = rb_funcall(port, s_external_encoding, 0);
184
+ }
185
+ }
186
+ else if (rb_respond_to(port, s_string)) {
187
+ r_encoding = rb_funcall(rb_funcall(port, s_string, 0), s_encoding, 0);
188
+ }
189
+ else if (rb_respond_to(port, s_encoding)) {
190
+ r_encoding = rb_funcall(port, s_encoding, 0);
191
+ }
192
+ else {
193
+ r_encoding = rb_enc_from_encoding(rb_ascii8bit_encoding());
194
+ }
195
+ if (NIL_P(r_encoding)) {
196
+ r_encoding = rb_enc_from_encoding(rb_default_internal_encoding());
197
+ }
198
+ if (NIL_P(r_encoding)) {
199
+ r_encoding = rb_enc_from_encoding(rb_default_external_encoding());
200
+ }
201
+ if (enc2 != NULL) {
202
+ encoding = enc2;
203
+ }
204
+ else if (enc != NULL) {
205
+ encoding = enc;
206
+ }
207
+ else if (!NIL_P(r_encoding)) {
208
+ encoding = rb_enc_get(r_encoding);
209
+ }
210
+
151
211
  buffer_size = BUFSIZE;
152
212
  if (rb_ivar_defined(self, rb_intern("@buffer_size")) == Qtrue) {
153
213
  bufsize = rb_ivar_get(self, rb_intern("@buffer_size"));
@@ -161,7 +221,7 @@ VALUE fastcsv(int argc, VALUE *argv, VALUE self) {
161
221
  }
162
222
 
163
223
 
164
- #line 165 "ext/fastcsv/fastcsv.c"
224
+ #line 225 "ext/fastcsv/fastcsv.c"
165
225
  {
166
226
  cs = fastcsv_start;
167
227
  ts = 0;
@@ -169,7 +229,7 @@ VALUE fastcsv(int argc, VALUE *argv, VALUE self) {
169
229
  act = 0;
170
230
  }
171
231
 
172
- #line 261 "ext/fastcsv/fastcsv.rl"
232
+ #line 311 "ext/fastcsv/fastcsv.rl"
173
233
 
174
234
  while (!done) {
175
235
  VALUE str;
@@ -217,12 +277,8 @@ VALUE fastcsv(int argc, VALUE *argv, VALUE self) {
217
277
  }
218
278
 
219
279
  pe = p + len;
220
- // if (done) {
221
- // // This triggers the eof action in the non-scanner version.
222
- // eof = pe;
223
- // }
224
280
 
225
- #line 226 "ext/fastcsv/fastcsv.c"
281
+ #line 282 "ext/fastcsv/fastcsv.c"
226
282
  {
227
283
  if ( p == pe )
228
284
  goto _test_eof;
@@ -241,7 +297,7 @@ tr0:
241
297
  }
242
298
  goto st4;
243
299
  tr10:
244
- #line 105 "ext/fastcsv/fastcsv.rl"
300
+ #line 101 "ext/fastcsv/fastcsv.rl"
245
301
  {
246
302
  if (!NIL_P(field) || RARRAY_LEN(row)) {
247
303
  rb_ary_push(row, field);
@@ -250,19 +306,19 @@ tr10:
250
306
  rb_yield(row);
251
307
  }
252
308
  }
253
- #line 129 "ext/fastcsv/fastcsv.rl"
309
+ #line 123 "ext/fastcsv/fastcsv.rl"
254
310
  {te = p+1;}
255
311
  goto st4;
256
312
  tr16:
257
- #line 129 "ext/fastcsv/fastcsv.rl"
313
+ #line 123 "ext/fastcsv/fastcsv.rl"
258
314
  {te = p;p--;}
259
315
  goto st4;
260
316
  tr17:
261
- #line 128 "ext/fastcsv/fastcsv.rl"
317
+ #line 122 "ext/fastcsv/fastcsv.rl"
262
318
  {te = p;p--;}
263
319
  goto st4;
264
320
  tr18:
265
- #line 105 "ext/fastcsv/fastcsv.rl"
321
+ #line 101 "ext/fastcsv/fastcsv.rl"
266
322
  {
267
323
  if (!NIL_P(field) || RARRAY_LEN(row)) {
268
324
  rb_ary_push(row, field);
@@ -271,15 +327,15 @@ tr18:
271
327
  rb_yield(row);
272
328
  }
273
329
  }
274
- #line 128 "ext/fastcsv/fastcsv.rl"
330
+ #line 122 "ext/fastcsv/fastcsv.rl"
275
331
  {te = p+1;}
276
332
  goto st4;
277
333
  tr20:
278
- #line 127 "ext/fastcsv/fastcsv.rl"
334
+ #line 121 "ext/fastcsv/fastcsv.rl"
279
335
  {te = p;p--;}
280
336
  goto st4;
281
337
  tr21:
282
- #line 105 "ext/fastcsv/fastcsv.rl"
338
+ #line 101 "ext/fastcsv/fastcsv.rl"
283
339
  {
284
340
  if (!NIL_P(field) || RARRAY_LEN(row)) {
285
341
  rb_ary_push(row, field);
@@ -288,7 +344,7 @@ tr21:
288
344
  rb_yield(row);
289
345
  }
290
346
  }
291
- #line 127 "ext/fastcsv/fastcsv.rl"
347
+ #line 121 "ext/fastcsv/fastcsv.rl"
292
348
  {te = p+1;}
293
349
  goto st4;
294
350
  st4:
@@ -301,7 +357,7 @@ st4:
301
357
  case 4:
302
358
  #line 1 "NONE"
303
359
  {ts = p;}
304
- #line 305 "ext/fastcsv/fastcsv.c"
360
+ #line 361 "ext/fastcsv/fastcsv.c"
305
361
  switch( (*p) ) {
306
362
  case 0: goto tr14;
307
363
  case 10: goto tr3;
@@ -325,18 +381,18 @@ case 1:
325
381
  tr2:
326
382
  #line 1 "NONE"
327
383
  {te = p+1;}
328
- #line 44 "ext/fastcsv/fastcsv.rl"
384
+ #line 40 "ext/fastcsv/fastcsv.rl"
329
385
  {
330
386
  if (p == ts) {
331
387
  // Unquoted empty fields are nil, not "", in Ruby.
332
388
  field = Qnil;
333
389
  }
334
390
  else if (p > ts) {
335
- field = rb_str_new(ts, p - ts);
336
- ASSOCIATE_INDEX;
391
+ field = rb_enc_str_new(ts, p - ts, encoding);
392
+ ENCODE;
337
393
  }
338
394
  }
339
- #line 105 "ext/fastcsv/fastcsv.rl"
395
+ #line 101 "ext/fastcsv/fastcsv.rl"
340
396
  {
341
397
  if (!NIL_P(field) || RARRAY_LEN(row)) {
342
398
  rb_ary_push(row, field);
@@ -345,14 +401,14 @@ tr2:
345
401
  rb_yield(row);
346
402
  }
347
403
  }
348
- #line 129 "ext/fastcsv/fastcsv.rl"
404
+ #line 123 "ext/fastcsv/fastcsv.rl"
349
405
  {act = 3;}
350
406
  goto st5;
351
407
  st5:
352
408
  if ( ++p == pe )
353
409
  goto _test_eof5;
354
410
  case 5:
355
- #line 356 "ext/fastcsv/fastcsv.c"
411
+ #line 412 "ext/fastcsv/fastcsv.c"
356
412
  switch( (*p) ) {
357
413
  case 0: goto tr2;
358
414
  case 10: goto tr3;
@@ -362,18 +418,18 @@ case 5:
362
418
  }
363
419
  goto st1;
364
420
  tr3:
365
- #line 44 "ext/fastcsv/fastcsv.rl"
421
+ #line 40 "ext/fastcsv/fastcsv.rl"
366
422
  {
367
423
  if (p == ts) {
368
424
  // Unquoted empty fields are nil, not "", in Ruby.
369
425
  field = Qnil;
370
426
  }
371
427
  else if (p > ts) {
372
- field = rb_str_new(ts, p - ts);
373
- ASSOCIATE_INDEX;
428
+ field = rb_enc_str_new(ts, p - ts, encoding);
429
+ ENCODE;
374
430
  }
375
431
  }
376
- #line 95 "ext/fastcsv/fastcsv.rl"
432
+ #line 91 "ext/fastcsv/fastcsv.rl"
377
433
  {
378
434
  if (!NIL_P(field) || RARRAY_LEN(row)) { // same as new_field
379
435
  rb_ary_push(row, field);
@@ -383,19 +439,19 @@ tr3:
383
439
  rb_yield(row);
384
440
  row = rb_ary_new();
385
441
  }
386
- #line 32 "ext/fastcsv/fastcsv.rl"
442
+ #line 28 "ext/fastcsv/fastcsv.rl"
387
443
  {
388
444
  curline++;
389
445
  }
390
446
  goto st6;
391
447
  tr19:
392
- #line 32 "ext/fastcsv/fastcsv.rl"
448
+ #line 28 "ext/fastcsv/fastcsv.rl"
393
449
  {
394
450
  curline++;
395
451
  }
396
452
  goto st6;
397
453
  tr11:
398
- #line 95 "ext/fastcsv/fastcsv.rl"
454
+ #line 91 "ext/fastcsv/fastcsv.rl"
399
455
  {
400
456
  if (!NIL_P(field) || RARRAY_LEN(row)) { // same as new_field
401
457
  rb_ary_push(row, field);
@@ -405,7 +461,7 @@ tr11:
405
461
  rb_yield(row);
406
462
  row = rb_ary_new();
407
463
  }
408
- #line 32 "ext/fastcsv/fastcsv.rl"
464
+ #line 28 "ext/fastcsv/fastcsv.rl"
409
465
  {
410
466
  curline++;
411
467
  }
@@ -414,23 +470,23 @@ st6:
414
470
  if ( ++p == pe )
415
471
  goto _test_eof6;
416
472
  case 6:
417
- #line 418 "ext/fastcsv/fastcsv.c"
473
+ #line 474 "ext/fastcsv/fastcsv.c"
418
474
  if ( (*p) == 0 )
419
475
  goto tr18;
420
476
  goto tr17;
421
477
  tr4:
422
- #line 44 "ext/fastcsv/fastcsv.rl"
478
+ #line 40 "ext/fastcsv/fastcsv.rl"
423
479
  {
424
480
  if (p == ts) {
425
481
  // Unquoted empty fields are nil, not "", in Ruby.
426
482
  field = Qnil;
427
483
  }
428
484
  else if (p > ts) {
429
- field = rb_str_new(ts, p - ts);
430
- ASSOCIATE_INDEX;
485
+ field = rb_enc_str_new(ts, p - ts, encoding);
486
+ ENCODE;
431
487
  }
432
488
  }
433
- #line 95 "ext/fastcsv/fastcsv.rl"
489
+ #line 91 "ext/fastcsv/fastcsv.rl"
434
490
  {
435
491
  if (!NIL_P(field) || RARRAY_LEN(row)) { // same as new_field
436
492
  rb_ary_push(row, field);
@@ -440,13 +496,13 @@ tr4:
440
496
  rb_yield(row);
441
497
  row = rb_ary_new();
442
498
  }
443
- #line 32 "ext/fastcsv/fastcsv.rl"
499
+ #line 28 "ext/fastcsv/fastcsv.rl"
444
500
  {
445
501
  curline++;
446
502
  }
447
503
  goto st7;
448
504
  tr12:
449
- #line 95 "ext/fastcsv/fastcsv.rl"
505
+ #line 91 "ext/fastcsv/fastcsv.rl"
450
506
  {
451
507
  if (!NIL_P(field) || RARRAY_LEN(row)) { // same as new_field
452
508
  rb_ary_push(row, field);
@@ -456,7 +512,7 @@ tr12:
456
512
  rb_yield(row);
457
513
  row = rb_ary_new();
458
514
  }
459
- #line 32 "ext/fastcsv/fastcsv.rl"
515
+ #line 28 "ext/fastcsv/fastcsv.rl"
460
516
  {
461
517
  curline++;
462
518
  }
@@ -465,32 +521,32 @@ st7:
465
521
  if ( ++p == pe )
466
522
  goto _test_eof7;
467
523
  case 7:
468
- #line 469 "ext/fastcsv/fastcsv.c"
524
+ #line 525 "ext/fastcsv/fastcsv.c"
469
525
  switch( (*p) ) {
470
526
  case 0: goto tr18;
471
527
  case 10: goto tr19;
472
528
  }
473
529
  goto tr17;
474
530
  tr5:
475
- #line 44 "ext/fastcsv/fastcsv.rl"
531
+ #line 40 "ext/fastcsv/fastcsv.rl"
476
532
  {
477
533
  if (p == ts) {
478
534
  // Unquoted empty fields are nil, not "", in Ruby.
479
535
  field = Qnil;
480
536
  }
481
537
  else if (p > ts) {
482
- field = rb_str_new(ts, p - ts);
483
- ASSOCIATE_INDEX;
538
+ field = rb_enc_str_new(ts, p - ts, encoding);
539
+ ENCODE;
484
540
  }
485
541
  }
486
- #line 90 "ext/fastcsv/fastcsv.rl"
542
+ #line 86 "ext/fastcsv/fastcsv.rl"
487
543
  {
488
544
  rb_ary_push(row, field);
489
545
  field = Qnil;
490
546
  }
491
547
  goto st8;
492
548
  tr13:
493
- #line 90 "ext/fastcsv/fastcsv.rl"
549
+ #line 86 "ext/fastcsv/fastcsv.rl"
494
550
  {
495
551
  rb_ary_push(row, field);
496
552
  field = Qnil;
@@ -500,14 +556,14 @@ st8:
500
556
  if ( ++p == pe )
501
557
  goto _test_eof8;
502
558
  case 8:
503
- #line 504 "ext/fastcsv/fastcsv.c"
559
+ #line 560 "ext/fastcsv/fastcsv.c"
504
560
  if ( (*p) == 0 )
505
561
  goto tr21;
506
562
  goto tr20;
507
563
  tr14:
508
564
  #line 1 "NONE"
509
565
  {te = p+1;}
510
- #line 105 "ext/fastcsv/fastcsv.rl"
566
+ #line 101 "ext/fastcsv/fastcsv.rl"
511
567
  {
512
568
  if (!NIL_P(field) || RARRAY_LEN(row)) {
513
569
  rb_ary_push(row, field);
@@ -516,25 +572,25 @@ tr14:
516
572
  rb_yield(row);
517
573
  }
518
574
  }
519
- #line 44 "ext/fastcsv/fastcsv.rl"
575
+ #line 40 "ext/fastcsv/fastcsv.rl"
520
576
  {
521
577
  if (p == ts) {
522
578
  // Unquoted empty fields are nil, not "", in Ruby.
523
579
  field = Qnil;
524
580
  }
525
581
  else if (p > ts) {
526
- field = rb_str_new(ts, p - ts);
527
- ASSOCIATE_INDEX;
582
+ field = rb_enc_str_new(ts, p - ts, encoding);
583
+ ENCODE;
528
584
  }
529
585
  }
530
- #line 129 "ext/fastcsv/fastcsv.rl"
586
+ #line 123 "ext/fastcsv/fastcsv.rl"
531
587
  {act = 3;}
532
588
  goto st9;
533
589
  st9:
534
590
  if ( ++p == pe )
535
591
  goto _test_eof9;
536
592
  case 9:
537
- #line 538 "ext/fastcsv/fastcsv.c"
593
+ #line 594 "ext/fastcsv/fastcsv.c"
538
594
  switch( (*p) ) {
539
595
  case 10: goto tr16;
540
596
  case 13: goto tr16;
@@ -543,13 +599,13 @@ case 9:
543
599
  }
544
600
  goto st1;
545
601
  tr8:
546
- #line 32 "ext/fastcsv/fastcsv.rl"
602
+ #line 28 "ext/fastcsv/fastcsv.rl"
547
603
  {
548
604
  curline++;
549
605
  }
550
606
  goto st2;
551
607
  tr15:
552
- #line 36 "ext/fastcsv/fastcsv.rl"
608
+ #line 32 "ext/fastcsv/fastcsv.rl"
553
609
  {
554
610
  unclosed_line = curline;
555
611
  }
@@ -558,7 +614,7 @@ st2:
558
614
  if ( ++p == pe )
559
615
  goto _test_eof2;
560
616
  case 2:
561
- #line 562 "ext/fastcsv/fastcsv.c"
617
+ #line 618 "ext/fastcsv/fastcsv.c"
562
618
  switch( (*p) ) {
563
619
  case 0: goto st0;
564
620
  case 10: goto tr8;
@@ -570,11 +626,11 @@ st0:
570
626
  cs = 0;
571
627
  goto _out;
572
628
  tr9:
573
- #line 55 "ext/fastcsv/fastcsv.rl"
629
+ #line 51 "ext/fastcsv/fastcsv.rl"
574
630
  {
575
631
  if (p == ts) {
576
- field = rb_str_new2("");
577
- ASSOCIATE_INDEX;
632
+ field = rb_enc_str_new("", 0, encoding);
633
+ ENCODE;
578
634
  }
579
635
  // @note If we add an action on '""', we can skip some steps if no '""' is found.
580
636
  else if (p > ts) {
@@ -597,15 +653,15 @@ tr9:
597
653
  reader++;
598
654
  }
599
655
 
600
- field = rb_str_new(copy, writer - copy);
601
- ASSOCIATE_INDEX;
656
+ field = rb_enc_str_new(copy, writer - copy, enc);
657
+ ENCODE;
602
658
 
603
659
  if (copy != NULL) {
604
660
  free(copy);
605
661
  }
606
662
  }
607
663
  }
608
- #line 40 "ext/fastcsv/fastcsv.rl"
664
+ #line 36 "ext/fastcsv/fastcsv.rl"
609
665
  {
610
666
  unclosed_line = 0;
611
667
  }
@@ -614,7 +670,7 @@ st3:
614
670
  if ( ++p == pe )
615
671
  goto _test_eof3;
616
672
  case 3:
617
- #line 618 "ext/fastcsv/fastcsv.c"
673
+ #line 674 "ext/fastcsv/fastcsv.c"
618
674
  switch( (*p) ) {
619
675
  case 0: goto tr10;
620
676
  case 10: goto tr11;
@@ -650,7 +706,7 @@ case 3:
650
706
  _out: {}
651
707
  }
652
708
 
653
- #line 313 "ext/fastcsv/fastcsv.rl"
709
+ #line 359 "ext/fastcsv/fastcsv.rl"
654
710
 
655
711
  if (done && cs < fastcsv_first_final) {
656
712
  if (buf != NULL) {
@@ -689,6 +745,10 @@ case 3:
689
745
  void Init_fastcsv() {
690
746
  s_read = rb_intern("read");
691
747
  s_to_str = rb_intern("to_str");
748
+ s_internal_encoding = rb_intern("internal_encoding");
749
+ s_external_encoding = rb_intern("external_encoding");
750
+ s_string = rb_intern("string");
751
+ s_encoding = rb_intern("encoding");
692
752
 
693
753
  mModule = rb_define_module("FastCSV");
694
754
  rb_define_attr(rb_singleton_class(mModule), "buffer_size", 1, 1);
@@ -14,17 +14,13 @@
14
14
  // Ragel help.
15
15
  // https://www.mail-archive.com/ragel-users@complang.org/
16
16
 
17
- # define ASSOCIATE_INDEX \
18
- if (internal_index >= 0) { \
19
- rb_enc_associate_index(field, internal_index); \
20
- field = rb_str_encode(field, rb_enc_from_encoding(external_encoding), 0, Qnil); \
21
- } \
22
- else { \
23
- rb_enc_associate_index(field, rb_enc_to_index(external_encoding)); \
24
- }
17
+ #define ENCODE \
18
+ if (enc2 != NULL) { \
19
+ field = rb_str_encode(field, rb_enc_from_encoding(enc), 0, Qnil); \
20
+ }
25
21
 
26
22
  static VALUE mModule, rb_eParseError;
27
- static ID s_read, s_to_str;
23
+ static ID s_read, s_to_str, s_internal_encoding, s_external_encoding, s_string, s_encoding;
28
24
 
29
25
  %%{
30
26
  machine fastcsv;
@@ -47,15 +43,15 @@ static ID s_read, s_to_str;
47
43
  field = Qnil;
48
44
  }
49
45
  else if (p > ts) {
50
- field = rb_str_new(ts, p - ts);
51
- ASSOCIATE_INDEX;
46
+ field = rb_enc_str_new(ts, p - ts, encoding);
47
+ ENCODE;
52
48
  }
53
49
  }
54
50
 
55
51
  action read_quoted {
56
52
  if (p == ts) {
57
- field = rb_str_new2("");
58
- ASSOCIATE_INDEX;
53
+ field = rb_enc_str_new("", 0, encoding);
54
+ ENCODE;
59
55
  }
60
56
  // @note If we add an action on '""', we can skip some steps if no '""' is found.
61
57
  else if (p > ts) {
@@ -78,8 +74,8 @@ static ID s_read, s_to_str;
78
74
  reader++;
79
75
  }
80
76
 
81
- field = rb_str_new(copy, writer - copy);
82
- ASSOCIATE_INDEX;
77
+ field = rb_enc_str_new(copy, writer - copy, enc);
78
+ ENCODE;
83
79
 
84
80
  if (copy != NULL) {
85
81
  free(copy);
@@ -118,30 +114,49 @@ static ID s_read, s_to_str;
118
114
  unquoted = (any* -- quote_char -- col_sep -- row_sep - EOF) %read_unquoted;
119
115
  quoted = quote_char >open_quote (any - quote_char - EOF | quote_char quote_char | row_sep)* %read_quoted quote_char >close_quote;
120
116
  field = unquoted | quoted;
121
- # fields = (field col_sep)* field?;
122
- # file = (fields row_sep >new_row)* fields?;
123
117
 
124
118
  # @see Ragel Guide: 6.3 Scanners
125
- # Remember that an unquoted field can be zero-length.
119
+ # An unquoted field can be zero-length.
126
120
  main := |*
127
121
  field col_sep EOF?;
128
122
  field row_sep >new_row EOF?;
129
123
  field EOF;
130
124
  *|;
131
-
132
- # Non-scanner version requires very large buffer.
133
- # main := file $/{
134
- # if (!NIL_P(field) || RARRAY_LEN(row)) {
135
- # rb_ary_push(row, field);
136
- # rb_yield(row);
137
- # }
138
- # };
139
125
  }%%
140
126
 
141
127
  %% write data;
142
128
 
129
+ // 16 kB
143
130
  #define BUFSIZE 16384
144
131
 
132
+ // @see http://rxr.whitequark.org/mri/source/io.c#4845
133
+ static void
134
+ rb_io_ext_int_to_encs(rb_encoding *ext, rb_encoding *intern, rb_encoding **enc, rb_encoding **enc2, int fmode)
135
+ {
136
+ int default_ext = 0;
137
+
138
+ if (ext == NULL) {
139
+ ext = rb_default_external_encoding();
140
+ default_ext = 1;
141
+ }
142
+ if (ext == rb_ascii8bit_encoding()) {
143
+ /* If external is ASCII-8BIT, no transcoding */
144
+ intern = NULL;
145
+ }
146
+ else if (intern == NULL) {
147
+ intern = rb_default_internal_encoding();
148
+ }
149
+ if (intern == NULL || intern == (rb_encoding *)Qnil || intern == ext) {
150
+ /* No internal encoding => use external + no transcoding */
151
+ *enc = (default_ext && intern != ext) ? NULL : ext;
152
+ *enc2 = NULL;
153
+ }
154
+ else {
155
+ *enc = intern;
156
+ *enc2 = ext;
157
+ }
158
+ }
159
+
145
160
  VALUE fastcsv(int argc, VALUE *argv, VALUE self) {
146
161
  int cs, act, have = 0, curline = 1, io = 0;
147
162
  char *ts = 0, *te = 0, *buf = 0, *eof = 0;
@@ -149,11 +164,11 @@ VALUE fastcsv(int argc, VALUE *argv, VALUE self) {
149
164
  VALUE port, opts;
150
165
  VALUE row = rb_ary_new(), field = Qnil, bufsize = Qnil;
151
166
  int done = 0, unclosed_line = 0, buffer_size = 0, taint = 0;
152
- int internal_index = 0, external_index = rb_enc_to_index(rb_default_external_encoding());
153
- rb_encoding *external_encoding = rb_default_external_encoding();
167
+ rb_encoding *enc = NULL, *enc2 = NULL, *encoding = NULL;
168
+ VALUE r_encoding;
154
169
 
155
170
  VALUE option;
156
- char quote_char = '"'; //, *col_sep = ",", *row_sep = "\r\n";
171
+ char quote_char = '"';
157
172
 
158
173
  rb_scan_args(argc, argv, "11", &port, &opts);
159
174
  taint = OBJ_TAINTED(port);
@@ -175,76 +190,111 @@ VALUE fastcsv(int argc, VALUE *argv, VALUE self) {
175
190
  rb_raise(rb_eArgError, "options has to be a Hash or nil");
176
191
  }
177
192
 
178
- // @note Add machines for common CSV dialects, or see if we can use "when"
179
- // from Chapter 6 to compare the character to the host program's variable.
180
- // option = rb_hash_aref(opts, ID2SYM(rb_intern("quote_char")));
181
- // if (TYPE(option) == T_STRING && RSTRING_LEN(option) == 1) {
182
- // quote_char = *StringValueCStr(option);
183
- // }
184
- // else if (!NIL_P(option)) {
185
- // rb_raise(rb_eArgError, ":quote_char has to be a single character String");
186
- // }
187
-
188
- // option = rb_hash_aref(opts, ID2SYM(rb_intern("col_sep")));
189
- // if (TYPE(option) == T_STRING) {
190
- // col_sep = StringValueCStr(option);
191
- // }
192
- // else if (!NIL_P(option)) {
193
- // rb_raise(rb_eArgError, ":col_sep has to be a String");
194
- // }
195
-
196
- // option = rb_hash_aref(opts, ID2SYM(rb_intern("row_sep")));
197
- // if (TYPE(option) == T_STRING) {
198
- // row_sep = StringValueCStr(option);
199
- // }
200
- // else if (!NIL_P(option)) {
201
- // rb_raise(rb_eArgError, ":row_sep has to be a String");
202
- // }
193
+ // @see rb_io_extract_modeenc
194
+ /* Set to defaults */
195
+ rb_io_ext_int_to_encs(NULL, NULL, &enc, &enc2, 0);
203
196
 
197
+ // "enc" (internal) or "enc2:enc" (external:internal) or "enc:-" (external).
198
+ // We don't support binmode, which would force "ASCII-8BIT", or "BOM|UTF-*".
199
+ // @see http://ruby-doc.org/core-2.1.1/IO.html#method-c-new-label-Open+Mode
204
200
  option = rb_hash_aref(opts, ID2SYM(rb_intern("encoding")));
205
201
  if (TYPE(option) == T_STRING) {
206
- // @see parse_mode_enc in Ruby's io.c
207
- const char *string = StringValueCStr(option), *pointer;
208
- char internal_encoding_name[ENCODING_MAXNAMELEN + 1];
202
+ // parse_mode_enc is not in header file.
203
+ const char *estr = StringValueCStr(option), *ptr;
204
+ char encname[ENCODING_MAXNAMELEN+1];
205
+ int idx, idx2;
206
+ rb_encoding *ext_enc, *int_enc;
207
+
208
+ /* parse estr as "enc" or "enc2:enc" or "enc:-" */
209
209
 
210
- pointer = strrchr(string, ':');
211
- if (pointer) {
212
- long len = (pointer++) - string;
210
+ ptr = strrchr(estr, ':');
211
+ if (ptr) {
212
+ long len = (ptr++) - estr;
213
213
  if (len == 0 || len > ENCODING_MAXNAMELEN) {
214
- internal_index = -1;
214
+ idx = -1;
215
215
  }
216
216
  else {
217
- memcpy(internal_encoding_name, string, len);
218
- internal_encoding_name[len] = '\0';
219
- string = internal_encoding_name;
220
- internal_index = rb_enc_find_index(internal_encoding_name);
217
+ memcpy(encname, estr, len);
218
+ encname[len] = '\0';
219
+ estr = encname;
220
+ idx = rb_enc_find_index(encname);
221
221
  }
222
222
  }
223
223
  else {
224
- internal_index = rb_enc_find_index(string);
224
+ idx = rb_enc_find_index(estr);
225
225
  }
226
226
 
227
- if (internal_index < 0 && internal_index != -2) {
228
- rb_warn("Unsupported encoding %s ignored", string);
227
+ if (idx >= 0) {
228
+ ext_enc = rb_enc_from_index(idx);
229
+ }
230
+ else {
231
+ if (idx != -2) {
232
+ // `unsupported_encoding` is not in header file.
233
+ rb_warn("Unsupported encoding %s ignored", estr);
234
+ }
235
+ ext_enc = NULL;
229
236
  }
230
237
 
231
- if (pointer) {
232
- external_index = rb_enc_find_index(pointer);
233
- if (external_index >= 0) {
234
- external_encoding = rb_enc_from_index(external_index);
238
+ int_enc = NULL;
239
+ if (ptr) {
240
+ if (*ptr == '-' && *(ptr+1) == '\0') {
241
+ /* Special case - "-" => no transcoding */
242
+ int_enc = (rb_encoding *)Qnil;
235
243
  }
236
244
  else {
237
- rb_warn("Unsupported encoding %s ignored", string);
245
+ idx2 = rb_enc_find_index(ptr);
246
+ if (idx2 < 0) {
247
+ // `unsupported_encoding` is not in header file.
248
+ rb_warn("Unsupported encoding %s ignored", ptr);
249
+ }
250
+ else if (idx2 == idx) {
251
+ int_enc = (rb_encoding *)Qnil;
252
+ }
253
+ else {
254
+ int_enc = rb_enc_from_index(idx2);
255
+ }
238
256
  }
239
257
  }
240
- else if (internal_index >= 0) {
241
- external_encoding = rb_enc_from_index(internal_index);
242
- }
258
+
259
+ rb_io_ext_int_to_encs(ext_enc, int_enc, &enc, &enc2, 0);
243
260
  }
244
261
  else if (!NIL_P(option)) {
245
262
  rb_raise(rb_eArgError, ":encoding has to be a String");
246
263
  }
247
264
 
265
+ // @see https://github.com/ruby/ruby/blob/70510d026f8d86693dccaba07417488eed09b41d/lib/csv.rb#L1567
266
+ // @see https://github.com/ruby/ruby/blob/70510d026f8d86693dccaba07417488eed09b41d/lib/csv.rb#L2300
267
+ if (rb_respond_to(port, s_internal_encoding)) {
268
+ r_encoding = rb_funcall(port, s_internal_encoding, 0);
269
+ if (NIL_P(r_encoding)) {
270
+ r_encoding = rb_funcall(port, s_external_encoding, 0);
271
+ }
272
+ }
273
+ else if (rb_respond_to(port, s_string)) {
274
+ r_encoding = rb_funcall(rb_funcall(port, s_string, 0), s_encoding, 0);
275
+ }
276
+ else if (rb_respond_to(port, s_encoding)) {
277
+ r_encoding = rb_funcall(port, s_encoding, 0);
278
+ }
279
+ else {
280
+ r_encoding = rb_enc_from_encoding(rb_ascii8bit_encoding());
281
+ }
282
+ if (NIL_P(r_encoding)) {
283
+ r_encoding = rb_enc_from_encoding(rb_default_internal_encoding());
284
+ }
285
+ if (NIL_P(r_encoding)) {
286
+ r_encoding = rb_enc_from_encoding(rb_default_external_encoding());
287
+ }
288
+ if (enc2 != NULL) {
289
+ encoding = enc2;
290
+ }
291
+ else if (enc != NULL) {
292
+ encoding = enc;
293
+ }
294
+ else if (!NIL_P(r_encoding)) {
295
+ encoding = rb_enc_get(r_encoding);
296
+ }
297
+
248
298
  buffer_size = BUFSIZE;
249
299
  if (rb_ivar_defined(self, rb_intern("@buffer_size")) == Qtrue) {
250
300
  bufsize = rb_ivar_get(self, rb_intern("@buffer_size"));
@@ -305,10 +355,6 @@ VALUE fastcsv(int argc, VALUE *argv, VALUE self) {
305
355
  }
306
356
 
307
357
  pe = p + len;
308
- // if (done) {
309
- // // This triggers the eof action in the non-scanner version.
310
- // eof = pe;
311
- // }
312
358
  %% write exec;
313
359
 
314
360
  if (done && cs < fastcsv_first_final) {
@@ -348,6 +394,10 @@ VALUE fastcsv(int argc, VALUE *argv, VALUE self) {
348
394
  void Init_fastcsv() {
349
395
  s_read = rb_intern("read");
350
396
  s_to_str = rb_intern("to_str");
397
+ s_internal_encoding = rb_intern("internal_encoding");
398
+ s_external_encoding = rb_intern("external_encoding");
399
+ s_string = rb_intern("string");
400
+ s_encoding = rb_intern("encoding");
351
401
 
352
402
  mModule = rb_define_module("FastCSV");
353
403
  rb_define_attr(rb_singleton_class(mModule), "buffer_size", 1, 1);
@@ -2,7 +2,7 @@
2
2
 
3
3
  Gem::Specification.new do |s|
4
4
  s.name = "fastcsv"
5
- s.version = '0.0.1'
5
+ s.version = '0.0.2'
6
6
  s.platform = Gem::Platform::RUBY
7
7
  s.authors = ["Open North"]
8
8
  s.email = ["info@opennorth.ca"]
@@ -57,6 +57,9 @@ RSpec.shared_examples 'a CSV parser' do
57
57
  %(foo,"bar\nbaz",bzz),
58
58
  %(foo,"""bar""baz""bzz""",zzz),
59
59
 
60
+ # Single quotes.
61
+ %('foo','bar','baz'),
62
+
60
63
  # Buffers.
61
64
  "01234567890" * 2_000, # 20,000 > BUFSIZE
62
65
  "0123456789," * 2_000,
@@ -68,7 +71,7 @@ RSpec.shared_examples 'a CSV parser' do
68
71
  # Uneven data types.
69
72
  "2000-01-01,2,x\nx,2000-01-01,2",
70
73
  ].each do |csv|
71
- it "should parse: #{csv}" do
74
+ it "should parse: #{csv.inspect.gsub('\"', '"')}" do
72
75
  expect(parse(csv)).to eq(CSV.parse(csv))
73
76
  end
74
77
  end
@@ -112,34 +115,45 @@ RSpec.shared_examples 'a CSV parser' do
112
115
  end
113
116
  end
114
117
 
118
+ it "should parse an encoded string" do
119
+ csv = "ß"
120
+ actual = parse(csv)
121
+ expected = CSV.parse(csv)
122
+ expect(actual[0][0].encoding).to eq(expected[0][0].encoding)
123
+ expect(actual).to eq(expected)
124
+ end
125
+
115
126
  it 'should raise an error on mixed row separators are' do
116
- csv = "foo\rbar\nbaz\r\n"
117
- expect{CSV.parse(csv)}.to raise_error(CSV::MalformedCSVError, 'Unquoted fields do not allow \r or \n (line 2).')
127
+ expect{CSV.parse("foo\rbar\nbaz\r\n")}.to raise_error(CSV::MalformedCSVError, 'Unquoted fields do not allow \r or \n (line 2).')
118
128
  skip
119
129
  end
120
130
 
121
- it 'should raise an error if no block is given' do
122
- expect{parse_without_block('x')}.to raise_error(LocalJumpError, 'no block given')
123
- end
131
+ context 'when initializing' do
132
+ it 'should raise an error if no block is given' do
133
+ expect{parse_without_block('x')}.to raise_error(LocalJumpError, 'no block given')
134
+ end
124
135
 
125
- it 'should not raise an error if no block and empty input' do
126
- expect{parse_without_block('')}.to_not raise_error
127
- end
136
+ it 'should not raise an error if no block and empty input' do
137
+ expect{parse_without_block('')}.to_not raise_error
138
+ end
128
139
 
129
- it 'should raise an error if the options are not a Hash or nil' do
130
- expect{parse('', '')}.to raise_error(ArgumentError, 'options has to be a Hash or nil')
140
+ it 'should raise an error if the options are not a Hash or nil' do
141
+ expect{parse('', '')}.to raise_error(ArgumentError, 'options has to be a Hash or nil')
142
+ end
131
143
  end
132
144
 
133
- it 'should allow nil buffer size' do
134
- FastCSV.buffer_size = nil
135
- expect(parse(simple)).to eq(CSV.parse(simple))
136
- FastCSV.buffer_size = nil
137
- end
145
+ context 'when setting a buffer size' do
146
+ it 'should allow nil' do
147
+ FastCSV.buffer_size = nil
148
+ expect(parse(simple)).to eq(CSV.parse(simple))
149
+ FastCSV.buffer_size = nil
150
+ end
138
151
 
139
- it 'should recover from a zero buffer size' do
140
- FastCSV.buffer_size = 0
141
- expect(parse(simple)).to eq(CSV.parse(simple))
142
- FastCSV.buffer_size = nil
152
+ it 'should allow zero' do
153
+ FastCSV.buffer_size = 0
154
+ expect(parse(simple)).to eq(CSV.parse(simple))
155
+ FastCSV.buffer_size = nil
156
+ end
143
157
  end
144
158
  end
145
159
 
@@ -184,35 +198,47 @@ RSpec.describe FastCSV do
184
198
  end
185
199
  end
186
200
 
187
- def parse_with_encoding(basename, encoding)
188
- filename = File.expand_path(File.join('..', 'fixtures', basename), __FILE__)
189
- options = {encoding: encoding}
190
- File.open(filename) do |io|
191
- rows = []
192
- FastCSV.raw_parse(io, options){|row| rows << row}
193
- expected = CSV.read(filename, options)
194
- expect(rows).to eq(expected)
195
- expect(rows[0][0].encoding).to eq(expected[0][0].encoding)
201
+ context 'with encoded strings' do
202
+ def parse_with_encoding(basename, encoding)
203
+ filename = File.expand_path(File.join('..', 'fixtures', basename), __FILE__)
204
+ options = {encoding: encoding}
205
+ File.open(filename) do |io|
206
+ rows = []
207
+ FastCSV.raw_parse(io, options){|row| rows << row}
208
+ expected = CSV.read(filename, options)
209
+ expect(rows[0][0].encoding).to eq(expected[0][0].encoding)
210
+ expect(rows).to eq(expected)
211
+ end
196
212
  end
197
- end
198
213
 
199
- it 'should encode the input' do
200
- parse_with_encoding('iso-8859-1.csv', 'iso-8859-1')
201
- end
214
+ it 'should encode' do
215
+ parse_with_encoding('iso-8859-1.csv', 'iso-8859-1')
216
+ end
202
217
 
203
- it 'should encode the input with a blank internal encoding' do
204
- parse_with_encoding('utf-8.csv', ':utf-8')
205
- end
218
+ it 'should transcode' do
219
+ parse_with_encoding('iso-8859-1.csv', 'iso-8859-1:utf-8')
220
+ end
206
221
 
207
- it 'should transcode the input' do
208
- parse_with_encoding('iso-8859-1.csv', 'iso-8859-1:utf-8')
209
- end
222
+ it 'should recover from blank external encoding' do
223
+ parse_with_encoding('utf-8.csv', ':utf-8')
224
+ end
225
+
226
+ it 'should recover from invalid internal encoding' do
227
+ parse_with_encoding('utf-8.csv', 'invalid')
228
+ end
210
229
 
211
- it 'should invalid encoding' do
212
- parse_with_encoding('utf-8.csv', 'invalid')
230
+ it 'should recover from invalid external encoding' do
231
+ parse_with_encoding('utf-8.csv', 'invalid:-')
232
+ end
233
+
234
+ it 'should recover from invalid encodings' do
235
+ parse_with_encoding('utf-8.csv', 'invalid:invalid')
236
+ end
213
237
  end
214
238
 
215
- it 'should raise an error if the input is not a String or IO' do
216
- expect{FastCSV.raw_parse(nil)}.to raise_error(ArgumentError, 'data has to respond to #read or #to_str')
239
+ context 'when initializing' do
240
+ it 'should raise an error if the input is not a String or IO' do
241
+ expect{FastCSV.raw_parse(nil)}.to raise_error(ArgumentError, 'data has to respond to #read or #to_str')
242
+ end
217
243
  end
218
244
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: fastcsv
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.1
4
+ version: 0.0.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Open North