fastcsv 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +8 -0
- data/.rspec +2 -0
- data/Gemfile +4 -0
- data/LICENSE +20 -0
- data/README.md +56 -0
- data/Rakefile +21 -0
- data/USAGE +1 -0
- data/ext/fastcsv/extconf.rb +3 -0
- data/ext/fastcsv/fastcsv.c +697 -0
- data/ext/fastcsv/fastcsv.rl +356 -0
- data/fastcsv.gemspec +24 -0
- data/lib/fastcsv.rb +1 -0
- data/spec/fastcsv_spec.rb +218 -0
- data/spec/fixtures/iso-8859-1.csv +1 -0
- data/spec/fixtures/utf-8.csv +1 -0
- data/spec/spec_helper.rb +14 -0
- metadata +136 -0
@@ -0,0 +1,356 @@
|
|
1
|
+
#include <ruby.h>
|
2
|
+
#include <ruby/encoding.h>
|
3
|
+
// CSV specifications.
|
4
|
+
// http://tools.ietf.org/html/rfc4180
|
5
|
+
// http://w3c.github.io/csvw/syntax/#ebnf
|
6
|
+
|
7
|
+
// CSV implementation.
|
8
|
+
// https://github.com/ruby/ruby/blob/master/lib/csv.rb
|
9
|
+
|
10
|
+
// Ruby C extensions help.
|
11
|
+
// https://github.com/ruby/ruby/blob/trunk/README.EXT
|
12
|
+
// http://rxr.whitequark.org/mri/source
|
13
|
+
|
14
|
+
// Ragel help.
|
15
|
+
// https://www.mail-archive.com/ragel-users@complang.org/
|
16
|
+
|
17
|
+
// Tags the freshly built `field` String with an encoding.
//
// Expects `field`, `internal_index` and `external_encoding` to be in scope.
// When internal_index is non-negative, the bytes are labelled with that
// encoding and then transcoded to external_encoding; otherwise the bytes are
// simply labelled with external_encoding.
//
// Wrapped in do { } while (0) so that `ASSOCIATE_INDEX;` behaves as a single
// statement wherever it is used (e.g. directly under an if/else).
#define ASSOCIATE_INDEX \
  do { \
    if (internal_index >= 0) { \
      rb_enc_associate_index(field, internal_index); \
      field = rb_str_encode(field, rb_enc_from_encoding(external_encoding), 0, Qnil); \
    } \
    else { \
      rb_enc_associate_index(field, rb_enc_to_index(external_encoding)); \
    } \
  } while (0)
|
25
|
+
|
26
|
+
static VALUE mModule, rb_eParseError;
|
27
|
+
static ID s_read, s_to_str;
|
28
|
+
|
29
|
+
%%{
|
30
|
+
machine fastcsv;
|
31
|
+
|
32
|
+
action new_line {
|
33
|
+
curline++;
|
34
|
+
}
|
35
|
+
|
36
|
+
action open_quote {
|
37
|
+
unclosed_line = curline;
|
38
|
+
}
|
39
|
+
|
40
|
+
action close_quote {
|
41
|
+
unclosed_line = 0;
|
42
|
+
}
|
43
|
+
|
44
|
+
action read_unquoted {
  // ts marks the start of the current token and p the end of the field.
  if (p > ts) {
    field = rb_str_new(ts, p - ts);
    ASSOCIATE_INDEX;
  }
  else if (p == ts) {
    // Unquoted empty fields are nil, not "", in Ruby.
    field = Qnil;
  }
}
|
54
|
+
|
55
|
+
action read_quoted {
  // Builds `field` from the bytes between ts and p, collapsing each doubled
  // quote character into a single one.
  if (p == ts) {
    field = rb_str_new2("");
    ASSOCIATE_INDEX;
  }
  // @note If we add an action on '""', we can skip some steps if no '""' is found.
  else if (p > ts) {
    // Operating on ts in-place produces odd behavior, FYI, so the unescaped
    // bytes are written to a scratch buffer. It never needs more than p - ts
    // bytes because unescaping only removes characters.
    char *copy = ALLOC_N(char, p - ts);
    char *reader = ts, *writer = copy;
    int escaped = 0;

    while (p > reader) {
      if (*reader == quote_char && !escaped) {
        // Skip the escaping character.
        escaped = 1;
      }
      else {
        escaped = 0;
        *writer++ = *reader;
      }
      reader++;
    }

    field = rb_str_new(copy, writer - copy);

    // rb_str_new copied the bytes, so release the scratch buffer right away:
    // ASSOCIATE_INDEX transcodes and can raise, which would otherwise leak it.
    // Memory obtained from ALLOC_N must be released with xfree, not free.
    xfree(copy);

    ASSOCIATE_INDEX;
  }
}
|
89
|
+
|
90
|
+
action new_field {
|
91
|
+
rb_ary_push(row, field);
|
92
|
+
field = Qnil;
|
93
|
+
}
|
94
|
+
|
95
|
+
action new_row {
  // Flush the pending field (same as new_field) unless the row is entirely
  // blank, i.e. nothing has been collected and the pending field is nil.
  if (!(NIL_P(field) && RARRAY_LEN(row) == 0)) {
    rb_ary_push(row, field);
    field = Qnil;
  }

  // Hand the finished row to the caller's block, then start a fresh one.
  rb_yield(row);
  row = rb_ary_new();
}
|
104
|
+
|
105
|
+
action last_row {
  // Flush the pending field unless the row is entirely blank.
  if (!(NIL_P(field) && RARRAY_LEN(row) == 0)) {
    rb_ary_push(row, field);
  }
  // Only yield a final row that actually holds something.
  if (RARRAY_LEN(row) > 0) {
    rb_yield(row);
  }
}
|
113
|
+
|
114
|
+
EOF = 0 >last_row;
|
115
|
+
quote_char = '"';
|
116
|
+
col_sep = ',' >new_field;
|
117
|
+
row_sep = ('\r' '\n'? | '\n') @new_line;
|
118
|
+
unquoted = (any* -- quote_char -- col_sep -- row_sep - EOF) %read_unquoted;
|
119
|
+
quoted = quote_char >open_quote (any - quote_char - EOF | quote_char quote_char | row_sep)* %read_quoted quote_char >close_quote;
|
120
|
+
field = unquoted | quoted;
|
121
|
+
# fields = (field col_sep)* field?;
|
122
|
+
# file = (fields row_sep >new_row)* fields?;
|
123
|
+
|
124
|
+
# @see Ragel Guide: 6.3 Scanners
|
125
|
+
# Remember that an unquoted field can be zero-length.
|
126
|
+
main := |*
|
127
|
+
field col_sep EOF?;
|
128
|
+
field row_sep >new_row EOF?;
|
129
|
+
field EOF;
|
130
|
+
*|;
|
131
|
+
|
132
|
+
# Non-scanner version requires very large buffer.
|
133
|
+
# main := file $/{
|
134
|
+
# if (!NIL_P(field) || RARRAY_LEN(row)) {
|
135
|
+
# rb_ary_push(row, field);
|
136
|
+
# rb_yield(row);
|
137
|
+
# }
|
138
|
+
# };
|
139
|
+
}%%
|
140
|
+
|
141
|
+
%% write data;
|
142
|
+
|
143
|
+
#define BUFSIZE 16384
|
144
|
+
|
145
|
+
// Implements FastCSV.raw_parse(port, opts = nil) { |row| ... }.
//
// port: an object responding to #read (consumed in chunks of the current
//       buffer space) or to #to_str (parsed as one String).
// opts: nil or a Hash. Only :encoding is read; it must be a String holding
//       either one encoding name or "first:second". With two names, fields
//       are labelled with the first encoding and transcoded to the second
//       (see ASSOCIATE_INDEX); with one name, fields are labelled with it.
//
// Yields each row as an Array to the given block and returns nil.
//
// Raises ArgumentError for unusable arguments, FastCSV::ParseError when the
// input ends in a non-final scanner state, and IOError when #read returns
// more bytes than requested.
//
// NOTE(review): if the block raises or breaks out of rb_yield, `buf` is not
// released; fixing that needs rb_ensure around the scan loop.
VALUE fastcsv(int argc, VALUE *argv, VALUE self) {
  int cs, act, have = 0, curline = 1, io = 0;
  char *ts = 0, *te = 0, *buf = 0, *eof = 0;

  VALUE port, opts;
  VALUE row = rb_ary_new(), field = Qnil, bufsize = Qnil;
  int done = 0, unclosed_line = 0, buffer_size = 0;
  // -1 means "no source encoding given", so fields are only labelled with the
  // external encoding. Starting at 0 would select encoding index 0 and force
  // a transcode of every field even when no :encoding option is passed.
  int internal_index = -1, external_index = rb_enc_to_index(rb_default_external_encoding());
  rb_encoding *external_encoding = rb_default_external_encoding();

  VALUE option;
  char quote_char = '"'; //, *col_sep = ",", *row_sep = "\r\n";

  rb_scan_args(argc, argv, "11", &port, &opts);
  io = rb_respond_to(port, s_read);
  if (!io) {
    if (rb_respond_to(port, s_to_str)) {
      port = rb_funcall(port, s_to_str, 0);
      StringValue(port);
    }
    else {
      rb_raise(rb_eArgError, "data has to respond to #read or #to_str");
    }
  }

  if (NIL_P(opts)) {
    opts = rb_hash_new();
  }
  else if (TYPE(opts) != T_HASH) {
    rb_raise(rb_eArgError, "options has to be a Hash or nil");
  }

  // @note Add machines for common CSV dialects, or see if we can use "when"
  // from Chapter 6 to compare the character to the host program's variable.
  // option = rb_hash_aref(opts, ID2SYM(rb_intern("quote_char")));
  // if (TYPE(option) == T_STRING && RSTRING_LEN(option) == 1) {
  //   quote_char = *StringValueCStr(option);
  // }
  // else if (!NIL_P(option)) {
  //   rb_raise(rb_eArgError, ":quote_char has to be a single character String");
  // }

  // option = rb_hash_aref(opts, ID2SYM(rb_intern("col_sep")));
  // if (TYPE(option) == T_STRING) {
  //   col_sep = StringValueCStr(option);
  // }
  // else if (!NIL_P(option)) {
  //   rb_raise(rb_eArgError, ":col_sep has to be a String");
  // }

  // option = rb_hash_aref(opts, ID2SYM(rb_intern("row_sep")));
  // if (TYPE(option) == T_STRING) {
  //   row_sep = StringValueCStr(option);
  // }
  // else if (!NIL_P(option)) {
  //   rb_raise(rb_eArgError, ":row_sep has to be a String");
  // }

  option = rb_hash_aref(opts, ID2SYM(rb_intern("encoding")));
  if (TYPE(option) == T_STRING) {
    // @see parse_mode_enc in Ruby's io.c
    const char *string = StringValueCStr(option), *pointer;
    char internal_encoding_name[ENCODING_MAXNAMELEN + 1];

    // Split "first:second" at the last colon; pointer ends up on "second".
    pointer = strrchr(string, ':');
    if (pointer) {
      long len = (pointer++) - string;
      if (len == 0 || len > ENCODING_MAXNAMELEN) {
        internal_index = -1;
      }
      else {
        memcpy(internal_encoding_name, string, len);
        internal_encoding_name[len] = '\0';
        string = internal_encoding_name;
        internal_index = rb_enc_find_index(internal_encoding_name);
      }
    }
    else {
      internal_index = rb_enc_find_index(string);
    }

    if (internal_index < 0 && internal_index != -2) {
      rb_warn("Unsupported encoding %s ignored", string);
    }

    if (pointer) {
      external_index = rb_enc_find_index(pointer);
      if (external_index >= 0) {
        external_encoding = rb_enc_from_index(external_index);
      }
      else {
        // Report the name that actually failed to resolve (the part after the
        // colon), not the first name.
        rb_warn("Unsupported encoding %s ignored", pointer);
      }
    }
    else if (internal_index >= 0) {
      external_encoding = rb_enc_from_index(internal_index);
    }
  }
  else if (!NIL_P(option)) {
    rb_raise(rb_eArgError, ":encoding has to be a String");
  }

  // FastCSV.buffer_size (stored as @buffer_size on the module) overrides the
  // default chunk size when it is set to a non-nil value.
  buffer_size = BUFSIZE;
  if (rb_ivar_defined(self, rb_intern("@buffer_size")) == Qtrue) {
    bufsize = rb_ivar_get(self, rb_intern("@buffer_size"));
    if (!NIL_P(bufsize)) {
      buffer_size = NUM2INT(bufsize);
    }
  }

  if (io) {
    buf = ALLOC_N(char, buffer_size);
  }

  %% write init;

  while (!done) {
    VALUE str;
    char *p, *pe;
    int len, space = buffer_size - have, tokstart_diff = 0, tokend_diff = 0;

    if (io) {
      if (space == 0) {
        // The unfinished token fills the whole buffer: grow it. ts/te point
        // into buf, so carry them across the realloc as offsets. They are
        // only rebased when set; arithmetic on a null pointer is undefined.
        int rebase_ts = (ts != 0), rebase_te = (te != 0);

        if (rebase_ts) {
          tokstart_diff = ts - buf;
        }
        if (rebase_te) {
          tokend_diff = te - buf;
        }

        buffer_size += BUFSIZE;
        REALLOC_N(buf, char, buffer_size);

        space = buffer_size - have;

        if (rebase_ts) {
          ts = buf + tokstart_diff;
        }
        if (rebase_te) {
          te = buf + tokend_diff;
        }
      }
      p = buf + have;

      str = rb_funcall(port, s_read, 1, INT2FIX(space));
      if (NIL_P(str)) {
        // StringIO#read returns nil for empty string.
        len = 0;
      }
      else {
        StringValue(str);
        // port is any object with #read, so do not trust it to honour the
        // requested length: copying more than `space` bytes would overflow buf.
        if (RSTRING_LEN(str) > space) {
          xfree(buf);
          rb_raise(rb_eIOError, "#read returned more data than requested");
        }
        len = (int) RSTRING_LEN(str);
        memcpy(p, RSTRING_PTR(str), len);
      }

      if (len < space) {
        // EOF actions don't work in scanners, so we add a sentinel value.
        // @see http://www.complang.org/pipermail/ragel-users/2007-May/001516.html
        // @see https://github.com/leeonix/lua-csv-ragel/blob/master/src/csv.rl
        p[len++] = 0;
        done = 1;
      }
    }
    else {
      // NOTE(review): this scans the String's own buffer and stores the
      // sentinel at index RSTRING_LEN; presumably that slot is the String's
      // terminator - confirm this is safe for every String representation.
      p = RSTRING_PTR(port);
      len = RSTRING_LEN(port);
      p[len++] = 0;
      done = 1;
    }

    pe = p + len;
    // if (done) {
    //   // This triggers the eof action in the non-scanner version.
    //   eof = pe;
    // }
    %% write exec;

    if (done && cs < fastcsv_first_final) {
      // Memory from ALLOC_N/REALLOC_N must be released with xfree.
      if (buf != NULL) {
        xfree(buf);
      }
      if (unclosed_line) {
        rb_raise(rb_eParseError, "Unclosed quoted field on line %d.", unclosed_line);
      }
      // Ruby raises different errors for illegal quoting, depending on whether
      // a quoted string is followed by a string ("Unclosed quoted field on line
      // %d.") or by a string ending in a quote ("Missing or stray quote in line
      // %d"). These precisions are kind of bogus, but we can try using $!.
      else {
        rb_raise(rb_eParseError, "Illegal quoting in line %d.", curline);
      }
    }

    if (ts == 0) {
      // No token in progress: the next read can reuse the whole buffer.
      have = 0;
    }
    else if (io) {
      // Move the unfinished token to the front of the buffer so the next read
      // appends to it.
      have = pe - ts;
      memmove(buf, ts, have);
      te = buf + (te - ts);
      ts = buf;
    }
  }

  if (buf != NULL) {
    xfree(buf);
  }

  return Qnil;
}
|
347
|
+
|
348
|
+
void Init_fastcsv() {
|
349
|
+
s_read = rb_intern("read");
|
350
|
+
s_to_str = rb_intern("to_str");
|
351
|
+
|
352
|
+
mModule = rb_define_module("FastCSV");
|
353
|
+
rb_define_attr(rb_singleton_class(mModule), "buffer_size", 1, 1);
|
354
|
+
rb_define_singleton_method(mModule, "raw_parse", fastcsv, -1);
|
355
|
+
rb_eParseError = rb_define_class_under(mModule, "ParseError", rb_eStandardError);
|
356
|
+
}
|
data/fastcsv.gemspec
ADDED
@@ -0,0 +1,24 @@
|
|
1
|
+
# -*- encoding: utf-8 -*-
|
2
|
+
|
3
|
+
Gem::Specification.new do |spec|
  # Identity.
  spec.name     = "fastcsv"
  spec.version  = '0.0.1'
  spec.platform = Gem::Platform::RUBY
  spec.authors  = ["Open North"]
  spec.email    = ["info@opennorth.ca"]
  spec.homepage = "http://github.com/opennorth/fastcsv"
  spec.summary  = %q{A fast Ragel-based CSV parser}
  spec.license  = 'MIT'

  # Packaged files are whatever git tracks.
  spec.files       = `git ls-files`.split("\n")
  spec.test_files  = `git ls-files -- {test,spec,features}/*`.split("\n")
  spec.executables = `git ls-files -- bin/*`.split("\n").map { |path| File.basename(path) }
  spec.require_paths = ["lib"]
  # Native extension built from ext/fastcsv at install time.
  spec.extensions = ["ext/fastcsv/extconf.rb"]

  spec.add_development_dependency('coveralls')
  spec.add_development_dependency('json', '~> 1.7.7') # to silence coveralls warning
  spec.add_development_dependency('rake')
  spec.add_development_dependency('rake-compiler')
  spec.add_development_dependency('rspec', '~> 3.1')
end
|
data/lib/fastcsv.rb
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
require 'fastcsv/fastcsv'
|
@@ -0,0 +1,218 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
require 'csv'
|
4
|
+
|
5
|
+
RSpec.shared_examples 'a CSV parser' do
|
6
|
+
let :simple do
|
7
|
+
"foo\nbar\nbaz"
|
8
|
+
end
|
9
|
+
|
10
|
+
[
|
11
|
+
# Single tokens.
|
12
|
+
"",
|
13
|
+
"x",
|
14
|
+
%(""),
|
15
|
+
%("x"),
|
16
|
+
",",
|
17
|
+
"\n",
|
18
|
+
|
19
|
+
# Last tokens.
|
20
|
+
"x,y",
|
21
|
+
%(x,"y"),
|
22
|
+
"x,",
|
23
|
+
"x\n",
|
24
|
+
|
25
|
+
# Line endings.
|
26
|
+
"\n\n\n",
|
27
|
+
"\r\r\r",
|
28
|
+
"\r\n\r\n\r\n",
|
29
|
+
"foo\rbar\rbaz\r",
|
30
|
+
"foo\nbar\nbaz\n",
|
31
|
+
"foo\r\nbar\r\nbaz\r\n",
|
32
|
+
|
33
|
+
# Repetition.
|
34
|
+
"x,x,x",
|
35
|
+
"x\nx\nx",
|
36
|
+
%("x","x","x"),
|
37
|
+
%("x"\n"x"\n"x"),
|
38
|
+
",,,",
|
39
|
+
",\n,\n,",
|
40
|
+
|
41
|
+
# Blank.
|
42
|
+
%(,""),
|
43
|
+
%("",),
|
44
|
+
"\n\n\nfoo\n\n\n",
|
45
|
+
|
46
|
+
# Whitespace.
|
47
|
+
" x",
|
48
|
+
"x ",
|
49
|
+
" x ",
|
50
|
+
# Tab.
|
51
|
+
" x",
|
52
|
+
"x ",
|
53
|
+
" x ",
|
54
|
+
|
55
|
+
# Quoting.
|
56
|
+
%(foo,"bar,baz",bzz),
|
57
|
+
%(foo,"bar\nbaz",bzz),
|
58
|
+
%(foo,"""bar""baz""bzz""",zzz),
|
59
|
+
|
60
|
+
# Buffers.
|
61
|
+
"01234567890" * 2_000, # 20,000 > BUFSIZE
|
62
|
+
"0123456789," * 2_000,
|
63
|
+
|
64
|
+
# Uneven rows.
|
65
|
+
"1,2,3\n1,2",
|
66
|
+
"1,2\n1,2,3",
|
67
|
+
|
68
|
+
# Uneven data types.
|
69
|
+
"2000-01-01,2,x\nx,2000-01-01,2",
|
70
|
+
].each do |csv|
|
71
|
+
it "should parse: #{csv}" do
|
72
|
+
expect(parse(csv)).to eq(CSV.parse(csv))
|
73
|
+
end
|
74
|
+
end
|
75
|
+
|
76
|
+
[
|
77
|
+
# Whitespace.
|
78
|
+
# @note Ruby's CSV library has inexplicably inconsistent error messages for
|
79
|
+
# the same class of error.
|
80
|
+
[%( "x"), 'Illegal quoting in line %d.', 'Illegal quoting in line %d.'],
|
81
|
+
[%("x" ), 'Unclosed quoted field on line %d.', 'Illegal quoting in line %d.'],
|
82
|
+
[%( "x" ), 'Illegal quoting in line %d.', 'Illegal quoting in line %d.'],
|
83
|
+
# Tab.
|
84
|
+
[%( "x"), 'Illegal quoting in line %d.', 'Illegal quoting in line %d.'],
|
85
|
+
[%("x" ), 'Unclosed quoted field on line %d.', 'Illegal quoting in line %d.'],
|
86
|
+
[%( "x" ), 'Illegal quoting in line %d.', 'Illegal quoting in line %d.'],
|
87
|
+
|
88
|
+
# Quoted next to unquoted.
|
89
|
+
[%("x"x), 'Unclosed quoted field on line %d.', 'Illegal quoting in line %d.'],
|
90
|
+
[%(x"x"), 'Illegal quoting in line %d.', 'Illegal quoting in line %d.'],
|
91
|
+
[%(x"x"x), 'Illegal quoting in line %d.', 'Illegal quoting in line %d.'],
|
92
|
+
[%("x"x"x"), 'Missing or stray quote in line %d', 'Illegal quoting in line %d.'],
|
93
|
+
|
94
|
+
# Unclosed quote.
|
95
|
+
[%("x), 'Unclosed quoted field on line %d.', 'Unclosed quoted field on line %d.'],
|
96
|
+
|
97
|
+
# Quote in unquoted field.
|
98
|
+
[%(x"x), 'Illegal quoting in line %d.', 'Illegal quoting in line %d.'],
|
99
|
+
|
100
|
+
# Unescaped quote in quoted field.
|
101
|
+
[%("x"x"), 'Unclosed quoted field on line %d.', 'Illegal quoting in line %d.'],
|
102
|
+
].each do |csv,csv_error,fastcsv_error|
|
103
|
+
it "should raise an error on: #{csv.inspect.gsub('\"', '"')}" do
|
104
|
+
expect{CSV.parse(csv)}.to raise_error(CSV::MalformedCSVError, csv_error % 1)
|
105
|
+
expect{parse(csv)}.to raise_error(FastCSV::ParseError, fastcsv_error % 1)
|
106
|
+
end
|
107
|
+
|
108
|
+
it "should raise an error with the correct line number on: #{"\n#{csv}\n".inspect.gsub('\"', '"')}" do
|
109
|
+
csv = "\n#{csv}\n"
|
110
|
+
expect{CSV.parse(csv)}.to raise_error(CSV::MalformedCSVError, csv_error % 2)
|
111
|
+
expect{parse(csv)}.to raise_error(FastCSV::ParseError, fastcsv_error % 2)
|
112
|
+
end
|
113
|
+
end
|
114
|
+
|
115
|
+
it 'should raise an error on mixed row separators are' do
|
116
|
+
csv = "foo\rbar\nbaz\r\n"
|
117
|
+
expect{CSV.parse(csv)}.to raise_error(CSV::MalformedCSVError, 'Unquoted fields do not allow \r or \n (line 2).')
|
118
|
+
skip
|
119
|
+
end
|
120
|
+
|
121
|
+
it 'should raise an error if no block is given' do
|
122
|
+
expect{parse_without_block('x')}.to raise_error(LocalJumpError, 'no block given')
|
123
|
+
end
|
124
|
+
|
125
|
+
it 'should not raise an error if no block and empty input' do
|
126
|
+
expect{parse_without_block('')}.to_not raise_error
|
127
|
+
end
|
128
|
+
|
129
|
+
it 'should raise an error if the options are not a Hash or nil' do
|
130
|
+
expect{parse('', '')}.to raise_error(ArgumentError, 'options has to be a Hash or nil')
|
131
|
+
end
|
132
|
+
|
133
|
+
it 'should allow nil buffer size' do
|
134
|
+
FastCSV.buffer_size = nil
|
135
|
+
expect(parse(simple)).to eq(CSV.parse(simple))
|
136
|
+
FastCSV.buffer_size = nil
|
137
|
+
end
|
138
|
+
|
139
|
+
it 'should recover from a zero buffer size' do
|
140
|
+
FastCSV.buffer_size = 0
|
141
|
+
expect(parse(simple)).to eq(CSV.parse(simple))
|
142
|
+
FastCSV.buffer_size = nil
|
143
|
+
end
|
144
|
+
end
|
145
|
+
|
146
|
+
RSpec.describe FastCSV do
  context "with String" do
    # Restore the default buffer size even when an example fails part-way,
    # so one failure cannot leak a bad size into later examples.
    after { FastCSV.buffer_size = nil }

    # Collects every row yielded for a String input.
    def parse(csv, options = nil)
      rows = []
      FastCSV.raw_parse(csv, options) { |row| rows << row }
      rows
    end

    def parse_without_block(csv, options = nil)
      FastCSV.raw_parse(csv, options)
    end

    include_examples 'a CSV parser'

    it 'should not raise an error on negative buffer size' do
      FastCSV.buffer_size = -1
      expect { parse(simple) }.to_not raise_error
    end
  end

  context "with StringIO" do
    # Restore the default buffer size even when an example fails part-way.
    after { FastCSV.buffer_size = nil }

    # Collects every row yielded for an IO-like input.
    def parse(csv, options = nil)
      rows = []
      FastCSV.raw_parse(StringIO.new(csv), options) { |row| rows << row }
      rows
    end

    def parse_without_block(csv, options = nil)
      FastCSV.raw_parse(StringIO.new(csv), options)
    end

    include_examples 'a CSV parser'

    it 'should raise an error on negative buffer size' do
      FastCSV.buffer_size = -1
      expect { parse(simple) }.to raise_error(NoMemoryError)
    end
  end

  # Parses a fixture with FastCSV and with Ruby's CSV using the same encoding
  # option, then expects identical rows and an identical encoding on the
  # first field.
  def parse_with_encoding(basename, encoding)
    filename = File.expand_path(File.join('..', 'fixtures', basename), __FILE__)
    options = {encoding: encoding}
    File.open(filename) do |io|
      rows = []
      # raw_parse takes its options as a positional Hash.
      FastCSV.raw_parse(io, options) { |row| rows << row }
      # CSV.read takes keyword options; a positional Hash is rejected by Ruby 3.
      expected = CSV.read(filename, encoding: encoding)
      expect(rows).to eq(expected)
      expect(rows[0][0].encoding).to eq(expected[0][0].encoding)
    end
  end

  it 'should encode the input' do
    parse_with_encoding('iso-8859-1.csv', 'iso-8859-1')
  end

  it 'should encode the input with a blank internal encoding' do
    parse_with_encoding('utf-8.csv', ':utf-8')
  end

  it 'should transcode the input' do
    parse_with_encoding('iso-8859-1.csv', 'iso-8859-1:utf-8')
  end

  it 'should invalid encoding' do
    parse_with_encoding('utf-8.csv', 'invalid')
  end

  it 'should raise an error if the input is not a String or IO' do
    expect { FastCSV.raw_parse(nil) }.to raise_error(ArgumentError, 'data has to respond to #read or #to_str')
  end
end
|