fastcsv 0.0.2 → 0.0.3
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.travis.yml +11 -0
- data/README.md +37 -2
- data/TESTS.md +42 -0
- data/ext/fastcsv/fastcsv.c +281 -223
- data/ext/fastcsv/fastcsv.rl +149 -72
- data/fastcsv.gemspec +1 -1
- data/lib/fastcsv.rb +130 -0
- data/spec/fastcsv_spec.rb +189 -57
- data/spec/fixtures/csv.csv +3 -0
- data/spec/fixtures/iso-8859-1-quoted.csv +1 -0
- data/spec/fixtures/utf-8-quoted.csv +1 -0
- data/spec/spec_helper.rb +5 -0
- data/test/csv/base.rb +8 -0
- data/test/csv/line_endings.gz +0 -0
- data/test/csv/test_csv_parsing.rb +221 -0
- data/test/csv/test_csv_writing.rb +97 -0
- data/test/csv/test_data_converters.rb +263 -0
- data/test/csv/test_encodings.rb +339 -0
- data/test/csv/test_features.rb +317 -0
- data/test/csv/test_headers.rb +289 -0
- data/test/csv/test_interface.rb +362 -0
- data/test/csv/test_row.rb +349 -0
- data/test/csv/test_table.rb +420 -0
- data/test/csv/ts_all.rb +20 -0
- data/test/runner.rb +36 -0
- data/test/with_different_ofs.rb +17 -0
- metadata +38 -2
data/spec/fastcsv_spec.rb
CHANGED
@@ -1,6 +1,7 @@
|
|
1
|
+
# coding: utf-8
|
1
2
|
require 'spec_helper'
|
2
3
|
|
3
|
-
|
4
|
+
$ORIGINAL_VERBOSE = $VERBOSE
|
4
5
|
|
5
6
|
RSpec.shared_examples 'a CSV parser' do
|
6
7
|
let :simple do
|
@@ -61,7 +62,6 @@ RSpec.shared_examples 'a CSV parser' do
|
|
61
62
|
%('foo','bar','baz'),
|
62
63
|
|
63
64
|
# Buffers.
|
64
|
-
"01234567890" * 2_000, # 20,000 > BUFSIZE
|
65
65
|
"0123456789," * 2_000,
|
66
66
|
|
67
67
|
# Uneven rows.
|
@@ -76,23 +76,44 @@ RSpec.shared_examples 'a CSV parser' do
|
|
76
76
|
end
|
77
77
|
end
|
78
78
|
|
79
|
+
# This has caused segmentation faults in the StringIO context in the past, so
|
80
|
+
# we separate it out so that it's easier to special case this spec. The fault
|
81
|
+
# seems to occur less frequently when the spec is run in isolation. The
|
82
|
+
# "TypeError: no implicit conversion from nil to integer" exception after a
|
83
|
+
# fault is related to RSpec, not the fault.
|
84
|
+
it "should parse long rows" do
|
85
|
+
csv = "01234567890" * 2_000 # 20,000 > BUFSIZE
|
86
|
+
expect(parse(csv)).to eq(CSV.parse(csv))
|
87
|
+
end
|
88
|
+
|
79
89
|
[
|
80
90
|
# Whitespace.
|
81
|
-
#
|
82
|
-
#
|
91
|
+
#
|
92
|
+
# CSV's error messages are a consequence of its parser's implementation. It
|
93
|
+
# splits on :col_sep and then reads parts, making it possible to identify
|
94
|
+
# its "Missing or stray quote". FastCSV, as a state machine, wouldn't even
|
95
|
+
# get that far, as it would simply find no match and quit.
|
96
|
+
#
|
97
|
+
# * "Missing or stray quote in line %d" if quoted field matches /[^"]"[^"]/
|
98
|
+
# (for any quote char). Raises "Unclosed quoted field" instead if the
|
99
|
+
# quoted field has an odd number of quote chars.
|
100
|
+
# * "Unquoted fields do not allow \r or \n (line %d)." if unquoted field
|
101
|
+
# contains "\r" or "\n", e.g. if `:row_sep` is "\n" but file uses "\r".
|
102
|
+
# * "Illegal quoting in line %d" if unquoted field contains quote char.
|
103
|
+
# * "Unclosed quoted field on line %d" if reaches EOF without closing.
|
83
104
|
[%( "x"), 'Illegal quoting in line %d.', 'Illegal quoting in line %d.'],
|
84
|
-
[%("x" ), 'Unclosed quoted field on line %d.', 'Illegal quoting in line %d.'],
|
105
|
+
[%("x" ), 'Unclosed quoted field on line %d.', 'Illegal quoting in line %d.'], # WONTFIX
|
85
106
|
[%( "x" ), 'Illegal quoting in line %d.', 'Illegal quoting in line %d.'],
|
86
107
|
# Tab.
|
87
108
|
[%( "x"), 'Illegal quoting in line %d.', 'Illegal quoting in line %d.'],
|
88
|
-
[%("x" ), 'Unclosed quoted field on line %d.', 'Illegal quoting in line %d.'],
|
109
|
+
[%("x" ), 'Unclosed quoted field on line %d.', 'Illegal quoting in line %d.'], # WONTFIX
|
89
110
|
[%( "x" ), 'Illegal quoting in line %d.', 'Illegal quoting in line %d.'],
|
90
111
|
|
91
112
|
# Quoted next to unquoted.
|
92
|
-
[%("x"x), 'Unclosed quoted field on line %d.', 'Illegal quoting in line %d.'],
|
113
|
+
[%("x"x), 'Unclosed quoted field on line %d.', 'Illegal quoting in line %d.'], # WONTFIX
|
93
114
|
[%(x"x"), 'Illegal quoting in line %d.', 'Illegal quoting in line %d.'],
|
94
115
|
[%(x"x"x), 'Illegal quoting in line %d.', 'Illegal quoting in line %d.'],
|
95
|
-
[%("x"x"x"), 'Missing or stray quote in line %d', 'Illegal quoting in line %d.'],
|
116
|
+
[%("x"x"x"), 'Missing or stray quote in line %d', 'Illegal quoting in line %d.'], # WONTFIX
|
96
117
|
|
97
118
|
# Unclosed quote.
|
98
119
|
[%("x), 'Unclosed quoted field on line %d.', 'Unclosed quoted field on line %d.'],
|
@@ -101,17 +122,17 @@ RSpec.shared_examples 'a CSV parser' do
|
|
101
122
|
[%(x"x), 'Illegal quoting in line %d.', 'Illegal quoting in line %d.'],
|
102
123
|
|
103
124
|
# Unescaped quote in quoted field.
|
104
|
-
[%("x"x"), 'Unclosed quoted field on line %d.', 'Illegal quoting in line %d.'],
|
125
|
+
[%("x"x"), 'Unclosed quoted field on line %d.', 'Illegal quoting in line %d.'], # WONTFIX
|
105
126
|
].each do |csv,csv_error,fastcsv_error|
|
106
127
|
it "should raise an error on: #{csv.inspect.gsub('\"', '"')}" do
|
107
128
|
expect{CSV.parse(csv)}.to raise_error(CSV::MalformedCSVError, csv_error % 1)
|
108
|
-
expect{parse(csv)}.to raise_error(FastCSV::
|
129
|
+
expect{parse(csv)}.to raise_error(FastCSV::MalformedCSVError, fastcsv_error % 1)
|
109
130
|
end
|
110
131
|
|
111
132
|
it "should raise an error with the correct line number on: #{"\n#{csv}\n".inspect.gsub('\"', '"')}" do
|
112
133
|
csv = "\n#{csv}\n"
|
113
134
|
expect{CSV.parse(csv)}.to raise_error(CSV::MalformedCSVError, csv_error % 2)
|
114
|
-
expect{parse(csv)}.to raise_error(FastCSV::
|
135
|
+
expect{parse(csv)}.to raise_error(FastCSV::MalformedCSVError, fastcsv_error % 2)
|
115
136
|
end
|
116
137
|
end
|
117
138
|
|
@@ -123,9 +144,10 @@ RSpec.shared_examples 'a CSV parser' do
|
|
123
144
|
expect(actual).to eq(expected)
|
124
145
|
end
|
125
146
|
|
126
|
-
it 'should raise an error on mixed row separators
|
127
|
-
|
128
|
-
|
147
|
+
it 'should raise an error on mixed row separators' do
|
148
|
+
csv = "foo\rbar\nbaz\r\n"
|
149
|
+
expect{CSV.parse(csv)}.to raise_error(CSV::MalformedCSVError, 'Unquoted fields do not allow \r or \n (line 2).')
|
150
|
+
expect{FastCSV.parse(csv)}.to raise_error(FastCSV::MalformedCSVError, 'Unquoted fields do not allow \r or \n (line 2).')
|
129
151
|
end
|
130
152
|
|
131
153
|
context 'when initializing' do
|
@@ -143,25 +165,82 @@ RSpec.shared_examples 'a CSV parser' do
|
|
143
165
|
end
|
144
166
|
|
145
167
|
context 'when setting a buffer size' do
|
168
|
+
def parse_with_buffer_size(csv, buffer_size)
|
169
|
+
parser = FastCSV::Parser.new
|
170
|
+
parser.buffer_size = buffer_size
|
171
|
+
rows = parse(csv, nil, parser)
|
172
|
+
parser.buffer_size = nil
|
173
|
+
rows
|
174
|
+
end
|
175
|
+
|
146
176
|
it 'should allow nil' do
|
147
|
-
|
148
|
-
expect(parse(simple)).to eq(CSV.parse(simple))
|
149
|
-
FastCSV.buffer_size = nil
|
177
|
+
expect(parse_with_buffer_size(simple, nil)).to eq(CSV.parse(simple))
|
150
178
|
end
|
151
179
|
|
180
|
+
# If buffer_size is actually set to 0, it can cause segmentation faults.
|
152
181
|
it 'should allow zero' do
|
153
|
-
|
154
|
-
expect(parse(simple)).to eq(CSV.parse(simple))
|
155
|
-
FastCSV.buffer_size = nil
|
182
|
+
expect(parse_with_buffer_size(simple, 0)).to eq(CSV.parse(simple))
|
156
183
|
end
|
157
184
|
end
|
158
185
|
end
|
159
186
|
|
187
|
+
RSpec.shared_examples 'with encoded strings' do
|
188
|
+
before(:all) do
|
189
|
+
$VERBOSE = nil
|
190
|
+
end
|
191
|
+
|
192
|
+
after(:all) do
|
193
|
+
$VERBOSE = $ORIGINAL_VERBOSE
|
194
|
+
end
|
195
|
+
|
196
|
+
def parse_with_encoding(basename, encoding)
|
197
|
+
filename = File.expand_path(File.join('..', 'fixtures', basename), __FILE__)
|
198
|
+
options = {encoding: encoding}
|
199
|
+
File.open(filename) do |io|
|
200
|
+
rows = []
|
201
|
+
FastCSV.raw_parse(io, options){|row| rows << row}
|
202
|
+
expected = CSV.read(filename, options)
|
203
|
+
expect(rows[0][0].encoding).to eq(expected[0][0].encoding)
|
204
|
+
expect(rows).to eq(expected)
|
205
|
+
end
|
206
|
+
end
|
207
|
+
|
208
|
+
it 'should encode with internal encoding' do
|
209
|
+
parse_with_encoding("iso-8859-1#{suffix}.csv", 'iso-8859-1')
|
210
|
+
end
|
211
|
+
|
212
|
+
it 'should encode with external encoding' do
|
213
|
+
parse_with_encoding("iso-8859-1#{suffix}.csv", 'iso-8859-1:-')
|
214
|
+
end
|
215
|
+
|
216
|
+
it 'should transcode' do
|
217
|
+
parse_with_encoding("iso-8859-1#{suffix}.csv", 'iso-8859-1:utf-8')
|
218
|
+
end
|
219
|
+
|
220
|
+
it 'should recover from blank external encoding' do
|
221
|
+
parse_with_encoding("utf-8#{suffix}.csv", ':utf-8')
|
222
|
+
end
|
223
|
+
|
224
|
+
it 'should recover from invalid internal encoding' do
|
225
|
+
parse_with_encoding("utf-8#{suffix}.csv", 'invalid')
|
226
|
+
parse_with_encoding("utf-8#{suffix}.csv", 'utf-8:invalid')
|
227
|
+
end
|
228
|
+
|
229
|
+
it 'should recover from invalid external encoding' do
|
230
|
+
parse_with_encoding("utf-8#{suffix}.csv", 'invalid:-')
|
231
|
+
parse_with_encoding("utf-8#{suffix}.csv", 'invalid:utf-8')
|
232
|
+
end
|
233
|
+
|
234
|
+
it 'should recover from invalid internal and external encodings' do
|
235
|
+
parse_with_encoding("utf-8#{suffix}.csv", 'invalid:invalid')
|
236
|
+
end
|
237
|
+
end
|
238
|
+
|
160
239
|
RSpec.describe FastCSV do
|
161
240
|
context "with String" do
|
162
|
-
def parse(csv, options = nil)
|
241
|
+
def parse(csv, options = nil, parser = FastCSV)
|
163
242
|
rows = []
|
164
|
-
|
243
|
+
parser.raw_parse(csv, options){|row| rows << row}
|
165
244
|
rows
|
166
245
|
end
|
167
246
|
|
@@ -172,16 +251,17 @@ RSpec.describe FastCSV do
|
|
172
251
|
include_examples 'a CSV parser'
|
173
252
|
|
174
253
|
it 'should not raise an error on negative buffer size' do
|
175
|
-
|
176
|
-
|
177
|
-
|
254
|
+
parser = FastCSV::Parser.new
|
255
|
+
parser.buffer_size = -1
|
256
|
+
expect{parse(simple, nil, parser)}.to_not raise_error
|
257
|
+
parser.buffer_size = nil
|
178
258
|
end
|
179
259
|
end
|
180
260
|
|
181
261
|
context "with StringIO" do
|
182
|
-
def parse(csv, options = nil)
|
262
|
+
def parse(csv, options = nil, parser = FastCSV)
|
183
263
|
rows = []
|
184
|
-
|
264
|
+
parser.raw_parse(StringIO.new(csv), options){|row| rows << row}
|
185
265
|
rows
|
186
266
|
end
|
187
267
|
|
@@ -192,53 +272,105 @@ RSpec.describe FastCSV do
|
|
192
272
|
include_examples 'a CSV parser'
|
193
273
|
|
194
274
|
it 'should raise an error on negative buffer size' do
|
195
|
-
|
196
|
-
|
197
|
-
|
275
|
+
parser = FastCSV::Parser.new
|
276
|
+
parser.buffer_size = -1
|
277
|
+
expect{parse(simple, nil, parser)}.to raise_error(NoMemoryError)
|
278
|
+
parser.buffer_size = nil
|
198
279
|
end
|
199
280
|
end
|
200
281
|
|
201
|
-
context 'with encoded
|
202
|
-
def
|
203
|
-
|
204
|
-
|
205
|
-
|
206
|
-
|
207
|
-
|
208
|
-
|
209
|
-
|
210
|
-
|
211
|
-
|
282
|
+
context 'with encoded unquoted fields' do
|
283
|
+
def suffix
|
284
|
+
''
|
285
|
+
end
|
286
|
+
|
287
|
+
include_examples 'with encoded strings'
|
288
|
+
end
|
289
|
+
|
290
|
+
context 'with encoded quoted fields' do
|
291
|
+
def suffix
|
292
|
+
'-quoted'
|
293
|
+
end
|
294
|
+
|
295
|
+
include_examples 'with encoded strings'
|
296
|
+
end
|
297
|
+
|
298
|
+
context 'when initializing' do
|
299
|
+
it 'should raise an error if the input is not a String or IO' do
|
300
|
+
expect{FastCSV.raw_parse(nil)}.to raise_error(ArgumentError, 'data has to respond to #read or #to_str')
|
212
301
|
end
|
302
|
+
end
|
213
303
|
|
214
|
-
|
215
|
-
|
304
|
+
describe '#row' do
|
305
|
+
[
|
306
|
+
"",
|
307
|
+
"\n",
|
308
|
+
"\n\n",
|
309
|
+
"a,b,",
|
310
|
+
"a,b,\n",
|
311
|
+
"a,b,\nx,y,\n",
|
312
|
+
"a,b,c",
|
313
|
+
"a,b,c\n",
|
314
|
+
"a,b,c\nx,y,z\n",
|
315
|
+
].each do |csv|
|
316
|
+
it "should return the current row for: #{csv.inspect.gsub('\"', '"')}" do
|
317
|
+
parser = FastCSV::Parser.new
|
318
|
+
rows = []
|
319
|
+
parser.raw_parse(csv) do |row|
|
320
|
+
rows << parser.row
|
321
|
+
end
|
322
|
+
expect(rows).to eq(CSV.parse(csv).map{|row| CSV.generate_line(row).chomp("\n")})
|
323
|
+
end
|
216
324
|
end
|
325
|
+
end
|
217
326
|
|
218
|
-
|
219
|
-
|
327
|
+
context 'with IO methods' do
|
328
|
+
let(:csv) do
|
329
|
+
FastCSV.open(File.expand_path(File.join('..', 'fixtures', 'csv.csv'), __FILE__))
|
220
330
|
end
|
221
331
|
|
222
|
-
|
223
|
-
|
332
|
+
let(:csv2) do
|
333
|
+
FastCSV.open(File.expand_path(File.join('..', 'fixtures', 'csv.csv'), __FILE__))
|
224
334
|
end
|
225
335
|
|
226
|
-
|
227
|
-
|
336
|
+
describe '#pos=' do
|
337
|
+
it 'should read from the new position' do
|
338
|
+
expect(csv.shift).to eq(%w(name age))
|
339
|
+
expect(csv.shift).to eq(%w(Alice 42))
|
340
|
+
csv.pos = 9
|
341
|
+
expect(csv.shift).to eq(%w(Alice 42))
|
342
|
+
expect(csv.shift).to eq(%w(Bob 40))
|
343
|
+
end
|
228
344
|
end
|
229
345
|
|
230
|
-
|
231
|
-
|
346
|
+
describe '#reopen' do
|
347
|
+
it 'should read from the new position' do
|
348
|
+
expect(csv.shift).to eq(%w(name age))
|
349
|
+
expect(csv.shift).to eq(%w(Alice 42))
|
350
|
+
csv.reopen(csv2)
|
351
|
+
expect(csv.shift).to eq(%w(name age))
|
352
|
+
expect(csv.shift).to eq(%w(Alice 42))
|
353
|
+
end
|
232
354
|
end
|
233
355
|
|
234
|
-
|
235
|
-
|
356
|
+
describe '#seek' do
|
357
|
+
it 'should read from the new position' do
|
358
|
+
expect(csv.shift).to eq(%w(name age))
|
359
|
+
expect(csv.shift).to eq(%w(Alice 42))
|
360
|
+
csv.seek(9)
|
361
|
+
expect(csv.shift).to eq(%w(Alice 42))
|
362
|
+
expect(csv.shift).to eq(%w(Bob 40))
|
363
|
+
end
|
236
364
|
end
|
237
|
-
end
|
238
365
|
|
239
|
-
|
240
|
-
|
241
|
-
|
366
|
+
describe '#rewind' do
|
367
|
+
it 'should read from the new position' do
|
368
|
+
expect(csv.shift).to eq(%w(name age))
|
369
|
+
expect(csv.shift).to eq(%w(Alice 42))
|
370
|
+
csv.rewind
|
371
|
+
expect(csv.shift).to eq(%w(name age))
|
372
|
+
expect(csv.shift).to eq(%w(Alice 42))
|
373
|
+
end
|
242
374
|
end
|
243
375
|
end
|
244
376
|
end
|
@@ -0,0 +1 @@
|
|
1
|
+
ñ
|
@@ -0,0 +1 @@
|
|
1
|
+
"ñ"
|
data/spec/spec_helper.rb
CHANGED
@@ -1,6 +1,11 @@
|
|
1
1
|
require 'coveralls'
|
2
2
|
Coveralls.wear!
|
3
3
|
|
4
|
+
SimpleCov.start do
|
5
|
+
add_filter '/spec/'
|
6
|
+
add_filter '/test/'
|
7
|
+
end
|
8
|
+
|
4
9
|
RSpec.configure do |config|
|
5
10
|
config.expect_with :rspec do |expectations|
|
6
11
|
expectations.include_chain_clauses_in_custom_matcher_descriptions = true
|
data/test/csv/base.rb
ADDED
Binary file
|
@@ -0,0 +1,221 @@
|
|
1
|
+
#!/usr/bin/env ruby -w
|
2
|
+
# encoding: UTF-8
|
3
|
+
|
4
|
+
# test_csv_parsing.rb
|
5
|
+
#
|
6
|
+
# Created by James Edward Gray II on 2005-10-31.
|
7
|
+
# Copyright 2005 James Edward Gray II. You can redistribute or modify this code
|
8
|
+
# under the terms of Ruby's license.
|
9
|
+
|
10
|
+
require "timeout"
|
11
|
+
|
12
|
+
require_relative "base"
|
13
|
+
|
14
|
+
#
|
15
|
+
# Following tests are my interpretation of the
|
16
|
+
# {CSV RFC}[http://www.ietf.org/rfc/rfc4180.txt]. I only deviate from that
|
17
|
+
# document in one place (intentionally) and that is to make the default row
|
18
|
+
# separator <tt>$/</tt>.
|
19
|
+
#
|
20
|
+
class TestCSV::Parsing < TestCSV
|
21
|
+
extend DifferentOFS
|
22
|
+
|
23
|
+
BIG_DATA = "123456789\n" * 1024
|
24
|
+
|
25
|
+
def test_mastering_regex_example
|
26
|
+
ex = %Q{Ten Thousand,10000, 2710 ,,"10,000","It's ""10 Grand"", baby",10K}
|
27
|
+
assert_equal( [ "Ten Thousand", "10000", " 2710 ", nil, "10,000",
|
28
|
+
"It's \"10 Grand\", baby", "10K" ],
|
29
|
+
FastCSV.parse_line(ex) )
|
30
|
+
end
|
31
|
+
|
32
|
+
# Old Ruby 1.8 CSV library tests.
|
33
|
+
def test_std_lib_csv
|
34
|
+
[ ["\t", ["\t"]],
|
35
|
+
["foo,\"\"\"\"\"\",baz", ["foo", "\"\"", "baz"]],
|
36
|
+
["foo,\"\"\"bar\"\"\",baz", ["foo", "\"bar\"", "baz"]],
|
37
|
+
["\"\"\"\n\",\"\"\"\n\"", ["\"\n", "\"\n"]],
|
38
|
+
["foo,\"\r\n\",baz", ["foo", "\r\n", "baz"]],
|
39
|
+
["\"\"", [""]],
|
40
|
+
["foo,\"\"\"\",baz", ["foo", "\"", "baz"]],
|
41
|
+
["foo,\"\r.\n\",baz", ["foo", "\r.\n", "baz"]],
|
42
|
+
["foo,\"\r\",baz", ["foo", "\r", "baz"]],
|
43
|
+
["foo,\"\",baz", ["foo", "", "baz"]],
|
44
|
+
["\",\"", [","]],
|
45
|
+
["foo", ["foo"]],
|
46
|
+
[",,", [nil, nil, nil]],
|
47
|
+
[",", [nil, nil]],
|
48
|
+
["foo,\"\n\",baz", ["foo", "\n", "baz"]],
|
49
|
+
["foo,,baz", ["foo", nil, "baz"]],
|
50
|
+
["\"\"\"\r\",\"\"\"\r\"", ["\"\r", "\"\r"]],
|
51
|
+
["\",\",\",\"", [",", ","]],
|
52
|
+
["foo,bar,", ["foo", "bar", nil]],
|
53
|
+
[",foo,bar", [nil, "foo", "bar"]],
|
54
|
+
["foo,bar", ["foo", "bar"]],
|
55
|
+
[";", [";"]],
|
56
|
+
["\t,\t", ["\t", "\t"]],
|
57
|
+
["foo,\"\r\n\r\",baz", ["foo", "\r\n\r", "baz"]],
|
58
|
+
["foo,\"\r\n\n\",baz", ["foo", "\r\n\n", "baz"]],
|
59
|
+
["foo,\"foo,bar\",baz", ["foo", "foo,bar", "baz"]],
|
60
|
+
[";,;", [";", ";"]] ].each do |csv_test|
|
61
|
+
assert_equal(csv_test.last, FastCSV.parse_line(csv_test.first))
|
62
|
+
end
|
63
|
+
|
64
|
+
[ ["foo,\"\"\"\"\"\",baz", ["foo", "\"\"", "baz"]],
|
65
|
+
["foo,\"\"\"bar\"\"\",baz", ["foo", "\"bar\"", "baz"]],
|
66
|
+
["foo,\"\r\n\",baz", ["foo", "\r\n", "baz"]],
|
67
|
+
["\"\"", [""]],
|
68
|
+
["foo,\"\"\"\",baz", ["foo", "\"", "baz"]],
|
69
|
+
["foo,\"\r.\n\",baz", ["foo", "\r.\n", "baz"]],
|
70
|
+
["foo,\"\r\",baz", ["foo", "\r", "baz"]],
|
71
|
+
["foo,\"\",baz", ["foo", "", "baz"]],
|
72
|
+
["foo", ["foo"]],
|
73
|
+
[",,", [nil, nil, nil]],
|
74
|
+
[",", [nil, nil]],
|
75
|
+
["foo,\"\n\",baz", ["foo", "\n", "baz"]],
|
76
|
+
["foo,,baz", ["foo", nil, "baz"]],
|
77
|
+
["foo,bar", ["foo", "bar"]],
|
78
|
+
["foo,\"\r\n\n\",baz", ["foo", "\r\n\n", "baz"]],
|
79
|
+
["foo,\"foo,bar\",baz", ["foo", "foo,bar", "baz"]] ].each do |csv_test|
|
80
|
+
assert_equal(csv_test.last, FastCSV.parse_line(csv_test.first))
|
81
|
+
end
|
82
|
+
end
|
83
|
+
|
84
|
+
# From: http://ruby-talk.org/cgi-bin/scat.rb/ruby/ruby-core/6496
|
85
|
+
def test_aras_edge_cases
|
86
|
+
[ [%Q{a,b}, ["a", "b"]],
|
87
|
+
[%Q{a,"""b"""}, ["a", "\"b\""]],
|
88
|
+
[%Q{a,"""b"}, ["a", "\"b"]],
|
89
|
+
[%Q{a,"b"""}, ["a", "b\""]],
|
90
|
+
[%Q{a,"\nb"""}, ["a", "\nb\""]],
|
91
|
+
[%Q{a,"""\nb"}, ["a", "\"\nb"]],
|
92
|
+
[%Q{a,"""\nb\n"""}, ["a", "\"\nb\n\""]],
|
93
|
+
[%Q{a,"""\nb\n""",\nc}, ["a", "\"\nb\n\"", nil]],
|
94
|
+
[%Q{a,,,}, ["a", nil, nil, nil]],
|
95
|
+
[%Q{,}, [nil, nil]],
|
96
|
+
[%Q{"",""}, ["", ""]],
|
97
|
+
[%Q{""""}, ["\""]],
|
98
|
+
[%Q{"""",""}, ["\"",""]],
|
99
|
+
[%Q{,""}, [nil,""]],
|
100
|
+
[%Q{,"\r"}, [nil,"\r"]],
|
101
|
+
[%Q{"\r\n,"}, ["\r\n,"]],
|
102
|
+
[%Q{"\r\n,",}, ["\r\n,", nil]] ].each do |edge_case|
|
103
|
+
assert_equal(edge_case.last, FastCSV.parse_line(edge_case.first))
|
104
|
+
end
|
105
|
+
end
|
106
|
+
|
107
|
+
def test_james_edge_cases
|
108
|
+
# A read at eof? should return nil.
|
109
|
+
assert_equal(nil, FastCSV.parse_line(""))
|
110
|
+
#
|
111
|
+
# With Ruby 1.8 CSV it's impossible to tell an empty line from a line
|
112
|
+
# containing a single +nil+ field. The old CSV library returns
|
113
|
+
# <tt>[nil]</tt> in these cases, but <tt>Array.new</tt> makes more sense to
|
114
|
+
# me.
|
115
|
+
#
|
116
|
+
assert_equal(Array.new, FastCSV.parse_line("\n1,2,3\n"))
|
117
|
+
end
|
118
|
+
|
119
|
+
def test_rob_edge_cases
|
120
|
+
[ [%Q{"a\nb"}, ["a\nb"]],
|
121
|
+
[%Q{"\n\n\n"}, ["\n\n\n"]],
|
122
|
+
[%Q{a,"b\n\nc"}, ['a', "b\n\nc"]],
|
123
|
+
[%Q{,"\r\n"}, [nil,"\r\n"]],
|
124
|
+
[%Q{,"\r\n."}, [nil,"\r\n."]],
|
125
|
+
[%Q{"a\na","one newline"}, ["a\na", 'one newline']],
|
126
|
+
[%Q{"a\n\na","two newlines"}, ["a\n\na", 'two newlines']],
|
127
|
+
[%Q{"a\r\na","one CRLF"}, ["a\r\na", 'one CRLF']],
|
128
|
+
[%Q{"a\r\n\r\na","two CRLFs"}, ["a\r\n\r\na", 'two CRLFs']],
|
129
|
+
[%Q{with blank,"start\n\nfinish"\n}, ['with blank', "start\n\nfinish"]],
|
130
|
+
].each do |edge_case|
|
131
|
+
assert_equal(edge_case.last, FastCSV.parse_line(edge_case.first))
|
132
|
+
end
|
133
|
+
end
|
134
|
+
|
135
|
+
def test_non_regex_edge_cases
|
136
|
+
# An early version of the non-regex parser fails this test
|
137
|
+
[ [ "foo,\"foo,bar,baz,foo\",\"foo\"",
|
138
|
+
["foo", "foo,bar,baz,foo", "foo"] ] ].each do |edge_case|
|
139
|
+
assert_equal(edge_case.last, FastCSV.parse_line(edge_case.first))
|
140
|
+
end
|
141
|
+
|
142
|
+
assert_raise(FastCSV::MalformedCSVError) do
|
143
|
+
FastCSV.parse_line("1,\"23\"4\"5\", 6")
|
144
|
+
end
|
145
|
+
end
|
146
|
+
|
147
|
+
def test_malformed_csv
|
148
|
+
# assert_raise(FastCSV::MalformedCSVError) do
|
149
|
+
# FastCSV.parse_line("1,2\r,3", row_sep: "\n")
|
150
|
+
# end
|
151
|
+
|
152
|
+
bad_data = <<-END_DATA.gsub(/^ +/, "")
|
153
|
+
line,1,abc
|
154
|
+
line,2,"def\nghi"
|
155
|
+
|
156
|
+
line,4,some\rjunk
|
157
|
+
line,5,jkl
|
158
|
+
END_DATA
|
159
|
+
lines = bad_data.lines.to_a
|
160
|
+
assert_equal(6, lines.size)
|
161
|
+
assert_match(/\Aline,4/, lines.find { |l| l =~ /some\rjunk/ })
|
162
|
+
|
163
|
+
csv = FastCSV.new(bad_data)
|
164
|
+
begin
|
165
|
+
loop do
|
166
|
+
assert_not_nil(csv.shift)
|
167
|
+
assert_send([csv.lineno, :<, 5]) # FIXME 4
|
168
|
+
end
|
169
|
+
rescue FastCSV::MalformedCSVError
|
170
|
+
assert_equal( "Unquoted fields do not allow \\r or \\n (line 4).",
|
171
|
+
$!.message )
|
172
|
+
end
|
173
|
+
|
174
|
+
assert_raise(FastCSV::MalformedCSVError) { FastCSV.parse_line('1,2,"3...') }
|
175
|
+
|
176
|
+
bad_data = <<-END_DATA.gsub(/^ +/, "")
|
177
|
+
line,1,abc
|
178
|
+
line,2,"def\nghi"
|
179
|
+
|
180
|
+
line,4,8'10"
|
181
|
+
line,5,jkl
|
182
|
+
END_DATA
|
183
|
+
lines = bad_data.lines.to_a
|
184
|
+
assert_equal(6, lines.size)
|
185
|
+
assert_match(/\Aline,4/, lines.find { |l| l =~ /8'10"/ })
|
186
|
+
|
187
|
+
csv = FastCSV.new(bad_data)
|
188
|
+
begin
|
189
|
+
loop do
|
190
|
+
assert_not_nil(csv.shift)
|
191
|
+
assert_send([csv.lineno, :<, 4])
|
192
|
+
end
|
193
|
+
rescue FastCSV::MalformedCSVError
|
194
|
+
assert_equal("Illegal quoting in line 4.", $!.message)
|
195
|
+
end
|
196
|
+
end
|
197
|
+
|
198
|
+
def test_the_parse_fails_fast_when_it_can_for_unquoted_fields
|
199
|
+
assert_parse_errors_out('valid,fields,bad start"' + BIG_DATA)
|
200
|
+
end
|
201
|
+
|
202
|
+
def test_the_parse_fails_fast_when_it_can_for_unescaped_quotes
|
203
|
+
assert_parse_errors_out('valid,fields,"bad start"unescaped' + BIG_DATA)
|
204
|
+
end
|
205
|
+
|
206
|
+
# def test_field_size_limit_controls_lookahead
|
207
|
+
# assert_parse_errors_out( 'valid,fields,"' + BIG_DATA + '"',
|
208
|
+
# field_size_limit: 2048 )
|
209
|
+
# end
|
210
|
+
|
211
|
+
private
|
212
|
+
|
213
|
+
def assert_parse_errors_out(*args)
|
214
|
+
assert_raise(FastCSV::MalformedCSVError) do
|
215
|
+
Timeout.timeout(0.2) do
|
216
|
+
FastCSV.parse(*args)
|
217
|
+
fail("Parse didn't error out")
|
218
|
+
end
|
219
|
+
end
|
220
|
+
end
|
221
|
+
end
|