fastcsv 0.0.2 → 0.0.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.travis.yml +11 -0
- data/README.md +37 -2
- data/TESTS.md +42 -0
- data/ext/fastcsv/fastcsv.c +281 -223
- data/ext/fastcsv/fastcsv.rl +149 -72
- data/fastcsv.gemspec +1 -1
- data/lib/fastcsv.rb +130 -0
- data/spec/fastcsv_spec.rb +189 -57
- data/spec/fixtures/csv.csv +3 -0
- data/spec/fixtures/iso-8859-1-quoted.csv +1 -0
- data/spec/fixtures/utf-8-quoted.csv +1 -0
- data/spec/spec_helper.rb +5 -0
- data/test/csv/base.rb +8 -0
- data/test/csv/line_endings.gz +0 -0
- data/test/csv/test_csv_parsing.rb +221 -0
- data/test/csv/test_csv_writing.rb +97 -0
- data/test/csv/test_data_converters.rb +263 -0
- data/test/csv/test_encodings.rb +339 -0
- data/test/csv/test_features.rb +317 -0
- data/test/csv/test_headers.rb +289 -0
- data/test/csv/test_interface.rb +362 -0
- data/test/csv/test_row.rb +349 -0
- data/test/csv/test_table.rb +420 -0
- data/test/csv/ts_all.rb +20 -0
- data/test/runner.rb +36 -0
- data/test/with_different_ofs.rb +17 -0
- metadata +38 -2
data/spec/fastcsv_spec.rb
CHANGED
@@ -1,6 +1,7 @@
|
|
1
|
+
# coding: utf-8
|
1
2
|
require 'spec_helper'
|
2
3
|
|
3
|
-
|
4
|
+
$ORIGINAL_VERBOSE = $VERBOSE
|
4
5
|
|
5
6
|
RSpec.shared_examples 'a CSV parser' do
|
6
7
|
let :simple do
|
@@ -61,7 +62,6 @@ RSpec.shared_examples 'a CSV parser' do
|
|
61
62
|
%('foo','bar','baz'),
|
62
63
|
|
63
64
|
# Buffers.
|
64
|
-
"01234567890" * 2_000, # 20,000 > BUFSIZE
|
65
65
|
"0123456789," * 2_000,
|
66
66
|
|
67
67
|
# Uneven rows.
|
@@ -76,23 +76,44 @@ RSpec.shared_examples 'a CSV parser' do
|
|
76
76
|
end
|
77
77
|
end
|
78
78
|
|
79
|
+
# This has caused segmentation faults in the StringIO context in the past, so
|
80
|
+
# we separate it out so that it's easier to special case this spec. The fault
|
81
|
+
# seems to occur less frequently when the spec is run in isolation. The
|
82
|
+
# "TypeError: no implicit conversion from nil to integer" exception after a
|
83
|
+
# fault is related to RSpec, not the fault.
|
84
|
+
it "should parse long rows" do
|
85
|
+
csv = "01234567890" * 2_000 # 22,000 > BUFSIZE
|
86
|
+
expect(parse(csv)).to eq(CSV.parse(csv))
|
87
|
+
end
|
88
|
+
|
79
89
|
[
|
80
90
|
# Whitespace.
|
81
|
-
#
|
82
|
-
#
|
91
|
+
#
|
92
|
+
# CSV's error messages are a consequence of its parser's implementation. It
|
93
|
+
# splits on :col_sep and then reads parts, making it possible to identify
|
94
|
+
# its "Missing or stray quote". FastCSV, as a state machine, wouldn't even
|
95
|
+
# get that far, as it would simply find no match and quit.
|
96
|
+
#
|
97
|
+
# * "Missing or stray quote in line %d" if quoted field matches /[^"]"[^"]/
|
98
|
+
# (for any quote char). Raises "Unclosed quoted field" instead if the
|
99
|
+
# quoted field has an odd number of quote chars.
|
100
|
+
# * "Unquoted fields do not allow \r or \n (line %d)." if unquoted field
|
101
|
+
# contains "\r" or "\n", e.g. if `:row_sep` is "\n" but file uses "\r"
|
102
|
+
# * "Illegal quoting in line %d" if unquoted field contains quote char.
|
103
|
+
# * "Unclosed quoted field on line %d" if it reaches EOF without a closing quote.
|
83
104
|
[%( "x"), 'Illegal quoting in line %d.', 'Illegal quoting in line %d.'],
|
84
|
-
[%("x" ), 'Unclosed quoted field on line %d.', 'Illegal quoting in line %d.'],
|
105
|
+
[%("x" ), 'Unclosed quoted field on line %d.', 'Illegal quoting in line %d.'], # WONTFIX
|
85
106
|
[%( "x" ), 'Illegal quoting in line %d.', 'Illegal quoting in line %d.'],
|
86
107
|
# Tab.
|
87
108
|
[%( "x"), 'Illegal quoting in line %d.', 'Illegal quoting in line %d.'],
|
88
|
-
[%("x" ), 'Unclosed quoted field on line %d.', 'Illegal quoting in line %d.'],
|
109
|
+
[%("x" ), 'Unclosed quoted field on line %d.', 'Illegal quoting in line %d.'], # WONTFIX
|
89
110
|
[%( "x" ), 'Illegal quoting in line %d.', 'Illegal quoting in line %d.'],
|
90
111
|
|
91
112
|
# Quoted next to unquoted.
|
92
|
-
[%("x"x), 'Unclosed quoted field on line %d.', 'Illegal quoting in line %d.'],
|
113
|
+
[%("x"x), 'Unclosed quoted field on line %d.', 'Illegal quoting in line %d.'], # WONTFIX
|
93
114
|
[%(x"x"), 'Illegal quoting in line %d.', 'Illegal quoting in line %d.'],
|
94
115
|
[%(x"x"x), 'Illegal quoting in line %d.', 'Illegal quoting in line %d.'],
|
95
|
-
[%("x"x"x"), 'Missing or stray quote in line %d', 'Illegal quoting in line %d.'],
|
116
|
+
[%("x"x"x"), 'Missing or stray quote in line %d', 'Illegal quoting in line %d.'], # WONTFIX
|
96
117
|
|
97
118
|
# Unclosed quote.
|
98
119
|
[%("x), 'Unclosed quoted field on line %d.', 'Unclosed quoted field on line %d.'],
|
@@ -101,17 +122,17 @@ RSpec.shared_examples 'a CSV parser' do
|
|
101
122
|
[%(x"x), 'Illegal quoting in line %d.', 'Illegal quoting in line %d.'],
|
102
123
|
|
103
124
|
# Unescaped quote in quoted field.
|
104
|
-
[%("x"x"), 'Unclosed quoted field on line %d.', 'Illegal quoting in line %d.'],
|
125
|
+
[%("x"x"), 'Unclosed quoted field on line %d.', 'Illegal quoting in line %d.'], # WONTFIX
|
105
126
|
].each do |csv,csv_error,fastcsv_error|
|
106
127
|
it "should raise an error on: #{csv.inspect.gsub('\"', '"')}" do
|
107
128
|
expect{CSV.parse(csv)}.to raise_error(CSV::MalformedCSVError, csv_error % 1)
|
108
|
-
expect{parse(csv)}.to raise_error(FastCSV::
|
129
|
+
expect{parse(csv)}.to raise_error(FastCSV::MalformedCSVError, fastcsv_error % 1)
|
109
130
|
end
|
110
131
|
|
111
132
|
it "should raise an error with the correct line number on: #{"\n#{csv}\n".inspect.gsub('\"', '"')}" do
|
112
133
|
csv = "\n#{csv}\n"
|
113
134
|
expect{CSV.parse(csv)}.to raise_error(CSV::MalformedCSVError, csv_error % 2)
|
114
|
-
expect{parse(csv)}.to raise_error(FastCSV::
|
135
|
+
expect{parse(csv)}.to raise_error(FastCSV::MalformedCSVError, fastcsv_error % 2)
|
115
136
|
end
|
116
137
|
end
|
117
138
|
|
@@ -123,9 +144,10 @@ RSpec.shared_examples 'a CSV parser' do
|
|
123
144
|
expect(actual).to eq(expected)
|
124
145
|
end
|
125
146
|
|
126
|
-
it 'should raise an error on mixed row separators
|
127
|
-
|
128
|
-
|
147
|
+
it 'should raise an error on mixed row separators' do
|
148
|
+
csv = "foo\rbar\nbaz\r\n"
|
149
|
+
expect{CSV.parse(csv)}.to raise_error(CSV::MalformedCSVError, 'Unquoted fields do not allow \r or \n (line 2).')
|
150
|
+
expect{FastCSV.parse(csv)}.to raise_error(FastCSV::MalformedCSVError, 'Unquoted fields do not allow \r or \n (line 2).')
|
129
151
|
end
|
130
152
|
|
131
153
|
context 'when initializing' do
|
@@ -143,25 +165,82 @@ RSpec.shared_examples 'a CSV parser' do
|
|
143
165
|
end
|
144
166
|
|
145
167
|
context 'when setting a buffer size' do
|
168
|
+
def parse_with_buffer_size(csv, buffer_size)
|
169
|
+
parser = FastCSV::Parser.new
|
170
|
+
parser.buffer_size = buffer_size
|
171
|
+
rows = parse(csv, nil, parser)
|
172
|
+
parser.buffer_size = nil
|
173
|
+
rows
|
174
|
+
end
|
175
|
+
|
146
176
|
it 'should allow nil' do
|
147
|
-
|
148
|
-
expect(parse(simple)).to eq(CSV.parse(simple))
|
149
|
-
FastCSV.buffer_size = nil
|
177
|
+
expect(parse_with_buffer_size(simple, nil)).to eq(CSV.parse(simple))
|
150
178
|
end
|
151
179
|
|
180
|
+
# If buffer_size is actually set to 0, it can cause segmentation faults.
|
152
181
|
it 'should allow zero' do
|
153
|
-
|
154
|
-
expect(parse(simple)).to eq(CSV.parse(simple))
|
155
|
-
FastCSV.buffer_size = nil
|
182
|
+
expect(parse_with_buffer_size(simple, 0)).to eq(CSV.parse(simple))
|
156
183
|
end
|
157
184
|
end
|
158
185
|
end
|
159
186
|
|
187
|
+
RSpec.shared_examples 'with encoded strings' do
|
188
|
+
before(:all) do
|
189
|
+
$VERBOSE = nil
|
190
|
+
end
|
191
|
+
|
192
|
+
after(:all) do
|
193
|
+
$VERBOSE = $ORIGINAL_VERBOSE
|
194
|
+
end
|
195
|
+
|
196
|
+
def parse_with_encoding(basename, encoding)
|
197
|
+
filename = File.expand_path(File.join('..', 'fixtures', basename), __FILE__)
|
198
|
+
options = {encoding: encoding}
|
199
|
+
File.open(filename) do |io|
|
200
|
+
rows = []
|
201
|
+
FastCSV.raw_parse(io, options){|row| rows << row}
|
202
|
+
expected = CSV.read(filename, options)
|
203
|
+
expect(rows[0][0].encoding).to eq(expected[0][0].encoding)
|
204
|
+
expect(rows).to eq(expected)
|
205
|
+
end
|
206
|
+
end
|
207
|
+
|
208
|
+
it 'should encode with internal encoding' do
|
209
|
+
parse_with_encoding("iso-8859-1#{suffix}.csv", 'iso-8859-1')
|
210
|
+
end
|
211
|
+
|
212
|
+
it 'should encode with external encoding' do
|
213
|
+
parse_with_encoding("iso-8859-1#{suffix}.csv", 'iso-8859-1:-')
|
214
|
+
end
|
215
|
+
|
216
|
+
it 'should transcode' do
|
217
|
+
parse_with_encoding("iso-8859-1#{suffix}.csv", 'iso-8859-1:utf-8')
|
218
|
+
end
|
219
|
+
|
220
|
+
it 'should recover from blank external encoding' do
|
221
|
+
parse_with_encoding("utf-8#{suffix}.csv", ':utf-8')
|
222
|
+
end
|
223
|
+
|
224
|
+
it 'should recover from invalid internal encoding' do
|
225
|
+
parse_with_encoding("utf-8#{suffix}.csv", 'invalid')
|
226
|
+
parse_with_encoding("utf-8#{suffix}.csv", 'utf-8:invalid')
|
227
|
+
end
|
228
|
+
|
229
|
+
it 'should recover from invalid external encoding' do
|
230
|
+
parse_with_encoding("utf-8#{suffix}.csv", 'invalid:-')
|
231
|
+
parse_with_encoding("utf-8#{suffix}.csv", 'invalid:utf-8')
|
232
|
+
end
|
233
|
+
|
234
|
+
it 'should recover from invalid internal and external encodings' do
|
235
|
+
parse_with_encoding("utf-8#{suffix}.csv", 'invalid:invalid')
|
236
|
+
end
|
237
|
+
end
|
238
|
+
|
160
239
|
RSpec.describe FastCSV do
|
161
240
|
context "with String" do
|
162
|
-
def parse(csv, options = nil)
|
241
|
+
def parse(csv, options = nil, parser = FastCSV)
|
163
242
|
rows = []
|
164
|
-
|
243
|
+
parser.raw_parse(csv, options){|row| rows << row}
|
165
244
|
rows
|
166
245
|
end
|
167
246
|
|
@@ -172,16 +251,17 @@ RSpec.describe FastCSV do
|
|
172
251
|
include_examples 'a CSV parser'
|
173
252
|
|
174
253
|
it 'should not raise an error on negative buffer size' do
|
175
|
-
|
176
|
-
|
177
|
-
|
254
|
+
parser = FastCSV::Parser.new
|
255
|
+
parser.buffer_size = -1
|
256
|
+
expect{parse(simple, nil, parser)}.to_not raise_error
|
257
|
+
parser.buffer_size = nil
|
178
258
|
end
|
179
259
|
end
|
180
260
|
|
181
261
|
context "with StringIO" do
|
182
|
-
def parse(csv, options = nil)
|
262
|
+
def parse(csv, options = nil, parser = FastCSV)
|
183
263
|
rows = []
|
184
|
-
|
264
|
+
parser.raw_parse(StringIO.new(csv), options){|row| rows << row}
|
185
265
|
rows
|
186
266
|
end
|
187
267
|
|
@@ -192,53 +272,105 @@ RSpec.describe FastCSV do
|
|
192
272
|
include_examples 'a CSV parser'
|
193
273
|
|
194
274
|
it 'should raise an error on negative buffer size' do
|
195
|
-
|
196
|
-
|
197
|
-
|
275
|
+
parser = FastCSV::Parser.new
|
276
|
+
parser.buffer_size = -1
|
277
|
+
expect{parse(simple, nil, parser)}.to raise_error(NoMemoryError)
|
278
|
+
parser.buffer_size = nil
|
198
279
|
end
|
199
280
|
end
|
200
281
|
|
201
|
-
context 'with encoded
|
202
|
-
def
|
203
|
-
|
204
|
-
|
205
|
-
|
206
|
-
|
207
|
-
|
208
|
-
|
209
|
-
|
210
|
-
|
211
|
-
|
282
|
+
context 'with encoded unquoted fields' do
|
283
|
+
def suffix
|
284
|
+
''
|
285
|
+
end
|
286
|
+
|
287
|
+
include_examples 'with encoded strings'
|
288
|
+
end
|
289
|
+
|
290
|
+
context 'with encoded quoted fields' do
|
291
|
+
def suffix
|
292
|
+
'-quoted'
|
293
|
+
end
|
294
|
+
|
295
|
+
include_examples 'with encoded strings'
|
296
|
+
end
|
297
|
+
|
298
|
+
context 'when initializing' do
|
299
|
+
it 'should raise an error if the input is not a String or IO' do
|
300
|
+
expect{FastCSV.raw_parse(nil)}.to raise_error(ArgumentError, 'data has to respond to #read or #to_str')
|
212
301
|
end
|
302
|
+
end
|
213
303
|
|
214
|
-
|
215
|
-
|
304
|
+
describe '#row' do
|
305
|
+
[
|
306
|
+
"",
|
307
|
+
"\n",
|
308
|
+
"\n\n",
|
309
|
+
"a,b,",
|
310
|
+
"a,b,\n",
|
311
|
+
"a,b,\nx,y,\n",
|
312
|
+
"a,b,c",
|
313
|
+
"a,b,c\n",
|
314
|
+
"a,b,c\nx,y,z\n",
|
315
|
+
].each do |csv|
|
316
|
+
it "should return the current row for: #{csv.inspect.gsub('\"', '"')}" do
|
317
|
+
parser = FastCSV::Parser.new
|
318
|
+
rows = []
|
319
|
+
parser.raw_parse(csv) do |row|
|
320
|
+
rows << parser.row
|
321
|
+
end
|
322
|
+
expect(rows).to eq(CSV.parse(csv).map{|row| CSV.generate_line(row).chomp("\n")})
|
323
|
+
end
|
216
324
|
end
|
325
|
+
end
|
217
326
|
|
218
|
-
|
219
|
-
|
327
|
+
context 'with IO methods' do
|
328
|
+
let(:csv) do
|
329
|
+
FastCSV.open(File.expand_path(File.join('..', 'fixtures', 'csv.csv'), __FILE__))
|
220
330
|
end
|
221
331
|
|
222
|
-
|
223
|
-
|
332
|
+
let(:csv2) do
|
333
|
+
FastCSV.open(File.expand_path(File.join('..', 'fixtures', 'csv.csv'), __FILE__))
|
224
334
|
end
|
225
335
|
|
226
|
-
|
227
|
-
|
336
|
+
describe '#pos=' do
|
337
|
+
it 'should read from the new position' do
|
338
|
+
expect(csv.shift).to eq(%w(name age))
|
339
|
+
expect(csv.shift).to eq(%w(Alice 42))
|
340
|
+
csv.pos = 9
|
341
|
+
expect(csv.shift).to eq(%w(Alice 42))
|
342
|
+
expect(csv.shift).to eq(%w(Bob 40))
|
343
|
+
end
|
228
344
|
end
|
229
345
|
|
230
|
-
|
231
|
-
|
346
|
+
describe '#reopen' do
|
347
|
+
it 'should read from the new position' do
|
348
|
+
expect(csv.shift).to eq(%w(name age))
|
349
|
+
expect(csv.shift).to eq(%w(Alice 42))
|
350
|
+
csv.reopen(csv2)
|
351
|
+
expect(csv.shift).to eq(%w(name age))
|
352
|
+
expect(csv.shift).to eq(%w(Alice 42))
|
353
|
+
end
|
232
354
|
end
|
233
355
|
|
234
|
-
|
235
|
-
|
356
|
+
describe '#seek' do
|
357
|
+
it 'should read from the new position' do
|
358
|
+
expect(csv.shift).to eq(%w(name age))
|
359
|
+
expect(csv.shift).to eq(%w(Alice 42))
|
360
|
+
csv.seek(9)
|
361
|
+
expect(csv.shift).to eq(%w(Alice 42))
|
362
|
+
expect(csv.shift).to eq(%w(Bob 40))
|
363
|
+
end
|
236
364
|
end
|
237
|
-
end
|
238
365
|
|
239
|
-
|
240
|
-
|
241
|
-
|
366
|
+
describe '#rewind' do
|
367
|
+
it 'should read from the new position' do
|
368
|
+
expect(csv.shift).to eq(%w(name age))
|
369
|
+
expect(csv.shift).to eq(%w(Alice 42))
|
370
|
+
csv.rewind
|
371
|
+
expect(csv.shift).to eq(%w(name age))
|
372
|
+
expect(csv.shift).to eq(%w(Alice 42))
|
373
|
+
end
|
242
374
|
end
|
243
375
|
end
|
244
376
|
end
|
@@ -0,0 +1 @@
|
|
1
|
+
ñ
|
@@ -0,0 +1 @@
|
|
1
|
+
"ñ"
|
data/spec/spec_helper.rb
CHANGED
@@ -1,6 +1,11 @@
|
|
1
1
|
require 'coveralls'
|
2
2
|
Coveralls.wear!
|
3
3
|
|
4
|
+
SimpleCov.start do
|
5
|
+
add_filter '/spec/'
|
6
|
+
add_filter '/test/'
|
7
|
+
end
|
8
|
+
|
4
9
|
RSpec.configure do |config|
|
5
10
|
config.expect_with :rspec do |expectations|
|
6
11
|
expectations.include_chain_clauses_in_custom_matcher_descriptions = true
|
data/test/csv/base.rb
ADDED
Binary file
|
@@ -0,0 +1,221 @@
|
|
1
|
+
#!/usr/bin/env ruby -w
|
2
|
+
# encoding: UTF-8
|
3
|
+
|
4
|
+
# tc_csv_parsing.rb
|
5
|
+
#
|
6
|
+
# Created by James Edward Gray II on 2005-10-31.
|
7
|
+
# Copyright 2005 James Edward Gray II. You can redistribute or modify this code
|
8
|
+
# under the terms of Ruby's license.
|
9
|
+
|
10
|
+
require "timeout"
|
11
|
+
|
12
|
+
require_relative "base"
|
13
|
+
|
14
|
+
#
|
15
|
+
# Following tests are my interpretation of the
|
16
|
+
# {CSV RFC}[http://www.ietf.org/rfc/rfc4180.txt]. I only deviate from that
|
17
|
+
# document in one place (intentionally) and that is to make the default row
|
18
|
+
# separator <tt>$/</tt>.
|
19
|
+
#
|
20
|
+
class TestCSV::Parsing < TestCSV
|
21
|
+
extend DifferentOFS
|
22
|
+
|
23
|
+
BIG_DATA = "123456789\n" * 1024
|
24
|
+
|
25
|
+
def test_mastering_regex_example
|
26
|
+
ex = %Q{Ten Thousand,10000, 2710 ,,"10,000","It's ""10 Grand"", baby",10K}
|
27
|
+
assert_equal( [ "Ten Thousand", "10000", " 2710 ", nil, "10,000",
|
28
|
+
"It's \"10 Grand\", baby", "10K" ],
|
29
|
+
FastCSV.parse_line(ex) )
|
30
|
+
end
|
31
|
+
|
32
|
+
# Old Ruby 1.8 CSV library tests.
|
33
|
+
def test_std_lib_csv
|
34
|
+
[ ["\t", ["\t"]],
|
35
|
+
["foo,\"\"\"\"\"\",baz", ["foo", "\"\"", "baz"]],
|
36
|
+
["foo,\"\"\"bar\"\"\",baz", ["foo", "\"bar\"", "baz"]],
|
37
|
+
["\"\"\"\n\",\"\"\"\n\"", ["\"\n", "\"\n"]],
|
38
|
+
["foo,\"\r\n\",baz", ["foo", "\r\n", "baz"]],
|
39
|
+
["\"\"", [""]],
|
40
|
+
["foo,\"\"\"\",baz", ["foo", "\"", "baz"]],
|
41
|
+
["foo,\"\r.\n\",baz", ["foo", "\r.\n", "baz"]],
|
42
|
+
["foo,\"\r\",baz", ["foo", "\r", "baz"]],
|
43
|
+
["foo,\"\",baz", ["foo", "", "baz"]],
|
44
|
+
["\",\"", [","]],
|
45
|
+
["foo", ["foo"]],
|
46
|
+
[",,", [nil, nil, nil]],
|
47
|
+
[",", [nil, nil]],
|
48
|
+
["foo,\"\n\",baz", ["foo", "\n", "baz"]],
|
49
|
+
["foo,,baz", ["foo", nil, "baz"]],
|
50
|
+
["\"\"\"\r\",\"\"\"\r\"", ["\"\r", "\"\r"]],
|
51
|
+
["\",\",\",\"", [",", ","]],
|
52
|
+
["foo,bar,", ["foo", "bar", nil]],
|
53
|
+
[",foo,bar", [nil, "foo", "bar"]],
|
54
|
+
["foo,bar", ["foo", "bar"]],
|
55
|
+
[";", [";"]],
|
56
|
+
["\t,\t", ["\t", "\t"]],
|
57
|
+
["foo,\"\r\n\r\",baz", ["foo", "\r\n\r", "baz"]],
|
58
|
+
["foo,\"\r\n\n\",baz", ["foo", "\r\n\n", "baz"]],
|
59
|
+
["foo,\"foo,bar\",baz", ["foo", "foo,bar", "baz"]],
|
60
|
+
[";,;", [";", ";"]] ].each do |csv_test|
|
61
|
+
assert_equal(csv_test.last, FastCSV.parse_line(csv_test.first))
|
62
|
+
end
|
63
|
+
|
64
|
+
[ ["foo,\"\"\"\"\"\",baz", ["foo", "\"\"", "baz"]],
|
65
|
+
["foo,\"\"\"bar\"\"\",baz", ["foo", "\"bar\"", "baz"]],
|
66
|
+
["foo,\"\r\n\",baz", ["foo", "\r\n", "baz"]],
|
67
|
+
["\"\"", [""]],
|
68
|
+
["foo,\"\"\"\",baz", ["foo", "\"", "baz"]],
|
69
|
+
["foo,\"\r.\n\",baz", ["foo", "\r.\n", "baz"]],
|
70
|
+
["foo,\"\r\",baz", ["foo", "\r", "baz"]],
|
71
|
+
["foo,\"\",baz", ["foo", "", "baz"]],
|
72
|
+
["foo", ["foo"]],
|
73
|
+
[",,", [nil, nil, nil]],
|
74
|
+
[",", [nil, nil]],
|
75
|
+
["foo,\"\n\",baz", ["foo", "\n", "baz"]],
|
76
|
+
["foo,,baz", ["foo", nil, "baz"]],
|
77
|
+
["foo,bar", ["foo", "bar"]],
|
78
|
+
["foo,\"\r\n\n\",baz", ["foo", "\r\n\n", "baz"]],
|
79
|
+
["foo,\"foo,bar\",baz", ["foo", "foo,bar", "baz"]] ].each do |csv_test|
|
80
|
+
assert_equal(csv_test.last, FastCSV.parse_line(csv_test.first))
|
81
|
+
end
|
82
|
+
end
|
83
|
+
|
84
|
+
# From: http://ruby-talk.org/cgi-bin/scat.rb/ruby/ruby-core/6496
|
85
|
+
def test_aras_edge_cases
|
86
|
+
[ [%Q{a,b}, ["a", "b"]],
|
87
|
+
[%Q{a,"""b"""}, ["a", "\"b\""]],
|
88
|
+
[%Q{a,"""b"}, ["a", "\"b"]],
|
89
|
+
[%Q{a,"b"""}, ["a", "b\""]],
|
90
|
+
[%Q{a,"\nb"""}, ["a", "\nb\""]],
|
91
|
+
[%Q{a,"""\nb"}, ["a", "\"\nb"]],
|
92
|
+
[%Q{a,"""\nb\n"""}, ["a", "\"\nb\n\""]],
|
93
|
+
[%Q{a,"""\nb\n""",\nc}, ["a", "\"\nb\n\"", nil]],
|
94
|
+
[%Q{a,,,}, ["a", nil, nil, nil]],
|
95
|
+
[%Q{,}, [nil, nil]],
|
96
|
+
[%Q{"",""}, ["", ""]],
|
97
|
+
[%Q{""""}, ["\""]],
|
98
|
+
[%Q{"""",""}, ["\"",""]],
|
99
|
+
[%Q{,""}, [nil,""]],
|
100
|
+
[%Q{,"\r"}, [nil,"\r"]],
|
101
|
+
[%Q{"\r\n,"}, ["\r\n,"]],
|
102
|
+
[%Q{"\r\n,",}, ["\r\n,", nil]] ].each do |edge_case|
|
103
|
+
assert_equal(edge_case.last, FastCSV.parse_line(edge_case.first))
|
104
|
+
end
|
105
|
+
end
|
106
|
+
|
107
|
+
def test_james_edge_cases
|
108
|
+
# A read at eof? should return nil.
|
109
|
+
assert_equal(nil, FastCSV.parse_line(""))
|
110
|
+
#
|
111
|
+
# With Ruby 1.8 CSV it's impossible to tell an empty line from a line
|
112
|
+
# containing a single +nil+ field. The old CSV library returns
|
113
|
+
# <tt>[nil]</tt> in these cases, but <tt>Array.new</tt> makes more sense to
|
114
|
+
# me.
|
115
|
+
#
|
116
|
+
assert_equal(Array.new, FastCSV.parse_line("\n1,2,3\n"))
|
117
|
+
end
|
118
|
+
|
119
|
+
def test_rob_edge_cases
|
120
|
+
[ [%Q{"a\nb"}, ["a\nb"]],
|
121
|
+
[%Q{"\n\n\n"}, ["\n\n\n"]],
|
122
|
+
[%Q{a,"b\n\nc"}, ['a', "b\n\nc"]],
|
123
|
+
[%Q{,"\r\n"}, [nil,"\r\n"]],
|
124
|
+
[%Q{,"\r\n."}, [nil,"\r\n."]],
|
125
|
+
[%Q{"a\na","one newline"}, ["a\na", 'one newline']],
|
126
|
+
[%Q{"a\n\na","two newlines"}, ["a\n\na", 'two newlines']],
|
127
|
+
[%Q{"a\r\na","one CRLF"}, ["a\r\na", 'one CRLF']],
|
128
|
+
[%Q{"a\r\n\r\na","two CRLFs"}, ["a\r\n\r\na", 'two CRLFs']],
|
129
|
+
[%Q{with blank,"start\n\nfinish"\n}, ['with blank', "start\n\nfinish"]],
|
130
|
+
].each do |edge_case|
|
131
|
+
assert_equal(edge_case.last, FastCSV.parse_line(edge_case.first))
|
132
|
+
end
|
133
|
+
end
|
134
|
+
|
135
|
+
def test_non_regex_edge_cases
|
136
|
+
# An early version of the non-regex parser fails this test
|
137
|
+
[ [ "foo,\"foo,bar,baz,foo\",\"foo\"",
|
138
|
+
["foo", "foo,bar,baz,foo", "foo"] ] ].each do |edge_case|
|
139
|
+
assert_equal(edge_case.last, FastCSV.parse_line(edge_case.first))
|
140
|
+
end
|
141
|
+
|
142
|
+
assert_raise(FastCSV::MalformedCSVError) do
|
143
|
+
FastCSV.parse_line("1,\"23\"4\"5\", 6")
|
144
|
+
end
|
145
|
+
end
|
146
|
+
|
147
|
+
def test_malformed_csv
|
148
|
+
# assert_raise(FastCSV::MalformedCSVError) do
|
149
|
+
# FastCSV.parse_line("1,2\r,3", row_sep: "\n")
|
150
|
+
# end
|
151
|
+
|
152
|
+
bad_data = <<-END_DATA.gsub(/^ +/, "")
|
153
|
+
line,1,abc
|
154
|
+
line,2,"def\nghi"
|
155
|
+
|
156
|
+
line,4,some\rjunk
|
157
|
+
line,5,jkl
|
158
|
+
END_DATA
|
159
|
+
lines = bad_data.lines.to_a
|
160
|
+
assert_equal(6, lines.size)
|
161
|
+
assert_match(/\Aline,4/, lines.find { |l| l =~ /some\rjunk/ })
|
162
|
+
|
163
|
+
csv = FastCSV.new(bad_data)
|
164
|
+
begin
|
165
|
+
loop do
|
166
|
+
assert_not_nil(csv.shift)
|
167
|
+
assert_send([csv.lineno, :<, 5]) # FIXME 4
|
168
|
+
end
|
169
|
+
rescue FastCSV::MalformedCSVError
|
170
|
+
assert_equal( "Unquoted fields do not allow \\r or \\n (line 4).",
|
171
|
+
$!.message )
|
172
|
+
end
|
173
|
+
|
174
|
+
assert_raise(FastCSV::MalformedCSVError) { FastCSV.parse_line('1,2,"3...') }
|
175
|
+
|
176
|
+
bad_data = <<-END_DATA.gsub(/^ +/, "")
|
177
|
+
line,1,abc
|
178
|
+
line,2,"def\nghi"
|
179
|
+
|
180
|
+
line,4,8'10"
|
181
|
+
line,5,jkl
|
182
|
+
END_DATA
|
183
|
+
lines = bad_data.lines.to_a
|
184
|
+
assert_equal(6, lines.size)
|
185
|
+
assert_match(/\Aline,4/, lines.find { |l| l =~ /8'10"/ })
|
186
|
+
|
187
|
+
csv = FastCSV.new(bad_data)
|
188
|
+
begin
|
189
|
+
loop do
|
190
|
+
assert_not_nil(csv.shift)
|
191
|
+
assert_send([csv.lineno, :<, 4])
|
192
|
+
end
|
193
|
+
rescue FastCSV::MalformedCSVError
|
194
|
+
assert_equal("Illegal quoting in line 4.", $!.message)
|
195
|
+
end
|
196
|
+
end
|
197
|
+
|
198
|
+
def test_the_parse_fails_fast_when_it_can_for_unquoted_fields
|
199
|
+
assert_parse_errors_out('valid,fields,bad start"' + BIG_DATA)
|
200
|
+
end
|
201
|
+
|
202
|
+
def test_the_parse_fails_fast_when_it_can_for_unescaped_quotes
|
203
|
+
assert_parse_errors_out('valid,fields,"bad start"unescaped' + BIG_DATA)
|
204
|
+
end
|
205
|
+
|
206
|
+
# def test_field_size_limit_controls_lookahead
|
207
|
+
# assert_parse_errors_out( 'valid,fields,"' + BIG_DATA + '"',
|
208
|
+
# field_size_limit: 2048 )
|
209
|
+
# end
|
210
|
+
|
211
|
+
private
|
212
|
+
|
213
|
+
def assert_parse_errors_out(*args)
|
214
|
+
assert_raise(FastCSV::MalformedCSVError) do
|
215
|
+
Timeout.timeout(0.2) do
|
216
|
+
FastCSV.parse(*args)
|
217
|
+
fail("Parse didn't error out")
|
218
|
+
end
|
219
|
+
end
|
220
|
+
end
|
221
|
+
end
|