fastcsv 0.0.2 → 0.0.3
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.travis.yml +11 -0
- data/README.md +37 -2
- data/TESTS.md +42 -0
- data/ext/fastcsv/fastcsv.c +281 -223
- data/ext/fastcsv/fastcsv.rl +149 -72
- data/fastcsv.gemspec +1 -1
- data/lib/fastcsv.rb +130 -0
- data/spec/fastcsv_spec.rb +189 -57
- data/spec/fixtures/csv.csv +3 -0
- data/spec/fixtures/iso-8859-1-quoted.csv +1 -0
- data/spec/fixtures/utf-8-quoted.csv +1 -0
- data/spec/spec_helper.rb +5 -0
- data/test/csv/base.rb +8 -0
- data/test/csv/line_endings.gz +0 -0
- data/test/csv/test_csv_parsing.rb +221 -0
- data/test/csv/test_csv_writing.rb +97 -0
- data/test/csv/test_data_converters.rb +263 -0
- data/test/csv/test_encodings.rb +339 -0
- data/test/csv/test_features.rb +317 -0
- data/test/csv/test_headers.rb +289 -0
- data/test/csv/test_interface.rb +362 -0
- data/test/csv/test_row.rb +349 -0
- data/test/csv/test_table.rb +420 -0
- data/test/csv/ts_all.rb +20 -0
- data/test/runner.rb +36 -0
- data/test/with_different_ofs.rb +17 -0
- metadata +38 -2
@@ -0,0 +1,97 @@
|
|
1
|
+
#!/usr/bin/env ruby -w
|
2
|
+
# encoding: UTF-8
|
3
|
+
|
4
|
+
# tc_csv_writing.rb
|
5
|
+
#
|
6
|
+
# Created by James Edward Gray II on 2005-10-31.
|
7
|
+
# Copyright 2005 James Edward Gray II. You can redistribute or modify this code
|
8
|
+
# under the terms of Ruby's license.
|
9
|
+
|
10
|
+
require_relative "base"
|
11
|
+
|
12
|
+
class TestCSV::Writing < TestCSV
|
13
|
+
extend DifferentOFS
|
14
|
+
|
15
|
+
# Table-driven check of FastCSV.generate_line.
#
# Every entry pairs the expected CSV text (without the line terminator) with
# the list of fields handed to generate_line; the expected text is compared
# with the current value of $/ appended.  Some entries appear twice in the
# table; they are kept exactly as they were.
def test_writing
  [ ["\t", ["\t"]],
    ["foo,\"\"\"\"\"\",baz", ["foo", "\"\"", "baz"]],
    ["foo,\"\"\"bar\"\"\",baz", ["foo", "\"bar\"", "baz"]],
    ["\"\"\"\n\",\"\"\"\n\"", ["\"\n", "\"\n"]],
    ["foo,\"\r\n\",baz", ["foo", "\r\n", "baz"]],
    ["\"\"", [""]],
    ["foo,\"\"\"\",baz", ["foo", "\"", "baz"]],
    ["foo,\"\r.\n\",baz", ["foo", "\r.\n", "baz"]],
    ["foo,\"\r\",baz", ["foo", "\r", "baz"]],
    ["foo,\"\",baz", ["foo", "", "baz"]],
    ["\",\"", [","]],
    ["foo", ["foo"]],
    [",,", [nil, nil, nil]],
    [",", [nil, nil]],
    ["foo,\"\n\",baz", ["foo", "\n", "baz"]],
    ["foo,,baz", ["foo", nil, "baz"]],
    ["\"\"\"\r\",\"\"\"\r\"", ["\"\r", "\"\r"]],
    ["\",\",\",\"", [",", ","]],
    ["foo,bar,", ["foo", "bar", nil]],
    [",foo,bar", [nil, "foo", "bar"]],
    ["foo,bar", ["foo", "bar"]],
    [";", [";"]],
    ["\t,\t", ["\t", "\t"]],
    ["foo,\"\r\n\r\",baz", ["foo", "\r\n\r", "baz"]],
    ["foo,\"\r\n\n\",baz", ["foo", "\r\n\n", "baz"]],
    ["foo,\"foo,bar\",baz", ["foo", "foo,bar", "baz"]],
    [";,;", [";", ";"]],
    ["foo,\"\"\"\"\"\",baz", ["foo", "\"\"", "baz"]],
    ["foo,\"\"\"bar\"\"\",baz", ["foo", "\"bar\"", "baz"]],
    ["foo,\"\r\n\",baz", ["foo", "\r\n", "baz"]],
    ["\"\"", [""]],
    ["foo,\"\"\"\",baz", ["foo", "\"", "baz"]],
    ["foo,\"\r.\n\",baz", ["foo", "\r.\n", "baz"]],
    ["foo,\"\r\",baz", ["foo", "\r", "baz"]],
    ["foo,\"\",baz", ["foo", "", "baz"]],
    ["foo", ["foo"]],
    [",,", [nil, nil, nil]],
    [",", [nil, nil]],
    ["foo,\"\n\",baz", ["foo", "\n", "baz"]],
    ["foo,,baz", ["foo", nil, "baz"]],
    ["foo,bar", ["foo", "bar"]],
    ["foo,\"\r\n\n\",baz", ["foo", "\r\n\n", "baz"]],
    ["foo,\"foo,bar\",baz", ["foo", "foo,bar", "baz"]],
    [%Q{a,b}, ["a", "b"]],
    [%Q{a,"""b"""}, ["a", "\"b\""]],
    [%Q{a,"""b"}, ["a", "\"b"]],
    [%Q{a,"b"""}, ["a", "b\""]],
    [%Q{a,"\nb"""}, ["a", "\nb\""]],
    [%Q{a,"""\nb"}, ["a", "\"\nb"]],
    [%Q{a,"""\nb\n"""}, ["a", "\"\nb\n\""]],
    [%Q{a,"""\nb\n""",}, ["a", "\"\nb\n\"", nil]],
    [%Q{a,,,}, ["a", nil, nil, nil]],
    [%Q{,}, [nil, nil]],
    [%Q{"",""}, ["", ""]],
    [%Q{""""}, ["\""]],
    [%Q{"""",""}, ["\"",""]],
    [%Q{,""}, [nil,""]],
    [%Q{,"\r"}, [nil,"\r"]],
    [%Q{"\r\n,"}, ["\r\n,"]],
    [%Q{"\r\n,",}, ["\r\n,", nil]] ].each do |expected, fields|
    assert_equal(expected + $/, FastCSV.generate_line(fields))
  end
end
|
79
|
+
|
80
|
+
# generate_line must join fields with the :col_sep that was passed in
# (semicolon and tab are exercised); nil fields come out empty.
def test_col_sep
  fields = ["a", "b", nil, "c"]

  assert_equal("a;b;;c\n", FastCSV.generate_line(fields, col_sep: ";"))
  assert_equal("a\tb\t\tc\n", FastCSV.generate_line(fields, col_sep: "\t"))
end
|
86
|
+
|
87
|
+
# generate_line must terminate the line with the :row_sep that was passed in.
def test_row_sep
  line = FastCSV.generate_line(["a", "b", nil, "c"], row_sep: "\r\n")

  assert_equal("a,b,,c\r\n", line)
end
|
91
|
+
|
92
|
+
# With force_quotes: true every field is quoted: the Integer 1, the nil
# (rendered as "") and the string with embedded quotes (which are doubled).
def test_force_quotes
  line = FastCSV.generate_line( [1, "b", nil, %Q{already "quoted"}],
                                force_quotes: true )

  assert_equal(%Q{"1","b","","already ""quoted"""\n}, line)
end
|
97
|
+
end
|
@@ -0,0 +1,263 @@
|
|
1
|
+
#!/usr/bin/env ruby -w
|
2
|
+
# encoding: UTF-8
|
3
|
+
|
4
|
+
# tc_data_converters.rb
|
5
|
+
#
|
6
|
+
# Created by James Edward Gray II on 2005-10-31.
|
7
|
+
# Copyright 2005 James Edward Gray II. You can redistribute or modify this code
|
8
|
+
# under the terms of Ruby's license.
|
9
|
+
|
10
|
+
require_relative "base"
|
11
|
+
|
12
|
+
class TestCSV::DataConverters < TestCSV
|
13
|
+
extend DifferentOFS
|
14
|
+
|
15
|
+
# Per-test fixtures:
#   @data              - a single CSV line mixing text, symbol-like fields and numbers
#   @parser            - a FastCSV instance reading @data
#   @custom            - converter turning ":name" fields into Symbols
#   @win_safe_time_str - the current time formatted as "%a %b %d %H:%M:%S %Y"
def setup
  super

  @data   = "Numbers,:integer,1,:float,3.015"
  @parser = FastCSV.new(@data)

  @custom = lambda { |field| field =~ /\A:(\S.*?)\s*\Z/ ? $1.to_sym : field }

  @win_safe_time_str = Time.now.strftime("%a %b %d %H:%M:%S %Y")
end
|
24
|
+
|
25
|
+
# Exercises FastCSV::Converters[:integer] directly.
def test_builtin_integer_converter
  converter = FastCSV::Converters[:integer]

  # does convert
  [-5, 1, 10000000000].each do |n|
    assert_equal(n, converter[n.to_s])
  end

  # does not convert
  (%w{junk 1.0} + [""]).each do |str|
    assert_equal(str, converter[str])
  end
end
|
36
|
+
|
37
|
+
# Exercises FastCSV::Converters[:float] directly.
def test_builtin_float_converter
  converter = FastCSV::Converters[:float]

  # does convert
  [-5.1234, 0, 2.3e-11].each do |n|
    assert_equal(n, converter[n.to_s])
  end

  # does not convert
  (%w{junk 1..0 .015F} + [""]).each do |str|
    assert_equal(str, converter[str])
  end
end
|
48
|
+
|
49
|
+
# Exercises FastCSV::Converters[:date] directly.
def test_builtin_date_converter
  converter = FastCSV::Converters[:date]

  # does convert (the time-of-day part is stripped from the fixture first)
  date_only = @win_safe_time_str.sub(/\d+:\d+:\d+ /, "")
  assert_instance_of(Date, converter[date_only])

  # does not convert
  assert_instance_of(String, converter["junk"])
end
|
59
|
+
|
60
|
+
# Exercises FastCSV::Converters[:date_time] directly.
def test_builtin_date_time_converter
  converter = FastCSV::Converters[:date_time]

  # does convert
  assert_instance_of(DateTime, converter[@win_safe_time_str])

  # does not convert
  assert_instance_of(String, converter["junk"])
end
|
68
|
+
|
69
|
+
# Installing the :integer converter on the parser converts only the "1" field.
def test_convert_with_builtin_integer
  # setup parser...
  assert(@parser.respond_to?(:convert))
  assert_nothing_raised(Exception) { @parser.convert(:integer) }

  # and use
  expected = ["Numbers", ":integer", 1, ":float", "3.015"]
  assert_equal(expected, @parser.shift)
end
|
77
|
+
|
78
|
+
# Installing the :float converter on the parser converts both numeric fields
# to Floats.
def test_convert_with_builtin_float
  # setup parser...
  assert(@parser.respond_to?(:convert))
  assert_nothing_raised(Exception) { @parser.convert(:float) }

  # and use
  expected = ["Numbers", ":integer", 1.0, ":float", 3.015]
  assert_equal(expected, @parser.shift)
end
|
86
|
+
|
87
|
+
# Converter order matters: with :float installed before :integer, every
# numeric field ends up as a Float.
def test_convert_order_float_integer
  # floats first, then integers...
  assert_nothing_raised(Exception) do
    @parser.convert(:float)
    @parser.convert(:integer)
  end

  # gets us nothing but floats
  assert_equal( [String, String, Float, String, Float],
                @parser.shift.map(&:class) )
end
|
98
|
+
|
99
|
+
# Converter order matters: with :integer installed before :float, the "1"
# field becomes an integer and "3.015" a Float.
def test_convert_order_integer_float
  # integers have precedence...
  assert_nothing_raised(Exception) do
    @parser.convert(:integer)
    @parser.convert(:float)
  end

  # The class of a small integer is Fixnum before Ruby 2.4 and Integer from
  # 2.4 on (the Fixnum constant is gone in Ruby 3.2), so ask the runtime
  # instead of naming the constant.
  integer_class = 1.class

  # gives us proper number conversion
  assert_equal( [String, String, integer_class, String, Float],
                @parser.shift.map(&:class) )
end
|
110
|
+
|
111
|
+
# The :numeric combo converter yields an integer for "1" and a Float for
# "3.015".
def test_builtin_numeric_combo_converter
  # setup parser...
  assert_nothing_raised(Exception) { @parser.convert(:numeric) }

  # Fixnum before Ruby 2.4, Integer afterwards (Fixnum is removed in 3.2).
  integer_class = 1.class

  # and use
  assert_equal( [String, String, integer_class, String, Float],
                @parser.shift.map(&:class) )
end
|
119
|
+
|
120
|
+
# The :all combo converter handles integers, floats and date-times in one
# pass.
def test_builtin_all_nested_combo_converter
  # setup parser...
  @data << ",#{@win_safe_time_str}"        # add a DateTime field
  @parser = FastCSV.new(@data)             # reset parser
  assert_nothing_raised(Exception) { @parser.convert(:all) }

  # Fixnum before Ruby 2.4, Integer afterwards (Fixnum is removed in 3.2).
  integer_class = 1.class

  # and use
  assert_equal( [String, String, integer_class, String, Float, DateTime],
                @parser.shift.map(&:class) )
end
|
130
|
+
|
131
|
+
# A converter supplied as a block is applied to every field; here it turns
# ":name" fields into Symbols and leaves the rest untouched.
def test_convert_with_custom_code
  # define custom converter...
  assert_nothing_raised(Exception) do
    @parser.convert { |field| field =~ /\A:(\S.*?)\s*\Z/ ? $1.to_sym : field }
  end

  # and use
  expected = ["Numbers", :integer, "1", :float, "3.015"]
  assert_equal(expected, @parser.shift)
end
|
140
|
+
|
141
|
+
# A built-in converter (:numeric) and the custom symbol converter from setup
# can be installed on the same parser.
def test_convert_with_custom_code_mix
  # mix built-in and custom...
  assert_nothing_raised(Exception) { @parser.convert(:numeric) }
  assert_nothing_raised(Exception) { @parser.convert(&@custom) }

  # and use
  expected = ["Numbers", :integer, 1, :float, 3.015]
  assert_equal(expected, @parser.shift)
end
|
149
|
+
|
150
|
+
# A two-argument converter block also receives a field-info object; this
# test reads its #line and #index.
def test_convert_with_custom_code_using_field_info
  # define custom converter that uses field information...
  assert_nothing_raised(Exception) do
    @parser.convert do |field, info|
      assert_equal(1, info.line)
      # only the fifth field (index 4) is converted
      info.index == 4 ? Float(field).floor : field
    end
  end

  # and use
  expected = ["Numbers", ":integer", "1", ":float", 3]
  assert_equal(expected, @parser.shift)
end
|
162
|
+
|
163
|
+
# With explicit headers the field-info object exposes #header, which the
# converter uses to pick the column to transform.
def test_convert_with_custom_code_using_field_info_header
  @parser = FastCSV.new(@data, headers: %w{one two three four five})

  # define custom converter that uses field header information...
  assert_nothing_raised(Exception) do
    @parser.convert do |field, info|
      info.header == "three" ? Integer(field) * 100 : field
    end
  end

  # and use
  expected = ["Numbers", ":integer", 100, ":float", "3.015"]
  assert_equal(expected, @parser.shift.fields)
end
|
177
|
+
|
178
|
+
# parse_line accepts converters through the :converters option, either as a
# single name, a list of names, or a mix of names and callables.
def test_shortcut_interface
  numbers_only = ["Numbers", ":integer", 1, ":float", 3.015]

  assert_equal( numbers_only,
                FastCSV.parse_line(@data, converters: :numeric) )

  assert_equal( numbers_only,
                FastCSV.parse_line(@data, converters: [:integer, :float]) )

  assert_equal( ["Numbers", :integer, 1, :float, 3.015],
                FastCSV.parse_line(@data, converters: [:numeric, @custom]) )
end
|
188
|
+
|
189
|
+
# With unconverted_fields: true each parsed row must respond to
# #unconverted_fields.  Checked for plain rows and for header rows, with and
# without return_headers / header_converters.
def test_unconverted_fields
  [ [ @data,
      ["Numbers", :integer, 1, :float, 3.015],
      %w{Numbers :integer 1 :float 3.015} ],
    ["\n", [], []] ].each do |test, fields, unconverted|
    row = nil
    assert_nothing_raised(Exception) do
      row = FastCSV.parse_line( test,
                                converters:         [:numeric, @custom],
                                unconverted_fields: true )
    end
    assert_not_nil(row)
    assert_equal(fields, row)
    assert_respond_to(row, :unconverted_fields)
    assert_equal(unconverted, row.unconverted_fields)
  end

  # leading whitespace is stripped from every heredoc line by the gsub
  data = <<-END_CSV.gsub(/^\s+/, "")
  first,second,third
  1,2,3
  END_CSV

  # headers taken from the first row; the data row is returned
  row = nil
  assert_nothing_raised(Exception) do
    row = FastCSV.parse_line( data,
                              converters:         :numeric,
                              unconverted_fields: true,
                              headers:            :first_row )
  end
  assert_not_nil(row)
  assert_equal([["first", 1], ["second", 2], ["third", 3]], row.to_a)
  assert_respond_to(row, :unconverted_fields)
  assert_equal(%w{1 2 3}, row.unconverted_fields)

  # return_headers: true makes the header row itself the first result
  assert_nothing_raised(Exception) do
    row = FastCSV.parse_line( data,
                              converters:         :numeric,
                              unconverted_fields: true,
                              headers:            :first_row,
                              return_headers:     true )
  end
  assert_not_nil(row)
  assert_equal( [%w{first first}, %w{second second}, %w{third third}],
                row.to_a )
  assert_respond_to(row, :unconverted_fields)
  assert_equal(%w{first second third}, row.unconverted_fields)

  # same, with the header names converted to Symbols
  assert_nothing_raised(Exception) do
    row = FastCSV.parse_line( data,
                              converters:         :numeric,
                              unconverted_fields: true,
                              headers:            :first_row,
                              return_headers:     true,
                              header_converters:  :symbol )
  end
  assert_not_nil(row)
  assert_equal( [[:first, "first"], [:second, "second"], [:third, "third"]],
                row.to_a )
  assert_respond_to(row, :unconverted_fields)
  assert_equal(%w{first second third}, row.unconverted_fields)

  # headers supplied by the caller rather than read from the data
  assert_nothing_raised(Exception) do
    row = FastCSV.parse_line( data,
                              converters:         :numeric,
                              unconverted_fields: true,
                              headers:            %w{my new headers},
                              return_headers:     true,
                              header_converters:  :symbol )
  end
  assert_not_nil(row)
  assert_equal( [[:my, "my"], [:new, "new"], [:headers, "headers"]],
                row.to_a )
  assert_respond_to(row, :unconverted_fields)
  assert_equal([], row.unconverted_fields)
end
|
263
|
+
end
|
@@ -0,0 +1,339 @@
|
|
1
|
+
#!/usr/bin/env ruby -w
|
2
|
+
# encoding: UTF-8
|
3
|
+
|
4
|
+
# tc_encodings.rb
|
5
|
+
#
|
6
|
+
# Created by James Edward Gray II on 2008-09-13.
|
7
|
+
# Copyright 2008 James Edward Gray II. You can redistribute or modify this code
|
8
|
+
# under the terms of Ruby's license.
|
9
|
+
|
10
|
+
require_relative "base"
|
11
|
+
|
12
|
+
class TestCSV::Encodings < TestCSV
|
13
|
+
extend DifferentOFS
|
14
|
+
|
15
|
+
# Creates a temp file for the test, remembers its path in @temp_csv_path and
# closes the handle so tests can reopen the path in the mode they need.
def setup
  super
  require 'tempfile'
  @temp_csv_file = Tempfile.new(%w"test_csv. .csv")
  @temp_csv_path = @temp_csv_file.path
  @temp_csv_file.close
end
|
22
|
+
|
23
|
+
# Closes and unlinks the temp file created in setup (Tempfile#close!).
def teardown
  @temp_csv_file.close!
  super
end
|
27
|
+
|
28
|
+
########################################
|
29
|
+
### Hand Test Some Popular Encodings ###
|
30
|
+
########################################
|
31
|
+
|
32
|
+
# Round-trips a small table containing a non-ASCII character as UTF-8.
def test_parses_utf8_encoding
  rows = [ %w[ one two … ],
           %w[ 1   …   3 ],
           %w[ …   5   6 ] ]

  assert_parses(rows, "UTF-8")
end
|
37
|
+
|
38
|
+
# Round-trips a small table containing accented characters as ISO-8859-1.
def test_parses_latin1_encoding
  rows = [ %w[ one    two    Résumé ],
           %w[ 1      Résumé 3      ],
           %w[ Résumé 5      6      ] ]

  assert_parses(rows, "ISO-8859-1")
end
|
43
|
+
|
44
|
+
# def test_parses_utf16be_encoding
|
45
|
+
# assert_parses( [ %w[ one two … ],
|
46
|
+
# %w[ 1 … 3 ],
|
47
|
+
# %w[ … 5 6 ] ], "UTF-16BE" )
|
48
|
+
# end
|
49
|
+
|
50
|
+
# Round-trips a small table of Japanese numerals as Shift_JIS.
def test_parses_shift_jis_encoding
  rows = [ %w[ 一 二 三 ],
           %w[ 四 五 六 ],
           %w[ 七 八 九 ] ]

  assert_parses(rows, "Shift_JIS")
end
|
55
|
+
|
56
|
+
###########################################################
|
57
|
+
### Try Simple Reading for All Non-dummy Ruby Encodings ###
|
58
|
+
###########################################################
|
59
|
+
|
60
|
+
# Parses a tiny ASCII-only table in every encoding yielded by each_encoding.
# A missing converter is reported as a test failure naming the encoding.
def test_reading_with_most_encodings
  rows = [ %w[ abc def ],
           %w[ ghi jkl ] ]

  each_encoding do |encoding|
    begin
      assert_parses(rows, encoding)
    rescue Encoding::ConverterNotFoundError
      fail("Failed to support #{encoding.name}.")
    end
  end
end
|
70
|
+
|
71
|
+
# Same as the plain reading test, but with an explicit col_sep option, for
# every encoding yielded by each_encoding.
def test_regular_expression_escaping
  rows = [ %w[ abc def ],
           %w[ ghi jkl ] ]

  each_encoding do |encoding|
    begin
      assert_parses(rows, encoding, col_sep: ",")
    rescue Encoding::ConverterNotFoundError
      fail("Failed to properly escape #{encoding.name}.")
    end
  end
end
|
81
|
+
|
82
|
+
# FastCSV.read without an explicit encoding must tag fields with
# Encoding.default_external.  The global default is switched per encoding
# and always restored in the ensure clause.
def test_read_with_default_encoding
  data             = "abc"
  default_external = Encoding.default_external

  each_encoding do |encoding|
    File.open(@temp_csv_path, "wb", encoding: encoding) {|f| f << data}
    begin
      # assigning default_external warns under -w; silence just that
      no_warnings do
        Encoding.default_external = encoding
      end
      result = FastCSV.read(@temp_csv_path)[0][0]
    ensure
      no_warnings do
        Encoding.default_external = default_external
      end
    end
    assert_equal(encoding, result.encoding)
  end
end
|
100
|
+
|
101
|
+
#######################################################################
|
102
|
+
### Stress Test ASCII Compatible and Non-ASCII Compatible Encodings ###
|
103
|
+
#######################################################################
|
104
|
+
|
105
|
+
# The detected row_sep must be "\r\n" (in the data's encoding) for a line of
# 509 "a" characters terminated by "\r\n".
def test_auto_line_ending_detection
  # arrange data to place a \r at the end of FastCSV's read ahead point
  encode_for_tests([["a" * 509]], row_sep: "\r\n") do |data|
    expected = "\r\n".encode(data.encoding)
    assert_equal(expected, FastCSV.new(data).row_sep)
  end
end
|
111
|
+
|
112
|
+
# col_sep, row_sep and quote_char given as options must be reported back in
# the encoding of the data being parsed.
def test_csv_chars_are_transcoded
  encode_for_tests([%w[abc def]]) do |data|
    expected = ",".encode(data.encoding)

    %w[col_sep row_sep quote_char].each do |csv_char|
      parser = FastCSV.new(data, csv_char.to_sym => ",")
      assert_equal(expected, parser.send(csv_char))
    end
  end
end
|
120
|
+
|
121
|
+
# With headers: true, both the headers and the row fields must keep the
# encoding of the input data.
def test_parser_works_with_encoded_headers
  encode_for_tests([%w[one two three], %w[1 2 3]]) do |data|
    parsed = FastCSV.parse(data, headers: true)

    assert( parsed.headers.all? { |h| h.encoding == data.encoding },
            "Wrong data encoding." )

    parsed.each do |row|
      assert( row.fields.all? { |f| f.encoding == data.encoding },
              "Wrong data encoding." )
    end
  end
end
|
132
|
+
|
133
|
+
# With the :integer converter, non-numeric fields keep the data's encoding
# and numeric fields come back as integers.
def test_built_in_converters_transcode_to_utf_8_then_convert
  encode_for_tests([%w[one two three], %w[1 2 3]]) do |data|
    parsed = FastCSV.parse(data, converters: :integer)

    assert( parsed[0].all? { |f| f.encoding == data.encoding },
            "Wrong data encoding." )
    assert_equal([1, 2, 3], parsed[1])
  end
end
|
141
|
+
|
142
|
+
# With the :downcase header converter, headers are expected in UTF-8 while
# row fields keep the data's encoding.
def test_built_in_header_converters_transcode_to_utf_8_then_convert
  encode_for_tests([%w[one two three], %w[1 2 3]]) do |data|
    parsed = FastCSV.parse( data, headers:           true,
                                  header_converters: :downcase )

    assert( parsed.headers.all? { |h| h.encoding.name == "UTF-8" },
            "Wrong data encoding." )
    assert( parsed[0].fields.all? { |f| f.encoding == data.encoding },
            "Wrong data encoding." )
  end
end
|
152
|
+
|
153
|
+
# FastCSV.open with an "rb:<encoding>" mode string must yield fields in that
# encoding.  The transcoding variant is kept disabled, as in the original.
def test_open_allows_you_to_set_encodings
  encode_for_tests([%w[abc def]]) do |data|
    # read and write in encoding
    File.open(@temp_csv_path, "wb:#{data.encoding.name}") { |f| f << data }
    FastCSV.open(@temp_csv_path, "rb:#{data.encoding.name}") do |csv|
      csv.each do |row|
        assert( row.all? { |f| f.encoding == data.encoding },
                "Wrong data encoding." )
      end
    end

    # read and write with transcoding
    # File.open(@temp_csv_path, "wb:UTF-32BE:#{data.encoding.name}") do |f|
    #   f << data
    # end
    # FastCSV.open(@temp_csv_path, "rb:UTF-32BE:#{data.encoding.name}") do |csv|
    #   csv.each do |row|
    #     assert( row.all? { |f| f.encoding == data.encoding },
    #             "Wrong data encoding." )
    #   end
    # end
  end
end
|
176
|
+
|
177
|
+
# FastCSV.foreach with an :encoding option must yield fields in that
# encoding.  The transcoding variant is kept disabled, as in the original.
def test_foreach_allows_you_to_set_encodings
  encode_for_tests([%w[abc def]]) do |data|
    # read and write in encoding
    File.open(@temp_csv_path, "wb", encoding: data.encoding) { |f| f << data }
    FastCSV.foreach(@temp_csv_path, encoding: data.encoding) do |row|
      row.each {|f| assert_equal(f.encoding, data.encoding)}
    end

    # read and write with transcoding
    # File.open(@temp_csv_path, "wb:UTF-32BE:#{data.encoding.name}") do |f|
    #   f << data
    # end
    # FastCSV.foreach( @temp_csv_path,
    #                  encoding: "UTF-32BE:#{data.encoding.name}" ) do |row|
    #   assert( row.all? { |f| f.encoding == data.encoding },
    #           "Wrong data encoding." )
    # end
  end
end
|
196
|
+
|
197
|
+
# FastCSV.read with an :encoding option must return fields in that encoding.
# The transcoding variant is kept disabled, as in the original.
def test_read_allows_you_to_set_encodings
  encode_for_tests([%w[abc def]]) do |data|
    # read and write in encoding
    File.open(@temp_csv_path, "wb:#{data.encoding.name}") { |f| f << data }
    rows = FastCSV.read(@temp_csv_path, encoding: data.encoding.name)
    assert( rows.flatten.all? { |f| f.encoding == data.encoding },
            "Wrong data encoding." )

    # read and write with transcoding
    # File.open(@temp_csv_path, "wb:UTF-32BE:#{data.encoding.name}") do |f|
    #   f << data
    # end
    # rows = FastCSV.read( @temp_csv_path,
    #                      encoding: "UTF-32BE:#{data.encoding.name}" )
    # assert( rows.flatten.all? { |f| f.encoding == data.encoding },
    #         "Wrong data encoding." )
  end
end
|
215
|
+
|
216
|
+
#################################
|
217
|
+
### Write FastCSV in any Encoding ###
|
218
|
+
#################################
|
219
|
+
|
220
|
+
# For every encoding from each_encoding: Array#to_csv must produce output in
# that encoding (with and without an explicit hint), and rows written through
# FastCSV.open must read back unchanged.  Encodings that raise
# ConverterNotFoundError on the first to_csv call are skipped.
def test_can_write_csv_in_any_encoding
  each_encoding do |encoding|
    # test generate_line with encoding hint
    begin
      csv = %w[abc d,ef].map { |f| f.encode(encoding) }.
        to_csv(col_sep: ",", encoding: encoding.name)
    rescue Encoding::ConverterNotFoundError
      next
    end
    assert_equal(encoding, csv.encoding)

    # test generate_line with encoding guessing from fields
    csv = %w[abc d,ef].map { |f| f.encode(encoding) }.to_csv(col_sep: ",")
    assert_equal(encoding, csv.encoding)

    # writing to files
    data = encode_ary([%w[abc d,ef], %w[123 456 ]], encoding)
    FastCSV.open(@temp_csv_path, "wb:#{encoding.name}") do |f|
      data.each { |row| f << row }
    end
    assert_equal(data, FastCSV.read(@temp_csv_path, encoding: encoding.name))
  end
end
|
243
|
+
|
244
|
+
# Mixing a US-ASCII field with a UTF-8 field must yield UTF-8 output from
# to_csv, matching what Array#join does for the same data.
def test_encoding_is_upgraded_during_writing_as_needed
  data = ["foo".force_encoding("US-ASCII"), "\u3042"]

  assert_equal("US-ASCII", data.first.encoding.name)
  assert_equal("UTF-8",    data.last.encoding.name)
  assert_equal("UTF-8",    data.join('').encoding.name)
  assert_equal("UTF-8",    data.to_csv.encoding.name)
end
|
251
|
+
|
252
|
+
# Mixing an ASCII-only ISO-8859-1 field with a UTF-8 field must yield UTF-8
# output from to_csv, matching what Array#join does for the same data.
def test_encoding_is_upgraded_for_ascii_content_during_writing_as_needed
  data = ["foo".force_encoding("ISO-8859-1"), "\u3042"]

  assert_equal("ISO-8859-1", data.first.encoding.name)
  assert_equal("UTF-8",      data.last.encoding.name)
  assert_equal("UTF-8",      data.join('').encoding.name)
  assert_equal("UTF-8",      data.to_csv.encoding.name)
end
|
259
|
+
|
260
|
+
private
|
261
|
+
|
262
|
+
# Asserts that +fields+ (an Array of rows of Strings) survives a round trip
# through FastCSV in +encoding+.
#
# fields   - rows of String fields, in the source encoding of this file
# encoding - an Encoding or an encoding name
# options  - passed on to ary_to_data, FastCSV.parse and FastCSV.open
#
# Three checks are made: parsing from a String, reading from a file opened
# in +encoding+, and (unless +encoding+ already is __ENCODING__) reading from
# a file with transcoding to __ENCODING__.
def assert_parses(fields, encoding, options = { })
  encoding    = Encoding.find(encoding) unless encoding.is_a? Encoding
  orig_fields = fields
  fields      = encode_ary(fields, encoding)
  data        = ary_to_data(fields, options)

  # 1. parse from memory: same values, no transcoding
  parsed = FastCSV.parse(data, options)
  assert_equal(fields, parsed)
  parsed.flatten.each_with_index do |field, i|
    assert_equal(encoding, field.encoding, "Field[#{i + 1}] was transcoded.")
  end

  # 2. read from a file in the same encoding
  File.open(@temp_csv_path, "wb") {|f| f.print(data)}
  FastCSV.open(@temp_csv_path, "rb:#{encoding}", options) do |csv|
    csv.each_with_index do |row, i|
      assert_equal(fields[i], row)
    end
  end

  # 3. read from the file while transcoding to the source encoding
  begin
    unless encoding == __ENCODING__
      FastCSV.open(@temp_csv_path, "rb:#{encoding}:#{__ENCODING__}", options) do |csv|
        csv.each_with_index do |row, i|
          assert_equal(orig_fields[i], row)
        end
      end
    end
  rescue Encoding::ConverterNotFoundError
    # deliberately ignored: no converter between the two encodings
  end

  # options[:encoding] = encoding.name
  # FastCSV.open(@temp_csv_path, options) do |csv|
  #   csv.each_with_index do |row, i|
  #     assert_equal(fields[i], row)
  #   end
  # end
  # options.delete(:encoding)
  # options[:external_encoding] = encoding.name
  # options[:internal_encoding] = __ENCODING__.name
  # begin
  #   FastCSV.open(@temp_csv_path, options) do |csv|
  #     csv.each_with_index do |row, i|
  #       assert_equal(orig_fields[i], row)
  #     end
  #   end unless encoding == __ENCODING__
  # rescue Encoding::ConverterNotFoundError
  # end
end
|
304
|
+
|
305
|
+
# Returns a copy of +ary+ (an Array of rows of Strings) with every field
# transcoded to +encoding+ via String#encode.  The input is not modified.
def encode_ary(ary, encoding)
  ary.map do |row|
    row.map { |field| field.encode(encoding) }
  end
end
|
308
|
+
|
309
|
+
# Serialises +ary+ (an Array of rows of Strings) to CSV text by hand, without
# going through FastCSV.
#
# Every field is wrapped in the quote character; fields are joined with the
# column separator and every row is terminated with the row separator.
# options may supply :quote_char, :col_sep and :row_sep (defaults: '"', ","
# and "\n").  The result is in the encoding of the first field.  Quote
# characters inside a field are not escaped.
def ary_to_data(ary, options = { })
  encoding   = ary.flatten.first.encoding
  quote_char = (options[:quote_char] || '"').encode(encoding)
  col_sep    = (options[:col_sep]    || ",").encode(encoding)
  row_sep    = (options[:row_sep]    || "\n").encode(encoding)

  lines = ary.map do |row|
    quoted = row.map do |field|
      [quote_char, field.encode(encoding), quote_char].join('')
    end
    quoted.join(col_sep) + row_sep
  end

  lines.join('').encode(encoding)
end
|
320
|
+
|
321
|
+
# Yields +data+ (an Array of rows of Strings) rendered as UTF-8 CSV text by
# ary_to_data; +options+ is forwarded to ary_to_data.  The UTF-16BE variant
# is kept disabled, as in the original.
def encode_for_tests(data, options = { })
  utf8_rows = encode_ary(data, "UTF-8")
  yield ary_to_data(utf8_rows, options)
  # yield ary_to_data(encode_ary(data, "UTF-16BE"), options)
end
|
325
|
+
|
326
|
+
# Yields every Encoding from Encoding.list except those whose name starts
# with "UTF-" followed by two digits (UTF-16*, UTF-32*) and except "dummy"
# encodings.
def each_encoding
  Encoding.list.each do |encoding|
    next if encoding.name[/\AUTF-\d\d/]   # skip the UTF-16/UTF-32 family
    next if encoding.dummy?               # skip "dummy" encodings
    yield encoding
  end
end
|
332
|
+
|
333
|
+
# Runs the block with $VERBOSE set to nil and returns the block's value.
# The previous $VERBOSE is restored afterwards, even if the block raises.
def no_warnings
  old_verbose = $VERBOSE
  $VERBOSE    = nil
  yield
ensure
  $VERBOSE = old_verbose
end
|
339
|
+
end
|