fastcsv 0.0.2 → 0.0.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.travis.yml +11 -0
- data/README.md +37 -2
- data/TESTS.md +42 -0
- data/ext/fastcsv/fastcsv.c +281 -223
- data/ext/fastcsv/fastcsv.rl +149 -72
- data/fastcsv.gemspec +1 -1
- data/lib/fastcsv.rb +130 -0
- data/spec/fastcsv_spec.rb +189 -57
- data/spec/fixtures/csv.csv +3 -0
- data/spec/fixtures/iso-8859-1-quoted.csv +1 -0
- data/spec/fixtures/utf-8-quoted.csv +1 -0
- data/spec/spec_helper.rb +5 -0
- data/test/csv/base.rb +8 -0
- data/test/csv/line_endings.gz +0 -0
- data/test/csv/test_csv_parsing.rb +221 -0
- data/test/csv/test_csv_writing.rb +97 -0
- data/test/csv/test_data_converters.rb +263 -0
- data/test/csv/test_encodings.rb +339 -0
- data/test/csv/test_features.rb +317 -0
- data/test/csv/test_headers.rb +289 -0
- data/test/csv/test_interface.rb +362 -0
- data/test/csv/test_row.rb +349 -0
- data/test/csv/test_table.rb +420 -0
- data/test/csv/ts_all.rb +20 -0
- data/test/runner.rb +36 -0
- data/test/with_different_ofs.rb +17 -0
- metadata +38 -2
@@ -0,0 +1,97 @@
|
|
1
|
+
#!/usr/bin/env ruby -w
|
2
|
+
# encoding: UTF-8
|
3
|
+
|
4
|
+
# tc_csv_writing.rb
|
5
|
+
#
|
6
|
+
# Created by James Edward Gray II on 2005-10-31.
|
7
|
+
# Copyright 2005 James Edward Gray II. You can redistribute or modify this code
|
8
|
+
# under the terms of Ruby's license.
|
9
|
+
|
10
|
+
require_relative "base"
|
11
|
+
|
12
|
+
class TestCSV::Writing < TestCSV
|
13
|
+
extend DifferentOFS
|
14
|
+
|
15
|
+
def test_writing
  # Each entry pairs the expected CSV text (without the trailing record
  # separator) with the array of fields handed to FastCSV.generate_line.
  cases = [
    ["\t", ["\t"]],
    ["foo,\"\"\"\"\"\",baz", ["foo", "\"\"", "baz"]],
    ["foo,\"\"\"bar\"\"\",baz", ["foo", "\"bar\"", "baz"]],
    ["\"\"\"\n\",\"\"\"\n\"", ["\"\n", "\"\n"]],
    ["foo,\"\r\n\",baz", ["foo", "\r\n", "baz"]],
    ["\"\"", [""]],
    ["foo,\"\"\"\",baz", ["foo", "\"", "baz"]],
    ["foo,\"\r.\n\",baz", ["foo", "\r.\n", "baz"]],
    ["foo,\"\r\",baz", ["foo", "\r", "baz"]],
    ["foo,\"\",baz", ["foo", "", "baz"]],
    ["\",\"", [","]],
    ["foo", ["foo"]],
    [",,", [nil, nil, nil]],
    [",", [nil, nil]],
    ["foo,\"\n\",baz", ["foo", "\n", "baz"]],
    ["foo,,baz", ["foo", nil, "baz"]],
    ["\"\"\"\r\",\"\"\"\r\"", ["\"\r", "\"\r"]],
    ["\",\",\",\"", [",", ","]],
    ["foo,bar,", ["foo", "bar", nil]],
    [",foo,bar", [nil, "foo", "bar"]],
    ["foo,bar", ["foo", "bar"]],
    [";", [";"]],
    ["\t,\t", ["\t", "\t"]],
    ["foo,\"\r\n\r\",baz", ["foo", "\r\n\r", "baz"]],
    ["foo,\"\r\n\n\",baz", ["foo", "\r\n\n", "baz"]],
    ["foo,\"foo,bar\",baz", ["foo", "foo,bar", "baz"]],
    [";,;", [";", ";"]],
    ["foo,\"\"\"\"\"\",baz", ["foo", "\"\"", "baz"]],
    ["foo,\"\"\"bar\"\"\",baz", ["foo", "\"bar\"", "baz"]],
    ["foo,\"\r\n\",baz", ["foo", "\r\n", "baz"]],
    ["\"\"", [""]],
    ["foo,\"\"\"\",baz", ["foo", "\"", "baz"]],
    ["foo,\"\r.\n\",baz", ["foo", "\r.\n", "baz"]],
    ["foo,\"\r\",baz", ["foo", "\r", "baz"]],
    ["foo,\"\",baz", ["foo", "", "baz"]],
    ["foo", ["foo"]],
    [",,", [nil, nil, nil]],
    [",", [nil, nil]],
    ["foo,\"\n\",baz", ["foo", "\n", "baz"]],
    ["foo,,baz", ["foo", nil, "baz"]],
    ["foo,bar", ["foo", "bar"]],
    ["foo,\"\r\n\n\",baz", ["foo", "\r\n\n", "baz"]],
    ["foo,\"foo,bar\",baz", ["foo", "foo,bar", "baz"]],
    [%Q{a,b}, ["a", "b"]],
    [%Q{a,"""b"""}, ["a", "\"b\""]],
    [%Q{a,"""b"}, ["a", "\"b"]],
    [%Q{a,"b"""}, ["a", "b\""]],
    [%Q{a,"\nb"""}, ["a", "\nb\""]],
    [%Q{a,"""\nb"}, ["a", "\"\nb"]],
    [%Q{a,"""\nb\n"""}, ["a", "\"\nb\n\""]],
    [%Q{a,"""\nb\n""",}, ["a", "\"\nb\n\"", nil]],
    [%Q{a,,,}, ["a", nil, nil, nil]],
    [%Q{,}, [nil, nil]],
    [%Q{"",""}, ["", ""]],
    [%Q{""""}, ["\""]],
    [%Q{"""",""}, ["\"",""]],
    [%Q{,""}, [nil,""]],
    [%Q{,"\r"}, [nil,"\r"]],
    [%Q{"\r\n,"}, ["\r\n,"]],
    [%Q{"\r\n,",}, ["\r\n,", nil]]
  ]

  cases.each do |expected, fields|
    # The generated line is compared against the expected text plus $/.
    assert_equal(expected + $/, FastCSV.generate_line(fields))
  end
end
|
79
|
+
|
80
|
+
def test_col_sep
  # Maps each column separator to the line expected for the same four
  # fields (the third field is nil).
  { ";" => "a;b;;c\n", "\t" => "a\tb\t\tc\n" }.each do |separator, expected|
    assert_equal(expected,
                 FastCSV.generate_line(["a", "b", nil, "c"], col_sep: separator))
  end
end
|
86
|
+
|
87
|
+
def test_row_sep
  # The requested row separator is expected at the end of the line.
  generated = FastCSV.generate_line(["a", "b", nil, "c"], row_sep: "\r\n")
  assert_equal("a,b,,c\r\n", generated)
end
|
91
|
+
|
92
|
+
def test_force_quotes
  # With force_quotes every field is expected to be quoted: the integer,
  # the nil (as an empty quoted field) and the field that already
  # contains quote characters (which appear doubled).
  expected = %Q{"1","b","","already ""quoted"""\n}
  fields = [1, "b", nil, %Q{already "quoted"}]
  assert_equal(expected, FastCSV.generate_line(fields, force_quotes: true))
end
|
97
|
+
end
|
@@ -0,0 +1,263 @@
|
|
1
|
+
#!/usr/bin/env ruby -w
|
2
|
+
# encoding: UTF-8
|
3
|
+
|
4
|
+
# tc_data_converters.rb
|
5
|
+
#
|
6
|
+
# Created by James Edward Gray II on 2005-10-31.
|
7
|
+
# Copyright 2005 James Edward Gray II. You can redistribute or modify this code
|
8
|
+
# under the terms of Ruby's license.
|
9
|
+
|
10
|
+
require_relative "base"
|
11
|
+
|
12
|
+
class TestCSV::DataConverters < TestCSV
|
13
|
+
extend DifferentOFS
|
14
|
+
|
15
|
+
def setup
  super

  # One CSV line mixing text, symbol-looking fields and numbers.
  @data = "Numbers,:integer,1,:float,3.015"
  @parser = FastCSV.new(@data)

  # Custom converter: a field of the form ":name" becomes the Symbol :name,
  # anything else is returned untouched.
  @custom = lambda do |field|
    if field =~ /\A:(\S.*?)\s*\Z/
      $1.to_sym
    else
      field
    end
  end

  # Current time formatted as e.g. "Mon Jan 01 12:34:56 2024".
  @win_safe_time_str = Time.now.strftime("%a %b %d %H:%M:%S %Y")
end
|
24
|
+
|
25
|
+
def test_builtin_integer_converter
  to_integer = FastCSV::Converters[:integer]

  # Integer-looking strings must be converted to the matching number.
  [-5, 1, 10000000000].each { |n| assert_equal(n, to_integer[n.to_s]) }

  # Everything else must be handed back unchanged.
  ["junk", "1.0", ""].each { |str| assert_equal(str, to_integer[str]) }
end
|
36
|
+
|
37
|
+
def test_builtin_float_converter
  to_float = FastCSV::Converters[:float]

  # Float-looking strings must be converted to the matching number.
  [-5.1234, 0, 2.3e-11].each { |n| assert_equal(n, to_float[n.to_s]) }

  # Everything else must be handed back unchanged.
  ["junk", "1..0", ".015F", ""].each { |str| assert_equal(str, to_float[str]) }
end
|
48
|
+
|
49
|
+
def test_builtin_date_converter
  to_date = FastCSV::Converters[:date]

  # Drop the "HH:MM:SS " part of the timestamp so only a date remains;
  # that string must convert to a Date.
  date_only = @win_safe_time_str.sub(/\d+:\d+:\d+ /, "")
  assert_instance_of(Date, to_date[date_only])

  # Unparseable input must stay a String.
  assert_instance_of(String, to_date["junk"])
end
|
59
|
+
|
60
|
+
def test_builtin_date_time_converter
  to_date_time = FastCSV::Converters[:date_time]

  # The full timestamp string must convert to a DateTime.
  assert_instance_of(DateTime, to_date_time[@win_safe_time_str])

  # Unparseable input must stay a String.
  assert_instance_of(String, to_date_time["junk"])
end
|
68
|
+
|
69
|
+
def test_convert_with_builtin_integer
  # Install the built-in integer converter on the parser.
  assert(@parser.respond_to?(:convert))
  assert_nothing_raised(Exception) do
    @parser.convert(:integer)
  end

  # Only the integer-looking field is expected to change type.
  expected_row = ["Numbers", ":integer", 1, ":float", "3.015"]
  assert_equal(expected_row, @parser.shift)
end
|
77
|
+
|
78
|
+
def test_convert_with_builtin_float
  # Install the built-in float converter on the parser.
  assert(@parser.respond_to?(:convert))
  assert_nothing_raised(Exception) do
    @parser.convert(:float)
  end

  # Both numeric fields are expected to come back as floats.
  expected_row = ["Numbers", ":integer", 1.0, ":float", 3.015]
  assert_equal(expected_row, @parser.shift)
end
|
86
|
+
|
87
|
+
def test_convert_order_float_integer
  # Register the float converter before the integer converter.
  assert_nothing_raised(Exception) do
    @parser.convert(:float)
    @parser.convert(:integer)
  end

  # With that order both numeric fields are expected to be Floats.
  field_classes = @parser.shift.map(&:class)
  assert_equal([String, String, Float, String, Float], field_classes)
end
|
98
|
+
|
99
|
+
def test_convert_order_integer_float
  # Integers have precedence: register the integer converter before the
  # float converter.
  assert_nothing_raised(Exception) do
    @parser.convert(:integer)
    @parser.convert(:float)
  end

  # Fixnum was unified into Integer in Ruby 2.4 and the constant was removed
  # in Ruby 3.2, so ask a small integer literal for its class instead of
  # naming it; this is Fixnum on old rubies and Integer on current ones.
  integer_class = 1.class

  # gives us proper number conversion
  assert_equal( [String, String, integer_class, String, Float],
                @parser.shift.map { |field| field.class } )
end
|
110
|
+
|
111
|
+
def test_builtin_numeric_combo_converter
  # Install the combined :numeric converter on the parser.
  assert_nothing_raised(Exception) { @parser.convert(:numeric) }

  # Fixnum was unified into Integer in Ruby 2.4 and the constant was removed
  # in Ruby 3.2, so ask a small integer literal for its class instead of
  # naming it; this is Fixnum on old rubies and Integer on current ones.
  integer_class = 1.class

  # The integer field and the float field must each get their own type.
  assert_equal( [String, String, integer_class, String, Float],
                @parser.shift.map { |field| field.class } )
end
|
119
|
+
|
120
|
+
def test_builtin_all_nested_combo_converter
  # Add a DateTime field. A new string is built instead of appending with <<
  # so the string literal assigned in setup is never mutated in place
  # (which would fail if string literals are frozen).
  @data = "#{@data},#{@win_safe_time_str}"
  @parser = FastCSV.new(@data) # reset parser so it reads the extended line
  assert_nothing_raised(Exception) { @parser.convert(:all) }

  # Fixnum was unified into Integer in Ruby 2.4 and the constant was removed
  # in Ruby 3.2, so ask a small integer literal for its class instead of
  # naming it; this is Fixnum on old rubies and Integer on current ones.
  integer_class = 1.class

  # and use
  assert_equal( [String, String, integer_class, String, Float, DateTime],
                @parser.shift.map { |field| field.class } )
end
|
130
|
+
|
131
|
+
def test_convert_with_custom_code
  # Register a block converter: ":name" fields become Symbols, the rest
  # pass through unchanged.
  assert_nothing_raised(Exception) do
    @parser.convert do |field|
      if field =~ /\A:(\S.*?)\s*\Z/
        $1.to_sym
      else
        field
      end
    end
  end

  # Numeric fields stay Strings because no numeric converter is installed.
  expected_row = ["Numbers", :integer, "1", :float, "3.015"]
  assert_equal(expected_row, @parser.shift)
end
|
140
|
+
|
141
|
+
def test_convert_with_custom_code_mix
  # Combine the built-in :numeric converter with the custom lambda
  # created in setup.
  assert_nothing_raised(Exception) do
    @parser.convert(:numeric)
  end
  assert_nothing_raised(Exception) do
    @parser.convert(&@custom)
  end

  # Both numbers and symbol-looking fields are expected to be converted.
  expected_row = ["Numbers", :integer, 1, :float, 3.015]
  assert_equal(expected_row, @parser.shift)
end
|
149
|
+
|
150
|
+
def test_convert_with_custom_code_using_field_info
  # Register a converter that also receives the field info object; it
  # checks the reported line and floors only the field at index 4.
  assert_nothing_raised(Exception) do
    @parser.convert do |field, info|
      assert_equal(1, info.line)
      if info.index == 4
        Float(field).floor
      else
        field
      end
    end
  end

  # Only the last field (index 4) is expected to be converted.
  expected_row = ["Numbers", ":integer", "1", ":float", 3]
  assert_equal(expected_row, @parser.shift)
end
|
162
|
+
|
163
|
+
def test_convert_with_custom_code_using_field_info_header
  header_names = %w{one two three four five}
  @parser = FastCSV.new(@data, headers: header_names)

  # Register a converter that looks at the header reported for each field
  # and multiplies the field under "three" by 100.
  assert_nothing_raised(Exception) do
    @parser.convert do |field, info|
      if info.header == "three"
        Integer(field) * 100
      else
        field
      end
    end
  end

  # Only the third column is expected to be converted.
  expected_fields = ["Numbers", ":integer", 100, ":float", "3.015"]
  assert_equal(expected_fields, @parser.shift.fields)
end
|
177
|
+
|
178
|
+
def test_shortcut_interface
  # Converters given through the :converters option of parse_line.
  numbers_only = ["Numbers", ":integer", 1, ":float", 3.015]

  # A single converter name.
  assert_equal(numbers_only,
               FastCSV.parse_line(@data, converters: :numeric))

  # A list of converter names.
  assert_equal(numbers_only,
               FastCSV.parse_line(@data, converters: [:integer, :float]))

  # A built-in name mixed with the custom lambda from setup.
  assert_equal(["Numbers", :integer, 1, :float, 3.015],
               FastCSV.parse_line(@data, converters: [:numeric, @custom]))
end
|
188
|
+
|
189
|
+
def test_unconverted_fields
  # Plain rows: [input text, converted fields, fields before conversion].
  samples = [
    [ @data,
      ["Numbers", :integer, 1, :float, 3.015],
      %w{Numbers :integer 1 :float 3.015} ],
    ["\n", Array.new, Array.new]
  ]
  samples.each do |text, converted, raw|
    parsed = nil
    assert_nothing_raised(Exception) do
      parsed = FastCSV.parse_line(text,
                                  converters: [:numeric, @custom],
                                  unconverted_fields: true)
    end
    assert_not_nil(parsed)
    assert_equal(converted, parsed)
    assert_respond_to(parsed, :unconverted_fields)
    assert_equal(raw, parsed.unconverted_fields)
  end

  # Two-line document: a header line followed by one data line.
  data = "first,second,third\n1,2,3\n"

  # Headers taken from the first row; the data row is returned.
  row = nil
  assert_nothing_raised(Exception) do
    row = FastCSV.parse_line(data,
                             converters: :numeric,
                             unconverted_fields: true,
                             headers: :first_row)
  end
  assert_not_nil(row)
  assert_equal([["first", 1], ["second", 2], ["third", 3]], row.to_a)
  assert_respond_to(row, :unconverted_fields)
  assert_equal(%w{1 2 3}, row.unconverted_fields)

  # Same, but the header row itself is returned.
  assert_nothing_raised(Exception) do
    row = FastCSV.parse_line(data,
                             converters: :numeric,
                             unconverted_fields: true,
                             headers: :first_row,
                             return_headers: true)
  end
  assert_not_nil(row)
  assert_equal([%w{first first}, %w{second second}, %w{third third}],
               row.to_a)
  assert_respond_to(row, :unconverted_fields)
  assert_equal(%w{first second third}, row.unconverted_fields)

  # Header row returned with headers converted to symbols.
  assert_nothing_raised(Exception) do
    row = FastCSV.parse_line(data,
                             converters: :numeric,
                             unconverted_fields: true,
                             headers: :first_row,
                             return_headers: true,
                             header_converters: :symbol)
  end
  assert_not_nil(row)
  assert_equal([[:first, "first"], [:second, "second"], [:third, "third"]],
               row.to_a)
  assert_respond_to(row, :unconverted_fields)
  assert_equal(%w{first second third}, row.unconverted_fields)

  # Headers supplied explicitly instead of being read from the data.
  assert_nothing_raised(Exception) do
    row = FastCSV.parse_line(data,
                             converters: :numeric,
                             unconverted_fields: true,
                             headers: %w{my new headers},
                             return_headers: true,
                             header_converters: :symbol)
  end
  assert_not_nil(row)
  assert_equal([[:my, "my"], [:new, "new"], [:headers, "headers"]],
               row.to_a)
  assert_respond_to(row, :unconverted_fields)
  assert_equal(Array.new, row.unconverted_fields)
end
|
263
|
+
end
|
@@ -0,0 +1,339 @@
|
|
1
|
+
#!/usr/bin/env ruby -w
|
2
|
+
# encoding: UTF-8
|
3
|
+
|
4
|
+
# tc_encodings.rb
|
5
|
+
#
|
6
|
+
# Created by James Edward Gray II on 2008-09-13.
|
7
|
+
# Copyright 2008 James Edward Gray II. You can redistribute or modify this code
|
8
|
+
# under the terms of Ruby's license.
|
9
|
+
|
10
|
+
require_relative "base"
|
11
|
+
|
12
|
+
class TestCSV::Encodings < TestCSV
|
13
|
+
extend DifferentOFS
|
14
|
+
|
15
|
+
def setup
  super
  require 'tempfile'

  # Create a temp file named "test_csv.<unique>.csv", remember its path and
  # close the handle; the tests reopen the file by path.
  @temp_csv_file = Tempfile.new(["test_csv.", ".csv"])
  @temp_csv_path = @temp_csv_file.path
  @temp_csv_file.close
end
|
22
|
+
|
23
|
+
def teardown
  # Close and unlink the temp file created in setup, then let the parent
  # class run its own teardown.
  @temp_csv_file.close!
  super
end
|
27
|
+
|
28
|
+
########################################
|
29
|
+
### Hand Test Some Popular Encodings ###
|
30
|
+
########################################
|
31
|
+
|
32
|
+
def test_parses_utf8_encoding
  # Three rows, each containing one non-ASCII ellipsis character.
  rows = [ %w[ one two … ],
           %w[ 1   …   3 ],
           %w[ …   5   6 ] ]
  assert_parses(rows, "UTF-8")
end
|
37
|
+
|
38
|
+
def test_parses_latin1_encoding
  # Three rows, each containing one field with accented characters.
  rows = [ %w[ one    two    Résumé ],
           %w[ 1      Résumé 3      ],
           %w[ Résumé 5      6      ] ]
  assert_parses(rows, "ISO-8859-1")
end
|
43
|
+
|
44
|
+
# def test_parses_utf16be_encoding
|
45
|
+
# assert_parses( [ %w[ one two … ],
|
46
|
+
# %w[ 1 … 3 ],
|
47
|
+
# %w[ … 5 6 ] ], "UTF-16BE" )
|
48
|
+
# end
|
49
|
+
|
50
|
+
def test_parses_shift_jis_encoding
  # Three rows of single-character Japanese fields.
  rows = [ %w[ 一 二 三 ],
           %w[ 四 五 六 ],
           %w[ 七 八 九 ] ]
  assert_parses(rows, "Shift_JIS")
end
|
55
|
+
|
56
|
+
###########################################################
|
57
|
+
### Try Simple Reading for All Non-dummy Ruby Encodings ###
|
58
|
+
###########################################################
|
59
|
+
|
60
|
+
def test_reading_with_most_encodings
  # Parse a small ASCII-only table in every encoding yielded by
  # each_encoding; a missing converter is reported as a test failure.
  each_encoding do |encoding|
    begin
      rows = [ %w[ abc def ],
               %w[ ghi jkl ] ]
      assert_parses(rows, encoding)
    rescue Encoding::ConverterNotFoundError
      fail("Failed to support #{encoding.name}.")
    end
  end
end
|
70
|
+
|
71
|
+
def test_regular_expression_escaping
  # Same as the plain reading test, but with an explicit col_sep option;
  # a missing converter is reported as a test failure.
  each_encoding do |encoding|
    begin
      rows = [ %w[ abc def ],
               %w[ ghi jkl ] ]
      assert_parses(rows, encoding, col_sep: ",")
    rescue Encoding::ConverterNotFoundError
      fail("Failed to properly escape #{encoding.name}.")
    end
  end
end
|
81
|
+
|
82
|
+
def test_read_with_default_encoding
  data = "abc"
  original_external = Encoding.default_external

  each_encoding do |encoding|
    File.open(@temp_csv_path, "wb", encoding: encoding) { |file| file << data }

    begin
      # Temporarily make this encoding the process-wide default, silencing
      # the warning Ruby prints when the default is changed.
      no_warnings { Encoding.default_external = encoding }
      result = FastCSV.read(@temp_csv_path)[0][0]
    ensure
      # Always put the original default back, even if reading failed.
      no_warnings { Encoding.default_external = original_external }
    end

    assert_equal(encoding, result.encoding)
  end
end
|
100
|
+
|
101
|
+
#######################################################################
|
102
|
+
### Stress Test ASCII Compatible and Non-ASCII Compatible Encodings ###
|
103
|
+
#######################################################################
|
104
|
+
|
105
|
+
def test_auto_line_ending_detection
  # A single 509-character field followed by "\r\n"; per the original
  # author's note this places the \r at the end of the read-ahead point.
  long_row = [["a" * 509]]
  encode_for_tests(long_row, row_sep: "\r\n") do |data|
    expected_sep = "\r\n".encode(data.encoding)
    assert_equal(expected_sep, FastCSV.new(data).row_sep)
  end
end
|
111
|
+
|
112
|
+
def test_csv_chars_are_transcoded
  encode_for_tests([%w[abc def]]) do |data|
    expected = ",".encode(data.encoding)

    # Set each special character option to "," in turn and read it back
    # through the accessor of the same name.
    [:col_sep, :row_sep, :quote_char].each do |option|
      parser = FastCSV.new(data, option => ",")
      assert_equal(expected, parser.send(option))
    end
  end
end
|
120
|
+
|
121
|
+
def test_parser_works_with_encoded_headers
  encode_for_tests([%w[one two three], %w[1 2 3]]) do |data|
    table = FastCSV.parse(data, headers: true)
    source_encoding = data.encoding

    # Headers must keep the encoding of the input data.
    headers_ok = table.headers.all? { |header| header.encoding == source_encoding }
    assert(headers_ok, "Wrong data encoding.")

    # So must every field of every row.
    table.each do |row|
      fields_ok = row.fields.all? { |field| field.encoding == source_encoding }
      assert(fields_ok, "Wrong data encoding.")
    end
  end
end
|
132
|
+
|
133
|
+
def test_built_in_converters_transcode_to_utf_8_then_convert
  encode_for_tests([%w[one two three], %w[1 2 3]]) do |data|
    rows = FastCSV.parse(data, converters: :integer)

    # The first row holds no integers, so its fields keep the data encoding.
    untouched = rows[0].all? { |field| field.encoding == data.encoding }
    assert(untouched, "Wrong data encoding.")

    # The second row must come back as integers.
    assert_equal([1, 2, 3], rows[1])
  end
end
|
141
|
+
|
142
|
+
def test_built_in_header_converters_transcode_to_utf_8_then_convert
  encode_for_tests([%w[one two three], %w[1 2 3]]) do |data|
    table = FastCSV.parse(data, headers: true, header_converters: :downcase)

    # Converted headers are expected to be UTF-8.
    headers_utf8 = table.headers.all? { |header| header.encoding.name == "UTF-8" }
    assert(headers_utf8, "Wrong data encoding.")

    # Data fields are expected to keep the encoding of the input.
    fields_ok = table[0].fields.all? { |field| field.encoding == data.encoding }
    assert(fields_ok, "Wrong data encoding.")
  end
end
|
152
|
+
|
153
|
+
def test_open_allows_you_to_set_encodings
  encode_for_tests([%w[abc def]]) do |data|
    encoding_name = data.encoding.name

    # Write the data in its own encoding, then read it back through
    # FastCSV.open with the encoding given in the mode string.
    File.open(@temp_csv_path, "wb:#{encoding_name}") { |file| file << data }
    FastCSV.open(@temp_csv_path, "rb:#{encoding_name}") do |csv|
      csv.each do |row|
        same_encoding = row.all? { |field| field.encoding == data.encoding }
        assert(same_encoding, "Wrong data encoding.")
      end
    end

    # read and write with transcoding
    # File.open(@temp_csv_path, "wb:UTF-32BE:#{data.encoding.name}") do |f|
    #   f << data
    # end
    # FastCSV.open(@temp_csv_path, "rb:UTF-32BE:#{data.encoding.name}") do |csv|
    #   csv.each do |row|
    #     assert( row.all? { |f| f.encoding == data.encoding },
    #             "Wrong data encoding." )
    #   end
    # end
  end
end
|
176
|
+
|
177
|
+
def test_foreach_allows_you_to_set_encodings
  encode_for_tests([%w[abc def]]) do |data|
    # read and write in encoding
    File.open(@temp_csv_path, "wb", encoding: data.encoding) { |f| f << data }
    FastCSV.foreach(@temp_csv_path, encoding: data.encoding) do |row|
      # assert_equal takes (expected, actual): the data's encoding is the
      # expectation and the parsed field's encoding is the value under test,
      # so failure messages report them the right way round.
      row.each { |f| assert_equal(data.encoding, f.encoding) }
    end

    # read and write with transcoding
    # File.open(@temp_csv_path, "wb:UTF-32BE:#{data.encoding.name}") do |f|
    #   f << data
    # end
    # FastCSV.foreach( @temp_csv_path,
    #                  encoding: "UTF-32BE:#{data.encoding.name}" ) do |row|
    #   assert( row.all? { |f| f.encoding == data.encoding },
    #           "Wrong data encoding." )
    # end
  end
end
|
196
|
+
|
197
|
+
def test_read_allows_you_to_set_encodings
  encode_for_tests([%w[abc def]]) do |data|
    encoding_name = data.encoding.name

    # Write the data in its own encoding, then read the whole file back
    # with the encoding passed as an option.
    File.open(@temp_csv_path, "wb:#{encoding_name}") { |file| file << data }
    rows = FastCSV.read(@temp_csv_path, encoding: encoding_name)
    same_encoding = rows.flatten.all? { |field| field.encoding == data.encoding }
    assert(same_encoding, "Wrong data encoding.")

    # read and write with transcoding
    # File.open(@temp_csv_path, "wb:UTF-32BE:#{data.encoding.name}") do |f|
    #   f << data
    # end
    # rows = FastCSV.read( @temp_csv_path,
    #                      encoding: "UTF-32BE:#{data.encoding.name}" )
    # assert( rows.flatten.all? { |f| f.encoding == data.encoding },
    #         "Wrong data encoding." )
  end
end
|
215
|
+
|
216
|
+
#################################
|
217
|
+
### Write FastCSV in any Encoding ###
|
218
|
+
#################################
|
219
|
+
|
220
|
+
def test_can_write_csv_in_any_encoding
  each_encoding do |encoding|
    # Generate a line with an explicit encoding hint. Encodings Ruby cannot
    # convert to are skipped entirely.
    begin
      hinted_fields = %w[abc d,ef].map { |field| field.encode(encoding) }
      csv = hinted_fields.to_csv(col_sep: ",", encoding: encoding.name)
    rescue Encoding::ConverterNotFoundError
      next
    end
    assert_equal(encoding, csv.encoding)

    # Generate a line without the hint; the encoding of the output is
    # expected to match that of the fields.
    guessed_fields = %w[abc d,ef].map { |field| field.encode(encoding) }
    csv = guessed_fields.to_csv(col_sep: ",")
    assert_equal(encoding, csv.encoding)

    # Write rows to a file in this encoding and read them back.
    data = encode_ary([%w[abc d,ef], %w[123 456 ]], encoding)
    FastCSV.open(@temp_csv_path, "wb:#{encoding.name}") do |csv_file|
      data.each { |row| csv_file << row }
    end
    assert_equal(data, FastCSV.read(@temp_csv_path, encoding: encoding.name))
  end
end
|
243
|
+
|
244
|
+
def test_encoding_is_upgraded_during_writing_as_needed
  # One US-ASCII field next to one UTF-8 field with a non-ASCII character.
  ascii_field = "foo".force_encoding("US-ASCII")
  data = [ascii_field, "\u3042"]

  # Sanity-check the inputs and what a plain join produces ...
  assert_equal("US-ASCII", data.first.encoding.name)
  assert_equal("UTF-8", data.last.encoding.name)
  assert_equal("UTF-8", data.join('').encoding.name)

  # ... then require to_csv to produce UTF-8 as well.
  assert_equal("UTF-8", data.to_csv.encoding.name)
end
|
251
|
+
|
252
|
+
def test_encoding_is_upgraded_for_ascii_content_during_writing_as_needed
  # One ASCII-only field tagged ISO-8859-1 next to one UTF-8 field with a
  # non-ASCII character.
  latin1_field = "foo".force_encoding("ISO-8859-1")
  data = [latin1_field, "\u3042"]

  # Sanity-check the inputs and what a plain join produces ...
  assert_equal("ISO-8859-1", data.first.encoding.name)
  assert_equal("UTF-8", data.last.encoding.name)
  assert_equal("UTF-8", data.join('').encoding.name)

  # ... then require to_csv to produce UTF-8 as well.
  assert_equal("UTF-8", data.to_csv.encoding.name)
end
|
259
|
+
|
260
|
+
private
|
261
|
+
|
262
|
+
# Encodes +fields+ (an array of rows) into +encoding+, serializes them with
# ary_to_data and asserts that FastCSV parses the result back to the same
# rows, both from a String and from the temp file. +encoding+ may be an
# Encoding or an encoding name; +options+ are passed on to FastCSV.
def assert_parses(fields, encoding, options = { })
  encoding = encoding.is_a?(Encoding) ? encoding : Encoding.find(encoding)
  orig_fields = fields
  fields = encode_ary(fields, encoding)
  data = ary_to_data(fields, options)

  # Parse from a String: same rows, and no field may change encoding.
  parsed = FastCSV.parse(data, options)
  assert_equal(fields, parsed)
  parsed.flatten.each_with_index do |field, index|
    assert_equal(encoding, field.encoding, "Field[#{index + 1}] was transcoded.")
  end

  # Parse from a file opened in the same encoding.
  File.open(@temp_csv_path, "wb") { |file| file.print(data) }
  FastCSV.open(@temp_csv_path, "rb:#{encoding}", options) do |csv|
    csv.each_with_index do |row, index|
      assert_equal(fields[index], row)
    end
  end

  # Parse from a file while transcoding to the source encoding of this test
  # file; skipped when both encodings are the same, and silently ignored
  # when Ruby has no converter between the two.
  begin
    if encoding != __ENCODING__
      FastCSV.open(@temp_csv_path, "rb:#{encoding}:#{__ENCODING__}", options) do |csv|
        csv.each_with_index do |row, index|
          assert_equal(orig_fields[index], row)
        end
      end
    end
  rescue Encoding::ConverterNotFoundError
  end
  # options[:encoding] = encoding.name
  # FastCSV.open(@temp_csv_path, options) do |csv|
  #   csv.each_with_index do |row, i|
  #     assert_equal(fields[i], row)
  #   end
  # end
  # options.delete(:encoding)
  # options[:external_encoding] = encoding.name
  # options[:internal_encoding] = __ENCODING__.name
  # begin
  #   FastCSV.open(@temp_csv_path, options) do |csv|
  #     csv.each_with_index do |row, i|
  #       assert_equal(orig_fields[i], row)
  #     end
  #   end unless encoding == __ENCODING__
  # rescue Encoding::ConverterNotFoundError
  # end
end
|
304
|
+
|
305
|
+
# Returns a copy of +ary+ (an array of rows of Strings) in which every
# field has been transcoded to +encoding+.
def encode_ary(ary, encoding)
  ary.map do |row|
    row.map do |field|
      field.encode(encoding)
    end
  end
end
|
308
|
+
|
309
|
+
# Serializes +ary+ (an array of rows of Strings) into CSV text in which
# every field is quoted.
#
# The output encoding is taken from the first field. When there are no
# fields at all, the source encoding of this file is used instead of
# failing on a nil field.
#
# Recognized +options+ (all optional): :quote_char (default '"'),
# :col_sep (default ",") and :row_sep (default "\n").
#
# Quote characters inside a field are doubled so the field stays a single
# well-formed quoted value.
def ary_to_data(ary, options = { })
  sample     = ary.flatten.first
  encoding   = sample ? sample.encoding : __ENCODING__
  quote_char = (options[:quote_char] || '"').encode(encoding)
  col_sep    = (options[:col_sep]    || ",").encode(encoding)
  row_sep    = (options[:row_sep]    || "\n").encode(encoding)
  ary.map { |row|
    row.map { |field|
      # Block form of gsub: the replacement is used literally, so a quote
      # character such as "\\" is not treated as a back-reference.
      escaped = field.encode(encoding).gsub(quote_char) { quote_char + quote_char }
      [quote_char, escaped, quote_char].join('')
    }.join(col_sep) + row_sep
  }.join('').encode(encoding)
end
|
320
|
+
|
321
|
+
# Yields +data+ (an array of rows) serialized as UTF-8 CSV text, built with
# encode_ary and ary_to_data; +options+ are forwarded to ary_to_data.
def encode_for_tests(data, options = { })
  utf8_rows = encode_ary(data, "UTF-8")
  yield ary_to_data(utf8_rows, options)
  # yield ary_to_data(encode_ary(data, "UTF-16BE"), options)
end
|
325
|
+
|
326
|
+
# Yields every encoding from Encoding.list except those whose name starts
# with "UTF-" followed by two digits (UTF-16*, UTF-32*) and except dummy
# encodings.
#
# Without a block an Enumerator over the same encodings is returned, so
# callers can chain Enumerable methods.
def each_encoding
  return to_enum(:each_encoding) unless block_given?

  Encoding.list.reject { |e| e.name[/\AUTF-\d\d/] }.each do |encoding|
    next if encoding.dummy?  # skip "dummy" encodings
    yield encoding
  end
end
|
332
|
+
|
333
|
+
# Runs the given block with $VERBOSE set to nil and restores the previous
# value afterwards, even if the block raises.
def no_warnings
  saved_verbosity = $VERBOSE
  $VERBOSE = nil
  yield
ensure
  $VERBOSE = saved_verbosity
end
|
339
|
+
end
|