smarter_csv 1.4.0 → 1.5.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.gitignore +2 -0
- data/CHANGELOG.md +24 -2
- data/CONTRIBUTORS.md +46 -0
- data/LICENSE.txt +1 -1
- data/README.md +53 -69
- data/Rakefile +8 -15
- data/lib/smarter_csv/smarter_csv.rb +168 -112
- data/lib/smarter_csv/version.rb +1 -1
- data/lib/smarter_csv.rb +8 -0
- data/smarter_csv.gemspec +1 -0
- data/spec/fixtures/additional_separator.csv +6 -0
- data/spec/fixtures/duplicate_headers.csv +1 -1
- data/spec/fixtures/hard_sample.csv +2 -0
- data/spec/smarter_csv/additional_separator_spec.rb +45 -0
- data/spec/smarter_csv/binary_file2_spec.rb +1 -1
- data/spec/smarter_csv/carriage_return_spec.rb +27 -7
- data/spec/smarter_csv/column_separator_spec.rb +7 -1
- data/spec/smarter_csv/duplicate_headers_spec.rb +76 -0
- data/spec/smarter_csv/hard_sample_spec.rb +24 -0
- data/spec/smarter_csv/ignore_comments_spec.rb +45 -30
- data/spec/smarter_csv/invalid_headers_spec.rb +8 -22
- data/spec/smarter_csv/no_header_spec.rb +16 -11
- metadata +28 -3
@@ -5,116 +5,37 @@ module SmarterCSV
|
|
5
5
|
class DuplicateHeaders < SmarterCSVException; end
|
6
6
|
class MissingHeaders < SmarterCSVException; end
|
7
7
|
class NoColSepDetected < SmarterCSVException; end
|
8
|
+
class KeyMappingError < SmarterCSVException; end
|
8
9
|
|
9
|
-
|
10
|
-
|
11
|
-
:remove_empty_values => true, :remove_zero_values => false , :remove_values_matching => nil , :remove_empty_hashes => true , :strip_whitespace => true,
|
12
|
-
:convert_values_to_numeric => true, :strip_chars_from_headers => nil , :user_provided_headers => nil , :headers_in_file => true,
|
13
|
-
:comment_regexp => /\A#/, :chunk_size => nil , :key_mapping_hash => nil , :downcase_header => true, :strings_as_keys => false, :file_encoding => 'utf-8',
|
14
|
-
:remove_unmapped_keys => false, :keep_original_headers => false, :value_converters => nil, :skip_lines => nil, :force_utf8 => false, :invalid_byte_sequence => '',
|
15
|
-
:auto_row_sep_chars => 500, :required_headers => nil
|
16
|
-
}
|
10
|
+
# first parameter: filename or input object which responds to readline method
|
11
|
+
def SmarterCSV.process(input, options={}, &block)
|
17
12
|
options = default_options.merge(options)
|
18
13
|
options[:invalid_byte_sequence] = '' if options[:invalid_byte_sequence].nil?
|
19
|
-
|
14
|
+
|
20
15
|
headerA = []
|
21
16
|
result = []
|
22
|
-
|
23
|
-
|
24
|
-
csv_line_count = 0
|
17
|
+
@file_line_count = 0
|
18
|
+
@csv_line_count = 0
|
25
19
|
has_rails = !! defined?(Rails)
|
26
20
|
begin
|
27
21
|
f = input.respond_to?(:readline) ? input : File.open(input, "r:#{options[:file_encoding]}")
|
28
22
|
|
23
|
+
# auto-detect the row separator
|
24
|
+
options[:row_sep] = SmarterCSV.guess_line_ending(f, options) if options[:row_sep].to_sym == :auto
|
29
25
|
# attempt to auto-detect column separator
|
30
|
-
options[:col_sep] = guess_column_separator(f) if options[:col_sep] ==
|
26
|
+
options[:col_sep] = guess_column_separator(f, options) if options[:col_sep].to_sym == :auto
|
27
|
+
# preserve options, in case we need to call the CSV class
|
28
|
+
csv_options = options.select{|k,v| [:col_sep, :row_sep, :quote_char].include?(k)} # options.slice(:col_sep, :row_sep, :quote_char)
|
29
|
+
csv_options.delete(:row_sep) if [nil, :auto].include?( options[:row_sep].to_sym )
|
30
|
+
csv_options.delete(:col_sep) if [nil, :auto].include?( options[:col_sep].to_sym )
|
31
31
|
|
32
32
|
if (options[:force_utf8] || options[:file_encoding] =~ /utf-8/i) && ( f.respond_to?(:external_encoding) && f.external_encoding != Encoding.find('UTF-8') || f.respond_to?(:encoding) && f.encoding != Encoding.find('UTF-8') )
|
33
33
|
puts 'WARNING: you are trying to process UTF-8 input, but did not open the input with "b:utf-8" option. See README file "NOTES about File Encodings".'
|
34
34
|
end
|
35
35
|
|
36
|
-
if options[:
|
37
|
-
options[:row_sep] = line_ending = SmarterCSV.guess_line_ending( f, options )
|
38
|
-
f.rewind
|
39
|
-
end
|
40
|
-
$INPUT_RECORD_SEPARATOR = options[:row_sep]
|
41
|
-
|
42
|
-
if options[:skip_lines].to_i > 0
|
43
|
-
options[:skip_lines].to_i.times{f.readline}
|
44
|
-
end
|
36
|
+
options[:skip_lines].to_i.times{f.readline(options[:row_sep])} if options[:skip_lines].to_i > 0
|
45
37
|
|
46
|
-
|
47
|
-
# process the header line in the CSV file..
|
48
|
-
# the first line of a CSV file contains the header .. it might be commented out, so we need to read it anyhow
|
49
|
-
header = f.readline
|
50
|
-
header = header.force_encoding('utf-8').encode('utf-8', invalid: :replace, undef: :replace, replace: options[:invalid_byte_sequence]) if options[:force_utf8] || options[:file_encoding] !~ /utf-8/i
|
51
|
-
header = header.sub(options[:comment_regexp],'').chomp(options[:row_sep])
|
52
|
-
|
53
|
-
file_line_count += 1
|
54
|
-
csv_line_count += 1
|
55
|
-
header = header.gsub(options[:strip_chars_from_headers], '') if options[:strip_chars_from_headers]
|
56
|
-
|
57
|
-
if (header =~ %r{#{options[:quote_char]}}) and (! options[:force_simple_split])
|
58
|
-
file_headerA = begin
|
59
|
-
CSV.parse( header, **csv_options ).flatten.collect!{|x| x.nil? ? '' : x} # to deal with nil values from CSV.parse
|
60
|
-
rescue CSV::MalformedCSVError => e
|
61
|
-
raise $!, "#{$!} [SmarterCSV: csv line #{csv_line_count}]", $!.backtrace
|
62
|
-
end
|
63
|
-
else
|
64
|
-
file_headerA = header.split(options[:col_sep])
|
65
|
-
end
|
66
|
-
file_header_size = file_headerA.size # before mapping, which could delete keys
|
67
|
-
|
68
|
-
file_headerA.map!{|x| x.gsub(%r/#{options[:quote_char]}/,'') }
|
69
|
-
file_headerA.map!{|x| x.strip} if options[:strip_whitespace]
|
70
|
-
unless options[:keep_original_headers]
|
71
|
-
file_headerA.map!{|x| x.gsub(/\s+|-+/,'_')}
|
72
|
-
file_headerA.map!{|x| x.downcase } if options[:downcase_header]
|
73
|
-
end
|
74
|
-
else
|
75
|
-
raise SmarterCSV::IncorrectOption , "ERROR: If :headers_in_file is set to false, you have to provide :user_provided_headers" if options[:user_provided_headers].nil?
|
76
|
-
end
|
77
|
-
if options[:user_provided_headers] && options[:user_provided_headers].class == Array && ! options[:user_provided_headers].empty?
|
78
|
-
# use user-provided headers
|
79
|
-
headerA = options[:user_provided_headers]
|
80
|
-
if defined?(file_header_size) && ! file_header_size.nil?
|
81
|
-
if headerA.size != file_header_size
|
82
|
-
raise SmarterCSV::HeaderSizeMismatch , "ERROR: :user_provided_headers defines #{headerA.size} headers != CSV-file #{input} has #{file_header_size} headers"
|
83
|
-
else
|
84
|
-
# we could print out the mapping of file_headerA to headerA here
|
85
|
-
end
|
86
|
-
end
|
87
|
-
else
|
88
|
-
headerA = file_headerA
|
89
|
-
end
|
90
|
-
header_size = headerA.size
|
91
|
-
|
92
|
-
headerA.map!{|x| x.to_sym } unless options[:strings_as_keys] || options[:keep_original_headers]
|
93
|
-
|
94
|
-
unless options[:user_provided_headers] # wouldn't make sense to re-map user provided headers
|
95
|
-
key_mappingH = options[:key_mapping]
|
96
|
-
|
97
|
-
# do some key mapping on the keys in the file header
|
98
|
-
# if you want to completely delete a key, then map it to nil or to ''
|
99
|
-
if ! key_mappingH.nil? && key_mappingH.class == Hash && key_mappingH.keys.size > 0
|
100
|
-
headerA.map!{|x| key_mappingH.has_key?(x) ? (key_mappingH[x].nil? ? nil : key_mappingH[x]) : (options[:remove_unmapped_keys] ? nil : x)}
|
101
|
-
end
|
102
|
-
end
|
103
|
-
|
104
|
-
# header_validations
|
105
|
-
duplicate_headers = []
|
106
|
-
headerA.compact.each do |k|
|
107
|
-
duplicate_headers << k if headerA.select{|x| x == k}.size > 1
|
108
|
-
end
|
109
|
-
raise SmarterCSV::DuplicateHeaders , "ERROR: duplicate headers: #{duplicate_headers.join(',')}" unless duplicate_headers.empty?
|
110
|
-
|
111
|
-
if options[:required_headers] && options[:required_headers].is_a?(Array)
|
112
|
-
missing_headers = []
|
113
|
-
options[:required_headers].each do |k|
|
114
|
-
missing_headers << k unless headerA.include?(k)
|
115
|
-
end
|
116
|
-
raise SmarterCSV::MissingHeaders , "ERROR: missing headers: #{missing_headers.join(',')}" unless missing_headers.empty?
|
117
|
-
end
|
38
|
+
headerA, header_size = process_headers(f, options, csv_options)
|
118
39
|
|
119
40
|
# in case we use chunking.. we'll need to set it up..
|
120
41
|
if ! options[:chunk_size].nil? && options[:chunk_size].to_i > 0
|
@@ -128,41 +49,42 @@ module SmarterCSV
|
|
128
49
|
|
129
50
|
# now on to processing all the rest of the lines in the CSV file:
|
130
51
|
while ! f.eof? # we can't use f.readlines() here, because this would read the whole file into memory at once, and eof => true
|
131
|
-
line = f.readline # read one line
|
52
|
+
line = f.readline(options[:row_sep]) # read one line
|
53
|
+
@file_line_count += 1
|
54
|
+
@csv_line_count += 1
|
132
55
|
|
133
56
|
# replace invalid byte sequence in UTF-8 with question mark to avoid errors
|
134
57
|
line = line.force_encoding('utf-8').encode('utf-8', invalid: :replace, undef: :replace, replace: options[:invalid_byte_sequence]) if options[:force_utf8] || options[:file_encoding] !~ /utf-8/i
|
135
58
|
|
136
|
-
file_line_count
|
137
|
-
|
138
|
-
|
139
|
-
next if line =~ options[:comment_regexp] # ignore all comment lines if there are any
|
59
|
+
print "processing file line %10d, csv line %10d\r" % [@file_line_count, @csv_line_count] if options[:verbose]
|
60
|
+
|
61
|
+
next if options[:comment_regexp] && line =~ options[:comment_regexp] # ignore all comment lines if there are any
|
140
62
|
|
141
63
|
# cater for the quoted csv data containing the row separator carriage return character
|
142
64
|
# in which case the row data will be split across multiple lines (see the sample content in spec/fixtures/carriage_returns_rn.csv)
|
143
65
|
# by detecting the existence of an uneven number of quote characters
|
144
|
-
multiline = line.count(options[:quote_char])%2 == 1
|
145
|
-
while line.count(options[:quote_char])%2 == 1
|
146
|
-
next_line = f.readline
|
66
|
+
multiline = line.count(options[:quote_char])%2 == 1 # should handle quote_char nil
|
67
|
+
while line.count(options[:quote_char])%2 == 1 # should handle quote_char nil
|
68
|
+
next_line = f.readline(options[:row_sep])
|
147
69
|
next_line = next_line.force_encoding('utf-8').encode('utf-8', invalid: :replace, undef: :replace, replace: options[:invalid_byte_sequence]) if options[:force_utf8] || options[:file_encoding] !~ /utf-8/i
|
148
70
|
line += next_line
|
149
|
-
file_line_count += 1
|
71
|
+
@file_line_count += 1
|
150
72
|
end
|
151
|
-
print "\nline contains uneven number of quote chars so including content through file line %d\n" % file_line_count if options[:verbose] && multiline
|
73
|
+
print "\nline contains uneven number of quote chars so including content through file line %d\n" % @file_line_count if options[:verbose] && multiline
|
152
74
|
|
153
|
-
line.chomp!
|
75
|
+
line.chomp!(options[:row_sep])
|
154
76
|
|
155
77
|
if (line =~ %r{#{options[:quote_char]}}) and (! options[:force_simple_split])
|
156
78
|
dataA = begin
|
157
79
|
CSV.parse( line, **csv_options ).flatten.collect!{|x| x.nil? ? '' : x} # to deal with nil values from CSV.parse
|
158
80
|
rescue CSV::MalformedCSVError => e
|
159
|
-
raise $!, "#{$!} [SmarterCSV: csv line #{csv_line_count}]", $!.backtrace
|
81
|
+
raise $!, "#{$!} [SmarterCSV: csv line #{@csv_line_count}]", $!.backtrace
|
160
82
|
end
|
161
83
|
else
|
162
|
-
dataA =
|
84
|
+
dataA = line.split(options[:col_sep], header_size)
|
163
85
|
end
|
164
|
-
|
165
|
-
dataA.map!{|x| x.strip}
|
86
|
+
dataA.map!{|x| x.sub(/(#{options[:col_sep]})+\z/, '')} # remove any unwanted trailing col_sep characters at the end
|
87
|
+
dataA.map!{|x| x.strip} if options[:strip_whitespace]
|
166
88
|
|
167
89
|
# if all values are blank, then ignore this line
|
168
90
|
# SEE: https://github.com/rails/rails/blob/32015b6f369adc839c4f0955f2d9dce50c0b6123/activesupport/lib/active_support/core_ext/object/blank.rb#L121
|
@@ -257,7 +179,6 @@ module SmarterCSV
|
|
257
179
|
chunk = [] # initialize for next chunk of data
|
258
180
|
end
|
259
181
|
ensure
|
260
|
-
$INPUT_RECORD_SEPARATOR = old_row_sep # make sure this stupid global variable is always reset to it's previous value after we're done!
|
261
182
|
f.close if f.respond_to?(:close)
|
262
183
|
end
|
263
184
|
if block_given?
|
@@ -269,6 +190,40 @@ module SmarterCSV
|
|
269
190
|
|
270
191
|
private
|
271
192
|
|
193
|
+
def self.default_options
|
194
|
+
{
|
195
|
+
auto_row_sep_chars: 500,
|
196
|
+
chunk_size: nil ,
|
197
|
+
col_sep: ',',
|
198
|
+
comment_regexp: nil, # was: /\A#/,
|
199
|
+
convert_values_to_numeric: true,
|
200
|
+
downcase_header: true,
|
201
|
+
duplicate_header_suffix: nil,
|
202
|
+
file_encoding: 'utf-8',
|
203
|
+
force_simple_split: false ,
|
204
|
+
force_utf8: false,
|
205
|
+
headers_in_file: true,
|
206
|
+
invalid_byte_sequence: '',
|
207
|
+
keep_original_headers: false,
|
208
|
+
key_mapping_hash: nil ,
|
209
|
+
quote_char: '"',
|
210
|
+
remove_empty_hashes: true ,
|
211
|
+
remove_empty_values: true,
|
212
|
+
remove_unmapped_keys: false,
|
213
|
+
remove_values_matching: nil,
|
214
|
+
remove_zero_values: false,
|
215
|
+
required_headers: nil,
|
216
|
+
row_sep: $INPUT_RECORD_SEPARATOR,
|
217
|
+
skip_lines: nil,
|
218
|
+
strings_as_keys: false,
|
219
|
+
strip_chars_from_headers: nil,
|
220
|
+
strip_whitespace: true,
|
221
|
+
user_provided_headers: nil,
|
222
|
+
value_converters: nil,
|
223
|
+
verbose: false,
|
224
|
+
}
|
225
|
+
end
|
226
|
+
|
272
227
|
def self.blank?(value)
|
273
228
|
case value
|
274
229
|
when Array
|
@@ -304,11 +259,11 @@ module SmarterCSV
|
|
304
259
|
end
|
305
260
|
|
306
261
|
# raise exception if none is found
|
307
|
-
def self.guess_column_separator(filehandle)
|
262
|
+
def self.guess_column_separator(filehandle, options)
|
308
263
|
del = [',', "\t", ';', ':', '|']
|
309
264
|
n = Hash.new(0)
|
310
265
|
5.times do
|
311
|
-
line = filehandle.readline
|
266
|
+
line = filehandle.readline(options[:row_sep])
|
312
267
|
del.each do |d|
|
313
268
|
n[d] += line.scan(d).count
|
314
269
|
end
|
@@ -347,9 +302,110 @@ module SmarterCSV
|
|
347
302
|
lines += 1
|
348
303
|
break if options[:auto_row_sep_chars] && options[:auto_row_sep_chars] > 0 && lines >= options[:auto_row_sep_chars]
|
349
304
|
end
|
305
|
+
filehandle.rewind
|
306
|
+
|
350
307
|
counts["\r"] += 1 if last_char == "\r"
|
351
308
|
# find the key/value pair with the largest counter:
|
352
309
|
k,_ = counts.max_by{|_,v| v}
|
353
310
|
return k # the most frequent one is it
|
354
311
|
end
|
312
|
+
|
313
|
+
def self.process_headers(filehandle, options, csv_options)
|
314
|
+
if options[:headers_in_file] # extract the header line
|
315
|
+
# process the header line in the CSV file..
|
316
|
+
# the first line of a CSV file contains the header .. it might be commented out, so we need to read it anyhow
|
317
|
+
header = filehandle.readline(options[:row_sep])
|
318
|
+
@file_line_count += 1
|
319
|
+
@csv_line_count += 1
|
320
|
+
|
321
|
+
header = header.force_encoding('utf-8').encode('utf-8', invalid: :replace, undef: :replace, replace: options[:invalid_byte_sequence]) if options[:force_utf8] || options[:file_encoding] !~ /utf-8/i
|
322
|
+
header = header.sub(options[:comment_regexp],'') if options[:comment_regexp]
|
323
|
+
header = header.chomp(options[:row_sep])
|
324
|
+
|
325
|
+
header = header.gsub(options[:strip_chars_from_headers], '') if options[:strip_chars_from_headers]
|
326
|
+
|
327
|
+
if (header =~ %r{#{options[:quote_char]}}) and (! options[:force_simple_split])
|
328
|
+
file_headerA = begin
|
329
|
+
CSV.parse( header, **csv_options ).flatten.collect!{|x| x.nil? ? '' : x} # to deal with nil values from CSV.parse
|
330
|
+
rescue CSV::MalformedCSVError => e
|
331
|
+
raise $!, "#{$!} [SmarterCSV: csv line #{@csv_line_count}]", $!.backtrace
|
332
|
+
end
|
333
|
+
else
|
334
|
+
file_headerA = header.split(options[:col_sep])
|
335
|
+
end
|
336
|
+
file_header_size = file_headerA.size # before mapping, which could delete keys
|
337
|
+
|
338
|
+
file_headerA.map!{|x| x.gsub(%r/#{options[:quote_char]}/,'') }
|
339
|
+
file_headerA.map!{|x| x.strip} if options[:strip_whitespace]
|
340
|
+
unless options[:keep_original_headers]
|
341
|
+
file_headerA.map!{|x| x.gsub(/\s+|-+/,'_')}
|
342
|
+
file_headerA.map!{|x| x.downcase } if options[:downcase_header]
|
343
|
+
end
|
344
|
+
else
|
345
|
+
raise SmarterCSV::IncorrectOption , "ERROR: If :headers_in_file is set to false, you have to provide :user_provided_headers" unless options[:user_provided_headers]
|
346
|
+
end
|
347
|
+
if options[:user_provided_headers] && options[:user_provided_headers].class == Array && ! options[:user_provided_headers].empty?
|
348
|
+
# use user-provided headers
|
349
|
+
headerA = options[:user_provided_headers]
|
350
|
+
if defined?(file_header_size) && ! file_header_size.nil?
|
351
|
+
if headerA.size != file_header_size
|
352
|
+
raise SmarterCSV::HeaderSizeMismatch , "ERROR: :user_provided_headers defines #{headerA.size} headers != CSV-file #{input} has #{file_header_size} headers"
|
353
|
+
else
|
354
|
+
# we could print out the mapping of file_headerA to headerA here
|
355
|
+
end
|
356
|
+
end
|
357
|
+
else
|
358
|
+
headerA = file_headerA
|
359
|
+
end
|
360
|
+
|
361
|
+
# detect duplicate headers and disambiguate
|
362
|
+
headerA = process_duplicate_headers(headerA, options) if options[:duplicate_header_suffix]
|
363
|
+
header_size = headerA.size # used for splitting lines
|
364
|
+
|
365
|
+
headerA.map!{|x| x.to_sym } unless options[:strings_as_keys] || options[:keep_original_headers]
|
366
|
+
|
367
|
+
unless options[:user_provided_headers] # wouldn't make sense to re-map user provided headers
|
368
|
+
key_mappingH = options[:key_mapping]
|
369
|
+
|
370
|
+
# do some key mapping on the keys in the file header
|
371
|
+
# if you want to completely delete a key, then map it to nil or to ''
|
372
|
+
if ! key_mappingH.nil? && key_mappingH.class == Hash && key_mappingH.keys.size > 0
|
373
|
+
# we can't map keys that are not there
|
374
|
+
raise SmarterCSV::KeyMappingError unless (key_mappingH.keys - headerA).empty?
|
375
|
+
|
376
|
+
headerA.map!{|x| key_mappingH.has_key?(x) ? (key_mappingH[x].nil? ? nil : key_mappingH[x]) : (options[:remove_unmapped_keys] ? nil : x)}
|
377
|
+
end
|
378
|
+
end
|
379
|
+
|
380
|
+
# header_validations
|
381
|
+
duplicate_headers = []
|
382
|
+
headerA.compact.each do |k|
|
383
|
+
duplicate_headers << k if headerA.select{|x| x == k}.size > 1
|
384
|
+
end
|
385
|
+
raise SmarterCSV::DuplicateHeaders , "ERROR: duplicate headers: #{duplicate_headers.join(',')}" unless duplicate_headers.empty?
|
386
|
+
|
387
|
+
if options[:required_headers] && options[:required_headers].is_a?(Array)
|
388
|
+
missing_headers = []
|
389
|
+
options[:required_headers].each do |k|
|
390
|
+
missing_headers << k unless headerA.include?(k)
|
391
|
+
end
|
392
|
+
raise SmarterCSV::MissingHeaders , "ERROR: missing headers: #{missing_headers.join(',')}" unless missing_headers.empty?
|
393
|
+
end
|
394
|
+
|
395
|
+
[headerA, header_size]
|
396
|
+
end
|
397
|
+
|
398
|
+
def self.process_duplicate_headers(headers, options)
|
399
|
+
counts = Hash.new(0)
|
400
|
+
result = []
|
401
|
+
headers.each do |key|
|
402
|
+
counts[key] += 1
|
403
|
+
if counts[key] == 1
|
404
|
+
result << key
|
405
|
+
else
|
406
|
+
result << [key, options[:duplicate_header_suffix], counts[key]].join
|
407
|
+
end
|
408
|
+
end
|
409
|
+
result
|
410
|
+
end
|
355
411
|
end
|
data/lib/smarter_csv/version.rb
CHANGED
data/lib/smarter_csv.rb
CHANGED
data/smarter_csv.gemspec
CHANGED
@@ -18,6 +18,7 @@ Gem::Specification.new do |spec|
|
|
18
18
|
spec.require_paths = ["lib"]
|
19
19
|
spec.requirements = ['csv'] # for CSV.parse() only needed in case we have quoted fields
|
20
20
|
spec.add_development_dependency "rspec"
|
21
|
+
spec.add_development_dependency "simplecov"
|
21
22
|
# spec.add_development_dependency "guard-rspec"
|
22
23
|
|
23
24
|
spec.metadata["homepage_uri"] = spec.homepage
|
@@ -0,0 +1,2 @@
|
|
1
|
+
Name,Email,Financial Status,Paid at,Fulfillment Status,Fulfilled at,Accepts Marketing,Currency,Subtotal,Shipping,Taxes,Total,Discount Code,Discount Amount,Shipping Method,Created at,Lineitem quantity,Lineitem name,Lineitem price,Lineitem compare at price,Lineitem sku,Lineitem requires shipping,Lineitem taxable,Lineitem fulfillment status,Billing Name,Billing Street,Billing Address1,Billing Address2,Billing Company,Billing City,Billing Zip,Billing Province,Billing Country,Billing Phone,Shipping Name,Shipping Street,Shipping Address1,Shipping Address2,Shipping Company,Shipping City,Shipping Zip,Shipping Province,Shipping Country,Shipping Phone,Notes,Note Attributes,Cancelled at,Payment Method,Payment Reference,Refunded Amount,Vendor, rece,Tags,Risk Level,Source,Lineitem discount,Tax 1 Name,Tax 1 Value,Tax 2 Name,Tax 2 Value,Tax 3 Name,Tax 3 Value,Tax 4 Name,Tax 4 Value,Tax 5 Name,Tax 5 Value,Phone,Receipt Number,Duties,Billing Province Name,Shipping Province Name,Payment ID,Payment Terms Name,Next Payment Due At
|
2
|
+
#MR1220817,foo@bar.com,paid,2022-02-08 22:31:28 +0100,unfulfilled,,yes,EUR,144,0,24,144,VIP,119.6,"Livraison Standard GRATUITE, 2-5 jours avec suivi",2022-02-08 22:31:26 +0100,2,Cire Épilation Nacrée,37,,WAX-200-NAC,true,true,pending,French Fry,64 Boulevard Budgié,64 Boulevard Budgié,,,dootdoot’,'49100,,FR,06 12 34 56 78,French Fry,64 Boulevard Budgi,64 Boulevard Budgié,,,dootdoot,'49100,,FR,06 12 34 56 78,,,,Stripe,c23800013619353.2,0,Goober Rég,4331065802905,902,Low,web,0,FR TVA 20%,24,,,,,,,,,3366012111111,,,,,,,
|
@@ -0,0 +1,45 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
fixture_path = 'spec/fixtures'
|
4
|
+
|
5
|
+
describe 'handling of additional trailing column separators' do
|
6
|
+
let(:file) { "#{fixture_path}/additional_separator.csv" }
|
7
|
+
|
8
|
+
describe '' do
|
9
|
+
let(:data) { SmarterCSV.process(file) }
|
10
|
+
|
11
|
+
it 'reads all lines' do
|
12
|
+
data.size.should eq 5
|
13
|
+
end
|
14
|
+
|
15
|
+
it 'reads regular lines' do
|
16
|
+
item = data[0]
|
17
|
+
item[:col1].should == 'eins'
|
18
|
+
item[:col2].should == 'zwei'
|
19
|
+
end
|
20
|
+
|
21
|
+
it 'strips single trailing col_sep character' do
|
22
|
+
item = data[1]
|
23
|
+
item[:col1].should == 'uno'
|
24
|
+
item[:col2].should == 'dos'
|
25
|
+
end
|
26
|
+
|
27
|
+
it 'strips multiple trailing col_sep characters' do
|
28
|
+
item = data[2]
|
29
|
+
item[:col1].should == 'one'
|
30
|
+
item[:col2].should == 'two'
|
31
|
+
end
|
32
|
+
|
33
|
+
it 'strips multiple trailing col_sep chars' do
|
34
|
+
item = data[3]
|
35
|
+
item[:col1].should == 'ichi'
|
36
|
+
item[:col2].should == nil
|
37
|
+
end
|
38
|
+
|
39
|
+
it 'strips multiple trailing col_sep chars' do
|
40
|
+
item = data[4]
|
41
|
+
item[:col1].should == 'un'
|
42
|
+
item[:col2].should == nil
|
43
|
+
end
|
44
|
+
end
|
45
|
+
end
|
@@ -12,7 +12,7 @@ describe 'be_able_to' do
|
|
12
12
|
it 'loads_binary_file_with_strings_as_keys' do
|
13
13
|
options = {:col_sep => "\cA", :row_sep => "\cB", :comment_regexp => /^#/, :strings_as_keys => true}
|
14
14
|
data = SmarterCSV.process("#{fixture_path}/binary.csv", options)
|
15
|
-
data.
|
15
|
+
data.size.should == 8
|
16
16
|
data.each do |item|
|
17
17
|
# all keys should be strings
|
18
18
|
item.keys.each{|x| x.class.should be == String}
|
@@ -3,7 +3,6 @@ require 'spec_helper'
|
|
3
3
|
fixture_path = 'spec/fixtures'
|
4
4
|
|
5
5
|
describe 'process files with line endings explicitly pre-specified' do
|
6
|
-
|
7
6
|
it 'should process a file with \n for line endings and within data fields' do
|
8
7
|
sep = "\n"
|
9
8
|
options = {:row_sep => sep}
|
@@ -83,14 +82,14 @@ describe 'process files with line endings explicitly pre-specified' do
|
|
83
82
|
data[1][:members].should == ["Jimmy Page", "Robert Plant", "John Bonham", "John Paul Jones"].join(text_sep)
|
84
83
|
data[1][:albums].should == ["Led Zeppelin", "Led Zeppelin II", "Led Zeppelin III", "Led Zeppelin IV"].join(text_sep)
|
85
84
|
end
|
86
|
-
|
87
85
|
end
|
88
86
|
|
89
87
|
describe 'process files with line endings in automatic mode' do
|
88
|
+
let(:options) { { row_sep: :auto } }
|
90
89
|
|
91
90
|
it 'should process a file with \n for line endings and within data fields' do
|
92
91
|
sep = "\n"
|
93
|
-
data = SmarterCSV.process("#{fixture_path}/carriage_returns_n.csv",
|
92
|
+
data = SmarterCSV.process("#{fixture_path}/carriage_returns_n.csv", options)
|
94
93
|
data.flatten.size.should == 8
|
95
94
|
data[0][:name].should == "Anfield"
|
96
95
|
data[0][:street].should == "Anfield Road"
|
@@ -112,7 +111,29 @@ describe 'process files with line endings in automatic mode' do
|
|
112
111
|
|
113
112
|
it 'should process a file with \r for line endings and within data fields' do
|
114
113
|
sep = "\r"
|
115
|
-
data = SmarterCSV.process("#{fixture_path}/carriage_returns_r.csv",
|
114
|
+
data = SmarterCSV.process("#{fixture_path}/carriage_returns_r.csv", options)
|
115
|
+
data.flatten.size.should == 8
|
116
|
+
data[0][:name].should == "Anfield"
|
117
|
+
data[0][:street].should == "Anfield Road"
|
118
|
+
data[0][:city].should == "Liverpool"
|
119
|
+
data[1][:name].should == ["Highbury", "Highbury House"].join(sep)
|
120
|
+
data[2][:street].should == ["Sir Matt ", "Busby Way"].join(sep)
|
121
|
+
data[3][:city].should == ["Newcastle-upon-tyne ", "Tyne and Wear"].join(sep)
|
122
|
+
data[4][:name].should == ["White Hart Lane", "(The Lane)"].join(sep)
|
123
|
+
data[4][:street].should == ["Bill Nicholson Way ", "748 High Rd"].join(sep)
|
124
|
+
data[4][:city].should == ["Tottenham", "London"].join(sep)
|
125
|
+
data[5][:name].should == "Stamford Bridge"
|
126
|
+
data[5][:street].should == ["Fulham Road", "London"].join(sep)
|
127
|
+
data[5][:city].should be_nil
|
128
|
+
data[6][:name].should == ["Etihad Stadium", "Rowsley St", "Manchester"].join(sep)
|
129
|
+
data[7][:name].should == "Goodison"
|
130
|
+
data[7][:street].should == "Goodison Road"
|
131
|
+
data[7][:city].should == "Liverpool"
|
132
|
+
end
|
133
|
+
|
134
|
+
it 'also works when auto is given a string' do
|
135
|
+
sep = "\r"
|
136
|
+
data = SmarterCSV.process("#{fixture_path}/carriage_returns_r.csv", {row_sep: 'auto'})
|
116
137
|
data.flatten.size.should == 8
|
117
138
|
data[0][:name].should == "Anfield"
|
118
139
|
data[0][:street].should == "Anfield Road"
|
@@ -134,7 +155,7 @@ describe 'process files with line endings in automatic mode' do
|
|
134
155
|
|
135
156
|
it 'should process a file with \r\n for line endings and within data fields' do
|
136
157
|
sep = "\r\n"
|
137
|
-
data = SmarterCSV.process("#{fixture_path}/carriage_returns_rn.csv",
|
158
|
+
data = SmarterCSV.process("#{fixture_path}/carriage_returns_rn.csv", options)
|
138
159
|
data.flatten.size.should == 8
|
139
160
|
data[0][:name].should == "Anfield"
|
140
161
|
data[0][:street].should == "Anfield Road"
|
@@ -157,7 +178,7 @@ describe 'process files with line endings in automatic mode' do
|
|
157
178
|
it 'should process a file with more quoted text carriage return characters (\r) than line ending characters (\n)' do
|
158
179
|
row_sep = "\n"
|
159
180
|
text_sep = "\r"
|
160
|
-
data = SmarterCSV.process("#{fixture_path}/carriage_returns_quoted.csv",
|
181
|
+
data = SmarterCSV.process("#{fixture_path}/carriage_returns_quoted.csv", options)
|
161
182
|
data.flatten.size.should == 2
|
162
183
|
data[0][:band].should == "New Order"
|
163
184
|
data[0][:members].should == ["Bernard Sumner", "Peter Hook", "Stephen Morris", "Gillian Gilbert"].join(text_sep)
|
@@ -166,5 +187,4 @@ describe 'process files with line endings in automatic mode' do
|
|
166
187
|
data[1][:members].should == ["Jimmy Page", "Robert Plant", "John Bonham", "John Paul Jones"].join(text_sep)
|
167
188
|
data[1][:albums].should == ["Led Zeppelin", "Led Zeppelin II", "Led Zeppelin III", "Led Zeppelin IV"].join(text_sep)
|
168
189
|
end
|
169
|
-
|
170
190
|
end
|
@@ -48,7 +48,7 @@ describe 'can handle col_sep' do
|
|
48
48
|
end
|
49
49
|
|
50
50
|
describe 'auto-detection of separator' do
|
51
|
-
options = {:
|
51
|
+
options = {col_sep: :auto}
|
52
52
|
|
53
53
|
it 'auto-detects comma separator and loads data' do
|
54
54
|
data = SmarterCSV.process("#{fixture_path}/separator_comma.csv", options)
|
@@ -85,5 +85,11 @@ describe 'can handle col_sep' do
|
|
85
85
|
SmarterCSV.process("#{fixture_path}/binary.csv", options)
|
86
86
|
}.to raise_exception SmarterCSV::NoColSepDetected
|
87
87
|
end
|
88
|
+
|
89
|
+
it 'also works when auto is given a string' do
|
90
|
+
data = SmarterCSV.process("#{fixture_path}/separator_pipe.csv", col_sep: 'auto')
|
91
|
+
data.first.keys.size.should == 4
|
92
|
+
data.size.should eq 3
|
93
|
+
end
|
88
94
|
end
|
89
95
|
end
|
@@ -0,0 +1,76 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
fixture_path = 'spec/fixtures'
|
4
|
+
|
5
|
+
describe 'duplicate headers' do
|
6
|
+
describe 'without special handling / default behavior' do
|
7
|
+
it 'raises error on duplicate headers' do
|
8
|
+
expect {
|
9
|
+
SmarterCSV.process("#{fixture_path}/duplicate_headers.csv", {})
|
10
|
+
}.to raise_exception(SmarterCSV::DuplicateHeaders)
|
11
|
+
end
|
12
|
+
|
13
|
+
it 'raises error on duplicate given headers' do
|
14
|
+
expect {
|
15
|
+
options = {:user_provided_headers => [:a,:b,:c,:d,:a]}
|
16
|
+
SmarterCSV.process("#{fixture_path}/duplicate_headers.csv", options)
|
17
|
+
}.to raise_exception(SmarterCSV::DuplicateHeaders)
|
18
|
+
end
|
19
|
+
|
20
|
+
it 'raises error on missing mapped headers' do
|
21
|
+
expect {
|
22
|
+
# the mapping is right, but the underlying csv file is bad
|
23
|
+
options = {:key_mapping => {:email => :a, :firstname => :b, :lastname => :c, :manager_email => :d, :age => :e} }
|
24
|
+
SmarterCSV.process("#{fixture_path}/duplicate_headers.csv", options)
|
25
|
+
}.to raise_exception(SmarterCSV::KeyMappingError)
|
26
|
+
end
|
27
|
+
end
|
28
|
+
|
29
|
+
describe 'with special handling' do
|
30
|
+
context 'with given suffix' do
|
31
|
+
let(:options) { {duplicate_header_suffix: '_'} }
|
32
|
+
|
33
|
+
it 'reads whole file' do
|
34
|
+
data = SmarterCSV.process("#{fixture_path}/duplicate_headers.csv", options)
|
35
|
+
expect(data.size).to eq 2
|
36
|
+
end
|
37
|
+
|
38
|
+
it 'generates the correct keys' do
|
39
|
+
data = SmarterCSV.process("#{fixture_path}/duplicate_headers.csv", options)
|
40
|
+
expect(data.first.keys).to eq [:email, :firstname, :lastname, :email_2, :age]
|
41
|
+
end
|
42
|
+
|
43
|
+
it 'enumerates when duplicate headers are given' do
|
44
|
+
options.merge!({:user_provided_headers => [:a,:b,:c,:a,:a]})
|
45
|
+
data = SmarterCSV.process("#{fixture_path}/duplicate_headers.csv", options)
|
46
|
+
expect(data.first.keys).to eq [:a, :b, :c, :a_2, :a_3]
|
47
|
+
end
|
48
|
+
|
49
|
+
it 'can remap duplicated headers' do
|
50
|
+
options.merge!({:key_mapping => {:email => :a, :firstname => :b, :lastname => :c, :email_2 => :d, :age => :e}})
|
51
|
+
data = SmarterCSV.process("#{fixture_path}/duplicate_headers.csv", options)
|
52
|
+
expect(data.first).to eq({a: 'tom@bla.com', b: 'Tom', c: 'Sawyer', d: 'mike@bla.com', e: 34})
|
53
|
+
end
|
54
|
+
end
|
55
|
+
|
56
|
+
context 'with empty suffix' do
|
57
|
+
let(:options) { {duplicate_header_suffix: ''} }
|
58
|
+
|
59
|
+
it 'reads whole file' do
|
60
|
+
data = SmarterCSV.process("#{fixture_path}/duplicate_headers.csv", options)
|
61
|
+
expect(data.size).to eq 2
|
62
|
+
end
|
63
|
+
|
64
|
+
it 'generates the correct keys' do
|
65
|
+
data = SmarterCSV.process("#{fixture_path}/duplicate_headers.csv", options)
|
66
|
+
expect(data.first.keys).to eq [:email, :firstname, :lastname, :email2, :age]
|
67
|
+
end
|
68
|
+
|
69
|
+
it 'enumerates when duplicate headers are given' do
|
70
|
+
options.merge!({:user_provided_headers => [:a,:b,:c,:a,:a]})
|
71
|
+
data = SmarterCSV.process("#{fixture_path}/duplicate_headers.csv", options)
|
72
|
+
expect(data.first.keys).to eq [:a, :b, :c, :a2, :a3]
|
73
|
+
end
|
74
|
+
end
|
75
|
+
end
|
76
|
+
end
|