smarter_csv 1.5.0 → 1.5.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +6 -0
- data/README.md +9 -0
- data/lib/smarter_csv/smarter_csv.rb +112 -82
- data/lib/smarter_csv/version.rb +1 -1
- data/spec/fixtures/duplicate_headers.csv +1 -1
- data/spec/smarter_csv/duplicate_headers_spec.rb +76 -0
- data/spec/smarter_csv/invalid_headers_spec.rb +8 -22
- data/spec/smarter_csv/no_header_spec.rb +16 -11
- metadata +4 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 352cf76ac0cd6b2eb4a1cac9e5056aa6e92a8a61b627d7c922e063dcf82ad675
|
4
|
+
data.tar.gz: 0c6e3ab1eaee02a9361fe0b418191244d81bc558dbcd10ee1d2c5f15390d91b6
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 3763cd8e493e7da6560e8ce9adc58bd411f745f5af119c97d70c02667a524ccb1055b5c640ef795c3cb25b79fa5e17800018da6e76f9d358afa1c7a3513caae3
|
7
|
+
data.tar.gz: '039183fdece20e80007f3f0d3e395fac8d273df6c21928a35b685ade5915503b3c55918fc7015ead0a7d768a545bb76bcfb1087006af6118dc3d22df83e68ddb'
|
data/CHANGELOG.md
CHANGED
@@ -1,6 +1,12 @@
|
|
1
1
|
|
2
2
|
# SmarterCSV 1.x Change Log
|
3
3
|
|
4
|
+
## 1.5.1 (2022-04-26)
|
5
|
+
* added raising of `KeyMappingError` if `key_mapping` refers to a non-existent key
|
6
|
+
* added option `duplicate_header_suffix` (thanks to Skye Shaw)
|
7
|
+
When given a non-nil string, it uses the suffix to append numbering 2..n to duplicate headers.
|
8
|
+
If your code will need to process arbitrary CSV files, please set `duplicate_header_suffix`.
|
9
|
+
|
4
10
|
## 1.5.0 (2022-04-25)
|
5
11
|
* fixed bug with trailing col_sep characters, introduced in 1.4.0
|
6
12
|
* Fix deprecation warning in Ruby 3.0.3 / $INPUT_RECORD_SEPARATOR (thanks to Joel Fouse)
|
data/README.md
CHANGED
@@ -228,6 +228,7 @@ The options and the block are optional.
|
|
228
228
|
| :headers_in_file | true | Whether or not the file contains headers as the first line. |
|
229
229
|
| | | Important if the file does not contain headers, |
|
230
230
|
| | | otherwise you would lose the first line of data. |
|
231
|
+
| :duplicate_header_suffix | nil | If set, adds numbers to duplicated headers and separates them by the given suffix |
|
231
232
|
| :user_provided_headers | nil | *careful with that axe!* |
|
232
233
|
| | | user provided Array of header strings or symbols, to define |
|
233
234
|
| | | what headers should be used, overriding any in-file headers. |
|
@@ -282,6 +283,7 @@ And header and data validations will also be supported in 2.x
|
|
282
283
|
data = SmarterCSV.process(f)
|
283
284
|
end
|
284
285
|
```
|
286
|
+
|
285
287
|
#### NOTES about CSV Headers:
|
286
288
|
* as this method parses CSV files, it is assumed that the first line of any file will contain a valid header
|
287
289
|
* the first line with the header might be commented out, in which case you will need to set `comment_regexp: /\A#/`
|
@@ -291,6 +293,13 @@ And header and data validations will also be supported in 2.x
|
|
291
293
|
* you can not combine the :user_provided_headers and :key_mapping options
|
292
294
|
* if the incorrect number of headers are provided via :user_provided_headers, exception SmarterCSV::HeaderSizeMismatch is raised
|
293
295
|
|
296
|
+
#### NOTES on Duplicate Headers:
|
297
|
+
As a corner case, it is possible that a CSV file contains multiple headers with the same name.
|
298
|
+
* If that happens, by default `smarter_csv` will raise a `DuplicateHeaders` error.
|
299
|
+
* If you set `duplicate_header_suffix` to a non-nil string, it will use it to append numbers 2..n to the duplicate headers. To further disambiguate the headers, you can also use `key_mapping` to assign meaningful names.
|
300
|
+
* If your code will need to process arbitrary CSV files, please set `duplicate_header_suffix`.
|
301
|
+
* Another way to deal with duplicate headers is to use `user_provided_headers` to ignore any headers in the file.
|
302
|
+
|
294
303
|
#### NOTES on Key Mapping:
|
295
304
|
* keys in the header line of the file can be re-mapped to a chosen set of symbols, so the resulting Hashes can be better used internally in your application (e.g. when directly creating MongoDB entries with them)
|
296
305
|
* if you want to completely delete a key, then map it to nil or to '', they will be automatically deleted from any result Hash
|
@@ -5,15 +5,17 @@ module SmarterCSV
|
|
5
5
|
class DuplicateHeaders < SmarterCSVException; end
|
6
6
|
class MissingHeaders < SmarterCSVException; end
|
7
7
|
class NoColSepDetected < SmarterCSVException; end
|
8
|
+
class KeyMappingError < SmarterCSVException; end
|
8
9
|
|
9
|
-
|
10
|
+
# first parameter: filename or input object which responds to readline method
|
11
|
+
def SmarterCSV.process(input, options={}, &block)
|
10
12
|
options = default_options.merge(options)
|
11
13
|
options[:invalid_byte_sequence] = '' if options[:invalid_byte_sequence].nil?
|
12
14
|
|
13
15
|
headerA = []
|
14
16
|
result = []
|
15
|
-
file_line_count = 0
|
16
|
-
csv_line_count = 0
|
17
|
+
@file_line_count = 0
|
18
|
+
@csv_line_count = 0
|
17
19
|
has_rails = !! defined?(Rails)
|
18
20
|
begin
|
19
21
|
f = input.respond_to?(:readline) ? input : File.open(input, "r:#{options[:file_encoding]}")
|
@@ -33,79 +35,7 @@ module SmarterCSV
|
|
33
35
|
|
34
36
|
options[:skip_lines].to_i.times{f.readline(options[:row_sep])} if options[:skip_lines].to_i > 0
|
35
37
|
|
36
|
-
|
37
|
-
# process the header line in the CSV file..
|
38
|
-
# the first line of a CSV file contains the header .. it might be commented out, so we need to read it anyhow
|
39
|
-
header = f.readline(options[:row_sep])
|
40
|
-
header = header.force_encoding('utf-8').encode('utf-8', invalid: :replace, undef: :replace, replace: options[:invalid_byte_sequence]) if options[:force_utf8] || options[:file_encoding] !~ /utf-8/i
|
41
|
-
header = header.sub(options[:comment_regexp],'') if options[:comment_regexp]
|
42
|
-
header = header.chomp(options[:row_sep])
|
43
|
-
|
44
|
-
file_line_count += 1
|
45
|
-
csv_line_count += 1
|
46
|
-
header = header.gsub(options[:strip_chars_from_headers], '') if options[:strip_chars_from_headers]
|
47
|
-
|
48
|
-
if (header =~ %r{#{options[:quote_char]}}) and (! options[:force_simple_split])
|
49
|
-
file_headerA = begin
|
50
|
-
CSV.parse( header, **csv_options ).flatten.collect!{|x| x.nil? ? '' : x} # to deal with nil values from CSV.parse
|
51
|
-
rescue CSV::MalformedCSVError => e
|
52
|
-
raise $!, "#{$!} [SmarterCSV: csv line #{csv_line_count}]", $!.backtrace
|
53
|
-
end
|
54
|
-
else
|
55
|
-
file_headerA = header.split(options[:col_sep])
|
56
|
-
end
|
57
|
-
file_header_size = file_headerA.size # before mapping, which could delete keys
|
58
|
-
|
59
|
-
file_headerA.map!{|x| x.gsub(%r/#{options[:quote_char]}/,'') }
|
60
|
-
file_headerA.map!{|x| x.strip} if options[:strip_whitespace]
|
61
|
-
unless options[:keep_original_headers]
|
62
|
-
file_headerA.map!{|x| x.gsub(/\s+|-+/,'_')}
|
63
|
-
file_headerA.map!{|x| x.downcase } if options[:downcase_header]
|
64
|
-
end
|
65
|
-
else
|
66
|
-
raise SmarterCSV::IncorrectOption , "ERROR: If :headers_in_file is set to false, you have to provide :user_provided_headers" if options[:user_provided_headers].nil?
|
67
|
-
end
|
68
|
-
if options[:user_provided_headers] && options[:user_provided_headers].class == Array && ! options[:user_provided_headers].empty?
|
69
|
-
# use user-provided headers
|
70
|
-
headerA = options[:user_provided_headers]
|
71
|
-
if defined?(file_header_size) && ! file_header_size.nil?
|
72
|
-
if headerA.size != file_header_size
|
73
|
-
raise SmarterCSV::HeaderSizeMismatch , "ERROR: :user_provided_headers defines #{headerA.size} headers != CSV-file #{input} has #{file_header_size} headers"
|
74
|
-
else
|
75
|
-
# we could print out the mapping of file_headerA to headerA here
|
76
|
-
end
|
77
|
-
end
|
78
|
-
else
|
79
|
-
headerA = file_headerA
|
80
|
-
end
|
81
|
-
header_size = headerA.size # used for splitting lines
|
82
|
-
|
83
|
-
headerA.map!{|x| x.to_sym } unless options[:strings_as_keys] || options[:keep_original_headers]
|
84
|
-
|
85
|
-
unless options[:user_provided_headers] # wouldn't make sense to re-map user provided headers
|
86
|
-
key_mappingH = options[:key_mapping]
|
87
|
-
|
88
|
-
# do some key mapping on the keys in the file header
|
89
|
-
# if you want to completely delete a key, then map it to nil or to ''
|
90
|
-
if ! key_mappingH.nil? && key_mappingH.class == Hash && key_mappingH.keys.size > 0
|
91
|
-
headerA.map!{|x| key_mappingH.has_key?(x) ? (key_mappingH[x].nil? ? nil : key_mappingH[x]) : (options[:remove_unmapped_keys] ? nil : x)}
|
92
|
-
end
|
93
|
-
end
|
94
|
-
|
95
|
-
# header_validations
|
96
|
-
duplicate_headers = []
|
97
|
-
headerA.compact.each do |k|
|
98
|
-
duplicate_headers << k if headerA.select{|x| x == k}.size > 1
|
99
|
-
end
|
100
|
-
raise SmarterCSV::DuplicateHeaders , "ERROR: duplicate headers: #{duplicate_headers.join(',')}" unless duplicate_headers.empty?
|
101
|
-
|
102
|
-
if options[:required_headers] && options[:required_headers].is_a?(Array)
|
103
|
-
missing_headers = []
|
104
|
-
options[:required_headers].each do |k|
|
105
|
-
missing_headers << k unless headerA.include?(k)
|
106
|
-
end
|
107
|
-
raise SmarterCSV::MissingHeaders , "ERROR: missing headers: #{missing_headers.join(',')}" unless missing_headers.empty?
|
108
|
-
end
|
38
|
+
headerA, header_size = process_headers(f, options, csv_options)
|
109
39
|
|
110
40
|
# in case we use chunking.. we'll need to set it up..
|
111
41
|
if ! options[:chunk_size].nil? && options[:chunk_size].to_i > 0
|
@@ -120,13 +50,13 @@ module SmarterCSV
|
|
120
50
|
# now on to processing all the rest of the lines in the CSV file:
|
121
51
|
while ! f.eof? # we can't use f.readlines() here, because this would read the whole file into memory at once, and eof => true
|
122
52
|
line = f.readline(options[:row_sep]) # read one line
|
53
|
+
@file_line_count += 1
|
54
|
+
@csv_line_count += 1
|
123
55
|
|
124
56
|
# replace invalid byte sequence in UTF-8 with question mark to avoid errors
|
125
57
|
line = line.force_encoding('utf-8').encode('utf-8', invalid: :replace, undef: :replace, replace: options[:invalid_byte_sequence]) if options[:force_utf8] || options[:file_encoding] !~ /utf-8/i
|
126
58
|
|
127
|
-
file_line_count
|
128
|
-
csv_line_count += 1
|
129
|
-
print "processing file line %10d, csv line %10d\r" % [file_line_count, csv_line_count] if options[:verbose]
|
59
|
+
print "processing file line %10d, csv line %10d\r" % [@file_line_count, @csv_line_count] if options[:verbose]
|
130
60
|
|
131
61
|
next if options[:comment_regexp] && line =~ options[:comment_regexp] # ignore all comment lines if there are any
|
132
62
|
|
@@ -138,9 +68,9 @@ module SmarterCSV
|
|
138
68
|
next_line = f.readline(options[:row_sep])
|
139
69
|
next_line = next_line.force_encoding('utf-8').encode('utf-8', invalid: :replace, undef: :replace, replace: options[:invalid_byte_sequence]) if options[:force_utf8] || options[:file_encoding] !~ /utf-8/i
|
140
70
|
line += next_line
|
141
|
-
file_line_count += 1
|
71
|
+
@file_line_count += 1
|
142
72
|
end
|
143
|
-
print "\nline contains uneven number of quote chars so including content through file line %d\n" % file_line_count if options[:verbose] && multiline
|
73
|
+
print "\nline contains uneven number of quote chars so including content through file line %d\n" % @file_line_count if options[:verbose] && multiline
|
144
74
|
|
145
75
|
line.chomp!(options[:row_sep])
|
146
76
|
|
@@ -148,7 +78,7 @@ module SmarterCSV
|
|
148
78
|
dataA = begin
|
149
79
|
CSV.parse( line, **csv_options ).flatten.collect!{|x| x.nil? ? '' : x} # to deal with nil values from CSV.parse
|
150
80
|
rescue CSV::MalformedCSVError => e
|
151
|
-
raise $!, "#{$!} [SmarterCSV: csv line #{csv_line_count}]", $!.backtrace
|
81
|
+
raise $!, "#{$!} [SmarterCSV: csv line #{@csv_line_count}]", $!.backtrace
|
152
82
|
end
|
153
83
|
else
|
154
84
|
dataA = line.split(options[:col_sep], header_size)
|
@@ -268,6 +198,7 @@ module SmarterCSV
|
|
268
198
|
comment_regexp: nil, # was: /\A#/,
|
269
199
|
convert_values_to_numeric: true,
|
270
200
|
downcase_header: true,
|
201
|
+
duplicate_header_suffix: nil,
|
271
202
|
file_encoding: 'utf-8',
|
272
203
|
force_simple_split: false ,
|
273
204
|
force_utf8: false,
|
@@ -378,4 +309,103 @@ module SmarterCSV
|
|
378
309
|
k,_ = counts.max_by{|_,v| v}
|
379
310
|
return k # the most frequent one is it
|
380
311
|
end
|
312
|
+
|
313
|
+
def self.process_headers(filehandle, options, csv_options)
|
314
|
+
if options[:headers_in_file] # extract the header line
|
315
|
+
# process the header line in the CSV file..
|
316
|
+
# the first line of a CSV file contains the header .. it might be commented out, so we need to read it anyhow
|
317
|
+
header = filehandle.readline(options[:row_sep])
|
318
|
+
@file_line_count += 1
|
319
|
+
@csv_line_count += 1
|
320
|
+
|
321
|
+
header = header.force_encoding('utf-8').encode('utf-8', invalid: :replace, undef: :replace, replace: options[:invalid_byte_sequence]) if options[:force_utf8] || options[:file_encoding] !~ /utf-8/i
|
322
|
+
header = header.sub(options[:comment_regexp],'') if options[:comment_regexp]
|
323
|
+
header = header.chomp(options[:row_sep])
|
324
|
+
|
325
|
+
header = header.gsub(options[:strip_chars_from_headers], '') if options[:strip_chars_from_headers]
|
326
|
+
|
327
|
+
if (header =~ %r{#{options[:quote_char]}}) and (! options[:force_simple_split])
|
328
|
+
file_headerA = begin
|
329
|
+
CSV.parse( header, **csv_options ).flatten.collect!{|x| x.nil? ? '' : x} # to deal with nil values from CSV.parse
|
330
|
+
rescue CSV::MalformedCSVError => e
|
331
|
+
raise $!, "#{$!} [SmarterCSV: csv line #{@csv_line_count}]", $!.backtrace
|
332
|
+
end
|
333
|
+
else
|
334
|
+
file_headerA = header.split(options[:col_sep])
|
335
|
+
end
|
336
|
+
file_header_size = file_headerA.size # before mapping, which could delete keys
|
337
|
+
|
338
|
+
file_headerA.map!{|x| x.gsub(%r/#{options[:quote_char]}/,'') }
|
339
|
+
file_headerA.map!{|x| x.strip} if options[:strip_whitespace]
|
340
|
+
unless options[:keep_original_headers]
|
341
|
+
file_headerA.map!{|x| x.gsub(/\s+|-+/,'_')}
|
342
|
+
file_headerA.map!{|x| x.downcase } if options[:downcase_header]
|
343
|
+
end
|
344
|
+
else
|
345
|
+
raise SmarterCSV::IncorrectOption , "ERROR: If :headers_in_file is set to false, you have to provide :user_provided_headers" unless options[:user_provided_headers]
|
346
|
+
end
|
347
|
+
if options[:user_provided_headers] && options[:user_provided_headers].class == Array && ! options[:user_provided_headers].empty?
|
348
|
+
# use user-provided headers
|
349
|
+
headerA = options[:user_provided_headers]
|
350
|
+
if defined?(file_header_size) && ! file_header_size.nil?
|
351
|
+
if headerA.size != file_header_size
|
352
|
+
raise SmarterCSV::HeaderSizeMismatch , "ERROR: :user_provided_headers defines #{headerA.size} headers != CSV-file #{input} has #{file_header_size} headers"
|
353
|
+
else
|
354
|
+
# we could print out the mapping of file_headerA to headerA here
|
355
|
+
end
|
356
|
+
end
|
357
|
+
else
|
358
|
+
headerA = file_headerA
|
359
|
+
end
|
360
|
+
|
361
|
+
# detect duplicate headers and disambiguate
|
362
|
+
headerA = process_duplicate_headers(headerA, options) if options[:duplicate_header_suffix]
|
363
|
+
header_size = headerA.size # used for splitting lines
|
364
|
+
|
365
|
+
headerA.map!{|x| x.to_sym } unless options[:strings_as_keys] || options[:keep_original_headers]
|
366
|
+
|
367
|
+
unless options[:user_provided_headers] # wouldn't make sense to re-map user provided headers
|
368
|
+
key_mappingH = options[:key_mapping]
|
369
|
+
|
370
|
+
# do some key mapping on the keys in the file header
|
371
|
+
# if you want to completely delete a key, then map it to nil or to ''
|
372
|
+
if ! key_mappingH.nil? && key_mappingH.class == Hash && key_mappingH.keys.size > 0
|
373
|
+
# we can't map keys that are not there
|
374
|
+
raise SmarterCSV::KeyMappingError unless (key_mappingH.keys - headerA).empty?
|
375
|
+
|
376
|
+
headerA.map!{|x| key_mappingH.has_key?(x) ? (key_mappingH[x].nil? ? nil : key_mappingH[x]) : (options[:remove_unmapped_keys] ? nil : x)}
|
377
|
+
end
|
378
|
+
end
|
379
|
+
|
380
|
+
# header_validations
|
381
|
+
duplicate_headers = []
|
382
|
+
headerA.compact.each do |k|
|
383
|
+
duplicate_headers << k if headerA.select{|x| x == k}.size > 1
|
384
|
+
end
|
385
|
+
raise SmarterCSV::DuplicateHeaders , "ERROR: duplicate headers: #{duplicate_headers.join(',')}" unless duplicate_headers.empty?
|
386
|
+
|
387
|
+
if options[:required_headers] && options[:required_headers].is_a?(Array)
|
388
|
+
missing_headers = []
|
389
|
+
options[:required_headers].each do |k|
|
390
|
+
missing_headers << k unless headerA.include?(k)
|
391
|
+
end
|
392
|
+
raise SmarterCSV::MissingHeaders , "ERROR: missing headers: #{missing_headers.join(',')}" unless missing_headers.empty?
|
393
|
+
end
|
394
|
+
|
395
|
+
[headerA, header_size]
|
396
|
+
end
|
397
|
+
|
398
|
+
def self.process_duplicate_headers(headers, options)
|
399
|
+
counts = Hash.new(0)
|
400
|
+
result = []
|
401
|
+
headers.each do |key|
|
402
|
+
counts[key] += 1
|
403
|
+
if counts[key] == 1
|
404
|
+
result << key
|
405
|
+
else
|
406
|
+
result << [key, options[:duplicate_header_suffix], counts[key]].join
|
407
|
+
end
|
408
|
+
end
|
409
|
+
result
|
410
|
+
end
|
381
411
|
end
|
data/lib/smarter_csv/version.rb
CHANGED
@@ -0,0 +1,76 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
fixture_path = 'spec/fixtures'
|
4
|
+
|
5
|
+
describe 'duplicate headers' do
|
6
|
+
describe 'without special handling / default behavior' do
|
7
|
+
it 'raises error on duplicate headers' do
|
8
|
+
expect {
|
9
|
+
SmarterCSV.process("#{fixture_path}/duplicate_headers.csv", {})
|
10
|
+
}.to raise_exception(SmarterCSV::DuplicateHeaders)
|
11
|
+
end
|
12
|
+
|
13
|
+
it 'raises error on duplicate given headers' do
|
14
|
+
expect {
|
15
|
+
options = {:user_provided_headers => [:a,:b,:c,:d,:a]}
|
16
|
+
SmarterCSV.process("#{fixture_path}/duplicate_headers.csv", options)
|
17
|
+
}.to raise_exception(SmarterCSV::DuplicateHeaders)
|
18
|
+
end
|
19
|
+
|
20
|
+
it 'raises error on missing mapped headers' do
|
21
|
+
expect {
|
22
|
+
# the mapping is right, but the underlying csv file is bad
|
23
|
+
options = {:key_mapping => {:email => :a, :firstname => :b, :lastname => :c, :manager_email => :d, :age => :e} }
|
24
|
+
SmarterCSV.process("#{fixture_path}/duplicate_headers.csv", options)
|
25
|
+
}.to raise_exception(SmarterCSV::KeyMappingError)
|
26
|
+
end
|
27
|
+
end
|
28
|
+
|
29
|
+
describe 'with special handling' do
|
30
|
+
context 'with given suffix' do
|
31
|
+
let(:options) { {duplicate_header_suffix: '_'} }
|
32
|
+
|
33
|
+
it 'reads whole file' do
|
34
|
+
data = SmarterCSV.process("#{fixture_path}/duplicate_headers.csv", options)
|
35
|
+
expect(data.size).to eq 2
|
36
|
+
end
|
37
|
+
|
38
|
+
it 'generates the correct keys' do
|
39
|
+
data = SmarterCSV.process("#{fixture_path}/duplicate_headers.csv", options)
|
40
|
+
expect(data.first.keys).to eq [:email, :firstname, :lastname, :email_2, :age]
|
41
|
+
end
|
42
|
+
|
43
|
+
it 'enumerates when duplicate headers are given' do
|
44
|
+
options.merge!({:user_provided_headers => [:a,:b,:c,:a,:a]})
|
45
|
+
data = SmarterCSV.process("#{fixture_path}/duplicate_headers.csv", options)
|
46
|
+
expect(data.first.keys).to eq [:a, :b, :c, :a_2, :a_3]
|
47
|
+
end
|
48
|
+
|
49
|
+
it 'can remap duplicated headers' do
|
50
|
+
options.merge!({:key_mapping => {:email => :a, :firstname => :b, :lastname => :c, :email_2 => :d, :age => :e}})
|
51
|
+
data = SmarterCSV.process("#{fixture_path}/duplicate_headers.csv", options)
|
52
|
+
expect(data.first).to eq({a: 'tom@bla.com', b: 'Tom', c: 'Sawyer', d: 'mike@bla.com', e: 34})
|
53
|
+
end
|
54
|
+
end
|
55
|
+
|
56
|
+
context 'with empty suffix' do
|
57
|
+
let(:options) { {duplicate_header_suffix: ''} }
|
58
|
+
|
59
|
+
it 'reads whole file' do
|
60
|
+
data = SmarterCSV.process("#{fixture_path}/duplicate_headers.csv", options)
|
61
|
+
expect(data.size).to eq 2
|
62
|
+
end
|
63
|
+
|
64
|
+
it 'generates the correct keys' do
|
65
|
+
data = SmarterCSV.process("#{fixture_path}/duplicate_headers.csv", options)
|
66
|
+
expect(data.first.keys).to eq [:email, :firstname, :lastname, :email2, :age]
|
67
|
+
end
|
68
|
+
|
69
|
+
it 'enumerates when duplicate headers are given' do
|
70
|
+
options.merge!({:user_provided_headers => [:a,:b,:c,:a,:a]})
|
71
|
+
data = SmarterCSV.process("#{fixture_path}/duplicate_headers.csv", options)
|
72
|
+
expect(data.first.keys).to eq [:a, :b, :c, :a2, :a3]
|
73
|
+
end
|
74
|
+
end
|
75
|
+
end
|
76
|
+
end
|
@@ -3,28 +3,6 @@ require 'spec_helper'
|
|
3
3
|
fixture_path = 'spec/fixtures'
|
4
4
|
|
5
5
|
describe 'test exceptions for invalid headers' do
|
6
|
-
it 'raises error on duplicate headers' do
|
7
|
-
expect {
|
8
|
-
SmarterCSV.process("#{fixture_path}/duplicate_headers.csv", {})
|
9
|
-
}.to raise_exception(SmarterCSV::DuplicateHeaders)
|
10
|
-
end
|
11
|
-
|
12
|
-
it 'raises error on duplicate given headers' do
|
13
|
-
expect {
|
14
|
-
options = {:user_provided_headers => [:a,:b,:c,:d,:a]}
|
15
|
-
SmarterCSV.process("#{fixture_path}/duplicate_headers.csv", options)
|
16
|
-
}.to raise_exception(SmarterCSV::DuplicateHeaders)
|
17
|
-
end
|
18
|
-
|
19
|
-
it 'raises error on duplicate mapped headers' do
|
20
|
-
expect {
|
21
|
-
# the mapping is right, but the underlying csv file is bad
|
22
|
-
options = {:key_mapping => {:email => :a, :firstname => :b, :lastname => :c, :manager_email => :d, :age => :e} }
|
23
|
-
SmarterCSV.process("#{fixture_path}/duplicate_headers.csv", options)
|
24
|
-
}.to raise_exception(SmarterCSV::DuplicateHeaders)
|
25
|
-
end
|
26
|
-
|
27
|
-
|
28
6
|
it 'does not raise an error if no required headers are given' do
|
29
7
|
options = {:required_headers => nil} # order does not matter
|
30
8
|
data = SmarterCSV.process("#{fixture_path}/user_import.csv", options)
|
@@ -49,4 +27,12 @@ describe 'test exceptions for invalid headers' do
|
|
49
27
|
SmarterCSV.process("#{fixture_path}/user_import.csv", options)
|
50
28
|
}.to raise_exception(SmarterCSV::MissingHeaders)
|
51
29
|
end
|
30
|
+
|
31
|
+
it 'raises error on missing mapped headers' do
|
32
|
+
expect {
|
33
|
+
# :age does not exist in the CSV header
|
34
|
+
options = {:key_mapping => {:email => :a, :firstname => :b, :lastname => :c, :manager_email => :d, :age => :e} }
|
35
|
+
SmarterCSV.process("#{fixture_path}/user_import.csv", options)
|
36
|
+
}.to raise_exception(SmarterCSV::KeyMappingError)
|
37
|
+
end
|
52
38
|
end
|
@@ -2,23 +2,28 @@ require 'spec_helper'
|
|
2
2
|
|
3
3
|
fixture_path = 'spec/fixtures'
|
4
4
|
|
5
|
-
describe '
|
6
|
-
|
7
|
-
|
8
|
-
|
5
|
+
describe 'no header in file' do
|
6
|
+
let(:headers) { [:a,:b,:c,:d,:e,:f] }
|
7
|
+
let(:options) { {:headers_in_file => false, :user_provided_headers => headers} }
|
8
|
+
subject(:data) { SmarterCSV.process("#{fixture_path}/no_header.csv", options) }
|
9
|
+
|
10
|
+
it 'load the correct number of records' do
|
9
11
|
data.size.should == 5
|
10
|
-
|
11
|
-
data.each{|item| item.keys.each{|x| x.class.should be == Symbol}}
|
12
|
+
end
|
12
13
|
|
13
|
-
|
14
|
+
it 'uses given symbols for all records' do
|
15
|
+
data.each do |item|
|
14
16
|
item.keys.each do |key|
|
15
17
|
[:a,:b,:c,:d,:e,:f].should include( key )
|
16
18
|
end
|
17
19
|
end
|
18
|
-
|
19
|
-
data.each do |h|
|
20
|
-
h.size.should <= 6
|
21
|
-
end
|
22
20
|
end
|
23
21
|
|
22
|
+
it 'loads the correct data' do
|
23
|
+
data[0].should == {a: "Dan", b: "McAllister", c: 2, d: 0}
|
24
|
+
data[1].should == {a: "Lucy", b: "Laweless", d: 5, e: 0}
|
25
|
+
data[2].should == {a: "Miles", b: "O'Brian", c: 0, d: 0, e: 0, f: 21}
|
26
|
+
data[3].should == {a: "Nancy", b: "Homes", c: 2, d: 0, e: 1}
|
27
|
+
data[4].should == {a: "Hernán", b: "Curaçon", c: 3, d: 0, e: 0}
|
28
|
+
end
|
24
29
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: smarter_csv
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.5.0
|
4
|
+
version: 1.5.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Tilo Sloboda
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2022-04-
|
11
|
+
date: 2022-04-27 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rspec
|
@@ -112,6 +112,7 @@ files:
|
|
112
112
|
- spec/smarter_csv/close_file_spec.rb
|
113
113
|
- spec/smarter_csv/column_separator_spec.rb
|
114
114
|
- spec/smarter_csv/convert_values_to_numeric_spec.rb
|
115
|
+
- spec/smarter_csv/duplicate_headers_spec.rb
|
115
116
|
- spec/smarter_csv/empty_columns_spec.rb
|
116
117
|
- spec/smarter_csv/extenstions_spec.rb
|
117
118
|
- spec/smarter_csv/hard_sample_spec.rb
|
@@ -218,6 +219,7 @@ test_files:
|
|
218
219
|
- spec/smarter_csv/close_file_spec.rb
|
219
220
|
- spec/smarter_csv/column_separator_spec.rb
|
220
221
|
- spec/smarter_csv/convert_values_to_numeric_spec.rb
|
222
|
+
- spec/smarter_csv/duplicate_headers_spec.rb
|
221
223
|
- spec/smarter_csv/empty_columns_spec.rb
|
222
224
|
- spec/smarter_csv/extenstions_spec.rb
|
223
225
|
- spec/smarter_csv/hard_sample_spec.rb
|