smarter_csv 1.5.0 → 1.5.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +6 -0
- data/README.md +9 -0
- data/lib/smarter_csv/smarter_csv.rb +112 -82
- data/lib/smarter_csv/version.rb +1 -1
- data/spec/fixtures/duplicate_headers.csv +1 -1
- data/spec/smarter_csv/duplicate_headers_spec.rb +76 -0
- data/spec/smarter_csv/invalid_headers_spec.rb +8 -22
- data/spec/smarter_csv/no_header_spec.rb +16 -11
- metadata +4 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 352cf76ac0cd6b2eb4a1cac9e5056aa6e92a8a61b627d7c922e063dcf82ad675
|
4
|
+
data.tar.gz: 0c6e3ab1eaee02a9361fe0b418191244d81bc558dbcd10ee1d2c5f15390d91b6
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 3763cd8e493e7da6560e8ce9adc58bd411f745f5af119c97d70c02667a524ccb1055b5c640ef795c3cb25b79fa5e17800018da6e76f9d358afa1c7a3513caae3
|
7
|
+
data.tar.gz: '039183fdece20e80007f3f0d3e395fac8d273df6c21928a35b685ade5915503b3c55918fc7015ead0a7d768a545bb76bcfb1087006af6118dc3d22df83e68ddb'
|
data/CHANGELOG.md
CHANGED
@@ -1,6 +1,12 @@
|
|
1
1
|
|
2
2
|
# SmarterCSV 1.x Change Log
|
3
3
|
|
4
|
+
## 1.5.1 (2022-04-26)
|
5
|
+
* added raising of `KeyMappingError` if `key_mapping` refers to a non-existent key
|
6
|
+
* added option `duplicate_header_suffix` (thanks to Skye Shaw)
|
7
|
+
When given a non-nil string, it uses the suffix to append numbering 2..n to duplicate headers.
|
8
|
+
If your code will need to process arbitrary CSV files, please set `duplicate_header_suffix`.
|
9
|
+
|
4
10
|
## 1.5.0 (2022-04-25)
|
5
11
|
* fixed bug with trailing col_sep characters, introduced in 1.4.0
|
6
12
|
* Fix deprecation warning in Ruby 3.0.3 / $INPUT_RECORD_SEPARATOR (thanks to Joel Fouse)
|
data/README.md
CHANGED
@@ -228,6 +228,7 @@ The options and the block are optional.
|
|
228
228
|
| :headers_in_file | true | Whether or not the file contains headers as the first line. |
|
229
229
|
| | | Important if the file does not contain headers, |
|
230
230
|
| | | otherwise you would lose the first line of data. |
|
231
|
+
| :duplicate_header_suffix | nil | If set, adds numbers to duplicated headers and separates them by the given suffix |
|
231
232
|
| :user_provided_headers | nil | *careful with that axe!* |
|
232
233
|
| | | user provided Array of header strings or symbols, to define |
|
233
234
|
| | | what headers should be used, overriding any in-file headers. |
|
@@ -282,6 +283,7 @@ And header and data validations will also be supported in 2.x
|
|
282
283
|
data = SmarterCSV.process(f)
|
283
284
|
end
|
284
285
|
```
|
286
|
+
|
285
287
|
#### NOTES about CSV Headers:
|
286
288
|
* as this method parses CSV files, it is assumed that the first line of any file will contain a valid header
|
287
289
|
* the first line with the header might be commented out, in which case you will need to set `comment_regexp: /\A#/`
|
@@ -291,6 +293,13 @@ And header and data validations will also be supported in 2.x
|
|
291
293
|
* you cannot combine the :user_provided_headers and :key_mapping options
|
292
294
|
* if an incorrect number of headers is provided via :user_provided_headers, exception SmarterCSV::HeaderSizeMismatch is raised
|
293
295
|
|
296
|
+
#### NOTES on Duplicate Headers:
|
297
|
+
As a corner case, it is possible that a CSV file contains multiple headers with the same name.
|
298
|
+
* If that happens, by default `smarter_csv` will raise a `DuplicateHeaders` error.
|
299
|
+
* If you set `duplicate_header_suffix` to a non-nil string, it will use it to append numbers 2..n to the duplicate headers. To further disambiguate the headers, you can additionally use `key_mapping` to assign meaningful names.
|
300
|
+
* If your code will need to process arbitrary CSV files, please set `duplicate_header_suffix`.
|
301
|
+
* Another way to deal with duplicate headers is to use `user_provided_headers` to ignore any headers in the file.
|
302
|
+
|
294
303
|
#### NOTES on Key Mapping:
|
295
304
|
* keys in the header line of the file can be re-mapped to a chosen set of symbols, so the resulting Hashes can be better used internally in your application (e.g. when directly creating MongoDB entries with them)
|
296
305
|
* if you want to completely delete a key, then map it to nil or to '', they will be automatically deleted from any result Hash
|
@@ -5,15 +5,17 @@ module SmarterCSV
|
|
5
5
|
class DuplicateHeaders < SmarterCSVException; end
|
6
6
|
class MissingHeaders < SmarterCSVException; end
|
7
7
|
class NoColSepDetected < SmarterCSVException; end
|
8
|
+
class KeyMappingError < SmarterCSVException; end
|
8
9
|
|
9
|
-
|
10
|
+
# first parameter: filename or input object which responds to readline method
|
11
|
+
def SmarterCSV.process(input, options={}, &block)
|
10
12
|
options = default_options.merge(options)
|
11
13
|
options[:invalid_byte_sequence] = '' if options[:invalid_byte_sequence].nil?
|
12
14
|
|
13
15
|
headerA = []
|
14
16
|
result = []
|
15
|
-
file_line_count = 0
|
16
|
-
csv_line_count = 0
|
17
|
+
@file_line_count = 0
|
18
|
+
@csv_line_count = 0
|
17
19
|
has_rails = !! defined?(Rails)
|
18
20
|
begin
|
19
21
|
f = input.respond_to?(:readline) ? input : File.open(input, "r:#{options[:file_encoding]}")
|
@@ -33,79 +35,7 @@ module SmarterCSV
|
|
33
35
|
|
34
36
|
options[:skip_lines].to_i.times{f.readline(options[:row_sep])} if options[:skip_lines].to_i > 0
|
35
37
|
|
36
|
-
|
37
|
-
# process the header line in the CSV file..
|
38
|
-
# the first line of a CSV file contains the header .. it might be commented out, so we need to read it anyhow
|
39
|
-
header = f.readline(options[:row_sep])
|
40
|
-
header = header.force_encoding('utf-8').encode('utf-8', invalid: :replace, undef: :replace, replace: options[:invalid_byte_sequence]) if options[:force_utf8] || options[:file_encoding] !~ /utf-8/i
|
41
|
-
header = header.sub(options[:comment_regexp],'') if options[:comment_regexp]
|
42
|
-
header = header.chomp(options[:row_sep])
|
43
|
-
|
44
|
-
file_line_count += 1
|
45
|
-
csv_line_count += 1
|
46
|
-
header = header.gsub(options[:strip_chars_from_headers], '') if options[:strip_chars_from_headers]
|
47
|
-
|
48
|
-
if (header =~ %r{#{options[:quote_char]}}) and (! options[:force_simple_split])
|
49
|
-
file_headerA = begin
|
50
|
-
CSV.parse( header, **csv_options ).flatten.collect!{|x| x.nil? ? '' : x} # to deal with nil values from CSV.parse
|
51
|
-
rescue CSV::MalformedCSVError => e
|
52
|
-
raise $!, "#{$!} [SmarterCSV: csv line #{csv_line_count}]", $!.backtrace
|
53
|
-
end
|
54
|
-
else
|
55
|
-
file_headerA = header.split(options[:col_sep])
|
56
|
-
end
|
57
|
-
file_header_size = file_headerA.size # before mapping, which could delete keys
|
58
|
-
|
59
|
-
file_headerA.map!{|x| x.gsub(%r/#{options[:quote_char]}/,'') }
|
60
|
-
file_headerA.map!{|x| x.strip} if options[:strip_whitespace]
|
61
|
-
unless options[:keep_original_headers]
|
62
|
-
file_headerA.map!{|x| x.gsub(/\s+|-+/,'_')}
|
63
|
-
file_headerA.map!{|x| x.downcase } if options[:downcase_header]
|
64
|
-
end
|
65
|
-
else
|
66
|
-
raise SmarterCSV::IncorrectOption , "ERROR: If :headers_in_file is set to false, you have to provide :user_provided_headers" if options[:user_provided_headers].nil?
|
67
|
-
end
|
68
|
-
if options[:user_provided_headers] && options[:user_provided_headers].class == Array && ! options[:user_provided_headers].empty?
|
69
|
-
# use user-provided headers
|
70
|
-
headerA = options[:user_provided_headers]
|
71
|
-
if defined?(file_header_size) && ! file_header_size.nil?
|
72
|
-
if headerA.size != file_header_size
|
73
|
-
raise SmarterCSV::HeaderSizeMismatch , "ERROR: :user_provided_headers defines #{headerA.size} headers != CSV-file #{input} has #{file_header_size} headers"
|
74
|
-
else
|
75
|
-
# we could print out the mapping of file_headerA to headerA here
|
76
|
-
end
|
77
|
-
end
|
78
|
-
else
|
79
|
-
headerA = file_headerA
|
80
|
-
end
|
81
|
-
header_size = headerA.size # used for splitting lines
|
82
|
-
|
83
|
-
headerA.map!{|x| x.to_sym } unless options[:strings_as_keys] || options[:keep_original_headers]
|
84
|
-
|
85
|
-
unless options[:user_provided_headers] # wouldn't make sense to re-map user provided headers
|
86
|
-
key_mappingH = options[:key_mapping]
|
87
|
-
|
88
|
-
# do some key mapping on the keys in the file header
|
89
|
-
# if you want to completely delete a key, then map it to nil or to ''
|
90
|
-
if ! key_mappingH.nil? && key_mappingH.class == Hash && key_mappingH.keys.size > 0
|
91
|
-
headerA.map!{|x| key_mappingH.has_key?(x) ? (key_mappingH[x].nil? ? nil : key_mappingH[x]) : (options[:remove_unmapped_keys] ? nil : x)}
|
92
|
-
end
|
93
|
-
end
|
94
|
-
|
95
|
-
# header_validations
|
96
|
-
duplicate_headers = []
|
97
|
-
headerA.compact.each do |k|
|
98
|
-
duplicate_headers << k if headerA.select{|x| x == k}.size > 1
|
99
|
-
end
|
100
|
-
raise SmarterCSV::DuplicateHeaders , "ERROR: duplicate headers: #{duplicate_headers.join(',')}" unless duplicate_headers.empty?
|
101
|
-
|
102
|
-
if options[:required_headers] && options[:required_headers].is_a?(Array)
|
103
|
-
missing_headers = []
|
104
|
-
options[:required_headers].each do |k|
|
105
|
-
missing_headers << k unless headerA.include?(k)
|
106
|
-
end
|
107
|
-
raise SmarterCSV::MissingHeaders , "ERROR: missing headers: #{missing_headers.join(',')}" unless missing_headers.empty?
|
108
|
-
end
|
38
|
+
headerA, header_size = process_headers(f, options, csv_options)
|
109
39
|
|
110
40
|
# in case we use chunking.. we'll need to set it up..
|
111
41
|
if ! options[:chunk_size].nil? && options[:chunk_size].to_i > 0
|
@@ -120,13 +50,13 @@ module SmarterCSV
|
|
120
50
|
# now on to processing all the rest of the lines in the CSV file:
|
121
51
|
while ! f.eof? # we can't use f.readlines() here, because this would read the whole file into memory at once, and eof => true
|
122
52
|
line = f.readline(options[:row_sep]) # read one line
|
53
|
+
@file_line_count += 1
|
54
|
+
@csv_line_count += 1
|
123
55
|
|
124
56
|
# replace invalid byte sequence in UTF-8 with question mark to avoid errors
|
125
57
|
line = line.force_encoding('utf-8').encode('utf-8', invalid: :replace, undef: :replace, replace: options[:invalid_byte_sequence]) if options[:force_utf8] || options[:file_encoding] !~ /utf-8/i
|
126
58
|
|
127
|
-
file_line_count
|
128
|
-
csv_line_count += 1
|
129
|
-
print "processing file line %10d, csv line %10d\r" % [file_line_count, csv_line_count] if options[:verbose]
|
59
|
+
print "processing file line %10d, csv line %10d\r" % [@file_line_count, @csv_line_count] if options[:verbose]
|
130
60
|
|
131
61
|
next if options[:comment_regexp] && line =~ options[:comment_regexp] # ignore all comment lines if there are any
|
132
62
|
|
@@ -138,9 +68,9 @@ module SmarterCSV
|
|
138
68
|
next_line = f.readline(options[:row_sep])
|
139
69
|
next_line = next_line.force_encoding('utf-8').encode('utf-8', invalid: :replace, undef: :replace, replace: options[:invalid_byte_sequence]) if options[:force_utf8] || options[:file_encoding] !~ /utf-8/i
|
140
70
|
line += next_line
|
141
|
-
file_line_count += 1
|
71
|
+
@file_line_count += 1
|
142
72
|
end
|
143
|
-
print "\nline contains uneven number of quote chars so including content through file line %d\n" % file_line_count if options[:verbose] && multiline
|
73
|
+
print "\nline contains uneven number of quote chars so including content through file line %d\n" % @file_line_count if options[:verbose] && multiline
|
144
74
|
|
145
75
|
line.chomp!(options[:row_sep])
|
146
76
|
|
@@ -148,7 +78,7 @@ module SmarterCSV
|
|
148
78
|
dataA = begin
|
149
79
|
CSV.parse( line, **csv_options ).flatten.collect!{|x| x.nil? ? '' : x} # to deal with nil values from CSV.parse
|
150
80
|
rescue CSV::MalformedCSVError => e
|
151
|
-
raise $!, "#{$!} [SmarterCSV: csv line #{csv_line_count}]", $!.backtrace
|
81
|
+
raise $!, "#{$!} [SmarterCSV: csv line #{@csv_line_count}]", $!.backtrace
|
152
82
|
end
|
153
83
|
else
|
154
84
|
dataA = line.split(options[:col_sep], header_size)
|
@@ -268,6 +198,7 @@ module SmarterCSV
|
|
268
198
|
comment_regexp: nil, # was: /\A#/,
|
269
199
|
convert_values_to_numeric: true,
|
270
200
|
downcase_header: true,
|
201
|
+
duplicate_header_suffix: nil,
|
271
202
|
file_encoding: 'utf-8',
|
272
203
|
force_simple_split: false ,
|
273
204
|
force_utf8: false,
|
@@ -378,4 +309,103 @@ module SmarterCSV
|
|
378
309
|
k,_ = counts.max_by{|_,v| v}
|
379
310
|
return k # the most frequent one is it
|
380
311
|
end
|
312
|
+
|
313
|
+
def self.process_headers(filehandle, options, csv_options)
|
314
|
+
if options[:headers_in_file] # extract the header line
|
315
|
+
# process the header line in the CSV file..
|
316
|
+
# the first line of a CSV file contains the header .. it might be commented out, so we need to read it anyhow
|
317
|
+
header = filehandle.readline(options[:row_sep])
|
318
|
+
@file_line_count += 1
|
319
|
+
@csv_line_count += 1
|
320
|
+
|
321
|
+
header = header.force_encoding('utf-8').encode('utf-8', invalid: :replace, undef: :replace, replace: options[:invalid_byte_sequence]) if options[:force_utf8] || options[:file_encoding] !~ /utf-8/i
|
322
|
+
header = header.sub(options[:comment_regexp],'') if options[:comment_regexp]
|
323
|
+
header = header.chomp(options[:row_sep])
|
324
|
+
|
325
|
+
header = header.gsub(options[:strip_chars_from_headers], '') if options[:strip_chars_from_headers]
|
326
|
+
|
327
|
+
if (header =~ %r{#{options[:quote_char]}}) and (! options[:force_simple_split])
|
328
|
+
file_headerA = begin
|
329
|
+
CSV.parse( header, **csv_options ).flatten.collect!{|x| x.nil? ? '' : x} # to deal with nil values from CSV.parse
|
330
|
+
rescue CSV::MalformedCSVError => e
|
331
|
+
raise $!, "#{$!} [SmarterCSV: csv line #{@csv_line_count}]", $!.backtrace
|
332
|
+
end
|
333
|
+
else
|
334
|
+
file_headerA = header.split(options[:col_sep])
|
335
|
+
end
|
336
|
+
file_header_size = file_headerA.size # before mapping, which could delete keys
|
337
|
+
|
338
|
+
file_headerA.map!{|x| x.gsub(%r/#{options[:quote_char]}/,'') }
|
339
|
+
file_headerA.map!{|x| x.strip} if options[:strip_whitespace]
|
340
|
+
unless options[:keep_original_headers]
|
341
|
+
file_headerA.map!{|x| x.gsub(/\s+|-+/,'_')}
|
342
|
+
file_headerA.map!{|x| x.downcase } if options[:downcase_header]
|
343
|
+
end
|
344
|
+
else
|
345
|
+
raise SmarterCSV::IncorrectOption , "ERROR: If :headers_in_file is set to false, you have to provide :user_provided_headers" unless options[:user_provided_headers]
|
346
|
+
end
|
347
|
+
if options[:user_provided_headers] && options[:user_provided_headers].class == Array && ! options[:user_provided_headers].empty?
|
348
|
+
# use user-provided headers
|
349
|
+
headerA = options[:user_provided_headers]
|
350
|
+
if defined?(file_header_size) && ! file_header_size.nil?
|
351
|
+
if headerA.size != file_header_size
|
352
|
+
raise SmarterCSV::HeaderSizeMismatch , "ERROR: :user_provided_headers defines #{headerA.size} headers != CSV-file #{input} has #{file_header_size} headers"
|
353
|
+
else
|
354
|
+
# we could print out the mapping of file_headerA to headerA here
|
355
|
+
end
|
356
|
+
end
|
357
|
+
else
|
358
|
+
headerA = file_headerA
|
359
|
+
end
|
360
|
+
|
361
|
+
# detect duplicate headers and disambiguate
|
362
|
+
headerA = process_duplicate_headers(headerA, options) if options[:duplicate_header_suffix]
|
363
|
+
header_size = headerA.size # used for splitting lines
|
364
|
+
|
365
|
+
headerA.map!{|x| x.to_sym } unless options[:strings_as_keys] || options[:keep_original_headers]
|
366
|
+
|
367
|
+
unless options[:user_provided_headers] # wouldn't make sense to re-map user provided headers
|
368
|
+
key_mappingH = options[:key_mapping]
|
369
|
+
|
370
|
+
# do some key mapping on the keys in the file header
|
371
|
+
# if you want to completely delete a key, then map it to nil or to ''
|
372
|
+
if ! key_mappingH.nil? && key_mappingH.class == Hash && key_mappingH.keys.size > 0
|
373
|
+
# we can't map keys that are not there
|
374
|
+
raise SmarterCSV::KeyMappingError unless (key_mappingH.keys - headerA).empty?
|
375
|
+
|
376
|
+
headerA.map!{|x| key_mappingH.has_key?(x) ? (key_mappingH[x].nil? ? nil : key_mappingH[x]) : (options[:remove_unmapped_keys] ? nil : x)}
|
377
|
+
end
|
378
|
+
end
|
379
|
+
|
380
|
+
# header_validations
|
381
|
+
duplicate_headers = []
|
382
|
+
headerA.compact.each do |k|
|
383
|
+
duplicate_headers << k if headerA.select{|x| x == k}.size > 1
|
384
|
+
end
|
385
|
+
raise SmarterCSV::DuplicateHeaders , "ERROR: duplicate headers: #{duplicate_headers.join(',')}" unless duplicate_headers.empty?
|
386
|
+
|
387
|
+
if options[:required_headers] && options[:required_headers].is_a?(Array)
|
388
|
+
missing_headers = []
|
389
|
+
options[:required_headers].each do |k|
|
390
|
+
missing_headers << k unless headerA.include?(k)
|
391
|
+
end
|
392
|
+
raise SmarterCSV::MissingHeaders , "ERROR: missing headers: #{missing_headers.join(',')}" unless missing_headers.empty?
|
393
|
+
end
|
394
|
+
|
395
|
+
[headerA, header_size]
|
396
|
+
end
|
397
|
+
|
398
|
+
def self.process_duplicate_headers(headers, options)
|
399
|
+
counts = Hash.new(0)
|
400
|
+
result = []
|
401
|
+
headers.each do |key|
|
402
|
+
counts[key] += 1
|
403
|
+
if counts[key] == 1
|
404
|
+
result << key
|
405
|
+
else
|
406
|
+
result << [key, options[:duplicate_header_suffix], counts[key]].join
|
407
|
+
end
|
408
|
+
end
|
409
|
+
result
|
410
|
+
end
|
381
411
|
end
|
data/lib/smarter_csv/version.rb
CHANGED
@@ -0,0 +1,76 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
fixture_path = 'spec/fixtures'
|
4
|
+
|
5
|
+
describe 'duplicate headers' do
|
6
|
+
describe 'without special handling / default behavior' do
|
7
|
+
it 'raises error on duplicate headers' do
|
8
|
+
expect {
|
9
|
+
SmarterCSV.process("#{fixture_path}/duplicate_headers.csv", {})
|
10
|
+
}.to raise_exception(SmarterCSV::DuplicateHeaders)
|
11
|
+
end
|
12
|
+
|
13
|
+
it 'raises error on duplicate given headers' do
|
14
|
+
expect {
|
15
|
+
options = {:user_provided_headers => [:a,:b,:c,:d,:a]}
|
16
|
+
SmarterCSV.process("#{fixture_path}/duplicate_headers.csv", options)
|
17
|
+
}.to raise_exception(SmarterCSV::DuplicateHeaders)
|
18
|
+
end
|
19
|
+
|
20
|
+
it 'raises error on missing mapped headers' do
|
21
|
+
expect {
|
22
|
+
# the mapping is right, but the underlying csv file is bad
|
23
|
+
options = {:key_mapping => {:email => :a, :firstname => :b, :lastname => :c, :manager_email => :d, :age => :e} }
|
24
|
+
SmarterCSV.process("#{fixture_path}/duplicate_headers.csv", options)
|
25
|
+
}.to raise_exception(SmarterCSV::KeyMappingError)
|
26
|
+
end
|
27
|
+
end
|
28
|
+
|
29
|
+
describe 'with special handling' do
|
30
|
+
context 'with given suffix' do
|
31
|
+
let(:options) { {duplicate_header_suffix: '_'} }
|
32
|
+
|
33
|
+
it 'reads whole file' do
|
34
|
+
data = SmarterCSV.process("#{fixture_path}/duplicate_headers.csv", options)
|
35
|
+
expect(data.size).to eq 2
|
36
|
+
end
|
37
|
+
|
38
|
+
it 'generates the correct keys' do
|
39
|
+
data = SmarterCSV.process("#{fixture_path}/duplicate_headers.csv", options)
|
40
|
+
expect(data.first.keys).to eq [:email, :firstname, :lastname, :email_2, :age]
|
41
|
+
end
|
42
|
+
|
43
|
+
it 'enumerates when duplicate headers are given' do
|
44
|
+
options.merge!({:user_provided_headers => [:a,:b,:c,:a,:a]})
|
45
|
+
data = SmarterCSV.process("#{fixture_path}/duplicate_headers.csv", options)
|
46
|
+
expect(data.first.keys).to eq [:a, :b, :c, :a_2, :a_3]
|
47
|
+
end
|
48
|
+
|
49
|
+
it 'can remap duplicated headers' do
|
50
|
+
options.merge!({:key_mapping => {:email => :a, :firstname => :b, :lastname => :c, :email_2 => :d, :age => :e}})
|
51
|
+
data = SmarterCSV.process("#{fixture_path}/duplicate_headers.csv", options)
|
52
|
+
expect(data.first).to eq({a: 'tom@bla.com', b: 'Tom', c: 'Sawyer', d: 'mike@bla.com', e: 34})
|
53
|
+
end
|
54
|
+
end
|
55
|
+
|
56
|
+
context 'with empty suffix' do
|
57
|
+
let(:options) { {duplicate_header_suffix: ''} }
|
58
|
+
|
59
|
+
it 'reads whole file' do
|
60
|
+
data = SmarterCSV.process("#{fixture_path}/duplicate_headers.csv", options)
|
61
|
+
expect(data.size).to eq 2
|
62
|
+
end
|
63
|
+
|
64
|
+
it 'generates the correct keys' do
|
65
|
+
data = SmarterCSV.process("#{fixture_path}/duplicate_headers.csv", options)
|
66
|
+
expect(data.first.keys).to eq [:email, :firstname, :lastname, :email2, :age]
|
67
|
+
end
|
68
|
+
|
69
|
+
it 'enumerates when duplicate headers are given' do
|
70
|
+
options.merge!({:user_provided_headers => [:a,:b,:c,:a,:a]})
|
71
|
+
data = SmarterCSV.process("#{fixture_path}/duplicate_headers.csv", options)
|
72
|
+
expect(data.first.keys).to eq [:a, :b, :c, :a2, :a3]
|
73
|
+
end
|
74
|
+
end
|
75
|
+
end
|
76
|
+
end
|
@@ -3,28 +3,6 @@ require 'spec_helper'
|
|
3
3
|
fixture_path = 'spec/fixtures'
|
4
4
|
|
5
5
|
describe 'test exceptions for invalid headers' do
|
6
|
-
it 'raises error on duplicate headers' do
|
7
|
-
expect {
|
8
|
-
SmarterCSV.process("#{fixture_path}/duplicate_headers.csv", {})
|
9
|
-
}.to raise_exception(SmarterCSV::DuplicateHeaders)
|
10
|
-
end
|
11
|
-
|
12
|
-
it 'raises error on duplicate given headers' do
|
13
|
-
expect {
|
14
|
-
options = {:user_provided_headers => [:a,:b,:c,:d,:a]}
|
15
|
-
SmarterCSV.process("#{fixture_path}/duplicate_headers.csv", options)
|
16
|
-
}.to raise_exception(SmarterCSV::DuplicateHeaders)
|
17
|
-
end
|
18
|
-
|
19
|
-
it 'raises error on duplicate mapped headers' do
|
20
|
-
expect {
|
21
|
-
# the mapping is right, but the underlying csv file is bad
|
22
|
-
options = {:key_mapping => {:email => :a, :firstname => :b, :lastname => :c, :manager_email => :d, :age => :e} }
|
23
|
-
SmarterCSV.process("#{fixture_path}/duplicate_headers.csv", options)
|
24
|
-
}.to raise_exception(SmarterCSV::DuplicateHeaders)
|
25
|
-
end
|
26
|
-
|
27
|
-
|
28
6
|
it 'does not raise an error if no required headers are given' do
|
29
7
|
options = {:required_headers => nil} # order does not matter
|
30
8
|
data = SmarterCSV.process("#{fixture_path}/user_import.csv", options)
|
@@ -49,4 +27,12 @@ describe 'test exceptions for invalid headers' do
|
|
49
27
|
SmarterCSV.process("#{fixture_path}/user_import.csv", options)
|
50
28
|
}.to raise_exception(SmarterCSV::MissingHeaders)
|
51
29
|
end
|
30
|
+
|
31
|
+
it 'raises error on missing mapped headers' do
|
32
|
+
expect {
|
33
|
+
# :age does not exist in the CSV header
|
34
|
+
options = {:key_mapping => {:email => :a, :firstname => :b, :lastname => :c, :manager_email => :d, :age => :e} }
|
35
|
+
SmarterCSV.process("#{fixture_path}/user_import.csv", options)
|
36
|
+
}.to raise_exception(SmarterCSV::KeyMappingError)
|
37
|
+
end
|
52
38
|
end
|
@@ -2,23 +2,28 @@ require 'spec_helper'
|
|
2
2
|
|
3
3
|
fixture_path = 'spec/fixtures'
|
4
4
|
|
5
|
-
describe '
|
6
|
-
|
7
|
-
|
8
|
-
|
5
|
+
describe 'no header in file' do
|
6
|
+
let(:headers) { [:a,:b,:c,:d,:e,:f] }
|
7
|
+
let(:options) { {:headers_in_file => false, :user_provided_headers => headers} }
|
8
|
+
subject(:data) { SmarterCSV.process("#{fixture_path}/no_header.csv", options) }
|
9
|
+
|
10
|
+
it 'load the correct number of records' do
|
9
11
|
data.size.should == 5
|
10
|
-
|
11
|
-
data.each{|item| item.keys.each{|x| x.class.should be == Symbol}}
|
12
|
+
end
|
12
13
|
|
13
|
-
|
14
|
+
it 'uses given symbols for all records' do
|
15
|
+
data.each do |item|
|
14
16
|
item.keys.each do |key|
|
15
17
|
[:a,:b,:c,:d,:e,:f].should include( key )
|
16
18
|
end
|
17
19
|
end
|
18
|
-
|
19
|
-
data.each do |h|
|
20
|
-
h.size.should <= 6
|
21
|
-
end
|
22
20
|
end
|
23
21
|
|
22
|
+
it 'loads the correct data' do
|
23
|
+
data[0].should == {a: "Dan", b: "McAllister", c: 2, d: 0}
|
24
|
+
data[1].should == {a: "Lucy", b: "Laweless", d: 5, e: 0}
|
25
|
+
data[2].should == {a: "Miles", b: "O'Brian", c: 0, d: 0, e: 0, f: 21}
|
26
|
+
data[3].should == {a: "Nancy", b: "Homes", c: 2, d: 0, e: 1}
|
27
|
+
data[4].should == {a: "Hernán", b: "Curaçon", c: 3, d: 0, e: 0}
|
28
|
+
end
|
24
29
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: smarter_csv
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.5.0
|
4
|
+
version: 1.5.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Tilo Sloboda
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2022-04-
|
11
|
+
date: 2022-04-27 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rspec
|
@@ -112,6 +112,7 @@ files:
|
|
112
112
|
- spec/smarter_csv/close_file_spec.rb
|
113
113
|
- spec/smarter_csv/column_separator_spec.rb
|
114
114
|
- spec/smarter_csv/convert_values_to_numeric_spec.rb
|
115
|
+
- spec/smarter_csv/duplicate_headers_spec.rb
|
115
116
|
- spec/smarter_csv/empty_columns_spec.rb
|
116
117
|
- spec/smarter_csv/extenstions_spec.rb
|
117
118
|
- spec/smarter_csv/hard_sample_spec.rb
|
@@ -218,6 +219,7 @@ test_files:
|
|
218
219
|
- spec/smarter_csv/close_file_spec.rb
|
219
220
|
- spec/smarter_csv/column_separator_spec.rb
|
220
221
|
- spec/smarter_csv/convert_values_to_numeric_spec.rb
|
222
|
+
- spec/smarter_csv/duplicate_headers_spec.rb
|
221
223
|
- spec/smarter_csv/empty_columns_spec.rb
|
222
224
|
- spec/smarter_csv/extenstions_spec.rb
|
223
225
|
- spec/smarter_csv/hard_sample_spec.rb
|