smarter_csv 1.4.0 → 1.5.1

Sign up to get free protection for your applications and to get access to all the features.
@@ -5,116 +5,37 @@ module SmarterCSV
5
5
  class DuplicateHeaders < SmarterCSVException; end
6
6
  class MissingHeaders < SmarterCSVException; end
7
7
  class NoColSepDetected < SmarterCSVException; end
8
+ class KeyMappingError < SmarterCSVException; end
8
9
 
9
- def SmarterCSV.process(input, options={}, &block) # first parameter: filename or input object with readline method
10
- default_options = {:col_sep => ',', :row_sep => $INPUT_RECORD_SEPARATOR, :quote_char => '"', :force_simple_split => false , :verbose => false ,
11
- :remove_empty_values => true, :remove_zero_values => false , :remove_values_matching => nil , :remove_empty_hashes => true , :strip_whitespace => true,
12
- :convert_values_to_numeric => true, :strip_chars_from_headers => nil , :user_provided_headers => nil , :headers_in_file => true,
13
- :comment_regexp => /\A#/, :chunk_size => nil , :key_mapping_hash => nil , :downcase_header => true, :strings_as_keys => false, :file_encoding => 'utf-8',
14
- :remove_unmapped_keys => false, :keep_original_headers => false, :value_converters => nil, :skip_lines => nil, :force_utf8 => false, :invalid_byte_sequence => '',
15
- :auto_row_sep_chars => 500, :required_headers => nil
16
- }
10
+ # first parameter: filename or input object which responds to readline method
11
+ def SmarterCSV.process(input, options={}, &block)
17
12
  options = default_options.merge(options)
18
13
  options[:invalid_byte_sequence] = '' if options[:invalid_byte_sequence].nil?
19
- csv_options = options.select{|k,v| [:col_sep, :row_sep, :quote_char].include?(k)} # options.slice(:col_sep, :row_sep, :quote_char)
14
+
20
15
  headerA = []
21
16
  result = []
22
- old_row_sep = $INPUT_RECORD_SEPARATOR
23
- file_line_count = 0
24
- csv_line_count = 0
17
+ @file_line_count = 0
18
+ @csv_line_count = 0
25
19
  has_rails = !! defined?(Rails)
26
20
  begin
27
21
  f = input.respond_to?(:readline) ? input : File.open(input, "r:#{options[:file_encoding]}")
28
22
 
23
+ # auto-detect the row separator
24
+ options[:row_sep] = SmarterCSV.guess_line_ending(f, options) if options[:row_sep].to_sym == :auto
29
25
  # attempt to auto-detect column separator
30
- options[:col_sep] = guess_column_separator(f) if options[:col_sep] == 'auto'
26
+ options[:col_sep] = guess_column_separator(f, options) if options[:col_sep].to_sym == :auto
27
+ # preserve options, in case we need to call the CSV class
28
+ csv_options = options.select{|k,v| [:col_sep, :row_sep, :quote_char].include?(k)} # options.slice(:col_sep, :row_sep, :quote_char)
29
+ csv_options.delete(:row_sep) if [nil, :auto].include?( options[:row_sep].to_sym )
30
+ csv_options.delete(:col_sep) if [nil, :auto].include?( options[:col_sep].to_sym )
31
31
 
32
32
  if (options[:force_utf8] || options[:file_encoding] =~ /utf-8/i) && ( f.respond_to?(:external_encoding) && f.external_encoding != Encoding.find('UTF-8') || f.respond_to?(:encoding) && f.encoding != Encoding.find('UTF-8') )
33
33
  puts 'WARNING: you are trying to process UTF-8 input, but did not open the input with "b:utf-8" option. See README file "NOTES about File Encodings".'
34
34
  end
35
35
 
36
- if options[:row_sep] == :auto
37
- options[:row_sep] = line_ending = SmarterCSV.guess_line_ending( f, options )
38
- f.rewind
39
- end
40
- $INPUT_RECORD_SEPARATOR = options[:row_sep]
41
-
42
- if options[:skip_lines].to_i > 0
43
- options[:skip_lines].to_i.times{f.readline}
44
- end
36
+ options[:skip_lines].to_i.times{f.readline(options[:row_sep])} if options[:skip_lines].to_i > 0
45
37
 
46
- if options[:headers_in_file] # extract the header line
47
- # process the header line in the CSV file..
48
- # the first line of a CSV file contains the header .. it might be commented out, so we need to read it anyhow
49
- header = f.readline
50
- header = header.force_encoding('utf-8').encode('utf-8', invalid: :replace, undef: :replace, replace: options[:invalid_byte_sequence]) if options[:force_utf8] || options[:file_encoding] !~ /utf-8/i
51
- header = header.sub(options[:comment_regexp],'').chomp(options[:row_sep])
52
-
53
- file_line_count += 1
54
- csv_line_count += 1
55
- header = header.gsub(options[:strip_chars_from_headers], '') if options[:strip_chars_from_headers]
56
-
57
- if (header =~ %r{#{options[:quote_char]}}) and (! options[:force_simple_split])
58
- file_headerA = begin
59
- CSV.parse( header, **csv_options ).flatten.collect!{|x| x.nil? ? '' : x} # to deal with nil values from CSV.parse
60
- rescue CSV::MalformedCSVError => e
61
- raise $!, "#{$!} [SmarterCSV: csv line #{csv_line_count}]", $!.backtrace
62
- end
63
- else
64
- file_headerA = header.split(options[:col_sep])
65
- end
66
- file_header_size = file_headerA.size # before mapping, which could delete keys
67
-
68
- file_headerA.map!{|x| x.gsub(%r/#{options[:quote_char]}/,'') }
69
- file_headerA.map!{|x| x.strip} if options[:strip_whitespace]
70
- unless options[:keep_original_headers]
71
- file_headerA.map!{|x| x.gsub(/\s+|-+/,'_')}
72
- file_headerA.map!{|x| x.downcase } if options[:downcase_header]
73
- end
74
- else
75
- raise SmarterCSV::IncorrectOption , "ERROR: If :headers_in_file is set to false, you have to provide :user_provided_headers" if options[:user_provided_headers].nil?
76
- end
77
- if options[:user_provided_headers] && options[:user_provided_headers].class == Array && ! options[:user_provided_headers].empty?
78
- # use user-provided headers
79
- headerA = options[:user_provided_headers]
80
- if defined?(file_header_size) && ! file_header_size.nil?
81
- if headerA.size != file_header_size
82
- raise SmarterCSV::HeaderSizeMismatch , "ERROR: :user_provided_headers defines #{headerA.size} headers != CSV-file #{input} has #{file_header_size} headers"
83
- else
84
- # we could print out the mapping of file_headerA to headerA here
85
- end
86
- end
87
- else
88
- headerA = file_headerA
89
- end
90
- header_size = headerA.size
91
-
92
- headerA.map!{|x| x.to_sym } unless options[:strings_as_keys] || options[:keep_original_headers]
93
-
94
- unless options[:user_provided_headers] # wouldn't make sense to re-map user provided headers
95
- key_mappingH = options[:key_mapping]
96
-
97
- # do some key mapping on the keys in the file header
98
- # if you want to completely delete a key, then map it to nil or to ''
99
- if ! key_mappingH.nil? && key_mappingH.class == Hash && key_mappingH.keys.size > 0
100
- headerA.map!{|x| key_mappingH.has_key?(x) ? (key_mappingH[x].nil? ? nil : key_mappingH[x]) : (options[:remove_unmapped_keys] ? nil : x)}
101
- end
102
- end
103
-
104
- # header_validations
105
- duplicate_headers = []
106
- headerA.compact.each do |k|
107
- duplicate_headers << k if headerA.select{|x| x == k}.size > 1
108
- end
109
- raise SmarterCSV::DuplicateHeaders , "ERROR: duplicate headers: #{duplicate_headers.join(',')}" unless duplicate_headers.empty?
110
-
111
- if options[:required_headers] && options[:required_headers].is_a?(Array)
112
- missing_headers = []
113
- options[:required_headers].each do |k|
114
- missing_headers << k unless headerA.include?(k)
115
- end
116
- raise SmarterCSV::MissingHeaders , "ERROR: missing headers: #{missing_headers.join(',')}" unless missing_headers.empty?
117
- end
38
+ headerA, header_size = process_headers(f, options, csv_options)
118
39
 
119
40
  # in case we use chunking.. we'll need to set it up..
120
41
  if ! options[:chunk_size].nil? && options[:chunk_size].to_i > 0
@@ -128,41 +49,42 @@ module SmarterCSV
128
49
 
129
50
  # now on to processing all the rest of the lines in the CSV file:
130
51
  while ! f.eof? # we can't use f.readlines() here, because this would read the whole file into memory at once, and eof => true
131
- line = f.readline # read one line.. this uses the input_record_separator $INPUT_RECORD_SEPARATOR which we set previously!
52
+ line = f.readline(options[:row_sep]) # read one line
53
+ @file_line_count += 1
54
+ @csv_line_count += 1
132
55
 
133
56
  # replace invalid byte sequence in UTF-8 with question mark to avoid errors
134
57
  line = line.force_encoding('utf-8').encode('utf-8', invalid: :replace, undef: :replace, replace: options[:invalid_byte_sequence]) if options[:force_utf8] || options[:file_encoding] !~ /utf-8/i
135
58
 
136
- file_line_count += 1
137
- csv_line_count += 1
138
- print "processing file line %10d, csv line %10d\r" % [file_line_count, csv_line_count] if options[:verbose]
139
- next if line =~ options[:comment_regexp] # ignore all comment lines if there are any
59
+ print "processing file line %10d, csv line %10d\r" % [@file_line_count, @csv_line_count] if options[:verbose]
60
+
61
+ next if options[:comment_regexp] && line =~ options[:comment_regexp] # ignore all comment lines if there are any
140
62
 
141
63
  # cater for the quoted csv data containing the row separator carriage return character
142
64
  # in which case the row data will be split across multiple lines (see the sample content in spec/fixtures/carriage_returns_rn.csv)
143
65
  # by detecting the existence of an uneven number of quote characters
144
- multiline = line.count(options[:quote_char])%2 == 1
145
- while line.count(options[:quote_char])%2 == 1
146
- next_line = f.readline
66
+ multiline = line.count(options[:quote_char])%2 == 1 # should handle quote_char nil
67
+ while line.count(options[:quote_char])%2 == 1 # should handle quote_char nil
68
+ next_line = f.readline(options[:row_sep])
147
69
  next_line = next_line.force_encoding('utf-8').encode('utf-8', invalid: :replace, undef: :replace, replace: options[:invalid_byte_sequence]) if options[:force_utf8] || options[:file_encoding] !~ /utf-8/i
148
70
  line += next_line
149
- file_line_count += 1
71
+ @file_line_count += 1
150
72
  end
151
- print "\nline contains uneven number of quote chars so including content through file line %d\n" % file_line_count if options[:verbose] && multiline
73
+ print "\nline contains uneven number of quote chars so including content through file line %d\n" % @file_line_count if options[:verbose] && multiline
152
74
 
153
- line.chomp! # will use $INPUT_RECORD_SEPARATOR which is set to options[:row_sep]
75
+ line.chomp!(options[:row_sep])
154
76
 
155
77
  if (line =~ %r{#{options[:quote_char]}}) and (! options[:force_simple_split])
156
78
  dataA = begin
157
79
  CSV.parse( line, **csv_options ).flatten.collect!{|x| x.nil? ? '' : x} # to deal with nil values from CSV.parse
158
80
  rescue CSV::MalformedCSVError => e
159
- raise $!, "#{$!} [SmarterCSV: csv line #{csv_line_count}]", $!.backtrace
81
+ raise $!, "#{$!} [SmarterCSV: csv line #{@csv_line_count}]", $!.backtrace
160
82
  end
161
83
  else
162
- dataA = line.split(options[:col_sep], header_size)
84
+ dataA = line.split(options[:col_sep], header_size)
163
85
  end
164
- #### dataA.map!{|x| x.gsub(%r/#{options[:quote_char]}/,'') } # this is actually not a good idea as a default
165
- dataA.map!{|x| x.strip} if options[:strip_whitespace]
86
+ dataA.map!{|x| x.sub(/(#{options[:col_sep]})+\z/, '')} # remove any unwanted trailing col_sep characters at the end
87
+ dataA.map!{|x| x.strip} if options[:strip_whitespace]
166
88
 
167
89
  # if all values are blank, then ignore this line
168
90
  # SEE: https://github.com/rails/rails/blob/32015b6f369adc839c4f0955f2d9dce50c0b6123/activesupport/lib/active_support/core_ext/object/blank.rb#L121
@@ -257,7 +179,6 @@ module SmarterCSV
257
179
  chunk = [] # initialize for next chunk of data
258
180
  end
259
181
  ensure
260
- $INPUT_RECORD_SEPARATOR = old_row_sep # make sure this stupid global variable is always reset to its previous value after we're done!
261
182
  f.close if f.respond_to?(:close)
262
183
  end
263
184
  if block_given?
@@ -269,6 +190,40 @@ module SmarterCSV
269
190
 
270
191
  private
271
192
 
193
+ def self.default_options
194
+ {
195
+ auto_row_sep_chars: 500,
196
+ chunk_size: nil ,
197
+ col_sep: ',',
198
+ comment_regexp: nil, # was: /\A#/,
199
+ convert_values_to_numeric: true,
200
+ downcase_header: true,
201
+ duplicate_header_suffix: nil,
202
+ file_encoding: 'utf-8',
203
+ force_simple_split: false ,
204
+ force_utf8: false,
205
+ headers_in_file: true,
206
+ invalid_byte_sequence: '',
207
+ keep_original_headers: false,
208
+ key_mapping_hash: nil ,
209
+ quote_char: '"',
210
+ remove_empty_hashes: true ,
211
+ remove_empty_values: true,
212
+ remove_unmapped_keys: false,
213
+ remove_values_matching: nil,
214
+ remove_zero_values: false,
215
+ required_headers: nil,
216
+ row_sep: $INPUT_RECORD_SEPARATOR,
217
+ skip_lines: nil,
218
+ strings_as_keys: false,
219
+ strip_chars_from_headers: nil,
220
+ strip_whitespace: true,
221
+ user_provided_headers: nil,
222
+ value_converters: nil,
223
+ verbose: false,
224
+ }
225
+ end
226
+
272
227
  def self.blank?(value)
273
228
  case value
274
229
  when Array
@@ -304,11 +259,11 @@ module SmarterCSV
304
259
  end
305
260
 
306
261
  # raise exception if none is found
307
- def self.guess_column_separator(filehandle)
262
+ def self.guess_column_separator(filehandle, options)
308
263
  del = [',', "\t", ';', ':', '|']
309
264
  n = Hash.new(0)
310
265
  5.times do
311
- line = filehandle.readline
266
+ line = filehandle.readline(options[:row_sep])
312
267
  del.each do |d|
313
268
  n[d] += line.scan(d).count
314
269
  end
@@ -347,9 +302,110 @@ module SmarterCSV
347
302
  lines += 1
348
303
  break if options[:auto_row_sep_chars] && options[:auto_row_sep_chars] > 0 && lines >= options[:auto_row_sep_chars]
349
304
  end
305
+ filehandle.rewind
306
+
350
307
  counts["\r"] += 1 if last_char == "\r"
351
308
  # find the key/value pair with the largest counter:
352
309
  k,_ = counts.max_by{|_,v| v}
353
310
  return k # the most frequent one is it
354
311
  end
312
+
313
+ def self.process_headers(filehandle, options, csv_options)
314
+ if options[:headers_in_file] # extract the header line
315
+ # process the header line in the CSV file..
316
+ # the first line of a CSV file contains the header .. it might be commented out, so we need to read it anyhow
317
+ header = filehandle.readline(options[:row_sep])
318
+ @file_line_count += 1
319
+ @csv_line_count += 1
320
+
321
+ header = header.force_encoding('utf-8').encode('utf-8', invalid: :replace, undef: :replace, replace: options[:invalid_byte_sequence]) if options[:force_utf8] || options[:file_encoding] !~ /utf-8/i
322
+ header = header.sub(options[:comment_regexp],'') if options[:comment_regexp]
323
+ header = header.chomp(options[:row_sep])
324
+
325
+ header = header.gsub(options[:strip_chars_from_headers], '') if options[:strip_chars_from_headers]
326
+
327
+ if (header =~ %r{#{options[:quote_char]}}) and (! options[:force_simple_split])
328
+ file_headerA = begin
329
+ CSV.parse( header, **csv_options ).flatten.collect!{|x| x.nil? ? '' : x} # to deal with nil values from CSV.parse
330
+ rescue CSV::MalformedCSVError => e
331
+ raise $!, "#{$!} [SmarterCSV: csv line #{@csv_line_count}]", $!.backtrace
332
+ end
333
+ else
334
+ file_headerA = header.split(options[:col_sep])
335
+ end
336
+ file_header_size = file_headerA.size # before mapping, which could delete keys
337
+
338
+ file_headerA.map!{|x| x.gsub(%r/#{options[:quote_char]}/,'') }
339
+ file_headerA.map!{|x| x.strip} if options[:strip_whitespace]
340
+ unless options[:keep_original_headers]
341
+ file_headerA.map!{|x| x.gsub(/\s+|-+/,'_')}
342
+ file_headerA.map!{|x| x.downcase } if options[:downcase_header]
343
+ end
344
+ else
345
+ raise SmarterCSV::IncorrectOption , "ERROR: If :headers_in_file is set to false, you have to provide :user_provided_headers" unless options[:user_provided_headers]
346
+ end
347
+ if options[:user_provided_headers] && options[:user_provided_headers].class == Array && ! options[:user_provided_headers].empty?
348
+ # use user-provided headers
349
+ headerA = options[:user_provided_headers]
350
+ if defined?(file_header_size) && ! file_header_size.nil?
351
+ if headerA.size != file_header_size
352
+ raise SmarterCSV::HeaderSizeMismatch , "ERROR: :user_provided_headers defines #{headerA.size} headers != CSV-file #{input} has #{file_header_size} headers"
353
+ else
354
+ # we could print out the mapping of file_headerA to headerA here
355
+ end
356
+ end
357
+ else
358
+ headerA = file_headerA
359
+ end
360
+
361
+ # detect duplicate headers and disambiguate
362
+ headerA = process_duplicate_headers(headerA, options) if options[:duplicate_header_suffix]
363
+ header_size = headerA.size # used for splitting lines
364
+
365
+ headerA.map!{|x| x.to_sym } unless options[:strings_as_keys] || options[:keep_original_headers]
366
+
367
+ unless options[:user_provided_headers] # wouldn't make sense to re-map user provided headers
368
+ key_mappingH = options[:key_mapping]
369
+
370
+ # do some key mapping on the keys in the file header
371
+ # if you want to completely delete a key, then map it to nil or to ''
372
+ if ! key_mappingH.nil? && key_mappingH.class == Hash && key_mappingH.keys.size > 0
373
+ # we can't map keys that are not there
374
+ raise SmarterCSV::KeyMappingError unless (key_mappingH.keys - headerA).empty?
375
+
376
+ headerA.map!{|x| key_mappingH.has_key?(x) ? (key_mappingH[x].nil? ? nil : key_mappingH[x]) : (options[:remove_unmapped_keys] ? nil : x)}
377
+ end
378
+ end
379
+
380
+ # header_validations
381
+ duplicate_headers = []
382
+ headerA.compact.each do |k|
383
+ duplicate_headers << k if headerA.select{|x| x == k}.size > 1
384
+ end
385
+ raise SmarterCSV::DuplicateHeaders , "ERROR: duplicate headers: #{duplicate_headers.join(',')}" unless duplicate_headers.empty?
386
+
387
+ if options[:required_headers] && options[:required_headers].is_a?(Array)
388
+ missing_headers = []
389
+ options[:required_headers].each do |k|
390
+ missing_headers << k unless headerA.include?(k)
391
+ end
392
+ raise SmarterCSV::MissingHeaders , "ERROR: missing headers: #{missing_headers.join(',')}" unless missing_headers.empty?
393
+ end
394
+
395
+ [headerA, header_size]
396
+ end
397
+
398
+ def self.process_duplicate_headers(headers, options)
399
+ counts = Hash.new(0)
400
+ result = []
401
+ headers.each do |key|
402
+ counts[key] += 1
403
+ if counts[key] == 1
404
+ result << key
405
+ else
406
+ result << [key, options[:duplicate_header_suffix], counts[key]].join
407
+ end
408
+ end
409
+ result
410
+ end
355
411
  end
@@ -1,3 +1,3 @@
1
1
  module SmarterCSV
2
- VERSION = "1.4.0"
2
+ VERSION = "1.5.1"
3
3
  end
data/lib/smarter_csv.rb CHANGED
@@ -1,3 +1,11 @@
1
+ if ENV['COVERAGE']
2
+ require 'simplecov'
3
+ SimpleCov.start do
4
+ add_filter "/spec/"
5
+ add_filter "/pkg/"
6
+ end
7
+ end
8
+
1
9
  require 'csv'
2
10
  require "smarter_csv/version"
3
11
  require "extensions/hash.rb"
data/smarter_csv.gemspec CHANGED
@@ -18,6 +18,7 @@ Gem::Specification.new do |spec|
18
18
  spec.require_paths = ["lib"]
19
19
  spec.requirements = ['csv'] # for CSV.parse() only needed in case we have quoted fields
20
20
  spec.add_development_dependency "rspec"
21
+ spec.add_development_dependency "simplecov"
21
22
  # spec.add_development_dependency "guard-rspec"
22
23
 
23
24
  spec.metadata["homepage_uri"] = spec.homepage
@@ -0,0 +1,6 @@
1
+ col1,col2
2
+ eins,zwei
3
+ uno,dos,
4
+ one,two ,,,
5
+ ichi, ,,,,,
6
+ un
@@ -1,3 +1,3 @@
1
1
  email,firstname,lastname,email,age
2
2
  tom@bla.com,Tom,Sawyer,mike@bla.com,34
3
- eri@bla.com,Eri Chan,tom@bla.com,21
3
+ eri@bla.com,Eri,Chan,tom@bla.com,21
@@ -0,0 +1,2 @@
1
+ Name,Email,Financial Status,Paid at,Fulfillment Status,Fulfilled at,Accepts Marketing,Currency,Subtotal,Shipping,Taxes,Total,Discount Code,Discount Amount,Shipping Method,Created at,Lineitem quantity,Lineitem name,Lineitem price,Lineitem compare at price,Lineitem sku,Lineitem requires shipping,Lineitem taxable,Lineitem fulfillment status,Billing Name,Billing Street,Billing Address1,Billing Address2,Billing Company,Billing City,Billing Zip,Billing Province,Billing Country,Billing Phone,Shipping Name,Shipping Street,Shipping Address1,Shipping Address2,Shipping Company,Shipping City,Shipping Zip,Shipping Province,Shipping Country,Shipping Phone,Notes,Note Attributes,Cancelled at,Payment Method,Payment Reference,Refunded Amount,Vendor, rece,Tags,Risk Level,Source,Lineitem discount,Tax 1 Name,Tax 1 Value,Tax 2 Name,Tax 2 Value,Tax 3 Name,Tax 3 Value,Tax 4 Name,Tax 4 Value,Tax 5 Name,Tax 5 Value,Phone,Receipt Number,Duties,Billing Province Name,Shipping Province Name,Payment ID,Payment Terms Name,Next Payment Due At
2
+ #MR1220817,foo@bar.com,paid,2022-02-08 22:31:28 +0100,unfulfilled,,yes,EUR,144,0,24,144,VIP,119.6,"Livraison Standard GRATUITE, 2-5 jours avec suivi",2022-02-08 22:31:26 +0100,2,Cire Épilation Nacrée,37,,WAX-200-NAC,true,true,pending,French Fry,64 Boulevard Budgié,64 Boulevard Budgié,,,dootdoot’,'49100,,FR,06 12 34 56 78,French Fry,64 Boulevard Budgi,64 Boulevard Budgié,,,dootdoot,'49100,,FR,06 12 34 56 78,,,,Stripe,c23800013619353.2,0,Goober Rég,4331065802905,902,Low,web,0,FR TVA 20%,24,,,,,,,,,3366012111111,,,,,,,
@@ -0,0 +1,45 @@
1
+ require 'spec_helper'
2
+
3
+ fixture_path = 'spec/fixtures'
4
+
5
+ describe 'handling of additional trailing column separators' do
6
+ let(:file) { "#{fixture_path}/additional_separator.csv" }
7
+
8
+ describe '' do
9
+ let(:data) { SmarterCSV.process(file) }
10
+
11
+ it 'reads all lines' do
12
+ data.size.should eq 5
13
+ end
14
+
15
+ it 'reads regular lines' do
16
+ item = data[0]
17
+ item[:col1].should == 'eins'
18
+ item[:col2].should == 'zwei'
19
+ end
20
+
21
+ it 'strips single trailing col_sep character' do
22
+ item = data[1]
23
+ item[:col1].should == 'uno'
24
+ item[:col2].should == 'dos'
25
+ end
26
+
27
+ it 'strips multiple trailing col_sep characters' do
28
+ item = data[2]
29
+ item[:col1].should == 'one'
30
+ item[:col2].should == 'two'
31
+ end
32
+
33
+ it 'strips multiple trailing col_sep chars' do
34
+ item = data[3]
35
+ item[:col1].should == 'ichi'
36
+ item[:col2].should == nil
37
+ end
38
+
39
+ it 'strips multiple trailing col_sep chars' do
40
+ item = data[4]
41
+ item[:col1].should == 'un'
42
+ item[:col2].should == nil
43
+ end
44
+ end
45
+ end
@@ -12,7 +12,7 @@ describe 'be_able_to' do
12
12
  it 'loads_binary_file_with_strings_as_keys' do
13
13
  options = {:col_sep => "\cA", :row_sep => "\cB", :comment_regexp => /^#/, :strings_as_keys => true}
14
14
  data = SmarterCSV.process("#{fixture_path}/binary.csv", options)
15
- data.flatten.size.should == 8
15
+ data.size.should == 8
16
16
  data.each do |item|
17
17
  # all keys should be strings
18
18
  item.keys.each{|x| x.class.should be == String}
@@ -3,7 +3,6 @@ require 'spec_helper'
3
3
  fixture_path = 'spec/fixtures'
4
4
 
5
5
  describe 'process files with line endings explicitly pre-specified' do
6
-
7
6
  it 'should process a file with \n for line endings and within data fields' do
8
7
  sep = "\n"
9
8
  options = {:row_sep => sep}
@@ -83,14 +82,14 @@ describe 'process files with line endings explicitly pre-specified' do
83
82
  data[1][:members].should == ["Jimmy Page", "Robert Plant", "John Bonham", "John Paul Jones"].join(text_sep)
84
83
  data[1][:albums].should == ["Led Zeppelin", "Led Zeppelin II", "Led Zeppelin III", "Led Zeppelin IV"].join(text_sep)
85
84
  end
86
-
87
85
  end
88
86
 
89
87
  describe 'process files with line endings in automatic mode' do
88
+ let(:options) { { row_sep: :auto } }
90
89
 
91
90
  it 'should process a file with \n for line endings and within data fields' do
92
91
  sep = "\n"
93
- data = SmarterCSV.process("#{fixture_path}/carriage_returns_n.csv", {:row_sep => :auto})
92
+ data = SmarterCSV.process("#{fixture_path}/carriage_returns_n.csv", options)
94
93
  data.flatten.size.should == 8
95
94
  data[0][:name].should == "Anfield"
96
95
  data[0][:street].should == "Anfield Road"
@@ -112,7 +111,29 @@ describe 'process files with line endings in automatic mode' do
112
111
 
113
112
  it 'should process a file with \r for line endings and within data fields' do
114
113
  sep = "\r"
115
- data = SmarterCSV.process("#{fixture_path}/carriage_returns_r.csv", {:row_sep => :auto})
114
+ data = SmarterCSV.process("#{fixture_path}/carriage_returns_r.csv", options)
115
+ data.flatten.size.should == 8
116
+ data[0][:name].should == "Anfield"
117
+ data[0][:street].should == "Anfield Road"
118
+ data[0][:city].should == "Liverpool"
119
+ data[1][:name].should == ["Highbury", "Highbury House"].join(sep)
120
+ data[2][:street].should == ["Sir Matt ", "Busby Way"].join(sep)
121
+ data[3][:city].should == ["Newcastle-upon-tyne ", "Tyne and Wear"].join(sep)
122
+ data[4][:name].should == ["White Hart Lane", "(The Lane)"].join(sep)
123
+ data[4][:street].should == ["Bill Nicholson Way ", "748 High Rd"].join(sep)
124
+ data[4][:city].should == ["Tottenham", "London"].join(sep)
125
+ data[5][:name].should == "Stamford Bridge"
126
+ data[5][:street].should == ["Fulham Road", "London"].join(sep)
127
+ data[5][:city].should be_nil
128
+ data[6][:name].should == ["Etihad Stadium", "Rowsley St", "Manchester"].join(sep)
129
+ data[7][:name].should == "Goodison"
130
+ data[7][:street].should == "Goodison Road"
131
+ data[7][:city].should == "Liverpool"
132
+ end
133
+
134
+ it 'also works when auto is given a string' do
135
+ sep = "\r"
136
+ data = SmarterCSV.process("#{fixture_path}/carriage_returns_r.csv", {row_sep: 'auto'})
116
137
  data.flatten.size.should == 8
117
138
  data[0][:name].should == "Anfield"
118
139
  data[0][:street].should == "Anfield Road"
@@ -134,7 +155,7 @@ describe 'process files with line endings in automatic mode' do
134
155
 
135
156
  it 'should process a file with \r\n for line endings and within data fields' do
136
157
  sep = "\r\n"
137
- data = SmarterCSV.process("#{fixture_path}/carriage_returns_rn.csv", {:row_sep => :auto})
158
+ data = SmarterCSV.process("#{fixture_path}/carriage_returns_rn.csv", options)
138
159
  data.flatten.size.should == 8
139
160
  data[0][:name].should == "Anfield"
140
161
  data[0][:street].should == "Anfield Road"
@@ -157,7 +178,7 @@ describe 'process files with line endings in automatic mode' do
157
178
  it 'should process a file with more quoted text carriage return characters (\r) than line ending characters (\n)' do
158
179
  row_sep = "\n"
159
180
  text_sep = "\r"
160
- data = SmarterCSV.process("#{fixture_path}/carriage_returns_quoted.csv", {:row_sep => :auto})
181
+ data = SmarterCSV.process("#{fixture_path}/carriage_returns_quoted.csv", options)
161
182
  data.flatten.size.should == 2
162
183
  data[0][:band].should == "New Order"
163
184
  data[0][:members].should == ["Bernard Sumner", "Peter Hook", "Stephen Morris", "Gillian Gilbert"].join(text_sep)
@@ -166,5 +187,4 @@ describe 'process files with line endings in automatic mode' do
166
187
  data[1][:members].should == ["Jimmy Page", "Robert Plant", "John Bonham", "John Paul Jones"].join(text_sep)
167
188
  data[1][:albums].should == ["Led Zeppelin", "Led Zeppelin II", "Led Zeppelin III", "Led Zeppelin IV"].join(text_sep)
168
189
  end
169
-
170
190
  end
@@ -48,7 +48,7 @@ describe 'can handle col_sep' do
48
48
  end
49
49
 
50
50
  describe 'auto-detection of separator' do
51
- options = {:col_sep => 'auto'}
51
+ options = {col_sep: :auto}
52
52
 
53
53
  it 'auto-detects comma separator and loads data' do
54
54
  data = SmarterCSV.process("#{fixture_path}/separator_comma.csv", options)
@@ -85,5 +85,11 @@ describe 'can handle col_sep' do
85
85
  SmarterCSV.process("#{fixture_path}/binary.csv", options)
86
86
  }.to raise_exception SmarterCSV::NoColSepDetected
87
87
  end
88
+
89
+ it 'also works when auto is given a string' do
90
+ data = SmarterCSV.process("#{fixture_path}/separator_pipe.csv", col_sep: 'auto')
91
+ data.first.keys.size.should == 4
92
+ data.size.should eq 3
93
+ end
88
94
  end
89
95
  end
@@ -0,0 +1,76 @@
1
+ require 'spec_helper'
2
+
3
+ fixture_path = 'spec/fixtures'
4
+
5
+ describe 'duplicate headers' do
6
+ describe 'without special handling / default behavior' do
7
+ it 'raises error on duplicate headers' do
8
+ expect {
9
+ SmarterCSV.process("#{fixture_path}/duplicate_headers.csv", {})
10
+ }.to raise_exception(SmarterCSV::DuplicateHeaders)
11
+ end
12
+
13
+ it 'raises error on duplicate given headers' do
14
+ expect {
15
+ options = {:user_provided_headers => [:a,:b,:c,:d,:a]}
16
+ SmarterCSV.process("#{fixture_path}/duplicate_headers.csv", options)
17
+ }.to raise_exception(SmarterCSV::DuplicateHeaders)
18
+ end
19
+
20
+ it 'raises error on missing mapped headers' do
21
+ expect {
22
+ # the mapping is right, but the underlying csv file is bad
23
+ options = {:key_mapping => {:email => :a, :firstname => :b, :lastname => :c, :manager_email => :d, :age => :e} }
24
+ SmarterCSV.process("#{fixture_path}/duplicate_headers.csv", options)
25
+ }.to raise_exception(SmarterCSV::KeyMappingError)
26
+ end
27
+ end
28
+
29
+ describe 'with special handling' do
30
+ context 'with given suffix' do
31
+ let(:options) { {duplicate_header_suffix: '_'} }
32
+
33
+ it 'reads whole file' do
34
+ data = SmarterCSV.process("#{fixture_path}/duplicate_headers.csv", options)
35
+ expect(data.size).to eq 2
36
+ end
37
+
38
+ it 'generates the correct keys' do
39
+ data = SmarterCSV.process("#{fixture_path}/duplicate_headers.csv", options)
40
+ expect(data.first.keys).to eq [:email, :firstname, :lastname, :email_2, :age]
41
+ end
42
+
43
+ it 'enumerates when duplicate headers are given' do
44
+ options.merge!({:user_provided_headers => [:a,:b,:c,:a,:a]})
45
+ data = SmarterCSV.process("#{fixture_path}/duplicate_headers.csv", options)
46
+ expect(data.first.keys).to eq [:a, :b, :c, :a_2, :a_3]
47
+ end
48
+
49
+ it 'can remap duplicated headers' do
50
+ options.merge!({:key_mapping => {:email => :a, :firstname => :b, :lastname => :c, :email_2 => :d, :age => :e}})
51
+ data = SmarterCSV.process("#{fixture_path}/duplicate_headers.csv", options)
52
+ expect(data.first).to eq({a: 'tom@bla.com', b: 'Tom', c: 'Sawyer', d: 'mike@bla.com', e: 34})
53
+ end
54
+ end
55
+
56
+ context 'with empty suffix' do
57
+ let(:options) { {duplicate_header_suffix: ''} }
58
+
59
+ it 'reads whole file' do
60
+ data = SmarterCSV.process("#{fixture_path}/duplicate_headers.csv", options)
61
+ expect(data.size).to eq 2
62
+ end
63
+
64
+ it 'generates the correct keys' do
65
+ data = SmarterCSV.process("#{fixture_path}/duplicate_headers.csv", options)
66
+ expect(data.first.keys).to eq [:email, :firstname, :lastname, :email2, :age]
67
+ end
68
+
69
+ it 'enumerates when duplicate headers are given' do
70
+ options.merge!({:user_provided_headers => [:a,:b,:c,:a,:a]})
71
+ data = SmarterCSV.process("#{fixture_path}/duplicate_headers.csv", options)
72
+ expect(data.first.keys).to eq [:a, :b, :c, :a2, :a3]
73
+ end
74
+ end
75
+ end
76
+ end