smarter_csv 1.6.1 → 1.7.1

Files changed (101)
  1. checksums.yaml +4 -4
  2. data/.rubocop.yml +133 -0
  3. data/CHANGELOG.md +22 -1
  4. data/CONTRIBUTORS.md +3 -0
  5. data/Gemfile +7 -4
  6. data/README.md +8 -6
  7. data/Rakefile +15 -13
  8. data/ext/smarter_csv/extconf.rb +14 -0
  9. data/ext/smarter_csv/smarter_csv.c +86 -0
  10. data/lib/extensions/hash.rb +4 -2
  11. data/lib/smarter_csv/version.rb +3 -1
  12. data/lib/smarter_csv.rb +519 -10
  13. data/smarter_csv.gemspec +22 -7
  14. metadata +54 -176
  15. data/.gitignore +0 -10
  16. data/.rspec +0 -2
  17. data/.travis.yml +0 -27
  18. data/lib/smarter_csv/smarter_csv.rb +0 -461
  19. data/spec/fixtures/additional_separator.csv +0 -6
  20. data/spec/fixtures/basic.csv +0 -8
  21. data/spec/fixtures/binary.csv +0 -1
  22. data/spec/fixtures/carriage_returns_n.csv +0 -18
  23. data/spec/fixtures/carriage_returns_quoted.csv +0 -3
  24. data/spec/fixtures/carriage_returns_r.csv +0 -1
  25. data/spec/fixtures/carriage_returns_rn.csv +0 -18
  26. data/spec/fixtures/chunk_cornercase.csv +0 -10
  27. data/spec/fixtures/duplicate_headers.csv +0 -3
  28. data/spec/fixtures/empty.csv +0 -5
  29. data/spec/fixtures/empty_columns_1.csv +0 -2
  30. data/spec/fixtures/empty_columns_2.csv +0 -2
  31. data/spec/fixtures/hard_sample.csv +0 -2
  32. data/spec/fixtures/ignore_comments.csv +0 -11
  33. data/spec/fixtures/ignore_comments2.csv +0 -3
  34. data/spec/fixtures/key_mapping.csv +0 -2
  35. data/spec/fixtures/line_endings_n.csv +0 -4
  36. data/spec/fixtures/line_endings_r.csv +0 -1
  37. data/spec/fixtures/line_endings_rn.csv +0 -4
  38. data/spec/fixtures/lots_of_columns.csv +0 -2
  39. data/spec/fixtures/malformed.csv +0 -3
  40. data/spec/fixtures/malformed_header.csv +0 -3
  41. data/spec/fixtures/money.csv +0 -3
  42. data/spec/fixtures/no_header.csv +0 -7
  43. data/spec/fixtures/numeric.csv +0 -5
  44. data/spec/fixtures/pets.csv +0 -5
  45. data/spec/fixtures/problematic.csv +0 -8
  46. data/spec/fixtures/quote_char.csv +0 -9
  47. data/spec/fixtures/quoted.csv +0 -5
  48. data/spec/fixtures/quoted2.csv +0 -4
  49. data/spec/fixtures/separator_colon.csv +0 -4
  50. data/spec/fixtures/separator_comma.csv +0 -4
  51. data/spec/fixtures/separator_pipe.csv +0 -4
  52. data/spec/fixtures/separator_semi.csv +0 -4
  53. data/spec/fixtures/separator_tab.csv +0 -4
  54. data/spec/fixtures/skip_lines.csv +0 -8
  55. data/spec/fixtures/trading.csv +0 -3
  56. data/spec/fixtures/user_import.csv +0 -3
  57. data/spec/fixtures/valid_unicode.csv +0 -5
  58. data/spec/fixtures/with_dashes.csv +0 -8
  59. data/spec/fixtures/with_dates.csv +0 -4
  60. data/spec/smarter_csv/additional_separator_spec.rb +0 -45
  61. data/spec/smarter_csv/binary_file2_spec.rb +0 -24
  62. data/spec/smarter_csv/binary_file_spec.rb +0 -22
  63. data/spec/smarter_csv/blank_spec.rb +0 -55
  64. data/spec/smarter_csv/carriage_return_spec.rb +0 -190
  65. data/spec/smarter_csv/chunked_reading_spec.rb +0 -14
  66. data/spec/smarter_csv/close_file_spec.rb +0 -15
  67. data/spec/smarter_csv/column_separator_spec.rb +0 -95
  68. data/spec/smarter_csv/convert_values_to_numeric_spec.rb +0 -48
  69. data/spec/smarter_csv/duplicate_headers_spec.rb +0 -76
  70. data/spec/smarter_csv/empty_columns_spec.rb +0 -74
  71. data/spec/smarter_csv/extenstions_spec.rb +0 -17
  72. data/spec/smarter_csv/hard_sample_spec.rb +0 -24
  73. data/spec/smarter_csv/header_transformation_spec.rb +0 -21
  74. data/spec/smarter_csv/ignore_comments_spec.rb +0 -45
  75. data/spec/smarter_csv/invalid_headers_spec.rb +0 -38
  76. data/spec/smarter_csv/keep_headers_spec.rb +0 -24
  77. data/spec/smarter_csv/key_mapping_spec.rb +0 -56
  78. data/spec/smarter_csv/line_ending_spec.rb +0 -43
  79. data/spec/smarter_csv/load_basic_spec.rb +0 -20
  80. data/spec/smarter_csv/malformed_spec.rb +0 -25
  81. data/spec/smarter_csv/no_header_spec.rb +0 -29
  82. data/spec/smarter_csv/not_downcase_header_spec.rb +0 -24
  83. data/spec/smarter_csv/parse/column_separator_spec.rb +0 -61
  84. data/spec/smarter_csv/parse/old_csv_library_spec.rb +0 -74
  85. data/spec/smarter_csv/parse/rfc4180_and_more_spec.rb +0 -170
  86. data/spec/smarter_csv/problematic.rb +0 -34
  87. data/spec/smarter_csv/quoted_spec.rb +0 -52
  88. data/spec/smarter_csv/remove_empty_values_spec.rb +0 -13
  89. data/spec/smarter_csv/remove_keys_from_hashes_spec.rb +0 -25
  90. data/spec/smarter_csv/remove_not_mapped_keys_spec.rb +0 -35
  91. data/spec/smarter_csv/remove_values_matching_spec.rb +0 -26
  92. data/spec/smarter_csv/remove_zero_values_spec.rb +0 -25
  93. data/spec/smarter_csv/skip_lines_spec.rb +0 -29
  94. data/spec/smarter_csv/strings_as_keys_spec.rb +0 -24
  95. data/spec/smarter_csv/strip_chars_from_headers_spec.rb +0 -24
  96. data/spec/smarter_csv/trading_spec.rb +0 -25
  97. data/spec/smarter_csv/valid_unicode_spec.rb +0 -94
  98. data/spec/smarter_csv/value_converters_spec.rb +0 -52
  99. data/spec/spec/spec_helper.rb +0 -17
  100. data/spec/spec.opts +0 -2
  101. data/spec/spec_helper.rb +0 -21
@@ -1,461 +0,0 @@
- module SmarterCSV
- class SmarterCSVException < StandardError; end
- class HeaderSizeMismatch < SmarterCSVException; end
- class IncorrectOption < SmarterCSVException; end
- class DuplicateHeaders < SmarterCSVException; end
- class MissingHeaders < SmarterCSVException; end
- class NoColSepDetected < SmarterCSVException; end
- class KeyMappingError < SmarterCSVException; end
- class MalformedCSVError < SmarterCSVException; end
-
- # first parameter: filename or input object which responds to readline method
- def SmarterCSV.process(input, options={}, &block)
- options = default_options.merge(options)
- options[:invalid_byte_sequence] = '' if options[:invalid_byte_sequence].nil?
-
- headerA = []
- result = []
- @file_line_count = 0
- @csv_line_count = 0
- has_rails = !! defined?(Rails)
- begin
- fh = input.respond_to?(:readline) ? input : File.open(input, "r:#{options[:file_encoding]}")
-
- # auto-detect the row separator
- options[:row_sep] = SmarterCSV.guess_line_ending(fh, options) if options[:row_sep].to_sym == :auto
- # attempt to auto-detect column separator
- options[:col_sep] = guess_column_separator(fh, options) if options[:col_sep].to_sym == :auto
-
- if (options[:force_utf8] || options[:file_encoding] =~ /utf-8/i) && ( fh.respond_to?(:external_encoding) && fh.external_encoding != Encoding.find('UTF-8') || fh.respond_to?(:encoding) && fh.encoding != Encoding.find('UTF-8') )
- puts 'WARNING: you are trying to process UTF-8 input, but did not open the input with "b:utf-8" option. See README file "NOTES about File Encodings".'
- end
-
- if options[:skip_lines].to_i > 0
- options[:skip_lines].to_i.times do
- readline_with_counts(fh, options)
- end
- end
-
- headerA, header_size = process_headers(fh, options)
-
- # in case we use chunking.. we'll need to set it up..
- if ! options[:chunk_size].nil? && options[:chunk_size].to_i > 0
- use_chunks = true
- chunk_size = options[:chunk_size].to_i
- chunk_count = 0
- chunk = []
- else
- use_chunks = false
- end
-
- # now on to processing all the rest of the lines in the CSV file:
- while ! fh.eof? # we can't use fh.readlines() here, because this would read the whole file into memory at once, and eof => true
- line = readline_with_counts(fh, options)
-
- # replace invalid byte sequence in UTF-8 with question mark to avoid errors
- line = line.force_encoding('utf-8').encode('utf-8', invalid: :replace, undef: :replace, replace: options[:invalid_byte_sequence]) if options[:force_utf8] || options[:file_encoding] !~ /utf-8/i
-
- print "processing file line %10d, csv line %10d\r" % [@file_line_count, @csv_line_count] if options[:verbose]
-
- next if options[:comment_regexp] && line =~ options[:comment_regexp] # ignore all comment lines if there are any
-
- # cater for the quoted csv data containing the row separator carriage return character
- # in which case the row data will be split across multiple lines (see the sample content in spec/fixtures/carriage_returns_rn.csv)
- # by detecting the existence of an uneven number of quote characters
-
- multiline = line.count(options[:quote_char])%2 == 1 # should handle quote_char nil
- while line.count(options[:quote_char])%2 == 1 # should handle quote_char nil
- next_line = fh.readline(options[:row_sep])
- next_line = next_line.force_encoding('utf-8').encode('utf-8', invalid: :replace, undef: :replace, replace: options[:invalid_byte_sequence]) if options[:force_utf8] || options[:file_encoding] !~ /utf-8/i
- line += next_line
- @file_line_count += 1
- end
- print "\nline contains uneven number of quote chars so including content through file line %d\n" % @file_line_count if options[:verbose] && multiline
-
- line.chomp!(options[:row_sep])
-
- dataA, data_size = parse(line, options, header_size)
-
- dataA.map!{|x| x.strip} if options[:strip_whitespace]
-
- # if all values are blank, then ignore this line
- # SEE: https://github.com/rails/rails/blob/32015b6f369adc839c4f0955f2d9dce50c0b6123/activesupport/lib/active_support/core_ext/object/blank.rb#L121
- next if options[:remove_empty_hashes] && blank?(dataA)
-
- hash = Hash.zip(headerA,dataA) # from Facets of Ruby library
-
- # make sure we delete any key/value pairs from the hash, which the user wanted to delete:
- # Note: Ruby < 1.9 doesn't allow empty symbol literals!
- hash.delete(nil); hash.delete('');
- if RUBY_VERSION.to_f > 1.8
- eval('hash.delete(:"")')
- end
-
- if options[:remove_empty_values] == true
- if has_rails
- hash.delete_if{|k,v| v.blank?}
- else
- hash.delete_if{|k,v| blank?(v)}
- end
- end
-
- hash.delete_if{|k,v| ! v.nil? && v =~ /^(\d+|\d+\.\d+)$/ && v.to_f == 0} if options[:remove_zero_values] # values are typically Strings!
- hash.delete_if{|k,v| v =~ options[:remove_values_matching]} if options[:remove_values_matching]
-
- if options[:convert_values_to_numeric]
- hash.each do |k,v|
- # deal with the :only / :except options to :convert_values_to_numeric
- next if SmarterCSV.only_or_except_limit_execution( options, :convert_values_to_numeric , k )
-
- # convert if it's a numeric value:
- case v
- when /^[+-]?\d+\.\d+$/
- hash[k] = v.to_f
- when /^[+-]?\d+$/
- hash[k] = v.to_i
- end
- end
- end
-
- if options[:value_converters]
- hash.each do |k,v|
- converter = options[:value_converters][k]
- next unless converter
- hash[k] = converter.convert(v)
- end
- end
-
- next if hash.empty? if options[:remove_empty_hashes]
-
- if use_chunks
- chunk << hash # append temp result to chunk
-
- if chunk.size >= chunk_size || fh.eof? # if chunk if full, or EOF reached
- # do something with the chunk
- if block_given?
- yield chunk # do something with the hashes in the chunk in the block
- else
- result << chunk # not sure yet, why anybody would want to do this without a block
- end
- chunk_count += 1
- chunk = [] # initialize for next chunk of data
- else
-
- # the last chunk may contain partial data, which also needs to be returned (BUG / ISSUE-18)
-
- end
-
- # while a chunk is being filled up we don't need to do anything else here
-
- else # no chunk handling
- if block_given?
- yield [hash] # do something with the hash in the block (better to use chunking here)
- else
- result << hash
- end
- end
- end
-
- # print new line to retain last processing line message
- print "\n" if options[:verbose]
-
- # last chunk:
- if ! chunk.nil? && chunk.size > 0
- # do something with the chunk
- if block_given?
- yield chunk # do something with the hashes in the chunk in the block
- else
- result << chunk # not sure yet, why anybody would want to do this without a block
- end
- chunk_count += 1
- chunk = [] # initialize for next chunk of data
- end
- ensure
- fh.close if fh.respond_to?(:close)
- end
- if block_given?
- return chunk_count # when we do processing through a block we only care how many chunks we processed
- else
- return result # returns either an Array of Hashes, or an Array of Arrays of Hashes (if in chunked mode)
- end
- end
-
- private
-
- def self.default_options
- {
- auto_row_sep_chars: 500,
- chunk_size: nil ,
- col_sep: ',',
- comment_regexp: nil, # was: /\A#/,
- convert_values_to_numeric: true,
- downcase_header: true,
- duplicate_header_suffix: nil,
- file_encoding: 'utf-8',
- force_simple_split: false ,
- force_utf8: false,
- headers_in_file: true,
- invalid_byte_sequence: '',
- keep_original_headers: false,
- key_mapping_hash: nil ,
- quote_char: '"',
- remove_empty_hashes: true ,
- remove_empty_values: true,
- remove_unmapped_keys: false,
- remove_values_matching: nil,
- remove_zero_values: false,
- required_headers: nil,
- row_sep: $INPUT_RECORD_SEPARATOR,
- skip_lines: nil,
- strings_as_keys: false,
- strip_chars_from_headers: nil,
- strip_whitespace: true,
- user_provided_headers: nil,
- value_converters: nil,
- verbose: false,
- }
- end
-
- def self.readline_with_counts(filehandle, options)
- line = filehandle.readline(options[:row_sep])
- @file_line_count += 1
- @csv_line_count += 1
- line
- end
-
- # parses a single line: either a CSV header and body line
- # - quoting rules compared to RFC-4180 are somewhat relaxed
- # - we are not assuming that quotes inside a fields need to be doubled
- # - we are not assuming that all fields need to be quoted (0 is even)
- # - works with multi-char col_sep
- # - if header_size is given, only up to header_size fields are parsed
- #
- # We use header_size for parsing the body lines to make sure we always match the number of headers
- # in case there are trailing col_sep characters in line
- #
- # Our convention is that empty fields are returned as empty strings, not as nil.
- #
- def self.parse(line, options, header_size = nil)
- return [] if line.nil?
-
- col_sep = options[:col_sep]
- quote = options[:quote_char]
- quote_count = 0
- elements = []
- start = 0
- i = 0
-
- while i < line.size do
- if line[i...i+col_sep.size] == col_sep && quote_count.even?
- break if !header_size.nil? && elements.size >= header_size
-
- elements << cleanup_quotes(line[start...i], quote)
- i += col_sep.size
- start = i
- else
- quote_count += 1 if line[i] == quote
- i += 1
- end
- end
- elements << cleanup_quotes(line[start..-1], quote) if header_size.nil? || elements.size < header_size
- [elements, elements.size]
- end
-
- def self.cleanup_quotes(field, quote)
- return field if field.nil? || field !~ /#{quote}/
-
- if field.start_with?(quote) && field.end_with?(quote)
- field.delete_prefix!(quote)
- field.delete_suffix!(quote)
- end
- field.gsub!("#{quote}#{quote}", quote)
- field
- end
-
- def self.blank?(value)
- case value
- when Array
- value.inject(true){|result, x| result &&= elem_blank?(x)}
- when Hash
- value.inject(true){|result, x| result &&= elem_blank?(x.last)}
- else
- elem_blank?(value)
- end
- end
-
- def self.elem_blank?(value)
- case value
- when NilClass
- true
- when String
- value !~ /\S/
- else
- false
- end
- end
-
- # acts as a road-block to limit processing when iterating over all k/v pairs of a CSV-hash:
- def self.only_or_except_limit_execution( options, option_name, key )
- if options[option_name].is_a?(Hash)
- if options[option_name].has_key?( :except )
- return true if Array( options[ option_name ][:except] ).include?(key)
- elsif options[ option_name ].has_key?(:only)
- return true unless Array( options[ option_name ][:only] ).include?(key)
- end
- end
- return false
- end
-
- # raise exception if none is found
- def self.guess_column_separator(filehandle, options)
- del = [',', "\t", ';', ':', '|']
- n = Hash.new(0)
- 5.times do
- line = filehandle.readline(options[:row_sep])
- del.each do |d|
- n[d] += line.scan(d).count
- end
- rescue EOFError # short files
- break
- end
- filehandle.rewind
- raise SmarterCSV::NoColSepDetected if n.values.max == 0
-
- col_sep = n.key(n.values.max)
- end
-
- # limitation: this currently reads the whole file in before making a decision
- def self.guess_line_ending( filehandle, options )
- counts = {"\n" => 0 , "\r" => 0, "\r\n" => 0}
- quoted_char = false
-
- # count how many of the pre-defined line-endings we find
- # ignoring those contained within quote characters
- last_char = nil
- lines = 0
- filehandle.each_char do |c|
- quoted_char = !quoted_char if c == options[:quote_char]
- next if quoted_char
-
- if last_char == "\r"
- if c == "\n"
- counts["\r\n"] += 1
- else
- counts["\r"] += 1 # \r are counted after they appeared, we might
- end
- elsif c == "\n"
- counts["\n"] += 1
- end
- last_char = c
- lines += 1
- break if options[:auto_row_sep_chars] && options[:auto_row_sep_chars] > 0 && lines >= options[:auto_row_sep_chars]
- end
- filehandle.rewind
-
- counts["\r"] += 1 if last_char == "\r"
- # find the key/value pair with the largest counter:
- k,_ = counts.max_by{|_,v| v}
- return k # the most frequent one is it
- end
-
- def self.raw_hearder
- @raw_header
- end
-
- def self.headers
- @headers
- end
-
- def self.process_headers(filehandle, options)
- @raw_header = nil
- @headers = nil
- if options[:headers_in_file] # extract the header line
- # process the header line in the CSV file..
- # the first line of a CSV file contains the header .. it might be commented out, so we need to read it anyhow
- header = readline_with_counts(filehandle, options)
- @raw_header = header
-
- header = header.force_encoding('utf-8').encode('utf-8', invalid: :replace, undef: :replace, replace: options[:invalid_byte_sequence]) if options[:force_utf8] || options[:file_encoding] !~ /utf-8/i
- header = header.sub(options[:comment_regexp],'') if options[:comment_regexp]
- header = header.chomp(options[:row_sep])
-
- header = header.gsub(options[:strip_chars_from_headers], '') if options[:strip_chars_from_headers]
-
- file_headerA, file_header_size = parse(header, options)
-
- file_headerA.map!{|x| x.gsub(%r/#{options[:quote_char]}/,'') }
- file_headerA.map!{|x| x.strip} if options[:strip_whitespace]
- unless options[:keep_original_headers]
- file_headerA.map!{|x| x.gsub(/\s+|-+/,'_')}
- file_headerA.map!{|x| x.downcase } if options[:downcase_header]
- end
- else
- raise SmarterCSV::IncorrectOption , "ERROR: If :headers_in_file is set to false, you have to provide :user_provided_headers" unless options[:user_provided_headers]
- end
- if options[:user_provided_headers] && options[:user_provided_headers].class == Array && ! options[:user_provided_headers].empty?
- # use user-provided headers
- headerA = options[:user_provided_headers]
- if defined?(file_header_size) && ! file_header_size.nil?
- if headerA.size != file_header_size
- raise SmarterCSV::HeaderSizeMismatch , "ERROR: :user_provided_headers defines #{headerA.size} headers != CSV-file #{input} has #{file_header_size} headers"
- else
- # we could print out the mapping of file_headerA to headerA here
- end
- end
- else
- headerA = file_headerA
- end
-
- # detect duplicate headers and disambiguate
- headerA = process_duplicate_headers(headerA, options) if options[:duplicate_header_suffix]
- header_size = headerA.size # used for splitting lines
-
- headerA.map!{|x| x.to_sym } unless options[:strings_as_keys] || options[:keep_original_headers]
-
- unless options[:user_provided_headers] # wouldn't make sense to re-map user provided headers
- key_mappingH = options[:key_mapping]
-
- # do some key mapping on the keys in the file header
- # if you want to completely delete a key, then map it to nil or to ''
- if ! key_mappingH.nil? && key_mappingH.class == Hash && key_mappingH.keys.size > 0
- # we can't map keys that are not there
- missing_keys = key_mappingH.keys - headerA
- puts "WARNING: missing header(s): #{missing_keys.join(",")}" unless missing_keys.empty?
-
- headerA.map!{|x| key_mappingH.has_key?(x) ? (key_mappingH[x].nil? ? nil : key_mappingH[x]) : (options[:remove_unmapped_keys] ? nil : x)}
- end
- end
-
- # header_validations
- duplicate_headers = []
- headerA.compact.each do |k|
- duplicate_headers << k if headerA.select{|x| x == k}.size > 1
- end
- raise SmarterCSV::DuplicateHeaders , "ERROR: duplicate headers: #{duplicate_headers.join(',')}" unless duplicate_headers.empty?
-
- if options[:required_headers] && options[:required_headers].is_a?(Array)
- missing_headers = []
- options[:required_headers].each do |k|
- missing_headers << k unless headerA.include?(k)
- end
- raise SmarterCSV::MissingHeaders , "ERROR: missing headers: #{missing_headers.join(',')}" unless missing_headers.empty?
- end
-
- @headers = headerA
- [headerA, header_size]
- end
-
- def self.process_duplicate_headers(headers, options)
- counts = Hash.new(0)
- result = []
- headers.each do |key|
- counts[key] += 1
- if counts[key] == 1
- result << key
- else
- result << [key, options[:duplicate_header_suffix], counts[key]].join
- end
- end
- result
- end
- end
@@ -1,6 +0,0 @@
- col1,col2
- eins,zwei
- uno,dos,
- one,two ,,,
- ichi, ,,,,,
- un
@@ -1,8 +0,0 @@
- First Name,Last Name,Dogs,Cats,Birds,Fish
- Dan,McAllister,2,0,,
- Lucy,Laweless,,5,0,
- ,,,,,
- Miles,O'Brian,0,0,0,21
- Nancy,Homes,2,0,1,
- Hernán,Curaçon,3,0,0,
- ,,,,,
@@ -1 +0,0 @@
- #timestampitem_idparent_idname#some comments here#these can be multiple comments in the header section#even more comments here1381388409101Thing 11381388409111Thing 21381388409121Thing 313813884091Parent 113813884092Parent 21381388409202Thing 41381388409212Thing 51381388409222Thing 6
@@ -1,18 +0,0 @@
- Name,Street,City
- Anfield,Anfield Road,Liverpool
- "Highbury
- Highbury House",75 Drayton Park,London
- Old Trafford,"Sir Matt
- Busby Way",Manchester
- St. James' Park,,"Newcastle-upon-tyne
- Tyne and Wear"
- "White Hart Lane
- (The Lane)","Bill Nicholson Way
- 748 High Rd","Tottenham
- London"
- Stamford Bridge,"Fulham Road
- London",
- "Etihad Stadium
- Rowsley St
- Manchester",,
- Goodison,Goodison Road,Liverpool
@@ -1,3 +0,0 @@
- Band,Members,Albums
- New Order,"Bernard Sumner
- Led Zeppelin,"Jimmy Page
@@ -1 +0,0 @@
- Name,Street,City
@@ -1,18 +0,0 @@
- Name,Street,City
- Anfield,Anfield Road,Liverpool
- "Highbury
- Highbury House",75 Drayton Park,London
- Old Trafford,"Sir Matt
- Busby Way",Manchester
- St. James' Park,,"Newcastle-upon-tyne
- Tyne and Wear"
- "White Hart Lane
- (The Lane)","Bill Nicholson Way
- 748 High Rd","Tottenham
- London"
- Stamford Bridge,"Fulham Road
- London",
- "Etihad Stadium
- Rowsley St
- Manchester",,
- Goodison,Goodison Road,Liverpool
@@ -1,10 +0,0 @@
- a,b,c
- 1,2,3
- ,,
- 4,5,6
- 7,8,9
- ,,
- 10,11,12
- ,,
- 13,14,15
- ,,
@@ -1,3 +0,0 @@
- email,firstname,lastname,email,age
- tom@bla.com,Tom,Sawyer,mike@bla.com,34
- eri@bla.com,Eri,Chan,tom@bla.com,21
@@ -1,5 +0,0 @@
- not empty 1,not empty 2,not empty 3,empty 1,empty 2
- 56
-
- 666
- ",?,"
@@ -1,2 +0,0 @@
- id,col1,col2,col3
- 123,,,
@@ -1,2 +0,0 @@
- id,col1,col2,col3
- 123,,,1
@@ -1,2 +0,0 @@
- Name,Email,Financial Status,Paid at,Fulfillment Status,Fulfilled at,Accepts Marketing,Currency,Subtotal,Shipping,Taxes,Total,Discount Code,Discount Amount,Shipping Method,Created at,Lineitem quantity,Lineitem name,Lineitem price,Lineitem compare at price,Lineitem sku,Lineitem requires shipping,Lineitem taxable,Lineitem fulfillment status,Billing Name,Billing Street,Billing Address1,Billing Address2,Billing Company,Billing City,Billing Zip,Billing Province,Billing Country,Billing Phone,Shipping Name,Shipping Street,Shipping Address1,Shipping Address2,Shipping Company,Shipping City,Shipping Zip,Shipping Province,Shipping Country,Shipping Phone,Notes,Note Attributes,Cancelled at,Payment Method,Payment Reference,Refunded Amount,Vendor, rece,Tags,Risk Level,Source,Lineitem discount,Tax 1 Name,Tax 1 Value,Tax 2 Name,Tax 2 Value,Tax 3 Name,Tax 3 Value,Tax 4 Name,Tax 4 Value,Tax 5 Name,Tax 5 Value,Phone,Receipt Number,Duties,Billing Province Name,Shipping Province Name,Payment ID,Payment Terms Name,Next Payment Due At
- #MR1220817,foo@bar.com,paid,2022-02-08 22:31:28 +0100,unfulfilled,,yes,EUR,144,0,24,144,VIP,119.6,"Livraison Standard GRATUITE, 2-5 jours avec suivi",2022-02-08 22:31:26 +0100,2,Cire Épilation Nacrée,37,,WAX-200-NAC,true,true,pending,French Fry,64 Boulevard Budgié,64 Boulevard Budgié,,,dootdoot’,'49100,,FR,06 12 34 56 78,French Fry,64 Boulevard Budgi,64 Boulevard Budgié,,,dootdoot,'49100,,FR,06 12 34 56 78,,,,Stripe,c23800013619353.2,0,Goober Rég,4331065802905,902,Low,web,0,FR TVA 20%,24,,,,,,,,,3366012111111,,,,,,,
@@ -1,11 +0,0 @@
- not a comment#First Name,Last Name,Dogs,Cats,Birds,Fish
- # comment two
- Dan,McAllister,2,0,,
- Lucy#L,Laweless,,5,0,
- # anothter comment
- ,,,,,
- Miles,O'Brian,0,0,0,21
- Nancy,Homes,2,0,1,
- Hernán,Curaçon,3,0,0,
- #comment,comment,1,2,3,4
- ,,,,,
@@ -1,3 +0,0 @@
- h1,h2
- a,"b
- #c"
@@ -1,2 +0,0 @@
- THIS,THAT,other
- this,that,other
@@ -1,4 +0,0 @@
- name,count,price
- hammer,4,12.50
- axe,2,7.30
- crowbar,3,17.50
@@ -1 +0,0 @@
- name,count,price
@@ -1,4 +0,0 @@
- name,count,price
- hammer,4,12.50
- axe,2,7.30
- crowbar,3,17.50
@@ -1,2 +0,0 @@
- Column 0,Column 1,Column 2,Column 3,Column 4,Column 5,Column 6,Column 7,Column 8,Column 9,Column 10,Column 11,Column 12,Column 13,Column 14,Column 15,Column 16,Column 17,Column 18,Column 19,Column 20,Column 21,Column 22,Column 23,Column 24,Column 25,Column 26,Column 27,Column 28,Column 29,Column 30,Column 31,Column 32,Column 33,Column 34,Column 35,Column 36,Column 37,Column 38,Column 39,Column 40,Column 41,Column 42,Column 43,Column 44,Column 45,Column 46,Column 47,Column 48,Column 49,Column 50,Column 51,Column 52,Column 53,Column 54,Column 55,Column 56,Column 57,Column 58,Column 59,Column 60,Column 61,Column 62,Column 63,Column 64,Column 65,Column 66,Column 67,Column 68,Column 69,Column 70,Column 71,Column 72,Column 73,Column 74,Column 75,Column 76,Column 77,Column 78,Column 79,Column 80,Column 81,Column 82,Column 83,Column 84,Column 85,Column 86,Column 87,Column 88,Column 89,Column 90,Column 91,Column 92,Column 93,Column 94,Column 95,Column 96,Column 97,Column 98,Column 99,Column 100,Column 101,Column 102,Column 103,Column 104,Column 105,Column 106,Column 107,Column 108,Column 109,Column 110,Column 111,Column 112,Column 113,Column 114,Column 115,Column 116,Column 117,Column 118,Column 119,Column 120,Column 121,Column 122,Column 123,Column 124,Column 125,Column 126,Column 127,Column 128,Column 129,Column 130,Column 131,Column 132,Column 133,Column 134,Column 135,Column 136,Column 137,Column 138,Column 139,Column 140,Column 141,Column 142,Column 143,Column 144,Column 145,Column 146,Column 147,Column 148,Column 149,Column 150,Column 151,Column 152,Column 153,Column 154,Column 155,Column 156,Column 157,Column 158,Column 159,Column 160,Column 161,Column 162,Column 163,Column 164,Column 165,Column 166,Column 167,Column 168,Column 169,Column 170,Column 171,Column 172,Column 173,Column 174,Column 175,Column 176,Column 177,Column 178,Column 179,Column 180,Column 181,Column 182,Column 183,Column 184,Column 185,Column 186,Column 187,Column 188,Column 189,Column 190,Column 191,Column 192,Column 193,Column 194,Column 195,Column 196,Column 197,Column 198,Column 199,Column 200,Column 201,Column 202,Column 203,Column 204,Column 205,Column 206,Column 207,Column 208,Column 209,Column 210,Column 211,Column 212,Column 213,Column 214,Column 215,Column 216,Column 217,Column 218,Column 219,Column 220,Column 221,Column 222,Column 223,Column 224,Column 225,Column 226,Column 227,Column 228,Column 229,Column 230,Column 231,Column 232,Column 233,Column 234,Column 235,Column 236,Column 237,Column 238,Column 239,Column 240,Column 241,Column 242,Column 243,Column 244,Column 245,Column 246,Column 247,Column 248,Column 249,Column 250,Column 251,Column 252,Column 253,Column 254,Column 255,Column 256,Column 257,Column 258,Column 259,Column 260,Column 261,Column 262,Column 263,Column 264,Column 265,Column 266,Column 267,Column 268,Column 269,Column 270,Column 271,Column 272,Column 273,Column 274,Column 275,Column 276,Column 277,Column 278,Column 279,Column 280,Column 281,Column 282,Column 283,Column 284,Column 285,Column 286,Column 287,Column 288,Column 289,Column 290,Column 291,Column 292,Column 293,Column 294,Column 295,Column 296,Column 297,Column 298,Column 299,Column 300,Column 301,Column 302,Column 303,Column 304,Column 305,Column 306,Column 307,Column 308,Column 309,Column 310,Column 311,Column 312,Column 313,Column 314,Column 315,Column 316,Column 317,Column 318,Column 319,Column 320,Column 321,Column 322,Column 323,Column 324,Column 325,Column 326,Column 327,Column 328,Column 329,Column 330,Column 331,Column 
332,Column 333,Column 334,Column 335,Column 336,Column 337,Column 338,Column 339,Column 340,Column 341,Column 342,Column 343,Column 344,Column 345,Column 346,Column 347,Column 348,Column 349,Column 350,Column 351,Column 352,Column 353,Column 354,Column 355,Column 356,Column 357,Column 358,Column 359,Column 360,Column 361,Column 362,Column 363,Column 364,Column 365,Column 366,Column 367,Column 368,Column 369,Column 370,Column 371,Column 372,Column 373,Column 374,Column 375,Column 376,Column 377,Column 378,Column 379,Column 380,Column 381,Column 382,Column 383,Column 384,Column 385,Column 386,Column 387,Column 388,Column 389,Column 390,Column 391,Column 392,Column 393,Column 394,Column 395,Column 396,Column 397,Column 398,Column 399,Column 400,Column 401,Column 402,Column 403,Column 404,Column 405,Column 406,Column 407,Column 408,Column 409,Column 410,Column 411,Column 412,Column 413,Column 414,Column 415,Column 416,Column 417,Column 418,Column 419,Column 420,Column 421,Column 422,Column 423,Column 424,Column 425,Column 426,Column 427,Column 428,Column 429,Column 430,Column 431,Column 432,Column 433,Column 434,Column 435,Column 436,Column 437,Column 438,Column 439,Column 440,Column 441,Column 442,Column 443,Column 444,Column 445,Column 446,Column 447,Column 448,Column 449,Column 450,Column 451,Column 452,Column 453,Column 454,Column 455,Column 456,Column 457,Column 458,Column 459,Column 460,Column 461,Column 462,Column 463,Column 464,Column 465,Column 466,Column 467,Column 468,Column 469,Column 470,Column 471,Column 472,Column 473,Column 474,Column 475,Column 476,Column 477,Column 478,Column 479,Column 480,Column 481,Column 482,Column 483,Column 484,Column 485,Column 486,Column 487,Column 488,Column 489,Column 490,Column 491,Column 492,Column 493,Column 494,Column 495,Column 496,Column 497,Column 498,Column 499
- rFDAsZLKkINpb,ChLVlxWpwZfrOZg,uGJbxzrFncG,R,P,j,QoKRekBnucMA,N,KJFTwrG,Fs,pKazVnOVFnLvAcK,bo,EgJwvAwnGYPdvj,qVPGxupPKQI,FtNJBxxMIBxezfS,,qrcOm,Angkc,kwhJvozx,H,Yk,Bs,JZTVzstyk,sDiZuoBOVPA,hsgzXcqSnUaF,,AMQcprcuLABRvLF,NxwQvVApPdtTJSw,tKjftA,lOyqAtUuXQtK,ImgNiMBPRo,dyVW,dVbtwSuZuX,IQN,pUEEshP,QVKcImQ,HDoRZ,jrQpVdvtjGHv,YkP,dHyjESzVsgUdRh,p,qz,oCP,clMCAiuiw,JzxVq,zpn,bwfoYmrJkTc,t,HGYcQdMmNjb,CFFbJTeOozA,,MwroljZZwEoBOoc,,wkIRXssJZ,LSaIDarn,qky,BDMJVYE,thOpnBTTiGMfa,JVhoOwTdoukH,xIECafnWcE,,,kbxzc,viKcs,dwVMljnI,fYXJYw,PhohmrDa,alGbl,aeqwrXgjr,MJ,gapeNlzKYUP,tys,PJPfxFYYFPclGJV,HIUXf,AjmDljNZjYqB,HbxoDVzzkQ,aLQFk,PormYxT,rATMrIlCFrDwOzD,Krv,pMwcHztVfVkKo,srb,fdlXgWtPK,r,UZvVNkLYigVzZO,RPDHskEaMqq,dVkUXNKFRbwq,FGSqF,mqG,aZzufH,LHsN,NkKlcjoRFTbFH,ChEHvIQDYomC,L,URFkXirNgU,M,gAtCyH,FLkbLzwEBLgtlt,OadGQgGxGqGkT,tEQUWJDpb,p,fXIyPxIftbgHNPL,dM,mrz,cGXxRFLUG,MMvNyk,mKEcw,QMROkjgOWnBSZI,DQQ,QeKZt,Xaor,KUPKflOhGnrOMu,VpwHZUN,raDbObrwsRaa,,WqddHDTh,wj,XZtQIS,CGPUgYOQtiu,,zRkl,,ATlS,JheeokutCEtvAdV,hozEKLGRET,DsEiWDjQ,lTQjDhEah,WqREQoJLBSP,,FOaImBDZclTMM,vhEjpJddIVh,xC,ZCSpDtdJw,phRxoGO,VdhVyRebsNynrk,HfCkVLQnLnRLA,dwxQUCPZc,koFg,lumNQmIweYyTc,R,bDRcfIO,cxH,n,navCHjPdfj,puDQpjuIIMRDqK,EVkAd,ARVpVku,hUEqyejzKt,,Qtygnqk,HlZgSRnbmIHZP,QZpVTn,OQgoIZhxIsh,TYPUMV,ofjg,YvYMWpSOAA,PGtCCHInmFBqPp,va,viWLIIjBOEwQCP,E,yjKVemUmGC,FQhSaVdeAJjO,flcBAZzLOlrg,bShmRGHdOemzMZi,gjUzxDONxWymoK,ZS,VBFVHug,igVYBKUJIcGKbim,msrUQUHTNymL,,pVeEHwNN,BqnU,UkYsIhEnqayMIa,RQHqYCoEU,ujuLHy,DctKlTNlADp,MJmfaBxeNIONUTQ,BcPqmfBSWJEs,kH,bYQiLOweBh,ajEmwTPgdGtDI,aeMJiQthn,QfvLiAuJjYB,aNauvl,bcCqdQie,tCwTqF,tNVw,sbjLwbktZUM,Mkrlh,sQihM,clRevuPt,weuxLMDulNP,juaeGRXAz,SKenpQsEZqAA,UGdQdLHvOH,brBHqPjs,WLS,fZ,DBSBUMvgLYzbKH,eFks,zLt,hqnDnIsUsremb,dizzYLAKUGXsH,vDWNIbiepsMPPm,QHNpZgZ,vl,KpukrVVjLDll,rbOPLzI,woALqQUQnt,JiG,SGZGCjPyvceLmy,ofcCQgXjgVmRsxV,,EERhEDO,PxuJxrOka,ohMLIz,vpUnwsJejGkurJ,ISjplKfjrYLE,jZgXFIcRDQQkcI,XfKGtVsclDH,UtKeOSPhbf,,AtWJyGatftYCiA,pzEJCjFA,beuwjh,OrrcIHW,SjuBT,xVNUQEjxyB,CTDOUCAxhx,YpCApsrXvk,Gp,Us,ALBNnNFhdbMYjpl,msB,XniwYCr,aMImLpFlzbz,p,guQBgEARv,NRwcXFoVyjuRi,QfLRUsEQq,WrzdF,QVb,oDTGOMYEVu,iAfcPl,wLkI,VON,,P,TPwCynToFkJI,gZzcqx,QrZNqo,p,WSfbj,ldxi,taws,meZ,g,icjnchZcQQIqrnQ,RQRmriLCPE,Lxnxu,AoC,XNbQ,GhQqR,GSHHsytQGKNLJGU,PBi,IjIszY,WfXyRMqGBkI,jL,loeFjtFErHKhPv,YHAubVlaujA,hNAbVNu,sLhChzeLLvYXR,DnL,hamyOyGAouk,cNOZnSPNhoFjAFq,RXOFzh,JkgdgUCEI,ZidLIxtn,KaMuBFTdvSSN,Owj,dSHiNUxjaGQcieG,rVSK,DwxWsgHgbeiIVoF,I,NsOEAk,Hqdv,Xeedwold,aVFPYNWIehmpgZ,gH,yl,GDesAPfYPtrkDEx,YZbPYzQBUaseUBi,FYSJAlciOmnK,DjRdbezjrAdQcX,FUNKjgklfnksvn,NMCfDtqFvcI,T,rpkwYAmR,nTAJTyiVPbFuv,qKdFCrPCEPRE,xATVXeqsAKRcYV,tHzCGeqnplni,cXGiZMgXct,souXHmYybMmbyhg,i,oDhWt,L,kwesSP,jyksdMdyne,TCyYt,wX,JiSsPdDajKirPgL,cc,qnCQz,TS,luVvIgALvyB,,eGpRJq,RKWwtfwHo,ISEH,SiRldWBiF,jyFuxIz,cVojag,FGWd,HF,xI,xOwUyr,iDDEDH,vyDxjfVus,zLvpx,ssrFvXDVo,kXEzRcW,opsKOmCLbFwSM,xHfeHRWwalx,n,KOrSY,CAOTgyQJm,wCuuuzwffVEY,ldmjKUum,EDQzj,czqFC,GYAKCxOiu,nAM,uNQnVaBCj,TvoVlpJWYI,SFlUDJ,XTuQjsZtpUhgWId,D,xEiowDk,PaBftY,,PNdZMqagJawiLKy,pUcrcOEKSP,RlDCo,,kcK,RJugqQIBYfhC,W,ov,PzfdKIR,OBIa,JYXxqRucy,UaTDgAkJfw,,ydlnbzoww,fQdyIegvIc,DlT,LSqWLh,ihm,sR,MInOCbVexl,NuwJxdxvkTXa,nlJrPpKDIf,iWOwycGEID,PQdoTHebXj,DROZTCRnma,cFAMtx,BkYpK,JitcTdIPlTMEfl,wOSBOJEnIOw,CeEItVPQEeIfFgZ,OiCeEuVRKJ,hjFaJxsTYRCgFYz,giSKHpCNXNk,RaNmFDao,htRSneNtuDta,HdRaC,vkIkWSGebQbpaT,kzyJVn,gMMGxfRjFskmoo,xtfUXOdrlUs,CoSIbIPfVfTXko,zVOXIJ,UGQIlWuRzDJAPsn,,sWXjt,dNkgPEgim,WVmBPZSTa,KJnedQffhg,e,XhgwuoHiR,rt,FYNI,Rk,ImHT,QXR,Ta,wvU,qhsGIux,yyrLPMQmKVkCp,SBYM,a,aCxwZKlj,,lcNXLvIEkjQTO,XjpXtlM,PhdrxqxDVWO,pCIQtTuw
u,WAbZHWAEtzLJOql,XDETKm,KuF,PozCBInp,d,tWtj,QKjx,eFevbQsOtGGxJM,FZIvXPnOKDMDgdB,khe,YdnPTMxczkm,L,SREiJirL,irZJxq,vl,SYsHw,vKSe,ypvRthKFyTRs,IuAdRz,lGotaaKUU,hxMTxzrUFT,dFCbGN,WDTco,xXhedTTXbbDllWX,M,MrhdNyXzFxP,LjX,qRqvrGrNzC,E,nQ,gOWQSgeCwVIq,ASGxcPYYmTAVm,gbnOqDOicyz,h,gviqOP,oV,QjDOngDXFeYp,ojPwZPSodxzxbu,jcWTp,ERHZmFboGYVCQxP,ox,PsFrmpSe,GbhohxlLVcU,rKMLpQNsvH,UzMJMaK,GilkAEVQWzUA,PGCrmxvKB,jOEDmSM,arTAqyNOUtsWj,,,sQXstIGqFfJg,FYOsFdUYr,DwYhqnBlsGtfez,FcKkwqG,sqZFwfyPByL,RkB,tYVdRtbHpdTRPBf,kObsAtqgghqe,,awzrSqHxz,,aXarXMPnQJgqJyM,ldb,NOdwJ,gUYvxxVPCcrmDOd,dcNR,,uglxXybrDyaHCU,JYRLQC,SoolzzZaZ,OZXZiMSimJrH,iZQiZl,bIDq,OSd,UopfHT,f,v,ynUYQwMmKrH,MtF,Cn,ad
@@ -1,3 +0,0 @@
- "name","dob"
- "Arnold Schwarzenegger","1947-07-30"
- "Jeff "the dude" Bridges","1949-12-04"
@@ -1,3 +0,0 @@
- "name","dob"dob""
- "Arnold Schwarzenegger","1947-07-30"
- "Jeff Bridges","1949-12-04"
@@ -1,3 +0,0 @@
- item,price
- Book,$9.99
- Mug,$14.99
@@ -1,7 +0,0 @@
- Dan,McAllister,2,0,,
- Lucy,Laweless,,5,0,
- ,,,,,
- Miles,O'Brian,0,0,0,21
- Nancy,Homes,2,0,1,
- Hernán,Curaçon,3,0,0,
- ,,,,,
@@ -1,5 +0,0 @@
- First Name,Last Name,Reference, Wealth
- Dan,McAllister,0123,3.5
- ,,,
- Miles,O'Brian,2345,3
- Nancy,Homes,2345,01
@@ -1,5 +0,0 @@
- first name,last name,dogs,cats,birds,fish
- Dan,McAllister,2,,,
- Lucy,Laweless,,5,,
- Miles,O'Brian,,,,21
- Nancy,Homes,2,,1,