smarter_csv 1.2.7 → 1.4.2

Sign up to get free protection for your applications and to get access to all the features.
@@ -4,41 +4,36 @@ module SmarterCSV
4
4
  class IncorrectOption < SmarterCSVException; end
5
5
  class DuplicateHeaders < SmarterCSVException; end
6
6
  class MissingHeaders < SmarterCSVException; end
7
-
7
+ class NoColSepDetected < SmarterCSVException; end
8
8
 
9
9
  def SmarterCSV.process(input, options={}, &block) # first parameter: filename or input object with readline method
10
- default_options = {:col_sep => ',' , :row_sep => $/ , :quote_char => '"', :force_simple_split => false , :verbose => false ,
11
- :remove_empty_values => true, :remove_zero_values => false , :remove_values_matching => nil , :remove_empty_hashes => true , :strip_whitespace => true,
12
- :convert_values_to_numeric => true, :strip_chars_from_headers => nil , :user_provided_headers => nil , :headers_in_file => true,
13
- :comment_regexp => /\A#/, :chunk_size => nil , :key_mapping_hash => nil , :downcase_header => true, :strings_as_keys => false, :file_encoding => 'utf-8',
14
- :remove_unmapped_keys => false, :keep_original_headers => false, :value_converters => nil, :skip_lines => nil, :force_utf8 => false, :invalid_byte_sequence => '',
15
- :auto_row_sep_chars => 500, :required_headers => nil
16
- }
17
10
  options = default_options.merge(options)
18
11
  options[:invalid_byte_sequence] = '' if options[:invalid_byte_sequence].nil?
19
- csv_options = options.select{|k,v| [:col_sep, :row_sep, :quote_char].include?(k)} # options.slice(:col_sep, :row_sep, :quote_char)
12
+
20
13
  headerA = []
21
14
  result = []
22
- old_row_sep = $/
15
+ old_row_sep = $INPUT_RECORD_SEPARATOR
23
16
  file_line_count = 0
24
17
  csv_line_count = 0
25
18
  has_rails = !! defined?(Rails)
26
19
  begin
27
20
  f = input.respond_to?(:readline) ? input : File.open(input, "r:#{options[:file_encoding]}")
28
21
 
22
+ # auto-detect the row separator
23
+ options[:row_sep] = SmarterCSV.guess_line_ending(f, options) if options[:row_sep].to_sym == :auto
24
+ $INPUT_RECORD_SEPARATOR = options[:row_sep]
25
+ # attempt to auto-detect column separator
26
+ options[:col_sep] = guess_column_separator(f) if options[:col_sep].to_sym == :auto
27
+ # preserve options, in case we need to call the CSV class
28
+ csv_options = options.select{|k,v| [:col_sep, :row_sep, :quote_char].include?(k)} # options.slice(:col_sep, :row_sep, :quote_char)
29
+ csv_options.delete(:row_sep) if [nil, :auto].include?( options[:row_sep].to_sym )
30
+ csv_options.delete(:col_sep) if [nil, :auto].include?( options[:col_sep].to_sym )
31
+
29
32
  if (options[:force_utf8] || options[:file_encoding] =~ /utf-8/i) && ( f.respond_to?(:external_encoding) && f.external_encoding != Encoding.find('UTF-8') || f.respond_to?(:encoding) && f.encoding != Encoding.find('UTF-8') )
30
33
  puts 'WARNING: you are trying to process UTF-8 input, but did not open the input with "b:utf-8" option. See README file "NOTES about File Encodings".'
31
34
  end
32
35
 
33
- if options[:row_sep] == :auto
34
- options[:row_sep] = line_ending = SmarterCSV.guess_line_ending( f, options )
35
- f.rewind
36
- end
37
- $/ = options[:row_sep]
38
-
39
- if options[:skip_lines].to_i > 0
40
- options[:skip_lines].to_i.times{f.readline}
41
- end
36
+ options[:skip_lines].to_i.times{f.readline} if options[:skip_lines].to_i > 0
42
37
 
43
38
  if options[:headers_in_file] # extract the header line
44
39
  # process the header line in the CSV file..
@@ -53,21 +48,21 @@ module SmarterCSV
53
48
 
54
49
  if (header =~ %r{#{options[:quote_char]}}) and (! options[:force_simple_split])
55
50
  file_headerA = begin
56
- CSV.parse( header, csv_options ).flatten.collect!{|x| x.nil? ? '' : x} # to deal with nil values from CSV.parse
51
+ CSV.parse( header, **csv_options ).flatten.collect!{|x| x.nil? ? '' : x} # to deal with nil values from CSV.parse
57
52
  rescue CSV::MalformedCSVError => e
58
53
  raise $!, "#{$!} [SmarterCSV: csv line #{csv_line_count}]", $!.backtrace
59
54
  end
60
55
  else
61
56
  file_headerA = header.split(options[:col_sep])
62
57
  end
58
+ file_header_size = file_headerA.size # before mapping, which could delete keys
59
+
63
60
  file_headerA.map!{|x| x.gsub(%r/#{options[:quote_char]}/,'') }
64
61
  file_headerA.map!{|x| x.strip} if options[:strip_whitespace]
65
62
  unless options[:keep_original_headers]
66
63
  file_headerA.map!{|x| x.gsub(/\s+|-+/,'_')}
67
64
  file_headerA.map!{|x| x.downcase } if options[:downcase_header]
68
65
  end
69
-
70
- file_header_size = file_headerA.size
71
66
  else
72
67
  raise SmarterCSV::IncorrectOption , "ERROR: If :headers_in_file is set to false, you have to provide :user_provided_headers" if options[:user_provided_headers].nil?
73
68
  end
@@ -84,6 +79,8 @@ module SmarterCSV
84
79
  else
85
80
  headerA = file_headerA
86
81
  end
82
+ header_size = headerA.size # used for splitting lines
83
+
87
84
  headerA.map!{|x| x.to_sym } unless options[:strings_as_keys] || options[:keep_original_headers]
88
85
 
89
86
  unless options[:user_provided_headers] # wouldn't make sense to re-map user provided headers
@@ -92,7 +89,7 @@ module SmarterCSV
92
89
  # do some key mapping on the keys in the file header
93
90
  # if you want to completely delete a key, then map it to nil or to ''
94
91
  if ! key_mappingH.nil? && key_mappingH.class == Hash && key_mappingH.keys.size > 0
95
- headerA.map!{|x| key_mappingH.has_key?(x) ? (key_mappingH[x].nil? ? nil : key_mappingH[x].to_sym) : (options[:remove_unmapped_keys] ? nil : x)}
92
+ headerA.map!{|x| key_mappingH.has_key?(x) ? (key_mappingH[x].nil? ? nil : key_mappingH[x]) : (options[:remove_unmapped_keys] ? nil : x)}
96
93
  end
97
94
  end
98
95
 
@@ -123,7 +120,7 @@ module SmarterCSV
123
120
 
124
121
  # now on to processing all the rest of the lines in the CSV file:
125
122
  while ! f.eof? # we can't use f.readlines() here, because this would read the whole file into memory at once, and eof => true
126
- line = f.readline # read one line.. this uses the input_record_separator $/ which we set previously!
123
+ line = f.readline # read one line.. this uses the input_record_separator $INPUT_RECORD_SEPARATOR which we set previously!
127
124
 
128
125
  # replace invalid byte sequence in UTF-8 with question mark to avoid errors
129
126
  line = line.force_encoding('utf-8').encode('utf-8', invalid: :replace, undef: :replace, replace: options[:invalid_byte_sequence]) if options[:force_utf8] || options[:file_encoding] !~ /utf-8/i
@@ -136,8 +133,8 @@ module SmarterCSV
136
133
  # cater for the quoted csv data containing the row separator carriage return character
137
134
  # in which case the row data will be split across multiple lines (see the sample content in spec/fixtures/carriage_returns_rn.csv)
138
135
  # by detecting the existence of an uneven number of quote characters
139
- multiline = line.count(options[:quote_char])%2 == 1
140
- while line.count(options[:quote_char])%2 == 1
136
+ multiline = line.count(options[:quote_char])%2 == 1 # should handle quote_char nil
137
+ while line.count(options[:quote_char])%2 == 1 # should handle quote_char nil
141
138
  next_line = f.readline
142
139
  next_line = next_line.force_encoding('utf-8').encode('utf-8', invalid: :replace, undef: :replace, replace: options[:invalid_byte_sequence]) if options[:force_utf8] || options[:file_encoding] !~ /utf-8/i
143
140
  line += next_line
@@ -145,20 +142,26 @@ module SmarterCSV
145
142
  end
146
143
  print "\nline contains uneven number of quote chars so including content through file line %d\n" % file_line_count if options[:verbose] && multiline
147
144
 
148
- line.chomp! # will use $/ which is set to options[:col_sep]
145
+ line.chomp! # will use $INPUT_RECORD_SEPARATOR which is set to options[:row_sep]
149
146
 
150
147
  if (line =~ %r{#{options[:quote_char]}}) and (! options[:force_simple_split])
151
148
  dataA = begin
152
- CSV.parse( line, csv_options ).flatten.collect!{|x| x.nil? ? '' : x} # to deal with nil values from CSV.parse
149
+ CSV.parse( line, **csv_options ).flatten.collect!{|x| x.nil? ? '' : x} # to deal with nil values from CSV.parse
153
150
  rescue CSV::MalformedCSVError => e
154
151
  raise $!, "#{$!} [SmarterCSV: csv line #{csv_line_count}]", $!.backtrace
155
152
  end
156
153
  else
157
- dataA = line.split(options[:col_sep])
154
+ dataA = line.split(options[:col_sep], header_size)
158
155
  end
159
156
  #### dataA.map!{|x| x.gsub(%r/#{options[:quote_char]}/,'') } # this is actually not a good idea as a default
160
157
  dataA.map!{|x| x.strip} if options[:strip_whitespace]
158
+
159
+ # if all values are blank, then ignore this line
160
+ # SEE: https://github.com/rails/rails/blob/32015b6f369adc839c4f0955f2d9dce50c0b6123/activesupport/lib/active_support/core_ext/object/blank.rb#L121
161
+ next if options[:remove_empty_hashes] && blank?(dataA)
162
+
161
163
  hash = Hash.zip(headerA,dataA) # from Facets of Ruby library
164
+
162
165
  # make sure we delete any key/value pairs from the hash, which the user wanted to delete:
163
166
  # Note: Ruby < 1.9 doesn't allow empty symbol literals!
164
167
  hash.delete(nil); hash.delete('');
@@ -166,18 +169,17 @@ module SmarterCSV
166
169
  eval('hash.delete(:"")')
167
170
  end
168
171
 
169
- # remove empty values using the same regexp as used by the rails blank? method
170
- # which caters for double \n and \r\n characters such as "1\r\n\r\n2" whereas the original check (v =~ /^\s*$/) does not
171
- if options[:remove_empty_values]
172
+ if options[:remove_empty_values] == true
172
173
  if has_rails
173
174
  hash.delete_if{|k,v| v.blank?}
174
175
  else
175
- hash.delete_if{|k,v| v.nil? || v !~ /[^[:space:]]/}
176
+ hash.delete_if{|k,v| blank?(v)}
176
177
  end
177
178
  end
178
179
 
179
180
  hash.delete_if{|k,v| ! v.nil? && v =~ /^(\d+|\d+\.\d+)$/ && v.to_f == 0} if options[:remove_zero_values] # values are typically Strings!
180
181
  hash.delete_if{|k,v| v =~ options[:remove_values_matching]} if options[:remove_values_matching]
182
+
181
183
  if options[:convert_values_to_numeric]
182
184
  hash.each do |k,v|
183
185
  # deal with the :only / :except options to :convert_values_to_numeric
@@ -247,7 +249,7 @@ module SmarterCSV
247
249
  chunk = [] # initialize for next chunk of data
248
250
  end
249
251
  ensure
250
- $/ = old_row_sep # make sure this stupid global variable is always reset to it's previous value after we're done!
252
+ $INPUT_RECORD_SEPARATOR = old_row_sep # make sure this stupid global variable is always reset to its previous value after we're done!
251
253
  f.close if f.respond_to?(:close)
252
254
  end
253
255
  if block_given?
@@ -258,8 +260,63 @@ module SmarterCSV
258
260
  end
259
261
 
260
262
  private
261
- # acts as a road-block to limit processing when iterating over all k/v pairs of a CSV-hash:
262
263
 
264
# Returns the default value of every option understood by SmarterCSV.process.
# The caller's options are merged over this Hash, so any key given by the
# caller wins over the default listed here.
#
# A new Hash is built on every call, so the result can be modified without
# affecting later calls.
#
# NOTE(review): $INPUT_RECORD_SEPARATOR is the alias for $/ provided by the
# 'English' standard library; it is nil when that library has not been loaded.
# Presumably `require 'csv'` pulls it in -- confirm.
#
# @return [Hash{Symbol => Object}] option name => default value
def self.default_options
  {
    auto_row_sep_chars: 500,
    chunk_size: nil,
    col_sep: ',',
    comment_regexp: /\A#/,
    convert_values_to_numeric: true,
    downcase_header: true,
    file_encoding: 'utf-8',
    force_simple_split: false,
    force_utf8: false,
    headers_in_file: true,
    invalid_byte_sequence: '',
    keep_original_headers: false,
    key_mapping_hash: nil,
    quote_char: '"',
    remove_empty_hashes: true,
    remove_empty_values: true,
    remove_unmapped_keys: false,
    remove_values_matching: nil,
    remove_zero_values: false,
    required_headers: nil,
    row_sep: $INPUT_RECORD_SEPARATOR,
    skip_lines: nil,
    strings_as_keys: false,
    strip_chars_from_headers: nil,
    strip_whitespace: true,
    user_provided_headers: nil,
    value_converters: nil,
    verbose: false,
  }
end
296
+
297
# Tells whether +value+ carries no content.
#
# * Array -- blank when every element is blank (an empty Array is blank)
# * Hash  -- blank when every value is blank (keys are ignored; an empty Hash is blank)
# * anything else -- decided by elem_blank?
#
# @param value [Object] a single value, or an Array / Hash of values
# @return [Boolean]
def self.blank?(value)
  case value
  when Array
    value.all? { |elem| elem_blank?(elem) }
  when Hash
    value.each_value.all? { |elem| elem_blank?(elem) }
  else
    elem_blank?(value)
  end
end
307
+
308
# Tells whether a single (non-collection) value is blank.
#
# * nil is blank
# * a String is blank when it contains nothing but whitespace (or is empty)
# * every other object (numbers, etc.) is not blank
#
# The whitespace test uses the POSIX class [[:space:]] rather than \S, so that
# non-ASCII whitespace (e.g. a no-break space) also counts as blank. This is
# the same pattern the pre-1.4 implementation used (v !~ /[^[:space:]]/).
#
# @param value [Object]
# @return [Boolean]
def self.elem_blank?(value)
  case value
  when nil
    true
  when String
    !value.match?(/[^[:space:]]/)
  else
    false
  end
end
318
+
319
+ # acts as a road-block to limit processing when iterating over all k/v pairs of a CSV-hash:
263
320
  def self.only_or_except_limit_execution( options, option_name, key )
264
321
  if options[option_name].is_a?(Hash)
265
322
  if options[option_name].has_key?( :except )
@@ -271,6 +328,24 @@ module SmarterCSV
271
328
  return false
272
329
  end
273
330
 
331
# Guesses the column separator of the input by counting how often each
# candidate character (comma, tab, semicolon, colon, pipe) occurs in up to the
# first five lines read from +filehandle+, and picking the most frequent one.
# On a tie the candidate listed first wins.
#
# The handle is rewound afterwards so that normal processing starts at the
# beginning of the input again.
#
# @param filehandle [#readline, #rewind] input to inspect
# @return [String] the detected separator
# @raise [SmarterCSV::NoColSepDetected] if no candidate occurs at all,
#   including when the input is empty
def self.guess_column_separator(filehandle)
  candidates = [',', "\t", ';', ':', '|']
  counts = Hash.new(0)
  begin
    5.times do
      line = filehandle.readline
      candidates.each { |sep| counts[sep] += line.count(sep) }
    end
  rescue EOFError
    # short files: fewer than five lines available, use what we have
  end
  filehandle.rewind

  # counts is empty (max is nil) when not even one line could be read
  best = counts.values.max
  raise SmarterCSV::NoColSepDetected if best.nil? || best.zero?

  counts.key(best)
end
348
+
274
349
  # limitation: this currently reads the whole file in before making a decision
275
350
  def self.guess_line_ending( filehandle, options )
276
351
  counts = {"\n" => 0 , "\r" => 0, "\r\n" => 0}
@@ -297,6 +372,8 @@ module SmarterCSV
297
372
  lines += 1
298
373
  break if options[:auto_row_sep_chars] && options[:auto_row_sep_chars] > 0 && lines >= options[:auto_row_sep_chars]
299
374
  end
375
+ filehandle.rewind
376
+
300
377
  counts["\r"] += 1 if last_char == "\r"
301
378
  # find the key/value pair with the largest counter:
302
379
  k,_ = counts.max_by{|_,v| v}
@@ -1,3 +1,3 @@
1
1
  module SmarterCSV
2
- VERSION = "1.2.7"
2
+ VERSION = "1.4.2"
3
3
  end
data/lib/smarter_csv.rb CHANGED
@@ -1,3 +1,11 @@
1
+ if ENV['COVERAGE']
2
+ require 'simplecov'
3
+ SimpleCov.start do
4
+ add_filter "/spec/"
5
+ add_filter "/pkg/"
6
+ end
7
+ end
8
+
1
9
  require 'csv'
2
10
  require "smarter_csv/version"
3
11
  require "extensions/hash.rb"
data/smarter_csv.gemspec CHANGED
@@ -1,21 +1,25 @@
1
1
  # -*- encoding: utf-8 -*-
2
2
  require File.expand_path('../lib/smarter_csv/version', __FILE__)
3
3
 
4
# Gem specification for smarter_csv.
# The version comes from SmarterCSV::VERSION (lib/smarter_csv/version, required above).
Gem::Specification.new do |spec|
  spec.name          = "smarter_csv"
  spec.version       = SmarterCSV::VERSION
  spec.authors       = ["Tilo Sloboda"]
  spec.email         = ["tilo.sloboda@gmail.com"]

  spec.summary       = %q{Ruby Gem for smarter importing of CSV Files (and CSV-like files), with lots of optional features, e.g. chunked processing for huge CSV files}
  spec.description   = %q{Ruby Gem for smarter importing of CSV Files as Array(s) of Hashes, with optional features for processing large files in parallel, embedded comments, unusual field- and record-separators, flexible mapping of CSV-headers to Hash-keys}
  spec.homepage      = "https://github.com/tilo/smarter_csv"
  spec.license       = 'MIT'

  # NUL-separated listing: unlike splitting on $\ (nil by default, which makes
  # String#split break on any whitespace), this keeps paths containing spaces intact.
  spec.files         = `git ls-files -z`.split("\x0")
  spec.executables   = spec.files.grep(%r{^bin/}).map{ |f| File.basename(f) }
  spec.test_files    = spec.files.grep(%r{^(test|spec|features)/})
  spec.require_paths = ["lib"]
  spec.requirements  = ['csv'] # for CSV.parse() only needed in case we have quoted fields
  spec.add_development_dependency "rspec"
  spec.add_development_dependency "simplecov"
  # spec.add_development_dependency "guard-rspec"

  spec.metadata["homepage_uri"] = spec.homepage
end
@@ -0,0 +1,2 @@
1
+ id,col1,col2,col3
2
+ 123,,,
@@ -0,0 +1,2 @@
1
+ id,col1,col2,col3
2
+ 123,,,1
@@ -0,0 +1,2 @@
1
+ THIS,THAT,other
2
+ this,that,other
@@ -1,5 +1,5 @@
1
1
  First Name,Last Name,Reference, Wealth
2
2
  Dan,McAllister,0123,3.5
3
- ,,,,,
3
+ ,,,
4
4
  Miles,O'Brian,2345,3
5
5
  Nancy,Homes,2345,01
@@ -0,0 +1,4 @@
1
+ Year:Make:Model:Length
2
+ 1997:Ford:E350:2,34
3
+ 2000:Mercury:Cougar:2,38
4
+ 2013:Tesla:Model S:4,97
@@ -0,0 +1,4 @@
1
+ Year,Make,Model,Length
2
+ 1997,Ford,E350,2.34
3
+ 2000,Mercury,Cougar,2.38
4
+ 2013,Tesla,Model S,4.97
@@ -0,0 +1,4 @@
1
+ Year|Make|Model|Length
2
+ 1997|Ford|E350|2,34
3
+ 2000|Mercury|Cougar|2,38
4
+ 2013|Tesla|Model S|4,97
File without changes
@@ -0,0 +1,4 @@
1
+ Year Make Model Length
2
+ 1997 Ford E350 2,34
3
+ 2000 Mercury Cougar 2,38
4
+ 2013 Tesla Model S 4,97
@@ -0,0 +1,55 @@
1
+ require 'spec_helper'
2
+
3
# Exercises SmarterCSV.blank? (invoked via send) for scalar values, arrays and hashes.
describe 'blank?' do
  it 'is true for nil' do
    SmarterCSV.send(:blank?, nil).should eq true
  end

  it 'is true for empty string' do
    SmarterCSV.send(:blank?, '').should eq true
  end

  it 'is true for blank string' do
    SmarterCSV.send(:blank?, ' ').should eq true
  end

  it 'is true for tab string' do
    SmarterCSV.send(:blank?, " \t ").should eq true
  end

  it 'is false for string with content' do
    SmarterCSV.send(:blank?, " 1 ").should eq false
  end

  it 'is false for numeric values' do
    SmarterCSV.send(:blank?, 1).should eq false
  end

  describe 'arrays' do
    it 'is true for empty arrays' do
      SmarterCSV.send(:blank?, []).should eq true
    end

    it 'is true for blank arrays' do
      SmarterCSV.send(:blank?, [nil, '', ' ', " \t "]).should eq true
    end

    it 'is false for non-blank arrays' do
      SmarterCSV.send(:blank?, [nil, '', ' ', " 1 "]).should eq false
    end
  end

  describe 'hashes' do
    it 'is true for empty hashes' do
      SmarterCSV.send(:blank?, {}).should eq true
    end

    it 'is true for blank hashes' do
      SmarterCSV.send(:blank?, {a: nil, b: '', c: ' ', d: " \t "}).should eq true
    end

    it 'is false for non-blank hashes' do
      SmarterCSV.send(:blank?, {a: nil, b: '', c: ' ', d: " 1 "}).should eq false
    end
  end
end
@@ -3,7 +3,6 @@ require 'spec_helper'
3
3
  fixture_path = 'spec/fixtures'
4
4
 
5
5
  describe 'process files with line endings explicitly pre-specified' do
6
-
7
6
  it 'should process a file with \n for line endings and within data fields' do
8
7
  sep = "\n"
9
8
  options = {:row_sep => sep}
@@ -83,14 +82,14 @@ describe 'process files with line endings explicitly pre-specified' do
83
82
  data[1][:members].should == ["Jimmy Page", "Robert Plant", "John Bonham", "John Paul Jones"].join(text_sep)
84
83
  data[1][:albums].should == ["Led Zeppelin", "Led Zeppelin II", "Led Zeppelin III", "Led Zeppelin IV"].join(text_sep)
85
84
  end
86
-
87
85
  end
88
86
 
89
87
  describe 'process files with line endings in automatic mode' do
88
+ let(:options) { { row_sep: :auto } }
90
89
 
91
90
  it 'should process a file with \n for line endings and within data fields' do
92
91
  sep = "\n"
93
- data = SmarterCSV.process("#{fixture_path}/carriage_returns_n.csv", {:row_sep => :auto})
92
+ data = SmarterCSV.process("#{fixture_path}/carriage_returns_n.csv", options)
94
93
  data.flatten.size.should == 8
95
94
  data[0][:name].should == "Anfield"
96
95
  data[0][:street].should == "Anfield Road"
@@ -112,7 +111,29 @@ describe 'process files with line endings in automatic mode' do
112
111
 
113
112
  it 'should process a file with \r for line endings and within data fields' do
114
113
  sep = "\r"
115
- data = SmarterCSV.process("#{fixture_path}/carriage_returns_r.csv", {:row_sep => :auto})
114
+ data = SmarterCSV.process("#{fixture_path}/carriage_returns_r.csv", options)
115
+ data.flatten.size.should == 8
116
+ data[0][:name].should == "Anfield"
117
+ data[0][:street].should == "Anfield Road"
118
+ data[0][:city].should == "Liverpool"
119
+ data[1][:name].should == ["Highbury", "Highbury House"].join(sep)
120
+ data[2][:street].should == ["Sir Matt ", "Busby Way"].join(sep)
121
+ data[3][:city].should == ["Newcastle-upon-tyne ", "Tyne and Wear"].join(sep)
122
+ data[4][:name].should == ["White Hart Lane", "(The Lane)"].join(sep)
123
+ data[4][:street].should == ["Bill Nicholson Way ", "748 High Rd"].join(sep)
124
+ data[4][:city].should == ["Tottenham", "London"].join(sep)
125
+ data[5][:name].should == "Stamford Bridge"
126
+ data[5][:street].should == ["Fulham Road", "London"].join(sep)
127
+ data[5][:city].should be_nil
128
+ data[6][:name].should == ["Etihad Stadium", "Rowsley St", "Manchester"].join(sep)
129
+ data[7][:name].should == "Goodison"
130
+ data[7][:street].should == "Goodison Road"
131
+ data[7][:city].should == "Liverpool"
132
+ end
133
+
134
+ it 'also works when auto is given a string' do
135
+ sep = "\r"
136
+ data = SmarterCSV.process("#{fixture_path}/carriage_returns_r.csv", {row_sep: 'auto'})
116
137
  data.flatten.size.should == 8
117
138
  data[0][:name].should == "Anfield"
118
139
  data[0][:street].should == "Anfield Road"
@@ -134,7 +155,7 @@ describe 'process files with line endings in automatic mode' do
134
155
 
135
156
  it 'should process a file with \r\n for line endings and within data fields' do
136
157
  sep = "\r\n"
137
- data = SmarterCSV.process("#{fixture_path}/carriage_returns_rn.csv", {:row_sep => :auto})
158
+ data = SmarterCSV.process("#{fixture_path}/carriage_returns_rn.csv", options)
138
159
  data.flatten.size.should == 8
139
160
  data[0][:name].should == "Anfield"
140
161
  data[0][:street].should == "Anfield Road"
@@ -157,7 +178,7 @@ describe 'process files with line endings in automatic mode' do
157
178
  it 'should process a file with more quoted text carriage return characters (\r) than line ending characters (\n)' do
158
179
  row_sep = "\n"
159
180
  text_sep = "\r"
160
- data = SmarterCSV.process("#{fixture_path}/carriage_returns_quoted.csv", {:row_sep => :auto})
181
+ data = SmarterCSV.process("#{fixture_path}/carriage_returns_quoted.csv", options)
161
182
  data.flatten.size.should == 2
162
183
  data[0][:band].should == "New Order"
163
184
  data[0][:members].should == ["Bernard Sumner", "Peter Hook", "Stephen Morris", "Gillian Gilbert"].join(text_sep)
@@ -166,5 +187,4 @@ describe 'process files with line endings in automatic mode' do
166
187
  data[1][:members].should == ["Jimmy Page", "Robert Plant", "John Bonham", "John Paul Jones"].join(text_sep)
167
188
  data[1][:albums].should == ["Led Zeppelin", "Led Zeppelin II", "Led Zeppelin III", "Led Zeppelin IV"].join(text_sep)
168
189
  end
169
-
170
190
  end
@@ -2,10 +2,94 @@ require 'spec_helper'
2
2
 
3
3
  fixture_path = 'spec/fixtures'
4
4
 
5
# Covers the :col_sep option: the comma default, explicitly given separators,
# and auto-detection (col_sep given as :auto or 'auto').
describe 'can handle col_sep' do

  it 'has default of comma as col_sep' do
    data = SmarterCSV.process("#{fixture_path}/separator_comma.csv") # no options
    data.first.keys.size.should == 4
    data.size.should eq 3
  end

  describe 'with explicitly given col_sep' do
    it 'loads file with comma separator' do
      options = {:col_sep => ','}
      data = SmarterCSV.process("#{fixture_path}/separator_comma.csv", options)
      data.first.keys.size.should == 4
      data.size.should eq 3
    end

    it 'loads file with tab separator' do
      options = {:col_sep => "\t"}
      data = SmarterCSV.process("#{fixture_path}/separator_tab.csv", options)
      data.first.keys.size.should == 4
      data.size.should eq 3
    end

    it 'loads file with semi-colon separator' do
      options = {:col_sep => ';'}
      data = SmarterCSV.process("#{fixture_path}/separator_semi.csv", options)
      data.first.keys.size.should == 4
      data.size.should eq 3
    end

    it 'loads file with colon separator' do
      options = {:col_sep => ':'}
      data = SmarterCSV.process("#{fixture_path}/separator_colon.csv", options)
      data.first.keys.size.should == 4
      data.size.should eq 3
    end

    it 'loads file with pipe separator' do
      options = {:col_sep => '|'}
      data = SmarterCSV.process("#{fixture_path}/separator_pipe.csv", options)
      data.first.keys.size.should == 4
      data.size.should eq 3
    end
  end

  describe 'auto-detection of separator' do
    # a fresh options hash per example, instead of one hash shared by all examples
    let(:options) { { col_sep: :auto } }

    it 'auto-detects comma separator and loads data' do
      data = SmarterCSV.process("#{fixture_path}/separator_comma.csv", options)
      data.first.keys.size.should == 4
      data.size.should eq 3
    end

    it 'auto-detects tab separator and loads data' do
      data = SmarterCSV.process("#{fixture_path}/separator_tab.csv", options)
      data.first.keys.size.should == 4
      data.size.should eq 3
    end

    it 'auto-detects semi-colon separator and loads data' do
      data = SmarterCSV.process("#{fixture_path}/separator_semi.csv", options)
      data.first.keys.size.should == 4
      data.size.should eq 3
    end

    it 'auto-detects colon separator and loads data' do
      data = SmarterCSV.process("#{fixture_path}/separator_colon.csv", options)
      data.first.keys.size.should == 4
      data.size.should eq 3
    end

    it 'auto-detects pipe separator and loads data' do
      data = SmarterCSV.process("#{fixture_path}/separator_pipe.csv", options)
      data.first.keys.size.should == 4
      data.size.should eq 3
    end

    it 'does not auto-detect other separators' do
      expect {
        SmarterCSV.process("#{fixture_path}/binary.csv", options)
      }.to raise_exception SmarterCSV::NoColSepDetected
    end

    it 'also works when auto is given a string' do
      data = SmarterCSV.process("#{fixture_path}/separator_pipe.csv", col_sep: 'auto')
      data.first.keys.size.should == 4
      data.size.should eq 3
    end
  end
end