RubyGems - smarter_csv - Versions diffs - 1.2.7 → 1.4.2 - Mend

smarter_csv 1.2.7 → 1.4.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (28)

checksums.yaml +4 -4
data/.gitignore +2 -0
data/.travis.yml +8 -4
data/CHANGELOG.md +169 -0
data/CONTRIBUTORS.md +45 -0
data/LICENSE.txt +21 -0
data/README.md +47 -219
data/Rakefile +8 -15
data/lib/smarter_csv/smarter_csv.rb +112 -35
data/lib/smarter_csv/version.rb +1 -1
data/lib/smarter_csv.rb +8 -0
data/smarter_csv.gemspec +20 -16
data/spec/fixtures/empty_columns_1.csv +2 -0
data/spec/fixtures/empty_columns_2.csv +2 -0
data/spec/fixtures/key_mapping.csv +2 -0
data/spec/fixtures/numeric.csv +1 -1
data/spec/fixtures/separator_colon.csv +4 -0
data/spec/fixtures/separator_comma.csv +4 -0
data/spec/fixtures/separator_pipe.csv +4 -0
data/spec/fixtures/{separator.csv → separator_semi.csv} +0 -0
data/spec/fixtures/separator_tab.csv +4 -0
data/spec/smarter_csv/blank_spec.rb +55 -0
data/spec/smarter_csv/carriage_return_spec.rb +27 -7
data/spec/smarter_csv/column_separator_spec.rb +89 -5
data/spec/smarter_csv/empty_columns_spec.rb +74 -0
data/spec/smarter_csv/key_mapping_spec.rb +31 -0
data/spec/smarter_csv/malformed_spec.rb +0 -4
metadata +44 -13

data/lib/smarter_csv/smarter_csv.rb CHANGED Viewed

@@ -4,41 +4,36 @@ module SmarterCSV
   class IncorrectOption < SmarterCSVException; end
   class DuplicateHeaders < SmarterCSVException; end
   class MissingHeaders < SmarterCSVException; end
+  class NoColSepDetected < SmarterCSVException; end
   def SmarterCSV.process(input, options={}, &block)   # first parameter: filename or input object with readline method
-    default_options = {:col_sep => ',' , :row_sep => $/ , :quote_char => '"', :force_simple_split => false , :verbose => false ,
-      :remove_empty_values => true, :remove_zero_values => false , :remove_values_matching => nil , :remove_empty_hashes => true , :strip_whitespace => true,
-      :convert_values_to_numeric => true, :strip_chars_from_headers => nil , :user_provided_headers => nil , :headers_in_file => true,
-      :comment_regexp => /\A#/, :chunk_size => nil , :key_mapping_hash => nil , :downcase_header => true, :strings_as_keys => false, :file_encoding => 'utf-8',
-      :remove_unmapped_keys => false, :keep_original_headers => false, :value_converters => nil, :skip_lines => nil, :force_utf8 => false, :invalid_byte_sequence => '',
-      :auto_row_sep_chars => 500, :required_headers => nil
-    }
     options = default_options.merge(options)
     options[:invalid_byte_sequence] = '' if options[:invalid_byte_sequence].nil?
-    csv_options = options.select{|k,v| [:col_sep, :row_sep, :quote_char].include?(k)} # options.slice(:col_sep, :row_sep, :quote_char)
     headerA = []
     result = []
-    old_row_sep = $/
+    old_row_sep = $INPUT_RECORD_SEPARATOR
     file_line_count = 0
     csv_line_count = 0
     has_rails = !! defined?(Rails)
     begin
       f = input.respond_to?(:readline) ? input : File.open(input, "r:#{options[:file_encoding]}")
+      # auto-detect the row separator
+      options[:row_sep] = SmarterCSV.guess_line_ending(f, options) if options[:row_sep].to_sym == :auto
+      $INPUT_RECORD_SEPARATOR = options[:row_sep]
+      # attempt to auto-detect column separator
+      options[:col_sep] = guess_column_separator(f) if options[:col_sep].to_sym == :auto
+      # preserve options, in case we need to call the CSV class
+      csv_options = options.select{|k,v| [:col_sep, :row_sep, :quote_char].include?(k)} # options.slice(:col_sep, :row_sep, :quote_char)
+      csv_options.delete(:row_sep) if [nil, :auto].include?( options[:row_sep].to_sym )
+      csv_options.delete(:col_sep) if [nil, :auto].include?( options[:col_sep].to_sym )
       if (options[:force_utf8] || options[:file_encoding] =~ /utf-8/i) && ( f.respond_to?(:external_encoding) && f.external_encoding != Encoding.find('UTF-8') || f.respond_to?(:encoding) && f.encoding != Encoding.find('UTF-8') )
         puts 'WARNING: you are trying to process UTF-8 input, but did not open the input with "b:utf-8" option. See README file "NOTES about File Encodings".'
       end
-      if options[:row_sep] == :auto
-        options[:row_sep] = line_ending = SmarterCSV.guess_line_ending( f, options )
-        f.rewind
-      end
-      $/ = options[:row_sep]
-      if options[:skip_lines].to_i > 0
-        options[:skip_lines].to_i.times{f.readline}
-      end
+      options[:skip_lines].to_i.times{f.readline} if options[:skip_lines].to_i > 0
       if options[:headers_in_file]        # extract the header line
         # process the header line in the CSV file..
@@ -53,21 +48,21 @@ module SmarterCSV
         if (header =~ %r{#{options[:quote_char]}}) and (! options[:force_simple_split])
           file_headerA = begin
-            CSV.parse( header, csv_options ).flatten.collect!{|x| x.nil? ? '' : x} # to deal with nil values from CSV.parse
+            CSV.parse( header, **csv_options ).flatten.collect!{|x| x.nil? ? '' : x} # to deal with nil values from CSV.parse
           rescue CSV::MalformedCSVError => e
             raise $!, "#{$!} [SmarterCSV: csv line #{csv_line_count}]", $!.backtrace
           end
         else
           file_headerA =  header.split(options[:col_sep])
         end
+        file_header_size = file_headerA.size # before mapping, which could delete keys
         file_headerA.map!{|x| x.gsub(%r/#{options[:quote_char]}/,'') }
         file_headerA.map!{|x| x.strip}  if options[:strip_whitespace]
         unless options[:keep_original_headers]
           file_headerA.map!{|x| x.gsub(/\s+|-+/,'_')}
           file_headerA.map!{|x| x.downcase }   if options[:downcase_header]
         end
-        file_header_size = file_headerA.size
       else
         raise SmarterCSV::IncorrectOption , "ERROR: If :headers_in_file is set to false, you have to provide :user_provided_headers" if options[:user_provided_headers].nil?
       end
@@ -84,6 +79,8 @@ module SmarterCSV
       else
         headerA = file_headerA
       end
+      header_size = headerA.size # used for splitting lines
       headerA.map!{|x| x.to_sym } unless options[:strings_as_keys] || options[:keep_original_headers]
       unless options[:user_provided_headers] # wouldn't make sense to re-map user provided headers
@@ -92,7 +89,7 @@ module SmarterCSV
         # do some key mapping on the keys in the file header
         #   if you want to completely delete a key, then map it to nil or to ''
         if ! key_mappingH.nil? && key_mappingH.class == Hash && key_mappingH.keys.size > 0
-          headerA.map!{|x| key_mappingH.has_key?(x) ? (key_mappingH[x].nil? ? nil : key_mappingH[x].to_sym) : (options[:remove_unmapped_keys] ? nil : x)}
+          headerA.map!{|x| key_mappingH.has_key?(x) ? (key_mappingH[x].nil? ? nil : key_mappingH[x]) : (options[:remove_unmapped_keys] ? nil : x)}
         end
       end
@@ -123,7 +120,7 @@ module SmarterCSV
       # now on to processing all the rest of the lines in the CSV file:
       while ! f.eof?    # we can't use f.readlines() here, because this would read the whole file into memory at once, and eof => true
-        line = f.readline  # read one line.. this uses the input_record_separator $/ which we set previously!
+        line = f.readline  # read one line.. this uses the input_record_separator $INPUT_RECORD_SEPARATOR which we set previously!
         # replace invalid byte sequence in UTF-8 with question mark to avoid errors
         line = line.force_encoding('utf-8').encode('utf-8', invalid: :replace, undef: :replace, replace: options[:invalid_byte_sequence]) if options[:force_utf8] || options[:file_encoding] !~ /utf-8/i
@@ -136,8 +133,8 @@ module SmarterCSV
         # cater for the quoted csv data containing the row separator carriage return character
         # in which case the row data will be split across multiple lines (see the sample content in spec/fixtures/carriage_returns_rn.csv)
         # by detecting the existence of an uneven number of quote characters
-        multiline = line.count(options[:quote_char])%2 == 1
-        while line.count(options[:quote_char])%2 == 1
+        multiline = line.count(options[:quote_char])%2 == 1 # should handle quote_char nil
+        while line.count(options[:quote_char])%2 == 1 # should handle quote_char nil
           next_line = f.readline
           next_line = next_line.force_encoding('utf-8').encode('utf-8', invalid: :replace, undef: :replace, replace: options[:invalid_byte_sequence]) if options[:force_utf8] || options[:file_encoding] !~ /utf-8/i
           line += next_line
@@ -145,20 +142,26 @@ module SmarterCSV
         end
         print "\nline contains uneven number of quote chars so including content through file line %d\n" % file_line_count if options[:verbose] && multiline
-        line.chomp!    # will use $/ which is set to options[:col_sep]
+        line.chomp!    # will use $INPUT_RECORD_SEPARATOR, which is set to options[:row_sep]
         if (line =~ %r{#{options[:quote_char]}}) and (! options[:force_simple_split])
           dataA = begin
-            CSV.parse( line, csv_options ).flatten.collect!{|x| x.nil? ? '' : x} # to deal with nil values from CSV.parse
+            CSV.parse( line, **csv_options ).flatten.collect!{|x| x.nil? ? '' : x} # to deal with nil values from CSV.parse
           rescue CSV::MalformedCSVError => e
             raise $!, "#{$!} [SmarterCSV: csv line #{csv_line_count}]", $!.backtrace
           end
         else
-          dataA =  line.split(options[:col_sep])
+          dataA =  line.split(options[:col_sep], header_size)
         end
 ####     dataA.map!{|x| x.gsub(%r/#{options[:quote_char]}/,'') }  # this is actually not a good idea as a default
         dataA.map!{|x| x.strip}  if options[:strip_whitespace]
+        # if all values are blank, then ignore this line
+        # SEE: https://github.com/rails/rails/blob/32015b6f369adc839c4f0955f2d9dce50c0b6123/activesupport/lib/active_support/core_ext/object/blank.rb#L121
+        next if options[:remove_empty_hashes] && blank?(dataA)
         hash = Hash.zip(headerA,dataA)  # from Facets of Ruby library
         # make sure we delete any key/value pairs from the hash, which the user wanted to delete:
         # Note: Ruby < 1.9 doesn't allow empty symbol literals!
         hash.delete(nil); hash.delete('');
@@ -166,18 +169,17 @@ module SmarterCSV
           eval('hash.delete(:"")')
         end
-        # remove empty values using the same regexp as used by the rails blank? method
-        # which caters for double \n and \r\n characters such as "1\r\n\r\n2" whereas the original check (v =~ /^\s*$/) does not
-        if options[:remove_empty_values]
+        if options[:remove_empty_values] == true
           if has_rails
             hash.delete_if{|k,v| v.blank?}
           else
-            hash.delete_if{|k,v| v.nil? || v !~ /[^[:space:]]/}
+            hash.delete_if{|k,v| blank?(v)}
           end
         end
         hash.delete_if{|k,v| ! v.nil? && v =~ /^(\d+|\d+\.\d+)$/ && v.to_f == 0} if options[:remove_zero_values]   # values are typically Strings!
         hash.delete_if{|k,v| v =~ options[:remove_values_matching]} if options[:remove_values_matching]
         if options[:convert_values_to_numeric]
           hash.each do |k,v|
             # deal with the :only / :except options to :convert_values_to_numeric
@@ -247,7 +249,7 @@ module SmarterCSV
         chunk = []  # initialize for next chunk of data
       end
     ensure
-      $/ = old_row_sep   # make sure this stupid global variable is always reset to it's previous value after we're done!
+      $INPUT_RECORD_SEPARATOR = old_row_sep   # make sure this stupid global variable is always reset to its previous value after we're done!
       f.close if f.respond_to?(:close)
     end
     if block_given?
@@ -258,8 +260,63 @@ module SmarterCSV
   end
   private
-  # acts as a road-block to limit processing when iterating over all k/v pairs of a CSV-hash:
+  def self.default_options
+    {
+      auto_row_sep_chars: 500,
+      chunk_size: nil ,
+      col_sep: ',',
+      comment_regexp: /\A#/,
+      convert_values_to_numeric: true,
+      downcase_header: true,
+      file_encoding: 'utf-8',
+      force_simple_split: false ,
+      force_utf8: false,
+      headers_in_file: true,
+      invalid_byte_sequence: '',
+      keep_original_headers: false,
+      key_mapping_hash: nil ,
+      quote_char: '"',
+      remove_empty_hashes: true ,
+      remove_empty_values: true,
+      remove_unmapped_keys: false,
+      remove_values_matching: nil,
+      remove_zero_values: false,
+      required_headers: nil,
+      row_sep: $INPUT_RECORD_SEPARATOR,
+      skip_lines: nil,
+      strings_as_keys: false,
+      strip_chars_from_headers: nil,
+      strip_whitespace: true,
+      user_provided_headers: nil,
+      value_converters: nil,
+      verbose: false,
+    }
+  end
+  def self.blank?(value)
+    case value
+    when Array
+      value.inject(true){|result, x| result &&= elem_blank?(x)}
+    when Hash
+      value.inject(true){|result, x| result &&= elem_blank?(x.last)}
+    else
+      elem_blank?(value)
+    end
+  end
+  def self.elem_blank?(value)
+    case value
+    when NilClass
+      true
+    when String
+      value !~ /\S/
+    else
+      false
+    end
+  end
+  # acts as a road-block to limit processing when iterating over all k/v pairs of a CSV-hash:
   def self.only_or_except_limit_execution( options, option_name, key )
     if options[option_name].is_a?(Hash)
       if options[option_name].has_key?( :except )
@@ -271,6 +328,24 @@ module SmarterCSV
     return false
   end
+  # raise exception if none is found
+  def self.guess_column_separator(filehandle)
+    del = [',', "\t", ';', ':', '|']
+    n = Hash.new(0)
+    5.times do
+      line = filehandle.readline
+      del.each do |d|
+        n[d] += line.scan(d).count
+      end
+    rescue EOFError # short files
+      break
+    end
+    filehandle.rewind
+    raise SmarterCSV::NoColSepDetected if n.values.max == 0
+    col_sep = n.key(n.values.max)
+  end
   # limitation: this currently reads the whole file in before making a decision
   def self.guess_line_ending( filehandle, options )
     counts = {"\n" => 0 , "\r" => 0, "\r\n" => 0}
@@ -297,6 +372,8 @@ module SmarterCSV
       lines += 1
       break if options[:auto_row_sep_chars] && options[:auto_row_sep_chars] > 0 && lines >= options[:auto_row_sep_chars]
     end
+    filehandle.rewind
     counts["\r"] += 1 if last_char == "\r"
     # find the key/value pair with the largest counter:
     k,_ = counts.max_by{|_,v| v}

data/lib/smarter_csv/version.rb CHANGED Viewed

@@ -1,3 +1,3 @@
 module SmarterCSV
-  VERSION = "1.2.7"
+  VERSION = "1.4.2"
 end

data/lib/smarter_csv.rb CHANGED Viewed

@@ -1,3 +1,11 @@
+if ENV['COVERAGE']
+  require 'simplecov'
+  SimpleCov.start do
+    add_filter "/spec/"
+    add_filter "/pkg/"
+  end
+end
 require 'csv'
 require "smarter_csv/version"
 require "extensions/hash.rb"

data/smarter_csv.gemspec CHANGED Viewed

@@ -1,21 +1,25 @@
 # -*- encoding: utf-8 -*-
 require File.expand_path('../lib/smarter_csv/version', __FILE__)
-Gem::Specification.new do |gem|
-  gem.authors       = ["Tilo Sloboda\n"]
-  gem.email         = ["tilo.sloboda@gmail.com\n"]
-  gem.description   = %q{Ruby Gem for smarter importing of CSV Files as Array(s) of Hashes, with optional features for processing large files in parallel, embedded comments, unusual field- and record-separators, flexible mapping of CSV-headers to Hash-keys}
-  gem.summary       = %q{Ruby Gem for smarter importing of CSV Files (and CSV-like files), with lots of optional features, e.g. chunked processing for huge CSV files}
-  gem.homepage      = "https://github.com/tilo/smarter_csv"
+Gem::Specification.new do |spec|
+  spec.name          = "smarter_csv"
+  spec.version       = SmarterCSV::VERSION
+  spec.authors       = ["Tilo Sloboda"]
+  spec.email         = ["tilo.sloboda@gmail.com"]
-  gem.files         = `git ls-files`.split($\)
-  gem.executables   = gem.files.grep(%r{^bin/}).map{ |f| File.basename(f) }
-  gem.test_files    = gem.files.grep(%r{^(test|spec|features)/})
-  gem.name          = "smarter_csv"
-  gem.require_paths = ["lib"]
-  gem.requirements  = ['csv'] # for CSV.parse() only needed in case we have quoted fields
-  gem.version       = SmarterCSV::VERSION
-  gem.licenses      = ['MIT','GPL-2']
-  gem.add_development_dependency "rspec"
-#  gem.add_development_dependency "guard-rspec"
+  spec.summary       = %q{Ruby Gem for smarter importing of CSV Files (and CSV-like files), with lots of optional features, e.g. chunked processing for huge CSV files}
+  spec.description   = %q{Ruby Gem for smarter importing of CSV Files as Array(s) of Hashes, with optional features for processing large files in parallel, embedded comments, unusual field- and record-separators, flexible mapping of CSV-headers to Hash-keys}
+  spec.homepage      = "https://github.com/tilo/smarter_csv"
+  spec.license       = 'MIT'
+  spec.files         = `git ls-files`.split($\)
+  spec.executables   = spec.files.grep(%r{^bin/}).map{ |f| File.basename(f) }
+  spec.test_files    = spec.files.grep(%r{^(test|spec|features)/})
+  spec.require_paths = ["lib"]
+  spec.requirements  = ['csv'] # for CSV.parse() only needed in case we have quoted fields
+  spec.add_development_dependency "rspec"
+  spec.add_development_dependency "simplecov"
+  #  spec.add_development_dependency "guard-rspec"
+  spec.metadata["homepage_uri"] = spec.homepage
 end

data/spec/fixtures/empty_columns_1.csv ADDED Viewed

	@@ -0,0 +1,2 @@
1	+ id,col1,col2,col3
2	+ 123,,,

data/spec/fixtures/empty_columns_2.csv ADDED Viewed

	@@ -0,0 +1,2 @@
1	+ id,col1,col2,col3
2	+ 123,,,1

data/spec/fixtures/key_mapping.csv ADDED Viewed

	@@ -0,0 +1,2 @@
1	+ THIS,THAT,other
2	+ this,that,other

data/spec/fixtures/numeric.csv CHANGED Viewed

@@ -1,5 +1,5 @@
 First Name,Last Name,Reference, Wealth
 Dan,McAllister,0123,3.5
-,,,,,
+,,,
 Miles,O'Brian,2345,3
 Nancy,Homes,2345,01

data/spec/fixtures/separator_colon.csv ADDED Viewed

@@ -0,0 +1,4 @@
+Year:Make:Model:Length
+1997:Ford:E350:2,34
+2000:Mercury:Cougar:2,38
+2013:Tesla:Model S:4,97

data/spec/fixtures/separator_comma.csv ADDED Viewed

@@ -0,0 +1,4 @@
+Year,Make,Model,Length
+1997,Ford,E350,2.34
+2000,Mercury,Cougar,2.38
+2013,Tesla,Model S,4.97

data/spec/fixtures/separator_pipe.csv ADDED Viewed

@@ -0,0 +1,4 @@
+Year|Make|Model|Length
+1997|Ford|E350|2,34
+2000|Mercury|Cougar|2,38
+2013|Tesla|Model S|4,97

data/spec/fixtures/{separator.csv → separator_semi.csv} RENAMED Viewed

File without changes

data/spec/fixtures/separator_tab.csv ADDED Viewed

@@ -0,0 +1,4 @@
+Year	Make	Model	Length
+1997	Ford	E350	2,34
+2000	Mercury	Cougar	2,38
+2013	Tesla	Model S	4,97

data/spec/smarter_csv/blank_spec.rb ADDED Viewed

@@ -0,0 +1,55 @@
+require 'spec_helper'
+describe 'blank?' do
+  it 'is true for nil' do
+    SmarterCSV.send(:blank?, nil).should eq true
+  end
+  it 'is true for empty string' do
+    SmarterCSV.send(:blank?, '').should eq true
+  end
+  it 'is true for blank string' do
+    SmarterCSV.send(:blank?, '   ').should eq true
+  end
+  it 'is true for tab string' do
+    SmarterCSV.send(:blank?, " \t ").should eq true
+  end
+  it 'is false for string with content' do
+    SmarterCSV.send(:blank?, " 1 ").should eq false
+  end
+  it 'is false for numeic values' do
+    SmarterCSV.send(:blank?, 1).should eq false
+  end
+  describe 'arrays' do
+    it 'is true for empty arrays' do
+      SmarterCSV.send(:blank?, []).should eq true
+    end
+    it 'is true for blank arrays' do
+      SmarterCSV.send(:blank?, [nil, '', '  ', " \t "]).should eq true
+    end
+    it 'is false for non-blank arrays' do
+      SmarterCSV.send(:blank?, [nil, '', '  ', " 1 "]).should eq false
+    end
+  end
+  describe 'hashes' do
+    it 'is true for empty arrays' do
+      SmarterCSV.send(:blank?, {}).should eq true
+    end
+    it 'is true for blank arrays' do
+      SmarterCSV.send(:blank?, {a: nil, b: '', c: '  ', d: " \t "}).should eq true
+    end
+    it 'is false for non-blank arrays' do
+      SmarterCSV.send(:blank?, {a: nil, b: '', c: '  ', d: " 1 "}).should eq false
+    end
+  end
+end

data/spec/smarter_csv/carriage_return_spec.rb CHANGED Viewed

@@ -3,7 +3,6 @@ require 'spec_helper'
 fixture_path = 'spec/fixtures'
 describe 'process files with line endings explicitly pre-specified' do
   it 'should process a file with \n for line endings and within data fields' do
     sep = "\n"
     options = {:row_sep => sep}
@@ -83,14 +82,14 @@ describe 'process files with line endings explicitly pre-specified' do
     data[1][:members].should == ["Jimmy Page", "Robert Plant", "John Bonham", "John Paul Jones"].join(text_sep)
     data[1][:albums].should == ["Led Zeppelin", "Led Zeppelin II", "Led Zeppelin III", "Led Zeppelin IV"].join(text_sep)
   end
 end
 describe 'process files with line endings in automatic mode' do
+  let(:options) { { row_sep: :auto } }
   it 'should process a file with \n for line endings and within data fields' do
     sep = "\n"
-    data = SmarterCSV.process("#{fixture_path}/carriage_returns_n.csv", {:row_sep => :auto})
+    data = SmarterCSV.process("#{fixture_path}/carriage_returns_n.csv", options)
     data.flatten.size.should == 8
     data[0][:name].should == "Anfield"
     data[0][:street].should == "Anfield Road"
@@ -112,7 +111,29 @@ describe 'process files with line endings in automatic mode' do
   it 'should process a file with \r for line endings and within data fields' do
     sep = "\r"
-    data = SmarterCSV.process("#{fixture_path}/carriage_returns_r.csv", {:row_sep => :auto})
+    data = SmarterCSV.process("#{fixture_path}/carriage_returns_r.csv", options)
+    data.flatten.size.should == 8
+    data[0][:name].should == "Anfield"
+    data[0][:street].should == "Anfield Road"
+    data[0][:city].should == "Liverpool"
+    data[1][:name].should == ["Highbury", "Highbury House"].join(sep)
+    data[2][:street].should == ["Sir Matt ", "Busby Way"].join(sep)
+    data[3][:city].should == ["Newcastle-upon-tyne ", "Tyne and Wear"].join(sep)
+    data[4][:name].should == ["White Hart Lane", "(The Lane)"].join(sep)
+    data[4][:street].should == ["Bill Nicholson Way ", "748 High Rd"].join(sep)
+    data[4][:city].should == ["Tottenham", "London"].join(sep)
+    data[5][:name].should == "Stamford Bridge"
+    data[5][:street].should == ["Fulham Road", "London"].join(sep)
+    data[5][:city].should be_nil
+    data[6][:name].should == ["Etihad Stadium", "Rowsley St", "Manchester"].join(sep)
+    data[7][:name].should == "Goodison"
+    data[7][:street].should == "Goodison Road"
+    data[7][:city].should == "Liverpool"
+  end
+  it 'also works when auto is given a string' do
+    sep = "\r"
+    data = SmarterCSV.process("#{fixture_path}/carriage_returns_r.csv", {row_sep: 'auto'})
     data.flatten.size.should == 8
     data[0][:name].should == "Anfield"
     data[0][:street].should == "Anfield Road"
@@ -134,7 +155,7 @@ describe 'process files with line endings in automatic mode' do
   it 'should process a file with \r\n for line endings and within data fields' do
     sep = "\r\n"
-    data = SmarterCSV.process("#{fixture_path}/carriage_returns_rn.csv", {:row_sep => :auto})
+    data = SmarterCSV.process("#{fixture_path}/carriage_returns_rn.csv", options)
     data.flatten.size.should == 8
     data[0][:name].should == "Anfield"
     data[0][:street].should == "Anfield Road"
@@ -157,7 +178,7 @@ describe 'process files with line endings in automatic mode' do
   it 'should process a file with more quoted text carriage return characters (\r) than line ending characters (\n)' do
     row_sep = "\n"
     text_sep = "\r"
-    data = SmarterCSV.process("#{fixture_path}/carriage_returns_quoted.csv", {:row_sep => :auto})
+    data = SmarterCSV.process("#{fixture_path}/carriage_returns_quoted.csv", options)
     data.flatten.size.should == 2
     data[0][:band].should == "New Order"
     data[0][:members].should == ["Bernard Sumner", "Peter Hook", "Stephen Morris", "Gillian Gilbert"].join(text_sep)
@@ -166,5 +187,4 @@ describe 'process files with line endings in automatic mode' do
     data[1][:members].should == ["Jimmy Page", "Robert Plant", "John Bonham", "John Paul Jones"].join(text_sep)
     data[1][:albums].should == ["Led Zeppelin", "Led Zeppelin II", "Led Zeppelin III", "Led Zeppelin IV"].join(text_sep)
   end
 end

data/spec/smarter_csv/column_separator_spec.rb CHANGED Viewed

@@ -2,10 +2,94 @@ require 'spec_helper'
 fixture_path = 'spec/fixtures'
-describe 'be_able_to' do
-  it 'loads_file_with_different_column_separator' do
-    options = {:col_sep => ';'}
-    data = SmarterCSV.process("#{fixture_path}/separator.csv", options)
-    data.flatten.size.should == 3
+describe 'can handle col_sep' do
+  it 'has default of comma as col_sep' do
+    data = SmarterCSV.process("#{fixture_path}/separator_comma.csv") # no options
+    data.first.keys.size.should == 4
+    data.size.should eq 3
+  end
+  describe 'with explicitly given col_sep' do
+    it 'loads file with comma separator' do
+      options = {:col_sep => ','}
+      data = SmarterCSV.process("#{fixture_path}/separator_comma.csv", options)
+      data.first.keys.size.should == 4
+      data.size.should eq 3
+    end
+    it 'loads file with tab separator' do
+      options = {:col_sep => "\t"}
+      data = SmarterCSV.process("#{fixture_path}/separator_tab.csv", options)
+      data.first.keys.size.should == 4
+      data.size.should eq 3
+    end
+    it 'loads file with semi-colon separator' do
+      options = {:col_sep => ';'}
+      data = SmarterCSV.process("#{fixture_path}/separator_semi.csv", options)
+      data.first.keys.size.should == 4
+      data.size.should eq 3
+    end
+    it 'loads file with colon separator' do
+      options = {:col_sep => ':'}
+      data = SmarterCSV.process("#{fixture_path}/separator_colon.csv", options)
+      data.first.keys.size.should == 4
+      data.size.should eq 3
+    end
+    it 'loads file with pipe separator' do
+      options = {:col_sep => '|'}
+      data = SmarterCSV.process("#{fixture_path}/separator_pipe.csv", options)
+      data.first.keys.size.should == 4
+      data.size.should eq 3
+    end
+  end
+  describe 'auto-detection of separator' do
+    options = {col_sep: :auto}
+    it 'auto-detects comma separator and loads data' do
+      data = SmarterCSV.process("#{fixture_path}/separator_comma.csv", options)
+      data.first.keys.size.should == 4
+      data.size.should eq 3
+    end
+    it 'auto-detects tab separator and loads data' do
+      data = SmarterCSV.process("#{fixture_path}/separator_tab.csv", options)
+      data.first.keys.size.should == 4
+      data.size.should eq 3
+    end
+    it 'auto-detects semi-colon separator and loads data' do
+      data = SmarterCSV.process("#{fixture_path}/separator_semi.csv", options)
+      data.first.keys.size.should == 4
+      data.size.should eq 3
+    end
+    it 'auto-detects colon separator and loads data' do
+      data = SmarterCSV.process("#{fixture_path}/separator_colon.csv", options)
+      data.first.keys.size.should == 4
+      data.size.should eq 3
+    end
+    it 'auto-detects pipe separator and loads data' do
+      data = SmarterCSV.process("#{fixture_path}/separator_pipe.csv", options)
+      data.first.keys.size.should == 4
+      data.size.should eq 3
+    end
+    it 'does not auto-detect other separators' do
+      expect {
+        SmarterCSV.process("#{fixture_path}/binary.csv", options)
+      }.to raise_exception SmarterCSV::NoColSepDetected
+    end
+    it 'also works when auto is given a string' do
+      data = SmarterCSV.process("#{fixture_path}/separator_pipe.csv", col_sep: 'auto')
+      data.first.keys.size.should == 4
+      data.size.should eq 3
+    end
   end
 end