smarter_csv 1.2.7 → 1.4.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -4,41 +4,36 @@ module SmarterCSV
4
4
  class IncorrectOption < SmarterCSVException; end
5
5
  class DuplicateHeaders < SmarterCSVException; end
6
6
  class MissingHeaders < SmarterCSVException; end
7
-
7
+ class NoColSepDetected < SmarterCSVException; end
8
8
 
9
9
  def SmarterCSV.process(input, options={}, &block) # first parameter: filename or input object with readline method
10
- default_options = {:col_sep => ',' , :row_sep => $/ , :quote_char => '"', :force_simple_split => false , :verbose => false ,
11
- :remove_empty_values => true, :remove_zero_values => false , :remove_values_matching => nil , :remove_empty_hashes => true , :strip_whitespace => true,
12
- :convert_values_to_numeric => true, :strip_chars_from_headers => nil , :user_provided_headers => nil , :headers_in_file => true,
13
- :comment_regexp => /\A#/, :chunk_size => nil , :key_mapping_hash => nil , :downcase_header => true, :strings_as_keys => false, :file_encoding => 'utf-8',
14
- :remove_unmapped_keys => false, :keep_original_headers => false, :value_converters => nil, :skip_lines => nil, :force_utf8 => false, :invalid_byte_sequence => '',
15
- :auto_row_sep_chars => 500, :required_headers => nil
16
- }
17
10
  options = default_options.merge(options)
18
11
  options[:invalid_byte_sequence] = '' if options[:invalid_byte_sequence].nil?
19
- csv_options = options.select{|k,v| [:col_sep, :row_sep, :quote_char].include?(k)} # options.slice(:col_sep, :row_sep, :quote_char)
12
+
20
13
  headerA = []
21
14
  result = []
22
- old_row_sep = $/
15
+ old_row_sep = $INPUT_RECORD_SEPARATOR
23
16
  file_line_count = 0
24
17
  csv_line_count = 0
25
18
  has_rails = !! defined?(Rails)
26
19
  begin
27
20
  f = input.respond_to?(:readline) ? input : File.open(input, "r:#{options[:file_encoding]}")
28
21
 
22
+ # auto-detect the row separator
23
+ options[:row_sep] = SmarterCSV.guess_line_ending(f, options) if options[:row_sep].to_sym == :auto
24
+ $INPUT_RECORD_SEPARATOR = options[:row_sep]
25
+ # attempt to auto-detect column separator
26
+ options[:col_sep] = guess_column_separator(f) if options[:col_sep].to_sym == :auto
27
+ # preserve options, in case we need to call the CSV class
28
+ csv_options = options.select{|k,v| [:col_sep, :row_sep, :quote_char].include?(k)} # options.slice(:col_sep, :row_sep, :quote_char)
29
+ csv_options.delete(:row_sep) if [nil, :auto].include?( options[:row_sep].to_sym )
30
+ csv_options.delete(:col_sep) if [nil, :auto].include?( options[:col_sep].to_sym )
31
+
29
32
  if (options[:force_utf8] || options[:file_encoding] =~ /utf-8/i) && ( f.respond_to?(:external_encoding) && f.external_encoding != Encoding.find('UTF-8') || f.respond_to?(:encoding) && f.encoding != Encoding.find('UTF-8') )
30
33
  puts 'WARNING: you are trying to process UTF-8 input, but did not open the input with "b:utf-8" option. See README file "NOTES about File Encodings".'
31
34
  end
32
35
 
33
- if options[:row_sep] == :auto
34
- options[:row_sep] = line_ending = SmarterCSV.guess_line_ending( f, options )
35
- f.rewind
36
- end
37
- $/ = options[:row_sep]
38
-
39
- if options[:skip_lines].to_i > 0
40
- options[:skip_lines].to_i.times{f.readline}
41
- end
36
+ options[:skip_lines].to_i.times{f.readline} if options[:skip_lines].to_i > 0
42
37
 
43
38
  if options[:headers_in_file] # extract the header line
44
39
  # process the header line in the CSV file..
@@ -53,21 +48,21 @@ module SmarterCSV
53
48
 
54
49
  if (header =~ %r{#{options[:quote_char]}}) and (! options[:force_simple_split])
55
50
  file_headerA = begin
56
- CSV.parse( header, csv_options ).flatten.collect!{|x| x.nil? ? '' : x} # to deal with nil values from CSV.parse
51
+ CSV.parse( header, **csv_options ).flatten.collect!{|x| x.nil? ? '' : x} # to deal with nil values from CSV.parse
57
52
  rescue CSV::MalformedCSVError => e
58
53
  raise $!, "#{$!} [SmarterCSV: csv line #{csv_line_count}]", $!.backtrace
59
54
  end
60
55
  else
61
56
  file_headerA = header.split(options[:col_sep])
62
57
  end
58
+ file_header_size = file_headerA.size # before mapping, which could delete keys
59
+
63
60
  file_headerA.map!{|x| x.gsub(%r/#{options[:quote_char]}/,'') }
64
61
  file_headerA.map!{|x| x.strip} if options[:strip_whitespace]
65
62
  unless options[:keep_original_headers]
66
63
  file_headerA.map!{|x| x.gsub(/\s+|-+/,'_')}
67
64
  file_headerA.map!{|x| x.downcase } if options[:downcase_header]
68
65
  end
69
-
70
- file_header_size = file_headerA.size
71
66
  else
72
67
  raise SmarterCSV::IncorrectOption , "ERROR: If :headers_in_file is set to false, you have to provide :user_provided_headers" if options[:user_provided_headers].nil?
73
68
  end
@@ -84,6 +79,8 @@ module SmarterCSV
84
79
  else
85
80
  headerA = file_headerA
86
81
  end
82
+ header_size = headerA.size # used for splitting lines
83
+
87
84
  headerA.map!{|x| x.to_sym } unless options[:strings_as_keys] || options[:keep_original_headers]
88
85
 
89
86
  unless options[:user_provided_headers] # wouldn't make sense to re-map user provided headers
@@ -92,7 +89,7 @@ module SmarterCSV
92
89
  # do some key mapping on the keys in the file header
93
90
  # if you want to completely delete a key, then map it to nil or to ''
94
91
  if ! key_mappingH.nil? && key_mappingH.class == Hash && key_mappingH.keys.size > 0
95
- headerA.map!{|x| key_mappingH.has_key?(x) ? (key_mappingH[x].nil? ? nil : key_mappingH[x].to_sym) : (options[:remove_unmapped_keys] ? nil : x)}
92
+ headerA.map!{|x| key_mappingH.has_key?(x) ? (key_mappingH[x].nil? ? nil : key_mappingH[x]) : (options[:remove_unmapped_keys] ? nil : x)}
96
93
  end
97
94
  end
98
95
 
@@ -123,7 +120,7 @@ module SmarterCSV
123
120
 
124
121
  # now on to processing all the rest of the lines in the CSV file:
125
122
  while ! f.eof? # we can't use f.readlines() here, because this would read the whole file into memory at once, and eof => true
126
- line = f.readline # read one line.. this uses the input_record_separator $/ which we set previously!
123
+ line = f.readline # read one line.. this uses the input_record_separator $INPUT_RECORD_SEPARATOR which we set previously!
127
124
 
128
125
  # replace invalid byte sequence in UTF-8 with question mark to avoid errors
129
126
  line = line.force_encoding('utf-8').encode('utf-8', invalid: :replace, undef: :replace, replace: options[:invalid_byte_sequence]) if options[:force_utf8] || options[:file_encoding] !~ /utf-8/i
@@ -136,8 +133,8 @@ module SmarterCSV
136
133
  # cater for the quoted csv data containing the row separator carriage return character
137
134
  # in which case the row data will be split across multiple lines (see the sample content in spec/fixtures/carriage_returns_rn.csv)
138
135
  # by detecting the existence of an uneven number of quote characters
139
- multiline = line.count(options[:quote_char])%2 == 1
140
- while line.count(options[:quote_char])%2 == 1
136
+ multiline = line.count(options[:quote_char])%2 == 1 # should handle quote_char nil
137
+ while line.count(options[:quote_char])%2 == 1 # should handle quote_char nil
141
138
  next_line = f.readline
142
139
  next_line = next_line.force_encoding('utf-8').encode('utf-8', invalid: :replace, undef: :replace, replace: options[:invalid_byte_sequence]) if options[:force_utf8] || options[:file_encoding] !~ /utf-8/i
143
140
  line += next_line
@@ -145,20 +142,26 @@ module SmarterCSV
145
142
  end
146
143
  print "\nline contains uneven number of quote chars so including content through file line %d\n" % file_line_count if options[:verbose] && multiline
147
144
 
148
- line.chomp! # will use $/ which is set to options[:col_sep]
145
+ line.chomp! # will use $INPUT_RECORD_SEPARATOR which is set to options[:row_sep]
149
146
 
150
147
  if (line =~ %r{#{options[:quote_char]}}) and (! options[:force_simple_split])
151
148
  dataA = begin
152
- CSV.parse( line, csv_options ).flatten.collect!{|x| x.nil? ? '' : x} # to deal with nil values from CSV.parse
149
+ CSV.parse( line, **csv_options ).flatten.collect!{|x| x.nil? ? '' : x} # to deal with nil values from CSV.parse
153
150
  rescue CSV::MalformedCSVError => e
154
151
  raise $!, "#{$!} [SmarterCSV: csv line #{csv_line_count}]", $!.backtrace
155
152
  end
156
153
  else
157
- dataA = line.split(options[:col_sep])
154
+ dataA = line.split(options[:col_sep], header_size)
158
155
  end
159
156
  #### dataA.map!{|x| x.gsub(%r/#{options[:quote_char]}/,'') } # this is actually not a good idea as a default
160
157
  dataA.map!{|x| x.strip} if options[:strip_whitespace]
158
+
159
+ # if all values are blank, then ignore this line
160
+ # SEE: https://github.com/rails/rails/blob/32015b6f369adc839c4f0955f2d9dce50c0b6123/activesupport/lib/active_support/core_ext/object/blank.rb#L121
161
+ next if options[:remove_empty_hashes] && blank?(dataA)
162
+
161
163
  hash = Hash.zip(headerA,dataA) # from Facets of Ruby library
164
+
162
165
  # make sure we delete any key/value pairs from the hash, which the user wanted to delete:
163
166
  # Note: Ruby < 1.9 doesn't allow empty symbol literals!
164
167
  hash.delete(nil); hash.delete('');
@@ -166,18 +169,17 @@ module SmarterCSV
166
169
  eval('hash.delete(:"")')
167
170
  end
168
171
 
169
- # remove empty values using the same regexp as used by the rails blank? method
170
- # which caters for double \n and \r\n characters such as "1\r\n\r\n2" whereas the original check (v =~ /^\s*$/) does not
171
- if options[:remove_empty_values]
172
+ if options[:remove_empty_values] == true
172
173
  if has_rails
173
174
  hash.delete_if{|k,v| v.blank?}
174
175
  else
175
- hash.delete_if{|k,v| v.nil? || v !~ /[^[:space:]]/}
176
+ hash.delete_if{|k,v| blank?(v)}
176
177
  end
177
178
  end
178
179
 
179
180
  hash.delete_if{|k,v| ! v.nil? && v =~ /^(\d+|\d+\.\d+)$/ && v.to_f == 0} if options[:remove_zero_values] # values are typically Strings!
180
181
  hash.delete_if{|k,v| v =~ options[:remove_values_matching]} if options[:remove_values_matching]
182
+
181
183
  if options[:convert_values_to_numeric]
182
184
  hash.each do |k,v|
183
185
  # deal with the :only / :except options to :convert_values_to_numeric
@@ -247,7 +249,7 @@ module SmarterCSV
247
249
  chunk = [] # initialize for next chunk of data
248
250
  end
249
251
  ensure
250
- $/ = old_row_sep # make sure this stupid global variable is always reset to it's previous value after we're done!
252
+ $INPUT_RECORD_SEPARATOR = old_row_sep # make sure this stupid global variable is always reset to its previous value after we're done!
251
253
  f.close if f.respond_to?(:close)
252
254
  end
253
255
  if block_given?
@@ -258,8 +260,63 @@ module SmarterCSV
258
260
  end
259
261
 
260
262
  private
261
- # acts as a road-block to limit processing when iterating over all k/v pairs of a CSV-hash:
262
263
 
264
# Returns the option defaults that SmarterCSV.process merges the caller's options into.
#
# A new Hash is built on every call, so the result can be merged or modified freely,
# and :row_sep picks up whatever $INPUT_RECORD_SEPARATOR holds at the time of the call.
def self.default_options
  {
    :auto_row_sep_chars        => 500,
    :chunk_size                => nil,
    :col_sep                   => ',',
    :comment_regexp            => /\A#/,
    :convert_values_to_numeric => true,
    :downcase_header           => true,
    :file_encoding             => 'utf-8',
    :force_simple_split        => false,
    :force_utf8                => false,
    :headers_in_file           => true,
    :invalid_byte_sequence     => '',
    :keep_original_headers     => false,
    :key_mapping_hash          => nil,
    :quote_char                => '"',
    :remove_empty_hashes       => true,
    :remove_empty_values       => true,
    :remove_unmapped_keys      => false,
    :remove_values_matching    => nil,
    :remove_zero_values        => false,
    :required_headers          => nil,
    :row_sep                   => $INPUT_RECORD_SEPARATOR,
    :skip_lines                => nil,
    :strings_as_keys           => false,
    :strip_chars_from_headers  => nil,
    :strip_whitespace          => true,
    :user_provided_headers     => nil,
    :value_converters          => nil,
    :verbose                   => false
  }
end
296
+
297
# Tells whether +value+ carries no content.
#
# * Array - true when every element is blank (so an empty Array is blank)
# * Hash  - true when every value is blank; keys are ignored (an empty Hash is blank)
# * other - whatever elem_blank? says about the value itself
#
# Elements are checked one level deep only: a nested Array or Hash element is
# passed to elem_blank? unchanged.
def self.blank?(value)
  case value
  when Array
    value.all? { |elem| elem_blank?(elem) }
  when Hash
    value.each_value.all? { |elem| elem_blank?(elem) }
  else
    elem_blank?(value)
  end
end
307
+
308
# Tells whether a single (non-collection) value is blank.
#
# * nil    - blank
# * String - blank when it contains nothing but whitespace (or is empty)
# * other  - never blank (numbers, collections, ...)
#
# The String check uses the POSIX [[:space:]] class rather than \s / \S:
# \s only knows ASCII whitespace, so a cell holding e.g. a non-breaking space
# would be treated as content here while String#blank? (used instead of this
# helper when Rails is loaded) treats it as blank.
def self.elem_blank?(value)
  case value
  when NilClass
    true
  when String
    value !~ /[^[:space:]]/
  else
    false
  end
end
318
+
319
+ # acts as a road-block to limit processing when iterating over all k/v pairs of a CSV-hash:
263
320
  def self.only_or_except_limit_execution( options, option_name, key )
264
321
  if options[option_name].is_a?(Hash)
265
322
  if options[option_name].has_key?( :except )
@@ -271,6 +328,24 @@ module SmarterCSV
271
328
  return false
272
329
  end
273
330
 
331
# Guesses the column separator by counting candidate delimiters at the start of the input.
#
# Reads up to five lines with filehandle.readline (fewer when the input is shorter),
# tallies how often each of ',', TAB, ';', ':' and '|' occurs in them, and rewinds
# the filehandle afterwards so that regular processing starts at the beginning again.
#
# filehandle - object responding to readline and rewind
#
# Returns the candidate that occurred most often; on a tie the one listed first wins.
# Raises SmarterCSV::NoColSepDetected when none of the candidates occurs at all,
# which includes completely empty input.
def self.guess_column_separator(filehandle)
  candidates = [',', "\t", ';', ':', '|']
  tally = Hash.new(0)

  begin
    5.times do
      line = filehandle.readline
      candidates.each { |sep| tally[sep] += line.count(sep) }
    end
  rescue EOFError
    # short input: fewer than five lines available, work with what was read
  end
  filehandle.rewind

  # tally stays empty when not a single line could be read, hence the nil check
  best = tally.values.max
  raise SmarterCSV::NoColSepDetected if best.nil? || best.zero?

  tally.key(best)
end
348
+
274
349
  # limitation: this currently reads the whole file in before making a decision
275
350
  def self.guess_line_ending( filehandle, options )
276
351
  counts = {"\n" => 0 , "\r" => 0, "\r\n" => 0}
@@ -297,6 +372,8 @@ module SmarterCSV
297
372
  lines += 1
298
373
  break if options[:auto_row_sep_chars] && options[:auto_row_sep_chars] > 0 && lines >= options[:auto_row_sep_chars]
299
374
  end
375
+ filehandle.rewind
376
+
300
377
  counts["\r"] += 1 if last_char == "\r"
301
378
  # find the key/value pair with the largest counter:
302
379
  k,_ = counts.max_by{|_,v| v}
@@ -1,3 +1,3 @@
1
1
  module SmarterCSV
2
- VERSION = "1.2.7"
2
+ VERSION = "1.4.2"
3
3
  end
data/lib/smarter_csv.rb CHANGED
@@ -1,3 +1,11 @@
1
+ if ENV['COVERAGE']
2
+ require 'simplecov'
3
+ SimpleCov.start do
4
+ add_filter "/spec/"
5
+ add_filter "/pkg/"
6
+ end
7
+ end
8
+
1
9
  require 'csv'
2
10
  require "smarter_csv/version"
3
11
  require "extensions/hash.rb"
data/smarter_csv.gemspec CHANGED
@@ -1,21 +1,25 @@
1
1
  # -*- encoding: utf-8 -*-
2
2
  require File.expand_path('../lib/smarter_csv/version', __FILE__)
3
3
 
4
- Gem::Specification.new do |gem|
5
- gem.authors = ["Tilo Sloboda\n"]
6
- gem.email = ["tilo.sloboda@gmail.com\n"]
7
- gem.description = %q{Ruby Gem for smarter importing of CSV Files as Array(s) of Hashes, with optional features for processing large files in parallel, embedded comments, unusual field- and record-separators, flexible mapping of CSV-headers to Hash-keys}
8
- gem.summary = %q{Ruby Gem for smarter importing of CSV Files (and CSV-like files), with lots of optional features, e.g. chunked processing for huge CSV files}
9
- gem.homepage = "https://github.com/tilo/smarter_csv"
4
+ Gem::Specification.new do |spec|
5
+ spec.name = "smarter_csv"
6
+ spec.version = SmarterCSV::VERSION
7
+ spec.authors = ["Tilo Sloboda"]
8
+ spec.email = ["tilo.sloboda@gmail.com"]
10
9
 
11
- gem.files = `git ls-files`.split($\)
12
- gem.executables = gem.files.grep(%r{^bin/}).map{ |f| File.basename(f) }
13
- gem.test_files = gem.files.grep(%r{^(test|spec|features)/})
14
- gem.name = "smarter_csv"
15
- gem.require_paths = ["lib"]
16
- gem.requirements = ['csv'] # for CSV.parse() only needed in case we have quoted fields
17
- gem.version = SmarterCSV::VERSION
18
- gem.licenses = ['MIT','GPL-2']
19
- gem.add_development_dependency "rspec"
20
- # gem.add_development_dependency "guard-rspec"
10
+ spec.summary = %q{Ruby Gem for smarter importing of CSV Files (and CSV-like files), with lots of optional features, e.g. chunked processing for huge CSV files}
11
+ spec.description = %q{Ruby Gem for smarter importing of CSV Files as Array(s) of Hashes, with optional features for processing large files in parallel, embedded comments, unusual field- and record-separators, flexible mapping of CSV-headers to Hash-keys}
12
+ spec.homepage = "https://github.com/tilo/smarter_csv"
13
+ spec.license = 'MIT'
14
+
15
+ spec.files = `git ls-files`.split($\)
16
+ spec.executables = spec.files.grep(%r{^bin/}).map{ |f| File.basename(f) }
17
+ spec.test_files = spec.files.grep(%r{^(test|spec|features)/})
18
+ spec.require_paths = ["lib"]
19
+ spec.requirements = ['csv'] # for CSV.parse() only needed in case we have quoted fields
20
+ spec.add_development_dependency "rspec"
21
+ spec.add_development_dependency "simplecov"
22
+ # spec.add_development_dependency "guard-rspec"
23
+
24
+ spec.metadata["homepage_uri"] = spec.homepage
21
25
  end
@@ -0,0 +1,2 @@
1
+ id,col1,col2,col3
2
+ 123,,,
@@ -0,0 +1,2 @@
1
+ id,col1,col2,col3
2
+ 123,,,1
@@ -0,0 +1,2 @@
1
+ THIS,THAT,other
2
+ this,that,other
@@ -1,5 +1,5 @@
1
1
  First Name,Last Name,Reference, Wealth
2
2
  Dan,McAllister,0123,3.5
3
- ,,,,,
3
+ ,,,
4
4
  Miles,O'Brian,2345,3
5
5
  Nancy,Homes,2345,01
@@ -0,0 +1,4 @@
1
+ Year:Make:Model:Length
2
+ 1997:Ford:E350:2,34
3
+ 2000:Mercury:Cougar:2,38
4
+ 2013:Tesla:Model S:4,97
@@ -0,0 +1,4 @@
1
+ Year,Make,Model,Length
2
+ 1997,Ford,E350,2.34
3
+ 2000,Mercury,Cougar,2.38
4
+ 2013,Tesla,Model S,4.97
@@ -0,0 +1,4 @@
1
+ Year|Make|Model|Length
2
+ 1997|Ford|E350|2,34
3
+ 2000|Mercury|Cougar|2,38
4
+ 2013|Tesla|Model S|4,97
File without changes
@@ -0,0 +1,4 @@
1
+ Year Make Model Length
2
+ 1997 Ford E350 2,34
3
+ 2000 Mercury Cougar 2,38
4
+ 2013 Tesla Model S 4,97
@@ -0,0 +1,55 @@
1
+ require 'spec_helper'
2
+
3
# Exercises the private SmarterCSV.blank? helper (reached via send) for
# scalar values, arrays and hashes.
describe 'blank?' do
  it 'is true for nil' do
    SmarterCSV.send(:blank?, nil).should eq true
  end

  it 'is true for empty string' do
    SmarterCSV.send(:blank?, '').should eq true
  end

  it 'is true for blank string' do
    SmarterCSV.send(:blank?, ' ').should eq true
  end

  it 'is true for tab string' do
    SmarterCSV.send(:blank?, " \t ").should eq true
  end

  it 'is false for string with content' do
    SmarterCSV.send(:blank?, " 1 ").should eq false
  end

  it 'is false for numeric values' do
    SmarterCSV.send(:blank?, 1).should eq false
  end

  describe 'arrays' do
    it 'is true for empty arrays' do
      SmarterCSV.send(:blank?, []).should eq true
    end

    it 'is true for blank arrays' do
      SmarterCSV.send(:blank?, [nil, '', ' ', " \t "]).should eq true
    end

    it 'is false for non-blank arrays' do
      SmarterCSV.send(:blank?, [nil, '', ' ', " 1 "]).should eq false
    end
  end

  describe 'hashes' do
    it 'is true for empty hashes' do
      SmarterCSV.send(:blank?, {}).should eq true
    end

    it 'is true for blank hashes' do
      SmarterCSV.send(:blank?, {a: nil, b: '', c: ' ', d: " \t "}).should eq true
    end

    it 'is false for non-blank hashes' do
      SmarterCSV.send(:blank?, {a: nil, b: '', c: ' ', d: " 1 "}).should eq false
    end
  end
end
@@ -3,7 +3,6 @@ require 'spec_helper'
3
3
  fixture_path = 'spec/fixtures'
4
4
 
5
5
  describe 'process files with line endings explicitly pre-specified' do
6
-
7
6
  it 'should process a file with \n for line endings and within data fields' do
8
7
  sep = "\n"
9
8
  options = {:row_sep => sep}
@@ -83,14 +82,14 @@ describe 'process files with line endings explicitly pre-specified' do
83
82
  data[1][:members].should == ["Jimmy Page", "Robert Plant", "John Bonham", "John Paul Jones"].join(text_sep)
84
83
  data[1][:albums].should == ["Led Zeppelin", "Led Zeppelin II", "Led Zeppelin III", "Led Zeppelin IV"].join(text_sep)
85
84
  end
86
-
87
85
  end
88
86
 
89
87
  describe 'process files with line endings in automatic mode' do
88
+ let(:options) { { row_sep: :auto } }
90
89
 
91
90
  it 'should process a file with \n for line endings and within data fields' do
92
91
  sep = "\n"
93
- data = SmarterCSV.process("#{fixture_path}/carriage_returns_n.csv", {:row_sep => :auto})
92
+ data = SmarterCSV.process("#{fixture_path}/carriage_returns_n.csv", options)
94
93
  data.flatten.size.should == 8
95
94
  data[0][:name].should == "Anfield"
96
95
  data[0][:street].should == "Anfield Road"
@@ -112,7 +111,29 @@ describe 'process files with line endings in automatic mode' do
112
111
 
113
112
  it 'should process a file with \r for line endings and within data fields' do
114
113
  sep = "\r"
115
- data = SmarterCSV.process("#{fixture_path}/carriage_returns_r.csv", {:row_sep => :auto})
114
+ data = SmarterCSV.process("#{fixture_path}/carriage_returns_r.csv", options)
115
+ data.flatten.size.should == 8
116
+ data[0][:name].should == "Anfield"
117
+ data[0][:street].should == "Anfield Road"
118
+ data[0][:city].should == "Liverpool"
119
+ data[1][:name].should == ["Highbury", "Highbury House"].join(sep)
120
+ data[2][:street].should == ["Sir Matt ", "Busby Way"].join(sep)
121
+ data[3][:city].should == ["Newcastle-upon-tyne ", "Tyne and Wear"].join(sep)
122
+ data[4][:name].should == ["White Hart Lane", "(The Lane)"].join(sep)
123
+ data[4][:street].should == ["Bill Nicholson Way ", "748 High Rd"].join(sep)
124
+ data[4][:city].should == ["Tottenham", "London"].join(sep)
125
+ data[5][:name].should == "Stamford Bridge"
126
+ data[5][:street].should == ["Fulham Road", "London"].join(sep)
127
+ data[5][:city].should be_nil
128
+ data[6][:name].should == ["Etihad Stadium", "Rowsley St", "Manchester"].join(sep)
129
+ data[7][:name].should == "Goodison"
130
+ data[7][:street].should == "Goodison Road"
131
+ data[7][:city].should == "Liverpool"
132
+ end
133
+
134
+ it 'also works when auto is given a string' do
135
+ sep = "\r"
136
+ data = SmarterCSV.process("#{fixture_path}/carriage_returns_r.csv", {row_sep: 'auto'})
116
137
  data.flatten.size.should == 8
117
138
  data[0][:name].should == "Anfield"
118
139
  data[0][:street].should == "Anfield Road"
@@ -134,7 +155,7 @@ describe 'process files with line endings in automatic mode' do
134
155
 
135
156
  it 'should process a file with \r\n for line endings and within data fields' do
136
157
  sep = "\r\n"
137
- data = SmarterCSV.process("#{fixture_path}/carriage_returns_rn.csv", {:row_sep => :auto})
158
+ data = SmarterCSV.process("#{fixture_path}/carriage_returns_rn.csv", options)
138
159
  data.flatten.size.should == 8
139
160
  data[0][:name].should == "Anfield"
140
161
  data[0][:street].should == "Anfield Road"
@@ -157,7 +178,7 @@ describe 'process files with line endings in automatic mode' do
157
178
  it 'should process a file with more quoted text carriage return characters (\r) than line ending characters (\n)' do
158
179
  row_sep = "\n"
159
180
  text_sep = "\r"
160
- data = SmarterCSV.process("#{fixture_path}/carriage_returns_quoted.csv", {:row_sep => :auto})
181
+ data = SmarterCSV.process("#{fixture_path}/carriage_returns_quoted.csv", options)
161
182
  data.flatten.size.should == 2
162
183
  data[0][:band].should == "New Order"
163
184
  data[0][:members].should == ["Bernard Sumner", "Peter Hook", "Stephen Morris", "Gillian Gilbert"].join(text_sep)
@@ -166,5 +187,4 @@ describe 'process files with line endings in automatic mode' do
166
187
  data[1][:members].should == ["Jimmy Page", "Robert Plant", "John Bonham", "John Paul Jones"].join(text_sep)
167
188
  data[1][:albums].should == ["Led Zeppelin", "Led Zeppelin II", "Led Zeppelin III", "Led Zeppelin IV"].join(text_sep)
168
189
  end
169
-
170
190
  end
@@ -2,10 +2,94 @@ require 'spec_helper'
2
2
 
3
3
  fixture_path = 'spec/fixtures'
4
4
 
5
- describe 'be_able_to' do
6
- it 'loads_file_with_different_column_separator' do
7
- options = {:col_sep => ';'}
8
- data = SmarterCSV.process("#{fixture_path}/separator.csv", options)
9
- data.flatten.size.should == 3
5
+ describe 'can handle col_sep' do
6
+
7
+ it 'has default of comma as col_sep' do
8
+ data = SmarterCSV.process("#{fixture_path}/separator_comma.csv") # no options
9
+ data.first.keys.size.should == 4
10
+ data.size.should eq 3
11
+ end
12
+
13
+ describe 'with explicitly given col_sep' do
14
+ it 'loads file with comma separator' do
15
+ options = {:col_sep => ','}
16
+ data = SmarterCSV.process("#{fixture_path}/separator_comma.csv", options)
17
+ data.first.keys.size.should == 4
18
+ data.size.should eq 3
19
+ end
20
+
21
+ it 'loads file with tab separator' do
22
+ options = {:col_sep => "\t"}
23
+ data = SmarterCSV.process("#{fixture_path}/separator_tab.csv", options)
24
+ data.first.keys.size.should == 4
25
+ data.size.should eq 3
26
+ end
27
+
28
+ it 'loads file with semi-colon separator' do
29
+ options = {:col_sep => ';'}
30
+ data = SmarterCSV.process("#{fixture_path}/separator_semi.csv", options)
31
+ data.first.keys.size.should == 4
32
+ data.size.should eq 3
33
+ end
34
+
35
+ it 'loads file with colon separator' do
36
+ options = {:col_sep => ':'}
37
+ data = SmarterCSV.process("#{fixture_path}/separator_colon.csv", options)
38
+ data.first.keys.size.should == 4
39
+ data.size.should eq 3
40
+ end
41
+
42
+ it 'loads file with pipe separator' do
43
+ options = {:col_sep => '|'}
44
+ data = SmarterCSV.process("#{fixture_path}/separator_pipe.csv", options)
45
+ data.first.keys.size.should == 4
46
+ data.size.should eq 3
47
+ end
48
+ end
49
+
50
+ describe 'auto-detection of separator' do
51
+ options = {col_sep: :auto}
52
+
53
+ it 'auto-detects comma separator and loads data' do
54
+ data = SmarterCSV.process("#{fixture_path}/separator_comma.csv", options)
55
+ data.first.keys.size.should == 4
56
+ data.size.should eq 3
57
+ end
58
+
59
+ it 'auto-detects tab separator and loads data' do
60
+ data = SmarterCSV.process("#{fixture_path}/separator_tab.csv", options)
61
+ data.first.keys.size.should == 4
62
+ data.size.should eq 3
63
+ end
64
+
65
+ it 'auto-detects semi-colon separator and loads data' do
66
+ data = SmarterCSV.process("#{fixture_path}/separator_semi.csv", options)
67
+ data.first.keys.size.should == 4
68
+ data.size.should eq 3
69
+ end
70
+
71
+ it 'auto-detects colon separator and loads data' do
72
+ data = SmarterCSV.process("#{fixture_path}/separator_colon.csv", options)
73
+ data.first.keys.size.should == 4
74
+ data.size.should eq 3
75
+ end
76
+
77
+ it 'auto-detects pipe separator and loads data' do
78
+ data = SmarterCSV.process("#{fixture_path}/separator_pipe.csv", options)
79
+ data.first.keys.size.should == 4
80
+ data.size.should eq 3
81
+ end
82
+
83
+ it 'does not auto-detect other separators' do
84
+ expect {
85
+ SmarterCSV.process("#{fixture_path}/binary.csv", options)
86
+ }.to raise_exception SmarterCSV::NoColSepDetected
87
+ end
88
+
89
+ it 'also works when auto is given a string' do
90
+ data = SmarterCSV.process("#{fixture_path}/separator_pipe.csv", col_sep: 'auto')
91
+ data.first.keys.size.should == 4
92
+ data.size.should eq 3
93
+ end
10
94
  end
11
95
  end