smarter_csv 1.2.7 → 1.4.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.gitignore +2 -0
- data/.travis.yml +8 -4
- data/CHANGELOG.md +169 -0
- data/CONTRIBUTORS.md +45 -0
- data/LICENSE.txt +21 -0
- data/README.md +47 -219
- data/Rakefile +8 -15
- data/lib/smarter_csv/smarter_csv.rb +112 -35
- data/lib/smarter_csv/version.rb +1 -1
- data/lib/smarter_csv.rb +8 -0
- data/smarter_csv.gemspec +20 -16
- data/spec/fixtures/empty_columns_1.csv +2 -0
- data/spec/fixtures/empty_columns_2.csv +2 -0
- data/spec/fixtures/key_mapping.csv +2 -0
- data/spec/fixtures/numeric.csv +1 -1
- data/spec/fixtures/separator_colon.csv +4 -0
- data/spec/fixtures/separator_comma.csv +4 -0
- data/spec/fixtures/separator_pipe.csv +4 -0
- data/spec/fixtures/{separator.csv → separator_semi.csv} +0 -0
- data/spec/fixtures/separator_tab.csv +4 -0
- data/spec/smarter_csv/blank_spec.rb +55 -0
- data/spec/smarter_csv/carriage_return_spec.rb +27 -7
- data/spec/smarter_csv/column_separator_spec.rb +89 -5
- data/spec/smarter_csv/empty_columns_spec.rb +74 -0
- data/spec/smarter_csv/key_mapping_spec.rb +31 -0
- data/spec/smarter_csv/malformed_spec.rb +0 -4
- metadata +44 -13
@@ -4,41 +4,36 @@ module SmarterCSV
|
|
4
4
|
class IncorrectOption < SmarterCSVException; end
|
5
5
|
class DuplicateHeaders < SmarterCSVException; end
|
6
6
|
class MissingHeaders < SmarterCSVException; end
|
7
|
-
|
7
|
+
class NoColSepDetected < SmarterCSVException; end
|
8
8
|
|
9
9
|
def SmarterCSV.process(input, options={}, &block) # first parameter: filename or input object with readline method
|
10
|
-
default_options = {:col_sep => ',' , :row_sep => $/ , :quote_char => '"', :force_simple_split => false , :verbose => false ,
|
11
|
-
:remove_empty_values => true, :remove_zero_values => false , :remove_values_matching => nil , :remove_empty_hashes => true , :strip_whitespace => true,
|
12
|
-
:convert_values_to_numeric => true, :strip_chars_from_headers => nil , :user_provided_headers => nil , :headers_in_file => true,
|
13
|
-
:comment_regexp => /\A#/, :chunk_size => nil , :key_mapping_hash => nil , :downcase_header => true, :strings_as_keys => false, :file_encoding => 'utf-8',
|
14
|
-
:remove_unmapped_keys => false, :keep_original_headers => false, :value_converters => nil, :skip_lines => nil, :force_utf8 => false, :invalid_byte_sequence => '',
|
15
|
-
:auto_row_sep_chars => 500, :required_headers => nil
|
16
|
-
}
|
17
10
|
options = default_options.merge(options)
|
18
11
|
options[:invalid_byte_sequence] = '' if options[:invalid_byte_sequence].nil?
|
19
|
-
|
12
|
+
|
20
13
|
headerA = []
|
21
14
|
result = []
|
22
|
-
old_row_sep =
|
15
|
+
old_row_sep = $INPUT_RECORD_SEPARATOR
|
23
16
|
file_line_count = 0
|
24
17
|
csv_line_count = 0
|
25
18
|
has_rails = !! defined?(Rails)
|
26
19
|
begin
|
27
20
|
f = input.respond_to?(:readline) ? input : File.open(input, "r:#{options[:file_encoding]}")
|
28
21
|
|
22
|
+
# auto-detect the row separator
|
23
|
+
options[:row_sep] = SmarterCSV.guess_line_ending(f, options) if options[:row_sep].to_sym == :auto
|
24
|
+
$INPUT_RECORD_SEPARATOR = options[:row_sep]
|
25
|
+
# attempt to auto-detect column separator
|
26
|
+
options[:col_sep] = guess_column_separator(f) if options[:col_sep].to_sym == :auto
|
27
|
+
# preserve options, in case we need to call the CSV class
|
28
|
+
csv_options = options.select{|k,v| [:col_sep, :row_sep, :quote_char].include?(k)} # options.slice(:col_sep, :row_sep, :quote_char)
|
29
|
+
csv_options.delete(:row_sep) if [nil, :auto].include?( options[:row_sep].to_sym )
|
30
|
+
csv_options.delete(:col_sep) if [nil, :auto].include?( options[:col_sep].to_sym )
|
31
|
+
|
29
32
|
if (options[:force_utf8] || options[:file_encoding] =~ /utf-8/i) && ( f.respond_to?(:external_encoding) && f.external_encoding != Encoding.find('UTF-8') || f.respond_to?(:encoding) && f.encoding != Encoding.find('UTF-8') )
|
30
33
|
puts 'WARNING: you are trying to process UTF-8 input, but did not open the input with "b:utf-8" option. See README file "NOTES about File Encodings".'
|
31
34
|
end
|
32
35
|
|
33
|
-
if options[:
|
34
|
-
options[:row_sep] = line_ending = SmarterCSV.guess_line_ending( f, options )
|
35
|
-
f.rewind
|
36
|
-
end
|
37
|
-
$/ = options[:row_sep]
|
38
|
-
|
39
|
-
if options[:skip_lines].to_i > 0
|
40
|
-
options[:skip_lines].to_i.times{f.readline}
|
41
|
-
end
|
36
|
+
options[:skip_lines].to_i.times{f.readline} if options[:skip_lines].to_i > 0
|
42
37
|
|
43
38
|
if options[:headers_in_file] # extract the header line
|
44
39
|
# process the header line in the CSV file..
|
@@ -53,21 +48,21 @@ module SmarterCSV
|
|
53
48
|
|
54
49
|
if (header =~ %r{#{options[:quote_char]}}) and (! options[:force_simple_split])
|
55
50
|
file_headerA = begin
|
56
|
-
CSV.parse( header, csv_options ).flatten.collect!{|x| x.nil? ? '' : x} # to deal with nil values from CSV.parse
|
51
|
+
CSV.parse( header, **csv_options ).flatten.collect!{|x| x.nil? ? '' : x} # to deal with nil values from CSV.parse
|
57
52
|
rescue CSV::MalformedCSVError => e
|
58
53
|
raise $!, "#{$!} [SmarterCSV: csv line #{csv_line_count}]", $!.backtrace
|
59
54
|
end
|
60
55
|
else
|
61
56
|
file_headerA = header.split(options[:col_sep])
|
62
57
|
end
|
58
|
+
file_header_size = file_headerA.size # before mapping, which could delete keys
|
59
|
+
|
63
60
|
file_headerA.map!{|x| x.gsub(%r/#{options[:quote_char]}/,'') }
|
64
61
|
file_headerA.map!{|x| x.strip} if options[:strip_whitespace]
|
65
62
|
unless options[:keep_original_headers]
|
66
63
|
file_headerA.map!{|x| x.gsub(/\s+|-+/,'_')}
|
67
64
|
file_headerA.map!{|x| x.downcase } if options[:downcase_header]
|
68
65
|
end
|
69
|
-
|
70
|
-
file_header_size = file_headerA.size
|
71
66
|
else
|
72
67
|
raise SmarterCSV::IncorrectOption , "ERROR: If :headers_in_file is set to false, you have to provide :user_provided_headers" if options[:user_provided_headers].nil?
|
73
68
|
end
|
@@ -84,6 +79,8 @@ module SmarterCSV
|
|
84
79
|
else
|
85
80
|
headerA = file_headerA
|
86
81
|
end
|
82
|
+
header_size = headerA.size # used for splitting lines
|
83
|
+
|
87
84
|
headerA.map!{|x| x.to_sym } unless options[:strings_as_keys] || options[:keep_original_headers]
|
88
85
|
|
89
86
|
unless options[:user_provided_headers] # wouldn't make sense to re-map user provided headers
|
@@ -92,7 +89,7 @@ module SmarterCSV
|
|
92
89
|
# do some key mapping on the keys in the file header
|
93
90
|
# if you want to completely delete a key, then map it to nil or to ''
|
94
91
|
if ! key_mappingH.nil? && key_mappingH.class == Hash && key_mappingH.keys.size > 0
|
95
|
-
headerA.map!{|x| key_mappingH.has_key?(x) ? (key_mappingH[x].nil? ? nil : key_mappingH[x]
|
92
|
+
headerA.map!{|x| key_mappingH.has_key?(x) ? (key_mappingH[x].nil? ? nil : key_mappingH[x]) : (options[:remove_unmapped_keys] ? nil : x)}
|
96
93
|
end
|
97
94
|
end
|
98
95
|
|
@@ -123,7 +120,7 @@ module SmarterCSV
|
|
123
120
|
|
124
121
|
# now on to processing all the rest of the lines in the CSV file:
|
125
122
|
while ! f.eof? # we can't use f.readlines() here, because this would read the whole file into memory at once, and eof => true
|
126
|
-
line = f.readline # read one line.. this uses the input_record_separator
|
123
|
+
line = f.readline # read one line.. this uses the input_record_separator $INPUT_RECORD_SEPARATOR which we set previously!
|
127
124
|
|
128
125
|
# replace invalid byte sequence in UTF-8 with question mark to avoid errors
|
129
126
|
line = line.force_encoding('utf-8').encode('utf-8', invalid: :replace, undef: :replace, replace: options[:invalid_byte_sequence]) if options[:force_utf8] || options[:file_encoding] !~ /utf-8/i
|
@@ -136,8 +133,8 @@ module SmarterCSV
|
|
136
133
|
# cater for the quoted csv data containing the row separator carriage return character
|
137
134
|
# in which case the row data will be split across multiple lines (see the sample content in spec/fixtures/carriage_returns_rn.csv)
|
138
135
|
# by detecting the existence of an uneven number of quote characters
|
139
|
-
multiline = line.count(options[:quote_char])%2 == 1
|
140
|
-
while line.count(options[:quote_char])%2 == 1
|
136
|
+
multiline = line.count(options[:quote_char])%2 == 1 # should handle quote_char nil
|
137
|
+
while line.count(options[:quote_char])%2 == 1 # should handle quote_char nil
|
141
138
|
next_line = f.readline
|
142
139
|
next_line = next_line.force_encoding('utf-8').encode('utf-8', invalid: :replace, undef: :replace, replace: options[:invalid_byte_sequence]) if options[:force_utf8] || options[:file_encoding] !~ /utf-8/i
|
143
140
|
line += next_line
|
@@ -145,20 +142,26 @@ module SmarterCSV
|
|
145
142
|
end
|
146
143
|
print "\nline contains uneven number of quote chars so including content through file line %d\n" % file_line_count if options[:verbose] && multiline
|
147
144
|
|
148
|
-
line.chomp! # will use
|
145
|
+
line.chomp! # will use $INPUT_RECORD_SEPARATOR which is set to options[:row_sep]
|
149
146
|
|
150
147
|
if (line =~ %r{#{options[:quote_char]}}) and (! options[:force_simple_split])
|
151
148
|
dataA = begin
|
152
|
-
CSV.parse( line, csv_options ).flatten.collect!{|x| x.nil? ? '' : x} # to deal with nil values from CSV.parse
|
149
|
+
CSV.parse( line, **csv_options ).flatten.collect!{|x| x.nil? ? '' : x} # to deal with nil values from CSV.parse
|
153
150
|
rescue CSV::MalformedCSVError => e
|
154
151
|
raise $!, "#{$!} [SmarterCSV: csv line #{csv_line_count}]", $!.backtrace
|
155
152
|
end
|
156
153
|
else
|
157
|
-
dataA = line.split(options[:col_sep])
|
154
|
+
dataA = line.split(options[:col_sep], header_size)
|
158
155
|
end
|
159
156
|
#### dataA.map!{|x| x.gsub(%r/#{options[:quote_char]}/,'') } # this is actually not a good idea as a default
|
160
157
|
dataA.map!{|x| x.strip} if options[:strip_whitespace]
|
158
|
+
|
159
|
+
# if all values are blank, then ignore this line
|
160
|
+
# SEE: https://github.com/rails/rails/blob/32015b6f369adc839c4f0955f2d9dce50c0b6123/activesupport/lib/active_support/core_ext/object/blank.rb#L121
|
161
|
+
next if options[:remove_empty_hashes] && blank?(dataA)
|
162
|
+
|
161
163
|
hash = Hash.zip(headerA,dataA) # from Facets of Ruby library
|
164
|
+
|
162
165
|
# make sure we delete any key/value pairs from the hash, which the user wanted to delete:
|
163
166
|
# Note: Ruby < 1.9 doesn't allow empty symbol literals!
|
164
167
|
hash.delete(nil); hash.delete('');
|
@@ -166,18 +169,17 @@ module SmarterCSV
|
|
166
169
|
eval('hash.delete(:"")')
|
167
170
|
end
|
168
171
|
|
169
|
-
|
170
|
-
# which caters for double \n and \r\n characters such as "1\r\n\r\n2" whereas the original check (v =~ /^\s*$/) does not
|
171
|
-
if options[:remove_empty_values]
|
172
|
+
if options[:remove_empty_values] == true
|
172
173
|
if has_rails
|
173
174
|
hash.delete_if{|k,v| v.blank?}
|
174
175
|
else
|
175
|
-
hash.delete_if{|k,v|
|
176
|
+
hash.delete_if{|k,v| blank?(v)}
|
176
177
|
end
|
177
178
|
end
|
178
179
|
|
179
180
|
hash.delete_if{|k,v| ! v.nil? && v =~ /^(\d+|\d+\.\d+)$/ && v.to_f == 0} if options[:remove_zero_values] # values are typically Strings!
|
180
181
|
hash.delete_if{|k,v| v =~ options[:remove_values_matching]} if options[:remove_values_matching]
|
182
|
+
|
181
183
|
if options[:convert_values_to_numeric]
|
182
184
|
hash.each do |k,v|
|
183
185
|
# deal with the :only / :except options to :convert_values_to_numeric
|
@@ -247,7 +249,7 @@ module SmarterCSV
|
|
247
249
|
chunk = [] # initialize for next chunk of data
|
248
250
|
end
|
249
251
|
ensure
|
250
|
-
|
252
|
+
$INPUT_RECORD_SEPARATOR = old_row_sep # make sure this stupid global variable is always reset to its previous value after we're done!
|
251
253
|
f.close if f.respond_to?(:close)
|
252
254
|
end
|
253
255
|
if block_given?
|
@@ -258,8 +260,63 @@ module SmarterCSV
|
|
258
260
|
end
|
259
261
|
|
260
262
|
private
|
261
|
-
# acts as a road-block to limit processing when iterating over all k/v pairs of a CSV-hash:
|
262
263
|
|
264
|
+
# Builds the Hash of default option values, sorted alphabetically by key.
#
# A new Hash is created on every call, so callers may merge into or mutate
# the result without affecting later calls.
#
# @return [Hash{Symbol => Object}] option name => default value
def self.default_options
  {
    auto_row_sep_chars: 500,
    chunk_size: nil,
    col_sep: ',',
    comment_regexp: /\A#/,
    convert_values_to_numeric: true,
    downcase_header: true,
    file_encoding: 'utf-8',
    force_simple_split: false,
    force_utf8: false,
    headers_in_file: true,
    invalid_byte_sequence: '',
    keep_original_headers: false,
    key_mapping_hash: nil,
    quote_char: '"',
    remove_empty_hashes: true,
    remove_empty_values: true,
    remove_unmapped_keys: false,
    remove_values_matching: nil,
    remove_zero_values: false,
    required_headers: nil,
    # `$/` is the built-in name of the input record separator. The alias
    # $INPUT_RECORD_SEPARATOR only exists once the 'English' library has been
    # loaded; without it the alias silently evaluates to nil.
    row_sep: $/,
    skip_lines: nil,
    strings_as_keys: false,
    strip_chars_from_headers: nil,
    strip_whitespace: true,
    user_provided_headers: nil,
    value_converters: nil,
    verbose: false,
  }
end
|
296
|
+
|
297
|
+
# Tells whether +value+ carries no data.
#
# An Array is blank when every element is blank, a Hash when every value is
# blank (keys are ignored); empty collections are therefore blank. Anything
# else is judged by elem_blank?.
#
# @param value [Object] a single value, an Array of values, or a Hash
# @return [Boolean]
def self.blank?(value)
  case value
  when Array
    # all? stops at the first non-blank element
    value.all? { |elem| elem_blank?(elem) }
  when Hash
    value.each_value.all? { |elem| elem_blank?(elem) }
  else
    elem_blank?(value)
  end
end

# Tells whether a single (non-collection) value is blank.
#
# nil is blank, a String is blank when it contains no non-whitespace
# character, and every other object (numbers, nested collections, ...) is
# considered non-blank.
#
# @param value [Object]
# @return [Boolean]
def self.elem_blank?(value)
  case value
  when nil
    true
  when String
    value !~ /\S/
  else
    false
  end
end
|
318
|
+
|
319
|
+
# acts as a road-block to limit processing when iterating over all k/v pairs of a CSV-hash:
|
263
320
|
def self.only_or_except_limit_execution( options, option_name, key )
|
264
321
|
if options[option_name].is_a?(Hash)
|
265
322
|
if options[option_name].has_key?( :except )
|
@@ -271,6 +328,24 @@ module SmarterCSV
|
|
271
328
|
return false
|
272
329
|
end
|
273
330
|
|
331
|
+
# raise exception if none is found
|
332
|
+
# Guesses the column separator by counting how often each candidate character
# (comma, tab, semicolon, colon, pipe) occurs in up to the first five lines
# read from +filehandle+, and returning the most frequent one. On a tie the
# candidate listed first wins.
#
# The handle is rewound before returning or raising, so the caller can read
# the input again from the start.
#
# @param filehandle [#readline, #rewind] the input to inspect
# @return [String] the detected separator
# @raise [SmarterCSV::NoColSepDetected] if no candidate occurs at all,
#   including when the input is empty
def self.guess_column_separator(filehandle)
  candidates = [',', "\t", ';', ':', '|']
  counts = Hash.new(0)
  5.times do
    line = filehandle.readline
    candidates.each { |sep| counts[sep] += line.count(sep) }
  rescue EOFError # short files
    break
  end
  filehandle.rewind

  # counts is empty (max is nil) when not a single line could be read
  best = counts.values.max
  raise SmarterCSV::NoColSepDetected if best.nil? || best.zero?

  counts.key(best)
end
|
348
|
+
|
274
349
|
# limitation: this currently reads the whole file in before making a decision
|
275
350
|
def self.guess_line_ending( filehandle, options )
|
276
351
|
counts = {"\n" => 0 , "\r" => 0, "\r\n" => 0}
|
@@ -297,6 +372,8 @@ module SmarterCSV
|
|
297
372
|
lines += 1
|
298
373
|
break if options[:auto_row_sep_chars] && options[:auto_row_sep_chars] > 0 && lines >= options[:auto_row_sep_chars]
|
299
374
|
end
|
375
|
+
filehandle.rewind
|
376
|
+
|
300
377
|
counts["\r"] += 1 if last_char == "\r"
|
301
378
|
# find the key/value pair with the largest counter:
|
302
379
|
k,_ = counts.max_by{|_,v| v}
|
data/lib/smarter_csv/version.rb
CHANGED
data/lib/smarter_csv.rb
CHANGED
data/smarter_csv.gemspec
CHANGED
@@ -1,21 +1,25 @@
|
|
1
1
|
# -*- encoding: utf-8 -*-
require File.expand_path('../lib/smarter_csv/version', __FILE__)

# Gem specification for smarter_csv.
Gem::Specification.new do |spec|
  spec.name          = "smarter_csv"
  spec.version       = SmarterCSV::VERSION
  spec.authors       = ["Tilo Sloboda"]
  spec.email         = ["tilo.sloboda@gmail.com"]

  spec.summary       = %q{Ruby Gem for smarter importing of CSV Files (and CSV-like files), with lots of optional features, e.g. chunked processing for huge CSV files}
  spec.description   = %q{Ruby Gem for smarter importing of CSV Files as Array(s) of Hashes, with optional features for processing large files in parallel, embedded comments, unusual field- and record-separators, flexible mapping of CSV-headers to Hash-keys}
  spec.homepage      = "https://github.com/tilo/smarter_csv"
  spec.license       = 'MIT'

  # NUL-separated output keeps paths containing whitespace intact; splitting on
  # $\ (nil by default) would split on any whitespace instead.
  spec.files         = `git ls-files -z`.split("\x0")
  spec.executables   = spec.files.grep(%r{^bin/}).map{ |f| File.basename(f) }
  spec.test_files    = spec.files.grep(%r{^(test|spec|features)/})
  spec.require_paths = ["lib"]
  spec.requirements  = ['csv'] # for CSV.parse() only needed in case we have quoted fields
  spec.add_development_dependency "rspec"
  spec.add_development_dependency "simplecov"
  # spec.add_development_dependency "guard-rspec"

  spec.metadata["homepage_uri"] = spec.homepage
end
|
data/spec/fixtures/numeric.csv
CHANGED
File without changes
|
@@ -0,0 +1,55 @@
|
|
1
|
+
require 'spec_helper'

# Exercises SmarterCSV.blank? for scalars, arrays and hashes.
# The method is invoked via `send`, exactly as in the examples below.
describe 'blank?' do
  it 'is true for nil' do
    SmarterCSV.send(:blank?, nil).should eq true
  end

  it 'is true for empty string' do
    SmarterCSV.send(:blank?, '').should eq true
  end

  it 'is true for blank string' do
    SmarterCSV.send(:blank?, ' ').should eq true
  end

  it 'is true for tab string' do
    SmarterCSV.send(:blank?, " \t ").should eq true
  end

  it 'is false for string with content' do
    SmarterCSV.send(:blank?, " 1 ").should eq false
  end

  it 'is false for numeric values' do
    SmarterCSV.send(:blank?, 1).should eq false
  end

  describe 'arrays' do
    it 'is true for empty arrays' do
      SmarterCSV.send(:blank?, []).should eq true
    end

    it 'is true for blank arrays' do
      SmarterCSV.send(:blank?, [nil, '', ' ', " \t "]).should eq true
    end

    it 'is false for non-blank arrays' do
      SmarterCSV.send(:blank?, [nil, '', ' ', " 1 "]).should eq false
    end
  end

  describe 'hashes' do
    it 'is true for empty hashes' do
      SmarterCSV.send(:blank?, {}).should eq true
    end

    it 'is true for blank hashes' do
      SmarterCSV.send(:blank?, {a: nil, b: '', c: ' ', d: " \t "}).should eq true
    end

    it 'is false for non-blank hashes' do
      SmarterCSV.send(:blank?, {a: nil, b: '', c: ' ', d: " 1 "}).should eq false
    end
  end
end
|
@@ -3,7 +3,6 @@ require 'spec_helper'
|
|
3
3
|
fixture_path = 'spec/fixtures'
|
4
4
|
|
5
5
|
describe 'process files with line endings explicitly pre-specified' do
|
6
|
-
|
7
6
|
it 'should process a file with \n for line endings and within data fields' do
|
8
7
|
sep = "\n"
|
9
8
|
options = {:row_sep => sep}
|
@@ -83,14 +82,14 @@ describe 'process files with line endings explicitly pre-specified' do
|
|
83
82
|
data[1][:members].should == ["Jimmy Page", "Robert Plant", "John Bonham", "John Paul Jones"].join(text_sep)
|
84
83
|
data[1][:albums].should == ["Led Zeppelin", "Led Zeppelin II", "Led Zeppelin III", "Led Zeppelin IV"].join(text_sep)
|
85
84
|
end
|
86
|
-
|
87
85
|
end
|
88
86
|
|
89
87
|
describe 'process files with line endings in automatic mode' do
|
88
|
+
let(:options) { { row_sep: :auto } }
|
90
89
|
|
91
90
|
it 'should process a file with \n for line endings and within data fields' do
|
92
91
|
sep = "\n"
|
93
|
-
data = SmarterCSV.process("#{fixture_path}/carriage_returns_n.csv",
|
92
|
+
data = SmarterCSV.process("#{fixture_path}/carriage_returns_n.csv", options)
|
94
93
|
data.flatten.size.should == 8
|
95
94
|
data[0][:name].should == "Anfield"
|
96
95
|
data[0][:street].should == "Anfield Road"
|
@@ -112,7 +111,29 @@ describe 'process files with line endings in automatic mode' do
|
|
112
111
|
|
113
112
|
it 'should process a file with \r for line endings and within data fields' do
|
114
113
|
sep = "\r"
|
115
|
-
data = SmarterCSV.process("#{fixture_path}/carriage_returns_r.csv",
|
114
|
+
data = SmarterCSV.process("#{fixture_path}/carriage_returns_r.csv", options)
|
115
|
+
data.flatten.size.should == 8
|
116
|
+
data[0][:name].should == "Anfield"
|
117
|
+
data[0][:street].should == "Anfield Road"
|
118
|
+
data[0][:city].should == "Liverpool"
|
119
|
+
data[1][:name].should == ["Highbury", "Highbury House"].join(sep)
|
120
|
+
data[2][:street].should == ["Sir Matt ", "Busby Way"].join(sep)
|
121
|
+
data[3][:city].should == ["Newcastle-upon-tyne ", "Tyne and Wear"].join(sep)
|
122
|
+
data[4][:name].should == ["White Hart Lane", "(The Lane)"].join(sep)
|
123
|
+
data[4][:street].should == ["Bill Nicholson Way ", "748 High Rd"].join(sep)
|
124
|
+
data[4][:city].should == ["Tottenham", "London"].join(sep)
|
125
|
+
data[5][:name].should == "Stamford Bridge"
|
126
|
+
data[5][:street].should == ["Fulham Road", "London"].join(sep)
|
127
|
+
data[5][:city].should be_nil
|
128
|
+
data[6][:name].should == ["Etihad Stadium", "Rowsley St", "Manchester"].join(sep)
|
129
|
+
data[7][:name].should == "Goodison"
|
130
|
+
data[7][:street].should == "Goodison Road"
|
131
|
+
data[7][:city].should == "Liverpool"
|
132
|
+
end
|
133
|
+
|
134
|
+
it 'also works when auto is given a string' do
|
135
|
+
sep = "\r"
|
136
|
+
data = SmarterCSV.process("#{fixture_path}/carriage_returns_r.csv", {row_sep: 'auto'})
|
116
137
|
data.flatten.size.should == 8
|
117
138
|
data[0][:name].should == "Anfield"
|
118
139
|
data[0][:street].should == "Anfield Road"
|
@@ -134,7 +155,7 @@ describe 'process files with line endings in automatic mode' do
|
|
134
155
|
|
135
156
|
it 'should process a file with \r\n for line endings and within data fields' do
|
136
157
|
sep = "\r\n"
|
137
|
-
data = SmarterCSV.process("#{fixture_path}/carriage_returns_rn.csv",
|
158
|
+
data = SmarterCSV.process("#{fixture_path}/carriage_returns_rn.csv", options)
|
138
159
|
data.flatten.size.should == 8
|
139
160
|
data[0][:name].should == "Anfield"
|
140
161
|
data[0][:street].should == "Anfield Road"
|
@@ -157,7 +178,7 @@ describe 'process files with line endings in automatic mode' do
|
|
157
178
|
it 'should process a file with more quoted text carriage return characters (\r) than line ending characters (\n)' do
|
158
179
|
row_sep = "\n"
|
159
180
|
text_sep = "\r"
|
160
|
-
data = SmarterCSV.process("#{fixture_path}/carriage_returns_quoted.csv",
|
181
|
+
data = SmarterCSV.process("#{fixture_path}/carriage_returns_quoted.csv", options)
|
161
182
|
data.flatten.size.should == 2
|
162
183
|
data[0][:band].should == "New Order"
|
163
184
|
data[0][:members].should == ["Bernard Sumner", "Peter Hook", "Stephen Morris", "Gillian Gilbert"].join(text_sep)
|
@@ -166,5 +187,4 @@ describe 'process files with line endings in automatic mode' do
|
|
166
187
|
data[1][:members].should == ["Jimmy Page", "Robert Plant", "John Bonham", "John Paul Jones"].join(text_sep)
|
167
188
|
data[1][:albums].should == ["Led Zeppelin", "Led Zeppelin II", "Led Zeppelin III", "Led Zeppelin IV"].join(text_sep)
|
168
189
|
end
|
169
|
-
|
170
190
|
end
|
@@ -2,10 +2,94 @@ require 'spec_helper'
|
|
2
2
|
|
3
3
|
fixture_path = 'spec/fixtures'
|
4
4
|
|
5
|
-
describe '
|
6
|
-
|
7
|
-
|
8
|
-
data = SmarterCSV.process("#{fixture_path}/
|
9
|
-
data.
|
5
|
+
# Verifies that each supported column separator is handled, both when it is
# passed explicitly via :col_sep and when it is auto-detected.
# Every separator fixture is expected to yield 3 rows with 4 keys each.
describe 'can handle col_sep' do
  # [label used in the example name, fixture file, separator character]
  separators = [
    ['comma',      'separator_comma.csv', ','],
    ['tab',        'separator_tab.csv',   "\t"],
    ['semi-colon', 'separator_semi.csv',  ';'],
    ['colon',      'separator_colon.csv', ':'],
    ['pipe',       'separator_pipe.csv',  '|'],
  ]

  it 'has default of comma as col_sep' do
    data = SmarterCSV.process("#{fixture_path}/separator_comma.csv") # no options
    expect(data.first.keys.size).to eq 4
    expect(data.size).to eq 3
  end

  describe 'with explicitly given col_sep' do
    separators.each do |label, fixture, col_sep|
      it "loads file with #{label} separator" do
        options = {:col_sep => col_sep}
        data = SmarterCSV.process("#{fixture_path}/#{fixture}", options)
        expect(data.first.keys.size).to eq 4
        expect(data.size).to eq 3
      end
    end
  end

  describe 'auto-detection of separator' do
    let(:options) { {col_sep: :auto} }

    separators.each do |label, fixture, _col_sep|
      it "auto-detects #{label} separator and loads data" do
        data = SmarterCSV.process("#{fixture_path}/#{fixture}", options)
        expect(data.first.keys.size).to eq 4
        expect(data.size).to eq 3
      end
    end

    it 'does not auto-detect other separators' do
      expect {
        SmarterCSV.process("#{fixture_path}/binary.csv", options)
      }.to raise_exception SmarterCSV::NoColSepDetected
    end

    it 'also works when auto is given a string' do
      data = SmarterCSV.process("#{fixture_path}/separator_pipe.csv", col_sep: 'auto')
      expect(data.first.keys.size).to eq 4
      expect(data.size).to eq 3
    end
  end
end
|