smarter_csv 1.2.7 → 1.4.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.gitignore +2 -0
- data/.travis.yml +8 -4
- data/CHANGELOG.md +169 -0
- data/CONTRIBUTORS.md +45 -0
- data/LICENSE.txt +21 -0
- data/README.md +47 -219
- data/Rakefile +8 -15
- data/lib/smarter_csv/smarter_csv.rb +112 -35
- data/lib/smarter_csv/version.rb +1 -1
- data/lib/smarter_csv.rb +8 -0
- data/smarter_csv.gemspec +20 -16
- data/spec/fixtures/empty_columns_1.csv +2 -0
- data/spec/fixtures/empty_columns_2.csv +2 -0
- data/spec/fixtures/key_mapping.csv +2 -0
- data/spec/fixtures/numeric.csv +1 -1
- data/spec/fixtures/separator_colon.csv +4 -0
- data/spec/fixtures/separator_comma.csv +4 -0
- data/spec/fixtures/separator_pipe.csv +4 -0
- data/spec/fixtures/{separator.csv → separator_semi.csv} +0 -0
- data/spec/fixtures/separator_tab.csv +4 -0
- data/spec/smarter_csv/blank_spec.rb +55 -0
- data/spec/smarter_csv/carriage_return_spec.rb +27 -7
- data/spec/smarter_csv/column_separator_spec.rb +89 -5
- data/spec/smarter_csv/empty_columns_spec.rb +74 -0
- data/spec/smarter_csv/key_mapping_spec.rb +31 -0
- data/spec/smarter_csv/malformed_spec.rb +0 -4
- metadata +44 -13
@@ -4,41 +4,36 @@ module SmarterCSV
|
|
4
4
|
class IncorrectOption < SmarterCSVException; end
|
5
5
|
class DuplicateHeaders < SmarterCSVException; end
|
6
6
|
class MissingHeaders < SmarterCSVException; end
|
7
|
-
|
7
|
+
class NoColSepDetected < SmarterCSVException; end
|
8
8
|
|
9
9
|
def SmarterCSV.process(input, options={}, &block) # first parameter: filename or input object with readline method
|
10
|
-
default_options = {:col_sep => ',' , :row_sep => $/ , :quote_char => '"', :force_simple_split => false , :verbose => false ,
|
11
|
-
:remove_empty_values => true, :remove_zero_values => false , :remove_values_matching => nil , :remove_empty_hashes => true , :strip_whitespace => true,
|
12
|
-
:convert_values_to_numeric => true, :strip_chars_from_headers => nil , :user_provided_headers => nil , :headers_in_file => true,
|
13
|
-
:comment_regexp => /\A#/, :chunk_size => nil , :key_mapping_hash => nil , :downcase_header => true, :strings_as_keys => false, :file_encoding => 'utf-8',
|
14
|
-
:remove_unmapped_keys => false, :keep_original_headers => false, :value_converters => nil, :skip_lines => nil, :force_utf8 => false, :invalid_byte_sequence => '',
|
15
|
-
:auto_row_sep_chars => 500, :required_headers => nil
|
16
|
-
}
|
17
10
|
options = default_options.merge(options)
|
18
11
|
options[:invalid_byte_sequence] = '' if options[:invalid_byte_sequence].nil?
|
19
|
-
|
12
|
+
|
20
13
|
headerA = []
|
21
14
|
result = []
|
22
|
-
old_row_sep =
|
15
|
+
old_row_sep = $INPUT_RECORD_SEPARATOR
|
23
16
|
file_line_count = 0
|
24
17
|
csv_line_count = 0
|
25
18
|
has_rails = !! defined?(Rails)
|
26
19
|
begin
|
27
20
|
f = input.respond_to?(:readline) ? input : File.open(input, "r:#{options[:file_encoding]}")
|
28
21
|
|
22
|
+
# auto-detect the row separator
|
23
|
+
options[:row_sep] = SmarterCSV.guess_line_ending(f, options) if options[:row_sep].to_sym == :auto
|
24
|
+
$INPUT_RECORD_SEPARATOR = options[:row_sep]
|
25
|
+
# attempt to auto-detect column separator
|
26
|
+
options[:col_sep] = guess_column_separator(f) if options[:col_sep].to_sym == :auto
|
27
|
+
# preserve options, in case we need to call the CSV class
|
28
|
+
csv_options = options.select{|k,v| [:col_sep, :row_sep, :quote_char].include?(k)} # options.slice(:col_sep, :row_sep, :quote_char)
|
29
|
+
csv_options.delete(:row_sep) if [nil, :auto].include?( options[:row_sep].to_sym )
|
30
|
+
csv_options.delete(:col_sep) if [nil, :auto].include?( options[:col_sep].to_sym )
|
31
|
+
|
29
32
|
if (options[:force_utf8] || options[:file_encoding] =~ /utf-8/i) && ( f.respond_to?(:external_encoding) && f.external_encoding != Encoding.find('UTF-8') || f.respond_to?(:encoding) && f.encoding != Encoding.find('UTF-8') )
|
30
33
|
puts 'WARNING: you are trying to process UTF-8 input, but did not open the input with "b:utf-8" option. See README file "NOTES about File Encodings".'
|
31
34
|
end
|
32
35
|
|
33
|
-
if options[:
|
34
|
-
options[:row_sep] = line_ending = SmarterCSV.guess_line_ending( f, options )
|
35
|
-
f.rewind
|
36
|
-
end
|
37
|
-
$/ = options[:row_sep]
|
38
|
-
|
39
|
-
if options[:skip_lines].to_i > 0
|
40
|
-
options[:skip_lines].to_i.times{f.readline}
|
41
|
-
end
|
36
|
+
options[:skip_lines].to_i.times{f.readline} if options[:skip_lines].to_i > 0
|
42
37
|
|
43
38
|
if options[:headers_in_file] # extract the header line
|
44
39
|
# process the header line in the CSV file..
|
@@ -53,21 +48,21 @@ module SmarterCSV
|
|
53
48
|
|
54
49
|
if (header =~ %r{#{options[:quote_char]}}) and (! options[:force_simple_split])
|
55
50
|
file_headerA = begin
|
56
|
-
CSV.parse( header, csv_options ).flatten.collect!{|x| x.nil? ? '' : x} # to deal with nil values from CSV.parse
|
51
|
+
CSV.parse( header, **csv_options ).flatten.collect!{|x| x.nil? ? '' : x} # to deal with nil values from CSV.parse
|
57
52
|
rescue CSV::MalformedCSVError => e
|
58
53
|
raise $!, "#{$!} [SmarterCSV: csv line #{csv_line_count}]", $!.backtrace
|
59
54
|
end
|
60
55
|
else
|
61
56
|
file_headerA = header.split(options[:col_sep])
|
62
57
|
end
|
58
|
+
file_header_size = file_headerA.size # before mapping, which could delete keys
|
59
|
+
|
63
60
|
file_headerA.map!{|x| x.gsub(%r/#{options[:quote_char]}/,'') }
|
64
61
|
file_headerA.map!{|x| x.strip} if options[:strip_whitespace]
|
65
62
|
unless options[:keep_original_headers]
|
66
63
|
file_headerA.map!{|x| x.gsub(/\s+|-+/,'_')}
|
67
64
|
file_headerA.map!{|x| x.downcase } if options[:downcase_header]
|
68
65
|
end
|
69
|
-
|
70
|
-
file_header_size = file_headerA.size
|
71
66
|
else
|
72
67
|
raise SmarterCSV::IncorrectOption , "ERROR: If :headers_in_file is set to false, you have to provide :user_provided_headers" if options[:user_provided_headers].nil?
|
73
68
|
end
|
@@ -84,6 +79,8 @@ module SmarterCSV
|
|
84
79
|
else
|
85
80
|
headerA = file_headerA
|
86
81
|
end
|
82
|
+
header_size = headerA.size # used for splitting lines
|
83
|
+
|
87
84
|
headerA.map!{|x| x.to_sym } unless options[:strings_as_keys] || options[:keep_original_headers]
|
88
85
|
|
89
86
|
unless options[:user_provided_headers] # wouldn't make sense to re-map user provided headers
|
@@ -92,7 +89,7 @@ module SmarterCSV
|
|
92
89
|
# do some key mapping on the keys in the file header
|
93
90
|
# if you want to completely delete a key, then map it to nil or to ''
|
94
91
|
if ! key_mappingH.nil? && key_mappingH.class == Hash && key_mappingH.keys.size > 0
|
95
|
-
headerA.map!{|x| key_mappingH.has_key?(x) ? (key_mappingH[x].nil? ? nil : key_mappingH[x]
|
92
|
+
headerA.map!{|x| key_mappingH.has_key?(x) ? (key_mappingH[x].nil? ? nil : key_mappingH[x]) : (options[:remove_unmapped_keys] ? nil : x)}
|
96
93
|
end
|
97
94
|
end
|
98
95
|
|
@@ -123,7 +120,7 @@ module SmarterCSV
|
|
123
120
|
|
124
121
|
# now on to processing all the rest of the lines in the CSV file:
|
125
122
|
while ! f.eof? # we can't use f.readlines() here, because this would read the whole file into memory at once, and eof => true
|
126
|
-
line = f.readline # read one line.. this uses the input_record_separator
|
123
|
+
line = f.readline # read one line.. this uses the input_record_separator $INPUT_RECORD_SEPARATOR which we set previously!
|
127
124
|
|
128
125
|
# replace invalid byte sequence in UTF-8 with question mark to avoid errors
|
129
126
|
line = line.force_encoding('utf-8').encode('utf-8', invalid: :replace, undef: :replace, replace: options[:invalid_byte_sequence]) if options[:force_utf8] || options[:file_encoding] !~ /utf-8/i
|
@@ -136,8 +133,8 @@ module SmarterCSV
|
|
136
133
|
# cater for the quoted csv data containing the row separator carriage return character
|
137
134
|
# in which case the row data will be split across multiple lines (see the sample content in spec/fixtures/carriage_returns_rn.csv)
|
138
135
|
# by detecting the existence of an uneven number of quote characters
|
139
|
-
multiline = line.count(options[:quote_char])%2 == 1
|
140
|
-
while line.count(options[:quote_char])%2 == 1
|
136
|
+
multiline = line.count(options[:quote_char])%2 == 1 # should handle quote_char nil
|
137
|
+
while line.count(options[:quote_char])%2 == 1 # should handle quote_char nil
|
141
138
|
next_line = f.readline
|
142
139
|
next_line = next_line.force_encoding('utf-8').encode('utf-8', invalid: :replace, undef: :replace, replace: options[:invalid_byte_sequence]) if options[:force_utf8] || options[:file_encoding] !~ /utf-8/i
|
143
140
|
line += next_line
|
@@ -145,20 +142,26 @@ module SmarterCSV
|
|
145
142
|
end
|
146
143
|
print "\nline contains uneven number of quote chars so including content through file line %d\n" % file_line_count if options[:verbose] && multiline
|
147
144
|
|
148
|
-
line.chomp! # will use
|
145
|
+
line.chomp! # will use $INPUT_RECORD_SEPARATOR which is set to options[:row_sep]
|
149
146
|
|
150
147
|
if (line =~ %r{#{options[:quote_char]}}) and (! options[:force_simple_split])
|
151
148
|
dataA = begin
|
152
|
-
CSV.parse( line, csv_options ).flatten.collect!{|x| x.nil? ? '' : x} # to deal with nil values from CSV.parse
|
149
|
+
CSV.parse( line, **csv_options ).flatten.collect!{|x| x.nil? ? '' : x} # to deal with nil values from CSV.parse
|
153
150
|
rescue CSV::MalformedCSVError => e
|
154
151
|
raise $!, "#{$!} [SmarterCSV: csv line #{csv_line_count}]", $!.backtrace
|
155
152
|
end
|
156
153
|
else
|
157
|
-
dataA = line.split(options[:col_sep])
|
154
|
+
dataA = line.split(options[:col_sep], header_size)
|
158
155
|
end
|
159
156
|
#### dataA.map!{|x| x.gsub(%r/#{options[:quote_char]}/,'') } # this is actually not a good idea as a default
|
160
157
|
dataA.map!{|x| x.strip} if options[:strip_whitespace]
|
158
|
+
|
159
|
+
# if all values are blank, then ignore this line
|
160
|
+
# SEE: https://github.com/rails/rails/blob/32015b6f369adc839c4f0955f2d9dce50c0b6123/activesupport/lib/active_support/core_ext/object/blank.rb#L121
|
161
|
+
next if options[:remove_empty_hashes] && blank?(dataA)
|
162
|
+
|
161
163
|
hash = Hash.zip(headerA,dataA) # from Facets of Ruby library
|
164
|
+
|
162
165
|
# make sure we delete any key/value pairs from the hash, which the user wanted to delete:
|
163
166
|
# Note: Ruby < 1.9 doesn't allow empty symbol literals!
|
164
167
|
hash.delete(nil); hash.delete('');
|
@@ -166,18 +169,17 @@ module SmarterCSV
|
|
166
169
|
eval('hash.delete(:"")')
|
167
170
|
end
|
168
171
|
|
169
|
-
|
170
|
-
# which caters for double \n and \r\n characters such as "1\r\n\r\n2" whereas the original check (v =~ /^\s*$/) does not
|
171
|
-
if options[:remove_empty_values]
|
172
|
+
if options[:remove_empty_values] == true
|
172
173
|
if has_rails
|
173
174
|
hash.delete_if{|k,v| v.blank?}
|
174
175
|
else
|
175
|
-
hash.delete_if{|k,v|
|
176
|
+
hash.delete_if{|k,v| blank?(v)}
|
176
177
|
end
|
177
178
|
end
|
178
179
|
|
179
180
|
hash.delete_if{|k,v| ! v.nil? && v =~ /^(\d+|\d+\.\d+)$/ && v.to_f == 0} if options[:remove_zero_values] # values are typically Strings!
|
180
181
|
hash.delete_if{|k,v| v =~ options[:remove_values_matching]} if options[:remove_values_matching]
|
182
|
+
|
181
183
|
if options[:convert_values_to_numeric]
|
182
184
|
hash.each do |k,v|
|
183
185
|
# deal with the :only / :except options to :convert_values_to_numeric
|
@@ -247,7 +249,7 @@ module SmarterCSV
|
|
247
249
|
chunk = [] # initialize for next chunk of data
|
248
250
|
end
|
249
251
|
ensure
|
250
|
-
|
252
|
+
$INPUT_RECORD_SEPARATOR = old_row_sep # make sure this stupid global variable is always reset to its previous value after we're done!
|
251
253
|
f.close if f.respond_to?(:close)
|
252
254
|
end
|
253
255
|
if block_given?
|
@@ -258,8 +260,63 @@ module SmarterCSV
|
|
258
260
|
end
|
259
261
|
|
260
262
|
private
|
261
|
-
# acts as a road-block to limit processing when iterating over all k/v pairs of a CSV-hash:
|
262
263
|
|
264
|
+
def self.default_options
|
265
|
+
{
|
266
|
+
auto_row_sep_chars: 500,
|
267
|
+
chunk_size: nil ,
|
268
|
+
col_sep: ',',
|
269
|
+
comment_regexp: /\A#/,
|
270
|
+
convert_values_to_numeric: true,
|
271
|
+
downcase_header: true,
|
272
|
+
file_encoding: 'utf-8',
|
273
|
+
force_simple_split: false ,
|
274
|
+
force_utf8: false,
|
275
|
+
headers_in_file: true,
|
276
|
+
invalid_byte_sequence: '',
|
277
|
+
keep_original_headers: false,
|
278
|
+
key_mapping_hash: nil ,
|
279
|
+
quote_char: '"',
|
280
|
+
remove_empty_hashes: true ,
|
281
|
+
remove_empty_values: true,
|
282
|
+
remove_unmapped_keys: false,
|
283
|
+
remove_values_matching: nil,
|
284
|
+
remove_zero_values: false,
|
285
|
+
required_headers: nil,
|
286
|
+
row_sep: $INPUT_RECORD_SEPARATOR,
|
287
|
+
skip_lines: nil,
|
288
|
+
strings_as_keys: false,
|
289
|
+
strip_chars_from_headers: nil,
|
290
|
+
strip_whitespace: true,
|
291
|
+
user_provided_headers: nil,
|
292
|
+
value_converters: nil,
|
293
|
+
verbose: false,
|
294
|
+
}
|
295
|
+
end
|
296
|
+
|
297
|
+
def self.blank?(value)
|
298
|
+
case value
|
299
|
+
when Array
|
300
|
+
value.inject(true){|result, x| result &&= elem_blank?(x)}
|
301
|
+
when Hash
|
302
|
+
value.inject(true){|result, x| result &&= elem_blank?(x.last)}
|
303
|
+
else
|
304
|
+
elem_blank?(value)
|
305
|
+
end
|
306
|
+
end
|
307
|
+
|
308
|
+
def self.elem_blank?(value)
|
309
|
+
case value
|
310
|
+
when NilClass
|
311
|
+
true
|
312
|
+
when String
|
313
|
+
value !~ /\S/
|
314
|
+
else
|
315
|
+
false
|
316
|
+
end
|
317
|
+
end
|
318
|
+
|
319
|
+
# acts as a road-block to limit processing when iterating over all k/v pairs of a CSV-hash:
|
263
320
|
def self.only_or_except_limit_execution( options, option_name, key )
|
264
321
|
if options[option_name].is_a?(Hash)
|
265
322
|
if options[option_name].has_key?( :except )
|
@@ -271,6 +328,24 @@ module SmarterCSV
|
|
271
328
|
return false
|
272
329
|
end
|
273
330
|
|
331
|
+
# raise exception if none is found
|
332
|
+
def self.guess_column_separator(filehandle)
|
333
|
+
del = [',', "\t", ';', ':', '|']
|
334
|
+
n = Hash.new(0)
|
335
|
+
5.times do
|
336
|
+
line = filehandle.readline
|
337
|
+
del.each do |d|
|
338
|
+
n[d] += line.scan(d).count
|
339
|
+
end
|
340
|
+
rescue EOFError # short files
|
341
|
+
break
|
342
|
+
end
|
343
|
+
filehandle.rewind
|
344
|
+
raise SmarterCSV::NoColSepDetected if n.values.max == 0
|
345
|
+
|
346
|
+
col_sep = n.key(n.values.max)
|
347
|
+
end
|
348
|
+
|
274
349
|
# limitation: this currently reads the whole file in before making a decision
|
275
350
|
def self.guess_line_ending( filehandle, options )
|
276
351
|
counts = {"\n" => 0 , "\r" => 0, "\r\n" => 0}
|
@@ -297,6 +372,8 @@ module SmarterCSV
|
|
297
372
|
lines += 1
|
298
373
|
break if options[:auto_row_sep_chars] && options[:auto_row_sep_chars] > 0 && lines >= options[:auto_row_sep_chars]
|
299
374
|
end
|
375
|
+
filehandle.rewind
|
376
|
+
|
300
377
|
counts["\r"] += 1 if last_char == "\r"
|
301
378
|
# find the key/value pair with the largest counter:
|
302
379
|
k,_ = counts.max_by{|_,v| v}
|
data/lib/smarter_csv/version.rb
CHANGED
data/lib/smarter_csv.rb
CHANGED
data/smarter_csv.gemspec
CHANGED
@@ -1,21 +1,25 @@
|
|
1
1
|
# -*- encoding: utf-8 -*-
|
2
2
|
require File.expand_path('../lib/smarter_csv/version', __FILE__)
|
3
3
|
|
4
|
-
Gem::Specification.new do |
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
gem.homepage = "https://github.com/tilo/smarter_csv"
|
4
|
+
Gem::Specification.new do |spec|
|
5
|
+
spec.name = "smarter_csv"
|
6
|
+
spec.version = SmarterCSV::VERSION
|
7
|
+
spec.authors = ["Tilo Sloboda"]
|
8
|
+
spec.email = ["tilo.sloboda@gmail.com"]
|
10
9
|
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
10
|
+
spec.summary = %q{Ruby Gem for smarter importing of CSV Files (and CSV-like files), with lots of optional features, e.g. chunked processing for huge CSV files}
|
11
|
+
spec.description = %q{Ruby Gem for smarter importing of CSV Files as Array(s) of Hashes, with optional features for processing large files in parallel, embedded comments, unusual field- and record-separators, flexible mapping of CSV-headers to Hash-keys}
|
12
|
+
spec.homepage = "https://github.com/tilo/smarter_csv"
|
13
|
+
spec.license = 'MIT'
|
14
|
+
|
15
|
+
spec.files = `git ls-files`.split($\)
|
16
|
+
spec.executables = spec.files.grep(%r{^bin/}).map{ |f| File.basename(f) }
|
17
|
+
spec.test_files = spec.files.grep(%r{^(test|spec|features)/})
|
18
|
+
spec.require_paths = ["lib"]
|
19
|
+
spec.requirements = ['csv'] # for CSV.parse() only needed in case we have quoted fields
|
20
|
+
spec.add_development_dependency "rspec"
|
21
|
+
spec.add_development_dependency "simplecov"
|
22
|
+
# spec.add_development_dependency "guard-rspec"
|
23
|
+
|
24
|
+
spec.metadata["homepage_uri"] = spec.homepage
|
21
25
|
end
|
data/spec/fixtures/numeric.csv
CHANGED
File without changes
|
@@ -0,0 +1,55 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
describe 'blank?' do
|
4
|
+
it 'is true for nil' do
|
5
|
+
SmarterCSV.send(:blank?, nil).should eq true
|
6
|
+
end
|
7
|
+
|
8
|
+
it 'is true for empty string' do
|
9
|
+
SmarterCSV.send(:blank?, '').should eq true
|
10
|
+
end
|
11
|
+
|
12
|
+
it 'is true for blank string' do
|
13
|
+
SmarterCSV.send(:blank?, ' ').should eq true
|
14
|
+
end
|
15
|
+
|
16
|
+
it 'is true for tab string' do
|
17
|
+
SmarterCSV.send(:blank?, " \t ").should eq true
|
18
|
+
end
|
19
|
+
|
20
|
+
it 'is false for string with content' do
|
21
|
+
SmarterCSV.send(:blank?, " 1 ").should eq false
|
22
|
+
end
|
23
|
+
|
24
|
+
it 'is false for numeic values' do
|
25
|
+
SmarterCSV.send(:blank?, 1).should eq false
|
26
|
+
end
|
27
|
+
|
28
|
+
describe 'arrays' do
|
29
|
+
it 'is true for empty arrays' do
|
30
|
+
SmarterCSV.send(:blank?, []).should eq true
|
31
|
+
end
|
32
|
+
|
33
|
+
it 'is true for blank arrays' do
|
34
|
+
SmarterCSV.send(:blank?, [nil, '', ' ', " \t "]).should eq true
|
35
|
+
end
|
36
|
+
|
37
|
+
it 'is false for non-blank arrays' do
|
38
|
+
SmarterCSV.send(:blank?, [nil, '', ' ', " 1 "]).should eq false
|
39
|
+
end
|
40
|
+
end
|
41
|
+
|
42
|
+
describe 'hashes' do
|
43
|
+
it 'is true for empty arrays' do
|
44
|
+
SmarterCSV.send(:blank?, {}).should eq true
|
45
|
+
end
|
46
|
+
|
47
|
+
it 'is true for blank arrays' do
|
48
|
+
SmarterCSV.send(:blank?, {a: nil, b: '', c: ' ', d: " \t "}).should eq true
|
49
|
+
end
|
50
|
+
|
51
|
+
it 'is false for non-blank arrays' do
|
52
|
+
SmarterCSV.send(:blank?, {a: nil, b: '', c: ' ', d: " 1 "}).should eq false
|
53
|
+
end
|
54
|
+
end
|
55
|
+
end
|
@@ -3,7 +3,6 @@ require 'spec_helper'
|
|
3
3
|
fixture_path = 'spec/fixtures'
|
4
4
|
|
5
5
|
describe 'process files with line endings explicitly pre-specified' do
|
6
|
-
|
7
6
|
it 'should process a file with \n for line endings and within data fields' do
|
8
7
|
sep = "\n"
|
9
8
|
options = {:row_sep => sep}
|
@@ -83,14 +82,14 @@ describe 'process files with line endings explicitly pre-specified' do
|
|
83
82
|
data[1][:members].should == ["Jimmy Page", "Robert Plant", "John Bonham", "John Paul Jones"].join(text_sep)
|
84
83
|
data[1][:albums].should == ["Led Zeppelin", "Led Zeppelin II", "Led Zeppelin III", "Led Zeppelin IV"].join(text_sep)
|
85
84
|
end
|
86
|
-
|
87
85
|
end
|
88
86
|
|
89
87
|
describe 'process files with line endings in automatic mode' do
|
88
|
+
let(:options) { { row_sep: :auto } }
|
90
89
|
|
91
90
|
it 'should process a file with \n for line endings and within data fields' do
|
92
91
|
sep = "\n"
|
93
|
-
data = SmarterCSV.process("#{fixture_path}/carriage_returns_n.csv",
|
92
|
+
data = SmarterCSV.process("#{fixture_path}/carriage_returns_n.csv", options)
|
94
93
|
data.flatten.size.should == 8
|
95
94
|
data[0][:name].should == "Anfield"
|
96
95
|
data[0][:street].should == "Anfield Road"
|
@@ -112,7 +111,29 @@ describe 'process files with line endings in automatic mode' do
|
|
112
111
|
|
113
112
|
it 'should process a file with \r for line endings and within data fields' do
|
114
113
|
sep = "\r"
|
115
|
-
data = SmarterCSV.process("#{fixture_path}/carriage_returns_r.csv",
|
114
|
+
data = SmarterCSV.process("#{fixture_path}/carriage_returns_r.csv", options)
|
115
|
+
data.flatten.size.should == 8
|
116
|
+
data[0][:name].should == "Anfield"
|
117
|
+
data[0][:street].should == "Anfield Road"
|
118
|
+
data[0][:city].should == "Liverpool"
|
119
|
+
data[1][:name].should == ["Highbury", "Highbury House"].join(sep)
|
120
|
+
data[2][:street].should == ["Sir Matt ", "Busby Way"].join(sep)
|
121
|
+
data[3][:city].should == ["Newcastle-upon-tyne ", "Tyne and Wear"].join(sep)
|
122
|
+
data[4][:name].should == ["White Hart Lane", "(The Lane)"].join(sep)
|
123
|
+
data[4][:street].should == ["Bill Nicholson Way ", "748 High Rd"].join(sep)
|
124
|
+
data[4][:city].should == ["Tottenham", "London"].join(sep)
|
125
|
+
data[5][:name].should == "Stamford Bridge"
|
126
|
+
data[5][:street].should == ["Fulham Road", "London"].join(sep)
|
127
|
+
data[5][:city].should be_nil
|
128
|
+
data[6][:name].should == ["Etihad Stadium", "Rowsley St", "Manchester"].join(sep)
|
129
|
+
data[7][:name].should == "Goodison"
|
130
|
+
data[7][:street].should == "Goodison Road"
|
131
|
+
data[7][:city].should == "Liverpool"
|
132
|
+
end
|
133
|
+
|
134
|
+
it 'also works when auto is given a string' do
|
135
|
+
sep = "\r"
|
136
|
+
data = SmarterCSV.process("#{fixture_path}/carriage_returns_r.csv", {row_sep: 'auto'})
|
116
137
|
data.flatten.size.should == 8
|
117
138
|
data[0][:name].should == "Anfield"
|
118
139
|
data[0][:street].should == "Anfield Road"
|
@@ -134,7 +155,7 @@ describe 'process files with line endings in automatic mode' do
|
|
134
155
|
|
135
156
|
it 'should process a file with \r\n for line endings and within data fields' do
|
136
157
|
sep = "\r\n"
|
137
|
-
data = SmarterCSV.process("#{fixture_path}/carriage_returns_rn.csv",
|
158
|
+
data = SmarterCSV.process("#{fixture_path}/carriage_returns_rn.csv", options)
|
138
159
|
data.flatten.size.should == 8
|
139
160
|
data[0][:name].should == "Anfield"
|
140
161
|
data[0][:street].should == "Anfield Road"
|
@@ -157,7 +178,7 @@ describe 'process files with line endings in automatic mode' do
|
|
157
178
|
it 'should process a file with more quoted text carriage return characters (\r) than line ending characters (\n)' do
|
158
179
|
row_sep = "\n"
|
159
180
|
text_sep = "\r"
|
160
|
-
data = SmarterCSV.process("#{fixture_path}/carriage_returns_quoted.csv",
|
181
|
+
data = SmarterCSV.process("#{fixture_path}/carriage_returns_quoted.csv", options)
|
161
182
|
data.flatten.size.should == 2
|
162
183
|
data[0][:band].should == "New Order"
|
163
184
|
data[0][:members].should == ["Bernard Sumner", "Peter Hook", "Stephen Morris", "Gillian Gilbert"].join(text_sep)
|
@@ -166,5 +187,4 @@ describe 'process files with line endings in automatic mode' do
|
|
166
187
|
data[1][:members].should == ["Jimmy Page", "Robert Plant", "John Bonham", "John Paul Jones"].join(text_sep)
|
167
188
|
data[1][:albums].should == ["Led Zeppelin", "Led Zeppelin II", "Led Zeppelin III", "Led Zeppelin IV"].join(text_sep)
|
168
189
|
end
|
169
|
-
|
170
190
|
end
|
@@ -2,10 +2,94 @@ require 'spec_helper'
|
|
2
2
|
|
3
3
|
fixture_path = 'spec/fixtures'
|
4
4
|
|
5
|
-
describe '
|
6
|
-
|
7
|
-
|
8
|
-
data = SmarterCSV.process("#{fixture_path}/
|
9
|
-
data.
|
5
|
+
describe 'can handle col_sep' do
|
6
|
+
|
7
|
+
it 'has default of comma as col_sep' do
|
8
|
+
data = SmarterCSV.process("#{fixture_path}/separator_comma.csv") # no options
|
9
|
+
data.first.keys.size.should == 4
|
10
|
+
data.size.should eq 3
|
11
|
+
end
|
12
|
+
|
13
|
+
describe 'with explicitly given col_sep' do
|
14
|
+
it 'loads file with comma separator' do
|
15
|
+
options = {:col_sep => ','}
|
16
|
+
data = SmarterCSV.process("#{fixture_path}/separator_comma.csv", options)
|
17
|
+
data.first.keys.size.should == 4
|
18
|
+
data.size.should eq 3
|
19
|
+
end
|
20
|
+
|
21
|
+
it 'loads file with tab separator' do
|
22
|
+
options = {:col_sep => "\t"}
|
23
|
+
data = SmarterCSV.process("#{fixture_path}/separator_tab.csv", options)
|
24
|
+
data.first.keys.size.should == 4
|
25
|
+
data.size.should eq 3
|
26
|
+
end
|
27
|
+
|
28
|
+
it 'loads file with semi-colon separator' do
|
29
|
+
options = {:col_sep => ';'}
|
30
|
+
data = SmarterCSV.process("#{fixture_path}/separator_semi.csv", options)
|
31
|
+
data.first.keys.size.should == 4
|
32
|
+
data.size.should eq 3
|
33
|
+
end
|
34
|
+
|
35
|
+
it 'loads file with colon separator' do
|
36
|
+
options = {:col_sep => ':'}
|
37
|
+
data = SmarterCSV.process("#{fixture_path}/separator_colon.csv", options)
|
38
|
+
data.first.keys.size.should == 4
|
39
|
+
data.size.should eq 3
|
40
|
+
end
|
41
|
+
|
42
|
+
it 'loads file with pipe separator' do
|
43
|
+
options = {:col_sep => '|'}
|
44
|
+
data = SmarterCSV.process("#{fixture_path}/separator_pipe.csv", options)
|
45
|
+
data.first.keys.size.should == 4
|
46
|
+
data.size.should eq 3
|
47
|
+
end
|
48
|
+
end
|
49
|
+
|
50
|
+
describe 'auto-detection of separator' do
|
51
|
+
options = {col_sep: :auto}
|
52
|
+
|
53
|
+
it 'auto-detects comma separator and loads data' do
|
54
|
+
data = SmarterCSV.process("#{fixture_path}/separator_comma.csv", options)
|
55
|
+
data.first.keys.size.should == 4
|
56
|
+
data.size.should eq 3
|
57
|
+
end
|
58
|
+
|
59
|
+
it 'auto-detects tab separator and loads data' do
|
60
|
+
data = SmarterCSV.process("#{fixture_path}/separator_tab.csv", options)
|
61
|
+
data.first.keys.size.should == 4
|
62
|
+
data.size.should eq 3
|
63
|
+
end
|
64
|
+
|
65
|
+
it 'auto-detects semi-colon separator and loads data' do
|
66
|
+
data = SmarterCSV.process("#{fixture_path}/separator_semi.csv", options)
|
67
|
+
data.first.keys.size.should == 4
|
68
|
+
data.size.should eq 3
|
69
|
+
end
|
70
|
+
|
71
|
+
it 'auto-detects colon separator and loads data' do
|
72
|
+
data = SmarterCSV.process("#{fixture_path}/separator_colon.csv", options)
|
73
|
+
data.first.keys.size.should == 4
|
74
|
+
data.size.should eq 3
|
75
|
+
end
|
76
|
+
|
77
|
+
it 'auto-detects pipe separator and loads data' do
|
78
|
+
data = SmarterCSV.process("#{fixture_path}/separator_pipe.csv", options)
|
79
|
+
data.first.keys.size.should == 4
|
80
|
+
data.size.should eq 3
|
81
|
+
end
|
82
|
+
|
83
|
+
it 'does not auto-detect other separators' do
|
84
|
+
expect {
|
85
|
+
SmarterCSV.process("#{fixture_path}/binary.csv", options)
|
86
|
+
}.to raise_exception SmarterCSV::NoColSepDetected
|
87
|
+
end
|
88
|
+
|
89
|
+
it 'also works when auto is given a string' do
|
90
|
+
data = SmarterCSV.process("#{fixture_path}/separator_pipe.csv", col_sep: 'auto')
|
91
|
+
data.first.keys.size.should == 4
|
92
|
+
data.size.should eq 3
|
93
|
+
end
|
10
94
|
end
|
11
95
|
end
|