smarter_csv 1.2.0 → 1.2.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +5 -5
- data/.travis.yml +8 -4
- data/Gemfile +0 -1
- data/README.md +64 -21
- data/lib/smarter_csv/smarter_csv.rb +21 -14
- data/lib/smarter_csv/version.rb +1 -1
- data/spec/fixtures/ignore_comments.csv +11 -0
- data/spec/fixtures/ignore_comments2.csv +3 -0
- data/spec/fixtures/problematic.csv +8 -0
- data/spec/fixtures/quote_char.csv +9 -0
- data/spec/fixtures/quoted2.csv +4 -0
- data/spec/fixtures/trading.csv +3 -0
- data/spec/smarter_csv/ignore_comments_spec.rb +30 -0
- data/spec/smarter_csv/problematic.rb +34 -0
- data/spec/smarter_csv/quoted_spec.rb +27 -2
- data/spec/smarter_csv/trading_spec.rb +25 -0
- metadata +26 -9
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
|
-
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
2
|
+
SHA256:
|
3
|
+
metadata.gz: c912f58ae42fab60cb40e5a708e1551efda1a4abf4f5ad88e79f5803b5c84137
|
4
|
+
data.tar.gz: 438245fc8061b621a15ad2c5f7cc16d0ea9dbaa7049f60c806d88673eaeffbe0
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 7ab27854092a7b93aee3f596eeb5e3757cdfe0cf15fa95ff1e131268e02ce934fbbe2e7f25d786014f04222e8e5147821122f84fed9a9178bae555a13bafb505
|
7
|
+
data.tar.gz: 10129052b5e02a7b48a5e37aeb073abaceb43c526e05de51f84442d9a8f059caf3db530b43c59b44716eb688d8b76a1429b4f0a1e317375f8fc480004933999c
|
data/.travis.yml
CHANGED
@@ -6,10 +6,12 @@ before_install:
|
|
6
6
|
|
7
7
|
matrix:
|
8
8
|
include:
|
9
|
-
- rvm: 2.2.
|
10
|
-
- rvm: 2.3.
|
11
|
-
- rvm: 2.4.
|
12
|
-
- rvm:
|
9
|
+
- rvm: 2.2.10
|
10
|
+
- rvm: 2.3.8
|
11
|
+
- rvm: 2.4.6
|
12
|
+
- rvm: 2.5.5
|
13
|
+
- rvm: 2.6.3
|
14
|
+
- rvm: jruby-9.2.7.0
|
13
15
|
env:
|
14
16
|
- JRUBY_OPTS="--server -Xcompile.invokedynamic=false -J-XX:+TieredCompilation -J-XX:TieredStopAtLevel=1 -J-noverify -J-Xms512m -J-Xmx1024m"
|
15
17
|
- rvm: ruby-head
|
@@ -17,3 +19,5 @@ matrix:
|
|
17
19
|
branches:
|
18
20
|
only:
|
19
21
|
- master
|
22
|
+
- 1.2-stable
|
23
|
+
- 2.0-develop
|
data/Gemfile
CHANGED
data/README.md
CHANGED
@@ -2,6 +2,19 @@
|
|
2
2
|
|
3
3
|
[](http://travis-ci.org/tilo/smarter_csv) [](http://badge.fury.io/rb/smarter_csv)
|
4
4
|
|
5
|
+
---------------
|
6
|
+
#### Service Announcement
|
7
|
+
|
8
|
+
Work towards SmarterCSV 2.0 is on its way, with much improved features, and more streamlined options.
|
9
|
+
|
10
|
+
Please check the 2.0-develop branch, and open issues marked v2.0 and leave your comments.
|
11
|
+
|
12
|
+
New versions on the 1.2 branch will soon print a deprecation warning if you set :verbose to true
|
13
|
+
See below for list of deprecated options.
|
14
|
+
|
15
|
+
---------------
|
16
|
+
#### SmarterCSV
|
17
|
+
|
5
18
|
`smarter_csv` is a Ruby Gem for smarter importing of CSV Files as Array(s) of Hashes, suitable for direct processing with Mongoid or ActiveRecord,
|
6
19
|
and parallel processing with Resque or Sidekiq.
|
7
20
|
|
@@ -182,18 +195,48 @@ The options and the block are optional.
|
|
182
195
|
|
183
196
|
`SmarterCSV.process` supports the following options:
|
184
197
|
|
198
|
+
#### Options:
|
199
|
+
|
185
200
|
| Option | Default | Explanation |
|
186
201
|
---------------------------------------------------------------------------------------------------------------------------------
|
202
|
+
| :chunk_size | nil | if set, determines the desired chunk-size (defaults to nil, no chunk processing) |
|
203
|
+
| | | |
|
204
|
+
| :file_encoding | utf-8 | Set the file encoding eg.: 'windows-1252' or 'iso-8859-1' |
|
205
|
+
| :invalid_byte_sequence | '' | what to replace invalid byte sequences with |
|
206
|
+
| :force_utf8 | false | force UTF-8 encoding of all lines (including headers) in the CSV file |
|
207
|
+
| :skip_lines | nil | how many lines to skip before the first line or header line is processed |
|
208
|
+
| :comment_regexp | /^#/ | regular expression which matches comment lines (see NOTE about the CSV header) |
|
209
|
+
---------------------------------------------------------------------------------------------------------------------------------
|
187
210
|
| :col_sep | ',' | column separator |
|
211
|
+
| :force_simple_split | false | force simple splitting on :col_sep character for non-standard CSV-files. |
|
212
|
+
| | | e.g. when :quote_char is not properly escaped |
|
188
213
|
| :row_sep | $/ ,"\n" | row separator or record separator , defaults to system's $/ , which defaults to "\n" |
|
189
214
|
| | | This can also be set to :auto, but will process the whole csv file first (slow!) |
|
190
215
|
| :auto_row_sep_chars | 500 | How many characters to analyze when using `:row_sep => :auto`. nil or 0 means whole file. |
|
191
216
|
| :quote_char | '"' | quotation character |
|
192
|
-
|
193
|
-
| :
|
217
|
+
---------------------------------------------------------------------------------------------------------------------------------
|
218
|
+
| :headers_in_file | true | Whether or not the file contains headers as the first line. |
|
219
|
+
| | | Important if the file does not contain headers, |
|
220
|
+
| | | otherwise you would lose the first line of data. |
|
221
|
+
| :user_provided_headers | nil | *careful with that axe!* |
|
222
|
+
| | | user provided Array of header strings or symbols, to define |
|
223
|
+
| | | what headers should be used, overriding any in-file headers. |
|
224
|
+
| | | You can not combine the :user_provided_headers and :key_mapping options |
|
225
|
+
| :remove_empty_hashes | true | remove / ignore any hashes which don't have any key/value pairs |
|
226
|
+
| :verbose | false | print out line number while processing (to track down problems in input files) |
|
227
|
+
---------------------------------------------------------------------------------------------------------------------------------
|
228
|
+
|
229
|
+
#### Deprecated 1.x Options: to be replaced in 2.0
|
230
|
+
|
231
|
+
There have been a lot of 1-offs and feature creep around these options, and going forward we'll have a simpler, but more flexible way to address these features.
|
232
|
+
|
233
|
+
Instead of these options, there will be a new and more flexible way to process the header fields, as well as the fields in each line of the CSV.
|
234
|
+
And header and data validations will also be supported in 2.x
|
235
|
+
|
236
|
+
| Option | Default | Explanation |
|
194
237
|
---------------------------------------------------------------------------------------------------------------------------------
|
195
238
|
| :key_mapping | nil | a hash which maps headers from the CSV file to keys in the result hash |
|
196
|
-
| :required_headers | nil | An array. Eacn of the given headers must be present
|
239
|
+
| :required_headers | nil | An array. Each of the given headers must be present after header manipulation, |
|
197
240
|
| | | or an exception is raised. No validation if nil is given. |
|
198
241
|
| :remove_unmapped_keys | false | when using :key_mapping option, should non-mapped keys / columns be removed? |
|
199
242
|
| :downcase_header | true | downcase all column headers |
|
@@ -201,17 +244,7 @@ The options and the block are optional.
|
|
201
244
|
| :strip_whitespace | true | remove whitespace before/after values and headers |
|
202
245
|
| :keep_original_headers | false | keep the original headers from the CSV-file as-is. |
|
203
246
|
| | | Disables other flags manipulating the header fields. |
|
204
|
-
| :user_provided_headers | nil | *careful with that axe!* |
|
205
|
-
| | | user provided Array of header strings or symbols, to define |
|
206
|
-
| | | what headers should be used, overriding any in-file headers. |
|
207
|
-
| | | You can not combine the :user_provided_headers and :key_mapping options |
|
208
247
|
| :strip_chars_from_headers | nil | RegExp to remove extraneous characters from the header line (e.g. if headers are quoted) |
|
209
|
-
| :headers_in_file | true | Whether or not the file contains headers as the first line. |
|
210
|
-
| | | Important if the file does not contain headers, |
|
211
|
-
| | | otherwise you would lose the first line of data. |
|
212
|
-
| :skip_lines | nil | how many lines to skip before the first line or header line is processed |
|
213
|
-
| :force_utf8 | false | force UTF-8 encoding of all lines (including headers) in the CSV file |
|
214
|
-
| :invalid_byte_sequence | '' | how to replace invalid byte sequences with |
|
215
248
|
---------------------------------------------------------------------------------------------------------------------------------
|
216
249
|
| :value_converters | nil | supply a hash of :header => KlassName; the class needs to implement self.convert(val)|
|
217
250
|
| :remove_empty_values | true | remove values which have nil or empty strings as values |
|
@@ -220,11 +253,7 @@ The options and the block are optional.
|
|
220
253
|
| | | /^\$0\.0+$/ to match $0.00 , or /^#VALUE!$/ to match errors in Excel spreadsheets |
|
221
254
|
| :convert_values_to_numeric | true | converts strings containing Integers or Floats to the appropriate class |
|
222
255
|
| | | also accepts either {:except => [:key1,:key2]} or {:only => :key3} |
|
223
|
-
|
224
|
-
| :file_encoding | utf-8 | Set the file encoding eg.: 'windows-1252' or 'iso-8859-1' |
|
225
|
-
| :force_simple_split | false | force simple splitting on :col_sep character for non-standard CSV-files. |
|
226
|
-
| | | e.g. when :quote_char is not properly escaped |
|
227
|
-
| :verbose | false | print out line number while processing (to track down problems in input files) |
|
256
|
+
---------------------------------------------------------------------------------------------------------------------------------
|
228
257
|
|
229
258
|
|
230
259
|
#### NOTES about File Encodings:
|
@@ -295,6 +324,21 @@ Planned in the next releases:
|
|
295
324
|
|
296
325
|
## Changes
|
297
326
|
|
327
|
+
#### 1.2.6 (2018-11-13)
|
328
|
+
* fixing error caused by calling f.close when we do not hand in a file
|
329
|
+
|
330
|
+
#### 1.2.5 (2018-09-16)
|
331
|
+
* fixing issue #136 with comments in CSV files
|
332
|
+
* fixing error class hierarchy
|
333
|
+
|
334
|
+
#### 1.2.4 (2018-08-06)
|
335
|
+
* using Rails blank? if it's available
|
336
|
+
|
337
|
+
#### 1.2.3 (2018-01-27)
|
338
|
+
* fixed regression / test
|
339
|
+
* fixed quote_char interpolation for headers, but not data (thanks to Colin Petruno)
|
340
|
+
* bugfix (thanks to Joshua Smith for reporting)
|
341
|
+
|
298
342
|
#### 1.2.0 (2018-01-20)
|
299
343
|
* add default validation that a header can only appear once
|
300
344
|
* add option `required_headers`
|
@@ -349,9 +393,6 @@ Planned in the next releases:
|
|
349
393
|
#### 1.0.14 (2013-11-01)
|
350
394
|
* added GPL-2 and MIT license to GEM spec file; if you need another license contact me
|
351
395
|
|
352
|
-
#### 1.0.13 (2013-11-01) ### YANKED!
|
353
|
-
* added GPL-2 license to GEM spec file; if you need another license contact me
|
354
|
-
|
355
396
|
#### 1.0.12 (2013-10-15)
|
356
397
|
* added RSpec tests
|
357
398
|
|
@@ -465,6 +506,8 @@ And a special thanks to those who contributed pull requests:
|
|
465
506
|
* [Ivan Ushakov](https://github.com/IvanUshakov)
|
466
507
|
* [Matthieu Paret](https://github.com/mtparet)
|
467
508
|
* [Rohit Amarnath](https://github.com/ramarnat)
|
509
|
+
* [Joshua Smith](https://github.com/enviable)
|
510
|
+
* [Colin Petruno](https://github.com/colinpetruno)
|
468
511
|
|
469
512
|
|
470
513
|
## Contributing
|
@@ -1,16 +1,16 @@
|
|
1
1
|
module SmarterCSV
|
2
|
-
|
3
|
-
class HeaderSizeMismatch <
|
4
|
-
class IncorrectOption <
|
5
|
-
class DuplicateHeaders <
|
6
|
-
class MissingHeaders <
|
2
|
+
class SmarterCSVException < StandardError; end
|
3
|
+
class HeaderSizeMismatch < SmarterCSVException; end
|
4
|
+
class IncorrectOption < SmarterCSVException; end
|
5
|
+
class DuplicateHeaders < SmarterCSVException; end
|
6
|
+
class MissingHeaders < SmarterCSVException; end
|
7
7
|
|
8
8
|
|
9
9
|
def SmarterCSV.process(input, options={}, &block) # first parameter: filename or input object with readline method
|
10
10
|
default_options = {:col_sep => ',' , :row_sep => $/ , :quote_char => '"', :force_simple_split => false , :verbose => false ,
|
11
11
|
:remove_empty_values => true, :remove_zero_values => false , :remove_values_matching => nil , :remove_empty_hashes => true , :strip_whitespace => true,
|
12
12
|
:convert_values_to_numeric => true, :strip_chars_from_headers => nil , :user_provided_headers => nil , :headers_in_file => true,
|
13
|
-
:comment_regexp =>
|
13
|
+
:comment_regexp => /\A#/, :chunk_size => nil , :key_mapping_hash => nil , :downcase_header => true, :strings_as_keys => false, :file_encoding => 'utf-8',
|
14
14
|
:remove_unmapped_keys => false, :keep_original_headers => false, :value_converters => nil, :skip_lines => nil, :force_utf8 => false, :invalid_byte_sequence => '',
|
15
15
|
:auto_row_sep_chars => 500, :required_headers => nil
|
16
16
|
}
|
@@ -22,6 +22,7 @@ module SmarterCSV
|
|
22
22
|
old_row_sep = $/
|
23
23
|
file_line_count = 0
|
24
24
|
csv_line_count = 0
|
25
|
+
has_rails = !! defined?(Rails)
|
25
26
|
begin
|
26
27
|
f = input.respond_to?(:readline) ? input : File.open(input, "r:#{options[:file_encoding]}")
|
27
28
|
|
@@ -59,7 +60,7 @@ module SmarterCSV
|
|
59
60
|
else
|
60
61
|
file_headerA = header.split(options[:col_sep])
|
61
62
|
end
|
62
|
-
file_headerA.map!{|x| x.gsub(%r
|
63
|
+
file_headerA.map!{|x| x.gsub(%r/#{options[:quote_char]}/,'') }
|
63
64
|
file_headerA.map!{|x| x.strip} if options[:strip_whitespace]
|
64
65
|
unless options[:keep_original_headers]
|
65
66
|
file_headerA.map!{|x| x.gsub(/\s+|-+/,'_')}
|
@@ -68,14 +69,14 @@ module SmarterCSV
|
|
68
69
|
|
69
70
|
file_header_size = file_headerA.size
|
70
71
|
else
|
71
|
-
raise SmarterCSV::IncorrectOption , "ERROR
|
72
|
+
raise SmarterCSV::IncorrectOption , "ERROR: If :headers_in_file is set to false, you have to provide :user_provided_headers" if options[:user_provided_headers].nil?
|
72
73
|
end
|
73
74
|
if options[:user_provided_headers] && options[:user_provided_headers].class == Array && ! options[:user_provided_headers].empty?
|
74
75
|
# use user-provided headers
|
75
76
|
headerA = options[:user_provided_headers]
|
76
77
|
if defined?(file_header_size) && ! file_header_size.nil?
|
77
78
|
if headerA.size != file_header_size
|
78
|
-
raise SmarterCSV::HeaderSizeMismatch , "ERROR
|
79
|
+
raise SmarterCSV::HeaderSizeMismatch , "ERROR: :user_provided_headers defines #{headerA.size} headers != CSV-file #{input} has #{file_header_size} headers"
|
79
80
|
else
|
80
81
|
# we could print out the mapping of file_headerA to headerA here
|
81
82
|
end
|
@@ -100,14 +101,14 @@ module SmarterCSV
|
|
100
101
|
headerA.compact.each do |k|
|
101
102
|
duplicate_headers << k if headerA.select{|x| x == k}.size > 1
|
102
103
|
end
|
103
|
-
raise SmarterCSV::DuplicateHeaders , "
|
104
|
+
raise SmarterCSV::DuplicateHeaders , "ERROR: duplicate headers: #{duplicate_headers.join(',')}" unless duplicate_headers.empty?
|
104
105
|
|
105
106
|
if options[:required_headers] && options[:required_headers].is_a?(Array)
|
106
107
|
missing_headers = []
|
107
108
|
options[:required_headers].each do |k|
|
108
109
|
missing_headers << k unless headerA.include?(k)
|
109
110
|
end
|
110
|
-
raise SmarterCSV::MissingHeaders , "
|
111
|
+
raise SmarterCSV::MissingHeaders , "ERROR: missing headers: #{missing_headers.join(',')}" unless missing_headers.empty?
|
111
112
|
end
|
112
113
|
|
113
114
|
# in case we use chunking.. we'll need to set it up..
|
@@ -155,7 +156,7 @@ module SmarterCSV
|
|
155
156
|
else
|
156
157
|
dataA = line.split(options[:col_sep])
|
157
158
|
end
|
158
|
-
|
159
|
+
#### dataA.map!{|x| x.gsub(%r/#{options[:quote_char]}/,'') } # this is actually not a good idea as a default
|
159
160
|
dataA.map!{|x| x.strip} if options[:strip_whitespace]
|
160
161
|
hash = Hash.zip(headerA,dataA) # from Facets of Ruby library
|
161
162
|
# make sure we delete any key/value pairs from the hash, which the user wanted to delete:
|
@@ -167,7 +168,13 @@ module SmarterCSV
|
|
167
168
|
|
168
169
|
# remove empty values using the same regexp as used by the rails blank? method
|
169
170
|
# which caters for double \n and \r\n characters such as "1\r\n\r\n2" whereas the original check (v =~ /^\s*$/) does not
|
170
|
-
|
171
|
+
if options[:remove_empty_values]
|
172
|
+
if has_rails
|
173
|
+
hash.delete_if{|k,v| v.blank?}
|
174
|
+
else
|
175
|
+
hash.delete_if{|k,v| v.nil? || v !~ /[^[:space:]]/}
|
176
|
+
end
|
177
|
+
end
|
171
178
|
|
172
179
|
hash.delete_if{|k,v| ! v.nil? && v =~ /^(\d+|\d+\.\d+)$/ && v.to_f == 0} if options[:remove_zero_values] # values are typically Strings!
|
173
180
|
hash.delete_if{|k,v| v =~ options[:remove_values_matching]} if options[:remove_values_matching]
|
@@ -241,7 +248,7 @@ module SmarterCSV
|
|
241
248
|
end
|
242
249
|
ensure
|
243
250
|
$/ = old_row_sep # make sure this stupid global variable is always reset to it's previous value after we're done!
|
244
|
-
f.close
|
251
|
+
f.close if f.respond_to?(:close)
|
245
252
|
end
|
246
253
|
if block_given?
|
247
254
|
return chunk_count # when we do processing through a block we only care how many chunks we processed
|
data/lib/smarter_csv/version.rb
CHANGED
@@ -0,0 +1,8 @@
|
|
1
|
+
Compte;Date de comptabilisation;Date op�ration;Libell�;R�f�rence;Date valeur;Montant
|
2
|
+
22215449203;02/06/2018;01/06/2018;ECHEANCE PRET DONT CAP 410,33 ASS. 8,00E INT. 21,87 COM. 0,00E;8711552;01/06/2018;-440,20;
|
3
|
+
22215449203;04/06/2018;04/06/2018;EVI Gaultier Laperche remboursement compte courant;1038326;04/06/2018;-144,07;
|
4
|
+
22215449203;04/06/2018;04/06/2018;EVI Guillemain Nicolas remboursement CC pret d'honneur;1038328;04/06/2018;-144,07;
|
5
|
+
22215449203;01/06/2018;01/06/2018;310518 SC****5448 INTERMARCHE 95ERMONT;701JQ1K;01/06/2018;-16,00;
|
6
|
+
22215449203;01/06/2018;01/06/2018;EVI Stripe Payments UK L STRIPE E7U0R1;706AO1Q;01/06/2018;45,89;
|
7
|
+
22215449203;01/06/2018;01/06/2018;EVI Compte N26 Heroku;1100653;01/06/2018;-700,00;
|
8
|
+
22215449203;31/05/2018;31/05/2018;EVI Stripe Payments UK L STRIPE L2J1N7;6YISBWF;31/05/2018;465,89;
|
@@ -0,0 +1,30 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
fixture_path = 'spec/fixtures'
|
4
|
+
|
5
|
+
describe 'be_able_to' do
|
6
|
+
it 'ignore comments in CSV files' do
|
7
|
+
options = {}
|
8
|
+
data = SmarterCSV.process("#{fixture_path}/ignore_comments.csv", options)
|
9
|
+
|
10
|
+
data.size.should eq 5
|
11
|
+
|
12
|
+
# all the keys should be symbols
|
13
|
+
data.each{|item| item.keys.each{|x| x.is_a?(Symbol).should be_truthy}}
|
14
|
+
data.each do |h|
|
15
|
+
h.keys.each do |key|
|
16
|
+
[:"not_a_comment#first_name", :last_name, :dogs, :cats, :birds, :fish].should include( key )
|
17
|
+
end
|
18
|
+
end
|
19
|
+
end
|
20
|
+
|
21
|
+
it 'ignore comments in CSV files with CRLF' do
|
22
|
+
options = {row_sep: "\r\n"}
|
23
|
+
data = SmarterCSV.process("#{fixture_path}/ignore_comments2.csv", options)
|
24
|
+
|
25
|
+
# all the keys should be symbols
|
26
|
+
data.size.should eq 1
|
27
|
+
data.first[:h1].should eq 'a'
|
28
|
+
data.first[:h2].should eq "b\r\n#c"
|
29
|
+
end
|
30
|
+
end
|
@@ -0,0 +1,34 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
fixture_path = 'spec/fixtures'
|
4
|
+
|
5
|
+
describe 'loading file with UTF-8 characters in the header' do
|
6
|
+
|
7
|
+
# file which caused issues because of UTF-8 characters in the header
|
8
|
+
it 'loads the file with force_utf8 flag set' do
|
9
|
+
options = {col_sep: ";", force_utf8: true}
|
10
|
+
data = SmarterCSV.process("#{fixture_path}/problematic.csv", options)
|
11
|
+
|
12
|
+
data.length.should eq 7
|
13
|
+
end
|
14
|
+
|
15
|
+
it 'loads the file with strings as keys' do
|
16
|
+
options = {
|
17
|
+
file_encoding: 'iso-8859-1:UTF-8', # important!
|
18
|
+
col_sep: ";", strings_as_keys: true,
|
19
|
+
}
|
20
|
+
data = SmarterCSV.process("#{fixture_path}/problematic.csv", options)
|
21
|
+
|
22
|
+
data.length.should eq 7
|
23
|
+
data.first.keys.sort.should eq [
|
24
|
+
"compte",
|
25
|
+
"date_de_comptabilisation",
|
26
|
+
"date_opération",
|
27
|
+
"date_valeur",
|
28
|
+
"libellé",
|
29
|
+
"montant",
|
30
|
+
"référence"
|
31
|
+
]
|
32
|
+
end
|
33
|
+
|
34
|
+
end
|
@@ -2,9 +2,9 @@ require 'spec_helper'
|
|
2
2
|
|
3
3
|
fixture_path = 'spec/fixtures'
|
4
4
|
|
5
|
-
describe '
|
5
|
+
describe 'loading file with quoted fields' do
|
6
6
|
|
7
|
-
it '
|
7
|
+
it 'leaving the quotes in the data' do
|
8
8
|
options = {}
|
9
9
|
data = SmarterCSV.process("#{fixture_path}/quoted.csv", options)
|
10
10
|
data.flatten.size.should == 4
|
@@ -20,4 +20,29 @@ describe 'be_able_to' do
|
|
20
20
|
end
|
21
21
|
end
|
22
22
|
|
23
|
+
|
24
|
+
it 'removes quotes around quoted fields, but not inside data' do
|
25
|
+
options = {}
|
26
|
+
data = SmarterCSV.process("#{fixture_path}/quote_char.csv", options)
|
27
|
+
|
28
|
+
data.length.should eq 6
|
29
|
+
data[1][:first_name].should eq "Jam\ne\nson\""
|
30
|
+
data[2][:first_name].should eq "\"Jean"
|
31
|
+
end
|
32
|
+
|
33
|
+
|
34
|
+
# NOTE: quotes inside headers need to be escaped by doubling them
|
35
|
+
# e.g. 'correct ""EXAMPLE""'
|
36
|
+
# this escaping is illegal: 'incorrect \"EXAMPLE\"' <-- this caused CSV parsing error
|
37
|
+
# in case of CSV parsing errors, use :user_provided_headers, or key_mapping
|
38
|
+
#
|
39
|
+
it 'removes quotes around headers and extra quotes inside headers' do
|
40
|
+
options = {}
|
41
|
+
data = SmarterCSV.process("#{fixture_path}/quoted2.csv", options)
|
42
|
+
|
43
|
+
data.length.should eq 3
|
44
|
+
data.first.keys[2].should eq :isbn
|
45
|
+
data.first.keys[3].should eq :discounted_price
|
46
|
+
end
|
47
|
+
|
23
48
|
end
|
@@ -0,0 +1,25 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
fixture_path = 'spec/fixtures'
|
4
|
+
|
5
|
+
# somebody reported that a column called 'options_trader' would be truncated to 'trader'
|
6
|
+
|
7
|
+
describe 'loads simple file format' do
|
8
|
+
|
9
|
+
it 'with symbols as keys when using defaults' do
|
10
|
+
options = {}
|
11
|
+
data = SmarterCSV.process("#{fixture_path}/trading.csv", options)
|
12
|
+
|
13
|
+
data.flatten.size.should eq 2
|
14
|
+
data.each do |item|
|
15
|
+
# all keys should be symbols when using v1.x backwards compatible mode
|
16
|
+
item.keys.each{|x| x.class.should eq Symbol}
|
17
|
+
item[:account_id].class.should eq Fixnum
|
18
|
+
item[:options_trader].class.should eq String
|
19
|
+
item[:stock_symbol].class.should eq String
|
20
|
+
item[:shares_issued].class.should eq Fixnum
|
21
|
+
item[:purchase_date].class.should eq String
|
22
|
+
end
|
23
|
+
end
|
24
|
+
|
25
|
+
end
|
metadata
CHANGED
@@ -1,16 +1,16 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: smarter_csv
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.2.
|
4
|
+
version: 1.2.7
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- 'Tilo Sloboda
|
8
8
|
|
9
|
-
'
|
10
|
-
autorequire:
|
9
|
+
'
|
10
|
+
autorequire:
|
11
11
|
bindir: bin
|
12
12
|
cert_chain: []
|
13
|
-
date:
|
13
|
+
date: 2021-02-04 00:00:00.000000000 Z
|
14
14
|
dependencies:
|
15
15
|
- !ruby/object:Gem::Dependency
|
16
16
|
name: rspec
|
@@ -32,7 +32,7 @@ description: Ruby Gem for smarter importing of CSV Files as Array(s) of Hashes,
|
|
32
32
|
email:
|
33
33
|
- 'tilo.sloboda@gmail.com
|
34
34
|
|
35
|
-
'
|
35
|
+
'
|
36
36
|
executables: []
|
37
37
|
extensions: []
|
38
38
|
extra_rdoc_files: []
|
@@ -58,6 +58,8 @@ files:
|
|
58
58
|
- spec/fixtures/chunk_cornercase.csv
|
59
59
|
- spec/fixtures/duplicate_headers.csv
|
60
60
|
- spec/fixtures/empty.csv
|
61
|
+
- spec/fixtures/ignore_comments.csv
|
62
|
+
- spec/fixtures/ignore_comments2.csv
|
61
63
|
- spec/fixtures/line_endings_n.csv
|
62
64
|
- spec/fixtures/line_endings_r.csv
|
63
65
|
- spec/fixtures/line_endings_rn.csv
|
@@ -68,9 +70,13 @@ files:
|
|
68
70
|
- spec/fixtures/no_header.csv
|
69
71
|
- spec/fixtures/numeric.csv
|
70
72
|
- spec/fixtures/pets.csv
|
73
|
+
- spec/fixtures/problematic.csv
|
74
|
+
- spec/fixtures/quote_char.csv
|
71
75
|
- spec/fixtures/quoted.csv
|
76
|
+
- spec/fixtures/quoted2.csv
|
72
77
|
- spec/fixtures/separator.csv
|
73
78
|
- spec/fixtures/skip_lines.csv
|
79
|
+
- spec/fixtures/trading.csv
|
74
80
|
- spec/fixtures/user_import.csv
|
75
81
|
- spec/fixtures/valid_unicode.csv
|
76
82
|
- spec/fixtures/with_dashes.csv
|
@@ -84,6 +90,7 @@ files:
|
|
84
90
|
- spec/smarter_csv/convert_values_to_numeric_spec.rb
|
85
91
|
- spec/smarter_csv/extenstions_spec.rb
|
86
92
|
- spec/smarter_csv/header_transformation_spec.rb
|
93
|
+
- spec/smarter_csv/ignore_comments_spec.rb
|
87
94
|
- spec/smarter_csv/invalid_headers_spec.rb
|
88
95
|
- spec/smarter_csv/keep_headers_spec.rb
|
89
96
|
- spec/smarter_csv/key_mapping_spec.rb
|
@@ -92,6 +99,7 @@ files:
|
|
92
99
|
- spec/smarter_csv/malformed_spec.rb
|
93
100
|
- spec/smarter_csv/no_header_spec.rb
|
94
101
|
- spec/smarter_csv/not_downcase_header_spec.rb
|
102
|
+
- spec/smarter_csv/problematic.rb
|
95
103
|
- spec/smarter_csv/quoted_spec.rb
|
96
104
|
- spec/smarter_csv/remove_empty_values_spec.rb
|
97
105
|
- spec/smarter_csv/remove_keys_from_hashes_spec.rb
|
@@ -101,6 +109,7 @@ files:
|
|
101
109
|
- spec/smarter_csv/skip_lines_spec.rb
|
102
110
|
- spec/smarter_csv/strings_as_keys_spec.rb
|
103
111
|
- spec/smarter_csv/strip_chars_from_headers_spec.rb
|
112
|
+
- spec/smarter_csv/trading_spec.rb
|
104
113
|
- spec/smarter_csv/valid_unicode_spec.rb
|
105
114
|
- spec/smarter_csv/value_converters_spec.rb
|
106
115
|
- spec/spec.opts
|
@@ -111,7 +120,7 @@ licenses:
|
|
111
120
|
- MIT
|
112
121
|
- GPL-2
|
113
122
|
metadata: {}
|
114
|
-
post_install_message:
|
123
|
+
post_install_message:
|
115
124
|
rdoc_options: []
|
116
125
|
require_paths:
|
117
126
|
- lib
|
@@ -127,9 +136,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
127
136
|
version: '0'
|
128
137
|
requirements:
|
129
138
|
- csv
|
130
|
-
|
131
|
-
|
132
|
-
signing_key:
|
139
|
+
rubygems_version: 3.0.6
|
140
|
+
signing_key:
|
133
141
|
specification_version: 4
|
134
142
|
summary: Ruby Gem for smarter importing of CSV Files (and CSV-like files), with lots
|
135
143
|
of optional features, e.g. chunked processing for huge CSV files
|
@@ -143,6 +151,8 @@ test_files:
|
|
143
151
|
- spec/fixtures/chunk_cornercase.csv
|
144
152
|
- spec/fixtures/duplicate_headers.csv
|
145
153
|
- spec/fixtures/empty.csv
|
154
|
+
- spec/fixtures/ignore_comments.csv
|
155
|
+
- spec/fixtures/ignore_comments2.csv
|
146
156
|
- spec/fixtures/line_endings_n.csv
|
147
157
|
- spec/fixtures/line_endings_r.csv
|
148
158
|
- spec/fixtures/line_endings_rn.csv
|
@@ -153,9 +163,13 @@ test_files:
|
|
153
163
|
- spec/fixtures/no_header.csv
|
154
164
|
- spec/fixtures/numeric.csv
|
155
165
|
- spec/fixtures/pets.csv
|
166
|
+
- spec/fixtures/problematic.csv
|
167
|
+
- spec/fixtures/quote_char.csv
|
156
168
|
- spec/fixtures/quoted.csv
|
169
|
+
- spec/fixtures/quoted2.csv
|
157
170
|
- spec/fixtures/separator.csv
|
158
171
|
- spec/fixtures/skip_lines.csv
|
172
|
+
- spec/fixtures/trading.csv
|
159
173
|
- spec/fixtures/user_import.csv
|
160
174
|
- spec/fixtures/valid_unicode.csv
|
161
175
|
- spec/fixtures/with_dashes.csv
|
@@ -169,6 +183,7 @@ test_files:
|
|
169
183
|
- spec/smarter_csv/convert_values_to_numeric_spec.rb
|
170
184
|
- spec/smarter_csv/extenstions_spec.rb
|
171
185
|
- spec/smarter_csv/header_transformation_spec.rb
|
186
|
+
- spec/smarter_csv/ignore_comments_spec.rb
|
172
187
|
- spec/smarter_csv/invalid_headers_spec.rb
|
173
188
|
- spec/smarter_csv/keep_headers_spec.rb
|
174
189
|
- spec/smarter_csv/key_mapping_spec.rb
|
@@ -177,6 +192,7 @@ test_files:
|
|
177
192
|
- spec/smarter_csv/malformed_spec.rb
|
178
193
|
- spec/smarter_csv/no_header_spec.rb
|
179
194
|
- spec/smarter_csv/not_downcase_header_spec.rb
|
195
|
+
- spec/smarter_csv/problematic.rb
|
180
196
|
- spec/smarter_csv/quoted_spec.rb
|
181
197
|
- spec/smarter_csv/remove_empty_values_spec.rb
|
182
198
|
- spec/smarter_csv/remove_keys_from_hashes_spec.rb
|
@@ -186,6 +202,7 @@ test_files:
|
|
186
202
|
- spec/smarter_csv/skip_lines_spec.rb
|
187
203
|
- spec/smarter_csv/strings_as_keys_spec.rb
|
188
204
|
- spec/smarter_csv/strip_chars_from_headers_spec.rb
|
205
|
+
- spec/smarter_csv/trading_spec.rb
|
189
206
|
- spec/smarter_csv/valid_unicode_spec.rb
|
190
207
|
- spec/smarter_csv/value_converters_spec.rb
|
191
208
|
- spec/spec.opts
|