smarter_csv 1.3.0 → 1.5.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.gitignore +2 -0
- data/CHANGELOG.md +181 -0
- data/CONTRIBUTORS.md +46 -0
- data/LICENSE.txt +21 -0
- data/README.md +50 -239
- data/Rakefile +8 -15
- data/lib/smarter_csv/smarter_csv.rb +114 -38
- data/lib/smarter_csv/version.rb +1 -1
- data/lib/smarter_csv.rb +8 -0
- data/smarter_csv.gemspec +20 -16
- data/spec/fixtures/additional_separator.csv +6 -0
- data/spec/fixtures/empty_columns_1.csv +2 -0
- data/spec/fixtures/empty_columns_2.csv +2 -0
- data/spec/fixtures/hard_sample.csv +2 -0
- data/spec/fixtures/numeric.csv +1 -1
- data/spec/fixtures/separator_colon.csv +4 -0
- data/spec/fixtures/separator_comma.csv +4 -0
- data/spec/fixtures/separator_pipe.csv +4 -0
- data/spec/fixtures/{separator.csv → separator_semi.csv} +0 -0
- data/spec/fixtures/separator_tab.csv +4 -0
- data/spec/smarter_csv/additional_separator_spec.rb +45 -0
- data/spec/smarter_csv/binary_file2_spec.rb +1 -1
- data/spec/smarter_csv/blank_spec.rb +55 -0
- data/spec/smarter_csv/carriage_return_spec.rb +27 -7
- data/spec/smarter_csv/column_separator_spec.rb +89 -5
- data/spec/smarter_csv/empty_columns_spec.rb +74 -0
- data/spec/smarter_csv/hard_sample_spec.rb +24 -0
- data/spec/smarter_csv/ignore_comments_spec.rb +45 -30
- metadata +50 -13
@@ -2,10 +2,94 @@ require 'spec_helper'
|
|
2
2
|
|
3
3
|
fixture_path = 'spec/fixtures'
|
4
4
|
|
5
|
-
describe '
|
6
|
-
|
7
|
-
|
8
|
-
data = SmarterCSV.process("#{fixture_path}/
|
9
|
-
data.
|
5
|
+
describe 'can handle col_sep' do
|
6
|
+
|
7
|
+
it 'has default of comma as col_sep' do
|
8
|
+
data = SmarterCSV.process("#{fixture_path}/separator_comma.csv") # no options
|
9
|
+
data.first.keys.size.should == 4
|
10
|
+
data.size.should eq 3
|
11
|
+
end
|
12
|
+
|
13
|
+
describe 'with explicitly given col_sep' do
|
14
|
+
it 'loads file with comma separator' do
|
15
|
+
options = {:col_sep => ','}
|
16
|
+
data = SmarterCSV.process("#{fixture_path}/separator_comma.csv", options)
|
17
|
+
data.first.keys.size.should == 4
|
18
|
+
data.size.should eq 3
|
19
|
+
end
|
20
|
+
|
21
|
+
it 'loads file with tab separator' do
|
22
|
+
options = {:col_sep => "\t"}
|
23
|
+
data = SmarterCSV.process("#{fixture_path}/separator_tab.csv", options)
|
24
|
+
data.first.keys.size.should == 4
|
25
|
+
data.size.should eq 3
|
26
|
+
end
|
27
|
+
|
28
|
+
it 'loads file with semi-colon separator' do
|
29
|
+
options = {:col_sep => ';'}
|
30
|
+
data = SmarterCSV.process("#{fixture_path}/separator_semi.csv", options)
|
31
|
+
data.first.keys.size.should == 4
|
32
|
+
data.size.should eq 3
|
33
|
+
end
|
34
|
+
|
35
|
+
it 'loads file with colon separator' do
|
36
|
+
options = {:col_sep => ':'}
|
37
|
+
data = SmarterCSV.process("#{fixture_path}/separator_colon.csv", options)
|
38
|
+
data.first.keys.size.should == 4
|
39
|
+
data.size.should eq 3
|
40
|
+
end
|
41
|
+
|
42
|
+
it 'loads file with pipe separator' do
|
43
|
+
options = {:col_sep => '|'}
|
44
|
+
data = SmarterCSV.process("#{fixture_path}/separator_pipe.csv", options)
|
45
|
+
data.first.keys.size.should == 4
|
46
|
+
data.size.should eq 3
|
47
|
+
end
|
48
|
+
end
|
49
|
+
|
50
|
+
describe 'auto-detection of separator' do
|
51
|
+
options = {col_sep: :auto}
|
52
|
+
|
53
|
+
it 'auto-detects comma separator and loads data' do
|
54
|
+
data = SmarterCSV.process("#{fixture_path}/separator_comma.csv", options)
|
55
|
+
data.first.keys.size.should == 4
|
56
|
+
data.size.should eq 3
|
57
|
+
end
|
58
|
+
|
59
|
+
it 'auto-detects tab separator and loads data' do
|
60
|
+
data = SmarterCSV.process("#{fixture_path}/separator_tab.csv", options)
|
61
|
+
data.first.keys.size.should == 4
|
62
|
+
data.size.should eq 3
|
63
|
+
end
|
64
|
+
|
65
|
+
it 'auto-detects semi-colon separator and loads data' do
|
66
|
+
data = SmarterCSV.process("#{fixture_path}/separator_semi.csv", options)
|
67
|
+
data.first.keys.size.should == 4
|
68
|
+
data.size.should eq 3
|
69
|
+
end
|
70
|
+
|
71
|
+
it 'auto-detects colon separator and loads data' do
|
72
|
+
data = SmarterCSV.process("#{fixture_path}/separator_colon.csv", options)
|
73
|
+
data.first.keys.size.should == 4
|
74
|
+
data.size.should eq 3
|
75
|
+
end
|
76
|
+
|
77
|
+
it 'auto-detects pipe separator and loads data' do
|
78
|
+
data = SmarterCSV.process("#{fixture_path}/separator_pipe.csv", options)
|
79
|
+
data.first.keys.size.should == 4
|
80
|
+
data.size.should eq 3
|
81
|
+
end
|
82
|
+
|
83
|
+
it 'does not auto-detect other separators' do
|
84
|
+
expect {
|
85
|
+
SmarterCSV.process("#{fixture_path}/binary.csv", options)
|
86
|
+
}.to raise_exception SmarterCSV::NoColSepDetected
|
87
|
+
end
|
88
|
+
|
89
|
+
it 'also works when auto is given a string' do
|
90
|
+
data = SmarterCSV.process("#{fixture_path}/separator_pipe.csv", col_sep: 'auto')
|
91
|
+
data.first.keys.size.should == 4
|
92
|
+
data.size.should eq 3
|
93
|
+
end
|
10
94
|
end
|
11
95
|
end
|
@@ -0,0 +1,74 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
fixture_path = 'spec/fixtures'
|
4
|
+
|
5
|
+
describe 'can handle empty columns' do
|
6
|
+
|
7
|
+
describe 'default behavior' do
|
8
|
+
it 'has empty columns at end' do
|
9
|
+
data = SmarterCSV.process("#{fixture_path}/empty_columns_1.csv")
|
10
|
+
data.size.should eq 1
|
11
|
+
item = data.first
|
12
|
+
item[:id].should == 123
|
13
|
+
item[:col1].should == nil
|
14
|
+
item[:col2].should == nil
|
15
|
+
item[:col3].should == nil
|
16
|
+
end
|
17
|
+
|
18
|
+
it 'has empty columns in the middle' do
|
19
|
+
data = SmarterCSV.process("#{fixture_path}/empty_columns_2.csv")
|
20
|
+
data.size.should eq 1
|
21
|
+
item = data.first
|
22
|
+
item[:id].should == 123
|
23
|
+
item[:col1].should == nil
|
24
|
+
item[:col2].should == nil
|
25
|
+
item[:col3].should == 1
|
26
|
+
end
|
27
|
+
end
|
28
|
+
|
29
|
+
describe 'with remove_empty_values: true' do
|
30
|
+
options = {remove_empty_values: true}
|
31
|
+
it 'has empty columns at end' do
|
32
|
+
data = SmarterCSV.process("#{fixture_path}/empty_columns_1.csv", options)
|
33
|
+
data.size.should eq 1
|
34
|
+
item = data.first
|
35
|
+
item[:id].should == 123
|
36
|
+
item[:col1].should == nil
|
37
|
+
item[:col2].should == nil
|
38
|
+
item[:col3].should == nil
|
39
|
+
end
|
40
|
+
|
41
|
+
it 'has empty columns in the middle' do
|
42
|
+
data = SmarterCSV.process("#{fixture_path}/empty_columns_2.csv", options)
|
43
|
+
data.size.should eq 1
|
44
|
+
item = data.first
|
45
|
+
item[:id].should == 123
|
46
|
+
item[:col1].should == nil
|
47
|
+
item[:col2].should == nil
|
48
|
+
item[:col3].should == 1
|
49
|
+
end
|
50
|
+
end
|
51
|
+
|
52
|
+
describe 'with remove_empty_values: false' do
|
53
|
+
options = {remove_empty_values: false}
|
54
|
+
it 'has empty columns at end' do
|
55
|
+
data = SmarterCSV.process("#{fixture_path}/empty_columns_1.csv", options)
|
56
|
+
data.size.should eq 1
|
57
|
+
item = data.first
|
58
|
+
item[:id].should == 123
|
59
|
+
item[:col1].should == ''
|
60
|
+
item[:col2].should == ''
|
61
|
+
item[:col3].should == ''
|
62
|
+
end
|
63
|
+
|
64
|
+
it 'has empty columns in the middle' do
|
65
|
+
data = SmarterCSV.process("#{fixture_path}/empty_columns_2.csv", options)
|
66
|
+
data.size.should eq 1
|
67
|
+
item = data.first
|
68
|
+
item[:id].should == 123
|
69
|
+
item[:col1].should == ''
|
70
|
+
item[:col2].should == ''
|
71
|
+
item[:col3].should == 1
|
72
|
+
end
|
73
|
+
end
|
74
|
+
end
|
@@ -0,0 +1,24 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
fixture_path = 'spec/fixtures'
|
4
|
+
|
5
|
+
describe 'can handle the difficult CSV file' do
|
6
|
+
|
7
|
+
it 'loads the data with default values' do
|
8
|
+
data = SmarterCSV.process("#{fixture_path}/hard_sample.csv")
|
9
|
+
data.size.should eq 1
|
10
|
+
item = data.first
|
11
|
+
item.keys.count.should == 48
|
12
|
+
item[:name].should == '#MR1220817'
|
13
|
+
item[:shipping_method].should == 'Livraison Standard GRATUITE, 2-5 jours avec suivi'
|
14
|
+
item[:lineitem_name].should == 'Cire Épilation Nacrée'
|
15
|
+
item[:phone].should == 3366012111111
|
16
|
+
end
|
17
|
+
|
18
|
+
# the main problem is the data line starting with a # character, but not being a comment
|
19
|
+
it 'fails to load the CSV file with incorrectly set comment_regexp' do
|
20
|
+
options = {comment_regexp: /\A#/ }
|
21
|
+
data = SmarterCSV.process("#{fixture_path}/hard_sample.csv", options)
|
22
|
+
data.size.should eq 0
|
23
|
+
end
|
24
|
+
end
|
@@ -1,30 +1,45 @@
|
|
1
|
-
require 'spec_helper'
|
2
|
-
|
3
|
-
fixture_path = 'spec/fixtures'
|
4
|
-
|
5
|
-
describe 'be_able_to' do
|
6
|
-
it 'ignore comments in CSV files' do
|
7
|
-
options = {}
|
8
|
-
data = SmarterCSV.process("#{fixture_path}/ignore_comments.csv", options)
|
9
|
-
|
10
|
-
data.size.should eq
|
11
|
-
|
12
|
-
# all the keys should be symbols
|
13
|
-
data.each{|item| item.keys.each{|x| x.is_a?(Symbol).should be_truthy}}
|
14
|
-
data.each do |h|
|
15
|
-
h.keys.each do |key|
|
16
|
-
[:"not_a_comment#first_name", :last_name, :dogs, :cats, :birds, :fish].should include( key )
|
17
|
-
end
|
18
|
-
end
|
19
|
-
end
|
20
|
-
|
21
|
-
it 'ignore comments in CSV files
|
22
|
-
options = {
|
23
|
-
data = SmarterCSV.process("#{fixture_path}/
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
data.
|
29
|
-
|
30
|
-
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
fixture_path = 'spec/fixtures'
|
4
|
+
|
5
|
+
describe 'be_able_to' do
|
6
|
+
it 'by default does not ignore comments in CSV files' do
|
7
|
+
options = {}
|
8
|
+
data = SmarterCSV.process("#{fixture_path}/ignore_comments.csv", options)
|
9
|
+
|
10
|
+
data.size.should eq 8
|
11
|
+
|
12
|
+
# all the keys should be symbols
|
13
|
+
data.each{|item| item.keys.each{|x| x.is_a?(Symbol).should be_truthy}}
|
14
|
+
data.each do |h|
|
15
|
+
h.keys.each do |key|
|
16
|
+
[:"not_a_comment#first_name", :last_name, :dogs, :cats, :birds, :fish].should include( key )
|
17
|
+
end
|
18
|
+
end
|
19
|
+
end
|
20
|
+
|
21
|
+
it 'ignore comments in CSV files using comment_regexp' do
|
22
|
+
options = {comment_regexp: /\A#/}
|
23
|
+
data = SmarterCSV.process("#{fixture_path}/ignore_comments.csv", options)
|
24
|
+
|
25
|
+
data.size.should eq 5
|
26
|
+
|
27
|
+
# all the keys should be symbols
|
28
|
+
data.each{|item| item.keys.each{|x| x.is_a?(Symbol).should be_truthy}}
|
29
|
+
data.each do |h|
|
30
|
+
h.keys.each do |key|
|
31
|
+
[:"not_a_comment#first_name", :last_name, :dogs, :cats, :birds, :fish].should include( key )
|
32
|
+
end
|
33
|
+
end
|
34
|
+
end
|
35
|
+
|
36
|
+
it 'ignore comments in CSV files with CRLF' do
|
37
|
+
options = {row_sep: "\r\n"}
|
38
|
+
data = SmarterCSV.process("#{fixture_path}/ignore_comments2.csv", options)
|
39
|
+
|
40
|
+
# all the keys should be symbols
|
41
|
+
data.size.should eq 1
|
42
|
+
data.first[:h1].should eq 'a'
|
43
|
+
data.first[:h2].should eq "b\r\n#c"
|
44
|
+
end
|
45
|
+
end
|
metadata
CHANGED
@@ -1,16 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: smarter_csv
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.
|
4
|
+
version: 1.5.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
|
-
-
|
8
|
-
|
9
|
-
'
|
7
|
+
- Tilo Sloboda
|
10
8
|
autorequire:
|
11
9
|
bindir: bin
|
12
10
|
cert_chain: []
|
13
|
-
date: 2022-
|
11
|
+
date: 2022-04-25 00:00:00.000000000 Z
|
14
12
|
dependencies:
|
15
13
|
- !ruby/object:Gem::Dependency
|
16
14
|
name: rspec
|
@@ -26,13 +24,25 @@ dependencies:
|
|
26
24
|
- - ">="
|
27
25
|
- !ruby/object:Gem::Version
|
28
26
|
version: '0'
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: simplecov
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - ">="
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: '0'
|
34
|
+
type: :development
|
35
|
+
prerelease: false
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - ">="
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: '0'
|
29
41
|
description: Ruby Gem for smarter importing of CSV Files as Array(s) of Hashes, with
|
30
42
|
optional features for processing large files in parallel, embedded comments, unusual
|
31
43
|
field- and record-separators, flexible mapping of CSV-headers to Hash-keys
|
32
44
|
email:
|
33
|
-
-
|
34
|
-
|
35
|
-
'
|
45
|
+
- tilo.sloboda@gmail.com
|
36
46
|
executables: []
|
37
47
|
extensions: []
|
38
48
|
extra_rdoc_files: []
|
@@ -41,7 +51,10 @@ files:
|
|
41
51
|
- ".rspec"
|
42
52
|
- ".rvmrc"
|
43
53
|
- ".travis.yml"
|
54
|
+
- CHANGELOG.md
|
55
|
+
- CONTRIBUTORS.md
|
44
56
|
- Gemfile
|
57
|
+
- LICENSE.txt
|
45
58
|
- README.md
|
46
59
|
- Rakefile
|
47
60
|
- lib/extensions/hash.rb
|
@@ -49,6 +62,7 @@ files:
|
|
49
62
|
- lib/smarter_csv/smarter_csv.rb
|
50
63
|
- lib/smarter_csv/version.rb
|
51
64
|
- smarter_csv.gemspec
|
65
|
+
- spec/fixtures/additional_separator.csv
|
52
66
|
- spec/fixtures/basic.csv
|
53
67
|
- spec/fixtures/binary.csv
|
54
68
|
- spec/fixtures/carriage_returns_n.csv
|
@@ -58,6 +72,9 @@ files:
|
|
58
72
|
- spec/fixtures/chunk_cornercase.csv
|
59
73
|
- spec/fixtures/duplicate_headers.csv
|
60
74
|
- spec/fixtures/empty.csv
|
75
|
+
- spec/fixtures/empty_columns_1.csv
|
76
|
+
- spec/fixtures/empty_columns_2.csv
|
77
|
+
- spec/fixtures/hard_sample.csv
|
61
78
|
- spec/fixtures/ignore_comments.csv
|
62
79
|
- spec/fixtures/ignore_comments2.csv
|
63
80
|
- spec/fixtures/key_mapping.csv
|
@@ -75,21 +92,29 @@ files:
|
|
75
92
|
- spec/fixtures/quote_char.csv
|
76
93
|
- spec/fixtures/quoted.csv
|
77
94
|
- spec/fixtures/quoted2.csv
|
78
|
-
- spec/fixtures/
|
95
|
+
- spec/fixtures/separator_colon.csv
|
96
|
+
- spec/fixtures/separator_comma.csv
|
97
|
+
- spec/fixtures/separator_pipe.csv
|
98
|
+
- spec/fixtures/separator_semi.csv
|
99
|
+
- spec/fixtures/separator_tab.csv
|
79
100
|
- spec/fixtures/skip_lines.csv
|
80
101
|
- spec/fixtures/trading.csv
|
81
102
|
- spec/fixtures/user_import.csv
|
82
103
|
- spec/fixtures/valid_unicode.csv
|
83
104
|
- spec/fixtures/with_dashes.csv
|
84
105
|
- spec/fixtures/with_dates.csv
|
106
|
+
- spec/smarter_csv/additional_separator_spec.rb
|
85
107
|
- spec/smarter_csv/binary_file2_spec.rb
|
86
108
|
- spec/smarter_csv/binary_file_spec.rb
|
109
|
+
- spec/smarter_csv/blank_spec.rb
|
87
110
|
- spec/smarter_csv/carriage_return_spec.rb
|
88
111
|
- spec/smarter_csv/chunked_reading_spec.rb
|
89
112
|
- spec/smarter_csv/close_file_spec.rb
|
90
113
|
- spec/smarter_csv/column_separator_spec.rb
|
91
114
|
- spec/smarter_csv/convert_values_to_numeric_spec.rb
|
115
|
+
- spec/smarter_csv/empty_columns_spec.rb
|
92
116
|
- spec/smarter_csv/extenstions_spec.rb
|
117
|
+
- spec/smarter_csv/hard_sample_spec.rb
|
93
118
|
- spec/smarter_csv/header_transformation_spec.rb
|
94
119
|
- spec/smarter_csv/ignore_comments_spec.rb
|
95
120
|
- spec/smarter_csv/invalid_headers_spec.rb
|
@@ -119,8 +144,8 @@ files:
|
|
119
144
|
homepage: https://github.com/tilo/smarter_csv
|
120
145
|
licenses:
|
121
146
|
- MIT
|
122
|
-
|
123
|
-
|
147
|
+
metadata:
|
148
|
+
homepage_uri: https://github.com/tilo/smarter_csv
|
124
149
|
post_install_message:
|
125
150
|
rdoc_options: []
|
126
151
|
require_paths:
|
@@ -137,12 +162,13 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
137
162
|
version: '0'
|
138
163
|
requirements:
|
139
164
|
- csv
|
140
|
-
rubygems_version: 3.1.
|
165
|
+
rubygems_version: 3.1.6
|
141
166
|
signing_key:
|
142
167
|
specification_version: 4
|
143
168
|
summary: Ruby Gem for smarter importing of CSV Files (and CSV-like files), with lots
|
144
169
|
of optional features, e.g. chunked processing for huge CSV files
|
145
170
|
test_files:
|
171
|
+
- spec/fixtures/additional_separator.csv
|
146
172
|
- spec/fixtures/basic.csv
|
147
173
|
- spec/fixtures/binary.csv
|
148
174
|
- spec/fixtures/carriage_returns_n.csv
|
@@ -152,6 +178,9 @@ test_files:
|
|
152
178
|
- spec/fixtures/chunk_cornercase.csv
|
153
179
|
- spec/fixtures/duplicate_headers.csv
|
154
180
|
- spec/fixtures/empty.csv
|
181
|
+
- spec/fixtures/empty_columns_1.csv
|
182
|
+
- spec/fixtures/empty_columns_2.csv
|
183
|
+
- spec/fixtures/hard_sample.csv
|
155
184
|
- spec/fixtures/ignore_comments.csv
|
156
185
|
- spec/fixtures/ignore_comments2.csv
|
157
186
|
- spec/fixtures/key_mapping.csv
|
@@ -169,21 +198,29 @@ test_files:
|
|
169
198
|
- spec/fixtures/quote_char.csv
|
170
199
|
- spec/fixtures/quoted.csv
|
171
200
|
- spec/fixtures/quoted2.csv
|
172
|
-
- spec/fixtures/
|
201
|
+
- spec/fixtures/separator_colon.csv
|
202
|
+
- spec/fixtures/separator_comma.csv
|
203
|
+
- spec/fixtures/separator_pipe.csv
|
204
|
+
- spec/fixtures/separator_semi.csv
|
205
|
+
- spec/fixtures/separator_tab.csv
|
173
206
|
- spec/fixtures/skip_lines.csv
|
174
207
|
- spec/fixtures/trading.csv
|
175
208
|
- spec/fixtures/user_import.csv
|
176
209
|
- spec/fixtures/valid_unicode.csv
|
177
210
|
- spec/fixtures/with_dashes.csv
|
178
211
|
- spec/fixtures/with_dates.csv
|
212
|
+
- spec/smarter_csv/additional_separator_spec.rb
|
179
213
|
- spec/smarter_csv/binary_file2_spec.rb
|
180
214
|
- spec/smarter_csv/binary_file_spec.rb
|
215
|
+
- spec/smarter_csv/blank_spec.rb
|
181
216
|
- spec/smarter_csv/carriage_return_spec.rb
|
182
217
|
- spec/smarter_csv/chunked_reading_spec.rb
|
183
218
|
- spec/smarter_csv/close_file_spec.rb
|
184
219
|
- spec/smarter_csv/column_separator_spec.rb
|
185
220
|
- spec/smarter_csv/convert_values_to_numeric_spec.rb
|
221
|
+
- spec/smarter_csv/empty_columns_spec.rb
|
186
222
|
- spec/smarter_csv/extenstions_spec.rb
|
223
|
+
- spec/smarter_csv/hard_sample_spec.rb
|
187
224
|
- spec/smarter_csv/header_transformation_spec.rb
|
188
225
|
- spec/smarter_csv/ignore_comments_spec.rb
|
189
226
|
- spec/smarter_csv/invalid_headers_spec.rb
|