csv-importer 0.1.0 → 0.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +267 -47
- data/lib/csv_importer.rb +14 -4
- data/lib/csv_importer/report.rb +1 -0
- data/lib/csv_importer/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: b856aa990331713775dbd29f7323111622f05a3e
|
4
|
+
data.tar.gz: 5629137af1753b7d58df2f2e3b21ac3e8d3c075a
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 2882b69b3801caa070731985fc856b0c8d3716102b61e5b1a5f0c5a54411cb1c07b9dbaa963ac07c1f8ed54700e3066131e776459900778eb3fa7f7c8d18da7f
|
7
|
+
data.tar.gz: e02004d36c285f1510ba45b68a83c0d9ed72ffd6ff2f7e8f5905196a4377d501b73f0fc9d166fe13ae8a3be30b40b92e78572e3ac50e389cd89b63cedbca18a6
|
data/README.md
CHANGED
@@ -3,7 +3,7 @@
|
|
3
3
|
Importing a CSV file is easy to code until real users attempt to import
|
4
4
|
real data.
|
5
5
|
|
6
|
-
CSVImporter aims to handle validations, column mapping,
|
6
|
+
CSVImporter aims to handle validations, column mapping, import
|
7
7
|
and reporting.
|
8
8
|
|
9
9
|
[](https://travis-ci
|
|
12
12
|
Climate](https://codeclimate.com/github/BrewhouseTeam/csv-importer/badges/gpa.svg)](https://codeclimate.com/github/BrewhouseTeam/csv-importer)
|
13
13
|
[](https://codeclimate.com/github/BrewhouseTeam/csv-importer/coverage)
|
15
|
+
[](http://badge.fury.io/rb/csv-importer)
|
15
17
|
|
16
|
-
##
|
18
|
+
## Rationale
|
19
|
+
|
20
|
+
Importing CSV files seems easy until you deal with *real* users uploading
|
21
|
+
their *real* CSV file. You then have to deal with ASCII-8BIT formats,
|
22
|
+
missing columns, empty rows, malformed headers, wild separators, etc.
|
23
|
+
Reporting progress and errors to the end-user is also key for a good
|
24
|
+
experience.
|
25
|
+
|
26
|
+
I went through this many times so I decided to build CSV Importer to
|
27
|
+
save us a lot of trouble.
|
28
|
+
|
29
|
+
|
30
|
+
CSV Importer provides:
|
31
|
+
|
32
|
+
* a DSL to define the mapping between CSV columns and your model
|
33
|
+
* good reporting to the end user
|
34
|
+
* support for wild encodings and CSV formats.
|
17
35
|
|
18
|
-
|
36
|
+
## Usage tldr;
|
19
37
|
|
20
38
|
Define your CSVImporter:
|
21
39
|
|
@@ -23,12 +41,12 @@ Define your CSVImporter:
|
|
23
41
|
class ImportUserCSV
|
24
42
|
include CSVImporter
|
25
43
|
|
26
|
-
model User
|
44
|
+
model User # an active record like model
|
27
45
|
|
28
46
|
column :email, to: ->(email) { email.downcase }, required: true
|
29
47
|
column :first_name, as: [ /first.?name/i, /pr(é|e)nom/i ]
|
30
|
-
column :last_name,
|
31
|
-
column :published, to: ->(published,
|
48
|
+
column :last_name, as: [ /last.?name/i, "nom" ]
|
49
|
+
column :published, to: ->(published, user) { user.published_at = published ? Time.now : nil }
|
32
50
|
|
33
51
|
identifier :email # will find_or_update via :email
|
34
52
|
|
@@ -36,71 +54,273 @@ class ImportUserCSV
|
|
36
54
|
end
|
37
55
|
```
|
38
56
|
|
39
|
-
|
57
|
+
Run the import:
|
40
58
|
|
41
59
|
```ruby
|
42
|
-
|
60
|
+
import = ImportUserCSV.new(file: my_file)
|
43
61
|
|
44
|
-
import
|
45
|
-
import
|
46
|
-
import = ImportUserCSV.new(content: String)
|
62
|
+
import.valid_header? # => false
|
63
|
+
import.report.message # => "The following columns are required: email"
|
47
64
|
|
48
|
-
#
|
65
|
+
# Assuming the header was valid, let's run the import!
|
49
66
|
|
50
|
-
import.
|
51
|
-
|
67
|
+
import.run!
|
68
|
+
import.report.success? # => true
|
69
|
+
import.report.message # => "Import completed. 4 created, 2 updated, 1 failed to update"
|
70
|
+
```
|
52
71
|
|
53
|
-
|
54
|
-
# => returns an instance of `CSVImporter::Header`
|
72
|
+
## Installation
|
55
73
|
|
56
|
-
|
57
|
-
import.header.missing_columns # => ["email", "first_name"]
|
58
|
-
import.header.extra_columns # => ["zip_code"]
|
59
|
-
import.header.columns # => ["last_name", "zip_code"]
|
74
|
+
Add this line to your application's Gemfile:
|
60
75
|
|
61
|
-
|
76
|
+
```ruby
|
77
|
+
gem 'csv-importer'
|
78
|
+
```
|
79
|
+
|
80
|
+
And then execute:
|
81
|
+
|
82
|
+
$ bundle
|
62
83
|
|
63
|
-
|
64
|
-
# => return a (lazy?) Array of Rows
|
65
|
-
row = rows.first
|
84
|
+
Or install it yourself as:
|
66
85
|
|
67
|
-
|
68
|
-
row.raw_array # => [ "bob@example.com", "bob", "", "extra" ]
|
69
|
-
row.csv_attributes # => { email: "bob@example.com", first_name: "bob" }
|
70
|
-
row.model # => User<email: "bob@example.com", f_name: "bob", id: nil>
|
71
|
-
row.valid? # delegate to model.valid?
|
86
|
+
$ gem install csv-importer
|
72
87
|
|
73
|
-
|
88
|
+
## Usage
|
74
89
|
|
75
|
-
|
90
|
+
### Create an Importer
|
76
91
|
|
77
|
-
|
78
|
-
report.valid_rows
|
79
|
-
report.invalid_rows
|
80
|
-
report.created_rows
|
81
|
-
report.updated_rows
|
82
|
-
report.failed_to_create_rows
|
83
|
-
report.failed_to_update_rows
|
92
|
+
Create a class and include `CSVImporter`.
|
84
93
|
|
85
|
-
|
86
|
-
|
94
|
+
```ruby
|
95
|
+
class ImportUserCSV
|
96
|
+
include CSVImporter
|
97
|
+
end
|
87
98
|
```
|
88
99
|
|
89
|
-
|
100
|
+
### Associate an active record model
|
90
101
|
|
91
|
-
|
102
|
+
The `model` is likely to be an active record model.
|
92
103
|
|
93
104
|
```ruby
|
94
|
-
|
105
|
+
class ImportUserCSV
|
106
|
+
include CSVImporter
|
107
|
+
|
108
|
+
model User
|
109
|
+
end
|
95
110
|
```
|
96
111
|
|
97
|
-
|
112
|
+
It can also be a relation which is handy to preset attributes:
|
98
113
|
|
99
|
-
|
114
|
+
```ruby
|
115
|
+
class User
|
116
|
+
scope :pending, -> { where(status: 'pending') }
|
117
|
+
end
|
100
118
|
|
101
|
-
|
119
|
+
class ImportUserCSV
|
120
|
+
include CSVImporter
|
102
121
|
|
103
|
-
|
122
|
+
model User.pending
|
123
|
+
end
|
124
|
+
```
|
125
|
+
|
126
|
+
You can change the configuration at runtime to import associated records.
|
127
|
+
|
128
|
+
```ruby
|
129
|
+
class Team
|
130
|
+
has_many :users
|
131
|
+
end
|
132
|
+
|
133
|
+
team = Team.find(1)
|
134
|
+
|
135
|
+
ImportUserCSV.new(path: "tmp/my_file.csv", model: team.users)
|
136
|
+
```
|
137
|
+
|
138
|
+
### Define columns and their mapping
|
139
|
+
|
140
|
+
This is where the fun begins.
|
141
|
+
|
142
|
+
```ruby
|
143
|
+
class ImportUserCSV
|
144
|
+
include CSVImporter
|
145
|
+
|
146
|
+
model User
|
147
|
+
|
148
|
+
column :email
|
149
|
+
end
|
150
|
+
```
|
151
|
+
|
152
|
+
This will map the column named email to the email attribute. By default,
|
153
|
+
we downcase and strip the columns so it will work with a column spelled " EMail ".
|
154
|
+
|
155
|
+
Now, email could also be spelled "e-mail", or "mail", or even "courriel"
|
156
|
+
(oh, canada). Let's give it a couple of aliases then:
|
157
|
+
|
158
|
+
|
159
|
+
```ruby
|
160
|
+
class ImportUserCSV
|
161
|
+
include CSVImporter
|
162
|
+
|
163
|
+
model User
|
164
|
+
|
165
|
+
column :email, as: [/e.?mail/i, "courriel"]
|
166
|
+
end
|
167
|
+
```
|
168
|
+
|
169
|
+
Nice, emails should be downcased though, so let's do this.
|
170
|
+
|
171
|
+
```ruby
|
172
|
+
class ImportUserCSV
|
173
|
+
include CSVImporter
|
174
|
+
|
175
|
+
model User
|
176
|
+
|
177
|
+
column :email, as: [/e.?mail/i, "courriel"], to: ->(email) { email.downcase if email }
|
178
|
+
end
|
179
|
+
```
|
180
|
+
|
181
|
+
If you need to do more advanced stuff, you've got access to the model:
|
182
|
+
|
183
|
+
```ruby
|
184
|
+
class ImportUserCSV
|
185
|
+
include CSVImporter
|
186
|
+
|
187
|
+
model User
|
188
|
+
|
189
|
+
column :email, as: [/e.?mail/i, "courriel"], to: ->(email, user) { user.email = email.downcase; user.super_user! if email[/@brewhouse.io\z/] }
|
190
|
+
end
|
191
|
+
```
|
192
|
+
|
193
|
+
Now, what if the user does not provide the email column? It's not worth
|
194
|
+
running the import, we should just reject the CSV file right away.
|
195
|
+
That's easy:
|
196
|
+
|
197
|
+
```ruby
|
198
|
+
class ImportUserCSV
|
199
|
+
include CSVImporter
|
200
|
+
|
201
|
+
model User
|
202
|
+
|
203
|
+
column :email, required: true
|
204
|
+
end
|
205
|
+
|
206
|
+
import = ImportUserCSV.new(content: "name\nbob")
|
207
|
+
import.valid_header? # => false
|
208
|
+
import.report.status # => :invalid_header
|
209
|
+
import.report.message # => "The following columns are required: 'email'"
|
210
|
+
```
|
211
|
+
|
212
|
+
|
213
|
+
### Update or Create
|
214
|
+
|
215
|
+
You often want to find-and-update-or-create when importing a CSV file.
|
216
|
+
Just provide an identifier, and we'll do the hard work for you.
|
217
|
+
|
218
|
+
```ruby
|
219
|
+
class ImportUserCSV
|
220
|
+
include CSVImporter
|
221
|
+
|
222
|
+
model User
|
223
|
+
|
224
|
+
column :email, to: ->(email) { email.downcase if email }
|
225
|
+
|
226
|
+
identifier :email
|
227
|
+
end
|
228
|
+
```
|
229
|
+
|
230
|
+
And yes, we'll look for an existing record using the downcased email. :)
|
231
|
+
|
232
|
+
### Skip or Abort on error
|
233
|
+
|
234
|
+
By default, we skip invalid records and report errors back to the user.
|
235
|
+
There are times when you want your import to be all or nothing. The
|
236
|
+
`on_error` option is here for you.
|
237
|
+
|
238
|
+
```ruby
|
239
|
+
class ImportUserCSV
|
240
|
+
include CSVImporter
|
241
|
+
|
242
|
+
model User
|
243
|
+
|
244
|
+
column :email, to: ->(email) { email.downcase if email }
|
245
|
+
|
246
|
+
on_error :abort
|
247
|
+
end
|
248
|
+
|
249
|
+
import = ImportUserCSV.new(content: "email\nbob@example.com\nINVALID_EMAIL")
|
250
|
+
import.valid_header? # => true
|
251
|
+
import.run!
|
252
|
+
import.success? # => false
|
253
|
+
import.report.status # => :aborted
|
254
|
+
import.report.message # => "Import aborted"
|
255
|
+
```
|
256
|
+
|
257
|
+
You are now done defining your importer, let's run it!
|
258
|
+
|
259
|
+
### Import from a file, path or string
|
260
|
+
|
261
|
+
You can import from a file, path or just the CSV content. Please note
|
262
|
+
that we currently load the entire file in memory. Feel free to
|
263
|
+
contribute if you need to support CSV files with millions of lines! :)
|
264
|
+
|
265
|
+
```ruby
|
266
|
+
import = ImportUserCSV.new(file: my_file)
|
267
|
+
import = ImportUserCSV.new(path: "tmp/new_users.csv")
|
268
|
+
import = ImportUserCSV.new(content: "email,name\nbob@example.com,bob")
|
269
|
+
```
|
270
|
+
|
271
|
+
### Overwrite configuration at runtime
|
272
|
+
|
273
|
+
It is often needed to change the configuration at runtime, that's quite
|
274
|
+
easy:
|
275
|
+
|
276
|
+
```ruby
|
277
|
+
team = Team.find(1)
|
278
|
+
import = ImportUserCSV.new(file: my_file, model: team.users)
|
279
|
+
```
|
280
|
+
|
281
|
+
### Validate the header
|
282
|
+
|
283
|
+
On a web application, as soon as a CSV file is uploaded, you can check
|
284
|
+
if it has the required columns. This is handy to fail early and provide
|
285
|
+
the user with a meaningful error message right away.
|
286
|
+
|
287
|
+
```ruby
|
288
|
+
import = ImportUserCSV.new(file: params[:csv_file])
|
289
|
+
import.valid_header? # => false
|
290
|
+
import.report.message # => "The following columns are required: "email""
|
291
|
+
```
|
292
|
+
|
293
|
+
### Run the import and provide feedback to the user
|
294
|
+
|
295
|
+
```ruby
|
296
|
+
import = ImportUserCSV.new(file: params[:csv_file])
|
297
|
+
import.run!
|
298
|
+
import.report.message # => "Import completed. 4 created, 2 updated, 1 failed to update"
|
299
|
+
```
|
300
|
+
|
301
|
+
You can get your hands dirty and fetch the errored rows and the
|
302
|
+
associated error message:
|
303
|
+
|
304
|
+
```ruby
|
305
|
+
import.report.invalid_rows.map { |row| [row.model.email, row.errors] }
|
306
|
+
# => [ [ "INVALID_EMAIL", { "email" => "is invalid" } ] ]
|
307
|
+
```
|
308
|
+
|
309
|
+
We do our best to map the errors back to the original column name. So
|
310
|
+
with the following definition:
|
311
|
+
|
312
|
+
```ruby
|
313
|
+
column :email, as: /e.?mail/i
|
314
|
+
```
|
315
|
+
|
316
|
+
and csv:
|
317
|
+
|
318
|
+
```csv
|
319
|
+
E-Mail,name
|
320
|
+
INVALID_EMAIL,bob
|
321
|
+
```
|
322
|
+
|
323
|
+
The error returned should be: `{ "E-Mail" => "is invalid" }`
|
104
324
|
|
105
325
|
## Development
|
106
326
|
|
data/lib/csv_importer.rb
CHANGED
@@ -49,6 +49,7 @@ module CSVImporter
|
|
49
49
|
@csv = CSVReader.new(*args)
|
50
50
|
@config = self.class.csv_importer_config.dup
|
51
51
|
@config.attributes = args.last
|
52
|
+
@report = Report.new
|
52
53
|
end
|
53
54
|
|
54
55
|
attr_reader :csv, :report, :config
|
@@ -64,14 +65,23 @@ module CSVImporter
|
|
64
65
|
model_klass: config.model, identifier: config.identifier) }
|
65
66
|
end
|
66
67
|
|
68
|
+
def valid_header?
|
69
|
+
if @report.pending?
|
70
|
+
if header.valid?
|
71
|
+
@report = Report.new(status: :pending, extra_columns: header.extra_columns)
|
72
|
+
else
|
73
|
+
@report = Report.new(status: :invalid_header, missing_columns: header.missing_required_columns, extra_columns: header.extra_columns)
|
74
|
+
end
|
75
|
+
end
|
76
|
+
|
77
|
+
header.valid?
|
78
|
+
end
|
79
|
+
|
67
80
|
# Run the import. Return a Report.
|
68
81
|
def run!
|
69
|
-
if
|
82
|
+
if valid_header?
|
70
83
|
@report = Runner.call(rows: rows, when_invalid: config.when_invalid)
|
71
|
-
else
|
72
|
-
@report = Report.new(status: :invalid_header, missing_columns: header.missing_required_columns)
|
73
84
|
end
|
74
|
-
|
75
85
|
rescue CSV::MalformedCSVError => e
|
76
86
|
@report = Report.new(status: :invalid_csv_file, parser_error: e.message)
|
77
87
|
end
|
data/lib/csv_importer/report.rb
CHANGED
data/lib/csv_importer/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: csv-importer
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.
|
4
|
+
version: 0.1.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Philippe Creux
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2015-06-
|
11
|
+
date: 2015-06-13 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: virtus
|