importu 0.1.0 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.editorconfig +15 -0
- data/.github/workflows/ci.yml +48 -0
- data/.gitignore +4 -0
- data/.rspec +1 -0
- data/.rubocop.yml +311 -0
- data/.simplecov +14 -0
- data/.yardstick.yml +36 -0
- data/Appraisals +22 -0
- data/CHANGELOG.md +51 -0
- data/CONTRIBUTING.md +86 -0
- data/Gemfile +5 -1
- data/LICENSE +21 -0
- data/README.md +435 -52
- data/Rakefile +71 -0
- data/UPGRADING.md +188 -0
- data/gemfiles/rails_7_2.gemfile +11 -0
- data/gemfiles/rails_7_2.gemfile.lock +268 -0
- data/gemfiles/rails_8_0.gemfile +11 -0
- data/gemfiles/rails_8_0.gemfile.lock +271 -0
- data/gemfiles/rails_8_1.gemfile +11 -0
- data/gemfiles/rails_8_1.gemfile.lock +269 -0
- data/gemfiles/standalone.gemfile +8 -0
- data/gemfiles/standalone.gemfile.lock +197 -0
- data/importu.gemspec +41 -22
- data/lib/importu/backends/active_record.rb +171 -0
- data/lib/importu/backends/middleware/duplicate_manager_proxy.rb +41 -0
- data/lib/importu/backends/middleware/enforce_allowed_actions.rb +52 -0
- data/lib/importu/backends/middleware.rb +11 -0
- data/lib/importu/backends.rb +103 -0
- data/lib/importu/config_dsl.rb +381 -0
- data/lib/importu/converter_context.rb +94 -0
- data/lib/importu/converters.rb +119 -64
- data/lib/importu/definition.rb +23 -0
- data/lib/importu/duplicate_manager.rb +88 -0
- data/lib/importu/exceptions.rb +135 -4
- data/lib/importu/importer.rb +183 -96
- data/lib/importu/record.rb +138 -102
- data/lib/importu/sources/csv.rb +122 -0
- data/lib/importu/sources/json.rb +106 -0
- data/lib/importu/sources/ruby.rb +46 -0
- data/lib/importu/sources/xml.rb +133 -0
- data/lib/importu/sources.rb +13 -0
- data/lib/importu/summary.rb +277 -0
- data/lib/importu/version.rb +3 -1
- data/lib/importu.rb +45 -9
- data/spec/fixtures/books-duplicates/README.md +7 -0
- data/spec/fixtures/books-duplicates/infile.csv +7 -0
- data/spec/fixtures/books-duplicates/model.json +23 -0
- data/spec/fixtures/books-duplicates/summary.json +10 -0
- data/spec/fixtures/books-valid/README.md +13 -0
- data/spec/fixtures/books-valid/infile.csv +4 -0
- data/spec/fixtures/books-valid/infile.json +23 -0
- data/spec/fixtures/books-valid/infile.xml +21 -0
- data/spec/fixtures/books-valid/model.json +23 -0
- data/spec/fixtures/books-valid/record.json +26 -0
- data/spec/fixtures/books-valid/summary.json +8 -0
- data/spec/fixtures/source-empty-file/infile.csv +0 -0
- data/spec/fixtures/source-empty-file/infile.json +0 -0
- data/spec/fixtures/source-empty-file/infile.xml +0 -0
- data/spec/fixtures/source-empty-records/infile.csv +3 -0
- data/spec/fixtures/source-empty-records/infile.json +1 -0
- data/spec/fixtures/source-empty-records/infile.xml +6 -0
- data/spec/fixtures/source-malformed/infile.csv +1 -0
- data/spec/fixtures/source-malformed/infile.json +1 -0
- data/spec/fixtures/source-malformed/infile.xml +3 -0
- data/spec/fixtures/source-no-records/infile.csv +1 -0
- data/spec/fixtures/source-no-records/infile.json +1 -0
- data/spec/fixtures/source-no-records/infile.xml +3 -0
- data/spec/lib/importu/backends/active_record_spec.rb +150 -0
- data/spec/lib/importu/backends/middleware/duplicate_manager_proxy_spec.rb +70 -0
- data/spec/lib/importu/backends/middleware/enforce_allowed_actions_spec.rb +70 -0
- data/spec/lib/importu/backends_spec.rb +170 -0
- data/spec/lib/importu/converters_spec.rb +184 -141
- data/spec/lib/importu/definition_spec.rb +248 -0
- data/spec/lib/importu/duplicate_manager_spec.rb +92 -0
- data/spec/lib/importu/exceptions_spec.rb +69 -16
- data/spec/lib/importu/import_context_spec.rb +199 -0
- data/spec/lib/importu/importer_spec.rb +95 -0
- data/spec/lib/importu/integration_spec.rb +221 -0
- data/spec/lib/importu/record_spec.rb +130 -80
- data/spec/lib/importu/sources/csv_spec.rb +29 -0
- data/spec/lib/importu/sources/importer_source_examples.rb +175 -0
- data/spec/lib/importu/sources/json_spec.rb +29 -0
- data/spec/lib/importu/sources/ruby_spec.rb +102 -0
- data/spec/lib/importu/sources/xml_spec.rb +70 -0
- data/spec/lib/importu/summary_spec.rb +186 -0
- data/spec/spec_helper.rb +91 -7
- data/spec/support/active_record.rb +20 -0
- data/spec/support/book_importer.rb +31 -0
- data/spec/support/dummy_backend.rb +50 -0
- data/spec/support/fixtures_helper.rb +43 -0
- data/spec/support/matchers/delegate_matcher.rb +14 -8
- metadata +173 -100
- data/lib/importu/core_ext/array/deep_freeze.rb +0 -7
- data/lib/importu/core_ext/deep_freeze.rb +0 -3
- data/lib/importu/core_ext/hash/deep_freeze.rb +0 -7
- data/lib/importu/core_ext/object/deep_freeze.rb +0 -6
- data/lib/importu/core_ext.rb +0 -3
- data/lib/importu/dsl.rb +0 -127
- data/lib/importu/importer/csv.rb +0 -52
- data/lib/importu/importer/json.rb +0 -45
- data/lib/importu/importer/xml.rb +0 -55
- data/spec/factories/importer.rb +0 -12
- data/spec/factories/importer_record.rb +0 -13
- data/spec/factories/json_importer.rb +0 -14
- data/spec/factories/xml_importer.rb +0 -12
- data/spec/lib/importu/dsl_spec.rb +0 -26
- data/spec/lib/importu/importer/json_spec.rb +0 -37
- data/spec/lib/importu/importer/xml_spec.rb +0 -14
data/README.md
CHANGED
|
@@ -1,19 +1,85 @@
|
|
|
1
|
-
|
|
2
|
-
Importu is a framework and DSL for simplifying the process of importing
|
|
3
|
-
structured data into your application. It is also a tool for separating
|
|
4
|
-
import-related business logic from the rest of your code.
|
|
1
|
+
# Importu
|
|
5
2
|
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
`lib/importu/importer` directory for implementations of supported importers.
|
|
3
|
+
[][gem]
|
|
4
|
+
[][ci]
|
|
9
5
|
|
|
10
|
-
|
|
11
|
-
|
|
6
|
+
[gem]: https://rubygems.org/gems/importu
|
|
7
|
+
[ci]: https://github.com/dhedlund/importu/actions/workflows/ci.yml
|
|
8
|
+
|
|
9
|
+
Importu is a declarative data import library for Ruby. Define importers that
|
|
10
|
+
read like specifications - with fields, converters, and validation rules -
|
|
11
|
+
then parse CSV, JSON, or XML with consistent error handling.
|
|
12
|
+
|
|
13
|
+
## Table of Contents
|
|
14
|
+
|
|
15
|
+
- [Goals](#goals)
|
|
16
|
+
- [Installation](#installation)
|
|
17
|
+
- [Quick Start](#quick-start)
|
|
18
|
+
- [Example](#example)
|
|
19
|
+
- [Sources](#sources) (CSV, JSON, XML, Ruby)
|
|
20
|
+
- [Converters](#converters)
|
|
21
|
+
- [Backends](#backends) (ActiveRecord)
|
|
22
|
+
- [Error Handling](#error-handling)
|
|
23
|
+
- [Contributing](#contributing)
|
|
24
|
+
|
|
25
|
+
For working examples, see the [importu-examples](https://github.com/dhedlund/importu-examples) repository.
|
|
26
|
+
|
|
27
|
+
## Goals
|
|
28
|
+
|
|
29
|
+
**Primary goal:** Importers that read like specifications.
|
|
30
|
+
|
|
31
|
+
- Define fields, converters, and rules declaratively
|
|
32
|
+
- Separate what the data should look like from how you process it
|
|
33
|
+
- Use the importer as the contract shared with data providers
|
|
34
|
+
|
|
35
|
+
**Secondary goals:**
|
|
36
|
+
|
|
37
|
+
- Reusable parsers for common formats (CSV, JSON, XML)
|
|
38
|
+
- Modular design - extend or replace components as needed
|
|
39
|
+
|
|
40
|
+
## Installation
|
|
41
|
+
|
|
42
|
+
Add to your Gemfile:
|
|
43
|
+
|
|
44
|
+
```ruby
|
|
45
|
+
gem "importu"
|
|
46
|
+
```
|
|
47
|
+
|
|
48
|
+
Then run `bundle install`.
|
|
49
|
+
|
|
50
|
+
Or install directly:
|
|
51
|
+
|
|
52
|
+
```bash
|
|
53
|
+
gem install importu
|
|
54
|
+
```
|
|
55
|
+
|
|
56
|
+
## Requirements
|
|
57
|
+
|
|
58
|
+
- Ruby >= 3.1
|
|
59
|
+
- Rails >= 7.2 (optional, for ActiveRecord backend)
|
|
60
|
+
- nokogiri (optional, for XML source)
|
|
61
|
+
|
|
62
|
+
## Quick Start
|
|
63
|
+
|
|
64
|
+
```ruby
|
|
65
|
+
require "importu"
|
|
66
|
+
|
|
67
|
+
# Define an importer with the fields you expect
|
|
68
|
+
class BookImporter < Importu::Importer
|
|
69
|
+
fields :title, :author, :isbn10
|
|
70
|
+
end
|
|
71
|
+
|
|
72
|
+
# Create a source and importer
|
|
73
|
+
source = Importu::Sources::CSV.new("books.csv")
|
|
74
|
+
importer = BookImporter.new(source)
|
|
75
|
+
|
|
76
|
+
# Iterate over records
|
|
77
|
+
importer.records.each do |record|
|
|
78
|
+
puts "#{record[:title]} by #{record[:author]}"
|
|
79
|
+
end
|
|
80
|
+
```
|
|
12
81
|
|
|
13
82
|
## Example
|
|
14
|
-
**Please read the tutorial in the
|
|
15
|
-
[import-examples](https://github.com/dhedlund/importu-examples) repository for
|
|
16
|
-
a more complete overview of available features.**
|
|
17
83
|
|
|
18
84
|
Assuming you have the following data in the file `data.csv`:
|
|
19
85
|
```
|
|
@@ -25,7 +91,7 @@ Assuming you have the following data in the file `data.csv`:
|
|
|
25
91
|
|
|
26
92
|
You can create a minimal importer to read the CSV data:
|
|
27
93
|
```ruby
|
|
28
|
-
class BookImporter < Importu::Importer
|
|
94
|
+
class BookImporter < Importu::Importer
|
|
29
95
|
# fields we expect to find in the CSV file, field order is not important
|
|
30
96
|
fields :title, :author, :isbn10, :pages, :release_date
|
|
31
97
|
end
|
|
@@ -33,10 +99,10 @@ end
|
|
|
33
99
|
|
|
34
100
|
And then load that data in your application:
|
|
35
101
|
```ruby
|
|
36
|
-
require
|
|
102
|
+
require "importu"
|
|
37
103
|
|
|
38
|
-
filename = File.expand_path(
|
|
39
|
-
importer = BookImporter.new(filename)
|
|
104
|
+
filename = File.expand_path("data.csv", __dir__)
|
|
105
|
+
importer = BookImporter.new(Importu::Sources::CSV.new(filename))
|
|
40
106
|
|
|
41
107
|
# importer.records returns an Enumerable
|
|
42
108
|
importer.records.count # => 3
|
|
@@ -50,16 +116,16 @@ importer.records.map(&:to_hash)
|
|
|
50
116
|
|
|
51
117
|
A more complete example of the book importer above might look like the following:
|
|
52
118
|
```ruby
|
|
53
|
-
require
|
|
119
|
+
require "importu"
|
|
54
120
|
|
|
55
|
-
class BookImporter < Importu::Importer
|
|
56
|
-
# if you want to define multiple fields with similar rules, use
|
|
57
|
-
# NOTE:
|
|
121
|
+
class BookImporter < Importu::Importer
|
|
122
|
+
# if you want to define multiple fields with similar rules, use "fields"
|
|
123
|
+
# NOTE: `required: true` is redundant in this example; any defined
|
|
58
124
|
# fields must have a corresponding column in the source data by default
|
|
59
|
-
fields :title, :isbn10, :authors, :
|
|
125
|
+
fields :title, :isbn10, :authors, required: true
|
|
60
126
|
|
|
61
127
|
# to mark a field as optional in the source data
|
|
62
|
-
field :pages, :
|
|
128
|
+
field :pages, required: false
|
|
63
129
|
|
|
64
130
|
# you can reference the same field multiple times and apply rules
|
|
65
131
|
# incrementally; this provides a lot of flexibility in describing your
|
|
@@ -67,16 +133,16 @@ class BookImporter < Importu::Importer::Csv
|
|
|
67
133
|
# explicitly stating that "these are required"; the importer becomes the
|
|
68
134
|
# reference document:
|
|
69
135
|
#
|
|
70
|
-
# fields :title, :isbn10, :authors, :release_date, :
|
|
71
|
-
# fields :pages, :
|
|
136
|
+
# fields :title, :isbn10, :authors, :release_date, required: true
|
|
137
|
+
# fields :pages, required: false
|
|
72
138
|
#
|
|
73
139
|
# ...or keep all the rules for that field with that field, whatever makes
|
|
74
140
|
# sense for your particular use case.
|
|
75
141
|
|
|
76
142
|
# if your field is not named the same as the source data, you can use
|
|
77
|
-
# :
|
|
143
|
+
# `label: "..."` to reference the correct field, where the label is what
|
|
78
144
|
# the field is labelled in the source data
|
|
79
|
-
field :authors, :
|
|
145
|
+
field :authors, label: "author"
|
|
80
146
|
|
|
81
147
|
# you can convert fields using one of the built-in converters
|
|
82
148
|
field :pages, &convert_to(:integer)
|
|
@@ -85,12 +151,12 @@ class BookImporter < Importu::Importer::Csv
|
|
|
85
151
|
# some converters allow you to pass additional arguments; in the case of
|
|
86
152
|
# the date converter, you can pass an explicit format and it will raise an
|
|
87
153
|
# error if a date is encountered that doesn't match
|
|
88
|
-
field :release_date, &convert_to(:date, :
|
|
154
|
+
field :release_date, &convert_to(:date, format: "%b %d, %Y")
|
|
89
155
|
|
|
90
156
|
# passing a block to a field definition allows you to add your own logic
|
|
91
157
|
# for converting data or checking for unexpected values
|
|
92
158
|
field :authors do
|
|
93
|
-
value =
|
|
159
|
+
value = trimmed(:authors) # apply :trimmed converter which strips whitespace
|
|
94
160
|
authors = value ? value.split(/(?:, )|(?: and )|(?: & )/i) : []
|
|
95
161
|
|
|
96
162
|
if authors.none?
|
|
@@ -103,61 +169,378 @@ class BookImporter < Importu::Importer::Csv
|
|
|
103
169
|
end
|
|
104
170
|
|
|
105
171
|
# abstract fields that are not part of the original data set can be created
|
|
106
|
-
field :by_matz, :
|
|
172
|
+
field :by_matz, abstract: true do
|
|
107
173
|
# field conversion rules can reference other fields; the field value is
|
|
108
174
|
# what would be returned after referenced field's rules have been applied
|
|
109
|
-
field_value(:authors).include?(
|
|
175
|
+
field_value(:authors).include?("Yukihiro Matsumoto")
|
|
110
176
|
end
|
|
111
177
|
end
|
|
112
178
|
```
|
|
113
179
|
|
|
114
180
|
A more condensed version of the above, with all the rules grouped into individual field definitions:
|
|
115
181
|
```ruby
|
|
116
|
-
class BookImporter < Importu::Importer
|
|
182
|
+
class BookImporter < Importu::Importer
|
|
117
183
|
fields :title, :isbn10
|
|
118
184
|
|
|
119
|
-
field :authors, :
|
|
120
|
-
authors =
|
|
185
|
+
field :authors, label: "author" do
|
|
186
|
+
authors = trimmed(:authors).to_s.split(/(?:, )|(?: and )|(?: & )/i)
|
|
121
187
|
raise ArgumentError, "at least one author is required" if authors.none?
|
|
122
|
-
|
|
188
|
+
|
|
123
189
|
authors
|
|
124
190
|
end
|
|
125
191
|
|
|
126
|
-
field :pages, :
|
|
127
|
-
field :release_date, &convert_to(:date, :
|
|
192
|
+
field :pages, required: false, &convert_to(:integer)
|
|
193
|
+
field :release_date, &convert_to(:date, format: "%b %d, %Y")
|
|
128
194
|
|
|
129
|
-
field :by_matz, :
|
|
130
|
-
field_value(:authors).include?(
|
|
195
|
+
field :by_matz, abstract: true do
|
|
196
|
+
field_value(:authors).include?("Yukihiro Matsumoto")
|
|
131
197
|
end
|
|
132
198
|
end
|
|
133
199
|
```
|
|
134
200
|
|
|
201
|
+
## Sources
|
|
202
|
+
|
|
203
|
+
Importu supports multiple source formats. Each source parses input data and
|
|
204
|
+
provides an enumerator of row hashes.
|
|
205
|
+
|
|
206
|
+
### CSV
|
|
207
|
+
|
|
208
|
+
```ruby
|
|
209
|
+
source = Importu::Sources::CSV.new("data.csv")
|
|
210
|
+
|
|
211
|
+
# With custom options
|
|
212
|
+
source = Importu::Sources::CSV.new("data.csv", csv_options: {
|
|
213
|
+
col_sep: ";",
|
|
214
|
+
encoding: "ISO-8859-1"
|
|
215
|
+
})
|
|
216
|
+
```
|
|
217
|
+
|
|
218
|
+
Options inside `csv_options` are passed directly to Ruby's
|
|
219
|
+
[CSV library](https://ruby-doc.org/stdlib/libdoc/csv/rdoc/CSV.html).
|
|
220
|
+
Common options include `col_sep`, `quote_char`, and `encoding`.
|
|
221
|
+
|
|
222
|
+
### JSON
|
|
223
|
+
|
|
224
|
+
```ruby
|
|
225
|
+
source = Importu::Sources::JSON.new("data.json")
|
|
226
|
+
```
|
|
227
|
+
|
|
228
|
+
The JSON file must have an array as the root element. The entire file is loaded
|
|
229
|
+
into memory, so this source is not suitable for very large files.
|
|
230
|
+
|
|
231
|
+
### XML
|
|
232
|
+
|
|
233
|
+
```ruby
|
|
234
|
+
# records_xpath is required
|
|
235
|
+
source = Importu::Sources::XML.new("data.xml", records_xpath: "//book")
|
|
236
|
+
```
|
|
237
|
+
|
|
238
|
+
The `records_xpath` option specifies which elements to treat as records. Each
|
|
239
|
+
matching element becomes a row, with child elements and attributes becoming
|
|
240
|
+
fields.
|
|
241
|
+
|
|
242
|
+
### Ruby
|
|
243
|
+
|
|
244
|
+
```ruby
|
|
245
|
+
data = [
|
|
246
|
+
{ "name" => "Alice", "email" => "alice@example.com" },
|
|
247
|
+
{ "name" => "Bob", "email" => "bob@example.com" }
|
|
248
|
+
]
|
|
249
|
+
source = Importu::Sources::Ruby.new(data)
|
|
250
|
+
```
|
|
251
|
+
|
|
252
|
+
Accepts an array of hashes or any enumerable that yields objects responding to
|
|
253
|
+
`to_hash`. Useful for importing data already in memory or from other Ruby
|
|
254
|
+
sources.
|
|
255
|
+
|
|
256
|
+
|
|
257
|
+
## Converters
|
|
258
|
+
|
|
259
|
+
### Built-in Converters
|
|
260
|
+
|
|
261
|
+
Importu comes with several built-in converters for the most common Ruby
|
|
262
|
+
data types and data cleanup operations. Assigning a converter to your fields
|
|
263
|
+
ensures that the value can be translated to the desired type or a validation
|
|
264
|
+
error will be generated and the record flagged as invalid.
|
|
265
|
+
|
|
266
|
+
To use a converter, add `&convert_to(type)` to the end of a field definition,
|
|
267
|
+
where `type` is one of the types below.
|
|
268
|
+
|
|
269
|
+
| Type | Description |
|
|
270
|
+
|-----------|-------------|
|
|
271
|
+
| :boolean | Coerces value to a boolean. Must be true, yes, 1, false, no, 0. Case-insensitive. |
|
|
272
|
+
| :date | Coerces value to a date. Tries to guess format unless `format: ...` is provided. |
|
|
273
|
+
| :datetime | Coerces value to a datetime. Tries to guess format unless `format: ...` is provided. |
|
|
274
|
+
| :decimal | Coerces value to a BigDecimal. |
|
|
275
|
+
| :float | Coerces value to a Float. |
|
|
276
|
+
| :integer | Coerces value to an integer. Must look like an integer ("1.0" is invalid). |
|
|
277
|
+
| :raw | Do nothing. Value will be passed through as-is from the source value. |
|
|
278
|
+
| :string | Coerces value to a string, trimming leading and trailing whitespace. |
|
|
279
|
+
| :trimmed | Trims leading and trailing whitespace if value is a string, otherwise leave as-is. Empty strings are converted to nil. |
|
|
280
|
+
|
|
281
|
+
Some converters, such as :date and :datetime, accept optional arguments. To
|
|
282
|
+
pass arguments to a converter, add them after the converter's type. For
|
|
283
|
+
example, `&convert_to(:date, format: "%Y-%m-%d")` will force date parsing to
|
|
284
|
+
use the "YYYY-MM-DD" format.
|
|
285
|
+
|
|
286
|
+
| Type | Argument | Default | Description |
|
|
287
|
+
|-----------|----------|--------------|-------------|
|
|
288
|
+
| :date | :format | _autodetect_ | Parse value using a [strftime format](https://ruby-doc.org/stdlib/libdoc/date/rdoc/Date.html#method-i-strftime). |
|
|
289
|
+
| :datetime | :format | _autodetect_ | Parse value using a [strftime format](https://ruby-doc.org/stdlib/libdoc/date/rdoc/DateTime.html#method-i-strftime). |
|
|
290
|
+
|
|
291
|
+
Built-in converters can be overridden by creating a custom converter using
|
|
292
|
+
the same name as the built-in converter. Overriding a converter in one import
|
|
293
|
+
definition will not affect any converters outside of that definition.
|
|
294
|
+
|
|
295
|
+
### Custom Converters
|
|
296
|
+
|
|
297
|
+
All built-in converters are defined using the same method as custom
|
|
298
|
+
converters. See `lib/importu/converters.rb` for their implementation, which
|
|
299
|
+
can be used as a guide for writing your own.
|
|
300
|
+
|
|
301
|
+
```ruby
|
|
302
|
+
class BookImporter < Importu::Importer
|
|
303
|
+
converter :varchar do |field_name, length: 255|
|
|
304
|
+
value = trimmed(field_name)
|
|
305
|
+
value.nil? ? nil : String(value).slice(0, length)
|
|
306
|
+
|
|
307
|
+
# Instead of taking the first 255 characters, you may prefer to raise
|
|
308
|
+
# an error that enforces values from source data cannot exceed length.
|
|
309
|
+
# raise ArgumentError, "cannot exceed #{length}" if value.length > length
|
|
310
|
+
end
|
|
311
|
+
|
|
312
|
+
fields :title, :author, &convert_to(:varchar)
|
|
313
|
+
fields :title, &convert_to(:varchar, length: 50)
|
|
314
|
+
end
|
|
315
|
+
```
|
|
316
|
+
|
|
317
|
+
To raise an error from within a converter, raise an `ArgumentError` with a
|
|
318
|
+
message. That field will then be marked as invalid on the record and the
|
|
319
|
+
message will be used as the validation error message.
|
|
320
|
+
|
|
321
|
+
If you would like to use the same custom converters across multiple import
|
|
322
|
+
definitions, they can be defined in a mixin and then included at the top of
|
|
323
|
+
each definition or in a class that the imports inherit from. Importu takes
|
|
324
|
+
this approach with its default converters, so you can look at the built-in
|
|
325
|
+
converters as an example.
|
|
326
|
+
|
|
327
|
+
### Default Converter
|
|
328
|
+
|
|
329
|
+
By default, importu uses the `:trimmed` converter unless a converter has been
|
|
330
|
+
explicitly defined for the field. This should work for the vast majority of use
|
|
331
|
+
cases, but there are some cases where the default isn't exactly what you
|
|
332
|
+
wanted.
|
|
333
|
+
|
|
334
|
+
1. If you have a couple of fields that cannot have their values trimmed, consider
|
|
335
|
+
changing those fields to use the :raw converter.
|
|
336
|
+
|
|
337
|
+
2. If your opinion of trimmed is different than importu's, you can override the
|
|
338
|
+
built-in :trimmed converter to match your preferred behavior.
|
|
339
|
+
|
|
340
|
+
3. If you never want any fields to have the :trimmed converter applied, you can
|
|
341
|
+
change the default converter to use the :raw converter:
|
|
342
|
+
```ruby
|
|
343
|
+
class BookImporter < Importu::Importer
|
|
344
|
+
converter :default, &convert_to(:raw)
|
|
345
|
+
end
|
|
346
|
+
```
|
|
347
|
+
|
|
348
|
+
4. If you want to raise an error if a converter is not explicitly set for each
|
|
349
|
+
field:
|
|
350
|
+
```ruby
|
|
351
|
+
class BookImporter < Importu::Importer
|
|
352
|
+
converter :default do |name|
|
|
353
|
+
raise ArgumentError, "converter not defined for field #{name}"
|
|
354
|
+
end
|
|
355
|
+
end
|
|
356
|
+
```
|
|
357
|
+
|
|
358
|
+
|
|
359
|
+
## Backends
|
|
360
|
+
|
|
135
361
|
### Rails / ActiveRecord
|
|
362
|
+
|
|
136
363
|
If you define a model in the importer definition and the importer fields are
|
|
137
364
|
named the same as the attributes in your model, Importu can iterate through and
|
|
138
365
|
create or update records for you:
|
|
139
366
|
|
|
140
367
|
```ruby
|
|
141
|
-
class BookImporter < Importu::Importer
|
|
142
|
-
model
|
|
368
|
+
class BookImporter < Importu::Importer
|
|
369
|
+
model "Book"
|
|
143
370
|
|
|
144
371
|
# ...
|
|
145
372
|
end
|
|
146
373
|
|
|
147
|
-
filename = File.expand_path(
|
|
148
|
-
importer = BookImporter.new(filename)
|
|
374
|
+
filename = File.expand_path("data.csv", __dir__)
|
|
375
|
+
importer = BookImporter.new(Importu::Sources::CSV.new(filename))
|
|
376
|
+
|
|
377
|
+
summary = importer.import!
|
|
378
|
+
|
|
379
|
+
summary.total # => 3
|
|
380
|
+
summary.invalid # => 0
|
|
381
|
+
summary.created # => 3
|
|
382
|
+
summary.updated # => 0
|
|
383
|
+
summary.unchanged # => 0
|
|
384
|
+
|
|
385
|
+
summary = importer.import!
|
|
386
|
+
|
|
387
|
+
summary.total # => 3
|
|
388
|
+
summary.created # => 0
|
|
389
|
+
summary.unchanged # => 3
|
|
390
|
+
```
|
|
391
|
+
|
|
392
|
+
### Allowed Actions
|
|
393
|
+
|
|
394
|
+
By default, importers only allow creating new records. If you want to update
|
|
395
|
+
existing records, you must explicitly allow it:
|
|
396
|
+
|
|
397
|
+
```ruby
|
|
398
|
+
class BookImporter < Importu::Importer
|
|
399
|
+
model "Book"
|
|
400
|
+
allow_actions :create, :update # Allow both creating and updating
|
|
401
|
+
|
|
402
|
+
find_by :isbn10 # Find existing records by ISBN
|
|
403
|
+
# ...
|
|
404
|
+
end
|
|
405
|
+
```
|
|
406
|
+
|
|
407
|
+
If an action is not allowed, the record will be marked as invalid with an error
|
|
408
|
+
message explaining which action was rejected.
|
|
409
|
+
|
|
410
|
+
| Configuration | Behavior |
|
|
411
|
+
|---------------|----------|
|
|
412
|
+
| `allow_actions :create` | Only create new records (default) |
|
|
413
|
+
| `allow_actions :update` | Only update existing records |
|
|
414
|
+
| `allow_actions :create, :update` | Create new records and update existing ones |
|
|
415
|
+
|
|
416
|
+
### Finding Existing Records
|
|
417
|
+
|
|
418
|
+
Use `find_by` to specify which fields identify existing records:
|
|
419
|
+
|
|
420
|
+
```ruby
|
|
421
|
+
class BookImporter < Importu::Importer
|
|
422
|
+
model "Book"
|
|
423
|
+
allow_actions :create, :update
|
|
424
|
+
|
|
425
|
+
find_by :isbn10 # Single field
|
|
426
|
+
# or
|
|
427
|
+
find_by :title, :author # Multiple fields (all must match)
|
|
428
|
+
# or
|
|
429
|
+
find_by do |record| # Custom lookup logic
|
|
430
|
+
find_by(title: record[:title].downcase)
|
|
431
|
+
end
|
|
432
|
+
end
|
|
433
|
+
```
|
|
434
|
+
|
|
435
|
+
### Before Save Hook
|
|
436
|
+
|
|
437
|
+
Use `before_save` to modify records just before they're saved:
|
|
438
|
+
|
|
439
|
+
```ruby
|
|
440
|
+
class BookImporter < Importu::Importer
|
|
441
|
+
model "Book"
|
|
442
|
+
|
|
443
|
+
before_save do
|
|
444
|
+
# `object` is the model instance, `record` is the import data, `action` is :create or :update
|
|
445
|
+
object.title = object.title.titleize
|
|
446
|
+
object.imported_at = Time.current
|
|
447
|
+
object.created_by = "importer" if action == :create
|
|
448
|
+
end
|
|
449
|
+
end
|
|
450
|
+
```
|
|
451
|
+
|
|
452
|
+
### Controlling Field Assignment
|
|
453
|
+
|
|
454
|
+
By default, all fields are assigned on both create and update. You can
|
|
455
|
+
control this per-field:
|
|
456
|
+
|
|
457
|
+
```ruby
|
|
458
|
+
class BookImporter < Importu::Importer
|
|
459
|
+
model "Book"
|
|
460
|
+
allow_actions :create, :update
|
|
461
|
+
|
|
462
|
+
field :isbn10 # Assigned on create and update (default)
|
|
463
|
+
field :created_by, update: false # Only assigned on create
|
|
464
|
+
field :updated_by, create: false # Only assigned on update
|
|
465
|
+
end
|
|
466
|
+
```
|
|
467
|
+
|
|
468
|
+
## Error Handling
|
|
469
|
+
|
|
470
|
+
### Checking Individual Records
|
|
471
|
+
|
|
472
|
+
Records can have conversion errors (invalid data types, missing required
|
|
473
|
+
fields). Check validity before processing:
|
|
474
|
+
|
|
475
|
+
```ruby
|
|
476
|
+
importer.records.each do |record|
|
|
477
|
+
if record.valid?
|
|
478
|
+
process(record.to_hash)
|
|
479
|
+
else
|
|
480
|
+
record.errors.each { |e| puts e.to_s }
|
|
481
|
+
end
|
|
482
|
+
end
|
|
483
|
+
```
|
|
484
|
+
|
|
485
|
+
### Import Summary
|
|
486
|
+
|
|
487
|
+
When using `import!` with a backend, the returned summary contains aggregate
|
|
488
|
+
results and error details:
|
|
489
|
+
|
|
490
|
+
```ruby
|
|
491
|
+
summary = importer.import!
|
|
492
|
+
|
|
493
|
+
# Aggregate counts
|
|
494
|
+
puts "Total: #{summary.total}"
|
|
495
|
+
puts "Created: #{summary.created}"
|
|
496
|
+
puts "Updated: #{summary.updated}"
|
|
497
|
+
puts "Unchanged: #{summary.unchanged}"
|
|
498
|
+
puts "Invalid: #{summary.invalid}"
|
|
499
|
+
|
|
500
|
+
# Human-readable output
|
|
501
|
+
puts summary.result_msg
|
|
502
|
+
|
|
503
|
+
# Machine-readable output (for JSON APIs, etc.)
|
|
504
|
+
summary.to_hash
|
|
505
|
+
```
|
|
506
|
+
|
|
507
|
+
### Error Details
|
|
508
|
+
|
|
509
|
+
```ruby
|
|
510
|
+
# Aggregated error counts
|
|
511
|
+
summary.validation_errors.each do |message, count|
|
|
512
|
+
puts "#{message}: #{count} occurrences"
|
|
513
|
+
end
|
|
514
|
+
|
|
515
|
+
# Errors by record index (0-based)
|
|
516
|
+
summary.itemized_errors.each do |index, errors|
|
|
517
|
+
puts "Record #{index}: #{errors.map(&:to_s).join(', ')}"
|
|
518
|
+
end
|
|
519
|
+
```
|
|
520
|
+
|
|
521
|
+
### Generating Error Reports
|
|
522
|
+
|
|
523
|
+
All file-based sources can generate a copy of the input with errors appended,
|
|
524
|
+
useful for returning to data providers:
|
|
525
|
+
|
|
526
|
+
```ruby
|
|
527
|
+
summary = importer.import!
|
|
528
|
+
|
|
529
|
+
if summary.invalid > 0
|
|
530
|
+
error_file = source.write_errors(summary)
|
|
531
|
+
# error_file is a Tempfile with "_errors" column/field added
|
|
532
|
+
|
|
533
|
+
# To include only rows that had errors:
|
|
534
|
+
error_file = source.write_errors(summary, only_errors: true)
|
|
535
|
+
end
|
|
536
|
+
```
|
|
149
537
|
|
|
150
|
-
|
|
538
|
+
## Contributing
|
|
151
539
|
|
|
152
|
-
|
|
153
|
-
importer.invalid # => 0
|
|
154
|
-
importer.created # => 3
|
|
155
|
-
importer.updated # => 0
|
|
156
|
-
importer.unchanged # => 0
|
|
540
|
+
See [CONTRIBUTING.md](CONTRIBUTING.md) for development setup and guidelines.
|
|
157
541
|
|
|
158
|
-
|
|
542
|
+
Before submitting changes, run the preflight checks:
|
|
159
543
|
|
|
160
|
-
|
|
161
|
-
|
|
162
|
-
importer.unchanged # => 3
|
|
544
|
+
```bash
|
|
545
|
+
bundle exec rake preflight
|
|
163
546
|
```
|
data/Rakefile
ADDED
|
@@ -0,0 +1,71 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "yaml"
|
|
4
|
+
|
|
5
|
+
begin
|
|
6
|
+
require "rspec/core/rake_task"
|
|
7
|
+
RSpec::Core::RakeTask.new(:spec)
|
|
8
|
+
rescue LoadError
|
|
9
|
+
# rspec not available
|
|
10
|
+
end
|
|
11
|
+
|
|
12
|
+
begin
|
|
13
|
+
require "rubocop/rake_task"
|
|
14
|
+
RuboCop::RakeTask.new(:rubocop)
|
|
15
|
+
rescue LoadError
|
|
16
|
+
# rubocop not available
|
|
17
|
+
end
|
|
18
|
+
|
|
19
|
+
begin
|
|
20
|
+
require "yardstick/rake/verify"
|
|
21
|
+
|
|
22
|
+
config = YAML.safe_load_file(".yardstick.yml")
|
|
23
|
+
|
|
24
|
+
Yardstick::Rake::Verify.new(:yardstick_verify, config) do |verify|
|
|
25
|
+
verify.threshold = config.fetch("threshold", 100)
|
|
26
|
+
end
|
|
27
|
+
|
|
28
|
+
task yardstick: :yardstick_verify
|
|
29
|
+
rescue LoadError
|
|
30
|
+
# yardstick not available
|
|
31
|
+
end
|
|
32
|
+
|
|
33
|
+
desc "Run all checks before release (specs, rubocop, yardstick)"
|
|
34
|
+
task :preflight do
|
|
35
|
+
puts "=== Preflight Checks ===\n\n"
|
|
36
|
+
|
|
37
|
+
# Check for uncommitted changes
|
|
38
|
+
uncommitted = `git status --porcelain`.strip
|
|
39
|
+
unless uncommitted.empty?
|
|
40
|
+
puts "WARNING: Uncommitted changes detected:"
|
|
41
|
+
puts uncommitted.lines.map { |l| " #{l}" }.join
|
|
42
|
+
puts
|
|
43
|
+
end
|
|
44
|
+
|
|
45
|
+
# Run checks
|
|
46
|
+
checks = [
|
|
47
|
+
["RSpec", "spec"],
|
|
48
|
+
["RuboCop", "rubocop"],
|
|
49
|
+
["YARD Coverage", "yardstick"]
|
|
50
|
+
]
|
|
51
|
+
|
|
52
|
+
failed = []
|
|
53
|
+
|
|
54
|
+
checks.each do |name, task_name|
|
|
55
|
+
print "Running #{name}... "
|
|
56
|
+
begin
|
|
57
|
+
Rake::Task[task_name].invoke
|
|
58
|
+
puts "OK"
|
|
59
|
+
rescue SystemExit, RuntimeError
|
|
60
|
+
puts "FAILED"
|
|
61
|
+
failed << name
|
|
62
|
+
end
|
|
63
|
+
end
|
|
64
|
+
|
|
65
|
+
puts
|
|
66
|
+
if failed.any?
|
|
67
|
+
abort "Preflight FAILED: #{failed.join(", ")}"
|
|
68
|
+
else
|
|
69
|
+
puts "All preflight checks passed."
|
|
70
|
+
end
|
|
71
|
+
end
|