importu 0.1.0 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (110) hide show
  1. checksums.yaml +7 -0
  2. data/.editorconfig +15 -0
  3. data/.github/workflows/ci.yml +48 -0
  4. data/.gitignore +4 -0
  5. data/.rspec +1 -0
  6. data/.rubocop.yml +311 -0
  7. data/.simplecov +14 -0
  8. data/.yardstick.yml +36 -0
  9. data/Appraisals +22 -0
  10. data/CHANGELOG.md +51 -0
  11. data/CONTRIBUTING.md +86 -0
  12. data/Gemfile +5 -1
  13. data/LICENSE +21 -0
  14. data/README.md +435 -52
  15. data/Rakefile +71 -0
  16. data/UPGRADING.md +188 -0
  17. data/gemfiles/rails_7_2.gemfile +11 -0
  18. data/gemfiles/rails_7_2.gemfile.lock +268 -0
  19. data/gemfiles/rails_8_0.gemfile +11 -0
  20. data/gemfiles/rails_8_0.gemfile.lock +271 -0
  21. data/gemfiles/rails_8_1.gemfile +11 -0
  22. data/gemfiles/rails_8_1.gemfile.lock +269 -0
  23. data/gemfiles/standalone.gemfile +8 -0
  24. data/gemfiles/standalone.gemfile.lock +197 -0
  25. data/importu.gemspec +41 -22
  26. data/lib/importu/backends/active_record.rb +171 -0
  27. data/lib/importu/backends/middleware/duplicate_manager_proxy.rb +41 -0
  28. data/lib/importu/backends/middleware/enforce_allowed_actions.rb +52 -0
  29. data/lib/importu/backends/middleware.rb +11 -0
  30. data/lib/importu/backends.rb +103 -0
  31. data/lib/importu/config_dsl.rb +381 -0
  32. data/lib/importu/converter_context.rb +94 -0
  33. data/lib/importu/converters.rb +119 -64
  34. data/lib/importu/definition.rb +23 -0
  35. data/lib/importu/duplicate_manager.rb +88 -0
  36. data/lib/importu/exceptions.rb +135 -4
  37. data/lib/importu/importer.rb +183 -96
  38. data/lib/importu/record.rb +138 -102
  39. data/lib/importu/sources/csv.rb +122 -0
  40. data/lib/importu/sources/json.rb +106 -0
  41. data/lib/importu/sources/ruby.rb +46 -0
  42. data/lib/importu/sources/xml.rb +133 -0
  43. data/lib/importu/sources.rb +13 -0
  44. data/lib/importu/summary.rb +277 -0
  45. data/lib/importu/version.rb +3 -1
  46. data/lib/importu.rb +45 -9
  47. data/spec/fixtures/books-duplicates/README.md +7 -0
  48. data/spec/fixtures/books-duplicates/infile.csv +7 -0
  49. data/spec/fixtures/books-duplicates/model.json +23 -0
  50. data/spec/fixtures/books-duplicates/summary.json +10 -0
  51. data/spec/fixtures/books-valid/README.md +13 -0
  52. data/spec/fixtures/books-valid/infile.csv +4 -0
  53. data/spec/fixtures/books-valid/infile.json +23 -0
  54. data/spec/fixtures/books-valid/infile.xml +21 -0
  55. data/spec/fixtures/books-valid/model.json +23 -0
  56. data/spec/fixtures/books-valid/record.json +26 -0
  57. data/spec/fixtures/books-valid/summary.json +8 -0
  58. data/spec/fixtures/source-empty-file/infile.csv +0 -0
  59. data/spec/fixtures/source-empty-file/infile.json +0 -0
  60. data/spec/fixtures/source-empty-file/infile.xml +0 -0
  61. data/spec/fixtures/source-empty-records/infile.csv +3 -0
  62. data/spec/fixtures/source-empty-records/infile.json +1 -0
  63. data/spec/fixtures/source-empty-records/infile.xml +6 -0
  64. data/spec/fixtures/source-malformed/infile.csv +1 -0
  65. data/spec/fixtures/source-malformed/infile.json +1 -0
  66. data/spec/fixtures/source-malformed/infile.xml +3 -0
  67. data/spec/fixtures/source-no-records/infile.csv +1 -0
  68. data/spec/fixtures/source-no-records/infile.json +1 -0
  69. data/spec/fixtures/source-no-records/infile.xml +3 -0
  70. data/spec/lib/importu/backends/active_record_spec.rb +150 -0
  71. data/spec/lib/importu/backends/middleware/duplicate_manager_proxy_spec.rb +70 -0
  72. data/spec/lib/importu/backends/middleware/enforce_allowed_actions_spec.rb +70 -0
  73. data/spec/lib/importu/backends_spec.rb +170 -0
  74. data/spec/lib/importu/converters_spec.rb +184 -141
  75. data/spec/lib/importu/definition_spec.rb +248 -0
  76. data/spec/lib/importu/duplicate_manager_spec.rb +92 -0
  77. data/spec/lib/importu/exceptions_spec.rb +69 -16
  78. data/spec/lib/importu/import_context_spec.rb +199 -0
  79. data/spec/lib/importu/importer_spec.rb +95 -0
  80. data/spec/lib/importu/integration_spec.rb +221 -0
  81. data/spec/lib/importu/record_spec.rb +130 -80
  82. data/spec/lib/importu/sources/csv_spec.rb +29 -0
  83. data/spec/lib/importu/sources/importer_source_examples.rb +175 -0
  84. data/spec/lib/importu/sources/json_spec.rb +29 -0
  85. data/spec/lib/importu/sources/ruby_spec.rb +102 -0
  86. data/spec/lib/importu/sources/xml_spec.rb +70 -0
  87. data/spec/lib/importu/summary_spec.rb +186 -0
  88. data/spec/spec_helper.rb +91 -7
  89. data/spec/support/active_record.rb +20 -0
  90. data/spec/support/book_importer.rb +31 -0
  91. data/spec/support/dummy_backend.rb +50 -0
  92. data/spec/support/fixtures_helper.rb +43 -0
  93. data/spec/support/matchers/delegate_matcher.rb +14 -8
  94. metadata +173 -100
  95. data/lib/importu/core_ext/array/deep_freeze.rb +0 -7
  96. data/lib/importu/core_ext/deep_freeze.rb +0 -3
  97. data/lib/importu/core_ext/hash/deep_freeze.rb +0 -7
  98. data/lib/importu/core_ext/object/deep_freeze.rb +0 -6
  99. data/lib/importu/core_ext.rb +0 -3
  100. data/lib/importu/dsl.rb +0 -127
  101. data/lib/importu/importer/csv.rb +0 -52
  102. data/lib/importu/importer/json.rb +0 -45
  103. data/lib/importu/importer/xml.rb +0 -55
  104. data/spec/factories/importer.rb +0 -12
  105. data/spec/factories/importer_record.rb +0 -13
  106. data/spec/factories/json_importer.rb +0 -14
  107. data/spec/factories/xml_importer.rb +0 -12
  108. data/spec/lib/importu/dsl_spec.rb +0 -26
  109. data/spec/lib/importu/importer/json_spec.rb +0 -37
  110. data/spec/lib/importu/importer/xml_spec.rb +0 -14
data/README.md CHANGED
@@ -1,19 +1,85 @@
1
- ## Overview
2
- Importu is a framework and DSL for simplifying the process of importing
3
- structured data into your application. It is also a tool for separating
4
- import-related business logic from the rest of your code.
1
+ # Importu
5
2
 
6
- Current supported source formats include CSV/TSV, XML and JSON. It is fairly
7
- trivial to extend Importu to handle additional formats. See the
8
- `lib/importu/importer` directory for implementations of supported importers.
3
+ [![Gem Version](http://img.shields.io/gem/v/importu.svg)][gem]
4
+ [![CI](https://github.com/dhedlund/importu/actions/workflows/ci.yml/badge.svg)][ci]
9
5
 
10
- The current version of Importu depends on both ActiveRecord and ActiveSupport,
11
- which will become optional in a future release.
6
+ [gem]: https://rubygems.org/gems/importu
7
+ [ci]: https://github.com/dhedlund/importu/actions/workflows/ci.yml
8
+
9
+ Importu is a declarative data import library for Ruby. Define importers that
10
+ read like specifications - with fields, converters, and validation rules -
11
+ then parse CSV, JSON, or XML with consistent error handling.
12
+
13
+ ## Table of Contents
14
+
15
+ - [Goals](#goals)
16
+ - [Installation](#installation)
17
+ - [Quick Start](#quick-start)
18
+ - [Example](#example)
19
+ - [Sources](#sources) (CSV, JSON, XML, Ruby)
20
+ - [Converters](#converters)
21
+ - [Backends](#backends) (ActiveRecord)
22
+ - [Error Handling](#error-handling)
23
+ - [Contributing](#contributing)
24
+
25
+ For working examples, see the [importu-examples](https://github.com/dhedlund/importu-examples) repository.
26
+
27
+ ## Goals
28
+
29
+ **Primary goal:** Importers that read like specifications.
30
+
31
+ - Define fields, converters, and rules declaratively
32
+ - Separate what the data should look like from how you process it
33
+ - Use the importer as the contract shared with data providers
34
+
35
+ **Secondary goals:**
36
+
37
+ - Reusable parsers for common formats (CSV, JSON, XML)
38
+ - Modular design - extend or replace components as needed
39
+
40
+ ## Installation
41
+
42
+ Add to your Gemfile:
43
+
44
+ ```ruby
45
+ gem "importu"
46
+ ```
47
+
48
+ Then run `bundle install`.
49
+
50
+ Or install directly:
51
+
52
+ ```bash
53
+ gem install importu
54
+ ```
55
+
56
+ ## Requirements
57
+
58
+ - Ruby >= 3.1
59
+ - Rails >= 7.2 (optional, for ActiveRecord backend)
60
+ - nokogiri (optional, for XML source)
61
+
62
+ ## Quick Start
63
+
64
+ ```ruby
65
+ require "importu"
66
+
67
+ # Define an importer with the fields you expect
68
+ class BookImporter < Importu::Importer
69
+ fields :title, :author, :isbn10
70
+ end
71
+
72
+ # Create a source and importer
73
+ source = Importu::Sources::CSV.new("books.csv")
74
+ importer = BookImporter.new(source)
75
+
76
+ # Iterate over records
77
+ importer.records.each do |record|
78
+ puts "#{record[:title]} by #{record[:author]}"
79
+ end
80
+ ```
12
81
 
13
82
  ## Example
14
- **Please read the tutorial in the
15
- [import-examples](https://github.com/dhedlund/importu-examples) repository for
16
- a more complete overview of available features.**
17
83
 
18
84
  Assuming you have the following data in the file `data.csv`:
19
85
  ```
@@ -25,7 +91,7 @@ Assuming you have the following data in the file `data.csv`:
25
91
 
26
92
  You can create a minimal importer to read the CSV data:
27
93
  ```ruby
28
- class BookImporter < Importu::Importer::Csv
94
+ class BookImporter < Importu::Importer
29
95
  # fields we expect to find in the CSV file, field order is not important
30
96
  fields :title, :author, :isbn10, :pages, :release_date
31
97
  end
@@ -33,10 +99,10 @@ end
33
99
 
34
100
  And then load that data in your application:
35
101
  ```ruby
36
- require 'importu'
102
+ require "importu"
37
103
 
38
- filename = File.expand_path('../data.csv', __FILE__)
39
- importer = BookImporter.new(filename)
104
+ filename = File.expand_path("data.csv", __dir__)
105
+ importer = BookImporter.new(Importu::Sources::CSV.new(filename))
40
106
 
41
107
  # importer.records returns an Enumerable
42
108
  importer.records.count # => 3
@@ -50,16 +116,16 @@ importer.records.map(&:to_hash)
50
116
 
51
117
  A more complete example of the book importer above might look like the following:
52
118
  ```ruby
53
- require 'importu'
119
+ require "importu"
54
120
 
55
- class BookImporter < Importu::Importer::Csv
56
- # if you want to define multiple fields with similar rules, use 'fields'
57
- # NOTE: ':required => true' is redundant in this example; any defined
121
+ class BookImporter < Importu::Importer
122
+ # if you want to define multiple fields with similar rules, use "fields"
123
+ # NOTE: `required: true` is redundant in this example; any defined
58
124
  # fields must have a corresponding column in the source data by default
59
- fields :title, :isbn10, :authors, :required => true
125
+ fields :title, :isbn10, :authors, required: true
60
126
 
61
127
  # to mark a field as optional in the source data
62
- field :pages, :required => false
128
+ field :pages, required: false
63
129
 
64
130
  # you can reference the same field multiple times and apply rules
65
131
  # incrementally; this provides a lot of flexibility in describing your
@@ -67,16 +133,16 @@ class BookImporter < Importu::Importer::Csv
67
133
  # explicitly stating that "these are required"; the importer becomes the
68
134
  # reference document:
69
135
  #
70
- # fields :title, :isbn10, :authors, :release_date, :required => true
71
- # fields :pages, :required => false
136
+ # fields :title, :isbn10, :authors, :release_date, required: true
137
+ # fields :pages, required: false
72
138
  #
73
139
  # ...or keep all the rules for that field with that field, whatever makes
74
140
  # sense for your particular use case.
75
141
 
76
142
  # if your field is not named the same as the source data, you can use
77
- # :label => '...' to reference the correct field, where the label is what
143
+ # `label: "..."` to reference the correct field, where the label is what
78
144
  # the field is labelled in the source data
79
- field :authors, :label => 'author'
145
+ field :authors, label: "author"
80
146
 
81
147
  # you can convert fields using one of the built-in converters
82
148
  field :pages, &convert_to(:integer)
@@ -85,12 +151,12 @@ class BookImporter < Importu::Importer::Csv
85
151
  # some converters allow you to pass additional arguments; in the case of
86
152
  # the date converter, you can pass an explicit format and it will raise an
87
153
  # error if a date is encountered that doesn't match
88
- field :release_date, &convert_to(:date, :format => '%b %d, %Y')
154
+ field :release_date, &convert_to(:date, format: "%b %d, %Y")
89
155
 
90
156
  # passing a block to a field definition allows you to add your own logic
91
157
  # for converting data or checking for unexpected values
92
158
  field :authors do
93
- value = clean(:authors) # apply :clean converter which strips whitespace
159
+ value = trimmed(:authors) # apply :trimmed converter which strips whitespace
94
160
  authors = value ? value.split(/(?:, )|(?: and )|(?: & )/i) : []
95
161
 
96
162
  if authors.none?
@@ -103,61 +169,378 @@ class BookImporter < Importu::Importer::Csv
103
169
  end
104
170
 
105
171
  # abstract fields that are not part of the original data set can be created
106
- field :by_matz, :abstract => true do
172
+ field :by_matz, abstract: true do
107
173
  # field conversion rules can reference other fields; the field value is
108
174
  # what would be returned after referenced field's rules have been applied
109
- field_value(:authors).include?('Yukihiro Matsumoto')
175
+ field_value(:authors).include?("Yukihiro Matsumoto")
110
176
  end
111
177
  end
112
178
  ```
113
179
 
114
180
  A more condensed version of the above, with all the rules grouped into individual field definitions:
115
181
  ```ruby
116
- class BookImporter < Importu::Importer::Csv
182
+ class BookImporter < Importu::Importer
117
183
  fields :title, :isbn10
118
184
 
119
- field :authors, :label => 'author' do
120
- authors = clean(:authors).to_s.split(/(?:, )|(?: and )|(?: & )/i)
185
+ field :authors, label: "author" do
186
+ authors = trimmed(:authors).to_s.split(/(?:, )|(?: and )|(?: & )/i)
121
187
  raise ArgumentError, "at least one author is required" if authors.none?
122
-
188
+
123
189
  authors
124
190
  end
125
191
 
126
- field :pages, :required => false, &convert_to(:integer)
127
- field :release_date, &convert_to(:date, :format => '%b %d, %Y')
192
+ field :pages, required: false, &convert_to(:integer)
193
+ field :release_date, &convert_to(:date, format: "%b %d, %Y")
128
194
 
129
- field :by_matz, :abstract => true do
130
- field_value(:authors).include?('Yukihiro Matsumoto')
195
+ field :by_matz, abstract: true do
196
+ field_value(:authors).include?("Yukihiro Matsumoto")
131
197
  end
132
198
  end
133
199
  ```
134
200
 
201
+ ## Sources
202
+
203
+ Importu supports multiple source formats. Each source parses input data and
204
+ provides an enumerator of row hashes.
205
+
206
+ ### CSV
207
+
208
+ ```ruby
209
+ source = Importu::Sources::CSV.new("data.csv")
210
+
211
+ # With custom options
212
+ source = Importu::Sources::CSV.new("data.csv", csv_options: {
213
+ col_sep: ";",
214
+ encoding: "ISO-8859-1"
215
+ })
216
+ ```
217
+
218
+ Options inside `csv_options` are passed directly to Ruby's
219
+ [CSV library](https://ruby-doc.org/stdlib/libdoc/csv/rdoc/CSV.html).
220
+ Common options include `col_sep`, `quote_char`, and `encoding`.
221
+
222
+ ### JSON
223
+
224
+ ```ruby
225
+ source = Importu::Sources::JSON.new("data.json")
226
+ ```
227
+
228
+ The JSON file must have an array as the root element. The entire file is loaded
229
+ into memory, so this source is not suitable for very large files.
230
+
231
+ ### XML
232
+
233
+ ```ruby
234
+ # records_xpath is required
235
+ source = Importu::Sources::XML.new("data.xml", records_xpath: "//book")
236
+ ```
237
+
238
+ The `records_xpath` option specifies which elements to treat as records. Each
239
+ matching element becomes a row, with child elements and attributes becoming
240
+ fields.
241
+
242
+ ### Ruby
243
+
244
+ ```ruby
245
+ data = [
246
+ { "name" => "Alice", "email" => "alice@example.com" },
247
+ { "name" => "Bob", "email" => "bob@example.com" }
248
+ ]
249
+ source = Importu::Sources::Ruby.new(data)
250
+ ```
251
+
252
+ Accepts an array of hashes or any enumerable that yields objects responding to
253
+ `to_hash`. Useful for importing data already in memory or from other Ruby
254
+ sources.
255
+
256
+
257
+ ## Converters
258
+
259
+ ### Built-in Converters
260
+
261
+ Importu comes with several built-in converters for the most common Ruby
262
+ data types and data cleanup operations. Assigning a converter to your fields
263
+ ensures that the value can be translated to the desired type or a validation
264
+ error will be generated and the record flagged as invalid.
265
+
266
+ To use a converter, add `&convert_to(type)` to the end of a field definition,
267
+ where `type` is one of the types below.
268
+
269
+ | Type | Description |
270
+ |-----------|-------------|
271
+ | :boolean | Coerces value to a boolean. Must be true, yes, 1, false, no, 0. Case-insensitive. |
272
+ | :date | Coerces value to a date. Tries to guess format unless `format: ...` is provided. |
273
+ | :datetime | Coerces value to a datetime. Tries to guess format unless `format: ...` is provided. |
274
+ | :decimal | Coerces value to a BigDecimal. |
275
+ | :float | Coerces value to a Float. |
276
+ | :integer | Coerces value to an integer. Must look like an integer ("1.0" is invalid). |
277
+ | :raw | Do nothing. Value will be passed through as-is from the source value. |
278
+ | :string | Coerces value to a string, trimming leading and trailing whitespace. |
279
+ | :trimmed | Trims leading and trailing whitespace if value is a string, otherwise leave as-is. Empty strings are converted to nil. |
280
+
281
+ Some converters, such as :date and :datetime, accept optional arguments. To
282
+ pass arguments to a converter, add them after the converter's type. For
283
+ example, `&convert_to(:date, format: "%Y-%m-%d")` will force date parsing to
284
+ use the "YYYY-MM-DD" format.
285
+
286
+ | Type | Argument | Default | Description |
287
+ |-----------|----------|--------------|-------------|
288
+ | :date | :format | _autodetect_ | Parse value using a [strftime format](https://ruby-doc.org/stdlib/libdoc/date/rdoc/Date.html#method-i-strftime). |
289
+ | :datetime | :format | _autodetect_ | Parse value using a [strftime format](https://ruby-doc.org/stdlib/libdoc/date/rdoc/DateTime.html#method-i-strftime). |
290
+
291
+ Built-in converters can be overridden by creating a custom converter using
292
+ the same name as the built-in converter. Overriding a converter in one import
293
+ definition will not affect any converters outside of that definition.
294
+
295
+ ### Custom Converters
296
+
297
+ All built-in converters are defined using the same method as custom
298
+ converters. See `lib/importu/converters.rb` for their implementation, which
299
+ can be used as a guide for writing your own.
300
+
301
+ ```ruby
302
+ class BookImporter < Importu::Importer
303
+ converter :varchar do |field_name, length: 255|
304
+ value = trimmed(field_name)
305
+ value.nil? ? nil : String(value).slice(0, length)
306
+
307
+ # Instead of taking the first 255 characters, you may prefer to raise
308
+ # an error that enforces values from source data cannot exceed length.
309
+ # raise ArgumentError, "cannot exceed #{length}" if value.length > length
310
+ end
311
+
312
+ fields :title, :author, &convert_to(:varchar)
313
+ fields :title, &convert_to(:varchar, length: 50)
314
+ end
315
+ ```
316
+
317
+ To raise an error from within a converter, raise an `ArgumentError` with a
318
+ message. That field will then be marked as invalid on the record and the
319
+ message will be used as the validation error message.
320
+
321
+ If you would like to use the same custom converters across multiple import
322
+ definitions, they can be defined in a mixin and then included at the top of
323
+ each definition or in a class that the imports inherit from. Importu takes
324
+ this approach with its default converters, so you can look at the built-in
325
+ converters as an example.
326
+
327
+ ### Default Converter
328
+
329
+ By default, importu uses the `:trimmed` converter unless a converter has been
330
+ explicitly defined for the field. This should work for the vast majority of use
331
+ cases, but there are some cases where the default isn't exactly what you
332
+ wanted.
333
+
334
+ 1. If you have a couple of fields that cannot have their values trimmed, consider
335
+ changing those fields to use the :raw converter.
336
+
337
+ 2. If your opinion of trimmed is different than importu's, you can override the
338
+ built-in :trimmed converter to match your preferred behavior.
339
+
340
+ 3. If you never want any fields to have the :trimmed converter applied, you can
341
+ change the default converter to use the :raw converter:
342
+ ```ruby
343
+ class BookImporter < Importu::Importer
344
+ converter :default, &convert_to(:raw)
345
+ end
346
+ ```
347
+
348
+ 4. If you want to raise an error if a converter is not explicitly set for each
349
+ field:
350
+ ```ruby
351
+ class BookImporter < Importu::Importer
352
+ converter :default do |name|
353
+ raise ArgumentError, "converter not defined for field #{name}"
354
+ end
355
+ end
356
+ ```
357
+
358
+
359
+ ## Backends
360
+
135
361
  ### Rails / ActiveRecord
362
+
136
363
  If you define a model in the importer definition and the importer fields are
137
364
  named the same as the attributes in your model, Importu can iterate through and
138
365
  create or update records for you:
139
366
 
140
367
  ```ruby
141
- class BookImporter < Importu::Importer::Csv
142
- model 'Book'
368
+ class BookImporter < Importu::Importer
369
+ model "Book"
143
370
 
144
371
  # ...
145
372
  end
146
373
 
147
- filename = File.expand_path('../data.csv', __FILE__)
148
- importer = BookImporter.new(filename)
374
+ filename = File.expand_path("data.csv", __dir__)
375
+ importer = BookImporter.new(Importu::Sources::CSV.new(filename))
376
+
377
+ summary = importer.import!
378
+
379
+ summary.total # => 3
380
+ summary.invalid # => 0
381
+ summary.created # => 3
382
+ summary.updated # => 0
383
+ summary.unchanged # => 0
384
+
385
+ summary = importer.import!
386
+
387
+ summary.total # => 3
388
+ summary.created # => 0
389
+ summary.unchanged # => 3
390
+ ```
391
+
392
+ ### Allowed Actions
393
+
394
+ By default, importers only allow creating new records. If you want to update
395
+ existing records, you must explicitly allow it:
396
+
397
+ ```ruby
398
+ class BookImporter < Importu::Importer
399
+ model "Book"
400
+ allow_actions :create, :update # Allow both creating and updating
401
+
402
+ find_by :isbn10 # Find existing records by ISBN
403
+ # ...
404
+ end
405
+ ```
406
+
407
+ If an action is not allowed, the record will be marked as invalid with an error
408
+ message explaining which action was rejected.
409
+
410
+ | Configuration | Behavior |
411
+ |---------------|----------|
412
+ | `allow_actions :create` | Only create new records (default) |
413
+ | `allow_actions :update` | Only update existing records |
414
+ | `allow_actions :create, :update` | Create new records and update existing ones |
415
+
416
+ ### Finding Existing Records
417
+
418
+ Use `find_by` to specify which fields identify existing records:
419
+
420
+ ```ruby
421
+ class BookImporter < Importu::Importer
422
+ model "Book"
423
+ allow_actions :create, :update
424
+
425
+ find_by :isbn10 # Single field
426
+ # or
427
+ find_by :title, :author # Multiple fields (all must match)
428
+ # or
429
+ find_by do |record| # Custom lookup logic
430
+ find_by(title: record[:title].downcase)
431
+ end
432
+ end
433
+ ```
434
+
435
+ ### Before Save Hook
436
+
437
+ Use `before_save` to modify records just before they're saved:
438
+
439
+ ```ruby
440
+ class BookImporter < Importu::Importer
441
+ model "Book"
442
+
443
+ before_save do
444
+ # `object` is the model instance, `record` is the import data, `action` is :create or :update
445
+ object.title = object.title.titleize
446
+ object.imported_at = Time.current
447
+ object.created_by = "importer" if action == :create
448
+ end
449
+ end
450
+ ```
451
+
452
+ ### Controlling Field Assignment
453
+
454
+ By default, all fields are assigned on both create and update. You can
455
+ control this per-field:
456
+
457
+ ```ruby
458
+ class BookImporter < Importu::Importer
459
+ model "Book"
460
+ allow_actions :create, :update
461
+
462
+ field :isbn10 # Assigned on create and update (default)
463
+ field :created_by, update: false # Only assigned on create
464
+ field :updated_by, create: false # Only assigned on update
465
+ end
466
+ ```
467
+
468
+ ## Error Handling
469
+
470
+ ### Checking Individual Records
471
+
472
+ Records can have conversion errors (invalid data types, missing required
473
+ fields). Check validity before processing:
474
+
475
+ ```ruby
476
+ importer.records.each do |record|
477
+ if record.valid?
478
+ process(record.to_hash)
479
+ else
480
+ record.errors.each { |e| puts e.to_s }
481
+ end
482
+ end
483
+ ```
484
+
485
+ ### Import Summary
486
+
487
+ When using `import!` with a backend, the returned summary contains aggregate
488
+ results and error details:
489
+
490
+ ```ruby
491
+ summary = importer.import!
492
+
493
+ # Aggregate counts
494
+ puts "Total: #{summary.total}"
495
+ puts "Created: #{summary.created}"
496
+ puts "Updated: #{summary.updated}"
497
+ puts "Unchanged: #{summary.unchanged}"
498
+ puts "Invalid: #{summary.invalid}"
499
+
500
+ # Human-readable output
501
+ puts summary.result_msg
502
+
503
+ # Machine-readable output (for JSON APIs, etc.)
504
+ summary.to_hash
505
+ ```
506
+
507
+ ### Error Details
508
+
509
+ ```ruby
510
+ # Aggregated error counts
511
+ summary.validation_errors.each do |message, count|
512
+ puts "#{message}: #{count} occurrences"
513
+ end
514
+
515
+ # Errors by record index (0-based)
516
+ summary.itemized_errors.each do |index, errors|
517
+ puts "Record #{index}: #{errors.map(&:to_s).join(', ')}"
518
+ end
519
+ ```
520
+
521
+ ### Generating Error Reports
522
+
523
+ All file-based sources can generate a copy of the input with errors appended,
524
+ useful for returning to data providers:
525
+
526
+ ```ruby
527
+ summary = importer.import!
528
+
529
+ if summary.invalid > 0
530
+ error_file = source.write_errors(summary)
531
+ # error_file is a Tempfile with "_errors" column/field added
532
+
533
+ # To include only rows that had errors:
534
+ error_file = source.write_errors(summary, only_errors: true)
535
+ end
536
+ ```
149
537
 
150
- importer.import!
538
+ ## Contributing
151
539
 
152
- importer.total # => 3
153
- importer.invalid # => 0
154
- importer.created # => 3
155
- importer.updated # => 0
156
- importer.unchanged # => 0
540
+ See [CONTRIBUTING.md](CONTRIBUTING.md) for development setup and guidelines.
157
541
 
158
- importer.import!
542
+ Before submitting changes, run the preflight checks:
159
543
 
160
- importer.total # => 3
161
- importer.created # => 0
162
- importer.unchanged # => 3
544
+ ```bash
545
+ bundle exec rake preflight
163
546
  ```
data/Rakefile ADDED
@@ -0,0 +1,71 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "yaml"
4
+
5
+ begin
6
+ require "rspec/core/rake_task"
7
+ RSpec::Core::RakeTask.new(:spec)
8
+ rescue LoadError
9
+ # rspec not available
10
+ end
11
+
12
+ begin
13
+ require "rubocop/rake_task"
14
+ RuboCop::RakeTask.new(:rubocop)
15
+ rescue LoadError
16
+ # rubocop not available
17
+ end
18
+
19
+ begin
20
+ require "yardstick/rake/verify"
21
+
22
+ config = YAML.safe_load_file(".yardstick.yml")
23
+
24
+ Yardstick::Rake::Verify.new(:yardstick_verify, config) do |verify|
25
+ verify.threshold = config.fetch("threshold", 100)
26
+ end
27
+
28
+ task yardstick: :yardstick_verify
29
+ rescue LoadError
30
+ # yardstick not available
31
+ end
32
+
33
+ desc "Run all checks before release (specs, rubocop, yardstick)"
34
+ task :preflight do
35
+ puts "=== Preflight Checks ===\n\n"
36
+
37
+ # Check for uncommitted changes
38
+ uncommitted = `git status --porcelain`.strip
39
+ unless uncommitted.empty?
40
+ puts "WARNING: Uncommitted changes detected:"
41
+ puts uncommitted.lines.map { |l| " #{l}" }.join
42
+ puts
43
+ end
44
+
45
+ # Run checks
46
+ checks = [
47
+ ["RSpec", "spec"],
48
+ ["RuboCop", "rubocop"],
49
+ ["YARD Coverage", "yardstick"]
50
+ ]
51
+
52
+ failed = []
53
+
54
+ checks.each do |name, task_name|
55
+ print "Running #{name}... "
56
+ begin
57
+ Rake::Task[task_name].invoke
58
+ puts "OK"
59
+ rescue SystemExit, RuntimeError
60
+ puts "FAILED"
61
+ failed << name
62
+ end
63
+ end
64
+
65
+ puts
66
+ if failed.any?
67
+ abort "Preflight FAILED: #{failed.join(", ")}"
68
+ else
69
+ puts "All preflight checks passed."
70
+ end
71
+ end