data_porter 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (159) hide show
  1. checksums.yaml +7 -0
  2. data/.claude/commands/blog-status.md +10 -0
  3. data/.claude/commands/blog.md +109 -0
  4. data/.claude/commands/task-done.md +27 -0
  5. data/.claude/commands/tm/add-dependency.md +58 -0
  6. data/.claude/commands/tm/add-subtask.md +79 -0
  7. data/.claude/commands/tm/add-task.md +81 -0
  8. data/.claude/commands/tm/analyze-complexity.md +124 -0
  9. data/.claude/commands/tm/analyze-project.md +100 -0
  10. data/.claude/commands/tm/auto-implement-tasks.md +100 -0
  11. data/.claude/commands/tm/command-pipeline.md +80 -0
  12. data/.claude/commands/tm/complexity-report.md +120 -0
  13. data/.claude/commands/tm/convert-task-to-subtask.md +74 -0
  14. data/.claude/commands/tm/expand-all-tasks.md +52 -0
  15. data/.claude/commands/tm/expand-task.md +52 -0
  16. data/.claude/commands/tm/fix-dependencies.md +82 -0
  17. data/.claude/commands/tm/help.md +101 -0
  18. data/.claude/commands/tm/init-project-quick.md +49 -0
  19. data/.claude/commands/tm/init-project.md +53 -0
  20. data/.claude/commands/tm/install-taskmaster.md +118 -0
  21. data/.claude/commands/tm/learn.md +106 -0
  22. data/.claude/commands/tm/list-tasks-by-status.md +42 -0
  23. data/.claude/commands/tm/list-tasks-with-subtasks.md +30 -0
  24. data/.claude/commands/tm/list-tasks.md +46 -0
  25. data/.claude/commands/tm/next-task.md +69 -0
  26. data/.claude/commands/tm/parse-prd-with-research.md +51 -0
  27. data/.claude/commands/tm/parse-prd.md +52 -0
  28. data/.claude/commands/tm/project-status.md +67 -0
  29. data/.claude/commands/tm/quick-install-taskmaster.md +23 -0
  30. data/.claude/commands/tm/remove-all-subtasks.md +94 -0
  31. data/.claude/commands/tm/remove-dependency.md +65 -0
  32. data/.claude/commands/tm/remove-subtask.md +87 -0
  33. data/.claude/commands/tm/remove-subtasks.md +89 -0
  34. data/.claude/commands/tm/remove-task.md +110 -0
  35. data/.claude/commands/tm/setup-models.md +52 -0
  36. data/.claude/commands/tm/show-task.md +85 -0
  37. data/.claude/commands/tm/smart-workflow.md +58 -0
  38. data/.claude/commands/tm/sync-readme.md +120 -0
  39. data/.claude/commands/tm/tm-main.md +147 -0
  40. data/.claude/commands/tm/to-cancelled.md +58 -0
  41. data/.claude/commands/tm/to-deferred.md +50 -0
  42. data/.claude/commands/tm/to-done.md +47 -0
  43. data/.claude/commands/tm/to-in-progress.md +39 -0
  44. data/.claude/commands/tm/to-pending.md +35 -0
  45. data/.claude/commands/tm/to-review.md +43 -0
  46. data/.claude/commands/tm/update-single-task.md +122 -0
  47. data/.claude/commands/tm/update-task.md +75 -0
  48. data/.claude/commands/tm/update-tasks-from-id.md +111 -0
  49. data/.claude/commands/tm/validate-dependencies.md +72 -0
  50. data/.claude/commands/tm/view-models.md +52 -0
  51. data/.env.example +12 -0
  52. data/.mcp.json +24 -0
  53. data/.taskmaster/CLAUDE.md +435 -0
  54. data/.taskmaster/config.json +44 -0
  55. data/.taskmaster/docs/prd.txt +2044 -0
  56. data/.taskmaster/state.json +6 -0
  57. data/.taskmaster/tasks/task_001.md +19 -0
  58. data/.taskmaster/tasks/task_002.md +19 -0
  59. data/.taskmaster/tasks/task_003.md +19 -0
  60. data/.taskmaster/tasks/task_004.md +19 -0
  61. data/.taskmaster/tasks/task_005.md +19 -0
  62. data/.taskmaster/tasks/task_006.md +19 -0
  63. data/.taskmaster/tasks/task_007.md +19 -0
  64. data/.taskmaster/tasks/task_008.md +19 -0
  65. data/.taskmaster/tasks/task_009.md +19 -0
  66. data/.taskmaster/tasks/task_010.md +19 -0
  67. data/.taskmaster/tasks/task_011.md +19 -0
  68. data/.taskmaster/tasks/task_012.md +19 -0
  69. data/.taskmaster/tasks/task_013.md +19 -0
  70. data/.taskmaster/tasks/task_014.md +19 -0
  71. data/.taskmaster/tasks/task_015.md +19 -0
  72. data/.taskmaster/tasks/task_016.md +19 -0
  73. data/.taskmaster/tasks/task_017.md +19 -0
  74. data/.taskmaster/tasks/task_018.md +19 -0
  75. data/.taskmaster/tasks/task_019.md +19 -0
  76. data/.taskmaster/tasks/task_020.md +19 -0
  77. data/.taskmaster/tasks/tasks.json +299 -0
  78. data/.taskmaster/templates/example_prd.txt +47 -0
  79. data/.taskmaster/templates/example_prd_rpg.txt +511 -0
  80. data/CHANGELOG.md +29 -0
  81. data/CLAUDE.md +65 -0
  82. data/CODE_OF_CONDUCT.md +10 -0
  83. data/CONTRIBUTING.md +49 -0
  84. data/LICENSE +21 -0
  85. data/README.md +463 -0
  86. data/Rakefile +12 -0
  87. data/app/assets/stylesheets/data_porter/application.css +646 -0
  88. data/app/channels/data_porter/import_channel.rb +10 -0
  89. data/app/controllers/data_porter/imports_controller.rb +68 -0
  90. data/app/javascript/data_porter/progress_controller.js +33 -0
  91. data/app/jobs/data_porter/dry_run_job.rb +12 -0
  92. data/app/jobs/data_porter/import_job.rb +12 -0
  93. data/app/jobs/data_porter/parse_job.rb +12 -0
  94. data/app/models/data_porter/data_import.rb +49 -0
  95. data/app/views/data_porter/imports/index.html.erb +142 -0
  96. data/app/views/data_porter/imports/new.html.erb +88 -0
  97. data/app/views/data_porter/imports/show.html.erb +49 -0
  98. data/config/database.yml +3 -0
  99. data/config/routes.rb +12 -0
  100. data/docs/SPEC.md +2012 -0
  101. data/docs/UI.md +32 -0
  102. data/docs/blog/001-why-build-a-data-import-engine.md +166 -0
  103. data/docs/blog/002-scaffolding-a-rails-engine.md +188 -0
  104. data/docs/blog/003-configuration-dsl.md +222 -0
  105. data/docs/blog/004-store-model-jsonb.md +237 -0
  106. data/docs/blog/005-target-dsl.md +284 -0
  107. data/docs/blog/006-parsing-csv-sources.md +300 -0
  108. data/docs/blog/007-orchestrator.md +247 -0
  109. data/docs/blog/008-actioncable-stimulus.md +376 -0
  110. data/docs/blog/009-phlex-ui-components.md +446 -0
  111. data/docs/blog/010-controllers-routing.md +374 -0
  112. data/docs/blog/011-generators.md +364 -0
  113. data/docs/blog/012-json-api-sources.md +323 -0
  114. data/docs/blog/013-testing-rails-engine.md +618 -0
  115. data/docs/blog/014-dry-run.md +307 -0
  116. data/docs/blog/015-publishing-retro.md +264 -0
  117. data/docs/blog/016-erb-view-templates.md +431 -0
  118. data/docs/blog/017-showcase-final-retro.md +220 -0
  119. data/docs/blog/BACKLOG.md +8 -0
  120. data/docs/blog/SERIES.md +154 -0
  121. data/docs/screenshots/index-with-previewing.jpg +0 -0
  122. data/docs/screenshots/index.jpg +0 -0
  123. data/docs/screenshots/modal-new-import.jpg +0 -0
  124. data/docs/screenshots/preview.jpg +0 -0
  125. data/lib/data_porter/broadcaster.rb +29 -0
  126. data/lib/data_porter/components/base.rb +10 -0
  127. data/lib/data_porter/components/failure_alert.rb +20 -0
  128. data/lib/data_porter/components/preview_table.rb +54 -0
  129. data/lib/data_porter/components/progress_bar.rb +33 -0
  130. data/lib/data_porter/components/results_summary.rb +19 -0
  131. data/lib/data_porter/components/status_badge.rb +16 -0
  132. data/lib/data_porter/components/summary_cards.rb +30 -0
  133. data/lib/data_porter/components.rb +14 -0
  134. data/lib/data_porter/configuration.rb +25 -0
  135. data/lib/data_porter/dsl/api_config.rb +25 -0
  136. data/lib/data_porter/dsl/column.rb +17 -0
  137. data/lib/data_porter/engine.rb +15 -0
  138. data/lib/data_porter/orchestrator.rb +141 -0
  139. data/lib/data_porter/record_validator.rb +32 -0
  140. data/lib/data_porter/registry.rb +33 -0
  141. data/lib/data_porter/sources/api.rb +49 -0
  142. data/lib/data_porter/sources/base.rb +35 -0
  143. data/lib/data_porter/sources/csv.rb +43 -0
  144. data/lib/data_porter/sources/json.rb +45 -0
  145. data/lib/data_porter/sources.rb +20 -0
  146. data/lib/data_porter/store_models/error.rb +13 -0
  147. data/lib/data_porter/store_models/import_record.rb +52 -0
  148. data/lib/data_porter/store_models/report.rb +21 -0
  149. data/lib/data_porter/target.rb +89 -0
  150. data/lib/data_porter/type_validator.rb +46 -0
  151. data/lib/data_porter/version.rb +5 -0
  152. data/lib/data_porter.rb +32 -0
  153. data/lib/generators/data_porter/install/install_generator.rb +33 -0
  154. data/lib/generators/data_porter/install/templates/create_data_porter_imports.rb.erb +21 -0
  155. data/lib/generators/data_porter/install/templates/initializer.rb +30 -0
  156. data/lib/generators/data_porter/target/target_generator.rb +44 -0
  157. data/lib/generators/data_porter/target/templates/target.rb.tt +20 -0
  158. data/sig/data_porter.rbs +4 -0
  159. metadata +274 -0
@@ -0,0 +1,364 @@
1
+ ---
2
+ title: "Building DataPorter #11 -- Generators: Install & Target Scaffolding"
3
+ series: "Building DataPorter - A Data Import Engine for Rails"
4
+ part: 11
5
+ tags: [ruby, rails, rails-engine, gem-development, generators, scaffolding, templates]
6
+ published: false
7
+ ---
8
+
9
+ # Generators: Install & Target Scaffolding
10
+
11
+ > A great gem installs in one command. A great engine scaffolds new import types from the command line. Here is how to build Rails generators that bootstrap everything -- migration, initializer, routes, and per-target files -- so adopters never have to wire anything by hand.
12
+
13
+ ## Context
14
+
15
+ This is part 11 of the series where we build **DataPorter**, a mountable Rails engine for data import workflows. In [part 10](#), we built the ImportsController, wired up engine routes, and solved the dynamic parent controller inheritance problem.
16
+
17
+ At this point the engine is feature-complete: targets, sources, the orchestrator, real-time progress, a Phlex UI, and controllers all work together. But onboarding a new host app still requires manually creating a migration, writing an initializer, mounting the engine in routes, and knowing the exact Target DSL to define an import type. That is too many steps for someone evaluating the gem for the first time. We need generators that collapse all of that into a single `rails generate` command.
18
+
19
+ ## The problem
20
+
21
+ Installing a Rails engine by hand means at least four discrete steps: copy a migration, create an initializer with sane defaults, add a route mount, and create the directory where import targets will live. Miss any one of these and the engine will not work -- but the error messages will not tell you which step you forgot. A missing migration produces an `ActiveRecord::StatementInvalid`; a missing route mount means a `NoMethodError` when the engine tries to resolve its URL helpers; a missing initializer silently uses defaults that may not match your app.
22
+
23
+ On top of that, every time a developer wants to add a new import type, they need to remember the Target DSL: which class to inherit from, which methods to define, how to declare columns. That is cognitive overhead that belongs in a generator, not in someone's memory.
24
+
25
+ ## What we are building
26
+
27
+ Two generators. The first bootstraps the entire engine into a host app:
28
+
29
+ ```bash
30
+ $ rails generate data_porter:install
31
+ create db/migrate/20260206120000_create_data_porter_imports.rb
32
+ create config/initializers/data_porter.rb
33
+ create app/importers
34
+ route mount DataPorter::Engine, at: "/imports"
35
+ ```
36
+
37
+ The second scaffolds a new target with parsed column definitions:
38
+
39
+ ```bash
40
+ $ rails generate data_porter:target guests first_name:string:required email:email last_name:string
41
+ create app/importers/guests_target.rb
42
+ ```
43
+
44
+ One command to install, one command per import type. No manual file creation, no DSL memorization.
45
+
46
+ ## Implementation
47
+
48
+ ### Step 1 -- The install generator
49
+
50
+ The install generator inherits from `Rails::Generators::Base` and mixes in `ActiveRecord::Generators::Migration` for timestamped migration support. Each public method in the generator becomes a step that Rails executes in definition order:
51
+
52
+ ```ruby
53
+ # lib/generators/data_porter/install/install_generator.rb
54
+ module DataPorter
55
+ module Generators
56
+ class InstallGenerator < Rails::Generators::Base
57
+ include ActiveRecord::Generators::Migration
58
+
59
+ source_root File.expand_path("templates", __dir__)
60
+
61
+ def copy_migration
62
+ migration_template(
63
+ "create_data_porter_imports.rb.erb",
64
+ "db/migrate/create_data_porter_imports.rb"
65
+ )
66
+ end
67
+
68
+ def copy_initializer
69
+ template("initializer.rb", "config/initializers/data_porter.rb")
70
+ end
71
+
72
+ def create_importers_directory
73
+ empty_directory("app/importers")
74
+ end
75
+
76
+ def mount_engine
77
+ route 'mount DataPorter::Engine, at: "/imports"'
78
+ end
79
+ end
80
+ end
81
+ end
82
+ ```
83
+
84
+ Four methods, four artifacts. Let us walk through each one.
85
+
86
+ `copy_migration` uses `migration_template` instead of the plain `template` method. The difference matters: `migration_template` adds a timestamp prefix to the filename and raises an error if a migration with the same name already exists, preventing duplicate migrations when someone accidentally runs the generator twice. The template itself is an ERB file that interpolates the current ActiveRecord migration version:
87
+
88
+ ```ruby
89
+ # lib/generators/data_porter/install/templates/create_data_porter_imports.rb.erb
90
+ class CreateDataPorterImports < ActiveRecord::Migration[<%= ActiveRecord::Migration.current_version %>]
91
+ def change
92
+ create_table :data_porter_imports do |t|
93
+ t.string :target_key, null: false
94
+ t.string :source_type, null: false, default: "csv"
95
+ t.integer :status, null: false, default: 0
96
+ t.jsonb :records, null: false, default: []
97
+ t.jsonb :report, null: false, default: {}
98
+ t.jsonb :config, null: false, default: {}
99
+
100
+ t.references :user, polymorphic: true, null: false
101
+
102
+ t.timestamps
103
+ end
104
+
105
+ add_index :data_porter_imports, :status
106
+ add_index :data_porter_imports, :target_key
107
+ end
108
+ end
109
+ ```
110
+
111
+ The `<%= ActiveRecord::Migration.current_version %>` call means the generated migration always matches the host app's Rails version -- a Rails 7.1 app gets `Migration[7.1]`, a Rails 7.2 app gets `Migration[7.2]`. No hardcoding, no compatibility issues.
112
+
113
+ `copy_initializer` generates a commented-out configuration file. Every option is present but disabled, so developers can see what is available without digging through source code:
114
+
115
+ ```ruby
116
+ # lib/generators/data_porter/install/templates/initializer.rb
117
+ DataPorter.configure do |config|
118
+ # Parent controller for the engine's controllers to inherit from.
119
+ # This controls authentication, layouts, and helpers.
120
+ # config.parent_controller = "ApplicationController"
121
+
122
+ # ActiveJob queue name for import jobs.
123
+ # config.queue_name = :imports
124
+
125
+ # ActiveStorage service for uploaded files.
126
+ # config.storage_service = :local
127
+
128
+ # ActionCable channel prefix.
129
+ # config.cable_channel_prefix = "data_porter"
130
+
131
+ # Context builder: inject business data into targets.
132
+ # Receives the current controller instance.
133
+ # config.context_builder = ->(controller) {
134
+ # OpenStruct.new(
135
+ # user: controller.current_user
136
+ # )
137
+ # }
138
+
139
+ # Maximum number of records displayed in preview.
140
+ # config.preview_limit = 500
141
+
142
+ # Enabled source types.
143
+ # config.enabled_sources = %i[csv json api]
144
+ end
145
+ ```
146
+
147
+ This is a deliberate design choice: every option documents itself with a comment that explains *what it controls*, not just what it is. The `context_builder` lambda even includes a usage example. When someone opens this file for the first time, they should understand the engine's configuration surface without reading the README.
148
+
149
+ `create_importers_directory` calls `empty_directory`, which creates `app/importers` and silently skips if it already exists. This is where host apps will place their target files.
150
+
151
+ `mount_engine` uses the built-in `route` helper, which injects the mount line into `config/routes.rb`. The default mount point is `/imports`, but because it is a standard route mount, developers can change the path or wrap it in constraints after generation.
152
+
153
+ ### Step 2 -- The target generator
154
+
155
+ The target generator inherits from `Rails::Generators::NamedBase` instead of `Base`. This gives us the `name` argument for free -- Rails automatically parses the first argument as the target name and provides helper methods like `class_name`, `file_name`, and `singular_name`:
156
+
157
+ ```ruby
158
+ # lib/generators/data_porter/target/target_generator.rb
159
+ module DataPorter
160
+ module Generators
161
+ class TargetGenerator < Rails::Generators::NamedBase
162
+ source_root File.expand_path("templates", __dir__)
163
+
164
+ argument :columns, type: :array, default: [], banner: "name:type[:required]"
165
+
166
+ def create_target_file
167
+ template("target.rb.tt", "app/importers/#{file_name}_target.rb")
168
+ end
169
+
170
+ private
171
+
172
+ def target_class_name
173
+ "#{class_name}Target"
174
+ end
175
+
176
+ def model_name
177
+ class_name.singularize
178
+ end
179
+
180
+ def target_label
181
+ class_name.titleize
182
+ end
183
+
184
+ def parsed_columns
185
+ columns.map { |col| parse_column(col) }
186
+ end
187
+
188
+ def parse_column(definition)
189
+ parts = definition.split(":")
190
+ {
191
+ name: parts[0],
192
+ type: parts[1] || "string",
193
+ required: parts[2] == "required"
194
+ }
195
+ end
196
+ end
197
+ end
198
+ end
199
+ ```
200
+
201
+ The `columns` argument uses `type: :array` with a `default: []`, which means you can generate a bare target with no columns and fill them in later, or you can specify everything up front. The `banner` string `"name:type[:required]"` tells `rails generate data_porter:target --help` exactly what format columns expect.
202
+
203
+ The `parse_column` method splits each column definition on colons. `first_name:string:required` becomes `{ name: "first_name", type: "string", required: true }`. `email:email` becomes `{ name: "email", type: "email", required: false }`. If you omit the type entirely, it defaults to `"string"`. This mirrors the familiar `rails generate model` syntax but adapts it to our column DSL.
204
+
205
+ The naming helpers derive everything from the single `name` argument. Pass `guests` and you get: `target_class_name` returns `"GuestsTarget"`, `model_name` returns `"Guest"` (singularized, because the target usually maps to one ActiveRecord model), and `target_label` returns `"Guests"` (titleized, for the UI).
206
+
207
+ ### Step 3 -- The target template
208
+
209
+ The template uses Thor's `.tt` format (which processes ERB tags using the generator's binding) to produce a complete, working target file:
210
+
211
+ ```erb
212
+ # lib/generators/data_porter/target/templates/target.rb.tt
213
+ class <%= target_class_name %> < DataPorter::Target
214
+ label "<%= target_label %>"
215
+ model_name "<%= model_name %>"
216
+ icon "fas fa-file-import"
217
+ sources :csv
218
+ <% if parsed_columns.any? %>
219
+
220
+ columns do
221
+ <% parsed_columns.each do |col| -%>
222
+ column :<%= col[:name] %>, type: :<%= col[:type] %><%= ", required: true" if col[:required] %>
223
+ <% end -%>
224
+ end
225
+ <% end %>
226
+
227
+ def persist(record, context:)
228
+ # <%= model_name %>.create!(record.attributes)
229
+ end
230
+ end
231
+ ```
232
+
233
+ Running `rails generate data_porter:target guests first_name:string:required email:email last_name:string` produces:
234
+
235
+ ```ruby
236
+ # app/importers/guests_target.rb
237
+ class GuestsTarget < DataPorter::Target
238
+ label "Guests"
239
+ model_name "Guest"
240
+ icon "fas fa-file-import"
241
+ sources :csv
242
+
243
+ columns do
244
+ column :first_name, type: :string, required: true
245
+ column :email, type: :email
246
+ column :last_name, type: :string
247
+ end
248
+
249
+ def persist(record, context:)
250
+ # Guest.create!(record.attributes)
251
+ end
252
+ end
253
+ ```
254
+
255
+ Two details worth noting. First, the `persist` method is generated with a commented-out implementation line. This is intentional: we want the developer to think about what persistence means for their domain rather than blindly accepting a default. Maybe they need `find_or_create_by`, maybe they need to call a service object, maybe they need to update existing records. The commented hint shows the simplest path while making it clear this is the one method they *must* customize.
256
+
257
+ Second, the `columns` block is conditionally rendered. If you run `rails generate data_porter:target guests` with no columns, you get a clean skeleton without an empty `columns do; end` block. You can add columns later as you figure out your CSV structure.
258
+
259
+ ## Decisions & tradeoffs
260
+
261
+ | Decision | We chose | Over | Because |
262
+ |----------|----------|------|---------|
263
+ | Generator base class | `NamedBase` for target, `Base` for install | Both using `Base` | `NamedBase` gives us `class_name`, `file_name`, and argument parsing for free; install does not need a name argument |
264
+ | Migration template format | ERB (`.rb.erb`) with `ActiveRecord::Migration.current_version` | Hardcoded migration version | The generated migration automatically matches the host app's Rails version |
265
+ | Column parsing syntax | `name:type[:required]` colon-separated | A flag-based approach (`--columns name:string --required name`) | Matches the familiar `rails generate model` convention; less typing, easier to remember |
266
+ | Initializer style | All options commented out with explanations | Only showing uncommented defaults | Developers discover every configuration option on first read; uncommenting is easier than looking up what is available |
267
+ | Target output directory | `app/importers/` | `app/data_porter/targets/` or `app/models/concerns/` | Short, clear, conventional; mirrors how apps organize service objects in `app/services/` |
268
+ | Persist method | Commented-out hint (`# Guest.create!(...)`) | Working default implementation | Forces the developer to make an intentional choice about their persistence strategy |
269
+
270
+ ## Testing it
271
+
272
+ Generator testing is unusual -- you are testing that files get created with the right content, not that objects behave correctly. The specs verify the generator's structure and its column-parsing logic without actually running the generator against a filesystem:
273
+
274
+ ```ruby
275
+ # spec/data_porter/generators/install_generator_spec.rb
276
+ RSpec.describe DataPorter::Generators::InstallGenerator do
277
+ it "inherits from Rails::Generators::Base" do
278
+ expect(described_class.superclass).to eq(Rails::Generators::Base)
279
+ end
280
+
281
+ it "has a source_root pointing to templates" do
282
+ expect(described_class.source_root).to end_with("lib/generators/data_porter/install/templates")
283
+ end
284
+
285
+ describe "generator methods" do
286
+ it "defines copy_migration" do
287
+ expect(described_class.instance_method(:copy_migration)).to be_a(UnboundMethod)
288
+ end
289
+
290
+ it "defines copy_initializer" do
291
+ expect(described_class.instance_method(:copy_initializer)).to be_a(UnboundMethod)
292
+ end
293
+
294
+ it "defines create_importers_directory" do
295
+ expect(described_class.instance_method(:create_importers_directory)).to be_a(UnboundMethod)
296
+ end
297
+
298
+ it "defines mount_engine" do
299
+ expect(described_class.instance_method(:mount_engine)).to be_a(UnboundMethod)
300
+ end
301
+ end
302
+ end
303
+ ```
304
+
305
+ The install generator specs verify that the class inherits from the right base, that the source root points to the templates directory, and that each expected method exists. This is a structural test: if someone renames a method or changes the inheritance chain, the spec catches it immediately.
306
+
307
+ The target generator specs go deeper, testing the column parsing and naming derivation:
308
+
309
+ ```ruby
310
+ # spec/data_porter/generators/target_generator_spec.rb
311
+ RSpec.describe DataPorter::Generators::TargetGenerator do
312
+ it "inherits from Rails::Generators::NamedBase" do
313
+ expect(described_class.superclass).to eq(Rails::Generators::NamedBase)
314
+ end
315
+
316
+ describe "column parsing" do
317
+ let(:generator) { described_class.new(["guests", "first_name:string:required", "email:email"]) }
318
+
319
+ it "parses column definitions" do
320
+ columns = generator.send(:parsed_columns)
321
+
322
+ expect(columns.size).to eq(2)
323
+ expect(columns[0]).to eq({ name: "first_name", type: "string", required: true })
324
+ expect(columns[1]).to eq({ name: "email", type: "email", required: false })
325
+ end
326
+ end
327
+
328
+ describe "naming" do
329
+ let(:generator) { described_class.new(["guests"]) }
330
+
331
+ it "derives the class name" do
332
+ expect(generator.send(:target_class_name)).to eq("GuestsTarget")
333
+ end
334
+
335
+ it "derives the model name" do
336
+ expect(generator.send(:model_name)).to eq("Guest")
337
+ end
338
+
339
+ it "derives the label" do
340
+ expect(generator.send(:target_label)).to eq("Guests")
341
+ end
342
+ end
343
+ end
344
+ ```
345
+
346
+ The column parsing spec is the most important one. It confirms that `first_name:string:required` correctly splits into a hash with `required: true`, while `email:email` (no third segment) defaults to `required: false`. The naming specs verify that `guests` as input produces `GuestsTarget` for the class, `Guest` for the model, and `Guests` for the label -- the singularization and titleization that the template depends on.
347
+
348
+ Notice that the specs instantiate the generator directly with `described_class.new(["guests", ...])` rather than invoking it through the Rails generator runner. This keeps the tests fast and focused: we are testing the logic, not the file I/O.
349
+
350
+ ## Recap
351
+
352
+ - The **install generator** bootstraps the entire engine in one command: migration, initializer, importers directory, and route mount -- four steps that previously required reading the README and manually creating files.
353
+ - The **target generator** scaffolds a new import type with parsed column definitions, producing a ready-to-customize target file that follows the DSL conventions established in earlier parts of the series.
354
+ - The **migration template** uses ERB with `ActiveRecord::Migration.current_version` to match the host app's Rails version automatically, avoiding hardcoded version numbers.
355
+ - The **column parsing syntax** (`name:type[:required]`) mirrors `rails generate model` conventions, keeping the learning curve flat for Rails developers.
356
+ - The **initializer template** documents every configuration option with comments, making the engine's surface area discoverable without external documentation.
357
+
358
+ ## Next up
359
+
360
+ The engine can now parse CSV files, but real-world data does not always arrive in a spreadsheet. In part 12, we extend the Source layer with **JSON and API sources** -- a JSON source that accepts files or raw text, and an API source that fetches data from HTTP endpoints with injectable parameters and authentication headers. The source abstraction we designed in part 6 is about to prove its worth.
361
+
362
+ ---
363
+
364
+ *This is part 11 of the series "Building DataPorter - A Data Import Engine for Rails". [Previous: Controllers & Routing in a Rails Engine](#) | [Next: Adding JSON & API Sources](#)*
@@ -0,0 +1,323 @@
1
+ ---
2
+ title: "Building DataPorter #12 -- Au-dela du CSV : Sources JSON et API"
3
+ series: "Building DataPorter - A Data Import Engine for Rails"
4
+ part: 12
5
+ tags: [ruby, rails, rails-engine, gem-development, json, api, http, sources, dsl]
6
+ published: false
7
+ ---
8
+
9
+ # Au-dela du CSV : Sources JSON et API
10
+
11
+ > Le CSV est le format roi de l'import de donnees -- mais dans la vraie vie, les donnees arrivent aussi en JSON depuis un fichier, ou directement depuis une API tierce. Voici comment DataPorter etend son architecture de sources pour absorber ces nouveaux formats sans rien casser.
12
+
13
+ ## Contexte
14
+
15
+ Ceci est la partie 12 de la serie ou nous construisons **DataPorter**, un engine Rails montable pour les workflows d'import de donnees. Dans la [partie 11](#), nous avons construit les generateurs install et target pour que l'adoption du gem se fasse en une seule commande.
16
+
17
+ Jusqu'ici, DataPorter ne sait lire que du CSV. C'est suffisant pour beaucoup de cas, mais le monde reel est plus varie : un partenaire envoie un export JSON, un service interne expose une API REST, un front-end pousse du JSON brut dans un formulaire. Si chaque nouveau format demande de rearchitecturer le moteur, on a rate quelque chose. L'abstraction `Sources::Base` que nous avons posee dans la partie 6 va maintenant montrer sa valeur.
18
+
19
+ ## Pourquoi plusieurs sources ?
20
+
21
+ Un moteur d'import qui ne parle que CSV force les utilisateurs a convertir leurs donnees avant de les importer. C'est de la friction inutile. En supportant JSON et API nativement, on couvre trois scenarios courants :
22
+
23
+ - **CSV** -- L'utilisateur uploade un fichier depuis son poste.
24
+ - **JSON** -- L'utilisateur uploade un fichier JSON, ou bien le systeme injecte du JSON brut via la configuration.
25
+ - **API** -- Le systeme va chercher les donnees directement sur un endpoint HTTP, avec authentification et parametres dynamiques.
26
+
27
+ Le point cle : chaque source doit respecter le meme contrat -- une methode `fetch` qui retourne un tableau de hashes avec des cles symboliques. Le reste du pipeline (validation, transformation, persistence) ne change pas.
28
+
29
+ ## La source JSON
30
+
31
+ La source JSON doit gerer trois manieres de recevoir du contenu : injection directe (pour les tests ou l'usage programmatique), JSON brut stocke dans la configuration de l'import, et telechargement depuis un fichier ActiveStorage.
32
+
33
+ ```ruby
34
+ # lib/data_porter/sources/json.rb
35
+ module DataPorter
36
+ module Sources
37
+ class Json < Base
38
+ def initialize(data_import, content: nil)
39
+ super(data_import)
40
+ @content = content
41
+ end
42
+
43
+ def fetch
44
+ parsed = ::JSON.parse(json_content)
45
+ records = extract_records(parsed)
46
+
47
+ Array(records).map do |hash|
48
+ hash.transform_keys { |k| k.parameterize(separator: "_").to_sym }
49
+ end
50
+ end
51
+
52
+ private
53
+
54
+ def json_content
55
+ @content || config_raw_json || download_file
56
+ end
57
+
58
+ def config_raw_json
59
+ config = @data_import.config
60
+ config["raw_json"] if config.is_a?(Hash)
61
+ end
62
+
63
+ def download_file
64
+ @data_import.file.download
65
+ end
66
+
67
+ def extract_records(parsed)
68
+ root = @target_class._json_root
69
+ return parsed unless root
70
+
71
+ parsed.dig(*root.split("."))
72
+ end
73
+ end
74
+ end
75
+ end
76
+ ```
77
+
78
+ Trois choses meritent attention.
79
+
80
+ **La cascade de `json_content`.** La methode essaie trois sources dans l'ordre : le contenu injecte au constructeur, la cle `raw_json` dans la configuration de l'import, et enfin le fichier ActiveStorage. Cette cascade permet une grande flexibilite sans parametrage explicite -- le bon chemin est choisi automatiquement selon ce qui est disponible.
81
+
82
+ **Le `json_root` pour les chemins imbriques.** Les API et les fichiers JSON du monde reel enveloppent souvent les donnees dans une structure : `{"data": {"guests": [...]}}`. Plutot que de forcer l'utilisateur a aplatir son JSON, on lui donne un DSL dans le Target :
83
+
84
+ ```ruby
85
+ class GuestsTarget < DataPorter::Target
86
+ label "Guests"
87
+ model_name "Guest"
88
+ json_root "data.guests"
89
+
90
+ columns do
91
+ column :name, type: :string
92
+ end
93
+ end
94
+ ```
95
+
96
+ La methode `extract_records` utilise `dig` en decoupant le chemin sur les points. `"data.guests"` devient `parsed.dig("data", "guests")`. Simple, lisible, et supporte n'importe quel niveau d'imbrication.
97
+
98
+ **La normalisation des cles.** Comme pour le CSV, chaque cle est transformee via `parameterize(separator: "_").to_sym`. `"First Name"` devient `:first_name`. Cela garantit que le reste du pipeline recoit toujours des cles au meme format, quel que soit le format source.
99
+
100
+ ## La source API
101
+
102
+ La source API va chercher les donnees sur un endpoint HTTP. Elle doit supporter des endpoints statiques et dynamiques, des headers fixes et generes a la volee, et l'extraction de donnees depuis une cle de reponse.
103
+
104
+ ```ruby
105
+ # lib/data_porter/sources/api.rb
106
+ module DataPorter
107
+ module Sources
108
+ class Api < Base
109
+ def fetch
110
+ api = @target_class._api_config
111
+ response = perform_request(api)
112
+ parsed = ::JSON.parse(response.body)
113
+ records = extract_records(parsed, api)
114
+
115
+ Array(records).map do |hash|
116
+ hash.transform_keys { |k| k.parameterize(separator: "_").to_sym }
117
+ end
118
+ end
119
+
120
+ private
121
+
122
+ def perform_request(api)
123
+ url = resolve_endpoint(api)
124
+ headers = resolve_headers(api)
125
+ uri = URI(url)
126
+
127
+ Net::HTTP.start(uri.host, uri.port, use_ssl: uri.scheme == "https") do |http|
128
+ request = Net::HTTP::Get.new(uri)
129
+ headers.each { |k, v| request[k] = v }
130
+ http.request(request)
131
+ end
132
+ end
133
+
134
+ def resolve_endpoint(api)
135
+ params = @data_import.config.symbolize_keys
136
+ api.endpoint.is_a?(Proc) ? api.endpoint.call(params) : api.endpoint
137
+ end
138
+
139
+ def resolve_headers(api)
140
+ api.headers.is_a?(Proc) ? api.headers.call : (api.headers || {})
141
+ end
142
+
143
+ def extract_records(parsed, api)
144
+ root = api.response_root
145
+ root ? parsed[root.to_s] : parsed
146
+ end
147
+ end
148
+ end
149
+ end
150
+ ```
151
+
152
+ Le coeur de la logique se trouve dans `resolve_endpoint` et `resolve_headers`. Chacune de ces methodes accepte soit une valeur statique, soit un lambda. Cela ouvre deux modes d'utilisation :
153
+
154
+ ```ruby
155
+ # Endpoint statique, headers fixes
156
+ api_config do
157
+ endpoint "https://api.example.com/stays"
158
+ headers({ "Authorization" => "Bearer abc123" })
159
+ response_root :stays
160
+ end
161
+
162
+ # Endpoint dynamique, headers generes a la volee
163
+ api_config do
164
+ endpoint ->(params) { "https://api.example.com/items?id=#{params[:item_id]}" }
165
+ headers(-> { { "Authorization" => "Bearer #{Token.current}" } })
166
+ end
167
+ ```
168
+
169
+ Dans le cas du lambda d'endpoint, les parametres proviennent de `@data_import.config.symbolize_keys`. L'utilisateur passe `config: { item_id: "42" }` au moment de la creation de l'import, et le lambda recoit ces parametres pour construire l'URL. Pour les headers, le lambda est appele sans argument -- il va chercher le token la ou il se trouve (variable d'environnement, modele, service externe).
170
+
171
+ Le `response_root` fonctionne comme le `json_root` de la source JSON, mais en plus simple : il extrait une seule cle du hash de reponse. `response_root :stays` sur une reponse `{"stays": [...]}` retourne directement le tableau. Si aucun `response_root` n'est defini, la reponse entiere est utilisee.
172
+
173
+ ## Le pattern DSL d'ApiConfig
174
+
175
+ La configuration API utilise un objet DSL dedie plutot que de simples `attr_accessor` :
176
+
177
+ ```ruby
178
+ # lib/data_porter/dsl/api_config.rb
179
+ module DataPorter
180
+ module DSL
181
+ class ApiConfig
182
+ def endpoint(value = nil)
183
+ return @endpoint if value.nil?
184
+
185
+ @endpoint = value
186
+ end
187
+
188
+ def headers(value = nil)
189
+ return @headers if value.nil?
190
+
191
+ @headers = value
192
+ end
193
+
194
+ def response_root(value = nil)
195
+ return @response_root if value.nil?
196
+
197
+ @response_root = value
198
+ end
199
+ end
200
+ end
201
+ end
202
+ ```
203
+
204
+ Chaque methode joue un double role : appelee avec un argument, elle agit comme un setter ; appelee sans argument, elle agit comme un getter. Ce pattern evite de separer `attr_reader` et `attr_writer` et produit un DSL naturel :
205
+
206
+ ```ruby
207
+ api_config do
208
+ endpoint "https://api.example.com/data" # setter
209
+ end
210
+
211
+ api.endpoint # => "https://api.example.com/data" (getter)
212
+ ```
213
+
214
+ Dans le Target, `api_config` cree une instance d'`ApiConfig` et execute le bloc dans son contexte via `instance_eval` :
215
+
216
+ ```ruby
217
+ # Dans DataPorter::Target
218
+ def api_config(&)
219
+ @_api_config = DSL::ApiConfig.new
220
+ @_api_config.instance_eval(&)
221
+ end
222
+ ```
223
+
224
+ Ce pattern -- objet DSL + `instance_eval` -- est le meme que celui utilise pour le bloc `columns`. C'est un idiome Ruby classique qui donne une syntaxe propre tout en gardant l'implementation testable (l'objet `ApiConfig` est un PORO normal, facile a instancier et inspecter dans les specs).
225
+
226
+ ## Le dispatch via Sources.resolve
227
+
228
+ L'ajout de nouvelles sources ne modifie rien au code existant. Le module `Sources` maintient un registre simple :
229
+
230
+ ```ruby
231
+ # lib/data_porter/sources.rb
232
+ module DataPorter
233
+ module Sources
234
+ REGISTRY = {
235
+ api: Api,
236
+ csv: Csv,
237
+ json: Json
238
+ }.freeze
239
+
240
+ def self.resolve(type)
241
+ REGISTRY.fetch(type.to_sym) { raise Error, "Unknown source type: #{type}" }
242
+ end
243
+ end
244
+ end
245
+ ```
246
+
247
+ L'Orchestrator appelle `Sources.resolve(import.source_type)` et recoit la bonne classe. Il instancie ensuite la source et appelle `fetch`. Ni l'Orchestrator ni les controllers ne savent quel type de source est utilise -- c'est le `source_type` stocke dans l'import qui decide. Ajouter une source XML ou Parquet demanderait : une classe heritant de `Base`, une entree dans le `REGISTRY`, et c'est tout.
248
+
249
+ ## L'approche TDD
250
+
251
+ Les deux sources ont ete construites en TDD. La source JSON est testee avec trois scenarios :
252
+
253
+ ```ruby
254
+ it "parses JSON array content" do
255
+ json = '[{"first_name": "Alice", "last_name": "Smith"}]'
256
+ source = described_class.new(import, content: json)
257
+ records = source.fetch
258
+
259
+ expect(records.first[:first_name]).to eq("Alice")
260
+ end
261
+
262
+ it "extracts records from a nested path" do
263
+ json = '{"data": {"guests": [{"name": "Alice"}, {"name": "Bob"}]}}'
264
+ source = described_class.new(import_with_root, content: json)
265
+
266
+ expect(source.fetch.size).to eq(2)
267
+ end
268
+
269
+ it "reads from config raw_json when no content provided" do
270
+ import.update!(config: { "raw_json" => '[{"first_name": "Config"}]' })
271
+ source = described_class.new(import)
272
+
273
+ expect(source.fetch.first[:first_name]).to eq("Config")
274
+ end
275
+ ```
276
+
277
+ Chaque test couvre un chemin de la cascade : injection directe, `json_root`, et fallback `raw_json`. Pour la source API, on stubbe `Net::HTTP.start` pour eviter les vrais appels HTTP, et on teste les quatre axes : endpoint statique, endpoint lambda, headers lambda, et absence de `response_root` :
278
+
279
+ ```ruby
280
+ it "fetches and parses records from response_root" do
281
+ response_body = '{"stays": [{"name": "Beach House"}, {"name": "Mountain Cabin"}]}'
282
+ stub_http_get(response_body)
283
+
284
+ source = described_class.new(import)
285
+ expect(source.fetch.size).to eq(2)
286
+ end
287
+
288
+ it "resolves the endpoint lambda with params" do
289
+ response_body = '[{"title": "Item 42"}]'
290
+ stub_http_get(response_body)
291
+
292
+ source = described_class.new(import_with_lambda)
293
+ expect(source.fetch.first[:title]).to eq("Item 42")
294
+ end
295
+ ```
296
+
297
+ Le stub est minimal : `allow(Net::HTTP).to receive(:start).and_return(response)`. On ne teste pas que `Net::HTTP` fonctionne -- on teste que notre code compose correctement l'URL, les headers, et extrait les bonnes donnees de la reponse.
298
+
299
+ ## Decisions et compromis
300
+
301
+ | Decision | Choix retenu | Alternative ecartee | Raison |
302
+ |----------|-------------|---------------------|--------|
303
+ | Client HTTP | `Net::HTTP` (stdlib) | Faraday, HTTParty | Zero dependance supplementaire ; suffisant pour des GET simples |
304
+ | Endpoint dynamique | Lambda recevant `params` | String avec interpolation | Le lambda permet toute logique (conditions, appels de service) sans eval de string |
305
+ | Headers dynamiques | Lambda sans argument | Callback avec contexte | Les headers viennent souvent d'un service global (ENV, token store), pas du contexte de l'import |
306
+ | Cascade JSON | `content` > `raw_json` > `file` | Argument obligatoire | Flexibilite maximale ; chaque cas d'usage trouve son chemin naturellement |
307
+ | Normalisation des cles | `parameterize` + `to_sym` | Mapping explicite | Coherent avec la source CSV ; le pipeline en aval recoit toujours le meme format |
308
+
309
+ ## Recap
310
+
311
+ - **La source JSON** supporte trois modes d'entree (injection, config `raw_json`, fichier) via une cascade de fallbacks, et utilise `json_root` pour naviguer dans des structures imbriquees.
312
+ - **La source API** resout dynamiquement endpoints et headers grace a un systeme dual statique/lambda, et extrait les donnees via `response_root`.
313
+ - **Le DSL `ApiConfig`** utilise un pattern getter/setter sans `attr_reader`, evalue dans un bloc `instance_eval` pour une syntaxe naturelle.
314
+ - **`Sources.resolve`** dispatche vers la bonne classe via un registre fige -- ajouter une source est une operation en deux lignes.
315
+ - **Les tests** couvrent chaque chemin de chaque source sans toucher le reseau, grace a l'injection de contenu et au stubbing HTTP.
316
+
317
+ ## La suite
318
+
319
+ Les sources JSON et API completent le trio de formats supportes. Mais nous n'avons pas encore parle de la strategie de test globale de l'engine -- comment tester un moteur Rails sans application hote complete, comment organiser les specs entre tests unitaires et integration, comment mocker ActiveStorage et ActionCable. Dans la partie 13, nous plongeons dans le **testing d'un Rails Engine avec RSpec** et les patterns qui gardent la suite rapide et fiable.
320
+
321
+ ---
322
+
323
+ *Ceci est la partie 12 de la serie "Building DataPorter - A Data Import Engine for Rails". [Precedent : Generators: Install & Target Scaffolding](#) | [Suivant : Testing a Rails Engine with RSpec](#)*