data_porter 1.0.1 → 1.0.2
- checksums.yaml +4 -4
- data/CHANGELOG.md +6 -0
- data/lib/data_porter/version.rb +1 -1
- metadata +1 -11
- data/docs/CONFIGURATION.md +0 -115
- data/docs/MAPPING.md +0 -44
- data/docs/ROADMAP.md +0 -199
- data/docs/SOURCES.md +0 -94
- data/docs/TARGETS.md +0 -332
- data/docs/screenshots/index-with-previewing.jpg +0 -0
- data/docs/screenshots/index.jpg +0 -0
- data/docs/screenshots/mapping.jpg +0 -0
- data/docs/screenshots/modal-new-import.jpg +0 -0
- data/docs/screenshots/preview.jpg +0 -0
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 7ca6bfabfc9f831d71c60a1942516a5dccf95c85e3787f16a1217188c9feb3a0
+  data.tar.gz: f703da9261612953fcacad2674e38bef3037b804191e7fb3087577846b096461
 SHA512:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: a7d8ad32cb5d80e027d9adfe2a089a84703c6e8e5b00901c0d057a4b2bb24cb2ffbe0f6edb53f61021219fd03384830517192657fbe4c693dd74b6977279b22a
+  data.tar.gz: 6d96ecefa39d191cea801ff8e4075f5c9bb4e13979f7754041db33a6685701d98f4521230500a1d0a47a417ce141f1b08973b5cbfd65868b0c3920c99afb61f6

data/CHANGELOG.md
CHANGED
@@ -5,6 +5,12 @@ All notable changes to this project will be documented in this file.
 The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/),
 and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
 
+## [1.0.2] - 2026-02-07
+
+### Changed
+
+- Exclude `docs/` from gem package (194 KB → 80 KB)
+
 ## [1.0.1] - 2026-02-07
 
 ### Added

data/lib/data_porter/version.rb
CHANGED

metadata
CHANGED
@@ -1,7 +1,7 @@
 --- !ruby/object:Gem::Specification
 name: data_porter
 version: !ruby/object:Gem::Version
-  version: 1.0.1
+  version: 1.0.2
 platform: ruby
 authors:
 - Seryl Lounis
@@ -139,16 +139,6 @@ files:
 - app/views/data_porter/mapping_templates/new.html.erb
 - app/views/layouts/data_porter/application.html.erb
 - config/routes.rb
-- docs/CONFIGURATION.md
-- docs/MAPPING.md
-- docs/ROADMAP.md
-- docs/SOURCES.md
-- docs/TARGETS.md
-- docs/screenshots/index-with-previewing.jpg
-- docs/screenshots/index.jpg
-- docs/screenshots/mapping.jpg
-- docs/screenshots/modal-new-import.jpg
-- docs/screenshots/preview.jpg
 - lib/data_porter.rb
 - lib/data_porter/broadcaster.rb
 - lib/data_porter/components.rb
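As a quick sanity check on the `files:` change above, the released artifact can be unpacked locally with standard RubyGems commands; the expected listing is inferred from the remaining entries in this diff, so treat it as a rough guide rather than an exact manifest.

```bash
# Fetch the published gem and inspect its contents; docs/ should no longer be included.
gem fetch data_porter --version 1.0.2
gem unpack data_porter-1.0.2.gem
ls data_porter-1.0.2   # expect app/, config/, lib/, CHANGELOG.md, ... and no docs/
```
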
data/docs/CONFIGURATION.md
DELETED
@@ -1,115 +0,0 @@
-# Configuration
-
-All options are set in `config/initializers/data_porter.rb`:
-
-```ruby
-DataPorter.configure do |config|
-  # Parent controller for the engine's controllers to inherit from.
-  # Controls authentication, layouts, and helpers.
-  config.parent_controller = "ApplicationController"
-
-  # ActiveJob queue name for import jobs.
-  config.queue_name = :imports
-
-  # ActiveStorage service for uploaded files.
-  config.storage_service = :local
-
-  # ActionCable channel prefix.
-  config.cable_channel_prefix = "data_porter"
-
-  # Context builder: inject business data into targets.
-  # Receives the DataImport record.
-  config.context_builder = ->(data_import) {
-    { user: data_import.user }
-  }
-
-  # Maximum number of records displayed in preview.
-  config.preview_limit = 500
-
-  # Enabled source types.
-  config.enabled_sources = %i[csv json api xlsx]
-
-  # Auto-purge completed/failed imports older than this duration.
-  # Set to nil to disable. Run `rake data_porter:purge` manually or via cron.
-  config.purge_after = 60.days
-
-  # Maximum file size for uploads (default: 10 MB).
-  config.max_file_size = 10.megabytes
-
-  # Maximum number of records per import (default: 10,000).
-  # Set to nil to disable.
-  config.max_records = 10_000
-
-  # Transaction mode for imports.
-  # :per_record -- each record persisted independently (default)
-  # :all -- single transaction, rolls back entirely on any failure
-  config.transaction_mode = :per_record
-end
-```
-
-## Options reference
-
-| Option | Default | Description |
-|---|---|---|
-| `parent_controller` | `"ApplicationController"` | Controller class the engine inherits from |
-| `queue_name` | `:imports` | ActiveJob queue for import jobs |
-| `storage_service` | `:local` | ActiveStorage service name |
-| `cable_channel_prefix` | `"data_porter"` | ActionCable stream prefix |
-| `context_builder` | `nil` | Lambda receiving the DataImport record, returns context passed to target methods |
-| `preview_limit` | `500` | Max records shown in the preview step |
-| `enabled_sources` | `%i[csv json api xlsx]` | Source types available in the UI |
-| `purge_after` | `60.days` | Auto-purge completed/failed imports older than this duration |
-| `max_file_size` | `10.megabytes` | Maximum file size for uploads |
-| `max_records` | `10_000` | Maximum number of records per import (nil to disable) |
-| `transaction_mode` | `:per_record` | `:per_record` or `:all` (single transaction rollback) |
-
-## Authentication
-
-The engine inherits authentication from `parent_controller`. Set it to your authenticated base controller:
-
-```ruby
-config.parent_controller = "Admin::BaseController"
-```
-
-All engine routes will require the same authentication as your base controller.
-
-## Context builder
-
-The `context_builder` lambda lets you inject business data (current user, tenant, permissions) into target methods (`persist`, `after_import`, `on_error`). It receives the `DataImport` record:
-
-```ruby
-config.context_builder = ->(data_import) {
-  { user: data_import.user, import_id: data_import.id }
-}
-```
-
-The returned object is available as `context` in all target instance methods.
-
-## Real-time progress
-
-DataPorter tracks import progress via JSON polling. The Stimulus progress controller polls `GET /imports/:id/status` every second and updates an animated progress bar.
-
-The status endpoint returns:
-
-```json
-{
-  "status": "importing",
-  "progress": { "current": 42, "total": 100, "percentage": 42 }
-}
-```
-
-No ActionCable or WebSocket configuration required -- it works out of the box with any deployment.
-
-## Auto-purge
-
-Old completed/failed imports can be cleaned up automatically:
-
-```bash
-# Run manually
-bin/rails data_porter:purge
-
-# Or schedule via cron (e.g. with whenever or solid_queue)
-# Removes imports older than purge_after (default: 60 days)
-```
-
-Attached files are purged from ActiveStorage along with the import record.

data/docs/MAPPING.md
DELETED
@@ -1,44 +0,0 @@
-# Column Mapping
-
-For file-based sources (CSV/XLSX), DataPorter adds an interactive mapping step between upload and parsing. Users see their file's actual column headers and map each one to a target field via dropdowns.
-
-```
-File Header          Target Field
-+-----------+        +---------------+
-| Prenom    |   ->   | First Name  v |
-+-----------+        +---------------+
-+-----------+        +---------------+
-| Nom       |   ->   | Last Name   v |
-+-----------+        +---------------+
-```
-
-Dropdowns are pre-filled from the Target's `csv_mapping` when headers match. Users can adjust any mapping before continuing to the preview step.
-
-## Required fields
-
-Required target fields are marked with `*` in the dropdown labels. If any required field is left unmapped, a warning banner appears listing the missing fields. This validation is client-side only -- it warns but does not block submission.
-
-## Duplicate detection
-
-If two file headers are mapped to the same target field, the affected rows are highlighted with an orange border and a warning message appears. This helps catch accidental duplicate mappings before parsing.
-
-## Mapping Templates
-
-Mappings can be saved as reusable templates. When starting a new import, users select a saved template from a dropdown to auto-fill all column mappings at once. Templates are stored per-target, so each import type has its own template library.
-
-### Managing templates
-
-- **Inline**: Check "Save as template" in the mapping form and give it a name
-- **CRUD**: Use the "Mapping Templates" link on the imports index page to create, edit, and delete templates
-
-When a template is loaded, the "Save as template" checkbox is hidden since the user is already working from an existing template.
-
-## Mapping Priority
-
-When parsing, mappings are resolved in priority order:
-
-1. **User mapping** -- from the mapping UI (`config["column_mapping"]`)
-2. **Code mapping** -- from the Target DSL (`csv_mapping`)
-3. **Auto-map** -- parameterize headers to match column names
-
-Non-file sources (JSON, API) skip the mapping step entirely.

data/docs/ROADMAP.md
DELETED
@@ -1,199 +0,0 @@
-# Roadmap
-
-## v1.0 — Production-ready DONE
-
-The goal is a gem that handles real-world imports reliably at scale.
-
-### ~~1. Records pagination~~ DONE
-
-Implemented in v0.6.0. Preview and completed pages are paginated (50 per page).
-Controller limits records loaded via `RecordPagination` concern.
-
-### ~~2. Import params~~ DONE
-
-Implemented in v0.9.0. Targets declare `params` with a DSL (`:select`, `:text`,
-`:number`, `:hidden`). Values stored in `config["import_params"]`, accessible
-via `import_params` in all target instance methods. See [Targets docs](TARGETS.md#params--).
-
-### ~~3. Security audit~~ DONE
-
-- Replaced `permit!` on import params and column mapping with whitelists
-- File size validation (`config.max_file_size`, default 10 MB)
-- MIME type validation per source type
-- XSS fix in template form controller (safe DOM methods)
-
-### ~~4. Safety guards~~ DONE
-
-- Max records guard (`config.max_records`, default 10,000)
-- Transaction mode (`config.transaction_mode`: `:per_record` or `:all`)
-- Fallback headers (auto-generate `col_1, col_2...` for empty header rows)
-
-### ~~5. Reject rows export~~ DONE
-
-Download CSV of failed/errored records with original data + error messages.
-Zero-dependency streaming via `send_data`.
-
-### ~~6. E2E integration tests~~ DONE
-
-6 end-to-end specs covering all source types (CSV, XLSX, JSON, API),
-import params flow, and reject rows CSV export. 395 specs total.
-
----
-
-## v1.1 — Deploy-ready
-
-Priority: features required to deploy DataPorter to real users.
-
-### Scoped imports
-
-Wire up `config.scope` so each user only sees their own imports. The
-configuration hook already exists but isn't connected to the controller query:
-
-```ruby
-DataPorter.configure do |config|
-  config.scope = ->(user) { { user_type: user.class.name, user_id: user.id } }
-end
-```
-
-The controller `index` and `show` actions apply the scope automatically.
-Combined with `parent_controller` inheriting from an authenticated controller,
-this enables full multi-tenant isolation — suitable for both B2B (tenant per
-organization) and B2C (user-level) scenarios.
-
-### Preview ↔ Mapping navigation
-
-Allow users to go back from preview to the mapping step and adjust column
-mapping without restarting the import. Currently the flow is one-way:
-mapping → parse → preview. Adding a "Back to mapping" button on the preview
-page would let users correct mapping mistakes after seeing the parsed data.
-
-### ~~CSV auto-detect: delimiter & encoding~~ DONE
-
-Implemented in v1.0.1. Auto-detect CSV delimiter (`,` `;` `\t`) via frequency
-analysis on the first line. Auto-detect file encoding: strip UTF-8 BOM, validate
-UTF-8, fallback to ISO-8859-1 transcoding. Explicit `col_sep` config takes
-precedence.
-
-### Column mapping for JSON and API sources
-
-The interactive column mapping step currently only works for file-based sources
-(CSV, XLSX). JSON and API sources have stable, predictable keys that rarely need
-remapping, but supporting mapping for all sources would provide a consistent UX.
-
----
-
-## v1.2 — Smart imports
-
-### Dry-run performance estimate
-
-After a dry run, display an estimated import time based on average record
-processing speed: "Estimated import time: ~2m30s". Helps users decide whether
-to launch the import now or schedule it for off-peak hours.
-
-### Permissions / RBAC
-
-Role-based access control for import operations. Allow host apps to restrict
-who can create imports, confirm imports, or access specific targets. Integrate
-with existing authorization frameworks (Pundit, CanCanCan) via a configurable
-policy hook.
-
-### Column transformers
-
-Built-in transformation pipeline applied per-column before the target's
-`transform` method. Declarative DSL in the target:
-
-```ruby
-columns do
-  column :email, type: :email, transform: [:strip, :downcase]
-  column :phone, type: :string, transform: [:strip, :normalize_phone]
-  column :born_on, type: :date, transform: [:parse_date]
-end
-```
-
-Ships with common transformers (`strip`, `downcase`, `titleize`,
-`normalize_phone`, `parse_date`). Custom transformers via a registry.
-
-### Auto-map heuristics
-
-Smart column mapping suggestions using tokenized header matching and synonym
-dictionaries. When a CSV has "E-mail Address", auto-suggest mapping to `:email`.
-Built-in synonyms for common patterns (phone → phone_number,
-first name → first_name). Configurable synonym lists per target.
-
----
-
-## v2.0 — Scale & Automation
-
-### Bulk import
-
-High-volume import support using `insert_all` / `upsert_all` for batch
-persistence. Bypass per-record `persist` calls when the target opts in,
-enabling 10-100x throughput for simple create/upsert scenarios. Configurable
-batch size, with fallback to per-record mode on conflict.
-
-### Update & diff mode
-
-Support update (upsert) imports alongside create-only. Given a
-`deduplicate_by` key, detect existing records and show a diff preview:
-new records, changed fields (highlighted), unchanged rows. User confirms
-which changes to apply. Enables recurring data sync workflows.
-
-### Resume / retry on failure
-
-If an import fails mid-way (timeout, crash, transient error), resume from
-the last successful record instead of restarting from scratch. Track a
-checkpoint index in the report. Critical for large imports (5k+ records)
-where re-processing everything is not acceptable.
-
-### API pagination
-
-Support paginated API sources. The current API source does a single GET,
-which works for small datasets but not for APIs returning thousands of
-records across multiple pages. Support offset, cursor, and link-header
-pagination strategies via `api_config`:
-
-```ruby
-api_config do
-  endpoint "https://api.example.com/contacts"
-  pagination :cursor, param: "after", root: "data", next_key: "meta.next_cursor"
-end
-```
-
-### Scheduled imports
-
-Recurring imports from API or remote sources on a cron schedule. A target
-declares a schedule, and DataPorter automatically fetches and imports at
-the configured interval. Built on ActiveJob with configurable queue.
-
----
-
-## v3.0 — Platform
-
-### Webhooks
-
-HTTP callbacks on import lifecycle events (started, completed, failed).
-Configurable per-target with URL, headers, and payload template. Enables
-integration with Slack notifications, CI pipelines, or external dashboards.
-
-### External connectors
-
-Source plugins beyond local files and HTTP APIs:
-
-- **Google Sheets** — OAuth2 + Sheets API, treat a spreadsheet as a source
-- **SFTP** — Poll a remote directory for new files
-- **AWS S3** — Watch a bucket/prefix for uploads
-- **Remote HTTP polling** — Periodically fetch from a paginated API
-
-Each connector implements the `Sources::Base` interface. Installed as
-optional companion gems (`data_porter-google_sheets`, `data_porter-s3`).
-
-### i18n
-
-Full internationalization of all UI strings, error messages, and status
-labels. Ship with English and French translations. Host apps can override
-or add languages via standard Rails I18n.
-
-### Dashboard & analytics
-
-Import statistics dashboard: success rates, average duration, records per
-import, most-used targets, failure trends. Mountable as an admin-only route.

data/docs/SOURCES.md
DELETED
@@ -1,94 +0,0 @@
-# Sources
-
-DataPorter supports four source types. Each source reads data from a different format and feeds it through the same parsing pipeline.
-
-## CSV
-
-Upload a CSV file. Headers are extracted automatically and presented in the [column mapping](MAPPING.md) step. Configure header mappings with `csv_mapping` in your [Target](TARGETS.md) when file headers don't match your column names.
-
-Custom separator:
-
-```ruby
-import.config = { "separator" => ";" }
-```
-
-## XLSX
-
-Upload an Excel `.xlsx` file. Uses the same `csv_mapping` for header-to-column mapping as CSV. By default the first sheet is parsed; select a different sheet via config:
-
-```ruby
-import.config = { "sheet_index" => 1 }
-```
-
-Powered by [creek](https://github.com/pythonicrubyist/creek) for streaming, memory-efficient parsing.
-
-## JSON
-
-Upload a JSON file. Use `json_root` in your Target to specify the path to the records array. Raw JSON arrays are supported without `json_root`.
-
-```ruby
-json_root "data.users"
-```
-
-Given `{ "data": { "users": [...] } }`, records are extracted from `data.users`.
-
-## API
-
-Fetch records from an external API endpoint. No file upload is needed -- the engine calls the API directly.
-
-### Basic usage
-
-```ruby
-api_config do
-  endpoint "https://api.example.com/data"
-  headers({ "Authorization" => "Bearer token" })
-  response_root "results"
-end
-```
-
-| Option | Type | Description |
-|---|---|---|
-| `endpoint` | String or Proc | URL to fetch records from |
-| `headers` | Hash or Proc | HTTP headers sent with the request |
-| `response_root` | String | Key in the JSON response containing the records array (omit for top-level arrays) |
-
-### Dynamic endpoints and headers
-
-Both `endpoint` and `headers` accept lambdas for runtime values. The endpoint lambda receives the import's `config` hash:
-
-```ruby
-api_config do
-  endpoint ->(params) { "https://api.example.com/events?page=#{params[:page]}" }
-  headers -> { { "Authorization" => "Bearer #{ENV['API_TOKEN']}" } }
-  response_root "data"
-end
-```
-
-### Full example
-
-```ruby
-class EventTarget < DataPorter::Target
-  label "Events"
-  model_name "Event"
-  sources :api
-
-  api_config do
-    endpoint "https://api.example.com/events"
-    headers -> { { "Authorization" => "Bearer #{ENV['EVENTS_API_KEY']}" } }
-    response_root "events"
-  end
-
-  columns do
-    column :name, type: :string, required: true
-    column :date, type: :date
-    column :venue, type: :string
-    column :capacity, type: :integer
-  end
-
-  def persist(record, context:)
-    Event.create!(record.attributes)
-  end
-end
-```
-
-When a user creates an import with source type **API**, the engine skips file upload entirely, calls the configured endpoint, parses the JSON response, and feeds the records through the same preview/validate/import pipeline as file-based sources.

data/docs/TARGETS.md
DELETED
@@ -1,332 +0,0 @@
-# Targets
-
-Targets are plain Ruby classes in `app/importers/` that inherit from `DataPorter::Target`. Each target defines one import type: its columns, sources, mappings, and persistence logic.
-
-## Generator
-
-```bash
-bin/rails generate data_porter:target ModelName column:type[:required] ... [--sources csv xlsx]
-```
-
-Examples:
-
-```bash
-bin/rails generate data_porter:target User email:string:required name:string age:integer --sources csv xlsx
-bin/rails generate data_porter:target Product name:string price:decimal --sources csv
-bin/rails generate data_porter:target Order order_number:string total:decimal
-```
-
-Column format: `name:type[:required]`
-
-Supported types: `string`, `integer`, `decimal`, `boolean`, `date`.
-
-The `--sources` option specifies which source types the target accepts (default: `csv`). The UI will only show these sources when the target is selected.
-
-## Class-level DSL
-
-```ruby
-class OrderTarget < DataPorter::Target
-  label "Orders"
-  model_name "Order"
-  icon "fas fa-shopping-cart"
-  sources :csv, :json, :api, :xlsx
-
-  columns do
-    column :order_number, type: :string, required: true
-    column :total, type: :decimal
-    column :placed_at, type: :date
-    column :active, type: :boolean
-    column :quantity, type: :integer
-  end
-
-  csv_mapping do
-    map "Order #" => :order_number
-    map "Total ($)" => :total
-  end
-
-  json_root "data.orders"
-
-  api_config do
-    endpoint "https://api.example.com/orders"
-    headers({ "Authorization" => "Bearer token" })
-    response_root "data.orders"
-  end
-
-  deduplicate_by :order_number
-
-  dry_run_enabled
-
-  params do
-    param :warehouse_id, type: :select, label: "Warehouse", required: true,
-          collection: -> { Warehouse.pluck(:name, :id) }
-    param :currency, type: :text, default: "USD"
-  end
-end
-```
-
-### `label(value)`
-
-Human-readable name shown in the UI.
-
-### `model_name(value)`
-
-The ActiveRecord model name this target imports into (for display purposes).
-
-### `icon(value)`
-
-CSS icon class (e.g. FontAwesome) shown in the UI.
-
-### `sources(*types)`
-
-Accepted source types: `:csv`, `:json`, `:api`, `:xlsx`.
-
-### `columns { ... }`
-
-Defines the expected columns for this import. Each column accepts:
-
-| Parameter | Type | Default | Description |
-|---|---|---|---|
-| `name` | Symbol | (required) | Column identifier |
-| `type` | Symbol | `:string` | One of `:string`, `:integer`, `:decimal`, `:boolean`, `:date` |
-| `required` | Boolean | `false` | Whether the column must have a value |
-| `label` | String | Humanized name | Display label in the preview |
-
-### `csv_mapping { ... }`
-
-Maps CSV/XLSX header names to column names when they don't match:
-
-```ruby
-csv_mapping do
-  map "First Name" => :first_name
-  map "E-mail" => :email
-end
-```
-
-### `json_root(path)`
-
-Dot-separated path to the array of records within a JSON document:
-
-```ruby
-json_root "data.users"
-```
-
-Given `{ "data": { "users": [...] } }`, records are extracted from `data.users`.
-
-### `api_config { ... }`
-
-See [Sources: API](SOURCES.md#api) for full documentation.
-
-### `deduplicate_by(*keys)`
-
-Skip records that share the same value(s) for the given column(s):
-
-```ruby
-deduplicate_by :email
-deduplicate_by :first_name, :last_name
-```
-
-### `dry_run_enabled`
-
-Enables dry run mode for this target. A "Dry Run" button appears in the preview step. Dry run executes the full import pipeline (transform, validate, persist) inside a rolled-back transaction, giving a validation report without modifying the database.
-
-### `params { ... }`
-
-Declares extra form fields shown when this target is selected in the import form. Values are stored in `config["import_params"]` and accessible via `import_params` in all instance methods.
-
-```ruby
-params do
-  param :hotel_id, type: :select, label: "Hotel", required: true,
-        collection: -> { Hotel.pluck(:name, :id) }
-  param :currency, type: :text, label: "Currency", default: "EUR"
-  param :batch_size, type: :number, label: "Batch Size", default: "100"
-  param :tenant_id, type: :hidden, default: "abc123"
-end
-```
-
-Each param accepts:
-
-| Parameter | Type | Default | Description |
-|---|---|---|---|
-| `name` | Symbol | (required) | Param identifier |
-| `type` | Symbol | `:text` | One of `:select`, `:text`, `:number`, `:hidden` |
-| `required` | Boolean | `false` | Validated on import creation, shown with `*` in the form |
-| `label` | String | Humanized name | Display label in the form |
-| `default` | String | `nil` | Pre-filled value in the form |
-| `collection` | Lambda or Array | `nil` | For `:select` type -- `[[label, value], ...]` |
-
-Collection accepts both a lambda and a plain array. Use a lambda for dynamic data (evaluated when the form loads, not at boot time):
-
-```ruby
-param :hotel_id, type: :select, collection: -> { Hotel.pluck(:name, :id) }
-param :status, type: :select, collection: [%w[Active active], %w[Archived archived]]
-```
-
-## Instance Methods
-
-### `import_params`
-
-Returns a hash of the import params values set by the user in the form. Available in all instance methods (`persist`, `transform`, `validate`, `after_import`, `on_error`). Defaults to `{}` when no params are declared.
-
-```ruby
-def persist(record, context:)
-  Guest.create!(
-    record.attributes.merge(
-      hotel_id: import_params["hotel_id"],
-      currency: import_params["currency"]
-    )
-  )
-end
-```
-
-Override these in your target to customize behavior.
-
-### `transform(record)`
-
-Transform a record before validation. Must return the (modified) record.
-
-```ruby
-def transform(record)
-  record.attributes["email"] = record.attributes["email"]&.downcase
-  record
-end
-```
-
-### `validate(record)`
-
-Add custom validation errors to a record:
-
-```ruby
-def validate(record)
-  record.add_error("Email is invalid") unless record.attributes["email"]&.include?("@")
-end
-```
-
-### `persist(record, context:)`
-
-**Required.** Save the record to your database. Raises `NotImplementedError` if not overridden.
-
-```ruby
-def persist(record, context:)
-  User.create!(record.attributes)
-end
-```
-
-### `after_import(results, context:)`
-
-Called once after all records have been processed:
-
-```ruby
-def after_import(results, context:)
-  AdminMailer.import_complete(context.user, results).deliver_later
-end
-```
-
-### `on_error(record, error, context:)`
-
-Called when a record fails to import:
-
-```ruby
-def on_error(record, error, context:)
-  Sentry.capture_exception(error, extra: { record: record.attributes })
-end
-```
-
-## Full example
-
-A complete target using most DSL features: multiple sources, import params, JSON root, API config, transform, custom validation, and lifecycle hooks.
-
-```ruby
-# frozen_string_literal: true
-
-class ContactTarget < DataPorter::Target
-  label "Contacts"
-  model_name "Contact"
-  icon "fas fa-address-book"
-  sources :csv, :xlsx, :json, :api
-  dry_run_enabled
-
-  columns do
-    column :name, type: :string, required: true
-    column :email, type: :email
-    column :phone_number, type: :string
-    column :address, type: :string
-    column :room, type: :string
-  end
-
-  params do
-    param :default_room, type: :text, label: "Default room"
-    param :import_source, type: :select, label: "Import source",
-          collection: [%w[Manual manual], %w[Migration migration], ["Directory sync", "sync"]]
-  end
-
-  json_root "contacts"
-
-  api_config do
-    endpoint "http://localhost:3001/contacts"
-    headers({ "Authorization" => "Bearer token" })
-    response_root :contacts
-  end
-
-  def transform(record)
-    apply_default_room(record)
-    normalize_phone(record)
-    record
-  end
-
-  def validate(record)
-    validate_email_format(record)
-  end
-
-  def persist(record, context:)
-    Contact.create!(record.attributes)
-  end
-
-  def after_import(results, context:)
-    Rails.logger.info("[DataPorter] Contacts: #{results[:created]} created, #{results[:errored]} errors")
-  end
-
-  def on_error(record, error, context:)
-    Rails.logger.warn("[DataPorter] Line #{record.line_number}: #{error.message}")
-  end
-
-  private
-
-  def apply_default_room(record)
-    return if record.data["room"].present?
-    return unless import_params["default_room"].present?
-
-    record.data["room"] = import_params["default_room"]
-  end
-
-  def normalize_phone(record)
-    phone = record.data["phone_number"]
-    return if phone.blank?
-
-    record.data["phone_number"] = phone.gsub(/\s/, "")
-  end
-
-  def validate_email_format(record)
-    email = record.data["email"]
-    return if email.blank?
-    return if email.match?(/\A[^@\s]+@[^@\s]+\z/)
-
-    record.add_error("Invalid email format: #{email}")
-  end
-end
-```
-
-This target exercises:
-
-| Feature | DSL / Hook | Effect |
-|---|---|---|
-| 4 sources | `sources :csv, :xlsx, :json, :api` | All source types available |
-| Typed columns | `type: :string, :email` | Built-in validation |
-| Required field | `required: true` on `name` | Rows without name get "missing" status |
-| Dry run | `dry_run_enabled` | Dry run button in preview |
-| Import params | `param :default_room`, `param :import_source` | Dynamic form fields |
-| JSON root | `json_root "contacts"` | Extracts from `{"contacts": [...]}` |
-| API config | `endpoint`, `headers`, `response_root` | Authenticated API fetch |
-| Transform | `apply_default_room`, `normalize_phone` | Fill blanks, strip whitespace |
-| Validate | `validate_email_format` | Custom error on invalid email |
-| After import | Logs summary | Post-import hook |
-| On error | Logs failed line | Per-record error hook |

data/docs/screenshots/index-with-previewing.jpg
DELETED
Binary file

data/docs/screenshots/index.jpg
DELETED
Binary file

data/docs/screenshots/mapping.jpg
DELETED
Binary file

data/docs/screenshots/modal-new-import.jpg
DELETED
Binary file

data/docs/screenshots/preview.jpg
DELETED
Binary file