data_porter 0.9.0 → 1.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +42 -0
- data/README.md +5 -1
- data/app/assets/javascripts/data_porter/import_form_controller.js +1 -0
- data/app/assets/javascripts/data_porter/template_form_controller.js +31 -8
- data/app/assets/stylesheets/data_porter/alerts.css +2 -1
- data/app/assets/stylesheets/data_porter/layout.css +2 -2
- data/app/controllers/data_porter/concerns/import_validation.rb +29 -0
- data/app/controllers/data_porter/concerns/mapping_management.rb +13 -4
- data/app/controllers/data_porter/imports_controller.rb +28 -4
- data/app/views/data_porter/imports/show.html.erb +4 -0
- data/config/routes.rb +1 -0
- data/docs/CONFIGURATION.md +22 -10
- data/docs/ROADMAP.md +180 -9
- data/docs/TARGETS.md +107 -2
- data/lib/data_porter/components/preview/results_summary.rb +6 -1
- data/lib/data_porter/configuration.rb +7 -1
- data/lib/data_porter/orchestrator/importer.rb +27 -0
- data/lib/data_porter/orchestrator/record_builder.rb +9 -0
- data/lib/data_porter/registry.rb +7 -1
- data/lib/data_porter/rejects_csv_builder.rb +35 -0
- data/lib/data_porter/sources/base.rb +6 -0
- data/lib/data_porter/sources/csv.rb +32 -5
- data/lib/data_porter/sources/xlsx.rb +2 -1
- data/lib/data_porter/version.rb +1 -1
- data/lib/data_porter.rb +1 -0
- data/lib/generators/data_porter/install/templates/create_data_porter_imports.rb.erb +1 -1
- data/lib/generators/data_porter/install/templates/initializer.rb +3 -5
- metadata +5 -1
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 3f4f148347707854fe641913e1e21616ca6ed93b8e6c5ddcd0afb2421ddba7dc
|
|
4
|
+
data.tar.gz: e1ac18c356d7edcca901c54f1349d003e5a9b31d9feca5aed1ac51717402bbeb
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 4e96e22590744496c31aa51060cb1c043365eabbcefa9d17f97edc8eba18ae045fd598a6483aa57fad35b3524b120a2ee358c3103a36fbc44f595ff5eda23bc8
|
|
7
|
+
data.tar.gz: 5dbc857197bafd89108528aa95ee12617c5ee7e09b9784bbeb924442d11e07300f469a53d72ec44c0c39099836d2b24ac9a7d616422ddbc7d68fcbcbc06606f4
|
data/CHANGELOG.md
CHANGED
|
@@ -5,6 +5,48 @@ All notable changes to this project will be documented in this file.
|
|
|
5
5
|
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/),
|
|
6
6
|
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
|
|
7
7
|
|
|
8
|
+
## [1.0.1] - 2026-02-07
|
|
9
|
+
|
|
10
|
+
### Added
|
|
11
|
+
|
|
12
|
+
- **CSV delimiter auto-detection** -- Automatically detect `,` `;` `\t` separators via frequency analysis on the first line; explicit `col_sep` config still takes precedence
|
|
13
|
+
- **CSV encoding auto-detection** -- Detect and transcode Latin-1 / ISO-8859-1 content to UTF-8; strip UTF-8 BOM when present
|
|
14
|
+
|
|
15
|
+
### Fixed
|
|
16
|
+
|
|
17
|
+
- **`param.collection` accepts arrays** -- `Registry.serialize_param` now duck-types with `respond_to?(:call)` so both lambdas and plain arrays work
|
|
18
|
+
- **`dp-input` styling** -- Text inputs now share the same CSS rules as `dp-select` and `dp-file-input`
|
|
19
|
+
- **Migration template nullable user** -- Removed `null: false` from polymorphic `user` reference so the engine works without authentication
|
|
20
|
+
- **Skipped records visible in results** -- Added "Skipped" stat card for missing + partial records; title reflects errors; export rejects button includes all rejected rows
|
|
21
|
+
- **Hidden param label removed** -- `type: :hidden` params no longer render a label or wrapper div
|
|
22
|
+
|
|
23
|
+
### Changed
|
|
24
|
+
|
|
25
|
+
- 402 RSpec examples (up from 391), 0 failures
|
|
26
|
+
|
|
27
|
+
## [1.0.0] - 2026-02-07
|
|
28
|
+
|
|
29
|
+
### Added
|
|
30
|
+
|
|
31
|
+
- **Max records guard** -- `config.max_records` (default: 10,000) rejects files exceeding the limit before parsing
|
|
32
|
+
- **Transaction mode** -- `config.transaction_mode` (`:per_record` or `:all`); `:all` wraps entire import in a single transaction that rolls back on any failure
|
|
33
|
+
- **Fallback headers** -- Auto-generate `col_1, col_2...` when CSV/XLSX header row is empty
|
|
34
|
+
- **Reject rows CSV export** -- Download CSV of failed/errored records with original data + error messages after import; available when `errored_count > 0`
|
|
35
|
+
- **E2E specs** -- 6 end-to-end integration tests covering all source types (CSV, XLSX, JSON, API), import params, and reject rows export
|
|
36
|
+
|
|
37
|
+
### Fixed
|
|
38
|
+
|
|
39
|
+
- **Import params whitelist** -- `merge_import_params` now permits only param names declared in the Target DSL instead of using `permit!`
|
|
40
|
+
- **Column mapping whitelist** -- `permitted_column_mapping` filters mapping values to valid target column names; invalid values replaced with `""`
|
|
41
|
+
- **File size validation** -- Uploads exceeding `config.max_file_size` (default: 10 MB) are rejected before save
|
|
42
|
+
- **MIME type validation** -- Uploaded files must match allowed content types per source (CSV: `text/csv`, `text/plain`; JSON: `application/json`, `text/plain`; XLSX: OpenXML spreadsheet)
|
|
43
|
+
- **XSS in template form** -- Replaced `innerHTML` with safe DOM methods in `template_form_controller.js`
|
|
44
|
+
|
|
45
|
+
### Changed
|
|
46
|
+
|
|
47
|
+
- Validation chain refactored to `all_validations_pass?` using `.all?` to collect all errors at once instead of short-circuiting
|
|
48
|
+
- 391 RSpec examples (up from 354), 0 failures
|
|
49
|
+
|
|
8
50
|
## [0.9.0] - 2026-02-07
|
|
9
51
|
|
|
10
52
|
### Added
|
data/README.md
CHANGED
|
@@ -30,6 +30,9 @@ Supports CSV, JSON, XLSX, and API sources with a declarative DSL for defining im
|
|
|
30
30
|
- **Import params** -- Declare extra form fields (select, text, number, hidden) per target for scoped imports ([docs](docs/TARGETS.md#params--))
|
|
31
31
|
- **Per-target source filtering** -- Each target declares its allowed sources, the UI filters accordingly
|
|
32
32
|
- **Import deletion & auto-purge** -- Delete imports from the UI, or schedule `rake data_porter:purge` for automatic cleanup
|
|
33
|
+
- **Reject rows export** -- Download a CSV of failed/errored records with error messages after import
|
|
34
|
+
- **Security validations** -- File size limit, MIME type check, strong parameter whitelisting
|
|
35
|
+
- **Safety guards** -- Max records limit (`config.max_records`), configurable transaction mode (`:per_record` or `:all`)
|
|
33
36
|
- **Declarative Target DSL** -- One class per import type, zero boilerplate ([docs](docs/TARGETS.md))
|
|
34
37
|
|
|
35
38
|
## Requirements
|
|
@@ -129,6 +132,7 @@ pending -> parsing -> previewing -> importing -> completed
|
|
|
129
132
|
| POST | `/imports/:id/confirm` | Run import |
|
|
130
133
|
| POST | `/imports/:id/cancel` | Cancel import |
|
|
131
134
|
| POST | `/imports/:id/dry_run` | Dry run validation |
|
|
135
|
+
| GET | `/imports/:id/export_rejects` | Download rejects CSV |
|
|
132
136
|
| | `/mapping_templates` | Full CRUD for templates |
|
|
133
137
|
|
|
134
138
|
## Development
|
|
@@ -137,7 +141,7 @@ pending -> parsing -> previewing -> importing -> completed
|
|
|
137
141
|
git clone https://github.com/SerylLns/data_porter.git
|
|
138
142
|
cd data_porter
|
|
139
143
|
bin/setup
|
|
140
|
-
bundle exec rspec #
|
|
144
|
+
bundle exec rspec # 402 specs
|
|
141
145
|
bundle exec rubocop # 0 offenses
|
|
142
146
|
```
|
|
143
147
|
|
|
@@ -18,7 +18,8 @@ export default class extends Controller {
|
|
|
18
18
|
const pair = document.createElement("div")
|
|
19
19
|
pair.className = "dp-mapping-pair"
|
|
20
20
|
pair.style.cssText = "display: flex; gap: 0.5rem; margin-bottom: 0.5rem;"
|
|
21
|
-
pair.
|
|
21
|
+
pair.appendChild(this.buildKeyInput())
|
|
22
|
+
pair.appendChild(this.buildValueSelect(columns))
|
|
22
23
|
container.appendChild(pair)
|
|
23
24
|
}
|
|
24
25
|
|
|
@@ -34,13 +35,35 @@ export default class extends Controller {
|
|
|
34
35
|
})
|
|
35
36
|
}
|
|
36
37
|
|
|
37
|
-
|
|
38
|
-
const
|
|
39
|
-
|
|
40
|
-
|
|
38
|
+
buildKeyInput() {
|
|
39
|
+
const input = document.createElement("input")
|
|
40
|
+
input.type = "text"
|
|
41
|
+
input.name = "mapping_template[mapping_keys][]"
|
|
42
|
+
input.placeholder = "File header"
|
|
43
|
+
input.className = "dp-select"
|
|
44
|
+
input.style.flex = "1"
|
|
45
|
+
return input
|
|
46
|
+
}
|
|
47
|
+
|
|
48
|
+
buildValueSelect(columns) {
|
|
49
|
+
const select = document.createElement("select")
|
|
50
|
+
select.name = "mapping_template[mapping_values][]"
|
|
51
|
+
select.className = "dp-select"
|
|
52
|
+
select.style.flex = "1"
|
|
53
|
+
// Set the Stimulus target attribute by its literal name. The markup this method replaces
// emitted `data-data-porter--template-form-target` (double dash in the controller identifier);
// `dataset.dataPorterTemplateFormTarget` would serialise to `data-data-porter-template-form-target`
// (single dash), so dynamically added selects would no longer be picked up as `fieldSelect` targets.
select.setAttribute("data-data-porter--template-form-target", "fieldSelect")
|
|
54
|
+
|
|
55
|
+
const blank = document.createElement("option")
|
|
56
|
+
blank.value = ""
|
|
57
|
+
blank.textContent = "Select a field..."
|
|
58
|
+
select.appendChild(blank)
|
|
59
|
+
|
|
60
|
+
columns.forEach(([label, name]) => {
|
|
61
|
+
const opt = document.createElement("option")
|
|
62
|
+
opt.value = name
|
|
63
|
+
opt.textContent = label
|
|
64
|
+
select.appendChild(opt)
|
|
65
|
+
})
|
|
41
66
|
|
|
42
|
-
return
|
|
43
|
-
`<select name="mapping_template[mapping_values][]" class="dp-select" style="flex: 1;" data-data-porter--template-form-target="fieldSelect">` +
|
|
44
|
-
`<option value="">Select a field...</option>${options}</select>`
|
|
67
|
+
return select
|
|
45
68
|
}
|
|
46
69
|
}
|
|
@@ -32,7 +32,7 @@
|
|
|
32
32
|
display: grid;
|
|
33
33
|
grid-template-columns: repeat(auto-fit, minmax(120px, 1fr));
|
|
34
34
|
gap: 1rem;
|
|
35
|
-
max-width:
|
|
35
|
+
max-width: 500px;
|
|
36
36
|
margin: 0 auto;
|
|
37
37
|
}
|
|
38
38
|
|
|
@@ -60,6 +60,7 @@
|
|
|
60
60
|
|
|
61
61
|
.dp-results__stat--success strong { color: var(--dp-success); }
|
|
62
62
|
.dp-results__stat--error strong { color: var(--dp-danger); }
|
|
63
|
+
.dp-results__stat--warning strong { color: var(--dp-warning); }
|
|
63
64
|
|
|
64
65
|
.dp-results__duration {
|
|
65
66
|
margin-top: 1rem;
|
|
@@ -29,7 +29,7 @@
|
|
|
29
29
|
color: var(--dp-gray-700);
|
|
30
30
|
}
|
|
31
31
|
|
|
32
|
-
.dp-select, .dp-file-input {
|
|
32
|
+
.dp-select, .dp-input, .dp-file-input {
|
|
33
33
|
display: block;
|
|
34
34
|
width: 100%;
|
|
35
35
|
padding: 0.625rem 0.875rem;
|
|
@@ -50,7 +50,7 @@
|
|
|
50
50
|
padding-right: 2.5rem;
|
|
51
51
|
}
|
|
52
52
|
|
|
53
|
-
.dp-select:focus, .dp-file-input:focus {
|
|
53
|
+
.dp-select:focus, .dp-input:focus, .dp-file-input:focus {
|
|
54
54
|
outline: none;
|
|
55
55
|
border-color: var(--dp-primary);
|
|
56
56
|
box-shadow: 0 0 0 3px rgba(79, 70, 229, 0.15);
|
|
@@ -5,6 +5,12 @@ module DataPorter
|
|
|
5
5
|
module ImportValidation
|
|
6
6
|
extend ActiveSupport::Concern
|
|
7
7
|
|
|
8
|
+
ALLOWED_CONTENT_TYPES = {
|
|
9
|
+
"csv" => %w[text/csv text/plain],
|
|
10
|
+
"json" => %w[application/json text/plain],
|
|
11
|
+
"xlsx" => %w[application/vnd.openxmlformats-officedocument.spreadsheetml.sheet]
|
|
12
|
+
}.freeze
|
|
13
|
+
|
|
8
14
|
private
|
|
9
15
|
|
|
10
16
|
def valid_source_for_target?
|
|
@@ -42,6 +48,29 @@ module DataPorter
|
|
|
42
48
|
def import_param_values
|
|
43
49
|
(@import.config || {}).fetch("import_params", {})
|
|
44
50
|
end
|
|
51
|
+
|
|
52
|
+
def valid_file_size?
|
|
53
|
+
return true unless @import.file.attached?
|
|
54
|
+
|
|
55
|
+
max = DataPorter.configuration.max_file_size
|
|
56
|
+
return true if @import.file.blob.byte_size <= max
|
|
57
|
+
|
|
58
|
+
@import.errors.add(:file, "is too large (max #{max / 1.megabyte} MB)")
|
|
59
|
+
false
|
|
60
|
+
end
|
|
61
|
+
|
|
62
|
+
def valid_file_content_type?
|
|
63
|
+
return true unless @import.file.attached?
|
|
64
|
+
|
|
65
|
+
allowed = ALLOWED_CONTENT_TYPES[@import.source_type]
|
|
66
|
+
return true unless allowed
|
|
67
|
+
|
|
68
|
+
content_type = @import.file.blob.content_type
|
|
69
|
+
return true if allowed.include?(content_type)
|
|
70
|
+
|
|
71
|
+
@import.errors.add(:file, "has an invalid content type (#{content_type})")
|
|
72
|
+
false
|
|
73
|
+
end
|
|
45
74
|
end
|
|
46
75
|
end
|
|
47
76
|
end
|
|
@@ -23,8 +23,7 @@ module DataPorter
|
|
|
23
23
|
end
|
|
24
24
|
|
|
25
25
|
def save_column_mapping
|
|
26
|
-
|
|
27
|
-
merged = (@import.config || {}).merge("column_mapping" => mapping)
|
|
26
|
+
merged = (@import.config || {}).merge("column_mapping" => permitted_column_mapping)
|
|
28
27
|
@import.update!(config: merged, status: :pending)
|
|
29
28
|
end
|
|
30
29
|
|
|
@@ -32,11 +31,21 @@ module DataPorter
|
|
|
32
31
|
return unless params[:save_template] == "1"
|
|
33
32
|
return unless defined?(DataPorter::MappingTemplate)
|
|
34
33
|
|
|
35
|
-
mapping = params.require(:column_mapping).permit!.to_h
|
|
36
34
|
DataPorter::MappingTemplate.find_or_initialize_by(
|
|
37
35
|
target_key: @import.target_key,
|
|
38
36
|
name: params[:template_name].presence || "Default"
|
|
39
|
-
).update!(mapping:
|
|
37
|
+
).update!(mapping: permitted_column_mapping)
|
|
38
|
+
end
|
|
39
|
+
|
|
40
|
+
def permitted_column_mapping
|
|
41
|
+
raw = params.require(:column_mapping).permit!.to_h
|
|
42
|
+
valid_names = valid_column_names
|
|
43
|
+
raw.transform_values { |v| valid_names.include?(v) ? v : "" }
|
|
44
|
+
end
|
|
45
|
+
|
|
46
|
+
def valid_column_names
|
|
47
|
+
columns = @import.target_class._columns || []
|
|
48
|
+
columns.to_set { |c| c.name.to_s }
|
|
40
49
|
end
|
|
41
50
|
end
|
|
42
51
|
end
|
|
@@ -8,7 +8,7 @@ module DataPorter
|
|
|
8
8
|
|
|
9
9
|
layout "data_porter/application"
|
|
10
10
|
|
|
11
|
-
before_action :set_import, only: %i[show parse confirm cancel dry_run update_mapping status destroy]
|
|
11
|
+
before_action :set_import, only: %i[show parse confirm cancel dry_run update_mapping status export_rejects destroy]
|
|
12
12
|
before_action :load_targets, only: %i[index new create]
|
|
13
13
|
|
|
14
14
|
def index
|
|
@@ -22,7 +22,7 @@ module DataPorter
|
|
|
22
22
|
def create
|
|
23
23
|
build_import
|
|
24
24
|
|
|
25
|
-
if
|
|
25
|
+
if all_validations_pass? && @import.save
|
|
26
26
|
enqueue_after_create
|
|
27
27
|
redirect_to import_path(@import)
|
|
28
28
|
else
|
|
@@ -73,6 +73,12 @@ module DataPorter
|
|
|
73
73
|
render json: { status: @import.status, progress: progress }
|
|
74
74
|
end
|
|
75
75
|
|
|
76
|
+
def export_rejects
|
|
77
|
+
columns = @import.target_class._columns || []
|
|
78
|
+
csv = RejectsCsvBuilder.new(columns, @import.records).generate
|
|
79
|
+
send_data csv, filename: "rejects_import_#{@import.id}.csv", type: "text/csv"
|
|
80
|
+
end
|
|
81
|
+
|
|
76
82
|
def destroy
|
|
77
83
|
@import.file.purge if @import.file.attached?
|
|
78
84
|
@import.destroy!
|
|
@@ -95,6 +101,16 @@ module DataPorter
|
|
|
95
101
|
@import.status = :pending
|
|
96
102
|
end
|
|
97
103
|
|
|
104
|
+
def all_validations_pass?
|
|
105
|
+
[
|
|
106
|
+
valid_source_for_target?,
|
|
107
|
+
valid_file_presence?,
|
|
108
|
+
valid_file_size?,
|
|
109
|
+
valid_file_content_type?,
|
|
110
|
+
valid_import_params?
|
|
111
|
+
].all?
|
|
112
|
+
end
|
|
113
|
+
|
|
98
114
|
def import_params
|
|
99
115
|
permitted = params.require(:data_import).permit(:target_key, :source_type, :file, config: {})
|
|
100
116
|
merge_import_params(permitted)
|
|
@@ -104,11 +120,19 @@ module DataPorter
|
|
|
104
120
|
nested = params.dig(:data_import, :config, :import_params)
|
|
105
121
|
return permitted unless nested
|
|
106
122
|
|
|
107
|
-
config = permitted[:config] || {}
|
|
108
|
-
config["import_params"] = nested.permit
|
|
123
|
+
config = permitted[:config]&.to_unsafe_h || {}
|
|
124
|
+
config["import_params"] = nested.permit(*allowed_param_keys).to_h
|
|
109
125
|
permitted.merge(config: config)
|
|
110
126
|
end
|
|
111
127
|
|
|
128
|
+
def allowed_param_keys
|
|
129
|
+
target_key = params.dig(:data_import, :target_key)
|
|
130
|
+
return [] unless target_key
|
|
131
|
+
|
|
132
|
+
target = DataPorter::Registry.find(target_key)
|
|
133
|
+
(target._params || []).map { |p| p.name.to_s }
|
|
134
|
+
end
|
|
135
|
+
|
|
112
136
|
def enqueue_after_create
|
|
113
137
|
if @import.file_based?
|
|
114
138
|
DataPorter::ExtractHeadersJob.perform_later(@import.id)
|
|
@@ -86,6 +86,10 @@
|
|
|
86
86
|
<% end %>
|
|
87
87
|
<div class="dp-actions">
|
|
88
88
|
<%= link_to "Back to imports", imports_path, class: "dp-btn dp-btn--primary" %>
|
|
89
|
+
<% rejected = @import.report.errored_count.to_i + @import.report.missing_count.to_i + @import.report.partial_count.to_i %>
|
|
90
|
+
<% if rejected.positive? %>
|
|
91
|
+
<%= link_to "Download rejects CSV", export_rejects_import_path(@import), class: "dp-btn dp-btn--secondary" %>
|
|
92
|
+
<% end %>
|
|
89
93
|
<%= button_to "Delete", import_path(@import),
|
|
90
94
|
method: :delete, class: "dp-btn dp-btn--danger",
|
|
91
95
|
data: { turbo_confirm: "Delete this import?" } %>
|
data/config/routes.rb
CHANGED
data/docs/CONFIGURATION.md
CHANGED
|
@@ -18,9 +18,9 @@ DataPorter.configure do |config|
|
|
|
18
18
|
config.cable_channel_prefix = "data_porter"
|
|
19
19
|
|
|
20
20
|
# Context builder: inject business data into targets.
|
|
21
|
-
# Receives the
|
|
22
|
-
config.context_builder = ->(
|
|
23
|
-
|
|
21
|
+
# Receives the DataImport record.
|
|
22
|
+
config.context_builder = ->(data_import) {
|
|
23
|
+
{ user: data_import.user }
|
|
24
24
|
}
|
|
25
25
|
|
|
26
26
|
# Maximum number of records displayed in preview.
|
|
@@ -32,6 +32,18 @@ DataPorter.configure do |config|
|
|
|
32
32
|
# Auto-purge completed/failed imports older than this duration.
|
|
33
33
|
# Set to nil to disable. Run `rake data_porter:purge` manually or via cron.
|
|
34
34
|
config.purge_after = 60.days
|
|
35
|
+
|
|
36
|
+
# Maximum file size for uploads (default: 10 MB).
|
|
37
|
+
config.max_file_size = 10.megabytes
|
|
38
|
+
|
|
39
|
+
# Maximum number of records per import (default: 10,000).
|
|
40
|
+
# Set to nil to disable.
|
|
41
|
+
config.max_records = 10_000
|
|
42
|
+
|
|
43
|
+
# Transaction mode for imports.
|
|
44
|
+
# :per_record -- each record persisted independently (default)
|
|
45
|
+
# :all -- single transaction, rolls back entirely on any failure
|
|
46
|
+
config.transaction_mode = :per_record
|
|
35
47
|
end
|
|
36
48
|
```
|
|
37
49
|
|
|
@@ -43,10 +55,13 @@ end
|
|
|
43
55
|
| `queue_name` | `:imports` | ActiveJob queue for import jobs |
|
|
44
56
|
| `storage_service` | `:local` | ActiveStorage service name |
|
|
45
57
|
| `cable_channel_prefix` | `"data_porter"` | ActionCable stream prefix |
|
|
46
|
-
| `context_builder` | `nil` | Lambda receiving the
|
|
58
|
+
| `context_builder` | `nil` | Lambda receiving the DataImport record, returns context passed to target methods |
|
|
47
59
|
| `preview_limit` | `500` | Max records shown in the preview step |
|
|
48
60
|
| `enabled_sources` | `%i[csv json api xlsx]` | Source types available in the UI |
|
|
49
61
|
| `purge_after` | `60.days` | Auto-purge completed/failed imports older than this duration |
|
|
62
|
+
| `max_file_size` | `10.megabytes` | Maximum file size for uploads |
|
|
63
|
+
| `max_records` | `10_000` | Maximum number of records per import (nil to disable) |
|
|
64
|
+
| `transaction_mode` | `:per_record` | `:per_record` or `:all` (single transaction rollback) |
|
|
50
65
|
|
|
51
66
|
## Authentication
|
|
52
67
|
|
|
@@ -60,14 +75,11 @@ All engine routes will require the same authentication as your base controller.
|
|
|
60
75
|
|
|
61
76
|
## Context builder
|
|
62
77
|
|
|
63
|
-
The `context_builder` lambda lets you inject business data (current user, tenant, permissions) into target methods (`persist`, `after_import`, `on_error`):
|
|
78
|
+
The `context_builder` lambda lets you inject business data (current user, tenant, permissions) into target methods (`persist`, `after_import`, `on_error`). It receives the `DataImport` record:
|
|
64
79
|
|
|
65
80
|
```ruby
|
|
66
|
-
config.context_builder = ->(
|
|
67
|
-
|
|
68
|
-
user: controller.current_user,
|
|
69
|
-
organization: controller.current_organization
|
|
70
|
-
)
|
|
81
|
+
config.context_builder = ->(data_import) {
|
|
82
|
+
{ user: data_import.user, import_id: data_import.id }
|
|
71
83
|
}
|
|
72
84
|
```
|
|
73
85
|
|
data/docs/ROADMAP.md
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
# Roadmap
|
|
2
2
|
|
|
3
|
-
## v1.0 — Production-ready
|
|
3
|
+
## v1.0 — Production-ready DONE
|
|
4
4
|
|
|
5
5
|
The goal is a gem that handles real-world imports reliably at scale.
|
|
6
6
|
|
|
@@ -15,14 +15,185 @@ Implemented in v0.9.0. Targets declare `params` with a DSL (`:select`, `:text`,
|
|
|
15
15
|
`:number`, `:hidden`). Values stored in `config["import_params"]`, accessible
|
|
16
16
|
via `import_params` in all target instance methods. See [Targets docs](TARGETS.md#params--).
|
|
17
17
|
|
|
18
|
+
### ~~3. Security audit~~ DONE
|
|
19
|
+
|
|
20
|
+
- Replaced `permit!` on import params and column mapping with whitelists
|
|
21
|
+
- File size validation (`config.max_file_size`, default 10 MB)
|
|
22
|
+
- MIME type validation per source type
|
|
23
|
+
- XSS fix in template form controller (safe DOM methods)
|
|
24
|
+
|
|
25
|
+
### ~~4. Safety guards~~ DONE
|
|
26
|
+
|
|
27
|
+
- Max records guard (`config.max_records`, default 10,000)
|
|
28
|
+
- Transaction mode (`config.transaction_mode`: `:per_record` or `:all`)
|
|
29
|
+
- Fallback headers (auto-generate `col_1, col_2...` for empty header rows)
|
|
30
|
+
|
|
31
|
+
### ~~5. Reject rows export~~ DONE
|
|
32
|
+
|
|
33
|
+
Download CSV of failed/errored records with original data + error messages.
|
|
34
|
+
Zero-dependency download via `send_data` (the CSV is built in memory, then sent in one response).
|
|
35
|
+
|
|
36
|
+
### ~~6. E2E integration tests~~ DONE
|
|
37
|
+
|
|
38
|
+
6 end-to-end specs covering all source types (CSV, XLSX, JSON, API),
|
|
39
|
+
import params flow, and reject rows CSV export. 395 specs total.
|
|
40
|
+
|
|
41
|
+
---
|
|
42
|
+
|
|
43
|
+
## v1.1 — Deploy-ready
|
|
44
|
+
|
|
45
|
+
Priority: features required to deploy DataPorter to real users.
|
|
46
|
+
|
|
47
|
+
### Scoped imports
|
|
48
|
+
|
|
49
|
+
Wire up `config.scope` so each user only sees their own imports. The
|
|
50
|
+
configuration hook already exists but isn't connected to the controller query:
|
|
51
|
+
|
|
52
|
+
```ruby
|
|
53
|
+
DataPorter.configure do |config|
|
|
54
|
+
config.scope = ->(user) { { user_type: user.class.name, user_id: user.id } }
|
|
55
|
+
end
|
|
56
|
+
```
|
|
57
|
+
|
|
58
|
+
Once wired up, the controller `index` and `show` actions will apply the scope automatically.
|
|
59
|
+
Combined with `parent_controller` inheriting from an authenticated controller,
|
|
60
|
+
this enables full multi-tenant isolation — suitable for both B2B (tenant per
|
|
61
|
+
organization) and B2C (user-level) scenarios.
|
|
62
|
+
|
|
63
|
+
### Preview ↔ Mapping navigation
|
|
64
|
+
|
|
65
|
+
Allow users to go back from preview to the mapping step and adjust column
|
|
66
|
+
mapping without restarting the import. Currently the flow is one-way:
|
|
67
|
+
mapping → parse → preview. Adding a "Back to mapping" button on the preview
|
|
68
|
+
page would let users correct mapping mistakes after seeing the parsed data.
|
|
69
|
+
|
|
70
|
+
### ~~CSV auto-detect: delimiter & encoding~~ DONE
|
|
71
|
+
|
|
72
|
+
Implemented in v1.0.1. Auto-detect CSV delimiter (`,` `;` `\t`) via frequency
|
|
73
|
+
analysis on the first line. Auto-detect file encoding: strip UTF-8 BOM, validate
|
|
74
|
+
UTF-8, fallback to ISO-8859-1 transcoding. Explicit `col_sep` config takes
|
|
75
|
+
precedence.
|
|
76
|
+
|
|
77
|
+
### Column mapping for JSON and API sources
|
|
78
|
+
|
|
79
|
+
The interactive column mapping step currently only works for file-based sources
|
|
80
|
+
(CSV, XLSX). JSON and API sources have stable, predictable keys that rarely need
|
|
81
|
+
remapping, but supporting mapping for all sources would provide a consistent UX.
|
|
82
|
+
|
|
83
|
+
---
|
|
84
|
+
|
|
85
|
+
## v1.2 — Smart imports
|
|
86
|
+
|
|
87
|
+
### Dry-run performance estimate
|
|
88
|
+
|
|
89
|
+
After a dry run, display an estimated import time based on average record
|
|
90
|
+
processing speed: "Estimated import time: ~2m30s". Helps users decide whether
|
|
91
|
+
to launch the import now or schedule it for off-peak hours.
|
|
92
|
+
|
|
93
|
+
### Permissions / RBAC
|
|
94
|
+
|
|
95
|
+
Role-based access control for import operations. Allow host apps to restrict
|
|
96
|
+
who can create imports, confirm imports, or access specific targets. Integrate
|
|
97
|
+
with existing authorization frameworks (Pundit, CanCanCan) via a configurable
|
|
98
|
+
policy hook.
|
|
99
|
+
|
|
100
|
+
### Column transformers
|
|
101
|
+
|
|
102
|
+
Built-in transformation pipeline applied per-column before the target's
|
|
103
|
+
`transform` method. Declarative DSL in the target:
|
|
104
|
+
|
|
105
|
+
```ruby
|
|
106
|
+
columns do
|
|
107
|
+
column :email, type: :email, transform: [:strip, :downcase]
|
|
108
|
+
column :phone, type: :string, transform: [:strip, :normalize_phone]
|
|
109
|
+
column :born_on, type: :date, transform: [:parse_date]
|
|
110
|
+
end
|
|
111
|
+
```
|
|
112
|
+
|
|
113
|
+
Ships with common transformers (`strip`, `downcase`, `titleize`,
|
|
114
|
+
`normalize_phone`, `parse_date`). Custom transformers via a registry.
|
|
115
|
+
|
|
116
|
+
### Auto-map heuristics
|
|
117
|
+
|
|
118
|
+
Smart column mapping suggestions using tokenized header matching and synonym
|
|
119
|
+
dictionaries. When a CSV has "E-mail Address", auto-suggest mapping to `:email`.
|
|
120
|
+
Built-in synonyms for common patterns (phone → phone_number,
|
|
121
|
+
first name → first_name). Configurable synonym lists per target.
|
|
122
|
+
|
|
18
123
|
---
|
|
19
124
|
|
|
20
|
-
## v2
|
|
125
|
+
## v2.0 — Scale & Automation
|
|
126
|
+
|
|
127
|
+
### Bulk import
|
|
128
|
+
|
|
129
|
+
High-volume import support using `insert_all` / `upsert_all` for batch
|
|
130
|
+
persistence. Bypass per-record `persist` calls when the target opts in,
|
|
131
|
+
enabling 10-100x throughput for simple create/upsert scenarios. Configurable
|
|
132
|
+
batch size, with fallback to per-record mode on conflict.
|
|
133
|
+
|
|
134
|
+
### Update & diff mode
|
|
135
|
+
|
|
136
|
+
Support update (upsert) imports alongside create-only. Given a
|
|
137
|
+
`deduplicate_by` key, detect existing records and show a diff preview:
|
|
138
|
+
new records, changed fields (highlighted), unchanged rows. User confirms
|
|
139
|
+
which changes to apply. Enables recurring data sync workflows.
|
|
140
|
+
|
|
141
|
+
### Resume / retry on failure
|
|
142
|
+
|
|
143
|
+
If an import fails mid-way (timeout, crash, transient error), resume from
|
|
144
|
+
the last successful record instead of restarting from scratch. Track a
|
|
145
|
+
checkpoint index in the report. Critical for large imports (5k+ records)
|
|
146
|
+
where re-processing everything is not acceptable.
|
|
147
|
+
|
|
148
|
+
### API pagination
|
|
149
|
+
|
|
150
|
+
Support paginated API sources. The current API source does a single GET,
|
|
151
|
+
which works for small datasets but not for APIs returning thousands of
|
|
152
|
+
records across multiple pages. Support offset, cursor, and link-header
|
|
153
|
+
pagination strategies via `api_config`:
|
|
154
|
+
|
|
155
|
+
```ruby
|
|
156
|
+
api_config do
|
|
157
|
+
endpoint "https://api.example.com/contacts"
|
|
158
|
+
pagination :cursor, param: "after", root: "data", next_key: "meta.next_cursor"
|
|
159
|
+
end
|
|
160
|
+
```
|
|
161
|
+
|
|
162
|
+
### Scheduled imports
|
|
163
|
+
|
|
164
|
+
Recurring imports from API or remote sources on a cron schedule. A target
|
|
165
|
+
declares a schedule, and DataPorter automatically fetches and imports at
|
|
166
|
+
the configured interval. Built on ActiveJob with configurable queue.
|
|
167
|
+
|
|
168
|
+
---
|
|
169
|
+
|
|
170
|
+
## v3.0 — Platform
|
|
171
|
+
|
|
172
|
+
### Webhooks
|
|
173
|
+
|
|
174
|
+
HTTP callbacks on import lifecycle events (started, completed, failed).
|
|
175
|
+
Configurable per-target with URL, headers, and payload template. Enables
|
|
176
|
+
integration with Slack notifications, CI pipelines, or external dashboards.
|
|
177
|
+
|
|
178
|
+
### External connectors
|
|
179
|
+
|
|
180
|
+
Source plugins beyond local files and HTTP APIs:
|
|
181
|
+
|
|
182
|
+
- **Google Sheets** — OAuth2 + Sheets API, treat a spreadsheet as a source
|
|
183
|
+
- **SFTP** — Poll a remote directory for new files
|
|
184
|
+
- **AWS S3** — Watch a bucket/prefix for uploads
|
|
185
|
+
- **Remote HTTP polling** — Periodically fetch from a paginated API
|
|
186
|
+
|
|
187
|
+
Each connector implements the `Sources::Base` interface. Installed as
|
|
188
|
+
optional companion gems (`data_porter-google_sheets`, `data_porter-s3`).
|
|
189
|
+
|
|
190
|
+
### i18n
|
|
191
|
+
|
|
192
|
+
Full internationalization of all UI strings, error messages, and status
|
|
193
|
+
labels. Ship with English and French translations. Host apps can override
|
|
194
|
+
or add languages via standard Rails I18n.
|
|
195
|
+
|
|
196
|
+
### Dashboard & analytics
|
|
21
197
|
|
|
22
|
-
|
|
23
|
-
-
|
|
24
|
-
- Batch persist (`insert_all` support)
|
|
25
|
-
- Resume / partial retry
|
|
26
|
-
- Scheduled imports (recurring API source)
|
|
27
|
-
- i18n
|
|
28
|
-
- Dashboard stats
|
|
198
|
+
Import statistics dashboard: success rates, average duration, records per
|
|
199
|
+
import, most-used targets, failure trends. Mountable as an admin-only route.
|
data/docs/TARGETS.md
CHANGED
|
@@ -152,9 +152,14 @@ Each param accepts:
|
|
|
152
152
|
| `required` | Boolean | `false` | Validated on import creation, shown with `*` in the form |
|
|
153
153
|
| `label` | String | Humanized name | Display label in the form |
|
|
154
154
|
| `default` | String | `nil` | Pre-filled value in the form |
|
|
155
|
-
| `collection` | Lambda | `nil` | For `:select` type --
|
|
155
|
+
| `collection` | Lambda or Array | `nil` | For `:select` type -- `[[label, value], ...]` |
|
|
156
156
|
|
|
157
|
-
Collection
|
|
157
|
+
Collection accepts either a lambda or a plain array. Use a lambda for dynamic data (evaluated when the form loads, not at boot time):
|
|
158
|
+
|
|
159
|
+
```ruby
|
|
160
|
+
param :hotel_id, type: :select, collection: -> { Hotel.pluck(:name, :id) }
|
|
161
|
+
param :status, type: :select, collection: [%w[Active active], %w[Archived archived]]
|
|
162
|
+
```
|
|
158
163
|
|
|
159
164
|
## Instance Methods
|
|
160
165
|
|
|
@@ -225,3 +230,103 @@ def on_error(record, error, context:)
|
|
|
225
230
|
Sentry.capture_exception(error, extra: { record: record.attributes })
|
|
226
231
|
end
|
|
227
232
|
```
|
|
233
|
+
|
|
234
|
+
## Full example
|
|
235
|
+
|
|
236
|
+
A complete target using most DSL features: multiple sources, import params, JSON root, API config, transform, custom validation, and lifecycle hooks.
|
|
237
|
+
|
|
238
|
+
```ruby
|
|
239
|
+
# frozen_string_literal: true
|
|
240
|
+
|
|
241
|
+
class ContactTarget < DataPorter::Target
|
|
242
|
+
label "Contacts"
|
|
243
|
+
model_name "Contact"
|
|
244
|
+
icon "fas fa-address-book"
|
|
245
|
+
sources :csv, :xlsx, :json, :api
|
|
246
|
+
dry_run_enabled
|
|
247
|
+
|
|
248
|
+
columns do
|
|
249
|
+
column :name, type: :string, required: true
|
|
250
|
+
column :email, type: :email
|
|
251
|
+
column :phone_number, type: :string
|
|
252
|
+
column :address, type: :string
|
|
253
|
+
column :room, type: :string
|
|
254
|
+
end
|
|
255
|
+
|
|
256
|
+
params do
|
|
257
|
+
param :default_room, type: :text, label: "Default room"
|
|
258
|
+
param :import_source, type: :select, label: "Import source",
|
|
259
|
+
collection: [%w[Manual manual], %w[Migration migration], ["Directory sync", "sync"]]
|
|
260
|
+
end
|
|
261
|
+
|
|
262
|
+
json_root "contacts"
|
|
263
|
+
|
|
264
|
+
api_config do
|
|
265
|
+
endpoint "http://localhost:3001/contacts"
|
|
266
|
+
headers({ "Authorization" => "Bearer token" })
|
|
267
|
+
response_root :contacts
|
|
268
|
+
end
|
|
269
|
+
|
|
270
|
+
def transform(record)
|
|
271
|
+
apply_default_room(record)
|
|
272
|
+
normalize_phone(record)
|
|
273
|
+
record
|
|
274
|
+
end
|
|
275
|
+
|
|
276
|
+
def validate(record)
|
|
277
|
+
validate_email_format(record)
|
|
278
|
+
end
|
|
279
|
+
|
|
280
|
+
def persist(record, context:)
|
|
281
|
+
Contact.create!(record.attributes)
|
|
282
|
+
end
|
|
283
|
+
|
|
284
|
+
def after_import(results, context:)
|
|
285
|
+
Rails.logger.info("[DataPorter] Contacts: #{results[:created]} created, #{results[:errored]} errors")
|
|
286
|
+
end
|
|
287
|
+
|
|
288
|
+
def on_error(record, error, context:)
|
|
289
|
+
Rails.logger.warn("[DataPorter] Line #{record.line_number}: #{error.message}")
|
|
290
|
+
end
|
|
291
|
+
|
|
292
|
+
private
|
|
293
|
+
|
|
294
|
+
def apply_default_room(record)
|
|
295
|
+
return if record.data["room"].present?
|
|
296
|
+
return unless import_params["default_room"].present?
|
|
297
|
+
|
|
298
|
+
record.data["room"] = import_params["default_room"]
|
|
299
|
+
end
|
|
300
|
+
|
|
301
|
+
def normalize_phone(record)
|
|
302
|
+
phone = record.data["phone_number"]
|
|
303
|
+
return if phone.blank?
|
|
304
|
+
|
|
305
|
+
record.data["phone_number"] = phone.gsub(/\s/, "")
|
|
306
|
+
end
|
|
307
|
+
|
|
308
|
+
def validate_email_format(record)
|
|
309
|
+
email = record.data["email"]
|
|
310
|
+
return if email.blank?
|
|
311
|
+
return if email.match?(/\A[^@\s]+@[^@\s]+\z/)
|
|
312
|
+
|
|
313
|
+
record.add_error("Invalid email format: #{email}")
|
|
314
|
+
end
|
|
315
|
+
end
|
|
316
|
+
```
|
|
317
|
+
|
|
318
|
+
This target exercises:
|
|
319
|
+
|
|
320
|
+
| Feature | DSL / Hook | Effect |
|
|
321
|
+
|---|---|---|
|
|
322
|
+
| 4 sources | `sources :csv, :xlsx, :json, :api` | All source types available |
|
|
323
|
+
| Typed columns | `type: :string`, `type: :email` | Built-in validation |
|
|
324
|
+
| Required field | `required: true` on `name` | Rows without a name get the "missing" status |
|
|
325
|
+
| Dry run | `dry_run_enabled` | Dry run button in preview |
|
|
326
|
+
| Import params | `param :default_room`, `param :import_source` | Dynamic form fields |
|
|
327
|
+
| JSON root | `json_root "contacts"` | Extracts from `{"contacts": [...]}` |
|
|
328
|
+
| API config | `endpoint`, `headers`, `response_root` | Authenticated API fetch |
|
|
329
|
+
| Transform | `apply_default_room`, `normalize_phone` | Fill blanks, strip whitespace |
|
|
330
|
+
| Validate | `validate_email_format` | Custom error on invalid email |
|
|
331
|
+
| After import | `after_import` | Logs a summary (post-import hook) |
|
|
332
|
+
| On error | `on_error` | Logs the failed line (per-record error hook) |
|
|
@@ -33,6 +33,7 @@ module DataPorter
|
|
|
33
33
|
div(class: "dp-results__cards") do
|
|
34
34
|
stat("dp-results__stat--success", @report.imported_count, "Imported")
|
|
35
35
|
stat("dp-results__stat--error", @report.errored_count, "Errors")
|
|
36
|
+
stat("dp-results__stat--warning", skipped_count, "Skipped") if skipped_count.positive?
|
|
36
37
|
end
|
|
37
38
|
end
|
|
38
39
|
|
|
@@ -52,7 +53,11 @@ module DataPorter
|
|
|
52
53
|
end
|
|
53
54
|
|
|
54
55
|
def success?
|
|
55
|
-
@report.errored_count.zero?
|
|
56
|
+
@report.errored_count.zero? && skipped_count.zero?
|
|
57
|
+
end
|
|
58
|
+
|
|
59
|
+
def skipped_count
|
|
60
|
+
@report.missing_count.to_i + @report.partial_count.to_i
|
|
56
61
|
end
|
|
57
62
|
end
|
|
58
63
|
end
|
|
@@ -10,7 +10,10 @@ module DataPorter
|
|
|
10
10
|
:preview_limit,
|
|
11
11
|
:enabled_sources,
|
|
12
12
|
:scope,
|
|
13
|
-
:purge_after
|
|
13
|
+
:purge_after,
|
|
14
|
+
:max_file_size,
|
|
15
|
+
:max_records,
|
|
16
|
+
:transaction_mode
|
|
14
17
|
|
|
15
18
|
def initialize
|
|
16
19
|
@parent_controller = "ApplicationController"
|
|
@@ -22,6 +25,9 @@ module DataPorter
|
|
|
22
25
|
@enabled_sources = %i[csv json api xlsx]
|
|
23
26
|
@scope = nil
|
|
24
27
|
@purge_after = 60.days
|
|
28
|
+
@max_file_size = 10.megabytes
|
|
29
|
+
@max_records = 10_000
|
|
30
|
+
@transaction_mode = :per_record
|
|
25
31
|
end
|
|
26
32
|
end
|
|
27
33
|
end
|
|
@@ -6,6 +6,14 @@ module DataPorter
|
|
|
6
6
|
private
|
|
7
7
|
|
|
8
8
|
def import_records
|
|
9
|
+
if DataPorter.configuration.transaction_mode == :all
|
|
10
|
+
import_all_or_nothing
|
|
11
|
+
else
|
|
12
|
+
import_per_record
|
|
13
|
+
end
|
|
14
|
+
end
|
|
15
|
+
|
|
16
|
+
def import_per_record
|
|
9
17
|
importable = @data_import.importable_records
|
|
10
18
|
context = build_context
|
|
11
19
|
results = { created: 0, errored: 0 }
|
|
@@ -16,6 +24,25 @@ module DataPorter
|
|
|
16
24
|
broadcast_progress(index + 1, total)
|
|
17
25
|
end
|
|
18
26
|
|
|
27
|
+
finalize_import(results)
|
|
28
|
+
end
|
|
29
|
+
|
|
30
|
+
def import_all_or_nothing
|
|
31
|
+
importable = @data_import.importable_records
|
|
32
|
+
context = build_context
|
|
33
|
+
total = importable.size
|
|
34
|
+
|
|
35
|
+
ActiveRecord::Base.transaction do
|
|
36
|
+
importable.each_with_index do |record, index|
|
|
37
|
+
@target.persist(record, context: context)
|
|
38
|
+
broadcast_progress(index + 1, total)
|
|
39
|
+
end
|
|
40
|
+
end
|
|
41
|
+
|
|
42
|
+
finalize_import(created: total, errored: 0)
|
|
43
|
+
end
|
|
44
|
+
|
|
45
|
+
def finalize_import(results)
|
|
19
46
|
@data_import.update!(status: :completed)
|
|
20
47
|
@broadcaster.success
|
|
21
48
|
results
|
|
@@ -8,6 +8,7 @@ module DataPorter
|
|
|
8
8
|
def build_records
|
|
9
9
|
source = build_source
|
|
10
10
|
raw_rows = source.fetch
|
|
11
|
+
enforce_max_records!(raw_rows.size)
|
|
11
12
|
columns = @target.class._columns || []
|
|
12
13
|
validator = RecordValidator.new(columns)
|
|
13
14
|
|
|
@@ -16,6 +17,14 @@ module DataPorter
|
|
|
16
17
|
end
|
|
17
18
|
end
|
|
18
19
|
|
|
20
|
+
def enforce_max_records!(count)
|
|
21
|
+
max = DataPorter.configuration.max_records
|
|
22
|
+
return unless max
|
|
23
|
+
return if count <= max
|
|
24
|
+
|
|
25
|
+
raise Error, "File contains #{count} records, exceeds maximum of #{max}"
|
|
26
|
+
end
|
|
27
|
+
|
|
19
28
|
def build_record(row, index, columns, validator)
|
|
20
29
|
record = StoreModels::ImportRecord.new(
|
|
21
30
|
line_number: index + 1,
|
data/lib/data_porter/registry.rb
CHANGED
|
@@ -37,6 +37,12 @@ module DataPorter
|
|
|
37
37
|
|
|
38
38
|
private
|
|
39
39
|
|
|
40
|
+
def resolve_collection(collection)
|
|
41
|
+
return unless collection
|
|
42
|
+
|
|
43
|
+
collection.respond_to?(:call) ? collection.call : collection
|
|
44
|
+
end
|
|
45
|
+
|
|
40
46
|
def serialize_params(params)
|
|
41
47
|
return [] unless params
|
|
42
48
|
|
|
@@ -50,7 +56,7 @@ module DataPorter
|
|
|
50
56
|
required: param.required,
|
|
51
57
|
label: param.label,
|
|
52
58
|
default: param.default,
|
|
53
|
-
collection: param.collection
|
|
59
|
+
collection: resolve_collection(param.collection)
|
|
54
60
|
}.compact
|
|
55
61
|
end
|
|
56
62
|
end
|
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "csv"
|
|
4
|
+
|
|
5
|
+
module DataPorter
|
|
6
|
+
class RejectsCsvBuilder
|
|
7
|
+
def initialize(columns, records)
|
|
8
|
+
@columns = columns
|
|
9
|
+
@records = records
|
|
10
|
+
end
|
|
11
|
+
|
|
12
|
+
def generate
|
|
13
|
+
CSV.generate do |csv|
|
|
14
|
+
csv << header_row
|
|
15
|
+
rejected_records.each { |r| csv << record_row(r) }
|
|
16
|
+
end
|
|
17
|
+
end
|
|
18
|
+
|
|
19
|
+
private
|
|
20
|
+
|
|
21
|
+
def header_row
|
|
22
|
+
["line"] + @columns.map { |c| c.name.to_s } + ["errors"]
|
|
23
|
+
end
|
|
24
|
+
|
|
25
|
+
def rejected_records
|
|
26
|
+
@records.reject(&:complete?)
|
|
27
|
+
end
|
|
28
|
+
|
|
29
|
+
def record_row(record)
|
|
30
|
+
values = @columns.map { |c| record.data[c.name.to_s] }
|
|
31
|
+
errors = record.errors_list.map(&:message).join("; ")
|
|
32
|
+
[record.line_number] + values + [errors]
|
|
33
|
+
end
|
|
34
|
+
end
|
|
35
|
+
end
|
|
@@ -45,6 +45,12 @@ module DataPorter
|
|
|
45
45
|
def auto_map(row)
|
|
46
46
|
row.to_h.transform_keys { |k| k.parameterize(separator: "_").to_sym }
|
|
47
47
|
end
|
|
48
|
+
|
|
49
|
+
def fallback_headers(raw_headers)
|
|
50
|
+
return raw_headers if raw_headers.any?(&:present?)
|
|
51
|
+
|
|
52
|
+
raw_headers.each_with_index.map { |_, i| "col_#{i + 1}" }
|
|
53
|
+
end
|
|
48
54
|
end
|
|
49
55
|
end
|
|
50
56
|
end
|
|
@@ -5,6 +5,8 @@ require "csv"
|
|
|
5
5
|
module DataPorter
|
|
6
6
|
module Sources
|
|
7
7
|
class Csv < Base
|
|
8
|
+
SEPARATORS = [",", ";", "\t"].freeze
|
|
9
|
+
|
|
8
10
|
def initialize(data_import, content: nil)
|
|
9
11
|
super(data_import)
|
|
10
12
|
@content = content
|
|
@@ -12,7 +14,8 @@ module DataPorter
|
|
|
12
14
|
|
|
13
15
|
def headers
|
|
14
16
|
first_line = csv_content.lines.first
|
|
15
|
-
::CSV.parse_line(first_line, **extra_options).map(&:to_s)
|
|
17
|
+
raw = ::CSV.parse_line(first_line, **extra_options).map(&:to_s)
|
|
18
|
+
fallback_headers(raw)
|
|
16
19
|
end
|
|
17
20
|
|
|
18
21
|
def fetch
|
|
@@ -26,11 +29,28 @@ module DataPorter
|
|
|
26
29
|
private
|
|
27
30
|
|
|
28
31
|
def csv_content
|
|
29
|
-
@content || download_file
|
|
32
|
+
@csv_content ||= ensure_utf8(@content || download_file)
|
|
30
33
|
end
|
|
31
34
|
|
|
32
35
|
def download_file
|
|
33
|
-
@data_import.file.download
|
|
36
|
+
@data_import.file.download
|
|
37
|
+
end
|
|
38
|
+
|
|
39
|
+
def ensure_utf8(raw)
|
|
40
|
+
raw = strip_bom(raw)
|
|
41
|
+
return raw if raw.encoding == Encoding::UTF_8 && raw.valid_encoding?
|
|
42
|
+
|
|
43
|
+
raw.force_encoding("UTF-8")
|
|
44
|
+
return raw if raw.valid_encoding?
|
|
45
|
+
|
|
46
|
+
raw.encode("UTF-8", "ISO-8859-1")
|
|
47
|
+
end
|
|
48
|
+
|
|
49
|
+
def strip_bom(raw)
|
|
50
|
+
bytes = raw.b
|
|
51
|
+
return raw unless bytes.start_with?("\xEF\xBB\xBF".b)
|
|
52
|
+
|
|
53
|
+
bytes[3..].force_encoding("UTF-8")
|
|
34
54
|
end
|
|
35
55
|
|
|
36
56
|
def csv_options
|
|
@@ -39,9 +59,16 @@ module DataPorter
|
|
|
39
59
|
|
|
40
60
|
def extra_options
|
|
41
61
|
config = @data_import.config
|
|
42
|
-
return {} unless config.is_a?(Hash)
|
|
62
|
+
return { col_sep: detect_separator } unless config.is_a?(Hash)
|
|
63
|
+
|
|
64
|
+
opts = config.symbolize_keys.slice(:col_sep, :encoding)
|
|
65
|
+
opts[:col_sep] ||= detect_separator
|
|
66
|
+
opts
|
|
67
|
+
end
|
|
43
68
|
|
|
44
|
-
|
|
69
|
+
def detect_separator
|
|
70
|
+
first_line = csv_content.lines.first.to_s
|
|
71
|
+
SEPARATORS.max_by { |sep| first_line.count(sep) }
|
|
45
72
|
end
|
|
46
73
|
end
|
|
47
74
|
end
|
data/lib/data_porter/version.rb
CHANGED
data/lib/data_porter.rb
CHANGED
|
@@ -18,6 +18,7 @@ require_relative "data_porter/sources"
|
|
|
18
18
|
require_relative "data_porter/record_validator"
|
|
19
19
|
require_relative "data_porter/broadcaster"
|
|
20
20
|
require_relative "data_porter/orchestrator"
|
|
21
|
+
require_relative "data_porter/rejects_csv_builder"
|
|
21
22
|
require_relative "data_porter/components"
|
|
22
23
|
require_relative "data_porter/engine"
|
|
23
24
|
|
|
@@ -10,7 +10,7 @@ class CreateDataPorterImports < ActiveRecord::Migration[<%= ActiveRecord::Migrat
|
|
|
10
10
|
t.jsonb :report, null: false, default: {}
|
|
11
11
|
t.jsonb :config, null: false, default: {}
|
|
12
12
|
|
|
13
|
-
t.references :user, polymorphic: true
|
|
13
|
+
t.references :user, polymorphic: true
|
|
14
14
|
|
|
15
15
|
t.timestamps
|
|
16
16
|
end
|
|
@@ -15,11 +15,9 @@ DataPorter.configure do |config|
|
|
|
15
15
|
# config.cable_channel_prefix = "data_porter"
|
|
16
16
|
|
|
17
17
|
# Context builder: inject business data into targets.
|
|
18
|
-
# Receives the
|
|
19
|
-
# config.context_builder = ->(
|
|
20
|
-
#
|
|
21
|
-
# user: controller.current_user
|
|
22
|
-
# )
|
|
18
|
+
# Receives the DataImport record.
|
|
19
|
+
# config.context_builder = ->(data_import) {
|
|
20
|
+
# { user: data_import.user }
|
|
23
21
|
# }
|
|
24
22
|
|
|
25
23
|
# Maximum number of records displayed in preview.
|
metadata
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: data_porter
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.9.0
|
|
4
|
+
version: 1.0.1
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Seryl Lounis
|
|
@@ -174,6 +174,7 @@ files:
|
|
|
174
174
|
- lib/data_porter/orchestrator/record_builder.rb
|
|
175
175
|
- lib/data_porter/record_validator.rb
|
|
176
176
|
- lib/data_porter/registry.rb
|
|
177
|
+
- lib/data_porter/rejects_csv_builder.rb
|
|
177
178
|
- lib/data_porter/sources.rb
|
|
178
179
|
- lib/data_porter/sources/api.rb
|
|
179
180
|
- lib/data_porter/sources/base.rb
|
|
@@ -198,8 +199,11 @@ homepage: https://github.com/SerylLns/data_porter
|
|
|
198
199
|
licenses:
|
|
199
200
|
- MIT
|
|
200
201
|
metadata:
|
|
202
|
+
homepage_uri: https://github.com/SerylLns/data_porter
|
|
201
203
|
source_code_uri: https://github.com/SerylLns/data_porter
|
|
202
204
|
changelog_uri: https://github.com/SerylLns/data_porter/blob/main/CHANGELOG.md
|
|
205
|
+
documentation_uri: https://github.com/SerylLns/data_porter#readme
|
|
206
|
+
bug_tracker_uri: https://github.com/SerylLns/data_porter/issues
|
|
203
207
|
rubygems_mcp_server_uri: https://rubygems.org/gems/data_porter
|
|
204
208
|
rubygems_mfa_required: 'true'
|
|
205
209
|
rdoc_options: []
|