data_porter 1.1.0 → 2.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 46a9ae914232272194aaa961ed2619a423e2fcbbe9b5dc4dbdb2762bbcdb7129
4
- data.tar.gz: 4de6b7f13955136df74552b6788792b3f9e8ba0b201e8b0fa25000e28294e13b
3
+ metadata.gz: '0439ba2634bbf47987015524141c3f961b54c362d234dcaeecc7811895c4ec34'
4
+ data.tar.gz: b4b2daf3519743518b2119b75863ca83114db258599fa2026e1b7a2ae844668e
5
5
  SHA512:
6
- metadata.gz: 0af51f459c999859b1ef723787998134e96a371bfe25c4e99a086a38cd787e35f4c8b2e58331b5594a1baeb7bc0c8220d0ccd5a9c2738f0cbabc1cc69021a754
7
- data.tar.gz: 735f5bc4d814f7e641fd8e2d9498487ddf75a5ce784f40439fbcfcdd7a995b131e243538dd261ea5d5b4494148a0035b83b7c5bac924de2f9279f2a019dd6af5
6
+ metadata.gz: b2ddc41b18ab043cbabb97518594ec27a0d1ec4ea0f4f1e542a6dde664c6914d6d1b61c609e6436d9a9bd741298c31766c623886eb3964a417b9a5ebec532b44
7
+ data.tar.gz: 2fe7cc8d5910005f86b9198db007afc406bb186d933d0ff1a5e4c147fab191ade9386550fdbb5b85f4756c6795da979f81da5b4681782c027f7901763f1dd972
data/CHANGELOG.md CHANGED
@@ -5,6 +5,22 @@ All notable changes to this project will be documented in this file.
5
5
  The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/),
6
6
  and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
7
7
 
8
+ ## [2.0.0] - 2026-02-19
9
+
10
+ ### Breaking
11
+
12
+ - **Default parent controller changed to `ActionController::Base`** -- Engine controllers no longer inherit from `ApplicationController` by default, avoiding conflicts with authorization gems (Pundit, CanCanCan, etc.). Set `config.parent_controller = "ApplicationController"` to restore the previous behavior.
13
+ - **Engine routes mounted at root** -- `resources :imports` now uses `path: "/"` so the mount point controls the full URL (e.g. `mount DataPorter::Engine, at: "/imports"` gives `/imports`, not `/imports/imports`)
14
+
15
+ ### Added
16
+
17
+ - **Back to mapping** -- `back_to_mapping` action resets a previewing import to the mapping step, preserving file headers and column mapping for re-mapping
18
+ - **Saved mapping persistence** -- Mapping form restores previously saved column mapping instead of resetting to defaults
19
+
20
+ ### Changed
21
+
22
+ - 423 RSpec examples (up from 413), 0 failures
23
+
8
24
  ## [1.1.0] - 2026-02-08
9
25
 
10
26
  ### Added
data/README.md CHANGED
@@ -1,23 +1,10 @@
1
1
  # DataPorter
2
2
 
3
- > [!CAUTION]
4
- > **This gem is under active development and not yet production-ready.**
5
- > APIs and features may change without notice. Use at your own risk.
6
-
7
3
  A mountable Rails engine for data import workflows: **Upload**, **Map**, **Preview**, **Import**.
8
4
 
9
5
  Supports CSV, JSON, XLSX, and API sources with a declarative DSL for defining import targets. Business-agnostic by design -- all domain logic lives in your host app.
10
6
 
11
- <table>
12
- <tr>
13
- <td><img src="docs/screenshots/index-with-previewing.jpg" width="400" alt="Import list with status badges" /></td>
14
- <td><img src="docs/screenshots/modal-new-import.jpg" width="400" alt="New import modal with dropzone" /></td>
15
- </tr>
16
- <tr>
17
- <td><img src="docs/screenshots/mapping.jpg" width="400" alt="Interactive column mapping with templates" /></td>
18
- <td><img src="docs/screenshots/preview.jpg" width="400" alt="Preview with summary cards and data table" /></td>
19
- </tr>
20
- </table>
7
+ ![DataPorter demo](docs/screenshots/demo_fast.gif)
21
8
 
22
9
  ## Features
23
10
 
@@ -142,7 +129,7 @@ pending -> parsing -> previewing -> importing -> completed
142
129
  git clone https://github.com/SerylLns/data_porter.git
143
130
  cd data_porter
144
131
  bin/setup
145
- bundle exec rspec # 405 specs
132
+ bundle exec rspec # 423 specs
146
133
  bundle exec rubocop # 0 offenses
147
134
  ```
148
135
 
@@ -12,7 +12,7 @@ module DataPorter
12
12
  columns = target._columns || []
13
13
  @file_headers = @import.config["file_headers"] || []
14
14
  @target_columns = columns.map { |c| [c.label, c.name.to_s, c.required] }
15
- @default_mapping = (target._csv_mappings || {}).transform_values(&:to_s)
15
+ @default_mapping = saved_or_default_mapping(target)
16
16
  @templates = load_templates
17
17
  end
18
18
 
@@ -23,6 +23,13 @@ module DataPorter
23
23
  scope.for_target(@import.target_key)
24
24
  end
25
25
 
26
+ def saved_or_default_mapping(target)
27
+ saved = @import.config&.dig("column_mapping")
28
+ return saved if saved.present?
29
+
30
+ (target._csv_mappings || {}).transform_values(&:to_s)
31
+ end
32
+
26
33
  def save_column_mapping
27
34
  merged = (@import.config || {}).merge("column_mapping" => permitted_column_mapping)
28
35
  @import.update!(config: merged, status: :pending)
@@ -9,7 +9,8 @@ module DataPorter
9
9
 
10
10
  layout "data_porter/application"
11
11
 
12
- before_action :set_import, only: %i[show parse confirm cancel dry_run update_mapping status export_rejects destroy]
12
+ before_action :set_import, only: %i[show parse confirm cancel dry_run update_mapping
13
+ status export_rejects destroy back_to_mapping]
13
14
  before_action :load_targets, only: %i[index new create]
14
15
 
15
16
  def index
@@ -63,6 +64,11 @@ module DataPorter
63
64
  redirect_to imports_path
64
65
  end
65
66
 
67
+ def back_to_mapping
68
+ @import.reset_to_mapping!
69
+ redirect_to import_path(@import)
70
+ end
71
+
66
72
  def dry_run
67
73
  @import.update!(status: :pending)
68
74
  DataPorter::DryRunJob.perform_later(@import.id)
@@ -53,6 +53,15 @@ module DataPorter
53
53
  records.group_by(&:status).transform_values(&:count)
54
54
  end
55
55
 
56
+ def reset_to_mapping!
57
+ update!(
58
+ status: :mapping,
59
+ records: [],
60
+ report: StoreModels::Report.new,
61
+ config: (config || {}).except("progress")
62
+ )
63
+ end
64
+
56
65
  def file_based?
57
66
  %w[csv xlsx].include?(source_type)
58
67
  end
@@ -65,6 +65,10 @@
65
65
  Dry Run
66
66
  <% end %>
67
67
  <% end %>
68
+ <% if @import.file_based? %>
69
+ <%= button_to "Back to Mapping", back_to_mapping_import_path(@import),
70
+ method: :post, class: "dp-btn dp-btn--secondary" %>
71
+ <% end %>
68
72
  <%= button_to "Cancel", cancel_import_path(@import),
69
73
  method: :post, class: "dp-btn dp-btn--danger" %>
70
74
  </div>
data/bookmarklet.md ADDED
@@ -0,0 +1,217 @@
1
+ ---
2
+ title: "Building a Product Clipper Bookmarklet with Shadow DOM and Structured Data"
3
+ published: false
4
+ tags: javascript, webdev, architecture, bookmarklet
5
+ ---
6
+
7
+ # Building a Product Clipper Bookmarklet with Shadow DOM and Structured Data
8
+
9
+ We’re in 2008.
10
+
11
+ The iPhone 3G just dropped. Facebook crosses 100 million users. Bitcoin quietly appears on a cryptography mailing list. The web is shifting.
12
+
13
+ And while the world is obsessing over apps and platforms… we’re going back to something beautifully simple.
14
+
15
+ **A bookmarklet.**
16
+
17
+ No extension store review.
18
+ No packaging.
19
+ No deployment delays.
20
+
21
+ Just a small piece of JavaScript living inside a browser bookmark — capable of injecting a clean, isolated UI into any e-commerce page, detecting product data automatically, and sending it to your backend.
22
+
23
+ One click. Any product page. Instant extraction.
24
+
25
+ Here’s how the architecture works.
26
+
27
+ ---
28
+
29
+ ## High-Level Architecture
30
+
31
+ The clipper follows a simple execution model:
32
+
33
+ 1. The bookmarklet injects a remote script into the current page.
34
+ 2. The script scans the DOM using multiple detection strategies.
35
+ 3. A sidebar panel renders inside a Shadow DOM (fully isolated).
36
+ 4. Detected products are visually highlighted.
37
+ 5. The user selects items to import.
38
+ 6. Selected data is sent to the backend for processing.
39
+
40
+ No browser extension. No build complexity. Just runtime execution.
41
+
42
+ ---
43
+
44
+ ## Why a Bookmarklet?
45
+
46
+ For a user-triggered action ("Import this page"), a bookmarklet offers strong trade-offs:
47
+
48
+ | | Bookmarklet | Browser Extension |
49
+ | ----------- | --------------------- | -------------------------------- |
50
+ | Install | Drag a link | Store review required |
51
+ | Updates | Instant (server-side) | Requires store re-approval |
52
+ | Permissions | None | Explicit permission prompts |
53
+ | Maintenance | Single hosted file | Multi-file manifest architecture |
54
+
55
+ If your tool runs only when explicitly triggered, a bookmarklet is often the leanest solution.
56
+
57
+ ---
58
+
59
+ ## Product Detection Strategy
60
+
61
+ No single detection method works across all e-commerce sites.
62
+
63
+ A robust clipper layers multiple strategies, ordered by confidence:
64
+
65
+ - **Structured Data (JSON-LD)**
66
+ Many sites expose `schema.org/Product` data for SEO.
67
+ - **Microdata attributes**
68
+ - **OpenGraph metadata**
69
+ - **Heuristic DOM scanning**
70
+ - **URL pattern matching (fallback)**
71
+
72
+ The key principle is:
73
+
74
+ > Prefer high-confidence structured data, then gracefully degrade.
75
+
76
+ ### Example: Extracting JSON-LD Products
77
+
78
+ ```javascript
79
+ function extractFromJsonLd() {
80
+ const scripts = document.querySelectorAll(
81
+ 'script[type="application/ld+json"]',
82
+ );
83
+ const products = [];
84
+
85
+ scripts.forEach((script) => {
86
+ try {
87
+ const data = JSON.parse(script.textContent);
88
+ // Traverse recursively and collect Product objects
89
+ collectProducts(data, products);
90
+ } catch (e) {}
91
+ });
92
+
93
+ return products;
94
+ }
95
+ ```
96
+
97
+ In practice, you normalize URLs, merge duplicates, and score sources by confidence before presenting results.
98
+
99
+ The goal is reliability, not perfection.
100
+
101
+ ---
102
+
103
+ ## Shadow DOM: Isolation Is Non-Negotiable
104
+
105
+ Injecting UI into arbitrary websites is dangerous.
106
+
107
+ CSS resets, `!important` rules, framework styles — they will break your interface.
108
+
109
+ Shadow DOM solves this by creating an isolated rendering tree:
110
+
111
+ ```javascript
112
+ const host = document.createElement("div");
113
+ document.body.appendChild(host);
114
+
115
+ const shadow = host.attachShadow({ mode: "open" });
116
+ shadow.innerHTML = `
117
+ <style>
118
+ :host { all: initial; font-family: system-ui; }
119
+ .panel { position: fixed; right: 0; top: 0; }
120
+ </style>
121
+ <div class="panel">Clipper UI</div>
122
+ `;
123
+ ```
124
+
125
+ Key principle:
126
+
127
+ > Your UI must behave identically on Shopify, Magento, custom React apps, or legacy PHP pages.
128
+
129
+ Isolation is mandatory.
130
+
131
+ ---
132
+
133
+ ## Visual Feedback & Interaction Control
134
+
135
+ When products are detected, highlighting them directly on the page improves user confidence.
136
+
137
+ Because many e-commerce sites attach their own click handlers (analytics, routing, SPA navigation), event handling must be carefully managed.
138
+
139
+ Best practice:
140
+
141
+ - Use capture phase listeners
142
+ - Prevent unintended navigation
143
+ - Clean up all listeners and styles on teardown
144
+
145
+ A clipper should leave **zero traces** after closing.
146
+
147
+ ---
148
+
149
+ ## Backend Communication
150
+
151
+ Once items are selected, the clipper sends a structured payload to your backend.
152
+
153
+ The backend typically:
154
+
155
+ - Normalizes URLs
156
+ - Deduplicates products
157
+ - Associates them with a source domain
158
+ - Triggers downstream processing (price tracking, enrichment, etc.)
159
+
160
+ Security considerations:
161
+
162
+ - Use scoped, short-lived API tokens
163
+ - Never expose sensitive credentials
164
+ - Sanitize all extracted DOM content before rendering
165
+
166
+ ---
167
+
168
+ ## Limitations
169
+
170
+ A bookmarklet runs inside the page’s context. That comes with constraints.
171
+
172
+ ### Content Security Policy (CSP)
173
+
174
+ Strict `script-src` headers can block injected scripts entirely.
175
+ There is no client-side workaround. A browser extension is required in those cases.
176
+
177
+ ### Single Page Applications (SPAs)
178
+
179
+ React/Next.js apps often load content asynchronously.
180
+ Mutation observers or delayed scans improve detection reliability.
181
+
182
+ ### Bot Protection (Backend)
183
+
184
+ While the bookmarklet runs in the user’s real browser session, backend scraping of submitted URLs may face anti-bot systems.
185
+ That is a server-side concern.
186
+
187
+ ---
188
+
189
+ ## Legal & Ethical Considerations
190
+
191
+ If you build a commercial tool around product extraction:
192
+
193
+ - Only collect publicly visible data
194
+ - Do not bypass authentication or CAPTCHAs
195
+ - Respect rate limits
196
+ - Be transparent about data usage
197
+ - Review relevant laws (CFAA, GDPR, local regulations)
198
+
199
+ User-initiated clipping is typically lower risk than automated crawling, but not risk-free.
200
+
201
+ ---
202
+
203
+ ## Lessons Learned
204
+
205
+ 1. **Isolation first.** Shadow DOM prevents 90% of UI conflicts.
206
+ 2. **Layered detection beats single heuristics.**
207
+ 3. **Keep it framework-free.** Dependencies increase fragility.
208
+ 4. **Design for hostile environments.** You do not control the host page.
209
+ 5. **Simplicity wins.** A single hosted file can outperform complex extension architectures.
210
+
211
+ ---
212
+
213
+ A bookmarklet is not flashy.
214
+
215
+ But when designed correctly, it becomes a powerful bridge between arbitrary web pages and your product.
216
+
217
+ Sometimes the most effective architecture is the one that avoids complexity entirely.
data/config/routes.rb CHANGED
@@ -1,11 +1,12 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  DataPorter::Engine.routes.draw do
4
- resources :imports, only: %i[index new create show destroy] do
4
+ resources :imports, path: "/", only: %i[index new create show destroy] do
5
5
  member do
6
6
  post :parse
7
7
  post :confirm
8
8
  post :cancel
9
+ post :back_to_mapping
9
10
  post :dry_run
10
11
  patch :update_mapping
11
12
  get :status
@@ -13,5 +14,5 @@ DataPorter::Engine.routes.draw do
13
14
  end
14
15
  end
15
16
 
16
- resources :mapping_templates, except: :show
17
+ resources :mapping_templates
17
18
  end
@@ -16,8 +16,8 @@ module DataPorter
16
16
  :transaction_mode
17
17
 
18
18
  def initialize
19
- @parent_controller = "ApplicationController"
20
- @queue_name = :imports
19
+ @parent_controller = "ActionController::Base"
20
+ @queue_name = :default
21
21
  @storage_service = :local
22
22
  @cable_channel_prefix = "data_porter"
23
23
  @context_builder = nil
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module DataPorter
4
- VERSION = "1.1.0"
4
+ VERSION = "2.0.0"
5
5
  end
@@ -2,11 +2,12 @@
2
2
 
3
3
  DataPorter.configure do |config|
4
4
  # Parent controller for the engine's controllers to inherit from.
5
- # This controls authentication, layouts, and helpers.
5
+ # Defaults to ActionController::Base. Set to "ApplicationController" to inherit
6
+ # authentication, layouts, and helpers from your app.
6
7
  # config.parent_controller = "ApplicationController"
7
8
 
8
9
  # ActiveJob queue name for import jobs.
9
- # config.queue_name = :imports
10
+ # config.queue_name = :default
10
11
 
11
12
  # ActiveStorage service for uploaded files.
12
13
  # config.storage_service = :local
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: data_porter
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.1.0
4
+ version: 2.0.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Seryl Lounis
@@ -139,6 +139,7 @@ files:
139
139
  - app/views/data_porter/mapping_templates/index.html.erb
140
140
  - app/views/data_porter/mapping_templates/new.html.erb
141
141
  - app/views/layouts/data_porter/application.html.erb
142
+ - bookmarklet.md
142
143
  - config/routes.rb
143
144
  - lib/data_porter.rb
144
145
  - lib/data_porter/broadcaster.rb