data_shifter 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.husky/pre-commit +4 -0
- data/.lintstagedrc +3 -0
- data/LICENSE.txt +21 -0
- data/README.md +256 -0
- data/Rakefile +18 -0
- data/lib/data_shifter/internal/env.rb +38 -0
- data/lib/data_shifter/internal/output.rb +150 -0
- data/lib/data_shifter/internal/progress_bar.rb +29 -0
- data/lib/data_shifter/internal/record_utils.rb +38 -0
- data/lib/data_shifter/internal/signal_handler.rb +37 -0
- data/lib/data_shifter/railtie.rb +77 -0
- data/lib/data_shifter/rubocop.rb +4 -0
- data/lib/data_shifter/shift.rb +373 -0
- data/lib/data_shifter/spec_helper.rb +75 -0
- data/lib/data_shifter/version.rb +5 -0
- data/lib/data_shifter.rb +5 -0
- data/lib/generators/data_shift_generator.rb +132 -0
- data/lib/rubocop/cop/data_shifter/skip_transaction_guard_dry_run.rb +55 -0
- metadata +139 -0
checksums.yaml
ADDED
|
@@ -0,0 +1,7 @@
|
|
|
1
|
+
---
|
|
2
|
+
SHA256:
|
|
3
|
+
metadata.gz: 6e4e5f5aa36cfac3275fcb493a2555e6f093873c310395e92d4c7c64c42bb63b
|
|
4
|
+
data.tar.gz: d7c3e9a682d237887960a0bb3946e73a19bd913709ffb7c069a17a89e45f876a
|
|
5
|
+
SHA512:
|
|
6
|
+
metadata.gz: 74d1ab829e7d3a695d934f6d624fa6bdad25b7bf13ee11d680ecd90ab9ccfbfa8df33eadf9f3239193a246a183db4604691747db7ae5ea12c4548afed547645a
|
|
7
|
+
data.tar.gz: c70d3e6be2982dc83501349aeee74ae2e3dd476fd2315f46a76b4e2b52e77cfa6f7c1042a7c4f613460880d07382731b2dd24db41d9e85991b54075d6cf76fa0
|
data/.husky/pre-commit
ADDED
data/.lintstagedrc
ADDED
data/LICENSE.txt
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
data/README.md
ADDED
|
@@ -0,0 +1,256 @@
|
|
|
1
|
+
# DataShifter
|
|
2
|
+
|
|
3
|
+
Rake-backed data migrations (“shifts”) for Rails apps, with **dry run by default**, progress output, and a consistent summary. Define shift classes in `lib/data_shifts/*.rb`; run them as `rake data:shift:<task_name>`.
|
|
4
|
+
|
|
5
|
+
## Installation
|
|
6
|
+
|
|
7
|
+
```ruby
|
|
8
|
+
# Gemfile
|
|
9
|
+
gem "data_shifter"
|
|
10
|
+
```
|
|
11
|
+
|
|
12
|
+
```bash
|
|
13
|
+
bundle install
|
|
14
|
+
```
|
|
15
|
+
|
|
16
|
+
No extra setup in a Rails app: the railtie registers the generator and defines rake tasks by scanning `lib/data_shifts/*.rb`.
|
|
17
|
+
|
|
18
|
+
## Quickstart
|
|
19
|
+
|
|
20
|
+
Generate a shift (optionally scoped to a model):
|
|
21
|
+
|
|
22
|
+
```bash
|
|
23
|
+
bin/rails generate data_shift backfill_foo
|
|
24
|
+
bin/rails generate data_shift backfill_users --model=User
|
|
25
|
+
```
|
|
26
|
+
|
|
27
|
+
Add your logic to the generated file in `lib/data_shifts/`.
|
|
28
|
+
|
|
29
|
+
Run it:
|
|
30
|
+
|
|
31
|
+
```bash
|
|
32
|
+
rake data:shift:backfill_foo
|
|
33
|
+
COMMIT=1 rake data:shift:backfill_foo
|
|
34
|
+
```
|
|
35
|
+
|
|
36
|
+
## How shift files map to rake tasks
|
|
37
|
+
|
|
38
|
+
DataShifter defines one rake task per file in `lib/data_shifts/*.rb`.
|
|
39
|
+
|
|
40
|
+
- **Task name**: derived from the filename with any leading digits removed.
|
|
41
|
+
- `20260201120000_backfill_foo.rb` → `data:shift:backfill_foo` (leading `<digits>_` prefix is stripped)
|
|
42
|
+
- `backfill_foo.rb` → `data:shift:backfill_foo`
|
|
43
|
+
- **Class name**: task name camelized, inside the `DataShifts` module.
|
|
44
|
+
- `backfill_foo` → `DataShifts::BackfillFoo`
|
|
45
|
+
|
|
46
|
+
Shift files are **required only when the task runs** (tasks are defined up front; classes load lazily).
|
|
47
|
+
The `description "..."` line is extracted from the file and used for `rake -T` output without loading the shift class.
|
|
48
|
+
|
|
49
|
+
## Defining a shift
|
|
50
|
+
|
|
51
|
+
Typical shifts implement:
|
|
52
|
+
|
|
53
|
+
- **`collection`**: an `ActiveRecord::Relation` (uses `find_each`) or an `Array`/Enumerable
|
|
54
|
+
- **`process_record(record)`**: applies the change for one record
|
|
55
|
+
|
|
56
|
+
```ruby
|
|
57
|
+
module DataShifts
|
|
58
|
+
class BackfillCanceledById < DataShifter::Shift
|
|
59
|
+
description "Backfill canceled_by_id"
|
|
60
|
+
|
|
61
|
+
def collection
|
|
62
|
+
Bar.where(canceled_by_id: nil).where.not(canceled_at: nil)
|
|
63
|
+
end
|
|
64
|
+
|
|
65
|
+
def process_record(bar)
|
|
66
|
+
bar.update!(canceled_by_id: bar.company.primary_contact_id)
|
|
67
|
+
end
|
|
68
|
+
end
|
|
69
|
+
end
|
|
70
|
+
```
|
|
71
|
+
|
|
72
|
+
## Dry run vs commit
|
|
73
|
+
|
|
74
|
+
Shifts run in **dry run** mode by default. In the automatic transaction modes (`transaction :single` / `true`, and `transaction :per_record`), DB changes are rolled back automatically.
|
|
75
|
+
|
|
76
|
+
- **Dry run (default)**: `rake data:shift:backfill_foo`
|
|
77
|
+
- **Commit**: `COMMIT=1 rake data:shift:backfill_foo`
|
|
78
|
+
- (`COMMIT=true` or `DRY_RUN=false` also commit)
|
|
79
|
+
|
|
80
|
+
Non-DB side effects (API calls, emails, enqueued jobs, etc.) obviously cannot be automatically rolled back, so guard them with e.g. `return if dry_run?`.
|
|
81
|
+
|
|
82
|
+
## Transaction modes
|
|
83
|
+
|
|
84
|
+
Set the transaction mode at the class level:
|
|
85
|
+
|
|
86
|
+
- **`transaction :single` / `transaction true` (default)**: one DB transaction for the entire run; dry run rolls back at the end; a record error aborts the run.
|
|
87
|
+
- **`transaction :per_record`**: in commit mode, each record runs in its own transaction (errors are collected and the run continues); in dry run, the run is wrapped in a single rollback transaction.
|
|
88
|
+
- **`transaction false` / `transaction :none`**: CAUTION: NOT RECOMMENDED. No automatic transactions and no automatic rollback; ⚠️ **you must manually guard DB writes AND side effects with `dry_run?`.**
|
|
89
|
+
|
|
90
|
+
```ruby
|
|
91
|
+
module DataShifts
|
|
92
|
+
class BackfillLegacyId < DataShifter::Shift
|
|
93
|
+
description "Per-record so one failure doesn't roll back all"
|
|
94
|
+
transaction :per_record
|
|
95
|
+
|
|
96
|
+
def collection = Item.where(legacy_id: nil)
|
|
97
|
+
def process_record(item)
|
|
98
|
+
item.update!(legacy_id: LegacyIdService.fetch(item))
|
|
99
|
+
end
|
|
100
|
+
end
|
|
101
|
+
end
|
|
102
|
+
```
|
|
103
|
+
|
|
104
|
+
```ruby
|
|
105
|
+
module DataShifts
|
|
106
|
+
class SyncToExternal < DataShifter::Shift
|
|
107
|
+
description "Side effects outside DB"
|
|
108
|
+
transaction false
|
|
109
|
+
|
|
110
|
+
def process_record(record)
|
|
111
|
+
return if dry_run?
|
|
112
|
+
|
|
113
|
+
record.update!(synced_at: Time.current)
|
|
114
|
+
ExternalAPI.notify(record)
|
|
115
|
+
end
|
|
116
|
+
end
|
|
117
|
+
end
|
|
118
|
+
```
|
|
119
|
+
|
|
120
|
+
## Progress, status, and output
|
|
121
|
+
|
|
122
|
+
- **Progress bar**: enabled by default (requires `ruby-progressbar`), and only shown for collections with at least 5 records.
|
|
123
|
+
- **Header**: prints mode (DRY RUN vs LIVE), record count, transaction mode, and available status triggers.
|
|
124
|
+
- **Live status (without aborting)**:
|
|
125
|
+
- `STATUS_INTERVAL=60` prints a status block periodically (checked between records)
|
|
126
|
+
- **macOS/BSD**: `Ctrl+T` (SIGINFO)
|
|
127
|
+
- **Any OS**: `kill -USR1 <pid>` (SIGUSR1)
|
|
128
|
+
|
|
129
|
+
## Resuming a partial run (`CONTINUE_FROM`)
|
|
130
|
+
|
|
131
|
+
If your `collection` is an `ActiveRecord::Relation`, you can resume by filtering the primary key:
|
|
132
|
+
|
|
133
|
+
```bash
|
|
134
|
+
CONTINUE_FROM=123 COMMIT=1 rake data:shift:backfill_foo
|
|
135
|
+
```
|
|
136
|
+
|
|
137
|
+
Notes:
|
|
138
|
+
|
|
139
|
+
- Only supported for `ActiveRecord::Relation` collections (Array-based collections—like those from `find_exactly!`—cannot be resumed).
|
|
140
|
+
- The filter is `primary_key > CONTINUE_FROM`, so it’s only useful when records are processed in ascending primary-key order (which is `find_each`'s default) and the primary key is monotonically increasing.
|
|
141
|
+
|
|
142
|
+
## Operational tips
|
|
143
|
+
|
|
144
|
+
### Safety checklist (recommended)
|
|
145
|
+
|
|
146
|
+
- **Start with a dry run**: run the task once with no environment variables set, confirm logs and summary look right, then re-run with `COMMIT=1`.
|
|
147
|
+
- **Make shifts idempotent**: structure `process_record` so re-running is safe (for example, update only when the target column is `NULL`, or compute the same derived value deterministically).
|
|
148
|
+
- **Guard side effects explicitly**: even in dry run, API calls / emails / enqueues are not rolled back. Use the `dry_run?` helper to skip side-effectful code.
|
|
149
|
+
|
|
150
|
+
### Choosing a transaction mode (behavior + guidance)
|
|
151
|
+
|
|
152
|
+
- **`transaction :single` (default)**:
|
|
153
|
+
- **Behavior**: the first raised error aborts the run (all-or-nothing).
|
|
154
|
+
- **Use when**: partial success is worse than failure, or you want a clean rollback on any unexpected error.
|
|
155
|
+
- **`transaction :per_record`**:
|
|
156
|
+
- **Behavior**: in commit mode, records are committed one-by-one; errors are collected and the run continues; the overall run fails at the end if any record failed.
|
|
157
|
+
- **Use when**: you want maximum progress and are OK investigating/fixing a subset of failures.
|
|
158
|
+
- **`transaction false` / `:none`**:
|
|
159
|
+
- **Behavior**: no automatic transaction wrapper (even in dry run) and no automatic rollback.
|
|
160
|
+
- **Use when**: you have intentional external side effects, or you’re doing your own transaction/locking strategy—**but always guard writes/side effects with `dry_run?`.**
|
|
161
|
+
|
|
162
|
+
### Performance and operability (recommended)
|
|
163
|
+
|
|
164
|
+
- **Prefer returning an `ActiveRecord::Relation` from `collection`** for large datasets (DataShifter iterates relations with `find_each`).
|
|
165
|
+
- **Be aware `count` happens up front for relations** to print the header and size the progress bar. On very large/expensive relations, that extra query may be non-trivial.
|
|
166
|
+
- **Use status output for long runs**: set `STATUS_INTERVAL` in environments where signals are awkward (for example, some process managers).
|
|
167
|
+
|
|
168
|
+
## Utilities for building shifts
|
|
169
|
+
|
|
170
|
+
### `find_exactly!` (fail fast for ID lists)
|
|
171
|
+
|
|
172
|
+
Use `find_exactly!(Model, ids)` to fetch a fixed list and raise if any are missing:
|
|
173
|
+
|
|
174
|
+
```ruby
|
|
175
|
+
def collection
|
|
176
|
+
ids = ENV.fetch("BUYBACK_IDS").split(",").map(&:strip)
|
|
177
|
+
find_exactly!(Buyback, ids)
|
|
178
|
+
end
|
|
179
|
+
|
|
180
|
+
def process_record(buyback)
|
|
181
|
+
buyback.recompute!
|
|
182
|
+
end
|
|
183
|
+
```
|
|
184
|
+
|
|
185
|
+
### `skip!` (count but don’t update)
|
|
186
|
+
|
|
187
|
+
Mark a record as skipped (it will increment “Skipped” in the summary):
|
|
188
|
+
|
|
189
|
+
```ruby
|
|
190
|
+
def process_record(record)
|
|
191
|
+
skip!("already done") if record.foo.present?
|
|
192
|
+
record.update!(foo: value)
|
|
193
|
+
end
|
|
194
|
+
```
|
|
195
|
+
|
|
196
|
+
### Throttling and disabling the progress bar
|
|
197
|
+
|
|
198
|
+
```ruby
|
|
199
|
+
class SomeShift < DataShifter::Shift
|
|
200
|
+
throttle 0.1 # sleep seconds between records
|
|
201
|
+
progress false # disable progress bar rendering
|
|
202
|
+
end
|
|
203
|
+
```
|
|
204
|
+
|
|
205
|
+
## Generator
|
|
206
|
+
|
|
207
|
+
| Command | Generates |
|
|
208
|
+
|--------|----------|
|
|
209
|
+
| `bin/rails generate data_shift backfill_foo` | `lib/data_shifts/<timestamp>_backfill_foo.rb` with a `DataShifts::BackfillFoo` class |
|
|
210
|
+
| `bin/rails generate data_shift backfill_users --model=User` | Same, with `User.all` in `collection` and `process_record(user)` |
|
|
211
|
+
| `bin/rails generate data_shift backfill_users --spec` | Also generates `spec/lib/data_shifts/backfill_users_spec.rb` when RSpec is enabled |
|
|
212
|
+
|
|
213
|
+
The generator refuses to create a second shift if it would produce a duplicate rake task name.
|
|
214
|
+
|
|
215
|
+
## Testing shifts (RSpec)
|
|
216
|
+
|
|
217
|
+
This gem ships a small helper module for running shifts in tests:
|
|
218
|
+
|
|
219
|
+
```ruby
|
|
220
|
+
require "data_shifter/spec_helper"
|
|
221
|
+
|
|
222
|
+
RSpec.describe DataShifts::BackfillFoo do
|
|
223
|
+
include DataShifter::SpecHelper
|
|
224
|
+
|
|
225
|
+
before { allow($stdout).to receive(:puts) } # silence shift output
|
|
226
|
+
|
|
227
|
+
it "does not persist changes in dry run" do
|
|
228
|
+
result = run_data_shift(described_class, dry_run: true)
|
|
229
|
+
expect(result).to be_ok
|
|
230
|
+
# TODO: add some check confirming data is unchanged
|
|
231
|
+
end
|
|
232
|
+
|
|
233
|
+
it "persists changes when committed" do
|
|
234
|
+
result = run_data_shift(described_class, commit: true)
|
|
235
|
+
expect(result).to be_ok
|
|
236
|
+
# TODO: add some check confirming data is changed
|
|
237
|
+
end
|
|
238
|
+
end
|
|
239
|
+
```
|
|
240
|
+
|
|
241
|
+
## Optional RuboCop cop
|
|
242
|
+
|
|
243
|
+
If you use `transaction false` / `transaction :none`, you should guard writes and side effects with `dry_run?`. The bundled cop helps avoid mistakes by checking that the helper is called at least once:
|
|
244
|
+
|
|
245
|
+
```yaml
|
|
246
|
+
# .rubocop.yml
|
|
247
|
+
require:
|
|
248
|
+
- data_shifter/rubocop
|
|
249
|
+
```
|
|
250
|
+
|
|
251
|
+
## Requirements
|
|
252
|
+
|
|
253
|
+
- Ruby ≥ 3.2.1
|
|
254
|
+
- Rails (ActiveRecord, ActiveSupport, Railties) ≥ 6.1
|
|
255
|
+
- `axn` (Shift classes include `Axn`)
|
|
256
|
+
- `ruby-progressbar` (for progress bars)
|
data/Rakefile
ADDED
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "bundler/gem_tasks"
|
|
4
|
+
require "rubocop/rake_task"
|
|
5
|
+
|
|
6
|
+
# Run the RSpec suite from inside spec/dummy_app, pointing Bundler at the
# Gemfile found in that directory.
task :spec do
  Dir.chdir("spec/dummy_app") { sh "BUNDLE_GEMFILE=Gemfile bundle exec rspec spec/" }
end

# Defines the `rubocop` task (and its companions) with default settings.
RuboCop::RakeTask.new

# `rake` with no arguments runs the specs first, then RuboCop.
task default: [:spec, :rubocop]

# Gate `rake release` on the default task. The prerequisite is unshifted so it
# runs before the prerequisites bundler/gem_tasks registered (enhance would
# append it instead).
Rake::Task["release"].prerequisites.unshift(:default)
|
|
@@ -0,0 +1,38 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module DataShifter
  module Internal
    # Environment variable parsing utilities.
    # All methods are stateless module functions that read ENV at call time and
    # rely only on the Ruby standard library.
    module Env
      # COMMIT values (compared case-insensitively) that switch a run to live mode.
      COMMIT_VALUES = %w[1 true].freeze

      # DRY_RUN values (compared case-insensitively) that explicitly opt out of dry run.
      DRY_RUN_OFF_VALUES = %w[0 false].freeze

      # Matches empty and whitespace-only strings.
      BLANK = /\A[[:space:]]*\z/

      module_function

      # Determine dry_run mode from environment variables.
      #
      # * COMMIT, when non-blank, takes precedence: "1" / "true" mean live,
      #   any other value means dry run.
      # * Otherwise DRY_RUN decides: only an explicit "false" / "0" means live.
      #   Unset, blank, or any other value (e.g. "1", "TRUE", "yes") keeps the
      #   run in dry-run mode, so a typo can never commit by accident.
      #
      # @return [Boolean] true when the run must not persist changes
      def dry_run?
        commit = ENV["COMMIT"].to_s
        return !COMMIT_VALUES.include?(commit.downcase) unless BLANK.match?(commit)

        !DRY_RUN_OFF_VALUES.include?(ENV["DRY_RUN"].to_s.strip.downcase)
      end

      # Parse the STATUS_INTERVAL environment variable as a base-10 integer.
      #
      # @return [Integer, nil] the interval in seconds, or nil when the variable
      #   is unset, blank, not an integer, or not strictly positive
      def status_interval_seconds
        raw = ENV["STATUS_INTERVAL"].to_s
        return nil if BLANK.match?(raw)

        seconds = Integer(raw, 10, exception: false)
        seconds if seconds&.positive?
      end

      # Get the CONTINUE_FROM environment variable value.
      #
      # @return [String, nil] the raw value, or nil when unset or blank
      def continue_from_id
        value = ENV["CONTINUE_FROM"]
        value unless BLANK.match?(value.to_s)
      end
    end
  end
end
|
|
@@ -0,0 +1,150 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module DataShifter
  module Internal
    # Output formatting utilities for data shift runs.
    # All methods are stateless module functions: the IO to write to and the
    # run context are always passed in as keyword arguments.
    module Output
      TRANSACTION_MODE_LABELS = {
        single: "single (all-or-nothing)",
        per_record: "per-record",
        none: "none",
      }.freeze

      module_function

      # Print the banner shown before a run: class name, optional description,
      # mode, record count, transaction mode and the available status triggers.
      #
      # @param io [#puts] destination for the output
      # @param shift_class [#name, #description] the shift class being run
      # @param total [Integer] number of records about to be processed
      # @param label [String] noun printed after the record count
      # @param dry_run [Boolean] whether the run is a dry run
      # @param transaction_mode [Symbol] a key of TRANSACTION_MODE_LABELS; any
      #   other value is printed as-is instead of leaving the line empty
      # @param status_interval [Integer, nil] STATUS_INTERVAL in seconds, if set
      def print_header(io:, shift_class:, total:, label:, dry_run:, transaction_mode:, status_interval:)
        io.puts ""
        io.puts "=" * 60
        io.puts shift_class.name || "DataShifter::Shift (anonymous)"
        io.puts "\"#{shift_class.description}\"" if shift_class.description.present?
        io.puts "-" * 60
        io.puts "Mode: #{dry_run ? "DRY RUN (no changes will be persisted)" : "LIVE"}"
        io.puts "Records: #{total} #{label}"
        io.puts "Transaction: #{TRANSACTION_MODE_LABELS.fetch(transaction_mode, transaction_mode)}"

        status_line = build_status_line(status_interval)
        io.puts "Status: #{status_line} for live progress (no abort)" if status_line

        io.puts "=" * 60
        io.puts ""
      end

      # Print the end-of-run summary: counters, collected errors, and the
      # follow-up hints that apply (interrupt warning, dry-run instructions,
      # CONTINUE_FROM hint). Prints nothing when the run never started.
      #
      # @param io [#puts] destination for the output
      # @param stats [Hash] counters under :processed, :succeeded, :failed, :skipped
      # @param errors [Array<Hash>] entries with :record, :error and optional :backtrace
      # @param start_time [Time, nil] when the run started; nil suppresses all output
      # @param dry_run [Boolean] whether the run was a dry run
      # @param transaction_mode [Symbol] transaction mode of the run
      # @param interrupted [Boolean] whether the run was interrupted
      # @param task_name [String, nil] rake task name used in the printed commands
      # @param last_successful_id [Object, nil] id used in the CONTINUE_FROM hint
      def print_summary(io:, stats:, errors:, start_time:, dry_run:, transaction_mode:, interrupted:, task_name:, last_successful_id:)
        return unless start_time

        elapsed = (Time.current - start_time).round(1)
        io.puts ""
        io.puts "=" * 60
        io.puts summary_title(dry_run:, interrupted:)
        io.puts "-" * 60
        print_stats(io:, stats:, elapsed:)

        print_errors(io:, errors:) if errors.any?
        print_interrupt_warning(io:, transaction_mode:, dry_run:) if interrupted
        print_dry_run_instructions(io:, task_name:) if dry_run && !interrupted
        print_continue_from_hint(io:, task_name:, last_successful_id:, dry_run:, transaction_mode:, errors:)

        io.puts "=" * 60
      end

      # Print an in-flight status block while the run is still going.
      # Prints nothing when the run never started.
      #
      # @param io [#puts] destination for the output
      # @param stats [Hash] counters under :processed, :succeeded, :failed, :skipped
      # @param errors [Array<Hash>] errors collected so far
      # @param start_time [Time, nil] when the run started; nil suppresses all output
      # @param status_interval [Integer, nil] STATUS_INTERVAL in seconds, if set
      def print_progress(io:, stats:, errors:, start_time:, status_interval:)
        return unless start_time

        elapsed = (Time.current - start_time).round(1)
        io.puts ""
        io.puts "=" * 60

        # The trigger is inferred, not observed: a configured interval wins,
        # otherwise the signal this platform supports is named.
        trigger = if status_interval
                    "every #{status_interval}s (STATUS_INTERVAL)"
                  elsif Signal.list.key?("INFO")
                    "Ctrl+T"
                  else
                    "SIGUSR1"
                  end

        io.puts "STATUS (still running) — triggered by #{trigger}"
        io.puts "-" * 60
        print_stats(io:, stats:, elapsed:)

        print_errors(io:, errors:) if errors.any?

        io.puts "=" * 60
        io.puts ""
      end

      # Print the duration and the four counters shared by the summary and the
      # live status block.
      #
      # @param io [#puts] destination for the output
      # @param stats [Hash] counters under :processed, :succeeded, :failed, :skipped
      # @param elapsed [Numeric] elapsed seconds
      def print_stats(io:, stats:, elapsed:)
        io.puts "Duration: #{elapsed}s"
        io.puts "Processed: #{stats[:processed]}"
        io.puts "Succeeded: #{stats[:succeeded]}"
        io.puts "Failed: #{stats[:failed]}"
        io.puts "Skipped: #{stats[:skipped]}"
      end

      # Print every collected error, followed by its backtrace lines when present.
      #
      # @param io [#puts] destination for the output
      # @param errors [Array<Hash>] entries with :record, :error and optional :backtrace
      def print_errors(io:, errors:)
        io.puts ""
        io.puts "ERRORS:"
        errors.each do |err|
          io.puts " #{err[:record]}: #{err[:error]}"
          err[:backtrace]&.each { |line| io.puts " #{line}" }
        end
      end

      # Build the summary heading for the given mode and interruption state.
      #
      # @return [String]
      def summary_title(dry_run:, interrupted:)
        base = dry_run ? "SUMMARY (DRY RUN)" : "SUMMARY"
        interrupted ? "#{base} - INTERRUPTED" : base
      end

      # Explain what an interruption means for persisted data, depending on the
      # transaction mode and on whether the run was a dry run.
      def print_interrupt_warning(io:, transaction_mode:, dry_run:)
        io.puts ""
        if transaction_mode == :none
          io.puts "[!] INTERRUPTED: `transaction false` mode was active."
          io.puts " Some DB changes may have been applied before interruption."
          io.puts " Non-DB side effects (API calls, emails, etc.) are not rolled back."
          io.puts " Review the database state before re-running."
        elsif dry_run
          io.puts "[!] INTERRUPTED: All DB changes have been rolled back (dry run)."
          io.puts " Non-DB side effects (API calls, emails, etc.) are not rolled back."
        else
          io.puts "[!] INTERRUPTED: DB transaction has been rolled back."
          io.puts " No DB changes were persisted."
          io.puts " Non-DB side effects (API calls, emails, etc.) are not rolled back."
        end
      end

      # Remind the operator that nothing was saved and, when the task name is
      # known, print the command that commits.
      def print_dry_run_instructions(io:, task_name:)
        io.puts ""
        io.puts "[!] No changes were saved."
        return unless task_name.present?

        io.puts "To apply these changes, run:"
        io.puts " COMMIT=1 rake data:shift:#{task_name}"
      end

      # Print the CONTINUE_FROM resume command. Only emitted for a live run in
      # :none transaction mode that collected errors and has both a last
      # successful id and a task name to put in the command.
      def print_continue_from_hint(io:, task_name:, last_successful_id:, dry_run:, transaction_mode:, errors:)
        return if dry_run
        return unless transaction_mode == :none
        return if errors.empty?
        return unless last_successful_id
        return unless task_name.present?

        io.puts ""
        io.puts "To resume from the last successful record:"
        io.puts " CONTINUE_FROM=#{last_successful_id} COMMIT=1 rake data:shift:#{task_name}"
      end

      # Describe how a live status report can be requested on this platform.
      #
      # @param status_interval [Integer, nil] STATUS_INTERVAL in seconds, if set
      # @return [String, nil] nil when there is neither an interval nor a
      #   supported signal
      def build_status_line(status_interval)
        status_tips = []
        status_tips << "Ctrl+T" if Signal.list.key?("INFO")
        status_tips << "kill -USR1 #{Process.pid}" if Signal.list.key?("USR1")

        if status_interval
          interval_msg = "STATUS_INTERVAL is set to #{status_interval}s."
          status_tips.any? ? "#{interval_msg} Or: #{status_tips.join(", ")}" : interval_msg
        elsif status_tips.any?
          status_tips.join(" or ")
        end
      end
    end
  end
end
|
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module DataShifter
  module Internal
    # Factory for the console progress bar shown while a shift iterates.
    # All methods are stateless module functions.
    module ProgressBar
      module_function

      # Build a progress bar for an iteration of +total+ items.
      #
      # The ruby-progressbar library is required lazily, only when a bar is
      # actually going to be created.
      #
      # @param total [Integer] total number of items
      # @param dry_run [Boolean] whether running in dry run mode (selects the title)
      # @param enabled [Boolean] whether progress bar is enabled
      # @return [ProgressBar::Base, nil] the bar, or nil when progress is
      #   disabled or there are fewer than 5 items
      def create(total:, dry_run:, enabled:)
        return nil if !enabled || total < 5

        require "ruby-progressbar"

        bar_title = dry_run ? "Dry run" : "Processing"
        ::ProgressBar.create(total: total, title: bar_title, format: "%t: |%B| %c/%C (%P%%) %e")
      end
    end
  end
end
|
|
@@ -0,0 +1,38 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module DataShifter
  module Internal
    # Helpers for naming records and collections in run output.
    # All methods are stateless module functions.
    module RecordUtils
      module_function

      # Build a short, human-readable identifier for a record.
      #
      # @param record [Object] the record to identify
      # @return [String] "ClassName#id" when the record responds to #id,
      #   otherwise its #inspect output truncated to 80 characters
      def identifier(record)
        if record.respond_to?(:id)
          "#{record.class.name}##{record.id}"
        else
          record.inspect.truncate(80)
        end
      end

      # Derive a default label from the first element of a collection.
      #
      # @param items [Array] collection of items
      # @return [String] pluralized human model name of the first item, or
      #   "records" when that item does not respond to #model_name
      def default_label(items)
        first_item = items.first
        return "records" unless first_item.respond_to?(:model_name)

        first_item.model_name.human.pluralize
      end

      # Derive a default label from an ActiveRecord::Relation.
      #
      # @param relation [ActiveRecord::Relation] the relation
      # @return [String] pluralized human model name, or "records" when the
      #   argument does not respond to #model
      def default_label_for_relation(relation)
        return "records" unless relation.respond_to?(:model)

        relation.model.model_name.human.pluralize
      end
    end
  end
end
|
|
@@ -0,0 +1,37 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module DataShifter
  module Internal
    # Signal trap management for status reporting during data shift runs.
    # All methods are stateless module functions.
    module SignalHandler
      # Signals that trigger a status report; each is used only where the
      # platform lists it in Signal.list.
      STATUS_SIGNALS = %w[USR1 INFO].freeze

      module_function

      # Install signal traps for status reporting (SIGUSR1, SIGINFO).
      #
      # @param status_proc [Proc] the proc to call when a signal is received
      # @return [Hash{String => Object}] previous handlers keyed by signal name,
      #   exactly as returned by Signal.trap (a String, a Proc, or nil)
      def install_status_traps(status_proc)
        STATUS_SIGNALS.each_with_object({}) do |sig, handlers|
          next unless Signal.list.key?(sig)

          handlers[sig] = Signal.trap(sig, status_proc)
        end
      end

      # Restore the handlers captured by install_status_traps.
      #
      # A nil previous handler is restored too: Signal.trap accepts nil (the
      # signal is ignored), and skipping it would leave the status proc
      # installed after the run has finished.
      # NOTE(review): Signal.trap also appears to report nil for a handler Ruby
      # cannot represent; such a handler cannot be reinstated either way.
      #
      # @param handlers [Hash, nil] previous handlers from install_status_traps;
      #   nil is treated as "nothing to restore"
      def restore_status_traps(handlers)
        (handlers || {}).each do |sig, prev|
          next unless Signal.list.key?(sig)

          Signal.trap(sig, prev)
        end
      end
    end
  end
end
|
|
@@ -0,0 +1,77 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "rails/railtie"
|
|
4
|
+
|
|
5
|
+
module DataShifter
  # Rails integration: registers the generator, keeps Zeitwerk away from the
  # shifts directory, and defines one `data:shift:<name>` rake task per file in
  # lib/data_shifts.
  class Railtie < Rails::Railtie
    # description "text" / description 'text'. The closing quote must be the
    # same character as the opening one, so an apostrophe inside a
    # double-quoted description no longer cuts the text short.
    QUOTED_DESCRIPTION = /^\s*description\s+(["'])(.+?)\1/

    # description %(text) / description %Q(text); may span several lines.
    PERCENT_DESCRIPTION = /^\s*description\s+%Q?\((.+?)\)/m

    # description <<~HEREDOC / <<-HEREDOC / <<HEREDOC
    HEREDOC_DESCRIPTION = /^\s*description\s+<<[~-]?(\w+)\s*\n(.*?)\n\s*\1/m

    # Extract description DSL from shift file without loading it.
    # Supports: description "text", description 'text', description %(text), description <<~HEREDOC
    #
    # The forms are tried in that order against the whole file; the first one
    # that matches anywhere wins. Multi-line forms are collapsed to one line.
    #
    # @param file_path [String] path of the shift file to scan
    # @return [String, nil] the description text, or nil when none is found
    def self.extract_description(file_path)
      content = File.read(file_path)

      if (match = content.match(QUOTED_DESCRIPTION))
        return match[2]
      end

      if (match = content.match(PERCENT_DESCRIPTION))
        return match[1].gsub(/\s+/, " ").strip
      end

      if (match = content.match(HEREDOC_DESCRIPTION))
        return match[2].gsub(/\s+/, " ").strip
      end

      nil
    end

    generators do
      require "generators/data_shift_generator"
    end

    # Zeitwerk infers constants from filenames. Timestamped shift files
    # (e.g. 20260211_backfill_users.rb) would map to an invalid constant
    # starting with a digit. Tell Zeitwerk to ignore the shifts directory
    # so we can load them manually with require.
    initializer "data_shifter.ignore_shifts_dir" do
      shifts_dir = Rails.root.join("lib/data_shifts")
      Rails.autoloaders.main.ignore(shifts_dir) if shifts_dir.exist?
    end

    rake_tasks do
      namespace :data do
        namespace :shift do
          shifts_dir = Rails.root.join("lib/data_shifts")
          next unless shifts_dir.exist?

          Dir.glob(shifts_dir.join("*.rb")).each do |file_path|
            # Infer task name from filename: "20260211_backfill_users.rb" -> "backfill_users"
            filename = File.basename(file_path, ".rb")
            task_name = filename.sub(/\A\d+_/, "")
            class_name = task_name.camelize

            # Extract description from file without loading it (for rake -T)
            task_desc = Railtie.extract_description(file_path) || "Run data shift: #{class_name}"

            # Define the rake task lazily (only loads class when task runs)
            desc task_desc
            task task_name => :environment do
              require file_path

              # Resolve the constant inside the DataShifts namespace
              klass = "DataShifts::#{class_name}".constantize
              klass.task_name(task_name)
              klass.run!
            rescue Interrupt
              # 130 = 128 + SIGINT, the conventional exit status for Ctrl+C
              exit(130)
            rescue StandardError
              # The error is not printed here; the task only sets a failing exit status.
              exit(1)
            end
          end
        end
      end
    end
  end
end
|