data_shifter 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.husky/pre-commit +4 -0
- data/.lintstagedrc +3 -0
- data/LICENSE.txt +21 -0
- data/README.md +256 -0
- data/Rakefile +18 -0
- data/lib/data_shifter/internal/env.rb +38 -0
- data/lib/data_shifter/internal/output.rb +150 -0
- data/lib/data_shifter/internal/progress_bar.rb +29 -0
- data/lib/data_shifter/internal/record_utils.rb +38 -0
- data/lib/data_shifter/internal/signal_handler.rb +37 -0
- data/lib/data_shifter/railtie.rb +77 -0
- data/lib/data_shifter/rubocop.rb +4 -0
- data/lib/data_shifter/shift.rb +373 -0
- data/lib/data_shifter/spec_helper.rb +75 -0
- data/lib/data_shifter/version.rb +5 -0
- data/lib/data_shifter.rb +5 -0
- data/lib/generators/data_shift_generator.rb +132 -0
- data/lib/rubocop/cop/data_shifter/skip_transaction_guard_dry_run.rb +55 -0
- metadata +139 -0
|
@@ -0,0 +1,373 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "axn"
|
|
4
|
+
require "active_support/isolated_execution_state"
|
|
5
|
+
require_relative "internal/env"
|
|
6
|
+
require_relative "internal/output"
|
|
7
|
+
require_relative "internal/signal_handler"
|
|
8
|
+
require_relative "internal/record_utils"
|
|
9
|
+
require_relative "internal/progress_bar"
|
|
10
|
+
|
|
11
|
+
# Base class for data shifts. Dry-run by default, progress bars, transaction modes, consistent summaries.
|
|
12
|
+
#
|
|
13
|
+
# Usage:
|
|
14
|
+
#
|
|
15
|
+
# # lib/data_shifts/20260201120000_backfill_foo.rb
|
|
16
|
+
# module DataShifts
|
|
17
|
+
# class BackfillFoo < DataShifter::Shift
|
|
18
|
+
# description "Backfill foo on bars"
|
|
19
|
+
#
|
|
20
|
+
# def collection
|
|
21
|
+
# Bar.where(foo: nil)
|
|
22
|
+
# end
|
|
23
|
+
#
|
|
24
|
+
# def process_record(bar)
|
|
25
|
+
# bar.update!(foo: computed_value(bar))
|
|
26
|
+
# end
|
|
27
|
+
# end
|
|
28
|
+
# end
|
|
29
|
+
#
|
|
30
|
+
# Running:
|
|
31
|
+
# - `rake data:shift:backfill_foo` (dry run by default)
|
|
32
|
+
# - `COMMIT=1 rake data:shift:backfill_foo` (apply changes)
|
|
33
|
+
# - Or call the class directly: `MyShift.call(dry_run: false)` (Axn semantics). Note that the default location is not auto-loaded.
|
|
34
|
+
#
|
|
35
|
+
# Transaction modes (set at class level with `transaction`):
|
|
36
|
+
# - `transaction :single` (default): one transaction for the whole run (all-or-nothing).
|
|
37
|
+
# - `transaction :per_record`: each record in its own transaction.
|
|
38
|
+
# - `transaction false`: no automatic transactions; guard writes with `return if dry_run?`.
|
|
39
|
+
#
|
|
40
|
+
# Dry run: In `:single` and `:per_record`, dry_run rolls back DB changes automatically.
|
|
41
|
+
# Non-DB side effects are not rolled back; guard with `return if dry_run?` / `return unless dry_run?`.
|
|
42
|
+
#
|
|
43
|
+
# Fixed list of IDs (fail fast): Use find_exactly!(Model, [id1, id2, ...]) in `collection`.
|
|
44
|
+
# Large collections: Return an ActiveRecord::Relation and iteration uses `find_each`.
|
|
45
|
+
#
|
|
46
|
+
module DataShifter
|
|
47
|
+
class Shift
|
|
48
|
+
include Axn
|
|
49
|
+
|
|
50
|
+
expects :dry_run, type: :boolean, default: true
|
|
51
|
+
|
|
52
|
+
log_calls false if respond_to?(:log_calls)
|
|
53
|
+
|
|
54
|
+
around :_with_transaction_for_dry_run
|
|
55
|
+
before :_reset_tracking
|
|
56
|
+
on_success :_print_summary
|
|
57
|
+
on_error :_print_summary
|
|
58
|
+
|
|
59
|
+
class_attribute :_transaction_mode, default: :single
|
|
60
|
+
class_attribute :_progress_enabled, default: true
|
|
61
|
+
class_attribute :_description, default: nil
|
|
62
|
+
class_attribute :_task_name, default: nil
|
|
63
|
+
class_attribute :_throttle_interval, default: nil
|
|
64
|
+
|
|
65
|
+
class << self
|
|
66
|
+
# Class-level DSL: reads or sets the human-readable description of the shift.
#
# @param text [#to_s, nil] new description; omit (or pass nil) to read the current value
# @return [String, nil] the description now stored (blank input is stored as nil)
def description(text = nil)
  return _description if text.nil?

  self._description = text.to_s.presence
end
|
|
73
|
+
|
|
74
|
+
# Class-level DSL: reads or sets the task name associated with the shift.
#
# @param value [#to_s, nil] new task name; omit (or pass nil) to read the current value
# @return [String, nil] the task name now stored (blank input is stored as nil)
def task_name(value = nil)
  return _task_name if value.nil?

  self._task_name = value.to_s.presence
end
|
|
81
|
+
|
|
82
|
+
# Class-level DSL: selects how the shift wraps its work in database transactions.
#
# @param mode [Symbol, Boolean] :single or true, :per_record, :none or false
# @return [Symbol] the normalized mode that was stored (:single, :per_record or :none)
# @raise [ArgumentError] when mode is not one of the accepted values
def transaction(mode)
  # Booleans are accepted as shorthand for the two extremes.
  normalized_modes = {
    single: :single,
    true => :single,
    per_record: :per_record,
    none: :none,
    false => :none,
  }

  self._transaction_mode = normalized_modes.fetch(mode) do
    raise ArgumentError, "Invalid transaction mode: #{mode.inspect}. Expected :single, :per_record, :none, true, or false."
  end
end
|
|
94
|
+
|
|
95
|
+
# Class-level DSL: reads or sets whether a progress bar is requested for the shift.
#
# @param enabled [Object, nil] truthy/falsy switch; omit (or pass nil) to read the current value
# @return [Boolean] the flag now stored
def progress(enabled = nil)
  return _progress_enabled if enabled.nil?

  # Coerce to a strict boolean so the stored value is always true or false.
  self._progress_enabled = enabled ? true : false
end
|
|
102
|
+
|
|
103
|
+
# Class-level DSL: sets the pause applied after each processed record.
#
# The interval is later handed to `sleep`, which rejects negative values. Validating here
# surfaces the mistake when the class is defined instead of part-way through a run.
#
# @param interval [Numeric, nil] seconds to sleep between records; nil disables throttling
# @return [Numeric, nil] the interval that was stored
# @raise [ArgumentError] when interval is negative
def throttle(interval)
  if interval.respond_to?(:negative?) && interval.negative?
    raise ArgumentError, "Invalid throttle interval: #{interval.inspect}. Expected a non-negative number of seconds or nil."
  end

  self._throttle_interval = interval
end
|
|
106
|
+
|
|
107
|
+
# Runs the shift with the dry-run setting taken from Internal::Env and raises on failure.
#
# @return [nil] when the run succeeded
# @raise [Exception] the exception carried by the result, when there is one
# @raise [StandardError] with the result's error message when the result is not ok
def run!
  outcome = call(dry_run: Internal::Env.dry_run?)

  failure = outcome.exception
  raise failure if failure
  raise StandardError, outcome.error unless outcome.ok?
end
|
|
113
|
+
end
|
|
114
|
+
|
|
115
|
+
# --- Public API (intentionally exposed to subclasses) ---
|
|
116
|
+
|
|
117
|
+
# Entry point of the shift: feeds every element of `collection` to `process_record`
# through the tracked iteration helper.
def call
  _for_each_record_in(collection) do |record|
    process_record(record)
  end
end
|
|
120
|
+
|
|
121
|
+
# Loads exactly the requested records, failing fast when any id cannot be found.
#
# Ids are matched by their string form, so ids supplied as strings (for example read
# from ENV) match records whose primary key is an integer. Previously such ids were
# fetched by the query but then all reported as missing.
#
# @param model [Class] model class responding to `where(id:)`, `none` and `name`
# @param ids [Array, Object, nil] one id or a list of ids; nils and duplicates are dropped
# @return [Array] the records in the order the ids were given (`model.none` when no ids remain)
# @raise [RuntimeError] when one or more ids have no matching record
def find_exactly!(model, ids)
  # De-duplicate on the string form too, so [1, "1"] asks for the record once.
  ids = Array(ids).compact.uniq(&:to_s)
  return model.none if ids.empty?

  records_by_key = model.where(id: ids).each_with_object({}) do |record, found|
    found[record.id.to_s] = record
  end
  missing = ids.reject { |id| records_by_key.key?(id.to_s) }
  raise "Expected #{model.name} with ids #{ids.inspect}, but missing: #{missing.inspect}" if missing.any?

  ids.map { |id| records_by_key[id.to_s] }
end
|
|
131
|
+
|
|
132
|
+
# Predicate form of the `dry_run` input.
#
# @return [Boolean] the value of `dry_run`
def dry_run?
  dry_run
end
|
|
133
|
+
|
|
134
|
+
# Marks the record currently being processed as skipped.
#
# The succeeded counter is decremented up front because `_process_one` increments it
# once the record's block returns; the two cancel out so the record counts only as skipped.
# This method does not stop processing of the record by itself.
#
# @param reason [String, nil] optional explanation, logged when given
def skip!(reason = nil)
  @stats[:succeeded] -= 1
  @stats[:skipped] += 1
  return unless reason

  log " SKIP: #{reason}"
end
|
|
139
|
+
|
|
140
|
+
# Writes a line of shift output to standard output.
#
# @param message [#to_s] text to print
def log(message)
  $stdout.puts(message)
end
|
|
143
|
+
|
|
144
|
+
private
|
|
145
|
+
|
|
146
|
+
# --- Axn lifecycle hooks ---
|
|
147
|
+
|
|
148
|
+
# Around hook: wraps the rest of the call chain in a transaction where the mode needs it.
#
# - :none        -> runs the chain with no transaction.
# - :single      -> always one transaction; rolled back afterwards on a dry run.
# - other modes  -> a transaction (always rolled back) only on a dry run; otherwise the
#                   chain runs bare.
#
# @param chain [#call] continuation for the remaining hooks and the call itself
def _with_transaction_for_dry_run(chain)
  case _transaction_mode
  when :none
    chain.call
    nil
  when :single
    ActiveRecord::Base.transaction do
      chain.call
      raise ActiveRecord::Rollback if dry_run?
    end
    nil
  else
    return chain.call unless dry_run?

    ActiveRecord::Base.transaction do
      chain.call
      raise ActiveRecord::Rollback
    end
  end
end
|
|
171
|
+
|
|
172
|
+
# Resets all per-run bookkeeping: counters, collected errors, timing, the interrupt flag
# and the id of the last successfully processed record.
def _reset_tracking
  @stats = %i[processed succeeded failed skipped].to_h { |counter| [counter, 0] }
  @errors = []
  # Both timestamps start from the same instant.
  @start_time = @last_status_print = Time.current
  @_data_shift_interrupted = false
  @_last_successful_id = nil
end
|
|
180
|
+
|
|
181
|
+
# Success/error hook: hands the run's bookkeeping to Internal::Output.print_summary.
def _print_summary
  report = {
    io: $stdout,
    stats: @stats,
    errors: @errors,
    start_time: @start_time,
    dry_run: dry_run?,
    transaction_mode: _transaction_mode,
    interrupted: @_data_shift_interrupted,
    task_name: self.class.task_name,
    last_successful_id: @_last_successful_id,
  }

  Internal::Output.print_summary(**report)
end
|
|
194
|
+
|
|
195
|
+
# --- Override points ---
|
|
196
|
+
|
|
197
|
+
# Override point: subclasses return the records to process.
#
# @raise [NotImplementedError] always, until a subclass overrides it
def collection
  raise NotImplementedError, format("%s: override `collection`", self.class.name)
end
|
|
200
|
+
|
|
201
|
+
# Override point: subclasses apply the change to a single record.
#
# @param _record [Object] one element of `collection`
# @raise [NotImplementedError] always, until a subclass overrides it
def process_record(_record)
  raise NotImplementedError, format("%s: override `process_record`", self.class.name)
end
|
|
204
|
+
|
|
205
|
+
# --- Record iteration ---
|
|
206
|
+
|
|
207
|
+
# Prints a progress snapshot for the current run via Internal::Output.print_progress.
def _print_progress
  snapshot = {
    io: $stdout,
    stats: @stats,
    errors: @errors,
    start_time: @start_time,
    status_interval: Internal::Env.status_interval_seconds,
  }

  Internal::Output.print_progress(**snapshot)
end
|
|
216
|
+
|
|
217
|
+
# Runs the given block for every record with tracking, status signal traps and
# interrupt handling in place.
#
# The current run is published in ActiveSupport::IsolatedExecutionState so the signal
# handler's proc can reach it; both the state entry and the previous traps are restored
# when iteration ends, however it ends.
#
# @param records [Object] collection to iterate (passed on to `_each_record_impl`)
# @param label [String, nil] optional label for the header
def _for_each_record_in(records, label: nil, &)
  _reset_tracking

  run_key = :_data_shifter_current_run
  ActiveSupport::IsolatedExecutionState[run_key] = self
  print_status = proc { ActiveSupport::IsolatedExecutionState[run_key]&.send(:_print_progress) }
  saved_traps = Internal::SignalHandler.install_status_traps(print_status)

  begin
    _each_record_impl(records, label:, &)
  rescue Interrupt
    _handle_interrupt
  ensure
    ActiveSupport::IsolatedExecutionState.delete(run_key)
    Internal::SignalHandler.restore_status_traps(saved_traps)
  end
end
|
|
231
|
+
|
|
232
|
+
# Prepares the collection (resume filter, total, label, header), runs it through the
# strategy for the configured transaction mode, and fails the action when any record failed.
#
# Collections responding to `find_each` are kept as-is and counted with `count`;
# anything else is materialized into an array first.
#
# @param records [Object] relation-like object, or anything convertible to an array
# @param label [String, nil] header label; a default is derived when nil
def _each_record_impl(records, label: nil, &)
  source = _apply_continue_from(records)

  if source.respond_to?(:find_each)
    enum = source
    total = enum.count
    derive_label = -> { Internal::RecordUtils.default_label_for_relation(enum) }
  else
    enum = source.respond_to?(:to_a) ? source.to_a : Array(source)
    total = enum.size
    derive_label = -> { Internal::RecordUtils.default_label(enum) }
  end

  @label = label || derive_label.call
  _print_header(total)

  strategy = {
    single: :_run_in_single_transaction,
    per_record: :_run_per_record,
    none: :_run_without_transaction,
  }[_transaction_mode]
  send(strategy, enum, total, &) if strategy

  fail! "#{@stats[:failed]} record(s) failed" if @errors.any?
end
|
|
259
|
+
|
|
260
|
+
# Narrows the collection to records after the resume point given by
# Internal::Env.continue_from_id, when one is set.
#
# The primary-key column is now quoted (via the model's `quoted_primary_key`) the same way
# the table name already was, so a key that needs quoting no longer yields broken SQL.
#
# @param records [Object] the collection returned by `collection`
# @return [Object] `records` untouched when no resume point is set, otherwise the
#   collection filtered to primary keys greater than the resume point
# @raise [ArgumentError] when a resume point is set but `records` does not respond to `find_each`
def _apply_continue_from(records)
  continue_from = Internal::Env.continue_from_id
  return records if continue_from.nil?

  unless records.respond_to?(:find_each)
    raise ArgumentError,
          "CONTINUE_FROM is only supported for ActiveRecord::Relation collections. " \
          "Array-based collections (e.g. from find_exactly!) cannot be resumed."
  end

  model = records.model
  log "[CONTINUE_FROM] Resuming from #{model.primary_key} > #{continue_from}"
  records.where("#{model.quoted_table_name}.#{model.quoted_primary_key} > ?", continue_from)
end
|
|
274
|
+
|
|
275
|
+
# --- Transaction execution strategies ---
|
|
276
|
+
|
|
277
|
+
# Strategy for `transaction :single`: iterates every record inside one transaction and
# requests a rollback at the end of a dry run.
#
# An error escaping the transaction is recorded as a "transaction" failure unless a
# record-level error has already been collected (in which case it is the same failure
# re-raised by `_process_one`).
#
# @param enum [Object] collection to iterate
# @param total [Integer] number of records, for the progress bar
def _run_in_single_transaction(enum, total, &block)
  ActiveRecord::Base.transaction do
    _iterate(enum, total, &block)
    next unless dry_run?

    log "\nDry run complete — rolling back all changes."
    raise ActiveRecord::Rollback
  end
rescue StandardError => error
  unless @errors.any?
    @stats[:failed] += 1
    @errors << { record: "transaction", error: error.message, backtrace: error.backtrace&.first(3) }
  end
end
|
|
291
|
+
|
|
292
|
+
# Strategy for `transaction :per_record`: every record runs in its own transaction.
#
# `requires_new: true` makes each record an isolated unit even when a transaction is
# already open: on a dry run (where the around hook wraps the whole run in a transaction
# that is rolled back at the end) and when the caller runs the shift inside its own
# transaction. A failing record then rolls back only its own writes via a savepoint,
# instead of leaving partial writes visible to later records. With no outer transaction
# this starts an ordinary transaction, as before.
#
# @param enum [Object] collection to iterate
# @param total [Integer] number of records, for the progress bar
def _run_per_record(enum, total, &)
  _iterate(enum, total) do |record|
    ActiveRecord::Base.transaction(requires_new: true) { yield record }
  end
end
|
|
301
|
+
|
|
302
|
+
# Strategy for `transaction false`/`:none`: plain iteration with no transaction added here.
#
# @param enum [Object] collection to iterate
# @param total [Integer] number of records, for the progress bar
def _run_without_transaction(enum, total, &)
  _iterate(enum, total, &)
end
|
|
305
|
+
|
|
306
|
+
# Walks the collection, running each record through `_process_one`, advancing the
# progress bar (when one was created) and sleeping for the throttle interval (when set).
#
# Collections responding to `find_each` are walked with it; everything else with `each`.
#
# @param enum [Object] collection to iterate
# @param total [Integer] number of records, handed to the progress bar
# @yieldparam record [Object] the record to process
def _iterate(enum, total)
  bar = Internal::ProgressBar.create(total:, dry_run: dry_run?, enabled: _progress_enabled)
  walker = enum.respond_to?(:find_each) ? :find_each : :each

  enum.public_send(walker) do |record|
    _process_one(record) { yield record }
    bar&.increment
    sleep(_throttle_interval) if _throttle_interval
  end
end
|
|
322
|
+
|
|
323
|
+
# Runs the block for one record while keeping the counters and error list up to date.
#
# On success the record's id (when it has one) is remembered as the last successful id.
# A StandardError is counted, recorded and logged; it is re-raised only in :single mode.
# The interval status check runs after every record, whatever the outcome.
#
# @param record [Object] the record being processed
def _process_one(record)
  @stats[:processed] += 1
  yield
  @stats[:succeeded] += 1
  @_last_successful_id = record.id if record.respond_to?(:id)
rescue StandardError => error
  @stats[:failed] += 1
  record_label = Internal::RecordUtils.identifier(record)
  @errors << { record: record_label, error: error.message, backtrace: error.backtrace&.first(3) }
  log "ERROR #{record_label}: #{error.message}"

  # In :single mode the failure must escape so the enclosing transaction is abandoned.
  raise if _transaction_mode == :single
ensure
  _maybe_print_interval_status
end
|
|
338
|
+
|
|
339
|
+
# Prints a progress snapshot when a positive status interval is configured and at least
# that many seconds have passed since the previous snapshot.
def _maybe_print_interval_status
  interval = Internal::Env.status_interval_seconds
  return unless interval&.positive? && @start_time

  elapsed = Time.current - @last_status_print
  return unless elapsed >= interval

  @last_status_print = Time.current
  _print_progress
end
|
|
347
|
+
|
|
348
|
+
# --- Output helpers ---
|
|
349
|
+
|
|
350
|
+
# Prints the run header via Internal::Output.print_header.
#
# @param total [Integer] number of records about to be processed
def _print_header(total)
  banner = {
    io: $stdout,
    shift_class: self.class,
    total:,
    label: @label,
    dry_run: dry_run?,
    transaction_mode: _transaction_mode,
    status_interval: Internal::Env.status_interval_seconds,
  }

  Internal::Output.print_header(**banner)
end
|
|
361
|
+
|
|
362
|
+
# Reacts to Ctrl+C: flags the run as interrupted, prints the summary, then raises
# Interrupt again so enclosing code still sees the interruption.
#
# @raise [Interrupt] always
def _handle_interrupt
  @_data_shift_interrupted = true
  log "\n\n*** Interrupted by user (Ctrl+C) ***"

  # The summary is printed here because the on_error hook may not run for an Interrupt,
  # which is a SignalException rather than a StandardError.
  _print_summary

  # Raising again lets the wrapping transaction block roll back.
  raise Interrupt
end
|
|
372
|
+
end
|
|
373
|
+
end
|
|
@@ -0,0 +1,75 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
# StringIO is used below to swap out $stdout; require it explicitly instead of relying
# on another library having loaded it first.
require "stringio"

module DataShifter
  # Test helpers for RSpec. Include this module in your spec_helper or rails_helper:
  #
  #   require "data_shifter/spec_helper"
  #
  #   RSpec.configure do |config|
  #     config.include DataShifter::SpecHelper, type: :data_shift
  #   end
  #
  # Or include it in individual specs:
  #
  #   RSpec.describe DataShifts::BackfillFoo do
  #     include DataShifter::SpecHelper
  #     ...
  #   end
  #
  module SpecHelper
    # Run a data shift class with the given options.
    #
    # @param shift_class [Class] the DataShifter::Shift subclass
    # @param dry_run [Boolean] whether to run in dry_run mode (default: true)
    # @param commit [Boolean] shorthand for dry_run: false; wins over dry_run when true (default: false)
    # @return [Axn::Result] whatever `shift_class.call` returns
    #
    # @example
    #   result = run_data_shift(DataShifts::BackfillFoo)
    #   expect(result).to be_ok
    #
    # @example with commit
    #   result = run_data_shift(DataShifts::BackfillFoo, commit: true)
    #   expect(record.reload.foo).to eq("bar")
    #
    def run_data_shift(shift_class, dry_run: true, commit: false)
      shift_class.call(dry_run: commit ? false : dry_run)
    end

    # Suppress STDOUT output during a block (useful for cleaner test output).
    #
    # @return [Object] the block's return value
    #
    # @example
    #   silence_data_shift_output do
    #     run_data_shift(DataShifts::BackfillFoo, commit: true)
    #   end
    #
    def silence_data_shift_output(&block)
      block_result, _output = capture_data_shift_output(&block)
      block_result
    end

    # Run a block and capture everything it writes to $stdout.
    # $stdout is restored afterwards, even when the block raises.
    #
    # @return [Array(Object, String)] the block's return value and the captured output
    #
    # @example
    #   result, output = capture_data_shift_output do
    #     run_data_shift(DataShifts::BackfillFoo)
    #   end
    #   expect(output).to include("DRY RUN")
    #
    def capture_data_shift_output
      original_stdout = $stdout
      buffer = StringIO.new
      $stdout = buffer
      [yield, buffer.string]
    ensure
      $stdout = original_stdout
    end
  end
end
|
data/lib/data_shifter.rb
ADDED
|
@@ -0,0 +1,132 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
# Generator for data shifts.
|
|
4
|
+
#
|
|
5
|
+
# Usage:
|
|
6
|
+
# rails g data_shift backfill_users
|
|
7
|
+
# rails g data_shift backfill_users --model=User
|
|
8
|
+
#
|
|
9
|
+
class DataShiftGenerator < Rails::Generators::NamedBase
|
|
10
|
+
class_option :model,
|
|
11
|
+
type: :string,
|
|
12
|
+
default: nil,
|
|
13
|
+
desc: "Model to operate on (e.g. User). Pre-fills the collection method."
|
|
14
|
+
|
|
15
|
+
class_option :spec,
|
|
16
|
+
type: :boolean,
|
|
17
|
+
default: false,
|
|
18
|
+
desc: "Generate RSpec file"
|
|
19
|
+
|
|
20
|
+
# Aborts generation when an existing shift file would produce the same rake task name.
#
# Only files named `<digits>_<name>.rb` count as conflicts. The bare glob `*_<name>.rb`
# also matched files whose task name merely ends with `_<name>` (generating `users`
# tripped on `20260201120000_backfill_users.rb`), so candidates are now filtered by an
# anchored pattern.
# NOTE(review): this assumes task names are the file name minus its leading numeric
# timestamp, as in the files this generator writes - confirm against the rake task loader.
#
# @raise [Thor::Error] when a conflicting file exists
def check_for_naming_conflict
  underscored_name = name.underscore

  # Use destination_root if available (for testing), otherwise Rails.root
  root = respond_to?(:destination_root) ? Pathname.new(destination_root) : Rails.root
  shifts_dir = root.join("lib/data_shifts")
  return unless shifts_dir.exist?

  same_task_file = /\A\d+_#{Regexp.escape(underscored_name)}\.rb\z/
  candidates = Dir.glob(shifts_dir.join("*_#{underscored_name}.rb").to_s)
  conflicting_file = candidates.find { |path| File.basename(path).match?(same_task_file) }
  return unless conflicting_file

  raise Thor::Error, <<~ERROR
    A data shift with task name '#{underscored_name}' already exists:
      #{conflicting_file}

    Rake task names must be unique. Please choose a different name.
  ERROR
end
|
|
39
|
+
|
|
40
|
+
# Writes the shift class skeleton to lib/data_shifts/<timestamp>_<name>.rb.
#
# Sets @timestamp, @class_name and @model_name for later generator steps.
# The `process_record` parameter is derived from the model's last constant segment:
# with a namespaced model such as Admin::User, underscoring the full name produced
# "admin/user", which is not a valid parameter name.
def create_shift_file
  underscored_name = name.underscore
  @timestamp = Time.current.strftime("%Y%m%d%H%M%S")
  @class_name = underscored_name.camelize
  requested_model = options[:model].to_s.strip
  @model_name = requested_model.present? ? requested_model.underscore.singularize.camelize : nil

  collection_body = @model_name.present? ? "#{@model_name}.all" : "# Model.where(foo: nil)"
  record_arg = @model_name.present? ? @model_name.demodulize.underscore : "record"

  create_file "lib/data_shifts/#{@timestamp}_#{underscored_name}.rb", <<~RUBY
    # frozen_string_literal: true

    # rake data:shift:#{underscored_name} # Dry run (default)
    # COMMIT=1 rake data:shift:#{underscored_name} # Apply changes

    module DataShifts
      class #{@class_name} < DataShifter::Shift
        description "TODO: Describe this shift"

        transaction true # or false or :per_record

        def collection
          #{collection_body}
        end

        def process_record(#{record_arg})
          # #{record_arg}.update!(...)
        end
      end
    end
  RUBY
end
|
|
78
|
+
|
|
79
|
+
# Writes an RSpec skeleton for the shift when --spec was given and RSpec is in use.
#
# Reads @class_name and @model_name as set by `create_shift_file`.
# The suggested `let` name is derived from the model's last constant segment, so a
# namespaced model (Admin::User) yields `:user` instead of the invalid symbol `:admin/user`.
def create_spec_file
  return unless options[:spec]
  return unless rspec_enabled?

  underscored_name = name.underscore
  record_arg = @model_name.present? ? @model_name.demodulize.underscore : "record"

  create_file "spec/lib/data_shifts/#{underscored_name}_spec.rb", <<~RUBY
    # frozen_string_literal: true

    require "rails_helper"
    require "data_shifter/spec_helper"

    RSpec.describe DataShifts::#{@class_name} do
      include DataShifter::SpecHelper

      before { allow($stdout).to receive(:puts) }

      # TODO: Set up test records
      # let(:#{record_arg}) { create(:#{record_arg}) }

      describe "dry run" do
        it "does not persist changes" do
          result = run_data_shift(described_class, dry_run: true)
          expect(result).to be_ok
          # TODO: Assert that records are unchanged
        end
      end

      describe "commit" do
        it "applies changes" do
          result = run_data_shift(described_class, commit: true)
          expect(result).to be_ok
          # TODO: Assert that records are updated
        end
      end
    end
  RUBY
end
|
|
118
|
+
|
|
119
|
+
private
|
|
120
|
+
|
|
121
|
+
# Whether an RSpec file should be generated.
#
# An explicitly configured Rails generator test framework decides; when none is
# configured, the presence of RSpec::Rails does.
#
# @return [Boolean, String, nil] truthy when RSpec is in use
def rspec_enabled?
  return false unless defined?(Rails)

  configured = Rails.configuration.generators.options.dig(:rails, :test_framework)
  configured ? configured == :rspec : defined?(RSpec::Rails)
end
|
|
132
|
+
end
|
|
@@ -0,0 +1,55 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module RuboCop
|
|
4
|
+
module Cop
|
|
5
|
+
module DataShifter
|
|
6
|
+
# In data shift files, `transaction false` disables automatic transaction
|
|
7
|
+
# and rollback. DB writes (and side effects) are not rolled back on dry run, so
|
|
8
|
+
# the shift must guard them with `return if dry_run?` or `return unless dry_run?`.
|
|
9
|
+
#
|
|
10
|
+
# @example
|
|
11
|
+
# # bad
|
|
12
|
+
# class BackfillUsers < DataShifter::Shift
|
|
13
|
+
# transaction false
|
|
14
|
+
# def process_record(record)
|
|
15
|
+
# record.update!(foo: 1)
|
|
16
|
+
# end
|
|
17
|
+
# end
|
|
18
|
+
#
|
|
19
|
+
# # good
|
|
20
|
+
# class BackfillUsers < DataShifter::Shift
|
|
21
|
+
# transaction false
|
|
22
|
+
# def process_record(record)
|
|
23
|
+
# return if dry_run?
|
|
24
|
+
# record.update!(foo: 1)
|
|
25
|
+
# end
|
|
26
|
+
# end
|
|
27
|
+
class SkipTransactionGuardDryRun < Base
|
|
28
|
+
MSG = "Data shifts using `transaction false` must guard writes/side effects with " \
|
|
29
|
+
"`return if dry_run?` or `return unless dry_run?`."
|
|
30
|
+
|
|
31
|
+
def_node_matcher :skip_transaction_call?, <<~PATTERN
|
|
32
|
+
(send _ :transaction {(sym :none) (false)})
|
|
33
|
+
PATTERN
|
|
34
|
+
|
|
35
|
+
# Registers an offense on a `transaction :none` / `transaction false` call when the
# file contains no `dry_run?` call at all.
#
# @param node [RuboCop::AST::SendNode] the method call being inspected
def on_send(node)
  unguarded = skip_transaction_call?(node) && !file_contains_dry_run_guard?
  add_offense(node, message: MSG) if unguarded
end
|
|
41
|
+
|
|
42
|
+
private
|
|
43
|
+
|
|
44
|
+
# Whether the file calls `dry_run?` anywhere. A file with no AST counts as guarded,
# so nothing is reported for it.
#
# @return [Boolean]
def file_contains_dry_run_guard?
  ast = processed_source.ast
  return true unless ast

  ast.each_node(:send).any? { |send_node| send_node.method?(:dry_run?) }
end
|
|
52
|
+
end
|
|
53
|
+
end
|
|
54
|
+
end
|
|
55
|
+
end
|