data_shifter 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,4 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "rubocop"
4
+ require "rubocop/cop/data_shifter/skip_transaction_guard_dry_run"
@@ -0,0 +1,373 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "axn"
4
+ require "active_support/isolated_execution_state"
5
+ require_relative "internal/env"
6
+ require_relative "internal/output"
7
+ require_relative "internal/signal_handler"
8
+ require_relative "internal/record_utils"
9
+ require_relative "internal/progress_bar"
10
+
11
+ # Base class for data shifts. Dry-run by default, progress bars, transaction modes, consistent summaries.
12
+ #
13
+ # Usage:
14
+ #
15
+ # # lib/data_shifts/20260201120000_backfill_foo.rb
16
+ # module DataShifts
17
+ # class BackfillFoo < DataShifter::Shift
18
+ # description "Backfill foo on bars"
19
+ #
20
+ # def collection
21
+ # Bar.where(foo: nil)
22
+ # end
23
+ #
24
+ # def process_record(bar)
25
+ # bar.update!(foo: computed_value(bar))
26
+ # end
27
+ # end
28
+ # end
29
+ #
30
+ # Running:
31
+ # - `rake data:shift:backfill_foo` (dry run by default)
32
+ # - `COMMIT=1 rake data:shift:backfill_foo` (apply changes)
33
+ # - Or call directly: `MyShift.call(dry_run: false)` (Axn semantics). Note that the default location (`lib/data_shifts`) is not autoloaded, so the file must be required first.
34
+ #
35
+ # Transaction modes (set at class level with `transaction`):
36
+ # - `transaction :single` (default): one transaction for the whole run (all-or-nothing).
37
+ # - `transaction :per_record`: each record in its own transaction.
38
+ # - `transaction false` (or `transaction :none`): no automatic transactions; guard writes with `return if dry_run?`.
39
+ #
40
+ # Dry run: In `:single` and `:per_record`, dry_run rolls back DB changes automatically.
41
+ # Non-DB side effects are not rolled back; guard with `return if dry_run?` / `return unless dry_run?`.
42
+ #
43
+ # Fixed list of IDs (fail fast): Use find_exactly!(Model, [id1, id2, ...]) in `collection`.
44
+ # Large collections: Return an ActiveRecord::Relation; relations are iterated with `find_each`.
45
+ #
46
+ module DataShifter
47
+ class Shift
48
+ include Axn
49
+
# Dry run is the default; callers must pass `dry_run: false` to apply changes.
expects :dry_run, type: :boolean, default: true

# Only invoked when the including Axn version defines `log_calls`.
log_calls false if respond_to?(:log_calls)

# Lifecycle hooks; the hook methods live in the private section of this class.
around :_with_transaction_for_dry_run
before :_reset_tracking
on_success :_print_summary
on_error :_print_summary

# Per-class configuration, written by the class-level DSL methods.
class_attribute :_transaction_mode, default: :single
class_attribute :_progress_enabled, default: true
class_attribute :_description, default: nil
class_attribute :_task_name, default: nil
class_attribute :_throttle_interval, default: nil
64
+
65
+ class << self
# Reads or sets the human-readable description of the shift.
#
# @param text [#to_s, nil] new description; omit (or pass nil) to read
# @return [String, nil] the stored description when reading; when writing,
#   the value just stored (blank input is stored as nil)
def description(text = nil)
  return _description if text.nil?

  self._description = text.to_s.presence
end
73
+
# Reads or sets the task name stored for this shift class.
#
# @param value [#to_s, nil] new task name; omit (or pass nil) to read
# @return [String, nil] the stored name when reading; when writing, the
#   value just stored (blank input is stored as nil)
def task_name(value = nil)
  return _task_name if value.nil?

  self._task_name = value.to_s.presence
end
81
+
# Declares how the shift wraps its work in database transactions.
#
# @param mode [Symbol, Boolean] `:single`/`true`, `:per_record`, or `:none`/`false`
# @return [Symbol] the normalized mode (`:single`, `:per_record`, or `:none`)
# @raise [ArgumentError] for any other value
def transaction(mode)
  # Every accepted spelling mapped onto the mode that is actually stored.
  normalized_modes = {
    per_record: :per_record,
    none: :none,
    false => :none,
    single: :single,
    true => :single,
  }

  self._transaction_mode = normalized_modes.fetch(mode) do
    raise ArgumentError, "Invalid transaction mode: #{mode.inspect}. Expected :single, :per_record, :none, true, or false."
  end
end
94
+
# Reads or sets whether a progress bar is shown for this shift.
#
# @param enabled [Object, nil] truthy/falsy switch; omit (or pass nil) to read
# @return [Boolean] the current setting, or the boolean just stored
def progress(enabled = nil)
  return _progress_enabled if enabled.nil?

  self._progress_enabled = enabled ? true : false
end
102
+
# Sets the pause applied after each processed record.
#
# @param interval [Numeric, nil] value handed to `sleep` by the record loop;
#   nil disables throttling
# @return [Numeric, nil] the interval just stored
def throttle(interval)
  self._throttle_interval = interval
end
106
+
# Runs the shift with the dry-run flag taken from `Internal::Env.dry_run?`
# and turns an unsuccessful result into an exception.
#
# @return [nil] when the result is ok
# @raise [Exception] the result's own exception, when it carries one
# @raise [StandardError] with the result's error message otherwise
def run!
  outcome = call(dry_run: Internal::Env.dry_run?)
  raise outcome.exception if outcome.exception
  raise StandardError, outcome.error unless outcome.ok?
end
113
+ end
114
+
115
+ # --- Public API (intentionally exposed to subclasses) ---
116
+
# Default body of the shift: hands every record of `collection` to
# `process_record`, going through the tracked iteration helper.
def call
  _for_each_record_in(collection) do |record|
    process_record(record)
  end
end
120
+
# Loads the records for a fixed list of ids and fails fast if any is missing.
#
# @param model [Class] model responding to `none`, `where` and `name`
# @param ids [Array, Object, nil] ids to load; nils and duplicates are dropped
# @return [Array, Object] `model.none` for an empty list, otherwise the
#   records in the same order as the de-duplicated ids
# @raise [RuntimeError] listing the ids that could not be found
def find_exactly!(model, ids)
  wanted = Array(ids).compact.uniq
  return model.none if wanted.empty?

  found = model.where(id: wanted).index_by(&:id)
  missing = wanted.reject { |id| found.key?(id) }
  if missing.any?
    raise "Expected #{model.name} with ids #{wanted.inspect}, but missing: #{missing.inspect}"
  end

  # Return the records in the caller's order rather than the query's order.
  found.values_at(*wanted)
end
131
+
# Predicate form of the `dry_run` input declared with `expects`.
def dry_run?
  dry_run
end
133
+
# Marks the record currently being processed as skipped.
#
# @param reason [String, nil] optional explanation, logged when given
def skip!(reason = nil)
  @stats[:skipped] += 1
  # `_process_one` adds one to :succeeded once the record's block returns;
  # subtracting here cancels that out so a skip is not counted as a success.
  @stats[:succeeded] -= 1
  log " SKIP: #{reason}" if reason
end
139
+
# Writes a message line to standard output.
#
# @param message [Object] anything `puts` can print
def log(message)
  $stdout.puts(message)
end
143
+
144
+ private
145
+
146
+ # --- Axn lifecycle hooks ---
147
+
# `around` hook: wraps the whole run so that a dry run leaves no DB changes
# behind in the transactional modes.
#
# - `:none`       runs the chain as-is (no transaction, no rollback).
# - `:single`     runs the chain in one transaction, rolled back on dry run.
# - `:per_record` opens a transaction only for a dry run, and always rolls
#                 it back; a committing run is left to the per-record logic.
#
# @param chain [#call] the rest of the hook chain / the action itself
def _with_transaction_for_dry_run(chain)
  case _transaction_mode
  when :none
    chain.call
    nil
  when :single
    ActiveRecord::Base.transaction do
      chain.call
      raise ActiveRecord::Rollback if dry_run?
    end
    nil
  else
    return chain.call unless dry_run?

    ActiveRecord::Base.transaction do
      chain.call
      raise ActiveRecord::Rollback
    end
  end
end
171
+
# Resets counters, collected errors, timers and interrupt/resume markers.
def _reset_tracking
  now = Time.current
  @stats = { processed: 0, succeeded: 0, failed: 0, skipped: 0 }
  @errors = []
  @start_time = now
  @last_status_print = now
  @_data_shift_interrupted = false
  @_last_successful_id = nil
end
180
+
# Prints the end-of-run summary to standard output via `Internal::Output`.
def _print_summary
  summary = {
    io: $stdout,
    stats: @stats,
    errors: @errors,
    start_time: @start_time,
    dry_run: dry_run?,
    transaction_mode: _transaction_mode,
    interrupted: @_data_shift_interrupted,
    task_name: self.class.task_name,
    last_successful_id: @_last_successful_id,
  }
  Internal::Output.print_summary(**summary)
end
194
+
195
+ # --- Override points ---
196
+
# Override point: return the records to process.
#
# @raise [NotImplementedError] unless a subclass overrides it
def collection
  raise NotImplementedError, "#{self.class.name}: override `collection`"
end
200
+
# Override point: apply the shift to a single record.
#
# @param _record [Object] one element of `collection`
# @raise [NotImplementedError] unless a subclass overrides it
def process_record(_record)
  raise NotImplementedError, "#{self.class.name}: override `process_record`"
end
204
+
205
+ # --- Record iteration ---
206
+
# Prints an in-flight status report to standard output via `Internal::Output`.
def _print_progress
  snapshot = {
    io: $stdout,
    stats: @stats,
    errors: @errors,
    start_time: @start_time,
    status_interval: Internal::Env.status_interval_seconds,
  }
  Internal::Output.print_progress(**snapshot)
end
216
+
# Iterates `records` with tracking, status traps and interrupt handling.
#
# Registers this run in `ActiveSupport::IsolatedExecutionState` so the status
# traps can reach it, and always unregisters it and restores the previous
# handlers afterwards.
#
# @param records [Object] relation or enumerable to walk
# @param label [String, nil] optional label forwarded to the iteration
def _for_each_record_in(records, label: nil, &)
  _reset_tracking
  run_key = :_data_shifter_current_run
  ActiveSupport::IsolatedExecutionState[run_key] = self
  status_proc = proc { ActiveSupport::IsolatedExecutionState[run_key]&.send(:_print_progress) }
  saved_handlers = Internal::SignalHandler.install_status_traps(status_proc)
  begin
    _each_record_impl(records, label:, &)
  rescue Interrupt
    _handle_interrupt
  ensure
    ActiveSupport::IsolatedExecutionState.delete(run_key)
    Internal::SignalHandler.restore_status_traps(saved_handlers)
  end
end
231
+
# Prepares the collection, prints the header, and runs the strategy that
# matches the configured transaction mode.
#
# Objects responding to `find_each` are counted with `count` and iterated
# as-is; anything else is materialized into an Array first.
#
# @param records [Object] relation or enumerable to walk
# @param label [String, nil] overrides the label derived from the records
def _each_record_impl(records, label: nil, &)
  records = _apply_continue_from(records)

  if records.respond_to?(:find_each)
    enum = records
    total = records.count
    @label = label || Internal::RecordUtils.default_label_for_relation(records)
  else
    enum = records.respond_to?(:to_a) ? records.to_a : Array(records)
    total = enum.size
    @label = label || Internal::RecordUtils.default_label(enum)
  end
  _print_header(total)

  strategy = {
    single: :_run_in_single_transaction,
    per_record: :_run_per_record,
    none: :_run_without_transaction,
  }[_transaction_mode]
  send(strategy, enum, total, &) if strategy

  # NOTE(review): `fail!` is not defined in this file; presumably provided by Axn.
  fail! "#{@stats[:failed]} record(s) failed" if @errors.any?
end
259
+
# Narrows a relation to the rows after the CONTINUE_FROM marker.
#
# @param records [Object] the collection about to be iterated
# @return [Object] `records` untouched when no marker is set, otherwise the
#   relation restricted to rows whose primary key is greater than the marker
# @raise [ArgumentError] when a marker is set but `records` does not respond
#   to `find_each`, or when the model has no single-column primary key
def _apply_continue_from(records)
  continue_from = Internal::Env.continue_from_id
  return records if continue_from.nil?

  unless records.respond_to?(:find_each)
    raise ArgumentError,
          "CONTINUE_FROM is only supported for ActiveRecord::Relation collections. " \
          "Array-based collections (e.g. from find_exactly!) cannot be resumed."
  end

  model = records.model
  primary_key = model.primary_key
  # A nil (no primary key) or Array (composite key) cannot be used in a single
  # `column > value` comparison; fail clearly instead of emitting broken SQL.
  unless primary_key.is_a?(String)
    raise ArgumentError,
          "CONTINUE_FROM requires a single-column primary key, " \
          "but #{model.name} has #{primary_key.inspect}."
  end

  log "[CONTINUE_FROM] Resuming from #{primary_key} > #{continue_from}"
  # Quote the column as well as the table so reserved-word or mixed-case
  # primary key names still produce valid SQL.
  quoted_column = model.connection.quote_column_name(primary_key)
  records.where("#{model.quoted_table_name}.#{quoted_column} > ?", continue_from)
end
274
+
275
+ # --- Transaction execution strategies ---
276
+
# Strategy for `transaction :single`: iterates everything inside one
# transaction, rolling it back at the end of a dry run.
#
# Any StandardError escaping the transaction is recorded as a "transaction"
# failure, unless a per-record error has already been collected.
def _run_in_single_transaction(enum, total, &block)
  ActiveRecord::Base.transaction do
    _iterate(enum, total, &block)
    next unless dry_run?

    log "\nDry run complete — rolling back all changes."
    raise ActiveRecord::Rollback
  end
rescue StandardError => error
  if @errors.empty?
    @stats[:failed] += 1
    @errors << { record: "transaction", error: error.message, backtrace: error.backtrace&.first(3) }
  end
end
291
+
# Strategy for `transaction :per_record`: each record gets its own
# transaction when committing. On a dry run the record is processed without
# opening a transaction here.
def _run_per_record(enum, total, &)
  _iterate(enum, total) do |record|
    next yield(record) if dry_run?

    ActiveRecord::Base.transaction { yield record }
  end
end
301
+
# Strategy for `transaction false` / `:none`: plain iteration, no transaction.
def _run_without_transaction(enum, total, &)
  _iterate(enum, total, &)
end
305
+
# Walks every record, handing each one to `_process_one` together with the
# caller's block, then advances the progress bar and applies the throttle.
#
# Collections responding to `find_each` are walked with it; everything else
# is walked with `each`.
#
# @param enum [Object] relation or enumerable to walk
# @param total [Integer] record count passed to the progress bar
def _iterate(enum, total)
  bar = Internal::ProgressBar.create(total:, dry_run: dry_run?, enabled: _progress_enabled)
  walker = enum.respond_to?(:find_each) ? :find_each : :each

  enum.public_send(walker) do |record|
    _process_one(record) { yield record }
    bar&.increment
    sleep(_throttle_interval) if _throttle_interval
  end
end
322
+
# Runs the caller's block for one record and keeps the statistics current.
#
# Failures are counted, stored in `@errors` and logged. They are re-raised
# only in `:single` mode; in the other modes the error is swallowed here and
# iteration continues.
#
# @param record [Object] the record being processed
def _process_one(record)
  @stats[:processed] += 1
  yield
  @stats[:succeeded] += 1
  @_last_successful_id = record.id if record.respond_to?(:id)
rescue StandardError => error
  @stats[:failed] += 1
  identifier = Internal::RecordUtils.identifier(record)
  @errors << { record: identifier, error: error.message, backtrace: error.backtrace&.first(3) }
  log "ERROR #{identifier}: #{error.message}"

  raise if _transaction_mode == :single
ensure
  _maybe_print_interval_status
end
338
+
# Prints a progress report when the configured status interval has elapsed
# since the last one. Does nothing without a positive interval.
def _maybe_print_interval_status
  interval = Internal::Env.status_interval_seconds
  return unless interval&.positive?
  return unless @start_time

  elapsed = Time.current - @last_status_print
  return unless elapsed >= interval

  @last_status_print = Time.current
  _print_progress
end
347
+
348
+ # --- Output helpers ---
349
+
# Prints the run header to standard output via `Internal::Output`.
#
# @param total [Integer] number of records about to be processed
def _print_header(total)
  header = {
    io: $stdout,
    shift_class: self.class,
    total:,
    label: @label,
    dry_run: dry_run?,
    transaction_mode: _transaction_mode,
    status_interval: Internal::Env.status_interval_seconds,
  }
  Internal::Output.print_header(**header)
end
361
+
# Handles Ctrl+C: flags the run as interrupted, reports, and re-raises.
#
# @raise [Interrupt] always
def _handle_interrupt
  @_data_shift_interrupted = true
  log "\n\n*** Interrupted by user (Ctrl+C) ***"

  # The summary is printed here because on_error may not fire for Interrupt,
  # which is a SignalException.
  _print_summary

  # Raising again lets the wrapping transaction block roll back.
  raise Interrupt
end
372
+ end
373
+ end
@@ -0,0 +1,75 @@
1
+ # frozen_string_literal: true
2
+
3
+ module DataShifter
4
+ # Test helpers for RSpec. Include this module in your spec_helper or rails_helper:
5
+ #
6
+ # require "data_shifter/spec_helper"
7
+ #
8
+ # RSpec.configure do |config|
9
+ # config.include DataShifter::SpecHelper, type: :data_shift
10
+ # end
11
+ #
12
+ # Or include it in individual specs:
13
+ #
14
+ # RSpec.describe DataShifts::BackfillFoo do
15
+ # include DataShifter::SpecHelper
16
+ # ...
17
+ # end
18
+ #
19
+ module SpecHelper
20
+ # Run a data shift class with the given options.
21
+ # Returns the Axn::Result.
22
+ #
23
+ # @param shift_class [Class] the DataShifter::Shift subclass
24
+ # @param dry_run [Boolean] whether to run in dry_run mode (default: true)
25
+ # @param commit [Boolean] shorthand for dry_run: false; when true it overrides `dry_run` (default: false)
26
+ # @return [Axn::Result]
27
+ #
28
+ # @example
29
+ # result = run_data_shift(DataShifts::BackfillFoo)
30
+ # expect(result).to be_ok
31
+ #
32
+ # @example with commit
33
+ # result = run_data_shift(DataShifts::BackfillFoo, commit: true)
34
+ # expect(record.reload.foo).to eq("bar")
35
+ #
# Calls the shift class, letting `commit: true` force a real (non-dry) run.
def run_data_shift(shift_class, dry_run: true, commit: false)
  shift_class.call(dry_run: commit ? false : dry_run)
end
40
+
41
+ # Suppress STDOUT output during a block (useful for cleaner test output).
42
+ #
43
+ # @example
44
+ # silence_data_shift_output do
45
+ # run_data_shift(DataShifts::BackfillFoo, commit: true)
46
+ # end
47
+ #
# Swaps `$stdout` for a throwaway StringIO while the block runs and puts the
# original stream back afterwards, even if the block raises.
#
# @return [Object] whatever the block returns
def silence_data_shift_output
  saved_stdout = $stdout
  begin
    $stdout = StringIO.new
    yield
  ensure
    $stdout = saved_stdout
  end
end
55
+
56
+ # Run a shift and capture its output.
57
+ # Returns [Axn::Result, String] tuple.
58
+ #
59
+ # @example
60
+ # result, output = capture_data_shift_output do
61
+ # run_data_shift(DataShifts::BackfillFoo)
62
+ # end
63
+ # expect(output).to include("DRY RUN")
64
+ #
# Runs the block with `$stdout` redirected to a StringIO and restores the
# original stream afterwards, even if the block raises.
#
# @return [Array(Object, String)] the block's value and the captured output
def capture_data_shift_output
  saved_stdout = $stdout
  begin
    $stdout = StringIO.new
    [yield, $stdout.string]
  ensure
    $stdout = saved_stdout
  end
end
74
+ end
75
+ end
@@ -0,0 +1,5 @@
1
+ # frozen_string_literal: true
2
+
module DataShifter
  # Gem version string.
  VERSION = "0.1.0"
end
@@ -0,0 +1,5 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "data_shifter/version"
4
+ require_relative "data_shifter/shift"
5
+ require_relative "data_shifter/railtie"
@@ -0,0 +1,132 @@
1
+ # frozen_string_literal: true
2
+
3
+ # Generator for data shifts.
4
+ #
5
+ # Usage:
6
+ # rails g data_shift backfill_users
7
+ # rails g data_shift backfill_users --model=User
8
+ #
9
+ class DataShiftGenerator < Rails::Generators::NamedBase
# --model: read by create_shift_file to pre-fill the generated `collection`.
class_option :model,
             type: :string,
             default: nil,
             desc: "Model to operate on (e.g. User). Pre-fills the collection method."

# --spec: opt-in; create_spec_file does nothing unless it is set.
class_option :spec,
             type: :boolean,
             default: false,
             desc: "Generate RSpec file"
19
+
# Aborts generation when an existing shift file already uses this name.
#
# A file conflicts only when its name is exactly `<digits>_<name>.rb`, so a
# shift called `other_<name>` is not mistaken for `<name>`.
# NOTE(review): this assumes the rake task name is the file name minus its
# numeric timestamp prefix (the layout create_shift_file produces); confirm
# against the code that registers the rake tasks.
#
# @return [nil] when there is no shifts directory or no conflicting file
# @raise [Thor::Error] naming the conflicting file
def check_for_naming_conflict
  underscored_name = name.underscore

  # Use destination_root if available (for testing), otherwise Rails.root
  root = respond_to?(:destination_root) ? Pathname.new(destination_root) : Rails.root
  shifts_dir = root.join("lib/data_shifts")
  return unless shifts_dir.exist?

  # The glob alone also matches "<timestamp>_other_<name>.rb"; the anchored
  # pattern keeps only files whose whole task name equals the requested one.
  exact_file_name = /\A\d+_#{Regexp.escape(underscored_name)}\.rb\z/
  candidates = Dir.glob(shifts_dir.join("*_#{underscored_name}.rb").to_s)
  conflicting_file = candidates.find { |path| File.basename(path).match?(exact_file_name) }
  return unless conflicting_file

  raise Thor::Error, <<~ERROR
    A data shift with task name '#{underscored_name}' already exists:
    #{conflicting_file}

    Rake task names must be unique. Please choose a different name.
  ERROR
end
39
+
# Writes lib/data_shifts/<timestamp>_<name>.rb with a skeleton shift class.
#
# Also sets @timestamp, @class_name and @model_name; create_spec_file reads
# @class_name and @model_name afterwards.
def create_shift_file
  underscored_name = name.underscore
  @timestamp = Time.current.strftime("%Y%m%d%H%M%S")
  @class_name = underscored_name.camelize

  requested_model = options[:model].to_s.strip
  @model_name = (requested_model.underscore.singularize.camelize if requested_model.present?)

  collection_body = @model_name.present? ? "#{@model_name}.all" : "# Model.where(foo: nil)"
  record_arg = @model_name.present? ? @model_name.underscore : "record"

  create_file "lib/data_shifts/#{@timestamp}_#{underscored_name}.rb", <<~RUBY
    # frozen_string_literal: true

    # rake data:shift:#{underscored_name} # Dry run (default)
    # COMMIT=1 rake data:shift:#{underscored_name} # Apply changes

    module DataShifts
      class #{@class_name} < DataShifter::Shift
        description "TODO: Describe this shift"

        transaction true # or false or :per_record

        def collection
          #{collection_body}
        end

        def process_record(#{record_arg})
          # #{record_arg}.update!(...)
        end
      end
    end
  RUBY
end
78
+
# Writes spec/lib/data_shifts/<name>_spec.rb when --spec was passed and RSpec
# is the active test framework. Relies on @class_name and @model_name having
# been set by create_shift_file.
def create_spec_file
  return unless options[:spec] && rspec_enabled?

  underscored_name = name.underscore
  record_arg = @model_name.present? ? @model_name.underscore : "record"

  create_file "spec/lib/data_shifts/#{underscored_name}_spec.rb", <<~RUBY
    # frozen_string_literal: true

    require "rails_helper"
    require "data_shifter/spec_helper"

    RSpec.describe DataShifts::#{@class_name} do
      include DataShifter::SpecHelper

      before { allow($stdout).to receive(:puts) }

      # TODO: Set up test records
      # let(:#{record_arg}) { create(:#{record_arg}) }

      describe "dry run" do
        it "does not persist changes" do
          result = run_data_shift(described_class, dry_run: true)
          expect(result).to be_ok
          # TODO: Assert that records are unchanged
        end
      end

      describe "commit" do
        it "applies changes" do
          result = run_data_shift(described_class, commit: true)
          expect(result).to be_ok
          # TODO: Assert that records are updated
        end
      end
    end
  RUBY
end
118
+
119
+ private
120
+
# Reports whether spec files should be generated for this application.
#
# Uses the test framework recorded in the Rails generator options when there
# is one, and otherwise falls back to probing for rspec-rails.
#
# @return [Boolean] always a strict true/false, never nil or a String
def rspec_enabled?
  return false unless defined?(Rails)

  # Check Rails generator configuration
  test_framework = Rails.configuration.generators.options.dig(:rails, :test_framework)
  return test_framework == :rspec if test_framework

  # Fall back to checking if rspec-rails is loaded. `defined?` yields a
  # description String or nil, so coerce it to a real boolean.
  !defined?(RSpec::Rails).nil?
end
132
+ end
@@ -0,0 +1,55 @@
1
+ # frozen_string_literal: true
2
+
3
+ module RuboCop
4
+ module Cop
5
+ module DataShifter
6
+ # In data shift files, `transaction false` (or `transaction :none`) disables the automatic transaction
7
+ # and rollback. DB writes (and side effects) are not rolled back on dry run, so
8
+ # the shift must guard them with `return if dry_run?` or `return unless dry_run?`.
9
+ #
10
+ # @example
11
+ # # bad
12
+ # class BackfillUsers < DataShifter::Shift
13
+ # transaction false
14
+ # def process_record(record)
15
+ # record.update!(foo: 1)
16
+ # end
17
+ # end
18
+ #
19
+ # # good
20
+ # class BackfillUsers < DataShifter::Shift
21
+ # transaction false
22
+ # def process_record(record)
23
+ # return if dry_run?
24
+ # record.update!(foo: 1)
25
+ # end
26
+ # end
class SkipTransactionGuardDryRun < Base
  MSG = "Data shifts using `transaction false` must guard writes/side effects with " \
        "`return if dry_run?` or `return unless dry_run?`."

  # Only `transaction` calls can ever match, so let RuboCop skip every other
  # send node instead of running the matcher on each of them.
  RESTRICT_ON_SEND = %i[transaction].freeze

  # Matches `transaction :none` and `transaction false`, on any receiver.
  def_node_matcher :skip_transaction_call?, <<~PATTERN
    (send _ :transaction {(sym :none) (false)})
  PATTERN

  # Flags a transaction-disabling call when the file has no `dry_run?` call.
  #
  # @param node [RuboCop::AST::SendNode] the send node being visited
  def on_send(node)
    return unless skip_transaction_call?(node)
    return if file_contains_dry_run_guard?

    add_offense(node, message: MSG)
  end

  private

  # The check is file-wide and deliberately loose: any call to `dry_run?`
  # anywhere in the file counts as a guard. A file without an AST is treated
  # as guarded so that no offense is reported for it.
  #
  # @return [Boolean]
  def file_contains_dry_run_guard?
    ast = processed_source.ast
    return true unless ast

    ast.each_node(:send).any? { |send_node| send_node.method?(:dry_run?) }
  end
end
53
+ end
54
+ end
55
+ end