online_migrations 0.26.0 → 0.27.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (51) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +18 -0
  3. data/docs/0.27-upgrade.md +24 -0
  4. data/docs/background_data_migrations.md +200 -101
  5. data/docs/background_schema_migrations.md +2 -2
  6. data/lib/generators/online_migrations/{background_migration_generator.rb → data_migration_generator.rb} +4 -4
  7. data/lib/generators/online_migrations/templates/change_background_data_migrations.rb.tt +34 -0
  8. data/lib/generators/online_migrations/templates/{background_data_migration.rb.tt → data_migration.rb.tt} +8 -9
  9. data/lib/generators/online_migrations/templates/initializer.rb.tt +19 -25
  10. data/lib/generators/online_migrations/templates/install_migration.rb.tt +9 -40
  11. data/lib/generators/online_migrations/upgrade_generator.rb +16 -8
  12. data/lib/online_migrations/active_record_batch_enumerator.rb +8 -0
  13. data/lib/online_migrations/background_data_migrations/backfill_column.rb +50 -0
  14. data/lib/online_migrations/background_data_migrations/config.rb +62 -0
  15. data/lib/online_migrations/{background_migrations → background_data_migrations}/copy_column.rb +15 -28
  16. data/lib/online_migrations/{background_migrations → background_data_migrations}/delete_associated_records.rb +9 -5
  17. data/lib/online_migrations/{background_migrations → background_data_migrations}/delete_orphaned_records.rb +5 -9
  18. data/lib/online_migrations/background_data_migrations/migration.rb +312 -0
  19. data/lib/online_migrations/{background_migrations → background_data_migrations}/migration_helpers.rb +72 -61
  20. data/lib/online_migrations/background_data_migrations/migration_job.rb +160 -0
  21. data/lib/online_migrations/background_data_migrations/migration_status_validator.rb +65 -0
  22. data/lib/online_migrations/{background_migrations → background_data_migrations}/perform_action_on_relation.rb +5 -5
  23. data/lib/online_migrations/{background_migrations → background_data_migrations}/reset_counters.rb +5 -5
  24. data/lib/online_migrations/background_data_migrations/scheduler.rb +78 -0
  25. data/lib/online_migrations/background_data_migrations/ticker.rb +62 -0
  26. data/lib/online_migrations/background_schema_migrations/config.rb +2 -2
  27. data/lib/online_migrations/background_schema_migrations/migration.rb +51 -123
  28. data/lib/online_migrations/background_schema_migrations/migration_helpers.rb +25 -46
  29. data/lib/online_migrations/background_schema_migrations/migration_runner.rb +43 -97
  30. data/lib/online_migrations/background_schema_migrations/scheduler.rb +2 -2
  31. data/lib/online_migrations/change_column_type_helpers.rb +17 -4
  32. data/lib/online_migrations/config.rb +4 -4
  33. data/lib/online_migrations/data_migration.rb +127 -0
  34. data/lib/online_migrations/error_messages.rb +2 -0
  35. data/lib/online_migrations/lock_retrier.rb +5 -2
  36. data/lib/online_migrations/schema_statements.rb +1 -1
  37. data/lib/online_migrations/shard_aware.rb +44 -0
  38. data/lib/online_migrations/version.rb +1 -1
  39. data/lib/online_migrations.rb +18 -11
  40. metadata +22 -21
  41. data/lib/online_migrations/background_migration.rb +0 -64
  42. data/lib/online_migrations/background_migrations/backfill_column.rb +0 -54
  43. data/lib/online_migrations/background_migrations/background_migration_class_validator.rb +0 -29
  44. data/lib/online_migrations/background_migrations/config.rb +0 -74
  45. data/lib/online_migrations/background_migrations/migration.rb +0 -329
  46. data/lib/online_migrations/background_migrations/migration_job.rb +0 -109
  47. data/lib/online_migrations/background_migrations/migration_job_runner.rb +0 -66
  48. data/lib/online_migrations/background_migrations/migration_job_status_validator.rb +0 -29
  49. data/lib/online_migrations/background_migrations/migration_runner.rb +0 -161
  50. data/lib/online_migrations/background_migrations/migration_status_validator.rb +0 -48
  51. data/lib/online_migrations/background_migrations/scheduler.rb +0 -42
@@ -0,0 +1,160 @@
1
+ # frozen_string_literal: true
2
+
3
+ module OnlineMigrations
4
+ module BackgroundDataMigrations
5
+ # Sidekiq job responsible for running background data migrations.
6
+ class MigrationJob
7
+ include Sidekiq::IterableJob
8
+
9
+ sidekiq_retry_in do |count, _exception, jobhash|
10
+ migration_id = jobhash["args"].fetch(0)
11
+ migration = Migration.find(migration_id)
12
+
13
+ if count + 1 >= migration.max_attempts
14
+ :kill
15
+ end
16
+ end
17
+
18
+ sidekiq_retries_exhausted do |jobhash, exception|
19
+ migration_id = jobhash["args"].fetch(0)
20
+ migration = Migration.find(migration_id)
21
+ migration.persist_error(exception)
22
+
23
+ OnlineMigrations.config.background_data_migrations.error_handler.call(exception, migration)
24
+ end
25
+
26
+ TICKER_INTERVAL = 5 # seconds
27
+
28
+ def initialize
29
+ super
30
+
31
+ @migration = nil
32
+ @data_migration = nil
33
+
34
+ @ticker = Ticker.new(TICKER_INTERVAL) do |ticks, duration|
35
+ # TODO: use 'cursor' accessor from sidekiq in the future.
36
+ # https://github.com/sidekiq/sidekiq/pull/6606
37
+ @migration.persist_progress(@_cursor, ticks, duration)
38
+ @migration.reload
39
+ end
40
+
41
+ @throttle_checked_at = current_time
42
+ end
43
+
44
+ def on_start
45
+ @migration.start
46
+ end
47
+
48
+ def on_resume
49
+ @data_migration.after_resume
50
+ end
51
+
52
+ def on_stop
53
+ @ticker.persist
54
+ @migration.stop
55
+ end
56
+
57
+ def on_complete
58
+ # Job was manually cancelled.
59
+ @migration.cancel if cancelled?
60
+
61
+ @migration.complete
62
+ end
63
+
64
+ def build_enumerator(migration_id, cursor:)
65
+ @migration = BackgroundDataMigrations::Migration.find(migration_id)
66
+ cursor ||= @migration.cursor
67
+
68
+ @migration.on_shard_if_present do
69
+ @data_migration = @migration.data_migration
70
+ collection_enum = @data_migration.build_enumerator(cursor: cursor)
71
+
72
+ if collection_enum
73
+ if !collection_enum.is_a?(Enumerator)
74
+ raise ArgumentError, <<~MSG.squish
75
+ #{@data_migration.class.name}#build_enumerator must return an Enumerator,
76
+ got #{collection_enum.class.name}.
77
+ MSG
78
+ end
79
+
80
+ collection_enum
81
+ else
82
+ collection = @data_migration.collection
83
+
84
+ case collection
85
+ when ActiveRecord::Relation
86
+ options = {
87
+ cursor: cursor,
88
+ batch_size: @data_migration.class.active_record_enumerator_batch_size || 100,
89
+ }
90
+ active_record_records_enumerator(collection, **options)
91
+ when ActiveRecord::Batches::BatchEnumerator
92
+ if collection.start || collection.finish
93
+ raise ArgumentError, <<~MSG.squish
94
+ #{@data_migration.class.name}#collection does not support
95
+ a batch enumerator with the "start" or "finish" options.
96
+ MSG
97
+ end
98
+
99
+ active_record_relations_enumerator(
100
+ collection.relation,
101
+ batch_size: collection.batch_size,
102
+ cursor: cursor,
103
+ use_ranges: collection.use_ranges
104
+ )
105
+ when Array
106
+ array_enumerator(collection, cursor: cursor)
107
+ else
108
+ raise ArgumentError, <<~MSG.squish
109
+ #{@data_migration.class.name}#collection must be either an ActiveRecord::Relation,
110
+ ActiveRecord::Batches::BatchEnumerator, or Array.
111
+ MSG
112
+ end
113
+ end
114
+ end
115
+ end
116
+
117
+ def each_iteration(item, _migration_id)
118
+ if @migration.cancelling? || @migration.pausing? || @migration.paused?
119
+ # Finish this exact sidekiq job. When the migration is paused
120
+ # and will be resumed, a new job will be enqueued.
121
+ finished = true
122
+ throw :abort, finished
123
+ elsif should_throttle?
124
+ ActiveSupport::Notifications.instrument("throttled.background_data_migrations", migration: @migration)
125
+ finished = false
126
+ throw :abort, finished
127
+ else
128
+ @data_migration.around_process do
129
+ @migration.data_migration.process(item)
130
+
131
+ pause = OnlineMigrations.config.background_data_migrations.iteration_pause
132
+ sleep(pause) if pause > 0
133
+ end
134
+ @ticker.tick
135
+ end
136
+ end
137
+
138
+ private
139
+ # It would be better for sidekiq to have a callback like `around_perform`,
140
+ # but currently this is the way to make job iteration shard aware.
141
+ def iterate_with_enumerator(enumerator, arguments)
142
+ @migration.on_shard_if_present { super }
143
+ end
144
+
145
+ THROTTLE_CHECK_INTERVAL = 5 # seconds
146
+ private_constant :THROTTLE_CHECK_INTERVAL
147
+
148
+ def should_throttle?
149
+ if current_time - @throttle_checked_at >= THROTTLE_CHECK_INTERVAL
150
+ @throttle_checked_at = current_time
151
+ OnlineMigrations.config.throttler.call
152
+ end
153
+ end
154
+
155
+ def current_time
156
+ ::Process.clock_gettime(::Process::CLOCK_MONOTONIC)
157
+ end
158
+ end
159
+ end
160
+ end
@@ -0,0 +1,65 @@
1
+ # frozen_string_literal: true
2
+
3
+ module OnlineMigrations
4
+ module BackgroundDataMigrations
5
+ # @private
6
+ class MigrationStatusValidator < ActiveModel::Validator
7
+ # Valid status transitions a Migration can make.
8
+ VALID_STATUS_TRANSITIONS = {
9
+ # enqueued -> running occurs when the migration starts performing.
10
+ # enqueued -> paused occurs when the migration is paused before starting.
11
+ # enqueued -> cancelled occurs when the migration is cancelled before starting.
12
+ # enqueued -> failed occurs when the migration job fails to be enqueued, or
13
+ # if the migration is deleted before it starts running.
14
+ "enqueued" => ["running", "paused", "cancelled", "failed"],
15
+ # running -> succeeded occurs when the migration completes successfully.
16
+ # running -> pausing occurs when a user pauses the migration as it's performing.
17
+ # running -> cancelling occurs when a user cancels the migration as it's performing.
18
+ # running -> failed occurs when the job raises an exception when running.
19
+ "running" => [
20
+ "succeeded",
21
+ "pausing",
22
+ "cancelling",
23
+ "failed",
24
+ ],
25
+ # pausing -> paused occurs when the migration actually halts performing and
26
+ # occupies a status of paused.
27
+ # pausing -> cancelling occurs when the user cancels a migration immediately
28
+ # after it was paused, such that the migration had not actually halted yet.
29
+ # pausing -> succeeded occurs when the migration completes immediately after
30
+ # being paused. This can happen if the migration is on its last iteration
31
+ # when it is paused, or if the migration is paused after enqueue but has
32
+ # nothing in its collection to process.
33
+ # pausing -> failed occurs when the job raises an exception after the
34
+ # user has paused it.
35
+ "pausing" => ["paused", "cancelling", "succeeded", "failed"],
36
+ # paused -> enqueued occurs when the migration is resumed after being paused.
37
+ # paused -> cancelled occurs when the user cancels the migration after it is paused.
38
+ "paused" => ["enqueued", "cancelled"],
39
+ # failed -> enqueued occurs when the migration is retried after encountering an error.
40
+ "failed" => ["enqueued"],
41
+ # cancelling -> cancelled occurs when the migration actually halts performing
42
+ # and occupies a status of cancelled.
43
+ # cancelling -> succeeded occurs when the migration completes immediately after
44
+ # being cancelled. See description for pausing -> succeeded.
45
+ # cancelling -> failed occurs when the job raises an exception after the
46
+ # user has cancelled it.
47
+ "cancelling" => ["cancelled", "succeeded", "failed"],
48
+ }
49
+
50
+ def validate(record)
51
+ return if !record.status_changed?
52
+
53
+ previous_status, new_status = record.status_change
54
+ valid_new_statuses = VALID_STATUS_TRANSITIONS.fetch(previous_status, [])
55
+
56
+ if !valid_new_statuses.include?(new_status)
57
+ record.errors.add(
58
+ :status,
59
+ "cannot transition data migration from status '#{previous_status}' to '#{new_status}'"
60
+ )
61
+ end
62
+ end
63
+ end
64
+ end
65
+ end
@@ -1,9 +1,9 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module OnlineMigrations
4
- module BackgroundMigrations
4
+ module BackgroundDataMigrations
5
5
  # @private
6
- class PerformActionOnRelation < BackgroundMigration
6
+ class PerformActionOnRelation < DataMigration
7
7
  attr_reader :model, :conditions, :action, :options
8
8
 
9
9
  def initialize(model_name, conditions, action, options = {})
@@ -13,11 +13,11 @@ module OnlineMigrations
13
13
  @options = options.symbolize_keys
14
14
  end
15
15
 
16
- def relation
17
- model.unscoped.where(conditions)
16
+ def collection
17
+ model.unscoped.where(conditions).in_batches(of: 100)
18
18
  end
19
19
 
20
- def process_batch(relation)
20
+ def process(relation)
21
21
  case action
22
22
  when :update_all
23
23
  updates = options.fetch(:updates)
@@ -1,9 +1,9 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module OnlineMigrations
4
- module BackgroundMigrations
4
+ module BackgroundDataMigrations
5
5
  # @private
6
- class ResetCounters < BackgroundMigration
6
+ class ResetCounters < DataMigration
7
7
  attr_reader :model, :counters, :touch
8
8
 
9
9
  def initialize(model_name, counters, options = {})
@@ -12,11 +12,11 @@ module OnlineMigrations
12
12
  @touch = options[:touch]
13
13
  end
14
14
 
15
- def relation
16
- model.unscoped
15
+ def collection
16
+ model.unscoped.in_batches(of: 100)
17
17
  end
18
18
 
19
- def process_batch(relation)
19
+ def process(relation)
20
20
  updates = counters.map do |counter_association|
21
21
  has_many_association = has_many_association(counter_association)
22
22
 
@@ -0,0 +1,78 @@
1
+ # frozen_string_literal: true
2
+
3
+ module OnlineMigrations
4
+ module BackgroundDataMigrations
5
+ # Class responsible for scheduling background data migrations.
6
+ #
7
+ # Scheduler should be configured to run periodically, for example, via cron.
8
+ #
9
+ # @example Run via whenever
10
+ # # add this to schedule.rb
11
+ # every 1.minute do
12
+ # runner "OnlineMigrations.run_background_data_migrations"
13
+ # end
14
+ #
15
+ # @example Specific shard
16
+ # every 1.minute do
17
+ # runner "OnlineMigrations.run_background_data_migrations(shard: :shard_two)"
18
+ # end
19
+ #
20
+ # @example Custom concurrency
21
+ # every 1.minute do
22
+ # # Allow to run 2 data migrations in parallel.
23
+ # runner "OnlineMigrations.run_background_data_migrations(concurrency: 2)"
24
+ # end
25
+ #
26
+ class Scheduler
27
+ def self.run(**options)
28
+ new.run(**options)
29
+ end
30
+
31
+ # Runs Scheduler
32
+ def run(shard: nil, concurrency: 1)
33
+ relation = Migration.queue_order
34
+ relation = relation.where(shard: shard) if shard
35
+
36
+ with_lock do
37
+ running = relation.running
38
+ enqueued = relation.enqueued
39
+
40
+ # Ensure no more than 'concurrency' migrations are running at the same time.
41
+ remaining_to_enqueue = concurrency - running.count
42
+ if remaining_to_enqueue > 0
43
+ migrations_to_enqueue = enqueued.limit(remaining_to_enqueue)
44
+ migrations_to_enqueue.each do |migration|
45
+ enqueue_migration(migration)
46
+ end
47
+ end
48
+ end
49
+
50
+ true
51
+ end
52
+
53
+ private
54
+ def with_lock(&block)
55
+ # Don't lock the whole table if we can lock only a single record (must be always the same).
56
+ first_record = Migration.queue_order.first
57
+ if first_record
58
+ first_record.with_lock(&block)
59
+ else
60
+ Migration.transaction do
61
+ Migration.connection.execute("LOCK #{Migration.table_name} IN ACCESS EXCLUSIVE MODE")
62
+ yield
63
+ end
64
+ end
65
+ end
66
+
67
+ def enqueue_migration(migration)
68
+ job = OnlineMigrations.config.background_data_migrations.job
69
+ job_class = job.constantize
70
+
71
+ jid = job_class.perform_async(migration.id)
72
+ if jid
73
+ migration.update!(status: :running, jid: jid)
74
+ end
75
+ end
76
+ end
77
+ end
78
+ end
@@ -0,0 +1,62 @@
1
+ # frozen_string_literal: true
2
+
3
+ module OnlineMigrations
4
+ module BackgroundDataMigrations
5
+ # This class encapsulates the logic behind updating the tick counter.
6
+ #
7
+ # It's initialized with a duration for the throttle, and a block to persist
8
+ # the number of ticks to increment.
9
+ #
10
+ # When +tick+ is called, the block will be called with the increment,
11
+ # provided the duration since the last update (or initialization) has been
12
+ # long enough.
13
+ #
14
+ # To not lose any increments, +persist+ should be used, which may call the
15
+ # block with any leftover ticks.
16
+ #
17
+ # @private
18
+ class Ticker
19
+ # Creates a Ticker that will call the block each time +tick+ is called,
20
+ # unless the tick is being throttled.
21
+ #
22
+ # @param interval [ActiveSupport::Duration, Numeric] Duration
23
+ # since initialization or last call that will cause a throttle.
24
+ # @yieldparam ticks [Integer] the increment in ticks to be persisted.
25
+ #
26
+ def initialize(interval, &block)
27
+ @interval = interval
28
+ @block = block
29
+ @last_persisted_at = Time.current
30
+ @ticks_recorded = 0
31
+ end
32
+
33
+ # Increments the tick count by one, and may persist the new value if the
34
+ # threshold duration has passed since initialization or the tick count was
35
+ # last persisted.
36
+ #
37
+ def tick
38
+ @ticks_recorded += 1
39
+ persist if persist?
40
+ end
41
+
42
+ # Persists the tick increments by calling the block passed to the
43
+ # initializer. This is idempotent in the sense that calling it twice in a
44
+ # row will call the block at most once (if it had been throttled).
45
+ #
46
+ def persist
47
+ return if @ticks_recorded == 0
48
+
49
+ now = Time.current
50
+ duration = now - @last_persisted_at
51
+ @last_persisted_at = now
52
+ @block.call(@ticks_recorded, duration)
53
+ @ticks_recorded = 0
54
+ end
55
+
56
+ private
57
+ def persist?
58
+ Time.now - @last_persisted_at >= @interval
59
+ end
60
+ end
61
+ end
62
+ end
@@ -4,9 +4,9 @@ module OnlineMigrations
4
4
  module BackgroundSchemaMigrations
5
5
  # Class representing configuration options for background schema migrations.
6
6
  class Config
7
- # Maximum number of run attempts
7
+ # Maximum number of run attempts.
8
8
  #
9
- # When attempts are exhausted, the migration is marked as failed.
9
+ # When attempts are exhausted, the schema migration is marked as failed.
10
10
  # @return [Integer] defaults to 5
11
11
  #
12
12
  attr_accessor :max_attempts