online_migrations 0.25.0 → 0.27.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (63) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +31 -0
  3. data/README.md +18 -73
  4. data/docs/0.27-upgrade.md +24 -0
  5. data/docs/background_data_migrations.md +200 -101
  6. data/docs/background_schema_migrations.md +2 -2
  7. data/docs/configuring.md +8 -0
  8. data/lib/generators/online_migrations/{background_migration_generator.rb → data_migration_generator.rb} +4 -4
  9. data/lib/generators/online_migrations/templates/add_sharding_to_online_migrations.rb.tt +1 -1
  10. data/lib/generators/online_migrations/templates/add_timestamps_to_background_migrations.rb.tt +1 -1
  11. data/lib/generators/online_migrations/templates/background_schema_migrations_change_unique_index.rb.tt +1 -1
  12. data/lib/generators/online_migrations/templates/change_background_data_migrations.rb.tt +34 -0
  13. data/lib/generators/online_migrations/templates/{background_data_migration.rb.tt → data_migration.rb.tt} +8 -9
  14. data/lib/generators/online_migrations/templates/initializer.rb.tt +22 -25
  15. data/lib/generators/online_migrations/templates/install_migration.rb.tt +9 -40
  16. data/lib/generators/online_migrations/upgrade_generator.rb +16 -8
  17. data/lib/online_migrations/active_record_batch_enumerator.rb +8 -0
  18. data/lib/online_migrations/background_data_migrations/backfill_column.rb +50 -0
  19. data/lib/online_migrations/background_data_migrations/config.rb +62 -0
  20. data/lib/online_migrations/{background_migrations → background_data_migrations}/copy_column.rb +15 -28
  21. data/lib/online_migrations/{background_migrations → background_data_migrations}/delete_associated_records.rb +9 -5
  22. data/lib/online_migrations/{background_migrations → background_data_migrations}/delete_orphaned_records.rb +5 -9
  23. data/lib/online_migrations/background_data_migrations/migration.rb +312 -0
  24. data/lib/online_migrations/{background_migrations → background_data_migrations}/migration_helpers.rb +72 -61
  25. data/lib/online_migrations/background_data_migrations/migration_job.rb +158 -0
  26. data/lib/online_migrations/background_data_migrations/migration_status_validator.rb +65 -0
  27. data/lib/online_migrations/{background_migrations → background_data_migrations}/perform_action_on_relation.rb +5 -5
  28. data/lib/online_migrations/{background_migrations → background_data_migrations}/reset_counters.rb +5 -5
  29. data/lib/online_migrations/background_data_migrations/scheduler.rb +78 -0
  30. data/lib/online_migrations/background_data_migrations/ticker.rb +62 -0
  31. data/lib/online_migrations/background_schema_migrations/config.rb +2 -2
  32. data/lib/online_migrations/background_schema_migrations/migration.rb +57 -127
  33. data/lib/online_migrations/background_schema_migrations/migration_helpers.rb +26 -47
  34. data/lib/online_migrations/background_schema_migrations/migration_runner.rb +43 -97
  35. data/lib/online_migrations/background_schema_migrations/scheduler.rb +2 -2
  36. data/lib/online_migrations/batch_iterator.rb +7 -4
  37. data/lib/online_migrations/change_column_type_helpers.rb +75 -14
  38. data/lib/online_migrations/command_checker.rb +32 -20
  39. data/lib/online_migrations/config.rb +12 -4
  40. data/lib/online_migrations/data_migration.rb +127 -0
  41. data/lib/online_migrations/error_messages.rb +16 -0
  42. data/lib/online_migrations/index_definition.rb +1 -1
  43. data/lib/online_migrations/lock_retrier.rb +5 -2
  44. data/lib/online_migrations/migration.rb +8 -1
  45. data/lib/online_migrations/schema_cache.rb +0 -78
  46. data/lib/online_migrations/schema_statements.rb +18 -74
  47. data/lib/online_migrations/shard_aware.rb +44 -0
  48. data/lib/online_migrations/utils.rb +1 -20
  49. data/lib/online_migrations/verbose_sql_logs.rb +1 -7
  50. data/lib/online_migrations/version.rb +1 -1
  51. data/lib/online_migrations.rb +19 -19
  52. metadata +25 -24
  53. data/lib/online_migrations/background_migration.rb +0 -64
  54. data/lib/online_migrations/background_migrations/backfill_column.rb +0 -54
  55. data/lib/online_migrations/background_migrations/background_migration_class_validator.rb +0 -29
  56. data/lib/online_migrations/background_migrations/config.rb +0 -74
  57. data/lib/online_migrations/background_migrations/migration.rb +0 -329
  58. data/lib/online_migrations/background_migrations/migration_job.rb +0 -109
  59. data/lib/online_migrations/background_migrations/migration_job_runner.rb +0 -66
  60. data/lib/online_migrations/background_migrations/migration_job_status_validator.rb +0 -29
  61. data/lib/online_migrations/background_migrations/migration_runner.rb +0 -161
  62. data/lib/online_migrations/background_migrations/migration_status_validator.rb +0 -48
  63. data/lib/online_migrations/background_migrations/scheduler.rb +0 -42
@@ -0,0 +1,158 @@
1
+ # frozen_string_literal: true
2
+
3
+ module OnlineMigrations
4
+ module BackgroundDataMigrations
5
+ # Sidekiq job responsible for running background data migrations.
6
+ class MigrationJob
7
+ include Sidekiq::IterableJob
8
+
9
+ sidekiq_retry_in do |count, _exception, jobhash|
10
+ migration_id = jobhash["args"].fetch(0)
11
+ migration = Migration.find(migration_id)
12
+
13
+ if count + 1 >= migration.max_attempts
14
+ :kill
15
+ end
16
+ end
17
+
18
+ sidekiq_retries_exhausted do |jobhash, exception|
19
+ migration_id = jobhash["args"].fetch(0)
20
+ migration = Migration.find(migration_id)
21
+ migration.persist_error(exception)
22
+
23
+ OnlineMigrations.config.background_data_migrations.error_handler.call(exception, migration)
24
+ end
25
+
26
+ TICKER_INTERVAL = 5 # seconds
27
+
28
+ def initialize
29
+ super
30
+
31
+ @migration = nil
32
+ @data_migration = nil
33
+
34
+ @ticker = Ticker.new(TICKER_INTERVAL) do |ticks, duration|
35
+ # TODO: use 'cursor' accessor from sidekiq in the future.
36
+ # https://github.com/sidekiq/sidekiq/pull/6606
37
+ @migration.persist_progress(@_cursor, ticks, duration)
38
+ @migration.reload
39
+ end
40
+
41
+ @throttle_checked_at = current_time
42
+ end
43
+
44
+ def on_start
45
+ @migration.start
46
+ end
47
+
48
+ def around_iteration(&block)
49
+ @migration.on_shard_if_present(&block)
50
+ end
51
+
52
+ def on_resume
53
+ @data_migration.after_resume
54
+ end
55
+
56
+ def on_stop
57
+ @ticker.persist
58
+ @migration.stop
59
+ end
60
+
61
+ def on_complete
62
+ # Job was manually cancelled.
63
+ @migration.cancel if cancelled?
64
+
65
+ @migration.complete
66
+ end
67
+
68
+ def build_enumerator(migration_id, cursor:)
69
+ @migration = BackgroundDataMigrations::Migration.find(migration_id)
70
+ cursor ||= @migration.cursor
71
+
72
+ @migration.on_shard_if_present do
73
+ @data_migration = @migration.data_migration
74
+ collection_enum = @data_migration.build_enumerator(cursor: cursor)
75
+
76
+ if collection_enum
77
+ if !collection_enum.is_a?(Enumerator)
78
+ raise ArgumentError, <<~MSG.squish
79
+ #{@data_migration.class.name}#build_enumerator must return an Enumerator,
80
+ got #{collection_enum.class.name}.
81
+ MSG
82
+ end
83
+
84
+ collection_enum
85
+ else
86
+ collection = @data_migration.collection
87
+
88
+ case collection
89
+ when ActiveRecord::Relation
90
+ options = {
91
+ cursor: cursor,
92
+ batch_size: @data_migration.class.active_record_enumerator_batch_size || 100,
93
+ }
94
+ active_record_records_enumerator(collection, **options)
95
+ when ActiveRecord::Batches::BatchEnumerator
96
+ if collection.start || collection.finish
97
+ raise ArgumentError, <<~MSG.squish
98
+ #{@data_migration.class.name}#collection does not support
99
+ a batch enumerator with the "start" or "finish" options.
100
+ MSG
101
+ end
102
+
103
+ active_record_relations_enumerator(
104
+ collection.relation,
105
+ batch_size: collection.batch_size,
106
+ cursor: cursor,
107
+ use_ranges: collection.use_ranges
108
+ )
109
+ when Array
110
+ array_enumerator(collection, cursor: cursor)
111
+ else
112
+ raise ArgumentError, <<~MSG.squish
113
+ #{@data_migration.class.name}#collection must be either an ActiveRecord::Relation,
114
+ ActiveRecord::Batches::BatchEnumerator, or Array.
115
+ MSG
116
+ end
117
+ end
118
+ end
119
+ end
120
+
121
+ def each_iteration(item, _migration_id)
122
+ if @migration.cancelling? || @migration.pausing? || @migration.paused?
123
+ # Finish this exact sidekiq job. When the migration is paused
124
+ # and will be resumed, a new job will be enqueued.
125
+ finished = true
126
+ throw :abort, finished
127
+ elsif should_throttle?
128
+ ActiveSupport::Notifications.instrument("throttled.background_data_migrations", migration: @migration)
129
+ finished = false
130
+ throw :abort, finished
131
+ else
132
+ @data_migration.around_process do
133
+ @migration.data_migration.process(item)
134
+
135
+ pause = OnlineMigrations.config.background_data_migrations.iteration_pause
136
+ sleep(pause) if pause > 0
137
+ end
138
+ @ticker.tick
139
+ end
140
+ end
141
+
142
+ private
143
+ THROTTLE_CHECK_INTERVAL = 5 # seconds
144
+ private_constant :THROTTLE_CHECK_INTERVAL
145
+
146
+ def should_throttle?
147
+ if current_time - @throttle_checked_at >= THROTTLE_CHECK_INTERVAL
148
+ @throttle_checked_at = current_time
149
+ OnlineMigrations.config.throttler.call
150
+ end
151
+ end
152
+
153
+ def current_time
154
+ ::Process.clock_gettime(::Process::CLOCK_MONOTONIC)
155
+ end
156
+ end
157
+ end
158
+ end
@@ -0,0 +1,65 @@
1
+ # frozen_string_literal: true
2
+
3
+ module OnlineMigrations
4
+ module BackgroundDataMigrations
5
+ # @private
6
+ class MigrationStatusValidator < ActiveModel::Validator
7
+ # Valid status transitions a Migration can make.
8
+ VALID_STATUS_TRANSITIONS = {
9
+ # enqueued -> running occurs when the migration starts performing.
10
+ # enqueued -> paused occurs when the migration is paused before starting.
11
+ # enqueued -> cancelled occurs when the migration is cancelled before starting.
12
+ # enqueued -> failed occurs when the migration job fails to be enqueued, or
13
+ # if the migration is deleted before it starts running.
14
+ "enqueued" => ["running", "paused", "cancelled", "failed"],
15
+ # running -> succeeded occurs when the migration completes successfully.
16
+ # running -> pausing occurs when a user pauses the migration as it's performing.
17
+ # running -> cancelling occurs when a user cancels the migration as it's performing.
18
+ # running -> failed occurs when the job raises an exception when running.
19
+ "running" => [
20
+ "succeeded",
21
+ "pausing",
22
+ "cancelling",
23
+ "failed",
24
+ ],
25
+ # pausing -> paused occurs when the migration actually halts performing and
26
+ # occupies a status of paused.
27
+ # pausing -> cancelling occurs when the user cancels a migration immediately
28
+ # after it was paused, such that the migration had not actually halted yet.
29
+ # pausing -> succeeded occurs when the migration completes immediately after
30
+ # being paused. This can happen if the migration is on its last iteration
31
+ # when it is paused, or if the migration is paused after enqueue but has
32
+ # nothing in its collection to process.
33
+ # pausing -> failed occurs when the job raises an exception after the
34
+ # user has paused it.
35
+ "pausing" => ["paused", "cancelling", "succeeded", "failed"],
36
+ # paused -> enqueued occurs when the migration is resumed after being paused.
37
+ # paused -> cancelled when the user cancels the migration after it is paused.
38
+ "paused" => ["enqueued", "cancelled"],
39
+ # failed -> enqueued occurs when the migration is retried after encountering an error.
40
+ "failed" => ["enqueued"],
41
+ # cancelling -> cancelled occurs when the migration actually halts performing
42
+ # and occupies a status of cancelled.
43
+ # cancelling -> succeeded occurs when the migration completes immediately after
44
+ # being cancelled. See description for pausing -> succeeded.
45
+ # cancelling -> failed occurs when the job raises an exception after the
46
+ # user has cancelled it.
47
+ "cancelling" => ["cancelled", "succeeded", "failed"],
48
+ }
49
+
50
+ def validate(record)
51
+ return if !record.status_changed?
52
+
53
+ previous_status, new_status = record.status_change
54
+ valid_new_statuses = VALID_STATUS_TRANSITIONS.fetch(previous_status, [])
55
+
56
+ if !valid_new_statuses.include?(new_status)
57
+ record.errors.add(
58
+ :status,
59
+ "cannot transition data migration from status '#{previous_status}' to '#{new_status}'"
60
+ )
61
+ end
62
+ end
63
+ end
64
+ end
65
+ end
@@ -1,9 +1,9 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module OnlineMigrations
4
- module BackgroundMigrations
4
+ module BackgroundDataMigrations
5
5
  # @private
6
- class PerformActionOnRelation < BackgroundMigration
6
+ class PerformActionOnRelation < DataMigration
7
7
  attr_reader :model, :conditions, :action, :options
8
8
 
9
9
  def initialize(model_name, conditions, action, options = {})
@@ -13,11 +13,11 @@ module OnlineMigrations
13
13
  @options = options.symbolize_keys
14
14
  end
15
15
 
16
- def relation
17
- model.unscoped.where(conditions)
16
+ def collection
17
+ model.unscoped.where(conditions).in_batches(of: 100)
18
18
  end
19
19
 
20
- def process_batch(relation)
20
+ def process(relation)
21
21
  case action
22
22
  when :update_all
23
23
  updates = options.fetch(:updates)
@@ -1,9 +1,9 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module OnlineMigrations
4
- module BackgroundMigrations
4
+ module BackgroundDataMigrations
5
5
  # @private
6
- class ResetCounters < BackgroundMigration
6
+ class ResetCounters < DataMigration
7
7
  attr_reader :model, :counters, :touch
8
8
 
9
9
  def initialize(model_name, counters, options = {})
@@ -12,11 +12,11 @@ module OnlineMigrations
12
12
  @touch = options[:touch]
13
13
  end
14
14
 
15
- def relation
16
- model.unscoped
15
+ def collection
16
+ model.unscoped.in_batches(of: 100)
17
17
  end
18
18
 
19
- def process_batch(relation)
19
+ def process(relation)
20
20
  updates = counters.map do |counter_association|
21
21
  has_many_association = has_many_association(counter_association)
22
22
 
@@ -0,0 +1,78 @@
1
+ # frozen_string_literal: true
2
+
3
+ module OnlineMigrations
4
+ module BackgroundDataMigrations
5
+ # Class responsible for scheduling background data migrations.
6
+ #
7
+ # Scheduler should be configured to run periodically, for example, via cron.
8
+ #
9
+ # @example Run via whenever
10
+ # # add this to schedule.rb
11
+ # every 1.minute do
12
+ # runner "OnlineMigrations.run_background_data_migrations"
13
+ # end
14
+ #
15
+ # @example Specific shard
16
+ # every 1.minute do
17
+ # runner "OnlineMigrations.run_background_data_migrations(shard: :shard_two)"
18
+ # end
19
+ #
20
+ # @example Custom concurrency
21
+ # every 1.minute do
22
+ # # Allow running 2 data migrations in parallel.
23
+ # runner "OnlineMigrations.run_background_data_migrations(concurrency: 2)"
24
+ # end
25
+ #
26
+ class Scheduler
27
+ def self.run(**options)
28
+ new.run(**options)
29
+ end
30
+
31
+ # Runs Scheduler
32
+ def run(shard: nil, concurrency: 1)
33
+ relation = Migration.queue_order
34
+ relation = relation.where(shard: shard) if shard
35
+
36
+ with_lock do
37
+ running = relation.running
38
+ enqueued = relation.enqueued
39
+
40
+ # Ensure no more than 'concurrency' migrations are running at the same time.
41
+ remaining_to_enqueue = concurrency - running.count
42
+ if remaining_to_enqueue > 0
43
+ migrations_to_enqueue = enqueued.limit(remaining_to_enqueue)
44
+ migrations_to_enqueue.each do |migration|
45
+ enqueue_migration(migration)
46
+ end
47
+ end
48
+ end
49
+
50
+ true
51
+ end
52
+
53
+ private
54
+ def with_lock(&block)
55
+ # Don't lock the whole table if we can lock only a single record (it must always be the same record).
56
+ first_record = Migration.queue_order.first
57
+ if first_record
58
+ first_record.with_lock(&block)
59
+ else
60
+ Migration.transaction do
61
+ Migration.connection.execute("LOCK #{Migration.table_name} IN ACCESS EXCLUSIVE MODE")
62
+ yield
63
+ end
64
+ end
65
+ end
66
+
67
+ def enqueue_migration(migration)
68
+ job = OnlineMigrations.config.background_data_migrations.job
69
+ job_class = job.constantize
70
+
71
+ jid = job_class.perform_async(migration.id)
72
+ if jid
73
+ migration.update!(status: :running, jid: jid)
74
+ end
75
+ end
76
+ end
77
+ end
78
+ end
@@ -0,0 +1,62 @@
1
+ # frozen_string_literal: true
2
+
3
+ module OnlineMigrations
4
+ module BackgroundDataMigrations
5
+ # This class encapsulates the logic behind updating the tick counter.
6
+ #
7
+ # It's initialized with a duration for the throttle, and a block to persist
8
+ # the number of ticks to increment.
9
+ #
10
+ # When +tick+ is called, the block will be called with the increment,
11
+ # provided the duration since the last update (or initialization) has been
12
+ # long enough.
13
+ #
14
+ # To not lose any increments, +persist+ should be used, which may call the
15
+ # block with any leftover ticks.
16
+ #
17
+ # @private
18
+ class Ticker
19
+ # Creates a Ticker that will call the block each time +tick+ is called,
20
+ # unless the tick is being throttled.
21
+ #
22
+ # @param interval [ActiveSupport::Duration, Numeric] Duration
23
+ # since initialization or last call that will cause a throttle.
24
+ # @yieldparam ticks [Integer] the increment in ticks to be persisted.
25
+ #
26
+ def initialize(interval, &block)
27
+ @interval = interval
28
+ @block = block
29
+ @last_persisted_at = Time.current
30
+ @ticks_recorded = 0
31
+ end
32
+
33
+ # Increments the tick count by one, and may persist the new value if the
34
+ # threshold duration has passed since initialization or the tick count was
35
+ # last persisted.
36
+ #
37
+ def tick
38
+ @ticks_recorded += 1
39
+ persist if persist?
40
+ end
41
+
42
+ # Persists the tick increments by calling the block passed to the
43
+ # initializer. This is idempotent in the sense that calling it twice in a
44
+ # row will call the block at most once (if it had been throttled).
45
+ #
46
+ def persist
47
+ return if @ticks_recorded == 0
48
+
49
+ now = Time.current
50
+ duration = now - @last_persisted_at
51
+ @last_persisted_at = now
52
+ @block.call(@ticks_recorded, duration)
53
+ @ticks_recorded = 0
54
+ end
55
+
56
+ private
57
+ def persist?
58
+ Time.now - @last_persisted_at >= @interval
59
+ end
60
+ end
61
+ end
62
+ end
@@ -4,9 +4,9 @@ module OnlineMigrations
4
4
  module BackgroundSchemaMigrations
5
5
  # Class representing configuration options for background schema migrations.
6
6
  class Config
7
- # Maximum number of run attempts
7
+ # Maximum number of run attempts.
8
8
  #
9
- # When attempts are exhausted, the migration is marked as failed.
9
+ # When attempts are exhausted, the schema migration is marked as failed.
10
10
  # @return [Integer] defaults to 5
11
11
  #
12
12
  attr_accessor :max_attempts