online_migrations 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (60) hide show
  1. checksums.yaml +7 -0
  2. data/.github/workflows/test.yml +112 -0
  3. data/.gitignore +10 -0
  4. data/.rubocop.yml +113 -0
  5. data/.yardopts +1 -0
  6. data/BACKGROUND_MIGRATIONS.md +288 -0
  7. data/CHANGELOG.md +5 -0
  8. data/Gemfile +27 -0
  9. data/Gemfile.lock +108 -0
  10. data/LICENSE.txt +21 -0
  11. data/README.md +1067 -0
  12. data/Rakefile +23 -0
  13. data/gemfiles/activerecord_42.gemfile +6 -0
  14. data/gemfiles/activerecord_50.gemfile +5 -0
  15. data/gemfiles/activerecord_51.gemfile +5 -0
  16. data/gemfiles/activerecord_52.gemfile +5 -0
  17. data/gemfiles/activerecord_60.gemfile +5 -0
  18. data/gemfiles/activerecord_61.gemfile +5 -0
  19. data/gemfiles/activerecord_70.gemfile +5 -0
  20. data/gemfiles/activerecord_head.gemfile +5 -0
  21. data/lib/generators/online_migrations/background_migration_generator.rb +29 -0
  22. data/lib/generators/online_migrations/install_generator.rb +34 -0
  23. data/lib/generators/online_migrations/templates/background_migration.rb.tt +22 -0
  24. data/lib/generators/online_migrations/templates/initializer.rb.tt +94 -0
  25. data/lib/generators/online_migrations/templates/migration.rb.tt +46 -0
  26. data/lib/online_migrations/background_migration.rb +64 -0
  27. data/lib/online_migrations/background_migrations/advisory_lock.rb +62 -0
  28. data/lib/online_migrations/background_migrations/backfill_column.rb +52 -0
  29. data/lib/online_migrations/background_migrations/background_migration_class_validator.rb +36 -0
  30. data/lib/online_migrations/background_migrations/config.rb +98 -0
  31. data/lib/online_migrations/background_migrations/copy_column.rb +90 -0
  32. data/lib/online_migrations/background_migrations/migration.rb +210 -0
  33. data/lib/online_migrations/background_migrations/migration_helpers.rb +238 -0
  34. data/lib/online_migrations/background_migrations/migration_job.rb +92 -0
  35. data/lib/online_migrations/background_migrations/migration_job_runner.rb +63 -0
  36. data/lib/online_migrations/background_migrations/migration_job_status_validator.rb +27 -0
  37. data/lib/online_migrations/background_migrations/migration_runner.rb +97 -0
  38. data/lib/online_migrations/background_migrations/migration_status_validator.rb +45 -0
  39. data/lib/online_migrations/background_migrations/scheduler.rb +49 -0
  40. data/lib/online_migrations/batch_iterator.rb +87 -0
  41. data/lib/online_migrations/change_column_type_helpers.rb +587 -0
  42. data/lib/online_migrations/command_checker.rb +590 -0
  43. data/lib/online_migrations/command_recorder.rb +137 -0
  44. data/lib/online_migrations/config.rb +198 -0
  45. data/lib/online_migrations/copy_trigger.rb +91 -0
  46. data/lib/online_migrations/database_tasks.rb +19 -0
  47. data/lib/online_migrations/error_messages.rb +388 -0
  48. data/lib/online_migrations/foreign_key_definition.rb +17 -0
  49. data/lib/online_migrations/foreign_keys_collector.rb +33 -0
  50. data/lib/online_migrations/indexes_collector.rb +48 -0
  51. data/lib/online_migrations/lock_retrier.rb +250 -0
  52. data/lib/online_migrations/migration.rb +63 -0
  53. data/lib/online_migrations/migrator.rb +23 -0
  54. data/lib/online_migrations/schema_cache.rb +96 -0
  55. data/lib/online_migrations/schema_statements.rb +1042 -0
  56. data/lib/online_migrations/utils.rb +140 -0
  57. data/lib/online_migrations/version.rb +5 -0
  58. data/lib/online_migrations.rb +74 -0
  59. data/online_migrations.gemspec +28 -0
  60. metadata +119 -0
@@ -0,0 +1,90 @@
1
+ # frozen_string_literal: true
2
+
module OnlineMigrations
  module BackgroundMigrations
    # Background migration that copies values from one set of columns into
    # another set of columns of the same table, optionally wrapping each source
    # value into a SQL type cast function.
    #
    # Only rows where all destination columns are NULL are selected
    # (see `#relation`).
    #
    # @private
    class CopyColumn < BackgroundMigration
      attr_reader :table_name, :copy_from, :copy_to, :model_name, :type_cast_functions

      # @param table_name [String, Symbol]
      # @param copy_from [String, Symbol, Array] source column name(s)
      # @param copy_to [String, Symbol, Array] destination column name(s)
      # @param model_name [String, nil] class name of the model to use;
      #   when blank, a model is defined for `table_name`
      # @param type_cast_functions [Hash, String, Symbol, nil]
      #   keys - source column names, values - type cast function names.
      #   A single function name is accepted when `copy_from` is a single column.
      #
      # @raise [ArgumentError] when multiple columns are given together with
      #   a non-Hash `type_cast_functions`, or when the number of source and
      #   destination columns differs
      #
      def initialize(table_name, copy_from, copy_to, model_name = nil, type_cast_functions = {})
        @table_name = table_name

        if copy_from.is_a?(Array) && type_cast_functions && !type_cast_functions.is_a?(Hash)
          raise ArgumentError, "type_cast_functions must be a Hash"
        end

        @copy_from = Array.wrap(copy_from)
        @copy_to = Array.wrap(copy_to)

        if @copy_from.size != @copy_to.size
          raise ArgumentError, "Number of source and destination columns must match"
        end

        @model_name = model_name
        @type_cast_functions = normalize_type_cast_functions(copy_from, type_cast_functions)
      end

      # Rows that still need copying: every destination column is NULL and
      # the source columns are filtered through
      # `Utils.ar_where_not_multiple_conditions` with NULL values.
      def relation
        relation = model
          .where(Hash[copy_to.map { |to_column| [to_column, nil] }])

        Utils.ar_where_not_multiple_conditions(
          relation,
          copy_from.map { |from_column| [from_column, nil] }.to_h
        )
      end

      # Issues a single UPDATE for the batch, setting each destination column
      # to the (optionally type cast) value of its source column.
      def process_batch(relation)
        arel = relation.arel
        arel_table = relation.arel_table

        old_values = copy_from.map do |from_column|
          old_value = arel_table[from_column]
          if (type_cast_function = type_cast_function_for(from_column))
            if Utils.ar_version <= 5.2
              # ActiveRecord <= 5.2 does not support quoting of Arel::Nodes::NamedFunction
              old_value = Arel.sql("#{type_cast_function}(#{connection.quote_column_name(from_column)})")
            else
              old_value = Arel::Nodes::NamedFunction.new(type_cast_function, [old_value])
            end
          end
          old_value
        end

        if Utils.ar_version <= 4.2
          stmt = Arel::UpdateManager.new(arel.engine)
        else
          stmt = Arel::UpdateManager.new
        end

        stmt.table(arel_table)
        stmt.wheres = arel.constraints

        updates = copy_to.zip(old_values).map { |to_column, old_value| [arel_table[to_column], old_value] }
        stmt.set(updates)

        connection.update(stmt)
      end

      def count
        # Exact counts are expensive on large tables, since PostgreSQL
        # needs to do a full scan. An estimated count should give a pretty decent
        # approximation of rows count in this case.
        Utils.estimated_count(connection, table_name)
      end

      private

      # Always returns a Hash, so that lookups in `#process_batch` never hit
      # `nil` or `String#[]`.
      def normalize_type_cast_functions(copy_from, type_cast_functions)
        case type_cast_functions
        when nil, false
          {}
        when Hash
          type_cast_functions
        else
          # A single function name for a single source column
          # (the Array case was rejected by the constructor guard).
          { copy_from => type_cast_functions }
        end
      end

      # Column names and Hash keys may be any mix of strings and symbols,
      # so the lookup tries the name as given, then as a String and as a Symbol.
      def type_cast_function_for(column)
        type_cast_functions[column] ||
          type_cast_functions[column.to_s] ||
          type_cast_functions[column.to_sym]
      end

      def model
        @model ||= if model_name.present?
          Object.const_get(model_name, false)
        else
          Utils.define_model(ActiveRecord::Base.connection, table_name)
        end
      end

      def connection
        model.connection
      end
    end
  end
end
@@ -0,0 +1,210 @@
1
+ # frozen_string_literal: true
2
+
3
+ module OnlineMigrations
4
+ module BackgroundMigrations
5
+ class Migration < ActiveRecord::Base
6
+ STATUSES = [
7
+ :enqueued, # The migration has been enqueued by the user.
8
+ :running, # The migration is being performed by a migration executor.
9
+ :paused, # The migration was paused in the middle of the run by the user.
10
+ :finishing, # The migration is being manually finished inline by the user.
11
+ :failed, # The migration raises an exception when running.
12
+ :succeeded, # The migration finished without error.
13
+ ]
14
+
15
+ self.table_name = :background_migrations
16
+
17
+ scope :queue_order, -> { order(created_at: :asc) }
18
+ scope :active, -> { where(status: [statuses[:enqueued], statuses[:running]]) }
19
+ scope :for_migration_name, ->(migration_name) { where(migration_name: normalize_migration_name(migration_name)) }
20
+ scope :for_configuration, ->(migration_name, arguments) do
21
+ for_migration_name(migration_name).where("arguments = ?", arguments.to_json)
22
+ end
23
+
24
+ enum status: STATUSES.map { |status| [status, status.to_s] }.to_h
25
+
26
+ has_many :migration_jobs
27
+
28
+ validates :migration_name, :batch_column_name, presence: true
29
+
30
+ validates :batch_pause, :min_value, :max_value, :batch_size, :sub_batch_size,
31
+ presence: true, numericality: { greater_than: 0 }
32
+
33
+ validates :sub_batch_pause_ms, presence: true, numericality: { greater_than_or_equal_to: 0 }
34
+ validates :rows_count, numericality: { greater_than_or_equal_to: 0 }, allow_nil: true
35
+ validates :arguments, uniqueness: { scope: :migration_name }
36
+
37
+ validate :validate_batch_column_values
38
+ validate :validate_batch_sizes
39
+ validate :validate_jobs_status, if: :status_changed?
40
+
41
+ validates_with BackgroundMigrationClassValidator
42
+ validates_with MigrationStatusValidator, on: :update
43
+
44
+ before_validation :set_defaults
45
+
# Strips the configured background migrations namespace (with an optional
# leading `::`) from the beginning of the given class name.
#
# @param migration_name [String, nil] possibly namespaced class name
# @return [String, nil] the name relative to the configured namespace;
#   `nil` is returned as is, so that the presence validation can report it
#   instead of this method raising NoMethodError
#
# @private
def self.normalize_migration_name(migration_name)
  return migration_name if migration_name.nil?

  namespace = ::OnlineMigrations.config.background_migrations.migrations_module
  # `\A` anchors at the start of the string (`^` would match after any newline);
  # the namespace is escaped so it is always matched literally.
  migration_name.sub(/\A(::)?#{Regexp.escape(namespace.to_s)}::/, "")
end
51
+
52
+ def migration_name=(class_name)
53
+ write_attribute(:migration_name, self.class.normalize_migration_name(class_name))
54
+ end
55
+
56
+ def completed?
57
+ succeeded? || failed?
58
+ end
59
+
60
+ def last_job
61
+ migration_jobs.order(max_value: :desc).first
62
+ end
63
+
64
+ def last_completed_job
65
+ migration_jobs.completed.order(finished_at: :desc).first
66
+ end
67
+
# Returns the progress of the background migration.
#
# @return [Float, nil]
#   - when the background migration is configured not to track progress
#     (`rows_count` is not set), returns `nil`
#   - otherwise returns a value in the range from 0.0 to 1.0
#
def progress
  return 1.0 if succeeded?
  return unless rows_count

  processed_rows = migration_jobs.succeeded.sum(:batch_size)
  # Nothing was processed yet. Returning early also avoids `0.0 / 0`
  # (which is NaN) when the rows count is zero.
  return 0.0 if processed_rows.zero?

  # The last migration job may need to process fewer rows than
  # the batch size, so the ratio can be > 1.0.
  [processed_rows.to_f / rows_count, 1.0].min
end
84
+
85
+ def migration_class
86
+ BackgroundMigration.named(migration_name)
87
+ end
88
+
89
+ def migration_object
90
+ @migration_object ||= migration_class.new(*arguments)
91
+ end
92
+
93
+ def migration_relation
94
+ migration_object.relation
95
+ end
96
+
# Tells whether the configured pause since the previously completed job has passed.
#
# Returns `false` while any job is still running, and `true` when
# no job has completed yet.
#
# @return [Boolean]
#
def interval_elapsed?
  return false if migration_jobs.running.exists?

  previous_job = last_completed_job
  return true unless previous_job

  previous_job.finished_at + batch_pause <= Time.current
end
109
+
# Manually retry failed jobs.
#
# Failed jobs are processed in batches of 100. Within one transaction per
# batch, each job is retried (`MigrationJob#retry`) and the migration
# itself is marked as enqueued.
#
def retry_failed_jobs
  failed_jobs = migration_jobs.failed

  BatchIterator.new(failed_jobs).each_batch(of: 100) do |jobs|
    transaction do
      jobs.each { |job| job.retry }
      enqueued!
    end
  end
end
124
+
# Computes the `[min, max]` values of the batch column for the next job.
#
# Returns `nil` when there are no more rows to process or when the next
# batch starts beyond the migration's `max_value`.
#
# @private
def next_batch_range
  iterator = BatchIterator.new(migration_relation)
  bounds = nil

  # Only the first batch is of interest, so the loop is left right after it.
  # rubocop:disable Lint/UnreachableLoop
  iterator.each_batch(of: batch_size, column: batch_column_name, start: next_min_value) do |batch|
    bounds =
      if Utils.ar_version <= 4.2
        # ActiveRecord <= 4.2 does not support pluck with Arel nodes
        quoted_column = self.class.connection.quote_column_name(batch_column_name)
        batch.pluck("MIN(#{quoted_column}), MAX(#{quoted_column})").first
      else
        lowest = batch.arel_table[batch_column_name].minimum
        highest = batch.arel_table[batch_column_name].maximum
        batch.pluck(lowest, highest).first
      end
    break
  end
  # rubocop:enable Lint/UnreachableLoop

  return if bounds.nil?

  lower, upper = bounds
  return if lower > max_value

  # Never go past the upper bound configured for the migration.
  [lower, [upper, max_value].min]
end
155
+
156
+ private
# Adds an error when `max_value` is smaller than `min_value`
# (both are compared after `to_i`, so `nil` counts as 0).
def validate_batch_column_values
  return unless max_value.to_i < min_value.to_i

  errors.add(:base, "max_value should be greater than or equal to min_value")
end
162
+
# Adds an error when `sub_batch_size` is larger than `batch_size`
# (both are compared after `to_i`, so `nil` counts as 0).
def validate_batch_sizes
  return unless sub_batch_size.to_i > batch_size.to_i

  errors.add(:base, "sub_batch_size should be smaller than or equal to batch_size")
end
168
+
# Checks that the migration status agrees with the statuses of its jobs:
# a succeeded migration must have only succeeded jobs, and a failed
# migration must have at least one failed job.
def validate_jobs_status
  message =
    if succeeded? && migration_jobs.except_succeeded.exists?
      "all migration jobs must be succeeded"
    elsif failed? && !migration_jobs.failed.exists?
      "at least one migration job must be failed"
    end

  errors.add(:base, message) if message
end
176
+
# Fills in attributes that were not given explicitly: batching column and
# its bounds (taken from the migration relation), rows count (taken from
# the migration object) and the batching settings (taken from the config).
def set_defaults
  if migration_relation.is_a?(ActiveRecord::Relation)
    self.batch_column_name ||= migration_relation.primary_key
    self.min_value ||= migration_relation.minimum(batch_column_name)
    self.max_value ||= migration_relation.maximum(batch_column_name)

    # `:no_count` means the migration does not track progress.
    estimated_rows = migration_object.count
    self.rows_count = estimated_rows unless estimated_rows == :no_count
  end

  settings = ::OnlineMigrations.config.background_migrations
  self.batch_size ||= settings.batch_size
  self.sub_batch_size ||= settings.sub_batch_size
  self.batch_pause ||= settings.batch_pause
  self.sub_batch_pause_ms ||= settings.sub_batch_pause_ms
  self.batch_max_attempts ||= settings.batch_max_attempts

  # This can be the case when run in development on empty tables
  return unless min_value.nil?

  # integer IDs minimum value is 1
  self.min_value = self.max_value = 1
end
200
+
# Returns the batch column value the next job should start from:
# the value right after the last job's `max_value`, or the migration's
# `min_value` when there are no jobs yet.
def next_min_value
  # `last_job` is not memoized and runs a query on every call,
  # so it is fetched only once here.
  job = last_job
  job ? job.max_value.next : min_value
end
208
+ end
209
+ end
210
+ end
@@ -0,0 +1,238 @@
1
+ # frozen_string_literal: true
2
+
3
+ module OnlineMigrations
4
+ module BackgroundMigrations
5
+ module MigrationHelpers
6
+ # Backfills column data using background migrations.
7
+ #
8
+ # @param table_name [String, Symbol]
9
+ # @param column_name [String, Symbol]
10
+ # @param value
11
+ # @param model_name [String] If Active Record multiple databases feature is used,
12
+ # the class name of the model to get connection from.
13
+ # @param options [Hash] used to control the behavior of background migration.
14
+ # See `#enqueue_background_migration`
15
+ #
16
+ # @return [OnlineMigrations::BackgroundMigrations::Migration]
17
+ #
18
+ # @example
19
+ # backfill_column_in_background(:users, :admin, false)
20
+ #
21
+ # @example Additional background migration options
22
+ # backfill_column_in_background(:users, :admin, false, batch_size: 10_000)
23
+ #
24
+ # @note This method is better suited for extra large tables (100s of millions of records).
25
+ # For smaller tables it is probably better and easier to use more flexible `update_column_in_batches`.
26
+ #
27
+ # @note Consider `backfill_columns_in_background` when backfilling multiple columns
28
+ # to avoid rewriting the table multiple times.
29
+ #
def backfill_column_in_background(table_name, column_name, value, model_name: nil, **options)
  # A single column is just the multi-column form with one entry.
  updates = { column_name => value }
  backfill_columns_in_background(table_name, updates, model_name: model_name, **options)
end
34
+
35
+ # Same as `backfill_column_in_background` but for multiple columns.
36
+ #
37
+ # @param updates [Hash] keys - column names, values - corresponding values
38
+ #
39
+ # @example
40
+ # backfill_columns_in_background(:users, { admin: false, status: "active" })
41
+ #
42
+ # @see #backfill_column_in_background
43
+ #
def backfill_columns_in_background(table_name, updates, model_name: nil, **options)
  # Migration arguments carry the class name rather than the class itself.
  model_name = model_name.name if model_name.is_a?(Class)

  enqueue_background_migration("BackfillColumn", table_name, updates, model_name, **options)
end
55
+
56
+ # Backfills data from the old column to the new column using background migrations.
57
+ #
58
+ # @param table_name [String, Symbol]
59
+ # @param column_name [String, Symbol]
60
+ # @param model_name [String] If Active Record multiple databases feature is used,
61
+ # the class name of the model to get connection from.
62
+ # @param type_cast_function [String, Symbol] Some type changes require casting data to a new type.
63
+ # For example when changing from `text` to `jsonb`. In this case, use the `type_cast_function` option.
64
+ # You need to make sure there is no bad data and the cast will always succeed
65
+ # @param options [Hash] used to control the behavior of background migration.
66
+ # See `#enqueue_background_migration`
67
+ #
68
+ # @return [OnlineMigrations::BackgroundMigrations::Migration]
69
+ #
70
+ # @example
71
+ # backfill_column_for_type_change_in_background(:files, :size)
72
+ #
73
+ # @example With type casting
74
+ # backfill_column_for_type_change_in_background(:users, :settings, type_cast_function: "jsonb")
75
+ #
76
+ # @example Additional background migration options
77
+ # backfill_column_for_type_change_in_background(:files, :size, batch_size: 10_000)
78
+ #
79
+ # @note This method is better suited for extra large tables (100s of millions of records).
80
+ # For smaller tables it is probably better and easier to use more flexible `backfill_column_for_type_change`.
81
+ #
def backfill_column_for_type_change_in_background(table_name, column_name, model_name: nil,
                                                  type_cast_function: nil, **options)
  # A single column is just the multi-column form with one entry.
  casts = { column_name => type_cast_function }

  backfill_columns_for_type_change_in_background(
    table_name, column_name, model_name: model_name, type_cast_functions: casts, **options
  )
end
92
+
93
+ # Same as `backfill_column_for_type_change_in_background` but for multiple columns.
94
+ #
95
+ # @param type_cast_functions [Hash] if not empty, keys - column names,
96
+ # values - corresponding type cast functions
97
+ #
98
+ # @see #backfill_column_for_type_change_in_background
99
+ #
def backfill_columns_for_type_change_in_background(table_name, *column_names, model_name: nil,
                                                   type_cast_functions: {}, **options)
  # Each column is copied into its temporary "<name>_for_type_change" counterpart.
  tmp_columns = column_names.map { |name| "#{name}_for_type_change" }
  model_name = model_name.name if model_name.is_a?(Class)

  enqueue_background_migration(
    "CopyColumn", table_name, column_names, tmp_columns, model_name, type_cast_functions, **options
  )
end
115
+
116
+ # Copies data from the old column to the new column using background migrations.
117
+ #
118
+ # @param table_name [String, Symbol]
119
+ # @param copy_from [String, Symbol] source column name
120
+ # @param copy_to [String, Symbol] destination column name
121
+ # @param model_name [String] If Active Record multiple databases feature is used,
122
+ # the class name of the model to get connection from.
123
+ # @param type_cast_function [String, Symbol] Some type changes require casting data to a new type.
124
+ # For example when changing from `text` to `jsonb`. In this case, use the `type_cast_function` option.
125
+ # You need to make sure there is no bad data and the cast will always succeed
126
+ # @param options [Hash] used to control the behavior of background migration.
127
+ # See `#enqueue_background_migration`
128
+ #
129
+ # @return [OnlineMigrations::BackgroundMigrations::Migration]
130
+ #
131
+ # @example
132
+ # copy_column_in_background(:users, :id, :id_for_type_change)
133
+ #
134
+ # @note This method is better suited for extra large tables (100s of millions of records).
135
+ # For smaller tables it is probably better and easier to use more flexible `update_column_in_batches`.
136
+ #
def copy_column_in_background(table_name, copy_from, copy_to, model_name: nil, type_cast_function: nil, **options)
  # A single column is just the multi-column form with one entry.
  casts = { copy_from => type_cast_function }

  copy_columns_in_background(
    table_name, [copy_from], [copy_to], model_name: model_name, type_cast_functions: casts, **options
  )
end
147
+
148
+ # Same as `copy_column_in_background` but for multiple columns.
149
+ #
150
+ # @param type_cast_functions [Hash] if not empty, keys - column names,
151
+ # values - corresponding type cast functions
152
+ #
153
+ # @see #copy_column_in_background
154
+ #
def copy_columns_in_background(table_name, copy_from, copy_to, model_name: nil, type_cast_functions: {}, **options)
  # Migration arguments carry the class name rather than the class itself.
  model_name = model_name.name if model_name.is_a?(Class)

  enqueue_background_migration(
    "CopyColumn", table_name, copy_from, copy_to, model_name, type_cast_functions, **options
  )
end
168
+
169
+ # Creates a background migration for the given job class name.
170
+ #
171
+ # A background migration runs one job at a time, computing the bounds of the next batch
172
+ # based on the current migration settings and the previous batch bounds. Each job's execution status
173
+ # is tracked in the database as the migration runs.
174
+ #
175
+ # @param migration_name [String, Class] Background migration job class name
176
+ # @param arguments [Array] Extra arguments to pass to the job instance when the migration runs
177
+ # @option options [Symbol, String] :batch_column_name (primary key) Column name the migration will batch over
178
+ # @option options [Integer] :min_value Value in the column the batching will begin at,
179
+ # defaults to `SELECT MIN(batch_column_name)`
180
+ # @option options [Integer] :max_value Value in the column the batching will end at,
181
+ # defaults to `SELECT MAX(batch_column_name)`
182
+ # @option options [Integer] :batch_size (20_000) Number of rows to process in a single background migration run
183
+ # @option options [Integer] :sub_batch_size (1000) Smaller batches size that the batches will be divided into
184
+ # @option options [Integer] :batch_pause (0) Pause interval between each background migration job's execution (in seconds)
185
+ # @option options [Integer] :sub_batch_pause_ms (100) Number of milliseconds to sleep between each sub_batch execution
186
+ # @option options [Integer] :batch_max_attempts (5) Maximum number of batch run attempts
187
+ #
188
+ # @return [OnlineMigrations::BackgroundMigrations::Migration]
189
+ #
190
+ # @example
191
+ # enqueue_background_migration("BackfillProjectIssuesCount",
192
+ # batch_size: 10_000, batch_max_attempts: 10)
193
+ #
194
+ # # Given the background migration exists:
195
+ #
196
+ # class BackfillProjectIssuesCount < OnlineMigrations::BackgroundMigration
197
+ # def relation
198
+ # Project.all
199
+ # end
200
+ #
201
+ # def process_batch(projects)
202
+ # projects.update_all(
203
+ # "issues_count = (SELECT COUNT(*) FROM issues WHERE issues.project_id = projects.id)"
204
+ # )
205
+ # end
206
+ #
207
+ # # To be able to track progress, you need to define this method
208
+ # def count
209
+ # Project.maximum(:id)
210
+ # end
211
+ # end
212
+ #
213
+ # @note For convenience, the enqueued background migration is run inline
214
+ # in development and test environments
215
+ #
def enqueue_background_migration(migration_name, *arguments, **options)
  allowed_options = [
    :batch_column_name, :min_value, :max_value, :batch_size, :sub_batch_size,
    :batch_pause, :sub_batch_pause_ms, :batch_max_attempts,
  ]
  options.assert_valid_keys(*allowed_options)

  migration_name = migration_name.name if migration_name.is_a?(Class)
  attributes = { migration_name: migration_name, arguments: arguments }

  Migration.create!(**attributes, **options).tap do |migration|
    # For convenience in dev/test environments
    MigrationRunner.new(migration).run_all_migration_jobs if Utils.developer_env?
  end
end
236
+ end
237
+ end
238
+ end
@@ -0,0 +1,92 @@
1
+ # frozen_string_literal: true
2
+
3
+ module OnlineMigrations
4
+ module BackgroundMigrations
5
+ class MigrationJob < ActiveRecord::Base
6
+ STATUSES = [
7
+ :enqueued,
8
+ :running,
9
+ :failed,
10
+ :succeeded,
11
+ ]
12
+
13
+ self.table_name = :background_migration_jobs
14
+
15
+ # For ActiveRecord <= 4.2 needs to fully specify enum values
16
+ scope :active, -> { where(status: [statuses[:enqueued], statuses[:running]]) }
17
+ scope :completed, -> { where(status: [statuses[:failed], statuses[:succeeded]]) }
18
+ scope :stuck, -> do
19
+ timeout = ::OnlineMigrations.config.background_migrations.stuck_jobs_timeout
20
+ active.where("updated_at <= ?", timeout.ago)
21
+ end
22
+
23
+ scope :retriable, -> do
24
+ failed_retriable = failed.where("attempts < max_attempts")
25
+
26
+ stuck_sql = connection.unprepared_statement { stuck.to_sql }
27
+ failed_retriable_sql = connection.unprepared_statement { failed_retriable.to_sql }
28
+
29
+ from(Arel.sql(<<~SQL))
30
+ (
31
+ (#{failed_retriable_sql})
32
+ UNION
33
+ (#{stuck_sql})
34
+ ) AS #{table_name}
35
+ SQL
36
+ end
37
+
38
+ scope :except_succeeded, -> { where("status != ?", statuses[:succeeded]) }
39
+
40
+ enum status: STATUSES.map { |status| [status, status.to_s] }.to_h
41
+
42
+ delegate :migration_class, :migration_object, :migration_relation, :batch_column_name,
43
+ :arguments, :batch_pause, to: :migration
44
+
45
+ belongs_to :migration
46
+
47
+ # For ActiveRecord 5.0+ this is validated by default from belongs_to
48
+ validates :migration, presence: true
49
+
50
+ validates :min_value, :max_value, presence: true, numericality: { greater_than: 0 }
51
+ validate :values_in_migration_range, if: :min_value?
52
+ validate :validate_values_order, if: :min_value?
53
+
54
+ validates_with MigrationJobStatusValidator, on: :update
55
+
56
+ before_create :copy_settings_from_migration
57
+
# Resets this job, so that it is ready to be processed again:
# the status becomes enqueued, the attempts counter is zeroed and
# the start/finish timestamps are cleared.
#
# This is used when retrying failed jobs.
#
def retry
  enqueued_status = self.class.statuses[:enqueued]

  update!(status: enqueued_status, attempts: 0, started_at: nil, finished_at: nil)
end
70
+
71
+ private
# Validates that the job's values range lies within the values range
# of its migration.
#
# Missing `min_value`, `max_value` or `migration` are skipped here: they are
# reported by the presence validations, and comparing them would raise
# NoMethodError on `nil` instead of producing a validation error.
def values_in_migration_range
  return if min_value.nil? || max_value.nil? || migration.nil?

  if min_value < migration.min_value || max_value > migration.max_value
    errors.add(:base, "min_value and max_value should be in background migration values range")
  end
end
77
+
# Adds an error when `max_value` is smaller than `min_value`
# (both are compared after `to_i`, so `nil` counts as 0).
def validate_values_order
  return unless max_value.to_i < min_value.to_i

  errors.add(:base, "max_value should be greater than or equal to min_value")
end
83
+
# Snapshots the batching settings of the parent migration onto this job.
def copy_settings_from_migration
  source = migration

  self.batch_size = source.batch_size
  self.sub_batch_size = source.sub_batch_size
  # The job's `pause_ms` is the migration's pause between sub-batches.
  self.pause_ms = source.sub_batch_pause_ms
  self.max_attempts = source.batch_max_attempts
end
90
+ end
91
+ end
92
+ end
@@ -0,0 +1,63 @@
1
+ # frozen_string_literal: true
2
+
3
+ module OnlineMigrations
4
+ module BackgroundMigrations
5
+ # @private
6
+ class MigrationJobRunner
7
+ attr_reader :migration_job
8
+
9
+ delegate :attempts, :migration_relation, :migration_object, :sub_batch_size,
10
+ :batch_column_name, :min_value, :max_value, :pause_ms, to: :migration_job
11
+
12
+ def initialize(migration_job)
13
+ @migration_job = migration_job
14
+ end
15
+
# Runs the migration job: bumps the attempts counter and marks the job as
# running, processes its batch and marks the job as succeeded.
#
# Any exception is recorded on the job (class, message and the optionally
# cleaned backtrace), the job is marked as failed and the configured error
# handler is called; the exception is not re-raised.
def run
  payload = { background_migration_job: migration_job }
  ActiveSupport::Notifications.instrument("retried.background_migrations", payload) if migration_job.attempts >= 1

  migration_job.update!(
    attempts: attempts + 1,
    status: :running,
    started_at: Time.current,
    finished_at: nil,
    error_class: nil,
    error_message: nil,
    backtrace: nil
  )

  ActiveSupport::Notifications.instrument("process_batch.background_migrations", payload) { run_batch }

  migration_job.update!(status: :succeeded, finished_at: Time.current)
rescue Exception => error # rubocop:disable Lint/RescueException
  cleaner = ::OnlineMigrations.config.background_migrations.backtrace_cleaner
  recorded_backtrace = cleaner ? cleaner.clean(error.backtrace) : error.backtrace

  migration_job.update!(
    status: :failed,
    finished_at: Time.current,
    error_class: error.class.name,
    error_message: error.message,
    backtrace: recorded_backtrace
  )

  ::OnlineMigrations.config.background_migrations.error_handler.call(error, migration_job)
end
50
+
51
+ private
# Splits the job's values range into sub-batches and processes them one by
# one, sleeping `pause_ms` milliseconds after each sub-batch when it is positive.
def run_batch
  ::OnlineMigrations::BatchIterator.new(migration_relation).each_batch(
    of: sub_batch_size, column: batch_column_name, start: min_value, finish: max_value
  ) do |records|
    migration_object.process_batch(records)
    # `pause_ms` is in milliseconds, `sleep` takes seconds.
    sleep(pause_ms * 0.001) if pause_ms.positive?
  end
end
61
+ end
62
+ end
63
+ end