online_migrations 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.github/workflows/test.yml +112 -0
- data/.gitignore +10 -0
- data/.rubocop.yml +113 -0
- data/.yardopts +1 -0
- data/BACKGROUND_MIGRATIONS.md +288 -0
- data/CHANGELOG.md +5 -0
- data/Gemfile +27 -0
- data/Gemfile.lock +108 -0
- data/LICENSE.txt +21 -0
- data/README.md +1067 -0
- data/Rakefile +23 -0
- data/gemfiles/activerecord_42.gemfile +6 -0
- data/gemfiles/activerecord_50.gemfile +5 -0
- data/gemfiles/activerecord_51.gemfile +5 -0
- data/gemfiles/activerecord_52.gemfile +5 -0
- data/gemfiles/activerecord_60.gemfile +5 -0
- data/gemfiles/activerecord_61.gemfile +5 -0
- data/gemfiles/activerecord_70.gemfile +5 -0
- data/gemfiles/activerecord_head.gemfile +5 -0
- data/lib/generators/online_migrations/background_migration_generator.rb +29 -0
- data/lib/generators/online_migrations/install_generator.rb +34 -0
- data/lib/generators/online_migrations/templates/background_migration.rb.tt +22 -0
- data/lib/generators/online_migrations/templates/initializer.rb.tt +94 -0
- data/lib/generators/online_migrations/templates/migration.rb.tt +46 -0
- data/lib/online_migrations/background_migration.rb +64 -0
- data/lib/online_migrations/background_migrations/advisory_lock.rb +62 -0
- data/lib/online_migrations/background_migrations/backfill_column.rb +52 -0
- data/lib/online_migrations/background_migrations/background_migration_class_validator.rb +36 -0
- data/lib/online_migrations/background_migrations/config.rb +98 -0
- data/lib/online_migrations/background_migrations/copy_column.rb +90 -0
- data/lib/online_migrations/background_migrations/migration.rb +210 -0
- data/lib/online_migrations/background_migrations/migration_helpers.rb +238 -0
- data/lib/online_migrations/background_migrations/migration_job.rb +92 -0
- data/lib/online_migrations/background_migrations/migration_job_runner.rb +63 -0
- data/lib/online_migrations/background_migrations/migration_job_status_validator.rb +27 -0
- data/lib/online_migrations/background_migrations/migration_runner.rb +97 -0
- data/lib/online_migrations/background_migrations/migration_status_validator.rb +45 -0
- data/lib/online_migrations/background_migrations/scheduler.rb +49 -0
- data/lib/online_migrations/batch_iterator.rb +87 -0
- data/lib/online_migrations/change_column_type_helpers.rb +587 -0
- data/lib/online_migrations/command_checker.rb +590 -0
- data/lib/online_migrations/command_recorder.rb +137 -0
- data/lib/online_migrations/config.rb +198 -0
- data/lib/online_migrations/copy_trigger.rb +91 -0
- data/lib/online_migrations/database_tasks.rb +19 -0
- data/lib/online_migrations/error_messages.rb +388 -0
- data/lib/online_migrations/foreign_key_definition.rb +17 -0
- data/lib/online_migrations/foreign_keys_collector.rb +33 -0
- data/lib/online_migrations/indexes_collector.rb +48 -0
- data/lib/online_migrations/lock_retrier.rb +250 -0
- data/lib/online_migrations/migration.rb +63 -0
- data/lib/online_migrations/migrator.rb +23 -0
- data/lib/online_migrations/schema_cache.rb +96 -0
- data/lib/online_migrations/schema_statements.rb +1042 -0
- data/lib/online_migrations/utils.rb +140 -0
- data/lib/online_migrations/version.rb +5 -0
- data/lib/online_migrations.rb +74 -0
- data/online_migrations.gemspec +28 -0
- metadata +119 -0
@@ -0,0 +1,90 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module OnlineMigrations
|
4
|
+
module BackgroundMigrations
|
5
|
+
# Background migration that copies values from one or more source columns
# into the same number of destination columns of a single table, optionally
# wrapping each source value in a SQL type cast function.
#
# @private
class CopyColumn < BackgroundMigration
  attr_reader :table_name, :copy_from, :copy_to, :model_name, :type_cast_functions

  # @param table_name [String, Symbol] table whose columns are copied
  # @param copy_from [String, Symbol, Array] source column name(s)
  # @param copy_to [String, Symbol, Array] destination column name(s);
  #   must contain as many entries as `copy_from`
  # @param model_name [String, nil] name of the model class to operate on;
  #   when blank, a model is defined for `table_name` via `Utils.define_model`
  # @param type_cast_functions [Hash, nil] source column name => name of the
  #   SQL function to apply to that column's value; nil is treated as "no casts"
  #
  # @raise [ArgumentError] when `copy_from` is an Array and `type_cast_functions`
  #   is neither nil nor a Hash, or when the numbers of source and destination
  #   columns differ
  #
  def initialize(table_name, copy_from, copy_to, model_name = nil, type_cast_functions = {})
    @table_name = table_name

    if copy_from.is_a?(Array) && type_cast_functions && !type_cast_functions.is_a?(Hash)
      raise ArgumentError, "type_cast_functions must be a Hash"
    end

    @copy_from = Array.wrap(copy_from)
    @copy_to = Array.wrap(copy_to)

    if @copy_from.size != @copy_to.size
      raise ArgumentError, "Number of source and destination columns must match"
    end

    @model_name = model_name
    # The check above lets nil through, but #process_batch indexes into this
    # value for every source column, so normalize nil to an empty Hash.
    @type_cast_functions = type_cast_functions || {}
  end

  # Rows that still need copying: every destination column is NULL, further
  # narrowed by `Utils.ar_where_not_multiple_conditions` with all source
  # columns mapped to nil.
  # NOTE(review): presumably that helper excludes rows whose source columns
  # are all NULL - confirm against Utils.
  def relation
    relation = model.where(copy_to.map { |to_column| [to_column, nil] }.to_h)

    Utils.ar_where_not_multiple_conditions(
      relation,
      copy_from.map { |from_column| [from_column, nil] }.to_h
    )
  end

  # Issues a single UPDATE, restricted by the constraints of `relation`,
  # that assigns every destination column from its (optionally type cast)
  # source column.
  #
  # @param relation [ActiveRecord::Relation] the batch to process
  #
  def process_batch(relation)
    arel = relation.arel
    arel_table = relation.arel_table

    old_values = copy_from.map do |from_column|
      old_value = arel_table[from_column]
      if (type_cast_function = type_cast_functions[from_column])
        if Utils.ar_version <= 5.2
          # ActiveRecord <= 5.2 does not support quoting of Arel::Nodes::NamedFunction
          old_value = Arel.sql("#{type_cast_function}(#{connection.quote_column_name(from_column)})")
        else
          old_value = Arel::Nodes::NamedFunction.new(type_cast_function, [old_value])
        end
      end
      old_value
    end

    # The UpdateManager constructor takes the engine only on ActiveRecord <= 4.2.
    if Utils.ar_version <= 4.2
      stmt = Arel::UpdateManager.new(arel.engine)
    else
      stmt = Arel::UpdateManager.new
    end

    stmt.table(arel_table)
    stmt.wheres = arel.constraints

    # Pair each destination column with the expression computed for its source.
    updates = copy_to.zip(old_values).map { |to_column, old_value| [arel_table[to_column], old_value] }
    stmt.set(updates)

    connection.update(stmt)
  end

  def count
    # Exact counts are expensive on large tables, since PostgreSQL
    # needs to do a full scan. An estimated count should give a pretty decent
    # approximation of rows count in this case.
    Utils.estimated_count(connection, table_name)
  end

  private
    # Model the migration operates on: the constant named by `model_name`
    # when given, otherwise a model defined on the fly for `table_name`.
    def model
      @model ||= if model_name.present?
                   Object.const_get(model_name, false)
                 else
                   Utils.define_model(ActiveRecord::Base.connection, table_name)
                 end
    end

    def connection
      model.connection
    end
end
|
89
|
+
end
|
90
|
+
end
|
@@ -0,0 +1,210 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module OnlineMigrations
|
4
|
+
module BackgroundMigrations
|
5
|
+
# Record (table `background_migrations`) describing a single background
# migration: the migration class to run, its arguments, the range of the
# batch column to iterate over, batching settings and the current status.
class Migration < ActiveRecord::Base
  STATUSES = [
    :enqueued,    # The migration has been enqueued by the user.
    :running,     # The migration is being performed by a migration executor.
    :paused,      # The migration was paused in the middle of the run by the user.
    :finishing,   # The migration is being manually finished inline by the user.
    :failed,      # The migration raises an exception when running.
    :succeeded,   # The migration finished without error.
  ].freeze

  self.table_name = :background_migrations

  scope :queue_order, -> { order(created_at: :asc) }
  scope :active, -> { where(status: [statuses[:enqueued], statuses[:running]]) }
  scope :for_migration_name, ->(migration_name) { where(migration_name: normalize_migration_name(migration_name)) }
  scope :for_configuration, ->(migration_name, arguments) do
    for_migration_name(migration_name).where("arguments = ?", arguments.to_json)
  end

  # Statuses are stored as strings (each enum value is the status name itself).
  enum status: STATUSES.map { |status| [status, status.to_s] }.to_h

  has_many :migration_jobs

  validates :migration_name, :batch_column_name, presence: true

  validates :batch_pause, :min_value, :max_value, :batch_size, :sub_batch_size,
              presence: true, numericality: { greater_than: 0 }

  validates :sub_batch_pause_ms, presence: true, numericality: { greater_than_or_equal_to: 0 }
  validates :rows_count, numericality: { greater_than_or_equal_to: 0 }, allow_nil: true
  validates :arguments, uniqueness: { scope: :migration_name }

  validate :validate_batch_column_values
  validate :validate_batch_sizes
  validate :validate_jobs_status, if: :status_changed?

  validates_with BackgroundMigrationClassValidator
  validates_with MigrationStatusValidator, on: :update

  before_validation :set_defaults

  # Strips the configured migrations module prefix (with an optional leading
  # `::`) from the beginning of the given class name.
  #
  # @private
  def self.normalize_migration_name(migration_name)
    namespace = ::OnlineMigrations.config.background_migrations.migrations_module
    # Anchor at the start of the string (not of a line) and match the
    # namespace literally rather than as a regexp fragment.
    migration_name.sub(/\A(::)?#{Regexp.escape(namespace.to_s)}::/, "")
  end

  # Stores the migration name in its normalized (namespace-less) form.
  def migration_name=(class_name)
    write_attribute(:migration_name, self.class.normalize_migration_name(class_name))
  end

  # @return [Boolean] whether the migration reached the succeeded or failed status
  def completed?
    succeeded? || failed?
  end

  # @return [MigrationJob, nil] the job with the greatest `max_value`
  def last_job
    migration_jobs.order(max_value: :desc).first
  end

  # @return [MigrationJob, nil] the most recently finished completed job
  def last_completed_job
    migration_jobs.completed.order(finished_at: :desc).first
  end

  # Returns the progress of the background migration.
  #
  # @return [Float, nil]
  #   - when background migration is configured to not to track progress, returns `nil`
  #   - otherwise returns value in range of 0.0 and 1.0
  #
  def progress
    if succeeded?
      1.0
    elsif rows_count
      jobs_rows_count = migration_jobs.succeeded.sum(:batch_size)
      # Nothing processed yet. Returning early also avoids 0.0 / 0 (NaN)
      # when rows_count is 0, which the validations allow.
      return 0.0 if jobs_rows_count.zero?

      # The last migration job may need to process the amount of rows
      # less than the batch size, so we can get a value > 1.0.
      [jobs_rows_count.to_f / rows_count, 1.0].min
    end
  end

  # @return [Class] the background migration class looked up by `migration_name`
  def migration_class
    BackgroundMigration.named(migration_name)
  end

  # @return [Object] memoized instance of the migration class built from `arguments`
  def migration_object
    @migration_object ||= migration_class.new(*arguments)
  end

  # @return [Object] the relation the migration object iterates over
  def migration_relation
    migration_object.relation
  end

  # Returns whether the interval between previous step run has passed.
  # @return [Boolean]
  #
  def interval_elapsed?
    if migration_jobs.running.exists?
      false
    elsif (job = last_completed_job)
      job.finished_at + batch_pause <= Time.current
    else
      true
    end
  end

  # Manually retry failed jobs.
  #
  # This method marks failed jobs as ready to be processed again, and
  # they will be picked up on the next Scheduler run.
  #
  def retry_failed_jobs
    iterator = BatchIterator.new(migration_jobs.failed)
    iterator.each_batch(of: 100) do |batch|
      transaction do
        batch.each(&:retry)
        enqueued!
      end
    end
  end

  # Computes the bounds of the next batch to process.
  #
  # @return [Array(Integer, Integer), nil] `[min_value, max_value]` of the next
  #   batch (upper bound capped by the migration's `max_value`), or nil when
  #   no batch is yielded or the batch starts beyond the migration's `max_value`
  #
  # @private
  def next_batch_range
    iterator = BatchIterator.new(migration_relation)
    batch_range = nil

    # Only the first yielded batch is needed, hence the unconditional break.
    # rubocop:disable Lint/UnreachableLoop
    iterator.each_batch(of: batch_size, column: batch_column_name, start: next_min_value) do |relation|
      if Utils.ar_version <= 4.2
        # ActiveRecord <= 4.2 does not support pluck with Arel nodes
        quoted_column = self.class.connection.quote_column_name(batch_column_name)
        batch_range = relation.pluck("MIN(#{quoted_column}), MAX(#{quoted_column})").first
      else
        min = relation.arel_table[batch_column_name].minimum
        max = relation.arel_table[batch_column_name].maximum

        batch_range = relation.pluck(min, max).first
      end
      break
    end
    # rubocop:enable Lint/UnreachableLoop

    return if batch_range.nil?

    min_value, max_value = batch_range
    return if min_value > self.max_value

    max_value = [max_value, self.max_value].min

    [min_value, max_value]
  end

  private
    def validate_batch_column_values
      if max_value.to_i < min_value.to_i
        errors.add(:base, "max_value should be greater than or equal to min_value")
      end
    end

    def validate_batch_sizes
      if sub_batch_size.to_i > batch_size.to_i
        errors.add(:base, "sub_batch_size should be smaller than or equal to batch_size")
      end
    end

    # A migration may only be marked succeeded when every job succeeded, and
    # only be marked failed when at least one job failed.
    def validate_jobs_status
      if succeeded? && migration_jobs.except_succeeded.exists?
        errors.add(:base, "all migration jobs must be succeeded")
      elsif failed? && !migration_jobs.failed.exists?
        errors.add(:base, "at least one migration job must be failed")
      end
    end

    # Fills in every setting the user did not provide: batching bounds and
    # rows count from the migration relation, the rest from the global config.
    def set_defaults
      if migration_relation.is_a?(ActiveRecord::Relation)
        self.batch_column_name ||= migration_relation.primary_key
        self.min_value ||= migration_relation.minimum(batch_column_name)
        self.max_value ||= migration_relation.maximum(batch_column_name)

        # :no_count is the marker for "progress is not tracked".
        count = migration_object.count
        self.rows_count = count if count != :no_count
      end

      config = ::OnlineMigrations.config.background_migrations
      self.batch_size ||= config.batch_size
      self.sub_batch_size ||= config.sub_batch_size
      self.batch_pause ||= config.batch_pause
      self.sub_batch_pause_ms ||= config.sub_batch_pause_ms
      self.batch_max_attempts ||= config.batch_max_attempts

      # This can be the case when run in development on empty tables
      if min_value.nil?
        # integer IDs minimum value is 1
        self.min_value = self.max_value = 1
      end
    end

    # Value the next batch starts at: right after the last job's upper bound,
    # or the migration's own `min_value` when no job exists yet.
    def next_min_value
      # Look the job up once; every #last_job call runs a query.
      job = last_job
      job ? job.max_value.next : min_value
    end
end
|
209
|
+
end
|
210
|
+
end
|
@@ -0,0 +1,238 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module OnlineMigrations
|
4
|
+
module BackgroundMigrations
|
5
|
+
# Helpers for enqueuing background migrations from within schema migrations.
module MigrationHelpers
  # Enqueues a background migration that backfills a single column with a value.
  #
  # @param table_name [String, Symbol]
  # @param column_name [String, Symbol]
  # @param value
  # @param model_name [String] If Active Record multiple databases feature is used,
  #   the class name of the model to get connection from.
  # @param options [Hash] used to control the behavior of background migration.
  #   See `#enqueue_background_migration`
  #
  # @return [OnlineMigrations::BackgroundMigrations::Migration]
  #
  # @example
  #   backfill_column_in_background(:users, :admin, false)
  #
  # @example Additional background migration options
  #   backfill_column_in_background(:users, :admin, false, batch_size: 10_000)
  #
  # @note This method is better suited for extra large tables (100s of millions of records).
  #   For smaller tables it is probably better and easier to use more flexible `update_column_in_batches`.
  #
  # @note Consider `backfill_columns_in_background` when backfilling multiple columns
  #   to avoid rewriting the table multiple times.
  #
  def backfill_column_in_background(table_name, column_name, value, model_name: nil, **options)
    single_update = { column_name => value }
    backfill_columns_in_background(table_name, single_update, model_name: model_name, **options)
  end

  # Multi-column variant of `backfill_column_in_background`.
  #
  # @param updates [Hash] keys - column names, values - corresponding values
  #
  # @example
  #   backfill_columns_in_background(:users, { admin: false, status: "active" })
  #
  # @see #backfill_column_in_background
  #
  def backfill_columns_in_background(table_name, updates, model_name: nil, **options)
    # A model class may be given in place of its name.
    model_class_name = model_name.is_a?(Class) ? model_name.name : model_name

    enqueue_background_migration("BackfillColumn", table_name, updates, model_class_name, **options)
  end

  # Enqueues a background migration that copies a column's data into its
  # `<column_name>_for_type_change` counterpart.
  #
  # @param table_name [String, Symbol]
  # @param column_name [String, Symbol]
  # @param model_name [String] If Active Record multiple databases feature is used,
  #   the class name of the model to get connection from.
  # @param type_cast_function [String, Symbol] Some type changes require casting data to a new type.
  #   For example when changing from `text` to `jsonb`. In this case, use the `type_cast_function` option.
  #   You need to make sure there is no bad data and the cast will always succeed
  # @param options [Hash] used to control the behavior of background migration.
  #   See `#enqueue_background_migration`
  #
  # @return [OnlineMigrations::BackgroundMigrations::Migration]
  #
  # @example
  #   backfill_column_for_type_change_in_background(:files, :size)
  #
  # @example With type casting
  #   backfill_column_for_type_change_in_background(:users, :settings, type_cast_function: "jsonb")
  #
  # @example Additional background migration options
  #   backfill_column_for_type_change_in_background(:files, :size, batch_size: 10_000)
  #
  # @note This method is better suited for extra large tables (100s of millions of records).
  #   For smaller tables it is probably better and easier to use more flexible `backfill_column_for_type_change`.
  #
  def backfill_column_for_type_change_in_background(table_name, column_name, model_name: nil,
                                                    type_cast_function: nil, **options)
    casts = { column_name => type_cast_function }

    backfill_columns_for_type_change_in_background(
      table_name, column_name, model_name: model_name, type_cast_functions: casts, **options
    )
  end

  # Multi-column variant of `backfill_column_for_type_change_in_background`.
  #
  # @param type_cast_functions [Hash] if not empty, keys - column names,
  #   values - corresponding type cast functions
  #
  # @see #backfill_column_for_type_change_in_background
  #
  def backfill_columns_for_type_change_in_background(table_name, *column_names, model_name: nil,
                                                     type_cast_functions: {}, **options)
    # Each source column is copied into "<name>_for_type_change".
    destination_columns = column_names.map { |name| "#{name}_for_type_change" }
    model_class_name = model_name.is_a?(Class) ? model_name.name : model_name

    enqueue_background_migration(
      "CopyColumn", table_name, column_names, destination_columns,
      model_class_name, type_cast_functions, **options
    )
  end

  # Enqueues a background migration that copies data from one column to another.
  #
  # @param table_name [String, Symbol]
  # @param copy_from [String, Symbol] source column name
  # @param copy_to [String, Symbol] destination column name
  # @param model_name [String] If Active Record multiple databases feature is used,
  #   the class name of the model to get connection from.
  # @param type_cast_function [String, Symbol] Some type changes require casting data to a new type.
  #   For example when changing from `text` to `jsonb`. In this case, use the `type_cast_function` option.
  #   You need to make sure there is no bad data and the cast will always succeed
  # @param options [Hash] used to control the behavior of background migration.
  #   See `#enqueue_background_migration`
  #
  # @return [OnlineMigrations::BackgroundMigrations::Migration]
  #
  # @example
  #   copy_column_in_background(:users, :id, :id_for_type_change)
  #
  # @note This method is better suited for extra large tables (100s of millions of records).
  #   For smaller tables it is probably better and easier to use more flexible `update_column_in_batches`.
  #
  def copy_column_in_background(table_name, copy_from, copy_to, model_name: nil, type_cast_function: nil, **options)
    casts = { copy_from => type_cast_function }

    copy_columns_in_background(
      table_name, [copy_from], [copy_to], model_name: model_name, type_cast_functions: casts, **options
    )
  end

  # Multi-column variant of `copy_column_in_background`.
  #
  # @param type_cast_functions [Hash] if not empty, keys - column names,
  #   values - corresponding type cast functions
  #
  # @see #copy_column_in_background
  #
  def copy_columns_in_background(table_name, copy_from, copy_to, model_name: nil, type_cast_functions: {}, **options)
    model_class_name = model_name.is_a?(Class) ? model_name.name : model_name

    enqueue_background_migration(
      "CopyColumn", table_name, copy_from, copy_to,
      model_class_name, type_cast_functions, **options
    )
  end

  # Creates a background migration for the given job class name.
  #
  # A background migration runs one job at a time, computing the bounds of the next batch
  # based on the current migration settings and the previous batch bounds. Each job's execution status
  # is tracked in the database as the migration runs.
  #
  # @param migration_name [String, Class] Background migration job class name
  # @param arguments [Array] Extra arguments to pass to the job instance when the migration runs
  # @option options [Symbol, String] :batch_column_name (primary key) Column name the migration will batch over
  # @option options [Integer] :min_value Value in the column the batching will begin at,
  #   defaults to `SELECT MIN(batch_column_name)`
  # @option options [Integer] :max_value Value in the column the batching will end at,
  #   defaults to `SELECT MAX(batch_column_name)`
  # @option options [Integer] :batch_size (20_000) Number of rows to process in a single background migration run
  # @option options [Integer] :sub_batch_size (1000) Smaller batches size that the batches will be divided into
  # @option options [Integer] :batch_pause (0) Pause interval between each background migration job's execution (in seconds)
  # @option options [Integer] :sub_batch_pause_ms (100) Number of milliseconds to sleep between each sub_batch execution
  # @option options [Integer] :batch_max_attempts (5) Maximum number of batch run attempts
  #
  # @return [OnlineMigrations::BackgroundMigrations::Migration]
  #
  # @raise [ArgumentError] when `options` contains a key not listed above
  #
  # @example
  #   enqueue_background_migration("BackfillProjectIssuesCount",
  #       batch_size: 10_000, batch_max_attempts: 10)
  #
  #   # Given the background migration exists:
  #
  #   class BackfillProjectIssuesCount < OnlineMigrations::BackgroundMigration
  #     def relation
  #       Project.all
  #     end
  #
  #     def process_batch(projects)
  #       projects.update_all(
  #         "issues_count = (SELECT COUNT(*) FROM issues WHERE issues.project_id = projects.id)"
  #       )
  #     end
  #
  #     # To be able to track progress, you need to define this method
  #     def count
  #       Project.maximum(:id)
  #     end
  #   end
  #
  # @note For convenience, the enqueued background migration is run inline
  #   in development and test environments
  #
  def enqueue_background_migration(migration_name, *arguments, **options)
    options.assert_valid_keys(:batch_column_name, :min_value, :max_value, :batch_size, :sub_batch_size,
                              :batch_pause, :sub_batch_pause_ms, :batch_max_attempts)

    # A migration class may be given in place of its name.
    job_class_name = migration_name.is_a?(Class) ? migration_name.name : migration_name

    migration = Migration.create!(migration_name: job_class_name, arguments: arguments, **options)

    # For convenience in dev/test environments
    MigrationRunner.new(migration).run_all_migration_jobs if Utils.developer_env?

    migration
  end
end
|
237
|
+
end
|
238
|
+
end
|
@@ -0,0 +1,92 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module OnlineMigrations
|
4
|
+
module BackgroundMigrations
|
5
|
+
# Record (table `background_migration_jobs`) describing a single batch of
# a background migration: the range of batch column values it covers,
# its status, attempts and timing.
class MigrationJob < ActiveRecord::Base
  STATUSES = [
    :enqueued,
    :running,
    :failed,
    :succeeded,
  ].freeze

  self.table_name = :background_migration_jobs

  # For ActiveRecord <= 4.2 enum values need to be fully specified
  scope :active, -> { where(status: [statuses[:enqueued], statuses[:running]]) }
  scope :completed, -> { where(status: [statuses[:failed], statuses[:succeeded]]) }

  # Active jobs not updated for at least the configured stuck jobs timeout.
  scope :stuck, -> do
    timeout = ::OnlineMigrations.config.background_migrations.stuck_jobs_timeout
    active.where("updated_at <= ?", timeout.ago)
  end

  # Jobs that can be run again: failed jobs with attempts left, plus stuck jobs.
  scope :retriable, -> do
    failed_retriable = failed.where("attempts < max_attempts")

    # Both queries are embedded into a raw SQL string below, so their values
    # must be inlined instead of being emitted as bind parameters.
    stuck_sql = connection.unprepared_statement { stuck.to_sql }
    failed_retriable_sql = connection.unprepared_statement { failed_retriable.to_sql }

    from(Arel.sql(<<~SQL))
      (
        (#{failed_retriable_sql})
        UNION
        (#{stuck_sql})
      ) AS #{table_name}
    SQL
  end

  scope :except_succeeded, -> { where("status != ?", statuses[:succeeded]) }

  # Statuses are stored as strings (each enum value is the status name itself).
  enum status: STATUSES.map { |status| [status, status.to_s] }.to_h

  delegate :migration_class, :migration_object, :migration_relation, :batch_column_name,
    :arguments, :batch_pause, to: :migration

  belongs_to :migration

  # For ActiveRecord 5.0+ this is validated by default from belongs_to
  validates :migration, presence: true

  validates :min_value, :max_value, presence: true, numericality: { greater_than: 0 }
  validate :values_in_migration_range, if: :min_value?
  validate :validate_values_order, if: :min_value?

  validates_with MigrationJobStatusValidator, on: :update

  before_create :copy_settings_from_migration

  # Mark this job as ready to be processed again.
  #
  # This is used when retrying failed jobs.
  #
  def retry
    update!(
      status: self.class.statuses[:enqueued],
      attempts: 0,
      started_at: nil,
      finished_at: nil
    )
  end

  private
    # The job's range must lie within the range of its migration.
    def values_in_migration_range
      # A missing migration or max_value is already reported by the presence
      # validations; comparing against nil here would raise NoMethodError
      # instead of producing a validation error.
      return if migration.nil? || max_value.nil?

      if min_value < migration.min_value || max_value > migration.max_value
        errors.add(:base, "min_value and max_value should be in background migration values range")
      end
    end

    def validate_values_order
      if max_value.to_i < min_value.to_i
        errors.add(:base, "max_value should be greater than or equal to min_value")
      end
    end

    # Snapshots the migration's batching settings onto the job at creation time.
    def copy_settings_from_migration
      self.batch_size     = migration.batch_size
      self.sub_batch_size = migration.sub_batch_size
      self.pause_ms       = migration.sub_batch_pause_ms
      self.max_attempts   = migration.batch_max_attempts
    end
end
|
91
|
+
end
|
92
|
+
end
|
@@ -0,0 +1,63 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module OnlineMigrations
|
4
|
+
module BackgroundMigrations
|
5
|
+
# Executes a single migration job: marks it as running, processes its range
# in sub-batches and records the outcome (succeeded or failed) on the job.
#
# @private
class MigrationJobRunner
  attr_reader :migration_job

  delegate :attempts, :migration_relation, :migration_object, :sub_batch_size,
    :batch_column_name, :min_value, :max_value, :pause_ms, to: :migration_job

  # @param migration_job [MigrationJob] the job to execute
  def initialize(migration_job)
    @migration_job = migration_job
  end

  # Runs the job. Any exception raised along the way is stored on the job
  # (class, message, backtrace), the job is marked as failed and the
  # configured error handler is called with the exception and the job.
  def run
    payload = { background_migration_job: migration_job }

    # A job that was attempted before is being retried.
    ActiveSupport::Notifications.instrument("retried.background_migrations", payload) if attempts >= 1

    mark_as_running
    ActiveSupport::Notifications.instrument("process_batch.background_migrations", payload) { run_batch }
    migration_job.update!(status: :succeeded, finished_at: Time.current)
  rescue Exception => error # rubocop:disable Lint/RescueException
    config = ::OnlineMigrations.config.background_migrations

    mark_as_failed(error, config.backtrace_cleaner)
    config.error_handler.call(error, migration_job)
  end

  private
    # Starts a new attempt, clearing the results of the previous one.
    def mark_as_running
      migration_job.update!(
        attempts: attempts + 1,
        status: :running,
        started_at: Time.current,
        finished_at: nil,
        error_class: nil,
        error_message: nil,
        backtrace: nil
      )
    end

    # Stores the error details on the job; the backtrace is passed through
    # the cleaner when one is configured.
    def mark_as_failed(error, cleaner)
      stored_backtrace = cleaner ? cleaner.clean(error.backtrace) : error.backtrace

      migration_job.update!(
        status: :failed,
        finished_at: Time.current,
        error_class: error.class.name,
        error_message: error.message,
        backtrace: stored_backtrace
      )
    end

    # Walks the job's range in sub-batches, handing each one to the migration
    # object and pausing between them when a pause is configured.
    def run_batch
      sub_batches = ::OnlineMigrations::BatchIterator.new(migration_relation)
      bounds = { of: sub_batch_size, column: batch_column_name, start: min_value, finish: max_value }

      sub_batches.each_batch(**bounds) do |sub_batch|
        migration_object.process_batch(sub_batch)
        sleep(pause_ms * 0.001) if pause_ms > 0
      end
    end
end
|
62
|
+
end
|
63
|
+
end
|