online_migrations 0.26.0 → 0.27.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (50) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +14 -0
  3. data/docs/0.27-upgrade.md +24 -0
  4. data/docs/background_data_migrations.md +200 -101
  5. data/docs/background_schema_migrations.md +2 -2
  6. data/lib/generators/online_migrations/{background_migration_generator.rb → data_migration_generator.rb} +4 -4
  7. data/lib/generators/online_migrations/templates/change_background_data_migrations.rb.tt +34 -0
  8. data/lib/generators/online_migrations/templates/{background_data_migration.rb.tt → data_migration.rb.tt} +8 -9
  9. data/lib/generators/online_migrations/templates/initializer.rb.tt +19 -25
  10. data/lib/generators/online_migrations/templates/install_migration.rb.tt +9 -40
  11. data/lib/generators/online_migrations/upgrade_generator.rb +16 -8
  12. data/lib/online_migrations/active_record_batch_enumerator.rb +8 -0
  13. data/lib/online_migrations/background_data_migrations/backfill_column.rb +50 -0
  14. data/lib/online_migrations/background_data_migrations/config.rb +62 -0
  15. data/lib/online_migrations/{background_migrations → background_data_migrations}/copy_column.rb +15 -28
  16. data/lib/online_migrations/{background_migrations → background_data_migrations}/delete_associated_records.rb +9 -5
  17. data/lib/online_migrations/{background_migrations → background_data_migrations}/delete_orphaned_records.rb +5 -9
  18. data/lib/online_migrations/background_data_migrations/migration.rb +312 -0
  19. data/lib/online_migrations/{background_migrations → background_data_migrations}/migration_helpers.rb +72 -61
  20. data/lib/online_migrations/background_data_migrations/migration_job.rb +158 -0
  21. data/lib/online_migrations/background_data_migrations/migration_status_validator.rb +65 -0
  22. data/lib/online_migrations/{background_migrations → background_data_migrations}/perform_action_on_relation.rb +5 -5
  23. data/lib/online_migrations/{background_migrations → background_data_migrations}/reset_counters.rb +5 -5
  24. data/lib/online_migrations/background_data_migrations/scheduler.rb +78 -0
  25. data/lib/online_migrations/background_data_migrations/ticker.rb +62 -0
  26. data/lib/online_migrations/background_schema_migrations/config.rb +2 -2
  27. data/lib/online_migrations/background_schema_migrations/migration.rb +51 -123
  28. data/lib/online_migrations/background_schema_migrations/migration_helpers.rb +25 -46
  29. data/lib/online_migrations/background_schema_migrations/migration_runner.rb +43 -97
  30. data/lib/online_migrations/background_schema_migrations/scheduler.rb +2 -2
  31. data/lib/online_migrations/change_column_type_helpers.rb +3 -2
  32. data/lib/online_migrations/config.rb +4 -4
  33. data/lib/online_migrations/data_migration.rb +127 -0
  34. data/lib/online_migrations/lock_retrier.rb +5 -2
  35. data/lib/online_migrations/schema_statements.rb +1 -1
  36. data/lib/online_migrations/shard_aware.rb +44 -0
  37. data/lib/online_migrations/version.rb +1 -1
  38. data/lib/online_migrations.rb +18 -11
  39. metadata +22 -21
  40. data/lib/online_migrations/background_migration.rb +0 -64
  41. data/lib/online_migrations/background_migrations/backfill_column.rb +0 -54
  42. data/lib/online_migrations/background_migrations/background_migration_class_validator.rb +0 -29
  43. data/lib/online_migrations/background_migrations/config.rb +0 -74
  44. data/lib/online_migrations/background_migrations/migration.rb +0 -329
  45. data/lib/online_migrations/background_migrations/migration_job.rb +0 -109
  46. data/lib/online_migrations/background_migrations/migration_job_runner.rb +0 -66
  47. data/lib/online_migrations/background_migrations/migration_job_status_validator.rb +0 -29
  48. data/lib/online_migrations/background_migrations/migration_runner.rb +0 -161
  49. data/lib/online_migrations/background_migrations/migration_status_validator.rb +0 -48
  50. data/lib/online_migrations/background_migrations/scheduler.rb +0 -42
@@ -2,17 +2,16 @@
2
2
 
3
3
  module <%= migrations_module %>
4
4
  <% module_namespacing do -%>
5
- class <%= class_name %> < OnlineMigrations::BackgroundMigration
6
- def relation
7
- # return ActiveRecord::Relation to be iterated over
5
+ class <%= class_name %> < OnlineMigrations::DataMigration
6
+ def collection
7
+ # Collection to iterate over.
8
+ # Must be ActiveRecord::Relation, ActiveRecord::Batches::BatchEnumerator, or Array.
8
9
  end
9
10
 
10
- def process_batch(relation)
11
- # 'relation' is an ActiveRecord::Relation instance containing a batch to process.
12
- #
13
- # The work to be done in a single iteration of the background migration.
14
- # This should be idempotent, as the same batch may be processed more
15
- # than once if the background migration is interrupted and resumed.
11
+ def process(element)
12
+ # The work to be done in a single iteration of the migration.
13
+ # This should be idempotent, as the same element may be processed more
14
+ # than once if the migration is interrupted and resumed.
16
15
  end
17
16
 
18
17
  # Optional.
@@ -96,40 +96,34 @@ OnlineMigrations.configure do |config|
96
96
  config.backtrace_cleaner = Rails.backtrace_cleaner
97
97
 
98
98
  # ==> Background data migrations configuration
99
- # The path where generated background migrations will be placed.
100
- config.background_migrations.migrations_path = "lib"
101
-
102
- # The module in which background migrations will be placed.
103
- config.background_migrations.migrations_module = "OnlineMigrations::BackgroundMigrations"
104
-
105
- # The number of rows to process in a single background migration run.
106
- config.background_migrations.batch_size = 1_000
107
-
108
- # The smaller batches size that the batches will be divided into.
109
- config.background_migrations.sub_batch_size = 100
110
-
111
- # The pause interval between each background migration job's execution (in seconds).
112
- config.background_migrations.batch_pause = 0.seconds
99
+ #
100
+ # The path where generated data migrations will be placed.
101
+ config.background_data_migrations.migrations_path = "lib"
113
102
 
114
- # The number of milliseconds to sleep between each sub_batch execution.
115
- config.background_migrations.sub_batch_pause_ms = 100
103
+ # The module in which data migrations will be placed.
104
+ config.background_data_migrations.migrations_module = "OnlineMigrations::DataMigrations"
116
105
 
117
- # Maximum number of batch run attempts.
118
- # When attempts are exhausted, the individual batch is marked as failed.
119
- config.background_migrations.batch_max_attempts = 5
106
+ # Maximum number of run attempts.
107
+ # When attempts are exhausted, the data migration is marked as failed.
108
+ config.background_data_migrations.max_attempts = 5
120
109
 
121
- # The number of seconds that must pass before the running job is considered stuck.
122
- config.background_migrations.stuck_jobs_timeout = 1.hour
110
+ # The number of seconds that must pass before a data migration that is still cancelling or pausing is considered stuck.
111
+ config.background_data_migrations.stuck_timeout = 5.minutes
123
112
 
124
- # The callback to perform when an error occurs in the migration job.
125
- # config.background_migrations.error_handler = ->(error, errored_job) do
113
+ # The callback to perform when an error occurs during the data migration.
114
+ # config.background_data_migrations.error_handler = ->(error, errored_migration) do
126
115
  # Bugsnag.notify(error) do |notification|
127
- # notification.add_metadata(:background_migration, { name: errored_job.migration_name })
116
+ # notification.add_metadata(:background_migration, { name: errored_migration.migration_name })
128
117
  # end
129
118
  # end
130
119
 
120
+ # The name of the Sidekiq job to be used to perform data migrations.
121
+ config.background_data_migrations.job = "OnlineMigrations::BackgroundDataMigrations::MigrationJob"
122
+
131
123
  # ==> Background schema migrations configuration
132
- # When attempts are exhausted, the failing migration stops to be retried.
124
+ #
125
+ # Maximum number of run attempts.
126
+ # When attempts are exhausted, the background schema migration is marked as failed.
133
127
  config.background_schema_migrations.max_attempts = 5
134
128
 
135
129
  # Statement timeout value used when running background schema migration.
@@ -1,63 +1,34 @@
1
1
  class InstallOnlineMigrations < <%= migration_parent %>
2
2
  def change
3
- create_table :background_migrations do |t|
4
- t.bigint :parent_id
3
+ create_table :background_data_migrations do |t|
5
4
  t.string :migration_name, null: false
6
5
  t.jsonb :arguments, default: [], null: false
7
- t.string :batch_column_name, null: false
8
- t.bigint :min_value, null: false
9
- t.bigint :max_value, null: false
10
- t.bigint :rows_count
11
- t.integer :batch_size, null: false
12
- t.integer :sub_batch_size, null: false
13
- t.integer :batch_pause, null: false
14
- t.integer :sub_batch_pause_ms, null: false
15
- t.integer :batch_max_attempts, null: false
16
6
  t.string :status, default: "enqueued", null: false
17
7
  t.string :shard
18
- t.boolean :composite, default: false, null: false
8
+ t.string :cursor
9
+ t.string :jid
19
10
  t.datetime :started_at
20
11
  t.datetime :finished_at
21
- t.timestamps
22
-
23
- t.foreign_key :background_migrations, column: :parent_id, on_delete: :cascade
24
-
25
- t.index [:migration_name, :arguments, :shard],
26
- unique: true, name: :index_background_migrations_on_unique_configuration
27
- end
28
-
29
- create_table :background_migration_jobs do |t|
30
- t.bigint :migration_id, null: false
31
- t.bigint :min_value, null: false
32
- t.bigint :max_value, null: false
33
- t.integer :batch_size, null: false
34
- t.integer :sub_batch_size, null: false
35
- t.integer :pause_ms, null: false
36
- t.datetime :started_at
37
- t.datetime :finished_at
38
- t.string :status, default: "enqueued", null: false
12
+ t.bigint :tick_total
13
+ t.bigint :tick_count, default: 0, null: false
14
+ t.float :time_running, default: 0.0, null: false
39
15
  t.integer :max_attempts, null: false
40
- t.integer :attempts, default: 0, null: false
41
16
  t.string :error_class
42
17
  t.string :error_message
43
18
  t.string :backtrace, array: true
19
+ t.string :connection_class_name
44
20
  t.timestamps
45
21
 
46
- t.foreign_key :background_migrations, column: :migration_id, on_delete: :cascade
47
-
48
- t.index [:migration_id, :max_value], name: :index_background_migration_jobs_on_max_value
49
- t.index [:migration_id, :status, :updated_at], name: :index_background_migration_jobs_on_updated_at
50
- t.index [:migration_id, :finished_at], name: :index_background_migration_jobs_on_finished_at
22
+ t.index [:migration_name, :arguments, :shard],
23
+ unique: true, name: :index_background_data_migrations_on_unique_configuration
51
24
  end
52
25
 
53
26
  create_table :background_schema_migrations do |t|
54
- t.bigint :parent_id
55
27
  t.string :migration_name, null: false
56
28
  t.string :table_name, null: false
57
29
  t.string :definition, null: false
58
30
  t.string :status, default: "enqueued", null: false
59
31
  t.string :shard
60
- t.boolean :composite, default: false, null: false
61
32
  t.integer :statement_timeout
62
33
  t.datetime :started_at
63
34
  t.datetime :finished_at
@@ -69,8 +40,6 @@ class InstallOnlineMigrations < <%= migration_parent %>
69
40
  t.string :connection_class_name
70
41
  t.timestamps
71
42
 
72
- t.foreign_key :background_schema_migrations, column: :parent_id, on_delete: :cascade
73
-
74
43
  t.index [:migration_name, :shard, :connection_class_name], unique: true,
75
44
  name: :index_background_schema_migrations_on_unique_configuration
76
45
  end
@@ -11,33 +11,41 @@ module OnlineMigrations
11
11
  source_root File.expand_path("templates", __dir__)
12
12
 
13
13
  def copy_templates
14
- migrations_to_be_applied.each do |migration|
14
+ migrations_to_apply.each do |migration|
15
15
  migration_template("#{migration}.rb", File.join(db_migrate_path, "#{migration}.rb"))
16
16
  end
17
17
  end
18
18
 
19
19
  private
20
- def migrations_to_be_applied
21
- connection = BackgroundMigrations::Migration.connection
22
- columns = connection.columns(BackgroundMigrations::Migration.table_name).map(&:name)
20
+ def migrations_to_apply
21
+ connection = BackgroundDataMigrations::Migration.connection
22
+ data_table = "background_migrations"
23
+ schema_table = "background_schema_migrations"
24
+ columns = connection.columns(data_table).map(&:name)
23
25
 
24
26
  migrations = []
25
- migrations << "add_sharding_to_online_migrations" if !columns.include?("shard")
27
+ if connection.table_exists?(data_table) && !columns.include?("shard")
28
+ migrations << "add_sharding_to_online_migrations"
29
+ end
26
30
 
27
- if !connection.table_exists?(BackgroundSchemaMigrations::Migration.table_name)
31
+ if !connection.table_exists?(schema_table)
28
32
  migrations << "create_background_schema_migrations"
29
33
  end
30
34
 
31
- indexes = connection.indexes(BackgroundSchemaMigrations::Migration.table_name)
35
+ indexes = connection.indexes(schema_table)
32
36
  unique_index = indexes.find { |i| i.unique && i.columns.sort == ["connection_class_name", "migration_name", "shard"] }
33
37
  if !unique_index
34
38
  migrations << "background_schema_migrations_change_unique_index"
35
39
  end
36
40
 
37
- if !connection.column_exists?(BackgroundMigrations::Migration.table_name, :started_at)
41
+ if connection.table_exists?(data_table) && !connection.column_exists?(data_table, :started_at)
38
42
  migrations << "add_timestamps_to_background_migrations"
39
43
  end
40
44
 
45
+ if connection.table_exists?(data_table)
46
+ migrations << "change_background_data_migrations"
47
+ end
48
+
41
49
  migrations
42
50
  end
43
51
 
@@ -0,0 +1,8 @@
1
+ # frozen_string_literal: true
2
+
3
+ module OnlineMigrations
4
+ # @private
5
+ module ActiveRecordBatchEnumerator
6
+ attr_reader :use_ranges
7
+ end
8
+ end
@@ -0,0 +1,50 @@
1
+ # frozen_string_literal: true
2
+
3
+ module OnlineMigrations
4
+ module BackgroundDataMigrations
5
+ # @private
6
+ class BackfillColumn < DataMigration
7
+ attr_reader :table_name, :updates, :model
8
+
9
+ def initialize(table_name, updates, model_name = nil)
10
+ @table_name = table_name
11
+ @updates = updates
12
+
13
+ @model =
14
+ if model_name
15
+ Object.const_get(model_name, false)
16
+ else
17
+ Utils.define_model(table_name)
18
+ end
19
+ end
20
+
21
+ def collection
22
+ column, value = updates.first
23
+
24
+ relation =
25
+ if updates.size == 1 && !value.nil?
26
+ # If value is nil, the generated SQL is correct (`WHERE column IS NOT NULL`).
27
+ # Otherwise, the SQL is `WHERE column != value`. This condition skips rows whose
28
+ # column is NULL, so we also need to check for NULLs explicitly.
29
+ arel_column = model.arel_table[column]
30
+ model.unscoped.where(arel_column.not_eq(value).or(arel_column.eq(nil)))
31
+ else
32
+ model.unscoped.where.not(updates)
33
+ end
34
+
35
+ relation.in_batches(of: 100, use_ranges: true)
36
+ end
37
+
38
+ def process(relation)
39
+ relation.update_all(updates)
40
+ end
41
+
42
+ def count
43
+ # Exact counts are expensive on large tables, since PostgreSQL
44
+ # needs to do a full scan. An estimated count should give a pretty decent
45
+ # approximation of rows count in this case.
46
+ Utils.estimated_count(model.connection, table_name)
47
+ end
48
+ end
49
+ end
50
+ end
@@ -0,0 +1,62 @@
1
+ # frozen_string_literal: true
2
+
3
+ module OnlineMigrations
4
+ module BackgroundDataMigrations
5
+ # Class representing configuration options for data migrations.
6
+ class Config
7
+ # The path where generated data migrations will be placed.
8
+ # @return [String] defaults to "lib"
9
+ attr_accessor :migrations_path
10
+
11
+ # The module in which data migrations will be placed.
12
+ # @return [String] defaults to "OnlineMigrations::DataMigrations"
13
+ attr_accessor :migrations_module
14
+
15
+ # Maximum number of run attempts.
16
+ #
17
+ # When attempts are exhausted, the data migration is marked as failed.
18
+ # @return [Integer] defaults to 5
19
+ attr_accessor :max_attempts
20
+
21
+ # The number of seconds that must pass before a data migration that is still cancelling or pausing is considered stuck.
22
+ #
23
+ # @return [Integer] defaults to 5 minutes
24
+ #
25
+ attr_accessor :stuck_timeout
26
+
27
+ # The pause interval between each data migration's `process` method execution (in seconds).
28
+ # @return [Integer] defaults to 0
29
+ #
30
+ attr_accessor :iteration_pause
31
+
32
+ # The callback to perform when an error occurs during the data migration.
33
+ #
34
+ # @example
35
+ # OnlineMigrations.config.background_data_migrations.error_handler = ->(error, errored_migration) do
36
+ # Bugsnag.notify(error) do |notification|
37
+ # notification.add_metadata(:background_migration, { name: errored_migration.migration_name })
38
+ # end
39
+ # end
40
+ #
41
+ # @return [Proc]
42
+ #
43
+ attr_accessor :error_handler
44
+
45
+ # The name of the Sidekiq job to be used to perform data migrations.
46
+ #
47
+ # @return [String] defaults to "OnlineMigrations::BackgroundDataMigrations::MigrationJob"
48
+ #
49
+ attr_accessor :job
50
+
51
+ def initialize
52
+ @migrations_path = "lib"
53
+ @migrations_module = "OnlineMigrations::DataMigrations"
54
+ @max_attempts = 5
55
+ @stuck_timeout = 5.minutes
56
+ @iteration_pause = 0.seconds
57
+ @error_handler = ->(error, errored_migration) {}
58
+ @job = "OnlineMigrations::BackgroundDataMigrations::MigrationJob"
59
+ end
60
+ end
61
+ end
62
+ end
@@ -1,9 +1,9 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module OnlineMigrations
4
- module BackgroundMigrations
4
+ module BackgroundDataMigrations
5
5
  # @private
6
- class CopyColumn < BackgroundMigration
6
+ class CopyColumn < DataMigration
7
7
  attr_reader :table_name, :copy_from, :copy_to, :model_name, :type_cast_functions
8
8
 
9
9
  def initialize(table_name, copy_from, copy_to, model_name = nil, type_cast_functions = {})
@@ -21,15 +21,21 @@ module OnlineMigrations
21
21
  end
22
22
 
23
23
  @model_name = model_name
24
+ @model =
25
+ if model_name.present?
26
+ Object.const_get(model_name, false)
27
+ else
28
+ Utils.define_model(table_name)
29
+ end
30
+
24
31
  @type_cast_functions = type_cast_functions
25
32
  end
26
33
 
27
- def relation
28
- model.unscoped
34
+ def collection
35
+ @model.unscoped.in_batches(of: 100, use_ranges: true)
29
36
  end
30
37
 
31
- def process_batch(relation)
32
- arel = relation.arel
38
+ def process(relation)
33
39
  arel_table = relation.arel_table
34
40
 
35
41
  old_values = copy_from.map do |from_column|
@@ -46,35 +52,16 @@ module OnlineMigrations
46
52
  old_value
47
53
  end
48
54
 
49
- stmt = Arel::UpdateManager.new
50
- stmt.table(arel_table)
51
- stmt.wheres = arel.constraints
52
-
53
- updates = copy_to.zip(old_values).map { |to_column, old_value| [arel_table[to_column], old_value] }
54
- stmt.set(updates)
55
-
56
- connection.update(stmt)
55
+ updates = copy_to.zip(old_values).to_h { |to_column, old_value| [to_column, old_value] }
56
+ relation.update_all(updates)
57
57
  end
58
58
 
59
59
  def count
60
60
  # Exact counts are expensive on large tables, since PostgreSQL
61
61
  # needs to do a full scan. An estimated count should give a pretty decent
62
62
  # approximation of rows count in this case.
63
- Utils.estimated_count(connection, table_name)
63
+ Utils.estimated_count(@model.connection, table_name)
64
64
  end
65
-
66
- private
67
- def model
68
- @model ||= if model_name.present?
69
- Object.const_get(model_name, false)
70
- else
71
- Utils.define_model(table_name)
72
- end
73
- end
74
-
75
- def connection
76
- model.connection
77
- end
78
65
  end
79
66
  end
80
67
  end
@@ -1,9 +1,9 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module OnlineMigrations
4
- module BackgroundMigrations
4
+ module BackgroundDataMigrations
5
5
  # @private
6
- class DeleteAssociatedRecords < BackgroundMigration
6
+ class DeleteAssociatedRecords < DataMigration
7
7
  attr_reader :record, :association
8
8
 
9
9
  def initialize(model_name, record_id, association, _options = {})
@@ -12,17 +12,21 @@ module OnlineMigrations
12
12
  @association = association
13
13
  end
14
14
 
15
- def relation
15
+ def collection
16
16
  if !@record.respond_to?(association)
17
17
  raise ArgumentError, "'#{@record.class.name}' has no association called '#{association}'"
18
18
  end
19
19
 
20
- record.public_send(association)
20
+ record.public_send(association).in_batches(of: 100)
21
21
  end
22
22
 
23
- def process_batch(relation)
23
+ def process(relation)
24
24
  relation.delete_all
25
25
  end
26
+
27
+ def count
28
+ record.public_send(association).count
29
+ end
26
30
  end
27
31
  end
28
32
  end
@@ -1,9 +1,9 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module OnlineMigrations
4
- module BackgroundMigrations
4
+ module BackgroundDataMigrations
5
5
  # @private
6
- class DeleteOrphanedRecords < BackgroundMigration
6
+ class DeleteOrphanedRecords < DataMigration
7
7
  attr_reader :model, :associations
8
8
 
9
9
  def initialize(model_name, associations, _options = {})
@@ -11,16 +11,12 @@ module OnlineMigrations
11
11
  @associations = associations.map(&:to_sym)
12
12
  end
13
13
 
14
- def relation
14
+ def collection
15
15
  model.unscoped.where.missing(*associations)
16
16
  end
17
17
 
18
- def process_batch(relation)
19
- relation.delete_all
20
- end
21
-
22
- def count
23
- Utils.estimated_count(model.connection, model.table_name)
18
+ def process(record)
19
+ record.destroy
24
20
  end
25
21
  end
26
22
  end