pg_online_schema_change 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,302 @@
1
+ require "pg_query"
2
+ require "pg"
3
+
4
+ module PgOnlineSchemaChange
5
+ class Query
6
+ extend Helper
7
+
8
+ INDEX_SUFFIX = "_pgosc".freeze
9
+ DROPPED_COLUMN_TYPE = :AT_DropColumn
10
+ RENAMED_COLUMN_TYPE = :AT_RenameColumn
11
+ LOCK_ATTEMPT = 4
12
+
13
+ class << self
14
+ def alter_statement?(query)
15
+ PgQuery.parse(query).tree.stmts.all? do |statement|
16
+ statement.stmt.alter_table_stmt.instance_of?(PgQuery::AlterTableStmt) || statement.stmt.rename_stmt.instance_of?(PgQuery::RenameStmt)
17
+ end
18
+ rescue PgQuery::ParseError => e
19
+ false
20
+ end
21
+
22
+ def same_table?(query)
23
+ tables = PgQuery.parse(query).tree.stmts.map do |statement|
24
+ if statement.stmt.alter_table_stmt.instance_of?(PgQuery::AlterTableStmt)
25
+ statement.stmt.alter_table_stmt.relation.relname
26
+ elsif statement.stmt.rename_stmt.instance_of?(PgQuery::RenameStmt)
27
+ statement.stmt.rename_stmt.relation.relname
28
+ end
29
+ end.compact
30
+
31
+ tables.uniq.count == 1
32
+ rescue PgQuery::ParseError => e
33
+ false
34
+ end
35
+
36
+ def table(query)
37
+ from_rename_statement = PgQuery.parse(query).tree.stmts.map do |statement|
38
+ statement.stmt.rename_stmt&.relation&.relname
39
+ end.compact[0]
40
+ PgQuery.parse(query).tables[0] || from_rename_statement
41
+ end
42
+
43
+ def run(connection, query, reuse_trasaction = false, &block)
44
+ connection.cancel if [PG::PQTRANS_INERROR, PG::PQTRANS_UNKNOWN].include?(connection.transaction_status)
45
+
46
+ logger.debug("Running query", { query: query })
47
+
48
+ connection.async_exec("BEGIN;")
49
+
50
+ result = connection.async_exec(query, &block)
51
+ rescue Exception
52
+ connection.cancel if connection.transaction_status != PG::PQTRANS_IDLE
53
+ connection.block
54
+ logger.info("Exception raised, rolling back query", { rollback: true, query: query })
55
+ connection.async_exec("ROLLBACK;")
56
+ connection.async_exec("COMMIT;")
57
+ raise
58
+ else
59
+ connection.async_exec("COMMIT;") unless reuse_trasaction
60
+ result
61
+ end
62
+
63
+ def table_columns(client, table = nil)
64
+ sql = <<~SQL
65
+ SELECT attname as column_name, format_type(atttypid, atttypmod) as type, attnum as column_position FROM pg_attribute
66
+ WHERE attrelid = \'#{table || client.table}\'::regclass AND attnum > 0 AND NOT attisdropped
67
+ ORDER BY attnum;
68
+ SQL
69
+ mapped_columns = []
70
+
71
+ run(client.connection, sql) do |result|
72
+ mapped_columns = result.map do |row|
73
+ row["column_name_regular"] = row["column_name"]
74
+ row["column_name"] = client.connection.quote_ident(row["column_name"])
75
+ row["column_position"] = row["column_position"].to_i
76
+ row
77
+ end
78
+ end
79
+
80
+ mapped_columns
81
+ end
82
+
83
+ def alter_statement_for(client, shadow_table)
84
+ parsed_query = PgQuery.parse(client.alter_statement)
85
+
86
+ parsed_query.tree.stmts.each do |statement|
87
+ statement.stmt.alter_table_stmt.relation.relname = shadow_table if statement.stmt.alter_table_stmt
88
+
89
+ statement.stmt.rename_stmt.relation.relname = shadow_table if statement.stmt.rename_stmt
90
+ end
91
+ parsed_query.deparse
92
+ end
93
+
94
+ def get_indexes_for(client, table)
95
+ query = <<~SQL
96
+ SELECT indexdef, schemaname
97
+ FROM pg_indexes
98
+ WHERE schemaname = \'#{client.schema}\' AND tablename = \'#{table}\'
99
+ SQL
100
+
101
+ indexes = []
102
+ run(client.connection, query) do |result|
103
+ indexes = result.map { |row| row["indexdef"] }
104
+ end
105
+
106
+ indexes
107
+ end
108
+
109
+ def get_all_constraints_for(client)
110
+ query = <<~SQL
111
+ SELECT conrelid::regclass AS table_on,
112
+ confrelid::regclass AS table_from,
113
+ contype as constraint_type,
114
+ conname AS constraint_name,
115
+ convalidated AS constraint_validated,
116
+ pg_get_constraintdef(oid) AS definition
117
+ FROM pg_constraint
118
+ WHERE contype IN ('f', 'p')
119
+ SQL
120
+
121
+ constraints = []
122
+ run(client.connection, query) do |result|
123
+ constraints = result.map { |row| row }
124
+ end
125
+
126
+ constraints
127
+ end
128
+
129
+ def get_primary_keys_for(client, table)
130
+ get_all_constraints_for(client).select do |row|
131
+ row["table_on"] == table && row["constraint_type"] == "p"
132
+ end
133
+ end
134
+
135
+ def get_foreign_keys_for(client, table)
136
+ get_all_constraints_for(client).select do |row|
137
+ row["table_on"] == table && row["constraint_type"] == "f"
138
+ end
139
+ end
140
+
141
+ def get_foreign_keys_to_refresh(client, table)
142
+ references = get_all_constraints_for(client).select do |row|
143
+ row["table_from"] == table && row["constraint_type"] == "f"
144
+ end
145
+
146
+ references.map do |row|
147
+ if row["definition"].end_with?("NOT VALID")
148
+ add_statement = "ALTER TABLE #{row["table_on"]} ADD CONSTRAINT #{row["constraint_name"]} #{row["definition"]};"
149
+ else
150
+ add_statement = "ALTER TABLE #{row["table_on"]} ADD CONSTRAINT #{row["constraint_name"]} #{row["definition"]} NOT VALID;"
151
+ end
152
+
153
+ drop_statement = "ALTER TABLE #{row["table_on"]} DROP CONSTRAINT #{row["constraint_name"]};"
154
+
155
+ "#{drop_statement} #{add_statement}"
156
+ end.join
157
+ end
158
+
159
+ def get_foreign_keys_to_validate(client, table)
160
+ references = get_all_constraints_for(client).select do |row|
161
+ row["table_from"] == table && row["constraint_type"] == "f"
162
+ end
163
+
164
+ references.map do |row|
165
+ "ALTER TABLE #{row["table_on"]} VALIDATE CONSTRAINT #{row["constraint_name"]};"
166
+ end.join
167
+ end
168
+
169
+ def dropped_columns(client)
170
+ PgQuery.parse(client.alter_statement).tree.stmts.map do |statement|
171
+ next if statement.stmt.alter_table_stmt.nil?
172
+
173
+ statement.stmt.alter_table_stmt.cmds.map do |cmd|
174
+ cmd.alter_table_cmd.name if cmd.alter_table_cmd.subtype == DROPPED_COLUMN_TYPE
175
+ end
176
+ end.flatten.compact
177
+ end
178
+
179
+ def renamed_columns(client)
180
+ PgQuery.parse(client.alter_statement).tree.stmts.map do |statement|
181
+ next if statement.stmt.rename_stmt.nil?
182
+
183
+ {
184
+ old_name: statement.stmt.rename_stmt.subname,
185
+ new_name: statement.stmt.rename_stmt.newname,
186
+ }
187
+ end.flatten.compact
188
+ end
189
+
190
+ def primary_key_for(client, table)
191
+ query = <<~SQL
192
+ SELECT
193
+ pg_attribute.attname as column_name
194
+ FROM pg_index, pg_class, pg_attribute, pg_namespace
195
+ WHERE
196
+ pg_class.oid = \'#{table}\'::regclass AND
197
+ indrelid = pg_class.oid AND
198
+ nspname = \'#{client.schema}\' AND
199
+ pg_class.relnamespace = pg_namespace.oid AND
200
+ pg_attribute.attrelid = pg_class.oid AND
201
+ pg_attribute.attnum = any(pg_index.indkey)
202
+ AND indisprimary
203
+ SQL
204
+
205
+ columns = []
206
+ run(client.connection, query) do |result|
207
+ columns = result.map { |row| row["column_name"] }
208
+ end
209
+
210
+ columns.first
211
+ end
212
+
213
+ def storage_parameters_for(client, table)
214
+ query = <<~SQL
215
+ SELECT array_to_string(reloptions, ',') as params FROM pg_class WHERE relname=\'#{table}\';
216
+ SQL
217
+
218
+ columns = []
219
+ run(client.connection, query) do |result|
220
+ columns = result.map { |row| row["params"] }
221
+ end
222
+
223
+ columns.first
224
+ end
225
+
226
+ # This function acquires the lock and keeps the transaction
227
+ # open. If a lock is acquired, it is up to the caller
228
+ # to call COMMIT to end the transaction. If a lock
229
+ # is not acquired, transaction is closed and a new transaction
230
+ # is started to acquire lock again
231
+ def open_lock_exclusive(client, table)
232
+ attempts ||= 1
233
+
234
+ query = <<~SQL
235
+ SET lock_timeout = '#{client.wait_time_for_lock}s';
236
+ LOCK TABLE #{client.table} IN ACCESS EXCLUSIVE MODE;
237
+ SQL
238
+ run(client.connection, query, true)
239
+
240
+ true
241
+ rescue PG::LockNotAvailable, PG::InFailedSqlTransaction
242
+ if (attempts += 1) < LOCK_ATTEMPT
243
+ logger.info("Couldn't acquire lock, attempt: #{attempts}")
244
+
245
+ run(client.connection, "RESET lock_timeout;")
246
+ kill_backends(client, table)
247
+
248
+ retry
249
+ end
250
+
251
+ logger.info("Lock acquire failed")
252
+ run(client.connection, "RESET lock_timeout;")
253
+
254
+ false
255
+ end
256
+
257
+ def kill_backends(client, table)
258
+ return unless client.kill_backends
259
+
260
+ logger.info("Terminating other backends")
261
+
262
+ query = <<~SQL
263
+ SELECT pg_terminate_backend(pid) FROM pg_locks WHERE locktype = 'relation' AND relation = \'#{table}\'::regclass::oid AND pid <> pg_backend_pid()
264
+ SQL
265
+
266
+ run(client.connection, query, true)
267
+ end
268
+
269
+ def copy_data_statement(client, shadow_table)
270
+ select_columns = table_columns(client).map do |entry|
271
+ entry["column_name_regular"]
272
+ end
273
+
274
+ select_columns -= dropped_columns_list if dropped_columns_list.any?
275
+
276
+ insert_into_columns = select_columns.dup
277
+
278
+ if renamed_columns_list.any?
279
+ renamed_columns_list.each do |obj|
280
+ insert_into_columns.each_with_index do |insert_into_column, index|
281
+ insert_into_columns[index] = obj[:new_name] if insert_into_column == obj[:old_name]
282
+ end
283
+ end
284
+ end
285
+
286
+ insert_into_columns.map! do |insert_into_column|
287
+ client.connection.quote_ident(insert_into_column)
288
+ end
289
+
290
+ select_columns.map! do |select_column|
291
+ client.connection.quote_ident(select_column)
292
+ end
293
+
294
+ sql = <<~SQL
295
+ INSERT INTO #{shadow_table}(#{insert_into_columns.join(", ")})
296
+ SELECT #{select_columns.join(", ")}
297
+ FROM ONLY #{client.table}
298
+ SQL
299
+ end
300
+ end
301
+ end
302
+ end
@@ -0,0 +1,122 @@
1
+ module PgOnlineSchemaChange
2
+ class Replay
3
+ extend Helper
4
+
5
+ class << self
6
+ PULL_BATCH_COUNT = 1000
7
+ DELTA_COUNT = 20
8
+ RESERVED_COLUMNS = %w[operation_type trigger_time].freeze
9
+
10
+ # This picks PULL_BATCH_COUNT rows by primary key from audit_table,
11
+ # replays it on the shadow_table. Once the batch is done,
12
+ # it then deletes those PULL_BATCH_COUNT rows from audit_table. Then, pull another batch,
13
+ # check if the row count is at or below DELTA_COUNT, if so swap, otherwise
14
+ # continue. Swap because, the row count is minimal to replay them altogether
15
+ # and perform the rename while holding an access exclusive lock for minimal time.
16
+ def begin!
17
+ loop do
18
+ rows = rows_to_play
19
+
20
+ raise CountBelowDelta if rows.count <= DELTA_COUNT
21
+
22
+ play!(rows)
23
+ end
24
+ end
25
+
26
+ def rows_to_play(reuse_trasaction = false)
27
+ select_query = <<~SQL
28
+ SELECT * FROM #{audit_table} ORDER BY #{primary_key} LIMIT #{PULL_BATCH_COUNT};
29
+ SQL
30
+
31
+ rows = []
32
+ Query.run(client.connection, select_query, reuse_trasaction) { |result| rows = result.map { |row| row } }
33
+
34
+ rows
35
+ end
36
+
37
+ def play!(rows, reuse_trasaction = false)
38
+ logger.info("Replaying rows, count: #{rows.size}")
39
+
40
+ to_be_deleted_rows = []
41
+ to_be_replayed = []
42
+ rows.each do |row|
43
+ new_row = row.dup
44
+
45
+ # Remove audit table cols, since we will be
46
+ # re-mapping them for inserts and updates
47
+ RESERVED_COLUMNS.each do |col|
48
+ new_row.delete(col)
49
+ end
50
+
51
+ if dropped_columns_list.any?
52
+ dropped_columns_list.each do |dropped_column|
53
+ new_row.delete(dropped_column)
54
+ end
55
+ end
56
+
57
+ if renamed_columns_list.any?
58
+ renamed_columns_list.each do |object|
59
+ value = new_row.delete(object[:old_name])
60
+ new_row[object[:new_name]] = value
61
+ end
62
+ end
63
+
64
+ new_row = new_row.compact
65
+
66
+ # quote_ident column names to preserve case sensitivity
67
+ # ensure rows are escaped
68
+ new_row = new_row.transform_keys do |column|
69
+ client.connection.quote_ident(column)
70
+ end
71
+
72
+ new_row = new_row.transform_values do |value|
73
+ client.connection.escape_string(value)
74
+ end
75
+
76
+ case row["operation_type"]
77
+ when "INSERT"
78
+ values = new_row.map { |_, val| "'#{val}'" }.join(",")
79
+
80
+ sql = <<~SQL
81
+ INSERT INTO #{shadow_table} (#{new_row.keys.join(",")})
82
+ VALUES (#{values});
83
+ SQL
84
+ to_be_replayed << sql
85
+
86
+ to_be_deleted_rows << "'#{row[primary_key]}'"
87
+ when "UPDATE"
88
+ set_values = new_row.map do |column, value|
89
+ "#{column} = '#{value}'"
90
+ end.join(",")
91
+
92
+ sql = <<~SQL
93
+ UPDATE #{shadow_table}
94
+ SET #{set_values}
95
+ WHERE #{primary_key}=\'#{row[primary_key]}\';
96
+ SQL
97
+ to_be_replayed << sql
98
+
99
+ to_be_deleted_rows << "'#{row[primary_key]}'"
100
+ when "DELETE"
101
+ sql = <<~SQL
102
+ DELETE FROM #{shadow_table} WHERE #{primary_key}=\'#{row[primary_key]}\';
103
+ SQL
104
+ to_be_replayed << sql
105
+
106
+ to_be_deleted_rows << "'#{row[primary_key]}'"
107
+ end
108
+ end
109
+
110
+ Query.run(client.connection, to_be_replayed.join, reuse_trasaction)
111
+
112
+ # Delete the replayed rows from the audit table
113
+ if to_be_deleted_rows.count >= 1
114
+ delete_query = <<~SQL
115
+ DELETE FROM #{audit_table} WHERE #{primary_key} IN (#{to_be_deleted_rows.join(",")})
116
+ SQL
117
+ Query.run(client.connection, delete_query, reuse_trasaction)
118
+ end
119
+ end
120
+ end
121
+ end
122
+ end
@@ -0,0 +1,18 @@
1
+ require "pg_query"
2
+ require "pg"
3
+
4
+ module PgOnlineSchemaChange
5
+ class Store
6
+ class << self
7
+ @@object = {}
8
+
9
+ def get(key)
10
+ @@object[key.to_s] || @@object[key.to_sym]
11
+ end
12
+
13
+ def set(key, value)
14
+ @@object[key.to_sym] = value
15
+ end
16
+ end
17
+ end
18
+ end
@@ -0,0 +1,5 @@
1
+ # frozen_string_literal: true
2
+
3
+ module PgOnlineSchemaChange
4
+ VERSION = "0.1.0"
5
+ end
@@ -0,0 +1,33 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "json"
4
+ require "ougai"
5
+
6
+ require "pg_online_schema_change/version"
7
+ require "pg_online_schema_change/helper"
8
+ require "pg_online_schema_change/functions"
9
+ require "pg_online_schema_change/cli"
10
+ require "pg_online_schema_change/client"
11
+ require "pg_online_schema_change/query"
12
+ require "pg_online_schema_change/store"
13
+ require "pg_online_schema_change/replay"
14
+ require "pg_online_schema_change/orchestrate"
15
+
16
+ module PgOnlineSchemaChange
17
+ class Error < StandardError; end
18
+ class CountBelowDelta < StandardError; end
19
+ class AccessExclusiveLockNotAcquired < StandardError; end
20
+
21
+ def self.logger=(verbose)
22
+ @@logger ||= begin
23
+ logger = Ougai::Logger.new($stdout)
24
+ logger.level = verbose ? Ougai::Logger::TRACE : Ougai::Logger::INFO
25
+ logger.with_fields = { version: PgOnlineSchemaChange::VERSION }
26
+ logger
27
+ end
28
+ end
29
+
30
+ def self.logger
31
+ @@logger
32
+ end
33
+ end
metadata ADDED
@@ -0,0 +1,76 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: pg_online_schema_change
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.0
5
+ platform: ruby
6
+ authors:
7
+ - Shayon Mukherjee
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2022-02-16 00:00:00.000000000 Z
12
+ dependencies: []
13
+ description: pg-online-schema-change (pg-osc) is a tool for making schema changes
14
+ (any ALTER statements) in Postgres tables with minimal locks, thus helping achieve
15
+ zero downtime schema changes against production workloads.
16
+ email:
17
+ - shayonj@gmail.com
18
+ executables:
19
+ - console
20
+ - pg-online-schema-change
21
+ - setup
22
+ extensions: []
23
+ extra_rdoc_files: []
24
+ files:
25
+ - ".rspec"
26
+ - ".rubocop.yml"
27
+ - ".ruby-version"
28
+ - CHANGELOG.md
29
+ - CODE_OF_CONDUCT.md
30
+ - Gemfile
31
+ - Gemfile.lock
32
+ - LICENSE.txt
33
+ - README.md
34
+ - Rakefile
35
+ - bin/console
36
+ - bin/pg-online-schema-change
37
+ - bin/setup
38
+ - diagrams/how-it-works.excalidraw
39
+ - diagrams/how-it-works.png
40
+ - docker-compose.yml
41
+ - lib/pg_online_schema_change.rb
42
+ - lib/pg_online_schema_change/cli.rb
43
+ - lib/pg_online_schema_change/client.rb
44
+ - lib/pg_online_schema_change/functions.rb
45
+ - lib/pg_online_schema_change/helper.rb
46
+ - lib/pg_online_schema_change/orchestrate.rb
47
+ - lib/pg_online_schema_change/query.rb
48
+ - lib/pg_online_schema_change/replay.rb
49
+ - lib/pg_online_schema_change/store.rb
50
+ - lib/pg_online_schema_change/version.rb
51
+ homepage: https://github.com/shayonj/pg-online-schema-change
52
+ licenses:
53
+ - MIT
54
+ metadata:
55
+ rubygems_mfa_required: 'true'
56
+ post_install_message:
57
+ rdoc_options: []
58
+ require_paths:
59
+ - lib
60
+ required_ruby_version: !ruby/object:Gem::Requirement
61
+ requirements:
62
+ - - ">="
63
+ - !ruby/object:Gem::Version
64
+ version: 2.6.0
65
+ required_rubygems_version: !ruby/object:Gem::Requirement
66
+ requirements:
67
+ - - ">="
68
+ - !ruby/object:Gem::Version
69
+ version: '0'
70
+ requirements: []
71
+ rubygems_version: 3.2.3
72
+ signing_key:
73
+ specification_version: 4
74
+ summary: pg-online-schema-change is a tool for schema changes for Postgres tables
75
+ with minimal locks
76
+ test_files: []