pg_online_schema_change 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,302 @@
1
+ require "pg_query"
2
+ require "pg"
3
+
4
+ module PgOnlineSchemaChange
5
+ class Query
6
+ extend Helper
7
+
8
+ INDEX_SUFFIX = "_pgosc".freeze
9
+ DROPPED_COLUMN_TYPE = :AT_DropColumn
10
+ RENAMED_COLUMN_TYPE = :AT_RenameColumn
11
+ LOCK_ATTEMPT = 4
12
+
13
+ class << self
14
+ def alter_statement?(query)
15
+ PgQuery.parse(query).tree.stmts.all? do |statement|
16
+ statement.stmt.alter_table_stmt.instance_of?(PgQuery::AlterTableStmt) || statement.stmt.rename_stmt.instance_of?(PgQuery::RenameStmt)
17
+ end
18
+ rescue PgQuery::ParseError => e
19
+ false
20
+ end
21
+
22
+ def same_table?(query)
23
+ tables = PgQuery.parse(query).tree.stmts.map do |statement|
24
+ if statement.stmt.alter_table_stmt.instance_of?(PgQuery::AlterTableStmt)
25
+ statement.stmt.alter_table_stmt.relation.relname
26
+ elsif statement.stmt.rename_stmt.instance_of?(PgQuery::RenameStmt)
27
+ statement.stmt.rename_stmt.relation.relname
28
+ end
29
+ end.compact
30
+
31
+ tables.uniq.count == 1
32
+ rescue PgQuery::ParseError => e
33
+ false
34
+ end
35
+
36
+ def table(query)
37
+ from_rename_statement = PgQuery.parse(query).tree.stmts.map do |statement|
38
+ statement.stmt.rename_stmt&.relation&.relname
39
+ end.compact[0]
40
+ PgQuery.parse(query).tables[0] || from_rename_statement
41
+ end
42
+
43
+ def run(connection, query, reuse_trasaction = false, &block)
44
+ connection.cancel if [PG::PQTRANS_INERROR, PG::PQTRANS_UNKNOWN].include?(connection.transaction_status)
45
+
46
+ logger.debug("Running query", { query: query })
47
+
48
+ connection.async_exec("BEGIN;")
49
+
50
+ result = connection.async_exec(query, &block)
51
+ rescue Exception
52
+ connection.cancel if connection.transaction_status != PG::PQTRANS_IDLE
53
+ connection.block
54
+ logger.info("Exception raised, rolling back query", { rollback: true, query: query })
55
+ connection.async_exec("ROLLBACK;")
56
+ connection.async_exec("COMMIT;")
57
+ raise
58
+ else
59
+ connection.async_exec("COMMIT;") unless reuse_trasaction
60
+ result
61
+ end
62
+
63
+ def table_columns(client, table = nil)
64
+ sql = <<~SQL
65
+ SELECT attname as column_name, format_type(atttypid, atttypmod) as type, attnum as column_position FROM pg_attribute
66
+ WHERE attrelid = \'#{table || client.table}\'::regclass AND attnum > 0 AND NOT attisdropped
67
+ ORDER BY attnum;
68
+ SQL
69
+ mapped_columns = []
70
+
71
+ run(client.connection, sql) do |result|
72
+ mapped_columns = result.map do |row|
73
+ row["column_name_regular"] = row["column_name"]
74
+ row["column_name"] = client.connection.quote_ident(row["column_name"])
75
+ row["column_position"] = row["column_position"].to_i
76
+ row
77
+ end
78
+ end
79
+
80
+ mapped_columns
81
+ end
82
+
83
+ def alter_statement_for(client, shadow_table)
84
+ parsed_query = PgQuery.parse(client.alter_statement)
85
+
86
+ parsed_query.tree.stmts.each do |statement|
87
+ statement.stmt.alter_table_stmt.relation.relname = shadow_table if statement.stmt.alter_table_stmt
88
+
89
+ statement.stmt.rename_stmt.relation.relname = shadow_table if statement.stmt.rename_stmt
90
+ end
91
+ parsed_query.deparse
92
+ end
93
+
94
+ def get_indexes_for(client, table)
95
+ query = <<~SQL
96
+ SELECT indexdef, schemaname
97
+ FROM pg_indexes
98
+ WHERE schemaname = \'#{client.schema}\' AND tablename = \'#{table}\'
99
+ SQL
100
+
101
+ indexes = []
102
+ run(client.connection, query) do |result|
103
+ indexes = result.map { |row| row["indexdef"] }
104
+ end
105
+
106
+ indexes
107
+ end
108
+
109
+ def get_all_constraints_for(client)
110
+ query = <<~SQL
111
+ SELECT conrelid::regclass AS table_on,
112
+ confrelid::regclass AS table_from,
113
+ contype as constraint_type,
114
+ conname AS constraint_name,
115
+ convalidated AS constraint_validated,
116
+ pg_get_constraintdef(oid) AS definition
117
+ FROM pg_constraint
118
+ WHERE contype IN ('f', 'p')
119
+ SQL
120
+
121
+ constraints = []
122
+ run(client.connection, query) do |result|
123
+ constraints = result.map { |row| row }
124
+ end
125
+
126
+ constraints
127
+ end
128
+
129
+ def get_primary_keys_for(client, table)
130
+ get_all_constraints_for(client).select do |row|
131
+ row["table_on"] == table && row["constraint_type"] == "p"
132
+ end
133
+ end
134
+
135
+ def get_foreign_keys_for(client, table)
136
+ get_all_constraints_for(client).select do |row|
137
+ row["table_on"] == table && row["constraint_type"] == "f"
138
+ end
139
+ end
140
+
141
+ def get_foreign_keys_to_refresh(client, table)
142
+ references = get_all_constraints_for(client).select do |row|
143
+ row["table_from"] == table && row["constraint_type"] == "f"
144
+ end
145
+
146
+ references.map do |row|
147
+ if row["definition"].end_with?("NOT VALID")
148
+ add_statement = "ALTER TABLE #{row["table_on"]} ADD CONSTRAINT #{row["constraint_name"]} #{row["definition"]};"
149
+ else
150
+ add_statement = "ALTER TABLE #{row["table_on"]} ADD CONSTRAINT #{row["constraint_name"]} #{row["definition"]} NOT VALID;"
151
+ end
152
+
153
+ drop_statement = "ALTER TABLE #{row["table_on"]} DROP CONSTRAINT #{row["constraint_name"]};"
154
+
155
+ "#{drop_statement} #{add_statement}"
156
+ end.join
157
+ end
158
+
159
+ def get_foreign_keys_to_validate(client, table)
160
+ references = get_all_constraints_for(client).select do |row|
161
+ row["table_from"] == table && row["constraint_type"] == "f"
162
+ end
163
+
164
+ references.map do |row|
165
+ "ALTER TABLE #{row["table_on"]} VALIDATE CONSTRAINT #{row["constraint_name"]};"
166
+ end.join
167
+ end
168
+
169
+ def dropped_columns(client)
170
+ PgQuery.parse(client.alter_statement).tree.stmts.map do |statement|
171
+ next if statement.stmt.alter_table_stmt.nil?
172
+
173
+ statement.stmt.alter_table_stmt.cmds.map do |cmd|
174
+ cmd.alter_table_cmd.name if cmd.alter_table_cmd.subtype == DROPPED_COLUMN_TYPE
175
+ end
176
+ end.flatten.compact
177
+ end
178
+
179
+ def renamed_columns(client)
180
+ PgQuery.parse(client.alter_statement).tree.stmts.map do |statement|
181
+ next if statement.stmt.rename_stmt.nil?
182
+
183
+ {
184
+ old_name: statement.stmt.rename_stmt.subname,
185
+ new_name: statement.stmt.rename_stmt.newname,
186
+ }
187
+ end.flatten.compact
188
+ end
189
+
190
+ def primary_key_for(client, table)
191
+ query = <<~SQL
192
+ SELECT
193
+ pg_attribute.attname as column_name
194
+ FROM pg_index, pg_class, pg_attribute, pg_namespace
195
+ WHERE
196
+ pg_class.oid = \'#{table}\'::regclass AND
197
+ indrelid = pg_class.oid AND
198
+ nspname = \'#{client.schema}\' AND
199
+ pg_class.relnamespace = pg_namespace.oid AND
200
+ pg_attribute.attrelid = pg_class.oid AND
201
+ pg_attribute.attnum = any(pg_index.indkey)
202
+ AND indisprimary
203
+ SQL
204
+
205
+ columns = []
206
+ run(client.connection, query) do |result|
207
+ columns = result.map { |row| row["column_name"] }
208
+ end
209
+
210
+ columns.first
211
+ end
212
+
213
+ def storage_parameters_for(client, table)
214
+ query = <<~SQL
215
+ SELECT array_to_string(reloptions, ',') as params FROM pg_class WHERE relname=\'#{table}\';
216
+ SQL
217
+
218
+ columns = []
219
+ run(client.connection, query) do |result|
220
+ columns = result.map { |row| row["params"] }
221
+ end
222
+
223
+ columns.first
224
+ end
225
+
226
# Acquires an ACCESS EXCLUSIVE lock on the client's table and keeps
# the transaction open. When the lock is acquired it is up to the
# caller to COMMIT and end the transaction. When it is not acquired,
# the transaction is closed, (optionally) competing backends are
# killed, and acquisition is retried up to LOCK_ATTEMPT times.
# Returns true on success, false after exhausting all attempts.
def open_lock_exclusive(client, table)
  attempts ||= 1

  query = <<~SQL
    SET lock_timeout = '#{client.wait_time_for_lock}s';
    LOCK TABLE #{client.table} IN ACCESS EXCLUSIVE MODE;
  SQL
  # reuse_transaction=true keeps the transaction (and thus the lock) open.
  run(client.connection, query, true)

  true
rescue PG::LockNotAvailable, PG::InFailedSqlTransaction
  if (attempts += 1) < LOCK_ATTEMPT
    logger.info("Couldn't acquire lock, attempt: #{attempts}")

    run(client.connection, "RESET lock_timeout;")
    kill_backends(client, table)

    retry
  end

  logger.info("Lock acquire failed")
  run(client.connection, "RESET lock_timeout;")

  false
end
256
+
257
+ def kill_backends(client, table)
258
+ return unless client.kill_backends
259
+
260
+ logger.info("Terminating other backends")
261
+
262
+ query = <<~SQL
263
+ SELECT pg_terminate_backend(pid) FROM pg_locks WHERE locktype = 'relation' AND relation = \'#{table}\'::regclass::oid AND pid <> pg_backend_pid()
264
+ SQL
265
+
266
+ run(client.connection, query, true)
267
+ end
268
+
269
+ def copy_data_statement(client, shadow_table)
270
+ select_columns = table_columns(client).map do |entry|
271
+ entry["column_name_regular"]
272
+ end
273
+
274
+ select_columns -= dropped_columns_list if dropped_columns_list.any?
275
+
276
+ insert_into_columns = select_columns.dup
277
+
278
+ if renamed_columns_list.any?
279
+ renamed_columns_list.each do |obj|
280
+ insert_into_columns.each_with_index do |insert_into_column, index|
281
+ insert_into_columns[index] = obj[:new_name] if insert_into_column == obj[:old_name]
282
+ end
283
+ end
284
+ end
285
+
286
+ insert_into_columns.map! do |insert_into_column|
287
+ client.connection.quote_ident(insert_into_column)
288
+ end
289
+
290
+ select_columns.map! do |select_column|
291
+ client.connection.quote_ident(select_column)
292
+ end
293
+
294
+ sql = <<~SQL
295
+ INSERT INTO #{shadow_table}(#{insert_into_columns.join(", ")})
296
+ SELECT #{select_columns.join(", ")}
297
+ FROM ONLY #{client.table}
298
+ SQL
299
+ end
300
+ end
301
+ end
302
+ end
@@ -0,0 +1,122 @@
1
+ module PgOnlineSchemaChange
2
+ class Replay
3
+ extend Helper
4
+
5
+ class << self
6
+ PULL_BATCH_COUNT = 1000
7
+ DELTA_COUNT = 20
8
+ RESERVED_COLUMNS = %w[operation_type trigger_time].freeze
9
+
10
# Picks PULL_BATCH_COUNT rows by primary key from the audit table and
# replays them on the shadow table; once a batch is done, those rows
# are deleted from the audit table and the next batch is pulled.
# When the remaining row count drops to DELTA_COUNT or fewer,
# CountBelowDelta is raised so the caller can replay the final few
# rows and perform the rename while holding an access exclusive lock
# for minimal time.
def begin!
  loop do
    batch = rows_to_play

    raise CountBelowDelta if batch.count <= DELTA_COUNT

    play!(batch)
  end
end
25
+
26
+ def rows_to_play(reuse_trasaction = false)
27
+ select_query = <<~SQL
28
+ SELECT * FROM #{audit_table} ORDER BY #{primary_key} LIMIT #{PULL_BATCH_COUNT};
29
+ SQL
30
+
31
+ rows = []
32
+ Query.run(client.connection, select_query, reuse_trasaction) { |result| rows = result.map { |row| row } }
33
+
34
+ rows
35
+ end
36
+
37
+ def play!(rows, reuse_trasaction = false)
38
+ logger.info("Replaying rows, count: #{rows.size}")
39
+
40
+ to_be_deleted_rows = []
41
+ to_be_replayed = []
42
+ rows.each do |row|
43
+ new_row = row.dup
44
+
45
+ # Remove audit table cols, since we will be
46
+ # re-mapping them for inserts and updates
47
+ RESERVED_COLUMNS.each do |col|
48
+ new_row.delete(col)
49
+ end
50
+
51
+ if dropped_columns_list.any?
52
+ dropped_columns_list.each do |dropped_column|
53
+ new_row.delete(dropped_column)
54
+ end
55
+ end
56
+
57
+ if renamed_columns_list.any?
58
+ renamed_columns_list.each do |object|
59
+ value = new_row.delete(object[:old_name])
60
+ new_row[object[:new_name]] = value
61
+ end
62
+ end
63
+
64
+ new_row = new_row.compact
65
+
66
+ # quote indent column to preserve case insensitivity
67
+ # ensure rows are escaped
68
+ new_row = new_row.transform_keys do |column|
69
+ client.connection.quote_ident(column)
70
+ end
71
+
72
+ new_row = new_row.transform_values do |value|
73
+ client.connection.escape_string(value)
74
+ end
75
+
76
+ case row["operation_type"]
77
+ when "INSERT"
78
+ values = new_row.map { |_, val| "'#{val}'" }.join(",")
79
+
80
+ sql = <<~SQL
81
+ INSERT INTO #{shadow_table} (#{new_row.keys.join(",")})
82
+ VALUES (#{values});
83
+ SQL
84
+ to_be_replayed << sql
85
+
86
+ to_be_deleted_rows << "'#{row[primary_key]}'"
87
+ when "UPDATE"
88
+ set_values = new_row.map do |column, value|
89
+ "#{column} = '#{value}'"
90
+ end.join(",")
91
+
92
+ sql = <<~SQL
93
+ UPDATE #{shadow_table}
94
+ SET #{set_values}
95
+ WHERE #{primary_key}=\'#{row[primary_key]}\';
96
+ SQL
97
+ to_be_replayed << sql
98
+
99
+ to_be_deleted_rows << "'#{row[primary_key]}'"
100
+ when "DELETE"
101
+ sql = <<~SQL
102
+ DELETE FROM #{shadow_table} WHERE #{primary_key}=\'#{row[primary_key]}\';
103
+ SQL
104
+ to_be_replayed << sql
105
+
106
+ to_be_deleted_rows << "'#{row[primary_key]}'"
107
+ end
108
+ end
109
+
110
+ Query.run(client.connection, to_be_replayed.join, reuse_trasaction)
111
+
112
+ # Delete items from the audit now that are replayed
113
+ if to_be_deleted_rows.count >= 1
114
+ delete_query = <<~SQL
115
+ DELETE FROM #{audit_table} WHERE #{primary_key} IN (#{to_be_deleted_rows.join(",")})
116
+ SQL
117
+ Query.run(client.connection, delete_query, reuse_trasaction)
118
+ end
119
+ end
120
+ end
121
+ end
122
+ end
@@ -0,0 +1,18 @@
1
+ require "pg_query"
2
+ require "pg"
3
+
4
module PgOnlineSchemaChange
  # Process-wide key/value store used to share state (client, table
  # names, column lists, ...) between the orchestration steps.
  # Values are stored under symbol keys; lookups accept either a
  # String or a Symbol.
  class Store
    class << self
      # Fetches +key+, trying the String form first, then the Symbol
      # form (set always writes symbol keys, so the symbol lookup is
      # the one that normally hits).
      def get(key)
        store[key.to_s] || store[key.to_sym]
      end

      # Stores +value+ under the symbolized +key+.
      def set(key, value)
        store[key.to_sym] = value
      end

      private

      # Class instance variable instead of a @@class variable, so the
      # state is not shared across any future subclasses.
      def store
        @store ||= {}
      end
    end
  end
end
@@ -0,0 +1,5 @@
1
+ # frozen_string_literal: true
2
+
3
module PgOnlineSchemaChange
  # Gem version; surfaced in the gemspec and attached to every log line.
  VERSION = "0.1.0"
end
@@ -0,0 +1,33 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "json"
4
+ require "ougai"
5
+
6
+ require "pg_online_schema_change/version"
7
+ require "pg_online_schema_change/helper"
8
+ require "pg_online_schema_change/functions"
9
+ require "pg_online_schema_change/cli"
10
+ require "pg_online_schema_change/client"
11
+ require "pg_online_schema_change/query"
12
+ require "pg_online_schema_change/store"
13
+ require "pg_online_schema_change/replay"
14
+ require "pg_online_schema_change/orchestrate"
15
+
16
module PgOnlineSchemaChange
  # Generic failure within pg-osc.
  class Error < StandardError; end
  # Raised by Replay when the audit backlog is small enough to swap.
  class CountBelowDelta < StandardError; end
  # Raised when the ACCESS EXCLUSIVE lock could not be obtained.
  class AccessExclusiveLockNotAcquired < StandardError; end

  # Builds (once) the process-wide structured logger. Despite the
  # writer-style name, the argument is a verbosity flag, not a logger:
  # truthy => TRACE level, falsy => INFO level. Subsequent calls are
  # no-ops because of ||=.
  def self.logger=(verbose)
    @logger ||= begin
      logger = Ougai::Logger.new($stdout)
      logger.level = verbose ? Ougai::Logger::TRACE : Ougai::Logger::INFO
      logger.with_fields = { version: PgOnlineSchemaChange::VERSION }
      logger
    end
  end

  # The shared logger; nil until `self.logger=` has been called.
  # (Class instance variable rather than @@logger, so state is not
  # shared across the inheritance tree.)
  def self.logger
    @logger
  end
end
metadata ADDED
@@ -0,0 +1,76 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: pg_online_schema_change
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.0
5
+ platform: ruby
6
+ authors:
7
+ - Shayon Mukherjee
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2022-02-16 00:00:00.000000000 Z
12
+ dependencies: []
13
+ description: pg-online-schema-change (pg-osc) is a tool for making schema changes
14
+ (any ALTER statements) in Postgres tables with minimal locks, thus helping achieve
15
+ zero downtime schema changes against production workloads.
16
+ email:
17
+ - shayonj@gmail.com
18
+ executables:
19
+ - console
20
+ - pg-online-schema-change
21
+ - setup
22
+ extensions: []
23
+ extra_rdoc_files: []
24
+ files:
25
+ - ".rspec"
26
+ - ".rubocop.yml"
27
+ - ".ruby-version"
28
+ - CHANGELOG.md
29
+ - CODE_OF_CONDUCT.md
30
+ - Gemfile
31
+ - Gemfile.lock
32
+ - LICENSE.txt
33
+ - README.md
34
+ - Rakefile
35
+ - bin/console
36
+ - bin/pg-online-schema-change
37
+ - bin/setup
38
+ - diagrams/how-it-works.excalidraw
39
+ - diagrams/how-it-works.png
40
+ - docker-compose.yml
41
+ - lib/pg_online_schema_change.rb
42
+ - lib/pg_online_schema_change/cli.rb
43
+ - lib/pg_online_schema_change/client.rb
44
+ - lib/pg_online_schema_change/functions.rb
45
+ - lib/pg_online_schema_change/helper.rb
46
+ - lib/pg_online_schema_change/orchestrate.rb
47
+ - lib/pg_online_schema_change/query.rb
48
+ - lib/pg_online_schema_change/replay.rb
49
+ - lib/pg_online_schema_change/store.rb
50
+ - lib/pg_online_schema_change/version.rb
51
+ homepage: https://github.com/shayonj/pg-online-schema-change
52
+ licenses:
53
+ - MIT
54
+ metadata:
55
+ rubygems_mfa_required: 'true'
56
+ post_install_message:
57
+ rdoc_options: []
58
+ require_paths:
59
+ - lib
60
+ required_ruby_version: !ruby/object:Gem::Requirement
61
+ requirements:
62
+ - - ">="
63
+ - !ruby/object:Gem::Version
64
+ version: 2.6.0
65
+ required_rubygems_version: !ruby/object:Gem::Requirement
66
+ requirements:
67
+ - - ">="
68
+ - !ruby/object:Gem::Version
69
+ version: '0'
70
+ requirements: []
71
+ rubygems_version: 3.2.3
72
+ signing_key:
73
+ specification_version: 4
74
+ summary: pg-online-schema-change is a tool for schema changes for Postgres tables
75
+ with minimal locks
76
+ test_files: []