pg_online_schema_change 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
Binary file
@@ -0,0 +1,9 @@
# docker-compose service used for local development / CI of pg-online-schema-change.
# Brings up a throwaway PostgreSQL instance the gem's tests can connect to.
version: '2'
services:
  postgres:
    # Alpine variant keeps the image small; 9.6 is the oldest server the tool targets
    # here — NOTE(review): presumably chosen as a minimum-supported version, confirm.
    image: postgres:9.6.2-alpine
    ports:
      - "5432:5432"   # expose the default Postgres port on the host
    environment:
      POSTGRES_USER: jamesbond   # superuser created on first boot
      POSTGRES_DB: postgres      # default database name
@@ -0,0 +1,44 @@
1
require "thor"

module PgOnlineSchemaChange
  # Thor-based command line interface.
  #
  # `perform` is the default task: it collects connection/behavior options,
  # wraps them in an anonymous Struct (so downstream code can use method
  # access like `options.dbname`), wires up the logger verbosity, and hands
  # off to Orchestrate.run!.
  class CLI < Thor
    desc "perform", "Perform the set of operations to safely apply the schema change with minimal locks"
    method_option :alter_statement, aliases: "-a", type: :string, required: true,
                                    desc: "The ALTER statement to perform the schema change"
    method_option :schema, aliases: "-s", type: :string, required: true, default: "public",
                           desc: "The schema in which the table is"
    method_option :dbname, aliases: "-d", type: :string, required: true, desc: "Name of the database"
    method_option :host, aliases: "-h", type: :string, required: true, desc: "Server host where the Database is located"
    method_option :username, aliases: "-u", type: :string, required: true, desc: "Username for the Database"
    method_option :port, aliases: "-p", type: :numeric, required: true, default: 5432, desc: "Port for the Database"
    method_option :password, aliases: "-w", type: :string, required: true, desc: "Password for the Database"
    # NOTE: "-v" is also the conventional shorthand for the `version` command
    # below; within `perform` it maps to --verbose.
    method_option :verbose, aliases: "-v", type: :boolean, default: false, desc: "Emit logs in debug mode"
    method_option :drop, aliases: "-f", type: :boolean, default: false,
                         desc: "Drop the original table in the end after the swap"
    method_option :kill_backends, aliases: "-k", type: :boolean, default: false,
                                  desc: "Kill other competing queries/backends when trying to acquire lock for the shadow table creation and swap. It will wait for --wait-time-for-lock duration before killing backends and try upto 3 times."
    # FIX: this option previously declared aliases: "-w", which collided with
    # the "-w" alias of :password above — the duplicate registration made the
    # short password flag unusable. "-t" is otherwise unclaimed; the long form
    # --wait-time-for-lock is unchanged.
    method_option :wait_time_for_lock, aliases: "-t", type: :numeric, default: 10,
                                       desc: "Time to wait before killing backends to acquire lock and/or retrying upto 3 times. It will kill backends if --kill-backends is true, otherwise try upto 3 times and exit if it cannot acquire a lock."
    method_option :copy_statement, aliases: "-c", type: :string, required: false, default: "",
                                   desc: "Takes a .sql file location where you can provide a custom query to be played (ex: backfills) when pgosc copies data from the primary to the shadow table. More examples in README."

    # Entry point for the schema change. Raises whatever Orchestrate.run!
    # raises; Thor exits non-zero via exit_on_failure?.
    def perform
      # Build a Struct so options respond to reader methods (Client expects
      # `options.alter_statement`, etc. rather than hash access).
      client_options = Struct.new(*options.keys.map(&:to_sym)).new(*options.values)

      PgOnlineSchemaChange.logger = client_options.verbose
      PgOnlineSchemaChange::Orchestrate.run!(client_options)
    end

    desc "--version, -v", "print the version"

    # Prints the gem version and exits.
    def version
      puts PgOnlineSchemaChange::VERSION
    end

    # Make Thor exit with a non-zero status on errors instead of returning.
    def self.exit_on_failure?
      true
    end

    default_task :perform
  end
end
@@ -0,0 +1,49 @@
1
require "pg"

module PgOnlineSchemaChange
  # Holds all user-supplied options plus the live PG connection used for the
  # duration of the schema change. Validates the ALTER statement(s) on
  # construction and resolves the target table name.
  class Client
    attr_accessor :alter_statement, :schema, :dbname, :host, :username, :port, :password, :connection, :table, :drop,
                  :kill_backends, :wait_time_for_lock, :copy_statement

    # @param options [#alter_statement, #schema, ...] a Struct-like object
    #   (built by the CLI) exposing one reader per CLI option.
    # @raise [Error] when the statement is not a valid ALTER, or when the
    #   statements span more than one table, or when the copy-statement file
    #   path does not exist.
    def initialize(options)
      @alter_statement = options.alter_statement
      @schema = options.schema
      @dbname = options.dbname
      @host = options.host
      @username = options.username
      @port = options.port
      @password = options.password
      @drop = options.drop
      @kill_backends = options.kill_backends
      @wait_time_for_lock = options.wait_time_for_lock
      handle_copy_statement(options.copy_statement)

      @connection = PG.connect(
        dbname: @dbname,
        host: @host,
        user: @username,
        password: @password,
        port: @port,
      )

      raise Error, "Not a valid ALTER statement: #{@alter_statement}" unless Query.alter_statement?(@alter_statement)

      unless Query.same_table?(@alter_statement)
        # FIX: was `raise Error "..."` (no comma), which Ruby parses as a call
        # to a method named Error(...) and thus raised NoMethodError instead
        # of the intended PgOnlineSchemaChange::Error.
        raise Error, "All statements should belong to the same table: #{@alter_statement}"
      end

      @table = Query.table(@alter_statement)

      PgOnlineSchemaChange.logger.debug("Connection established")
    end

    # Reads the optional custom copy statement from disk into @copy_statement.
    # A nil/empty value leaves @copy_statement unset (nil).
    #
    # @param statement [String, nil] path to a .sql file
    # @raise [Error] when the path does not resolve to a regular file
    def handle_copy_statement(statement)
      return if statement.nil? || statement == ""

      file_path = File.expand_path(statement)
      raise Error, "File not found: #{file_path}" unless File.file?(file_path)

      # Binary read to avoid any newline translation of the SQL payload.
      @copy_statement = File.open(file_path, "rb", &:read)
    end
  end
end
@@ -0,0 +1,62 @@
1
# plpgsql helper installed into the target database during setup.
# fix_serial_sequence(_table, _newtable): for every serial-backed integer
# column of _table, builds and executes dynamic SQL that creates a fresh
# sequence for the shadow table, re-owns it, and points the column default at
# it — so the copied table does not advance the primary table's sequences.
# The heredoc body is a runtime SQL string; do not edit its content casually.
FUNC_FIX_SERIAL_SEQUENCE = <<~SQL.freeze
  CREATE OR REPLACE FUNCTION fix_serial_sequence(_table regclass, _newtable text)
    RETURNS void AS
  $func$
  DECLARE
    _sql text;
  BEGIN

    -- Update serial columns to ensure copied table doesn't follow same sequence as primary table
    SELECT INTO _sql
           string_agg('CREATE SEQUENCE ' || seq, E';\n') || E';\n'
        || string_agg(format('ALTER SEQUENCE %s OWNED BY %I.%I'
                           , seq, _newtable, a.attname), E';\n') || E';\n'
        || 'ALTER TABLE ' || quote_ident(_newtable) || E'\n  '
        || string_agg(format($$ALTER %I SET DEFAULT nextval('%s'::regclass)$$
                          , a.attname, seq), E'\n, ')
    FROM   pg_attribute a
    JOIN   pg_attrdef ad ON ad.adrelid = a.attrelid
                        AND ad.adnum = a.attnum
         , quote_ident(_newtable || '_' || a.attname || '_seq') AS seq
    WHERE  a.attrelid = _table
    AND    a.attnum > 0
    AND    NOT a.attisdropped
    AND    a.atttypid = ANY ('{int,int8,int2}'::regtype[])
    AND    pg_get_expr(ad.adbin, ad.adrelid) = 'nextval('''
        || (pg_get_serial_sequence (a.attrelid::regclass::text, a.attname))::regclass
        || '''::regclass)'
    ;

    IF _sql IS NOT NULL THEN
      EXECUTE _sql;
    END IF;

  END
  $func$ LANGUAGE plpgsql VOLATILE;
SQL

# plpgsql helper installed into the target database during setup.
# create_table_all(source_table, newsource_table): clones the source table
# structure via CREATE TABLE ... (LIKE ... INCLUDING ALL), then re-adds each
# foreign-key constraint explicitly (LIKE does not copy FK constraints).
FUNC_CREATE_TABLE_ALL = <<~SQL.freeze
  CREATE OR REPLACE FUNCTION create_table_all(source_table text, newsource_table text)
    RETURNS void language plpgsql
  as $$
  declare
    rec record;
  begin
    EXECUTE format(
      'CREATE TABLE %s (LIKE %s including all)',
      newsource_table, source_table);
    for rec in
      SELECT oid, conname
      FROM pg_constraint
      WHERE contype = 'f'
      AND conrelid = source_table::regclass
    LOOP
      EXECUTE format(
        'ALTER TABLE %s add constraint %s %s',
        newsource_table,
        rec.conname,
        pg_get_constraintdef(rec.oid));
    END LOOP;
  END
  $$;
SQL
@@ -0,0 +1,21 @@
1
module PgOnlineSchemaChange
  # Mixin of convenience readers used by Orchestrate (via `extend Helper`).
  # Values are looked up from the process-wide Store; unknown readers fall
  # through method_missing to the Store as well.
  module Helper
    # Returns the memoized primary key of the target table, computing and
    # caching it on first access via Query.primary_key_for.
    # NOTE(review): assumes Store.set returns the value it stored — confirm
    # against Store's implementation.
    def primary_key
      result = Store.get(:primary_key)
      return result if result

      Store.set(:primary_key, Query.primary_key_for(client, client.table))
    end

    # Shared logger for all including/extending objects.
    def logger
      PgOnlineSchemaChange.logger
    end

    # Treat any unknown message as a Store lookup (e.g. `shadow_table`,
    # `audit_table` set during Orchestrate.setup!). Raises when the Store has
    # no (truthy) value for the key.
    # FIX: was `Store.send(:get, method)` — `get` is public (called directly
    # in #primary_key), so the `send` indirection was unnecessary.
    def method_missing(method, *_args)
      result = Store.get(method)
      return result if result

      raise ArgumentError, "Method `#{method}` doesn't exist."
    end

    # FIX: method_missing without respond_to_missing? breaks the respond_to?
    # contract; report true for any key the Store currently resolves.
    def respond_to_missing?(method_name, include_private = false)
      !Store.get(method_name).nil? || super
    end
  end
end
@@ -0,0 +1,263 @@
1
require "securerandom"

module PgOnlineSchemaChange
  # Drives the whole online schema change: sets up audit + shadow tables and
  # a capture trigger, applies the ALTER on the shadow table, copies data,
  # replays captured writes, then swaps the tables inside a single
  # ACCESS EXCLUSIVE-locked transaction. All steps are class methods; shared
  # state (client, table names) lives in Store and is read back through the
  # Helper mixin's method_missing (e.g. `audit_table`, `shadow_table`).
  class Orchestrate
    # statement_timeout applied only for the swap transaction, so a stuck
    # swap cannot hold the exclusive lock indefinitely.
    SWAP_STATEMENT_TIMEOUT = "5s".freeze

    extend Helper

    class << self
      # Builds the Client, relaxes session timeouts, pins search_path to the
      # client's schema, installs the two helper SQL functions, and registers
      # the generated table names in the Store.
      def setup!(options)
        client = Store.set(:client, Client.new(options))

        sql = <<~SQL
          SET statement_timeout = 0;
          SET client_min_messages = warning;
          SET search_path TO #{client.schema};
        SQL

        Query.run(client.connection, sql)
        # install functions
        Query.run(client.connection, FUNC_FIX_SERIAL_SEQUENCE)
        Query.run(client.connection, FUNC_CREATE_TABLE_ALL)

        # Set this early on to ensure their creation and cleanup (unexpected)
        # happens at all times. IOW, the calls from Store.get always return
        # the same value.
        Store.set(:old_primary_table, "pgosc_op_table_#{client.table}")
        Store.set(:audit_table, "pgosc_at_#{client.table}_#{random_string}")
        Store.set(:shadow_table, "pgosc_st_#{client.table}_#{random_string}")
      end

      # Top-level entry point (called by the CLI). Runs every phase in order;
      # on any StandardError it logs, performs cleanup, and re-raises.
      # Note: run_analyze! runs twice on purpose — once after the bulk copy,
      # once after replay/swap.
      def run!(options)
        setup!(options)
        # Background thread watches for TERM/QUIT/INT and cleans up.
        Thread.new { handle_signals! }

        raise Error, "Parent table has no primary key, exiting..." if primary_key.nil?

        setup_audit_table!
        setup_trigger!
        setup_shadow_table!
        disable_vacuum!
        run_alter_statement!
        copy_data!
        run_analyze!
        replay_and_swap!
        run_analyze!
        validate_constraints!
        drop_and_cleanup!

        logger.info("All tasks successfully completed")
      rescue StandardError => e
        logger.fatal("Something went wrong: #{e.message}", { e: e })

        drop_and_cleanup!

        raise e
      end

      # Installs trap handlers for TERM/QUIT/INT that write the signal name
      # to a pipe (self-pipe trick, keeps the handler async-signal-safe) and
      # returns the read end.
      def setup_signals!
        reader, writer = IO.pipe

        %w[TERM QUIT INT].each do |sig|
          trap(sig) { writer.puts sig }
        end

        reader
      end

      # Blocks until a trapped signal arrives, then cancels the in-flight
      # query, cleans up the helper tables, and exits with the signal number.
      # NOTE(review): `reader.gets` already blocks until the first signal, so
      # the subsequent IO.select loop only fires once per received signal and
      # always reports the first signal's name — looks intentional-enough but
      # worth confirming.
      def handle_signals!
        reader = setup_signals!
        signal = reader.gets.chomp

        while !reader.closed? && IO.select([reader])
          logger.info "Signal #{signal} received, cleaning up"

          client.connection.cancel
          drop_and_cleanup!
          reader.close

          # exit status is the raw signal number from Signal.list.
          exit Signal.list[signal]
        end
      end

      # Creates the audit table: same columns as the parent (LIKE) plus
      # operation_type/trigger_time bookkeeping columns. The capture trigger
      # writes every INSERT/UPDATE/DELETE here.
      def setup_audit_table!
        logger.info("Setting up audit table", { audit_table: audit_table })

        sql = <<~SQL
          CREATE TABLE #{audit_table} (operation_type text, trigger_time timestamp, LIKE #{client.table});
        SQL

        Query.run(client.connection, sql)
      end

      # Installs the capture trigger on the parent table under an ACCESS
      # EXCLUSIVE lock; always COMMITs in ensure so the lock is released even
      # on failure.
      def setup_trigger!
        # acquire access exclusive lock to ensure audit triggers
        # are setup fine. This also calls kill_backends (if opted in via flag)
        # so any competing backends will be killed to setup the trigger
        opened = Query.open_lock_exclusive(client, client.table)

        raise AccessExclusiveLockNotAcquired unless opened

        logger.info("Setting up triggers")

        sql = <<~SQL
          DROP TRIGGER IF EXISTS primary_to_audit_table_trigger ON #{client.table};

          CREATE OR REPLACE FUNCTION primary_to_audit_table_trigger()
          RETURNS TRIGGER AS
          $$
          BEGIN
            IF ( TG_OP = 'INSERT') THEN
              INSERT INTO \"#{audit_table}\" select 'INSERT', now(), NEW.* ;
              RETURN NEW;
            ELSIF ( TG_OP = 'UPDATE') THEN
              INSERT INTO \"#{audit_table}\" select 'UPDATE', now(), NEW.* ;
              RETURN NEW;
            ELSIF ( TG_OP = 'DELETE') THEN
              INSERT INTO \"#{audit_table}\" select 'DELETE', now(), OLD.* ;
              RETURN NEW;
            END IF;
          END;
          $$ LANGUAGE PLPGSQL SECURITY DEFINER;

          CREATE TRIGGER primary_to_audit_table_trigger
          AFTER INSERT OR UPDATE OR DELETE ON #{client.table}
          FOR EACH ROW EXECUTE PROCEDURE primary_to_audit_table_trigger();
        SQL

        Query.run(client.connection, sql, opened)
      ensure
        Query.run(client.connection, "COMMIT;")
      end

      # Clones the parent table into the shadow table (structure, indexes,
      # FKs via create_table_all) and detaches the serial sequences so the
      # shadow table doesn't advance the parent's sequences.
      def setup_shadow_table!
        logger.info("Setting up shadow table", { shadow_table: shadow_table })

        Query.run(client.connection, "SELECT create_table_all('#{client.table}', '#{shadow_table}');")

        # update serials
        Query.run(client.connection, "SELECT fix_serial_sequence('#{client.table}', '#{shadow_table}');")
      end

      # Disabling vacuum to avoid any issues during the process
      # Also snapshots the parent table's storage parameters into the Store
      # so swap! can restore them afterwards.
      def disable_vacuum!
        result = Query.storage_parameters_for(client, client.table) || ""
        # NOTE(review): the local is never read here — only the Store entry
        # is used later (by swap!); assignment kept for parity.
        primary_table_storage_parameters = Store.set(:primary_table_storage_parameters, result)

        logger.debug("Disabling vacuum on shadow and audit table",
                     { shadow_table: shadow_table, audit_table: audit_table })
        sql = <<~SQL
          ALTER TABLE #{shadow_table} SET (
            autovacuum_enabled = false, toast.autovacuum_enabled = false
          );

          ALTER TABLE #{audit_table} SET (
            autovacuum_enabled = false, toast.autovacuum_enabled = false
          );
        SQL
        Query.run(client.connection, sql)
      end

      # Applies the user's ALTER (rewritten to target the shadow table) and
      # records any dropped/renamed columns for the replay phase.
      def run_alter_statement!
        statement = Query.alter_statement_for(client, shadow_table)
        logger.info("Running alter statement on shadow table",
                    { shadow_table: shadow_table, parent_table: client.table })
        Query.run(client.connection, statement)

        Store.set(:dropped_columns_list, Query.dropped_columns(client))
        Store.set(:renamed_columns_list, Query.renamed_columns(client))
      end

      # Begin the process to copy data into copy table
      # depending on the size of the table, this can be a time
      # taking operation.
      # If the user supplied --copy-statement, it is used instead of the
      # generated statement, with %{shadow_table} interpolated via format().
      def copy_data!
        logger.info("Copying contents..", { shadow_table: shadow_table, parent_table: client.table })

        if client.copy_statement
          query = format(client.copy_statement, shadow_table: shadow_table)
          return Query.run(client.connection, query)
        end

        sql = Query.copy_data_statement(client, shadow_table)
        Query.run(client.connection, sql)
      end

      # Replays audit-table rows until the backlog drops below the delta
      # threshold (signalled by CountBelowDelta), then performs the swap.
      def replay_and_swap!
        Replay.begin!
      rescue CountBelowDelta
        logger.info("Remaining rows below delta count, proceeding towards swap")

        swap!
      end

      # Final cutover: under an ACCESS EXCLUSIVE lock (with a short
      # statement_timeout), replays the last few audit rows, renames parent
      # -> old_primary_table and shadow -> parent, re-points foreign keys,
      # restores storage parameters, and drops the capture trigger.
      # NOTE(review): the DROP TRIGGER runs after the renames, so it targets
      # the freshly-swapped-in table (which has no such trigger — IF EXISTS
      # makes it a no-op); the trigger actually lives on old_primary_table,
      # which is only removed if --drop was passed. Confirm intent.
      def swap!
        logger.info("Performing swap!")

        foreign_key_statements = Query.get_foreign_keys_to_refresh(client, client.table)
        storage_params_reset = primary_table_storage_parameters.empty? ? "" : "ALTER TABLE #{client.table} SET (#{primary_table_storage_parameters});"

        # From here on, all statements are carried out in a single
        # transaction with access exclusive lock

        opened = Query.open_lock_exclusive(client, client.table)

        raise AccessExclusiveLockNotAcquired unless opened

        Query.run(client.connection, "SET statement_timeout to '#{SWAP_STATEMENT_TIMEOUT}';", opened)

        rows = Replay.rows_to_play(opened)
        Replay.play!(rows, opened)

        sql = <<~SQL
          ALTER TABLE #{client.table} RENAME to #{old_primary_table};
          ALTER TABLE #{shadow_table} RENAME to #{client.table};
          #{foreign_key_statements}
          #{storage_params_reset}
          DROP TRIGGER IF EXISTS primary_to_audit_table_trigger ON #{client.table};
        SQL

        Query.run(client.connection, sql)
      ensure
        # Always commit (releasing the exclusive lock) and restore the
        # unlimited statement_timeout from setup!.
        Query.run(client.connection, "COMMIT;")
        Query.run(client.connection, "SET statement_timeout = 0;")
      end

      # Refreshes planner statistics on the (current) parent table.
      def run_analyze!
        logger.info("Performing ANALYZE!")

        Query.run(client.connection, "ANALYZE VERBOSE #{client.table};")
      end

      # Validates the foreign keys that were re-added as NOT VALID during the
      # swap, outside the exclusive-lock window.
      def validate_constraints!
        logger.info("Validating constraints!")

        validate_statements = Query.get_foreign_keys_to_validate(client, client.table)

        Query.run(client.connection, validate_statements)
      end

      # Drops the helper tables (and the renamed original table if --drop)
      # and resets the session settings changed during setup. Safe to call
      # multiple times — all drops use IF EXISTS.
      def drop_and_cleanup!
        primary_drop = client.drop ? "DROP TABLE IF EXISTS #{old_primary_table};" : ""
        audit_table_drop = audit_table ? "DROP TABLE IF EXISTS #{audit_table}" : ""
        shadow_table_drop = shadow_table ? "DROP TABLE IF EXISTS #{shadow_table}" : ""

        sql = <<~SQL
          #{audit_table_drop};
          #{shadow_table_drop};
          #{primary_drop}
          RESET statement_timeout;
          RESET client_min_messages;
          RESET lock_timeout;
        SQL

        Query.run(client.connection, sql)
      end

      # Per-process random suffix shared by audit and shadow table names
      # (memoized, so both get the same suffix).
      private def random_string
        @random_string ||= SecureRandom.hex(3)
      end
    end
  end
end