pg_online_schema_change 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Binary file
@@ -0,0 +1,9 @@
1
# Local development/test stack: a single PostgreSQL container.
# Exposes 5432 on the host; the suite connects as user "jamesbond"
# to the default "postgres" database (no password configured).
version: '2'
services:
  postgres:
    image: postgres:9.6.2-alpine
    ports:
      - "5432:5432"
    environment:
      POSTGRES_USER: jamesbond
      POSTGRES_DB: postgres
@@ -0,0 +1,44 @@
1
+ require "thor"
2
+
3
+ module PgOnlineSchemaChange
4
+ class CLI < Thor
5
+ desc "perform", "Perform the set of operations to safely apply the schema change with minimal locks"
6
+ method_option :alter_statement, aliases: "-a", type: :string, required: true,
7
+ desc: "The ALTER statement to perform the schema change"
8
+ method_option :schema, aliases: "-s", type: :string, required: true, default: "public",
9
+ desc: "The schema in which the table is"
10
+ method_option :dbname, aliases: "-d", type: :string, required: true, desc: "Name of the database"
11
+ method_option :host, aliases: "-h", type: :string, required: true, desc: "Server host where the Database is located"
12
+ method_option :username, aliases: "-u", type: :string, required: true, desc: "Username for the Database"
13
+ method_option :port, aliases: "-p", type: :numeric, required: true, default: 5432, desc: "Port for the Database"
14
+ method_option :password, aliases: "-w", type: :string, required: true, desc: "Password for the Database"
15
+ method_option :verbose, aliases: "-v", type: :boolean, default: false, desc: "Emit logs in debug mode"
16
+ method_option :drop, aliases: "-f", type: :boolean, default: false,
17
+ desc: "Drop the original table in the end after the swap"
18
+ method_option :kill_backends, aliases: "-k", type: :boolean, default: false,
19
+ desc: "Kill other competing queries/backends when trying to acquire lock for the shadow table creation and swap. It will wait for --wait-time-for-lock duration before killing backends and try upto 3 times."
20
+ method_option :wait_time_for_lock, aliases: "-w", type: :numeric, default: 10,
21
+ desc: "Time to wait before killing backends to acquire lock and/or retrying upto 3 times. It will kill backends if --kill-backends is true, otherwise try upto 3 times and exit if it cannot acquire a lock."
22
+ method_option :copy_statement, aliases: "-c", type: :string, required: false, default: "",
23
+ desc: "Takes a .sql file location where you can provide a custom query to be played (ex: backfills) when pgosc copies data from the primary to the shadow table. More examples in README."
24
+
25
+ def perform
26
+ client_options = Struct.new(*options.keys.map(&:to_sym)).new(*options.values)
27
+
28
+ PgOnlineSchemaChange.logger = client_options.verbose
29
+ PgOnlineSchemaChange::Orchestrate.run!(client_options)
30
+ end
31
+
32
+ desc "--version, -v", "print the version"
33
+
34
+ def version
35
+ puts PgOnlineSchemaChange::VERSION
36
+ end
37
+
38
+ def self.exit_on_failure?
39
+ true
40
+ end
41
+
42
+ default_task :perform
43
+ end
44
+ end
@@ -0,0 +1,49 @@
1
+ require "pg"
2
+
3
+ module PgOnlineSchemaChange
4
+ class Client
5
+ attr_accessor :alter_statement, :schema, :dbname, :host, :username, :port, :password, :connection, :table, :drop,
6
+ :kill_backends, :wait_time_for_lock, :copy_statement
7
+
8
+ def initialize(options)
9
+ @alter_statement = options.alter_statement
10
+ @schema = options.schema
11
+ @dbname = options.dbname
12
+ @host = options.host
13
+ @username = options.username
14
+ @port = options.port
15
+ @password = options.password
16
+ @drop = options.drop
17
+ @kill_backends = options.kill_backends
18
+ @wait_time_for_lock = options.wait_time_for_lock
19
+ handle_copy_statement(options.copy_statement)
20
+
21
+ @connection = PG.connect(
22
+ dbname: @dbname,
23
+ host: @host,
24
+ user: @username,
25
+ password: @password,
26
+ port: @port,
27
+ )
28
+
29
+ raise Error, "Not a valid ALTER statement: #{@alter_statement}" unless Query.alter_statement?(@alter_statement)
30
+
31
+ unless Query.same_table?(@alter_statement)
32
+ raise Error "All statements should belong to the same table: #{@alter_statement}"
33
+ end
34
+
35
+ @table = Query.table(@alter_statement)
36
+
37
+ PgOnlineSchemaChange.logger.debug("Connection established")
38
+ end
39
+
40
+ def handle_copy_statement(statement)
41
+ return if statement.nil? || statement == ""
42
+
43
+ file_path = File.expand_path(statement)
44
+ raise Error, "File not found: #{file_path}" unless File.file?(file_path)
45
+
46
+ @copy_statement = File.open(file_path, "rb", &:read)
47
+ end
48
+ end
49
+ end
@@ -0,0 +1,62 @@
1
# Installs fix_serial_sequence(_table, _newtable): for every serial-backed
# integer column of _table (int/int8/int2 whose default is nextval(...) of
# its own serial sequence), it creates a brand-new sequence owned by the
# shadow table and points the shadow column's DEFAULT at it — so the copied
# table does not keep advancing the primary table's sequences. The SQL is
# assembled with string_agg and only EXECUTEd when at least one serial
# column was found (_sql IS NOT NULL).
FUNC_FIX_SERIAL_SEQUENCE = <<~SQL.freeze
  CREATE OR REPLACE FUNCTION fix_serial_sequence(_table regclass, _newtable text)
    RETURNS void AS
  $func$
  DECLARE
     _sql text;
  BEGIN

    -- Update serial columns to ensure copied table doesn't follow same sequence as primary table
    SELECT INTO _sql
           string_agg('CREATE SEQUENCE ' || seq, E';\n') || E';\n'
        || string_agg(format('ALTER SEQUENCE %s OWNED BY %I.%I'
                           , seq, _newtable, a.attname), E';\n') || E';\n'
        || 'ALTER TABLE ' || quote_ident(_newtable) || E'\n  '
        || string_agg(format($$ALTER %I SET DEFAULT nextval('%s'::regclass)$$
                           , a.attname, seq), E'\n, ')
    FROM   pg_attribute  a
    JOIN   pg_attrdef    ad ON ad.adrelid = a.attrelid
                           AND ad.adnum   = a.attnum
         , quote_ident(_newtable || '_' || a.attname || '_seq') AS seq
    WHERE  a.attrelid = _table
    AND    a.attnum > 0
    AND    NOT a.attisdropped
    AND    a.atttypid = ANY ('{int,int8,int2}'::regtype[])
    AND    pg_get_expr(ad.adbin, ad.adrelid) = 'nextval('''
        || (pg_get_serial_sequence (a.attrelid::regclass::text, a.attname))::regclass
        || '''::regclass)'
    ;

    IF _sql IS NOT NULL THEN
      EXECUTE _sql;
    END IF;

  END
  $func$ LANGUAGE plpgsql VOLATILE;
SQL

# Installs create_table_all(source_table, newsource_table): clones the source
# table's structure via CREATE TABLE ... (LIKE ... INCLUDING ALL), which does
# NOT carry over foreign keys, then loops over pg_constraint and re-adds each
# FK (contype = 'f') onto the new table using pg_get_constraintdef.
FUNC_CREATE_TABLE_ALL = <<~SQL.freeze
  CREATE OR REPLACE FUNCTION create_table_all(source_table text, newsource_table text)
  RETURNS void language plpgsql
  as $$
  declare
      rec record;
  begin
      EXECUTE format(
          'CREATE TABLE %s (LIKE %s including all)',
          newsource_table, source_table);
      for rec in
          SELECT oid, conname
          FROM pg_constraint
          WHERE contype = 'f'
          AND conrelid = source_table::regclass
      LOOP
          EXECUTE format(
              'ALTER TABLE %s add constraint %s %s',
              newsource_table,
              rec.conname,
              pg_get_constraintdef(rec.oid));
      END LOOP;
  END
  $$;
SQL
@@ -0,0 +1,21 @@
1
module PgOnlineSchemaChange
  # Shared convenience accessors mixed into the orchestration classes.
  # Unknown method calls fall through to the Store key/value registry, so
  # e.g. `audit_table` resolves to Store.get(:audit_table).
  module Helper
    # Memoized lookup of the parent table's primary key column via Store.
    def primary_key
      result = Store.get(:primary_key)
      return result if result

      Store.set(:primary_key, Query.primary_key_for(client, client.table))
    end

    def logger
      PgOnlineSchemaChange.logger
    end

    # Expose Store entries as bare method calls; raise loudly for anything
    # Store does not know about so typos don't silently return nil.
    # (Was `Store.send(:get, method)` — `get` is public, call it directly.)
    def method_missing(method, *_args)
      result = Store.get(method)
      return result if result

      raise ArgumentError, "Method `#{method}` doesn't exist."
    end

    # FIX: method_missing without respond_to_missing? breaks respond_to?
    # (and method(:name)) for the Store-backed accessors. Keep the two
    # consistent: anything Store resolves is "responded to".
    def respond_to_missing?(method_name, include_private = false)
      !Store.get(method_name).nil? || super
    end
  end
end
@@ -0,0 +1,263 @@
1
+ require "securerandom"
2
+
3
+ module PgOnlineSchemaChange
4
+ class Orchestrate
5
+ SWAP_STATEMENT_TIMEOUT = "5s".freeze
6
+
7
+ extend Helper
8
+
9
+ class << self
10
+ def setup!(options)
11
+ client = Store.set(:client, Client.new(options))
12
+
13
+ sql = <<~SQL
14
+ SET statement_timeout = 0;
15
+ SET client_min_messages = warning;
16
+ SET search_path TO #{client.schema};
17
+ SQL
18
+
19
+ Query.run(client.connection, sql)
20
+ # install functions
21
+ Query.run(client.connection, FUNC_FIX_SERIAL_SEQUENCE)
22
+ Query.run(client.connection, FUNC_CREATE_TABLE_ALL)
23
+
24
+ # Set this early on to ensure their creation and cleanup (unexpected)
25
+ # happens at all times. IOW, the calls from Store.get always return
26
+ # the same value.
27
+ Store.set(:old_primary_table, "pgosc_op_table_#{client.table}")
28
+ Store.set(:audit_table, "pgosc_at_#{client.table}_#{random_string}")
29
+ Store.set(:shadow_table, "pgosc_st_#{client.table}_#{random_string}")
30
+ end
31
+
32
+ def run!(options)
33
+ setup!(options)
34
+ Thread.new { handle_signals! }
35
+
36
+ raise Error, "Parent table has no primary key, exiting..." if primary_key.nil?
37
+
38
+ setup_audit_table!
39
+ setup_trigger!
40
+ setup_shadow_table!
41
+ disable_vacuum!
42
+ run_alter_statement!
43
+ copy_data!
44
+ run_analyze!
45
+ replay_and_swap!
46
+ run_analyze!
47
+ validate_constraints!
48
+ drop_and_cleanup!
49
+
50
+ logger.info("All tasks successfully completed")
51
+ rescue StandardError => e
52
+ logger.fatal("Something went wrong: #{e.message}", { e: e })
53
+
54
+ drop_and_cleanup!
55
+
56
+ raise e
57
+ end
58
+
59
+ def setup_signals!
60
+ reader, writer = IO.pipe
61
+
62
+ %w[TERM QUIT INT].each do |sig|
63
+ trap(sig) { writer.puts sig }
64
+ end
65
+
66
+ reader
67
+ end
68
+
69
+ def handle_signals!
70
+ reader = setup_signals!
71
+ signal = reader.gets.chomp
72
+
73
+ while !reader.closed? && IO.select([reader])
74
+ logger.info "Signal #{signal} received, cleaning up"
75
+
76
+ client.connection.cancel
77
+ drop_and_cleanup!
78
+ reader.close
79
+
80
+ exit Signal.list[signal]
81
+ end
82
+ end
83
+
84
+ def setup_audit_table!
85
+ logger.info("Setting up audit table", { audit_table: audit_table })
86
+
87
+ sql = <<~SQL
88
+ CREATE TABLE #{audit_table} (operation_type text, trigger_time timestamp, LIKE #{client.table});
89
+ SQL
90
+
91
+ Query.run(client.connection, sql)
92
+ end
93
+
94
+ def setup_trigger!
95
+ # acquire access exclusive lock to ensure audit triggers
96
+ # are setup fine. This also calls kill_backends (if opted in via flag)
97
+ # so any competing backends will be killed to setup the trigger
98
+ opened = Query.open_lock_exclusive(client, client.table)
99
+
100
+ raise AccessExclusiveLockNotAcquired unless opened
101
+
102
+ logger.info("Setting up triggers")
103
+
104
+ sql = <<~SQL
105
+ DROP TRIGGER IF EXISTS primary_to_audit_table_trigger ON #{client.table};
106
+
107
+ CREATE OR REPLACE FUNCTION primary_to_audit_table_trigger()
108
+ RETURNS TRIGGER AS
109
+ $$
110
+ BEGIN
111
+ IF ( TG_OP = 'INSERT') THEN
112
+ INSERT INTO \"#{audit_table}\" select 'INSERT', now(), NEW.* ;
113
+ RETURN NEW;
114
+ ELSIF ( TG_OP = 'UPDATE') THEN
115
+ INSERT INTO \"#{audit_table}\" select 'UPDATE', now(), NEW.* ;
116
+ RETURN NEW;
117
+ ELSIF ( TG_OP = 'DELETE') THEN
118
+ INSERT INTO \"#{audit_table}\" select 'DELETE', now(), OLD.* ;
119
+ RETURN NEW;
120
+ END IF;
121
+ END;
122
+ $$ LANGUAGE PLPGSQL SECURITY DEFINER;
123
+
124
+ CREATE TRIGGER primary_to_audit_table_trigger
125
+ AFTER INSERT OR UPDATE OR DELETE ON #{client.table}
126
+ FOR EACH ROW EXECUTE PROCEDURE primary_to_audit_table_trigger();
127
+ SQL
128
+
129
+ Query.run(client.connection, sql, opened)
130
+ ensure
131
+ Query.run(client.connection, "COMMIT;")
132
+ end
133
+
134
+ def setup_shadow_table!
135
+ logger.info("Setting up shadow table", { shadow_table: shadow_table })
136
+
137
+ Query.run(client.connection, "SELECT create_table_all('#{client.table}', '#{shadow_table}');")
138
+
139
+ # update serials
140
+ Query.run(client.connection, "SELECT fix_serial_sequence('#{client.table}', '#{shadow_table}');")
141
+ end
142
+
143
+ # Disabling vacuum to avoid any issues during the process
144
+ def disable_vacuum!
145
+ result = Query.storage_parameters_for(client, client.table) || ""
146
+ primary_table_storage_parameters = Store.set(:primary_table_storage_parameters, result)
147
+
148
+ logger.debug("Disabling vacuum on shadow and audit table",
149
+ { shadow_table: shadow_table, audit_table: audit_table })
150
+ sql = <<~SQL
151
+ ALTER TABLE #{shadow_table} SET (
152
+ autovacuum_enabled = false, toast.autovacuum_enabled = false
153
+ );
154
+
155
+ ALTER TABLE #{audit_table} SET (
156
+ autovacuum_enabled = false, toast.autovacuum_enabled = false
157
+ );
158
+ SQL
159
+ Query.run(client.connection, sql)
160
+ end
161
+
162
+ def run_alter_statement!
163
+ statement = Query.alter_statement_for(client, shadow_table)
164
+ logger.info("Running alter statement on shadow table",
165
+ { shadow_table: shadow_table, parent_table: client.table })
166
+ Query.run(client.connection, statement)
167
+
168
+ Store.set(:dropped_columns_list, Query.dropped_columns(client))
169
+ Store.set(:renamed_columns_list, Query.renamed_columns(client))
170
+ end
171
+
172
+ # Begin the process to copy data into copy table
173
+ # depending on the size of the table, this can be a time
174
+ # taking operation.
175
+ def copy_data!
176
+ logger.info("Copying contents..", { shadow_table: shadow_table, parent_table: client.table })
177
+
178
+ if client.copy_statement
179
+ query = format(client.copy_statement, shadow_table: shadow_table)
180
+ return Query.run(client.connection, query)
181
+ end
182
+
183
+ sql = Query.copy_data_statement(client, shadow_table)
184
+ Query.run(client.connection, sql)
185
+ end
186
+
187
+ def replay_and_swap!
188
+ Replay.begin!
189
+ rescue CountBelowDelta
190
+ logger.info("Remaining rows below delta count, proceeding towards swap")
191
+
192
+ swap!
193
+ end
194
+
195
+ def swap!
196
+ logger.info("Performing swap!")
197
+
198
+ foreign_key_statements = Query.get_foreign_keys_to_refresh(client, client.table)
199
+ storage_params_reset = primary_table_storage_parameters.empty? ? "" : "ALTER TABLE #{client.table} SET (#{primary_table_storage_parameters});"
200
+
201
+ # From here on, all statements are carried out in a single
202
+ # transaction with access exclusive lock
203
+
204
+ opened = Query.open_lock_exclusive(client, client.table)
205
+
206
+ raise AccessExclusiveLockNotAcquired unless opened
207
+
208
+ Query.run(client.connection, "SET statement_timeout to '#{SWAP_STATEMENT_TIMEOUT}';", opened)
209
+
210
+ rows = Replay.rows_to_play(opened)
211
+ Replay.play!(rows, opened)
212
+
213
+ sql = <<~SQL
214
+ ALTER TABLE #{client.table} RENAME to #{old_primary_table};
215
+ ALTER TABLE #{shadow_table} RENAME to #{client.table};
216
+ #{foreign_key_statements}
217
+ #{storage_params_reset}
218
+ DROP TRIGGER IF EXISTS primary_to_audit_table_trigger ON #{client.table};
219
+ SQL
220
+
221
+ Query.run(client.connection, sql)
222
+ ensure
223
+ Query.run(client.connection, "COMMIT;")
224
+ Query.run(client.connection, "SET statement_timeout = 0;")
225
+ end
226
+
227
+ def run_analyze!
228
+ logger.info("Performing ANALYZE!")
229
+
230
+ Query.run(client.connection, "ANALYZE VERBOSE #{client.table};")
231
+ end
232
+
233
+ def validate_constraints!
234
+ logger.info("Validating constraints!")
235
+
236
+ validate_statements = Query.get_foreign_keys_to_validate(client, client.table)
237
+
238
+ Query.run(client.connection, validate_statements)
239
+ end
240
+
241
+ def drop_and_cleanup!
242
+ primary_drop = client.drop ? "DROP TABLE IF EXISTS #{old_primary_table};" : ""
243
+ audit_table_drop = audit_table ? "DROP TABLE IF EXISTS #{audit_table}" : ""
244
+ shadow_table_drop = shadow_table ? "DROP TABLE IF EXISTS #{shadow_table}" : ""
245
+
246
+ sql = <<~SQL
247
+ #{audit_table_drop};
248
+ #{shadow_table_drop};
249
+ #{primary_drop}
250
+ RESET statement_timeout;
251
+ RESET client_min_messages;
252
+ RESET lock_timeout;
253
+ SQL
254
+
255
+ Query.run(client.connection, sql)
256
+ end
257
+
258
+ private def random_string
259
+ @random_string ||= SecureRandom.hex(3)
260
+ end
261
+ end
262
+ end
263
+ end