pg_easy_replicate 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,308 @@
1
+ # frozen_string_literal: true
2
+
3
module PgEasyReplicate
  # Drives the logical replication workflow between the source and target
  # databases: publication/subscription setup, lag watching, and the final
  # switchover. All SQL is executed through Query.run.
  class Orchestrate
    extend Helper

    class << self
      DEFAULT_LAG = 200_000 # 200kb of WAL lag tolerated before switchover
      DEFAULT_WAIT = 5 # seconds between lag polls

      # Kicks off replication for a group: creates the publication on the
      # source, adds the requested tables, creates the subscription on the
      # target and records the group. On any failure the partially created
      # objects are torn down, the group is marked failed, and we abort.
      def start_sync(options)
        PgEasyReplicate.assert_config

        create_publication(
          group_name: options[:group_name],
          conn_string: source_db_url,
        )

        add_tables_to_publication(
          group_name: options[:group_name],
          tables: options[:tables],
          conn_string: source_db_url,
          schema: options[:schema],
        )

        create_subscription(
          group_name: options[:group_name],
          source_conn_string: secondary_source_db_url || source_db_url,
          target_conn_string: target_db_url,
        )

        Group.create(
          name: options[:group_name],
          table_names: options[:tables],
          schema_name: options[:schema],
          started_at: Time.now.utc,
        )
      rescue => e
        stop_sync(
          group_name: options[:group_name],
          source_conn_string: source_db_url,
          target_conn_string: target_db_url,
        )

        if Group.find(options[:group_name])
          # Record in UTC like every other timestamp we persist (was a bare
          # Time.now, mixing local time into an otherwise-UTC table).
          Group.update(
            group_name: options[:group_name],
            failed_at: Time.now.utc,
          )
        else
          Group.create(
            name: options[:group_name],
            table_names: options[:tables],
            schema_name: options[:schema],
            started_at: Time.now.utc,
            failed_at: Time.now.utc,
          )
        end

        abort_with("Starting sync failed: #{e.message}")
      end

      # Creates an empty publication for the group on the given database.
      def create_publication(group_name:, conn_string:)
        logger.info(
          "Setting up publication",
          { publication_name: publication_name(group_name) },
        )
        Query.run(
          query: "create publication #{publication_name(group_name)}",
          connection_url: conn_string,
        )
      end

      # Adds tables to the group's publication. +tables+ is a comma separated
      # string; when blank, every table in +schema+ is added.
      def add_tables_to_publication(
        schema:,
        group_name:,
        conn_string:,
        tables: ""
      )
        logger.info(
          "Adding tables to publication",
          { publication_name: publication_name(group_name) },
        )
        tables = tables&.split(",") || []
        if tables.empty?
          tables = list_all_tables(schema: schema, conn_string: conn_string)
        end

        tables.map do |table_name|
          Query.run(
            query:
              "ALTER PUBLICATION #{publication_name(group_name)} ADD TABLE \"#{table_name}\"",
            connection_url: conn_string,
            schema: schema,
          )
        end
      end

      # Returns the names of all tables in +schema+ on the given database.
      def list_all_tables(schema:, conn_string:)
        Query
          .run(
            query:
              "SELECT table_name FROM information_schema.tables WHERE table_schema = '#{schema}'",
            connection_url: conn_string,
          )
          .map(&:values)
          .flatten
      end

      # Drops the group's publication (no-op if it does not exist).
      def drop_publication(group_name:, conn_string:)
        logger.info(
          "Dropping publication",
          { publication_name: publication_name(group_name) },
        )
        Query.run(
          query: "DROP PUBLICATION IF EXISTS #{publication_name(group_name)}",
          connection_url: conn_string,
        )
      end

      # Creates the subscription on the target pointing at the source's
      # publication. CREATE SUBSCRIPTION cannot run inside a transaction
      # block, hence transaction: false.
      def create_subscription(
        group_name:,
        source_conn_string:,
        target_conn_string:
      )
        logger.info(
          "Setting up subscription",
          {
            publication_name: publication_name(group_name),
            subscription_name: subscription_name(group_name),
          },
        )

        Query.run(
          query:
            "CREATE SUBSCRIPTION #{subscription_name(group_name)} CONNECTION '#{source_conn_string}' PUBLICATION #{publication_name(group_name)}",
          connection_url: target_conn_string,
          transaction: false,
        )
      rescue Sequel::DatabaseError => e
        # A statement timeout here usually means the target cannot reach the
        # source to establish replication.
        if e.message.include?("canceling statement due to statement timeout")
          abort_with(
            "Subscription creation failed, please ensure both databases are in the same network region: #{e.message}",
          )
        end

        raise
      end

      # Drops the group's subscription on the target (no-op if absent).
      # Runs outside a transaction, like CREATE SUBSCRIPTION.
      def drop_subscription(group_name:, target_conn_string:)
        logger.info(
          "Dropping subscription",
          {
            publication_name: publication_name(group_name),
            subscription_name: subscription_name(group_name),
          },
        )
        Query.run(
          query: "DROP SUBSCRIPTION IF EXISTS #{subscription_name(group_name)}",
          connection_url: target_conn_string,
          transaction: false,
        )
      end

      # Tears down both sides of the replication for a group.
      def stop_sync(target_conn_string:, source_conn_string:, group_name:)
        PgEasyReplicate.assert_config

        logger.info(
          "Stopping sync",
          {
            publication_name: publication_name(group_name),
            subscription_name: subscription_name(group_name),
          },
        )
        drop_publication(
          group_name: group_name,
          conn_string: source_conn_string,
        )
        drop_subscription(
          group_name: group_name,
          target_conn_string: target_conn_string,
        )
      end

      # Performs the switchover: waits for lag to drop below
      # +lag_delta_size+, makes the source read only, waits for the
      # remaining WAL to catch up, refreshes sequences on the target and
      # drops the subscription. On failure the source is made writable again.
      #
      # NOTE(review): +source_conn_string+ is accepted for interface
      # symmetry but currently unused — the revoke/restore helpers operate
      # on source_db_url directly.
      def switchover(
        group_name:,
        source_conn_string: source_db_url,
        target_conn_string: target_db_url,
        lag_delta_size: DEFAULT_LAG
      )
        PgEasyReplicate.assert_config
        group = Group.find(group_name)
        # Guard against a missing group — the original dereferenced
        # group[:schema_name] and would raise NoMethodError on nil.
        raise "Group not found: #{group_name}" unless group

        watch_lag(group_name: group_name, lag: lag_delta_size)
        revoke_connections_on_source_db(group_name)
        wait_for_remaining_catchup(group_name)
        refresh_sequences(
          conn_string: target_conn_string,
          schema: group[:schema_name],
        )
        mark_switchover_complete(group_name)
        drop_subscription(
          group_name: group_name,
          target_conn_string: target_conn_string,
        )
      rescue => e
        restore_connections_on_source_db(group_name)

        abort_with("Switchover sync failed: #{e.message}")
      end

      # Polls lag stats every +wait_time+ seconds until write, flush and
      # replay lag are all at or below +lag+ bytes. Skips checks until all
      # tables have reached the replicating state.
      def watch_lag(group_name:, wait_time: DEFAULT_WAIT, lag: DEFAULT_LAG)
        logger.info("Watching lag stats")

        loop do
          sleep(wait_time)

          unless Stats.all_tables_replicating?(group_name)
            logger.debug(
              "All tables haven't reached replicating state, skipping check",
            )
            next
          end

          lag_stat = Stats.lag_stats(group_name).first
          if lag_stat[:write_lag].nil? || lag_stat[:flush_lag].nil? ||
               lag_stat[:replay_lag].nil?
            next
          end

          logger.debug("Current lag stats: #{lag_stat}")

          below_write_lag = lag_stat[:write_lag] <= lag
          below_flush_lag = lag_stat[:flush_lag] <= lag
          below_replay_lag = lag_stat[:replay_lag] <= lag

          break if below_write_lag && below_flush_lag && below_replay_lag
        end

        # Report the threshold actually used (was hard-coded to DEFAULT_LAG,
        # which misreported when a custom lag was passed, e.g. 0 during
        # wait_for_remaining_catchup).
        logger.info("Lag below #{lag} bytes. Continuing...")
      end

      # Blocks until lag reaches zero, polling aggressively.
      def wait_for_remaining_catchup(group_name)
        logger.info("Waiting for remaining WAL to get flushed")

        watch_lag(group_name: group_name, lag: 0, wait_time: 0.2)

        logger.info("Caught up on remaining WAL lag")
      end

      # Marks the source DB user read only and terminates its existing
      # backends so no further writes land on the source.
      # NOTE(review): +group_name+ is currently unused; kept for interface
      # consistency with the other helpers.
      def revoke_connections_on_source_db(group_name)
        logger.info(
          "Lag is now below #{DEFAULT_LAG}, marking source DB to read only",
        )

        alter_sql =
          "ALTER USER #{db_user(source_db_url)} set default_transaction_read_only = true"
        Query.run(query: alter_sql, connection_url: source_db_url)

        kill_sql =
          "SELECT pg_terminate_backend(pg_stat_activity.pid) FROM pg_stat_activity WHERE usename = '#{db_user(source_db_url)}';"

        Query.run(query: kill_sql, connection_url: source_db_url)
      end

      # Reverses revoke_connections_on_source_db (source becomes writable).
      def restore_connections_on_source_db(group_name)
        logger.info("Restoring connections")

        alter_sql =
          "ALTER USER #{db_user(source_db_url)} set default_transaction_read_only = false"
        Query.run(query: alter_sql, connection_url: source_db_url)
      end

      # Re-seeds every sequence in +schema+ on the target from the MAX of
      # its owning column, so inserts after switchover don't collide with
      # replicated rows. Logical replication copies rows but not sequence
      # positions.
      def refresh_sequences(conn_string:, schema: nil)
        logger.info("Refreshing sequences")
        sql = <<~SQL
          DO $$
          DECLARE
          i TEXT;
          BEGIN
            FOR i IN (
              SELECT 'SELECT SETVAL('
                || quote_literal(quote_ident(PGT.schemaname) || '.' || quote_ident(S.relname))
                || ', COALESCE(MAX(' ||quote_ident(C.attname)|| '), 1) ) FROM '
                || quote_ident(PGT.schemaname)|| '.'||quote_ident(T.relname)|| ';'
              FROM pg_class AS S,
                  pg_depend AS D,
                  pg_class AS T,
                  pg_attribute AS C,
                  pg_tables AS PGT
              WHERE S.relkind = 'S'
                  AND S.oid = D.objid
                  AND D.refobjid = T.oid
                  AND D.refobjid = C.attrelid
                  AND D.refobjsubid = C.attnum
                  AND T.relname = PGT.tablename
            ) LOOP
                  EXECUTE i;
            END LOOP;
          END $$;
        SQL

        Query.run(query: sql, connection_url: conn_string, schema: schema)
      end

      # Stamps the group as switched over, in UTC for consistency with the
      # other recorded timestamps.
      def mark_switchover_complete(group_name)
        Group.update(
          group_name: group_name,
          switchover_completed_at: Time.now.utc,
        )
      end
    end
  end
end
@@ -0,0 +1,49 @@
1
+ # frozen_string_literal: true
2
+
3
module PgEasyReplicate
  # Thin wrapper around Sequel for running one-off SQL against a connection
  # URL, with optional schema/search_path and a 5s statement timeout.
  class Query
    extend Helper

    class << self
      # Runs +query+ against +connection_url+ and returns the rows as an
      # array of hashes. When +transaction+ is true the query (and the
      # search_path/timeout SETs) run inside a transaction; set it to false
      # for statements that cannot run in a transaction block (e.g.
      # CREATE/DROP SUBSCRIPTION). The connection is always disconnected,
      # even on error.
      def run(
        query:,
        connection_url:,
        user: internal_user_name,
        schema: nil,
        transaction: true
      )
        conn =
          connect(connection_url: connection_url, schema: schema, user: user)
        if transaction
          conn.transaction do
            conn.run("SET search_path to #{schema}") if schema
            conn.run("SET statement_timeout to '5s'")
            conn.fetch(query).to_a
          end
        else
          conn.run("SET search_path to #{schema}") if schema
          conn.run("SET statement_timeout to '5s'")
          conn.fetch(query).to_a
        end
      ensure
        # Disconnect exactly once, here. The original disconnected in the
        # body and then ran "RESET statement_timeout" in ensure, which
        # checked a brand-new connection out of the pool (one that never had
        # the timeout set) only to throw it away again. The session-level
        # timeout dies with the connection, so a reset is unnecessary.
        conn&.disconnect
      end

      # Establishes a Sequel connection; logs SQL when DEBUG is set.
      def connect(connection_url:, user: internal_user_name, schema: nil)
        c =
          Sequel.connect(
            connection_url,
            user: user,
            logger: ENV.fetch("DEBUG", nil) ? logger : nil,
            search_path: schema,
          )
        logger.debug("Connection established")
        c
      end
    end
  end
end
@@ -0,0 +1,128 @@
1
+ # frozen_string_literal: true
2
+
3
module PgEasyReplicate
  # Read-only reporting on a group's replication state: lag, slots,
  # per-table copy state and subscription message receipts.
  class Stats
    # Maps pg_subscription_rel.srsubstate codes to readable names.
    REPLICATION_STATE_MAP = {
      "i" => "initializing",
      "d" => "data_is_being_copied",
      "f" => "finished_table_copy",
      "s" => "synchronized",
      "r" => "replicating",
    }.freeze
    extend Helper

    class << self
      # Aggregated stats hash for a group (lag, slots, per-table states,
      # state counts, LSN receipts and the group's lifecycle timestamps).
      def object(group_name)
        PgEasyReplicate.assert_config
        stats = replication_stats(group_name)
        group = Group.find(group_name)
        {
          lag_stats: lag_stats(group_name),
          replication_slots: pg_replication_slots(group_name),
          replication_stats: stats,
          replication_stats_count_by_state:
            replication_stats_count_by_state(stats),
          message_lsn_receipts: message_lsn_receipts(group_name),
          sync_started_at: group[:started_at],
          sync_failed_at: group[:failed_at],
          switchover_completed_at: group[:switchover_completed_at],
        }
      end

      # Pretty-prints the stats object as JSON to stdout.
      def print(group_name)
        puts JSON.pretty_generate(object(group_name))
      end

      # Prints the stats object every second, forever (Ctrl-C to stop).
      def follow(group_name)
        loop do
          print(group_name)
          sleep(1)
        end
      end

      # Write/flush/replay lag (in bytes, via pg_wal_lsn_diff) for the
      # group's walsender on the source database.
      def lag_stats(group_name)
        sql = <<~SQL
          SELECT pid,
          client_addr,
          usename as user_name,
          application_name,
          state,
          sync_state,
          pg_wal_lsn_diff(sent_lsn, write_lsn) AS write_lag,
          pg_wal_lsn_diff(sent_lsn, flush_lsn) AS flush_lag,
          pg_wal_lsn_diff(sent_lsn, replay_lsn) AS replay_lag
          FROM pg_stat_replication
          WHERE application_name = '#{subscription_name(group_name)}';
        SQL

        Query.run(query: sql, connection_url: source_db_url)
      end

      # The group's replication slot row on the source, if any.
      def pg_replication_slots(group_name)
        sql = <<~SQL
          select * from pg_replication_slots WHERE slot_name = '#{subscription_name(group_name)}';
        SQL

        Query.run(query: sql, connection_url: source_db_url)
      end

      # Per-table subscription state on the target, with srsubstate codes
      # translated through REPLICATION_STATE_MAP.
      def replication_stats(group_name)
        sql = <<~SQL
          SELECT
          s.subname AS subscription_name,
          c.relnamespace :: regnamespace :: text as table_schema,
          c.relname as table_name,
          rel.srsubstate as replication_state
          FROM
          pg_catalog.pg_subscription s
          JOIN pg_catalog.pg_subscription_rel rel ON rel.srsubid = s.oid
          JOIN pg_catalog.pg_class c on c.oid = rel.srrelid
          WHERE s.subname = '#{subscription_name(group_name)}'
        SQL

        Query
          .run(query: sql, connection_url: target_db_url)
          .each do |obj|
            obj[:replication_state] = REPLICATION_STATE_MAP[
              obj[:replication_state]
            ]
          end
      end

      # True when at least one table is tracked and every tracked table has
      # reached the "replicating" state. Reuses the state-count helper
      # instead of duplicating its accumulation logic (the original inlined
      # the same each_with_object block, plus a redundant .uniq on hash
      # keys, which are unique by construction).
      def all_tables_replicating?(group_name)
        counts =
          replication_stats_count_by_state(replication_stats(group_name))
        counts.keys == [REPLICATION_STATE_MAP["r"]]
      end

      # Tallies replication stats rows by their replication_state value.
      def replication_stats_count_by_state(stats)
        stats
          .each
          .with_object(Hash.new(0)) do |state, counts|
            counts[state[:replication_state]] += 1
          end
      end

      # LSN receipt bookkeeping for the subscription on the target.
      def message_lsn_receipts(group_name)
        sql = <<~SQL
          select
          received_lsn,
          last_msg_send_time,
          last_msg_receipt_time,
          latest_end_lsn,
          latest_end_time
          from
          pg_catalog.pg_stat_subscription
          WHERE subname = '#{subscription_name(group_name)}'
        SQL
        Query.run(query: sql, connection_url: target_db_url)
      end
    end
  end
end
@@ -0,0 +1,5 @@
1
+ # frozen_string_literal: true
2
+
3
module PgEasyReplicate
  # Gem version, bumped on each release.
  VERSION = "0.1.0"
end
@@ -0,0 +1,197 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "json"
4
+ require "ougai"
5
+ require "lockbox"
6
+ require "pg"
7
+ require "sequel"
8
+
9
+ require "pg_easy_replicate/helper"
10
+ require "pg_easy_replicate/version"
11
+ require "pg_easy_replicate/query"
12
+ require "pg_easy_replicate/orchestrate"
13
+ require "pg_easy_replicate/stats"
14
+ require "pg_easy_replicate/group"
15
+ require "pg_easy_replicate/cli"
16
+
17
+ Sequel.default_timezone = :utc
18
module PgEasyReplicate
  class Error < StandardError
  end

  extend Helper

  class << self
    # Memoized snapshot of replication-relevant pg_settings for both
    # databases, plus superuser checks. Aborts if either DB URL is missing
    # or either database is unreachable.
    def config
      abort_with("SOURCE_DB_URL is missing") if source_db_url.nil?
      abort_with("TARGET_DB_URL is missing") if target_db_url.nil?
      @config ||=
        begin
          q =
            "select name, setting from pg_settings where name in ('max_wal_senders', 'max_worker_processes', 'wal_level', 'max_replication_slots', 'max_logical_replication_workers');"

          {
            source_db_is_superuser: is_super_user?(source_db_url),
            target_db_is_superuser: is_super_user?(target_db_url),
            source_db:
              Query.run(
                query: q,
                connection_url: source_db_url,
                user: db_user(source_db_url),
              ),
            target_db:
              Query.run(
                query: q,
                connection_url: target_db_url,
                user: db_user(target_db_url),
              ),
          }
        rescue => e
          abort_with("Unable to check config: #{e.message}")
        end
    end

    # Aborts unless both databases have wal_level=logical and both users
    # are superusers.
    def assert_config
      unless assert_wal_level_logical(config.dig(:source_db))
        abort_with("WAL_LEVEL should be LOGICAL on source DB")
      end

      unless assert_wal_level_logical(config.dig(:target_db))
        abort_with("WAL_LEVEL should be LOGICAL on target DB")
      end

      unless config.dig(:source_db_is_superuser)
        abort_with("User on source database should be a superuser")
      end

      return if config.dig(:target_db_is_superuser)
      abort_with("User on target database should be a superuser")
    end

    # One-time setup: internal schema, replication users on both databases
    # and the groups bookkeeping table.
    def bootstrap(options)
      assert_config
      logger.info("Setting up schema")
      setup_schema

      logger.info("Setting up replication user on source database")
      create_user(conn_string: source_db_url, group_name: options[:group_name])

      logger.info("Setting up replication user on target database")
      create_user(conn_string: target_db_url, group_name: options[:group_name])

      logger.info("Setting up groups tables")
      Group.setup
    rescue => e
      abort_with("Unable to bootstrap: #{e.message}")
    end

    # Reverses bootstrap/sync artifacts. options[:sync] drops the
    # publication/subscription; options[:everything] additionally drops the
    # internal schema and replication users.
    def cleanup(options)
      logger.info("Dropping groups table")
      Group.drop

      if options[:everything]
        logger.info("Dropping schema")
        drop_schema
      end

      if options[:everything] || options[:sync]
        Orchestrate.drop_publication(
          group_name: options[:group_name],
          conn_string: source_db_url,
        )

        Orchestrate.drop_subscription(
          group_name: options[:group_name],
          target_conn_string: target_db_url,
        )
      end

      if options[:everything]
        # Drop users at last
        logger.info("Dropping replication user on source database")
        drop_user(conn_string: source_db_url, group_name: options[:group_name])

        logger.info("Dropping replication user on target database")
        drop_user(conn_string: target_db_url, group_name: options[:group_name])
      end
    rescue => e
      abort_with("Unable to cleanup: #{e.message}")
    end

    # Drops the internal schema (and everything in it) on the source.
    def drop_schema
      Query.run(
        query: "DROP SCHEMA IF EXISTS #{internal_schema_name} CASCADE",
        connection_url: source_db_url,
        schema: internal_schema_name,
      )
    end

    # Creates the internal schema on the source and grants the source DB
    # user access to it.
    def setup_schema
      sql = <<~SQL
        create schema if not exists #{internal_schema_name};
        grant usage on schema #{internal_schema_name} to #{db_user(source_db_url)};
        grant create on schema #{internal_schema_name} to #{db_user(source_db_url)};
      SQL

      Query.run(
        query: sql,
        connection_url: source_db_url,
        schema: internal_schema_name,
        # Authenticate as the source DB's own user — the original passed
        # db_user(target_db_url) while connecting to the source, which only
        # worked when both URLs happened to share a user.
        user: db_user(source_db_url),
      )
    end

    # Ougai JSON logger; TRACE when DEBUG is set, INFO otherwise.
    def logger
      @logger ||=
        begin
          logger = Ougai::Logger.new($stdout)
          logger.level =
            ENV["DEBUG"] ? Ougai::Logger::TRACE : Ougai::Logger::INFO
          logger.with_fields = { version: PgEasyReplicate::VERSION }
          logger
        end
    end

    private

    # Truthy when the settings rows contain wal_level=logical.
    def assert_wal_level_logical(db_config)
      db_config&.find do |r|
        r.dig(:name) == "wal_level" && r.dig(:setting) == "logical"
      end
    end

    # Whether the user embedded in +url+ is a Postgres superuser.
    def is_super_user?(url)
      Query.run(
        query:
          "select usesuper from pg_user where usename = '#{db_user(url)}';",
        connection_url: url,
        # Authenticate as the user of the URL being checked — the original
        # always used db_user(target_db_url), so checking the source DB
        # failed whenever the two databases use different users.
        user: db_user(url),
      ).first[
        :usesuper
      ]
    end

    # Creates (recreating if present) the internal replication role on the
    # database behind +conn_string+.
    # NOTE(review): +group_name+ is currently unused; kept for interface
    # compatibility with callers.
    def create_user(conn_string:, group_name:)
      # Use the connection's password for the new role — the original read
      # [:user], setting the role's password to the user *name*.
      # NOTE(review): the password is interpolated into single quotes and
      # will break (or worse) if it contains a quote — consider quoting via
      # the driver.
      password = connection_info(conn_string)[:password]
      sql = <<~SQL
        drop role if exists #{internal_user_name};
        create role #{internal_user_name} with password '#{password}' login superuser createdb createrole;
      SQL

      Query.run(
        query: sql,
        connection_url: conn_string,
        # Authenticate as the user of the database we are operating on,
        # consistent with drop_user (the original always used the target's
        # user, even when creating the role on the source).
        user: db_user(conn_string),
      )
    end

    # Drops the internal replication role on the database behind
    # +conn_string+ (no-op if absent).
    def drop_user(conn_string:, group_name:)
      sql = "drop role if exists #{internal_user_name};"
      Query.run(
        query: sql,
        connection_url: conn_string,
        user: db_user(conn_string),
      )
    end
  end
end
data/package.json ADDED
@@ -0,0 +1,13 @@
1
+ {
2
+ "name": "pg-osc",
3
+ "version": "1.0.0",
4
+ "main": "index.js",
5
+ "repository": "git@github.com:shayonj/pg-osc.git",
6
+ "author": "Shayon Mukherjee <shayonj@gmail.com>",
7
+ "license": "MIT",
8
+ "private": true,
9
+ "dependencies": {
10
+ "@prettier/plugin-ruby": "^3.2.2",
11
+ "prettier": "^2.8.8"
12
+ }
13
+ }