active_postgres 0.8.0 → 0.9.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +142 -5
- data/lib/active_postgres/cli.rb +7 -0
- data/lib/active_postgres/components/core.rb +2 -7
- data/lib/active_postgres/components/extensions.rb +40 -35
- data/lib/active_postgres/components/monitoring.rb +256 -4
- data/lib/active_postgres/components/pgbackrest.rb +91 -5
- data/lib/active_postgres/components/pgbouncer.rb +58 -7
- data/lib/active_postgres/components/repmgr.rb +448 -62
- data/lib/active_postgres/configuration.rb +66 -1
- data/lib/active_postgres/connection_pooler.rb +3 -12
- data/lib/active_postgres/credentials.rb +3 -3
- data/lib/active_postgres/direct_executor.rb +1 -2
- data/lib/active_postgres/generators/active_postgres/install_generator.rb +2 -0
- data/lib/active_postgres/generators/active_postgres/templates/postgres.yml.erb +28 -1
- data/lib/active_postgres/health_checker.rb +29 -7
- data/lib/active_postgres/installer.rb +9 -0
- data/lib/active_postgres/overview.rb +351 -0
- data/lib/active_postgres/rollback_manager.rb +4 -8
- data/lib/active_postgres/secrets.rb +23 -5
- data/lib/active_postgres/ssh_executor.rb +44 -19
- data/lib/active_postgres/version.rb +1 -1
- data/lib/active_postgres.rb +1 -0
- data/lib/tasks/postgres.rake +77 -4
- data/lib/tasks/rotate_credentials.rake +4 -16
- data/templates/pg_hba.conf.erb +4 -1
- data/templates/pgbackrest.conf.erb +28 -0
- data/templates/pgbouncer-follow-primary.service.erb +8 -0
- data/templates/pgbouncer-follow-primary.timer.erb +11 -0
- data/templates/pgbouncer.ini.erb +2 -0
- data/templates/pgbouncer_follow_primary.sh.erb +34 -0
- data/templates/postgresql.conf.erb +4 -0
- data/templates/repmgr.conf.erb +10 -3
- data/templates/repmgr_dns_failover.sh.erb +59 -0
- metadata +6 -1
|
@@ -11,6 +11,8 @@ module ActivePostgres
|
|
|
11
11
|
setup_standby(host)
|
|
12
12
|
end
|
|
13
13
|
|
|
14
|
+
setup_dns_failover if dns_failover_enabled?
|
|
15
|
+
|
|
14
16
|
# Verify the entire cluster is healthy after setup
|
|
15
17
|
puts "\nš„ Performing final health check..."
|
|
16
18
|
verify_cluster_health
|
|
@@ -68,6 +70,8 @@ module ActivePostgres
|
|
|
68
70
|
end
|
|
69
71
|
|
|
70
72
|
setup_standby(standby_host)
|
|
73
|
+
|
|
74
|
+
setup_dns_failover if dns_failover_enabled?
|
|
71
75
|
end
|
|
72
76
|
|
|
73
77
|
private
|
|
@@ -91,15 +95,22 @@ module ActivePostgres
|
|
|
91
95
|
repmgr_config = config.component_config(:repmgr)
|
|
92
96
|
version = config.version
|
|
93
97
|
repmgr_password = normalize_repmgr_password(secrets.resolve('repmgr_password'))
|
|
98
|
+
replication_password = normalize_replication_password(secrets.resolve('replication_password'))
|
|
94
99
|
secrets_obj = secrets
|
|
95
100
|
repmgr_user = config.repmgr_user
|
|
96
101
|
repmgr_db = config.repmgr_database
|
|
102
|
+
replication_user = config.replication_user
|
|
103
|
+
if replication_user == repmgr_user && replication_password != repmgr_password
|
|
104
|
+
raise Error, 'replication_user matches repmgr user but passwords differ. Use a distinct replication_user or the same password.'
|
|
105
|
+
end
|
|
106
|
+
effective_replication_password = replication_user == repmgr_user ? repmgr_password : replication_password
|
|
97
107
|
|
|
98
108
|
# Variables used in ERB templates via binding
|
|
99
109
|
_ = repmgr_config
|
|
100
110
|
_ = secrets_obj
|
|
101
111
|
|
|
102
|
-
|
|
112
|
+
cluster_exists = cluster_exists?(host, version)
|
|
113
|
+
ssh_executor.ensure_cluster_exists(host, version) unless cluster_exists
|
|
103
114
|
|
|
104
115
|
puts ' Configuring PostgreSQL...'
|
|
105
116
|
core_config = config.component_config(:core)
|
|
@@ -117,12 +128,14 @@ module ActivePostgres
|
|
|
117
128
|
upload_template(host, 'pg_hba.conf.erb', "/etc/postgresql/#{version}/main/pg_hba.conf", binding,
|
|
118
129
|
owner: 'postgres:postgres')
|
|
119
130
|
|
|
120
|
-
#
|
|
131
|
+
# Ensure SSL certificates are present if SSL is enabled
|
|
121
132
|
if config.component_enabled?(:ssl)
|
|
122
|
-
puts '
|
|
123
|
-
|
|
133
|
+
puts ' Ensuring SSL certificates...'
|
|
134
|
+
ensure_ssl_certs(host, version, force: !cluster_exists)
|
|
124
135
|
end
|
|
125
136
|
|
|
137
|
+
repmgr_component = self
|
|
138
|
+
executor = ssh_executor
|
|
126
139
|
ssh_executor.execute_on_host(host) do
|
|
127
140
|
execute :sudo, 'pg_ctlcluster', version.to_s, 'main', 'restart'
|
|
128
141
|
|
|
@@ -148,26 +161,24 @@ module ActivePostgres
|
|
|
148
161
|
execute :sudo, '-u', 'postgres', 'psql', '-l'
|
|
149
162
|
|
|
150
163
|
info 'Creating repmgr database and user...'
|
|
151
|
-
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
|
|
155
|
-
|
|
156
|
-
|
|
157
|
-
|
|
158
|
-
|
|
159
|
-
|
|
160
|
-
|
|
161
|
-
execute :sudo, '-u', 'postgres', 'psql', '-p', '5432', '-f', '/tmp/setup_repmgr.sql'
|
|
162
|
-
execute :rm, '-f', '/tmp/setup_repmgr.sql'
|
|
164
|
+
repmgr_sql = repmgr_component.send(:build_repmgr_setup_sql, repmgr_user, repmgr_db, repmgr_password)
|
|
165
|
+
executor.run_sql_on_backend(self, repmgr_sql, postgres_user: 'postgres', port: 5432, tuples_only: false,
|
|
166
|
+
capture: false)
|
|
167
|
+
|
|
168
|
+
if replication_user != repmgr_user
|
|
169
|
+
info 'Ensuring replication user exists...'
|
|
170
|
+
repl_sql = repmgr_component.send(:build_replication_user_sql, replication_user, effective_replication_password)
|
|
171
|
+
executor.run_sql_on_backend(self, repl_sql, postgres_user: 'postgres', port: 5432, tuples_only: false,
|
|
172
|
+
capture: false)
|
|
173
|
+
end
|
|
163
174
|
|
|
164
175
|
info 'Reloading PostgreSQL configuration to apply pg_hba.conf changes...'
|
|
165
176
|
execute :sudo, 'pg_ctlcluster', version.to_s, 'main', 'reload'
|
|
166
177
|
end
|
|
167
178
|
|
|
168
|
-
setup_pgpass_file(host, repmgr_password)
|
|
179
|
+
setup_pgpass_file(host, repmgr_password, replication_password: effective_replication_password)
|
|
169
180
|
|
|
170
|
-
upload_template(host, 'repmgr.conf.erb', '/etc/repmgr.conf', binding, mode: '
|
|
181
|
+
upload_template(host, 'repmgr.conf.erb', '/etc/repmgr.conf', binding, mode: '600', owner: 'postgres:postgres')
|
|
171
182
|
|
|
172
183
|
ssh_executor.execute_on_host(host) do
|
|
173
184
|
info 'Registering primary with repmgr...'
|
|
@@ -176,22 +187,37 @@ module ActivePostgres
|
|
|
176
187
|
'repmgr', 'primary', 'register',
|
|
177
188
|
'-f', '/etc/repmgr.conf', '--force'
|
|
178
189
|
|
|
179
|
-
# Verify registration succeeded
|
|
180
|
-
|
|
181
|
-
|
|
182
|
-
capture(:sudo, '-u', 'postgres', '
|
|
183
|
-
|
|
184
|
-
|
|
185
|
-
|
|
190
|
+
# Verify registration succeeded (repmgr can take a moment to report)
|
|
191
|
+
cluster_show = nil
|
|
192
|
+
5.times do |attempt|
|
|
193
|
+
cluster_show = capture(:sudo, '-u', 'postgres', 'bash', '-lc',
|
|
194
|
+
"repmgr cluster show -f /etc/repmgr.conf 2>&1", raise_on_non_zero_exit: false).to_s
|
|
195
|
+
|
|
196
|
+
break if cluster_show.match?(/primary/i)
|
|
197
|
+
|
|
198
|
+
sleep 2 if attempt < 4
|
|
186
199
|
end
|
|
187
200
|
|
|
188
|
-
unless cluster_show
|
|
189
|
-
|
|
190
|
-
|
|
201
|
+
unless cluster_show && cluster_show.match?(/primary/i)
|
|
202
|
+
# Fallback: verify via repmgr metadata in the repmgr database
|
|
203
|
+
db_check = executor.run_sql_on_backend(self,
|
|
204
|
+
'SELECT type FROM repmgr.nodes WHERE node_id = 1;',
|
|
205
|
+
postgres_user: 'postgres',
|
|
206
|
+
database: repmgr_db,
|
|
207
|
+
tuples_only: true,
|
|
208
|
+
capture: true).to_s
|
|
209
|
+
|
|
210
|
+
unless db_check.match?(/primary/i)
|
|
211
|
+
safe_show = LogSanitizer.sanitize(cluster_show.to_s)
|
|
212
|
+
error "ā Primary registration verification failed:\n#{safe_show}"
|
|
213
|
+
raise 'Primary registration failed'
|
|
214
|
+
end
|
|
191
215
|
end
|
|
192
216
|
|
|
193
217
|
info 'ā Primary successfully registered with repmgr!'
|
|
194
218
|
end
|
|
219
|
+
|
|
220
|
+
enable_repmgrd_if_configured(host, repmgr_config)
|
|
195
221
|
end
|
|
196
222
|
|
|
197
223
|
def setup_standby(standby_host)
|
|
@@ -205,6 +231,7 @@ module ActivePostgres
|
|
|
205
231
|
repmgr_user = config.repmgr_user
|
|
206
232
|
repmgr_db = config.repmgr_database
|
|
207
233
|
postgres_user = config.postgres_user
|
|
234
|
+
replication_user = config.replication_user
|
|
208
235
|
|
|
209
236
|
# Variables used in ERB templates via binding
|
|
210
237
|
_ = host
|
|
@@ -213,10 +240,27 @@ module ActivePostgres
|
|
|
213
240
|
|
|
214
241
|
node_id = config.standby_hosts.index(standby_host) + 2
|
|
215
242
|
repmgr_password = normalize_repmgr_password(secrets_obj.resolve('repmgr_password'))
|
|
243
|
+
replication_password = normalize_replication_password(secrets_obj.resolve('replication_password'))
|
|
244
|
+
if replication_user == repmgr_user && replication_password != repmgr_password
|
|
245
|
+
raise Error, 'replication_user matches repmgr user but passwords differ. Use a distinct replication_user or the same password.'
|
|
246
|
+
end
|
|
247
|
+
effective_replication_password = replication_user == repmgr_user ? repmgr_password : replication_password
|
|
216
248
|
|
|
217
249
|
ensure_primary_registered
|
|
218
250
|
|
|
219
|
-
setup_pgpass_file(standby_host, repmgr_password,
|
|
251
|
+
setup_pgpass_file(standby_host, repmgr_password, replication_password: effective_replication_password,
|
|
252
|
+
primary_ip: primary_replication_host)
|
|
253
|
+
|
|
254
|
+
if standby_already_configured?(standby_host)
|
|
255
|
+
puts ' Standby already configured, updating configs...'
|
|
256
|
+
upload_template(standby_host, 'repmgr.conf.erb', '/etc/repmgr.conf', binding, mode: '600',
|
|
257
|
+
owner: 'postgres:postgres')
|
|
258
|
+
update_postgres_configs_on_standby(standby_host, version)
|
|
259
|
+
ensure_ssl_certs(standby_host, version) if config.component_enabled?(:ssl)
|
|
260
|
+
register_standby_with_primary(standby_host)
|
|
261
|
+
enable_repmgrd_if_configured(standby_host, repmgr_config)
|
|
262
|
+
return
|
|
263
|
+
end
|
|
220
264
|
|
|
221
265
|
ssh_executor.execute_on_host(standby_host) do
|
|
222
266
|
info 'Preparing standby cluster...'
|
|
@@ -244,12 +288,10 @@ module ActivePostgres
|
|
|
244
288
|
info 'Cloning from primary over private network...'
|
|
245
289
|
# Create a temporary repmgr config for cloning that points to the primary
|
|
246
290
|
# The regular config points to localhost which doesn't work during initial clone
|
|
247
|
-
escaped_password = repmgr_password.gsub("'", "\\\\'")
|
|
248
|
-
|
|
249
291
|
temp_repmgr_conf = <<~CONF
|
|
250
292
|
node_id=#{node_id}
|
|
251
293
|
node_name='#{standby_host}'
|
|
252
|
-
conninfo='host=#{primary_replication_host} user=#{repmgr_user} dbname=#{repmgr_db}
|
|
294
|
+
conninfo='host=#{primary_replication_host} user=#{repmgr_user} dbname=#{repmgr_db} connect_timeout=10'
|
|
253
295
|
data_directory='/var/lib/postgresql/#{version}/main'
|
|
254
296
|
CONF
|
|
255
297
|
|
|
@@ -257,7 +299,7 @@ module ActivePostgres
|
|
|
257
299
|
upload! StringIO.new(temp_repmgr_conf), '/tmp/repmgr_clone.conf'
|
|
258
300
|
execute :sudo, 'mv', '/tmp/repmgr_clone.conf', '/etc/repmgr_clone.conf'
|
|
259
301
|
execute :sudo, 'chown', 'postgres:postgres', '/etc/repmgr_clone.conf'
|
|
260
|
-
execute :sudo, 'chmod', '
|
|
302
|
+
execute :sudo, 'chmod', '600', '/etc/repmgr_clone.conf'
|
|
261
303
|
|
|
262
304
|
info 'Running: repmgr standby clone (this may take a few minutes to copy the database)...'
|
|
263
305
|
|
|
@@ -300,7 +342,7 @@ module ActivePostgres
|
|
|
300
342
|
|
|
301
343
|
# Upload the proper repmgr.conf now that clone is complete
|
|
302
344
|
puts ' Uploading final repmgr configuration...'
|
|
303
|
-
upload_template(standby_host, 'repmgr.conf.erb', '/etc/repmgr.conf', binding, mode: '
|
|
345
|
+
upload_template(standby_host, 'repmgr.conf.erb', '/etc/repmgr.conf', binding, mode: '600',
|
|
304
346
|
owner: 'postgres:postgres')
|
|
305
347
|
|
|
306
348
|
# Create config directory and setup PostgreSQL configuration
|
|
@@ -327,10 +369,10 @@ module ActivePostgres
|
|
|
327
369
|
upload_template(standby_host, 'pg_hba.conf.erb', "/etc/postgresql/#{version}/main/pg_hba.conf", binding,
|
|
328
370
|
owner: 'postgres:postgres')
|
|
329
371
|
|
|
330
|
-
#
|
|
372
|
+
# Ensure SSL certificates if SSL is enabled (clone doesn't copy config files)
|
|
331
373
|
if config.component_enabled?(:ssl)
|
|
332
|
-
puts '
|
|
333
|
-
|
|
374
|
+
puts ' Ensuring SSL certificates on standby...'
|
|
375
|
+
ensure_ssl_certs(standby_host, version, force: true)
|
|
334
376
|
end
|
|
335
377
|
|
|
336
378
|
ssh_executor.execute_on_host(standby_host) do
|
|
@@ -357,6 +399,163 @@ module ActivePostgres
|
|
|
357
399
|
end
|
|
358
400
|
|
|
359
401
|
register_standby_with_primary(standby_host)
|
|
402
|
+
enable_repmgrd_if_configured(standby_host, repmgr_config)
|
|
403
|
+
end
|
|
404
|
+
|
|
405
|
+
def setup_dns_failover
|
|
406
|
+
dns_config = dns_failover_config
|
|
407
|
+
return unless dns_config
|
|
408
|
+
|
|
409
|
+
dns_servers = normalize_dns_servers(dns_config[:dns_servers])
|
|
410
|
+
dns_private_ips = dns_servers.map { |server| server[:private_ip] }.reject(&:empty?)
|
|
411
|
+
dns_ssh_hosts = dns_servers.map { |server| server[:ssh_host] }.reject(&:empty?)
|
|
412
|
+
dns_user = dns_config[:dns_user] || config.user
|
|
413
|
+
dns_ssh_key_path = dns_config[:ssh_key_path] || '/var/lib/postgresql/.ssh/active_postgres_dns'
|
|
414
|
+
ssh_strict_host_key = normalize_dns_host_key_verification(
|
|
415
|
+
dns_config[:ssh_host_key_verification] || config.ssh_host_key_verification
|
|
416
|
+
)
|
|
417
|
+
|
|
418
|
+
pub_keys = {}
|
|
419
|
+
config.all_hosts.each do |host|
|
|
420
|
+
pub_keys[host] = ensure_dns_ssh_key(host, dns_ssh_key_path, dns_private_ips)
|
|
421
|
+
end
|
|
422
|
+
|
|
423
|
+
dns_ssh_hosts.each do |dns_server|
|
|
424
|
+
authorize_dns_keys(dns_server, dns_user, pub_keys.values.compact)
|
|
425
|
+
end
|
|
426
|
+
|
|
427
|
+
config.all_hosts.each do |host|
|
|
428
|
+
install_dns_failover_script(host, dns_config, dns_private_ips, dns_user, dns_ssh_key_path, ssh_strict_host_key)
|
|
429
|
+
end
|
|
430
|
+
end
|
|
431
|
+
|
|
432
|
+
def dns_failover_enabled?
|
|
433
|
+
dns_failover_config != nil
|
|
434
|
+
end
|
|
435
|
+
|
|
436
|
+
def dns_failover_config
|
|
437
|
+
repmgr_config = config.component_config(:repmgr)
|
|
438
|
+
dns_config = repmgr_config[:dns_failover]
|
|
439
|
+
return nil unless dns_config && dns_config[:enabled]
|
|
440
|
+
|
|
441
|
+
dns_config
|
|
442
|
+
end
|
|
443
|
+
|
|
444
|
+
def normalize_dns_servers(raw_servers)
|
|
445
|
+
Array(raw_servers).map do |server|
|
|
446
|
+
if server.is_a?(Hash)
|
|
447
|
+
ssh_host = server[:ssh_host] || server['ssh_host'] || server[:host] || server['host']
|
|
448
|
+
private_ip = server[:private_ip] || server['private_ip'] || server[:ip] || server['ip']
|
|
449
|
+
private_ip ||= ssh_host
|
|
450
|
+
ssh_host ||= private_ip
|
|
451
|
+
{ ssh_host: ssh_host.to_s, private_ip: private_ip.to_s }
|
|
452
|
+
else
|
|
453
|
+
value = server.to_s
|
|
454
|
+
{ ssh_host: value, private_ip: value }
|
|
455
|
+
end
|
|
456
|
+
end
|
|
457
|
+
end
|
|
458
|
+
|
|
459
|
+
def ensure_dns_ssh_key(host, key_path, dns_servers)
|
|
460
|
+
postgres_user = config.postgres_user
|
|
461
|
+
public_key = nil
|
|
462
|
+
|
|
463
|
+
ssh_executor.execute_on_host(host) do
|
|
464
|
+
execute :sudo, 'mkdir', '-p', '/var/lib/postgresql/.ssh'
|
|
465
|
+
execute :sudo, 'chown', "#{postgres_user}:#{postgres_user}", '/var/lib/postgresql/.ssh'
|
|
466
|
+
execute :sudo, 'chmod', '700', '/var/lib/postgresql/.ssh'
|
|
467
|
+
|
|
468
|
+
unless test(:sudo, '-u', postgres_user, 'test', '-f', key_path)
|
|
469
|
+
execute :sudo, '-u', postgres_user, "ssh-keygen -t ed25519 -N '' -f #{key_path}"
|
|
470
|
+
end
|
|
471
|
+
|
|
472
|
+
public_key = capture(:sudo, '-u', postgres_user, 'cat', "#{key_path}.pub").strip
|
|
473
|
+
|
|
474
|
+
unless dns_servers.empty?
|
|
475
|
+
execute :sudo, '-u', postgres_user, 'touch', '/var/lib/postgresql/.ssh/known_hosts'
|
|
476
|
+
scan_cmd = "ssh-keyscan -H #{dns_servers.join(' ')} >> /var/lib/postgresql/.ssh/known_hosts 2>/dev/null || true"
|
|
477
|
+
execute :sudo, '-u', postgres_user, 'bash', '-c', scan_cmd
|
|
478
|
+
execute :sudo, '-u', postgres_user, 'chmod', '600', '/var/lib/postgresql/.ssh/known_hosts'
|
|
479
|
+
end
|
|
480
|
+
end
|
|
481
|
+
|
|
482
|
+
public_key
|
|
483
|
+
end
|
|
484
|
+
|
|
485
|
+
def authorize_dns_keys(dns_server, dns_user, keys)
|
|
486
|
+
return if keys.empty?
|
|
487
|
+
|
|
488
|
+
ssh_executor.execute_on_host_as(dns_server, dns_user) do
|
|
489
|
+
execute :mkdir, '-p', '~/.ssh'
|
|
490
|
+
execute :chmod, '700', '~/.ssh'
|
|
491
|
+
execute :touch, '~/.ssh/authorized_keys'
|
|
492
|
+
execute :chmod, '600', '~/.ssh/authorized_keys'
|
|
493
|
+
|
|
494
|
+
keys.each do |key|
|
|
495
|
+
next if key.to_s.empty?
|
|
496
|
+
|
|
497
|
+
upload! StringIO.new("#{key}\n"), '/tmp/active_postgres_dns_key.pub'
|
|
498
|
+
execute :bash, '-c',
|
|
499
|
+
"grep -qxF -f /tmp/active_postgres_dns_key.pub ~/.ssh/authorized_keys || " \
|
|
500
|
+
"cat /tmp/active_postgres_dns_key.pub >> ~/.ssh/authorized_keys"
|
|
501
|
+
execute :rm, '-f', '/tmp/active_postgres_dns_key.pub'
|
|
502
|
+
end
|
|
503
|
+
end
|
|
504
|
+
rescue Net::SSH::ConnectionTimeout, Net::SSH::AuthenticationFailed => e
|
|
505
|
+
raise Error,
|
|
506
|
+
"Failed to SSH to DNS server #{dns_server} as #{dns_user}. " \
|
|
507
|
+
'If you run setup outside the mesh, use dns_failover.dns_servers entries ' \
|
|
508
|
+
'with host/private_ip or run setup from a mesh node. ' \
|
|
509
|
+
"(#{e.class})"
|
|
510
|
+
end
|
|
511
|
+
|
|
512
|
+
def install_dns_failover_script(host, dns_config, dns_servers, dns_user, dns_ssh_key_path, ssh_strict_host_key)
|
|
513
|
+
return if dns_servers.empty?
|
|
514
|
+
|
|
515
|
+
domains = normalize_dns_domains(dns_config)
|
|
516
|
+
primary_records = normalize_dns_records(dns_config[:primary_records] || dns_config[:primary_record],
|
|
517
|
+
default_prefix: 'db-primary',
|
|
518
|
+
domains: domains)
|
|
519
|
+
replica_records = normalize_dns_records(dns_config[:replica_records] || dns_config[:replica_record],
|
|
520
|
+
default_prefix: 'db-replica',
|
|
521
|
+
domains: domains)
|
|
522
|
+
|
|
523
|
+
_ = primary_records
|
|
524
|
+
_ = replica_records
|
|
525
|
+
_ = dns_servers
|
|
526
|
+
_ = dns_user
|
|
527
|
+
_ = dns_ssh_key_path
|
|
528
|
+
_ = ssh_strict_host_key
|
|
529
|
+
|
|
530
|
+
upload_template(host, 'repmgr_dns_failover.sh.erb', '/usr/local/bin/active-postgres-dns-failover', binding,
|
|
531
|
+
mode: '755', owner: 'root:root')
|
|
532
|
+
end
|
|
533
|
+
|
|
534
|
+
def normalize_dns_domains(dns_config)
|
|
535
|
+
domains = Array(dns_config[:domains] || dns_config[:domain]).map(&:to_s).map(&:strip).reject(&:empty?)
|
|
536
|
+
domains = ['mesh'] if domains.empty?
|
|
537
|
+
domains
|
|
538
|
+
end
|
|
539
|
+
|
|
540
|
+
def normalize_dns_records(value, default_prefix:, domains:)
|
|
541
|
+
records = Array(value).map(&:to_s).map(&:strip).reject(&:empty?)
|
|
542
|
+
return records unless records.empty?
|
|
543
|
+
|
|
544
|
+
domains.map { |domain| "#{default_prefix}.#{domain}" }
|
|
545
|
+
end
|
|
546
|
+
|
|
547
|
+
def normalize_dns_host_key_verification(value)
|
|
548
|
+
normalized = case value
|
|
549
|
+
when Symbol
|
|
550
|
+
value
|
|
551
|
+
else
|
|
552
|
+
value.to_s.strip.downcase.tr('-', '_').to_sym
|
|
553
|
+
end
|
|
554
|
+
|
|
555
|
+
return 'accept-new' if normalized == :accept_new
|
|
556
|
+
return 'no' if normalized == :never
|
|
557
|
+
|
|
558
|
+
'yes'
|
|
360
559
|
end
|
|
361
560
|
|
|
362
561
|
def register_standby_with_primary(standby_host)
|
|
@@ -374,6 +573,7 @@ module ActivePostgres
|
|
|
374
573
|
info 'Setting primary_conninfo with application_name...'
|
|
375
574
|
upload! StringIO.new(sql_content), temp_sql
|
|
376
575
|
execute :sudo, 'chown', "#{postgres_user}:#{postgres_user}", temp_sql
|
|
576
|
+
execute :sudo, 'chmod', '600', temp_sql
|
|
377
577
|
|
|
378
578
|
begin
|
|
379
579
|
execute :sudo, '-u', postgres_user, 'psql', '-f', temp_sql
|
|
@@ -402,6 +602,8 @@ module ActivePostgres
|
|
|
402
602
|
standby_hosts = config.standby_hosts
|
|
403
603
|
version = config.version
|
|
404
604
|
postgres_user = config.postgres_user
|
|
605
|
+
repmgr_db = config.repmgr_database
|
|
606
|
+
executor = ssh_executor
|
|
405
607
|
all_healthy = true
|
|
406
608
|
|
|
407
609
|
# Check primary
|
|
@@ -424,24 +626,35 @@ module ActivePostgres
|
|
|
424
626
|
end
|
|
425
627
|
|
|
426
628
|
# Check repmgr registration
|
|
427
|
-
cluster_output =
|
|
428
|
-
|
|
429
|
-
|
|
430
|
-
''
|
|
431
|
-
end
|
|
629
|
+
cluster_output = capture(:sudo, '-u', 'postgres',
|
|
630
|
+
'repmgr', '-f', '/etc/repmgr.conf', 'cluster', 'show',
|
|
631
|
+
raise_on_non_zero_exit: false).to_s
|
|
432
632
|
# Primary always has node_id=1, check if it's registered and running
|
|
433
|
-
if cluster_output.match?(/\s+1\s+\|.*primary.*\*\s+running/)
|
|
633
|
+
if cluster_output.match?(/\s+1\s+\|.*primary.*\*\s+running/i)
|
|
434
634
|
info 'ā Primary is registered with repmgr'
|
|
435
635
|
else
|
|
436
|
-
|
|
437
|
-
|
|
636
|
+
db_check = executor.run_sql_on_backend(self,
|
|
637
|
+
'SELECT type FROM repmgr.nodes WHERE node_id = 1 AND active IS TRUE;',
|
|
638
|
+
postgres_user: postgres_user,
|
|
639
|
+
database: repmgr_db,
|
|
640
|
+
tuples_only: true,
|
|
641
|
+
capture: true).to_s
|
|
642
|
+
if db_check.match?(/primary/i)
|
|
643
|
+
info 'ā Primary is registered with repmgr'
|
|
644
|
+
else
|
|
645
|
+
error 'ā Primary is not registered with repmgr'
|
|
646
|
+
all_healthy = false
|
|
647
|
+
end
|
|
438
648
|
end
|
|
439
649
|
|
|
440
650
|
# Check replication slots (if standbys exist)
|
|
441
651
|
if standby_hosts.any?
|
|
442
652
|
begin
|
|
443
|
-
|
|
444
|
-
|
|
653
|
+
slots = executor.run_sql_on_backend(self,
|
|
654
|
+
'SELECT slot_name, active FROM pg_replication_slots;',
|
|
655
|
+
postgres_user: postgres_user,
|
|
656
|
+
tuples_only: false,
|
|
657
|
+
capture: true)
|
|
445
658
|
info "Replication slots:\n#{slots}"
|
|
446
659
|
rescue StandardError => e
|
|
447
660
|
error "Failed to fetch replication slots: #{e.message}"
|
|
@@ -472,8 +685,11 @@ module ActivePostgres
|
|
|
472
685
|
|
|
473
686
|
# Check replication status
|
|
474
687
|
begin
|
|
475
|
-
|
|
476
|
-
|
|
688
|
+
rep_status = executor.run_sql_on_backend(self,
|
|
689
|
+
'SELECT pg_is_in_recovery();',
|
|
690
|
+
postgres_user: postgres_user,
|
|
691
|
+
tuples_only: true,
|
|
692
|
+
capture: true).to_s
|
|
477
693
|
if rep_status.include?('t')
|
|
478
694
|
info 'ā Standby is in recovery mode (receiving replication)'
|
|
479
695
|
else
|
|
@@ -487,8 +703,11 @@ module ActivePostgres
|
|
|
487
703
|
|
|
488
704
|
# Check lag
|
|
489
705
|
begin
|
|
490
|
-
|
|
491
|
-
|
|
706
|
+
lag_result = executor.run_sql_on_backend(self,
|
|
707
|
+
'SELECT EXTRACT(EPOCH FROM (now() - pg_last_xact_replay_timestamp()))::int AS lag;',
|
|
708
|
+
postgres_user: postgres_user,
|
|
709
|
+
tuples_only: true,
|
|
710
|
+
capture: true).to_s.strip
|
|
492
711
|
info "Replication lag: #{lag_result}"
|
|
493
712
|
rescue StandardError => e
|
|
494
713
|
error "Failed to get replication lag: #{e.message}"
|
|
@@ -500,11 +719,9 @@ module ActivePostgres
|
|
|
500
719
|
# Show final cluster status
|
|
501
720
|
ssh_executor.execute_on_host(primary_host) do
|
|
502
721
|
info 'Final cluster status:'
|
|
503
|
-
cluster_show =
|
|
504
|
-
|
|
505
|
-
|
|
506
|
-
'Error'
|
|
507
|
-
end
|
|
722
|
+
cluster_show = capture(:sudo, '-u', 'postgres',
|
|
723
|
+
'repmgr', '-f', '/etc/repmgr.conf', 'cluster', 'show',
|
|
724
|
+
raise_on_non_zero_exit: false).to_s
|
|
508
725
|
safe_show = LogSanitizer.sanitize(cluster_show)
|
|
509
726
|
info safe_show
|
|
510
727
|
end
|
|
@@ -518,10 +735,11 @@ module ActivePostgres
|
|
|
518
735
|
all_healthy
|
|
519
736
|
end
|
|
520
737
|
|
|
521
|
-
def setup_pgpass_file(host,
|
|
738
|
+
def setup_pgpass_file(host, repmgr_password, replication_password: nil, primary_ip: nil)
|
|
522
739
|
# Create .pgpass file for postgres user to avoid password exposure in logs
|
|
523
740
|
# Format: hostname:port:database:username:password
|
|
524
|
-
pgpass_content = build_pgpass_content(host,
|
|
741
|
+
pgpass_content = build_pgpass_content(host, repmgr_password, replication_password: replication_password,
|
|
742
|
+
primary_ip: primary_ip)
|
|
525
743
|
|
|
526
744
|
ssh_executor.execute_on_host(host) do
|
|
527
745
|
# Create .pgpass in postgres user's home directory
|
|
@@ -534,10 +752,13 @@ module ActivePostgres
|
|
|
534
752
|
end
|
|
535
753
|
end
|
|
536
754
|
|
|
537
|
-
def build_pgpass_content(host,
|
|
538
|
-
escaped_password = escape_pgpass_value(
|
|
755
|
+
def build_pgpass_content(host, repmgr_password, replication_password: nil, primary_ip: nil)
|
|
756
|
+
escaped_password = escape_pgpass_value(repmgr_password)
|
|
539
757
|
repmgr_user = config.repmgr_user
|
|
540
758
|
repmgr_db = config.repmgr_database
|
|
759
|
+
replication_user = config.replication_user
|
|
760
|
+
replication_password ||= secrets.resolve('replication_password')
|
|
761
|
+
replication_password = normalize_replication_password(replication_password) if replication_password
|
|
541
762
|
|
|
542
763
|
entries = [
|
|
543
764
|
"localhost:5432:#{repmgr_db}:#{repmgr_user}:#{escaped_password}",
|
|
@@ -557,7 +778,31 @@ module ActivePostgres
|
|
|
557
778
|
entries << "#{local_replication_host}:5432:*:#{repmgr_user}:#{escaped_password}"
|
|
558
779
|
end
|
|
559
780
|
|
|
560
|
-
|
|
781
|
+
# Allow repmgr to connect to all nodes for cluster status checks
|
|
782
|
+
config.all_hosts.each do |node|
|
|
783
|
+
replication_host = config.replication_host_for(node)
|
|
784
|
+
next if replication_host.nil? || %w[localhost 127.0.0.1].include?(replication_host)
|
|
785
|
+
|
|
786
|
+
entries << "#{replication_host}:5432:#{repmgr_db}:#{repmgr_user}:#{escaped_password}"
|
|
787
|
+
entries << "#{replication_host}:5432:*:#{repmgr_user}:#{escaped_password}"
|
|
788
|
+
end
|
|
789
|
+
|
|
790
|
+
if replication_password && !replication_password.empty?
|
|
791
|
+
escaped_replication_password = escape_pgpass_value(replication_password)
|
|
792
|
+
entries << "localhost:5432:replication:#{replication_user}:#{escaped_replication_password}"
|
|
793
|
+
entries << "127.0.0.1:5432:replication:#{replication_user}:#{escaped_replication_password}"
|
|
794
|
+
entries << "localhost:5432:*:#{replication_user}:#{escaped_replication_password}"
|
|
795
|
+
entries << "127.0.0.1:5432:*:#{replication_user}:#{escaped_replication_password}"
|
|
796
|
+
if primary_ip
|
|
797
|
+
entries << "#{primary_ip}:5432:replication:#{replication_user}:#{escaped_replication_password}"
|
|
798
|
+
entries << "#{primary_ip}:5432:*:#{replication_user}:#{escaped_replication_password}"
|
|
799
|
+
end
|
|
800
|
+
if local_replication_host && !%w[localhost 127.0.0.1].include?(local_replication_host)
|
|
801
|
+
entries << "#{local_replication_host}:5432:replication:#{replication_user}:#{escaped_replication_password}"
|
|
802
|
+
entries << "#{local_replication_host}:5432:*:#{replication_user}:#{escaped_replication_password}"
|
|
803
|
+
end
|
|
804
|
+
end
|
|
805
|
+
"#{entries.uniq.join("\n")}\n"
|
|
561
806
|
end
|
|
562
807
|
|
|
563
808
|
def escape_pgpass_value(value)
|
|
@@ -571,7 +816,13 @@ module ActivePostgres
|
|
|
571
816
|
primary_host = config.primary_replication_host
|
|
572
817
|
repmgr_user = config.repmgr_user
|
|
573
818
|
repmgr_db = config.repmgr_database
|
|
574
|
-
|
|
819
|
+
replication_password = secrets.resolve('replication_password')
|
|
820
|
+
replication_password = normalize_replication_password(replication_password) if replication_password
|
|
821
|
+
replication_user = config.replication_user
|
|
822
|
+
|
|
823
|
+
user = replication_password && !replication_password.empty? ? replication_user : repmgr_user
|
|
824
|
+
dbname = replication_password && !replication_password.empty? ? 'replication' : repmgr_db
|
|
825
|
+
"host=#{primary_host} user=#{user} dbname=#{dbname} application_name=#{standby_label}"
|
|
575
826
|
end
|
|
576
827
|
|
|
577
828
|
def normalize_repmgr_password(raw_password)
|
|
@@ -582,6 +833,14 @@ module ActivePostgres
|
|
|
582
833
|
password
|
|
583
834
|
end
|
|
584
835
|
|
|
836
|
+
def normalize_replication_password(raw_password)
|
|
837
|
+
password = raw_password.to_s.rstrip
|
|
838
|
+
|
|
839
|
+
raise 'replication_password secret is missing' if password.empty?
|
|
840
|
+
|
|
841
|
+
password
|
|
842
|
+
end
|
|
843
|
+
|
|
585
844
|
def reload_postgres_cluster
|
|
586
845
|
execute :sudo, 'pg_ctlcluster', config.version.to_s, 'main', 'reload'
|
|
587
846
|
rescue StandardError => e
|
|
@@ -652,6 +911,133 @@ module ActivePostgres
|
|
|
652
911
|
end
|
|
653
912
|
end
|
|
654
913
|
end
|
|
914
|
+
|
|
915
|
+
def ensure_ssl_certs(host, version, force: false)
|
|
916
|
+
ssl_cert = secrets.resolve('ssl_cert')
|
|
917
|
+
ssl_key = secrets.resolve('ssl_key')
|
|
918
|
+
|
|
919
|
+
return regenerate_ssl_certs(host, version) if force
|
|
920
|
+
|
|
921
|
+
if ssl_cert && ssl_key
|
|
922
|
+
regenerate_ssl_certs(host, version)
|
|
923
|
+
return
|
|
924
|
+
end
|
|
925
|
+
|
|
926
|
+
cert_path = "/etc/postgresql/#{version}/main/server.crt"
|
|
927
|
+
key_path = "/etc/postgresql/#{version}/main/server.key"
|
|
928
|
+
|
|
929
|
+
cert_exists = false
|
|
930
|
+
key_exists = false
|
|
931
|
+
|
|
932
|
+
ssh_executor.execute_on_host(host) do
|
|
933
|
+
cert_exists = test(:sudo, 'test', '-f', cert_path)
|
|
934
|
+
key_exists = test(:sudo, 'test', '-f', key_path)
|
|
935
|
+
end
|
|
936
|
+
|
|
937
|
+
regenerate_ssl_certs(host, version) unless cert_exists && key_exists
|
|
938
|
+
end
|
|
939
|
+
|
|
940
|
+
def cluster_exists?(host, version)
|
|
941
|
+
exists = false
|
|
942
|
+
ssh_executor.execute_on_host(host) do
|
|
943
|
+
exists = test(:sudo, 'test', '-d', "/var/lib/postgresql/#{version}/main/base")
|
|
944
|
+
end
|
|
945
|
+
exists
|
|
946
|
+
rescue StandardError
|
|
947
|
+
false
|
|
948
|
+
end
|
|
949
|
+
|
|
950
|
+
def standby_already_configured?(host)
|
|
951
|
+
version = config.version
|
|
952
|
+
postgres_user = config.postgres_user
|
|
953
|
+
configured = false
|
|
954
|
+
|
|
955
|
+
ssh_executor.execute_on_host(host) do
|
|
956
|
+
data_dir = test(:sudo, 'test', '-d', "/var/lib/postgresql/#{version}/main/base")
|
|
957
|
+
repmgr_conf = test(:sudo, 'test', '-f', '/etc/repmgr.conf')
|
|
958
|
+
clusters = begin
|
|
959
|
+
capture(:sudo, 'pg_lsclusters', '-h')
|
|
960
|
+
rescue StandardError
|
|
961
|
+
''
|
|
962
|
+
end
|
|
963
|
+
online = clusters.lines.any? { |line| line.include?(version.to_s) && line.include?('main') && line.include?('online') }
|
|
964
|
+
in_recovery = begin
|
|
965
|
+
capture(:sudo, '-u', postgres_user, 'psql', '-tA', '-c', '"SELECT pg_is_in_recovery();"').strip == 't'
|
|
966
|
+
rescue StandardError
|
|
967
|
+
false
|
|
968
|
+
end
|
|
969
|
+
|
|
970
|
+
configured = data_dir && repmgr_conf && online && in_recovery
|
|
971
|
+
end
|
|
972
|
+
|
|
973
|
+
configured
|
|
974
|
+
rescue StandardError
|
|
975
|
+
false
|
|
976
|
+
end
|
|
977
|
+
|
|
978
|
+
def update_postgres_configs_on_standby(host, version)
|
|
979
|
+
core_config = config.component_config(:core)
|
|
980
|
+
component_config = core_config
|
|
981
|
+
pg_config = component_config[:postgresql] || {}
|
|
982
|
+
private_ip = config.replication_host_for(host)
|
|
983
|
+
pg_config = substitute_private_ip(pg_config, private_ip)
|
|
984
|
+
_ = pg_config
|
|
985
|
+
|
|
986
|
+
upload_template(host, 'postgresql.conf.erb', "/etc/postgresql/#{version}/main/postgresql.conf",
|
|
987
|
+
binding, owner: 'postgres:postgres')
|
|
988
|
+
upload_template(host, 'pg_hba.conf.erb', "/etc/postgresql/#{version}/main/pg_hba.conf",
|
|
989
|
+
binding, owner: 'postgres:postgres')
|
|
990
|
+
|
|
991
|
+
ssh_executor.restart_postgres(host, version)
|
|
992
|
+
end
|
|
993
|
+
|
|
994
|
+
def enable_repmgrd_if_configured(host, repmgr_config)
|
|
995
|
+
return if repmgr_config[:auto_failover] == false
|
|
996
|
+
|
|
997
|
+
ssh_executor.execute_on_host(host) do
|
|
998
|
+
begin
|
|
999
|
+
execute :sudo, 'systemctl', 'enable', 'repmgrd'
|
|
1000
|
+
execute :sudo, 'systemctl', 'restart', 'repmgrd'
|
|
1001
|
+
rescue StandardError
|
|
1002
|
+
nil
|
|
1003
|
+
end
|
|
1004
|
+
end
|
|
1005
|
+
end
|
|
1006
|
+
|
|
1007
|
+
def build_repmgr_setup_sql(repmgr_user, repmgr_db, repmgr_password)
|
|
1008
|
+
escaped_password = repmgr_password.gsub("'", "''")
|
|
1009
|
+
|
|
1010
|
+
[
|
|
1011
|
+
'DO $$',
|
|
1012
|
+
'BEGIN',
|
|
1013
|
+
" IF NOT EXISTS (SELECT FROM pg_roles WHERE rolname = '#{repmgr_user}') THEN",
|
|
1014
|
+
" CREATE USER #{repmgr_user} WITH SUPERUSER PASSWORD '#{escaped_password}';",
|
|
1015
|
+
' ELSE',
|
|
1016
|
+
" ALTER USER #{repmgr_user} WITH SUPERUSER PASSWORD '#{escaped_password}';",
|
|
1017
|
+
' END IF;',
|
|
1018
|
+
'END $$;',
|
|
1019
|
+
'',
|
|
1020
|
+
"SELECT 'CREATE DATABASE #{repmgr_db} OWNER #{repmgr_user}'",
|
|
1021
|
+
"WHERE NOT EXISTS (SELECT FROM pg_database WHERE datname = '#{repmgr_db}')\\gexec",
|
|
1022
|
+
''
|
|
1023
|
+
].join("\n")
|
|
1024
|
+
end
|
|
1025
|
+
|
|
1026
|
+
def build_replication_user_sql(replication_user, replication_password)
|
|
1027
|
+
escaped_password = replication_password.gsub("'", "''")
|
|
1028
|
+
|
|
1029
|
+
[
|
|
1030
|
+
'DO $$',
|
|
1031
|
+
'BEGIN',
|
|
1032
|
+
" IF NOT EXISTS (SELECT FROM pg_roles WHERE rolname = '#{replication_user}') THEN",
|
|
1033
|
+
" CREATE USER #{replication_user} WITH REPLICATION LOGIN PASSWORD '#{escaped_password}';",
|
|
1034
|
+
' ELSE',
|
|
1035
|
+
" ALTER USER #{replication_user} WITH REPLICATION LOGIN PASSWORD '#{escaped_password}';",
|
|
1036
|
+
' END IF;',
|
|
1037
|
+
'END $$;',
|
|
1038
|
+
''
|
|
1039
|
+
].join("\n")
|
|
1040
|
+
end
|
|
655
1041
|
end
|
|
656
1042
|
end
|
|
657
1043
|
end
|