active_postgres 0.8.0 → 0.9.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (35)
  1. checksums.yaml +4 -4
  2. data/README.md +142 -5
  3. data/lib/active_postgres/cli.rb +7 -0
  4. data/lib/active_postgres/components/core.rb +2 -7
  5. data/lib/active_postgres/components/extensions.rb +40 -35
  6. data/lib/active_postgres/components/monitoring.rb +256 -4
  7. data/lib/active_postgres/components/pgbackrest.rb +91 -5
  8. data/lib/active_postgres/components/pgbouncer.rb +58 -7
  9. data/lib/active_postgres/components/repmgr.rb +448 -62
  10. data/lib/active_postgres/configuration.rb +66 -1
  11. data/lib/active_postgres/connection_pooler.rb +3 -12
  12. data/lib/active_postgres/credentials.rb +3 -3
  13. data/lib/active_postgres/direct_executor.rb +1 -2
  14. data/lib/active_postgres/generators/active_postgres/install_generator.rb +2 -0
  15. data/lib/active_postgres/generators/active_postgres/templates/postgres.yml.erb +28 -1
  16. data/lib/active_postgres/health_checker.rb +29 -7
  17. data/lib/active_postgres/installer.rb +9 -0
  18. data/lib/active_postgres/overview.rb +351 -0
  19. data/lib/active_postgres/rollback_manager.rb +4 -8
  20. data/lib/active_postgres/secrets.rb +23 -5
  21. data/lib/active_postgres/ssh_executor.rb +44 -19
  22. data/lib/active_postgres/version.rb +1 -1
  23. data/lib/active_postgres.rb +1 -0
  24. data/lib/tasks/postgres.rake +77 -4
  25. data/lib/tasks/rotate_credentials.rake +4 -16
  26. data/templates/pg_hba.conf.erb +4 -1
  27. data/templates/pgbackrest.conf.erb +28 -0
  28. data/templates/pgbouncer-follow-primary.service.erb +8 -0
  29. data/templates/pgbouncer-follow-primary.timer.erb +11 -0
  30. data/templates/pgbouncer.ini.erb +2 -0
  31. data/templates/pgbouncer_follow_primary.sh.erb +34 -0
  32. data/templates/postgresql.conf.erb +4 -0
  33. data/templates/repmgr.conf.erb +10 -3
  34. data/templates/repmgr_dns_failover.sh.erb +59 -0
  35. metadata +6 -1
@@ -11,6 +11,8 @@ module ActivePostgres
11
11
  setup_standby(host)
12
12
  end
13
13
 
14
+ setup_dns_failover if dns_failover_enabled?
15
+
14
16
  # Verify the entire cluster is healthy after setup
15
17
  puts "\n🏥 Performing final health check..."
16
18
  verify_cluster_health
@@ -68,6 +70,8 @@ module ActivePostgres
68
70
  end
69
71
 
70
72
  setup_standby(standby_host)
73
+
74
+ setup_dns_failover if dns_failover_enabled?
71
75
  end
72
76
 
73
77
  private
@@ -91,15 +95,22 @@ module ActivePostgres
91
95
  repmgr_config = config.component_config(:repmgr)
92
96
  version = config.version
93
97
  repmgr_password = normalize_repmgr_password(secrets.resolve('repmgr_password'))
98
+ replication_password = normalize_replication_password(secrets.resolve('replication_password'))
94
99
  secrets_obj = secrets
95
100
  repmgr_user = config.repmgr_user
96
101
  repmgr_db = config.repmgr_database
102
+ replication_user = config.replication_user
103
+ if replication_user == repmgr_user && replication_password != repmgr_password
104
+ raise Error, 'replication_user matches repmgr user but passwords differ. Use a distinct replication_user or the same password.'
105
+ end
106
+ effective_replication_password = replication_user == repmgr_user ? repmgr_password : replication_password
97
107
 
98
108
  # Variables used in ERB templates via binding
99
109
  _ = repmgr_config
100
110
  _ = secrets_obj
101
111
 
102
- ssh_executor.recreate_cluster(host, version)
112
+ cluster_exists = cluster_exists?(host, version)
113
+ ssh_executor.ensure_cluster_exists(host, version) unless cluster_exists
103
114
 
104
115
  puts ' Configuring PostgreSQL...'
105
116
  core_config = config.component_config(:core)
@@ -117,12 +128,14 @@ module ActivePostgres
117
128
  upload_template(host, 'pg_hba.conf.erb', "/etc/postgresql/#{version}/main/pg_hba.conf", binding,
118
129
  owner: 'postgres:postgres')
119
130
 
120
- # Regenerate SSL certificates if SSL is enabled (cluster recreation deleted them)
131
+ # Ensure SSL certificates are present if SSL is enabled
121
132
  if config.component_enabled?(:ssl)
122
- puts ' Regenerating SSL certificates...'
123
- regenerate_ssl_certs(host, version)
133
+ puts ' Ensuring SSL certificates...'
134
+ ensure_ssl_certs(host, version, force: !cluster_exists)
124
135
  end
125
136
 
137
+ repmgr_component = self
138
+ executor = ssh_executor
126
139
  ssh_executor.execute_on_host(host) do
127
140
  execute :sudo, 'pg_ctlcluster', version.to_s, 'main', 'restart'
128
141
 
@@ -148,26 +161,24 @@ module ActivePostgres
148
161
  execute :sudo, '-u', 'postgres', 'psql', '-l'
149
162
 
150
163
  info 'Creating repmgr database and user...'
151
- escaped_password = repmgr_password.gsub("'", "''")
152
- sql = [
153
- "DROP DATABASE IF EXISTS #{repmgr_db}",
154
- "DROP USER IF EXISTS #{repmgr_user}",
155
- "CREATE USER #{repmgr_user} WITH SUPERUSER PASSWORD '#{escaped_password}'",
156
- "CREATE DATABASE #{repmgr_db} OWNER #{repmgr_user}",
157
- '' # Ensures trailing semicolon after join
158
- ].join('; ')
159
- upload! StringIO.new(sql), '/tmp/setup_repmgr.sql'
160
- execute :chmod, '644', '/tmp/setup_repmgr.sql'
161
- execute :sudo, '-u', 'postgres', 'psql', '-p', '5432', '-f', '/tmp/setup_repmgr.sql'
162
- execute :rm, '-f', '/tmp/setup_repmgr.sql'
164
+ repmgr_sql = repmgr_component.send(:build_repmgr_setup_sql, repmgr_user, repmgr_db, repmgr_password)
165
+ executor.run_sql_on_backend(self, repmgr_sql, postgres_user: 'postgres', port: 5432, tuples_only: false,
166
+ capture: false)
167
+
168
+ if replication_user != repmgr_user
169
+ info 'Ensuring replication user exists...'
170
+ repl_sql = repmgr_component.send(:build_replication_user_sql, replication_user, effective_replication_password)
171
+ executor.run_sql_on_backend(self, repl_sql, postgres_user: 'postgres', port: 5432, tuples_only: false,
172
+ capture: false)
173
+ end
163
174
 
164
175
  info 'Reloading PostgreSQL configuration to apply pg_hba.conf changes...'
165
176
  execute :sudo, 'pg_ctlcluster', version.to_s, 'main', 'reload'
166
177
  end
167
178
 
168
- setup_pgpass_file(host, repmgr_password)
179
+ setup_pgpass_file(host, repmgr_password, replication_password: effective_replication_password)
169
180
 
170
- upload_template(host, 'repmgr.conf.erb', '/etc/repmgr.conf', binding, mode: '644', owner: 'postgres:postgres')
181
+ upload_template(host, 'repmgr.conf.erb', '/etc/repmgr.conf', binding, mode: '600', owner: 'postgres:postgres')
171
182
 
172
183
  ssh_executor.execute_on_host(host) do
173
184
  info 'Registering primary with repmgr...'
@@ -176,22 +187,37 @@ module ActivePostgres
176
187
  'repmgr', 'primary', 'register',
177
188
  '-f', '/etc/repmgr.conf', '--force'
178
189
 
179
- # Verify registration succeeded
180
- sleep 2
181
- cluster_show = begin
182
- capture(:sudo, '-u', 'postgres', 'repmgr', 'cluster', 'show', '-f',
183
- '/etc/repmgr.conf')
184
- rescue StandardError
185
- 'Could not show cluster'
190
+ # Verify registration succeeded (repmgr can take a moment to report)
191
+ cluster_show = nil
192
+ 5.times do |attempt|
193
+ cluster_show = capture(:sudo, '-u', 'postgres', 'bash', '-lc',
194
+ "repmgr cluster show -f /etc/repmgr.conf 2>&1", raise_on_non_zero_exit: false).to_s
195
+
196
+ break if cluster_show.match?(/primary/i)
197
+
198
+ sleep 2 if attempt < 4
186
199
  end
187
200
 
188
- unless cluster_show.include?('primary') && cluster_show.include?('running')
189
- error '✗ Primary registration verification failed'
190
- raise 'Primary registration failed'
201
+ unless cluster_show && cluster_show.match?(/primary/i)
202
+ # Fallback: verify via repmgr metadata in the repmgr database
203
+ db_check = executor.run_sql_on_backend(self,
204
+ 'SELECT type FROM repmgr.nodes WHERE node_id = 1;',
205
+ postgres_user: 'postgres',
206
+ database: repmgr_db,
207
+ tuples_only: true,
208
+ capture: true).to_s
209
+
210
+ unless db_check.match?(/primary/i)
211
+ safe_show = LogSanitizer.sanitize(cluster_show.to_s)
212
+ error "✗ Primary registration verification failed:\n#{safe_show}"
213
+ raise 'Primary registration failed'
214
+ end
191
215
  end
192
216
 
193
217
  info '✓ Primary successfully registered with repmgr!'
194
218
  end
219
+
220
+ enable_repmgrd_if_configured(host, repmgr_config)
195
221
  end
196
222
 
197
223
  def setup_standby(standby_host)
@@ -205,6 +231,7 @@ module ActivePostgres
205
231
  repmgr_user = config.repmgr_user
206
232
  repmgr_db = config.repmgr_database
207
233
  postgres_user = config.postgres_user
234
+ replication_user = config.replication_user
208
235
 
209
236
  # Variables used in ERB templates via binding
210
237
  _ = host
@@ -213,10 +240,27 @@ module ActivePostgres
213
240
 
214
241
  node_id = config.standby_hosts.index(standby_host) + 2
215
242
  repmgr_password = normalize_repmgr_password(secrets_obj.resolve('repmgr_password'))
243
+ replication_password = normalize_replication_password(secrets_obj.resolve('replication_password'))
244
+ if replication_user == repmgr_user && replication_password != repmgr_password
245
+ raise Error, 'replication_user matches repmgr user but passwords differ. Use a distinct replication_user or the same password.'
246
+ end
247
+ effective_replication_password = replication_user == repmgr_user ? repmgr_password : replication_password
216
248
 
217
249
  ensure_primary_registered
218
250
 
219
- setup_pgpass_file(standby_host, repmgr_password, primary_replication_host)
251
+ setup_pgpass_file(standby_host, repmgr_password, replication_password: effective_replication_password,
252
+ primary_ip: primary_replication_host)
253
+
254
+ if standby_already_configured?(standby_host)
255
+ puts ' Standby already configured, updating configs...'
256
+ upload_template(standby_host, 'repmgr.conf.erb', '/etc/repmgr.conf', binding, mode: '600',
257
+ owner: 'postgres:postgres')
258
+ update_postgres_configs_on_standby(standby_host, version)
259
+ ensure_ssl_certs(standby_host, version) if config.component_enabled?(:ssl)
260
+ register_standby_with_primary(standby_host)
261
+ enable_repmgrd_if_configured(standby_host, repmgr_config)
262
+ return
263
+ end
220
264
 
221
265
  ssh_executor.execute_on_host(standby_host) do
222
266
  info 'Preparing standby cluster...'
@@ -244,12 +288,10 @@ module ActivePostgres
244
288
  info 'Cloning from primary over private network...'
245
289
  # Create a temporary repmgr config for cloning that points to the primary
246
290
  # The regular config points to localhost which doesn't work during initial clone
247
- escaped_password = repmgr_password.gsub("'", "\\\\'")
248
-
249
291
  temp_repmgr_conf = <<~CONF
250
292
  node_id=#{node_id}
251
293
  node_name='#{standby_host}'
252
- conninfo='host=#{primary_replication_host} user=#{repmgr_user} dbname=#{repmgr_db} password=#{escaped_password} connect_timeout=10'
294
+ conninfo='host=#{primary_replication_host} user=#{repmgr_user} dbname=#{repmgr_db} connect_timeout=10'
253
295
  data_directory='/var/lib/postgresql/#{version}/main'
254
296
  CONF
255
297
 
@@ -257,7 +299,7 @@ module ActivePostgres
257
299
  upload! StringIO.new(temp_repmgr_conf), '/tmp/repmgr_clone.conf'
258
300
  execute :sudo, 'mv', '/tmp/repmgr_clone.conf', '/etc/repmgr_clone.conf'
259
301
  execute :sudo, 'chown', 'postgres:postgres', '/etc/repmgr_clone.conf'
260
- execute :sudo, 'chmod', '644', '/etc/repmgr_clone.conf'
302
+ execute :sudo, 'chmod', '600', '/etc/repmgr_clone.conf'
261
303
 
262
304
  info 'Running: repmgr standby clone (this may take a few minutes to copy the database)...'
263
305
 
@@ -300,7 +342,7 @@ module ActivePostgres
300
342
 
301
343
  # Upload the proper repmgr.conf now that clone is complete
302
344
  puts ' Uploading final repmgr configuration...'
303
- upload_template(standby_host, 'repmgr.conf.erb', '/etc/repmgr.conf', binding, mode: '644',
345
+ upload_template(standby_host, 'repmgr.conf.erb', '/etc/repmgr.conf', binding, mode: '600',
304
346
  owner: 'postgres:postgres')
305
347
 
306
348
  # Create config directory and setup PostgreSQL configuration
@@ -327,10 +369,10 @@ module ActivePostgres
327
369
  upload_template(standby_host, 'pg_hba.conf.erb', "/etc/postgresql/#{version}/main/pg_hba.conf", binding,
328
370
  owner: 'postgres:postgres')
329
371
 
330
- # Regenerate SSL certificates if SSL is enabled (clone doesn't copy config files)
372
+ # Ensure SSL certificates if SSL is enabled (clone doesn't copy config files)
331
373
  if config.component_enabled?(:ssl)
332
- puts ' Regenerating SSL certificates on standby...'
333
- regenerate_ssl_certs(standby_host, version)
374
+ puts ' Ensuring SSL certificates on standby...'
375
+ ensure_ssl_certs(standby_host, version, force: true)
334
376
  end
335
377
 
336
378
  ssh_executor.execute_on_host(standby_host) do
@@ -357,6 +399,163 @@ module ActivePostgres
357
399
  end
358
400
 
359
401
  register_standby_with_primary(standby_host)
402
+ enable_repmgrd_if_configured(standby_host, repmgr_config)
403
+ end
404
+
405
+ def setup_dns_failover
406
+ dns_config = dns_failover_config
407
+ return unless dns_config
408
+
409
+ dns_servers = normalize_dns_servers(dns_config[:dns_servers])
410
+ dns_private_ips = dns_servers.map { |server| server[:private_ip] }.reject(&:empty?)
411
+ dns_ssh_hosts = dns_servers.map { |server| server[:ssh_host] }.reject(&:empty?)
412
+ dns_user = dns_config[:dns_user] || config.user
413
+ dns_ssh_key_path = dns_config[:ssh_key_path] || '/var/lib/postgresql/.ssh/active_postgres_dns'
414
+ ssh_strict_host_key = normalize_dns_host_key_verification(
415
+ dns_config[:ssh_host_key_verification] || config.ssh_host_key_verification
416
+ )
417
+
418
+ pub_keys = {}
419
+ config.all_hosts.each do |host|
420
+ pub_keys[host] = ensure_dns_ssh_key(host, dns_ssh_key_path, dns_private_ips)
421
+ end
422
+
423
+ dns_ssh_hosts.each do |dns_server|
424
+ authorize_dns_keys(dns_server, dns_user, pub_keys.values.compact)
425
+ end
426
+
427
+ config.all_hosts.each do |host|
428
+ install_dns_failover_script(host, dns_config, dns_private_ips, dns_user, dns_ssh_key_path, ssh_strict_host_key)
429
+ end
430
+ end
431
+
432
+ def dns_failover_enabled?
433
+ dns_failover_config != nil
434
+ end
435
+
436
+ def dns_failover_config
437
+ repmgr_config = config.component_config(:repmgr)
438
+ dns_config = repmgr_config[:dns_failover]
439
+ return nil unless dns_config && dns_config[:enabled]
440
+
441
+ dns_config
442
+ end
443
+
444
+ def normalize_dns_servers(raw_servers)
445
+ Array(raw_servers).map do |server|
446
+ if server.is_a?(Hash)
447
+ ssh_host = server[:ssh_host] || server['ssh_host'] || server[:host] || server['host']
448
+ private_ip = server[:private_ip] || server['private_ip'] || server[:ip] || server['ip']
449
+ private_ip ||= ssh_host
450
+ ssh_host ||= private_ip
451
+ { ssh_host: ssh_host.to_s, private_ip: private_ip.to_s }
452
+ else
453
+ value = server.to_s
454
+ { ssh_host: value, private_ip: value }
455
+ end
456
+ end
457
+ end
458
+
459
+ def ensure_dns_ssh_key(host, key_path, dns_servers)
460
+ postgres_user = config.postgres_user
461
+ public_key = nil
462
+
463
+ ssh_executor.execute_on_host(host) do
464
+ execute :sudo, 'mkdir', '-p', '/var/lib/postgresql/.ssh'
465
+ execute :sudo, 'chown', "#{postgres_user}:#{postgres_user}", '/var/lib/postgresql/.ssh'
466
+ execute :sudo, 'chmod', '700', '/var/lib/postgresql/.ssh'
467
+
468
+ unless test(:sudo, '-u', postgres_user, 'test', '-f', key_path)
469
+ execute :sudo, '-u', postgres_user, "ssh-keygen -t ed25519 -N '' -f #{key_path}"
470
+ end
471
+
472
+ public_key = capture(:sudo, '-u', postgres_user, 'cat', "#{key_path}.pub").strip
473
+
474
+ unless dns_servers.empty?
475
+ execute :sudo, '-u', postgres_user, 'touch', '/var/lib/postgresql/.ssh/known_hosts'
476
+ scan_cmd = "ssh-keyscan -H #{dns_servers.join(' ')} >> /var/lib/postgresql/.ssh/known_hosts 2>/dev/null || true"
477
+ execute :sudo, '-u', postgres_user, 'bash', '-c', scan_cmd
478
+ execute :sudo, '-u', postgres_user, 'chmod', '600', '/var/lib/postgresql/.ssh/known_hosts'
479
+ end
480
+ end
481
+
482
+ public_key
483
+ end
484
+
485
+ def authorize_dns_keys(dns_server, dns_user, keys)
486
+ return if keys.empty?
487
+
488
+ ssh_executor.execute_on_host_as(dns_server, dns_user) do
489
+ execute :mkdir, '-p', '~/.ssh'
490
+ execute :chmod, '700', '~/.ssh'
491
+ execute :touch, '~/.ssh/authorized_keys'
492
+ execute :chmod, '600', '~/.ssh/authorized_keys'
493
+
494
+ keys.each do |key|
495
+ next if key.to_s.empty?
496
+
497
+ upload! StringIO.new("#{key}\n"), '/tmp/active_postgres_dns_key.pub'
498
+ execute :bash, '-c',
499
+ "grep -qxF -f /tmp/active_postgres_dns_key.pub ~/.ssh/authorized_keys || " \
500
+ "cat /tmp/active_postgres_dns_key.pub >> ~/.ssh/authorized_keys"
501
+ execute :rm, '-f', '/tmp/active_postgres_dns_key.pub'
502
+ end
503
+ end
504
+ rescue Net::SSH::ConnectionTimeout, Net::SSH::AuthenticationFailed => e
505
+ raise Error,
506
+ "Failed to SSH to DNS server #{dns_server} as #{dns_user}. " \
507
+ 'If you run setup outside the mesh, use dns_failover.dns_servers entries ' \
508
+ 'with host/private_ip or run setup from a mesh node. ' \
509
+ "(#{e.class})"
510
+ end
511
+
512
+ def install_dns_failover_script(host, dns_config, dns_servers, dns_user, dns_ssh_key_path, ssh_strict_host_key)
513
+ return if dns_servers.empty?
514
+
515
+ domains = normalize_dns_domains(dns_config)
516
+ primary_records = normalize_dns_records(dns_config[:primary_records] || dns_config[:primary_record],
517
+ default_prefix: 'db-primary',
518
+ domains: domains)
519
+ replica_records = normalize_dns_records(dns_config[:replica_records] || dns_config[:replica_record],
520
+ default_prefix: 'db-replica',
521
+ domains: domains)
522
+
523
+ _ = primary_records
524
+ _ = replica_records
525
+ _ = dns_servers
526
+ _ = dns_user
527
+ _ = dns_ssh_key_path
528
+ _ = ssh_strict_host_key
529
+
530
+ upload_template(host, 'repmgr_dns_failover.sh.erb', '/usr/local/bin/active-postgres-dns-failover', binding,
531
+ mode: '755', owner: 'root:root')
532
+ end
533
+
534
+ def normalize_dns_domains(dns_config)
535
+ domains = Array(dns_config[:domains] || dns_config[:domain]).map(&:to_s).map(&:strip).reject(&:empty?)
536
+ domains = ['mesh'] if domains.empty?
537
+ domains
538
+ end
539
+
540
+ def normalize_dns_records(value, default_prefix:, domains:)
541
+ records = Array(value).map(&:to_s).map(&:strip).reject(&:empty?)
542
+ return records unless records.empty?
543
+
544
+ domains.map { |domain| "#{default_prefix}.#{domain}" }
545
+ end
546
+
547
+ def normalize_dns_host_key_verification(value)
548
+ normalized = case value
549
+ when Symbol
550
+ value
551
+ else
552
+ value.to_s.strip.downcase.tr('-', '_').to_sym
553
+ end
554
+
555
+ return 'accept-new' if normalized == :accept_new
556
+ return 'no' if normalized == :never
557
+
558
+ 'yes'
360
559
  end
361
560
 
362
561
  def register_standby_with_primary(standby_host)
@@ -374,6 +573,7 @@ module ActivePostgres
374
573
  info 'Setting primary_conninfo with application_name...'
375
574
  upload! StringIO.new(sql_content), temp_sql
376
575
  execute :sudo, 'chown', "#{postgres_user}:#{postgres_user}", temp_sql
576
+ execute :sudo, 'chmod', '600', temp_sql
377
577
 
378
578
  begin
379
579
  execute :sudo, '-u', postgres_user, 'psql', '-f', temp_sql
@@ -402,6 +602,8 @@ module ActivePostgres
402
602
  standby_hosts = config.standby_hosts
403
603
  version = config.version
404
604
  postgres_user = config.postgres_user
605
+ repmgr_db = config.repmgr_database
606
+ executor = ssh_executor
405
607
  all_healthy = true
406
608
 
407
609
  # Check primary
@@ -424,24 +626,35 @@ module ActivePostgres
424
626
  end
425
627
 
426
628
  # Check repmgr registration
427
- cluster_output = begin
428
- capture(:sudo, '-u', 'postgres', 'repmgr', 'cluster', 'show')
429
- rescue StandardError
430
- ''
431
- end
629
+ cluster_output = capture(:sudo, '-u', 'postgres',
630
+ 'repmgr', '-f', '/etc/repmgr.conf', 'cluster', 'show',
631
+ raise_on_non_zero_exit: false).to_s
432
632
  # Primary always has node_id=1, check if it's registered and running
433
- if cluster_output.match?(/\s+1\s+\|.*primary.*\*\s+running/)
633
+ if cluster_output.match?(/\s+1\s+\|.*primary.*\*\s+running/i)
434
634
  info '✓ Primary is registered with repmgr'
435
635
  else
436
- error '✗ Primary is not registered with repmgr'
437
- all_healthy = false
636
+ db_check = executor.run_sql_on_backend(self,
637
+ 'SELECT type FROM repmgr.nodes WHERE node_id = 1 AND active IS TRUE;',
638
+ postgres_user: postgres_user,
639
+ database: repmgr_db,
640
+ tuples_only: true,
641
+ capture: true).to_s
642
+ if db_check.match?(/primary/i)
643
+ info '✓ Primary is registered with repmgr'
644
+ else
645
+ error '✗ Primary is not registered with repmgr'
646
+ all_healthy = false
647
+ end
438
648
  end
439
649
 
440
650
  # Check replication slots (if standbys exist)
441
651
  if standby_hosts.any?
442
652
  begin
443
- slots_sql = 'SELECT slot_name, active FROM pg_replication_slots;'
444
- slots = capture(:sudo, '-u', postgres_user, 'psql', '-c', "'#{slots_sql}'")
653
+ slots = executor.run_sql_on_backend(self,
654
+ 'SELECT slot_name, active FROM pg_replication_slots;',
655
+ postgres_user: postgres_user,
656
+ tuples_only: false,
657
+ capture: true)
445
658
  info "Replication slots:\n#{slots}"
446
659
  rescue StandardError => e
447
660
  error "Failed to fetch replication slots: #{e.message}"
@@ -472,8 +685,11 @@ module ActivePostgres
472
685
 
473
686
  # Check replication status
474
687
  begin
475
- recovery_sql = 'SELECT pg_is_in_recovery();'
476
- rep_status = capture(:sudo, '-u', postgres_user, 'psql', '-t', '-c', "'#{recovery_sql}'")
688
+ rep_status = executor.run_sql_on_backend(self,
689
+ 'SELECT pg_is_in_recovery();',
690
+ postgres_user: postgres_user,
691
+ tuples_only: true,
692
+ capture: true).to_s
477
693
  if rep_status.include?('t')
478
694
  info '✓ Standby is in recovery mode (receiving replication)'
479
695
  else
@@ -487,8 +703,11 @@ module ActivePostgres
487
703
 
488
704
  # Check lag
489
705
  begin
490
- lag_sql = 'SELECT EXTRACT(EPOCH FROM (now() - pg_last_xact_replay_timestamp()))::int AS lag;'
491
- lag_result = capture(:sudo, '-u', postgres_user, 'psql', '-t', '-c', "'#{lag_sql}'").strip
706
+ lag_result = executor.run_sql_on_backend(self,
707
+ 'SELECT EXTRACT(EPOCH FROM (now() - pg_last_xact_replay_timestamp()))::int AS lag;',
708
+ postgres_user: postgres_user,
709
+ tuples_only: true,
710
+ capture: true).to_s.strip
492
711
  info "Replication lag: #{lag_result}"
493
712
  rescue StandardError => e
494
713
  error "Failed to get replication lag: #{e.message}"
@@ -500,11 +719,9 @@ module ActivePostgres
500
719
  # Show final cluster status
501
720
  ssh_executor.execute_on_host(primary_host) do
502
721
  info 'Final cluster status:'
503
- cluster_show = begin
504
- capture(:sudo, '-u', 'postgres', 'repmgr', 'cluster', 'show')
505
- rescue StandardError
506
- 'Error'
507
- end
722
+ cluster_show = capture(:sudo, '-u', 'postgres',
723
+ 'repmgr', '-f', '/etc/repmgr.conf', 'cluster', 'show',
724
+ raise_on_non_zero_exit: false).to_s
508
725
  safe_show = LogSanitizer.sanitize(cluster_show)
509
726
  info safe_show
510
727
  end
@@ -518,10 +735,11 @@ module ActivePostgres
518
735
  all_healthy
519
736
  end
520
737
 
521
- def setup_pgpass_file(host, password, primary_ip = nil)
738
+ def setup_pgpass_file(host, repmgr_password, replication_password: nil, primary_ip: nil)
522
739
  # Create .pgpass file for postgres user to avoid password exposure in logs
523
740
  # Format: hostname:port:database:username:password
524
- pgpass_content = build_pgpass_content(host, password, primary_ip)
741
+ pgpass_content = build_pgpass_content(host, repmgr_password, replication_password: replication_password,
742
+ primary_ip: primary_ip)
525
743
 
526
744
  ssh_executor.execute_on_host(host) do
527
745
  # Create .pgpass in postgres user's home directory
@@ -534,10 +752,13 @@ module ActivePostgres
534
752
  end
535
753
  end
536
754
 
537
- def build_pgpass_content(host, password, primary_ip)
538
- escaped_password = escape_pgpass_value(password)
755
+ def build_pgpass_content(host, repmgr_password, replication_password: nil, primary_ip: nil)
756
+ escaped_password = escape_pgpass_value(repmgr_password)
539
757
  repmgr_user = config.repmgr_user
540
758
  repmgr_db = config.repmgr_database
759
+ replication_user = config.replication_user
760
+ replication_password ||= secrets.resolve('replication_password')
761
+ replication_password = normalize_replication_password(replication_password) if replication_password
541
762
 
542
763
  entries = [
543
764
  "localhost:5432:#{repmgr_db}:#{repmgr_user}:#{escaped_password}",
@@ -557,7 +778,31 @@ module ActivePostgres
557
778
  entries << "#{local_replication_host}:5432:*:#{repmgr_user}:#{escaped_password}"
558
779
  end
559
780
 
560
- "#{entries.join("\n")}\n"
781
+ # Allow repmgr to connect to all nodes for cluster status checks
782
+ config.all_hosts.each do |node|
783
+ replication_host = config.replication_host_for(node)
784
+ next if replication_host.nil? || %w[localhost 127.0.0.1].include?(replication_host)
785
+
786
+ entries << "#{replication_host}:5432:#{repmgr_db}:#{repmgr_user}:#{escaped_password}"
787
+ entries << "#{replication_host}:5432:*:#{repmgr_user}:#{escaped_password}"
788
+ end
789
+
790
+ if replication_password && !replication_password.empty?
791
+ escaped_replication_password = escape_pgpass_value(replication_password)
792
+ entries << "localhost:5432:replication:#{replication_user}:#{escaped_replication_password}"
793
+ entries << "127.0.0.1:5432:replication:#{replication_user}:#{escaped_replication_password}"
794
+ entries << "localhost:5432:*:#{replication_user}:#{escaped_replication_password}"
795
+ entries << "127.0.0.1:5432:*:#{replication_user}:#{escaped_replication_password}"
796
+ if primary_ip
797
+ entries << "#{primary_ip}:5432:replication:#{replication_user}:#{escaped_replication_password}"
798
+ entries << "#{primary_ip}:5432:*:#{replication_user}:#{escaped_replication_password}"
799
+ end
800
+ if local_replication_host && !%w[localhost 127.0.0.1].include?(local_replication_host)
801
+ entries << "#{local_replication_host}:5432:replication:#{replication_user}:#{escaped_replication_password}"
802
+ entries << "#{local_replication_host}:5432:*:#{replication_user}:#{escaped_replication_password}"
803
+ end
804
+ end
805
+ "#{entries.uniq.join("\n")}\n"
561
806
  end
562
807
 
563
808
  def escape_pgpass_value(value)
@@ -571,7 +816,13 @@ module ActivePostgres
571
816
  primary_host = config.primary_replication_host
572
817
  repmgr_user = config.repmgr_user
573
818
  repmgr_db = config.repmgr_database
574
- "host=#{primary_host} user=#{repmgr_user} dbname=#{repmgr_db} application_name=#{standby_label}"
819
+ replication_password = secrets.resolve('replication_password')
820
+ replication_password = normalize_replication_password(replication_password) if replication_password
821
+ replication_user = config.replication_user
822
+
823
+ user = replication_password && !replication_password.empty? ? replication_user : repmgr_user
824
+ dbname = replication_password && !replication_password.empty? ? 'replication' : repmgr_db
825
+ "host=#{primary_host} user=#{user} dbname=#{dbname} application_name=#{standby_label}"
575
826
  end
576
827
 
577
828
  def normalize_repmgr_password(raw_password)
@@ -582,6 +833,14 @@ module ActivePostgres
582
833
  password
583
834
  end
584
835
 
836
+ def normalize_replication_password(raw_password)
837
+ password = raw_password.to_s.rstrip
838
+
839
+ raise 'replication_password secret is missing' if password.empty?
840
+
841
+ password
842
+ end
843
+
585
844
  def reload_postgres_cluster
586
845
  execute :sudo, 'pg_ctlcluster', config.version.to_s, 'main', 'reload'
587
846
  rescue StandardError => e
@@ -652,6 +911,133 @@ module ActivePostgres
652
911
  end
653
912
  end
654
913
  end
914
+
915
+ def ensure_ssl_certs(host, version, force: false)
916
+ ssl_cert = secrets.resolve('ssl_cert')
917
+ ssl_key = secrets.resolve('ssl_key')
918
+
919
+ return regenerate_ssl_certs(host, version) if force
920
+
921
+ if ssl_cert && ssl_key
922
+ regenerate_ssl_certs(host, version)
923
+ return
924
+ end
925
+
926
+ cert_path = "/etc/postgresql/#{version}/main/server.crt"
927
+ key_path = "/etc/postgresql/#{version}/main/server.key"
928
+
929
+ cert_exists = false
930
+ key_exists = false
931
+
932
+ ssh_executor.execute_on_host(host) do
933
+ cert_exists = test(:sudo, 'test', '-f', cert_path)
934
+ key_exists = test(:sudo, 'test', '-f', key_path)
935
+ end
936
+
937
+ regenerate_ssl_certs(host, version) unless cert_exists && key_exists
938
+ end
939
+
940
+ def cluster_exists?(host, version)
941
+ exists = false
942
+ ssh_executor.execute_on_host(host) do
943
+ exists = test(:sudo, 'test', '-d', "/var/lib/postgresql/#{version}/main/base")
944
+ end
945
+ exists
946
+ rescue StandardError
947
+ false
948
+ end
949
+
950
+ def standby_already_configured?(host)
951
+ version = config.version
952
+ postgres_user = config.postgres_user
953
+ configured = false
954
+
955
+ ssh_executor.execute_on_host(host) do
956
+ data_dir = test(:sudo, 'test', '-d', "/var/lib/postgresql/#{version}/main/base")
957
+ repmgr_conf = test(:sudo, 'test', '-f', '/etc/repmgr.conf')
958
+ clusters = begin
959
+ capture(:sudo, 'pg_lsclusters', '-h')
960
+ rescue StandardError
961
+ ''
962
+ end
963
+ online = clusters.lines.any? { |line| line.include?(version.to_s) && line.include?('main') && line.include?('online') }
964
+ in_recovery = begin
965
+ capture(:sudo, '-u', postgres_user, 'psql', '-tA', '-c', '"SELECT pg_is_in_recovery();"').strip == 't'
966
+ rescue StandardError
967
+ false
968
+ end
969
+
970
+ configured = data_dir && repmgr_conf && online && in_recovery
971
+ end
972
+
973
+ configured
974
+ rescue StandardError
975
+ false
976
+ end
977
+
978
+ def update_postgres_configs_on_standby(host, version)
979
+ core_config = config.component_config(:core)
980
+ component_config = core_config
981
+ pg_config = component_config[:postgresql] || {}
982
+ private_ip = config.replication_host_for(host)
983
+ pg_config = substitute_private_ip(pg_config, private_ip)
984
+ _ = pg_config
985
+
986
+ upload_template(host, 'postgresql.conf.erb', "/etc/postgresql/#{version}/main/postgresql.conf",
987
+ binding, owner: 'postgres:postgres')
988
+ upload_template(host, 'pg_hba.conf.erb', "/etc/postgresql/#{version}/main/pg_hba.conf",
989
+ binding, owner: 'postgres:postgres')
990
+
991
+ ssh_executor.restart_postgres(host, version)
992
+ end
993
+
994
+ def enable_repmgrd_if_configured(host, repmgr_config)
995
+ return if repmgr_config[:auto_failover] == false
996
+
997
+ ssh_executor.execute_on_host(host) do
998
+ begin
999
+ execute :sudo, 'systemctl', 'enable', 'repmgrd'
1000
+ execute :sudo, 'systemctl', 'restart', 'repmgrd'
1001
+ rescue StandardError
1002
+ nil
1003
+ end
1004
+ end
1005
+ end
1006
+
1007
+ def build_repmgr_setup_sql(repmgr_user, repmgr_db, repmgr_password)
1008
+ escaped_password = repmgr_password.gsub("'", "''")
1009
+
1010
+ [
1011
+ 'DO $$',
1012
+ 'BEGIN',
1013
+ " IF NOT EXISTS (SELECT FROM pg_roles WHERE rolname = '#{repmgr_user}') THEN",
1014
+ " CREATE USER #{repmgr_user} WITH SUPERUSER PASSWORD '#{escaped_password}';",
1015
+ ' ELSE',
1016
+ " ALTER USER #{repmgr_user} WITH SUPERUSER PASSWORD '#{escaped_password}';",
1017
+ ' END IF;',
1018
+ 'END $$;',
1019
+ '',
1020
+ "SELECT 'CREATE DATABASE #{repmgr_db} OWNER #{repmgr_user}'",
1021
+ "WHERE NOT EXISTS (SELECT FROM pg_database WHERE datname = '#{repmgr_db}')\\gexec",
1022
+ ''
1023
+ ].join("\n")
1024
+ end
1025
+
1026
+ def build_replication_user_sql(replication_user, replication_password)
1027
+ escaped_password = replication_password.gsub("'", "''")
1028
+
1029
+ [
1030
+ 'DO $$',
1031
+ 'BEGIN',
1032
+ " IF NOT EXISTS (SELECT FROM pg_roles WHERE rolname = '#{replication_user}') THEN",
1033
+ " CREATE USER #{replication_user} WITH REPLICATION LOGIN PASSWORD '#{escaped_password}';",
1034
+ ' ELSE',
1035
+ " ALTER USER #{replication_user} WITH REPLICATION LOGIN PASSWORD '#{escaped_password}';",
1036
+ ' END IF;',
1037
+ 'END $$;',
1038
+ ''
1039
+ ].join("\n")
1040
+ end
655
1041
  end
656
1042
  end
657
1043
  end