active_postgres 0.8.0 → 0.9.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -11,6 +11,8 @@ module ActivePostgres
11
11
  setup_standby(host)
12
12
  end
13
13
 
14
+ setup_dns_failover if dns_failover_enabled?
15
+
14
16
  # Verify the entire cluster is healthy after setup
15
17
  puts "\nšŸ„ Performing final health check..."
16
18
  verify_cluster_health
@@ -68,6 +70,8 @@ module ActivePostgres
68
70
  end
69
71
 
70
72
  setup_standby(standby_host)
73
+
74
+ setup_dns_failover if dns_failover_enabled?
71
75
  end
72
76
 
73
77
  private
@@ -91,15 +95,22 @@ module ActivePostgres
91
95
  repmgr_config = config.component_config(:repmgr)
92
96
  version = config.version
93
97
  repmgr_password = normalize_repmgr_password(secrets.resolve('repmgr_password'))
98
+ replication_password = normalize_replication_password(secrets.resolve('replication_password'))
94
99
  secrets_obj = secrets
95
100
  repmgr_user = config.repmgr_user
96
101
  repmgr_db = config.repmgr_database
102
+ replication_user = config.replication_user
103
+ if replication_user == repmgr_user && replication_password != repmgr_password
104
+ raise Error, 'replication_user matches repmgr user but passwords differ. Use a distinct replication_user or the same password.'
105
+ end
106
+ effective_replication_password = replication_user == repmgr_user ? repmgr_password : replication_password
97
107
 
98
108
  # Variables used in ERB templates via binding
99
109
  _ = repmgr_config
100
110
  _ = secrets_obj
101
111
 
102
- ssh_executor.recreate_cluster(host, version)
112
+ cluster_exists = cluster_exists?(host, version)
113
+ ssh_executor.ensure_cluster_exists(host, version) unless cluster_exists
103
114
 
104
115
  puts ' Configuring PostgreSQL...'
105
116
  core_config = config.component_config(:core)
@@ -117,12 +128,14 @@ module ActivePostgres
117
128
  upload_template(host, 'pg_hba.conf.erb', "/etc/postgresql/#{version}/main/pg_hba.conf", binding,
118
129
  owner: 'postgres:postgres')
119
130
 
120
- # Regenerate SSL certificates if SSL is enabled (cluster recreation deleted them)
131
+ # Ensure SSL certificates are present if SSL is enabled
121
132
  if config.component_enabled?(:ssl)
122
- puts ' Regenerating SSL certificates...'
123
- regenerate_ssl_certs(host, version)
133
+ puts ' Ensuring SSL certificates...'
134
+ ensure_ssl_certs(host, version, force: !cluster_exists)
124
135
  end
125
136
 
137
+ repmgr_component = self
138
+ executor = ssh_executor
126
139
  ssh_executor.execute_on_host(host) do
127
140
  execute :sudo, 'pg_ctlcluster', version.to_s, 'main', 'restart'
128
141
 
@@ -148,26 +161,24 @@ module ActivePostgres
148
161
  execute :sudo, '-u', 'postgres', 'psql', '-l'
149
162
 
150
163
  info 'Creating repmgr database and user...'
151
- escaped_password = repmgr_password.gsub("'", "''")
152
- sql = [
153
- "DROP DATABASE IF EXISTS #{repmgr_db}",
154
- "DROP USER IF EXISTS #{repmgr_user}",
155
- "CREATE USER #{repmgr_user} WITH SUPERUSER PASSWORD '#{escaped_password}'",
156
- "CREATE DATABASE #{repmgr_db} OWNER #{repmgr_user}",
157
- '' # Ensures trailing semicolon after join
158
- ].join('; ')
159
- upload! StringIO.new(sql), '/tmp/setup_repmgr.sql'
160
- execute :chmod, '644', '/tmp/setup_repmgr.sql'
161
- execute :sudo, '-u', 'postgres', 'psql', '-p', '5432', '-f', '/tmp/setup_repmgr.sql'
162
- execute :rm, '-f', '/tmp/setup_repmgr.sql'
164
+ repmgr_sql = repmgr_component.send(:build_repmgr_setup_sql, repmgr_user, repmgr_db, repmgr_password)
165
+ executor.run_sql_on_backend(self, repmgr_sql, postgres_user: 'postgres', port: 5432, tuples_only: false,
166
+ capture: false)
167
+
168
+ if replication_user != repmgr_user
169
+ info 'Ensuring replication user exists...'
170
+ repl_sql = repmgr_component.send(:build_replication_user_sql, replication_user, effective_replication_password)
171
+ executor.run_sql_on_backend(self, repl_sql, postgres_user: 'postgres', port: 5432, tuples_only: false,
172
+ capture: false)
173
+ end
163
174
 
164
175
  info 'Reloading PostgreSQL configuration to apply pg_hba.conf changes...'
165
176
  execute :sudo, 'pg_ctlcluster', version.to_s, 'main', 'reload'
166
177
  end
167
178
 
168
- setup_pgpass_file(host, repmgr_password)
179
+ setup_pgpass_file(host, repmgr_password, replication_password: effective_replication_password)
169
180
 
170
- upload_template(host, 'repmgr.conf.erb', '/etc/repmgr.conf', binding, mode: '644', owner: 'postgres:postgres')
181
+ upload_template(host, 'repmgr.conf.erb', '/etc/repmgr.conf', binding, mode: '600', owner: 'postgres:postgres')
171
182
 
172
183
  ssh_executor.execute_on_host(host) do
173
184
  info 'Registering primary with repmgr...'
@@ -176,22 +187,37 @@ module ActivePostgres
176
187
  'repmgr', 'primary', 'register',
177
188
  '-f', '/etc/repmgr.conf', '--force'
178
189
 
179
- # Verify registration succeeded
180
- sleep 2
181
- cluster_show = begin
182
- capture(:sudo, '-u', 'postgres', 'repmgr', 'cluster', 'show', '-f',
183
- '/etc/repmgr.conf')
184
- rescue StandardError
185
- 'Could not show cluster'
190
+ # Verify registration succeeded (repmgr can take a moment to report)
191
+ cluster_show = nil
192
+ 5.times do |attempt|
193
+ cluster_show = capture(:sudo, '-u', 'postgres', 'bash', '-lc',
194
+ "repmgr cluster show -f /etc/repmgr.conf 2>&1", raise_on_non_zero_exit: false).to_s
195
+
196
+ break if cluster_show.match?(/primary/i)
197
+
198
+ sleep 2 if attempt < 4
186
199
  end
187
200
 
188
- unless cluster_show.include?('primary') && cluster_show.include?('running')
189
- error 'āœ— Primary registration verification failed'
190
- raise 'Primary registration failed'
201
+ unless cluster_show && cluster_show.match?(/primary/i)
202
+ # Fallback: verify via repmgr metadata in the repmgr database
203
+ db_check = executor.run_sql_on_backend(self,
204
+ 'SELECT type FROM repmgr.nodes WHERE node_id = 1;',
205
+ postgres_user: 'postgres',
206
+ database: repmgr_db,
207
+ tuples_only: true,
208
+ capture: true).to_s
209
+
210
+ unless db_check.match?(/primary/i)
211
+ safe_show = LogSanitizer.sanitize(cluster_show.to_s)
212
+ error "āœ— Primary registration verification failed:\n#{safe_show}"
213
+ raise 'Primary registration failed'
214
+ end
191
215
  end
192
216
 
193
217
  info 'āœ“ Primary successfully registered with repmgr!'
194
218
  end
219
+
220
+ enable_repmgrd_if_configured(host, repmgr_config)
195
221
  end
196
222
 
197
223
  def setup_standby(standby_host)
@@ -205,6 +231,7 @@ module ActivePostgres
205
231
  repmgr_user = config.repmgr_user
206
232
  repmgr_db = config.repmgr_database
207
233
  postgres_user = config.postgres_user
234
+ replication_user = config.replication_user
208
235
 
209
236
  # Variables used in ERB templates via binding
210
237
  _ = host
@@ -213,10 +240,27 @@ module ActivePostgres
213
240
 
214
241
  node_id = config.standby_hosts.index(standby_host) + 2
215
242
  repmgr_password = normalize_repmgr_password(secrets_obj.resolve('repmgr_password'))
243
+ replication_password = normalize_replication_password(secrets_obj.resolve('replication_password'))
244
+ if replication_user == repmgr_user && replication_password != repmgr_password
245
+ raise Error, 'replication_user matches repmgr user but passwords differ. Use a distinct replication_user or the same password.'
246
+ end
247
+ effective_replication_password = replication_user == repmgr_user ? repmgr_password : replication_password
216
248
 
217
249
  ensure_primary_registered
218
250
 
219
- setup_pgpass_file(standby_host, repmgr_password, primary_replication_host)
251
+ setup_pgpass_file(standby_host, repmgr_password, replication_password: effective_replication_password,
252
+ primary_ip: primary_replication_host)
253
+
254
+ if standby_already_configured?(standby_host)
255
+ puts ' Standby already configured, updating configs...'
256
+ upload_template(standby_host, 'repmgr.conf.erb', '/etc/repmgr.conf', binding, mode: '600',
257
+ owner: 'postgres:postgres')
258
+ update_postgres_configs_on_standby(standby_host, version)
259
+ ensure_ssl_certs(standby_host, version) if config.component_enabled?(:ssl)
260
+ register_standby_with_primary(standby_host)
261
+ enable_repmgrd_if_configured(standby_host, repmgr_config)
262
+ return
263
+ end
220
264
 
221
265
  ssh_executor.execute_on_host(standby_host) do
222
266
  info 'Preparing standby cluster...'
@@ -244,12 +288,10 @@ module ActivePostgres
244
288
  info 'Cloning from primary over private network...'
245
289
  # Create a temporary repmgr config for cloning that points to the primary
246
290
  # The regular config points to localhost which doesn't work during initial clone
247
- escaped_password = repmgr_password.gsub("'", "\\\\'")
248
-
249
291
  temp_repmgr_conf = <<~CONF
250
292
  node_id=#{node_id}
251
293
  node_name='#{standby_host}'
252
- conninfo='host=#{primary_replication_host} user=#{repmgr_user} dbname=#{repmgr_db} password=#{escaped_password} connect_timeout=10'
294
+ conninfo='host=#{primary_replication_host} user=#{repmgr_user} dbname=#{repmgr_db} connect_timeout=10'
253
295
  data_directory='/var/lib/postgresql/#{version}/main'
254
296
  CONF
255
297
 
@@ -257,7 +299,7 @@ module ActivePostgres
257
299
  upload! StringIO.new(temp_repmgr_conf), '/tmp/repmgr_clone.conf'
258
300
  execute :sudo, 'mv', '/tmp/repmgr_clone.conf', '/etc/repmgr_clone.conf'
259
301
  execute :sudo, 'chown', 'postgres:postgres', '/etc/repmgr_clone.conf'
260
- execute :sudo, 'chmod', '644', '/etc/repmgr_clone.conf'
302
+ execute :sudo, 'chmod', '600', '/etc/repmgr_clone.conf'
261
303
 
262
304
  info 'Running: repmgr standby clone (this may take a few minutes to copy the database)...'
263
305
 
@@ -300,7 +342,7 @@ module ActivePostgres
300
342
 
301
343
  # Upload the proper repmgr.conf now that clone is complete
302
344
  puts ' Uploading final repmgr configuration...'
303
- upload_template(standby_host, 'repmgr.conf.erb', '/etc/repmgr.conf', binding, mode: '644',
345
+ upload_template(standby_host, 'repmgr.conf.erb', '/etc/repmgr.conf', binding, mode: '600',
304
346
  owner: 'postgres:postgres')
305
347
 
306
348
  # Create config directory and setup PostgreSQL configuration
@@ -327,10 +369,10 @@ module ActivePostgres
327
369
  upload_template(standby_host, 'pg_hba.conf.erb', "/etc/postgresql/#{version}/main/pg_hba.conf", binding,
328
370
  owner: 'postgres:postgres')
329
371
 
330
- # Regenerate SSL certificates if SSL is enabled (clone doesn't copy config files)
372
+ # Ensure SSL certificates if SSL is enabled (clone doesn't copy config files)
331
373
  if config.component_enabled?(:ssl)
332
- puts ' Regenerating SSL certificates on standby...'
333
- regenerate_ssl_certs(standby_host, version)
374
+ puts ' Ensuring SSL certificates on standby...'
375
+ ensure_ssl_certs(standby_host, version, force: true)
334
376
  end
335
377
 
336
378
  ssh_executor.execute_on_host(standby_host) do
@@ -357,6 +399,146 @@ module ActivePostgres
357
399
  end
358
400
 
359
401
  register_standby_with_primary(standby_host)
402
+ enable_repmgrd_if_configured(standby_host, repmgr_config)
403
+ end
404
+
405
# Wires up DNS-based failover across the cluster.
#
# Does nothing when dns_failover_config returns nil. Otherwise it:
#   1. makes sure every host in config.all_hosts has an SSH key for DNS updates,
#   2. authorizes the collected public keys on each DNS server's SSH host,
#   3. installs the failover script on every host.
def setup_dns_failover
  settings = dns_failover_config
  return unless settings

  servers = normalize_dns_servers(settings[:dns_servers])
  # Picks one field out of every normalized server entry, dropping blanks.
  present = ->(field) { servers.map { |entry| entry[field] }.reject(&:empty?) }
  private_ips = present.call(:private_ip)
  ssh_hosts = present.call(:ssh_host)
  login_user = settings[:dns_user] || config.user
  key_path = settings[:ssh_key_path] || '/var/lib/postgresql/.ssh/active_postgres_dns'
  host_key_mode = normalize_dns_host_key_verification(
    settings[:ssh_host_key_verification] || config.ssh_host_key_verification
  )

  keys_by_host = config.all_hosts.each_with_object({}) do |node, collected|
    collected[node] = ensure_dns_ssh_key(node, key_path, private_ips)
  end

  ssh_hosts.each do |dns_host|
    authorize_dns_keys(dns_host, login_user, keys_by_host.values.compact)
  end

  config.all_hosts.each do |node|
    install_dns_failover_script(node, settings, private_ips, login_user, key_path, host_key_mode)
  end
end
431
+
432
# True when dns_failover_config yields anything other than nil.
def dns_failover_enabled?
  !dns_failover_config.nil?
end
435
+
436
# Returns the :dns_failover section of the repmgr component config, but only
# when that section exists and its :enabled entry is truthy; nil otherwise.
def dns_failover_config
  section = config.component_config(:repmgr)[:dns_failover]
  section if section && section[:enabled]
end
443
+
444
# Normalizes the configured DNS servers into a uniform list of
# { ssh_host:, private_ip: } hashes with String values.
#
# Accepted entry shapes:
#   * Hash with :ssh_host / :host and/or :private_ip / :ip (symbol or string
#     keys); whichever of the two addresses is missing falls back to the other.
#   * anything else - converted with to_s and used for both fields.
#
# @param raw_servers [Array, Hash, String, nil] one entry or a list of entries
# @return [Array<Hash>] one normalized hash per entry (values may be empty strings)
def normalize_dns_servers(raw_servers)
  # Array() would turn a lone Hash into [key, value] pairs, so a single
  # server given as a Hash has to be wrapped explicitly.
  entries = raw_servers.is_a?(Hash) ? [raw_servers] : Array(raw_servers)

  entries.map do |server|
    if server.is_a?(Hash)
      ssh_host = server[:ssh_host] || server['ssh_host'] || server[:host] || server['host']
      private_ip = server[:private_ip] || server['private_ip'] || server[:ip] || server['ip']
      private_ip ||= ssh_host
      ssh_host ||= private_ip
      { ssh_host: ssh_host.to_s, private_ip: private_ip.to_s }
    else
      value = server.to_s
      { ssh_host: value, private_ip: value }
    end
  end
end
458
+
459
# Makes sure the postgres user on +host+ owns an ed25519 key pair at +key_path+
# and returns the public half.
#
# Inside the remote block it prepares /var/lib/postgresql/.ssh (mode 700),
# generates the key only when the private key file is absent, reads the .pub
# file, and - when +dns_servers+ is non-empty - appends hashed ssh-keyscan
# output for those servers to the postgres user's known_hosts (scan failures
# are ignored via `|| true`).
#
# @param host [String] host on which the key is ensured
# @param key_path [String] private key path; the public key is read from "<key_path>.pub"
# @param dns_servers [Array<String>] addresses handed to ssh-keyscan
# @return [String] public key contents, stripped
def ensure_dns_ssh_key(host, key_path, dns_servers)
  pg_user = config.postgres_user
  ssh_dir = '/var/lib/postgresql/.ssh'
  known_hosts = "#{ssh_dir}/known_hosts"
  pub_key = nil

  ssh_executor.execute_on_host(host) do
    execute :sudo, 'mkdir', '-p', ssh_dir
    execute :sudo, 'chown', "#{pg_user}:#{pg_user}", ssh_dir
    execute :sudo, 'chmod', '700', ssh_dir

    key_present = test(:sudo, '-u', pg_user, 'test', '-f', key_path)
    execute :sudo, '-u', pg_user, "ssh-keygen -t ed25519 -N '' -f #{key_path}" unless key_present

    pub_key = capture(:sudo, '-u', pg_user, 'cat', "#{key_path}.pub").strip

    next if dns_servers.empty?

    execute :sudo, '-u', pg_user, 'touch', known_hosts
    keyscan = "ssh-keyscan -H #{dns_servers.join(' ')} >> #{known_hosts} 2>/dev/null || true"
    execute :sudo, '-u', pg_user, 'bash', '-c', keyscan
    execute :sudo, '-u', pg_user, 'chmod', '600', known_hosts
  end

  pub_key
end
484
+
485
# Appends each public key to ~/.ssh/authorized_keys of +dns_user+ on the DNS
# server, skipping keys already present as an exact line (grep -qxF).
#
# Returns immediately when +keys+ is empty; blank keys are skipped inside the
# remote block. SSH connection timeouts and authentication failures are
# re-raised as Error with a hint about running setup from inside the mesh.
#
# @param dns_server [String] SSH host of the DNS server
# @param dns_user [String] account whose authorized_keys is updated
# @param keys [Array<String>] public keys to authorize
# @raise [Error] when the SSH connection times out or authentication fails
def authorize_dns_keys(dns_server, dns_user, keys)
  return if keys.empty?

  ssh_dir = '~/.ssh'
  authorized_keys = "#{ssh_dir}/authorized_keys"
  staged_key = '/tmp/active_postgres_dns_key.pub'
  append_if_missing = "grep -qxF -f #{staged_key} #{authorized_keys} || cat #{staged_key} >> #{authorized_keys}"

  ssh_executor.execute_on_host_as(dns_server, dns_user) do
    execute :mkdir, '-p', ssh_dir
    execute :chmod, '700', ssh_dir
    execute :touch, authorized_keys
    execute :chmod, '600', authorized_keys

    keys.each do |key|
      next if key.to_s.empty?

      # Stage the key in a file so grep can compare it as a fixed whole line.
      upload! StringIO.new("#{key}\n"), staged_key
      execute :bash, '-c', append_if_missing
      execute :rm, '-f', staged_key
    end
  end
rescue Net::SSH::ConnectionTimeout, Net::SSH::AuthenticationFailed => e
  message = [
    "Failed to SSH to DNS server #{dns_server} as #{dns_user}. ",
    'If you run setup outside the mesh, use dns_failover.dns_servers entries ',
    'with host/private_ip or run setup from a mesh node. ',
    "(#{e.class})"
  ].join
  raise Error, message
end
511
+
512
# Renders repmgr_dns_failover.sh.erb to
# /usr/local/bin/active-postgres-dns-failover on +host+ (mode 755, root:root).
# Skipped entirely when +dns_servers+ is empty.
#
# Record names default to "db-primary.<domain>" / "db-replica.<domain>", with
# the domain itself defaulting to 'mesh'. The template receives this method's
# binding, so the local and parameter names below must stay as they are.
def install_dns_failover_script(host, dns_config, dns_servers, dns_user, dns_ssh_key_path, ssh_strict_host_key)
  return if dns_servers.empty?

  domain = dns_config[:domain] || 'mesh'
  record_for = ->(key, label) { dns_config[key] || "#{label}.#{domain}" }
  primary_record = record_for.call(:primary_record, 'db-primary')
  replica_record = record_for.call(:replica_record, 'db-replica')

  # Referenced only from the ERB template via binding; touch them here so
  # they do not read as unused.
  _ = primary_record, replica_record, dns_servers, dns_user, dns_ssh_key_path, ssh_strict_host_key

  upload_template(host, 'repmgr_dns_failover.sh.erb', '/usr/local/bin/active-postgres-dns-failover', binding,
                  mode: '755', owner: 'root:root')
end
529
+
530
# Maps a host-key verification setting to one of the strings
# 'accept-new', 'no' or 'yes'.
# NOTE(review): presumably these feed ssh's StrictHostKeyChecking option in
# the failover script template - confirm against the template.
#
# Symbols and strings are normalized the same way (trimmed, lower-cased,
# hyphens folded to underscores), so :accept_new, :"accept-new",
# 'accept-new' and 'Accept_New' are all equivalent.
#
# @param value [Symbol, String, nil] configured verification mode
# @return [String] 'accept-new' for accept_new, 'no' for never, 'yes' for
#   everything else (including nil and unknown values)
def normalize_dns_host_key_verification(value)
  case value.to_s.strip.downcase.tr('-', '_')
  when 'accept_new' then 'accept-new'
  when 'never' then 'no'
  else 'yes'
  end
end
361
543
 
362
544
  def register_standby_with_primary(standby_host)
@@ -374,6 +556,7 @@ module ActivePostgres
374
556
  info 'Setting primary_conninfo with application_name...'
375
557
  upload! StringIO.new(sql_content), temp_sql
376
558
  execute :sudo, 'chown', "#{postgres_user}:#{postgres_user}", temp_sql
559
+ execute :sudo, 'chmod', '600', temp_sql
377
560
 
378
561
  begin
379
562
  execute :sudo, '-u', postgres_user, 'psql', '-f', temp_sql
@@ -402,6 +585,8 @@ module ActivePostgres
402
585
  standby_hosts = config.standby_hosts
403
586
  version = config.version
404
587
  postgres_user = config.postgres_user
588
+ repmgr_db = config.repmgr_database
589
+ executor = ssh_executor
405
590
  all_healthy = true
406
591
 
407
592
  # Check primary
@@ -424,24 +609,35 @@ module ActivePostgres
424
609
  end
425
610
 
426
611
  # Check repmgr registration
427
- cluster_output = begin
428
- capture(:sudo, '-u', 'postgres', 'repmgr', 'cluster', 'show')
429
- rescue StandardError
430
- ''
431
- end
612
+ cluster_output = capture(:sudo, '-u', 'postgres',
613
+ 'repmgr', '-f', '/etc/repmgr.conf', 'cluster', 'show',
614
+ raise_on_non_zero_exit: false).to_s
432
615
  # Primary always has node_id=1, check if it's registered and running
433
- if cluster_output.match?(/\s+1\s+\|.*primary.*\*\s+running/)
616
+ if cluster_output.match?(/\s+1\s+\|.*primary.*\*\s+running/i)
434
617
  info 'āœ“ Primary is registered with repmgr'
435
618
  else
436
- error 'āœ— Primary is not registered with repmgr'
437
- all_healthy = false
619
+ db_check = executor.run_sql_on_backend(self,
620
+ 'SELECT type FROM repmgr.nodes WHERE node_id = 1 AND active IS TRUE;',
621
+ postgres_user: postgres_user,
622
+ database: repmgr_db,
623
+ tuples_only: true,
624
+ capture: true).to_s
625
+ if db_check.match?(/primary/i)
626
+ info 'āœ“ Primary is registered with repmgr'
627
+ else
628
+ error 'āœ— Primary is not registered with repmgr'
629
+ all_healthy = false
630
+ end
438
631
  end
439
632
 
440
633
  # Check replication slots (if standbys exist)
441
634
  if standby_hosts.any?
442
635
  begin
443
- slots_sql = 'SELECT slot_name, active FROM pg_replication_slots;'
444
- slots = capture(:sudo, '-u', postgres_user, 'psql', '-c', "'#{slots_sql}'")
636
+ slots = executor.run_sql_on_backend(self,
637
+ 'SELECT slot_name, active FROM pg_replication_slots;',
638
+ postgres_user: postgres_user,
639
+ tuples_only: false,
640
+ capture: true)
445
641
  info "Replication slots:\n#{slots}"
446
642
  rescue StandardError => e
447
643
  error "Failed to fetch replication slots: #{e.message}"
@@ -472,8 +668,11 @@ module ActivePostgres
472
668
 
473
669
  # Check replication status
474
670
  begin
475
- recovery_sql = 'SELECT pg_is_in_recovery();'
476
- rep_status = capture(:sudo, '-u', postgres_user, 'psql', '-t', '-c', "'#{recovery_sql}'")
671
+ rep_status = executor.run_sql_on_backend(self,
672
+ 'SELECT pg_is_in_recovery();',
673
+ postgres_user: postgres_user,
674
+ tuples_only: true,
675
+ capture: true).to_s
477
676
  if rep_status.include?('t')
478
677
  info 'āœ“ Standby is in recovery mode (receiving replication)'
479
678
  else
@@ -487,8 +686,11 @@ module ActivePostgres
487
686
 
488
687
  # Check lag
489
688
  begin
490
- lag_sql = 'SELECT EXTRACT(EPOCH FROM (now() - pg_last_xact_replay_timestamp()))::int AS lag;'
491
- lag_result = capture(:sudo, '-u', postgres_user, 'psql', '-t', '-c', "'#{lag_sql}'").strip
689
+ lag_result = executor.run_sql_on_backend(self,
690
+ 'SELECT EXTRACT(EPOCH FROM (now() - pg_last_xact_replay_timestamp()))::int AS lag;',
691
+ postgres_user: postgres_user,
692
+ tuples_only: true,
693
+ capture: true).to_s.strip
492
694
  info "Replication lag: #{lag_result}"
493
695
  rescue StandardError => e
494
696
  error "Failed to get replication lag: #{e.message}"
@@ -500,11 +702,9 @@ module ActivePostgres
500
702
  # Show final cluster status
501
703
  ssh_executor.execute_on_host(primary_host) do
502
704
  info 'Final cluster status:'
503
- cluster_show = begin
504
- capture(:sudo, '-u', 'postgres', 'repmgr', 'cluster', 'show')
505
- rescue StandardError
506
- 'Error'
507
- end
705
+ cluster_show = capture(:sudo, '-u', 'postgres',
706
+ 'repmgr', '-f', '/etc/repmgr.conf', 'cluster', 'show',
707
+ raise_on_non_zero_exit: false).to_s
508
708
  safe_show = LogSanitizer.sanitize(cluster_show)
509
709
  info safe_show
510
710
  end
@@ -518,10 +718,11 @@ module ActivePostgres
518
718
  all_healthy
519
719
  end
520
720
 
521
- def setup_pgpass_file(host, password, primary_ip = nil)
721
+ def setup_pgpass_file(host, repmgr_password, replication_password: nil, primary_ip: nil)
522
722
  # Create .pgpass file for postgres user to avoid password exposure in logs
523
723
  # Format: hostname:port:database:username:password
524
- pgpass_content = build_pgpass_content(host, password, primary_ip)
724
+ pgpass_content = build_pgpass_content(host, repmgr_password, replication_password: replication_password,
725
+ primary_ip: primary_ip)
525
726
 
526
727
  ssh_executor.execute_on_host(host) do
527
728
  # Create .pgpass in postgres user's home directory
@@ -534,10 +735,13 @@ module ActivePostgres
534
735
  end
535
736
  end
536
737
 
537
- def build_pgpass_content(host, password, primary_ip)
538
- escaped_password = escape_pgpass_value(password)
738
+ def build_pgpass_content(host, repmgr_password, replication_password: nil, primary_ip: nil)
739
+ escaped_password = escape_pgpass_value(repmgr_password)
539
740
  repmgr_user = config.repmgr_user
540
741
  repmgr_db = config.repmgr_database
742
+ replication_user = config.replication_user
743
+ replication_password ||= secrets.resolve('replication_password')
744
+ replication_password = normalize_replication_password(replication_password) if replication_password
541
745
 
542
746
  entries = [
543
747
  "localhost:5432:#{repmgr_db}:#{repmgr_user}:#{escaped_password}",
@@ -557,7 +761,31 @@ module ActivePostgres
557
761
  entries << "#{local_replication_host}:5432:*:#{repmgr_user}:#{escaped_password}"
558
762
  end
559
763
 
560
- "#{entries.join("\n")}\n"
764
+ # Allow repmgr to connect to all nodes for cluster status checks
765
+ config.all_hosts.each do |node|
766
+ replication_host = config.replication_host_for(node)
767
+ next if replication_host.nil? || %w[localhost 127.0.0.1].include?(replication_host)
768
+
769
+ entries << "#{replication_host}:5432:#{repmgr_db}:#{repmgr_user}:#{escaped_password}"
770
+ entries << "#{replication_host}:5432:*:#{repmgr_user}:#{escaped_password}"
771
+ end
772
+
773
+ if replication_password && !replication_password.empty?
774
+ escaped_replication_password = escape_pgpass_value(replication_password)
775
+ entries << "localhost:5432:replication:#{replication_user}:#{escaped_replication_password}"
776
+ entries << "127.0.0.1:5432:replication:#{replication_user}:#{escaped_replication_password}"
777
+ entries << "localhost:5432:*:#{replication_user}:#{escaped_replication_password}"
778
+ entries << "127.0.0.1:5432:*:#{replication_user}:#{escaped_replication_password}"
779
+ if primary_ip
780
+ entries << "#{primary_ip}:5432:replication:#{replication_user}:#{escaped_replication_password}"
781
+ entries << "#{primary_ip}:5432:*:#{replication_user}:#{escaped_replication_password}"
782
+ end
783
+ if local_replication_host && !%w[localhost 127.0.0.1].include?(local_replication_host)
784
+ entries << "#{local_replication_host}:5432:replication:#{replication_user}:#{escaped_replication_password}"
785
+ entries << "#{local_replication_host}:5432:*:#{replication_user}:#{escaped_replication_password}"
786
+ end
787
+ end
788
+ "#{entries.uniq.join("\n")}\n"
561
789
  end
562
790
 
563
791
  def escape_pgpass_value(value)
@@ -571,7 +799,13 @@ module ActivePostgres
571
799
  primary_host = config.primary_replication_host
572
800
  repmgr_user = config.repmgr_user
573
801
  repmgr_db = config.repmgr_database
574
- "host=#{primary_host} user=#{repmgr_user} dbname=#{repmgr_db} application_name=#{standby_label}"
802
+ replication_password = secrets.resolve('replication_password')
803
+ replication_password = normalize_replication_password(replication_password) if replication_password
804
+ replication_user = config.replication_user
805
+
806
+ user = replication_password && !replication_password.empty? ? replication_user : repmgr_user
807
+ dbname = replication_password && !replication_password.empty? ? 'replication' : repmgr_db
808
+ "host=#{primary_host} user=#{user} dbname=#{dbname} application_name=#{standby_label}"
575
809
  end
576
810
 
577
811
  def normalize_repmgr_password(raw_password)
@@ -582,6 +816,14 @@ module ActivePostgres
582
816
  password
583
817
  end
584
818
 
819
# Converts the raw secret to a String with trailing whitespace removed.
#
# @param raw_password [#to_s, nil] value resolved from the secrets store
# @return [String] the password without trailing whitespace
# @raise [RuntimeError] when nothing remains after trimming
def normalize_replication_password(raw_password)
  trimmed = raw_password.to_s.rstrip
  return trimmed unless trimmed.empty?

  raise 'replication_password secret is missing'
end
826
+
585
827
  def reload_postgres_cluster
586
828
  execute :sudo, 'pg_ctlcluster', config.version.to_s, 'main', 'reload'
587
829
  rescue StandardError => e
@@ -652,6 +894,133 @@ module ActivePostgres
652
894
  end
653
895
  end
654
896
  end
897
+
898
# Decides whether SSL certificates need to be (re)generated on +host+.
#
# Regeneration happens when +force+ is set, when both the ssl_cert and
# ssl_key secrets resolve to a value, or when server.crt / server.key is
# missing from /etc/postgresql/<version>/main. Otherwise nothing is done.
#
# @param host [String] target host
# @param version [String, Integer] PostgreSQL major version used in the path
# @param force [Boolean] regenerate unconditionally
def ensure_ssl_certs(host, version, force: false)
  provided_cert = secrets.resolve('ssl_cert')
  provided_key = secrets.resolve('ssl_key')

  return regenerate_ssl_certs(host, version) if force

  if provided_cert && provided_key
    regenerate_ssl_certs(host, version)
    return
  end

  conf_dir = "/etc/postgresql/#{version}/main"
  on_disk = { cert: false, key: false }

  ssh_executor.execute_on_host(host) do
    on_disk[:cert] = test(:sudo, 'test', '-f', "#{conf_dir}/server.crt")
    on_disk[:key] = test(:sudo, 'test', '-f', "#{conf_dir}/server.key")
  end

  regenerate_ssl_certs(host, version) unless on_disk[:cert] && on_disk[:key]
end
922
+
923
# Checks on +host+ whether /var/lib/postgresql/<version>/main/base is a
# directory. Any StandardError raised while probing is treated as
# "does not exist".
#
# @return [Boolean] result of the remote directory test, or false on error
def cluster_exists?(host, version)
  base_dir = "/var/lib/postgresql/#{version}/main/base"
  found = false
  ssh_executor.execute_on_host(host) { found = test(:sudo, 'test', '-d', base_dir) }
  found
rescue StandardError
  false
end
932
+
933
# Reports whether +host+ already looks like a working standby, so the caller
# can refresh its configuration instead of preparing and cloning it again.
#
# All of the following must hold on the host:
#   * /var/lib/postgresql/<version>/main/base is a directory,
#   * /etc/repmgr.conf exists,
#   * pg_lsclusters lists cluster "<version> main" with a status starting
#     with "online",
#   * SELECT pg_is_in_recovery() returns 't'.
#
# Any StandardError (including connection problems) yields false.
#
# @param host [String] standby host to inspect
# @return [Boolean] truthy only when every check above passes
def standby_already_configured?(host)
  version = config.version
  postgres_user = config.postgres_user
  configured = false

  ssh_executor.execute_on_host(host) do
    data_dir = test(:sudo, 'test', '-d', "/var/lib/postgresql/#{version}/main/base")
    repmgr_conf = test(:sudo, 'test', '-f', '/etc/repmgr.conf')
    clusters = begin
      capture(:sudo, 'pg_lsclusters', '-h').to_s
    rescue StandardError
      ''
    end
    # Compare whole columns rather than substrings of the line; otherwise
    # version "14" would match a port such as 5414 and "main" would match "main2".
    # NOTE(review): assumes the header-less column order is
    # Ver Cluster Port Status ... - confirm against the pg_lsclusters man page.
    online = clusters.lines.any? do |line|
      ver, cluster_name, _port, status = line.split
      ver == version.to_s && cluster_name == 'main' && status.to_s.start_with?('online')
    end
    in_recovery = begin
      capture(:sudo, '-u', postgres_user, 'psql', '-tA', '-c', '"SELECT pg_is_in_recovery();"').strip == 't'
    rescue StandardError
      false
    end

    configured = data_dir && repmgr_conf && online && in_recovery
  end

  configured
rescue StandardError
  false
end
960
+
961
# Re-renders postgresql.conf and pg_hba.conf for an existing standby and then
# restarts PostgreSQL on it.
#
# The templates are rendered against this method's binding, so the locals
# core_config, component_config, pg_config and private_ip are kept under
# exactly these names.
#
# @param host [String] standby host
# @param version [String, Integer] PostgreSQL major version used in the paths
def update_postgres_configs_on_standby(host, version)
  core_config = config.component_config(:core)
  component_config = core_config
  private_ip = config.replication_host_for(host)
  pg_config = substitute_private_ip(component_config[:postgresql] || {}, private_ip)
  _ = pg_config # referenced from the ERB templates only

  conf_dir = "/etc/postgresql/#{version}/main"
  upload_template(host, 'postgresql.conf.erb', "#{conf_dir}/postgresql.conf",
                  binding, owner: 'postgres:postgres')
  upload_template(host, 'pg_hba.conf.erb', "#{conf_dir}/pg_hba.conf",
                  binding, owner: 'postgres:postgres')

  ssh_executor.restart_postgres(host, version)
end
976
+
977
# Enables and restarts the repmgrd service on +host+ unless automatic
# failover was explicitly disabled (auto_failover: false; a missing key
# leaves it enabled).
#
# This step is best-effort: a failing systemctl call does not abort the
# setup, but it is reported as a warning instead of being dropped silently.
#
# @param host [String] host on which repmgrd is managed
# @param repmgr_config [Hash] repmgr component configuration
def enable_repmgrd_if_configured(host, repmgr_config)
  return if repmgr_config[:auto_failover] == false

  ssh_executor.execute_on_host(host) do
    begin
      execute :sudo, 'systemctl', 'enable', 'repmgrd'
      execute :sudo, 'systemctl', 'restart', 'repmgrd'
    rescue StandardError => e
      warn "Could not enable/restart repmgrd on #{host}: #{e.message}"
      nil
    end
  end
end
989
+
990
# Builds an idempotent script that creates the repmgr role (or resets its
# SUPERUSER attribute and password when it already exists) and creates the
# repmgr database when it is missing.
#
# The database statement ends in \gexec, a psql meta-command, so the script
# is meant to be fed to psql.
#
# @param repmgr_user [String] role name (interpolated as-is)
# @param repmgr_db [String] database name (interpolated as-is)
# @param repmgr_password [String] password; single quotes are doubled
# @return [String] the SQL script
def build_repmgr_setup_sql(repmgr_user, repmgr_db, repmgr_password)
  escaped_password = repmgr_password.gsub("'", "''")

  role_block = [
    'BEGIN',
    " IF NOT EXISTS (SELECT FROM pg_roles WHERE rolname = '#{repmgr_user}') THEN",
    " CREATE USER #{repmgr_user} WITH SUPERUSER PASSWORD '#{escaped_password}';",
    ' ELSE',
    " ALTER USER #{repmgr_user} WITH SUPERUSER PASSWORD '#{escaped_password}';",
    ' END IF;'
  ]

  # A password containing "$$" would close a plain $$-quoted DO body early,
  # so switch to a tagged dollar quote that does not occur in the body.
  body_text = role_block.join("\n")
  delimiter = '$$'
  attempt = 0
  while body_text.include?(delimiter)
    attempt += 1
    delimiter = "$repmgr_setup_#{attempt}$"
  end

  [
    "DO #{delimiter}",
    *role_block,
    "END #{delimiter};",
    '',
    "SELECT 'CREATE DATABASE #{repmgr_db} OWNER #{repmgr_user}'",
    "WHERE NOT EXISTS (SELECT FROM pg_database WHERE datname = '#{repmgr_db}')\\gexec",
    ''
  ].join("\n")
end
1008
+
1009
# Builds an idempotent DO block that creates the replication role with
# REPLICATION LOGIN and the given password, or resets those attributes when
# the role already exists.
#
# @param replication_user [String] role name (interpolated as-is)
# @param replication_password [String] password; single quotes are doubled
# @return [String] the SQL script
def build_replication_user_sql(replication_user, replication_password)
  escaped_password = replication_password.gsub("'", "''")

  role_block = [
    'BEGIN',
    " IF NOT EXISTS (SELECT FROM pg_roles WHERE rolname = '#{replication_user}') THEN",
    " CREATE USER #{replication_user} WITH REPLICATION LOGIN PASSWORD '#{escaped_password}';",
    ' ELSE',
    " ALTER USER #{replication_user} WITH REPLICATION LOGIN PASSWORD '#{escaped_password}';",
    ' END IF;'
  ]

  # A password containing "$$" would close a plain $$-quoted DO body early,
  # so switch to a tagged dollar quote that does not occur in the body.
  body_text = role_block.join("\n")
  delimiter = '$$'
  attempt = 0
  while body_text.include?(delimiter)
    attempt += 1
    delimiter = "$replication_user_#{attempt}$"
  end

  [
    "DO #{delimiter}",
    *role_block,
    "END #{delimiter};",
    ''
  ].join("\n")
end
655
1024
  end
656
1025
  end
657
1026
  end