active_postgres 0.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/LICENSE +23 -0
- data/README.md +158 -0
- data/exe/activepostgres +5 -0
- data/lib/active_postgres/cli.rb +157 -0
- data/lib/active_postgres/cluster_deployment_flow.rb +85 -0
- data/lib/active_postgres/component_resolver.rb +24 -0
- data/lib/active_postgres/components/base.rb +38 -0
- data/lib/active_postgres/components/core.rb +158 -0
- data/lib/active_postgres/components/extensions.rb +99 -0
- data/lib/active_postgres/components/monitoring.rb +55 -0
- data/lib/active_postgres/components/pgbackrest.rb +94 -0
- data/lib/active_postgres/components/pgbouncer.rb +137 -0
- data/lib/active_postgres/components/repmgr.rb +651 -0
- data/lib/active_postgres/components/ssl.rb +86 -0
- data/lib/active_postgres/configuration.rb +190 -0
- data/lib/active_postgres/connection_pooler.rb +429 -0
- data/lib/active_postgres/credentials.rb +17 -0
- data/lib/active_postgres/deployment_flow.rb +154 -0
- data/lib/active_postgres/error_handler.rb +185 -0
- data/lib/active_postgres/failover.rb +83 -0
- data/lib/active_postgres/generators/active_postgres/install_generator.rb +186 -0
- data/lib/active_postgres/health_checker.rb +244 -0
- data/lib/active_postgres/installer.rb +114 -0
- data/lib/active_postgres/log_sanitizer.rb +67 -0
- data/lib/active_postgres/logger.rb +125 -0
- data/lib/active_postgres/performance_tuner.rb +246 -0
- data/lib/active_postgres/rails/database_config.rb +174 -0
- data/lib/active_postgres/rails/migration_guard.rb +25 -0
- data/lib/active_postgres/railtie.rb +28 -0
- data/lib/active_postgres/retry_helper.rb +80 -0
- data/lib/active_postgres/rollback_manager.rb +140 -0
- data/lib/active_postgres/secrets.rb +86 -0
- data/lib/active_postgres/ssh_executor.rb +288 -0
- data/lib/active_postgres/standby_deployment_flow.rb +122 -0
- data/lib/active_postgres/validator.rb +143 -0
- data/lib/active_postgres/version.rb +3 -0
- data/lib/active_postgres.rb +67 -0
- data/lib/tasks/postgres.rake +855 -0
- data/lib/tasks/rolling_update.rake +258 -0
- data/lib/tasks/rotate_credentials.rake +193 -0
- data/templates/pg_hba.conf.erb +47 -0
- data/templates/pgbackrest.conf.erb +43 -0
- data/templates/pgbouncer.ini.erb +55 -0
- data/templates/postgresql.conf.erb +157 -0
- data/templates/repmgr.conf.erb +40 -0
- metadata +224 -0
|
@@ -0,0 +1,651 @@
|
|
|
1
|
+
module ActivePostgres
|
|
2
|
+
module Components
|
|
3
|
+
class Repmgr < Base
|
|
4
|
+
# Runs the full repmgr installation: installs the package, configures the
# primary, attaches each host in config.standby_hosts (in list order), and
# ends with a cluster health check.
#
# @return [Object] the value returned by verify_cluster_health
def install
  puts 'Installing repmgr for High Availability...'

  install_repmgr_package
  setup_primary
  config.standby_hosts.each { |standby| setup_standby(standby) }

  # Final step: confirm the cluster as a whole is healthy.
  puts "\nš„ Performing final health check..."
  verify_cluster_health
end
|
|
18
|
+
|
|
19
|
+
# Removes the repmgr package from every host whose repmgr node is not
# currently reported as running.
#
# Node ids follow the numbering used elsewhere in this class: the primary
# is 1 and standbys are numbered from 2 in config.standby_hosts order. A
# host that is neither gets the sentinel id 999.
def uninstall
  puts 'Uninstalling repmgr...'

  config.all_hosts.each do |host|
    node_id = if host == config.primary_host
                1
              else
                standby_index = config.standby_hosts.index(host)
                standby_index ? standby_index + 2 : 999
              end

    # `repmgr cluster show` prints one row per node with the node id in the
    # FIRST column. Searching for "| <id>" anywhere in the output never
    # looks at that column and instead hits Priority/Timeline values, so
    # anchor the match on the start of each row.
    node_row = /\A\s*#{node_id}\s*\|/

    ssh_executor.execute_on_host(host) do
      cluster_output = begin
        capture(:sudo, '-u', 'postgres', 'repmgr', 'cluster', 'show')
      rescue StandardError
        '' # command failed: treat the node as not running
      end

      node_running = cluster_output.each_line.any? do |line|
        line.match?(node_row) && line.include?('running')
      end

      if node_running
        info "ā Skipping repmgr removal from #{host} - node #{node_id} is running"
      else
        info "Removing repmgr from #{host}"
        # '|| true' keeps the removal best-effort when the package is absent.
        execute :sudo, 'apt-get', 'remove', '-y', '-q', 'postgresql-*-repmgr', '||', 'true'
      end
    end
  end
end
|
|
50
|
+
|
|
51
|
+
# Restarts the repmgrd service on each host in config.all_hosts, but only
# where `systemctl is-active repmgrd` succeeds.
def restart
  puts 'Restarting repmgr...'

  config.all_hosts.each do |node|
    ssh_executor.execute_on_host(node) do
      repmgrd_active = test('systemctl is-active repmgrd')
      execute(:sudo, 'systemctl', 'restart', 'repmgrd') if repmgrd_active
    end
  end
end
|
|
60
|
+
|
|
61
|
+
# Adds a single standby without running any primary setup steps here:
# installs the repmgr package on the standby, then delegates to
# setup_standby.
#
# @param standby_host [String] host to configure as a standby
def setup_standby_only(standby_host)
  puts "Setting up standby #{standby_host} (primary will not be touched)..."

  repmgr_package = "postgresql-#{config.version}-repmgr"
  ssh_executor.execute_on_host(standby_host) do
    execute :sudo, 'DEBIAN_FRONTEND=noninteractive', 'apt-get', 'install', '-y', '-qq', repmgr_package
  end

  setup_standby(standby_host)
end
|
|
72
|
+
|
|
73
|
+
private
|
|
74
|
+
|
|
75
|
+
# Installs the version-specific repmgr apt package
# (postgresql-<version>-repmgr) on every host in config.all_hosts.
def install_repmgr_package
  puts ' Installing repmgr package...'

  package_name = "postgresql-#{config.version}-repmgr"
  config.all_hosts.each do |node|
    ssh_executor.execute_on_host(node) do
      execute :sudo, 'DEBIAN_FRONTEND=noninteractive', 'apt-get', 'install', '-y', '-qq', package_name
    end
  end
end
|
|
86
|
+
|
|
87
|
+
# Configures the primary host for repmgr.
#
# Steps, in order: recreate the cluster through ssh_executor, upload
# postgresql.conf and pg_hba.conf, optionally regenerate SSL certificates,
# restart PostgreSQL and confirm it is online, (re)create the repmgr
# database and superuser, write .pgpass, upload /etc/repmgr.conf, and
# register the node with `repmgr primary register`.
#
# Local variable names are part of the template contract: the ERB templates
# are rendered with this method's `binding`, so they must not be renamed.
#
# @raise [RuntimeError] if the repmgr password is missing, PostgreSQL is not
#   online after the restart, or the registration cannot be verified
def setup_primary
  puts ' Setting up primary with repmgr...'

  host = config.primary_host
  repmgr_config = config.component_config(:repmgr)
  version = config.version
  repmgr_password = normalize_repmgr_password(secrets.resolve('repmgr_password'))
  secrets_obj = secrets
  repmgr_user = config.repmgr_user
  repmgr_db = config.repmgr_database

  # Variables used in ERB templates via binding
  _ = repmgr_config
  _ = secrets_obj

  ssh_executor.recreate_cluster(host, version)

  puts ' Configuring PostgreSQL...'
  core_config = config.component_config(:core)
  component_config = core_config

  # Performance tuning is handled by the Core component
  pg_config = component_config[:postgresql] || {}
  _ = pg_config # Used in ERB template

  upload_template(host, 'postgresql.conf.erb', "/etc/postgresql/#{version}/main/postgresql.conf", binding,
                  owner: 'postgres:postgres')
  upload_template(host, 'pg_hba.conf.erb', "/etc/postgresql/#{version}/main/pg_hba.conf", binding,
                  owner: 'postgres:postgres')

  # Regenerate SSL certificates if SSL is enabled (cluster recreation deleted them)
  if config.component_enabled?(:ssl)
    puts ' Regenerating SSL certificates...'
    regenerate_ssl_certs(host, version)
  end

  ssh_executor.execute_on_host(host) do
    execute :sudo, 'pg_ctlcluster', version.to_s, 'main', 'restart'

    info 'Waiting for PostgreSQL to start...'
    sleep 3

    # Check if cluster is running
    cluster_status = begin
      capture(:sudo, 'pg_lsclusters', '-h')
    rescue StandardError
      ''
    end
    running = cluster_status.lines.any? do |line|
      line.include?(version.to_s) && line.include?('main') && line.include?('online')
    end

    unless running
      error "PostgreSQL cluster #{version}/main is not running!"
      raise 'PostgreSQL service is not running'
    end

    info 'Verifying PostgreSQL is accepting connections...'
    execute :sudo, '-u', 'postgres', 'psql', '-l'

    info 'Creating repmgr database and user...'
    # Double single quotes so the password is a valid SQL string literal.
    escaped_password = repmgr_password.gsub("'", "''")
    sql = [
      "DROP DATABASE IF EXISTS #{repmgr_db}",
      "DROP USER IF EXISTS #{repmgr_user}",
      "CREATE USER #{repmgr_user} WITH SUPERUSER PASSWORD '#{escaped_password}'",
      "CREATE DATABASE #{repmgr_db} OWNER #{repmgr_user}",
      '' # Ensures trailing semicolon after join
    ].join('; ')

    begin
      upload! StringIO.new(sql), '/tmp/setup_repmgr.sql'
      execute :chmod, '644', '/tmp/setup_repmgr.sql'
      execute :sudo, '-u', 'postgres', 'psql', '-p', '5432', '-f', '/tmp/setup_repmgr.sql'
    ensure
      # The script contains the repmgr password and is world-readable, so
      # remove it even when chmod/psql fails.
      execute :rm, '-f', '/tmp/setup_repmgr.sql'
    end

    info 'Reloading PostgreSQL configuration to apply pg_hba.conf changes...'
    execute :sudo, 'pg_ctlcluster', version.to_s, 'main', 'reload'
  end

  setup_pgpass_file(host, repmgr_password)

  upload_template(host, 'repmgr.conf.erb', '/etc/repmgr.conf', binding, mode: '644', owner: 'postgres:postgres')

  ssh_executor.execute_on_host(host) do
    info 'Registering primary with repmgr...'
    execute :sudo, '-u', 'postgres', 'env',
            'HOME=/var/lib/postgresql',
            'repmgr', 'primary', 'register',
            '-f', '/etc/repmgr.conf', '--force'

    # Verify registration succeeded
    sleep 2
    cluster_show = begin
      capture(:sudo, '-u', 'postgres', 'repmgr', 'cluster', 'show', '-f',
              '/etc/repmgr.conf')
    rescue StandardError
      # Placeholder contains neither keyword, so the check below fails.
      'Could not show cluster'
    end

    unless cluster_show.include?('primary') && cluster_show.include?('running')
      error 'ā Primary registration verification failed'
      raise 'Primary registration failed'
    end

    info 'ā Primary successfully registered with repmgr!'
  end
end
|
|
193
|
+
|
|
194
|
+
# Provisions one standby: writes .pgpass, wipes any existing local cluster,
# clones the data directory from the primary with `repmgr standby clone`,
# uploads repmgr/PostgreSQL configuration, starts PostgreSQL, waits for it
# to accept connections, and registers the standby.
#
# Local variable names are part of the template contract: the ERB templates
# are rendered with this method's `binding`, so they must not be renamed.
#
# @param standby_host [String] a host listed in config.standby_hosts
# @raise [RuntimeError] if the host is not a configured standby, the clone
#   does not produce a data directory, or PostgreSQL does not become ready
def setup_standby(standby_host)
  puts " Setting up standby: #{standby_host}..."

  host = standby_host
  repmgr_config = config.component_config(:repmgr)
  primary_replication_host = config.primary_replication_host
  version = config.version
  secrets_obj = secrets
  repmgr_user = config.repmgr_user
  repmgr_db = config.repmgr_database
  postgres_user = config.postgres_user

  # Variables used in ERB templates via binding
  _ = host
  _ = repmgr_config
  _ = secrets_obj

  # Node ids: primary is 1, standbys start at 2 in standby_hosts order.
  standby_index = config.standby_hosts.index(standby_host)
  raise "#{standby_host} is not listed in the configured standby hosts" if standby_index.nil?

  node_id = standby_index + 2
  repmgr_password = normalize_repmgr_password(secrets_obj.resolve('repmgr_password'))

  ensure_primary_registered

  setup_pgpass_file(standby_host, repmgr_password, primary_replication_host)

  ssh_executor.execute_on_host(standby_host) do
    info 'Preparing standby cluster...'
    # Each teardown step is best-effort: failures are ignored.
    begin
      execute :sudo, 'systemctl', 'stop', 'postgresql'
    rescue StandardError
      nil
    end
    begin
      execute :sudo, 'pg_dropcluster', '--stop', version.to_s, 'main'
    rescue StandardError
      nil
    end
    begin
      execute :sudo, 'rm', '-rf', "/etc/postgresql/#{version}/main"
    rescue StandardError
      nil
    end
    begin
      execute :sudo, 'rm', '-rf', "/var/lib/postgresql/#{version}/main"
    rescue StandardError
      nil
    end

    info 'Cloning from primary over private network...'
    # Create a temporary repmgr config for cloning that points to the primary
    # The regular config points to localhost which doesn't work during initial clone
    escaped_password = repmgr_password.gsub("'", "\\\\'")

    temp_repmgr_conf = <<~CONF
      node_id=#{node_id}
      node_name='#{standby_host}'
      conninfo='host=#{primary_replication_host} user=#{repmgr_user} dbname=#{repmgr_db} password=#{escaped_password} connect_timeout=10'
      data_directory='/var/lib/postgresql/#{version}/main'
    CONF

    info 'Creating temporary repmgr config for cloning...'
    upload! StringIO.new(temp_repmgr_conf), '/tmp/repmgr_clone.conf'
    execute :sudo, 'mv', '/tmp/repmgr_clone.conf', '/etc/repmgr_clone.conf'
    execute :sudo, 'chown', 'postgres:postgres', '/etc/repmgr_clone.conf'
    # NOTE(review): this file embeds the repmgr password but is mode 644;
    # consider a tighter mode once the clone user/owner are confirmed.
    execute :sudo, 'chmod', '644', '/etc/repmgr_clone.conf'

    info 'Running: repmgr standby clone (this may take a few minutes to copy the database)...'

    clone_success = false
    begin
      execute :sudo, '-u', postgres_user, 'env',
              'HOME=/var/lib/postgresql',
              'repmgr', 'standby', 'clone',
              '-h', primary_replication_host,
              '-U', repmgr_user,
              '-d', repmgr_db,
              '-f', '/etc/repmgr_clone.conf',
              '--force',
              '--verbose'

      info 'Clone command completed'

      # Check if clone actually succeeded
      if test(:sudo, 'test', '-d', "/var/lib/postgresql/#{version}/main")
        clone_success = true
        info 'Data directory successfully cloned!'
      else
        error "Data directory /var/lib/postgresql/#{version}/main was not created even though command succeeded!"
      end
    rescue SSHKit::Command::Failed => e
      error "Clone command failed with exit code #{e.message}"
    rescue StandardError => e
      error "Clone command raised exception: #{e.class}: #{e.message}"
    ensure
      # Always remove the temporary clone config, including on failure: it
      # contains the repmgr password in clear text.
      begin
        execute :sudo, 'rm', '-f', '/etc/repmgr_clone.conf'
      rescue StandardError => e
        warn "Could not remove /etc/repmgr_clone.conf: #{e.message}"
      end
    end

    # If clone failed, raise error
    unless clone_success
      error 'ā repmgr standby clone failed'
      raise 'repmgr standby clone failed to create data directory'
    end
  end

  # Upload the proper repmgr.conf now that clone is complete
  puts ' Uploading final repmgr configuration...'
  upload_template(standby_host, 'repmgr.conf.erb', '/etc/repmgr.conf', binding, mode: '644',
                                                                                owner: 'postgres:postgres')

  # Create config directory and setup PostgreSQL configuration
  puts ' Setting up PostgreSQL configuration on standby...'
  core_config = config.component_config(:core)
  component_config = core_config

  # Performance tuning is handled by the Core component
  pg_config = component_config[:postgresql] || {}
  _ = pg_config # Used in ERB template

  ssh_executor.execute_on_host(standby_host) do
    # Create config directory if it doesn't exist
    execute :sudo, 'mkdir', '-p', "/etc/postgresql/#{version}/main"
    execute :sudo, 'chown', 'postgres:postgres', "/etc/postgresql/#{version}/main"
  end

  # Upload configuration files
  upload_template(standby_host, 'postgresql.conf.erb', "/etc/postgresql/#{version}/main/postgresql.conf",
                  binding, owner: 'postgres:postgres')
  upload_template(standby_host, 'pg_hba.conf.erb', "/etc/postgresql/#{version}/main/pg_hba.conf", binding,
                  owner: 'postgres:postgres')

  # Regenerate SSL certificates if SSL is enabled (clone doesn't copy config files)
  if config.component_enabled?(:ssl)
    puts ' Regenerating SSL certificates on standby...'
    regenerate_ssl_certs(standby_host, version)
  end

  ssh_executor.execute_on_host(standby_host) do
    info 'Starting PostgreSQL cluster...'
    execute :sudo, 'pg_ctlcluster', version.to_s, 'main', 'start'

    # Wait for PostgreSQL to be ready: up to 12 probes, 3 seconds apart.
    max_attempts = 12
    attempt = 0
    pg_ready = false

    while attempt < max_attempts && !pg_ready
      attempt += 1
      sleep 3
      pg_ready = test(:sudo, '-u', 'postgres', 'pg_isready', '-h', '127.0.0.1', '-p', '5432')

      if !pg_ready && attempt == max_attempts
        error 'ā PostgreSQL failed to start on standby'
        raise 'PostgreSQL failed to start on standby'
      end
    end

    info 'ā PostgreSQL is ready!'
  end

  register_standby_with_primary(standby_host)
end
|
|
355
|
+
|
|
356
|
+
# Sets primary_conninfo on the standby (via ALTER SYSTEM run from a
# temporary SQL file), reloads PostgreSQL, and registers the node with
# `repmgr standby register --force`.
#
# The application_name is the standby's configured 'label' when present,
# otherwise "standby-" plus the part of the host name before the first dot.
#
# @param standby_host [String] standby to register
def register_standby_with_primary(standby_host)
  label = config.standby_config_for(standby_host)&.dig('label')
  label ||= "standby-#{standby_host.split('.').first}"

  # Double single quotes so the conninfo is a valid SQL string literal.
  quoted_conninfo = build_primary_conninfo(label).gsub("'", "''")
  statement = "ALTER SYSTEM SET primary_conninfo = '#{quoted_conninfo}';\n"
  sql_path = '/tmp/set_primary_conninfo.sql'
  pg_version = config.version.to_s
  pg_user = config.postgres_user

  ssh_executor.execute_on_host(standby_host) do
    info 'Setting primary_conninfo with application_name...'
    upload! StringIO.new(statement), sql_path
    execute :sudo, 'chown', "#{pg_user}:#{pg_user}", sql_path

    begin
      execute :sudo, '-u', pg_user, 'psql', '-f', sql_path
    ensure
      # The temporary script is removed whether or not psql succeeded.
      execute :sudo, 'rm', '-f', sql_path
    end

    execute :sudo, 'pg_ctlcluster', pg_version, 'main', 'reload'

    info 'Registering standby with repmgr...'

    execute :sudo, '-u', pg_user, 'env',
            'HOME=/var/lib/postgresql',
            'repmgr', '-f', '/etc/repmgr.conf',
            'standby', 'register',
            '--force'

    info 'ā Standby successfully registered!'
  end
end
|
|
391
|
+
|
|
392
|
+
# Runs a series of read-only checks against the primary and every standby
# and reports the results through the remote logger.
#
# On the primary: pg_lsclusters must show <version>/main online, `repmgr
# cluster show` must list node 1 as a running primary, and (when standbys
# are configured) the replication slots are listed. On each standby:
# pg_lsclusters must show the cluster online, pg_is_in_recovery() must
# report true, and the replay lag is printed. Finally the full `repmgr
# cluster show` output is logged after passing through LogSanitizer.
#
# Failures are logged and recorded rather than raised.
#
# @return [Boolean] true when no check recorded a failure, false otherwise
def verify_cluster_health
  puts 'Verifying PostgreSQL HA cluster health...'

  # Copied into locals so the remote blocks below can use them as closures.
  primary_host = config.primary_host
  standby_hosts = config.standby_hosts
  version = config.version
  postgres_user = config.postgres_user
  # Flipped to false by any failing check inside the blocks below.
  all_healthy = true

  # Check primary
  ssh_executor.execute_on_host(primary_host) do
    info "Checking primary node #{primary_host}..."

    # Check PostgreSQL is running
    begin
      clusters = capture(:sudo, 'pg_lsclusters')
    rescue StandardError => e
      # An empty listing makes the "online" match below fail.
      clusters = ''
      error "Failed to check clusters on primary: #{e.message}"
    end

    if clusters.match?(/#{version}.*main.*online/)
      info 'ā PostgreSQL is running on primary'
    else
      error 'ā PostgreSQL is not running on primary'
      all_healthy = false
    end

    # Check repmgr registration
    cluster_output = begin
      capture(:sudo, '-u', 'postgres', 'repmgr', 'cluster', 'show')
    rescue StandardError
      ''
    end
    # Primary always has node_id=1, check if it's registered and running
    if cluster_output.match?(/\s+1\s+\|.*primary.*\*\s+running/)
      info 'ā Primary is registered with repmgr'
    else
      error 'ā Primary is not registered with repmgr'
      all_healthy = false
    end

    # Check replication slots (if standbys exist)
    if standby_hosts.any?
      begin
        slots_sql = 'SELECT slot_name, active FROM pg_replication_slots;'
        # The SQL is passed wrapped in single quotes — presumably because the
        # arguments end up on a shell command line; confirm against ssh_executor.
        slots = capture(:sudo, '-u', postgres_user, 'psql', '-c', "'#{slots_sql}'")
        info "Replication slots:\n#{slots}"
      rescue StandardError => e
        error "Failed to fetch replication slots: #{e.message}"
        all_healthy = false
      end
    end
  end

  # Check standbys
  standby_hosts.each do |standby_host|
    ssh_executor.execute_on_host(standby_host) do
      info "Checking standby node #{standby_host}..."

      # Check PostgreSQL is running
      begin
        clusters = capture(:sudo, 'pg_lsclusters')
      rescue StandardError => e
        clusters = ''
        error "Failed to check clusters on #{standby_host}: #{e.message}"
      end

      if clusters.match?(/#{version}.*main.*online/)
        info 'ā PostgreSQL is running on standby'
      else
        error 'ā PostgreSQL is not running on standby'
        all_healthy = false
      end

      # Check replication status
      begin
        recovery_sql = 'SELECT pg_is_in_recovery();'
        rep_status = capture(:sudo, '-u', postgres_user, 'psql', '-t', '-c', "'#{recovery_sql}'")
        # Any 't' in the tuples-only output is taken to mean "in recovery".
        if rep_status.include?('t')
          info 'ā Standby is in recovery mode (receiving replication)'
        else
          error 'ā Standby is not in recovery mode'
          all_healthy = false
        end
      rescue StandardError => e
        error "Failed to check recovery status: #{e.message}"
        all_healthy = false
      end

      # Check lag
      begin
        lag_sql = 'SELECT EXTRACT(EPOCH FROM (now() - pg_last_xact_replay_timestamp()))::int AS lag;'
        lag_result = capture(:sudo, '-u', postgres_user, 'psql', '-t', '-c', "'#{lag_sql}'").strip
        # The lag is only reported; no threshold is applied to it.
        info "Replication lag: #{lag_result}"
      rescue StandardError => e
        error "Failed to get replication lag: #{e.message}"
        all_healthy = false
      end
    end
  end

  # Show final cluster status
  ssh_executor.execute_on_host(primary_host) do
    info 'Final cluster status:'
    cluster_show = begin
      capture(:sudo, '-u', 'postgres', 'repmgr', 'cluster', 'show')
    rescue StandardError
      'Error'
    end
    # Sanitized before logging — the output includes connection strings.
    safe_show = LogSanitizer.sanitize(cluster_show)
    info safe_show
  end

  if all_healthy
    puts 'ā
PostgreSQL HA cluster is healthy!'
  else
    puts 'ā ļø PostgreSQL HA cluster has issues - check the errors above'
  end

  all_healthy
end
|
|
514
|
+
|
|
515
|
+
# Writes /var/lib/postgresql/.pgpass on +host+ so the password does not
# have to appear on command lines. The file is uploaded to /tmp first and
# then moved into place, owned by postgres:postgres with mode 600.
#
# @param host [String] host that receives the file
# @param password [String] repmgr password to store
# @param primary_ip [String, nil] optional extra host to add entries for
def setup_pgpass_file(host, password, primary_ip = nil)
  # Line format: hostname:port:database:username:password
  file_body = build_pgpass_content(host, password, primary_ip)
  staging_path = '/tmp/.pgpass'
  final_path = '/var/lib/postgresql/.pgpass'

  ssh_executor.execute_on_host(host) do
    upload! StringIO.new(file_body), staging_path
    execute :sudo, 'mv', staging_path, final_path
    execute :sudo, 'chown', 'postgres:postgres', final_path
    execute :sudo, 'chmod', '600', final_path # Must be 600 for security

    info 'ā Configured .pgpass file for secure authentication'
  end
end
|
|
530
|
+
|
|
531
|
+
# Builds the text of a .pgpass file for the repmgr user.
#
# Entries are always emitted for localhost and 127.0.0.1, then for
# +primary_ip+ when given, then for the host's own replication address when
# it is not a loopback name. Each host gets one line for the repmgr
# database and one wildcard-database line, all on port 5432.
#
# @param host [String] host whose replication address is looked up
# @param password [String] password, escaped via escape_pgpass_value
# @param primary_ip [String, nil] optional primary address
# @return [String] newline-terminated file content
def build_pgpass_content(host, password, primary_ip)
  secret = escape_pgpass_value(password)
  user = config.repmgr_user
  database = config.repmgr_database
  loopbacks = %w[localhost 127.0.0.1]

  line_for = ->(hostname, dbname) { "#{hostname}:5432:#{dbname}:#{user}:#{secret}" }

  # Loopback entries: both hosts for the repmgr database, then both for '*'.
  lines = [database, '*'].flat_map do |dbname|
    loopbacks.map { |hostname| line_for.call(hostname, dbname) }
  end

  extra_hosts = []
  extra_hosts << primary_ip if primary_ip

  own_address = config.replication_host_for(host)
  extra_hosts << own_address if own_address && !loopbacks.include?(own_address)

  extra_hosts.each do |hostname|
    lines << line_for.call(hostname, database)
    lines << line_for.call(hostname, '*')
  end

  lines.join("\n") + "\n"
end
|
|
556
|
+
|
|
557
|
+
# Escapes a value for use as a .pgpass field by prefixing every backslash
# and every colon with a backslash.
#
# @param value [#to_s, nil] raw field value
# @return [String] escaped value; '' when +value+ is nil
def escape_pgpass_value(value)
  return '' if value.nil?

  # One pass over both special characters; the block result is inserted
  # literally, so no replacement-string escaping is involved.
  value.to_s.gsub(/[\\:]/) { |special| "\\#{special}" }
end
|
|
563
|
+
|
|
564
|
+
# Builds the space-separated key=value connection string a standby uses to
# reach the primary.
#
# @param standby_label [String] value for application_name
# @return [String] "host=… user=… dbname=… application_name=…"
def build_primary_conninfo(standby_label)
  settings = {
    'host' => config.primary_replication_host,
    'user' => config.repmgr_user,
    'dbname' => config.repmgr_database,
    'application_name' => standby_label
  }

  settings.map { |key, value| "#{key}=#{value}" }.join(' ')
end
|
|
570
|
+
|
|
571
|
+
# Converts the resolved secret to a string and strips trailing whitespace.
#
# @param raw_password [#to_s, nil] value returned by the secrets lookup
# @return [String] the password without trailing whitespace
# @raise [RuntimeError] when nothing is left after stripping
def normalize_repmgr_password(raw_password)
  trimmed = raw_password.to_s.rstrip
  return trimmed unless trimmed.empty?

  raise 'repmgr_password secret is missing'
end
|
|
578
|
+
|
|
579
|
+
# Reloads the PostgreSQL configuration with pg_ctlcluster; if that raises,
# logs a warning and reloads the postgresql@<version>-main unit through
# systemctl instead.
def reload_postgres_cluster
  begin
    execute(:sudo, 'pg_ctlcluster', config.version.to_s, 'main', 'reload')
  rescue StandardError => failure
    warn "Failed to reload via pg_ctlcluster: #{failure.message}, falling back to systemctl"
    execute(:sudo, 'systemctl', 'reload', "postgresql@#{config.version}-main")
  end
end
|
|
586
|
+
|
|
587
|
+
# Looks up the primary in `repmgr cluster show` on the primary host and
# logs whether it appears as node 1 with the "primary" role.
#
# This is advisory only: when the row is missing a warning is logged and
# registration is still assumed, so the method returns true whenever the
# remote block ran.
#
# @return [Boolean] true once the remote check has executed
def ensure_primary_registered
  host = config.primary_host
  is_registered = false

  ssh_executor.execute_on_host(host) do
    info 'Verifying primary registration...'

    cluster_output = begin
      capture(:sudo, '-u', 'postgres', 'env',
              'HOME=/var/lib/postgresql',
              'repmgr', 'cluster', 'show')
    rescue StandardError
      ''
    end

    # Match on the ID and Role columns of a single row. A plain substring
    # search for "| 1" also hits the Priority/Timeline columns, and
    # "primary" can appear inside node names.
    primary_row_found = cluster_output.each_line.any? do |line|
      line.match?(/\A\s*1\s*\|[^|]*\|\s*primary\s*\|/)
    end

    if primary_row_found
      info 'ā Primary is registered (node_id=1)'
    else
      warn "ā Primary not found in cluster, assuming it's registered"
    end
    is_registered = true
  end

  is_registered
end
|
|
613
|
+
|
|
614
|
+
# Places server.crt and server.key in /etc/postgresql/<version>/main on
# +host+. When both 'ssl_cert' and 'ssl_key' secrets resolve, those are
# uploaded; otherwise a self-signed certificate is generated remotely with
# openssl (CN and validity come from the :ssl component config, defaulting
# to the host name and 3650 days).
#
# @param host [String] target host
# @param version [Integer, String] PostgreSQL major version
def regenerate_ssl_certs(host, version)
  ssl_config = config.component_config(:ssl)
  ssl_cert = secrets.resolve('ssl_cert')
  ssl_key = secrets.resolve('ssl_key')

  conf_dir = "/etc/postgresql/#{version}/main"
  cert_path = "#{conf_dir}/server.crt"
  key_path = "#{conf_dir}/server.key"

  if ssl_cert && ssl_key
    puts ' Using SSL certificates from secrets...'
    ssh_executor.upload_file(host, ssl_cert, cert_path, mode: '644', owner: 'postgres:postgres')
    ssh_executor.upload_file(host, ssl_key, key_path, mode: '600', owner: 'postgres:postgres')
  else
    puts ' Generating self-signed SSL certificates...'

    ssh_executor.execute_on_host(host) do
      validity_days = ssl_config[:cert_days] || 3650
      common_name = ssl_config[:common_name] || host

      info "Generating self-signed certificate (CN=#{common_name}, valid for #{validity_days} days)..."

      execute :sudo, 'openssl', 'req', '-new', '-x509', '-days', validity_days.to_s,
              '-nodes', '-text',
              '-out', cert_path,
              '-keyout', key_path,
              '-subj', "/CN=#{common_name}"

      [cert_path, key_path].each { |path| execute :sudo, 'chown', 'postgres:postgres', path }
      execute :sudo, 'chmod', '644', cert_path
      execute :sudo, 'chmod', '600', key_path
    end
  end
end
|
|
649
|
+
end
|
|
650
|
+
end
|
|
651
|
+
end
|