active_postgres 0.9.0 → 0.9.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +58 -3
- data/lib/active_postgres/cli.rb +7 -0
- data/lib/active_postgres/components/monitoring.rb +165 -0
- data/lib/active_postgres/components/pgbackrest.rb +59 -9
- data/lib/active_postgres/components/pgbouncer.rb +17 -5
- data/lib/active_postgres/components/repmgr.rb +23 -6
- data/lib/active_postgres/configuration.rb +19 -2
- data/lib/active_postgres/generators/active_postgres/install_generator.rb +1 -0
- data/lib/active_postgres/generators/active_postgres/templates/postgres.yml.erb +10 -0
- data/lib/active_postgres/health_checker.rb +29 -7
- data/lib/active_postgres/installer.rb +9 -0
- data/lib/active_postgres/overview.rb +351 -0
- data/lib/active_postgres/ssh_executor.rb +8 -5
- data/lib/active_postgres/version.rb +1 -1
- data/lib/active_postgres.rb +1 -0
- data/lib/tasks/postgres.rake +67 -0
- data/templates/repmgr.conf.erb +3 -0
- data/templates/repmgr_dns_failover.sh.erb +16 -6
- metadata +2 -1
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: c89909c556964abfe7dbede0550c7fa1fbd60fc332e667fce5de858114ba2146
|
|
4
|
+
data.tar.gz: e715719391d53f89cb394203018a5e2951f2602018ef97a417ee369d443e410c
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 603d5b28ef5730d595b24bf36b517148a882c5889d339675545fc98ffda547d8c1ead15ff8ee6a048fd6b117b5e257309460e7b5e25a9ee5eb4e55c730050ba4
|
|
7
|
+
data.tar.gz: 50f632e74185dc90d7da5e6ece125fc625b04665cc4859ca7fd3d136e82df6e295b5fde10d8a2239675ce67a1a527f24ef8ec00cfeff79c57d84601a11168105
|
data/README.md
CHANGED
|
@@ -69,6 +69,8 @@ postgres:
|
|
|
69
69
|
```bash
|
|
70
70
|
rake postgres:setup # Deploy cluster
|
|
71
71
|
rake postgres:status # Check health
|
|
72
|
+
rake postgres:overview # Control tower overview
|
|
73
|
+
rake postgres:help # Task summary
|
|
72
74
|
```
|
|
73
75
|
|
|
74
76
|
## Common Operations
|
|
@@ -83,8 +85,10 @@ rake postgres:promote[host] # Promote standby to primary
|
|
|
83
85
|
|
|
84
86
|
# Backups (requires pgBackRest)
|
|
85
87
|
rake postgres:backup:full
|
|
88
|
+
rake postgres:backup:incremental
|
|
86
89
|
rake postgres:backup:list
|
|
87
90
|
rake postgres:backup:restore[backup_id]
|
|
91
|
+
rake postgres:backup:restore_at["2026-01-29 01:15:00",promote]
|
|
88
92
|
|
|
89
93
|
# Credential rotation (zero downtime)
|
|
90
94
|
rake postgres:credentials:rotate_random
|
|
@@ -101,6 +105,7 @@ rake postgres:update:patch # Security patches
|
|
|
101
105
|
active_postgres setup --environment=production
|
|
102
106
|
active_postgres setup-standby HOST
|
|
103
107
|
active_postgres status
|
|
108
|
+
active_postgres overview
|
|
104
109
|
active_postgres promote HOST
|
|
105
110
|
active_postgres backup --type=full
|
|
106
111
|
active_postgres cache-secrets
|
|
@@ -115,21 +120,37 @@ active_postgres cache-secrets
|
|
|
115
120
|
| **repmgr** | HA & automatic failover | `repmgr: {enabled: true}` |
|
|
116
121
|
| **PgBouncer** | Connection pooling | `pgbouncer: {enabled: true}` |
|
|
117
122
|
| **pgBackRest** | Backup & restore | `pgbackrest: {enabled: true}` |
|
|
118
|
-
| **Monitoring** | postgres_exporter
|
|
123
|
+
| **Monitoring** | postgres_exporter + optional node_exporter/Grafana | `monitoring: {enabled: true}` |
|
|
119
124
|
| **SSL** | Encrypted connections | `ssl: {enabled: true}` |
|
|
120
125
|
| **Extensions** | pgvector, PostGIS, etc. | `extensions: {enabled: true, list: [pgvector]}` |
|
|
121
126
|
|
|
122
127
|
### Monitoring credentials
|
|
123
128
|
|
|
124
|
-
`postgres_exporter` uses a dedicated `pg_monitor` user. Provide a password in secrets and optionally set the username
|
|
129
|
+
`postgres_exporter` uses a dedicated `pg_monitor` user. Provide a password in secrets and optionally set the username.
|
|
130
|
+
Enable node_exporter for host-level metrics, and Grafana if you want a local UI:
|
|
125
131
|
|
|
126
132
|
```yaml
|
|
127
133
|
components:
|
|
128
|
-
monitoring:
|
|
134
|
+
monitoring:
|
|
135
|
+
enabled: true
|
|
136
|
+
user: postgres_exporter
|
|
137
|
+
node_exporter: true
|
|
138
|
+
node_exporter_port: 9100
|
|
139
|
+
# node_exporter_listen_address: 10.8.0.10
|
|
140
|
+
# grafana:
|
|
141
|
+
# enabled: true
|
|
142
|
+
# host: grafana.example.com
|
|
143
|
+
# listen_address: 10.8.0.10
|
|
144
|
+
# port: 3000
|
|
145
|
+
# prometheus_url: http://prometheus.example.com:9090
|
|
129
146
|
secrets:
|
|
130
147
|
monitoring_password: $POSTGRES_MONITORING_PASSWORD
|
|
148
|
+
# grafana_admin_password: $GRAFANA_ADMIN_PASSWORD
|
|
131
149
|
```
|
|
132
150
|
|
|
151
|
+
When Grafana is enabled, `grafana_admin_password` is required and Grafana will be installed
|
|
152
|
+
on the configured host. If `prometheus_url` is provided, a Prometheus datasource is provisioned.
|
|
153
|
+
|
|
133
154
|
### pgBackRest S3-compatible endpoints
|
|
134
155
|
|
|
135
156
|
For Tigris/MinIO/Wasabi/DO Spaces, set a custom endpoint and use path-style URLs:
|
|
@@ -145,6 +166,35 @@ components:
|
|
|
145
166
|
s3_uri_style: path
|
|
146
167
|
```
|
|
147
168
|
|
|
169
|
+
### Point-in-time recovery (PITR)
|
|
170
|
+
|
|
171
|
+
Ensure WAL retention is long enough for your recovery window:
|
|
172
|
+
|
|
173
|
+
```yaml
|
|
174
|
+
components:
|
|
175
|
+
pgbackrest:
|
|
176
|
+
retention_full: 7
|
|
177
|
+
retention_archive: 14
|
|
178
|
+
# You can schedule full + incremental separately:
|
|
179
|
+
# schedule_full: "0 2 * * *"
|
|
180
|
+
# schedule_incremental: "0 * * * *"
|
|
181
|
+
```
|
|
182
|
+
|
|
183
|
+
Run a PITR restore:
|
|
184
|
+
|
|
185
|
+
```bash
|
|
186
|
+
rake postgres:backup:restore_at["2026-01-29 01:15:00",promote]
|
|
187
|
+
```
|
|
188
|
+
|
|
189
|
+
If you want the old primary to rejoin without a full re-clone after failover,
|
|
190
|
+
enable pg_rewind support in repmgr:
|
|
191
|
+
|
|
192
|
+
```yaml
|
|
193
|
+
components:
|
|
194
|
+
repmgr:
|
|
195
|
+
use_rewind: true
|
|
196
|
+
```
|
|
197
|
+
|
|
148
198
|
### Stable app endpoint with PgBouncer
|
|
149
199
|
|
|
150
200
|
If you run PgBouncer on each PostgreSQL node and want a fixed app URL, enable `follow_primary`.
|
|
@@ -175,6 +225,8 @@ components:
|
|
|
175
225
|
enabled: true
|
|
176
226
|
provider: dnsmasq
|
|
177
227
|
domain: mesh.internal
|
|
228
|
+
# Or use multiple domains during cutover:
|
|
229
|
+
# domains: [mesh.internal, mesh.v2.internal]
|
|
178
230
|
dns_servers:
|
|
179
231
|
- host: 18.170.173.14
|
|
180
232
|
private_ip: 10.8.0.10
|
|
@@ -182,6 +234,9 @@ components:
|
|
|
182
234
|
private_ip: 10.8.0.110
|
|
183
235
|
primary_record: db-primary.mesh.internal
|
|
184
236
|
replica_record: db-replica.mesh.internal
|
|
237
|
+
# Or multiple records explicitly:
|
|
238
|
+
# primary_records: [db-primary.mesh.internal, db-primary.mesh.v2.internal]
|
|
239
|
+
# replica_records: [db-replica.mesh.internal, db-replica.mesh.v2.internal]
|
|
185
240
|
```
|
|
186
241
|
|
|
187
242
|
This installs an event hook so repmgr updates `/etc/dnsmasq.d/active_postgres.conf`
|
data/lib/active_postgres/cli.rb
CHANGED
|
@@ -49,6 +49,13 @@ module ActivePostgres
|
|
|
49
49
|
health_checker.show_status
|
|
50
50
|
end
|
|
51
51
|
|
|
52
|
+
desc 'overview', 'Show control tower overview'
|
|
53
|
+
# CLI entry point for the `overview` command: loads the configuration and
# renders the control tower overview for it.
def overview
  Overview.new(load_config).show
end
|
|
58
|
+
|
|
52
59
|
desc 'health', 'Run health checks'
|
|
53
60
|
def health
|
|
54
61
|
config = load_config
|
|
@@ -11,6 +11,8 @@ module ActivePostgres
|
|
|
11
11
|
config.all_hosts.each do |host|
|
|
12
12
|
install_on_host(host)
|
|
13
13
|
end
|
|
14
|
+
|
|
15
|
+
install_grafana_if_enabled
|
|
14
16
|
end
|
|
15
17
|
|
|
16
18
|
def uninstall
|
|
@@ -21,8 +23,16 @@ module ActivePostgres
|
|
|
21
23
|
execute :sudo, 'systemctl', 'stop', 'prometheus-postgres-exporter'
|
|
22
24
|
execute :sudo, 'systemctl', 'disable', 'prometheus-postgres-exporter'
|
|
23
25
|
execute :sudo, 'apt-get', 'remove', '-y', 'prometheus-postgres-exporter'
|
|
26
|
+
|
|
27
|
+
if node_exporter_enabled?
|
|
28
|
+
execute :sudo, 'systemctl', 'stop', 'prometheus-node-exporter'
|
|
29
|
+
execute :sudo, 'systemctl', 'disable', 'prometheus-node-exporter'
|
|
30
|
+
execute :sudo, 'apt-get', 'remove', '-y', 'prometheus-node-exporter'
|
|
31
|
+
end
|
|
24
32
|
end
|
|
25
33
|
end
|
|
34
|
+
|
|
35
|
+
uninstall_grafana_if_enabled
|
|
26
36
|
end
|
|
27
37
|
|
|
28
38
|
def restart
|
|
@@ -31,6 +41,7 @@ module ActivePostgres
|
|
|
31
41
|
config.all_hosts.each do |host|
|
|
32
42
|
ssh_executor.execute_on_host(host) do
|
|
33
43
|
execute :sudo, 'systemctl', 'restart', 'prometheus-postgres-exporter'
|
|
44
|
+
execute :sudo, 'systemctl', 'restart', 'prometheus-node-exporter' if node_exporter_enabled?
|
|
34
45
|
end
|
|
35
46
|
end
|
|
36
47
|
end
|
|
@@ -63,6 +74,8 @@ module ActivePostgres
|
|
|
63
74
|
end
|
|
64
75
|
|
|
65
76
|
puts " Metrics available at: http://#{host}:#{exporter_port}/metrics"
|
|
77
|
+
|
|
78
|
+
install_node_exporter(host, monitoring_config) if node_exporter_enabled?
|
|
66
79
|
end
|
|
67
80
|
|
|
68
81
|
def ensure_monitoring_user
|
|
@@ -134,6 +147,158 @@ module ActivePostgres
|
|
|
134
147
|
password
|
|
135
148
|
end
|
|
136
149
|
|
|
150
|
+
# Tells whether node_exporter should be managed alongside postgres_exporter.
# Only a literal `true` under monitoring.node_exporter enables it; a missing
# key or any other value counts as disabled.
#
# @return [Boolean]
def node_exporter_enabled?
  flag = config.component_config(:monitoring).fetch(:node_exporter, false)
  flag == true
end
|
|
154
|
+
|
|
155
|
+
# Installs the prometheus-node-exporter package on +host+, applies the
# listen-address override (if any) and enables + restarts the service.
#
# @param host [String] target host
# @param monitoring_config [Hash] monitoring component settings; reads
#   :node_exporter_port (default 9100) and :node_exporter_listen_address
def install_node_exporter(host, monitoring_config)
  service = 'prometheus-node-exporter'
  port = monitoring_config[:node_exporter_port] || 9100
  bind_address = monitoring_config[:node_exporter_listen_address].to_s.strip

  puts " Installing node_exporter on #{host}..."
  ssh_executor.execute_on_host(host) do
    execute :sudo, 'apt-get', 'install', '-y', '-qq', service
  end

  # The drop-in must be in place before the restart below picks it up.
  configure_node_exporter_service(host, port, bind_address)

  ssh_executor.execute_on_host(host) do
    %w[enable restart].each do |action|
      execute :sudo, 'systemctl', action, service
    end
  end

  puts " Node metrics available at: http://#{host}:#{port}/metrics"
end
|
|
173
|
+
|
|
174
|
+
# Writes a systemd drop-in that makes node_exporter listen on a non-default
# address/port. Does nothing when the effective listen value is ":9100".
#
# @param host [String] target host
# @param port [Integer, String] port to listen on
# @param listen_address [String] bind address; empty means all interfaces
def configure_node_exporter_service(host, port, listen_address)
  listen = listen_address.empty? ? ":#{port}" : "#{listen_address}:#{port}"
  return if listen == ':9100'

  dropin_dir = '/etc/systemd/system/prometheus-node-exporter.service.d'
  dropin_file = "#{dropin_dir}/override.conf"
  staging_file = '/tmp/prometheus-node-exporter.override'

  # The empty ExecStart= clears the packaged command before redefining it.
  override = [
    '[Service]',
    'ExecStart=',
    "ExecStart=/usr/bin/prometheus-node-exporter --web.listen-address=#{listen}"
  ].join("\n") + "\n"

  ssh_executor.execute_on_host(host) do
    execute :sudo, 'mkdir', '-p', dropin_dir
    # Upload to /tmp first, then move into place with sudo.
    upload! StringIO.new(override), staging_file
    execute :sudo, 'mv', staging_file, dropin_file
    execute :sudo, 'chown', 'root:root', dropin_file
    execute :sudo, 'chmod', '644', dropin_file
    execute :sudo, 'systemctl', 'daemon-reload'
  end
end
|
|
199
|
+
|
|
200
|
+
# Installs Grafana on monitoring.grafana.host when monitoring.grafana.enabled
# is set, then applies the listen override and (optionally) a Prometheus
# datasource.
#
# Differences from a naive apt setup, all deliberate:
# * The repository key is de-armored into the keyring, because apt only
#   accepts binary keyrings under a `.gpg` file name.
# * The sources list is uploaded and moved into place with sudo instead of
#   `sudo sh -c 'echo ... > file'`: command arguments are joined with spaces
#   before reaching the remote shell, so the redirect would run unprivileged.
# * The admin password is shell-escaped for the same reason.
# NOTE(review): assumes the executor joins arguments without quoting
# (SSHKit-style) — confirm against SSHExecutor.
#
# @raise [Error] when Grafana is enabled but host or admin password is missing
def install_grafana_if_enabled
  require 'shellwords'

  monitoring_config = config.component_config(:monitoring)
  grafana_config = monitoring_config[:grafana] || {}
  return unless grafana_config[:enabled]

  host = grafana_config[:host].to_s.strip
  raise Error, 'monitoring.grafana.host is required when grafana is enabled' if host.empty?

  admin_password = normalize_grafana_password(secrets.resolve('grafana_admin_password'))
  prometheus_url = grafana_config[:prometheus_url]
  listen_address = grafana_config[:listen_address].to_s.strip
  port = (grafana_config[:port] || 3000).to_i

  keyring = '/usr/share/keyrings/grafana.gpg'
  armored_key = '/tmp/active_postgres_grafana.key'
  staged_list = '/tmp/active_postgres_grafana.list'
  sources_list = '/etc/apt/sources.list.d/grafana.list'
  sources_entry = "deb [signed-by=#{keyring}] https://apt.grafana.com stable main\n"
  escaped_password = Shellwords.escape(admin_password)

  puts "Installing Grafana on #{host}..."

  ssh_executor.execute_on_host(host) do
    execute :sudo, 'apt-get', 'install', '-y', '-qq',
            'apt-transport-https', 'software-properties-common', 'wget', 'gnupg'

    # Fetch the armored key, convert it to a binary keyring, drop the temp copy.
    execute :sudo, 'wget', '-q', '-O', armored_key, 'https://apt.grafana.com/gpg.key'
    execute :sudo, 'gpg', '--batch', '--yes', '-o', keyring, '--dearmor', armored_key
    execute :sudo, 'chmod', '644', keyring
    execute :sudo, 'rm', '-f', armored_key

    upload! StringIO.new(sources_entry), staged_list
    execute :sudo, 'mv', staged_list, sources_list
    execute :sudo, 'chown', 'root:root', sources_list
    execute :sudo, 'chmod', '644', sources_list

    execute :sudo, 'apt-get', 'update', '-qq'
    execute :sudo, 'apt-get', 'install', '-y', '-qq', 'grafana'
    execute :sudo, 'systemctl', 'enable', '--now', 'grafana-server'
    execute :sudo, 'grafana-cli', 'admin', 'reset-admin-password', escaped_password
  end

  configure_grafana_service(host, listen_address, port)
  configure_grafana_datasource(host, prometheus_url) if prometheus_url

  puts " Grafana available at: http://#{host}:#{port}"
end
|
|
231
|
+
|
|
232
|
+
# Writes a systemd drop-in for grafana-server that sets the HTTP bind address
# and/or port through GF_SERVER_* environment variables, then restarts it.
# Does nothing when no address is given and the port is the default 3000.
#
# The unit file is assembled line by line so every directive starts at
# column 0, and values go through escape_systemd_env like the file's other
# quoted Environment= entries.
#
# @param host [String] target host
# @param listen_address [String] bind address; empty leaves the default
# @param port [Integer, nil] HTTP port; 3000 or nil leaves the default
def configure_grafana_service(host, listen_address, port)
  settings = {}
  settings['GF_SERVER_HTTP_ADDR'] = listen_address unless listen_address.empty?
  settings['GF_SERVER_HTTP_PORT'] = port if port && port != 3000
  return if settings.empty?

  env_lines = settings.map do |name, value|
    "Environment=\"#{name}=#{escape_systemd_env(value)}\""
  end
  override = (['[Service]'] + env_lines).join("\n") + "\n"
  dropin_file = '/etc/systemd/system/grafana-server.service.d/override.conf'

  ssh_executor.execute_on_host(host) do
    execute :sudo, 'mkdir', '-p', '/etc/systemd/system/grafana-server.service.d'
    # Upload to /tmp first, then move into place with sudo.
    upload! StringIO.new(override), '/tmp/active_postgres_grafana.override'
    execute :sudo, 'mv', '/tmp/active_postgres_grafana.override', dropin_file
    execute :sudo, 'chown', 'root:root', dropin_file
    execute :sudo, 'chmod', '644', dropin_file
    execute :sudo, 'systemctl', 'daemon-reload'
    execute :sudo, 'systemctl', 'restart', 'grafana-server'
  end
end
|
|
254
|
+
|
|
255
|
+
# Removes Grafana, its apt source, keyring and the provisioned datasource
# from the configured Grafana host. Silently does nothing when Grafana is not
# enabled or no host is configured.
def uninstall_grafana_if_enabled
  grafana_config = config.component_config(:monitoring)[:grafana] || {}
  return unless grafana_config[:enabled]

  host = grafana_config[:host].to_s.strip
  return if host.empty?

  # Each entry is run as `sudo <entry...>`, in this order.
  removal_steps = [
    %w[systemctl stop grafana-server],
    %w[systemctl disable grafana-server],
    %w[apt-get remove -y grafana],
    %w[rm -f /etc/apt/sources.list.d/grafana.list],
    %w[rm -f /usr/share/keyrings/grafana.gpg],
    %w[rm -rf /etc/grafana/provisioning/datasources/active_postgres.yml]
  ]

  ssh_executor.execute_on_host(host) do
    removal_steps.each { |step| execute :sudo, *step }
  end
end
|
|
272
|
+
|
|
273
|
+
# Provisions a default Prometheus datasource for Grafana on +host+ and
# restarts grafana-server so it is picked up.
#
# The provisioning document is produced by the YAML library rather than by
# string interpolation, so a URL containing YAML-significant characters
# (": ", " #", quotes) is quoted correctly and list nesting is always valid.
#
# @param host [String] Grafana host
# @param prometheus_url [String] URL Grafana should query
def configure_grafana_datasource(host, prometheus_url)
  require 'yaml'

  provisioning = {
    'apiVersion' => 1,
    'datasources' => [
      {
        'name' => 'Prometheus',
        'type' => 'prometheus',
        'access' => 'proxy',
        'url' => prometheus_url.to_s,
        'isDefault' => true
      }
    ]
  }
  datasource = YAML.dump(provisioning)
  target = '/etc/grafana/provisioning/datasources/active_postgres.yml'

  ssh_executor.execute_on_host(host) do
    execute :sudo, 'mkdir', '-p', '/etc/grafana/provisioning/datasources'
    # Upload to /tmp first, then move into place with sudo.
    upload! StringIO.new(datasource), '/tmp/active_postgres_grafana_ds.yml'
    execute :sudo, 'mv', '/tmp/active_postgres_grafana_ds.yml', target
    execute :sudo, 'chown', 'root:root', target
    execute :sudo, 'chmod', '644', target
    execute :sudo, 'systemctl', 'restart', 'grafana-server'
  end
end
|
|
293
|
+
|
|
294
|
+
# Strips trailing whitespace (e.g. a newline from a secrets command) from the
# Grafana admin password and insists that something is left.
#
# @param raw_password [#to_s, nil] resolved secret value
# @return [String] the password without trailing whitespace
# @raise [Error] when the secret is nil, empty or whitespace only
def normalize_grafana_password(raw_password)
  raw_password.to_s.rstrip.tap do |trimmed|
    raise Error, 'grafana_admin_password secret is missing (required for grafana)' if trimmed.empty?
  end
end
|
|
301
|
+
|
|
137
302
|
# Escapes a value for use inside a double-quoted systemd Environment=
# assignment: every backslash and double quote gets a backslash prefix.
#
# A single pass with a block is used on purpose. In String#gsub a replacement
# *string* of two backslashes is itself read as one escaped backslash, so
# `gsub('\\', '\\\\')` leaves backslashes unchanged.
#
# @param value [#to_s] raw value (nil becomes "")
# @return [String] escaped value
def escape_systemd_env(value)
  value.to_s.gsub(/[\\"]/) { |char| "\\#{char}" }
end
|
|
@@ -20,6 +20,7 @@ module ActivePostgres
|
|
|
20
20
|
ssh_executor.execute_on_host(host) do
|
|
21
21
|
execute :sudo, 'apt-get', 'remove', '-y', 'pgbackrest'
|
|
22
22
|
execute :sudo, 'rm', '-f', '/etc/cron.d/pgbackrest-backup'
|
|
23
|
+
execute :sudo, 'rm', '-f', '/etc/cron.d/pgbackrest-backup-incremental'
|
|
23
24
|
end
|
|
24
25
|
end
|
|
25
26
|
end
|
|
@@ -37,9 +38,12 @@ module ActivePostgres
|
|
|
37
38
|
puts "Running #{type} backup..."
|
|
38
39
|
postgres_user = config.postgres_user
|
|
39
40
|
|
|
41
|
+
output = nil
|
|
40
42
|
ssh_executor.execute_on_host(config.primary_host) do
|
|
41
|
-
|
|
43
|
+
output = capture(:sudo, '-u', postgres_user, 'pgbackrest', '--stanza=main', "--type=#{type}", 'backup')
|
|
42
44
|
end
|
|
45
|
+
|
|
46
|
+
puts output if output
|
|
43
47
|
end
|
|
44
48
|
|
|
45
49
|
def run_restore(backup_id)
|
|
@@ -51,20 +55,43 @@ module ActivePostgres
|
|
|
51
55
|
execute :sudo, 'systemctl', 'stop', 'postgresql'
|
|
52
56
|
|
|
53
57
|
# Restore
|
|
54
|
-
|
|
58
|
+
output = capture(:sudo, '-u', postgres_user, 'pgbackrest', '--stanza=main', "--set=#{backup_id}", 'restore')
|
|
59
|
+
puts output if output
|
|
55
60
|
|
|
56
61
|
# Start PostgreSQL
|
|
57
62
|
execute :sudo, 'systemctl', 'start', 'postgresql'
|
|
58
63
|
end
|
|
59
64
|
end
|
|
60
65
|
|
|
66
|
+
# Performs a point-in-time restore on the primary: stops PostgreSQL, runs
# `pgbackrest --type=time restore` for +target_time+, prints its output and
# starts PostgreSQL again.
#
# The target values are shell-escaped because command arguments are joined
# with spaces before they reach the remote shell; a timestamp such as
# "2026-01-29 01:15:00" would otherwise be split into two arguments.
# NOTE(review): assumes the executor does not quote arguments itself
# (SSHKit-style) — confirm against SSHExecutor.
#
# PostgreSQL is only started after a successful restore; if the restore
# command raises, the service is left stopped.
#
# @param target_time [String] recovery target timestamp
# @param target_action [String] value for pgbackrest --target-action
def run_restore_at(target_time, target_action: 'promote')
  require 'shellwords'

  puts "Restoring to #{target_time} (PITR)..."
  postgres_user = config.postgres_user
  target_option = "--target=#{Shellwords.escape(target_time.to_s)}"
  action_option = "--target-action=#{Shellwords.escape(target_action.to_s)}"

  ssh_executor.execute_on_host(config.primary_host) do
    execute :sudo, 'systemctl', 'stop', 'postgresql'

    output = capture(:sudo, '-u', postgres_user, 'pgbackrest',
                     '--stanza=main',
                     '--type=time',
                     target_option,
                     action_option,
                     'restore')
    puts output if output

    execute :sudo, 'systemctl', 'start', 'postgresql'
  end
end
|
|
84
|
+
|
|
61
85
|
# Prints the output of `pgbackrest info`, run as the postgres user on the
# primary host, under an "Available backups:" heading.
def list_backups
  puts 'Available backups:'
  pg_user = config.postgres_user

  # Assigned inside the remote block, printed after it returns.
  listing = nil
  ssh_executor.execute_on_host(config.primary_host) do
    listing = capture(:sudo, '-u', pg_user, 'pgbackrest', 'info')
  end

  puts listing if listing
end
|
|
69
96
|
|
|
70
97
|
private
|
|
@@ -108,13 +135,35 @@ module ActivePostgres
|
|
|
108
135
|
end
|
|
109
136
|
|
|
110
137
|
# Set up scheduled backups on primary only
|
|
111
|
-
if create_stanza
|
|
112
|
-
|
|
138
|
+
if create_stanza
|
|
139
|
+
setup_backup_schedules(host, pgbackrest_config)
|
|
140
|
+
end
|
|
141
|
+
end
|
|
142
|
+
|
|
143
|
+
# Installs one cron entry on +host+ for every schedule that
# backup_schedules derives from the pgBackRest configuration.
#
# @param host [String] host that should run the scheduled backups
# @param pgbackrest_config [Hash] pgBackRest component settings
# @return [Array<Hash>] the schedule entries that were installed
def setup_backup_schedules(host, pgbackrest_config)
  backup_schedules(pgbackrest_config).each do |entry|
    cron_expr, backup_type, cron_file = entry.values_at(:schedule, :type, :file)
    setup_backup_schedule(host, cron_expr, backup_type, cron_file)
  end
end
|
|
149
|
+
|
|
150
|
+
# Derives the cron schedules to install from the pgBackRest configuration.
# The full-backup schedule comes from :schedule_full, falling back to the
# :schedule key; the incremental one from :schedule_incremental. Schedules
# that are not configured are omitted.
#
# @param pgbackrest_config [Hash] pgBackRest component settings
# @return [Array<Hash>] entries with :type, :schedule and :file, full first
def backup_schedules(pgbackrest_config)
  full_cron = pgbackrest_config[:schedule_full] || pgbackrest_config[:schedule]
  incremental_cron = pgbackrest_config[:schedule_incremental]

  candidates = [
    ['full', full_cron, '/etc/cron.d/pgbackrest-backup'],
    ['incremental', incremental_cron, '/etc/cron.d/pgbackrest-backup-incremental']
  ]

  candidates
    .select { |_type, cron, _file| cron }
    .map { |type, cron, file| { type: type, schedule: cron, file: file } }
end
|
|
115
164
|
|
|
116
|
-
def setup_backup_schedule(host, schedule)
|
|
117
|
-
puts " Setting up backup schedule: #{schedule}"
|
|
165
|
+
def setup_backup_schedule(host, schedule, backup_type = 'full', cron_file = '/etc/cron.d/pgbackrest-backup')
|
|
166
|
+
puts " Setting up #{backup_type} backup schedule: #{schedule}"
|
|
118
167
|
postgres_user = config.postgres_user
|
|
119
168
|
|
|
120
169
|
# Create cron job for scheduled backups
|
|
@@ -123,17 +172,18 @@ module ActivePostgres
|
|
|
123
172
|
# pgBackRest scheduled backups (managed by active_postgres)
|
|
124
173
|
SHELL=/bin/bash
|
|
125
174
|
PATH=/usr/local/sbin:/usr/local/bin:/sbin:/bin:/usr/sbin:/usr/bin
|
|
126
|
-
#{schedule} #{postgres_user} pgbackrest --stanza=main --type
|
|
175
|
+
#{schedule} #{postgres_user} pgbackrest --stanza=main --type=#{backup_type} backup
|
|
127
176
|
CRON
|
|
128
177
|
|
|
129
178
|
# Install cron job in /etc/cron.d (system cron directory)
|
|
130
179
|
# Use a temp file + sudo mv to avoid shell redirection permission issues.
|
|
131
|
-
ssh_executor.upload_file(host, cron_content,
|
|
180
|
+
ssh_executor.upload_file(host, cron_content, cron_file, mode: '644', owner: 'root:root')
|
|
132
181
|
end
|
|
133
182
|
|
|
134
183
|
# Deletes both pgBackRest cron files (full and incremental) from +host+.
# `rm -f` is used, so missing files are not an error.
#
# @param host [String] host to clean up
def remove_backup_schedule(host)
  cron_files = %w[
    /etc/cron.d/pgbackrest-backup
    /etc/cron.d/pgbackrest-backup-incremental
  ]

  ssh_executor.execute_on_host(host) do
    cron_files.each { |cron_file| execute :sudo, 'rm', '-f', cron_file }
  end
end
|
|
139
189
|
end
|
|
@@ -5,7 +5,8 @@ module ActivePostgres
|
|
|
5
5
|
puts 'Installing PgBouncer for connection pooling...'
|
|
6
6
|
|
|
7
7
|
config.all_hosts.each do |host|
|
|
8
|
-
|
|
8
|
+
is_standby = config.standby_hosts.include?(host)
|
|
9
|
+
install_on_host(host, is_standby: is_standby)
|
|
9
10
|
end
|
|
10
11
|
end
|
|
11
12
|
|
|
@@ -49,16 +50,26 @@ module ActivePostgres
|
|
|
49
50
|
|
|
50
51
|
# Installs PgBouncer on a single standby. The host is explicitly flagged as a
# standby when delegating to install_on_host.
#
# @param standby_host [String] standby to install on
def install_on_standby(standby_host)
  announcement = "Installing PgBouncer on standby #{standby_host}..."
  puts announcement

  install_on_host(standby_host, is_standby: true)
end
|
|
54
55
|
|
|
55
56
|
private
|
|
56
57
|
|
|
57
|
-
def install_on_host(host)
|
|
58
|
+
def install_on_host(host, is_standby: false)
|
|
58
59
|
puts " Installing PgBouncer on #{host}..."
|
|
59
60
|
|
|
60
61
|
user_config = config.component_config(:pgbouncer)
|
|
61
|
-
|
|
62
|
+
|
|
63
|
+
# Determine follow_primary behavior:
|
|
64
|
+
# - Primary: use global follow_primary setting
|
|
65
|
+
# - Standby: check per-standby pgbouncer_follow_primary, default false (use localhost for read replicas)
|
|
66
|
+
if is_standby
|
|
67
|
+
standby_config = config.standby_config_for(host) || {}
|
|
68
|
+
follow_primary = standby_config['pgbouncer_follow_primary'] == true
|
|
69
|
+
else
|
|
70
|
+
follow_primary = user_config[:follow_primary] == true
|
|
71
|
+
end
|
|
72
|
+
|
|
62
73
|
if follow_primary && !config.component_enabled?(:repmgr)
|
|
63
74
|
raise Error, 'PgBouncer follow_primary requires repmgr to be enabled'
|
|
64
75
|
end
|
|
@@ -67,7 +78,8 @@ module ActivePostgres
|
|
|
67
78
|
optimal_pool = ConnectionPooler.calculate_optimal_pool_sizes(max_connections)
|
|
68
79
|
|
|
69
80
|
pgbouncer_config = optimal_pool.merge(user_config)
|
|
70
|
-
|
|
81
|
+
# For standbys not following primary, use localhost; otherwise use primary host
|
|
82
|
+
pgbouncer_config[:database_host] = follow_primary ? config.primary_replication_host : '127.0.0.1'
|
|
71
83
|
ssl_enabled = config.component_enabled?(:ssl)
|
|
72
84
|
has_ca_cert = ssl_enabled && secrets.resolve('ssl_chain')
|
|
73
85
|
secrets_obj = secrets
|
|
@@ -512,12 +512,16 @@ module ActivePostgres
|
|
|
512
512
|
def install_dns_failover_script(host, dns_config, dns_servers, dns_user, dns_ssh_key_path, ssh_strict_host_key)
|
|
513
513
|
return if dns_servers.empty?
|
|
514
514
|
|
|
515
|
-
|
|
516
|
-
|
|
517
|
-
|
|
518
|
-
|
|
519
|
-
|
|
520
|
-
|
|
515
|
+
domains = normalize_dns_domains(dns_config)
|
|
516
|
+
primary_records = normalize_dns_records(dns_config[:primary_records] || dns_config[:primary_record],
|
|
517
|
+
default_prefix: 'db-primary',
|
|
518
|
+
domains: domains)
|
|
519
|
+
replica_records = normalize_dns_records(dns_config[:replica_records] || dns_config[:replica_record],
|
|
520
|
+
default_prefix: 'db-replica',
|
|
521
|
+
domains: domains)
|
|
522
|
+
|
|
523
|
+
_ = primary_records
|
|
524
|
+
_ = replica_records
|
|
521
525
|
_ = dns_servers
|
|
522
526
|
_ = dns_user
|
|
523
527
|
_ = dns_ssh_key_path
|
|
@@ -527,6 +531,19 @@ module ActivePostgres
|
|
|
527
531
|
mode: '755', owner: 'root:root')
|
|
528
532
|
end
|
|
529
533
|
|
|
534
|
+
# Returns the DNS failover domains as a clean list of strings. Reads
# :domains (falling back to :domain), strips whitespace, drops blanks and
# defaults to ['mesh'] when nothing usable remains.
#
# @param dns_config [Hash] dns_failover settings
# @return [Array<String>]
def normalize_dns_domains(dns_config)
  configured = dns_config[:domains] || dns_config[:domain]
  cleaned = Array(configured).map { |domain| domain.to_s.strip }.reject(&:empty?)

  cleaned.empty? ? ['mesh'] : cleaned
end
|
|
539
|
+
|
|
540
|
+
# Returns the DNS record names to manage. Explicitly configured records
# (a string or a list) are stripped and blanks are dropped; when none remain,
# one "<default_prefix>.<domain>" record is generated per domain.
#
# @param value [String, Array, nil] configured record(s)
# @param default_prefix [String] host label used for generated records
# @param domains [Array<String>] domains used for generated records
# @return [Array<String>]
def normalize_dns_records(value, default_prefix:, domains:)
  explicit = Array(value).map { |record| record.to_s.strip }.reject(&:empty?)

  if explicit.empty?
    domains.map { |domain| "#{default_prefix}.#{domain}" }
  else
    explicit
  end
end
|
|
546
|
+
|
|
530
547
|
def normalize_dns_host_key_verification(value)
|
|
531
548
|
normalized = case value
|
|
532
549
|
when Symbol
|
|
@@ -96,15 +96,32 @@ module ActivePostgres
|
|
|
96
96
|
# Validate required secrets if components are enabled
|
|
97
97
|
raise Error, 'Missing replication_password secret' if component_enabled?(:repmgr) && !secrets_config['replication_password']
|
|
98
98
|
raise Error, 'Missing monitoring_password secret' if component_enabled?(:monitoring) && !secrets_config['monitoring_password']
|
|
99
|
+
if component_enabled?(:monitoring)
|
|
100
|
+
grafana_config = component_config(:monitoring)[:grafana] || {}
|
|
101
|
+
if grafana_config[:enabled] && !secrets_config['grafana_admin_password']
|
|
102
|
+
raise Error, 'Missing grafana_admin_password secret'
|
|
103
|
+
end
|
|
104
|
+
if grafana_config[:enabled] && grafana_config[:host].to_s.strip.empty?
|
|
105
|
+
raise Error, 'monitoring.grafana.host is required when grafana is enabled'
|
|
106
|
+
end
|
|
107
|
+
end
|
|
108
|
+
if component_enabled?(:pgbackrest)
|
|
109
|
+
pg_config = component_config(:pgbackrest)
|
|
110
|
+
retention_full = pg_config[:retention_full]
|
|
111
|
+
retention_archive = pg_config[:retention_archive]
|
|
112
|
+
if retention_full && retention_archive && retention_archive.to_i < retention_full.to_i
|
|
113
|
+
raise Error, 'pgbackrest.retention_archive must be >= retention_full for PITR safety'
|
|
114
|
+
end
|
|
115
|
+
end
|
|
99
116
|
|
|
100
117
|
if component_enabled?(:repmgr)
|
|
101
118
|
dns_failover = component_config(:repmgr)[:dns_failover]
|
|
102
119
|
if dns_failover && dns_failover[:enabled]
|
|
103
|
-
|
|
120
|
+
domains = Array(dns_failover[:domains] || dns_failover[:domain]).map(&:to_s).map(&:strip).reject(&:empty?)
|
|
104
121
|
servers = Array(dns_failover[:dns_servers])
|
|
105
122
|
provider = (dns_failover[:provider] || 'dnsmasq').to_s.strip
|
|
106
123
|
|
|
107
|
-
raise Error, 'dns_failover.domain is required when enabled' if
|
|
124
|
+
raise Error, 'dns_failover.domain or dns_failover.domains is required when enabled' if domains.empty?
|
|
108
125
|
raise Error, 'dns_failover.dns_servers is required when enabled' if servers.empty?
|
|
109
126
|
raise Error, "Unsupported dns_failover provider '#{provider}'" unless provider == 'dnsmasq'
|
|
110
127
|
|
|
@@ -113,6 +113,7 @@ module ActivePostgres
|
|
|
113
113
|
puts " replication_password: \"#{generate_secure_password}\""
|
|
114
114
|
puts " repmgr_password: \"#{generate_secure_password}\""
|
|
115
115
|
puts " monitoring_password: \"#{generate_secure_password}\""
|
|
116
|
+
puts " grafana_admin_password: \"#{generate_secure_password}\""
|
|
116
117
|
puts
|
|
117
118
|
puts '🔐 Secure passwords have been auto-generated above.'
|
|
118
119
|
puts '📌 Update primary_host and replica_host with your actual IPs after provisioning.'
|
|
@@ -59,6 +59,15 @@ shared: &shared
|
|
|
59
59
|
monitoring:
|
|
60
60
|
enabled: false
|
|
61
61
|
# user: postgres_exporter
|
|
62
|
+
# node_exporter: true
|
|
63
|
+
# node_exporter_port: 9100
|
|
64
|
+
# node_exporter_listen_address: 10.8.0.10
|
|
65
|
+
# grafana:
|
|
66
|
+
# enabled: true
|
|
67
|
+
# host: grafana.example.com
|
|
68
|
+
# listen_address: 10.8.0.10
|
|
69
|
+
# port: 3000
|
|
70
|
+
# prometheus_url: http://prometheus.example.com:9090
|
|
62
71
|
|
|
63
72
|
ssl:
|
|
64
73
|
enabled: false
|
|
@@ -98,5 +107,6 @@ production:
|
|
|
98
107
|
replication_password: rails_credentials:postgres.replication_password
|
|
99
108
|
repmgr_password: rails_credentials:postgres.repmgr_password
|
|
100
109
|
monitoring_password: rails_credentials:postgres.monitoring_password
|
|
110
|
+
# grafana_admin_password: rails_credentials:postgres.grafana_admin_password
|
|
101
111
|
pgbouncer_password: rails_credentials:postgres.password
|
|
102
112
|
app_password: rails_credentials:postgres.password
|
|
@@ -13,7 +13,21 @@ module ActivePostgres
|
|
|
13
13
|
end
|
|
14
14
|
|
|
15
15
|
# Builds the quiet executor used for status checks: SSHExecutor when
# use_ssh_executor? says so, DirectExecutor otherwise.
private def create_executor
  executor_class = use_ssh_executor? ? SSHExecutor : DirectExecutor
  executor_class.new(config, quiet: true)
end
|
|
22
|
+
|
|
23
|
+
# Decides whether status checks go over SSH.
#
# ACTIVE_POSTGRES_STATUS_MODE (case-insensitive, surrounding whitespace
# ignored) forces the answer: "ssh" => true, "direct" => false. Any other
# value falls back to auto-detection: direct when the primary host is a
# loopback name/address, SSH otherwise.
#
# @return [Boolean]
def use_ssh_executor?
  case ENV['ACTIVE_POSTGRES_STATUS_MODE'].to_s.strip.downcase
  when 'ssh' then true
  when 'direct' then false
  else
    !%w[localhost 127.0.0.1 ::1].include?(config.primary_host.to_s)
  end
end
|
|
18
32
|
|
|
19
33
|
def show_status
|
|
@@ -312,13 +326,21 @@ module ActivePostgres
|
|
|
312
326
|
end
|
|
313
327
|
|
|
314
328
|
# Reports whether PgBouncer looks healthy on +host+.
#
# * When the executor can run remote commands, asks systemd
#   (`systemctl is-active pgbouncer`) and returns true only for "active".
# * Otherwise attempts a TCP connection to the PgBouncer listen port
#   (monitoring default 6432). The connect is bounded by a timeout so an
#   unreachable or firewalled host cannot stall the status report.
#
# Any error (SSH failure, refused/timed-out connection, ...) yields false.
#
# @param host [String] host to check
# @return [Boolean]
def check_pgbouncer_direct(host)
  if ssh_executor.respond_to?(:execute_on_host)
    status = nil
    ssh_executor.execute_on_host(host) do
      status = capture(:systemctl, 'is-active', 'pgbouncer', raise_on_non_zero_exit: false).to_s.strip
    end
    status == 'active'
  else
    require 'socket'
    connection_host = config.connection_host_for(host)
    pgbouncer_port = config.component_config(:pgbouncer)[:listen_port] || 6432

    # Block form closes the socket; 5 seconds bounds the connect attempt.
    Socket.tcp(connection_host, pgbouncer_port, connect_timeout: 5) { true }
  end
rescue StandardError
  false
end
|
|
@@ -91,6 +91,15 @@ module ActivePostgres
|
|
|
91
91
|
puts '✓ Restore complete'
|
|
92
92
|
end
|
|
93
93
|
|
|
94
|
+
# Restores to a point in time (PITR) by delegating to the pgBackRest
# component, printing a banner before and after.
#
# @param target_time [String] recovery target time, passed through unchanged
# @param target_action [String] forwarded to the component (default 'promote')
def run_restore_at(target_time, target_action: 'promote')
  puts "==> Restoring to #{target_time} (PITR)..."

  Components::PgBackRest
    .new(config, ssh_executor, secrets)
    .run_restore_at(target_time, target_action: target_action)

  puts '✓ Restore complete'
end
|
|
102
|
+
|
|
94
103
|
def list_backups
|
|
95
104
|
component = Components::PgBackRest.new(config, ssh_executor, secrets)
|
|
96
105
|
component.list_backups
|
|
@@ -0,0 +1,351 @@
|
|
|
1
|
+
require 'shellwords'
|
|
2
|
+
require 'timeout'
|
|
3
|
+
|
|
4
|
+
module ActivePostgres
  # Prints a read-only "control tower" report for a cluster: the health
  # checker's status, per-node system stats, and (when the matching component
  # is enabled) the repmgr cluster table, PgBouncer targets, dnsmasq failover
  # records and pgBackRest backup info.
  #
  # Every remote probe is wrapped in a timeout and rescued, so one slow or
  # broken node degrades its own section instead of aborting the report.
  class Overview
    # Per-probe timeout in seconds, used when ACTIVE_POSTGRES_OVERVIEW_TIMEOUT
    # is unset or does not parse to a positive integer.
    DEFAULT_TIMEOUT = 10

    # @param config [Object] loaded ActivePostgres configuration
    def initialize(config)
      @config = config
      @executor = SSHExecutor.new(config, quiet: true)
      @health_checker = HealthChecker.new(config)
    end

    # Prints the full report to stdout.
    def show
      puts
      puts "ActivePostgres Control Tower (#{config.environment})"
      puts '=' * 70
      puts

      health_checker.show_status

      show_system_stats
      show_repmgr_cluster if config.component_enabled?(:repmgr)
      show_pgbouncer_targets if config.component_enabled?(:pgbouncer)
      show_dns_status if dns_failover_enabled?
      show_backups if config.component_enabled?(:pgbackrest)

      puts
    end

    private

    attr_reader :config, :executor, :health_checker

    # Prints the sanitized output of `repmgr cluster show` run on the primary.
    def show_repmgr_cluster
      output = nil
      postgres_user = config.postgres_user
      with_timeout('repmgr cluster') do
        executor.execute_on_host(config.primary_host) do
          output = capture(:sudo, '-u', postgres_user, 'repmgr', '-f', '/etc/repmgr.conf',
                           'cluster', 'show', raise_on_non_zero_exit: false).to_s
        end
      end

      puts '==> repmgr cluster'
      puts LogSanitizer.sanitize(output) if output && !output.strip.empty?
      puts
    rescue StandardError => e
      puts "==> repmgr cluster (error: #{e.message})"
      puts
    end

    # Prints load, CPU, memory and disk usage for every configured host.
    # Hosts whose stats cannot be collected are listed as "unavailable".
    def show_system_stats
      hosts = config.all_hosts
      return if hosts.empty?

      paths = system_stat_paths

      puts '==> System stats (DB nodes)'
      hosts.each do |host|
        label = config.node_label_for(host)
        heading = "#{host}#{label ? " (#{label})" : ''}"
        stats = fetch_system_stats(host, paths)
        if stats.nil?
          puts " #{heading}: unavailable"
          next
        end

        mem_used_kb = stats[:mem_total_kb] - stats[:mem_avail_kb]
        mem_pct = stats[:mem_total_kb].positive? ? (mem_used_kb.to_f / stats[:mem_total_kb] * 100).round : 0

        puts " #{heading}: load #{stats[:loadavg]} | cpu #{stats[:cpu]}% | " \
             "mem #{format_kb(mem_used_kb)}/#{format_kb(stats[:mem_total_kb])} (#{mem_pct}%)"

        stats[:disks].each do |disk|
          puts " disk #{disk[:mount]}: #{format_kb(disk[:used_kb])}/#{format_kb(disk[:total_kb])} (#{disk[:use_pct]})"
        end
      end
      puts
    rescue StandardError => e
      puts "==> System stats (error: #{e.message})"
      puts
    end

    # Prints, per host, whether PgBouncer is running and which backend host
    # its wildcard database entry points at.
    def show_pgbouncer_targets
      puts '==> PgBouncer targets'
      config.all_hosts.each do |host|
        status = pgbouncer_status(host)
        target = pgbouncer_target(host)
        puts " #{host}: #{status} -> #{target || 'unknown'}"
      end
      puts
    end

    # @return [String] '✓ running' when systemd reports the pgbouncer unit as
    #   active on +host+; '✗ down' otherwise (including errors and timeouts)
    def pgbouncer_status(host)
      status = nil
      # Bounded like every other remote probe so a hung host cannot stall the report.
      with_timeout("pgbouncer status #{host}") do
        executor.execute_on_host(host) do
          status = capture(:systemctl, 'is-active', 'pgbouncer', raise_on_non_zero_exit: false).to_s.strip
        end
      end
      status == 'active' ? '✓ running' : '✗ down'
    rescue StandardError
      '✗ down'
    end

    # Reads /etc/pgbouncer/pgbouncer.ini on +host+ and extracts the host= value
    # of the wildcard (`*`) database entry.
    #
    # @return [String, nil] the backend host, or nil when the entry is missing
    #   or the file cannot be read in time
    def pgbouncer_target(host)
      ini = nil
      with_timeout("pgbouncer target #{host}") do
        executor.execute_on_host(host) do
          ini = capture(:sudo, 'cat', '/etc/pgbouncer/pgbouncer.ini', raise_on_non_zero_exit: false).to_s
        end
      end
      # Tolerate any amount of whitespace around "=" in the ini entry.
      ini.to_s[/^\*\s*=\s*host=(\S+)/, 1]
    rescue StandardError
      nil
    end

    # Prints the IPs that each expected primary/replica DNS record resolves to
    # according to the dnsmasq config files on the configured DNS servers.
    # A primary record with more than one distinct IP gets a warning marker.
    def show_dns_status
      dns_config = dns_failover_config
      dns_servers = normalize_dns_servers(dns_config[:dns_servers])
      dns_user = (dns_config[:dns_user] || config.user).to_s
      domains = normalize_dns_domains(dns_config)

      primary_records = normalize_dns_records(dns_config[:primary_records] || dns_config[:primary_record],
                                              default_prefix: 'db-primary',
                                              domains: domains)
      replica_records = normalize_dns_records(dns_config[:replica_records] || dns_config[:replica_record],
                                              default_prefix: 'db-replica',
                                              domains: domains)
      records = primary_records + replica_records

      puts '==> DNS (dnsmasq)'
      puts " Servers: #{dns_servers.map { |s| s[:ssh_host] }.join(', ')}"

      record_map = Hash.new { |h, k| h[k] = [] }
      dns_servers.each do |server|
        content = dnsmasq_config(server[:ssh_host], dns_user)
        next if content.to_s.strip.empty?

        content.each_line do |line|
          next unless line.start_with?('address=/')

          match = line.strip.match(%r{\Aaddress=/([^/]+)/(.+)\z})
          next unless match

          name = match[1]
          ip = match[2]
          next unless records.include?(name)

          record_map[name] << ip
        end
      end

      records.uniq.each do |record|
        ips = record_map[record].uniq
        display = ips.empty? ? 'missing' : ips.join(', ')
        warn_suffix = if primary_records.include?(record) && ips.size > 1
                        ' ⚠️'
                      else
                        ''
                      end
        puts " #{record}: #{display}#{warn_suffix}"
      end
      puts
    rescue StandardError => e
      puts "==> DNS (error: #{e.message})"
      puts
    end

    # Fetches the managed dnsmasq config from +host+ as +dns_user+, trying
    # active_postgres.conf first and messhy.conf second.
    #
    # @return [String, nil] file content, or nil on error/timeout
    def dnsmasq_config(host, dns_user)
      content = nil
      with_timeout("dnsmasq #{host}") do
        executor.execute_on_host_as(host, dns_user) do
          content = capture(:sudo, 'sh', '-c',
                            'cat /etc/dnsmasq.d/active_postgres.conf 2>/dev/null || ' \
                            'cat /etc/dnsmasq.d/messhy.conf 2>/dev/null || true',
                            raise_on_non_zero_exit: false).to_s
        end
      end
      content
    rescue StandardError
      nil
    end

    # Normalizes the dns_servers setting into an array of
    # { ssh_host:, private_ip: } string hashes. Hash entries may use symbol or
    # string keys (ssh_host/host, private_ip/ip); each field falls back to the
    # other when missing. Scalar entries are used for both fields.
    def normalize_dns_servers(raw_servers)
      Array(raw_servers).map do |server|
        if server.is_a?(Hash)
          ssh_host = server[:ssh_host] || server['ssh_host'] || server[:host] || server['host']
          private_ip = server[:private_ip] || server['private_ip'] || server[:ip] || server['ip']
          private_ip ||= ssh_host
          ssh_host ||= private_ip
          { ssh_host: ssh_host.to_s, private_ip: private_ip.to_s }
        else
          value = server.to_s
          { ssh_host: value, private_ip: value }
        end
      end
    end

    # @return [Array<String>] non-blank, stripped domains from :domains or :domain
    def normalize_dns_domains(dns_config)
      Array(dns_config[:domains] || dns_config[:domain]).map(&:to_s).map(&:strip).reject(&:empty?)
    end

    # Returns the explicitly configured record names, or, when none are given,
    # "<default_prefix>.<domain>" for every domain ('mesh' if there are none).
    def normalize_dns_records(value, default_prefix:, domains:)
      records = Array(value).map(&:to_s).map(&:strip).reject(&:empty?)
      return records unless records.empty?

      domains = ['mesh'] if domains.empty?
      domains.map { |domain| "#{default_prefix}.#{domain}" }
    end

    # Prints the output of `pgbackrest info` run on the primary.
    def show_backups
      output = nil
      postgres_user = config.postgres_user
      with_timeout('pgbackrest info') do
        executor.execute_on_host(config.primary_host) do
          output = capture(:sudo, '-u', postgres_user, 'pgbackrest', 'info',
                           raise_on_non_zero_exit: false).to_s
        end
      end

      puts '==> Backups (pgBackRest)'
      puts output if output && !output.strip.empty?
      puts
    rescue StandardError => e
      puts "==> Backups (error: #{e.message})"
      puts
    end

    # Runs a small shell script on +host+ that samples /proc/loadavg,
    # /proc/stat (twice, 0.2s apart) and /proc/meminfo, followed by `df -kP`
    # for +paths+, and parses the key=value output.
    #
    # @param host [String] node to probe
    # @param paths [Array<String>] filesystem paths passed to df
    # @return [Hash, nil] parsed stats, or nil when the probe fails, times out
    #   or produces no output
    def fetch_system_stats(host, paths)
      output = nil
      with_timeout("system stats #{host}") do
        ssh_executor = executor
        safe_paths = paths.map { |p| Shellwords.escape(p) }.join(' ')
        script = <<~'BASH'
          set -e
          loadavg=$(cut -d' ' -f1-3 /proc/loadavg)

          read _ user nice system idle iowait irq softirq steal _ _ < /proc/stat
          sleep 0.2
          read _ user2 nice2 system2 idle2 iowait2 irq2 softirq2 steal2 _ _ < /proc/stat

          total1=$((user + nice + system + idle + iowait + irq + softirq + steal))
          total2=$((user2 + nice2 + system2 + idle2 + iowait2 + irq2 + softirq2 + steal2))
          total=$((total2 - total1))
          idle_delta=$((idle2 + iowait2 - idle - iowait))

          cpu=0
          if [ "$total" -gt 0 ]; then
            cpu=$(( (100 * (total - idle_delta)) / total ))
          fi

          mem_total=$(awk '/MemTotal/ {print $2}' /proc/meminfo)
          mem_avail=$(awk '/MemAvailable/ {print $2}' /proc/meminfo)

          echo "loadavg=${loadavg}"
          echo "cpu=${cpu}"
          echo "mem_total_kb=${mem_total}"
          echo "mem_avail_kb=${mem_avail}"
        BASH

        ssh_executor.execute_on_host(host) do
          output = capture(:bash, '-lc', "#{script}\n df -kP #{safe_paths} 2>/dev/null | tail -n +2 | " \
                                         "awk '{print \"disk=\" $6 \"|\" $2 \"|\" $3 \"|\" $4 \"|\" $5}'",
                           raise_on_non_zero_exit: false).to_s
        end
      end

      # Non-zero exits are not raised, so a failed probe shows up as blank
      # output. Report that as "unavailable" rather than as all-zero stats.
      return nil if output.to_s.strip.empty?

      parse_system_stats(output)
    rescue StandardError
      nil
    end

    # Parses the key=value lines emitted by the stats script.
    #
    # @return [Hash] :loadavg (String, 'n/a' if absent), :cpu, :mem_total_kb,
    #   :mem_avail_kb (Integers, 0 if absent) and :disks, an array of
    #   { mount:, total_kb:, used_kb:, use_pct: } built from "disk=" lines
    def parse_system_stats(output)
      stats = { disks: [] }
      output.to_s.each_line do |line|
        line = line.strip
        next if line.empty?

        if line.start_with?('loadavg=')
          stats[:loadavg] = line.split('=', 2)[1]
        elsif line.start_with?('cpu=')
          stats[:cpu] = line.split('=', 2)[1].to_i
        elsif line.start_with?('mem_total_kb=')
          stats[:mem_total_kb] = line.split('=', 2)[1].to_i
        elsif line.start_with?('mem_avail_kb=')
          stats[:mem_avail_kb] = line.split('=', 2)[1].to_i
        elsif line.start_with?('disk=')
          _, payload = line.split('=', 2)
          # Field order matches the awk output: mount|total|used|avail|use%.
          mount, total, used, _avail, pct = payload.split('|')
          stats[:disks] << {
            mount: mount,
            total_kb: total.to_i,
            used_kb: used.to_i,
            use_pct: pct
          }
        end
      end

      stats[:loadavg] ||= 'n/a'
      stats[:cpu] ||= 0
      stats[:mem_total_kb] ||= 0
      stats[:mem_avail_kb] ||= 0
      stats
    end

    # Formats a size given in KiB as "<x.x>G" (when at least 1 GiB) or "<n>M".
    def format_kb(kb)
      kb = kb.to_f
      gb = kb / 1024 / 1024
      return format('%.1fG', gb) if gb >= 1

      mb = kb / 1024
      format('%.0fM', mb)
    end

    # @return [Array<String>] paths to report disk usage for: "/", the
    #   PostgreSQL data root and, when pgBackRest is enabled, its repo path
    def system_stat_paths
      paths = ['/']
      paths << '/var/lib/postgresql'
      repo_path = pgbackrest_repo_path
      paths << repo_path if repo_path
      paths.compact.uniq
    end

    # @return [String, nil] the configured pgBackRest repo_path, or a default
    #   chosen from repo_type ('local' => /var/lib/pgbackrest, else /backups);
    #   nil when pgBackRest is not enabled
    def pgbackrest_repo_path
      return nil unless config.component_enabled?(:pgbackrest)

      pg_config = config.component_config(:pgbackrest)
      return pg_config[:repo_path] if pg_config[:repo_path]

      pg_config[:repo_type].to_s == 'local' ? '/var/lib/pgbackrest' : '/backups'
    end

    # @return [Hash, nil] the repmgr :dns_failover settings when present and
    #   enabled, nil otherwise
    def dns_failover_config
      repmgr_config = config.component_config(:repmgr)
      dns_config = repmgr_config[:dns_failover]
      return nil unless dns_config && dns_config[:enabled]

      dns_config
    end

    def dns_failover_enabled?
      !dns_failover_config.nil?
    end

    # @return [Integer] timeout in seconds from ACTIVE_POSTGRES_OVERVIEW_TIMEOUT,
    #   or DEFAULT_TIMEOUT when that is unset or not positive
    def overview_timeout
      value = ENV.fetch('ACTIVE_POSTGRES_OVERVIEW_TIMEOUT', DEFAULT_TIMEOUT.to_s).to_i
      value.positive? ? value : DEFAULT_TIMEOUT
    end

    # Runs the block under the overview timeout and converts Timeout::Error
    # into a StandardError whose message names the probe (+label+).
    def with_timeout(label, &block)
      Timeout.timeout(overview_timeout, &block)
    rescue Timeout::Error
      raise StandardError, "#{label} timed out after #{overview_timeout}s"
    end
  end
end
|
|
@@ -294,18 +294,21 @@ module ActivePostgres
|
|
|
294
294
|
SSHKit.config.format = :pretty
|
|
295
295
|
end
|
|
296
296
|
|
|
297
|
-
return unless File.exist?(config.ssh_key)
|
|
298
|
-
|
|
299
297
|
SSHKit::Backend::Netssh.configure do |ssh|
|
|
300
|
-
|
|
301
|
-
keys: [config.ssh_key],
|
|
302
|
-
keys_only: true,
|
|
298
|
+
options = {
|
|
303
299
|
forward_agent: false,
|
|
304
300
|
auth_methods: ['publickey'],
|
|
305
301
|
verify_host_key: config.ssh_host_key_verification || :always,
|
|
306
302
|
timeout: 10,
|
|
307
303
|
number_of_password_prompts: 0
|
|
308
304
|
}
|
|
305
|
+
|
|
306
|
+
if config.ssh_key && File.exist?(config.ssh_key)
|
|
307
|
+
options[:keys] = [config.ssh_key]
|
|
308
|
+
options[:keys_only] = true
|
|
309
|
+
end
|
|
310
|
+
|
|
311
|
+
ssh.ssh_options = options
|
|
309
312
|
end
|
|
310
313
|
end
|
|
311
314
|
end
|
data/lib/active_postgres.rb
CHANGED
|
@@ -23,6 +23,7 @@ require_relative 'active_postgres/installer'
|
|
|
23
23
|
require_relative 'active_postgres/ssh_executor'
|
|
24
24
|
require_relative 'active_postgres/direct_executor'
|
|
25
25
|
require_relative 'active_postgres/health_checker'
|
|
26
|
+
require_relative 'active_postgres/overview'
|
|
26
27
|
require_relative 'active_postgres/failover'
|
|
27
28
|
require_relative 'active_postgres/performance_tuner'
|
|
28
29
|
require_relative 'active_postgres/connection_pooler'
|
data/lib/tasks/postgres.rake
CHANGED
|
@@ -150,6 +150,15 @@ namespace :postgres do
|
|
|
150
150
|
health_checker.show_status
|
|
151
151
|
end
|
|
152
152
|
|
|
153
|
+
# Loads the ActivePostgres configuration and prints the Overview report.
desc 'Show control tower overview'
task overview: :environment do
  require 'active_postgres'

  ActivePostgres::Overview.new(ActivePostgres::Configuration.load).show
end
|
|
161
|
+
|
|
153
162
|
desc 'Visualize cluster nodes and topology'
|
|
154
163
|
task nodes: :environment do
|
|
155
164
|
require 'active_postgres'
|
|
@@ -270,6 +279,50 @@ namespace :postgres do
|
|
|
270
279
|
puts "\n#{'=' * 80}\n"
|
|
271
280
|
end
|
|
272
281
|
|
|
282
|
+
# Prints a grouped cheat sheet of the postgres:* rake tasks. Sections are
# listed in the order they are printed; each entry is one output line.
desc 'Show help for PostgreSQL rake tasks'
task help: :environment do
  sections = {
    'Setup & Maintenance' => [
      ' rake postgres:setup # Deploy HA cluster (CLEAN=true for fresh install)',
      ' rake postgres:purge # Destroy cluster (DESTRUCTIVE)',
      ' rake postgres:setup:core # PostgreSQL config (postgresql.conf, pg_hba.conf)',
      ' rake postgres:setup:repmgr # HA + failover (repmgr)',
      ' rake postgres:setup:pgbouncer # PgBouncer pooling',
      ' rake postgres:setup:pgbackrest # Backups (pgBackRest)',
      ' rake postgres:setup:monitoring # postgres_exporter',
      ' rake postgres:setup:ssl # SSL certs'
    ],
    'Status & Health' => [
      ' rake postgres:status # Cluster status (SSH by default)',
      ' rake postgres:overview # Control tower overview',
      ' rake postgres:nodes # Topology view',
      ' rake postgres:verify # Comprehensive checklist',
      ' ACTIVE_POSTGRES_STATUS_MODE=ssh|direct rake postgres:status'
    ],
    'Backups' => [
      ' rake postgres:backup:full # Full backup',
      ' rake postgres:backup:incremental # Incremental backup',
      ' rake postgres:backup:list # List backups',
      ' rake postgres:backup:restore[ID] # Restore backup set',
      ' rake postgres:backup:restore_at["YYYY-MM-DD HH:MM:SS",promote] # PITR'
    ],
    'Migrations' => [
      ' rake postgres:migrate # Run migrations on primary only'
    ],
    'PgBouncer' => [
      ' rake postgres:pgbouncer:update_userlist[users] # Update userlist',
      ' rake postgres:pgbouncer:stats # Service status + stats'
    ],
    'Tests' => [
      ' rake postgres:test:replication[rows] # Replication stress test',
      ' rake postgres:test:pgbouncer[connections] # PgBouncer load test'
    ]
  }

  puts "\nPostgreSQL Rake Tasks"
  puts '=' * 70
  puts "\nTip: set RAILS_ENV=production for production targets"

  sections.each do |title, entries|
    puts "\n#{title}"
    entries.each { |entry| puts entry }
  end

  puts
end
|
|
325
|
+
|
|
273
326
|
desc 'Promote standby to primary'
|
|
274
327
|
task :promote, [:host] => :environment do |_t, args|
|
|
275
328
|
require 'active_postgres'
|
|
@@ -325,6 +378,20 @@ namespace :postgres do
|
|
|
325
378
|
installer.run_restore(args[:backup_id])
|
|
326
379
|
end
|
|
327
380
|
|
|
381
|
+
# Point-in-time restore entry point. Requires a target_time argument;
# target_action falls back to 'promote' when omitted. Prints usage and exits
# with status 1 when no target time is given.
desc 'Restore to a point in time (PITR)'
task :restore_at, [:target_time, :target_action] => :environment do |_t, args|
  require 'active_postgres'

  target_time = args[:target_time]
  target_action = args[:target_action] || 'promote'

  unless target_time
    puts 'Usage: rake postgres:backup:restore_at["2026-01-29 01:15:00",promote]'
    exit 1
  end

  installer = ActivePostgres::Installer.new(ActivePostgres::Configuration.load)
  installer.run_restore_at(target_time, target_action: target_action)
end
|
|
394
|
+
|
|
328
395
|
desc 'List available backups'
|
|
329
396
|
task list: :environment do
|
|
330
397
|
require 'active_postgres'
|
data/templates/repmgr.conf.erb
CHANGED
|
@@ -30,6 +30,9 @@ follow_command='repmgr standby follow -f /etc/repmgr.conf --upstream-node-id=%n'
|
|
|
30
30
|
|
|
31
31
|
reconnect_attempts=<%= repmgr_config[:reconnect_attempts] || 6 %>
|
|
32
32
|
reconnect_interval=<%= repmgr_config[:reconnect_interval] || 10 %>
|
|
33
|
+
<% if repmgr_config.key?(:use_rewind) %>
|
|
34
|
+
use_rewind=<%= repmgr_config[:use_rewind] ? 'yes' : 'no' %>
|
|
35
|
+
<% end %>
|
|
33
36
|
|
|
34
37
|
log_level=INFO
|
|
35
38
|
log_facility=STDERR
|
|
@@ -6,8 +6,8 @@ DNS_USER="<%= dns_user %>"
|
|
|
6
6
|
DNS_SERVERS=(<%= dns_servers.map(&:to_s).join(' ') %>)
|
|
7
7
|
DNS_SSH_KEY="<%= dns_ssh_key_path %>"
|
|
8
8
|
DNSMASQ_FILE="/etc/dnsmasq.d/active_postgres.conf"
|
|
9
|
-
|
|
10
|
-
|
|
9
|
+
PRIMARY_RECORDS=(<%= primary_records.map(&:to_s).join(' ') %>)
|
|
10
|
+
REPLICA_RECORDS=(<%= replica_records.map(&:to_s).join(' ') %>)
|
|
11
11
|
SSH_STRICT_HOST_KEY="<%= ssh_strict_host_key %>"
|
|
12
12
|
SSH_KNOWN_HOSTS="/var/lib/postgresql/.ssh/known_hosts"
|
|
13
13
|
LOG_TAG="active_postgres_dns"
|
|
@@ -25,11 +25,21 @@ if [[ -z "$primary_host" ]]; then
|
|
|
25
25
|
fi
|
|
26
26
|
|
|
27
27
|
content=$'# Managed by active_postgres\n'
|
|
28
|
-
|
|
28
|
+
for record in "${PRIMARY_RECORDS[@]}"; do
|
|
29
|
+
if [[ -n "$record" ]]; then
|
|
30
|
+
printf -v content '%saddress=/%s/%s\n' "$content" "$record" "$primary_host"
|
|
31
|
+
fi
|
|
32
|
+
done
|
|
29
33
|
|
|
30
34
|
if [[ -n "$standby_hosts" ]]; then
|
|
31
|
-
for
|
|
32
|
-
|
|
35
|
+
for record in "${REPLICA_RECORDS[@]}"; do
|
|
36
|
+
if [[ -z "$record" ]]; then
|
|
37
|
+
continue
|
|
38
|
+
fi
|
|
39
|
+
|
|
40
|
+
for host in $standby_hosts; do
|
|
41
|
+
printf -v content '%saddress=/%s/%s\n' "$content" "$record" "$host"
|
|
42
|
+
done
|
|
33
43
|
done
|
|
34
44
|
fi
|
|
35
45
|
|
|
@@ -46,4 +56,4 @@ for server in "${DNS_SERVERS[@]}"; do
|
|
|
46
56
|
fi
|
|
47
57
|
done
|
|
48
58
|
|
|
49
|
-
/usr/bin/logger -t "$LOG_TAG" "Updated DNS records for ${
|
|
59
|
+
/usr/bin/logger -t "$LOG_TAG" "Updated DNS records for ${PRIMARY_RECORDS[*]} and ${REPLICA_RECORDS[*]} (primary=${primary_host})"
|
metadata
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: active_postgres
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.9.
|
|
4
|
+
version: 0.9.1
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- BoringCache
|
|
@@ -179,6 +179,7 @@ files:
|
|
|
179
179
|
- lib/active_postgres/installer.rb
|
|
180
180
|
- lib/active_postgres/log_sanitizer.rb
|
|
181
181
|
- lib/active_postgres/logger.rb
|
|
182
|
+
- lib/active_postgres/overview.rb
|
|
182
183
|
- lib/active_postgres/performance_tuner.rb
|
|
183
184
|
- lib/active_postgres/rails/database_config.rb
|
|
184
185
|
- lib/active_postgres/rails/migration_guard.rb
|