@intentsolutionsio/fairdb-operations-kit 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude-plugin/plugin.json +26 -0
- package/LICENSE +21 -0
- package/README.md +298 -0
- package/agents/fairdb-automation-agent.md +307 -0
- package/commands/fairdb-emergency-response.md +480 -0
- package/commands/fairdb-health-check.md +459 -0
- package/commands/fairdb-onboard-customer.md +446 -0
- package/commands/fairdb-setup-backup.md +420 -0
- package/package.json +48 -0
- package/skills/fairdb-backup-manager/SKILL.md +72 -0
- package/skills/fairdb-backup-manager/assets/README.md +26 -0
- package/skills/fairdb-backup-manager/references/README.md +26 -0
- package/skills/fairdb-backup-manager/scripts/README.md +24 -0
- package/skills/skill-adapter/assets/README.md +4 -0
- package/skills/skill-adapter/assets/config-template.json +32 -0
- package/skills/skill-adapter/assets/skill-schema.json +28 -0
- package/skills/skill-adapter/assets/test-data.json +27 -0
- package/skills/skill-adapter/references/README.md +4 -0
- package/skills/skill-adapter/references/best-practices.md +69 -0
- package/skills/skill-adapter/references/examples.md +73 -0
- package/skills/skill-adapter/scripts/README.md +10 -0
- package/skills/skill-adapter/scripts/helper-template.sh +42 -0
- package/skills/skill-adapter/scripts/validation.sh +32 -0
|
@@ -0,0 +1,459 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: fairdb-health-check
|
|
3
|
+
description: Comprehensive health check for FairDB PostgreSQL infrastructure
|
|
4
|
+
model: sonnet
|
|
5
|
+
---
|
|
6
|
+
|
|
7
|
+
# FairDB System Health Check
|
|
8
|
+
|
|
9
|
+
Perform a comprehensive health check of the FairDB PostgreSQL infrastructure including server resources, database status, backup integrity, and customer databases.
|
|
10
|
+
|
|
11
|
+
## System Health Overview
|
|
12
|
+
|
|
13
|
+
```bash
|
|
14
|
+
#!/bin/bash
|
|
15
|
+
# FairDB Comprehensive Health Check
|
|
16
|
+
|
|
17
|
+
echo "================================================"
|
|
18
|
+
echo " FairDB System Health Check"
|
|
19
|
+
echo " $(date '+%Y-%m-%d %H:%M:%S')"
|
|
20
|
+
echo "================================================"
|
|
21
|
+
```
|
|
22
|
+
|
|
23
|
+
## Step 1: Server Resources Check
|
|
24
|
+
|
|
25
|
+
```bash
|
|
26
|
+
echo -e "\n[1/10] SERVER RESOURCES"
|
|
27
|
+
echo "------------------------"
|
|
28
|
+
|
|
29
|
+
# CPU Usage
|
|
30
|
+
# Total CPU busy percentage = 100 - idle. The "Cpu(s)" summary line of top
# lists user, system, nice, idle, ... in that order; taking field 2 alone would
# report only the user ("us") share and under-report load from system/iowait.
# The sed expression accepts both "91.9 id" and the older "91.9%id" layouts.
CPU_USAGE=$(top -bn1 | grep "Cpu(s)" | sed 's/.*, *\([0-9.]*\)%* *id.*/\1/' | awk '{print 100 - $1}')
|
|
31
|
+
echo "CPU Usage: ${CPU_USAGE}%"
|
|
32
|
+
if (( $(echo "$CPU_USAGE > 80" | bc -l) )); then
|
|
33
|
+
echo "⚠️ WARNING: High CPU usage detected"
|
|
34
|
+
fi
|
|
35
|
+
|
|
36
|
+
# Memory Usage
|
|
37
|
+
MEM_INFO=$(free -m | awk 'NR==2{printf "Memory: %s/%sMB (%.2f%%)\n", $3,$2,$3*100/$2 }')
|
|
38
|
+
echo "$MEM_INFO"
|
|
39
|
+
MEM_PERCENT=$(free | grep Mem | awk '{print $3/$2 * 100.0}')
|
|
40
|
+
if (( $(echo "$MEM_PERCENT > 90" | bc -l) )); then
|
|
41
|
+
echo "⚠️ WARNING: High memory usage detected"
|
|
42
|
+
fi
|
|
43
|
+
|
|
44
|
+
# Disk Usage
|
|
45
|
+
echo "Disk Usage:"
|
|
46
|
+
# Report every /dev/* filesystem and warn when usage exceeds 85%.
# -P forces POSIX output (exactly one line per filesystem), so a long device
# name cannot wrap onto a second line and shift the columns parsed below.
df -hP | grep -E '^/dev/' | while IFS= read -r line; do
  # Column 5 is "Use%" and column 6 is the mount point.
  USAGE=$(awk '{print $5}' <<< "$line" | tr -d '%')
  MOUNT=$(awk '{print $6}' <<< "$line")
  echo " $MOUNT: $line"
  if [ "${USAGE:-0}" -gt 85 ]; then
    echo " ⚠️ WARNING: Disk space critical on $MOUNT"
  fi
done
|
|
54
|
+
|
|
55
|
+
# Load Average
|
|
56
|
+
LOAD=$(uptime | awk -F'load average:' '{print $2}')
|
|
57
|
+
echo "Load Average:$LOAD"
|
|
58
|
+
CORES=$(nproc)
|
|
59
|
+
LOAD_1=$(echo $LOAD | cut -d, -f1 | tr -d ' ')
|
|
60
|
+
if (( $(echo "$LOAD_1 > $CORES" | bc -l) )); then
|
|
61
|
+
echo "⚠️ WARNING: High load average detected"
|
|
62
|
+
fi
|
|
63
|
+
```
|
|
64
|
+
|
|
65
|
+
## Step 2: PostgreSQL Service Status
|
|
66
|
+
|
|
67
|
+
```bash
|
|
68
|
+
echo -e "\n[2/10] POSTGRESQL SERVICE"
|
|
69
|
+
echo "-------------------------"
|
|
70
|
+
|
|
71
|
+
# Check if PostgreSQL is running
|
|
72
|
+
if systemctl is-active --quiet postgresql; then
|
|
73
|
+
echo "✅ PostgreSQL service: RUNNING"
|
|
74
|
+
|
|
75
|
+
# Get version and uptime
|
|
76
|
+
sudo -u postgres psql -t -c "SELECT version();" | head -1
|
|
77
|
+
|
|
78
|
+
UPTIME=$(sudo -u postgres psql -t -c "
|
|
79
|
+
SELECT now() - pg_postmaster_start_time() as uptime;")
|
|
80
|
+
echo "Uptime: $UPTIME"
|
|
81
|
+
else
|
|
82
|
+
echo "❌ CRITICAL: PostgreSQL service is NOT running!"
|
|
83
|
+
echo "Attempting to start..."
|
|
84
|
+
sudo systemctl start postgresql
|
|
85
|
+
sleep 5
|
|
86
|
+
if systemctl is-active --quiet postgresql; then
|
|
87
|
+
echo "✅ Service restarted successfully"
|
|
88
|
+
else
|
|
89
|
+
echo "❌ Failed to start PostgreSQL - manual intervention required!"
|
|
90
|
+
exit 1
|
|
91
|
+
fi
|
|
92
|
+
fi
|
|
93
|
+
|
|
94
|
+
# Check PostgreSQL cluster status
|
|
95
|
+
sudo pg_lsclusters
|
|
96
|
+
```
|
|
97
|
+
|
|
98
|
+
## Step 3: Database Connections
|
|
99
|
+
|
|
100
|
+
```bash
|
|
101
|
+
echo -e "\n[3/10] DATABASE CONNECTIONS"
|
|
102
|
+
echo "---------------------------"
|
|
103
|
+
|
|
104
|
+
# Connection statistics
|
|
105
|
+
sudo -u postgres psql -t << EOF
|
|
106
|
+
SELECT
|
|
107
|
+
'Total Connections: ' || count(*) || '/' || setting AS connection_info
|
|
108
|
+
FROM pg_stat_activity, pg_settings
|
|
109
|
+
WHERE pg_settings.name = 'max_connections'
|
|
110
|
+
GROUP BY setting;
|
|
111
|
+
EOF
|
|
112
|
+
|
|
113
|
+
# Connections by database
|
|
114
|
+
echo -e "\nConnections by database:"
|
|
115
|
+
sudo -u postgres psql -t -c "
|
|
116
|
+
SELECT datname, count(*) as connections
|
|
117
|
+
FROM pg_stat_activity
|
|
118
|
+
GROUP BY datname
|
|
119
|
+
ORDER BY connections DESC;"
|
|
120
|
+
|
|
121
|
+
# Connections by user
|
|
122
|
+
echo -e "\nConnections by user:"
|
|
123
|
+
sudo -u postgres psql -t -c "
|
|
124
|
+
SELECT usename, count(*) as connections
|
|
125
|
+
FROM pg_stat_activity
|
|
126
|
+
GROUP BY usename
|
|
127
|
+
ORDER BY connections DESC;"
|
|
128
|
+
|
|
129
|
+
# Check for idle connections
|
|
130
|
+
IDLE_COUNT=$(sudo -u postgres psql -t -c "
|
|
131
|
+
SELECT count(*)
|
|
132
|
+
FROM pg_stat_activity
|
|
133
|
+
WHERE state = 'idle'
|
|
134
|
+
AND state_change < NOW() - INTERVAL '10 minutes';")
|
|
135
|
+
|
|
136
|
+
if [ $IDLE_COUNT -gt 10 ]; then
|
|
137
|
+
echo "⚠️ WARNING: $IDLE_COUNT idle connections older than 10 minutes"
|
|
138
|
+
fi
|
|
139
|
+
```
|
|
140
|
+
|
|
141
|
+
## Step 4: Database Performance Metrics
|
|
142
|
+
|
|
143
|
+
```bash
|
|
144
|
+
echo -e "\n[4/10] PERFORMANCE METRICS"
|
|
145
|
+
echo "--------------------------"
|
|
146
|
+
|
|
147
|
+
# Cache hit ratio
|
|
148
|
+
sudo -u postgres psql -t << 'EOF'
|
|
149
|
+
SELECT
|
|
150
|
+
'Cache Hit Ratio: ' ||
|
|
151
|
+
ROUND(100.0 * sum(heap_blks_hit) /
|
|
152
|
+
NULLIF(sum(heap_blks_hit) + sum(heap_blks_read), 0), 2) || '%'
|
|
153
|
+
FROM pg_statio_user_tables;
|
|
154
|
+
EOF
|
|
155
|
+
|
|
156
|
+
# Transaction statistics
|
|
157
|
+
sudo -u postgres psql -t -c "
|
|
158
|
+
SELECT
|
|
159
|
+
'Transactions: ' || xact_commit || ' commits, ' ||
|
|
160
|
+
xact_rollback || ' rollbacks, ' ||
|
|
161
|
+
ROUND(100.0 * xact_rollback / NULLIF(xact_commit + xact_rollback, 0), 2) || '% rollback rate'
|
|
162
|
+
FROM pg_stat_database
|
|
163
|
+
WHERE datname = 'postgres';"
|
|
164
|
+
|
|
165
|
+
# Longest running queries
|
|
166
|
+
echo -e "\nLong-running queries (>1 minute):"
|
|
167
|
+
sudo -u postgres psql -t -c "
|
|
168
|
+
SELECT pid, now() - query_start as duration,
|
|
169
|
+
LEFT(query, 50) as query_preview
|
|
170
|
+
FROM pg_stat_activity
|
|
171
|
+
WHERE state = 'active'
|
|
172
|
+
AND now() - query_start > interval '1 minute'
|
|
173
|
+
ORDER BY duration DESC
|
|
174
|
+
LIMIT 5;"
|
|
175
|
+
|
|
176
|
+
# Largest tables by total on-disk size (table + indexes + TOAST) and each
# table's share of the total. This measures size, not bloat, so the heading
# says so. Names are resolved through format('%I.%I')::regclass so that
# mixed-case or otherwise quoted identifiers do not fail with
# "relation does not exist".
echo -e "\nLargest tables (top 5):"
sudo -u postgres psql -t << 'EOF'
SELECT
  schemaname || '.' || tablename AS table,
  pg_size_pretty(pg_total_relation_size(format('%I.%I', schemaname, tablename)::regclass)) AS size,
  ROUND(100 * pg_total_relation_size(format('%I.%I', schemaname, tablename)::regclass) /
    NULLIF(sum(pg_total_relation_size(format('%I.%I', schemaname, tablename)::regclass))
    OVER (), 0), 2) AS percentage
FROM pg_tables
WHERE schemaname NOT IN ('pg_catalog', 'information_schema')
ORDER BY pg_total_relation_size(format('%I.%I', schemaname, tablename)::regclass) DESC
LIMIT 5;
EOF
|
|
190
|
+
```
|
|
191
|
+
|
|
192
|
+
## Step 5: Backup Status
|
|
193
|
+
|
|
194
|
+
```bash
|
|
195
|
+
echo -e "\n[5/10] BACKUP STATUS"
|
|
196
|
+
echo "--------------------"
|
|
197
|
+
|
|
198
|
+
# Check pgBackRest status
|
|
199
|
+
if command -v pgbackrest &> /dev/null; then
|
|
200
|
+
echo "pgBackRest Status:"
|
|
201
|
+
|
|
202
|
+
# Get all stanzas
|
|
203
|
+
STANZAS=$(sudo -u postgres pgbackrest info --output=json 2>/dev/null | jq -r '.[].name' 2>/dev/null)
|
|
204
|
+
|
|
205
|
+
if [ -z "$STANZAS" ]; then
|
|
206
|
+
echo "⚠️ WARNING: No backup stanzas configured"
|
|
207
|
+
else
|
|
208
|
+
# For every pgBackRest stanza, print the completion time of the newest backup
# and warn when it is more than 25 hours old.
for STANZA in $STANZAS; do
  echo -e "\nStanza: $STANZA"

  # Get last backup info.
  # pgBackRest reports timestamp.stop as Unix epoch seconds. "// empty" turns
  # jq's literal "null" (stanza with no backups) into an empty string so the
  # "No backups found" branch below is reachable. The stanza name is passed via
  # --arg rather than spliced into the jq program text.
  LAST_BACKUP=$(sudo -u postgres pgbackrest --stanza="$STANZA" info --output=json 2>/dev/null | \
    jq -r --arg stanza "$STANZA" \
      '.[] | select(.name==$stanza) | .backup[-1].timestamp.stop // empty' 2>/dev/null)

  if [ -n "$LAST_BACKUP" ]; then
    # "@<seconds>" tells GNU date the value is an epoch timestamp.
    echo " Last backup: $(date -d "@$LAST_BACKUP" '+%Y-%m-%d %H:%M:%S')"

    # Calculate age in hours (both operands are epoch seconds)
    BACKUP_AGE=$(( ($(date +%s) - LAST_BACKUP) / 3600 ))

    if [ "$BACKUP_AGE" -gt 25 ]; then
      echo " ⚠️ WARNING: Last backup is $BACKUP_AGE hours old"
    else
      echo " ✅ Backup is current ($BACKUP_AGE hours old)"
    fi
  else
    echo " ❌ ERROR: No backups found for this stanza"
  fi
done
|
|
230
|
+
fi
|
|
231
|
+
else
|
|
232
|
+
echo "❌ ERROR: pgBackRest is not installed"
|
|
233
|
+
fi
|
|
234
|
+
|
|
235
|
+
# Check WAL archiving
|
|
236
|
+
# Report whether WAL archiving is enabled and how long ago the last segment
# was archived.
# -A removes psql's column padding so the value can be compared exactly
# instead of relying on a leading space in the output.
WAL_STATUS=$(sudo -u postgres psql -At -c "SHOW archive_mode;")
echo -e "\nWAL Archiving: $WAL_STATUS"

# archive_mode accepts off, on and always; both "on" and "always" archive WAL.
if [ "$WAL_STATUS" = "on" ] || [ "$WAL_STATUS" = "always" ]; then
  LAST_ARCHIVED=$(sudo -u postgres psql -At -c "
    SELECT age(now(), last_archived_time)
    FROM pg_stat_archiver;")
  echo "Last WAL archived: $LAST_ARCHIVED ago"
fi
|
|
245
|
+
```
|
|
246
|
+
|
|
247
|
+
## Step 6: Replication Status
|
|
248
|
+
|
|
249
|
+
```bash
|
|
250
|
+
echo -e "\n[6/10] REPLICATION STATUS"
|
|
251
|
+
echo "-------------------------"
|
|
252
|
+
|
|
253
|
+
# Check if this is a primary or replica
|
|
254
|
+
# Determine whether this server is a primary or a replica and report
# replication state accordingly.
# NOTE: despite its name, IS_PRIMARY holds the result of pg_is_in_recovery():
# "f" on a primary, "t" on a replica. -A strips psql's padding so the value is
# exactly one of those letters.
IS_PRIMARY=$(sudo -u postgres psql -At -c "SELECT pg_is_in_recovery();")

if [ "$IS_PRIMARY" = "f" ]; then
  echo "Role: PRIMARY"

  # Check replication slots
  REP_SLOTS=$(sudo -u postgres psql -At -c "
    SELECT count(*) FROM pg_replication_slots WHERE active = true;")
  echo "Active replication slots: $REP_SLOTS"

  # Check connected replicas
  sudo -u postgres psql -t -c "
    SELECT client_addr, state, sync_state,
      pg_size_pretty(pg_wal_lsn_diff(sent_lsn, replay_lsn)) as lag
    FROM pg_stat_replication;" 2>/dev/null
elif [ "$IS_PRIMARY" = "t" ]; then
  echo "Role: REPLICA"

  # Check replication lag.
  # pg_last_xact_replay_timestamp() is NULL until a transaction has been
  # replayed; COALESCE keeps LAG numeric so the bc comparison cannot fail on
  # an empty string.
  LAG=$(sudo -u postgres psql -At -c "
    SELECT COALESCE(EXTRACT(EPOCH FROM (now() - pg_last_xact_replay_timestamp())), 0) AS lag;")
  echo "Replication lag: ${LAG} seconds"

  if (( $(echo "${LAG:-0} > 60" | bc -l) )); then
    echo "⚠️ WARNING: High replication lag detected"
  fi
else
  # An empty or unexpected answer means the query itself failed; do not
  # misreport the server as a replica.
  echo "⚠️ WARNING: Unable to determine replication role"
fi
|
|
281
|
+
```
|
|
282
|
+
|
|
283
|
+
## Step 7: Security Audit
|
|
284
|
+
|
|
285
|
+
```bash
|
|
286
|
+
echo -e "\n[7/10] SECURITY AUDIT"
|
|
287
|
+
echo "---------------------"
|
|
288
|
+
|
|
289
|
+
# Check for default passwords
|
|
290
|
+
echo "Checking for common issues..."
|
|
291
|
+
|
|
292
|
+
# SSL status
|
|
293
|
+
SSL_STATUS=$(sudo -u postgres psql -t -c "SHOW ssl;")
|
|
294
|
+
echo "SSL: $SSL_STATUS"
|
|
295
|
+
if [ "$SSL_STATUS" != " on" ]; then
|
|
296
|
+
echo "⚠️ WARNING: SSL is not enabled"
|
|
297
|
+
fi
|
|
298
|
+
|
|
299
|
+
# Check for users without passwords
|
|
300
|
+
NO_PASS=$(sudo -u postgres psql -t -c "
|
|
301
|
+
SELECT count(*) FROM pg_shadow WHERE passwd IS NULL;")
|
|
302
|
+
if [ $NO_PASS -gt 0 ]; then
|
|
303
|
+
echo "⚠️ WARNING: $NO_PASS users without passwords"
|
|
304
|
+
fi
|
|
305
|
+
|
|
306
|
+
# Check firewall status
|
|
307
|
+
if sudo ufw status | grep -q "Status: active"; then
|
|
308
|
+
echo "✅ Firewall: ACTIVE"
|
|
309
|
+
else
|
|
310
|
+
echo "⚠️ WARNING: Firewall is not active"
|
|
311
|
+
fi
|
|
312
|
+
|
|
313
|
+
# Check fail2ban status
|
|
314
|
+
if systemctl is-active --quiet fail2ban; then
|
|
315
|
+
echo "✅ Fail2ban: RUNNING"
|
|
316
|
+
JAIL_STATUS=$(sudo fail2ban-client status postgresql 2>/dev/null | grep "Currently banned" || echo "Jail not configured")
|
|
317
|
+
echo " PostgreSQL jail: $JAIL_STATUS"
|
|
318
|
+
else
|
|
319
|
+
echo "⚠️ WARNING: Fail2ban is not running"
|
|
320
|
+
fi
|
|
321
|
+
```
|
|
322
|
+
|
|
323
|
+
## Step 8: Customer Database Health
|
|
324
|
+
|
|
325
|
+
```bash
|
|
326
|
+
echo -e "\n[8/10] CUSTOMER DATABASES"
|
|
327
|
+
echo "-------------------------"
|
|
328
|
+
|
|
329
|
+
# Check each customer database
|
|
330
|
+
# List every non-system database, then print size, connection count, total
# transactions and blocked-lock count for each one.
# -A yields one unpadded name per line; the list is read line by line so a
# database name containing whitespace is not split into several words.
CUSTOMER_DBS=$(sudo -u postgres psql -At -c "
  SELECT datname FROM pg_database
  WHERE datname NOT IN ('postgres', 'template0', 'template1')
  ORDER BY datname;")

# The list is fed on fd 3 so commands inside the loop cannot consume it
# through their own stdin.
while IFS= read -r DB <&3; do
  [ -n "$DB" ] || continue
  echo -e "\nDatabase: $DB"

  # Size
  SIZE=$(sudo -u postgres psql -At -c "
    SELECT pg_size_pretty(pg_database_size('$DB'));")
  echo " Size: $SIZE"

  # Connection count
  CONN=$(sudo -u postgres psql -At -c "
    SELECT count(*) FROM pg_stat_activity WHERE datname = '$DB';")
  echo " Connections: $CONN"

  # Transaction rate (cumulative commits + rollbacks since the last stats reset)
  TPS=$(sudo -u postgres psql -At -c "
    SELECT xact_commit + xact_rollback as transactions
    FROM pg_stat_database WHERE datname = '$DB';")
  echo " Total transactions: $TPS"

  # Check for locks.
  # pg_locks covers the whole cluster, so restrict it to locks whose database
  # OID belongs to the database we are connected to; otherwise every database
  # reports the same cluster-wide number.
  LOCKS=$(sudo -u postgres psql -At -d "$DB" -c "
    SELECT count(*)
    FROM pg_locks l
    JOIN pg_database d ON d.oid = l.database
    WHERE NOT l.granted AND d.datname = current_database();")
  # Default to 0 when the query failed (e.g. the database refuses connections).
  if [ "${LOCKS:-0}" -gt 0 ]; then
    echo " ⚠️ WARNING: $LOCKS blocked locks detected"
  fi
done 3<<< "$CUSTOMER_DBS"
|
|
361
|
+
```
|
|
362
|
+
|
|
363
|
+
## Step 9: System Logs Analysis
|
|
364
|
+
|
|
365
|
+
```bash
|
|
366
|
+
echo -e "\n[9/10] LOG ANALYSIS"
|
|
367
|
+
echo "-------------------"
|
|
368
|
+
|
|
369
|
+
# Check PostgreSQL logs for errors
|
|
370
|
+
# Show the most recent error lines from PostgreSQL log files and warn when
# there are more than 100 of them.
# NOTE: -mtime -1 selects log *files* modified within the last 24 hours; the
# matched lines themselves are not filtered by timestamp.
LOG_DIR="/var/log/postgresql"
if [ -d "$LOG_DIR" ]; then
  echo "Recent PostgreSQL errors (last 24 hours):"
  # tail -5 prints the five newest matches. The earlier "tail -10 | head -5"
  # printed the 10th to 6th newest and skipped the most recent ones.
  find "$LOG_DIR" -name "*.log" -mtime -1 -exec grep -i "error\|fatal\|panic" {} \; | \
    tail -5

  ERROR_COUNT=$(find "$LOG_DIR" -name "*.log" -mtime -1 -exec grep -i "error\|fatal\|panic" {} \; | wc -l)
  echo "Total errors in last 24 hours: $ERROR_COUNT"

  if [ "$ERROR_COUNT" -gt 100 ]; then
    echo "⚠️ WARNING: High error rate detected"
  fi
fi
|
|
383
|
+
|
|
384
|
+
# Check system logs
|
|
385
|
+
echo -e "\nRecent system issues:"
|
|
386
|
+
# Last five journal entries of priority "err" or worse from the past 24 hours.
# The long option needs two dashes: "-since" is parsed as a cluster of short
# options and rejected by journalctl.
sudo journalctl -p err --since "24 hours ago" --no-pager | tail -5
|
|
387
|
+
```
|
|
388
|
+
|
|
389
|
+
## Step 10: Recommendations
|
|
390
|
+
|
|
391
|
+
```bash
|
|
392
|
+
echo -e "\n[10/10] HEALTH SUMMARY & RECOMMENDATIONS"
|
|
393
|
+
echo "========================================="
|
|
394
|
+
|
|
395
|
+
# Collect all warnings
|
|
396
|
+
# Counters that drive the overall verdict printed further down.
# Keep any value already set in this shell and default to 0 otherwise, instead
# of unconditionally resetting to 0.
# NOTE(review): no step visible in this file increments these counters; until
# the checks do so, the verdict will always be "GOOD".
WARNINGS=${WARNINGS:-0}
CRITICAL=${CRITICAL:-0}
|
|
398
|
+
|
|
399
|
+
# Generate recommendations based on findings
|
|
400
|
+
echo -e "\nRecommendations:"
|
|
401
|
+
|
|
402
|
+
# Check if vacuum is needed
|
|
403
|
+
LAST_VACUUM=$(sudo -u postgres psql -t -c "
|
|
404
|
+
SELECT MAX(last_autovacuum) FROM pg_stat_user_tables;")
|
|
405
|
+
echo "- Last autovacuum: $LAST_VACUUM"
|
|
406
|
+
|
|
407
|
+
# Check if analyze is needed
|
|
408
|
+
LAST_ANALYZE=$(sudo -u postgres psql -t -c "
|
|
409
|
+
SELECT MAX(last_autoanalyze) FROM pg_stat_user_tables;")
|
|
410
|
+
echo "- Last autoanalyze: $LAST_ANALYZE"
|
|
411
|
+
|
|
412
|
+
# Generate overall health score
|
|
413
|
+
echo -e "\n━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"
|
|
414
|
+
if [ $CRITICAL -eq 0 ] && [ $WARNINGS -lt 3 ]; then
|
|
415
|
+
echo "✅ OVERALL HEALTH: GOOD"
|
|
416
|
+
elif [ $CRITICAL -eq 0 ] && [ $WARNINGS -lt 10 ]; then
|
|
417
|
+
echo "⚠️ OVERALL HEALTH: FAIR - Review warnings"
|
|
418
|
+
else
|
|
419
|
+
echo "❌ OVERALL HEALTH: POOR - Immediate action required"
|
|
420
|
+
fi
|
|
421
|
+
echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"
|
|
422
|
+
|
|
423
|
+
# Save report
|
|
424
|
+
# NOTE(review): nothing visible in this file writes to $REPORT_FILE or creates
# /opt/fairdb/logs; the message below only prints the intended path. Capture
# the run's output to this path (or drop the message) — TODO confirm.
REPORT_FILE="/opt/fairdb/logs/health-check-$(date +%Y%m%d-%H%M%S).log"
|
|
425
|
+
echo -e "\nFull report saved to: $REPORT_FILE"
|
|
426
|
+
```
|
|
427
|
+
|
|
428
|
+
## Actions Based on Results
|
|
429
|
+
|
|
430
|
+
### If Critical Issues Found:
|
|
431
|
+
1. Check PostgreSQL service status
|
|
432
|
+
2. Review disk space availability
|
|
433
|
+
3. Verify backup integrity
|
|
434
|
+
4. Check for data corruption
|
|
435
|
+
5. Review security vulnerabilities
|
|
436
|
+
|
|
437
|
+
### If Warnings Found:
|
|
438
|
+
1. Schedule maintenance window
|
|
439
|
+
2. Plan capacity upgrades
|
|
440
|
+
3. Review query performance
|
|
441
|
+
4. Update monitoring thresholds
|
|
442
|
+
5. Document issues for trending
|
|
443
|
+
|
|
444
|
+
### Regular Maintenance Tasks:
|
|
445
|
+
1. Run VACUUM ANALYZE on large tables
|
|
446
|
+
2. Update table statistics
|
|
447
|
+
3. Review and optimize slow queries
|
|
448
|
+
4. Clean up old logs
|
|
449
|
+
5. Test backup restoration
|
|
450
|
+
|
|
451
|
+
## Schedule Next Health Check
|
|
452
|
+
|
|
453
|
+
```bash
|
|
454
|
+
# Schedule regular health checks
|
|
455
|
+
echo "30 */6 * * * root /usr/local/bin/fairdb-health-check > /dev/null 2>&1" | \
|
|
456
|
+
sudo tee /etc/cron.d/fairdb-health-check
|
|
457
|
+
|
|
458
|
+
echo "Health checks scheduled every 6 hours"
|
|
459
|
+
```
|