mcp-ssh-vps 0.4.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- mcp_ssh_vps-0.4.1.dist-info/METADATA +482 -0
- mcp_ssh_vps-0.4.1.dist-info/RECORD +47 -0
- mcp_ssh_vps-0.4.1.dist-info/WHEEL +5 -0
- mcp_ssh_vps-0.4.1.dist-info/entry_points.txt +4 -0
- mcp_ssh_vps-0.4.1.dist-info/licenses/LICENSE +21 -0
- mcp_ssh_vps-0.4.1.dist-info/top_level.txt +1 -0
- sshmcp/__init__.py +3 -0
- sshmcp/cli.py +473 -0
- sshmcp/config.py +155 -0
- sshmcp/core/__init__.py +5 -0
- sshmcp/core/container.py +291 -0
- sshmcp/models/__init__.py +15 -0
- sshmcp/models/command.py +69 -0
- sshmcp/models/file.py +102 -0
- sshmcp/models/machine.py +139 -0
- sshmcp/monitoring/__init__.py +0 -0
- sshmcp/monitoring/alerts.py +464 -0
- sshmcp/prompts/__init__.py +7 -0
- sshmcp/prompts/backup.py +151 -0
- sshmcp/prompts/deploy.py +115 -0
- sshmcp/prompts/monitor.py +146 -0
- sshmcp/resources/__init__.py +7 -0
- sshmcp/resources/logs.py +99 -0
- sshmcp/resources/metrics.py +204 -0
- sshmcp/resources/status.py +160 -0
- sshmcp/security/__init__.py +7 -0
- sshmcp/security/audit.py +314 -0
- sshmcp/security/rate_limiter.py +221 -0
- sshmcp/security/totp.py +392 -0
- sshmcp/security/validator.py +234 -0
- sshmcp/security/whitelist.py +169 -0
- sshmcp/server.py +632 -0
- sshmcp/ssh/__init__.py +6 -0
- sshmcp/ssh/async_client.py +247 -0
- sshmcp/ssh/client.py +464 -0
- sshmcp/ssh/executor.py +79 -0
- sshmcp/ssh/forwarding.py +368 -0
- sshmcp/ssh/pool.py +343 -0
- sshmcp/ssh/shell.py +518 -0
- sshmcp/ssh/transfer.py +461 -0
- sshmcp/tools/__init__.py +13 -0
- sshmcp/tools/commands.py +226 -0
- sshmcp/tools/files.py +220 -0
- sshmcp/tools/helpers.py +321 -0
- sshmcp/tools/history.py +372 -0
- sshmcp/tools/processes.py +214 -0
- sshmcp/tools/servers.py +484 -0

sshmcp/monitoring/alerts.py
ADDED

@@ -0,0 +1,464 @@
"""System monitoring and alerting for VPS servers."""

import threading
from dataclasses import dataclass, field
from datetime import datetime, timezone
from enum import Enum
from typing import Any, Callable

import structlog

from sshmcp.models.machine import MachineConfig
from sshmcp.ssh.client import SSHClient

logger = structlog.get_logger()


class AlertSeverity(Enum):
    """Alert severity levels."""

    INFO = "info"
    WARNING = "warning"
    CRITICAL = "critical"


class MetricType(Enum):
    """Types of metrics to monitor."""

    CPU = "cpu"
    MEMORY = "memory"
    DISK = "disk"
    LOAD = "load"
    PROCESS = "process"


@dataclass
class AlertThreshold:
    """Threshold configuration for alerts."""

    metric: MetricType
    warning: float
    critical: float
    duration_seconds: int = 60  # How long threshold must be exceeded


@dataclass
class Alert:
    """Alert event."""

    host: str
    metric: MetricType
    severity: AlertSeverity
    value: float
    threshold: float
    message: str
    timestamp: datetime = field(default_factory=lambda: datetime.now(timezone.utc))

    def to_dict(self) -> dict[str, Any]:
        """Convert to dictionary."""
        return {
            "host": self.host,
            "metric": self.metric.value,
            "severity": self.severity.value,
            "value": self.value,
            "threshold": self.threshold,
            "message": self.message,
            "timestamp": self.timestamp.isoformat(),
        }


@dataclass
class HostMetrics:
    """Current metrics for a host."""

    host: str
    cpu_percent: float = 0.0
    memory_percent: float = 0.0
    disk_percent: float = 0.0
    load_1min: float = 0.0
    load_5min: float = 0.0
    load_15min: float = 0.0
    timestamp: datetime = field(default_factory=lambda: datetime.now(timezone.utc))

    def to_dict(self) -> dict[str, Any]:
        """Convert to dictionary."""
        return {
            "host": self.host,
            "cpu_percent": self.cpu_percent,
            "memory_percent": self.memory_percent,
            "disk_percent": self.disk_percent,
            "load_1min": self.load_1min,
            "load_5min": self.load_5min,
            "load_15min": self.load_15min,
            "timestamp": self.timestamp.isoformat(),
        }


class AlertManager:
    """
    Monitors system metrics and generates alerts.
    """

    DEFAULT_THRESHOLDS = [
        AlertThreshold(MetricType.CPU, warning=80.0, critical=95.0),
        AlertThreshold(MetricType.MEMORY, warning=80.0, critical=95.0),
        AlertThreshold(MetricType.DISK, warning=80.0, critical=95.0),
        AlertThreshold(MetricType.LOAD, warning=2.0, critical=5.0),
    ]

    def __init__(
        self,
        check_interval: int = 60,
        thresholds: list[AlertThreshold] | None = None,
    ) -> None:
        """
        Initialize alert manager.

        Args:
            check_interval: Seconds between metric checks.
            thresholds: Custom alert thresholds.
        """
        self.check_interval = check_interval
        self.thresholds = thresholds or self.DEFAULT_THRESHOLDS.copy()

        self._machines: dict[str, MachineConfig] = {}
        self._metrics: dict[str, HostMetrics] = {}
        self._alerts: list[Alert] = []
        self._active_alerts: dict[str, Alert] = {}  # host:metric -> alert
        self._callbacks: list[Callable[[Alert], None]] = []

        self._lock = threading.Lock()
        self._shutdown = threading.Event()
        self._monitor_thread: threading.Thread | None = None

    def register_machine(self, machine: MachineConfig) -> None:
        """Register a machine for monitoring."""
        with self._lock:
            self._machines[machine.name] = machine
        logger.info("alert_machine_registered", host=machine.name)

    def unregister_machine(self, name: str) -> None:
        """Unregister a machine from monitoring."""
        with self._lock:
            if name in self._machines:
                del self._machines[name]
            if name in self._metrics:
                del self._metrics[name]
        logger.info("alert_machine_unregistered", host=name)

    def set_threshold(
        self,
        metric: MetricType,
        warning: float,
        critical: float,
        duration: int = 60,
    ) -> None:
        """
        Set or update a threshold.

        Args:
            metric: Metric type.
            warning: Warning threshold.
            critical: Critical threshold.
            duration: Duration in seconds.
        """
        # Remove existing threshold for this metric
        self.thresholds = [t for t in self.thresholds if t.metric != metric]
        self.thresholds.append(AlertThreshold(metric, warning, critical, duration))

    def register_callback(self, callback: Callable[[Alert], None]) -> None:
        """Register a callback for alert events."""
        self._callbacks.append(callback)

    def start_monitoring(self) -> None:
        """Start background monitoring."""
        if self._monitor_thread is not None and self._monitor_thread.is_alive():
            return

        self._shutdown.clear()
        self._monitor_thread = threading.Thread(
            target=self._monitor_loop,
            daemon=True,
            name="alert-monitor",
        )
        self._monitor_thread.start()
        logger.info("alert_monitoring_started")

    def stop_monitoring(self) -> None:
        """Stop background monitoring."""
        self._shutdown.set()
        if self._monitor_thread:
            self._monitor_thread.join(timeout=5)
            self._monitor_thread = None
        logger.info("alert_monitoring_stopped")

    def check_host(self, name: str) -> HostMetrics | None:
        """
        Check metrics for a single host.

        Args:
            name: Host name.

        Returns:
            HostMetrics or None if check failed.
        """
        with self._lock:
            if name not in self._machines:
                return None
            machine = self._machines[name]

        try:
            client = SSHClient(machine)
            client.connect(retry=False)

            try:
                metrics = self._collect_metrics(client, name)

                with self._lock:
                    self._metrics[name] = metrics

                self._check_thresholds(metrics)
                return metrics

            finally:
                client.disconnect()

        except Exception as e:
            logger.error("alert_check_failed", host=name, error=str(e))
            return None

    def get_metrics(self, name: str | None = None) -> dict[str, HostMetrics]:
        """
        Get current metrics.

        Args:
            name: Optional host name filter.

        Returns:
            Dictionary of host metrics.
        """
        with self._lock:
            if name:
                if name in self._metrics:
                    return {name: self._metrics[name]}
                return {}
            return dict(self._metrics)

    def get_alerts(
        self,
        host: str | None = None,
        severity: AlertSeverity | None = None,
        limit: int = 100,
    ) -> list[Alert]:
        """
        Get alert history.

        Args:
            host: Filter by host.
            severity: Filter by severity.
            limit: Maximum alerts to return.

        Returns:
            List of alerts.
        """
        with self._lock:
            alerts = list(self._alerts)

        if host:
            alerts = [a for a in alerts if a.host == host]
        if severity:
            alerts = [a for a in alerts if a.severity == severity]

        # Sort by timestamp descending
        alerts.sort(key=lambda a: a.timestamp, reverse=True)
        return alerts[:limit]

    def get_active_alerts(self) -> list[Alert]:
        """Get currently active alerts."""
        with self._lock:
            return list(self._active_alerts.values())

    def clear_alert(self, host: str, metric: MetricType) -> bool:
        """
        Clear an active alert.

        Args:
            host: Host name.
            metric: Metric type.

        Returns:
            True if alert was cleared.
        """
        key = f"{host}:{metric.value}"
        with self._lock:
            if key in self._active_alerts:
                del self._active_alerts[key]
                return True
        return False

    def _monitor_loop(self) -> None:
        """Background monitoring loop."""
        while not self._shutdown.is_set():
            with self._lock:
                machines = list(self._machines.values())

            for machine in machines:
                if self._shutdown.is_set():
                    break
                self.check_host(machine.name)

            self._shutdown.wait(timeout=self.check_interval)

    def _collect_metrics(self, client: SSHClient, host: str) -> HostMetrics:
        """Collect metrics from a host."""
        metrics = HostMetrics(host=host)

        # Get CPU usage
        try:
            result = client.execute("top -bn1 | grep 'Cpu' | awk '{print 100-$8}'")
            if result.exit_code == 0 and result.stdout.strip():
                metrics.cpu_percent = float(result.stdout.strip())
        except Exception:
            pass

        # Get memory usage
        try:
            result = client.execute("free | grep Mem | awk '{print $3/$2 * 100}'")
            if result.exit_code == 0 and result.stdout.strip():
                metrics.memory_percent = float(result.stdout.strip())
        except Exception:
            pass

        # Get disk usage
        try:
            result = client.execute("df / | tail -1 | awk '{print $5}' | tr -d '%'")
            if result.exit_code == 0 and result.stdout.strip():
                metrics.disk_percent = float(result.stdout.strip())
        except Exception:
            pass

        # Get load average
        try:
            result = client.execute("cat /proc/loadavg")
            if result.exit_code == 0:
                parts = result.stdout.split()
                if len(parts) >= 3:
                    metrics.load_1min = float(parts[0])
                    metrics.load_5min = float(parts[1])
                    metrics.load_15min = float(parts[2])
        except Exception:
            pass

        return metrics

    def _check_thresholds(self, metrics: HostMetrics) -> None:
        """Check metrics against thresholds and generate alerts."""
        for threshold in self.thresholds:
            value = self._get_metric_value(metrics, threshold.metric)
            if value is None:
                continue

            severity = None
            if value >= threshold.critical:
                severity = AlertSeverity.CRITICAL
            elif value >= threshold.warning:
                severity = AlertSeverity.WARNING

            key = f"{metrics.host}:{threshold.metric.value}"

            if severity:
                alert = Alert(
                    host=metrics.host,
                    metric=threshold.metric,
                    severity=severity,
                    value=value,
                    threshold=threshold.critical
                    if severity == AlertSeverity.CRITICAL
                    else threshold.warning,
                    message=f"{threshold.metric.value} is {value:.1f}% (threshold: {threshold.warning}/{threshold.critical})",
                )

                with self._lock:
                    # Only notify if new or severity changed
                    existing = self._active_alerts.get(key)
                    if not existing or existing.severity != severity:
                        self._active_alerts[key] = alert
                        self._alerts.append(alert)

                        # Limit alert history
                        if len(self._alerts) > 1000:
                            self._alerts = self._alerts[-500:]

                # Notify callbacks
                if not existing or existing.severity != severity:
                    for callback in self._callbacks:
                        try:
                            callback(alert)
                        except Exception:
                            pass

                    logger.warning(
                        "alert_triggered",
                        host=metrics.host,
                        metric=threshold.metric.value,
                        severity=severity.value,
                        value=value,
                    )
            else:
                # Clear alert if below threshold
                with self._lock:
                    if key in self._active_alerts:
                        del self._active_alerts[key]
                        logger.info(
                            "alert_cleared",
                            host=metrics.host,
                            metric=threshold.metric.value,
                        )

    def _get_metric_value(
        self, metrics: HostMetrics, metric_type: MetricType
    ) -> float | None:
        """Get metric value by type."""
        if metric_type == MetricType.CPU:
            return metrics.cpu_percent
        elif metric_type == MetricType.MEMORY:
            return metrics.memory_percent
        elif metric_type == MetricType.DISK:
            return metrics.disk_percent
        elif metric_type == MetricType.LOAD:
            return metrics.load_1min
        return None


# Global alert manager instance
_alert_manager: AlertManager | None = None


def get_alert_manager() -> AlertManager:
    """Get or create the global alert manager."""
    global _alert_manager
    if _alert_manager is None:
        _alert_manager = AlertManager()
    return _alert_manager


def init_alert_manager(
    check_interval: int = 60,
    thresholds: list[AlertThreshold] | None = None,
) -> AlertManager:
    """
    Initialize the global alert manager.

    Args:
        check_interval: Seconds between checks.
        thresholds: Custom thresholds.
    """
    global _alert_manager
    _alert_manager = AlertManager(
        check_interval=check_interval,
        thresholds=thresholds,
    )
    return _alert_manager
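
For orientation, a minimal usage sketch of the `AlertManager` API above. The interval and threshold values are illustrative, and `MachineConfig` construction is elided since its fields live in sshmcp/models/machine.py (not shown in this diff):

```python
from sshmcp.monitoring.alerts import (
    Alert,
    AlertThreshold,
    MetricType,
    init_alert_manager,
)

# Poll every 30 seconds with a tighter disk threshold (illustrative values).
# Note: passing thresholds replaces DEFAULT_THRESHOLDS entirely.
manager = init_alert_manager(
    check_interval=30,
    thresholds=[AlertThreshold(MetricType.DISK, warning=70.0, critical=90.0)],
)

def on_alert(alert: Alert) -> None:
    # Invoked once per new alert or severity change; exceptions are swallowed.
    print(alert.to_dict())

manager.register_callback(on_alert)
# manager.register_machine(machine)  # machine: a MachineConfig instance
manager.start_monitoring()  # runs checks in a daemon thread
```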
sshmcp/prompts/backup.py
ADDED

@@ -0,0 +1,151 @@
"""MCP Prompt for database backup."""


def backup_database(
    host: str,
    database_name: str,
    database_type: str = "postgresql",
    backup_path: str = "/var/backups",
    compress: bool = True,
) -> str:
    """
    Generate database backup prompt.

    Creates a step-by-step backup plan for a database.

    Args:
        host: Target host name.
        database_name: Name of the database to backup.
        database_type: Database type (postgresql, mysql, mongodb).
        backup_path: Directory for backup files.
        compress: Whether to compress the backup.

    Returns:
        Backup instructions as multi-line string.
    """
    # Build backup command based on database type
    timestamp = "$(date +%Y%m%d_%H%M%S)"

    if database_type == "postgresql":
        dump_cmd = f"pg_dump {database_name}"
        backup_file = f"{backup_path}/{database_name}_{timestamp}.sql"
        if compress:
            dump_cmd = f"pg_dump {database_name} | gzip"
            backup_file = f"{backup_path}/{database_name}_{timestamp}.sql.gz"
        dump_full = f"{dump_cmd} > {backup_file}"
        verify_cmd = f"{'zcat' if compress else 'head -20'} {backup_file}"

    elif database_type == "mysql":
        dump_cmd = f"mysqldump {database_name}"
        backup_file = f"{backup_path}/{database_name}_{timestamp}.sql"
        if compress:
            dump_cmd = f"mysqldump {database_name} | gzip"
            backup_file = f"{backup_path}/{database_name}_{timestamp}.sql.gz"
        dump_full = f"{dump_cmd} > {backup_file}"
        verify_cmd = f"{'zcat' if compress else 'head -20'} {backup_file}"

    elif database_type == "mongodb":
        backup_file = f"{backup_path}/{database_name}_{timestamp}"
        dump_full = f"mongodump --db {database_name} --out {backup_file}"
        if compress:
            dump_full = (
                f"mongodump --db {database_name} --archive={backup_file}.gz --gzip"
            )
            backup_file = f"{backup_file}.gz"
        verify_cmd = f"ls -la {backup_file}"

    else:
        dump_full = f"echo 'Unknown database type: {database_type}'"
        backup_file = "unknown"
        verify_cmd = "echo 'Cannot verify'"

    # Build connectivity check command
    if database_type == "postgresql":
        conn_check = 'psql -c "SELECT 1"'
    elif database_type == "mysql":
        conn_check = 'mysql -e "SELECT 1"'
    else:
        conn_check = 'mongosh --eval "db.runCommand({ping: 1})"'

    return f"""Backup database {database_name} on {host}:

## Pre-backup Checks

1. Check server status and disk space:
   - Use get_status resource for {host}
   - Use get_metrics resource for {host} to check disk space
   ```
   execute_command(host="{host}", command="df -h {backup_path}")
   ```

2. Ensure backup directory exists:
   ```
   execute_command(host="{host}", command="mkdir -p {backup_path}")
   ```

3. Check database connectivity:
   ```
   execute_command(host="{host}", command="{conn_check}")
   ```

## Backup Steps

4. Create database dump:
   ```
   execute_command(host="{host}", command="{dump_full}", timeout=600)
   ```

5. Verify backup file was created:
   ```
   execute_command(host="{host}", command="ls -la {backup_file}")
   ```

6. Verify backup integrity:
   ```
   execute_command(host="{host}", command="{verify_cmd}")
   ```

## Post-backup Tasks

7. Calculate backup checksum:
   ```
   execute_command(host="{host}", command="md5sum {backup_file}")
   ```

8. Clean up old backups (keep last 7):
   ```
   execute_command(host="{host}", command="ls -t {backup_path}/{database_name}_* | tail -n +8 | xargs -r rm")
   ```

9. List current backups:
   ```
   execute_command(host="{host}", command="ls -la {backup_path}/{database_name}_*")
   ```

## Backup Details
- Database: {database_name}
- Type: {database_type}
- Backup path: {backup_path}
- Compression: {"Enabled (gzip)" if compress else "Disabled"}
- Expected file: {backup_file}

## Restore Command (for reference)
{_get_restore_command(database_type, database_name, backup_file)}
"""


def _get_restore_command(db_type: str, db_name: str, backup_file: str) -> str:
    """Get restore command for database type."""
    if db_type == "postgresql":
        if backup_file.endswith(".gz"):
            return f"gunzip -c {backup_file} | psql {db_name}"
        return f"psql {db_name} < {backup_file}"
    elif db_type == "mysql":
        if backup_file.endswith(".gz"):
            return f"gunzip -c {backup_file} | mysql {db_name}"
        return f"mysql {db_name} < {backup_file}"
    elif db_type == "mongodb":
        if backup_file.endswith(".gz"):
            return f"mongorestore --db {db_name} --archive={backup_file} --gzip"
        return f"mongorestore --db {db_name} {backup_file}"
    return "# Unknown database type"
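
For reference, a short sketch of how this prompt function renders; the host and database names are illustrative, not taken from the package:

```python
from sshmcp.prompts.backup import backup_database

# Render a compressed-backup plan for a hypothetical MySQL database.
plan = backup_database(
    host="web-1",           # illustrative host name
    database_name="appdb",  # illustrative database name
    database_type="mysql",
    compress=True,
)
print(plan)  # step-by-step Markdown instructions for the MCP client to follow
```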