kekkai-cli 1.1.0__py3-none-any.whl → 2.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
portal/ops/restore.py DELETED
@@ -1,469 +0,0 @@
1
- """Restore functionality for Kekkai Portal.
2
-
3
- Provides restore operations for:
4
- - PostgreSQL database (via pg_restore)
5
- - Media/upload files
6
- - Audit logs
7
-
8
- Security controls:
9
- - Backup integrity verification before restore
10
- - Dry-run capability for validation
11
- - Transaction-safe database restore
12
- - No secrets in restore logs
13
- """
14
-
15
- from __future__ import annotations
16
-
17
- import json
18
- import logging
19
- import os
20
- import shutil
21
- import subprocess
22
- import tarfile
23
- import tempfile
24
- from dataclasses import dataclass, field
25
- from datetime import UTC, datetime
26
- from enum import Enum
27
- from pathlib import Path
28
- from typing import Any
29
-
30
- from .backup import BackupJob
31
-
32
- logger = logging.getLogger(__name__)
33
-
34
-
35
class RestoreScope(Enum):
    """Which part of a backup a restore operation targets.

    The member values are the strings emitted when a restore result is
    serialized (see ``scope.value``).
    """

    # Every component found in the backup archive.
    FULL = "full"
    # Only the database dump.
    DATABASE = "database"
    # Only the uploaded media files.
    MEDIA = "media"
    # Only the audit logs.
    AUDIT_LOGS = "audit_logs"
42
-
43
-
44
@dataclass
class RestoreConfig:
    """Settings that control how a restore is carried out.

    Path-like fields accept plain strings; they are converted to ``Path``
    objects once the dataclass has been initialised.
    """

    # Connection parameters handed to the database restore command.
    db_host: str = "localhost"
    db_port: int = 5432
    db_name: str = "defectdojo"
    db_user: str = "defectdojo"

    # Destination directory for restored media/upload files.
    media_path: Path = field(default_factory=lambda: Path("/var/lib/kekkai-portal/uploads"))
    # Destination for restored audit logs; ``None`` disables audit log restore.
    audit_log_path: Path | None = None

    # When True, components are only logged, never written.
    dry_run: bool = False
    verify_before_restore: bool = True
    stop_services: bool = True

    def __post_init__(self) -> None:
        """Coerce string paths supplied by the caller into ``Path`` objects."""
        media = self.media_path
        if isinstance(media, str):
            self.media_path = Path(media)

        audit = self.audit_log_path
        # An empty string is falsy and is deliberately left untouched.
        if isinstance(audit, str) and audit:
            self.audit_log_path = Path(audit)
65
-
66
-
67
- @dataclass
68
- class RestoreResult:
69
- """Result of a restore operation."""
70
-
71
- success: bool
72
- backup_id: str
73
- scope: RestoreScope
74
- timestamp: datetime
75
- components_restored: list[str] = field(default_factory=list)
76
- error: str | None = None
77
- duration_seconds: float = 0.0
78
- dry_run: bool = False
79
- warnings: list[str] = field(default_factory=list)
80
-
81
- def to_dict(self) -> dict[str, Any]:
82
- """Convert to dictionary for serialization."""
83
- return {
84
- "success": self.success,
85
- "backup_id": self.backup_id,
86
- "scope": self.scope.value,
87
- "timestamp": self.timestamp.isoformat(),
88
- "components_restored": self.components_restored,
89
- "error": self.error,
90
- "duration_seconds": self.duration_seconds,
91
- "dry_run": self.dry_run,
92
- "warnings": self.warnings,
93
- }
94
-
95
-
96
- class RestoreJob:
97
- """Manages restore operations for Kekkai Portal."""
98
-
99
- def __init__(self, config: RestoreConfig, backup_job: BackupJob | None = None) -> None:
100
- self._config = config
101
- self._backup_job = backup_job
102
-
103
- def restore_full(self, backup_path: str | Path) -> RestoreResult:
104
- """Perform a full restore from backup."""
105
- backup_path = Path(backup_path)
106
- start_time = datetime.now(UTC)
107
- backup_id = self._extract_backup_id(backup_path)
108
-
109
- if self._config.verify_before_restore and self._backup_job:
110
- valid, msg = self._backup_job.verify_backup(backup_path)
111
- if not valid:
112
- return RestoreResult(
113
- success=False,
114
- backup_id=backup_id,
115
- scope=RestoreScope.FULL,
116
- timestamp=start_time,
117
- error=f"Backup verification failed: {msg}",
118
- )
119
-
120
- try:
121
- with tempfile.TemporaryDirectory() as tmpdir:
122
- tmp_path = Path(tmpdir)
123
- extract_dir = tmp_path / "extracted"
124
-
125
- self._extract_backup(backup_path, extract_dir)
126
-
127
- backup_content_dir = self._find_backup_content(extract_dir)
128
- if not backup_content_dir:
129
- return RestoreResult(
130
- success=False,
131
- backup_id=backup_id,
132
- scope=RestoreScope.FULL,
133
- timestamp=start_time,
134
- error="Invalid backup structure",
135
- )
136
-
137
- _ = self._read_manifest(backup_content_dir) # Validate manifest exists
138
- components_restored = []
139
- warnings: list[str] = []
140
-
141
- db_file = backup_content_dir / "database.sql"
142
- if db_file.exists():
143
- if self._config.dry_run:
144
- logger.info("restore.dry_run component=database")
145
- else:
146
- db_result = self._restore_database(db_file)
147
- if db_result["success"]:
148
- components_restored.append("database")
149
- else:
150
- warnings.append(f"Database restore failed: {db_result.get('error')}")
151
-
152
- media_dir = backup_content_dir / "media"
153
- if media_dir.exists():
154
- if self._config.dry_run:
155
- logger.info("restore.dry_run component=media")
156
- else:
157
- media_result = self._restore_media(media_dir)
158
- if media_result["success"]:
159
- components_restored.append("media")
160
- else:
161
- warnings.append(f"Media restore failed: {media_result.get('error')}")
162
-
163
- audit_dir = backup_content_dir / "audit"
164
- if audit_dir.exists() and self._config.audit_log_path:
165
- if self._config.dry_run:
166
- logger.info("restore.dry_run component=audit_logs")
167
- else:
168
- audit_result = self._restore_audit_logs(audit_dir)
169
- if audit_result["success"]:
170
- components_restored.append("audit_logs")
171
- else:
172
- warnings.append(
173
- f"Audit log restore failed: {audit_result.get('error')}"
174
- )
175
-
176
- duration = (datetime.now(UTC) - start_time).total_seconds()
177
-
178
- logger.info(
179
- "restore.complete backup_id=%s components=%s duration=%.2f dry_run=%s",
180
- backup_id,
181
- ",".join(components_restored),
182
- duration,
183
- self._config.dry_run,
184
- )
185
-
186
- return RestoreResult(
187
- success=True,
188
- backup_id=backup_id,
189
- scope=RestoreScope.FULL,
190
- timestamp=start_time,
191
- components_restored=components_restored,
192
- duration_seconds=duration,
193
- dry_run=self._config.dry_run,
194
- warnings=warnings if warnings else [],
195
- )
196
-
197
- except Exception as e:
198
- logger.error("restore.failed backup_id=%s error=%s", backup_id, str(e))
199
- return RestoreResult(
200
- success=False,
201
- backup_id=backup_id,
202
- scope=RestoreScope.FULL,
203
- timestamp=start_time,
204
- error=f"Restore failed: {type(e).__name__}",
205
- )
206
-
207
- def restore_database(self, backup_path: str | Path) -> RestoreResult:
208
- """Restore only the database from backup."""
209
- backup_path = Path(backup_path)
210
- start_time = datetime.now(UTC)
211
- backup_id = self._extract_backup_id(backup_path)
212
-
213
- try:
214
- with tempfile.TemporaryDirectory() as tmpdir:
215
- tmp_path = Path(tmpdir)
216
- extract_dir = tmp_path / "extracted"
217
- self._extract_backup(backup_path, extract_dir)
218
-
219
- backup_content_dir = self._find_backup_content(extract_dir)
220
- if not backup_content_dir:
221
- return RestoreResult(
222
- success=False,
223
- backup_id=backup_id,
224
- scope=RestoreScope.DATABASE,
225
- timestamp=start_time,
226
- error="Invalid backup structure",
227
- )
228
-
229
- db_file = backup_content_dir / "database.sql"
230
- if not db_file.exists():
231
- return RestoreResult(
232
- success=False,
233
- backup_id=backup_id,
234
- scope=RestoreScope.DATABASE,
235
- timestamp=start_time,
236
- error="Database backup not found in archive",
237
- )
238
-
239
- if self._config.dry_run:
240
- logger.info("restore.dry_run component=database")
241
- return RestoreResult(
242
- success=True,
243
- backup_id=backup_id,
244
- scope=RestoreScope.DATABASE,
245
- timestamp=start_time,
246
- components_restored=[],
247
- dry_run=True,
248
- )
249
-
250
- db_result = self._restore_database(db_file)
251
- duration = (datetime.now(UTC) - start_time).total_seconds()
252
-
253
- if not db_result["success"]:
254
- return RestoreResult(
255
- success=False,
256
- backup_id=backup_id,
257
- scope=RestoreScope.DATABASE,
258
- timestamp=start_time,
259
- error=db_result.get("error", "Database restore failed"),
260
- )
261
-
262
- return RestoreResult(
263
- success=True,
264
- backup_id=backup_id,
265
- scope=RestoreScope.DATABASE,
266
- timestamp=start_time,
267
- components_restored=["database"],
268
- duration_seconds=duration,
269
- )
270
-
271
- except Exception as e:
272
- logger.error("restore.database.failed backup_id=%s error=%s", backup_id, str(e))
273
- return RestoreResult(
274
- success=False,
275
- backup_id=backup_id,
276
- scope=RestoreScope.DATABASE,
277
- timestamp=start_time,
278
- error=f"Database restore failed: {type(e).__name__}",
279
- )
280
-
281
- def validate_backup(self, backup_path: str | Path) -> tuple[bool, dict[str, Any]]:
282
- """Validate backup contents without restoring.
283
-
284
- Returns:
285
- Tuple of (is_valid, details dict)
286
- """
287
- backup_path = Path(backup_path)
288
- if not backup_path.exists():
289
- return False, {"error": "Backup file not found"}
290
-
291
- details: dict[str, Any] = {
292
- "path": str(backup_path),
293
- "size_bytes": backup_path.stat().st_size,
294
- "components": [],
295
- "manifest": None,
296
- }
297
-
298
- try:
299
- with tempfile.TemporaryDirectory() as tmpdir:
300
- tmp_path = Path(tmpdir)
301
- extract_dir = tmp_path / "extracted"
302
- self._extract_backup(backup_path, extract_dir)
303
-
304
- backup_content_dir = self._find_backup_content(extract_dir)
305
- if not backup_content_dir:
306
- return False, {"error": "Invalid backup structure", **details}
307
-
308
- manifest = self._read_manifest(backup_content_dir)
309
- details["manifest"] = manifest
310
-
311
- if (backup_content_dir / "database.sql").exists():
312
- details["components"].append("database")
313
- if (backup_content_dir / "media").exists():
314
- details["components"].append("media")
315
- if (backup_content_dir / "audit").exists():
316
- details["components"].append("audit_logs")
317
-
318
- return True, details
319
-
320
- except Exception as e:
321
- return False, {"error": str(e), **details}
322
-
323
- def _extract_backup(self, backup_path: Path, extract_dir: Path) -> None:
324
- """Extract backup archive to directory."""
325
- extract_dir.mkdir(parents=True, exist_ok=True)
326
-
327
- if backup_path.suffix == ".gz" or backup_path.name.endswith(".tar.gz"):
328
- with tarfile.open(backup_path, "r:gz") as tar:
329
- tar.extractall(extract_dir, filter="data")
330
- else:
331
- with tarfile.open(backup_path, "r") as tar:
332
- tar.extractall(extract_dir, filter="data")
333
-
334
- def _find_backup_content(self, extract_dir: Path) -> Path | None:
335
- """Find the backup content directory within extraction."""
336
- for item in extract_dir.iterdir():
337
- if item.is_dir() and (
338
- (item / "manifest.json").exists() or (item / "database.sql").exists()
339
- ):
340
- return item
341
- if (extract_dir / "manifest.json").exists() or (extract_dir / "database.sql").exists():
342
- return extract_dir
343
- return None
344
-
345
- def _read_manifest(self, backup_dir: Path) -> dict[str, Any] | None:
346
- """Read backup manifest if present."""
347
- manifest_path = backup_dir / "manifest.json"
348
- if manifest_path.exists():
349
- try:
350
- result: dict[str, Any] = json.loads(manifest_path.read_text())
351
- return result
352
- except json.JSONDecodeError:
353
- return None
354
- return None
355
-
356
- def _restore_database(self, db_file: Path) -> dict[str, Any]:
357
- """Execute pg_restore for database restore."""
358
- env = os.environ.copy()
359
- db_password = os.environ.get("DD_DATABASE_PASSWORD", "")
360
- if db_password:
361
- env["PGPASSWORD"] = db_password
362
-
363
- cmd = [
364
- "pg_restore",
365
- "-h",
366
- self._config.db_host,
367
- "-p",
368
- str(self._config.db_port),
369
- "-U",
370
- self._config.db_user,
371
- "-d",
372
- self._config.db_name,
373
- "--clean",
374
- "--if-exists",
375
- "--no-owner",
376
- "--no-password",
377
- str(db_file),
378
- ]
379
-
380
- try:
381
- result = subprocess.run( # noqa: S603
382
- cmd,
383
- env=env,
384
- capture_output=True,
385
- text=True,
386
- timeout=7200,
387
- check=False,
388
- )
389
-
390
- if result.returncode not in (0, 1):
391
- error_msg = result.stderr[:500] if result.stderr else "Unknown error"
392
- logger.error("pg_restore failed: %s", error_msg)
393
- return {"success": False, "error": "Database restore failed"}
394
-
395
- return {"success": True}
396
-
397
- except subprocess.TimeoutExpired:
398
- return {"success": False, "error": "Database restore timed out"}
399
- except FileNotFoundError:
400
- return {"success": False, "error": "pg_restore not found"}
401
-
402
- def _restore_media(self, source_dir: Path) -> dict[str, Any]:
403
- """Restore media files from backup."""
404
- try:
405
- self._config.media_path.mkdir(parents=True, exist_ok=True)
406
- shutil.copytree(source_dir, self._config.media_path, dirs_exist_ok=True)
407
- return {"success": True}
408
- except OSError as e:
409
- return {"success": False, "error": str(e)}
410
-
411
- def _restore_audit_logs(self, source_dir: Path) -> dict[str, Any]:
412
- """Restore audit logs from backup."""
413
- if not self._config.audit_log_path:
414
- return {"success": False, "error": "Audit log path not configured"}
415
-
416
- try:
417
- self._config.audit_log_path.parent.mkdir(parents=True, exist_ok=True)
418
-
419
- audit_file = source_dir / "audit.jsonl"
420
- if audit_file.exists():
421
- shutil.copy2(audit_file, self._config.audit_log_path)
422
- else:
423
- shutil.copytree(source_dir, self._config.audit_log_path, dirs_exist_ok=True)
424
- return {"success": True}
425
- except OSError as e:
426
- return {"success": False, "error": str(e)}
427
-
428
- def _extract_backup_id(self, backup_path: Path) -> str:
429
- """Extract backup ID from path."""
430
- name = backup_path.name
431
- if name.endswith(".tar.gz"):
432
- name = name[:-7]
433
- elif name.endswith(".tar"):
434
- name = name[:-4]
435
- elif name.endswith(".gz"):
436
- name = name[:-3]
437
- return name
438
-
439
-
440
def create_restore_job(
    db_host: str | None = None,
    db_name: str | None = None,
    media_path: str | Path | None = None,
    dry_run: bool = False,
    backup_job: BackupJob | None = None,
) -> RestoreJob:
    """Build a ``RestoreJob`` from explicit arguments and the environment.

    For the database host, database name and media path an explicit
    (truthy) argument wins; otherwise the corresponding environment variable
    is used when it is set and non-empty; otherwise the ``RestoreConfig``
    default stays in place. The audit log path comes from the environment
    only.

    Environment variables consulted: ``DD_DATABASE_HOST``,
    ``DD_DATABASE_NAME``, ``PORTAL_UPLOAD_DIR``, ``PORTAL_AUDIT_DIR``.
    """
    settings = RestoreConfig()
    settings.dry_run = dry_run

    host = db_host or os.environ.get("DD_DATABASE_HOST")
    if host:
        settings.db_host = host

    name = db_name or os.environ.get("DD_DATABASE_NAME")
    if name:
        settings.db_name = name

    uploads = media_path or os.environ.get("PORTAL_UPLOAD_DIR")
    if uploads:
        settings.media_path = Path(uploads)

    audit_dir = os.environ.get("PORTAL_AUDIT_DIR")
    if audit_dir:
        settings.audit_log_path = Path(audit_dir)

    return RestoreJob(settings, backup_job)