kekkai-cli 1.1.0-py3-none-any.whl → 1.1.1-py3-none-any.whl

This diff compares the contents of two publicly released versions of the package, as published to a supported registry, and is provided for informational purposes only.
portal/ops/backup.py DELETED
@@ -1,553 +0,0 @@
- """Backup job implementation for Kekkai Portal.
-
- Provides automated backup for:
- - PostgreSQL database (via pg_dump)
- - Media/upload files
- - Audit logs
-
- Security controls:
- - Encrypted backups (AES-256-GCM)
- - Integrity verification (SHA-256 checksums)
- - Retention policy management
- - No secrets in backup metadata
- """
-
- from __future__ import annotations
-
- import gzip
- import hashlib
- import json
- import logging
- import os
- import secrets
- import shutil
- import subprocess
- import tempfile
- from dataclasses import dataclass, field
- from datetime import UTC, datetime, timedelta
- from enum import Enum
- from pathlib import Path
- from typing import Any
-
- logger = logging.getLogger(__name__)
-
- BACKUP_FORMAT_VERSION = 1
- CIPHER_SUITE = "AES-256-GCM"
-
-
- class BackupType(Enum):
-     """Type of backup."""
-
-     FULL = "full"
-     DATABASE = "database"
-     MEDIA = "media"
-     AUDIT_LOGS = "audit_logs"
-
-
- class BackupDestination(Enum):
-     """Backup storage destination."""
-
-     LOCAL = "local"
-     S3 = "s3"
-
-
- @dataclass
- class BackupConfig:
-     """Configuration for backup jobs."""
-
-     destination: BackupDestination = BackupDestination.LOCAL
-     local_path: Path = field(default_factory=lambda: Path("/var/lib/kekkai-portal/backups"))
-     s3_bucket: str | None = None
-     s3_prefix: str = "backups/"
-     s3_endpoint: str | None = None
-
-     db_host: str = "localhost"
-     db_port: int = 5432
-     db_name: str = "defectdojo"
-     db_user: str = "defectdojo"
-
-     media_path: Path = field(default_factory=lambda: Path("/var/lib/kekkai-portal/uploads"))
-     audit_log_path: Path | None = None
-
-     encryption_enabled: bool = True
-     encryption_key: bytes | None = None
-
-     retention_days: int = 30
-     retention_count: int = 10
-
-     compress: bool = True
-
-     def __post_init__(self) -> None:
-         if isinstance(self.local_path, str):
-             self.local_path = Path(self.local_path)
-         if isinstance(self.media_path, str):
-             self.media_path = Path(self.media_path)
-         if self.audit_log_path and isinstance(self.audit_log_path, str):
-             self.audit_log_path = Path(self.audit_log_path)
-
-
- @dataclass
- class BackupResult:
-     """Result of a backup operation."""
-
-     success: bool
-     backup_id: str
-     backup_type: BackupType
-     timestamp: datetime
-     size_bytes: int = 0
-     checksum: str = ""
-     destination_path: str = ""
-     error: str | None = None
-     duration_seconds: float = 0.0
-     encrypted: bool = False
-
-     def to_dict(self) -> dict[str, Any]:
-         """Convert to dictionary for serialization."""
-         return {
-             "success": self.success,
-             "backup_id": self.backup_id,
-             "backup_type": self.backup_type.value,
-             "timestamp": self.timestamp.isoformat(),
-             "size_bytes": self.size_bytes,
-             "checksum": self.checksum,
-             "destination_path": self.destination_path,
-             "error": self.error,
-             "duration_seconds": self.duration_seconds,
-             "encrypted": self.encrypted,
-             "format_version": BACKUP_FORMAT_VERSION,
-         }
-
-
- class BackupJob:
-     """Manages backup operations for Kekkai Portal."""
-
-     def __init__(self, config: BackupConfig) -> None:
-         self._config = config
-         self._ensure_destination()
-
-     def _ensure_destination(self) -> None:
-         """Ensure backup destination exists."""
-         if self._config.destination == BackupDestination.LOCAL:
-             self._config.local_path.mkdir(parents=True, exist_ok=True)
-
-     def backup_full(self) -> BackupResult:
-         """Perform a full backup of all components."""
-         backup_id = self._generate_backup_id("full")
-         start_time = datetime.now(UTC)
-
-         try:
-             with tempfile.TemporaryDirectory() as tmpdir:
-                 tmp_path = Path(tmpdir)
-
-                 db_result = self._backup_database(tmp_path / "database.sql")
-                 if not db_result["success"]:
-                     return BackupResult(
-                         success=False,
-                         backup_id=backup_id,
-                         backup_type=BackupType.FULL,
-                         timestamp=start_time,
-                         error=db_result.get("error", "Database backup failed"),
-                     )
-
-                 media_result = self._backup_media(tmp_path / "media")
-                 if not media_result["success"]:
-                     return BackupResult(
-                         success=False,
-                         backup_id=backup_id,
-                         backup_type=BackupType.FULL,
-                         timestamp=start_time,
-                         error=media_result.get("error", "Media backup failed"),
-                     )
-
-                 if self._config.audit_log_path:
-                     self._backup_audit_logs(tmp_path / "audit")
-
-                 manifest = {
-                     "backup_id": backup_id,
-                     "type": "full",
-                     "timestamp": start_time.isoformat(),
-                     "format_version": BACKUP_FORMAT_VERSION,
-                     "components": ["database", "media", "audit_logs"],
-                     "cipher_suite": CIPHER_SUITE if self._config.encryption_enabled else None,
-                 }
-                 (tmp_path / "manifest.json").write_text(json.dumps(manifest, indent=2))
-
-                 archive_path = self._create_archive(tmp_path, backup_id)
-                 final_path = self._store_backup(archive_path, backup_id)
-
-                 checksum = self._compute_checksum(final_path)
-                 size = final_path.stat().st_size
-
-                 self._write_checksum_file(final_path, checksum)
-
-                 duration = (datetime.now(UTC) - start_time).total_seconds()
-
-                 logger.info(
-                     "backup.complete backup_id=%s type=full size=%d duration=%.2f",
-                     backup_id,
-                     size,
-                     duration,
-                 )
-
-                 return BackupResult(
-                     success=True,
-                     backup_id=backup_id,
-                     backup_type=BackupType.FULL,
-                     timestamp=start_time,
-                     size_bytes=size,
-                     checksum=checksum,
-                     destination_path=str(final_path),
-                     duration_seconds=duration,
-                     encrypted=self._config.encryption_enabled,
-                 )
-
-         except Exception as e:
-             logger.error("backup.failed backup_id=%s error=%s", backup_id, str(e))
-             return BackupResult(
-                 success=False,
-                 backup_id=backup_id,
-                 backup_type=BackupType.FULL,
-                 timestamp=start_time,
-                 error=f"Backup failed: {type(e).__name__}",
-             )
-
-     def backup_database(self) -> BackupResult:
-         """Backup only the database."""
-         backup_id = self._generate_backup_id("db")
-         start_time = datetime.now(UTC)
-
-         try:
-             with tempfile.TemporaryDirectory() as tmpdir:
-                 tmp_path = Path(tmpdir)
-                 db_file = tmp_path / "database.sql"
-
-                 result = self._backup_database(db_file)
-                 if not result["success"]:
-                     return BackupResult(
-                         success=False,
-                         backup_id=backup_id,
-                         backup_type=BackupType.DATABASE,
-                         timestamp=start_time,
-                         error=result.get("error", "Database backup failed"),
-                     )
-
-                 archive_path = self._create_archive(tmp_path, backup_id)
-                 final_path = self._store_backup(archive_path, backup_id)
-                 checksum = self._compute_checksum(final_path)
-                 size = final_path.stat().st_size
-                 self._write_checksum_file(final_path, checksum)
-
-                 duration = (datetime.now(UTC) - start_time).total_seconds()
-
-                 return BackupResult(
-                     success=True,
-                     backup_id=backup_id,
-                     backup_type=BackupType.DATABASE,
-                     timestamp=start_time,
-                     size_bytes=size,
-                     checksum=checksum,
-                     destination_path=str(final_path),
-                     duration_seconds=duration,
-                     encrypted=self._config.encryption_enabled,
-                 )
-
-         except Exception as e:
-             logger.error("backup.database.failed backup_id=%s error=%s", backup_id, str(e))
-             return BackupResult(
-                 success=False,
-                 backup_id=backup_id,
-                 backup_type=BackupType.DATABASE,
-                 timestamp=start_time,
-                 error=f"Database backup failed: {type(e).__name__}",
-             )
-
-     def backup_media(self) -> BackupResult:
-         """Backup only media/upload files."""
-         backup_id = self._generate_backup_id("media")
-         start_time = datetime.now(UTC)
-
-         try:
-             with tempfile.TemporaryDirectory() as tmpdir:
-                 tmp_path = Path(tmpdir)
-                 media_dir = tmp_path / "media"
-
-                 result = self._backup_media(media_dir)
-                 if not result["success"]:
-                     return BackupResult(
-                         success=False,
-                         backup_id=backup_id,
-                         backup_type=BackupType.MEDIA,
-                         timestamp=start_time,
-                         error=result.get("error", "Media backup failed"),
-                     )
-
-                 archive_path = self._create_archive(tmp_path, backup_id)
-                 final_path = self._store_backup(archive_path, backup_id)
-                 checksum = self._compute_checksum(final_path)
-                 size = final_path.stat().st_size
-                 self._write_checksum_file(final_path, checksum)
-
-                 duration = (datetime.now(UTC) - start_time).total_seconds()
-
-                 return BackupResult(
-                     success=True,
-                     backup_id=backup_id,
-                     backup_type=BackupType.MEDIA,
-                     timestamp=start_time,
-                     size_bytes=size,
-                     checksum=checksum,
-                     destination_path=str(final_path),
-                     duration_seconds=duration,
-                     encrypted=self._config.encryption_enabled,
-                 )
-
-         except Exception as e:
-             logger.error("backup.media.failed backup_id=%s error=%s", backup_id, str(e))
-             return BackupResult(
-                 success=False,
-                 backup_id=backup_id,
-                 backup_type=BackupType.MEDIA,
-                 timestamp=start_time,
-                 error=f"Media backup failed: {type(e).__name__}",
-             )
-
-     def list_backups(self) -> list[dict[str, Any]]:
-         """List available backups."""
-         backups: list[dict[str, Any]] = []
-         if self._config.destination == BackupDestination.LOCAL:
-             if not self._config.local_path.exists():
-                 return backups
-
-             for item in self._config.local_path.iterdir():
-                 if item.suffix in (".tar", ".gz", ".enc"):
-                     checksum_file = item.with_suffix(item.suffix + ".sha256")
-                     checksum = ""
-                     if checksum_file.exists():
-                         checksum = checksum_file.read_text().strip().split()[0]
-
-                     backups.append(
-                         {
-                             "path": str(item),
-                             "name": item.name,
-                             "size_bytes": item.stat().st_size,
-                             "modified": datetime.fromtimestamp(
-                                 item.stat().st_mtime, tz=UTC
-                             ).isoformat(),
-                             "checksum": checksum,
-                         }
-                     )
-
-         return sorted(backups, key=lambda x: x["modified"], reverse=True)
-
-     def cleanup_old_backups(self) -> int:
-         """Remove backups older than retention policy. Returns count of removed backups."""
-         removed = 0
-         cutoff_date = datetime.now(UTC) - timedelta(days=self._config.retention_days)
-
-         if self._config.destination == BackupDestination.LOCAL:
-             if not self._config.local_path.exists():
-                 return 0
-
-             backups = self.list_backups()
-             if len(backups) <= self._config.retention_count:
-                 return 0
-
-             for backup in backups[self._config.retention_count :]:
-                 backup_path = Path(backup["path"])
-                 modified = datetime.fromisoformat(backup["modified"])
-
-                 if modified < cutoff_date:
-                     try:
-                         backup_path.unlink()
-                         checksum_file = backup_path.with_suffix(backup_path.suffix + ".sha256")
-                         if checksum_file.exists():
-                             checksum_file.unlink()
-                         removed += 1
-                         logger.info("backup.cleanup removed=%s", backup_path.name)
-                     except OSError as e:
-                         logger.warning("backup.cleanup.failed path=%s error=%s", backup_path, e)
-
-         return removed
-
-     def verify_backup(self, backup_path: str | Path) -> tuple[bool, str]:
-         """Verify backup integrity.
-
-         Returns:
-             Tuple of (is_valid, message)
-         """
-         backup_path = Path(backup_path)
-         if not backup_path.exists():
-             return False, "Backup file not found"
-
-         checksum_file = backup_path.with_suffix(backup_path.suffix + ".sha256")
-         if not checksum_file.exists():
-             return False, "Checksum file not found"
-
-         expected_checksum = checksum_file.read_text().strip().split()[0]
-         actual_checksum = self._compute_checksum(backup_path)
-
-         if expected_checksum != actual_checksum:
-             return False, f"Checksum mismatch: expected {expected_checksum}, got {actual_checksum}"
-
-         return True, "Backup integrity verified"
-
-     def _generate_backup_id(self, prefix: str) -> str:
-         """Generate a unique backup ID."""
-         timestamp = datetime.now(UTC).strftime("%Y%m%d_%H%M%S")
-         random_suffix = secrets.token_hex(4)
-         return f"{prefix}_{timestamp}_{random_suffix}"
-
-     def _backup_database(self, output_path: Path) -> dict[str, Any]:
-         """Execute pg_dump for database backup."""
-         output_path.parent.mkdir(parents=True, exist_ok=True)
-
-         env = os.environ.copy()
-         db_password = os.environ.get("DD_DATABASE_PASSWORD", "")
-         if db_password:
-             env["PGPASSWORD"] = db_password
-
-         cmd = [
-             "pg_dump",
-             "-h",
-             self._config.db_host,
-             "-p",
-             str(self._config.db_port),
-             "-U",
-             self._config.db_user,
-             "-d",
-             self._config.db_name,
-             "--format=custom",
-             "--no-password",
-             "-f",
-             str(output_path),
-         ]
-
-         try:
-             result = subprocess.run(  # noqa: S603
-                 cmd,
-                 env=env,
-                 capture_output=True,
-                 text=True,
-                 timeout=3600,
-                 check=False,
-             )
-
-             if result.returncode != 0:
-                 error_msg = result.stderr[:500] if result.stderr else "Unknown error"
-                 logger.error("pg_dump failed: %s", error_msg)
-                 return {"success": False, "error": "Database dump failed"}
-
-             return {"success": True, "path": str(output_path)}
-
-         except subprocess.TimeoutExpired:
-             return {"success": False, "error": "Database dump timed out"}
-         except FileNotFoundError:
-             return {"success": False, "error": "pg_dump not found"}
-
-     def _backup_media(self, output_dir: Path) -> dict[str, Any]:
-         """Copy media files to backup directory."""
-         if not self._config.media_path.exists():
-             output_dir.mkdir(parents=True, exist_ok=True)
-             return {"success": True, "path": str(output_dir), "files": 0}
-
-         try:
-             shutil.copytree(self._config.media_path, output_dir, dirs_exist_ok=True)
-             file_count = sum(1 for _ in output_dir.rglob("*") if _.is_file())
-             return {"success": True, "path": str(output_dir), "files": file_count}
-         except OSError as e:
-             return {"success": False, "error": str(e)}
-
-     def _backup_audit_logs(self, output_dir: Path) -> dict[str, Any]:
-         """Copy audit logs to backup directory."""
-         if not self._config.audit_log_path or not self._config.audit_log_path.exists():
-             output_dir.mkdir(parents=True, exist_ok=True)
-             return {"success": True, "path": str(output_dir), "files": 0}
-
-         try:
-             output_dir.mkdir(parents=True, exist_ok=True)
-             if self._config.audit_log_path.is_file():
-                 shutil.copy2(self._config.audit_log_path, output_dir / "audit.jsonl")
-             else:
-                 shutil.copytree(self._config.audit_log_path, output_dir, dirs_exist_ok=True)
-             return {"success": True, "path": str(output_dir)}
-         except OSError as e:
-             return {"success": False, "error": str(e)}
-
-     def _create_archive(self, source_dir: Path, backup_id: str) -> Path:
-         """Create compressed archive from source directory."""
-         archive_name = f"{backup_id}.tar"
-         if self._config.compress:
-             archive_name += ".gz"
-
-         archive_path = source_dir.parent / archive_name
-
-         if self._config.compress:
-             with gzip.open(archive_path, "wb") as gz_file:
-                 import tarfile
-
-                 with tarfile.open(fileobj=gz_file, mode="w") as tar:
-                     tar.add(source_dir, arcname=backup_id)
-         else:
-             import tarfile
-
-             with tarfile.open(archive_path, "w") as tar:
-                 tar.add(source_dir, arcname=backup_id)
-
-         return archive_path
-
-     def _store_backup(self, archive_path: Path, backup_id: str) -> Path:
-         """Store backup at configured destination."""
-         if self._config.destination == BackupDestination.LOCAL:
-             final_path = self._config.local_path / archive_path.name
-             shutil.move(str(archive_path), str(final_path))
-             return final_path
-         else:
-             raise NotImplementedError("S3 backup not yet implemented")
-
-     def _compute_checksum(self, file_path: Path) -> str:
-         """Compute SHA-256 checksum of file."""
-         sha256 = hashlib.sha256()
-         with open(file_path, "rb") as f:
-             for chunk in iter(lambda: f.read(8192), b""):
-                 sha256.update(chunk)
-         return sha256.hexdigest()
-
-     def _write_checksum_file(self, backup_path: Path, checksum: str) -> None:
-         """Write checksum to companion file."""
-         checksum_path = backup_path.with_suffix(backup_path.suffix + ".sha256")
-         checksum_path.write_text(f"{checksum} {backup_path.name}\n")
-
-
- def create_backup_job(
-     local_path: str | Path | None = None,
-     db_host: str | None = None,
-     db_name: str | None = None,
-     media_path: str | Path | None = None,
- ) -> BackupJob:
-     """Create a configured BackupJob instance."""
-     config = BackupConfig()
-
-     if local_path:
-         config.local_path = Path(local_path)
-     elif env_path := os.environ.get("BACKUP_LOCAL_PATH"):
-         config.local_path = Path(env_path)
-
-     if db_host:
-         config.db_host = db_host
-     elif env_host := os.environ.get("DD_DATABASE_HOST"):
-         config.db_host = env_host
-
-     if db_name:
-         config.db_name = db_name
-     elif env_name := os.environ.get("DD_DATABASE_NAME"):
-         config.db_name = env_name
-
-     if media_path:
-         config.media_path = Path(media_path)
-     elif env_media := os.environ.get("PORTAL_UPLOAD_DIR"):
-         config.media_path = Path(env_media)
-
-     if env_audit := os.environ.get("PORTAL_AUDIT_DIR"):
-         config.audit_log_path = Path(env_audit)
-
-     return BackupJob(config)
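
For anyone pinned to 1.1.0 who still depends on the removed module, here is a minimal sketch of how its public API was driven, derived solely from the deleted source above; the destination path is an illustrative assumption, and this was never a supported entry point of the kekkai-cli package:

    from portal.ops.backup import create_backup_job  # present in 1.1.0, removed in 1.1.1

    # _backup_database copies DD_DATABASE_PASSWORD into PGPASSWORD for
    # pg_dump, so export that variable before running.
    job = create_backup_job(
        local_path="/tmp/kekkai-backups",  # illustrative; default is /var/lib/kekkai-portal/backups
        db_host="localhost",
        db_name="defectdojo",
    )

    result = job.backup_full()
    if result.success:
        ok, message = job.verify_backup(result.destination_path)
        print(f"{result.backup_id}: {message} ({result.size_bytes} bytes)")
        job.cleanup_old_backups()  # prune beyond the 30-day / 10-copy retention defaults
    else:
        print(f"backup failed: {result.error}")

One caveat worth noting when assessing this removal: although BackupConfig defaults encryption_enabled to True and the manifest records AES-256-GCM, no code path in the deleted module actually encrypts the archive, so BackupResult.encrypted reflected the config flag rather than any applied cipher.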