observal-cli 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (44) hide show
  1. observal_cli/README.md +150 -0
  2. observal_cli/__init__.py +0 -0
  3. observal_cli/analyzer.py +565 -0
  4. observal_cli/branding.py +19 -0
  5. observal_cli/client.py +264 -0
  6. observal_cli/cmd_agent.py +783 -0
  7. observal_cli/cmd_auth.py +823 -0
  8. observal_cli/cmd_doctor.py +674 -0
  9. observal_cli/cmd_hook.py +246 -0
  10. observal_cli/cmd_mcp.py +1044 -0
  11. observal_cli/cmd_migrate.py +764 -0
  12. observal_cli/cmd_ops.py +1250 -0
  13. observal_cli/cmd_profile.py +308 -0
  14. observal_cli/cmd_prompt.py +200 -0
  15. observal_cli/cmd_pull.py +324 -0
  16. observal_cli/cmd_sandbox.py +178 -0
  17. observal_cli/cmd_scan.py +1056 -0
  18. observal_cli/cmd_skill.py +202 -0
  19. observal_cli/cmd_uninstall.py +340 -0
  20. observal_cli/config.py +160 -0
  21. observal_cli/constants.py +151 -0
  22. observal_cli/hooks/__init__.py +0 -0
  23. observal_cli/hooks/buffer_event.py +97 -0
  24. observal_cli/hooks/flush_buffer.py +141 -0
  25. observal_cli/hooks/kiro_hook.py +210 -0
  26. observal_cli/hooks/kiro_stop_hook.py +220 -0
  27. observal_cli/hooks/observal-hook.sh +31 -0
  28. observal_cli/hooks/observal-stop-hook.sh +134 -0
  29. observal_cli/hooks/payload_crypto.py +78 -0
  30. observal_cli/hooks_spec.py +154 -0
  31. observal_cli/main.py +105 -0
  32. observal_cli/prompts.py +92 -0
  33. observal_cli/proxy.py +205 -0
  34. observal_cli/render.py +139 -0
  35. observal_cli/requirements.txt +3 -0
  36. observal_cli/sandbox_runner.py +217 -0
  37. observal_cli/settings_reconciler.py +188 -0
  38. observal_cli/shim.py +459 -0
  39. observal_cli/telemetry_buffer.py +163 -0
  40. observal_cli-0.2.0.dist-info/METADATA +528 -0
  41. observal_cli-0.2.0.dist-info/RECORD +44 -0
  42. observal_cli-0.2.0.dist-info/WHEEL +4 -0
  43. observal_cli-0.2.0.dist-info/entry_points.txt +5 -0
  44. observal_cli-0.2.0.dist-info/licenses/LICENSE +108 -0
@@ -0,0 +1,764 @@
1
+ """observal migrate: PostgreSQL shallow-copy migration tools."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import asyncio
6
+ import hashlib
7
+ import json
8
+ import os
9
+ import shutil
10
+ import tarfile
11
+ import tempfile
12
+ import time
13
+ import uuid
14
+ from dataclasses import dataclass
15
+ from datetime import UTC, datetime, timedelta
16
+ from pathlib import Path
17
+ from typing import TYPE_CHECKING
18
+
19
+ import typer
20
+
21
+ if TYPE_CHECKING:
22
+ import asyncpg
23
+ from rich import print as rprint
24
+
25
+ from observal_cli import client
26
+ from observal_cli.render import spinner
27
+
28
+ # ── Constants ────────────────────────────────────────────
29
+
30
# Batch size shared by the import path (rows buffered before each flush) and
# the export path (cursor prefetch size).
CHUNK_SIZE = 500

# Tables processed by export, import, and validate, in this exact order.
# Parents are listed before the tables whose foreign keys reference them, so
# the import can insert rows tier by tier.
INSERT_ORDER: list[str] = [
    # Tier 0 — no FK dependencies
    "organizations",
    "enterprise_config",
    "component_sources",
    "penalty_definitions",
    # Tier 1 — FK to organizations
    "users",
    "exporter_configs",
    # Tier 1.5 — FK to users
    "component_bundles",
    # Tier 2 — FK to orgs + users + component_bundles
    "mcp_listings",
    "skill_listings",
    "hook_listings",
    "prompt_listings",
    "sandbox_listings",
    "agents",
    # Tier 3 — FK to listings/users
    "mcp_validation_results",
    "mcp_downloads",
    "skill_downloads",
    "hook_downloads",
    "prompt_downloads",
    "sandbox_downloads",
    "submissions",
    "alert_rules",
    # Tier 4 — FK to agents
    "agent_goal_templates",
    "agent_download_records",
    "component_download_records",
    "dimension_weights",
    # Tier 5 — FK to agent_goal_templates
    "agent_goal_sections",
    # Tier 6 — FK to agents (polymorphic component_id)
    "agent_components",
    # Tier 7 — FK to users (polymorphic listing_id)
    "feedback",
    # Tier 8 — FK to alert_rules
    "alert_history",
    # Tier 9 — FK to agents + users
    "eval_runs",
    # Tier 10 — FK to eval_runs
    "scorecards",
    # Tier 11 — FK to scorecards + penalty_definitions
    "scorecard_dimensions",
    "trace_penalties",
]

# Per-table list of columns that _build_select casts to ::text on export, so
# their values are written to the JSONL files as JSON strings.
JSONB_COLUMNS: dict[str, list[str]] = {
    "agents": ["model_config_json", "external_mcps", "supported_ides"],
    "mcp_listings": ["tools_schema", "environment_variables", "supported_ides"],
    "skill_listings": ["supported_ides", "target_agents", "triggers", "mcp_server_config", "activation_keywords"],
    "hook_listings": ["supported_ides", "handler_config", "input_schema", "output_schema"],
    "prompt_listings": ["variables", "model_hints", "tags", "supported_ides"],
    "sandbox_listings": ["resource_limits", "allowed_mounts", "env_vars", "supported_ides"],
    "scorecards": ["raw_output", "dimension_scores", "scoring_recommendations", "dimensions_skipped", "warnings"],
    "agent_components": ["config_override"],
    "exporter_configs": ["config"],
}
92
+
93
+
94
+ # ── PGEncoder ────────────────────────────────────────────
95
+
96
+
97
+ class PGEncoder(json.JSONEncoder):
98
+ """Custom JSON encoder for PostgreSQL row data."""
99
+
100
+ def default(self, obj: object) -> object:
101
+ if isinstance(obj, uuid.UUID):
102
+ return str(obj)
103
+ if isinstance(obj, datetime):
104
+ return obj.isoformat()
105
+ if isinstance(obj, timedelta):
106
+ return obj.total_seconds()
107
+ return super().default(obj)
108
+
109
+
110
+ # ── Dataclasses ──────────────────────────────────────────
111
+
112
+
113
@dataclass
class ExportResult:
    """Summary returned by _export_database after an archive has been written."""

    # Path of the written .tar.gz archive, as a string.
    archive_path: str
    # Random UUID generated for this export run.
    migration_id: str
    # Table name -> number of rows written to that table's JSONL file.
    table_counts: dict[str, int]
    # Table name -> SHA-256 hex digest of that table's JSONL file.
    checksums: dict[str, str]
    # Wall-clock duration of the export, rounded to two decimals.
    duration_seconds: float
    # Sum of all values in table_counts.
    total_rows: int
121
+
122
+
123
@dataclass
class ImportResult:
    """Summary returned by _import_archive after rows have been loaded."""

    # migration_id read from the archive's manifest.json.
    migration_id: str
    # Table count reported to the user by the import command.
    tables_imported: int
    # Table name -> rows actually inserted.
    rows_inserted: dict[str, int]
    # Table name -> rows skipped (id conflict or foreign-key violation).
    rows_skipped: dict[str, int]
    # Wall-clock duration of the import, rounded to two decimals.
    duration_seconds: float
    # Human-readable warnings printed by the import command when non-empty.
    warnings: list[str]
131
+
132
+
133
@dataclass
class ChecksumResult:
    """Outcome of verifying one table's JSONL file against the manifest.

    Instances are constructed positionally, so the field order is significant.
    """

    table_name: str
    # Checksum recorded in manifest.json.
    expected_checksum: str
    # SHA-256 of the extracted file; empty string when the file is missing.
    actual_checksum: str
    # True only when the file exists and both checksums are equal.
    passed: bool
139
+
140
+
141
@dataclass
class ValidationResult:
    """Result of _validate_archive."""

    # True when every per-table checksum check passed.
    archive_valid: bool
    # One entry per table in INSERT_ORDER.
    checksum_results: list[ChecksumResult]
    # Table name -> (row count in archive, row count in database); the database
    # count is -1 when the table does not exist there. None when no database
    # URL was supplied.
    cross_db_results: dict[str, tuple[int, int]] | None
146
+
147
+
148
+ # ── Helper functions ─────────────────────────────────────
149
+
150
+
151
def _require_admin() -> None:
    """Abort the command unless the logged-in user is an admin or super_admin.

    Raises:
        typer.Exit: With code 1 when the whoami call exits (treated as "not
            authenticated") or when the reported role is not privileged.
    """
    try:
        whoami = client.get("/api/v1/auth/whoami")
    except SystemExit:
        # The client signals a failed request by exiting; turn that into a
        # login hint instead of a bare exit.
        rprint("[red]Authentication required.[/red]")
        rprint("[dim] Run [bold]observal auth login[/bold] first.[/dim]")
        raise typer.Exit(1)

    current_role = whoami.get("role", "")
    if current_role in ("admin", "super_admin"):
        return

    rprint("[red]Permission denied.[/red] The migrate command requires admin or super_admin role.")
    rprint(f"[dim] Current role: {current_role}[/dim]")
    raise typer.Exit(1)
164
+
165
+
166
def _build_select(table: str, columns: list[str]) -> str:
    """Build the export SELECT for *table*, casting JSONB columns to ``::text``.

    Tables without an entry in JSONB_COLUMNS use a plain ``SELECT *``. For the
    others every column is listed explicitly, and each column named in
    JSONB_COLUMNS is selected as ``"col"::text AS "col"``.

    Column identifiers are double-quoted, matching _build_insert, so that
    reserved-word or mixed-case column names do not break the query.

    Args:
        table: Table name (taken from INSERT_ORDER by the caller).
        columns: Actual column names of the table, in select order.

    Returns:
        The SQL text of the SELECT statement.
    """
    jsonb_cols = set(JSONB_COLUMNS.get(table, ()))
    if not jsonb_cols:
        return f"SELECT * FROM {table}"
    parts = [f'"{col}"::text AS "{col}"' if col in jsonb_cols else f'"{col}"' for col in columns]
    return f"SELECT {', '.join(parts)} FROM {table}"
178
+
179
+
180
+ def _sha256_file(path: Path) -> str:
181
+ """Compute SHA-256 hex digest of a file."""
182
+ h = hashlib.sha256()
183
+ with open(path, "rb") as f:
184
+ for chunk in iter(lambda: f.read(8192), b""):
185
+ h.update(chunk)
186
+ return h.hexdigest()
187
+
188
+
189
+ # ── Async helpers ────────────────────────────────────────
190
+
191
+
192
def _strip_driver_suffix(db_url: str) -> str:
    """Drop a SQLAlchemy ``+driver`` suffix from the URL scheme, if present."""
    scheme, sep, rest = db_url.partition("://")
    # Only the scheme is inspected, so a "+" inside the password is untouched.
    if not sep or "+" not in scheme:
        return db_url
    return scheme.split("+", 1)[0] + sep + rest


async def _connect(db_url: str) -> asyncpg.Connection:
    """Open an asyncpg connection and verify the database has an Observal schema.

    Args:
        db_url: PostgreSQL connection string. SQLAlchemy-style schemes such as
            ``postgresql+asyncpg://`` are accepted and normalised.

    Returns:
        An open connection; the caller is responsible for closing it.

    Raises:
        typer.Exit: With code 1 when asyncpg is not installed, the connection
            cannot be established, or no ``alembic_version`` table is found.
    """
    try:
        import asyncpg
    except ImportError:
        rprint(
            "[red]asyncpg not found.[/red] Install the migrate extra: [bold]pip install 'observal-cli[migrate]'[/bold]"
        )
        raise typer.Exit(1)

    # Strip SQLAlchemy dialect suffixes (e.g. postgresql+asyncpg:// → postgresql://)
    clean_url = _strip_driver_suffix(db_url)
    try:
        conn = await asyncpg.connect(clean_url)
    except Exception as e:
        # Any failure to connect (bad credentials, unknown database, network
        # error, malformed URL) is reported the same way.
        rprint(f"[red]Database connection failed:[/red] {type(e).__name__}: {e}")
        raise typer.Exit(1)

    # Verify this is an Observal database
    try:
        result = await conn.fetchval(
            "SELECT EXISTS(SELECT 1 FROM information_schema.tables WHERE table_name = 'alembic_version')"
        )
    except BaseException:
        # Do not leak the connection when the probe query itself fails.
        await conn.close()
        raise
    if not result:
        await conn.close()
        rprint("[red]Database does not contain an Observal schema[/red] (alembic_version table not found).")
        rprint("[dim] Is this the right database?[/dim]")
        raise typer.Exit(1)
    return conn
221
+
222
+
223
+ async def _get_column_types(conn: asyncpg.Connection, table: str) -> dict[str, str]:
224
+ """Get column name -> PostgreSQL type mapping for a table."""
225
+ rows = await conn.fetch(
226
+ "SELECT column_name, udt_name FROM information_schema.columns WHERE table_name = $1 ORDER BY ordinal_position",
227
+ table,
228
+ )
229
+ return {row["column_name"]: row["udt_name"] for row in rows}
230
+
231
+
232
+ def _coerce_value(value: object, pg_type: str) -> object:
233
+ """Coerce a JSON-deserialized value to the correct Python type for asyncpg."""
234
+ if value is None:
235
+ return None
236
+ if pg_type == "uuid" and isinstance(value, str):
237
+ return uuid.UUID(value)
238
+ if pg_type in ("timestamptz", "timestamp") and isinstance(value, str):
239
+ return datetime.fromisoformat(value)
240
+ if pg_type == "interval" and isinstance(value, (int, float)):
241
+ return timedelta(seconds=value)
242
+ if pg_type in ("bool",) and isinstance(value, bool):
243
+ return value
244
+ if pg_type in ("int4", "int8", "int2") and isinstance(value, (int, float)):
245
+ return int(value)
246
+ if pg_type in ("float4", "float8", "numeric") and isinstance(value, (int, float)):
247
+ return float(value)
248
+ return value
249
+
250
+
251
+ def _build_insert(table: str, columns: list[str], col_types: dict[str, str]) -> str:
252
+ """Build INSERT query with proper type casts for JSONB columns."""
253
+ cols_str = ", ".join(f'"{col}"' for col in columns)
254
+ parts = []
255
+ for i, col in enumerate(columns):
256
+ pg_type = col_types.get(col, "")
257
+ if pg_type in ("json", "jsonb"):
258
+ parts.append(f"${i + 1}::jsonb")
259
+ else:
260
+ parts.append(f"${i + 1}")
261
+ placeholders = ", ".join(parts)
262
+ return f'INSERT INTO {table} ({cols_str}) VALUES ({placeholders}) ON CONFLICT ("id") DO NOTHING'
263
+
264
+
265
async def _flush_batch(
    conn: asyncpg.Connection,
    table: str,
    columns: list[str],
    col_types: dict[str, str],
    batch: list[dict],
) -> tuple[int, int]:
    """Insert the rows of *batch* into *table* one statement at a time.

    Args:
        conn: Open database connection.
        table: Target table name.
        columns: Column names to insert, in placeholder order.
        col_types: Column name -> PostgreSQL type name, used for coercion and
            for the JSONB casts in the statement.
        batch: Rows as decoded from the JSONL file.

    Returns:
        ``(inserted, skipped)``. A row counts as skipped when the statement
        reports zero affected rows (id conflict) or when it violates a
        foreign key, in which case a notice is printed.

    Raises:
        typer.Exit: With code 1 when asyncpg is not installed.
    """
    try:
        import asyncpg
    except ImportError:
        rprint(
            "[red]asyncpg not found.[/red] Install the migrate extra: [bold]pip install 'observal-cli[migrate]'[/bold]"
        )
        raise typer.Exit(1)

    if not batch:
        return 0, 0

    statement = _build_insert(table, columns, col_types)
    n_inserted = 0
    n_skipped = 0

    for record in batch:
        params = [_coerce_value(record.get(name), col_types.get(name, "")) for name in columns]
        try:
            tag = await conn.execute(statement, *params)
        except asyncpg.ForeignKeyViolationError as e:
            row_id = record.get("id", "unknown")
            rprint(f"[yellow] FK violation in {table}, row {row_id}: {e.constraint_name}[/yellow]")
            n_skipped += 1
            continue

        # The command tag is "INSERT 0 1" when a row was written and
        # "INSERT 0 0" when ON CONFLICT suppressed it.
        affected = int(tag.split()[-1])
        if affected > 0:
            n_inserted += 1
        else:
            n_skipped += 1

    return n_inserted, n_skipped
305
+
306
+
307
async def _insert_table(
    conn: asyncpg.Connection,
    table: str,
    jsonl_path: Path,
    col_types: dict[str, str],
) -> tuple[int, int]:
    """Stream a JSONL file into *table* in batches of CHUNK_SIZE rows.

    The column list is taken from the keys of the first non-blank line and is
    reused for every row of the file. Blank lines are ignored.

    Returns:
        ``(inserted, skipped)`` totals accumulated over all batches.
    """
    total_inserted = 0
    total_skipped = 0
    pending: list[dict] = []
    column_names: list[str] | None = None

    with open(jsonl_path, encoding="utf-8") as handle:
        for raw_line in handle:
            text = raw_line.strip()
            if not text:
                continue

            record = json.loads(text)
            if column_names is None:
                column_names = list(record.keys())
            pending.append(record)

            if len(pending) < CHUNK_SIZE:
                continue

            done, missed = await _flush_batch(conn, table, column_names, col_types, pending)
            total_inserted += done
            total_skipped += missed
            pending = []

    # Flush whatever is left over after the last full batch.
    if pending and column_names:
        done, missed = await _flush_batch(conn, table, column_names, col_types, pending)
        total_inserted += done
        total_skipped += missed

    return total_inserted, total_skipped
343
+
344
+
345
async def _import_archive(db_url: str, archive_path: Path) -> ImportResult:
    """Import a migration archive into the target database.

    Steps, in order: extract the archive into a private temp directory, read
    and sanity-check ``manifest.json``, verify every table file's SHA-256
    against the manifest (before touching the database), check that the
    target's alembic version equals the archive's, then insert each table in
    INSERT_ORDER. The temp directory is always removed.

    Args:
        db_url: Target PostgreSQL connection string.
        archive_path: Path to the ``.tar.gz`` archive.

    Returns:
        An ImportResult. ``tables_imported`` counts only tables that exist on
        the target and were actually processed; tables skipped because they
        are missing on the target while the archive holds rows for them are
        reported in ``warnings``.

    Raises:
        typer.Exit: With code 1 on a missing/incomplete manifest, a checksum
            failure, a schema version mismatch, or a connection failure.
    """
    t0 = time.monotonic()
    warnings: list[str] = []

    staging_dir = Path(tempfile.mkdtemp())
    os.chmod(staging_dir, 0o700)
    try:
        # Extract archive; the "data" filter rejects unsafe members.
        with tarfile.open(archive_path, "r:gz") as tar:
            tar.extractall(staging_dir, filter="data")

        # Read manifest
        manifest_path = staging_dir / "manifest.json"
        if not manifest_path.exists():
            rprint("[red]Archive does not contain manifest.json[/red]")
            raise typer.Exit(1)
        manifest = json.loads(manifest_path.read_text(encoding="utf-8"))

        # Fail with a clear message instead of a KeyError traceback when the
        # manifest is incomplete.
        required_keys = ("migration_id", "source_alembic_version", "tables")
        missing_keys = [k for k in required_keys if not isinstance(manifest, dict) or k not in manifest]
        if missing_keys:
            rprint(f"[red]manifest.json is missing required keys:[/red] {', '.join(missing_keys)}")
            raise typer.Exit(1)
        migration_id = manifest["migration_id"]
        manifest_tables = manifest["tables"]

        # Verify checksums BEFORE any DB operations
        failed_checksums: list[str] = []
        for table in INSERT_ORDER:
            jsonl_path = staging_dir / "pg" / f"{table}.jsonl"
            if not jsonl_path.exists():
                failed_checksums.append(f"{table} (file missing)")
                continue
            expected = manifest_tables.get(table, {}).get("checksum")
            if expected is None:
                failed_checksums.append(f"{table} (not in manifest)")
                continue
            actual = _sha256_file(jsonl_path)
            if actual != expected:
                failed_checksums.append(table)

        if failed_checksums:
            rprint("[red]Checksum verification failed:[/red]")
            for name in failed_checksums:
                rprint(f" [red]✗[/red] {name}")
            rprint("\n[dim]Archive may be corrupted or tampered. Re-export from source.[/dim]")
            raise typer.Exit(1)

        # Connect and verify schema version
        conn = await _connect(db_url)
        try:
            target_version = await conn.fetchval("SELECT version_num FROM alembic_version LIMIT 1")
            source_version = manifest["source_alembic_version"]
            if target_version != source_version:
                rprint("[red]Schema version mismatch:[/red]")
                rprint(f" Archive: {source_version}")
                rprint(f" Target: {target_version}")
                rprint("\n[dim] Run: cd observal-server && alembic upgrade head[/dim]")
                raise typer.Exit(1)

            rows_inserted: dict[str, int] = {}
            rows_skipped: dict[str, int] = {}
            tables_imported = 0

            # Discover which tables exist on the target
            existing_tables = {
                row["table_name"]
                for row in await conn.fetch(
                    "SELECT table_name FROM information_schema.tables WHERE table_schema = 'public'"
                )
            }

            for table in INSERT_ORDER:
                jsonl_path = staging_dir / "pg" / f"{table}.jsonl"

                # Skip tables that don't exist on target
                if table not in existing_tables:
                    rprint(f"[dim] Skipping {table} (table does not exist on target)[/dim]")
                    rows_inserted[table] = 0
                    rows_skipped[table] = 0
                    # Surface dropped data in the final summary rather than
                    # only in a dim progress line.
                    archived_rows = manifest_tables.get(table, {}).get("row_count", 0)
                    if archived_rows:
                        warnings.append(
                            f"{table}: {archived_rows} archived row(s) not imported (table does not exist on target)"
                        )
                    continue

                # Get column types for proper coercion
                col_types = await _get_column_types(conn, table)

                ins, sk = await _insert_table(conn, table, jsonl_path, col_types)
                rows_inserted[table] = ins
                rows_skipped[table] = sk
                tables_imported += 1

        finally:
            await conn.close()

        elapsed = time.monotonic() - t0
        return ImportResult(
            migration_id=migration_id,
            tables_imported=tables_imported,
            rows_inserted=rows_inserted,
            rows_skipped=rows_skipped,
            duration_seconds=round(elapsed, 2),
            warnings=warnings,
        )

    finally:
        shutil.rmtree(staging_dir, ignore_errors=True)
439
+
440
+
441
async def _validate_archive(archive_path: Path, db_url: str | None) -> ValidationResult:
    """Validate archive checksums and optionally compare row counts with a database.

    Args:
        archive_path: Path to the ``.tar.gz`` archive.
        db_url: Optional PostgreSQL connection string. When given, the row
            count recorded in the manifest is compared with ``count(*)`` of
            each table in the database.

    Returns:
        A ValidationResult. A table that is absent from the manifest or whose
        file is missing is reported as a failed checksum check rather than
        raising. In ``cross_db_results`` a database count of ``-1`` means the
        table does not exist there.

    Raises:
        typer.Exit: With code 1 when the archive has no ``manifest.json`` or
            the database connection fails.
    """
    staging_dir = Path(tempfile.mkdtemp())
    os.chmod(staging_dir, 0o700)
    try:
        with tarfile.open(archive_path, "r:gz") as tar:
            tar.extractall(staging_dir, filter="data")

        manifest_path = staging_dir / "manifest.json"
        if not manifest_path.exists():
            rprint("[red]Archive does not contain manifest.json[/red]")
            raise typer.Exit(1)
        manifest = json.loads(manifest_path.read_text(encoding="utf-8"))
        # Tolerate an incomplete manifest: missing entries become failed
        # checks instead of KeyError tracebacks.
        manifest_tables = manifest.get("tables", {}) if isinstance(manifest, dict) else {}

        # Verify checksums
        checksum_results: list[ChecksumResult] = []
        for table in INSERT_ORDER:
            jsonl_path = staging_dir / "pg" / f"{table}.jsonl"
            expected = manifest_tables.get(table, {}).get("checksum")
            actual = _sha256_file(jsonl_path) if jsonl_path.exists() else ""
            passed = expected is not None and actual != "" and actual == expected
            checksum_results.append(ChecksumResult(table, expected or "", actual, passed))

        all_ok = all(r.passed for r in checksum_results)

        # Optional cross-database validation
        cross_db_results: dict[str, tuple[int, int]] | None = None
        if db_url:
            conn = await _connect(db_url)
            try:
                existing_tables = {
                    row["table_name"]
                    for row in await conn.fetch(
                        "SELECT table_name FROM information_schema.tables WHERE table_schema = 'public'"
                    )
                }
                cross_db_results = {}
                for table in INSERT_ORDER:
                    archive_count = manifest_tables.get(table, {}).get("row_count", 0)
                    if table not in existing_tables:
                        cross_db_results[table] = (archive_count, -1)  # -1 signals table missing
                        continue
                    # Table names come from the INSERT_ORDER constant, not user input.
                    db_count = await conn.fetchval(f"SELECT count(*) FROM {table}")
                    cross_db_results[table] = (archive_count, db_count)
            finally:
                await conn.close()

        return ValidationResult(
            archive_valid=all_ok,
            checksum_results=checksum_results,
            cross_db_results=cross_db_results,
        )

    finally:
        shutil.rmtree(staging_dir, ignore_errors=True)
498
+
499
+
500
async def _export_database(db_url: str, output_path: Path) -> ExportResult:
    """Export all tables to JSONL files and pack them into a tar.gz archive.

    Every table in INSERT_ORDER is streamed to ``pg/<table>.jsonl`` inside a
    private temp directory while a read-only REPEATABLE READ transaction is
    open, so all tables are read from one consistent snapshot. Tables that do
    not exist in the source get an empty file. Two manifests are written
    (``manifest.json`` with checksums and row counts, and
    ``migration_manifest.json`` with a hash of the source URL and per-table id
    ranges) and everything is packed into *output_path*.

    If packing fails, the partially written archive is removed so that a
    retry is not blocked by a leftover file.

    Args:
        db_url: Source PostgreSQL connection string.
        output_path: Destination archive path; parent directories are created.

    Returns:
        An ExportResult describing the written archive.

    Raises:
        typer.Exit: With code 1 when the connection fails or the alembic
            version cannot be read.
    """
    t0 = time.monotonic()
    migration_id = str(uuid.uuid4())

    staging_dir = Path(tempfile.mkdtemp())
    os.chmod(staging_dir, 0o700)
    try:
        pg_dir = staging_dir / "pg"
        pg_dir.mkdir()

        conn = await _connect(db_url)
        try:
            # Read alembic version
            alembic_version = await conn.fetchval("SELECT version_num FROM alembic_version LIMIT 1")
            if not alembic_version:
                rprint("[red]Could not read alembic version from source database.[/red]")
                raise typer.Exit(1)

            table_counts: dict[str, int] = {}
            file_hashes: dict[str, str] = {}
            uuid_ranges: dict[str, dict[str, str]] = {}

            # Open REPEATABLE READ transaction for consistent snapshot
            async with conn.transaction(isolation="repeatable_read", readonly=True):
                # Discover which tables actually exist in the database
                existing_tables = {
                    row["table_name"]
                    for row in await conn.fetch(
                        "SELECT table_name FROM information_schema.tables WHERE table_schema = 'public'"
                    )
                }

                for table in INSERT_ORDER:
                    dest = pg_dir / f"{table}.jsonl"

                    # Skip tables that don't exist yet (DB on older migration)
                    if table not in existing_tables:
                        rprint(f"[dim] Skipping {table} (table does not exist)[/dim]")
                        # Write empty JSONL file so archive structure is consistent
                        dest.write_text("")
                        table_counts[table] = 0
                        file_hashes[table] = _sha256_file(dest)
                        continue

                    # Discover columns via prepared statement
                    stmt = await conn.prepare(f"SELECT * FROM {table} LIMIT 0")
                    columns = [attr.name for attr in stmt.get_attributes()]

                    query = _build_select(table, columns)

                    row_count = 0
                    min_id: str | None = None
                    max_id: str | None = None

                    with open(dest, "w", encoding="utf-8") as f:
                        async for record in conn.cursor(query, prefetch=CHUNK_SIZE):
                            row = dict(record)
                            line = json.dumps(row, cls=PGEncoder)
                            f.write(line + "\n")
                            row_count += 1

                            # Track the id range (compared as strings)
                            row_id = row.get("id")
                            if row_id is not None:
                                id_str = str(row_id)
                                if min_id is None or id_str < min_id:
                                    min_id = id_str
                                if max_id is None or id_str > max_id:
                                    max_id = id_str

                    table_counts[table] = row_count
                    file_hashes[table] = _sha256_file(dest)

                    if min_id is not None:
                        uuid_ranges[table] = {"min_id": min_id, "max_id": max_id}

        finally:
            await conn.close()

        # Write manifest.json
        exported_at = datetime.now(UTC).isoformat()
        manifest = {
            "schema_version": "1.0",
            "migration_id": migration_id,
            "exported_at": exported_at,
            "source_alembic_version": alembic_version,
            "tables": {
                table: {"checksum": file_hashes[table], "row_count": table_counts[table]} for table in INSERT_ORDER
            },
        }
        manifest_path = staging_dir / "manifest.json"
        manifest_path.write_text(json.dumps(manifest, indent=2) + "\n", encoding="utf-8")

        # Write migration_manifest.json; only a hash of the URL is stored so
        # credentials in the connection string never reach the archive.
        db_url_hash = hashlib.sha256(db_url.encode()).hexdigest()
        migration_manifest = {
            "migration_id": migration_id,
            "phase1_completed_at": exported_at,
            "source_db_url_hash": db_url_hash,
            "table_row_counts": dict(table_counts),
            "uuid_ranges": uuid_ranges,
        }
        migration_manifest_path = staging_dir / "migration_manifest.json"
        migration_manifest_path.write_text(json.dumps(migration_manifest, indent=2) + "\n", encoding="utf-8")

        # Ensure output parent directory exists
        output_path.parent.mkdir(parents=True, exist_ok=True)

        # Pack archive
        try:
            with tarfile.open(output_path, "w:gz") as tar:
                tar.add(str(manifest_path), arcname="manifest.json")
                tar.add(str(migration_manifest_path), arcname="migration_manifest.json")
                for table in INSERT_ORDER:
                    jsonl_file = pg_dir / f"{table}.jsonl"
                    tar.add(str(jsonl_file), arcname=f"pg/{table}.jsonl")
        except BaseException:
            # Never leave a truncated archive behind: it would look like a
            # valid export and would block the next run's "already exists" check.
            output_path.unlink(missing_ok=True)
            raise

        elapsed = time.monotonic() - t0
        total_rows = sum(table_counts.values())

        return ExportResult(
            archive_path=str(output_path),
            migration_id=migration_id,
            table_counts=table_counts,
            checksums=file_hashes,
            duration_seconds=round(elapsed, 2),
            total_rows=total_rows,
        )

    finally:
        shutil.rmtree(staging_dir, ignore_errors=True)
631
+
632
+
633
+ # ── Typer app ────────────────────────────────────────────
634
+
635
# Typer sub-application on which the export / import / validate commands
# below are registered.
migrate_app = typer.Typer(help="PostgreSQL shallow-copy migration tools")
636
+
637
+
638
@migrate_app.command("export")
def export_cmd(
    db_url: str = typer.Option(..., "--db-url", help="Source PostgreSQL connection string"),
    output: str | None = typer.Option(None, "--output", "-o", help="Output archive path"),
) -> None:
    """Export all PostgreSQL registry data to a portable archive."""
    # NOTE: the docstring above is the CLI help text; keep it user-facing.
    _require_admin()

    # Fall back to a UTC-timestamped filename in the current directory.
    if output is None:
        stamp = datetime.now(UTC).strftime("%Y%m%d-%H%M%S")
        output = f"observal-export-{stamp}.tar.gz"

    # Refuse to overwrite an existing file.
    output_path = Path(output)
    if output_path.exists():
        rprint(f"[red]Output file already exists:[/red] {output_path}")
        rprint("[dim] Choose a different path or remove the existing file.[/dim]")
        raise typer.Exit(1)

    rprint(f"[bold]Exporting to:[/bold] {output_path}")
    with spinner("Connecting to source database..."):
        result = asyncio.run(_export_database(db_url, output_path))

    # Summary
    size_mb = output_path.stat().st_size / (1024 * 1024)
    summary_lines = (
        "\n[bold green]✓ Export complete[/bold green]",
        f" Archive: {result.archive_path}",
        f" Migration: {result.migration_id}",
        f" Tables: {len(result.table_counts)}",
        f" Rows: {result.total_rows:,}",
        f" Size: {size_mb:.1f} MB",
        f" Duration: {result.duration_seconds:.1f}s",
    )
    for summary_line in summary_lines:
        rprint(summary_line)

    # Security warning
    rprint()
    rprint("[yellow]⚠ Archive contains hashed credentials (passwords, API keys).[/yellow]")
    rprint("[yellow] Store securely and delete after import.[/yellow]")
676
+
677
+
678
@migrate_app.command("import")
def import_cmd(
    db_url: str = typer.Option(..., "--db-url", help="Target PostgreSQL connection string"),
    archive: str = typer.Option(..., "--archive", "-a", help="Path to .tar.gz archive"),
) -> None:
    """Import a migration archive into the target database."""
    # NOTE: the docstring above is the CLI help text; keep it user-facing.
    _require_admin()

    # Validate the archive path before doing any work.
    archive_path = Path(archive)
    if not archive_path.exists():
        rprint(f"[red]Archive not found:[/red] {archive_path}")
        raise typer.Exit(1)
    if not tarfile.is_tarfile(archive_path):
        rprint(f"[red]Invalid archive format:[/red] {archive_path}")
        rprint("[dim] Expected a .tar.gz file.[/dim]")
        raise typer.Exit(1)

    rprint(f"[bold]Importing from:[/bold] {archive_path}")
    with spinner("Importing..."):
        result = asyncio.run(_import_archive(db_url, archive_path))

    inserted_total = sum(result.rows_inserted.values())
    skipped_total = sum(result.rows_skipped.values())

    report = (
        "\n[bold green]✓ Import complete[/bold green]",
        f" Migration: {result.migration_id}",
        f" Tables: {result.tables_imported}",
        f" Inserted: {inserted_total:,}",
        f" Skipped: {skipped_total:,}",
        f" Duration: {result.duration_seconds:.1f}s",
    )
    for report_line in report:
        rprint(report_line)

    if not result.warnings:
        return
    rprint("\n[yellow]Warnings:[/yellow]")
    for warning in result.warnings:
        rprint(f" [yellow]⚠[/yellow] {warning}")
714
+
715
+
716
@migrate_app.command("validate")
def validate_cmd(
    archive: str = typer.Option(..., "--archive", "-a", help="Path to .tar.gz archive"),
    db_url: str | None = typer.Option(None, "--db-url", help="Optional database for cross-validation"),
) -> None:
    """Validate archive integrity and optionally compare against a database."""
    # NOTE: the docstring above is the CLI help text; keep it user-facing.
    _require_admin()

    archive_path = Path(archive)
    if not archive_path.exists():
        rprint(f"[red]Archive not found:[/red] {archive_path}")
        raise typer.Exit(1)
    if not tarfile.is_tarfile(archive_path):
        rprint(f"[red]Invalid archive format:[/red] {archive_path}")
        raise typer.Exit(1)

    with spinner("Validating archive..."):
        result = asyncio.run(_validate_archive(archive_path, db_url))

    # Print checksum results
    rprint("\n[bold]Checksum verification:[/bold]")
    for check in result.checksum_results:
        mark = "[green]✓[/green]" if check.passed else "[red]✗[/red]"
        rprint(f" {mark} {check.table_name}")

    if not result.archive_valid:
        rprint("\n[red]Archive validation failed.[/red]")
        raise typer.Exit(1)

    rprint("\n[green]✓ All checksums valid[/green]")

    # Cross-database comparison (only when a database was supplied and
    # produced results)
    if not result.cross_db_results:
        return

    rprint("\n[bold]Row count comparison:[/bold]")
    mismatches = 0
    for table, counts in result.cross_db_results.items():
        archive_count, db_count = counts
        if db_count == -1:
            # -1 marks a table that is absent from the database.
            rprint(f" [dim]-[/dim] {table}: [dim]table not in database[/dim]")
            continue
        if archive_count == db_count:
            rprint(f" [green]✓[/green] {table}: {archive_count}")
            continue
        rprint(f" [yellow]≠[/yellow] {table}: archive={archive_count}, db={db_count}")
        mismatches += 1

    if mismatches:
        rprint(f"\n[yellow]⚠ {mismatches} table(s) have different row counts[/yellow]")
    else:
        rprint("\n[green]✓ All row counts match[/green]")