observal-cli 0.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- observal_cli/README.md +150 -0
- observal_cli/__init__.py +0 -0
- observal_cli/analyzer.py +565 -0
- observal_cli/branding.py +19 -0
- observal_cli/client.py +264 -0
- observal_cli/cmd_agent.py +783 -0
- observal_cli/cmd_auth.py +823 -0
- observal_cli/cmd_doctor.py +674 -0
- observal_cli/cmd_hook.py +246 -0
- observal_cli/cmd_mcp.py +1044 -0
- observal_cli/cmd_migrate.py +764 -0
- observal_cli/cmd_ops.py +1250 -0
- observal_cli/cmd_profile.py +308 -0
- observal_cli/cmd_prompt.py +200 -0
- observal_cli/cmd_pull.py +324 -0
- observal_cli/cmd_sandbox.py +178 -0
- observal_cli/cmd_scan.py +1056 -0
- observal_cli/cmd_skill.py +202 -0
- observal_cli/cmd_uninstall.py +340 -0
- observal_cli/config.py +160 -0
- observal_cli/constants.py +151 -0
- observal_cli/hooks/__init__.py +0 -0
- observal_cli/hooks/buffer_event.py +97 -0
- observal_cli/hooks/flush_buffer.py +141 -0
- observal_cli/hooks/kiro_hook.py +210 -0
- observal_cli/hooks/kiro_stop_hook.py +220 -0
- observal_cli/hooks/observal-hook.sh +31 -0
- observal_cli/hooks/observal-stop-hook.sh +134 -0
- observal_cli/hooks/payload_crypto.py +78 -0
- observal_cli/hooks_spec.py +154 -0
- observal_cli/main.py +105 -0
- observal_cli/prompts.py +92 -0
- observal_cli/proxy.py +205 -0
- observal_cli/render.py +139 -0
- observal_cli/requirements.txt +3 -0
- observal_cli/sandbox_runner.py +217 -0
- observal_cli/settings_reconciler.py +188 -0
- observal_cli/shim.py +459 -0
- observal_cli/telemetry_buffer.py +163 -0
- observal_cli-0.2.0.dist-info/METADATA +528 -0
- observal_cli-0.2.0.dist-info/RECORD +44 -0
- observal_cli-0.2.0.dist-info/WHEEL +4 -0
- observal_cli-0.2.0.dist-info/entry_points.txt +5 -0
- observal_cli-0.2.0.dist-info/licenses/LICENSE +108 -0
|
@@ -0,0 +1,764 @@
|
|
|
1
|
+
"""observal migrate: PostgreSQL shallow-copy migration tools."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import asyncio
|
|
6
|
+
import hashlib
|
|
7
|
+
import json
|
|
8
|
+
import os
|
|
9
|
+
import shutil
|
|
10
|
+
import tarfile
|
|
11
|
+
import tempfile
|
|
12
|
+
import time
|
|
13
|
+
import uuid
|
|
14
|
+
from dataclasses import dataclass
|
|
15
|
+
from datetime import UTC, datetime, timedelta
|
|
16
|
+
from pathlib import Path
|
|
17
|
+
from typing import TYPE_CHECKING
|
|
18
|
+
|
|
19
|
+
import typer
|
|
20
|
+
|
|
21
|
+
if TYPE_CHECKING:
|
|
22
|
+
import asyncpg
|
|
23
|
+
from rich import print as rprint
|
|
24
|
+
|
|
25
|
+
from observal_cli import client
|
|
26
|
+
from observal_cli.render import spinner
|
|
27
|
+
|
|
28
|
+
# ── Constants ────────────────────────────────────────────
|
|
29
|
+
|
|
30
|
+
# Number of rows handled per step: the import flushes a batch once it holds
# this many rows, and the export cursor prefetches this many rows at a time.
CHUNK_SIZE = 500
|
|
31
|
+
|
|
32
|
+
# Tables handled by export, import and validate, in the order they are
# processed. The tier comments record the foreign-key dependencies that the
# ordering is meant to satisfy (referenced tables come before referencing ones).
INSERT_ORDER: list[str] = [
    # Tier 0 — no FK dependencies
    "organizations",
    "enterprise_config",
    "component_sources",
    "penalty_definitions",
    # Tier 1 — FK to organizations
    "users",
    "exporter_configs",
    # Tier 1.5 — FK to users
    "component_bundles",
    # Tier 2 — FK to orgs + users + component_bundles
    "mcp_listings",
    "skill_listings",
    "hook_listings",
    "prompt_listings",
    "sandbox_listings",
    "agents",
    # Tier 3 — FK to listings/users
    "mcp_validation_results",
    "mcp_downloads",
    "skill_downloads",
    "hook_downloads",
    "prompt_downloads",
    "sandbox_downloads",
    "submissions",
    "alert_rules",
    # Tier 4 — FK to agents
    "agent_goal_templates",
    "agent_download_records",
    "component_download_records",
    "dimension_weights",
    # Tier 5 — FK to agent_goal_templates
    "agent_goal_sections",
    # Tier 6 — FK to agents (polymorphic component_id)
    "agent_components",
    # Tier 7 — FK to users (polymorphic listing_id)
    "feedback",
    # Tier 8 — FK to alert_rules
    "alert_history",
    # Tier 9 — FK to agents + users
    "eval_runs",
    # Tier 10 — FK to eval_runs
    "scorecards",
    # Tier 11 — FK to scorecards + penalty_definitions
    "scorecard_dimensions",
    "trace_penalties",
]
|
|
80
|
+
|
|
81
|
+
# Per-table list of columns that the export SELECT casts to ``::text``
# (see ``_build_select``). Tables not listed here are exported with ``SELECT *``.
JSONB_COLUMNS: dict[str, list[str]] = {
    "agents": ["model_config_json", "external_mcps", "supported_ides"],
    "mcp_listings": ["tools_schema", "environment_variables", "supported_ides"],
    "skill_listings": ["supported_ides", "target_agents", "triggers", "mcp_server_config", "activation_keywords"],
    "hook_listings": ["supported_ides", "handler_config", "input_schema", "output_schema"],
    "prompt_listings": ["variables", "model_hints", "tags", "supported_ides"],
    "sandbox_listings": ["resource_limits", "allowed_mounts", "env_vars", "supported_ides"],
    "scorecards": ["raw_output", "dimension_scores", "scoring_recommendations", "dimensions_skipped", "warnings"],
    "agent_components": ["config_override"],
    "exporter_configs": ["config"],
}
|
|
92
|
+
|
|
93
|
+
|
|
94
|
+
# ── PGEncoder ────────────────────────────────────────────
|
|
95
|
+
|
|
96
|
+
|
|
97
|
+
class PGEncoder(json.JSONEncoder):
    """JSON encoder for values found in exported PostgreSQL rows.

    Extends the standard encoder with the following conversions:

    * ``uuid.UUID`` -> its string form
    * ``datetime`` / ``date`` / ``time`` -> ISO-8601 string (``isoformat()``)
    * ``timedelta`` -> total number of seconds
    * ``Decimal`` -> ``float`` (a JSON number, which is what the import side
      accepts for ``numeric`` columns; very high precision values may be rounded)

    Any other unsupported type is passed to ``json.JSONEncoder.default``,
    which raises ``TypeError``.
    """

    def default(self, obj: object) -> object:
        """Return a JSON-serialisable stand-in for *obj* or raise ``TypeError``."""
        # Imported here under an alias because this module binds the bare
        # name ``time`` to the stdlib ``time`` module.
        from datetime import date
        from datetime import time as dt_time
        from decimal import Decimal

        if isinstance(obj, uuid.UUID):
            return str(obj)
        # ``datetime`` is a subclass of ``date``; all three expose isoformat().
        if isinstance(obj, (datetime, date, dt_time)):
            return obj.isoformat()
        if isinstance(obj, timedelta):
            return obj.total_seconds()
        if isinstance(obj, Decimal):
            return float(obj)
        return super().default(obj)
|
|
108
|
+
|
|
109
|
+
|
|
110
|
+
# ── Dataclasses ──────────────────────────────────────────
|
|
111
|
+
|
|
112
|
+
|
|
113
|
+
@dataclass
class ExportResult:
    """Summary of a finished export, as returned by ``_export_database``."""

    # Path of the written archive, as a string.
    archive_path: str
    # Identifier generated for this export run.
    migration_id: str
    # Table name -> number of rows written to that table's JSONL file.
    table_counts: dict[str, int]
    # Table name -> SHA-256 hex digest of that table's JSONL file.
    checksums: dict[str, str]
    # Wall-clock duration of the export, in seconds.
    duration_seconds: float
    # Sum of all values in ``table_counts``.
    total_rows: int
|
|
121
|
+
|
|
122
|
+
|
|
123
|
+
@dataclass
class ImportResult:
    """Summary of a finished import, as returned by ``_import_archive``."""

    # Migration identifier read from the archive manifest.
    migration_id: str
    # Number of tables counted as imported.
    tables_imported: int
    # Table name -> number of rows inserted.
    rows_inserted: dict[str, int]
    # Table name -> number of rows not inserted (conflict or FK violation).
    rows_skipped: dict[str, int]
    # Wall-clock duration of the import, in seconds.
    duration_seconds: float
    # Human-readable warnings to show after the import summary.
    warnings: list[str]
|
|
131
|
+
|
|
132
|
+
|
|
133
|
+
@dataclass
class ChecksumResult:
    """Outcome of the checksum comparison for one table's JSONL file."""

    # Table the JSONL file belongs to.
    table_name: str
    # Checksum recorded in the archive manifest.
    expected_checksum: str
    # Checksum computed from the extracted file ("" when the file is missing).
    actual_checksum: str
    # True when the file exists and both checksums are equal.
    passed: bool
|
|
139
|
+
|
|
140
|
+
|
|
141
|
+
@dataclass
class ValidationResult:
    """Result of validating an archive, as returned by ``_validate_archive``."""

    # True when every entry in ``checksum_results`` passed.
    archive_valid: bool
    # One entry per table, in processing order.
    checksum_results: list[ChecksumResult]
    # Table name -> (row count in archive, row count in database); the database
    # count is -1 when the table does not exist there. None when no database
    # URL was supplied.
    cross_db_results: dict[str, tuple[int, int]] | None
|
|
146
|
+
|
|
147
|
+
|
|
148
|
+
# ── Helper functions ─────────────────────────────────────
|
|
149
|
+
|
|
150
|
+
|
|
151
|
+
def _require_admin() -> None:
    """Abort the command unless the logged-in user is an admin.

    Asks the server who the current user is. If that lookup terminates with
    ``SystemExit``, or the reported role is neither ``admin`` nor
    ``super_admin``, an explanation is printed and ``typer.Exit(1)`` is raised.
    """
    try:
        whoami = client.get("/api/v1/auth/whoami")
    except SystemExit:
        for message in (
            "[red]Authentication required.[/red]",
            "[dim] Run [bold]observal auth login[/bold] first.[/dim]",
        ):
            rprint(message)
        raise typer.Exit(1)

    current_role = whoami.get("role", "")
    if current_role in ("admin", "super_admin"):
        return

    rprint("[red]Permission denied.[/red] The migrate command requires admin or super_admin role.")
    rprint(f"[dim] Current role: {current_role}[/dim]")
    raise typer.Exit(1)
|
|
164
|
+
|
|
165
|
+
|
|
166
|
+
def _build_select(table: str, columns: list[str]) -> str:
    """Build the export SELECT for *table*, casting JSONB columns to ``::text``.

    Tables without an entry in ``JSONB_COLUMNS`` are read with ``SELECT *``.
    Otherwise every name in *columns* is listed explicitly, and the ones named
    in ``JSONB_COLUMNS`` are selected as ``"col"::text AS "col"``.

    Column identifiers are double-quoted (embedded quotes doubled), matching
    ``_build_insert``, so reserved words and mixed-case names are referenced
    exactly as given.

    Args:
        table: Table name; interpolated as-is.
        columns: Column names in the order they should appear in the result.

    Returns:
        The SQL text of the SELECT statement.
    """
    jsonb_cols = set(JSONB_COLUMNS.get(table, ()))
    if not jsonb_cols:
        return f"SELECT * FROM {table}"

    parts = []
    for col in columns:
        ident = '"' + col.replace('"', '""') + '"'
        parts.append(f"{ident}::text AS {ident}" if col in jsonb_cols else ident)
    return f"SELECT {', '.join(parts)} FROM {table}"
|
|
178
|
+
|
|
179
|
+
|
|
180
|
+
def _sha256_file(path: Path) -> str:
    """Return the hexadecimal SHA-256 digest of the file at *path*.

    The file is read in 8192-byte pieces rather than loaded in one go.
    """
    digest = hashlib.sha256()
    with open(path, "rb") as stream:
        while piece := stream.read(8192):
            digest.update(piece)
    return digest.hexdigest()
|
|
187
|
+
|
|
188
|
+
|
|
189
|
+
# ── Async helpers ────────────────────────────────────────
|
|
190
|
+
|
|
191
|
+
|
|
192
|
+
async def _connect(db_url: str) -> asyncpg.Connection:
    """Open an asyncpg connection and check it points at an Observal database.

    Args:
        db_url: PostgreSQL connection URL. A SQLAlchemy-style driver suffix in
            the scheme (``postgresql+asyncpg://``) is removed before connecting.

    Returns:
        The open connection; the caller is responsible for closing it.

    Raises:
        typer.Exit: asyncpg is not installed, the connection attempt fails, or
            no ``alembic_version`` table is found.
    """
    try:
        import asyncpg
    except ImportError:
        rprint(
            "[red]asyncpg not found.[/red] Install the migrate extra: [bold]pip install 'observal-cli[migrate]'[/bold]"
        )
        raise typer.Exit(1) from None

    # Strip SQLAlchemy dialect suffixes (e.g. postgresql+asyncpg:// → postgresql://).
    # Only the scheme is inspected, so a "+" in the password or elsewhere in
    # the URL is left untouched.
    scheme, separator, remainder = db_url.partition("://")
    if separator and "+" in scheme:
        clean_url = f"{scheme.split('+', 1)[0]}://{remainder}"
    else:
        clean_url = db_url

    try:
        conn = await asyncpg.connect(clean_url)
    except Exception as e:  # CLI boundary: report any failure and exit cleanly
        rprint(f"[red]Database connection failed:[/red] {type(e).__name__}: {e}")
        raise typer.Exit(1) from e

    # Verify this is an Observal database
    try:
        result = await conn.fetchval(
            "SELECT EXISTS(SELECT 1 FROM information_schema.tables WHERE table_name = 'alembic_version')"
        )
    except BaseException:
        # Do not leak the connection if the probe itself fails.
        await conn.close()
        raise
    if not result:
        await conn.close()
        rprint("[red]Database does not contain an Observal schema[/red] (alembic_version table not found).")
        rprint("[dim] Is this the right database?[/dim]")
        raise typer.Exit(1)
    return conn
|
|
221
|
+
|
|
222
|
+
|
|
223
|
+
async def _get_column_types(conn: asyncpg.Connection, table: str) -> dict[str, str]:
    """Return a column name -> PostgreSQL type name (``udt_name``) mapping.

    The lookup is limited to the ``public`` schema, the same schema the
    import/export code uses to decide which tables exist. Without that filter,
    columns of a same-named table in another schema would be merged in.

    Args:
        conn: Open database connection.
        table: Table name, passed as a query parameter.

    Returns:
        Mapping in column (ordinal) order; empty when the table is not found.
    """
    rows = await conn.fetch(
        "SELECT column_name, udt_name FROM information_schema.columns "
        "WHERE table_schema = 'public' AND table_name = $1 ORDER BY ordinal_position",
        table,
    )
    return {row["column_name"]: row["udt_name"] for row in rows}
|
|
230
|
+
|
|
231
|
+
|
|
232
|
+
def _coerce_value(value: object, pg_type: str) -> object:
    """Convert a JSON-decoded value into the Python type used for *pg_type*.

    Conversions applied:

    * ``None`` is returned unchanged.
    * strings: ``uuid`` -> ``uuid.UUID``; ``timestamp``/``timestamptz`` ->
      ``datetime``; ``date`` -> ``date``; ``time``/``timetz`` -> ``time``.
    * numbers: ``interval`` -> ``timedelta`` (value in seconds);
      ``int2``/``int4``/``int8`` -> ``int``;
      ``float4``/``float8``/``numeric`` -> ``float``.
    * lists for array types (type name starting with ``_``, e.g. ``_uuid``):
      each element is converted with the element type, recursing into nested
      lists.

    Anything else is returned as-is.

    Args:
        value: Value produced by ``json.loads`` for one column.
        pg_type: PostgreSQL type name of the target column ("" if unknown).
    """
    # Imported here under an alias because this module binds the bare
    # name ``time`` to the stdlib ``time`` module.
    from datetime import date
    from datetime import time as dt_time

    if value is None:
        return None

    if isinstance(value, list) and pg_type.startswith("_"):
        # Nested lists keep the array type; leaf items use the element type.
        return [_coerce_value(item, pg_type if isinstance(item, list) else pg_type[1:]) for item in value]

    if isinstance(value, str):
        if pg_type == "uuid":
            return uuid.UUID(value)
        if pg_type in ("timestamptz", "timestamp"):
            return datetime.fromisoformat(value)
        if pg_type == "date":
            return date.fromisoformat(value)
        if pg_type in ("time", "timetz"):
            return dt_time.fromisoformat(value)
        return value

    if isinstance(value, (int, float)):
        if pg_type == "interval":
            return timedelta(seconds=value)
        if pg_type in ("int4", "int8", "int2"):
            return int(value)
        if pg_type in ("float4", "float8", "numeric"):
            return float(value)

    return value
|
|
249
|
+
|
|
250
|
+
|
|
251
|
+
def _build_insert(table: str, columns: list[str], col_types: dict[str, str]) -> str:
    """Return a parameterised INSERT statement for *table*.

    Column names are double-quoted. Each column gets a positional placeholder
    (``$1``, ``$2``, ...); placeholders for columns whose type in *col_types*
    is ``json`` or ``jsonb`` carry a ``::jsonb`` cast. Rows that conflict on
    ``"id"`` are ignored by the statement.
    """
    quoted_columns = ", ".join(f'"{name}"' for name in columns)
    placeholders = ", ".join(
        f"${position}::jsonb" if col_types.get(name, "") in ("json", "jsonb") else f"${position}"
        for position, name in enumerate(columns, start=1)
    )
    return f'INSERT INTO {table} ({quoted_columns}) VALUES ({placeholders}) ON CONFLICT ("id") DO NOTHING'
|
|
263
|
+
|
|
264
|
+
|
|
265
|
+
async def _flush_batch(
    conn: asyncpg.Connection,
    table: str,
    columns: list[str],
    col_types: dict[str, str],
    batch: list[dict],
) -> tuple[int, int]:
    """Write the rows in *batch* to *table*, one INSERT per row.

    A row counts as inserted when the statement's status reports at least one
    affected row. It counts as skipped when the status reports none, or when
    the insert raises a foreign-key violation (which is printed and ignored).

    Returns:
        ``(inserted, skipped)`` for this batch.
    """
    try:
        import asyncpg
    except ImportError:
        rprint(
            "[red]asyncpg not found.[/red] Install the migrate extra: [bold]pip install 'observal-cli[migrate]'[/bold]"
        )
        raise typer.Exit(1)

    if not batch:
        return 0, 0

    statement = _build_insert(table, columns, col_types)
    written, ignored = 0, 0

    for record in batch:
        params = [_coerce_value(record.get(name), col_types.get(name, "")) for name in columns]
        try:
            outcome = await conn.execute(statement, *params)
        except asyncpg.ForeignKeyViolationError as e:
            row_id = record.get("id", "unknown")
            rprint(f"[yellow] FK violation in {table}, row {row_id}: {e.constraint_name}[/yellow]")
            ignored += 1
            continue

        # The status looks like "INSERT 0 1" (inserted) or "INSERT 0 0" (conflict).
        affected = int(outcome.split()[-1])
        if affected > 0:
            written += 1
        else:
            ignored += 1

    return written, ignored
|
|
305
|
+
|
|
306
|
+
|
|
307
|
+
async def _insert_table(
    conn: asyncpg.Connection,
    table: str,
    jsonl_path: Path,
    col_types: dict[str, str],
) -> tuple[int, int]:
    """Load a JSONL file into *table* in batches of ``CHUNK_SIZE`` rows.

    Blank lines are ignored. The column list is taken from the keys of the
    first row and reused for every batch.

    Returns:
        ``(inserted, skipped)`` totals over the whole file.
    """
    inserted_total = 0
    skipped_total = 0
    pending: list[dict] = []
    field_names: list[str] | None = None

    with open(jsonl_path, encoding="utf-8") as handle:
        for raw in handle:
            text = raw.strip()
            if not text:
                continue

            record = json.loads(text)
            if field_names is None:
                field_names = [*record.keys()]
            pending.append(record)

            if len(pending) < CHUNK_SIZE:
                continue
            done, missed = await _flush_batch(conn, table, field_names, col_types, pending)
            inserted_total += done
            skipped_total += missed
            pending = []

    # Flush whatever is left after the last full batch.
    if pending and field_names:
        done, missed = await _flush_batch(conn, table, field_names, col_types, pending)
        inserted_total += done
        skipped_total += missed

    return inserted_total, skipped_total
|
|
343
|
+
|
|
344
|
+
|
|
345
|
+
async def _import_archive(db_url: str, archive_path: Path) -> ImportResult:
    """Import a migration archive into the target database.

    Steps:

    1. Extract the archive into a private temporary directory.
    2. Read ``manifest.json`` and verify the checksum of every table file
       before touching the database.
    3. Connect, and require the target's alembic version to equal the one
       recorded in the manifest.
    4. Insert each table in ``INSERT_ORDER``; tables that do not exist on the
       target are skipped.

    Args:
        db_url: Connection URL of the target database.
        archive_path: Path to the ``.tar.gz`` archive.

    Returns:
        An ``ImportResult``. ``tables_imported`` counts only tables that were
        actually processed; ``warnings`` lists tables whose archived rows
        could not be imported because the table is missing on the target.

    Raises:
        typer.Exit: The manifest is missing or malformed, a checksum fails,
            the connection fails, or the schema versions differ.
    """
    t0 = time.monotonic()
    warnings: list[str] = []

    staging_dir = Path(tempfile.mkdtemp())
    os.chmod(staging_dir, 0o700)
    try:
        # Extract archive
        with tarfile.open(archive_path, "r:gz") as tar:
            tar.extractall(staging_dir, filter="data")

        # Read manifest
        manifest_path = staging_dir / "manifest.json"
        if not manifest_path.exists():
            rprint("[red]Archive does not contain manifest.json[/red]")
            raise typer.Exit(1)
        try:
            manifest = json.loads(manifest_path.read_text(encoding="utf-8"))
        except ValueError:
            manifest = None
        required_keys = ("migration_id", "source_alembic_version", "tables")
        if (
            not isinstance(manifest, dict)
            or any(key not in manifest for key in required_keys)
            or not isinstance(manifest["tables"], dict)
        ):
            # Fail with a message instead of a KeyError traceback.
            rprint("[red]Archive manifest is malformed[/red] (migration_id, source_alembic_version, tables required).")
            raise typer.Exit(1)
        migration_id = manifest["migration_id"]
        manifest_tables = manifest["tables"]

        # Verify checksums BEFORE any DB operations
        failed_checksums: list[str] = []
        for table in INSERT_ORDER:
            jsonl_path = staging_dir / "pg" / f"{table}.jsonl"
            if not jsonl_path.exists():
                failed_checksums.append(f"{table} (file missing)")
                continue
            entry = manifest_tables.get(table)
            if not isinstance(entry, dict) or "checksum" not in entry:
                failed_checksums.append(f"{table} (missing from manifest)")
                continue
            if _sha256_file(jsonl_path) != entry["checksum"]:
                failed_checksums.append(table)

        if failed_checksums:
            rprint("[red]Checksum verification failed:[/red]")
            for name in failed_checksums:
                rprint(f" [red]✗[/red] {name}")
            rprint("\n[dim]Archive may be corrupted or tampered. Re-export from source.[/dim]")
            raise typer.Exit(1)

        # Connect and verify schema version
        conn = await _connect(db_url)
        try:
            target_version = await conn.fetchval("SELECT version_num FROM alembic_version LIMIT 1")
            source_version = manifest["source_alembic_version"]
            if target_version != source_version:
                rprint("[red]Schema version mismatch:[/red]")
                rprint(f" Archive: {source_version}")
                rprint(f" Target: {target_version}")
                rprint("\n[dim] Run: cd observal-server && alembic upgrade head[/dim]")
                raise typer.Exit(1)

            rows_inserted: dict[str, int] = {}
            rows_skipped: dict[str, int] = {}
            tables_imported = 0

            # Discover which tables exist on the target
            existing_tables = {
                row["table_name"]
                for row in await conn.fetch(
                    "SELECT table_name FROM information_schema.tables WHERE table_schema = 'public'"
                )
            }

            for table in INSERT_ORDER:
                jsonl_path = staging_dir / "pg" / f"{table}.jsonl"

                # Skip tables that don't exist on target
                if table not in existing_tables:
                    rprint(f"[dim] Skipping {table} (table does not exist on target)[/dim]")
                    rows_inserted[table] = 0
                    rows_skipped[table] = 0
                    # Surface data that was left behind, not just the skip.
                    archived_rows = manifest_tables[table].get("row_count", 0)
                    if archived_rows:
                        warnings.append(
                            f"{table}: {archived_rows} archived row(s) not imported (table does not exist on target)"
                        )
                    continue

                # Get column types for proper coercion
                col_types = await _get_column_types(conn, table)

                ins, sk = await _insert_table(conn, table, jsonl_path, col_types)
                rows_inserted[table] = ins
                rows_skipped[table] = sk
                tables_imported += 1

        finally:
            await conn.close()

        elapsed = time.monotonic() - t0
        return ImportResult(
            migration_id=migration_id,
            tables_imported=tables_imported,
            rows_inserted=rows_inserted,
            rows_skipped=rows_skipped,
            duration_seconds=round(elapsed, 2),
            warnings=warnings,
        )

    finally:
        shutil.rmtree(staging_dir, ignore_errors=True)
|
|
439
|
+
|
|
440
|
+
|
|
441
|
+
async def _validate_archive(archive_path: Path, db_url: str | None) -> ValidationResult:
    """Validate archive checksums and optionally compare against a database.

    Every table in ``INSERT_ORDER`` gets one ``ChecksumResult``. A table fails
    when its JSONL file is missing, when the manifest has no checksum for it,
    or when the computed SHA-256 differs from the manifest value.

    When *db_url* is given, the manifest row count of each table is paired
    with ``count(*)`` from the database (``-1`` if the table does not exist).

    Args:
        archive_path: Path to the ``.tar.gz`` archive.
        db_url: Optional database URL for the row-count comparison.

    Raises:
        typer.Exit: The archive has no ``manifest.json`` or the database
            connection fails.
    """
    staging_dir = Path(tempfile.mkdtemp())
    os.chmod(staging_dir, 0o700)
    try:
        with tarfile.open(archive_path, "r:gz") as tar:
            tar.extractall(staging_dir, filter="data")

        manifest_path = staging_dir / "manifest.json"
        if not manifest_path.exists():
            rprint("[red]Archive does not contain manifest.json[/red]")
            raise typer.Exit(1)
        manifest = json.loads(manifest_path.read_text(encoding="utf-8"))

        # Tolerate manifests that lack some (or all) table entries: those
        # tables are reported as failed checks instead of raising KeyError.
        tables_meta = manifest.get("tables") if isinstance(manifest, dict) else None
        if not isinstance(tables_meta, dict):
            tables_meta = {}

        def table_entry(name: str) -> dict:
            entry = tables_meta.get(name)
            return entry if isinstance(entry, dict) else {}

        # Verify checksums
        checksum_results: list[ChecksumResult] = []
        for table in INSERT_ORDER:
            jsonl_path = staging_dir / "pg" / f"{table}.jsonl"
            entry = table_entry(table)
            expected = entry.get("checksum", "")
            if "checksum" not in entry or not jsonl_path.exists():
                checksum_results.append(ChecksumResult(table, expected, "", False))
                continue
            actual = _sha256_file(jsonl_path)
            checksum_results.append(ChecksumResult(table, expected, actual, actual == expected))

        all_ok = all(r.passed for r in checksum_results)

        # Optional cross-database validation
        cross_db_results: dict[str, tuple[int, int]] | None = None
        if db_url:
            conn = await _connect(db_url)
            try:
                existing_tables = {
                    row["table_name"]
                    for row in await conn.fetch(
                        "SELECT table_name FROM information_schema.tables WHERE table_schema = 'public'"
                    )
                }
                cross_db_results = {}
                for table in INSERT_ORDER:
                    archive_count = table_entry(table).get("row_count", 0)
                    if table not in existing_tables:
                        cross_db_results[table] = (archive_count, -1)  # -1 signals table missing
                        continue
                    db_count = await conn.fetchval(f"SELECT count(*) FROM {table}")
                    cross_db_results[table] = (archive_count, db_count)
            finally:
                await conn.close()

        return ValidationResult(
            archive_valid=all_ok,
            checksum_results=checksum_results,
            cross_db_results=cross_db_results,
        )

    finally:
        shutil.rmtree(staging_dir, ignore_errors=True)
|
|
498
|
+
|
|
499
|
+
|
|
500
|
+
async def _export_database(db_url: str, output_path: Path) -> ExportResult:
    """Export all tables to JSONL files and pack into a tar.gz archive.

    Each table in ``INSERT_ORDER`` is written to ``pg/<table>.jsonl`` inside a
    private staging directory (an empty file for tables that do not exist),
    all within one read-only REPEATABLE READ transaction. Two manifests are
    then written and everything is packed into *output_path*.

    The archive is created with owner-only permissions (mode ``0o600``, where
    the platform honours it) because it contains credential hashes, and a
    partially written archive is removed if packing fails.

    Args:
        db_url: Connection URL of the source database.
        output_path: Destination of the ``.tar.gz`` archive; parent
            directories are created as needed.

    Returns:
        An ``ExportResult`` describing the archive.

    Raises:
        typer.Exit: The connection fails or no alembic version can be read.
    """
    t0 = time.monotonic()
    migration_id = str(uuid.uuid4())

    staging_dir = Path(tempfile.mkdtemp())
    os.chmod(staging_dir, 0o700)
    try:
        pg_dir = staging_dir / "pg"
        pg_dir.mkdir()

        conn = await _connect(db_url)
        try:
            # Read alembic version
            alembic_version = await conn.fetchval("SELECT version_num FROM alembic_version LIMIT 1")
            if not alembic_version:
                rprint("[red]Could not read alembic version from source database.[/red]")
                raise typer.Exit(1)

            table_counts: dict[str, int] = {}
            file_hashes: dict[str, str] = {}
            uuid_ranges: dict[str, dict[str, str]] = {}

            # Open REPEATABLE READ transaction for consistent snapshot
            async with conn.transaction(isolation="repeatable_read", readonly=True):
                # Discover which tables actually exist in the database
                existing_tables = {
                    row["table_name"]
                    for row in await conn.fetch(
                        "SELECT table_name FROM information_schema.tables WHERE table_schema = 'public'"
                    )
                }

                for table in INSERT_ORDER:
                    dest = pg_dir / f"{table}.jsonl"

                    # Skip tables that don't exist yet (DB on older migration)
                    if table not in existing_tables:
                        rprint(f"[dim] Skipping {table} (table does not exist)[/dim]")
                        # Write empty JSONL file so archive structure is consistent
                        dest.write_text("")
                        table_counts[table] = 0
                        file_hashes[table] = _sha256_file(dest)
                        continue

                    # Discover columns via prepared statement
                    stmt = await conn.prepare(f"SELECT * FROM {table} LIMIT 0")
                    columns = [attr.name for attr in stmt.get_attributes()]

                    query = _build_select(table, columns)

                    row_count = 0
                    min_id: str | None = None
                    max_id: str | None = None

                    with open(dest, "w", encoding="utf-8") as f:
                        async for record in conn.cursor(query, prefetch=CHUNK_SIZE):
                            row = dict(record)
                            f.write(json.dumps(row, cls=PGEncoder) + "\n")
                            row_count += 1

                            # Track the smallest/largest id, compared as strings
                            row_id = row.get("id")
                            if row_id is not None:
                                id_str = str(row_id)
                                if min_id is None or id_str < min_id:
                                    min_id = id_str
                                if max_id is None or id_str > max_id:
                                    max_id = id_str

                    table_counts[table] = row_count
                    file_hashes[table] = _sha256_file(dest)

                    if min_id is not None:
                        uuid_ranges[table] = {"min_id": min_id, "max_id": max_id}

        finally:
            await conn.close()

        # Write manifest.json
        exported_at = datetime.now(UTC).isoformat()
        manifest = {
            "schema_version": "1.0",
            "migration_id": migration_id,
            "exported_at": exported_at,
            "source_alembic_version": alembic_version,
            "tables": {
                table: {"checksum": file_hashes[table], "row_count": table_counts[table]} for table in INSERT_ORDER
            },
        }
        manifest_path = staging_dir / "manifest.json"
        manifest_path.write_text(json.dumps(manifest, indent=2) + "\n", encoding="utf-8")

        # Write migration_manifest.json
        # NOTE(review): db_url may embed the password, and this is an unsalted
        # hash stored in the archive — confirm that is acceptable.
        db_url_hash = hashlib.sha256(db_url.encode()).hexdigest()
        migration_manifest = {
            "migration_id": migration_id,
            "phase1_completed_at": exported_at,
            "source_db_url_hash": db_url_hash,
            "table_row_counts": dict(table_counts),
            "uuid_ranges": uuid_ranges,
        }
        migration_manifest_path = staging_dir / "migration_manifest.json"
        migration_manifest_path.write_text(json.dumps(migration_manifest, indent=2) + "\n", encoding="utf-8")

        # Ensure output parent directory exists
        output_path.parent.mkdir(parents=True, exist_ok=True)

        # Pack archive. The file is opened with mode 0o600 so it is never
        # readable by other users, not even while it is being written.
        open_flags = os.O_WRONLY | os.O_CREAT | os.O_TRUNC | getattr(os, "O_BINARY", 0)
        archive_fd = os.open(output_path, open_flags, 0o600)
        try:
            with os.fdopen(archive_fd, "wb") as raw, tarfile.open(fileobj=raw, mode="w:gz") as tar:
                tar.add(str(manifest_path), arcname="manifest.json")
                tar.add(str(migration_manifest_path), arcname="migration_manifest.json")
                for table in INSERT_ORDER:
                    jsonl_file = pg_dir / f"{table}.jsonl"
                    tar.add(str(jsonl_file), arcname=f"pg/{table}.jsonl")
        except BaseException:
            # Do not leave a truncated archive behind.
            output_path.unlink(missing_ok=True)
            raise

        elapsed = time.monotonic() - t0
        total_rows = sum(table_counts.values())

        return ExportResult(
            archive_path=str(output_path),
            migration_id=migration_id,
            table_counts=table_counts,
            checksums=file_hashes,
            duration_seconds=round(elapsed, 2),
            total_rows=total_rows,
        )

    finally:
        shutil.rmtree(staging_dir, ignore_errors=True)
|
|
631
|
+
|
|
632
|
+
|
|
633
|
+
# ── Typer app ────────────────────────────────────────────
|
|
634
|
+
|
|
635
|
+
# Typer sub-application that groups the export, import and validate commands
# defined below.
migrate_app = typer.Typer(help="PostgreSQL shallow-copy migration tools")
|
|
636
|
+
|
|
637
|
+
|
|
638
|
+
@migrate_app.command("export")
def export_cmd(
    db_url: str = typer.Option(..., "--db-url", help="Source PostgreSQL connection string"),
    output: str | None = typer.Option(None, "--output", "-o", help="Output archive path"),
) -> None:
    """Export all PostgreSQL registry data to a portable archive."""
    _require_admin()

    # Without --output, fall back to a UTC-timestamped file name.
    if output is None:
        stamp = datetime.now(UTC).strftime("%Y%m%d-%H%M%S")
        output = f"observal-export-{stamp}.tar.gz"
    output_path = Path(output)

    # Refuse to overwrite an existing file.
    if output_path.exists():
        rprint(f"[red]Output file already exists:[/red] {output_path}")
        rprint("[dim] Choose a different path or remove the existing file.[/dim]")
        raise typer.Exit(1)

    rprint(f"[bold]Exporting to:[/bold] {output_path}")
    with spinner("Connecting to source database..."):
        result = asyncio.run(_export_database(db_url, output_path))

    # Summary
    size_mb = output_path.stat().st_size / (1024 * 1024)
    summary = (
        "\n[bold green]✓ Export complete[/bold green]",
        f" Archive: {result.archive_path}",
        f" Migration: {result.migration_id}",
        f" Tables: {len(result.table_counts)}",
        f" Rows: {result.total_rows:,}",
        f" Size: {size_mb:.1f} MB",
        f" Duration: {result.duration_seconds:.1f}s",
    )
    for entry in summary:
        rprint(entry)

    # Security warning
    rprint()
    rprint("[yellow]⚠ Archive contains hashed credentials (passwords, API keys).[/yellow]")
    rprint("[yellow] Store securely and delete after import.[/yellow]")
|
|
676
|
+
|
|
677
|
+
|
|
678
|
+
@migrate_app.command("import")
def import_cmd(
    db_url: str = typer.Option(..., "--db-url", help="Target PostgreSQL connection string"),
    archive: str = typer.Option(..., "--archive", "-a", help="Path to .tar.gz archive"),
) -> None:
    """Import a migration archive into the target database."""
    _require_admin()

    # Reject missing or non-tar inputs before doing any work.
    archive_path = Path(archive)
    if not archive_path.exists():
        rprint(f"[red]Archive not found:[/red] {archive_path}")
        raise typer.Exit(1)
    if not tarfile.is_tarfile(archive_path):
        rprint(f"[red]Invalid archive format:[/red] {archive_path}")
        rprint("[dim] Expected a .tar.gz file.[/dim]")
        raise typer.Exit(1)

    rprint(f"[bold]Importing from:[/bold] {archive_path}")
    with spinner("Importing..."):
        outcome = asyncio.run(_import_archive(db_url, archive_path))

    inserted_total = sum(outcome.rows_inserted.values())
    skipped_total = sum(outcome.rows_skipped.values())

    report = (
        "\n[bold green]✓ Import complete[/bold green]",
        f" Migration: {outcome.migration_id}",
        f" Tables: {outcome.tables_imported}",
        f" Inserted: {inserted_total:,}",
        f" Skipped: {skipped_total:,}",
        f" Duration: {outcome.duration_seconds:.1f}s",
    )
    for entry in report:
        rprint(entry)

    if not outcome.warnings:
        return
    rprint("\n[yellow]Warnings:[/yellow]")
    for note in outcome.warnings:
        rprint(f" [yellow]⚠[/yellow] {note}")
|
|
714
|
+
|
|
715
|
+
|
|
716
|
+
@migrate_app.command("validate")
def validate_cmd(
    archive: str = typer.Option(..., "--archive", "-a", help="Path to .tar.gz archive"),
    db_url: str | None = typer.Option(None, "--db-url", help="Optional database for cross-validation"),
) -> None:
    """Validate archive integrity and optionally compare against a database."""
    _require_admin()

    # Reject missing or non-tar inputs before doing any work.
    archive_path = Path(archive)
    if not archive_path.exists():
        rprint(f"[red]Archive not found:[/red] {archive_path}")
        raise typer.Exit(1)
    if not tarfile.is_tarfile(archive_path):
        rprint(f"[red]Invalid archive format:[/red] {archive_path}")
        raise typer.Exit(1)

    with spinner("Validating archive..."):
        report = asyncio.run(_validate_archive(archive_path, db_url))

    # Print checksum results
    rprint("\n[bold]Checksum verification:[/bold]")
    for check in report.checksum_results:
        marker = "[green]✓[/green]" if check.passed else "[red]✗[/red]"
        rprint(f" {marker} {check.table_name}")

    if not report.archive_valid:
        rprint("\n[red]Archive validation failed.[/red]")
        raise typer.Exit(1)

    rprint("\n[green]✓ All checksums valid[/green]")

    # Cross-database comparison (only when results are present)
    if not report.cross_db_results:
        return

    rprint("\n[bold]Row count comparison:[/bold]")
    differing = 0
    for table, (archive_count, db_count) in report.cross_db_results.items():
        if db_count == -1:
            rprint(f" [dim]-[/dim] {table}: [dim]table not in database[/dim]")
            continue
        if archive_count == db_count:
            rprint(f" [green]✓[/green] {table}: {archive_count}")
            continue
        rprint(f" [yellow]≠[/yellow] {table}: archive={archive_count}, db={db_count}")
        differing += 1

    if differing == 0:
        rprint("\n[green]✓ All row counts match[/green]")
    else:
        rprint(f"\n[yellow]⚠ {differing} table(s) have different row counts[/yellow]")
|