postgresai 0.15.0 → 0.16.0-dev.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -203,8 +203,11 @@ postgresai mon health [--wait <sec>] # Check monitoring services health
203
203
  - `--demo` - Demo mode with sample database (testing only, cannot use with --api-key)
204
204
  - `--api-key <key>` - Postgres AI API key for automated report uploads
205
205
  - `--db-url <url>` - PostgreSQL connection URL to monitor (format: `postgresql://user:pass@host:port/db`)
206
+ - `--instance-id <uuid>` - Adopt a console-provisioned monitoring instance (also via the `PGAI_INSTANCE_ID` env var)
206
207
  - `-y, --yes` - Accept all defaults and skip interactive prompts
207
208
 
209
+ When `--instance-id <uuid>` (or `PGAI_INSTANCE_ID`) is set, `local-install` forwards the id to the platform, which **adopts** the already-provisioned monitoring instance instead of self-registering a duplicate under an auto-created `postgres-ai-monitoring` project. The CLI then persists the adopted instance's real project to `.pgwatch-config`, so checkup reports upload alongside the rest of that instance's health data. Adoption is awaited (with one automatic retry); if it fails, the CLI prints a warning, and checkup reports fall back to the default project until you re-run `local-install`. Without the flag, the legacy self-registration path is byte-for-byte unchanged.
210
+
208
211
  `local-install` writes `.env` in the monitoring directory. It preserves existing `REPLICATOR_PASSWORD` and `VM_AUTH_*` values or generates new random ones when missing; `VM_AUTH_USERNAME` defaults to `vmauth` when absent. The replication password is used by the demo PostgreSQL standby replication user, and the VM auth credentials are required before Docker Compose can provision Grafana datasources. If you run `docker compose` directly or maintain `.env` yourself, set both VM auth values before upgrading. For rotation, run `VM_AUTH_PASSWORD="$(openssl rand -base64 18)" ./scripts/rotate-vm-auth.sh` from the monitoring directory so `.env`, `sink-prometheus`, and `grafana` update together.
209
212
 
210
213
  #### Monitoring target databases (`mon targets` subgroup)
@@ -14,6 +14,7 @@ import { startMcpServer } from "../lib/mcp-server";
14
14
  import { fetchIssues, fetchIssueComments, createIssueComment, fetchIssue, createIssue, updateIssue, updateIssueComment, fetchActionItem, fetchActionItems, createActionItem, updateActionItem, type ConfigChange } from "../lib/issues";
15
15
  import { fetchReports, fetchAllReports, fetchReportFiles, fetchReportFileData, renderMarkdownForTerminal, parseFlexibleDate } from "../lib/reports";
16
16
  import { resolveBaseUrls } from "../lib/util";
17
+ import { registerAasCollection, parseVcpus } from "../lib/aas-onboard";
17
18
  import { uploadFile, downloadFile, buildMarkdownLink, uploadAttachments, appendAttachmentsToContent } from "../lib/storage";
18
19
  import { applyInitPlan, applyUninitPlan, buildInitPlan, buildUninitPlan, checkCurrentUserPermissions, connectWithSslFallback, DEFAULT_MONITORING_USER, formatPermissionCheckMessages, KNOWN_PROVIDERS, redactPasswordsInSql, resolveAdminConnection, resolveMonitoringPassword, validateProvider, verifyInitSetup } from "../lib/init";
19
20
  import { SupabaseClient, resolveSupabaseConfig, extractProjectRefFromUrl, applyInitPlanViaSupabase, verifyInitSetupViaSupabase, fetchPoolerDatabaseUrl, type PgCompatibleError } from "../lib/supabase";
@@ -24,7 +25,7 @@ import { createInterface } from "readline";
24
25
  import * as childProcess from "child_process";
25
26
  import { REPORT_GENERATORS, CHECK_INFO, generateAllReports } from "../lib/checkup";
26
27
  import { getCheckupEntry } from "../lib/checkup-dictionary";
27
- import { createCheckupReport, uploadCheckupReportJson, convertCheckupReportJsonToMarkdown, RpcError, formatRpcErrorForDisplay, withRetry } from "../lib/checkup-api";
28
+ import { createCheckupReport, uploadCheckupReportJson, convertCheckupReportJsonToMarkdown, RpcError, formatRpcErrorForDisplay, withRetry, verifyApiKey } from "../lib/checkup-api";
28
29
  import { generateCheckSummary } from "../lib/checkup-summary";
29
30
  import {
30
31
  type Instance,
@@ -334,7 +335,13 @@ function prepareUploadConfig(
334
335
  console.error("Tip: run 'postgresai auth' or pass --api-key / set PGAI_API_KEY");
335
336
  return null; // Signal to exit
336
337
  }
337
- return undefined; // Skip upload silently
338
+ // No credentials and upload not explicitly requested: fall back to
339
+ // local-only mode, but say so prominently — skipping the upload silently
340
+ // hides the fact that results never reach the Console.
341
+ console.error("Notice: no API key configured — results will NOT be uploaded to PostgresAI.");
342
+ console.error(" To upload: run 'postgresai auth login' or pass --api-key / set PGAI_API_KEY.");
343
+ console.error(" To run locally without this notice, pass --no-upload.");
344
+ return undefined; // Skip upload, run checks locally
338
345
  }
339
346
 
340
347
  const cfg = config.readConfig();
@@ -2076,6 +2083,30 @@ program
2076
2083
  const projectWasGenerated = uploadResult?.projectWasGenerated ?? false;
2077
2084
  shouldUpload = !!uploadCfg;
2078
2085
 
2086
+ // Preflight: validate the configured API key with a cheap authenticated
2087
+ // call BEFORE connecting / running checks, so an invalid or expired token
2088
+ // fails in seconds instead of after minutes of wasted work (previously the
2089
+ // upload at the very end of the run was the first authenticated call).
2090
+ // Only a definitive 401/403 stops the run; a transient pre-flight failure
2091
+ // (network error, timeout, 5xx) warns and continues — the upload may still
2092
+ // succeed.
2093
+ if (uploadCfg) {
2094
+ const verification = await verifyApiKey({
2095
+ apiKey: uploadCfg.apiKey,
2096
+ apiBaseUrl: uploadCfg.apiBaseUrl,
2097
+ });
2098
+ if (verification.status === "invalid") {
2099
+ console.error(`Error: the configured API key was rejected by the PostgresAI API (HTTP ${verification.statusCode})`);
2100
+ console.error("Tip: run 'postgresai auth login' to re-authenticate, or pass a valid --api-key / set PGAI_API_KEY");
2101
+ console.error("Tip: pass --no-upload to run checks locally without uploading");
2102
+ process.exitCode = 1;
2103
+ return;
2104
+ }
2105
+ if (verification.status === "unknown") {
2106
+ console.error(`Warning: could not verify API key before running checks (${verification.detail}); continuing — upload will still be attempted`);
2107
+ }
2108
+ }
2109
+
2079
2110
  // Connect and run checks
2080
2111
  const adminConn = resolveAdminConnection({
2081
2112
  conn,
@@ -2388,54 +2419,136 @@ function checkRunningContainers(): { running: boolean; containers: string[] } {
2388
2419
  }
2389
2420
  }
2390
2421
 
2422
+ /** Parsed result of v1.monitoring_instance_register. */
2423
+ interface MonitoringRegistration {
2424
+ instanceId?: string;
2425
+ projectId?: number;
2426
+ projectName?: string;
2427
+ created?: boolean;
2428
+ }
2429
+
2391
2430
  /**
2392
- * Register monitoring instance with the API (non-blocking).
2393
- * Returns immediately, logs result in background.
2431
+ * Register the monitoring instance with the API.
2432
+ *
2433
+ * Two modes (issue platform-all#311):
2434
+ * - With `instanceId` (console-provisioned installs; passed via
2435
+ * `--instance-id` / PGAI_INSTANCE_ID, wired from the provisioning flow
2436
+ * through SI/ansible): the platform ADOPTS the existing provisioned
2437
+ * instance instead of self-registering a duplicate under an auto-created
2438
+ * "postgres-ai-monitoring" project. The returned project_name is what the
2439
+ * reporter must upload to, so callers should await the result and persist
2440
+ * it. One automatic retry, since a lost adoption splits the health matrix
2441
+ * across two projects.
2442
+ * - Without it: legacy self-registration by project name.
2443
+ *
2444
+ * Never throws — registration is best-effort; returns null on failure.
2394
2445
  */
2395
- function registerMonitoringInstance(
2446
+ async function registerMonitoringInstance(
2396
2447
  apiKey: string,
2397
2448
  projectName: string,
2398
- opts?: { apiBaseUrl?: string; debug?: boolean }
2399
- ): void {
2449
+ opts?: { apiBaseUrl?: string; debug?: boolean; instanceId?: string; retries?: number; retryDelayMs?: number }
2450
+ ): Promise<MonitoringRegistration | null> {
2400
2451
  const { apiBaseUrl } = resolveBaseUrls(opts);
2401
2452
  const url = `${apiBaseUrl}/rpc/monitoring_instance_register`;
2402
2453
  const debug = opts?.debug;
2454
+ const instanceId = opts?.instanceId;
2455
+ const retries = opts?.retries ?? (instanceId ? 1 : 0);
2456
+ // Brief backoff before a retry so a transient 5xx / connection blip gets a
2457
+ // moment to recover; skipped before the first attempt. Tests pass 0.
2458
+ const retryDelayMs = opts?.retryDelayMs ?? 400;
2403
2459
 
2404
2460
  if (debug) {
2405
2461
  console.error(`\nDebug: Registering monitoring instance...`);
2406
2462
  console.error(`Debug: POST ${url}`);
2407
- console.error(`Debug: project_name=${projectName}`);
2463
+ console.error(`Debug: project_name=${projectName}${instanceId ? ` instance_id=${instanceId}` : ""}`);
2408
2464
  }
2409
2465
 
2410
- // Fire and forget - don't block the main flow
2411
- fetch(url, {
2412
- method: "POST",
2413
- headers: {
2414
- "Content-Type": "application/json",
2415
- },
2416
- body: JSON.stringify({
2417
- api_token: apiKey,
2418
- project_name: projectName,
2419
- }),
2420
- })
2421
- .then(async (res) => {
2466
+ const requestBody: Record<string, string> = {
2467
+ api_token: apiKey,
2468
+ project_name: projectName,
2469
+ };
2470
+ if (instanceId) {
2471
+ requestBody.instance_id = instanceId;
2472
+ }
2473
+
2474
+ for (let attempt = 0; attempt <= retries; attempt++) {
2475
+ if (attempt > 0 && retryDelayMs > 0) {
2476
+ await new Promise((resolve) => setTimeout(resolve, retryDelayMs));
2477
+ }
2478
+ try {
2479
+ const res = await fetch(url, {
2480
+ method: "POST",
2481
+ headers: {
2482
+ "Content-Type": "application/json",
2483
+ },
2484
+ body: JSON.stringify(requestBody),
2485
+ });
2422
2486
  const body = await res.text().catch(() => "");
2423
2487
  if (!res.ok) {
2424
2488
  if (debug) {
2425
2489
  console.error(`Debug: Monitoring registration failed: HTTP ${res.status}`);
2426
2490
  console.error(`Debug: Response: ${body}`);
2427
2491
  }
2428
- return;
2492
+ continue;
2429
2493
  }
2430
2494
  if (debug) {
2431
2495
  console.error(`Debug: Monitoring registration response: ${body}`);
2432
2496
  }
2433
- })
2434
- .catch((err) => {
2497
+ try {
2498
+ const parsed = JSON.parse(body) as {
2499
+ instance_id?: unknown;
2500
+ project_id?: unknown;
2501
+ project_name?: unknown;
2502
+ created?: unknown;
2503
+ };
2504
+ // Runtime-check each field: the `as` cast above is compile-time only,
2505
+ // and a spoofed/older platform could return mistyped values. In
2506
+ // particular `project_id` must be a real number before we trust it
2507
+ // over the (string) project_name in the persistence decision below.
2508
+ return {
2509
+ instanceId: typeof parsed.instance_id === "string" ? parsed.instance_id : undefined,
2510
+ projectId: typeof parsed.project_id === "number" ? parsed.project_id : undefined,
2511
+ projectName: typeof parsed.project_name === "string" ? parsed.project_name : undefined,
2512
+ created: typeof parsed.created === "boolean" ? parsed.created : undefined,
2513
+ };
2514
+ } catch {
2515
+ return {};
2516
+ }
2517
+ } catch (err) {
2435
2518
  if (debug) {
2436
- console.error(`Debug: Monitoring registration error: ${err.message}`);
2519
+ console.error(`Debug: Monitoring registration error: ${(err as Error).message}`);
2437
2520
  }
2438
- });
2521
+ }
2522
+ }
2523
+ return null;
2524
+ }
2525
+
2526
+ /**
2527
+ * Decide what to persist as `.pgwatch-config`'s `project_name` from an
2528
+ * adoption response, or `null` if the response carries no usable project.
2529
+ *
2530
+ * Pure (no I/O) so the branch logic is unit-testable.
2531
+ *
2532
+ * - Prefers the numeric `project_id`: `checkup_report_create` resolves
2533
+ * "project" as id-or-name, and the id survives project renames (a name
2534
+ * match would miss after a rename and silently re-create the old name as a
2535
+ * fresh project). `project_id === 0` is still a valid id and is honored.
2536
+ * - Falls back to `project_name`, but only when it's a safe single-line token.
2537
+ * The value is server-supplied and written verbatim into a `key=value`
2538
+ * config file; a name containing `\r`, `\n`, or `=` could inject extra
2539
+ * config keys (config-file injection, CWE-93/74). Reject those rather than
2540
+ * risk it — over a trusted first-party endpoint this should never fire.
2541
+ */
2542
+ const PROJECT_NAME_RE = /^[A-Za-z0-9._-]+$/;
2543
+ function resolveAdoptedProject(reg: MonitoringRegistration | null): string | null {
2544
+ if (!reg) return null;
2545
+ if (typeof reg.projectId === "number" && Number.isFinite(reg.projectId)) {
2546
+ return String(reg.projectId);
2547
+ }
2548
+ if (typeof reg.projectName === "string" && PROJECT_NAME_RE.test(reg.projectName)) {
2549
+ return reg.projectName;
2550
+ }
2551
+ return null;
2439
2552
  }
2440
2553
 
2441
2554
  /**
@@ -2596,8 +2709,16 @@ mon
2596
2709
  .option("--db-url <url>", "PostgreSQL connection URL to monitor")
2597
2710
  .option("--tag <tag>", "Docker image tag to use (e.g., 0.14.0, 0.14.0-dev.33)")
2598
2711
  .option("--project <name>", "Docker Compose project name (default: postgres_ai)")
2712
+ .option(
2713
+ "--instance-id <uuid>",
2714
+ "adopt a console-provisioned monitoring instance instead of self-registering a new one (set automatically by the provisioning flow; PGAI_INSTANCE_ID env also works)"
2715
+ )
2716
+ .option(
2717
+ "--vcpus <n>",
2718
+ "source DB vCPU count used for AAS zone thresholds (set automatically by the provisioning flow; PGAI_VCPUS env also works). Omit or 0 = unknown — AAS collection stays off until a real value is set."
2719
+ )
2599
2720
  .option("-y, --yes", "accept all defaults and skip interactive prompts", false)
2600
- .action(async (opts: { demo: boolean; apiKey?: string; dbUrl?: string; tag?: string; project?: string; yes: boolean }) => {
2721
+ .action(async (opts: { demo: boolean; apiKey?: string; dbUrl?: string; tag?: string; project?: string; instanceId?: string; vcpus?: string; yes: boolean }) => {
2601
2722
  // Get apiKey from global program options (--api-key is defined globally)
2602
2723
  // This is needed because Commander.js routes --api-key to the global option, not the subcommand's option
2603
2724
  const globalOpts = program.opts<CliOptions>();
@@ -3009,13 +3130,70 @@ mon
3009
3130
  }
3010
3131
  console.log("✓ Services started\n");
3011
3132
 
3012
- // Register monitoring instance with API (non-blocking, only if API key is configured)
3133
+ // Register monitoring instance with API (only if API key is configured).
3134
+ // Console-provisioned installs pass --instance-id (or PGAI_INSTANCE_ID):
3135
+ // the platform then ADOPTS the provisioned instance and tells us its real
3136
+ // project, which the reporter must upload to — so that path is awaited
3137
+ // and persisted; the legacy self-registration stays fire-and-forget
3138
+ // (issue platform-all#311).
3013
3139
  if (apiKey && !opts.demo) {
3014
3140
  const projectName = opts.project || "postgres-ai-monitoring";
3015
- registerMonitoringInstance(apiKey, projectName, {
3016
- apiBaseUrl: globalOpts.apiBaseUrl,
3017
- debug: !!process.env.DEBUG,
3018
- });
3141
+ const instanceId = opts.instanceId || process.env.PGAI_INSTANCE_ID;
3142
+ if (instanceId) {
3143
+ const reg = await registerMonitoringInstance(apiKey, projectName, {
3144
+ apiBaseUrl: globalOpts.apiBaseUrl,
3145
+ debug: !!process.env.DEBUG,
3146
+ instanceId,
3147
+ });
3148
+ const adoptedProject = resolveAdoptedProject(reg);
3149
+ if (adoptedProject != null) {
3150
+ // Point the reporter at the adopted instance's project so checkup
3151
+ // uploads land next to the rest of this instance's health data.
3152
+ updatePgwatchConfig(path.resolve(projectDir, ".pgwatch-config"), {
3153
+ project_name: adoptedProject,
3154
+ });
3155
+ // `created` distinguishes a fresh self-registration from adopting an
3156
+ // existing provisioned row; with an instance_id we expect adoption.
3157
+ const verb = reg?.created ? "Registered" : "Adopted";
3158
+ console.log(`✓ ${verb} monitoring instance (project: ${adoptedProject})\n`);
3159
+ } else if (reg) {
3160
+ // Request succeeded but carried no usable project field — there is nothing
3161
+ // to persist, so warn without reporting a hard failure (no re-run needed).
3162
+ console.error(
3163
+ `⚠ Adopted provisioned instance ${instanceId} but the platform returned no project — reports will use project '${projectName}'`
3164
+ );
3165
+ } else {
3166
+ console.error(
3167
+ `⚠ Could not adopt provisioned instance ${instanceId} — reports will use project '${projectName}' until 'postgresai mon local-install' is re-run`
3168
+ );
3169
+ }
3170
+
3171
+ // Best-effort: arm hands-off AAS auto-collection for this adopted
3172
+ // instance. Mints a Grafana Viewer SA on the LOCAL Grafana, resolves
3173
+ // the datasource id + (cluster, node_name) labels from the pgwatch
3174
+ // config we wrote, and hands the platform a finished token via the
3175
+ // API-token RPC (v1.monitoring_instance_aas_register). Never fatal —
3176
+ // it can be enabled later by re-running local-install.
3177
+ const aas = await registerAasCollection(apiKey, instanceId, {
3178
+ grafanaPassword,
3179
+ instancesPath,
3180
+ vcpus: parseVcpus(opts.vcpus ?? process.env.PGAI_VCPUS),
3181
+ apiBaseUrl: globalOpts.apiBaseUrl,
3182
+ debug: !!process.env.DEBUG,
3183
+ });
3184
+ if (aas.ok) {
3185
+ console.log("✓ AAS auto-collection registered\n");
3186
+ } else {
3187
+ console.error(
3188
+ `⚠ AAS auto-collection not registered (${aas.reason}); it can be enabled later by re-running 'postgresai mon local-install'\n`
3189
+ );
3190
+ }
3191
+ } else {
3192
+ void registerMonitoringInstance(apiKey, projectName, {
3193
+ apiBaseUrl: globalOpts.apiBaseUrl,
3194
+ debug: !!process.env.DEBUG,
3195
+ });
3196
+ }
3019
3197
  }
3020
3198
 
3021
3199
  // Final summary
@@ -5309,3 +5487,4 @@ if (import.meta.main) {
5309
5487
  // Exported for unit tests (the CLI surface above is unaffected; these are the
5310
5488
  // same functions used by the `mon` commands).
5311
5489
  export { refreshBundledComposeIfStale, readDeployedTag, isValidComposeYaml };
5490
+ export { registerMonitoringInstance, resolveAdoptedProject, type MonitoringRegistration };