@percher/core 0.2.5 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/ai-files-manifest.d.ts +28 -0
- package/dist/ai-files-manifest.d.ts.map +1 -0
- package/dist/ai-files-manifest.js +96 -0
- package/dist/ai-files-manifest.js.map +1 -0
- package/dist/commands/account.d.ts +51 -0
- package/dist/commands/account.d.ts.map +1 -0
- package/dist/commands/account.js +88 -0
- package/dist/commands/account.js.map +1 -0
- package/dist/commands/ai-files.d.ts +73 -0
- package/dist/commands/ai-files.d.ts.map +1 -0
- package/dist/commands/ai-files.js +179 -0
- package/dist/commands/ai-files.js.map +1 -0
- package/dist/commands/billing.d.ts +1 -1
- package/dist/commands/billing.d.ts.map +1 -1
- package/dist/commands/billing.js +1 -1
- package/dist/commands/billing.js.map +1 -1
- package/dist/commands/continue.d.ts +48 -0
- package/dist/commands/continue.d.ts.map +1 -0
- package/dist/commands/continue.js +121 -0
- package/dist/commands/continue.js.map +1 -0
- package/dist/commands/create.d.ts +1 -1
- package/dist/commands/create.d.ts.map +1 -1
- package/dist/commands/create.js +1 -1
- package/dist/commands/create.js.map +1 -1
- package/dist/commands/dashboard.d.ts +15 -0
- package/dist/commands/dashboard.d.ts.map +1 -0
- package/dist/commands/dashboard.js +33 -0
- package/dist/commands/dashboard.js.map +1 -0
- package/dist/commands/data-export.d.ts +21 -0
- package/dist/commands/data-export.d.ts.map +1 -0
- package/dist/commands/data-export.js +36 -0
- package/dist/commands/data-export.js.map +1 -0
- package/dist/commands/data.d.ts +1 -1
- package/dist/commands/data.d.ts.map +1 -1
- package/dist/commands/data.js +1 -1
- package/dist/commands/data.js.map +1 -1
- package/dist/commands/delete.d.ts +1 -1
- package/dist/commands/delete.d.ts.map +1 -1
- package/dist/commands/delete.js +1 -1
- package/dist/commands/delete.js.map +1 -1
- package/dist/commands/deploys.d.ts +2 -2
- package/dist/commands/deploys.d.ts.map +1 -1
- package/dist/commands/deploys.js +21 -5
- package/dist/commands/deploys.js.map +1 -1
- package/dist/commands/dev.d.ts +1 -9
- package/dist/commands/dev.d.ts.map +1 -1
- package/dist/commands/dev.js +77 -23
- package/dist/commands/dev.js.map +1 -1
- package/dist/commands/diagnose.d.ts +1 -1
- package/dist/commands/diagnose.d.ts.map +1 -1
- package/dist/commands/diagnose.js +1 -1
- package/dist/commands/diagnose.js.map +1 -1
- package/dist/commands/doctor.d.ts +63 -1
- package/dist/commands/doctor.d.ts.map +1 -1
- package/dist/commands/doctor.js +792 -10
- package/dist/commands/doctor.js.map +1 -1
- package/dist/commands/domains.d.ts +1 -1
- package/dist/commands/domains.d.ts.map +1 -1
- package/dist/commands/domains.js +1 -1
- package/dist/commands/domains.js.map +1 -1
- package/dist/commands/env-scan.d.ts +2 -0
- package/dist/commands/env-scan.d.ts.map +1 -0
- package/dist/commands/env-scan.js +92 -0
- package/dist/commands/env-scan.js.map +1 -0
- package/dist/commands/env.d.ts +1 -1
- package/dist/commands/env.d.ts.map +1 -1
- package/dist/commands/env.js +1 -1
- package/dist/commands/env.js.map +1 -1
- package/dist/commands/export.d.ts +1 -1
- package/dist/commands/export.js +1 -1
- package/dist/commands/generate.d.ts +1 -1
- package/dist/commands/generate.d.ts.map +1 -1
- package/dist/commands/generate.js +14 -9
- package/dist/commands/generate.js.map +1 -1
- package/dist/commands/github.d.ts +60 -0
- package/dist/commands/github.d.ts.map +1 -0
- package/dist/commands/github.js +112 -0
- package/dist/commands/github.js.map +1 -0
- package/dist/commands/import-project.d.ts +1 -1
- package/dist/commands/import-project.d.ts.map +1 -1
- package/dist/commands/import-project.js +1 -1
- package/dist/commands/import-project.js.map +1 -1
- package/dist/commands/init.d.ts +1 -1
- package/dist/commands/init.d.ts.map +1 -1
- package/dist/commands/init.js +1 -1
- package/dist/commands/init.js.map +1 -1
- package/dist/commands/insights.d.ts +1 -1
- package/dist/commands/insights.d.ts.map +1 -1
- package/dist/commands/insights.js +1 -1
- package/dist/commands/insights.js.map +1 -1
- package/dist/commands/login.d.ts +1 -1
- package/dist/commands/login.d.ts.map +1 -1
- package/dist/commands/login.js +1 -1
- package/dist/commands/login.js.map +1 -1
- package/dist/commands/logs.d.ts +1 -1
- package/dist/commands/logs.d.ts.map +1 -1
- package/dist/commands/logs.js +1 -1
- package/dist/commands/logs.js.map +1 -1
- package/dist/commands/mcp.d.ts +1 -1
- package/dist/commands/mcp.d.ts.map +1 -1
- package/dist/commands/mcp.js +1 -1
- package/dist/commands/mcp.js.map +1 -1
- package/dist/commands/open.d.ts +1 -1
- package/dist/commands/open.d.ts.map +1 -1
- package/dist/commands/open.js +1 -1
- package/dist/commands/open.js.map +1 -1
- package/dist/commands/publish-failure.d.ts +31 -0
- package/dist/commands/publish-failure.d.ts.map +1 -0
- package/dist/commands/publish-failure.js +142 -0
- package/dist/commands/publish-failure.js.map +1 -0
- package/dist/commands/publish-node.d.ts +13 -0
- package/dist/commands/publish-node.d.ts.map +1 -0
- package/dist/commands/publish-node.js +38 -0
- package/dist/commands/publish-node.js.map +1 -0
- package/dist/commands/publish.d.ts +87 -3
- package/dist/commands/publish.d.ts.map +1 -1
- package/dist/commands/publish.js +589 -156
- package/dist/commands/publish.js.map +1 -1
- package/dist/commands/push.d.ts +45 -8
- package/dist/commands/push.d.ts.map +1 -1
- package/dist/commands/push.js +215 -22
- package/dist/commands/push.js.map +1 -1
- package/dist/commands/redeploy.d.ts +28 -0
- package/dist/commands/redeploy.d.ts.map +1 -0
- package/dist/commands/redeploy.js +417 -0
- package/dist/commands/redeploy.js.map +1 -0
- package/dist/commands/rename.d.ts +1 -1
- package/dist/commands/rename.d.ts.map +1 -1
- package/dist/commands/rename.js +1 -1
- package/dist/commands/rename.js.map +1 -1
- package/dist/commands/reproduce.d.ts +64 -0
- package/dist/commands/reproduce.d.ts.map +1 -0
- package/dist/commands/reproduce.js +211 -0
- package/dist/commands/reproduce.js.map +1 -0
- package/dist/commands/reset-superuser.d.ts +14 -3
- package/dist/commands/reset-superuser.d.ts.map +1 -1
- package/dist/commands/reset-superuser.js +11 -2
- package/dist/commands/reset-superuser.js.map +1 -1
- package/dist/commands/restore.d.ts +79 -0
- package/dist/commands/restore.d.ts.map +1 -0
- package/dist/commands/restore.js +164 -0
- package/dist/commands/restore.js.map +1 -0
- package/dist/commands/resume.d.ts +1 -1
- package/dist/commands/resume.d.ts.map +1 -1
- package/dist/commands/resume.js +1 -1
- package/dist/commands/resume.js.map +1 -1
- package/dist/commands/rollback.d.ts +20 -8
- package/dist/commands/rollback.d.ts.map +1 -1
- package/dist/commands/rollback.js +11 -6
- package/dist/commands/rollback.js.map +1 -1
- package/dist/commands/unsuspend.d.ts +35 -0
- package/dist/commands/unsuspend.d.ts.map +1 -0
- package/dist/commands/unsuspend.js +27 -0
- package/dist/commands/unsuspend.js.map +1 -0
- package/dist/commands/versions.d.ts +1 -1
- package/dist/commands/versions.d.ts.map +1 -1
- package/dist/commands/versions.js +1 -1
- package/dist/commands/versions.js.map +1 -1
- package/dist/commands/wait-deploy.d.ts +92 -0
- package/dist/commands/wait-deploy.d.ts.map +1 -0
- package/dist/commands/wait-deploy.js +225 -0
- package/dist/commands/wait-deploy.js.map +1 -0
- package/dist/env-scan-source.d.ts +39 -0
- package/dist/env-scan-source.d.ts.map +1 -0
- package/dist/env-scan-source.js +332 -0
- package/dist/env-scan-source.js.map +1 -0
- package/dist/error-classifier.d.ts.map +1 -1
- package/dist/error-classifier.js +67 -4
- package/dist/error-classifier.js.map +1 -1
- package/dist/errors.d.ts +8 -1
- package/dist/errors.d.ts.map +1 -1
- package/dist/errors.js +2 -0
- package/dist/errors.js.map +1 -1
- package/dist/index.d.ts +14 -1
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +13 -0
- package/dist/index.js.map +1 -1
- package/dist/plans.d.ts +11 -0
- package/dist/plans.d.ts.map +1 -1
- package/dist/plans.js +10 -0
- package/dist/plans.js.map +1 -1
- package/dist/poll-deployment.d.ts +47 -0
- package/dist/poll-deployment.d.ts.map +1 -0
- package/dist/poll-deployment.js +57 -0
- package/dist/poll-deployment.js.map +1 -0
- package/dist/recovery.d.ts +356 -0
- package/dist/recovery.d.ts.map +1 -0
- package/dist/recovery.js +299 -0
- package/dist/recovery.js.map +1 -0
- package/dist/stream-utils.d.ts +21 -0
- package/dist/stream-utils.d.ts.map +1 -0
- package/dist/stream-utils.js +41 -0
- package/dist/stream-utils.js.map +1 -0
- package/dist/templates/ai-files/claude-md.d.ts +7 -0
- package/dist/templates/ai-files/claude-md.d.ts.map +1 -0
- package/dist/templates/ai-files/claude-md.js +78 -0
- package/dist/templates/ai-files/claude-md.js.map +1 -0
- package/dist/templates/ai-files/cursor-percher-mdc.d.ts +7 -0
- package/dist/templates/ai-files/cursor-percher-mdc.d.ts.map +1 -0
- package/dist/templates/ai-files/cursor-percher-mdc.js +111 -0
- package/dist/templates/ai-files/cursor-percher-mdc.js.map +1 -0
- package/dist/templates/ai-files/index.d.ts +8 -0
- package/dist/templates/ai-files/index.d.ts.map +1 -0
- package/dist/templates/ai-files/index.js +4 -0
- package/dist/templates/ai-files/index.js.map +1 -0
- package/package.json +5 -5
package/dist/commands/doctor.js
CHANGED
|
@@ -1,11 +1,38 @@
|
|
|
1
1
|
import { existsSync } from "node:fs";
|
|
2
2
|
import { join } from "node:path";
|
|
3
3
|
import { PercherTomlError, parseFile } from "@percher/toml";
|
|
4
|
-
import { z } from "zod";
|
|
4
|
+
import { z } from "zod/v3";
|
|
5
5
|
import { readPercherTomlAppName } from "../app-name";
|
|
6
|
+
import { classifyError } from "../error-classifier";
|
|
7
|
+
import { buildProblemToRecoveryProblem, recoveryAsk, recoveryEnv, recoveryFixConfig, recoveryFixProblems, recoveryInspectBuildLog, recoveryLogin, recoveryNone, recoveryWait, } from "../recovery";
|
|
8
|
+
import { resolveReplaced } from "./wait-deploy";
|
|
6
9
|
export const doctorInputSchema = z.object({
|
|
7
10
|
app: z.string().optional(),
|
|
8
11
|
json: z.boolean().optional(),
|
|
12
|
+
/**
|
|
13
|
+
* FUTURE12 Phase 2b/Codex round 5 fix — accept the dispatch-mode
|
|
14
|
+
* hint emitted by `recoveryDoctor`. Public agent path defaults to
|
|
15
|
+
* `auto`; the other modes are passed through from a previous
|
|
16
|
+
* recovery's `args.mode` (e.g. `mode: "deploy"` after a build
|
|
17
|
+
* failure routes the agent back to doctor with that specific
|
|
18
|
+
* deploy in mind). When `mode` is set, doctor's dispatch breaks
|
|
19
|
+
* the otherwise-infinite `run_doctor` recursion by falling back
|
|
20
|
+
* to the right concrete recovery for the case (build_failed →
|
|
21
|
+
* inspect_build_log, runtime_crashed → ask_user with crash
|
|
22
|
+
* details). Phase 2c will replace those fallbacks with deeper
|
|
23
|
+
* mode-specific analysis (build-log fetch + classification, etc).
|
|
24
|
+
*/
|
|
25
|
+
mode: z.enum(["auto", "deploy", "runtime", "config", "env", "account"]).optional(),
|
|
26
|
+
/**
|
|
27
|
+
* FUTURE12 Phase 2b/Codex round 5 fix — accept a specific deploy
|
|
28
|
+
* id to inspect. When set AND the value differs from the app's
|
|
29
|
+
* latest deploy, doctor fetches that specific deploy and uses its
|
|
30
|
+
* status as the dispatch target instead of `lastDeploy` from
|
|
31
|
+
* `/diagnostics`. Pre-fix, Zod silently stripped this field and
|
|
32
|
+
* the agent's targeted `run_doctor` recovery degraded to an
|
|
33
|
+
* untargeted `doctor(app)` call.
|
|
34
|
+
*/
|
|
35
|
+
deployId: z.string().optional(),
|
|
9
36
|
});
|
|
10
37
|
export async function doctor(ctx, input = {}) {
|
|
11
38
|
const checks = [];
|
|
@@ -17,7 +44,7 @@ export async function doctor(ctx, input = {}) {
|
|
|
17
44
|
message: hasToken ? "Token configured" : "No token found. Run: percher login",
|
|
18
45
|
});
|
|
19
46
|
if (!hasToken) {
|
|
20
|
-
return summarize(checks);
|
|
47
|
+
return summarize(checks, { appProvided: !!input.app });
|
|
21
48
|
}
|
|
22
49
|
// 2. API reachability + user account
|
|
23
50
|
try {
|
|
@@ -42,7 +69,7 @@ export async function doctor(ctx, input = {}) {
|
|
|
42
69
|
status: "fail",
|
|
43
70
|
message: `Cannot reach ${ctx.client.apiUrl}: ${err.message}`,
|
|
44
71
|
});
|
|
45
|
-
return summarize(checks);
|
|
72
|
+
return summarize(checks, { appProvided: !!input.app });
|
|
46
73
|
}
|
|
47
74
|
// 4. percher.toml — validated *before* app resolution. readPercherTomlAppName
|
|
48
75
|
// returns null on parse errors AND when the file is missing, so if we waited
|
|
@@ -100,11 +127,34 @@ export async function doctor(ctx, input = {}) {
|
|
|
100
127
|
? "percher.toml is invalid — fix the issues above and re-run"
|
|
101
128
|
: "No app specified and no percher.toml found";
|
|
102
129
|
checks.push({ name: "App", status: "skip", message: reason });
|
|
103
|
-
return summarize(checks);
|
|
130
|
+
return summarize(checks, { appProvided: !!input.app });
|
|
104
131
|
}
|
|
105
132
|
// 5. Fetch diagnostics from the API
|
|
133
|
+
let appStatus;
|
|
134
|
+
let lastDeployStatus;
|
|
135
|
+
let lastDeployId;
|
|
136
|
+
let containerState;
|
|
137
|
+
let containerRunning;
|
|
138
|
+
let containerHealthy;
|
|
139
|
+
let publicRouteHealthy;
|
|
140
|
+
let lastCrashSeverity;
|
|
141
|
+
let lastCrashAt;
|
|
142
|
+
let replacedResolution;
|
|
143
|
+
// FUTURE12 Phase 2c — pre-fetched deep-analysis inputs.
|
|
144
|
+
let buildLogClassification;
|
|
145
|
+
let buildProblems;
|
|
146
|
+
let crashReport;
|
|
106
147
|
try {
|
|
107
148
|
const diag = await ctx.client.apps.getDiagnostics(appName);
|
|
149
|
+
appStatus = diag.app.status;
|
|
150
|
+
lastDeployStatus = diag.lastDeploy?.status;
|
|
151
|
+
lastDeployId = diag.lastDeploy?.id;
|
|
152
|
+
containerState = diag.container.state;
|
|
153
|
+
containerRunning = diag.container.running;
|
|
154
|
+
containerHealthy = diag.containerHealth?.healthy;
|
|
155
|
+
publicRouteHealthy = diag.publicRoute?.healthy;
|
|
156
|
+
lastCrashSeverity = diag.lastCrash?.severity;
|
|
157
|
+
lastCrashAt = diag.lastCrash?.createdAt;
|
|
108
158
|
// App status
|
|
109
159
|
const appOk = diag.app.status === "live";
|
|
110
160
|
checks.push({
|
|
@@ -182,6 +232,187 @@ export async function doctor(ctx, input = {}) {
|
|
|
182
232
|
message: `Exit code ${diag.lastCrash.exitCode}${diag.lastCrash.oomKilled ? " (OOM killed)" : ""} at ${diag.lastCrash.createdAt}`,
|
|
183
233
|
});
|
|
184
234
|
}
|
|
235
|
+
// Targeted deploy override — Codex round 5 P2 fix. When the
|
|
236
|
+
// caller supplied a specific deployId (e.g. echoed back from a
|
|
237
|
+
// previous `recoveryDoctor({ deployId })`), prefer that deploy's
|
|
238
|
+
// status over `diag.lastDeploy` for the verdict. Without this,
|
|
239
|
+
// the agent's targeted recovery degraded to an untargeted
|
|
240
|
+
// doctor(app) call after Zod silently stripped the field.
|
|
241
|
+
if (input.deployId && input.deployId !== diag.lastDeploy?.id) {
|
|
242
|
+
try {
|
|
243
|
+
const targeted = await ctx.client.apps.getDeployment(appName, input.deployId);
|
|
244
|
+
lastDeployStatus = targeted.status;
|
|
245
|
+
lastDeployId = targeted.id;
|
|
246
|
+
checks.push({
|
|
247
|
+
name: `Deploy ${targeted.id}`,
|
|
248
|
+
status: targeted.status === "live" ? "pass" : targeted.status === "failed" ? "fail" : "warn",
|
|
249
|
+
message: `${targeted.status}${targeted.errorMessage ? ` — ${targeted.errorMessage}` : ""}`,
|
|
250
|
+
});
|
|
251
|
+
}
|
|
252
|
+
catch (err) {
|
|
253
|
+
// Codex round 7 P2: clear lastDeployStatus/Id on fetch
|
|
254
|
+
// failure so diag.lastDeploy doesn't leak into the verdict.
|
|
255
|
+
// Without this, an agent asking about a missing/stale deploy
|
|
256
|
+
// (404 from getDeployment) could still receive
|
|
257
|
+
// `replaced_by_newer` for the app's current head when
|
|
258
|
+
// diag.lastDeploy.status happened to be "replaced" — the
|
|
259
|
+
// resolver-gate would fire on the unrelated head deploy and
|
|
260
|
+
// shadow the agent's explicit request. Clearing the deploy
|
|
261
|
+
// fields means: replaced-resolver gate fails, transitional/
|
|
262
|
+
// failed-deploy branches skip, dispatch falls through to
|
|
263
|
+
// case 7 where the failed `Deploy <id>` check we just
|
|
264
|
+
// pushed drives the verdict.
|
|
265
|
+
lastDeployStatus = undefined;
|
|
266
|
+
lastDeployId = undefined;
|
|
267
|
+
checks.push({
|
|
268
|
+
name: `Deploy ${input.deployId}`,
|
|
269
|
+
status: "fail",
|
|
270
|
+
message: `Could not fetch deploy: ${err.message}`,
|
|
271
|
+
});
|
|
272
|
+
}
|
|
273
|
+
}
|
|
274
|
+
// Pre-resolve `replaced` here (deriveVerdict is sync, and
|
|
275
|
+
// resolveReplaced may make an API round-trip via listDeploys).
|
|
276
|
+
// We fetch the App + full Deployment so resolveReplaced has the
|
|
277
|
+
// shape it expects — diag.lastDeploy is a thin projection without
|
|
278
|
+
// the `type` field that resolveReplaced uses to filter the lookup.
|
|
279
|
+
//
|
|
280
|
+
// Codex round 6 P2 fix: resolve against the EFFECTIVE deploy, not
|
|
281
|
+
// diag.lastDeploy. When `input.deployId` is set, the targeted
|
|
282
|
+
// override above replaced lastDeployStatus/Id with the targeted
|
|
283
|
+
// deploy's values; running the resolver against diag.lastDeploy
|
|
284
|
+
// here would let an unrelated `replaced` lastDeploy shadow the
|
|
285
|
+
// agent's specific request (e.g. `mode='deploy', deployId='dep_failed'`
|
|
286
|
+
// would return `replaced_by_newer` for the current live deploy
|
|
287
|
+
// instead of `inspect_build_log` for `dep_failed`). The simple
|
|
288
|
+
// rule: only run resolver if the EFFECTIVE deploy itself is
|
|
289
|
+
// `replaced`.
|
|
290
|
+
if (lastDeployStatus === "replaced" && lastDeployId) {
|
|
291
|
+
try {
|
|
292
|
+
const [app, replacedDeployment] = await Promise.all([
|
|
293
|
+
ctx.client.apps.get(appName),
|
|
294
|
+
ctx.client.apps.getDeployment(appName, lastDeployId),
|
|
295
|
+
]);
|
|
296
|
+
const resolved = await resolveReplaced({ ctx, app, replacedDeployment });
|
|
297
|
+
replacedResolution = {
|
|
298
|
+
recovery: resolved.recovery,
|
|
299
|
+
url: resolved.url,
|
|
300
|
+
summary: resolved.summary,
|
|
301
|
+
resolvedDeployId: resolved.resolvedDeployment?.id,
|
|
302
|
+
};
|
|
303
|
+
}
|
|
304
|
+
catch (err) {
|
|
305
|
+
// Resolver fetch failed — fall through to the dispatch's
|
|
306
|
+
// generic "replaced but couldn't resolve" path so the verdict
|
|
307
|
+
// still routes the user somewhere actionable.
|
|
308
|
+
replacedResolution = {
|
|
309
|
+
recovery: recoveryAsk({
|
|
310
|
+
reasonCode: "replaced_by_newer",
|
|
311
|
+
prompt: `This deploy was replaced but I couldn't determine the current state (resolver fetch failed: ${err.message}). Run \`percher doctor --app ${appName}\` to inspect, or surface to the user.`,
|
|
312
|
+
}),
|
|
313
|
+
summary: `Couldn't resolve the replaced deploy: ${err.message}`,
|
|
314
|
+
};
|
|
315
|
+
}
|
|
316
|
+
}
|
|
317
|
+
// FUTURE12 Phase 2c — deploy-mode deep analysis. When the agent
|
|
318
|
+
// followed our previous run_doctor recovery (input.mode='deploy')
|
|
319
|
+
// and the targeted/last deploy is failed, fetch the build log
|
|
320
|
+
// and classify it so dispatch can return a specific recovery
|
|
321
|
+
// (set_env_vars / fix_problems) instead of always handing off
|
|
322
|
+
// to inspect_build_log. Pre-fix: doctor was a polite passthrough
|
|
323
|
+
// to percher_deploys_inspect.
|
|
324
|
+
if (input.mode === "deploy" && lastDeployStatus === "failed" && lastDeployId) {
|
|
325
|
+
try {
|
|
326
|
+
// Fetch full deploy (for `errorMessage` + `problems[]`) and
|
|
327
|
+
// build log in parallel — both are inputs to classifyError.
|
|
328
|
+
const [deployment, buildLog] = await Promise.all([
|
|
329
|
+
ctx.client.apps.getDeployment(appName, lastDeployId),
|
|
330
|
+
ctx.client.apps.getBuildLog(appName, lastDeployId).catch((logErr) => {
|
|
331
|
+
// Log fetch can fail transiently (404 if log was pruned,
|
|
332
|
+
// network blip post-retries). classifyError handles
|
|
333
|
+
// missing log; we just lose some signal.
|
|
334
|
+
checks.push({
|
|
335
|
+
name: `Build log ${lastDeployId}`,
|
|
336
|
+
status: "warn",
|
|
337
|
+
message: `Could not fetch build log: ${logErr.message}`,
|
|
338
|
+
});
|
|
339
|
+
return "";
|
|
340
|
+
}),
|
|
341
|
+
]);
|
|
342
|
+
const errorMessage = deployment.errorMessage ?? "";
|
|
343
|
+
buildLogClassification = classifyError(errorMessage, buildLog);
|
|
344
|
+
buildProblems = deployment.problems;
|
|
345
|
+
}
|
|
346
|
+
catch (err) {
|
|
347
|
+
// The deployment fetch itself failed — surface as a check
|
|
348
|
+
// so the verdict can route via the case-7 ask_user fallback.
|
|
349
|
+
checks.push({
|
|
350
|
+
name: "Deploy classification",
|
|
351
|
+
status: "warn",
|
|
352
|
+
message: `Could not classify deploy ${lastDeployId}: ${err.message}`,
|
|
353
|
+
});
|
|
354
|
+
buildLogClassification = null;
|
|
355
|
+
}
|
|
356
|
+
}
|
|
357
|
+
// FUTURE12 Phase 2c — runtime-mode deep analysis. When the agent
|
|
358
|
+
// followed runtime mode to inspect a crashed app, pull the
|
|
359
|
+
// crash report so dispatch can surface the AI-generated
|
|
360
|
+
// explanation + suggestion in the ask_user prompt.
|
|
361
|
+
//
|
|
362
|
+
// Codex round 8 P2 fix (revised in round 9): mirror
|
|
363
|
+
// diagnose.ts's recency guard so doctor doesn't surface a
|
|
364
|
+
// days-old AI explanation as if it explained today's outage
|
|
365
|
+
// when the app is `live` but the container probe is failing
|
|
366
|
+
// (route blip, post-restart noise, etc).
|
|
367
|
+
//
|
|
368
|
+
// The rule is intentionally simple — round 9 removed an earlier
|
|
369
|
+
// `matchesLastCrash` bypass that was wrong in practice:
|
|
370
|
+
// /diagnostics returns `lastCrash` populated with the latest
|
|
371
|
+
// crash row, and /crash-report also returns that same row, so
|
|
372
|
+
// their timestamps ALWAYS match. The bypass effectively
|
|
373
|
+
// disabled the staleness check whenever any crash had ever
|
|
374
|
+
// happened, which was the entire common case the guard was
|
|
375
|
+
// meant to catch.
|
|
376
|
+
//
|
|
377
|
+
// - app.status === "crashed" → trust the report; this is the
|
|
378
|
+
// active outage and the user's question is about it.
|
|
379
|
+
// - app.status !== "crashed" → only trust a report fresher
|
|
380
|
+
// than 24h. Anything older is treated as no report.
|
|
381
|
+
if (input.mode === "runtime") {
|
|
382
|
+
try {
|
|
383
|
+
const report = await ctx.client.apps.getCrashReport(appName);
|
|
384
|
+
if (!report || appStatus === "crashed") {
|
|
385
|
+
crashReport = report;
|
|
386
|
+
}
|
|
387
|
+
else {
|
|
388
|
+
const recencyMs = 24 * 60 * 60 * 1000;
|
|
389
|
+
const age = Date.now() - new Date(report.createdAt).getTime();
|
|
390
|
+
if (age > recencyMs) {
|
|
391
|
+
// Stale + app isn't currently crashed — drop it so
|
|
392
|
+
// dispatch falls back to the safe "check the crash
|
|
393
|
+
// report" framing instead of confidently pushing an
|
|
394
|
+
// old fix at today's failing route.
|
|
395
|
+
crashReport = null;
|
|
396
|
+
checks.push({
|
|
397
|
+
name: "Crash report",
|
|
398
|
+
status: "warn",
|
|
399
|
+
message: `Latest crash report is older than 24h and the app isn't currently crashed — ignoring as stale.`,
|
|
400
|
+
});
|
|
401
|
+
}
|
|
402
|
+
else {
|
|
403
|
+
crashReport = report;
|
|
404
|
+
}
|
|
405
|
+
}
|
|
406
|
+
}
|
|
407
|
+
catch (err) {
|
|
408
|
+
checks.push({
|
|
409
|
+
name: "Crash report",
|
|
410
|
+
status: "warn",
|
|
411
|
+
message: `Could not fetch crash report: ${err.message}`,
|
|
412
|
+
});
|
|
413
|
+
crashReport = null;
|
|
414
|
+
}
|
|
415
|
+
}
|
|
185
416
|
}
|
|
186
417
|
catch (err) {
|
|
187
418
|
checks.push({
|
|
@@ -190,15 +421,566 @@ export async function doctor(ctx, input = {}) {
|
|
|
190
421
|
message: err.message,
|
|
191
422
|
});
|
|
192
423
|
}
|
|
193
|
-
return summarize(checks);
|
|
424
|
+
return summarize(checks, {
|
|
425
|
+
appName,
|
|
426
|
+
appProvided: !!input.app,
|
|
427
|
+
appStatus,
|
|
428
|
+
lastDeployStatus,
|
|
429
|
+
lastDeployId,
|
|
430
|
+
containerState,
|
|
431
|
+
containerRunning,
|
|
432
|
+
containerHealthy,
|
|
433
|
+
publicRouteHealthy,
|
|
434
|
+
lastCrashSeverity: lastCrashSeverity ?? undefined,
|
|
435
|
+
lastCrashAt,
|
|
436
|
+
replacedResolution,
|
|
437
|
+
inputMode: input.mode,
|
|
438
|
+
buildLogClassification,
|
|
439
|
+
buildProblems,
|
|
440
|
+
crashReport,
|
|
441
|
+
});
|
|
194
442
|
}
|
|
195
|
-
|
|
443
|
+
/**
|
|
444
|
+
* lastDeploy / app.status values that mean "still in motion — agent
|
|
445
|
+
* should wait, not act." `replaced` is intentionally excluded: it's
|
|
446
|
+
* terminal-but-not-failed and gets its own routing in Phase 2b.
|
|
447
|
+
*/
|
|
448
|
+
const TRANSITIONAL_DEPLOY_STATUSES = new Set(["queued", "building", "deploying"]);
|
|
449
|
+
const TRANSITIONAL_APP_STATUSES = new Set(["provisioning"]);
|
|
450
|
+
function summarize(checks, vctx = {}) {
|
|
451
|
+
const passed = checks.filter((c) => c.status === "pass").length;
|
|
452
|
+
const failed = checks.filter((c) => c.status === "fail").length;
|
|
453
|
+
const warned = checks.filter((c) => c.status === "warn").length;
|
|
454
|
+
const total = checks.filter((c) => c.status !== "skip").length;
|
|
455
|
+
const verdict = deriveVerdict(checks, vctx);
|
|
196
456
|
return {
|
|
197
457
|
checks,
|
|
198
|
-
passed
|
|
199
|
-
failed
|
|
200
|
-
warned
|
|
201
|
-
total
|
|
458
|
+
passed,
|
|
459
|
+
failed,
|
|
460
|
+
warned,
|
|
461
|
+
total,
|
|
462
|
+
status: verdict.status,
|
|
463
|
+
diagnosis: verdict.diagnosis,
|
|
464
|
+
recovery: verdict.recovery,
|
|
465
|
+
summary: verdict.summary,
|
|
466
|
+
};
|
|
467
|
+
}
|
|
468
|
+
/**
|
|
469
|
+
* FUTURE12 Phase 2a — pattern-match the check list against the
|
|
470
|
+
* blocking cases doctor handles today. Anything we don't recognise
|
|
471
|
+
* yet falls through to a safe `needs_action` / `ask_user` so an
|
|
472
|
+
* agent doesn't think it can auto-resolve from a generic failure.
|
|
473
|
+
* Phase 2b adds the deploy/build/runtime/replaced-specific paths.
|
|
474
|
+
*
|
|
475
|
+
* Two Codex P2 fixes baked into Phase 2a (review of fc267a3):
|
|
476
|
+
*
|
|
477
|
+
* 1. An invalid cwd `percher.toml` MUST NOT block the verdict when
|
|
478
|
+
* the caller passed `--app` — that target was resolved
|
|
479
|
+
* independently of cwd toml. Pre-fix, `doctor --app foo` against
|
|
480
|
+
* a healthy app would still come back blocked/config_invalid if
|
|
481
|
+
* the local toml happened to be broken.
|
|
482
|
+
* 2. Transitional states (`lastDeploy.status` queued/building/
|
|
483
|
+
* deploying, or `app.status` provisioning) MUST surface as
|
|
484
|
+
* `in_progress` + `wait_deploy`, not the warn-collapsed `ok`.
|
|
485
|
+
* Pre-fix, those states looked fully resolved to MCP agents and
|
|
486
|
+
* the new `in_progress` doctor status was effectively
|
|
487
|
+
* unreachable.
|
|
488
|
+
*/
|
|
489
|
+
function deriveVerdict(checks, vctx) {
|
|
490
|
+
const findCheck = (name) => checks.find((c) => c.name === name);
|
|
491
|
+
// Case 1 — auth missing. First check in doctor's flow; if it fails
|
|
492
|
+
// we never reach the other gates, so handle it before anything else.
|
|
493
|
+
const auth = findCheck("Auth token");
|
|
494
|
+
if (auth?.status === "fail") {
|
|
495
|
+
return {
|
|
496
|
+
status: "blocked",
|
|
497
|
+
diagnosis: {
|
|
498
|
+
title: "Authentication required",
|
|
499
|
+
explanation: "No Percher token is configured for this CLI.",
|
|
500
|
+
reasonCode: "auth_required",
|
|
501
|
+
phase: "auth",
|
|
502
|
+
},
|
|
503
|
+
recovery: recoveryLogin({ reasonCode: "auth_required" }),
|
|
504
|
+
summary: "Login required: run `percher login` (or set PERCHER_TOKEN).",
|
|
505
|
+
};
|
|
506
|
+
}
|
|
507
|
+
// Case 2 — API can't be reached. Likely-transient infra problem,
|
|
508
|
+
// but doctor can't see whether it'll recover; surface to the user.
|
|
509
|
+
const api = findCheck("API reachability");
|
|
510
|
+
if (api?.status === "fail") {
|
|
511
|
+
return {
|
|
512
|
+
status: "blocked",
|
|
513
|
+
diagnosis: {
|
|
514
|
+
title: "Percher API is unreachable",
|
|
515
|
+
explanation: api.message,
|
|
516
|
+
reasonCode: "infra_unavailable",
|
|
517
|
+
phase: "infra",
|
|
518
|
+
},
|
|
519
|
+
recovery: recoveryAsk({
|
|
520
|
+
prompt: `Percher API is unreachable (${api.message}). This is usually a transient network issue — wait a moment and try again, or check status.percher.app.`,
|
|
521
|
+
reasonCode: "infra_unavailable",
|
|
522
|
+
retryable: true,
|
|
523
|
+
}),
|
|
524
|
+
summary: "Cannot reach the Percher API — try again in a moment.",
|
|
525
|
+
};
|
|
526
|
+
}
|
|
527
|
+
// Case 3 — percher.toml present but invalid. Only blocks the
|
|
528
|
+
// verdict when we actually depend on it (no `--app` provided).
|
|
529
|
+
// With explicit `--app`, the cwd toml is irrelevant for the
|
|
530
|
+
// resolved target, so a broken local file shouldn't poison a
|
|
531
|
+
// healthy app's verdict. (Codex P2 #1 fix.)
|
|
532
|
+
const toml = findCheck("percher.toml");
|
|
533
|
+
if (toml?.status === "fail" && !vctx.appProvided) {
|
|
534
|
+
return {
|
|
535
|
+
status: "blocked",
|
|
536
|
+
diagnosis: {
|
|
537
|
+
title: "Invalid percher.toml",
|
|
538
|
+
explanation: toml.message,
|
|
539
|
+
reasonCode: "config_invalid",
|
|
540
|
+
phase: "config",
|
|
541
|
+
},
|
|
542
|
+
recovery: recoveryFixConfig({
|
|
543
|
+
problems: [
|
|
544
|
+
{
|
|
545
|
+
file: "percher.toml",
|
|
546
|
+
message: toml.message,
|
|
547
|
+
},
|
|
548
|
+
],
|
|
549
|
+
reasonCode: "config_invalid",
|
|
550
|
+
}),
|
|
551
|
+
summary: "percher.toml is invalid — fix the issues and re-run.",
|
|
552
|
+
};
|
|
553
|
+
}
|
|
554
|
+
// Case 4 — no app to inspect. The "App" skip-row is set when no
|
|
555
|
+
// --app was passed AND no parseable percher.toml was found in cwd.
|
|
556
|
+
const app = findCheck("App");
|
|
557
|
+
if (app?.status === "skip") {
|
|
558
|
+
return {
|
|
559
|
+
status: "blocked",
|
|
560
|
+
diagnosis: {
|
|
561
|
+
title: "No app specified",
|
|
562
|
+
explanation: app.message,
|
|
563
|
+
reasonCode: "config_missing",
|
|
564
|
+
phase: "config",
|
|
565
|
+
},
|
|
566
|
+
recovery: recoveryAsk({
|
|
567
|
+
prompt: `${app.message} — pass \`--app <name>\` or run \`percher init\` in a project directory to generate a percher.toml.`,
|
|
568
|
+
reasonCode: "config_missing",
|
|
569
|
+
}),
|
|
570
|
+
summary: "No percher.toml found and no --app supplied — pass --app or run percher init.",
|
|
571
|
+
};
|
|
572
|
+
}
|
|
573
|
+
// Case 5 — transitional state. A lastDeploy in queued/building/
|
|
574
|
+
// deploying or an app in provisioning means the right next step
|
|
575
|
+
// is to wait, not to act. Surface as `in_progress` + `wait_deploy`
|
|
576
|
+
// pointing at the live deployId so agents call
|
|
577
|
+
// percher_wait_for_deploy with the right args instead of treating
|
|
578
|
+
// warn-only checks as "ok". (Codex P2 #2 fix.)
|
|
579
|
+
const inTransitionalDeploy = !!vctx.lastDeployStatus && TRANSITIONAL_DEPLOY_STATUSES.has(vctx.lastDeployStatus);
|
|
580
|
+
const inTransitionalApp = !!vctx.appStatus && TRANSITIONAL_APP_STATUSES.has(vctx.appStatus);
|
|
581
|
+
if (inTransitionalDeploy || inTransitionalApp) {
|
|
582
|
+
const reasonCode = vctx.lastDeployStatus === "queued"
|
|
583
|
+
? "deploy_queued"
|
|
584
|
+
: vctx.lastDeployStatus === "building"
|
|
585
|
+
? "deploy_building"
|
|
586
|
+
: vctx.lastDeployStatus === "deploying"
|
|
587
|
+
? "deploy_deploying"
|
|
588
|
+
: "deploy_queued";
|
|
589
|
+
const what = inTransitionalApp
|
|
590
|
+
? `App ${vctx.appName ?? ""} is provisioning`
|
|
591
|
+
: `Last deploy is ${vctx.lastDeployStatus}`;
|
|
592
|
+
const recovery = vctx.lastDeployId && vctx.appName
|
|
593
|
+
? recoveryWait({
|
|
594
|
+
app: vctx.appName,
|
|
595
|
+
deployId: vctx.lastDeployId,
|
|
596
|
+
reasonCode,
|
|
597
|
+
})
|
|
598
|
+
: recoveryAsk({
|
|
599
|
+
prompt: `${what.trim()} for ${vctx.appName ?? "this app"}. Wait for it to finish before retrying — there's no deployId to track yet.`,
|
|
600
|
+
reasonCode,
|
|
601
|
+
});
|
|
602
|
+
return {
|
|
603
|
+
status: "in_progress",
|
|
604
|
+
diagnosis: {
|
|
605
|
+
title: what.trim(),
|
|
606
|
+
explanation: vctx.lastDeployId
|
|
607
|
+
? `${what.trim()} (deploy ${vctx.lastDeployId}). Wait for it to finish before retrying.`
|
|
608
|
+
: `${what.trim()}. Wait for it to finish before retrying.`,
|
|
609
|
+
reasonCode,
|
|
610
|
+
phase: "deploy",
|
|
611
|
+
},
|
|
612
|
+
recovery,
|
|
613
|
+
summary: vctx.lastDeployId
|
|
614
|
+
? `${what.trim()} — wait for deploy ${vctx.lastDeployId}.`
|
|
615
|
+
: `${what.trim()} — wait a moment and re-run.`,
|
|
616
|
+
};
|
|
617
|
+
}
|
|
618
|
+
// For verdict purposes, drop the cwd `percher.toml` check when
|
|
619
|
+
// the caller passed --app. Without this filter, a `fail` toml
|
|
620
|
+
// would force the all-pass-or-warn check below into the
|
|
621
|
+
// `needs_action` branch (case 7) even though the resolved app is
|
|
622
|
+
// fully healthy.
|
|
623
|
+
const verdictChecks = vctx.appProvided && toml?.status === "fail"
|
|
624
|
+
? checks.filter((c) => c.name !== "percher.toml")
|
|
625
|
+
: checks;
|
|
626
|
+
// Phase 2b — signal-driven dispatches that MUST run before the
|
|
627
|
+
// happy-path collapse below. The `Last deploy` row is rendered
|
|
628
|
+
// as `warn` for any non-live status (replaced, failed, etc), so
|
|
629
|
+
// case 6's all-pass-or-warn predicate would silently hide a
|
|
630
|
+
// failed or replaced deploy. Likewise, a suspended app may
|
|
631
|
+
// surface only as a stopped container (`fail` row) but the
|
|
632
|
+
// dispatch wants to lead with the suspended explanation, not the
|
|
633
|
+
// generic runtime fix.
|
|
634
|
+
// Replaced lastDeploy. Resolution was performed up-front in
|
|
635
|
+
// `doctor()` (deriveVerdict is sync); we just emit what
|
|
636
|
+
// resolveReplaced computed.
|
|
637
|
+
if (vctx.replacedResolution) {
|
|
638
|
+
const r = vctx.replacedResolution;
|
|
639
|
+
const isResolvedLive = r.recovery.nextAction === "none" && !!r.url;
|
|
640
|
+
if (isResolvedLive) {
|
|
641
|
+
return {
|
|
642
|
+
status: "ok",
|
|
643
|
+
recovery: { ...r.recovery, reasonCode: "replaced_by_newer" },
|
|
644
|
+
summary: r.summary,
|
|
645
|
+
};
|
|
646
|
+
}
|
|
647
|
+
const status = r.recovery.nextAction === "wait_deploy" ? "in_progress" : "needs_action";
|
|
648
|
+
return {
|
|
649
|
+
status,
|
|
650
|
+
diagnosis: {
|
|
651
|
+
title: "Last deploy was replaced",
|
|
652
|
+
explanation: r.summary,
|
|
653
|
+
reasonCode: "replaced_by_newer",
|
|
654
|
+
phase: "deploy",
|
|
655
|
+
},
|
|
656
|
+
recovery: { ...r.recovery, reasonCode: "replaced_by_newer" },
|
|
657
|
+
summary: r.summary,
|
|
658
|
+
};
|
|
659
|
+
}
|
|
660
|
+
// lastDeploy.status === "failed". `failed` renders as `warn` in
|
|
661
|
+
// checks[] but is unambiguously not-ok. Doctor can't classify
|
|
662
|
+
// the failure cause from /diagnostics (the build log lives at a
|
|
663
|
+
// different endpoint), so route back to itself with mode='deploy'
|
|
664
|
+
// and the deployId. Phase 4 will migrate publish/wait to the
|
|
665
|
+
// same recovery so deploy-mode expansion has a single owner of
|
|
666
|
+
// build-log inspection.
|
|
667
|
+
if (vctx.lastDeployStatus === "failed") {
|
|
668
|
+
// Loop-break (Codex round 5 P2): if we're already in mode='deploy'
|
|
669
|
+
// (the agent followed our previous run_doctor recovery), emitting
|
|
670
|
+
// run_doctor again with the same args would loop forever. Fall
|
|
671
|
+
// back to inspect_build_log — the existing low-level path that
|
|
672
|
+
// surfaces the build log to the agent. The Phase 2c block below
|
|
673
|
+
// layers deeper analysis on top (build-log classification done
|
|
674
|
+
// in doctor) so the agent gets a `set_env_vars` /
|
|
675
|
+
// `fix_problems` recovery directly when one can be derived.
|
|
676
|
+
if (vctx.inputMode === "deploy") {
|
|
677
|
+
// FUTURE12 Phase 2c — deep analysis. We pre-fetched the build
|
|
678
|
+
// log and ran classifyError in doctor() above; if it produced
|
|
679
|
+
// missing env keys or structured file-located problems, emit
|
|
680
|
+
// a specific recovery the agent can act on directly.
|
|
681
|
+
// Otherwise fall back to inspect_build_log (the agent gets
|
|
682
|
+
// the raw log via percher_deploys_inspect).
|
|
683
|
+
const cls = vctx.buildLogClassification;
|
|
684
|
+
// (a) Missing env keys → recoveryEnv. Most actionable case:
|
|
685
|
+
// agent calls percher_env_set with the exact keys.
|
|
686
|
+
if (cls && cls.errorClass === "missing_env" && cls.missingEnvVars.length > 0) {
|
|
687
|
+
return {
|
|
688
|
+
status: "needs_action",
|
|
689
|
+
diagnosis: {
|
|
690
|
+
title: cls.title,
|
|
691
|
+
explanation: cls.explanation,
|
|
692
|
+
reasonCode: "missing_env",
|
|
693
|
+
phase: "build",
|
|
694
|
+
},
|
|
695
|
+
recovery: recoveryEnv({
|
|
696
|
+
app: vctx.appName,
|
|
697
|
+
keys: cls.missingEnvVars,
|
|
698
|
+
}),
|
|
699
|
+
summary: vctx.appName
|
|
700
|
+
? `Build failed — missing env vars on ${vctx.appName}: ${cls.missingEnvVars.join(", ")}.`
|
|
701
|
+
: `Build failed — missing env vars: ${cls.missingEnvVars.join(", ")}.`,
|
|
702
|
+
};
|
|
703
|
+
}
|
|
704
|
+
// (b) Structured BuildProblems with file locations → fix_problems.
|
|
705
|
+
// The agent can patch files directly without log archeology.
|
|
706
|
+
// Codex round 8 P3 fix: route through
|
|
707
|
+
// `buildProblemToRecoveryProblem` so `BuildProblem.hint` is
|
|
708
|
+
// folded into the message — agents and CLI got `Hint: <text>`
|
|
709
|
+
// appended automatically (matters for problems where the hint
|
|
710
|
+
// carries the actionable next step, e.g. malformed
|
|
711
|
+
// package.json with no line/column).
|
|
712
|
+
const fileProblems = (vctx.buildProblems ?? [])
|
|
713
|
+
.filter((p) => p.file)
|
|
714
|
+
.map(buildProblemToRecoveryProblem);
|
|
715
|
+
if (fileProblems.length > 0) {
|
|
716
|
+
return {
|
|
717
|
+
status: "needs_action",
|
|
718
|
+
diagnosis: {
|
|
719
|
+
title: cls?.title ?? "Build failed with file-located problems",
|
|
720
|
+
explanation: cls?.explanation ??
|
|
721
|
+
`Build extracted ${fileProblems.length} structured problem${fileProblems.length === 1 ? "" : "s"} with file locations. Patch the files directly.`,
|
|
722
|
+
reasonCode: "build_failed",
|
|
723
|
+
phase: "build",
|
|
724
|
+
},
|
|
725
|
+
recovery: recoveryFixProblems({
|
|
726
|
+
problems: fileProblems,
|
|
727
|
+
reasonCode: "build_failed",
|
|
728
|
+
}),
|
|
729
|
+
summary: vctx.lastDeployId
|
|
730
|
+
? `Deploy ${vctx.lastDeployId} failed — ${fileProblems.length} structured problem${fileProblems.length === 1 ? "" : "s"} to patch.`
|
|
731
|
+
: `Build failed — ${fileProblems.length} structured problem${fileProblems.length === 1 ? "" : "s"} to patch.`,
|
|
732
|
+
};
|
|
733
|
+
}
|
|
734
|
+
// (c) Fallback: classified but unactionable, or unclassified.
|
|
735
|
+
// Hand off to inspect_build_log so the agent can read the raw
|
|
736
|
+
// log. This is also the no-classification path (cls === null).
|
|
737
|
+
return {
|
|
738
|
+
status: "needs_action",
|
|
739
|
+
diagnosis: {
|
|
740
|
+
title: cls?.title ?? "Last deploy failed",
|
|
741
|
+
explanation: cls
|
|
742
|
+
? `${cls.explanation} Doctor couldn't extract a structured fix; fall back to the build log.`
|
|
743
|
+
: vctx.lastDeployId
|
|
744
|
+
? `Deploy ${vctx.lastDeployId} terminated in failed state. Doctor couldn't classify the cause from the build log; inspect it directly.`
|
|
745
|
+
: "The most recent deploy ended in failed state. Doctor couldn't classify the cause; inspect the build log.",
|
|
746
|
+
reasonCode: "build_failed",
|
|
747
|
+
phase: "build",
|
|
748
|
+
},
|
|
749
|
+
recovery: recoveryInspectBuildLog({
|
|
750
|
+
deployId: vctx.lastDeployId,
|
|
751
|
+
app: vctx.appName,
|
|
752
|
+
reasonCode: "build_failed",
|
|
753
|
+
}),
|
|
754
|
+
summary: vctx.lastDeployId
|
|
755
|
+
? `Deploy ${vctx.lastDeployId} failed — inspect the build log.`
|
|
756
|
+
: "Last deploy failed — inspect the latest failed deploy's build log.",
|
|
757
|
+
};
|
|
758
|
+
}
|
|
759
|
+
return {
|
|
760
|
+
status: "needs_action",
|
|
761
|
+
diagnosis: {
|
|
762
|
+
title: "Last deploy failed",
|
|
763
|
+
explanation: vctx.lastDeployId
|
|
764
|
+
? `Deploy ${vctx.lastDeployId} terminated in failed state. Inspect the build log to find the cause.`
|
|
765
|
+
: "The most recent deploy ended in failed state. Inspect the build log to find the cause.",
|
|
766
|
+
reasonCode: "build_failed",
|
|
767
|
+
phase: "build",
|
|
768
|
+
},
|
|
769
|
+
recovery: {
|
|
770
|
+
retryable: false,
|
|
771
|
+
nextAction: "run_doctor",
|
|
772
|
+
suggestedTool: "percher_doctor",
|
|
773
|
+
args: {
|
|
774
|
+
app: vctx.appName,
|
|
775
|
+
mode: "deploy",
|
|
776
|
+
...(vctx.lastDeployId ? { deployId: vctx.lastDeployId } : {}),
|
|
777
|
+
},
|
|
778
|
+
reasonCode: "build_failed",
|
|
779
|
+
},
|
|
780
|
+
summary: vctx.lastDeployId
|
|
781
|
+
? `Deploy ${vctx.lastDeployId} failed — call percher_doctor with mode='deploy'.`
|
|
782
|
+
: "Last deploy failed — call percher_doctor with mode='deploy'.",
|
|
783
|
+
};
|
|
784
|
+
}
|
|
785
|
+
// App suspended. Suspension reason isn't on /diagnostics
|
|
786
|
+
// (lives on the App row's `suspensionReason` /
|
|
787
|
+
// `suspensionOrigin`), so doctor surfaces to the user with a
|
|
788
|
+
// concrete prompt. The reasonCode `quota_exceeded` is a proxy —
|
|
789
|
+
// most owner-resumable suspensions are quota; admin/moderation
|
|
790
|
+
// suspensions would route differently if doctor had access to
|
|
791
|
+
// suspensionOrigin (Phase 6 can plumb that through if needed).
|
|
792
|
+
if (vctx.appStatus === "suspended") {
|
|
793
|
+
return {
|
|
794
|
+
status: "needs_action",
|
|
795
|
+
diagnosis: {
|
|
796
|
+
title: "App is suspended",
|
|
797
|
+
explanation: vctx.appName
|
|
798
|
+
? `${vctx.appName} is currently suspended. Owner-initiated and quota suspensions can be resumed from the dashboard or via the API.`
|
|
799
|
+
: "This app is currently suspended.",
|
|
800
|
+
reasonCode: "quota_exceeded",
|
|
801
|
+
phase: "infra",
|
|
802
|
+
},
|
|
803
|
+
recovery: recoveryAsk({
|
|
804
|
+
reasonCode: "quota_exceeded",
|
|
805
|
+
prompt: vctx.appName
|
|
806
|
+
? `${vctx.appName} is suspended. Resume the app from the dashboard (or call the unsuspend API) before retrying.`
|
|
807
|
+
: "This app is suspended. Resume it from the dashboard before retrying.",
|
|
808
|
+
}),
|
|
809
|
+
summary: vctx.appName
|
|
810
|
+
? `${vctx.appName} is suspended — resume the app before retrying.`
|
|
811
|
+
: "App is suspended — resume it before retrying.",
|
|
812
|
+
};
|
|
813
|
+
}
|
|
814
|
+
// Case 6 — happy path. All app-level checks passed (warns are
|
|
815
|
+
// informational, not blocking).
|
|
816
|
+
if (verdictChecks.every((c) => c.status === "pass" || c.status === "skip" || c.status === "warn")) {
|
|
817
|
+
const passing = verdictChecks.filter((c) => c.status === "pass").length;
|
|
818
|
+
const noun = passing === 1 ? "check" : "checks";
|
|
819
|
+
return {
|
|
820
|
+
status: "ok",
|
|
821
|
+
recovery: recoveryNone({ reasonCode: "none" }),
|
|
822
|
+
summary: vctx.appName
|
|
823
|
+
? `All ${passing} ${noun} passed for ${vctx.appName}.`
|
|
824
|
+
: `All ${passing} ${noun} passed.`,
|
|
825
|
+
};
|
|
826
|
+
}
|
|
827
|
+
// Case 7 — at least one app-level check failed. Phase 2b refines
|
|
828
|
+
// the previous catch-all `ask_user`/`unknown` into specific
|
|
829
|
+
// dispatches based on structured signals from `/diagnostics`.
|
|
830
|
+
// The signal-driven cases that case 6 would otherwise collapse
|
|
831
|
+
// (replaced/failed lastDeploy, suspended app) are handled above
|
|
832
|
+
// case 6. The remaining checks-driven cases run here:
|
|
833
|
+
// crashed/runtime-down → public-route blip → genuinely unknown.
|
|
834
|
+
// 7a. Runtime crashed. App marked crashed, OR container exited /
|
|
835
|
+
// not running (without a clearer signal), OR direct container-
|
|
836
|
+
// health probe is failing. All three converge on the same
|
|
837
|
+
// recovery: hand back to doctor with `mode: "runtime"` so the
|
|
838
|
+
// runtime-focused expansion (Phase 2 step 9) can take it from
|
|
839
|
+
// here. Self-recursion via mode hint is the explicit Phase 2
|
|
840
|
+
// contract — input mode disambiguates the dispatch.
|
|
841
|
+
const containerDown = (vctx.containerRunning === false &&
|
|
842
|
+
vctx.containerState !== undefined &&
|
|
843
|
+
vctx.containerState !== "not-found") ||
|
|
844
|
+
vctx.containerHealthy === false;
|
|
845
|
+
if (vctx.appStatus === "crashed" || containerDown) {
|
|
846
|
+
const containerCheck = findCheck("Container");
|
|
847
|
+
const healthCheck = findCheck("Container health");
|
|
848
|
+
const lastCrashCheck = findCheck("Last crash");
|
|
849
|
+
const explanationBits = [];
|
|
850
|
+
if (vctx.appStatus === "crashed")
|
|
851
|
+
explanationBits.push("App is in crashed state.");
|
|
852
|
+
if (containerCheck?.status === "fail")
|
|
853
|
+
explanationBits.push(containerCheck.message);
|
|
854
|
+
if (healthCheck?.status === "fail")
|
|
855
|
+
explanationBits.push(healthCheck.message);
|
|
856
|
+
if (lastCrashCheck)
|
|
857
|
+
explanationBits.push(lastCrashCheck.message);
|
|
858
|
+
const explanation = explanationBits.length > 0
|
|
859
|
+
? explanationBits.join(" ")
|
|
860
|
+
: "Runtime is not responding — container or health check is failing.";
|
|
861
|
+
// Loop-break (Codex round 5 P2): if we're already in
|
|
862
|
+
// mode='runtime', emit a concrete ask_user with the crash
|
|
863
|
+
// details rather than recursing into ourselves. Phase 2c (below)
|
|
864
|
+
// enriches the prompt with the crash report when present (the
|
|
865
|
+
// crash-handler watchdog already produces structured AI-generated
|
|
866
|
+
// explanations — doctor surfaces them here when available).
|
|
867
|
+
if (vctx.inputMode === "runtime") {
|
|
868
|
+
// FUTURE12 Phase 2c — surface the crash report's
|
|
869
|
+
// AI-generated explanation + suggestion in the prompt when
|
|
870
|
+
// available. The watchdog/crash-handler already produces
|
|
871
|
+
// these via /apps/:app/crash-report; doctor just needs to
|
|
872
|
+
// pull them through.
|
|
873
|
+
const cr = vctx.crashReport;
|
|
874
|
+
const hasAnalysis = cr && cr.analysisStatus === "completed";
|
|
875
|
+
const promptParts = [];
|
|
876
|
+
if (hasAnalysis && cr.explanation) {
|
|
877
|
+
promptParts.push(`Crash explanation: ${cr.explanation}`);
|
|
878
|
+
}
|
|
879
|
+
if (hasAnalysis && cr.suggestion) {
|
|
880
|
+
promptParts.push(`Suggested fix: ${cr.suggestion}`);
|
|
881
|
+
}
|
|
882
|
+
if (cr) {
|
|
883
|
+
const exitInfo = cr.oomKilled ? `exit ${cr.exitCode} (OOM killed)` : `exit ${cr.exitCode}`;
|
|
884
|
+
promptParts.push(`Container ${exitInfo} at ${cr.createdAt}.`);
|
|
885
|
+
}
|
|
886
|
+
// Always include the basic explanation so the user has
|
|
887
|
+
// something to read even if no crash report exists.
|
|
888
|
+
promptParts.push(explanation);
|
|
889
|
+
promptParts.push(vctx.appName
|
|
890
|
+
? `Decide whether to redeploy, raise the memory plan, or fix the code. Use percher_diagnose_crash for ${vctx.appName} if you need the full log tail.`
|
|
891
|
+
: "Decide whether to redeploy, raise the memory plan, or fix the code.");
|
|
892
|
+
return {
|
|
893
|
+
status: "needs_action",
|
|
894
|
+
diagnosis: {
|
|
895
|
+
title: vctx.appStatus === "crashed" ? "App crashed" : "Runtime not responding",
|
|
896
|
+
explanation: hasAnalysis && cr.explanation ? `${cr.explanation} ${explanation}` : explanation,
|
|
897
|
+
reasonCode: "runtime_crashed",
|
|
898
|
+
phase: "runtime",
|
|
899
|
+
},
|
|
900
|
+
recovery: recoveryAsk({
|
|
901
|
+
reasonCode: "runtime_crashed",
|
|
902
|
+
prompt: promptParts.join(" "),
|
|
903
|
+
options: cr?.severity === "critical" ? ["redeploy", "fix code", "upgrade plan"] : undefined,
|
|
904
|
+
}),
|
|
905
|
+
summary: vctx.appName
|
|
906
|
+
? hasAnalysis
|
|
907
|
+
? `${vctx.appName} crashed — ${cr.suggestion ?? cr.explanation ?? "surface the crash details to the user."}`
|
|
908
|
+
: `${vctx.appName} runtime is unhealthy — surface the crash details to the user.`
|
|
909
|
+
: "Runtime is unhealthy — surface the crash details to the user.",
|
|
910
|
+
};
|
|
911
|
+
}
|
|
912
|
+
return {
|
|
913
|
+
status: "needs_action",
|
|
914
|
+
diagnosis: {
|
|
915
|
+
title: vctx.appStatus === "crashed" ? "App crashed" : "Runtime not responding",
|
|
916
|
+
explanation,
|
|
917
|
+
reasonCode: "runtime_crashed",
|
|
918
|
+
phase: "runtime",
|
|
919
|
+
},
|
|
920
|
+
recovery: {
|
|
921
|
+
retryable: false,
|
|
922
|
+
nextAction: "run_doctor",
|
|
923
|
+
suggestedTool: "percher_doctor",
|
|
924
|
+
args: { app: vctx.appName, mode: "runtime" },
|
|
925
|
+
reasonCode: "runtime_crashed",
|
|
926
|
+
},
|
|
927
|
+
summary: vctx.appName
|
|
928
|
+
? `${vctx.appName} runtime needs investigation — call percher_doctor with mode='runtime'.`
|
|
929
|
+
: "Runtime needs investigation — call percher_doctor with mode='runtime'.",
|
|
930
|
+
};
|
|
931
|
+
}
|
|
932
|
+
// 7b. Public route is the only thing failing — container is up
|
|
933
|
+
// and healthy, but the external probe via Caddy/TLS/DNS isn't
|
|
934
|
+
// responding. This is usually a transient route-reconcile blip
|
|
935
|
+
// that self-heals; recommend `retry` so the agent re-runs doctor
|
|
936
|
+
// (or the user retries publish) rather than asking the user to
|
|
937
|
+
// act manually.
|
|
938
|
+
if (vctx.publicRouteHealthy === false) {
|
|
939
|
+
const routeCheck = findCheck("Public route");
|
|
940
|
+
return {
|
|
941
|
+
status: "needs_action",
|
|
942
|
+
diagnosis: {
|
|
943
|
+
title: "Public route is not responding",
|
|
944
|
+
explanation: routeCheck?.message ??
|
|
945
|
+
"Container looks healthy but the public URL isn't responding — route reconcile usually self-heals.",
|
|
946
|
+
reasonCode: "infra_transient",
|
|
947
|
+
phase: "infra",
|
|
948
|
+
},
|
|
949
|
+
recovery: {
|
|
950
|
+
retryable: true,
|
|
951
|
+
nextAction: "retry",
|
|
952
|
+
suggestedTool: "percher_doctor",
|
|
953
|
+
args: { app: vctx.appName },
|
|
954
|
+
reasonCode: "infra_transient",
|
|
955
|
+
},
|
|
956
|
+
summary: vctx.appName
|
|
957
|
+
? `${vctx.appName} public route is failing — likely transient, re-run doctor in a moment.`
|
|
958
|
+
: "Public route is failing — likely transient, re-run doctor in a moment.",
|
|
959
|
+
};
|
|
960
|
+
}
|
|
961
|
+
// 7c. Genuinely unknown — none of the structured signals above
|
|
962
|
+
// matched. Keep the safe fallback so an agent doesn't think it
|
|
963
|
+
// can auto-resolve. This is what Phase 2a shipped; Phase 2b only
|
|
964
|
+
// narrows the surface that lands here.
|
|
965
|
+
const firstFailed = verdictChecks.find((c) => c.status === "fail");
|
|
966
|
+
return {
|
|
967
|
+
status: "needs_action",
|
|
968
|
+
diagnosis: firstFailed
|
|
969
|
+
? {
|
|
970
|
+
title: firstFailed.name,
|
|
971
|
+
explanation: firstFailed.message,
|
|
972
|
+
reasonCode: "unknown",
|
|
973
|
+
}
|
|
974
|
+
: undefined,
|
|
975
|
+
recovery: recoveryAsk({
|
|
976
|
+
prompt: firstFailed
|
|
977
|
+
? `Doctor flagged a problem: ${firstFailed.name} — ${firstFailed.message}. Review the checks list and surface the failing item to the user.`
|
|
978
|
+
: "Doctor reported one or more issues. Review the checks list and surface the failing items to the user.",
|
|
979
|
+
reasonCode: "unknown",
|
|
980
|
+
}),
|
|
981
|
+
summary: firstFailed
|
|
982
|
+
? `${firstFailed.name} failed: ${firstFailed.message}`
|
|
983
|
+
: "Doctor reported issues — review the checks list.",
|
|
202
984
|
};
|
|
203
985
|
}
|
|
204
986
|
//# sourceMappingURL=doctor.js.map
|