@percher/core 0.2.5 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (206)
  1. package/dist/ai-files-manifest.d.ts +28 -0
  2. package/dist/ai-files-manifest.d.ts.map +1 -0
  3. package/dist/ai-files-manifest.js +96 -0
  4. package/dist/ai-files-manifest.js.map +1 -0
  5. package/dist/commands/account.d.ts +51 -0
  6. package/dist/commands/account.d.ts.map +1 -0
  7. package/dist/commands/account.js +88 -0
  8. package/dist/commands/account.js.map +1 -0
  9. package/dist/commands/ai-files.d.ts +73 -0
  10. package/dist/commands/ai-files.d.ts.map +1 -0
  11. package/dist/commands/ai-files.js +179 -0
  12. package/dist/commands/ai-files.js.map +1 -0
  13. package/dist/commands/billing.d.ts +1 -1
  14. package/dist/commands/billing.d.ts.map +1 -1
  15. package/dist/commands/billing.js +1 -1
  16. package/dist/commands/billing.js.map +1 -1
  17. package/dist/commands/continue.d.ts +48 -0
  18. package/dist/commands/continue.d.ts.map +1 -0
  19. package/dist/commands/continue.js +121 -0
  20. package/dist/commands/continue.js.map +1 -0
  21. package/dist/commands/create.d.ts +1 -1
  22. package/dist/commands/create.d.ts.map +1 -1
  23. package/dist/commands/create.js +1 -1
  24. package/dist/commands/create.js.map +1 -1
  25. package/dist/commands/dashboard.d.ts +15 -0
  26. package/dist/commands/dashboard.d.ts.map +1 -0
  27. package/dist/commands/dashboard.js +33 -0
  28. package/dist/commands/dashboard.js.map +1 -0
  29. package/dist/commands/data-export.d.ts +21 -0
  30. package/dist/commands/data-export.d.ts.map +1 -0
  31. package/dist/commands/data-export.js +36 -0
  32. package/dist/commands/data-export.js.map +1 -0
  33. package/dist/commands/data.d.ts +1 -1
  34. package/dist/commands/data.d.ts.map +1 -1
  35. package/dist/commands/data.js +1 -1
  36. package/dist/commands/data.js.map +1 -1
  37. package/dist/commands/delete.d.ts +1 -1
  38. package/dist/commands/delete.d.ts.map +1 -1
  39. package/dist/commands/delete.js +1 -1
  40. package/dist/commands/delete.js.map +1 -1
  41. package/dist/commands/deploys.d.ts +2 -2
  42. package/dist/commands/deploys.d.ts.map +1 -1
  43. package/dist/commands/deploys.js +21 -5
  44. package/dist/commands/deploys.js.map +1 -1
  45. package/dist/commands/dev.d.ts +1 -9
  46. package/dist/commands/dev.d.ts.map +1 -1
  47. package/dist/commands/dev.js +77 -23
  48. package/dist/commands/dev.js.map +1 -1
  49. package/dist/commands/diagnose.d.ts +1 -1
  50. package/dist/commands/diagnose.d.ts.map +1 -1
  51. package/dist/commands/diagnose.js +1 -1
  52. package/dist/commands/diagnose.js.map +1 -1
  53. package/dist/commands/doctor.d.ts +63 -1
  54. package/dist/commands/doctor.d.ts.map +1 -1
  55. package/dist/commands/doctor.js +792 -10
  56. package/dist/commands/doctor.js.map +1 -1
  57. package/dist/commands/domains.d.ts +1 -1
  58. package/dist/commands/domains.d.ts.map +1 -1
  59. package/dist/commands/domains.js +1 -1
  60. package/dist/commands/domains.js.map +1 -1
  61. package/dist/commands/env-scan.d.ts +2 -0
  62. package/dist/commands/env-scan.d.ts.map +1 -0
  63. package/dist/commands/env-scan.js +92 -0
  64. package/dist/commands/env-scan.js.map +1 -0
  65. package/dist/commands/env.d.ts +1 -1
  66. package/dist/commands/env.d.ts.map +1 -1
  67. package/dist/commands/env.js +1 -1
  68. package/dist/commands/env.js.map +1 -1
  69. package/dist/commands/export.d.ts +1 -1
  70. package/dist/commands/export.js +1 -1
  71. package/dist/commands/generate.d.ts +1 -1
  72. package/dist/commands/generate.d.ts.map +1 -1
  73. package/dist/commands/generate.js +14 -9
  74. package/dist/commands/generate.js.map +1 -1
  75. package/dist/commands/github.d.ts +60 -0
  76. package/dist/commands/github.d.ts.map +1 -0
  77. package/dist/commands/github.js +112 -0
  78. package/dist/commands/github.js.map +1 -0
  79. package/dist/commands/import-project.d.ts +1 -1
  80. package/dist/commands/import-project.d.ts.map +1 -1
  81. package/dist/commands/import-project.js +1 -1
  82. package/dist/commands/import-project.js.map +1 -1
  83. package/dist/commands/init.d.ts +1 -1
  84. package/dist/commands/init.d.ts.map +1 -1
  85. package/dist/commands/init.js +1 -1
  86. package/dist/commands/init.js.map +1 -1
  87. package/dist/commands/insights.d.ts +1 -1
  88. package/dist/commands/insights.d.ts.map +1 -1
  89. package/dist/commands/insights.js +1 -1
  90. package/dist/commands/insights.js.map +1 -1
  91. package/dist/commands/login.d.ts +1 -1
  92. package/dist/commands/login.d.ts.map +1 -1
  93. package/dist/commands/login.js +1 -1
  94. package/dist/commands/login.js.map +1 -1
  95. package/dist/commands/logs.d.ts +1 -1
  96. package/dist/commands/logs.d.ts.map +1 -1
  97. package/dist/commands/logs.js +1 -1
  98. package/dist/commands/logs.js.map +1 -1
  99. package/dist/commands/mcp.d.ts +1 -1
  100. package/dist/commands/mcp.d.ts.map +1 -1
  101. package/dist/commands/mcp.js +1 -1
  102. package/dist/commands/mcp.js.map +1 -1
  103. package/dist/commands/open.d.ts +1 -1
  104. package/dist/commands/open.d.ts.map +1 -1
  105. package/dist/commands/open.js +1 -1
  106. package/dist/commands/open.js.map +1 -1
  107. package/dist/commands/publish-failure.d.ts +31 -0
  108. package/dist/commands/publish-failure.d.ts.map +1 -0
  109. package/dist/commands/publish-failure.js +142 -0
  110. package/dist/commands/publish-failure.js.map +1 -0
  111. package/dist/commands/publish-node.d.ts +13 -0
  112. package/dist/commands/publish-node.d.ts.map +1 -0
  113. package/dist/commands/publish-node.js +38 -0
  114. package/dist/commands/publish-node.js.map +1 -0
  115. package/dist/commands/publish.d.ts +87 -3
  116. package/dist/commands/publish.d.ts.map +1 -1
  117. package/dist/commands/publish.js +589 -156
  118. package/dist/commands/publish.js.map +1 -1
  119. package/dist/commands/push.d.ts +45 -8
  120. package/dist/commands/push.d.ts.map +1 -1
  121. package/dist/commands/push.js +215 -22
  122. package/dist/commands/push.js.map +1 -1
  123. package/dist/commands/redeploy.d.ts +28 -0
  124. package/dist/commands/redeploy.d.ts.map +1 -0
  125. package/dist/commands/redeploy.js +417 -0
  126. package/dist/commands/redeploy.js.map +1 -0
  127. package/dist/commands/rename.d.ts +1 -1
  128. package/dist/commands/rename.d.ts.map +1 -1
  129. package/dist/commands/rename.js +1 -1
  130. package/dist/commands/rename.js.map +1 -1
  131. package/dist/commands/reproduce.d.ts +64 -0
  132. package/dist/commands/reproduce.d.ts.map +1 -0
  133. package/dist/commands/reproduce.js +211 -0
  134. package/dist/commands/reproduce.js.map +1 -0
  135. package/dist/commands/reset-superuser.d.ts +14 -3
  136. package/dist/commands/reset-superuser.d.ts.map +1 -1
  137. package/dist/commands/reset-superuser.js +11 -2
  138. package/dist/commands/reset-superuser.js.map +1 -1
  139. package/dist/commands/restore.d.ts +79 -0
  140. package/dist/commands/restore.d.ts.map +1 -0
  141. package/dist/commands/restore.js +164 -0
  142. package/dist/commands/restore.js.map +1 -0
  143. package/dist/commands/resume.d.ts +1 -1
  144. package/dist/commands/resume.d.ts.map +1 -1
  145. package/dist/commands/resume.js +1 -1
  146. package/dist/commands/resume.js.map +1 -1
  147. package/dist/commands/rollback.d.ts +20 -8
  148. package/dist/commands/rollback.d.ts.map +1 -1
  149. package/dist/commands/rollback.js +11 -6
  150. package/dist/commands/rollback.js.map +1 -1
  151. package/dist/commands/unsuspend.d.ts +35 -0
  152. package/dist/commands/unsuspend.d.ts.map +1 -0
  153. package/dist/commands/unsuspend.js +27 -0
  154. package/dist/commands/unsuspend.js.map +1 -0
  155. package/dist/commands/versions.d.ts +1 -1
  156. package/dist/commands/versions.d.ts.map +1 -1
  157. package/dist/commands/versions.js +1 -1
  158. package/dist/commands/versions.js.map +1 -1
  159. package/dist/commands/wait-deploy.d.ts +92 -0
  160. package/dist/commands/wait-deploy.d.ts.map +1 -0
  161. package/dist/commands/wait-deploy.js +225 -0
  162. package/dist/commands/wait-deploy.js.map +1 -0
  163. package/dist/env-scan-source.d.ts +39 -0
  164. package/dist/env-scan-source.d.ts.map +1 -0
  165. package/dist/env-scan-source.js +332 -0
  166. package/dist/env-scan-source.js.map +1 -0
  167. package/dist/error-classifier.d.ts.map +1 -1
  168. package/dist/error-classifier.js +67 -4
  169. package/dist/error-classifier.js.map +1 -1
  170. package/dist/errors.d.ts +8 -1
  171. package/dist/errors.d.ts.map +1 -1
  172. package/dist/errors.js +2 -0
  173. package/dist/errors.js.map +1 -1
  174. package/dist/index.d.ts +14 -1
  175. package/dist/index.d.ts.map +1 -1
  176. package/dist/index.js +13 -0
  177. package/dist/index.js.map +1 -1
  178. package/dist/plans.d.ts +11 -0
  179. package/dist/plans.d.ts.map +1 -1
  180. package/dist/plans.js +10 -0
  181. package/dist/plans.js.map +1 -1
  182. package/dist/poll-deployment.d.ts +47 -0
  183. package/dist/poll-deployment.d.ts.map +1 -0
  184. package/dist/poll-deployment.js +57 -0
  185. package/dist/poll-deployment.js.map +1 -0
  186. package/dist/recovery.d.ts +356 -0
  187. package/dist/recovery.d.ts.map +1 -0
  188. package/dist/recovery.js +299 -0
  189. package/dist/recovery.js.map +1 -0
  190. package/dist/stream-utils.d.ts +21 -0
  191. package/dist/stream-utils.d.ts.map +1 -0
  192. package/dist/stream-utils.js +41 -0
  193. package/dist/stream-utils.js.map +1 -0
  194. package/dist/templates/ai-files/claude-md.d.ts +7 -0
  195. package/dist/templates/ai-files/claude-md.d.ts.map +1 -0
  196. package/dist/templates/ai-files/claude-md.js +78 -0
  197. package/dist/templates/ai-files/claude-md.js.map +1 -0
  198. package/dist/templates/ai-files/cursor-percher-mdc.d.ts +7 -0
  199. package/dist/templates/ai-files/cursor-percher-mdc.d.ts.map +1 -0
  200. package/dist/templates/ai-files/cursor-percher-mdc.js +111 -0
  201. package/dist/templates/ai-files/cursor-percher-mdc.js.map +1 -0
  202. package/dist/templates/ai-files/index.d.ts +8 -0
  203. package/dist/templates/ai-files/index.d.ts.map +1 -0
  204. package/dist/templates/ai-files/index.js +4 -0
  205. package/dist/templates/ai-files/index.js.map +1 -0
  206. package/package.json +5 -5
@@ -1,11 +1,38 @@
1
1
  import { existsSync } from "node:fs";
2
2
  import { join } from "node:path";
3
3
  import { PercherTomlError, parseFile } from "@percher/toml";
4
- import { z } from "zod";
4
+ import { z } from "zod/v3";
5
5
  import { readPercherTomlAppName } from "../app-name";
6
+ import { classifyError } from "../error-classifier";
7
+ import { buildProblemToRecoveryProblem, recoveryAsk, recoveryEnv, recoveryFixConfig, recoveryFixProblems, recoveryInspectBuildLog, recoveryLogin, recoveryNone, recoveryWait, } from "../recovery";
8
+ import { resolveReplaced } from "./wait-deploy";
6
9
  export const doctorInputSchema = z.object({
7
10
  app: z.string().optional(),
8
11
  json: z.boolean().optional(),
12
+ /**
13
+ * FUTURE12 Phase 2b/Codex round 5 fix — accept the dispatch-mode
14
+ * hint emitted by `recoveryDoctor`. Public agent path defaults to
15
+ * `auto`; the other modes are passed through from a previous
16
+ * recovery's `args.mode` (e.g. `mode: "deploy"` after a build
17
+ * failure routes the agent back to doctor with that specific
18
+ * deploy in mind). When `mode` is set, doctor's dispatch breaks
19
+ * the otherwise-infinite `run_doctor` recursion by falling back
20
+ * to the right concrete recovery for the case (build_failed →
21
+ * inspect_build_log, runtime_crashed → ask_user with crash
22
+ * details). Phase 2c will replace those fallbacks with deeper
23
+ * mode-specific analysis (build-log fetch + classification, etc).
24
+ */
25
+ mode: z.enum(["auto", "deploy", "runtime", "config", "env", "account"]).optional(),
26
+ /**
27
+ * FUTURE12 Phase 2b/Codex round 5 fix — accept a specific deploy
28
+ * id to inspect. When set AND the value differs from the app's
29
+ * latest deploy, doctor fetches that specific deploy and uses its
30
+ * status as the dispatch target instead of `lastDeploy` from
31
+ * `/diagnostics`. Pre-fix, Zod silently stripped this field and
32
+ * the agent's targeted `run_doctor` recovery degraded to an
33
+ * untargeted `doctor(app)` call.
34
+ */
35
+ deployId: z.string().optional(),
9
36
  });
10
37
  export async function doctor(ctx, input = {}) {
11
38
  const checks = [];
@@ -17,7 +44,7 @@ export async function doctor(ctx, input = {}) {
17
44
  message: hasToken ? "Token configured" : "No token found. Run: percher login",
18
45
  });
19
46
  if (!hasToken) {
20
- return summarize(checks);
47
+ return summarize(checks, { appProvided: !!input.app });
21
48
  }
22
49
  // 2. API reachability + user account
23
50
  try {
@@ -42,7 +69,7 @@ export async function doctor(ctx, input = {}) {
42
69
  status: "fail",
43
70
  message: `Cannot reach ${ctx.client.apiUrl}: ${err.message}`,
44
71
  });
45
- return summarize(checks);
72
+ return summarize(checks, { appProvided: !!input.app });
46
73
  }
47
74
  // 4. percher.toml — validated *before* app resolution. readPercherTomlAppName
48
75
  // returns null on parse errors AND when the file is missing, so if we waited
@@ -100,11 +127,34 @@ export async function doctor(ctx, input = {}) {
100
127
  ? "percher.toml is invalid — fix the issues above and re-run"
101
128
  : "No app specified and no percher.toml found";
102
129
  checks.push({ name: "App", status: "skip", message: reason });
103
- return summarize(checks);
130
+ return summarize(checks, { appProvided: !!input.app });
104
131
  }
105
132
  // 5. Fetch diagnostics from the API
133
+ let appStatus;
134
+ let lastDeployStatus;
135
+ let lastDeployId;
136
+ let containerState;
137
+ let containerRunning;
138
+ let containerHealthy;
139
+ let publicRouteHealthy;
140
+ let lastCrashSeverity;
141
+ let lastCrashAt;
142
+ let replacedResolution;
143
+ // FUTURE12 Phase 2c — pre-fetched deep-analysis inputs.
144
+ let buildLogClassification;
145
+ let buildProblems;
146
+ let crashReport;
106
147
  try {
107
148
  const diag = await ctx.client.apps.getDiagnostics(appName);
149
+ appStatus = diag.app.status;
150
+ lastDeployStatus = diag.lastDeploy?.status;
151
+ lastDeployId = diag.lastDeploy?.id;
152
+ containerState = diag.container.state;
153
+ containerRunning = diag.container.running;
154
+ containerHealthy = diag.containerHealth?.healthy;
155
+ publicRouteHealthy = diag.publicRoute?.healthy;
156
+ lastCrashSeverity = diag.lastCrash?.severity;
157
+ lastCrashAt = diag.lastCrash?.createdAt;
108
158
  // App status
109
159
  const appOk = diag.app.status === "live";
110
160
  checks.push({
@@ -182,6 +232,187 @@ export async function doctor(ctx, input = {}) {
182
232
  message: `Exit code ${diag.lastCrash.exitCode}${diag.lastCrash.oomKilled ? " (OOM killed)" : ""} at ${diag.lastCrash.createdAt}`,
183
233
  });
184
234
  }
235
+ // Targeted deploy override — Codex round 5 P2 fix. When the
236
+ // caller supplied a specific deployId (e.g. echoed back from a
237
+ // previous `recoveryDoctor({ deployId })`), prefer that deploy's
238
+ // status over `diag.lastDeploy` for the verdict. Without this,
239
+ // the agent's targeted recovery degraded to an untargeted
240
+ // doctor(app) call after Zod silently stripped the field.
241
+ if (input.deployId && input.deployId !== diag.lastDeploy?.id) {
242
+ try {
243
+ const targeted = await ctx.client.apps.getDeployment(appName, input.deployId);
244
+ lastDeployStatus = targeted.status;
245
+ lastDeployId = targeted.id;
246
+ checks.push({
247
+ name: `Deploy ${targeted.id}`,
248
+ status: targeted.status === "live" ? "pass" : targeted.status === "failed" ? "fail" : "warn",
249
+ message: `${targeted.status}${targeted.errorMessage ? ` — ${targeted.errorMessage}` : ""}`,
250
+ });
251
+ }
252
+ catch (err) {
253
+ // Codex round 7 P2: clear lastDeployStatus/Id on fetch
254
+ // failure so diag.lastDeploy doesn't leak into the verdict.
255
+ // Without this, an agent asking about a missing/stale deploy
256
+ // (404 from getDeployment) could still receive
257
+ // `replaced_by_newer` for the app's current head when
258
+ // diag.lastDeploy.status happened to be "replaced" — the
259
+ // resolver-gate would fire on the unrelated head deploy and
260
+ // shadow the agent's explicit request. Clearing the deploy
261
+ // fields means: replaced-resolver gate fails, transitional/
262
+ // failed-deploy branches skip, dispatch falls through to
263
+ // case 7 where the failed `Deploy <id>` check we just
264
+ // pushed drives the verdict.
265
+ lastDeployStatus = undefined;
266
+ lastDeployId = undefined;
267
+ checks.push({
268
+ name: `Deploy ${input.deployId}`,
269
+ status: "fail",
270
+ message: `Could not fetch deploy: ${err.message}`,
271
+ });
272
+ }
273
+ }
274
+ // Pre-resolve `replaced` here (deriveVerdict is sync, and
275
+ // resolveReplaced may make an API round-trip via listDeploys).
276
+ // We fetch the App + full Deployment so resolveReplaced has the
277
+ // shape it expects — diag.lastDeploy is a thin projection without
278
+ // the `type` field that resolveReplaced uses to filter the lookup.
279
+ //
280
+ // Codex round 6 P2 fix: resolve against the EFFECTIVE deploy, not
281
+ // diag.lastDeploy. When `input.deployId` is set, the targeted
282
+ // override above replaced lastDeployStatus/Id with the targeted
283
+ // deploy's values; running the resolver against diag.lastDeploy
284
+ // here would let an unrelated `replaced` lastDeploy shadow the
285
+ // agent's specific request (e.g. `mode='deploy', deployId='dep_failed'`
286
+ // would return `replaced_by_newer` for the current live deploy
287
+ // instead of `inspect_build_log` for `dep_failed`). The simple
288
+ // rule: only run resolver if the EFFECTIVE deploy itself is
289
+ // `replaced`.
290
+ if (lastDeployStatus === "replaced" && lastDeployId) {
291
+ try {
292
+ const [app, replacedDeployment] = await Promise.all([
293
+ ctx.client.apps.get(appName),
294
+ ctx.client.apps.getDeployment(appName, lastDeployId),
295
+ ]);
296
+ const resolved = await resolveReplaced({ ctx, app, replacedDeployment });
297
+ replacedResolution = {
298
+ recovery: resolved.recovery,
299
+ url: resolved.url,
300
+ summary: resolved.summary,
301
+ resolvedDeployId: resolved.resolvedDeployment?.id,
302
+ };
303
+ }
304
+ catch (err) {
305
+ // Resolver fetch failed — fall through to the dispatch's
306
+ // generic "replaced but couldn't resolve" path so the verdict
307
+ // still routes the user somewhere actionable.
308
+ replacedResolution = {
309
+ recovery: recoveryAsk({
310
+ reasonCode: "replaced_by_newer",
311
+ prompt: `This deploy was replaced but I couldn't determine the current state (resolver fetch failed: ${err.message}). Run \`percher doctor --app ${appName}\` to inspect, or surface to the user.`,
312
+ }),
313
+ summary: `Couldn't resolve the replaced deploy: ${err.message}`,
314
+ };
315
+ }
316
+ }
317
+ // FUTURE12 Phase 2c — deploy-mode deep analysis. When the agent
318
+ // followed our previous run_doctor recovery (input.mode='deploy')
319
+ // and the targeted/last deploy is failed, fetch the build log
320
+ // and classify it so dispatch can return a specific recovery
321
+ // (set_env_vars / fix_problems) instead of always handing off
322
+ // to inspect_build_log. Pre-fix: doctor was a polite passthrough
323
+ // to percher_deploys_inspect.
324
+ if (input.mode === "deploy" && lastDeployStatus === "failed" && lastDeployId) {
325
+ try {
326
+ // Fetch full deploy (for `errorMessage` + `problems[]`) and
327
+ // build log in parallel — both are inputs to classifyError.
328
+ const [deployment, buildLog] = await Promise.all([
329
+ ctx.client.apps.getDeployment(appName, lastDeployId),
330
+ ctx.client.apps.getBuildLog(appName, lastDeployId).catch((logErr) => {
331
+ // Log fetch can fail transiently (404 if log was pruned,
332
+ // network blip post-retries). classifyError handles
333
+ // missing log; we just lose some signal.
334
+ checks.push({
335
+ name: `Build log ${lastDeployId}`,
336
+ status: "warn",
337
+ message: `Could not fetch build log: ${logErr.message}`,
338
+ });
339
+ return "";
340
+ }),
341
+ ]);
342
+ const errorMessage = deployment.errorMessage ?? "";
343
+ buildLogClassification = classifyError(errorMessage, buildLog);
344
+ buildProblems = deployment.problems;
345
+ }
346
+ catch (err) {
347
+ // The deployment fetch itself failed — surface as a check
348
+ // so the verdict can route via the case-7 ask_user fallback.
349
+ checks.push({
350
+ name: "Deploy classification",
351
+ status: "warn",
352
+ message: `Could not classify deploy ${lastDeployId}: ${err.message}`,
353
+ });
354
+ buildLogClassification = null;
355
+ }
356
+ }
357
+ // FUTURE12 Phase 2c — runtime-mode deep analysis. When the agent
358
+ // followed runtime mode to inspect a crashed app, pull the
359
+ // crash report so dispatch can surface the AI-generated
360
+ // explanation + suggestion in the ask_user prompt.
361
+ //
362
+ // Codex round 8 P2 fix (revised in round 9): mirror
363
+ // diagnose.ts's recency guard so doctor doesn't surface a
364
+ // days-old AI explanation as if it explained today's outage
365
+ // when the app is `live` but the container probe is failing
366
+ // (route blip, post-restart noise, etc).
367
+ //
368
+ // The rule is intentionally simple — round 9 removed an earlier
369
+ // `matchesLastCrash` bypass that was wrong in practice:
370
+ // /diagnostics returns `lastCrash` populated with the latest
371
+ // crash row, and /crash-report also returns that same row, so
372
+ // their timestamps ALWAYS match. The bypass effectively
373
+ // disabled the staleness check whenever any crash had ever
374
+ // happened, which was the entire common case the guard was
375
+ // meant to catch.
376
+ //
377
+ // - app.status === "crashed" → trust the report; this is the
378
+ // active outage and the user's question is about it.
379
+ // - app.status !== "crashed" → only trust a report fresher
380
+ // than 24h. Anything older is treated as no report.
381
+ if (input.mode === "runtime") {
382
+ try {
383
+ const report = await ctx.client.apps.getCrashReport(appName);
384
+ if (!report || appStatus === "crashed") {
385
+ crashReport = report;
386
+ }
387
+ else {
388
+ const recencyMs = 24 * 60 * 60 * 1000;
389
+ const age = Date.now() - new Date(report.createdAt).getTime();
390
+ if (age > recencyMs) {
391
+ // Stale + app isn't currently crashed — drop it so
392
+ // dispatch falls back to the safe "check the crash
393
+ // report" framing instead of confidently pushing an
394
+ // old fix at today's failing route.
395
+ crashReport = null;
396
+ checks.push({
397
+ name: "Crash report",
398
+ status: "warn",
399
+ message: `Latest crash report is older than 24h and the app isn't currently crashed — ignoring as stale.`,
400
+ });
401
+ }
402
+ else {
403
+ crashReport = report;
404
+ }
405
+ }
406
+ }
407
+ catch (err) {
408
+ checks.push({
409
+ name: "Crash report",
410
+ status: "warn",
411
+ message: `Could not fetch crash report: ${err.message}`,
412
+ });
413
+ crashReport = null;
414
+ }
415
+ }
185
416
  }
186
417
  catch (err) {
187
418
  checks.push({
@@ -190,15 +421,566 @@ export async function doctor(ctx, input = {}) {
190
421
  message: err.message,
191
422
  });
192
423
  }
193
- return summarize(checks);
424
+ return summarize(checks, {
425
+ appName,
426
+ appProvided: !!input.app,
427
+ appStatus,
428
+ lastDeployStatus,
429
+ lastDeployId,
430
+ containerState,
431
+ containerRunning,
432
+ containerHealthy,
433
+ publicRouteHealthy,
434
+ lastCrashSeverity: lastCrashSeverity ?? undefined,
435
+ lastCrashAt,
436
+ replacedResolution,
437
+ inputMode: input.mode,
438
+ buildLogClassification,
439
+ buildProblems,
440
+ crashReport,
441
+ });
194
442
  }
195
- function summarize(checks) {
443
+ /**
444
+ * lastDeploy / app.status values that mean "still in motion — agent
445
+ * should wait, not act." `replaced` is intentionally excluded: it's
446
+ * terminal-but-not-failed and gets its own routing in Phase 2b.
447
+ */
448
+ const TRANSITIONAL_DEPLOY_STATUSES = new Set(["queued", "building", "deploying"]);
449
+ const TRANSITIONAL_APP_STATUSES = new Set(["provisioning"]);
450
+ function summarize(checks, vctx = {}) {
451
+ const passed = checks.filter((c) => c.status === "pass").length;
452
+ const failed = checks.filter((c) => c.status === "fail").length;
453
+ const warned = checks.filter((c) => c.status === "warn").length;
454
+ const total = checks.filter((c) => c.status !== "skip").length;
455
+ const verdict = deriveVerdict(checks, vctx);
196
456
  return {
197
457
  checks,
198
- passed: checks.filter((c) => c.status === "pass").length,
199
- failed: checks.filter((c) => c.status === "fail").length,
200
- warned: checks.filter((c) => c.status === "warn").length,
201
- total: checks.filter((c) => c.status !== "skip").length,
458
+ passed,
459
+ failed,
460
+ warned,
461
+ total,
462
+ status: verdict.status,
463
+ diagnosis: verdict.diagnosis,
464
+ recovery: verdict.recovery,
465
+ summary: verdict.summary,
466
+ };
467
+ }
468
+ /**
469
+ * FUTURE12 Phase 2a — pattern-match the check list against the
470
+ * blocking cases doctor handles today. Anything we don't recognise
471
+ * yet falls through to a safe `needs_action` / `ask_user` so an
472
+ * agent doesn't think it can auto-resolve from a generic failure.
473
+ * Phase 2b adds the deploy/build/runtime/replaced-specific paths.
474
+ *
475
+ * Two Codex P2 fixes baked into Phase 2a (review of fc267a3):
476
+ *
477
+ * 1. An invalid cwd `percher.toml` MUST NOT block the verdict when
478
+ * the caller passed `--app` — that target was resolved
479
+ * independently of cwd toml. Pre-fix, `doctor --app foo` against
480
+ * a healthy app would still come back blocked/config_invalid if
481
+ * the local toml happened to be broken.
482
+ * 2. Transitional states (`lastDeploy.status` queued/building/
483
+ * deploying, or `app.status` provisioning) MUST surface as
484
+ * `in_progress` + `wait_deploy`, not the warn-collapsed `ok`.
485
+ * Pre-fix, those states looked fully resolved to MCP agents and
486
+ * the new `in_progress` doctor status was effectively
487
+ * unreachable.
488
+ */
489
+ function deriveVerdict(checks, vctx) {
490
+ const findCheck = (name) => checks.find((c) => c.name === name);
491
+ // Case 1 — auth missing. First check in doctor's flow; if it fails
492
+ // we never reach the other gates, so handle it before anything else.
493
+ const auth = findCheck("Auth token");
494
+ if (auth?.status === "fail") {
495
+ return {
496
+ status: "blocked",
497
+ diagnosis: {
498
+ title: "Authentication required",
499
+ explanation: "No Percher token is configured for this CLI.",
500
+ reasonCode: "auth_required",
501
+ phase: "auth",
502
+ },
503
+ recovery: recoveryLogin({ reasonCode: "auth_required" }),
504
+ summary: "Login required: run `percher login` (or set PERCHER_TOKEN).",
505
+ };
506
+ }
507
+ // Case 2 — API can't be reached. Likely-transient infra problem,
508
+ // but doctor can't see whether it'll recover; surface to the user.
509
+ const api = findCheck("API reachability");
510
+ if (api?.status === "fail") {
511
+ return {
512
+ status: "blocked",
513
+ diagnosis: {
514
+ title: "Percher API is unreachable",
515
+ explanation: api.message,
516
+ reasonCode: "infra_unavailable",
517
+ phase: "infra",
518
+ },
519
+ recovery: recoveryAsk({
520
+ prompt: `Percher API is unreachable (${api.message}). This is usually a transient network issue — wait a moment and try again, or check status.percher.app.`,
521
+ reasonCode: "infra_unavailable",
522
+ retryable: true,
523
+ }),
524
+ summary: "Cannot reach the Percher API — try again in a moment.",
525
+ };
526
+ }
527
+ // Case 3 — percher.toml present but invalid. Only blocks the
528
+ // verdict when we actually depend on it (no `--app` provided).
529
+ // With explicit `--app`, the cwd toml is irrelevant for the
530
+ // resolved target, so a broken local file shouldn't poison a
531
+ // healthy app's verdict. (Codex P2 #1 fix.)
532
+ const toml = findCheck("percher.toml");
533
+ if (toml?.status === "fail" && !vctx.appProvided) {
534
+ return {
535
+ status: "blocked",
536
+ diagnosis: {
537
+ title: "Invalid percher.toml",
538
+ explanation: toml.message,
539
+ reasonCode: "config_invalid",
540
+ phase: "config",
541
+ },
542
+ recovery: recoveryFixConfig({
543
+ problems: [
544
+ {
545
+ file: "percher.toml",
546
+ message: toml.message,
547
+ },
548
+ ],
549
+ reasonCode: "config_invalid",
550
+ }),
551
+ summary: "percher.toml is invalid — fix the issues and re-run.",
552
+ };
553
+ }
554
+ // Case 4 — no app to inspect. The "App" skip-row is set when no
555
+ // --app was passed AND no parseable percher.toml was found in cwd.
556
+ const app = findCheck("App");
557
+ if (app?.status === "skip") {
558
+ return {
559
+ status: "blocked",
560
+ diagnosis: {
561
+ title: "No app specified",
562
+ explanation: app.message,
563
+ reasonCode: "config_missing",
564
+ phase: "config",
565
+ },
566
+ recovery: recoveryAsk({
567
+ prompt: `${app.message} — pass \`--app <name>\` or run \`percher init\` in a project directory to generate a percher.toml.`,
568
+ reasonCode: "config_missing",
569
+ }),
570
+ summary: "No percher.toml found and no --app supplied — pass --app or run percher init.",
571
+ };
572
+ }
573
+ // Case 5 — transitional state. A lastDeploy in queued/building/
574
+ // deploying or an app in provisioning means the right next step
575
+ // is to wait, not to act. Surface as `in_progress` + `wait_deploy`
576
+ // pointing at the live deployId so agents call
577
+ // percher_wait_for_deploy with the right args instead of treating
578
+ // warn-only checks as "ok". (Codex P2 #2 fix.)
579
+ const inTransitionalDeploy = !!vctx.lastDeployStatus && TRANSITIONAL_DEPLOY_STATUSES.has(vctx.lastDeployStatus);
580
+ const inTransitionalApp = !!vctx.appStatus && TRANSITIONAL_APP_STATUSES.has(vctx.appStatus);
581
+ if (inTransitionalDeploy || inTransitionalApp) {
582
+ const reasonCode = vctx.lastDeployStatus === "queued"
583
+ ? "deploy_queued"
584
+ : vctx.lastDeployStatus === "building"
585
+ ? "deploy_building"
586
+ : vctx.lastDeployStatus === "deploying"
587
+ ? "deploy_deploying"
588
+ : "deploy_queued";
589
+ const what = inTransitionalApp
590
+ ? `App ${vctx.appName ?? ""} is provisioning`
591
+ : `Last deploy is ${vctx.lastDeployStatus}`;
592
+ const recovery = vctx.lastDeployId && vctx.appName
593
+ ? recoveryWait({
594
+ app: vctx.appName,
595
+ deployId: vctx.lastDeployId,
596
+ reasonCode,
597
+ })
598
+ : recoveryAsk({
599
+ prompt: `${what.trim()} for ${vctx.appName ?? "this app"}. Wait for it to finish before retrying — there's no deployId to track yet.`,
600
+ reasonCode,
601
+ });
602
+ return {
603
+ status: "in_progress",
604
+ diagnosis: {
605
+ title: what.trim(),
606
+ explanation: vctx.lastDeployId
607
+ ? `${what.trim()} (deploy ${vctx.lastDeployId}). Wait for it to finish before retrying.`
608
+ : `${what.trim()}. Wait for it to finish before retrying.`,
609
+ reasonCode,
610
+ phase: "deploy",
611
+ },
612
+ recovery,
613
+ summary: vctx.lastDeployId
614
+ ? `${what.trim()} — wait for deploy ${vctx.lastDeployId}.`
615
+ : `${what.trim()} — wait a moment and re-run.`,
616
+ };
617
+ }
618
+ // For verdict purposes, drop the cwd `percher.toml` check when
619
+ // the caller passed --app. Without this filter, a `fail` toml
620
+ // would force the all-pass-or-warn check below into the
621
+ // `needs_action` branch (case 7) even though the resolved app is
622
+ // fully healthy.
623
+ const verdictChecks = vctx.appProvided && toml?.status === "fail"
624
+ ? checks.filter((c) => c.name !== "percher.toml")
625
+ : checks;
626
+ // Phase 2b — signal-driven dispatches that MUST run before the
627
+ // happy-path collapse below. The `Last deploy` row is rendered
628
+ // as `warn` for any non-live status (replaced, failed, etc), so
629
+ // case 6's all-pass-or-warn predicate would silently hide a
630
+ // failed or replaced deploy. Likewise, a suspended app may
631
+ // surface only as a stopped container (`fail` row) but the
632
+ // dispatch wants to lead with the suspended explanation, not the
633
+ // generic runtime fix.
634
+ // Replaced lastDeploy. Resolution was performed up-front in
635
+ // `doctor()` (deriveVerdict is sync); we just emit what
636
+ // resolveReplaced computed.
637
+ if (vctx.replacedResolution) {
638
+ const r = vctx.replacedResolution;
639
+ const isResolvedLive = r.recovery.nextAction === "none" && !!r.url;
640
+ if (isResolvedLive) {
641
+ return {
642
+ status: "ok",
643
+ recovery: { ...r.recovery, reasonCode: "replaced_by_newer" },
644
+ summary: r.summary,
645
+ };
646
+ }
647
+ const status = r.recovery.nextAction === "wait_deploy" ? "in_progress" : "needs_action";
648
+ return {
649
+ status,
650
+ diagnosis: {
651
+ title: "Last deploy was replaced",
652
+ explanation: r.summary,
653
+ reasonCode: "replaced_by_newer",
654
+ phase: "deploy",
655
+ },
656
+ recovery: { ...r.recovery, reasonCode: "replaced_by_newer" },
657
+ summary: r.summary,
658
+ };
659
+ }
660
+ // lastDeploy.status === "failed". `failed` renders as `warn` in
661
+ // checks[] but is unambiguously not-ok. Doctor can't classify
662
+ // the failure cause from /diagnostics (the build log lives at a
663
+ // different endpoint), so route back to itself with mode='deploy'
664
+ // and the deployId. Phase 4 will migrate publish/wait to the
665
+ // same recovery so deploy-mode expansion has a single owner of
666
+ // build-log inspection.
667
+ if (vctx.lastDeployStatus === "failed") {
668
+ // Loop-break (Codex round 5 P2): if we're already in mode='deploy'
669
+ // (the agent followed our previous run_doctor recovery), emitting
670
+ // run_doctor again with the same args would loop forever. Fall
671
+ // back to inspect_build_log — the existing low-level path that
672
+ // surfaces the build log to the agent. Phase 2c will replace
673
+ // this with deeper analysis (build-log fetch + classification
674
+ // here in doctor) so the agent gets a `set_env_vars` /
675
+ // `fix_problems` recovery directly.
676
+ if (vctx.inputMode === "deploy") {
677
+ // FUTURE12 Phase 2c — deep analysis. We pre-fetched the build
678
+ // log and ran classifyError in doctor() above; if it produced
679
+ // missing env keys or structured file-located problems, emit
680
+ // a specific recovery the agent can act on directly.
681
+ // Otherwise fall back to inspect_build_log (the agent gets
682
+ // the raw log via percher_deploys_inspect).
683
+ const cls = vctx.buildLogClassification;
684
+ // (a) Missing env keys → recoveryEnv. Most actionable case:
685
+ // agent calls percher_env_set with the exact keys.
686
+ if (cls && cls.errorClass === "missing_env" && cls.missingEnvVars.length > 0) {
687
+ return {
688
+ status: "needs_action",
689
+ diagnosis: {
690
+ title: cls.title,
691
+ explanation: cls.explanation,
692
+ reasonCode: "missing_env",
693
+ phase: "build",
694
+ },
695
+ recovery: recoveryEnv({
696
+ app: vctx.appName,
697
+ keys: cls.missingEnvVars,
698
+ }),
699
+ summary: vctx.appName
700
+ ? `Build failed — missing env vars on ${vctx.appName}: ${cls.missingEnvVars.join(", ")}.`
701
+ : `Build failed — missing env vars: ${cls.missingEnvVars.join(", ")}.`,
702
+ };
703
+ }
704
+ // (b) Structured BuildProblems with file locations → fix_problems.
705
+ // The agent can patch files directly without log archeology.
706
+ // Codex round 8 P3 fix: route through
707
+ // `buildProblemToRecoveryProblem` so `BuildProblem.hint` is
708
+ // folded into the message — agents and CLI get `Hint: <text>`
709
+ // appended automatically (matters for problems where the hint
710
+ // carries the actionable next step, e.g. malformed
711
+ // package.json with no line/column).
712
+ const fileProblems = (vctx.buildProblems ?? [])
713
+ .filter((p) => p.file)
714
+ .map(buildProblemToRecoveryProblem);
715
+ if (fileProblems.length > 0) {
716
+ return {
717
+ status: "needs_action",
718
+ diagnosis: {
719
+ title: cls?.title ?? "Build failed with file-located problems",
720
+ explanation: cls?.explanation ??
721
+ `Build extracted ${fileProblems.length} structured problem${fileProblems.length === 1 ? "" : "s"} with file locations. Patch the files directly.`,
722
+ reasonCode: "build_failed",
723
+ phase: "build",
724
+ },
725
+ recovery: recoveryFixProblems({
726
+ problems: fileProblems,
727
+ reasonCode: "build_failed",
728
+ }),
729
+ summary: vctx.lastDeployId
730
+ ? `Deploy ${vctx.lastDeployId} failed — ${fileProblems.length} structured problem${fileProblems.length === 1 ? "" : "s"} to patch.`
731
+ : `Build failed — ${fileProblems.length} structured problem${fileProblems.length === 1 ? "" : "s"} to patch.`,
732
+ };
733
+ }
734
+ // (c) Fallback: classified but unactionable, or unclassified.
735
+ // Hand off to inspect_build_log so the agent can read the raw
736
+ // log. This is also the no-classification path (cls === null).
737
+ return {
738
+ status: "needs_action",
739
+ diagnosis: {
740
+ title: cls?.title ?? "Last deploy failed",
741
+ explanation: cls
742
+ ? `${cls.explanation} Doctor couldn't extract a structured fix; fall back to the build log.`
743
+ : vctx.lastDeployId
744
+ ? `Deploy ${vctx.lastDeployId} terminated in failed state. Doctor couldn't classify the cause from the build log; inspect it directly.`
745
+ : "The most recent deploy ended in failed state. Doctor couldn't classify the cause; inspect the build log.",
746
+ reasonCode: "build_failed",
747
+ phase: "build",
748
+ },
749
+ recovery: recoveryInspectBuildLog({
750
+ deployId: vctx.lastDeployId,
751
+ app: vctx.appName,
752
+ reasonCode: "build_failed",
753
+ }),
754
+ summary: vctx.lastDeployId
755
+ ? `Deploy ${vctx.lastDeployId} failed — inspect the build log.`
756
+ : "Last deploy failed — inspect the latest failed deploy's build log.",
757
+ };
758
+ }
759
+ return {
760
+ status: "needs_action",
761
+ diagnosis: {
762
+ title: "Last deploy failed",
763
+ explanation: vctx.lastDeployId
764
+ ? `Deploy ${vctx.lastDeployId} terminated in failed state. Inspect the build log to find the cause.`
765
+ : "The most recent deploy ended in failed state. Inspect the build log to find the cause.",
766
+ reasonCode: "build_failed",
767
+ phase: "build",
768
+ },
769
+ recovery: {
770
+ retryable: false,
771
+ nextAction: "run_doctor",
772
+ suggestedTool: "percher_doctor",
773
+ args: {
774
+ app: vctx.appName,
775
+ mode: "deploy",
776
+ ...(vctx.lastDeployId ? { deployId: vctx.lastDeployId } : {}),
777
+ },
778
+ reasonCode: "build_failed",
779
+ },
780
+ summary: vctx.lastDeployId
781
+ ? `Deploy ${vctx.lastDeployId} failed — call percher_doctor with mode='deploy'.`
782
+ : "Last deploy failed — call percher_doctor with mode='deploy'.",
783
+ };
784
+ }
785
+ // App suspended. Suspension reason isn't on /diagnostics
786
+ // (lives on the App row's `suspensionReason` /
787
+ // `suspensionOrigin`), so doctor surfaces to the user with a
788
+ // concrete prompt. The reasonCode `quota_exceeded` is a proxy —
789
+ // most owner-resumable suspensions are quota; admin/moderation
790
+ // suspensions would route differently if doctor had access to
791
+ // suspensionOrigin (Phase 6 can plumb that through if needed).
792
+ if (vctx.appStatus === "suspended") {
793
+ return {
794
+ status: "needs_action",
795
+ diagnosis: {
796
+ title: "App is suspended",
797
+ explanation: vctx.appName
798
+ ? `${vctx.appName} is currently suspended. Owner-initiated and quota suspensions can be resumed from the dashboard or via the API.`
799
+ : "This app is currently suspended.",
800
+ reasonCode: "quota_exceeded",
801
+ phase: "infra",
802
+ },
803
+ recovery: recoveryAsk({
804
+ reasonCode: "quota_exceeded",
805
+ prompt: vctx.appName
806
+ ? `${vctx.appName} is suspended. Resume the app from the dashboard (or call the unsuspend API) before retrying.`
807
+ : "This app is suspended. Resume it from the dashboard before retrying.",
808
+ }),
809
+ summary: vctx.appName
810
+ ? `${vctx.appName} is suspended — resume the app before retrying.`
811
+ : "App is suspended — resume it before retrying.",
812
+ };
813
+ }
814
+ // Case 6 — happy path. All app-level checks passed (warns are
815
+ // informational, not blocking).
816
+ if (verdictChecks.every((c) => c.status === "pass" || c.status === "skip" || c.status === "warn")) {
817
+ const passing = verdictChecks.filter((c) => c.status === "pass").length;
818
+ const noun = passing === 1 ? "check" : "checks";
819
+ return {
820
+ status: "ok",
821
+ recovery: recoveryNone({ reasonCode: "none" }),
822
+ summary: vctx.appName
823
+ ? `All ${passing} ${noun} passed for ${vctx.appName}.`
824
+ : `All ${passing} ${noun} passed.`,
825
+ };
826
+ }
827
+ // Case 7 — at least one app-level check failed. Phase 2b refines
828
+ // the previous catch-all `ask_user`/`unknown` into specific
829
+ // dispatches based on structured signals from `/diagnostics`.
830
+ // The signal-driven cases that case 6 would otherwise collapse
831
+ // (replaced/failed lastDeploy, suspended app) are handled above
832
+ // case 6. The remaining checks-driven cases run here:
833
+ // crashed/runtime-down → public-route blip → genuinely unknown.
834
+ // 7a. Runtime crashed. App marked crashed, OR container exited /
835
+ // not running (without a clearer signal), OR direct container-
836
+ // health probe is failing. All three converge on the same
837
+ // recovery: hand back to doctor with `mode: "runtime"` so the
838
+ // runtime-focused expansion (Phase 2 step 9) can take it from
839
+ // here. Self-recursion via mode hint is the explicit Phase 2
840
+ // contract — input mode disambiguates the dispatch.
841
+ const containerDown = (vctx.containerRunning === false &&
842
+ vctx.containerState !== undefined &&
843
+ vctx.containerState !== "not-found") ||
844
+ vctx.containerHealthy === false;
845
+ if (vctx.appStatus === "crashed" || containerDown) {
846
+ const containerCheck = findCheck("Container");
847
+ const healthCheck = findCheck("Container health");
848
+ const lastCrashCheck = findCheck("Last crash");
849
+ const explanationBits = [];
850
+ if (vctx.appStatus === "crashed")
851
+ explanationBits.push("App is in crashed state.");
852
+ if (containerCheck?.status === "fail")
853
+ explanationBits.push(containerCheck.message);
854
+ if (healthCheck?.status === "fail")
855
+ explanationBits.push(healthCheck.message);
856
+ if (lastCrashCheck)
857
+ explanationBits.push(lastCrashCheck.message);
858
+ const explanation = explanationBits.length > 0
859
+ ? explanationBits.join(" ")
860
+ : "Runtime is not responding — container or health check is failing.";
861
+ // Loop-break (Codex round 5 P2): if we're already in
862
+ // mode='runtime', emit a concrete ask_user with the crash
863
+ // details rather than recursing into ourselves. Phase 2c will
864
+ // replace this with crash-report fetch + classification (the
865
+ // crash-handler watchdog already produces structured AI-generated
866
+ // explanations — doctor just needs to surface them here).
867
+ if (vctx.inputMode === "runtime") {
868
+ // FUTURE12 Phase 2c — surface the crash report's
869
+ // AI-generated explanation + suggestion in the prompt when
870
+ // available. The watchdog/crash-handler already produces
871
+ // these via /apps/:app/crash-report; doctor just needs to
872
+ // pull them through.
873
+ const cr = vctx.crashReport;
874
+ const hasAnalysis = cr && cr.analysisStatus === "completed";
875
+ const promptParts = [];
876
+ if (hasAnalysis && cr.explanation) {
877
+ promptParts.push(`Crash explanation: ${cr.explanation}`);
878
+ }
879
+ if (hasAnalysis && cr.suggestion) {
880
+ promptParts.push(`Suggested fix: ${cr.suggestion}`);
881
+ }
882
+ if (cr) {
883
+ const exitInfo = cr.oomKilled ? `exit ${cr.exitCode} (OOM killed)` : `exit ${cr.exitCode}`;
884
+ promptParts.push(`Container ${exitInfo} at ${cr.createdAt}.`);
885
+ }
886
+ // Always include the basic explanation so the user has
887
+ // something to read even if no crash report exists.
888
+ promptParts.push(explanation);
889
+ promptParts.push(vctx.appName
890
+ ? `Decide whether to redeploy, raise the memory plan, or fix the code. Use percher_diagnose_crash for ${vctx.appName} if you need the full log tail.`
891
+ : "Decide whether to redeploy, raise the memory plan, or fix the code.");
892
+ return {
893
+ status: "needs_action",
894
+ diagnosis: {
895
+ title: vctx.appStatus === "crashed" ? "App crashed" : "Runtime not responding",
896
+ explanation: hasAnalysis && cr.explanation ? `${cr.explanation} ${explanation}` : explanation,
897
+ reasonCode: "runtime_crashed",
898
+ phase: "runtime",
899
+ },
900
+ recovery: recoveryAsk({
901
+ reasonCode: "runtime_crashed",
902
+ prompt: promptParts.join(" "),
903
+ options: cr?.severity === "critical" ? ["redeploy", "fix code", "upgrade plan"] : undefined,
904
+ }),
905
+ summary: vctx.appName
906
+ ? hasAnalysis
907
+ ? `${vctx.appName} crashed — ${cr.suggestion ?? cr.explanation ?? "surface the crash details to the user."}`
908
+ : `${vctx.appName} runtime is unhealthy — surface the crash details to the user.`
909
+ : "Runtime is unhealthy — surface the crash details to the user.",
910
+ };
911
+ }
912
+ return {
913
+ status: "needs_action",
914
+ diagnosis: {
915
+ title: vctx.appStatus === "crashed" ? "App crashed" : "Runtime not responding",
916
+ explanation,
917
+ reasonCode: "runtime_crashed",
918
+ phase: "runtime",
919
+ },
920
+ recovery: {
921
+ retryable: false,
922
+ nextAction: "run_doctor",
923
+ suggestedTool: "percher_doctor",
924
+ args: { app: vctx.appName, mode: "runtime" },
925
+ reasonCode: "runtime_crashed",
926
+ },
927
+ summary: vctx.appName
928
+ ? `${vctx.appName} runtime needs investigation — call percher_doctor with mode='runtime'.`
929
+ : "Runtime needs investigation — call percher_doctor with mode='runtime'.",
930
+ };
931
+ }
932
+ // 7b. Public route is the only thing failing — container is up
933
+ // and healthy, but the external probe via Caddy/TLS/DNS isn't
934
+ // responding. This is usually a transient route-reconcile blip
935
+ // that self-heals; recommend `retry` so the agent re-runs doctor
936
+ // (or the user retries publish) rather than asking the user to
937
+ // act manually.
938
+ if (vctx.publicRouteHealthy === false) {
939
+ const routeCheck = findCheck("Public route");
940
+ return {
941
+ status: "needs_action",
942
+ diagnosis: {
943
+ title: "Public route is not responding",
944
+ explanation: routeCheck?.message ??
945
+ "Container looks healthy but the public URL isn't responding — route reconcile usually self-heals.",
946
+ reasonCode: "infra_transient",
947
+ phase: "infra",
948
+ },
949
+ recovery: {
950
+ retryable: true,
951
+ nextAction: "retry",
952
+ suggestedTool: "percher_doctor",
953
+ args: { app: vctx.appName },
954
+ reasonCode: "infra_transient",
955
+ },
956
+ summary: vctx.appName
957
+ ? `${vctx.appName} public route is failing — likely transient, re-run doctor in a moment.`
958
+ : "Public route is failing — likely transient, re-run doctor in a moment.",
959
+ };
960
+ }
961
+ // 7c. Genuinely unknown — none of the structured signals above
962
+ // matched. Keep the safe fallback so an agent doesn't think it
963
+ // can auto-resolve. This is what Phase 2a shipped; Phase 2b only
964
+ // narrows the surface that lands here.
965
+ const firstFailed = verdictChecks.find((c) => c.status === "fail");
966
+ return {
967
+ status: "needs_action",
968
+ diagnosis: firstFailed
969
+ ? {
970
+ title: firstFailed.name,
971
+ explanation: firstFailed.message,
972
+ reasonCode: "unknown",
973
+ }
974
+ : undefined,
975
+ recovery: recoveryAsk({
976
+ prompt: firstFailed
977
+ ? `Doctor flagged a problem: ${firstFailed.name} — ${firstFailed.message}. Review the checks list and surface the failing item to the user.`
978
+ : "Doctor reported one or more issues. Review the checks list and surface the failing items to the user.",
979
+ reasonCode: "unknown",
980
+ }),
981
+ summary: firstFailed
982
+ ? `${firstFailed.name} failed: ${firstFailed.message}`
983
+ : "Doctor reported issues — review the checks list.",
202
984
  };
203
985
  }
204
986
  //# sourceMappingURL=doctor.js.map