@percher/core 0.2.6 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (226)
  1. package/dist/ai-files-manifest.d.ts +28 -0
  2. package/dist/ai-files-manifest.d.ts.map +1 -0
  3. package/dist/ai-files-manifest.js +96 -0
  4. package/dist/ai-files-manifest.js.map +1 -0
  5. package/dist/commands/account.d.ts +51 -0
  6. package/dist/commands/account.d.ts.map +1 -0
  7. package/dist/commands/account.js +88 -0
  8. package/dist/commands/account.js.map +1 -0
  9. package/dist/commands/ai-files.d.ts +73 -0
  10. package/dist/commands/ai-files.d.ts.map +1 -0
  11. package/dist/commands/ai-files.js +179 -0
  12. package/dist/commands/ai-files.js.map +1 -0
  13. package/dist/commands/billing.d.ts +1 -1
  14. package/dist/commands/billing.d.ts.map +1 -1
  15. package/dist/commands/billing.js +1 -1
  16. package/dist/commands/billing.js.map +1 -1
  17. package/dist/commands/continue.d.ts +48 -0
  18. package/dist/commands/continue.d.ts.map +1 -0
  19. package/dist/commands/continue.js +121 -0
  20. package/dist/commands/continue.js.map +1 -0
  21. package/dist/commands/create.d.ts +1 -1
  22. package/dist/commands/create.d.ts.map +1 -1
  23. package/dist/commands/create.js +1 -1
  24. package/dist/commands/create.js.map +1 -1
  25. package/dist/commands/dashboard.d.ts +15 -0
  26. package/dist/commands/dashboard.d.ts.map +1 -0
  27. package/dist/commands/dashboard.js +33 -0
  28. package/dist/commands/dashboard.js.map +1 -0
  29. package/dist/commands/data-export.d.ts +21 -0
  30. package/dist/commands/data-export.d.ts.map +1 -0
  31. package/dist/commands/data-export.js +36 -0
  32. package/dist/commands/data-export.js.map +1 -0
  33. package/dist/commands/data.d.ts +3 -3
  34. package/dist/commands/data.d.ts.map +1 -1
  35. package/dist/commands/data.js +1 -1
  36. package/dist/commands/data.js.map +1 -1
  37. package/dist/commands/delete.d.ts +1 -1
  38. package/dist/commands/delete.d.ts.map +1 -1
  39. package/dist/commands/delete.js +1 -1
  40. package/dist/commands/delete.js.map +1 -1
  41. package/dist/commands/deploys.d.ts +2 -2
  42. package/dist/commands/deploys.d.ts.map +1 -1
  43. package/dist/commands/deploys.js +21 -5
  44. package/dist/commands/deploys.js.map +1 -1
  45. package/dist/commands/dev.d.ts +1 -9
  46. package/dist/commands/dev.d.ts.map +1 -1
  47. package/dist/commands/dev.js +79 -24
  48. package/dist/commands/dev.js.map +1 -1
  49. package/dist/commands/diagnose.d.ts +1 -1
  50. package/dist/commands/diagnose.d.ts.map +1 -1
  51. package/dist/commands/diagnose.js +1 -1
  52. package/dist/commands/diagnose.js.map +1 -1
  53. package/dist/commands/doctor.d.ts +75 -3
  54. package/dist/commands/doctor.d.ts.map +1 -1
  55. package/dist/commands/doctor.js +822 -10
  56. package/dist/commands/doctor.js.map +1 -1
  57. package/dist/commands/domains.d.ts +1 -1
  58. package/dist/commands/domains.d.ts.map +1 -1
  59. package/dist/commands/domains.js +1 -1
  60. package/dist/commands/domains.js.map +1 -1
  61. package/dist/commands/env-scan.d.ts +2 -0
  62. package/dist/commands/env-scan.d.ts.map +1 -0
  63. package/dist/commands/env-scan.js +92 -0
  64. package/dist/commands/env-scan.js.map +1 -0
  65. package/dist/commands/env.d.ts +1 -1
  66. package/dist/commands/env.d.ts.map +1 -1
  67. package/dist/commands/env.js +1 -1
  68. package/dist/commands/env.js.map +1 -1
  69. package/dist/commands/export.d.ts +1 -1
  70. package/dist/commands/export.js +1 -1
  71. package/dist/commands/generate.d.ts +1 -1
  72. package/dist/commands/generate.d.ts.map +1 -1
  73. package/dist/commands/generate.js +14 -9
  74. package/dist/commands/generate.js.map +1 -1
  75. package/dist/commands/github.d.ts +60 -0
  76. package/dist/commands/github.d.ts.map +1 -0
  77. package/dist/commands/github.js +112 -0
  78. package/dist/commands/github.js.map +1 -0
  79. package/dist/commands/import-project.d.ts +1 -1
  80. package/dist/commands/import-project.d.ts.map +1 -1
  81. package/dist/commands/import-project.js +1 -1
  82. package/dist/commands/import-project.js.map +1 -1
  83. package/dist/commands/init.d.ts +1 -1
  84. package/dist/commands/init.d.ts.map +1 -1
  85. package/dist/commands/init.js +1 -1
  86. package/dist/commands/init.js.map +1 -1
  87. package/dist/commands/insights.d.ts +1 -1
  88. package/dist/commands/insights.d.ts.map +1 -1
  89. package/dist/commands/insights.js +1 -1
  90. package/dist/commands/insights.js.map +1 -1
  91. package/dist/commands/login.d.ts +1 -1
  92. package/dist/commands/login.d.ts.map +1 -1
  93. package/dist/commands/login.js +1 -1
  94. package/dist/commands/login.js.map +1 -1
  95. package/dist/commands/logs.d.ts +1 -1
  96. package/dist/commands/logs.d.ts.map +1 -1
  97. package/dist/commands/logs.js +1 -1
  98. package/dist/commands/logs.js.map +1 -1
  99. package/dist/commands/mcp.d.ts +1 -1
  100. package/dist/commands/mcp.d.ts.map +1 -1
  101. package/dist/commands/mcp.js +1 -1
  102. package/dist/commands/mcp.js.map +1 -1
  103. package/dist/commands/open.d.ts +1 -1
  104. package/dist/commands/open.d.ts.map +1 -1
  105. package/dist/commands/open.js +1 -1
  106. package/dist/commands/open.js.map +1 -1
  107. package/dist/commands/publish-failure.d.ts +31 -0
  108. package/dist/commands/publish-failure.d.ts.map +1 -0
  109. package/dist/commands/publish-failure.js +150 -0
  110. package/dist/commands/publish-failure.js.map +1 -0
  111. package/dist/commands/publish-node.d.ts +16 -0
  112. package/dist/commands/publish-node.d.ts.map +1 -0
  113. package/dist/commands/publish-node.js +42 -0
  114. package/dist/commands/publish-node.js.map +1 -0
  115. package/dist/commands/publish.d.ts +105 -3
  116. package/dist/commands/publish.d.ts.map +1 -1
  117. package/dist/commands/publish.js +746 -158
  118. package/dist/commands/publish.js.map +1 -1
  119. package/dist/commands/push.d.ts +45 -8
  120. package/dist/commands/push.d.ts.map +1 -1
  121. package/dist/commands/push.js +233 -22
  122. package/dist/commands/push.js.map +1 -1
  123. package/dist/commands/redeploy.d.ts +28 -0
  124. package/dist/commands/redeploy.d.ts.map +1 -0
  125. package/dist/commands/redeploy.js +421 -0
  126. package/dist/commands/redeploy.js.map +1 -0
  127. package/dist/commands/rename.d.ts +1 -1
  128. package/dist/commands/rename.d.ts.map +1 -1
  129. package/dist/commands/rename.js +1 -1
  130. package/dist/commands/rename.js.map +1 -1
  131. package/dist/commands/reproduce.d.ts +64 -0
  132. package/dist/commands/reproduce.d.ts.map +1 -0
  133. package/dist/commands/reproduce.js +211 -0
  134. package/dist/commands/reproduce.js.map +1 -0
  135. package/dist/commands/reset-superuser.d.ts +1 -1
  136. package/dist/commands/reset-superuser.d.ts.map +1 -1
  137. package/dist/commands/reset-superuser.js +1 -1
  138. package/dist/commands/reset-superuser.js.map +1 -1
  139. package/dist/commands/restore.d.ts +79 -0
  140. package/dist/commands/restore.d.ts.map +1 -0
  141. package/dist/commands/restore.js +164 -0
  142. package/dist/commands/restore.js.map +1 -0
  143. package/dist/commands/resume.d.ts +1 -1
  144. package/dist/commands/resume.d.ts.map +1 -1
  145. package/dist/commands/resume.js +1 -1
  146. package/dist/commands/resume.js.map +1 -1
  147. package/dist/commands/rollback.d.ts +21 -8
  148. package/dist/commands/rollback.d.ts.map +1 -1
  149. package/dist/commands/rollback.js +12 -6
  150. package/dist/commands/rollback.js.map +1 -1
  151. package/dist/commands/status.d.ts +33 -0
  152. package/dist/commands/status.d.ts.map +1 -0
  153. package/dist/commands/status.js +48 -0
  154. package/dist/commands/status.js.map +1 -0
  155. package/dist/commands/unsuspend.d.ts +35 -0
  156. package/dist/commands/unsuspend.d.ts.map +1 -0
  157. package/dist/commands/unsuspend.js +27 -0
  158. package/dist/commands/unsuspend.js.map +1 -0
  159. package/dist/commands/versions.d.ts +1 -1
  160. package/dist/commands/versions.d.ts.map +1 -1
  161. package/dist/commands/versions.js +1 -1
  162. package/dist/commands/versions.js.map +1 -1
  163. package/dist/commands/wait-deploy.d.ts +92 -0
  164. package/dist/commands/wait-deploy.d.ts.map +1 -0
  165. package/dist/commands/wait-deploy.js +226 -0
  166. package/dist/commands/wait-deploy.js.map +1 -0
  167. package/dist/env-scan-source.d.ts +39 -0
  168. package/dist/env-scan-source.d.ts.map +1 -0
  169. package/dist/env-scan-source.js +332 -0
  170. package/dist/env-scan-source.js.map +1 -0
  171. package/dist/error-classifier.d.ts.map +1 -1
  172. package/dist/error-classifier.js +67 -4
  173. package/dist/error-classifier.js.map +1 -1
  174. package/dist/errors.d.ts +8 -1
  175. package/dist/errors.d.ts.map +1 -1
  176. package/dist/errors.js +2 -0
  177. package/dist/errors.js.map +1 -1
  178. package/dist/event-renderer.d.ts +17 -0
  179. package/dist/event-renderer.d.ts.map +1 -0
  180. package/dist/event-renderer.js +130 -0
  181. package/dist/event-renderer.js.map +1 -0
  182. package/dist/index.d.ts +16 -1
  183. package/dist/index.d.ts.map +1 -1
  184. package/dist/index.js +15 -0
  185. package/dist/index.js.map +1 -1
  186. package/dist/plans.d.ts +20 -0
  187. package/dist/plans.d.ts.map +1 -1
  188. package/dist/plans.js +15 -0
  189. package/dist/plans.js.map +1 -1
  190. package/dist/poll-deployment.d.ts +59 -0
  191. package/dist/poll-deployment.d.ts.map +1 -0
  192. package/dist/poll-deployment.js +93 -0
  193. package/dist/poll-deployment.js.map +1 -0
  194. package/dist/publish-retry.d.ts +29 -0
  195. package/dist/publish-retry.d.ts.map +1 -0
  196. package/dist/publish-retry.js +224 -0
  197. package/dist/publish-retry.js.map +1 -0
  198. package/dist/recovery.d.ts +356 -0
  199. package/dist/recovery.d.ts.map +1 -0
  200. package/dist/recovery.js +300 -0
  201. package/dist/recovery.js.map +1 -0
  202. package/dist/stream-utils.d.ts +21 -0
  203. package/dist/stream-utils.d.ts.map +1 -0
  204. package/dist/stream-utils.js +41 -0
  205. package/dist/stream-utils.js.map +1 -0
  206. package/dist/structured-error-codes.d.ts +30 -0
  207. package/dist/structured-error-codes.d.ts.map +1 -0
  208. package/dist/structured-error-codes.js +86 -0
  209. package/dist/structured-error-codes.js.map +1 -0
  210. package/dist/tarball.d.ts +11 -0
  211. package/dist/tarball.d.ts.map +1 -1
  212. package/dist/tarball.js +30 -9
  213. package/dist/tarball.js.map +1 -1
  214. package/dist/templates/ai-files/claude-md.d.ts +7 -0
  215. package/dist/templates/ai-files/claude-md.d.ts.map +1 -0
  216. package/dist/templates/ai-files/claude-md.js +78 -0
  217. package/dist/templates/ai-files/claude-md.js.map +1 -0
  218. package/dist/templates/ai-files/cursor-percher-mdc.d.ts +7 -0
  219. package/dist/templates/ai-files/cursor-percher-mdc.d.ts.map +1 -0
  220. package/dist/templates/ai-files/cursor-percher-mdc.js +111 -0
  221. package/dist/templates/ai-files/cursor-percher-mdc.js.map +1 -0
  222. package/dist/templates/ai-files/index.d.ts +8 -0
  223. package/dist/templates/ai-files/index.d.ts.map +1 -0
  224. package/dist/templates/ai-files/index.js +4 -0
  225. package/dist/templates/ai-files/index.js.map +1 -0
  226. package/package.json +6 -5
@@ -1,12 +1,56 @@
1
1
  import { existsSync } from "node:fs";
2
2
  import { join } from "node:path";
3
3
  import { PercherTomlError, parseFile } from "@percher/toml";
4
- import { z } from "zod";
4
+ import { z } from "zod/v3";
5
5
  import { readPercherTomlAppName } from "../app-name";
6
+ import { classifyError } from "../error-classifier";
7
+ import { buildProblemToRecoveryProblem, recoveryAsk, recoveryEnv, recoveryFixConfig, recoveryFixProblems, recoveryInspectBuildLog, recoveryLogin, recoveryNone, recoveryWait, } from "../recovery";
8
+ import { resolveReplaced } from "./wait-deploy";
6
9
  // Zod object schema for doctor's input: four optional fields (app, json,
  // mode, deployId). NOTE(review): where this schema is applied to the
  // input is not visible in this chunk — confirm against the caller.
  export const doctorInputSchema = z.object({
7
10
  app: z.string().optional(),
8
11
  json: z.boolean().optional(),
12
+ /**
13
+ * FUTURE12 Phase 2b/Codex round 5 fix — accept the dispatch-mode
14
+ * hint emitted by `recoveryDoctor`. Public agent path defaults to
15
+ * `auto`; the other modes are passed through from a previous
16
+ * recovery's `args.mode` (e.g. `mode: "deploy"` after a build
17
+ * failure routes the agent back to doctor with that specific
18
+ * deploy in mind). When `mode` is set, doctor's dispatch breaks
19
+ * the otherwise-infinite `run_doctor` recursion by falling back
20
+ * to the right concrete recovery for the case (build_failed →
21
+ * inspect_build_log, runtime_crashed → ask_user with crash
22
+ * details). Phase 2c will replace those fallbacks with deeper
23
+ * mode-specific analysis (build-log fetch + classification, etc).
24
+ */
25
+ mode: z.enum(["auto", "deploy", "runtime", "config", "env", "account"]).optional(),
26
+ /**
27
+ * FUTURE12 Phase 2b/Codex round 5 fix — accept a specific deploy
28
+ * id to inspect. When set AND the value differs from the app's
29
+ * latest deploy, doctor fetches that specific deploy and uses its
30
+ * status as the dispatch target instead of `lastDeploy` from
31
+ * `/diagnostics`. Pre-fix, Zod silently stripped this field and
32
+ * the agent's targeted `run_doctor` recovery degraded to an
33
+ * untargeted `doctor(app)` call.
34
+ */
35
+ deployId: z.string().optional(),
9
36
  });
/**
 * Default age, in milliseconds, after which a pre-terminal deploy is
 * reported as stuck (15 minutes). Mirrors the platform-side
 * stale-deploy reconciler's 15-minute window so the user-facing
 * callout fires only once the stall is no longer plausibly legitimate
 * slow progress.
 */
export const STUCK_DEPLOY_THRESHOLD_MS = 15 * 60 * 1000;

/**
 * Phase 6.2 — exported for tests. Reports whether a deploy is stuck:
 * it is still in a pre-terminal state (`queued` or `building`) AND it
 * was created more than `thresholdMs` before `now`.
 *
 * @param {string} status - Deploy status. Any value other than
 *   "queued" or "building" is never considered stuck.
 * @param {string} createdAt - Creation timestamp in a form
 *   `Date.parse` understands (e.g. an ISO-8601 string).
 * @param {number} [now=Date.now()] - Current time in epoch
 *   milliseconds; injectable so tests can pin the clock.
 * @param {number} [thresholdMs=STUCK_DEPLOY_THRESHOLD_MS] - Age in
 *   milliseconds that must be strictly exceeded for the deploy to
 *   count as stuck.
 * @returns {boolean} True only when the status is pre-terminal, the
 *   timestamp parses, and the deploy is strictly older than the
 *   threshold.
 */
export function isStuckDeploy(status, createdAt, now = Date.now(), thresholdMs = STUCK_DEPLOY_THRESHOLD_MS) {
    if (status !== "building" && status !== "queued") {
        return false;
    }
    const created = Date.parse(createdAt);
    // An unparseable timestamp gives us no age to compare, so err on
    // the side of not raising the stuck-deploy callout.
    if (Number.isNaN(created)) {
        return false;
    }
    return now - created > thresholdMs;
}
10
54
  export async function doctor(ctx, input = {}) {
11
55
  const checks = [];
12
56
  // 1. Auth token
@@ -17,7 +61,7 @@ export async function doctor(ctx, input = {}) {
17
61
  message: hasToken ? "Token configured" : "No token found. Run: percher login",
18
62
  });
19
63
  if (!hasToken) {
20
- return summarize(checks);
64
+ return summarize(checks, { appProvided: !!input.app });
21
65
  }
22
66
  // 2. API reachability + user account
23
67
  try {
@@ -42,7 +86,7 @@ export async function doctor(ctx, input = {}) {
42
86
  status: "fail",
43
87
  message: `Cannot reach ${ctx.client.apiUrl}: ${err.message}`,
44
88
  });
45
- return summarize(checks);
89
+ return summarize(checks, { appProvided: !!input.app });
46
90
  }
47
91
  // 4. percher.toml — validated *before* app resolution. readPercherTomlAppName
48
92
  // returns null on parse errors AND when the file is missing, so if we waited
@@ -100,11 +144,34 @@ export async function doctor(ctx, input = {}) {
100
144
  ? "percher.toml is invalid — fix the issues above and re-run"
101
145
  : "No app specified and no percher.toml found";
102
146
  checks.push({ name: "App", status: "skip", message: reason });
103
- return summarize(checks);
147
+ return summarize(checks, { appProvided: !!input.app });
104
148
  }
105
149
  // 5. Fetch diagnostics from the API
150
+ let appStatus;
151
+ let lastDeployStatus;
152
+ let lastDeployId;
153
+ let containerState;
154
+ let containerRunning;
155
+ let containerHealthy;
156
+ let publicRouteHealthy;
157
+ let lastCrashSeverity;
158
+ let lastCrashAt;
159
+ let replacedResolution;
160
+ // FUTURE12 Phase 2c — pre-fetched deep-analysis inputs.
161
+ let buildLogClassification;
162
+ let buildProblems;
163
+ let crashReport;
106
164
  try {
107
165
  const diag = await ctx.client.apps.getDiagnostics(appName);
166
+ appStatus = diag.app.status;
167
+ lastDeployStatus = diag.lastDeploy?.status;
168
+ lastDeployId = diag.lastDeploy?.id;
169
+ containerState = diag.container.state;
170
+ containerRunning = diag.container.running;
171
+ containerHealthy = diag.containerHealth?.healthy;
172
+ publicRouteHealthy = diag.publicRoute?.healthy;
173
+ lastCrashSeverity = diag.lastCrash?.severity;
174
+ lastCrashAt = diag.lastCrash?.createdAt;
108
175
  // App status
109
176
  const appOk = diag.app.status === "live";
110
177
  checks.push({
@@ -173,6 +240,19 @@ export async function doctor(ctx, input = {}) {
173
240
  status: deployOk ? "pass" : "warn",
174
241
  message: `${diag.lastDeploy.status} (${diag.lastDeploy.createdAt})`,
175
242
  });
243
+ // Phase 6.2 — stuck-deploy callout. The platform's reconciler
244
+ // self-heals stale `building` rows after 15 minutes (Phase 3 of
245
+ // this same plan), so anything still `building` past that
246
+ // window means the user's local poll lost track AND the
247
+ // platform hasn't yet swept it. Surface a clear next-step so
248
+ // the user doesn't sit waiting on a deploy that won't progress.
249
+ if (isStuckDeploy(diag.lastDeploy.status, diag.lastDeploy.createdAt)) {
250
+ checks.push({
251
+ name: "Deploy stuck",
252
+ status: "warn",
253
+ message: "Last deploy has been in-progress > 15 min — likely platform restart mid-build. Run `percher publish` again; the platform will clear the stuck row within ~2 min.",
254
+ });
255
+ }
176
256
  }
177
257
  // Last crash
178
258
  if (diag.lastCrash) {
@@ -182,6 +262,187 @@ export async function doctor(ctx, input = {}) {
182
262
  message: `Exit code ${diag.lastCrash.exitCode}${diag.lastCrash.oomKilled ? " (OOM killed)" : ""} at ${diag.lastCrash.createdAt}`,
183
263
  });
184
264
  }
265
+ // Targeted deploy override — Codex round 5 P2 fix. When the
266
+ // caller supplied a specific deployId (e.g. echoed back from a
267
+ // previous `recoveryDoctor({ deployId })`), prefer that deploy's
268
+ // status over `diag.lastDeploy` for the verdict. Without this,
269
+ // the agent's targeted recovery degraded to an untargeted
270
+ // doctor(app) call after Zod silently stripped the field.
271
+ if (input.deployId && input.deployId !== diag.lastDeploy?.id) {
272
+ try {
273
+ const targeted = await ctx.client.apps.getDeployment(appName, input.deployId);
274
+ lastDeployStatus = targeted.status;
275
+ lastDeployId = targeted.id;
276
+ checks.push({
277
+ name: `Deploy ${targeted.id}`,
278
+ status: targeted.status === "live" ? "pass" : targeted.status === "failed" ? "fail" : "warn",
279
+ message: `${targeted.status}${targeted.errorMessage ? ` — ${targeted.errorMessage}` : ""}`,
280
+ });
281
+ }
282
+ catch (err) {
283
+ // Codex round 7 P2: clear lastDeployStatus/Id on fetch
284
+ // failure so diag.lastDeploy doesn't leak into the verdict.
285
+ // Without this, an agent asking about a missing/stale deploy
286
+ // (404 from getDeployment) could still receive
287
+ // `replaced_by_newer` for the app's current head when
288
+ // diag.lastDeploy.status happened to be "replaced" — the
289
+ // resolver-gate would fire on the unrelated head deploy and
290
+ // shadow the agent's explicit request. Clearing the deploy
291
+ // fields means: replaced-resolver gate fails, transitional/
292
+ // failed-deploy branches skip, dispatch falls through to
293
+ // case 7 where the failed `Deploy <id>` check we just
294
+ // pushed drives the verdict.
295
+ lastDeployStatus = undefined;
296
+ lastDeployId = undefined;
297
+ checks.push({
298
+ name: `Deploy ${input.deployId}`,
299
+ status: "fail",
300
+ message: `Could not fetch deploy: ${err.message}`,
301
+ });
302
+ }
303
+ }
304
+ // Pre-resolve `replaced` here (deriveVerdict is sync, and
305
+ // resolveReplaced may make an API round-trip via listDeploys).
306
+ // We fetch the App + full Deployment so resolveReplaced has the
307
+ // shape it expects — diag.lastDeploy is a thin projection without
308
+ // the `type` field that resolveReplaced uses to filter the lookup.
309
+ //
310
+ // Codex round 6 P2 fix: resolve against the EFFECTIVE deploy, not
311
+ // diag.lastDeploy. When `input.deployId` is set, the targeted
312
+ // override above replaced lastDeployStatus/Id with the targeted
313
+ // deploy's values; running the resolver against diag.lastDeploy
314
+ // here would let an unrelated `replaced` lastDeploy shadow the
315
+ // agent's specific request (e.g. `mode='deploy', deployId='dep_failed'`
316
+ // would return `replaced_by_newer` for the current live deploy
317
+ // instead of `inspect_build_log` for `dep_failed`). The simple
318
+ // rule: only run resolver if the EFFECTIVE deploy itself is
319
+ // `replaced`.
320
+ if (lastDeployStatus === "replaced" && lastDeployId) {
321
+ try {
322
+ const [app, replacedDeployment] = await Promise.all([
323
+ ctx.client.apps.get(appName),
324
+ ctx.client.apps.getDeployment(appName, lastDeployId),
325
+ ]);
326
+ const resolved = await resolveReplaced({ ctx, app, replacedDeployment });
327
+ replacedResolution = {
328
+ recovery: resolved.recovery,
329
+ url: resolved.url,
330
+ summary: resolved.summary,
331
+ resolvedDeployId: resolved.resolvedDeployment?.id,
332
+ };
333
+ }
334
+ catch (err) {
335
+ // Resolver fetch failed — fall through to the dispatch's
336
+ // generic "replaced but couldn't resolve" path so the verdict
337
+ // still routes the user somewhere actionable.
338
+ replacedResolution = {
339
+ recovery: recoveryAsk({
340
+ reasonCode: "replaced_by_newer",
341
+ prompt: `This deploy was replaced but I couldn't determine the current state (resolver fetch failed: ${err.message}). Run \`percher doctor --app ${appName}\` to inspect, or surface to the user.`,
342
+ }),
343
+ summary: `Couldn't resolve the replaced deploy: ${err.message}`,
344
+ };
345
+ }
346
+ }
347
+ // FUTURE12 Phase 2c — deploy-mode deep analysis. When the agent
348
+ // followed our previous run_doctor recovery (input.mode='deploy')
349
+ // and the targeted/last deploy is failed, fetch the build log
350
+ // and classify it so dispatch can return a specific recovery
351
+ // (set_env_vars / fix_problems) instead of always handing off
352
+ // to inspect_build_log. Pre-fix: doctor was a polite passthrough
353
+ // to percher_deploys_inspect.
354
+ if (input.mode === "deploy" && lastDeployStatus === "failed" && lastDeployId) {
355
+ try {
356
+ // Fetch full deploy (for `errorMessage` + `problems[]`) and
357
+ // build log in parallel — both are inputs to classifyError.
358
+ const [deployment, buildLog] = await Promise.all([
359
+ ctx.client.apps.getDeployment(appName, lastDeployId),
360
+ ctx.client.apps.getBuildLog(appName, lastDeployId).catch((logErr) => {
361
+ // Log fetch can fail transiently (404 if log was pruned,
362
+ // network blip post-retries). classifyError handles
363
+ // missing log; we just lose some signal.
364
+ checks.push({
365
+ name: `Build log ${lastDeployId}`,
366
+ status: "warn",
367
+ message: `Could not fetch build log: ${logErr.message}`,
368
+ });
369
+ return "";
370
+ }),
371
+ ]);
372
+ const errorMessage = deployment.errorMessage ?? "";
373
+ buildLogClassification = classifyError(errorMessage, buildLog);
374
+ buildProblems = deployment.problems;
375
+ }
376
+ catch (err) {
377
+ // The deployment fetch itself failed — surface as a check
378
+ // so the verdict can route via the case-7 ask_user fallback.
379
+ checks.push({
380
+ name: "Deploy classification",
381
+ status: "warn",
382
+ message: `Could not classify deploy ${lastDeployId}: ${err.message}`,
383
+ });
384
+ buildLogClassification = null;
385
+ }
386
+ }
387
+ // FUTURE12 Phase 2c — runtime-mode deep analysis. When the agent
388
+ // followed runtime mode to inspect a crashed app, pull the
389
+ // crash report so dispatch can surface the AI-generated
390
+ // explanation + suggestion in the ask_user prompt.
391
+ //
392
+ // Codex round 8 P2 fix (revised in round 9): mirror
393
+ // diagnose.ts's recency guard so doctor doesn't surface a
394
+ // days-old AI explanation as if it explained today's outage
395
+ // when the app is `live` but the container probe is failing
396
+ // (route blip, post-restart noise, etc).
397
+ //
398
+ // The rule is intentionally simple — round 9 removed an earlier
399
+ // `matchesLastCrash` bypass that was wrong in practice:
400
+ // /diagnostics returns `lastCrash` populated with the latest
401
+ // crash row, and /crash-report also returns that same row, so
402
+ // their timestamps ALWAYS match. The bypass effectively
403
+ // disabled the staleness check whenever any crash had ever
404
+ // happened, which was the entire common case the guard was
405
+ // meant to catch.
406
+ //
407
+ // - app.status === "crashed" → trust the report; this is the
408
+ // active outage and the user's question is about it.
409
+ // - app.status !== "crashed" → only trust a report fresher
410
+ // than 24h. Anything older is treated as no report.
411
+ if (input.mode === "runtime") {
412
+ try {
413
+ const report = await ctx.client.apps.getCrashReport(appName);
414
+ if (!report || appStatus === "crashed") {
415
+ crashReport = report;
416
+ }
417
+ else {
418
+ const recencyMs = 24 * 60 * 60 * 1000;
419
+ const age = Date.now() - new Date(report.createdAt).getTime();
420
+ if (age > recencyMs) {
421
+ // Stale + app isn't currently crashed — drop it so
422
+ // dispatch falls back to the safe "check the crash
423
+ // report" framing instead of confidently pushing an
424
+ // old fix at today's failing route.
425
+ crashReport = null;
426
+ checks.push({
427
+ name: "Crash report",
428
+ status: "warn",
429
+ message: `Latest crash report is older than 24h and the app isn't currently crashed — ignoring as stale.`,
430
+ });
431
+ }
432
+ else {
433
+ crashReport = report;
434
+ }
435
+ }
436
+ }
437
+ catch (err) {
438
+ checks.push({
439
+ name: "Crash report",
440
+ status: "warn",
441
+ message: `Could not fetch crash report: ${err.message}`,
442
+ });
443
+ crashReport = null;
444
+ }
445
+ }
185
446
  }
186
447
  catch (err) {
187
448
  checks.push({
@@ -190,15 +451,566 @@ export async function doctor(ctx, input = {}) {
190
451
  message: err.message,
191
452
  });
192
453
  }
193
- return summarize(checks);
454
+ return summarize(checks, {
455
+ appName,
456
+ appProvided: !!input.app,
457
+ appStatus,
458
+ lastDeployStatus,
459
+ lastDeployId,
460
+ containerState,
461
+ containerRunning,
462
+ containerHealthy,
463
+ publicRouteHealthy,
464
+ lastCrashSeverity: lastCrashSeverity ?? undefined,
465
+ lastCrashAt,
466
+ replacedResolution,
467
+ inputMode: input.mode,
468
+ buildLogClassification,
469
+ buildProblems,
470
+ crashReport,
471
+ });
194
472
  }
/**
 * Statuses (of the last deploy, and of the app) that mean work is
 * still under way, so the agent should wait rather than act.
 * `replaced` is deliberately left out: it is terminal without being a
 * failure and gets its own routing in Phase 2b.
 */
const TRANSITIONAL_DEPLOY_STATUSES = new Set(["queued", "building", "deploying"]);
const TRANSITIONAL_APP_STATUSES = new Set(["provisioning"]);
/**
 * Assembles doctor's result object from the collected checks.
 *
 * Tallies checks by status (`pass` / `fail` / `warn`; `total` counts
 * every check whose status is not `skip`), then asks `deriveVerdict`
 * for the overall status, diagnosis, recovery and summary.
 *
 * @param {Array<{status: string}>} checks - Check rows gathered so far.
 * @param {object} [vctx={}] - Verdict context forwarded unchanged to
 *   `deriveVerdict`.
 * @returns {object} `{ checks, passed, failed, warned, total, status,
 *   diagnosis, recovery, summary }`.
 */
function summarize(checks, vctx = {}) {
    // One walk over the list instead of one filter per counter.
    const tally = checks.reduce((acc, { status }) => {
        if (status === "pass") {
            acc.passed += 1;
        }
        else if (status === "fail") {
            acc.failed += 1;
        }
        else if (status === "warn") {
            acc.warned += 1;
        }
        if (status !== "skip") {
            acc.total += 1;
        }
        return acc;
    }, { passed: 0, failed: 0, warned: 0, total: 0 });
    const { status, diagnosis, recovery, summary } = deriveVerdict(checks, vctx);
    return { checks, ...tally, status, diagnosis, recovery, summary };
}
498
+ /**
499
+ * FUTURE12 Phase 2a — pattern-match the check list against the
500
+ * blocking cases doctor handles today. Anything we don't recognise
501
+ * yet falls through to a safe `needs_action` / `ask_user` so an
502
+ * agent doesn't think it can auto-resolve from a generic failure.
503
+ * Phase 2b adds the deploy/build/runtime/replaced-specific paths.
504
+ *
505
+ * Two Codex P2 fixes baked into Phase 2a (review of fc267a3):
506
+ *
507
+ * 1. An invalid cwd `percher.toml` MUST NOT block the verdict when
508
+ * the caller passed `--app` — that target was resolved
509
+ * independently of cwd toml. Pre-fix, `doctor --app foo` against
510
+ * a healthy app would still come back blocked/config_invalid if
511
+ * the local toml happened to be broken.
512
+ * 2. Transitional states (`lastDeploy.status` queued/building/
513
+ * deploying, or `app.status` provisioning) MUST surface as
514
+ * `in_progress` + `wait_deploy`, not the warn-collapsed `ok`.
515
+ * Pre-fix, those states looked fully resolved to MCP agents and
516
+ * the new `in_progress` doctor status was effectively
517
+ * unreachable.
518
+ */
519
+ function deriveVerdict(checks, vctx) {
520
+ const findCheck = (name) => checks.find((c) => c.name === name);
521
+ // Case 1 — auth missing. First check in doctor's flow; if it fails
522
+ // we never reach the other gates, so handle it before anything else.
523
+ const auth = findCheck("Auth token");
524
+ if (auth?.status === "fail") {
525
+ return {
526
+ status: "blocked",
527
+ diagnosis: {
528
+ title: "Authentication required",
529
+ explanation: "No Percher token is configured for this CLI.",
530
+ reasonCode: "auth_required",
531
+ phase: "auth",
532
+ },
533
+ recovery: recoveryLogin({ reasonCode: "auth_required" }),
534
+ summary: "Login required: run `percher login` (or set PERCHER_TOKEN).",
535
+ };
536
+ }
537
+ // Case 2 — API can't be reached. Likely-transient infra problem,
538
+ // but doctor can't see whether it'll recover; surface to the user.
539
+ const api = findCheck("API reachability");
540
+ if (api?.status === "fail") {
541
+ return {
542
+ status: "blocked",
543
+ diagnosis: {
544
+ title: "Percher API is unreachable",
545
+ explanation: api.message,
546
+ reasonCode: "infra_unavailable",
547
+ phase: "infra",
548
+ },
549
+ recovery: recoveryAsk({
550
+ prompt: `Percher API is unreachable (${api.message}). This is usually a transient network issue — wait a moment and try again, or check status.percher.app.`,
551
+ reasonCode: "infra_unavailable",
552
+ retryable: true,
553
+ }),
554
+ summary: "Cannot reach the Percher API — try again in a moment.",
555
+ };
556
+ }
557
+ // Case 3 — percher.toml present but invalid. Only blocks the
558
+ // verdict when we actually depend on it (no `--app` provided).
559
+ // With explicit `--app`, the cwd toml is irrelevant for the
560
+ // resolved target, so a broken local file shouldn't poison a
561
+ // healthy app's verdict. (Codex P2 #1 fix.)
562
+ const toml = findCheck("percher.toml");
563
+ if (toml?.status === "fail" && !vctx.appProvided) {
564
+ return {
565
+ status: "blocked",
566
+ diagnosis: {
567
+ title: "Invalid percher.toml",
568
+ explanation: toml.message,
569
+ reasonCode: "config_invalid",
570
+ phase: "config",
571
+ },
572
+ recovery: recoveryFixConfig({
573
+ problems: [
574
+ {
575
+ file: "percher.toml",
576
+ message: toml.message,
577
+ },
578
+ ],
579
+ reasonCode: "config_invalid",
580
+ }),
581
+ summary: "percher.toml is invalid — fix the issues and re-run.",
582
+ };
583
+ }
584
+ // Case 4 — no app to inspect. The "App" skip-row is set when no
585
+ // --app was passed AND no parseable percher.toml was found in cwd.
586
+ const app = findCheck("App");
587
+ if (app?.status === "skip") {
588
+ return {
589
+ status: "blocked",
590
+ diagnosis: {
591
+ title: "No app specified",
592
+ explanation: app.message,
593
+ reasonCode: "config_missing",
594
+ phase: "config",
595
+ },
596
+ recovery: recoveryAsk({
597
+ prompt: `${app.message} — pass \`--app <name>\` or run \`percher init\` in a project directory to generate a percher.toml.`,
598
+ reasonCode: "config_missing",
599
+ }),
600
+ summary: "No percher.toml found and no --app supplied — pass --app or run percher init.",
601
+ };
602
+ }
603
+ // Case 5 — transitional state. A lastDeploy in queued/building/
604
+ // deploying or an app in provisioning means the right next step
605
+ // is to wait, not to act. Surface as `in_progress` + `wait_deploy`
606
+ // pointing at the live deployId so agents call
607
+ // percher_wait_for_deploy with the right args instead of treating
608
+ // warn-only checks as "ok". (Codex P2 #2 fix.)
609
+ const inTransitionalDeploy = !!vctx.lastDeployStatus && TRANSITIONAL_DEPLOY_STATUSES.has(vctx.lastDeployStatus);
610
+ const inTransitionalApp = !!vctx.appStatus && TRANSITIONAL_APP_STATUSES.has(vctx.appStatus);
611
+ if (inTransitionalDeploy || inTransitionalApp) {
612
+ const reasonCode = vctx.lastDeployStatus === "queued"
613
+ ? "deploy_queued"
614
+ : vctx.lastDeployStatus === "building"
615
+ ? "deploy_building"
616
+ : vctx.lastDeployStatus === "deploying"
617
+ ? "deploy_deploying"
618
+ : "deploy_queued";
619
+ const what = inTransitionalApp
620
+ ? `App ${vctx.appName ?? ""} is provisioning`
621
+ : `Last deploy is ${vctx.lastDeployStatus}`;
622
+ const recovery = vctx.lastDeployId && vctx.appName
623
+ ? recoveryWait({
624
+ app: vctx.appName,
625
+ deployId: vctx.lastDeployId,
626
+ reasonCode,
627
+ })
628
+ : recoveryAsk({
629
+ prompt: `${what.trim()} for ${vctx.appName ?? "this app"}. Wait for it to finish before retrying — there's no deployId to track yet.`,
630
+ reasonCode,
631
+ });
632
+ return {
633
+ status: "in_progress",
634
+ diagnosis: {
635
+ title: what.trim(),
636
+ explanation: vctx.lastDeployId
637
+ ? `${what.trim()} (deploy ${vctx.lastDeployId}). Wait for it to finish before retrying.`
638
+ : `${what.trim()}. Wait for it to finish before retrying.`,
639
+ reasonCode,
640
+ phase: "deploy",
641
+ },
642
+ recovery,
643
+ summary: vctx.lastDeployId
644
+ ? `${what.trim()} — wait for deploy ${vctx.lastDeployId}.`
645
+ : `${what.trim()} — wait a moment and re-run.`,
646
+ };
647
+ }
648
+ // For verdict purposes, drop the cwd `percher.toml` check when
649
+ // the caller passed --app. Without this filter, a `fail` toml
650
+ // would force the all-pass-or-warn check below into the
651
+ // `needs_action` branch (case 7) even though the resolved app is
652
+ // fully healthy.
653
+ const verdictChecks = vctx.appProvided && toml?.status === "fail"
654
+ ? checks.filter((c) => c.name !== "percher.toml")
655
+ : checks;
656
+ // Phase 2b — signal-driven dispatches that MUST run before the
657
+ // happy-path collapse below. The `Last deploy` row is rendered
658
+ // as `warn` for any non-live status (replaced, failed, etc), so
659
+ // case 6's all-pass-or-warn predicate would silently hide a
660
+ // failed or replaced deploy. Likewise, a suspended app may
661
+ // surface only as a stopped container (`fail` row) but the
662
+ // dispatch wants to lead with the suspended explanation, not the
663
+ // generic runtime fix.
664
+ // Replaced lastDeploy. Resolution was performed up-front in
665
+ // `doctor()` (deriveVerdict is sync); we just emit what
666
+ // resolveReplaced computed.
667
+ if (vctx.replacedResolution) {
668
+ const r = vctx.replacedResolution;
669
+ const isResolvedLive = r.recovery.nextAction === "none" && !!r.url;
670
+ if (isResolvedLive) {
671
+ return {
672
+ status: "ok",
673
+ recovery: { ...r.recovery, reasonCode: "replaced_by_newer" },
674
+ summary: r.summary,
675
+ };
676
+ }
677
+ const status = r.recovery.nextAction === "wait_deploy" ? "in_progress" : "needs_action";
678
+ return {
679
+ status,
680
+ diagnosis: {
681
+ title: "Last deploy was replaced",
682
+ explanation: r.summary,
683
+ reasonCode: "replaced_by_newer",
684
+ phase: "deploy",
685
+ },
686
+ recovery: { ...r.recovery, reasonCode: "replaced_by_newer" },
687
+ summary: r.summary,
688
+ };
689
+ }
690
+ // lastDeploy.status === "failed". `failed` renders as `warn` in
691
+ // checks[] but is unambiguously not-ok. Doctor can't classify
692
+ // the failure cause from /diagnostics (the build log lives at a
693
+ // different endpoint), so route back to itself with mode='deploy'
694
+ // and the deployId. Phase 4 will migrate publish/wait to the
695
+ // same recovery so deploy-mode expansion has a single owner of
696
+ // build-log inspection.
697
+ if (vctx.lastDeployStatus === "failed") {
698
+ // Loop-break (Codex round 5 P2): if we're already in mode='deploy'
699
+ // (the agent followed our previous run_doctor recovery), emitting
700
+ // run_doctor again with the same args would loop forever. Resolve
701
+ // the failure here instead: use the pre-fetched build-log analysis
702
+ // (Phase 2c, below) to emit a `set_env_vars` / `fix_problems`
703
+ // recovery directly when one can be derived, and otherwise fall
704
+ // back to inspect_build_log — the low-level path that surfaces
705
+ // the raw build log to the agent.
706
+ if (vctx.inputMode === "deploy") {
707
+ // FUTURE12 Phase 2c — deep analysis. We pre-fetched the build
708
+ // log and ran classifyError in doctor() above; if it produced
709
+ // missing env keys or structured file-located problems, emit
710
+ // a specific recovery the agent can act on directly.
711
+ // Otherwise fall back to inspect_build_log (the agent gets
712
+ // the raw log via percher_deploys_inspect).
713
+ const cls = vctx.buildLogClassification;
714
+ // (a) Missing env keys → recoveryEnv. Most actionable case:
715
+ // agent calls percher_env_set with the exact keys.
716
+ if (cls && cls.errorClass === "missing_env" && cls.missingEnvVars.length > 0) {
717
+ return {
718
+ status: "needs_action",
719
+ diagnosis: {
720
+ title: cls.title,
721
+ explanation: cls.explanation,
722
+ reasonCode: "missing_env",
723
+ phase: "build",
724
+ },
725
+ recovery: recoveryEnv({
726
+ app: vctx.appName,
727
+ keys: cls.missingEnvVars,
728
+ }),
729
+ summary: vctx.appName
730
+ ? `Build failed — missing env vars on ${vctx.appName}: ${cls.missingEnvVars.join(", ")}.`
731
+ : `Build failed — missing env vars: ${cls.missingEnvVars.join(", ")}.`,
732
+ };
733
+ }
734
+ // (b) Structured BuildProblems with file locations → fix_problems.
735
+ // The agent can patch files directly without log archeology.
736
+ // Codex round 8 P3 fix: route through
737
+ // `buildProblemToRecoveryProblem` so `BuildProblem.hint` is
738
+ // folded into the message — agents and CLI get `Hint: <text>`
739
+ // appended automatically (matters for problems where the hint
740
+ // carries the actionable next step, e.g. malformed
741
+ // package.json with no line/column).
742
+ const fileProblems = (vctx.buildProblems ?? [])
743
+ .filter((p) => p.file)
744
+ .map(buildProblemToRecoveryProblem);
745
+ if (fileProblems.length > 0) {
746
+ return {
747
+ status: "needs_action",
748
+ diagnosis: {
749
+ title: cls?.title ?? "Build failed with file-located problems",
750
+ explanation: cls?.explanation ??
751
+ `Build extracted ${fileProblems.length} structured problem${fileProblems.length === 1 ? "" : "s"} with file locations. Patch the files directly.`,
752
+ reasonCode: "build_failed",
753
+ phase: "build",
754
+ },
755
+ recovery: recoveryFixProblems({
756
+ problems: fileProblems,
757
+ reasonCode: "build_failed",
758
+ }),
759
+ summary: vctx.lastDeployId
760
+ ? `Deploy ${vctx.lastDeployId} failed — ${fileProblems.length} structured problem${fileProblems.length === 1 ? "" : "s"} to patch.`
761
+ : `Build failed — ${fileProblems.length} structured problem${fileProblems.length === 1 ? "" : "s"} to patch.`,
762
+ };
763
+ }
764
+ // (c) Fallback: classified but unactionable, or unclassified.
765
+ // Hand off to inspect_build_log so the agent can read the raw
766
+ // log. This is also the no-classification path (cls === null).
767
+ return {
768
+ status: "needs_action",
769
+ diagnosis: {
770
+ title: cls?.title ?? "Last deploy failed",
771
+ explanation: cls
772
+ ? `${cls.explanation} Doctor couldn't extract a structured fix; fall back to the build log.`
773
+ : vctx.lastDeployId
774
+ ? `Deploy ${vctx.lastDeployId} terminated in failed state. Doctor couldn't classify the cause from the build log; inspect it directly.`
775
+ : "The most recent deploy ended in failed state. Doctor couldn't classify the cause; inspect the build log.",
776
+ reasonCode: "build_failed",
777
+ phase: "build",
778
+ },
779
+ recovery: recoveryInspectBuildLog({
780
+ deployId: vctx.lastDeployId,
781
+ app: vctx.appName,
782
+ reasonCode: "build_failed",
783
+ }),
784
+ summary: vctx.lastDeployId
785
+ ? `Deploy ${vctx.lastDeployId} failed — inspect the build log.`
786
+ : "Last deploy failed — inspect the latest failed deploy's build log.",
787
+ };
788
+ }
789
+ return {
790
+ status: "needs_action",
791
+ diagnosis: {
792
+ title: "Last deploy failed",
793
+ explanation: vctx.lastDeployId
794
+ ? `Deploy ${vctx.lastDeployId} terminated in failed state. Inspect the build log to find the cause.`
795
+ : "The most recent deploy ended in failed state. Inspect the build log to find the cause.",
796
+ reasonCode: "build_failed",
797
+ phase: "build",
798
+ },
799
+ recovery: {
800
+ retryable: false,
801
+ nextAction: "run_doctor",
802
+ suggestedTool: "percher_doctor",
803
+ args: {
804
+ app: vctx.appName,
805
+ mode: "deploy",
806
+ ...(vctx.lastDeployId ? { deployId: vctx.lastDeployId } : {}),
807
+ },
808
+ reasonCode: "build_failed",
809
+ },
810
+ summary: vctx.lastDeployId
811
+ ? `Deploy ${vctx.lastDeployId} failed — call percher_doctor with mode='deploy'.`
812
+ : "Last deploy failed — call percher_doctor with mode='deploy'.",
813
+ };
814
+ }
815
+ // App suspended. Suspension reason isn't on /diagnostics
816
+ // (lives on the App row's `suspensionReason` /
817
+ // `suspensionOrigin`), so doctor surfaces to the user with a
818
+ // concrete prompt. The reasonCode `quota_exceeded` is a proxy —
819
+ // most owner-resumable suspensions are quota; admin/moderation
820
+ // suspensions would route differently if doctor had access to
821
+ // suspensionOrigin (Phase 6 can plumb that through if needed).
822
+ if (vctx.appStatus === "suspended") {
823
+ return {
824
+ status: "needs_action",
825
+ diagnosis: {
826
+ title: "App is suspended",
827
+ explanation: vctx.appName
828
+ ? `${vctx.appName} is currently suspended. Owner-initiated and quota suspensions can be resumed from the dashboard or via the API.`
829
+ : "This app is currently suspended.",
830
+ reasonCode: "quota_exceeded",
831
+ phase: "infra",
832
+ },
833
+ recovery: recoveryAsk({
834
+ reasonCode: "quota_exceeded",
835
+ prompt: vctx.appName
836
+ ? `${vctx.appName} is suspended. Resume the app from the dashboard (or call the unsuspend API) before retrying.`
837
+ : "This app is suspended. Resume it from the dashboard before retrying.",
838
+ }),
839
+ summary: vctx.appName
840
+ ? `${vctx.appName} is suspended — resume the app before retrying.`
841
+ : "App is suspended — resume it before retrying.",
842
+ };
843
+ }
844
+ // Case 6 — happy path. All app-level checks passed (warns are
845
+ // informational, not blocking).
846
+ if (verdictChecks.every((c) => c.status === "pass" || c.status === "skip" || c.status === "warn")) {
847
+ const passing = verdictChecks.filter((c) => c.status === "pass").length;
848
+ const noun = passing === 1 ? "check" : "checks";
849
+ return {
850
+ status: "ok",
851
+ recovery: recoveryNone({ reasonCode: "none" }),
852
+ summary: vctx.appName
853
+ ? `All ${passing} ${noun} passed for ${vctx.appName}.`
854
+ : `All ${passing} ${noun} passed.`,
855
+ };
856
+ }
857
+ // Case 7 — at least one app-level check failed. Phase 2b refines
858
+ // the previous catch-all `ask_user`/`unknown` into specific
859
+ // dispatches based on structured signals from `/diagnostics`.
860
+ // The signal-driven cases that case 6 would otherwise collapse
861
+ // (replaced/failed lastDeploy, suspended app) are handled above
862
+ // case 6. The remaining checks-driven cases run here:
863
+ // crashed/runtime-down → public-route blip → genuinely unknown.
864
+ // 7a. Runtime crashed. App marked crashed, OR container exited /
865
+ // not running (without a clearer signal), OR direct container-
866
+ // health probe is failing. All three converge on the same
867
+ // recovery: hand back to doctor with `mode: "runtime"` so the
868
+ // runtime-focused expansion (Phase 2 step 9) can take it from
869
+ // here. Self-recursion via mode hint is the explicit Phase 2
870
+ // contract — input mode disambiguates the dispatch.
871
+ const containerDown = (vctx.containerRunning === false &&
872
+ vctx.containerState !== undefined &&
873
+ vctx.containerState !== "not-found") ||
874
+ vctx.containerHealthy === false;
875
+ if (vctx.appStatus === "crashed" || containerDown) {
876
+ const containerCheck = findCheck("Container");
877
+ const healthCheck = findCheck("Container health");
878
+ const lastCrashCheck = findCheck("Last crash");
879
+ const explanationBits = [];
880
+ if (vctx.appStatus === "crashed")
881
+ explanationBits.push("App is in crashed state.");
882
+ if (containerCheck?.status === "fail")
883
+ explanationBits.push(containerCheck.message);
884
+ if (healthCheck?.status === "fail")
885
+ explanationBits.push(healthCheck.message);
886
+ if (lastCrashCheck)
887
+ explanationBits.push(lastCrashCheck.message);
888
+ const explanation = explanationBits.length > 0
889
+ ? explanationBits.join(" ")
890
+ : "Runtime is not responding — container or health check is failing.";
891
+ // Loop-break (Codex round 5 P2): if we're already in
892
+ // mode='runtime', emit a concrete ask_user with the crash
893
+ // details rather than recursing into ourselves. Phase 2c
894
+ // (below) enriches that prompt with the crash report when one
895
+ // is available (the crash-handler watchdog already produces
896
+ // structured AI-generated explanations; doctor surfaces them).
897
+ if (vctx.inputMode === "runtime") {
898
+ // FUTURE12 Phase 2c — surface the crash report's
899
+ // AI-generated explanation + suggestion in the prompt when
900
+ // available. The watchdog/crash-handler already produces
901
+ // these via /apps/:app/crash-report; doctor just needs to
902
+ // pull them through.
903
+ const cr = vctx.crashReport;
904
+ const hasAnalysis = cr && cr.analysisStatus === "completed";
905
+ const promptParts = [];
906
+ if (hasAnalysis && cr.explanation) {
907
+ promptParts.push(`Crash explanation: ${cr.explanation}`);
908
+ }
909
+ if (hasAnalysis && cr.suggestion) {
910
+ promptParts.push(`Suggested fix: ${cr.suggestion}`);
911
+ }
912
+ if (cr) {
913
+ const exitInfo = cr.oomKilled ? `exit ${cr.exitCode} (OOM killed)` : `exit ${cr.exitCode}`;
914
+ promptParts.push(`Container ${exitInfo} at ${cr.createdAt}.`);
915
+ }
916
+ // Always include the basic explanation so the user has
917
+ // something to read even if no crash report exists.
918
+ promptParts.push(explanation);
919
+ promptParts.push(vctx.appName
920
+ ? `Decide whether to redeploy, raise the memory plan, or fix the code. Use percher_diagnose_crash for ${vctx.appName} if you need the full log tail.`
921
+ : "Decide whether to redeploy, raise the memory plan, or fix the code.");
922
+ return {
923
+ status: "needs_action",
924
+ diagnosis: {
925
+ title: vctx.appStatus === "crashed" ? "App crashed" : "Runtime not responding",
926
+ explanation: hasAnalysis && cr.explanation ? `${cr.explanation} ${explanation}` : explanation,
927
+ reasonCode: "runtime_crashed",
928
+ phase: "runtime",
929
+ },
930
+ recovery: recoveryAsk({
931
+ reasonCode: "runtime_crashed",
932
+ prompt: promptParts.join(" "),
933
+ options: cr?.severity === "critical" ? ["redeploy", "fix code", "upgrade plan"] : undefined,
934
+ }),
935
+ summary: vctx.appName
936
+ ? hasAnalysis
937
+ ? `${vctx.appName} crashed — ${cr.suggestion ?? cr.explanation ?? "surface the crash details to the user."}`
938
+ : `${vctx.appName} runtime is unhealthy — surface the crash details to the user.`
939
+ : "Runtime is unhealthy — surface the crash details to the user.",
940
+ };
941
+ }
942
+ return {
943
+ status: "needs_action",
944
+ diagnosis: {
945
+ title: vctx.appStatus === "crashed" ? "App crashed" : "Runtime not responding",
946
+ explanation,
947
+ reasonCode: "runtime_crashed",
948
+ phase: "runtime",
949
+ },
950
+ recovery: {
951
+ retryable: false,
952
+ nextAction: "run_doctor",
953
+ suggestedTool: "percher_doctor",
954
+ args: { app: vctx.appName, mode: "runtime" },
955
+ reasonCode: "runtime_crashed",
956
+ },
957
+ summary: vctx.appName
958
+ ? `${vctx.appName} runtime needs investigation — call percher_doctor with mode='runtime'.`
959
+ : "Runtime needs investigation — call percher_doctor with mode='runtime'.",
960
+ };
961
+ }
962
+ // 7b. Public route is the only thing failing — container is up
963
+ // and healthy, but the external probe via Caddy/TLS/DNS isn't
964
+ // responding. This is usually a transient route-reconcile blip
965
+ // that self-heals; recommend `retry` so the agent re-runs doctor
966
+ // (or the user retries publish) rather than asking the user to
967
+ // act manually.
968
+ if (vctx.publicRouteHealthy === false) {
969
+ const routeCheck = findCheck("Public route");
970
+ return {
971
+ status: "needs_action",
972
+ diagnosis: {
973
+ title: "Public route is not responding",
974
+ explanation: routeCheck?.message ??
975
+ "Container looks healthy but the public URL isn't responding — route reconcile usually self-heals.",
976
+ reasonCode: "infra_transient",
977
+ phase: "infra",
978
+ },
979
+ recovery: {
980
+ retryable: true,
981
+ nextAction: "retry",
982
+ suggestedTool: "percher_doctor",
983
+ args: { app: vctx.appName },
984
+ reasonCode: "infra_transient",
985
+ },
986
+ summary: vctx.appName
987
+ ? `${vctx.appName} public route is failing — likely transient, re-run doctor in a moment.`
988
+ : "Public route is failing — likely transient, re-run doctor in a moment.",
989
+ };
990
+ }
991
+ // 7c. Genuinely unknown — none of the structured signals above
992
+ // matched. Keep the safe fallback so an agent doesn't think it
993
+ // can auto-resolve. This is what Phase 2a shipped; Phase 2b only
994
+ // narrows the surface that lands here.
995
+ const firstFailed = verdictChecks.find((c) => c.status === "fail");
996
+ return {
997
+ status: "needs_action",
998
+ diagnosis: firstFailed
999
+ ? {
1000
+ title: firstFailed.name,
1001
+ explanation: firstFailed.message,
1002
+ reasonCode: "unknown",
1003
+ }
1004
+ : undefined,
1005
+ recovery: recoveryAsk({
1006
+ prompt: firstFailed
1007
+ ? `Doctor flagged a problem: ${firstFailed.name} — ${firstFailed.message}. Review the checks list and surface the failing item to the user.`
1008
+ : "Doctor reported one or more issues. Review the checks list and surface the failing items to the user.",
1009
+ reasonCode: "unknown",
1010
+ }),
1011
+ summary: firstFailed
1012
+ ? `${firstFailed.name} failed: ${firstFailed.message}`
1013
+ : "Doctor reported issues — review the checks list.",
202
1014
  };
203
1015
  }
204
1016
  //# sourceMappingURL=doctor.js.map