@percher/core 0.4.2 → 0.4.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (61) hide show
  1. package/dist/commands/cache.d.ts +23 -0
  2. package/dist/commands/cache.d.ts.map +1 -0
  3. package/dist/commands/cache.js +24 -0
  4. package/dist/commands/cache.js.map +1 -0
  5. package/dist/commands/create.d.ts.map +1 -1
  6. package/dist/commands/create.js +1 -1
  7. package/dist/commands/create.js.map +1 -1
  8. package/dist/commands/dashboard.d.ts.map +1 -1
  9. package/dist/commands/dashboard.js +12 -1
  10. package/dist/commands/dashboard.js.map +1 -1
  11. package/dist/commands/doctor.d.ts +126 -0
  12. package/dist/commands/doctor.d.ts.map +1 -1
  13. package/dist/commands/doctor.js +445 -313
  14. package/dist/commands/doctor.js.map +1 -1
  15. package/dist/commands/preview-branch.d.ts +31 -0
  16. package/dist/commands/preview-branch.d.ts.map +1 -0
  17. package/dist/commands/preview-branch.js +55 -0
  18. package/dist/commands/preview-branch.js.map +1 -0
  19. package/dist/commands/publish-api-error.d.ts +15 -0
  20. package/dist/commands/publish-api-error.d.ts.map +1 -1
  21. package/dist/commands/publish-api-error.js +155 -6
  22. package/dist/commands/publish-api-error.js.map +1 -1
  23. package/dist/commands/publish-failure.d.ts +3 -1
  24. package/dist/commands/publish-failure.d.ts.map +1 -1
  25. package/dist/commands/publish-failure.js +11 -7
  26. package/dist/commands/publish-failure.js.map +1 -1
  27. package/dist/commands/publish.d.ts.map +1 -1
  28. package/dist/commands/publish.js +62 -328
  29. package/dist/commands/publish.js.map +1 -1
  30. package/dist/commands/push.d.ts.map +1 -1
  31. package/dist/commands/push.js +4 -4
  32. package/dist/commands/push.js.map +1 -1
  33. package/dist/commands/redeploy.js +3 -3
  34. package/dist/commands/redeploy.js.map +1 -1
  35. package/dist/commands/rename.d.ts +7 -0
  36. package/dist/commands/rename.d.ts.map +1 -1
  37. package/dist/commands/rename.js +32 -1
  38. package/dist/commands/rename.js.map +1 -1
  39. package/dist/commands/wait-deploy.d.ts +12 -2
  40. package/dist/commands/wait-deploy.d.ts.map +1 -1
  41. package/dist/commands/wait-deploy.js +115 -46
  42. package/dist/commands/wait-deploy.js.map +1 -1
  43. package/dist/errors.d.ts +1 -1
  44. package/dist/errors.d.ts.map +1 -1
  45. package/dist/errors.js.map +1 -1
  46. package/dist/index.d.ts +2 -0
  47. package/dist/index.d.ts.map +1 -1
  48. package/dist/index.js +2 -0
  49. package/dist/index.js.map +1 -1
  50. package/dist/poll-deployment.d.ts +3 -3
  51. package/dist/poll-deployment.d.ts.map +1 -1
  52. package/dist/poll-deployment.js +5 -4
  53. package/dist/poll-deployment.js.map +1 -1
  54. package/dist/publish-retry.d.ts.map +1 -1
  55. package/dist/publish-retry.js +2 -3
  56. package/dist/publish-retry.js.map +1 -1
  57. package/package.json +4 -4
  58. package/dist/commands/continue.d.ts +0 -48
  59. package/dist/commands/continue.d.ts.map +0 -1
  60. package/dist/commands/continue.js +0 -121
  61. package/dist/commands/continue.js.map +0 -1
@@ -189,23 +189,27 @@ export async function doctor(ctx, input = {}) {
189
189
  ? "No container found"
190
190
  : `State: ${diag.container.state}`,
191
191
  });
192
- // Container health — direct HTTP to container
192
+ // Container health — direct HTTP to container. The API omits
193
+ // latencyMs when it has no fresh probe sample — skip the suffix
194
+ // rather than rendering "(nullms)".
193
195
  if (diag.containerHealth) {
196
+ const latency = diag.containerHealth.latencyMs != null ? ` (${diag.containerHealth.latencyMs}ms)` : "";
194
197
  checks.push({
195
198
  name: "Container health",
196
199
  status: diag.containerHealth.healthy ? "pass" : "fail",
197
200
  message: diag.containerHealth.healthy
198
- ? `${diag.containerHealth.path} OK (${diag.containerHealth.latencyMs}ms)`
201
+ ? `${diag.containerHealth.path} OK${latency}`
199
202
  : `${diag.containerHealth.path} — not responding`,
200
203
  });
201
204
  }
202
205
  // Public route — tests actual Caddy/TLS/DNS path
203
206
  if (diag.publicRoute) {
207
+ const latency = diag.publicRoute.latencyMs != null ? ` (${diag.publicRoute.latencyMs}ms)` : "";
204
208
  checks.push({
205
209
  name: "Public route",
206
210
  status: diag.publicRoute.healthy ? "pass" : "fail",
207
211
  message: diag.publicRoute.healthy
208
- ? `${diag.publicRoute.url} (${diag.publicRoute.latencyMs}ms)`
212
+ ? `${diag.publicRoute.url}${latency}`
209
213
  : `${diag.publicRoute.url} — not responding`,
210
214
  });
211
215
  }
@@ -495,7 +499,84 @@ function summarize(checks, vctx = {}) {
495
499
  summary: verdict.summary,
496
500
  };
497
501
  }
502
/**
 * Builds the context object that verdict rules receive.
 *
 * Both parameters are optional (matching `summarize`, which also defaults
 * `vctx` to `{}`), so a caller with no runtime signals gets a usable context
 * instead of a TypeError.
 *
 * @param {Array<object>} [checks=[]] Check rows; this function reads each
 *   row's `name` and `status`.
 * @param {object} [vctx={}] Verdict signals. Fields read here: `appProvided`,
 *   `containerRunning`, `containerState`, `containerHealthy`.
 * @returns {{checks: Array<object>, vctx: object, findCheck: Function,
 *   verdictChecks: Array<object>, containerDown: boolean}}
 *   `findCheck(name)` returns the first row with that name (or `undefined`);
 *   `verdictChecks` is `checks` minus the cwd `percher.toml` row when that
 *   row failed and `--app` was provided; `containerDown` is described below.
 */
export function buildVerdictRuleContext(checks = [], vctx = {}) {
    const findCheck = (name) => checks.find((c) => c.name === name);
    // For verdict purposes, drop the cwd `percher.toml` check when
    // the caller passed --app. Without this filter, a `fail` toml
    // would force the all-pass-or-warn check into the `needs_action`
    // branch even though the resolved app is fully healthy.
    const toml = findCheck("percher.toml");
    const verdictChecks = vctx.appProvided && toml?.status === "fail"
        ? checks.filter((c) => c.name !== "percher.toml")
        : checks;
    // Down when the container is explicitly not running in a known state
    // other than "not-found", or when the health probe explicitly failed.
    // Strict `=== false` comparisons keep absent signals from counting as down.
    const containerDown = (vctx.containerRunning === false &&
        vctx.containerState !== undefined &&
        vctx.containerState !== "not-found") ||
        vctx.containerHealthy === false;
    return { checks, vctx, findCheck, verdictChecks, containerDown };
}
498
519
  /**
520
+ * Shared by the two runtime-crashed rules — assembles the human
521
+ * explanation from the failing container/health/crash checks.
522
+ */
523
function runtimeCrashExplanation({ vctx, findCheck }) {
    // Input: a rule context; only `vctx.appStatus` and `findCheck(name)` are
    // used. Output: a single string — the collected messages joined by one
    // space, or a generic sentence when nothing usable was collected.
    const containerCheck = findCheck("Container");
    const healthCheck = findCheck("Container health");
    const lastCrashCheck = findCheck("Last crash");
    // Candidates in display order. Container and health messages are included
    // only when that check failed; the last-crash message is included
    // whenever the row exists, regardless of its status.
    const candidates = [
        vctx.appStatus === "crashed" ? "App is in crashed state." : undefined,
        containerCheck?.status === "fail" ? containerCheck.message : undefined,
        healthCheck?.status === "fail" ? healthCheck.message : undefined,
        lastCrashCheck?.message,
    ];
    // Drop missing/blank messages. `join` renders `undefined` as an empty
    // string, which would otherwise leave stray spaces or yield an empty
    // explanation that bypasses the generic fallback below.
    const explanationBits = candidates.filter((bit) => bit != null && String(bit).trim() !== "");
    return explanationBits.length > 0
        ? explanationBits.join(" ")
        : "Runtime is not responding — container or health check is failing.";
}
540
+ // 7c. Genuinely unknown — none of the structured signals above
541
+ // matched. Keep the safe fallback so an agent doesn't think it
542
+ // can auto-resolve. This is what Phase 2a shipped; Phase 2b only
543
+ // narrows the surface that lands here.
544
+ //
545
+ // Defined as a named const (and appended as the table's last rule)
546
+ // so narrowing-guard verdicts and `deriveVerdict` can delegate to it.
547
const FALLBACK_RULE = {
    id: "unknown-fallback",
    // Unconditional match: this rule accepts every context.
    when: () => true,
    // Builds a `needs_action` verdict around the first failing row in
    // `verdictChecks`; when no row has status "fail", emits the generic
    // wording and leaves `diagnosis` undefined.
    verdict: ({ verdictChecks }) => {
        const culprit = verdictChecks.find(({ status }) => status === "fail");
        if (!culprit) {
            return {
                status: "needs_action",
                diagnosis: undefined,
                recovery: recoveryAsk({
                    prompt: "Doctor reported one or more issues. Review the checks list and surface the failing items to the user.",
                    reasonCode: "unknown",
                }),
                summary: "Doctor reported issues — review the checks list.",
            };
        }
        const { name, message } = culprit;
        return {
            status: "needs_action",
            diagnosis: {
                title: name,
                explanation: message,
                reasonCode: "unknown",
            },
            recovery: recoveryAsk({
                prompt: `Doctor flagged a problem: ${name} — ${message}. Review the checks list and surface the failing item to the user.`,
                reasonCode: "unknown",
            }),
            summary: `${name} failed: ${message}`,
        };
    },
};
573
+ /**
574
+ * Verdict rule table — evaluated top-to-bottom, FIRST MATCH WINS.
575
+ * Array order is load-bearing: earlier rules shadow later ones, and
576
+ * several rules are only correct because a more specific rule sits
577
+ * above them. Each rule carries its ordering rationale as a comment;
578
+ * insert new rules at a justified position, never just append.
579
+ *
499
580
  * FUTURE12 Phase 2a — pattern-match the check list against the
500
581
  * blocking cases doctor handles today. Anything we don't recognise
501
582
  * yet falls through to a safe `needs_action` / `ask_user` so an
@@ -516,13 +597,13 @@ function summarize(checks, vctx = {}) {
516
597
  * the new `in_progress` doctor status was effectively
517
598
  * unreachable.
518
599
  */
519
- function deriveVerdict(checks, vctx) {
520
- const findCheck = (name) => checks.find((c) => c.name === name);
600
+ export const VERDICT_RULES = [
521
601
  // Case 1 — auth missing. First check in doctor's flow; if it fails
522
602
  // we never reach the other gates, so handle it before anything else.
523
- const auth = findCheck("Auth token");
524
- if (auth?.status === "fail") {
525
- return {
603
+ {
604
+ id: "auth-missing",
605
+ when: ({ findCheck }) => findCheck("Auth token")?.status === "fail",
606
+ verdict: () => ({
526
607
  status: "blocked",
527
608
  diagnosis: {
528
609
  title: "Authentication required",
@@ -532,127 +613,138 @@ function deriveVerdict(checks, vctx) {
532
613
  },
533
614
  recovery: recoveryLogin({ reasonCode: "auth_required" }),
534
615
  summary: "Login required: run `percher login` (or set PERCHER_TOKEN).",
535
- };
536
- }
616
+ }),
617
+ },
537
618
  // Case 2 — API can't be reached. Likely-transient infra problem,
538
619
  // but doctor can't see whether it'll recover; surface to the user.
539
- const api = findCheck("API reachability");
540
- if (api?.status === "fail") {
541
- return {
542
- status: "blocked",
543
- diagnosis: {
544
- title: "Percher API is unreachable",
545
- explanation: api.message,
546
- reasonCode: "infra_unavailable",
547
- phase: "infra",
548
- },
549
- recovery: recoveryAsk({
550
- prompt: `Percher API is unreachable (${api.message}). This is usually a transient network issue — wait a moment and try again, or check status.percher.app.`,
551
- reasonCode: "infra_unavailable",
552
- retryable: true,
553
- }),
554
- summary: "Cannot reach the Percher API — try again in a moment.",
555
- };
556
- }
620
+ {
621
+ id: "api-unreachable",
622
+ when: ({ findCheck }) => findCheck("API reachability")?.status === "fail",
623
+ verdict: ({ findCheck }) => {
624
+ const message = findCheck("API reachability")?.message ?? "";
625
+ return {
626
+ status: "blocked",
627
+ diagnosis: {
628
+ title: "Percher API is unreachable",
629
+ explanation: message,
630
+ reasonCode: "infra_unavailable",
631
+ phase: "infra",
632
+ },
633
+ recovery: recoveryAsk({
634
+ prompt: `Percher API is unreachable (${message}). This is usually a transient network issue — wait a moment and try again, or check status.percher.app.`,
635
+ reasonCode: "infra_unavailable",
636
+ retryable: true,
637
+ }),
638
+ summary: "Cannot reach the Percher API — try again in a moment.",
639
+ };
640
+ },
641
+ },
557
642
  // Case 3 — percher.toml present but invalid. Only blocks the
558
643
  // verdict when we actually depend on it (no `--app` provided).
559
644
  // With explicit `--app`, the cwd toml is irrelevant for the
560
645
  // resolved target, so a broken local file shouldn't poison a
561
646
  // healthy app's verdict. (Codex P2 #1 fix.)
562
- const toml = findCheck("percher.toml");
563
- if (toml?.status === "fail" && !vctx.appProvided) {
564
- return {
565
- status: "blocked",
566
- diagnosis: {
567
- title: "Invalid percher.toml",
568
- explanation: toml.message,
569
- reasonCode: "config_invalid",
570
- phase: "config",
571
- },
572
- recovery: recoveryFixConfig({
573
- problems: [
574
- {
575
- file: "percher.toml",
576
- message: toml.message,
577
- },
578
- ],
579
- reasonCode: "config_invalid",
580
- }),
581
- summary: "percher.toml is invalid — fix the issues and re-run.",
582
- };
583
- }
647
+ {
648
+ id: "toml-invalid",
649
+ when: ({ vctx, findCheck }) => findCheck("percher.toml")?.status === "fail" && !vctx.appProvided,
650
+ verdict: ({ findCheck }) => {
651
+ const message = findCheck("percher.toml")?.message ?? "";
652
+ return {
653
+ status: "blocked",
654
+ diagnosis: {
655
+ title: "Invalid percher.toml",
656
+ explanation: message,
657
+ reasonCode: "config_invalid",
658
+ phase: "config",
659
+ },
660
+ recovery: recoveryFixConfig({
661
+ problems: [
662
+ {
663
+ file: "percher.toml",
664
+ message,
665
+ },
666
+ ],
667
+ reasonCode: "config_invalid",
668
+ }),
669
+ summary: "percher.toml is invalid — fix the issues and re-run.",
670
+ };
671
+ },
672
+ },
584
673
  // Case 4 — no app to inspect. The "App" skip-row is set when no
585
674
  // --app was passed AND no parseable percher.toml was found in cwd.
586
- const app = findCheck("App");
587
- if (app?.status === "skip") {
588
- return {
589
- status: "blocked",
590
- diagnosis: {
591
- title: "No app specified",
592
- explanation: app.message,
593
- reasonCode: "config_missing",
594
- phase: "config",
595
- },
596
- recovery: recoveryAsk({
597
- prompt: `${app.message} — pass \`--app <name>\` or run \`percher init\` in a project directory to generate a percher.toml.`,
598
- reasonCode: "config_missing",
599
- }),
600
- summary: "No percher.toml found and no --app supplied — pass --app or run percher init.",
601
- };
602
- }
675
+ {
676
+ id: "app-unresolved",
677
+ when: ({ findCheck }) => findCheck("App")?.status === "skip",
678
+ verdict: ({ findCheck }) => {
679
+ const message = findCheck("App")?.message ?? "";
680
+ return {
681
+ status: "blocked",
682
+ diagnosis: {
683
+ title: "No app specified",
684
+ explanation: message,
685
+ reasonCode: "config_missing",
686
+ phase: "config",
687
+ },
688
+ recovery: recoveryAsk({
689
+ prompt: `${message} — pass \`--app <name>\` or run \`percher init\` in a project directory to generate a percher.toml.`,
690
+ reasonCode: "config_missing",
691
+ }),
692
+ summary: "No percher.toml found and no --app supplied — pass --app or run percher init.",
693
+ };
694
+ },
695
+ },
603
696
  // Case 5 — transitional state. A lastDeploy in queued/building/
604
697
  // deploying or an app in provisioning means the right next step
605
698
  // is to wait, not to act. Surface as `in_progress` + `wait_deploy`
606
699
  // pointing at the live deployId so agents call
607
700
  // percher_wait_for_deploy with the right args instead of treating
608
701
  // warn-only checks as "ok". (Codex P2 #2 fix.)
609
- const inTransitionalDeploy = !!vctx.lastDeployStatus && TRANSITIONAL_DEPLOY_STATUSES.has(vctx.lastDeployStatus);
610
- const inTransitionalApp = !!vctx.appStatus && TRANSITIONAL_APP_STATUSES.has(vctx.appStatus);
611
- if (inTransitionalDeploy || inTransitionalApp) {
612
- const reasonCode = vctx.lastDeployStatus === "queued"
613
- ? "deploy_queued"
614
- : vctx.lastDeployStatus === "building"
615
- ? "deploy_building"
616
- : vctx.lastDeployStatus === "deploying"
617
- ? "deploy_deploying"
618
- : "deploy_queued";
619
- const what = inTransitionalApp
620
- ? `App ${vctx.appName ?? ""} is provisioning`
621
- : `Last deploy is ${vctx.lastDeployStatus}`;
622
- const recovery = vctx.lastDeployId && vctx.appName
623
- ? recoveryWait({
624
- app: vctx.appName,
625
- deployId: vctx.lastDeployId,
626
- reasonCode,
627
- })
628
- : recoveryAsk({
629
- prompt: `${what.trim()} for ${vctx.appName ?? "this app"}. Wait for it to finish before retrying — there's no deployId to track yet.`,
630
- reasonCode,
631
- });
632
- return {
633
- status: "in_progress",
634
- diagnosis: {
635
- title: what.trim(),
636
- explanation: vctx.lastDeployId
637
- ? `${what.trim()} (deploy ${vctx.lastDeployId}). Wait for it to finish before retrying.`
638
- : `${what.trim()}. Wait for it to finish before retrying.`,
639
- reasonCode,
640
- phase: "deploy",
641
- },
642
- recovery,
643
- summary: vctx.lastDeployId
644
- ? `${what.trim()} wait for deploy ${vctx.lastDeployId}.`
645
- : `${what.trim()} — wait a moment and re-run.`,
646
- };
647
- }
648
- // For verdict purposes, drop the cwd `percher.toml` check when
649
- // the caller passed --app. Without this filter, a `fail` toml
650
- // would force the all-pass-or-warn check below into the
651
- // `needs_action` branch (case 7) even though the resolved app is
652
- // fully healthy.
653
- const verdictChecks = vctx.appProvided && toml?.status === "fail"
654
- ? checks.filter((c) => c.name !== "percher.toml")
655
- : checks;
702
+ //
703
+ // Sits above the replaced/failed rules: a provisioning app with a
704
+ // stale terminal lastDeploy should wait, not act.
705
+ {
706
+ id: "deploy-transitional",
707
+ when: ({ vctx }) => (!!vctx.lastDeployStatus && TRANSITIONAL_DEPLOY_STATUSES.has(vctx.lastDeployStatus)) ||
708
+ (!!vctx.appStatus && TRANSITIONAL_APP_STATUSES.has(vctx.appStatus)),
709
+ verdict: ({ vctx }) => {
710
+ const inTransitionalApp = !!vctx.appStatus && TRANSITIONAL_APP_STATUSES.has(vctx.appStatus);
711
+ const reasonCode = vctx.lastDeployStatus === "queued"
712
+ ? "deploy_queued"
713
+ : vctx.lastDeployStatus === "building"
714
+ ? "deploy_building"
715
+ : vctx.lastDeployStatus === "deploying"
716
+ ? "deploy_deploying"
717
+ : "deploy_queued";
718
+ const what = inTransitionalApp
719
+ ? `App ${vctx.appName ?? ""} is provisioning`
720
+ : `Last deploy is ${vctx.lastDeployStatus}`;
721
+ const recovery = vctx.lastDeployId && vctx.appName
722
+ ? recoveryWait({
723
+ app: vctx.appName,
724
+ deployId: vctx.lastDeployId,
725
+ reasonCode,
726
+ })
727
+ : recoveryAsk({
728
+ prompt: `${what.trim()} for ${vctx.appName ?? "this app"}. Wait for it to finish before retrying — there's no deployId to track yet.`,
729
+ reasonCode,
730
+ });
731
+ return {
732
+ status: "in_progress",
733
+ diagnosis: {
734
+ title: what.trim(),
735
+ explanation: vctx.lastDeployId
736
+ ? `${what.trim()} (deploy ${vctx.lastDeployId}). Wait for it to finish before retrying.`
737
+ : `${what.trim()}. Wait for it to finish before retrying.`,
738
+ reasonCode,
739
+ phase: "deploy",
740
+ },
741
+ recovery,
742
+ summary: vctx.lastDeployId
743
+ ? `${what.trim()} wait for deploy ${vctx.lastDeployId}.`
744
+ : `${what.trim()} wait a moment and re-run.`,
745
+ };
746
+ },
747
+ },
656
748
  // Phase 2b — signal-driven dispatches that MUST run before the
657
749
  // happy-path collapse below. The `Last deploy` row is rendered
658
750
  // as `warn` for any non-live status (replaced, failed, etc), so
@@ -664,29 +756,36 @@ function deriveVerdict(checks, vctx) {
664
756
  // Replaced lastDeploy. Resolution was performed up-front in
665
757
  // `doctor()` (deriveVerdict is sync); we just emit what
666
758
  // resolveReplaced computed.
667
- if (vctx.replacedResolution) {
668
- const r = vctx.replacedResolution;
669
- const isResolvedLive = r.recovery.nextAction === "none" && !!r.url;
670
- if (isResolvedLive) {
759
+ {
760
+ id: "deploy-replaced",
761
+ when: ({ vctx }) => !!vctx.replacedResolution,
762
+ verdict: (ctx) => {
763
+ const r = ctx.vctx.replacedResolution;
764
+ // Unreachable: `when` requires replacedResolution.
765
+ if (!r)
766
+ return FALLBACK_RULE.verdict(ctx);
767
+ const isResolvedLive = r.recovery.nextAction === "none" && !!r.url;
768
+ if (isResolvedLive) {
769
+ return {
770
+ status: "ok",
771
+ recovery: { ...r.recovery, reasonCode: "replaced_by_newer" },
772
+ summary: r.summary,
773
+ };
774
+ }
775
+ const status = r.recovery.nextAction === "wait_deploy" ? "in_progress" : "needs_action";
671
776
  return {
672
- status: "ok",
777
+ status,
778
+ diagnosis: {
779
+ title: "Last deploy was replaced",
780
+ explanation: r.summary,
781
+ reasonCode: "replaced_by_newer",
782
+ phase: "deploy",
783
+ },
673
784
  recovery: { ...r.recovery, reasonCode: "replaced_by_newer" },
674
785
  summary: r.summary,
675
786
  };
676
- }
677
- const status = r.recovery.nextAction === "wait_deploy" ? "in_progress" : "needs_action";
678
- return {
679
- status,
680
- diagnosis: {
681
- title: "Last deploy was replaced",
682
- explanation: r.summary,
683
- reasonCode: "replaced_by_newer",
684
- phase: "deploy",
685
- },
686
- recovery: { ...r.recovery, reasonCode: "replaced_by_newer" },
687
- summary: r.summary,
688
- };
689
- }
787
+ },
788
+ },
690
789
  // lastDeploy.status === "failed". `failed` renders as `warn` in
691
790
  // checks[] but is unambiguously not-ok. Doctor can't classify
692
791
  // the failure cause from /diagnostics (the build log lives at a
@@ -694,76 +793,106 @@ function deriveVerdict(checks, vctx) {
694
793
  // and the deployId. Phase 4 will migrate publish/wait to the
695
794
  // same recovery so deploy-mode expansion has a single owner of
696
795
  // build-log inspection.
697
- if (vctx.lastDeployStatus === "failed") {
698
- // Loop-break (Codex round 5 P2): if we're already in mode='deploy'
699
- // (the agent followed our previous run_doctor recovery), emitting
700
- // run_doctor again with the same args would loop forever. Fall
701
- // back to inspect_build_log — the existing low-level path that
702
- // surfaces the build log to the agent. Phase 2c will replace
703
- // this with deeper analysis (build-log fetch + classification
704
- // here in doctor) so the agent gets a `set_env_vars` /
705
- // `fix_problems` recovery directly.
706
- if (vctx.inputMode === "deploy") {
707
- // FUTURE12 Phase 2c — deep analysis. We pre-fetched the build
708
- // log and ran classifyError in doctor() above; if it produced
709
- // missing env keys or structured file-located problems, emit
710
- // a specific recovery the agent can act on directly.
711
- // Otherwise fall back to inspect_build_log (the agent gets
712
- // the raw log via percher_deploys_inspect).
796
+ //
797
+ // Loop-break (Codex round 5 P2): if we're already in mode='deploy'
798
+ // (the agent followed our previous run_doctor recovery), emitting
799
+ // run_doctor again with the same args would loop forever. Fall
800
+ // back to inspect_build_log — the existing low-level path that
801
+ // surfaces the build log to the agent. Phase 2c will replace
802
+ // this with deeper analysis (build-log fetch + classification
803
+ // here in doctor) so the agent gets a `set_env_vars` /
804
+ // `fix_problems` recovery directly.
805
+ //
806
+ // FUTURE12 Phase 2c — deep analysis. We pre-fetched the build
807
+ // log and ran classifyError in doctor() above; if it produced
808
+ // missing env keys or structured file-located problems, emit
809
+ // a specific recovery the agent can act on directly.
810
+ // Otherwise fall back to inspect_build_log (the agent gets
811
+ // the raw log via percher_deploys_inspect).
812
+ //
813
+ // The three mode='deploy' rules below are ordered most- to
814
+ // least-actionable, and all sit above the no-mode dispatch rule.
815
+ // (a) Missing env keys → recoveryEnv. Most actionable case:
816
+ // agent calls percher_env_set with the exact keys.
817
+ {
818
+ id: "deploy-failed-missing-env",
819
+ when: ({ vctx }) => {
820
+ const cls = vctx.buildLogClassification;
821
+ return (vctx.lastDeployStatus === "failed" &&
822
+ vctx.inputMode === "deploy" &&
823
+ !!cls &&
824
+ cls.errorClass === "missing_env" &&
825
+ cls.missingEnvVars.length > 0);
826
+ },
827
+ verdict: (ctx) => {
828
+ const { vctx } = ctx;
829
+ const cls = vctx.buildLogClassification;
830
+ // Unreachable: `when` requires the classification.
831
+ if (!cls)
832
+ return FALLBACK_RULE.verdict(ctx);
833
+ return {
834
+ status: "needs_action",
835
+ diagnosis: {
836
+ title: cls.title,
837
+ explanation: cls.explanation,
838
+ reasonCode: "missing_env",
839
+ phase: "build",
840
+ },
841
+ recovery: recoveryEnv({
842
+ app: vctx.appName,
843
+ keys: cls.missingEnvVars,
844
+ }),
845
+ summary: vctx.appName
846
+ ? `Build failed — missing env vars on ${vctx.appName}: ${cls.missingEnvVars.join(", ")}.`
847
+ : `Build failed — missing env vars: ${cls.missingEnvVars.join(", ")}.`,
848
+ };
849
+ },
850
+ },
851
+ // (b) Structured BuildProblems with file locations → fix_problems.
852
+ // The agent can patch files directly without log archeology.
853
+ // Codex round 8 P3 fix: route through
854
+ // `buildProblemToRecoveryProblem` so `BuildProblem.hint` is
855
+ // folded into the message — agents and CLI get `Hint: <text>`
856
+ // appended automatically (matters for problems where the hint
857
+ // carries the actionable next step, e.g. malformed
858
+ // package.json with no line/column).
859
+ {
860
+ id: "deploy-failed-file-problems",
861
+ when: ({ vctx }) => vctx.lastDeployStatus === "failed" &&
862
+ vctx.inputMode === "deploy" &&
863
+ (vctx.buildProblems ?? []).filter((p) => p.file).length > 0,
864
+ verdict: ({ vctx }) => {
713
865
  const cls = vctx.buildLogClassification;
714
- // (a) Missing env keys → recoveryEnv. Most actionable case:
715
- // agent calls percher_env_set with the exact keys.
716
- if (cls && cls.errorClass === "missing_env" && cls.missingEnvVars.length > 0) {
717
- return {
718
- status: "needs_action",
719
- diagnosis: {
720
- title: cls.title,
721
- explanation: cls.explanation,
722
- reasonCode: "missing_env",
723
- phase: "build",
724
- },
725
- recovery: recoveryEnv({
726
- app: vctx.appName,
727
- keys: cls.missingEnvVars,
728
- }),
729
- summary: vctx.appName
730
- ? `Build failed — missing env vars on ${vctx.appName}: ${cls.missingEnvVars.join(", ")}.`
731
- : `Build failed — missing env vars: ${cls.missingEnvVars.join(", ")}.`,
732
- };
733
- }
734
- // (b) Structured BuildProblems with file locations → fix_problems.
735
- // The agent can patch files directly without log archeology.
736
- // Codex round 8 P3 fix: route through
737
- // `buildProblemToRecoveryProblem` so `BuildProblem.hint` is
738
- // folded into the message — agents and CLI got `Hint: <text>`
739
- // appended automatically (matters for problems where the hint
740
- // carries the actionable next step, e.g. malformed
741
- // package.json with no line/column).
742
866
  const fileProblems = (vctx.buildProblems ?? [])
743
867
  .filter((p) => p.file)
744
868
  .map(buildProblemToRecoveryProblem);
745
- if (fileProblems.length > 0) {
746
- return {
747
- status: "needs_action",
748
- diagnosis: {
749
- title: cls?.title ?? "Build failed with file-located problems",
750
- explanation: cls?.explanation ??
751
- `Build extracted ${fileProblems.length} structured problem${fileProblems.length === 1 ? "" : "s"} with file locations. Patch the files directly.`,
752
- reasonCode: "build_failed",
753
- phase: "build",
754
- },
755
- recovery: recoveryFixProblems({
756
- problems: fileProblems,
757
- reasonCode: "build_failed",
758
- }),
759
- summary: vctx.lastDeployId
760
- ? `Deploy ${vctx.lastDeployId} failed — ${fileProblems.length} structured problem${fileProblems.length === 1 ? "" : "s"} to patch.`
761
- : `Build failed — ${fileProblems.length} structured problem${fileProblems.length === 1 ? "" : "s"} to patch.`,
762
- };
763
- }
764
- // (c) Fallback: classified but unactionable, or unclassified.
765
- // Hand off to inspect_build_log so the agent can read the raw
766
- // log. This is also the no-classification path (cls === null).
869
+ return {
870
+ status: "needs_action",
871
+ diagnosis: {
872
+ title: cls?.title ?? "Build failed with file-located problems",
873
+ explanation: cls?.explanation ??
874
+ `Build extracted ${fileProblems.length} structured problem${fileProblems.length === 1 ? "" : "s"} with file locations. Patch the files directly.`,
875
+ reasonCode: "build_failed",
876
+ phase: "build",
877
+ },
878
+ recovery: recoveryFixProblems({
879
+ problems: fileProblems,
880
+ reasonCode: "build_failed",
881
+ }),
882
+ summary: vctx.lastDeployId
883
+ ? `Deploy ${vctx.lastDeployId} failed — ${fileProblems.length} structured problem${fileProblems.length === 1 ? "" : "s"} to patch.`
884
+ : `Build failed — ${fileProblems.length} structured problem${fileProblems.length === 1 ? "" : "s"} to patch.`,
885
+ };
886
+ },
887
+ },
888
+ // (c) Fallback: classified but unactionable, or unclassified.
889
+ // Hand off to inspect_build_log so the agent can read the raw
890
+ // log. This is also the no-classification path (cls === null).
891
+ {
892
+ id: "deploy-failed-inspect-log",
893
+ when: ({ vctx }) => vctx.lastDeployStatus === "failed" && vctx.inputMode === "deploy",
894
+ verdict: ({ vctx }) => {
895
+ const cls = vctx.buildLogClassification;
767
896
  return {
768
897
  status: "needs_action",
769
898
  diagnosis: {
@@ -785,8 +914,14 @@ function deriveVerdict(checks, vctx) {
785
914
  ? `Deploy ${vctx.lastDeployId} failed — inspect the build log.`
786
915
  : "Last deploy failed — inspect the latest failed deploy's build log.",
787
916
  };
788
- }
789
- return {
917
+ },
918
+ },
919
+ // No mode hint — route the agent back to doctor with mode='deploy'
920
+ // so the deep-analysis rules above run on the follow-up call.
921
+ {
922
+ id: "deploy-failed-dispatch",
923
+ when: ({ vctx }) => vctx.lastDeployStatus === "failed",
924
+ verdict: ({ vctx }) => ({
790
925
  status: "needs_action",
791
926
  diagnosis: {
792
927
  title: "Last deploy failed",
@@ -810,8 +945,8 @@ function deriveVerdict(checks, vctx) {
810
945
  summary: vctx.lastDeployId
811
946
  ? `Deploy ${vctx.lastDeployId} failed — call percher_doctor with mode='deploy'.`
812
947
  : "Last deploy failed — call percher_doctor with mode='deploy'.",
813
- };
814
- }
948
+ }),
949
+ },
815
950
  // App suspended. Suspension reason isn't on /diagnostics
816
951
  // (lives on the App row's `suspensionReason` /
817
952
  // `suspensionOrigin`), so doctor surfaces to the user with a
@@ -819,8 +954,13 @@ function deriveVerdict(checks, vctx) {
819
954
  // most owner-resumable suspensions are quota; admin/moderation
820
955
  // suspensions would route differently if doctor had access to
821
956
  // suspensionOrigin (Phase 6 can plumb that through if needed).
822
- if (vctx.appStatus === "suspended") {
823
- return {
957
+ //
958
+ // Sits above the runtime-crashed rules: a suspended app's stopped
959
+ // container must lead with the suspension, not a crash fix.
960
+ {
961
+ id: "app-suspended",
962
+ when: ({ vctx }) => vctx.appStatus === "suspended",
963
+ verdict: ({ vctx }) => ({
824
964
  status: "needs_action",
825
965
  diagnosis: {
826
966
  title: "App is suspended",
@@ -839,21 +979,25 @@ function deriveVerdict(checks, vctx) {
839
979
  summary: vctx.appName
840
980
  ? `${vctx.appName} is suspended — resume the app before retrying.`
841
981
  : "App is suspended — resume it before retrying.",
842
- };
843
- }
982
+ }),
983
+ },
844
984
  // Case 6 — happy path. All app-level checks passed (warns are
845
985
  // informational, not blocking).
846
- if (verdictChecks.every((c) => c.status === "pass" || c.status === "skip" || c.status === "warn")) {
847
- const passing = verdictChecks.filter((c) => c.status === "pass").length;
848
- const noun = passing === 1 ? "check" : "checks";
849
- return {
850
- status: "ok",
851
- recovery: recoveryNone({ reasonCode: "none" }),
852
- summary: vctx.appName
853
- ? `All ${passing} ${noun} passed for ${vctx.appName}.`
854
- : `All ${passing} ${noun} passed.`,
855
- };
856
- }
986
+ {
987
+ id: "all-healthy",
988
+ when: ({ verdictChecks }) => verdictChecks.every((c) => c.status === "pass" || c.status === "skip" || c.status === "warn"),
989
+ verdict: ({ vctx, verdictChecks }) => {
990
+ const passing = verdictChecks.filter((c) => c.status === "pass").length;
991
+ const noun = passing === 1 ? "check" : "checks";
992
+ return {
993
+ status: "ok",
994
+ recovery: recoveryNone({ reasonCode: "none" }),
995
+ summary: vctx.appName
996
+ ? `All ${passing} ${noun} passed for ${vctx.appName}.`
997
+ : `All ${passing} ${noun} passed.`,
998
+ };
999
+ },
1000
+ },
857
1001
  // Case 7 — at least one app-level check failed. Phase 2b refines
858
1002
  // the previous catch-all `ask_user`/`unknown` into specific
859
1003
  // dispatches based on structured signals from `/diagnostics`.
@@ -868,33 +1012,19 @@ function deriveVerdict(checks, vctx) {
868
1012
  // runtime-focused expansion (Phase 2 step 9) can take it from
869
1013
  // here. Self-recursion via mode hint is the explicit Phase 2
870
1014
  // contract — input mode disambiguates the dispatch.
871
- const containerDown = (vctx.containerRunning === false &&
872
- vctx.containerState !== undefined &&
873
- vctx.containerState !== "not-found") ||
874
- vctx.containerHealthy === false;
875
- if (vctx.appStatus === "crashed" || containerDown) {
876
- const containerCheck = findCheck("Container");
877
- const healthCheck = findCheck("Container health");
878
- const lastCrashCheck = findCheck("Last crash");
879
- const explanationBits = [];
880
- if (vctx.appStatus === "crashed")
881
- explanationBits.push("App is in crashed state.");
882
- if (containerCheck?.status === "fail")
883
- explanationBits.push(containerCheck.message);
884
- if (healthCheck?.status === "fail")
885
- explanationBits.push(healthCheck.message);
886
- if (lastCrashCheck)
887
- explanationBits.push(lastCrashCheck.message);
888
- const explanation = explanationBits.length > 0
889
- ? explanationBits.join(" ")
890
- : "Runtime is not responding — container or health check is failing.";
891
- // Loop-break (Codex round 5 P2): if we're already in
892
- // mode='runtime', emit a concrete ask_user with the crash
893
- // details rather than recursing into ourselves. Phase 2c will
894
- // replace this with crash-report fetch + classification (the
895
- // crash-handler watchdog already produces structured AI-generated
896
- // explanations — doctor just needs to surface them here).
897
- if (vctx.inputMode === "runtime") {
1015
+ //
1016
+ // Loop-break (Codex round 5 P2): if we're already in
1017
+ // mode='runtime', emit a concrete ask_user with the crash
1018
+ // details rather than recursing into ourselves. Phase 2c will
1019
+ // replace this with crash-report fetch + classification (the
1020
+ // crash-handler watchdog already produces structured AI-generated
1021
+ // explanations — doctor just needs to surface them here).
1022
+ {
1023
+ id: "runtime-crashed-report",
1024
+ when: ({ vctx, containerDown }) => (vctx.appStatus === "crashed" || containerDown) && vctx.inputMode === "runtime",
1025
+ verdict: (ctx) => {
1026
+ const { vctx } = ctx;
1027
+ const explanation = runtimeCrashExplanation(ctx);
898
1028
  // FUTURE12 Phase 2c — surface the crash report's
899
1029
  // AI-generated explanation + suggestion in the prompt when
900
1030
  // available. The watchdog/crash-handler already produces
@@ -946,79 +1076,81 @@ function deriveVerdict(checks, vctx) {
946
1076
  : `${vctx.appName} runtime is unhealthy — surface the crash details to the user.`
947
1077
  : "Runtime is unhealthy — surface the crash details to the user.",
948
1078
  };
949
- }
950
- return {
951
- status: "needs_action",
952
- diagnosis: {
953
- title: vctx.appStatus === "crashed" ? "App crashed" : "Runtime not responding",
954
- explanation,
955
- reasonCode: "runtime_crashed",
956
- phase: "runtime",
957
- },
958
- recovery: {
959
- retryable: false,
960
- nextAction: "run_doctor",
961
- suggestedTool: "percher_doctor",
962
- args: { app: vctx.appName, mode: "runtime" },
963
- reasonCode: "runtime_crashed",
964
- },
965
- summary: vctx.appName
966
- ? `${vctx.appName} runtime needs investigation — call percher_doctor with mode='runtime'.`
967
- : "Runtime needs investigation — call percher_doctor with mode='runtime'.",
968
- };
969
- }
1079
+ },
1080
+ },
1081
+ // No mode hint — route the agent back to doctor with mode='runtime'
1082
+ // so the crash-report rule above runs on the follow-up call.
1083
+ {
1084
+ id: "runtime-crashed-dispatch",
1085
+ when: ({ vctx, containerDown }) => vctx.appStatus === "crashed" || containerDown,
1086
+ verdict: (ctx) => {
1087
+ const { vctx } = ctx;
1088
+ const explanation = runtimeCrashExplanation(ctx);
1089
+ return {
1090
+ status: "needs_action",
1091
+ diagnosis: {
1092
+ title: vctx.appStatus === "crashed" ? "App crashed" : "Runtime not responding",
1093
+ explanation,
1094
+ reasonCode: "runtime_crashed",
1095
+ phase: "runtime",
1096
+ },
1097
+ recovery: {
1098
+ retryable: false,
1099
+ nextAction: "run_doctor",
1100
+ suggestedTool: "percher_doctor",
1101
+ args: { app: vctx.appName, mode: "runtime" },
1102
+ reasonCode: "runtime_crashed",
1103
+ },
1104
+ summary: vctx.appName
1105
+ ? `${vctx.appName} runtime needs investigation — call percher_doctor with mode='runtime'.`
1106
+ : "Runtime needs investigation — call percher_doctor with mode='runtime'.",
1107
+ };
1108
+ },
1109
+ },
970
1110
  // 7b. Public route is the only thing failing — container is up
971
1111
  // and healthy, but the external probe via Caddy/TLS/DNS isn't
972
1112
  // responding. This is usually a transient route-reconcile blip
973
1113
  // that self-heals; recommend `retry` so the agent re-runs doctor
974
1114
  // (or the user retries publish) rather than asking the user to
975
1115
  // act manually.
976
- if (vctx.publicRouteHealthy === false) {
977
- const routeCheck = findCheck("Public route");
978
- return {
979
- status: "needs_action",
980
- diagnosis: {
981
- title: "Public route is not responding",
982
- explanation: routeCheck?.message ??
983
- "Container looks healthy but the public URL isn't responding — route reconcile usually self-heals.",
984
- reasonCode: "infra_transient",
985
- phase: "infra",
986
- },
987
- recovery: {
988
- retryable: true,
989
- nextAction: "retry",
990
- suggestedTool: "percher_doctor",
991
- args: { app: vctx.appName },
992
- reasonCode: "infra_transient",
993
- },
994
- summary: vctx.appName
995
- ? `${vctx.appName} public route is failing — likely transient, re-run doctor in a moment.`
996
- : "Public route is failing — likely transient, re-run doctor in a moment.",
997
- };
998
- }
999
- // 7c. Genuinely unknown — none of the structured signals above
1000
- // matched. Keep the safe fallback so an agent doesn't think it
1001
- // can auto-resolve. This is what Phase 2a shipped; Phase 2b only
1002
- // narrows the surface that lands here.
1003
- const firstFailed = verdictChecks.find((c) => c.status === "fail");
1004
- return {
1005
- status: "needs_action",
1006
- diagnosis: firstFailed
1007
- ? {
1008
- title: firstFailed.name,
1009
- explanation: firstFailed.message,
1010
- reasonCode: "unknown",
1011
- }
1012
- : undefined,
1013
- recovery: recoveryAsk({
1014
- prompt: firstFailed
1015
- ? `Doctor flagged a problem: ${firstFailed.name} — ${firstFailed.message}. Review the checks list and surface the failing item to the user.`
1016
- : "Doctor reported one or more issues. Review the checks list and surface the failing items to the user.",
1017
- reasonCode: "unknown",
1018
- }),
1019
- summary: firstFailed
1020
- ? `${firstFailed.name} failed: ${firstFailed.message}`
1021
- : "Doctor reported issues — review the checks list.",
1022
- };
1116
+ {
1117
+ id: "public-route-unhealthy",
1118
+ when: ({ vctx }) => vctx.publicRouteHealthy === false,
1119
+ verdict: ({ vctx, findCheck }) => {
1120
+ const routeCheck = findCheck("Public route");
1121
+ return {
1122
+ status: "needs_action",
1123
+ diagnosis: {
1124
+ title: "Public route is not responding",
1125
+ explanation: routeCheck?.message ??
1126
+ "Container looks healthy but the public URL isn't responding — route reconcile usually self-heals.",
1127
+ reasonCode: "infra_transient",
1128
+ phase: "infra",
1129
+ },
1130
+ recovery: {
1131
+ retryable: true,
1132
+ nextAction: "retry",
1133
+ suggestedTool: "percher_doctor",
1134
+ args: { app: vctx.appName },
1135
+ reasonCode: "infra_transient",
1136
+ },
1137
+ summary: vctx.appName
1138
+ ? `${vctx.appName} public route is failing — likely transient, re-run doctor in a moment.`
1139
+ : "Public route is failing — likely transient, re-run doctor in a moment.",
1140
+ };
1141
+ },
1142
+ },
1143
+ FALLBACK_RULE,
1144
+ ];
1145
+ /**
1146
+ * Walk VERDICT_RULES top-to-bottom and return the first matching
1147
+ * rule's verdict. The fallback rule matches everything, so the `??`
1148
+ * arm is unreachable in practice — it exists so a future table edit
1149
+ * that drops the fallback can't make this function partial.
1150
+ */
1151
+ function deriveVerdict(checks, vctx) {
1152
+ const ctx = buildVerdictRuleContext(checks, vctx);
1153
+ const rule = VERDICT_RULES.find((r) => r.when(ctx)) ?? FALLBACK_RULE;
1154
+ return rule.verdict(ctx);
1023
1155
  }
1024
1156
  //# sourceMappingURL=doctor.js.map