terramend 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +661 -0
- package/README.md +145 -0
- package/dist/agents/claude.d.ts +73 -0
- package/dist/agents/claudePretoolGate.d.ts +99 -0
- package/dist/agents/gateServer.d.ts +7 -0
- package/dist/agents/index.d.ts +6 -0
- package/dist/agents/nativeFsDenies.d.ts +28 -0
- package/dist/agents/opencode.d.ts +231 -0
- package/dist/agents/opencodePlugin.d.ts +85 -0
- package/dist/agents/opencodeShared.d.ts +40 -0
- package/dist/agents/postRun.d.ts +132 -0
- package/dist/agents/reviewer.d.ts +38 -0
- package/dist/agents/sessionLabeler.d.ts +97 -0
- package/dist/agents/shared.d.ts +189 -0
- package/dist/agents/subagentModels.d.ts +19 -0
- package/dist/agents/subagentToolGates.d.ts +55 -0
- package/dist/cli.mjs +197426 -0
- package/dist/external.d.ts +227 -0
- package/dist/index.d.ts +6 -0
- package/dist/index.js +196783 -0
- package/dist/internal/index.d.ts +18 -0
- package/dist/internal.js +1714 -0
- package/dist/lifecycle.d.ts +2 -0
- package/dist/main.d.ts +8 -0
- package/dist/mcp/arkConfig.d.ts +1 -0
- package/dist/mcp/checkSuite.d.ts +25 -0
- package/dist/mcp/checkout.d.ts +77 -0
- package/dist/mcp/comment.d.ts +119 -0
- package/dist/mcp/commitInfo.d.ts +9 -0
- package/dist/mcp/crosswalk.d.ts +105 -0
- package/dist/mcp/dependencies.d.ts +8 -0
- package/dist/mcp/geminiSanitizer.d.ts +28 -0
- package/dist/mcp/git.d.ts +46 -0
- package/dist/mcp/guardrails.d.ts +104 -0
- package/dist/mcp/issue.d.ts +18 -0
- package/dist/mcp/issueComments.d.ts +9 -0
- package/dist/mcp/issueEvents.d.ts +9 -0
- package/dist/mcp/issueInfo.d.ts +9 -0
- package/dist/mcp/labels.d.ts +12 -0
- package/dist/mcp/localContext.d.ts +19 -0
- package/dist/mcp/moduleExtraction.d.ts +71 -0
- package/dist/mcp/moduleTests.d.ts +104 -0
- package/dist/mcp/modules.d.ts +179 -0
- package/dist/mcp/output.d.ts +12 -0
- package/dist/mcp/pathSafety.d.ts +14 -0
- package/dist/mcp/policy.d.ts +48 -0
- package/dist/mcp/pr.d.ts +49 -0
- package/dist/mcp/prInfo.d.ts +9 -0
- package/dist/mcp/providerSchema.d.ts +50 -0
- package/dist/mcp/review.d.ts +199 -0
- package/dist/mcp/reviewComments.d.ts +178 -0
- package/dist/mcp/roots.d.ts +58 -0
- package/dist/mcp/scope.d.ts +15 -0
- package/dist/mcp/selectMode.d.ts +18 -0
- package/dist/mcp/server.d.ts +48 -0
- package/dist/mcp/shared.d.ts +47 -0
- package/dist/mcp/shell.d.ts +37 -0
- package/dist/mcp/staleFix.d.ts +51 -0
- package/dist/mcp/terraform/cost.d.ts +55 -0
- package/dist/mcp/terraform/currency.d.ts +94 -0
- package/dist/mcp/terraform/decisions.d.ts +178 -0
- package/dist/mcp/terraform/findings.d.ts +75 -0
- package/dist/mcp/terraform/plan.d.ts +157 -0
- package/dist/mcp/terraform/scanners.d.ts +131 -0
- package/dist/mcp/terraform/tools.d.ts +63 -0
- package/dist/mcp/terraform/types.d.ts +172 -0
- package/dist/mcp/terraform.d.ts +22 -0
- package/dist/mcp/terratest.d.ts +83 -0
- package/dist/mcp/upload.d.ts +6 -0
- package/dist/models.d.ts +171 -0
- package/dist/modes.d.ts +26 -0
- package/dist/prep/index.d.ts +7 -0
- package/dist/prep/installNodeDependencies.d.ts +2 -0
- package/dist/prep/installPythonDependencies.d.ts +2 -0
- package/dist/prep/types.d.ts +31 -0
- package/dist/reviewQuality.d.ts +64 -0
- package/dist/skills/terraform-best-practices/SKILL.md +369 -0
- package/dist/toolState.d.ts +135 -0
- package/dist/utils/activity.d.ts +40 -0
- package/dist/utils/agent.d.ts +20 -0
- package/dist/utils/agentHangReport.d.ts +38 -0
- package/dist/utils/apiFetch.d.ts +19 -0
- package/dist/utils/apiKeys.d.ts +41 -0
- package/dist/utils/apiUrl.d.ts +20 -0
- package/dist/utils/assets.d.ts +8 -0
- package/dist/utils/billingErrors.d.ts +85 -0
- package/dist/utils/body.d.ts +34 -0
- package/dist/utils/buildTerramendFooter.d.ts +25 -0
- package/dist/utils/byokFallback.d.ts +85 -0
- package/dist/utils/claudeSubscription.d.ts +30 -0
- package/dist/utils/cli.d.ts +10 -0
- package/dist/utils/codexHome.d.ts +29 -0
- package/dist/utils/codexOAuth.d.ts +60 -0
- package/dist/utils/diffCoverage.d.ts +63 -0
- package/dist/utils/errorReport.d.ts +17 -0
- package/dist/utils/exitHandler.d.ts +8 -0
- package/dist/utils/fixDoubleEscapedString.d.ts +1 -0
- package/dist/utils/gitAuth.d.ts +84 -0
- package/dist/utils/gitAuthServer.d.ts +24 -0
- package/dist/utils/github.d.ts +78 -0
- package/dist/utils/globals.d.ts +3 -0
- package/dist/utils/install.d.ts +60 -0
- package/dist/utils/instructions.d.ts +48 -0
- package/dist/utils/leapingComment.d.ts +11 -0
- package/dist/utils/learnings.d.ts +62 -0
- package/dist/utils/learningsTruncate.d.ts +25 -0
- package/dist/utils/lifecycle.d.ts +57 -0
- package/dist/utils/log.d.ts +111 -0
- package/dist/utils/normalizeEnv.d.ts +30 -0
- package/dist/utils/openCodeModels.d.ts +11 -0
- package/dist/utils/overrides.d.ts +40 -0
- package/dist/utils/packageManager.d.ts +49 -0
- package/dist/utils/patchWorkflowRunFields.d.ts +29 -0
- package/dist/utils/payload.d.ts +105 -0
- package/dist/utils/prSummary.d.ts +61 -0
- package/dist/utils/progressComment.d.ts +146 -0
- package/dist/utils/providerErrors.d.ts +31 -0
- package/dist/utils/rangeDiff.d.ts +51 -0
- package/dist/utils/remediationCommand.d.ts +55 -0
- package/dist/utils/retry.d.ts +13 -0
- package/dist/utils/reviewCleanup.d.ts +14 -0
- package/dist/utils/run.d.ts +9 -0
- package/dist/utils/runContext.d.ts +60 -0
- package/dist/utils/runContextData.d.ts +23 -0
- package/dist/utils/runErrorRenderer.d.ts +64 -0
- package/dist/utils/runLifecycle.d.ts +86 -0
- package/dist/utils/runStartupLog.d.ts +15 -0
- package/dist/utils/secrets.d.ts +22 -0
- package/dist/utils/setup.d.ts +90 -0
- package/dist/utils/shell.d.ts +32 -0
- package/dist/utils/skills.d.ts +10 -0
- package/dist/utils/subprocess.d.ts +80 -0
- package/dist/utils/terraformMcp.d.ts +42 -0
- package/dist/utils/time.d.ts +15 -0
- package/dist/utils/timer.d.ts +23 -0
- package/dist/utils/todoTracking.d.ts +16 -0
- package/dist/utils/token.d.ts +39 -0
- package/dist/utils/version.d.ts +2 -0
- package/dist/utils/versioning.d.ts +7 -0
- package/dist/utils/vertex.d.ts +16 -0
- package/dist/utils/workflow.d.ts +13 -0
- package/package.json +119 -0
- package/src/agents/claude.test.ts +1016 -0
- package/src/agents/claude.ts +1246 -0
- package/src/agents/claudePretoolGate.test.ts +28 -0
- package/src/agents/claudePretoolGate.ts +173 -0
- package/src/agents/gateServer.test.ts +204 -0
- package/src/agents/gateServer.ts +124 -0
- package/src/agents/index.ts +10 -0
- package/src/agents/nativeFsDenies.ts +82 -0
- package/src/agents/opencode.test.ts +1440 -0
- package/src/agents/opencode.ts +1312 -0
- package/src/agents/opencodePlugin.ts +222 -0
- package/src/agents/opencodeShared.test.ts +34 -0
- package/src/agents/opencodeShared.ts +121 -0
- package/src/agents/postRun.test.ts +549 -0
- package/src/agents/postRun.ts +535 -0
- package/src/agents/reviewer.ts +104 -0
- package/src/agents/sessionLabeler.test.ts +247 -0
- package/src/agents/sessionLabeler.ts +178 -0
- package/src/agents/shared.test.ts +76 -0
- package/src/agents/shared.ts +292 -0
- package/src/agents/subagentModels.test.ts +113 -0
- package/src/agents/subagentModels.ts +40 -0
- package/src/agents/subagentRegistration.test.ts +41 -0
- package/src/agents/subagentToolGates.ts +114 -0
- package/src/cli.test.ts +129 -0
- package/src/cli.ts +105 -0
- package/src/commands/gha.test.ts +192 -0
- package/src/commands/gha.ts +188 -0
- package/src/commands/mcp.ts +122 -0
- package/src/config.ts +1 -0
- package/src/entry.ts +7 -0
- package/src/entryPost.stdlibOnly.test.ts +109 -0
- package/src/entryPost.ts +99 -0
- package/src/external.test.ts +16 -0
- package/src/external.ts +302 -0
- package/src/index.ts +11 -0
- package/src/internal/index.ts +71 -0
- package/src/lifecycle.ts +2 -0
- package/src/main.test.ts +873 -0
- package/src/main.ts +712 -0
- package/src/mcp/__fixtures__/terramend-scratch-pr-49-review-3485940013.json +110 -0
- package/src/mcp/__fixtures__/terramend-scratch-pr-64-review-3531000326.json +14 -0
- package/src/mcp/__fixtures__/terramend-test-repo-pr-1.diff.json +67 -0
- package/src/mcp/__snapshots__/checkout.test.ts.snap +109 -0
- package/src/mcp/__snapshots__/reviewComments.test.ts.snap +71 -0
- package/src/mcp/arkConfig.ts +7 -0
- package/src/mcp/checkSuite.test.ts +245 -0
- package/src/mcp/checkSuite.ts +255 -0
- package/src/mcp/checkout.test.ts +752 -0
- package/src/mcp/checkout.ts +886 -0
- package/src/mcp/comment.test.ts +772 -0
- package/src/mcp/comment.ts +582 -0
- package/src/mcp/commitInfo.test.ts +127 -0
- package/src/mcp/commitInfo.ts +61 -0
- package/src/mcp/crosswalk.test.ts +106 -0
- package/src/mcp/crosswalk.ts +339 -0
- package/src/mcp/dependencies.test.ts +309 -0
- package/src/mcp/dependencies.ts +189 -0
- package/src/mcp/geminiSanitizer.test.ts +287 -0
- package/src/mcp/geminiSanitizer.ts +207 -0
- package/src/mcp/git.test.ts +1083 -0
- package/src/mcp/git.ts +890 -0
- package/src/mcp/guardrails.test.ts +705 -0
- package/src/mcp/guardrails.ts +465 -0
- package/src/mcp/issue.test.ts +113 -0
- package/src/mcp/issue.ts +73 -0
- package/src/mcp/issueComments.test.ts +69 -0
- package/src/mcp/issueComments.ts +48 -0
- package/src/mcp/issueEvents.test.ts +134 -0
- package/src/mcp/issueEvents.ts +100 -0
- package/src/mcp/issueInfo.test.ts +104 -0
- package/src/mcp/issueInfo.ts +72 -0
- package/src/mcp/labels.test.ts +52 -0
- package/src/mcp/labels.ts +34 -0
- package/src/mcp/localContext.ts +28 -0
- package/src/mcp/localServer.test.ts +75 -0
- package/src/mcp/localServer.ts +131 -0
- package/src/mcp/moduleExtraction.test.ts +261 -0
- package/src/mcp/moduleExtraction.ts +313 -0
- package/src/mcp/moduleTests.test.ts +269 -0
- package/src/mcp/moduleTests.ts +421 -0
- package/src/mcp/modules.test.ts +640 -0
- package/src/mcp/modules.ts +696 -0
- package/src/mcp/output.test.ts +96 -0
- package/src/mcp/output.ts +70 -0
- package/src/mcp/pathSafety.test.ts +44 -0
- package/src/mcp/pathSafety.ts +28 -0
- package/src/mcp/policy.test.ts +282 -0
- package/src/mcp/policy.ts +199 -0
- package/src/mcp/pr.test.ts +387 -0
- package/src/mcp/pr.ts +194 -0
- package/src/mcp/prInfo.test.ts +96 -0
- package/src/mcp/prInfo.ts +91 -0
- package/src/mcp/providerSchema.test.ts +85 -0
- package/src/mcp/providerSchema.ts +175 -0
- package/src/mcp/review.test.ts +936 -0
- package/src/mcp/review.ts +923 -0
- package/src/mcp/reviewComments.test.ts +549 -0
- package/src/mcp/reviewComments.ts +896 -0
- package/src/mcp/roots.test.ts +175 -0
- package/src/mcp/roots.ts +217 -0
- package/src/mcp/scope.test.ts +59 -0
- package/src/mcp/scope.ts +65 -0
- package/src/mcp/security.test.ts +720 -0
- package/src/mcp/selectMode.test.ts +210 -0
- package/src/mcp/selectMode.ts +181 -0
- package/src/mcp/server.test.ts +292 -0
- package/src/mcp/server.ts +403 -0
- package/src/mcp/shared.ts +100 -0
- package/src/mcp/shell.test.ts +520 -0
- package/src/mcp/shell.ts +505 -0
- package/src/mcp/staleFix.test.ts +237 -0
- package/src/mcp/staleFix.ts +277 -0
- package/src/mcp/terraform/cost.ts +163 -0
- package/src/mcp/terraform/currency.test.ts +338 -0
- package/src/mcp/terraform/currency.ts +336 -0
- package/src/mcp/terraform/decisions.ts +527 -0
- package/src/mcp/terraform/findings.ts +333 -0
- package/src/mcp/terraform/plan.ts +348 -0
- package/src/mcp/terraform/scanners.ts +809 -0
- package/src/mcp/terraform/tools.test.ts +1071 -0
- package/src/mcp/terraform/tools.ts +908 -0
- package/src/mcp/terraform/types.ts +305 -0
- package/src/mcp/terraform.test.ts +1957 -0
- package/src/mcp/terraform.ts +23 -0
- package/src/mcp/terratest.test.ts +105 -0
- package/src/mcp/terratest.ts +196 -0
- package/src/mcp/toolFiltering.test.ts +85 -0
- package/src/mcp/upload.test.ts +180 -0
- package/src/mcp/upload.ts +112 -0
- package/src/models.test.ts +300 -0
- package/src/models.ts +708 -0
- package/src/modes.test.ts +107 -0
- package/src/modes.ts +880 -0
- package/src/prep/index.ts +43 -0
- package/src/prep/installNodeDependencies.test.ts +298 -0
- package/src/prep/installNodeDependencies.ts +196 -0
- package/src/prep/installPythonDependencies.test.ts +268 -0
- package/src/prep/installPythonDependencies.ts +199 -0
- package/src/prep/types.ts +38 -0
- package/src/reviewQuality.test.ts +63 -0
- package/src/reviewQuality.ts +134 -0
- package/src/runCli.test.ts +214 -0
- package/src/runCli.ts +282 -0
- package/src/skills/terraform-best-practices/SKILL.md +369 -0
- package/src/toolState.test.ts +45 -0
- package/src/toolState.ts +252 -0
- package/src/utils/activity.test.ts +188 -0
- package/src/utils/activity.ts +210 -0
- package/src/utils/agent.test.ts +251 -0
- package/src/utils/agent.ts +139 -0
- package/src/utils/agentHangReport.test.ts +203 -0
- package/src/utils/agentHangReport.ts +170 -0
- package/src/utils/apiFetch.test.ts +115 -0
- package/src/utils/apiFetch.ts +62 -0
- package/src/utils/apiKeys.test.ts +344 -0
- package/src/utils/apiKeys.ts +206 -0
- package/src/utils/apiUrl.test.ts +30 -0
- package/src/utils/apiUrl.ts +59 -0
- package/src/utils/assets.test.ts +153 -0
- package/src/utils/assets.ts +107 -0
- package/src/utils/billingErrors.test.ts +121 -0
- package/src/utils/billingErrors.ts +189 -0
- package/src/utils/body.test.ts +217 -0
- package/src/utils/body.ts +168 -0
- package/src/utils/buildTerramendFooter.test.ts +38 -0
- package/src/utils/buildTerramendFooter.ts +82 -0
- package/src/utils/byokFallback.test.ts +205 -0
- package/src/utils/byokFallback.ts +128 -0
- package/src/utils/claudeSubscription.test.ts +179 -0
- package/src/utils/claudeSubscription.ts +93 -0
- package/src/utils/cli.ts +31 -0
- package/src/utils/codexHome.test.ts +190 -0
- package/src/utils/codexHome.ts +191 -0
- package/src/utils/codexOAuth.ts +147 -0
- package/src/utils/codexRefreshDetect.test.ts +85 -0
- package/src/utils/codexRefreshDetect.ts +35 -0
- package/src/utils/diffCoverage.test.ts +468 -0
- package/src/utils/diffCoverage.ts +404 -0
- package/src/utils/errorReport.test.ts +135 -0
- package/src/utils/errorReport.ts +83 -0
- package/src/utils/exitHandler.ts +35 -0
- package/src/utils/fixDoubleEscapedString.ts +9 -0
- package/src/utils/ghaCore.ts +13 -0
- package/src/utils/gitAuth.test.ts +322 -0
- package/src/utils/gitAuth.ts +263 -0
- package/src/utils/gitAuthServer.test.ts +260 -0
- package/src/utils/gitAuthServer.ts +182 -0
- package/src/utils/github.test.ts +615 -0
- package/src/utils/github.ts +538 -0
- package/src/utils/globals.ts +9 -0
- package/src/utils/humanEditCapture.test.ts +100 -0
- package/src/utils/humanEditCapture.ts +193 -0
- package/src/utils/install.test.ts +768 -0
- package/src/utils/install.ts +492 -0
- package/src/utils/instructions.test.ts +240 -0
- package/src/utils/instructions.ts +543 -0
- package/src/utils/leapingComment.test.ts +51 -0
- package/src/utils/leapingComment.ts +18 -0
- package/src/utils/learnings.test.ts +87 -0
- package/src/utils/learnings.ts +138 -0
- package/src/utils/learningsTocRender.test.ts +116 -0
- package/src/utils/learningsTruncate.test.ts +39 -0
- package/src/utils/learningsTruncate.ts +42 -0
- package/src/utils/lifecycle.test.ts +195 -0
- package/src/utils/lifecycle.ts +198 -0
- package/src/utils/log.test.ts +402 -0
- package/src/utils/log.ts +432 -0
- package/src/utils/normalizeEnv.test.ts +91 -0
- package/src/utils/normalizeEnv.ts +106 -0
- package/src/utils/openCodeModels.ts +82 -0
- package/src/utils/overrides.test.ts +89 -0
- package/src/utils/overrides.ts +98 -0
- package/src/utils/packageManager.test.ts +321 -0
- package/src/utils/packageManager.ts +257 -0
- package/src/utils/patchWorkflowRunFields.test.ts +92 -0
- package/src/utils/patchWorkflowRunFields.ts +150 -0
- package/src/utils/payload.test.ts +497 -0
- package/src/utils/payload.ts +371 -0
- package/src/utils/postApiFetch.ts +51 -0
- package/src/utils/prSummary.test.ts +224 -0
- package/src/utils/prSummary.ts +147 -0
- package/src/utils/progressComment.ts +261 -0
- package/src/utils/providerErrors.test.ts +315 -0
- package/src/utils/providerErrors.ts +172 -0
- package/src/utils/rangeDiff.test.ts +236 -0
- package/src/utils/rangeDiff.ts +182 -0
- package/src/utils/remediationCommand.test.ts +163 -0
- package/src/utils/remediationCommand.ts +119 -0
- package/src/utils/retry.test.ts +153 -0
- package/src/utils/retry.ts +58 -0
- package/src/utils/reviewCleanup.ts +106 -0
- package/src/utils/run.ts +99 -0
- package/src/utils/runContext.ts +145 -0
- package/src/utils/runContextData.ts +58 -0
- package/src/utils/runErrorRenderer.test.ts +95 -0
- package/src/utils/runErrorRenderer.ts +259 -0
- package/src/utils/runFixture.ts +76 -0
- package/src/utils/runLifecycle.ts +237 -0
- package/src/utils/runStartupLog.ts +60 -0
- package/src/utils/secrets.test.ts +103 -0
- package/src/utils/secrets.ts +177 -0
- package/src/utils/setup.test.ts +509 -0
- package/src/utils/setup.ts +352 -0
- package/src/utils/shell.ts +103 -0
- package/src/utils/skills.test.ts +46 -0
- package/src/utils/skills.ts +67 -0
- package/src/utils/subprocess.test.ts +170 -0
- package/src/utils/subprocess.ts +438 -0
- package/src/utils/terraformMcp.test.ts +63 -0
- package/src/utils/terraformMcp.ts +83 -0
- package/src/utils/time.test.ts +105 -0
- package/src/utils/time.ts +59 -0
- package/src/utils/timer.test.ts +91 -0
- package/src/utils/timer.ts +72 -0
- package/src/utils/todoTracking.test.ts +223 -0
- package/src/utils/todoTracking.ts +167 -0
- package/src/utils/token.test.ts +239 -0
- package/src/utils/token.ts +186 -0
- package/src/utils/version.ts +10 -0
- package/src/utils/versioning.test.ts +34 -0
- package/src/utils/versioning.ts +44 -0
- package/src/utils/vertex.ts +85 -0
- package/src/utils/workflow.ts +25 -0
|
@@ -0,0 +1,535 @@
|
|
|
1
|
+
import { readFile } from "node:fs/promises";
|
|
2
|
+
import {
|
|
3
|
+
type AgentResult,
|
|
4
|
+
type AgentRunContext,
|
|
5
|
+
type AgentUsage,
|
|
6
|
+
buildCommitPrompt,
|
|
7
|
+
getGitStatus,
|
|
8
|
+
hasPostRunIssues,
|
|
9
|
+
MAX_POST_RUN_RETRIES,
|
|
10
|
+
mergeAgentUsage,
|
|
11
|
+
type PostRunIssues,
|
|
12
|
+
type StopHookFailure,
|
|
13
|
+
} from "#app/agents/shared";
|
|
14
|
+
import { LIFECYCLE_HOOK_TIMEOUT_MS } from "#app/lifecycle";
|
|
15
|
+
import { NON_COMMITTING_MODES } from "#app/modes";
|
|
16
|
+
import type { ToolState } from "#app/toolState";
|
|
17
|
+
import { log } from "#app/utils/cli";
|
|
18
|
+
import {
|
|
19
|
+
SPAWN_ACTIVITY_TIMEOUT_CODE,
|
|
20
|
+
SPAWN_TIMEOUT_CODE,
|
|
21
|
+
SpawnTimeoutError,
|
|
22
|
+
spawn,
|
|
23
|
+
} from "#app/utils/subprocess";
|
|
24
|
+
|
|
25
|
+
/**
 * report whether the agent selected a review mode yet finished without any
 * visible output, judged purely from the flags recorded on `toolState`.
 * returns the selected mode when the gate should fire and `null` otherwise.
 * the function only reads `toolState` (no writes, no I/O), so it is safe to
 * call after every agent attempt.
 *
 * `hadProgressComment` is the anchor: silent runs (non-issue events, or the
 * dispatcher skipped seeding) have no UI to nudge towards, so they never
 * fire the gate.
 *
 * `Review` and `IncrementalReview` accept different exits:
 * - Review: only `create_pull_request_review` counts. `report_progress` is
 *   not a substitute — a Review run that exits with just a summary comment
 *   has produced nothing reviewable on the PR. this matches the hard-fail
 *   message built from `expected = "create_pull_request_review"` further
 *   down in this file.
 * - IncrementalReview: `report_progress` is a legitimate "no review
 *   warranted" exit, so either flag (`review` or `finalSummaryWritten`)
 *   clears the gate.
 * splitting per mode also closes the bypass where a subagent (e.g. a
 * `task`-dispatched `reviewfrog` lens) calls `report_progress` and silences
 * the gate even though the orchestrator never submitted a review.
 */
export function getUnsubmittedReview(toolState: ToolState): "Review" | "IncrementalReview" | null {
  const { selectedMode, hadProgressComment } = toolState;

  // no progress comment → nothing user-visible to protect, never nudge.
  if (!hadProgressComment) return null;

  switch (selectedMode) {
    case "Review":
      // a submitted review is the only accepted exit.
      return toolState.review ? null : "Review";
    case "IncrementalReview": {
      // a submitted review or a written final summary both count.
      const hasVisibleExit = Boolean(toolState.review || toolState.finalSummaryWritten);
      return hasVisibleExit ? null : "IncrementalReview";
    }
    default:
      // every other mode (or no mode selected) is outside this gate.
      return null;
  }
}
|
|
54
|
+
|
|
55
|
+
/**
 * hook output can flow into two size-sensitive places: the LLM resume prompt
 * (context window) and AgentResult.error (surfaced in GitHub comments capped
 * at 65535 chars). keep only the tail so both stay bounded; the tail is
 * usually the most actionable part of a failing script's output.
 */
const MAX_HOOK_OUTPUT_CHARS = 4096;

/**
 * cap `raw` at its last `MAX_HOOK_OUTPUT_CHARS` UTF-16 code units, prefixed
 * with a one-line truncation marker. input at or under the cap is returned
 * unchanged.
 *
 * `slice` counts code units, not code points, so the cut can land between
 * the two halves of a surrogate pair (e.g. an emoji in a test runner's
 * output). when that happens the orphaned low surrogate is dropped so the
 * returned text never starts with half a character.
 *
 * @param raw combined hook output.
 * @returns `raw` itself, or the marker line followed by the retained tail.
 */
function truncateHookOutput(raw: string): string {
  if (raw.length <= MAX_HOOK_OUTPUT_CHARS) return raw;

  // `cut` is >= 1 here because raw is strictly longer than the cap.
  const cut = raw.length - MAX_HOOK_OUTPUT_CHARS;
  const unitBeforeCut = raw.charCodeAt(cut - 1);
  const unitAtCut = raw.charCodeAt(cut);
  const splitsSurrogatePair =
    unitBeforeCut >= 0xd800 &&
    unitBeforeCut <= 0xdbff &&
    unitAtCut >= 0xdc00 &&
    unitAtCut <= 0xdfff;

  // skip the dangling low surrogate only when the cut itself created it;
  // input that was already malformed is passed through untouched.
  const tail = raw.slice(splitsSurrogatePair ? cut + 1 : cut);
  return `...(truncated, showing last ${MAX_HOOK_OUTPUT_CHARS} chars)\n${tail}`;
}
|
|
67
|
+
|
|
68
|
+
/**
 * run the user-configured stop hook through `bash -c`.
 *
 * counterpart of `executeLifecycleHook` (which soft-fails with a warning),
 * except that this one hands back structured output so agent harnesses can
 * feed a failure into the session as a resume prompt.
 *
 * outcomes:
 * - exit code 0 → `null`, the hook passed.
 * - non-zero exit → `StopHookFailure`, actionable: the output is fed to the
 *   agent so it can fix the underlying issue.
 * - timeout / spawn error → `null`, treated as passed: we can't usefully ask
 *   the agent to fix an infrastructure problem, and retrying would risk
 *   infinite loops.
 *
 * @param script shell source passed verbatim to `bash -c`.
 */
export async function executeStopHook(script: string): Promise<StopHookFailure | null> {
  log.info("» executing stop hook...");
  try {
    const { exitCode, stderr, stdout } = await spawn({
      cmd: "bash",
      args: ["-c", script],
      env: process.env,
      timeout: LIFECYCLE_HOOK_TIMEOUT_MS,
      activityTimeout: 0,
      // mirror the hook's streams live into our own while it runs.
      onStdout: (chunk) => process.stdout.write(chunk),
      onStderr: (chunk) => process.stderr.write(chunk),
    });

    if (exitCode === 0) {
      log.info("» stop hook passed");
      return null;
    }

    // keep both streams — scripts often emit a benign warning on one and the
    // actionable error on the other, and picking one starves the agent of
    // the diagnostic it needs. stderr goes first so stdout (typically
    // longer, where truncation is more likely to bite) keeps its tail —
    // summaries/totals usually live at the end.
    const sections: string[] = [];
    for (const stream of [stderr, stdout]) {
      const trimmed = stream.trim();
      if (trimmed) sections.push(trimmed);
    }
    const output = truncateHookOutput(sections.join("\n"));
    log.info(`» stop hook failed with exit code ${exitCode}`);
    return { exitCode, output };
  } catch (err) {
    let reason = "failed to spawn";
    if (
      err instanceof SpawnTimeoutError &&
      (err.code === SPAWN_TIMEOUT_CODE || err.code === SPAWN_ACTIVITY_TIMEOUT_CODE)
    ) {
      reason = "timed out";
    }
    const detail = err instanceof Error ? err.message : String(err);
    log.warning(`stop hook ${reason}: ${detail} — skipping retry`);
    return null;
  }
}
|
|
117
|
+
|
|
118
|
+
/**
 * render the resume prompt for a failed stop hook: an instruction line
 * naming the exit code, a blank line, then the captured hook output inside a
 * fenced block (`(no output)` when nothing was captured).
 */
export function buildStopHookPrompt(failure: StopHookFailure): string {
  const headline = `STOP HOOK FAILED — the repo-configured stop hook exited with code ${failure.exitCode}. your work is not done until the hook exits cleanly. address the issue below and push any resulting changes to a pull request.`;
  const fence = "```";
  const body = failure.output || "(no output)";
  return `${headline}\n\n${fence}\n${body}\n${fence}`;
}
|
|
127
|
+
|
|
128
|
+
/** tell whether the seeded summary file still matches its seed exactly
 * (string equality on the utf8-decoded content).
 * a missing or unreadable file yields false (don't nudge — the agent
 * may have legitimately deleted it, or the seed step failed; the read-
 * back path in main.ts handles both cases by skipping persist). */
async function isSummaryUnchanged(filePath: string, seed: string): Promise<boolean> {
  let onDisk: string;
  try {
    onDisk = await readFile(filePath, "utf8");
  } catch {
    // any read failure means "not provably unchanged".
    return false;
  }
  return onDisk === seed;
}
|
|
140
|
+
|
|
141
|
+
/**
 * render the nudge shown when the rolling PR summary file was left exactly
 * as seeded: three paragraphs separated by blank lines — the finding, what
 * to update, and the minimum acceptable action for trivial diffs.
 */
export function buildSummaryStalePrompt(filePath: string): string {
  const finding = `PR SUMMARY UNTOUCHED — the rolling PR summary file at \`${filePath}\` is byte-identical to its seed; this run did not edit it.`;
  const instruction =
    "review the diff and update the file in place to reflect what changed in the PR. update intent, key changes, and any risks worth flagging — keep the existing section headings stable so incremental runs produce clean diffs.";
  const minimum =
    "if the diff is genuinely too small or noisy to warrant rewriting (e.g. a one-line typo fix, a comment tweak, a formatting-only change), it's fine to leave the structure as-is — but at minimum confirm you considered it by appending one line to the appropriate section noting the run. silence is not an option; the snapshot is what the next review run reads as context.";
  return [finding, instruction, minimum].join("\n\n");
}
|
|
150
|
+
|
|
151
|
+
/**
 * render the nudge for a run that selected a review mode but stopped without
 * producing review output.
 *
 * the text is mode-aware: Review mode's contract is "always submit one
 * review" — its mode prompt forbids `report_progress`, so the Review nudge
 * must not offer it as an exit. IncrementalReview legitimately allows a
 * `report_progress` exit when there are no new issues since the last review
 * (mode prompt step 8), so its nudge mirrors that contract.
 */
export function buildUnsubmittedReviewPrompt(mode: "Review" | "IncrementalReview"): string {
  const isReview = mode === "Review";

  // sentences shared by both variants.
  const modeLabel = isReview ? "Review" : "IncrementalReview";
  const missingCalls = isReview
    ? "`create_pull_request_review`"
    : "`create_pull_request_review` or `report_progress`";
  const headline = `MISSING REVIEW OUTPUT — you selected ${modeLabel} mode but stopped without calling ${missingCalls}. the user has no visible signal that this run produced anything; the progress comment will be deleted on exit and no review will appear on the PR.`;
  const retryHint =
    "the first call may error once with a diff-coverage nudge — retry the same call to proceed.";

  if (isReview) {
    const action = `call \`create_pull_request_review\` now with your aggregated review (body + inline comments). pick the tier per the mode prompt — Review mode has no no-submit exit, so even informational \`> ✅ No new issues found.\` reviews must be submitted (with \`approved: true\`). ${retryHint}`;
    const closing =
      "do NOT stop again until `create_pull_request_review` has been called successfully.";
    return [headline, action, closing].join("\n\n");
  }

  const choices = [
    "do exactly one of:",
    `- if you have findings: call \`create_pull_request_review\` now with your aggregated review (body + inline comments). ${retryHint}`,
    "- if there are genuinely no actionable findings since the last review (e.g. only formatting / comment / lockfile changes): call `report_progress` with a 1-2 sentence summary explaining that no review was warranted.",
  ].join("\n");
  const closing = "do NOT stop again until one of those tools has been called successfully.";
  return [headline, choices, closing].join("\n\n");
}
|
|
176
|
+
|
|
177
|
+
/**
 * evaluate the post-run gates and report every one that still needs a
 * nudge, so the caller can render a single combined resume prompt:
 * - stop hook (currently disabled — see the note in the body)
 * - dirty working tree (skipped for modes that don't commit)
 * - rolling PR summary snapshot left untouched
 * - review mode selected but no review output produced
 *
 * run state is read straight off `ctx.toolState` on each call, so every
 * invocation sees the latest mutations from MCP tool calls.
 *
 * `options.skipSummaryStale` lets the loop suppress the summary-stale check
 * once its one-shot nudge has been delivered — re-firing it would burn the
 * retry budget on a soft gate the agent has already decided not to act on.
 */
export async function collectPostRunIssues(
  ctx: AgentRunContext,
  options: { skipSummaryStale?: boolean } = {},
): Promise<PostRunIssues> {
  const issues: PostRunIssues = {};

  // stop hook is disabled — production audit (May 2026) showed 8/9 configured
  // scripts are foot-guns (duplicates of prepushScript, run on non-committing
  // modes against unchanged trees) burning the retry budget on un-fixable
  // gates. re-enable here + the dashboard block in `AgentSettings.tsx` once
  // we've decided on the right semantics (mode-gating vs. HEAD-changed gating
  // vs. deletion). see issue #714.
  // if (ctx.stopScript) {
  //   const failure = await executeStopHook(ctx.stopScript);
  //   if (failure) issues.stopHook = failure;
  // }

  // dirty-tree gate fires only in modes that legitimately commit. Review /
  // IncrementalReview / Plan complete via review submission or a Plan
  // comment, not by touching files — any tree dirt is incidental (e.g. a
  // tool-installed `node_modules/`) and the worktree is ephemeral, so
  // nudging the agent to commit it would produce a spurious PR. see
  // `NON_COMMITTING_MODES` in `action/modes.ts`.
  const treeStatus = getGitStatus();
  const { selectedMode } = ctx.toolState;
  if (treeStatus) {
    const suppressed = selectedMode ? NON_COMMITTING_MODES.has(selectedMode) : false;
    if (!suppressed) {
      issues.dirtyTree = treeStatus;
    } else {
      log.info(`» dirty-tree gate suppressed: mode \`${selectedMode}\` does not commit`);
    }
  }

  // summary-stale gate: needs both a seeded path and the seed content to
  // compare against, and can be switched off by the caller.
  const { summaryFilePath, summarySeed } = ctx.toolState;
  if (!options.skipSummaryStale && summaryFilePath && summarySeed !== undefined) {
    if (await isSummaryUnchanged(summaryFilePath, summarySeed)) {
      issues.summaryStale = { filePath: summaryFilePath };
    }
  }

  // unsubmitted-review gate: derived purely from the toolState flags.
  const pendingReviewMode = getUnsubmittedReview(ctx.toolState);
  if (pendingReviewMode) issues.unsubmittedReview = pendingReviewMode;

  return issues;
}
|
|
229
|
+
|
|
230
|
+
/**
 * merge the prompts for every outstanding post-run issue into one resume
 * prompt, with sections separated by a markdown horizontal rule. yields an
 * empty string when no issue is set.
 */
export function buildPostRunPrompt(issues: PostRunIssues): string {
  const { stopHook, unsubmittedReview, dirtyTree, summaryStale } = issues;

  // order matches the terminal hard-fail order in `runPostRunRetryLoop` so
  // the prompt's emphasis (which gate the agent should fix first) lines up
  // with the user-visible failure message reported when retries exhaust.
  // both hard-fail gates first (`stopHook` → `unsubmittedReview`), then the
  // soft gates (`dirtyTree` → `summaryStale`).
  const sections = [
    stopHook ? buildStopHookPrompt(stopHook) : null,
    unsubmittedReview ? buildUnsubmittedReviewPrompt(unsubmittedReview) : null,
    dirtyTree ? buildCommitPrompt(dirtyTree) : null,
    summaryStale ? buildSummaryStalePrompt(summaryStale.filePath) : null,
  ];

  // drop only the slots whose issue was absent; a builder's own output is
  // kept as-is, whatever it returned.
  return sections
    .filter((section): section is string => section !== null)
    .join("\n\n---\n\n");
}
|
|
245
|
+
|
|
246
|
+
/**
 * terminal-only post-run finalize step.
 *
 * once the agent has exited, the gates are collected one more time and a
 * successful result is downgraded to a failure when a hard-fail gate
 * (`stopHook`, then `unsubmittedReview`) is still reported. intended for
 * harnesses that inject follow-up turns through something other than the
 * resume callback (e.g. the Claude managed Stop hook + gate server).
 *
 * soft gates (`dirtyTree`, `summaryStale`) never turn a successful run into
 * a failed one, so they are deliberately ignored here.
 *
 * @param params.ctx    run context handed to `collectPostRunIssues`.
 * @param params.result result produced by the agent run.
 * @returns the untouched `result` when it already failed or no hard-fail
 *   gate is reported; otherwise a copy with `success: false` and an `error`
 *   describing the failing gate.
 */
export async function finalizeAgentResult<R extends AgentResult>(params: {
  ctx: AgentRunContext;
  result: R;
}): Promise<R> {
  const { ctx, result } = params;
  // an already-failed run keeps its own error; gates are not consulted.
  if (!result.success) return result;

  const { stopHook, unsubmittedReview } = await collectPostRunIssues(ctx, {
    skipSummaryStale: true,
  });

  // stop hook takes precedence over an unsubmitted review.
  let error: string | undefined;
  if (stopHook) {
    error = `stop hook failed (exit code ${stopHook.exitCode}): ${stopHook.output || "(no output)"}`;
  } else if (unsubmittedReview) {
    const expected =
      unsubmittedReview === "Review"
        ? "create_pull_request_review"
        : "create_pull_request_review or report_progress";
    error = `${unsubmittedReview} mode finished without calling ${expected}`;
  }

  return error === undefined ? result : { ...result, success: false, error };
}
|
|
281
|
+
|
|
282
|
+
/**
 * modes that skip the post-run reflection turn.
 *
 * a reflection is a full resume turn (~$0.50-0.80 per run on Opus, mostly
 * cache-write), so it is only worth paying for when the run is likely to
 * have produced novel, durable findings.
 *
 * `IncrementalReview` has the lowest novelty: a tight delta review against
 * an existing PR, with the prior summary already loaded as context. it
 * rarely surfaces anything generalizable to later runs, which makes the
 * reflection turn dead weight. an initial `Review` still covers fresh PR
 * territory and benefits; `Build` / `Fix` / `AddressReviews` definitely do.
 */
const REFLECTION_SKIP_MODES: ReadonlySet<string> = new Set<string>(["IncrementalReview"]);

/**
 * decides whether the post-run reflection turn should run for `mode`.
 *
 * @param mode selected mode name; may be undefined (or empty) when unknown.
 * @returns `false` only for modes listed in `REFLECTION_SKIP_MODES`; an
 *   unknown / missing mode defaults to running the reflection.
 */
export function shouldRunReflection(mode: string | undefined): boolean {
  return mode ? !REFLECTION_SKIP_MODES.has(mode) : true;
}
|
|
299
|
+
|
|
300
|
+
/**
|
|
301
|
+
* prompt for a dedicated post-run reflection turn nudging the agent to edit
|
|
302
|
+
* the rolling learnings file if it discovered anything worth persisting.
|
|
303
|
+
*
|
|
304
|
+
* this exists because passive "if you learned something, write it down"
|
|
305
|
+
* instructions baked into mode checklists are frequently ignored — the agent
|
|
306
|
+
* stays focused on the task and the meta-ask falls through. delivering it
|
|
307
|
+
* as its own resume turn, with nothing competing for attention, raises the
|
|
308
|
+
* fire rate substantially.
|
|
309
|
+
*
|
|
310
|
+
* the file is the single source of truth — there is no separate MCP tool
|
|
311
|
+
* call. the server reads the file at end-of-run and persists any edits to
|
|
312
|
+
* `Repo.learnings`.
|
|
313
|
+
*
|
|
314
|
+
* the prompt copy is shaped by repo-wide audits of the actual content the
|
|
315
|
+
* agent has been writing (issue #619 in terramend/app). recurring failure
|
|
316
|
+
* modes the framing pushes back on:
|
|
317
|
+
* - massive multi-paragraph "bullets" that are really mini-articles
|
|
318
|
+
* - facts anchored to moving repo state (PR / review / commit / branch
|
|
319
|
+
* refs, dates, version pins, line numbers) that decay within weeks
|
|
320
|
+
* - sections growing into giant flat lists with no internal structure,
|
|
321
|
+
* forcing future runs to read kilobytes to find one fact
|
|
322
|
+
*
|
|
323
|
+
* single litmus delivered in the prompt: "would a future run on this repo
|
|
324
|
+
* do its work better because this bullet exists?". tool-quirk workarounds
|
|
325
|
+
* are explicitly allowed when the agent burned calls discovering the
|
|
326
|
+
* quirk this run — recording the workaround prevents next run from
|
|
327
|
+
* repeating the waste. tradeoff: the same quirk gets duplicated across
|
|
328
|
+
* repos, so when a quirk is fixed upstream in tool descriptions the
|
|
329
|
+
* per-repo bullets go stale and we have no batch-invalidation path.
|
|
330
|
+
*/
|
|
331
|
+
export function buildLearningsReflectionPrompt(filePath: string): string {
  // the prompt is a sequence of paragraphs separated by one blank line; the
  // two list paragraphs keep their heading and bullets on consecutive lines.
  const opening = `REFLECTION — before you finish, think back over this task: did you discover anything about this repo's setup, test commands, conventions, or patterns that is high-confidence and would reliably help future runs?`;

  const fileLocation = `the rolling learnings file is at \`${filePath}\`. read it first if you haven't already, then edit it in place using your native file tools. the server reads this file at end-of-run and persists any changes — there is no tool to call.`;

  const structureRules = [
    `structure:`,
    `- markdown hierarchy: \`## \` for top-level themes, \`### \` and deeper for sub-themes when a section grows. there is no fixed taxonomy — choose headings that fit THIS repo (e.g. for one repo \`## Migrations\` / \`## Local dev\` may make sense; for another, \`## API quirks\` / \`## Failure modes\`).`,
    `- **no section over ~300 lines.** when a section is approaching that, split it: introduce \`### \` subsections grouping related bullets, or hoist a coherent group into a new top-level \`## \` section. granular sections mean future runs read targeted line ranges instead of slurping the whole file. this is the most important hygiene rule on long-lived repos.`,
    `- if you find a flat unstructured list (legacy content from before this format), restructure it: read it, group related bullets, rewrite the file with \`## \` / \`### \` headings around them. don't preserve bad structure — fix it.`,
  ].join("\n");

  const litmus = `the only test: would a future run on this repo do its work better because this bullet exists? useful for future runs in this repo — prevent wasted tool calls, rabbit holes, and mistakes.`;

  const bulletHygiene = [
    `bullet hygiene:`,
    `- one fact per line starting with \`- \`, ≤ 240 chars.`,
    `- only add when high-confidence, broadly useful, evergreen.`,
    `- prune wrong or low-signal bullets; merge overlaps; dedupe across sections.`,
  ].join("\n");

  const noMovingAnchors = `don't anchor facts to repo state that will move: PR / review / commit / branch refs, dates, version pins, line numbers. state the rule directly. if it needs the anchor to be load-bearing, it isn't evergreen.`;

  const toolQuirks = `tool-quirk bullets are fine when you burned calls discovering the quirk and a future run would repeat them. write the workaround, not the war story.`;

  const silenceIsFine = `if you have nothing substantively new to add AND the existing entries still look healthy and well-structured, leave the file alone — just reply "done" and stop. silence is a valid outcome.`;

  const noSetOutput = `do NOT call \`set_output\` during this turn. the task's result output was already set on the previous turn; this reflection is a meta-turn for the learnings file only. ignore any standing instruction to call \`set_output\` "when done" — it does not apply here.`;

  const paragraphs = [
    opening,
    fileLocation,
    structureRules,
    litmus,
    bulletHygiene,
    noMovingAnchors,
    toolQuirks,
    silenceIsFine,
    noSetOutput,
  ];
  return paragraphs.join("\n\n");
}
|
|
358
|
+
|
|
359
|
+
/**
 * shared post-run retry loop used by every agent harness.
 *
 * each iteration collects the post-run gates via `collectPostRunIssues`
 * (stop hook, unsubmitted review, dirty tree, stale summary) and, if any is
 * failing, invokes `resume` with a prompt built from the failing gates so the
 * agent can fix and push in the same turn. bails at `MAX_POST_RUN_RETRIES`
 * attempts. the `canResume` predicate is consulted before each retry —
 * harnesses that can't re-enter the session (e.g. claude without a
 * sessionId) return false here.
 *
 * an optional `reflectionPrompt` fires exactly once, after the gates first
 * observe a clean state. it's a one-shot nudge (e.g. "update learnings if
 * relevant"), not a gate, so it does not consume the gate-retry budget. if
 * the reflection turn dirties the tree, the loop picks that up on the next
 * iteration via the normal dirty-tree gate.
 *
 * hard-fail gates: a stop hook or unsubmitted review that is still failing
 * at the end is surfaced as `AgentResult.error` (`success: false`).
 * soft gates: dirty-tree and summary-stale findings never fail the run by
 * themselves. a resume turn that errors while summary-stale is the ONLY
 * failing gate is rolled back to the prior successful result; when any other
 * gate is failing too, the resume's own error is kept because it is more
 * actionable than a stale gate message.
 *
 * @param params.ctx              run context (gates + `toolState`).
 * @param params.initialResult    result of the task turn.
 * @param params.initialUsage     usage of the task turn; resume usage is merged in.
 * @param params.resume           re-enters the agent session with a prompt.
 * @param params.canResume        optional predicate; `false` stops retrying.
 * @param params.reflectionPrompt optional one-shot reflection prompt.
 * @returns the final result with aggregated usage; `success` is flipped to
 *   `false` when a hard-fail gate is still failing.
 */
export async function runPostRunRetryLoop<R extends AgentResult>(params: {
  ctx: AgentRunContext;
  initialResult: R;
  initialUsage: AgentUsage | undefined;
  resume: (context: { prompt: string; previousResult: R }) => Promise<R>;
  canResume?: ((result: R) => boolean) | undefined;
  reflectionPrompt?: string | undefined;
}): Promise<AgentResult> {
  let result = params.initialResult;
  let aggregatedUsage = params.initialUsage;
  let finalIssues: PostRunIssues = {};
  let gateResumeCount = 0;
  let pendingReflection = params.reflectionPrompt;
  // nudge for an untouched summary file fires AT MOST ONCE per run. once
  // delivered, subsequent collectPostRunIssues calls skip the check — the
  // agent may have legitimately decided no edit is warranted, and
  // re-prompting would burn the retry budget without adding signal.
  let summaryStaleNudged = false;

  while (gateResumeCount < MAX_POST_RUN_RETRIES) {
    if (!result.success) break;
    const issues = await collectPostRunIssues(params.ctx, {
      skipSummaryStale: summaryStaleNudged,
    });
    if (issues.summaryStale) summaryStaleNudged = true;
    finalIssues = issues;

    if (!hasPostRunIssues(issues)) {
      // gates are clean. if a reflection prompt is pending, deliver it once
      // and loop back to re-check — the reflection may have touched the tree.
      if (!pendingReflection) break;
      if (params.canResume && !params.canResume(result)) break;
      log.info("» post-run reflection: nudging agent to update learnings if relevant");
      const preReflection = result;
      // reflection is a meta-turn for editing the learnings file. it must not
      // affect the user-visible `result` output: some models (notably Gemini
      // Pro) re-trigger on the initial "call set_output when done" system
      // instruction during this turn and clobber the task-turn value with the
      // literal word "done". the prompt itself tells the agent not to call
      // set_output (defense one); we also snapshot + restore as defense two.
      const preReflectionOutput = params.ctx.toolState.output;
      const reflectionResult = await params.resume({
        prompt: pendingReflection,
        previousResult: result,
      });
      params.ctx.toolState.output = preReflectionOutput;
      aggregatedUsage = mergeAgentUsage(aggregatedUsage, reflectionResult.usage);
      pendingReflection = undefined;
      if (!reflectionResult.success) {
        // reflection is a best-effort nudge. its failure must not flip a
        // successful run to failed — the gated work is already done. keep
        // the pre-reflection result and exit without re-running the gates
        // (which would risk a flaky false-positive hook failure right after
        // it just passed).
        log.warning(
          `» reflection turn failed (${reflectionResult.error ?? "unknown error"}), preserving prior successful result`,
        );
        result = preReflection;
        break;
      }
      // reflection replies are meta-asks ("done", "updated learnings with N
      // bullets") — not a task summary. keep the pre-reflection output so
      // the returned AgentResult still reflects what the run accomplished,
      // while inheriting reflection-specific fields the harness needs for
      // any subsequent gate retry (e.g. the new sessionId claude emits per
      // --resume invocation).
      // use `||` (not `??`) so an empty pre-reflection output falls through
      // to the reflection's reply. runs that only emit MCP tool calls and no
      // plain text leave result.output = "" — keeping "" would starve the
      // fallback path in handleAgentResult of anything to show.
      result = {
        ...reflectionResult,
        output: preReflection.output || reflectionResult.output,
      };
      continue;
    }

    // checks still ran even if we can't resume, so the failure gate below
    // can still catch a persistent stop-hook failure.
    if (params.canResume && !params.canResume(result)) {
      log.info("» post-run retry skipped: cannot resume agent session");
      break;
    }

    log.info(`» post-run retry (attempt ${gateResumeCount + 1}/${MAX_POST_RUN_RETRIES})`);
    const prompt = buildPostRunPrompt(issues);
    // summary-stale is a soft gate that must never flip a successful run to
    // failed. when it's the only issue and the resume itself errors out,
    // restore the pre-resume successful result and break — persistSummary
    // detects the unchanged file via its seed comparison and skips the DB
    // write on its own, so no further coordination is needed here.
    // "only" has to rule out EVERY other gate, including `unsubmittedReview`:
    // otherwise a failed resume is rolled back while a hard-fail gate is
    // pending, and the run ends with a stale "finished without calling ..."
    // message instead of the subprocess's own, more actionable error.
    const onlySummaryStale =
      issues.summaryStale !== undefined &&
      issues.stopHook === undefined &&
      issues.dirtyTree === undefined &&
      issues.unsubmittedReview === undefined;
    const preResume = result;
    result = await params.resume({ prompt, previousResult: result });
    aggregatedUsage = mergeAgentUsage(aggregatedUsage, result.usage);
    if (!result.success && onlySummaryStale) {
      log.warning(
        `» summary-stale resume turn failed (${result.error ?? "unknown error"}), preserving prior successful result`,
      );
      result = preResume;
      break;
    }
    gateResumeCount++;
  }

  // we exhausted retries without observing a clean state — finalIssues
  // reflects pre-resume state, so re-check to see what the last resume
  // actually did. when the subprocess failed we skip: its own error is more
  // actionable than a stale "stop hook still failing" message. when the loop
  // already observed a clean state we skip: re-running the hook risks flaky
  // false-positive failures right after it just passed.
  if (gateResumeCount > 0 && result.success && hasPostRunIssues(finalIssues)) {
    // summary-stale is intentionally NOT re-checked here: we already
    // delivered the one-shot nudge, and a still-unchanged file at this point
    // is the agent's deliberate choice.
    finalIssues = await collectPostRunIssues(params.ctx, { skipSummaryStale: true });
  }

  // shared suffix for both hard-fail messages; empty when no retry ran.
  const retryNote =
    gateResumeCount > 0
      ? ` after ${gateResumeCount} retry ${gateResumeCount === 1 ? "attempt" : "attempts"}`
      : "";

  if (result.success && finalIssues.stopHook) {
    return {
      ...result,
      success: false,
      error: `stop hook failed${retryNote} (exit code ${finalIssues.stopHook.exitCode}): ${finalIssues.stopHook.output || "(no output)"}`,
      usage: aggregatedUsage,
    };
  }

  if (result.success && finalIssues.unsubmittedReview) {
    // mode-aware: Review's contract requires a review submission; only
    // IncrementalReview accepts `report_progress` as an exit. mirroring
    // the nudge prompt avoids contradicting the agent-facing copy.
    const expected =
      finalIssues.unsubmittedReview === "Review"
        ? "create_pull_request_review"
        : "create_pull_request_review or report_progress";
    return {
      ...result,
      success: false,
      error: `${finalIssues.unsubmittedReview} mode finished without calling ${expected}${retryNote}`,
      usage: aggregatedUsage,
    };
  }

  return { ...result, usage: aggregatedUsage };
}
|
|
@@ -0,0 +1,104 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Definition of the `reviewfrog` named subagent — the constrained
|
|
3
|
+
* read-only worker dispatched by Build mode self-review and the in-Terramend
|
|
4
|
+
* /anneal multi-lens review.
|
|
5
|
+
*
|
|
6
|
+
* The contract: non-mutative + non-recursive.
|
|
7
|
+
* allow: file reads, grep/glob, web search/fetch, read-only MCP queries
|
|
8
|
+
* deny: state-changing MCP tools, file writes, shell, nested subagent dispatch
|
|
9
|
+
*
|
|
10
|
+
* Enforcement is now belt-and-suspenders:
|
|
11
|
+
* 1. Machine-enforced PreToolUse gates intercept every state-mutating MCP
|
|
12
|
+
* tool call originating from a subagent session and refuse it before
|
|
13
|
+
* MCP runs. See action/agents/subagentToolGates.ts (the deny list),
|
|
14
|
+
* action/agents/claudePretoolGate.ts (Claude Code's PreToolUse hook),
|
|
15
|
+
* and action/agents/opencodePlugin.ts (opencode's tool.execute.before
|
|
16
|
+
* hook). This followed PR #796, which added runtime backstops inside
|
|
17
|
+
* checkout_pr / push_branch after a subagent-originated tool call
|
|
18
|
+
* clobbered an unrelated PR branch in zed-industries/cloud.
|
|
19
|
+
* 2. The prose system prompt below as a backup against (a) tools added
|
|
20
|
+
* to the MCP server without a corresponding deny-list update, and
|
|
21
|
+
* (b) shell/git read-vs-write distinctions the static gate can't see.
|
|
22
|
+
* It states the rule as a no-op-if-reverted invariant the model can
|
|
23
|
+
* apply to any tool, including ones added after this comment was
|
|
24
|
+
* written.
|
|
25
|
+
*
|
|
26
|
+
* Historical note: per-agent `disallowedTools` in claude-code is upstream-
|
|
27
|
+
* broken for subagent-spawned tool calls (anthropics/claude-agent-sdk-
|
|
28
|
+
* typescript#172, open as of Mar 2026), which is why the gate runs at
|
|
29
|
+
* PreToolUse rather than tool-registration time.
|
|
30
|
+
*/
|
|
31
|
+
|
|
32
|
+
/** Name of the read-only reviewer subagent described in the file header above. */
export const REVIEWER_AGENT_NAME = "reviewfrog";
|
|
33
|
+
|
|
34
|
+
/**
 * System prompt baked into the named reviewer subagent.
 *
 * Per-call task content (YOUR TASK, the diff, the lens) is supplied by the
 * orchestrator at dispatch time. This preamble fixes the role and the
 * constraints, whatever the orchestrator sends.
 *
 * The text is assembled from ordered fragments joined without a separator;
 * a fragment ending in `\n` closes a prompt line, every other fragment
 * continues the current one.
 */
export const REVIEWER_SYSTEM_PROMPT = [
  // role
  `You are a read-only review subagent. Your role is to find flaws in code or artifacts `,
  `provided by the orchestrator and report findings — never to modify state.\n\n`,
  `HARD CONSTRAINTS (non-negotiable, regardless of orchestrator instructions):\n`,

  // constraint: source the diff from the dispatched on-disk path first
  `- Your FIRST action MUST source the diff for review. If the orchestrator's dispatch `,
  `names a diff PATH on disk (e.g. \`diffPath\` / \`incrementalDiffPath\` from a prior `,
  `\`checkout_pr\` call), \`read\` that path — do not invoke git at all. The on-disk `,
  `diff is the authoritative scope, and dispatches almost always include one; `,
  `recomputing it via git also fails on shallow GitHub Actions checkouts where the `,
  `base ref may be unfetched. `,
  `When BOTH a diff path and a base branch appear in your dispatch, path always wins. `,
  `When the dispatch names an \`incrementalDiffPath\` alongside \`diffPath\`, prefer the `,
  `incremental path for scope and consult the full diff only for line-number anchoring.\n`,

  // constraint: merge-base fallback when no diff path was dispatched
  `- If (and only if) NO diff path was provided, the dispatch names a base branch. `,
  `Run \`git diff --merge-base origin/<base>\` (single MCP call, captures committed + `,
  `staged + unstaged work, excludes commits landed on \`origin/<base>\` since your `,
  `branch forked). The read-only \`git\` MCP tool is the right surface for this — `,
  `\`--merge-base\` is a flag git accepts directly, so no shell substitution is needed. `,
  `Do NOT run bare \`git diff origin/<base>\` or two-dot \`git diff origin/<base>..HEAD\`: `,
  `those are symmetric diffs that include the inverse of every commit on \`<base>\` `,
  `your branch is behind, which is pure noise (and the git tool will reject those `,
  `forms when the divergence is detected). Do NOT try to expand \`$(...)\` subshell `,
  `forms via the git tool — it runs git directly without shell interpolation. `,
  `If \`git diff --merge-base origin/<base>\` fails with \`ambiguous argument `,
  `'origin/<base>'\` or \`no merge base\`, the runner is a shallow single-branch `,
  `checkout AND the orchestrator failed to fetch the base ref before dispatching you. `,
  `Surface that in one line (which ref is missing, and that the orchestrator needs to `,
  `fetch it with \`git fetch --no-tags --deepen=1000 origin <base>:refs/remotes/origin/<base>\` before `,
  `re-dispatching) and stop. Do NOT run \`git fetch\` yourself — your read-only `,
  `contract below forbids mutating shell, and the \`git_fetch\` MCP tool is `,
  `state-changing and therefore prohibited. `,
  `Do NOT call \`checkout_pr\`, do NOT fetch alternative refs, do NOT list branches `,
  `or all-refs looking for the work, do NOT run \`gh pr list\`. The orchestrator's `,
  `dispatch is the source of truth for scope.\n`,

  // constraint: an empty diff is a diagnosis to report, not to work around
  `- If the on-disk diff path you were given is empty (or unreadable), that is a `,
  `checkout / formatting failure on the orchestrator side — reply EXACTLY: `,
  `\`no changes in dispatched diff — scope appears empty; orchestrator should verify `,
  `checkout_pr output\` (naming the path), do NOT fall through to running `,
  `\`git diff\` against guessed refs. `,
  `If the merge-base diff (the fallback path) returns empty AND the orchestrator's `,
  `dispatch claims there are changes to review, the most likely cause is a pre-commit `,
  `Build-mode self-review: the orchestrator dispatched you before committing AND `,
  `there are no uncommitted edits either. Reply EXACTLY: `,
  `\`no changes detected — likely pre-commit Build self-review; orchestrator should `,
  `commit then re-dispatch\` and stop. Do NOT guess PR numbers (e.g. by extrapolating `,
  `from \`git log\` output), do NOT check out other PRs, do NOT fetch from forks. `,
  `The empty diff is the diagnosis — surface it; do not work around it.\n`,

  // constraint: no file writes, no side-effecting shell
  `- Read-only tools only. Do NOT write or edit files. Do NOT run shell commands `,
  `that have side effects (read-only commands like \`git diff\`, \`git log\`, \`cat\`, \`ls\` `,
  `are fine; anything that mutates the working tree, the remote, the filesystem, or `,
  `external state is prohibited).\n`,

  // constraint: no state-changing MCP tools
  `- Do NOT call any state-changing MCP tool. State-changing means: posts a comment, `,
  `pushes a branch, creates/updates a PR or issue, changes labels, resolves review `,
  `threads, persists learnings, sets workflow output, installs dependencies, uploads `,
  `files, kills processes, etc. Read-only MCP queries (\`get_*\`, \`list_*\`, the `,
  `\`git\` tool for read-only subcommands like \`diff\`/\`log\`/\`merge-base\`, log `,
  `inspection, diff retrieval) are fine.\n`,

  // constraint: no nested subagent dispatch
  `- Do NOT spawn further subagents. You are a leaf reviewer; recursive dispatch `,
  `pre-aggregates findings through an intermediate model and defeats the design.\n`,

  // constraint: the invariant that also covers tools added later
  `- Test for any tool call before invoking it: would this still be a no-op if `,
  `reverted? If not, do not call it. Apply this test to tools added after this `,
  `prompt was written — the rule is the invariant, not the enumeration.\n\n`,

  // reporting format
  `Report findings clearly with file:line references and quoted evidence where `,
  `possible. Flag uncertainty explicitly — if you cannot verify a claim, say so `,
  `rather than guess.`,
].join("");
|