@caupulican/pi-adaptative 0.80.86 → 0.80.88
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +149 -0
- package/dist/core/agent-session.d.ts +377 -1
- package/dist/core/agent-session.d.ts.map +1 -1
- package/dist/core/agent-session.js +1791 -41
- package/dist/core/agent-session.js.map +1 -1
- package/dist/core/autonomy/approval-gate.d.ts +4 -0
- package/dist/core/autonomy/approval-gate.d.ts.map +1 -0
- package/dist/core/autonomy/approval-gate.js +27 -0
- package/dist/core/autonomy/approval-gate.js.map +1 -0
- package/dist/core/autonomy/bounded-completion.d.ts +27 -0
- package/dist/core/autonomy/bounded-completion.d.ts.map +1 -0
- package/dist/core/autonomy/bounded-completion.js +44 -0
- package/dist/core/autonomy/bounded-completion.js.map +1 -0
- package/dist/core/autonomy/contracts.d.ts +129 -0
- package/dist/core/autonomy/contracts.d.ts.map +1 -0
- package/dist/core/autonomy/contracts.js +2 -0
- package/dist/core/autonomy/contracts.js.map +1 -0
- package/dist/core/autonomy/gates.d.ts +15 -0
- package/dist/core/autonomy/gates.d.ts.map +1 -0
- package/dist/core/autonomy/gates.js +205 -0
- package/dist/core/autonomy/gates.js.map +1 -0
- package/dist/core/autonomy/lane-tracker.d.ts +48 -0
- package/dist/core/autonomy/lane-tracker.d.ts.map +1 -0
- package/dist/core/autonomy/lane-tracker.js +125 -0
- package/dist/core/autonomy/lane-tracker.js.map +1 -0
- package/dist/core/autonomy/path-scope.d.ts +9 -0
- package/dist/core/autonomy/path-scope.d.ts.map +1 -0
- package/dist/core/autonomy/path-scope.js +122 -0
- package/dist/core/autonomy/path-scope.js.map +1 -0
- package/dist/core/autonomy/risk-assessment.d.ts +3 -0
- package/dist/core/autonomy/risk-assessment.d.ts.map +1 -0
- package/dist/core/autonomy/risk-assessment.js +122 -0
- package/dist/core/autonomy/risk-assessment.js.map +1 -0
- package/dist/core/autonomy/session-lane-record.d.ts +10 -0
- package/dist/core/autonomy/session-lane-record.d.ts.map +1 -0
- package/dist/core/autonomy/session-lane-record.js +36 -0
- package/dist/core/autonomy/session-lane-record.js.map +1 -0
- package/dist/core/autonomy/status.d.ts +40 -0
- package/dist/core/autonomy/status.d.ts.map +1 -0
- package/dist/core/autonomy/status.js +107 -0
- package/dist/core/autonomy/status.js.map +1 -0
- package/dist/core/autonomy/subagent-prompt.d.ts +21 -0
- package/dist/core/autonomy/subagent-prompt.d.ts.map +1 -0
- package/dist/core/autonomy/subagent-prompt.js +28 -0
- package/dist/core/autonomy/subagent-prompt.js.map +1 -0
- package/dist/core/autonomy/telemetry-events.d.ts +18 -0
- package/dist/core/autonomy/telemetry-events.d.ts.map +1 -0
- package/dist/core/autonomy/telemetry-events.js +60 -0
- package/dist/core/autonomy/telemetry-events.js.map +1 -0
- package/dist/core/context/artifact-retrieval.d.ts +49 -0
- package/dist/core/context/artifact-retrieval.d.ts.map +1 -0
- package/dist/core/context/artifact-retrieval.js +49 -0
- package/dist/core/context/artifact-retrieval.js.map +1 -0
- package/dist/core/context/context-artifacts.d.ts +94 -0
- package/dist/core/context/context-artifacts.d.ts.map +1 -0
- package/dist/core/context/context-artifacts.js +307 -0
- package/dist/core/context/context-artifacts.js.map +1 -0
- package/dist/core/context/context-audit.d.ts +66 -0
- package/dist/core/context/context-audit.d.ts.map +1 -0
- package/dist/core/context/context-audit.js +173 -0
- package/dist/core/context/context-audit.js.map +1 -0
- package/dist/core/context/context-item.d.ts +117 -0
- package/dist/core/context/context-item.d.ts.map +1 -0
- package/dist/core/context/context-item.js +36 -0
- package/dist/core/context/context-item.js.map +1 -0
- package/dist/core/context/context-prompt-enforcement.d.ts +73 -0
- package/dist/core/context/context-prompt-enforcement.d.ts.map +1 -0
- package/dist/core/context/context-prompt-enforcement.js +153 -0
- package/dist/core/context/context-prompt-enforcement.js.map +1 -0
- package/dist/core/context/context-prompt-policy.d.ts +90 -0
- package/dist/core/context/context-prompt-policy.d.ts.map +1 -0
- package/dist/core/context/context-prompt-policy.js +73 -0
- package/dist/core/context/context-prompt-policy.js.map +1 -0
- package/dist/core/context/context-retention.d.ts +36 -0
- package/dist/core/context/context-retention.d.ts.map +1 -0
- package/dist/core/context/context-retention.js +108 -0
- package/dist/core/context/context-retention.js.map +1 -0
- package/dist/core/context/context-store.d.ts +37 -0
- package/dist/core/context/context-store.d.ts.map +1 -0
- package/dist/core/context/context-store.js +45 -0
- package/dist/core/context/context-store.js.map +1 -0
- package/dist/core/context/memory-diagnostics.d.ts +50 -0
- package/dist/core/context/memory-diagnostics.d.ts.map +1 -0
- package/dist/core/context/memory-diagnostics.js +43 -0
- package/dist/core/context/memory-diagnostics.js.map +1 -0
- package/dist/core/context/memory-index-store.d.ts +28 -0
- package/dist/core/context/memory-index-store.d.ts.map +1 -0
- package/dist/core/context/memory-index-store.js +38 -0
- package/dist/core/context/memory-index-store.js.map +1 -0
- package/dist/core/context/memory-prompt-block.d.ts +34 -0
- package/dist/core/context/memory-prompt-block.d.ts.map +1 -0
- package/dist/core/context/memory-prompt-block.js +58 -0
- package/dist/core/context/memory-prompt-block.js.map +1 -0
- package/dist/core/context/memory-provider-contract.d.ts +114 -0
- package/dist/core/context/memory-provider-contract.d.ts.map +1 -0
- package/dist/core/context/memory-provider-contract.js +121 -0
- package/dist/core/context/memory-provider-contract.js.map +1 -0
- package/dist/core/context/memory-retrieval.d.ts +27 -0
- package/dist/core/context/memory-retrieval.d.ts.map +1 -0
- package/dist/core/context/memory-retrieval.js +91 -0
- package/dist/core/context/memory-retrieval.js.map +1 -0
- package/dist/core/context/okf-memory-provider.d.ts +26 -0
- package/dist/core/context/okf-memory-provider.d.ts.map +1 -0
- package/dist/core/context/okf-memory-provider.js +154 -0
- package/dist/core/context/okf-memory-provider.js.map +1 -0
- package/dist/core/context/okf-memory.d.ts +42 -0
- package/dist/core/context/okf-memory.d.ts.map +1 -0
- package/dist/core/context/okf-memory.js +175 -0
- package/dist/core/context/okf-memory.js.map +1 -0
- package/dist/core/context/policy-engine.d.ts +66 -0
- package/dist/core/context/policy-engine.d.ts.map +1 -0
- package/dist/core/context/policy-engine.js +171 -0
- package/dist/core/context/policy-engine.js.map +1 -0
- package/dist/core/context/policy-types.d.ts +102 -0
- package/dist/core/context/policy-types.d.ts.map +1 -0
- package/dist/core/context/policy-types.js +7 -0
- package/dist/core/context/policy-types.js.map +1 -0
- package/dist/core/context/sqlite-runtime-index.d.ts +19 -0
- package/dist/core/context/sqlite-runtime-index.d.ts.map +1 -0
- package/dist/core/context/sqlite-runtime-index.js +344 -0
- package/dist/core/context/sqlite-runtime-index.js.map +1 -0
- package/dist/core/context/storage-authority.d.ts +20 -0
- package/dist/core/context/storage-authority.d.ts.map +1 -0
- package/dist/core/context/storage-authority.js +51 -0
- package/dist/core/context/storage-authority.js.map +1 -0
- package/dist/core/context/tool-output-packer.d.ts +75 -0
- package/dist/core/context/tool-output-packer.d.ts.map +1 -0
- package/dist/core/context/tool-output-packer.js +77 -0
- package/dist/core/context/tool-output-packer.js.map +1 -0
- package/dist/core/cost/session-usage.d.ts +20 -0
- package/dist/core/cost/session-usage.d.ts.map +1 -0
- package/dist/core/cost/session-usage.js +164 -0
- package/dist/core/cost/session-usage.js.map +1 -0
- package/dist/core/delegation/session-worker-result.d.ts +10 -0
- package/dist/core/delegation/session-worker-result.d.ts.map +1 -0
- package/dist/core/delegation/session-worker-result.js +36 -0
- package/dist/core/delegation/session-worker-result.js.map +1 -0
- package/dist/core/delegation/worker-result.d.ts +9 -0
- package/dist/core/delegation/worker-result.d.ts.map +1 -0
- package/dist/core/delegation/worker-result.js +152 -0
- package/dist/core/delegation/worker-result.js.map +1 -0
- package/dist/core/delegation/worker-runner.d.ts +58 -0
- package/dist/core/delegation/worker-runner.d.ts.map +1 -0
- package/dist/core/delegation/worker-runner.js +188 -0
- package/dist/core/delegation/worker-runner.js.map +1 -0
- package/dist/core/extensions/builtin.d.ts +5 -1
- package/dist/core/extensions/builtin.d.ts.map +1 -1
- package/dist/core/extensions/builtin.js +23 -1
- package/dist/core/extensions/builtin.js.map +1 -1
- package/dist/core/footer-data-provider.d.ts +5 -1
- package/dist/core/footer-data-provider.d.ts.map +1 -1
- package/dist/core/footer-data-provider.js +13 -0
- package/dist/core/footer-data-provider.js.map +1 -1
- package/dist/core/goals/goal-continuation-controller.d.ts +22 -0
- package/dist/core/goals/goal-continuation-controller.d.ts.map +1 -0
- package/dist/core/goals/goal-continuation-controller.js +88 -0
- package/dist/core/goals/goal-continuation-controller.js.map +1 -0
- package/dist/core/goals/goal-continuation-defaults.d.ts +10 -0
- package/dist/core/goals/goal-continuation-defaults.d.ts.map +1 -0
- package/dist/core/goals/goal-continuation-defaults.js +10 -0
- package/dist/core/goals/goal-continuation-defaults.js.map +1 -0
- package/dist/core/goals/goal-continuation-prompt.d.ts +18 -0
- package/dist/core/goals/goal-continuation-prompt.d.ts.map +1 -0
- package/dist/core/goals/goal-continuation-prompt.js +141 -0
- package/dist/core/goals/goal-continuation-prompt.js.map +1 -0
- package/dist/core/goals/goal-runtime-snapshot.d.ts +19 -0
- package/dist/core/goals/goal-runtime-snapshot.d.ts.map +1 -0
- package/dist/core/goals/goal-runtime-snapshot.js +23 -0
- package/dist/core/goals/goal-runtime-snapshot.js.map +1 -0
- package/dist/core/goals/goal-state.d.ts +87 -0
- package/dist/core/goals/goal-state.d.ts.map +1 -0
- package/dist/core/goals/goal-state.js +259 -0
- package/dist/core/goals/goal-state.js.map +1 -0
- package/dist/core/goals/goal-tool-core.d.ts +66 -0
- package/dist/core/goals/goal-tool-core.d.ts.map +1 -0
- package/dist/core/goals/goal-tool-core.js +146 -0
- package/dist/core/goals/goal-tool-core.js.map +1 -0
- package/dist/core/goals/session-goal-state.d.ts +10 -0
- package/dist/core/goals/session-goal-state.d.ts.map +1 -0
- package/dist/core/goals/session-goal-state.js +35 -0
- package/dist/core/goals/session-goal-state.js.map +1 -0
- package/dist/core/learning/learning-audit.d.ts +45 -0
- package/dist/core/learning/learning-audit.d.ts.map +1 -0
- package/dist/core/learning/learning-audit.js +139 -0
- package/dist/core/learning/learning-audit.js.map +1 -0
- package/dist/core/learning/learning-gate.d.ts +29 -0
- package/dist/core/learning/learning-gate.d.ts.map +1 -0
- package/dist/core/learning/learning-gate.js +150 -0
- package/dist/core/learning/learning-gate.js.map +1 -0
- package/dist/core/learning/session-learning-decision.d.ts +10 -0
- package/dist/core/learning/session-learning-decision.d.ts.map +1 -0
- package/dist/core/learning/session-learning-decision.js +36 -0
- package/dist/core/learning/session-learning-decision.js.map +1 -0
- package/dist/core/model-capability.d.ts +41 -0
- package/dist/core/model-capability.d.ts.map +1 -0
- package/dist/core/model-capability.js +101 -0
- package/dist/core/model-capability.js.map +1 -0
- package/dist/core/model-router/config-diagnostics.d.ts.map +1 -1
- package/dist/core/model-router/config-diagnostics.js +1 -0
- package/dist/core/model-router/config-diagnostics.js.map +1 -1
- package/dist/core/model-router/intent-classifier.d.ts +2 -0
- package/dist/core/model-router/intent-classifier.d.ts.map +1 -1
- package/dist/core/model-router/intent-classifier.js +154 -9
- package/dist/core/model-router/intent-classifier.js.map +1 -1
- package/dist/core/model-router/route-judge.d.ts +54 -0
- package/dist/core/model-router/route-judge.d.ts.map +1 -0
- package/dist/core/model-router/route-judge.js +128 -0
- package/dist/core/model-router/route-judge.js.map +1 -0
- package/dist/core/model-router/status.d.ts +4 -1
- package/dist/core/model-router/status.d.ts.map +1 -1
- package/dist/core/model-router/status.js +30 -6
- package/dist/core/model-router/status.js.map +1 -1
- package/dist/core/model-router/tool-escalation.d.ts +4 -6
- package/dist/core/model-router/tool-escalation.d.ts.map +1 -1
- package/dist/core/model-router/tool-escalation.js +1 -1
- package/dist/core/model-router/tool-escalation.js.map +1 -1
- package/dist/core/models/fitness-store.d.ts +40 -0
- package/dist/core/models/fitness-store.d.ts.map +1 -0
- package/dist/core/models/fitness-store.js +61 -0
- package/dist/core/models/fitness-store.js.map +1 -0
- package/dist/core/profile-registry.d.ts.map +1 -1
- package/dist/core/profile-registry.js +1 -1
- package/dist/core/profile-registry.js.map +1 -1
- package/dist/core/prompt-templates.d.ts +2 -0
- package/dist/core/prompt-templates.d.ts.map +1 -1
- package/dist/core/prompt-templates.js +12 -4
- package/dist/core/prompt-templates.js.map +1 -1
- package/dist/core/research/automata-provider.d.ts +5 -0
- package/dist/core/research/automata-provider.d.ts.map +1 -0
- package/dist/core/research/automata-provider.js +15 -0
- package/dist/core/research/automata-provider.js.map +1 -0
- package/dist/core/research/evidence-bundle.d.ts +10 -0
- package/dist/core/research/evidence-bundle.d.ts.map +1 -0
- package/dist/core/research/evidence-bundle.js +116 -0
- package/dist/core/research/evidence-bundle.js.map +1 -0
- package/dist/core/research/model-fitness.d.ts +79 -0
- package/dist/core/research/model-fitness.d.ts.map +1 -0
- package/dist/core/research/model-fitness.js +257 -0
- package/dist/core/research/model-fitness.js.map +1 -0
- package/dist/core/research/research-gate.d.ts +11 -0
- package/dist/core/research/research-gate.d.ts.map +1 -0
- package/dist/core/research/research-gate.js +82 -0
- package/dist/core/research/research-gate.js.map +1 -0
- package/dist/core/research/research-runner.d.ts +59 -0
- package/dist/core/research/research-runner.d.ts.map +1 -0
- package/dist/core/research/research-runner.js +155 -0
- package/dist/core/research/research-runner.js.map +1 -0
- package/dist/core/research/session-evidence-bundle.d.ts +11 -0
- package/dist/core/research/session-evidence-bundle.d.ts.map +1 -0
- package/dist/core/research/session-evidence-bundle.js +55 -0
- package/dist/core/research/session-evidence-bundle.js.map +1 -0
- package/dist/core/resource-loader.d.ts.map +1 -1
- package/dist/core/resource-loader.js +4 -0
- package/dist/core/resource-loader.js.map +1 -1
- package/dist/core/settings-manager.d.ts +147 -4
- package/dist/core/settings-manager.d.ts.map +1 -1
- package/dist/core/settings-manager.js +285 -9
- package/dist/core/settings-manager.js.map +1 -1
- package/dist/core/skills.d.ts +4 -0
- package/dist/core/skills.d.ts.map +1 -1
- package/dist/core/skills.js +18 -6
- package/dist/core/skills.js.map +1 -1
- package/dist/core/slash-commands.d.ts.map +1 -1
- package/dist/core/slash-commands.js +4 -0
- package/dist/core/slash-commands.js.map +1 -1
- package/dist/core/toolkit/script-registry.d.ts +34 -0
- package/dist/core/toolkit/script-registry.d.ts.map +1 -0
- package/dist/core/toolkit/script-registry.js +71 -0
- package/dist/core/toolkit/script-registry.js.map +1 -0
- package/dist/core/toolkit/script-runner.d.ts +28 -0
- package/dist/core/toolkit/script-runner.d.ts.map +1 -0
- package/dist/core/toolkit/script-runner.js +48 -0
- package/dist/core/toolkit/script-runner.js.map +1 -0
- package/dist/core/tools/artifact-retrieve.d.ts +23 -0
- package/dist/core/tools/artifact-retrieve.d.ts.map +1 -0
- package/dist/core/tools/artifact-retrieve.js +110 -0
- package/dist/core/tools/artifact-retrieve.js.map +1 -0
- package/dist/core/tools/delegate.d.ts +32 -0
- package/dist/core/tools/delegate.d.ts.map +1 -0
- package/dist/core/tools/delegate.js +60 -0
- package/dist/core/tools/delegate.js.map +1 -0
- package/dist/core/tools/fff-search-backend.d.ts +103 -0
- package/dist/core/tools/fff-search-backend.d.ts.map +1 -0
- package/dist/core/tools/fff-search-backend.js +151 -0
- package/dist/core/tools/fff-search-backend.js.map +1 -0
- package/dist/core/tools/find.d.ts +21 -1
- package/dist/core/tools/find.d.ts.map +1 -1
- package/dist/core/tools/find.js +183 -10
- package/dist/core/tools/find.js.map +1 -1
- package/dist/core/tools/goal.d.ts +35 -0
- package/dist/core/tools/goal.d.ts.map +1 -0
- package/dist/core/tools/goal.js +122 -0
- package/dist/core/tools/goal.js.map +1 -0
- package/dist/core/tools/grep.d.ts +21 -1
- package/dist/core/tools/grep.d.ts.map +1 -1
- package/dist/core/tools/grep.js +272 -27
- package/dist/core/tools/grep.js.map +1 -1
- package/dist/core/tools/index.d.ts +4 -1
- package/dist/core/tools/index.d.ts.map +1 -1
- package/dist/core/tools/index.js +9 -0
- package/dist/core/tools/index.js.map +1 -1
- package/dist/core/tools/model-fitness.d.ts +30 -0
- package/dist/core/tools/model-fitness.d.ts.map +1 -0
- package/dist/core/tools/model-fitness.js +38 -0
- package/dist/core/tools/model-fitness.js.map +1 -0
- package/dist/core/tools/run-toolkit-script.d.ts +24 -0
- package/dist/core/tools/run-toolkit-script.d.ts.map +1 -0
- package/dist/core/tools/run-toolkit-script.js +103 -0
- package/dist/core/tools/run-toolkit-script.js.map +1 -0
- package/dist/core/tools/search-router.d.ts +75 -0
- package/dist/core/tools/search-router.d.ts.map +1 -0
- package/dist/core/tools/search-router.js +85 -0
- package/dist/core/tools/search-router.js.map +1 -0
- package/dist/modes/interactive/components/footer.d.ts.map +1 -1
- package/dist/modes/interactive/components/footer.js +18 -16
- package/dist/modes/interactive/components/footer.js.map +1 -1
- package/dist/modes/interactive/components/settings-selector.d.ts +13 -1
- package/dist/modes/interactive/components/settings-selector.d.ts.map +1 -1
- package/dist/modes/interactive/components/settings-selector.js +471 -11
- package/dist/modes/interactive/components/settings-selector.js.map +1 -1
- package/dist/modes/interactive/interactive-mode.d.ts +4 -0
- package/dist/modes/interactive/interactive-mode.d.ts.map +1 -1
- package/dist/modes/interactive/interactive-mode.js +217 -39
- package/dist/modes/interactive/interactive-mode.js.map +1 -1
- package/dist/utils/tools-manager.d.ts +2 -0
- package/dist/utils/tools-manager.d.ts.map +1 -1
- package/dist/utils/tools-manager.js +154 -2
- package/dist/utils/tools-manager.js.map +1 -1
- package/examples/extensions/custom-provider-anthropic/package-lock.json +2 -2
- package/examples/extensions/custom-provider-anthropic/package.json +1 -1
- package/examples/extensions/custom-provider-gitlab-duo/package.json +1 -1
- package/examples/extensions/sandbox/package-lock.json +2 -2
- package/examples/extensions/sandbox/package.json +1 -1
- package/examples/extensions/with-deps/package-lock.json +2 -2
- package/examples/extensions/with-deps/package.json +1 -1
- package/npm-shrinkwrap.json +368 -12
- package/package.json +5 -4
|
@@ -21,12 +21,27 @@ import { stripFrontmatter } from "../utils/frontmatter.js";
|
|
|
21
21
|
import { resolvePath } from "../utils/paths.js";
|
|
22
22
|
import { sleep } from "../utils/sleep.js";
|
|
23
23
|
import { formatNoApiKeyFoundMessage, formatNoModelSelectedMessage } from "./auth-guidance.js";
|
|
24
|
+
import { evaluateToolGate } from "./autonomy/gates.js";
|
|
25
|
+
import { LaneTracker } from "./autonomy/lane-tracker.js";
|
|
26
|
+
import { appendLaneRecordSnapshot, getLaneRecordSnapshots } from "./autonomy/session-lane-record.js";
|
|
27
|
+
import { composeSubagentSystemPrompt } from "./autonomy/subagent-prompt.js";
|
|
24
28
|
import { executeBashWithOperations } from "./bash-executor.js";
|
|
25
29
|
import { calculateContextTokens, collectEntriesForBranchSummary, compact, estimateContextTokens, generateBranchSummary, prepareCompaction, shouldCompact, } from "./compaction/index.js";
|
|
30
|
+
import { createFileArtifactStore } from "./context/context-artifacts.js";
|
|
31
|
+
import { runContextAudit } from "./context/context-audit.js";
|
|
32
|
+
import { enforcePromptPolicy } from "./context/context-prompt-enforcement.js";
|
|
33
|
+
import { correlateWithContextGc, planPromptPolicy, } from "./context/context-prompt-policy.js";
|
|
34
|
+
import { defaultMemoryPromptInclusionReport, sanitizeMemoryRetrievalReportForDiagnostics, } from "./context/memory-diagnostics.js";
|
|
35
|
+
import { buildMemoryPromptBlock } from "./context/memory-prompt-block.js";
|
|
36
|
+
import { DEFAULT_LOCAL_MEMORY_EGRESS_POLICY, } from "./context/memory-provider-contract.js";
|
|
37
|
+
import { retrieveMemoryForContext } from "./context/memory-retrieval.js";
|
|
38
|
+
import { createOkfMemoryProvider } from "./context/okf-memory-provider.js";
|
|
26
39
|
import { applyContextGc } from "./context-gc.js";
|
|
27
40
|
import { aggregateDailyUsageFromSessionFiles, aggregateDailyUsageFromSessionRoot, formatDailyUsageBreakdown, getLocalDayWindow, } from "./cost/daily-usage.js";
|
|
28
41
|
import { downgradeReasoning, estimateTurnCostUsd, evaluateCostGuard } from "./cost-guard.js";
|
|
29
42
|
import { DEFAULT_THINKING_LEVEL } from "./defaults.js";
|
|
43
|
+
import { appendWorkerResultSnapshot, getWorkerResultSnapshots } from "./delegation/session-worker-result.js";
|
|
44
|
+
import { runWorker } from "./delegation/worker-runner.js";
|
|
30
45
|
import { exportSessionToHtml } from "./export-html/index.js";
|
|
31
46
|
import { createToolHtmlRenderer } from "./export-html/tool-renderer.js";
|
|
32
47
|
import { createCoreDiagnosticsToolDefinitions } from "./extensions/builtin.js";
|
|
@@ -34,7 +49,13 @@ import { ExtensionRunner, wrapRegisteredTools, } from "./extensions/index.js";
|
|
|
34
49
|
import { disposeExtensionEventSubscriptions } from "./extensions/loader.js";
|
|
35
50
|
import { emitSessionShutdownEvent } from "./extensions/runner.js";
|
|
36
51
|
import { GatewayRegistry } from "./gateways/channel-provider.js";
|
|
52
|
+
import { buildGoalContinuationPrompt, } from "./goals/goal-continuation-prompt.js";
|
|
53
|
+
import { buildGoalRuntimeSnapshot, } from "./goals/goal-runtime-snapshot.js";
|
|
54
|
+
import { appendGoalStateSnapshot, getLatestGoalStateSnapshot } from "./goals/session-goal-state.js";
|
|
55
|
+
import { appendLearningAuditSnapshot, getLearningAuditSnapshots, proposalFromReflectionWrite, rollbackPlanForReflectionWrite, } from "./learning/learning-audit.js";
|
|
56
|
+
import { evaluateLearningDecision } from "./learning/learning-gate.js";
|
|
37
57
|
import { decideDemand, ReflectionEngine, } from "./learning/reflection-engine.js";
|
|
58
|
+
import { appendLearningDecisionSnapshot, getLearningDecisionSnapshots } from "./learning/session-learning-decision.js";
|
|
38
59
|
import { isPromotedFrontmatter, SkillCurator } from "./learning/skill-curator.js";
|
|
39
60
|
import { EffectivenessTracker } from "./memory/effectiveness-tracker.js";
|
|
40
61
|
import { MemoryManager } from "./memory/memory-manager.js";
|
|
@@ -42,21 +63,32 @@ import { FileStoreProvider } from "./memory/providers/file-store.js";
|
|
|
42
63
|
import { TranscriptRecallProvider } from "./memory/providers/transcript-recall.js";
|
|
43
64
|
import { compactToolResultDetailsForRetention } from "./message-retention.js";
|
|
44
65
|
import { createCustomMessage } from "./messages.js";
|
|
66
|
+
import { deriveModelCapabilityProfile, filterToolNamesForCapability, } from "./model-capability.js";
|
|
45
67
|
import { resolveCliModel, resolveProfileModelSettings } from "./model-resolver.js";
|
|
46
68
|
import { collectModelRouterConfigDiagnostics } from "./model-router/config-diagnostics.js";
|
|
47
|
-
import {
|
|
69
|
+
import { classifyModelRouterRoute } from "./model-router/intent-classifier.js";
|
|
70
|
+
import { ROUTE_JUDGE_MAX_OUTPUT_TOKENS, runRouteJudge } from "./model-router/route-judge.js";
|
|
48
71
|
import { bufferModelRouterSessionCustomMessage, bufferModelRouterSessionMessage, createModelRouterSessionBuffer, flushModelRouterSessionBuffer, } from "./model-router/session-buffer.js";
|
|
49
72
|
import { formatModelRouterStatus, getRecentModelRouterDecisions, MODEL_ROUTER_DECISION_CUSTOM_TYPE, } from "./model-router/status.js";
|
|
50
73
|
import { shouldEscalateModelRouterTool } from "./model-router/tool-escalation.js";
|
|
74
|
+
import { FitnessStore } from "./models/fitness-store.js";
|
|
51
75
|
import { expandPromptTemplate } from "./prompt-templates.js";
|
|
76
|
+
import { runModelFitnessProbe } from "./research/model-fitness.js";
|
|
77
|
+
import { runResearch } from "./research/research-runner.js";
|
|
78
|
+
import { appendEvidenceBundleSnapshot, getEvidenceBundleSnapshots, getLatestEvidenceBundleSnapshot, } from "./research/session-evidence-bundle.js";
|
|
52
79
|
import { stripResourceProfileBlocks } from "./resource-profile-blocks.js";
|
|
53
80
|
import { classifyToolTrust, UNTRUSTED_BOUNDARY_SYSTEM_RULE, wrapUntrustedText } from "./security/untrusted-boundary.js";
|
|
54
81
|
import { CURRENT_SESSION_VERSION, getLatestCompactionEntry } from "./session-manager.js";
|
|
55
82
|
import { matchesResourceProfilePattern, } from "./settings-manager.js";
|
|
56
83
|
import { createSyntheticSourceInfo } from "./source-info.js";
|
|
57
84
|
import { buildSystemPrompt } from "./system-prompt.js";
|
|
85
|
+
import { executeToolkitScript } from "./toolkit/script-runner.js";
|
|
58
86
|
import { createLocalBashOperations } from "./tools/bash.js";
|
|
87
|
+
import { createDelegateToolDefinition } from "./tools/delegate.js";
|
|
88
|
+
import { createGoalToolDefinition } from "./tools/goal.js";
|
|
59
89
|
import { createAllToolDefinitions } from "./tools/index.js";
|
|
90
|
+
import { createModelFitnessToolDefinition } from "./tools/model-fitness.js";
|
|
91
|
+
import { createRunToolkitScriptToolDefinition } from "./tools/run-toolkit-script.js";
|
|
60
92
|
import { createToolDefinitionFromAgentTool } from "./tools/tool-definition-wrapper.js";
|
|
61
93
|
/**
|
|
62
94
|
* Parse a skill block from message text.
|
|
@@ -89,10 +121,46 @@ function formatModelRouterModel(model) {
|
|
|
89
121
|
/**
 * Record a model-router decision on the session by appending it as a custom
 * entry tagged with MODEL_ROUTER_DECISION_CUSTOM_TYPE.
 *
 * @param sessionManager - Object exposing `appendCustomEntry(type, payload)`.
 * @param decision - The decision payload, passed through unchanged.
 */
function persistModelRouterDecision(sessionManager, decision) {
    sessionManager.appendCustomEntry(MODEL_ROUTER_DECISION_CUSTOM_TYPE, decision);
}
|
|
124
|
+
/**
 * Read a packed grep/find tool result's `details.artifactId`, if present, without `any`.
 *
 * @param message - Candidate message; anything falsy or whose `role` is not
 *   `"toolResult"` is rejected up front.
 * @returns {string | undefined} The artifact id when `message.details` is a
 *   non-null object carrying a string `artifactId`; otherwise `undefined`.
 */
function extractArtifactId(message) {
    if (!message || message.role !== "toolResult") {
        return undefined;
    }
    const details = message.details;
    // `typeof null === "object"`, so null has to be excluded explicitly.
    if (typeof details !== "object" || details === null) {
        return undefined;
    }
    const artifactId = details.artifactId;
    return typeof artifactId === "string" ? artifactId : undefined;
}
|
|
134
|
+
/**
 * Text of the most recent user message, or "" if there is none (e.g. goal-continuation
 * turns with no new user input). An empty query degrades to zero memory-retrieval results
 * by construction (see memory-provider-contract.ts's score-on-empty-query-tokens rule) --
 * no special-casing needed here beyond returning "".
 *
 * String content is returned as-is. Otherwise the `text` of every part whose
 * `type` is "text" is joined with "\n"; non-text parts are ignored.
 *
 * @param messages - Message list, scanned from the end towards the start.
 * @returns {string} The latest user message's text, or "" when no user message
 *   exists or the latest one carries no content.
 */
function latestUserMessageText(messages) {
    for (let index = messages.length - 1; index >= 0; index--) {
        const message = messages[index];
        // Optional chaining tolerates a null/undefined slot instead of throwing.
        if (message?.role !== "user") {
            continue;
        }
        const { content } = message;
        if (typeof content === "string") {
            return content;
        }
        // A user message without content has no text; iterating it would throw.
        if (content == null) {
            return "";
        }
        const parts = [];
        for (const part of content) {
            if (part?.type === "text") {
                parts.push(part.text);
            }
        }
        return parts.join("\n");
    }
    return "";
}
|
|
156
|
+
/**
 * Build a memory-retrieval report that represents "nothing retrieved": an empty
 * query and empty provider-report, result and context-item lists.
 *
 * @param maxResults - Echoed unchanged into `request.maxResults`.
 * @returns A fresh report object; the arrays are new on every call.
 */
function emptyMemoryRetrievalReport(maxResults) {
    return {
        request: { query: "", maxResults },
        providerReports: [],
        results: [],
        contextItems: [],
    };
}
|
|
92
159
|
export class AgentSession {
|
|
93
160
|
agent;
|
|
94
161
|
sessionManager;
|
|
95
162
|
settingsManager;
|
|
163
|
+
capabilityEnvelope;
|
|
96
164
|
_scopedModels;
|
|
97
165
|
// Event subscription state
|
|
98
166
|
_unsubscribeAgent;
|
|
@@ -108,11 +176,42 @@ export class AgentSession {
|
|
|
108
176
|
_pendingNextTurnMessages = [];
|
|
109
177
|
/** Serializes prompt() submissions made while streaming so queued steering/follow-ups keep user-typed FIFO order. */
|
|
110
178
|
_streamingPromptSubmissionTail = Promise.resolve();
|
|
179
|
+
/** Pending idle timer that starts bounded goal continuation after the session becomes idle. */
|
|
180
|
+
_goalAutoContinueTimer;
|
|
181
|
+
/** Guards bounded idle autosteer so continuation prompts do not recursively trigger themselves. */
|
|
182
|
+
_isGoalAutoContinuing = false;
|
|
183
|
+
/** Pending idle timer that starts an autonomous research pass after the session becomes idle. */
|
|
184
|
+
_researchLaneTimer;
|
|
185
|
+
/** Single-flight guard: at most one research pass runs at a time per session. */
|
|
186
|
+
_isResearchLaneRunning = false;
|
|
187
|
+
/** Why the last idle research-lane evaluation skipped, for /autonomy diagnostics. */
|
|
188
|
+
_lastResearchLaneSkipReason;
|
|
189
|
+
/** Live lane registry — the real source for AutonomyStatusSnapshot.activeLaneCount. */
|
|
190
|
+
_laneTracker = new LaneTracker();
|
|
191
|
+
/** Session-lifetime abort for in-flight research passes (same pattern as _reflectionAbort). */
|
|
192
|
+
_researchLaneAbort = new AbortController();
|
|
193
|
+
/** Single-flight guard: at most one delegated worker runs at a time per session. */
|
|
194
|
+
_isWorkerDelegationRunning = false;
|
|
195
|
+
/** Session-lifetime abort for in-flight delegated workers. */
|
|
196
|
+
_workerDelegationAbort = new AbortController();
|
|
197
|
+
/**
|
|
198
|
+
* The last tool set requested via setActiveToolsByName BEFORE model-capability filtering, so
|
|
199
|
+
* switching from a small-window model back to a large one restores the full requested set.
|
|
200
|
+
*/
|
|
201
|
+
_requestedActiveToolNames;
|
|
111
202
|
// Compaction/context hygiene state
|
|
112
203
|
_compactionAbortController = undefined;
|
|
113
204
|
_autoCompactionAbortController = undefined;
|
|
114
205
|
_overflowRecoveryAttempted = false;
|
|
115
206
|
_latestContextGcReport = undefined;
|
|
207
|
+
_toolArtifactStore = undefined;
|
|
208
|
+
_latestContextAuditReport = undefined;
|
|
209
|
+
_latestPromptPolicyReport = undefined;
|
|
210
|
+
_latestPromptPolicyGcCorrelation = undefined;
|
|
211
|
+
_latestPromptEnforcementReport = undefined;
|
|
212
|
+
_memoryOkfProvider = undefined;
|
|
213
|
+
_latestMemoryRetrievalReport = undefined;
|
|
214
|
+
_latestMemoryPromptInclusionReport = undefined;
|
|
116
215
|
// Branch summarization state
|
|
117
216
|
_branchSummaryAbortController = undefined;
|
|
118
217
|
// Retry state
|
|
@@ -151,10 +250,12 @@ export class AgentSession {
|
|
|
151
250
|
_costGuardDowngraded = false;
|
|
152
251
|
/** Active model-router intent for the current transient routed turn, if any. */
|
|
153
252
|
_activeModelRouterIntent;
|
|
253
|
+
_activeModelRouterRoute;
|
|
154
254
|
_modelRouterSessionBuffer;
|
|
155
255
|
_modelRouterEscalationRequested = false;
|
|
156
256
|
_isModelRouterRetry = false;
|
|
157
257
|
_lastModelRouterDecision;
|
|
258
|
+
_lastAutonomyGateOutcome;
|
|
158
259
|
_lastModelRouterSkipReason;
|
|
159
260
|
_lastModelRouterIntent;
|
|
160
261
|
/** Lazily-built skill curator (#32) over `<agentDir>/skills`. */
|
|
@@ -317,7 +418,18 @@ export class AgentSession {
|
|
|
317
418
|
if (this._extensionRunner.hasHandlers("context")) {
|
|
318
419
|
finalMessages = await this._extensionRunner.emitContext(currentMessages);
|
|
319
420
|
}
|
|
320
|
-
const
|
|
421
|
+
const auditReport = this._runContextAudit(finalMessages);
|
|
422
|
+
const shadowReport = this._runPromptPolicyPlanning(auditReport);
|
|
423
|
+
const memoryReport = await this._runMemoryRetrieval(finalMessages);
|
|
424
|
+
const gcResult = this._applyContextGc(finalMessages, true);
|
|
425
|
+
this._correlatePromptPolicyWithContextGc(gcResult.report);
|
|
426
|
+
const enforcementResult = this._runPromptEnforcement(gcResult.messages, shadowReport);
|
|
427
|
+
// Appended LAST, after gc and enforcement, so the bounded evidence block is
|
|
428
|
+
// never packed/stubbed/reshaped by either pass and always reflects this turn's
|
|
429
|
+
// fresh retrieval. Because nothing downstream trims it, memory-prompt-block.ts's
|
|
430
|
+
// character caps are the only budget protection for this block -- load-bearing,
|
|
431
|
+
// not merely defensive.
|
|
432
|
+
const gcMessages = this._maybeAppendMemoryEvidenceBlock(enforcementResult.messages, memoryReport);
|
|
321
433
|
this._applyCostGuard(gcMessages);
|
|
322
434
|
return gcMessages;
|
|
323
435
|
};
|
|
@@ -422,15 +534,366 @@ export class AgentSession {
|
|
|
422
534
|
_contextGcStorageDir() {
|
|
423
535
|
return join(this._agentDir, "context-gc", this.sessionManager.getSessionId());
|
|
424
536
|
}
|
|
537
|
+
_toolArtifactsDir() {
|
|
538
|
+
return join(this._agentDir, "context-artifacts", this.sessionManager.getSessionId());
|
|
539
|
+
}
|
|
540
|
+
/**
|
|
541
|
+
* Session-scoped, filesystem-backed artifact store for first-capture-then-bound tool
|
|
542
|
+
* output (grep/find only, for now -- see tool-output-artifacts.md). Lazily created and
|
|
543
|
+
* cached so every tool construction in this session shares one store instance.
|
|
544
|
+
*
|
|
545
|
+
* `packToolOutput()` registers a reference (the packing tool call's id) at pack time
|
|
546
|
+
* and fails closed, so packed artifacts are never prematurely collected.
|
|
547
|
+
* `_releaseGcPackedArtifactReferences()` (called from `_applyContextGc()`) releases
|
|
548
|
+
* that reference once context-gc packs the result out of live context, and
|
|
549
|
+
* opportunistically reclaims now-unreferenced artifacts via `cleanup()`.
|
|
550
|
+
* Remaining carry-forward gap: cleanup() now also runs at dispose(), but only reclaims
|
|
551
|
+
* already-released (zero-reference) artifacts. A session that ends before context-gc
|
|
552
|
+
* ever evicts a result never releases that reference, so its artifact stays on disk by
|
|
553
|
+
* design (resolvable on resume). Reclaiming those requires an explicit cross-session
|
|
554
|
+
* expiry/liveness policy, not just a sweep.
|
|
555
|
+
*/
|
|
556
|
+
_getToolArtifactStore() {
|
|
557
|
+
this._toolArtifactStore ??= createFileArtifactStore({ baseDir: this._toolArtifactsDir() });
|
|
558
|
+
return this._toolArtifactStore;
|
|
559
|
+
}
|
|
560
|
+
/**
|
|
561
|
+
* Fixed path for this slice's local Pi OKF memory documents, shared across sessions
|
|
562
|
+
* under this agentDir (not session-scoped, unlike tool-artifacts/context-gc, since OKF
|
|
563
|
+
* memory represents durable cross-session knowledge, not a per-session capture). Not
|
|
564
|
+
* yet user-configurable -- see the memory-retrieval settings doc comment.
|
|
565
|
+
*/
|
|
566
|
+
_memoryOkfDir() {
|
|
567
|
+
return join(this._agentDir, "okf-memory");
|
|
568
|
+
}
|
|
569
|
+
/**
|
|
570
|
+
* Session-scoped, read-only local OKF memory provider. Lazily created ONLY when memory
|
|
571
|
+
* retrieval is enabled (see `_runMemoryRetrieval`) -- never force-created, so a session
|
|
572
|
+
* with the setting off never touches `_memoryOkfDir()` at all (no directory access, no
|
|
573
|
+
* creation; `createOkfMemoryProvider` itself never writes/mkdirs either way).
|
|
574
|
+
*/
|
|
575
|
+
_getMemoryOkfProvider() {
|
|
576
|
+
this._memoryOkfProvider ??= createOkfMemoryProvider({ rootDir: this._memoryOkfDir() });
|
|
577
|
+
return this._memoryOkfProvider;
|
|
578
|
+
}
|
|
579
|
+
/**
|
|
580
|
+
* One pass over the current branch, mapping each toolResult's toolCallId to its
|
|
581
|
+
* persisted session-entry id. Rebuilt every audit pass (O(branch) per turn), so this is
|
|
582
|
+
* O(n^2) over a long session. Fine at current scale; after the artifact-read fix this is
|
|
583
|
+
* the next per-turn audit cost to optimize if it ever matters (e.g. cache/incrementally
|
|
584
|
+
* update instead of a full rebuild).
|
|
585
|
+
*/
|
|
586
|
+
_buildSessionEntryIdLookup() {
|
|
587
|
+
const map = new Map();
|
|
588
|
+
for (const entry of this.sessionManager.getBranch()) {
|
|
589
|
+
if (entry.type === "message" && entry.message.role === "toolResult") {
|
|
590
|
+
map.set(entry.message.toolCallId, entry.id);
|
|
591
|
+
}
|
|
592
|
+
}
|
|
593
|
+
return (toolCallId) => map.get(toolCallId);
|
|
594
|
+
}
|
|
595
|
+
/**
|
|
596
|
+
* Phase 1 observe-only audit pass (see context/context-audit.ts): converts live
|
|
597
|
+
* toolResult messages into ContextItems and runs the existing retention/hard-constraint
|
|
598
|
+
* evaluators over them, storing the latest deterministic report for tests/debugging.
|
|
599
|
+
* Read-only with respect to messages, the transcript, and artifact references -- uses
|
|
600
|
+
* `_toolArtifactStore` (the field), not `_getToolArtifactStore()` (the getter), so a
|
|
601
|
+
* session that never packed anything doesn't force-create a store/dir just to audit.
|
|
602
|
+
* Never throws into a live turn: any failure degrades to an empty report.
|
|
603
|
+
*/
|
|
604
|
+
_runContextAudit(messages) {
|
|
605
|
+
try {
|
|
606
|
+
const report = runContextAudit(messages, {
|
|
607
|
+
turnIndex: this._turnIndex,
|
|
608
|
+
artifactStore: this._toolArtifactStore,
|
|
609
|
+
sessionEntryIdForToolCallId: this._buildSessionEntryIdLookup(),
|
|
610
|
+
});
|
|
611
|
+
this._latestContextAuditReport = report;
|
|
612
|
+
return report;
|
|
613
|
+
}
|
|
614
|
+
catch {
|
|
615
|
+
const report = { turnIndex: this._turnIndex, items: [] };
|
|
616
|
+
this._latestContextAuditReport = report;
|
|
617
|
+
return report;
|
|
618
|
+
}
|
|
619
|
+
}
|
|
620
|
+
/**
|
|
621
|
+
* Read-only inspection of the context audit. With `messages`, recomputes fresh against
|
|
622
|
+
* the given array (still no mutation of messages/transcript/artifact refs); without,
|
|
623
|
+
* returns the last report computed during a real transform pass.
|
|
624
|
+
*/
|
|
625
|
+
getContextAuditReport(messages) {
|
|
626
|
+
if (messages)
|
|
627
|
+
return this._runContextAudit(messages);
|
|
628
|
+
return this._latestContextAuditReport ?? { turnIndex: this._turnIndex, items: [] };
|
|
629
|
+
}
|
|
630
|
+
/**
|
|
631
|
+
* Observe-first shadow/planning pass (see context/context-prompt-policy.ts): re-shapes
|
|
632
|
+
* the audit report into a per-item policy plan whose `appliedAction` is always
|
|
633
|
+
* "keep_raw" -- this never enforces anything, it only records what the policy engine
|
|
634
|
+
* would say. Never throws into a live turn: any failure degrades to an empty report.
|
|
635
|
+
*/
|
|
636
|
+
_runPromptPolicyPlanning(auditReport) {
|
|
637
|
+
try {
|
|
638
|
+
const report = planPromptPolicy(auditReport);
|
|
639
|
+
this._latestPromptPolicyReport = report;
|
|
640
|
+
return report;
|
|
641
|
+
}
|
|
642
|
+
catch {
|
|
643
|
+
const report = { turnIndex: this._turnIndex, items: [] };
|
|
644
|
+
this._latestPromptPolicyReport = report;
|
|
645
|
+
return report;
|
|
646
|
+
}
|
|
647
|
+
}
|
|
648
|
+
/**
|
|
649
|
+
* Read-only inspection of the shadow policy plan. With `messages`, recomputes fresh
|
|
650
|
+
* (audit + plan) against the given array; without, returns the last plan computed
|
|
651
|
+
* during a real transform pass. Never mutates messages/transcript/artifact refs.
|
|
652
|
+
*/
|
|
653
|
+
getPromptPolicyReport(messages) {
|
|
654
|
+
if (messages)
|
|
655
|
+
return this._runPromptPolicyPlanning(this._runContextAudit(messages));
|
|
656
|
+
return this._latestPromptPolicyReport ?? { turnIndex: this._turnIndex, items: [] };
|
|
657
|
+
}
|
|
658
|
+
/**
|
|
659
|
+
* Report-only correlation between the shadow plan just computed this turn and what the
|
|
660
|
+
* legacy context-gc pass actually packed. Runs after `_applyContextGc()` has already
|
|
661
|
+
* produced its report; never influences context-gc itself. Never throws into a live
|
|
662
|
+
* turn: any failure degrades to an empty correlation.
|
|
663
|
+
*/
|
|
664
|
+
_correlatePromptPolicyWithContextGc(gcReport) {
|
|
665
|
+
const shadowReport = this._latestPromptPolicyReport ?? { turnIndex: this._turnIndex, items: [] };
|
|
666
|
+
try {
|
|
667
|
+
this._latestPromptPolicyGcCorrelation = correlateWithContextGc(shadowReport, gcReport);
|
|
668
|
+
}
|
|
669
|
+
catch {
|
|
670
|
+
this._latestPromptPolicyGcCorrelation = { turnIndex: this._turnIndex, entries: [] };
|
|
671
|
+
}
|
|
672
|
+
}
|
|
673
|
+
/** Read-only inspection of the latest shadow-plan/legacy-gc correlation, for tests/debugging. */
|
|
674
|
+
getPromptPolicyGcCorrelation() {
|
|
675
|
+
return this._latestPromptPolicyGcCorrelation ?? { turnIndex: this._turnIndex, entries: [] };
|
|
676
|
+
}
|
|
677
|
+
/**
|
|
678
|
+
* First enforcement pilot (see context/context-prompt-enforcement.ts): opt-in,
|
|
679
|
+
* default-disabled stub-in-place of stale artifact-backed tool_output results in the
|
|
680
|
+
* provider-visible message array only. Runs on `messages` AFTER context-gc has already
|
|
681
|
+
* produced its own result, so legacy context-gc's own packing/reporting is completely
|
|
682
|
+
* unaffected by this pass -- it only ever acts on messages gc left untouched this turn.
|
|
683
|
+
* Never throws into a live turn: any failure degrades to returning `messages` unchanged.
|
|
684
|
+
*/
|
|
685
|
+
_runPromptEnforcement(messages, shadowReport) {
|
|
686
|
+
try {
|
|
687
|
+
const persistedSettings = this.settingsManager.getContextPromptEnforcementSettings();
|
|
688
|
+
const settings = {
|
|
689
|
+
...persistedSettings,
|
|
690
|
+
// Runtime fact, never assumed: artifact_retrieve is a companion affordance
|
|
691
|
+
// (auto-activated alongside grep/find), not a default/global tool, so active
|
|
692
|
+
// tools can differ turn to turn -- see context-prompt-enforcement.ts's doc
|
|
693
|
+
// comment on why this is checked separately from hasAvailableRetrievalPath.
|
|
694
|
+
retrievalToolAvailable: this.getActiveToolNames().includes("artifact_retrieve"),
|
|
695
|
+
};
|
|
696
|
+
const result = enforcePromptPolicy(messages, shadowReport, settings);
|
|
697
|
+
this._latestPromptEnforcementReport = result.report;
|
|
698
|
+
return result;
|
|
699
|
+
}
|
|
700
|
+
catch {
|
|
701
|
+
const report = { turnIndex: this._turnIndex, items: [] };
|
|
702
|
+
this._latestPromptEnforcementReport = report;
|
|
703
|
+
return { messages, report };
|
|
704
|
+
}
|
|
705
|
+
}
|
|
706
|
+
/** Read-only inspection of the latest prompt-enforcement report, for tests/debugging. */
|
|
707
|
+
getPromptEnforcementReport() {
|
|
708
|
+
return this._latestPromptEnforcementReport ?? { turnIndex: this._turnIndex, items: [] };
|
|
709
|
+
}
|
|
710
|
+
/**
|
|
711
|
+
* Observe-only local memory retrieval (see context/memory-retrieval.ts and
|
|
712
|
+
* context/okf-memory-provider.ts): default disabled, opt-in setting. When disabled,
|
|
713
|
+
* never constructs the OKF provider (no directory access under `_memoryOkfDir()` at
|
|
714
|
+
* all) and returns an empty report -- fully fail-closed. When enabled, queries the
|
|
715
|
+
* local, read-only OKF provider with the latest user message text (empty if there is
|
|
716
|
+
* none, e.g. a goal-continuation turn -- degrades to zero results by construction, see
|
|
717
|
+
* `latestUserMessageText`'s doc comment) under `DEFAULT_LOCAL_MEMORY_EGRESS_POLICY`.
|
|
718
|
+
* Retrieved items are only ever stored in the report; nothing here touches `messages`,
|
|
719
|
+
* the transcript, or the provider-visible prompt. Never throws into a live turn: any
|
|
720
|
+
* failure (including a provider search error) degrades to an empty report.
|
|
721
|
+
*/
|
|
722
|
+
async _runMemoryRetrieval(messages) {
|
|
723
|
+
try {
|
|
724
|
+
const settings = this.settingsManager.getMemoryRetrievalSettings();
|
|
725
|
+
if (!settings.enabled) {
|
|
726
|
+
const report = emptyMemoryRetrievalReport(settings.maxResults);
|
|
727
|
+
this._latestMemoryRetrievalReport = report;
|
|
728
|
+
return report;
|
|
729
|
+
}
|
|
730
|
+
const report = await retrieveMemoryForContext([this._getMemoryOkfProvider()], { query: latestUserMessageText(messages), maxResults: settings.maxResults }, {
|
|
731
|
+
createdAtTurn: this._turnIndex,
|
|
732
|
+
maxResults: settings.maxResults,
|
|
733
|
+
defaultLocalPolicy: DEFAULT_LOCAL_MEMORY_EGRESS_POLICY,
|
|
734
|
+
});
|
|
735
|
+
this._latestMemoryRetrievalReport = report;
|
|
736
|
+
return report;
|
|
737
|
+
}
|
|
738
|
+
catch {
|
|
739
|
+
const report = emptyMemoryRetrievalReport(0);
|
|
740
|
+
this._latestMemoryRetrievalReport = report;
|
|
741
|
+
return report;
|
|
742
|
+
}
|
|
743
|
+
}
|
|
744
|
+
/** Read-only inspection of the latest memory-retrieval report, for tests/debugging. */
|
|
745
|
+
getMemoryRetrievalReport() {
|
|
746
|
+
return this._latestMemoryRetrievalReport ?? emptyMemoryRetrievalReport(0);
|
|
747
|
+
}
|
|
748
|
+
/**
|
|
749
|
+
* Bounded prompt-surfacing pilot for local memory evidence (see
|
|
750
|
+
* context/memory-prompt-block.ts): opt-in, default disabled, and gated on TWO settings
|
|
751
|
+
* (`enabled` AND `includeInPrompt`) plus a non-empty `report.contextItems` -- the first
|
|
752
|
+
* two are belt-and-suspenders on top of the fact that `_runMemoryRetrieval` already
|
|
753
|
+
* leaves `contextItems` empty whenever `enabled` is false, regardless of
|
|
754
|
+
* `includeInPrompt`. Reuses the `report` this pass's `_runMemoryRetrieval` call already
|
|
755
|
+
* computed -- never re-queries the provider here.
|
|
756
|
+
*
|
|
757
|
+
* Appends exactly one ephemeral `custom`/"memory_evidence" message wrapped by
|
|
758
|
+
* `wrapUntrustedText` (the same nonce-fenced boundary + always-on system-prompt rule
|
|
759
|
+
* used for other untrusted content) to the END of `messages`. This is purely additive
|
|
760
|
+
* (never mutates an existing message) and purely transient: `messages` here is the
|
|
761
|
+
* array about to be sent to the provider, not `this.agent.state.messages` or anything
|
|
762
|
+
* persisted via `sessionManager` -- so the injected message can never reach the
|
|
763
|
+
* transcript, regardless of how many times this pass runs.
|
|
764
|
+
*
|
|
765
|
+
* Also records a `MemoryPromptInclusionReport` (context/memory-diagnostics.ts) at each
|
|
766
|
+
* branch below, for context_audit's diagnostic surface only -- this is pure bookkeeping
|
|
767
|
+
* alongside the existing branches, not a new branch/condition: the messages returned
|
|
768
|
+
* are unchanged by this recording.
|
|
769
|
+
*/
|
|
770
|
+
_maybeAppendMemoryEvidenceBlock(messages, report) {
|
|
771
|
+
try {
|
|
772
|
+
const settings = this.settingsManager.getMemoryRetrievalSettings();
|
|
773
|
+
const base = {
|
|
774
|
+
enabled: settings.enabled,
|
|
775
|
+
includeInPrompt: settings.includeInPrompt,
|
|
776
|
+
selectedItemCount: report.contextItems.length,
|
|
777
|
+
};
|
|
778
|
+
if (!settings.enabled) {
|
|
779
|
+
this._latestMemoryPromptInclusionReport = {
|
|
780
|
+
...base,
|
|
781
|
+
status: "disabled",
|
|
782
|
+
includedCount: 0,
|
|
783
|
+
omittedCount: 0,
|
|
784
|
+
blockChars: 0,
|
|
785
|
+
};
|
|
786
|
+
return messages;
|
|
787
|
+
}
|
|
788
|
+
if (!settings.includeInPrompt) {
|
|
789
|
+
this._latestMemoryPromptInclusionReport = {
|
|
790
|
+
...base,
|
|
791
|
+
status: "include_disabled",
|
|
792
|
+
includedCount: 0,
|
|
793
|
+
omittedCount: 0,
|
|
794
|
+
blockChars: 0,
|
|
795
|
+
};
|
|
796
|
+
return messages;
|
|
797
|
+
}
|
|
798
|
+
if (report.contextItems.length === 0) {
|
|
799
|
+
this._latestMemoryPromptInclusionReport = {
|
|
800
|
+
...base,
|
|
801
|
+
status: "no_results",
|
|
802
|
+
includedCount: 0,
|
|
803
|
+
omittedCount: 0,
|
|
804
|
+
blockChars: 0,
|
|
805
|
+
};
|
|
806
|
+
return messages;
|
|
807
|
+
}
|
|
808
|
+
const block = buildMemoryPromptBlock(report.contextItems);
|
|
809
|
+
if (!block.text) {
|
|
810
|
+
this._latestMemoryPromptInclusionReport = {
|
|
811
|
+
...base,
|
|
812
|
+
status: "empty_block",
|
|
813
|
+
includedCount: block.includedCount,
|
|
814
|
+
omittedCount: block.omittedCount,
|
|
815
|
+
blockChars: 0,
|
|
816
|
+
};
|
|
817
|
+
return messages;
|
|
818
|
+
}
|
|
819
|
+
const wrapped = wrapUntrustedText(block.text, "memory:pi-okf");
|
|
820
|
+
const evidenceMessage = {
|
|
821
|
+
role: "custom",
|
|
822
|
+
customType: "memory_evidence",
|
|
823
|
+
content: [{ type: "text", text: wrapped }],
|
|
824
|
+
display: false,
|
|
825
|
+
timestamp: Date.now(),
|
|
826
|
+
};
|
|
827
|
+
this._latestMemoryPromptInclusionReport = {
|
|
828
|
+
...base,
|
|
829
|
+
status: "included",
|
|
830
|
+
includedCount: block.includedCount,
|
|
831
|
+
omittedCount: block.omittedCount,
|
|
832
|
+
blockChars: wrapped.length,
|
|
833
|
+
sourceLabel: "memory:pi-okf",
|
|
834
|
+
};
|
|
835
|
+
return [...messages, evidenceMessage];
|
|
836
|
+
}
|
|
837
|
+
catch {
|
|
838
|
+
// `base` may not exist yet if the throw happened before it was computed (e.g.
|
|
839
|
+
// settings access or `report.contextItems` itself threw), so this branch cannot
|
|
840
|
+
// rely on it -- fall back to safe, fixed defaults rather than risk referencing
|
|
841
|
+
// a partially-evaluated value.
|
|
842
|
+
this._latestMemoryPromptInclusionReport = {
|
|
843
|
+
enabled: false,
|
|
844
|
+
includeInPrompt: false,
|
|
845
|
+
selectedItemCount: 0,
|
|
846
|
+
status: "failed",
|
|
847
|
+
includedCount: 0,
|
|
848
|
+
omittedCount: 0,
|
|
849
|
+
blockChars: 0,
|
|
850
|
+
};
|
|
851
|
+
return messages;
|
|
852
|
+
}
|
|
853
|
+
}
|
|
854
|
+
/** Read-only inspection of the latest memory-prompt-inclusion decision, for tests/debugging and context_audit. */
|
|
855
|
+
getMemoryPromptInclusionReport() {
|
|
856
|
+
return this._latestMemoryPromptInclusionReport ?? defaultMemoryPromptInclusionReport();
|
|
857
|
+
}
|
|
858
|
+
/**
|
|
859
|
+
* Combines the already-stored, no-arg latest reports (never re-queries the provider or
|
|
860
|
+
* touches the OKF directory) into the safe, allow-list-projected shape context_audit
|
|
861
|
+
* exposes. See context/memory-diagnostics.ts for why this projection is allow-list
|
|
862
|
+
* based rather than a spread-then-delete of the raw report.
|
|
863
|
+
*/
|
|
864
|
+
_getMemoryAuditDiagnostics() {
|
|
865
|
+
const settings = this.settingsManager.getMemoryRetrievalSettings();
|
|
866
|
+
return {
|
|
867
|
+
retrieval: sanitizeMemoryRetrievalReportForDiagnostics(this.getMemoryRetrievalReport(), settings),
|
|
868
|
+
promptInclusion: this.getMemoryPromptInclusionReport(),
|
|
869
|
+
};
|
|
870
|
+
}
|
|
425
871
|
_applyContextGc(messages, writePayloads) {
|
|
426
872
|
try {
|
|
873
|
+
const settings = this.settingsManager.getContextGcSettings();
|
|
874
|
+
// Merge the ACTIVE memory providers' own page markers (e.g. transcript-recall's
|
|
875
|
+
// "<memory_context") into the semantic-memory marker list. The settings default is
|
|
876
|
+
// provider-agnostic and non-empty, so without this merge the recall pages the bundled
|
|
877
|
+
// default provider actually emits are never recognized as semantic-memory pages and
|
|
878
|
+
// accumulate raw for the life of the session — the exact growth Bug #7 GC exists to stop.
|
|
879
|
+
const providerMarkers = this._memoryManager.getContextMarkers();
|
|
427
880
|
const result = applyContextGc(messages, {
|
|
428
|
-
...
|
|
881
|
+
...settings,
|
|
882
|
+
semanticMemory: {
|
|
883
|
+
...settings.semanticMemory,
|
|
884
|
+
markers: [...new Set([...settings.semanticMemory.markers, ...providerMarkers])],
|
|
885
|
+
},
|
|
429
886
|
cwd: this._cwd,
|
|
430
887
|
storageDir: this._contextGcStorageDir(),
|
|
431
888
|
writePayloads,
|
|
432
889
|
});
|
|
433
890
|
this._latestContextGcReport = result.report;
|
|
891
|
+
// Only release/reclaim on the real per-turn pass (writePayloads=true), never on
|
|
892
|
+
// the read-only status-report path (getContextGcReport with writePayloads=false),
|
|
893
|
+
// so merely inspecting the report can't have side effects.
|
|
894
|
+
if (writePayloads && result.report.packedCount > 0) {
|
|
895
|
+
this._releaseGcPackedArtifactReferences(messages, result.report);
|
|
896
|
+
}
|
|
434
897
|
return result;
|
|
435
898
|
}
|
|
436
899
|
catch {
|
|
@@ -446,6 +909,39 @@ export class AgentSession {
|
|
|
446
909
|
return { messages, report };
|
|
447
910
|
}
|
|
448
911
|
}
|
|
912
|
+
/**
|
|
913
|
+
* Reference-release + cleanup lifecycle: once context-gc has packed a grep/find tool
|
|
914
|
+
* result out of the live prompt (the message is no longer current/active working
|
|
915
|
+
* context -- see contracts-and-retention.md's "ephemeral"/"expired" retention
|
|
916
|
+
* classes), release the pack-time reference `packToolOutput()` registered for it, and
|
|
917
|
+
* opportunistically reclaim now-unreferenced artifacts. This is the other half of the
|
|
918
|
+
* D2b-1 gate: artifacts were being registered but never released, so they accumulated
|
|
919
|
+
* for the life of the session.
|
|
920
|
+
*
|
|
921
|
+
* `record.toolCallId` (from context-gc's packed record) is exactly the holder id
|
|
922
|
+
* `packToolOutput()` used when it called `addReference()` -- both trace back to the
|
|
923
|
+
* same tool call's id -- so no separate bookkeeping is needed to find it.
|
|
924
|
+
*/
|
|
925
|
+
_releaseGcPackedArtifactReferences(messages, report) {
|
|
926
|
+
const store = this._toolArtifactStore;
|
|
927
|
+
if (!store)
|
|
928
|
+
return; // no store was ever constructed, so nothing could have been packed to one
|
|
929
|
+
let releasedAny = false;
|
|
930
|
+
for (const record of report.records) {
|
|
931
|
+
if (record.toolName !== "grep" && record.toolName !== "find")
|
|
932
|
+
continue;
|
|
933
|
+
const artifactId = extractArtifactId(messages[record.messageIndex]);
|
|
934
|
+
if (!artifactId)
|
|
935
|
+
continue;
|
|
936
|
+
if (store.removeReference(artifactId, record.toolCallId))
|
|
937
|
+
releasedAny = true;
|
|
938
|
+
}
|
|
939
|
+
// Cleanup only runs immediately after a release actually happened in this pass, so
|
|
940
|
+
// a long session doesn't re-scan the artifact directory on every turn once nothing
|
|
941
|
+
// new became eligible for release.
|
|
942
|
+
if (releasedAny)
|
|
943
|
+
store.cleanup();
|
|
944
|
+
}
|
|
449
945
|
getContextGcReport(messages) {
|
|
450
946
|
if (messages)
|
|
451
947
|
return this._applyContextGc(messages, false).report;
|
|
@@ -477,8 +973,8 @@ export class AgentSession {
|
|
|
477
973
|
}
|
|
478
974
|
_installAgentToolHooks() {
|
|
479
975
|
this.agent.beforeToolCall = async ({ toolCall, args }) => {
|
|
480
|
-
if (this.
|
|
481
|
-
shouldEscalateModelRouterTool({
|
|
976
|
+
if (this._activeModelRouterRoute &&
|
|
977
|
+
shouldEscalateModelRouterTool({ tier: this._activeModelRouterRoute.tier, toolName: toolCall.name, args })) {
|
|
482
978
|
this._modelRouterEscalationRequested = true;
|
|
483
979
|
this.agent.abort();
|
|
484
980
|
return {
|
|
@@ -486,6 +982,22 @@ export class AgentSession {
|
|
|
486
982
|
reason: "Model router escalation required: a cheap research turn attempted a mutating tool. Retry the turn on the configured expensive model.",
|
|
487
983
|
};
|
|
488
984
|
}
|
|
985
|
+
// Autonomy tool gating
|
|
986
|
+
const gateResult = evaluateToolGate({
|
|
987
|
+
toolName: toolCall.name,
|
|
988
|
+
args,
|
|
989
|
+
cwd: this._cwd,
|
|
990
|
+
envelope: this.capabilityEnvelope,
|
|
991
|
+
});
|
|
992
|
+
if (this.capabilityEnvelope) {
|
|
993
|
+
this._lastAutonomyGateOutcome = gateResult;
|
|
994
|
+
}
|
|
995
|
+
if (gateResult.outcome === "block" || gateResult.outcome === "ask-user") {
|
|
996
|
+
return {
|
|
997
|
+
block: true,
|
|
998
|
+
reason: `Tool execution blocked by autonomy gate [${gateResult.gate}]: ${gateResult.message} (${gateResult.reasonCode})`,
|
|
999
|
+
};
|
|
1000
|
+
}
|
|
489
1001
|
const runner = this._extensionRunner;
|
|
490
1002
|
if (!runner.hasHandlers("tool_call")) {
|
|
491
1003
|
return undefined;
|
|
@@ -835,6 +1347,8 @@ export class AgentSession {
|
|
|
835
1347
|
*/
|
|
836
1348
|
dispose() {
|
|
837
1349
|
try {
|
|
1350
|
+
this._clearGoalAutoContinueTimer();
|
|
1351
|
+
this._clearResearchLaneTimer();
|
|
838
1352
|
this.abortRetry();
|
|
839
1353
|
this.abortCompaction();
|
|
840
1354
|
this.abortBranchSummary();
|
|
@@ -846,6 +1360,10 @@ export class AgentSession {
|
|
|
846
1360
|
// write memory/skills against this now-disposed session.
|
|
847
1361
|
this._disposed = true;
|
|
848
1362
|
this._reflectionAbort.abort();
|
|
1363
|
+
// Abort any in-flight research pass or delegated worker for the same reason: a disposed
|
|
1364
|
+
// session must not keep spending tokens or persist evidence against dead state.
|
|
1365
|
+
this._researchLaneAbort.abort();
|
|
1366
|
+
this._workerDelegationAbort.abort();
|
|
849
1367
|
// Bug #20: clear the hooks this session installed on the shared agent so their closures stop
|
|
850
1368
|
// pinning this (deactivated) session — and all its history/maps — in memory if the agent
|
|
851
1369
|
// instance outlives the session.
|
|
@@ -862,6 +1380,19 @@ export class AgentSession {
|
|
|
862
1380
|
// true session-end hook (P3); file-store shutdown is a no-op.
|
|
863
1381
|
void this._memoryManager.shutdownAll().catch(() => { });
|
|
864
1382
|
cleanupSessionResources(this.sessionId);
|
|
1383
|
+
// Best-effort final sweep for any grep/find artifact already released (reference
|
|
1384
|
+
// count zero) but not yet reclaimed -- e.g. a release whose cleanup() call failed
|
|
1385
|
+
// transiently. This is conservative: it never releases a still-referenced
|
|
1386
|
+
// artifact, so a session that ends before context-gc ever evicts a result (too
|
|
1387
|
+
// short to cross preserveRecentMessages) correctly leaves that artifact in place,
|
|
1388
|
+
// resolvable if the same session is resumed later. It does not sweep OTHER
|
|
1389
|
+
// sessions' artifact directories.
|
|
1390
|
+
try {
|
|
1391
|
+
this._toolArtifactStore?.cleanup();
|
|
1392
|
+
}
|
|
1393
|
+
catch {
|
|
1394
|
+
// Best-effort; dispose must succeed regardless.
|
|
1395
|
+
}
|
|
865
1396
|
}
|
|
866
1397
|
// =========================================================================
|
|
867
1398
|
// Read-only State Access
|
|
@@ -917,16 +1448,41 @@ export class AgentSession {
|
|
|
917
1448
|
* Only tools in the registry can be enabled. Unknown tool names are ignored.
|
|
918
1449
|
* Also rebuilds the system prompt to reflect the new tool set.
|
|
919
1450
|
* Changes take effect on the next agent turn.
|
|
1451
|
+
*
|
|
1452
|
+
* artifact_retrieve is auto-activated as a companion whenever grep or find ends up
|
|
1453
|
+
* in the resulting active set and artifact_retrieve is registered (i.e. not excluded/
|
|
1454
|
+
* blocked/outside an allowlist -- the registry itself is built with that same filter,
|
|
1455
|
+
* so registry presence already tracks "allowed"). This is enforced here, not just in
|
|
1456
|
+
* the settings/profile refresh flow, because this method is a public, extension-
|
|
1457
|
+
* exposed activation path (`setActiveTools`) on its own: without this, grep/find could
|
|
1458
|
+
* end up active while still being handed an artifact store (gated on "allowed" in
|
|
1459
|
+
* `_buildRuntime`) with no active tool able to resolve the resulting
|
|
1460
|
+
* "Full output: artifact tool-output:<id>" handle.
|
|
920
1461
|
*/
|
|
921
1462
|
setActiveToolsByName(toolNames) {
|
|
1463
|
+
// Model capability: small-window models get a reduced tool surface derived from the model's
|
|
1464
|
+
// own metadata. The unfiltered request is remembered so a later switch to a larger model
|
|
1465
|
+
// restores it (the filter is re-applied on every model change).
|
|
1466
|
+
this._requestedActiveToolNames = [...toolNames];
|
|
1467
|
+
const capabilityFiltered = filterToolNamesForCapability(toolNames, this.getModelCapabilityProfile());
|
|
922
1468
|
const tools = [];
|
|
923
1469
|
const validToolNames = [];
|
|
924
|
-
|
|
1470
|
+
const seen = new Set();
|
|
1471
|
+
const addIfRegistered = (name) => {
|
|
1472
|
+
if (seen.has(name))
|
|
1473
|
+
return;
|
|
925
1474
|
const tool = this._toolRegistry.get(name);
|
|
926
|
-
if (tool)
|
|
927
|
-
|
|
928
|
-
|
|
929
|
-
|
|
1475
|
+
if (!tool)
|
|
1476
|
+
return;
|
|
1477
|
+
seen.add(name);
|
|
1478
|
+
tools.push(tool);
|
|
1479
|
+
validToolNames.push(name);
|
|
1480
|
+
};
|
|
1481
|
+
for (const name of capabilityFiltered) {
|
|
1482
|
+
addIfRegistered(name);
|
|
1483
|
+
}
|
|
1484
|
+
if (validToolNames.includes("grep") || validToolNames.includes("find")) {
|
|
1485
|
+
addIfRegistered("artifact_retrieve");
|
|
930
1486
|
}
|
|
931
1487
|
this.agent.state.tools = tools;
|
|
932
1488
|
// Rebuild base system prompt with new tool set
|
|
@@ -1134,35 +1690,166 @@ export class AgentSession {
|
|
|
1134
1690
|
await this._drainQueuedExtensionCommands();
|
|
1135
1691
|
}
|
|
1136
1692
|
}
|
|
1137
|
-
|
|
1693
|
+
_isModelAvailableAndAuthed(pattern) {
|
|
1694
|
+
const resolved = resolveCliModel({ cliModel: pattern, modelRegistry: this._modelRegistry });
|
|
1695
|
+
if (!resolved.model)
|
|
1696
|
+
return false;
|
|
1697
|
+
return this._modelRegistry.hasConfiguredAuth(resolved.model);
|
|
1698
|
+
}
|
|
1699
|
+
_resolveModelRouterTurnRoute(prompt) {
|
|
1138
1700
|
const settings = this.settingsManager.getModelRouterSettings();
|
|
1139
|
-
const modelLabel = intent === "research" ? "cheap model" : "expensive model";
|
|
1140
1701
|
if (!settings.enabled) {
|
|
1141
1702
|
this._lastModelRouterSkipReason = "disabled";
|
|
1142
1703
|
return undefined;
|
|
1143
1704
|
}
|
|
1144
|
-
const
|
|
1705
|
+
const decision = classifyModelRouterRoute(prompt);
|
|
1706
|
+
this._lastModelRouterIntent = decision.tier === "cheap" ? "research" : "modify";
|
|
1707
|
+
// Learning tier must not be selected for normal user prompts
|
|
1708
|
+
if (decision.tier === "learning") {
|
|
1709
|
+
this._lastModelRouterSkipReason = "learning tier not supported for user prompts";
|
|
1710
|
+
return undefined;
|
|
1711
|
+
}
|
|
1712
|
+
const modelPattern = settings[decision.tier === "cheap" ? "cheapModel" : decision.tier === "medium" ? "mediumModel" : "expensiveModel"];
|
|
1713
|
+
const label = decision.tier === "cheap" ? "cheap model" : decision.tier === "medium" ? "medium model" : "expensive model";
|
|
1714
|
+
if (decision.tier === "medium" && (!modelPattern || !this._isModelAvailableAndAuthed(modelPattern))) {
|
|
1715
|
+
const expensivePattern = settings.expensiveModel;
|
|
1716
|
+
if (expensivePattern && this._isModelAvailableAndAuthed(expensivePattern)) {
|
|
1717
|
+
const resolvedExpensive = resolveCliModel({
|
|
1718
|
+
cliModel: expensivePattern,
|
|
1719
|
+
modelRegistry: this._modelRegistry,
|
|
1720
|
+
});
|
|
1721
|
+
if (resolvedExpensive.model) {
|
|
1722
|
+
decision.fallbackFrom = "medium";
|
|
1723
|
+
decision.tier = "expensive";
|
|
1724
|
+
decision.reasonCode = "medium_unavailable_fallback_expensive";
|
|
1725
|
+
decision.reasons = [...decision.reasons, "Medium model is unavailable, falling back to expensive model"];
|
|
1726
|
+
decision.model = formatModelRouterModel(resolvedExpensive.model);
|
|
1727
|
+
this._lastModelRouterSkipReason = undefined;
|
|
1728
|
+
return { decision, model: resolvedExpensive.model };
|
|
1729
|
+
}
|
|
1730
|
+
}
|
|
1731
|
+
this._lastModelRouterSkipReason = "medium model and expensive fallback are unavailable";
|
|
1732
|
+
return undefined;
|
|
1733
|
+
}
|
|
1145
1734
|
if (!modelPattern) {
|
|
1146
|
-
this._lastModelRouterSkipReason = `${
|
|
1735
|
+
this._lastModelRouterSkipReason = `${label} unset`;
|
|
1147
1736
|
return undefined;
|
|
1148
1737
|
}
|
|
1149
1738
|
const resolved = resolveCliModel({ cliModel: modelPattern, modelRegistry: this._modelRegistry });
|
|
1150
1739
|
if (!resolved.model) {
|
|
1151
|
-
this._lastModelRouterSkipReason = `${
|
|
1740
|
+
this._lastModelRouterSkipReason = `${label} unresolved: ${modelPattern}`;
|
|
1152
1741
|
return undefined;
|
|
1153
1742
|
}
|
|
1154
1743
|
const resolvedName = formatModelRouterModel(resolved.model);
|
|
1155
1744
|
if (!this._modelRegistry.hasConfiguredAuth(resolved.model)) {
|
|
1156
|
-
this._lastModelRouterSkipReason = `${
|
|
1745
|
+
this._lastModelRouterSkipReason = `${label} missing auth: ${resolvedName}`;
|
|
1157
1746
|
return undefined;
|
|
1158
1747
|
}
|
|
1159
1748
|
this._lastModelRouterSkipReason = undefined;
|
|
1749
|
+
decision.model = resolvedName;
|
|
1750
|
+
return { decision, model: resolved.model };
|
|
1751
|
+
}
|
|
1752
|
+
_resolveModelRouterModelForIntent(intent) {
|
|
1753
|
+
const settings = this.settingsManager.getModelRouterSettings();
|
|
1754
|
+
const modelPattern = intent === "research" ? settings.cheapModel : settings.expensiveModel;
|
|
1755
|
+
if (!modelPattern)
|
|
1756
|
+
return undefined;
|
|
1757
|
+
const resolved = resolveCliModel({ cliModel: modelPattern, modelRegistry: this._modelRegistry });
|
|
1758
|
+
if (!resolved.model)
|
|
1759
|
+
return undefined;
|
|
1760
|
+
if (!this._modelRegistry.hasConfiguredAuth(resolved.model))
|
|
1761
|
+
return undefined;
|
|
1762
|
+
return resolved.model;
|
|
1763
|
+
}
|
|
1764
|
+
_resolveConfiguredTierModel(tier) {
|
|
1765
|
+
const settings = this.settingsManager.getModelRouterSettings();
|
|
1766
|
+
const pattern = tier === "cheap" ? settings.cheapModel : tier === "medium" ? settings.mediumModel : settings.expensiveModel;
|
|
1767
|
+
if (!pattern)
|
|
1768
|
+
return undefined;
|
|
1769
|
+
const resolved = resolveCliModel({ cliModel: pattern, modelRegistry: this._modelRegistry });
|
|
1770
|
+
if (!resolved.model)
|
|
1771
|
+
return undefined;
|
|
1772
|
+
if (!this._modelRegistry.hasConfiguredAuth(resolved.model))
|
|
1773
|
+
return undefined;
|
|
1160
1774
|
return resolved.model;
|
|
1161
1775
|
}
|
|
1776
|
+
/**
|
|
1777
|
+
* Router resolution with the routing judge (auto-on with the router): the regex classifier's
|
|
1778
|
+
* decision is the baseline; when a judge model resolves (judgeModel, else mediumModel), one
|
|
1779
|
+
* bounded, tool-less completion may move the tier between cheap/medium/expensive — never to
|
|
1780
|
+
* learning. Core rule encoded in the judge prompt: planning is never cheap unless genuinely
|
|
1781
|
+
* trivial. Every fallback stays visible in the decision reasons, and judge spend reports
|
|
1782
|
+
* through spawned-usage accounting.
|
|
1783
|
+
*/
|
|
1784
|
+
async _resolveModelRouterTurnRouteJudged(prompt, options) {
|
|
1785
|
+
const baseline = this._resolveModelRouterTurnRoute(prompt);
|
|
1786
|
+
if (!baseline)
|
|
1787
|
+
return undefined;
|
|
1788
|
+
if (options?.skipJudge)
|
|
1789
|
+
return baseline;
|
|
1790
|
+
const settings = this.settingsManager.getModelRouterSettings();
|
|
1791
|
+
if (!settings.judgeEnabled)
|
|
1792
|
+
return baseline;
|
|
1793
|
+
const judgePattern = settings.judgeModel ?? settings.mediumModel;
|
|
1794
|
+
if (!judgePattern)
|
|
1795
|
+
return baseline;
|
|
1796
|
+
const judgeModel = this._resolveLaneModel(judgePattern);
|
|
1797
|
+
if (!judgeModel)
|
|
1798
|
+
return baseline;
|
|
1799
|
+
let spentUsage;
|
|
1800
|
+
const judged = await runRouteJudge({
|
|
1801
|
+
prompt,
|
|
1802
|
+
baseline: baseline.decision,
|
|
1803
|
+
signal: this._reflectionAbort.signal,
|
|
1804
|
+
complete: async ({ systemPrompt, userPrompt, signal }) => {
|
|
1805
|
+
const completion = await this.runIsolatedCompletion({
|
|
1806
|
+
systemPrompt,
|
|
1807
|
+
messages: [{ role: "user", content: [{ type: "text", text: userPrompt }], timestamp: Date.now() }],
|
|
1808
|
+
model: judgeModel,
|
|
1809
|
+
thinkingLevel: "off",
|
|
1810
|
+
maxTokens: ROUTE_JUDGE_MAX_OUTPUT_TOKENS,
|
|
1811
|
+
signal,
|
|
1812
|
+
// The judge system prompt is static — the provider can cache the prefix.
|
|
1813
|
+
cacheRetention: "short",
|
|
1814
|
+
});
|
|
1815
|
+
spentUsage = completion.usage;
|
|
1816
|
+
return {
|
|
1817
|
+
text: completion.text,
|
|
1818
|
+
costUsd: completion.usage.cost.total,
|
|
1819
|
+
stopReason: String(completion.stopReason),
|
|
1820
|
+
};
|
|
1821
|
+
},
|
|
1822
|
+
});
|
|
1823
|
+
if (spentUsage && (spentUsage.cost.total > 0 || spentUsage.totalTokens > 0)) {
|
|
1824
|
+
this.addSpawnedUsage(spentUsage, { label: "router-judge" });
|
|
1825
|
+
}
|
|
1826
|
+
if (!judged.verdict || judged.decision.tier === baseline.decision.tier) {
|
|
1827
|
+
// Same tier (or judge fell back): keep the baseline model, carry the annotated decision.
|
|
1828
|
+
return { decision: judged.decision, model: baseline.model };
|
|
1829
|
+
}
|
|
1830
|
+
const judgedTier = judged.decision.tier;
|
|
1831
|
+
if (judgedTier !== "cheap" && judgedTier !== "medium" && judgedTier !== "expensive") {
|
|
1832
|
+
return { decision: baseline.decision, model: baseline.model };
|
|
1833
|
+
}
|
|
1834
|
+
const judgedModel = this._resolveConfiguredTierModel(judgedTier);
|
|
1835
|
+
if (!judgedModel) {
|
|
1836
|
+
return {
|
|
1837
|
+
decision: {
|
|
1838
|
+
...baseline.decision,
|
|
1839
|
+
reasons: [
|
|
1840
|
+
...baseline.decision.reasons,
|
|
1841
|
+
`Route judge chose ${judgedTier} but no model resolves for that tier; baseline kept`,
|
|
1842
|
+
],
|
|
1843
|
+
},
|
|
1844
|
+
model: baseline.model,
|
|
1845
|
+
};
|
|
1846
|
+
}
|
|
1847
|
+
return { decision: { ...judged.decision, model: formatModelRouterModel(judgedModel) }, model: judgedModel };
|
|
1848
|
+
}
|
|
1849
|
+
// biome-ignore lint/correctness/noUnusedPrivateClassMembers: test seam
|
|
1162
1850
|
_resolveModelRouterTurnModel(prompt) {
|
|
1163
|
-
const
|
|
1164
|
-
|
|
1165
|
-
return this._resolveModelRouterModelForIntent(intent);
|
|
1851
|
+
const resolved = this._resolveModelRouterTurnRoute(prompt);
|
|
1852
|
+
return resolved?.model;
|
|
1166
1853
|
}
|
|
1167
1854
|
getModelRouterStatus(formatLabel) {
|
|
1168
1855
|
const recentDecisions = getRecentModelRouterDecisions(this.sessionManager.getEntries());
|
|
@@ -1181,7 +1868,7 @@ export class AgentSession {
|
|
|
1181
1868
|
}
|
|
1182
1869
|
return lines.join("\n");
|
|
1183
1870
|
}
|
|
1184
|
-
async _runAgentPromptWithModelRouter(messages, routedModel,
|
|
1871
|
+
async _runAgentPromptWithModelRouter(messages, routedModel, routeDecision, persistDecision = true) {
|
|
1185
1872
|
if (!routedModel) {
|
|
1186
1873
|
await this._runAgentPrompt(messages);
|
|
1187
1874
|
return;
|
|
@@ -1189,23 +1876,30 @@ export class AgentSession {
|
|
|
1189
1876
|
const previousModel = this.agent.state.model;
|
|
1190
1877
|
const previousThinkingLevel = this.agent.state.thinkingLevel;
|
|
1191
1878
|
const previousActiveModelRouterIntent = this._activeModelRouterIntent;
|
|
1879
|
+
const previousActiveModelRouterRoute = this._activeModelRouterRoute;
|
|
1192
1880
|
const previousModelRouterSessionBuffer = this._modelRouterSessionBuffer;
|
|
1193
1881
|
const previousModelRouterEscalationRequested = this._modelRouterEscalationRequested;
|
|
1194
|
-
const bufferRoutedTurn =
|
|
1882
|
+
const bufferRoutedTurn = routeDecision?.tier === "cheap";
|
|
1195
1883
|
const originalHistoryLength = this.agent.state.messages.length;
|
|
1196
1884
|
let retryModel;
|
|
1197
|
-
let completedDecision =
|
|
1885
|
+
let completedDecision = routeDecision
|
|
1198
1886
|
? {
|
|
1199
|
-
|
|
1887
|
+
route: routeDecision,
|
|
1200
1888
|
routedModel: formatModelRouterModel(routedModel),
|
|
1201
1889
|
outcome: "routed",
|
|
1890
|
+
intent: routeDecision.tier === "cheap" ? "research" : "modify",
|
|
1202
1891
|
}
|
|
1203
1892
|
: undefined;
|
|
1204
1893
|
let thrownError;
|
|
1205
|
-
if (
|
|
1894
|
+
if (routeDecision) {
|
|
1206
1895
|
this._lastModelRouterDecision = completedDecision;
|
|
1207
1896
|
}
|
|
1208
|
-
this._activeModelRouterIntent =
|
|
1897
|
+
this._activeModelRouterIntent = routeDecision
|
|
1898
|
+
? routeDecision.tier === "cheap"
|
|
1899
|
+
? "research"
|
|
1900
|
+
: "modify"
|
|
1901
|
+
: undefined;
|
|
1902
|
+
this._activeModelRouterRoute = routeDecision;
|
|
1209
1903
|
if (bufferRoutedTurn) {
|
|
1210
1904
|
this._modelRouterSessionBuffer = createModelRouterSessionBuffer();
|
|
1211
1905
|
this._modelRouterEscalationRequested = false;
|
|
@@ -1220,10 +1914,11 @@ export class AgentSession {
|
|
|
1220
1914
|
this.agent.state.messages.splice(originalHistoryLength);
|
|
1221
1915
|
retryModel = this._resolveModelRouterModelForIntent("modify") ?? previousModel;
|
|
1222
1916
|
completedDecision = {
|
|
1223
|
-
|
|
1917
|
+
route: routeDecision,
|
|
1224
1918
|
routedModel: formatModelRouterModel(routedModel),
|
|
1225
1919
|
outcome: "escalated",
|
|
1226
1920
|
retryModel: formatModelRouterModel(retryModel),
|
|
1921
|
+
intent: routeDecision.tier === "cheap" ? "research" : "modify",
|
|
1227
1922
|
};
|
|
1228
1923
|
this._lastModelRouterDecision = completedDecision;
|
|
1229
1924
|
}
|
|
@@ -1243,9 +1938,18 @@ export class AgentSession {
|
|
|
1243
1938
|
}
|
|
1244
1939
|
}
|
|
1245
1940
|
finally {
|
|
1246
|
-
|
|
1247
|
-
|
|
1941
|
+
// Restore the pre-route model ONLY if the routed model is still in place: a command
|
|
1942
|
+
// handler may have legitimately changed the session model mid-turn (setModel or a
|
|
1943
|
+
// provider re-registration), and clobbering that would silently undo the change.
|
|
1944
|
+
if (modelsAreEqual(this.agent.state.model, routedModel)) {
|
|
1945
|
+
this.agent.state.model = previousModel;
|
|
1946
|
+
this.agent.state.thinkingLevel = previousThinkingLevel;
|
|
1947
|
+
// The registry may have changed mid-turn (command-time registerProvider): re-resolve
|
|
1948
|
+
// the restored model so a provider override is not dropped with the routed model.
|
|
1949
|
+
this._refreshCurrentModelFromRegistry();
|
|
1950
|
+
}
|
|
1248
1951
|
this._activeModelRouterIntent = previousActiveModelRouterIntent;
|
|
1952
|
+
this._activeModelRouterRoute = previousActiveModelRouterRoute;
|
|
1249
1953
|
this._modelRouterSessionBuffer = previousModelRouterSessionBuffer;
|
|
1250
1954
|
this._modelRouterEscalationRequested = previousModelRouterEscalationRequested;
|
|
1251
1955
|
}
|
|
@@ -1253,7 +1957,16 @@ export class AgentSession {
|
|
|
1253
1957
|
const previousIsModelRouterRetry = this._isModelRouterRetry;
|
|
1254
1958
|
try {
|
|
1255
1959
|
this._isModelRouterRetry = true;
|
|
1256
|
-
|
|
1960
|
+
const retryDecision = {
|
|
1961
|
+
tier: "expensive",
|
|
1962
|
+
risk: "high-impact",
|
|
1963
|
+
confidence: 1.0,
|
|
1964
|
+
reasonCode: "cheap_mutating_tool_escalation",
|
|
1965
|
+
reasons: ["Cheap research turn attempted a mutating tool and escalated"],
|
|
1966
|
+
fallbackFrom: "cheap",
|
|
1967
|
+
model: formatModelRouterModel(retryModel),
|
|
1968
|
+
};
|
|
1969
|
+
await this._runAgentPromptWithModelRouter(messages, retryModel, retryDecision, false);
|
|
1257
1970
|
this._lastModelRouterDecision = completedDecision;
|
|
1258
1971
|
}
|
|
1259
1972
|
catch (error) {
|
|
@@ -1309,6 +2022,9 @@ export class AgentSession {
|
|
|
1309
2022
|
* @throws Error if no model selected or no API key available (when not streaming)
|
|
1310
2023
|
*/
|
|
1311
2024
|
async prompt(text, options) {
|
|
2025
|
+
if (options?.autoContinueGoal !== false) {
|
|
2026
|
+
this._clearGoalAutoContinueTimer();
|
|
2027
|
+
}
|
|
1312
2028
|
if ((this.isStreaming || this.isRetrying) && options?.streamingBehavior) {
|
|
1313
2029
|
const run = this._streamingPromptSubmissionTail.then(() => this._promptUnserialized(text, options), () => this._promptUnserialized(text, options));
|
|
1314
2030
|
this._streamingPromptSubmissionTail = run.catch(() => { });
|
|
@@ -1338,7 +2054,7 @@ export class AgentSession {
|
|
|
1338
2054
|
const preflightResult = options?.preflightResult;
|
|
1339
2055
|
let messages;
|
|
1340
2056
|
let routedTurnModel;
|
|
1341
|
-
let
|
|
2057
|
+
let routedTurnRouteDecision;
|
|
1342
2058
|
// R4 effectiveness feedback: remember the recall page + the query so we can score, after the
|
|
1343
2059
|
// response, whether the agent actually used the recalled context.
|
|
1344
2060
|
let injectedRecall = "";
|
|
@@ -1401,8 +2117,13 @@ export class AgentSession {
|
|
|
1401
2117
|
}
|
|
1402
2118
|
// Flush any pending bash messages before the new prompt
|
|
1403
2119
|
this._flushPendingBashMessages();
|
|
1404
|
-
|
|
1405
|
-
|
|
2120
|
+
const resolvedRouteInfo = await this._resolveModelRouterTurnRouteJudged(expandedText, {
|
|
2121
|
+
// Internally generated turns (goal continuation, lane follow-ups) never consult the judge:
|
|
2122
|
+
// the regex floor already classified them, and a 20-turn loop must not buy 20 judge calls.
|
|
2123
|
+
skipJudge: options?.autoContinueGoal === false,
|
|
2124
|
+
});
|
|
2125
|
+
routedTurnModel = resolvedRouteInfo?.model;
|
|
2126
|
+
routedTurnRouteDecision = resolvedRouteInfo?.decision;
|
|
1406
2127
|
const requestModel = routedTurnModel ?? this.model;
|
|
1407
2128
|
// Validate model
|
|
1408
2129
|
if (!requestModel) {
|
|
@@ -1495,7 +2216,7 @@ export class AgentSession {
|
|
|
1495
2216
|
return;
|
|
1496
2217
|
}
|
|
1497
2218
|
preflightResult?.(true);
|
|
1498
|
-
await this._runAgentPromptWithModelRouter(messages, routedTurnModel,
|
|
2219
|
+
await this._runAgentPromptWithModelRouter(messages, routedTurnModel, routedTurnRouteDecision);
|
|
1499
2220
|
// R4: score whether the agent actually used the recalled context, so the recall gate can adapt.
|
|
1500
2221
|
if (injectedRecall) {
|
|
1501
2222
|
const response = this._findLastAssistantMessage();
|
|
@@ -1509,6 +2230,8 @@ export class AgentSession {
|
|
|
1509
2230
|
this._effectivenessTracker.recordRecallOutcome(injectedRecall, recallQuery, responseText);
|
|
1510
2231
|
}
|
|
1511
2232
|
}
|
|
2233
|
+
this._scheduleGoalAutoContinueFromIdle(options);
|
|
2234
|
+
this._scheduleResearchLaneFromIdle();
|
|
1512
2235
|
}
|
|
1513
2236
|
/**
|
|
1514
2237
|
* Try to execute an extension command. Returns true if command was found and executed.
|
|
@@ -1829,6 +2552,19 @@ export class AgentSession {
|
|
|
1829
2552
|
this.setThinkingLevel(thinkingLevel, { persistSettings });
|
|
1830
2553
|
await this._emitModelSelect(model, previousModel, "set");
|
|
1831
2554
|
this._checkContextWindowUsageWarning();
|
|
2555
|
+
// Re-derive the model-capability tool surface for the new model (restores the full requested
|
|
2556
|
+
// set when moving small -> large, reduces it when moving large -> small).
|
|
2557
|
+
if (this._requestedActiveToolNames) {
|
|
2558
|
+
const before = this.getActiveToolNames().join(",");
|
|
2559
|
+
this.setActiveToolsByName(this._requestedActiveToolNames);
|
|
2560
|
+
const capability = this.getModelCapabilityProfile();
|
|
2561
|
+
if (capability.class !== "full" && this.getActiveToolNames().join(",") !== before) {
|
|
2562
|
+
this._emit({
|
|
2563
|
+
type: "warning",
|
|
2564
|
+
message: `Small-context model detected (${capability.contextWindow ?? "unknown"} tokens, class '${capability.class}'): active tools reduced to [${this.getActiveToolNames().join(", ")}]; background lanes ${capability.backgroundLanesEnabled ? "enabled" : "disabled"}.`,
|
|
2565
|
+
});
|
|
2566
|
+
}
|
|
2567
|
+
}
|
|
1832
2568
|
}
|
|
1833
2569
|
/**
|
|
1834
2570
|
* Cycle to next/previous model.
|
|
@@ -2761,7 +3497,10 @@ export class AgentSession {
|
|
|
2761
3497
|
}
|
|
2762
3498
|
_refreshToolRegistry(options) {
|
|
2763
3499
|
const previousRegistryNames = new Set(this._toolRegistry.keys());
|
|
2764
|
-
|
|
3500
|
+
// Re-derive from the pre-filter REQUEST, never from agent.state.tools: the active set is
|
|
3501
|
+
// capability/profile-filtered, so feeding it back through setActiveToolsByName would
|
|
3502
|
+
// permanently shrink what a later switch to a larger model (or permissive profile) restores.
|
|
3503
|
+
const previousActiveToolNames = this._requestedActiveToolNames ?? this.getActiveToolNames();
|
|
2765
3504
|
const allowedToolNames = this._allowedToolNames;
|
|
2766
3505
|
const excludedToolNames = this._excludedToolNames;
|
|
2767
3506
|
const toolProfileFilter = this._toolProfileFilter;
|
|
@@ -2833,27 +3572,40 @@ export class AgentSession {
|
|
|
2833
3572
|
toolRegistry.set(tool.name, tool);
|
|
2834
3573
|
}
|
|
2835
3574
|
this._toolRegistry = toolRegistry;
|
|
2836
|
-
const
|
|
3575
|
+
const requestedBase = options?.activeToolNames ? [...options.activeToolNames] : [...previousActiveToolNames];
|
|
3576
|
+
const nextActiveToolNames = requestedBase.filter((name) => isAllowedTool(name));
|
|
3577
|
+
const autoActivated = [];
|
|
2837
3578
|
if (allowedToolNames) {
|
|
2838
3579
|
for (const toolName of this._toolRegistry.keys()) {
|
|
2839
3580
|
if (allowedToolNames.has(toolName)) {
|
|
2840
3581
|
nextActiveToolNames.push(toolName);
|
|
3582
|
+
autoActivated.push(toolName);
|
|
2841
3583
|
}
|
|
2842
3584
|
}
|
|
2843
3585
|
}
|
|
2844
3586
|
else if (options?.includeAllExtensionTools) {
|
|
2845
3587
|
for (const tool of wrappedExtensionTools) {
|
|
2846
3588
|
nextActiveToolNames.push(tool.name);
|
|
3589
|
+
autoActivated.push(tool.name);
|
|
2847
3590
|
}
|
|
2848
3591
|
}
|
|
2849
3592
|
else if (!options?.activeToolNames) {
|
|
2850
3593
|
for (const toolName of this._toolRegistry.keys()) {
|
|
2851
3594
|
if (!previousRegistryNames.has(toolName)) {
|
|
2852
3595
|
nextActiveToolNames.push(toolName);
|
|
3596
|
+
autoActivated.push(toolName);
|
|
2853
3597
|
}
|
|
2854
3598
|
}
|
|
2855
3599
|
}
|
|
3600
|
+
// artifact_retrieve companion auto-activation is enforced inside
|
|
3601
|
+
// setActiveToolsByName() itself (not duplicated here), so every activation path --
|
|
3602
|
+
// including the public, extension-exposed setActiveTools() -- gets the same
|
|
3603
|
+
// guarantee, not just this settings/profile refresh flow.
|
|
2856
3604
|
this.setActiveToolsByName([...new Set(nextActiveToolNames)]);
|
|
3605
|
+
// setActiveToolsByName just stored the profile-filtered ACTIVE set as the request; restore
|
|
3606
|
+
// the true pre-filter request (plus this refresh's auto-activations) so an internal refresh
|
|
3607
|
+
// can never permanently narrow it.
|
|
3608
|
+
this._requestedActiveToolNames = [...new Set([...requestedBase, ...autoActivated])];
|
|
2857
3609
|
}
|
|
2858
3610
|
_createReloadRuntimeSnapshot() {
|
|
2859
3611
|
return {
|
|
@@ -2908,6 +3660,14 @@ export class AgentSession {
|
|
|
2908
3660
|
const autoResizeImages = this.settingsManager.getImageAutoResize();
|
|
2909
3661
|
const shellCommandPrefix = this.settingsManager.getShellCommandPrefix();
|
|
2910
3662
|
const shellPath = this.settingsManager.getShellPath();
|
|
3663
|
+
// grep/find must not emit a "Full output: artifact tool-output:<id>" handle that
|
|
3664
|
+
// nothing can resolve. If artifact_retrieve is explicitly excluded/blocked/outside
|
|
3665
|
+
// an active allowlist, don't hand grep/find an artifact store at all: they fall
|
|
3666
|
+
// back to their pre-existing bounded preview/truncation behavior, with no
|
|
3667
|
+
// payload/meta files ever written and no retrieval promise made.
|
|
3668
|
+
const toolArtifactStore = this._isToolOrCommandAllowedByProfile("artifact_retrieve")
|
|
3669
|
+
? this._getToolArtifactStore()
|
|
3670
|
+
: undefined;
|
|
2911
3671
|
const baseToolDefinitions = this._baseToolsOverride
|
|
2912
3672
|
? Object.fromEntries(Object.entries(this._baseToolsOverride).map(([name, tool]) => [
|
|
2913
3673
|
name,
|
|
@@ -2916,12 +3676,37 @@ export class AgentSession {
|
|
|
2916
3676
|
: createAllToolDefinitions(this._cwd, {
|
|
2917
3677
|
read: { autoResizeImages },
|
|
2918
3678
|
bash: { commandPrefix: shellCommandPrefix, shellPath },
|
|
3679
|
+
grep: { artifactStore: toolArtifactStore },
|
|
3680
|
+
find: { artifactStore: toolArtifactStore },
|
|
3681
|
+
artifact_retrieve: { artifactStore: toolArtifactStore },
|
|
2919
3682
|
});
|
|
2920
3683
|
this._baseToolDefinitions = new Map(Object.entries(baseToolDefinitions).map(([name, tool]) => [name, tool]));
|
|
2921
3684
|
if (!this._baseToolsOverride) {
|
|
2922
|
-
for (const definition of createCoreDiagnosticsToolDefinitions(() => this.getActiveToolNames(), () => this.getAllTools(), (messages) => this.getContextGcReport(messages))) {
|
|
3685
|
+
for (const definition of createCoreDiagnosticsToolDefinitions(() => this.getActiveToolNames(), () => this.getAllTools(), (messages) => this.getContextGcReport(messages), () => this._getMemoryAuditDiagnostics())) {
|
|
2923
3686
|
this._baseToolDefinitions.set(definition.name, definition);
|
|
2924
3687
|
}
|
|
3688
|
+
const goalToolDefinition = createGoalToolDefinition({
|
|
3689
|
+
getGoalState: () => this.getGoalStateSnapshot(),
|
|
3690
|
+
saveGoalState: (state) => {
|
|
3691
|
+
this.saveGoalStateSnapshot(state);
|
|
3692
|
+
},
|
|
3693
|
+
});
|
|
3694
|
+
this._baseToolDefinitions.set(goalToolDefinition.name, goalToolDefinition);
|
|
3695
|
+
const delegateToolDefinition = createDelegateToolDefinition({
|
|
3696
|
+
runWorkerDelegation: (args) => this.runWorkerDelegationOnce(args),
|
|
3697
|
+
});
|
|
3698
|
+
this._baseToolDefinitions.set(delegateToolDefinition.name, delegateToolDefinition);
|
|
3699
|
+
// Registered but not default-active: probes spend tokens on the probed model, so
|
|
3700
|
+
// activation is an explicit choice (settings/profile/setActiveTools or /autonomy fitness).
|
|
3701
|
+
const modelFitnessToolDefinition = createModelFitnessToolDefinition({
|
|
3702
|
+
runProbe: (args) => this.runModelFitness(args),
|
|
3703
|
+
});
|
|
3704
|
+
this._baseToolDefinitions.set(modelFitnessToolDefinition.name, modelFitnessToolDefinition);
|
|
3705
|
+
const runToolkitScriptToolDefinition = createRunToolkitScriptToolDefinition({
|
|
3706
|
+
getScripts: () => this.settingsManager.getToolkitScripts(),
|
|
3707
|
+
execute: (script, scriptArgs) => executeToolkitScript({ script, scriptArgs, cwd: this._cwd }),
|
|
3708
|
+
});
|
|
3709
|
+
this._baseToolDefinitions.set(runToolkitScriptToolDefinition.name, runToolkitScriptToolDefinition);
|
|
2925
3710
|
}
|
|
2926
3711
|
const extensionsResult = this._resourceLoader.getExtensions();
|
|
2927
3712
|
if (options.flagValues) {
|
|
@@ -2941,7 +3726,7 @@ export class AgentSession {
|
|
|
2941
3726
|
this._applyExtensionBindings(this._extensionRunner);
|
|
2942
3727
|
const defaultActiveToolNames = this._baseToolsOverride
|
|
2943
3728
|
? Object.keys(this._baseToolsOverride)
|
|
2944
|
-
: ["read", "bash", "edit", "write", "context_audit"];
|
|
3729
|
+
: ["read", "bash", "edit", "write", "context_audit", "goal", "delegate", "run_toolkit_script"];
|
|
2945
3730
|
const baseActiveToolNames = options.activeToolNames ?? defaultActiveToolNames;
|
|
2946
3731
|
this._refreshToolRegistry({
|
|
2947
3732
|
activeToolNames: baseActiveToolNames,
|
|
@@ -2957,7 +3742,9 @@ export class AgentSession {
|
|
|
2957
3742
|
}
|
|
2958
3743
|
const previousRunner = this._extensionRunner;
|
|
2959
3744
|
const snapshot = this._createReloadRuntimeSnapshot();
|
|
2960
|
-
|
|
3745
|
+
// Preserve the pre-filter tool REQUEST across the rebuild, not the capability/profile-filtered
|
|
3746
|
+
// active set — otherwise a reload under a small model permanently shrinks the restorable set.
|
|
3747
|
+
const activeToolNames = this._requestedActiveToolNames ?? this.getActiveToolNames();
|
|
2961
3748
|
const previousFlagValues = previousRunner.getFlagValues();
|
|
2962
3749
|
const reloadErrors = [];
|
|
2963
3750
|
let newRunner;
|
|
@@ -3050,7 +3837,7 @@ export class AgentSession {
|
|
|
3050
3837
|
// Remove from loaded extensions
|
|
3051
3838
|
this._resourceLoader.removeLoadedExtension(extensionPath);
|
|
3052
3839
|
// Rebuild runtime with new extension set
|
|
3053
|
-
const activeToolNames = this.getActiveToolNames();
|
|
3840
|
+
const activeToolNames = this._requestedActiveToolNames ?? this.getActiveToolNames();
|
|
3054
3841
|
const previousFlagValues = previousRunner.getFlagValues();
|
|
3055
3842
|
this._buildRuntime({
|
|
3056
3843
|
activeToolNames,
|
|
@@ -3093,7 +3880,7 @@ export class AgentSession {
|
|
|
3093
3880
|
throw new Error(error || `Failed to load extension: ${extensionPath}`);
|
|
3094
3881
|
}
|
|
3095
3882
|
// Rebuild runtime to aggregate tools/commands/handlers/providers
|
|
3096
|
-
const activeToolNames = this.getActiveToolNames();
|
|
3883
|
+
const activeToolNames = this._requestedActiveToolNames ?? this.getActiveToolNames();
|
|
3097
3884
|
const previousFlagValues = previousRunner.getFlagValues();
|
|
3098
3885
|
this._buildRuntime({
|
|
3099
3886
|
activeToolNames,
|
|
@@ -3768,6 +4555,671 @@ export class AgentSession {
|
|
|
3768
4555
|
getDailyUsageBreakdown(formatLabel, now = new Date()) {
|
|
3769
4556
|
return formatDailyUsageBreakdown(this.getDailyUsageTotals(now), formatLabel);
|
|
3770
4557
|
}
|
|
4558
|
+
/**
|
|
4559
|
+
* Save a snapshot of the goal state to the session log.
|
|
4560
|
+
*
|
|
4561
|
+
* @returns the id of the appended custom entry
|
|
4562
|
+
*/
|
|
4563
|
+
saveGoalStateSnapshot(state) {
|
|
4564
|
+
return appendGoalStateSnapshot(this.sessionManager, state);
|
|
4565
|
+
}
|
|
4566
|
+
/**
|
|
4567
|
+
* Retrieve the latest valid goal state snapshot from the session log.
|
|
4568
|
+
*/
|
|
4569
|
+
getGoalStateSnapshot() {
|
|
4570
|
+
return getLatestGoalStateSnapshot(this.sessionManager.getEntries());
|
|
4571
|
+
}
|
|
4572
|
+
/**
|
|
4573
|
+
* Save a snapshot of the evidence bundle to the session log.
|
|
4574
|
+
*
|
|
4575
|
+
* @returns the id of the appended custom entry
|
|
4576
|
+
*/
|
|
4577
|
+
saveEvidenceBundleSnapshot(bundle) {
|
|
4578
|
+
return appendEvidenceBundleSnapshot(this.sessionManager, bundle);
|
|
4579
|
+
}
|
|
4580
|
+
/**
|
|
4581
|
+
* Retrieve the latest valid evidence bundle snapshot from the session log.
|
|
4582
|
+
*/
|
|
4583
|
+
getEvidenceBundleSnapshot() {
|
|
4584
|
+
return getLatestEvidenceBundleSnapshot(this.sessionManager.getEntries());
|
|
4585
|
+
}
|
|
4586
|
+
getEvidenceBundleSnapshots() {
|
|
4587
|
+
return getEvidenceBundleSnapshots(this.sessionManager.getEntries());
|
|
4588
|
+
}
|
|
4589
|
+
/** Live lane records tracked by this process (running and terminal). */
|
|
4590
|
+
getLaneRecords() {
|
|
4591
|
+
return this._laneTracker.getRecords();
|
|
4592
|
+
}
|
|
4593
|
+
saveWorkerResultSnapshot(result) {
|
|
4594
|
+
return appendWorkerResultSnapshot(this.sessionManager, result);
|
|
4595
|
+
}
|
|
4596
|
+
getWorkerResultSnapshots() {
|
|
4597
|
+
return getWorkerResultSnapshots(this.sessionManager.getEntries());
|
|
4598
|
+
}
|
|
4599
|
+
saveLearningDecisionSnapshot(decision) {
|
|
4600
|
+
return appendLearningDecisionSnapshot(this.sessionManager, decision);
|
|
4601
|
+
}
|
|
4602
|
+
getLearningDecisionSnapshots() {
|
|
4603
|
+
return getLearningDecisionSnapshots(this.sessionManager.getEntries());
|
|
4604
|
+
}
|
|
4605
|
+
getGoalRuntimeSnapshot(settings) {
|
|
4606
|
+
return buildGoalRuntimeSnapshot({
|
|
4607
|
+
entries: this.sessionManager.getEntries(),
|
|
4608
|
+
settings,
|
|
4609
|
+
});
|
|
4610
|
+
}
|
|
4611
|
+
_clearGoalAutoContinueTimer() {
|
|
4612
|
+
if (this._goalAutoContinueTimer !== undefined) {
|
|
4613
|
+
clearTimeout(this._goalAutoContinueTimer);
|
|
4614
|
+
this._goalAutoContinueTimer = undefined;
|
|
4615
|
+
}
|
|
4616
|
+
}
|
|
4617
|
+
_scheduleGoalAutoContinueFromIdle(options) {
|
|
4618
|
+
if (options?.autoContinueGoal === false || this._isGoalAutoContinuing || this._disposed)
|
|
4619
|
+
return;
|
|
4620
|
+
// Small-window models cannot afford multi-thousand-token continuation prompts per idle turn.
|
|
4621
|
+
if (!this.getModelCapabilityProfile().backgroundLanesEnabled)
|
|
4622
|
+
return;
|
|
4623
|
+
const { maxStallTurns, goalAutoContinue, goalAutoContinueDelayMs } = this.settingsManager.getAutonomySettings();
|
|
4624
|
+
if (!goalAutoContinue)
|
|
4625
|
+
return;
|
|
4626
|
+
const snapshot = this.getGoalRuntimeSnapshot({ maxStallTurns });
|
|
4627
|
+
if (snapshot.continuation.action !== "continue")
|
|
4628
|
+
return;
|
|
4629
|
+
this._clearGoalAutoContinueTimer();
|
|
4630
|
+
this._goalAutoContinueTimer = setTimeout(() => {
|
|
4631
|
+
this._goalAutoContinueTimer = undefined;
|
|
4632
|
+
void this._runScheduledGoalAutoContinue();
|
|
4633
|
+
}, goalAutoContinueDelayMs);
|
|
4634
|
+
const timer = this._goalAutoContinueTimer;
|
|
4635
|
+
if (typeof timer === "object" && timer && "unref" in timer) {
|
|
4636
|
+
const { unref } = timer;
|
|
4637
|
+
unref?.call(timer);
|
|
4638
|
+
}
|
|
4639
|
+
}
|
|
4640
|
+
async _runScheduledGoalAutoContinue() {
|
|
4641
|
+
if (this._isGoalAutoContinuing || this._disposed)
|
|
4642
|
+
return;
|
|
4643
|
+
const { maxStallTurns, goalContinueTurns, goalContinueMaxWallClockMinutes, goalAutoContinue } = this.settingsManager.getAutonomySettings();
|
|
4644
|
+
if (!goalAutoContinue)
|
|
4645
|
+
return;
|
|
4646
|
+
const snapshot = this.getGoalRuntimeSnapshot({ maxStallTurns });
|
|
4647
|
+
if (snapshot.continuation.action !== "continue")
|
|
4648
|
+
return;
|
|
4649
|
+
this._isGoalAutoContinuing = true;
|
|
4650
|
+
try {
|
|
4651
|
+
await this.continueGoalLoop({
|
|
4652
|
+
maxTurns: goalContinueTurns,
|
|
4653
|
+
maxStallTurns,
|
|
4654
|
+
maxWallClockMinutes: goalContinueMaxWallClockMinutes,
|
|
4655
|
+
});
|
|
4656
|
+
}
|
|
4657
|
+
catch (error) {
|
|
4658
|
+
const message = error instanceof Error ? error.message : String(error);
|
|
4659
|
+
this._emit({ type: "warning", message: `Goal auto-continuation failed: ${message}` });
|
|
4660
|
+
}
|
|
4661
|
+
finally {
|
|
4662
|
+
this._isGoalAutoContinuing = false;
|
|
4663
|
+
}
|
|
4664
|
+
}
|
|
4665
|
+
_clearResearchLaneTimer() {
|
|
4666
|
+
if (this._researchLaneTimer !== undefined) {
|
|
4667
|
+
clearTimeout(this._researchLaneTimer);
|
|
4668
|
+
this._researchLaneTimer = undefined;
|
|
4669
|
+
}
|
|
4670
|
+
}
|
|
4671
|
+
/**
|
|
4672
|
+
* Derive the research demand from durable goal state: an active goal with open requirements,
|
|
4673
|
+
* deduplicated against the latest persisted bundle so the same requirement set is never
|
|
4674
|
+
* researched twice (the query is deterministic, so dedupe survives session reload).
|
|
4675
|
+
*/
|
|
4676
|
+
_buildResearchLaneDemand() {
|
|
4677
|
+
const goal = this.getGoalStateSnapshot();
|
|
4678
|
+
if (!goal || goal.status !== "active") {
|
|
4679
|
+
this._lastResearchLaneSkipReason = "no_active_goal";
|
|
4680
|
+
return undefined;
|
|
4681
|
+
}
|
|
4682
|
+
const open = goal.requirements.filter((requirement) => requirement.status === "open");
|
|
4683
|
+
if (open.length === 0) {
|
|
4684
|
+
this._lastResearchLaneSkipReason = "no_open_requirements";
|
|
4685
|
+
return undefined;
|
|
4686
|
+
}
|
|
4687
|
+
const query = `goal:${goal.goalId} requirements:${open
|
|
4688
|
+
.map((requirement) => requirement.id)
|
|
4689
|
+
.sort()
|
|
4690
|
+
.join(",")}`;
|
|
4691
|
+
if (this.getEvidenceBundleSnapshot()?.query === query) {
|
|
4692
|
+
this._lastResearchLaneSkipReason = "recent_evidence_sufficient";
|
|
4693
|
+
return undefined;
|
|
4694
|
+
}
|
|
4695
|
+
const context = [
|
|
4696
|
+
`Goal: ${goal.userGoal}`,
|
|
4697
|
+
"Open requirements:",
|
|
4698
|
+
...open.slice(0, 20).map((requirement) => `- ${requirement.text}`),
|
|
4699
|
+
].join("\n");
|
|
4700
|
+
return { query, context, goalId: goal.goalId };
|
|
4701
|
+
}
|
|
4702
|
+
/**
|
|
4703
|
+
* Idle trigger for the autonomous research lane (mirrors {@link _scheduleGoalAutoContinueFromIdle}).
|
|
4704
|
+
* All skips are recorded in `_lastResearchLaneSkipReason` and surfaced via diagnostics — the lane
|
|
4705
|
+
* informs, it never prompts or blocks the foreground.
|
|
4706
|
+
*/
|
|
4707
|
+
_scheduleResearchLaneFromIdle() {
|
|
4708
|
+
if (this._isResearchLaneRunning || this._disposed || this._isChildSession)
|
|
4709
|
+
return;
|
|
4710
|
+
if (!this.getModelCapabilityProfile().backgroundLanesEnabled) {
|
|
4711
|
+
this._lastResearchLaneSkipReason = "model_context_too_small";
|
|
4712
|
+
return;
|
|
4713
|
+
}
|
|
4714
|
+
const research = this.settingsManager.getResearchLaneSettings();
|
|
4715
|
+
if (!research.enabled) {
|
|
4716
|
+
this._lastResearchLaneSkipReason = "research_lane_disabled";
|
|
4717
|
+
return;
|
|
4718
|
+
}
|
|
4719
|
+
const { mode } = this.settingsManager.getAutonomySettings();
|
|
4720
|
+
if (mode === "off") {
|
|
4721
|
+
this._lastResearchLaneSkipReason = "autonomy_mode_off";
|
|
4722
|
+
return;
|
|
4723
|
+
}
|
|
4724
|
+
const priorRuns = getLaneRecordSnapshots(this.sessionManager.getEntries()).filter((record) => record.type === "research").length;
|
|
4725
|
+
if (priorRuns >= research.maxRunsPerSession) {
|
|
4726
|
+
this._lastResearchLaneSkipReason = "max_runs_reached";
|
|
4727
|
+
return;
|
|
4728
|
+
}
|
|
4729
|
+
if (!this._buildResearchLaneDemand())
|
|
4730
|
+
return;
|
|
4731
|
+
this._clearResearchLaneTimer();
|
|
4732
|
+
this._researchLaneTimer = setTimeout(() => {
|
|
4733
|
+
this._researchLaneTimer = undefined;
|
|
4734
|
+
void this._runScheduledResearchLane();
|
|
4735
|
+
}, research.idleDelayMs);
|
|
4736
|
+
const timer = this._researchLaneTimer;
|
|
4737
|
+
if (typeof timer === "object" && timer && "unref" in timer) {
|
|
4738
|
+
const { unref } = timer;
|
|
4739
|
+
unref?.call(timer);
|
|
4740
|
+
}
|
|
4741
|
+
}
|
|
4742
|
+
async _runScheduledResearchLane() {
|
|
4743
|
+
if (this._isResearchLaneRunning || this._disposed)
|
|
4744
|
+
return;
|
|
4745
|
+
const research = this.settingsManager.getResearchLaneSettings();
|
|
4746
|
+
const { mode } = this.settingsManager.getAutonomySettings();
|
|
4747
|
+
if (!research.enabled || mode === "off")
|
|
4748
|
+
return;
|
|
4749
|
+
try {
|
|
4750
|
+
await this.runResearchLaneOnce();
|
|
4751
|
+
}
|
|
4752
|
+
catch (error) {
|
|
4753
|
+
const message = error instanceof Error ? error.message : String(error);
|
|
4754
|
+
this._emit({ type: "warning", message: `Research lane failed: ${message}` });
|
|
4755
|
+
}
|
|
4756
|
+
}
|
|
4757
|
+
/**
|
|
4758
|
+
* Capability profile derived from the CURRENT session model's own metadata (context window),
|
|
4759
|
+
* honoring the modelCapability.mode setting ("off" disables, a class name forces).
|
|
4760
|
+
*/
|
|
4761
|
+
getModelCapabilityProfile() {
|
|
4762
|
+
return deriveModelCapabilityProfile({
|
|
4763
|
+
contextWindow: this.model?.contextWindow,
|
|
4764
|
+
mode: this.settingsManager.getModelCapabilitySettings().mode,
|
|
4765
|
+
});
|
|
4766
|
+
}
|
|
4767
|
+
/** Capability profile for a specific lane model (lane budgets scale to the lane model's window). */
|
|
4768
|
+
_laneCapabilityProfile(model) {
|
|
4769
|
+
return deriveModelCapabilityProfile({
|
|
4770
|
+
contextWindow: model.contextWindow,
|
|
4771
|
+
mode: this.settingsManager.getModelCapabilitySettings().mode,
|
|
4772
|
+
});
|
|
4773
|
+
}
|
|
4774
|
+
/**
|
|
4775
|
+
* Resolve the model for a background lane. Lanes are shipped BY this session, so they inherit
|
|
4776
|
+
* the session's own model unless a lane-specific model is explicitly configured — a single-model
|
|
4777
|
+
* setup (e.g. one local open model) runs its lanes on that same model. An explicitly configured
|
|
4778
|
+
* pattern that cannot resolve/authenticate is a visible skip, not a silent fallback.
|
|
4779
|
+
*/
|
|
4780
|
+
_resolveLaneModel(configuredPattern) {
|
|
4781
|
+
if (configuredPattern) {
|
|
4782
|
+
const resolved = resolveCliModel({ cliModel: configuredPattern, modelRegistry: this._modelRegistry });
|
|
4783
|
+
if (resolved.model && this._modelRegistry.hasConfiguredAuth(resolved.model)) {
|
|
4784
|
+
return resolved.model;
|
|
4785
|
+
}
|
|
4786
|
+
return undefined;
|
|
4787
|
+
}
|
|
4788
|
+
return this.model ?? undefined;
|
|
4789
|
+
}
|
|
4790
|
+
/**
|
|
4791
|
+
* Resolve what a lane ships with. Precedence: explicit lane model setting, then the lane
|
|
4792
|
+
* profile's model (a shipped profile with a model MUST be obeyed — unresolvable is a visible
|
|
4793
|
+
* skip, never a fallback), then generic inheritance of the session model.
|
|
4794
|
+
*/
|
|
4795
|
+
_resolveLaneShipment(laneSettings, missingModelReason) {
|
|
4796
|
+
let laneProfile;
|
|
4797
|
+
if (laneSettings.profile) {
|
|
4798
|
+
laneProfile = this.settingsManager.getProfileRegistry().getProfile(laneSettings.profile);
|
|
4799
|
+
if (!laneProfile) {
|
|
4800
|
+
return { ok: false, skipReason: "lane_profile_not_found" };
|
|
4801
|
+
}
|
|
4802
|
+
}
|
|
4803
|
+
let model;
|
|
4804
|
+
if (laneSettings.model) {
|
|
4805
|
+
model = this._resolveLaneModel(laneSettings.model);
|
|
4806
|
+
if (!model)
|
|
4807
|
+
return { ok: false, skipReason: missingModelReason };
|
|
4808
|
+
}
|
|
4809
|
+
else if (laneProfile?.model) {
|
|
4810
|
+
model = this._resolveLaneModel(laneProfile.model);
|
|
4811
|
+
if (!model)
|
|
4812
|
+
return { ok: false, skipReason: "no_lane_profile_model" };
|
|
4813
|
+
}
|
|
4814
|
+
else {
|
|
4815
|
+
model = this.model ?? undefined;
|
|
4816
|
+
if (!model)
|
|
4817
|
+
return { ok: false, skipReason: missingModelReason };
|
|
4818
|
+
}
|
|
4819
|
+
return { ok: true, model, laneProfile };
|
|
4820
|
+
}
|
|
4821
|
+
/** UAC tool grants from a shipped lane profile, recorded on the lane envelope. */
|
|
4822
|
+
_laneProfileToolGrants(laneProfile) {
|
|
4823
|
+
const toolsFilter = laneProfile?.resources.tools;
|
|
4824
|
+
return {
|
|
4825
|
+
...(toolsFilter?.allow && toolsFilter.allow.length > 0 ? { allowedTools: [...toolsFilter.allow] } : {}),
|
|
4826
|
+
...(toolsFilter?.block && toolsFilter.block.length > 0 ? { deniedTools: [...toolsFilter.block] } : {}),
|
|
4827
|
+
};
|
|
4828
|
+
}
|
|
4829
|
+
/** Stripped research envelope — never the foreground/architect envelope. */
|
|
4830
|
+
_buildResearchLaneEnvelope(maxUsd, laneProfile) {
|
|
4831
|
+
return {
|
|
4832
|
+
id: `research-${this.sessionId}-${Date.now()}`,
|
|
4833
|
+
profileId: laneProfile?.name,
|
|
4834
|
+
capabilities: ["research", "read_files", "memory_read"],
|
|
4835
|
+
...this._laneProfileToolGrants(laneProfile),
|
|
4836
|
+
maxEstimatedUsd: Math.min(maxUsd, this.capabilityEnvelope?.maxEstimatedUsd ?? Number.POSITIVE_INFINITY),
|
|
4837
|
+
createdAt: new Date().toISOString(),
|
|
4838
|
+
};
|
|
4839
|
+
}
|
|
4840
|
+
/**
|
|
4841
|
+
* Run one bounded, read-only research pass and persist its results: evidence bundle snapshot,
|
|
4842
|
+
* terminal lane record, and spawned-usage cost report (single-hop invariant, idempotent on the
|
|
4843
|
+
* lane's reportId). Explicit calls (e.g. `/autonomy research`) express user intent and bypass the
|
|
4844
|
+
* enabled/mode/dedupe gates the idle scheduler enforces; budget and capability gates always apply.
|
|
4845
|
+
*/
|
|
4846
|
+
async runResearchLaneOnce(request) {
|
|
4847
|
+
if (this._isResearchLaneRunning) {
|
|
4848
|
+
return { started: false, skipReason: "research_lane_already_running" };
|
|
4849
|
+
}
|
|
4850
|
+
if (this._disposed) {
|
|
4851
|
+
return { started: false, skipReason: "session_disposed" };
|
|
4852
|
+
}
|
|
4853
|
+
const settings = this.settingsManager.getResearchLaneSettings();
|
|
4854
|
+
const demand = request?.query
|
|
4855
|
+
? { query: request.query, context: request.context ?? "", goalId: request.goalId }
|
|
4856
|
+
: this._buildResearchLaneDemand();
|
|
4857
|
+
if (!demand) {
|
|
4858
|
+
return { started: false, skipReason: this._lastResearchLaneSkipReason ?? "no_research_demand" };
|
|
4859
|
+
}
|
|
4860
|
+
const shipment = this._resolveLaneShipment(settings, "no_research_model");
|
|
4861
|
+
if (!shipment.ok) {
|
|
4862
|
+
this._lastResearchLaneSkipReason = shipment.skipReason;
|
|
4863
|
+
return { started: false, skipReason: shipment.skipReason };
|
|
4864
|
+
}
|
|
4865
|
+
const { model, laneProfile } = shipment;
|
|
4866
|
+
this._isResearchLaneRunning = true;
|
|
4867
|
+
this._laneTracker.ensureCounterAtLeast(getLaneRecordSnapshots(this.sessionManager.getEntries()).length + 1);
|
|
4868
|
+
const startedRecord = this._laneTracker.start({ type: "research", goalId: demand.goalId });
|
|
4869
|
+
try {
|
|
4870
|
+
let spentUsage;
|
|
4871
|
+
const result = await runResearch({
|
|
4872
|
+
query: demand.query,
|
|
4873
|
+
context: demand.context,
|
|
4874
|
+
envelope: this._buildResearchLaneEnvelope(settings.maxUsd, laneProfile),
|
|
4875
|
+
maxUsd: settings.maxUsd,
|
|
4876
|
+
maxSources: settings.maxSources,
|
|
4877
|
+
maxFindings: settings.maxFindings,
|
|
4878
|
+
maxWallClockMs: settings.maxWallClockMs,
|
|
4879
|
+
signal: this._researchLaneAbort.signal,
|
|
4880
|
+
complete: async ({ systemPrompt, userPrompt, signal }) => {
|
|
4881
|
+
const completion = await this.runIsolatedCompletion({
|
|
4882
|
+
// Level-0 core always survives; profile soul and role prompt are the replaceable
|
|
4883
|
+
// layers; a settings-provided prompt replaces everything above the core.
|
|
4884
|
+
systemPrompt: composeSubagentSystemPrompt({
|
|
4885
|
+
soul: laneProfile?.soul,
|
|
4886
|
+
rolePrompt: systemPrompt,
|
|
4887
|
+
override: settings.systemPrompt,
|
|
4888
|
+
}),
|
|
4889
|
+
messages: [{ role: "user", content: [{ type: "text", text: userPrompt }], timestamp: Date.now() }],
|
|
4890
|
+
model,
|
|
4891
|
+
thinkingLevel: laneProfile?.thinking ?? "off",
|
|
4892
|
+
maxTokens: this._laneCapabilityProfile(model).laneMaxOutputTokens,
|
|
4893
|
+
signal,
|
|
4894
|
+
// Core/soul/role are all static per configuration — the provider can cache the prefix.
|
|
4895
|
+
cacheRetention: "short",
|
|
4896
|
+
});
|
|
4897
|
+
spentUsage = completion.usage;
|
|
4898
|
+
return {
|
|
4899
|
+
text: completion.text,
|
|
4900
|
+
costUsd: completion.usage.cost.total,
|
|
4901
|
+
stopReason: String(completion.stopReason),
|
|
4902
|
+
};
|
|
4903
|
+
},
|
|
4904
|
+
});
|
|
4905
|
+
// Bug #21 pattern: if the session was disposed while the completion was in flight, do NOT
|
|
4906
|
+
// persist evidence/records/usage against the dead session.
|
|
4907
|
+
if (this._disposed) {
|
|
4908
|
+
const record = this._laneTracker.complete(startedRecord.laneId, {
|
|
4909
|
+
status: "canceled",
|
|
4910
|
+
reasonCode: "session_disposed",
|
|
4911
|
+
});
|
|
4912
|
+
return { started: true, record, result };
|
|
4913
|
+
}
|
|
4914
|
+
let evidenceEntryId;
|
|
4915
|
+
if (result.bundle) {
|
|
4916
|
+
evidenceEntryId = this.saveEvidenceBundleSnapshot(result.bundle);
|
|
4917
|
+
}
|
|
4918
|
+
if (spentUsage && (spentUsage.cost.total > 0 || spentUsage.totalTokens > 0)) {
|
|
4919
|
+
this.addSpawnedUsage(spentUsage, {
|
|
4920
|
+
label: "research-lane",
|
|
4921
|
+
reportId: `research:${this.sessionId}:${startedRecord.laneId}`,
|
|
4922
|
+
});
|
|
4923
|
+
}
|
|
4924
|
+
const record = this._laneTracker.complete(startedRecord.laneId, {
|
|
4925
|
+
status: result.status,
|
|
4926
|
+
reasonCode: result.reasonCode,
|
|
4927
|
+
costUsd: result.costUsd,
|
|
4928
|
+
evidenceEntryId,
|
|
4929
|
+
});
|
|
4930
|
+
if (record) {
|
|
4931
|
+
appendLaneRecordSnapshot(this.sessionManager, record);
|
|
4932
|
+
}
|
|
4933
|
+
return { started: true, record, result };
|
|
4934
|
+
}
|
|
4935
|
+
catch (error) {
|
|
4936
|
+
const record = this._laneTracker.complete(startedRecord.laneId, {
|
|
4937
|
+
status: "failed",
|
|
4938
|
+
reasonCode: "research_lane_error",
|
|
4939
|
+
});
|
|
4940
|
+
if (record && !this._disposed) {
|
|
4941
|
+
appendLaneRecordSnapshot(this.sessionManager, record);
|
|
4942
|
+
}
|
|
4943
|
+
const message = error instanceof Error ? error.message : String(error);
|
|
4944
|
+
this._emit({ type: "warning", message: `Research lane failed: ${message}` });
|
|
4945
|
+
return { started: true, record };
|
|
4946
|
+
}
|
|
4947
|
+
finally {
|
|
4948
|
+
this._isResearchLaneRunning = false;
|
|
4949
|
+
}
|
|
4950
|
+
}
|
|
4951
|
+
/**
|
|
4952
|
+
* Run one bounded scout-worker delegation: build a WorkerRequest with a stripped read-only
|
|
4953
|
+
* envelope, execute it as an isolated completion on a cheap lane, validate the result via
|
|
4954
|
+
* {@link validateWorkerResult} before acceptance, and persist result + lane record + spawned
|
|
4955
|
+
* usage (idempotent per-lane reportId). Consumed by the `delegate` tool.
|
|
4956
|
+
*/
|
|
4957
|
+
async runWorkerDelegationOnce(request) {
|
|
4958
|
+
if (this._isWorkerDelegationRunning) {
|
|
4959
|
+
return { started: false, skipReason: "worker_delegation_already_running" };
|
|
4960
|
+
}
|
|
4961
|
+
if (this._disposed) {
|
|
4962
|
+
return { started: false, skipReason: "session_disposed" };
|
|
4963
|
+
}
|
|
4964
|
+
const instructions = request.instructions.trim();
|
|
4965
|
+
if (instructions.length === 0) {
|
|
4966
|
+
return { started: false, skipReason: "missing_instructions" };
|
|
4967
|
+
}
|
|
4968
|
+
const settings = this.settingsManager.getWorkerDelegationSettings();
|
|
4969
|
+
if (!settings.enabled) {
|
|
4970
|
+
return { started: false, skipReason: "worker_delegation_disabled" };
|
|
4971
|
+
}
|
|
4972
|
+
const shipment = this._resolveLaneShipment(settings, "no_worker_model");
|
|
4973
|
+
if (!shipment.ok) {
|
|
4974
|
+
return { started: false, skipReason: shipment.skipReason };
|
|
4975
|
+
}
|
|
4976
|
+
const { model, laneProfile } = shipment;
|
|
4977
|
+
this._isWorkerDelegationRunning = true;
|
|
4978
|
+
this._laneTracker.ensureCounterAtLeast(getLaneRecordSnapshots(this.sessionManager.getEntries()).length + 1);
|
|
4979
|
+
const startedRecord = this._laneTracker.start({ type: "worker" });
|
|
4980
|
+
const maxUsd = Math.min(settings.maxUsd, this.capabilityEnvelope?.maxEstimatedUsd ?? Number.POSITIVE_INFINITY);
|
|
4981
|
+
const workerRequest = {
|
|
4982
|
+
id: startedRecord.laneId,
|
|
4983
|
+
instructions,
|
|
4984
|
+
route: {
|
|
4985
|
+
tier: "cheap",
|
|
4986
|
+
risk: "read-only",
|
|
4987
|
+
confidence: 1,
|
|
4988
|
+
reasonCode: "scout_worker",
|
|
4989
|
+
reasons: ["Read-only scout delegation"],
|
|
4990
|
+
},
|
|
4991
|
+
envelope: {
|
|
4992
|
+
id: `worker-${this.sessionId}-${startedRecord.laneId}`,
|
|
4993
|
+
profileId: laneProfile?.name,
|
|
4994
|
+
capabilities: ["read_files"],
|
|
4995
|
+
...this._laneProfileToolGrants(laneProfile),
|
|
4996
|
+
maxEstimatedUsd: maxUsd,
|
|
4997
|
+
createdAt: new Date().toISOString(),
|
|
4998
|
+
},
|
|
4999
|
+
maxEstimatedUsd: maxUsd,
|
|
5000
|
+
createdAt: new Date().toISOString(),
|
|
5001
|
+
};
|
|
5002
|
+
const usageReportId = `worker:${this.sessionId}:${startedRecord.laneId}`;
|
|
5003
|
+
try {
|
|
5004
|
+
let spentUsage;
|
|
5005
|
+
const outcome = await runWorker({
|
|
5006
|
+
request: workerRequest,
|
|
5007
|
+
maxUsd,
|
|
5008
|
+
maxWallClockMs: settings.maxWallClockMs,
|
|
5009
|
+
usageReportId,
|
|
5010
|
+
signal: this._workerDelegationAbort.signal,
|
|
5011
|
+
complete: async ({ systemPrompt, userPrompt, signal }) => {
|
|
5012
|
+
const completion = await this.runIsolatedCompletion({
|
|
5013
|
+
// Level-0 core always survives. A model-provided prompt (delegate tool) is the most
|
|
5014
|
+
// specific override, then the settings-level prompt, then profile soul + role prompt.
|
|
5015
|
+
systemPrompt: composeSubagentSystemPrompt({
|
|
5016
|
+
soul: laneProfile?.soul,
|
|
5017
|
+
rolePrompt: systemPrompt,
|
|
5018
|
+
override: request.systemPrompt ?? settings.systemPrompt,
|
|
5019
|
+
}),
|
|
5020
|
+
messages: [{ role: "user", content: [{ type: "text", text: userPrompt }], timestamp: Date.now() }],
|
|
5021
|
+
model,
|
|
5022
|
+
thinkingLevel: laneProfile?.thinking ?? "off",
|
|
5023
|
+
maxTokens: this._laneCapabilityProfile(model).laneMaxOutputTokens,
|
|
5024
|
+
signal,
|
|
5025
|
+
// Core/soul/role are all static per configuration — the provider can cache the prefix.
|
|
5026
|
+
cacheRetention: "short",
|
|
5027
|
+
});
|
|
5028
|
+
spentUsage = completion.usage;
|
|
5029
|
+
return {
|
|
5030
|
+
text: completion.text,
|
|
5031
|
+
costUsd: completion.usage.cost.total,
|
|
5032
|
+
stopReason: String(completion.stopReason),
|
|
5033
|
+
};
|
|
5034
|
+
},
|
|
5035
|
+
});
|
|
5036
|
+
// Bug #21 pattern: never persist against a disposed session.
|
|
5037
|
+
if (this._disposed) {
|
|
5038
|
+
const record = this._laneTracker.complete(startedRecord.laneId, {
|
|
5039
|
+
status: "canceled",
|
|
5040
|
+
reasonCode: "session_disposed",
|
|
5041
|
+
});
|
|
5042
|
+
return { started: true, record, outcome };
|
|
5043
|
+
}
|
|
5044
|
+
this.saveWorkerResultSnapshot(outcome.result);
|
|
5045
|
+
if (spentUsage && (spentUsage.cost.total > 0 || spentUsage.totalTokens > 0)) {
|
|
5046
|
+
this.addSpawnedUsage(spentUsage, { label: "worker-delegation", reportId: usageReportId });
|
|
5047
|
+
}
|
|
5048
|
+
const record = this._laneTracker.complete(startedRecord.laneId, {
|
|
5049
|
+
status: outcome.laneStatus,
|
|
5050
|
+
reasonCode: outcome.reasonCode,
|
|
5051
|
+
costUsd: outcome.costUsd,
|
|
5052
|
+
});
|
|
5053
|
+
if (record) {
|
|
5054
|
+
appendLaneRecordSnapshot(this.sessionManager, record);
|
|
5055
|
+
}
|
|
5056
|
+
return { started: true, record, outcome };
|
|
5057
|
+
}
|
|
5058
|
+
catch (error) {
|
|
5059
|
+
const record = this._laneTracker.complete(startedRecord.laneId, {
|
|
5060
|
+
status: "failed",
|
|
5061
|
+
reasonCode: "worker_delegation_error",
|
|
5062
|
+
});
|
|
5063
|
+
if (record && !this._disposed) {
|
|
5064
|
+
appendLaneRecordSnapshot(this.sessionManager, record);
|
|
5065
|
+
}
|
|
5066
|
+
const message = error instanceof Error ? error.message : String(error);
|
|
5067
|
+
this._emit({ type: "warning", message: `Worker delegation failed: ${message}` });
|
|
5068
|
+
return { started: true, record };
|
|
5069
|
+
}
|
|
5070
|
+
finally {
|
|
5071
|
+
this._isWorkerDelegationRunning = false;
|
|
5072
|
+
}
|
|
5073
|
+
}
|
|
5074
|
+
/**
|
|
5075
|
+
* Probe a candidate model against the subagent contracts (research/worker/judge/search/
|
|
5076
|
+
* tool-call surfaces) via {@link runModelFitnessProbe}. The model must resolve and
|
|
5077
|
+
* authenticate; every probe call runs as an isolated completion on that model, and probe
|
|
5078
|
+
* spend is reported through spawned-usage accounting.
|
|
5079
|
+
*/
|
|
5080
|
+
async runModelFitness(args) {
|
|
5081
|
+
if (this._disposed)
|
|
5082
|
+
return { started: false, skipReason: "session_disposed" };
|
|
5083
|
+
const resolved = this._resolveLaneModel(args.model.trim() || undefined);
|
|
5084
|
+
if (!resolved)
|
|
5085
|
+
return { started: false, skipReason: "model_unresolved_or_unauthenticated" };
|
|
5086
|
+
const capability = this._laneCapabilityProfile(resolved);
|
|
5087
|
+
const spent = {
|
|
5088
|
+
input: 0,
|
|
5089
|
+
output: 0,
|
|
5090
|
+
cacheRead: 0,
|
|
5091
|
+
cacheWrite: 0,
|
|
5092
|
+
totalTokens: 0,
|
|
5093
|
+
cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 },
|
|
5094
|
+
};
|
|
5095
|
+
const report = await runModelFitnessProbe({
|
|
5096
|
+
trials: args.trials,
|
|
5097
|
+
signal: this._researchLaneAbort.signal,
|
|
5098
|
+
complete: async ({ systemPrompt, userPrompt, signal }) => {
|
|
5099
|
+
const callStarted = Date.now();
|
|
5100
|
+
const completion = await this.runIsolatedCompletion({
|
|
5101
|
+
systemPrompt,
|
|
5102
|
+
messages: [{ role: "user", content: [{ type: "text", text: userPrompt }], timestamp: Date.now() }],
|
|
5103
|
+
model: resolved,
|
|
5104
|
+
thinkingLevel: "off",
|
|
5105
|
+
maxTokens: capability.laneMaxOutputTokens,
|
|
5106
|
+
signal,
|
|
5107
|
+
cacheRetention: "short",
|
|
5108
|
+
});
|
|
5109
|
+
const callMs = Date.now() - callStarted;
|
|
5110
|
+
spent.input += completion.usage.input;
|
|
5111
|
+
spent.output += completion.usage.output;
|
|
5112
|
+
spent.cacheRead += completion.usage.cacheRead;
|
|
5113
|
+
spent.cacheWrite += completion.usage.cacheWrite;
|
|
5114
|
+
spent.totalTokens += completion.usage.totalTokens;
|
|
5115
|
+
spent.cost.input += completion.usage.cost.input;
|
|
5116
|
+
spent.cost.output += completion.usage.cost.output;
|
|
5117
|
+
spent.cost.cacheRead += completion.usage.cost.cacheRead;
|
|
5118
|
+
spent.cost.cacheWrite += completion.usage.cost.cacheWrite;
|
|
5119
|
+
spent.cost.total += completion.usage.cost.total;
|
|
5120
|
+
return {
|
|
5121
|
+
text: completion.text,
|
|
5122
|
+
costUsd: completion.usage.cost.total,
|
|
5123
|
+
stopReason: String(completion.stopReason),
|
|
5124
|
+
// Wall-clock fallback for tok/s: providers don't expose pure eval time, so the
|
|
5125
|
+
// measured call time stands in — slightly conservative (includes network/queue).
|
|
5126
|
+
outputTokens: completion.usage.output,
|
|
5127
|
+
evalMs: callMs,
|
|
5128
|
+
};
|
|
5129
|
+
},
|
|
5130
|
+
});
|
|
5131
|
+
if (!this._disposed && (spent.cost.total > 0 || spent.totalTokens > 0)) {
|
|
5132
|
+
this.addSpawnedUsage(spent, { label: "model-fitness" });
|
|
5133
|
+
}
|
|
5134
|
+
const modelRef = `${resolved.provider}/${resolved.id}`;
|
|
5135
|
+
// Fitness is a property of a model ON a host — persist the report host-keyed so role
|
|
5136
|
+
// assignments stay per-machine (a model can await better hardware without being forgotten).
|
|
5137
|
+
// Best-effort: a disk problem must not fail the probe itself.
|
|
5138
|
+
try {
|
|
5139
|
+
if (!this._disposed) {
|
|
5140
|
+
FitnessStore.forAgentDir(this._agentDir).save(modelRef, report);
|
|
5141
|
+
}
|
|
5142
|
+
}
|
|
5143
|
+
catch {
|
|
5144
|
+
// best-effort persistence
|
|
5145
|
+
}
|
|
5146
|
+
return { started: true, model: modelRef, report };
|
|
5147
|
+
}
|
|
5148
|
+
/** Fitness reports persisted for THIS host (measured evidence for architect/profile decisions). */
|
|
5149
|
+
getStoredFitnessReports() {
|
|
5150
|
+
try {
|
|
5151
|
+
return FitnessStore.forAgentDir(this._agentDir).getForHost();
|
|
5152
|
+
}
|
|
5153
|
+
catch {
|
|
5154
|
+
return [];
|
|
5155
|
+
}
|
|
5156
|
+
}
|
|
5157
|
+
async continueGoalOnce(options) {
|
|
5158
|
+
const snapshot = this.getGoalRuntimeSnapshot({ maxStallTurns: options.maxStallTurns });
|
|
5159
|
+
if (snapshot.continuation.action !== "continue") {
|
|
5160
|
+
return { submitted: false, snapshot };
|
|
5161
|
+
}
|
|
5162
|
+
const prompt = buildGoalContinuationPrompt({ snapshot, limits: options.promptLimits });
|
|
5163
|
+
await this.prompt(prompt.text, {
|
|
5164
|
+
expandPromptTemplates: false,
|
|
5165
|
+
processSlashCommands: false,
|
|
5166
|
+
autoContinueGoal: false,
|
|
5167
|
+
});
|
|
5168
|
+
return { submitted: true, snapshot, prompt };
|
|
5169
|
+
}
|
|
5170
|
+
async continueGoalLoop(options) {
|
|
5171
|
+
let turnsSubmitted = 0;
|
|
5172
|
+
const now = options.now ?? Date.now;
|
|
5173
|
+
const maxWallClockMs = typeof options.maxWallClockMinutes === "number" && options.maxWallClockMinutes > 0
|
|
5174
|
+
? options.maxWallClockMinutes * 60_000
|
|
5175
|
+
: undefined;
|
|
5176
|
+
const startedAt = now();
|
|
5177
|
+
const hasReachedWallClockBudget = () => maxWallClockMs !== undefined && now() - startedAt >= maxWallClockMs;
|
|
5178
|
+
const snapshot = () => this.getGoalRuntimeSnapshot({ maxStallTurns: options.maxStallTurns });
|
|
5179
|
+
if (options.maxTurns <= 0) {
|
|
5180
|
+
return {
|
|
5181
|
+
turnsSubmitted: 0,
|
|
5182
|
+
stopReason: "max_turns_reached",
|
|
5183
|
+
finalSnapshot: snapshot(),
|
|
5184
|
+
};
|
|
5185
|
+
}
|
|
5186
|
+
if (hasReachedWallClockBudget()) {
|
|
5187
|
+
return { turnsSubmitted, stopReason: "wall_clock_budget_reached", finalSnapshot: snapshot() };
|
|
5188
|
+
}
|
|
5189
|
+
while (turnsSubmitted < options.maxTurns) {
|
|
5190
|
+
const beforeSnapshot = snapshot();
|
|
5191
|
+
if (beforeSnapshot.continuation.action !== "continue") {
|
|
5192
|
+
return { turnsSubmitted, stopReason: "continuation_not_allowed", finalSnapshot: beforeSnapshot };
|
|
5193
|
+
}
|
|
5194
|
+
const state = beforeSnapshot.goalState;
|
|
5195
|
+
const beforeKey = state
|
|
5196
|
+
? `${state.goalId}:${state.updatedAt}:${state.events.length}:${state.stallTurns}:${state.status}`
|
|
5197
|
+
: undefined;
|
|
5198
|
+
const result = await this.continueGoalOnce(options);
|
|
5199
|
+
if (result.submitted) {
|
|
5200
|
+
turnsSubmitted++;
|
|
5201
|
+
}
|
|
5202
|
+
if (hasReachedWallClockBudget()) {
|
|
5203
|
+
return { turnsSubmitted, stopReason: "wall_clock_budget_reached", finalSnapshot: snapshot() };
|
|
5204
|
+
}
|
|
5205
|
+
const afterSnapshot = snapshot();
|
|
5206
|
+
if (afterSnapshot.continuation.action !== "continue") {
|
|
5207
|
+
return { turnsSubmitted, stopReason: "continuation_not_allowed", finalSnapshot: afterSnapshot };
|
|
5208
|
+
}
|
|
5209
|
+
const afterState = afterSnapshot.goalState;
|
|
5210
|
+
const afterKey = afterState
|
|
5211
|
+
? `${afterState.goalId}:${afterState.updatedAt}:${afterState.events.length}:${afterState.stallTurns}:${afterState.status}`
|
|
5212
|
+
: undefined;
|
|
5213
|
+
if (beforeKey === afterKey) {
|
|
5214
|
+
return { turnsSubmitted, stopReason: "goal_state_not_advanced", finalSnapshot: afterSnapshot };
|
|
5215
|
+
}
|
|
5216
|
+
}
|
|
5217
|
+
return {
|
|
5218
|
+
turnsSubmitted,
|
|
5219
|
+
stopReason: "max_turns_reached",
|
|
5220
|
+
finalSnapshot: snapshot(),
|
|
5221
|
+
};
|
|
5222
|
+
}
|
|
3771
5223
|
/**
|
|
3772
5224
|
* Run a one-shot LLM completion fully ISOLATED from the main session — the load-bearing
|
|
3773
5225
|
* primitive for the native reflection engine (adaptive-agent design §6c/§7).
|
|
@@ -3873,8 +5325,60 @@ export class AgentSession {
|
|
|
3873
5325
|
// or skills against the dead session.
|
|
3874
5326
|
if (this._disposed)
|
|
3875
5327
|
return result;
|
|
5328
|
+
// Learning apply policy: every durable write is converted to a proposal, decided by the
|
|
5329
|
+
// learning gate, and audited with a rollback plan. With the policy disabled (default) the
|
|
5330
|
+
// legacy direct-apply behavior is preserved — but now leaves audit records with rollback info.
|
|
5331
|
+
const policy = this.settingsManager.getLearningPolicySettings();
|
|
5332
|
+
// The audit id sequence counts STORED snapshots only: it reseeds from the stored count on
|
|
5333
|
+
// every pass, so advancing it for a no-op (which stores nothing) would make later passes
|
|
5334
|
+
// reuse ids — and rollback keys on the id, so a collision blocks or misdirects rollback.
|
|
5335
|
+
let auditSequence = getLearningAuditSnapshots(this.sessionManager.getEntries()).length;
|
|
5336
|
+
let writeIndex = 0;
|
|
3876
5337
|
for (const write of result.writes) {
|
|
3877
|
-
|
|
5338
|
+
writeIndex += 1;
|
|
5339
|
+
const proposalId = `${input.reportId ?? "reflection"}-w${writeIndex}`;
|
|
5340
|
+
const proposal = proposalFromReflectionWrite(write, proposalId);
|
|
5341
|
+
const rollback = rollbackPlanForReflectionWrite(write);
|
|
5342
|
+
const decision = policy.enabled
|
|
5343
|
+
? evaluateLearningDecision({
|
|
5344
|
+
proposal,
|
|
5345
|
+
confidence: policy.reflectionSourceConfidence,
|
|
5346
|
+
observations: 1,
|
|
5347
|
+
contradictions: 0,
|
|
5348
|
+
settings: {
|
|
5349
|
+
enabled: true,
|
|
5350
|
+
autoApplyEnabled: policy.autoApplyEnabled,
|
|
5351
|
+
confidenceThreshold: policy.confidenceThreshold,
|
|
5352
|
+
minObservations: policy.minObservations,
|
|
5353
|
+
allowedAutoApplyLayers: policy.allowedAutoApplyLayers,
|
|
5354
|
+
requireRollbackPlan: policy.requireRollbackPlan,
|
|
5355
|
+
},
|
|
5356
|
+
})
|
|
5357
|
+
: {
|
|
5358
|
+
kind: "apply",
|
|
5359
|
+
reasonCode: "learning_policy_disabled_legacy_apply",
|
|
5360
|
+
confidence: 0,
|
|
5361
|
+
summary: proposal.summary,
|
|
5362
|
+
requiresApproval: false,
|
|
5363
|
+
};
|
|
5364
|
+
this.saveLearningDecisionSnapshot(decision);
|
|
5365
|
+
if (decision.kind === "apply") {
|
|
5366
|
+
await this._applyReflectionWrite(write, signal);
|
|
5367
|
+
}
|
|
5368
|
+
if (decision.kind !== "no-op") {
|
|
5369
|
+
auditSequence += 1;
|
|
5370
|
+
appendLearningAuditSnapshot(this.sessionManager, {
|
|
5371
|
+
id: `audit-${auditSequence}`,
|
|
5372
|
+
proposalId,
|
|
5373
|
+
layer: proposal.layer,
|
|
5374
|
+
action: decision.kind === "apply" ? "apply" : "propose",
|
|
5375
|
+
summary: proposal.summary,
|
|
5376
|
+
reasonCode: decision.reasonCode,
|
|
5377
|
+
decision,
|
|
5378
|
+
rollback,
|
|
5379
|
+
createdAt: new Date().toISOString(),
|
|
5380
|
+
});
|
|
5381
|
+
}
|
|
3878
5382
|
}
|
|
3879
5383
|
// Account the reflection's spend so it surfaces in the footer roll-up (net-token visibility).
|
|
3880
5384
|
// Idempotent on reportId so a retried/duplicated pass cannot double-count.
|
|
@@ -3883,6 +5387,76 @@ export class AgentSession {
|
|
|
3883
5387
|
}
|
|
3884
5388
|
return result;
|
|
3885
5389
|
}
|
|
5390
|
+
getLearningAuditRecords() {
|
|
5391
|
+
return getLearningAuditSnapshots(this.sessionManager.getEntries());
|
|
5392
|
+
}
|
|
5393
|
+
/**
|
|
5394
|
+
* Roll back one applied durable learning change by executing the inverse operation recorded in
|
|
5395
|
+
* its audit record (memory ops run through the same bundled memory-tool path as the original
|
|
5396
|
+
* apply; promoted skills are archived). Appends a linked "rollback" audit record on success so
|
|
5397
|
+
* the change history stays complete and a change cannot be rolled back twice.
|
|
5398
|
+
*/
|
|
5399
|
+
async rollbackLearningWrite(auditId) {
|
|
5400
|
+
if (this._disposed)
|
|
5401
|
+
return { ok: false, reason: "session_disposed" };
|
|
5402
|
+
const audits = this.getLearningAuditRecords();
|
|
5403
|
+
const audit = audits.find((record) => record.id === auditId);
|
|
5404
|
+
if (!audit)
|
|
5405
|
+
return { ok: false, reason: "audit_not_found" };
|
|
5406
|
+
if (audit.action !== "apply")
|
|
5407
|
+
return { ok: false, reason: "not_an_applied_change" };
|
|
5408
|
+
if (audits.some((record) => record.action === "rollback" && record.rollbackOf === auditId)) {
|
|
5409
|
+
return { ok: false, reason: "already_rolled_back" };
|
|
5410
|
+
}
|
|
5411
|
+
const rollback = audit.rollback;
|
|
5412
|
+
if (!rollback)
|
|
5413
|
+
return { ok: false, reason: "no_rollback_plan" };
|
|
5414
|
+
switch (rollback.kind) {
|
|
5415
|
+
case "memory_remove": {
|
|
5416
|
+
if (!rollback.target)
|
|
5417
|
+
return { ok: false, reason: "missing_rollback_target" };
|
|
5418
|
+
await this._applyReflectionWrite({ kind: "memory_remove", target: rollback.target });
|
|
5419
|
+
break;
|
|
5420
|
+
}
|
|
5421
|
+
case "memory_restore": {
|
|
5422
|
+
if (!rollback.target || rollback.previous === undefined) {
|
|
5423
|
+
return { ok: false, reason: "missing_rollback_target" };
|
|
5424
|
+
}
|
|
5425
|
+
await this._applyReflectionWrite({
|
|
5426
|
+
kind: "memory_replace",
|
|
5427
|
+
target: rollback.target,
|
|
5428
|
+
text: rollback.previous,
|
|
5429
|
+
});
|
|
5430
|
+
break;
|
|
5431
|
+
}
|
|
5432
|
+
case "memory_add": {
|
|
5433
|
+
if (rollback.previous === undefined)
|
|
5434
|
+
return { ok: false, reason: "missing_rollback_target" };
|
|
5435
|
+
await this._applyReflectionWrite({ kind: "memory_add", section: "MEMORY", text: rollback.previous });
|
|
5436
|
+
break;
|
|
5437
|
+
}
|
|
5438
|
+
case "archive_skill": {
|
|
5439
|
+
if (!rollback.target)
|
|
5440
|
+
return { ok: false, reason: "missing_rollback_target" };
|
|
5441
|
+
if (!this.archivePromotedSkill(rollback.target)) {
|
|
5442
|
+
return { ok: false, reason: "skill_archive_failed" };
|
|
5443
|
+
}
|
|
5444
|
+
break;
|
|
5445
|
+
}
|
|
5446
|
+
}
|
|
5447
|
+
appendLearningAuditSnapshot(this.sessionManager, {
|
|
5448
|
+
id: `${audit.id}-rollback`,
|
|
5449
|
+
proposalId: audit.proposalId,
|
|
5450
|
+
layer: audit.layer,
|
|
5451
|
+
action: "rollback",
|
|
5452
|
+
summary: `Rolled back: ${audit.summary}`,
|
|
5453
|
+
reasonCode: "user_requested_rollback",
|
|
5454
|
+
decision: audit.decision,
|
|
5455
|
+
rollbackOf: audit.id,
|
|
5456
|
+
createdAt: new Date().toISOString(),
|
|
5457
|
+
});
|
|
5458
|
+
return { ok: true, reason: "rollback_applied" };
|
|
5459
|
+
}
|
|
3886
5460
|
/**
|
|
3887
5461
|
* Apply one reflection write through the bundled `memory` tool. `memory_replace`/`memory_remove`
|
|
3888
5462
|
* don't carry a target file, so we try MEMORY.md first and fall back to USER.md when the substring
|
|
@@ -4084,6 +5658,182 @@ export class AgentSession {
|
|
|
4084
5658
|
// =========================================================================
|
|
4085
5659
|
// Extension System
|
|
4086
5660
|
// =========================================================================
|
|
5661
|
+
getAutonomyStatusSnapshot() {
|
|
5662
|
+
const snapshot = {};
|
|
5663
|
+
if (this._lastModelRouterDecision?.route) {
|
|
5664
|
+
snapshot.latestRoute = {
|
|
5665
|
+
tier: this._lastModelRouterDecision.route.tier,
|
|
5666
|
+
reasonCode: this._lastModelRouterDecision.route.reasonCode,
|
|
5667
|
+
risk: this._lastModelRouterDecision.route.risk,
|
|
5668
|
+
};
|
|
5669
|
+
}
|
|
5670
|
+
if (this._lastAutonomyGateOutcome) {
|
|
5671
|
+
snapshot.latestGate = {
|
|
5672
|
+
outcome: this._lastAutonomyGateOutcome.outcome,
|
|
5673
|
+
gate: this._lastAutonomyGateOutcome.gate,
|
|
5674
|
+
reasonCode: this._lastAutonomyGateOutcome.reasonCode,
|
|
5675
|
+
};
|
|
5676
|
+
}
|
|
5677
|
+
const currentCost = this.getSessionStats().cost;
|
|
5678
|
+
if (currentCost > 0) {
|
|
5679
|
+
snapshot.currentCostUsd = currentCost;
|
|
5680
|
+
}
|
|
5681
|
+
const spawnedCost = this.getSpawnedUsage().cost;
|
|
5682
|
+
if (spawnedCost > 0) {
|
|
5683
|
+
snapshot.spawnedCostUsd = spawnedCost;
|
|
5684
|
+
}
|
|
5685
|
+
const dailyCost = this.getDailyUsageTotals?.()?.totalCost;
|
|
5686
|
+
if (dailyCost !== undefined && dailyCost > 0) {
|
|
5687
|
+
snapshot.dailyCostUsd = dailyCost;
|
|
5688
|
+
}
|
|
5689
|
+
const goal = this.getGoalStateSnapshot();
|
|
5690
|
+
if (goal) {
|
|
5691
|
+
snapshot.activeGoal = {
|
|
5692
|
+
goalId: goal.goalId,
|
|
5693
|
+
status: goal.status,
|
|
5694
|
+
openRequirements: goal.requirements.filter((requirement) => requirement.status === "open").length,
|
|
5695
|
+
stallTurns: goal.stallTurns,
|
|
5696
|
+
};
|
|
5697
|
+
}
|
|
5698
|
+
// Real live count from the lane tracker — never inferred from historical snapshots. Absent
|
|
5699
|
+
// while zero, matching the presence-means-signal convention of the sibling fields.
|
|
5700
|
+
const activeLaneCount = this._laneTracker.getActiveCount();
|
|
5701
|
+
if (activeLaneCount > 0) {
|
|
5702
|
+
snapshot.activeLaneCount = activeLaneCount;
|
|
5703
|
+
}
|
|
5704
|
+
return snapshot;
|
|
5705
|
+
}
|
|
5706
|
+
/**
|
|
5707
|
+
* Aggregate an effectiveness/autonomy dashboard: what Pi has actually been doing (recent
|
|
5708
|
+
* route choices, latest gate outcome, cost, and any research/delegation/learning/goal
|
|
5709
|
+
* activity). Read-only — combines existing session-log getters, never mutates state or
|
|
5710
|
+
* recomputes a route/gate decision.
|
|
5711
|
+
*/
|
|
5712
|
+
getAutonomyDiagnosticSnapshot(options) {
|
|
5713
|
+
const maxEntriesPerFamily = options?.maxEntriesPerFamily ?? 10;
|
|
5714
|
+
const snapshot = {};
|
|
5715
|
+
const goal = this.getGoalStateSnapshot();
|
|
5716
|
+
const recentDecisions = getRecentModelRouterDecisions(this.sessionManager.getEntries(), maxEntriesPerFamily);
|
|
5717
|
+
if (recentDecisions.length > 0) {
|
|
5718
|
+
snapshot.routes = recentDecisions.map((decision) => ({
|
|
5719
|
+
title: decision.route.tier,
|
|
5720
|
+
summary: decision.routedModel,
|
|
5721
|
+
reasonCode: decision.route.reasonCode,
|
|
5722
|
+
metadata: { risk: decision.route.risk, outcome: decision.outcome, intent: decision.intent },
|
|
5723
|
+
}));
|
|
5724
|
+
}
|
|
5725
|
+
if (this._lastAutonomyGateOutcome) {
|
|
5726
|
+
const gate = this._lastAutonomyGateOutcome;
|
|
5727
|
+
snapshot.gates = [
|
|
5728
|
+
{
|
|
5729
|
+
title: gate.gate,
|
|
5730
|
+
summary: gate.message,
|
|
5731
|
+
reasonCode: gate.reasonCode,
|
|
5732
|
+
metadata: { outcome: gate.outcome, reversible: gate.reversible },
|
|
5733
|
+
},
|
|
5734
|
+
];
|
|
5735
|
+
}
|
|
5736
|
+
const costs = [];
|
|
5737
|
+
const currentCostForDiagnostics = this.getSessionStats().cost;
|
|
5738
|
+
if (currentCostForDiagnostics > 0) {
|
|
5739
|
+
costs.push({ title: "current", summary: `$${currentCostForDiagnostics.toFixed(4)}` });
|
|
5740
|
+
}
|
|
5741
|
+
const spawnedCost = this.getSpawnedUsage().cost;
|
|
5742
|
+
if (spawnedCost > 0)
|
|
5743
|
+
costs.push({ title: "spawned", summary: `$${spawnedCost.toFixed(4)}` });
|
|
5744
|
+
const dailyCostForDiagnostics = this.getDailyUsageTotals?.()?.totalCost;
|
|
5745
|
+
if (dailyCostForDiagnostics !== undefined && dailyCostForDiagnostics > 0) {
|
|
5746
|
+
costs.push({ title: "daily", summary: `$${dailyCostForDiagnostics.toFixed(4)}` });
|
|
5747
|
+
}
|
|
5748
|
+
if (costs.length > 0)
|
|
5749
|
+
snapshot.costs = costs;
|
|
5750
|
+
const researchEntries = [];
|
|
5751
|
+
const researchLaneRecords = getLaneRecordSnapshots(this.sessionManager.getEntries()).filter((record) => record.type === "research");
|
|
5752
|
+
for (const record of researchLaneRecords.slice(-maxEntriesPerFamily)) {
|
|
5753
|
+
researchEntries.push({
|
|
5754
|
+
title: `Lane ${record.laneId} (${record.status})`,
|
|
5755
|
+
reasonCode: record.reasonCode,
|
|
5756
|
+
metadata: {
|
|
5757
|
+
costUsd: record.costUsd,
|
|
5758
|
+
startedAt: record.startedAt,
|
|
5759
|
+
completedAt: record.completedAt,
|
|
5760
|
+
goalId: record.goalId,
|
|
5761
|
+
},
|
|
5762
|
+
});
|
|
5763
|
+
}
|
|
5764
|
+
for (const bundle of this.getEvidenceBundleSnapshots().slice(-maxEntriesPerFamily)) {
|
|
5765
|
+
researchEntries.push({
|
|
5766
|
+
title: `Research: ${bundle.query}`,
|
|
5767
|
+
metadata: { sourceCount: bundle.sources.length, findingCount: bundle.findings.length },
|
|
5768
|
+
});
|
|
5769
|
+
}
|
|
5770
|
+
if (this._lastResearchLaneSkipReason) {
|
|
5771
|
+
researchEntries.push({ title: "Last skip", reasonCode: this._lastResearchLaneSkipReason });
|
|
5772
|
+
}
|
|
5773
|
+
if (researchEntries.length > 0) {
|
|
5774
|
+
snapshot.research = researchEntries;
|
|
5775
|
+
}
|
|
5776
|
+
const delegationEntries = [];
|
|
5777
|
+
const workerLaneRecords = getLaneRecordSnapshots(this.sessionManager.getEntries()).filter((record) => record.type === "worker");
|
|
5778
|
+
for (const record of workerLaneRecords.slice(-maxEntriesPerFamily)) {
|
|
5779
|
+
delegationEntries.push({
|
|
5780
|
+
title: `Lane ${record.laneId} (${record.status})`,
|
|
5781
|
+
reasonCode: record.reasonCode,
|
|
5782
|
+
metadata: { costUsd: record.costUsd, startedAt: record.startedAt, completedAt: record.completedAt },
|
|
5783
|
+
});
|
|
5784
|
+
}
|
|
5785
|
+
const workerResults = this.getWorkerResultSnapshots();
|
|
5786
|
+
for (const result of workerResults.slice(-maxEntriesPerFamily)) {
|
|
5787
|
+
delegationEntries.push({
|
|
5788
|
+
title: `Worker ${result.requestId} (${result.status})`,
|
|
5789
|
+
summary: result.summary,
|
|
5790
|
+
metadata: {
|
|
5791
|
+
changedFileCount: result.changedFiles.length,
|
|
5792
|
+
blockerCount: result.blockers?.length ?? 0,
|
|
5793
|
+
usageReportId: result.usageReportId,
|
|
5794
|
+
},
|
|
5795
|
+
});
|
|
5796
|
+
}
|
|
5797
|
+
if (delegationEntries.length > 0) {
|
|
5798
|
+
snapshot.delegation = delegationEntries;
|
|
5799
|
+
}
|
|
5800
|
+
const learningEntries = [];
|
|
5801
|
+
const learningDecisions = this.getLearningDecisionSnapshots();
|
|
5802
|
+
for (const decision of learningDecisions.slice(-maxEntriesPerFamily)) {
|
|
5803
|
+
learningEntries.push({
|
|
5804
|
+
title: `Learning (${decision.kind})`,
|
|
5805
|
+
summary: decision.summary,
|
|
5806
|
+
reasonCode: decision.reasonCode,
|
|
5807
|
+
metadata: { confidence: decision.confidence, requiresApproval: decision.requiresApproval },
|
|
5808
|
+
});
|
|
5809
|
+
}
|
|
5810
|
+
for (const audit of this.getLearningAuditRecords().slice(-maxEntriesPerFamily)) {
|
|
5811
|
+
learningEntries.push({
|
|
5812
|
+
title: `Audit ${audit.id} (${audit.action})`,
|
|
5813
|
+
summary: audit.summary,
|
|
5814
|
+
reasonCode: audit.reasonCode,
|
|
5815
|
+
metadata: { layer: audit.layer, proposalId: audit.proposalId, rollbackOf: audit.rollbackOf },
|
|
5816
|
+
});
|
|
5817
|
+
}
|
|
5818
|
+
if (learningEntries.length > 0) {
|
|
5819
|
+
snapshot.learning = learningEntries;
|
|
5820
|
+
}
|
|
5821
|
+
if (goal) {
|
|
5822
|
+
snapshot.goals = [
|
|
5823
|
+
{
|
|
5824
|
+
title: `Goal ${goal.goalId}`,
|
|
5825
|
+
summary: goal.userGoal,
|
|
5826
|
+
reasonCode: goal.status,
|
|
5827
|
+
metadata: {
|
|
5828
|
+
openRequirementCount: goal.requirements.filter((requirement) => requirement.status === "open").length,
|
|
5829
|
+
stallTurns: goal.stallTurns,
|
|
5830
|
+
blockedReason: goal.blockedReason,
|
|
5831
|
+
},
|
|
5832
|
+
},
|
|
5833
|
+
];
|
|
5834
|
+
}
|
|
5835
|
+
return snapshot;
|
|
5836
|
+
}
|
|
4087
5837
|
createReplacedSessionContext() {
|
|
4088
5838
|
const context = Object.defineProperties({}, Object.getOwnPropertyDescriptors(this._extensionRunner.createCommandContext()));
|
|
4089
5839
|
context.sendMessage = (message, options) => this.sendCustomMessage(message, options);
|