@visorcraft/idlehands 1.1.16 → 1.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/agent/formatting.js +30 -13
- package/dist/agent/formatting.js.map +1 -1
- package/dist/agent/review-artifact.js +12 -8
- package/dist/agent/review-artifact.js.map +1 -1
- package/dist/agent/tool-calls.js +62 -21
- package/dist/agent/tool-calls.js.map +1 -1
- package/dist/agent/tool-loop-detection.js +310 -0
- package/dist/agent/tool-loop-detection.js.map +1 -0
- package/dist/agent/tool-loop-guard.js +235 -0
- package/dist/agent/tool-loop-guard.js.map +1 -0
- package/dist/agent.js +585 -144
- package/dist/agent.js.map +1 -1
- package/dist/anton/controller.js +46 -30
- package/dist/anton/controller.js.map +1 -1
- package/dist/anton/lock.js +5 -1
- package/dist/anton/lock.js.map +1 -1
- package/dist/anton/parser.js +18 -19
- package/dist/anton/parser.js.map +1 -1
- package/dist/anton/prompt.js +42 -11
- package/dist/anton/prompt.js.map +1 -1
- package/dist/anton/reporter.js.map +1 -1
- package/dist/anton/session.js.map +1 -1
- package/dist/anton/verifier.js +3 -5
- package/dist/anton/verifier.js.map +1 -1
- package/dist/bench/compare.js +53 -20
- package/dist/bench/compare.js.map +1 -1
- package/dist/bench/openclaw.js +4 -4
- package/dist/bench/openclaw.js.map +1 -1
- package/dist/bench/report.js +11 -3
- package/dist/bench/report.js.map +1 -1
- package/dist/bench/runner.js +20 -14
- package/dist/bench/runner.js.map +1 -1
- package/dist/bot/commands.js +69 -26
- package/dist/bot/commands.js.map +1 -1
- package/dist/bot/confirm-discord.js +32 -9
- package/dist/bot/confirm-discord.js.map +1 -1
- package/dist/bot/confirm-telegram.js +26 -10
- package/dist/bot/confirm-telegram.js.map +1 -1
- package/dist/bot/dir-guard.js +18 -3
- package/dist/bot/dir-guard.js.map +1 -1
- package/dist/bot/discord-routing.js +28 -4
- package/dist/bot/discord-routing.js.map +1 -1
- package/dist/bot/discord-streaming.js +3 -3
- package/dist/bot/discord-streaming.js.map +1 -1
- package/dist/bot/discord.js +93 -37
- package/dist/bot/discord.js.map +1 -1
- package/dist/bot/escalation.js +124 -0
- package/dist/bot/escalation.js.map +1 -0
- package/dist/bot/format.js +2 -5
- package/dist/bot/format.js.map +1 -1
- package/dist/bot/session-manager.js +17 -6
- package/dist/bot/session-manager.js.map +1 -1
- package/dist/bot/telegram.js +92 -29
- package/dist/bot/telegram.js.map +1 -1
- package/dist/cli/agent-turn.js +10 -4
- package/dist/cli/agent-turn.js.map +1 -1
- package/dist/cli/args.js +51 -9
- package/dist/cli/args.js.map +1 -1
- package/dist/cli/bot.js +19 -9
- package/dist/cli/bot.js.map +1 -1
- package/dist/cli/build-repl-context.js +60 -26
- package/dist/cli/build-repl-context.js.map +1 -1
- package/dist/cli/command-registry.js.map +1 -1
- package/dist/cli/commands/anton.js +5 -3
- package/dist/cli/commands/anton.js.map +1 -1
- package/dist/cli/commands/editing.js +27 -12
- package/dist/cli/commands/editing.js.map +1 -1
- package/dist/cli/commands/model.js +16 -7
- package/dist/cli/commands/model.js.map +1 -1
- package/dist/cli/commands/project.js +52 -17
- package/dist/cli/commands/project.js.map +1 -1
- package/dist/cli/commands/runtime.js +1 -1
- package/dist/cli/commands/runtime.js.map +1 -1
- package/dist/cli/commands/secrets.js +279 -0
- package/dist/cli/commands/secrets.js.map +1 -0
- package/dist/cli/commands/session.js +57 -2
- package/dist/cli/commands/session.js.map +1 -1
- package/dist/cli/commands/tools.js +3 -1
- package/dist/cli/commands/tools.js.map +1 -1
- package/dist/cli/commands/trifecta.js +1 -1
- package/dist/cli/commands/trifecta.js.map +1 -1
- package/dist/cli/commands/tui.js.map +1 -1
- package/dist/cli/init.js +50 -16
- package/dist/cli/init.js.map +1 -1
- package/dist/cli/input.js +25 -7
- package/dist/cli/input.js.map +1 -1
- package/dist/cli/oneshot.js +31 -19
- package/dist/cli/oneshot.js.map +1 -1
- package/dist/cli/repl-dispatch.js +10 -6
- package/dist/cli/repl-dispatch.js.map +1 -1
- package/dist/cli/runtime-cmds.js +110 -46
- package/dist/cli/runtime-cmds.js.map +1 -1
- package/dist/cli/service.js +3 -3
- package/dist/cli/service.js.map +1 -1
- package/dist/cli/session-state.js +12 -5
- package/dist/cli/session-state.js.map +1 -1
- package/dist/cli/setup.js +86 -33
- package/dist/cli/setup.js.map +1 -1
- package/dist/cli/shell.js +4 -4
- package/dist/cli/shell.js.map +1 -1
- package/dist/cli/status.js +56 -12
- package/dist/cli/status.js.map +1 -1
- package/dist/client.js +40 -21
- package/dist/client.js.map +1 -1
- package/dist/commands.js +1 -1
- package/dist/commands.js.map +1 -1
- package/dist/config.js +171 -15
- package/dist/config.js.map +1 -1
- package/dist/confirm/auto.js.map +1 -1
- package/dist/confirm/headless.js +13 -2
- package/dist/confirm/headless.js.map +1 -1
- package/dist/confirm/terminal.js +1 -5
- package/dist/confirm/terminal.js.map +1 -1
- package/dist/context.js +9 -3
- package/dist/context.js.map +1 -1
- package/dist/git.js +56 -61
- package/dist/git.js.map +1 -1
- package/dist/harnesses.js +137 -37
- package/dist/harnesses.js.map +1 -1
- package/dist/history.js +12 -4
- package/dist/history.js.map +1 -1
- package/dist/hooks/index.js +2 -2
- package/dist/hooks/index.js.map +1 -1
- package/dist/hooks/loader.js +6 -5
- package/dist/hooks/loader.js.map +1 -1
- package/dist/hooks/manager.js.map +1 -1
- package/dist/hooks/plugins/example-console.js.map +1 -1
- package/dist/hooks/scaffold.js +8 -6
- package/dist/hooks/scaffold.js.map +1 -1
- package/dist/index.js +120 -66
- package/dist/index.js.map +1 -1
- package/dist/indexer.js +6 -18
- package/dist/indexer.js.map +1 -1
- package/dist/jsonrpc.js.map +1 -1
- package/dist/lens.js +38 -16
- package/dist/lens.js.map +1 -1
- package/dist/lsp.js +60 -24
- package/dist/lsp.js.map +1 -1
- package/dist/markdown.js +6 -6
- package/dist/markdown.js.map +1 -1
- package/dist/mcp.js +15 -6
- package/dist/mcp.js.map +1 -1
- package/dist/model-customization.js +7 -3
- package/dist/model-customization.js.map +1 -1
- package/dist/progress/message-edit-scheduler.js +15 -3
- package/dist/progress/message-edit-scheduler.js.map +1 -1
- package/dist/progress/progress-message-renderer.js.map +1 -1
- package/dist/progress/progress-presenter.js +3 -3
- package/dist/progress/progress-presenter.js.map +1 -1
- package/dist/progress/serialize-telegram.js.map +1 -1
- package/dist/progress/tool-summary.js +3 -1
- package/dist/progress/tool-summary.js.map +1 -1
- package/dist/progress/turn-progress.js +3 -1
- package/dist/progress/turn-progress.js.map +1 -1
- package/dist/recovery.js +11 -3
- package/dist/recovery.js.map +1 -1
- package/dist/replay.js +9 -3
- package/dist/replay.js.map +1 -1
- package/dist/replay_cli.js +5 -3
- package/dist/replay_cli.js.map +1 -1
- package/dist/runtime/executor.js +66 -20
- package/dist/runtime/executor.js.map +1 -1
- package/dist/runtime/health.js.map +1 -1
- package/dist/runtime/host-runner.js +103 -0
- package/dist/runtime/host-runner.js.map +1 -0
- package/dist/runtime/planner.js +3 -1
- package/dist/runtime/planner.js.map +1 -1
- package/dist/runtime/secrets.js +102 -0
- package/dist/runtime/secrets.js.map +1 -0
- package/dist/runtime/store.js +95 -19
- package/dist/runtime/store.js.map +1 -1
- package/dist/safety.js +38 -21
- package/dist/safety.js.map +1 -1
- package/dist/spinner.js +7 -8
- package/dist/spinner.js.map +1 -1
- package/dist/sys/context.js +3 -3
- package/dist/sys/context.js.map +1 -1
- package/dist/term.js +1 -1
- package/dist/term.js.map +1 -1
- package/dist/themes.js +11 -5
- package/dist/themes.js.map +1 -1
- package/dist/tools/tool-error.js +2 -5
- package/dist/tools/tool-error.js.map +1 -1
- package/dist/tools.js +84 -35
- package/dist/tools.js.map +1 -1
- package/dist/tui/branch-picker.js +9 -3
- package/dist/tui/branch-picker.js.map +1 -1
- package/dist/tui/command-handler.js +88 -36
- package/dist/tui/command-handler.js.map +1 -1
- package/dist/tui/confirm.js.map +1 -1
- package/dist/tui/controller.js +234 -117
- package/dist/tui/controller.js.map +1 -1
- package/dist/tui/event-bridge.js.map +1 -1
- package/dist/tui/keymap.js +93 -71
- package/dist/tui/keymap.js.map +1 -1
- package/dist/tui/layout.js +9 -1
- package/dist/tui/layout.js.map +1 -1
- package/dist/tui/render.js +17 -5
- package/dist/tui/render.js.map +1 -1
- package/dist/tui/screen.js.map +1 -1
- package/dist/tui/state.js +129 -63
- package/dist/tui/state.js.map +1 -1
- package/dist/tui/theme.js +12 -3
- package/dist/tui/theme.js.map +1 -1
- package/dist/upgrade.js +28 -15
- package/dist/upgrade.js.map +1 -1
- package/dist/utils.js +8 -5
- package/dist/utils.js.map +1 -1
- package/dist/vault.js +48 -12
- package/dist/vault.js.map +1 -1
- package/dist/vim.js.map +1 -1
- package/package.json +11 -2
package/dist/agent.js
CHANGED
|
@@ -17,6 +17,7 @@ import { LspManager, detectInstalledLspServers } from './lsp.js';
|
|
|
17
17
|
import { generateMinimalDiff, toolResultSummary, execCommandFromSig, formatDurationMs, looksLikePlanningNarration, capTextByApproxTokens, isLikelyBinaryBuffer, sanitizePathsInMessage, digestToolResult, } from './agent/formatting.js';
|
|
18
18
|
import { parseToolCallsFromContent, getMissingRequiredParams, getArgValidationIssues, stripMarkdownFences } from './agent/tool-calls.js';
|
|
19
19
|
import { ToolError, ValidationError } from './tools/tool-error.js';
|
|
20
|
+
import { ToolLoopGuard } from './agent/tool-loop-guard.js';
|
|
20
21
|
export { parseToolCallsFromContent };
|
|
21
22
|
import { reviewArtifactKeys, looksLikeCodeReviewRequest, looksLikeReviewRetrievalRequest, retrievalAllowsStaleArtifact, parseReviewArtifactStalePolicy, parseReviewArtifact, reviewArtifactStaleReason, gitHead, normalizeModelsResponse, } from './agent/review-artifact.js';
|
|
22
23
|
import fs from 'node:fs/promises';
|
|
@@ -28,7 +29,12 @@ function makeAbortController() {
|
|
|
28
29
|
}
|
|
29
30
|
const CACHED_EXEC_OBSERVATION_HINT = '[idlehands hint] Reused cached output for repeated read-only exec call (unchanged observation).';
|
|
30
31
|
function looksLikeReadOnlyExecCommand(command) {
|
|
31
|
-
|
|
32
|
+
// Strip leading `cd <path> &&` / `cd <path>;` prefixes — cd is read-only
|
|
33
|
+
// navigation, the actual command that matters comes after.
|
|
34
|
+
let cmd = String(command || '').trim().toLowerCase();
|
|
35
|
+
if (!cmd)
|
|
36
|
+
return false;
|
|
37
|
+
cmd = cmd.replace(/^(\s*cd\s+[^;&|]+\s*(?:&&|;)\s*)+/i, '').trim();
|
|
32
38
|
if (!cmd)
|
|
33
39
|
return false;
|
|
34
40
|
// Shell redirects are likely writes.
|
|
@@ -98,6 +104,25 @@ function withCachedExecObservationHint(content) {
|
|
|
98
104
|
return `${content}\n${CACHED_EXEC_OBSERVATION_HINT}`;
|
|
99
105
|
}
|
|
100
106
|
}
|
|
107
|
+
const REPLAYED_EXEC_HINT = '[idlehands hint] You already ran this exact command. This is the replayed result from your previous execution. Do NOT re-run it — use the output below to continue your task.';
/**
 * Mark a replayed exec tool result so the model knows it is looking at the
 * output of an earlier, identical invocation.
 *
 * - Empty/falsy content is returned untouched.
 * - When the content is a JSON object, the hint is prepended to its `out`
 *   string (or becomes `out` when there is none) and `replayed: true` is set.
 * - Any other content (plain text, or JSON that is not an object such as an
 *   array, number, string or null) gets the hint prepended as a text line.
 * - Content that already carries the hint is returned unchanged, so the
 *   function is idempotent.
 *
 * @param {string} content Serialized tool result.
 * @returns {string} The content with the replay hint applied.
 */
function withReplayedExecHint(content) {
    if (!content)
        return content;
    let parsed;
    try {
        parsed = JSON.parse(content);
    }
    catch {
        parsed = undefined;
    }
    // Only a plain JSON object can carry the `out`/`replayed` fields. Setting
    // them on an array would be dropped by JSON.stringify (losing the hint),
    // so every non-object payload is handled as opaque text instead.
    const isJsonObject = parsed !== null && typeof parsed === 'object' && !Array.isArray(parsed);
    if (!isJsonObject) {
        return content.includes(REPLAYED_EXEC_HINT) ? content : `${REPLAYED_EXEC_HINT}\n${content}`;
    }
    const out = typeof parsed.out === 'string' ? parsed.out : '';
    if (out.includes(REPLAYED_EXEC_HINT))
        return content;
    parsed.out = out ? `${REPLAYED_EXEC_HINT}\n${out}` : REPLAYED_EXEC_HINT;
    parsed.replayed = true;
    return JSON.stringify(parsed);
}
|
|
101
126
|
function readOnlyExecCacheable(content) {
|
|
102
127
|
try {
|
|
103
128
|
const parsed = JSON.parse(content);
|
|
@@ -116,6 +141,15 @@ function ensureInformativeAssistantText(text, ctx) {
|
|
|
116
141
|
}
|
|
117
142
|
return `I have no user-visible response text for this turn (turn=${ctx.turns}). Please try again or rephrase your request.`;
|
|
118
143
|
}
|
|
144
|
+
/**
 * Heuristically decide whether an error means the request did not fit into
 * the model's context window.
 *
 * An HTTP-style `status` of 413 always counts. Otherwise the error message
 * (or the stringified error when it has no `message`) is matched against a
 * set of known overflow phrasings, case-insensitively.
 *
 * @param {unknown} err Error object, string, or anything else that was thrown.
 * @returns {boolean} True when the error looks like a context overflow.
 */
function isContextWindowExceededError(err) {
    const overflowPattern = /(exceeds?\s+the\s+available\s+context\s+size|exceed_context|context\s+size|context\s+window|maximum\s+context\s+length|too\s+many\s+tokens|request\s*\(\d+\s*tokens\))/i;
    const httpStatus = Number(err?.status ?? NaN);
    const text = String(err?.message ?? err ?? '');
    if (httpStatus === 413) {
        return true;
    }
    // An empty message can never match, so short-circuit before the regex.
    return text !== '' && overflowPattern.test(text);
}
|
|
119
153
|
/** Errors that should break the outer agent loop, not be caught by per-tool handlers */
|
|
120
154
|
class AgentLoopBreak extends Error {
|
|
121
155
|
constructor(message) {
|
|
@@ -133,6 +167,7 @@ Rules:
|
|
|
133
167
|
- Use read_file with search=... to jump to relevant code; avoid reading whole files.
|
|
134
168
|
- Never call read_file/read_files/list_dir twice in a row with identical arguments (same path/options). Reuse the previous result instead.
|
|
135
169
|
- Prefer apply_patch or edit_range for code edits (token-efficient). Use edit_file only when exact old_text replacement is necessary.
|
|
170
|
+
- write_file is for new files or explicit full rewrites only. Existing non-empty files require overwrite=true/force=true.
|
|
136
171
|
- Use insert_file for insertions (prepend/append/line).
|
|
137
172
|
- Use exec to run commands, tests, builds; check results before reporting success.
|
|
138
173
|
- When running commands in a subdirectory, use exec's cwd parameter — NOT "cd /path && cmd". Each exec call is a fresh shell; cd does not persist.
|
|
@@ -305,8 +340,8 @@ function buildToolsSchema(opts) {
|
|
|
305
340
|
type: 'function',
|
|
306
341
|
function: {
|
|
307
342
|
name: 'write_file',
|
|
308
|
-
description: 'Write file (atomic, backup).',
|
|
309
|
-
parameters: obj({ path: str(), content: str() }, ['path', 'content']),
|
|
343
|
+
description: 'Write file (atomic, backup). Existing non-empty files require overwrite=true (or force=true).',
|
|
344
|
+
parameters: obj({ path: str(), content: str(), overwrite: bool(), force: bool() }, ['path', 'content']),
|
|
310
345
|
},
|
|
311
346
|
},
|
|
312
347
|
{
|
|
@@ -404,6 +439,11 @@ function buildToolsSchema(opts) {
|
|
|
404
439
|
if (opts?.activeVaultTools) {
|
|
405
440
|
schemas.push({ type: 'function', function: { name: 'vault_search', description: 'Search vault.', parameters: obj({ query: str(), limit: int() }, ['query']) } }, { type: 'function', function: { name: 'vault_note', description: 'Write vault note.', parameters: obj({ key: str(), value: str() }, ['key', 'value']) } });
|
|
406
441
|
}
|
|
442
|
+
else if (opts?.passiveVault) {
|
|
443
|
+
// In passive mode, expose vault_search (read-only) so the model can recover
|
|
444
|
+
// compacted context on demand, but don't expose vault_note (write).
|
|
445
|
+
schemas.push({ type: 'function', function: { name: 'vault_search', description: 'Search vault memory for earlier context that was compacted away. Use sparingly — only when you need to recall specific details from earlier in the conversation.', parameters: obj({ query: str(), limit: int() }, ['query']) } });
|
|
446
|
+
}
|
|
407
447
|
// Phase 9: sys_context tool is only available in sys mode.
|
|
408
448
|
if (opts?.sysMode) {
|
|
409
449
|
schemas.push(SYS_CONTEXT_SCHEMA);
|
|
@@ -648,6 +688,7 @@ export async function createSession(opts) {
|
|
|
648
688
|
let mcpToolsLoaded = !mcpLazySchemaMode;
|
|
649
689
|
const getToolsSchema = () => buildToolsSchema({
|
|
650
690
|
activeVaultTools,
|
|
691
|
+
passiveVault: !activeVaultTools && vaultEnabled && vaultMode === 'passive',
|
|
651
692
|
sysMode: cfg.mode === 'sys',
|
|
652
693
|
lspTools: lspManager?.hasServers() === true,
|
|
653
694
|
mcpTools: mcpToolsLoaded ? (mcpManager?.getEnabledToolSchemas() ?? []) : [],
|
|
@@ -1155,21 +1196,68 @@ export async function createSession(opts) {
|
|
|
1155
1196
|
const clearPlan = () => {
|
|
1156
1197
|
planSteps = [];
|
|
1157
1198
|
};
|
|
1199
|
+
/**
 * Find the newest user message that carries real user text.
 *
 * Walks `messages` from the end and returns the trimmed text of the first
 * user message that is non-empty and does not start with one of the
 * synthetic prefixes (`[system]`, `[Trifecta Vault`, `[Vault context`).
 *
 * @returns {string} The message text, or '' when no such message exists.
 */
const getLatestObjectiveText = () => {
    const syntheticPrefixes = ['[system]', '[Trifecta Vault', '[Vault context'];
    let idx = messages.length;
    while (idx-- > 0) {
        const candidate = messages[idx];
        if (candidate.role === 'user') {
            const body = userContentToText((candidate.content ?? '')).trim();
            const isSynthetic = syntheticPrefixes.some((prefix) => body.startsWith(prefix));
            if (body && !isSynthetic) {
                return body;
            }
        }
    }
    return '';
};
|
|
1217
|
+
/**
 * Produce the vault-related sentence appended to compaction notices.
 *
 * @returns {string} Guidance text for active or passive vault mode, or ''
 *   when there is no vault or the mode is neither of those.
 */
const compactionVaultGuidance = () => {
    if (vault) {
        // Active mode and explicitly enabled vault tools share the same wording.
        const activeLike = vaultMode === 'active' || Boolean(activeVaultTools);
        if (activeLike) {
            return 'Vault memory is available. Retrieve prior context with vault_search(query="...") when needed.';
        }
        if (vaultMode === 'passive') {
            return 'Vault memory is in passive mode; relevant entries may be auto-injected. You can also use vault_search(query="...") to recover specific earlier context if needed.';
        }
    }
    return '';
};
|
|
1228
|
+
/**
 * Build the bracketed note that records a compaction in the conversation.
 *
 * @param {string} kind 'auto' selects the auto-compaction wording; any other
 *   value selects the generic wording.
 * @param {number} dropped Number of messages that were dropped.
 * @returns {string} The note, followed by vault guidance when any applies.
 */
const buildCompactionSystemNote = (kind, dropped) => {
    let headline;
    if (kind === 'auto') {
        headline = `[auto-compacted: ${dropped} old messages dropped to stay within context budget.]`;
    }
    else {
        headline = `[compacted: ${dropped} messages dropped.]`;
    }
    // Empty guidance is filtered out so no trailing space is produced.
    return [headline, compactionVaultGuidance()].filter(Boolean).join(' ');
};
|
|
1235
|
+
let lastAskInstructionText = '';
|
|
1236
|
+
let lastCompactionReminderObjective = '';
|
|
1237
|
+
/**
 * Append a synthetic user message reminding the model of the current
 * objective after the context has been compacted.
 *
 * The objective is the latest substantive user text, falling back to
 * `lastAskInstructionText`. It is clipped to 1600 characters. Nothing is
 * pushed when no objective is known or when the same (clipped) objective was
 * already used for the previous reminder.
 *
 * @param {string} reason Short description of why compaction happened.
 * @returns {void}
 */
const injectCompactionReminder = (reason) => {
    const maxObjectiveChars = 1600;
    const rawObjective = (getLatestObjectiveText() || lastAskInstructionText || '').trim();
    if (rawObjective.length === 0) {
        return;
    }
    let reminderObjective = rawObjective;
    if (rawObjective.length > maxObjectiveChars) {
        reminderObjective = `${rawObjective.slice(0, maxObjectiveChars)}\n[truncated]`;
    }
    // Skip the reminder when it would repeat the previous one verbatim.
    if (reminderObjective === lastCompactionReminderObjective) {
        return;
    }
    lastCompactionReminderObjective = reminderObjective;
    const guidance = compactionVaultGuidance();
    const segments = [
        `[system] Context was compacted (${reason}). Continue the SAME task from the current state; do not restart.\n`,
        `Most recent user objective:\n${reminderObjective}`,
        guidance ? `\n\n${guidance}` : '',
    ];
    messages.push({ role: 'user', content: segments.join('') });
};
|
|
1158
1253
|
// Session-level vault context injection: search vault for entries relevant to
|
|
1159
|
-
// the
|
|
1160
|
-
// compaction to restore context the model lost when messages were dropped.
|
|
1254
|
+
// the latest substantive objective and inject them into the conversation.
|
|
1255
|
+
// Used after compaction to restore context the model lost when messages were dropped.
|
|
1161
1256
|
let lastVaultInjectionQuery = '';
|
|
1162
1257
|
const injectVaultContext = async () => {
|
|
1163
1258
|
if (!vault)
|
|
1164
1259
|
return;
|
|
1165
|
-
|
|
1166
|
-
for (let j = messages.length - 1; j >= 0; j--) {
|
|
1167
|
-
if (messages[j].role === 'user') {
|
|
1168
|
-
lastUser = messages[j];
|
|
1169
|
-
break;
|
|
1170
|
-
}
|
|
1171
|
-
}
|
|
1172
|
-
const userText = userContentToText((lastUser?.content ?? '')).trim();
|
|
1260
|
+
const userText = (getLatestObjectiveText() || lastAskInstructionText || '').trim();
|
|
1173
1261
|
if (!userText)
|
|
1174
1262
|
return;
|
|
1175
1263
|
const query = userText.slice(0, 200);
|
|
@@ -1190,77 +1278,150 @@ export async function createSession(opts) {
|
|
|
1190
1278
|
content: `${vaultContextHeader} Relevant entries for "${query}":\n${lines.join('\n')}`
|
|
1191
1279
|
});
|
|
1192
1280
|
};
|
|
1193
|
-
|
|
1194
|
-
|
|
1195
|
-
|
|
1196
|
-
|
|
1197
|
-
|
|
1198
|
-
|
|
1199
|
-
|
|
1200
|
-
|
|
1281
|
+
let compactionLockTail = Promise.resolve();
|
|
1282
|
+
let compactionStats = {
|
|
1283
|
+
inProgress: false,
|
|
1284
|
+
lockHeld: false,
|
|
1285
|
+
runs: 0,
|
|
1286
|
+
failedRuns: 0,
|
|
1287
|
+
beforeMessages: 0,
|
|
1288
|
+
afterMessages: 0,
|
|
1289
|
+
freedTokens: 0,
|
|
1290
|
+
archivedToolMessages: 0,
|
|
1291
|
+
droppedMessages: 0,
|
|
1292
|
+
dryRun: false,
|
|
1293
|
+
};
|
|
1294
|
+
/**
 * Run a compaction routine while holding the compaction lock and record the
 * outcome in `compactionStats`.
 *
 * Each call installs a new promise as `compactionLockTail` and waits for the
 * previous tail before starting, so runs execute one after another in call
 * order. The lock is released in `finally`, whether the runner succeeds or
 * throws.
 *
 * @param {string} reason Label stored as `lastReason` in the stats.
 * @param {() => Promise<object>} runner Performs the compaction; the fields
 *   of its result are merged into `compactionStats`.
 * @returns {Promise<object>} Whatever `runner` resolved to.
 * @throws Rethrows any error raised by `runner` after recording it.
 */
const runCompactionWithLock = async (reason, runner) => {
    const nowIso = () => new Date().toISOString();
    // Queue behind the current tail and publish our own gate as the new tail.
    const previousTail = compactionLockTail;
    let releaseLock = () => { };
    compactionLockTail = new Promise((resolve) => {
        releaseLock = () => resolve();
    });
    await previousTail;
    // Per-run figures start from zero for every fresh run.
    const clearedRunFigures = {
        beforeMessages: 0,
        afterMessages: 0,
        freedTokens: 0,
        archivedToolMessages: 0,
        droppedMessages: 0,
        dryRun: false,
    };
    compactionStats = {
        ...compactionStats,
        inProgress: true,
        lockHeld: true,
        lastReason: reason,
        lastError: undefined,
        updatedAt: nowIso(),
        ...clearedRunFigures,
    };
    try {
        const outcome = await runner();
        compactionStats = {
            ...compactionStats,
            ...outcome,
            inProgress: false,
            lockHeld: false,
            runs: compactionStats.runs + 1,
            lastReason: reason,
            updatedAt: nowIso(),
        };
        return outcome;
    }
    catch (failure) {
        const errorText = failure?.message ?? String(failure);
        compactionStats = {
            ...compactionStats,
            inProgress: false,
            lockHeld: false,
            failedRuns: compactionStats.failedRuns + 1,
            lastReason: reason,
            lastError: errorText,
            updatedAt: nowIso(),
        };
        throw failure;
    }
    finally {
        releaseLock();
    }
};
|
|
1345
|
+
/**
 * Compact the conversation history on request.
 *
 * Runs under the compaction lock. Depending on `opts`:
 * - `hard`: keep only the leading system message (if any) plus the last two
 *   messages.
 * - otherwise: delegate to `enforceContextBudget`; `force` shrinks the kept
 *   tail to 2 messages and lowers the compaction threshold to 0.5.
 * - `topic`: messages whose text contains the topic (case-insensitive) are
 *   kept even if they would have been dropped.
 * - `dry`: compute the statistics only; neither `messages` nor the vault is
 *   touched.
 * - `reason`: label recorded in the stats; when given (or with `force`) a
 *   compaction reminder is injected after a real compaction.
 *
 * Vault writes are best-effort: failures there never abort the compaction.
 *
 * @param {object} [opts] Compaction options (see above).
 * @returns {Promise<{beforeMessages: number, afterMessages: number, freedTokens: number, archivedToolMessages: number, droppedMessages: number, dryRun: boolean}>}
 */
const compactHistory = async (opts) => {
    const reason = opts?.reason
        ?? (opts?.hard ? 'manual hard compaction'
            : opts?.force ? 'manual force compaction'
                : 'manual compaction');
    return await runCompactionWithLock(reason, async () => {
        const beforeMessages = messages.length;
        const beforeTokens = estimateTokensFromMessages(messages);
        let compacted;
        if (opts?.hard) {
            const sys = messages[0]?.role === 'system' ? [messages[0]] : [];
            // With two or fewer messages the tail slice contains the system
            // message again; exclude it so it is never duplicated.
            const tail = messages.slice(-2).filter((m) => !sys.includes(m));
            compacted = [...sys, ...tail];
        }
        else {
            compacted = enforceContextBudget({
                messages,
                contextWindow,
                maxTokens,
                minTailMessages: opts?.force ? 2 : 12,
                compactAt: opts?.force ? 0.5 : (cfg.compact_at ?? 0.8),
                toolSchemaTokens: estimateToolSchemaTokens(getToolsSchema()),
                force: opts?.force,
            });
        }
        // Dropped messages are identified by object identity against the kept set.
        const compactedByRefs = new Set(compacted);
        let dropped = messages.filter((m) => !compactedByRefs.has(m));
        if (opts?.topic) {
            const topic = opts.topic.toLowerCase();
            dropped = dropped.filter((m) => !userContentToText(m.content ?? '').toLowerCase().includes(topic));
            const keepFromTopic = messages.filter((m) => userContentToText(m.content ?? '').toLowerCase().includes(topic));
            // NOTE(review): rescued topic messages are appended after the kept
            // tail rather than merged back in chronological order.
            compacted = [...compacted, ...keepFromTopic.filter((m) => !compactedByRefs.has(m))];
        }
        const archivedToolMessages = dropped.filter((m) => m.role === 'tool').length;
        const afterMessages = compacted.length;
        const afterTokens = estimateTokensFromMessages(compacted);
        const freedTokens = Math.max(0, beforeTokens - afterTokens);
        if (!opts?.dry) {
            if (dropped.length && vault) {
                try {
                    // Store the original/current user prompt before compaction so it survives context loss.
                    let userPromptToPreserve = null;
                    for (let i = messages.length - 1; i >= 0; i--) {
                        const m = messages[i];
                        if (m.role === 'user') {
                            const text = userContentToText((m.content ?? '')).trim();
                            // Synthetic user messages ([system] reminders and vault
                            // injections) are not the user's task and must not
                            // overwrite the preserved prompt.
                            const isSynthetic = text.startsWith('[system]')
                                || text.startsWith('[Trifecta Vault')
                                || text.startsWith('[Vault context');
                            if (text && !isSynthetic && text.length > 20) {
                                userPromptToPreserve = text;
                                break;
                            }
                        }
                    }
                    if (userPromptToPreserve) {
                        await vault.upsertNote('current_task', userPromptToPreserve.slice(0, 2000), 'system');
                    }
                    await vault.archiveToolMessages(dropped, new Map());
                    await vault.note('compaction_summary', `Dropped ${dropped.length} messages (${freedTokens} tokens).`);
                }
                catch {
                    // best-effort
                }
            }
            messages = compacted;
            if (dropped.length) {
                messages.push({ role: 'system', content: buildCompactionSystemNote('manual', dropped.length) });
                await injectVaultContext().catch(() => { });
                if (opts?.reason || opts?.force) {
                    injectCompactionReminder(opts?.reason ?? 'history compaction');
                }
            }
        }
        return {
            beforeMessages,
            afterMessages,
            freedTokens,
            archivedToolMessages,
            droppedMessages: dropped.length,
            dryRun: !!opts?.dry,
        };
    });
};
|
|
1265
1426
|
const cumulativeUsage = { prompt: 0, completion: 0 };
|
|
1266
1427
|
const turnDurationsMs = [];
|
|
@@ -1269,6 +1430,17 @@ export async function createSession(opts) {
|
|
|
1269
1430
|
const tgSamples = [];
|
|
1270
1431
|
let lastTurnMetrics;
|
|
1271
1432
|
let lastServerHealth;
|
|
1433
|
+
let lastToolLoopStats = { totalHistory: 0, signatures: [], outcomes: [], telemetry: {
|
|
1434
|
+
callsRegistered: 0,
|
|
1435
|
+
dedupedReplays: 0,
|
|
1436
|
+
readCacheLookups: 0,
|
|
1437
|
+
readCacheHits: 0,
|
|
1438
|
+
warnings: 0,
|
|
1439
|
+
criticals: 0,
|
|
1440
|
+
recoveryRecommended: 0,
|
|
1441
|
+
readCacheHitRate: 0,
|
|
1442
|
+
dedupeRate: 0,
|
|
1443
|
+
} };
|
|
1272
1444
|
let lastModelsProbeMs = 0;
|
|
1273
1445
|
const capturesDir = path.join(stateDir(), 'captures');
|
|
1274
1446
|
let captureEnabled = false;
|
|
@@ -1617,6 +1789,10 @@ export async function createSession(opts) {
|
|
|
1617
1789
|
await hookObj.onToolResult?.(result);
|
|
1618
1790
|
await hookManager.emit('tool_result', { askId, turn: turns, result });
|
|
1619
1791
|
};
|
|
1792
|
+
/**
 * Forward a tool-loop payload to the hooks.
 *
 * The optional `onToolLoop` callback on `hookObj` is awaited first, then a
 * `tool_loop` event carrying the ask id and current turn is emitted through
 * the hook manager.
 *
 * @param {object} loop Payload passed through to both hooks unchanged.
 * @returns {Promise<void>}
 */
const emitToolLoop = async (loop) => {
    await hookObj.onToolLoop?.(loop);
    // Built after the first await so `turns` is read at emit time.
    const event = { askId, turn: turns, loop };
    await hookManager.emit('tool_loop', event);
};
|
|
1620
1796
|
const emitTurnEnd = async (stats) => {
|
|
1621
1797
|
await hookObj.onTurnEnd?.(stats);
|
|
1622
1798
|
await hookManager.emit('turn_end', { askId, stats });
|
|
@@ -1627,6 +1803,8 @@ export async function createSession(opts) {
|
|
|
1627
1803
|
return { text: finalText, turns, toolCalls };
|
|
1628
1804
|
};
|
|
1629
1805
|
const rawInstructionText = userContentToText(instruction).trim();
|
|
1806
|
+
lastAskInstructionText = rawInstructionText;
|
|
1807
|
+
lastCompactionReminderObjective = '';
|
|
1630
1808
|
await hookManager.emit('ask_start', { askId, instruction: rawInstructionText });
|
|
1631
1809
|
const projectDir = cfg.dir ?? process.cwd();
|
|
1632
1810
|
const reviewKeys = reviewArtifactKeys(projectDir);
|
|
@@ -1750,12 +1928,50 @@ export async function createSession(opts) {
|
|
|
1750
1928
|
const blockedExecAttemptsBySig = new Map();
|
|
1751
1929
|
// Cache successful read-only exec observations by exact signature.
|
|
1752
1930
|
const execObservationCacheBySig = new Map();
|
|
1931
|
+
// Cache ALL successful exec results so repeated identical calls under context
|
|
1932
|
+
// pressure can replay the cached result instead of re-executing.
|
|
1933
|
+
const lastExecResultBySig = new Map();
|
|
1934
|
+
// Cache successful read_file/read_files/list_dir results by signature + mtime for invalidation.
|
|
1935
|
+
const readFileCacheBySig = new Map();
|
|
1936
|
+
const READ_FILE_CACHE_TOOLS = new Set(['read_file', 'read_files', 'list_dir']);
|
|
1937
|
+
const toolLoopGuard = new ToolLoopGuard({
|
|
1938
|
+
enabled: cfg.tool_loop_detection?.enabled,
|
|
1939
|
+
historySize: cfg.tool_loop_detection?.history_size,
|
|
1940
|
+
warningThreshold: cfg.tool_loop_detection?.warning_threshold,
|
|
1941
|
+
criticalThreshold: cfg.tool_loop_detection?.critical_threshold,
|
|
1942
|
+
globalCircuitBreakerThreshold: cfg.tool_loop_detection?.global_circuit_breaker_threshold,
|
|
1943
|
+
readCacheTtlMs: cfg.tool_loop_detection?.read_cache_ttl_ms,
|
|
1944
|
+
detectors: {
|
|
1945
|
+
genericRepeat: cfg.tool_loop_detection?.detectors?.generic_repeat,
|
|
1946
|
+
knownPollNoProgress: cfg.tool_loop_detection?.detectors?.known_poll_no_progress,
|
|
1947
|
+
pingPong: cfg.tool_loop_detection?.detectors?.ping_pong,
|
|
1948
|
+
},
|
|
1949
|
+
perTool: Object.fromEntries(Object.entries(cfg.tool_loop_detection?.per_tool ?? {}).map(([tool, policy]) => [
|
|
1950
|
+
tool,
|
|
1951
|
+
{
|
|
1952
|
+
warningThreshold: policy?.warning_threshold,
|
|
1953
|
+
criticalThreshold: policy?.critical_threshold,
|
|
1954
|
+
globalCircuitBreakerThreshold: policy?.global_circuit_breaker_threshold,
|
|
1955
|
+
detectors: {
|
|
1956
|
+
genericRepeat: policy?.detectors?.generic_repeat,
|
|
1957
|
+
knownPollNoProgress: policy?.detectors?.known_poll_no_progress,
|
|
1958
|
+
pingPong: policy?.detectors?.ping_pong,
|
|
1959
|
+
},
|
|
1960
|
+
},
|
|
1961
|
+
])),
|
|
1962
|
+
});
|
|
1963
|
+
const toolLoopWarningKeys = new Set();
|
|
1964
|
+
let forceToollessRecoveryTurn = false;
|
|
1965
|
+
let toollessRecoveryUsed = false;
|
|
1753
1966
|
// Prevent repeating the same "stop rerunning" reminder every turn.
|
|
1754
1967
|
const readOnlyExecHintedSigs = new Set();
|
|
1755
1968
|
// Keep a lightweight breadcrumb for diagnostics on partial failures.
|
|
1756
1969
|
let lastSuccessfulTestRun = null;
|
|
1757
1970
|
// One-time nudge to prevent post-success churn after green test runs.
|
|
1758
1971
|
let finalizeAfterTestsNudgeUsed = false;
|
|
1972
|
+
// Recover once/twice from server-side context-overflow 400/413s by forcing compaction and retrying.
|
|
1973
|
+
let overflowCompactionAttempts = 0;
|
|
1974
|
+
const MAX_OVERFLOW_COMPACTION_ATTEMPTS = 2;
|
|
1759
1975
|
const archiveToolOutputForVault = async (msg) => {
|
|
1760
1976
|
if (!lens || !vault || msg.role !== 'tool' || typeof msg.content !== 'string')
|
|
1761
1977
|
return msg;
|
|
@@ -1869,50 +2085,63 @@ export async function createSession(opts) {
|
|
|
1869
2085
|
throw new Error(`session timeout exceeded (${cfg.timeout}s) after ${wallElapsed.toFixed(1)}s`);
|
|
1870
2086
|
}
|
|
1871
2087
|
await maybeAutoDetectModelChange();
|
|
1872
|
-
|
|
1873
|
-
|
|
1874
|
-
|
|
1875
|
-
|
|
1876
|
-
|
|
1877
|
-
|
|
1878
|
-
|
|
1879
|
-
|
|
1880
|
-
|
|
1881
|
-
|
|
1882
|
-
|
|
1883
|
-
|
|
1884
|
-
|
|
1885
|
-
|
|
1886
|
-
|
|
1887
|
-
|
|
1888
|
-
|
|
1889
|
-
|
|
1890
|
-
|
|
1891
|
-
const
|
|
1892
|
-
|
|
1893
|
-
|
|
1894
|
-
|
|
1895
|
-
|
|
2088
|
+
// Automatic context-budget compaction for this turn, delegated to runCompactionWithLock.
// The callback trims `messages` via enforceContextBudget, archives whatever was dropped to the
// vault (when one is configured), and returns the stats object built at the end.
await runCompactionWithLock('auto context-budget compaction', async () => {
    const beforeMsgs = messages;
    const beforeTokens = estimateTokensFromMessages(beforeMsgs);
    const compacted = enforceContextBudget({
        messages: beforeMsgs,
        contextWindow,
        maxTokens,
        minTailMessages: 12,
        compactAt: cfg.compact_at ?? 0.8,
        toolSchemaTokens: estimateToolSchemaTokens(getToolsSchema()),
    });
    // Dropped = every original message object (compared by reference) not present in `compacted`.
    const compactedByRefs = new Set(compacted);
    const dropped = beforeMsgs.filter((m) => !compactedByRefs.has(m));
    if (dropped.length && vault) {
        // Store the original/current user prompt before compaction so it survives context loss.
        // This is best-effort and has its own try/catch: a failure here must not prevent the
        // dropped messages from being archived below.
        try {
            // Find the last substantive user message that looks like a task/instruction.
            let userPromptToPreserve = null;
            for (let i = beforeMsgs.length - 1; i >= 0; i--) {
                const m = beforeMsgs[i];
                if (m.role !== 'user') {
                    continue;
                }
                const text = userContentToText((m.content ?? '')).trim();
                // Skip vault injection messages and short prompts
                if (text && !text.startsWith('[Trifecta Vault') && !text.startsWith('[Vault context') && text.length > 20) {
                    userPromptToPreserve = text;
                    break;
                }
            }
            if (userPromptToPreserve) {
                await vault.upsertNote('current_task', userPromptToPreserve.slice(0, 2000), 'system');
            }
        }
        catch (e) {
            console.warn(`[warn] vault current_task note failed: ${e instanceof Error ? e.message : String(e)}`);
        }
        try {
            const toArchive = lens
                ? await Promise.all(dropped.map((m) => archiveToolOutputForVault(m)))
                : dropped;
            await vault.archiveToolMessages(toArchive, toolNameByCallId);
        }
        catch (e) {
            console.warn(`[warn] vault archive failed: ${e instanceof Error ? e.message : String(e)}`);
        }
    }
    messages = compacted;
    if (dropped.length) {
        messages.push({ role: 'system', content: buildCompactionSystemNote('auto', dropped.length) });
        // Vault re-injection is best-effort; a failure must not abort the compaction.
        await injectVaultContext().catch(() => { });
        injectCompactionReminder('auto context-budget compaction');
    }
    // `messages` and `compacted` are the same array, so the figures below include the
    // compaction note pushed above.
    const afterTokens = estimateTokensFromMessages(compacted);
    return {
        beforeMessages: beforeMsgs.length,
        afterMessages: compacted.length,
        freedTokens: Math.max(0, beforeTokens - afterTokens),
        archivedToolMessages: dropped.filter((m) => m.role === 'tool').length,
        droppedMessages: dropped.length,
        dryRun: false,
    };
});
|
|
1916
2145
|
const ac = makeAbortController();
|
|
1917
2146
|
inFlight = ac;
|
|
1918
2147
|
// If caller provided an AbortSignal (bench iteration timeout, etc), propagate it.
|
|
@@ -1936,20 +2165,45 @@ export async function createSession(opts) {
|
|
|
1936
2165
|
};
|
|
1937
2166
|
let resp;
|
|
1938
2167
|
try {
|
|
1939
|
-
|
|
1940
|
-
|
|
1941
|
-
|
|
1942
|
-
|
|
1943
|
-
|
|
1944
|
-
|
|
1945
|
-
|
|
1946
|
-
|
|
1947
|
-
|
|
1948
|
-
|
|
1949
|
-
|
|
1950
|
-
|
|
1951
|
-
|
|
1952
|
-
|
|
2168
|
+
try {
|
|
2169
|
+
const toolsForTurn = forceToollessRecoveryTurn ? [] : getToolsSchema();
|
|
2170
|
+
const toolChoiceForTurn = forceToollessRecoveryTurn ? 'none' : 'auto';
|
|
2171
|
+
resp = await client.chatStream({
|
|
2172
|
+
model,
|
|
2173
|
+
messages,
|
|
2174
|
+
tools: toolsForTurn,
|
|
2175
|
+
tool_choice: toolChoiceForTurn,
|
|
2176
|
+
temperature,
|
|
2177
|
+
top_p: topP,
|
|
2178
|
+
max_tokens: maxTokens,
|
|
2179
|
+
extra: { cache_prompt: cfg.cache_prompt ?? true },
|
|
2180
|
+
signal: ac.signal,
|
|
2181
|
+
requestId: `r${reqCounter}`,
|
|
2182
|
+
onToken: hookObj.onToken,
|
|
2183
|
+
onFirstDelta,
|
|
2184
|
+
});
|
|
2185
|
+
// Successful response resets overflow recovery budget.
|
|
2186
|
+
overflowCompactionAttempts = 0;
|
|
2187
|
+
}
|
|
2188
|
+
catch (e) {
|
|
2189
|
+
if (isContextWindowExceededError(e) && overflowCompactionAttempts < MAX_OVERFLOW_COMPACTION_ATTEMPTS) {
|
|
2190
|
+
overflowCompactionAttempts++;
|
|
2191
|
+
const useHardCompaction = overflowCompactionAttempts > 1;
|
|
2192
|
+
const compacted = await compactHistory({
|
|
2193
|
+
force: true,
|
|
2194
|
+
hard: useHardCompaction,
|
|
2195
|
+
reason: 'server context-window overflow recovery',
|
|
2196
|
+
});
|
|
2197
|
+
const mode = useHardCompaction ? 'hard' : 'force';
|
|
2198
|
+
messages.push({
|
|
2199
|
+
role: 'system',
|
|
2200
|
+
content: `[auto-recovery] Previous request exceeded model context window. Ran ${mode} compaction ` +
|
|
2201
|
+
`(freed ~${compacted.freedTokens} tokens, dropped ${compacted.droppedMessages} messages). Continue from latest state; do not restart work.`,
|
|
2202
|
+
});
|
|
2203
|
+
continue;
|
|
2204
|
+
}
|
|
2205
|
+
throw e;
|
|
2206
|
+
}
|
|
1953
2207
|
}
|
|
1954
2208
|
finally {
|
|
1955
2209
|
clearTimeout(timer);
|
|
@@ -2008,6 +2262,8 @@ export async function createSession(opts) {
|
|
|
2008
2262
|
},
|
|
2009
2263
|
}
|
|
2010
2264
|
: undefined;
|
|
2265
|
+
const wasToollessRecoveryTurn = forceToollessRecoveryTurn;
|
|
2266
|
+
forceToollessRecoveryTurn = false;
|
|
2011
2267
|
const choice0 = resp.choices?.[0] ?? legacyChoice;
|
|
2012
2268
|
const finishReason = choice0?.finish_reason ?? 'unknown';
|
|
2013
2269
|
const msg = choice0?.message;
|
|
@@ -2058,6 +2314,10 @@ export async function createSession(opts) {
|
|
|
2058
2314
|
}
|
|
2059
2315
|
}
|
|
2060
2316
|
}
|
|
2317
|
+
if (wasToollessRecoveryTurn && toolCallsArr?.length) {
|
|
2318
|
+
// Recovery turn explicitly disables tools; ignore any stray tool-call output.
|
|
2319
|
+
toolCallsArr = undefined;
|
|
2320
|
+
}
|
|
2061
2321
|
if (cfg.verbose) {
|
|
2062
2322
|
console.warn(`[turn ${turns}] finish_reason=${finishReason} content_chars=${content.length} visible_chars=${visible.length} tool_calls=${toolCallsArr?.length ?? 0}`);
|
|
2063
2323
|
}
|
|
@@ -2136,12 +2396,17 @@ export async function createSession(opts) {
|
|
|
2136
2396
|
// narration chunk starts on a fresh line (avoids wall-of-text output).
|
|
2137
2397
|
if (visible && hookObj.onToken)
|
|
2138
2398
|
hookObj.onToken('\n');
|
|
2139
|
-
|
|
2399
|
+
const originalToolCallsArr = toolCallsArr;
|
|
2400
|
+
const preparedTurn = toolLoopGuard.prepareTurn(originalToolCallsArr);
|
|
2401
|
+
const replayByCallId = preparedTurn.replayByCallId;
|
|
2402
|
+
const parsedArgsByCallId = preparedTurn.parsedArgsByCallId;
|
|
2403
|
+
toolCallsArr = preparedTurn.uniqueCalls;
|
|
2404
|
+
toolCalls += originalToolCallsArr.length;
|
|
2140
2405
|
const assistantToolCallText = visible || '';
|
|
2141
2406
|
const compactAssistantToolCallText = assistantToolCallText.length > 900
|
|
2142
2407
|
? `${assistantToolCallText.slice(0, 900)}\n[history-compacted: assistant narration truncated before tool execution]`
|
|
2143
2408
|
: assistantToolCallText;
|
|
2144
|
-
messages.push({ role: 'assistant', content: compactAssistantToolCallText, tool_calls:
|
|
2409
|
+
messages.push({ role: 'assistant', content: compactAssistantToolCallText, tool_calls: originalToolCallsArr });
|
|
2145
2410
|
// sigCounts is scoped to the entire ask() run (see above)
|
|
2146
2411
|
// Bridge ConfirmationProvider → legacy confirm callback for tools.
|
|
2147
2412
|
// If a ConfirmationProvider is given, wrap it; otherwise fall back to raw callback.
|
|
@@ -2185,18 +2450,64 @@ export async function createSession(opts) {
|
|
|
2185
2450
|
// We only treat repeated exec as a loop if no file mutations happened since the
|
|
2186
2451
|
// last time we saw that exact exec signature.
|
|
2187
2452
|
const turnSigs = new Set();
|
|
2453
|
+
const sigMetaBySig = new Map();
|
|
2188
2454
|
for (const tc of toolCallsArr) {
|
|
2189
|
-
const
|
|
2455
|
+
const callId = resolveCallId(tc);
|
|
2456
|
+
const parsedArgs = parsedArgsByCallId.get(callId) ?? {};
|
|
2457
|
+
const sig = toolLoopGuard.computeSignature(tc.function.name, parsedArgs);
|
|
2190
2458
|
turnSigs.add(sig);
|
|
2459
|
+
if (!sigMetaBySig.has(sig)) {
|
|
2460
|
+
sigMetaBySig.set(sig, { toolName: tc.function.name, args: parsedArgs });
|
|
2461
|
+
}
|
|
2191
2462
|
}
|
|
2192
2463
|
// Repeated read-only exec calls can be served from cache instead of hard-breaking.
|
|
2193
2464
|
const repeatedReadOnlyExecSigs = new Set();
|
|
2194
2465
|
const readOnlyExecTurnHints = [];
|
|
2466
|
+
// Repeated exec calls (any kind) can replay cached results under pressure.
|
|
2467
|
+
const replayExecSigs = new Set();
|
|
2468
|
+
// Repeated read_file/read_files/list_dir calls can be served from cache.
|
|
2469
|
+
const repeatedReadFileSigs = new Set();
|
|
2470
|
+
let shouldForceToollessRecovery = false;
|
|
2471
|
+
const criticalLoopSigs = new Set();
|
|
2472
|
+
for (const tc of toolCallsArr) {
|
|
2473
|
+
const callId = resolveCallId(tc);
|
|
2474
|
+
const args = parsedArgsByCallId.get(callId) ?? {};
|
|
2475
|
+
const detected = toolLoopGuard.detect(tc.function.name, args);
|
|
2476
|
+
const warning = toolLoopGuard.formatWarning(detected, tc.function.name);
|
|
2477
|
+
if (warning) {
|
|
2478
|
+
const warningKey = `${warning.level}:${warning.detector}:${detected.signature}`;
|
|
2479
|
+
if (!toolLoopWarningKeys.has(warningKey)) {
|
|
2480
|
+
toolLoopWarningKeys.add(warningKey);
|
|
2481
|
+
await emitToolLoop({
|
|
2482
|
+
level: warning.level,
|
|
2483
|
+
detector: warning.detector,
|
|
2484
|
+
toolName: warning.toolName,
|
|
2485
|
+
count: warning.count,
|
|
2486
|
+
message: warning.message,
|
|
2487
|
+
});
|
|
2488
|
+
messages.push({
|
|
2489
|
+
role: 'system',
|
|
2490
|
+
content: `[tool-loop ${warning.level}] ${warning.message}. Stop repeating ${warning.toolName} with unchanged inputs; continue with analysis or next step.`,
|
|
2491
|
+
});
|
|
2492
|
+
}
|
|
2493
|
+
}
|
|
2494
|
+
if (toolLoopGuard.shouldDisableToolsNextTurn(detected)) {
|
|
2495
|
+
shouldForceToollessRecovery = true;
|
|
2496
|
+
criticalLoopSigs.add(detected.signature);
|
|
2497
|
+
}
|
|
2498
|
+
}
|
|
2195
2499
|
// Track whether a mutation happened since a given signature was last seen.
|
|
2196
2500
|
// (Tool-loop is single-threaded across turns; this is safe to keep in-memory.)
|
|
2197
2501
|
for (const sig of turnSigs) {
|
|
2198
2502
|
sigCounts.set(sig, (sigCounts.get(sig) ?? 0) + 1);
|
|
2199
|
-
const
|
|
2503
|
+
const sigMeta = sigMetaBySig.get(sig);
|
|
2504
|
+
const toolName = sigMeta?.toolName ?? sig.split(':')[0];
|
|
2505
|
+
if (criticalLoopSigs.has(sig)) {
|
|
2506
|
+
// Critical detector already fired for this signature; recover next turn
|
|
2507
|
+
// with tools disabled instead of throwing in per-tool hard-break logic.
|
|
2508
|
+
shouldForceToollessRecovery = true;
|
|
2509
|
+
continue;
|
|
2510
|
+
}
|
|
2200
2511
|
// For exec loops, only break if nothing changed since last identical exec.
|
|
2201
2512
|
if (toolName === 'exec') {
|
|
2202
2513
|
// If this exact exec signature was seen before, record the mutation version at that time.
|
|
@@ -2207,6 +2518,18 @@ export async function createSession(opts) {
|
|
|
2207
2518
|
mutationVersionBySig.set(sig, mutationVersion);
|
|
2208
2519
|
if (!hasMutatedSince) {
|
|
2209
2520
|
const count = sigCounts.get(sig) ?? 0;
|
|
2521
|
+
// Early replay: if this exact exec was already run (count >= 2) and
|
|
2522
|
+
// we have a cached result, replay it instead of re-executing. This
|
|
2523
|
+
// prevents the compaction death spiral where tool results get dropped,
|
|
2524
|
+
// the model forgets it ran the command, and re-runs it endlessly.
|
|
2525
|
+
// Skip read-only commands that already have their own observation cache —
|
|
2526
|
+
// those are handled by the dedicated read-only path at loopThreshold.
|
|
2527
|
+
const command = execCommandFromSig(sig);
|
|
2528
|
+
const hasReadOnlyCache = looksLikeReadOnlyExecCommand(command) && execObservationCacheBySig.has(sig);
|
|
2529
|
+
if (count >= 2 && lastExecResultBySig.has(sig) && !hasReadOnlyCache) {
|
|
2530
|
+
replayExecSigs.add(sig);
|
|
2531
|
+
continue;
|
|
2532
|
+
}
|
|
2210
2533
|
let loopThreshold = harness.quirks.loopsOnToolError ? 3 : 6;
|
|
2211
2534
|
// If the cached observation already tells the model "no matches found",
|
|
2212
2535
|
// break much earlier — the model is ignoring the hint.
|
|
@@ -2219,7 +2542,8 @@ export async function createSession(opts) {
|
|
|
2219
2542
|
await injectVaultContext().catch(() => { });
|
|
2220
2543
|
}
|
|
2221
2544
|
if (count >= loopThreshold) {
|
|
2222
|
-
const
|
|
2545
|
+
const sigArgs = sigMetaBySig.get(sig)?.args ?? {};
|
|
2546
|
+
const command = typeof sigArgs?.command === 'string' ? String(sigArgs.command) : '';
|
|
2223
2547
|
const canReuseReadOnlyObservation = looksLikeReadOnlyExecCommand(command) &&
|
|
2224
2548
|
execObservationCacheBySig.has(sig);
|
|
2225
2549
|
if (canReuseReadOnlyObservation) {
|
|
@@ -2230,8 +2554,8 @@ export async function createSession(opts) {
|
|
|
2230
2554
|
}
|
|
2231
2555
|
continue;
|
|
2232
2556
|
}
|
|
2233
|
-
const
|
|
2234
|
-
const argsPreview =
|
|
2557
|
+
const argsPreviewRaw = JSON.stringify(sigArgs);
|
|
2558
|
+
const argsPreview = argsPreviewRaw.length > 220 ? argsPreviewRaw.slice(0, 220) + '…' : argsPreviewRaw;
|
|
2235
2559
|
throw new Error(`tool ${toolName}: identical call repeated ${loopThreshold}x across turns; breaking loop. ` +
|
|
2236
2560
|
`args=${argsPreview}`);
|
|
2237
2561
|
}
|
|
@@ -2250,7 +2574,9 @@ export async function createSession(opts) {
|
|
|
2250
2574
|
consecutiveCounts.set(sig, 1);
|
|
2251
2575
|
}
|
|
2252
2576
|
const consec = consecutiveCounts.get(sig) ?? 1;
|
|
2253
|
-
|
|
2577
|
+
const isReadFileTool = READ_FILE_CACHE_TOOLS.has(toolName);
|
|
2578
|
+
const hardBreakAt = isReadFileTool ? 6 : 4;
|
|
2579
|
+
// At 3x, inject vault context and first warning
|
|
2254
2580
|
if (consec >= 3) {
|
|
2255
2581
|
await injectVaultContext().catch(() => { });
|
|
2256
2582
|
if (consec === 3) {
|
|
@@ -2272,18 +2598,45 @@ export async function createSession(opts) {
|
|
|
2272
2598
|
}
|
|
2273
2599
|
}
|
|
2274
2600
|
}
|
|
2275
|
-
//
|
|
2276
|
-
if (consec >=
|
|
2277
|
-
|
|
2278
|
-
|
|
2601
|
+
// From the 2nd consecutive identical read onward, serve from cache if available; at exactly 4x, also inject a final warning
|
|
2602
|
+
if (consec >= 2 && isReadFileTool) {
|
|
2603
|
+
if (consec === 4) {
|
|
2604
|
+
let resourceType = 'resource';
|
|
2605
|
+
if (toolName === 'read_file')
|
|
2606
|
+
resourceType = 'file';
|
|
2607
|
+
else if (toolName === 'read_files')
|
|
2608
|
+
resourceType = 'files';
|
|
2609
|
+
else if (toolName === 'list_dir')
|
|
2610
|
+
resourceType = 'directory';
|
|
2611
|
+
messages.push({
|
|
2612
|
+
role: 'system',
|
|
2613
|
+
content: `CRITICAL: DO NOT make another identical call for this ${resourceType}. It HAS NOT CHANGED. You already have the content. Move on to the NEXT step NOW.`,
|
|
2614
|
+
});
|
|
2615
|
+
}
|
|
2616
|
+
const argsForSig = sigMetaBySig.get(sig)?.args ?? {};
|
|
2617
|
+
const replay = await toolLoopGuard.getReadCacheReplay(toolName, argsForSig, ctx.cwd);
|
|
2618
|
+
if (replay) {
|
|
2619
|
+
repeatedReadFileSigs.add(sig);
|
|
2620
|
+
continue;
|
|
2621
|
+
}
|
|
2622
|
+
}
|
|
2623
|
+
// Deterministic recovery at threshold (no hard throw): force one no-tools turn.
|
|
2624
|
+
if (consec >= hardBreakAt) {
|
|
2625
|
+
shouldForceToollessRecovery = true;
|
|
2626
|
+
messages.push({
|
|
2627
|
+
role: 'system',
|
|
2628
|
+
content: `[tool-loop critical] ${toolName} repeated ${consec}x with unchanged inputs. ` +
|
|
2629
|
+
'Next turn will run with tools disabled so you must use existing results and provide a concrete next step/final response.',
|
|
2630
|
+
});
|
|
2279
2631
|
}
|
|
2280
2632
|
continue;
|
|
2281
2633
|
}
|
|
2282
2634
|
// Default behavior for mutating/other tools: break on repeated identical signature.
|
|
2283
2635
|
const loopThreshold = harness.quirks.loopsOnToolError ? 2 : 3;
|
|
2284
2636
|
if ((sigCounts.get(sig) ?? 0) >= loopThreshold) {
|
|
2285
|
-
const
|
|
2286
|
-
const
|
|
2637
|
+
const argsObj = sigMetaBySig.get(sig)?.args ?? {};
|
|
2638
|
+
const argsRaw = JSON.stringify(argsObj);
|
|
2639
|
+
const argsPreview = argsRaw.length > 220 ? argsRaw.slice(0, 220) + '…' : argsRaw;
|
|
2287
2640
|
throw new Error(`tool ${toolName}: identical call repeated ${loopThreshold}x across turns; breaking loop. ` +
|
|
2288
2641
|
`args=${argsPreview}\n` +
|
|
2289
2642
|
`Hint: you repeated the same tool call ${loopThreshold} times with identical arguments. ` +
|
|
@@ -2294,6 +2647,31 @@ export async function createSession(opts) {
|
|
|
2294
2647
|
}
|
|
2295
2648
|
// Update consecutive tracking: save this turn's signatures for next turn comparison.
|
|
2296
2649
|
lastTurnSigs = turnSigs;
|
|
2650
|
+
if (shouldForceToollessRecovery) {
|
|
2651
|
+
if (!toollessRecoveryUsed) {
|
|
2652
|
+
forceToollessRecoveryTurn = true;
|
|
2653
|
+
toollessRecoveryUsed = true;
|
|
2654
|
+
messages.push({
|
|
2655
|
+
role: 'user',
|
|
2656
|
+
content: '[system] Critical tool loop detected. Next turn will run with tools disabled. ' +
|
|
2657
|
+
'Use already available tool results to provide a concrete next step or final response; do not request more tools.',
|
|
2658
|
+
});
|
|
2659
|
+
await emitTurnEnd({
|
|
2660
|
+
turn: turns,
|
|
2661
|
+
toolCalls,
|
|
2662
|
+
promptTokens: cumulativeUsage.prompt,
|
|
2663
|
+
completionTokens: cumulativeUsage.completion,
|
|
2664
|
+
promptTokensTurn,
|
|
2665
|
+
completionTokensTurn,
|
|
2666
|
+
ttftMs,
|
|
2667
|
+
ttcMs,
|
|
2668
|
+
ppTps,
|
|
2669
|
+
tgTps,
|
|
2670
|
+
});
|
|
2671
|
+
continue;
|
|
2672
|
+
}
|
|
2673
|
+
throw new AgentLoopBreak('critical tool-loop persisted after one tools-disabled recovery turn. Stopping to avoid infinite loop.');
|
|
2674
|
+
}
|
|
2297
2675
|
const runOne = async (tc) => {
|
|
2298
2676
|
const name = tc.function.name;
|
|
2299
2677
|
const rawArgs = tc.function.arguments ?? '{}';
|
|
@@ -2323,6 +2701,7 @@ export async function createSession(opts) {
|
|
|
2323
2701
|
throw new Error(`unknown tool: ${name}`);
|
|
2324
2702
|
// Keep parsed args by call-id so we can digest/archive tool outputs with context.
|
|
2325
2703
|
toolArgsByCallId.set(callId, args && typeof args === 'object' && !Array.isArray(args) ? args : {});
|
|
2704
|
+
toolLoopGuard.registerCall(name, args && typeof args === 'object' && !Array.isArray(args) ? args : {}, callId);
|
|
2326
2705
|
// Pre-dispatch argument validation.
|
|
2327
2706
|
// - Required params
|
|
2328
2707
|
// - Type/range/enums
|
|
@@ -2427,9 +2806,10 @@ export async function createSession(opts) {
|
|
|
2427
2806
|
return { id: callId, content: '[skipped by user: step mode]' };
|
|
2428
2807
|
}
|
|
2429
2808
|
}
|
|
2430
|
-
const sig =
|
|
2809
|
+
const sig = toolLoopGuard.computeSignature(name, args && typeof args === 'object' && !Array.isArray(args) ? args : {});
|
|
2431
2810
|
let content = '';
|
|
2432
2811
|
let reusedCachedReadOnlyExec = false;
|
|
2812
|
+
let reusedCachedReadTool = false;
|
|
2433
2813
|
if (name === 'exec' && repeatedReadOnlyExecSigs.has(sig)) {
|
|
2434
2814
|
const cached = execObservationCacheBySig.get(sig);
|
|
2435
2815
|
if (cached) {
|
|
@@ -2437,7 +2817,22 @@ export async function createSession(opts) {
|
|
|
2437
2817
|
reusedCachedReadOnlyExec = true;
|
|
2438
2818
|
}
|
|
2439
2819
|
}
|
|
2440
|
-
|
|
2820
|
+
// Replay any exec result (read-only or not) when the loop detector flagged it.
|
|
2821
|
+
if (name === 'exec' && !reusedCachedReadOnlyExec && replayExecSigs.has(sig)) {
|
|
2822
|
+
const cached = lastExecResultBySig.get(sig);
|
|
2823
|
+
if (cached) {
|
|
2824
|
+
content = withReplayedExecHint(cached);
|
|
2825
|
+
reusedCachedReadOnlyExec = true; // skip re-execution below
|
|
2826
|
+
}
|
|
2827
|
+
}
|
|
2828
|
+
if (READ_FILE_CACHE_TOOLS.has(name) && repeatedReadFileSigs.has(sig)) {
|
|
2829
|
+
const replay = await toolLoopGuard.getReadCacheReplay(name, args, ctx.cwd);
|
|
2830
|
+
if (replay) {
|
|
2831
|
+
content = replay;
|
|
2832
|
+
reusedCachedReadTool = true;
|
|
2833
|
+
}
|
|
2834
|
+
}
|
|
2835
|
+
if (!reusedCachedReadOnlyExec && !reusedCachedReadTool) {
|
|
2441
2836
|
if (isSpawnTask) {
|
|
2442
2837
|
content = await runSpawnTask(args);
|
|
2443
2838
|
}
|
|
@@ -2450,9 +2845,16 @@ export async function createSession(opts) {
|
|
|
2450
2845
|
};
|
|
2451
2846
|
const value = await builtInFn(callCtx, args);
|
|
2452
2847
|
content = typeof value === 'string' ? value : JSON.stringify(value);
|
|
2848
|
+
if (READ_FILE_CACHE_TOOLS.has(name) && typeof content === 'string' && !content.startsWith('ERROR:')) {
|
|
2849
|
+
const baseCwd = typeof args?.cwd === 'string' ? String(args.cwd) : ctx.cwd;
|
|
2850
|
+
await toolLoopGuard.storeReadCache(name, args, baseCwd, content);
|
|
2851
|
+
}
|
|
2453
2852
|
if (name === 'exec') {
|
|
2454
2853
|
// Successful exec clears blocked-loop counters.
|
|
2455
2854
|
blockedExecAttemptsBySig.clear();
|
|
2855
|
+
// Cache every exec result so repeated calls under context pressure
|
|
2856
|
+
// can replay the result instead of re-executing.
|
|
2857
|
+
lastExecResultBySig.set(sig, content);
|
|
2456
2858
|
const cmd = String(args?.command ?? '');
|
|
2457
2859
|
if (looksLikeReadOnlyExecCommand(cmd) && readOnlyExecCacheable(content)) {
|
|
2458
2860
|
execObservationCacheBySig.set(sig, content);
|
|
@@ -2591,6 +2993,10 @@ export async function createSession(opts) {
|
|
|
2591
2993
|
}
|
|
2592
2994
|
}
|
|
2593
2995
|
}
|
|
2996
|
+
toolLoopGuard.registerOutcome(name, args, {
|
|
2997
|
+
toolCallId: callId,
|
|
2998
|
+
result: content,
|
|
2999
|
+
});
|
|
2594
3000
|
return { id: callId, content };
|
|
2595
3001
|
};
|
|
2596
3002
|
const results = [];
|
|
@@ -2611,7 +3017,8 @@ export async function createSession(opts) {
|
|
|
2611
3017
|
// Applies to direct exec attempts and spawn_task delegation attempts.
|
|
2612
3018
|
if (tc.function.name === 'exec' || tc.function.name === 'spawn_task') {
|
|
2613
3019
|
const blockedMatch = msg.match(/^exec:\s*blocked\s*\(([^)]+)\)\s*without --no-confirm\/--yolo:\s*(.*)$/i)
|
|
2614
|
-
|| msg.match(/^(spawn_task):\s*blocked\s*—\s*(.*)$/i)
|
|
3020
|
+
|| msg.match(/^(spawn_task):\s*blocked\s*—\s*(.*)$/i)
|
|
3021
|
+
|| msg.match(/^exec:\s*blocked\s+(background command\b[^.]*)\./i);
|
|
2615
3022
|
if (blockedMatch) {
|
|
2616
3023
|
const reason = (blockedMatch[1] || blockedMatch[2] || 'blocked command').trim();
|
|
2617
3024
|
let parsedArgs = {};
|
|
@@ -2623,7 +3030,8 @@ export async function createSession(opts) {
|
|
|
2623
3030
|
? String(parsedArgs?.command ?? '')
|
|
2624
3031
|
: String(parsedArgs?.task ?? '');
|
|
2625
3032
|
const normalizedReason = reason.toLowerCase();
|
|
2626
|
-
const aggregateByReason = normalizedReason.includes('package install/remove')
|
|
3033
|
+
const aggregateByReason = normalizedReason.includes('package install/remove')
|
|
3034
|
+
|| normalizedReason.includes('background command');
|
|
2627
3035
|
const sig = aggregateByReason
|
|
2628
3036
|
? `${tc.function.name}|${reason}`
|
|
2629
3037
|
: `${tc.function.name}|${reason}|${cmd}`;
|
|
@@ -2645,6 +3053,20 @@ export async function createSession(opts) {
|
|
|
2645
3053
|
retryable: te.retryable,
|
|
2646
3054
|
result: toolErrorContent,
|
|
2647
3055
|
});
|
|
3056
|
+
let parsedArgs = {};
|
|
3057
|
+
try {
|
|
3058
|
+
const parsed = JSON.parse(tc.function.arguments ?? '{}');
|
|
3059
|
+
if (parsed && typeof parsed === 'object' && !Array.isArray(parsed)) {
|
|
3060
|
+
parsedArgs = parsed;
|
|
3061
|
+
}
|
|
3062
|
+
}
|
|
3063
|
+
catch {
|
|
3064
|
+
// keep empty object
|
|
3065
|
+
}
|
|
3066
|
+
toolLoopGuard.registerOutcome(tc.function.name, parsedArgs, {
|
|
3067
|
+
toolCallId: callId,
|
|
3068
|
+
error: msg,
|
|
3069
|
+
});
|
|
2648
3070
|
return { id: callId, content: toolErrorContent };
|
|
2649
3071
|
};
|
|
2650
3072
|
// ── Anti-scan guardrails (§ read budget, dir scan, same-search) ──
|
|
@@ -2708,6 +3130,19 @@ export async function createSession(opts) {
|
|
|
2708
3130
|
}
|
|
2709
3131
|
}
|
|
2710
3132
|
}
|
|
3133
|
+
// Give each de-duplicated tool call a stub result that points at its canonical call id.
// Only the canonical call has an entry in `results`; the stub carries a pointer, not a copy
// of the content.
if (replayByCallId.size > 0) {
    const contentByCallId = new Map();
    for (const r of results) {
        contentByCallId.set(r.id, r.content);
    }
    for (const [dupId, canonicalId] of replayByCallId.entries()) {
        // NOTE(review): when the canonical result is missing, the duplicate id gets no result
        // at all — confirm that downstream message assembly tolerates an unanswered call id.
        if (contentByCallId.get(canonicalId) != null) {
            results.push({
                id: dupId,
                content: `[idlehands dedupe] Identical tool call replayed from ${canonicalId}. ` +
                    'Use that earlier tool result; no new execution was performed.',
            });
        }
    }
}
|
|
2711
3146
|
// Bail immediately if cancelled during tool execution
|
|
2712
3147
|
if (ac.signal.aborted)
|
|
2713
3148
|
break;
|
|
@@ -2755,6 +3190,8 @@ export async function createSession(opts) {
|
|
|
2755
3190
|
'Do not narrate. Fix required/mistyped fields and unknown keys.',
|
|
2756
3191
|
});
|
|
2757
3192
|
}
|
|
3193
|
+
// Update session-level tool loop stats for observability
|
|
3194
|
+
lastToolLoopStats = toolLoopGuard.getStats();
|
|
2758
3195
|
// Hook: onTurnEnd (Phase 8.5)
|
|
2759
3196
|
await emitTurnEnd({
|
|
2760
3197
|
turn: turns,
|
|
@@ -2928,6 +3365,7 @@ export async function createSession(opts) {
|
|
|
2928
3365
|
listModels,
|
|
2929
3366
|
refreshServerHealth,
|
|
2930
3367
|
getPerfSummary,
|
|
3368
|
+
getToolLoopStats: () => lastToolLoopStats,
|
|
2931
3369
|
captureOn,
|
|
2932
3370
|
captureOff,
|
|
2933
3371
|
captureLast,
|
|
@@ -2965,6 +3403,9 @@ export async function createSession(opts) {
|
|
|
2965
3403
|
get planSteps() {
|
|
2966
3404
|
return planSteps;
|
|
2967
3405
|
},
|
|
3406
|
+
get compactionStats() {
|
|
3407
|
+
return { ...compactionStats };
|
|
3408
|
+
},
|
|
2968
3409
|
executePlanStep,
|
|
2969
3410
|
clearPlan,
|
|
2970
3411
|
compactHistory
|