@visorcraft/idlehands 1.1.17 → 1.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/agent/formatting.js +30 -13
- package/dist/agent/formatting.js.map +1 -1
- package/dist/agent/review-artifact.js +12 -8
- package/dist/agent/review-artifact.js.map +1 -1
- package/dist/agent/tool-calls.js +57 -20
- package/dist/agent/tool-calls.js.map +1 -1
- package/dist/agent/tool-loop-detection.js +310 -0
- package/dist/agent/tool-loop-detection.js.map +1 -0
- package/dist/agent/tool-loop-guard.js +251 -0
- package/dist/agent/tool-loop-guard.js.map +1 -0
- package/dist/agent.js +460 -144
- package/dist/agent.js.map +1 -1
- package/dist/anton/controller.js +46 -30
- package/dist/anton/controller.js.map +1 -1
- package/dist/anton/lock.js +5 -1
- package/dist/anton/lock.js.map +1 -1
- package/dist/anton/parser.js +18 -19
- package/dist/anton/parser.js.map +1 -1
- package/dist/anton/prompt.js +42 -11
- package/dist/anton/prompt.js.map +1 -1
- package/dist/anton/reporter.js.map +1 -1
- package/dist/anton/session.js.map +1 -1
- package/dist/anton/verifier.js +3 -5
- package/dist/anton/verifier.js.map +1 -1
- package/dist/bench/compare.js +53 -20
- package/dist/bench/compare.js.map +1 -1
- package/dist/bench/openclaw.js +4 -4
- package/dist/bench/openclaw.js.map +1 -1
- package/dist/bench/report.js +11 -3
- package/dist/bench/report.js.map +1 -1
- package/dist/bench/runner.js +20 -14
- package/dist/bench/runner.js.map +1 -1
- package/dist/bot/commands.js +65 -31
- package/dist/bot/commands.js.map +1 -1
- package/dist/bot/confirm-discord.js +32 -9
- package/dist/bot/confirm-discord.js.map +1 -1
- package/dist/bot/confirm-telegram.js +26 -10
- package/dist/bot/confirm-telegram.js.map +1 -1
- package/dist/bot/dir-guard.js +18 -3
- package/dist/bot/dir-guard.js.map +1 -1
- package/dist/bot/discord-routing.js +28 -4
- package/dist/bot/discord-routing.js.map +1 -1
- package/dist/bot/discord-streaming.js +3 -3
- package/dist/bot/discord-streaming.js.map +1 -1
- package/dist/bot/discord.js +82 -37
- package/dist/bot/discord.js.map +1 -1
- package/dist/bot/escalation.js +124 -0
- package/dist/bot/escalation.js.map +1 -0
- package/dist/bot/format.js +2 -5
- package/dist/bot/format.js.map +1 -1
- package/dist/bot/session-manager.js +17 -6
- package/dist/bot/session-manager.js.map +1 -1
- package/dist/bot/telegram.js +88 -28
- package/dist/bot/telegram.js.map +1 -1
- package/dist/cli/agent-turn.js +10 -4
- package/dist/cli/agent-turn.js.map +1 -1
- package/dist/cli/args.js +51 -9
- package/dist/cli/args.js.map +1 -1
- package/dist/cli/bot.js +19 -9
- package/dist/cli/bot.js.map +1 -1
- package/dist/cli/build-repl-context.js +60 -26
- package/dist/cli/build-repl-context.js.map +1 -1
- package/dist/cli/command-registry.js.map +1 -1
- package/dist/cli/commands/anton.js +5 -3
- package/dist/cli/commands/anton.js.map +1 -1
- package/dist/cli/commands/editing.js +27 -12
- package/dist/cli/commands/editing.js.map +1 -1
- package/dist/cli/commands/model.js +16 -7
- package/dist/cli/commands/model.js.map +1 -1
- package/dist/cli/commands/project.js +52 -17
- package/dist/cli/commands/project.js.map +1 -1
- package/dist/cli/commands/runtime.js +1 -1
- package/dist/cli/commands/runtime.js.map +1 -1
- package/dist/cli/commands/secrets.js +279 -0
- package/dist/cli/commands/secrets.js.map +1 -0
- package/dist/cli/commands/session.js +49 -1
- package/dist/cli/commands/session.js.map +1 -1
- package/dist/cli/commands/tools.js +3 -1
- package/dist/cli/commands/tools.js.map +1 -1
- package/dist/cli/commands/trifecta.js +1 -1
- package/dist/cli/commands/trifecta.js.map +1 -1
- package/dist/cli/commands/tui.js.map +1 -1
- package/dist/cli/init.js +50 -16
- package/dist/cli/init.js.map +1 -1
- package/dist/cli/input.js +25 -7
- package/dist/cli/input.js.map +1 -1
- package/dist/cli/oneshot.js +31 -19
- package/dist/cli/oneshot.js.map +1 -1
- package/dist/cli/repl-dispatch.js +10 -6
- package/dist/cli/repl-dispatch.js.map +1 -1
- package/dist/cli/runtime-cmds.js +110 -46
- package/dist/cli/runtime-cmds.js.map +1 -1
- package/dist/cli/service.js +3 -3
- package/dist/cli/service.js.map +1 -1
- package/dist/cli/session-state.js +12 -5
- package/dist/cli/session-state.js.map +1 -1
- package/dist/cli/setup.js +86 -33
- package/dist/cli/setup.js.map +1 -1
- package/dist/cli/shell.js +4 -4
- package/dist/cli/shell.js.map +1 -1
- package/dist/cli/status.js +56 -12
- package/dist/cli/status.js.map +1 -1
- package/dist/client.js +40 -21
- package/dist/client.js.map +1 -1
- package/dist/commands.js +1 -1
- package/dist/commands.js.map +1 -1
- package/dist/config.js +171 -15
- package/dist/config.js.map +1 -1
- package/dist/confirm/auto.js.map +1 -1
- package/dist/confirm/headless.js +13 -2
- package/dist/confirm/headless.js.map +1 -1
- package/dist/confirm/terminal.js +1 -5
- package/dist/confirm/terminal.js.map +1 -1
- package/dist/context.js +9 -3
- package/dist/context.js.map +1 -1
- package/dist/git.js +56 -61
- package/dist/git.js.map +1 -1
- package/dist/harnesses.js +137 -37
- package/dist/harnesses.js.map +1 -1
- package/dist/history.js +12 -4
- package/dist/history.js.map +1 -1
- package/dist/hooks/index.js +2 -2
- package/dist/hooks/index.js.map +1 -1
- package/dist/hooks/loader.js +6 -5
- package/dist/hooks/loader.js.map +1 -1
- package/dist/hooks/manager.js.map +1 -1
- package/dist/hooks/plugins/example-console.js.map +1 -1
- package/dist/hooks/scaffold.js +8 -6
- package/dist/hooks/scaffold.js.map +1 -1
- package/dist/index.js +120 -66
- package/dist/index.js.map +1 -1
- package/dist/indexer.js +6 -18
- package/dist/indexer.js.map +1 -1
- package/dist/jsonrpc.js.map +1 -1
- package/dist/lens.js +38 -16
- package/dist/lens.js.map +1 -1
- package/dist/lsp.js +60 -24
- package/dist/lsp.js.map +1 -1
- package/dist/markdown.js +6 -6
- package/dist/markdown.js.map +1 -1
- package/dist/mcp.js +15 -6
- package/dist/mcp.js.map +1 -1
- package/dist/model-customization.js +7 -3
- package/dist/model-customization.js.map +1 -1
- package/dist/progress/message-edit-scheduler.js +15 -3
- package/dist/progress/message-edit-scheduler.js.map +1 -1
- package/dist/progress/progress-message-renderer.js.map +1 -1
- package/dist/progress/progress-presenter.js +3 -3
- package/dist/progress/progress-presenter.js.map +1 -1
- package/dist/progress/serialize-telegram.js.map +1 -1
- package/dist/progress/tool-summary.js +3 -1
- package/dist/progress/tool-summary.js.map +1 -1
- package/dist/progress/turn-progress.js +3 -1
- package/dist/progress/turn-progress.js.map +1 -1
- package/dist/recovery.js +11 -3
- package/dist/recovery.js.map +1 -1
- package/dist/replay.js +9 -3
- package/dist/replay.js.map +1 -1
- package/dist/replay_cli.js +5 -3
- package/dist/replay_cli.js.map +1 -1
- package/dist/runtime/executor.js +66 -20
- package/dist/runtime/executor.js.map +1 -1
- package/dist/runtime/health.js.map +1 -1
- package/dist/runtime/host-runner.js +103 -0
- package/dist/runtime/host-runner.js.map +1 -0
- package/dist/runtime/planner.js +3 -1
- package/dist/runtime/planner.js.map +1 -1
- package/dist/runtime/secrets.js +102 -0
- package/dist/runtime/secrets.js.map +1 -0
- package/dist/runtime/store.js +95 -19
- package/dist/runtime/store.js.map +1 -1
- package/dist/safety.js +38 -21
- package/dist/safety.js.map +1 -1
- package/dist/spinner.js +7 -8
- package/dist/spinner.js.map +1 -1
- package/dist/sys/context.js +3 -3
- package/dist/sys/context.js.map +1 -1
- package/dist/term.js +1 -1
- package/dist/term.js.map +1 -1
- package/dist/themes.js +11 -5
- package/dist/themes.js.map +1 -1
- package/dist/tools/tool-error.js +2 -5
- package/dist/tools/tool-error.js.map +1 -1
- package/dist/tools.js +69 -34
- package/dist/tools.js.map +1 -1
- package/dist/tui/branch-picker.js +9 -3
- package/dist/tui/branch-picker.js.map +1 -1
- package/dist/tui/command-handler.js +88 -36
- package/dist/tui/command-handler.js.map +1 -1
- package/dist/tui/confirm.js.map +1 -1
- package/dist/tui/controller.js +234 -117
- package/dist/tui/controller.js.map +1 -1
- package/dist/tui/event-bridge.js.map +1 -1
- package/dist/tui/keymap.js +93 -71
- package/dist/tui/keymap.js.map +1 -1
- package/dist/tui/layout.js +9 -1
- package/dist/tui/layout.js.map +1 -1
- package/dist/tui/render.js +17 -5
- package/dist/tui/render.js.map +1 -1
- package/dist/tui/screen.js.map +1 -1
- package/dist/tui/state.js +129 -63
- package/dist/tui/state.js.map +1 -1
- package/dist/tui/theme.js +12 -3
- package/dist/tui/theme.js.map +1 -1
- package/dist/upgrade.js +28 -15
- package/dist/upgrade.js.map +1 -1
- package/dist/utils.js +8 -5
- package/dist/utils.js.map +1 -1
- package/dist/vault.js +48 -12
- package/dist/vault.js.map +1 -1
- package/dist/vim.js.map +1 -1
- package/package.json +11 -2
package/dist/agent.js
CHANGED
|
@@ -17,6 +17,7 @@ import { LspManager, detectInstalledLspServers } from './lsp.js';
|
|
|
17
17
|
import { generateMinimalDiff, toolResultSummary, execCommandFromSig, formatDurationMs, looksLikePlanningNarration, capTextByApproxTokens, isLikelyBinaryBuffer, sanitizePathsInMessage, digestToolResult, } from './agent/formatting.js';
|
|
18
18
|
import { parseToolCallsFromContent, getMissingRequiredParams, getArgValidationIssues, stripMarkdownFences } from './agent/tool-calls.js';
|
|
19
19
|
import { ToolError, ValidationError } from './tools/tool-error.js';
|
|
20
|
+
import { ToolLoopGuard } from './agent/tool-loop-guard.js';
|
|
20
21
|
export { parseToolCallsFromContent };
|
|
21
22
|
import { reviewArtifactKeys, looksLikeCodeReviewRequest, looksLikeReviewRetrievalRequest, retrievalAllowsStaleArtifact, parseReviewArtifactStalePolicy, parseReviewArtifact, reviewArtifactStaleReason, gitHead, normalizeModelsResponse, } from './agent/review-artifact.js';
|
|
22
23
|
import fs from 'node:fs/promises';
|
|
@@ -27,9 +28,13 @@ function makeAbortController() {
|
|
|
27
28
|
return new AbortController();
|
|
28
29
|
}
|
|
29
30
|
const CACHED_EXEC_OBSERVATION_HINT = '[idlehands hint] Reused cached output for repeated read-only exec call (unchanged observation).';
|
|
30
|
-
const CACHED_READ_OBSERVATION_HINT = '[idlehands hint] Reused cached output for repeated identical read call.';
|
|
31
31
|
function looksLikeReadOnlyExecCommand(command) {
|
|
32
|
-
|
|
32
|
+
// Strip leading `cd <path> &&` / `cd <path>;` prefixes — cd is read-only
|
|
33
|
+
// navigation, the actual command that matters comes after.
|
|
34
|
+
let cmd = String(command || '').trim().toLowerCase();
|
|
35
|
+
if (!cmd)
|
|
36
|
+
return false;
|
|
37
|
+
cmd = cmd.replace(/^(\s*cd\s+[^;&|]+\s*(?:&&|;)\s*)+/i, '').trim();
|
|
33
38
|
if (!cmd)
|
|
34
39
|
return false;
|
|
35
40
|
// Shell redirects are likely writes.
|
|
@@ -99,6 +104,25 @@ function withCachedExecObservationHint(content) {
|
|
|
99
104
|
return `${content}\n${CACHED_EXEC_OBSERVATION_HINT}`;
|
|
100
105
|
}
|
|
101
106
|
}
|
|
107
|
+
const REPLAYED_EXEC_HINT = '[idlehands hint] You already ran this exact command. This is the replayed result from your previous execution. Do NOT re-run it — use the output below to continue your task.';
|
|
108
|
+
function withReplayedExecHint(content) {
|
|
109
|
+
if (!content)
|
|
110
|
+
return content;
|
|
111
|
+
try {
|
|
112
|
+
const parsed = JSON.parse(content);
|
|
113
|
+
const out = typeof parsed?.out === 'string' ? parsed.out : '';
|
|
114
|
+
if (out.includes(REPLAYED_EXEC_HINT))
|
|
115
|
+
return content;
|
|
116
|
+
parsed.out = out ? `${REPLAYED_EXEC_HINT}\n${out}` : REPLAYED_EXEC_HINT;
|
|
117
|
+
parsed.replayed = true;
|
|
118
|
+
return JSON.stringify(parsed);
|
|
119
|
+
}
|
|
120
|
+
catch {
|
|
121
|
+
if (content.includes(REPLAYED_EXEC_HINT))
|
|
122
|
+
return content;
|
|
123
|
+
return `${REPLAYED_EXEC_HINT}\n${content}`;
|
|
124
|
+
}
|
|
125
|
+
}
|
|
102
126
|
function readOnlyExecCacheable(content) {
|
|
103
127
|
try {
|
|
104
128
|
const parsed = JSON.parse(content);
|
|
@@ -109,18 +133,6 @@ function readOnlyExecCacheable(content) {
|
|
|
109
133
|
return false;
|
|
110
134
|
}
|
|
111
135
|
}
|
|
112
|
-
function withCachedReadObservationHint(content) {
|
|
113
|
-
if (!content)
|
|
114
|
-
return CACHED_READ_OBSERVATION_HINT;
|
|
115
|
-
if (content.includes(CACHED_READ_OBSERVATION_HINT))
|
|
116
|
-
return content;
|
|
117
|
-
// Keep cached read replay lightweight to avoid re-inflating context.
|
|
118
|
-
const lines = String(content).split(/\r?\n/);
|
|
119
|
-
const previewLines = lines.slice(0, 12);
|
|
120
|
-
const omitted = Math.max(0, lines.length - previewLines.length);
|
|
121
|
-
const trailer = omitted > 0 ? `\n# ... (${omitted} more lines omitted; use previous identical read result)` : '';
|
|
122
|
-
return `${CACHED_READ_OBSERVATION_HINT}\n${previewLines.join('\n')}${trailer}`;
|
|
123
|
-
}
|
|
124
136
|
function ensureInformativeAssistantText(text, ctx) {
|
|
125
137
|
if (String(text ?? '').trim())
|
|
126
138
|
return text;
|
|
@@ -155,6 +167,9 @@ Rules:
|
|
|
155
167
|
- Use read_file with search=... to jump to relevant code; avoid reading whole files.
|
|
156
168
|
- Never call read_file/read_files/list_dir twice in a row with identical arguments (same path/options). Reuse the previous result instead.
|
|
157
169
|
- Prefer apply_patch or edit_range for code edits (token-efficient). Use edit_file only when exact old_text replacement is necessary.
|
|
170
|
+
- Tool-call arguments MUST be strict JSON (double-quoted keys/strings, no comments, no trailing commas).
|
|
171
|
+
- edit_range example: {"path":"src/foo.ts","start_line":10,"end_line":14,"replacement":"line A\nline B"}
|
|
172
|
+
- apply_patch example: {"patch":"--- a/src/foo.ts\n+++ b/src/foo.ts\n@@ -10,2 +10,2 @@\n-old\n+new","files":["src/foo.ts"]}
|
|
158
173
|
- write_file is for new files or explicit full rewrites only. Existing non-empty files require overwrite=true/force=true.
|
|
159
174
|
- Use insert_file for insertions (prepend/append/line).
|
|
160
175
|
- Use exec to run commands, tests, builds; check results before reporting success.
|
|
@@ -336,7 +351,7 @@ function buildToolsSchema(opts) {
|
|
|
336
351
|
type: 'function',
|
|
337
352
|
function: {
|
|
338
353
|
name: 'apply_patch',
|
|
339
|
-
description: 'Apply unified diff patch (multi-file).',
|
|
354
|
+
description: 'Apply unified diff patch (multi-file).\n\nUSAGE EXAMPLE:\n apply_patch({\n patch: "--- a/src/file.ts\\n+++ b/src/file.ts\\n@@ -1,5 +1,5 @@\\n-old text\\n+new text\\n",\n files: ["src/file.ts"]\n })\n\nThe patch must be valid unified diff text. Tool-call arguments must be valid JSON. Use strip=1 if paths include directory prefixes.\nFiles listed must match the paths in the diff.',
|
|
340
355
|
parameters: obj({
|
|
341
356
|
patch: str(),
|
|
342
357
|
files: { type: 'array', items: str() },
|
|
@@ -348,7 +363,7 @@ function buildToolsSchema(opts) {
|
|
|
348
363
|
type: 'function',
|
|
349
364
|
function: {
|
|
350
365
|
name: 'edit_range',
|
|
351
|
-
description: 'Replace a line range in a file.',
|
|
366
|
+
description: 'Replace a line range in a file.\n\nUSAGE EXAMPLE:\n edit_range({\n path: "src/file.ts",\n start_line: 10,\n end_line: 15,\n replacement: "new content\\nmore content"\n })\n\n- start_line and end_line are 1-indexed (first line is 1, not 0)\n- To delete lines, set replacement to empty string ""\n- To insert at a position, set start_line and end_line to the same value\n- Tool-call arguments must be valid JSON (double quotes, no trailing commas/comments)\n- The replacement text replaces the entire range inclusive',
|
|
352
367
|
parameters: obj({
|
|
353
368
|
path: str(),
|
|
354
369
|
start_line: int(1),
|
|
@@ -427,6 +442,11 @@ function buildToolsSchema(opts) {
|
|
|
427
442
|
if (opts?.activeVaultTools) {
|
|
428
443
|
schemas.push({ type: 'function', function: { name: 'vault_search', description: 'Search vault.', parameters: obj({ query: str(), limit: int() }, ['query']) } }, { type: 'function', function: { name: 'vault_note', description: 'Write vault note.', parameters: obj({ key: str(), value: str() }, ['key', 'value']) } });
|
|
429
444
|
}
|
|
445
|
+
else if (opts?.passiveVault) {
|
|
446
|
+
// In passive mode, expose vault_search (read-only) so the model can recover
|
|
447
|
+
// compacted context on demand, but don't expose vault_note (write).
|
|
448
|
+
schemas.push({ type: 'function', function: { name: 'vault_search', description: 'Search vault memory for earlier context that was compacted away. Use sparingly — only when you need to recall specific details from earlier in the conversation.', parameters: obj({ query: str(), limit: int() }, ['query']) } });
|
|
449
|
+
}
|
|
430
450
|
// Phase 9: sys_context tool is only available in sys mode.
|
|
431
451
|
if (opts?.sysMode) {
|
|
432
452
|
schemas.push(SYS_CONTEXT_SCHEMA);
|
|
@@ -671,6 +691,7 @@ export async function createSession(opts) {
|
|
|
671
691
|
let mcpToolsLoaded = !mcpLazySchemaMode;
|
|
672
692
|
const getToolsSchema = () => buildToolsSchema({
|
|
673
693
|
activeVaultTools,
|
|
694
|
+
passiveVault: !activeVaultTools && vaultEnabled && vaultMode === 'passive',
|
|
674
695
|
sysMode: cfg.mode === 'sys',
|
|
675
696
|
lspTools: lspManager?.hasServers() === true,
|
|
676
697
|
mcpTools: mcpToolsLoaded ? (mcpManager?.getEnabledToolSchemas() ?? []) : [],
|
|
@@ -1203,7 +1224,7 @@ export async function createSession(opts) {
|
|
|
1203
1224
|
return 'Vault memory is available. Retrieve prior context with vault_search(query="...") when needed.';
|
|
1204
1225
|
}
|
|
1205
1226
|
if (vaultMode === 'passive') {
|
|
1206
|
-
return 'Vault memory is in passive mode; relevant entries may be auto-injected
|
|
1227
|
+
return 'Vault memory is in passive mode; relevant entries may be auto-injected. You can also use vault_search(query="...") to recover specific earlier context if needed.';
|
|
1207
1228
|
}
|
|
1208
1229
|
return '';
|
|
1209
1230
|
};
|
|
@@ -1260,80 +1281,150 @@ export async function createSession(opts) {
|
|
|
1260
1281
|
content: `${vaultContextHeader} Relevant entries for "${query}":\n${lines.join('\n')}`
|
|
1261
1282
|
});
|
|
1262
1283
|
};
|
|
1263
|
-
|
|
1264
|
-
|
|
1265
|
-
|
|
1266
|
-
|
|
1267
|
-
|
|
1268
|
-
|
|
1269
|
-
|
|
1270
|
-
|
|
1284
|
+
let compactionLockTail = Promise.resolve();
|
|
1285
|
+
let compactionStats = {
|
|
1286
|
+
inProgress: false,
|
|
1287
|
+
lockHeld: false,
|
|
1288
|
+
runs: 0,
|
|
1289
|
+
failedRuns: 0,
|
|
1290
|
+
beforeMessages: 0,
|
|
1291
|
+
afterMessages: 0,
|
|
1292
|
+
freedTokens: 0,
|
|
1293
|
+
archivedToolMessages: 0,
|
|
1294
|
+
droppedMessages: 0,
|
|
1295
|
+
dryRun: false,
|
|
1296
|
+
};
|
|
1297
|
+
const runCompactionWithLock = async (reason, runner) => {
|
|
1298
|
+
const prev = compactionLockTail;
|
|
1299
|
+
let release = () => { };
|
|
1300
|
+
compactionLockTail = new Promise((resolve) => {
|
|
1301
|
+
release = () => resolve();
|
|
1302
|
+
});
|
|
1303
|
+
await prev;
|
|
1304
|
+
compactionStats = {
|
|
1305
|
+
...compactionStats,
|
|
1306
|
+
inProgress: true,
|
|
1307
|
+
lockHeld: true,
|
|
1308
|
+
lastReason: reason,
|
|
1309
|
+
lastError: undefined,
|
|
1310
|
+
updatedAt: new Date().toISOString(),
|
|
1311
|
+
// Reset run stats before fresh calculation.
|
|
1312
|
+
beforeMessages: 0,
|
|
1313
|
+
afterMessages: 0,
|
|
1314
|
+
freedTokens: 0,
|
|
1315
|
+
archivedToolMessages: 0,
|
|
1316
|
+
droppedMessages: 0,
|
|
1317
|
+
dryRun: false,
|
|
1318
|
+
};
|
|
1319
|
+
try {
|
|
1320
|
+
const result = await runner();
|
|
1321
|
+
compactionStats = {
|
|
1322
|
+
...compactionStats,
|
|
1323
|
+
...result,
|
|
1324
|
+
inProgress: false,
|
|
1325
|
+
lockHeld: false,
|
|
1326
|
+
runs: compactionStats.runs + 1,
|
|
1327
|
+
lastReason: reason,
|
|
1328
|
+
updatedAt: new Date().toISOString(),
|
|
1329
|
+
};
|
|
1330
|
+
return result;
|
|
1271
1331
|
}
|
|
1272
|
-
|
|
1273
|
-
|
|
1274
|
-
|
|
1275
|
-
|
|
1276
|
-
|
|
1277
|
-
|
|
1278
|
-
|
|
1279
|
-
|
|
1280
|
-
|
|
1281
|
-
}
|
|
1332
|
+
catch (e) {
|
|
1333
|
+
compactionStats = {
|
|
1334
|
+
...compactionStats,
|
|
1335
|
+
inProgress: false,
|
|
1336
|
+
lockHeld: false,
|
|
1337
|
+
failedRuns: compactionStats.failedRuns + 1,
|
|
1338
|
+
lastReason: reason,
|
|
1339
|
+
lastError: e?.message ?? String(e),
|
|
1340
|
+
updatedAt: new Date().toISOString(),
|
|
1341
|
+
};
|
|
1342
|
+
throw e;
|
|
1282
1343
|
}
|
|
1283
|
-
|
|
1284
|
-
|
|
1285
|
-
if (opts?.topic) {
|
|
1286
|
-
const topic = opts.topic.toLowerCase();
|
|
1287
|
-
dropped = dropped.filter((m) => !userContentToText(m.content ?? '').toLowerCase().includes(topic));
|
|
1288
|
-
const keepFromTopic = messages.filter((m) => userContentToText(m.content ?? '').toLowerCase().includes(topic));
|
|
1289
|
-
compacted = [...compacted, ...keepFromTopic.filter((m) => !compactedByRefs.has(m))];
|
|
1344
|
+
finally {
|
|
1345
|
+
release();
|
|
1290
1346
|
}
|
|
1291
|
-
|
|
1292
|
-
|
|
1293
|
-
const
|
|
1294
|
-
|
|
1295
|
-
|
|
1296
|
-
|
|
1297
|
-
|
|
1298
|
-
|
|
1299
|
-
|
|
1300
|
-
|
|
1301
|
-
|
|
1302
|
-
|
|
1303
|
-
|
|
1304
|
-
|
|
1305
|
-
|
|
1306
|
-
|
|
1347
|
+
};
|
|
1348
|
+
const compactHistory = async (opts) => {
|
|
1349
|
+
const reason = opts?.reason
|
|
1350
|
+
?? (opts?.hard ? 'manual hard compaction'
|
|
1351
|
+
: opts?.force ? 'manual force compaction'
|
|
1352
|
+
: 'manual compaction');
|
|
1353
|
+
return await runCompactionWithLock(reason, async () => {
|
|
1354
|
+
const beforeMessages = messages.length;
|
|
1355
|
+
const beforeTokens = estimateTokensFromMessages(messages);
|
|
1356
|
+
let compacted;
|
|
1357
|
+
if (opts?.hard) {
|
|
1358
|
+
const sys = messages[0]?.role === 'system' ? [messages[0]] : [];
|
|
1359
|
+
const tail = messages.slice(-2);
|
|
1360
|
+
compacted = [...sys, ...tail];
|
|
1361
|
+
}
|
|
1362
|
+
else {
|
|
1363
|
+
compacted = enforceContextBudget({
|
|
1364
|
+
messages,
|
|
1365
|
+
contextWindow,
|
|
1366
|
+
maxTokens,
|
|
1367
|
+
minTailMessages: opts?.force ? 2 : 12,
|
|
1368
|
+
compactAt: opts?.force ? 0.5 : (cfg.compact_at ?? 0.8),
|
|
1369
|
+
toolSchemaTokens: estimateToolSchemaTokens(getToolsSchema()),
|
|
1370
|
+
force: opts?.force,
|
|
1371
|
+
});
|
|
1372
|
+
}
|
|
1373
|
+
const compactedByRefs = new Set(compacted);
|
|
1374
|
+
let dropped = messages.filter((m) => !compactedByRefs.has(m));
|
|
1375
|
+
if (opts?.topic) {
|
|
1376
|
+
const topic = opts.topic.toLowerCase();
|
|
1377
|
+
dropped = dropped.filter((m) => !userContentToText(m.content ?? '').toLowerCase().includes(topic));
|
|
1378
|
+
const keepFromTopic = messages.filter((m) => userContentToText(m.content ?? '').toLowerCase().includes(topic));
|
|
1379
|
+
compacted = [...compacted, ...keepFromTopic.filter((m) => !compactedByRefs.has(m))];
|
|
1380
|
+
}
|
|
1381
|
+
const archivedToolMessages = dropped.filter((m) => m.role === 'tool').length;
|
|
1382
|
+
const afterMessages = compacted.length;
|
|
1383
|
+
const afterTokens = estimateTokensFromMessages(compacted);
|
|
1384
|
+
const freedTokens = Math.max(0, beforeTokens - afterTokens);
|
|
1385
|
+
if (!opts?.dry) {
|
|
1386
|
+
if (dropped.length && vault) {
|
|
1387
|
+
try {
|
|
1388
|
+
// Store the original/current user prompt before compaction so it survives context loss.
|
|
1389
|
+
let userPromptToPreserve = null;
|
|
1390
|
+
for (let i = messages.length - 1; i >= 0; i--) {
|
|
1391
|
+
const m = messages[i];
|
|
1392
|
+
if (m.role === 'user') {
|
|
1393
|
+
const text = userContentToText((m.content ?? '')).trim();
|
|
1394
|
+
if (text && !text.startsWith('[Trifecta Vault') && !text.startsWith('[Vault context') && text.length > 20) {
|
|
1395
|
+
userPromptToPreserve = text;
|
|
1396
|
+
break;
|
|
1397
|
+
}
|
|
1307
1398
|
}
|
|
1308
1399
|
}
|
|
1400
|
+
if (userPromptToPreserve) {
|
|
1401
|
+
await vault.upsertNote('current_task', userPromptToPreserve.slice(0, 2000), 'system');
|
|
1402
|
+
}
|
|
1403
|
+
await vault.archiveToolMessages(dropped, new Map());
|
|
1404
|
+
await vault.note('compaction_summary', `Dropped ${dropped.length} messages (${freedTokens} tokens).`);
|
|
1309
1405
|
}
|
|
1310
|
-
|
|
1311
|
-
|
|
1406
|
+
catch {
|
|
1407
|
+
// best-effort
|
|
1312
1408
|
}
|
|
1313
|
-
await vault.archiveToolMessages(dropped, new Map());
|
|
1314
|
-
await vault.note('compaction_summary', `Dropped ${dropped.length} messages (${freedTokens} tokens).`);
|
|
1315
1409
|
}
|
|
1316
|
-
|
|
1317
|
-
|
|
1318
|
-
|
|
1319
|
-
|
|
1320
|
-
|
|
1321
|
-
|
|
1322
|
-
|
|
1323
|
-
await injectVaultContext().catch(() => { });
|
|
1324
|
-
if (opts?.reason || opts?.force) {
|
|
1325
|
-
injectCompactionReminder(opts?.reason ?? 'history compaction');
|
|
1410
|
+
messages = compacted;
|
|
1411
|
+
if (dropped.length) {
|
|
1412
|
+
messages.push({ role: 'system', content: buildCompactionSystemNote('manual', dropped.length) });
|
|
1413
|
+
await injectVaultContext().catch(() => { });
|
|
1414
|
+
if (opts?.reason || opts?.force) {
|
|
1415
|
+
injectCompactionReminder(opts?.reason ?? 'history compaction');
|
|
1416
|
+
}
|
|
1326
1417
|
}
|
|
1327
1418
|
}
|
|
1328
|
-
|
|
1329
|
-
|
|
1330
|
-
|
|
1331
|
-
|
|
1332
|
-
|
|
1333
|
-
|
|
1334
|
-
|
|
1335
|
-
|
|
1336
|
-
};
|
|
1419
|
+
return {
|
|
1420
|
+
beforeMessages,
|
|
1421
|
+
afterMessages,
|
|
1422
|
+
freedTokens,
|
|
1423
|
+
archivedToolMessages,
|
|
1424
|
+
droppedMessages: dropped.length,
|
|
1425
|
+
dryRun: !!opts?.dry,
|
|
1426
|
+
};
|
|
1427
|
+
});
|
|
1337
1428
|
};
|
|
1338
1429
|
const cumulativeUsage = { prompt: 0, completion: 0 };
|
|
1339
1430
|
const turnDurationsMs = [];
|
|
@@ -1342,6 +1433,17 @@ export async function createSession(opts) {
|
|
|
1342
1433
|
const tgSamples = [];
|
|
1343
1434
|
let lastTurnMetrics;
|
|
1344
1435
|
let lastServerHealth;
|
|
1436
|
+
let lastToolLoopStats = { totalHistory: 0, signatures: [], outcomes: [], telemetry: {
|
|
1437
|
+
callsRegistered: 0,
|
|
1438
|
+
dedupedReplays: 0,
|
|
1439
|
+
readCacheLookups: 0,
|
|
1440
|
+
readCacheHits: 0,
|
|
1441
|
+
warnings: 0,
|
|
1442
|
+
criticals: 0,
|
|
1443
|
+
recoveryRecommended: 0,
|
|
1444
|
+
readCacheHitRate: 0,
|
|
1445
|
+
dedupeRate: 0,
|
|
1446
|
+
} };
|
|
1345
1447
|
let lastModelsProbeMs = 0;
|
|
1346
1448
|
const capturesDir = path.join(stateDir(), 'captures');
|
|
1347
1449
|
let captureEnabled = false;
|
|
@@ -1690,6 +1792,10 @@ export async function createSession(opts) {
|
|
|
1690
1792
|
await hookObj.onToolResult?.(result);
|
|
1691
1793
|
await hookManager.emit('tool_result', { askId, turn: turns, result });
|
|
1692
1794
|
};
|
|
1795
|
+
const emitToolLoop = async (loop) => {
|
|
1796
|
+
await hookObj.onToolLoop?.(loop);
|
|
1797
|
+
await hookManager.emit('tool_loop', { askId, turn: turns, loop });
|
|
1798
|
+
};
|
|
1693
1799
|
const emitTurnEnd = async (stats) => {
|
|
1694
1800
|
await hookObj.onTurnEnd?.(stats);
|
|
1695
1801
|
await hookManager.emit('turn_end', { askId, stats });
|
|
@@ -1825,9 +1931,41 @@ export async function createSession(opts) {
|
|
|
1825
1931
|
const blockedExecAttemptsBySig = new Map();
|
|
1826
1932
|
// Cache successful read-only exec observations by exact signature.
|
|
1827
1933
|
const execObservationCacheBySig = new Map();
|
|
1828
|
-
// Cache successful
|
|
1934
|
+
// Cache ALL successful exec results so repeated identical calls under context
|
|
1935
|
+
// pressure can replay the cached result instead of re-executing.
|
|
1936
|
+
const lastExecResultBySig = new Map();
|
|
1937
|
+
// Cache successful read_file/read_files/list_dir results by signature + mtime for invalidation.
|
|
1829
1938
|
const readFileCacheBySig = new Map();
|
|
1830
1939
|
const READ_FILE_CACHE_TOOLS = new Set(['read_file', 'read_files', 'list_dir']);
|
|
1940
|
+
const toolLoopGuard = new ToolLoopGuard({
|
|
1941
|
+
enabled: cfg.tool_loop_detection?.enabled,
|
|
1942
|
+
historySize: cfg.tool_loop_detection?.history_size,
|
|
1943
|
+
warningThreshold: cfg.tool_loop_detection?.warning_threshold,
|
|
1944
|
+
criticalThreshold: cfg.tool_loop_detection?.critical_threshold,
|
|
1945
|
+
globalCircuitBreakerThreshold: cfg.tool_loop_detection?.global_circuit_breaker_threshold,
|
|
1946
|
+
readCacheTtlMs: cfg.tool_loop_detection?.read_cache_ttl_ms,
|
|
1947
|
+
detectors: {
|
|
1948
|
+
genericRepeat: cfg.tool_loop_detection?.detectors?.generic_repeat,
|
|
1949
|
+
knownPollNoProgress: cfg.tool_loop_detection?.detectors?.known_poll_no_progress,
|
|
1950
|
+
pingPong: cfg.tool_loop_detection?.detectors?.ping_pong,
|
|
1951
|
+
},
|
|
1952
|
+
perTool: Object.fromEntries(Object.entries(cfg.tool_loop_detection?.per_tool ?? {}).map(([tool, policy]) => [
|
|
1953
|
+
tool,
|
|
1954
|
+
{
|
|
1955
|
+
warningThreshold: policy?.warning_threshold,
|
|
1956
|
+
criticalThreshold: policy?.critical_threshold,
|
|
1957
|
+
globalCircuitBreakerThreshold: policy?.global_circuit_breaker_threshold,
|
|
1958
|
+
detectors: {
|
|
1959
|
+
genericRepeat: policy?.detectors?.generic_repeat,
|
|
1960
|
+
knownPollNoProgress: policy?.detectors?.known_poll_no_progress,
|
|
1961
|
+
pingPong: policy?.detectors?.ping_pong,
|
|
1962
|
+
},
|
|
1963
|
+
},
|
|
1964
|
+
])),
|
|
1965
|
+
});
|
|
1966
|
+
const toolLoopWarningKeys = new Set();
|
|
1967
|
+
let forceToollessRecoveryTurn = false;
|
|
1968
|
+
let toollessRecoveryUsed = false;
|
|
1831
1969
|
// Prevent repeating the same "stop rerunning" reminder every turn.
|
|
1832
1970
|
const readOnlyExecHintedSigs = new Set();
|
|
1833
1971
|
// Keep a lightweight breadcrumb for diagnostics on partial failures.
|
|
@@ -1950,51 +2088,63 @@ export async function createSession(opts) {
|
|
|
1950
2088
|
throw new Error(`session timeout exceeded (${cfg.timeout}s) after ${wallElapsed.toFixed(1)}s`);
|
|
1951
2089
|
}
|
|
1952
2090
|
await maybeAutoDetectModelChange();
|
|
1953
|
-
|
|
1954
|
-
|
|
1955
|
-
|
|
1956
|
-
|
|
1957
|
-
|
|
1958
|
-
|
|
1959
|
-
|
|
1960
|
-
|
|
1961
|
-
|
|
1962
|
-
|
|
1963
|
-
|
|
1964
|
-
|
|
1965
|
-
|
|
1966
|
-
|
|
1967
|
-
|
|
1968
|
-
|
|
1969
|
-
|
|
1970
|
-
|
|
1971
|
-
|
|
1972
|
-
const
|
|
1973
|
-
|
|
1974
|
-
|
|
1975
|
-
|
|
1976
|
-
|
|
2091
|
+
await runCompactionWithLock('auto context-budget compaction', async () => {
|
|
2092
|
+
const beforeMsgs = messages;
|
|
2093
|
+
const beforeTokens = estimateTokensFromMessages(beforeMsgs);
|
|
2094
|
+
const compacted = enforceContextBudget({
|
|
2095
|
+
messages: beforeMsgs,
|
|
2096
|
+
contextWindow,
|
|
2097
|
+
maxTokens: maxTokens,
|
|
2098
|
+
minTailMessages: 12,
|
|
2099
|
+
compactAt: cfg.compact_at ?? 0.8,
|
|
2100
|
+
toolSchemaTokens: estimateToolSchemaTokens(getToolsSchema()),
|
|
2101
|
+
});
|
|
2102
|
+
const compactedByRefs = new Set(compacted);
|
|
2103
|
+
const dropped = beforeMsgs.filter((m) => !compactedByRefs.has(m));
|
|
2104
|
+
if (dropped.length && vault) {
|
|
2105
|
+
try {
|
|
2106
|
+
// Store the original/current user prompt before compaction so it survives context loss.
|
|
2107
|
+
// Find the last substantive user message that looks like a task/instruction.
|
|
2108
|
+
let userPromptToPreserve = null;
|
|
2109
|
+
for (let i = beforeMsgs.length - 1; i >= 0; i--) {
|
|
2110
|
+
const m = beforeMsgs[i];
|
|
2111
|
+
if (m.role === 'user') {
|
|
2112
|
+
const text = userContentToText((m.content ?? '')).trim();
|
|
2113
|
+
// Skip vault injection messages and short prompts
|
|
2114
|
+
if (text && !text.startsWith('[Trifecta Vault') && !text.startsWith('[Vault context') && text.length > 20) {
|
|
2115
|
+
userPromptToPreserve = text;
|
|
2116
|
+
break;
|
|
2117
|
+
}
|
|
1977
2118
|
}
|
|
1978
2119
|
}
|
|
2120
|
+
if (userPromptToPreserve) {
|
|
2121
|
+
await vault.upsertNote('current_task', userPromptToPreserve.slice(0, 2000), 'system');
|
|
2122
|
+
}
|
|
2123
|
+
const toArchive = lens
|
|
2124
|
+
? await Promise.all(dropped.map((m) => archiveToolOutputForVault(m)))
|
|
2125
|
+
: dropped;
|
|
2126
|
+
await vault.archiveToolMessages(toArchive, toolNameByCallId);
|
|
1979
2127
|
}
|
|
1980
|
-
|
|
1981
|
-
|
|
2128
|
+
catch (e) {
|
|
2129
|
+
console.warn(`[warn] vault archive failed: ${e instanceof Error ? e.message : String(e)}`);
|
|
1982
2130
|
}
|
|
1983
|
-
const toArchive = lens
|
|
1984
|
-
? await Promise.all(dropped.map((m) => archiveToolOutputForVault(m)))
|
|
1985
|
-
: dropped;
|
|
1986
|
-
await vault.archiveToolMessages(toArchive, toolNameByCallId);
|
|
1987
2131
|
}
|
|
1988
|
-
|
|
1989
|
-
|
|
2132
|
+
messages = compacted;
|
|
2133
|
+
if (dropped.length) {
|
|
2134
|
+
messages.push({ role: 'system', content: buildCompactionSystemNote('auto', dropped.length) });
|
|
2135
|
+
await injectVaultContext().catch(() => { });
|
|
2136
|
+
injectCompactionReminder('auto context-budget compaction');
|
|
1990
2137
|
}
|
|
1991
|
-
|
|
1992
|
-
|
|
1993
|
-
|
|
1994
|
-
|
|
1995
|
-
|
|
1996
|
-
|
|
1997
|
-
|
|
2138
|
+
const afterTokens = estimateTokensFromMessages(compacted);
|
|
2139
|
+
return {
|
|
2140
|
+
beforeMessages: beforeMsgs.length,
|
|
2141
|
+
afterMessages: compacted.length,
|
|
2142
|
+
freedTokens: Math.max(0, beforeTokens - afterTokens),
|
|
2143
|
+
archivedToolMessages: dropped.filter((m) => m.role === 'tool').length,
|
|
2144
|
+
droppedMessages: dropped.length,
|
|
2145
|
+
dryRun: false,
|
|
2146
|
+
};
|
|
2147
|
+
});
|
|
1998
2148
|
const ac = makeAbortController();
|
|
1999
2149
|
inFlight = ac;
|
|
2000
2150
|
// If caller provided an AbortSignal (bench iteration timeout, etc), propagate it.
|
|
@@ -2019,11 +2169,13 @@ export async function createSession(opts) {
|
|
|
2019
2169
|
let resp;
|
|
2020
2170
|
try {
|
|
2021
2171
|
try {
|
|
2172
|
+
const toolsForTurn = forceToollessRecoveryTurn ? [] : getToolsSchema();
|
|
2173
|
+
const toolChoiceForTurn = forceToollessRecoveryTurn ? 'none' : 'auto';
|
|
2022
2174
|
resp = await client.chatStream({
|
|
2023
2175
|
model,
|
|
2024
2176
|
messages,
|
|
2025
|
-
tools:
|
|
2026
|
-
tool_choice:
|
|
2177
|
+
tools: toolsForTurn,
|
|
2178
|
+
tool_choice: toolChoiceForTurn,
|
|
2027
2179
|
temperature,
|
|
2028
2180
|
top_p: topP,
|
|
2029
2181
|
max_tokens: maxTokens,
|
|
@@ -2113,6 +2265,8 @@ export async function createSession(opts) {
|
|
|
2113
2265
|
},
|
|
2114
2266
|
}
|
|
2115
2267
|
: undefined;
|
|
2268
|
+
const wasToollessRecoveryTurn = forceToollessRecoveryTurn;
|
|
2269
|
+
forceToollessRecoveryTurn = false;
|
|
2116
2270
|
const choice0 = resp.choices?.[0] ?? legacyChoice;
|
|
2117
2271
|
const finishReason = choice0?.finish_reason ?? 'unknown';
|
|
2118
2272
|
const msg = choice0?.message;
|
|
@@ -2163,6 +2317,10 @@ export async function createSession(opts) {
|
|
|
2163
2317
|
}
|
|
2164
2318
|
}
|
|
2165
2319
|
}
|
|
2320
|
+
if (wasToollessRecoveryTurn && toolCallsArr?.length) {
|
|
2321
|
+
// Recovery turn explicitly disables tools; ignore any stray tool-call output.
|
|
2322
|
+
toolCallsArr = undefined;
|
|
2323
|
+
}
|
|
2166
2324
|
if (cfg.verbose) {
|
|
2167
2325
|
console.warn(`[turn ${turns}] finish_reason=${finishReason} content_chars=${content.length} visible_chars=${visible.length} tool_calls=${toolCallsArr?.length ?? 0}`);
|
|
2168
2326
|
}
|
|
@@ -2241,12 +2399,17 @@ export async function createSession(opts) {
|
|
|
2241
2399
|
// narration chunk starts on a fresh line (avoids wall-of-text output).
|
|
2242
2400
|
if (visible && hookObj.onToken)
|
|
2243
2401
|
hookObj.onToken('\n');
|
|
2244
|
-
|
|
2402
|
+
const originalToolCallsArr = toolCallsArr;
|
|
2403
|
+
const preparedTurn = toolLoopGuard.prepareTurn(originalToolCallsArr);
|
|
2404
|
+
const replayByCallId = preparedTurn.replayByCallId;
|
|
2405
|
+
const parsedArgsByCallId = preparedTurn.parsedArgsByCallId;
|
|
2406
|
+
toolCallsArr = preparedTurn.uniqueCalls;
|
|
2407
|
+
toolCalls += originalToolCallsArr.length;
|
|
2245
2408
|
const assistantToolCallText = visible || '';
|
|
2246
2409
|
const compactAssistantToolCallText = assistantToolCallText.length > 900
|
|
2247
2410
|
? `${assistantToolCallText.slice(0, 900)}\n[history-compacted: assistant narration truncated before tool execution]`
|
|
2248
2411
|
: assistantToolCallText;
|
|
2249
|
-
messages.push({ role: 'assistant', content: compactAssistantToolCallText, tool_calls:
|
|
2412
|
+
messages.push({ role: 'assistant', content: compactAssistantToolCallText, tool_calls: originalToolCallsArr });
|
|
2250
2413
|
// sigCounts is scoped to the entire ask() run (see above)
|
|
2251
2414
|
// Bridge ConfirmationProvider → legacy confirm callback for tools.
|
|
2252
2415
|
// If a ConfirmationProvider is given, wrap it; otherwise fall back to raw callback.
|
|
@@ -2290,20 +2453,64 @@ export async function createSession(opts) {
|
|
|
2290
2453
|
// We only treat repeated exec as a loop if no file mutations happened since the
|
|
2291
2454
|
// last time we saw that exact exec signature.
|
|
2292
2455
|
const turnSigs = new Set();
|
|
2456
|
+
const sigMetaBySig = new Map();
|
|
2293
2457
|
for (const tc of toolCallsArr) {
|
|
2294
|
-
const
|
|
2458
|
+
const callId = resolveCallId(tc);
|
|
2459
|
+
const parsedArgs = parsedArgsByCallId.get(callId) ?? {};
|
|
2460
|
+
const sig = toolLoopGuard.computeSignature(tc.function.name, parsedArgs);
|
|
2295
2461
|
turnSigs.add(sig);
|
|
2462
|
+
if (!sigMetaBySig.has(sig)) {
|
|
2463
|
+
sigMetaBySig.set(sig, { toolName: tc.function.name, args: parsedArgs });
|
|
2464
|
+
}
|
|
2296
2465
|
}
|
|
2297
2466
|
// Repeated read-only exec calls can be served from cache instead of hard-breaking.
|
|
2298
2467
|
const repeatedReadOnlyExecSigs = new Set();
|
|
2299
2468
|
const readOnlyExecTurnHints = [];
|
|
2469
|
+
// Repeated exec calls (any kind) can replay cached results under pressure.
|
|
2470
|
+
const replayExecSigs = new Set();
|
|
2300
2471
|
// Repeated read_file/read_files/list_dir calls can be served from cache.
|
|
2301
2472
|
const repeatedReadFileSigs = new Set();
|
|
2473
|
+
let shouldForceToollessRecovery = false;
|
|
2474
|
+
const criticalLoopSigs = new Set();
|
|
2475
|
+
for (const tc of toolCallsArr) {
|
|
2476
|
+
const callId = resolveCallId(tc);
|
|
2477
|
+
const args = parsedArgsByCallId.get(callId) ?? {};
|
|
2478
|
+
const detected = toolLoopGuard.detect(tc.function.name, args);
|
|
2479
|
+
const warning = toolLoopGuard.formatWarning(detected, tc.function.name);
|
|
2480
|
+
if (warning) {
|
|
2481
|
+
const warningKey = `${warning.level}:${warning.detector}:${detected.signature}`;
|
|
2482
|
+
if (!toolLoopWarningKeys.has(warningKey)) {
|
|
2483
|
+
toolLoopWarningKeys.add(warningKey);
|
|
2484
|
+
await emitToolLoop({
|
|
2485
|
+
level: warning.level,
|
|
2486
|
+
detector: warning.detector,
|
|
2487
|
+
toolName: warning.toolName,
|
|
2488
|
+
count: warning.count,
|
|
2489
|
+
message: warning.message,
|
|
2490
|
+
});
|
|
2491
|
+
messages.push({
|
|
2492
|
+
role: 'system',
|
|
2493
|
+
content: `[tool-loop ${warning.level}] ${warning.message}. Stop repeating ${warning.toolName} with unchanged inputs; continue with analysis or next step.`,
|
|
2494
|
+
});
|
|
2495
|
+
}
|
|
2496
|
+
}
|
|
2497
|
+
if (toolLoopGuard.shouldDisableToolsNextTurn(detected)) {
|
|
2498
|
+
shouldForceToollessRecovery = true;
|
|
2499
|
+
criticalLoopSigs.add(detected.signature);
|
|
2500
|
+
}
|
|
2501
|
+
}
|
|
2302
2502
|
// Track whether a mutation happened since a given signature was last seen.
|
|
2303
2503
|
// (Tool-loop is single-threaded across turns; this is safe to keep in-memory.)
|
|
2304
2504
|
for (const sig of turnSigs) {
|
|
2305
2505
|
sigCounts.set(sig, (sigCounts.get(sig) ?? 0) + 1);
|
|
2306
|
-
const
|
|
2506
|
+
const sigMeta = sigMetaBySig.get(sig);
|
|
2507
|
+
const toolName = sigMeta?.toolName ?? sig.split(':')[0];
|
|
2508
|
+
if (criticalLoopSigs.has(sig)) {
|
|
2509
|
+
// Critical detector already fired for this signature; recover next turn
|
|
2510
|
+
// with tools disabled instead of throwing in per-tool hard-break logic.
|
|
2511
|
+
shouldForceToollessRecovery = true;
|
|
2512
|
+
continue;
|
|
2513
|
+
}
|
|
2307
2514
|
// For exec loops, only break if nothing changed since last identical exec.
|
|
2308
2515
|
if (toolName === 'exec') {
|
|
2309
2516
|
// If this exact exec signature was seen before, record the mutation version at that time.
|
|
@@ -2314,6 +2521,18 @@ export async function createSession(opts) {
|
|
|
2314
2521
|
mutationVersionBySig.set(sig, mutationVersion);
|
|
2315
2522
|
if (!hasMutatedSince) {
|
|
2316
2523
|
const count = sigCounts.get(sig) ?? 0;
|
|
2524
|
+
// Early replay: if this exact exec was already run (count >= 2) and
|
|
2525
|
+
// we have a cached result, replay it instead of re-executing. This
|
|
2526
|
+
// prevents the compaction death spiral where tool results get dropped,
|
|
2527
|
+
// the model forgets it ran the command, and re-runs it endlessly.
|
|
2528
|
+
// Skip read-only commands that already have their own observation cache —
|
|
2529
|
+
// those are handled by the dedicated read-only path at loopThreshold.
|
|
2530
|
+
const command = execCommandFromSig(sig);
|
|
2531
|
+
const hasReadOnlyCache = looksLikeReadOnlyExecCommand(command) && execObservationCacheBySig.has(sig);
|
|
2532
|
+
if (count >= 2 && lastExecResultBySig.has(sig) && !hasReadOnlyCache) {
|
|
2533
|
+
replayExecSigs.add(sig);
|
|
2534
|
+
continue;
|
|
2535
|
+
}
|
|
2317
2536
|
let loopThreshold = harness.quirks.loopsOnToolError ? 3 : 6;
|
|
2318
2537
|
// If the cached observation already tells the model "no matches found",
|
|
2319
2538
|
// break much earlier — the model is ignoring the hint.
|
|
@@ -2326,7 +2545,8 @@ export async function createSession(opts) {
|
|
|
2326
2545
|
await injectVaultContext().catch(() => { });
|
|
2327
2546
|
}
|
|
2328
2547
|
if (count >= loopThreshold) {
|
|
2329
|
-
const
|
|
2548
|
+
const sigArgs = sigMetaBySig.get(sig)?.args ?? {};
|
|
2549
|
+
const command = typeof sigArgs?.command === 'string' ? String(sigArgs.command) : '';
|
|
2330
2550
|
const canReuseReadOnlyObservation = looksLikeReadOnlyExecCommand(command) &&
|
|
2331
2551
|
execObservationCacheBySig.has(sig);
|
|
2332
2552
|
if (canReuseReadOnlyObservation) {
|
|
@@ -2337,8 +2557,8 @@ export async function createSession(opts) {
|
|
|
2337
2557
|
}
|
|
2338
2558
|
continue;
|
|
2339
2559
|
}
|
|
2340
|
-
const
|
|
2341
|
-
const argsPreview =
|
|
2560
|
+
const argsPreviewRaw = JSON.stringify(sigArgs);
|
|
2561
|
+
const argsPreview = argsPreviewRaw.length > 220 ? argsPreviewRaw.slice(0, 220) + '…' : argsPreviewRaw;
|
|
2342
2562
|
throw new Error(`tool ${toolName}: identical call repeated ${loopThreshold}x across turns; breaking loop. ` +
|
|
2343
2563
|
`args=${argsPreview}`);
|
|
2344
2564
|
}
|
|
@@ -2396,23 +2616,30 @@ export async function createSession(opts) {
|
|
|
2396
2616
|
content: `CRITICAL: DO NOT make another identical call for this ${resourceType}. It HAS NOT CHANGED. You already have the content. Move on to the NEXT step NOW.`,
|
|
2397
2617
|
});
|
|
2398
2618
|
}
|
|
2399
|
-
|
|
2619
|
+
const argsForSig = sigMetaBySig.get(sig)?.args ?? {};
|
|
2620
|
+
const replay = await toolLoopGuard.getReadCacheReplay(toolName, argsForSig, ctx.cwd);
|
|
2621
|
+
if (replay) {
|
|
2400
2622
|
repeatedReadFileSigs.add(sig);
|
|
2401
2623
|
continue;
|
|
2402
2624
|
}
|
|
2403
2625
|
}
|
|
2404
|
-
//
|
|
2626
|
+
// Deterministic recovery at threshold (no hard throw): force one no-tools turn.
|
|
2405
2627
|
if (consec >= hardBreakAt) {
|
|
2406
|
-
|
|
2407
|
-
|
|
2628
|
+
shouldForceToollessRecovery = true;
|
|
2629
|
+
messages.push({
|
|
2630
|
+
role: 'system',
|
|
2631
|
+
content: `[tool-loop critical] ${toolName} repeated ${consec}x with unchanged inputs. ` +
|
|
2632
|
+
'Next turn will run with tools disabled so you must use existing results and provide a concrete next step/final response.',
|
|
2633
|
+
});
|
|
2408
2634
|
}
|
|
2409
2635
|
continue;
|
|
2410
2636
|
}
|
|
2411
2637
|
// Default behavior for mutating/other tools: break on repeated identical signature.
|
|
2412
2638
|
const loopThreshold = harness.quirks.loopsOnToolError ? 2 : 3;
|
|
2413
2639
|
if ((sigCounts.get(sig) ?? 0) >= loopThreshold) {
|
|
2414
|
-
const
|
|
2415
|
-
const
|
|
2640
|
+
const argsObj = sigMetaBySig.get(sig)?.args ?? {};
|
|
2641
|
+
const argsRaw = JSON.stringify(argsObj);
|
|
2642
|
+
const argsPreview = argsRaw.length > 220 ? argsRaw.slice(0, 220) + '…' : argsRaw;
|
|
2416
2643
|
throw new Error(`tool ${toolName}: identical call repeated ${loopThreshold}x across turns; breaking loop. ` +
|
|
2417
2644
|
`args=${argsPreview}\n` +
|
|
2418
2645
|
`Hint: you repeated the same tool call ${loopThreshold} times with identical arguments. ` +
|
|
@@ -2423,6 +2650,31 @@ export async function createSession(opts) {
|
|
|
2423
2650
|
}
|
|
2424
2651
|
// Update consecutive tracking: save this turn's signatures for next turn comparison.
|
|
2425
2652
|
lastTurnSigs = turnSigs;
|
|
2653
|
+
if (shouldForceToollessRecovery) {
|
|
2654
|
+
if (!toollessRecoveryUsed) {
|
|
2655
|
+
forceToollessRecoveryTurn = true;
|
|
2656
|
+
toollessRecoveryUsed = true;
|
|
2657
|
+
messages.push({
|
|
2658
|
+
role: 'user',
|
|
2659
|
+
content: '[system] Critical tool loop detected. Next turn will run with tools disabled. ' +
|
|
2660
|
+
'Use already available tool results to provide a concrete next step or final response; do not request more tools.',
|
|
2661
|
+
});
|
|
2662
|
+
await emitTurnEnd({
|
|
2663
|
+
turn: turns,
|
|
2664
|
+
toolCalls,
|
|
2665
|
+
promptTokens: cumulativeUsage.prompt,
|
|
2666
|
+
completionTokens: cumulativeUsage.completion,
|
|
2667
|
+
promptTokensTurn,
|
|
2668
|
+
completionTokensTurn,
|
|
2669
|
+
ttftMs,
|
|
2670
|
+
ttcMs,
|
|
2671
|
+
ppTps,
|
|
2672
|
+
tgTps,
|
|
2673
|
+
});
|
|
2674
|
+
continue;
|
|
2675
|
+
}
|
|
2676
|
+
throw new AgentLoopBreak('critical tool-loop persisted after one tools-disabled recovery turn. Stopping to avoid infinite loop.');
|
|
2677
|
+
}
|
|
2426
2678
|
const runOne = async (tc) => {
|
|
2427
2679
|
const name = tc.function.name;
|
|
2428
2680
|
const rawArgs = tc.function.arguments ?? '{}';
|
|
@@ -2452,6 +2704,7 @@ export async function createSession(opts) {
|
|
|
2452
2704
|
throw new Error(`unknown tool: ${name}`);
|
|
2453
2705
|
// Keep parsed args by call-id so we can digest/archive tool outputs with context.
|
|
2454
2706
|
toolArgsByCallId.set(callId, args && typeof args === 'object' && !Array.isArray(args) ? args : {});
|
|
2707
|
+
toolLoopGuard.registerCall(name, args && typeof args === 'object' && !Array.isArray(args) ? args : {}, callId);
|
|
2455
2708
|
// Pre-dispatch argument validation.
|
|
2456
2709
|
// - Required params
|
|
2457
2710
|
// - Type/range/enums
|
|
@@ -2556,7 +2809,7 @@ export async function createSession(opts) {
|
|
|
2556
2809
|
return { id: callId, content: '[skipped by user: step mode]' };
|
|
2557
2810
|
}
|
|
2558
2811
|
}
|
|
2559
|
-
const sig =
|
|
2812
|
+
const sig = toolLoopGuard.computeSignature(name, args && typeof args === 'object' && !Array.isArray(args) ? args : {});
|
|
2560
2813
|
let content = '';
|
|
2561
2814
|
let reusedCachedReadOnlyExec = false;
|
|
2562
2815
|
let reusedCachedReadTool = false;
|
|
@@ -2567,10 +2820,18 @@ export async function createSession(opts) {
|
|
|
2567
2820
|
reusedCachedReadOnlyExec = true;
|
|
2568
2821
|
}
|
|
2569
2822
|
}
|
|
2570
|
-
|
|
2571
|
-
|
|
2823
|
+
// Replay any exec result (read-only or not) when the loop detector flagged it.
|
|
2824
|
+
if (name === 'exec' && !reusedCachedReadOnlyExec && replayExecSigs.has(sig)) {
|
|
2825
|
+
const cached = lastExecResultBySig.get(sig);
|
|
2572
2826
|
if (cached) {
|
|
2573
|
-
content =
|
|
2827
|
+
content = withReplayedExecHint(cached);
|
|
2828
|
+
reusedCachedReadOnlyExec = true; // skip re-execution below
|
|
2829
|
+
}
|
|
2830
|
+
}
|
|
2831
|
+
if (READ_FILE_CACHE_TOOLS.has(name) && repeatedReadFileSigs.has(sig)) {
|
|
2832
|
+
const replay = await toolLoopGuard.getReadCacheReplay(name, args, ctx.cwd);
|
|
2833
|
+
if (replay) {
|
|
2834
|
+
content = replay;
|
|
2574
2835
|
reusedCachedReadTool = true;
|
|
2575
2836
|
}
|
|
2576
2837
|
}
|
|
@@ -2588,11 +2849,15 @@ export async function createSession(opts) {
|
|
|
2588
2849
|
const value = await builtInFn(callCtx, args);
|
|
2589
2850
|
content = typeof value === 'string' ? value : JSON.stringify(value);
|
|
2590
2851
|
if (READ_FILE_CACHE_TOOLS.has(name) && typeof content === 'string' && !content.startsWith('ERROR:')) {
|
|
2591
|
-
|
|
2852
|
+
const baseCwd = typeof args?.cwd === 'string' ? String(args.cwd) : ctx.cwd;
|
|
2853
|
+
await toolLoopGuard.storeReadCache(name, args, baseCwd, content);
|
|
2592
2854
|
}
|
|
2593
2855
|
if (name === 'exec') {
|
|
2594
2856
|
// Successful exec clears blocked-loop counters.
|
|
2595
2857
|
blockedExecAttemptsBySig.clear();
|
|
2858
|
+
// Cache every exec result so repeated calls under context pressure
|
|
2859
|
+
// can replay the result instead of re-executing.
|
|
2860
|
+
lastExecResultBySig.set(sig, content);
|
|
2596
2861
|
const cmd = String(args?.command ?? '');
|
|
2597
2862
|
if (looksLikeReadOnlyExecCommand(cmd) && readOnlyExecCacheable(content)) {
|
|
2598
2863
|
execObservationCacheBySig.set(sig, content);
|
|
@@ -2731,6 +2996,10 @@ export async function createSession(opts) {
|
|
|
2731
2996
|
}
|
|
2732
2997
|
}
|
|
2733
2998
|
}
|
|
2999
|
+
toolLoopGuard.registerOutcome(name, args, {
|
|
3000
|
+
toolCallId: callId,
|
|
3001
|
+
result: content,
|
|
3002
|
+
});
|
|
2734
3003
|
return { id: callId, content };
|
|
2735
3004
|
};
|
|
2736
3005
|
const results = [];
|
|
@@ -2751,7 +3020,8 @@ export async function createSession(opts) {
|
|
|
2751
3020
|
// Applies to direct exec attempts and spawn_task delegation attempts.
|
|
2752
3021
|
if (tc.function.name === 'exec' || tc.function.name === 'spawn_task') {
|
|
2753
3022
|
const blockedMatch = msg.match(/^exec:\s*blocked\s*\(([^)]+)\)\s*without --no-confirm\/--yolo:\s*(.*)$/i)
|
|
2754
|
-
|| msg.match(/^(spawn_task):\s*blocked\s*—\s*(.*)$/i)
|
|
3023
|
+
|| msg.match(/^(spawn_task):\s*blocked\s*—\s*(.*)$/i)
|
|
3024
|
+
|| msg.match(/^exec:\s*blocked\s+(background command\b[^.]*)\./i);
|
|
2755
3025
|
if (blockedMatch) {
|
|
2756
3026
|
const reason = (blockedMatch[1] || blockedMatch[2] || 'blocked command').trim();
|
|
2757
3027
|
let parsedArgs = {};
|
|
@@ -2763,7 +3033,8 @@ export async function createSession(opts) {
|
|
|
2763
3033
|
? String(parsedArgs?.command ?? '')
|
|
2764
3034
|
: String(parsedArgs?.task ?? '');
|
|
2765
3035
|
const normalizedReason = reason.toLowerCase();
|
|
2766
|
-
const aggregateByReason = normalizedReason.includes('package install/remove')
|
|
3036
|
+
const aggregateByReason = normalizedReason.includes('package install/remove')
|
|
3037
|
+
|| normalizedReason.includes('background command');
|
|
2767
3038
|
const sig = aggregateByReason
|
|
2768
3039
|
? `${tc.function.name}|${reason}`
|
|
2769
3040
|
: `${tc.function.name}|${reason}|${cmd}`;
|
|
@@ -2785,7 +3056,33 @@ export async function createSession(opts) {
|
|
|
2785
3056
|
retryable: te.retryable,
|
|
2786
3057
|
result: toolErrorContent,
|
|
2787
3058
|
});
|
|
2788
|
-
|
|
3059
|
+
let parsedArgs = {};
|
|
3060
|
+
try {
|
|
3061
|
+
const parsed = JSON.parse(tc.function.arguments ?? '{}');
|
|
3062
|
+
if (parsed && typeof parsed === 'object' && !Array.isArray(parsed)) {
|
|
3063
|
+
parsedArgs = parsed;
|
|
3064
|
+
}
|
|
3065
|
+
}
|
|
3066
|
+
catch {
|
|
3067
|
+
// keep empty object
|
|
3068
|
+
}
|
|
3069
|
+
toolLoopGuard.registerOutcome(tc.function.name, parsedArgs, {
|
|
3070
|
+
toolCallId: callId,
|
|
3071
|
+
error: msg,
|
|
3072
|
+
});
|
|
3073
|
+
// Inject fallback guidance after 2 consecutive read_file/read_files failures.
|
|
3074
|
+
let resultContent = toolErrorContent;
|
|
3075
|
+
if (tc.function.name === 'read_file' || tc.function.name === 'read_files') {
|
|
3076
|
+
const failureCount = toolLoopGuard.getReadFileFailureCount();
|
|
3077
|
+
if (failureCount >= 2) {
|
|
3078
|
+
resultContent +=
|
|
3079
|
+
`\n\n[WARNING: ${tc.function.name} has failed ${failureCount} times consecutively. ` +
|
|
3080
|
+
'Try using `sed` and the `edit_range` tool; if those do not work, create a temporary file with the full contents and save it. ' +
|
|
3081
|
+
'Then remove the existing file and rename the temporary file to bypass edit_file failing.]';
|
|
3082
|
+
toolLoopGuard.resetReadFileFailureCount();
|
|
3083
|
+
}
|
|
3084
|
+
}
|
|
3085
|
+
return { id: callId, content: resultContent };
|
|
2789
3086
|
};
|
|
2790
3087
|
// ── Anti-scan guardrails (§ read budget, dir scan, same-search) ──
|
|
2791
3088
|
const readOnlyInTurn = toolCallsArr.filter((tc) => isReadOnlyToolDynamic(tc.function.name));
|
|
@@ -2848,6 +3145,19 @@ export async function createSession(opts) {
|
|
|
2848
3145
|
}
|
|
2849
3146
|
}
|
|
2850
3147
|
}
|
|
3148
|
+
if (replayByCallId.size > 0) {
|
|
3149
|
+
const canonicalById = new Map(results.map((r) => [r.id, r.content]));
|
|
3150
|
+
for (const [dupId, canonicalId] of replayByCallId.entries()) {
|
|
3151
|
+
const canonical = canonicalById.get(canonicalId);
|
|
3152
|
+
if (canonical == null)
|
|
3153
|
+
continue;
|
|
3154
|
+
results.push({
|
|
3155
|
+
id: dupId,
|
|
3156
|
+
content: `[idlehands dedupe] Identical tool call replayed from ${canonicalId}. ` +
|
|
3157
|
+
'Use that earlier tool result; no new execution was performed.',
|
|
3158
|
+
});
|
|
3159
|
+
}
|
|
3160
|
+
}
|
|
2851
3161
|
// Bail immediately if cancelled during tool execution
|
|
2852
3162
|
if (ac.signal.aborted)
|
|
2853
3163
|
break;
|
|
@@ -2895,6 +3205,8 @@ export async function createSession(opts) {
|
|
|
2895
3205
|
'Do not narrate. Fix required/mistyped fields and unknown keys.',
|
|
2896
3206
|
});
|
|
2897
3207
|
}
|
|
3208
|
+
// Update session-level tool loop stats for observability
|
|
3209
|
+
lastToolLoopStats = toolLoopGuard.getStats();
|
|
2898
3210
|
// Hook: onTurnEnd (Phase 8.5)
|
|
2899
3211
|
await emitTurnEnd({
|
|
2900
3212
|
turn: turns,
|
|
@@ -3068,6 +3380,7 @@ export async function createSession(opts) {
|
|
|
3068
3380
|
listModels,
|
|
3069
3381
|
refreshServerHealth,
|
|
3070
3382
|
getPerfSummary,
|
|
3383
|
+
getToolLoopStats: () => lastToolLoopStats,
|
|
3071
3384
|
captureOn,
|
|
3072
3385
|
captureOff,
|
|
3073
3386
|
captureLast,
|
|
@@ -3105,6 +3418,9 @@ export async function createSession(opts) {
|
|
|
3105
3418
|
get planSteps() {
|
|
3106
3419
|
return planSteps;
|
|
3107
3420
|
},
|
|
3421
|
+
get compactionStats() {
|
|
3422
|
+
return { ...compactionStats };
|
|
3423
|
+
},
|
|
3108
3424
|
executePlanStep,
|
|
3109
3425
|
clearPlan,
|
|
3110
3426
|
compactHistory
|