@visorcraft/idlehands 1.1.17 → 1.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/agent/formatting.js +30 -13
- package/dist/agent/formatting.js.map +1 -1
- package/dist/agent/review-artifact.js +12 -8
- package/dist/agent/review-artifact.js.map +1 -1
- package/dist/agent/tool-calls.js +57 -20
- package/dist/agent/tool-calls.js.map +1 -1
- package/dist/agent/tool-loop-detection.js +310 -0
- package/dist/agent/tool-loop-detection.js.map +1 -0
- package/dist/agent/tool-loop-guard.js +235 -0
- package/dist/agent/tool-loop-guard.js.map +1 -0
- package/dist/agent.js +442 -141
- package/dist/agent.js.map +1 -1
- package/dist/anton/controller.js +46 -30
- package/dist/anton/controller.js.map +1 -1
- package/dist/anton/lock.js +5 -1
- package/dist/anton/lock.js.map +1 -1
- package/dist/anton/parser.js +18 -19
- package/dist/anton/parser.js.map +1 -1
- package/dist/anton/prompt.js +42 -11
- package/dist/anton/prompt.js.map +1 -1
- package/dist/anton/reporter.js.map +1 -1
- package/dist/anton/session.js.map +1 -1
- package/dist/anton/verifier.js +3 -5
- package/dist/anton/verifier.js.map +1 -1
- package/dist/bench/compare.js +53 -20
- package/dist/bench/compare.js.map +1 -1
- package/dist/bench/openclaw.js +4 -4
- package/dist/bench/openclaw.js.map +1 -1
- package/dist/bench/report.js +11 -3
- package/dist/bench/report.js.map +1 -1
- package/dist/bench/runner.js +20 -14
- package/dist/bench/runner.js.map +1 -1
- package/dist/bot/commands.js +65 -31
- package/dist/bot/commands.js.map +1 -1
- package/dist/bot/confirm-discord.js +32 -9
- package/dist/bot/confirm-discord.js.map +1 -1
- package/dist/bot/confirm-telegram.js +26 -10
- package/dist/bot/confirm-telegram.js.map +1 -1
- package/dist/bot/dir-guard.js +18 -3
- package/dist/bot/dir-guard.js.map +1 -1
- package/dist/bot/discord-routing.js +28 -4
- package/dist/bot/discord-routing.js.map +1 -1
- package/dist/bot/discord-streaming.js +3 -3
- package/dist/bot/discord-streaming.js.map +1 -1
- package/dist/bot/discord.js +82 -37
- package/dist/bot/discord.js.map +1 -1
- package/dist/bot/escalation.js +124 -0
- package/dist/bot/escalation.js.map +1 -0
- package/dist/bot/format.js +2 -5
- package/dist/bot/format.js.map +1 -1
- package/dist/bot/session-manager.js +17 -6
- package/dist/bot/session-manager.js.map +1 -1
- package/dist/bot/telegram.js +88 -28
- package/dist/bot/telegram.js.map +1 -1
- package/dist/cli/agent-turn.js +10 -4
- package/dist/cli/agent-turn.js.map +1 -1
- package/dist/cli/args.js +51 -9
- package/dist/cli/args.js.map +1 -1
- package/dist/cli/bot.js +19 -9
- package/dist/cli/bot.js.map +1 -1
- package/dist/cli/build-repl-context.js +60 -26
- package/dist/cli/build-repl-context.js.map +1 -1
- package/dist/cli/command-registry.js.map +1 -1
- package/dist/cli/commands/anton.js +5 -3
- package/dist/cli/commands/anton.js.map +1 -1
- package/dist/cli/commands/editing.js +27 -12
- package/dist/cli/commands/editing.js.map +1 -1
- package/dist/cli/commands/model.js +16 -7
- package/dist/cli/commands/model.js.map +1 -1
- package/dist/cli/commands/project.js +52 -17
- package/dist/cli/commands/project.js.map +1 -1
- package/dist/cli/commands/runtime.js +1 -1
- package/dist/cli/commands/runtime.js.map +1 -1
- package/dist/cli/commands/secrets.js +279 -0
- package/dist/cli/commands/secrets.js.map +1 -0
- package/dist/cli/commands/session.js +49 -1
- package/dist/cli/commands/session.js.map +1 -1
- package/dist/cli/commands/tools.js +3 -1
- package/dist/cli/commands/tools.js.map +1 -1
- package/dist/cli/commands/trifecta.js +1 -1
- package/dist/cli/commands/trifecta.js.map +1 -1
- package/dist/cli/commands/tui.js.map +1 -1
- package/dist/cli/init.js +50 -16
- package/dist/cli/init.js.map +1 -1
- package/dist/cli/input.js +25 -7
- package/dist/cli/input.js.map +1 -1
- package/dist/cli/oneshot.js +31 -19
- package/dist/cli/oneshot.js.map +1 -1
- package/dist/cli/repl-dispatch.js +10 -6
- package/dist/cli/repl-dispatch.js.map +1 -1
- package/dist/cli/runtime-cmds.js +110 -46
- package/dist/cli/runtime-cmds.js.map +1 -1
- package/dist/cli/service.js +3 -3
- package/dist/cli/service.js.map +1 -1
- package/dist/cli/session-state.js +12 -5
- package/dist/cli/session-state.js.map +1 -1
- package/dist/cli/setup.js +86 -33
- package/dist/cli/setup.js.map +1 -1
- package/dist/cli/shell.js +4 -4
- package/dist/cli/shell.js.map +1 -1
- package/dist/cli/status.js +56 -12
- package/dist/cli/status.js.map +1 -1
- package/dist/client.js +40 -21
- package/dist/client.js.map +1 -1
- package/dist/commands.js +1 -1
- package/dist/commands.js.map +1 -1
- package/dist/config.js +171 -15
- package/dist/config.js.map +1 -1
- package/dist/confirm/auto.js.map +1 -1
- package/dist/confirm/headless.js +13 -2
- package/dist/confirm/headless.js.map +1 -1
- package/dist/confirm/terminal.js +1 -5
- package/dist/confirm/terminal.js.map +1 -1
- package/dist/context.js +9 -3
- package/dist/context.js.map +1 -1
- package/dist/git.js +56 -61
- package/dist/git.js.map +1 -1
- package/dist/harnesses.js +137 -37
- package/dist/harnesses.js.map +1 -1
- package/dist/history.js +12 -4
- package/dist/history.js.map +1 -1
- package/dist/hooks/index.js +2 -2
- package/dist/hooks/index.js.map +1 -1
- package/dist/hooks/loader.js +6 -5
- package/dist/hooks/loader.js.map +1 -1
- package/dist/hooks/manager.js.map +1 -1
- package/dist/hooks/plugins/example-console.js.map +1 -1
- package/dist/hooks/scaffold.js +8 -6
- package/dist/hooks/scaffold.js.map +1 -1
- package/dist/index.js +120 -66
- package/dist/index.js.map +1 -1
- package/dist/indexer.js +6 -18
- package/dist/indexer.js.map +1 -1
- package/dist/jsonrpc.js.map +1 -1
- package/dist/lens.js +38 -16
- package/dist/lens.js.map +1 -1
- package/dist/lsp.js +60 -24
- package/dist/lsp.js.map +1 -1
- package/dist/markdown.js +6 -6
- package/dist/markdown.js.map +1 -1
- package/dist/mcp.js +15 -6
- package/dist/mcp.js.map +1 -1
- package/dist/model-customization.js +7 -3
- package/dist/model-customization.js.map +1 -1
- package/dist/progress/message-edit-scheduler.js +15 -3
- package/dist/progress/message-edit-scheduler.js.map +1 -1
- package/dist/progress/progress-message-renderer.js.map +1 -1
- package/dist/progress/progress-presenter.js +3 -3
- package/dist/progress/progress-presenter.js.map +1 -1
- package/dist/progress/serialize-telegram.js.map +1 -1
- package/dist/progress/tool-summary.js +3 -1
- package/dist/progress/tool-summary.js.map +1 -1
- package/dist/progress/turn-progress.js +3 -1
- package/dist/progress/turn-progress.js.map +1 -1
- package/dist/recovery.js +11 -3
- package/dist/recovery.js.map +1 -1
- package/dist/replay.js +9 -3
- package/dist/replay.js.map +1 -1
- package/dist/replay_cli.js +5 -3
- package/dist/replay_cli.js.map +1 -1
- package/dist/runtime/executor.js +66 -20
- package/dist/runtime/executor.js.map +1 -1
- package/dist/runtime/health.js.map +1 -1
- package/dist/runtime/host-runner.js +103 -0
- package/dist/runtime/host-runner.js.map +1 -0
- package/dist/runtime/planner.js +3 -1
- package/dist/runtime/planner.js.map +1 -1
- package/dist/runtime/secrets.js +102 -0
- package/dist/runtime/secrets.js.map +1 -0
- package/dist/runtime/store.js +95 -19
- package/dist/runtime/store.js.map +1 -1
- package/dist/safety.js +38 -21
- package/dist/safety.js.map +1 -1
- package/dist/spinner.js +7 -8
- package/dist/spinner.js.map +1 -1
- package/dist/sys/context.js +3 -3
- package/dist/sys/context.js.map +1 -1
- package/dist/term.js +1 -1
- package/dist/term.js.map +1 -1
- package/dist/themes.js +11 -5
- package/dist/themes.js.map +1 -1
- package/dist/tools/tool-error.js +2 -5
- package/dist/tools/tool-error.js.map +1 -1
- package/dist/tools.js +69 -34
- package/dist/tools.js.map +1 -1
- package/dist/tui/branch-picker.js +9 -3
- package/dist/tui/branch-picker.js.map +1 -1
- package/dist/tui/command-handler.js +88 -36
- package/dist/tui/command-handler.js.map +1 -1
- package/dist/tui/confirm.js.map +1 -1
- package/dist/tui/controller.js +234 -117
- package/dist/tui/controller.js.map +1 -1
- package/dist/tui/event-bridge.js.map +1 -1
- package/dist/tui/keymap.js +93 -71
- package/dist/tui/keymap.js.map +1 -1
- package/dist/tui/layout.js +9 -1
- package/dist/tui/layout.js.map +1 -1
- package/dist/tui/render.js +17 -5
- package/dist/tui/render.js.map +1 -1
- package/dist/tui/screen.js.map +1 -1
- package/dist/tui/state.js +129 -63
- package/dist/tui/state.js.map +1 -1
- package/dist/tui/theme.js +12 -3
- package/dist/tui/theme.js.map +1 -1
- package/dist/upgrade.js +28 -15
- package/dist/upgrade.js.map +1 -1
- package/dist/utils.js +8 -5
- package/dist/utils.js.map +1 -1
- package/dist/vault.js +48 -12
- package/dist/vault.js.map +1 -1
- package/dist/vim.js.map +1 -1
- package/package.json +11 -2
package/dist/agent.js
CHANGED
|
@@ -17,6 +17,7 @@ import { LspManager, detectInstalledLspServers } from './lsp.js';
|
|
|
17
17
|
import { generateMinimalDiff, toolResultSummary, execCommandFromSig, formatDurationMs, looksLikePlanningNarration, capTextByApproxTokens, isLikelyBinaryBuffer, sanitizePathsInMessage, digestToolResult, } from './agent/formatting.js';
|
|
18
18
|
import { parseToolCallsFromContent, getMissingRequiredParams, getArgValidationIssues, stripMarkdownFences } from './agent/tool-calls.js';
|
|
19
19
|
import { ToolError, ValidationError } from './tools/tool-error.js';
|
|
20
|
+
import { ToolLoopGuard } from './agent/tool-loop-guard.js';
|
|
20
21
|
export { parseToolCallsFromContent };
|
|
21
22
|
import { reviewArtifactKeys, looksLikeCodeReviewRequest, looksLikeReviewRetrievalRequest, retrievalAllowsStaleArtifact, parseReviewArtifactStalePolicy, parseReviewArtifact, reviewArtifactStaleReason, gitHead, normalizeModelsResponse, } from './agent/review-artifact.js';
|
|
22
23
|
import fs from 'node:fs/promises';
|
|
@@ -27,9 +28,13 @@ function makeAbortController() {
|
|
|
27
28
|
return new AbortController();
|
|
28
29
|
}
|
|
29
30
|
const CACHED_EXEC_OBSERVATION_HINT = '[idlehands hint] Reused cached output for repeated read-only exec call (unchanged observation).';
|
|
30
|
-
const CACHED_READ_OBSERVATION_HINT = '[idlehands hint] Reused cached output for repeated identical read call.';
|
|
31
31
|
function looksLikeReadOnlyExecCommand(command) {
|
|
32
|
-
|
|
32
|
+
// Strip leading `cd <path> &&` / `cd <path>;` prefixes — cd is read-only
|
|
33
|
+
// navigation, the actual command that matters comes after.
|
|
34
|
+
let cmd = String(command || '').trim().toLowerCase();
|
|
35
|
+
if (!cmd)
|
|
36
|
+
return false;
|
|
37
|
+
cmd = cmd.replace(/^(\s*cd\s+[^;&|]+\s*(?:&&|;)\s*)+/i, '').trim();
|
|
33
38
|
if (!cmd)
|
|
34
39
|
return false;
|
|
35
40
|
// Shell redirects are likely writes.
|
|
@@ -99,6 +104,25 @@ function withCachedExecObservationHint(content) {
|
|
|
99
104
|
return `${content}\n${CACHED_EXEC_OBSERVATION_HINT}`;
|
|
100
105
|
}
|
|
101
106
|
}
|
|
107
|
+
const REPLAYED_EXEC_HINT = '[idlehands hint] You already ran this exact command. This is the replayed result from your previous execution. Do NOT re-run it — use the output below to continue your task.';
|
|
108
|
+
function withReplayedExecHint(content) {
|
|
109
|
+
if (!content)
|
|
110
|
+
return content;
|
|
111
|
+
try {
|
|
112
|
+
const parsed = JSON.parse(content);
|
|
113
|
+
const out = typeof parsed?.out === 'string' ? parsed.out : '';
|
|
114
|
+
if (out.includes(REPLAYED_EXEC_HINT))
|
|
115
|
+
return content;
|
|
116
|
+
parsed.out = out ? `${REPLAYED_EXEC_HINT}\n${out}` : REPLAYED_EXEC_HINT;
|
|
117
|
+
parsed.replayed = true;
|
|
118
|
+
return JSON.stringify(parsed);
|
|
119
|
+
}
|
|
120
|
+
catch {
|
|
121
|
+
if (content.includes(REPLAYED_EXEC_HINT))
|
|
122
|
+
return content;
|
|
123
|
+
return `${REPLAYED_EXEC_HINT}\n${content}`;
|
|
124
|
+
}
|
|
125
|
+
}
|
|
102
126
|
function readOnlyExecCacheable(content) {
|
|
103
127
|
try {
|
|
104
128
|
const parsed = JSON.parse(content);
|
|
@@ -109,18 +133,6 @@ function readOnlyExecCacheable(content) {
|
|
|
109
133
|
return false;
|
|
110
134
|
}
|
|
111
135
|
}
|
|
112
|
-
function withCachedReadObservationHint(content) {
|
|
113
|
-
if (!content)
|
|
114
|
-
return CACHED_READ_OBSERVATION_HINT;
|
|
115
|
-
if (content.includes(CACHED_READ_OBSERVATION_HINT))
|
|
116
|
-
return content;
|
|
117
|
-
// Keep cached read replay lightweight to avoid re-inflating context.
|
|
118
|
-
const lines = String(content).split(/\r?\n/);
|
|
119
|
-
const previewLines = lines.slice(0, 12);
|
|
120
|
-
const omitted = Math.max(0, lines.length - previewLines.length);
|
|
121
|
-
const trailer = omitted > 0 ? `\n# ... (${omitted} more lines omitted; use previous identical read result)` : '';
|
|
122
|
-
return `${CACHED_READ_OBSERVATION_HINT}\n${previewLines.join('\n')}${trailer}`;
|
|
123
|
-
}
|
|
124
136
|
function ensureInformativeAssistantText(text, ctx) {
|
|
125
137
|
if (String(text ?? '').trim())
|
|
126
138
|
return text;
|
|
@@ -427,6 +439,11 @@ function buildToolsSchema(opts) {
|
|
|
427
439
|
if (opts?.activeVaultTools) {
|
|
428
440
|
schemas.push({ type: 'function', function: { name: 'vault_search', description: 'Search vault.', parameters: obj({ query: str(), limit: int() }, ['query']) } }, { type: 'function', function: { name: 'vault_note', description: 'Write vault note.', parameters: obj({ key: str(), value: str() }, ['key', 'value']) } });
|
|
429
441
|
}
|
|
442
|
+
else if (opts?.passiveVault) {
|
|
443
|
+
// In passive mode, expose vault_search (read-only) so the model can recover
|
|
444
|
+
// compacted context on demand, but don't expose vault_note (write).
|
|
445
|
+
schemas.push({ type: 'function', function: { name: 'vault_search', description: 'Search vault memory for earlier context that was compacted away. Use sparingly — only when you need to recall specific details from earlier in the conversation.', parameters: obj({ query: str(), limit: int() }, ['query']) } });
|
|
446
|
+
}
|
|
430
447
|
// Phase 9: sys_context tool is only available in sys mode.
|
|
431
448
|
if (opts?.sysMode) {
|
|
432
449
|
schemas.push(SYS_CONTEXT_SCHEMA);
|
|
@@ -671,6 +688,7 @@ export async function createSession(opts) {
|
|
|
671
688
|
let mcpToolsLoaded = !mcpLazySchemaMode;
|
|
672
689
|
const getToolsSchema = () => buildToolsSchema({
|
|
673
690
|
activeVaultTools,
|
|
691
|
+
passiveVault: !activeVaultTools && vaultEnabled && vaultMode === 'passive',
|
|
674
692
|
sysMode: cfg.mode === 'sys',
|
|
675
693
|
lspTools: lspManager?.hasServers() === true,
|
|
676
694
|
mcpTools: mcpToolsLoaded ? (mcpManager?.getEnabledToolSchemas() ?? []) : [],
|
|
@@ -1203,7 +1221,7 @@ export async function createSession(opts) {
|
|
|
1203
1221
|
return 'Vault memory is available. Retrieve prior context with vault_search(query="...") when needed.';
|
|
1204
1222
|
}
|
|
1205
1223
|
if (vaultMode === 'passive') {
|
|
1206
|
-
return 'Vault memory is in passive mode; relevant entries may be auto-injected
|
|
1224
|
+
return 'Vault memory is in passive mode; relevant entries may be auto-injected. You can also use vault_search(query="...") to recover specific earlier context if needed.';
|
|
1207
1225
|
}
|
|
1208
1226
|
return '';
|
|
1209
1227
|
};
|
|
@@ -1260,80 +1278,150 @@ export async function createSession(opts) {
|
|
|
1260
1278
|
content: `${vaultContextHeader} Relevant entries for "${query}":\n${lines.join('\n')}`
|
|
1261
1279
|
});
|
|
1262
1280
|
};
|
|
1263
|
-
|
|
1264
|
-
|
|
1265
|
-
|
|
1266
|
-
|
|
1267
|
-
|
|
1268
|
-
|
|
1269
|
-
|
|
1270
|
-
|
|
1281
|
+
let compactionLockTail = Promise.resolve();
|
|
1282
|
+
let compactionStats = {
|
|
1283
|
+
inProgress: false,
|
|
1284
|
+
lockHeld: false,
|
|
1285
|
+
runs: 0,
|
|
1286
|
+
failedRuns: 0,
|
|
1287
|
+
beforeMessages: 0,
|
|
1288
|
+
afterMessages: 0,
|
|
1289
|
+
freedTokens: 0,
|
|
1290
|
+
archivedToolMessages: 0,
|
|
1291
|
+
droppedMessages: 0,
|
|
1292
|
+
dryRun: false,
|
|
1293
|
+
};
|
|
1294
|
+
const runCompactionWithLock = async (reason, runner) => {
|
|
1295
|
+
const prev = compactionLockTail;
|
|
1296
|
+
let release = () => { };
|
|
1297
|
+
compactionLockTail = new Promise((resolve) => {
|
|
1298
|
+
release = () => resolve();
|
|
1299
|
+
});
|
|
1300
|
+
await prev;
|
|
1301
|
+
compactionStats = {
|
|
1302
|
+
...compactionStats,
|
|
1303
|
+
inProgress: true,
|
|
1304
|
+
lockHeld: true,
|
|
1305
|
+
lastReason: reason,
|
|
1306
|
+
lastError: undefined,
|
|
1307
|
+
updatedAt: new Date().toISOString(),
|
|
1308
|
+
// Reset run stats before fresh calculation.
|
|
1309
|
+
beforeMessages: 0,
|
|
1310
|
+
afterMessages: 0,
|
|
1311
|
+
freedTokens: 0,
|
|
1312
|
+
archivedToolMessages: 0,
|
|
1313
|
+
droppedMessages: 0,
|
|
1314
|
+
dryRun: false,
|
|
1315
|
+
};
|
|
1316
|
+
try {
|
|
1317
|
+
const result = await runner();
|
|
1318
|
+
compactionStats = {
|
|
1319
|
+
...compactionStats,
|
|
1320
|
+
...result,
|
|
1321
|
+
inProgress: false,
|
|
1322
|
+
lockHeld: false,
|
|
1323
|
+
runs: compactionStats.runs + 1,
|
|
1324
|
+
lastReason: reason,
|
|
1325
|
+
updatedAt: new Date().toISOString(),
|
|
1326
|
+
};
|
|
1327
|
+
return result;
|
|
1271
1328
|
}
|
|
1272
|
-
|
|
1273
|
-
|
|
1274
|
-
|
|
1275
|
-
|
|
1276
|
-
|
|
1277
|
-
|
|
1278
|
-
|
|
1279
|
-
|
|
1280
|
-
|
|
1281
|
-
}
|
|
1329
|
+
catch (e) {
|
|
1330
|
+
compactionStats = {
|
|
1331
|
+
...compactionStats,
|
|
1332
|
+
inProgress: false,
|
|
1333
|
+
lockHeld: false,
|
|
1334
|
+
failedRuns: compactionStats.failedRuns + 1,
|
|
1335
|
+
lastReason: reason,
|
|
1336
|
+
lastError: e?.message ?? String(e),
|
|
1337
|
+
updatedAt: new Date().toISOString(),
|
|
1338
|
+
};
|
|
1339
|
+
throw e;
|
|
1282
1340
|
}
|
|
1283
|
-
|
|
1284
|
-
|
|
1285
|
-
if (opts?.topic) {
|
|
1286
|
-
const topic = opts.topic.toLowerCase();
|
|
1287
|
-
dropped = dropped.filter((m) => !userContentToText(m.content ?? '').toLowerCase().includes(topic));
|
|
1288
|
-
const keepFromTopic = messages.filter((m) => userContentToText(m.content ?? '').toLowerCase().includes(topic));
|
|
1289
|
-
compacted = [...compacted, ...keepFromTopic.filter((m) => !compactedByRefs.has(m))];
|
|
1341
|
+
finally {
|
|
1342
|
+
release();
|
|
1290
1343
|
}
|
|
1291
|
-
|
|
1292
|
-
|
|
1293
|
-
const
|
|
1294
|
-
|
|
1295
|
-
|
|
1296
|
-
|
|
1297
|
-
|
|
1298
|
-
|
|
1299
|
-
|
|
1300
|
-
|
|
1301
|
-
|
|
1302
|
-
|
|
1303
|
-
|
|
1304
|
-
|
|
1305
|
-
|
|
1306
|
-
|
|
1344
|
+
};
|
|
1345
|
+
const compactHistory = async (opts) => {
|
|
1346
|
+
const reason = opts?.reason
|
|
1347
|
+
?? (opts?.hard ? 'manual hard compaction'
|
|
1348
|
+
: opts?.force ? 'manual force compaction'
|
|
1349
|
+
: 'manual compaction');
|
|
1350
|
+
return await runCompactionWithLock(reason, async () => {
|
|
1351
|
+
const beforeMessages = messages.length;
|
|
1352
|
+
const beforeTokens = estimateTokensFromMessages(messages);
|
|
1353
|
+
let compacted;
|
|
1354
|
+
if (opts?.hard) {
|
|
1355
|
+
const sys = messages[0]?.role === 'system' ? [messages[0]] : [];
|
|
1356
|
+
const tail = messages.slice(-2);
|
|
1357
|
+
compacted = [...sys, ...tail];
|
|
1358
|
+
}
|
|
1359
|
+
else {
|
|
1360
|
+
compacted = enforceContextBudget({
|
|
1361
|
+
messages,
|
|
1362
|
+
contextWindow,
|
|
1363
|
+
maxTokens,
|
|
1364
|
+
minTailMessages: opts?.force ? 2 : 12,
|
|
1365
|
+
compactAt: opts?.force ? 0.5 : (cfg.compact_at ?? 0.8),
|
|
1366
|
+
toolSchemaTokens: estimateToolSchemaTokens(getToolsSchema()),
|
|
1367
|
+
force: opts?.force,
|
|
1368
|
+
});
|
|
1369
|
+
}
|
|
1370
|
+
const compactedByRefs = new Set(compacted);
|
|
1371
|
+
let dropped = messages.filter((m) => !compactedByRefs.has(m));
|
|
1372
|
+
if (opts?.topic) {
|
|
1373
|
+
const topic = opts.topic.toLowerCase();
|
|
1374
|
+
dropped = dropped.filter((m) => !userContentToText(m.content ?? '').toLowerCase().includes(topic));
|
|
1375
|
+
const keepFromTopic = messages.filter((m) => userContentToText(m.content ?? '').toLowerCase().includes(topic));
|
|
1376
|
+
compacted = [...compacted, ...keepFromTopic.filter((m) => !compactedByRefs.has(m))];
|
|
1377
|
+
}
|
|
1378
|
+
const archivedToolMessages = dropped.filter((m) => m.role === 'tool').length;
|
|
1379
|
+
const afterMessages = compacted.length;
|
|
1380
|
+
const afterTokens = estimateTokensFromMessages(compacted);
|
|
1381
|
+
const freedTokens = Math.max(0, beforeTokens - afterTokens);
|
|
1382
|
+
if (!opts?.dry) {
|
|
1383
|
+
if (dropped.length && vault) {
|
|
1384
|
+
try {
|
|
1385
|
+
// Store the original/current user prompt before compaction so it survives context loss.
|
|
1386
|
+
let userPromptToPreserve = null;
|
|
1387
|
+
for (let i = messages.length - 1; i >= 0; i--) {
|
|
1388
|
+
const m = messages[i];
|
|
1389
|
+
if (m.role === 'user') {
|
|
1390
|
+
const text = userContentToText((m.content ?? '')).trim();
|
|
1391
|
+
if (text && !text.startsWith('[Trifecta Vault') && !text.startsWith('[Vault context') && text.length > 20) {
|
|
1392
|
+
userPromptToPreserve = text;
|
|
1393
|
+
break;
|
|
1394
|
+
}
|
|
1307
1395
|
}
|
|
1308
1396
|
}
|
|
1397
|
+
if (userPromptToPreserve) {
|
|
1398
|
+
await vault.upsertNote('current_task', userPromptToPreserve.slice(0, 2000), 'system');
|
|
1399
|
+
}
|
|
1400
|
+
await vault.archiveToolMessages(dropped, new Map());
|
|
1401
|
+
await vault.note('compaction_summary', `Dropped ${dropped.length} messages (${freedTokens} tokens).`);
|
|
1309
1402
|
}
|
|
1310
|
-
|
|
1311
|
-
|
|
1403
|
+
catch {
|
|
1404
|
+
// best-effort
|
|
1312
1405
|
}
|
|
1313
|
-
await vault.archiveToolMessages(dropped, new Map());
|
|
1314
|
-
await vault.note('compaction_summary', `Dropped ${dropped.length} messages (${freedTokens} tokens).`);
|
|
1315
|
-
}
|
|
1316
|
-
catch {
|
|
1317
|
-
// best-effort
|
|
1318
1406
|
}
|
|
1319
|
-
|
|
1320
|
-
|
|
1321
|
-
|
|
1322
|
-
|
|
1323
|
-
|
|
1324
|
-
|
|
1325
|
-
|
|
1407
|
+
messages = compacted;
|
|
1408
|
+
if (dropped.length) {
|
|
1409
|
+
messages.push({ role: 'system', content: buildCompactionSystemNote('manual', dropped.length) });
|
|
1410
|
+
await injectVaultContext().catch(() => { });
|
|
1411
|
+
if (opts?.reason || opts?.force) {
|
|
1412
|
+
injectCompactionReminder(opts?.reason ?? 'history compaction');
|
|
1413
|
+
}
|
|
1326
1414
|
}
|
|
1327
1415
|
}
|
|
1328
|
-
|
|
1329
|
-
|
|
1330
|
-
|
|
1331
|
-
|
|
1332
|
-
|
|
1333
|
-
|
|
1334
|
-
|
|
1335
|
-
|
|
1336
|
-
};
|
|
1416
|
+
return {
|
|
1417
|
+
beforeMessages,
|
|
1418
|
+
afterMessages,
|
|
1419
|
+
freedTokens,
|
|
1420
|
+
archivedToolMessages,
|
|
1421
|
+
droppedMessages: dropped.length,
|
|
1422
|
+
dryRun: !!opts?.dry,
|
|
1423
|
+
};
|
|
1424
|
+
});
|
|
1337
1425
|
};
|
|
1338
1426
|
const cumulativeUsage = { prompt: 0, completion: 0 };
|
|
1339
1427
|
const turnDurationsMs = [];
|
|
@@ -1342,6 +1430,17 @@ export async function createSession(opts) {
|
|
|
1342
1430
|
const tgSamples = [];
|
|
1343
1431
|
let lastTurnMetrics;
|
|
1344
1432
|
let lastServerHealth;
|
|
1433
|
+
let lastToolLoopStats = { totalHistory: 0, signatures: [], outcomes: [], telemetry: {
|
|
1434
|
+
callsRegistered: 0,
|
|
1435
|
+
dedupedReplays: 0,
|
|
1436
|
+
readCacheLookups: 0,
|
|
1437
|
+
readCacheHits: 0,
|
|
1438
|
+
warnings: 0,
|
|
1439
|
+
criticals: 0,
|
|
1440
|
+
recoveryRecommended: 0,
|
|
1441
|
+
readCacheHitRate: 0,
|
|
1442
|
+
dedupeRate: 0,
|
|
1443
|
+
} };
|
|
1345
1444
|
let lastModelsProbeMs = 0;
|
|
1346
1445
|
const capturesDir = path.join(stateDir(), 'captures');
|
|
1347
1446
|
let captureEnabled = false;
|
|
@@ -1690,6 +1789,10 @@ export async function createSession(opts) {
|
|
|
1690
1789
|
await hookObj.onToolResult?.(result);
|
|
1691
1790
|
await hookManager.emit('tool_result', { askId, turn: turns, result });
|
|
1692
1791
|
};
|
|
1792
|
+
const emitToolLoop = async (loop) => {
|
|
1793
|
+
await hookObj.onToolLoop?.(loop);
|
|
1794
|
+
await hookManager.emit('tool_loop', { askId, turn: turns, loop });
|
|
1795
|
+
};
|
|
1693
1796
|
const emitTurnEnd = async (stats) => {
|
|
1694
1797
|
await hookObj.onTurnEnd?.(stats);
|
|
1695
1798
|
await hookManager.emit('turn_end', { askId, stats });
|
|
@@ -1825,9 +1928,41 @@ export async function createSession(opts) {
|
|
|
1825
1928
|
const blockedExecAttemptsBySig = new Map();
|
|
1826
1929
|
// Cache successful read-only exec observations by exact signature.
|
|
1827
1930
|
const execObservationCacheBySig = new Map();
|
|
1828
|
-
// Cache successful
|
|
1931
|
+
// Cache ALL successful exec results so repeated identical calls under context
|
|
1932
|
+
// pressure can replay the cached result instead of re-executing.
|
|
1933
|
+
const lastExecResultBySig = new Map();
|
|
1934
|
+
// Cache successful read_file/read_files/list_dir results by signature + mtime for invalidation.
|
|
1829
1935
|
const readFileCacheBySig = new Map();
|
|
1830
1936
|
const READ_FILE_CACHE_TOOLS = new Set(['read_file', 'read_files', 'list_dir']);
|
|
1937
|
+
const toolLoopGuard = new ToolLoopGuard({
|
|
1938
|
+
enabled: cfg.tool_loop_detection?.enabled,
|
|
1939
|
+
historySize: cfg.tool_loop_detection?.history_size,
|
|
1940
|
+
warningThreshold: cfg.tool_loop_detection?.warning_threshold,
|
|
1941
|
+
criticalThreshold: cfg.tool_loop_detection?.critical_threshold,
|
|
1942
|
+
globalCircuitBreakerThreshold: cfg.tool_loop_detection?.global_circuit_breaker_threshold,
|
|
1943
|
+
readCacheTtlMs: cfg.tool_loop_detection?.read_cache_ttl_ms,
|
|
1944
|
+
detectors: {
|
|
1945
|
+
genericRepeat: cfg.tool_loop_detection?.detectors?.generic_repeat,
|
|
1946
|
+
knownPollNoProgress: cfg.tool_loop_detection?.detectors?.known_poll_no_progress,
|
|
1947
|
+
pingPong: cfg.tool_loop_detection?.detectors?.ping_pong,
|
|
1948
|
+
},
|
|
1949
|
+
perTool: Object.fromEntries(Object.entries(cfg.tool_loop_detection?.per_tool ?? {}).map(([tool, policy]) => [
|
|
1950
|
+
tool,
|
|
1951
|
+
{
|
|
1952
|
+
warningThreshold: policy?.warning_threshold,
|
|
1953
|
+
criticalThreshold: policy?.critical_threshold,
|
|
1954
|
+
globalCircuitBreakerThreshold: policy?.global_circuit_breaker_threshold,
|
|
1955
|
+
detectors: {
|
|
1956
|
+
genericRepeat: policy?.detectors?.generic_repeat,
|
|
1957
|
+
knownPollNoProgress: policy?.detectors?.known_poll_no_progress,
|
|
1958
|
+
pingPong: policy?.detectors?.ping_pong,
|
|
1959
|
+
},
|
|
1960
|
+
},
|
|
1961
|
+
])),
|
|
1962
|
+
});
|
|
1963
|
+
const toolLoopWarningKeys = new Set();
|
|
1964
|
+
let forceToollessRecoveryTurn = false;
|
|
1965
|
+
let toollessRecoveryUsed = false;
|
|
1831
1966
|
// Prevent repeating the same "stop rerunning" reminder every turn.
|
|
1832
1967
|
const readOnlyExecHintedSigs = new Set();
|
|
1833
1968
|
// Keep a lightweight breadcrumb for diagnostics on partial failures.
|
|
@@ -1950,51 +2085,63 @@ export async function createSession(opts) {
|
|
|
1950
2085
|
throw new Error(`session timeout exceeded (${cfg.timeout}s) after ${wallElapsed.toFixed(1)}s`);
|
|
1951
2086
|
}
|
|
1952
2087
|
await maybeAutoDetectModelChange();
|
|
1953
|
-
|
|
1954
|
-
|
|
1955
|
-
|
|
1956
|
-
|
|
1957
|
-
|
|
1958
|
-
|
|
1959
|
-
|
|
1960
|
-
|
|
1961
|
-
|
|
1962
|
-
|
|
1963
|
-
|
|
1964
|
-
|
|
1965
|
-
|
|
1966
|
-
|
|
1967
|
-
|
|
1968
|
-
|
|
1969
|
-
|
|
1970
|
-
|
|
1971
|
-
|
|
1972
|
-
const
|
|
1973
|
-
|
|
1974
|
-
|
|
1975
|
-
|
|
1976
|
-
|
|
2088
|
+
await runCompactionWithLock('auto context-budget compaction', async () => {
|
|
2089
|
+
const beforeMsgs = messages;
|
|
2090
|
+
const beforeTokens = estimateTokensFromMessages(beforeMsgs);
|
|
2091
|
+
const compacted = enforceContextBudget({
|
|
2092
|
+
messages: beforeMsgs,
|
|
2093
|
+
contextWindow,
|
|
2094
|
+
maxTokens: maxTokens,
|
|
2095
|
+
minTailMessages: 12,
|
|
2096
|
+
compactAt: cfg.compact_at ?? 0.8,
|
|
2097
|
+
toolSchemaTokens: estimateToolSchemaTokens(getToolsSchema()),
|
|
2098
|
+
});
|
|
2099
|
+
const compactedByRefs = new Set(compacted);
|
|
2100
|
+
const dropped = beforeMsgs.filter((m) => !compactedByRefs.has(m));
|
|
2101
|
+
if (dropped.length && vault) {
|
|
2102
|
+
try {
|
|
2103
|
+
// Store the original/current user prompt before compaction so it survives context loss.
|
|
2104
|
+
// Find the last substantive user message that looks like a task/instruction.
|
|
2105
|
+
let userPromptToPreserve = null;
|
|
2106
|
+
for (let i = beforeMsgs.length - 1; i >= 0; i--) {
|
|
2107
|
+
const m = beforeMsgs[i];
|
|
2108
|
+
if (m.role === 'user') {
|
|
2109
|
+
const text = userContentToText((m.content ?? '')).trim();
|
|
2110
|
+
// Skip vault injection messages and short prompts
|
|
2111
|
+
if (text && !text.startsWith('[Trifecta Vault') && !text.startsWith('[Vault context') && text.length > 20) {
|
|
2112
|
+
userPromptToPreserve = text;
|
|
2113
|
+
break;
|
|
2114
|
+
}
|
|
1977
2115
|
}
|
|
1978
2116
|
}
|
|
2117
|
+
if (userPromptToPreserve) {
|
|
2118
|
+
await vault.upsertNote('current_task', userPromptToPreserve.slice(0, 2000), 'system');
|
|
2119
|
+
}
|
|
2120
|
+
const toArchive = lens
|
|
2121
|
+
? await Promise.all(dropped.map((m) => archiveToolOutputForVault(m)))
|
|
2122
|
+
: dropped;
|
|
2123
|
+
await vault.archiveToolMessages(toArchive, toolNameByCallId);
|
|
1979
2124
|
}
|
|
1980
|
-
|
|
1981
|
-
|
|
2125
|
+
catch (e) {
|
|
2126
|
+
console.warn(`[warn] vault archive failed: ${e instanceof Error ? e.message : String(e)}`);
|
|
1982
2127
|
}
|
|
1983
|
-
const toArchive = lens
|
|
1984
|
-
? await Promise.all(dropped.map((m) => archiveToolOutputForVault(m)))
|
|
1985
|
-
: dropped;
|
|
1986
|
-
await vault.archiveToolMessages(toArchive, toolNameByCallId);
|
|
1987
2128
|
}
|
|
1988
|
-
|
|
1989
|
-
|
|
2129
|
+
messages = compacted;
|
|
2130
|
+
if (dropped.length) {
|
|
2131
|
+
messages.push({ role: 'system', content: buildCompactionSystemNote('auto', dropped.length) });
|
|
2132
|
+
await injectVaultContext().catch(() => { });
|
|
2133
|
+
injectCompactionReminder('auto context-budget compaction');
|
|
1990
2134
|
}
|
|
1991
|
-
|
|
1992
|
-
|
|
1993
|
-
|
|
1994
|
-
|
|
1995
|
-
|
|
1996
|
-
|
|
1997
|
-
|
|
2135
|
+
const afterTokens = estimateTokensFromMessages(compacted);
|
|
2136
|
+
return {
|
|
2137
|
+
beforeMessages: beforeMsgs.length,
|
|
2138
|
+
afterMessages: compacted.length,
|
|
2139
|
+
freedTokens: Math.max(0, beforeTokens - afterTokens),
|
|
2140
|
+
archivedToolMessages: dropped.filter((m) => m.role === 'tool').length,
|
|
2141
|
+
droppedMessages: dropped.length,
|
|
2142
|
+
dryRun: false,
|
|
2143
|
+
};
|
|
2144
|
+
});
|
|
1998
2145
|
const ac = makeAbortController();
|
|
1999
2146
|
inFlight = ac;
|
|
2000
2147
|
// If caller provided an AbortSignal (bench iteration timeout, etc), propagate it.
|
|
@@ -2019,11 +2166,13 @@ export async function createSession(opts) {
|
|
|
2019
2166
|
let resp;
|
|
2020
2167
|
try {
|
|
2021
2168
|
try {
|
|
2169
|
+
const toolsForTurn = forceToollessRecoveryTurn ? [] : getToolsSchema();
|
|
2170
|
+
const toolChoiceForTurn = forceToollessRecoveryTurn ? 'none' : 'auto';
|
|
2022
2171
|
resp = await client.chatStream({
|
|
2023
2172
|
model,
|
|
2024
2173
|
messages,
|
|
2025
|
-
tools:
|
|
2026
|
-
tool_choice:
|
|
2174
|
+
tools: toolsForTurn,
|
|
2175
|
+
tool_choice: toolChoiceForTurn,
|
|
2027
2176
|
temperature,
|
|
2028
2177
|
top_p: topP,
|
|
2029
2178
|
max_tokens: maxTokens,
|
|
@@ -2113,6 +2262,8 @@ export async function createSession(opts) {
|
|
|
2113
2262
|
},
|
|
2114
2263
|
}
|
|
2115
2264
|
: undefined;
|
|
2265
|
+
const wasToollessRecoveryTurn = forceToollessRecoveryTurn;
|
|
2266
|
+
forceToollessRecoveryTurn = false;
|
|
2116
2267
|
const choice0 = resp.choices?.[0] ?? legacyChoice;
|
|
2117
2268
|
const finishReason = choice0?.finish_reason ?? 'unknown';
|
|
2118
2269
|
const msg = choice0?.message;
|
|
@@ -2163,6 +2314,10 @@ export async function createSession(opts) {
|
|
|
2163
2314
|
}
|
|
2164
2315
|
}
|
|
2165
2316
|
}
|
|
2317
|
+
if (wasToollessRecoveryTurn && toolCallsArr?.length) {
|
|
2318
|
+
// Recovery turn explicitly disables tools; ignore any stray tool-call output.
|
|
2319
|
+
toolCallsArr = undefined;
|
|
2320
|
+
}
|
|
2166
2321
|
if (cfg.verbose) {
|
|
2167
2322
|
console.warn(`[turn ${turns}] finish_reason=${finishReason} content_chars=${content.length} visible_chars=${visible.length} tool_calls=${toolCallsArr?.length ?? 0}`);
|
|
2168
2323
|
}
|
|
@@ -2241,12 +2396,17 @@ export async function createSession(opts) {
|
|
|
2241
2396
|
// narration chunk starts on a fresh line (avoids wall-of-text output).
|
|
2242
2397
|
if (visible && hookObj.onToken)
|
|
2243
2398
|
hookObj.onToken('\n');
|
|
2244
|
-
|
|
2399
|
+
const originalToolCallsArr = toolCallsArr;
|
|
2400
|
+
const preparedTurn = toolLoopGuard.prepareTurn(originalToolCallsArr);
|
|
2401
|
+
const replayByCallId = preparedTurn.replayByCallId;
|
|
2402
|
+
const parsedArgsByCallId = preparedTurn.parsedArgsByCallId;
|
|
2403
|
+
toolCallsArr = preparedTurn.uniqueCalls;
|
|
2404
|
+
toolCalls += originalToolCallsArr.length;
|
|
2245
2405
|
const assistantToolCallText = visible || '';
|
|
2246
2406
|
const compactAssistantToolCallText = assistantToolCallText.length > 900
|
|
2247
2407
|
? `${assistantToolCallText.slice(0, 900)}\n[history-compacted: assistant narration truncated before tool execution]`
|
|
2248
2408
|
: assistantToolCallText;
|
|
2249
|
-
messages.push({ role: 'assistant', content: compactAssistantToolCallText, tool_calls:
|
|
2409
|
+
messages.push({ role: 'assistant', content: compactAssistantToolCallText, tool_calls: originalToolCallsArr });
|
|
2250
2410
|
// sigCounts is scoped to the entire ask() run (see above)
|
|
2251
2411
|
// Bridge ConfirmationProvider → legacy confirm callback for tools.
|
|
2252
2412
|
// If a ConfirmationProvider is given, wrap it; otherwise fall back to raw callback.
|
|
@@ -2290,20 +2450,64 @@ export async function createSession(opts) {
|
|
|
2290
2450
|
// We only treat repeated exec as a loop if no file mutations happened since the
|
|
2291
2451
|
// last time we saw that exact exec signature.
|
|
2292
2452
|
const turnSigs = new Set();
|
|
2453
|
+
const sigMetaBySig = new Map();
|
|
2293
2454
|
for (const tc of toolCallsArr) {
|
|
2294
|
-
const
|
|
2455
|
+
const callId = resolveCallId(tc);
|
|
2456
|
+
const parsedArgs = parsedArgsByCallId.get(callId) ?? {};
|
|
2457
|
+
const sig = toolLoopGuard.computeSignature(tc.function.name, parsedArgs);
|
|
2295
2458
|
turnSigs.add(sig);
|
|
2459
|
+
if (!sigMetaBySig.has(sig)) {
|
|
2460
|
+
sigMetaBySig.set(sig, { toolName: tc.function.name, args: parsedArgs });
|
|
2461
|
+
}
|
|
2296
2462
|
}
|
|
2297
2463
|
// Repeated read-only exec calls can be served from cache instead of hard-breaking.
|
|
2298
2464
|
const repeatedReadOnlyExecSigs = new Set();
|
|
2299
2465
|
const readOnlyExecTurnHints = [];
|
|
2466
|
+
// Repeated exec calls (any kind) can replay cached results under pressure.
|
|
2467
|
+
const replayExecSigs = new Set();
|
|
2300
2468
|
// Repeated read_file/read_files/list_dir calls can be served from cache.
|
|
2301
2469
|
const repeatedReadFileSigs = new Set();
|
|
2470
|
+
let shouldForceToollessRecovery = false;
|
|
2471
|
+
const criticalLoopSigs = new Set();
|
|
2472
|
+
for (const tc of toolCallsArr) {
|
|
2473
|
+
const callId = resolveCallId(tc);
|
|
2474
|
+
const args = parsedArgsByCallId.get(callId) ?? {};
|
|
2475
|
+
const detected = toolLoopGuard.detect(tc.function.name, args);
|
|
2476
|
+
const warning = toolLoopGuard.formatWarning(detected, tc.function.name);
|
|
2477
|
+
if (warning) {
|
|
2478
|
+
const warningKey = `${warning.level}:${warning.detector}:${detected.signature}`;
|
|
2479
|
+
if (!toolLoopWarningKeys.has(warningKey)) {
|
|
2480
|
+
toolLoopWarningKeys.add(warningKey);
|
|
2481
|
+
await emitToolLoop({
|
|
2482
|
+
level: warning.level,
|
|
2483
|
+
detector: warning.detector,
|
|
2484
|
+
toolName: warning.toolName,
|
|
2485
|
+
count: warning.count,
|
|
2486
|
+
message: warning.message,
|
|
2487
|
+
});
|
|
2488
|
+
messages.push({
|
|
2489
|
+
role: 'system',
|
|
2490
|
+
content: `[tool-loop ${warning.level}] ${warning.message}. Stop repeating ${warning.toolName} with unchanged inputs; continue with analysis or next step.`,
|
|
2491
|
+
});
|
|
2492
|
+
}
|
|
2493
|
+
}
|
|
2494
|
+
if (toolLoopGuard.shouldDisableToolsNextTurn(detected)) {
|
|
2495
|
+
shouldForceToollessRecovery = true;
|
|
2496
|
+
criticalLoopSigs.add(detected.signature);
|
|
2497
|
+
}
|
|
2498
|
+
}
|
|
2302
2499
|
// Track whether a mutation happened since a given signature was last seen.
|
|
2303
2500
|
// (Tool-loop is single-threaded across turns; this is safe to keep in-memory.)
|
|
2304
2501
|
for (const sig of turnSigs) {
|
|
2305
2502
|
sigCounts.set(sig, (sigCounts.get(sig) ?? 0) + 1);
|
|
2306
|
-
const
|
|
2503
|
+
const sigMeta = sigMetaBySig.get(sig);
|
|
2504
|
+
const toolName = sigMeta?.toolName ?? sig.split(':')[0];
|
|
2505
|
+
if (criticalLoopSigs.has(sig)) {
|
|
2506
|
+
// Critical detector already fired for this signature; recover next turn
|
|
2507
|
+
// with tools disabled instead of throwing in per-tool hard-break logic.
|
|
2508
|
+
shouldForceToollessRecovery = true;
|
|
2509
|
+
continue;
|
|
2510
|
+
}
|
|
2307
2511
|
// For exec loops, only break if nothing changed since last identical exec.
|
|
2308
2512
|
if (toolName === 'exec') {
|
|
2309
2513
|
// If this exact exec signature was seen before, record the mutation version at that time.
|
|
@@ -2314,6 +2518,18 @@ export async function createSession(opts) {
|
|
|
2314
2518
|
mutationVersionBySig.set(sig, mutationVersion);
|
|
2315
2519
|
if (!hasMutatedSince) {
|
|
2316
2520
|
const count = sigCounts.get(sig) ?? 0;
|
|
2521
|
+
// Early replay: if this exact exec was already run (count >= 2) and
|
|
2522
|
+
// we have a cached result, replay it instead of re-executing. This
|
|
2523
|
+
// prevents the compaction death spiral where tool results get dropped,
|
|
2524
|
+
// the model forgets it ran the command, and re-runs it endlessly.
|
|
2525
|
+
// Skip read-only commands that already have their own observation cache —
|
|
2526
|
+
// those are handled by the dedicated read-only path at loopThreshold.
|
|
2527
|
+
const command = execCommandFromSig(sig);
|
|
2528
|
+
const hasReadOnlyCache = looksLikeReadOnlyExecCommand(command) && execObservationCacheBySig.has(sig);
|
|
2529
|
+
if (count >= 2 && lastExecResultBySig.has(sig) && !hasReadOnlyCache) {
|
|
2530
|
+
replayExecSigs.add(sig);
|
|
2531
|
+
continue;
|
|
2532
|
+
}
|
|
2317
2533
|
let loopThreshold = harness.quirks.loopsOnToolError ? 3 : 6;
|
|
2318
2534
|
// If the cached observation already tells the model "no matches found",
|
|
2319
2535
|
// break much earlier — the model is ignoring the hint.
|
|
@@ -2326,7 +2542,8 @@ export async function createSession(opts) {
|
|
|
2326
2542
|
await injectVaultContext().catch(() => { });
|
|
2327
2543
|
}
|
|
2328
2544
|
if (count >= loopThreshold) {
|
|
2329
|
-
const
|
|
2545
|
+
const sigArgs = sigMetaBySig.get(sig)?.args ?? {};
|
|
2546
|
+
const command = typeof sigArgs?.command === 'string' ? String(sigArgs.command) : '';
|
|
2330
2547
|
const canReuseReadOnlyObservation = looksLikeReadOnlyExecCommand(command) &&
|
|
2331
2548
|
execObservationCacheBySig.has(sig);
|
|
2332
2549
|
if (canReuseReadOnlyObservation) {
|
|
@@ -2337,8 +2554,8 @@ export async function createSession(opts) {
|
|
|
2337
2554
|
}
|
|
2338
2555
|
continue;
|
|
2339
2556
|
}
|
|
2340
|
-
const
|
|
2341
|
-
const argsPreview =
|
|
2557
|
+
const argsPreviewRaw = JSON.stringify(sigArgs);
|
|
2558
|
+
const argsPreview = argsPreviewRaw.length > 220 ? argsPreviewRaw.slice(0, 220) + '…' : argsPreviewRaw;
|
|
2342
2559
|
throw new Error(`tool ${toolName}: identical call repeated ${loopThreshold}x across turns; breaking loop. ` +
|
|
2343
2560
|
`args=${argsPreview}`);
|
|
2344
2561
|
}
|
|
@@ -2396,23 +2613,30 @@ export async function createSession(opts) {
|
|
|
2396
2613
|
content: `CRITICAL: DO NOT make another identical call for this ${resourceType}. It HAS NOT CHANGED. You already have the content. Move on to the NEXT step NOW.`,
|
|
2397
2614
|
});
|
|
2398
2615
|
}
|
|
2399
|
-
|
|
2616
|
+
const argsForSig = sigMetaBySig.get(sig)?.args ?? {};
|
|
2617
|
+
const replay = await toolLoopGuard.getReadCacheReplay(toolName, argsForSig, ctx.cwd);
|
|
2618
|
+
if (replay) {
|
|
2400
2619
|
repeatedReadFileSigs.add(sig);
|
|
2401
2620
|
continue;
|
|
2402
2621
|
}
|
|
2403
2622
|
}
|
|
2404
|
-
//
|
|
2623
|
+
// Deterministic recovery at threshold (no hard throw): force one no-tools turn.
|
|
2405
2624
|
if (consec >= hardBreakAt) {
|
|
2406
|
-
|
|
2407
|
-
|
|
2625
|
+
shouldForceToollessRecovery = true;
|
|
2626
|
+
messages.push({
|
|
2627
|
+
role: 'system',
|
|
2628
|
+
content: `[tool-loop critical] ${toolName} repeated ${consec}x with unchanged inputs. ` +
|
|
2629
|
+
'Next turn will run with tools disabled so you must use existing results and provide a concrete next step/final response.',
|
|
2630
|
+
});
|
|
2408
2631
|
}
|
|
2409
2632
|
continue;
|
|
2410
2633
|
}
|
|
2411
2634
|
// Default behavior for mutating/other tools: break on repeated identical signature.
|
|
2412
2635
|
const loopThreshold = harness.quirks.loopsOnToolError ? 2 : 3;
|
|
2413
2636
|
if ((sigCounts.get(sig) ?? 0) >= loopThreshold) {
|
|
2414
|
-
const
|
|
2415
|
-
const
|
|
2637
|
+
const argsObj = sigMetaBySig.get(sig)?.args ?? {};
|
|
2638
|
+
const argsRaw = JSON.stringify(argsObj);
|
|
2639
|
+
const argsPreview = argsRaw.length > 220 ? argsRaw.slice(0, 220) + '…' : argsRaw;
|
|
2416
2640
|
throw new Error(`tool ${toolName}: identical call repeated ${loopThreshold}x across turns; breaking loop. ` +
|
|
2417
2641
|
`args=${argsPreview}\n` +
|
|
2418
2642
|
`Hint: you repeated the same tool call ${loopThreshold} times with identical arguments. ` +
|
|
@@ -2423,6 +2647,31 @@ export async function createSession(opts) {
|
|
|
2423
2647
|
}
|
|
2424
2648
|
// Update consecutive tracking: save this turn's signatures for next turn comparison.
|
|
2425
2649
|
lastTurnSigs = turnSigs;
|
|
2650
|
+
if (shouldForceToollessRecovery) {
|
|
2651
|
+
if (!toollessRecoveryUsed) {
|
|
2652
|
+
forceToollessRecoveryTurn = true;
|
|
2653
|
+
toollessRecoveryUsed = true;
|
|
2654
|
+
messages.push({
|
|
2655
|
+
role: 'user',
|
|
2656
|
+
content: '[system] Critical tool loop detected. Next turn will run with tools disabled. ' +
|
|
2657
|
+
'Use already available tool results to provide a concrete next step or final response; do not request more tools.',
|
|
2658
|
+
});
|
|
2659
|
+
await emitTurnEnd({
|
|
2660
|
+
turn: turns,
|
|
2661
|
+
toolCalls,
|
|
2662
|
+
promptTokens: cumulativeUsage.prompt,
|
|
2663
|
+
completionTokens: cumulativeUsage.completion,
|
|
2664
|
+
promptTokensTurn,
|
|
2665
|
+
completionTokensTurn,
|
|
2666
|
+
ttftMs,
|
|
2667
|
+
ttcMs,
|
|
2668
|
+
ppTps,
|
|
2669
|
+
tgTps,
|
|
2670
|
+
});
|
|
2671
|
+
continue;
|
|
2672
|
+
}
|
|
2673
|
+
throw new AgentLoopBreak('critical tool-loop persisted after one tools-disabled recovery turn. Stopping to avoid infinite loop.');
|
|
2674
|
+
}
|
|
2426
2675
|
const runOne = async (tc) => {
|
|
2427
2676
|
const name = tc.function.name;
|
|
2428
2677
|
const rawArgs = tc.function.arguments ?? '{}';
|
|
@@ -2452,6 +2701,7 @@ export async function createSession(opts) {
|
|
|
2452
2701
|
throw new Error(`unknown tool: ${name}`);
|
|
2453
2702
|
// Keep parsed args by call-id so we can digest/archive tool outputs with context.
|
|
2454
2703
|
toolArgsByCallId.set(callId, args && typeof args === 'object' && !Array.isArray(args) ? args : {});
|
|
2704
|
+
toolLoopGuard.registerCall(name, args && typeof args === 'object' && !Array.isArray(args) ? args : {}, callId);
|
|
2455
2705
|
// Pre-dispatch argument validation.
|
|
2456
2706
|
// - Required params
|
|
2457
2707
|
// - Type/range/enums
|
|
@@ -2556,7 +2806,7 @@ export async function createSession(opts) {
|
|
|
2556
2806
|
return { id: callId, content: '[skipped by user: step mode]' };
|
|
2557
2807
|
}
|
|
2558
2808
|
}
|
|
2559
|
-
const sig =
|
|
2809
|
+
const sig = toolLoopGuard.computeSignature(name, args && typeof args === 'object' && !Array.isArray(args) ? args : {});
|
|
2560
2810
|
let content = '';
|
|
2561
2811
|
let reusedCachedReadOnlyExec = false;
|
|
2562
2812
|
let reusedCachedReadTool = false;
|
|
@@ -2567,10 +2817,18 @@ export async function createSession(opts) {
|
|
|
2567
2817
|
reusedCachedReadOnlyExec = true;
|
|
2568
2818
|
}
|
|
2569
2819
|
}
|
|
2570
|
-
|
|
2571
|
-
|
|
2820
|
+
// Replay any exec result (read-only or not) when the loop detector flagged it.
|
|
2821
|
+
if (name === 'exec' && !reusedCachedReadOnlyExec && replayExecSigs.has(sig)) {
|
|
2822
|
+
const cached = lastExecResultBySig.get(sig);
|
|
2572
2823
|
if (cached) {
|
|
2573
|
-
content =
|
|
2824
|
+
content = withReplayedExecHint(cached);
|
|
2825
|
+
reusedCachedReadOnlyExec = true; // skip re-execution below
|
|
2826
|
+
}
|
|
2827
|
+
}
|
|
2828
|
+
if (READ_FILE_CACHE_TOOLS.has(name) && repeatedReadFileSigs.has(sig)) {
|
|
2829
|
+
const replay = await toolLoopGuard.getReadCacheReplay(name, args, ctx.cwd);
|
|
2830
|
+
if (replay) {
|
|
2831
|
+
content = replay;
|
|
2574
2832
|
reusedCachedReadTool = true;
|
|
2575
2833
|
}
|
|
2576
2834
|
}
|
|
@@ -2588,11 +2846,15 @@ export async function createSession(opts) {
|
|
|
2588
2846
|
const value = await builtInFn(callCtx, args);
|
|
2589
2847
|
content = typeof value === 'string' ? value : JSON.stringify(value);
|
|
2590
2848
|
if (READ_FILE_CACHE_TOOLS.has(name) && typeof content === 'string' && !content.startsWith('ERROR:')) {
|
|
2591
|
-
|
|
2849
|
+
const baseCwd = typeof args?.cwd === 'string' ? String(args.cwd) : ctx.cwd;
|
|
2850
|
+
await toolLoopGuard.storeReadCache(name, args, baseCwd, content);
|
|
2592
2851
|
}
|
|
2593
2852
|
if (name === 'exec') {
|
|
2594
2853
|
// Successful exec clears blocked-loop counters.
|
|
2595
2854
|
blockedExecAttemptsBySig.clear();
|
|
2855
|
+
// Cache every exec result so repeated calls under context pressure
|
|
2856
|
+
// can replay the result instead of re-executing.
|
|
2857
|
+
lastExecResultBySig.set(sig, content);
|
|
2596
2858
|
const cmd = String(args?.command ?? '');
|
|
2597
2859
|
if (looksLikeReadOnlyExecCommand(cmd) && readOnlyExecCacheable(content)) {
|
|
2598
2860
|
execObservationCacheBySig.set(sig, content);
|
|
@@ -2731,6 +2993,10 @@ export async function createSession(opts) {
|
|
|
2731
2993
|
}
|
|
2732
2994
|
}
|
|
2733
2995
|
}
|
|
2996
|
+
toolLoopGuard.registerOutcome(name, args, {
|
|
2997
|
+
toolCallId: callId,
|
|
2998
|
+
result: content,
|
|
2999
|
+
});
|
|
2734
3000
|
return { id: callId, content };
|
|
2735
3001
|
};
|
|
2736
3002
|
const results = [];
|
|
@@ -2751,7 +3017,8 @@ export async function createSession(opts) {
|
|
|
2751
3017
|
// Applies to direct exec attempts and spawn_task delegation attempts.
|
|
2752
3018
|
if (tc.function.name === 'exec' || tc.function.name === 'spawn_task') {
|
|
2753
3019
|
const blockedMatch = msg.match(/^exec:\s*blocked\s*\(([^)]+)\)\s*without --no-confirm\/--yolo:\s*(.*)$/i)
|
|
2754
|
-
|| msg.match(/^(spawn_task):\s*blocked\s*—\s*(.*)$/i)
|
|
3020
|
+
|| msg.match(/^(spawn_task):\s*blocked\s*—\s*(.*)$/i)
|
|
3021
|
+
|| msg.match(/^exec:\s*blocked\s+(background command\b[^.]*)\./i);
|
|
2755
3022
|
if (blockedMatch) {
|
|
2756
3023
|
const reason = (blockedMatch[1] || blockedMatch[2] || 'blocked command').trim();
|
|
2757
3024
|
let parsedArgs = {};
|
|
@@ -2763,7 +3030,8 @@ export async function createSession(opts) {
|
|
|
2763
3030
|
? String(parsedArgs?.command ?? '')
|
|
2764
3031
|
: String(parsedArgs?.task ?? '');
|
|
2765
3032
|
const normalizedReason = reason.toLowerCase();
|
|
2766
|
-
const aggregateByReason = normalizedReason.includes('package install/remove')
|
|
3033
|
+
const aggregateByReason = normalizedReason.includes('package install/remove')
|
|
3034
|
+
|| normalizedReason.includes('background command');
|
|
2767
3035
|
const sig = aggregateByReason
|
|
2768
3036
|
? `${tc.function.name}|${reason}`
|
|
2769
3037
|
: `${tc.function.name}|${reason}|${cmd}`;
|
|
@@ -2785,6 +3053,20 @@ export async function createSession(opts) {
|
|
|
2785
3053
|
retryable: te.retryable,
|
|
2786
3054
|
result: toolErrorContent,
|
|
2787
3055
|
});
|
|
3056
|
+
let parsedArgs = {};
|
|
3057
|
+
try {
|
|
3058
|
+
const parsed = JSON.parse(tc.function.arguments ?? '{}');
|
|
3059
|
+
if (parsed && typeof parsed === 'object' && !Array.isArray(parsed)) {
|
|
3060
|
+
parsedArgs = parsed;
|
|
3061
|
+
}
|
|
3062
|
+
}
|
|
3063
|
+
catch {
|
|
3064
|
+
// keep empty object
|
|
3065
|
+
}
|
|
3066
|
+
toolLoopGuard.registerOutcome(tc.function.name, parsedArgs, {
|
|
3067
|
+
toolCallId: callId,
|
|
3068
|
+
error: msg,
|
|
3069
|
+
});
|
|
2788
3070
|
return { id: callId, content: toolErrorContent };
|
|
2789
3071
|
};
|
|
2790
3072
|
// ── Anti-scan guardrails (§ read budget, dir scan, same-search) ──
|
|
@@ -2848,6 +3130,19 @@ export async function createSession(opts) {
|
|
|
2848
3130
|
}
|
|
2849
3131
|
}
|
|
2850
3132
|
}
|
|
3133
|
+
if (replayByCallId.size > 0) {
|
|
3134
|
+
const canonicalById = new Map(results.map((r) => [r.id, r.content]));
|
|
3135
|
+
for (const [dupId, canonicalId] of replayByCallId.entries()) {
|
|
3136
|
+
const canonical = canonicalById.get(canonicalId);
|
|
3137
|
+
if (canonical == null)
|
|
3138
|
+
continue;
|
|
3139
|
+
results.push({
|
|
3140
|
+
id: dupId,
|
|
3141
|
+
content: `[idlehands dedupe] Identical tool call replayed from ${canonicalId}. ` +
|
|
3142
|
+
'Use that earlier tool result; no new execution was performed.',
|
|
3143
|
+
});
|
|
3144
|
+
}
|
|
3145
|
+
}
|
|
2851
3146
|
// Bail immediately if cancelled during tool execution
|
|
2852
3147
|
if (ac.signal.aborted)
|
|
2853
3148
|
break;
|
|
@@ -2895,6 +3190,8 @@ export async function createSession(opts) {
|
|
|
2895
3190
|
'Do not narrate. Fix required/mistyped fields and unknown keys.',
|
|
2896
3191
|
});
|
|
2897
3192
|
}
|
|
3193
|
+
// Update session-level tool loop stats for observability
|
|
3194
|
+
lastToolLoopStats = toolLoopGuard.getStats();
|
|
2898
3195
|
// Hook: onTurnEnd (Phase 8.5)
|
|
2899
3196
|
await emitTurnEnd({
|
|
2900
3197
|
turn: turns,
|
|
@@ -3068,6 +3365,7 @@ export async function createSession(opts) {
|
|
|
3068
3365
|
listModels,
|
|
3069
3366
|
refreshServerHealth,
|
|
3070
3367
|
getPerfSummary,
|
|
3368
|
+
getToolLoopStats: () => lastToolLoopStats,
|
|
3071
3369
|
captureOn,
|
|
3072
3370
|
captureOff,
|
|
3073
3371
|
captureLast,
|
|
@@ -3105,6 +3403,9 @@ export async function createSession(opts) {
|
|
|
3105
3403
|
get planSteps() {
|
|
3106
3404
|
return planSteps;
|
|
3107
3405
|
},
|
|
3406
|
+
get compactionStats() {
|
|
3407
|
+
return { ...compactionStats };
|
|
3408
|
+
},
|
|
3108
3409
|
executePlanStep,
|
|
3109
3410
|
clearPlan,
|
|
3110
3411
|
compactHistory
|