@visorcraft/idlehands 1.1.17 → 1.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (212)
  1. package/dist/agent/formatting.js +30 -13
  2. package/dist/agent/formatting.js.map +1 -1
  3. package/dist/agent/review-artifact.js +12 -8
  4. package/dist/agent/review-artifact.js.map +1 -1
  5. package/dist/agent/tool-calls.js +57 -20
  6. package/dist/agent/tool-calls.js.map +1 -1
  7. package/dist/agent/tool-loop-detection.js +310 -0
  8. package/dist/agent/tool-loop-detection.js.map +1 -0
  9. package/dist/agent/tool-loop-guard.js +235 -0
  10. package/dist/agent/tool-loop-guard.js.map +1 -0
  11. package/dist/agent.js +442 -141
  12. package/dist/agent.js.map +1 -1
  13. package/dist/anton/controller.js +46 -30
  14. package/dist/anton/controller.js.map +1 -1
  15. package/dist/anton/lock.js +5 -1
  16. package/dist/anton/lock.js.map +1 -1
  17. package/dist/anton/parser.js +18 -19
  18. package/dist/anton/parser.js.map +1 -1
  19. package/dist/anton/prompt.js +42 -11
  20. package/dist/anton/prompt.js.map +1 -1
  21. package/dist/anton/reporter.js.map +1 -1
  22. package/dist/anton/session.js.map +1 -1
  23. package/dist/anton/verifier.js +3 -5
  24. package/dist/anton/verifier.js.map +1 -1
  25. package/dist/bench/compare.js +53 -20
  26. package/dist/bench/compare.js.map +1 -1
  27. package/dist/bench/openclaw.js +4 -4
  28. package/dist/bench/openclaw.js.map +1 -1
  29. package/dist/bench/report.js +11 -3
  30. package/dist/bench/report.js.map +1 -1
  31. package/dist/bench/runner.js +20 -14
  32. package/dist/bench/runner.js.map +1 -1
  33. package/dist/bot/commands.js +65 -31
  34. package/dist/bot/commands.js.map +1 -1
  35. package/dist/bot/confirm-discord.js +32 -9
  36. package/dist/bot/confirm-discord.js.map +1 -1
  37. package/dist/bot/confirm-telegram.js +26 -10
  38. package/dist/bot/confirm-telegram.js.map +1 -1
  39. package/dist/bot/dir-guard.js +18 -3
  40. package/dist/bot/dir-guard.js.map +1 -1
  41. package/dist/bot/discord-routing.js +28 -4
  42. package/dist/bot/discord-routing.js.map +1 -1
  43. package/dist/bot/discord-streaming.js +3 -3
  44. package/dist/bot/discord-streaming.js.map +1 -1
  45. package/dist/bot/discord.js +82 -37
  46. package/dist/bot/discord.js.map +1 -1
  47. package/dist/bot/escalation.js +124 -0
  48. package/dist/bot/escalation.js.map +1 -0
  49. package/dist/bot/format.js +2 -5
  50. package/dist/bot/format.js.map +1 -1
  51. package/dist/bot/session-manager.js +17 -6
  52. package/dist/bot/session-manager.js.map +1 -1
  53. package/dist/bot/telegram.js +88 -28
  54. package/dist/bot/telegram.js.map +1 -1
  55. package/dist/cli/agent-turn.js +10 -4
  56. package/dist/cli/agent-turn.js.map +1 -1
  57. package/dist/cli/args.js +51 -9
  58. package/dist/cli/args.js.map +1 -1
  59. package/dist/cli/bot.js +19 -9
  60. package/dist/cli/bot.js.map +1 -1
  61. package/dist/cli/build-repl-context.js +60 -26
  62. package/dist/cli/build-repl-context.js.map +1 -1
  63. package/dist/cli/command-registry.js.map +1 -1
  64. package/dist/cli/commands/anton.js +5 -3
  65. package/dist/cli/commands/anton.js.map +1 -1
  66. package/dist/cli/commands/editing.js +27 -12
  67. package/dist/cli/commands/editing.js.map +1 -1
  68. package/dist/cli/commands/model.js +16 -7
  69. package/dist/cli/commands/model.js.map +1 -1
  70. package/dist/cli/commands/project.js +52 -17
  71. package/dist/cli/commands/project.js.map +1 -1
  72. package/dist/cli/commands/runtime.js +1 -1
  73. package/dist/cli/commands/runtime.js.map +1 -1
  74. package/dist/cli/commands/secrets.js +279 -0
  75. package/dist/cli/commands/secrets.js.map +1 -0
  76. package/dist/cli/commands/session.js +49 -1
  77. package/dist/cli/commands/session.js.map +1 -1
  78. package/dist/cli/commands/tools.js +3 -1
  79. package/dist/cli/commands/tools.js.map +1 -1
  80. package/dist/cli/commands/trifecta.js +1 -1
  81. package/dist/cli/commands/trifecta.js.map +1 -1
  82. package/dist/cli/commands/tui.js.map +1 -1
  83. package/dist/cli/init.js +50 -16
  84. package/dist/cli/init.js.map +1 -1
  85. package/dist/cli/input.js +25 -7
  86. package/dist/cli/input.js.map +1 -1
  87. package/dist/cli/oneshot.js +31 -19
  88. package/dist/cli/oneshot.js.map +1 -1
  89. package/dist/cli/repl-dispatch.js +10 -6
  90. package/dist/cli/repl-dispatch.js.map +1 -1
  91. package/dist/cli/runtime-cmds.js +110 -46
  92. package/dist/cli/runtime-cmds.js.map +1 -1
  93. package/dist/cli/service.js +3 -3
  94. package/dist/cli/service.js.map +1 -1
  95. package/dist/cli/session-state.js +12 -5
  96. package/dist/cli/session-state.js.map +1 -1
  97. package/dist/cli/setup.js +86 -33
  98. package/dist/cli/setup.js.map +1 -1
  99. package/dist/cli/shell.js +4 -4
  100. package/dist/cli/shell.js.map +1 -1
  101. package/dist/cli/status.js +56 -12
  102. package/dist/cli/status.js.map +1 -1
  103. package/dist/client.js +40 -21
  104. package/dist/client.js.map +1 -1
  105. package/dist/commands.js +1 -1
  106. package/dist/commands.js.map +1 -1
  107. package/dist/config.js +171 -15
  108. package/dist/config.js.map +1 -1
  109. package/dist/confirm/auto.js.map +1 -1
  110. package/dist/confirm/headless.js +13 -2
  111. package/dist/confirm/headless.js.map +1 -1
  112. package/dist/confirm/terminal.js +1 -5
  113. package/dist/confirm/terminal.js.map +1 -1
  114. package/dist/context.js +9 -3
  115. package/dist/context.js.map +1 -1
  116. package/dist/git.js +56 -61
  117. package/dist/git.js.map +1 -1
  118. package/dist/harnesses.js +137 -37
  119. package/dist/harnesses.js.map +1 -1
  120. package/dist/history.js +12 -4
  121. package/dist/history.js.map +1 -1
  122. package/dist/hooks/index.js +2 -2
  123. package/dist/hooks/index.js.map +1 -1
  124. package/dist/hooks/loader.js +6 -5
  125. package/dist/hooks/loader.js.map +1 -1
  126. package/dist/hooks/manager.js.map +1 -1
  127. package/dist/hooks/plugins/example-console.js.map +1 -1
  128. package/dist/hooks/scaffold.js +8 -6
  129. package/dist/hooks/scaffold.js.map +1 -1
  130. package/dist/index.js +120 -66
  131. package/dist/index.js.map +1 -1
  132. package/dist/indexer.js +6 -18
  133. package/dist/indexer.js.map +1 -1
  134. package/dist/jsonrpc.js.map +1 -1
  135. package/dist/lens.js +38 -16
  136. package/dist/lens.js.map +1 -1
  137. package/dist/lsp.js +60 -24
  138. package/dist/lsp.js.map +1 -1
  139. package/dist/markdown.js +6 -6
  140. package/dist/markdown.js.map +1 -1
  141. package/dist/mcp.js +15 -6
  142. package/dist/mcp.js.map +1 -1
  143. package/dist/model-customization.js +7 -3
  144. package/dist/model-customization.js.map +1 -1
  145. package/dist/progress/message-edit-scheduler.js +15 -3
  146. package/dist/progress/message-edit-scheduler.js.map +1 -1
  147. package/dist/progress/progress-message-renderer.js.map +1 -1
  148. package/dist/progress/progress-presenter.js +3 -3
  149. package/dist/progress/progress-presenter.js.map +1 -1
  150. package/dist/progress/serialize-telegram.js.map +1 -1
  151. package/dist/progress/tool-summary.js +3 -1
  152. package/dist/progress/tool-summary.js.map +1 -1
  153. package/dist/progress/turn-progress.js +3 -1
  154. package/dist/progress/turn-progress.js.map +1 -1
  155. package/dist/recovery.js +11 -3
  156. package/dist/recovery.js.map +1 -1
  157. package/dist/replay.js +9 -3
  158. package/dist/replay.js.map +1 -1
  159. package/dist/replay_cli.js +5 -3
  160. package/dist/replay_cli.js.map +1 -1
  161. package/dist/runtime/executor.js +66 -20
  162. package/dist/runtime/executor.js.map +1 -1
  163. package/dist/runtime/health.js.map +1 -1
  164. package/dist/runtime/host-runner.js +103 -0
  165. package/dist/runtime/host-runner.js.map +1 -0
  166. package/dist/runtime/planner.js +3 -1
  167. package/dist/runtime/planner.js.map +1 -1
  168. package/dist/runtime/secrets.js +102 -0
  169. package/dist/runtime/secrets.js.map +1 -0
  170. package/dist/runtime/store.js +95 -19
  171. package/dist/runtime/store.js.map +1 -1
  172. package/dist/safety.js +38 -21
  173. package/dist/safety.js.map +1 -1
  174. package/dist/spinner.js +7 -8
  175. package/dist/spinner.js.map +1 -1
  176. package/dist/sys/context.js +3 -3
  177. package/dist/sys/context.js.map +1 -1
  178. package/dist/term.js +1 -1
  179. package/dist/term.js.map +1 -1
  180. package/dist/themes.js +11 -5
  181. package/dist/themes.js.map +1 -1
  182. package/dist/tools/tool-error.js +2 -5
  183. package/dist/tools/tool-error.js.map +1 -1
  184. package/dist/tools.js +69 -34
  185. package/dist/tools.js.map +1 -1
  186. package/dist/tui/branch-picker.js +9 -3
  187. package/dist/tui/branch-picker.js.map +1 -1
  188. package/dist/tui/command-handler.js +88 -36
  189. package/dist/tui/command-handler.js.map +1 -1
  190. package/dist/tui/confirm.js.map +1 -1
  191. package/dist/tui/controller.js +234 -117
  192. package/dist/tui/controller.js.map +1 -1
  193. package/dist/tui/event-bridge.js.map +1 -1
  194. package/dist/tui/keymap.js +93 -71
  195. package/dist/tui/keymap.js.map +1 -1
  196. package/dist/tui/layout.js +9 -1
  197. package/dist/tui/layout.js.map +1 -1
  198. package/dist/tui/render.js +17 -5
  199. package/dist/tui/render.js.map +1 -1
  200. package/dist/tui/screen.js.map +1 -1
  201. package/dist/tui/state.js +129 -63
  202. package/dist/tui/state.js.map +1 -1
  203. package/dist/tui/theme.js +12 -3
  204. package/dist/tui/theme.js.map +1 -1
  205. package/dist/upgrade.js +28 -15
  206. package/dist/upgrade.js.map +1 -1
  207. package/dist/utils.js +8 -5
  208. package/dist/utils.js.map +1 -1
  209. package/dist/vault.js +48 -12
  210. package/dist/vault.js.map +1 -1
  211. package/dist/vim.js.map +1 -1
  212. package/package.json +11 -2
package/dist/agent.js CHANGED
@@ -17,6 +17,7 @@ import { LspManager, detectInstalledLspServers } from './lsp.js';
17
17
  import { generateMinimalDiff, toolResultSummary, execCommandFromSig, formatDurationMs, looksLikePlanningNarration, capTextByApproxTokens, isLikelyBinaryBuffer, sanitizePathsInMessage, digestToolResult, } from './agent/formatting.js';
18
18
  import { parseToolCallsFromContent, getMissingRequiredParams, getArgValidationIssues, stripMarkdownFences } from './agent/tool-calls.js';
19
19
  import { ToolError, ValidationError } from './tools/tool-error.js';
20
+ import { ToolLoopGuard } from './agent/tool-loop-guard.js';
20
21
  export { parseToolCallsFromContent };
21
22
  import { reviewArtifactKeys, looksLikeCodeReviewRequest, looksLikeReviewRetrievalRequest, retrievalAllowsStaleArtifact, parseReviewArtifactStalePolicy, parseReviewArtifact, reviewArtifactStaleReason, gitHead, normalizeModelsResponse, } from './agent/review-artifact.js';
22
23
  import fs from 'node:fs/promises';
@@ -27,9 +28,13 @@ function makeAbortController() {
27
28
  return new AbortController();
28
29
  }
29
30
  const CACHED_EXEC_OBSERVATION_HINT = '[idlehands hint] Reused cached output for repeated read-only exec call (unchanged observation).';
30
- const CACHED_READ_OBSERVATION_HINT = '[idlehands hint] Reused cached output for repeated identical read call.';
31
31
  function looksLikeReadOnlyExecCommand(command) {
32
- const cmd = String(command || '').trim().toLowerCase();
32
+ // Strip leading `cd <path> &&` / `cd <path>;` prefixes — cd is read-only
33
+ // navigation, the actual command that matters comes after.
34
+ let cmd = String(command || '').trim().toLowerCase();
35
+ if (!cmd)
36
+ return false;
37
+ cmd = cmd.replace(/^(\s*cd\s+[^;&|]+\s*(?:&&|;)\s*)+/i, '').trim();
33
38
  if (!cmd)
34
39
  return false;
35
40
  // Shell redirects are likely writes.
@@ -99,6 +104,25 @@ function withCachedExecObservationHint(content) {
99
104
  return `${content}\n${CACHED_EXEC_OBSERVATION_HINT}`;
100
105
  }
101
106
  }
107
+ const REPLAYED_EXEC_HINT = '[idlehands hint] You already ran this exact command. This is the replayed result from your previous execution. Do NOT re-run it — use the output below to continue your task.';
108
+ function withReplayedExecHint(content) {
109
+ if (!content)
110
+ return content;
111
+ try {
112
+ const parsed = JSON.parse(content);
113
+ const out = typeof parsed?.out === 'string' ? parsed.out : '';
114
+ if (out.includes(REPLAYED_EXEC_HINT))
115
+ return content;
116
+ parsed.out = out ? `${REPLAYED_EXEC_HINT}\n${out}` : REPLAYED_EXEC_HINT;
117
+ parsed.replayed = true;
118
+ return JSON.stringify(parsed);
119
+ }
120
+ catch {
121
+ if (content.includes(REPLAYED_EXEC_HINT))
122
+ return content;
123
+ return `${REPLAYED_EXEC_HINT}\n${content}`;
124
+ }
125
+ }
102
126
  function readOnlyExecCacheable(content) {
103
127
  try {
104
128
  const parsed = JSON.parse(content);
@@ -109,18 +133,6 @@ function readOnlyExecCacheable(content) {
109
133
  return false;
110
134
  }
111
135
  }
112
- function withCachedReadObservationHint(content) {
113
- if (!content)
114
- return CACHED_READ_OBSERVATION_HINT;
115
- if (content.includes(CACHED_READ_OBSERVATION_HINT))
116
- return content;
117
- // Keep cached read replay lightweight to avoid re-inflating context.
118
- const lines = String(content).split(/\r?\n/);
119
- const previewLines = lines.slice(0, 12);
120
- const omitted = Math.max(0, lines.length - previewLines.length);
121
- const trailer = omitted > 0 ? `\n# ... (${omitted} more lines omitted; use previous identical read result)` : '';
122
- return `${CACHED_READ_OBSERVATION_HINT}\n${previewLines.join('\n')}${trailer}`;
123
- }
124
136
  function ensureInformativeAssistantText(text, ctx) {
125
137
  if (String(text ?? '').trim())
126
138
  return text;
@@ -427,6 +439,11 @@ function buildToolsSchema(opts) {
427
439
  if (opts?.activeVaultTools) {
428
440
  schemas.push({ type: 'function', function: { name: 'vault_search', description: 'Search vault.', parameters: obj({ query: str(), limit: int() }, ['query']) } }, { type: 'function', function: { name: 'vault_note', description: 'Write vault note.', parameters: obj({ key: str(), value: str() }, ['key', 'value']) } });
429
441
  }
442
+ else if (opts?.passiveVault) {
443
+ // In passive mode, expose vault_search (read-only) so the model can recover
444
+ // compacted context on demand, but don't expose vault_note (write).
445
+ schemas.push({ type: 'function', function: { name: 'vault_search', description: 'Search vault memory for earlier context that was compacted away. Use sparingly — only when you need to recall specific details from earlier in the conversation.', parameters: obj({ query: str(), limit: int() }, ['query']) } });
446
+ }
430
447
  // Phase 9: sys_context tool is only available in sys mode.
431
448
  if (opts?.sysMode) {
432
449
  schemas.push(SYS_CONTEXT_SCHEMA);
@@ -671,6 +688,7 @@ export async function createSession(opts) {
671
688
  let mcpToolsLoaded = !mcpLazySchemaMode;
672
689
  const getToolsSchema = () => buildToolsSchema({
673
690
  activeVaultTools,
691
+ passiveVault: !activeVaultTools && vaultEnabled && vaultMode === 'passive',
674
692
  sysMode: cfg.mode === 'sys',
675
693
  lspTools: lspManager?.hasServers() === true,
676
694
  mcpTools: mcpToolsLoaded ? (mcpManager?.getEnabledToolSchemas() ?? []) : [],
@@ -1203,7 +1221,7 @@ export async function createSession(opts) {
1203
1221
  return 'Vault memory is available. Retrieve prior context with vault_search(query="...") when needed.';
1204
1222
  }
1205
1223
  if (vaultMode === 'passive') {
1206
- return 'Vault memory is in passive mode; relevant entries may be auto-injected when available.';
1224
+ return 'Vault memory is in passive mode; relevant entries may be auto-injected. You can also use vault_search(query="...") to recover specific earlier context if needed.';
1207
1225
  }
1208
1226
  return '';
1209
1227
  };
@@ -1260,80 +1278,150 @@ export async function createSession(opts) {
1260
1278
  content: `${vaultContextHeader} Relevant entries for "${query}":\n${lines.join('\n')}`
1261
1279
  });
1262
1280
  };
1263
- const compactHistory = async (opts) => {
1264
- const beforeMessages = messages.length;
1265
- const beforeTokens = estimateTokensFromMessages(messages);
1266
- let compacted;
1267
- if (opts?.hard) {
1268
- const sys = messages[0]?.role === 'system' ? [messages[0]] : [];
1269
- const tail = messages.slice(-2);
1270
- compacted = [...sys, ...tail];
1281
+ let compactionLockTail = Promise.resolve();
1282
+ let compactionStats = {
1283
+ inProgress: false,
1284
+ lockHeld: false,
1285
+ runs: 0,
1286
+ failedRuns: 0,
1287
+ beforeMessages: 0,
1288
+ afterMessages: 0,
1289
+ freedTokens: 0,
1290
+ archivedToolMessages: 0,
1291
+ droppedMessages: 0,
1292
+ dryRun: false,
1293
+ };
1294
+ const runCompactionWithLock = async (reason, runner) => {
1295
+ const prev = compactionLockTail;
1296
+ let release = () => { };
1297
+ compactionLockTail = new Promise((resolve) => {
1298
+ release = () => resolve();
1299
+ });
1300
+ await prev;
1301
+ compactionStats = {
1302
+ ...compactionStats,
1303
+ inProgress: true,
1304
+ lockHeld: true,
1305
+ lastReason: reason,
1306
+ lastError: undefined,
1307
+ updatedAt: new Date().toISOString(),
1308
+ // Reset run stats before fresh calculation.
1309
+ beforeMessages: 0,
1310
+ afterMessages: 0,
1311
+ freedTokens: 0,
1312
+ archivedToolMessages: 0,
1313
+ droppedMessages: 0,
1314
+ dryRun: false,
1315
+ };
1316
+ try {
1317
+ const result = await runner();
1318
+ compactionStats = {
1319
+ ...compactionStats,
1320
+ ...result,
1321
+ inProgress: false,
1322
+ lockHeld: false,
1323
+ runs: compactionStats.runs + 1,
1324
+ lastReason: reason,
1325
+ updatedAt: new Date().toISOString(),
1326
+ };
1327
+ return result;
1271
1328
  }
1272
- else {
1273
- compacted = enforceContextBudget({
1274
- messages,
1275
- contextWindow,
1276
- maxTokens,
1277
- minTailMessages: opts?.force ? 2 : 12,
1278
- compactAt: opts?.force ? 0.5 : (cfg.compact_at ?? 0.8),
1279
- toolSchemaTokens: estimateToolSchemaTokens(getToolsSchema()),
1280
- force: opts?.force,
1281
- });
1329
+ catch (e) {
1330
+ compactionStats = {
1331
+ ...compactionStats,
1332
+ inProgress: false,
1333
+ lockHeld: false,
1334
+ failedRuns: compactionStats.failedRuns + 1,
1335
+ lastReason: reason,
1336
+ lastError: e?.message ?? String(e),
1337
+ updatedAt: new Date().toISOString(),
1338
+ };
1339
+ throw e;
1282
1340
  }
1283
- const compactedByRefs = new Set(compacted);
1284
- let dropped = messages.filter((m) => !compactedByRefs.has(m));
1285
- if (opts?.topic) {
1286
- const topic = opts.topic.toLowerCase();
1287
- dropped = dropped.filter((m) => !userContentToText(m.content ?? '').toLowerCase().includes(topic));
1288
- const keepFromTopic = messages.filter((m) => userContentToText(m.content ?? '').toLowerCase().includes(topic));
1289
- compacted = [...compacted, ...keepFromTopic.filter((m) => !compactedByRefs.has(m))];
1341
+ finally {
1342
+ release();
1290
1343
  }
1291
- const archivedToolMessages = dropped.filter((m) => m.role === 'tool').length;
1292
- const afterMessages = compacted.length;
1293
- const afterTokens = estimateTokensFromMessages(compacted);
1294
- const freedTokens = Math.max(0, beforeTokens - afterTokens);
1295
- if (!opts?.dry) {
1296
- if (dropped.length && vault) {
1297
- try {
1298
- // Store the original/current user prompt before compaction so it survives context loss.
1299
- let userPromptToPreserve = null;
1300
- for (let i = messages.length - 1; i >= 0; i--) {
1301
- const m = messages[i];
1302
- if (m.role === 'user') {
1303
- const text = userContentToText((m.content ?? '')).trim();
1304
- if (text && !text.startsWith('[Trifecta Vault') && !text.startsWith('[Vault context') && text.length > 20) {
1305
- userPromptToPreserve = text;
1306
- break;
1344
+ };
1345
+ const compactHistory = async (opts) => {
1346
+ const reason = opts?.reason
1347
+ ?? (opts?.hard ? 'manual hard compaction'
1348
+ : opts?.force ? 'manual force compaction'
1349
+ : 'manual compaction');
1350
+ return await runCompactionWithLock(reason, async () => {
1351
+ const beforeMessages = messages.length;
1352
+ const beforeTokens = estimateTokensFromMessages(messages);
1353
+ let compacted;
1354
+ if (opts?.hard) {
1355
+ const sys = messages[0]?.role === 'system' ? [messages[0]] : [];
1356
+ const tail = messages.slice(-2);
1357
+ compacted = [...sys, ...tail];
1358
+ }
1359
+ else {
1360
+ compacted = enforceContextBudget({
1361
+ messages,
1362
+ contextWindow,
1363
+ maxTokens,
1364
+ minTailMessages: opts?.force ? 2 : 12,
1365
+ compactAt: opts?.force ? 0.5 : (cfg.compact_at ?? 0.8),
1366
+ toolSchemaTokens: estimateToolSchemaTokens(getToolsSchema()),
1367
+ force: opts?.force,
1368
+ });
1369
+ }
1370
+ const compactedByRefs = new Set(compacted);
1371
+ let dropped = messages.filter((m) => !compactedByRefs.has(m));
1372
+ if (opts?.topic) {
1373
+ const topic = opts.topic.toLowerCase();
1374
+ dropped = dropped.filter((m) => !userContentToText(m.content ?? '').toLowerCase().includes(topic));
1375
+ const keepFromTopic = messages.filter((m) => userContentToText(m.content ?? '').toLowerCase().includes(topic));
1376
+ compacted = [...compacted, ...keepFromTopic.filter((m) => !compactedByRefs.has(m))];
1377
+ }
1378
+ const archivedToolMessages = dropped.filter((m) => m.role === 'tool').length;
1379
+ const afterMessages = compacted.length;
1380
+ const afterTokens = estimateTokensFromMessages(compacted);
1381
+ const freedTokens = Math.max(0, beforeTokens - afterTokens);
1382
+ if (!opts?.dry) {
1383
+ if (dropped.length && vault) {
1384
+ try {
1385
+ // Store the original/current user prompt before compaction so it survives context loss.
1386
+ let userPromptToPreserve = null;
1387
+ for (let i = messages.length - 1; i >= 0; i--) {
1388
+ const m = messages[i];
1389
+ if (m.role === 'user') {
1390
+ const text = userContentToText((m.content ?? '')).trim();
1391
+ if (text && !text.startsWith('[Trifecta Vault') && !text.startsWith('[Vault context') && text.length > 20) {
1392
+ userPromptToPreserve = text;
1393
+ break;
1394
+ }
1307
1395
  }
1308
1396
  }
1397
+ if (userPromptToPreserve) {
1398
+ await vault.upsertNote('current_task', userPromptToPreserve.slice(0, 2000), 'system');
1399
+ }
1400
+ await vault.archiveToolMessages(dropped, new Map());
1401
+ await vault.note('compaction_summary', `Dropped ${dropped.length} messages (${freedTokens} tokens).`);
1309
1402
  }
1310
- if (userPromptToPreserve) {
1311
- await vault.upsertNote('current_task', userPromptToPreserve.slice(0, 2000), 'system');
1403
+ catch {
1404
+ // best-effort
1312
1405
  }
1313
- await vault.archiveToolMessages(dropped, new Map());
1314
- await vault.note('compaction_summary', `Dropped ${dropped.length} messages (${freedTokens} tokens).`);
1315
- }
1316
- catch {
1317
- // best-effort
1318
1406
  }
1319
- }
1320
- messages = compacted;
1321
- if (dropped.length) {
1322
- messages.push({ role: 'system', content: buildCompactionSystemNote('manual', dropped.length) });
1323
- await injectVaultContext().catch(() => { });
1324
- if (opts?.reason || opts?.force) {
1325
- injectCompactionReminder(opts?.reason ?? 'history compaction');
1407
+ messages = compacted;
1408
+ if (dropped.length) {
1409
+ messages.push({ role: 'system', content: buildCompactionSystemNote('manual', dropped.length) });
1410
+ await injectVaultContext().catch(() => { });
1411
+ if (opts?.reason || opts?.force) {
1412
+ injectCompactionReminder(opts?.reason ?? 'history compaction');
1413
+ }
1326
1414
  }
1327
1415
  }
1328
- }
1329
- return {
1330
- beforeMessages,
1331
- afterMessages,
1332
- freedTokens,
1333
- archivedToolMessages,
1334
- droppedMessages: dropped.length,
1335
- dryRun: !!opts?.dry,
1336
- };
1416
+ return {
1417
+ beforeMessages,
1418
+ afterMessages,
1419
+ freedTokens,
1420
+ archivedToolMessages,
1421
+ droppedMessages: dropped.length,
1422
+ dryRun: !!opts?.dry,
1423
+ };
1424
+ });
1337
1425
  };
1338
1426
  const cumulativeUsage = { prompt: 0, completion: 0 };
1339
1427
  const turnDurationsMs = [];
@@ -1342,6 +1430,17 @@ export async function createSession(opts) {
1342
1430
  const tgSamples = [];
1343
1431
  let lastTurnMetrics;
1344
1432
  let lastServerHealth;
1433
+ let lastToolLoopStats = { totalHistory: 0, signatures: [], outcomes: [], telemetry: {
1434
+ callsRegistered: 0,
1435
+ dedupedReplays: 0,
1436
+ readCacheLookups: 0,
1437
+ readCacheHits: 0,
1438
+ warnings: 0,
1439
+ criticals: 0,
1440
+ recoveryRecommended: 0,
1441
+ readCacheHitRate: 0,
1442
+ dedupeRate: 0,
1443
+ } };
1345
1444
  let lastModelsProbeMs = 0;
1346
1445
  const capturesDir = path.join(stateDir(), 'captures');
1347
1446
  let captureEnabled = false;
@@ -1690,6 +1789,10 @@ export async function createSession(opts) {
1690
1789
  await hookObj.onToolResult?.(result);
1691
1790
  await hookManager.emit('tool_result', { askId, turn: turns, result });
1692
1791
  };
1792
+ const emitToolLoop = async (loop) => {
1793
+ await hookObj.onToolLoop?.(loop);
1794
+ await hookManager.emit('tool_loop', { askId, turn: turns, loop });
1795
+ };
1693
1796
  const emitTurnEnd = async (stats) => {
1694
1797
  await hookObj.onTurnEnd?.(stats);
1695
1798
  await hookManager.emit('turn_end', { askId, stats });
@@ -1825,9 +1928,41 @@ export async function createSession(opts) {
1825
1928
  const blockedExecAttemptsBySig = new Map();
1826
1929
  // Cache successful read-only exec observations by exact signature.
1827
1930
  const execObservationCacheBySig = new Map();
1828
- // Cache successful read_file/read_files/list_dir results by exact signature.
1931
+ // Cache ALL successful exec results so repeated identical calls under context
1932
+ // pressure can replay the cached result instead of re-executing.
1933
+ const lastExecResultBySig = new Map();
1934
+ // Cache successful read_file/read_files/list_dir results by signature + mtime for invalidation.
1829
1935
  const readFileCacheBySig = new Map();
1830
1936
  const READ_FILE_CACHE_TOOLS = new Set(['read_file', 'read_files', 'list_dir']);
1937
+ const toolLoopGuard = new ToolLoopGuard({
1938
+ enabled: cfg.tool_loop_detection?.enabled,
1939
+ historySize: cfg.tool_loop_detection?.history_size,
1940
+ warningThreshold: cfg.tool_loop_detection?.warning_threshold,
1941
+ criticalThreshold: cfg.tool_loop_detection?.critical_threshold,
1942
+ globalCircuitBreakerThreshold: cfg.tool_loop_detection?.global_circuit_breaker_threshold,
1943
+ readCacheTtlMs: cfg.tool_loop_detection?.read_cache_ttl_ms,
1944
+ detectors: {
1945
+ genericRepeat: cfg.tool_loop_detection?.detectors?.generic_repeat,
1946
+ knownPollNoProgress: cfg.tool_loop_detection?.detectors?.known_poll_no_progress,
1947
+ pingPong: cfg.tool_loop_detection?.detectors?.ping_pong,
1948
+ },
1949
+ perTool: Object.fromEntries(Object.entries(cfg.tool_loop_detection?.per_tool ?? {}).map(([tool, policy]) => [
1950
+ tool,
1951
+ {
1952
+ warningThreshold: policy?.warning_threshold,
1953
+ criticalThreshold: policy?.critical_threshold,
1954
+ globalCircuitBreakerThreshold: policy?.global_circuit_breaker_threshold,
1955
+ detectors: {
1956
+ genericRepeat: policy?.detectors?.generic_repeat,
1957
+ knownPollNoProgress: policy?.detectors?.known_poll_no_progress,
1958
+ pingPong: policy?.detectors?.ping_pong,
1959
+ },
1960
+ },
1961
+ ])),
1962
+ });
1963
+ const toolLoopWarningKeys = new Set();
1964
+ let forceToollessRecoveryTurn = false;
1965
+ let toollessRecoveryUsed = false;
1831
1966
  // Prevent repeating the same "stop rerunning" reminder every turn.
1832
1967
  const readOnlyExecHintedSigs = new Set();
1833
1968
  // Keep a lightweight breadcrumb for diagnostics on partial failures.
@@ -1950,51 +2085,63 @@ export async function createSession(opts) {
1950
2085
  throw new Error(`session timeout exceeded (${cfg.timeout}s) after ${wallElapsed.toFixed(1)}s`);
1951
2086
  }
1952
2087
  await maybeAutoDetectModelChange();
1953
- const beforeMsgs = messages;
1954
- const compacted = enforceContextBudget({
1955
- messages: beforeMsgs,
1956
- contextWindow,
1957
- maxTokens: maxTokens,
1958
- minTailMessages: 12,
1959
- compactAt: cfg.compact_at ?? 0.8,
1960
- toolSchemaTokens: estimateToolSchemaTokens(getToolsSchema()),
1961
- });
1962
- const compactedByRefs = new Set(compacted);
1963
- const dropped = beforeMsgs.filter((m) => !compactedByRefs.has(m));
1964
- if (dropped.length && vault) {
1965
- try {
1966
- // Store the original/current user prompt before compaction so it survives context loss.
1967
- // Find the last substantive user message that looks like a task/instruction.
1968
- let userPromptToPreserve = null;
1969
- for (let i = beforeMsgs.length - 1; i >= 0; i--) {
1970
- const m = beforeMsgs[i];
1971
- if (m.role === 'user') {
1972
- const text = userContentToText((m.content ?? '')).trim();
1973
- // Skip vault injection messages and short prompts
1974
- if (text && !text.startsWith('[Trifecta Vault') && !text.startsWith('[Vault context') && text.length > 20) {
1975
- userPromptToPreserve = text;
1976
- break;
2088
+ await runCompactionWithLock('auto context-budget compaction', async () => {
2089
+ const beforeMsgs = messages;
2090
+ const beforeTokens = estimateTokensFromMessages(beforeMsgs);
2091
+ const compacted = enforceContextBudget({
2092
+ messages: beforeMsgs,
2093
+ contextWindow,
2094
+ maxTokens: maxTokens,
2095
+ minTailMessages: 12,
2096
+ compactAt: cfg.compact_at ?? 0.8,
2097
+ toolSchemaTokens: estimateToolSchemaTokens(getToolsSchema()),
2098
+ });
2099
+ const compactedByRefs = new Set(compacted);
2100
+ const dropped = beforeMsgs.filter((m) => !compactedByRefs.has(m));
2101
+ if (dropped.length && vault) {
2102
+ try {
2103
+ // Store the original/current user prompt before compaction so it survives context loss.
2104
+ // Find the last substantive user message that looks like a task/instruction.
2105
+ let userPromptToPreserve = null;
2106
+ for (let i = beforeMsgs.length - 1; i >= 0; i--) {
2107
+ const m = beforeMsgs[i];
2108
+ if (m.role === 'user') {
2109
+ const text = userContentToText((m.content ?? '')).trim();
2110
+ // Skip vault injection messages and short prompts
2111
+ if (text && !text.startsWith('[Trifecta Vault') && !text.startsWith('[Vault context') && text.length > 20) {
2112
+ userPromptToPreserve = text;
2113
+ break;
2114
+ }
1977
2115
  }
1978
2116
  }
2117
+ if (userPromptToPreserve) {
2118
+ await vault.upsertNote('current_task', userPromptToPreserve.slice(0, 2000), 'system');
2119
+ }
2120
+ const toArchive = lens
2121
+ ? await Promise.all(dropped.map((m) => archiveToolOutputForVault(m)))
2122
+ : dropped;
2123
+ await vault.archiveToolMessages(toArchive, toolNameByCallId);
1979
2124
  }
1980
- if (userPromptToPreserve) {
1981
- await vault.upsertNote('current_task', userPromptToPreserve.slice(0, 2000), 'system');
2125
+ catch (e) {
2126
+ console.warn(`[warn] vault archive failed: ${e instanceof Error ? e.message : String(e)}`);
1982
2127
  }
1983
- const toArchive = lens
1984
- ? await Promise.all(dropped.map((m) => archiveToolOutputForVault(m)))
1985
- : dropped;
1986
- await vault.archiveToolMessages(toArchive, toolNameByCallId);
1987
2128
  }
1988
- catch (e) {
1989
- console.warn(`[warn] vault archive failed: ${e instanceof Error ? e.message : String(e)}`);
2129
+ messages = compacted;
2130
+ if (dropped.length) {
2131
+ messages.push({ role: 'system', content: buildCompactionSystemNote('auto', dropped.length) });
2132
+ await injectVaultContext().catch(() => { });
2133
+ injectCompactionReminder('auto context-budget compaction');
1990
2134
  }
1991
- }
1992
- messages = compacted;
1993
- if (dropped.length) {
1994
- messages.push({ role: 'system', content: buildCompactionSystemNote('auto', dropped.length) });
1995
- await injectVaultContext().catch(() => { });
1996
- injectCompactionReminder('auto context-budget compaction');
1997
- }
2135
+ const afterTokens = estimateTokensFromMessages(compacted);
2136
+ return {
2137
+ beforeMessages: beforeMsgs.length,
2138
+ afterMessages: compacted.length,
2139
+ freedTokens: Math.max(0, beforeTokens - afterTokens),
2140
+ archivedToolMessages: dropped.filter((m) => m.role === 'tool').length,
2141
+ droppedMessages: dropped.length,
2142
+ dryRun: false,
2143
+ };
2144
+ });
1998
2145
  const ac = makeAbortController();
1999
2146
  inFlight = ac;
2000
2147
  // If caller provided an AbortSignal (bench iteration timeout, etc), propagate it.
@@ -2019,11 +2166,13 @@ export async function createSession(opts) {
2019
2166
  let resp;
2020
2167
  try {
2021
2168
  try {
2169
+ const toolsForTurn = forceToollessRecoveryTurn ? [] : getToolsSchema();
2170
+ const toolChoiceForTurn = forceToollessRecoveryTurn ? 'none' : 'auto';
2022
2171
  resp = await client.chatStream({
2023
2172
  model,
2024
2173
  messages,
2025
- tools: getToolsSchema(),
2026
- tool_choice: 'auto',
2174
+ tools: toolsForTurn,
2175
+ tool_choice: toolChoiceForTurn,
2027
2176
  temperature,
2028
2177
  top_p: topP,
2029
2178
  max_tokens: maxTokens,
@@ -2113,6 +2262,8 @@ export async function createSession(opts) {
2113
2262
  },
2114
2263
  }
2115
2264
  : undefined;
2265
+ const wasToollessRecoveryTurn = forceToollessRecoveryTurn;
2266
+ forceToollessRecoveryTurn = false;
2116
2267
  const choice0 = resp.choices?.[0] ?? legacyChoice;
2117
2268
  const finishReason = choice0?.finish_reason ?? 'unknown';
2118
2269
  const msg = choice0?.message;
@@ -2163,6 +2314,10 @@ export async function createSession(opts) {
2163
2314
  }
2164
2315
  }
2165
2316
  }
2317
+ if (wasToollessRecoveryTurn && toolCallsArr?.length) {
2318
+ // Recovery turn explicitly disables tools; ignore any stray tool-call output.
2319
+ toolCallsArr = undefined;
2320
+ }
2166
2321
  if (cfg.verbose) {
2167
2322
  console.warn(`[turn ${turns}] finish_reason=${finishReason} content_chars=${content.length} visible_chars=${visible.length} tool_calls=${toolCallsArr?.length ?? 0}`);
2168
2323
  }
@@ -2241,12 +2396,17 @@ export async function createSession(opts) {
2241
2396
  // narration chunk starts on a fresh line (avoids wall-of-text output).
2242
2397
  if (visible && hookObj.onToken)
2243
2398
  hookObj.onToken('\n');
2244
- toolCalls += toolCallsArr.length;
2399
+ const originalToolCallsArr = toolCallsArr;
2400
+ const preparedTurn = toolLoopGuard.prepareTurn(originalToolCallsArr);
2401
+ const replayByCallId = preparedTurn.replayByCallId;
2402
+ const parsedArgsByCallId = preparedTurn.parsedArgsByCallId;
2403
+ toolCallsArr = preparedTurn.uniqueCalls;
2404
+ toolCalls += originalToolCallsArr.length;
2245
2405
  const assistantToolCallText = visible || '';
2246
2406
  const compactAssistantToolCallText = assistantToolCallText.length > 900
2247
2407
  ? `${assistantToolCallText.slice(0, 900)}\n[history-compacted: assistant narration truncated before tool execution]`
2248
2408
  : assistantToolCallText;
2249
- messages.push({ role: 'assistant', content: compactAssistantToolCallText, tool_calls: toolCallsArr });
2409
+ messages.push({ role: 'assistant', content: compactAssistantToolCallText, tool_calls: originalToolCallsArr });
2250
2410
  // sigCounts is scoped to the entire ask() run (see above)
2251
2411
  // Bridge ConfirmationProvider → legacy confirm callback for tools.
2252
2412
  // If a ConfirmationProvider is given, wrap it; otherwise fall back to raw callback.
@@ -2290,20 +2450,64 @@ export async function createSession(opts) {
2290
2450
  // We only treat repeated exec as a loop if no file mutations happened since the
2291
2451
  // last time we saw that exact exec signature.
2292
2452
  const turnSigs = new Set();
2453
+ const sigMetaBySig = new Map();
2293
2454
  for (const tc of toolCallsArr) {
2294
- const sig = `${tc.function.name}:${tc.function.arguments ?? '{}'}`;
2455
+ const callId = resolveCallId(tc);
2456
+ const parsedArgs = parsedArgsByCallId.get(callId) ?? {};
2457
+ const sig = toolLoopGuard.computeSignature(tc.function.name, parsedArgs);
2295
2458
  turnSigs.add(sig);
2459
+ if (!sigMetaBySig.has(sig)) {
2460
+ sigMetaBySig.set(sig, { toolName: tc.function.name, args: parsedArgs });
2461
+ }
2296
2462
  }
2297
2463
  // Repeated read-only exec calls can be served from cache instead of hard-breaking.
2298
2464
  const repeatedReadOnlyExecSigs = new Set();
2299
2465
  const readOnlyExecTurnHints = [];
2466
+ // Repeated exec calls (any kind) can replay cached results under pressure.
2467
+ const replayExecSigs = new Set();
2300
2468
  // Repeated read_file/read_files/list_dir calls can be served from cache.
2301
2469
  const repeatedReadFileSigs = new Set();
2470
+ let shouldForceToollessRecovery = false;
2471
+ const criticalLoopSigs = new Set();
2472
+ for (const tc of toolCallsArr) {
2473
+ const callId = resolveCallId(tc);
2474
+ const args = parsedArgsByCallId.get(callId) ?? {};
2475
+ const detected = toolLoopGuard.detect(tc.function.name, args);
2476
+ const warning = toolLoopGuard.formatWarning(detected, tc.function.name);
2477
+ if (warning) {
2478
+ const warningKey = `${warning.level}:${warning.detector}:${detected.signature}`;
2479
+ if (!toolLoopWarningKeys.has(warningKey)) {
2480
+ toolLoopWarningKeys.add(warningKey);
2481
+ await emitToolLoop({
2482
+ level: warning.level,
2483
+ detector: warning.detector,
2484
+ toolName: warning.toolName,
2485
+ count: warning.count,
2486
+ message: warning.message,
2487
+ });
2488
+ messages.push({
2489
+ role: 'system',
2490
+ content: `[tool-loop ${warning.level}] ${warning.message}. Stop repeating ${warning.toolName} with unchanged inputs; continue with analysis or next step.`,
2491
+ });
2492
+ }
2493
+ }
2494
+ if (toolLoopGuard.shouldDisableToolsNextTurn(detected)) {
2495
+ shouldForceToollessRecovery = true;
2496
+ criticalLoopSigs.add(detected.signature);
2497
+ }
2498
+ }
2302
2499
  // Track whether a mutation happened since a given signature was last seen.
2303
2500
  // (Tool-loop is single-threaded across turns; this is safe to keep in-memory.)
2304
2501
  for (const sig of turnSigs) {
2305
2502
  sigCounts.set(sig, (sigCounts.get(sig) ?? 0) + 1);
2306
- const toolName = sig.split(':')[0];
2503
+ const sigMeta = sigMetaBySig.get(sig);
2504
+ const toolName = sigMeta?.toolName ?? sig.split(':')[0];
2505
+ if (criticalLoopSigs.has(sig)) {
2506
+ // Critical detector already fired for this signature; recover next turn
2507
+ // with tools disabled instead of throwing in per-tool hard-break logic.
2508
+ shouldForceToollessRecovery = true;
2509
+ continue;
2510
+ }
2307
2511
  // For exec loops, only break if nothing changed since last identical exec.
2308
2512
  if (toolName === 'exec') {
2309
2513
  // If this exact exec signature was seen before, record the mutation version at that time.
@@ -2314,6 +2518,18 @@ export async function createSession(opts) {
2314
2518
  mutationVersionBySig.set(sig, mutationVersion);
2315
2519
  if (!hasMutatedSince) {
2316
2520
  const count = sigCounts.get(sig) ?? 0;
2521
+ // Early replay: if this exact exec was already run (count >= 2) and
2522
+ // we have a cached result, replay it instead of re-executing. This
2523
+ // prevents the compaction death spiral where tool results get dropped,
2524
+ // the model forgets it ran the command, and re-runs it endlessly.
2525
+ // Skip read-only commands that already have their own observation cache —
2526
+ // those are handled by the dedicated read-only path at loopThreshold.
2527
+ const command = execCommandFromSig(sig);
2528
+ const hasReadOnlyCache = looksLikeReadOnlyExecCommand(command) && execObservationCacheBySig.has(sig);
2529
+ if (count >= 2 && lastExecResultBySig.has(sig) && !hasReadOnlyCache) {
2530
+ replayExecSigs.add(sig);
2531
+ continue;
2532
+ }
2317
2533
  let loopThreshold = harness.quirks.loopsOnToolError ? 3 : 6;
2318
2534
  // If the cached observation already tells the model "no matches found",
2319
2535
  // break much earlier — the model is ignoring the hint.
@@ -2326,7 +2542,8 @@ export async function createSession(opts) {
2326
2542
  await injectVaultContext().catch(() => { });
2327
2543
  }
2328
2544
  if (count >= loopThreshold) {
2329
- const command = execCommandFromSig(sig);
2545
+ const sigArgs = sigMetaBySig.get(sig)?.args ?? {};
2546
+ const command = typeof sigArgs?.command === 'string' ? String(sigArgs.command) : '';
2330
2547
  const canReuseReadOnlyObservation = looksLikeReadOnlyExecCommand(command) &&
2331
2548
  execObservationCacheBySig.has(sig);
2332
2549
  if (canReuseReadOnlyObservation) {
@@ -2337,8 +2554,8 @@ export async function createSession(opts) {
2337
2554
  }
2338
2555
  continue;
2339
2556
  }
2340
- const args = sig.slice(toolName.length + 1);
2341
- const argsPreview = args.length > 220 ? args.slice(0, 220) + '…' : args;
2557
+ const argsPreviewRaw = JSON.stringify(sigArgs);
2558
+ const argsPreview = argsPreviewRaw.length > 220 ? argsPreviewRaw.slice(0, 220) + '…' : argsPreviewRaw;
2342
2559
  throw new Error(`tool ${toolName}: identical call repeated ${loopThreshold}x across turns; breaking loop. ` +
2343
2560
  `args=${argsPreview}`);
2344
2561
  }
@@ -2396,23 +2613,30 @@ export async function createSession(opts) {
2396
2613
  content: `CRITICAL: DO NOT make another identical call for this ${resourceType}. It HAS NOT CHANGED. You already have the content. Move on to the NEXT step NOW.`,
2397
2614
  });
2398
2615
  }
2399
- if (readFileCacheBySig.has(sig)) {
2616
+ const argsForSig = sigMetaBySig.get(sig)?.args ?? {};
2617
+ const replay = await toolLoopGuard.getReadCacheReplay(toolName, argsForSig, ctx.cwd);
2618
+ if (replay) {
2400
2619
  repeatedReadFileSigs.add(sig);
2401
2620
  continue;
2402
2621
  }
2403
2622
  }
2404
- // Hard-break at threshold
2623
+ // Deterministic recovery at threshold (no hard throw): force one no-tools turn.
2405
2624
  if (consec >= hardBreakAt) {
2406
- throw new Error(`tool ${toolName}: identical read repeated ${consec}x consecutively; breaking loop. ` +
2407
- `The resource content has not changed between reads.`);
2625
+ shouldForceToollessRecovery = true;
2626
+ messages.push({
2627
+ role: 'system',
2628
+ content: `[tool-loop critical] ${toolName} repeated ${consec}x with unchanged inputs. ` +
2629
+ 'Next turn will run with tools disabled so you must use existing results and provide a concrete next step/final response.',
2630
+ });
2408
2631
  }
2409
2632
  continue;
2410
2633
  }
2411
2634
  // Default behavior for mutating/other tools: break on repeated identical signature.
2412
2635
  const loopThreshold = harness.quirks.loopsOnToolError ? 2 : 3;
2413
2636
  if ((sigCounts.get(sig) ?? 0) >= loopThreshold) {
2414
- const args = sig.slice(toolName.length + 1);
2415
- const argsPreview = args.length > 220 ? args.slice(0, 220) + '…' : args;
2637
+ const argsObj = sigMetaBySig.get(sig)?.args ?? {};
2638
+ const argsRaw = JSON.stringify(argsObj);
2639
+ const argsPreview = argsRaw.length > 220 ? argsRaw.slice(0, 220) + '…' : argsRaw;
2416
2640
  throw new Error(`tool ${toolName}: identical call repeated ${loopThreshold}x across turns; breaking loop. ` +
2417
2641
  `args=${argsPreview}\n` +
2418
2642
  `Hint: you repeated the same tool call ${loopThreshold} times with identical arguments. ` +
@@ -2423,6 +2647,31 @@ export async function createSession(opts) {
2423
2647
  }
2424
2648
  // Update consecutive tracking: save this turn's signatures for next turn comparison.
2425
2649
  lastTurnSigs = turnSigs;
2650
+ if (shouldForceToollessRecovery) {
2651
+ if (!toollessRecoveryUsed) {
2652
+ forceToollessRecoveryTurn = true;
2653
+ toollessRecoveryUsed = true;
2654
+ messages.push({
2655
+ role: 'user',
2656
+ content: '[system] Critical tool loop detected. Next turn will run with tools disabled. ' +
2657
+ 'Use already available tool results to provide a concrete next step or final response; do not request more tools.',
2658
+ });
2659
+ await emitTurnEnd({
2660
+ turn: turns,
2661
+ toolCalls,
2662
+ promptTokens: cumulativeUsage.prompt,
2663
+ completionTokens: cumulativeUsage.completion,
2664
+ promptTokensTurn,
2665
+ completionTokensTurn,
2666
+ ttftMs,
2667
+ ttcMs,
2668
+ ppTps,
2669
+ tgTps,
2670
+ });
2671
+ continue;
2672
+ }
2673
+ throw new AgentLoopBreak('critical tool-loop persisted after one tools-disabled recovery turn. Stopping to avoid infinite loop.');
2674
+ }
2426
2675
  const runOne = async (tc) => {
2427
2676
  const name = tc.function.name;
2428
2677
  const rawArgs = tc.function.arguments ?? '{}';
@@ -2452,6 +2701,7 @@ export async function createSession(opts) {
2452
2701
  throw new Error(`unknown tool: ${name}`);
2453
2702
  // Keep parsed args by call-id so we can digest/archive tool outputs with context.
2454
2703
  toolArgsByCallId.set(callId, args && typeof args === 'object' && !Array.isArray(args) ? args : {});
2704
+ toolLoopGuard.registerCall(name, args && typeof args === 'object' && !Array.isArray(args) ? args : {}, callId);
2455
2705
  // Pre-dispatch argument validation.
2456
2706
  // - Required params
2457
2707
  // - Type/range/enums
@@ -2556,7 +2806,7 @@ export async function createSession(opts) {
2556
2806
  return { id: callId, content: '[skipped by user: step mode]' };
2557
2807
  }
2558
2808
  }
2559
- const sig = `${name}:${rawArgs || '{}'}`;
2809
+ const sig = toolLoopGuard.computeSignature(name, args && typeof args === 'object' && !Array.isArray(args) ? args : {});
2560
2810
  let content = '';
2561
2811
  let reusedCachedReadOnlyExec = false;
2562
2812
  let reusedCachedReadTool = false;
@@ -2567,10 +2817,18 @@ export async function createSession(opts) {
2567
2817
  reusedCachedReadOnlyExec = true;
2568
2818
  }
2569
2819
  }
2570
- if (READ_FILE_CACHE_TOOLS.has(name) && repeatedReadFileSigs.has(sig)) {
2571
- const cached = readFileCacheBySig.get(sig);
2820
+ // Replay any exec result (read-only or not) when the loop detector flagged it.
2821
+ if (name === 'exec' && !reusedCachedReadOnlyExec && replayExecSigs.has(sig)) {
2822
+ const cached = lastExecResultBySig.get(sig);
2572
2823
  if (cached) {
2573
- content = withCachedReadObservationHint(cached);
2824
+ content = withReplayedExecHint(cached);
2825
+ reusedCachedReadOnlyExec = true; // skip re-execution below
2826
+ }
2827
+ }
2828
+ if (READ_FILE_CACHE_TOOLS.has(name) && repeatedReadFileSigs.has(sig)) {
2829
+ const replay = await toolLoopGuard.getReadCacheReplay(name, args, ctx.cwd);
2830
+ if (replay) {
2831
+ content = replay;
2574
2832
  reusedCachedReadTool = true;
2575
2833
  }
2576
2834
  }
@@ -2588,11 +2846,15 @@ export async function createSession(opts) {
2588
2846
  const value = await builtInFn(callCtx, args);
2589
2847
  content = typeof value === 'string' ? value : JSON.stringify(value);
2590
2848
  if (READ_FILE_CACHE_TOOLS.has(name) && typeof content === 'string' && !content.startsWith('ERROR:')) {
2591
- readFileCacheBySig.set(sig, content);
2849
+ const baseCwd = typeof args?.cwd === 'string' ? String(args.cwd) : ctx.cwd;
2850
+ await toolLoopGuard.storeReadCache(name, args, baseCwd, content);
2592
2851
  }
2593
2852
  if (name === 'exec') {
2594
2853
  // Successful exec clears blocked-loop counters.
2595
2854
  blockedExecAttemptsBySig.clear();
2855
+ // Cache every exec result so repeated calls under context pressure
2856
+ // can replay the result instead of re-executing.
2857
+ lastExecResultBySig.set(sig, content);
2596
2858
  const cmd = String(args?.command ?? '');
2597
2859
  if (looksLikeReadOnlyExecCommand(cmd) && readOnlyExecCacheable(content)) {
2598
2860
  execObservationCacheBySig.set(sig, content);
@@ -2731,6 +2993,10 @@ export async function createSession(opts) {
2731
2993
  }
2732
2994
  }
2733
2995
  }
2996
+ toolLoopGuard.registerOutcome(name, args, {
2997
+ toolCallId: callId,
2998
+ result: content,
2999
+ });
2734
3000
  return { id: callId, content };
2735
3001
  };
2736
3002
  const results = [];
@@ -2751,7 +3017,8 @@ export async function createSession(opts) {
2751
3017
  // Applies to direct exec attempts and spawn_task delegation attempts.
2752
3018
  if (tc.function.name === 'exec' || tc.function.name === 'spawn_task') {
2753
3019
  const blockedMatch = msg.match(/^exec:\s*blocked\s*\(([^)]+)\)\s*without --no-confirm\/--yolo:\s*(.*)$/i)
2754
- || msg.match(/^(spawn_task):\s*blocked\s*—\s*(.*)$/i);
3020
+ || msg.match(/^(spawn_task):\s*blocked\s*—\s*(.*)$/i)
3021
+ || msg.match(/^exec:\s*blocked\s+(background command\b[^.]*)\./i);
2755
3022
  if (blockedMatch) {
2756
3023
  const reason = (blockedMatch[1] || blockedMatch[2] || 'blocked command').trim();
2757
3024
  let parsedArgs = {};
@@ -2763,7 +3030,8 @@ export async function createSession(opts) {
2763
3030
  ? String(parsedArgs?.command ?? '')
2764
3031
  : String(parsedArgs?.task ?? '');
2765
3032
  const normalizedReason = reason.toLowerCase();
2766
- const aggregateByReason = normalizedReason.includes('package install/remove');
3033
+ const aggregateByReason = normalizedReason.includes('package install/remove')
3034
+ || normalizedReason.includes('background command');
2767
3035
  const sig = aggregateByReason
2768
3036
  ? `${tc.function.name}|${reason}`
2769
3037
  : `${tc.function.name}|${reason}|${cmd}`;
@@ -2785,6 +3053,20 @@ export async function createSession(opts) {
2785
3053
  retryable: te.retryable,
2786
3054
  result: toolErrorContent,
2787
3055
  });
3056
+ let parsedArgs = {};
3057
+ try {
3058
+ const parsed = JSON.parse(tc.function.arguments ?? '{}');
3059
+ if (parsed && typeof parsed === 'object' && !Array.isArray(parsed)) {
3060
+ parsedArgs = parsed;
3061
+ }
3062
+ }
3063
+ catch {
3064
+ // keep empty object
3065
+ }
3066
+ toolLoopGuard.registerOutcome(tc.function.name, parsedArgs, {
3067
+ toolCallId: callId,
3068
+ error: msg,
3069
+ });
2788
3070
  return { id: callId, content: toolErrorContent };
2789
3071
  };
2790
3072
  // ── Anti-scan guardrails (§ read budget, dir scan, same-search) ──
@@ -2848,6 +3130,19 @@ export async function createSession(opts) {
2848
3130
  }
2849
3131
  }
2850
3132
  }
3133
+ if (replayByCallId.size > 0) {
3134
+ const canonicalById = new Map(results.map((r) => [r.id, r.content]));
3135
+ for (const [dupId, canonicalId] of replayByCallId.entries()) {
3136
+ const canonical = canonicalById.get(canonicalId);
3137
+ if (canonical == null)
3138
+ continue;
3139
+ results.push({
3140
+ id: dupId,
3141
+ content: `[idlehands dedupe] Identical tool call replayed from ${canonicalId}. ` +
3142
+ 'Use that earlier tool result; no new execution was performed.',
3143
+ });
3144
+ }
3145
+ }
2851
3146
  // Bail immediately if cancelled during tool execution
2852
3147
  if (ac.signal.aborted)
2853
3148
  break;
@@ -2895,6 +3190,8 @@ export async function createSession(opts) {
2895
3190
  'Do not narrate. Fix required/mistyped fields and unknown keys.',
2896
3191
  });
2897
3192
  }
3193
+ // Update session-level tool loop stats for observability
3194
+ lastToolLoopStats = toolLoopGuard.getStats();
2898
3195
  // Hook: onTurnEnd (Phase 8.5)
2899
3196
  await emitTurnEnd({
2900
3197
  turn: turns,
@@ -3068,6 +3365,7 @@ export async function createSession(opts) {
3068
3365
  listModels,
3069
3366
  refreshServerHealth,
3070
3367
  getPerfSummary,
3368
+ getToolLoopStats: () => lastToolLoopStats,
3071
3369
  captureOn,
3072
3370
  captureOff,
3073
3371
  captureLast,
@@ -3105,6 +3403,9 @@ export async function createSession(opts) {
3105
3403
  get planSteps() {
3106
3404
  return planSteps;
3107
3405
  },
3406
+ get compactionStats() {
3407
+ return { ...compactionStats };
3408
+ },
3108
3409
  executePlanStep,
3109
3410
  clearPlan,
3110
3411
  compactHistory