@visorcraft/idlehands 1.1.17 → 1.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (212) hide show
  1. package/dist/agent/formatting.js +30 -13
  2. package/dist/agent/formatting.js.map +1 -1
  3. package/dist/agent/review-artifact.js +12 -8
  4. package/dist/agent/review-artifact.js.map +1 -1
  5. package/dist/agent/tool-calls.js +57 -20
  6. package/dist/agent/tool-calls.js.map +1 -1
  7. package/dist/agent/tool-loop-detection.js +310 -0
  8. package/dist/agent/tool-loop-detection.js.map +1 -0
  9. package/dist/agent/tool-loop-guard.js +251 -0
  10. package/dist/agent/tool-loop-guard.js.map +1 -0
  11. package/dist/agent.js +460 -144
  12. package/dist/agent.js.map +1 -1
  13. package/dist/anton/controller.js +46 -30
  14. package/dist/anton/controller.js.map +1 -1
  15. package/dist/anton/lock.js +5 -1
  16. package/dist/anton/lock.js.map +1 -1
  17. package/dist/anton/parser.js +18 -19
  18. package/dist/anton/parser.js.map +1 -1
  19. package/dist/anton/prompt.js +42 -11
  20. package/dist/anton/prompt.js.map +1 -1
  21. package/dist/anton/reporter.js.map +1 -1
  22. package/dist/anton/session.js.map +1 -1
  23. package/dist/anton/verifier.js +3 -5
  24. package/dist/anton/verifier.js.map +1 -1
  25. package/dist/bench/compare.js +53 -20
  26. package/dist/bench/compare.js.map +1 -1
  27. package/dist/bench/openclaw.js +4 -4
  28. package/dist/bench/openclaw.js.map +1 -1
  29. package/dist/bench/report.js +11 -3
  30. package/dist/bench/report.js.map +1 -1
  31. package/dist/bench/runner.js +20 -14
  32. package/dist/bench/runner.js.map +1 -1
  33. package/dist/bot/commands.js +65 -31
  34. package/dist/bot/commands.js.map +1 -1
  35. package/dist/bot/confirm-discord.js +32 -9
  36. package/dist/bot/confirm-discord.js.map +1 -1
  37. package/dist/bot/confirm-telegram.js +26 -10
  38. package/dist/bot/confirm-telegram.js.map +1 -1
  39. package/dist/bot/dir-guard.js +18 -3
  40. package/dist/bot/dir-guard.js.map +1 -1
  41. package/dist/bot/discord-routing.js +28 -4
  42. package/dist/bot/discord-routing.js.map +1 -1
  43. package/dist/bot/discord-streaming.js +3 -3
  44. package/dist/bot/discord-streaming.js.map +1 -1
  45. package/dist/bot/discord.js +82 -37
  46. package/dist/bot/discord.js.map +1 -1
  47. package/dist/bot/escalation.js +124 -0
  48. package/dist/bot/escalation.js.map +1 -0
  49. package/dist/bot/format.js +2 -5
  50. package/dist/bot/format.js.map +1 -1
  51. package/dist/bot/session-manager.js +17 -6
  52. package/dist/bot/session-manager.js.map +1 -1
  53. package/dist/bot/telegram.js +88 -28
  54. package/dist/bot/telegram.js.map +1 -1
  55. package/dist/cli/agent-turn.js +10 -4
  56. package/dist/cli/agent-turn.js.map +1 -1
  57. package/dist/cli/args.js +51 -9
  58. package/dist/cli/args.js.map +1 -1
  59. package/dist/cli/bot.js +19 -9
  60. package/dist/cli/bot.js.map +1 -1
  61. package/dist/cli/build-repl-context.js +60 -26
  62. package/dist/cli/build-repl-context.js.map +1 -1
  63. package/dist/cli/command-registry.js.map +1 -1
  64. package/dist/cli/commands/anton.js +5 -3
  65. package/dist/cli/commands/anton.js.map +1 -1
  66. package/dist/cli/commands/editing.js +27 -12
  67. package/dist/cli/commands/editing.js.map +1 -1
  68. package/dist/cli/commands/model.js +16 -7
  69. package/dist/cli/commands/model.js.map +1 -1
  70. package/dist/cli/commands/project.js +52 -17
  71. package/dist/cli/commands/project.js.map +1 -1
  72. package/dist/cli/commands/runtime.js +1 -1
  73. package/dist/cli/commands/runtime.js.map +1 -1
  74. package/dist/cli/commands/secrets.js +279 -0
  75. package/dist/cli/commands/secrets.js.map +1 -0
  76. package/dist/cli/commands/session.js +49 -1
  77. package/dist/cli/commands/session.js.map +1 -1
  78. package/dist/cli/commands/tools.js +3 -1
  79. package/dist/cli/commands/tools.js.map +1 -1
  80. package/dist/cli/commands/trifecta.js +1 -1
  81. package/dist/cli/commands/trifecta.js.map +1 -1
  82. package/dist/cli/commands/tui.js.map +1 -1
  83. package/dist/cli/init.js +50 -16
  84. package/dist/cli/init.js.map +1 -1
  85. package/dist/cli/input.js +25 -7
  86. package/dist/cli/input.js.map +1 -1
  87. package/dist/cli/oneshot.js +31 -19
  88. package/dist/cli/oneshot.js.map +1 -1
  89. package/dist/cli/repl-dispatch.js +10 -6
  90. package/dist/cli/repl-dispatch.js.map +1 -1
  91. package/dist/cli/runtime-cmds.js +110 -46
  92. package/dist/cli/runtime-cmds.js.map +1 -1
  93. package/dist/cli/service.js +3 -3
  94. package/dist/cli/service.js.map +1 -1
  95. package/dist/cli/session-state.js +12 -5
  96. package/dist/cli/session-state.js.map +1 -1
  97. package/dist/cli/setup.js +86 -33
  98. package/dist/cli/setup.js.map +1 -1
  99. package/dist/cli/shell.js +4 -4
  100. package/dist/cli/shell.js.map +1 -1
  101. package/dist/cli/status.js +56 -12
  102. package/dist/cli/status.js.map +1 -1
  103. package/dist/client.js +40 -21
  104. package/dist/client.js.map +1 -1
  105. package/dist/commands.js +1 -1
  106. package/dist/commands.js.map +1 -1
  107. package/dist/config.js +171 -15
  108. package/dist/config.js.map +1 -1
  109. package/dist/confirm/auto.js.map +1 -1
  110. package/dist/confirm/headless.js +13 -2
  111. package/dist/confirm/headless.js.map +1 -1
  112. package/dist/confirm/terminal.js +1 -5
  113. package/dist/confirm/terminal.js.map +1 -1
  114. package/dist/context.js +9 -3
  115. package/dist/context.js.map +1 -1
  116. package/dist/git.js +56 -61
  117. package/dist/git.js.map +1 -1
  118. package/dist/harnesses.js +137 -37
  119. package/dist/harnesses.js.map +1 -1
  120. package/dist/history.js +12 -4
  121. package/dist/history.js.map +1 -1
  122. package/dist/hooks/index.js +2 -2
  123. package/dist/hooks/index.js.map +1 -1
  124. package/dist/hooks/loader.js +6 -5
  125. package/dist/hooks/loader.js.map +1 -1
  126. package/dist/hooks/manager.js.map +1 -1
  127. package/dist/hooks/plugins/example-console.js.map +1 -1
  128. package/dist/hooks/scaffold.js +8 -6
  129. package/dist/hooks/scaffold.js.map +1 -1
  130. package/dist/index.js +120 -66
  131. package/dist/index.js.map +1 -1
  132. package/dist/indexer.js +6 -18
  133. package/dist/indexer.js.map +1 -1
  134. package/dist/jsonrpc.js.map +1 -1
  135. package/dist/lens.js +38 -16
  136. package/dist/lens.js.map +1 -1
  137. package/dist/lsp.js +60 -24
  138. package/dist/lsp.js.map +1 -1
  139. package/dist/markdown.js +6 -6
  140. package/dist/markdown.js.map +1 -1
  141. package/dist/mcp.js +15 -6
  142. package/dist/mcp.js.map +1 -1
  143. package/dist/model-customization.js +7 -3
  144. package/dist/model-customization.js.map +1 -1
  145. package/dist/progress/message-edit-scheduler.js +15 -3
  146. package/dist/progress/message-edit-scheduler.js.map +1 -1
  147. package/dist/progress/progress-message-renderer.js.map +1 -1
  148. package/dist/progress/progress-presenter.js +3 -3
  149. package/dist/progress/progress-presenter.js.map +1 -1
  150. package/dist/progress/serialize-telegram.js.map +1 -1
  151. package/dist/progress/tool-summary.js +3 -1
  152. package/dist/progress/tool-summary.js.map +1 -1
  153. package/dist/progress/turn-progress.js +3 -1
  154. package/dist/progress/turn-progress.js.map +1 -1
  155. package/dist/recovery.js +11 -3
  156. package/dist/recovery.js.map +1 -1
  157. package/dist/replay.js +9 -3
  158. package/dist/replay.js.map +1 -1
  159. package/dist/replay_cli.js +5 -3
  160. package/dist/replay_cli.js.map +1 -1
  161. package/dist/runtime/executor.js +66 -20
  162. package/dist/runtime/executor.js.map +1 -1
  163. package/dist/runtime/health.js.map +1 -1
  164. package/dist/runtime/host-runner.js +103 -0
  165. package/dist/runtime/host-runner.js.map +1 -0
  166. package/dist/runtime/planner.js +3 -1
  167. package/dist/runtime/planner.js.map +1 -1
  168. package/dist/runtime/secrets.js +102 -0
  169. package/dist/runtime/secrets.js.map +1 -0
  170. package/dist/runtime/store.js +95 -19
  171. package/dist/runtime/store.js.map +1 -1
  172. package/dist/safety.js +38 -21
  173. package/dist/safety.js.map +1 -1
  174. package/dist/spinner.js +7 -8
  175. package/dist/spinner.js.map +1 -1
  176. package/dist/sys/context.js +3 -3
  177. package/dist/sys/context.js.map +1 -1
  178. package/dist/term.js +1 -1
  179. package/dist/term.js.map +1 -1
  180. package/dist/themes.js +11 -5
  181. package/dist/themes.js.map +1 -1
  182. package/dist/tools/tool-error.js +2 -5
  183. package/dist/tools/tool-error.js.map +1 -1
  184. package/dist/tools.js +69 -34
  185. package/dist/tools.js.map +1 -1
  186. package/dist/tui/branch-picker.js +9 -3
  187. package/dist/tui/branch-picker.js.map +1 -1
  188. package/dist/tui/command-handler.js +88 -36
  189. package/dist/tui/command-handler.js.map +1 -1
  190. package/dist/tui/confirm.js.map +1 -1
  191. package/dist/tui/controller.js +234 -117
  192. package/dist/tui/controller.js.map +1 -1
  193. package/dist/tui/event-bridge.js.map +1 -1
  194. package/dist/tui/keymap.js +93 -71
  195. package/dist/tui/keymap.js.map +1 -1
  196. package/dist/tui/layout.js +9 -1
  197. package/dist/tui/layout.js.map +1 -1
  198. package/dist/tui/render.js +17 -5
  199. package/dist/tui/render.js.map +1 -1
  200. package/dist/tui/screen.js.map +1 -1
  201. package/dist/tui/state.js +129 -63
  202. package/dist/tui/state.js.map +1 -1
  203. package/dist/tui/theme.js +12 -3
  204. package/dist/tui/theme.js.map +1 -1
  205. package/dist/upgrade.js +28 -15
  206. package/dist/upgrade.js.map +1 -1
  207. package/dist/utils.js +8 -5
  208. package/dist/utils.js.map +1 -1
  209. package/dist/vault.js +48 -12
  210. package/dist/vault.js.map +1 -1
  211. package/dist/vim.js.map +1 -1
  212. package/package.json +11 -2
package/dist/agent.js CHANGED
@@ -17,6 +17,7 @@ import { LspManager, detectInstalledLspServers } from './lsp.js';
17
17
  import { generateMinimalDiff, toolResultSummary, execCommandFromSig, formatDurationMs, looksLikePlanningNarration, capTextByApproxTokens, isLikelyBinaryBuffer, sanitizePathsInMessage, digestToolResult, } from './agent/formatting.js';
18
18
  import { parseToolCallsFromContent, getMissingRequiredParams, getArgValidationIssues, stripMarkdownFences } from './agent/tool-calls.js';
19
19
  import { ToolError, ValidationError } from './tools/tool-error.js';
20
+ import { ToolLoopGuard } from './agent/tool-loop-guard.js';
20
21
  export { parseToolCallsFromContent };
21
22
  import { reviewArtifactKeys, looksLikeCodeReviewRequest, looksLikeReviewRetrievalRequest, retrievalAllowsStaleArtifact, parseReviewArtifactStalePolicy, parseReviewArtifact, reviewArtifactStaleReason, gitHead, normalizeModelsResponse, } from './agent/review-artifact.js';
22
23
  import fs from 'node:fs/promises';
@@ -27,9 +28,13 @@ function makeAbortController() {
27
28
  return new AbortController();
28
29
  }
29
30
  const CACHED_EXEC_OBSERVATION_HINT = '[idlehands hint] Reused cached output for repeated read-only exec call (unchanged observation).';
30
- const CACHED_READ_OBSERVATION_HINT = '[idlehands hint] Reused cached output for repeated identical read call.';
31
31
  function looksLikeReadOnlyExecCommand(command) {
32
- const cmd = String(command || '').trim().toLowerCase();
32
+ // Strip leading `cd <path> &&` / `cd <path>;` prefixes — cd is read-only
33
+ // navigation, the actual command that matters comes after.
34
+ let cmd = String(command || '').trim().toLowerCase();
35
+ if (!cmd)
36
+ return false;
37
+ cmd = cmd.replace(/^(\s*cd\s+[^;&|]+\s*(?:&&|;)\s*)+/i, '').trim();
33
38
  if (!cmd)
34
39
  return false;
35
40
  // Shell redirects are likely writes.
@@ -99,6 +104,25 @@ function withCachedExecObservationHint(content) {
99
104
  return `${content}\n${CACHED_EXEC_OBSERVATION_HINT}`;
100
105
  }
101
106
  }
107
+ const REPLAYED_EXEC_HINT = '[idlehands hint] You already ran this exact command. This is the replayed result from your previous execution. Do NOT re-run it — use the output below to continue your task.';
108
+ function withReplayedExecHint(content) {
109
+ if (!content)
110
+ return content;
111
+ try {
112
+ const parsed = JSON.parse(content);
113
+ const out = typeof parsed?.out === 'string' ? parsed.out : '';
114
+ if (out.includes(REPLAYED_EXEC_HINT))
115
+ return content;
116
+ parsed.out = out ? `${REPLAYED_EXEC_HINT}\n${out}` : REPLAYED_EXEC_HINT;
117
+ parsed.replayed = true;
118
+ return JSON.stringify(parsed);
119
+ }
120
+ catch {
121
+ if (content.includes(REPLAYED_EXEC_HINT))
122
+ return content;
123
+ return `${REPLAYED_EXEC_HINT}\n${content}`;
124
+ }
125
+ }
102
126
  function readOnlyExecCacheable(content) {
103
127
  try {
104
128
  const parsed = JSON.parse(content);
@@ -109,18 +133,6 @@ function readOnlyExecCacheable(content) {
109
133
  return false;
110
134
  }
111
135
  }
112
- function withCachedReadObservationHint(content) {
113
- if (!content)
114
- return CACHED_READ_OBSERVATION_HINT;
115
- if (content.includes(CACHED_READ_OBSERVATION_HINT))
116
- return content;
117
- // Keep cached read replay lightweight to avoid re-inflating context.
118
- const lines = String(content).split(/\r?\n/);
119
- const previewLines = lines.slice(0, 12);
120
- const omitted = Math.max(0, lines.length - previewLines.length);
121
- const trailer = omitted > 0 ? `\n# ... (${omitted} more lines omitted; use previous identical read result)` : '';
122
- return `${CACHED_READ_OBSERVATION_HINT}\n${previewLines.join('\n')}${trailer}`;
123
- }
124
136
  function ensureInformativeAssistantText(text, ctx) {
125
137
  if (String(text ?? '').trim())
126
138
  return text;
@@ -155,6 +167,9 @@ Rules:
155
167
  - Use read_file with search=... to jump to relevant code; avoid reading whole files.
156
168
  - Never call read_file/read_files/list_dir twice in a row with identical arguments (same path/options). Reuse the previous result instead.
157
169
  - Prefer apply_patch or edit_range for code edits (token-efficient). Use edit_file only when exact old_text replacement is necessary.
170
+ - Tool-call arguments MUST be strict JSON (double-quoted keys/strings, no comments, no trailing commas).
171
+ - edit_range example: {"path":"src/foo.ts","start_line":10,"end_line":14,"replacement":"line A\nline B"}
172
+ - apply_patch example: {"patch":"--- a/src/foo.ts\n+++ b/src/foo.ts\n@@ -10,2 +10,2 @@\n-old\n+new","files":["src/foo.ts"]}
158
173
  - write_file is for new files or explicit full rewrites only. Existing non-empty files require overwrite=true/force=true.
159
174
  - Use insert_file for insertions (prepend/append/line).
160
175
  - Use exec to run commands, tests, builds; check results before reporting success.
@@ -336,7 +351,7 @@ function buildToolsSchema(opts) {
336
351
  type: 'function',
337
352
  function: {
338
353
  name: 'apply_patch',
339
- description: 'Apply unified diff patch (multi-file).',
354
+ description: 'Apply unified diff patch (multi-file).\n\nUSAGE EXAMPLE:\n apply_patch({\n patch: "--- a/src/file.ts\\n+++ b/src/file.ts\\n@@ -1,5 +1,5 @@\\n-old text\\n+new text\\n",\n files: ["src/file.ts"]\n })\n\nThe patch must be valid unified diff text. Tool-call arguments must be valid JSON. Use strip=1 if paths include directory prefixes.\nFiles listed must match the paths in the diff.',
340
355
  parameters: obj({
341
356
  patch: str(),
342
357
  files: { type: 'array', items: str() },
@@ -348,7 +363,7 @@ function buildToolsSchema(opts) {
348
363
  type: 'function',
349
364
  function: {
350
365
  name: 'edit_range',
351
- description: 'Replace a line range in a file.',
366
+ description: 'Replace a line range in a file.\n\nUSAGE EXAMPLE:\n edit_range({\n path: "src/file.ts",\n start_line: 10,\n end_line: 15,\n replacement: "new content\\nmore content"\n })\n\n- start_line and end_line are 1-indexed (first line is 1, not 0)\n- To delete lines, set replacement to empty string ""\n- To insert at a position, set start_line and end_line to the same value\n- Tool-call arguments must be valid JSON (double quotes, no trailing commas/comments)\n- The replacement text replaces the entire range inclusive',
352
367
  parameters: obj({
353
368
  path: str(),
354
369
  start_line: int(1),
@@ -427,6 +442,11 @@ function buildToolsSchema(opts) {
427
442
  if (opts?.activeVaultTools) {
428
443
  schemas.push({ type: 'function', function: { name: 'vault_search', description: 'Search vault.', parameters: obj({ query: str(), limit: int() }, ['query']) } }, { type: 'function', function: { name: 'vault_note', description: 'Write vault note.', parameters: obj({ key: str(), value: str() }, ['key', 'value']) } });
429
444
  }
445
+ else if (opts?.passiveVault) {
446
+ // In passive mode, expose vault_search (read-only) so the model can recover
447
+ // compacted context on demand, but don't expose vault_note (write).
448
+ schemas.push({ type: 'function', function: { name: 'vault_search', description: 'Search vault memory for earlier context that was compacted away. Use sparingly — only when you need to recall specific details from earlier in the conversation.', parameters: obj({ query: str(), limit: int() }, ['query']) } });
449
+ }
430
450
  // Phase 9: sys_context tool is only available in sys mode.
431
451
  if (opts?.sysMode) {
432
452
  schemas.push(SYS_CONTEXT_SCHEMA);
@@ -671,6 +691,7 @@ export async function createSession(opts) {
671
691
  let mcpToolsLoaded = !mcpLazySchemaMode;
672
692
  const getToolsSchema = () => buildToolsSchema({
673
693
  activeVaultTools,
694
+ passiveVault: !activeVaultTools && vaultEnabled && vaultMode === 'passive',
674
695
  sysMode: cfg.mode === 'sys',
675
696
  lspTools: lspManager?.hasServers() === true,
676
697
  mcpTools: mcpToolsLoaded ? (mcpManager?.getEnabledToolSchemas() ?? []) : [],
@@ -1203,7 +1224,7 @@ export async function createSession(opts) {
1203
1224
  return 'Vault memory is available. Retrieve prior context with vault_search(query="...") when needed.';
1204
1225
  }
1205
1226
  if (vaultMode === 'passive') {
1206
- return 'Vault memory is in passive mode; relevant entries may be auto-injected when available.';
1227
+ return 'Vault memory is in passive mode; relevant entries may be auto-injected. You can also use vault_search(query="...") to recover specific earlier context if needed.';
1207
1228
  }
1208
1229
  return '';
1209
1230
  };
@@ -1260,80 +1281,150 @@ export async function createSession(opts) {
1260
1281
  content: `${vaultContextHeader} Relevant entries for "${query}":\n${lines.join('\n')}`
1261
1282
  });
1262
1283
  };
1263
- const compactHistory = async (opts) => {
1264
- const beforeMessages = messages.length;
1265
- const beforeTokens = estimateTokensFromMessages(messages);
1266
- let compacted;
1267
- if (opts?.hard) {
1268
- const sys = messages[0]?.role === 'system' ? [messages[0]] : [];
1269
- const tail = messages.slice(-2);
1270
- compacted = [...sys, ...tail];
1284
+ let compactionLockTail = Promise.resolve();
1285
+ let compactionStats = {
1286
+ inProgress: false,
1287
+ lockHeld: false,
1288
+ runs: 0,
1289
+ failedRuns: 0,
1290
+ beforeMessages: 0,
1291
+ afterMessages: 0,
1292
+ freedTokens: 0,
1293
+ archivedToolMessages: 0,
1294
+ droppedMessages: 0,
1295
+ dryRun: false,
1296
+ };
1297
+ const runCompactionWithLock = async (reason, runner) => {
1298
+ const prev = compactionLockTail;
1299
+ let release = () => { };
1300
+ compactionLockTail = new Promise((resolve) => {
1301
+ release = () => resolve();
1302
+ });
1303
+ await prev;
1304
+ compactionStats = {
1305
+ ...compactionStats,
1306
+ inProgress: true,
1307
+ lockHeld: true,
1308
+ lastReason: reason,
1309
+ lastError: undefined,
1310
+ updatedAt: new Date().toISOString(),
1311
+ // Reset run stats before fresh calculation.
1312
+ beforeMessages: 0,
1313
+ afterMessages: 0,
1314
+ freedTokens: 0,
1315
+ archivedToolMessages: 0,
1316
+ droppedMessages: 0,
1317
+ dryRun: false,
1318
+ };
1319
+ try {
1320
+ const result = await runner();
1321
+ compactionStats = {
1322
+ ...compactionStats,
1323
+ ...result,
1324
+ inProgress: false,
1325
+ lockHeld: false,
1326
+ runs: compactionStats.runs + 1,
1327
+ lastReason: reason,
1328
+ updatedAt: new Date().toISOString(),
1329
+ };
1330
+ return result;
1271
1331
  }
1272
- else {
1273
- compacted = enforceContextBudget({
1274
- messages,
1275
- contextWindow,
1276
- maxTokens,
1277
- minTailMessages: opts?.force ? 2 : 12,
1278
- compactAt: opts?.force ? 0.5 : (cfg.compact_at ?? 0.8),
1279
- toolSchemaTokens: estimateToolSchemaTokens(getToolsSchema()),
1280
- force: opts?.force,
1281
- });
1332
+ catch (e) {
1333
+ compactionStats = {
1334
+ ...compactionStats,
1335
+ inProgress: false,
1336
+ lockHeld: false,
1337
+ failedRuns: compactionStats.failedRuns + 1,
1338
+ lastReason: reason,
1339
+ lastError: e?.message ?? String(e),
1340
+ updatedAt: new Date().toISOString(),
1341
+ };
1342
+ throw e;
1282
1343
  }
1283
- const compactedByRefs = new Set(compacted);
1284
- let dropped = messages.filter((m) => !compactedByRefs.has(m));
1285
- if (opts?.topic) {
1286
- const topic = opts.topic.toLowerCase();
1287
- dropped = dropped.filter((m) => !userContentToText(m.content ?? '').toLowerCase().includes(topic));
1288
- const keepFromTopic = messages.filter((m) => userContentToText(m.content ?? '').toLowerCase().includes(topic));
1289
- compacted = [...compacted, ...keepFromTopic.filter((m) => !compactedByRefs.has(m))];
1344
+ finally {
1345
+ release();
1290
1346
  }
1291
- const archivedToolMessages = dropped.filter((m) => m.role === 'tool').length;
1292
- const afterMessages = compacted.length;
1293
- const afterTokens = estimateTokensFromMessages(compacted);
1294
- const freedTokens = Math.max(0, beforeTokens - afterTokens);
1295
- if (!opts?.dry) {
1296
- if (dropped.length && vault) {
1297
- try {
1298
- // Store the original/current user prompt before compaction so it survives context loss.
1299
- let userPromptToPreserve = null;
1300
- for (let i = messages.length - 1; i >= 0; i--) {
1301
- const m = messages[i];
1302
- if (m.role === 'user') {
1303
- const text = userContentToText((m.content ?? '')).trim();
1304
- if (text && !text.startsWith('[Trifecta Vault') && !text.startsWith('[Vault context') && text.length > 20) {
1305
- userPromptToPreserve = text;
1306
- break;
1347
+ };
1348
+ const compactHistory = async (opts) => {
1349
+ const reason = opts?.reason
1350
+ ?? (opts?.hard ? 'manual hard compaction'
1351
+ : opts?.force ? 'manual force compaction'
1352
+ : 'manual compaction');
1353
+ return await runCompactionWithLock(reason, async () => {
1354
+ const beforeMessages = messages.length;
1355
+ const beforeTokens = estimateTokensFromMessages(messages);
1356
+ let compacted;
1357
+ if (opts?.hard) {
1358
+ const sys = messages[0]?.role === 'system' ? [messages[0]] : [];
1359
+ const tail = messages.slice(-2);
1360
+ compacted = [...sys, ...tail];
1361
+ }
1362
+ else {
1363
+ compacted = enforceContextBudget({
1364
+ messages,
1365
+ contextWindow,
1366
+ maxTokens,
1367
+ minTailMessages: opts?.force ? 2 : 12,
1368
+ compactAt: opts?.force ? 0.5 : (cfg.compact_at ?? 0.8),
1369
+ toolSchemaTokens: estimateToolSchemaTokens(getToolsSchema()),
1370
+ force: opts?.force,
1371
+ });
1372
+ }
1373
+ const compactedByRefs = new Set(compacted);
1374
+ let dropped = messages.filter((m) => !compactedByRefs.has(m));
1375
+ if (opts?.topic) {
1376
+ const topic = opts.topic.toLowerCase();
1377
+ dropped = dropped.filter((m) => !userContentToText(m.content ?? '').toLowerCase().includes(topic));
1378
+ const keepFromTopic = messages.filter((m) => userContentToText(m.content ?? '').toLowerCase().includes(topic));
1379
+ compacted = [...compacted, ...keepFromTopic.filter((m) => !compactedByRefs.has(m))];
1380
+ }
1381
+ const archivedToolMessages = dropped.filter((m) => m.role === 'tool').length;
1382
+ const afterMessages = compacted.length;
1383
+ const afterTokens = estimateTokensFromMessages(compacted);
1384
+ const freedTokens = Math.max(0, beforeTokens - afterTokens);
1385
+ if (!opts?.dry) {
1386
+ if (dropped.length && vault) {
1387
+ try {
1388
+ // Store the original/current user prompt before compaction so it survives context loss.
1389
+ let userPromptToPreserve = null;
1390
+ for (let i = messages.length - 1; i >= 0; i--) {
1391
+ const m = messages[i];
1392
+ if (m.role === 'user') {
1393
+ const text = userContentToText((m.content ?? '')).trim();
1394
+ if (text && !text.startsWith('[Trifecta Vault') && !text.startsWith('[Vault context') && text.length > 20) {
1395
+ userPromptToPreserve = text;
1396
+ break;
1397
+ }
1307
1398
  }
1308
1399
  }
1400
+ if (userPromptToPreserve) {
1401
+ await vault.upsertNote('current_task', userPromptToPreserve.slice(0, 2000), 'system');
1402
+ }
1403
+ await vault.archiveToolMessages(dropped, new Map());
1404
+ await vault.note('compaction_summary', `Dropped ${dropped.length} messages (${freedTokens} tokens).`);
1309
1405
  }
1310
- if (userPromptToPreserve) {
1311
- await vault.upsertNote('current_task', userPromptToPreserve.slice(0, 2000), 'system');
1406
+ catch {
1407
+ // best-effort
1312
1408
  }
1313
- await vault.archiveToolMessages(dropped, new Map());
1314
- await vault.note('compaction_summary', `Dropped ${dropped.length} messages (${freedTokens} tokens).`);
1315
1409
  }
1316
- catch {
1317
- // best-effort
1318
- }
1319
- }
1320
- messages = compacted;
1321
- if (dropped.length) {
1322
- messages.push({ role: 'system', content: buildCompactionSystemNote('manual', dropped.length) });
1323
- await injectVaultContext().catch(() => { });
1324
- if (opts?.reason || opts?.force) {
1325
- injectCompactionReminder(opts?.reason ?? 'history compaction');
1410
+ messages = compacted;
1411
+ if (dropped.length) {
1412
+ messages.push({ role: 'system', content: buildCompactionSystemNote('manual', dropped.length) });
1413
+ await injectVaultContext().catch(() => { });
1414
+ if (opts?.reason || opts?.force) {
1415
+ injectCompactionReminder(opts?.reason ?? 'history compaction');
1416
+ }
1326
1417
  }
1327
1418
  }
1328
- }
1329
- return {
1330
- beforeMessages,
1331
- afterMessages,
1332
- freedTokens,
1333
- archivedToolMessages,
1334
- droppedMessages: dropped.length,
1335
- dryRun: !!opts?.dry,
1336
- };
1419
+ return {
1420
+ beforeMessages,
1421
+ afterMessages,
1422
+ freedTokens,
1423
+ archivedToolMessages,
1424
+ droppedMessages: dropped.length,
1425
+ dryRun: !!opts?.dry,
1426
+ };
1427
+ });
1337
1428
  };
1338
1429
  const cumulativeUsage = { prompt: 0, completion: 0 };
1339
1430
  const turnDurationsMs = [];
@@ -1342,6 +1433,17 @@ export async function createSession(opts) {
1342
1433
  const tgSamples = [];
1343
1434
  let lastTurnMetrics;
1344
1435
  let lastServerHealth;
1436
+ let lastToolLoopStats = { totalHistory: 0, signatures: [], outcomes: [], telemetry: {
1437
+ callsRegistered: 0,
1438
+ dedupedReplays: 0,
1439
+ readCacheLookups: 0,
1440
+ readCacheHits: 0,
1441
+ warnings: 0,
1442
+ criticals: 0,
1443
+ recoveryRecommended: 0,
1444
+ readCacheHitRate: 0,
1445
+ dedupeRate: 0,
1446
+ } };
1345
1447
  let lastModelsProbeMs = 0;
1346
1448
  const capturesDir = path.join(stateDir(), 'captures');
1347
1449
  let captureEnabled = false;
@@ -1690,6 +1792,10 @@ export async function createSession(opts) {
1690
1792
  await hookObj.onToolResult?.(result);
1691
1793
  await hookManager.emit('tool_result', { askId, turn: turns, result });
1692
1794
  };
1795
+ const emitToolLoop = async (loop) => {
1796
+ await hookObj.onToolLoop?.(loop);
1797
+ await hookManager.emit('tool_loop', { askId, turn: turns, loop });
1798
+ };
1693
1799
  const emitTurnEnd = async (stats) => {
1694
1800
  await hookObj.onTurnEnd?.(stats);
1695
1801
  await hookManager.emit('turn_end', { askId, stats });
@@ -1825,9 +1931,41 @@ export async function createSession(opts) {
1825
1931
  const blockedExecAttemptsBySig = new Map();
1826
1932
  // Cache successful read-only exec observations by exact signature.
1827
1933
  const execObservationCacheBySig = new Map();
1828
- // Cache successful read_file/read_files/list_dir results by exact signature.
1934
+ // Cache ALL successful exec results so repeated identical calls under context
1935
+ // pressure can replay the cached result instead of re-executing.
1936
+ const lastExecResultBySig = new Map();
1937
+ // Cache successful read_file/read_files/list_dir results by signature + mtime for invalidation.
1829
1938
  const readFileCacheBySig = new Map();
1830
1939
  const READ_FILE_CACHE_TOOLS = new Set(['read_file', 'read_files', 'list_dir']);
1940
+ const toolLoopGuard = new ToolLoopGuard({
1941
+ enabled: cfg.tool_loop_detection?.enabled,
1942
+ historySize: cfg.tool_loop_detection?.history_size,
1943
+ warningThreshold: cfg.tool_loop_detection?.warning_threshold,
1944
+ criticalThreshold: cfg.tool_loop_detection?.critical_threshold,
1945
+ globalCircuitBreakerThreshold: cfg.tool_loop_detection?.global_circuit_breaker_threshold,
1946
+ readCacheTtlMs: cfg.tool_loop_detection?.read_cache_ttl_ms,
1947
+ detectors: {
1948
+ genericRepeat: cfg.tool_loop_detection?.detectors?.generic_repeat,
1949
+ knownPollNoProgress: cfg.tool_loop_detection?.detectors?.known_poll_no_progress,
1950
+ pingPong: cfg.tool_loop_detection?.detectors?.ping_pong,
1951
+ },
1952
+ perTool: Object.fromEntries(Object.entries(cfg.tool_loop_detection?.per_tool ?? {}).map(([tool, policy]) => [
1953
+ tool,
1954
+ {
1955
+ warningThreshold: policy?.warning_threshold,
1956
+ criticalThreshold: policy?.critical_threshold,
1957
+ globalCircuitBreakerThreshold: policy?.global_circuit_breaker_threshold,
1958
+ detectors: {
1959
+ genericRepeat: policy?.detectors?.generic_repeat,
1960
+ knownPollNoProgress: policy?.detectors?.known_poll_no_progress,
1961
+ pingPong: policy?.detectors?.ping_pong,
1962
+ },
1963
+ },
1964
+ ])),
1965
+ });
1966
+ const toolLoopWarningKeys = new Set();
1967
+ let forceToollessRecoveryTurn = false;
1968
+ let toollessRecoveryUsed = false;
1831
1969
  // Prevent repeating the same "stop rerunning" reminder every turn.
1832
1970
  const readOnlyExecHintedSigs = new Set();
1833
1971
  // Keep a lightweight breadcrumb for diagnostics on partial failures.
@@ -1950,51 +2088,63 @@ export async function createSession(opts) {
1950
2088
  throw new Error(`session timeout exceeded (${cfg.timeout}s) after ${wallElapsed.toFixed(1)}s`);
1951
2089
  }
1952
2090
  await maybeAutoDetectModelChange();
1953
- const beforeMsgs = messages;
1954
- const compacted = enforceContextBudget({
1955
- messages: beforeMsgs,
1956
- contextWindow,
1957
- maxTokens: maxTokens,
1958
- minTailMessages: 12,
1959
- compactAt: cfg.compact_at ?? 0.8,
1960
- toolSchemaTokens: estimateToolSchemaTokens(getToolsSchema()),
1961
- });
1962
- const compactedByRefs = new Set(compacted);
1963
- const dropped = beforeMsgs.filter((m) => !compactedByRefs.has(m));
1964
- if (dropped.length && vault) {
1965
- try {
1966
- // Store the original/current user prompt before compaction so it survives context loss.
1967
- // Find the last substantive user message that looks like a task/instruction.
1968
- let userPromptToPreserve = null;
1969
- for (let i = beforeMsgs.length - 1; i >= 0; i--) {
1970
- const m = beforeMsgs[i];
1971
- if (m.role === 'user') {
1972
- const text = userContentToText((m.content ?? '')).trim();
1973
- // Skip vault injection messages and short prompts
1974
- if (text && !text.startsWith('[Trifecta Vault') && !text.startsWith('[Vault context') && text.length > 20) {
1975
- userPromptToPreserve = text;
1976
- break;
2091
+ await runCompactionWithLock('auto context-budget compaction', async () => {
2092
+ const beforeMsgs = messages;
2093
+ const beforeTokens = estimateTokensFromMessages(beforeMsgs);
2094
+ const compacted = enforceContextBudget({
2095
+ messages: beforeMsgs,
2096
+ contextWindow,
2097
+ maxTokens: maxTokens,
2098
+ minTailMessages: 12,
2099
+ compactAt: cfg.compact_at ?? 0.8,
2100
+ toolSchemaTokens: estimateToolSchemaTokens(getToolsSchema()),
2101
+ });
2102
+ const compactedByRefs = new Set(compacted);
2103
+ const dropped = beforeMsgs.filter((m) => !compactedByRefs.has(m));
2104
+ if (dropped.length && vault) {
2105
+ try {
2106
+ // Store the original/current user prompt before compaction so it survives context loss.
2107
+ // Find the last substantive user message that looks like a task/instruction.
2108
+ let userPromptToPreserve = null;
2109
+ for (let i = beforeMsgs.length - 1; i >= 0; i--) {
2110
+ const m = beforeMsgs[i];
2111
+ if (m.role === 'user') {
2112
+ const text = userContentToText((m.content ?? '')).trim();
2113
+ // Skip vault injection messages and short prompts
2114
+ if (text && !text.startsWith('[Trifecta Vault') && !text.startsWith('[Vault context') && text.length > 20) {
2115
+ userPromptToPreserve = text;
2116
+ break;
2117
+ }
1977
2118
  }
1978
2119
  }
2120
+ if (userPromptToPreserve) {
2121
+ await vault.upsertNote('current_task', userPromptToPreserve.slice(0, 2000), 'system');
2122
+ }
2123
+ const toArchive = lens
2124
+ ? await Promise.all(dropped.map((m) => archiveToolOutputForVault(m)))
2125
+ : dropped;
2126
+ await vault.archiveToolMessages(toArchive, toolNameByCallId);
1979
2127
  }
1980
- if (userPromptToPreserve) {
1981
- await vault.upsertNote('current_task', userPromptToPreserve.slice(0, 2000), 'system');
2128
+ catch (e) {
2129
+ console.warn(`[warn] vault archive failed: ${e instanceof Error ? e.message : String(e)}`);
1982
2130
  }
1983
- const toArchive = lens
1984
- ? await Promise.all(dropped.map((m) => archiveToolOutputForVault(m)))
1985
- : dropped;
1986
- await vault.archiveToolMessages(toArchive, toolNameByCallId);
1987
2131
  }
1988
- catch (e) {
1989
- console.warn(`[warn] vault archive failed: ${e instanceof Error ? e.message : String(e)}`);
2132
+ messages = compacted;
2133
+ if (dropped.length) {
2134
+ messages.push({ role: 'system', content: buildCompactionSystemNote('auto', dropped.length) });
2135
+ await injectVaultContext().catch(() => { });
2136
+ injectCompactionReminder('auto context-budget compaction');
1990
2137
  }
1991
- }
1992
- messages = compacted;
1993
- if (dropped.length) {
1994
- messages.push({ role: 'system', content: buildCompactionSystemNote('auto', dropped.length) });
1995
- await injectVaultContext().catch(() => { });
1996
- injectCompactionReminder('auto context-budget compaction');
1997
- }
2138
+ const afterTokens = estimateTokensFromMessages(compacted);
2139
+ return {
2140
+ beforeMessages: beforeMsgs.length,
2141
+ afterMessages: compacted.length,
2142
+ freedTokens: Math.max(0, beforeTokens - afterTokens),
2143
+ archivedToolMessages: dropped.filter((m) => m.role === 'tool').length,
2144
+ droppedMessages: dropped.length,
2145
+ dryRun: false,
2146
+ };
2147
+ });
1998
2148
  const ac = makeAbortController();
1999
2149
  inFlight = ac;
2000
2150
  // If caller provided an AbortSignal (bench iteration timeout, etc), propagate it.
@@ -2019,11 +2169,13 @@ export async function createSession(opts) {
2019
2169
  let resp;
2020
2170
  try {
2021
2171
  try {
2172
+ const toolsForTurn = forceToollessRecoveryTurn ? [] : getToolsSchema();
2173
+ const toolChoiceForTurn = forceToollessRecoveryTurn ? 'none' : 'auto';
2022
2174
  resp = await client.chatStream({
2023
2175
  model,
2024
2176
  messages,
2025
- tools: getToolsSchema(),
2026
- tool_choice: 'auto',
2177
+ tools: toolsForTurn,
2178
+ tool_choice: toolChoiceForTurn,
2027
2179
  temperature,
2028
2180
  top_p: topP,
2029
2181
  max_tokens: maxTokens,
@@ -2113,6 +2265,8 @@ export async function createSession(opts) {
2113
2265
  },
2114
2266
  }
2115
2267
  : undefined;
2268
+ const wasToollessRecoveryTurn = forceToollessRecoveryTurn;
2269
+ forceToollessRecoveryTurn = false;
2116
2270
  const choice0 = resp.choices?.[0] ?? legacyChoice;
2117
2271
  const finishReason = choice0?.finish_reason ?? 'unknown';
2118
2272
  const msg = choice0?.message;
@@ -2163,6 +2317,10 @@ export async function createSession(opts) {
2163
2317
  }
2164
2318
  }
2165
2319
  }
2320
+ if (wasToollessRecoveryTurn && toolCallsArr?.length) {
2321
+ // Recovery turn explicitly disables tools; ignore any stray tool-call output.
2322
+ toolCallsArr = undefined;
2323
+ }
2166
2324
  if (cfg.verbose) {
2167
2325
  console.warn(`[turn ${turns}] finish_reason=${finishReason} content_chars=${content.length} visible_chars=${visible.length} tool_calls=${toolCallsArr?.length ?? 0}`);
2168
2326
  }
@@ -2241,12 +2399,17 @@ export async function createSession(opts) {
2241
2399
  // narration chunk starts on a fresh line (avoids wall-of-text output).
2242
2400
  if (visible && hookObj.onToken)
2243
2401
  hookObj.onToken('\n');
2244
- toolCalls += toolCallsArr.length;
2402
+ const originalToolCallsArr = toolCallsArr;
2403
+ const preparedTurn = toolLoopGuard.prepareTurn(originalToolCallsArr);
2404
+ const replayByCallId = preparedTurn.replayByCallId;
2405
+ const parsedArgsByCallId = preparedTurn.parsedArgsByCallId;
2406
+ toolCallsArr = preparedTurn.uniqueCalls;
2407
+ toolCalls += originalToolCallsArr.length;
2245
2408
  const assistantToolCallText = visible || '';
2246
2409
  const compactAssistantToolCallText = assistantToolCallText.length > 900
2247
2410
  ? `${assistantToolCallText.slice(0, 900)}\n[history-compacted: assistant narration truncated before tool execution]`
2248
2411
  : assistantToolCallText;
2249
- messages.push({ role: 'assistant', content: compactAssistantToolCallText, tool_calls: toolCallsArr });
2412
+ messages.push({ role: 'assistant', content: compactAssistantToolCallText, tool_calls: originalToolCallsArr });
2250
2413
  // sigCounts is scoped to the entire ask() run (see above)
2251
2414
  // Bridge ConfirmationProvider → legacy confirm callback for tools.
2252
2415
  // If a ConfirmationProvider is given, wrap it; otherwise fall back to raw callback.
@@ -2290,20 +2453,64 @@ export async function createSession(opts) {
2290
2453
  // We only treat repeated exec as a loop if no file mutations happened since the
2291
2454
  // last time we saw that exact exec signature.
2292
2455
  const turnSigs = new Set();
2456
+ const sigMetaBySig = new Map();
2293
2457
  for (const tc of toolCallsArr) {
2294
- const sig = `${tc.function.name}:${tc.function.arguments ?? '{}'}`;
2458
+ const callId = resolveCallId(tc);
2459
+ const parsedArgs = parsedArgsByCallId.get(callId) ?? {};
2460
+ const sig = toolLoopGuard.computeSignature(tc.function.name, parsedArgs);
2295
2461
  turnSigs.add(sig);
2462
+ if (!sigMetaBySig.has(sig)) {
2463
+ sigMetaBySig.set(sig, { toolName: tc.function.name, args: parsedArgs });
2464
+ }
2296
2465
  }
2297
2466
  // Repeated read-only exec calls can be served from cache instead of hard-breaking.
2298
2467
  const repeatedReadOnlyExecSigs = new Set();
2299
2468
  const readOnlyExecTurnHints = [];
2469
+ // Repeated exec calls (any kind) can replay cached results under pressure.
2470
+ const replayExecSigs = new Set();
2300
2471
  // Repeated read_file/read_files/list_dir calls can be served from cache.
2301
2472
  const repeatedReadFileSigs = new Set();
2473
+ let shouldForceToollessRecovery = false;
2474
+ const criticalLoopSigs = new Set();
2475
+ for (const tc of toolCallsArr) {
2476
+ const callId = resolveCallId(tc);
2477
+ const args = parsedArgsByCallId.get(callId) ?? {};
2478
+ const detected = toolLoopGuard.detect(tc.function.name, args);
2479
+ const warning = toolLoopGuard.formatWarning(detected, tc.function.name);
2480
+ if (warning) {
2481
+ const warningKey = `${warning.level}:${warning.detector}:${detected.signature}`;
2482
+ if (!toolLoopWarningKeys.has(warningKey)) {
2483
+ toolLoopWarningKeys.add(warningKey);
2484
+ await emitToolLoop({
2485
+ level: warning.level,
2486
+ detector: warning.detector,
2487
+ toolName: warning.toolName,
2488
+ count: warning.count,
2489
+ message: warning.message,
2490
+ });
2491
+ messages.push({
2492
+ role: 'system',
2493
+ content: `[tool-loop ${warning.level}] ${warning.message}. Stop repeating ${warning.toolName} with unchanged inputs; continue with analysis or next step.`,
2494
+ });
2495
+ }
2496
+ }
2497
+ if (toolLoopGuard.shouldDisableToolsNextTurn(detected)) {
2498
+ shouldForceToollessRecovery = true;
2499
+ criticalLoopSigs.add(detected.signature);
2500
+ }
2501
+ }
2302
2502
  // Track whether a mutation happened since a given signature was last seen.
2303
2503
  // (Tool-loop is single-threaded across turns; this is safe to keep in-memory.)
2304
2504
  for (const sig of turnSigs) {
2305
2505
  sigCounts.set(sig, (sigCounts.get(sig) ?? 0) + 1);
2306
- const toolName = sig.split(':')[0];
2506
+ const sigMeta = sigMetaBySig.get(sig);
2507
+ const toolName = sigMeta?.toolName ?? sig.split(':')[0];
2508
+ if (criticalLoopSigs.has(sig)) {
2509
+ // Critical detector already fired for this signature; recover next turn
2510
+ // with tools disabled instead of throwing in per-tool hard-break logic.
2511
+ shouldForceToollessRecovery = true;
2512
+ continue;
2513
+ }
2307
2514
  // For exec loops, only break if nothing changed since last identical exec.
2308
2515
  if (toolName === 'exec') {
2309
2516
  // If this exact exec signature was seen before, record the mutation version at that time.
@@ -2314,6 +2521,18 @@ export async function createSession(opts) {
2314
2521
  mutationVersionBySig.set(sig, mutationVersion);
2315
2522
  if (!hasMutatedSince) {
2316
2523
  const count = sigCounts.get(sig) ?? 0;
2524
+ // Early replay: if this exact exec was already run (count >= 2) and
2525
+ // we have a cached result, replay it instead of re-executing. This
2526
+ // prevents the compaction death spiral where tool results get dropped,
2527
+ // the model forgets it ran the command, and re-runs it endlessly.
2528
+ // Skip read-only commands that already have their own observation cache —
2529
+ // those are handled by the dedicated read-only path at loopThreshold.
2530
+ const command = execCommandFromSig(sig);
2531
+ const hasReadOnlyCache = looksLikeReadOnlyExecCommand(command) && execObservationCacheBySig.has(sig);
2532
+ if (count >= 2 && lastExecResultBySig.has(sig) && !hasReadOnlyCache) {
2533
+ replayExecSigs.add(sig);
2534
+ continue;
2535
+ }
2317
2536
  let loopThreshold = harness.quirks.loopsOnToolError ? 3 : 6;
2318
2537
  // If the cached observation already tells the model "no matches found",
2319
2538
  // break much earlier — the model is ignoring the hint.
@@ -2326,7 +2545,8 @@ export async function createSession(opts) {
2326
2545
  await injectVaultContext().catch(() => { });
2327
2546
  }
2328
2547
  if (count >= loopThreshold) {
2329
- const command = execCommandFromSig(sig);
2548
+ const sigArgs = sigMetaBySig.get(sig)?.args ?? {};
2549
+ const command = typeof sigArgs?.command === 'string' ? String(sigArgs.command) : '';
2330
2550
  const canReuseReadOnlyObservation = looksLikeReadOnlyExecCommand(command) &&
2331
2551
  execObservationCacheBySig.has(sig);
2332
2552
  if (canReuseReadOnlyObservation) {
@@ -2337,8 +2557,8 @@ export async function createSession(opts) {
2337
2557
  }
2338
2558
  continue;
2339
2559
  }
2340
- const args = sig.slice(toolName.length + 1);
2341
- const argsPreview = args.length > 220 ? args.slice(0, 220) + '…' : args;
2560
+ const argsPreviewRaw = JSON.stringify(sigArgs);
2561
+ const argsPreview = argsPreviewRaw.length > 220 ? argsPreviewRaw.slice(0, 220) + '…' : argsPreviewRaw;
2342
2562
  throw new Error(`tool ${toolName}: identical call repeated ${loopThreshold}x across turns; breaking loop. ` +
2343
2563
  `args=${argsPreview}`);
2344
2564
  }
@@ -2396,23 +2616,30 @@ export async function createSession(opts) {
2396
2616
  content: `CRITICAL: DO NOT make another identical call for this ${resourceType}. It HAS NOT CHANGED. You already have the content. Move on to the NEXT step NOW.`,
2397
2617
  });
2398
2618
  }
2399
- if (readFileCacheBySig.has(sig)) {
2619
+ const argsForSig = sigMetaBySig.get(sig)?.args ?? {};
2620
+ const replay = await toolLoopGuard.getReadCacheReplay(toolName, argsForSig, ctx.cwd);
2621
+ if (replay) {
2400
2622
  repeatedReadFileSigs.add(sig);
2401
2623
  continue;
2402
2624
  }
2403
2625
  }
2404
- // Hard-break at threshold
2626
+ // Deterministic recovery at threshold (no hard throw): force one no-tools turn.
2405
2627
  if (consec >= hardBreakAt) {
2406
- throw new Error(`tool ${toolName}: identical read repeated ${consec}x consecutively; breaking loop. ` +
2407
- `The resource content has not changed between reads.`);
2628
+ shouldForceToollessRecovery = true;
2629
+ messages.push({
2630
+ role: 'system',
2631
+ content: `[tool-loop critical] ${toolName} repeated ${consec}x with unchanged inputs. ` +
2632
+ 'Next turn will run with tools disabled so you must use existing results and provide a concrete next step/final response.',
2633
+ });
2408
2634
  }
2409
2635
  continue;
2410
2636
  }
2411
2637
  // Default behavior for mutating/other tools: break on repeated identical signature.
2412
2638
  const loopThreshold = harness.quirks.loopsOnToolError ? 2 : 3;
2413
2639
  if ((sigCounts.get(sig) ?? 0) >= loopThreshold) {
2414
- const args = sig.slice(toolName.length + 1);
2415
- const argsPreview = args.length > 220 ? args.slice(0, 220) + '…' : args;
2640
+ const argsObj = sigMetaBySig.get(sig)?.args ?? {};
2641
+ const argsRaw = JSON.stringify(argsObj);
2642
+ const argsPreview = argsRaw.length > 220 ? argsRaw.slice(0, 220) + '…' : argsRaw;
2416
2643
  throw new Error(`tool ${toolName}: identical call repeated ${loopThreshold}x across turns; breaking loop. ` +
2417
2644
  `args=${argsPreview}\n` +
2418
2645
  `Hint: you repeated the same tool call ${loopThreshold} times with identical arguments. ` +
@@ -2423,6 +2650,31 @@ export async function createSession(opts) {
2423
2650
  }
2424
2651
  // Update consecutive tracking: save this turn's signatures for next turn comparison.
2425
2652
  lastTurnSigs = turnSigs;
2653
+ if (shouldForceToollessRecovery) {
2654
+ if (!toollessRecoveryUsed) {
2655
+ forceToollessRecoveryTurn = true;
2656
+ toollessRecoveryUsed = true;
2657
+ messages.push({
2658
+ role: 'user',
2659
+ content: '[system] Critical tool loop detected. Next turn will run with tools disabled. ' +
2660
+ 'Use already available tool results to provide a concrete next step or final response; do not request more tools.',
2661
+ });
2662
+ await emitTurnEnd({
2663
+ turn: turns,
2664
+ toolCalls,
2665
+ promptTokens: cumulativeUsage.prompt,
2666
+ completionTokens: cumulativeUsage.completion,
2667
+ promptTokensTurn,
2668
+ completionTokensTurn,
2669
+ ttftMs,
2670
+ ttcMs,
2671
+ ppTps,
2672
+ tgTps,
2673
+ });
2674
+ continue;
2675
+ }
2676
+ throw new AgentLoopBreak('critical tool-loop persisted after one tools-disabled recovery turn. Stopping to avoid infinite loop.');
2677
+ }
2426
2678
  const runOne = async (tc) => {
2427
2679
  const name = tc.function.name;
2428
2680
  const rawArgs = tc.function.arguments ?? '{}';
@@ -2452,6 +2704,7 @@ export async function createSession(opts) {
2452
2704
  throw new Error(`unknown tool: ${name}`);
2453
2705
  // Keep parsed args by call-id so we can digest/archive tool outputs with context.
2454
2706
  toolArgsByCallId.set(callId, args && typeof args === 'object' && !Array.isArray(args) ? args : {});
2707
+ toolLoopGuard.registerCall(name, args && typeof args === 'object' && !Array.isArray(args) ? args : {}, callId);
2455
2708
  // Pre-dispatch argument validation.
2456
2709
  // - Required params
2457
2710
  // - Type/range/enums
@@ -2556,7 +2809,7 @@ export async function createSession(opts) {
2556
2809
  return { id: callId, content: '[skipped by user: step mode]' };
2557
2810
  }
2558
2811
  }
2559
- const sig = `${name}:${rawArgs || '{}'}`;
2812
+ const sig = toolLoopGuard.computeSignature(name, args && typeof args === 'object' && !Array.isArray(args) ? args : {});
2560
2813
  let content = '';
2561
2814
  let reusedCachedReadOnlyExec = false;
2562
2815
  let reusedCachedReadTool = false;
@@ -2567,10 +2820,18 @@ export async function createSession(opts) {
2567
2820
  reusedCachedReadOnlyExec = true;
2568
2821
  }
2569
2822
  }
2570
- if (READ_FILE_CACHE_TOOLS.has(name) && repeatedReadFileSigs.has(sig)) {
2571
- const cached = readFileCacheBySig.get(sig);
2823
+ // Replay any exec result (read-only or not) when the loop detector flagged it.
2824
+ if (name === 'exec' && !reusedCachedReadOnlyExec && replayExecSigs.has(sig)) {
2825
+ const cached = lastExecResultBySig.get(sig);
2572
2826
  if (cached) {
2573
- content = withCachedReadObservationHint(cached);
2827
+ content = withReplayedExecHint(cached);
2828
+ reusedCachedReadOnlyExec = true; // skip re-execution below
2829
+ }
2830
+ }
2831
+ if (READ_FILE_CACHE_TOOLS.has(name) && repeatedReadFileSigs.has(sig)) {
2832
+ const replay = await toolLoopGuard.getReadCacheReplay(name, args, ctx.cwd);
2833
+ if (replay) {
2834
+ content = replay;
2574
2835
  reusedCachedReadTool = true;
2575
2836
  }
2576
2837
  }
@@ -2588,11 +2849,15 @@ export async function createSession(opts) {
2588
2849
  const value = await builtInFn(callCtx, args);
2589
2850
  content = typeof value === 'string' ? value : JSON.stringify(value);
2590
2851
  if (READ_FILE_CACHE_TOOLS.has(name) && typeof content === 'string' && !content.startsWith('ERROR:')) {
2591
- readFileCacheBySig.set(sig, content);
2852
+ const baseCwd = typeof args?.cwd === 'string' ? String(args.cwd) : ctx.cwd;
2853
+ await toolLoopGuard.storeReadCache(name, args, baseCwd, content);
2592
2854
  }
2593
2855
  if (name === 'exec') {
2594
2856
  // Successful exec clears blocked-loop counters.
2595
2857
  blockedExecAttemptsBySig.clear();
2858
+ // Cache every exec result so repeated calls under context pressure
2859
+ // can replay the result instead of re-executing.
2860
+ lastExecResultBySig.set(sig, content);
2596
2861
  const cmd = String(args?.command ?? '');
2597
2862
  if (looksLikeReadOnlyExecCommand(cmd) && readOnlyExecCacheable(content)) {
2598
2863
  execObservationCacheBySig.set(sig, content);
@@ -2731,6 +2996,10 @@ export async function createSession(opts) {
2731
2996
  }
2732
2997
  }
2733
2998
  }
2999
+ toolLoopGuard.registerOutcome(name, args, {
3000
+ toolCallId: callId,
3001
+ result: content,
3002
+ });
2734
3003
  return { id: callId, content };
2735
3004
  };
2736
3005
  const results = [];
@@ -2751,7 +3020,8 @@ export async function createSession(opts) {
2751
3020
  // Applies to direct exec attempts and spawn_task delegation attempts.
2752
3021
  if (tc.function.name === 'exec' || tc.function.name === 'spawn_task') {
2753
3022
  const blockedMatch = msg.match(/^exec:\s*blocked\s*\(([^)]+)\)\s*without --no-confirm\/--yolo:\s*(.*)$/i)
2754
- || msg.match(/^(spawn_task):\s*blocked\s*—\s*(.*)$/i);
3023
+ || msg.match(/^(spawn_task):\s*blocked\s*—\s*(.*)$/i)
3024
+ || msg.match(/^exec:\s*blocked\s+(background command\b[^.]*)\./i);
2755
3025
  if (blockedMatch) {
2756
3026
  const reason = (blockedMatch[1] || blockedMatch[2] || 'blocked command').trim();
2757
3027
  let parsedArgs = {};
@@ -2763,7 +3033,8 @@ export async function createSession(opts) {
2763
3033
  ? String(parsedArgs?.command ?? '')
2764
3034
  : String(parsedArgs?.task ?? '');
2765
3035
  const normalizedReason = reason.toLowerCase();
2766
- const aggregateByReason = normalizedReason.includes('package install/remove');
3036
+ const aggregateByReason = normalizedReason.includes('package install/remove')
3037
+ || normalizedReason.includes('background command');
2767
3038
  const sig = aggregateByReason
2768
3039
  ? `${tc.function.name}|${reason}`
2769
3040
  : `${tc.function.name}|${reason}|${cmd}`;
@@ -2785,7 +3056,33 @@ export async function createSession(opts) {
2785
3056
  retryable: te.retryable,
2786
3057
  result: toolErrorContent,
2787
3058
  });
2788
- return { id: callId, content: toolErrorContent };
3059
+ let parsedArgs = {};
3060
+ try {
3061
+ const parsed = JSON.parse(tc.function.arguments ?? '{}');
3062
+ if (parsed && typeof parsed === 'object' && !Array.isArray(parsed)) {
3063
+ parsedArgs = parsed;
3064
+ }
3065
+ }
3066
+ catch {
3067
+ // keep empty object
3068
+ }
3069
+ toolLoopGuard.registerOutcome(tc.function.name, parsedArgs, {
3070
+ toolCallId: callId,
3071
+ error: msg,
3072
+ });
3073
+ // Inject fallback guidance after 2 consecutive read_file/read_files failures.
3074
+ let resultContent = toolErrorContent;
3075
+ if (tc.function.name === 'read_file' || tc.function.name === 'read_files') {
3076
+ const failureCount = toolLoopGuard.getReadFileFailureCount();
3077
+ if (failureCount >= 2) {
3078
+ resultContent +=
3079
+ `\n\n[WARNING: ${tc.function.name} has failed ${failureCount} times consecutively. ` +
3080
+ 'Try using `sed` and the `edit_range` tool; if those do not work, create a temporary file with the full contents and save it. ' +
3081
+ 'Then remove the existing file and rename the temporary file to bypass edit_file failing.]';
3082
+ toolLoopGuard.resetReadFileFailureCount();
3083
+ }
3084
+ }
3085
+ return { id: callId, content: resultContent };
2789
3086
  };
2790
3087
  // ── Anti-scan guardrails (§ read budget, dir scan, same-search) ──
2791
3088
  const readOnlyInTurn = toolCallsArr.filter((tc) => isReadOnlyToolDynamic(tc.function.name));
@@ -2848,6 +3145,19 @@ export async function createSession(opts) {
2848
3145
  }
2849
3146
  }
2850
3147
  }
3148
+ if (replayByCallId.size > 0) {
3149
+ const canonicalById = new Map(results.map((r) => [r.id, r.content]));
3150
+ for (const [dupId, canonicalId] of replayByCallId.entries()) {
3151
+ const canonical = canonicalById.get(canonicalId);
3152
+ if (canonical == null)
3153
+ continue;
3154
+ results.push({
3155
+ id: dupId,
3156
+ content: `[idlehands dedupe] Identical tool call replayed from ${canonicalId}. ` +
3157
+ 'Use that earlier tool result; no new execution was performed.',
3158
+ });
3159
+ }
3160
+ }
2851
3161
  // Bail immediately if cancelled during tool execution
2852
3162
  if (ac.signal.aborted)
2853
3163
  break;
@@ -2895,6 +3205,8 @@ export async function createSession(opts) {
2895
3205
  'Do not narrate. Fix required/mistyped fields and unknown keys.',
2896
3206
  });
2897
3207
  }
3208
+ // Update session-level tool loop stats for observability
3209
+ lastToolLoopStats = toolLoopGuard.getStats();
2898
3210
  // Hook: onTurnEnd (Phase 8.5)
2899
3211
  await emitTurnEnd({
2900
3212
  turn: turns,
@@ -3068,6 +3380,7 @@ export async function createSession(opts) {
3068
3380
  listModels,
3069
3381
  refreshServerHealth,
3070
3382
  getPerfSummary,
3383
+ getToolLoopStats: () => lastToolLoopStats,
3071
3384
  captureOn,
3072
3385
  captureOff,
3073
3386
  captureLast,
@@ -3105,6 +3418,9 @@ export async function createSession(opts) {
3105
3418
  get planSteps() {
3106
3419
  return planSteps;
3107
3420
  },
3421
+ get compactionStats() {
3422
+ return { ...compactionStats };
3423
+ },
3108
3424
  executePlanStep,
3109
3425
  clearPlan,
3110
3426
  compactHistory