@visorcraft/idlehands 1.1.16 → 1.2.0

This diff compares the contents of two publicly available versions of the package, as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in their respective public registries.
Files changed (212)
  1. package/dist/agent/formatting.js +30 -13
  2. package/dist/agent/formatting.js.map +1 -1
  3. package/dist/agent/review-artifact.js +12 -8
  4. package/dist/agent/review-artifact.js.map +1 -1
  5. package/dist/agent/tool-calls.js +62 -21
  6. package/dist/agent/tool-calls.js.map +1 -1
  7. package/dist/agent/tool-loop-detection.js +310 -0
  8. package/dist/agent/tool-loop-detection.js.map +1 -0
  9. package/dist/agent/tool-loop-guard.js +235 -0
  10. package/dist/agent/tool-loop-guard.js.map +1 -0
  11. package/dist/agent.js +585 -144
  12. package/dist/agent.js.map +1 -1
  13. package/dist/anton/controller.js +46 -30
  14. package/dist/anton/controller.js.map +1 -1
  15. package/dist/anton/lock.js +5 -1
  16. package/dist/anton/lock.js.map +1 -1
  17. package/dist/anton/parser.js +18 -19
  18. package/dist/anton/parser.js.map +1 -1
  19. package/dist/anton/prompt.js +42 -11
  20. package/dist/anton/prompt.js.map +1 -1
  21. package/dist/anton/reporter.js.map +1 -1
  22. package/dist/anton/session.js.map +1 -1
  23. package/dist/anton/verifier.js +3 -5
  24. package/dist/anton/verifier.js.map +1 -1
  25. package/dist/bench/compare.js +53 -20
  26. package/dist/bench/compare.js.map +1 -1
  27. package/dist/bench/openclaw.js +4 -4
  28. package/dist/bench/openclaw.js.map +1 -1
  29. package/dist/bench/report.js +11 -3
  30. package/dist/bench/report.js.map +1 -1
  31. package/dist/bench/runner.js +20 -14
  32. package/dist/bench/runner.js.map +1 -1
  33. package/dist/bot/commands.js +69 -26
  34. package/dist/bot/commands.js.map +1 -1
  35. package/dist/bot/confirm-discord.js +32 -9
  36. package/dist/bot/confirm-discord.js.map +1 -1
  37. package/dist/bot/confirm-telegram.js +26 -10
  38. package/dist/bot/confirm-telegram.js.map +1 -1
  39. package/dist/bot/dir-guard.js +18 -3
  40. package/dist/bot/dir-guard.js.map +1 -1
  41. package/dist/bot/discord-routing.js +28 -4
  42. package/dist/bot/discord-routing.js.map +1 -1
  43. package/dist/bot/discord-streaming.js +3 -3
  44. package/dist/bot/discord-streaming.js.map +1 -1
  45. package/dist/bot/discord.js +93 -37
  46. package/dist/bot/discord.js.map +1 -1
  47. package/dist/bot/escalation.js +124 -0
  48. package/dist/bot/escalation.js.map +1 -0
  49. package/dist/bot/format.js +2 -5
  50. package/dist/bot/format.js.map +1 -1
  51. package/dist/bot/session-manager.js +17 -6
  52. package/dist/bot/session-manager.js.map +1 -1
  53. package/dist/bot/telegram.js +92 -29
  54. package/dist/bot/telegram.js.map +1 -1
  55. package/dist/cli/agent-turn.js +10 -4
  56. package/dist/cli/agent-turn.js.map +1 -1
  57. package/dist/cli/args.js +51 -9
  58. package/dist/cli/args.js.map +1 -1
  59. package/dist/cli/bot.js +19 -9
  60. package/dist/cli/bot.js.map +1 -1
  61. package/dist/cli/build-repl-context.js +60 -26
  62. package/dist/cli/build-repl-context.js.map +1 -1
  63. package/dist/cli/command-registry.js.map +1 -1
  64. package/dist/cli/commands/anton.js +5 -3
  65. package/dist/cli/commands/anton.js.map +1 -1
  66. package/dist/cli/commands/editing.js +27 -12
  67. package/dist/cli/commands/editing.js.map +1 -1
  68. package/dist/cli/commands/model.js +16 -7
  69. package/dist/cli/commands/model.js.map +1 -1
  70. package/dist/cli/commands/project.js +52 -17
  71. package/dist/cli/commands/project.js.map +1 -1
  72. package/dist/cli/commands/runtime.js +1 -1
  73. package/dist/cli/commands/runtime.js.map +1 -1
  74. package/dist/cli/commands/secrets.js +279 -0
  75. package/dist/cli/commands/secrets.js.map +1 -0
  76. package/dist/cli/commands/session.js +57 -2
  77. package/dist/cli/commands/session.js.map +1 -1
  78. package/dist/cli/commands/tools.js +3 -1
  79. package/dist/cli/commands/tools.js.map +1 -1
  80. package/dist/cli/commands/trifecta.js +1 -1
  81. package/dist/cli/commands/trifecta.js.map +1 -1
  82. package/dist/cli/commands/tui.js.map +1 -1
  83. package/dist/cli/init.js +50 -16
  84. package/dist/cli/init.js.map +1 -1
  85. package/dist/cli/input.js +25 -7
  86. package/dist/cli/input.js.map +1 -1
  87. package/dist/cli/oneshot.js +31 -19
  88. package/dist/cli/oneshot.js.map +1 -1
  89. package/dist/cli/repl-dispatch.js +10 -6
  90. package/dist/cli/repl-dispatch.js.map +1 -1
  91. package/dist/cli/runtime-cmds.js +110 -46
  92. package/dist/cli/runtime-cmds.js.map +1 -1
  93. package/dist/cli/service.js +3 -3
  94. package/dist/cli/service.js.map +1 -1
  95. package/dist/cli/session-state.js +12 -5
  96. package/dist/cli/session-state.js.map +1 -1
  97. package/dist/cli/setup.js +86 -33
  98. package/dist/cli/setup.js.map +1 -1
  99. package/dist/cli/shell.js +4 -4
  100. package/dist/cli/shell.js.map +1 -1
  101. package/dist/cli/status.js +56 -12
  102. package/dist/cli/status.js.map +1 -1
  103. package/dist/client.js +40 -21
  104. package/dist/client.js.map +1 -1
  105. package/dist/commands.js +1 -1
  106. package/dist/commands.js.map +1 -1
  107. package/dist/config.js +171 -15
  108. package/dist/config.js.map +1 -1
  109. package/dist/confirm/auto.js.map +1 -1
  110. package/dist/confirm/headless.js +13 -2
  111. package/dist/confirm/headless.js.map +1 -1
  112. package/dist/confirm/terminal.js +1 -5
  113. package/dist/confirm/terminal.js.map +1 -1
  114. package/dist/context.js +9 -3
  115. package/dist/context.js.map +1 -1
  116. package/dist/git.js +56 -61
  117. package/dist/git.js.map +1 -1
  118. package/dist/harnesses.js +137 -37
  119. package/dist/harnesses.js.map +1 -1
  120. package/dist/history.js +12 -4
  121. package/dist/history.js.map +1 -1
  122. package/dist/hooks/index.js +2 -2
  123. package/dist/hooks/index.js.map +1 -1
  124. package/dist/hooks/loader.js +6 -5
  125. package/dist/hooks/loader.js.map +1 -1
  126. package/dist/hooks/manager.js.map +1 -1
  127. package/dist/hooks/plugins/example-console.js.map +1 -1
  128. package/dist/hooks/scaffold.js +8 -6
  129. package/dist/hooks/scaffold.js.map +1 -1
  130. package/dist/index.js +120 -66
  131. package/dist/index.js.map +1 -1
  132. package/dist/indexer.js +6 -18
  133. package/dist/indexer.js.map +1 -1
  134. package/dist/jsonrpc.js.map +1 -1
  135. package/dist/lens.js +38 -16
  136. package/dist/lens.js.map +1 -1
  137. package/dist/lsp.js +60 -24
  138. package/dist/lsp.js.map +1 -1
  139. package/dist/markdown.js +6 -6
  140. package/dist/markdown.js.map +1 -1
  141. package/dist/mcp.js +15 -6
  142. package/dist/mcp.js.map +1 -1
  143. package/dist/model-customization.js +7 -3
  144. package/dist/model-customization.js.map +1 -1
  145. package/dist/progress/message-edit-scheduler.js +15 -3
  146. package/dist/progress/message-edit-scheduler.js.map +1 -1
  147. package/dist/progress/progress-message-renderer.js.map +1 -1
  148. package/dist/progress/progress-presenter.js +3 -3
  149. package/dist/progress/progress-presenter.js.map +1 -1
  150. package/dist/progress/serialize-telegram.js.map +1 -1
  151. package/dist/progress/tool-summary.js +3 -1
  152. package/dist/progress/tool-summary.js.map +1 -1
  153. package/dist/progress/turn-progress.js +3 -1
  154. package/dist/progress/turn-progress.js.map +1 -1
  155. package/dist/recovery.js +11 -3
  156. package/dist/recovery.js.map +1 -1
  157. package/dist/replay.js +9 -3
  158. package/dist/replay.js.map +1 -1
  159. package/dist/replay_cli.js +5 -3
  160. package/dist/replay_cli.js.map +1 -1
  161. package/dist/runtime/executor.js +66 -20
  162. package/dist/runtime/executor.js.map +1 -1
  163. package/dist/runtime/health.js.map +1 -1
  164. package/dist/runtime/host-runner.js +103 -0
  165. package/dist/runtime/host-runner.js.map +1 -0
  166. package/dist/runtime/planner.js +3 -1
  167. package/dist/runtime/planner.js.map +1 -1
  168. package/dist/runtime/secrets.js +102 -0
  169. package/dist/runtime/secrets.js.map +1 -0
  170. package/dist/runtime/store.js +95 -19
  171. package/dist/runtime/store.js.map +1 -1
  172. package/dist/safety.js +38 -21
  173. package/dist/safety.js.map +1 -1
  174. package/dist/spinner.js +7 -8
  175. package/dist/spinner.js.map +1 -1
  176. package/dist/sys/context.js +3 -3
  177. package/dist/sys/context.js.map +1 -1
  178. package/dist/term.js +1 -1
  179. package/dist/term.js.map +1 -1
  180. package/dist/themes.js +11 -5
  181. package/dist/themes.js.map +1 -1
  182. package/dist/tools/tool-error.js +2 -5
  183. package/dist/tools/tool-error.js.map +1 -1
  184. package/dist/tools.js +84 -35
  185. package/dist/tools.js.map +1 -1
  186. package/dist/tui/branch-picker.js +9 -3
  187. package/dist/tui/branch-picker.js.map +1 -1
  188. package/dist/tui/command-handler.js +88 -36
  189. package/dist/tui/command-handler.js.map +1 -1
  190. package/dist/tui/confirm.js.map +1 -1
  191. package/dist/tui/controller.js +234 -117
  192. package/dist/tui/controller.js.map +1 -1
  193. package/dist/tui/event-bridge.js.map +1 -1
  194. package/dist/tui/keymap.js +93 -71
  195. package/dist/tui/keymap.js.map +1 -1
  196. package/dist/tui/layout.js +9 -1
  197. package/dist/tui/layout.js.map +1 -1
  198. package/dist/tui/render.js +17 -5
  199. package/dist/tui/render.js.map +1 -1
  200. package/dist/tui/screen.js.map +1 -1
  201. package/dist/tui/state.js +129 -63
  202. package/dist/tui/state.js.map +1 -1
  203. package/dist/tui/theme.js +12 -3
  204. package/dist/tui/theme.js.map +1 -1
  205. package/dist/upgrade.js +28 -15
  206. package/dist/upgrade.js.map +1 -1
  207. package/dist/utils.js +8 -5
  208. package/dist/utils.js.map +1 -1
  209. package/dist/vault.js +48 -12
  210. package/dist/vault.js.map +1 -1
  211. package/dist/vim.js.map +1 -1
  212. package/package.json +11 -2
package/dist/agent.js CHANGED
@@ -17,6 +17,7 @@ import { LspManager, detectInstalledLspServers } from './lsp.js';
17
17
  import { generateMinimalDiff, toolResultSummary, execCommandFromSig, formatDurationMs, looksLikePlanningNarration, capTextByApproxTokens, isLikelyBinaryBuffer, sanitizePathsInMessage, digestToolResult, } from './agent/formatting.js';
18
18
  import { parseToolCallsFromContent, getMissingRequiredParams, getArgValidationIssues, stripMarkdownFences } from './agent/tool-calls.js';
19
19
  import { ToolError, ValidationError } from './tools/tool-error.js';
20
+ import { ToolLoopGuard } from './agent/tool-loop-guard.js';
20
21
  export { parseToolCallsFromContent };
21
22
  import { reviewArtifactKeys, looksLikeCodeReviewRequest, looksLikeReviewRetrievalRequest, retrievalAllowsStaleArtifact, parseReviewArtifactStalePolicy, parseReviewArtifact, reviewArtifactStaleReason, gitHead, normalizeModelsResponse, } from './agent/review-artifact.js';
22
23
  import fs from 'node:fs/promises';
@@ -28,7 +29,12 @@ function makeAbortController() {
28
29
  }
29
30
  const CACHED_EXEC_OBSERVATION_HINT = '[idlehands hint] Reused cached output for repeated read-only exec call (unchanged observation).';
30
31
  function looksLikeReadOnlyExecCommand(command) {
31
- const cmd = String(command || '').trim().toLowerCase();
32
+ // Strip leading `cd <path> &&` / `cd <path>;` prefixes — cd is read-only
33
+ // navigation, the actual command that matters comes after.
34
+ let cmd = String(command || '').trim().toLowerCase();
35
+ if (!cmd)
36
+ return false;
37
+ cmd = cmd.replace(/^(\s*cd\s+[^;&|]+\s*(?:&&|;)\s*)+/i, '').trim();
32
38
  if (!cmd)
33
39
  return false;
34
40
  // Shell redirects are likely writes.
@@ -98,6 +104,25 @@ function withCachedExecObservationHint(content) {
98
104
  return `${content}\n${CACHED_EXEC_OBSERVATION_HINT}`;
99
105
  }
100
106
  }
107
+ const REPLAYED_EXEC_HINT = '[idlehands hint] You already ran this exact command. This is the replayed result from your previous execution. Do NOT re-run it — use the output below to continue your task.';
108
+ function withReplayedExecHint(content) {
109
+ if (!content)
110
+ return content;
111
+ try {
112
+ const parsed = JSON.parse(content);
113
+ const out = typeof parsed?.out === 'string' ? parsed.out : '';
114
+ if (out.includes(REPLAYED_EXEC_HINT))
115
+ return content;
116
+ parsed.out = out ? `${REPLAYED_EXEC_HINT}\n${out}` : REPLAYED_EXEC_HINT;
117
+ parsed.replayed = true;
118
+ return JSON.stringify(parsed);
119
+ }
120
+ catch {
121
+ if (content.includes(REPLAYED_EXEC_HINT))
122
+ return content;
123
+ return `${REPLAYED_EXEC_HINT}\n${content}`;
124
+ }
125
+ }
101
126
  function readOnlyExecCacheable(content) {
102
127
  try {
103
128
  const parsed = JSON.parse(content);
@@ -116,6 +141,15 @@ function ensureInformativeAssistantText(text, ctx) {
116
141
  }
117
142
  return `I have no user-visible response text for this turn (turn=${ctx.turns}). Please try again or rephrase your request.`;
118
143
  }
144
+ function isContextWindowExceededError(err) {
145
+ const status = Number(err?.status ?? NaN);
146
+ const msg = String(err?.message ?? err ?? '');
147
+ if (status === 413)
148
+ return true;
149
+ if (!msg)
150
+ return false;
151
+ return /(exceeds?\s+the\s+available\s+context\s+size|exceed_context|context\s+size|context\s+window|maximum\s+context\s+length|too\s+many\s+tokens|request\s*\(\d+\s*tokens\))/i.test(msg);
152
+ }
119
153
  /** Errors that should break the outer agent loop, not be caught by per-tool handlers */
120
154
  class AgentLoopBreak extends Error {
121
155
  constructor(message) {
@@ -133,6 +167,7 @@ Rules:
133
167
  - Use read_file with search=... to jump to relevant code; avoid reading whole files.
134
168
  - Never call read_file/read_files/list_dir twice in a row with identical arguments (same path/options). Reuse the previous result instead.
135
169
  - Prefer apply_patch or edit_range for code edits (token-efficient). Use edit_file only when exact old_text replacement is necessary.
170
+ - write_file is for new files or explicit full rewrites only. Existing non-empty files require overwrite=true/force=true.
136
171
  - Use insert_file for insertions (prepend/append/line).
137
172
  - Use exec to run commands, tests, builds; check results before reporting success.
138
173
  - When running commands in a subdirectory, use exec's cwd parameter — NOT "cd /path && cmd". Each exec call is a fresh shell; cd does not persist.
@@ -305,8 +340,8 @@ function buildToolsSchema(opts) {
305
340
  type: 'function',
306
341
  function: {
307
342
  name: 'write_file',
308
- description: 'Write file (atomic, backup).',
309
- parameters: obj({ path: str(), content: str() }, ['path', 'content']),
343
+ description: 'Write file (atomic, backup). Existing non-empty files require overwrite=true (or force=true).',
344
+ parameters: obj({ path: str(), content: str(), overwrite: bool(), force: bool() }, ['path', 'content']),
310
345
  },
311
346
  },
312
347
  {
@@ -404,6 +439,11 @@ function buildToolsSchema(opts) {
404
439
  if (opts?.activeVaultTools) {
405
440
  schemas.push({ type: 'function', function: { name: 'vault_search', description: 'Search vault.', parameters: obj({ query: str(), limit: int() }, ['query']) } }, { type: 'function', function: { name: 'vault_note', description: 'Write vault note.', parameters: obj({ key: str(), value: str() }, ['key', 'value']) } });
406
441
  }
442
+ else if (opts?.passiveVault) {
443
+ // In passive mode, expose vault_search (read-only) so the model can recover
444
+ // compacted context on demand, but don't expose vault_note (write).
445
+ schemas.push({ type: 'function', function: { name: 'vault_search', description: 'Search vault memory for earlier context that was compacted away. Use sparingly — only when you need to recall specific details from earlier in the conversation.', parameters: obj({ query: str(), limit: int() }, ['query']) } });
446
+ }
407
447
  // Phase 9: sys_context tool is only available in sys mode.
408
448
  if (opts?.sysMode) {
409
449
  schemas.push(SYS_CONTEXT_SCHEMA);
@@ -648,6 +688,7 @@ export async function createSession(opts) {
648
688
  let mcpToolsLoaded = !mcpLazySchemaMode;
649
689
  const getToolsSchema = () => buildToolsSchema({
650
690
  activeVaultTools,
691
+ passiveVault: !activeVaultTools && vaultEnabled && vaultMode === 'passive',
651
692
  sysMode: cfg.mode === 'sys',
652
693
  lspTools: lspManager?.hasServers() === true,
653
694
  mcpTools: mcpToolsLoaded ? (mcpManager?.getEnabledToolSchemas() ?? []) : [],
@@ -1155,21 +1196,68 @@ export async function createSession(opts) {
1155
1196
  const clearPlan = () => {
1156
1197
  planSteps = [];
1157
1198
  };
1199
+ const getLatestObjectiveText = () => {
1200
+ for (let i = messages.length - 1; i >= 0; i--) {
1201
+ const m = messages[i];
1202
+ if (m.role !== 'user')
1203
+ continue;
1204
+ const text = userContentToText((m.content ?? '')).trim();
1205
+ if (!text)
1206
+ continue;
1207
+ if (text.startsWith('[system]'))
1208
+ continue;
1209
+ if (text.startsWith('[Trifecta Vault'))
1210
+ continue;
1211
+ if (text.startsWith('[Vault context'))
1212
+ continue;
1213
+ return text;
1214
+ }
1215
+ return '';
1216
+ };
1217
+ const compactionVaultGuidance = () => {
1218
+ if (!vault)
1219
+ return '';
1220
+ if (vaultMode === 'active' || activeVaultTools) {
1221
+ return 'Vault memory is available. Retrieve prior context with vault_search(query="...") when needed.';
1222
+ }
1223
+ if (vaultMode === 'passive') {
1224
+ return 'Vault memory is in passive mode; relevant entries may be auto-injected. You can also use vault_search(query="...") to recover specific earlier context if needed.';
1225
+ }
1226
+ return '';
1227
+ };
1228
+ const buildCompactionSystemNote = (kind, dropped) => {
1229
+ const prefix = kind === 'auto'
1230
+ ? `[auto-compacted: ${dropped} old messages dropped to stay within context budget.]`
1231
+ : `[compacted: ${dropped} messages dropped.]`;
1232
+ const guidance = compactionVaultGuidance();
1233
+ return guidance ? `${prefix} ${guidance}` : prefix;
1234
+ };
1235
+ let lastAskInstructionText = '';
1236
+ let lastCompactionReminderObjective = '';
1237
+ const injectCompactionReminder = (reason) => {
1238
+ const objective = (getLatestObjectiveText() || lastAskInstructionText || '').trim();
1239
+ if (!objective)
1240
+ return;
1241
+ const clippedObjective = objective.length > 1600 ? `${objective.slice(0, 1600)}\n[truncated]` : objective;
1242
+ if (clippedObjective === lastCompactionReminderObjective)
1243
+ return;
1244
+ lastCompactionReminderObjective = clippedObjective;
1245
+ const vaultHint = compactionVaultGuidance();
1246
+ messages.push({
1247
+ role: 'user',
1248
+ content: `[system] Context was compacted (${reason}). Continue the SAME task from the current state; do not restart.\n` +
1249
+ `Most recent user objective:\n${clippedObjective}` +
1250
+ (vaultHint ? `\n\n${vaultHint}` : ''),
1251
+ });
1252
+ };
1158
1253
  // Session-level vault context injection: search vault for entries relevant to
1159
- // the last user message and inject them into the conversation. Used after any
1160
- // compaction to restore context the model lost when messages were dropped.
1254
+ // the latest substantive objective and inject them into the conversation.
1255
+ // Used after compaction to restore context the model lost when messages were dropped.
1161
1256
  let lastVaultInjectionQuery = '';
1162
1257
  const injectVaultContext = async () => {
1163
1258
  if (!vault)
1164
1259
  return;
1165
- let lastUser = null;
1166
- for (let j = messages.length - 1; j >= 0; j--) {
1167
- if (messages[j].role === 'user') {
1168
- lastUser = messages[j];
1169
- break;
1170
- }
1171
- }
1172
- const userText = userContentToText((lastUser?.content ?? '')).trim();
1260
+ const userText = (getLatestObjectiveText() || lastAskInstructionText || '').trim();
1173
1261
  if (!userText)
1174
1262
  return;
1175
1263
  const query = userText.slice(0, 200);
@@ -1190,77 +1278,150 @@ export async function createSession(opts) {
1190
1278
  content: `${vaultContextHeader} Relevant entries for "${query}":\n${lines.join('\n')}`
1191
1279
  });
1192
1280
  };
1193
- const compactHistory = async (opts) => {
1194
- const beforeMessages = messages.length;
1195
- const beforeTokens = estimateTokensFromMessages(messages);
1196
- let compacted;
1197
- if (opts?.hard) {
1198
- const sys = messages[0]?.role === 'system' ? [messages[0]] : [];
1199
- const tail = messages.slice(-2);
1200
- compacted = [...sys, ...tail];
1281
+ let compactionLockTail = Promise.resolve();
1282
+ let compactionStats = {
1283
+ inProgress: false,
1284
+ lockHeld: false,
1285
+ runs: 0,
1286
+ failedRuns: 0,
1287
+ beforeMessages: 0,
1288
+ afterMessages: 0,
1289
+ freedTokens: 0,
1290
+ archivedToolMessages: 0,
1291
+ droppedMessages: 0,
1292
+ dryRun: false,
1293
+ };
1294
+ const runCompactionWithLock = async (reason, runner) => {
1295
+ const prev = compactionLockTail;
1296
+ let release = () => { };
1297
+ compactionLockTail = new Promise((resolve) => {
1298
+ release = () => resolve();
1299
+ });
1300
+ await prev;
1301
+ compactionStats = {
1302
+ ...compactionStats,
1303
+ inProgress: true,
1304
+ lockHeld: true,
1305
+ lastReason: reason,
1306
+ lastError: undefined,
1307
+ updatedAt: new Date().toISOString(),
1308
+ // Reset run stats before fresh calculation.
1309
+ beforeMessages: 0,
1310
+ afterMessages: 0,
1311
+ freedTokens: 0,
1312
+ archivedToolMessages: 0,
1313
+ droppedMessages: 0,
1314
+ dryRun: false,
1315
+ };
1316
+ try {
1317
+ const result = await runner();
1318
+ compactionStats = {
1319
+ ...compactionStats,
1320
+ ...result,
1321
+ inProgress: false,
1322
+ lockHeld: false,
1323
+ runs: compactionStats.runs + 1,
1324
+ lastReason: reason,
1325
+ updatedAt: new Date().toISOString(),
1326
+ };
1327
+ return result;
1201
1328
  }
1202
- else {
1203
- compacted = enforceContextBudget({
1204
- messages,
1205
- contextWindow,
1206
- maxTokens,
1207
- minTailMessages: opts?.force ? 2 : 12,
1208
- compactAt: opts?.force ? 0.5 : (cfg.compact_at ?? 0.8),
1209
- toolSchemaTokens: estimateToolSchemaTokens(getToolsSchema()),
1210
- force: opts?.force,
1211
- });
1329
+ catch (e) {
1330
+ compactionStats = {
1331
+ ...compactionStats,
1332
+ inProgress: false,
1333
+ lockHeld: false,
1334
+ failedRuns: compactionStats.failedRuns + 1,
1335
+ lastReason: reason,
1336
+ lastError: e?.message ?? String(e),
1337
+ updatedAt: new Date().toISOString(),
1338
+ };
1339
+ throw e;
1212
1340
  }
1213
- const compactedByRefs = new Set(compacted);
1214
- let dropped = messages.filter((m) => !compactedByRefs.has(m));
1215
- if (opts?.topic) {
1216
- const topic = opts.topic.toLowerCase();
1217
- dropped = dropped.filter((m) => !userContentToText(m.content ?? '').toLowerCase().includes(topic));
1218
- const keepFromTopic = messages.filter((m) => userContentToText(m.content ?? '').toLowerCase().includes(topic));
1219
- compacted = [...compacted, ...keepFromTopic.filter((m) => !compactedByRefs.has(m))];
1341
+ finally {
1342
+ release();
1220
1343
  }
1221
- const archivedToolMessages = dropped.filter((m) => m.role === 'tool').length;
1222
- const afterMessages = compacted.length;
1223
- const afterTokens = estimateTokensFromMessages(compacted);
1224
- const freedTokens = Math.max(0, beforeTokens - afterTokens);
1225
- if (!opts?.dry) {
1226
- if (dropped.length && vault) {
1227
- try {
1228
- // Store the original/current user prompt before compaction so it survives context loss.
1229
- let userPromptToPreserve = null;
1230
- for (let i = messages.length - 1; i >= 0; i--) {
1231
- const m = messages[i];
1232
- if (m.role === 'user') {
1233
- const text = userContentToText((m.content ?? '')).trim();
1234
- if (text && !text.startsWith('[Trifecta Vault') && !text.startsWith('[Vault context') && text.length > 20) {
1235
- userPromptToPreserve = text;
1236
- break;
1344
+ };
1345
+ const compactHistory = async (opts) => {
1346
+ const reason = opts?.reason
1347
+ ?? (opts?.hard ? 'manual hard compaction'
1348
+ : opts?.force ? 'manual force compaction'
1349
+ : 'manual compaction');
1350
+ return await runCompactionWithLock(reason, async () => {
1351
+ const beforeMessages = messages.length;
1352
+ const beforeTokens = estimateTokensFromMessages(messages);
1353
+ let compacted;
1354
+ if (opts?.hard) {
1355
+ const sys = messages[0]?.role === 'system' ? [messages[0]] : [];
1356
+ const tail = messages.slice(-2);
1357
+ compacted = [...sys, ...tail];
1358
+ }
1359
+ else {
1360
+ compacted = enforceContextBudget({
1361
+ messages,
1362
+ contextWindow,
1363
+ maxTokens,
1364
+ minTailMessages: opts?.force ? 2 : 12,
1365
+ compactAt: opts?.force ? 0.5 : (cfg.compact_at ?? 0.8),
1366
+ toolSchemaTokens: estimateToolSchemaTokens(getToolsSchema()),
1367
+ force: opts?.force,
1368
+ });
1369
+ }
1370
+ const compactedByRefs = new Set(compacted);
1371
+ let dropped = messages.filter((m) => !compactedByRefs.has(m));
1372
+ if (opts?.topic) {
1373
+ const topic = opts.topic.toLowerCase();
1374
+ dropped = dropped.filter((m) => !userContentToText(m.content ?? '').toLowerCase().includes(topic));
1375
+ const keepFromTopic = messages.filter((m) => userContentToText(m.content ?? '').toLowerCase().includes(topic));
1376
+ compacted = [...compacted, ...keepFromTopic.filter((m) => !compactedByRefs.has(m))];
1377
+ }
1378
+ const archivedToolMessages = dropped.filter((m) => m.role === 'tool').length;
1379
+ const afterMessages = compacted.length;
1380
+ const afterTokens = estimateTokensFromMessages(compacted);
1381
+ const freedTokens = Math.max(0, beforeTokens - afterTokens);
1382
+ if (!opts?.dry) {
1383
+ if (dropped.length && vault) {
1384
+ try {
1385
+ // Store the original/current user prompt before compaction so it survives context loss.
1386
+ let userPromptToPreserve = null;
1387
+ for (let i = messages.length - 1; i >= 0; i--) {
1388
+ const m = messages[i];
1389
+ if (m.role === 'user') {
1390
+ const text = userContentToText((m.content ?? '')).trim();
1391
+ if (text && !text.startsWith('[Trifecta Vault') && !text.startsWith('[Vault context') && text.length > 20) {
1392
+ userPromptToPreserve = text;
1393
+ break;
1394
+ }
1237
1395
  }
1238
1396
  }
1397
+ if (userPromptToPreserve) {
1398
+ await vault.upsertNote('current_task', userPromptToPreserve.slice(0, 2000), 'system');
1399
+ }
1400
+ await vault.archiveToolMessages(dropped, new Map());
1401
+ await vault.note('compaction_summary', `Dropped ${dropped.length} messages (${freedTokens} tokens).`);
1239
1402
  }
1240
- if (userPromptToPreserve) {
1241
- await vault.upsertNote('current_task', userPromptToPreserve.slice(0, 2000), 'system');
1403
+ catch {
1404
+ // best-effort
1242
1405
  }
1243
- await vault.archiveToolMessages(dropped, new Map());
1244
- await vault.note('compaction_summary', `Dropped ${dropped.length} messages (${freedTokens} tokens).`);
1245
1406
  }
1246
- catch {
1247
- // best-effort
1407
+ messages = compacted;
1408
+ if (dropped.length) {
1409
+ messages.push({ role: 'system', content: buildCompactionSystemNote('manual', dropped.length) });
1410
+ await injectVaultContext().catch(() => { });
1411
+ if (opts?.reason || opts?.force) {
1412
+ injectCompactionReminder(opts?.reason ?? 'history compaction');
1413
+ }
1248
1414
  }
1249
1415
  }
1250
- messages = compacted;
1251
- if (dropped.length) {
1252
- messages.push({ role: 'system', content: `[compacted: ${dropped.length} messages archived to Vault - vault_search to recall]` });
1253
- await injectVaultContext().catch(() => { });
1254
- }
1255
- }
1256
- return {
1257
- beforeMessages,
1258
- afterMessages,
1259
- freedTokens,
1260
- archivedToolMessages,
1261
- droppedMessages: dropped.length,
1262
- dryRun: !!opts?.dry,
1263
- };
1416
+ return {
1417
+ beforeMessages,
1418
+ afterMessages,
1419
+ freedTokens,
1420
+ archivedToolMessages,
1421
+ droppedMessages: dropped.length,
1422
+ dryRun: !!opts?.dry,
1423
+ };
1424
+ });
1264
1425
  };
1265
1426
  const cumulativeUsage = { prompt: 0, completion: 0 };
1266
1427
  const turnDurationsMs = [];
@@ -1269,6 +1430,17 @@ export async function createSession(opts) {
1269
1430
  const tgSamples = [];
1270
1431
  let lastTurnMetrics;
1271
1432
  let lastServerHealth;
1433
+ let lastToolLoopStats = { totalHistory: 0, signatures: [], outcomes: [], telemetry: {
1434
+ callsRegistered: 0,
1435
+ dedupedReplays: 0,
1436
+ readCacheLookups: 0,
1437
+ readCacheHits: 0,
1438
+ warnings: 0,
1439
+ criticals: 0,
1440
+ recoveryRecommended: 0,
1441
+ readCacheHitRate: 0,
1442
+ dedupeRate: 0,
1443
+ } };
1272
1444
  let lastModelsProbeMs = 0;
1273
1445
  const capturesDir = path.join(stateDir(), 'captures');
1274
1446
  let captureEnabled = false;
@@ -1617,6 +1789,10 @@ export async function createSession(opts) {
1617
1789
  await hookObj.onToolResult?.(result);
1618
1790
  await hookManager.emit('tool_result', { askId, turn: turns, result });
1619
1791
  };
1792
+ const emitToolLoop = async (loop) => {
1793
+ await hookObj.onToolLoop?.(loop);
1794
+ await hookManager.emit('tool_loop', { askId, turn: turns, loop });
1795
+ };
1620
1796
  const emitTurnEnd = async (stats) => {
1621
1797
  await hookObj.onTurnEnd?.(stats);
1622
1798
  await hookManager.emit('turn_end', { askId, stats });
@@ -1627,6 +1803,8 @@ export async function createSession(opts) {
1627
1803
  return { text: finalText, turns, toolCalls };
1628
1804
  };
1629
1805
  const rawInstructionText = userContentToText(instruction).trim();
1806
+ lastAskInstructionText = rawInstructionText;
1807
+ lastCompactionReminderObjective = '';
1630
1808
  await hookManager.emit('ask_start', { askId, instruction: rawInstructionText });
1631
1809
  const projectDir = cfg.dir ?? process.cwd();
1632
1810
  const reviewKeys = reviewArtifactKeys(projectDir);
@@ -1750,12 +1928,50 @@ export async function createSession(opts) {
1750
1928
  const blockedExecAttemptsBySig = new Map();
1751
1929
  // Cache successful read-only exec observations by exact signature.
1752
1930
  const execObservationCacheBySig = new Map();
1931
+ // Cache ALL successful exec results so repeated identical calls under context
1932
+ // pressure can replay the cached result instead of re-executing.
1933
+ const lastExecResultBySig = new Map();
1934
+ // Cache successful read_file/read_files/list_dir results by signature + mtime for invalidation.
1935
+ const readFileCacheBySig = new Map();
1936
+ const READ_FILE_CACHE_TOOLS = new Set(['read_file', 'read_files', 'list_dir']);
1937
+ const toolLoopGuard = new ToolLoopGuard({
1938
+ enabled: cfg.tool_loop_detection?.enabled,
1939
+ historySize: cfg.tool_loop_detection?.history_size,
1940
+ warningThreshold: cfg.tool_loop_detection?.warning_threshold,
1941
+ criticalThreshold: cfg.tool_loop_detection?.critical_threshold,
1942
+ globalCircuitBreakerThreshold: cfg.tool_loop_detection?.global_circuit_breaker_threshold,
1943
+ readCacheTtlMs: cfg.tool_loop_detection?.read_cache_ttl_ms,
1944
+ detectors: {
1945
+ genericRepeat: cfg.tool_loop_detection?.detectors?.generic_repeat,
1946
+ knownPollNoProgress: cfg.tool_loop_detection?.detectors?.known_poll_no_progress,
1947
+ pingPong: cfg.tool_loop_detection?.detectors?.ping_pong,
1948
+ },
1949
+ perTool: Object.fromEntries(Object.entries(cfg.tool_loop_detection?.per_tool ?? {}).map(([tool, policy]) => [
1950
+ tool,
1951
+ {
1952
+ warningThreshold: policy?.warning_threshold,
1953
+ criticalThreshold: policy?.critical_threshold,
1954
+ globalCircuitBreakerThreshold: policy?.global_circuit_breaker_threshold,
1955
+ detectors: {
1956
+ genericRepeat: policy?.detectors?.generic_repeat,
1957
+ knownPollNoProgress: policy?.detectors?.known_poll_no_progress,
1958
+ pingPong: policy?.detectors?.ping_pong,
1959
+ },
1960
+ },
1961
+ ])),
1962
+ });
1963
+ const toolLoopWarningKeys = new Set();
1964
+ let forceToollessRecoveryTurn = false;
1965
+ let toollessRecoveryUsed = false;
1753
1966
  // Prevent repeating the same "stop rerunning" reminder every turn.
1754
1967
  const readOnlyExecHintedSigs = new Set();
1755
1968
  // Keep a lightweight breadcrumb for diagnostics on partial failures.
1756
1969
  let lastSuccessfulTestRun = null;
1757
1970
  // One-time nudge to prevent post-success churn after green test runs.
1758
1971
  let finalizeAfterTestsNudgeUsed = false;
1972
+ // Recover once/twice from server-side context-overflow 400/413s by forcing compaction and retrying.
1973
+ let overflowCompactionAttempts = 0;
1974
+ const MAX_OVERFLOW_COMPACTION_ATTEMPTS = 2;
1759
1975
  const archiveToolOutputForVault = async (msg) => {
1760
1976
  if (!lens || !vault || msg.role !== 'tool' || typeof msg.content !== 'string')
1761
1977
  return msg;
@@ -1869,50 +2085,63 @@ export async function createSession(opts) {
1869
2085
  throw new Error(`session timeout exceeded (${cfg.timeout}s) after ${wallElapsed.toFixed(1)}s`);
1870
2086
  }
1871
2087
  await maybeAutoDetectModelChange();
1872
- const beforeMsgs = messages;
1873
- const compacted = enforceContextBudget({
1874
- messages: beforeMsgs,
1875
- contextWindow,
1876
- maxTokens: maxTokens,
1877
- minTailMessages: 12,
1878
- compactAt: cfg.compact_at ?? 0.8,
1879
- toolSchemaTokens: estimateToolSchemaTokens(getToolsSchema()),
1880
- });
1881
- const compactedByRefs = new Set(compacted);
1882
- const dropped = beforeMsgs.filter((m) => !compactedByRefs.has(m));
1883
- if (dropped.length && vault) {
1884
- try {
1885
- // Store the original/current user prompt before compaction so it survives context loss.
1886
- // Find the last substantive user message that looks like a task/instruction.
1887
- let userPromptToPreserve = null;
1888
- for (let i = beforeMsgs.length - 1; i >= 0; i--) {
1889
- const m = beforeMsgs[i];
1890
- if (m.role === 'user') {
1891
- const text = userContentToText((m.content ?? '')).trim();
1892
- // Skip vault injection messages and short prompts
1893
- if (text && !text.startsWith('[Trifecta Vault') && !text.startsWith('[Vault context') && text.length > 20) {
1894
- userPromptToPreserve = text;
1895
- break;
2088
+ await runCompactionWithLock('auto context-budget compaction', async () => {
2089
+ const beforeMsgs = messages;
2090
+ const beforeTokens = estimateTokensFromMessages(beforeMsgs);
2091
+ const compacted = enforceContextBudget({
2092
+ messages: beforeMsgs,
2093
+ contextWindow,
2094
+ maxTokens: maxTokens,
2095
+ minTailMessages: 12,
2096
+ compactAt: cfg.compact_at ?? 0.8,
2097
+ toolSchemaTokens: estimateToolSchemaTokens(getToolsSchema()),
2098
+ });
2099
+ const compactedByRefs = new Set(compacted);
2100
+ const dropped = beforeMsgs.filter((m) => !compactedByRefs.has(m));
2101
+ if (dropped.length && vault) {
2102
+ try {
2103
+ // Store the original/current user prompt before compaction so it survives context loss.
2104
+ // Find the last substantive user message that looks like a task/instruction.
2105
+ let userPromptToPreserve = null;
2106
+ for (let i = beforeMsgs.length - 1; i >= 0; i--) {
2107
+ const m = beforeMsgs[i];
2108
+ if (m.role === 'user') {
2109
+ const text = userContentToText((m.content ?? '')).trim();
2110
+ // Skip vault injection messages and short prompts
2111
+ if (text && !text.startsWith('[Trifecta Vault') && !text.startsWith('[Vault context') && text.length > 20) {
2112
+ userPromptToPreserve = text;
2113
+ break;
2114
+ }
1896
2115
  }
1897
2116
  }
2117
+ if (userPromptToPreserve) {
2118
+ await vault.upsertNote('current_task', userPromptToPreserve.slice(0, 2000), 'system');
2119
+ }
2120
+ const toArchive = lens
2121
+ ? await Promise.all(dropped.map((m) => archiveToolOutputForVault(m)))
2122
+ : dropped;
2123
+ await vault.archiveToolMessages(toArchive, toolNameByCallId);
1898
2124
  }
1899
- if (userPromptToPreserve) {
1900
- await vault.upsertNote('current_task', userPromptToPreserve.slice(0, 2000), 'system');
2125
+ catch (e) {
2126
+ console.warn(`[warn] vault archive failed: ${e instanceof Error ? e.message : String(e)}`);
1901
2127
  }
1902
- const toArchive = lens
1903
- ? await Promise.all(dropped.map((m) => archiveToolOutputForVault(m)))
1904
- : dropped;
1905
- await vault.archiveToolMessages(toArchive, toolNameByCallId);
1906
2128
  }
1907
- catch (e) {
1908
- console.warn(`[warn] vault archive failed: ${e instanceof Error ? e.message : String(e)}`);
2129
+ messages = compacted;
2130
+ if (dropped.length) {
2131
+ messages.push({ role: 'system', content: buildCompactionSystemNote('auto', dropped.length) });
2132
+ await injectVaultContext().catch(() => { });
2133
+ injectCompactionReminder('auto context-budget compaction');
1909
2134
  }
1910
- }
1911
- messages = compacted;
1912
- if (dropped.length) {
1913
- messages.push({ role: 'system', content: `[auto-compacted: ${dropped.length} old messages dropped to stay within context budget. Do NOT re-read files or re-run commands you have already seen — use vault_search to recall prior results if needed.]` });
1914
- await injectVaultContext().catch(() => { });
1915
- }
2135
+ const afterTokens = estimateTokensFromMessages(compacted);
2136
+ return {
2137
+ beforeMessages: beforeMsgs.length,
2138
+ afterMessages: compacted.length,
2139
+ freedTokens: Math.max(0, beforeTokens - afterTokens),
2140
+ archivedToolMessages: dropped.filter((m) => m.role === 'tool').length,
2141
+ droppedMessages: dropped.length,
2142
+ dryRun: false,
2143
+ };
2144
+ });
1916
2145
  const ac = makeAbortController();
1917
2146
  inFlight = ac;
1918
2147
  // If caller provided an AbortSignal (bench iteration timeout, etc), propagate it.
@@ -1936,20 +2165,45 @@ export async function createSession(opts) {
1936
2165
  };
1937
2166
  let resp;
1938
2167
  try {
1939
- resp = await client.chatStream({
1940
- model,
1941
- messages,
1942
- tools: getToolsSchema(),
1943
- tool_choice: 'auto',
1944
- temperature,
1945
- top_p: topP,
1946
- max_tokens: maxTokens,
1947
- extra: { cache_prompt: cfg.cache_prompt ?? true },
1948
- signal: ac.signal,
1949
- requestId: `r${reqCounter}`,
1950
- onToken: hookObj.onToken,
1951
- onFirstDelta,
1952
- });
2168
+ try {
2169
+ const toolsForTurn = forceToollessRecoveryTurn ? [] : getToolsSchema();
2170
+ const toolChoiceForTurn = forceToollessRecoveryTurn ? 'none' : 'auto';
2171
+ resp = await client.chatStream({
2172
+ model,
2173
+ messages,
2174
+ tools: toolsForTurn,
2175
+ tool_choice: toolChoiceForTurn,
2176
+ temperature,
2177
+ top_p: topP,
2178
+ max_tokens: maxTokens,
2179
+ extra: { cache_prompt: cfg.cache_prompt ?? true },
2180
+ signal: ac.signal,
2181
+ requestId: `r${reqCounter}`,
2182
+ onToken: hookObj.onToken,
2183
+ onFirstDelta,
2184
+ });
2185
+ // Successful response resets overflow recovery budget.
2186
+ overflowCompactionAttempts = 0;
2187
+ }
2188
+ catch (e) {
2189
+ if (isContextWindowExceededError(e) && overflowCompactionAttempts < MAX_OVERFLOW_COMPACTION_ATTEMPTS) {
2190
+ overflowCompactionAttempts++;
2191
+ const useHardCompaction = overflowCompactionAttempts > 1;
2192
+ const compacted = await compactHistory({
2193
+ force: true,
2194
+ hard: useHardCompaction,
2195
+ reason: 'server context-window overflow recovery',
2196
+ });
2197
+ const mode = useHardCompaction ? 'hard' : 'force';
2198
+ messages.push({
2199
+ role: 'system',
2200
+ content: `[auto-recovery] Previous request exceeded model context window. Ran ${mode} compaction ` +
2201
+ `(freed ~${compacted.freedTokens} tokens, dropped ${compacted.droppedMessages} messages). Continue from latest state; do not restart work.`,
2202
+ });
2203
+ continue;
2204
+ }
2205
+ throw e;
2206
+ }
1953
2207
  }
1954
2208
  finally {
1955
2209
  clearTimeout(timer);
@@ -2008,6 +2262,8 @@ export async function createSession(opts) {
2008
2262
  },
2009
2263
  }
2010
2264
  : undefined;
2265
+ const wasToollessRecoveryTurn = forceToollessRecoveryTurn;
2266
+ forceToollessRecoveryTurn = false;
2011
2267
  const choice0 = resp.choices?.[0] ?? legacyChoice;
2012
2268
  const finishReason = choice0?.finish_reason ?? 'unknown';
2013
2269
  const msg = choice0?.message;
@@ -2058,6 +2314,10 @@ export async function createSession(opts) {
2058
2314
  }
2059
2315
  }
2060
2316
  }
2317
+ if (wasToollessRecoveryTurn && toolCallsArr?.length) {
2318
+ // Recovery turn explicitly disables tools; ignore any stray tool-call output.
2319
+ toolCallsArr = undefined;
2320
+ }
2061
2321
  if (cfg.verbose) {
2062
2322
  console.warn(`[turn ${turns}] finish_reason=${finishReason} content_chars=${content.length} visible_chars=${visible.length} tool_calls=${toolCallsArr?.length ?? 0}`);
2063
2323
  }
@@ -2136,12 +2396,17 @@ export async function createSession(opts) {
2136
2396
  // narration chunk starts on a fresh line (avoids wall-of-text output).
2137
2397
  if (visible && hookObj.onToken)
2138
2398
  hookObj.onToken('\n');
2139
- toolCalls += toolCallsArr.length;
2399
+ const originalToolCallsArr = toolCallsArr;
2400
+ const preparedTurn = toolLoopGuard.prepareTurn(originalToolCallsArr);
2401
+ const replayByCallId = preparedTurn.replayByCallId;
2402
+ const parsedArgsByCallId = preparedTurn.parsedArgsByCallId;
2403
+ toolCallsArr = preparedTurn.uniqueCalls;
2404
+ toolCalls += originalToolCallsArr.length;
2140
2405
  const assistantToolCallText = visible || '';
2141
2406
  const compactAssistantToolCallText = assistantToolCallText.length > 900
2142
2407
  ? `${assistantToolCallText.slice(0, 900)}\n[history-compacted: assistant narration truncated before tool execution]`
2143
2408
  : assistantToolCallText;
2144
- messages.push({ role: 'assistant', content: compactAssistantToolCallText, tool_calls: toolCallsArr });
2409
+ messages.push({ role: 'assistant', content: compactAssistantToolCallText, tool_calls: originalToolCallsArr });
2145
2410
  // sigCounts is scoped to the entire ask() run (see above)
2146
2411
  // Bridge ConfirmationProvider → legacy confirm callback for tools.
2147
2412
  // If a ConfirmationProvider is given, wrap it; otherwise fall back to raw callback.
@@ -2185,18 +2450,64 @@ export async function createSession(opts) {
2185
2450
  // We only treat repeated exec as a loop if no file mutations happened since the
2186
2451
  // last time we saw that exact exec signature.
2187
2452
  const turnSigs = new Set();
2453
+ const sigMetaBySig = new Map();
2188
2454
  for (const tc of toolCallsArr) {
2189
- const sig = `${tc.function.name}:${tc.function.arguments ?? '{}'}`;
2455
+ const callId = resolveCallId(tc);
2456
+ const parsedArgs = parsedArgsByCallId.get(callId) ?? {};
2457
+ const sig = toolLoopGuard.computeSignature(tc.function.name, parsedArgs);
2190
2458
  turnSigs.add(sig);
2459
+ if (!sigMetaBySig.has(sig)) {
2460
+ sigMetaBySig.set(sig, { toolName: tc.function.name, args: parsedArgs });
2461
+ }
2191
2462
  }
2192
2463
  // Repeated read-only exec calls can be served from cache instead of hard-breaking.
2193
2464
  const repeatedReadOnlyExecSigs = new Set();
2194
2465
  const readOnlyExecTurnHints = [];
2466
+ // Repeated exec calls (any kind) can replay cached results under pressure.
2467
+ const replayExecSigs = new Set();
2468
+ // Repeated read_file/read_files/list_dir calls can be served from cache.
2469
+ const repeatedReadFileSigs = new Set();
2470
+ let shouldForceToollessRecovery = false;
2471
+ const criticalLoopSigs = new Set();
2472
+ for (const tc of toolCallsArr) {
2473
+ const callId = resolveCallId(tc);
2474
+ const args = parsedArgsByCallId.get(callId) ?? {};
2475
+ const detected = toolLoopGuard.detect(tc.function.name, args);
2476
+ const warning = toolLoopGuard.formatWarning(detected, tc.function.name);
2477
+ if (warning) {
2478
+ const warningKey = `${warning.level}:${warning.detector}:${detected.signature}`;
2479
+ if (!toolLoopWarningKeys.has(warningKey)) {
2480
+ toolLoopWarningKeys.add(warningKey);
2481
+ await emitToolLoop({
2482
+ level: warning.level,
2483
+ detector: warning.detector,
2484
+ toolName: warning.toolName,
2485
+ count: warning.count,
2486
+ message: warning.message,
2487
+ });
2488
+ messages.push({
2489
+ role: 'system',
2490
+ content: `[tool-loop ${warning.level}] ${warning.message}. Stop repeating ${warning.toolName} with unchanged inputs; continue with analysis or next step.`,
2491
+ });
2492
+ }
2493
+ }
2494
+ if (toolLoopGuard.shouldDisableToolsNextTurn(detected)) {
2495
+ shouldForceToollessRecovery = true;
2496
+ criticalLoopSigs.add(detected.signature);
2497
+ }
2498
+ }
2195
2499
  // Track whether a mutation happened since a given signature was last seen.
2196
2500
  // (Tool-loop is single-threaded across turns; this is safe to keep in-memory.)
2197
2501
  for (const sig of turnSigs) {
2198
2502
  sigCounts.set(sig, (sigCounts.get(sig) ?? 0) + 1);
2199
- const toolName = sig.split(':')[0];
2503
+ const sigMeta = sigMetaBySig.get(sig);
2504
+ const toolName = sigMeta?.toolName ?? sig.split(':')[0];
2505
+ if (criticalLoopSigs.has(sig)) {
2506
+ // Critical detector already fired for this signature; recover next turn
2507
+ // with tools disabled instead of throwing in per-tool hard-break logic.
2508
+ shouldForceToollessRecovery = true;
2509
+ continue;
2510
+ }
2200
2511
  // For exec loops, only break if nothing changed since last identical exec.
2201
2512
  if (toolName === 'exec') {
2202
2513
  // If this exact exec signature was seen before, record the mutation version at that time.
@@ -2207,6 +2518,18 @@ export async function createSession(opts) {
2207
2518
  mutationVersionBySig.set(sig, mutationVersion);
2208
2519
  if (!hasMutatedSince) {
2209
2520
  const count = sigCounts.get(sig) ?? 0;
2521
+ // Early replay: if this exact exec was already run (count >= 2) and
2522
+ // we have a cached result, replay it instead of re-executing. This
2523
+ // prevents the compaction death spiral where tool results get dropped,
2524
+ // the model forgets it ran the command, and re-runs it endlessly.
2525
+ // Skip read-only commands that already have their own observation cache —
2526
+ // those are handled by the dedicated read-only path at loopThreshold.
2527
+ const command = execCommandFromSig(sig);
2528
+ const hasReadOnlyCache = looksLikeReadOnlyExecCommand(command) && execObservationCacheBySig.has(sig);
2529
+ if (count >= 2 && lastExecResultBySig.has(sig) && !hasReadOnlyCache) {
2530
+ replayExecSigs.add(sig);
2531
+ continue;
2532
+ }
2210
2533
  let loopThreshold = harness.quirks.loopsOnToolError ? 3 : 6;
2211
2534
  // If the cached observation already tells the model "no matches found",
2212
2535
  // break much earlier — the model is ignoring the hint.
@@ -2219,7 +2542,8 @@ export async function createSession(opts) {
2219
2542
  await injectVaultContext().catch(() => { });
2220
2543
  }
2221
2544
  if (count >= loopThreshold) {
2222
- const command = execCommandFromSig(sig);
2545
+ const sigArgs = sigMetaBySig.get(sig)?.args ?? {};
2546
+ const command = typeof sigArgs?.command === 'string' ? String(sigArgs.command) : '';
2223
2547
  const canReuseReadOnlyObservation = looksLikeReadOnlyExecCommand(command) &&
2224
2548
  execObservationCacheBySig.has(sig);
2225
2549
  if (canReuseReadOnlyObservation) {
@@ -2230,8 +2554,8 @@ export async function createSession(opts) {
2230
2554
  }
2231
2555
  continue;
2232
2556
  }
2233
- const args = sig.slice(toolName.length + 1);
2234
- const argsPreview = args.length > 220 ? args.slice(0, 220) + '…' : args;
2557
+ const argsPreviewRaw = JSON.stringify(sigArgs);
2558
+ const argsPreview = argsPreviewRaw.length > 220 ? argsPreviewRaw.slice(0, 220) + '…' : argsPreviewRaw;
2235
2559
  throw new Error(`tool ${toolName}: identical call repeated ${loopThreshold}x across turns; breaking loop. ` +
2236
2560
  `args=${argsPreview}`);
2237
2561
  }
@@ -2250,7 +2574,9 @@ export async function createSession(opts) {
2250
2574
  consecutiveCounts.set(sig, 1);
2251
2575
  }
2252
2576
  const consec = consecutiveCounts.get(sig) ?? 1;
2253
- // At 3x, inject vault context and a strong warning before the hard break at 4x.
2577
+ const isReadFileTool = READ_FILE_CACHE_TOOLS.has(toolName);
2578
+ const hardBreakAt = isReadFileTool ? 6 : 4;
2579
+ // At 3x, inject vault context and first warning
2254
2580
  if (consec >= 3) {
2255
2581
  await injectVaultContext().catch(() => { });
2256
2582
  if (consec === 3) {
@@ -2272,18 +2598,45 @@ export async function createSession(opts) {
2272
2598
  }
2273
2599
  }
2274
2600
  }
2275
- // Hard-break: after 4 consecutive identical reads, stop the session
2276
- if (consec >= 4) {
2277
- throw new Error(`tool ${toolName}: identical read repeated ${consec}x consecutively; breaking loop. ` +
2278
- `The resource content has not changed between reads.`);
2601
+ // From 2x onward, serve from cache if available; at exactly 4x, also inject a final warning
2602
+ if (consec >= 2 && isReadFileTool) {
2603
+ if (consec === 4) {
2604
+ let resourceType = 'resource';
2605
+ if (toolName === 'read_file')
2606
+ resourceType = 'file';
2607
+ else if (toolName === 'read_files')
2608
+ resourceType = 'files';
2609
+ else if (toolName === 'list_dir')
2610
+ resourceType = 'directory';
2611
+ messages.push({
2612
+ role: 'system',
2613
+ content: `CRITICAL: DO NOT make another identical call for this ${resourceType}. It HAS NOT CHANGED. You already have the content. Move on to the NEXT step NOW.`,
2614
+ });
2615
+ }
2616
+ const argsForSig = sigMetaBySig.get(sig)?.args ?? {};
2617
+ const replay = await toolLoopGuard.getReadCacheReplay(toolName, argsForSig, ctx.cwd);
2618
+ if (replay) {
2619
+ repeatedReadFileSigs.add(sig);
2620
+ continue;
2621
+ }
2622
+ }
2623
+ // Deterministic recovery at threshold (no hard throw): force one no-tools turn.
2624
+ if (consec >= hardBreakAt) {
2625
+ shouldForceToollessRecovery = true;
2626
+ messages.push({
2627
+ role: 'system',
2628
+ content: `[tool-loop critical] ${toolName} repeated ${consec}x with unchanged inputs. ` +
2629
+ 'Next turn will run with tools disabled so you must use existing results and provide a concrete next step/final response.',
2630
+ });
2279
2631
  }
2280
2632
  continue;
2281
2633
  }
2282
2634
  // Default behavior for mutating/other tools: break on repeated identical signature.
2283
2635
  const loopThreshold = harness.quirks.loopsOnToolError ? 2 : 3;
2284
2636
  if ((sigCounts.get(sig) ?? 0) >= loopThreshold) {
2285
- const args = sig.slice(toolName.length + 1);
2286
- const argsPreview = args.length > 220 ? args.slice(0, 220) + '…' : args;
2637
+ const argsObj = sigMetaBySig.get(sig)?.args ?? {};
2638
+ const argsRaw = JSON.stringify(argsObj);
2639
+ const argsPreview = argsRaw.length > 220 ? argsRaw.slice(0, 220) + '…' : argsRaw;
2287
2640
  throw new Error(`tool ${toolName}: identical call repeated ${loopThreshold}x across turns; breaking loop. ` +
2288
2641
  `args=${argsPreview}\n` +
2289
2642
  `Hint: you repeated the same tool call ${loopThreshold} times with identical arguments. ` +
@@ -2294,6 +2647,31 @@ export async function createSession(opts) {
2294
2647
  }
2295
2648
  // Update consecutive tracking: save this turn's signatures for next turn comparison.
2296
2649
  lastTurnSigs = turnSigs;
2650
+ if (shouldForceToollessRecovery) {
2651
+ if (!toollessRecoveryUsed) {
2652
+ forceToollessRecoveryTurn = true;
2653
+ toollessRecoveryUsed = true;
2654
+ messages.push({
2655
+ role: 'user',
2656
+ content: '[system] Critical tool loop detected. Next turn will run with tools disabled. ' +
2657
+ 'Use already available tool results to provide a concrete next step or final response; do not request more tools.',
2658
+ });
2659
+ await emitTurnEnd({
2660
+ turn: turns,
2661
+ toolCalls,
2662
+ promptTokens: cumulativeUsage.prompt,
2663
+ completionTokens: cumulativeUsage.completion,
2664
+ promptTokensTurn,
2665
+ completionTokensTurn,
2666
+ ttftMs,
2667
+ ttcMs,
2668
+ ppTps,
2669
+ tgTps,
2670
+ });
2671
+ continue;
2672
+ }
2673
+ throw new AgentLoopBreak('critical tool-loop persisted after one tools-disabled recovery turn. Stopping to avoid infinite loop.');
2674
+ }
2297
2675
  const runOne = async (tc) => {
2298
2676
  const name = tc.function.name;
2299
2677
  const rawArgs = tc.function.arguments ?? '{}';
@@ -2323,6 +2701,7 @@ export async function createSession(opts) {
2323
2701
  throw new Error(`unknown tool: ${name}`);
2324
2702
  // Keep parsed args by call-id so we can digest/archive tool outputs with context.
2325
2703
  toolArgsByCallId.set(callId, args && typeof args === 'object' && !Array.isArray(args) ? args : {});
2704
+ toolLoopGuard.registerCall(name, args && typeof args === 'object' && !Array.isArray(args) ? args : {}, callId);
2326
2705
  // Pre-dispatch argument validation.
2327
2706
  // - Required params
2328
2707
  // - Type/range/enums
@@ -2427,9 +2806,10 @@ export async function createSession(opts) {
2427
2806
  return { id: callId, content: '[skipped by user: step mode]' };
2428
2807
  }
2429
2808
  }
2430
- const sig = `${name}:${rawArgs || '{}'}`;
2809
+ const sig = toolLoopGuard.computeSignature(name, args && typeof args === 'object' && !Array.isArray(args) ? args : {});
2431
2810
  let content = '';
2432
2811
  let reusedCachedReadOnlyExec = false;
2812
+ let reusedCachedReadTool = false;
2433
2813
  if (name === 'exec' && repeatedReadOnlyExecSigs.has(sig)) {
2434
2814
  const cached = execObservationCacheBySig.get(sig);
2435
2815
  if (cached) {
@@ -2437,7 +2817,22 @@ export async function createSession(opts) {
2437
2817
  reusedCachedReadOnlyExec = true;
2438
2818
  }
2439
2819
  }
2440
- if (!reusedCachedReadOnlyExec) {
2820
+ // Replay any exec result (read-only or not) when the loop detector flagged it.
2821
+ if (name === 'exec' && !reusedCachedReadOnlyExec && replayExecSigs.has(sig)) {
2822
+ const cached = lastExecResultBySig.get(sig);
2823
+ if (cached) {
2824
+ content = withReplayedExecHint(cached);
2825
+ reusedCachedReadOnlyExec = true; // skip re-execution below
2826
+ }
2827
+ }
2828
+ if (READ_FILE_CACHE_TOOLS.has(name) && repeatedReadFileSigs.has(sig)) {
2829
+ const replay = await toolLoopGuard.getReadCacheReplay(name, args, ctx.cwd);
2830
+ if (replay) {
2831
+ content = replay;
2832
+ reusedCachedReadTool = true;
2833
+ }
2834
+ }
2835
+ if (!reusedCachedReadOnlyExec && !reusedCachedReadTool) {
2441
2836
  if (isSpawnTask) {
2442
2837
  content = await runSpawnTask(args);
2443
2838
  }
@@ -2450,9 +2845,16 @@ export async function createSession(opts) {
2450
2845
  };
2451
2846
  const value = await builtInFn(callCtx, args);
2452
2847
  content = typeof value === 'string' ? value : JSON.stringify(value);
2848
+ if (READ_FILE_CACHE_TOOLS.has(name) && typeof content === 'string' && !content.startsWith('ERROR:')) {
2849
+ const baseCwd = typeof args?.cwd === 'string' ? String(args.cwd) : ctx.cwd;
2850
+ await toolLoopGuard.storeReadCache(name, args, baseCwd, content);
2851
+ }
2453
2852
  if (name === 'exec') {
2454
2853
  // Successful exec clears blocked-loop counters.
2455
2854
  blockedExecAttemptsBySig.clear();
2855
+ // Cache every exec result so repeated calls under context pressure
2856
+ // can replay the result instead of re-executing.
2857
+ lastExecResultBySig.set(sig, content);
2456
2858
  const cmd = String(args?.command ?? '');
2457
2859
  if (looksLikeReadOnlyExecCommand(cmd) && readOnlyExecCacheable(content)) {
2458
2860
  execObservationCacheBySig.set(sig, content);
@@ -2591,6 +2993,10 @@ export async function createSession(opts) {
2591
2993
  }
2592
2994
  }
2593
2995
  }
2996
+ toolLoopGuard.registerOutcome(name, args, {
2997
+ toolCallId: callId,
2998
+ result: content,
2999
+ });
2594
3000
  return { id: callId, content };
2595
3001
  };
2596
3002
  const results = [];
@@ -2611,7 +3017,8 @@ export async function createSession(opts) {
2611
3017
  // Applies to direct exec attempts and spawn_task delegation attempts.
2612
3018
  if (tc.function.name === 'exec' || tc.function.name === 'spawn_task') {
2613
3019
  const blockedMatch = msg.match(/^exec:\s*blocked\s*\(([^)]+)\)\s*without --no-confirm\/--yolo:\s*(.*)$/i)
2614
- || msg.match(/^(spawn_task):\s*blocked\s*—\s*(.*)$/i);
3020
+ || msg.match(/^(spawn_task):\s*blocked\s*—\s*(.*)$/i)
3021
+ || msg.match(/^exec:\s*blocked\s+(background command\b[^.]*)\./i);
2615
3022
  if (blockedMatch) {
2616
3023
  const reason = (blockedMatch[1] || blockedMatch[2] || 'blocked command').trim();
2617
3024
  let parsedArgs = {};
@@ -2623,7 +3030,8 @@ export async function createSession(opts) {
2623
3030
  ? String(parsedArgs?.command ?? '')
2624
3031
  : String(parsedArgs?.task ?? '');
2625
3032
  const normalizedReason = reason.toLowerCase();
2626
- const aggregateByReason = normalizedReason.includes('package install/remove');
3033
+ const aggregateByReason = normalizedReason.includes('package install/remove')
3034
+ || normalizedReason.includes('background command');
2627
3035
  const sig = aggregateByReason
2628
3036
  ? `${tc.function.name}|${reason}`
2629
3037
  : `${tc.function.name}|${reason}|${cmd}`;
@@ -2645,6 +3053,20 @@ export async function createSession(opts) {
2645
3053
  retryable: te.retryable,
2646
3054
  result: toolErrorContent,
2647
3055
  });
3056
+ let parsedArgs = {};
3057
+ try {
3058
+ const parsed = JSON.parse(tc.function.arguments ?? '{}');
3059
+ if (parsed && typeof parsed === 'object' && !Array.isArray(parsed)) {
3060
+ parsedArgs = parsed;
3061
+ }
3062
+ }
3063
+ catch {
3064
+ // keep empty object
3065
+ }
3066
+ toolLoopGuard.registerOutcome(tc.function.name, parsedArgs, {
3067
+ toolCallId: callId,
3068
+ error: msg,
3069
+ });
2648
3070
  return { id: callId, content: toolErrorContent };
2649
3071
  };
2650
3072
  // ── Anti-scan guardrails (§ read budget, dir scan, same-search) ──
@@ -2708,6 +3130,19 @@ export async function createSession(opts) {
2708
3130
  }
2709
3131
  }
2710
3132
  }
3133
+ if (replayByCallId.size > 0) {
3134
+ const canonicalById = new Map(results.map((r) => [r.id, r.content]));
3135
+ for (const [dupId, canonicalId] of replayByCallId.entries()) {
3136
+ const canonical = canonicalById.get(canonicalId);
3137
+ if (canonical == null)
3138
+ continue;
3139
+ results.push({
3140
+ id: dupId,
3141
+ content: `[idlehands dedupe] Identical tool call replayed from ${canonicalId}. ` +
3142
+ 'Use that earlier tool result; no new execution was performed.',
3143
+ });
3144
+ }
3145
+ }
2711
3146
  // Bail immediately if cancelled during tool execution
2712
3147
  if (ac.signal.aborted)
2713
3148
  break;
@@ -2755,6 +3190,8 @@ export async function createSession(opts) {
2755
3190
  'Do not narrate. Fix required/mistyped fields and unknown keys.',
2756
3191
  });
2757
3192
  }
3193
+ // Update session-level tool loop stats for observability
3194
+ lastToolLoopStats = toolLoopGuard.getStats();
2758
3195
  // Hook: onTurnEnd (Phase 8.5)
2759
3196
  await emitTurnEnd({
2760
3197
  turn: turns,
@@ -2928,6 +3365,7 @@ export async function createSession(opts) {
2928
3365
  listModels,
2929
3366
  refreshServerHealth,
2930
3367
  getPerfSummary,
3368
+ getToolLoopStats: () => lastToolLoopStats,
2931
3369
  captureOn,
2932
3370
  captureOff,
2933
3371
  captureLast,
@@ -2965,6 +3403,9 @@ export async function createSession(opts) {
2965
3403
  get planSteps() {
2966
3404
  return planSteps;
2967
3405
  },
3406
+ get compactionStats() {
3407
+ return { ...compactionStats };
3408
+ },
2968
3409
  executePlanStep,
2969
3410
  clearPlan,
2970
3411
  compactHistory