@visorcraft/idlehands 1.4.6 → 2.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (180) hide show
  1. package/dist/agent/constants.js +12 -0
  2. package/dist/agent/constants.js.map +1 -0
  3. package/dist/agent/context-budget.js +103 -0
  4. package/dist/agent/context-budget.js.map +1 -0
  5. package/dist/agent/errors.js +8 -0
  6. package/dist/agent/errors.js.map +1 -0
  7. package/dist/agent/exec-helpers.js +105 -0
  8. package/dist/agent/exec-helpers.js.map +1 -0
  9. package/dist/agent/model-pick.js +21 -0
  10. package/dist/agent/model-pick.js.map +1 -0
  11. package/dist/agent/session-utils.js +63 -0
  12. package/dist/agent/session-utils.js.map +1 -0
  13. package/dist/agent/subagent-context.js +78 -0
  14. package/dist/agent/subagent-context.js.map +1 -0
  15. package/dist/agent/tool-loop-detection.js +91 -20
  16. package/dist/agent/tool-loop-detection.js.map +1 -1
  17. package/dist/agent/tool-loop-guard.js.map +1 -1
  18. package/dist/agent/tool-policy.js +54 -0
  19. package/dist/agent/tool-policy.js.map +1 -0
  20. package/dist/agent/tools-schema.js +281 -0
  21. package/dist/agent/tools-schema.js.map +1 -0
  22. package/dist/agent.js +191 -641
  23. package/dist/agent.js.map +1 -1
  24. package/dist/anton/controller.js +235 -163
  25. package/dist/anton/controller.js.map +1 -1
  26. package/dist/anton/lint-baseline.js +64 -0
  27. package/dist/anton/lint-baseline.js.map +1 -0
  28. package/dist/anton/preflight.js +7 -0
  29. package/dist/anton/preflight.js.map +1 -1
  30. package/dist/anton/prompt.js +71 -71
  31. package/dist/anton/reporter.js.map +1 -1
  32. package/dist/anton/runtime-ready.js +120 -0
  33. package/dist/anton/runtime-ready.js.map +1 -0
  34. package/dist/anton/session.js +7 -1
  35. package/dist/anton/session.js.map +1 -1
  36. package/dist/anton/verifier-utils.js +148 -0
  37. package/dist/anton/verifier-utils.js.map +1 -0
  38. package/dist/anton/verifier.js +26 -227
  39. package/dist/anton/verifier.js.map +1 -1
  40. package/dist/bot/anton-auto-pin.js +12 -0
  41. package/dist/bot/anton-auto-pin.js.map +1 -0
  42. package/dist/bot/anton-commands.js +137 -0
  43. package/dist/bot/anton-commands.js.map +1 -0
  44. package/dist/bot/anton-run.js +155 -0
  45. package/dist/bot/anton-run.js.map +1 -0
  46. package/dist/bot/anton-status-format.js +18 -0
  47. package/dist/bot/anton-status-format.js.map +1 -0
  48. package/dist/bot/basic-commands.js +114 -0
  49. package/dist/bot/basic-commands.js.map +1 -0
  50. package/dist/bot/command-format.js.map +1 -1
  51. package/dist/bot/command-logic.js +8 -728
  52. package/dist/bot/command-logic.js.map +1 -1
  53. package/dist/bot/commands.js +18 -1
  54. package/dist/bot/commands.js.map +1 -1
  55. package/dist/bot/discord-anton-autopin.js +29 -0
  56. package/dist/bot/discord-anton-autopin.js.map +1 -0
  57. package/dist/bot/discord-anton.js +45 -0
  58. package/dist/bot/discord-anton.js.map +1 -0
  59. package/dist/bot/discord-commands.js +20 -52
  60. package/dist/bot/discord-commands.js.map +1 -1
  61. package/dist/bot/discord-result.js +9 -0
  62. package/dist/bot/discord-result.js.map +1 -0
  63. package/dist/bot/discord-routing.js.map +1 -1
  64. package/dist/bot/discord.js +55 -12
  65. package/dist/bot/discord.js.map +1 -1
  66. package/dist/bot/escalation-commands.js +145 -0
  67. package/dist/bot/escalation-commands.js.map +1 -0
  68. package/dist/bot/escalation.js.map +1 -1
  69. package/dist/bot/format.js +0 -5
  70. package/dist/bot/format.js.map +1 -1
  71. package/dist/bot/git-status-command.js +28 -0
  72. package/dist/bot/git-status-command.js.map +1 -0
  73. package/dist/bot/model-endpoint.js +25 -0
  74. package/dist/bot/model-endpoint.js.map +1 -0
  75. package/dist/bot/session-history.js +61 -0
  76. package/dist/bot/session-history.js.map +1 -0
  77. package/dist/bot/session-settings.js +89 -0
  78. package/dist/bot/session-settings.js.map +1 -0
  79. package/dist/bot/telegram-commands.js +15 -7
  80. package/dist/bot/telegram-commands.js.map +1 -1
  81. package/dist/bot/telegram.js +15 -29
  82. package/dist/bot/telegram.js.map +1 -1
  83. package/dist/cli/agent-turn.js +8 -2
  84. package/dist/cli/agent-turn.js.map +1 -1
  85. package/dist/cli/commands/anton.js +6 -1
  86. package/dist/cli/commands/anton.js.map +1 -1
  87. package/dist/cli/commands/model.js +1 -3
  88. package/dist/cli/commands/model.js.map +1 -1
  89. package/dist/cli/commands/project.js +1 -1
  90. package/dist/cli/commands/project.js.map +1 -1
  91. package/dist/cli/commands/secrets.js +1 -1
  92. package/dist/cli/commands/secrets.js.map +1 -1
  93. package/dist/cli/commands/session.js +22 -12
  94. package/dist/cli/commands/session.js.map +1 -1
  95. package/dist/cli/guided-onboarding.js +20 -0
  96. package/dist/cli/guided-onboarding.js.map +1 -0
  97. package/dist/cli/runtime-cmds.js +8 -133
  98. package/dist/cli/runtime-cmds.js.map +1 -1
  99. package/dist/cli/runtime-common.js +35 -0
  100. package/dist/cli/runtime-common.js.map +1 -0
  101. package/dist/cli/runtime-detect.js +12 -0
  102. package/dist/cli/runtime-detect.js.map +1 -0
  103. package/dist/cli/runtime-host-command.js +7 -0
  104. package/dist/cli/runtime-host-command.js.map +1 -0
  105. package/dist/cli/runtime-probe-defaults.js +63 -0
  106. package/dist/cli/runtime-probe-defaults.js.map +1 -0
  107. package/dist/cli/runtime-scan-ports.js +30 -0
  108. package/dist/cli/runtime-scan-ports.js.map +1 -0
  109. package/dist/cli/setup-bot-step.js +51 -0
  110. package/dist/cli/setup-bot-step.js.map +1 -0
  111. package/dist/cli/setup-runtime-forms.js +214 -0
  112. package/dist/cli/setup-runtime-forms.js.map +1 -0
  113. package/dist/cli/setup-style.js +8 -0
  114. package/dist/cli/setup-style.js.map +1 -0
  115. package/dist/cli/setup-ui.js +146 -0
  116. package/dist/cli/setup-ui.js.map +1 -0
  117. package/dist/cli/setup.js +11 -449
  118. package/dist/cli/setup.js.map +1 -1
  119. package/dist/client/error-utils.js +37 -0
  120. package/dist/client/error-utils.js.map +1 -0
  121. package/dist/client/pressure.js +77 -0
  122. package/dist/client/pressure.js.map +1 -0
  123. package/dist/client.js +24 -122
  124. package/dist/client.js.map +1 -1
  125. package/dist/config.js +31 -14
  126. package/dist/config.js.map +1 -1
  127. package/dist/git.js +8 -2
  128. package/dist/git.js.map +1 -1
  129. package/dist/history.js +418 -0
  130. package/dist/history.js.map +1 -1
  131. package/dist/hooks/types.js.map +1 -1
  132. package/dist/index.js.map +1 -1
  133. package/dist/progress/message-edit-scheduler.js.map +1 -1
  134. package/dist/progress/turn-progress.js.map +1 -1
  135. package/dist/runtime/executor.js +4 -1
  136. package/dist/runtime/executor.js.map +1 -1
  137. package/dist/runtime/health.js.map +1 -1
  138. package/dist/runtime/host-runner.js.map +1 -1
  139. package/dist/safety.js +3 -2
  140. package/dist/safety.js.map +1 -1
  141. package/dist/shared/config-utils.js.map +1 -1
  142. package/dist/tools/exec-core.js +252 -0
  143. package/dist/tools/exec-core.js.map +1 -0
  144. package/dist/tools/exec-pty.js +89 -0
  145. package/dist/tools/exec-pty.js.map +1 -0
  146. package/dist/tools/exec-utils.js +94 -0
  147. package/dist/tools/exec-utils.js.map +1 -0
  148. package/dist/tools/file-discovery.js +144 -0
  149. package/dist/tools/file-discovery.js.map +1 -0
  150. package/dist/tools/file-mutations.js +326 -0
  151. package/dist/tools/file-mutations.js.map +1 -0
  152. package/dist/tools/file-read.js +133 -0
  153. package/dist/tools/file-read.js.map +1 -0
  154. package/dist/tools/patch-apply.js +168 -0
  155. package/dist/tools/patch-apply.js.map +1 -0
  156. package/dist/tools/path-safety.js.map +1 -1
  157. package/dist/tools/replay-utils.js +25 -0
  158. package/dist/tools/replay-utils.js.map +1 -0
  159. package/dist/tools/search-utils.js +55 -0
  160. package/dist/tools/search-utils.js.map +1 -0
  161. package/dist/tools/sys-notes.js +34 -0
  162. package/dist/tools/sys-notes.js.map +1 -0
  163. package/dist/tools/text-utils.js +164 -0
  164. package/dist/tools/text-utils.js.map +1 -0
  165. package/dist/tools/undo.js +1 -1
  166. package/dist/tools/undo.js.map +1 -1
  167. package/dist/tools/vault-tools.js +36 -0
  168. package/dist/tools/vault-tools.js.map +1 -0
  169. package/dist/tools.js +19 -1460
  170. package/dist/tools.js.map +1 -1
  171. package/dist/tui/controller.js +5 -2
  172. package/dist/tui/controller.js.map +1 -1
  173. package/dist/tui/render.js.map +1 -1
  174. package/dist/utils.js +2 -2
  175. package/dist/utils.js.map +1 -1
  176. package/dist/vault.js +134 -1
  177. package/dist/vault.js.map +1 -1
  178. package/dist/watchdog.js +1 -3
  179. package/dist/watchdog.js.map +1 -1
  180. package/package.json +2 -1
package/dist/agent.js CHANGED
@@ -1,14 +1,23 @@
1
1
  import fs from 'node:fs/promises';
2
2
  import path from 'node:path';
3
- import { generateMinimalDiff, toolResultSummary, execCommandFromSig, formatDurationMs, looksLikePlanningNarration, capTextByApproxTokens, isLikelyBinaryBuffer, sanitizePathsInMessage, digestToolResult, } from './agent/formatting.js';
3
+ import { DEFAULT_SUB_AGENT_RESULT_TOKEN_CAP, DEFAULT_SUB_AGENT_SYSTEM_PROMPT, MCP_TOOLS_REQUEST_TOKEN, } from './agent/constants.js';
4
+ import { AgentLoopBreak } from './agent/errors.js';
5
+ import { execRcShouldSignalFailure, looksLikeReadOnlyExecCommand, readOnlyExecCacheable, withCachedExecObservationHint, withReplayedExecHint, } from './agent/exec-helpers.js';
6
+ import { generateMinimalDiff, toolResultSummary, execCommandFromSig, formatDurationMs, looksLikePlanningNarration, capTextByApproxTokens, sanitizePathsInMessage, digestToolResult, } from './agent/formatting.js';
7
+ import { autoPickModel } from './agent/model-pick.js';
4
8
  import { reviewArtifactKeys, looksLikeCodeReviewRequest, looksLikeReviewRetrievalRequest, retrievalAllowsStaleArtifact, parseReviewArtifactStalePolicy, parseReviewArtifact, reviewArtifactStaleReason, gitHead, normalizeModelsResponse, } from './agent/review-artifact.js';
9
+ import { capApprovalMode, ensureInformativeAssistantText, isContextWindowExceededError, makeAbortController, userContentToText, userDisallowsDelegation, } from './agent/session-utils.js';
10
+ import { buildSubAgentContextBlock, extractLensBody } from './agent/subagent-context.js';
5
11
  import { parseToolCallsFromContent, getMissingRequiredParams, getArgValidationIssues, stripMarkdownFences, parseJsonArgs, } from './agent/tool-calls.js';
6
12
  import { ToolLoopGuard } from './agent/tool-loop-guard.js';
13
+ import { isLspTool, isMutationTool, isReadOnlyTool, planModeSummary } from './agent/tool-policy.js';
14
+ import { buildToolsSchema } from './agent/tools-schema.js';
7
15
  import { OpenAIClient } from './client.js';
8
16
  import { loadProjectContext } from './context.js';
9
17
  import { loadGitContext, isGitDirty, stashWorkingTree } from './git.js';
10
18
  import { selectHarness } from './harnesses.js';
11
19
  import { enforceContextBudget, stripThinking, estimateTokensFromMessages, estimateToolSchemaTokens, } from './history.js';
20
+ import { truncateToolResultContent } from './agent/context-budget.js';
12
21
  import { HookManager, loadHookPlugins } from './hooks/index.js';
13
22
  import { projectIndexKeys, parseIndexMeta, isFreshIndex, indexSummaryLine } from './indexer.js';
14
23
  import { LensStore } from './lens.js';
@@ -18,148 +27,12 @@ import { BASE_MAX_TOKENS, deriveContextWindow, deriveGenerationParams, supportsV
18
27
  import { ReplayStore } from './replay.js';
19
28
  import { checkExecSafety, checkPathSafety } from './safety.js';
20
29
  import { normalizeApprovalMode } from './shared/config-utils.js';
21
- import { SYS_CONTEXT_SCHEMA, collectSnapshot } from './sys/context.js';
30
+ import { collectSnapshot } from './sys/context.js';
22
31
  import { ToolError, ValidationError } from './tools/tool-error.js';
23
32
  import * as tools from './tools.js';
24
33
  import { stateDir, timestampedId } from './utils.js';
25
34
  import { VaultStore } from './vault.js';
26
35
  export { parseToolCallsFromContent };
27
- function makeAbortController() {
28
- // Node 24: AbortController is global.
29
- return new AbortController();
30
- }
31
- const CACHED_EXEC_OBSERVATION_HINT = '[idlehands hint] Reused cached output for repeated read-only exec call (unchanged observation).';
32
- function looksLikeReadOnlyExecCommand(command) {
33
- // Strip leading `cd <path> &&` / `cd <path>;` prefixes — cd is read-only
34
- // navigation, the actual command that matters comes after.
35
- let cmd = String(command || '')
36
- .trim()
37
- .toLowerCase();
38
- if (!cmd)
39
- return false;
40
- cmd = cmd.replace(/^(\s*cd\s+[^;&|]+\s*(?:&&|;)\s*)+/i, '').trim();
41
- if (!cmd)
42
- return false;
43
- // Shell redirects are likely writes.
44
- if (/(^|\s)(?:>>?|<<?)\s*/.test(cmd))
45
- return false;
46
- // Obvious mutators.
47
- if (/\b(?:rm|mv|cp|touch|mkdir|rmdir|chmod|chown|truncate|dd)\b/.test(cmd))
48
- return false;
49
- if (/\b(?:sed|perl)\b[^\n]*\s-i\b/.test(cmd))
50
- return false;
51
- if (/\btee\b/.test(cmd))
52
- return false;
53
- // Git: allow common read-only subcommands, block mutating verbs.
54
- if (/\bgit\b/.test(cmd)) {
55
- if (/\bgit\b[^\n|;&]*\b(?:add|am|apply|bisect|checkout|switch|clean|clone|commit|fetch|merge|pull|push|rebase|reset|revert|stash)\b/.test(cmd)) {
56
- return false;
57
- }
58
- if (/\bgit\b[^\n|;&]*\b(?:log|show|status|diff|rev-parse|branch(?:\s+--list)?|tag(?:\s+--list)?|ls-files|grep)\b/.test(cmd)) {
59
- return true;
60
- }
61
- }
62
- if (/^\s*(?:grep|rg|ag|ack|find|ls|cat|head|tail|wc|stat)\b/.test(cmd))
63
- return true;
64
- if (/\|\s*(?:grep|rg|ag|ack)\b/.test(cmd))
65
- return true;
66
- // Additional read-only commands: file info, path lookup, system/user info
67
- if (/^\s*(?:file|which|type|uname|env|printenv|id|whoami|pwd)\b/.test(cmd))
68
- return true;
69
- // Git read-only subcommands that aren't covered above
70
- if (/\bgit\b[^\n|;&]*\b(?:blame|remote|config\s+--(?:get|list|global|local|system))\b/.test(cmd))
71
- return true;
72
- return false;
73
- }
74
- function execRcShouldSignalFailure(command) {
75
- const cmd = String(command || '').toLowerCase();
76
- if (!cmd)
77
- return false;
78
- // Common checks where non-zero usually means real failure.
79
- if (/\b(?:npm|pnpm|yarn)\s+(?:run\s+)?(?:test|build|lint|typecheck|check)\b/.test(cmd))
80
- return true;
81
- if (/\bnode\s+--test\b/.test(cmd))
82
- return true;
83
- if (/\b(?:pytest|go\s+test|cargo\s+test|ctest|mvn\s+test|gradle\s+test)\b/.test(cmd))
84
- return true;
85
- if (/\b(?:cargo\s+build|go\s+build|tsc\b)\b/.test(cmd))
86
- return true;
87
- // Grep/rg no-match rc=1 should not be treated as failure.
88
- if (/^\s*(?:rg|grep|ag|ack)\b/.test(cmd))
89
- return false;
90
- return false;
91
- }
92
- function withCachedExecObservationHint(content) {
93
- if (!content)
94
- return content;
95
- try {
96
- const parsed = JSON.parse(content);
97
- const out = typeof parsed?.out === 'string' ? parsed.out : '';
98
- if (out.includes(CACHED_EXEC_OBSERVATION_HINT))
99
- return content;
100
- parsed.out = out ? `${out}\n${CACHED_EXEC_OBSERVATION_HINT}` : CACHED_EXEC_OBSERVATION_HINT;
101
- parsed.cached_observation = true;
102
- return JSON.stringify(parsed);
103
- }
104
- catch {
105
- if (content.includes(CACHED_EXEC_OBSERVATION_HINT))
106
- return content;
107
- return `${content}\n${CACHED_EXEC_OBSERVATION_HINT}`;
108
- }
109
- }
110
- const REPLAYED_EXEC_HINT = '[idlehands hint] You already ran this exact command. This is the replayed result from your previous execution. Do NOT re-run it — use the output below to continue your task.';
111
- function withReplayedExecHint(content) {
112
- if (!content)
113
- return content;
114
- try {
115
- const parsed = JSON.parse(content);
116
- const out = typeof parsed?.out === 'string' ? parsed.out : '';
117
- if (out.includes(REPLAYED_EXEC_HINT))
118
- return content;
119
- parsed.out = out ? `${REPLAYED_EXEC_HINT}\n${out}` : REPLAYED_EXEC_HINT;
120
- parsed.replayed = true;
121
- return JSON.stringify(parsed);
122
- }
123
- catch {
124
- if (content.includes(REPLAYED_EXEC_HINT))
125
- return content;
126
- return `${REPLAYED_EXEC_HINT}\n${content}`;
127
- }
128
- }
129
- function readOnlyExecCacheable(content) {
130
- try {
131
- const parsed = JSON.parse(content);
132
- const rc = Number(parsed?.rc ?? NaN);
133
- return Number.isFinite(rc) && rc === 0;
134
- }
135
- catch {
136
- return false;
137
- }
138
- }
139
- function ensureInformativeAssistantText(text, ctx) {
140
- if (String(text ?? '').trim())
141
- return text;
142
- if (ctx.toolCalls > 0) {
143
- return 'I completed the requested tool work, but I have no user-visible response text yet. Ask me to summarize what was done.';
144
- }
145
- return `I have no user-visible response text for this turn (turn=${ctx.turns}). Please try again or rephrase your request.`;
146
- }
147
- function isContextWindowExceededError(err) {
148
- const status = Number(err?.status ?? NaN);
149
- const msg = String(err?.message ?? err ?? '');
150
- if (status === 413)
151
- return true;
152
- if (!msg)
153
- return false;
154
- return /(exceeds?\s+the\s+available\s+context\s+size|exceed_context|context\s+size|context\s+window|maximum\s+context\s+length|too\s+many\s+tokens|request\s*\(\d+\s*tokens\))/i.test(msg);
155
- }
156
- /** Errors that should break the outer agent loop, not be caught by per-tool handlers */
157
- class AgentLoopBreak extends Error {
158
- constructor(message) {
159
- super(message);
160
- this.name = 'AgentLoopBreak';
161
- }
162
- }
163
36
  const SYSTEM_PROMPT = `You are a coding agent with filesystem and shell access. Execute the user's request using the provided tools.
164
37
 
165
38
  Rules:
@@ -186,429 +59,6 @@ Rules:
186
59
  Tool call format:
187
60
  - Use tool_calls. Do not write JSON tool invocations in your message text.
188
61
  `;
189
- const MCP_TOOLS_REQUEST_TOKEN = '[[MCP_TOOLS_REQUEST]]';
190
- const DEFAULT_SUB_AGENT_SYSTEM_PROMPT = `You are a focused coding sub-agent. Execute only the delegated task.
191
- - Work in the current directory. Use relative paths for all file operations.
192
- - Read the target file before editing. You need the exact text for search/replace.
193
- - Keep tool usage tight and efficient.
194
- - Prefer surgical edits over rewrites.
195
- - Do NOT create files outside the working directory unless explicitly requested.
196
- - When running commands in a subdirectory, use exec's cwd parameter — NOT "cd /path && cmd".
197
- - Run verification commands when relevant.
198
- - Return a concise outcome summary.`;
199
- const DEFAULT_SUB_AGENT_RESULT_TOKEN_CAP = 4000;
200
- const LSP_TOOL_NAMES = [
201
- 'lsp_diagnostics',
202
- 'lsp_symbols',
203
- 'lsp_hover',
204
- 'lsp_definition',
205
- 'lsp_references',
206
- ];
207
- const LSP_TOOL_NAME_SET = new Set(LSP_TOOL_NAMES);
208
- const FILE_MUTATION_TOOL_SET = new Set([
209
- 'edit_file',
210
- 'edit_range',
211
- 'apply_patch',
212
- 'write_file',
213
- 'insert_file',
214
- ]);
215
- /** Approval mode permissiveness ranking (lower = more restrictive). */
216
- const APPROVAL_MODE_RANK = {
217
- plan: 0,
218
- reject: 1,
219
- default: 2,
220
- 'auto-edit': 3,
221
- yolo: 4,
222
- };
223
- /**
224
- * Cap a sub-agent's approval mode at the parent's level.
225
- * Sub-agents cannot escalate beyond the parent's approval mode.
226
- */
227
- function capApprovalMode(requested, parentMode) {
228
- return APPROVAL_MODE_RANK[requested] <= APPROVAL_MODE_RANK[parentMode] ? requested : parentMode;
229
- }
230
- async function buildSubAgentContextBlock(cwd, rawFiles) {
231
- const values = Array.isArray(rawFiles) ? rawFiles : [];
232
- const files = values
233
- .map((v) => (typeof v === 'string' ? v.trim() : ''))
234
- .filter(Boolean)
235
- .slice(0, 12);
236
- if (!files.length)
237
- return { block: '', included: [], skipped: [] };
238
- const MAX_TOTAL_CHARS = 24_000;
239
- const MAX_PER_FILE_CHARS = 4_000;
240
- let total = 0;
241
- const parts = [];
242
- const included = [];
243
- const skipped = [];
244
- for (const rel of files) {
245
- const abs = path.resolve(cwd, rel);
246
- const relFromCwd = path.relative(cwd, abs);
247
- if (relFromCwd.startsWith('..') || path.isAbsolute(relFromCwd)) {
248
- skipped.push(`${rel} (outside cwd)`);
249
- continue;
250
- }
251
- let stat;
252
- try {
253
- stat = await fs.stat(abs);
254
- }
255
- catch {
256
- skipped.push(`${rel} (missing)`);
257
- continue;
258
- }
259
- if (!stat?.isFile()) {
260
- skipped.push(`${rel} (not a file)`);
261
- continue;
262
- }
263
- const buf = await fs.readFile(abs).catch(() => null);
264
- if (!buf) {
265
- skipped.push(`${rel} (unreadable)`);
266
- continue;
267
- }
268
- if (isLikelyBinaryBuffer(buf)) {
269
- skipped.push(`${rel} (binary)`);
270
- continue;
271
- }
272
- const raw = buf.toString('utf8');
273
- const body = raw.length > MAX_PER_FILE_CHARS
274
- ? `${raw.slice(0, MAX_PER_FILE_CHARS)}\n[truncated: ${raw.length} chars total]`
275
- : raw;
276
- const section = `[file:${rel}]\n${body}\n[/file:${rel}]`;
277
- if (total + section.length > MAX_TOTAL_CHARS) {
278
- skipped.push(`${rel} (context budget reached)`);
279
- continue;
280
- }
281
- parts.push(section);
282
- included.push(rel);
283
- total += section.length;
284
- }
285
- return { block: parts.join('\n\n'), included, skipped };
286
- }
287
- function extractLensBody(projection) {
288
- const lines = String(projection ?? '').split(/\r?\n/);
289
- if (!lines.length)
290
- return '';
291
- let start = 0;
292
- if (lines[0].startsWith('# '))
293
- start = 1;
294
- if (lines[start]?.startsWith('# lens:'))
295
- start += 1;
296
- return lines
297
- .slice(start)
298
- .filter((line) => line.trim().length > 0)
299
- .slice(0, 40)
300
- .join('\n');
301
- }
302
- function buildToolsSchema(opts) {
303
- const obj = (properties, required = []) => ({
304
- type: 'object',
305
- additionalProperties: false,
306
- properties,
307
- required,
308
- });
309
- const str = () => ({ type: 'string' });
310
- const bool = () => ({ type: 'boolean' });
311
- const int = (min, max) => ({
312
- type: 'integer',
313
- ...(min !== undefined && { minimum: min }),
314
- ...(max !== undefined && { maximum: max }),
315
- });
316
- const schemas = [
317
- // ────────────────────────────────────────────────────────────────────────────
318
- // Token-safe reads (require limit; allow plain output without per-line numbers)
319
- // ────────────────────────────────────────────────────────────────────────────
320
- {
321
- type: 'function',
322
- function: {
323
- name: 'read_file',
324
- description: 'Read a bounded slice of a file. Never repeat an identical call consecutively; reuse the prior result.',
325
- parameters: obj({
326
- path: str(),
327
- offset: int(1, 1_000_000),
328
- limit: int(1, 240),
329
- search: str(),
330
- context: int(0, 80),
331
- format: { type: 'string', enum: ['plain', 'numbered', 'sparse'] },
332
- max_bytes: int(256, 20_000),
333
- }, ['path', 'limit']),
334
- },
335
- },
336
- {
337
- type: 'function',
338
- function: {
339
- name: 'read_files',
340
- description: 'Batch read bounded file slices. Never repeat an identical call consecutively; reuse the prior result.',
341
- parameters: obj({
342
- requests: {
343
- type: 'array',
344
- items: obj({
345
- path: str(),
346
- offset: int(1, 1_000_000),
347
- limit: int(1, 240),
348
- search: str(),
349
- context: int(0, 80),
350
- format: { type: 'string', enum: ['plain', 'numbered', 'sparse'] },
351
- max_bytes: int(256, 20_000),
352
- }, ['path', 'limit']),
353
- },
354
- }, ['requests']),
355
- },
356
- },
357
- // ────────────────────────────────────────────────────────────────────────────
358
- // Writes/edits
359
- // ────────────────────────────────────────────────────────────────────────────
360
- {
361
- type: 'function',
362
- function: {
363
- name: 'write_file',
364
- description: 'Write file (atomic, backup). Existing non-empty files require overwrite=true (or force=true).',
365
- parameters: obj({ path: str(), content: str(), overwrite: bool(), force: bool() }, [
366
- 'path',
367
- 'content',
368
- ]),
369
- },
370
- },
371
- {
372
- type: 'function',
373
- function: {
374
- name: 'apply_patch',
375
- description: 'Apply unified diff patch (multi-file).\n\nUSAGE EXAMPLE:\n apply_patch({\n patch: "--- a/src/file.ts\\n+++ b/src/file.ts\\n@@ -1,5 +1,5 @@\\n-old text\\n+new text\\n",\n files: ["src/file.ts"]\n })\n\nThe patch must be valid unified diff text. Tool-call arguments must be valid JSON. Use strip=1 if paths include directory prefixes.\nFiles listed must match the paths in the diff.',
376
- parameters: obj({
377
- patch: str(),
378
- files: { type: 'array', items: str() },
379
- strip: int(0, 5),
380
- }, ['patch', 'files']),
381
- },
382
- },
383
- {
384
- type: 'function',
385
- function: {
386
- name: 'edit_range',
387
- description: 'Replace a line range in a file.\n\nUSAGE EXAMPLE:\n edit_range({\n path: "src/file.ts",\n start_line: 10,\n end_line: 15,\n replacement: "new content\\nmore content"\n })\n\n- start_line and end_line are 1-indexed (first line is 1, not 0)\n- To delete lines, set replacement to empty string ""\n- To insert at a position, set start_line and end_line to the same value\n- Tool-call arguments must be valid JSON (double quotes, no trailing commas/comments)\n- The replacement text replaces the entire range inclusive',
388
- parameters: obj({
389
- path: str(),
390
- start_line: int(1),
391
- end_line: int(1),
392
- replacement: str(),
393
- }, ['path', 'start_line', 'end_line', 'replacement']),
394
- },
395
- },
396
- {
397
- type: 'function',
398
- function: {
399
- name: 'edit_file',
400
- description: 'Legacy exact replace (requires old_text). Prefer apply_patch/edit_range.',
401
- parameters: obj({ path: str(), old_text: str(), new_text: str(), replace_all: bool() }, [
402
- 'path',
403
- 'old_text',
404
- 'new_text',
405
- ]),
406
- },
407
- },
408
- {
409
- type: 'function',
410
- function: {
411
- name: 'insert_file',
412
- description: 'Insert text at line (0=prepend, -1=append).',
413
- parameters: obj({ path: str(), line: int(), text: str() }, ['path', 'line', 'text']),
414
- },
415
- },
416
- // ────────────────────────────────────────────────────────────────────────────
417
- // Bounded listings/search (expose existing caps)
418
- // ────────────────────────────────────────────────────────────────────────────
419
- {
420
- type: 'function',
421
- function: {
422
- name: 'list_dir',
423
- description: 'List directory entries. Never repeat an identical call consecutively for the same path/options; reuse the prior result.',
424
- parameters: obj({ path: str(), recursive: bool(), max_entries: int(1, 500) }, ['path']),
425
- },
426
- },
427
- {
428
- type: 'function',
429
- function: {
430
- name: 'search_files',
431
- description: 'Search regex in files.',
432
- parameters: obj({ pattern: str(), path: str(), include: str(), max_results: int(1, 100) }, [
433
- 'pattern',
434
- 'path',
435
- ]),
436
- },
437
- },
438
- // ────────────────────────────────────────────────────────────────────────────
439
- // Exec (minified schema)
440
- // ────────────────────────────────────────────────────────────────────────────
441
- {
442
- type: 'function',
443
- function: {
444
- name: 'exec',
445
- description: 'Run bash -c; returns JSON rc/out/err.',
446
- parameters: obj({ command: str(), cwd: str(), timeout: int(1, 120) }, ['command']),
447
- },
448
- },
449
- ];
450
- if (opts?.allowSpawnTask !== false) {
451
- schemas.push({
452
- type: 'function',
453
- function: {
454
- name: 'spawn_task',
455
- description: 'Run a sub-agent task (no parent history).',
456
- parameters: obj({
457
- task: str(),
458
- context_files: { type: 'array', items: str() },
459
- model: str(),
460
- endpoint: str(),
461
- max_iterations: int(),
462
- max_tokens: int(),
463
- timeout_sec: int(),
464
- system_prompt: str(),
465
- approval_mode: {
466
- type: 'string',
467
- enum: ['plan', 'reject', 'default', 'auto-edit', 'yolo'],
468
- },
469
- }, ['task']),
470
- },
471
- });
472
- }
473
- if (opts?.activeVaultTools) {
474
- schemas.push({
475
- type: 'function',
476
- function: {
477
- name: 'vault_search',
478
- description: 'Search vault.',
479
- parameters: obj({ query: str(), limit: int() }, ['query']),
480
- },
481
- }, {
482
- type: 'function',
483
- function: {
484
- name: 'vault_note',
485
- description: 'Write vault note.',
486
- parameters: obj({ key: str(), value: str() }, ['key', 'value']),
487
- },
488
- });
489
- }
490
- else if (opts?.passiveVault) {
491
- // In passive mode, expose vault_search (read-only) so the model can recover
492
- // compacted context on demand, but don't expose vault_note (write).
493
- schemas.push({
494
- type: 'function',
495
- function: {
496
- name: 'vault_search',
497
- description: 'Search vault memory for earlier context that was compacted away. Use sparingly — only when you need to recall specific details from earlier in the conversation.',
498
- parameters: obj({ query: str(), limit: int() }, ['query']),
499
- },
500
- });
501
- }
502
- // Phase 9: sys_context tool is only available in sys mode.
503
- if (opts?.sysMode) {
504
- schemas.push(SYS_CONTEXT_SCHEMA);
505
- }
506
- if (opts?.lspTools) {
507
- schemas.push({
508
- type: 'function',
509
- function: {
510
- name: 'lsp_diagnostics',
511
- description: 'Get LSP diagnostics (errors/warnings) for file or project.',
512
- parameters: obj({ path: str(), severity: int() }, []),
513
- },
514
- }, {
515
- type: 'function',
516
- function: {
517
- name: 'lsp_symbols',
518
- description: 'List symbols (functions, classes, vars) in a file.',
519
- parameters: obj({ path: str() }, ['path']),
520
- },
521
- }, {
522
- type: 'function',
523
- function: {
524
- name: 'lsp_hover',
525
- description: 'Get type/docs for symbol at position.',
526
- parameters: obj({ path: str(), line: int(), character: int() }, [
527
- 'path',
528
- 'line',
529
- 'character',
530
- ]),
531
- },
532
- }, {
533
- type: 'function',
534
- function: {
535
- name: 'lsp_definition',
536
- description: 'Go to definition of symbol at position.',
537
- parameters: obj({ path: str(), line: int(), character: int() }, [
538
- 'path',
539
- 'line',
540
- 'character',
541
- ]),
542
- },
543
- }, {
544
- type: 'function',
545
- function: {
546
- name: 'lsp_references',
547
- description: 'Find all references to symbol at position.',
548
- parameters: obj({ path: str(), line: int(), character: int(), max_results: int() }, [
549
- 'path',
550
- 'line',
551
- 'character',
552
- ]),
553
- },
554
- });
555
- }
556
- if (opts?.mcpTools?.length) {
557
- schemas.push(...opts.mcpTools);
558
- }
559
- return schemas;
560
- }
561
- function isReadOnlyTool(name) {
562
- return (name === 'read_file' ||
563
- name === 'read_files' ||
564
- name === 'list_dir' ||
565
- name === 'search_files' ||
566
- name === 'vault_search' ||
567
- name === 'sys_context');
568
- }
569
- /** Human-readable summary of what a blocked tool call would do. */
570
- function planModeSummary(name, args) {
571
- switch (name) {
572
- case 'write_file':
573
- return `write ${args.path ?? 'unknown'} (${typeof args.content === 'string' ? args.content.split('\n').length : '?'} lines)`;
574
- case 'apply_patch':
575
- return `apply patch to ${Array.isArray(args.files) ? args.files.length : '?'} file(s)`;
576
- case 'edit_range':
577
- return `edit ${args.path ?? 'unknown'} lines ${args.start_line ?? '?'}-${args.end_line ?? '?'}`;
578
- case 'edit_file':
579
- return `edit ${args.path ?? 'unknown'} (replace ${typeof args.old_text === 'string' ? args.old_text.split('\n').length : '?'} lines)`;
580
- case 'insert_file':
581
- return `insert into ${args.path ?? 'unknown'} at line ${args.line ?? '?'}`;
582
- case 'exec':
583
- return `run: ${typeof args.command === 'string' ? args.command.slice(0, 80) : 'unknown'}`;
584
- case 'spawn_task':
585
- return `spawn sub-agent task: ${typeof args.task === 'string' ? args.task.slice(0, 80) : 'unknown'}`;
586
- case 'vault_note':
587
- return `vault note: ${args.key ?? 'unknown'}`;
588
- default:
589
- return `${name}(${Object.keys(args).join(', ')})`;
590
- }
591
- }
592
- function userContentToText(content) {
593
- if (typeof content === 'string')
594
- return content;
595
- return content
596
- .filter((p) => p.type === 'text')
597
- .map((p) => p.text)
598
- .join('\n')
599
- .trim();
600
- }
601
- function userDisallowsDelegation(content) {
602
- const text = userContentToText(content).toLowerCase();
603
- if (!text)
604
- return false;
605
- const mentionsDelegation = /\b(?:spawn[_\-\s]?task|sub[\-\s]?agents?|delegate|delegation)\b/.test(text);
606
- if (!mentionsDelegation)
607
- return false;
608
- const negationNearDelegation = /\b(?:do not|don't|dont|no|without|avoid|skip|never)\b[^\n.]{0,90}\b(?:spawn[_\-\s]?task|sub[\-\s]?agents?|delegate|delegation)\b/.test(text) ||
609
- /\b(?:spawn[_\-\s]?task|sub[\-\s]?agents?|delegate|delegation)\b[^\n.]{0,50}\b(?:do not|don't|dont|not allowed|forbidden|no)\b/.test(text);
610
- return negationNearDelegation;
611
- }
612
62
  export async function createSession(opts) {
613
63
  const cfg = opts.config;
614
64
  const projectDir = cfg.dir ?? process.cwd();
@@ -1313,7 +763,7 @@ export async function createSession(opts) {
1313
763
  else if (step.tool === 'spawn_task') {
1314
764
  content = await runSpawnTaskCore(step.args, { signal: inFlight?.signal });
1315
765
  }
1316
- else if (LSP_TOOL_NAME_SET.has(step.tool) && lspManager) {
766
+ else if (isLspTool(step.tool) && lspManager) {
1317
767
  content = await dispatchLspTool(step.tool, step.args);
1318
768
  }
1319
769
  else if (mcpManager?.hasTool(step.tool)) {
@@ -1968,10 +1418,39 @@ export async function createSession(opts) {
1968
1418
  const hookObj = typeof hooks === 'function' ? { onToken: hooks } : (hooks ?? {});
1969
1419
  let turns = 0;
1970
1420
  let toolCalls = 0;
1421
+ const tokenEstimateCache = new WeakMap();
1422
+ const estimateTokensCached = (msgs) => {
1423
+ const key = msgs;
1424
+ const cached = tokenEstimateCache.get(key);
1425
+ if (cached !== undefined)
1426
+ return cached;
1427
+ const v = estimateTokensFromMessages(msgs);
1428
+ tokenEstimateCache.set(key, v);
1429
+ return v;
1430
+ };
1431
+ const perfEnabled = process.env.IDLEHANDS_PERF_TRACE === '1';
1432
+ const perf = {
1433
+ modelMs: 0,
1434
+ ttftMsSum: 0,
1435
+ ttftSamples: 0,
1436
+ compactions: 0,
1437
+ compactMs: 0,
1438
+ };
1971
1439
  const askId = `ask-${timestampedId()}`;
1972
- const emitToolCall = async (call) => {
1973
- hookObj.onToolCall?.(call);
1974
- await hookManager.emit('tool_call', { askId, turn: turns, call });
1440
+ const hooksEnabled = hookManager.isEnabled();
1441
+ const hasOnToolCall = Boolean(hookObj.onToolCall);
1442
+ const hasOnToolResult = Boolean(hookObj.onToolResult);
1443
+ const hasOnToolLoop = Boolean(hookObj.onToolLoop);
1444
+ const hasOnTurnEnd = Boolean(hookObj.onTurnEnd);
1445
+ const emitToolCall = async (id, name, args) => {
1446
+ if (!hasOnToolCall && !hooksEnabled)
1447
+ return;
1448
+ const call = { id, name, args };
1449
+ if (hasOnToolCall)
1450
+ hookObj.onToolCall?.(call);
1451
+ if (hooksEnabled) {
1452
+ await hookManager.emit('tool_call', { askId, turn: turns, call });
1453
+ }
1975
1454
  };
1976
1455
  const emitToolStream = (stream) => {
1977
1456
  try {
@@ -1980,29 +1459,46 @@ export async function createSession(opts) {
1980
1459
  catch {
1981
1460
  // best effort
1982
1461
  }
1983
- try {
1984
- void hookManager.emit('tool_stream', { askId, turn: turns, stream });
1985
- }
1986
- catch {
1987
- // best effort
1462
+ if (hooksEnabled) {
1463
+ try {
1464
+ void hookManager.emit('tool_stream', { askId, turn: turns, stream });
1465
+ }
1466
+ catch {
1467
+ // best effort
1468
+ }
1988
1469
  }
1989
1470
  };
1990
1471
  const isReadOnlyToolDynamic = (toolName) => {
1991
1472
  return (isReadOnlyTool(toolName) ||
1992
- LSP_TOOL_NAME_SET.has(toolName) ||
1473
+ isLspTool(toolName) ||
1993
1474
  Boolean(mcpManager?.isToolReadOnly(toolName)));
1994
1475
  };
1995
1476
  const emitToolResult = async (result) => {
1996
- await hookObj.onToolResult?.(result);
1997
- await hookManager.emit('tool_result', { askId, turn: turns, result });
1477
+ if (!hasOnToolResult && !hooksEnabled)
1478
+ return;
1479
+ if (hasOnToolResult)
1480
+ await hookObj.onToolResult?.(result);
1481
+ if (hooksEnabled) {
1482
+ await hookManager.emit('tool_result', { askId, turn: turns, result });
1483
+ }
1998
1484
  };
1999
1485
  const emitToolLoop = async (loop) => {
2000
- await hookObj.onToolLoop?.(loop);
2001
- await hookManager.emit('tool_loop', { askId, turn: turns, loop });
1486
+ if (!hasOnToolLoop && !hooksEnabled)
1487
+ return;
1488
+ if (hasOnToolLoop)
1489
+ await hookObj.onToolLoop?.(loop);
1490
+ if (hooksEnabled) {
1491
+ await hookManager.emit('tool_loop', { askId, turn: turns, loop });
1492
+ }
2002
1493
  };
2003
1494
  const emitTurnEnd = async (stats) => {
2004
- await hookObj.onTurnEnd?.(stats);
2005
- await hookManager.emit('turn_end', { askId, stats });
1495
+ if (!hasOnTurnEnd && !hooksEnabled)
1496
+ return;
1497
+ if (hasOnTurnEnd)
1498
+ await hookObj.onTurnEnd?.(stats);
1499
+ if (hooksEnabled) {
1500
+ await hookManager.emit('turn_end', { askId, stats });
1501
+ }
2006
1502
  };
2007
1503
  const finalizeAsk = async (text) => {
2008
1504
  const finalText = ensureInformativeAssistantText(text, { toolCalls, turns });
@@ -2034,13 +1530,20 @@ export async function createSession(opts) {
2034
1530
  // best-effort — never block ask completion for summary persistence
2035
1531
  }
2036
1532
  }
2037
- await hookManager.emit('ask_end', { askId, text: finalText, turns, toolCalls });
1533
+ if (hooksEnabled)
1534
+ await hookManager.emit('ask_end', { askId, text: finalText, turns, toolCalls });
1535
+ if (perfEnabled) {
1536
+ const wallMs = Date.now() - wallStart;
1537
+ const avgTtft = perf.ttftSamples > 0 ? Math.round(perf.ttftMsSum / perf.ttftSamples) : 0;
1538
+ console.error(`[perf] ask=${askId} turns=${turns} toolCalls=${toolCalls} wallMs=${wallMs} modelMs=${perf.modelMs} compactMs=${perf.compactMs} avgTTFTms=${avgTtft} compactions=${perf.compactions}`);
1539
+ }
2038
1540
  return { text: finalText, turns, toolCalls };
2039
1541
  };
2040
1542
  const rawInstructionText = userContentToText(instruction).trim();
2041
1543
  lastAskInstructionText = rawInstructionText;
2042
1544
  lastCompactionReminderObjective = '';
2043
- await hookManager.emit('ask_start', { askId, instruction: rawInstructionText });
1545
+ if (hooksEnabled)
1546
+ await hookManager.emit('ask_start', { askId, instruction: rawInstructionText });
2044
1547
  const reviewKeys = reviewArtifactKeys(projectDir);
2045
1548
  const retrievalRequested = looksLikeReviewRetrievalRequest(rawInstructionText);
2046
1549
  const shouldPersistReviewArtifact = looksLikeCodeReviewRequest(rawInstructionText) && !retrievalRequested;
@@ -2313,15 +1816,17 @@ export async function createSession(opts) {
2313
1816
  if (inFlight?.signal?.aborted)
2314
1817
  break;
2315
1818
  turns++;
2316
- await hookManager.emit('turn_start', { askId, turn: turns });
1819
+ if (hooksEnabled)
1820
+ await hookManager.emit('turn_start', { askId, turn: turns });
2317
1821
  const wallElapsed = (Date.now() - wallStart) / 1000;
2318
1822
  if (wallElapsed > cfg.timeout) {
2319
1823
  throw new Error(`session timeout exceeded (${cfg.timeout}s) after ${wallElapsed.toFixed(1)}s`);
2320
1824
  }
2321
1825
  await maybeAutoDetectModelChange();
1826
+ const compactionStartMs = Date.now();
2322
1827
  await runCompactionWithLock('auto context-budget compaction', async () => {
2323
1828
  const beforeMsgs = messages;
2324
- const beforeTokens = estimateTokensFromMessages(beforeMsgs);
1829
+ const beforeTokens = estimateTokensCached(beforeMsgs);
2325
1830
  const compacted = enforceContextBudget({
2326
1831
  messages: beforeMsgs,
2327
1832
  contextWindow,
@@ -2330,8 +1835,15 @@ export async function createSession(opts) {
2330
1835
  compactAt: cfg.compact_at ?? 0.8,
2331
1836
  toolSchemaTokens: estimateToolSchemaTokens(getToolsSchema()),
2332
1837
  });
2333
- const compactedByRefs = new Set(compacted);
2334
- const dropped = beforeMsgs.filter((m) => !compactedByRefs.has(m));
1838
+ let dropped;
1839
+ if (compacted.length === beforeMsgs.length) {
1840
+ // Fast path: no drops expected when lengths match.
1841
+ dropped = [];
1842
+ }
1843
+ else {
1844
+ const compactedByRefs = new Set(compacted);
1845
+ dropped = beforeMsgs.filter((m) => !compactedByRefs.has(m));
1846
+ }
2335
1847
  if (dropped.length && vault) {
2336
1848
  try {
2337
1849
  // Store the original/current user prompt before compaction so it survives context loss.
@@ -2366,7 +1878,10 @@ export async function createSession(opts) {
2366
1878
  messages = compacted;
2367
1879
  let summaryUsed = false;
2368
1880
  if (dropped.length) {
2369
- const droppedTokens = estimateTokensFromMessages(dropped);
1881
+ let droppedTokens = 0;
1882
+ if (cfg.compact_summary !== false) {
1883
+ droppedTokens = estimateTokensCached(dropped);
1884
+ }
2370
1885
  if (cfg.compact_summary !== false && droppedTokens > 200) {
2371
1886
  try {
2372
1887
  const summaryContent = buildCompactionSummaryPrompt(dropped);
@@ -2414,11 +1929,12 @@ export async function createSession(opts) {
2414
1929
  }
2415
1930
  }
2416
1931
  // Update token count AFTER injections so downstream reads are accurate
2417
- currentContextTokens = estimateTokensFromMessages(messages);
2418
- const afterTokens = estimateTokensFromMessages(compacted);
1932
+ currentContextTokens = estimateTokensCached(messages);
1933
+ const afterTokens = estimateTokensCached(compacted);
2419
1934
  const freedTokens = Math.max(0, beforeTokens - afterTokens);
2420
1935
  // Emit compaction event for callers (e.g. Anton controller → Discord)
2421
1936
  if (dropped.length) {
1937
+ perf.compactions++;
2422
1938
  try {
2423
1939
  await hookObj.onCompaction?.({
2424
1940
  droppedMessages: dropped.length,
@@ -2440,6 +1956,7 @@ export async function createSession(opts) {
2440
1956
  dryRun: false,
2441
1957
  };
2442
1958
  });
1959
+ perf.compactMs += Date.now() - compactionStartMs;
2443
1960
  const ac = makeAbortController();
2444
1961
  inFlight = ac;
2445
1962
  // If caller provided an AbortSignal (bench iteration timeout, etc), propagate it.
@@ -2511,6 +2028,11 @@ export async function createSession(opts) {
2511
2028
  inFlight = null;
2512
2029
  }
2513
2030
  const ttcMs = Date.now() - turnStartMs;
2031
+ perf.modelMs += ttcMs;
2032
+ if (ttftMs !== undefined) {
2033
+ perf.ttftMsSum += ttftMs;
2034
+ perf.ttftSamples++;
2035
+ }
2514
2036
  const promptTokensTurn = resp.usage?.prompt_tokens ?? 0;
2515
2037
  const completionTokensTurn = resp.usage?.completion_tokens ?? 0;
2516
2038
  // Track server-reported usage when available
@@ -2743,7 +2265,7 @@ export async function createSession(opts) {
2743
2265
  },
2744
2266
  });
2745
2267
  // Tool-call argument parsing and validation logic
2746
- const fileMutationsInTurn = toolCallsArr.filter((tc) => FILE_MUTATION_TOOL_SET.has(tc.function?.name)).length;
2268
+ const fileMutationsInTurn = toolCallsArr.filter((tc) => isMutationTool(tc.function?.name)).length;
2747
2269
  if (fileMutationsInTurn >= 3 && isGitDirty(ctx.cwd)) {
2748
2270
  const shouldStash = confirmBridge
2749
2271
  ? await confirmBridge(`Working tree is dirty and the agent plans ${fileMutationsInTurn} file edits. Stash current changes first? [Y/n]`, { tool: 'git_stash', args: { fileMutationsInTurn } })
@@ -2948,18 +2470,40 @@ export async function createSession(opts) {
2948
2470
  }
2949
2471
  continue;
2950
2472
  }
2951
- // Default behavior for mutating/other tools: break on repeated identical signature.
2473
+ // Improved handling of mutating tool loops - gradual recovery instead of immediate error
2474
+ const sigCount = sigCounts.get(sig) ?? 0;
2952
2475
  const loopThreshold = harness.quirks.loopsOnToolError ? 2 : 3;
2953
- if ((sigCounts.get(sig) ?? 0) >= loopThreshold) {
2476
+ if (sigCount >= loopThreshold) {
2954
2477
  const argsObj = sigMetaBySig.get(sig)?.args ?? {};
2955
2478
  const argsRaw = JSON.stringify(argsObj);
2956
2479
  const argsPreview = argsRaw.length > 220 ? argsRaw.slice(0, 220) + '…' : argsRaw;
2957
- throw new Error(`tool ${toolName}: identical call repeated ${loopThreshold}x across turns; breaking loop. ` +
2958
- `args=${argsPreview}\n` +
2959
- `Hint: you repeated the same tool call ${loopThreshold} times with identical arguments. ` +
2960
- `If the call succeeded, move on to the next step. ` +
2961
- `If it failed, check that all required parameters are present and correct. ` +
2962
- `For write_file/edit_file/apply_patch/edit_range, ensure required args are present (content/old_text/new_text/patch/files/start_line/end_line/replacement).`);
2480
+ // At threshold: trigger toolless recovery instead of throwing error
2481
+ // This gives the model a chance to think and try a different approach
2482
+ console.error(`[tool-loop] critical: ${toolName} repeated ${sigCount}x with same args. Triggering recovery turn.`);
2483
+ shouldForceToollessRecovery = true;
2484
+ // Poison this specific tool signature to prevent re-execution
2485
+ poisonedToolSigs.add(sig);
2486
+ // Add helpful guidance
2487
+ messages.push({
2488
+ role: 'user',
2489
+ content: `[system] Tool loop detected: ${toolName} called ${sigCount}x with identical arguments.\n` +
2490
+ `args=${argsPreview}\n\n` +
2491
+ `The same edit is being attempted repeatedly. This usually means:\n` +
2492
+ `1. The edit already succeeded - verify by reading the file\n` +
2493
+ `2. The old_text doesn't match - read the file to see actual content\n` +
2494
+ `3. A different approach is needed\n\n` +
2495
+ `Do NOT repeat the same edit. Read the file first, then decide on next steps.`,
2496
+ });
2497
+ continue;
2498
+ }
2499
+ // At sigCount === loopThreshold - 1: inject early warning
2500
+ if (sigCount === loopThreshold - 1) {
2501
+ console.error(`[tool-loop] warning: ${toolName} repeated ${sigCount}x. Next repeat will trigger recovery.`);
2502
+ messages.push({
2503
+ role: 'user',
2504
+ content: `[system] Warning: ${toolName} has been called ${sigCount} times with identical arguments. ` +
2505
+ `If this edit keeps failing, read the target file to verify its current state before trying again.`,
2506
+ });
2963
2507
  }
2964
2508
  }
2965
2509
  // Update consecutive tracking: save this turn's signatures for next turn comparison.
@@ -3002,11 +2546,26 @@ export async function createSession(opts) {
3002
2546
  catch {
3003
2547
  // Respect harness retry limit for malformed JSON (§4i)
3004
2548
  malformedCount++;
2549
+ // Detect if the model is outputting diff/patch format instead of JSON
2550
+ const looksLikeDiff = /^[\s"]*---\s+a\/|^\+\+\+\s+b\/|^@@\s+-\d+/m.test(rawArgs);
2551
+ const looksLikePatch = /^diff\s+--git|^Index:|^\*\*\*\s+/m.test(rawArgs);
3005
2552
  if (malformedCount > harness.toolCalls.retryOnMalformed) {
3006
2553
  // Break the outer loop — this model won't self-correct
3007
- throw new AgentLoopBreak(`tool ${name}: malformed JSON exceeded retry limit (${harness.toolCalls.retryOnMalformed}): ${rawArgs.slice(0, 200)}`);
2554
+ const hint = looksLikeDiff || looksLikePatch
2555
+ ? ' The model is outputting diff/patch format instead of JSON. This may be a model compatibility issue.'
2556
+ : '';
2557
+ throw new AgentLoopBreak(`tool ${name}: malformed JSON exceeded retry limit (${harness.toolCalls.retryOnMalformed}): ${rawArgs.slice(0, 200)}${hint}`);
2558
+ }
2559
+ // Give specific guidance based on the error pattern
2560
+ let hint = 'Return a valid JSON object for function.arguments.';
2561
+ if (looksLikeDiff || looksLikePatch) {
2562
+ hint =
2563
+ 'ERROR: You output a diff/patch format instead of JSON. ' +
2564
+ 'Tool arguments must be a JSON object like {"path": "file.txt", "old_text": "...", "new_text": "..."}. ' +
2565
+ 'Do NOT use unified diff format (--- a/ +++ b/ @@). ' +
2566
+ 'Use the exact JSON schema required by the tool.';
3008
2567
  }
3009
- throw new ToolError('invalid_args', `tool ${name}: arguments not valid JSON`, false, 'Return a valid JSON object for function.arguments.', { raw: rawArgs.slice(0, 200) });
2568
+ throw new ToolError('invalid_args', `tool ${name}: arguments not valid JSON`, false, hint, { raw: rawArgs.slice(0, 200) });
3010
2569
  }
3011
2570
  if (args == null || typeof args !== 'object' || Array.isArray(args)) {
3012
2571
  throw new ValidationError([
@@ -3014,10 +2573,10 @@ export async function createSession(opts) {
3014
2573
  ]);
3015
2574
  }
3016
2575
  const builtInFn = tools[name];
3017
- const isLspTool = LSP_TOOL_NAME_SET.has(name);
2576
+ const lspToolCall = isLspTool(name);
3018
2577
  const isSpawnTask = name === 'spawn_task';
3019
2578
  const hasMcpTool = mcpManager?.hasTool(name) === true;
3020
- if (!builtInFn && !isLspTool && !hasMcpTool && !isSpawnTask)
2579
+ if (!builtInFn && !lspToolCall && !hasMcpTool && !isSpawnTask)
3021
2580
  throw new Error(`unknown tool: ${name}`);
3022
2581
  // Keep parsed args by call-id so we can digest/archive tool outputs with context.
3023
2582
  toolArgsByCallId.set(callId, args && typeof args === 'object' && !Array.isArray(args) ? args : {});
@@ -3051,7 +2610,7 @@ export async function createSession(opts) {
3051
2610
  throw new Error(`exec: ${reason} — command: ${args.command}`);
3052
2611
  }
3053
2612
  }
3054
- if (FILE_MUTATION_TOOL_SET.has(name) && typeof args.path === 'string') {
2613
+ if (isMutationTool(name) && typeof args.path === 'string') {
3055
2614
  const absPath = args.path.startsWith('/')
3056
2615
  ? args.path
3057
2616
  : path.resolve(projectDir, args.path);
@@ -3075,7 +2634,7 @@ export async function createSession(opts) {
3075
2634
  const searchTerm = typeof args.search === 'string' ? args.search : '';
3076
2635
  // Fix 1: Hard cumulative budget — refuse reads past hard cap
3077
2636
  if (cumulativeReadOnlyCalls > READ_BUDGET_HARD) {
3078
- await emitToolCall({ id: callId, name, args });
2637
+ await emitToolCall(callId, name, args);
3079
2638
  await emitToolResult({
3080
2639
  id: callId,
3081
2640
  name,
@@ -3102,7 +2661,7 @@ export async function createSession(opts) {
3102
2661
  blockedDirs.add(parentDir);
3103
2662
  }
3104
2663
  if (blockedDirs.has(parentDir) && uniqueCount > 8) {
3105
- await emitToolCall({ id: callId, name, args });
2664
+ await emitToolCall(callId, name, args);
3106
2665
  await emitToolResult({
3107
2666
  id: callId,
3108
2667
  name,
@@ -3123,7 +2682,7 @@ export async function createSession(opts) {
3123
2682
  searchTermFiles.set(key, new Set());
3124
2683
  searchTermFiles.get(key).add(filePath);
3125
2684
  if (searchTermFiles.get(key).size >= 3) {
3126
- await emitToolCall({ id: callId, name, args });
2685
+ await emitToolCall(callId, name, args);
3127
2686
  await emitToolResult({
3128
2687
  id: callId,
3129
2688
  name,
@@ -3159,7 +2718,7 @@ export async function createSession(opts) {
3159
2718
  reason: `plan mode: ${summary}`,
3160
2719
  });
3161
2720
  // Hook: onToolCall + onToolResult for plan-blocked actions
3162
- await emitToolCall({ id: callId, name, args });
2721
+ await emitToolCall(callId, name, args);
3163
2722
  await emitToolResult({
3164
2723
  id: callId,
3165
2724
  name,
@@ -3170,7 +2729,7 @@ export async function createSession(opts) {
3170
2729
  return { id: callId, content: blockedMsg };
3171
2730
  }
3172
2731
  // Hook: onToolCall (Phase 8.5)
3173
- await emitToolCall({ id: callId, name, args });
2732
+ await emitToolCall(callId, name, args);
3174
2733
  if (cfg.step_mode) {
3175
2734
  const stepPrompt = `Step mode: execute ${name}(${JSON.stringify(args).slice(0, 200)}) ? [Y/n]`;
3176
2735
  const ok = confirmBridge
@@ -3259,7 +2818,7 @@ export async function createSession(opts) {
3259
2818
  }
3260
2819
  }
3261
2820
  }
3262
- else if (isLspTool && lspManager) {
2821
+ else if (isLspTool(name) && lspManager) {
3263
2822
  // LSP tool dispatch
3264
2823
  content = await dispatchLspTool(name, args);
3265
2824
  }
@@ -3325,7 +2884,7 @@ export async function createSession(opts) {
3325
2884
  if (lines.length > 0)
3326
2885
  resultEvent.searchMatches = lines.slice(0, 20);
3327
2886
  }
3328
- else if (FILE_MUTATION_TOOL_SET.has(name) && replay) {
2887
+ else if (isMutationTool(name) && replay) {
3329
2888
  // Grab the most recent checkpoint for a diff preview
3330
2889
  try {
3331
2890
  const cps = await replay.list(1);
@@ -3348,7 +2907,7 @@ export async function createSession(opts) {
3348
2907
  await emitToolResult(resultEvent);
3349
2908
  // Proactive LSP diagnostics after file mutations
3350
2909
  if (lspManager?.hasServers() && lspCfg?.proactive_diagnostics !== false) {
3351
- if (FILE_MUTATION_TOOL_SET.has(name)) {
2910
+ if (isMutationTool(name)) {
3352
2911
  const mutatedPath = typeof args.path === 'string' ? args.path : '';
3353
2912
  if (mutatedPath) {
3354
2913
  try {
@@ -3380,7 +2939,7 @@ export async function createSession(opts) {
3380
2939
  // ── Per-file mutation spiral detection ──
3381
2940
  // Track edits to the same file. If the model keeps editing the same file
3382
2941
  // over and over, it's likely in an edit→break→read→edit corruption spiral.
3383
- if (FILE_MUTATION_TOOL_SET.has(name) && toolSuccess && typeof args.path === 'string') {
2942
+ if (isMutationTool(name) && toolSuccess && typeof args.path === 'string') {
3384
2943
  const absPath = args.path.startsWith('/')
3385
2944
  ? args.path
3386
2945
  : path.resolve(projectDir, args.path);
@@ -3431,7 +2990,13 @@ export async function createSession(opts) {
3431
2990
  }
3432
2991
  }
3433
2992
  }
3434
- return { id: callId, content };
2993
+ // Context-aware truncation: cap oversized tool results before returning
2994
+ // to prevent blowing out the context window on subsequent LLM calls.
2995
+ const truncated = truncateToolResultContent(content, contextWindow);
2996
+ if (truncated.truncated && cfg.verbose) {
2997
+ console.warn(`[context-budget] truncated ${name} result: ${content.length} → ${truncated.content.length} chars`);
2998
+ }
2999
+ return { id: callId, content: truncated.content };
3435
3000
  };
3436
3001
  const results = [];
3437
3002
  let invalidArgsThisTurn = false;
@@ -3550,7 +3115,7 @@ export async function createSession(opts) {
3550
3115
  }
3551
3116
  catch (e) {
3552
3117
  results.push(await catchToolError(e, tc));
3553
- if (FILE_MUTATION_TOOL_SET.has(tc.function.name)) {
3118
+ if (isMutationTool(tc.function.name)) {
3554
3119
  // Fail-fast: after mutating tool failure, stop the remaining batch.
3555
3120
  break;
3556
3121
  }
@@ -3568,7 +3133,7 @@ export async function createSession(opts) {
3568
3133
  }
3569
3134
  catch (e) {
3570
3135
  results.push(await catchToolError(e, tc));
3571
- if (FILE_MUTATION_TOOL_SET.has(tc.function.name)) {
3136
+ if (isMutationTool(tc.function.name)) {
3572
3137
  // Fail-fast: after mutating tool failure, stop the remaining batch.
3573
3138
  break;
3574
3139
  }
@@ -3797,12 +3362,13 @@ export async function createSession(opts) {
3797
3362
  })();
3798
3363
  const err = new Error(`BUG: threw undefined in agent.ask() (turn=${turns}). lastMsg=${lastMsg?.role ?? 'unknown'}:${lastMsgPreview}`);
3799
3364
  await persistFailure(err, `ask turn ${turns}`);
3800
- await hookManager.emit('ask_error', {
3801
- askId,
3802
- error: err.message,
3803
- turns,
3804
- toolCalls,
3805
- });
3365
+ if (hooksEnabled)
3366
+ await hookManager.emit('ask_error', {
3367
+ askId,
3368
+ error: err.message,
3369
+ turns,
3370
+ toolCalls,
3371
+ });
3806
3372
  throw err;
3807
3373
  }
3808
3374
  await persistFailure(e, `ask turn ${turns}`);
@@ -3813,12 +3379,13 @@ export async function createSession(opts) {
3813
3379
  // Never rethrow undefined; normalize to Error for debuggability.
3814
3380
  if (e === undefined) {
3815
3381
  const normalized = new Error('BUG: threw undefined (normalized at ask() boundary)');
3816
- await hookManager.emit('ask_error', {
3817
- askId,
3818
- error: normalized.message,
3819
- turns,
3820
- toolCalls,
3821
- });
3382
+ if (hooksEnabled)
3383
+ await hookManager.emit('ask_error', {
3384
+ askId,
3385
+ error: normalized.message,
3386
+ turns,
3387
+ toolCalls,
3388
+ });
3822
3389
  throw normalized;
3823
3390
  }
3824
3391
  await hookManager.emit('ask_error', {
@@ -3918,21 +3485,4 @@ export async function runAgent(opts) {
3918
3485
  });
3919
3486
  return session.ask(opts.instruction, opts.onToken);
3920
3487
  }
3921
- async function autoPickModel(client, cached) {
3922
- const ac = makeAbortController();
3923
- const timer = setTimeout(() => ac.abort(), 3000);
3924
- try {
3925
- const models = cached ?? normalizeModelsResponse(await client.models(ac.signal));
3926
- const q = models.data.find((m) => /qwen/i.test(m.id));
3927
- if (q)
3928
- return q.id;
3929
- const first = models.data[0]?.id;
3930
- if (!first)
3931
- throw new Error('No models found on server. Check your endpoint and that a model is loaded.');
3932
- return first;
3933
- }
3934
- finally {
3935
- clearTimeout(timer);
3936
- }
3937
- }
3938
3488
  //# sourceMappingURL=agent.js.map