@visorcraft/idlehands 0.9.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (197) hide show
  1. package/LICENSE +21 -0
  2. package/README.md +30 -0
  3. package/dist/agent.js +2604 -0
  4. package/dist/agent.js.map +1 -0
  5. package/dist/anton/controller.js +341 -0
  6. package/dist/anton/controller.js.map +1 -0
  7. package/dist/anton/lock.js +110 -0
  8. package/dist/anton/lock.js.map +1 -0
  9. package/dist/anton/parser.js +303 -0
  10. package/dist/anton/parser.js.map +1 -0
  11. package/dist/anton/prompt.js +203 -0
  12. package/dist/anton/prompt.js.map +1 -0
  13. package/dist/anton/reporter.js +119 -0
  14. package/dist/anton/reporter.js.map +1 -0
  15. package/dist/anton/session.js +51 -0
  16. package/dist/anton/session.js.map +1 -0
  17. package/dist/anton/types.js +7 -0
  18. package/dist/anton/types.js.map +1 -0
  19. package/dist/anton/verifier.js +263 -0
  20. package/dist/anton/verifier.js.map +1 -0
  21. package/dist/bench/compare.js +239 -0
  22. package/dist/bench/compare.js.map +1 -0
  23. package/dist/bench/debug_hooks.js +17 -0
  24. package/dist/bench/debug_hooks.js.map +1 -0
  25. package/dist/bench/json_extract.js +22 -0
  26. package/dist/bench/json_extract.js.map +1 -0
  27. package/dist/bench/openclaw.js +86 -0
  28. package/dist/bench/openclaw.js.map +1 -0
  29. package/dist/bench/report.js +116 -0
  30. package/dist/bench/report.js.map +1 -0
  31. package/dist/bench/runner.js +312 -0
  32. package/dist/bench/runner.js.map +1 -0
  33. package/dist/bench/types.js +2 -0
  34. package/dist/bench/types.js.map +1 -0
  35. package/dist/bot/commands.js +444 -0
  36. package/dist/bot/commands.js.map +1 -0
  37. package/dist/bot/confirm-discord.js +133 -0
  38. package/dist/bot/confirm-discord.js.map +1 -0
  39. package/dist/bot/confirm-telegram.js +290 -0
  40. package/dist/bot/confirm-telegram.js.map +1 -0
  41. package/dist/bot/discord.js +826 -0
  42. package/dist/bot/discord.js.map +1 -0
  43. package/dist/bot/format.js +210 -0
  44. package/dist/bot/format.js.map +1 -0
  45. package/dist/bot/session-manager.js +270 -0
  46. package/dist/bot/session-manager.js.map +1 -0
  47. package/dist/bot/telegram.js +678 -0
  48. package/dist/bot/telegram.js.map +1 -0
  49. package/dist/cli/agent-turn.js +45 -0
  50. package/dist/cli/agent-turn.js.map +1 -0
  51. package/dist/cli/args.js +236 -0
  52. package/dist/cli/args.js.map +1 -0
  53. package/dist/cli/bot.js +252 -0
  54. package/dist/cli/bot.js.map +1 -0
  55. package/dist/cli/build-repl-context.js +365 -0
  56. package/dist/cli/build-repl-context.js.map +1 -0
  57. package/dist/cli/command-registry.js +20 -0
  58. package/dist/cli/command-registry.js.map +1 -0
  59. package/dist/cli/commands/anton.js +271 -0
  60. package/dist/cli/commands/anton.js.map +1 -0
  61. package/dist/cli/commands/editing.js +328 -0
  62. package/dist/cli/commands/editing.js.map +1 -0
  63. package/dist/cli/commands/model.js +274 -0
  64. package/dist/cli/commands/model.js.map +1 -0
  65. package/dist/cli/commands/project.js +255 -0
  66. package/dist/cli/commands/project.js.map +1 -0
  67. package/dist/cli/commands/runtime.js +63 -0
  68. package/dist/cli/commands/runtime.js.map +1 -0
  69. package/dist/cli/commands/session.js +281 -0
  70. package/dist/cli/commands/session.js.map +1 -0
  71. package/dist/cli/commands/tools.js +126 -0
  72. package/dist/cli/commands/tools.js.map +1 -0
  73. package/dist/cli/commands/trifecta.js +221 -0
  74. package/dist/cli/commands/trifecta.js.map +1 -0
  75. package/dist/cli/commands/tui.js +17 -0
  76. package/dist/cli/commands/tui.js.map +1 -0
  77. package/dist/cli/init.js +222 -0
  78. package/dist/cli/init.js.map +1 -0
  79. package/dist/cli/input.js +360 -0
  80. package/dist/cli/input.js.map +1 -0
  81. package/dist/cli/oneshot.js +254 -0
  82. package/dist/cli/oneshot.js.map +1 -0
  83. package/dist/cli/repl-context.js +2 -0
  84. package/dist/cli/repl-context.js.map +1 -0
  85. package/dist/cli/runtime-cmds.js +811 -0
  86. package/dist/cli/runtime-cmds.js.map +1 -0
  87. package/dist/cli/service.js +145 -0
  88. package/dist/cli/service.js.map +1 -0
  89. package/dist/cli/session-state.js +130 -0
  90. package/dist/cli/session-state.js.map +1 -0
  91. package/dist/cli/setup.js +815 -0
  92. package/dist/cli/setup.js.map +1 -0
  93. package/dist/cli/shell.js +79 -0
  94. package/dist/cli/shell.js.map +1 -0
  95. package/dist/cli/status.js +392 -0
  96. package/dist/cli/status.js.map +1 -0
  97. package/dist/cli/watch.js +33 -0
  98. package/dist/cli/watch.js.map +1 -0
  99. package/dist/client.js +676 -0
  100. package/dist/client.js.map +1 -0
  101. package/dist/commands.js +194 -0
  102. package/dist/commands.js.map +1 -0
  103. package/dist/config.js +507 -0
  104. package/dist/config.js.map +1 -0
  105. package/dist/confirm/auto.js +13 -0
  106. package/dist/confirm/auto.js.map +1 -0
  107. package/dist/confirm/headless.js +41 -0
  108. package/dist/confirm/headless.js.map +1 -0
  109. package/dist/confirm/terminal.js +90 -0
  110. package/dist/confirm/terminal.js.map +1 -0
  111. package/dist/context.js +49 -0
  112. package/dist/context.js.map +1 -0
  113. package/dist/git.js +136 -0
  114. package/dist/git.js.map +1 -0
  115. package/dist/harnesses.js +171 -0
  116. package/dist/harnesses.js.map +1 -0
  117. package/dist/history.js +139 -0
  118. package/dist/history.js.map +1 -0
  119. package/dist/index.js +700 -0
  120. package/dist/index.js.map +1 -0
  121. package/dist/indexer.js +374 -0
  122. package/dist/indexer.js.map +1 -0
  123. package/dist/jsonrpc.js +76 -0
  124. package/dist/jsonrpc.js.map +1 -0
  125. package/dist/lens.js +525 -0
  126. package/dist/lens.js.map +1 -0
  127. package/dist/lsp.js +605 -0
  128. package/dist/lsp.js.map +1 -0
  129. package/dist/markdown.js +275 -0
  130. package/dist/markdown.js.map +1 -0
  131. package/dist/mcp.js +554 -0
  132. package/dist/mcp.js.map +1 -0
  133. package/dist/recovery.js +178 -0
  134. package/dist/recovery.js.map +1 -0
  135. package/dist/replay.js +132 -0
  136. package/dist/replay.js.map +1 -0
  137. package/dist/replay_cli.js +24 -0
  138. package/dist/replay_cli.js.map +1 -0
  139. package/dist/runtime/executor.js +418 -0
  140. package/dist/runtime/executor.js.map +1 -0
  141. package/dist/runtime/planner.js +197 -0
  142. package/dist/runtime/planner.js.map +1 -0
  143. package/dist/runtime/store.js +289 -0
  144. package/dist/runtime/store.js.map +1 -0
  145. package/dist/runtime/types.js +2 -0
  146. package/dist/runtime/types.js.map +1 -0
  147. package/dist/safety.js +446 -0
  148. package/dist/safety.js.map +1 -0
  149. package/dist/spinner.js +224 -0
  150. package/dist/spinner.js.map +1 -0
  151. package/dist/sys/context.js +124 -0
  152. package/dist/sys/context.js.map +1 -0
  153. package/dist/sys/snapshot.sh +97 -0
  154. package/dist/term.js +61 -0
  155. package/dist/term.js.map +1 -0
  156. package/dist/themes.js +135 -0
  157. package/dist/themes.js.map +1 -0
  158. package/dist/tools.js +1114 -0
  159. package/dist/tools.js.map +1 -0
  160. package/dist/tui/branch-picker.js +65 -0
  161. package/dist/tui/branch-picker.js.map +1 -0
  162. package/dist/tui/command-handler.js +108 -0
  163. package/dist/tui/command-handler.js.map +1 -0
  164. package/dist/tui/confirm.js +90 -0
  165. package/dist/tui/confirm.js.map +1 -0
  166. package/dist/tui/controller.js +463 -0
  167. package/dist/tui/controller.js.map +1 -0
  168. package/dist/tui/event-bridge.js +44 -0
  169. package/dist/tui/event-bridge.js.map +1 -0
  170. package/dist/tui/events.js +2 -0
  171. package/dist/tui/events.js.map +1 -0
  172. package/dist/tui/keymap.js +144 -0
  173. package/dist/tui/keymap.js.map +1 -0
  174. package/dist/tui/layout.js +11 -0
  175. package/dist/tui/layout.js.map +1 -0
  176. package/dist/tui/render.js +186 -0
  177. package/dist/tui/render.js.map +1 -0
  178. package/dist/tui/screen.js +48 -0
  179. package/dist/tui/screen.js.map +1 -0
  180. package/dist/tui/state.js +167 -0
  181. package/dist/tui/state.js.map +1 -0
  182. package/dist/tui/theme.js +70 -0
  183. package/dist/tui/theme.js.map +1 -0
  184. package/dist/tui/types.js +2 -0
  185. package/dist/tui/types.js.map +1 -0
  186. package/dist/types.js +2 -0
  187. package/dist/types.js.map +1 -0
  188. package/dist/upgrade.js +412 -0
  189. package/dist/upgrade.js.map +1 -0
  190. package/dist/utils.js +87 -0
  191. package/dist/utils.js.map +1 -0
  192. package/dist/vault.js +520 -0
  193. package/dist/vault.js.map +1 -0
  194. package/dist/vim.js +160 -0
  195. package/dist/vim.js.map +1 -0
  196. package/package.json +67 -0
  197. package/src/sys/snapshot.sh +97 -0
package/dist/agent.js ADDED
@@ -0,0 +1,2604 @@
1
+ import { OpenAIClient } from './client.js';
2
+ import { enforceContextBudget, stripThinking, estimateTokensFromMessages, estimateToolSchemaTokens } from './history.js';
3
+ import * as tools from './tools.js';
4
+ import { selectHarness } from './harnesses.js';
5
+ import { checkExecSafety, checkPathSafety } from './safety.js';
6
+ import { loadProjectContext } from './context.js';
7
+ import { loadGitContext, isGitDirty, stashWorkingTree } from './git.js';
8
+ import { projectIndexKeys, parseIndexMeta, isFreshIndex, indexSummaryLine } from './indexer.js';
9
+ import { ReplayStore } from './replay.js';
10
+ import { VaultStore } from './vault.js';
11
+ import { LensStore } from './lens.js';
12
+ import { SYS_CONTEXT_SCHEMA, collectSnapshot } from './sys/context.js';
13
+ import { MCPManager } from './mcp.js';
14
+ import { LspManager, detectInstalledLspServers } from './lsp.js';
15
+ import fs from 'node:fs/promises';
16
+ import path from 'node:path';
17
+ import { stateDir } from './utils.js';
18
/**
 * Create a fresh AbortController.
 * Relies on the global AbortController (available in the Node runtime this package targets).
 * @returns {AbortController}
 */
function makeAbortController() {
    const controller = new AbortController();
    return controller;
}
22
/**
 * Generate a minimal unified diff for Phase 7 rich display (max 20 lines, truncated).
 *
 * The result is a single hunk: the common prefix and common suffix of the two
 * texts are stripped, everything in between is reported as removed/added, and
 * up to two lines of unchanged context are shown on each side.
 *
 * @param {string} before - Original file text.
 * @param {string} after - New file text.
 * @param {string} filePath - Path shown in the `---`/`+++` header lines.
 * @returns {string} Diff text; ends with a `[+N more lines]` marker when the
 *   body was cut at the 20-line limit.
 */
function generateMinimalDiff(before, after, filePath) {
    const MAX_LINES = 20;
    const CONTEXT_LINES = 2;
    const bLines = before.split('\n');
    const aLines = after.split('\n');
    // Length of the common prefix.
    let diffStart = 0;
    while (diffStart < bLines.length && diffStart < aLines.length && bLines[diffStart] === aLines[diffStart]) {
        diffStart++;
    }
    // Strip the common suffix without overlapping the prefix. Using >= (not >)
    // lets a pure insertion or pure deletion end up with an empty range on the
    // other side instead of a spurious "remove then re-add" of an unchanged line.
    let bEnd = bLines.length - 1;
    let aEnd = aLines.length - 1;
    while (bEnd >= diffStart && aEnd >= diffStart && bLines[bEnd] === aLines[aEnd]) {
        bEnd--;
        aEnd--;
    }
    const contextBefore = Math.max(0, diffStart - CONTEXT_LINES);
    // The suffix is the same length on both sides, so one count serves both.
    const trailing = Math.min(CONTEXT_LINES, bLines.length - 1 - bEnd);
    const bCount = bEnd + trailing - contextBefore + 1;
    const aCount = aEnd + trailing - contextBefore + 1;
    const out = [
        `--- a/${filePath}`,
        `+++ b/${filePath}`,
        `@@ -${contextBefore + 1},${bCount} +${contextBefore + 1},${aCount} @@`,
    ];
    let lineCount = 0;
    // Append lines[from..to] with the given prefix, honouring the global line cap.
    const emitRange = (prefix, lines, from, to) => {
        for (let i = from; i <= to && lineCount < MAX_LINES; i++) {
            out.push(`${prefix}${lines[i]}`);
            lineCount++;
        }
    };
    emitRange(' ', bLines, contextBefore, diffStart - 1); // context before change
    emitRange('-', bLines, diffStart, bEnd); // removed lines
    emitRange('+', aLines, diffStart, aEnd); // added lines
    // Context after the change: the lines immediately following the changed
    // region (identical in both versions, so read them from the "before" side).
    emitRange(' ', bLines, bEnd + 1, bEnd + trailing);
    const totalChanges = (bEnd - diffStart + 1) + (aEnd - diffStart + 1);
    if (lineCount >= MAX_LINES && totalChanges > MAX_LINES) {
        out.push(`[+${totalChanges - MAX_LINES} more lines]`);
    }
    return out.join('\n');
}
74
/**
 * Generate a one-line summary of a tool result for hooks/display.
 *
 * @param {string} name - Tool name (e.g. 'read_file', 'exec').
 * @param {object} args - Arguments the tool was called with.
 * @param {string} content - Raw tool output.
 * @param {boolean} success - Whether the tool call succeeded.
 * @returns {string} Short human-readable summary.
 */
function toolResultSummary(name, args, content, success) {
    // Failures: just surface the head of the error text.
    if (!success) {
        return content.slice(0, 120);
    }
    const countNonEmptyLines = (text) => text.split('\n').filter(Boolean).length;
    if (name === 'read_file' || name === 'read_files') {
        const total = content.split('\n').length;
        return `${total} lines read`;
    }
    if (name === 'write_file') {
        return `wrote ${args.path || 'file'}`;
    }
    if (name === 'edit_file') {
        if (content.startsWith('ERROR')) {
            return content.slice(0, 120);
        }
        return `applied edit`;
    }
    if (name === 'insert_file') {
        return `inserted at line ${args.line ?? '?'}`;
    }
    if (name === 'exec') {
        // exec output is expected to be JSON with rc/out; fall back to raw text otherwise.
        try {
            const parsed = JSON.parse(content);
            const outLines = countNonEmptyLines(parsed.out || '');
            return `rc=${parsed.rc}, ${outLines} lines`;
        }
        catch {
            return content.slice(0, 80);
        }
    }
    if (name === 'list_dir') {
        return `${countNonEmptyLines(content)} entries`;
    }
    if (name === 'search_files') {
        // Result lines that start with "<number>:" are counted as matches.
        const hits = content.match(/^\d+:/gm) || [];
        return `${hits.length} matches`;
    }
    if (name === 'spawn_task') {
        const statusLine = content.split(/\r?\n/).find((l) => l.includes('status='));
        return statusLine ? statusLine.trim() : 'sub-agent task finished';
    }
    if (name === 'vault_search') {
        return `vault results`;
    }
    return content.slice(0, 80);
}
118
/**
 * Error type for conditions that should break the outer agent loop rather than
 * being caught by per-tool handlers.
 */
class AgentLoopBreak extends Error {
    // Own property so the error is identifiable by name.
    name = 'AgentLoopBreak';
    /** @param {string} message - Reason the loop is being aborted. */
    constructor(message) {
        super(message);
    }
}
125
// Prompt text spelling out the agent's tool-usage rules.
// NOTE(review): presumably installed as the default system message for the main
// agent session — the use site is outside this section; confirm.
const SYSTEM_PROMPT = `You are a coding agent with filesystem and shell access. Execute the user's request using the provided tools.

Rules:
- Work in the current directory. Use relative paths for all file operations.
- Do the work directly. Do NOT use spawn_task to delegate the user's primary request — only use it for genuinely independent subtasks that benefit from parallel execution.
- Read the target file before editing. You need the exact text for search/replace.
- Use read_file with search=... to jump to relevant code; avoid reading whole files.
- Use edit_file for surgical changes. Never rewrite entire files when a targeted edit works.
- Use insert_file for insertions (prepend/append/line).
- Use exec to run commands, tests, builds; check results before reporting success.
- When running commands in a subdirectory, use exec's cwd parameter — NOT "cd /path && cmd". Each exec call is a fresh shell; cd does not persist.
- Batch work: read all files you need, then apply all edits, then verify.
- Be concise. Report what you changed and why.
- Do NOT read every file in a directory. Use search_files or exec with grep to locate relevant code first, then read only the files that match.
- If search_files returns 0 matches, try a broader pattern or use: exec grep -rn "keyword" path/
- Anton (the autonomous task runner) is ONLY activated when the user explicitly invokes /anton. Never self-activate as Anton or start processing task files on your own.

Tool call format:
- Use tool_calls. Do not write JSON tool invocations in your message text.
`;
// Sentinel string.
// NOTE(review): the consumer is not visible in this section — presumably a marker
// the model can emit to ask for MCP tool schemas; confirm against the agent loop.
const MCP_TOOLS_REQUEST_TOKEN = '[[MCP_TOOLS_REQUEST]]';
// Prompt text for sub-agent sessions.
// NOTE(review): presumably used when a spawn_task call supplies no system_prompt
// override — confirm at the spawn_task handler.
const DEFAULT_SUB_AGENT_SYSTEM_PROMPT = `You are a focused coding sub-agent. Execute only the delegated task.
- Work in the current directory. Use relative paths for all file operations.
- Read the target file before editing. You need the exact text for search/replace.
- Keep tool usage tight and efficient.
- Prefer surgical edits over rewrites.
- Do NOT create files outside the working directory unless explicitly requested.
- When running commands in a subdirectory, use exec's cwd parameter — NOT "cd /path && cmd".
- Run verification commands when relevant.
- Return a concise outcome summary.`;
// NOTE(review): presumably the default approximate-token budget applied to a
// sub-agent's returned text — use site not visible here; confirm.
const DEFAULT_SUB_AGENT_RESULT_TOKEN_CAP = 2000;
// Accepted approval-mode names; normalizeApprovalMode validates against this set.
const APPROVAL_MODE_SET = new Set(['plan', 'reject', 'default', 'auto-edit', 'yolo']);
// Names of the LSP tools; these match the schemas buildToolsSchema adds when opts.lspTools is set.
const LSP_TOOL_NAMES = ['lsp_diagnostics', 'lsp_symbols', 'lsp_hover', 'lsp_definition', 'lsp_references'];
// Set form of LSP_TOOL_NAMES for membership checks.
const LSP_TOOL_NAME_SET = new Set(LSP_TOOL_NAMES);
// Names of the tools whose schemas describe editing, writing, or inserting into a file.
const FILE_MUTATION_TOOL_SET = new Set(['edit_file', 'write_file', 'insert_file']);
160
/**
 * Validate a user-supplied approval mode.
 *
 * @param {unknown} value - Candidate mode; surrounding whitespace is ignored.
 * @returns {string|undefined} The trimmed mode when it is a member of
 *   APPROVAL_MODE_SET, otherwise undefined (including for non-string input).
 */
function normalizeApprovalMode(value) {
    if (typeof value === 'string') {
        const candidate = value.trim();
        if (APPROVAL_MODE_SET.has(candidate)) {
            return candidate;
        }
    }
    return undefined;
}
166
/** Approval mode permissiveness ranking (lower = more restrictive). */
const APPROVAL_MODE_RANK = {
    plan: 0,
    reject: 1,
    default: 2,
    'auto-edit': 3,
    yolo: 4,
};
/**
 * Cap a sub-agent's approval mode at the parent's level.
 * Sub-agents cannot escalate beyond the parent's approval mode.
 *
 * @param {string} requested - Mode the sub-agent asked for.
 * @param {string} parentMode - Mode the parent session is running in.
 * @returns {string} `requested` when it is no more permissive than the parent;
 *   otherwise `parentMode` (also the result when `requested` has no known rank).
 */
function capApprovalMode(requested, parentMode) {
    const requestedRank = APPROVAL_MODE_RANK[requested];
    const parentRank = APPROVAL_MODE_RANK[parentMode];
    if (requestedRank <= parentRank) {
        return requested;
    }
    return parentMode;
}
175
/**
 * Format a millisecond duration as seconds with one decimal place.
 *
 * @param {number} ms - Duration in milliseconds.
 * @returns {string} e.g. '1.5s'; '0.0s' for non-finite, zero, or negative input.
 */
function formatDurationMs(ms) {
    const isUsable = Number.isFinite(ms) && ms > 0;
    return isUsable ? `${(ms / 1000).toFixed(1)}s` : '0.0s';
}
180
/**
 * Convert an approximate token budget into a character budget.
 *
 * Uses a rough 4-characters-per-token estimate and enforces a floor of 64
 * tokens. A budget that is not a number (NaN/undefined) falls back to that
 * floor instead of propagating NaN, which would otherwise make callers clip
 * text to an empty string.
 *
 * @param {number} maxTokens - Requested token budget.
 * @returns {number} Character budget (at least 256).
 */
function approxTokenCharCap(maxTokens) {
    const MIN_TOKENS = 64;
    const CHARS_PER_TOKEN = 4;
    const requested = Math.floor(maxTokens);
    const safe = Number.isNaN(requested) ? MIN_TOKENS : Math.max(MIN_TOKENS, requested);
    return safe * CHARS_PER_TOKEN;
}
184
/**
 * Clip text to an approximate token budget.
 *
 * @param {unknown} text - Text to cap; null/undefined are treated as ''.
 * @param {number} maxTokens - Approximate token budget (converted to a character
 *   budget by approxTokenCharCap).
 * @returns {{text: string, truncated: boolean}} The original text when it fits,
 *   otherwise the clipped text followed by a truncation notice.
 */
function capTextByApproxTokens(text, maxTokens) {
    const original = String(text ?? '');
    const charBudget = approxTokenCharCap(maxTokens);
    const fits = original.length <= charBudget;
    if (fits) {
        return { text: original, truncated: false };
    }
    const head = original.slice(0, charBudget);
    const notice = `[sub-agent] result truncated to ~${maxTokens} tokens (${original.length} chars original)`;
    return { text: `${head}\n\n${notice}`, truncated: true };
}
195
/**
 * Heuristic binary check: reports true when a NUL byte appears within the
 * first 512 elements of the buffer.
 *
 * @param {ArrayLike<number>} buf - Bytes to inspect.
 * @returns {boolean}
 */
function isLikelyBinaryBuffer(buf) {
    const SNIFF_LIMIT = 512;
    let idx = Math.min(buf.length, SNIFF_LIMIT);
    // Scan order does not matter for a pure existence check.
    while (idx-- > 0) {
        if (buf[idx] === 0) {
            return true;
        }
    }
    return false;
}
203
/**
 * Strip absolute paths from a message to prevent cross-project leaks in vault.
 * Paths within cwd are replaced with relative equivalents; other absolute paths
 * are replaced with just the basename.
 *
 * Only paths rooted at /home, /tmp, /var, /usr, /opt, /etc or /root are matched.
 *
 * @param {string} message - Text that may contain absolute paths.
 * @param {string} cwd - Project working directory (trailing slashes ignored).
 * @returns {string} Message with matched paths rewritten.
 */
function sanitizePathsInMessage(message, cwd) {
    const dropTrailingSlashes = (p) => p.replace(/\/+$/, '');
    const cwdPrefix = `${dropTrailingSlashes(cwd)}/`;
    const absolutePathRe = /\/(?:home|tmp|var|usr|opt|etc|root)\/[^\s"',;)\]}>]+/g;
    return message.replace(absolutePathRe, (found) => {
        const cleaned = dropTrailingSlashes(found);
        // Within cwd — make relative.
        if (cleaned.startsWith(cwdPrefix)) {
            return cleaned.slice(cwdPrefix.length);
        }
        // Outside cwd — strip to basename (keep the raw match if basename is empty).
        return path.basename(cleaned) || found;
    });
}
222
/**
 * Build the text block of extra file context injected into a sub-agent session.
 *
 * At most 12 entries are considered. Each file contributes at most 4,000
 * characters and the combined sections at most 24,000 characters; files that
 * are outside cwd, missing, not regular files, unreadable, binary, or over the
 * remaining budget are reported in `skipped` with a reason.
 *
 * @param {string} cwd - Working directory that file paths are resolved against.
 * @param {unknown} rawFiles - Candidate list of paths; non-arrays and
 *   non-string/blank entries are ignored.
 * @returns {Promise<{block: string, included: string[], skipped: string[]}>}
 */
async function buildSubAgentContextBlock(cwd, rawFiles) {
    const values = Array.isArray(rawFiles) ? rawFiles : [];
    const files = values
        .map((v) => (typeof v === 'string' ? v.trim() : ''))
        .filter(Boolean)
        .slice(0, 12);
    if (!files.length) {
        return { block: '', included: [], skipped: [] };
    }
    const MAX_TOTAL_CHARS = 24_000;
    const MAX_PER_FILE_CHARS = 4_000;
    let total = 0;
    const parts = [];
    const included = [];
    const skipped = [];
    for (const rel of files) {
        const abs = path.resolve(cwd, rel);
        const relFromCwd = path.relative(cwd, abs);
        // Only a leading ".." path *segment* means the target escapes cwd; a plain
        // prefix test would also reject in-cwd names such as "..notes.md".
        const escapesCwd = relFromCwd === '..'
            || relFromCwd.startsWith(`..${path.sep}`)
            || path.isAbsolute(relFromCwd);
        if (escapesCwd) {
            skipped.push(`${rel} (outside cwd)`);
            continue;
        }
        let stat;
        try {
            stat = await fs.stat(abs);
        }
        catch {
            skipped.push(`${rel} (missing)`);
            continue;
        }
        if (!stat?.isFile()) {
            skipped.push(`${rel} (not a file)`);
            continue;
        }
        const buf = await fs.readFile(abs).catch(() => null);
        if (!buf) {
            skipped.push(`${rel} (unreadable)`);
            continue;
        }
        if (isLikelyBinaryBuffer(buf)) {
            skipped.push(`${rel} (binary)`);
            continue;
        }
        const raw = buf.toString('utf8');
        const body = raw.length > MAX_PER_FILE_CHARS
            ? `${raw.slice(0, MAX_PER_FILE_CHARS)}\n[truncated: ${raw.length} chars total]`
            : raw;
        const section = `[file:${rel}]\n${body}\n[/file:${rel}]`;
        // Keep scanning after an over-budget file: a later, smaller file may still fit.
        if (total + section.length > MAX_TOTAL_CHARS) {
            skipped.push(`${rel} (context budget reached)`);
            continue;
        }
        parts.push(section);
        included.push(rel);
        total += section.length;
    }
    return { block: parts.join('\n\n'), included, skipped };
}
279
/**
 * Extract the body of a lens projection.
 *
 * Skips a leading '# ' heading line and a following '# lens:' line when
 * present, drops blank lines, and keeps at most the first 40 remaining lines.
 *
 * @param {unknown} projection - Projection text; null/undefined are treated as ''.
 * @returns {string} Body lines joined with '\n'.
 */
function extractLensBody(projection) {
    const MAX_BODY_LINES = 40;
    const rows = String(projection ?? '').split(/\r?\n/);
    // split() always yields at least one element, so rows[0] is defined.
    let skip = rows[0].startsWith('# ') ? 1 : 0;
    if (rows[skip]?.startsWith('# lens:')) {
        skip += 1;
    }
    const kept = [];
    for (const row of rows.slice(skip)) {
        if (row.trim().length === 0) {
            continue;
        }
        kept.push(row);
        if (kept.length === MAX_BODY_LINES) {
            break;
        }
    }
    return kept.join('\n');
}
294
/**
 * Build the list of tool (function-call) schemas offered to the model.
 *
 * The eight core file/shell tools are always present. Further tools are
 * appended according to `opts`:
 *  - `allowSpawnTask` (anything but `false`): spawn_task
 *  - `activeVaultTools`: vault_search, vault_note
 *  - `sysMode`: SYS_CONTEXT_SCHEMA
 *  - `lspTools`: lsp_diagnostics, lsp_symbols, lsp_hover, lsp_definition, lsp_references
 *  - `mcpTools` (non-empty array): appended as-is
 *
 * @param {object} [opts] - Feature switches described above.
 * @returns {object[]} Tool schema objects.
 */
function buildToolsSchema(opts) {
    // Closed object schema: no properties beyond the declared ones.
    const objectSchema = (properties, required = []) => ({
        type: 'object',
        additionalProperties: false,
        properties,
        required,
    });
    const defineTool = (name, description, parameters) => ({
        type: 'function',
        function: { name, description, parameters },
    });
    const field = (type, description) => (description === undefined ? { type } : { type, description });
    const strField = (description) => field('string', description);
    const intField = (description) => field('integer', description);
    const boolField = (description) => field('boolean', description);
    // Factories (not shared objects) so every schema owns its own property map.
    const readRequestFields = () => ({
        path: strField(),
        offset: intField(),
        limit: intField(),
        search: strField(),
        context: intField(),
    });
    const positionFields = () => ({
        path: strField(),
        line: intField(),
        character: intField(),
    });
    const schemas = [
        defineTool('read_file', 'Read file contents with line numbers. Use search/context to jump to relevant code.', objectSchema(readRequestFields(), ['path'])),
        defineTool('read_files', 'Batch read multiple files.', objectSchema({
            requests: {
                type: 'array',
                items: objectSchema(readRequestFields(), ['path']),
            },
        }, ['requests'])),
        defineTool('write_file', 'Write a file (atomic). Creates parents. Makes a backup first.', objectSchema({
            path: strField(),
            content: strField(),
        }, ['path', 'content'])),
        defineTool('edit_file', 'Search/replace exact text in a file. Fails if old_text not found.', objectSchema({
            path: strField(),
            old_text: strField(),
            new_text: strField(),
            replace_all: boolField(),
        }, ['path', 'old_text', 'new_text'])),
        defineTool('insert_file', 'Insert text at a specific line (0=prepend, -1=append).', objectSchema({
            path: strField(),
            line: intField(),
            text: strField(),
        }, ['path', 'line', 'text'])),
        defineTool('list_dir', 'List directory contents (optional recursive, max depth 3).', objectSchema({
            path: strField(),
            recursive: boolField(),
        }, ['path'])),
        defineTool('search_files', 'Search for a regex pattern in files under a directory.', objectSchema({
            pattern: strField(),
            path: strField(),
            include: strField(),
        }, ['pattern', 'path'])),
        defineTool('exec', 'Run a shell command (bash -c) with timeout; returns JSON rc/out/err. Each call is a new shell — cwd does not persist between calls.', objectSchema({
            command: strField('Shell command to run'),
            cwd: strField('Working directory (default: project root). Use this instead of cd.'),
            timeout: intField('Timeout in seconds (default: 30, max: 120). Use 60-120 for npm install, builds, or test suites.'),
        }, ['command'])),
    ];
    // Offered unless explicitly disabled.
    if (opts?.allowSpawnTask !== false) {
        schemas.push(defineTool('spawn_task', 'Delegate a focused task to an isolated sub-agent session (no parent chat history).', objectSchema({
            task: strField('Instruction for the sub-agent'),
            context_files: {
                type: 'array',
                description: 'Optional extra files to inject into sub-agent context',
                items: strField(),
            },
            model: strField('Optional model override for this task'),
            endpoint: strField('Optional endpoint override for this task'),
            max_iterations: intField('Optional max turn cap for the sub-agent'),
            max_tokens: intField('Optional max completion tokens for the sub-agent'),
            timeout_sec: intField('Optional timeout for this sub-agent run (seconds)'),
            system_prompt: strField('Optional sub-agent system prompt override for this task'),
            approval_mode: { type: 'string', enum: ['plan', 'reject', 'default', 'auto-edit', 'yolo'] },
        }, ['task'])));
    }
    if (opts?.activeVaultTools) {
        schemas.push(
            defineTool('vault_search', 'Search vault entries (notes and previous tool outputs) to reuse prior high-signal findings.', objectSchema({
                query: strField(),
                limit: intField(),
            }, ['query'])),
            defineTool('vault_note', 'Persist a concise, high-signal note into the Trifecta vault.', objectSchema({
                key: strField(),
                value: strField(),
            }, ['key', 'value'])),
        );
    }
    // Phase 9: sys_context tool is only available in sys mode.
    if (opts?.sysMode) {
        schemas.push(SYS_CONTEXT_SCHEMA);
    }
    if (opts?.lspTools) {
        schemas.push(
            defineTool('lsp_diagnostics', 'Get current LSP diagnostics (errors/warnings) for a file or the whole project. Structured — replaces running build commands to check for errors.', objectSchema({
                path: strField('File path (omit for project-wide diagnostics)'),
                severity: intField('1=Error, 2=Warning, 3=Info, 4=Hint (default: config threshold)'),
            }, [])),
            defineTool('lsp_symbols', 'List all symbols (functions, classes, variables) in a file via LSP.', objectSchema({
                path: strField(),
            }, ['path'])),
            defineTool('lsp_hover', 'Get type info and documentation for a symbol at a position.', objectSchema(positionFields(), ['path', 'line', 'character'])),
            defineTool('lsp_definition', 'Go to definition of a symbol at a given position.', objectSchema(positionFields(), ['path', 'line', 'character'])),
            defineTool('lsp_references', 'Find all references to a symbol at a given position.', objectSchema({
                ...positionFields(),
                max_results: intField('Cap results (default 50)'),
            }, ['path', 'line', 'character'])),
        );
    }
    if (opts?.mcpTools?.length) {
        schemas.push(...opts.mcpTools);
    }
    return schemas;
}
516
/**
 * @internal Exported for testing. Parses tool calls from model content when tool_calls array is empty.
 *
 * Tries, in order:
 *  1. the whole content as JSON (a `{tool_calls: [...]}` wrapper or a single `{name, arguments}` call);
 *  2. the outermost `[...]` substring as an array of `{name, arguments}` items;
 *  3. the outermost `{...}` substring, interpreted as in step 1;
 *  4. XML-style `<tool_call>` blocks via parseXmlToolCalls.
 *
 * `arguments` is always emitted as a JSON string: values that are already
 * strings are passed through unchanged, anything else is JSON-encoded.
 *
 * @param {string} content - Assistant message text.
 * @returns {object[]|null} Tool calls, or null when none could be recovered
 *   (including when `content` is not a string).
 */
export function parseToolCallsFromContent(content) {
    // Message content may legitimately be absent; there is nothing to recover then.
    if (typeof content !== 'string') {
        return null;
    }
    // Fallback parser: if model printed JSON tool_calls in content.
    const trimmed = content.trim();
    const tryParse = (s) => {
        try {
            return JSON.parse(s);
        }
        catch {
            return null;
        }
    };
    // Already-encoded argument strings must not be encoded a second time.
    const encodeArguments = (args) => (typeof args === 'string' ? args : JSON.stringify(args ?? {}));
    const toToolCall = (item, index) => ({
        id: `call_${index}`,
        type: 'function',
        function: { name: String(item.name), arguments: encodeArguments(item.arguments) },
    });
    // Interpret a parsed value as a tool_calls wrapper or a single tool call.
    const fromObject = (value) => {
        if (value?.tool_calls && Array.isArray(value.tool_calls)) {
            return value.tool_calls;
        }
        if (value?.name && value?.arguments) {
            return [toToolCall(value, 0)];
        }
        return null;
    };
    // Case 1: whole content is JSON
    const fromWhole = fromObject(tryParse(trimmed));
    if (fromWhole) {
        return fromWhole;
    }
    // Case 2: raw JSON array of tool calls (model writes [{name, arguments}, ...])
    const arrStart = trimmed.indexOf('[');
    const arrEnd = trimmed.lastIndexOf(']');
    if (arrStart !== -1 && arrEnd !== -1 && arrEnd > arrStart) {
        const arrSub = tryParse(trimmed.slice(arrStart, arrEnd + 1));
        if (Array.isArray(arrSub) && arrSub.length > 0 && arrSub[0]?.name) {
            return arrSub.map(toToolCall);
        }
    }
    // Case 3: find a JSON object substring (handles tool_calls wrapper OR single tool-call)
    const start = trimmed.indexOf('{');
    const end = trimmed.lastIndexOf('}');
    if (start !== -1 && end !== -1 && end > start) {
        const fromSubstring = fromObject(tryParse(trimmed.slice(start, end + 1)));
        if (fromSubstring) {
            return fromSubstring;
        }
    }
    // Case 4: XML tool calls — used by Qwen, Hermes, and other models whose chat
    // templates emit <tool_call><function=name><parameter=key>value</parameter></function></tool_call>.
    // When llama-server's XML→JSON conversion fails (common with large write_file content),
    // the raw XML leaks into the content field. This recovers it.
    const xmlCalls = parseXmlToolCalls(trimmed);
    if (xmlCalls?.length) {
        return xmlCalls;
    }
    return null;
}
583
/**
 * Parse XML-style tool calls from content.
 * Format: <tool_call><function=name><parameter=key>value</parameter>...</function></tool_call>
 * Handles multiple tool call blocks and arbitrary parameter names/values.
 *
 * A block whose function takes no parameters is still returned (with `{}` as
 * its arguments) so that zero-argument tool calls are not silently dropped.
 *
 * @param {string} content - Message text that may contain raw XML tool calls.
 * @returns {Array<{id: string, type: string, function: {name: string, arguments: string}}>|null}
 *   One entry per complete <tool_call> block that names a function; ids are
 *   `call_xml_<n>` and `arguments` is a JSON-encoded object of string values.
 *   Returns null when no call could be recovered.
 */
function parseXmlToolCalls(content) {
    const blockOpenTag = '<tool_call>';
    const blockCloseTag = '</tool_call>';
    const paramOpenPrefix = '<parameter=';
    const paramCloseTag = '</parameter>';
    // Quick bailout: no point parsing if there's no <tool_call> marker
    if (!content.includes(blockOpenTag))
        return null;
    const calls = [];
    // Manual scan instead of a single greedy regex so that angle brackets inside
    // parameter values (generics, JSX, comparison operators) do not confuse matching.
    let searchFrom = 0;
    while (searchFrom < content.length) {
        const blockStart = content.indexOf(blockOpenTag, searchFrom);
        if (blockStart === -1)
            break;
        const blockEnd = content.indexOf(blockCloseTag, blockStart);
        if (blockEnd === -1)
            break; // Truncated — can't recover partial tool calls
        const block = content.slice(blockStart + blockOpenTag.length, blockEnd);
        searchFrom = blockEnd + blockCloseTag.length;
        // Extract function name: <function=name>...</function>
        const fnMatch = /<function=(\w[\w.-]*)>/.exec(block);
        if (!fnMatch)
            continue;
        const fnName = fnMatch[1];
        const fnStart = fnMatch.index + fnMatch[0].length;
        const fnEnd = block.lastIndexOf('</function>');
        // A missing </function> means the body runs to the end of the block.
        const fnBody = fnEnd !== -1 ? block.slice(fnStart, fnEnd) : block.slice(fnStart);
        // Extract parameters: <parameter=key>value</parameter>
        // Depth counting lets a value contain literal <parameter=...>...</parameter>
        // (e.g. when writing XML files) without being cut at the inner close tag.
        const args = {};
        const openRe = /<parameter=(\w[\w.-]*)>/g;
        let paramMatch;
        while ((paramMatch = openRe.exec(fnBody)) !== null) {
            const paramName = paramMatch[1];
            const valueStart = paramMatch.index + paramMatch[0].length;
            // Depth starts at 1; a nested open tag increments, a close tag decrements.
            let depth = 1;
            let scanPos = valueStart;
            let closeIdx = -1;
            while (scanPos < fnBody.length && depth > 0) {
                const nextOpen = fnBody.indexOf(paramOpenPrefix, scanPos);
                const nextClose = fnBody.indexOf(paramCloseTag, scanPos);
                if (nextClose === -1)
                    break; // No more close tags — truncated
                if (nextOpen !== -1 && nextOpen < nextClose) {
                    depth++;
                    scanPos = nextOpen + 1; // advance past '<' to avoid re-matching
                }
                else {
                    depth--;
                    if (depth === 0) {
                        closeIdx = nextClose;
                    }
                    scanPos = nextClose + paramCloseTag.length;
                }
            }
            if (closeIdx === -1) {
                // No matching close tag — take rest of body as value (truncated output)
                args[paramName] = fnBody.slice(valueStart).trim();
                break;
            }
            // Trim exactly one leading and one trailing newline, preserve internal whitespace
            let value = fnBody.slice(valueStart, closeIdx);
            if (value.startsWith('\n'))
                value = value.slice(1);
            if (value.endsWith('\n'))
                value = value.slice(0, -1);
            args[paramName] = value;
            // Resume the open-tag search after this parameter's close tag so that
            // tags nested inside the value are not treated as separate parameters.
            openRe.lastIndex = closeIdx + paramCloseTag.length;
        }
        // fnName is always non-empty here (the regex requires at least one character).
        // Calls without parameters are kept: some tools legitimately take none.
        calls.push({
            id: `call_xml_${calls.length}`,
            type: 'function',
            function: {
                name: fnName,
                arguments: JSON.stringify(args)
            }
        });
    }
    return calls.length > 0 ? calls : null;
}
677
/**
 * Check for missing required params by tool name — universal pre-dispatch validation.
 *
 * @param {string} toolName - Name of the tool about to be dispatched.
 * @param {object} args - Parsed tool arguments; a null or non-object value is
 *   treated as having no arguments at all.
 * @returns {string[]} Names of required parameters whose value is undefined or
 *   null. Empty for tools that are not in the table.
 */
function getMissingRequiredParams(toolName, args) {
    const required = {
        read_file: ['path'],
        read_files: ['requests'],
        write_file: ['path', 'content'],
        edit_file: ['path', 'old_text', 'new_text'],
        insert_file: ['path', 'line', 'text'],
        list_dir: ['path'],
        search_files: ['pattern', 'path'],
        exec: ['command'],
        spawn_task: ['task'],
        sys_context: [],
        vault_search: ['query'],
        vault_note: ['key', 'value']
    };
    // Own-property lookup only: a tool name such as "constructor" or "toString"
    // would otherwise resolve to an inherited Object.prototype member and crash
    // on `.filter`.
    if (!Object.prototype.hasOwnProperty.call(required, toolName))
        return [];
    const req = required[toolName];
    // Without an arguments object every required parameter is missing.
    if (args === null || args === undefined || typeof args !== 'object')
        return [...req];
    return req.filter((p) => args[p] === undefined || args[p] === null);
}
698
/**
 * Strip markdown code fences (```json ... ```) from tool argument strings.
 *
 * The optional `json` language tag is matched case-insensitively so that a
 * fence opened with ```JSON does not leave the tag inside the payload.
 *
 * @param {string} s - Raw tool argument string.
 * @returns {string} The text between the fences when the whole trimmed input
 *   is a single fenced block; otherwise the original string, unmodified.
 */
function stripMarkdownFences(s) {
    // Match ```json\n...\n``` or ```\n...\n``` spanning the entire trimmed input.
    const fenced = /^```(?:json)?\s*\n?([\s\S]*?)\n?```\s*$/i.exec(s.trim());
    return fenced ? fenced[1] : s;
}
705
/**
 * Report whether a tool name is one of the read-only tools listed below.
 *
 * @param {string} name - Tool name to classify.
 * @returns {boolean} True only for an exact match against the listed names.
 */
function isReadOnlyTool(name) {
    switch (name) {
        case 'read_file':
        case 'read_files':
        case 'list_dir':
        case 'search_files':
        case 'vault_search':
        case 'sys_context':
            return true;
        default:
            return false;
    }
}
708
/**
 * Build a one-line, human-readable description of what a blocked tool call would do.
 *
 * @param {string} name - Tool name.
 * @param {object} args - Parsed tool arguments.
 * @returns {string} Summary text; tools without a dedicated format fall back to
 *   `name(argKey, argKey, ...)`.
 */
function planModeSummary(name, args) {
    // Small formatters shared by the per-tool branches below.
    const countLines = (text) => (typeof text === 'string' ? text.split('\n').length : '?');
    const clip = (text) => (typeof text === 'string' ? text.slice(0, 80) : 'unknown');
    if (name === 'write_file') {
        return `write ${args.path ?? 'unknown'} (${countLines(args.content)} lines)`;
    }
    if (name === 'edit_file') {
        return `edit ${args.path ?? 'unknown'} (replace ${countLines(args.old_text)} lines)`;
    }
    if (name === 'insert_file') {
        return `insert into ${args.path ?? 'unknown'} at line ${args.line ?? '?'}`;
    }
    if (name === 'exec') {
        return `run: ${clip(args.command)}`;
    }
    if (name === 'spawn_task') {
        return `spawn sub-agent task: ${clip(args.task)}`;
    }
    if (name === 'vault_note') {
        return `vault note: ${args.key ?? 'unknown'}`;
    }
    const argNames = Object.keys(args).join(', ');
    return `${name}(${argNames})`;
}
727
/**
 * Flatten message content to plain text.
 *
 * @param {string|Array<{type: string, text?: string}>} content - Either a plain
 *   string or an array of content parts.
 * @returns {string} A string is returned unchanged; for an array, the `text` of
 *   every part whose `type` is 'text' is joined with newlines and trimmed.
 */
function userContentToText(content) {
    if (typeof content === 'string') {
        return content;
    }
    const textParts = content.reduce((collected, part) => {
        if (part.type === 'text') {
            collected.push(part.text);
        }
        return collected;
    }, []);
    const joined = textParts.join('\n');
    return joined.trim();
}
736
/**
 * Decide whether the selected model should be treated as accepting image input.
 *
 * Checks run in priority order and the first decisive one wins:
 *   1. an explicit boolean `harness.supportsVision`,
 *   2. an explicit boolean `modelMeta.vision`,
 *   3. an 'image' entry in `modelMeta.input_modalities` or `modelMeta.modalities`,
 *   4. a known vision-related pattern in the model id,
 *   5. 'vision' or 'vl' appearing in the harness id.
 *
 * @param {string} model - Model identifier.
 * @param {object|undefined} modelMeta - Metadata entry for the model, if any.
 * @param {{id: string, supportsVision?: boolean}} harness - Selected harness.
 * @returns {boolean} True when any check indicates vision support.
 */
function supportsVisionModel(model, modelMeta, harness) {
    // True when the value is an array with at least one entry mentioning "image".
    const mentionsImage = (list) => Array.isArray(list)
        && list.some((entry) => String(entry).toLowerCase().includes('image'));
    if (typeof harness.supportsVision === 'boolean') {
        return harness.supportsVision;
    }
    if (typeof modelMeta?.vision === 'boolean') {
        return modelMeta.vision;
    }
    if (mentionsImage(modelMeta?.input_modalities) || mentionsImage(modelMeta?.modalities)) {
        return true;
    }
    const visionNamePattern = /(vision|multimodal|\bvl\b|llava|qwen2\.5-vl|gpt-4o|gemini|claude-3)/i;
    if (visionNamePattern.test(model.toLowerCase())) {
        return true;
    }
    return harness.id.includes('vision') || harness.id.includes('vl');
}
756
/**
 * Normalize a models-list response into the shape `{ data: [...] }`.
 *
 * Accepted inputs:
 *   - a bare array, where non-empty strings become `{ id }` and objects with a
 *     non-empty string `id` are kept as-is;
 *   - an object with a `data` array, where only entries with a non-empty
 *     string `id` are kept (string entries are not converted in this form);
 *   - anything else, which yields an empty list.
 *
 * @param {*} raw - Raw response value (may be null).
 * @returns {{data: object[]}} Normalized model list.
 */
function normalizeModelsResponse(raw) {
    const hasUsableId = (entry) => typeof entry.id === 'string' && entry.id !== '';
    if (Array.isArray(raw)) {
        const data = [];
        for (const entry of raw) {
            if (!entry) {
                continue;
            }
            if (typeof entry === 'string') {
                data.push({ id: entry });
            }
            else if (hasUsableId(entry)) {
                data.push(entry);
            }
        }
        return { data };
    }
    if (raw && Array.isArray(raw.data)) {
        const data = raw.data.filter((entry) => Boolean(entry) && hasUsableId(entry));
        return { data };
    }
    return { data: [] };
}
781
+ export async function createSession(opts) {
782
+ const cfg = opts.config;
783
+ let client = opts.runtime?.client ?? new OpenAIClient(cfg.endpoint, opts.apiKey, cfg.verbose);
784
+ if (typeof client.setVerbose === 'function') {
785
+ client.setVerbose(cfg.verbose);
786
+ }
787
+ if (typeof cfg.response_timeout === 'number' && cfg.response_timeout > 0) {
788
+ client.setResponseTimeout(cfg.response_timeout);
789
+ }
790
+ // Health check + model list (cheap, avoids wasting GPU on chat warmups if unreachable)
791
+ let modelsList = normalizeModelsResponse(await client.models().catch(() => null));
792
+ let model = cfg.model && cfg.model.trim().length
793
+ ? cfg.model
794
+ : await autoPickModel(client, modelsList);
795
+ let harness = selectHarness(model, cfg.harness && cfg.harness.trim() ? cfg.harness.trim() : undefined);
796
+ // Try to derive context window from /v1/models (if provided by server).
797
+ const explicitContextWindow = cfg.context_window != null;
798
+ const modelMeta = modelsList?.data?.find((m) => m.id === model);
799
+ const derivedCtx = (modelMeta?.context_window ?? modelMeta?.context_length ?? modelMeta?.max_context_length);
800
+ let contextWindow = cfg.context_window ?? derivedCtx ?? 131072;
801
+ let supportsVision = supportsVisionModel(model, modelMeta, harness);
802
+ if (!cfg.i_know_what_im_doing && contextWindow > 131072) {
803
+ console.warn('[warn] context_window is above 131072; this can increase memory usage and hurt throughput. Use --i-know-what-im-doing to proceed.');
804
+ }
805
+ // Apply harness defaults for values the user didn't explicitly override.
806
+ // Config always fills max_tokens from DEFAULTS (16384), so we need to check
807
+ // whether the harness wants a higher value — harness.defaults.max_tokens wins
808
+ // when it's larger than the base default (16384), unless the user explicitly
809
+ // configured a value in their config file or CLI.
810
+ const BASE_MAX_TOKENS = 16384;
811
+ let maxTokens = cfg.max_tokens ?? BASE_MAX_TOKENS;
812
+ if (maxTokens === BASE_MAX_TOKENS && harness.defaults?.max_tokens && harness.defaults.max_tokens > BASE_MAX_TOKENS) {
813
+ maxTokens = harness.defaults.max_tokens;
814
+ }
815
+ let temperature = cfg.temperature ?? harness.defaults?.temperature ?? 0.2;
816
+ let topP = cfg.top_p ?? harness.defaults?.top_p ?? 0.95;
817
+ const harnessVaultMode = harness.defaults?.trifecta?.vaultMode || 'off';
818
+ const vaultMode = (cfg.trifecta?.vault?.mode || harnessVaultMode);
819
+ const vaultEnabled = cfg.trifecta?.enabled !== false && cfg.trifecta?.vault?.enabled !== false;
820
+ const activeVaultTools = vaultEnabled && vaultMode === 'active';
821
+ const lensEnabled = cfg.trifecta?.enabled !== false && cfg.trifecta?.lens?.enabled !== false;
822
+ const spawnTaskEnabled = opts.allowSpawnTask !== false && cfg.sub_agents?.enabled !== false;
823
+ const mcpServers = Array.isArray(cfg.mcp?.servers) ? cfg.mcp.servers : [];
824
+ const mcpEnabledTools = Array.isArray(cfg.mcp?.enabled_tools) ? cfg.mcp?.enabled_tools : undefined;
825
+ const mcpToolBudget = Number.isFinite(cfg.mcp_tool_budget)
826
+ ? Number(cfg.mcp_tool_budget)
827
+ : (Number.isFinite(cfg.mcp?.tool_budget) ? Number(cfg.mcp?.tool_budget) : 1000);
828
+ const mcpCallTimeoutSec = Number.isFinite(cfg.mcp_call_timeout_sec)
829
+ ? Number(cfg.mcp_call_timeout_sec)
830
+ : (Number.isFinite(cfg.mcp?.call_timeout_sec) ? Number(cfg.mcp?.call_timeout_sec) : 30);
831
+ const builtInToolNames = [
832
+ 'read_file', 'read_files', 'write_file', 'edit_file', 'insert_file',
833
+ 'list_dir', 'search_files', 'exec', 'vault_search', 'vault_note', 'sys_context',
834
+ ...(spawnTaskEnabled ? ['spawn_task'] : []),
835
+ ];
836
+ const mcpManager = mcpServers.length
837
+ ? new MCPManager({
838
+ servers: mcpServers,
839
+ toolBudgetTokens: mcpToolBudget,
840
+ callTimeoutMs: Math.max(1000, Math.floor(mcpCallTimeoutSec * 1000)),
841
+ offline: cfg.offline === true,
842
+ builtInToolNames,
843
+ enabledTools: mcpEnabledTools,
844
+ })
845
+ : null;
846
+ if (mcpManager) {
847
+ await mcpManager.init();
848
+ }
849
+ // LSP integration (Phase 17)
850
+ const lspCfg = cfg.lsp;
851
+ const lspEnabled = lspCfg?.enabled === true;
852
+ let lspManager = null;
853
+ if (lspEnabled) {
854
+ lspManager = new LspManager({
855
+ rootPath: cfg.dir ?? process.cwd(),
856
+ severityThreshold: lspCfg?.diagnostic_severity_threshold ?? 1,
857
+ quiet: Boolean(process.env.IDLEHANDS_QUIET_WARNINGS),
858
+ });
859
+ // Add explicitly configured servers.
860
+ if (Array.isArray(lspCfg?.servers)) {
861
+ for (const srv of lspCfg.servers) {
862
+ await lspManager.addServer(srv);
863
+ }
864
+ }
865
+ // Auto-detect servers on PATH if configured.
866
+ if (lspCfg?.auto_detect !== false) {
867
+ const detected = detectInstalledLspServers();
868
+ for (const d of detected) {
869
+ await lspManager.addServer({
870
+ language: d.language,
871
+ command: d.command,
872
+ args: d.args,
873
+ });
874
+ }
875
+ }
876
+ }
877
+ const mcpHasEnabledTools = (mcpManager?.listTools().length ?? 0) > 0;
878
+ const mcpLazySchemaMode = Boolean(mcpManager && mcpHasEnabledTools);
879
+ let mcpToolsLoaded = !mcpLazySchemaMode;
880
+ const getToolsSchema = () => buildToolsSchema({
881
+ activeVaultTools,
882
+ sysMode: cfg.mode === 'sys',
883
+ lspTools: lspManager?.hasServers() === true,
884
+ mcpTools: mcpToolsLoaded ? (mcpManager?.getEnabledToolSchemas() ?? []) : [],
885
+ allowSpawnTask: spawnTaskEnabled,
886
+ });
887
+ const vault = vaultEnabled ? (opts.runtime?.vault ?? new VaultStore()) : undefined;
888
+ if (vault) {
889
+ // Scope vault entries by project directory to prevent cross-project context leaks
890
+ vault.setProjectDir(cfg.dir ?? process.cwd());
891
+ }
892
+ if (vaultEnabled && !opts.runtime?.vault) {
893
+ await vault?.init().catch((e) => {
894
+ if (!process.env.IDLEHANDS_QUIET_WARNINGS) {
895
+ console.warn(`[warn] vault init failed: ${e?.message ?? e}`);
896
+ }
897
+ });
898
+ }
899
+ const lens = lensEnabled ? (opts.runtime?.lens ?? new LensStore()) : undefined;
900
+ if (!opts.runtime?.lens && lens) {
901
+ await lens.init().catch((e) => {
902
+ console.warn(`[warn] lens init failed: ${e?.message ?? e}`);
903
+ });
904
+ }
905
+ const projectCtx = await loadProjectContext(cfg).catch((e) => {
906
+ console.warn(`[warn] project context disabled for startup: ${e?.message ?? e}`);
907
+ return '';
908
+ });
909
+ const gitCtx = await loadGitContext(cfg.dir ?? process.cwd()).catch((e) => {
910
+ console.warn(`[warn] git context disabled for startup: ${e?.message ?? e}`);
911
+ return '';
912
+ });
913
+ let freshIndexSummary = '';
914
+ if (vault) {
915
+ try {
916
+ const keys = projectIndexKeys(cfg.dir ?? process.cwd());
917
+ const metaRow = await vault.getLatestByKey(keys.metaKey, 'system');
918
+ if (metaRow?.value) {
919
+ const meta = parseIndexMeta(metaRow.value);
920
+ if (meta && isFreshIndex(meta, 24 * 60 * 60 * 1000)) {
921
+ const summaryRow = await vault.getLatestByKey(keys.summaryKey, 'system');
922
+ freshIndexSummary = summaryRow?.value || indexSummaryLine(meta);
923
+ }
924
+ }
925
+ }
926
+ catch {
927
+ // best effort only
928
+ }
929
+ }
930
+ let sessionMeta = `[cwd: ${cfg.dir}]\n[harness: ${harness.id}]` +
931
+ (gitCtx ? `\n\n${gitCtx}` : '') +
932
+ (projectCtx ? `\n\n${projectCtx}` : '') +
933
+ (freshIndexSummary ? `\n\n${freshIndexSummary}` : '');
934
+ if (vaultEnabled && vaultMode === 'active') {
935
+ sessionMeta +=
936
+ '\n\n[Trifecta Vault] Active vault mode is enabled. Record high-signal decisions and reuse them with vault tools when needed.';
937
+ }
938
+ if (lensEnabled) {
939
+ sessionMeta += '\n\n[Trifecta Lens] Structural projection is enabled where available.';
940
+ }
941
+ if (lspManager?.hasServers()) {
942
+ const lspServers = lspManager.listServers();
943
+ const running = lspServers.filter((s) => s.running).length;
944
+ sessionMeta += `\n\n[LSP] ${running} language server(s) active: ${lspServers.map((s) => `${s.language} (${s.command})`).join(', ')}.`;
945
+ sessionMeta += '\n[LSP] Use lsp_diagnostics, lsp_symbols, lsp_hover, lsp_definition, lsp_references tools for semantic code intelligence.';
946
+ if (lensEnabled) {
947
+ sessionMeta += '\n[LSP+Lens] lsp_symbols combines semantic symbol data with structural Lens context when available.';
948
+ }
949
+ if (lspCfg?.proactive_diagnostics !== false) {
950
+ sessionMeta += '\n[LSP] Proactive diagnostics enabled: errors will be reported automatically after file edits.';
951
+ }
952
+ }
953
+ if (mcpManager) {
954
+ const mcpServers = mcpManager.listServers();
955
+ const connected = mcpServers.filter((s) => s.connected).length;
956
+ const enabledTools = mcpManager.listTools().length;
957
+ sessionMeta += `\n\n[MCP] ${connected}/${mcpServers.length} servers connected; ${enabledTools} tools enabled.`;
958
+ if (mcpLazySchemaMode) {
959
+ sessionMeta += `\n[MCP] Lazy schema mode on. MCP tools are hidden until requested.`;
960
+ sessionMeta += `\n[MCP] If external tools are needed, reply exactly with ${MCP_TOOLS_REQUEST_TOKEN}.`;
961
+ }
962
+ for (const w of mcpManager.getWarnings()) {
963
+ sessionMeta += `\n[MCP warning] ${w}`;
964
+ }
965
+ }
966
+ if (spawnTaskEnabled) {
967
+ const subDefaults = cfg.sub_agents ?? {};
968
+ const subMaxIter = Number.isFinite(subDefaults.max_iterations)
969
+ ? Math.max(1, Math.floor(Number(subDefaults.max_iterations)))
970
+ : 10;
971
+ sessionMeta += `\n\n[Sub-agents] spawn_task is available (isolated context, sequential queue, default max_iterations=${subMaxIter}).`;
972
+ }
973
+ // Harness-driven suffix: append to first user message (NOT system prompt — §9b KV cache rule)
974
+ if (harness.quirks.needsExplicitToolCallFormatReminder) {
975
+ sessionMeta += '\n\nIMPORTANT: Use the tool_calls mechanism to invoke tools. Do NOT write JSON tool invocations in your message text.';
976
+ }
977
+ if (harness.systemPromptSuffix) {
978
+ sessionMeta += '\n\n' + harness.systemPromptSuffix;
979
+ }
980
+ // Phase 9: sys-eager — inject full system snapshot into first message
981
+ if (cfg.sys_eager && cfg.mode === 'sys') {
982
+ try {
983
+ const snapshot = await collectSnapshot('all');
984
+ sessionMeta += '\n\n' + snapshot;
985
+ }
986
+ catch (e) {
987
+ console.warn(`[warn] sys-eager snapshot failed: ${e?.message ?? e}`);
988
+ }
989
+ }
990
+ const defaultSystemPrompt = SYSTEM_PROMPT;
991
+ let activeSystemPrompt = (cfg.system_prompt_override ?? '').trim() || defaultSystemPrompt;
992
+ let messages = [
993
+ { role: 'system', content: activeSystemPrompt }
994
+ ];
995
+ let sessionMetaPending = sessionMeta;
996
+ const setSystemPrompt = (prompt) => {
997
+ const next = String(prompt ?? '').trim();
998
+ if (!next)
999
+ throw new Error('system prompt cannot be empty');
1000
+ activeSystemPrompt = next;
1001
+ if (messages.length > 0 && messages[0].role === 'system') {
1002
+ messages[0] = { role: 'system', content: activeSystemPrompt };
1003
+ }
1004
+ else {
1005
+ messages.unshift({ role: 'system', content: activeSystemPrompt });
1006
+ }
1007
+ };
1008
+ const resetSystemPrompt = () => {
1009
+ setSystemPrompt(defaultSystemPrompt);
1010
+ };
1011
+ const reset = () => {
1012
+ messages = [
1013
+ { role: 'system', content: activeSystemPrompt }
1014
+ ];
1015
+ sessionMetaPending = sessionMeta;
1016
+ lastEditedPath = undefined;
1017
+ mcpToolsLoaded = !mcpLazySchemaMode;
1018
+ };
1019
+ const restore = (next) => {
1020
+ if (!Array.isArray(next) || next.length < 2) {
1021
+ throw new Error('restore: invalid messages array');
1022
+ }
1023
+ if (next[0].role !== 'system') {
1024
+ throw new Error('restore: first message must be system');
1025
+ }
1026
+ messages = next;
1027
+ activeSystemPrompt = String(next[0].content ?? defaultSystemPrompt);
1028
+ if (mcpManager) {
1029
+ const usedMcpTool = next.some((msg) => {
1030
+ if (msg?.role !== 'assistant' || !Array.isArray(msg.tool_calls))
1031
+ return false;
1032
+ return msg.tool_calls.some((tc) => mcpManager.hasTool(String(tc?.function?.name ?? '')));
1033
+ });
1034
+ mcpToolsLoaded = usedMcpTool || !mcpLazySchemaMode;
1035
+ }
1036
+ };
1037
+ let reqCounter = 0;
1038
+ let inFlight = null;
1039
+ let lastEditedPath;
1040
+ // Plan mode state (Phase 8)
1041
+ let planSteps = [];
1042
+ // Sub-agent queue state (Phase 18): enforce sequential execution on single-GPU setups.
1043
+ let subTaskSeq = 0;
1044
+ let subTaskQueuePending = 0;
1045
+ let subTaskQueueTail = Promise.resolve();
1046
+ const enqueueSubTask = async (runner) => {
1047
+ const queuePosition = subTaskQueuePending + 1;
1048
+ subTaskQueuePending += 1;
1049
+ const waitFor = subTaskQueueTail;
1050
+ let release;
1051
+ subTaskQueueTail = new Promise((resolve) => {
1052
+ release = resolve;
1053
+ });
1054
+ try {
1055
+ await waitFor;
1056
+ return await runner(queuePosition);
1057
+ }
1058
+ finally {
1059
+ subTaskQueuePending = Math.max(0, subTaskQueuePending - 1);
1060
+ release();
1061
+ }
1062
+ };
1063
+ const summarizeReplayDelta = async (beforeIds) => {
1064
+ if (!replay || !beforeIds)
1065
+ return [];
1066
+ const rows = await replay.list(10000);
1067
+ const byFile = new Map();
1068
+ for (const row of rows) {
1069
+ if (beforeIds.has(row.id))
1070
+ continue;
1071
+ byFile.set(row.filePath, (byFile.get(row.filePath) ?? 0) + 1);
1072
+ }
1073
+ return [...byFile.entries()]
1074
+ .sort((a, b) => a[0].localeCompare(b[0]))
1075
+ .map(([filePath, edits]) => `${filePath}${edits > 1 ? ` (${edits} edits)` : ''}`);
1076
+ };
1077
+ const runSpawnTaskCore = async (args, options) => {
1078
+ if (!spawnTaskEnabled) {
1079
+ throw new Error('spawn_task: disabled in this session');
1080
+ }
1081
+ const task = typeof args?.task === 'string' ? args.task.trim() : '';
1082
+ if (!task) {
1083
+ throw new Error('spawn_task: missing task');
1084
+ }
1085
+ const defaults = cfg.sub_agents ?? {};
1086
+ const taskId = ++subTaskSeq;
1087
+ const emitStatus = options?.emitStatus ?? (() => { });
1088
+ const maxIterations = Number.isFinite(args?.max_iterations)
1089
+ ? Math.max(1, Math.floor(Number(args.max_iterations)))
1090
+ : (Number.isFinite(defaults.max_iterations)
1091
+ ? Math.max(1, Math.floor(Number(defaults.max_iterations)))
1092
+ : 10);
1093
+ const timeoutSec = Number.isFinite(args?.timeout_sec)
1094
+ ? Math.max(1, Math.floor(Number(args.timeout_sec)))
1095
+ : (Number.isFinite(defaults.timeout_sec)
1096
+ ? Math.max(1, Math.floor(Number(defaults.timeout_sec)))
1097
+ : Math.max(30, Math.min(600, cfg.timeout)));
1098
+ const subMaxTokens = Number.isFinite(args?.max_tokens)
1099
+ ? Math.max(128, Math.floor(Number(args.max_tokens)))
1100
+ : (Number.isFinite(defaults.max_tokens)
1101
+ ? Math.max(128, Math.floor(Number(defaults.max_tokens)))
1102
+ : Math.max(256, Math.min(8192, maxTokens)));
1103
+ const resultTokenCap = Number.isFinite(defaults.result_token_cap)
1104
+ ? Math.max(256, Math.floor(Number(defaults.result_token_cap)))
1105
+ : DEFAULT_SUB_AGENT_RESULT_TOKEN_CAP;
1106
+ const parentApproval = cfg.approval_mode ?? 'default';
1107
+ const rawApproval = normalizeApprovalMode(args?.approval_mode)
1108
+ ?? normalizeApprovalMode(defaults.approval_mode)
1109
+ ?? parentApproval;
1110
+ // Sub-agents cannot escalate beyond the parent's approval mode.
1111
+ const approvalMode = capApprovalMode(rawApproval, parentApproval);
1112
+ const requestedModel = typeof args?.model === 'string' && args.model.trim()
1113
+ ? args.model.trim()
1114
+ : (typeof defaults.model === 'string' && defaults.model.trim() ? defaults.model.trim() : model);
1115
+ const requestedEndpoint = typeof args?.endpoint === 'string' && args.endpoint.trim()
1116
+ ? args.endpoint.trim()
1117
+ : (typeof defaults.endpoint === 'string' && defaults.endpoint.trim() ? defaults.endpoint.trim() : cfg.endpoint);
1118
+ const requestedSystemPrompt = typeof args?.system_prompt === 'string' && args.system_prompt.trim()
1119
+ ? args.system_prompt.trim()
1120
+ : (typeof defaults.system_prompt === 'string' && defaults.system_prompt.trim()
1121
+ ? defaults.system_prompt.trim()
1122
+ : DEFAULT_SUB_AGENT_SYSTEM_PROMPT);
1123
+ const cwd = cfg.dir ?? process.cwd();
1124
+ const ctxFiles = await buildSubAgentContextBlock(cwd, args?.context_files);
1125
+ let delegatedInstruction = task;
1126
+ // Explicitly inject cwd into the delegated task so the sub-agent knows where to work.
1127
+ delegatedInstruction += `\n\nIMPORTANT: Your working directory is "${cwd}". Create ALL files inside this directory using relative paths. Do NOT create files or directories outside this path.`;
1128
+ if (ctxFiles.block) {
1129
+ delegatedInstruction += `\n\n[Delegated context files]\n${ctxFiles.block}`;
1130
+ }
1131
+ if (ctxFiles.skipped.length) {
1132
+ delegatedInstruction += `\n\n[context skipped]\n- ${ctxFiles.skipped.join('\n- ')}`;
1133
+ }
1134
+ return await enqueueSubTask(async (queuePosition) => {
1135
+ if (queuePosition > 1) {
1136
+ emitStatus(taskId, 'queued', `position ${queuePosition}`);
1137
+ }
1138
+ const startedAt = Date.now();
1139
+ emitStatus(taskId, 'running', `${task.slice(0, 80)}${task.length > 80 ? '…' : ''}`);
1140
+ const replayBeforeIds = replay
1141
+ ? new Set((await replay.list(10000)).map((row) => row.id))
1142
+ : null;
1143
+ const subConfig = {
1144
+ ...cfg,
1145
+ endpoint: requestedEndpoint,
1146
+ model: requestedModel,
1147
+ max_iterations: maxIterations,
1148
+ max_tokens: subMaxTokens,
1149
+ timeout: timeoutSec,
1150
+ approval_mode: approvalMode,
1151
+ // Sub-agent inherits parent's no_confirm. If parent runs --no-confirm,
1152
+ // sub-agent also auto-confirms. Don't override based on approval_mode alone
1153
+ // (that made auto-edit behave like yolo only for sub-agents).
1154
+ no_confirm: cfg.no_confirm || approvalMode === 'yolo',
1155
+ system_prompt_override: requestedSystemPrompt,
1156
+ };
1157
+ if (defaults.inherit_context_file === false) {
1158
+ subConfig.no_context = true;
1159
+ }
1160
+ const subRuntime = {
1161
+ replay,
1162
+ lens,
1163
+ vault: defaults.inherit_vault === false ? undefined : vault,
1164
+ };
1165
+ const sameEndpoint = requestedEndpoint.replace(/\/+$/, '') === cfg.endpoint.replace(/\/+$/, '');
1166
+ if (sameEndpoint && opts.runtime?.client) {
1167
+ subRuntime.client = opts.runtime.client;
1168
+ }
1169
+ const subSession = await createSession({
1170
+ config: subConfig,
1171
+ apiKey: opts.apiKey,
1172
+ confirm: opts.confirm,
1173
+ confirmProvider: opts.confirmProvider,
1174
+ runtime: subRuntime,
1175
+ allowSpawnTask: false,
1176
+ });
1177
+ let subTurns = 0;
1178
+ let subToolCalls = 0;
1179
+ let failedMessage = '';
1180
+ let resultText = '';
1181
+ try {
1182
+ const subResult = await subSession.ask(delegatedInstruction, {
1183
+ signal: options?.signal,
1184
+ onTurnEnd: (ev) => {
1185
+ subTurns = ev.turn;
1186
+ subToolCalls = ev.toolCalls;
1187
+ emitStatus(taskId, 'running', `turn ${ev.turn}/${maxIterations}`);
1188
+ },
1189
+ });
1190
+ subTurns = subResult.turns;
1191
+ subToolCalls = subResult.toolCalls;
1192
+ resultText = subResult.text;
1193
+ }
1194
+ catch (e) {
1195
+ failedMessage = e?.message ?? String(e);
1196
+ }
1197
+ finally {
1198
+ await subSession.close().catch(() => { });
1199
+ }
1200
+ const duration = Date.now() - startedAt;
1201
+ const filesChanged = await summarizeReplayDelta(replayBeforeIds);
1202
+ if (failedMessage) {
1203
+ emitStatus(taskId, 'failed', failedMessage.slice(0, 120));
1204
+ return [
1205
+ `[sub-agent] status=failed`,
1206
+ `task: ${task}`,
1207
+ `duration: ${formatDurationMs(duration)}`,
1208
+ `model: ${requestedModel}`,
1209
+ `endpoint: ${requestedEndpoint}`,
1210
+ `approval_mode: ${approvalMode}`,
1211
+ `error: ${failedMessage}`,
1212
+ filesChanged.length ? `files_changed: ${filesChanged.join(', ')}` : 'files_changed: none',
1213
+ ].join('\n');
1214
+ }
1215
+ const capped = capTextByApproxTokens(resultText, resultTokenCap);
1216
+ emitStatus(taskId, 'completed', `${subTurns} turns, ${subToolCalls} tool calls`);
1217
+ return [
1218
+ `[sub-agent] status=completed`,
1219
+ `task: ${task}`,
1220
+ `duration: ${formatDurationMs(duration)}`,
1221
+ `model: ${requestedModel}`,
1222
+ `endpoint: ${requestedEndpoint}`,
1223
+ `approval_mode: ${approvalMode}`,
1224
+ `turns: ${subTurns}`,
1225
+ `tool_calls: ${subToolCalls}`,
1226
+ `files_changed: ${filesChanged.length ? filesChanged.join(', ') : 'none'}`,
1227
+ capped.truncated ? `[sub-agent] summarized result capped to ~${resultTokenCap} tokens` : `[sub-agent] summarized result within cap`,
1228
+ `result:\n${capped.text}`,
1229
+ ].join('\n');
1230
+ });
1231
+ };
1232
+ // Build a ToolContext — shared between plan-step execution and the agent loop.
1233
+ const buildToolCtx = (overrides) => {
1234
+ const defaultConfirmBridge = opts.confirmProvider
1235
+ ? async (prompt) => opts.confirmProvider.confirm({
1236
+ tool: '', args: {}, summary: prompt, mode: cfg.approval_mode,
1237
+ })
1238
+ : opts.confirm;
1239
+ return {
1240
+ cwd: cfg.dir ?? process.cwd(),
1241
+ noConfirm: cfg.no_confirm || cfg.approval_mode === 'yolo',
1242
+ dryRun: cfg.dry_run,
1243
+ mode: cfg.mode ?? 'code',
1244
+ confirm: overrides?.confirmBridge ?? defaultConfirmBridge,
1245
+ replay,
1246
+ vault,
1247
+ lens,
1248
+ signal: overrides?.signal ?? inFlight?.signal,
1249
+ onMutation: overrides?.onMutation ?? ((absPath) => { lastEditedPath = absPath; }),
1250
+ };
1251
+ };
1252
+ const executePlanStep = async (index) => {
1253
+ if (!planSteps.length)
1254
+ return ['No plan steps to execute.'];
1255
+ const toExec = index != null
1256
+ ? planSteps.filter(s => s.index === index && s.blocked && !s.executed)
1257
+ : planSteps.filter(s => s.blocked && !s.executed);
1258
+ if (!toExec.length)
1259
+ return ['No pending blocked steps to execute.'];
1260
+ const ctx = buildToolCtx();
1261
+ const results = [];
1262
+ for (const step of toExec) {
1263
+ const fn = tools[step.tool];
1264
+ try {
1265
+ let content = '';
1266
+ if (fn) {
1267
+ const value = await fn(ctx, step.args);
1268
+ content = typeof value === 'string' ? value : JSON.stringify(value);
1269
+ }
1270
+ else if (step.tool === 'spawn_task') {
1271
+ content = await runSpawnTaskCore(step.args, { signal: inFlight?.signal });
1272
+ }
1273
+ else if (LSP_TOOL_NAME_SET.has(step.tool) && lspManager) {
1274
+ if (step.tool === 'lsp_diagnostics') {
1275
+ content = await lspManager.getDiagnostics(typeof step.args?.path === 'string' ? step.args.path : undefined, typeof step.args?.severity === 'number' ? step.args.severity : undefined);
1276
+ }
1277
+ else if (step.tool === 'lsp_symbols') {
1278
+ content = await lspManager.getSymbols(String(step.args?.path ?? ''));
1279
+ }
1280
+ else if (step.tool === 'lsp_hover') {
1281
+ content = await lspManager.getHover(String(step.args?.path ?? ''), Number(step.args?.line ?? 0), Number(step.args?.character ?? 0));
1282
+ }
1283
+ else if (step.tool === 'lsp_definition') {
1284
+ content = await lspManager.getDefinition(String(step.args?.path ?? ''), Number(step.args?.line ?? 0), Number(step.args?.character ?? 0));
1285
+ }
1286
+ else if (step.tool === 'lsp_references') {
1287
+ content = await lspManager.getReferences(String(step.args?.path ?? ''), Number(step.args?.line ?? 0), Number(step.args?.character ?? 0), typeof step.args?.max_results === 'number' ? step.args.max_results : 50);
1288
+ }
1289
+ }
1290
+ else if (mcpManager?.hasTool(step.tool)) {
1291
+ const callArgs = step.args && typeof step.args === 'object' && !Array.isArray(step.args)
1292
+ ? step.args
1293
+ : {};
1294
+ content = await mcpManager.callTool(step.tool, callArgs);
1295
+ }
1296
+ else {
1297
+ throw new Error(`unknown tool: ${step.tool}`);
1298
+ }
1299
+ step.executed = true;
1300
+ step.result = content;
1301
+ results.push(`#${step.index} ✓ ${step.summary}`);
1302
+ // Inject the result into conversation so the model knows it was executed
1303
+ messages.push({
1304
+ role: 'user',
1305
+ content: `[Plan step #${step.index} executed] ${step.tool}: ${content.slice(0, 500)}`,
1306
+ });
1307
+ }
1308
+ catch (e) {
1309
+ const msg = e?.message ?? String(e);
1310
+ step.result = `ERROR: ${msg}`;
1311
+ results.push(`#${step.index} ✗ ${step.summary}: ${msg}`);
1312
+ }
1313
+ }
1314
+ return results;
1315
+ };
1316
+ const clearPlan = () => {
1317
+ planSteps = [];
1318
+ };
1319
/**
 * Shrinks the conversation history and reports what was removed.
 *
 * @param {{ hard?: boolean, topic?: string, dry?: boolean }} [opts]
 *   - `hard`: keep only the leading system message (when present) plus the
 *     last two other messages instead of running the context-budget pass.
 *   - `topic`: messages whose text contains this string (case-insensitive)
 *     are retained even if the compaction pass would have dropped them.
 *   - `dry`: compute the statistics only; `messages` and the vault are untouched.
 * @returns {Promise<{beforeMessages: number, afterMessages: number, freedTokens: number,
 *   archivedToolMessages: number, droppedMessages: number, dryRun: boolean}>}
 *   `afterMessages` is counted before the "[compacted: …]" notice is appended.
 */
const compactHistory = async (opts) => {
    const beforeMessages = messages.length;
    const beforeTokens = estimateTokensFromMessages(messages);
    let compacted;
    if (opts?.hard) {
        const sys = messages[0]?.role === 'system' ? [messages[0]] : [];
        // Take the tail from the non-system remainder; slicing the full list
        // would duplicate the system message when the history is very short.
        const tail = messages.slice(sys.length).slice(-2);
        compacted = [...sys, ...tail];
    }
    else {
        compacted = enforceContextBudget({
            messages,
            contextWindow,
            maxTokens,
            minTailMessages: 12,
            compactAt: cfg.compact_at ?? 0.8,
            toolSchemaTokens: estimateToolSchemaTokens(getToolsSchema()),
        });
    }
    // Membership is decided by object identity against the compacted list.
    const compactedByRefs = new Set(compacted);
    let dropped = messages.filter((m) => !compactedByRefs.has(m));
    if (opts?.topic) {
        const topic = opts.topic.toLowerCase();
        const mentionsTopic = (m) => userContentToText(m.content ?? '').toLowerCase().includes(topic);
        dropped = dropped.filter((m) => !mentionsTopic(m));
        const keepFromTopic = messages.filter(mentionsTopic);
        // NOTE(review): retained topic messages are appended after the compacted
        // list rather than merged back at their original positions — confirm
        // this ordering is intended.
        compacted = [...compacted, ...keepFromTopic.filter((m) => !compactedByRefs.has(m))];
    }
    const archivedToolMessages = dropped.filter((m) => m.role === 'tool').length;
    const afterMessages = compacted.length;
    const afterTokens = estimateTokensFromMessages(compacted);
    const freedTokens = Math.max(0, beforeTokens - afterTokens);
    if (!opts?.dry) {
        if (dropped.length && vault) {
            try {
                await vault.archiveToolMessages(dropped, new Map());
                await vault.note('compaction_summary', `Dropped ${dropped.length} messages (${freedTokens} tokens).`);
            }
            catch {
                // best-effort: archiving must not block the compaction itself
            }
        }
        messages = compacted;
        if (dropped.length) {
            messages.push({ role: 'system', content: `[compacted: ${dropped.length} messages archived to Vault - vault_search to recall]` });
        }
    }
    return {
        beforeMessages,
        afterMessages,
        freedTokens,
        archivedToolMessages,
        droppedMessages: dropped.length,
        dryRun: !!opts?.dry,
    };
};
1374
// --- Performance bookkeeping -------------------------------------------
// Running totals of server-reported prompt/completion tokens.
const cumulativeUsage = { prompt: 0, completion: 0 };
// One sample per turn: total duration, time to first token, and the
// prompt-processing / token-generation rates.
const turnDurationsMs = [];
const ttftSamplesMs = [];
const ppSamples = [];
const tgSamples = [];
// Most recent per-turn metrics and server health snapshot.
let lastTurnMetrics;
let lastServerHealth;
// Timestamp (ms) of the last model-list probe; used to throttle probing.
let lastModelsProbeMs = 0;

// --- Request/response capture -------------------------------------------
const capturesDir = path.join(stateDir(), 'captures');
let captureEnabled = false;
let capturePath;
// Latest exchange record seen by the capture hook, if any.
let lastCaptureRecord = null;
1386
/**
 * Builds a capture file path inside `capturesDir`, named after the current
 * ISO timestamp with ':' and '.' replaced by '-'.
 *
 * @returns {string} Path ending in `.jsonl`.
 */
const defaultCapturePath = () => {
    const isoNow = new Date().toISOString();
    const fileName = `${isoNow.replace(/[:.]/g, '-')}.jsonl`;
    return path.join(capturesDir, fileName);
};
1390
/**
 * Appends one record to a JSON-lines file, creating parent directories first.
 *
 * @param {unknown} record - Value serialized with `JSON.stringify`.
 * @param {string} outPath - Destination file.
 */
const appendCaptureRecord = async (record, outPath) => {
    const parentDir = path.dirname(outPath);
    await fs.mkdir(parentDir, { recursive: true });
    const line = `${JSON.stringify(record)}\n`;
    await fs.appendFile(outPath, line, 'utf8');
};
1394
/**
 * Registers an exchange hook on the current client (when it supports one).
 * The hook remembers the latest record and, while capture is enabled,
 * appends it to the active capture file.
 */
const wireCaptureHook = () => {
    if (typeof client.setExchangeHook === 'function') {
        const onExchange = async (record) => {
            lastCaptureRecord = record;
            if (captureEnabled) {
                // Pick a default file on first use and stick with it afterwards.
                const destination = capturePath || defaultCapturePath();
                capturePath = destination;
                await appendCaptureRecord(record, destination);
            }
        };
        client.setExchangeHook(onExchange);
    }
};
1406
// Attach the capture hook to the client that is currently in use.
wireCaptureHook();
// Replay is on unless Trifecta as a whole, or Replay specifically, is disabled in config.
const replayEnabled = !(cfg.trifecta?.enabled === false || cfg.trifecta?.replay?.enabled === false);
// Prefer a store supplied through the runtime options; otherwise create one.
const replay = replayEnabled ? (opts.runtime?.replay ?? new ReplayStore()) : undefined;
// Only a store created here is initialized. Init is best-effort: a failure is
// logged and must never crash the agent.
if (replay && replayEnabled && !opts.runtime?.replay) {
    await replay.init().catch((e) => {
        console.warn(`[warn] replay init failed: ${e?.message ?? e}`);
    });
}
1415
/** Aborts the in-flight request, if there is one; errors from aborting are ignored. */
const cancel = () => {
    try {
        if (inFlight != null) {
            inFlight.abort();
        }
    }
    catch {
        // nothing useful to do if aborting itself fails
    }
};
1423
/**
 * Returns the first candidate that is a finite number, or a string that
 * parses to one.
 *
 * Blank strings are skipped: `Number('')` and `Number('  ')` evaluate to 0,
 * which would otherwise mask a later, real value.
 *
 * @param {...unknown} values - Candidates, in priority order.
 * @returns {number | undefined} The first usable number, or `undefined`.
 */
const asNumber = (...values) => {
    for (const value of values) {
        if (typeof value === 'number') {
            if (Number.isFinite(value))
                return value;
            continue;
        }
        if (typeof value === 'string' && value.trim() !== '') {
            const parsed = Number(value);
            if (Number.isFinite(parsed))
                return parsed;
        }
    }
    return undefined;
};
1435
/**
 * Maps a raw health payload onto one snapshot shape, trying several
 * alternative field names for each metric.
 *
 * @param {any} raw - Health response as returned by the client.
 * @returns {object} Snapshot with `ok: true`, a `checkedAt` ISO timestamp, the
 *   normalized metrics (each `undefined` when no candidate field is usable)
 *   and the untouched `raw` payload.
 */
const normalizeHealth = (raw) => {
    // `model` may be a plain string or an object carrying `id` / `name`.
    const modelField = raw?.model;
    const modelName = (typeof modelField === 'string' ? modelField : undefined) ??
        modelField?.id ??
        modelField?.name ??
        raw?.loaded_model ??
        raw?.model_path;
    const contextUsedTokens = asNumber(raw?.kv_cache?.used_tokens, raw?.kv_used_tokens, raw?.cache?.used_tokens, raw?.context_used, raw?.ctx_used);
    const contextTotalTokens = asNumber(raw?.kv_cache?.total_tokens, raw?.kv_total_tokens, raw?.cache?.total_tokens, raw?.context_size, raw?.ctx_size);
    // Derive the percentage from the token counts when both are known;
    // otherwise fall back to a percentage field in the payload.
    let kvPct;
    if (contextUsedTokens != null && contextTotalTokens != null && contextTotalTokens > 0) {
        kvPct = (contextUsedTokens / contextTotalTokens) * 100;
    }
    else {
        kvPct = asNumber(raw?.kv_cache?.pct, raw?.kv_pct);
    }
    const pendingRequests = asNumber(raw?.pending_requests, raw?.queue?.pending, raw?.n_pending_requests, raw?.requests_pending);
    const ppTokensPerSec = asNumber(raw?.speed?.prompt_tokens_per_second, raw?.prompt_tokens_per_second, raw?.pp_tps, raw?.timings?.prompt_per_second);
    const tgTokensPerSec = asNumber(raw?.speed?.tokens_per_second, raw?.tokens_per_second, raw?.tg_tps, raw?.timings?.tokens_per_second, raw?.generation_tokens_per_second);
    let slotCount;
    if (Array.isArray(raw?.slots)) {
        slotCount = raw.slots.length;
    }
    else {
        slotCount = asNumber(raw?.slot_count, raw?.n_slots);
    }
    const checkedAt = new Date().toISOString();
    const status = typeof raw?.status === 'string' ? raw.status : 'ok';
    return {
        ok: true,
        checkedAt,
        model: modelName ? String(modelName) : undefined,
        status,
        contextUsedTokens,
        contextTotalTokens,
        kvPct,
        pendingRequests,
        ppTokensPerSec,
        tgTokensPerSec,
        slotCount,
        raw,
    };
};
1467
/**
 * Queries the client's health endpoint and stores the outcome in
 * `lastServerHealth`.
 *
 * @returns {Promise<object | null>} `null` when the client has no `health`
 *   method; otherwise the normalized snapshot, or an `{ ok: false, … }`
 *   snapshot describing the failure.
 */
const refreshServerHealth = async () => {
    if (typeof client.health !== 'function') {
        return null;
    }
    let snapshot;
    try {
        snapshot = normalizeHealth(await client.health());
    }
    catch (e) {
        snapshot = {
            ok: false,
            checkedAt: new Date().toISOString(),
            error: e?.message ?? String(e),
        };
        // Warn only when the previous snapshot was not already a failure.
        if (lastServerHealth?.ok !== false) {
            console.warn(`[server] health check failed: ${snapshot.error}`);
        }
    }
    lastServerHealth = snapshot;
    return snapshot;
};
1490
/**
 * Fetches the model list from the client, stores it in `modelsList`, and
 * returns the truthy model ids.
 *
 * @returns {Promise<string[]>}
 */
const listModels = async () => {
    const response = await client.models();
    const fresh = normalizeModelsResponse(response);
    modelsList = fresh;
    return fresh.data.flatMap((entry) => (entry.id ? [entry.id] : []));
};
1495
/**
 * Switches the active model and re-derives everything that depends on it:
 * harness, vision support, context window (unless set explicitly) and the
 * sampling parameters.
 *
 * @param {string} name - Model id to activate.
 */
const setModel = (name) => {
    model = name;
    // A non-blank `cfg.harness` is passed to harness selection, trimmed.
    const harnessOverride = cfg.harness ? cfg.harness.trim() : '';
    harness = selectHarness(model, harnessOverride ? harnessOverride : undefined);
    const nextMeta = modelsList?.data?.find((entry) => entry.id === model);
    supportsVision = supportsVisionModel(model, nextMeta, harness);
    if (!explicitContextWindow) {
        const derived = asNumber(nextMeta?.context_window, nextMeta?.context_length, nextMeta?.max_context_length);
        if (derived && derived > 0) {
            contextWindow = derived;
        }
    }
    // A larger harness default is applied only while max_tokens sits at the base value.
    const harnessMax = harness.defaults?.max_tokens;
    maxTokens = cfg.max_tokens ?? BASE_MAX_TOKENS;
    if (maxTokens === BASE_MAX_TOKENS && harnessMax && harnessMax > BASE_MAX_TOKENS) {
        maxTokens = harnessMax;
    }
    // Sampling parameters: config first, then harness default, then a fixed fallback.
    temperature = cfg.temperature ?? harness.defaults?.temperature ?? 0.2;
    topP = cfg.top_p ?? harness.defaults?.top_p ?? 0.95;
};
1513
/**
 * Points the session at a different endpoint, refreshes the model list and
 * selects a model.
 *
 * @param {string} endpoint - Base URL; trailing slashes are removed.
 * @param {string} [modelName] - Model to use. When blank, the current model
 *   is kept if the new endpoint lists it; otherwise one is auto-picked.
 */
const setEndpoint = async (endpoint, modelName) => {
    const normalized = endpoint.replace(/\/+$/, '');
    cfg.endpoint = normalized;
    if (opts.runtime?.client) {
        // Reuse the injected client, re-pointing it when it supports that.
        const injected = opts.runtime.client;
        injected.setEndpoint?.(normalized);
        client = injected;
    }
    else {
        client = new OpenAIClient(normalized, opts.apiKey, cfg.verbose);
    }
    if (typeof client.setVerbose === 'function') {
        client.setVerbose(cfg.verbose);
    }
    // The client may have been replaced, so the capture hook is registered again.
    wireCaptureHook();
    modelsList = normalizeModelsResponse(await client.models());
    let chosen = modelName?.trim();
    if (!chosen) {
        const current = modelsList.data.find((entry) => entry.id === model);
        chosen = current?.id ?? await autoPickModel(client, modelsList);
    }
    setModel(chosen);
};
1533
/**
 * Enables exchange capture and makes sure the capture file exists.
 *
 * @param {string} [filePath] - Destination file. Surrounding whitespace is
 *   ignored; when blank or omitted, a timestamped default path is used.
 * @returns {Promise<string>} The absolute path records will be appended to.
 */
const captureOn = async (filePath) => {
    // Resolve the trimmed value: resolving the raw string would make a padded
    // absolute path relative to the cwd and keep the padding in the file name.
    const requested = filePath?.trim();
    const target = requested ? path.resolve(requested) : defaultCapturePath();
    await fs.mkdir(path.dirname(target), { recursive: true });
    // Appending an empty string creates the file without altering existing content.
    await fs.appendFile(target, '', 'utf8');
    captureEnabled = true;
    capturePath = target;
    return target;
};
1541
/** Stops appending exchanges to the capture file; the chosen path is kept. */
const captureOff = () => void (captureEnabled = false);
1544
/**
 * Appends the most recent exchange record to a capture file.
 *
 * @param {string} [filePath] - Destination file. Surrounding whitespace is
 *   ignored; when blank or omitted, the active capture path (or a fresh
 *   default path) is used.
 * @returns {Promise<string>} The path the record was appended to.
 * @throws {Error} When no exchange has been recorded yet.
 */
const captureLast = async (filePath) => {
    if (!lastCaptureRecord) {
        throw new Error('No captured request/response pair is available yet.');
    }
    // Resolve the trimmed value so padding never ends up in the file name.
    const requested = filePath?.trim();
    const target = requested
        ? path.resolve(requested)
        : (capturePath || defaultCapturePath());
    await appendCaptureRecord(lastCaptureRecord, target);
    return target;
};
1554
/** Returns the MCP manager's server list, or an empty array when there is no manager. */
const listMcpServers = () => mcpManager?.listServers() ?? [];
1557
/**
 * Returns the MCP manager's tool list, or an empty array when there is no manager.
 *
 * @param {object} [opts] - Passed through unchanged to `mcpManager.listTools`.
 */
const listMcpTools = (opts) => mcpManager?.listTools(opts) ?? [];
1560
/**
 * Restarts the named MCP server.
 *
 * @param {string} name - Server name; trimmed before use.
 * @returns {Promise<object>} The manager's result, or
 *   `{ ok: false, message }` when there is no MCP manager.
 */
const restartMcpServer = async (name) => {
    if (mcpManager) {
        const serverName = String(name || '').trim();
        return await mcpManager.restartServer(serverName);
    }
    return { ok: false, message: 'MCP is not configured' };
};
1565
/**
 * Enables an MCP tool by (trimmed) name.
 *
 * @param {string} name
 * @returns The manager's result, or `false` when there is no MCP manager.
 */
const enableMcpTool = (name) => (mcpManager
    ? mcpManager.enableTool(String(name || '').trim())
    : false);
1570
/**
 * Disables an MCP tool by (trimmed) name.
 *
 * @param {string} name
 * @returns The manager's result, or `false` when there is no MCP manager.
 */
const disableMcpTool = (name) => (mcpManager
    ? mcpManager.disableTool(String(name || '').trim())
    : false);
1575
/** Returns the MCP manager's warnings, or an empty array when there is no manager. */
const mcpWarnings = () => mcpManager?.getWarnings() ?? [];
1578
/** Returns the LSP manager's server list, or an empty array when there is no manager. */
const listLspServers = () => lspManager?.listServers() ?? [];
1581
/**
 * Releases the session's resources: the MCP and LSP managers, then the vault
 * and the lens.
 *
 * Manager shutdown errors are ignored. The lens is closed even if closing the
 * vault throws, so one failure cannot leak the other resource; the vault/lens
 * error still propagates to the caller. Both results are awaited, so an
 * asynchronous `close()` finishes before this function resolves.
 */
const close = async () => {
    await mcpManager?.close().catch(() => { });
    await lspManager?.close().catch(() => { });
    try {
        await vault?.close();
    }
    finally {
        await lens?.close();
    }
};
1587
/**
 * Turns verbose mode on or off in the config and, when supported, on the client.
 *
 * @param {unknown} on - Coerced to a boolean.
 */
const setVerbose = (on) => {
    cfg.verbose = Boolean(on);
    const clientSupportsIt = typeof client.setVerbose === 'function';
    if (clientSupportsIt) {
        client.setVerbose(cfg.verbose);
    }
};
1593
/**
 * Summarizes the session's performance samples.
 *
 * @returns {object} Turn count, token totals, average time-to-first-token,
 *   average / p50 / p95 turn duration, and average prompt / generation rates.
 *   Averages over an empty sample set are `undefined`, except `avgTtcMs`,
 *   which falls back to 0; quantiles of an empty set are 0.
 */
const getPerfSummary = () => {
    const { prompt: totalPromptTokens, completion: totalCompletionTokens } = cumulativeUsage;
    // Work on a sorted copy so the recorded order is left alone.
    const ascending = turnDurationsMs.slice().sort((x, y) => x - y);
    const quantile = (q) => {
        if (ascending.length === 0) {
            return 0;
        }
        const lastIdx = ascending.length - 1;
        const position = Math.floor(lastIdx * q);
        return ascending[Math.max(0, Math.min(lastIdx, position))];
    };
    const mean = (samples) => {
        if (!samples.length) {
            return undefined;
        }
        let sum = 0;
        for (const sample of samples) {
            sum += sample;
        }
        return sum / samples.length;
    };
    return {
        turns: turnDurationsMs.length,
        totalTokens: totalPromptTokens + totalCompletionTokens,
        totalPromptTokens,
        totalCompletionTokens,
        avgTtftMs: mean(ttftSamplesMs),
        avgTtcMs: mean(turnDurationsMs) ?? 0,
        p50TtcMs: quantile(0.5),
        p95TtcMs: quantile(0.95),
        avgPpTokensPerSec: mean(ppSamples),
        avgTgTokensPerSec: mean(tgSamples),
    };
};
1618
/**
 * Probes the server's model list (at most once every 30 s) and switches to the
 * first listed model when the current one is no longer offered.
 *
 * If the probe fails, waits for the server to become ready again, showing a
 * spinner on a TTY or a single warning otherwise. Gives up silently if the
 * server does not come back. Does nothing when
 * `cfg.auto_detect_model_change` is `false`.
 */
const maybeAutoDetectModelChange = async () => {
    if (cfg.auto_detect_model_change === false)
        return;
    const now = Date.now();
    if (now - lastModelsProbeMs < 30_000)
        return;
    lastModelsProbeMs = now;
    let fresh;
    try {
        fresh = normalizeModelsResponse(await client.models());
    }
    catch {
        const frames = ['⠋', '⠙', '⠹', '⠸', '⠼', '⠴', '⠦', '⠧', '⠇', '⠏'];
        const spinnerStart = Date.now();
        let spinnerIdx = 0;
        let spinnerTimer;
        if (process.stderr.isTTY) {
            spinnerTimer = setInterval(() => {
                const elapsedSec = Math.floor((Date.now() - spinnerStart) / 1000);
                const frame = frames[spinnerIdx % frames.length];
                spinnerIdx++;
                process.stderr.write(`\r${frame} Server unavailable - waiting for reconnect (${elapsedSec}s)...`);
            }, 120);
        }
        else {
            console.warn('[model] Server unavailable - waiting for reconnect...');
        }
        try {
            await client.waitForReady({ timeoutMs: 120_000, pollMs: 2_000 });
            fresh = normalizeModelsResponse(await client.models());
        }
        catch {
            return;
        }
        finally {
            if (spinnerTimer) {
                clearInterval(spinnerTimer);
                // Erase the spinner line before anything else is printed.
                process.stderr.write('\r\x1b[K');
            }
        }
        // Logged only after the spinner line is cleared; otherwise the message
        // would be appended to the spinner text on the same terminal line.
        console.warn('[model] Reconnected to server.');
    }
    modelsList = fresh;
    if (!fresh.data.length)
        return;
    const exists = fresh.data.some((m) => m.id === model);
    if (exists)
        return;
    const previousModel = model;
    const nextModel = fresh.data[0].id;
    setModel(nextModel);
    messages.push({
        role: 'system',
        content: '[system] Model changed mid-session. Previous context may not transfer perfectly.'
    });
    console.warn(`[model] Server model changed: ${previousModel} → ${nextModel} - switching harness to ${harness.id}`);
};
1675
+ const ask = async (instruction, hooks) => {
1676
+ // Harness can override max_iterations for models that make bad decisions (§4i)
1677
+ const maxIters = harness.quirks.maxIterationsOverride
1678
+ ? Math.min(cfg.max_iterations, harness.quirks.maxIterationsOverride)
1679
+ : cfg.max_iterations;
1680
+ const wallStart = Date.now();
1681
+ // Prepend session meta to the first user instruction (§9b: variable context
1682
+ // goes in first user message, not system prompt, to preserve KV cache).
1683
+ // This avoids two consecutive user messages without an assistant response.
1684
+ let userContent = instruction;
1685
+ if (sessionMetaPending) {
1686
+ if (typeof instruction === 'string') {
1687
+ userContent = `${sessionMetaPending}\n\n${instruction}`;
1688
+ }
1689
+ else {
1690
+ userContent = [{ type: 'text', text: sessionMetaPending }, ...instruction];
1691
+ }
1692
+ sessionMetaPending = null;
1693
+ }
1694
+ messages.push({ role: 'user', content: userContent });
1695
+ const hookObj = typeof hooks === 'function' ? { onToken: hooks } : hooks ?? {};
1696
+ let turns = 0;
1697
+ let toolCalls = 0;
1698
+ // Read-only tool call budgets (§ anti-scan guardrails)
1699
+ const READ_ONLY_PER_TURN_CAP = 6;
1700
+ const READ_BUDGET_WARN = 15;
1701
+ const READ_BUDGET_HARD = harness.quirks.readBudget ?? 20;
1702
+ let cumulativeReadOnlyCalls = 0;
1703
+ // Directory scan detection: track unique file paths per parent dir.
1704
+ // Only counts distinct files (re-reads of the same file after editing are normal).
1705
+ const readDirFiles = new Map();
1706
+ const blockedDirs = new Set();
1707
+ // Same-search detection: track search= params across read_file calls
1708
+ const searchTermFiles = new Map(); // search term → set of file paths
1709
+ // identical tool call signature counts across this ask() run
1710
+ const sigCounts = new Map();
1711
+ const toolNameByCallId = new Map();
1712
+ // Loop-break helper state: bump mutationVersion whenever a tool mutates files.
1713
+ // We also record the mutationVersion at which a given signature was last seen.
1714
+ let mutationVersion = 0;
1715
+ const mutationVersionBySig = new Map();
1716
+ // Consecutive-repeat tracking for read-only tools: only count identical calls
1717
+ // that happen back-to-back with no other tool calls in between.
1718
+ let lastTurnSigs = new Set();
1719
+ const consecutiveCounts = new Map();
1720
+ let lastPassiveVaultQuery = '';
1721
+ let malformedCount = 0;
1722
/**
 * In passive vault mode, searches the vault with the text of the latest user
 * message and, when there are hits, appends them to the conversation as a
 * user message. Skipped when the query equals the last one that was injected.
 */
const maybeInjectVaultContext = async () => {
    if (!vault || vaultMode !== 'passive')
        return;
    // Walk backwards to find the most recent user message.
    let lastUser = null;
    for (let j = messages.length - 1; j >= 0; j--) {
        if (messages[j].role === 'user') {
            lastUser = messages[j];
            break;
        }
    }
    const userText = userContentToText((lastUser?.content ?? '')).trim();
    if (!userText)
        return;
    // Only the first 200 characters are used as the search query.
    const query = userText.slice(0, 200);
    if (query === lastPassiveVaultQuery)
        return;
    const hits = await vault.search(query, 4);
    if (!hits.length)
        return;
    // One line per hit; whitespace runs are collapsed and the value is capped
    // at 180 characters. `hits` is non-empty here, so `lines` is as well.
    const lines = hits.map((r) => `${r.updatedAt} ${r.kind} ${r.key ?? r.tool ?? r.id} ${String(r.value ?? r.snippet ?? '').replace(/\s+/g, ' ').slice(0, 180)}`);
    lastPassiveVaultQuery = query;
    messages.push({
        role: 'user',
        content: `[Trifecta Vault (passive)] Relevant entries for "${query}":\n${lines.join('\n')}`
    });
};
1750
/**
 * Returns a copy of a tool message with its content replaced by the lens
 * summary when that summary is a non-empty, strictly shorter string. In every
 * other case (including summarization errors) the message is returned as-is.
 *
 * @param {object} msg - Conversation message.
 * @returns {Promise<object>}
 */
const archiveToolOutputForVault = async (msg) => {
    const eligible = lens && vault && msg.role === 'tool' && typeof msg.content === 'string';
    if (!eligible) {
        return msg;
    }
    // The tool name is looked up from the call id recorded for this message.
    const tool = msg.tool_call_id ? toolNameByCallId.get(msg.tool_call_id) : undefined;
    if (!tool) {
        return msg;
    }
    try {
        const summary = await lens.summarizeToolOutput(msg.content, tool);
        const isShorter = typeof summary === 'string' && summary.length && summary.length < msg.content.length;
        if (isShorter) {
            return { ...msg, content: summary };
        }
    }
    catch {
        // ignore and store raw tool output
    }
    return msg;
};
1767
/**
 * Records an agent failure as a vault note. Best-effort: neither a lens nor a
 * vault error is allowed to escape.
 *
 * @param {unknown} error - The failure; non-Error values are stringified.
 * @param {string} [contextLine] - Optional context placed before the reason.
 */
const persistFailure = async (error, contextLine) => {
    if (!vault)
        return;
    const reason = error instanceof Error ? error.message : String(error);
    // Paths in the message are sanitized against the project directory before
    // the text is stored in the vault.
    const sanitized = sanitizePathsInMessage(`agent abort: ${contextLine ?? ''} ${reason}`, cfg.dir ?? process.cwd());
    let compact = sanitized;
    if (lens) {
        try {
            compact = await lens.summarizeFailureMessage(sanitized);
        }
        catch {
            // Summarization is optional; keep the unsummarized text.
            compact = sanitized;
        }
    }
    try {
        await vault.note('agent failure', compact);
    }
    catch {
        // best-effort only
    }
};
1782
/**
 * Writes a one-line sub-agent status update through the `onToken` hook, when
 * one is set.
 *
 * @param {string|number} taskId
 * @param {string} status
 * @param {string} [detail] - Appended after an em dash when truthy.
 */
const emitSubAgentStatus = (taskId, status, detail) => {
    if (hookObj.onToken) {
        const suffix = detail ? ` — ${detail}` : '';
        hookObj.onToken(`\n[sub-agent #${taskId}] ${status}${suffix}\n`);
    }
};
1788
/**
 * Returns the LSP symbol listing for a file, followed by the lens structural
 * skeleton when a lens is available and yields one.
 *
 * @param {string} filePathRaw - Path as given; a path not starting with '/'
 *   is resolved against `cfg.dir` (or the process cwd).
 * @returns {Promise<string>} `'[lsp] unavailable'` when there is no LSP manager.
 */
const buildLspLensSymbolOutput = async (filePathRaw) => {
    if (!lspManager) {
        return '[lsp] unavailable';
    }
    const semantic = await lspManager.getSymbols(filePathRaw);
    let structural = '';
    if (lens) {
        const baseDir = cfg.dir ?? process.cwd();
        const absPath = filePathRaw.startsWith('/') ? filePathRaw : path.resolve(baseDir, filePathRaw);
        // Read and projection failures both degrade to "no skeleton".
        const body = await fs.readFile(absPath, 'utf8').catch(() => '');
        if (body) {
            const projection = await lens.projectFile(absPath, body).catch(() => '');
            structural = extractLensBody(projection);
        }
    }
    return structural
        ? `${semantic}\n\n[lens] Structural skeleton:\n${structural}`
        : semantic;
};
1805
/**
 * Runs a spawn task through `runSpawnTaskCore`, passing along the caller's
 * abort signal and the sub-agent status emitter.
 *
 * @param {object} args - Forwarded unchanged to `runSpawnTaskCore`.
 */
const runSpawnTask = async (args) => {
    const spawnOptions = {
        signal: hookObj.signal,
        emitStatus: emitSubAgentStatus,
    };
    return await runSpawnTaskCore(args, spawnOptions);
};
1811
+ // tool-loop
1812
+ try {
1813
+ while (turns < maxIters) {
1814
+ // Immediate bail if cancelled (Ctrl+C)
1815
+ if (inFlight?.signal?.aborted)
1816
+ break;
1817
+ turns++;
1818
+ const wallElapsed = (Date.now() - wallStart) / 1000;
1819
+ if (wallElapsed > cfg.timeout) {
1820
+ throw new Error(`session timeout exceeded (${cfg.timeout}s) after ${wallElapsed.toFixed(1)}s`);
1821
+ }
1822
+ await maybeAutoDetectModelChange();
1823
+ const beforeMsgs = messages;
1824
+ const beforeTokens = estimateTokensFromMessages(beforeMsgs);
1825
+ const compacted = enforceContextBudget({
1826
+ messages: beforeMsgs,
1827
+ contextWindow,
1828
+ maxTokens: maxTokens,
1829
+ minTailMessages: 12,
1830
+ compactAt: cfg.compact_at ?? 0.8,
1831
+ toolSchemaTokens: estimateToolSchemaTokens(getToolsSchema()),
1832
+ });
1833
+ const compactedDropped = beforeMsgs.length > compacted.length || estimateTokensFromMessages(compacted) < beforeTokens;
1834
+ const compactedByRefs = new Set(compacted);
1835
+ const dropped = beforeMsgs.filter((m) => !compactedByRefs.has(m));
1836
+ if (dropped.length && vault) {
1837
+ try {
1838
+ const toArchive = lens
1839
+ ? await Promise.all(dropped.map((m) => archiveToolOutputForVault(m)))
1840
+ : dropped;
1841
+ await vault.archiveToolMessages(toArchive, toolNameByCallId);
1842
+ }
1843
+ catch (e) {
1844
+ console.warn(`[warn] vault archive failed: ${e instanceof Error ? e.message : String(e)}`);
1845
+ }
1846
+ }
1847
+ messages = compacted;
1848
+ if (vaultMode === 'passive' && compactedDropped) {
1849
+ await maybeInjectVaultContext().catch(() => { });
1850
+ }
1851
+ const ac = makeAbortController();
1852
+ inFlight = ac;
1853
+ // If caller provided an AbortSignal (bench iteration timeout, etc), propagate it.
1854
+ const callerSignal = hookObj.signal;
1855
+ const onCallerAbort = () => ac.abort();
1856
+ callerSignal?.addEventListener('abort', onCallerAbort, { once: true });
1857
+ // Per-request timeout: the lesser of 120s or the remaining session wall time.
1858
+ // This prevents a single slow request from consuming the entire session budget.
1859
+ const wallRemaining = Math.max(0, cfg.timeout - (Date.now() - wallStart) / 1000);
1860
+ const reqTimeout = Math.min(120, Math.max(10, wallRemaining));
1861
+ const timer = setTimeout(() => ac.abort(), reqTimeout * 1000);
1862
+ reqCounter++;
1863
+ const turnStartMs = Date.now();
1864
+ let ttftMs;
1865
+ const onFirstDelta = () => {
1866
+ if (ttftMs === undefined) {
1867
+ ttftMs = Date.now() - turnStartMs;
1868
+ }
1869
+ hookObj.onFirstDelta?.();
1870
+ };
1871
+ let resp;
1872
+ try {
1873
+ resp = await client.chatStream({
1874
+ model,
1875
+ messages,
1876
+ tools: getToolsSchema(),
1877
+ tool_choice: 'auto',
1878
+ temperature,
1879
+ top_p: topP,
1880
+ max_tokens: maxTokens,
1881
+ extra: { cache_prompt: cfg.cache_prompt ?? true },
1882
+ signal: ac.signal,
1883
+ requestId: `r${reqCounter}`,
1884
+ onToken: hookObj.onToken,
1885
+ onFirstDelta,
1886
+ });
1887
+ }
1888
+ finally {
1889
+ clearTimeout(timer);
1890
+ callerSignal?.removeEventListener('abort', onCallerAbort);
1891
+ inFlight = null;
1892
+ }
1893
+ const ttcMs = Date.now() - turnStartMs;
1894
+ const promptTokensTurn = resp.usage?.prompt_tokens ?? 0;
1895
+ const completionTokensTurn = resp.usage?.completion_tokens ?? 0;
1896
+ // Track server-reported usage when available
1897
+ if (resp.usage) {
1898
+ cumulativeUsage.prompt += promptTokensTurn;
1899
+ cumulativeUsage.completion += completionTokensTurn;
1900
+ }
1901
+ const ppTps = ttftMs && ttftMs > 0 && promptTokensTurn > 0
1902
+ ? promptTokensTurn / (ttftMs / 1000)
1903
+ : undefined;
1904
+ const genWindowMs = Math.max(1, ttcMs - (ttftMs ?? 0));
1905
+ const tgTps = completionTokensTurn > 0
1906
+ ? completionTokensTurn / (genWindowMs / 1000)
1907
+ : undefined;
1908
+ if (ttcMs > 0)
1909
+ turnDurationsMs.push(ttcMs);
1910
+ if (ttftMs != null && ttftMs > 0)
1911
+ ttftSamplesMs.push(ttftMs);
1912
+ if (ppTps != null && Number.isFinite(ppTps) && ppTps > 0)
1913
+ ppSamples.push(ppTps);
1914
+ if (tgTps != null && Number.isFinite(tgTps) && tgTps > 0)
1915
+ tgSamples.push(tgTps);
1916
+ const slowThreshold = cfg.slow_tg_tps_threshold ?? 10;
1917
+ if (tgTps != null && Number.isFinite(tgTps) && tgTps > 0 && tgTps < slowThreshold) {
1918
+ console.warn(`[perf] Generation slowed to ${tgTps.toFixed(1)} t/s - context may be too large`);
1919
+ }
1920
+ let healthSnapshot;
1921
+ if (cfg.show_server_metrics !== false) {
1922
+ const health = await refreshServerHealth();
1923
+ if (health)
1924
+ healthSnapshot = health;
1925
+ }
1926
+ lastTurnMetrics = {
1927
+ totalMs: ttcMs,
1928
+ ttftMs,
1929
+ promptTokens: promptTokensTurn,
1930
+ completionTokens: completionTokensTurn,
1931
+ ppTokensPerSec: ppTps,
1932
+ tgTokensPerSec: tgTps,
1933
+ health: healthSnapshot,
1934
+ };
1935
+ const msg = resp.choices?.[0]?.message;
1936
+ const content = msg?.content ?? '';
1937
+ // Conditionally strip thinking blocks based on harness config (§4i).
1938
+ // Non-reasoning models (thinking.strip === false) never emit <think> blocks,
1939
+ // so stripping is a no-op — but we skip the regex work entirely.
1940
+ const st = harness.thinking.strip ? stripThinking(content) : { visible: content, thinking: '' };
1941
+ // Strip XML tool-call tag fragments that leak into visible narration
1942
+ // when llama-server partially parses Qwen/Hermes XML tool calls.
1943
+ const visible = st.visible
1944
+ .replace(/<\/?tool_call>/g, '')
1945
+ .replace(/<function=[\w.-]+>/g, '')
1946
+ .replace(/<\/function>/g, '')
1947
+ .replace(/<parameter=[\w.-]+>/g, '')
1948
+ .replace(/<\/parameter>/g, '')
1949
+ .trim();
1950
+ // Show thinking tokens in verbose mode (plan §10)
1951
+ if (cfg.verbose && st.thinking) {
1952
+ console.warn(`[thinking] ${st.thinking}`);
1953
+ }
1954
+ let toolCallsArr = msg?.tool_calls;
1955
+ // For models with unreliable tool_calls arrays, validate entries and
1956
+ // fall through to content parsing if they look malformed (§4i).
1957
+ if (toolCallsArr?.length && !harness.toolCalls.reliableToolCallsArray) {
1958
+ const hasValid = toolCallsArr.some(tc => tc.function?.name && typeof tc.function.name === 'string' && tc.function.name.length > 0);
1959
+ if (!hasValid) {
1960
+ if (cfg.verbose) {
1961
+ console.warn(`[harness] tool_calls array present but no valid entries (reliableToolCallsArray=false), trying content fallback`);
1962
+ }
1963
+ toolCallsArr = undefined;
1964
+ }
1965
+ }
1966
+ if ((!toolCallsArr || !toolCallsArr.length) && content) {
1967
+ const fallback = parseToolCallsFromContent(content);
1968
+ if (fallback?.length) {
1969
+ toolCallsArr = fallback;
1970
+ if (cfg.verbose) {
1971
+ console.warn(`[harness] extracted ${fallback.length} tool call(s) from content (contentFallbackLikely=${harness.toolCalls.contentFallbackLikely})`);
1972
+ }
1973
+ }
1974
+ }
1975
+ // Strip markdown code fences from tool arguments if harness says model does this
1976
+ if (toolCallsArr?.length && harness.quirks.emitsMarkdownInToolArgs) {
1977
+ for (const tc of toolCallsArr) {
1978
+ if (tc.function?.arguments) {
1979
+ tc.function.arguments = stripMarkdownFences(tc.function.arguments);
1980
+ }
1981
+ }
1982
+ }
1983
+ if (toolCallsArr && toolCallsArr.length) {
1984
+ // Deduplicate ghost tool calls: if llama-server's XML parser splits one
1985
+ // tool call into two entries (one with full args, one empty/partial),
1986
+ // drop the empty one. Only removes entries where a richer version of the
1987
+ // same tool name exists with strictly more params. Preserves genuine
1988
+ // parallel calls (e.g. 13x list_dir with same args = intentional).
1989
+ if (toolCallsArr.length > 1) {
1990
+ const byName = new Map();
1991
+ for (const tc of toolCallsArr) {
1992
+ const n = tc.function?.name ?? '';
1993
+ let argCount = 0;
1994
+ try {
1995
+ argCount = Object.keys(JSON.parse(tc.function?.arguments ?? '{}')).length;
1996
+ }
1997
+ catch { }
1998
+ if (!byName.has(n))
1999
+ byName.set(n, []);
2000
+ byName.get(n).push({ tc, argCount });
2001
+ }
2002
+ const deduped = [];
2003
+ for (const [, group] of byName) {
2004
+ if (group.length > 1) {
2005
+ const maxArgs = Math.max(...group.map(g => g.argCount));
2006
+ // Drop entries with strictly fewer args than the richest (ghost duplicates).
2007
+ // Keep ALL entries that have the max arg count (genuine parallel calls).
2008
+ for (const g of group) {
2009
+ if (g.argCount >= maxArgs || maxArgs === 0) {
2010
+ deduped.push(g.tc);
2011
+ }
2012
+ }
2013
+ }
2014
+ else {
2015
+ deduped.push(group[0].tc);
2016
+ }
2017
+ }
2018
+ if (deduped.length < toolCallsArr.length) {
2019
+ if (cfg.verbose)
2020
+ console.warn(`[dedup] dropped ${toolCallsArr.length - deduped.length} ghost tool call(s)`);
2021
+ }
2022
+ toolCallsArr = deduped;
2023
+ }
2024
+ // Newline after model narration before tool execution, so the next
2025
+ // narration chunk starts on a fresh line (avoids wall-of-text output).
2026
+ if (visible && hookObj.onToken)
2027
+ hookObj.onToken('\n');
2028
+ toolCalls += toolCallsArr.length;
2029
+ messages.push({ role: 'assistant', content: visible || '', tool_calls: toolCallsArr });
2030
+ // sigCounts is scoped to the entire ask() run (see above)
2031
+ // Bridge ConfirmationProvider → legacy confirm callback for tools.
2032
+ // If a ConfirmationProvider is given, wrap it; otherwise fall back to raw callback.
2033
+ // The bridge accepts an optional context object for rich confirm data.
2034
+ const confirmBridge = opts.confirmProvider
2035
+ ? async (prompt, bridgeCtx) => opts.confirmProvider.confirm({
2036
+ tool: bridgeCtx?.tool ?? '', args: bridgeCtx?.args ?? {}, summary: prompt,
2037
+ diff: bridgeCtx?.diff, mode: cfg.approval_mode,
2038
+ })
2039
+ : opts.confirm;
2040
+ const ctx = buildToolCtx({
2041
+ signal: ac.signal,
2042
+ confirmBridge,
2043
+ onMutation: (absPath) => {
2044
+ lastEditedPath = absPath;
2045
+ mutationVersion++;
2046
+ },
2047
+ });
2048
+ const isReadOnlyToolDynamic = (toolName) => {
2049
+ return isReadOnlyTool(toolName) || LSP_TOOL_NAME_SET.has(toolName) || Boolean(mcpManager?.isToolReadOnly(toolName));
2050
+ };
2051
+ const fileMutationsInTurn = toolCallsArr.filter((tc) => FILE_MUTATION_TOOL_SET.has(tc.function?.name)).length;
2052
+ if (fileMutationsInTurn >= 3 && isGitDirty(ctx.cwd)) {
2053
+ const shouldStash = confirmBridge
2054
+ ? await confirmBridge(`Working tree is dirty and the agent plans ${fileMutationsInTurn} file edits. Stash current changes first? [Y/n]`, { tool: 'git_stash', args: { fileMutationsInTurn } })
2055
+ : false;
2056
+ if (shouldStash) {
2057
+ const stashed = stashWorkingTree(ctx.cwd);
2058
+ if (!stashed.ok) {
2059
+ console.warn(`[warn] auto-stash failed: ${stashed.message}`);
2060
+ }
2061
+ }
2062
+ }
2063
+ const resolveCallId = (tc) => tc.id || `call_${Date.now()}_${toolNameByCallId.size}`;
2064
+ // Pre-dispatch loop detection: check tool calls against previous turns.
2065
+ // We deduplicate within a single response (a model may emit multiple identical
2066
+ // read_file calls in one parallel batch — that's fine). We only count unique
2067
+ // signatures per LLM response, then check across responses.
2068
+ //
2069
+ // Important: repeated `exec {command:"npm test"}` can be normal during fix loops.
2070
+ // We only treat repeated exec as a loop if no file mutations happened since the
2071
+ // last time we saw that exact exec signature.
2072
+ const turnSigs = new Set();
2073
+ for (const tc of toolCallsArr) {
2074
+ const sig = `${tc.function.name}:${tc.function.arguments ?? '{}'}`;
2075
+ turnSigs.add(sig);
2076
+ }
2077
+ // Track whether a mutation happened since a given signature was last seen.
2078
+ // (Tool-loop is single-threaded across turns; this is safe to keep in-memory.)
2079
+ for (const sig of turnSigs) {
2080
+ sigCounts.set(sig, (sigCounts.get(sig) ?? 0) + 1);
2081
+ const toolName = sig.split(':')[0];
2082
+ // For exec loops, only break if nothing changed since last identical exec.
2083
+ if (toolName === 'exec') {
2084
+ // If this exact exec signature was seen before, record the mutation version at that time.
2085
+ // (First time we see it, assume it's OK.)
2086
+ const seenAt = mutationVersionBySig.get(sig);
2087
+ const hasMutatedSince = seenAt === undefined ? true : mutationVersion !== seenAt;
2088
+ // Update to "now" for next turn.
2089
+ mutationVersionBySig.set(sig, mutationVersion);
2090
+ if (!hasMutatedSince) {
2091
+ // Allow a few more repeats for exec since "run tests" loops are common.
2092
+ const loopThreshold = harness.quirks.loopsOnToolError ? 3 : 6;
2093
+ if ((sigCounts.get(sig) ?? 0) >= loopThreshold) {
2094
+ const args = sig.slice(toolName.length + 1);
2095
+ const argsPreview = args.length > 220 ? args.slice(0, 220) + '…' : args;
2096
+ throw new Error(`tool ${toolName}: identical call repeated ${loopThreshold}x across turns; breaking loop. ` +
2097
+ `args=${argsPreview}`);
2098
+ }
2099
+ }
2100
+ continue;
2101
+ }
2102
+ // Read-only tools: only count consecutive identical calls (back-to-back turns
2103
+ // with no other tool calls in between). A read → edit → read cycle is normal
2104
+ // and resets the counter. After 4 consecutive identical reads, inject a hint.
2105
+ if (isReadOnlyTool(toolName)) {
2106
+ // Check if this sig was also in the previous turn's set
2107
+ if (lastTurnSigs.has(sig)) {
2108
+ consecutiveCounts.set(sig, (consecutiveCounts.get(sig) ?? 1) + 1);
2109
+ }
2110
+ else {
2111
+ consecutiveCounts.set(sig, 1);
2112
+ }
2113
+ const consec = consecutiveCounts.get(sig) ?? 1;
2114
+ if (consec >= 4) {
2115
+ const args = sig.slice(toolName.length + 1);
2116
+ const argsPreview = args.length > 220 ? args.slice(0, 220) + '…' : args;
2117
+ messages.push({
2118
+ role: 'user',
2119
+ content: `[System] You have read the same resource ${consec} consecutive times (${toolName} ${argsPreview}). The content has not changed. Please proceed with your task using the information you already have.`,
2120
+ });
2121
+ }
2122
+ continue;
2123
+ }
2124
+ // Default behavior for mutating/other tools: break on repeated identical signature.
2125
+ const loopThreshold = harness.quirks.loopsOnToolError ? 2 : 3;
2126
+ if ((sigCounts.get(sig) ?? 0) >= loopThreshold) {
2127
+ const args = sig.slice(toolName.length + 1);
2128
+ const argsPreview = args.length > 220 ? args.slice(0, 220) + '…' : args;
2129
+ throw new Error(`tool ${toolName}: identical call repeated ${loopThreshold}x across turns; breaking loop. ` +
2130
+ `args=${argsPreview}\n` +
2131
+ `Hint: you repeated the same tool call ${loopThreshold} times with identical arguments. ` +
2132
+ `If the call succeeded, move on to the next step. ` +
2133
+ `If it failed, check that all required parameters are present and correct. ` +
2134
+ `For write_file/edit_file, ensure 'content'/'old_text'/'new_text' are included as strings.`);
2135
+ }
2136
+ }
2137
+ // Update consecutive tracking: save this turn's signatures for next turn comparison.
2138
+ lastTurnSigs = turnSigs;
2139
/**
 * Execute one tool call emitted by the model and return its result message.
 *
 * Stages, in order: parse arguments, resolve the tool, check required
 * parameters, safety screening, read guardrails, plan-mode / step-mode gating,
 * dispatch, result hook, and optional LSP diagnostics after file mutations.
 *
 * @param {object} tc - Tool call with `function.name`, `function.arguments`
 *   (a JSON string) and an optional `id`.
 * @returns {Promise<{id: string, content: string}>} Result keyed by the call id.
 * @throws {AgentLoopBreak} When malformed-JSON failures exceed
 *   `harness.toolCalls.retryOnMalformed`.
 * @throws {Error} For malformed arguments, unknown tools, missing required
 *   parameters, forbidden commands or paths, or errors raised by the tool itself.
 */
const runOne = async (tc) => {
    const name = tc.function.name;
    const rawArgs = tc.function.arguments ?? '{}';
    const callId = resolveCallId(tc);
    toolNameByCallId.set(callId, name);
    // Resolve a tool-supplied path against the session directory. path.resolve
    // also normalizes `.`/`..` segments, so the safety check, the dir-scan
    // bookkeeping and the LSP notification all see the same canonical path.
    const toAbsolute = (p) => path.resolve(cfg.dir ?? process.cwd(), p);
    let args;
    try {
        args = rawArgs ? JSON.parse(rawArgs) : {};
    }
    catch {
        // Respect harness retry limit for malformed JSON (§4i)
        malformedCount++;
        if (malformedCount > harness.toolCalls.retryOnMalformed) {
            // Break the outer loop — this model won't self-correct
            throw new AgentLoopBreak(`tool ${name}: malformed JSON exceeded retry limit (${harness.toolCalls.retryOnMalformed}): ${rawArgs.slice(0, 200)}`);
        }
        throw new Error(`tool ${name}: arguments not valid JSON: ${rawArgs.slice(0, 200)}`);
    }
    const builtInFn = tools[name];
    const isLspTool = LSP_TOOL_NAME_SET.has(name);
    const isSpawnTask = name === 'spawn_task';
    const hasMcpTool = mcpManager?.hasTool(name) === true;
    if (!builtInFn && !isLspTool && !hasMcpTool && !isSpawnTask) {
        throw new Error(`unknown tool: ${name}`);
    }
    // Pre-dispatch check for missing required params.
    // Catches omitted params early with a clear, instructive error
    // before the tool itself throws a less helpful message.
    if (builtInFn || isSpawnTask) {
        const missing = getMissingRequiredParams(name, args);
        if (missing.length) {
            throw new Error(`REQUIRED parameter(s) ${missing.map(p => `'${p}'`).join(', ')} missing. You MUST include ${missing.join(', ')} in every ${name} call.`);
        }
    }
    // ── Pre-dispatch safety screening (Phase 9) ──
    // Forbidden commands/paths are rejected here, before the tool runs, and
    // reported through confirmProvider.showBlocked when it is available.
    if (name === 'exec' && typeof args.command === 'string') {
        const sv = checkExecSafety(args.command);
        if (sv.tier === 'forbidden') {
            const reason = sv.reason || 'forbidden command';
            opts.confirmProvider?.showBlocked?.({ tool: name, args, reason });
            throw new Error(`exec: ${reason} — command: ${args.command}`);
        }
    }
    if (FILE_MUTATION_TOOL_SET.has(name) && typeof args.path === 'string') {
        // Check the normalized path so `..` segments cannot disguise the real target.
        const pv = checkPathSafety(toAbsolute(args.path));
        if (pv.tier === 'forbidden') {
            const reason = pv.reason || 'protected path';
            opts.confirmProvider?.showBlocked?.({ tool: name, args, reason });
            throw new Error(`${name}: ${reason}`);
        }
    }
    // ── Anti-scan: read_file guardrails (Fix 1/2/3) ──
    if (name === 'read_file' || name === 'read_files') {
        const filePath = typeof args.path === 'string' ? args.path : '';
        const searchTerm = typeof args.search === 'string' ? args.search : '';
        // Fix 1: Hard cumulative budget — refuse reads past hard cap
        if (cumulativeReadOnlyCalls > READ_BUDGET_HARD) {
            hookObj.onToolCall?.({ id: callId, name, args });
            hookObj.onToolResult?.({ id: callId, name, success: false, summary: 'read budget exhausted', result: '' });
            return { id: callId, content: `STOP: Read budget exhausted (${cumulativeReadOnlyCalls}/${READ_BUDGET_HARD} calls). Do NOT read more files. Use search_files or exec: grep -rn "pattern" path/ to find what you need.` };
        }
        // Fix 2: Directory scan detection — counts unique files per dir (re-reads are OK)
        if (filePath) {
            const absFilePath = toAbsolute(filePath);
            const parentDir = path.dirname(absFilePath);
            if (!readDirFiles.has(parentDir)) {
                readDirFiles.set(parentDir, new Set());
            }
            const filesSeenInDir = readDirFiles.get(parentDir);
            filesSeenInDir.add(absFilePath);
            const uniqueCount = filesSeenInDir.size;
            if (uniqueCount > 8) {
                // Record the directory as blocked and refuse this read.
                blockedDirs.add(parentDir);
                hookObj.onToolCall?.({ id: callId, name, args });
                hookObj.onToolResult?.({ id: callId, name, success: false, summary: 'dir scan blocked', result: '' });
                return { id: callId, content: `STOP: Directory scan detected — you've read ${uniqueCount} unique files from ${parentDir}/. Use search_files(pattern, '${parentDir}') or exec: grep -rn "pattern" ${parentDir}/ instead of reading files individually.` };
            }
        }
        // Fix 3: Same-search-term detection (case-insensitive on the term)
        if (searchTerm && filePath) {
            const key = searchTerm.toLowerCase();
            if (!searchTermFiles.has(key)) {
                searchTermFiles.set(key, new Set());
            }
            searchTermFiles.get(key).add(filePath);
            if (searchTermFiles.get(key).size >= 3) {
                hookObj.onToolCall?.({ id: callId, name, args });
                hookObj.onToolResult?.({ id: callId, name, success: false, summary: 'use search_files', result: '' });
                return { id: callId, content: `STOP: You've searched ${searchTermFiles.get(key).size} files for "${searchTerm}" one at a time. This is what search_files does in one call. Use: search_files(pattern="${searchTerm}", path=".") or exec: grep -rn "${searchTerm}" .` };
            }
        }
    }
    // ── Plan mode blocking (Phase 8) ──
    // In plan mode, mutating tools return blocked stubs instead of executing.
    // Read-only tools still execute normally.
    if (cfg.approval_mode === 'plan' && !isReadOnlyToolDynamic(name)) {
        const summary = planModeSummary(name, args);
        const step = {
            index: planSteps.length + 1,
            tool: name,
            args,
            blocked: true,
            summary,
        };
        planSteps.push(step);
        const blockedMsg = `[blocked: approval_mode=plan] Would ${summary}`;
        // Notify via confirmProvider.showBlocked if available
        opts.confirmProvider?.showBlocked?.({ tool: name, args, reason: `plan mode: ${summary}` });
        // Hook: onToolCall + onToolResult for plan-blocked actions
        hookObj.onToolCall?.({ id: callId, name, args });
        hookObj.onToolResult?.({ id: callId, name, success: true, summary: `⏸ ${summary} (blocked)`, result: blockedMsg });
        return { id: callId, content: blockedMsg };
    }
    // Hook: onToolCall (Phase 8.5)
    hookObj.onToolCall?.({ id: callId, name, args });
    if (cfg.step_mode) {
        const stepPrompt = `Step mode: execute ${name}(${JSON.stringify(args).slice(0, 200)}) ? [Y/n]`;
        // Without a confirm bridge there is nobody to ask, so the step proceeds.
        const ok = confirmBridge ? await confirmBridge(stepPrompt, { tool: name, args }) : true;
        if (!ok) {
            return { id: callId, content: '[skipped by user: step mode]' };
        }
    }
    let content = '';
    if (isSpawnTask) {
        content = await runSpawnTask(args);
    }
    else if (builtInFn) {
        const value = await builtInFn(ctx, args);
        content = typeof value === 'string' ? value : JSON.stringify(value);
    }
    else if (isLspTool && lspManager) {
        // LSP tool dispatch; an LSP tool name not listed here leaves `content` empty.
        if (name === 'lsp_diagnostics') {
            content = await lspManager.getDiagnostics(typeof args.path === 'string' ? args.path : undefined, typeof args.severity === 'number' ? args.severity : undefined);
        }
        else if (name === 'lsp_symbols') {
            content = await buildLspLensSymbolOutput(String(args.path ?? ''));
        }
        else if (name === 'lsp_hover') {
            content = await lspManager.getHover(String(args.path ?? ''), Number(args.line ?? 0), Number(args.character ?? 0));
        }
        else if (name === 'lsp_definition') {
            content = await lspManager.getDefinition(String(args.path ?? ''), Number(args.line ?? 0), Number(args.character ?? 0));
        }
        else if (name === 'lsp_references') {
            content = await lspManager.getReferences(String(args.path ?? ''), Number(args.line ?? 0), Number(args.character ?? 0), typeof args.max_results === 'number' ? args.max_results : 50);
        }
    }
    else {
        // Remaining case: MCP tools (also reached by an LSP tool name when no LSP manager exists).
        if (mcpManager == null) {
            throw new Error(`unknown tool: ${name}`);
        }
        const mcpReadOnly = isReadOnlyToolDynamic(name);
        // Step mode has already asked above; otherwise confirm non-read-only MCP tools.
        if (!cfg.step_mode && !ctx.noConfirm && !mcpReadOnly) {
            const prompt = `Execute MCP tool '${name}'? [Y/n]`;
            const ok = confirmBridge ? await confirmBridge(prompt, { tool: name, args }) : true;
            if (!ok) {
                return { id: callId, content: '[skipped by user: approval]' };
            }
        }
        // Anything other than a plain object is replaced by an empty argument object.
        const callArgs = args && typeof args === 'object' && !Array.isArray(args)
            ? args
            : {};
        content = await mcpManager.callTool(name, callArgs);
    }
    // Hook: onToolResult (Phase 8.5 + Phase 7 rich display)
    const summary = toolResultSummary(name, args, content, true);
    const resultEvent = { id: callId, name, success: true, summary, result: content };
    // Phase 7: populate rich display fields
    if (name === 'exec') {
        try {
            const parsed = JSON.parse(content);
            if (parsed.out) {
                resultEvent.execOutput = parsed.out;
            }
        }
        catch {
            // Output that is not JSON simply gets no rich exec display.
        }
    }
    else if (name === 'search_files') {
        const lines = content.split('\n').filter(Boolean);
        if (lines.length > 0) {
            resultEvent.searchMatches = lines.slice(0, 20);
        }
    }
    else if (FILE_MUTATION_TOOL_SET.has(name) && replay) {
        // Grab the most recent checkpoint for a diff preview
        try {
            const cps = await replay.list(1);
            if (cps.length > 0) {
                const got = await replay.get(cps[0].id);
                const before = got.before.toString('utf8');
                const after = (got.after ?? Buffer.from('')).toString('utf8');
                if (before !== after) {
                    // Generate a minimal unified diff
                    resultEvent.diff = generateMinimalDiff(before, after, cps[0].filePath);
                }
            }
        }
        catch {
            // The diff preview is optional; a checkpoint failure must not fail the tool call.
        }
    }
    hookObj.onToolResult?.(resultEvent);
    // Proactive LSP diagnostics after file mutations. They are appended to the
    // returned content only; the result hook above has already fired.
    if (lspManager?.hasServers() && lspCfg?.proactive_diagnostics !== false) {
        if (FILE_MUTATION_TOOL_SET.has(name)) {
            const mutatedPath = typeof args.path === 'string' ? args.path : '';
            if (mutatedPath) {
                try {
                    const absPath = toAbsolute(mutatedPath);
                    const fileText = await fs.readFile(absPath, 'utf8');
                    await lspManager.ensureOpen(absPath, fileText);
                    await lspManager.notifyDidSave(absPath, fileText);
                    // Small delay so the server can process diagnostics
                    await new Promise((r) => setTimeout(r, 200));
                    const diags = await lspManager.getDiagnostics(absPath);
                    if (diags && !diags.startsWith('No diagnostics') && !diags.startsWith('[lsp] no language')) {
                        content += `\n\n[lsp] Diagnostics after edit:\n${diags}`;
                    }
                }
                catch {
                    // Best-effort; never block the agent loop.
                }
            }
        }
    }
    return { id: callId, content };
};
2364
+ const results = [];
2365
// Turn a failed tool call into an `ERROR: …` result message so the turn can
// continue. AgentLoopBreak is the one exception: it is passed through untouched
// because it has to terminate the outer loop.
const catchToolError = (e, tc) => {
    if (e instanceof AgentLoopBreak) {
        throw e;
    }
    const msg = e?.message ?? String(e);
    const id = resolveCallId(tc);
    // Hook: onToolResult for errors (Phase 8.5)
    const hookText = msg || 'unknown error';
    hookObj.onToolResult?.({
        id,
        name: tc.function.name,
        success: false,
        summary: hookText,
        result: `ERROR: ${hookText}`,
    });
    // The returned text always has a fallback, so it is never empty or undefined;
    // an empty error makes bench failures impossible to debug.
    const content = `ERROR: ${msg || 'unknown tool error'}`;
    return { id, content };
};
2376
+ // ── Anti-scan guardrails (§ read budget, dir scan, same-search) ──
2377
+ const readOnlyInTurn = toolCallsArr.filter((tc) => isReadOnlyToolDynamic(tc.function.name));
2378
+ // Fix 5: Per-turn cap — drop excess read-only calls in a single response
2379
+ if (readOnlyInTurn.length > READ_ONLY_PER_TURN_CAP) {
2380
+ const kept = new Set(readOnlyInTurn.slice(0, READ_ONLY_PER_TURN_CAP).map((tc) => tc.id ?? tc.function.name));
2381
+ const droppedCount = readOnlyInTurn.length - READ_ONLY_PER_TURN_CAP;
2382
+ toolCallsArr = toolCallsArr.filter((tc) => !isReadOnlyToolDynamic(tc.function.name) || kept.has(tc.id ?? tc.function.name));
2383
+ for (const tc of readOnlyInTurn.slice(READ_ONLY_PER_TURN_CAP)) {
2384
+ const callId = resolveCallId(tc);
2385
+ results.push({
2386
+ id: callId,
2387
+ content: `STOP: Per-turn read limit (${READ_ONLY_PER_TURN_CAP}). Use search_files or exec with grep instead of reading files one by one.`
2388
+ });
2389
+ }
2390
+ if (cfg.verbose) {
2391
+ console.warn(`[guardrail] capped ${droppedCount} read-only tool calls (per-turn limit ${READ_ONLY_PER_TURN_CAP})`);
2392
+ }
2393
+ }
2394
+ // Fix 1: Hard cumulative read budget — escalating enforcement
2395
+ const readOnlyThisTurn = toolCallsArr.filter((tc) => isReadOnlyToolDynamic(tc.function.name));
2396
+ cumulativeReadOnlyCalls += readOnlyThisTurn.length;
2397
+ if (harness.toolCalls.parallelCalls) {
2398
+ // Models that support parallel calls: read-only in parallel, mutations sequential
2399
+ const readonly = toolCallsArr.filter((tc) => isReadOnlyToolDynamic(tc.function.name));
2400
+ const others = toolCallsArr.filter((tc) => !isReadOnlyToolDynamic(tc.function.name));
2401
+ const ro = await Promise.all(readonly.map((tc) => runOne(tc)
2402
+ .catch((e) => catchToolError(e, tc))));
2403
+ results.push(...ro);
2404
+ for (const tc of others) {
2405
+ if (ac.signal.aborted)
2406
+ break;
2407
+ try {
2408
+ results.push(await runOne(tc));
2409
+ }
2410
+ catch (e) {
2411
+ results.push(catchToolError(e, tc));
2412
+ }
2413
+ }
2414
+ }
2415
+ else {
2416
+ // Models with parallelCalls=false: run ALL calls sequentially (§4i).
2417
+ // These models lose track of results when calls are batched in parallel.
2418
+ for (const tc of toolCallsArr) {
2419
+ if (ac.signal.aborted)
2420
+ break;
2421
+ try {
2422
+ results.push(await runOne(tc));
2423
+ }
2424
+ catch (e) {
2425
+ results.push(catchToolError(e, tc));
2426
+ }
2427
+ }
2428
+ }
2429
+ // Bail immediately if cancelled during tool execution
2430
+ if (ac.signal.aborted)
2431
+ break;
2432
+ for (const r of results) {
2433
+ messages.push({ role: 'tool', tool_call_id: r.id, content: r.content });
2434
+ }
2435
+ // ── Escalating cumulative read budget (§ anti-scan guardrails) ──
2436
+ // Warn zone: append warnings to each read result when approaching the hard cap
2437
+ if (cumulativeReadOnlyCalls > READ_BUDGET_WARN && cumulativeReadOnlyCalls <= READ_BUDGET_HARD) {
2438
+ const remaining = READ_BUDGET_HARD - cumulativeReadOnlyCalls;
2439
+ messages.push({
2440
+ role: 'user',
2441
+ content: `[System] ⚠ Read budget: ${cumulativeReadOnlyCalls}/${READ_BUDGET_HARD}. ${remaining} reads remaining before hard stop. Use search_files or exec grep — do NOT continue reading files one at a time.`,
2442
+ });
2443
+ }
2444
+ // Hook: onTurnEnd (Phase 8.5)
2445
+ await hookObj.onTurnEnd?.({
2446
+ turn: turns,
2447
+ toolCalls,
2448
+ promptTokens: cumulativeUsage.prompt,
2449
+ completionTokens: cumulativeUsage.completion,
2450
+ promptTokensTurn,
2451
+ completionTokensTurn,
2452
+ ttftMs,
2453
+ ttcMs,
2454
+ ppTps,
2455
+ tgTps,
2456
+ });
2457
+ continue;
2458
+ }
2459
+ if (mcpManager &&
2460
+ !mcpToolsLoaded &&
2461
+ (visible || content || '').toUpperCase().includes(MCP_TOOLS_REQUEST_TOKEN.toUpperCase())) {
2462
+ mcpToolsLoaded = true;
2463
+ messages.push({ role: 'assistant', content: visible || content || '' });
2464
+ messages.push({
2465
+ role: 'user',
2466
+ content: '[system] MCP tools are now enabled for this task. Continue and call tools as needed.'
2467
+ });
2468
+ continue;
2469
+ }
2470
+ // final assistant message
2471
+ messages.push({ role: 'assistant', content: visible || content || '' });
2472
+ await hookObj.onTurnEnd?.({
2473
+ turn: turns,
2474
+ toolCalls,
2475
+ promptTokens: cumulativeUsage.prompt,
2476
+ completionTokens: cumulativeUsage.completion,
2477
+ promptTokensTurn,
2478
+ completionTokensTurn,
2479
+ ttftMs,
2480
+ ttcMs,
2481
+ ppTps,
2482
+ tgTps,
2483
+ });
2484
+ return { text: visible || content || '', turns, toolCalls };
2485
+ }
2486
+ const reason = `max iterations exceeded (${maxIters})`;
2487
+ throw new Error(reason);
2488
+ }
2489
+ catch (e) {
2490
+ // Some code paths (or upstream libs) may incorrectly throw `undefined`.
2491
+ // Convert it to a real Error so benches can be stable and debuggable.
2492
+ if (e === undefined) {
2493
+ const lastMsg = messages[messages.length - 1];
2494
+ const lastMsgPreview = (() => {
2495
+ try {
2496
+ const c = lastMsg?.content;
2497
+ if (typeof c === 'string')
2498
+ return c.slice(0, 200);
2499
+ return JSON.stringify(c).slice(0, 200);
2500
+ }
2501
+ catch {
2502
+ return '';
2503
+ }
2504
+ })();
2505
+ const err = new Error(`BUG: threw undefined in agent.ask() (turn=${turns}). lastMsg=${lastMsg?.role ?? 'unknown'}:${lastMsgPreview}`);
2506
+ await persistFailure(err, `ask turn ${turns}`);
2507
+ throw err;
2508
+ }
2509
+ await persistFailure(e, `ask turn ${turns}`);
2510
+ // Never rethrow undefined; normalize to Error for debuggability.
2511
+ if (e === undefined) {
2512
+ throw new Error('BUG: threw undefined (normalized at ask() boundary)');
2513
+ }
2514
+ throw e;
2515
+ }
2516
+ };
2517
+ // expose via getters so setModel() / reset() don't break references
2518
+ return {
2519
+ get model() { return model; },
2520
+ get harness() { return harness.id; },
2521
+ get endpoint() { return cfg.endpoint; },
2522
+ get contextWindow() { return contextWindow; },
2523
+ get supportsVision() { return supportsVision; },
2524
+ get messages() {
2525
+ return messages;
2526
+ },
2527
+ get usage() {
2528
+ return { ...cumulativeUsage };
2529
+ },
2530
+ ask,
2531
+ setModel,
2532
+ setEndpoint,
2533
+ listModels,
2534
+ refreshServerHealth,
2535
+ getPerfSummary,
2536
+ captureOn,
2537
+ captureOff,
2538
+ captureLast,
2539
+ get capturePath() {
2540
+ return capturePath;
2541
+ },
2542
+ getSystemPrompt: () => activeSystemPrompt,
2543
+ setSystemPrompt,
2544
+ resetSystemPrompt,
2545
+ listMcpServers,
2546
+ listMcpTools,
2547
+ restartMcpServer,
2548
+ enableMcpTool,
2549
+ disableMcpTool,
2550
+ mcpWarnings,
2551
+ listLspServers,
2552
+ setVerbose,
2553
+ close,
2554
+ reset,
2555
+ cancel,
2556
+ restore,
2557
+ replay,
2558
+ vault,
2559
+ lens,
2560
+ get lastEditedPath() {
2561
+ return lastEditedPath;
2562
+ },
2563
+ get lastTurnMetrics() {
2564
+ return lastTurnMetrics;
2565
+ },
2566
+ get lastServerHealth() {
2567
+ return lastServerHealth;
2568
+ },
2569
+ get planSteps() {
2570
+ return planSteps;
2571
+ },
2572
+ executePlanStep,
2573
+ clearPlan,
2574
+ compactHistory
2575
+ };
2576
+ }
2577
/**
 * One-shot convenience wrapper: create a session from the given options and
 * ask it a single instruction.
 *
 * @param {object} opts - Reads `config`, `apiKey`, `confirm`, `confirmProvider`
 *   and `runtime` (forwarded to `createSession`), plus `instruction` and
 *   `onToken` (forwarded to `session.ask`).
 * @returns {Promise<*>} Whatever `session.ask` resolves to.
 */
export async function runAgent(opts) {
    const { config, apiKey, confirm, confirmProvider, runtime } = opts;
    const session = await createSession({ config, apiKey, confirm, confirmProvider, runtime });
    return session.ask(opts.instruction, opts.onToken);
}
2587
/**
 * Choose a default model id from the server's model list.
 *
 * Prefers the first model whose id matches /qwen/i and otherwise falls back to
 * the first model listed.
 *
 * @param {object} client - API client; `client.models(signal)` is called only
 *   when no cached list is supplied.
 * @param {{data: Array<{id: string}>}} [cached] - Previously fetched model
 *   list. When provided, no request is made and no timer is started.
 * @returns {Promise<string>} The selected model id.
 * @throws {Error} When the list is empty, has no `data` array, or its first
 *   entry has no id.
 */
async function autoPickModel(client, cached) {
    let models = cached;
    if (models == null) {
        // Only a real fetch needs the abort controller and its 3 s timeout.
        const ac = makeAbortController();
        const timer = setTimeout(() => ac.abort(), 3000);
        try {
            models = normalizeModelsResponse(await client.models(ac.signal));
        }
        finally {
            clearTimeout(timer);
        }
    }
    // Treat a missing/malformed `data` field as "no models" so the caller gets
    // the actionable error below instead of a TypeError.
    const entries = Array.isArray(models?.data) ? models.data : [];
    const preferred = entries.find((m) => /qwen/i.test(m?.id));
    if (preferred) {
        return preferred.id;
    }
    const first = entries[0]?.id;
    if (!first) {
        throw new Error('No models found on server. Check your endpoint and that a model is loaded.');
    }
    return first;
}
2604
+ //# sourceMappingURL=agent.js.map