@visorcraft/idlehands 1.1.7 → 1.1.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (120) hide show
  1. package/README.md +46 -0
  2. package/dist/agent/formatting.js +273 -0
  3. package/dist/agent/formatting.js.map +1 -0
  4. package/dist/agent/review-artifact.js +147 -0
  5. package/dist/agent/review-artifact.js.map +1 -0
  6. package/dist/agent/tool-calls.js +411 -0
  7. package/dist/agent/tool-calls.js.map +1 -0
  8. package/dist/agent.js +285 -684
  9. package/dist/agent.js.map +1 -1
  10. package/dist/anton/controller.js +1 -1
  11. package/dist/anton/controller.js.map +1 -1
  12. package/dist/anton/lock.js +0 -3
  13. package/dist/anton/lock.js.map +1 -1
  14. package/dist/anton/parser.js +6 -6
  15. package/dist/anton/parser.js.map +1 -1
  16. package/dist/anton/reporter.js +1 -1
  17. package/dist/anton/reporter.js.map +1 -1
  18. package/dist/bot/commands.js +3 -2
  19. package/dist/bot/commands.js.map +1 -1
  20. package/dist/bot/confirm-telegram.js +2 -1
  21. package/dist/bot/confirm-telegram.js.map +1 -1
  22. package/dist/bot/discord-routing.js +186 -0
  23. package/dist/bot/discord-routing.js.map +1 -0
  24. package/dist/bot/discord-streaming.js +107 -0
  25. package/dist/bot/discord-streaming.js.map +1 -0
  26. package/dist/bot/discord.js +49 -237
  27. package/dist/bot/discord.js.map +1 -1
  28. package/dist/bot/format.js +2 -25
  29. package/dist/bot/format.js.map +1 -1
  30. package/dist/bot/session-manager.js +22 -11
  31. package/dist/bot/session-manager.js.map +1 -1
  32. package/dist/bot/telegram.js +83 -94
  33. package/dist/bot/telegram.js.map +1 -1
  34. package/dist/cli/build-repl-context.js.map +1 -1
  35. package/dist/cli/command-registry.js +2 -1
  36. package/dist/cli/command-registry.js.map +1 -1
  37. package/dist/cli/command-utils.js +27 -0
  38. package/dist/cli/command-utils.js.map +1 -0
  39. package/dist/cli/commands/anton.js +3 -2
  40. package/dist/cli/commands/anton.js.map +1 -1
  41. package/dist/cli/commands/model.js +8 -7
  42. package/dist/cli/commands/model.js.map +1 -1
  43. package/dist/cli/commands/project.js +5 -4
  44. package/dist/cli/commands/project.js.map +1 -1
  45. package/dist/cli/commands/session.js +9 -8
  46. package/dist/cli/commands/session.js.map +1 -1
  47. package/dist/cli/commands/tools.js +4 -3
  48. package/dist/cli/commands/tools.js.map +1 -1
  49. package/dist/cli/input.js +2 -1
  50. package/dist/cli/input.js.map +1 -1
  51. package/dist/cli/repl-dispatch.js +85 -0
  52. package/dist/cli/repl-dispatch.js.map +1 -0
  53. package/dist/cli/runtime-cmds.js +148 -20
  54. package/dist/cli/runtime-cmds.js.map +1 -1
  55. package/dist/cli/service.js +0 -14
  56. package/dist/cli/service.js.map +1 -1
  57. package/dist/cli/setup.js +3 -3
  58. package/dist/cli/setup.js.map +1 -1
  59. package/dist/cli/watch.js +2 -1
  60. package/dist/cli/watch.js.map +1 -1
  61. package/dist/client.js +24 -7
  62. package/dist/client.js.map +1 -1
  63. package/dist/context.js +101 -10
  64. package/dist/context.js.map +1 -1
  65. package/dist/harnesses.js +1 -1
  66. package/dist/harnesses.js.map +1 -1
  67. package/dist/hooks/manager.js +5 -0
  68. package/dist/hooks/manager.js.map +1 -1
  69. package/dist/index.js +13 -64
  70. package/dist/index.js.map +1 -1
  71. package/dist/progress/agent-hooks.js +37 -0
  72. package/dist/progress/agent-hooks.js.map +1 -0
  73. package/dist/progress/ir.js +10 -0
  74. package/dist/progress/ir.js.map +1 -0
  75. package/dist/progress/message-edit-scheduler.js +97 -0
  76. package/dist/progress/message-edit-scheduler.js.map +1 -0
  77. package/dist/progress/progress-message-renderer.js +120 -0
  78. package/dist/progress/progress-message-renderer.js.map +1 -0
  79. package/dist/progress/progress-presenter.js +137 -0
  80. package/dist/progress/progress-presenter.js.map +1 -0
  81. package/dist/progress/serialize-discord.js +72 -0
  82. package/dist/progress/serialize-discord.js.map +1 -0
  83. package/dist/progress/serialize-telegram.js +67 -0
  84. package/dist/progress/serialize-telegram.js.map +1 -0
  85. package/dist/progress/serialize-tui.js +52 -0
  86. package/dist/progress/serialize-tui.js.map +1 -0
  87. package/dist/progress/tool-summary.js +58 -0
  88. package/dist/progress/tool-summary.js.map +1 -0
  89. package/dist/progress/tool-tail.js +48 -0
  90. package/dist/progress/tool-tail.js.map +1 -0
  91. package/dist/progress/turn-progress.js +215 -0
  92. package/dist/progress/turn-progress.js.map +1 -0
  93. package/dist/replay.js +2 -5
  94. package/dist/replay.js.map +1 -1
  95. package/dist/runtime/executor.js +58 -10
  96. package/dist/runtime/executor.js.map +1 -1
  97. package/dist/runtime/planner.js +19 -6
  98. package/dist/runtime/planner.js.map +1 -1
  99. package/dist/runtime/store.js +2 -1
  100. package/dist/runtime/store.js.map +1 -1
  101. package/dist/safety.js +0 -1
  102. package/dist/safety.js.map +1 -1
  103. package/dist/spinner.js +8 -0
  104. package/dist/spinner.js.map +1 -1
  105. package/dist/tools/tool-error.js +97 -0
  106. package/dist/tools/tool-error.js.map +1 -0
  107. package/dist/tools.js +471 -41
  108. package/dist/tools.js.map +1 -1
  109. package/dist/tui/branch-picker.js.map +1 -1
  110. package/dist/tui/command-handler.js.map +1 -1
  111. package/dist/tui/controller.js +91 -28
  112. package/dist/tui/controller.js.map +1 -1
  113. package/dist/tui/render.js +15 -2
  114. package/dist/tui/render.js.map +1 -1
  115. package/dist/tui/state.js +13 -0
  116. package/dist/tui/state.js.map +1 -1
  117. package/dist/upgrade.js.map +1 -1
  118. package/dist/utils.js +17 -0
  119. package/dist/utils.js.map +1 -1
  120. package/package.json +1 -1
package/dist/agent.js CHANGED
@@ -14,123 +14,19 @@ import { LensStore } from './lens.js';
14
14
  import { SYS_CONTEXT_SCHEMA, collectSnapshot } from './sys/context.js';
15
15
  import { MCPManager } from './mcp.js';
16
16
  import { LspManager, detectInstalledLspServers } from './lsp.js';
17
+ import { generateMinimalDiff, toolResultSummary, execCommandFromSig, formatDurationMs, looksLikePlanningNarration, capTextByApproxTokens, isLikelyBinaryBuffer, sanitizePathsInMessage, digestToolResult, } from './agent/formatting.js';
18
+ import { parseToolCallsFromContent, getMissingRequiredParams, getArgValidationIssues, stripMarkdownFences } from './agent/tool-calls.js';
19
+ import { ToolError, ValidationError } from './tools/tool-error.js';
20
+ export { parseToolCallsFromContent };
21
+ import { reviewArtifactKeys, looksLikeCodeReviewRequest, looksLikeReviewRetrievalRequest, retrievalAllowsStaleArtifact, parseReviewArtifactStalePolicy, parseReviewArtifact, reviewArtifactStaleReason, gitHead, normalizeModelsResponse, } from './agent/review-artifact.js';
17
22
  import fs from 'node:fs/promises';
18
23
  import path from 'node:path';
19
- import { spawnSync } from 'node:child_process';
20
- import { stateDir, BASH_PATH as BASH } from './utils.js';
24
+ import { stateDir, timestampedId } from './utils.js';
21
25
  function makeAbortController() {
22
26
  // Node 24: AbortController is global.
23
27
  return new AbortController();
24
28
  }
25
- /** Generate a minimal unified diff for Phase 7 rich display (max 20 lines, truncated). */
26
- function generateMinimalDiff(before, after, filePath) {
27
- const bLines = before.split('\n');
28
- const aLines = after.split('\n');
29
- const out = [];
30
- out.push(`--- a/${filePath}`);
31
- out.push(`+++ b/${filePath}`);
32
- // Simple line-by-line diff (find changed region)
33
- let diffStart = 0;
34
- while (diffStart < bLines.length && diffStart < aLines.length && bLines[diffStart] === aLines[diffStart])
35
- diffStart++;
36
- let bEnd = bLines.length - 1;
37
- let aEnd = aLines.length - 1;
38
- while (bEnd > diffStart && aEnd > diffStart && bLines[bEnd] === aLines[aEnd]) {
39
- bEnd--;
40
- aEnd--;
41
- }
42
- const contextBefore = Math.max(0, diffStart - 2);
43
- const contextAfter = Math.min(Math.max(bLines.length, aLines.length) - 1, Math.max(bEnd, aEnd) + 2);
44
- const bEndContext = Math.min(bLines.length - 1, contextAfter);
45
- const aEndContext = Math.min(aLines.length - 1, contextAfter);
46
- out.push(`@@ -${contextBefore + 1},${bEndContext - contextBefore + 1} +${contextBefore + 1},${aEndContext - contextBefore + 1} @@`);
47
- let lineCount = 0;
48
- const MAX_LINES = 20;
49
- // Context before change
50
- for (let i = contextBefore; i < diffStart && lineCount < MAX_LINES; i++) {
51
- out.push(` ${bLines[i]}`);
52
- lineCount++;
53
- }
54
- // Removed lines
55
- for (let i = diffStart; i <= bEnd && i < bLines.length && lineCount < MAX_LINES; i++) {
56
- out.push(`-${bLines[i]}`);
57
- lineCount++;
58
- }
59
- // Added lines
60
- for (let i = diffStart; i <= aEnd && i < aLines.length && lineCount < MAX_LINES; i++) {
61
- out.push(`+${aLines[i]}`);
62
- lineCount++;
63
- }
64
- // Context after change
65
- const afterStart = Math.max(bEnd, aEnd) + 1;
66
- for (let i = afterStart; i <= contextAfter && i < Math.max(bLines.length, aLines.length) && lineCount < MAX_LINES; i++) {
67
- const line = i < aLines.length ? aLines[i] : bLines[i] ?? '';
68
- out.push(` ${line}`);
69
- lineCount++;
70
- }
71
- const totalChanges = (bEnd - diffStart + 1) + (aEnd - diffStart + 1);
72
- if (lineCount >= MAX_LINES && totalChanges > MAX_LINES) {
73
- out.push(`[+${totalChanges - MAX_LINES} more lines]`);
74
- }
75
- return out.join('\n');
76
- }
77
- /** Generate a one-line summary of a tool result for hooks/display. */
78
- function toolResultSummary(name, args, content, success) {
79
- if (!success)
80
- return content.slice(0, 120);
81
- switch (name) {
82
- case 'read_file':
83
- case 'read_files': {
84
- const lines = content.split('\n').length;
85
- return `${lines} lines read`;
86
- }
87
- case 'write_file':
88
- return `wrote ${args.path || 'file'}`;
89
- case 'edit_file':
90
- return content.startsWith('ERROR') ? content.slice(0, 120) : `applied edit`;
91
- case 'insert_file':
92
- return `inserted at line ${args.line ?? '?'}`;
93
- case 'exec': {
94
- try {
95
- const r = JSON.parse(content);
96
- const lines = (r.out || '').split('\n').filter(Boolean).length;
97
- return `rc=${r.rc}, ${lines} lines`;
98
- }
99
- catch {
100
- return content.slice(0, 80);
101
- }
102
- }
103
- case 'list_dir': {
104
- const entries = content.split('\n').filter(Boolean).length;
105
- return `${entries} entries`;
106
- }
107
- case 'search_files': {
108
- const matches = (content.match(/^\d+:/gm) || []).length;
109
- return `${matches} matches`;
110
- }
111
- case 'spawn_task': {
112
- const line = content.split(/\r?\n/).find((l) => l.includes('status='));
113
- return line ? line.trim() : 'sub-agent task finished';
114
- }
115
- case 'vault_search':
116
- return `vault results`;
117
- default:
118
- return content.slice(0, 80);
119
- }
120
- }
121
29
  const CACHED_EXEC_OBSERVATION_HINT = '[idlehands hint] Reused cached output for repeated read-only exec call (unchanged observation).';
122
- function execCommandFromSig(sig) {
123
- if (!sig.startsWith('exec:'))
124
- return '';
125
- const raw = sig.slice('exec:'.length);
126
- try {
127
- const parsed = JSON.parse(raw);
128
- return typeof parsed?.command === 'string' ? parsed.command : '';
129
- }
130
- catch {
131
- return '';
132
- }
133
- }
134
30
  function looksLikeReadOnlyExecCommand(command) {
135
31
  const cmd = String(command || '').trim().toLowerCase();
136
32
  if (!cmd)
@@ -160,6 +56,24 @@ function looksLikeReadOnlyExecCommand(command) {
160
56
  return true;
161
57
  return false;
162
58
  }
59
+ function execRcShouldSignalFailure(command) {
60
+ const cmd = String(command || '').toLowerCase();
61
+ if (!cmd)
62
+ return false;
63
+ // Common checks where non-zero usually means real failure.
64
+ if (/\b(?:npm|pnpm|yarn)\s+(?:run\s+)?(?:test|build|lint|typecheck|check)\b/.test(cmd))
65
+ return true;
66
+ if (/\bnode\s+--test\b/.test(cmd))
67
+ return true;
68
+ if (/\b(?:pytest|go\s+test|cargo\s+test|ctest|mvn\s+test|gradle\s+test)\b/.test(cmd))
69
+ return true;
70
+ if (/\b(?:cargo\s+build|go\s+build|tsc\b)\b/.test(cmd))
71
+ return true;
72
+ // Grep/rg no-match rc=1 should not be treated as failure.
73
+ if (/^\s*(?:rg|grep|ag|ack)\b/.test(cmd))
74
+ return false;
75
+ return false;
76
+ }
163
77
  function withCachedExecObservationHint(content) {
164
78
  if (!content)
165
79
  return content;
@@ -188,6 +102,14 @@ function readOnlyExecCacheable(content) {
188
102
  return false;
189
103
  }
190
104
  }
105
+ function ensureInformativeAssistantText(text, ctx) {
106
+ if (String(text ?? '').trim())
107
+ return text;
108
+ if (ctx.toolCalls > 0) {
109
+ return 'I completed the requested tool work, but I have no user-visible response text yet. Ask me to summarize what was done.';
110
+ }
111
+ return `I have no user-visible response text for this turn (turn=${ctx.turns}). Please try again or rephrase your request.`;
112
+ }
191
113
  /** Errors that should break the outer agent loop, not be caught by per-tool handlers */
192
114
  class AgentLoopBreak extends Error {
193
115
  constructor(message) {
@@ -203,7 +125,7 @@ Rules:
203
125
  - Never use spawn_task to bypass confirmation/safety restrictions (for example blocked package installs). If a command is blocked, adapt the plan or ask the user for approval mode changes.
204
126
  - Read the target file before editing. You need the exact text for search/replace.
205
127
  - Use read_file with search=... to jump to relevant code; avoid reading whole files.
206
- - Use edit_file for surgical changes. Never rewrite entire files when a targeted edit works.
128
+ - Prefer apply_patch or edit_range for code edits (token-efficient). Use edit_file only when exact old_text replacement is necessary.
207
129
  - Use insert_file for insertions (prepend/append/line).
208
130
  - Use exec to run commands, tests, builds; check results before reporting success.
209
131
  - When running commands in a subdirectory, use exec's cwd parameter — NOT "cd /path && cmd". Each exec call is a fresh shell; cd does not persist.
@@ -230,7 +152,7 @@ const DEFAULT_SUB_AGENT_RESULT_TOKEN_CAP = 4000;
230
152
  const APPROVAL_MODE_SET = new Set(['plan', 'reject', 'default', 'auto-edit', 'yolo']);
231
153
  const LSP_TOOL_NAMES = ['lsp_diagnostics', 'lsp_symbols', 'lsp_hover', 'lsp_definition', 'lsp_references'];
232
154
  const LSP_TOOL_NAME_SET = new Set(LSP_TOOL_NAMES);
233
- const FILE_MUTATION_TOOL_SET = new Set(['edit_file', 'write_file', 'insert_file']);
155
+ const FILE_MUTATION_TOOL_SET = new Set(['edit_file', 'edit_range', 'apply_patch', 'write_file', 'insert_file']);
234
156
  function normalizeApprovalMode(value) {
235
157
  if (typeof value !== 'string')
236
158
  return undefined;
@@ -246,66 +168,6 @@ const APPROVAL_MODE_RANK = { plan: 0, reject: 1, default: 2, 'auto-edit': 3, yol
246
168
  function capApprovalMode(requested, parentMode) {
247
169
  return APPROVAL_MODE_RANK[requested] <= APPROVAL_MODE_RANK[parentMode] ? requested : parentMode;
248
170
  }
249
- function formatDurationMs(ms) {
250
- if (!Number.isFinite(ms) || ms <= 0)
251
- return '0.0s';
252
- return `${(ms / 1000).toFixed(1)}s`;
253
- }
254
- function looksLikePlanningNarration(text, finishReason) {
255
- const s = String(text ?? '').trim().toLowerCase();
256
- if (!s)
257
- return false;
258
- // Incomplete streamed answer: likely still needs another turn.
259
- if (finishReason === 'length')
260
- return true;
261
- // Strong completion cues: treat as final answer.
262
- if (/(^|\n)\s*(done|completed|finished|final answer|summary:)\b/.test(s))
263
- return false;
264
- // Typical "thinking out loud"/plan chatter that should continue with tools.
265
- return /\b(let me|i(?:'|’)ll|i will|i'm going to|i am going to|next i(?:'|’)ll|first i(?:'|’)ll|i need to|i should|checking|reviewing|exploring|starting by)\b/.test(s);
266
- }
267
- function approxTokenCharCap(maxTokens) {
268
- const safe = Math.max(64, Math.floor(maxTokens));
269
- return safe * 4;
270
- }
271
- function capTextByApproxTokens(text, maxTokens) {
272
- const raw = String(text ?? '');
273
- const maxChars = approxTokenCharCap(maxTokens);
274
- if (raw.length <= maxChars)
275
- return { text: raw, truncated: false };
276
- const clipped = raw.slice(0, maxChars);
277
- return {
278
- text: `${clipped}\n\n[sub-agent] result truncated to ~${maxTokens} tokens (${raw.length} chars original)`,
279
- truncated: true,
280
- };
281
- }
282
- function isLikelyBinaryBuffer(buf) {
283
- const n = Math.min(buf.length, 512);
284
- for (let i = 0; i < n; i++) {
285
- if (buf[i] === 0)
286
- return true;
287
- }
288
- return false;
289
- }
290
- /**
291
- * Strip absolute paths from a message to prevent cross-project leaks in vault.
292
- * Paths within cwd are replaced with relative equivalents; other absolute paths
293
- * are replaced with just the basename.
294
- */
295
- function sanitizePathsInMessage(message, cwd) {
296
- const normCwd = cwd.replace(/\/+$/, '');
297
- // Match absolute Unix paths (at least 2 segments)
298
- return message.replace(/\/(?:home|tmp|var|usr|opt|etc|root)\/[^\s"',;)\]}>]+/g, (match) => {
299
- const normMatch = match.replace(/\/+$/, '');
300
- if (normMatch.startsWith(normCwd + '/')) {
301
- // Within cwd — make relative
302
- return normMatch.slice(normCwd.length + 1);
303
- }
304
- // Outside cwd — strip to basename
305
- const base = path.basename(normMatch);
306
- return base || match;
307
- });
308
- }
309
171
  async function buildSubAgentContextBlock(cwd, rawFiles) {
310
172
  const values = Array.isArray(rawFiles) ? rawFiles : [];
311
173
  const files = values
@@ -385,155 +247,155 @@ function buildToolsSchema(opts) {
385
247
  properties,
386
248
  required
387
249
  });
250
+ const str = () => ({ type: 'string' });
251
+ const bool = () => ({ type: 'boolean' });
252
+ const int = (min, max) => ({ type: 'integer', ...(min !== undefined && { minimum: min }), ...(max !== undefined && { maximum: max }) });
388
253
  const schemas = [
254
+ // ────────────────────────────────────────────────────────────────────────────
255
+ // Token-safe reads (require limit; allow plain output without per-line numbers)
256
+ // ────────────────────────────────────────────────────────────────────────────
389
257
  {
390
258
  type: 'function',
391
259
  function: {
392
260
  name: 'read_file',
393
- description: 'Read file contents with line numbers. Use search/context to jump to relevant code.',
261
+ description: 'Read a bounded slice of a file.',
394
262
  parameters: obj({
395
- path: { type: 'string' },
396
- offset: { type: 'integer' },
397
- limit: { type: 'integer' },
398
- search: { type: 'string' },
399
- context: { type: 'integer' },
400
- }, ['path'])
401
- }
263
+ path: str(),
264
+ offset: int(1, 1_000_000),
265
+ limit: int(1, 240),
266
+ search: str(),
267
+ context: int(0, 80),
268
+ format: { type: 'string', enum: ['plain', 'numbered', 'sparse'] },
269
+ max_bytes: int(256, 20_000),
270
+ }, ['path', 'limit']),
271
+ },
402
272
  },
403
273
  {
404
274
  type: 'function',
405
275
  function: {
406
276
  name: 'read_files',
407
- description: 'Batch read multiple files.',
277
+ description: 'Batch read bounded file slices.',
408
278
  parameters: obj({
409
279
  requests: {
410
280
  type: 'array',
411
281
  items: obj({
412
- path: { type: 'string' },
413
- offset: { type: 'integer' },
414
- limit: { type: 'integer' },
415
- search: { type: 'string' },
416
- context: { type: 'integer' },
417
- }, ['path'])
418
- }
419
- }, ['requests'])
420
- }
282
+ path: str(),
283
+ offset: int(1, 1_000_000),
284
+ limit: int(1, 240),
285
+ search: str(),
286
+ context: int(0, 80),
287
+ format: { type: 'string', enum: ['plain', 'numbered', 'sparse'] },
288
+ max_bytes: int(256, 20_000),
289
+ }, ['path', 'limit']),
290
+ },
291
+ }, ['requests']),
292
+ },
421
293
  },
294
+ // ────────────────────────────────────────────────────────────────────────────
295
+ // Writes/edits
296
+ // ────────────────────────────────────────────────────────────────────────────
422
297
  {
423
298
  type: 'function',
424
299
  function: {
425
300
  name: 'write_file',
426
- description: 'Write a file (atomic). Creates parents. Makes a backup first.',
427
- parameters: obj({ path: { type: 'string' }, content: { type: 'string' } }, ['path', 'content'])
428
- }
301
+ description: 'Write file (atomic, backup).',
302
+ parameters: obj({ path: str(), content: str() }, ['path', 'content']),
303
+ },
429
304
  },
430
305
  {
431
306
  type: 'function',
432
307
  function: {
433
- name: 'edit_file',
434
- description: 'Search/replace exact text in a file. Fails if old_text not found.',
308
+ name: 'apply_patch',
309
+ description: 'Apply unified diff patch (multi-file).',
435
310
  parameters: obj({
436
- path: { type: 'string' },
437
- old_text: { type: 'string' },
438
- new_text: { type: 'string' },
439
- replace_all: { type: 'boolean' }
440
- }, ['path', 'old_text', 'new_text'])
441
- }
311
+ patch: str(),
312
+ files: { type: 'array', items: str() },
313
+ strip: int(0, 5),
314
+ }, ['patch', 'files']),
315
+ },
442
316
  },
443
317
  {
444
318
  type: 'function',
445
319
  function: {
446
- name: 'insert_file',
447
- description: 'Insert text at a specific line (0=prepend, -1=append).',
320
+ name: 'edit_range',
321
+ description: 'Replace a line range in a file.',
448
322
  parameters: obj({
449
- path: { type: 'string' },
450
- line: { type: 'integer' },
451
- text: { type: 'string' }
452
- }, ['path', 'line', 'text'])
453
- }
323
+ path: str(),
324
+ start_line: int(1),
325
+ end_line: int(1),
326
+ replacement: str(),
327
+ }, ['path', 'start_line', 'end_line', 'replacement']),
328
+ },
329
+ },
330
+ {
331
+ type: 'function',
332
+ function: {
333
+ name: 'edit_file',
334
+ description: 'Legacy exact replace (requires old_text). Prefer apply_patch/edit_range.',
335
+ parameters: obj({ path: str(), old_text: str(), new_text: str(), replace_all: bool() }, ['path', 'old_text', 'new_text']),
336
+ },
337
+ },
338
+ {
339
+ type: 'function',
340
+ function: {
341
+ name: 'insert_file',
342
+ description: 'Insert text at line (0=prepend, -1=append).',
343
+ parameters: obj({ path: str(), line: int(), text: str() }, ['path', 'line', 'text']),
344
+ },
454
345
  },
346
+ // ────────────────────────────────────────────────────────────────────────────
347
+ // Bounded listings/search (expose existing caps)
348
+ // ────────────────────────────────────────────────────────────────────────────
455
349
  {
456
350
  type: 'function',
457
351
  function: {
458
352
  name: 'list_dir',
459
- description: 'List directory contents (optional recursive, max depth 3).',
460
- parameters: obj({
461
- path: { type: 'string' },
462
- recursive: { type: 'boolean' },
463
- }, ['path'])
464
- }
353
+ description: 'List directory entries.',
354
+ parameters: obj({ path: str(), recursive: bool(), max_entries: int(1, 500) }, ['path']),
355
+ },
465
356
  },
466
357
  {
467
358
  type: 'function',
468
359
  function: {
469
360
  name: 'search_files',
470
- description: 'Search for a regex pattern in files under a directory.',
471
- parameters: obj({
472
- pattern: { type: 'string' },
473
- path: { type: 'string' },
474
- include: { type: 'string' },
475
- }, ['pattern', 'path'])
476
- }
361
+ description: 'Search regex in files.',
362
+ parameters: obj({ pattern: str(), path: str(), include: str(), max_results: int(1, 100) }, ['pattern', 'path']),
363
+ },
477
364
  },
365
+ // ────────────────────────────────────────────────────────────────────────────
366
+ // Exec (minified schema)
367
+ // ────────────────────────────────────────────────────────────────────────────
478
368
  {
479
369
  type: 'function',
480
370
  function: {
481
371
  name: 'exec',
482
- description: 'Run a shell command (bash -c) with timeout; returns JSON rc/out/err. Each call is a new shell — cwd does not persist between calls.',
483
- parameters: obj({
484
- command: { type: 'string', description: 'Shell command to run' },
485
- cwd: { type: 'string', description: 'Working directory (default: project root). Use this instead of cd.' },
486
- timeout: { type: 'integer', description: 'Timeout in seconds (default: 30, max: 120). Use 60-120 for npm install, builds, or test suites.' }
487
- }, ['command'])
488
- }
489
- }
372
+ description: 'Run bash -c; returns JSON rc/out/err.',
373
+ parameters: obj({ command: str(), cwd: str(), timeout: int(1, 120) }, ['command']),
374
+ },
375
+ },
490
376
  ];
491
377
  if (opts?.allowSpawnTask !== false) {
492
378
  schemas.push({
493
379
  type: 'function',
494
380
  function: {
495
381
  name: 'spawn_task',
496
- description: 'Delegate a focused task to an isolated sub-agent session (no parent chat history).',
382
+ description: 'Run a sub-agent task (no parent history).',
497
383
  parameters: obj({
498
- task: { type: 'string', description: 'Instruction for the sub-agent' },
499
- context_files: {
500
- type: 'array',
501
- description: 'Optional extra files to inject into sub-agent context',
502
- items: { type: 'string' },
503
- },
504
- model: { type: 'string', description: 'Optional model override for this task' },
505
- endpoint: { type: 'string', description: 'Optional endpoint override for this task' },
506
- max_iterations: { type: 'integer', description: 'Optional max turn cap for the sub-agent' },
507
- max_tokens: { type: 'integer', description: 'Optional max completion tokens for the sub-agent' },
508
- timeout_sec: { type: 'integer', description: 'Optional timeout for this sub-agent run (seconds)' },
509
- system_prompt: { type: 'string', description: 'Optional sub-agent system prompt override for this task' },
384
+ task: str(),
385
+ context_files: { type: 'array', items: str() },
386
+ model: str(),
387
+ endpoint: str(),
388
+ max_iterations: int(),
389
+ max_tokens: int(),
390
+ timeout_sec: int(),
391
+ system_prompt: str(),
510
392
  approval_mode: { type: 'string', enum: ['plan', 'reject', 'default', 'auto-edit', 'yolo'] },
511
- }, ['task'])
512
- }
393
+ }, ['task']),
394
+ },
513
395
  });
514
396
  }
515
397
  if (opts?.activeVaultTools) {
516
- schemas.push({
517
- type: 'function',
518
- function: {
519
- name: 'vault_search',
520
- description: 'Search vault entries (notes and previous tool outputs) to reuse prior high-signal findings.',
521
- parameters: obj({
522
- query: { type: 'string' },
523
- limit: { type: 'integer' }
524
- }, ['query'])
525
- }
526
- }, {
527
- type: 'function',
528
- function: {
529
- name: 'vault_note',
530
- description: 'Persist a concise, high-signal note into the Trifecta vault.',
531
- parameters: obj({
532
- key: { type: 'string' },
533
- value: { type: 'string' }
534
- }, ['key', 'value'])
535
- }
536
- });
398
+ schemas.push({ type: 'function', function: { name: 'vault_search', description: 'Search vault.', parameters: obj({ query: str(), limit: int() }, ['query']) } }, { type: 'function', function: { name: 'vault_note', description: 'Write vault note.', parameters: obj({ key: str(), value: str() }, ['key', 'value']) } });
537
399
  }
538
400
  // Phase 9: sys_context tool is only available in sys mode.
539
401
  if (opts?.sysMode) {
@@ -544,54 +406,36 @@ function buildToolsSchema(opts) {
544
406
  type: 'function',
545
407
  function: {
546
408
  name: 'lsp_diagnostics',
547
- description: 'Get current LSP diagnostics (errors/warnings) for a file or the whole project. Structured — replaces running build commands to check for errors.',
548
- parameters: obj({
549
- path: { type: 'string', description: 'File path (omit for project-wide diagnostics)' },
550
- severity: { type: 'integer', description: '1=Error, 2=Warning, 3=Info, 4=Hint (default: config threshold)' },
551
- }, [])
409
+ description: 'Get LSP diagnostics (errors/warnings) for file or project.',
410
+ parameters: obj({ path: str(), severity: int() }, [])
552
411
  }
553
412
  }, {
554
413
  type: 'function',
555
414
  function: {
556
415
  name: 'lsp_symbols',
557
- description: 'List all symbols (functions, classes, variables) in a file via LSP.',
558
- parameters: obj({
559
- path: { type: 'string' },
560
- }, ['path'])
416
+ description: 'List symbols (functions, classes, vars) in a file.',
417
+ parameters: obj({ path: str() }, ['path'])
561
418
  }
562
419
  }, {
563
420
  type: 'function',
564
421
  function: {
565
422
  name: 'lsp_hover',
566
- description: 'Get type info and documentation for a symbol at a position.',
567
- parameters: obj({
568
- path: { type: 'string' },
569
- line: { type: 'integer' },
570
- character: { type: 'integer' },
571
- }, ['path', 'line', 'character'])
423
+ description: 'Get type/docs for symbol at position.',
424
+ parameters: obj({ path: str(), line: int(), character: int() }, ['path', 'line', 'character'])
572
425
  }
573
426
  }, {
574
427
  type: 'function',
575
428
  function: {
576
429
  name: 'lsp_definition',
577
- description: 'Go to definition of a symbol at a given position.',
578
- parameters: obj({
579
- path: { type: 'string' },
580
- line: { type: 'integer' },
581
- character: { type: 'integer' },
582
- }, ['path', 'line', 'character'])
430
+ description: 'Go to definition of symbol at position.',
431
+ parameters: obj({ path: str(), line: int(), character: int() }, ['path', 'line', 'character'])
583
432
  }
584
433
  }, {
585
434
  type: 'function',
586
435
  function: {
587
436
  name: 'lsp_references',
588
- description: 'Find all references to a symbol at a given position.',
589
- parameters: obj({
590
- path: { type: 'string' },
591
- line: { type: 'integer' },
592
- character: { type: 'integer' },
593
- max_results: { type: 'integer', description: 'Cap results (default 50)' },
594
- }, ['path', 'line', 'character'])
437
+ description: 'Find all references to symbol at position.',
438
+ parameters: obj({ path: str(), line: int(), character: int(), max_results: int() }, ['path', 'line', 'character'])
595
439
  }
596
440
  });
597
441
  }
@@ -600,203 +444,6 @@ function buildToolsSchema(opts) {
600
444
  }
601
445
  return schemas;
602
446
  }
603
- /** @internal Exported for testing. Parses tool calls from model content when tool_calls array is empty. */
604
- export function parseToolCallsFromContent(content) {
605
- // Fallback parser: if model printed JSON tool_calls in content.
606
- const trimmed = content.trim();
607
- const tryParse = (s) => {
608
- try {
609
- return JSON.parse(s);
610
- }
611
- catch {
612
- return null;
613
- }
614
- };
615
- // Case 1: whole content is JSON
616
- const whole = tryParse(trimmed);
617
- if (whole?.tool_calls && Array.isArray(whole.tool_calls))
618
- return whole.tool_calls;
619
- if (whole?.name && whole?.arguments) {
620
- return [
621
- {
622
- id: 'call_0',
623
- type: 'function',
624
- function: { name: String(whole.name), arguments: JSON.stringify(whole.arguments) }
625
- }
626
- ];
627
- }
628
- // Case 2: raw JSON array of tool calls (model writes [{name, arguments}, ...])
629
- const arrStart = trimmed.indexOf('[');
630
- const arrEnd = trimmed.lastIndexOf(']');
631
- if (arrStart !== -1 && arrEnd !== -1 && arrEnd > arrStart) {
632
- const arrSub = tryParse(trimmed.slice(arrStart, arrEnd + 1));
633
- if (Array.isArray(arrSub) && arrSub.length > 0 && arrSub[0]?.name) {
634
- return arrSub.map((item, i) => ({
635
- id: `call_${i}`,
636
- type: 'function',
637
- function: {
638
- name: String(item.name),
639
- arguments: typeof item.arguments === 'string' ? item.arguments : JSON.stringify(item.arguments ?? {})
640
- }
641
- }));
642
- }
643
- }
644
- // Case 3: find a JSON object substring (handles tool_calls wrapper OR single tool-call)
645
- const start = trimmed.indexOf('{');
646
- const end = trimmed.lastIndexOf('}');
647
- if (start !== -1 && end !== -1 && end > start) {
648
- const sub = tryParse(trimmed.slice(start, end + 1));
649
- if (sub?.tool_calls && Array.isArray(sub.tool_calls))
650
- return sub.tool_calls;
651
- if (sub?.name && sub?.arguments) {
652
- return [
653
- {
654
- id: 'call_0',
655
- type: 'function',
656
- function: { name: String(sub.name), arguments: typeof sub.arguments === 'string' ? sub.arguments : JSON.stringify(sub.arguments) }
657
- }
658
- ];
659
- }
660
- }
661
- // Case 4: XML tool calls — used by Qwen, Hermes, and other models whose chat
662
- // templates emit <tool_call><function=name><parameter=key>value</parameter></function></tool_call>.
663
- // When llama-server's XML→JSON conversion fails (common with large write_file content),
664
- // the raw XML leaks into the content field. This recovers it.
665
- const xmlCalls = parseXmlToolCalls(trimmed);
666
- if (xmlCalls?.length)
667
- return xmlCalls;
668
- // Case 5: Lightweight function-tag calls (seen in some Qwen content-mode outputs):
669
- // <function=tool_name>
670
- // {...json args...}
671
- // </function>
672
- // or single-line <function=tool_name>{...}</function>
673
- const fnTagCalls = parseFunctionTagToolCalls(trimmed);
674
- if (fnTagCalls?.length)
675
- return fnTagCalls;
676
- return null;
677
- }
678
- /**
679
- * Parse XML-style tool calls from content.
680
- * Format: <tool_call><function=name><parameter=key>value</parameter>...</function></tool_call>
681
- * Handles multiple tool call blocks and arbitrary parameter names/values.
682
- */
683
- function parseXmlToolCalls(content) {
684
- // Quick bailout: no point parsing if there's no <tool_call> marker
685
- if (!content.includes('<tool_call>'))
686
- return null;
687
- const calls = [];
688
- // Match each <tool_call>...</tool_call> block.
689
- // Using a manual scan instead of a single greedy regex to handle nested angle brackets
690
- // in parameter values (e.g. TypeScript generics, JSX, comparison operators).
691
- let searchFrom = 0;
692
- while (searchFrom < content.length) {
693
- const blockStart = content.indexOf('<tool_call>', searchFrom);
694
- if (blockStart === -1)
695
- break;
696
- const blockEnd = content.indexOf('</tool_call>', blockStart);
697
- if (blockEnd === -1)
698
- break; // Truncated — can't recover partial tool calls
699
- const block = content.slice(blockStart + '<tool_call>'.length, blockEnd);
700
- searchFrom = blockEnd + '</tool_call>'.length;
701
- // Extract function name: <function=name>...</function>
702
- const fnMatch = block.match(/<function=(\w[\w.-]*)>/);
703
- if (!fnMatch)
704
- continue;
705
- const fnName = fnMatch[1];
706
- const fnStart = block.indexOf(fnMatch[0]) + fnMatch[0].length;
707
- const fnEnd = block.lastIndexOf('</function>');
708
- const fnBody = fnEnd !== -1 ? block.slice(fnStart, fnEnd) : block.slice(fnStart);
709
- // Extract parameters: <parameter=key>value</parameter>
710
- // Uses bracket-matching (depth counting) so that parameter values containing
711
- // literal <parameter=...>...</parameter> (e.g. writing XML files) are handled
712
- // correctly instead of being truncated at the inner close tag.
713
- const args = {};
714
- const openRe = /<parameter=(\w[\w.-]*)>/g;
715
- const closeTag = '</parameter>';
716
- let paramMatch;
717
- while ((paramMatch = openRe.exec(fnBody)) !== null) {
718
- const paramName = paramMatch[1];
719
- const valueStart = paramMatch.index + paramMatch[0].length;
720
- // Bracket-match: find the </parameter> that balances this open tag.
721
- // Depth starts at 1; nested <parameter=...> increments, </parameter> decrements.
722
- let depth = 1;
723
- let scanPos = valueStart;
724
- let closeIdx = -1;
725
- while (scanPos < fnBody.length && depth > 0) {
726
- const nextOpen = fnBody.indexOf('<parameter=', scanPos);
727
- const nextClose = fnBody.indexOf(closeTag, scanPos);
728
- if (nextClose === -1)
729
- break; // No more close tags — truncated
730
- if (nextOpen !== -1 && nextOpen < nextClose) {
731
- // An open tag comes before the next close — increase depth
732
- depth++;
733
- scanPos = nextOpen + 1; // advance past '<' to avoid re-matching
734
- }
735
- else {
736
- // Close tag comes first — decrease depth
737
- depth--;
738
- if (depth === 0) {
739
- closeIdx = nextClose;
740
- }
741
- scanPos = nextClose + closeTag.length;
742
- }
743
- }
744
- if (closeIdx === -1) {
745
- // No matching close tag — take rest of body as value (truncated output)
746
- args[paramName] = fnBody.slice(valueStart).trim();
747
- break;
748
- }
749
- // Trim exactly the template-added leading/trailing newline, preserve internal whitespace
750
- let value = fnBody.slice(valueStart, closeIdx);
751
- if (value.startsWith('\n'))
752
- value = value.slice(1);
753
- if (value.endsWith('\n'))
754
- value = value.slice(0, -1);
755
- args[paramName] = value;
756
- // Advance the regex past the close tag so the next openRe.exec starts after it
757
- openRe.lastIndex = closeIdx + closeTag.length;
758
- }
759
- if (fnName && Object.keys(args).length > 0) {
760
- calls.push({
761
- id: `call_xml_${calls.length}`,
762
- type: 'function',
763
- function: {
764
- name: fnName,
765
- arguments: JSON.stringify(args)
766
- }
767
- });
768
- }
769
- }
770
- return calls.length > 0 ? calls : null;
771
- }
772
- /** Check for missing required params by tool name — universal pre-dispatch validation */
773
- function getMissingRequiredParams(toolName, args) {
774
- const required = {
775
- read_file: ['path'],
776
- read_files: ['requests'],
777
- write_file: ['path', 'content'],
778
- edit_file: ['path', 'old_text', 'new_text'],
779
- insert_file: ['path', 'line', 'text'],
780
- list_dir: ['path'],
781
- search_files: ['pattern', 'path'],
782
- exec: ['command'],
783
- spawn_task: ['task'],
784
- sys_context: [],
785
- vault_search: ['query'],
786
- vault_note: ['key', 'value']
787
- };
788
- const req = required[toolName];
789
- if (!req)
790
- return [];
791
- return req.filter(p => args[p] === undefined || args[p] === null);
792
- }
793
- /** Strip markdown code fences (```json ... ```) from tool argument strings */
794
- function stripMarkdownFences(s) {
795
- const trimmed = s.trim();
796
- // Match ```json\n...\n``` or ```\n...\n```
797
- const m = /^```(?:json)?\s*\n?([\s\S]*?)\n?```\s*$/.exec(trimmed);
798
- return m ? m[1] : s;
799
- }
800
447
  function isReadOnlyTool(name) {
801
448
  return name === 'read_file' || name === 'read_files' || name === 'list_dir' || name === 'search_files' || name === 'vault_search' || name === 'sys_context';
802
449
  }
@@ -805,6 +452,10 @@ function planModeSummary(name, args) {
805
452
  switch (name) {
806
453
  case 'write_file':
807
454
  return `write ${args.path ?? 'unknown'} (${typeof args.content === 'string' ? args.content.split('\n').length : '?'} lines)`;
455
+ case 'apply_patch':
456
+ return `apply patch to ${Array.isArray(args.files) ? args.files.length : '?'} file(s)`;
457
+ case 'edit_range':
458
+ return `edit ${args.path ?? 'unknown'} lines ${args.start_line ?? '?'}-${args.end_line ?? '?'}`;
808
459
  case 'edit_file':
809
460
  return `edit ${args.path ?? 'unknown'} (replace ${typeof args.old_text === 'string' ? args.old_text.split('\n').length : '?'} lines)`;
810
461
  case 'insert_file':
@@ -839,148 +490,6 @@ function userDisallowsDelegation(content) {
839
490
  /\b(?:spawn[_\-\s]?task|sub[\-\s]?agents?|delegate|delegation)\b[^\n.]{0,50}\b(?:do not|don't|dont|not allowed|forbidden|no)\b/.test(text);
840
491
  return negationNearDelegation;
841
492
  }
842
- function reviewArtifactKeys(projectDir) {
843
- const { projectId } = projectIndexKeys(projectDir);
844
- return {
845
- projectId,
846
- latestKey: `artifact:review:latest:${projectId}`,
847
- byIdPrefix: `artifact:review:item:${projectId}:`,
848
- };
849
- }
850
- function looksLikeCodeReviewRequest(text) {
851
- const t = text.toLowerCase();
852
- if (!t.trim())
853
- return false;
854
- if (/^\s*\/review\b/.test(t))
855
- return true;
856
- if (/\b(?:code\s+review|security\s+review|review\s+the\s+(?:code|diff|changes|repo|repository|pr)|audit\s+the\s+code)\b/.test(t))
857
- return true;
858
- return /\breview\b/.test(t) && /\b(?:code|repo|repository|diff|changes|pull\s*request|pr)\b/.test(t);
859
- }
860
- function looksLikeReviewRetrievalRequest(text) {
861
- const t = text.toLowerCase();
862
- if (!t.trim())
863
- return false;
864
- if (/^\s*\/review\s+(?:print|show|replay|latest|last|full)\b/.test(t))
865
- return true;
866
- if (!/\breview\b/.test(t))
867
- return false;
868
- if (/\bprint\s+stale\s+review\s+anyway\b/.test(t))
869
- return true;
870
- if (/\b(?:print|show|display|repeat|paste|send|output|give)\b[^\n.]{0,80}\breview\b[^\n.]{0,40}\b(?:again|back)\b/.test(t))
871
- return true;
872
- if (/\b(?:print|show|display|repeat|paste|send|output|give)\b[^\n.]{0,80}\b(?:full|entire|complete|whole)\b[^\n.]{0,80}\breview\b/.test(t))
873
- return true;
874
- if (/\b(?:full|entire|complete|whole)\b[^\n.]{0,30}\bcode\s+review\b/.test(t) && /\b(?:print|show|display|repeat|paste|send|output|give)\b/.test(t))
875
- return true;
876
- if (/\b(?:print|show|display|repeat|paste|send|output|give)\b[^\n.]{0,80}\b(?:last|latest|previous)\b[^\n.]{0,40}\breview\b/.test(t))
877
- return true;
878
- return false;
879
- }
880
- function retrievalAllowsStaleArtifact(text) {
881
- const t = text.toLowerCase();
882
- if (!t.trim())
883
- return false;
884
- if (/\bprint\s+stale\s+review\s+anyway\b/.test(t))
885
- return true;
886
- if (/\b(?:force|override|ignore)\b[^\n.]{0,80}\b(?:stale|old|previous)\b[^\n.]{0,80}\breview\b/.test(t))
887
- return true;
888
- if (/\b(?:stale|old|previous)\b[^\n.]{0,80}\breview\b[^\n.]{0,80}\b(?:anyway|still|force|override|ignore)\b/.test(t))
889
- return true;
890
- return false;
891
- }
892
- function parseReviewArtifactStalePolicy(raw) {
893
- const v = typeof raw === 'string' ? raw.toLowerCase().trim() : '';
894
- if (v === 'block')
895
- return 'block';
896
- return 'warn';
897
- }
898
- function parseReviewArtifact(raw) {
899
- try {
900
- const parsed = JSON.parse(raw);
901
- if (!parsed || typeof parsed !== 'object')
902
- return null;
903
- if (parsed.kind !== 'code_review')
904
- return null;
905
- if (typeof parsed.id !== 'string' || !parsed.id)
906
- return null;
907
- if (typeof parsed.createdAt !== 'string' || !parsed.createdAt)
908
- return null;
909
- if (typeof parsed.model !== 'string')
910
- return null;
911
- if (typeof parsed.projectId !== 'string' || !parsed.projectId)
912
- return null;
913
- if (typeof parsed.projectDir !== 'string' || !parsed.projectDir)
914
- return null;
915
- if (typeof parsed.prompt !== 'string')
916
- return null;
917
- if (typeof parsed.content !== 'string')
918
- return null;
919
- return parsed;
920
- }
921
- catch {
922
- return null;
923
- }
924
- }
925
- function gitHead(cwd) {
926
- const inside = spawnSync(BASH, ['-lc', 'git rev-parse --is-inside-work-tree'], {
927
- cwd,
928
- encoding: 'utf8',
929
- timeout: 1000,
930
- });
931
- if (inside.status !== 0 || !String(inside.stdout || '').trim().startsWith('true'))
932
- return undefined;
933
- const head = spawnSync(BASH, ['-lc', 'git rev-parse HEAD'], {
934
- cwd,
935
- encoding: 'utf8',
936
- timeout: 1000,
937
- });
938
- if (head.status !== 0)
939
- return undefined;
940
- const sha = String(head.stdout || '').trim();
941
- return sha || undefined;
942
- }
943
- function shortSha(sha) {
944
- if (!sha)
945
- return 'unknown';
946
- return sha.slice(0, 8);
947
- }
948
- function reviewArtifactStaleReason(artifact, cwd) {
949
- const currentHead = gitHead(cwd);
950
- const currentDirty = isGitDirty(cwd);
951
- if (artifact.gitHead && currentHead && artifact.gitHead !== currentHead) {
952
- return `Stored review was generated at commit ${shortSha(artifact.gitHead)}; repository is now at ${shortSha(currentHead)}.`;
953
- }
954
- if (artifact.gitDirty === false && currentDirty) {
955
- return 'Stored review was generated on a clean tree; working tree now has uncommitted changes.';
956
- }
957
- return '';
958
- }
959
- function normalizeModelsResponse(raw) {
960
- if (Array.isArray(raw)) {
961
- return {
962
- data: raw
963
- .map((m) => {
964
- if (!m)
965
- return null;
966
- if (typeof m === 'string')
967
- return { id: m };
968
- if (typeof m.id === 'string' && m.id)
969
- return m;
970
- return null;
971
- })
972
- .filter(Boolean)
973
- };
974
- }
975
- if (raw && Array.isArray(raw.data)) {
976
- return {
977
- data: raw.data
978
- .map((m) => (m && typeof m.id === 'string' && m.id ? m : null))
979
- .filter(Boolean)
980
- };
981
- }
982
- return { data: [] };
983
- }
984
493
  export async function createSession(opts) {
985
494
  const cfg = opts.config;
986
495
  let client = opts.runtime?.client ?? new OpenAIClient(cfg.endpoint, opts.apiKey, cfg.verbose);
@@ -1014,7 +523,7 @@ export async function createSession(opts) {
1014
523
  modelMeta,
1015
524
  });
1016
525
  let supportsVision = supportsVisionModel(model, modelMeta, harness);
1017
- const sessionId = `session-${Date.now().toString(36)}-${Math.random().toString(36).slice(2, 10)}`;
526
+ const sessionId = `session-${timestampedId()}`;
1018
527
  const hookCfg = cfg.hooks ?? {};
1019
528
  const hookManager = opts.runtime?.hookManager ?? new HookManager({
1020
529
  enabled: hookCfg.enabled !== false,
@@ -1082,7 +591,7 @@ export async function createSession(opts) {
1082
591
  ? Number(cfg.mcp_call_timeout_sec)
1083
592
  : (Number.isFinite(cfg.mcp?.call_timeout_sec) ? Number(cfg.mcp?.call_timeout_sec) : 30);
1084
593
  const builtInToolNames = [
1085
- 'read_file', 'read_files', 'write_file', 'edit_file', 'insert_file',
594
+ 'read_file', 'read_files', 'write_file', 'apply_patch', 'edit_range', 'edit_file', 'insert_file',
1086
595
  'list_dir', 'search_files', 'exec', 'vault_search', 'vault_note', 'sys_context',
1087
596
  ...(spawnTaskEnabled ? ['spawn_task'] : []),
1088
597
  ];
@@ -2058,11 +1567,25 @@ export async function createSession(opts) {
2058
1567
  const hookObj = typeof hooks === 'function' ? { onToken: hooks } : hooks ?? {};
2059
1568
  let turns = 0;
2060
1569
  let toolCalls = 0;
2061
- const askId = `ask-${Date.now().toString(36)}-${Math.random().toString(36).slice(2, 8)}`;
1570
+ const askId = `ask-${timestampedId()}`;
2062
1571
  const emitToolCall = async (call) => {
2063
1572
  hookObj.onToolCall?.(call);
2064
1573
  await hookManager.emit('tool_call', { askId, turn: turns, call });
2065
1574
  };
1575
+ const emitToolStream = (stream) => {
1576
+ try {
1577
+ void hookObj.onToolStream?.(stream);
1578
+ }
1579
+ catch {
1580
+ // best effort
1581
+ }
1582
+ try {
1583
+ void hookManager.emit('tool_stream', { askId, turn: turns, stream });
1584
+ }
1585
+ catch {
1586
+ // best effort
1587
+ }
1588
+ };
2066
1589
  const emitToolResult = async (result) => {
2067
1590
  await hookObj.onToolResult?.(result);
2068
1591
  await hookManager.emit('tool_result', { askId, turn: turns, result });
@@ -2072,8 +1595,9 @@ export async function createSession(opts) {
2072
1595
  await hookManager.emit('turn_end', { askId, stats });
2073
1596
  };
2074
1597
  const finalizeAsk = async (text) => {
2075
- await hookManager.emit('ask_end', { askId, text, turns, toolCalls });
2076
- return { text, turns, toolCalls };
1598
+ const finalText = ensureInformativeAssistantText(text, { toolCalls, turns });
1599
+ await hookManager.emit('ask_end', { askId, text: finalText, turns, toolCalls });
1600
+ return { text: finalText, turns, toolCalls };
2077
1601
  };
2078
1602
  const rawInstructionText = userContentToText(instruction).trim();
2079
1603
  await hookManager.emit('ask_start', { askId, instruction: rawInstructionText });
@@ -2142,7 +1666,7 @@ export async function createSession(opts) {
2142
1666
  if (!clean)
2143
1667
  return;
2144
1668
  const createdAt = new Date().toISOString();
2145
- const id = `review-${Date.now().toString(36)}-${Math.random().toString(36).slice(2, 8)}`;
1669
+ const id = `review-${timestampedId()}`;
2146
1670
  const artifact = {
2147
1671
  id,
2148
1672
  kind: 'code_review',
@@ -2178,6 +1702,7 @@ export async function createSession(opts) {
2178
1702
  // identical tool call signature counts across this ask() run
2179
1703
  const sigCounts = new Map();
2180
1704
  const toolNameByCallId = new Map();
1705
+ const toolArgsByCallId = new Map();
2181
1706
  // Loop-break helper state: bump mutationVersion whenever a tool mutates files.
2182
1707
  // We also record the mutationVersion at which a given signature was last seen.
2183
1708
  let mutationVersion = 0;
@@ -2187,6 +1712,8 @@ export async function createSession(opts) {
2187
1712
  let lastTurnSigs = new Set();
2188
1713
  const consecutiveCounts = new Map();
2189
1714
  let malformedCount = 0;
1715
+ let toolRepairAttempts = 0;
1716
+ const MAX_TOOL_REPAIR_ATTEMPTS = 1;
2190
1717
  let noProgressTurns = 0;
2191
1718
  const NO_PROGRESS_TURN_CAP = 3;
2192
1719
  let noToolTurns = 0;
@@ -2219,6 +1746,42 @@ export async function createSession(opts) {
2219
1746
  }
2220
1747
  return msg;
2221
1748
  };
1749
+ const compactToolMessageForHistory = async (toolCallId, rawContent) => {
1750
+ const toolName = toolNameByCallId.get(toolCallId) ?? 'tool';
1751
+ const toolArgs = toolArgsByCallId.get(toolCallId) ?? {};
1752
+ const rawMsg = { role: 'tool', tool_call_id: toolCallId, content: rawContent };
1753
+ // Persist full-fidelity output immediately so live context can stay small.
1754
+ if (vault && typeof vault.archiveToolResult === 'function') {
1755
+ try {
1756
+ await vault.archiveToolResult(rawMsg, toolName);
1757
+ }
1758
+ catch (e) {
1759
+ console.warn(`[warn] vault archive failed: ${e instanceof Error ? e.message : String(e)}`);
1760
+ }
1761
+ }
1762
+ let compact = rawContent;
1763
+ if (lens) {
1764
+ try {
1765
+ const lensCompact = await lens.summarizeToolOutput(rawContent, toolName, typeof toolArgs.path === 'string' ? String(toolArgs.path) : undefined);
1766
+ if (typeof lensCompact === 'string' && lensCompact.length && lensCompact.length < compact.length) {
1767
+ compact = lensCompact;
1768
+ }
1769
+ }
1770
+ catch {
1771
+ // ignore lens failures; fallback to raw
1772
+ }
1773
+ }
1774
+ const success = !String(rawContent).startsWith('ERROR:');
1775
+ const digested = digestToolResult(toolName, { ...toolArgs, _tool_call_id: toolCallId }, compact, success);
1776
+ if (digested !== rawContent) {
1777
+ return {
1778
+ role: 'tool',
1779
+ tool_call_id: toolCallId,
1780
+ content: `${digested}\n[full output archived in vault: tool=${toolName}, call_id=${toolCallId}]`,
1781
+ };
1782
+ }
1783
+ return rawMsg;
1784
+ };
2222
1785
  const persistFailure = async (error, contextLine) => {
2223
1786
  if (!vault)
2224
1787
  return;
@@ -2280,7 +1843,6 @@ export async function createSession(opts) {
2280
1843
  }
2281
1844
  await maybeAutoDetectModelChange();
2282
1845
  const beforeMsgs = messages;
2283
- const beforeTokens = estimateTokensFromMessages(beforeMsgs);
2284
1846
  const compacted = enforceContextBudget({
2285
1847
  messages: beforeMsgs,
2286
1848
  contextWindow,
@@ -2289,7 +1851,6 @@ export async function createSession(opts) {
2289
1851
  compactAt: cfg.compact_at ?? 0.8,
2290
1852
  toolSchemaTokens: estimateToolSchemaTokens(getToolsSchema()),
2291
1853
  });
2292
- const compactedDropped = beforeMsgs.length > compacted.length || estimateTokensFromMessages(compacted) < beforeTokens;
2293
1854
  const compactedByRefs = new Set(compacted);
2294
1855
  const dropped = beforeMsgs.filter((m) => !compactedByRefs.has(m));
2295
1856
  if (dropped.length && vault) {
@@ -2532,7 +2093,11 @@ export async function createSession(opts) {
2532
2093
  if (visible && hookObj.onToken)
2533
2094
  hookObj.onToken('\n');
2534
2095
  toolCalls += toolCallsArr.length;
2535
- messages.push({ role: 'assistant', content: visible || '', tool_calls: toolCallsArr });
2096
+ const assistantToolCallText = visible || '';
2097
+ const compactAssistantToolCallText = assistantToolCallText.length > 900
2098
+ ? `${assistantToolCallText.slice(0, 900)}\n[history-compacted: assistant narration truncated before tool execution]`
2099
+ : assistantToolCallText;
2100
+ messages.push({ role: 'assistant', content: compactAssistantToolCallText, tool_calls: toolCallsArr });
2536
2101
  // sigCounts is scoped to the entire ask() run (see above)
2537
2102
  // Bridge ConfirmationProvider → legacy confirm callback for tools.
2538
2103
  // If a ConfirmationProvider is given, wrap it; otherwise fall back to raw callback.
@@ -2655,7 +2220,7 @@ export async function createSession(opts) {
2655
2220
  `Hint: you repeated the same tool call ${loopThreshold} times with identical arguments. ` +
2656
2221
  `If the call succeeded, move on to the next step. ` +
2657
2222
  `If it failed, check that all required parameters are present and correct. ` +
2658
- `For write_file/edit_file, ensure 'content'/'old_text'/'new_text' are included as strings.`);
2223
+ `For write_file/edit_file/apply_patch/edit_range, ensure required args are present (content/old_text/new_text/patch/files/start_line/end_line/replacement).`);
2659
2224
  }
2660
2225
  }
2661
2226
  // Update consecutive tracking: save this turn's signatures for next turn comparison.
@@ -2676,7 +2241,10 @@ export async function createSession(opts) {
2676
2241
  // Break the outer loop — this model won't self-correct
2677
2242
  throw new AgentLoopBreak(`tool ${name}: malformed JSON exceeded retry limit (${harness.toolCalls.retryOnMalformed}): ${rawArgs.slice(0, 200)}`);
2678
2243
  }
2679
- throw new Error(`tool ${name}: arguments not valid JSON: ${rawArgs.slice(0, 200)}`);
2244
+ throw new ToolError('invalid_args', `tool ${name}: arguments not valid JSON`, false, 'Return a valid JSON object for function.arguments.', { raw: rawArgs.slice(0, 200) });
2245
+ }
2246
+ if (args == null || typeof args !== 'object' || Array.isArray(args)) {
2247
+ throw new ValidationError([{ field: 'arguments', message: 'must be a JSON object', value: args }]);
2680
2248
  }
2681
2249
  const builtInFn = tools[name];
2682
2250
  const isLspTool = LSP_TOOL_NAME_SET.has(name);
@@ -2684,13 +2252,20 @@ export async function createSession(opts) {
2684
2252
  const hasMcpTool = mcpManager?.hasTool(name) === true;
2685
2253
  if (!builtInFn && !isLspTool && !hasMcpTool && !isSpawnTask)
2686
2254
  throw new Error(`unknown tool: ${name}`);
2687
- // Pre-dispatch check for missing required params.
2688
- // Universal: catches omitted params early with a clear, instructive error
2689
- // before the tool itself throws a less helpful message.
2255
+ // Keep parsed args by call-id so we can digest/archive tool outputs with context.
2256
+ toolArgsByCallId.set(callId, args && typeof args === 'object' && !Array.isArray(args) ? args : {});
2257
+ // Pre-dispatch argument validation.
2258
+ // - Required params
2259
+ // - Type/range/enums
2260
+ // - Unknown properties
2690
2261
  if (builtInFn || isSpawnTask) {
2691
2262
  const missing = getMissingRequiredParams(name, args);
2692
2263
  if (missing.length) {
2693
- throw new Error(`REQUIRED parameter(s) ${missing.map(p => `'${p}'`).join(', ')} missing. You MUST include ${missing.join(', ')} in every ${name} call.`);
2264
+ throw new ValidationError(missing.map((m) => ({ field: m, message: 'required parameter is missing', value: undefined })));
2265
+ }
2266
+ const argIssues = getArgValidationIssues(name, args);
2267
+ if (argIssues.length) {
2268
+ throw new ValidationError(argIssues.map((i) => ({ field: i.field, message: i.message, value: i.value })));
2694
2269
  }
2695
2270
  }
2696
2271
  // ── Pre-dispatch safety screening (Phase 9) ──
@@ -2798,7 +2373,13 @@ export async function createSession(opts) {
2798
2373
  content = await runSpawnTask(args);
2799
2374
  }
2800
2375
  else if (builtInFn) {
2801
- const value = await builtInFn(ctx, args);
2376
+ const callCtx = {
2377
+ ...ctx,
2378
+ toolCallId: callId,
2379
+ toolName: name,
2380
+ onToolStream: emitToolStream,
2381
+ };
2382
+ const value = await builtInFn(callCtx, args);
2802
2383
  content = typeof value === 'string' ? value : JSON.stringify(value);
2803
2384
  if (name === 'exec') {
2804
2385
  // Successful exec clears blocked-loop counters.
@@ -2862,7 +2443,8 @@ export async function createSession(opts) {
2862
2443
  }
2863
2444
  }
2864
2445
  // Hook: onToolResult (Phase 8.5 + Phase 7 rich display)
2865
- const summary = reusedCachedReadOnlyExec
2446
+ let toolSuccess = true;
2447
+ let summary = reusedCachedReadOnlyExec
2866
2448
  ? 'cached read-only exec observation (unchanged)'
2867
2449
  : toolResultSummary(name, args, content, true);
2868
2450
  const resultEvent = { id: callId, name, success: true, summary, result: content };
@@ -2872,6 +2454,14 @@ export async function createSession(opts) {
2872
2454
  const parsed = JSON.parse(content);
2873
2455
  if (parsed.out)
2874
2456
  resultEvent.execOutput = parsed.out;
2457
+ const rc = Number(parsed?.rc ?? NaN);
2458
+ if (Number.isFinite(rc)) {
2459
+ resultEvent.execRc = rc;
2460
+ const cmd = String(args?.command ?? '');
2461
+ if (execRcShouldSignalFailure(cmd) && rc !== 0) {
2462
+ toolSuccess = false;
2463
+ }
2464
+ }
2875
2465
  }
2876
2466
  catch { }
2877
2467
  }
@@ -2896,6 +2486,10 @@ export async function createSession(opts) {
2896
2486
  }
2897
2487
  catch { }
2898
2488
  }
2489
+ resultEvent.success = toolSuccess;
2490
+ if (!toolSuccess && name === 'exec' && typeof resultEvent.execRc === 'number') {
2491
+ resultEvent.summary = `rc=${resultEvent.execRc} (command failed)`;
2492
+ }
2899
2493
  await emitToolResult(resultEvent);
2900
2494
  // Proactive LSP diagnostics after file mutations
2901
2495
  if (lspManager?.hasServers() && lspCfg?.proactive_diagnostics !== false) {
@@ -2923,11 +2517,19 @@ export async function createSession(opts) {
2923
2517
  return { id: callId, content };
2924
2518
  };
2925
2519
  const results = [];
2520
+ let invalidArgsThisTurn = false;
2926
2521
  // Helper: catch tool errors but re-throw AgentLoopBreak (those must break the outer loop)
2927
2522
  const catchToolError = async (e, tc) => {
2928
2523
  if (e instanceof AgentLoopBreak)
2929
2524
  throw e;
2930
- const msg = e?.message ?? String(e);
2525
+ const te = e instanceof ToolError || e instanceof ValidationError
2526
+ ? e
2527
+ : ToolError.fromError(e, 'internal');
2528
+ if (te.code === 'invalid_args' || te.code === 'validation') {
2529
+ invalidArgsThisTurn = true;
2530
+ }
2531
+ const msg = te.message ?? String(e ?? 'unknown error');
2532
+ const toolErrorContent = te instanceof ValidationError ? te.toToolResult() : te.toToolResult();
2931
2533
  // Fast-fail repeated blocked command loops with accurate reason labeling.
2932
2534
  // Applies to direct exec attempts and spawn_task delegation attempts.
2933
2535
  if (tc.function.name === 'exec' || tc.function.name === 'spawn_task') {
@@ -2956,11 +2558,17 @@ export async function createSession(opts) {
2956
2558
  }
2957
2559
  }
2958
2560
  }
2959
- // Hook: onToolResult for errors (Phase 8.5)
2960
2561
  const callId = resolveCallId(tc);
2961
- await emitToolResult({ id: callId, name: tc.function.name, success: false, summary: msg || 'unknown error', result: `ERROR: ${msg || 'unknown error'}` });
2962
- // Never return undefined error text; it makes bench failures impossible to debug.
2963
- return { id: callId, content: `ERROR: ${msg || 'unknown tool error'}` };
2562
+ await emitToolResult({
2563
+ id: callId,
2564
+ name: tc.function.name,
2565
+ success: false,
2566
+ summary: `${te.code}: ${msg}`.slice(0, 240),
2567
+ errorCode: te.code,
2568
+ retryable: te.retryable,
2569
+ result: toolErrorContent,
2570
+ });
2571
+ return { id: callId, content: toolErrorContent };
2964
2572
  };
2965
2573
  // ── Anti-scan guardrails (§ read budget, dir scan, same-search) ──
2966
2574
  const readOnlyInTurn = toolCallsArr.filter((tc) => isReadOnlyToolDynamic(tc.function.name));
@@ -2998,6 +2606,10 @@ export async function createSession(opts) {
2998
2606
  }
2999
2607
  catch (e) {
3000
2608
  results.push(await catchToolError(e, tc));
2609
+ if (FILE_MUTATION_TOOL_SET.has(tc.function.name)) {
2610
+ // Fail-fast: after mutating tool failure, stop the remaining batch.
2611
+ break;
2612
+ }
3001
2613
  }
3002
2614
  }
3003
2615
  }
@@ -3012,6 +2624,10 @@ export async function createSession(opts) {
3012
2624
  }
3013
2625
  catch (e) {
3014
2626
  results.push(await catchToolError(e, tc));
2627
+ if (FILE_MUTATION_TOOL_SET.has(tc.function.name)) {
2628
+ // Fail-fast: after mutating tool failure, stop the remaining batch.
2629
+ break;
2630
+ }
3015
2631
  }
3016
2632
  }
3017
2633
  }
@@ -3019,7 +2635,8 @@ export async function createSession(opts) {
3019
2635
  if (ac.signal.aborted)
3020
2636
  break;
3021
2637
  for (const r of results) {
3022
- messages.push({ role: 'tool', tool_call_id: r.id, content: r.content });
2638
+ const compactToolMsg = await compactToolMessageForHistory(r.id, r.content);
2639
+ messages.push(compactToolMsg);
3023
2640
  }
3024
2641
  if (readOnlyExecTurnHints.length) {
3025
2642
  const previews = readOnlyExecTurnHints
@@ -3052,6 +2669,15 @@ export async function createSession(opts) {
3052
2669
  content: `[System] ⚠ Read budget: ${cumulativeReadOnlyCalls}/${READ_BUDGET_HARD}. ${remaining} reads remaining before hard stop. Use search_files or exec grep — do NOT continue reading files one at a time.`,
3053
2670
  });
3054
2671
  }
2672
+ // One bounded automatic repair attempt for invalid tool args.
2673
+ if (invalidArgsThisTurn && toolRepairAttempts < MAX_TOOL_REPAIR_ATTEMPTS) {
2674
+ toolRepairAttempts++;
2675
+ messages.push({
2676
+ role: 'user',
2677
+ content: '[system] Your previous tool call failed argument validation. Re-issue a corrected tool_calls array only. ' +
2678
+ 'Do not narrate. Fix required/mistyped fields and unknown keys.',
2679
+ });
2680
+ }
3055
2681
  // Hook: onTurnEnd (Phase 8.5)
3056
2682
  await emitTurnEnd({
3057
2683
  turn: turns,
@@ -3131,9 +2757,10 @@ export async function createSession(opts) {
3131
2757
  continue;
3132
2758
  }
3133
2759
  noToolTurns = 0;
2760
+ const assistantOutput = ensureInformativeAssistantText(assistantText, { toolCalls, turns });
3134
2761
  // final assistant message
3135
- messages.push({ role: 'assistant', content: assistantText });
3136
- await persistReviewArtifact(assistantText).catch(() => { });
2762
+ messages.push({ role: 'assistant', content: assistantOutput });
2763
+ await persistReviewArtifact(assistantOutput).catch(() => { });
3137
2764
  await emitTurnEnd({
3138
2765
  turn: turns,
3139
2766
  toolCalls,
@@ -3146,7 +2773,7 @@ export async function createSession(opts) {
3146
2773
  ppTps,
3147
2774
  tgTps,
3148
2775
  });
3149
- return await finalizeAsk(assistantText);
2776
+ return await finalizeAsk(assistantOutput);
3150
2777
  }
3151
2778
  const reason = `max iterations exceeded (${maxIters})`;
3152
2779
  const diag = lastSuccessfulTestRun
@@ -3293,30 +2920,4 @@ async function autoPickModel(client, cached) {
3293
2920
  clearTimeout(timer);
3294
2921
  }
3295
2922
  }
3296
- function parseFunctionTagToolCalls(content) {
3297
- const m = content.match(/<function=([\w.-]+)>([\s\S]*?)<\/function>/i);
3298
- if (!m)
3299
- return null;
3300
- const name = m[1];
3301
- const body = (m[2] ?? '').trim();
3302
- // If body contains JSON object, use it as arguments; else empty object.
3303
- let args = '{}';
3304
- const jsonStart = body.indexOf('{');
3305
- const jsonEnd = body.lastIndexOf('}');
3306
- if (jsonStart !== -1 && jsonEnd > jsonStart) {
3307
- const sub = body.slice(jsonStart, jsonEnd + 1);
3308
- try {
3309
- JSON.parse(sub);
3310
- args = sub;
3311
- }
3312
- catch {
3313
- // keep {}
3314
- }
3315
- }
3316
- return [{
3317
- id: 'call_0',
3318
- type: 'function',
3319
- function: { name, arguments: args }
3320
- }];
3321
- }
3322
2923
  //# sourceMappingURL=agent.js.map