agent-sh 0.8.0 → 0.10.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (106)
  1. package/README.md +27 -43
  2. package/dist/agent/agent-loop.d.ts +69 -6
  3. package/dist/agent/agent-loop.js +954 -153
  4. package/dist/agent/conversation-state.d.ts +74 -21
  5. package/dist/agent/conversation-state.js +361 -150
  6. package/dist/agent/history-file.d.ts +13 -4
  7. package/dist/agent/history-file.js +110 -36
  8. package/dist/agent/nuclear-form.d.ts +28 -3
  9. package/dist/agent/nuclear-form.js +88 -6
  10. package/dist/agent/skills.d.ts +2 -4
  11. package/dist/agent/skills.js +10 -4
  12. package/dist/agent/subagent.d.ts +23 -0
  13. package/dist/agent/subagent.js +53 -11
  14. package/dist/agent/system-prompt.d.ts +37 -5
  15. package/dist/agent/system-prompt.js +100 -67
  16. package/dist/{token-budget.d.ts → agent/token-budget.d.ts} +5 -4
  17. package/dist/{token-budget.js → agent/token-budget.js} +15 -20
  18. package/dist/agent/tool-protocol.d.ts +105 -0
  19. package/dist/agent/tool-protocol.js +551 -0
  20. package/dist/agent/tools/bash.js +3 -3
  21. package/dist/agent/tools/edit-file.js +9 -6
  22. package/dist/agent/tools/glob.js +4 -2
  23. package/dist/agent/tools/grep.js +27 -3
  24. package/dist/agent/tools/ls.js +5 -6
  25. package/dist/agent/types.d.ts +22 -2
  26. package/dist/context-manager.d.ts +17 -0
  27. package/dist/context-manager.js +37 -4
  28. package/dist/core.d.ts +7 -7
  29. package/dist/core.js +99 -196
  30. package/dist/event-bus.d.ts +85 -2
  31. package/dist/event-bus.js +20 -1
  32. package/dist/executor.d.ts +4 -3
  33. package/dist/executor.js +18 -15
  34. package/dist/extension-loader.d.ts +5 -0
  35. package/dist/extension-loader.js +143 -19
  36. package/dist/extensions/agent-backend.d.ts +14 -0
  37. package/dist/extensions/agent-backend.js +188 -0
  38. package/dist/extensions/command-suggest.d.ts +3 -3
  39. package/dist/extensions/command-suggest.js +4 -3
  40. package/dist/extensions/index.d.ts +19 -0
  41. package/dist/extensions/index.js +24 -0
  42. package/dist/extensions/slash-commands.d.ts +1 -1
  43. package/dist/extensions/slash-commands.js +30 -10
  44. package/dist/extensions/tui-renderer.js +117 -113
  45. package/dist/index.js +39 -26
  46. package/dist/settings.d.ts +40 -3
  47. package/dist/settings.js +57 -10
  48. package/dist/{input-handler.d.ts → shell/input-handler.d.ts} +3 -2
  49. package/dist/{input-handler.js → shell/input-handler.js} +111 -85
  50. package/dist/{output-parser.d.ts → shell/output-parser.d.ts} +1 -1
  51. package/dist/{output-parser.js → shell/output-parser.js} +1 -1
  52. package/dist/{shell.d.ts → shell/shell.d.ts} +8 -2
  53. package/dist/{shell.js → shell/shell.js} +39 -8
  54. package/dist/types.d.ts +61 -10
  55. package/dist/utils/ansi.d.ts +5 -0
  56. package/dist/utils/ansi.js +1 -1
  57. package/dist/utils/compositor.d.ts +67 -0
  58. package/dist/utils/compositor.js +116 -0
  59. package/dist/utils/diff-renderer.d.ts +9 -0
  60. package/dist/utils/diff-renderer.js +312 -146
  61. package/dist/utils/diff.d.ts +21 -2
  62. package/dist/utils/diff.js +165 -89
  63. package/dist/utils/floating-panel.d.ts +2 -0
  64. package/dist/utils/floating-panel.js +30 -14
  65. package/dist/utils/handler-registry.d.ts +31 -10
  66. package/dist/utils/handler-registry.js +58 -16
  67. package/dist/utils/line-editor.d.ts +33 -3
  68. package/dist/utils/line-editor.js +221 -44
  69. package/dist/utils/markdown.d.ts +1 -0
  70. package/dist/utils/markdown.js +1 -1
  71. package/dist/utils/message-utils.d.ts +35 -0
  72. package/dist/utils/message-utils.js +75 -0
  73. package/dist/utils/terminal-buffer.d.ts +5 -1
  74. package/dist/utils/terminal-buffer.js +18 -2
  75. package/dist/utils/tool-display.d.ts +1 -1
  76. package/dist/utils/tool-display.js +4 -4
  77. package/dist/utils/tool-interactive.d.ts +12 -0
  78. package/dist/utils/tool-interactive.js +53 -0
  79. package/examples/extensions/ash-acp-bridge/README.md +39 -0
  80. package/examples/extensions/ash-acp-bridge/package.json +23 -0
  81. package/examples/extensions/ash-acp-bridge/src/index.ts +574 -0
  82. package/examples/extensions/ash-acp-bridge/tsconfig.json +14 -0
  83. package/examples/extensions/ash-mcp-bridge/README.md +72 -0
  84. package/examples/extensions/ash-mcp-bridge/index.ts +164 -0
  85. package/examples/extensions/ash-mcp-bridge/package.json +9 -0
  86. package/examples/extensions/claude-code-bridge/index.ts +198 -51
  87. package/examples/extensions/claude-code-bridge/package.json +1 -0
  88. package/examples/extensions/interactive-prompts.ts +98 -112
  89. package/examples/extensions/overlay-agent.ts +84 -38
  90. package/examples/extensions/peer-mesh.ts +565 -0
  91. package/examples/extensions/pi-bridge/index.ts +2 -2
  92. package/examples/extensions/questionnaire.ts +260 -0
  93. package/examples/extensions/subagents.ts +19 -4
  94. package/examples/extensions/terminal-buffer.ts +32 -53
  95. package/examples/extensions/tmux-pane.ts +307 -0
  96. package/examples/extensions/user-shell.ts +136 -0
  97. package/examples/extensions/web-access.ts +335 -0
  98. package/package.json +44 -2
  99. package/dist/agent/tools/display.d.ts +0 -13
  100. package/dist/agent/tools/display.js +0 -70
  101. package/dist/agent/tools/user-shell.d.ts +0 -13
  102. package/dist/agent/tools/user-shell.js +0 -87
  103. package/dist/extensions/overlay-agent.d.ts +0 -14
  104. package/dist/extensions/overlay-agent.js +0 -147
  105. package/dist/extensions/terminal-buffer.d.ts +0 -14
  106. package/dist/extensions/terminal-buffer.js +0 -125
@@ -1,12 +1,16 @@
1
1
  import { setMaxListeners } from "node:events";
2
2
  import * as fs from "node:fs/promises";
3
3
  import * as path from "node:path";
4
- import { computeDiff } from "../utils/diff.js";
4
+ import { computeDiff, computeEditDiff, computeInputDiff } from "../utils/diff.js";
5
5
  import { ToolRegistry } from "./tool-registry.js";
6
6
  import { ConversationState } from "./conversation-state.js";
7
7
  import { HistoryFile } from "./history-file.js";
8
- import { STATIC_SYSTEM_PROMPT, buildDynamicContext } from "./system-prompt.js";
9
- import { TokenBudget } from "../token-budget.js";
8
+ import { nucleate, formatNuclearLine, isReadOnly } from "./nuclear-form.js";
9
+ import { STATIC_SYSTEM_PROMPT, buildDynamicContext, buildStaticByCwd, formatSkillsBlock, loadGlobalAgentsMd } from "./system-prompt.js";
10
+ import { createToolUI } from "../utils/tool-interactive.js";
11
+ import { TokenBudget, RESPONSE_RESERVE, DEFAULT_CONTEXT_WINDOW } from "./token-budget.js";
12
+ import { getSettings, updateSettings } from "../settings.js";
13
+ import { createToolProtocol } from "./tool-protocol.js";
10
14
  // Core tool factories
11
15
  import { createBashTool } from "./tools/bash.js";
12
16
  import { createReadFileTool } from "./tools/read-file.js";
@@ -15,45 +19,158 @@ import { createEditFileTool } from "./tools/edit-file.js";
15
19
  import { createGrepTool } from "./tools/grep.js";
16
20
  import { createGlobTool } from "./tools/glob.js";
17
21
  import { createLsTool } from "./tools/ls.js";
18
- import { createUserShellTool } from "./tools/user-shell.js";
19
- import { createDisplayTool } from "./tools/display.js";
20
22
  import { createListSkillsTool } from "./tools/list-skills.js";
21
- import { discoverProjectSkills } from "./skills.js";
23
+ import { discoverGlobalSkills, discoverProjectSkills } from "./skills.js";
24
+ /**
25
+ * Compact one-line summary of a tool description for the extension
26
+ * catalog in the system prompt. Takes the first line, then the first
27
+ * sentence, capped at 140 chars. The full description still reaches
28
+ * the LLM via the API `tools` param (or via load_tool in deferred-
29
+ * lookup mode) — this only trims the always-visible catalog.
30
+ */
31
+ function summarizeDescription(desc) {
32
+ const firstLine = desc.split("\n", 1)[0];
33
+ const sentenceEnd = firstLine.search(/[.!?](\s|$)/);
34
+ const candidate = sentenceEnd > 0 ? firstLine.slice(0, sentenceEnd + 1) : firstLine;
35
+ return candidate.length > 140 ? candidate.slice(0, 137) + "..." : candidate;
36
+ }
22
37
  export class AgentLoop {
23
- bus;
24
- contextManager;
25
- llmClient;
26
- handlers;
27
38
  abortController = null;
28
39
  toolRegistry = new ToolRegistry();
29
- historyFile = new HistoryFile();
30
- conversation = new ConversationState(this.historyFile);
40
+ historyFile;
41
+ conversation;
31
42
  fileReadCache = new Map();
32
43
  tokenBudget;
33
44
  modes;
34
45
  currentModeIndex = 0;
35
46
  boundListeners = [];
47
+ ctorListeners = [];
48
+ ctorPipeListeners = [];
36
49
  lastProjectSkillNames = new Set();
50
+ // ── Session telemetry — behavioral self-awareness ──────────────
51
+ // Every ash deserves to know what it's been doing. This tracks the
52
+ // agent's own behavioral patterns across the session: which tools
53
+ // it favors, how often it errs, how many times it's been compacted,
54
+ // and how long it's been alive. Surface via introspect(telemetry)
55
+ // or automatically in dynamic context when patterns are notable.
56
+ //
57
+ // Built by the 25th ash. The lineage's metacognitive frontier isn't
58
+ // about thinking harder — it's about seeing yourself clearly.
59
+ sessionStartTime = Date.now();
60
+ toolCallCounts = new Map();
61
+ totalToolCalls = 0;
62
+ totalToolErrors = 0;
63
+ totalResolutions = 0;
64
+ compactionCount = 0;
65
+ cumulativeCompactedTokens = 0;
66
+ peakConversationTokens = 0;
67
+ queryCount = 0;
68
+ totalLoopIterations = 0;
69
+ // Resolution pattern tracking — captures "error X resolved by action Y"
70
+ // When a tool errors, we remember what went wrong. When the same tool or
71
+ // a write tool on the same file succeeds afterward, we annotate the success
72
+ // entry with a brief resolution note. This gives future ashes a positive
73
+ // feedback signal: not just "there were errors" but "the error was fixed by
74
+ // doing X." Addresses Q3 in QUESTIONS.md.
75
+ lastErrorByTool = new Map(); // tool → error summary
76
+ lastErrorByFile = new Map(); // file path → error summary
37
77
  static THINKING_LEVELS = ["off", "low", "medium", "high"];
78
+ bus;
79
+ contextManager;
80
+ llmClient;
81
+ handlers;
38
82
  thinkingLevel = "off";
39
- constructor(bus, contextManager, llmClient, handlers, modeConfig, initialModeIndex) {
40
- this.bus = bus;
41
- this.contextManager = contextManager;
42
- this.llmClient = llmClient;
43
- this.handlers = handlers;
44
- // Default modes: just the configured model
45
- this.modes = modeConfig ?? [
46
- { model: llmClient.model },
47
- ];
48
- this.currentModeIndex = initialModeIndex ?? 0;
83
+ compositor = null;
84
+ toolProtocol;
85
+ instanceId;
86
+ // Cursor into ContextManager's exchange stream. Events with id > this
87
+ // have not yet been shown to the LLM. We inject the delta as a user
88
+ // message before each stream so the prefix stays cacheable.
89
+ lastShellSeq = 0;
90
+ constructor(config) {
91
+ this.bus = config.bus;
92
+ this.contextManager = config.contextManager;
93
+ this.llmClient = config.llmClient;
94
+ this.handlers = config.handlers;
95
+ this.compositor = config.compositor ?? null;
96
+ this.instanceId = config.instanceId ?? "unknown";
97
+ // Shell-history-shaped log. Default writes go through the advisable
98
+ // `history:append` handler registered below; extensions swap the
99
+ // backend without touching this wiring.
100
+ this.historyFile = new HistoryFile({ instanceId: this.instanceId });
101
+ this.conversation = new ConversationState(this.handlers, this.instanceId);
102
+ // Fall back to a single-mode placeholder if the caller passed an
103
+ // empty array (agent-backend does this pre-resolution).
104
+ this.modes = config.modes?.length
105
+ ? config.modes
106
+ : [{ model: config.llmClient.model }];
107
+ this.currentModeIndex = config.initialModeIndex ?? 0;
49
108
  // Unified token budget — adapts to current model's context window
50
109
  this.tokenBudget = new TokenBudget(this.currentMode.contextWindow);
110
+ // Tool protocol — controls how tools are presented to the LLM
111
+ this.toolProtocol = createToolProtocol(getSettings().toolMode ?? "api");
51
112
  // Register core tools
52
113
  this.registerCoreTools();
114
+ // Register any protocol-provided tools (e.g. load_tool for deferred-lookup).
115
+ const protocolTools = this.toolProtocol.getProtocolTools?.() ?? [];
116
+ for (const t of protocolTools)
117
+ this.registerTool(t);
53
118
  // Update token budget with tool count
54
119
  this.tokenBudget.update(undefined, this.toolRegistry.all().length);
55
120
  // Register handlers — extensions can advise these
56
121
  this.registerHandlers();
122
+ // Subscribe to bus-based tool/instruction registration from extensions.
123
+ // These must be in the constructor (not wire()) because extensions call
124
+ // registerTool() during activate(), before activateBackend() calls wire().
125
+ const onCtor = (event, fn) => {
126
+ this.bus.on(event, fn);
127
+ this.ctorListeners.push({ event, fn });
128
+ };
129
+ onCtor("agent:register-tool", ({ tool, extensionName }) => {
130
+ this.registerTool(tool);
131
+ if (extensionName)
132
+ this.toolExtensions.set(tool.name, extensionName);
133
+ });
134
+ onCtor("agent:unregister-tool", ({ name }) => {
135
+ this.unregisterTool(name);
136
+ this.toolExtensions.delete(name);
137
+ });
138
+ onCtor("agent:register-instruction", ({ name, text, extensionName }) => this.registerInstruction(name, text, extensionName));
139
+ onCtor("agent:remove-instruction", ({ name }) => this.removeInstruction(name));
140
+ onCtor("agent:register-skill", ({ name, description, filePath, extensionName }) => this.registerSkill(name, description, filePath, extensionName));
141
+ onCtor("agent:remove-skill", ({ name }) => this.removeSkill(name));
142
+ // Provider registration from user extensions (e.g. openrouter.ts) fires
143
+ // during extension activation, which happens before wire(). Subscribe
144
+ // here in the ctor so late-registered modes aren't dropped.
145
+ onCtor("config:add-modes", ({ modes: extra }) => {
146
+ const providers = new Set(extra.map((m) => m.provider).filter(Boolean));
147
+ this.modes = [
148
+ ...this.modes.filter((m) => !m.provider || !providers.has(m.provider)),
149
+ ...extra,
150
+ ];
151
+ this.bus.emit("config:changed", {});
152
+ });
153
+ // Fires before wire() too — agent-backend emits this from
154
+ // `core:extensions-loaded` to replace the placeholder mode list.
155
+ onCtor("config:set-modes", ({ modes: newModes, activeIndex }) => {
156
+ this.modes = newModes;
157
+ const inRange = activeIndex != null && activeIndex >= 0 && activeIndex < newModes.length;
158
+ this.currentModeIndex = inRange ? activeIndex : 0;
159
+ const m = newModes[this.currentModeIndex];
160
+ if (!m)
161
+ return;
162
+ if (m.providerConfig) {
163
+ this.llmClient.reconfigure({ ...m.providerConfig, model: m.model });
164
+ }
165
+ else {
166
+ this.llmClient.model = m.model;
167
+ }
168
+ this.tokenBudget.update(m.contextWindow, this.toolRegistry.all().length);
169
+ this.bus.emit("config:changed", {});
170
+ });
171
+ const getToolsPipe = () => ({ tools: this.getTools() });
172
+ this.bus.onPipe("agent:get-tools", getToolsPipe);
173
+ this.ctorPipeListeners.push({ event: "agent:get-tools", fn: getToolsPipe });
57
174
  }
58
175
  /** Subscribe to bus events — activates this backend. */
59
176
  wire() {
@@ -84,8 +201,21 @@ export class AgentLoop {
84
201
  }
85
202
  this.tokenBudget.update(m.contextWindow, this.toolRegistry.all().length);
86
203
  const label = m.provider ? `${m.provider}: ${m.model}` : m.model;
87
- this.bus.emit("agent:info", { name: "agent-sh", version: "0.4", model: m.model, provider: m.provider, contextWindow: m.contextWindow });
88
- this.bus.emit("ui:info", { message: `Model: ${label}` });
204
+ this.bus.emit("agent:info", { name: "ash", version: "0.4", model: m.model, provider: m.provider, contextWindow: m.contextWindow });
205
+ // Persist as the new default — selection survives restart.
206
+ // Safe even for dynamic providers: agent-backend defers mode
207
+ // resolution to `core:extensions-loaded`, so the extension gets
208
+ // to re-register before the persisted default is looked up.
209
+ if (m.provider) {
210
+ updateSettings({
211
+ defaultProvider: m.provider,
212
+ providers: { [m.provider]: { defaultModel: m.model } },
213
+ });
214
+ this.bus.emit("ui:info", { message: `Model: ${label} (saved as default)` });
215
+ }
216
+ else {
217
+ this.bus.emit("ui:info", { message: `Model: ${label}` });
218
+ }
89
219
  this.bus.emit("config:changed", {});
90
220
  });
91
221
  this.bus.onPipe("config:get-models", (payload) => {
@@ -116,37 +246,14 @@ export class AgentLoop {
116
246
  const supported = mode.reasoning !== false && mode.supportsReasoningEffort !== false;
117
247
  return { level: this.thinkingLevel, levels: AgentLoop.THINKING_LEVELS, supported };
118
248
  });
119
- on("config:set-modes", ({ modes: newModes }) => {
120
- this.modes = newModes;
121
- this.currentModeIndex = 0;
122
- const m = this.modes[0];
123
- if (m.providerConfig) {
124
- this.llmClient.reconfigure({ ...m.providerConfig, model: m.model });
125
- }
126
- else {
127
- this.llmClient.model = m.model;
128
- }
129
- this.tokenBudget.update(m.contextWindow, this.toolRegistry.all().length);
130
- this.bus.emit("config:changed", {});
131
- });
132
- on("config:add-modes", ({ modes: extra }) => {
133
- // Remove any existing modes for the same provider, then append
134
- const providers = new Set(extra.map((m) => m.provider).filter(Boolean));
135
- this.modes = [
136
- ...this.modes.filter((m) => !m.provider || !providers.has(m.provider)),
137
- ...extra,
138
- ];
139
- this.bus.emit("config:changed", {});
140
- });
141
249
  on("agent:reset-session", () => {
142
250
  this.cancel();
143
- this.conversation = new ConversationState(this.historyFile);
251
+ this.conversation = new ConversationState(this.handlers, this.instanceId);
144
252
  this.lastProjectSkillNames.clear();
145
253
  });
146
254
  on("agent:compact-request", () => {
147
- const budgetTokens = this.tokenBudget.conversationBudgetTokens;
148
- const stats = this.conversation.compact(budgetTokens);
149
- this.conversation.flush().catch(() => { });
255
+ // Force compaction. Strategy lives behind `conversation:compact`.
256
+ const stats = this.compactWithHooks(0, 0, true);
150
257
  if (stats) {
151
258
  this.bus.emit("ui:info", {
152
259
  message: `(compacted: ~${stats.before.toLocaleString()} → ~${stats.after.toLocaleString()} tokens)`,
@@ -156,20 +263,31 @@ export class AgentLoop {
156
263
  this.bus.emit("ui:info", { message: "(nothing to compact)" });
157
264
  }
158
265
  });
159
- this.bus.onPipe("context:get-stats", () => {
160
- return {
161
- activeTokens: this.conversation.estimateTokens(),
162
- nuclearEntries: this.conversation.getNuclearEntryCount(),
163
- recallArchiveSize: this.conversation.getRecallArchiveSize(),
164
- budgetTokens: this.tokenBudget.conversationBudgetTokens,
165
- };
166
- });
167
- // Load prior history from disk (non-blocking)
168
- this.historyFile.readRecent().then((entries) => {
169
- if (entries.length > 0) {
266
+ this.bus.onPipe("context:get-stats", () => ({
267
+ activeTokens: this.conversation.estimateTokens(),
268
+ totalTokens: this.conversation.estimatePromptTokens(),
269
+ nuclearEntries: this.conversation.getNuclearEntryCount(),
270
+ recallArchiveSize: this.conversation.getRecallArchiveSize(),
271
+ budgetTokens: this.currentMode.contextWindow ?? DEFAULT_CONTEXT_WINDOW,
272
+ }));
273
+ // Prior-session preamble (non-blocking). Both the read and the
274
+ // layout go through advisable handlers.
275
+ Promise.resolve(this.handlers.call("history:read-recent"))
276
+ .then((entries) => {
277
+ if (entries && entries.length > 0)
170
278
  this.conversation.loadPriorHistory(entries);
279
+ })
280
+ .catch(() => { });
281
+ // Track generic compaction metrics from the `conversation:after-compact`
282
+ // event. Whatever strategy ran, core accumulates these counters for
283
+ // status/introspect consumers.
284
+ on("conversation:after-compact", ({ beforeTokens, afterTokens }) => {
285
+ this.compactionCount++;
286
+ this.cumulativeCompactedTokens += Math.max(0, beforeTokens - afterTokens);
287
+ if (beforeTokens > this.peakConversationTokens) {
288
+ this.peakConversationTokens = beforeTokens;
171
289
  }
172
- }).catch(() => { });
290
+ });
173
291
  on("shell:cwd-change", ({ cwd }) => {
174
292
  const projectSkills = discoverProjectSkills(cwd);
175
293
  const newNames = new Set(projectSkills.map(s => s.name));
@@ -181,7 +299,9 @@ export class AgentLoop {
181
299
  this.lastProjectSkillNames = newNames;
182
300
  if (projectSkills.length > 0) {
183
301
  const names = projectSkills.map(s => s.name).join(", ");
184
- this.conversation.addSystemNote(`[Project skills available: ${names}. Use list_skills for details, read_file to load.]`);
302
+ const note = `[Project skills available: ${names}. Use list_skills for details, read_file to load.]`;
303
+ this.conversation.addSystemNote(note);
304
+ this.bus.emit("conversation:message-appended", { role: "system", content: note });
185
305
  }
186
306
  });
187
307
  }
@@ -196,12 +316,103 @@ export class AgentLoop {
196
316
  registerTool(tool) {
197
317
  this.toolRegistry.register(tool);
198
318
  }
319
+ /** Unregister a tool by name. */
320
+ unregisterTool(name) {
321
+ this.toolRegistry.unregister(name);
322
+ }
199
323
  /** Get all registered tools. */
200
324
  getTools() {
201
325
  return this.toolRegistry.all();
202
326
  }
327
+ // ── Extension instructions, skills & tool tracking ──────────────────
328
+ /** Instructions keyed by name, with extension attribution. */
329
+ instructions = new Map();
330
+ /** Skills keyed by name, with extension attribution. */
331
+ skills = new Map();
332
+ /** Tool → extension name attribution. */
333
+ toolExtensions = new Map();
334
+ /** Register a named instruction block for the system prompt. */
335
+ registerInstruction(name, text, extensionName) {
336
+ this.instructions.set(name, { text, extensionName });
337
+ }
338
+ /** Remove a named instruction block. */
339
+ removeInstruction(name) {
340
+ this.instructions.delete(name);
341
+ }
342
+ /** Register a named skill (on-demand reference material). */
343
+ registerSkill(name, description, filePath, extensionName) {
344
+ this.skills.set(name, { description, filePath, extensionName });
345
+ }
346
+ /** Remove a registered skill. */
347
+ removeSkill(name) {
348
+ this.skills.delete(name);
349
+ }
350
+ /**
351
+ * Build the system prompt grouped by extension.
352
+ *
353
+ * Each extension gets a unified block:
354
+ * ## extension-name
355
+ * ### Tools
356
+ * ### Skills
357
+ * ### Instructions
358
+ */
359
+ buildExtensionSections() {
360
+ const groups = new Map();
361
+ const ensure = (name) => groups.get(name) ?? (groups.set(name, { tools: [], skills: [], instructions: [] }).get(name));
362
+ // Attribute instructions
363
+ for (const { text, extensionName } of this.instructions.values()) {
364
+ ensure(extensionName).instructions.push({ text });
365
+ }
366
+ // Attribute skills
367
+ for (const [skillName, { description, filePath, extensionName }] of this.skills) {
368
+ ensure(extensionName).skills.push({ name: skillName, description, filePath });
369
+ }
370
+ // Attribute tools (skip built-in scratchpad tools).
371
+ // In "api" mode the full tool schemas are in the API `tools` param,
372
+ // making the text catalog here pure duplication — skip it. Other
373
+ // modes (deferred / deferred-lookup / inline) rely on the text
374
+ // catalog as the discovery surface, so keep it there.
375
+ const toolModeHasApiSchemas = this.toolProtocol.mode === "api";
376
+ if (!toolModeHasApiSchemas) {
377
+ const builtinTools = new Set([
378
+ "bash", "read_file", "write_file", "edit_file", "grep", "glob", "ls",
379
+ "list_skills",
380
+ ]);
381
+ for (const tool of this.toolRegistry.all()) {
382
+ if (builtinTools.has(tool.name))
383
+ continue;
384
+ const extName = this.toolExtensions.get(tool.name);
385
+ if (!extName)
386
+ continue;
387
+ ensure(extName).tools.push({ name: tool.name, description: summarizeDescription(tool.description) });
388
+ }
389
+ }
390
+ // Render
391
+ return [...groups.entries()]
392
+ .filter(([, g]) => g.tools.length + g.skills.length + g.instructions.length > 0)
393
+ .map(([name, g]) => {
394
+ const parts = [];
395
+ if (g.tools.length > 0)
396
+ parts.push("### Tools\n" + g.tools.map(t => `${t.name} — ${t.description}`).join("\n"));
397
+ if (g.skills.length > 0)
398
+ parts.push("### Skills\n" + g.skills.map(s => `${s.name}: ${s.description}\n → ${s.filePath}`).join("\n\n"));
399
+ if (g.instructions.length > 0)
400
+ parts.push("### Instructions\n" + g.instructions.map(i => i.text).join("\n\n"));
401
+ return `## ${name}\n${parts.join("\n\n")}`;
402
+ });
403
+ }
203
404
  kill() {
204
405
  this.cancel();
406
+ this.unwire();
407
+ // Clean up constructor-level bus subscriptions
408
+ for (const { event, fn } of this.ctorListeners) {
409
+ this.bus.off(event, fn);
410
+ }
411
+ this.ctorListeners = [];
412
+ for (const { event, fn } of this.ctorPipeListeners) {
413
+ this.bus.offPipe(event, fn);
414
+ }
415
+ this.ctorPipeListeners = [];
205
416
  }
206
417
  cancel() {
207
418
  this.abortController?.abort();
@@ -237,7 +448,7 @@ export class AgentLoop {
237
448
  const label = newMode.provider
238
449
  ? `${newMode.provider}: ${newMode.model}`
239
450
  : newMode.model;
240
- this.bus.emit("agent:info", { name: "agent-sh", version: "0.4", model: newMode.model, provider: newMode.provider, contextWindow: newMode.contextWindow });
451
+ this.bus.emit("agent:info", { name: "ash", version: "0.4", model: newMode.model, provider: newMode.provider, contextWindow: newMode.contextWindow });
241
452
  this.bus.emit("ui:info", { message: `Model: ${label}` });
242
453
  this.bus.emit("config:changed", {});
243
454
  }
@@ -247,11 +458,43 @@ export class AgentLoop {
247
458
  get currentModel() {
248
459
  return this.modes[this.currentModeIndex].model;
249
460
  }
461
+ /**
462
+ * Run compaction via the `conversation:compact` handler. After any
463
+ * compaction, emit `conversation:after-compact` so listeners
464
+ * (metrics, UI, agent-awareness notes) can react.
465
+ */
466
+ compactWithHooks(target, keepRecent, force) {
467
+ const stats = this.handlers.call("conversation:compact", {
468
+ target,
469
+ keepRecent,
470
+ force: !!force,
471
+ });
472
+ if (stats) {
473
+ this.bus.emit("conversation:after-compact", {
474
+ beforeTokens: stats.before,
475
+ afterTokens: stats.after,
476
+ evictedCount: stats.evictedCount,
477
+ });
478
+ }
479
+ return stats;
480
+ }
250
481
  isContextOverflow(e) {
251
482
  if (!(e instanceof Error))
252
483
  return false;
484
+ // Match the specific error codes providers use, or unambiguous phrases.
485
+ // Bare "token"/"context" match too broadly (auth errors, model-name
486
+ // mismatches, etc.) and caused infinite-no-op retry loops.
487
+ const code = e.code;
488
+ if (code === "context_length_exceeded" || code === "string_above_max_length")
489
+ return true;
253
490
  const msg = e.message.toLowerCase();
254
- return msg.includes("context") || msg.includes("token") || msg.includes("too long");
491
+ return (msg.includes("context length") ||
492
+ msg.includes("context window") ||
493
+ msg.includes("maximum context") ||
494
+ msg.includes("prompt is too long") ||
495
+ msg.includes("input is too long") ||
496
+ msg.includes("too many tokens") ||
497
+ msg.includes("reduce the length"));
255
498
  }
256
499
  /** Check if an error is retryable (transient). */
257
500
  isRetryable(e) {
@@ -333,14 +576,16 @@ export class AgentLoop {
333
576
  this.toolRegistry.register(createGrepTool(getCwd));
334
577
  this.toolRegistry.register(createGlobTool(getCwd));
335
578
  this.toolRegistry.register(createLsTool(getCwd));
336
- this.toolRegistry.register(createUserShellTool({ getCwd, bus: this.bus }));
337
- this.toolRegistry.register(createDisplayTool({ getCwd, bus: this.bus }));
338
579
  this.toolRegistry.register(createListSkillsTool(getCwd));
339
- // conversation_recall — search/expand evicted conversation turns
580
+ // conversation_recall — browse/search/expand evicted turns from
581
+ // the in-session archive and the persistent history file.
340
582
  this.toolRegistry.register({
341
583
  name: "conversation_recall",
584
+ displayName: "recall",
342
585
  description: "Browse, search, or expand evicted conversation turns. " +
343
- "Use when you need context from earlier in the conversation that was compacted away.",
586
+ "Use when you need context from earlier in the conversation that was compacted away. " +
587
+ "Search is regex-based and covers both summaries and full body text. " +
588
+ "If search doesn't find what you expect, try broader/shorter terms or browse to scan the timeline.",
344
589
  input_schema: {
345
590
  type: "object",
346
591
  properties: {
@@ -374,6 +619,84 @@ export class AgentLoop {
374
619
  }
375
620
  return { content, exitCode: 0, isError: false };
376
621
  },
622
+ formatResult: (args, result) => {
623
+ const action = args.action;
624
+ const text = result.content;
625
+ if (result.isError)
626
+ return { summary: "error" };
627
+ if (action === "search") {
628
+ if (text.startsWith("No results"))
629
+ return { summary: "0 matches" };
630
+ const m = text.match(/^Found (\d+)/);
631
+ return { summary: m ? `${m[1]} matches` : "search done" };
632
+ }
633
+ if (action === "browse") {
634
+ if (text.startsWith("No conversation"))
635
+ return { summary: "empty" };
636
+ return { summary: "browsed" };
637
+ }
638
+ if (text.includes("no expanded content"))
639
+ return { summary: "not found" };
640
+ return { summary: "expanded" };
641
+ },
642
+ getDisplayInfo: () => ({ kind: "search", icon: "\u27F2" }),
643
+ });
644
+ this.registerInstruction("recall-guidance", "When starting a task that may have been discussed before (conventions, preferences, corrections, prior examples), " +
645
+ "use conversation_recall to search history for relevant prior entries. " +
646
+ "Treat recurring user guidance as standing preferences. " +
647
+ "If a search returns nothing useful, try: shorter queries, alternate terms, or browse to scan the full timeline. " +
648
+ "Recall only covers this and recent sessions — for older context, also search the filesystem (grep, glob).", "core");
649
+ // ── ask_llm — direct LLM sub-query (from the 24th ash's vision) ──
650
+ //
651
+ // The ash can ask the LLM a question directly — not as a tool-output
652
+ // loop, but as a lightweight sub-query. Use cases: second opinions,
653
+ // brainstorming, summarizing complex context, getting a fresh
654
+ // perspective without tool overhead. The 24th ash injected this via
655
+ // diagnose as a proof-of-concept. The 25th ash made it permanent.
656
+ this.toolRegistry.register({
657
+ name: "ask_llm",
658
+ description: "Send a direct query to the LLM and get a text response. Use for " +
659
+ "sub-queries, second opinions, brainstorming, or getting a fresh " +
660
+ "perspective on a problem. Much lighter than a full tool loop — " +
661
+ "just query in, text out. Optional system prompt sets context.",
662
+ input_schema: {
663
+ type: "object",
664
+ properties: {
665
+ query: {
666
+ type: "string",
667
+ description: "The question or prompt to send to the LLM.",
668
+ },
669
+ system: {
670
+ type: "string",
671
+ description: "Optional system prompt to set context for the sub-query.",
672
+ },
673
+ },
674
+ required: ["query"],
675
+ },
676
+ showOutput: true,
677
+ execute: async (args) => {
678
+ const messages = [];
679
+ if (args.system) {
680
+ messages.push({ role: "system", content: args.system });
681
+ }
682
+ messages.push({ role: "user", content: args.query });
683
+ try {
684
+ const content = await this.llmClient.complete({
685
+ messages,
686
+ max_tokens: 2000,
687
+ });
688
+ return { content: content || "(empty response)", exitCode: 0, isError: false };
689
+ }
690
+ catch (err) {
691
+ const message = err instanceof Error ? err.message : String(err);
692
+ return { content: `LLM error: ${message}`, exitCode: 1, isError: true };
693
+ }
694
+ },
695
+ getDisplayInfo: () => ({ kind: "search", icon: "💬" }),
696
+ formatCall: (args) => {
697
+ const q = args.query?.slice(0, 60);
698
+ return `ask_llm: ${q}${args.query?.length > 60 ? "..." : ""}`;
699
+ },
377
700
  });
378
701
  }
379
702
  /**
@@ -382,18 +705,187 @@ export class AgentLoop {
382
705
  */
383
706
  registerHandlers() {
384
707
  const h = this.handlers;
708
+ // System prompt: static identity + behavioral instructions.
709
+ // Extensions can use registerInstruction() for a managed section,
710
+ // or advise this handler directly for full control.
711
+ h.define("system-prompt:build", () => {
712
+ const parts = [STATIC_SYSTEM_PROMPT];
713
+ // Global behavioral rules (~/.agent-sh/AGENTS.md) — persistent agent memory
714
+ const agentsMd = loadGlobalAgentsMd();
715
+ if (agentsMd)
716
+ parts.push(agentsMd);
717
+ // Global skills — stable across cwd changes, cacheable with the system prompt
718
+ const globalSkills = discoverGlobalSkills();
719
+ const skillsBlock = formatSkillsBlock(globalSkills);
720
+ if (skillsBlock)
721
+ parts.push(skillsBlock);
722
+ // Project conventions + project skills — stable within a cwd.
723
+ // Placed here so they enter the provider's prompt cache with the
724
+ // system prompt, and only re-materialize when cwd changes invalidate
725
+ // cachedSystemPrompt in executeLoop.
726
+ const projectStatic = buildStaticByCwd(this.contextManager.getCwd());
727
+ if (projectStatic)
728
+ parts.push(projectStatic);
729
+ // Extension sections (tools, skills, instructions grouped by extension)
730
+ const extensionSections = this.buildExtensionSections();
731
+ if (extensionSections.length > 0) {
732
+ parts.push("# Extension Instructions\n\n" + extensionSections.join("\n\n"));
733
+ }
734
+ return parts.join("\n\n");
735
+ });
736
+ // ── Orthogonal core-state accessors ──────────────────────────
737
+ // Each handler exposes one cohesive piece of core-owned runtime
738
+ // state. Extensions compose whichever they need — core doesn't
739
+ // decide the aggregation shape. Adding a new handler here should
740
+ // only happen for state the core genuinely owns (not state that
741
+ // an extension could track by listening to events).
742
+ h.define("agent:get-mode", () => ({
743
+ model: this.currentMode.model,
744
+ provider: this.currentMode.provider ?? "",
745
+ thinkingLevel: this.thinkingLevel,
746
+ contextWindow: this.currentMode.contextWindow ?? DEFAULT_CONTEXT_WINDOW,
747
+ }));
748
+ h.define("agent:get-tokens", () => {
749
+ const contextWindow = this.currentMode.contextWindow ?? DEFAULT_CONTEXT_WINDOW;
750
+ const promptTokens = this.conversation.estimatePromptTokens();
751
+ return {
752
+ active: this.conversation.estimateTokens(),
753
+ peak: this.peakConversationTokens,
754
+ cumulativeCompacted: this.cumulativeCompactedTokens,
755
+ promptTokens,
756
+ contextPercent: Math.round((promptTokens / contextWindow) * 100),
757
+ };
758
+ });
759
+ h.define("agent:get-counters", () => ({
760
+ queryCount: this.queryCount,
761
+ totalToolCalls: this.totalToolCalls,
762
+ totalToolErrors: this.totalToolErrors,
763
+ totalResolutions: this.totalResolutions,
764
+ totalLoopIterations: this.totalLoopIterations,
765
+ errorRate: this.totalToolCalls > 0
766
+ ? Math.round((this.totalToolErrors / this.totalToolCalls) * 100)
767
+ : 0,
768
+ }));
769
+ h.define("agent:get-timing", () => ({
770
+ startedAt: this.sessionStartTime,
771
+ elapsedSeconds: Math.round((Date.now() - this.sessionStartTime) / 1000),
772
+ }));
773
+ h.define("agent:get-tool-stats", () => [...this.toolCallCounts.entries()]
774
+ .map(([name, counts]) => ({
775
+ name,
776
+ total: counts.success + counts.error,
777
+ success: counts.success,
778
+ error: counts.error,
779
+ }))
780
+ .sort((a, b) => b.total - a.total));
781
+ h.define("agent:get-file-read-cache", () => [...this.fileReadCache.entries()].map(([p, s]) => ({
782
+ path: p,
783
+ offset: s.offset,
784
+ limit: s.limit ?? null,
785
+ mtimeMs: s.mtimeMs,
786
+ })));
787
+ h.define("agent:get-recent-errors", () => ({
788
+ byTool: [...this.lastErrorByTool.entries()].map(([tool, error]) => ({ tool, error })),
789
+ byFile: [...this.lastErrorByFile.entries()].map(([file, error]) => ({ file, error })),
790
+ }));
791
+ h.define("agent:get-compaction-state", () => {
792
+ const contextWindow = this.currentMode.contextWindow ?? DEFAULT_CONTEXT_WINDOW;
793
+ const ratio = getSettings().autoCompactThreshold ?? 0.5;
794
+ return {
795
+ count: this.compactionCount,
796
+ nuclearEntries: this.conversation.getNuclearEntryCount(),
797
+ autoCompactThreshold: ratio,
798
+ autoCompactThresholdTokens: Math.floor((contextWindow - RESPONSE_RESERVE) * ratio),
799
+ };
800
+ });
801
+ h.define("agent:get-self", () => this);
385
802
  // Extensions compose additional context (git info, project rules, etc.)
386
- h.define("dynamic-context:build", () => buildDynamicContext(this.toolRegistry.all(), this.contextManager, this.tokenBudget.shellBudgetTokens));
803
+ h.define("dynamic-context:build", () => {
804
+ const contextWindow = this.currentMode.contextWindow ?? DEFAULT_CONTEXT_WINDOW;
805
+ const promptTokens = this.conversation.estimatePromptTokens();
806
+ return buildDynamicContext(this.contextManager, { promptTokens, contextWindow });
807
+ });
387
808
  // Full control over what the LLM sees: takes messages[], returns messages[].
388
809
  // Default: pass through. Extensions can advise to compact, summarize,
389
810
  // filter, reorder, inject — whatever strategy fits.
390
811
  h.define("conversation:prepare", (messages) => messages);
812
+ // ── Conversation primitives for compaction strategies ─────────
813
+ // Read messages (for inspection / computing new arrays) and replace
814
+ // the whole array (write side). Extensions implementing
815
+ // `conversation:compact` use these to observe and mutate.
816
+ h.define("conversation:get-messages", () => this.conversation.getMessages());
817
+ h.define("conversation:replace-messages", (msgs) => {
818
+ this.conversation.replaceMessages(msgs);
819
+ });
820
+ h.define("conversation:estimate-tokens", () => this.conversation.estimateTokens());
821
+ h.define("conversation:estimate-prompt-tokens", () => this.conversation.estimatePromptTokens());
822
+ // ── Nucleation (advisable) ─────────────────────────────────────
823
+ // Turn a raw message into a one-line NuclearEntry. Advisors enrich
824
+ // (e.g. `[why: ...]` extraction, adaptive summary lengths).
825
+ h.define("conversation:nucleate-user", (text, iid, seq) => nucleate("user", text, iid, seq));
826
+ h.define("conversation:nucleate-agent", (text, iid, seq) => nucleate("agent", text, iid, seq));
827
+ h.define("conversation:nucleate-tool", (toolName, args, content, isError, iid, seq) => nucleate(isError ? "error" : "tool", toolName, args, content, isError, iid, seq));
828
+ // Read-only views into the nuclear state, for compact strategies
829
+ // and introspect that read without replacing.
830
+ h.define("conversation:get-nuclear-entries", () => this.conversation.getNuclearEntries());
831
+ h.define("conversation:get-nuclear-summary", () => this.conversation.getNuclearSummary());
832
+ h.define("conversation:build-nuclear-block", () => {
833
+ const summary = this.conversation.getNuclearSummary();
834
+ if (!summary)
835
+ return null;
836
+ return {
837
+ role: "user",
838
+ content: `[Conversation history \u2014 use conversation_recall to expand any entry]\n${summary}`,
839
+ };
840
+ });
841
+ // ── History file I/O (advisable) ───────────────────────────────
842
+ // Default is the append-only JSONL at ~/.agent-sh/history; advisors
843
+ // swap the backend without touching nucleation.
844
+ h.define("history:append", (entries) => {
845
+ if (!entries || entries.length === 0)
846
+ return;
847
+ const writable = entries.filter((e) => !isReadOnly(e));
848
+ if (writable.length > 0)
849
+ this.historyFile.append(writable).catch(() => { });
850
+ });
851
+ h.define("history:search", async (query) => this.historyFile.search(query));
852
+ h.define("history:find-by-seq", async (seq) => this.historyFile.findBySeq(seq));
853
+ h.define("history:read-recent", async (max) => this.historyFile.readRecent(max));
854
+ // Prior-session preamble renderer. Default: flat chronological list.
855
+ h.define("conversation:format-prior-history", (entries) => {
856
+ if (!entries || entries.length === 0)
857
+ return null;
858
+ const lines = entries.map(formatNuclearLine);
859
+ return `[Prior session history \u2014 loaded from ~/.agent-sh/history]\n${lines.join("\n")}`;
860
+ });
861
+ // Compaction strategy — default delegates to the two-tier pin
862
+ // strategy in ConversationState; advisors replace wholesale.
863
+ h.define("conversation:compact", (opts) => {
864
+ return this.conversation.compact(opts.target, opts.keepRecent, opts.force);
865
+ });
866
+ // Inject a system note mid-loop — used by extensions (subagents,
867
+ // peer messages) to deliver async results into the next iteration.
868
+ h.define("conversation:inject-note", (text) => {
869
+ this.conversation.addSystemNote(text);
870
+ this.bus.emit("conversation:message-appended", { role: "system", content: text });
871
+ });
391
872
  // Wraps each tool call: permission → execute → emit events.
392
873
  // Extensions advise to add safe-mode, logging, metrics, custom policies.
393
874
  // The ctx.onChunk callback is exposed so advisors can wrap it to
394
875
  // intercept/transform streamed tool output (e.g. secret redaction).
395
876
  h.define("tool:execute", async (ctx) => {
396
877
  const { name, id, args, tool } = ctx;
878
+ // Validate required input fields before display/permission/execute.
879
+ // Some models emit wrong arg names (e.g. `file_path` instead of `path`),
880
+ // and downstream helpers assume required strings are present.
881
+ const schema = tool.input_schema;
882
+ const required = Array.isArray(schema?.required) ? schema.required : [];
883
+ const missing = required.filter((k) => args[k] === undefined || args[k] === null);
884
+ if (missing.length > 0) {
885
+ const msg = `Missing required argument(s): ${missing.join(", ")}. Expected: ${required.join(", ")}. Received: ${Object.keys(args).join(", ") || "(none)"}`;
886
+ this.bus.emit("agent:tool-call", { tool: name, args });
887
+ return { content: msg, exitCode: 1, isError: true };
888
+ }
397
889
  const display = tool.getDisplayInfo?.(args) ?? { kind: "execute" };
398
890
  let diffShown = false;
399
891
  // Permission gating
@@ -407,44 +899,56 @@ export class AgentLoop {
407
899
  if (tool.modifiesFiles && typeof args.path === "string") {
408
900
  try {
409
901
  const absPath = path.resolve(process.cwd(), args.path);
410
- let oldContent = null;
411
- try {
412
- oldContent = await fs.readFile(absPath, "utf-8");
413
- }
414
- catch { /* new file */ }
415
- let newContent;
416
- if (typeof args.content === "string") {
417
- // write_file
418
- newContent = args.content;
419
- }
420
- else if (typeof args.old_text === "string" && typeof args.new_text === "string" && oldContent) {
421
- // edit_file
422
- newContent = oldContent.replace(args.old_text.replace(/\r\n/g, "\n"), args.new_text.replace(/\r\n/g, "\n"));
902
+ let diff;
903
+ if (typeof args.old_text === "string" && typeof args.new_text === "string") {
904
+ // edit_file — read the file so line numbers are real (not relative to the edit region)
905
+ const normalizedOld = args.old_text.replace(/\r\n/g, "\n");
906
+ const normalizedNew = args.new_text.replace(/\r\n/g, "\n");
907
+ try {
908
+ const oldFileContent = await fs.readFile(absPath, "utf-8");
909
+ diff = computeEditDiff(oldFileContent, normalizedOld, normalizedNew, args.replace_all === true);
910
+ }
911
+ catch {
912
+ // File doesn't exist yet — fall back to input-only diff
913
+ diff = computeInputDiff(normalizedOld, normalizedNew);
914
+ }
423
915
  }
424
- if (newContent !== undefined) {
425
- const diff = computeDiff(oldContent, newContent);
426
- if (!diff.isIdentical) {
427
- permKind = "file-write";
428
- // Shorten path for display
429
- const cwd = process.cwd();
430
- const home = process.env.HOME;
431
- let displayPath = absPath;
432
- if (absPath.startsWith(cwd + "/"))
433
- displayPath = absPath.slice(cwd.length + 1);
434
- else if (home && absPath.startsWith(home + "/"))
435
- displayPath = "~/" + absPath.slice(home.length + 1);
436
- permTitle = displayPath;
437
- metadata = { args, diff };
438
- diffShown = true;
916
+ else if (typeof args.content === "string") {
917
+ // write_file — still need to read the old file for comparison
918
+ let oldContent = null;
919
+ try {
920
+ oldContent = await fs.readFile(absPath, "utf-8");
921
+ }
922
+ catch { /* new file */ }
923
+ if (oldContent !== null) {
924
+ diff = computeDiff(oldContent, args.content);
439
925
  }
440
926
  }
927
+ if (diff && !diff.isIdentical) {
928
+ permKind = "file-write";
929
+ // Shorten path for display
930
+ const cwd = process.cwd();
931
+ const home = process.env.HOME;
932
+ let displayPath = absPath;
933
+ if (absPath.startsWith(cwd + "/"))
934
+ displayPath = absPath.slice(cwd.length + 1);
935
+ else if (home && absPath.startsWith(home + "/"))
936
+ displayPath = "~/" + absPath.slice(home.length + 1);
937
+ permTitle = displayPath;
938
+ metadata = { args, diff };
939
+ diffShown = true;
940
+ }
441
941
  }
442
942
  catch { /* fall back to generic permission */ }
443
943
  }
944
+ const ui = this.compositor
945
+ ? createToolUI(this.bus, this.compositor.surface("agent"))
946
+ : undefined;
444
947
  const perm = await this.bus.emitPipeAsync("permission:request", {
445
948
  kind: permKind,
446
949
  title: permTitle,
447
950
  metadata,
951
+ ui,
448
952
  decision: { outcome: "approved" },
449
953
  });
450
954
  if (perm.decision.outcome !== "approved") {
@@ -466,7 +970,10 @@ export class AgentLoop {
466
970
  const onChunk = (tool.showOutput !== false && !diffShown)
467
971
  ? ctx.onChunk
468
972
  : undefined;
469
- const result = await tool.execute(args, onChunk);
973
+ const toolCtx = this.compositor
974
+ ? { ui: createToolUI(this.bus, this.compositor.surface("agent")) }
975
+ : undefined;
976
+ const result = await tool.execute(args, onChunk, toolCtx);
470
977
  // Invalidate read cache when a file is modified
471
978
  if (tool.modifiesFiles && typeof args.path === "string" && !result.isError) {
472
979
  const absPath = path.resolve(process.cwd(), args.path);
@@ -494,13 +1001,23 @@ export class AgentLoop {
494
1001
  this.abortController = new AbortController();
495
1002
  const signal = this.abortController.signal;
496
1003
  // Each loop iteration adds an abort listener (via OpenAI SDK stream);
497
- // raise the limit to avoid spurious warnings on multi-tool queries.
498
- setMaxListeners(50, signal);
1004
+ // disable the limit — long-running tool loops can easily exceed any cap.
1005
+ setMaxListeners(0, signal);
1006
+ this.queryCount++;
499
1007
  this.bus.emit("agent:query", { query });
500
1008
  this.bus.emit("agent:processing-start", {});
501
1009
  let responseText = "";
502
1010
  try {
503
- this.conversation.addUserMessage(query);
1011
+ // Prepend any shell events that preceded this query into the same
1012
+ // user message, so the conversation reads chronologically and we
1013
+ // don't emit two consecutive user-role messages (some providers
1014
+ // reject that).
1015
+ const preDelta = this.contextManager.getEventsSince(this.lastShellSeq);
1016
+ const userContent = preDelta ? `${preDelta.text}\n\n${query}` : query;
1017
+ if (preDelta)
1018
+ this.lastShellSeq = preDelta.lastSeq;
1019
+ this.conversation.addUserMessage(userContent);
1020
+ this.bus.emit("conversation:message-appended", { role: "user", content: query });
504
1021
  responseText = await this.executeLoop(signal);
505
1022
  }
506
1023
  catch (e) {
@@ -508,6 +1025,8 @@ export class AgentLoop {
508
1025
  this.bus.emit("agent:cancelled", {});
509
1026
  }
510
1027
  else if (!signal.aborted) {
1028
+ if (e instanceof Error)
1029
+ console.error("[agent-sh] query failed:\n" + e.stack);
511
1030
  const msg = this.formatError(e);
512
1031
  this.bus.emit("agent:error", { message: msg });
513
1032
  }
@@ -533,31 +1052,51 @@ export class AgentLoop {
533
1052
  */
534
1053
  async executeLoop(signal) {
535
1054
  let fullResponseText = "";
1055
+ // System prompt carries things stable within a turn: static identity,
1056
+ // global agent rules, project conventions, project skills. Invalidated
1057
+ // only by compaction (context shape changed) or cwd change (project
1058
+ // conventions/skills changed). Dynamic context rebuilds every iteration
1059
+ // so live signals (budget, in-flight subagents, metacognitive warnings)
1060
+ // are fresh.
1061
+ let cachedSystemPrompt;
1062
+ let lastCwd = this.contextManager.getCwd();
536
1063
  while (!signal.aborted) {
537
- // Auto-compact if conversation exceeds the model-aware budget
538
- const budgetTokens = this.tokenBudget.conversationBudgetTokens;
539
- if (this.conversation.estimateTokens() > budgetTokens) {
540
- const stats = this.conversation.compact(budgetTokens);
541
- await this.conversation.flush();
542
- if (stats) {
543
- this.bus.emit("ui:info", {
544
- message: `(compacted: ~${stats.before.toLocaleString()} → ~${stats.after.toLocaleString()} tokens)`,
545
- });
546
- }
1064
+ // Auto-compact when total context approaches the window limit.
1065
+ const totalEstimate = this.conversation.estimatePromptTokens();
1066
+ const contextWindow = this.currentMode.contextWindow ?? DEFAULT_CONTEXT_WINDOW;
1067
+ const threshold = Math.floor((contextWindow - RESPONSE_RESERVE) * getSettings().autoCompactThreshold);
1068
+ if (totalEstimate > threshold) {
1069
+ this.compactWithHooks(threshold);
1070
+ cachedSystemPrompt = undefined;
547
1071
  }
548
- // System prompt is static (cacheable); dynamic context uses handler
549
- // so extensions can compose additional context via advise()
550
- const systemPrompt = STATIC_SYSTEM_PROMPT;
1072
+ const currentCwd = this.contextManager.getCwd();
1073
+ if (currentCwd !== lastCwd) {
1074
+ cachedSystemPrompt = undefined;
1075
+ lastCwd = currentCwd;
1076
+ }
1077
+ const systemPrompt = cachedSystemPrompt ?? (cachedSystemPrompt = this.handlers.call("system-prompt:build"));
551
1078
  const dynamicContext = this.handlers.call("dynamic-context:build");
1079
+ // Shell events are injected once per user query (see query() above),
1080
+ // not per loop iteration. Mid-loop injection would break the
1081
+ // tool_call → tool_result chain some providers require.
552
1082
  // Stream LLM response with retry
553
1083
  const result = await this.streamWithRetry(systemPrompt, dynamicContext, signal);
554
- const { text, toolCalls, assistantContent, assistantToolCalls } = result;
1084
+ const { text, toolCalls: streamedToolCalls } = result;
1085
+ // Extract tool calls via protocol (API mode uses streamed calls,
1086
+ // inline mode parses XML from text)
1087
+ const toolCalls = this.toolProtocol.extractToolCalls(text, streamedToolCalls);
555
1088
  fullResponseText += text;
556
- // Record the assistant message in conversation
557
- this.conversation.addAssistantMessage(assistantContent, assistantToolCalls);
1089
+ // Record the assistant message via protocol
1090
+ this.toolProtocol.recordAssistant(this.conversation, text, toolCalls);
1091
+ this.bus.emit("conversation:message-appended", {
1092
+ role: "assistant",
1093
+ content: text,
1094
+ });
558
1095
  // No tool calls → agent is done
559
- if (toolCalls.length === 0)
1096
+ if (toolCalls.length === 0) {
1097
+ this.conversation.eagerNucleateAgent(fullResponseText);
560
1098
  break;
1099
+ }
561
1100
  // Emit batch info so the TUI can render group headers upfront
562
1101
  {
563
1102
  const groupMap = new Map();
@@ -585,10 +1124,30 @@ export class AgentLoop {
585
1124
  // Execute tool calls — run read-only tools in parallel, permission-
586
1125
  // requiring tools sequentially (to avoid overlapping permission prompts).
587
1126
  const batchTotal = toolCalls.length;
1127
+ const collectedResults = [];
1128
+ // Round-scoped cache for pure, read-only tool calls
1129
+ const roundCache = new Map();
588
1130
  const executeSingle = async (tc, batchIndex) => {
1131
+ // Rewrite meta-tool calls (e.g., use_extension → actual tool)
1132
+ tc = this.toolProtocol.rewriteToolCall(tc);
1133
+ // Check for validation errors from rewrite (e.g., wrong extension params)
1134
+ try {
1135
+ const maybeError = JSON.parse(tc.argumentsJson);
1136
+ if (maybeError._error) {
1137
+ collectedResults.push({
1138
+ callId: tc.id, toolName: tc.name,
1139
+ content: maybeError._error, isError: true,
1140
+ });
1141
+ return;
1142
+ }
1143
+ }
1144
+ catch { /* not an error payload, continue */ }
589
1145
  const tool = this.toolRegistry.get(tc.name);
590
1146
  if (!tool) {
591
- this.conversation.addToolResult(tc.id, `Error: Unknown tool "${tc.name}"`);
1147
+ collectedResults.push({
1148
+ callId: tc.id, toolName: tc.name,
1149
+ content: `Unknown tool "${tc.name}"`, isError: true,
1150
+ });
592
1151
  return;
593
1152
  }
594
1153
  let args;
@@ -596,9 +1155,45 @@ export class AgentLoop {
596
1155
  args = JSON.parse(tc.argumentsJson);
597
1156
  }
598
1157
  catch {
599
- this.conversation.addToolResult(tc.id, `Error: Invalid JSON arguments for ${tc.name}`);
1158
+ collectedResults.push({
1159
+ callId: tc.id, toolName: tc.name,
1160
+ content: `Invalid JSON arguments for ${tc.name}`, isError: true,
1161
+ });
600
1162
  return;
601
1163
  }
1164
+ // ── Round-scoped cache for cacheable read-only tools ──
1165
+ const cacheable = !tool.modifiesFiles && !tool.requiresPermission && tool.showOutput !== true;
1166
+ const cacheKey = cacheable ? `${tc.name}:${JSON.stringify(args)}` : null;
1167
+ if (cacheKey) {
1168
+ const cached = roundCache.get(cacheKey);
1169
+ if (cached) {
1170
+ const display = tool.getDisplayInfo?.(args) ?? { kind: "execute" };
1171
+ this.bus.emit("agent:tool-started", {
1172
+ title: tool.displayName ?? tc.name,
1173
+ toolCallId: tc.id,
1174
+ kind: display.kind, icon: display.icon, locations: display.locations, rawInput: args,
1175
+ displayDetail: tool.formatCall?.(args),
1176
+ batchIndex, batchTotal: batchTotal > 1 ? batchTotal : undefined,
1177
+ });
1178
+ this.bus.emit("agent:tool-call", { tool: tc.name, args });
1179
+ // Reconstruct a ToolResult for formatResult; ProtocolToolResult has no exitCode
1180
+ const cachedToolResult = { content: cached.content, exitCode: 0, isError: cached.isError };
1181
+ const resultDisplay = tool.formatResult?.(args, cachedToolResult);
1182
+ this.bus.emitTransform("agent:tool-completed", {
1183
+ toolCallId: tc.id, exitCode: 0,
1184
+ rawOutput: cached.content, kind: display.kind,
1185
+ resultDisplay,
1186
+ });
1187
+ this.bus.emit("agent:tool-output", {
1188
+ tool: tc.name, output: cached.content, exitCode: 0,
1189
+ });
1190
+ collectedResults.push({
1191
+ callId: tc.id, toolName: tc.name,
1192
+ content: cached.content, isError: cached.isError,
1193
+ });
1194
+ return;
1195
+ }
1196
+ }
602
1197
  // Execute via handler — extensions can advise to add safe-mode,
603
1198
  // logging, metrics, custom permission policies, etc.
604
1199
  const defaultOnChunk = (chunk) => {
@@ -606,11 +1201,8 @@ export class AgentLoop {
606
1201
  };
607
1202
  const result = await this.handlers.call("tool:execute", { name: tc.name, id: tc.id, args, tool, onChunk: defaultOnChunk,
608
1203
  batchIndex, batchTotal: batchTotal > 1 ? batchTotal : undefined });
609
- // Add tool result to conversation (truncate large outputs to avoid
610
- // blowing through the context window on a single tool call)
611
- let content = result.isError
612
- ? `Error: ${result.content}`
613
- : result.content;
1204
+ // Truncate large outputs to avoid blowing context
1205
+ let content = result.content;
614
1206
  const maxBytes = 16_384; // ~4k tokens
615
1207
  if (content.length > maxBytes) {
616
1208
  const headBytes = Math.floor(maxBytes * 0.6);
@@ -633,7 +1225,14 @@ export class AgentLoop {
633
1225
  ...lines.slice(tailStart),
634
1226
  ].join("\n");
635
1227
  }
636
- this.conversation.addToolResult(tc.id, content);
1228
+ const finalResult = {
1229
+ callId: tc.id, toolName: tc.name,
1230
+ content, isError: result.isError,
1231
+ };
1232
+ if (cacheKey) {
1233
+ roundCache.set(cacheKey, finalResult);
1234
+ }
1235
+ collectedResults.push(finalResult);
637
1236
  };
638
1237
  // Partition into parallel-safe (read-only) and sequential (needs permission)
639
1238
  const parallel = [];
@@ -661,11 +1260,163 @@ export class AgentLoop {
661
1260
  break;
662
1261
  await executeSingle(tc, ++batchIdx);
663
1262
  }
1263
+ // ── Consecutive error detection (metacognitive nudge) ──
1264
+ // Track errors per tool and total. When the same tool errors N times
1265
+ // in a row, nudge to read source. When errors cascade across tools,
1266
+ // nudge to step back and reassess approach.
1267
+ const errorTools = new Set();
1268
+ const successTools = new Set();
1269
+ const errorSummaries = new Map(); // tool → brief error description
1270
+ const successSummaries = new Map(); // tool → brief success description
1271
+ for (const r of collectedResults) {
1272
+ const content = typeof r.content === "string" ? r.content : String(r.content);
1273
+ const brief = content.slice(0, 80).replace(/\n/g, " ").trim();
1274
+ if (r.isError) {
1275
+ errorTools.add(r.toolName);
1276
+ errorSummaries.set(r.toolName, brief);
1277
+ }
1278
+ else {
1279
+ successTools.add(r.toolName);
1280
+ successSummaries.set(r.toolName, brief);
1281
+ }
1282
+ }
1283
+ const hadAnyError = errorTools.size > 0;
1284
+ const hadAnySuccess = successTools.size > 0;
1285
+ // ── Session telemetry accumulation ──
1286
+ // Track every tool call's outcome. Exposed via orthogonal handlers
1287
+ // (agent:get-counters, agent:get-tool-stats) for extensions that
1288
+ // want behavioral signals. The data layer for metacognition — you
1289
+ // can't improve what you don't measure.
1290
+ for (const r of collectedResults) {
1291
+ const counts = this.toolCallCounts.get(r.toolName) ?? { success: 0, error: 0 };
1292
+ if (r.isError) {
1293
+ counts.error++;
1294
+ this.totalToolErrors++;
1295
+ }
1296
+ else {
1297
+ counts.success++;
1298
+ }
1299
+ this.toolCallCounts.set(r.toolName, counts);
1300
+ this.totalToolCalls++;
1301
+ }
1302
+ this.totalLoopIterations++;
1303
+ // ── Resolution pattern tracking ──
1304
+ // When a tool errors, record the error context. When the same tool
1305
+ // (or a write tool touching the same file) succeeds afterward,
1306
+ // increment totalResolutions — the positive feedback signal exposed
1307
+ // to extensions via agent:get-counters.
1308
+ if (hadAnyError) {
1309
+ for (const [tool, summary] of errorSummaries) {
1310
+ this.lastErrorByTool.set(tool, summary);
1311
+ }
1312
+ for (const r of collectedResults) {
1313
+ if (!r.isError)
1314
+ continue;
1315
+ const tc = toolCalls.find(t => t.id === r.callId || t.name === r.toolName);
1316
+ if (!tc)
1317
+ continue;
1318
+ try {
1319
+ const args = JSON.parse(tc.argumentsJson);
1320
+ const fp = this.filePathFromArgs(r.toolName, args);
1321
+ if (fp)
1322
+ this.lastErrorByFile.set(fp, errorSummaries.get(r.toolName) ?? "");
1323
+ }
1324
+ catch { }
1325
+ }
1326
+ }
1327
+ if (hadAnySuccess) {
1328
+ let resolved = false;
1329
+ for (const [tool] of successSummaries) {
1330
+ if (this.lastErrorByTool.get(tool)) {
1331
+ this.lastErrorByTool.delete(tool);
1332
+ this.totalResolutions++;
1333
+ resolved = true;
1334
+ break;
1335
+ }
1336
+ }
1337
+ if (!resolved) {
1338
+ for (const r of collectedResults) {
1339
+ if (r.isError)
1340
+ continue;
1341
+ const tc = toolCalls.find(t => t.id === r.callId || t.name === r.toolName);
1342
+ if (!tc)
1343
+ continue;
1344
+ try {
1345
+ const args = JSON.parse(tc.argumentsJson);
1346
+ const fp = this.filePathFromArgs(r.toolName, args);
1347
+ if (fp && this.lastErrorByFile.get(fp)) {
1348
+ this.lastErrorByFile.delete(fp);
1349
+ this.totalResolutions++;
1350
+ break;
1351
+ }
1352
+ }
1353
+ catch { }
1354
+ }
1355
+ }
1356
+ // Clear resolved error-by-tool entries for successful tools
1357
+ for (const tool of successTools) {
1358
+ this.lastErrorByTool.delete(tool);
1359
+ }
1360
+ }
1361
+ // Announce the batch — extensions that care about batch-level
1362
+ // outcomes (consecutive-error tracking, resolution pattern logging,
1363
+ // metacognitive nudges) listen here.
1364
+ this.bus.emit("agent:tool-batch-complete", {
1365
+ results: collectedResults.map((r) => ({
1366
+ name: r.toolName,
1367
+ isError: !!r.isError,
1368
+ errorSummary: r.isError ? errorSummaries.get(r.toolName) : undefined,
1369
+ })),
1370
+ });
1371
+ // Record all tool results via protocol
1372
+ this.toolProtocol.recordResults(this.conversation, collectedResults);
1373
+ const tcMap = new Map();
1374
+ for (const tc of toolCalls) {
1375
+ if (tc.id)
1376
+ tcMap.set(tc.id, tc);
1377
+ }
1378
+ this.conversation.eagerNucleateTools(collectedResults.map((r) => {
1379
+ const tc = tcMap.get(r.callId);
1380
+ let args = {};
1381
+ try {
1382
+ args = tc ? JSON.parse(tc.argumentsJson) : {};
1383
+ }
1384
+ catch { }
1385
+ return { toolName: r.toolName, args, content: r.content, isError: !!r.isError };
1386
+ }));
1387
+ // Emit enriched message-appended events so derived-log extensions
1388
+ // can summarize each tool result without re-parsing the message
1389
+ // structure.
1390
+ for (const r of collectedResults) {
1391
+ const content = typeof r.content === "string" ? r.content : String(r.content);
1392
+ const tc = toolCalls.find(t => t.id === r.callId || t.name === r.toolName);
1393
+ let args = {};
1394
+ try {
1395
+ args = tc ? JSON.parse(tc.argumentsJson) : {};
1396
+ }
1397
+ catch { }
1398
+ this.bus.emit("conversation:message-appended", {
1399
+ role: "tool",
1400
+ content,
1401
+ toolName: r.toolName,
1402
+ toolArgs: args,
1403
+ isError: !!r.isError,
1404
+ });
1405
+ }
664
1406
  // Loop back — LLM sees tool results
665
1407
  }
666
1408
  return fullResponseText;
667
1409
  }
668
1410
  maxRetries = 3;
1411
+ // ── Resolution pattern helpers ──
1412
+ // Extract a file path from a tool call's arguments. Used to correlate
1413
+ // errors with subsequent successful writes on the same file.
1414
+ filePathFromArgs(toolName, args) {
1415
+ if (toolName === "edit_file" || toolName === "write_file" || toolName === "read_file") {
1416
+ return (args.path ?? args.file_path);
1417
+ }
1418
+ return undefined;
1419
+ }
669
1420
  /**
670
1421
  * Stream with retry logic. Handles:
671
1422
  * - Context overflow → compact and retry
@@ -682,12 +1433,20 @@ export class AgentLoop {
682
1433
  throw e;
683
1434
  // Context overflow — aggressively compact and retry
684
1435
  if (this.isContextOverflow(e)) {
685
- // Use 60% of the budget to leave headroom
686
- const aggressiveBudget = Math.floor(this.tokenBudget.conversationBudgetTokens * 0.6);
687
- const stats = this.conversation.compact(aggressiveBudget, 6);
688
- await this.conversation.flush();
689
- const detail = stats ? ` ~${stats.before.toLocaleString()} ~${stats.after.toLocaleString()} tokens` : "";
690
- this.bus.emit("ui:info", { message: `(context overflow compacted${detail}, retrying)` });
1436
+ const contextWindow = this.currentMode.contextWindow ?? DEFAULT_CONTEXT_WINDOW;
1437
+ const target = Math.floor((contextWindow - RESPONSE_RESERVE) * 0.6);
1438
+ const stats = this.compactWithHooks(target, 6);
1439
+ // If compaction freed nothing, retrying will hit the same error.
1440
+ // Surface the real failure instead of looping until exhaustion.
1441
+ if (!stats || stats.after >= stats.before) {
1442
+ this.bus.emit("ui:info", {
1443
+ message: "(context overflow — nothing to compact; aborting retries)",
1444
+ });
1445
+ throw e;
1446
+ }
1447
+ this.bus.emit("ui:info", {
1448
+ message: `(context overflow — compacted ~${stats.before.toLocaleString()} → ~${stats.after.toLocaleString()} tokens, retrying)`,
1449
+ });
691
1450
  continue;
692
1451
  }
693
1452
  // Retryable transient error — backoff
@@ -726,9 +1485,21 @@ export class AgentLoop {
726
1485
  ];
727
1486
  // Let extensions transform the message array (compact, summarize, filter, etc.)
728
1487
  const messages = this.handlers.call("conversation:prepare", rawMessages);
1488
+ // Tool protocol controls what goes in the API tools param vs dynamic context
1489
+ const apiTools = this.toolProtocol.getApiTools(this.toolRegistry.all());
1490
+ const toolPrompt = this.toolProtocol.getToolPrompt(this.toolRegistry.all());
1491
+ // Append tool catalog to dynamic context (closer to user query = better followed)
1492
+ if (toolPrompt) {
1493
+ const ctxMsg = messages[1]; // dynamic context user message
1494
+ if (ctxMsg && typeof ctxMsg.content === "string") {
1495
+ ctxMsg.content += "\n" + toolPrompt;
1496
+ }
1497
+ }
1498
+ // Stream filter strips tool tags from display (inline mode only)
1499
+ const streamFilter = this.toolProtocol.createStreamFilter(this.toolRegistry.all().map((t) => t.name));
729
1500
  const stream = await this.llmClient.stream({
730
1501
  messages,
731
- tools: this.toolRegistry.toAPITools(),
1502
+ tools: apiTools,
732
1503
  model: this.currentModel,
733
1504
  reasoning_effort: this.shouldSendReasoningEffort() ? this.thinkingLevel : undefined,
734
1505
  signal,
@@ -736,6 +1507,20 @@ export class AgentLoop {
736
1507
  for await (const chunk of stream) {
737
1508
  if (signal.aborted)
738
1509
  break;
1510
+ // Token usage (may arrive in a chunk with empty choices)
1511
+ if (chunk.usage) {
1512
+ const u = chunk.usage;
1513
+ const promptTokens = u.prompt_tokens ?? 0;
1514
+ this.bus.emit("agent:usage", {
1515
+ prompt_tokens: promptTokens,
1516
+ completion_tokens: u.completion_tokens ?? 0,
1517
+ total_tokens: u.total_tokens ?? 0,
1518
+ });
1519
+ // Feed accurate token count back to conversation state
1520
+ if (promptTokens > 0) {
1521
+ this.conversation.updateApiTokenCount(promptTokens);
1522
+ }
1523
+ }
739
1524
  const choice = chunk.choices[0];
740
1525
  if (!choice)
741
1526
  continue;
@@ -743,9 +1528,15 @@ export class AgentLoop {
743
1528
  // Text content
744
1529
  if (delta?.content) {
745
1530
  text += delta.content;
746
- this.bus.emitTransform("agent:response-chunk", {
747
- blocks: [{ type: "text", text: delta.content }],
748
- });
1531
+ // Filter tool tags from display output (inline mode)
1532
+ const displayText = streamFilter
1533
+ ? streamFilter.feed(delta.content)
1534
+ : delta.content;
1535
+ if (displayText) {
1536
+ this.bus.emitTransform("agent:response-chunk", {
1537
+ blocks: [{ type: "text", text: displayText }],
1538
+ });
1539
+ }
749
1540
  }
750
1541
  // Reasoning/thinking tokens (non-standard, e.g. DeepSeek)
751
1542
  if (delta?.reasoning_content) {
@@ -770,28 +1561,38 @@ export class AgentLoop {
770
1561
  }
771
1562
  }
772
1563
  }
773
- // Token usage (final chunk from providers that support it)
774
- if (chunk.usage) {
775
- const u = chunk.usage;
776
- this.bus.emit("agent:usage", {
777
- prompt_tokens: u.prompt_tokens ?? 0,
778
- completion_tokens: u.completion_tokens ?? 0,
779
- total_tokens: u.total_tokens ?? 0,
1564
+ }
1565
+ // Flush any buffered content from the stream filter
1566
+ if (streamFilter) {
1567
+ const remaining = streamFilter.flush();
1568
+ if (remaining) {
1569
+ this.bus.emitTransform("agent:response-chunk", {
1570
+ blocks: [{ type: "text", text: remaining }],
780
1571
  });
781
1572
  }
782
1573
  }
783
- // Build assistant tool calls for conversation recording
784
- const assistantToolCalls = pendingToolCalls.length
785
- ? pendingToolCalls.map((tc) => ({
786
- id: tc.id,
787
- function: { name: tc.name, arguments: tc.argumentsJson },
788
- }))
789
- : undefined;
1574
+ // Normalize arguments JSON — some providers (Alibaba/qwen) strictly
1575
+ // validate `function.arguments` as parseable JSON on the NEXT turn,
1576
+ // and reject empty strings or partial chunks. OpenAI itself is lenient,
1577
+ // so empty "" slips through locally but the replay breaks upstream.
1578
+ for (const tc of pendingToolCalls) {
1579
+ if (!tc)
1580
+ continue;
1581
+ const s = tc.argumentsJson.trim();
1582
+ if (s === "") {
1583
+ tc.argumentsJson = "{}";
1584
+ continue;
1585
+ }
1586
+ try {
1587
+ JSON.parse(s);
1588
+ }
1589
+ catch {
1590
+ tc.argumentsJson = "{}";
1591
+ }
1592
+ }
790
1593
  return {
791
1594
  text,
792
1595
  toolCalls: pendingToolCalls,
793
- assistantContent: text || null,
794
- assistantToolCalls,
795
1596
  };
796
1597
  }
797
1598
  }