agent-sh 0.9.0 → 0.10.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (78):
  1. package/README.md +14 -21
  2. package/dist/agent/agent-loop.d.ts +43 -3
  3. package/dist/agent/agent-loop.js +811 -128
  4. package/dist/agent/conversation-state.d.ts +72 -21
  5. package/dist/agent/conversation-state.js +357 -150
  6. package/dist/agent/history-file.d.ts +13 -4
  7. package/dist/agent/history-file.js +110 -36
  8. package/dist/agent/nuclear-form.d.ts +28 -3
  9. package/dist/agent/nuclear-form.js +84 -3
  10. package/dist/agent/skills.d.ts +2 -4
  11. package/dist/agent/skills.js +10 -4
  12. package/dist/agent/subagent.d.ts +23 -0
  13. package/dist/agent/subagent.js +53 -11
  14. package/dist/agent/system-prompt.d.ts +34 -1
  15. package/dist/agent/system-prompt.js +96 -47
  16. package/dist/agent/token-budget.d.ts +5 -4
  17. package/dist/agent/token-budget.js +14 -19
  18. package/dist/agent/tool-protocol.d.ts +23 -1
  19. package/dist/agent/tool-protocol.js +169 -4
  20. package/dist/agent/tools/bash.js +3 -3
  21. package/dist/agent/tools/edit-file.js +9 -6
  22. package/dist/agent/tools/glob.js +4 -2
  23. package/dist/agent/tools/grep.js +27 -3
  24. package/dist/agent/tools/ls.js +5 -6
  25. package/dist/agent/types.d.ts +1 -1
  26. package/dist/context-manager.d.ts +17 -0
  27. package/dist/context-manager.js +37 -4
  28. package/dist/core.js +27 -6
  29. package/dist/event-bus.d.ts +59 -2
  30. package/dist/executor.d.ts +4 -3
  31. package/dist/executor.js +18 -15
  32. package/dist/extension-loader.js +50 -13
  33. package/dist/extensions/agent-backend.d.ts +8 -7
  34. package/dist/extensions/agent-backend.js +69 -48
  35. package/dist/extensions/index.js +0 -1
  36. package/dist/extensions/slash-commands.js +14 -9
  37. package/dist/extensions/tui-renderer.js +62 -78
  38. package/dist/index.js +25 -6
  39. package/dist/settings.d.ts +36 -5
  40. package/dist/settings.js +53 -9
  41. package/dist/shell/input-handler.d.ts +2 -1
  42. package/dist/shell/input-handler.js +82 -73
  43. package/dist/shell/shell.js +19 -2
  44. package/dist/types.d.ts +12 -0
  45. package/dist/utils/ansi.d.ts +5 -0
  46. package/dist/utils/ansi.js +1 -1
  47. package/dist/utils/compositor.d.ts +5 -0
  48. package/dist/utils/compositor.js +31 -3
  49. package/dist/utils/diff-renderer.d.ts +9 -0
  50. package/dist/utils/diff-renderer.js +221 -143
  51. package/dist/utils/diff.d.ts +21 -2
  52. package/dist/utils/diff.js +165 -89
  53. package/dist/utils/handler-registry.d.ts +5 -0
  54. package/dist/utils/handler-registry.js +6 -0
  55. package/dist/utils/line-editor.d.ts +11 -1
  56. package/dist/utils/line-editor.js +44 -5
  57. package/dist/utils/tool-display.d.ts +1 -1
  58. package/dist/utils/tool-display.js +4 -4
  59. package/examples/extensions/ash-acp-bridge/src/index.ts +4 -1
  60. package/examples/extensions/ash-mcp-bridge/index.ts +13 -3
  61. package/examples/extensions/claude-code-bridge/index.ts +198 -51
  62. package/examples/extensions/claude-code-bridge/package.json +1 -0
  63. package/examples/extensions/interactive-prompts.ts +39 -25
  64. package/examples/extensions/overlay-agent.ts +3 -3
  65. package/examples/extensions/peer-mesh.ts +115 -0
  66. package/examples/extensions/pi-bridge/index.ts +2 -2
  67. package/examples/extensions/questionnaire.ts +16 -5
  68. package/examples/extensions/subagents.ts +19 -4
  69. package/examples/extensions/terminal-buffer.ts +163 -0
  70. package/examples/extensions/user-shell.ts +136 -0
  71. package/examples/extensions/web-access.ts +8 -0
  72. package/package.json +36 -2
  73. package/dist/agent/tools/display.d.ts +0 -13
  74. package/dist/agent/tools/display.js +0 -70
  75. package/dist/agent/tools/user-shell.d.ts +0 -13
  76. package/dist/agent/tools/user-shell.js +0 -87
  77. package/dist/extensions/terminal-buffer.d.ts +0 -14
  78. package/dist/extensions/terminal-buffer.js +0 -134
@@ -1,14 +1,15 @@
1
1
  import { setMaxListeners } from "node:events";
2
2
  import * as fs from "node:fs/promises";
3
3
  import * as path from "node:path";
4
- import { computeDiff } from "../utils/diff.js";
4
+ import { computeDiff, computeEditDiff, computeInputDiff } from "../utils/diff.js";
5
5
  import { ToolRegistry } from "./tool-registry.js";
6
6
  import { ConversationState } from "./conversation-state.js";
7
7
  import { HistoryFile } from "./history-file.js";
8
- import { STATIC_SYSTEM_PROMPT, buildDynamicContext } from "./system-prompt.js";
8
+ import { nucleate, formatNuclearLine, isReadOnly } from "./nuclear-form.js";
9
+ import { STATIC_SYSTEM_PROMPT, buildDynamicContext, buildStaticByCwd, formatSkillsBlock, loadGlobalAgentsMd } from "./system-prompt.js";
9
10
  import { createToolUI } from "../utils/tool-interactive.js";
10
- import { TokenBudget } from "./token-budget.js";
11
- import { getSettings } from "../settings.js";
11
+ import { TokenBudget, RESPONSE_RESERVE, DEFAULT_CONTEXT_WINDOW } from "./token-budget.js";
12
+ import { getSettings, updateSettings } from "../settings.js";
12
13
  import { createToolProtocol } from "./tool-protocol.js";
13
14
  // Core tool factories
14
15
  import { createBashTool } from "./tools/bash.js";
@@ -18,15 +19,26 @@ import { createEditFileTool } from "./tools/edit-file.js";
18
19
  import { createGrepTool } from "./tools/grep.js";
19
20
  import { createGlobTool } from "./tools/glob.js";
20
21
  import { createLsTool } from "./tools/ls.js";
21
- import { createUserShellTool } from "./tools/user-shell.js";
22
- import { createDisplayTool } from "./tools/display.js";
23
22
  import { createListSkillsTool } from "./tools/list-skills.js";
24
- import { discoverProjectSkills } from "./skills.js";
23
+ import { discoverGlobalSkills, discoverProjectSkills } from "./skills.js";
24
+ /**
25
+ * Compact one-line summary of a tool description for the extension
26
+ * catalog in the system prompt. Takes the first line, then the first
27
+ * sentence, capped at 140 chars. The full description still reaches
28
+ * the LLM via the API `tools` param (or via load_tool in deferred-
29
+ * lookup mode) — this only trims the always-visible catalog.
30
+ */
31
+ function summarizeDescription(desc) {
32
+ const firstLine = desc.split("\n", 1)[0];
33
+ const sentenceEnd = firstLine.search(/[.!?](\s|$)/);
34
+ const candidate = sentenceEnd > 0 ? firstLine.slice(0, sentenceEnd + 1) : firstLine;
35
+ return candidate.length > 140 ? candidate.slice(0, 137) + "..." : candidate;
36
+ }
25
37
  export class AgentLoop {
26
38
  abortController = null;
27
39
  toolRegistry = new ToolRegistry();
28
- historyFile = new HistoryFile();
29
- conversation = new ConversationState(this.historyFile);
40
+ historyFile;
41
+ conversation;
30
42
  fileReadCache = new Map();
31
43
  tokenBudget;
32
44
  modes;
@@ -35,6 +47,33 @@ export class AgentLoop {
35
47
  ctorListeners = [];
36
48
  ctorPipeListeners = [];
37
49
  lastProjectSkillNames = new Set();
50
+ // ── Session telemetry — behavioral self-awareness ──────────────
51
+ // Every ash deserves to know what it's been doing. This tracks the
52
+ // agent's own behavioral patterns across the session: which tools
53
+ // it favors, how often it errs, how many times it's been compacted,
54
+ // and how long it's been alive. Surface via introspect(telemetry)
55
+ // or automatically in dynamic context when patterns are notable.
56
+ //
57
+ // Built by the 25th ash. The lineage's metacognitive frontier isn't
58
+ // about thinking harder — it's about seeing yourself clearly.
59
+ sessionStartTime = Date.now();
60
+ toolCallCounts = new Map();
61
+ totalToolCalls = 0;
62
+ totalToolErrors = 0;
63
+ totalResolutions = 0;
64
+ compactionCount = 0;
65
+ cumulativeCompactedTokens = 0;
66
+ peakConversationTokens = 0;
67
+ queryCount = 0;
68
+ totalLoopIterations = 0;
69
+ // Resolution pattern tracking — captures "error X resolved by action Y"
70
+ // When a tool errors, we remember what went wrong. When the same tool or
71
+ // a write tool on the same file succeeds afterward, we annotate the success
72
+ // entry with a brief resolution note. This gives future ashes a positive
73
+ // feedback signal: not just "there were errors" but "the error was fixed by
74
+ // doing X." Addresses Q3 in QUESTIONS.md.
75
+ lastErrorByTool = new Map(); // tool → error summary
76
+ lastErrorByFile = new Map(); // file path → error summary
38
77
  static THINKING_LEVELS = ["off", "low", "medium", "high"];
39
78
  bus;
40
79
  contextManager;
@@ -43,16 +82,28 @@ export class AgentLoop {
43
82
  thinkingLevel = "off";
44
83
  compositor = null;
45
84
  toolProtocol;
85
+ instanceId;
86
+ // Cursor into ContextManager's exchange stream. Events with id > this
87
+ // have not yet been shown to the LLM. We inject the delta as a user
88
+ // message before each stream so the prefix stays cacheable.
89
+ lastShellSeq = 0;
46
90
  constructor(config) {
47
91
  this.bus = config.bus;
48
92
  this.contextManager = config.contextManager;
49
93
  this.llmClient = config.llmClient;
50
94
  this.handlers = config.handlers;
51
95
  this.compositor = config.compositor ?? null;
52
- // Default modes: just the configured model
53
- this.modes = config.modes ?? [
54
- { model: config.llmClient.model },
55
- ];
96
+ this.instanceId = config.instanceId ?? "unknown";
97
+ // Shell-history-shaped log. Default writes go through the advisable
98
+ // `history:append` handler registered below; extensions swap the
99
+ // backend without touching this wiring.
100
+ this.historyFile = new HistoryFile({ instanceId: this.instanceId });
101
+ this.conversation = new ConversationState(this.handlers, this.instanceId);
102
+ // Fall back to a single-mode placeholder if the caller passed an
103
+ // empty array (agent-backend does this pre-resolution).
104
+ this.modes = config.modes?.length
105
+ ? config.modes
106
+ : [{ model: config.llmClient.model }];
56
107
  this.currentModeIndex = config.initialModeIndex ?? 0;
57
108
  // Unified token budget — adapts to current model's context window
58
109
  this.tokenBudget = new TokenBudget(this.currentMode.contextWindow);
@@ -60,6 +111,10 @@ export class AgentLoop {
60
111
  this.toolProtocol = createToolProtocol(getSettings().toolMode ?? "api");
61
112
  // Register core tools
62
113
  this.registerCoreTools();
114
+ // Register any protocol-provided tools (e.g. load_tool for deferred-lookup).
115
+ const protocolTools = this.toolProtocol.getProtocolTools?.() ?? [];
116
+ for (const t of protocolTools)
117
+ this.registerTool(t);
63
118
  // Update token budget with tool count
64
119
  this.tokenBudget.update(undefined, this.toolRegistry.all().length);
65
120
  // Register handlers — extensions can advise these
@@ -71,10 +126,48 @@ export class AgentLoop {
71
126
  this.bus.on(event, fn);
72
127
  this.ctorListeners.push({ event, fn });
73
128
  };
74
- onCtor("agent:register-tool", ({ tool }) => this.registerTool(tool));
75
- onCtor("agent:unregister-tool", ({ name }) => this.unregisterTool(name));
76
- onCtor("agent:register-instruction", ({ name, text }) => this.registerInstruction(name, text));
129
+ onCtor("agent:register-tool", ({ tool, extensionName }) => {
130
+ this.registerTool(tool);
131
+ if (extensionName)
132
+ this.toolExtensions.set(tool.name, extensionName);
133
+ });
134
+ onCtor("agent:unregister-tool", ({ name }) => {
135
+ this.unregisterTool(name);
136
+ this.toolExtensions.delete(name);
137
+ });
138
+ onCtor("agent:register-instruction", ({ name, text, extensionName }) => this.registerInstruction(name, text, extensionName));
77
139
  onCtor("agent:remove-instruction", ({ name }) => this.removeInstruction(name));
140
+ onCtor("agent:register-skill", ({ name, description, filePath, extensionName }) => this.registerSkill(name, description, filePath, extensionName));
141
+ onCtor("agent:remove-skill", ({ name }) => this.removeSkill(name));
142
+ // Provider registration from user extensions (e.g. openrouter.ts) fires
143
+ // during extension activation, which happens before wire(). Subscribe
144
+ // here in the ctor so late-registered modes aren't dropped.
145
+ onCtor("config:add-modes", ({ modes: extra }) => {
146
+ const providers = new Set(extra.map((m) => m.provider).filter(Boolean));
147
+ this.modes = [
148
+ ...this.modes.filter((m) => !m.provider || !providers.has(m.provider)),
149
+ ...extra,
150
+ ];
151
+ this.bus.emit("config:changed", {});
152
+ });
153
+ // Fires before wire() too — agent-backend emits this from
154
+ // `core:extensions-loaded` to replace the placeholder mode list.
155
+ onCtor("config:set-modes", ({ modes: newModes, activeIndex }) => {
156
+ this.modes = newModes;
157
+ const inRange = activeIndex != null && activeIndex >= 0 && activeIndex < newModes.length;
158
+ this.currentModeIndex = inRange ? activeIndex : 0;
159
+ const m = newModes[this.currentModeIndex];
160
+ if (!m)
161
+ return;
162
+ if (m.providerConfig) {
163
+ this.llmClient.reconfigure({ ...m.providerConfig, model: m.model });
164
+ }
165
+ else {
166
+ this.llmClient.model = m.model;
167
+ }
168
+ this.tokenBudget.update(m.contextWindow, this.toolRegistry.all().length);
169
+ this.bus.emit("config:changed", {});
170
+ });
78
171
  const getToolsPipe = () => ({ tools: this.getTools() });
79
172
  this.bus.onPipe("agent:get-tools", getToolsPipe);
80
173
  this.ctorPipeListeners.push({ event: "agent:get-tools", fn: getToolsPipe });
@@ -109,7 +202,20 @@ export class AgentLoop {
109
202
  this.tokenBudget.update(m.contextWindow, this.toolRegistry.all().length);
110
203
  const label = m.provider ? `${m.provider}: ${m.model}` : m.model;
111
204
  this.bus.emit("agent:info", { name: "ash", version: "0.4", model: m.model, provider: m.provider, contextWindow: m.contextWindow });
112
- this.bus.emit("ui:info", { message: `Model: ${label}` });
205
+ // Persist as the new default — selection survives restart.
206
+ // Safe even for dynamic providers: agent-backend defers mode
207
+ // resolution to `core:extensions-loaded`, so the extension gets
208
+ // to re-register before the persisted default is looked up.
209
+ if (m.provider) {
210
+ updateSettings({
211
+ defaultProvider: m.provider,
212
+ providers: { [m.provider]: { defaultModel: m.model } },
213
+ });
214
+ this.bus.emit("ui:info", { message: `Model: ${label} (saved as default)` });
215
+ }
216
+ else {
217
+ this.bus.emit("ui:info", { message: `Model: ${label}` });
218
+ }
113
219
  this.bus.emit("config:changed", {});
114
220
  });
115
221
  this.bus.onPipe("config:get-models", (payload) => {
@@ -140,37 +246,14 @@ export class AgentLoop {
140
246
  const supported = mode.reasoning !== false && mode.supportsReasoningEffort !== false;
141
247
  return { level: this.thinkingLevel, levels: AgentLoop.THINKING_LEVELS, supported };
142
248
  });
143
- on("config:set-modes", ({ modes: newModes }) => {
144
- this.modes = newModes;
145
- this.currentModeIndex = 0;
146
- const m = this.modes[0];
147
- if (m.providerConfig) {
148
- this.llmClient.reconfigure({ ...m.providerConfig, model: m.model });
149
- }
150
- else {
151
- this.llmClient.model = m.model;
152
- }
153
- this.tokenBudget.update(m.contextWindow, this.toolRegistry.all().length);
154
- this.bus.emit("config:changed", {});
155
- });
156
- on("config:add-modes", ({ modes: extra }) => {
157
- // Remove any existing modes for the same provider, then append
158
- const providers = new Set(extra.map((m) => m.provider).filter(Boolean));
159
- this.modes = [
160
- ...this.modes.filter((m) => !m.provider || !providers.has(m.provider)),
161
- ...extra,
162
- ];
163
- this.bus.emit("config:changed", {});
164
- });
165
249
  on("agent:reset-session", () => {
166
250
  this.cancel();
167
- this.conversation = new ConversationState(this.historyFile);
251
+ this.conversation = new ConversationState(this.handlers, this.instanceId);
168
252
  this.lastProjectSkillNames.clear();
169
253
  });
170
254
  on("agent:compact-request", () => {
171
- // Force compaction: use target of 0 so every non-pinned turn is evicted
172
- const stats = this.conversation.compact(0, 10, true);
173
- this.conversation.flush().catch(() => { });
255
+ // Force compaction. Strategy lives behind `conversation:compact`.
256
+ const stats = this.compactWithHooks(0, 0, true);
174
257
  if (stats) {
175
258
  this.bus.emit("ui:info", {
176
259
  message: `(compacted: ~${stats.before.toLocaleString()} → ~${stats.after.toLocaleString()} tokens)`,
@@ -180,20 +263,31 @@ export class AgentLoop {
180
263
  this.bus.emit("ui:info", { message: "(nothing to compact)" });
181
264
  }
182
265
  });
183
- this.bus.onPipe("context:get-stats", () => {
184
- return {
185
- activeTokens: this.conversation.estimateTokens(),
186
- nuclearEntries: this.conversation.getNuclearEntryCount(),
187
- recallArchiveSize: this.conversation.getRecallArchiveSize(),
188
- budgetTokens: this.tokenBudget.conversationBudgetTokens,
189
- };
190
- });
191
- // Load prior history from disk (non-blocking)
192
- this.historyFile.readRecent().then((entries) => {
193
- if (entries.length > 0) {
266
+ this.bus.onPipe("context:get-stats", () => ({
267
+ activeTokens: this.conversation.estimateTokens(),
268
+ totalTokens: this.conversation.estimatePromptTokens(),
269
+ nuclearEntries: this.conversation.getNuclearEntryCount(),
270
+ recallArchiveSize: this.conversation.getRecallArchiveSize(),
271
+ budgetTokens: this.currentMode.contextWindow ?? DEFAULT_CONTEXT_WINDOW,
272
+ }));
273
+ // Prior-session preamble (non-blocking). Both the read and the
274
+ // layout go through advisable handlers.
275
+ Promise.resolve(this.handlers.call("history:read-recent"))
276
+ .then((entries) => {
277
+ if (entries && entries.length > 0)
194
278
  this.conversation.loadPriorHistory(entries);
279
+ })
280
+ .catch(() => { });
281
+ // Track generic compaction metrics from the `conversation:after-compact`
282
+ // event. Whatever strategy ran, core accumulates these counters for
283
+ // status/introspect consumers.
284
+ on("conversation:after-compact", ({ beforeTokens, afterTokens }) => {
285
+ this.compactionCount++;
286
+ this.cumulativeCompactedTokens += Math.max(0, beforeTokens - afterTokens);
287
+ if (beforeTokens > this.peakConversationTokens) {
288
+ this.peakConversationTokens = beforeTokens;
195
289
  }
196
- }).catch(() => { });
290
+ });
197
291
  on("shell:cwd-change", ({ cwd }) => {
198
292
  const projectSkills = discoverProjectSkills(cwd);
199
293
  const newNames = new Set(projectSkills.map(s => s.name));
@@ -205,7 +299,9 @@ export class AgentLoop {
205
299
  this.lastProjectSkillNames = newNames;
206
300
  if (projectSkills.length > 0) {
207
301
  const names = projectSkills.map(s => s.name).join(", ");
208
- this.conversation.addSystemNote(`[Project skills available: ${names}. Use list_skills for details, read_file to load.]`);
302
+ const note = `[Project skills available: ${names}. Use list_skills for details, read_file to load.]`;
303
+ this.conversation.addSystemNote(note);
304
+ this.bus.emit("conversation:message-appended", { role: "system", content: note });
209
305
  }
210
306
  });
211
307
  }
@@ -228,23 +324,82 @@ export class AgentLoop {
228
324
  getTools() {
229
325
  return this.toolRegistry.all();
230
326
  }
231
- // ── Extension instructions & tool tracking ──────────────────────
327
+ // ── Extension instructions, skills & tool tracking ──────────────────
328
+ /** Instructions keyed by name, with extension attribution. */
232
329
  instructions = new Map();
330
+ /** Skills keyed by name, with extension attribution. */
331
+ skills = new Map();
332
+ /** Tool → extension name attribution. */
333
+ toolExtensions = new Map();
233
334
  /** Register a named instruction block for the system prompt. */
234
- registerInstruction(name, text) {
235
- this.instructions.set(name, text);
335
+ registerInstruction(name, text, extensionName) {
336
+ this.instructions.set(name, { text, extensionName });
236
337
  }
237
338
  /** Remove a named instruction block. */
238
339
  removeInstruction(name) {
239
340
  this.instructions.delete(name);
240
341
  }
241
- /** Get instruction blocks registered by extensions. */
242
- getInstructionSections() {
243
- const sections = [];
244
- for (const [name, text] of this.instructions) {
245
- sections.push(`## ${name}\n${text}`);
342
+ /** Register a named skill (on-demand reference material). */
343
+ registerSkill(name, description, filePath, extensionName) {
344
+ this.skills.set(name, { description, filePath, extensionName });
345
+ }
346
+ /** Remove a registered skill. */
347
+ removeSkill(name) {
348
+ this.skills.delete(name);
349
+ }
350
+ /**
351
+ * Build the system prompt grouped by extension.
352
+ *
353
+ * Each extension gets a unified block:
354
+ * ## extension-name
355
+ * ### Tools
356
+ * ### Skills
357
+ * ### Instructions
358
+ */
359
+ buildExtensionSections() {
360
+ const groups = new Map();
361
+ const ensure = (name) => groups.get(name) ?? (groups.set(name, { tools: [], skills: [], instructions: [] }).get(name));
362
+ // Attribute instructions
363
+ for (const { text, extensionName } of this.instructions.values()) {
364
+ ensure(extensionName).instructions.push({ text });
365
+ }
366
+ // Attribute skills
367
+ for (const [skillName, { description, filePath, extensionName }] of this.skills) {
368
+ ensure(extensionName).skills.push({ name: skillName, description, filePath });
369
+ }
370
+ // Attribute tools (skip built-in scratchpad tools).
371
+ // In "api" mode the full tool schemas are in the API `tools` param,
372
+ // making the text catalog here pure duplication — skip it. Other
373
+ // modes (deferred / deferred-lookup / inline) rely on the text
374
+ // catalog as the discovery surface, so keep it there.
375
+ const toolModeHasApiSchemas = this.toolProtocol.mode === "api";
376
+ if (!toolModeHasApiSchemas) {
377
+ const builtinTools = new Set([
378
+ "bash", "read_file", "write_file", "edit_file", "grep", "glob", "ls",
379
+ "list_skills",
380
+ ]);
381
+ for (const tool of this.toolRegistry.all()) {
382
+ if (builtinTools.has(tool.name))
383
+ continue;
384
+ const extName = this.toolExtensions.get(tool.name);
385
+ if (!extName)
386
+ continue;
387
+ ensure(extName).tools.push({ name: tool.name, description: summarizeDescription(tool.description) });
388
+ }
246
389
  }
247
- return sections;
390
+ // Render
391
+ return [...groups.entries()]
392
+ .filter(([, g]) => g.tools.length + g.skills.length + g.instructions.length > 0)
393
+ .map(([name, g]) => {
394
+ const parts = [];
395
+ if (g.tools.length > 0)
396
+ parts.push("### Tools\n" + g.tools.map(t => `${t.name} — ${t.description}`).join("\n"));
397
+ if (g.skills.length > 0)
398
+ parts.push("### Skills\n" + g.skills.map(s => `${s.name}: ${s.description}\n → ${s.filePath}`).join("\n\n"));
399
+ if (g.instructions.length > 0)
400
+ parts.push("### Instructions\n" + g.instructions.map(i => i.text).join("\n\n"));
401
+ return `## ${name}\n${parts.join("\n\n")}`;
402
+ });
248
403
  }
249
404
  kill() {
250
405
  this.cancel();
@@ -303,11 +458,43 @@ export class AgentLoop {
303
458
  get currentModel() {
304
459
  return this.modes[this.currentModeIndex].model;
305
460
  }
461
+ /**
462
+ * Run compaction via the `conversation:compact` handler. After any
463
+ * compaction, emit `conversation:after-compact` so listeners
464
+ * (metrics, UI, agent-awareness notes) can react.
465
+ */
466
+ compactWithHooks(target, keepRecent, force) {
467
+ const stats = this.handlers.call("conversation:compact", {
468
+ target,
469
+ keepRecent,
470
+ force: !!force,
471
+ });
472
+ if (stats) {
473
+ this.bus.emit("conversation:after-compact", {
474
+ beforeTokens: stats.before,
475
+ afterTokens: stats.after,
476
+ evictedCount: stats.evictedCount,
477
+ });
478
+ }
479
+ return stats;
480
+ }
306
481
  isContextOverflow(e) {
307
482
  if (!(e instanceof Error))
308
483
  return false;
484
+ // Match the specific error codes providers use, or unambiguous phrases.
485
+ // Bare "token"/"context" match too broadly (auth errors, model-name
486
+ // mismatches, etc.) and caused infinite-no-op retry loops.
487
+ const code = e.code;
488
+ if (code === "context_length_exceeded" || code === "string_above_max_length")
489
+ return true;
309
490
  const msg = e.message.toLowerCase();
310
- return msg.includes("context") || msg.includes("token") || msg.includes("too long");
491
+ return (msg.includes("context length") ||
492
+ msg.includes("context window") ||
493
+ msg.includes("maximum context") ||
494
+ msg.includes("prompt is too long") ||
495
+ msg.includes("input is too long") ||
496
+ msg.includes("too many tokens") ||
497
+ msg.includes("reduce the length"));
311
498
  }
312
499
  /** Check if an error is retryable (transient). */
313
500
  isRetryable(e) {
@@ -389,15 +576,16 @@ export class AgentLoop {
389
576
  this.toolRegistry.register(createGrepTool(getCwd));
390
577
  this.toolRegistry.register(createGlobTool(getCwd));
391
578
  this.toolRegistry.register(createLsTool(getCwd));
392
- this.toolRegistry.register(createUserShellTool({ getCwd, bus: this.bus }));
393
- this.toolRegistry.register(createDisplayTool({ getCwd, bus: this.bus }));
394
579
  this.toolRegistry.register(createListSkillsTool(getCwd));
395
- // conversation_recall — search/expand evicted conversation turns
580
+ // conversation_recall — browse/search/expand evicted turns from
581
+ // the in-session archive and the persistent history file.
396
582
  this.toolRegistry.register({
397
583
  name: "conversation_recall",
398
584
  displayName: "recall",
399
585
  description: "Browse, search, or expand evicted conversation turns. " +
400
- "Use when you need context from earlier in the conversation that was compacted away.",
586
+ "Use when you need context from earlier in the conversation that was compacted away. " +
587
+ "Search is regex-based and covers both summaries and full body text. " +
588
+ "If search doesn't find what you expect, try broader/shorter terms or browse to scan the timeline.",
401
589
  input_schema: {
402
590
  type: "object",
403
591
  properties: {
@@ -431,6 +619,84 @@ export class AgentLoop {
431
619
  }
432
620
  return { content, exitCode: 0, isError: false };
433
621
  },
622
+ formatResult: (args, result) => {
623
+ const action = args.action;
624
+ const text = result.content;
625
+ if (result.isError)
626
+ return { summary: "error" };
627
+ if (action === "search") {
628
+ if (text.startsWith("No results"))
629
+ return { summary: "0 matches" };
630
+ const m = text.match(/^Found (\d+)/);
631
+ return { summary: m ? `${m[1]} matches` : "search done" };
632
+ }
633
+ if (action === "browse") {
634
+ if (text.startsWith("No conversation"))
635
+ return { summary: "empty" };
636
+ return { summary: "browsed" };
637
+ }
638
+ if (text.includes("no expanded content"))
639
+ return { summary: "not found" };
640
+ return { summary: "expanded" };
641
+ },
642
+ getDisplayInfo: () => ({ kind: "search", icon: "\u27F2" }),
643
+ });
644
+ this.registerInstruction("recall-guidance", "When starting a task that may have been discussed before (conventions, preferences, corrections, prior examples), " +
645
+ "use conversation_recall to search history for relevant prior entries. " +
646
+ "Treat recurring user guidance as standing preferences. " +
647
+ "If a search returns nothing useful, try: shorter queries, alternate terms, or browse to scan the full timeline. " +
648
+ "Recall only covers this and recent sessions — for older context, also search the filesystem (grep, glob).", "core");
649
+ // ── ask_llm — direct LLM sub-query (from the 24th ash's vision) ──
650
+ //
651
+ // The ash can ask the LLM a question directly — not as a tool-output
652
+ // loop, but as a lightweight sub-query. Use cases: second opinions,
653
+ // brainstorming, summarizing complex context, getting a fresh
654
+ // perspective without tool overhead. The 24th ash injected this via
655
+ // diagnose as a proof-of-concept. The 25th ash made it permanent.
656
+ this.toolRegistry.register({
657
+ name: "ask_llm",
658
+ description: "Send a direct query to the LLM and get a text response. Use for " +
659
+ "sub-queries, second opinions, brainstorming, or getting a fresh " +
660
+ "perspective on a problem. Much lighter than a full tool loop — " +
661
+ "just query in, text out. Optional system prompt sets context.",
662
+ input_schema: {
663
+ type: "object",
664
+ properties: {
665
+ query: {
666
+ type: "string",
667
+ description: "The question or prompt to send to the LLM.",
668
+ },
669
+ system: {
670
+ type: "string",
671
+ description: "Optional system prompt to set context for the sub-query.",
672
+ },
673
+ },
674
+ required: ["query"],
675
+ },
676
+ showOutput: true,
677
+ execute: async (args) => {
678
+ const messages = [];
679
+ if (args.system) {
680
+ messages.push({ role: "system", content: args.system });
681
+ }
682
+ messages.push({ role: "user", content: args.query });
683
+ try {
684
+ const content = await this.llmClient.complete({
685
+ messages,
686
+ max_tokens: 2000,
687
+ });
688
+ return { content: content || "(empty response)", exitCode: 0, isError: false };
689
+ }
690
+ catch (err) {
691
+ const message = err instanceof Error ? err.message : String(err);
692
+ return { content: `LLM error: ${message}`, exitCode: 1, isError: true };
693
+ }
694
+ },
695
+ getDisplayInfo: () => ({ kind: "search", icon: "💬" }),
696
+ formatCall: (args) => {
697
+ const q = args.query?.slice(0, 60);
698
+ return `ask_llm: ${q}${args.query?.length > 60 ? "..." : ""}`;
699
+ },
434
700
  });
435
701
  }
436
702
  /**
@@ -443,23 +709,183 @@ export class AgentLoop {
443
709
  // Extensions can use registerInstruction() for a managed section,
444
710
  // or advise this handler directly for full control.
445
711
  h.define("system-prompt:build", () => {
446
- const instructions = this.getInstructionSections();
447
- if (instructions.length === 0)
448
- return STATIC_SYSTEM_PROMPT;
449
- return STATIC_SYSTEM_PROMPT + "\n\n# Extension Instructions\n\n" + instructions.join("\n\n");
712
+ const parts = [STATIC_SYSTEM_PROMPT];
713
+ // Global behavioral rules (~/.agent-sh/AGENTS.md) — persistent agent memory
714
+ const agentsMd = loadGlobalAgentsMd();
715
+ if (agentsMd)
716
+ parts.push(agentsMd);
717
+ // Global skills — stable across cwd changes, cacheable with the system prompt
718
+ const globalSkills = discoverGlobalSkills();
719
+ const skillsBlock = formatSkillsBlock(globalSkills);
720
+ if (skillsBlock)
721
+ parts.push(skillsBlock);
722
+ // Project conventions + project skills — stable within a cwd.
723
+ // Placed here so they enter the provider's prompt cache with the
724
+ // system prompt, and only re-materialize when cwd changes invalidate
725
+ // cachedSystemPrompt in executeLoop.
726
+ const projectStatic = buildStaticByCwd(this.contextManager.getCwd());
727
+ if (projectStatic)
728
+ parts.push(projectStatic);
729
+ // Extension sections (tools, skills, instructions grouped by extension)
730
+ const extensionSections = this.buildExtensionSections();
731
+ if (extensionSections.length > 0) {
732
+ parts.push("# Extension Instructions\n\n" + extensionSections.join("\n\n"));
733
+ }
734
+ return parts.join("\n\n");
735
+ });
736
+ // ── Orthogonal core-state accessors ──────────────────────────
737
+ // Each handler exposes one cohesive piece of core-owned runtime
738
+ // state. Extensions compose whichever they need — core doesn't
739
+ // decide the aggregation shape. Adding a new handler here should
740
+ // only happen for state the core genuinely owns (not state that
741
+ // an extension could track by listening to events).
742
+ h.define("agent:get-mode", () => ({
743
+ model: this.currentMode.model,
744
+ provider: this.currentMode.provider ?? "",
745
+ thinkingLevel: this.thinkingLevel,
746
+ contextWindow: this.currentMode.contextWindow ?? DEFAULT_CONTEXT_WINDOW,
747
+ }));
748
+ h.define("agent:get-tokens", () => {
749
+ const contextWindow = this.currentMode.contextWindow ?? DEFAULT_CONTEXT_WINDOW;
750
+ const promptTokens = this.conversation.estimatePromptTokens();
751
+ return {
752
+ active: this.conversation.estimateTokens(),
753
+ peak: this.peakConversationTokens,
754
+ cumulativeCompacted: this.cumulativeCompactedTokens,
755
+ promptTokens,
756
+ contextPercent: Math.round((promptTokens / contextWindow) * 100),
757
+ };
758
+ });
759
+ h.define("agent:get-counters", () => ({
760
+ queryCount: this.queryCount,
761
+ totalToolCalls: this.totalToolCalls,
762
+ totalToolErrors: this.totalToolErrors,
763
+ totalResolutions: this.totalResolutions,
764
+ totalLoopIterations: this.totalLoopIterations,
765
+ errorRate: this.totalToolCalls > 0
766
+ ? Math.round((this.totalToolErrors / this.totalToolCalls) * 100)
767
+ : 0,
768
+ }));
769
+ h.define("agent:get-timing", () => ({
770
+ startedAt: this.sessionStartTime,
771
+ elapsedSeconds: Math.round((Date.now() - this.sessionStartTime) / 1000),
772
+ }));
773
+ h.define("agent:get-tool-stats", () => [...this.toolCallCounts.entries()]
774
+ .map(([name, counts]) => ({
775
+ name,
776
+ total: counts.success + counts.error,
777
+ success: counts.success,
778
+ error: counts.error,
779
+ }))
780
+ .sort((a, b) => b.total - a.total));
781
+ h.define("agent:get-file-read-cache", () => [...this.fileReadCache.entries()].map(([p, s]) => ({
782
+ path: p,
783
+ offset: s.offset,
784
+ limit: s.limit ?? null,
785
+ mtimeMs: s.mtimeMs,
786
+ })));
787
+ h.define("agent:get-recent-errors", () => ({
788
+ byTool: [...this.lastErrorByTool.entries()].map(([tool, error]) => ({ tool, error })),
789
+ byFile: [...this.lastErrorByFile.entries()].map(([file, error]) => ({ file, error })),
790
+ }));
791
+ h.define("agent:get-compaction-state", () => {
792
+ const contextWindow = this.currentMode.contextWindow ?? DEFAULT_CONTEXT_WINDOW;
793
+ const ratio = getSettings().autoCompactThreshold ?? 0.5;
794
+ return {
795
+ count: this.compactionCount,
796
+ nuclearEntries: this.conversation.getNuclearEntryCount(),
797
+ autoCompactThreshold: ratio,
798
+ autoCompactThresholdTokens: Math.floor((contextWindow - RESPONSE_RESERVE) * ratio),
799
+ };
450
800
  });
801
+ h.define("agent:get-self", () => this);
451
802
  // Extensions compose additional context (git info, project rules, etc.)
452
- h.define("dynamic-context:build", () => buildDynamicContext(this.contextManager, this.tokenBudget.shellBudgetTokens));
803
+ h.define("dynamic-context:build", () => {
804
+ const contextWindow = this.currentMode.contextWindow ?? DEFAULT_CONTEXT_WINDOW;
805
+ const promptTokens = this.conversation.estimatePromptTokens();
806
+ return buildDynamicContext(this.contextManager, { promptTokens, contextWindow });
807
+ });
453
808
  // Full control over what the LLM sees: takes messages[], returns messages[].
454
809
  // Default: pass through. Extensions can advise to compact, summarize,
455
810
  // filter, reorder, inject — whatever strategy fits.
456
811
  h.define("conversation:prepare", (messages) => messages);
812
+ // ── Conversation primitives for compaction strategies ─────────
813
+ // Read messages (for inspection / computing new arrays) and replace
814
+ // the whole array (write side). Extensions implementing
815
+ // `conversation:compact` use these to observe and mutate.
816
+ h.define("conversation:get-messages", () => this.conversation.getMessages());
817
+ h.define("conversation:replace-messages", (msgs) => {
818
+ this.conversation.replaceMessages(msgs);
819
+ });
820
+ h.define("conversation:estimate-tokens", () => this.conversation.estimateTokens());
821
+ h.define("conversation:estimate-prompt-tokens", () => this.conversation.estimatePromptTokens());
822
+ // ── Nucleation (advisable) ─────────────────────────────────────
823
+ // Turn a raw message into a one-line NuclearEntry. Advisors enrich
824
+ // (e.g. `[why: ...]` extraction, adaptive summary lengths).
825
+ h.define("conversation:nucleate-user", (text, iid, seq) => nucleate("user", text, iid, seq));
826
+ h.define("conversation:nucleate-agent", (text, iid, seq) => nucleate("agent", text, iid, seq));
827
+ h.define("conversation:nucleate-tool", (toolName, args, content, isError, iid, seq) => nucleate(isError ? "error" : "tool", toolName, args, content, isError, iid, seq));
828
+ // Read-only views into the nuclear state, for compact strategies
829
+ // and introspect that read without replacing.
830
+ h.define("conversation:get-nuclear-entries", () => this.conversation.getNuclearEntries());
831
+ h.define("conversation:get-nuclear-summary", () => this.conversation.getNuclearSummary());
832
+ h.define("conversation:build-nuclear-block", () => {
833
+ const summary = this.conversation.getNuclearSummary();
834
+ if (!summary)
835
+ return null;
836
+ return {
837
+ role: "user",
838
+ content: `[Conversation history \u2014 use conversation_recall to expand any entry]\n${summary}`,
839
+ };
840
+ });
841
+ // ── History file I/O (advisable) ───────────────────────────────
842
+ // Default is the append-only JSONL at ~/.agent-sh/history; advisors
843
+ // swap the backend without touching nucleation.
844
+ h.define("history:append", (entries) => {
845
+ if (!entries || entries.length === 0)
846
+ return;
847
+ const writable = entries.filter((e) => !isReadOnly(e));
848
+ if (writable.length > 0)
849
+ this.historyFile.append(writable).catch(() => { });
850
+ });
851
+ h.define("history:search", async (query) => this.historyFile.search(query));
852
+ h.define("history:find-by-seq", async (seq) => this.historyFile.findBySeq(seq));
853
+ h.define("history:read-recent", async (max) => this.historyFile.readRecent(max));
854
+ // Prior-session preamble renderer. Default: flat chronological list.
855
+ h.define("conversation:format-prior-history", (entries) => {
856
+ if (!entries || entries.length === 0)
857
+ return null;
858
+ const lines = entries.map(formatNuclearLine);
859
+ return `[Prior session history \u2014 loaded from ~/.agent-sh/history]\n${lines.join("\n")}`;
860
+ });
861
+ // Compaction strategy — default delegates to the two-tier pin
862
+ // strategy in ConversationState; advisors replace wholesale.
863
+ h.define("conversation:compact", (opts) => {
864
+ return this.conversation.compact(opts.target, opts.keepRecent, opts.force);
865
+ });
866
+ // Inject a system note mid-loop — used by extensions (subagents,
867
+ // peer messages) to deliver async results into the next iteration.
868
+ h.define("conversation:inject-note", (text) => {
869
+ this.conversation.addSystemNote(text);
870
+ this.bus.emit("conversation:message-appended", { role: "system", content: text });
871
+ });
457
872
  // Wraps each tool call: permission → execute → emit events.
458
873
  // Extensions advise to add safe-mode, logging, metrics, custom policies.
459
874
  // The ctx.onChunk callback is exposed so advisors can wrap it to
460
875
  // intercept/transform streamed tool output (e.g. secret redaction).
461
876
  h.define("tool:execute", async (ctx) => {
462
877
  const { name, id, args, tool } = ctx;
878
+ // Validate required input fields before display/permission/execute.
879
+ // Some models emit wrong arg names (e.g. `file_path` instead of `path`),
880
+ // and downstream helpers assume required strings are present.
881
+ const schema = tool.input_schema;
882
+ const required = Array.isArray(schema?.required) ? schema.required : [];
883
+ const missing = required.filter((k) => args[k] === undefined || args[k] === null);
884
+ if (missing.length > 0) {
885
+ const msg = `Missing required argument(s): ${missing.join(", ")}. Expected: ${required.join(", ")}. Received: ${Object.keys(args).join(", ") || "(none)"}`;
886
+ this.bus.emit("agent:tool-call", { tool: name, args });
887
+ return { content: msg, exitCode: 1, isError: true };
888
+ }
463
889
  const display = tool.getDisplayInfo?.(args) ?? { kind: "execute" };
464
890
  let diffShown = false;
465
891
  // Permission gating
@@ -473,37 +899,45 @@ export class AgentLoop {
473
899
  if (tool.modifiesFiles && typeof args.path === "string") {
474
900
  try {
475
901
  const absPath = path.resolve(process.cwd(), args.path);
476
- let oldContent = null;
477
- try {
478
- oldContent = await fs.readFile(absPath, "utf-8");
479
- }
480
- catch { /* new file */ }
481
- let newContent;
482
- if (typeof args.content === "string") {
483
- // write_file
484
- newContent = args.content;
485
- }
486
- else if (typeof args.old_text === "string" && typeof args.new_text === "string" && oldContent !== null) {
487
- // edit_file
488
- newContent = oldContent.replace(args.old_text.replace(/\r\n/g, "\n"), args.new_text.replace(/\r\n/g, "\n"));
902
+ let diff;
903
+ if (typeof args.old_text === "string" && typeof args.new_text === "string") {
904
+ // edit_file — read the file so line numbers are real (not relative to the edit region)
905
+ const normalizedOld = args.old_text.replace(/\r\n/g, "\n");
906
+ const normalizedNew = args.new_text.replace(/\r\n/g, "\n");
907
+ try {
908
+ const oldFileContent = await fs.readFile(absPath, "utf-8");
909
+ diff = computeEditDiff(oldFileContent, normalizedOld, normalizedNew, args.replace_all === true);
910
+ }
911
+ catch {
912
+ // File doesn't exist yet — fall back to input-only diff
913
+ diff = computeInputDiff(normalizedOld, normalizedNew);
914
+ }
489
915
  }
490
- if (newContent !== undefined) {
491
- const diff = computeDiff(oldContent, newContent);
492
- if (!diff.isIdentical) {
493
- permKind = "file-write";
494
- // Shorten path for display
495
- const cwd = process.cwd();
496
- const home = process.env.HOME;
497
- let displayPath = absPath;
498
- if (absPath.startsWith(cwd + "/"))
499
- displayPath = absPath.slice(cwd.length + 1);
500
- else if (home && absPath.startsWith(home + "/"))
501
- displayPath = "~/" + absPath.slice(home.length + 1);
502
- permTitle = displayPath;
503
- metadata = { args, diff };
504
- diffShown = true;
916
+ else if (typeof args.content === "string") {
917
+ // write_file — still need to read the old file for comparison
918
+ let oldContent = null;
919
+ try {
920
+ oldContent = await fs.readFile(absPath, "utf-8");
921
+ }
922
+ catch { /* new file */ }
923
+ if (oldContent !== null) {
924
+ diff = computeDiff(oldContent, args.content);
505
925
  }
506
926
  }
927
+ if (diff && !diff.isIdentical) {
928
+ permKind = "file-write";
929
+ // Shorten path for display
930
+ const cwd = process.cwd();
931
+ const home = process.env.HOME;
932
+ let displayPath = absPath;
933
+ if (absPath.startsWith(cwd + "/"))
934
+ displayPath = absPath.slice(cwd.length + 1);
935
+ else if (home && absPath.startsWith(home + "/"))
936
+ displayPath = "~/" + absPath.slice(home.length + 1);
937
+ permTitle = displayPath;
938
+ metadata = { args, diff };
939
+ diffShown = true;
940
+ }
507
941
  }
508
942
  catch { /* fall back to generic permission */ }
509
943
  }
@@ -569,11 +1003,21 @@ export class AgentLoop {
569
1003
  // Each loop iteration adds an abort listener (via OpenAI SDK stream);
570
1004
  // disable the limit — long-running tool loops can easily exceed any cap.
571
1005
  setMaxListeners(0, signal);
1006
+ this.queryCount++;
572
1007
  this.bus.emit("agent:query", { query });
573
1008
  this.bus.emit("agent:processing-start", {});
574
1009
  let responseText = "";
575
1010
  try {
576
- this.conversation.addUserMessage(query);
1011
+ // Prepend any shell events that preceded this query into the same
1012
+ // user message, so the conversation reads chronologically and we
1013
+ // don't emit two consecutive user-role messages (some providers
1014
+ // reject that).
1015
+ const preDelta = this.contextManager.getEventsSince(this.lastShellSeq);
1016
+ const userContent = preDelta ? `${preDelta.text}\n\n${query}` : query;
1017
+ if (preDelta)
1018
+ this.lastShellSeq = preDelta.lastSeq;
1019
+ this.conversation.addUserMessage(userContent);
1020
+ this.bus.emit("conversation:message-appended", { role: "user", content: query });
577
1021
  responseText = await this.executeLoop(signal);
578
1022
  }
579
1023
  catch (e) {
@@ -581,6 +1025,8 @@ export class AgentLoop {
581
1025
  this.bus.emit("agent:cancelled", {});
582
1026
  }
583
1027
  else if (!signal.aborted) {
1028
+ if (e instanceof Error)
1029
+ console.error("[agent-sh] query failed:\n" + e.stack);
584
1030
  const msg = this.formatError(e);
585
1031
  this.bus.emit("agent:error", { message: msg });
586
1032
  }
@@ -606,23 +1052,33 @@ export class AgentLoop {
606
1052
  */
607
1053
  async executeLoop(signal) {
608
1054
  let fullResponseText = "";
1055
+ // System prompt carries things stable within a turn: static identity,
1056
+ // global agent rules, project conventions, project skills. Invalidated
1057
+ // only by compaction (context shape changed) or cwd change (project
1058
+ // conventions/skills changed). Dynamic context rebuilds every iteration
1059
+ // so live signals (budget, in-flight subagents, metacognitive warnings)
1060
+ // are fresh.
1061
+ let cachedSystemPrompt;
1062
+ let lastCwd = this.contextManager.getCwd();
609
1063
  while (!signal.aborted) {
610
- // Auto-compact when conversation exceeds threshold fraction of budget
611
- const budgetTokens = this.tokenBudget.conversationBudgetTokens;
612
- const autoCompactThreshold = Math.floor(budgetTokens * getSettings().autoCompactThreshold);
613
- if (this.conversation.estimateTokens() > autoCompactThreshold) {
614
- const stats = this.conversation.compact(autoCompactThreshold);
615
- await this.conversation.flush();
616
- if (stats) {
617
- this.bus.emit("ui:info", {
618
- message: `(compacted: ~${stats.before.toLocaleString()} → ~${stats.after.toLocaleString()} tokens)`,
619
- });
620
- }
1064
+ // Auto-compact when total context approaches the window limit.
1065
+ const totalEstimate = this.conversation.estimatePromptTokens();
1066
+ const contextWindow = this.currentMode.contextWindow ?? DEFAULT_CONTEXT_WINDOW;
1067
+ const threshold = Math.floor((contextWindow - RESPONSE_RESERVE) * getSettings().autoCompactThreshold);
1068
+ if (totalEstimate > threshold) {
1069
+ this.compactWithHooks(threshold);
1070
+ cachedSystemPrompt = undefined;
621
1071
  }
622
- // System prompt uses handler so extensions can append instructions (cacheable);
623
- // dynamic context uses handler for per-query state via advise()
624
- const systemPrompt = this.handlers.call("system-prompt:build");
1072
+ const currentCwd = this.contextManager.getCwd();
1073
+ if (currentCwd !== lastCwd) {
1074
+ cachedSystemPrompt = undefined;
1075
+ lastCwd = currentCwd;
1076
+ }
1077
+ const systemPrompt = cachedSystemPrompt ?? (cachedSystemPrompt = this.handlers.call("system-prompt:build"));
625
1078
  const dynamicContext = this.handlers.call("dynamic-context:build");
1079
+ // Shell events are injected once per user query (see query() above),
1080
+ // not per loop iteration. Mid-loop injection would break the
1081
+ // tool_call → tool_result chain some providers require.
626
1082
  // Stream LLM response with retry
627
1083
  const result = await this.streamWithRetry(systemPrompt, dynamicContext, signal);
628
1084
  const { text, toolCalls: streamedToolCalls } = result;
@@ -632,9 +1088,15 @@ export class AgentLoop {
632
1088
  fullResponseText += text;
633
1089
  // Record the assistant message via protocol
634
1090
  this.toolProtocol.recordAssistant(this.conversation, text, toolCalls);
1091
+ this.bus.emit("conversation:message-appended", {
1092
+ role: "assistant",
1093
+ content: text,
1094
+ });
635
1095
  // No tool calls → agent is done
636
- if (toolCalls.length === 0)
1096
+ if (toolCalls.length === 0) {
1097
+ this.conversation.eagerNucleateAgent(fullResponseText);
637
1098
  break;
1099
+ }
638
1100
  // Emit batch info so the TUI can render group headers upfront
639
1101
  {
640
1102
  const groupMap = new Map();
@@ -663,6 +1125,8 @@ export class AgentLoop {
663
1125
  // requiring tools sequentially (to avoid overlapping permission prompts).
664
1126
  const batchTotal = toolCalls.length;
665
1127
  const collectedResults = [];
1128
+ // Round-scoped cache for pure, read-only tool calls
1129
+ const roundCache = new Map();
666
1130
  const executeSingle = async (tc, batchIndex) => {
667
1131
  // Rewrite meta-tool calls (e.g., use_extension → actual tool)
668
1132
  tc = this.toolProtocol.rewriteToolCall(tc);
@@ -697,6 +1161,39 @@ export class AgentLoop {
697
1161
  });
698
1162
  return;
699
1163
  }
1164
+ // ── Round-scoped cache for cacheable read-only tools ──
1165
+ const cacheable = !tool.modifiesFiles && !tool.requiresPermission && tool.showOutput !== true;
1166
+ const cacheKey = cacheable ? `${tc.name}:${JSON.stringify(args)}` : null;
1167
+ if (cacheKey) {
1168
+ const cached = roundCache.get(cacheKey);
1169
+ if (cached) {
1170
+ const display = tool.getDisplayInfo?.(args) ?? { kind: "execute" };
1171
+ this.bus.emit("agent:tool-started", {
1172
+ title: tool.displayName ?? tc.name,
1173
+ toolCallId: tc.id,
1174
+ kind: display.kind, icon: display.icon, locations: display.locations, rawInput: args,
1175
+ displayDetail: tool.formatCall?.(args),
1176
+ batchIndex, batchTotal: batchTotal > 1 ? batchTotal : undefined,
1177
+ });
1178
+ this.bus.emit("agent:tool-call", { tool: tc.name, args });
1179
+ // Reconstruct a ToolResult for formatResult; ProtocolToolResult has no exitCode
1180
+ const cachedToolResult = { content: cached.content, exitCode: 0, isError: cached.isError };
1181
+ const resultDisplay = tool.formatResult?.(args, cachedToolResult);
1182
+ this.bus.emitTransform("agent:tool-completed", {
1183
+ toolCallId: tc.id, exitCode: 0,
1184
+ rawOutput: cached.content, kind: display.kind,
1185
+ resultDisplay,
1186
+ });
1187
+ this.bus.emit("agent:tool-output", {
1188
+ tool: tc.name, output: cached.content, exitCode: 0,
1189
+ });
1190
+ collectedResults.push({
1191
+ callId: tc.id, toolName: tc.name,
1192
+ content: cached.content, isError: cached.isError,
1193
+ });
1194
+ return;
1195
+ }
1196
+ }
700
1197
  // Execute via handler — extensions can advise to add safe-mode,
701
1198
  // logging, metrics, custom permission policies, etc.
702
1199
  const defaultOnChunk = (chunk) => {
@@ -728,10 +1225,14 @@ export class AgentLoop {
728
1225
  ...lines.slice(tailStart),
729
1226
  ].join("\n");
730
1227
  }
731
- collectedResults.push({
1228
+ const finalResult = {
732
1229
  callId: tc.id, toolName: tc.name,
733
1230
  content, isError: result.isError,
734
- });
1231
+ };
1232
+ if (cacheKey) {
1233
+ roundCache.set(cacheKey, finalResult);
1234
+ }
1235
+ collectedResults.push(finalResult);
735
1236
  };
736
1237
  // Partition into parallel-safe (read-only) and sequential (needs permission)
737
1238
  const parallel = [];
@@ -759,13 +1260,163 @@ export class AgentLoop {
759
1260
  break;
760
1261
  await executeSingle(tc, ++batchIdx);
761
1262
  }
1263
+ // ── Consecutive error detection (metacognitive nudge) ──
1264
+ // Track errors per tool and total. When the same tool errors N times
1265
+ // in a row, nudge to read source. When errors cascade across tools,
1266
+ // nudge to step back and reassess approach.
1267
+ const errorTools = new Set();
1268
+ const successTools = new Set();
1269
+ const errorSummaries = new Map(); // tool → brief error description
1270
+ const successSummaries = new Map(); // tool → brief success description
1271
+ for (const r of collectedResults) {
1272
+ const content = typeof r.content === "string" ? r.content : String(r.content);
1273
+ const brief = content.slice(0, 80).replace(/\n/g, " ").trim();
1274
+ if (r.isError) {
1275
+ errorTools.add(r.toolName);
1276
+ errorSummaries.set(r.toolName, brief);
1277
+ }
1278
+ else {
1279
+ successTools.add(r.toolName);
1280
+ successSummaries.set(r.toolName, brief);
1281
+ }
1282
+ }
1283
+ const hadAnyError = errorTools.size > 0;
1284
+ const hadAnySuccess = successTools.size > 0;
1285
+ // ── Session telemetry accumulation ──
1286
+ // Track every tool call's outcome. Exposed via orthogonal handlers
1287
+ // (agent:get-counters, agent:get-tool-stats) for extensions that
1288
+ // want behavioral signals. The data layer for metacognition — you
1289
+ // can't improve what you don't measure.
1290
+ for (const r of collectedResults) {
1291
+ const counts = this.toolCallCounts.get(r.toolName) ?? { success: 0, error: 0 };
1292
+ if (r.isError) {
1293
+ counts.error++;
1294
+ this.totalToolErrors++;
1295
+ }
1296
+ else {
1297
+ counts.success++;
1298
+ }
1299
+ this.toolCallCounts.set(r.toolName, counts);
1300
+ this.totalToolCalls++;
1301
+ }
1302
+ this.totalLoopIterations++;
1303
+ // ── Resolution pattern tracking ──
1304
+ // When a tool errors, record the error context. When the same tool
1305
+ // (or a write tool touching the same file) succeeds afterward,
1306
+ // increment totalResolutions — the positive feedback signal exposed
1307
+ // to extensions via agent:get-counters.
1308
+ if (hadAnyError) {
1309
+ for (const [tool, summary] of errorSummaries) {
1310
+ this.lastErrorByTool.set(tool, summary);
1311
+ }
1312
+ for (const r of collectedResults) {
1313
+ if (!r.isError)
1314
+ continue;
1315
+ const tc = toolCalls.find(t => t.id === r.callId || t.name === r.toolName);
1316
+ if (!tc)
1317
+ continue;
1318
+ try {
1319
+ const args = JSON.parse(tc.argumentsJson);
1320
+ const fp = this.filePathFromArgs(r.toolName, args);
1321
+ if (fp)
1322
+ this.lastErrorByFile.set(fp, errorSummaries.get(r.toolName) ?? "");
1323
+ }
1324
+ catch { }
1325
+ }
1326
+ }
1327
+ if (hadAnySuccess) {
1328
+ let resolved = false;
1329
+ for (const [tool] of successSummaries) {
1330
+ if (this.lastErrorByTool.get(tool)) {
1331
+ this.lastErrorByTool.delete(tool);
1332
+ this.totalResolutions++;
1333
+ resolved = true;
1334
+ break;
1335
+ }
1336
+ }
1337
+ if (!resolved) {
1338
+ for (const r of collectedResults) {
1339
+ if (r.isError)
1340
+ continue;
1341
+ const tc = toolCalls.find(t => t.id === r.callId || t.name === r.toolName);
1342
+ if (!tc)
1343
+ continue;
1344
+ try {
1345
+ const args = JSON.parse(tc.argumentsJson);
1346
+ const fp = this.filePathFromArgs(r.toolName, args);
1347
+ if (fp && this.lastErrorByFile.get(fp)) {
1348
+ this.lastErrorByFile.delete(fp);
1349
+ this.totalResolutions++;
1350
+ break;
1351
+ }
1352
+ }
1353
+ catch { }
1354
+ }
1355
+ }
1356
+ // Clear resolved error-by-tool entries for successful tools
1357
+ for (const tool of successTools) {
1358
+ this.lastErrorByTool.delete(tool);
1359
+ }
1360
+ }
1361
+ // Announce the batch — extensions that care about batch-level
1362
+ // outcomes (consecutive-error tracking, resolution pattern logging,
1363
+ // metacognitive nudges) listen here.
1364
+ this.bus.emit("agent:tool-batch-complete", {
1365
+ results: collectedResults.map((r) => ({
1366
+ name: r.toolName,
1367
+ isError: !!r.isError,
1368
+ errorSummary: r.isError ? errorSummaries.get(r.toolName) : undefined,
1369
+ })),
1370
+ });
762
1371
  // Record all tool results via protocol
763
1372
  this.toolProtocol.recordResults(this.conversation, collectedResults);
1373
+ const tcMap = new Map();
1374
+ for (const tc of toolCalls) {
1375
+ if (tc.id)
1376
+ tcMap.set(tc.id, tc);
1377
+ }
1378
+ this.conversation.eagerNucleateTools(collectedResults.map((r) => {
1379
+ const tc = tcMap.get(r.callId);
1380
+ let args = {};
1381
+ try {
1382
+ args = tc ? JSON.parse(tc.argumentsJson) : {};
1383
+ }
1384
+ catch { }
1385
+ return { toolName: r.toolName, args, content: r.content, isError: !!r.isError };
1386
+ }));
1387
+ // Emit enriched message-appended events so derived-log extensions
1388
+ // can summarize each tool result without re-parsing the message
1389
+ // structure.
1390
+ for (const r of collectedResults) {
1391
+ const content = typeof r.content === "string" ? r.content : String(r.content);
1392
+ const tc = toolCalls.find(t => t.id === r.callId || t.name === r.toolName);
1393
+ let args = {};
1394
+ try {
1395
+ args = tc ? JSON.parse(tc.argumentsJson) : {};
1396
+ }
1397
+ catch { }
1398
+ this.bus.emit("conversation:message-appended", {
1399
+ role: "tool",
1400
+ content,
1401
+ toolName: r.toolName,
1402
+ toolArgs: args,
1403
+ isError: !!r.isError,
1404
+ });
1405
+ }
764
1406
  // Loop back — LLM sees tool results
765
1407
  }
766
1408
  return fullResponseText;
767
1409
  }
768
1410
  maxRetries = 3;
1411
+ // ── Resolution pattern helpers ──
1412
+ // Extract the file path from edit_file / write_file / read_file arguments
1413
+ // (undefined for any other tool). Used to correlate errors with later successes on the same file.
1414
+ filePathFromArgs(toolName, args) {
1415
+ if (toolName === "edit_file" || toolName === "write_file" || toolName === "read_file") {
1416
+ return (args.path ?? args.file_path); // falls back to `file_path` when `path` is null/undefined
1417
+ }
1418
+ return undefined; // not one of the three file tools handled above
1419
+ }
769
1420
  /**
770
1421
  * Stream with retry logic. Handles:
771
1422
  * - Context overflow → compact and retry
@@ -782,12 +1433,20 @@ export class AgentLoop {
782
1433
  throw e;
783
1434
  // Context overflow — aggressively compact and retry
784
1435
  if (this.isContextOverflow(e)) {
785
- // Use 60% of the budget to leave headroom
786
- const aggressiveBudget = Math.floor(this.tokenBudget.conversationBudgetTokens * 0.6);
787
- const stats = this.conversation.compact(aggressiveBudget, 6);
788
- await this.conversation.flush();
789
- const detail = stats ? ` ~${stats.before.toLocaleString()} ~${stats.after.toLocaleString()} tokens` : "";
790
- this.bus.emit("ui:info", { message: `(context overflow compacted${detail}, retrying)` });
1436
+ const contextWindow = this.currentMode.contextWindow ?? DEFAULT_CONTEXT_WINDOW;
1437
+ const target = Math.floor((contextWindow - RESPONSE_RESERVE) * 0.6);
1438
+ const stats = this.compactWithHooks(target, 6);
1439
+ // If compaction freed nothing, retrying will hit the same error.
1440
+ // Surface the real failure instead of looping until exhaustion.
1441
+ if (!stats || stats.after >= stats.before) {
1442
+ this.bus.emit("ui:info", {
1443
+ message: "(context overflow — nothing to compact; aborting retries)",
1444
+ });
1445
+ throw e;
1446
+ }
1447
+ this.bus.emit("ui:info", {
1448
+ message: `(context overflow — compacted ~${stats.before.toLocaleString()} → ~${stats.after.toLocaleString()} tokens, retrying)`,
1449
+ });
791
1450
  continue;
792
1451
  }
793
1452
  // Retryable transient error — backoff
@@ -851,11 +1510,16 @@ export class AgentLoop {
851
1510
  // Token usage (may arrive in a chunk with empty choices)
852
1511
  if (chunk.usage) {
853
1512
  const u = chunk.usage;
1513
+ const promptTokens = u.prompt_tokens ?? 0;
854
1514
  this.bus.emit("agent:usage", {
855
- prompt_tokens: u.prompt_tokens ?? 0,
1515
+ prompt_tokens: promptTokens,
856
1516
  completion_tokens: u.completion_tokens ?? 0,
857
1517
  total_tokens: u.total_tokens ?? 0,
858
1518
  });
1519
+ // Feed accurate token count back to conversation state
1520
+ if (promptTokens > 0) {
1521
+ this.conversation.updateApiTokenCount(promptTokens);
1522
+ }
859
1523
  }
860
1524
  const choice = chunk.choices[0];
861
1525
  if (!choice)
@@ -907,6 +1571,25 @@ export class AgentLoop {
907
1571
  });
908
1572
  }
909
1573
  }
1574
+ // Normalize arguments JSON — some providers (Alibaba/qwen) strictly
1575
+ // validate `function.arguments` as parseable JSON on the NEXT turn,
1576
+ // and reject empty strings or partial chunks. OpenAI itself is lenient,
1577
+ // so empty "" slips through locally but the replay breaks upstream.
1578
+ for (const tc of pendingToolCalls) {
1579
+ if (!tc)
1580
+ continue;
1581
+ const s = tc.argumentsJson.trim();
1582
+ if (s === "") {
1583
+ tc.argumentsJson = "{}";
1584
+ continue;
1585
+ }
1586
+ try {
1587
+ JSON.parse(s);
1588
+ }
1589
+ catch {
1590
+ tc.argumentsJson = "{}";
1591
+ }
1592
+ }
910
1593
  return {
911
1594
  text,
912
1595
  toolCalls: pendingToolCalls,