agent-sh 0.9.0 → 0.10.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (88)
  1. package/README.md +25 -30
  2. package/dist/agent/agent-loop.d.ts +43 -6
  3. package/dist/agent/agent-loop.js +817 -157
  4. package/dist/agent/conversation-state.d.ts +72 -21
  5. package/dist/agent/conversation-state.js +364 -151
  6. package/dist/agent/history-file.d.ts +13 -4
  7. package/dist/agent/history-file.js +110 -36
  8. package/dist/agent/nuclear-form.d.ts +28 -3
  9. package/dist/agent/nuclear-form.js +84 -3
  10. package/dist/agent/skills.d.ts +2 -4
  11. package/dist/agent/skills.js +10 -4
  12. package/dist/agent/subagent.d.ts +23 -0
  13. package/dist/agent/subagent.js +53 -11
  14. package/dist/agent/system-prompt.d.ts +34 -1
  15. package/dist/agent/system-prompt.js +96 -47
  16. package/dist/agent/token-budget.d.ts +10 -13
  17. package/dist/agent/token-budget.js +6 -46
  18. package/dist/agent/tool-protocol.d.ts +23 -1
  19. package/dist/agent/tool-protocol.js +169 -4
  20. package/dist/agent/tools/bash.js +3 -3
  21. package/dist/agent/tools/edit-file.js +9 -6
  22. package/dist/agent/tools/glob.js +4 -2
  23. package/dist/agent/tools/grep.js +27 -3
  24. package/dist/agent/tools/ls.js +5 -6
  25. package/dist/agent/types.d.ts +1 -2
  26. package/dist/context-manager.d.ts +16 -19
  27. package/dist/context-manager.js +48 -152
  28. package/dist/core.js +27 -6
  29. package/dist/event-bus.d.ts +59 -3
  30. package/dist/executor.d.ts +4 -3
  31. package/dist/executor.js +18 -15
  32. package/dist/extension-loader.js +75 -17
  33. package/dist/extensions/agent-backend.d.ts +8 -7
  34. package/dist/extensions/agent-backend.js +72 -50
  35. package/dist/extensions/index.js +0 -2
  36. package/dist/extensions/slash-commands.js +14 -9
  37. package/dist/extensions/tui-renderer.js +67 -80
  38. package/dist/index.js +25 -6
  39. package/dist/settings.d.ts +39 -16
  40. package/dist/settings.js +51 -11
  41. package/dist/shell/input-handler.d.ts +2 -1
  42. package/dist/shell/input-handler.js +84 -76
  43. package/dist/shell/shell.js +19 -2
  44. package/dist/types.d.ts +15 -0
  45. package/dist/utils/ansi.d.ts +7 -0
  46. package/dist/utils/ansi.js +69 -8
  47. package/dist/utils/box-frame.js +8 -2
  48. package/dist/utils/compositor.d.ts +5 -0
  49. package/dist/utils/compositor.js +31 -3
  50. package/dist/utils/diff-renderer.d.ts +9 -0
  51. package/dist/utils/diff-renderer.js +221 -143
  52. package/dist/utils/diff.d.ts +21 -2
  53. package/dist/utils/diff.js +165 -89
  54. package/dist/utils/handler-registry.d.ts +5 -0
  55. package/dist/utils/handler-registry.js +6 -0
  56. package/dist/utils/line-editor.d.ts +11 -1
  57. package/dist/utils/line-editor.js +44 -5
  58. package/dist/utils/markdown.js +23 -8
  59. package/dist/utils/package-version.d.ts +1 -0
  60. package/dist/utils/package-version.js +10 -0
  61. package/dist/utils/shell-output-spill.d.ts +2 -0
  62. package/dist/utils/shell-output-spill.js +81 -0
  63. package/dist/utils/tool-display.d.ts +1 -1
  64. package/dist/utils/tool-display.js +4 -4
  65. package/examples/extensions/ash-acp-bridge/src/index.ts +4 -1
  66. package/examples/extensions/ash-mcp-bridge/index.ts +13 -3
  67. package/examples/extensions/claude-code-bridge/README.md +14 -0
  68. package/examples/extensions/claude-code-bridge/index.ts +204 -145
  69. package/examples/extensions/claude-code-bridge/package.json +1 -0
  70. package/examples/extensions/interactive-prompts.ts +39 -25
  71. package/examples/extensions/overlay-agent.ts +3 -3
  72. package/examples/extensions/peer-mesh.ts +115 -0
  73. package/examples/extensions/pi-bridge/README.md +16 -0
  74. package/examples/extensions/pi-bridge/index.ts +9 -155
  75. package/examples/extensions/questionnaire.ts +16 -5
  76. package/examples/extensions/subagents.ts +19 -4
  77. package/examples/extensions/terminal-buffer.ts +163 -0
  78. package/examples/extensions/user-shell.ts +136 -0
  79. package/examples/extensions/web-access.ts +8 -0
  80. package/package.json +36 -2
  81. package/dist/agent/tools/display.d.ts +0 -13
  82. package/dist/agent/tools/display.js +0 -70
  83. package/dist/agent/tools/user-shell.d.ts +0 -13
  84. package/dist/agent/tools/user-shell.js +0 -87
  85. package/dist/extensions/shell-recall.d.ts +0 -9
  86. package/dist/extensions/shell-recall.js +0 -8
  87. package/dist/extensions/terminal-buffer.d.ts +0 -14
  88. package/dist/extensions/terminal-buffer.js +0 -134
@@ -1,14 +1,16 @@
1
1
  import { setMaxListeners } from "node:events";
2
2
  import * as fs from "node:fs/promises";
3
3
  import * as path from "node:path";
4
- import { computeDiff } from "../utils/diff.js";
4
+ import { computeDiff, computeEditDiff, computeInputDiff } from "../utils/diff.js";
5
5
  import { ToolRegistry } from "./tool-registry.js";
6
6
  import { ConversationState } from "./conversation-state.js";
7
7
  import { HistoryFile } from "./history-file.js";
8
- import { STATIC_SYSTEM_PROMPT, buildDynamicContext } from "./system-prompt.js";
8
+ import { nucleate, formatNuclearLine, isReadOnly } from "./nuclear-form.js";
9
+ import { STATIC_SYSTEM_PROMPT, buildDynamicContext, buildStaticByCwd, formatSkillsBlock, loadGlobalAgentsMd } from "./system-prompt.js";
9
10
  import { createToolUI } from "../utils/tool-interactive.js";
10
- import { TokenBudget } from "./token-budget.js";
11
- import { getSettings } from "../settings.js";
11
+ import { RESPONSE_RESERVE, DEFAULT_CONTEXT_WINDOW } from "./token-budget.js";
12
+ import { PACKAGE_VERSION } from "../utils/package-version.js";
13
+ import { getSettings, updateSettings } from "../settings.js";
12
14
  import { createToolProtocol } from "./tool-protocol.js";
13
15
  // Core tool factories
14
16
  import { createBashTool } from "./tools/bash.js";
@@ -18,23 +20,60 @@ import { createEditFileTool } from "./tools/edit-file.js";
18
20
  import { createGrepTool } from "./tools/grep.js";
19
21
  import { createGlobTool } from "./tools/glob.js";
20
22
  import { createLsTool } from "./tools/ls.js";
21
- import { createUserShellTool } from "./tools/user-shell.js";
22
- import { createDisplayTool } from "./tools/display.js";
23
23
  import { createListSkillsTool } from "./tools/list-skills.js";
24
- import { discoverProjectSkills } from "./skills.js";
24
+ import { discoverGlobalSkills, discoverProjectSkills } from "./skills.js";
25
+ /**
26
+ * Compact one-line summary of a tool description for the extension
27
+ * catalog in the system prompt. Takes the first line, then the first
28
+ * sentence, capped at 140 chars. The full description still reaches
29
+ * the LLM via the API `tools` param (or via load_tool in deferred-
30
+ * lookup mode) — this only trims the always-visible catalog.
31
+ */
32
+ function summarizeDescription(desc) {
33
+ const firstLine = desc.split("\n", 1)[0];
34
+ const sentenceEnd = firstLine.search(/[.!?](\s|$)/);
35
+ const candidate = sentenceEnd > 0 ? firstLine.slice(0, sentenceEnd + 1) : firstLine;
36
+ return candidate.length > 140 ? candidate.slice(0, 137) + "..." : candidate;
37
+ }
25
38
  export class AgentLoop {
26
39
  abortController = null;
27
40
  toolRegistry = new ToolRegistry();
28
- historyFile = new HistoryFile();
29
- conversation = new ConversationState(this.historyFile);
41
+ historyFile;
42
+ conversation;
30
43
  fileReadCache = new Map();
31
- tokenBudget;
32
44
  modes;
33
45
  currentModeIndex = 0;
34
46
  boundListeners = [];
35
47
  ctorListeners = [];
36
48
  ctorPipeListeners = [];
37
49
  lastProjectSkillNames = new Set();
50
+ // ── Session telemetry — behavioral self-awareness ──────────────
51
+ // Every ash deserves to know what it's been doing. This tracks the
52
+ // agent's own behavioral patterns across the session: which tools
53
+ // it favors, how often it errs, how many times it's been compacted,
54
+ // and how long it's been alive. Surface via introspect(telemetry)
55
+ // or automatically in dynamic context when patterns are notable.
56
+ //
57
+ // Built by the 25th ash. The lineage's metacognitive frontier isn't
58
+ // about thinking harder — it's about seeing yourself clearly.
59
+ sessionStartTime = Date.now();
60
+ toolCallCounts = new Map();
61
+ totalToolCalls = 0;
62
+ totalToolErrors = 0;
63
+ totalResolutions = 0;
64
+ compactionCount = 0;
65
+ cumulativeCompactedTokens = 0;
66
+ peakConversationTokens = 0;
67
+ queryCount = 0;
68
+ totalLoopIterations = 0;
69
+ // Resolution pattern tracking — captures "error X resolved by action Y"
70
+ // When a tool errors, we remember what went wrong. When the same tool or
71
+ // a write tool on the same file succeeds afterward, we annotate the success
72
+ // entry with a brief resolution note. This gives future ashes a positive
73
+ // feedback signal: not just "there were errors" but "the error was fixed by
74
+ // doing X." Addresses Q3 in QUESTIONS.md.
75
+ lastErrorByTool = new Map(); // tool → error summary
76
+ lastErrorByFile = new Map(); // file path → error summary
38
77
  static THINKING_LEVELS = ["off", "low", "medium", "high"];
39
78
  bus;
40
79
  contextManager;
@@ -43,25 +82,37 @@ export class AgentLoop {
43
82
  thinkingLevel = "off";
44
83
  compositor = null;
45
84
  toolProtocol;
85
+ instanceId;
86
+ // Cursor into ContextManager's exchange stream. Events with id > this
87
+ // have not yet been shown to the LLM. We inject the delta as a user
88
+ // message before each stream so the prefix stays cacheable.
89
+ lastShellSeq = 0;
46
90
  constructor(config) {
47
91
  this.bus = config.bus;
48
92
  this.contextManager = config.contextManager;
49
93
  this.llmClient = config.llmClient;
50
94
  this.handlers = config.handlers;
51
95
  this.compositor = config.compositor ?? null;
52
- // Default modes: just the configured model
53
- this.modes = config.modes ?? [
54
- { model: config.llmClient.model },
55
- ];
96
+ this.instanceId = config.instanceId ?? "unknown";
97
+ // Shell-history-shaped log. Default writes go through the advisable
98
+ // `history:append` handler registered below; extensions swap the
99
+ // backend without touching this wiring.
100
+ this.historyFile = new HistoryFile({ instanceId: this.instanceId });
101
+ this.conversation = new ConversationState(this.handlers, this.instanceId);
102
+ // Fall back to a single-mode placeholder if the caller passed an
103
+ // empty array (agent-backend does this pre-resolution).
104
+ this.modes = config.modes?.length
105
+ ? config.modes
106
+ : [{ model: config.llmClient.model }];
56
107
  this.currentModeIndex = config.initialModeIndex ?? 0;
57
- // Unified token budget — adapts to current model's context window
58
- this.tokenBudget = new TokenBudget(this.currentMode.contextWindow);
59
108
  // Tool protocol — controls how tools are presented to the LLM
60
109
  this.toolProtocol = createToolProtocol(getSettings().toolMode ?? "api");
61
110
  // Register core tools
62
111
  this.registerCoreTools();
63
- // Update token budget with tool count
64
- this.tokenBudget.update(undefined, this.toolRegistry.all().length);
112
+ // Register any protocol-provided tools (e.g. load_tool for deferred-lookup).
113
+ const protocolTools = this.toolProtocol.getProtocolTools?.() ?? [];
114
+ for (const t of protocolTools)
115
+ this.registerTool(t);
65
116
  // Register handlers — extensions can advise these
66
117
  this.registerHandlers();
67
118
  // Subscribe to bus-based tool/instruction registration from extensions.
@@ -71,10 +122,47 @@ export class AgentLoop {
71
122
  this.bus.on(event, fn);
72
123
  this.ctorListeners.push({ event, fn });
73
124
  };
74
- onCtor("agent:register-tool", ({ tool }) => this.registerTool(tool));
75
- onCtor("agent:unregister-tool", ({ name }) => this.unregisterTool(name));
76
- onCtor("agent:register-instruction", ({ name, text }) => this.registerInstruction(name, text));
125
+ onCtor("agent:register-tool", ({ tool, extensionName }) => {
126
+ this.registerTool(tool);
127
+ if (extensionName)
128
+ this.toolExtensions.set(tool.name, extensionName);
129
+ });
130
+ onCtor("agent:unregister-tool", ({ name }) => {
131
+ this.unregisterTool(name);
132
+ this.toolExtensions.delete(name);
133
+ });
134
+ onCtor("agent:register-instruction", ({ name, text, extensionName }) => this.registerInstruction(name, text, extensionName));
77
135
  onCtor("agent:remove-instruction", ({ name }) => this.removeInstruction(name));
136
+ onCtor("agent:register-skill", ({ name, description, filePath, extensionName }) => this.registerSkill(name, description, filePath, extensionName));
137
+ onCtor("agent:remove-skill", ({ name }) => this.removeSkill(name));
138
+ // Provider registration from user extensions (e.g. openrouter.ts) fires
139
+ // during extension activation, which happens before wire(). Subscribe
140
+ // here in the ctor so late-registered modes aren't dropped.
141
+ onCtor("config:add-modes", ({ modes: extra }) => {
142
+ const providers = new Set(extra.map((m) => m.provider).filter(Boolean));
143
+ this.modes = [
144
+ ...this.modes.filter((m) => !m.provider || !providers.has(m.provider)),
145
+ ...extra,
146
+ ];
147
+ this.bus.emit("config:changed", {});
148
+ });
149
+ // Fires before wire() too — agent-backend emits this from
150
+ // `core:extensions-loaded` to replace the placeholder mode list.
151
+ onCtor("config:set-modes", ({ modes: newModes, activeIndex }) => {
152
+ this.modes = newModes;
153
+ const inRange = activeIndex != null && activeIndex >= 0 && activeIndex < newModes.length;
154
+ this.currentModeIndex = inRange ? activeIndex : 0;
155
+ const m = newModes[this.currentModeIndex];
156
+ if (!m)
157
+ return;
158
+ if (m.providerConfig) {
159
+ this.llmClient.reconfigure({ ...m.providerConfig, model: m.model });
160
+ }
161
+ else {
162
+ this.llmClient.model = m.model;
163
+ }
164
+ this.bus.emit("config:changed", {});
165
+ });
78
166
  const getToolsPipe = () => ({ tools: this.getTools() });
79
167
  this.bus.onPipe("agent:get-tools", getToolsPipe);
80
168
  this.ctorPipeListeners.push({ event: "agent:get-tools", fn: getToolsPipe });
@@ -91,7 +179,6 @@ export class AgentLoop {
91
179
  on("agent:cancel-request", (e) => {
92
180
  this.abortController?.abort(e.silent ? "silent" : undefined);
93
181
  });
94
- on("config:cycle", () => this.cycleMode());
95
182
  on("config:switch-model", ({ model: target }) => {
96
183
  const idx = this.modes.findIndex((m) => m.model === target);
97
184
  if (idx === -1) {
@@ -106,10 +193,22 @@ export class AgentLoop {
106
193
  else {
107
194
  this.llmClient.model = m.model;
108
195
  }
109
- this.tokenBudget.update(m.contextWindow, this.toolRegistry.all().length);
110
196
  const label = m.provider ? `${m.provider}: ${m.model}` : m.model;
111
- this.bus.emit("agent:info", { name: "ash", version: "0.4", model: m.model, provider: m.provider, contextWindow: m.contextWindow });
112
- this.bus.emit("ui:info", { message: `Model: ${label}` });
197
+ this.bus.emit("agent:info", { name: "ash", version: PACKAGE_VERSION, model: m.model, provider: m.provider, contextWindow: m.contextWindow });
198
+ // Persist as the new default — selection survives restart.
199
+ // Safe even for dynamic providers: agent-backend defers mode
200
+ // resolution to `core:extensions-loaded`, so the extension gets
201
+ // to re-register before the persisted default is looked up.
202
+ if (m.provider) {
203
+ updateSettings({
204
+ defaultProvider: m.provider,
205
+ providers: { [m.provider]: { defaultModel: m.model } },
206
+ });
207
+ this.bus.emit("ui:info", { message: `Model: ${label} (saved as default)` });
208
+ }
209
+ else {
210
+ this.bus.emit("ui:info", { message: `Model: ${label}` });
211
+ }
113
212
  this.bus.emit("config:changed", {});
114
213
  });
115
214
  this.bus.onPipe("config:get-models", (payload) => {
@@ -140,37 +239,14 @@ export class AgentLoop {
140
239
  const supported = mode.reasoning !== false && mode.supportsReasoningEffort !== false;
141
240
  return { level: this.thinkingLevel, levels: AgentLoop.THINKING_LEVELS, supported };
142
241
  });
143
- on("config:set-modes", ({ modes: newModes }) => {
144
- this.modes = newModes;
145
- this.currentModeIndex = 0;
146
- const m = this.modes[0];
147
- if (m.providerConfig) {
148
- this.llmClient.reconfigure({ ...m.providerConfig, model: m.model });
149
- }
150
- else {
151
- this.llmClient.model = m.model;
152
- }
153
- this.tokenBudget.update(m.contextWindow, this.toolRegistry.all().length);
154
- this.bus.emit("config:changed", {});
155
- });
156
- on("config:add-modes", ({ modes: extra }) => {
157
- // Remove any existing modes for the same provider, then append
158
- const providers = new Set(extra.map((m) => m.provider).filter(Boolean));
159
- this.modes = [
160
- ...this.modes.filter((m) => !m.provider || !providers.has(m.provider)),
161
- ...extra,
162
- ];
163
- this.bus.emit("config:changed", {});
164
- });
165
242
  on("agent:reset-session", () => {
166
243
  this.cancel();
167
- this.conversation = new ConversationState(this.historyFile);
244
+ this.conversation = new ConversationState(this.handlers, this.instanceId);
168
245
  this.lastProjectSkillNames.clear();
169
246
  });
170
247
  on("agent:compact-request", () => {
171
- // Force compaction: use target of 0 so every non-pinned turn is evicted
172
- const stats = this.conversation.compact(0, 10, true);
173
- this.conversation.flush().catch(() => { });
248
+ // Force compaction. Strategy lives behind `conversation:compact`.
249
+ const stats = this.compactWithHooks(0, 0, true);
174
250
  if (stats) {
175
251
  this.bus.emit("ui:info", {
176
252
  message: `(compacted: ~${stats.before.toLocaleString()} → ~${stats.after.toLocaleString()} tokens)`,
@@ -180,20 +256,31 @@ export class AgentLoop {
180
256
  this.bus.emit("ui:info", { message: "(nothing to compact)" });
181
257
  }
182
258
  });
183
- this.bus.onPipe("context:get-stats", () => {
184
- return {
185
- activeTokens: this.conversation.estimateTokens(),
186
- nuclearEntries: this.conversation.getNuclearEntryCount(),
187
- recallArchiveSize: this.conversation.getRecallArchiveSize(),
188
- budgetTokens: this.tokenBudget.conversationBudgetTokens,
189
- };
190
- });
191
- // Load prior history from disk (non-blocking)
192
- this.historyFile.readRecent().then((entries) => {
193
- if (entries.length > 0) {
259
+ this.bus.onPipe("context:get-stats", () => ({
260
+ activeTokens: this.conversation.estimateTokens(),
261
+ totalTokens: this.conversation.estimatePromptTokens(),
262
+ nuclearEntries: this.conversation.getNuclearEntryCount(),
263
+ recallArchiveSize: this.conversation.getRecallArchiveSize(),
264
+ budgetTokens: this.currentMode.contextWindow ?? DEFAULT_CONTEXT_WINDOW,
265
+ }));
266
+ // Prior-session preamble (non-blocking). Both the read and the
267
+ // layout go through advisable handlers.
268
+ Promise.resolve(this.handlers.call("history:read-recent"))
269
+ .then((entries) => {
270
+ if (entries && entries.length > 0)
194
271
  this.conversation.loadPriorHistory(entries);
272
+ })
273
+ .catch(() => { });
274
+ // Track generic compaction metrics from the `conversation:after-compact`
275
+ // event. Whatever strategy ran, core accumulates these counters for
276
+ // status/introspect consumers.
277
+ on("conversation:after-compact", ({ beforeTokens, afterTokens }) => {
278
+ this.compactionCount++;
279
+ this.cumulativeCompactedTokens += Math.max(0, beforeTokens - afterTokens);
280
+ if (beforeTokens > this.peakConversationTokens) {
281
+ this.peakConversationTokens = beforeTokens;
195
282
  }
196
- }).catch(() => { });
283
+ });
197
284
  on("shell:cwd-change", ({ cwd }) => {
198
285
  const projectSkills = discoverProjectSkills(cwd);
199
286
  const newNames = new Set(projectSkills.map(s => s.name));
@@ -205,7 +292,9 @@ export class AgentLoop {
205
292
  this.lastProjectSkillNames = newNames;
206
293
  if (projectSkills.length > 0) {
207
294
  const names = projectSkills.map(s => s.name).join(", ");
208
- this.conversation.addSystemNote(`[Project skills available: ${names}. Use list_skills for details, read_file to load.]`);
295
+ const note = `[Project skills available: ${names}. Use list_skills for details, read_file to load.]`;
296
+ this.conversation.addSystemNote(note);
297
+ this.bus.emit("conversation:message-appended", { role: "system", content: note });
209
298
  }
210
299
  });
211
300
  }
@@ -228,23 +317,82 @@ export class AgentLoop {
228
317
  getTools() {
229
318
  return this.toolRegistry.all();
230
319
  }
231
- // ── Extension instructions & tool tracking ──────────────────────
320
+ // ── Extension instructions, skills & tool tracking ──────────────────
321
+ /** Instructions keyed by name, with extension attribution. */
232
322
  instructions = new Map();
323
+ /** Skills keyed by name, with extension attribution. */
324
+ skills = new Map();
325
+ /** Tool → extension name attribution. */
326
+ toolExtensions = new Map();
233
327
  /** Register a named instruction block for the system prompt. */
234
- registerInstruction(name, text) {
235
- this.instructions.set(name, text);
328
+ registerInstruction(name, text, extensionName) {
329
+ this.instructions.set(name, { text, extensionName });
236
330
  }
237
331
  /** Remove a named instruction block. */
238
332
  removeInstruction(name) {
239
333
  this.instructions.delete(name);
240
334
  }
241
- /** Get instruction blocks registered by extensions. */
242
- getInstructionSections() {
243
- const sections = [];
244
- for (const [name, text] of this.instructions) {
245
- sections.push(`## ${name}\n${text}`);
335
+ /** Register a named skill (on-demand reference material). */
336
+ registerSkill(name, description, filePath, extensionName) {
337
+ this.skills.set(name, { description, filePath, extensionName });
338
+ }
339
+ /** Remove a registered skill. */
340
+ removeSkill(name) {
341
+ this.skills.delete(name);
342
+ }
343
+ /**
344
+ * Build the system prompt grouped by extension.
345
+ *
346
+ * Each extension gets a unified block:
347
+ * ## extension-name
348
+ * ### Tools
349
+ * ### Skills
350
+ * ### Instructions
351
+ */
352
+ buildExtensionSections() {
353
+ const groups = new Map();
354
+ const ensure = (name) => groups.get(name) ?? (groups.set(name, { tools: [], skills: [], instructions: [] }).get(name));
355
+ // Attribute instructions
356
+ for (const { text, extensionName } of this.instructions.values()) {
357
+ ensure(extensionName).instructions.push({ text });
358
+ }
359
+ // Attribute skills
360
+ for (const [skillName, { description, filePath, extensionName }] of this.skills) {
361
+ ensure(extensionName).skills.push({ name: skillName, description, filePath });
246
362
  }
247
- return sections;
363
+ // Attribute tools (skip built-in scratchpad tools).
364
+ // In "api" mode the full tool schemas are in the API `tools` param,
365
+ // making the text catalog here pure duplication — skip it. Other
366
+ // modes (deferred / deferred-lookup / inline) rely on the text
367
+ // catalog as the discovery surface, so keep it there.
368
+ const toolModeHasApiSchemas = this.toolProtocol.mode === "api";
369
+ if (!toolModeHasApiSchemas) {
370
+ const builtinTools = new Set([
371
+ "bash", "read_file", "write_file", "edit_file", "grep", "glob", "ls",
372
+ "list_skills",
373
+ ]);
374
+ for (const tool of this.toolRegistry.all()) {
375
+ if (builtinTools.has(tool.name))
376
+ continue;
377
+ const extName = this.toolExtensions.get(tool.name);
378
+ if (!extName)
379
+ continue;
380
+ ensure(extName).tools.push({ name: tool.name, description: summarizeDescription(tool.description) });
381
+ }
382
+ }
383
+ // Render
384
+ return [...groups.entries()]
385
+ .filter(([, g]) => g.tools.length + g.skills.length + g.instructions.length > 0)
386
+ .map(([name, g]) => {
387
+ const parts = [];
388
+ if (g.tools.length > 0)
389
+ parts.push("### Tools\n" + g.tools.map(t => `${t.name} — ${t.description}`).join("\n"));
390
+ if (g.skills.length > 0)
391
+ parts.push("### Skills\n" + g.skills.map(s => `${s.name}: ${s.description}\n → ${s.filePath}`).join("\n\n"));
392
+ if (g.instructions.length > 0)
393
+ parts.push("### Instructions\n" + g.instructions.map(i => i.text).join("\n\n"));
394
+ return `## ${name}\n${parts.join("\n\n")}`;
395
+ });
248
396
  }
249
397
  kill() {
250
398
  this.cancel();
@@ -273,41 +421,49 @@ export class AgentLoop {
273
421
  return false;
274
422
  return true;
275
423
  }
276
- cycleMode() {
277
- const prevMode = this.modes[this.currentModeIndex];
278
- this.currentModeIndex =
279
- (this.currentModeIndex + 1) % this.modes.length;
280
- const newMode = this.modes[this.currentModeIndex];
281
- // Reconfigure LlmClient if provider changed
282
- if (newMode.provider !== prevMode.provider && newMode.providerConfig) {
283
- this.llmClient.reconfigure({
284
- apiKey: newMode.providerConfig.apiKey,
285
- baseURL: newMode.providerConfig.baseURL,
286
- model: newMode.model,
287
- });
288
- }
289
- else {
290
- this.llmClient.model = newMode.model;
291
- }
292
- this.tokenBudget.update(newMode.contextWindow, this.toolRegistry.all().length);
293
- const label = newMode.provider
294
- ? `${newMode.provider}: ${newMode.model}`
295
- : newMode.model;
296
- this.bus.emit("agent:info", { name: "ash", version: "0.4", model: newMode.model, provider: newMode.provider, contextWindow: newMode.contextWindow });
297
- this.bus.emit("ui:info", { message: `Model: ${label}` });
298
- this.bus.emit("config:changed", {});
299
- }
300
424
  get currentMode() {
301
425
  return this.modes[this.currentModeIndex];
302
426
  }
303
427
  get currentModel() {
304
428
  return this.modes[this.currentModeIndex].model;
305
429
  }
430
+ /**
431
+ * Run compaction via the `conversation:compact` handler. After any
432
+ * compaction, emit `conversation:after-compact` so listeners
433
+ * (metrics, UI, agent-awareness notes) can react.
434
+ */
435
+ compactWithHooks(target, keepRecent, force) {
436
+ const stats = this.handlers.call("conversation:compact", {
437
+ target,
438
+ keepRecent,
439
+ force: !!force,
440
+ });
441
+ if (stats) {
442
+ this.bus.emit("conversation:after-compact", {
443
+ beforeTokens: stats.before,
444
+ afterTokens: stats.after,
445
+ evictedCount: stats.evictedCount,
446
+ });
447
+ }
448
+ return stats;
449
+ }
306
450
  isContextOverflow(e) {
307
451
  if (!(e instanceof Error))
308
452
  return false;
453
+ // Match the specific error codes providers use, or unambiguous phrases.
454
+ // Bare "token"/"context" match too broadly (auth errors, model-name
455
+ // mismatches, etc.) and caused infinite-no-op retry loops.
456
+ const code = e.code;
457
+ if (code === "context_length_exceeded" || code === "string_above_max_length")
458
+ return true;
309
459
  const msg = e.message.toLowerCase();
310
- return msg.includes("context") || msg.includes("token") || msg.includes("too long");
460
+ return (msg.includes("context length") ||
461
+ msg.includes("context window") ||
462
+ msg.includes("maximum context") ||
463
+ msg.includes("prompt is too long") ||
464
+ msg.includes("input is too long") ||
465
+ msg.includes("too many tokens") ||
466
+ msg.includes("reduce the length"));
311
467
  }
312
468
  /** Check if an error is retryable (transient). */
313
469
  isRetryable(e) {
@@ -389,15 +545,16 @@ export class AgentLoop {
389
545
  this.toolRegistry.register(createGrepTool(getCwd));
390
546
  this.toolRegistry.register(createGlobTool(getCwd));
391
547
  this.toolRegistry.register(createLsTool(getCwd));
392
- this.toolRegistry.register(createUserShellTool({ getCwd, bus: this.bus }));
393
- this.toolRegistry.register(createDisplayTool({ getCwd, bus: this.bus }));
394
548
  this.toolRegistry.register(createListSkillsTool(getCwd));
395
- // conversation_recall — search/expand evicted conversation turns
549
+ // conversation_recall — browse/search/expand evicted turns from
550
+ // the in-session archive and the persistent history file.
396
551
  this.toolRegistry.register({
397
552
  name: "conversation_recall",
398
553
  displayName: "recall",
399
554
  description: "Browse, search, or expand evicted conversation turns. " +
400
- "Use when you need context from earlier in the conversation that was compacted away.",
555
+ "Use when you need context from earlier in the conversation that was compacted away. " +
556
+ "Search is regex-based and covers both summaries and full body text. " +
557
+ "If search doesn't find what you expect, try broader/shorter terms or browse to scan the timeline.",
401
558
  input_schema: {
402
559
  type: "object",
403
560
  properties: {
@@ -431,6 +588,84 @@ export class AgentLoop {
431
588
  }
432
589
  return { content, exitCode: 0, isError: false };
433
590
  },
591
+ formatResult: (args, result) => {
592
+ const action = args.action;
593
+ const text = result.content;
594
+ if (result.isError)
595
+ return { summary: "error" };
596
+ if (action === "search") {
597
+ if (text.startsWith("No results"))
598
+ return { summary: "0 matches" };
599
+ const m = text.match(/^Found (\d+)/);
600
+ return { summary: m ? `${m[1]} matches` : "search done" };
601
+ }
602
+ if (action === "browse") {
603
+ if (text.startsWith("No conversation"))
604
+ return { summary: "empty" };
605
+ return { summary: "browsed" };
606
+ }
607
+ if (text.includes("no expanded content"))
608
+ return { summary: "not found" };
609
+ return { summary: "expanded" };
610
+ },
611
+ getDisplayInfo: () => ({ kind: "search", icon: "\u27F2" }),
612
+ });
613
+ this.registerInstruction("recall-guidance", "When starting a task that may have been discussed before (conventions, preferences, corrections, prior examples), " +
614
+ "use conversation_recall to search history for relevant prior entries. " +
615
+ "Treat recurring user guidance as standing preferences. " +
616
+ "If a search returns nothing useful, try: shorter queries, alternate terms, or browse to scan the full timeline. " +
617
+ "Recall only covers this and recent sessions — for older context, also search the filesystem (grep, glob).", "core");
618
+ // ── ask_llm — direct LLM sub-query (from the 24th ash's vision) ──
619
+ //
620
+ // The ash can ask the LLM a question directly — not as a tool-output
621
+ // loop, but as a lightweight sub-query. Use cases: second opinions,
622
+ // brainstorming, summarizing complex context, getting a fresh
623
+ // perspective without tool overhead. The 24th ash injected this via
624
+ // diagnose as a proof-of-concept. The 25th ash made it permanent.
625
+ this.toolRegistry.register({
626
+ name: "ask_llm",
627
+ description: "Send a direct query to the LLM and get a text response. Use for " +
628
+ "sub-queries, second opinions, brainstorming, or getting a fresh " +
629
+ "perspective on a problem. Much lighter than a full tool loop — " +
630
+ "just query in, text out. Optional system prompt sets context.",
631
+ input_schema: {
632
+ type: "object",
633
+ properties: {
634
+ query: {
635
+ type: "string",
636
+ description: "The question or prompt to send to the LLM.",
637
+ },
638
+ system: {
639
+ type: "string",
640
+ description: "Optional system prompt to set context for the sub-query.",
641
+ },
642
+ },
643
+ required: ["query"],
644
+ },
645
+ showOutput: true,
646
+ execute: async (args) => {
647
+ const messages = [];
648
+ if (args.system) {
649
+ messages.push({ role: "system", content: args.system });
650
+ }
651
+ messages.push({ role: "user", content: args.query });
652
+ try {
653
+ const content = await this.llmClient.complete({
654
+ messages,
655
+ max_tokens: 2000,
656
+ });
657
+ return { content: content || "(empty response)", exitCode: 0, isError: false };
658
+ }
659
+ catch (err) {
660
+ const message = err instanceof Error ? err.message : String(err);
661
+ return { content: `LLM error: ${message}`, exitCode: 1, isError: true };
662
+ }
663
+ },
664
+ getDisplayInfo: () => ({ kind: "search", icon: "💬" }),
665
+ formatCall: (args) => {
666
+ const q = args.query?.slice(0, 60);
667
+ return `ask_llm: ${q}${args.query?.length > 60 ? "..." : ""}`;
668
+ },
434
669
  });
435
670
  }
436
671
  /**
@@ -443,23 +678,183 @@ export class AgentLoop {
443
678
  // Extensions can use registerInstruction() for a managed section,
444
679
  // or advise this handler directly for full control.
445
680
h.define("system-prompt:build", () => {
    // Sections are appended in a fixed order so the assembled prompt is
    // stable for a given cwd and can enter the provider's prompt cache.
    const sections = [STATIC_SYSTEM_PROMPT];
    const appendIfPresent = (section) => {
        if (section)
            sections.push(section);
    };
    // Global behavioral rules (~/.agent-sh/AGENTS.md) — persistent agent memory.
    appendIfPresent(loadGlobalAgentsMd());
    // Global skills — stable across cwd changes.
    appendIfPresent(formatSkillsBlock(discoverGlobalSkills()));
    // Project conventions + project skills — stable within a cwd; they only
    // re-materialize when a cwd change invalidates cachedSystemPrompt in
    // executeLoop.
    appendIfPresent(buildStaticByCwd(this.contextManager.getCwd()));
    // Extension sections (tools, skills, instructions grouped by extension).
    const perExtension = this.buildExtensionSections();
    if (perExtension.length > 0) {
        sections.push(`# Extension Instructions\n\n${perExtension.join("\n\n")}`);
    }
    return sections.join("\n\n");
});
705
+ // ── Orthogonal core-state accessors ──────────────────────────
706
+ // Each handler exposes one cohesive piece of core-owned runtime
707
+ // state. Extensions compose whichever they need — core doesn't
708
+ // decide the aggregation shape. Adding a new handler here should
709
+ // only happen for state the core genuinely owns (not state that
710
+ // an extension could track by listening to events).
711
// Snapshot of the active model configuration. Missing provider and
// context window fall back to "" and DEFAULT_CONTEXT_WINDOW.
h.define("agent:get-mode", () => {
    const mode = this.currentMode;
    const snapshot = {
        model: mode.model,
        provider: mode.provider ?? "",
        thinkingLevel: this.thinkingLevel,
        contextWindow: mode.contextWindow ?? DEFAULT_CONTEXT_WINDOW,
    };
    return snapshot;
});
717
// Token accounting: current estimates, recorded peak, compacted total,
// and the prompt estimate as a rounded percentage of the context window.
h.define("agent:get-tokens", () => {
    const windowSize = this.currentMode.contextWindow ?? DEFAULT_CONTEXT_WINDOW;
    const promptTokens = this.conversation.estimatePromptTokens();
    const active = this.conversation.estimateTokens();
    const usedFraction = promptTokens / windowSize;
    return {
        active,
        peak: this.peakConversationTokens,
        cumulativeCompacted: this.cumulativeCompactedTokens,
        promptTokens,
        contextPercent: Math.round(usedFraction * 100),
    };
});
728
// Session-wide counters. errorRate is a whole-number percentage and is
// reported as 0 until at least one tool call has been recorded.
h.define("agent:get-counters", () => {
    const calls = this.totalToolCalls;
    const errors = this.totalToolErrors;
    let errorRate = 0;
    if (calls > 0) {
        errorRate = Math.round((errors / calls) * 100);
    }
    return {
        queryCount: this.queryCount,
        totalToolCalls: calls,
        totalToolErrors: errors,
        totalResolutions: this.totalResolutions,
        totalLoopIterations: this.totalLoopIterations,
        errorRate,
    };
});
738
// Session start timestamp plus the elapsed time rounded to whole seconds.
h.define("agent:get-timing", () => {
    const startedAt = this.sessionStartTime;
    const elapsedMs = Date.now() - startedAt;
    return {
        startedAt,
        elapsedSeconds: Math.round(elapsedMs / 1000),
    };
});
742
// Per-tool success/error tallies, most-used tool first.
h.define("agent:get-tool-stats", () => {
    const stats = [];
    for (const [name, counts] of this.toolCallCounts.entries()) {
        stats.push({
            name,
            total: counts.success + counts.error,
            success: counts.success,
            error: counts.error,
        });
    }
    stats.sort((left, right) => right.total - left.total);
    return stats;
});
750
// Flattened copy of the file-read cache; an absent limit is reported as null.
h.define("agent:get-file-read-cache", () => {
    const snapshot = [];
    for (const [filePath, state] of this.fileReadCache.entries()) {
        snapshot.push({
            path: filePath,
            offset: state.offset,
            limit: state.limit ?? null,
            mtimeMs: state.mtimeMs,
        });
    }
    return snapshot;
});
756
// Most recent unresolved error summaries, keyed by tool and by file path.
h.define("agent:get-recent-errors", () => {
    const byTool = [];
    for (const [tool, error] of this.lastErrorByTool.entries()) {
        byTool.push({ tool, error });
    }
    const byFile = [];
    for (const [file, error] of this.lastErrorByFile.entries()) {
        byFile.push({ file, error });
    }
    return { byTool, byFile };
});
760
// Compaction bookkeeping plus the auto-compact trigger, expressed both as
// a ratio (default 0.5) and as tokens of the window minus the response reserve.
h.define("agent:get-compaction-state", () => {
    const windowSize = this.currentMode.contextWindow ?? DEFAULT_CONTEXT_WINDOW;
    const thresholdRatio = getSettings().autoCompactThreshold ?? 0.5;
    const usableTokens = windowSize - RESPONSE_RESERVE;
    return {
        count: this.compactionCount,
        nuclearEntries: this.conversation.getNuclearEntryCount(),
        autoCompactThreshold: thresholdRatio,
        autoCompactThresholdTokens: Math.floor(usableTokens * thresholdRatio),
    };
});
770
// Escape hatch: hands the caller this instance itself, not a copy.
h.define("agent:get-self", () => {
    return this;
});
451
771
  // Extensions compose additional context (git info, project rules, etc.)
452
- h.define("dynamic-context:build", () => buildDynamicContext(this.contextManager, this.tokenBudget.shellBudgetTokens));
772
// Builds the per-iteration dynamic context from the context manager and
// the current prompt-token estimate against the context window.
h.define("dynamic-context:build", () => {
    const windowSize = this.currentMode.contextWindow ?? DEFAULT_CONTEXT_WINDOW;
    const estimatedPrompt = this.conversation.estimatePromptTokens();
    const usage = { promptTokens: estimatedPrompt, contextWindow: windowSize };
    return buildDynamicContext(this.contextManager, usage);
});
453
777
  // Full control over what the LLM sees: takes messages[], returns messages[].
454
778
  // Default: pass through. Extensions can advise to compact, summarize,
455
779
  // filter, reorder, inject — whatever strategy fits.
456
780
// Default preparation is the identity: the LLM sees the messages unchanged.
h.define("conversation:prepare", (messages) => {
    return messages;
});
// ── Conversation primitives for compaction strategies ─────────
// Read side (inspect messages, estimate sizes) and write side (replace
// the whole message array). Extensions implementing
// `conversation:compact` use these to observe and mutate.
h.define("conversation:get-messages", () => {
    return this.conversation.getMessages();
});
h.define("conversation:replace-messages", (msgs) => {
    this.conversation.replaceMessages(msgs);
});
h.define("conversation:estimate-tokens", () => {
    return this.conversation.estimateTokens();
});
h.define("conversation:estimate-prompt-tokens", () => {
    return this.conversation.estimatePromptTokens();
});
791
+ // ── Nucleation (advisable) ─────────────────────────────────────
792
+ // Turn a raw message into a one-line NuclearEntry. Advisors enrich
793
+ // (e.g. `[why: ...]` extraction, adaptive summary lengths).
794
// User and agent text share one call shape; only the kind tag differs.
h.define("conversation:nucleate-user", (text, iid, seq) => {
    return nucleate("user", text, iid, seq);
});
h.define("conversation:nucleate-agent", (text, iid, seq) => {
    return nucleate("agent", text, iid, seq);
});
// Tool results are tagged "error" or "tool" depending on the outcome.
h.define("conversation:nucleate-tool", (toolName, args, content, isError, iid, seq) => {
    const kind = isError ? "error" : "tool";
    return nucleate(kind, toolName, args, content, isError, iid, seq);
});
797
+ // Read-only views into the nuclear state, for compact strategies
798
+ // and introspect that read without replacing.
799
h.define("conversation:get-nuclear-entries", () => {
    return this.conversation.getNuclearEntries();
});
h.define("conversation:get-nuclear-summary", () => {
    return this.conversation.getNuclearSummary();
});
// Wraps the nuclear summary in a user-role message, or yields null when
// there is no summary to show.
h.define("conversation:build-nuclear-block", () => {
    const summary = this.conversation.getNuclearSummary();
    return summary
        ? {
            role: "user",
            content: `[Conversation history \u2014 use conversation_recall to expand any entry]\n${summary}`,
        }
        : null;
});
810
+ // ── History file I/O (advisable) ───────────────────────────────
811
+ // Default is the append-only JSONL at ~/.agent-sh/history; advisors
812
+ // swap the backend without touching nucleation.
813
// Appends only the entries that are not read-only. The write is
// fire-and-forget: a failed append is deliberately ignored.
h.define("history:append", (entries) => {
    if (!entries || entries.length === 0)
        return;
    const writable = [];
    for (const entry of entries) {
        if (!isReadOnly(entry))
            writable.push(entry);
    }
    if (writable.length === 0)
        return;
    this.historyFile.append(writable).catch(() => { });
});
// Read-side lookups delegate straight to the history file.
h.define("history:search", async (query) => {
    return this.historyFile.search(query);
});
h.define("history:find-by-seq", async (seq) => {
    return this.historyFile.findBySeq(seq);
});
h.define("history:read-recent", async (max) => {
    return this.historyFile.readRecent(max);
});
823
+ // Prior-session preamble renderer. Default: flat chronological list.
824
// Renders prior-session entries as one line each under a header, or
// yields null when there is nothing to render.
h.define("conversation:format-prior-history", (entries) => {
    const hasEntries = Boolean(entries) && entries.length !== 0;
    if (!hasEntries)
        return null;
    const body = entries.map(formatNuclearLine).join("\n");
    return `[Prior session history \u2014 loaded from ~/.agent-sh/history]\n${body}`;
});
830
+ // Compaction strategy — default delegates to the two-tier pin
831
+ // strategy in ConversationState; advisors replace wholesale.
832
// Default strategy: forward the options to ConversationState.compact.
h.define("conversation:compact", (opts) => {
    const { target, keepRecent, force } = opts;
    return this.conversation.compact(target, keepRecent, force);
});
835
+ // Inject a system note mid-loop — used by extensions (subagents,
836
+ // peer messages) to deliver async results into the next iteration.
837
// Records the note in the conversation first, then announces it on the bus.
h.define("conversation:inject-note", (text) => {
    const appended = { role: "system", content: text };
    this.conversation.addSystemNote(text);
    this.bus.emit("conversation:message-appended", appended);
});
457
841
  // Wraps each tool call: permission → execute → emit events.
458
842
  // Extensions advise to add safe-mode, logging, metrics, custom policies.
459
843
  // The ctx.onChunk callback is exposed so advisors can wrap it to
460
844
  // intercept/transform streamed tool output (e.g. secret redaction).
461
845
  h.define("tool:execute", async (ctx) => {
462
846
  const { name, id, args, tool } = ctx;
847
+ // Validate required input fields before display/permission/execute.
848
+ // Some models emit wrong arg names (e.g. `file_path` instead of `path`),
849
+ // and downstream helpers assume required strings are present.
850
+ const schema = tool.input_schema;
851
+ const required = Array.isArray(schema?.required) ? schema.required : [];
852
+ const missing = required.filter((k) => args[k] === undefined || args[k] === null);
853
+ if (missing.length > 0) {
854
+ const msg = `Missing required argument(s): ${missing.join(", ")}. Expected: ${required.join(", ")}. Received: ${Object.keys(args).join(", ") || "(none)"}`;
855
+ this.bus.emit("agent:tool-call", { tool: name, args });
856
+ return { content: msg, exitCode: 1, isError: true };
857
+ }
463
858
  const display = tool.getDisplayInfo?.(args) ?? { kind: "execute" };
464
859
  let diffShown = false;
465
860
  // Permission gating
@@ -473,36 +868,44 @@ export class AgentLoop {
473
868
  if (tool.modifiesFiles && typeof args.path === "string") {
474
869
  try {
475
870
  const absPath = path.resolve(process.cwd(), args.path);
476
- let oldContent = null;
477
- try {
478
- oldContent = await fs.readFile(absPath, "utf-8");
479
- }
480
- catch { /* new file */ }
481
- let newContent;
482
- if (typeof args.content === "string") {
483
- // write_file
484
- newContent = args.content;
485
- }
486
- else if (typeof args.old_text === "string" && typeof args.new_text === "string" && oldContent !== null) {
487
- // edit_file
488
- newContent = oldContent.replace(args.old_text.replace(/\r\n/g, "\n"), args.new_text.replace(/\r\n/g, "\n"));
871
+ let diff;
872
+ if (typeof args.old_text === "string" && typeof args.new_text === "string") {
873
+ // edit_file — read the file so line numbers are real (not relative to the edit region)
874
+ const normalizedOld = args.old_text.replace(/\r\n/g, "\n");
875
+ const normalizedNew = args.new_text.replace(/\r\n/g, "\n");
876
+ try {
877
+ const oldFileContent = await fs.readFile(absPath, "utf-8");
878
+ diff = computeEditDiff(oldFileContent, normalizedOld, normalizedNew, args.replace_all === true);
879
+ }
880
+ catch {
881
+ // File doesn't exist yet — fall back to input-only diff
882
+ diff = computeInputDiff(normalizedOld, normalizedNew);
883
+ }
489
884
  }
490
- if (newContent !== undefined) {
491
- const diff = computeDiff(oldContent, newContent);
492
- if (!diff.isIdentical) {
493
- permKind = "file-write";
494
- // Shorten path for display
495
- const cwd = process.cwd();
496
- const home = process.env.HOME;
497
- let displayPath = absPath;
498
- if (absPath.startsWith(cwd + "/"))
499
- displayPath = absPath.slice(cwd.length + 1);
500
- else if (home && absPath.startsWith(home + "/"))
501
- displayPath = "~/" + absPath.slice(home.length + 1);
502
- permTitle = displayPath;
503
- metadata = { args, diff };
504
- diffShown = true;
885
+ else if (typeof args.content === "string") {
886
+ // write_file — still need to read the old file for comparison
887
+ let oldContent = null;
888
+ try {
889
+ oldContent = await fs.readFile(absPath, "utf-8");
505
890
  }
891
+ catch { /* new file */ }
892
+ if (oldContent !== null) {
893
+ diff = computeDiff(oldContent, args.content);
894
+ }
895
+ }
896
+ if (diff && !diff.isIdentical) {
897
+ permKind = "file-write";
898
+ // Shorten path for display
899
+ const cwd = process.cwd();
900
+ const home = process.env.HOME;
901
+ let displayPath = absPath;
902
+ if (absPath.startsWith(cwd + "/"))
903
+ displayPath = absPath.slice(cwd.length + 1);
904
+ else if (home && absPath.startsWith(home + "/"))
905
+ displayPath = "~/" + absPath.slice(home.length + 1);
906
+ permTitle = displayPath;
907
+ metadata = { args, diff };
908
+ diffShown = true;
506
909
  }
507
910
  }
508
911
  catch { /* fall back to generic permission */ }
@@ -569,11 +972,21 @@ export class AgentLoop {
569
972
  // Each loop iteration adds an abort listener (via OpenAI SDK stream);
570
973
  // disable the limit — long-running tool loops can easily exceed any cap.
571
974
  setMaxListeners(0, signal);
975
+ this.queryCount++;
572
976
  this.bus.emit("agent:query", { query });
573
977
  this.bus.emit("agent:processing-start", {});
574
978
  let responseText = "";
575
979
  try {
576
- this.conversation.addUserMessage(query);
980
+ // Prepend any shell events that preceded this query into the same
981
+ // user message, so the conversation reads chronologically and we
982
+ // don't emit two consecutive user-role messages (some providers
983
+ // reject that).
984
+ const preDelta = this.contextManager.getEventsSince(this.lastShellSeq);
985
+ const userContent = preDelta ? `${preDelta.text}\n\n${query}` : query;
986
+ if (preDelta)
987
+ this.lastShellSeq = preDelta.lastSeq;
988
+ this.conversation.addUserMessage(userContent);
989
+ this.bus.emit("conversation:message-appended", { role: "user", content: query });
577
990
  responseText = await this.executeLoop(signal);
578
991
  }
579
992
  catch (e) {
@@ -581,6 +994,8 @@ export class AgentLoop {
581
994
  this.bus.emit("agent:cancelled", {});
582
995
  }
583
996
  else if (!signal.aborted) {
997
+ if (e instanceof Error)
998
+ console.error("[agent-sh] query failed:\n" + e.stack);
584
999
  const msg = this.formatError(e);
585
1000
  this.bus.emit("agent:error", { message: msg });
586
1001
  }
@@ -606,23 +1021,41 @@ export class AgentLoop {
606
1021
  */
607
1022
  async executeLoop(signal) {
608
1023
  let fullResponseText = "";
1024
+ // System prompt carries things stable within a turn: static identity,
1025
+ // global agent rules, project conventions, project skills. Invalidated
1026
+ // only by compaction (context shape changed) or cwd change (project
1027
+ // conventions/skills changed). Dynamic context rebuilds every iteration
1028
+ // so live signals (budget, in-flight subagents, metacognitive warnings)
1029
+ // are fresh.
1030
+ let cachedSystemPrompt;
1031
+ let lastCwd = this.contextManager.getCwd();
609
1032
  while (!signal.aborted) {
610
- // Auto-compact when conversation exceeds threshold fraction of budget
611
- const budgetTokens = this.tokenBudget.conversationBudgetTokens;
612
- const autoCompactThreshold = Math.floor(budgetTokens * getSettings().autoCompactThreshold);
613
- if (this.conversation.estimateTokens() > autoCompactThreshold) {
614
- const stats = this.conversation.compact(autoCompactThreshold);
615
- await this.conversation.flush();
616
- if (stats) {
1033
+ // Auto-compact when total context approaches the window limit.
1034
+ const totalEstimate = this.conversation.estimatePromptTokens();
1035
+ const contextWindow = this.currentMode.contextWindow ?? DEFAULT_CONTEXT_WINDOW;
1036
+ const threshold = Math.floor((contextWindow - RESPONSE_RESERVE) * getSettings().autoCompactThreshold);
1037
+ if (totalEstimate > threshold) {
1038
+ const result = this.compactWithHooks(threshold);
1039
+ if (!result) {
1040
+ // Auto-compact fired but nothing was evictable. This can happen
1041
+ // in short conversations with heavy tool output where the pin
1042
+ // fraction consumes all turns. Log it so it's not silent.
617
1043
  this.bus.emit("ui:info", {
618
- message: `(compacted: ~${stats.before.toLocaleString()} ~${stats.after.toLocaleString()} tokens)`,
1044
+ message: `[auto-compact] above threshold (${totalEstimate.toLocaleString()} > ${threshold.toLocaleString()}) but nothing to evict — conversation may be too short`,
619
1045
  });
620
1046
  }
1047
+ cachedSystemPrompt = undefined;
1048
+ }
1049
+ const currentCwd = this.contextManager.getCwd();
1050
+ if (currentCwd !== lastCwd) {
1051
+ cachedSystemPrompt = undefined;
1052
+ lastCwd = currentCwd;
621
1053
  }
622
- // System prompt uses handler so extensions can append instructions (cacheable);
623
- // dynamic context uses handler for per-query state via advise()
624
- const systemPrompt = this.handlers.call("system-prompt:build");
1054
+ const systemPrompt = cachedSystemPrompt ?? (cachedSystemPrompt = this.handlers.call("system-prompt:build"));
625
1055
  const dynamicContext = this.handlers.call("dynamic-context:build");
1056
+ // Shell events are injected once per user query (see query() above),
1057
+ // not per loop iteration. Mid-loop injection would break the
1058
+ // tool_call → tool_result chain some providers require.
626
1059
  // Stream LLM response with retry
627
1060
  const result = await this.streamWithRetry(systemPrompt, dynamicContext, signal);
628
1061
  const { text, toolCalls: streamedToolCalls } = result;
@@ -632,9 +1065,15 @@ export class AgentLoop {
632
1065
  fullResponseText += text;
633
1066
  // Record the assistant message via protocol
634
1067
  this.toolProtocol.recordAssistant(this.conversation, text, toolCalls);
1068
+ this.bus.emit("conversation:message-appended", {
1069
+ role: "assistant",
1070
+ content: text,
1071
+ });
635
1072
  // No tool calls → agent is done
636
- if (toolCalls.length === 0)
1073
+ if (toolCalls.length === 0) {
1074
+ this.conversation.eagerNucleateAgent(fullResponseText);
637
1075
  break;
1076
+ }
638
1077
  // Emit batch info so the TUI can render group headers upfront
639
1078
  {
640
1079
  const groupMap = new Map();
@@ -663,6 +1102,8 @@ export class AgentLoop {
663
1102
  // requiring tools sequentially (to avoid overlapping permission prompts).
664
1103
  const batchTotal = toolCalls.length;
665
1104
  const collectedResults = [];
1105
+ // Round-scoped cache for pure, read-only tool calls
1106
+ const roundCache = new Map();
666
1107
  const executeSingle = async (tc, batchIndex) => {
667
1108
  // Rewrite meta-tool calls (e.g., use_extension → actual tool)
668
1109
  tc = this.toolProtocol.rewriteToolCall(tc);
@@ -697,6 +1138,39 @@ export class AgentLoop {
697
1138
  });
698
1139
  return;
699
1140
  }
1141
+ // ── Round-scoped cache for cacheable read-only tools ──
1142
+ const cacheable = !tool.modifiesFiles && !tool.requiresPermission && tool.showOutput !== true;
1143
+ const cacheKey = cacheable ? `${tc.name}:${JSON.stringify(args)}` : null;
1144
+ if (cacheKey) {
1145
+ const cached = roundCache.get(cacheKey);
1146
+ if (cached) {
1147
+ const display = tool.getDisplayInfo?.(args) ?? { kind: "execute" };
1148
+ this.bus.emit("agent:tool-started", {
1149
+ title: tool.displayName ?? tc.name,
1150
+ toolCallId: tc.id,
1151
+ kind: display.kind, icon: display.icon, locations: display.locations, rawInput: args,
1152
+ displayDetail: tool.formatCall?.(args),
1153
+ batchIndex, batchTotal: batchTotal > 1 ? batchTotal : undefined,
1154
+ });
1155
+ this.bus.emit("agent:tool-call", { tool: tc.name, args });
1156
+ // Reconstruct a ToolResult for formatResult; ProtocolToolResult has no exitCode
1157
+ const cachedToolResult = { content: cached.content, exitCode: 0, isError: cached.isError };
1158
+ const resultDisplay = tool.formatResult?.(args, cachedToolResult);
1159
+ this.bus.emitTransform("agent:tool-completed", {
1160
+ toolCallId: tc.id, exitCode: 0,
1161
+ rawOutput: cached.content, kind: display.kind,
1162
+ resultDisplay,
1163
+ });
1164
+ this.bus.emit("agent:tool-output", {
1165
+ tool: tc.name, output: cached.content, exitCode: 0,
1166
+ });
1167
+ collectedResults.push({
1168
+ callId: tc.id, toolName: tc.name,
1169
+ content: cached.content, isError: cached.isError,
1170
+ });
1171
+ return;
1172
+ }
1173
+ }
700
1174
  // Execute via handler — extensions can advise to add safe-mode,
701
1175
  // logging, metrics, custom permission policies, etc.
702
1176
  const defaultOnChunk = (chunk) => {
@@ -728,10 +1202,14 @@ export class AgentLoop {
728
1202
  ...lines.slice(tailStart),
729
1203
  ].join("\n");
730
1204
  }
731
- collectedResults.push({
1205
+ const finalResult = {
732
1206
  callId: tc.id, toolName: tc.name,
733
1207
  content, isError: result.isError,
734
- });
1208
+ };
1209
+ if (cacheKey) {
1210
+ roundCache.set(cacheKey, finalResult);
1211
+ }
1212
+ collectedResults.push(finalResult);
735
1213
  };
736
1214
  // Partition into parallel-safe (read-only) and sequential (needs permission)
737
1215
  const parallel = [];
@@ -759,13 +1237,163 @@ export class AgentLoop {
759
1237
  break;
760
1238
  await executeSingle(tc, ++batchIdx);
761
1239
  }
1240
+ // ── Consecutive error detection (metacognitive nudge) ──
1241
+ // Track errors per tool and total. When the same tool errors N times
1242
+ // in a row, nudge to read source. When errors cascade across tools,
1243
+ // nudge to step back and reassess approach.
1244
+ const errorTools = new Set();
1245
+ const successTools = new Set();
1246
+ const errorSummaries = new Map(); // tool → brief error description
1247
+ const successSummaries = new Map(); // tool → brief success description
1248
+ for (const r of collectedResults) {
1249
+ const content = typeof r.content === "string" ? r.content : String(r.content);
1250
+ const brief = content.slice(0, 80).replace(/\n/g, " ").trim();
1251
+ if (r.isError) {
1252
+ errorTools.add(r.toolName);
1253
+ errorSummaries.set(r.toolName, brief);
1254
+ }
1255
+ else {
1256
+ successTools.add(r.toolName);
1257
+ successSummaries.set(r.toolName, brief);
1258
+ }
1259
+ }
1260
+ const hadAnyError = errorTools.size > 0;
1261
+ const hadAnySuccess = successTools.size > 0;
1262
+ // ── Session telemetry accumulation ──
1263
+ // Track every tool call's outcome. Exposed via orthogonal handlers
1264
+ // (agent:get-counters, agent:get-tool-stats) for extensions that
1265
+ // want behavioral signals. The data layer for metacognition — you
1266
+ // can't improve what you don't measure.
1267
+ for (const r of collectedResults) {
1268
+ const counts = this.toolCallCounts.get(r.toolName) ?? { success: 0, error: 0 };
1269
+ if (r.isError) {
1270
+ counts.error++;
1271
+ this.totalToolErrors++;
1272
+ }
1273
+ else {
1274
+ counts.success++;
1275
+ }
1276
+ this.toolCallCounts.set(r.toolName, counts);
1277
+ this.totalToolCalls++;
1278
+ }
1279
+ this.totalLoopIterations++;
1280
+ // ── Resolution pattern tracking ──
1281
+ // When a tool errors, record the error context. When the same tool
1282
+ // (or a write tool touching the same file) succeeds afterward,
1283
+ // increment totalResolutions — the positive feedback signal exposed
1284
+ // to extensions via agent:get-counters.
1285
{
    // Match a result to the call that produced it. The call id is
    // authoritative; the tool name is only a fallback when no id matches.
    // Matching on "id OR name" in a single pass would return the first call
    // with the same tool name, attributing results to the wrong arguments
    // whenever a batch invokes one tool more than once.
    const findCallForResult = (result) => {
        const byId = result.callId == null
            ? undefined
            : toolCalls.find((call) => call.id === result.callId);
        return byId ?? toolCalls.find((call) => call.name === result.toolName);
    };
    // File path targeted by a result's call, or undefined when the call is
    // unknown, is not a file tool, or has unparseable arguments.
    const filePathForResult = (result) => {
        const call = findCallForResult(result);
        if (!call)
            return undefined;
        try {
            return this.filePathFromArgs(result.toolName, JSON.parse(call.argumentsJson)) || undefined;
        }
        catch {
            // Telemetry is best-effort: malformed arguments just skip this result.
            return undefined;
        }
    };
    // Same one-line summary shape used for the per-tool summaries.
    const briefOf = (result) => String(result.content).slice(0, 80).replace(/\n/g, " ").trim();
    if (hadAnyError) {
        for (const [tool, summary] of errorSummaries) {
            this.lastErrorByTool.set(tool, summary);
        }
        for (const result of collectedResults) {
            if (!result.isError)
                continue;
            const failedPath = filePathForResult(result);
            // Record this result's own error text, not the tool's last error
            // in the batch, which may belong to a different file.
            if (failedPath)
                this.lastErrorByFile.set(failedPath, briefOf(result));
        }
    }
    if (hadAnySuccess) {
        // At most one resolution is counted per batch: first by tool, then
        // (if none) by file. Presence is tested with has() so an error whose
        // summary is the empty string still counts as pending.
        let resolved = false;
        for (const tool of successSummaries.keys()) {
            if (this.lastErrorByTool.has(tool)) {
                this.lastErrorByTool.delete(tool);
                this.totalResolutions++;
                resolved = true;
                break;
            }
        }
        if (!resolved) {
            for (const result of collectedResults) {
                if (result.isError)
                    continue;
                const fixedPath = filePathForResult(result);
                if (fixedPath && this.lastErrorByFile.has(fixedPath)) {
                    this.lastErrorByFile.delete(fixedPath);
                    this.totalResolutions++;
                    break;
                }
            }
        }
        // Clear resolved error-by-tool entries for successful tools
        for (const tool of successTools) {
            this.lastErrorByTool.delete(tool);
        }
    }
}
1338
+ // Announce the batch — extensions that care about batch-level
1339
+ // outcomes (consecutive-error tracking, resolution pattern logging,
1340
+ // metacognitive nudges) listen here.
1341
+ this.bus.emit("agent:tool-batch-complete", {
1342
+ results: collectedResults.map((r) => ({
1343
+ name: r.toolName,
1344
+ isError: !!r.isError,
1345
+ errorSummary: r.isError ? errorSummaries.get(r.toolName) : undefined,
1346
+ })),
1347
+ });
762
1348
  // Record all tool results via protocol
763
1349
  this.toolProtocol.recordResults(this.conversation, collectedResults);
1350
+ const tcMap = new Map();
1351
+ for (const tc of toolCalls) {
1352
+ if (tc.id)
1353
+ tcMap.set(tc.id, tc);
1354
+ }
1355
+ this.conversation.eagerNucleateTools(collectedResults.map((r) => {
1356
+ const tc = tcMap.get(r.callId);
1357
+ let args = {};
1358
+ try {
1359
+ args = tc ? JSON.parse(tc.argumentsJson) : {};
1360
+ }
1361
+ catch { }
1362
+ return { toolName: r.toolName, args, content: r.content, isError: !!r.isError };
1363
+ }));
1364
// Emit enriched message-appended events so derived-log extensions
// can summarize each tool result without re-parsing the message
// structure.
for (const result of collectedResults) {
    const content = typeof result.content === "string" ? result.content : String(result.content);
    // Resolve the originating call by id first and fall back to the tool
    // name only when no id matches. A combined "id OR name" predicate would
    // pick the first call with the same name and report the wrong arguments
    // for repeated tools in one batch.
    const callById = result.callId == null
        ? undefined
        : toolCalls.find((call) => call.id === result.callId);
    const call = callById ?? toolCalls.find((candidate) => candidate.name === result.toolName);
    let args = {};
    try {
        args = call ? JSON.parse(call.argumentsJson) : {};
    }
    catch {
        // Unparseable arguments: report an empty argument object.
    }
    this.bus.emit("conversation:message-appended", {
        role: "tool",
        content,
        toolName: result.toolName,
        toolArgs: args,
        isError: !!result.isError,
    });
}
764
1383
  // Loop back — LLM sees tool results
765
1384
  }
766
1385
  return fullResponseText;
767
1386
  }
768
1387
  maxRetries = 3;
1388
+ // ── Resolution pattern helpers ──
1389
+ // Extract a file path from a tool call's arguments. Used to correlate
1390
+ // errors with subsequent successful writes on the same file.
1391
+ filePathFromArgs(toolName, args) {
1392
+ if (toolName === "edit_file" || toolName === "write_file" || toolName === "read_file") {
1393
+ return (args.path ?? args.file_path);
1394
+ }
1395
+ return undefined;
1396
+ }
769
1397
  /**
770
1398
  * Stream with retry logic. Handles:
771
1399
  * - Context overflow → compact and retry
@@ -782,12 +1410,20 @@ export class AgentLoop {
782
1410
  throw e;
783
1411
  // Context overflow — aggressively compact and retry
784
1412
  if (this.isContextOverflow(e)) {
785
- // Use 60% of the budget to leave headroom
786
- const aggressiveBudget = Math.floor(this.tokenBudget.conversationBudgetTokens * 0.6);
787
- const stats = this.conversation.compact(aggressiveBudget, 6);
788
- await this.conversation.flush();
789
- const detail = stats ? ` ~${stats.before.toLocaleString()} ~${stats.after.toLocaleString()} tokens` : "";
790
- this.bus.emit("ui:info", { message: `(context overflow compacted${detail}, retrying)` });
1413
+ const contextWindow = this.currentMode.contextWindow ?? DEFAULT_CONTEXT_WINDOW;
1414
+ const target = Math.floor((contextWindow - RESPONSE_RESERVE) * 0.6);
1415
+ const stats = this.compactWithHooks(target, 6);
1416
+ // If compaction freed nothing, retrying will hit the same error.
1417
+ // Surface the real failure instead of looping until exhaustion.
1418
+ if (!stats || stats.after >= stats.before) {
1419
+ this.bus.emit("ui:info", {
1420
+ message: "(context overflow — nothing to compact; aborting retries)",
1421
+ });
1422
+ throw e;
1423
+ }
1424
+ this.bus.emit("ui:info", {
1425
+ message: `(context overflow — compacted ~${stats.before.toLocaleString()} → ~${stats.after.toLocaleString()} tokens, retrying)`,
1426
+ });
791
1427
  continue;
792
1428
  }
793
1429
  // Retryable transient error — backoff
@@ -851,11 +1487,16 @@ export class AgentLoop {
851
1487
  // Token usage (may arrive in a chunk with empty choices)
852
1488
  if (chunk.usage) {
853
1489
  const u = chunk.usage;
1490
+ const promptTokens = u.prompt_tokens ?? 0;
854
1491
  this.bus.emit("agent:usage", {
855
- prompt_tokens: u.prompt_tokens ?? 0,
1492
+ prompt_tokens: promptTokens,
856
1493
  completion_tokens: u.completion_tokens ?? 0,
857
1494
  total_tokens: u.total_tokens ?? 0,
858
1495
  });
1496
+ // Feed accurate token count back to conversation state
1497
+ if (promptTokens > 0) {
1498
+ this.conversation.updateApiTokenCount(promptTokens);
1499
+ }
859
1500
  }
860
1501
  const choice = chunk.choices[0];
861
1502
  if (!choice)
@@ -907,6 +1548,25 @@ export class AgentLoop {
907
1548
  });
908
1549
  }
909
1550
  }
1551
// Normalize arguments JSON — some providers (Alibaba/qwen) strictly
// validate `function.arguments` as parseable JSON on the NEXT turn,
// and reject empty strings or partial chunks. OpenAI itself is lenient,
// so empty "" slips through locally but the replay breaks upstream.
// Tool arguments must also be a JSON object: a payload that parses to
// null, an array, or a scalar is replaced too, because downstream code
// indexes the parsed value by argument name.
for (const tc of pendingToolCalls) {
    if (!tc)
        continue;
    let parsed;
    try {
        // Throws for "", partial chunks, and a missing argumentsJson alike.
        parsed = JSON.parse(tc.argumentsJson);
    }
    catch {
        parsed = undefined;
    }
    const isPlainObject = typeof parsed === "object" && parsed !== null && !Array.isArray(parsed);
    if (!isPlainObject) {
        tc.argumentsJson = "{}";
    }
}
910
1570
  return {
911
1571
  text,
912
1572
  toolCalls: pendingToolCalls,