agent-sh 0.6.0 → 0.8.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (50)
  1. package/README.md +5 -1
  2. package/dist/agent/agent-loop.d.ts +2 -2
  3. package/dist/agent/agent-loop.js +106 -13
  4. package/dist/agent/conversation-state.d.ts +39 -9
  5. package/dist/agent/conversation-state.js +336 -17
  6. package/dist/agent/history-file.d.ts +36 -0
  7. package/dist/agent/history-file.js +167 -0
  8. package/dist/agent/nuclear-form.d.ts +41 -0
  9. package/dist/agent/nuclear-form.js +175 -0
  10. package/dist/agent/system-prompt.d.ts +2 -2
  11. package/dist/agent/system-prompt.js +25 -4
  12. package/dist/agent/tools/user-shell.js +4 -1
  13. package/dist/context-manager.d.ts +3 -2
  14. package/dist/context-manager.js +16 -111
  15. package/dist/core.js +30 -1
  16. package/dist/event-bus.d.ts +37 -0
  17. package/dist/extensions/overlay-agent.d.ts +14 -0
  18. package/dist/extensions/overlay-agent.js +147 -0
  19. package/dist/extensions/slash-commands.js +28 -0
  20. package/dist/extensions/terminal-buffer.d.ts +14 -0
  21. package/dist/extensions/terminal-buffer.js +125 -0
  22. package/dist/extensions/tui-renderer.js +122 -84
  23. package/dist/index.js +4 -0
  24. package/dist/input-handler.js +6 -1
  25. package/dist/output-parser.js +8 -0
  26. package/dist/settings.d.ts +19 -2
  27. package/dist/settings.js +21 -3
  28. package/dist/shell.d.ts +5 -0
  29. package/dist/shell.js +31 -2
  30. package/dist/token-budget.d.ts +13 -0
  31. package/dist/token-budget.js +50 -0
  32. package/dist/types.d.ts +13 -22
  33. package/dist/utils/ansi.d.ts +10 -0
  34. package/dist/utils/ansi.js +27 -0
  35. package/dist/utils/floating-panel.d.ts +227 -0
  36. package/dist/utils/floating-panel.js +807 -0
  37. package/dist/utils/line-editor.d.ts +9 -0
  38. package/dist/utils/line-editor.js +44 -0
  39. package/dist/utils/markdown.js +3 -3
  40. package/dist/utils/output-writer.d.ts +14 -0
  41. package/dist/utils/output-writer.js +16 -0
  42. package/dist/utils/terminal-buffer.d.ts +69 -0
  43. package/dist/utils/terminal-buffer.js +179 -0
  44. package/dist/utils/tool-display.d.ts +1 -0
  45. package/dist/utils/tool-display.js +1 -1
  46. package/examples/extensions/claude-code-bridge/index.ts +77 -1
  47. package/examples/extensions/overlay-agent.ts +70 -0
  48. package/examples/extensions/pi-bridge/index.ts +87 -2
  49. package/examples/extensions/terminal-buffer.ts +184 -0
  50. package/package.json +5 -1
package/README.md CHANGED
@@ -21,7 +21,7 @@ agent-sh flips this. It's your shell first — full PTY, your rc config, your al
21
21
 
22
22
  **Real terminal, zero compromise.** Full PTY with your shell config, aliases, and environment. Shell starts instantly — the agent connects asynchronously in the background.
23
23
 
24
- **Context-aware agent.** Every query includes your cwd, recent commands, and their output. Run a failing test, type `> fix this`, and the agent knows exactly what happened. It has built-in tools for file read/write/edit, bash, grep, glob — no external setup needed.
24
+ **Context-aware agent.** Every query includes your cwd, recent commands, and their output. Run a failing test, type `> fix this`, and the agent knows exactly what happened. It has built-in tools for file read/write/edit, bash, grep, glob — no external setup needed. Context management works like shell history — continuous, persistent across restarts, no sessions to manage. See [Context Management](docs/context-management.md).
25
25
 
26
26
  **Agent decides how to help.** One entry point (`>`), three tool categories. The agent uses scratchpad tools to investigate, `display` to show you output, and `user_shell` for commands with lasting effects. No need to pick a mode — the agent reasons about which tools to use based on your intent.
27
27
 
@@ -59,6 +59,9 @@ Everything else works as a normal shell — commands go straight to the PTY. Inp
59
59
  | `/help` | Show available commands |
60
60
  | `/model [name]` | Cycle to the next model, or switch to a specific one |
61
61
  | `/backend [name]` | List backends, or switch to a named backend |
62
+ | `/compact` | Compact conversation (free up context space) |
63
+ | `/context` | Show context budget usage |
64
+ | `/thinking [level]` | Set reasoning effort (off, low, medium, high) |
62
65
 
63
66
  ## Configuration
64
67
 
@@ -68,6 +71,7 @@ Configure via `~/.agent-sh/settings.json`. See the [Usage Guide](docs/usage.md#c
68
71
 
69
72
  - [Usage Guide](docs/usage.md) — providers, models, configuration, provider profiles
70
73
  - [Internal Agent](docs/agent.md) — how the agent loop works: tools, context, streaming
74
+ - [Context Management](docs/context-management.md) — three-tier history, token budget, design philosophy
71
75
  - [Architecture](docs/architecture.md) — design philosophy, component overview, project structure
72
76
  - [Extensions](docs/extensions.md) — event bus, content transforms, custom backends, theming
73
77
  - [Library Usage](docs/library.md) — embedding agent-sh in your own apps
package/dist/agent/agent-loop.d.ts CHANGED
@@ -25,8 +25,10 @@ export declare class AgentLoop implements AgentBackend {
25
25
  private handlers;
26
26
  private abortController;
27
27
  private toolRegistry;
28
+ private historyFile;
28
29
  private conversation;
29
30
  private fileReadCache;
31
+ private tokenBudget;
30
32
  private modes;
31
33
  private currentModeIndex;
32
34
  private boundListeners;
@@ -63,8 +65,6 @@ export declare class AgentLoop implements AgentBackend {
63
65
  */
64
66
  private registerHandlers;
65
67
  private handleQuery;
66
- /** Max tokens before auto-compaction (conservative default). */
67
- private maxContextTokens;
68
68
  /**
69
69
  * Core agent loop: stream LLM response → execute tools → repeat.
70
70
  * Returns the final accumulated response text.
package/dist/agent/agent-loop.js CHANGED
@@ -4,7 +4,9 @@ import * as path from "node:path";
4
4
  import { computeDiff } from "../utils/diff.js";
5
5
  import { ToolRegistry } from "./tool-registry.js";
6
6
  import { ConversationState } from "./conversation-state.js";
7
+ import { HistoryFile } from "./history-file.js";
7
8
  import { STATIC_SYSTEM_PROMPT, buildDynamicContext } from "./system-prompt.js";
9
+ import { TokenBudget } from "../token-budget.js";
8
10
  // Core tool factories
9
11
  import { createBashTool } from "./tools/bash.js";
10
12
  import { createReadFileTool } from "./tools/read-file.js";
@@ -24,8 +26,10 @@ export class AgentLoop {
24
26
  handlers;
25
27
  abortController = null;
26
28
  toolRegistry = new ToolRegistry();
27
- conversation = new ConversationState();
29
+ historyFile = new HistoryFile();
30
+ conversation = new ConversationState(this.historyFile);
28
31
  fileReadCache = new Map();
32
+ tokenBudget;
29
33
  modes;
30
34
  currentModeIndex = 0;
31
35
  boundListeners = [];
@@ -42,8 +46,12 @@ export class AgentLoop {
42
46
  { model: llmClient.model },
43
47
  ];
44
48
  this.currentModeIndex = initialModeIndex ?? 0;
49
+ // Unified token budget — adapts to current model's context window
50
+ this.tokenBudget = new TokenBudget(this.currentMode.contextWindow);
45
51
  // Register core tools
46
52
  this.registerCoreTools();
53
+ // Update token budget with tool count
54
+ this.tokenBudget.update(undefined, this.toolRegistry.all().length);
47
55
  // Register handlers — extensions can advise these
48
56
  this.registerHandlers();
49
57
  }
@@ -74,6 +82,7 @@ export class AgentLoop {
74
82
  else {
75
83
  this.llmClient.model = m.model;
76
84
  }
85
+ this.tokenBudget.update(m.contextWindow, this.toolRegistry.all().length);
77
86
  const label = m.provider ? `${m.provider}: ${m.model}` : m.model;
78
87
  this.bus.emit("agent:info", { name: "agent-sh", version: "0.4", model: m.model, provider: m.provider, contextWindow: m.contextWindow });
79
88
  this.bus.emit("ui:info", { message: `Model: ${label}` });
@@ -117,13 +126,50 @@ export class AgentLoop {
117
126
  else {
118
127
  this.llmClient.model = m.model;
119
128
  }
129
+ this.tokenBudget.update(m.contextWindow, this.toolRegistry.all().length);
130
+ this.bus.emit("config:changed", {});
131
+ });
132
+ on("config:add-modes", ({ modes: extra }) => {
133
+ // Remove any existing modes for the same provider, then append
134
+ const providers = new Set(extra.map((m) => m.provider).filter(Boolean));
135
+ this.modes = [
136
+ ...this.modes.filter((m) => !m.provider || !providers.has(m.provider)),
137
+ ...extra,
138
+ ];
120
139
  this.bus.emit("config:changed", {});
121
140
  });
122
141
  on("agent:reset-session", () => {
123
142
  this.cancel();
124
- this.conversation = new ConversationState();
143
+ this.conversation = new ConversationState(this.historyFile);
125
144
  this.lastProjectSkillNames.clear();
126
145
  });
146
+ on("agent:compact-request", () => {
147
+ const budgetTokens = this.tokenBudget.conversationBudgetTokens;
148
+ const stats = this.conversation.compact(budgetTokens);
149
+ this.conversation.flush().catch(() => { });
150
+ if (stats) {
151
+ this.bus.emit("ui:info", {
152
+ message: `(compacted: ~${stats.before.toLocaleString()} → ~${stats.after.toLocaleString()} tokens)`,
153
+ });
154
+ }
155
+ else {
156
+ this.bus.emit("ui:info", { message: "(nothing to compact)" });
157
+ }
158
+ });
159
+ this.bus.onPipe("context:get-stats", () => {
160
+ return {
161
+ activeTokens: this.conversation.estimateTokens(),
162
+ nuclearEntries: this.conversation.getNuclearEntryCount(),
163
+ recallArchiveSize: this.conversation.getRecallArchiveSize(),
164
+ budgetTokens: this.tokenBudget.conversationBudgetTokens,
165
+ };
166
+ });
167
+ // Load prior history from disk (non-blocking)
168
+ this.historyFile.readRecent().then((entries) => {
169
+ if (entries.length > 0) {
170
+ this.conversation.loadPriorHistory(entries);
171
+ }
172
+ }).catch(() => { });
127
173
  on("shell:cwd-change", ({ cwd }) => {
128
174
  const projectSkills = discoverProjectSkills(cwd);
129
175
  const newNames = new Set(projectSkills.map(s => s.name));
@@ -187,6 +233,7 @@ export class AgentLoop {
187
233
  else {
188
234
  this.llmClient.model = newMode.model;
189
235
  }
236
+ this.tokenBudget.update(newMode.contextWindow, this.toolRegistry.all().length);
190
237
  const label = newMode.provider
191
238
  ? `${newMode.provider}: ${newMode.model}`
192
239
  : newMode.model;
@@ -289,6 +336,45 @@ export class AgentLoop {
289
336
  this.toolRegistry.register(createUserShellTool({ getCwd, bus: this.bus }));
290
337
  this.toolRegistry.register(createDisplayTool({ getCwd, bus: this.bus }));
291
338
  this.toolRegistry.register(createListSkillsTool(getCwd));
339
+ // conversation_recall — search/expand evicted conversation turns
340
+ this.toolRegistry.register({
341
+ name: "conversation_recall",
342
+ description: "Browse, search, or expand evicted conversation turns. " +
343
+ "Use when you need context from earlier in the conversation that was compacted away.",
344
+ input_schema: {
345
+ type: "object",
346
+ properties: {
347
+ action: {
348
+ type: "string",
349
+ enum: ["browse", "search", "expand"],
350
+ description: "browse: list evicted turns, search: regex search, expand: show full turn",
351
+ },
352
+ query: {
353
+ type: "string",
354
+ description: "Search query (for action=search)",
355
+ },
356
+ turn_id: {
357
+ type: "number",
358
+ description: "Turn ID to expand (for action=expand)",
359
+ },
360
+ },
361
+ required: ["action"],
362
+ },
363
+ execute: async (args) => {
364
+ const action = args.action;
365
+ let content;
366
+ if (action === "search") {
367
+ content = await this.conversation.search(args.query ?? "");
368
+ }
369
+ else if (action === "expand") {
370
+ content = await this.conversation.expand(args.turn_id);
371
+ }
372
+ else {
373
+ content = await this.conversation.browse();
374
+ }
375
+ return { content, exitCode: 0, isError: false };
376
+ },
377
+ });
292
378
  }
293
379
  /**
294
380
  * Register named handlers that extensions can advise.
@@ -297,7 +383,7 @@ export class AgentLoop {
297
383
  registerHandlers() {
298
384
  const h = this.handlers;
299
385
  // Extensions compose additional context (git info, project rules, etc.)
300
- h.define("dynamic-context:build", () => buildDynamicContext(this.toolRegistry.all(), this.contextManager));
386
+ h.define("dynamic-context:build", () => buildDynamicContext(this.toolRegistry.all(), this.contextManager, this.tokenBudget.shellBudgetTokens));
301
387
  // Full control over what the LLM sees: takes messages[], returns messages[].
302
388
  // Default: pass through. Extensions can advise to compact, summarize,
303
389
  // filter, reorder, inject — whatever strategy fits.
@@ -441,8 +527,6 @@ export class AgentLoop {
441
527
  this.abortController = null;
442
528
  }
443
529
  }
444
- /** Max tokens before auto-compaction (conservative default). */
445
- maxContextTokens = 60_000;
446
530
  /**
447
531
  * Core agent loop: stream LLM response → execute tools → repeat.
448
532
  * Returns the final accumulated response text.
@@ -450,11 +534,16 @@ export class AgentLoop {
450
534
  async executeLoop(signal) {
451
535
  let fullResponseText = "";
452
536
  while (!signal.aborted) {
453
- // Auto-compact if conversation is getting large
454
- const estimatedTokens = Math.ceil(JSON.stringify(this.conversation.getMessages()).length / 4);
455
- if (estimatedTokens > this.maxContextTokens) {
456
- this.conversation.compact(10);
457
- this.bus.emit("ui:info", { message: "(conversation compacted)" });
537
+ // Auto-compact if conversation exceeds the model-aware budget
538
+ const budgetTokens = this.tokenBudget.conversationBudgetTokens;
539
+ if (this.conversation.estimateTokens() > budgetTokens) {
540
+ const stats = this.conversation.compact(budgetTokens);
541
+ await this.conversation.flush();
542
+ if (stats) {
543
+ this.bus.emit("ui:info", {
544
+ message: `(compacted: ~${stats.before.toLocaleString()} → ~${stats.after.toLocaleString()} tokens)`,
545
+ });
546
+ }
458
547
  }
459
548
  // System prompt is static (cacheable); dynamic context uses handler
460
549
  // so extensions can compose additional context via advise()
@@ -591,10 +680,14 @@ export class AgentLoop {
591
680
  catch (e) {
592
681
  if (signal.aborted)
593
682
  throw e;
594
- // Context overflow — compact and retry (no backoff needed)
683
+ // Context overflow — aggressively compact and retry
595
684
  if (this.isContextOverflow(e)) {
596
- this.conversation.compact(6);
597
- this.bus.emit("ui:info", { message: "(context overflow — compacted, retrying)" });
685
+ // Use 60% of the budget to leave headroom
686
+ const aggressiveBudget = Math.floor(this.tokenBudget.conversationBudgetTokens * 0.6);
687
+ const stats = this.conversation.compact(aggressiveBudget, 6);
688
+ await this.conversation.flush();
689
+ const detail = stats ? ` ~${stats.before.toLocaleString()} → ~${stats.after.toLocaleString()} tokens` : "";
690
+ this.bus.emit("ui:info", { message: `(context overflow — compacted${detail}, retrying)` });
598
691
  continue;
599
692
  }
600
693
  // Retryable transient error — backoff
package/dist/agent/conversation-state.d.ts CHANGED
@@ -1,11 +1,14 @@
1
1
  import type { ChatCompletionMessageParam } from "../utils/llm-client.js";
2
- /**
3
- * Manages the OpenAI chat messages array for the agent loop.
4
- * Separate from ContextManager — this is the LLM conversation,
5
- * not the shell history.
6
- */
2
+ import { type NuclearEntry } from "./nuclear-form.js";
3
+ import type { HistoryFile } from "./history-file.js";
7
4
  export declare class ConversationState {
8
5
  private messages;
6
+ private nuclearEntries;
7
+ private recallArchive;
8
+ private historyFile;
9
+ private nextSeq;
10
+ constructor(historyFile?: HistoryFile);
11
+ get instanceId(): string;
9
12
  addUserMessage(text: string): void;
10
13
  addAssistantMessage(content: string | null, toolCalls?: {
11
14
  id: string;
@@ -15,13 +18,40 @@ export declare class ConversationState {
15
18
  };
16
19
  }[]): void;
17
20
  addToolResult(toolCallId: string, content: string): void;
18
- /** Inject a system-level note into the conversation (e.g. context change). */
19
21
  addSystemNote(text: string): void;
20
22
  getMessages(): ChatCompletionMessageParam[];
23
+ estimateTokens(): number;
21
24
  /**
22
- * Simple compaction — drop oldest turns, keeping the first user message
23
- * (original task context) and the most recent turns.
25
+ * Priority-based compaction. Evicts lowest-priority turns, replacing
26
+ * them with nuclear one-liner summaries that stay in the conversation.
27
+ * Read-only tool results are dropped entirely.
24
28
  */
25
- compact(maxTurns: number): void;
29
+ compact(targetTokens: number, recentTurnsToKeep?: number): {
30
+ before: number;
31
+ after: number;
32
+ } | null;
33
+ /**
34
+ * Flush oldest nuclear entries to the history file when the
35
+ * in-context nuclear block grows too large.
36
+ */
37
+ flush(): Promise<void>;
38
+ /**
39
+ * Inject prior session history from the history file as a context note.
40
+ */
41
+ loadPriorHistory(entries: NuclearEntry[]): void;
42
+ /** Search Tier 2 archive + Tier 3 history file. */
43
+ search(query: string): Promise<string>;
44
+ /** Expand full content of a nuclear entry by seq number. */
45
+ expand(seq: number): Promise<string>;
46
+ /** Browse nuclear entries (Tier 2) + recent history (Tier 3). */
47
+ browse(): Promise<string>;
48
+ getNuclearEntryCount(): number;
49
+ getRecallArchiveSize(): number;
26
50
  clear(): void;
51
+ private buildNuclearBlock;
52
+ private updateNuclearBlockInMessages;
53
+ private parseTurns;
54
+ private inferPriority;
55
+ private searchArchive;
56
+ private turnToText;
27
57
  }