noumen 0.1.0 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (118) hide show
  1. package/README.md +767 -51
  2. package/dist/a2a/index.d.ts +148 -0
  3. package/dist/a2a/index.js +579 -0
  4. package/dist/a2a/index.js.map +1 -0
  5. package/dist/acp/index.d.ts +129 -0
  6. package/dist/acp/index.js +498 -0
  7. package/dist/acp/index.js.map +1 -0
  8. package/dist/agent-BrkbZyOT.d.ts +1028 -0
  9. package/dist/cache-DVqaCX8v.d.ts +38 -0
  10. package/dist/chunk-2ZTGQLYK.js +356 -0
  11. package/dist/chunk-2ZTGQLYK.js.map +1 -0
  12. package/dist/chunk-42PHHZUA.js +132 -0
  13. package/dist/chunk-42PHHZUA.js.map +1 -0
  14. package/dist/chunk-4SQA2UCV.js +26 -0
  15. package/dist/chunk-4SQA2UCV.js.map +1 -0
  16. package/dist/chunk-5GEX6ZSB.js +179 -0
  17. package/dist/chunk-5GEX6ZSB.js.map +1 -0
  18. package/dist/chunk-7ZMN7XJE.js +94 -0
  19. package/dist/chunk-7ZMN7XJE.js.map +1 -0
  20. package/dist/chunk-AMYIJSAZ.js +57 -0
  21. package/dist/chunk-AMYIJSAZ.js.map +1 -0
  22. package/dist/chunk-BGG2E6JD.js +10 -0
  23. package/dist/chunk-BGG2E6JD.js.map +1 -0
  24. package/dist/chunk-BZSFUEWM.js +43 -0
  25. package/dist/chunk-BZSFUEWM.js.map +1 -0
  26. package/dist/chunk-CPFHEPW4.js +139 -0
  27. package/dist/chunk-CPFHEPW4.js.map +1 -0
  28. package/dist/chunk-D43BWEZA.js +346 -0
  29. package/dist/chunk-D43BWEZA.js.map +1 -0
  30. package/dist/chunk-DGUM43GV.js +11 -0
  31. package/dist/chunk-DGUM43GV.js.map +1 -0
  32. package/dist/chunk-JACGEMTF.js +43 -0
  33. package/dist/chunk-JACGEMTF.js.map +1 -0
  34. package/dist/chunk-JX7CLUCV.js +21 -0
  35. package/dist/chunk-JX7CLUCV.js.map +1 -0
  36. package/dist/chunk-KXDB56YW.js +39 -0
  37. package/dist/chunk-KXDB56YW.js.map +1 -0
  38. package/dist/chunk-KY6ZPWHO.js +112 -0
  39. package/dist/chunk-KY6ZPWHO.js.map +1 -0
  40. package/dist/chunk-NBDFQYUZ.js +7992 -0
  41. package/dist/chunk-NBDFQYUZ.js.map +1 -0
  42. package/dist/chunk-OGXNFXFA.js +196 -0
  43. package/dist/chunk-OGXNFXFA.js.map +1 -0
  44. package/dist/chunk-QTJ7VTJY.js +1994 -0
  45. package/dist/chunk-QTJ7VTJY.js.map +1 -0
  46. package/dist/chunk-UVSSQBDY.js +192 -0
  47. package/dist/chunk-UVSSQBDY.js.map +1 -0
  48. package/dist/chunk-Y45R3PQL.js +684 -0
  49. package/dist/chunk-Y45R3PQL.js.map +1 -0
  50. package/dist/cli/index.d.ts +1 -0
  51. package/dist/cli/index.js +868 -0
  52. package/dist/cli/index.js.map +1 -0
  53. package/dist/client/index.d.ts +64 -0
  54. package/dist/client/index.js +409 -0
  55. package/dist/client/index.js.map +1 -0
  56. package/dist/client-CRRO2376.js +10 -0
  57. package/dist/client-CRRO2376.js.map +1 -0
  58. package/dist/headless-Q7XHHZIW.js +143 -0
  59. package/dist/headless-Q7XHHZIW.js.map +1 -0
  60. package/dist/history-snip-64GYP4ZL.js +12 -0
  61. package/dist/history-snip-64GYP4ZL.js.map +1 -0
  62. package/dist/index.d.ts +1305 -418
  63. package/dist/index.js +384 -1757
  64. package/dist/index.js.map +1 -1
  65. package/dist/jsonrpc/index.d.ts +54 -0
  66. package/dist/jsonrpc/index.js +34 -0
  67. package/dist/jsonrpc/index.js.map +1 -0
  68. package/dist/lsp/index.d.ts +36 -0
  69. package/dist/lsp/index.js +16 -0
  70. package/dist/lsp/index.js.map +1 -0
  71. package/dist/lsp-PS3BWIHC.js +8 -0
  72. package/dist/lsp-PS3BWIHC.js.map +1 -0
  73. package/dist/manager-DLXK63XC.js +8 -0
  74. package/dist/manager-DLXK63XC.js.map +1 -0
  75. package/dist/mcp/index.d.ts +111 -0
  76. package/dist/mcp/index.js +104 -0
  77. package/dist/mcp/index.js.map +1 -0
  78. package/dist/mcp-auth-AEI2R4ZC.js +9 -0
  79. package/dist/mcp-auth-AEI2R4ZC.js.map +1 -0
  80. package/dist/ollama-YNXAYP3R.js +18 -0
  81. package/dist/ollama-YNXAYP3R.js.map +1 -0
  82. package/dist/provider-factory-34MSWJZ3.js +20 -0
  83. package/dist/provider-factory-34MSWJZ3.js.map +1 -0
  84. package/dist/providers/anthropic.d.ts +19 -0
  85. package/dist/providers/anthropic.js +33 -0
  86. package/dist/providers/anthropic.js.map +1 -0
  87. package/dist/providers/bedrock.d.ts +39 -0
  88. package/dist/providers/bedrock.js +54 -0
  89. package/dist/providers/bedrock.js.map +1 -0
  90. package/dist/providers/gemini.d.ts +16 -0
  91. package/dist/providers/gemini.js +224 -0
  92. package/dist/providers/gemini.js.map +1 -0
  93. package/dist/providers/openai.d.ts +18 -0
  94. package/dist/providers/openai.js +8 -0
  95. package/dist/providers/openai.js.map +1 -0
  96. package/dist/providers/openrouter.d.ts +16 -0
  97. package/dist/providers/openrouter.js +23 -0
  98. package/dist/providers/openrouter.js.map +1 -0
  99. package/dist/providers/vertex.d.ts +40 -0
  100. package/dist/providers/vertex.js +64 -0
  101. package/dist/providers/vertex.js.map +1 -0
  102. package/dist/render-GRN4ZSSW.js +14 -0
  103. package/dist/render-GRN4ZSSW.js.map +1 -0
  104. package/dist/resolve-XM52G7YE.js +14 -0
  105. package/dist/resolve-XM52G7YE.js.map +1 -0
  106. package/dist/server/index.d.ts +128 -0
  107. package/dist/server/index.js +626 -0
  108. package/dist/server/index.js.map +1 -0
  109. package/dist/server-Cg1yWGaV.d.ts +96 -0
  110. package/dist/spinner-OJNR6NFO.js +8 -0
  111. package/dist/spinner-OJNR6NFO.js.map +1 -0
  112. package/dist/types-2kTLUCnD.d.ts +107 -0
  113. package/dist/types-3c88cRKH.d.ts +547 -0
  114. package/dist/types-CwKKucOF.d.ts +620 -0
  115. package/dist/types-DwdzmXfs.d.ts +107 -0
  116. package/dist/types-NIyVwQ4h.d.ts +109 -0
  117. package/dist/types-QwfylltH.d.ts +71 -0
  118. package/package.json +134 -6
package/README.md CHANGED
@@ -1,8 +1,12 @@
1
- # noumen
1
+ # noumen 🐍
2
2
 
3
- Programmatic AI coding agent library with pluggable providers and virtual infrastructure.
3
+ The coding agent you `npm install`.
4
4
 
5
- `noumen` gives you a headless, API-only coding agent that can read, write, edit files, run shell commands, and search codebases — all backed by swappable AI providers (OpenAI, Anthropic, Google Gemini) and virtual filesystems/computers (local Node.js, [sprites.dev](https://sprites.dev) containers).
5
+ `noumen` gives you the full agentic coding loop — tool execution, file editing, shell commands, context compaction, and session management — with sandboxed virtual infrastructure that isolates your agent from the host machine.
6
+
7
+ Any provider. Any sandbox. One package.
8
+
9
+ **[Documentation](https://noumen.dev)** · **[npm](https://www.npmjs.com/package/noumen)** · **[GitHub](https://github.com/UpstreetAI/noumen)**
6
10
 
7
11
  ## Install
8
12
 
@@ -10,25 +14,55 @@ Programmatic AI coding agent library with pluggable providers and virtual infras
10
14
  pnpm add noumen
11
15
  ```
12
16
 
17
+ Then install the provider SDK you need:
18
+
19
+ ```bash
20
+ pnpm add openai # for OpenAI / OpenRouter / Ollama
21
+ pnpm add @anthropic-ai/sdk # for Anthropic
22
+ pnpm add @google/genai # for Gemini
23
+ # Ollama needs no additional SDK beyond `openai` — just install https://ollama.com
24
+ ```
25
+
13
26
  ## Quick Start
14
27
 
15
28
  ```typescript
16
- import {
17
- Code,
18
- OpenAIProvider,
19
- LocalFs,
20
- LocalComputer,
21
- } from "noumen";
29
+ import { Agent } from "noumen";
30
+
31
+ const agent = new Agent({ provider: "anthropic", cwd: "." });
32
+
33
+ for await (const event of agent.run("Add a health-check endpoint to server.ts")) {
34
+ if (event.type === "text_delta") process.stdout.write(event.text);
35
+ }
36
+ ```
37
+
38
+ Three lines to a working coding agent. The string provider auto-detects your `ANTHROPIC_API_KEY` from the environment, and passing `cwd` without an explicit `sandbox` falls back to a local sandbox for that directory.
39
+
40
+ ### Execute (run to completion)
41
+
42
+ ```typescript
43
+ const result = await agent.execute("Fix the auth bug", {
44
+ onText: (text) => process.stdout.write(text),
45
+ onToolUse: (name) => console.log(`Using ${name}`),
46
+ });
47
+ console.log(`Done — ${result.toolCalls} tool calls`);
48
+ ```
49
+
50
+ `agent.run()` streams events via an async generator. `agent.execute()` runs to completion and returns a `RunResult` — callbacks are optional event listeners along the way.
51
+
52
+ ### Full control
53
+
54
+ ```typescript
55
+ import { Agent, LocalSandbox } from "noumen";
56
+ import { OpenAIProvider } from "noumen/openai";
22
57
 
23
- const code = new Code({
24
- aiProvider: new OpenAIProvider({ apiKey: process.env.OPENAI_API_KEY }),
25
- virtualFs: new LocalFs({ basePath: "/my/project" }),
26
- virtualComputer: new LocalComputer({ defaultCwd: "/my/project" }),
58
+ const agent = new Agent({
59
+ provider: new OpenAIProvider({ apiKey: process.env.OPENAI_API_KEY }),
60
+ sandbox: LocalSandbox({ cwd: "/my/project" }),
27
61
  });
28
62
 
29
- const thread = code.createThread();
63
+ const thread = agent.createThread();
30
64
 
31
- for await (const event of thread.run("Add a health-check endpoint to server.ts")) {
65
+ for await (const event of thread.run("Refactor the auth module")) {
32
66
  switch (event.type) {
33
67
  case "text_delta":
34
68
  process.stdout.write(event.text);
@@ -43,12 +77,182 @@ for await (const event of thread.run("Add a health-check endpoint to server.ts")
43
77
  }
44
78
  ```
45
79
 
80
+ ## Presets
81
+
82
+ For zero-config setup, use a preset that configures everything for you:
83
+
84
+ ```typescript
85
+ import { codingAgent } from "noumen";
86
+ import { OpenAIProvider } from "noumen/openai";
87
+
88
+ const agent = codingAgent({
89
+ provider: new OpenAIProvider({ apiKey: process.env.OPENAI_API_KEY! }),
90
+ cwd: "/my/project",
91
+ });
92
+
93
+ await agent.init();
94
+ const thread = agent.createThread();
95
+
96
+ for await (const event of thread.run("Refactor the auth module")) {
97
+ if (event.type === "text_delta") process.stdout.write(event.text);
98
+ }
99
+
100
+ await agent.close();
101
+ ```
102
+
103
+ Three presets are available:
104
+
105
+ | Preset | Mode | Includes |
106
+ |--------|------|----------|
107
+ | `codingAgent` | `default` | Subagents, tasks, plan mode, auto-compact, retry, cost tracking, project context |
108
+ | `planningAgent` | `plan` | Read-only exploration, plan mode enabled |
109
+ | `reviewAgent` | `plan` | Read-only + web search for documentation lookups |
110
+
111
+ ## CLI
112
+
113
+ noumen ships a CLI for using the agent directly from the terminal, with any provider.
114
+
115
+ ```bash
116
+ # Interactive mode — auto-detects provider from env vars
117
+ npx noumen
118
+
119
+ # One-shot with a specific provider
120
+ npx noumen -p anthropic "Add error handling to server.ts"
121
+
122
+ # Pipe input
123
+ cat plan.md | npx noumen -p openai
124
+
125
+ # JSONL output for scripting
126
+ npx noumen --json -c "List all TODO comments" > events.jsonl
127
+ ```
128
+
129
+ ### Setup
130
+
131
+ ```bash
132
+ noumen init
133
+ ```
134
+
135
+ This creates `.noumen/config.json` with your provider and model choice. The CLI also reads `NOUMEN.md` files for project instructions (see [Project Context](#project-context)).
136
+
137
+ ### Config file
138
+
139
+ ```json
140
+ {
141
+ "provider": "anthropic",
142
+ "model": "claude-sonnet-4",
143
+ "permissions": "acceptEdits"
144
+ }
145
+ ```
146
+
147
+ Place in `.noumen/config.json` at your project root. The CLI walks up from the working directory to find it.
148
+
149
+ ### Flags
150
+
151
+ | Flag | Description |
152
+ |------|-------------|
153
+ | `-p, --provider` | `openai`, `anthropic`, `gemini`, `openrouter`, `bedrock`, `vertex`, `ollama` |
154
+ | `-m, --model` | Model name (provider-specific default if omitted) |
155
+ | `--api-key` | Override API key |
156
+ | `--base-url` | Override provider base URL |
157
+ | `-c, --prompt` | One-shot prompt (non-interactive) |
158
+ | `--permission` | Permission mode: `default`, `plan`, `acceptEdits`, `auto`, `bypassPermissions`, `dontAsk` |
159
+ | `--thinking` | Thinking level: `off`, `low`, `medium`, `high` |
160
+ | `--max-turns` | Max agent turns before stopping |
161
+ | `--json` | Emit JSONL stream events to stdout |
162
+ | `--quiet` | Only output final text |
163
+ | `--verbose` | Show tool calls and thinking |
164
+ | `--cwd` | Working directory |
165
+
166
+ ### API key resolution
167
+
168
+ 1. `--api-key` flag
169
+ 2. Provider-specific env var (`OPENAI_API_KEY`, `ANTHROPIC_API_KEY`, `GEMINI_API_KEY`, `OPENROUTER_API_KEY`)
170
+ 3. `NOUMEN_API_KEY` generic env var
171
+ 4. `.noumen/config.json` `apiKey` field
172
+
173
+ Ollama, Bedrock, and Vertex do not require an API key.
174
+
175
+ ### Commands
176
+
177
+ | Command | Description |
178
+ |---------|-------------|
179
+ | `noumen init` | Create `.noumen/config.json` |
180
+ | `noumen sessions` | List past sessions |
181
+ | `noumen resume <id>` | Resume a previous session (prefix match) |
182
+
183
+ ## Embedding
184
+
185
+ noumen is a library first. Six integration patterns:
186
+
187
+ **In-process** — `Agent` + `Thread.run()` async iterator, direct import:
188
+
189
+ ```typescript
190
+ const thread = agent.createThread();
191
+ for await (const event of thread.run("Fix the bug")) {
192
+ if (event.type === "text_delta") process.stdout.write(event.text);
193
+ }
194
+ ```
195
+
196
+ **HTTP/SSE server** — expose the agent over HTTP:
197
+
198
+ ```typescript
199
+ import { createServer } from "noumen/server";
200
+ const server = createServer(agent, { port: 3001, auth: { type: "bearer", token: "..." } });
201
+ await server.start();
202
+ ```
203
+
204
+ **Middleware** — mount on Express, Fastify, or Hono:
205
+
206
+ ```typescript
207
+ import { createRequestHandler } from "noumen/server";
208
+ app.use("/agent", createRequestHandler(agent, { auth: { type: "bearer", token: "..." } }));
209
+ ```
210
+
211
+ **WebSocket** — bidirectional with permission handling:
212
+
213
+ ```typescript
214
+ import { NoumenClient } from "noumen/client";
215
+ const client = new NoumenClient({ baseUrl: "http://localhost:3001", transport: "ws" });
216
+ for await (const event of client.run("Deploy to staging")) { /* ... */ }
217
+ ```
218
+
219
+ **Headless CLI** — NDJSON subprocess control from any language:
220
+
221
+ ```bash
222
+ npx noumen --headless -p anthropic <<< '{"type":"prompt","text":"Fix the bug"}'
223
+ ```
224
+
225
+ **Frameworks** — Next.js API routes, Electron IPC, VS Code extensions. See the [full embedding guide](https://noumen.dev/docs/embedding) and [Server API Reference](https://noumen.dev/docs/server-api).
226
+
227
+ **Health checks** — verify all integrations work before running:
228
+
229
+ ```typescript
230
+ const result = await agent.diagnose();
231
+ // {
232
+ // overall: true,
233
+ // provider: { ok: true, latencyMs: 342, model: "claude-sonnet-4" },
234
+ // sandbox: {
235
+ // fs: { ok: true, latencyMs: 2 },
236
+ // computer: { ok: true, latencyMs: 45 },
237
+ // },
238
+ // mcp: { filesystem: { ok: true, latencyMs: 0, status: "connected", toolCount: 5 } },
239
+ // lsp: {},
240
+ // timestamp: "2026-04-04T12:00:00.000Z",
241
+ // }
242
+ ```
243
+
244
+ Or from the CLI:
245
+
246
+ ```bash
247
+ npx noumen doctor
248
+ ```
249
+
46
250
  ## Providers
47
251
 
48
252
  ### OpenAI
49
253
 
50
254
  ```typescript
51
- import { OpenAIProvider } from "noumen";
255
+ import { OpenAIProvider } from "noumen/openai";
52
256
 
53
257
  const provider = new OpenAIProvider({
54
258
  apiKey: "sk-...",
@@ -60,18 +264,18 @@ const provider = new OpenAIProvider({
60
264
  ### Anthropic
61
265
 
62
266
  ```typescript
63
- import { AnthropicProvider } from "noumen";
267
+ import { AnthropicProvider } from "noumen/anthropic";
64
268
 
65
269
  const provider = new AnthropicProvider({
66
270
  apiKey: "sk-ant-...",
67
- model: "claude-sonnet-4-20250514", // default
271
+ model: "claude-sonnet-4", // default
68
272
  });
69
273
  ```
70
274
 
71
275
  ### Google Gemini
72
276
 
73
277
  ```typescript
74
- import { GeminiProvider } from "noumen";
278
+ import { GeminiProvider } from "noumen/gemini";
75
279
 
76
280
  const provider = new GeminiProvider({
77
281
  apiKey: "...", // Google AI Studio API key
@@ -79,54 +283,248 @@ const provider = new GeminiProvider({
79
283
  });
80
284
  ```
81
285
 
82
- ## Virtual Infrastructure
286
+ ### OpenRouter
287
+
288
+ ```typescript
289
+ import { OpenRouterProvider } from "noumen/openrouter";
290
+
291
+ const provider = new OpenRouterProvider({
292
+ apiKey: "sk-or-...",
293
+ model: "anthropic/claude-sonnet-4", // default
294
+ appName: "My Agent", // optional, for openrouter.ai rankings
295
+ appUrl: "https://myapp.com", // optional
296
+ });
297
+ ```
298
+
299
+ ### AWS Bedrock (Anthropic)
83
300
 
84
- ### Local (Node.js)
301
+ Route Anthropic models through AWS Bedrock. Requires `@anthropic-ai/bedrock-sdk`:
85
302
 
86
- Backed by `fs/promises` and `child_process`:
303
+ ```bash
304
+ pnpm add @anthropic-ai/bedrock-sdk
305
+ ```
87
306
 
88
307
  ```typescript
89
- import { LocalFs, LocalComputer } from "noumen";
308
+ import { BedrockAnthropicProvider } from "noumen/bedrock";
309
+
310
+ const provider = new BedrockAnthropicProvider({
311
+ region: "us-west-2", // default: us-east-1
312
+ model: "us.anthropic.claude-sonnet-4-v1:0", // default
313
+ credentials: { // optional, falls back to default chain
314
+ accessKeyId: process.env.AWS_ACCESS_KEY_ID!,
315
+ secretAccessKey: process.env.AWS_SECRET_ACCESS_KEY!,
316
+ sessionToken: process.env.AWS_SESSION_TOKEN,
317
+ },
318
+ cacheControl: { enabled: true }, // optional prompt caching
319
+ });
320
+ ```
321
+
322
+ When `credentials` is omitted, the SDK uses the standard AWS credential chain (env vars, `~/.aws/credentials`, IAM roles, etc.).
323
+
324
+ ### Google Vertex AI (Anthropic)
90
325
 
91
- const fs = new LocalFs({ basePath: "/my/project" });
92
- const computer = new LocalComputer({ defaultCwd: "/my/project" });
326
+ Route Anthropic models through Google Cloud Vertex AI. Requires `@anthropic-ai/vertex-sdk` and `google-auth-library`:
327
+
328
+ ```bash
329
+ pnpm add @anthropic-ai/vertex-sdk google-auth-library
93
330
  ```
94
331
 
95
- ### sprites.dev
332
+ ```typescript
333
+ import { VertexAnthropicProvider } from "noumen/vertex";
96
334
 
97
- Run inside a remote [sprites.dev](https://docs.sprites.dev) container:
335
+ const provider = new VertexAnthropicProvider({
336
+ projectId: "my-gcp-project",
337
+ region: "us-east5", // default
338
+ model: "claude-sonnet-4", // default
339
+ cacheControl: { enabled: true }, // optional prompt caching
340
+ });
341
+ ```
342
+
343
+ When `googleAuth` is omitted, the provider creates a `GoogleAuth` instance using application default credentials. You can pass your own `googleAuth` instance for custom authentication:
98
344
 
99
345
  ```typescript
100
- import { SpritesFs, SpritesComputer } from "noumen";
346
+ import { GoogleAuth } from "google-auth-library";
101
347
 
102
- const fs = new SpritesFs({
103
- token: process.env.SPRITE_TOKEN,
104
- spriteName: "my-sprite",
348
+ const provider = new VertexAnthropicProvider({
349
+ projectId: "my-project",
350
+ googleAuth: new GoogleAuth({ keyFile: "/path/to/service-account.json" }),
105
351
  });
352
+ ```
106
353
 
107
- const computer = new SpritesComputer({
354
+ ### Ollama (Local)
355
+
356
+ Run models locally with [Ollama](https://ollama.com). No API key needed — just install Ollama and pull a model:
357
+
358
+ ```bash
359
+ ollama pull qwen2.5-coder:32b
360
+ ollama serve
361
+ ```
362
+
363
+ ```typescript
364
+ import { OllamaProvider } from "noumen/ollama";
365
+
366
+ const provider = new OllamaProvider({
367
+ model: "qwen2.5-coder:32b", // default
368
+ baseURL: "http://localhost:11434/v1", // default
369
+ });
370
+ ```
371
+
372
+ The CLI auto-detects a running Ollama server when no cloud API keys are set, so you can simply run `noumen` with Ollama serving in the background.
373
+
374
+ ## Sandboxes
375
+
376
+ A `Sandbox` bundles a `VirtualFs` (filesystem) and `VirtualComputer` (shell execution) into one object. Every file read/write and shell command the agent executes goes through these interfaces — swap the sandbox to control what the agent can access.
377
+
378
+ ### Local — OS-level sandboxing
379
+
380
+ Backed by `@anthropic-ai/sandbox-runtime`. Uses macOS Seatbelt or Linux bubblewrap to restrict filesystem and network access at the OS level — no containers needed:
381
+
382
+ ```bash
383
+ pnpm add @anthropic-ai/sandbox-runtime
384
+ ```
385
+
386
+ ```typescript
387
+ import { LocalSandbox } from "noumen";
388
+
389
+ const sandbox = LocalSandbox({ cwd: "/my/project" });
390
+
391
+ // Customize restrictions:
392
+ const restricted = LocalSandbox({
393
+ cwd: "/my/project",
394
+ sandbox: {
395
+ filesystem: { denyRead: ["/etc/shadow"] },
396
+ network: { allowedDomains: ["api.openai.com"] },
397
+ },
398
+ });
399
+ ```
400
+
401
+ Defaults: writes allowed only in `cwd`, reads allowed everywhere, network unrestricted.
402
+
403
+ ### UnsandboxedLocal — no isolation
404
+
405
+ Backed by `fs/promises` and `child_process` with no OS-level restrictions. Use for development or trusted environments:
406
+
407
+ ```typescript
408
+ import { UnsandboxedLocal } from "noumen";
409
+
410
+ const sandbox = UnsandboxedLocal({ cwd: "/my/project" });
411
+ ```
412
+
413
+ ### sprites.dev — full sandbox
414
+
415
+ Run inside a remote [sprites.dev](https://docs.sprites.dev) container. The agent has no access to the host machine:
416
+
417
+ ```typescript
418
+ import { SpritesSandbox } from "noumen";
419
+
420
+ const sandbox = SpritesSandbox({
108
421
  token: process.env.SPRITE_TOKEN,
109
422
  spriteName: "my-sprite",
110
423
  });
111
424
  ```
112
425
 
426
+ ### Docker — container isolation
427
+
428
+ Run the agent inside a Docker container. Requires `dockerode` as an optional peer dependency:
429
+
430
+ ```bash
431
+ pnpm add dockerode
432
+ ```
433
+
434
+ ```typescript
435
+ import Docker from "dockerode";
436
+ import { DockerSandbox } from "noumen";
437
+
438
+ const docker = new Docker();
439
+ const container = await docker.createContainer({
440
+ Image: "node:22",
441
+ Cmd: ["sleep", "infinity"],
442
+ Tty: false,
443
+ });
444
+ await container.start();
445
+
446
+ const sandbox = DockerSandbox({
447
+ container,
448
+ cwd: "/workspace",
449
+ });
450
+
451
+ // Use the sandbox normally — all commands/files run inside the container
452
+ const agent = new Agent({ provider, sandbox });
453
+
454
+ // Clean up when done
455
+ await container.stop();
456
+ await container.remove();
457
+ ```
458
+
459
+ You are responsible for container lifecycle (create, start, stop, remove). The sandbox just wraps the running container.
460
+
461
+ ### E2B — cloud sandbox
462
+
463
+ Run the agent inside an [E2B](https://e2b.dev) cloud sandbox. Requires `e2b` as an optional peer dependency:
464
+
465
+ ```bash
466
+ pnpm add e2b
467
+ ```
468
+
469
+ ```typescript
470
+ import { Sandbox as E2BSandboxSDK } from "e2b";
471
+ import { E2BSandbox } from "noumen";
472
+
473
+ const e2b = await E2BSandboxSDK.create();
474
+
475
+ const sandbox = E2BSandbox({
476
+ sandbox: e2b,
477
+ cwd: "/home/user",
478
+ });
479
+
480
+ const agent = new Agent({ provider, sandbox });
481
+
482
+ // Clean up when done
483
+ await e2b.close();
484
+ ```
485
+
486
+ You are responsible for sandbox lifecycle (create, close). The adapter maps `VirtualFs` and `VirtualComputer` to E2B's `files` and `commands` APIs.
487
+
488
+ ### Custom sandboxes
489
+
490
+ Implement `VirtualFs` and `VirtualComputer` to target any execution environment — Daytona, cloud VMs, or an in-memory test harness. A custom `Sandbox` is any object with `{ fs, computer }`:
491
+
492
+ ```typescript
493
+ import type { Sandbox } from "noumen";
494
+
495
+ const sandbox: Sandbox = {
496
+ fs: new MyCustomFs(),
497
+ computer: new MyCustomComputer(),
498
+ };
499
+ ```
500
+
501
+ The interfaces are intentionally minimal (one method for shell, eight for filesystem) so adapters are straightforward to write.
502
+
113
503
  ## Options
114
504
 
115
505
  ```typescript
116
- const code = new Code({
117
- aiProvider,
118
- virtualFs,
119
- virtualComputer,
506
+ const agent = new Agent({
507
+ provider: "anthropic",
508
+ cwd: "/my/project",
120
509
  options: {
121
510
  sessionDir: ".noumen/sessions", // JSONL transcript storage path
122
- model: "gpt-4o", // default model
123
- maxTokens: 8192, // max output tokens per turn
124
- autoCompact: true, // auto-compact when context is large
125
- autoCompactThreshold: 100_000, // token threshold for auto-compact
126
- systemPrompt: "...", // override the built-in system prompt
127
- cwd: "/working/dir", // working directory for tools
511
+ model: "claude-sonnet-4", // default model
512
+ maxTokens: 8192, // max output tokens per turn
513
+ autoCompact: true, // auto-compact when context is large
514
+ autoCompactThreshold: 100_000, // token threshold for auto-compact
515
+ systemPrompt: "...", // override the built-in system prompt
128
516
  skills: [{ name: "...", content: "..." }],
129
- skillsPaths: [".claude/skills"], // paths to SKILL.md files on virtualFs
517
+ skillsPaths: [".claude/skills"], // paths to SKILL.md files on the sandbox filesystem
518
+ projectContext: true, // load NOUMEN.md / CLAUDE.md from project
519
+
520
+ // Extended thinking / reasoning (see below)
521
+ thinking: { type: "enabled", budgetTokens: 10000 },
522
+
523
+ // Retry / error resilience (see below)
524
+ retry: true, // use defaults, or pass a RetryConfig
525
+
526
+ // Cost tracking (see below)
527
+ costTracking: { enabled: true },
130
528
  },
131
529
  });
132
530
  ```
@@ -135,10 +533,10 @@ const code = new Code({
135
533
 
136
534
  ```typescript
137
535
  // New thread
138
- const thread = code.createThread();
536
+ const thread = agent.createThread();
139
537
 
140
538
  // Resume an existing session
141
- const thread = code.createThread({ sessionId: "abc-123", resume: true });
539
+ const thread = agent.createThread({ sessionId: "abc-123", resume: true });
142
540
 
143
541
  // Run a prompt (returns an async iterable of stream events)
144
542
  for await (const event of thread.run("Fix the failing test")) {
@@ -160,16 +558,45 @@ thread.abort();
160
558
  | Event | Fields | Description |
161
559
  |-------|--------|-------------|
162
560
  | `text_delta` | `text` | Incremental text from the model |
561
+ | `thinking_delta` | `text` | Incremental thinking/reasoning text from the model |
163
562
  | `tool_use_start` | `toolName`, `toolUseId` | Model is calling a tool |
164
563
  | `tool_use_delta` | `input` | Incremental tool call arguments |
165
564
  | `tool_result` | `toolUseId`, `toolName`, `result` | Tool execution result |
166
565
  | `message_complete` | `message` | Full assistant message |
566
+ | `usage` | `usage`, `model` | Token usage for a single model call |
567
+ | `cost_update` | `summary` | Updated cost summary after each model call |
568
+ | `turn_complete` | `usage`, `model`, `callCount` | Accumulated usage for the full agent turn |
569
+ | `retry_attempt` | `attempt`, `maxRetries`, `delayMs`, `error` | A retryable error occurred; waiting before retry |
570
+ | `retry_exhausted` | `attempts`, `error` | All retries exhausted |
167
571
  | `compact_start` | | Auto-compaction started |
168
572
  | `compact_complete` | | Auto-compaction finished |
573
+ | `microcompact_complete` | `tokensFreed` | Microcompaction freed tokens from tool results |
574
+ | `tool_result_truncated` | `toolCallId`, `originalChars`, `truncatedChars` | A tool result was truncated by the budget system |
575
+ | `permission_request` | `toolName`, `input`, `message` | Tool call requires user approval |
576
+ | `permission_granted` | `toolName`, `input` | Permission was granted for a tool call |
577
+ | `permission_denied` | `toolName`, `input`, `message` | Permission was denied for a tool call |
578
+ | `denial_limit_exceeded` | `consecutiveDenials`, `totalDenials` | Denial tracking limits hit |
579
+ | `user_input_request` | `toolUseId`, `question` | The agent is asking the user a question |
580
+ | `subagent_start` | `toolUseId`, `prompt` | A subagent is being spawned |
581
+ | `subagent_end` | `toolUseId`, `result` | A subagent finished |
582
+ | `session_resumed` | `sessionId`, `messageCount` | A previous session was restored |
583
+ | `checkpoint_snapshot` | `messageId` | A file checkpoint was taken before edits |
584
+ | `recovery_filtered` | `filterName`, `removedCount` | Corrupt entries were filtered during session restore |
585
+ | `interrupted_turn_detected` | `kind` | A previous turn was interrupted (`interrupted_tool` or `interrupted_prompt`) |
586
+ | `memory_update` | `created`, `updated`, `deleted` | Memories were extracted from the conversation |
587
+ | `span_start` | `name`, `spanId` | An OpenTelemetry-compatible span started |
588
+ | `span_end` | `name`, `spanId`, `durationMs`, `error?` | A span ended |
589
+ | `git_operation` | `operation`, `details` | A git operation was detected |
590
+ | `structured_output` | `data`, `schema` | Structured output was produced |
591
+ | `max_turns_reached` | `maxTurns`, `turnCount` | The agent hit the maxTurns limit |
169
592
  | `error` | `error` | An error occurred |
170
593
 
594
+ See **[noumen.dev/docs/stream-events](https://noumen.dev/docs/stream-events)** for the full event reference.
595
+
171
596
  ## Built-in Tools
172
597
 
598
+ ### Core tools (always available)
599
+
173
600
  | Tool | Description |
174
601
  |------|-------------|
175
602
  | **ReadFile** | Read files with line numbers, offset/limit support |
@@ -178,16 +605,148 @@ thread.abort();
178
605
  | **Bash** | Execute shell commands |
179
606
  | **Glob** | Find files by glob pattern (via ripgrep) |
180
607
  | **Grep** | Search file contents by regex (via ripgrep) |
608
+ | **WebFetch** | Fetch a URL and return contents as markdown |
609
+ | **NotebookEdit** | Edit Jupyter notebook cells (replace, insert, delete) |
610
+ | **AskUser** | Ask the user a question and wait for a response |
611
+
612
+ ### Optional tools (enabled via Agent options)
613
+
614
+ | Tool | Requires | Description |
615
+ |------|----------|-------------|
616
+ | **Agent** | `enableSubagents` | Spawn an isolated subagent for focused subtasks |
617
+ | **Skill** | `skills` / `skillsPaths` | Invoke a named skill with arguments |
618
+ | **TaskCreate** | `enableTasks` | Create a work item for tracking |
619
+ | **TaskList** | `enableTasks` | List all tasks with status |
620
+ | **TaskGet** | `enableTasks` | Get task details by ID |
621
+ | **TaskUpdate** | `enableTasks` | Update task status/description |
622
+ | **EnterPlanMode** | `enablePlanMode` | Switch to read-only exploration mode |
623
+ | **ExitPlanMode** | `enablePlanMode` | Return to normal mode with optional plan |
624
+ | **EnterWorktree** | `enableWorktrees` | Create an isolated git worktree |
625
+ | **ExitWorktree** | `enableWorktrees` | Leave and optionally clean up worktree |
626
+ | **LSP** | `lsp` config | Query language servers (definitions, references, hover) |
627
+ | **WebSearch** | `webSearch` config | Search the web via a user-provided backend |
628
+ | **ToolSearch** | `toolSearch` | Discover deferred tools on demand (reduces context usage) |
629
+
630
+ ## Extended Thinking
631
+
632
+ Enable model reasoning/thinking for supported providers. Each provider maps the config to its native format:
633
+
634
+ - **Anthropic**: Sets `thinking.budget_tokens` on the API call
635
+ - **OpenAI**: Maps to `reasoning_effort: "high"` for o-series models
636
+ - **Gemini**: Sets `thinkingConfig.thinkingBudget`
637
+
638
+ ```typescript
639
+ const agent = new Agent({
640
+ provider: "anthropic",
641
+ cwd: ".",
642
+ options: {
643
+ thinking: { type: "enabled", budgetTokens: 10000 },
644
+ },
645
+ });
646
+
647
+ for await (const event of agent.createThread().run("Solve this complex problem")) {
648
+ if (event.type === "thinking_delta") {
649
+ process.stderr.write(event.text); // reasoning trace
650
+ }
651
+ if (event.type === "text_delta") {
652
+ process.stdout.write(event.text); // final answer
653
+ }
654
+ }
655
+ ```
656
+
657
+ Disable explicitly with `{ type: "disabled" }`, or omit the option entirely for default behavior.
658
+
659
+ ## Retry / Error Resilience
660
+
661
+ Automatic retries with exponential backoff, Retry-After header support, context overflow recovery, and model fallback. Handles 429 (rate limit), 529 (overloaded), 500/502/503 (server errors), and connection failures.
662
+
663
+ ```typescript
664
+ const agent = new Agent({
665
+ provider: "anthropic",
666
+ cwd: ".",
667
+ options: {
668
+ retry: true, // use sensible defaults
669
+ },
670
+ });
671
+
672
+ // Or customize:
673
+ const agent2 = new Agent({
674
+ provider: "anthropic",
675
+ cwd: ".",
676
+ options: {
677
+ retry: {
678
+ maxRetries: 10,
679
+ baseDelayMs: 500,
680
+ maxDelayMs: 32000,
681
+ retryableStatuses: [408, 429, 500, 502, 503, 529],
682
+ fallbackModel: "gpt-4o-mini", // switch model after repeated 529s
683
+ maxConsecutiveOverloaded: 3,
684
+ onRetry: (attempt, error, delayMs) => {
685
+ console.log(`Retry ${attempt}, waiting ${delayMs}ms: ${error.message}`);
686
+ },
687
+ },
688
+ },
689
+ });
690
+ ```
691
+
692
+ On context overflow (input + max_tokens > context limit), the engine automatically reduces `max_tokens` and retries — no manual intervention needed.
693
+
694
+ ## Cost Tracking
695
+
696
+ Track token usage and estimate USD costs across all model calls. Includes built-in pricing for Claude, GPT-4o, Gemini, and o-series models.
697
+
698
+ ```typescript
699
+ const agent = new Agent({
700
+ provider: "anthropic",
701
+ cwd: ".",
702
+ options: {
703
+ costTracking: { enabled: true },
704
+ },
705
+ });
706
+
707
+ const thread = agent.createThread();
708
+
709
+ for await (const event of thread.run("Refactor the auth module")) {
710
+ if (event.type === "cost_update") {
711
+ console.log(`Running cost: $${event.summary.totalCostUSD.toFixed(4)}`);
712
+ }
713
+ }
714
+
715
+ // Or get the summary at any time
716
+ const summary = agent.getCostSummary();
717
+ console.log(`Total: $${summary.totalCostUSD.toFixed(4)}`);
718
+ console.log(`Input tokens: ${summary.totalInputTokens}`);
719
+ console.log(`Output tokens: ${summary.totalOutputTokens}`);
720
+ ```
721
+
722
+ Supply custom pricing for unlisted models:
723
+
724
+ ```typescript
725
+ const agent = new Agent({
726
+ provider: "anthropic",
727
+ cwd: ".",
728
+ options: {
729
+ costTracking: {
730
+ enabled: true,
731
+ pricing: {
732
+ "my-custom-model": {
733
+ inputTokens: 1, // USD per 1M tokens
734
+ outputTokens: 3,
735
+ },
736
+ },
737
+ },
738
+ },
739
+ });
740
+ ```
181
741
 
182
742
  ## Skills
183
743
 
184
744
  Skills are markdown instructions injected into the system prompt. Provide them inline or load from `SKILL.md` files on the virtual filesystem:
185
745
 
186
746
  ```typescript
187
- const code = new Code({
188
- aiProvider,
189
- virtualFs,
190
- virtualComputer,
747
+ const agent = new Agent({
748
+ provider: "anthropic",
749
+ cwd: ".",
191
750
  options: {
192
751
  skills: [
193
752
  { name: "Testing", content: "Always write vitest tests for new code." },
@@ -197,19 +756,176 @@ const code = new Code({
197
756
  });
198
757
 
199
758
  // If using skillsPaths, call init() to pre-load them
200
- await code.init();
759
+ await agent.init();
201
760
  ```
202
761
 
762
+ ## Project Context (NOUMEN.md / CLAUDE.md)
763
+
764
+ Drop a `NOUMEN.md` or `CLAUDE.md` in your project root to give the agent persistent instructions:
765
+
766
+ ```markdown
767
+ # Project instructions
768
+
769
+ This is a TypeScript monorepo. Use strict mode. Write vitest tests for all new code.
770
+ ```
771
+
772
+ Enable it with `projectContext: true` in your `Agent` options. The loader discovers context files from four layers — managed (enterprise), user (`~/.noumen/`), project (repo ancestors), and local (`.local.md`, gitignored) — so you can scope instructions at any level.
773
+
774
+ The context-file format is fully compatible with `CLAUDE.md`: if your project already has one, noumen picks it up automatically. Both `NOUMEN.md` and `CLAUDE.md` can coexist in the same directory. The format supports `@path` includes, conditional rules via `paths:` frontmatter in `.noumen/rules/` directories, and hierarchical overriding.
775
+
776
+ See **[noumen.dev/docs/context](https://noumen.dev/docs/context)** for full configuration options.
777
+
203
778
  ## Sessions
204
779
 
205
780
  Conversations are persisted as JSONL files on the virtual filesystem. Each line is a serialized message entry. Compaction writes a boundary marker followed by a summary, so resumed sessions only load post-boundary messages.
206
781
 
207
782
  ```typescript
208
783
  // List all saved sessions
209
- const sessions = await code.listSessions();
784
+ const sessions = await agent.listSessions();
210
785
  // [{ sessionId, createdAt, lastMessageAt, title?, messageCount }]
211
786
  ```
212
787
 
788
+ ## Hooks
789
+
790
+ 18 hook events across six categories — intercept tool calls, session lifecycle, permissions, file writes, model switches, compaction, retry, memory, and errors:
791
+
792
+ ```typescript
793
+ const agent = new Agent({
794
+ provider: "anthropic", cwd: ".",
795
+ options: {
796
+ hooks: [
797
+ {
798
+ event: "SessionStart",
799
+ handler: async (input) => {
800
+ console.log(`Session ${input.sessionId} started (resume: ${input.isResume})`);
801
+ },
802
+ },
803
+ {
804
+ event: "PreToolUse",
805
+ matcher: "Bash",
806
+ handler: async (input) => {
807
+ console.log(`Bash: ${input.toolInput.command}`);
808
+ return { decision: "allow" };
809
+ },
810
+ },
811
+ {
812
+ event: "FileWrite",
813
+ handler: async (input) => {
814
+ console.log(`${input.toolName} wrote ${input.filePath}`);
815
+ },
816
+ },
817
+ {
818
+ event: "PermissionDenied",
819
+ handler: async (input) => {
820
+ console.log(`Denied ${input.toolName}: ${input.reason}`);
821
+ },
822
+ },
823
+ ],
824
+ },
825
+ });
826
+ ```
827
+
828
+ | Category | Events |
829
+ |----------|--------|
830
+ | Session lifecycle | `SessionStart`, `SessionEnd`, `TurnStart`, `TurnEnd`, `Error` |
831
+ | Tool execution | `PreToolUse`, `PostToolUse`, `PostToolUseFailure`, `FileWrite` |
832
+ | Permissions | `PermissionRequest`, `PermissionDenied` |
833
+ | Subagents | `SubagentStart`, `SubagentStop` |
834
+ | Compaction | `PreCompact`, `PostCompact` |
835
+ | System | `ModelSwitch`, `RetryAttempt`, `MemoryUpdate` |
836
+
837
+ See the [hooks documentation](https://noumen.dev/docs/hooks) for full details on each event.
838
+
839
+ ## Permissions
840
+
841
+ Control what tools the agent can use with modes and rules:
842
+
843
+ ```typescript
844
+ options: {
845
+ permissions: {
846
+ mode: "default", // or "plan", "acceptEdits", "auto", "bypassPermissions", "dontAsk"
847
+ rules: [
848
+ { toolName: "Bash", behavior: "ask", source: "project" },
849
+ { toolName: "ReadFile", behavior: "allow", source: "user" },
850
+ ],
851
+ handler: async (request) => ({ allow: true }),
852
+ },
853
+ }
854
+ ```
855
+
856
+ ## Multi-Agent Swarm
857
+
858
+ Run multiple agents in parallel with message passing:
859
+
860
+ ```typescript
861
+ import { SwarmManager, InProcessBackend } from "noumen";
862
+
863
+ const backend = new InProcessBackend(agent);
864
+ const swarm = new SwarmManager(backend, { maxConcurrent: 3 });
865
+
866
+ await swarm.spawn({ name: "researcher", prompt: "Find all TODOs" });
867
+ await swarm.spawn({ name: "writer", prompt: "Write tests for auth" });
868
+ await swarm.waitForAll();
869
+ ```
870
+
871
+ ## Memory
872
+
873
+ Persist knowledge across sessions:
874
+
875
+ ```typescript
876
+ import { FileMemoryProvider, LocalFs } from "noumen";
877
+
878
+ options: {
879
+ memory: {
880
+ provider: new FileMemoryProvider(new LocalFs({ basePath: ".noumen/memory" })),
881
+ autoExtract: true,
882
+ injectIntoSystemPrompt: true,
883
+ },
884
+ }
885
+ ```
886
+
887
+ ## MCP (Model Context Protocol)
888
+
889
+ Connect to MCP servers to discover and use external tools:
890
+
891
+ ```typescript
892
+ options: {
893
+ mcpServers: {
894
+ filesystem: { command: "npx", args: ["-y", "@modelcontextprotocol/server-filesystem", "/tmp"] },
895
+ remote: { type: "http", url: "http://localhost:3001/mcp" },
896
+ },
897
+ }
898
+ ```
899
+
900
+ Or expose noumen's tools as an MCP server (requires `@modelcontextprotocol/sdk`):
901
+
902
+ ```bash
903
+ pnpm add @modelcontextprotocol/sdk
904
+ ```
905
+
906
+ ```typescript
907
+ import { createMcpServer } from "noumen/mcp";
908
+ const server = createMcpServer({ tools: registry.listTools() });
909
+ ```
910
+
911
+ ## Tracing
912
+
913
+ Instrument agent runs with OpenTelemetry:
914
+
915
+ ```typescript
916
+ import { OTelTracer } from "noumen";
917
+
918
+ options: {
919
+ tracing: { tracer: await OTelTracer.create("my-agent") },
920
+ }
921
+ ```
922
+
923
+ The tracer falls back to a no-op if `@opentelemetry/api` is not installed.
924
+
925
+ ## Full Documentation
926
+
927
+ See **[noumen.dev](https://noumen.dev)** for complete documentation on all features including hooks, permissions, compaction strategies, LSP integration, task management, worktrees, plan mode, and more.
928
+
213
929
  ## License
214
930
 
215
931
  MIT