@mono-agent/agent-runtime 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (60) hide show
  1. package/ARCHITECTURE.md +219 -0
  2. package/LICENSE +674 -0
  3. package/README.md +430 -0
  4. package/package.json +46 -0
  5. package/src/agent/allowlists.js +49 -0
  6. package/src/agent/approval.js +211 -0
  7. package/src/agent/compaction.js +752 -0
  8. package/src/agent/index.js +40 -0
  9. package/src/agent/prompt/skill-index.js +66 -0
  10. package/src/agent/tool-bloat.js +164 -0
  11. package/src/agent/tools/bash.js +156 -0
  12. package/src/agent/tools/edit.js +15 -0
  13. package/src/agent/tools/glob.js +71 -0
  14. package/src/agent/tools/grep.js +84 -0
  15. package/src/agent/tools/index.js +17 -0
  16. package/src/agent/tools/pi-bridge.js +638 -0
  17. package/src/agent/tools/read.js +39 -0
  18. package/src/agent/tools/shared/constants.js +21 -0
  19. package/src/agent/tools/shared/dedup.js +31 -0
  20. package/src/agent/tools/shared/output-truncation.js +54 -0
  21. package/src/agent/tools/shared/path-resolver.js +156 -0
  22. package/src/agent/tools/shared/ripgrep.js +130 -0
  23. package/src/agent/tools/shared/runtime-context.js +69 -0
  24. package/src/agent/tools/web-fetch.js +59 -0
  25. package/src/agent/tools/web-search.js +21 -0
  26. package/src/agent/tools/write.js +14 -0
  27. package/src/agent/transcript.js +227 -0
  28. package/src/ai/backend.js +17 -0
  29. package/src/ai/cost.js +164 -0
  30. package/src/ai/failure.js +165 -0
  31. package/src/ai/file-change-stats.js +234 -0
  32. package/src/ai/index.js +16 -0
  33. package/src/ai/live-input-prompt.js +15 -0
  34. package/src/ai/observer.js +233 -0
  35. package/src/ai/providers/claude-cli.js +694 -0
  36. package/src/ai/providers/claude-sdk.js +864 -0
  37. package/src/ai/providers/claude-subagents.js +67 -0
  38. package/src/ai/providers/codex-app.js +1045 -0
  39. package/src/ai/providers/opencode-app.js +356 -0
  40. package/src/ai/providers/opencode-discovery.js +39 -0
  41. package/src/ai/providers/pi-events.js +62 -0
  42. package/src/ai/providers/pi-messages.js +68 -0
  43. package/src/ai/providers/pi-models.js +111 -0
  44. package/src/ai/providers/pi-sdk.js +1310 -0
  45. package/src/ai/registry.js +5 -0
  46. package/src/ai/runtime/capabilities-used.js +56 -0
  47. package/src/ai/runtime/capabilities.js +44 -0
  48. package/src/ai/runtime/context-windows.js +38 -0
  49. package/src/ai/runtime/fast-mode.js +8 -0
  50. package/src/ai/runtime/model-refs.js +144 -0
  51. package/src/ai/runtime/registry.js +57 -0
  52. package/src/ai/runtime/router.js +214 -0
  53. package/src/ai/runtime/sessions.js +126 -0
  54. package/src/ai/streaming/codex-events.js +139 -0
  55. package/src/ai/streaming/opencode-events.js +54 -0
  56. package/src/ai/types.js +70 -0
  57. package/src/index.js +23 -0
  58. package/src/pi-auth.js +80 -0
  59. package/src/runtime-brand.js +32 -0
  60. package/src/runtime.js +104 -0
package/README.md ADDED
@@ -0,0 +1,430 @@
1
+ # @mono-agent/agent-runtime
2
+
3
+ ## Category
4
+
5
+ Category: `runtime`
6
+
7
+ ## Responsibility
8
+
9
+ Provides the multi-backend agent runtime bridges (Claude SDK, Claude Code CLI, Codex app-server, Pi SDK) with provider session support. This is the runtime layer that `@mono-agent/runtime-adapter` wraps behind runtime contracts, and it enforces optional `@mono-agent/sandbox` policy for runtime-owned tools.
10
+
11
+ ## Public API
12
+
13
+ - `createRuntime` — runtime factory dispatching to the backend bridges
14
+ - `ai/runtime/model-refs.js` — `parseRuntimeModelReference`, `executionModeIncompatibilityReason`
15
+ - `ai/runtime/registry.js` — `listRuntimeBridges`
16
+ - Provider bridges for `claude` (SDK + CLI), `codex` (app-server), `pi` (Pi SDK), and `opencode`
17
+ - Provider session support: bridges accept `sessionId` in run options and report `provider_session_id`; the runtime exposes `disposeSession` / `disposeAllSessions`
18
+ - Sandbox-aware built-in tools and stdio MCP startup through `@mono-agent/sandbox`
19
+
20
+ ## Dependency Boundary
21
+
22
+ Depends on external provider SDKs (`@anthropic-ai/claude-agent-sdk`, `@earendil-works/pi-agent-core`, `@earendil-works/pi-ai`, `@modelcontextprotocol/sdk`, `@opencode-ai/sdk`, `zod`) plus `@mono-agent/sandbox` for runtime-owned command preparation and network/path policy checks.
23
+
24
+ ## What This Package Does Not Own
25
+
26
+ - Runtime contracts and backend descriptors (`@mono-agent/runtime-adapter`)
27
+ - Conversation history, context building, or host-side session TTL policy (`@mono-agent/agent-harness`)
28
+ - Host configuration (`@mono-agent/config`, `@mono-agent/agent-host`)
29
+
30
+ ## Verification
31
+
32
+ ```bash
33
+ pnpm --filter @mono-agent/agent-runtime run test
34
+ ```
35
+
36
+ ## Overview
37
+
38
+ Generic agent runtime that supports four backends out of the box:
39
+
40
+ - **Claude SDK** (`@anthropic-ai/claude-agent-sdk`)
41
+ - **Claude Code CLI** (the `claude` binary)
42
+ - **Pi SDK** (`@earendil-works/pi-agent-core`, used for OpenAI / Codex / Gemini / OpenRouter / Ollama / etc. via Pi providers)
43
+ - **Codex CLI** (the `codex` app-server)
44
+
45
+ Hosts wire in their own pricing, persistence, credential, and compaction-recording callbacks. The runtime returns raw text + raw structured output; hosts that want a domain-specific contract parse it on their end.
46
+
47
+ See [ARCHITECTURE.md](./ARCHITECTURE.md) for the package boundary, runtime
48
+ selection flow, lifecycle diagrams, and host responsibilities.
49
+
50
+ ## Install / Usage
51
+
52
+ ```bash
53
+ npm install @mono-agent/agent-runtime
54
+ ```
55
+
56
+ Peer requirements:
57
+
58
+ - Node.js ≥ 20
59
+ - `claude` CLI on PATH (only for `executionMode: "cli"` with `model.sdk: "claude"`)
60
+ - `codex` CLI on PATH (only for `executionMode: "cli"` with `model.sdk: "codex"`; override via the `codexAppServerCommand` option)
61
+ - `ripgrep` on PATH (or supplied via `ripgrepPath`) — required for the `Glob` and `Grep` built-in tools
62
+
63
+ ## Quick start
64
+
65
+ ```js
66
+ import { createRuntime } from "@mono-agent/agent-runtime";
67
+
68
+ const runtime = createRuntime({
69
+ // Host integration (all optional)
70
+ workspace: "/path/to/repo",
71
+ ripgrepPath: "/usr/bin/rg",
72
+ });
73
+
74
+ const result = await runtime.run("You are a helpful assistant.", {
75
+ model: { sdk: "claude", model: "claude-sonnet-4-6" },
76
+ executionMode: "sdk",
77
+ messages: [{ role: "user", content: "Read README.md and summarize it." }],
78
+ cwd: "/path/to/repo",
79
+ allowedTools: ["Read", "Bash"],
80
+ maxTurns: 10,
81
+ onEvent: (event) => console.log(event.type),
82
+ });
83
+
84
+ console.log(result.text);
85
+ ```
86
+
87
+ ## When to reach for this vs. other JS agent runtimes
88
+
89
+ `@mono-agent/agent-runtime` is purpose-built for **autonomous, long-running agent work** with provider portability and operational resilience as first-class concerns. It is *not* a streaming-chat UI kit. Where each peer fits:
90
+
91
+ - **Vercel AI SDK** — best when you're building a chat / generative-UI experience inside a React or Next.js app. `useChat`, `useCompletion`, streaming server components, and edge-runtime compatibility are their strengths. Their provider list is curated (Anthropic, OpenAI, Google, etc., via `@ai-sdk/*` packages); there's no Pi gateway, no Claude Code CLI, no Codex CLI app-server, and no per-call provider fallback. If you're rendering a streaming chat into a browser, use them. If you're orchestrating multi-turn autonomous work that must survive a rate-limited primary provider, use us.
92
+ - **Claude Agent SDK** (`@anthropic-ai/claude-agent-sdk`) — first-party Anthropic SDK. Tight integration with Claude features (canUseTool, sub-agents, hooks, MCP). We *wrap* it as one of our four backends and add context compaction, transcript-resume across provider drops, a 22-kind failure taxonomy, a tool-bloat guard with artifact persistence, and a provider fallback router. Reach for the bare Anthropic SDK when you only ever talk to Claude and don't need cross-provider portability or resume.
93
+ - **Mastra** — a workflow engine + memory + RAG stack. Different category: it's the layer *above* a runtime. You can layer Mastra workflows on top of `@mono-agent/agent-runtime` if you want both.
94
+ - **OpenAI Agents SDK** — first-party OpenAI SDK. Same trade-off as the Claude Agent SDK: tight integration with OpenAI, no other providers. Pi providers in our runtime cover OpenAI plus a dozen others through a single API.
95
+ - **LangChain.js** — kitchen sink with deep abstraction stacks. We're deliberately lean; if you want chains, agents, vector stores, and parsers under one umbrella, LangChain is built for that. If you want a focused runtime kernel, use us.
96
+
97
+ **What we natively bridge (no extra packages):**
98
+
99
+ - Anthropic Claude via the Claude Agent SDK (`claude` SDK).
100
+ - Anthropic Claude via the Claude Code CLI (the `claude` binary).
101
+ - OpenAI's Codex via the `codex` app-server CLI.
102
+ - OpenAI, Google Gemini, AWS Bedrock, OpenRouter, xAI, Groq, Mistral, Perplexity, DeepSeek, Ollama, LlamaCPP, GLM, Vercel AI Gateway, GitHub Copilot, Gemini CLI — all through the Pi (`@earendil-works/pi-ai`) provider gateway, which our SDK adapter speaks directly.
103
+
104
+ **At-a-glance:**
105
+
106
+ | Need | Use this | Use Vercel AI SDK | Use Claude Agent SDK |
107
+ |---|---|---|---|
108
+ | Streaming chat UI in React/Next | ✗ | ✓ | ✗ |
109
+ | Multi-provider portability | ✓ (4 backends, 15+ providers) | partial | ✗ |
110
+ | CLI providers (claude/codex binaries) | ✓ | ✗ | ✗ |
111
+ | Provider fallback on rate limit / overload | ✓ (`createRouterRuntime`) | ✗ | ✗ |
112
+ | Aggressive context compaction with summarization | ✓ | ✗ | partial |
113
+ | Transcript-tail resume after provider drops | ✓ | ✗ | ✗ |
114
+ | Tool-output bloat guard + artifact persistence | ✓ | ✗ | ✗ |
115
+ | MCP transports out of the box (stdio/SSE/HTTP) | ✓ | partial | ✓ |
116
+ | HITL approval gates with risk tiers | ✓ | ✗ | partial (`canUseTool`) |
117
+ | Multi-subscriber observer with cost/cache metrics | ✓ | partial | partial |
118
+ | Edge-runtime compatibility | ✗ | ✓ | partial |
119
+
120
+ Honest summary: if the agent runs **without a human watching the screen** for minutes-to-hours and **must survive provider blips**, this is the right tool. If a human is watching a streaming chat, Vercel's SDK is the right tool. Both can coexist in the same app.
121
+
122
+ ## Picking a backend
123
+
124
+ The runtime picks a backend from `options.model` + `options.executionMode`:
125
+
126
+ | `model.sdk` | `executionMode` | Backend |
127
+ |---|---|---|
128
+ | `"claude"` | `"sdk"` (or omitted) | Claude SDK |
129
+ | `"claude"` | `"cli"` | `claude` CLI |
130
+ | `"pi"` | any | Pi SDK |
131
+ | `"codex"` | `"cli"` | Codex app-server CLI |
132
+
133
+ A `model` reference can be the parsed shape `{ sdk, model, provider? }` or a string (`"pi:openai:gpt-5.5"`, `"claude:claude-sonnet-4-6"`, etc.) that you parse with the package's `parseRuntimeModelReference` helper.
134
+
135
+ ## `createRuntime(host)`
136
+
137
+ Pass host-level integration once at boot. All keys are optional.
138
+
139
+ ```js
140
+ createRuntime({
141
+ // -- host callbacks --
142
+ resolveCustomPricing, // (parsed) => NormalizedPricing | null
143
+ resolvePiApiKey, // async (provider) => string | undefined
144
+ persistArtifact, // ({ filename, buffer, toolName, toolUseId }) => path | null
145
+ onCompactionRecorded, // (compactionRow) => void
146
+
147
+ // -- tool runtime context (process-level config for the tool kernel) --
148
+ workspace, // primary allowed root for path-based tools
149
+ repoRoot, // secondary allowed root
150
+ ripgrepPath, // explicit path to `rg`; falls back to vendored binary, then PATH
151
+ qaOutputDir, // fallback dir for Playwright MCP filename routing
152
+ sandboxPolicy, // optional @mono-agent/sandbox policy for tools and stdio MCP
153
+
154
+ // -- observers (multi-subscriber telemetry) --
155
+ // Optional. Each observer receives every event the runtime emits.
156
+ // Built-in createMetricsObserver() aggregates cost, cache hit rate, token
157
+ // counts, tool-call counts, errors, and turn-latency percentiles.
158
+ observers: [],
159
+
160
+ // -- approval gates (HITL) --
161
+ // Optional. When set, the runtime asks the host before every tool call
162
+ // whose risk tier is "medium" or "high" (and not session-allowlisted).
163
+ // See the "Approval gates" section below for the request/response shape.
164
+ onToolApprovalRequest, // async (req) => { decision, reason? }
165
+ toolRiskTiers: { Bash: "high" }, // per-tool tier override (low|medium|high)
166
+ approvalDefaultRiskTier: "medium",
167
+ approvalTimeoutMs: 60_000, // timeout → auto-deny
168
+ approvalAlwaysAllowTools: [], // start with these in session allowlist
169
+
170
+ // -- host-customisable identity strings (all optional, defaults shown) --
171
+ runtimeBrand: {
172
+ schemaPrefix: "agent_runtime", // prefix for snapshot/result schema ids
173
+ mcpClientName: "agent-runtime", // MCP client name reported to MCP servers
174
+ mcpClientVersion: "0.1.0", // MCP client version
175
+ tempdirPrefix: "agent-runtime-cli-", // mkdtemp prefix for CLI provider scratch dirs
176
+ providerModelPrefix: "agent", // id prefix for custom Pi providers
177
+ doctorCommand: "agent-runtime doctor", // command suggested in tool error messages
178
+ serviceName: "agent-runtime", // Codex app-server serviceName
179
+ clientInfoName: "agent-runtime", // Codex app-server clientInfo.name
180
+ clientInfoTitle: "Agent Runtime", // Codex app-server clientInfo.title
181
+ },
182
+ });
183
+ ```
184
+
185
+ `runtimeBrand` lets an external host reskin the package without forking string-by-string.
186
+
187
+ For Pi OAuth providers such as `openai-codex`, hosts can bind the standard Pi
188
+ auth JSON file with `createPiOAuthApiKeyResolver()`:
189
+
190
+ ```js
191
+ import { createPiOAuthApiKeyResolver, createRuntime } from "@mono-agent/agent-runtime";
192
+
193
+ const runtime = createRuntime({
194
+ resolvePiApiKey: createPiOAuthApiKeyResolver({
195
+ path: `${process.env.HOME}/.pi/agent/auth.json`,
196
+ }),
197
+ });
198
+ ```
199
+
200
+ The resolver reads provider credentials from the configured file, delegates token
201
+ refresh to `@earendil-works/pi-ai/oauth`, and writes refreshed credentials back
202
+ with `0600` permissions.
203
+
204
+ Returns:
205
+
206
+ - `run(systemPrompt, options)` — async, executes one agent run (up to `maxTurns` turns) against the chosen backend.
207
+ - `configureTools(next)` — update the tool runtime context after construction.
208
+
209
+ ### `runtime.run(systemPrompt, options)`
210
+
211
+ Per-call options (a non-exhaustive selection):
212
+
213
+ | Option | Type | Notes |
214
+ |---|---|---|
215
+ | `model` | `object \| string` | **Required.** See "Picking a backend". |
216
+ | `executionMode` | `"sdk" \| "cli"` | Default `"sdk"`. |
217
+ | `messages` | `Message[]` | Conversation history. |
218
+ | `cwd` | `string` | Working directory for the agent's tools. |
219
+ | `allowedTools` | `string[]` | Built-in tool allowlist. Default: all. |
220
+ | `disallowedTools` | `string[]` | Block list. |
221
+ | `mcpServers` | `Record<string, McpServerConfig>` | Configured MCP servers (stdio / sse / http). |
222
+ | `sandboxPolicy` | `SandboxPolicy` | Optional fail-closed sandbox policy for built-in tools and stdio MCP process startup. |
223
+ | `maxTurns` | `number` | Hard cap on agent turns. |
224
+ | `outputSchema` | `JSONSchema` | If set, the agent is asked to produce structured JSON matching this schema. The result lands in `result.structuredResult`. |
225
+ | `abortSignal` | `AbortSignal` | Cancel the run. |
226
+ | `liveInput` | `LiveInputQueue` | Stream of in-flight user messages (for human-in-the-loop steering). |
227
+ | `onEvent` | `(event) => void` | Fired for every event the provider emits (assistant text, tool calls/results, runtime warnings, structured output). |
228
+ | `runId` | `string` | Tag this run for downstream callbacks (e.g. `onCompactionRecorded`). |
229
+ | `providerSessionId` | `string` | Resume a prior provider session. |
230
+ | `runArtifactDir` | `string` | Used by some providers as the Playwright MCP filename target. |
231
+ | `piCodexTransport` | `string` | Forwarded to Pi when running OpenAI Codex models. |
232
+ | `codexAppServerCommand` | `string` | Override the Codex CLI binary. |
233
+ | `codexAppServerArgs` | `string[]` | Override the Codex CLI arguments. |
234
+
235
+ Returns:
236
+
237
+ ```ts
238
+ {
239
+ text: string, // raw assistant text
240
+ structuredResult?: any, // JSON returned via outputSchema (if any)
241
+ structuredResultSource?: string, // where structuredResult came from
242
+ events: RuntimeEvent[], // full event stream (for host-side parsing)
243
+ usage: {
244
+ input_tokens, output_tokens,
245
+ cache_read_tokens, cache_creation_tokens,
246
+ cost_usd,
247
+ },
248
+ durationMs: number,
249
+ numTurns: number,
250
+ model: string,
251
+ effort: string,
252
+ sdk: "claude" | "pi" | "codex",
253
+ cancelled: boolean,
254
+ error: string | null,
255
+ errorDetails: object | null,
256
+ failureKind: string | null,
257
+ providerSessionId: string | null,
258
+ runtimeWarnings: RuntimeWarning[],
259
+ diagnostics: object,
260
+ capabilitiesUsed: { // what the backend actually did this call
261
+ prompt_cache_active: true|false|null,
262
+ thinking_enabled: true|false|null,
263
+ structured_output_enforced: boolean,
264
+ subagent_invoked: true|false|null,
265
+ mcp_servers_used: string[],
266
+ native_subagents_used: string[],
267
+ tool_compaction_applied: boolean,
268
+ context_compaction_applied: true|false|null,
269
+ },
270
+ }
271
+ ```
272
+
273
+ `capabilitiesUsed` is the per-call complement to `runtimeCapabilities()`. Tristate fields use `null` to mean "this provider can't tell" — distinct from `false` ("definitely off"). It's also emitted as a `capabilities_resolved` event near the end of the run, so observers can capture it without inspecting the result object.
274
+
275
+ ## Built-in tools
276
+
277
+ The agent kernel ships with: `Read`, `Write`, `Edit`, `Glob`, `Grep`, `Bash`, `WebFetch`, `WebSearch`. You select via `allowedTools`. Tool implementations honor:
278
+
279
+ - `cwd` (required for path-based tools)
280
+ - The runtime context's `workspace` / `repoRoot` allow-list (paths that fall outside `workspace`, `repoRoot`, `/tmp`, and `process.cwd()` are rejected)
281
+ - Output truncation with optional artifact persistence (`{toolArtifactDir}/tool-output/{runId}/...` when `toolArtifactDir` is configured)
282
+
283
+ Override or extend the tool surface by passing `mcpServers` for MCP-backed tools.
284
+
285
+ ## Structured output
286
+
287
+ Pass `options.outputSchema` (a JSON Schema). On Claude SDK / Codex app-server / Pi SDK, the runtime wires the schema into the provider's structured-output API. The matched JSON lands in `result.structuredResult`.
288
+
289
+ The package does **not** validate `structuredResult` against your schema — it only forwards what the provider produced. Hosts run their own validation (Zod, AJV, etc.).
290
+
291
+ ## Provider fallback router
292
+
293
+ `createRouterRuntime({ host, chain })` wraps the standard runtime with an ordered chain of model references. On a retryable provider failure (rate limit, overload, network blip — classified via the same taxonomy as `retryableProviderFailureInfo`), it retries the same logical run against the next chain entry, replaying the transcript-tail snapshot of the previous attempt so the next provider continues rather than starts over.
294
+
295
+ ```js
296
+ import { createRouterRuntime } from "@mono-agent/agent-runtime";
297
+
298
+ const router = createRouterRuntime({
299
+ host: { /* same shape as createRuntime */ },
300
+ chain: [
301
+ { sdk: "claude", model: "claude-opus-4-7" },
302
+ { sdk: "claude", model: "claude-sonnet-4-6" },
303
+ { model: { sdk: "pi", provider: "openai", model: "gpt-5.5" }, requires: { structured_output: true } },
304
+ ],
305
+ });
306
+
307
+ const result = await router.run("...", { /* same shape as runtime.run */ });
308
+ console.log(result.failoverHistory);
309
+ // [{ model, failureKind, requestId, retryableSubkind }, ...] one entry per attempt that didn't succeed.
310
+ ```
311
+
312
+ Behaviour:
313
+
314
+ - Successful run on entry N → returns the result with `failoverHistory` set to attempts 0..N-1.
315
+ - Retryable failure → emits `provider_failover_started`, builds a transcript snapshot, and retries on the next entry.
316
+ - Non-retryable failure (auth, billing, invalid request) → returns immediately with `failoverHistory` containing the one attempt.
317
+ - Cancellation → returns immediately.
318
+ - Chain exhausted → `failureKind: "provider_unavailable_exhausted"`, `failoverHistory` lists every attempt.
319
+
320
+ Chain entries can require backend capabilities via `requires: { structured_output: true, supports_mcp: true, ... }`; entries that don't satisfy the requirements are skipped (logged in `failoverHistory` as `failureKind: "skipped_capability_mismatch"`).
321
+
322
+ ## Observers & metrics
323
+
324
+ The runtime emits structured events for everything that happens during a run — assistant messages, tool calls, runtime warnings, cache hits/misses, cost updates, provider request start/end, approval lifecycle. Hosts can subscribe via `host.observers[]` (any number) or the simpler `options.onEvent` callback (one subscriber). Both work simultaneously.
325
+
326
+ A built-in aggregator covers the common metrics:
327
+
328
+ ```js
329
+ import { createRuntime, createMetricsObserver } from "@mono-agent/agent-runtime";
330
+
331
+ const metrics = createMetricsObserver();
332
+ const runtime = createRuntime({ observers: [metrics] });
333
+
334
+ await runtime.run("...", { model: { sdk: "claude", model: "claude-sonnet-4-6" } });
335
+
336
+ console.log(metrics.snapshot());
337
+ // {
338
+ // events: { total, byType: { tool_use: 5, assistant: 8, ... } },
339
+ // tokens: { input, output, cacheReadTokens, cacheCreationTokens },
340
+ // cost: { cumulativeUsd },
341
+ // cache: { hits, misses, hitRatio, readTokensFromEvents },
342
+ // tools: { callsByName: { Bash: 3, Read: 2 }, errorsByName: { ... } },
343
+ // errors: { total, byKind: { provider_unavailable: 1 } },
344
+ // turns: { count, latencyMsP50, latencyMsP95 },
345
+ // approvals: { pending, granted, denied },
346
+ // }
347
+ ```
348
+
349
+ Custom observers implement `{ recordEvent(event), recordMetric(metric)?, flush()? }`. Fan-out is synchronous on the hot path; observers that need to do I/O must buffer internally.
350
+
351
+ Notable new events emitted by the bridges:
352
+
353
+ - `provider_request_started` / `_completed` — at the boundary of each LLM call (sdk, model, runtime, timestamp, durationMs).
354
+ - `cache_hit` / `cache_miss` — when the provider reports cached / cache-creation input tokens.
355
+ - `cost_accumulated` — running cost in USD with cumulative token breakdown.
356
+
357
+ ## Approval gates (human-in-the-loop)
358
+
359
+ Pass `onToolApprovalRequest` to gate tool calls behind a runtime approval. The runtime calls your callback once per tool invocation whose risk tier requires it, and pauses the agent until you respond.
360
+
361
+ ```js
362
+ const runtime = createRuntime({
363
+ toolRiskTiers: { Bash: "high", Read: "low" },
364
+ async onToolApprovalRequest(req) {
365
+ // req = { requestId, toolName, toolUseId, argumentsSummary, riskTier, model }
366
+ // argumentsSummary is already secret-redacted (API keys, Bearer tokens,
367
+ // and known JSON fields like "api_key" / "password" stripped).
368
+ if (req.toolName === "Bash" && req.argumentsSummary.includes("rm -rf")) {
369
+ return { decision: "deny", reason: "destructive" };
370
+ }
371
+ return { decision: "approve" };
372
+ },
373
+ });
374
+ ```
375
+
376
+ Tiers (configurable per tool):
377
+
378
+ - **low** — auto-approved; the callback is not called.
379
+ - **medium** (default) — calls the host; if no callback is supplied, auto-approves.
380
+ - **high** — calls the host; if no callback is supplied, fails closed (deny).
381
+
382
+ Responses:
383
+
384
+ - `{ decision: "approve" }` — allow this call.
385
+ - `{ decision: "deny", reason? }` — block; the agent receives a tool error.
386
+ - `{ decision: "always" }` — allow + session-allowlist for the run.
387
+
388
+ Backend coverage: Claude SDK (via `canUseTool`) and Pi SDK (via tool dispatch wrapping). Claude CLI and Codex CLI bridge into their backend's own approval models (`permissionMode` / `approvalPolicy`) — per-call runtime gates aren't available there.
389
+
390
+ Approval lifecycle is observable via `onEvent`:
391
+
392
+ - `tool_approval_pending` — emitted before calling the host.
393
+ - `tool_approval_granted` — host approved.
394
+ - `tool_approval_denied` — host denied, timed out, threw, or no callback for a high-risk tool.
395
+
396
+ ## Tool-result bloat handling
397
+
398
+ `@mono-agent/agent-runtime/agent/tool-bloat.js` enforces a 256 KB default cap per `tool_result`. When a payload exceeds the cap, the kernel:
399
+
400
+ 1. Calls your `persistArtifact({ filename, buffer, toolName, toolUseId })` callback (if you supplied one).
401
+ 2. Substitutes a compact text reference in the agent's transcript.
402
+ 3. Emits a `runtime_warning` with `warning_kind: "tool_payload_truncated"` and the saved-paths array.
403
+
404
+ Hosts that don't supply `persistArtifact` get the truncation summary but no on-disk capture.
405
+
406
+ ## Context compaction
407
+
408
+ `@mono-agent/agent-runtime/agent/compaction.js` provides `createAgentCompactionManager(...)` which the Pi SDK provider invokes automatically. Configure via the agent's settings (`agent_compaction_*` keys). When a compaction completes, the kernel hands a structured row to your `onCompactionRecorded(record)` callback so the host can persist it however it likes.
409
+
410
+ ## Advanced exports
411
+
412
+ The package exposes its inner pieces via subpath imports:
413
+
414
+ ```js
415
+ import { resolveRuntimeBridge, listRuntimeBridges, runtimeCapabilities } from "@mono-agent/agent-runtime/ai/runtime/registry.js";
416
+ import { generateClaudeResponse } from "@mono-agent/agent-runtime/ai/providers/claude-sdk.js";
417
+ import { createAgentCompactionManager, estimateFirstTurnInput } from "@mono-agent/agent-runtime/agent/compaction.js";
418
+ import { configureToolRuntime, readToolRuntime } from "@mono-agent/agent-runtime/agent/tools/shared/runtime-context.js";
419
+ // ...
420
+ ```
421
+
422
+ These are stable but treated as advanced API. Most consumers should reach for `createRuntime` first.
423
+
424
+ ## Example consumer
425
+
426
+ See [`examples/echo-agent/`](../../examples/echo-agent/) for a runnable consumer that imports `@mono-agent/agent-runtime`, runs a single Claude SDK turn with the Bash tool, and prints the result.
427
+
428
+ ## License
429
+
430
+ GPL-3.0-only.
package/package.json ADDED
@@ -0,0 +1,46 @@
1
+ {
2
+ "name": "@mono-agent/agent-runtime",
3
+ "version": "0.1.0",
4
+ "description": "Agent runtime supporting Claude SDK, Claude CLI, Codex CLI, and PI SDK out of the box",
5
+ "type": "module",
6
+ "license": "GPL-3.0-only",
7
+ "keywords": [
8
+ "ai",
9
+ "agents",
10
+ "runtime",
11
+ "mono-agent"
12
+ ],
13
+ "publishConfig": {
14
+ "access": "public"
15
+ },
16
+ "exports": {
17
+ ".": "./src/index.js",
18
+ "./ai": "./src/ai/index.js",
19
+ "./agent": "./src/agent/index.js",
20
+ "./ai/*": "./src/ai/*",
21
+ "./agent/*": "./src/agent/*"
22
+ },
23
+ "main": "./src/index.js",
24
+ "files": [
25
+ "src/**/*.js",
26
+ "!src/__tests__/**",
27
+ "ARCHITECTURE.md",
28
+ "README.md",
29
+ "LICENSE"
30
+ ],
31
+ "engines": {
32
+ "node": ">=20"
33
+ },
34
+ "dependencies": {
35
+ "@anthropic-ai/claude-agent-sdk": "^0.1.0",
36
+ "@earendil-works/pi-agent-core": "^0.79.1",
37
+ "@earendil-works/pi-ai": "^0.79.1",
38
+ "@mono-agent/sandbox": "0.1.0",
39
+ "@modelcontextprotocol/sdk": "^1.12.0",
40
+ "@opencode-ai/sdk": "^1.15.13",
41
+ "zod": "^4.3.6"
42
+ },
43
+ "scripts": {
44
+ "test": "pnpm --filter @mono-agent/sandbox run build && vitest run --passWithNoTests"
45
+ }
46
+ }
package/src/agent/allowlists.js ADDED
@@ -0,0 +1,49 @@
1
+ export const ALLOWLIST_MODE_ALL = "all"; // Allowlist mode value meaning "allow every available item".
2
+ export const ALLOWLIST_MODE_CUSTOM = "custom"; // Allowlist mode value meaning "allow only the explicitly listed items".
3
+
4
/**
 * Cleans up a list of names.
 *
 * Keeps only string entries that are non-empty after trimming, trims them,
 * and removes duplicates while preserving first-seen order.
 *
 * @param {unknown} value - Candidate list; any non-array input yields `[]`.
 * @returns {string[]} Trimmed, de-duplicated entries.
 */
export function normalizeList(value) {
  if (!Array.isArray(value)) return [];

  const unique = new Set();
  for (const candidate of value) {
    if (typeof candidate !== "string") continue;
    const trimmed = candidate.trim();
    // Whitespace-only strings trim down to "" and are dropped.
    if (trimmed) unique.add(trimmed);
  }
  return [...unique];
}
9
+
10
/**
 * Validates an allowlist mode strictly.
 *
 * @param {unknown} value - Candidate mode.
 * @returns {string} The same value when it is one of the two known modes.
 * @throws {Error} When the value is neither `ALLOWLIST_MODE_ALL` nor
 *   `ALLOWLIST_MODE_CUSTOM`.
 */
export function normalizeAllowlistMode(value) {
  switch (value) {
    case ALLOWLIST_MODE_ALL:
    case ALLOWLIST_MODE_CUSTOM:
      return value;
    default:
      throw new Error('allowlist mode must be "all" or "custom"');
  }
}
14
+
15
/**
 * Leniently coerces a mode value to a known mode without throwing.
 *
 * @param {unknown} value - Mode value to coerce.
 * @returns {string} `ALLOWLIST_MODE_CUSTOM` when the value is exactly that
 *   constant; `ALLOWLIST_MODE_ALL` for anything else.
 */
export function storedAllowlistMode(value) {
  if (value === ALLOWLIST_MODE_CUSTOM) {
    return ALLOWLIST_MODE_CUSTOM;
  }
  return ALLOWLIST_MODE_ALL;
}
18
+
19
/**
 * Decodes a JSON-encoded allowlist and normalizes it with `normalizeList`.
 *
 * Best-effort by design: a falsy input is treated as an empty JSON array,
 * and any failure while decoding or normalizing yields `[]` instead of
 * propagating.
 *
 * @param {unknown} value - JSON text expected to encode an array of names.
 * @returns {string[]} The normalized list, or `[]` on any failure.
 */
export function parseStoredAllowlist(value) {
  try {
    const serialized = value || "[]";
    const decoded = JSON.parse(serialized);
    return normalizeList(decoded);
  } catch {
    return [];
  }
}
26
+
27
/**
 * Decides which allowlist mode applies.
 *
 * Precedence: an explicitly supplied `mode` (strictly validated), then
 * "custom" when the normalized `list` has at least one entry, then the
 * leniently coerced `fallback`.
 *
 * @param {object} [params]
 * @param {unknown} [params.mode] - Explicit mode; validated when not `undefined`.
 * @param {unknown} [params.list] - Candidate allowlist entries.
 * @param {unknown} [params.fallback=ALLOWLIST_MODE_ALL] - Mode used when
 *   neither `mode` nor a non-empty `list` is given.
 * @returns {string} The resolved mode.
 * @throws {Error} When `mode` is supplied but is not a known mode.
 */
export function inferAllowlistMode({ mode, list, fallback = ALLOWLIST_MODE_ALL } = {}) {
  if (mode !== undefined) {
    return normalizeAllowlistMode(mode);
  }

  const hasEntries = normalizeList(list).length > 0;
  if (hasEntries) {
    return ALLOWLIST_MODE_CUSTOM;
  }
  return storedAllowlistMode(fallback);
}
33
+
34
/**
 * Selects the permitted items from an array of available items.
 *
 * @param {object} params
 * @param {unknown} params.mode - Allowlist mode; anything other than
 *   "custom" is treated as "all".
 * @param {unknown} params.allowlist - Names permitted in "custom" mode.
 * @param {Array} params.all - Every available item.
 * @param {(item: any) => string} [params.getName] - Extracts the name to
 *   match against the allowlist; defaults to the item itself.
 * @returns {Array} A shallow copy of `all` in "all" mode; otherwise the
 *   items of `all` whose name is in the normalized allowlist, in `all` order.
 */
export function resolveAllowlist({ mode, allowlist, all, getName = (item) => item }) {
  if (storedAllowlistMode(mode) === ALLOWLIST_MODE_ALL) {
    return [...all];
  }

  const permitted = new Set(normalizeList(allowlist));
  const isPermitted = (item) => permitted.has(getName(item));
  return all.filter(isPermitted);
}
40
+
41
/**
 * Selects the permitted entries from a name-keyed map of available items.
 *
 * @param {object} params
 * @param {unknown} params.mode - Allowlist mode; anything other than
 *   "custom" is treated as "all".
 * @param {unknown} params.allowlist - Names permitted in "custom" mode.
 * @param {Record<string, any>} params.all - Every available item, keyed by name.
 * @returns {Record<string, any>} A shallow copy of `all` in "all" mode;
 *   otherwise a new object holding only the allowlisted entries that are own,
 *   truthy properties of `all`.
 */
export function resolveAllowlistMap({ mode, allowlist, all }) {
  if (storedAllowlistMode(mode) === ALLOWLIST_MODE_ALL) {
    return { ...all };
  }

  const selected = normalizeList(allowlist)
    // Require an OWN property: a bare `all[name]` lookup also matches inherited
    // Object.prototype members, so names like "constructor" or "toString"
    // would be copied into the result as if they were configured entries.
    // The truthiness check is kept so falsy entries stay excluded as before.
    .filter((name) => Object.hasOwn(all, name) && all[name])
    .map((name) => [name, all[name]]);

  // Object.fromEntries defines data properties, so an own "__proto__" key is
  // copied as a plain entry (as the spread above does) instead of re-pointing
  // the result's prototype.
  return Object.fromEntries(selected);
}