@bluecopa/harness 0.1.0-snapshot.7 → 0.1.0-snapshot.70
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +208 -148
- package/package.json +1 -1
- package/src/agent/create-agent.ts +49 -17
- package/src/agent/types.ts +27 -2
- package/src/arc/agent-runner.ts +994 -0
- package/src/arc/arc-loop.ts +803 -306
- package/src/arc/arc-types.ts +5 -99
- package/src/arc/consolidation.ts +22 -4
- package/src/arc/context-window.ts +267 -0
- package/src/arc/create-arc-agent.ts +78 -59
- package/src/arc/create-org-arc-agent.ts +60 -0
- package/src/arc/episode-compressor.ts +140 -67
- package/src/arc/memory-manager.ts +245 -0
- package/src/arc/message-convert.ts +123 -0
- package/src/arc/multi-model.ts +70 -0
- package/src/arc/org-arc-loop.ts +343 -0
- package/src/arc/org-arc-runner.ts +287 -0
- package/src/arc/org-types.ts +104 -0
- package/src/arc/profile-builder.ts +173 -0
- package/src/arc/resilience/bulkhead.ts +110 -0
- package/src/arc/resilience/circuit-breaker.ts +112 -0
- package/src/arc/resilience/fallback.ts +27 -0
- package/src/arc/resilience/index.ts +21 -0
- package/src/arc/resilience/pipeline.ts +103 -0
- package/src/arc/resilience/retry.ts +90 -0
- package/src/arc/resilience/timeout.ts +60 -0
- package/src/arc/resilience/types.ts +71 -0
- package/src/arc/result-pager.ts +77 -0
- package/src/arc/sig.ts +115 -0
- package/src/arc/skill-resolver.ts +109 -0
- package/src/arc/stores/rxdb-setup.ts +1 -0
- package/src/arc/tools.ts +67 -0
- package/src/arc/types.ts +370 -0
- package/src/arc/utils.ts +37 -0
- package/src/hooks/middleware.ts +95 -0
- package/src/interfaces/hooks.ts +7 -1
- package/src/interfaces/tool-provider.ts +2 -0
- package/src/loop/vercel-agent-loop.ts +122 -19
- package/src/skills/skill-router.ts +12 -6
- package/testing/index.ts +22 -0
- package/testing/scenario-replay.ts +209 -0
- package/testing/scenario-types.ts +38 -0
- package/testing/scripted-llm.ts +230 -0
- package/tests/arc/channel.test.ts +170 -0
- package/tests/arc/context-window.test.ts +396 -0
- package/tests/arc/e2e.test.ts +353 -0
- package/tests/arc/error-paths.test.ts +402 -0
- package/tests/arc/live-integration.test.ts +357 -0
- package/tests/arc/memory-manager.test.ts +384 -0
- package/tests/arc/middleware.test.ts +113 -0
- package/tests/arc/org-arc-loop.test.ts +138 -0
- package/tests/arc/process-interleaving.test.ts +432 -0
- package/tests/arc/process-profiles.test.ts +366 -0
- package/tests/arc/resilience-integration.test.ts +381 -0
- package/tests/arc/resilience.test.ts +575 -0
- package/tests/arc/result-paging.test.ts +392 -0
- package/tests/arc/scenario-driven.test.ts +297 -0
- package/tests/arc/tool-dispatch.test.ts +340 -0
- package/tests/arc/wasm-pbt.test.ts +104 -0
- package/verify/Cargo.lock +637 -0
- package/verify/Cargo.toml +24 -0
- package/verify/src/lib.rs +5 -0
- package/verify/src/main.rs +165 -0
- package/verify/src/model/context.rs +100 -0
- package/verify/src/model/mod.rs +6 -0
- package/verify/src/model/orchestrator.rs +371 -0
- package/verify/src/model/process.rs +140 -0
- package/verify/src/model/types.rs +273 -0
- package/verify/src/properties/liveness.rs +32 -0
- package/verify/src/properties/mod.rs +4 -0
- package/verify/src/properties/safety.rs +78 -0
- package/verify/src/trace/event.rs +155 -0
- package/verify/src/trace/mod.rs +2 -0
- package/verify/src/trace/validator.rs +367 -0
- package/verify/src/wasm/mod.rs +3 -0
- package/verify/src/wasm/scenario_generator.rs +400 -0
- package/verify/src/wasm/types.rs +104 -0
- package/verify/src/wasm/wasm_validator.rs +107 -0
- package/verify/tests/model_check.rs +49 -0
- package/verify/tests/trace_validation.rs +147 -0
- package/vitest.config.ts +1 -1
- package/src/arc/thread-executor.ts +0 -354
- package/src/arc/thread-tool.ts +0 -26
package/README.md
CHANGED
|
@@ -2,9 +2,17 @@
|
|
|
2
2
|
|
|
3
3
|
Provider-agnostic TypeScript agent framework with Claude-code-compatible tool semantics.
|
|
4
4
|
|
|
5
|
-
|
|
5
|
+
Published on npm as **`@bluecopa/harness`**.
|
|
6
6
|
|
|
7
|
-
|
|
7
|
+
Two execution modes: a simple single-agent loop (`createAgent` + `VercelAgentLoop`) and a process-based orchestrator (`ArcLoop`) that dispatches parallel processes with context management, memory, and resilience.
|
|
8
|
+
|
|
9
|
+
## Install
|
|
10
|
+
|
|
11
|
+
```bash
|
|
12
|
+
pnpm add @bluecopa/harness
|
|
13
|
+
```
|
|
14
|
+
|
|
15
|
+
## Development
|
|
8
16
|
|
|
9
17
|
```bash
|
|
10
18
|
pnpm install
|
|
@@ -13,9 +21,11 @@ pnpm test
|
|
|
13
21
|
|
|
14
22
|
## Architecture
|
|
15
23
|
|
|
24
|
+
### Single-Agent Loop
|
|
25
|
+
|
|
16
26
|
```
|
|
17
27
|
┌──────────────┐ ┌──────────────┐ ┌──────────────────┐
|
|
18
|
-
│ createAgent
|
|
28
|
+
│ createAgent │────►│ AgentLoop │────►│ LLM (Claude) │
|
|
19
29
|
│ (turn loop) │ │ (nextAction)│ │ │
|
|
20
30
|
└──────┬───────┘ └──────────────┘ └──────────────────┘
|
|
21
31
|
│ │
|
|
@@ -27,20 +37,82 @@ pnpm test
|
|
|
27
37
|
└──────────────┘
|
|
28
38
|
```
|
|
29
39
|
|
|
30
|
-
|
|
31
|
-
2. Each step calls `loop.nextAction(messages)` to get the LLM's decision
|
|
32
|
-
3. If it's a tool call, the harness executes it via `ToolProvider` and appends the result
|
|
33
|
-
4. If it's a final action, the loop ends and returns the result
|
|
40
|
+
### ArcLoop Orchestrator
|
|
34
41
|
|
|
35
|
-
|
|
42
|
+
```
|
|
43
|
+
Orchestrator (ArcLoop — Opus 4.6 by default)
|
|
44
|
+
│ tools: Thread, Check, Cancel, Remember, ReadEpisode
|
|
45
|
+
│
|
|
46
|
+
│ Turn 1 (parallel):
|
|
47
|
+
├──► Process 0 ("read auth", model=fast) ─┐
|
|
48
|
+
├──► Process 1 ("read routes", model=fast) ─┼──► Episodes
|
|
49
|
+
├──► Process 2 ("read tests", model=fast) ─┘
|
|
50
|
+
│
|
|
51
|
+
│ Turn 2 (dispatch dependent work):
|
|
52
|
+
├──► Thread("fix bug", context=[ep0,ep1,ep2]) ──► Episode
|
|
53
|
+
│
|
|
54
|
+
│ Turn 3 (parallel):
|
|
55
|
+
├──► Thread("run tests", context=[ep3]) ─┐
|
|
56
|
+
├──► Thread("update docs", context=[ep3]) ─┘
|
|
57
|
+
│
|
|
58
|
+
└──► Final text response
|
|
59
|
+
```
|
|
36
60
|
|
|
37
|
-
|
|
61
|
+
Full architecture doc: [`docs/arc.md`](../docs/arc.md)
|
|
38
62
|
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
63
|
+
---
|
|
64
|
+
|
|
65
|
+
## ToolProvider
|
|
66
|
+
|
|
67
|
+
The contract for tool execution. All agent modes use this interface.
|
|
68
|
+
|
|
69
|
+
```typescript
|
|
70
|
+
interface ToolProvider {
|
|
71
|
+
bash(command: string, options?: BashOptions): Promise<ToolResult>;
|
|
72
|
+
readFile(path: string, options?: ReadOptions): Promise<ToolResult>;
|
|
73
|
+
writeFile(path: string, content: string): Promise<ToolResult>;
|
|
74
|
+
editFile(path: string, oldText: string, newText: string): Promise<ToolResult>;
|
|
75
|
+
glob(pattern: string, options?: GlobOptions): Promise<ToolResult>;
|
|
76
|
+
grep(pattern: string, path?: string, options?: GrepOptions): Promise<ToolResult>;
|
|
77
|
+
webFetch?(options: WebFetchOptions): Promise<ToolResult>;
|
|
78
|
+
webSearch?(query: string): Promise<ToolResult>;
|
|
79
|
+
capabilities(): ToolProviderCapabilities;
|
|
80
|
+
}
|
|
81
|
+
|
|
82
|
+
interface ToolResult {
|
|
83
|
+
success: boolean;
|
|
84
|
+
output: string;
|
|
85
|
+
error?: string;
|
|
86
|
+
}
|
|
87
|
+
```
|
|
88
|
+
|
|
89
|
+
Built-in implementations:
|
|
90
|
+
|
|
91
|
+
| Provider | Description |
|
|
92
|
+
|----------|-------------|
|
|
93
|
+
| `LocalToolProvider` | Runs tools on the local filesystem |
|
|
94
|
+
| `E2BToolProvider` | Routes tools to a sandbox VM via `ControlPlaneE2BExecutor` |
|
|
95
|
+
| `CompositeToolProvider` | Combines multiple providers (e.g. local filesystem + sandbox bash) |
|
|
96
|
+
|
|
97
|
+
## SandboxProvider
|
|
98
|
+
|
|
99
|
+
Higher-level sandbox operations beyond basic tool calls:
|
|
100
|
+
|
|
101
|
+
```typescript
|
|
102
|
+
interface SandboxProvider {
|
|
103
|
+
exec(command: string, options?: SandboxExecOptions): Promise<SandboxExecResult>;
|
|
104
|
+
readSandboxFile(path: string): Promise<SandboxFileBlob>;
|
|
105
|
+
writeSandboxFile(path: string, content: SandboxFileBlob): Promise<void>;
|
|
106
|
+
}
|
|
107
|
+
```
|
|
108
|
+
|
|
109
|
+
Used by `SkillManager` for executing skill scripts in isolated VMs.
|
|
110
|
+
|
|
111
|
+
## Connecting to a Sandbox
|
|
112
|
+
|
|
113
|
+
```typescript
|
|
42
114
|
import { ControlPlaneE2BExecutor } from './src/providers/control-plane-e2b-executor';
|
|
43
|
-
import {
|
|
115
|
+
import { E2BToolProvider } from './src/providers/e2b-tool-provider';
|
|
44
116
|
|
|
45
117
|
// Connect to sandbox service
|
|
46
118
|
const executor = new ControlPlaneE2BExecutor({
|
|
@@ -50,187 +122,172 @@ const executor = new ControlPlaneE2BExecutor({
|
|
|
50
122
|
});
|
|
51
123
|
await executor.initialize(); // creates a Firecracker VM
|
|
52
124
|
|
|
53
|
-
|
|
54
|
-
const agent = createAgent({
|
|
55
|
-
toolProvider: new E2BToolProvider(executor),
|
|
56
|
-
loop: new VercelAgentLoop(), // needs ANTHROPIC_API_KEY
|
|
57
|
-
});
|
|
125
|
+
const toolProvider = new E2BToolProvider(executor);
|
|
58
126
|
|
|
59
|
-
|
|
60
|
-
console.log(result.output); // LLM's final response
|
|
61
|
-
console.log(result.steps); // number of tool steps
|
|
127
|
+
// ... use with createAgent or ArcLoop
|
|
62
128
|
|
|
63
|
-
await executor.destroy();
|
|
129
|
+
await executor.destroy(); // tears down the VM
|
|
64
130
|
```
|
|
65
131
|
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
### From environment variables
|
|
132
|
+
From environment variables: `ControlPlaneE2BExecutor.fromEnv()` reads `SAMYX_BASE_URL` and `SAMYX_API_KEY`.
|
|
69
133
|
|
|
70
|
-
|
|
134
|
+
---
|
|
71
135
|
|
|
72
|
-
|
|
73
|
-
const executor = ControlPlaneE2BExecutor.fromEnv();
|
|
74
|
-
```
|
|
75
|
-
|
|
76
|
-
## Using locally (no sandbox)
|
|
136
|
+
## Single-Agent Mode (`createAgent`)
|
|
77
137
|
|
|
78
|
-
For
|
|
138
|
+
For simple tasks that don't need orchestration:
|
|
79
139
|
|
|
80
|
-
```
|
|
140
|
+
```typescript
|
|
81
141
|
import { createAgent } from './src/agent/create-agent';
|
|
82
142
|
import { LocalToolProvider } from './src/providers/local-tool-provider';
|
|
83
143
|
|
|
84
144
|
const agent = createAgent({
|
|
85
145
|
toolProvider: new LocalToolProvider(process.cwd()),
|
|
86
|
-
loop: new VercelAgentLoop(),
|
|
146
|
+
loop: new VercelAgentLoop(), // needs ANTHROPIC_API_KEY
|
|
87
147
|
});
|
|
88
148
|
|
|
89
149
|
const result = await agent.run('list all TypeScript files');
|
|
150
|
+
console.log(result.output);
|
|
90
151
|
```
|
|
91
152
|
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
### Agent creation (`src/agent/create-agent.ts`)
|
|
153
|
+
### Configuration
|
|
95
154
|
|
|
96
|
-
|
|
155
|
+
| Option | Type | Default | Description |
|
|
156
|
+
|--------|------|---------|-------------|
|
|
157
|
+
| `toolProvider` | `ToolProvider` | required | Executes tool calls |
|
|
158
|
+
| `loop` | `AgentLoop` | `VercelAgentLoop` | LLM decision loop |
|
|
159
|
+
| `sandboxProvider` | `SandboxProvider` | — | Higher-level sandbox operations |
|
|
160
|
+
| `maxSteps` | `number` | 30 | Max tool steps per run |
|
|
161
|
+
| `telemetry` | `HarnessTelemetry` | — | OpenTelemetry-style tracing |
|
|
162
|
+
| `skillIndexPath` | `string` | — | Path to skill index JSON for routing |
|
|
97
163
|
|
|
98
|
-
|
|
99
|
-
|--------|------|-------------|
|
|
100
|
-
| `toolProvider` | `ToolProvider` | Required. Executes tool calls |
|
|
101
|
-
| `loop` | `AgentLoop` | LLM decision loop (default: `VercelAgentLoop`) |
|
|
102
|
-
| `sandboxProvider` | `SandboxProvider` | Optional. Higher-level sandbox ops (file download, exec with env) |
|
|
103
|
-
| `maxSteps` | `number` | Max tool steps per run (default: 30) |
|
|
104
|
-
| `telemetry` | `HarnessTelemetry` | Optional. OpenTelemetry-style tracing |
|
|
105
|
-
| `skillIndexPath` | `string` | Optional. Path to skill index JSON |
|
|
164
|
+
### VercelAgentLoop
|
|
106
165
|
|
|
107
|
-
|
|
166
|
+
Calls Claude via the Vercel AI SDK. Supports parallel tool calls and configurable system prompt.
|
|
108
167
|
|
|
109
|
-
|
|
110
|
-
- Parallel tool calls (returns `ToolBatchAction` when the LLM requests multiple tools at once)
|
|
111
|
-
- Configurable system prompt
|
|
112
|
-
- Model selection via `HARNESS_MODEL` env var (default: `claude-sonnet-4-5`)
|
|
113
|
-
|
|
114
|
-
```ts
|
|
168
|
+
```typescript
|
|
115
169
|
const loop = new VercelAgentLoop({
|
|
116
170
|
systemPrompt: 'You are a helpful coding assistant.',
|
|
171
|
+
model: 'claude-sonnet-4-5', // or HARNESS_MODEL env var
|
|
117
172
|
});
|
|
118
173
|
```
|
|
119
174
|
|
|
120
|
-
###
|
|
175
|
+
### LCMToolLoop
|
|
121
176
|
|
|
122
|
-
|
|
177
|
+
Wraps another loop to add Lossless Context Management and optional REPL orchestration:
|
|
123
178
|
|
|
124
|
-
```
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
readFile(path: string, options?: ReadOptions): Promise<ToolResult>;
|
|
128
|
-
writeFile(path: string, content: string): Promise<ToolResult>;
|
|
129
|
-
editFile(path: string, oldText: string, newText: string): Promise<ToolResult>;
|
|
130
|
-
glob(pattern: string, options?: GlobOptions): Promise<ToolResult>;
|
|
131
|
-
grep(pattern: string, path?: string, options?: GrepOptions): Promise<ToolResult>;
|
|
132
|
-
webFetch?(options: WebFetchOptions): Promise<ToolResult>;
|
|
133
|
-
webSearch?(query: string): Promise<ToolResult>;
|
|
134
|
-
capabilities(): ToolProviderCapabilities;
|
|
135
|
-
}
|
|
179
|
+
```typescript
|
|
180
|
+
import { LCMToolLoop } from './src/loop/lcm-tool-loop';
|
|
181
|
+
import { VercelAgentLoop } from './src/loop/vercel-agent-loop';
|
|
136
182
|
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
|
|
183
|
+
const loop = new LCMToolLoop({
|
|
184
|
+
innerLoop: new VercelAgentLoop(),
|
|
185
|
+
toolProvider: mySandboxProvider,
|
|
186
|
+
enableRepl: true, // default: true
|
|
187
|
+
bridgeDir: '/var/run/bridge',
|
|
188
|
+
onActivity: (entry) => console.log(entry),
|
|
189
|
+
onLlmRequest: async (prompt) => callLLM(prompt),
|
|
190
|
+
onWebFetchRequest: async (url) => fetch(url),
|
|
191
|
+
});
|
|
142
192
|
```
|
|
143
193
|
|
|
144
|
-
|
|
194
|
+
**Standard mode**: Lossless context trimming — the LLM always sees a coherent, budget-fitting view of the full conversation.
|
|
145
195
|
|
|
146
|
-
|
|
147
|
-
|----------|-------------|
|
|
148
|
-
| `LocalToolProvider` | Runs tools on the local filesystem |
|
|
149
|
-
| `E2BToolProvider` | Routes tools to an E2B-compatible executor over HTTP |
|
|
150
|
-
| `CompositeToolProvider` | Combines multiple providers (e.g. sandbox + web) |
|
|
196
|
+
**REPL mode**: When the LLM returns a Bash action with the REPL marker, the loop writes a Python script into the sandbox, injects the bridge module, runs the script, and polls for sub-requests (LLM, web_fetch, ask_user) that the harness fulfills.
|
|
151
197
|
|
|
152
|
-
|
|
198
|
+
---
|
|
153
199
|
|
|
154
|
-
|
|
200
|
+
## ArcLoop (Orchestrator Mode)
|
|
155
201
|
|
|
156
|
-
|
|
157
|
-
// Single tool call
|
|
158
|
-
interface ToolCallAction {
|
|
159
|
-
type: 'tool';
|
|
160
|
-
name: 'Bash' | 'Read' | 'Write' | 'Edit' | 'Glob' | 'Grep' | ...;
|
|
161
|
-
args: Record<string, unknown>;
|
|
162
|
-
}
|
|
202
|
+
For complex tasks that benefit from parallel processes, context management, and memory:
|
|
163
203
|
|
|
164
|
-
|
|
165
|
-
|
|
166
|
-
type: 'tool_batch';
|
|
167
|
-
calls: ToolCallAction[];
|
|
168
|
-
}
|
|
169
|
-
|
|
170
|
-
// Final text response (ends the loop)
|
|
171
|
-
interface FinalAction {
|
|
172
|
-
type: 'final';
|
|
173
|
-
content: string;
|
|
174
|
-
}
|
|
175
|
-
```
|
|
176
|
-
|
|
177
|
-
### LCM tool loop (`src/loop/lcm-tool-loop.ts`)
|
|
178
|
-
|
|
179
|
-
`LCMToolLoop` wraps another loop to add LCM-based tool routing, REPL script execution, and bridge-based tool dispatch. Used in the chat-assistant example.
|
|
180
|
-
|
|
181
|
-
### Sandbox provider (`src/interfaces/sandbox-provider.ts`)
|
|
204
|
+
```typescript
|
|
205
|
+
import { createArcAgent } from './src/arc/create-arc-agent';
|
|
182
206
|
|
|
183
|
-
|
|
207
|
+
const agent = await createArcAgent({
|
|
208
|
+
toolProvider: myToolProvider,
|
|
209
|
+
episodeStore: myEpisodeStore, // required
|
|
210
|
+
sessionMemoStore: mySessionMemoStore, // required
|
|
211
|
+
longTermStore: myLongTermStore, // required
|
|
212
|
+
taskId: 'task-1',
|
|
213
|
+
sessionId: 'session-1',
|
|
214
|
+
});
|
|
184
215
|
|
|
185
|
-
|
|
186
|
-
|
|
187
|
-
|
|
188
|
-
|
|
189
|
-
|
|
216
|
+
// Streaming
|
|
217
|
+
for await (const event of agent.stream(messages, signal)) {
|
|
218
|
+
if (event.type === 'text_delta') process.stdout.write(event.text);
|
|
219
|
+
if (event.type === 'process_dispatched') console.log(` → ${event.action}`);
|
|
220
|
+
if (event.type === 'done') console.log(`Done in ${event.stats.durationMs}ms`);
|
|
190
221
|
}
|
|
191
|
-
```
|
|
192
|
-
|
|
193
|
-
### Observability (`src/observability/otel.ts`)
|
|
194
|
-
|
|
195
|
-
`HarnessTelemetry` provides OpenTelemetry-style spans and metrics for agent runs.
|
|
196
222
|
|
|
197
|
-
|
|
223
|
+
// Non-streaming
|
|
224
|
+
const result = await agent.run(messages, signal);
|
|
225
|
+
```
|
|
198
226
|
|
|
199
|
-
|
|
227
|
+
### ArcLoopConfig
|
|
228
|
+
|
|
229
|
+
| Option | Type | Default | Description |
|
|
230
|
+
|--------|------|---------|-------------|
|
|
231
|
+
| `model` | `string` | `'claude-opus-4-6'` | Orchestrator model (ID or tier name) |
|
|
232
|
+
| `modelMap` | `Record<ModelTier, string>` | haiku/sonnet/opus | Maps fast/medium/strong to model IDs |
|
|
233
|
+
| `apiKey` | `string` | — | Anthropic API key |
|
|
234
|
+
| `systemPrompt` | `string` | built-in | Custom orchestrator system prompt |
|
|
235
|
+
| `maxTurns` | `number` | 30 | Max orchestrator turns |
|
|
236
|
+
| `processTimeout` | `number` | 120_000 | Per-process timeout (ms) |
|
|
237
|
+
| `processMaxSteps` | `number` | 20 | Per-process max tool steps |
|
|
238
|
+
| `contextWindowSize` | `number` | 200_000 | Context window in tokens |
|
|
239
|
+
| `outputReserve` | `number` | 20_000 | Tokens reserved for output |
|
|
240
|
+
| `autoMemory` | `boolean` | true | Auto-detect patterns from episodes |
|
|
241
|
+
| `episodeStore` | `EpisodeStore` | required | Stores episode summaries + traces |
|
|
242
|
+
| `sessionMemoStore` | `SessionMemoStore` | required | Stores session memos |
|
|
243
|
+
| `longTermStore` | `LongTermStore` | required | Stores long-term memories |
|
|
244
|
+
| `taskId` | `string` | required | Task identifier |
|
|
245
|
+
| `sessionId` | `string` | required | Session identifier |
|
|
246
|
+
| `toolProvider` | `ToolProvider` | required | Tool execution |
|
|
247
|
+
| `processTools` | `Record<string, AnyTool>` | builtinTools | Tools available inside processes |
|
|
248
|
+
| `extraOrchestratorTools` | `Record<string, AnyTool>` | — | Custom orchestrator tools |
|
|
249
|
+
| `onOrchestratorTool` | `function` | — | Handler for custom orchestrator tools |
|
|
250
|
+
| `resilience` | `ResiliencePolicy` | — | Composable resilience pipeline |
|
|
251
|
+
| `traceWriter` | `function` | — | Callback for trace event emission |
|
|
252
|
+
|
|
253
|
+
### Resilience
|
|
254
|
+
|
|
255
|
+
```typescript
|
|
256
|
+
import { resilience } from './src/arc/resilience';
|
|
257
|
+
|
|
258
|
+
const pipeline = resilience()
|
|
259
|
+
.retry({ maxRetries: 2, baseDelay: 1000 })
|
|
260
|
+
.timeout({ durationMs: 30_000 })
|
|
261
|
+
.circuitBreaker({ failureThreshold: 5 })
|
|
262
|
+
.build();
|
|
263
|
+
|
|
264
|
+
const agent = await createArcAgent({
|
|
265
|
+
// ...config
|
|
266
|
+
resilience: pipeline,
|
|
267
|
+
});
|
|
268
|
+
```
|
|
200
269
|
|
|
201
|
-
|
|
202
|
-
import { createArcAgent } from './src/arc/create-arc-agent';
|
|
203
|
-
import { InMemoryEpisodeStore } from './src/arc/stores/episode-store';
|
|
204
|
-
import { InMemorySessionMemoStore } from './src/arc/stores/session-memo-store';
|
|
205
|
-
import { InMemoryLongTermStore } from './src/arc/stores/long-term-store';
|
|
270
|
+
### Trace Emission
|
|
206
271
|
|
|
207
|
-
|
|
208
|
-
|
|
209
|
-
|
|
210
|
-
|
|
211
|
-
|
|
212
|
-
taskId: 'task-1',
|
|
213
|
-
sessionId: 'session-1',
|
|
272
|
+
```typescript
|
|
273
|
+
const traces: TraceEvent[] = [];
|
|
274
|
+
const agent = await createArcAgent({
|
|
275
|
+
// ...config
|
|
276
|
+
traceWriter: (event) => traces.push(event),
|
|
214
277
|
});
|
|
215
|
-
|
|
216
|
-
const result = await agent.run('Fix the authentication bug');
|
|
217
278
|
```
|
|
218
279
|
|
|
219
|
-
|
|
220
|
-
- **Parallel threads**: orchestrator calls Thread N times in one turn → all run concurrently
|
|
221
|
-
- **Four-tier memory**: thread context → episodes → session memos → long-term
|
|
222
|
-
- **Per-thread models**: Haiku for reads, Sonnet for implementation
|
|
223
|
-
- **Template compression**: zero-LLM-call episode summaries
|
|
224
|
-
- **Async consolidation**: non-blocking background distillation
|
|
280
|
+
Traces can be validated against the formal model: `cd verify && cargo run -- trace file.ndjson`
|
|
225
281
|
|
|
226
|
-
|
|
282
|
+
---
|
|
227
283
|
|
|
228
|
-
## Package
|
|
284
|
+
## Package Layout
|
|
229
285
|
|
|
230
286
|
```
|
|
231
287
|
src/
|
|
232
288
|
├── agent/ # createAgent, step executor, types
|
|
233
|
-
├── arc/ # ArcLoop orchestrator,
|
|
289
|
+
├── arc/ # ArcLoop orchestrator, processes, memory, resilience
|
|
290
|
+
│ ├── resilience/ # Retry, circuit breaker, timeout, bulkhead, fallback
|
|
234
291
|
│ ├── stores/ # RxDB + in-memory store implementations
|
|
235
292
|
│ └── object-store/ # Pluggable cloud sync (fs, memory)
|
|
236
293
|
├── interfaces/ # ToolProvider, SandboxProvider, AgentLoop contracts
|
|
@@ -240,17 +297,20 @@ src/
|
|
|
240
297
|
├── hooks/ # Pre/post tool call hooks
|
|
241
298
|
├── permissions/ # Tool permission checks
|
|
242
299
|
├── sessions/ # Session persistence
|
|
243
|
-
├── subagents/ # Subagent spawning
|
|
300
|
+
├── subagents/ # Subagent spawning
|
|
244
301
|
├── skills/ # Skill index, routing, and management
|
|
245
302
|
├── optimization/ # Benchmark runner
|
|
246
303
|
└── observability/ # OpenTelemetry integration
|
|
304
|
+
|
|
305
|
+
verify/ # Rust formal verification (Stateright model checker)
|
|
306
|
+
testing/ # Adversarial scenario replay harness
|
|
307
|
+
tests/ # Vitest test suite
|
|
247
308
|
```
|
|
248
309
|
|
|
249
310
|
## Documentation
|
|
250
311
|
|
|
251
|
-
-
|
|
252
|
-
-
|
|
253
|
-
-
|
|
254
|
-
-
|
|
255
|
-
-
|
|
256
|
-
- Full example: [`../examples/chat-assistant/src/chat.ts`](../examples/chat-assistant/src/chat.ts)
|
|
312
|
+
- [Arc architecture](../docs/arc.md) — process model, context window, memory, resilience, verification
|
|
313
|
+
- [Testing](../docs/testing.md) — test layers, running tests, writing new tests
|
|
314
|
+
- [Sandbox setup](../docs/PUBLIC_SANDBOX.md) — deploying the sandbox service
|
|
315
|
+
- [Release process](../docs/RELEASE.md) — versioning and publishing
|
|
316
|
+
- [Example](../examples/chat-assistant/src/chat.ts) — complete working chat assistant
|
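package/package.json
CHANGED
(The package.json hunk is not present in this extract; the file list above records it as +1 -1.)
package/src/agent/create-agent.ts
CHANGED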
|
@@ -7,12 +7,14 @@ import type { HarnessTelemetry } from '../observability/otel';
|
|
|
7
7
|
import { HookRunner } from '../hooks/hook-runner';
|
|
8
8
|
import { PermissionManager } from '../permissions/permission-manager';
|
|
9
9
|
import { VercelAgentLoop } from '../loop/vercel-agent-loop';
|
|
10
|
+
export type { SystemPromptBlock, VercelAgentLoopConfig } from '../loop/vercel-agent-loop';
|
|
11
|
+
export type { PrepareStepContext, PrepareStepResult } from './types';
|
|
10
12
|
import { SkillManager } from '../skills/skill-manager';
|
|
11
13
|
import { SkillRouter } from '../skills/skill-router';
|
|
12
14
|
import type { SkillSummary } from '../skills/skill-types';
|
|
13
15
|
import { SingleFlightStepExecutor } from './step-executor';
|
|
14
|
-
import type { AgentAction, AgentLoop, AgentMessage, AgentRunResult, AgentStreamEvent, ToolCallAction, ToolBatchAction, ToolCallInfo, ToolResultInfo } from './types';
|
|
15
|
-
export type { AgentAction, AgentLoop, AgentMessage, AgentRunResult, AgentStreamEvent, ToolCallAction, ToolBatchAction, ToolCallInfo, ToolResultInfo };
|
|
16
|
+
import type { AgentAction, AgentLoop, AgentMessage, AgentRunResult, AgentStreamEvent, StepUsage, ToolCallAction, ToolBatchAction, ToolCallInfo, ToolResultInfo } from './types';
|
|
17
|
+
export type { AgentAction, AgentLoop, AgentMessage, AgentRunResult, AgentStreamEvent, StepUsage, ToolCallAction, ToolBatchAction, ToolCallInfo, ToolResultInfo };
|
|
16
18
|
export { HookRunner } from '../hooks/hook-runner';
|
|
17
19
|
export { PermissionManager } from '../permissions/permission-manager';
|
|
18
20
|
export type { PermissionMode, PermissionResolver, PermissionRequest } from '../permissions/permission-manager';
|
|
@@ -37,6 +39,8 @@ export interface AgentRuntime {
|
|
|
37
39
|
/** Custom tool executor. Called for every tool action. Return null to fall through to built-in dispatch.
|
|
38
40
|
* When hookRunner/permissionManager are provided on the runtime, they are automatically applied before/after this callback — no manual wiring needed. */
|
|
39
41
|
executeToolAction?: (action: ToolCallAction) => Promise<ToolResult | null>;
|
|
42
|
+
/** Progress callback fired before/after each tool call during run(). */
|
|
43
|
+
onToolProgress?: (event: { type: 'tool_start'; name: string; args: Record<string, unknown> } | { type: 'tool_end'; name: string; success: boolean; durationMs: number }) => void;
|
|
40
44
|
}
|
|
41
45
|
|
|
42
46
|
/**
|
|
@@ -218,9 +222,21 @@ function toStreamResult(r: ToolResult): { success: boolean; output: string; erro
|
|
|
218
222
|
return base;
|
|
219
223
|
}
|
|
220
224
|
|
|
221
|
-
/**
|
|
225
|
+
/** Build the text the LLM sees for a tool result.
|
|
226
|
+
* Success: prefer modelOutput (compact) over raw output.
|
|
227
|
+
* Failure: prefer modelOutput (structured fix guidance) → error → output → generic fallback.
|
|
228
|
+
* This ensures custom tools can feed actionable error feedback to the model via modelOutput
|
|
229
|
+
* so the agent can self-correct instead of stopping with "unknown failure". */
|
|
230
|
+
function resultTextForLLM(result: ToolResult): string {
|
|
231
|
+
if (result.success) return result.modelOutput ?? result.output;
|
|
232
|
+
return result.modelOutput ?? result.error ?? result.output ?? 'unknown failure';
|
|
233
|
+
}
|
|
234
|
+
|
|
235
|
+
/** Format content string for LLM context. Uses modelOutput (compact summary) when available. */
|
|
222
236
|
function formatToolResultContent(call: ToolCallAction, result: ToolResult): string {
|
|
223
|
-
const content = result.success
|
|
237
|
+
const content = result.success
|
|
238
|
+
? resultTextForLLM(result)
|
|
239
|
+
: `ERROR: ${resultTextForLLM(result)}`;
|
|
224
240
|
switch (call.name) {
|
|
225
241
|
case 'Write':
|
|
226
242
|
return `Write(${call.args.path}): ${result.success ? 'ok' : content}`;
|
|
@@ -515,6 +531,11 @@ export function createAgent(runtime: AgentRuntime) {
|
|
|
515
531
|
? { nextAction: runtime.nextAction }
|
|
516
532
|
: new VercelAgentLoop());
|
|
517
533
|
|
|
534
|
+
/** Read lastUsage from the loop if it's a VercelAgentLoop. */
|
|
535
|
+
function getLoopUsage(): StepUsage | undefined {
|
|
536
|
+
return loop instanceof VercelAgentLoop ? loop.lastUsage : undefined;
|
|
537
|
+
}
|
|
538
|
+
|
|
518
539
|
async function resolveSkillContext(prompt: string): Promise<string> {
|
|
519
540
|
if (!skillManager || !skillIndexPath) return '';
|
|
520
541
|
|
|
@@ -596,14 +617,18 @@ export function createAgent(runtime: AgentRuntime) {
|
|
|
596
617
|
|
|
597
618
|
// Execute valid calls via batch (sequential sandbox ops) or parallel fallback
|
|
598
619
|
if (validCalls.length > 0) {
|
|
620
|
+
for (const c of validCalls) runtime.onToolProgress?.({ type: 'tool_start', name: c.name, args: c.args });
|
|
621
|
+
const batchStart = Date.now();
|
|
599
622
|
const results = await executeBatch(validCalls, runtime.toolProvider, runtime);
|
|
623
|
+
const batchMs = Date.now() - batchStart;
|
|
600
624
|
for (let i = 0; i < validCalls.length; i++) {
|
|
601
625
|
const call = validCalls[i]!;
|
|
602
626
|
const r = results[i]!;
|
|
627
|
+
runtime.onToolProgress?.({ type: 'tool_end', name: call.name, success: r.success, durationMs: batchMs });
|
|
603
628
|
if (!r.success) {
|
|
604
629
|
recordAgentError(runtime.telemetry);
|
|
605
630
|
}
|
|
606
|
-
const resultText = r.success ? r
|
|
631
|
+
const resultText = r.success ? resultTextForLLM(r) : `ERROR: ${resultTextForLLM(r)}`;
|
|
607
632
|
messages.push({
|
|
608
633
|
role: 'tool',
|
|
609
634
|
content: formatToolResultContent(call, r),
|
|
@@ -659,6 +684,8 @@ export function createAgent(runtime: AgentRuntime) {
|
|
|
659
684
|
} else {
|
|
660
685
|
consecutiveInvalid = 0;
|
|
661
686
|
}
|
|
687
|
+
runtime.onToolProgress?.({ type: 'tool_start', name: action.name, args: action.args });
|
|
688
|
+
const singleStart = Date.now();
|
|
662
689
|
const result = validationError
|
|
663
690
|
? ({ success: false, output: '', error: validationError } as ToolResult)
|
|
664
691
|
: await executor.run(async () => {
|
|
@@ -672,10 +699,11 @@ export function createAgent(runtime: AgentRuntime) {
|
|
|
672
699
|
};
|
|
673
700
|
}
|
|
674
701
|
});
|
|
702
|
+
runtime.onToolProgress?.({ type: 'tool_end', name: action.name, success: result.success, durationMs: Date.now() - singleStart });
|
|
675
703
|
if (!result.success) {
|
|
676
704
|
recordAgentError(runtime.telemetry);
|
|
677
705
|
}
|
|
678
|
-
const singleResultText = result.success ? result
|
|
706
|
+
const singleResultText = result.success ? resultTextForLLM(result) : `ERROR: ${resultTextForLLM(result)}`;
|
|
679
707
|
messages.push({
|
|
680
708
|
role: 'tool',
|
|
681
709
|
content: formatToolResultContent(action, result),
|
|
@@ -718,8 +746,7 @@ export function createAgent(runtime: AgentRuntime) {
|
|
|
718
746
|
if (event.type === 'text_delta') {
|
|
719
747
|
finalText += event.text;
|
|
720
748
|
yield event;
|
|
721
|
-
}
|
|
722
|
-
if (event.type === 'tool_start') {
|
|
749
|
+
} else if (event.type === 'tool_start') {
|
|
723
750
|
pendingTools.push({
|
|
724
751
|
type: 'tool',
|
|
725
752
|
name: event.name,
|
|
@@ -727,13 +754,18 @@ export function createAgent(runtime: AgentRuntime) {
|
|
|
727
754
|
...(event.toolCallId != null ? { toolCallId: event.toolCallId } : {}),
|
|
728
755
|
});
|
|
729
756
|
yield event;
|
|
757
|
+
} else {
|
|
758
|
+
// Forward all other events (tool_end, step_start, step_end, done)
|
|
759
|
+
// from self-managing loops like ArcLoop
|
|
760
|
+
yield event;
|
|
761
|
+
if (event.type === 'done') return;
|
|
730
762
|
}
|
|
731
763
|
}
|
|
732
764
|
|
|
733
765
|
// If no tools → final response
|
|
734
766
|
if (pendingTools.length === 0) {
|
|
735
767
|
messages.push({ role: 'assistant', content: finalText });
|
|
736
|
-
yield { type: 'step_end', step };
|
|
768
|
+
{ const u = getLoopUsage(); yield u ? { type: 'step_end' as const, step, usage: u } : { type: 'step_end' as const, step }; }
|
|
737
769
|
yield { type: 'done', output: finalText, steps: step };
|
|
738
770
|
return;
|
|
739
771
|
}
|
|
@@ -759,7 +791,7 @@ export function createAgent(runtime: AgentRuntime) {
|
|
|
759
791
|
if (action.type === 'final') {
|
|
760
792
|
yield { type: 'text_delta', text: action.content };
|
|
761
793
|
messages.push({ role: 'assistant', content: action.content });
|
|
762
|
-
yield { type: 'step_end', step };
|
|
794
|
+
{ const u = getLoopUsage(); yield u ? { type: 'step_end' as const, step, usage: u } : { type: 'step_end' as const, step }; }
|
|
763
795
|
yield { type: 'done', output: action.content, steps: step };
|
|
764
796
|
return;
|
|
765
797
|
}
|
|
@@ -771,7 +803,7 @@ export function createAgent(runtime: AgentRuntime) {
|
|
|
771
803
|
try {
|
|
772
804
|
const r = await executeTool(runtime.toolProvider, call, runtime);
|
|
773
805
|
yield { type: 'tool_end', name: call.name, result: toStreamResult(r) };
|
|
774
|
-
const rText = r.success ? r
|
|
806
|
+
const rText = r.success ? resultTextForLLM(r) : `ERROR: ${resultTextForLLM(r)}`;
|
|
775
807
|
messages.push({
|
|
776
808
|
role: 'tool',
|
|
777
809
|
content: formatToolResultContent(call, r),
|
|
@@ -793,7 +825,7 @@ export function createAgent(runtime: AgentRuntime) {
|
|
|
793
825
|
try {
|
|
794
826
|
const r = await executeTool(runtime.toolProvider, action, runtime);
|
|
795
827
|
yield { type: 'tool_end', name: action.name, result: toStreamResult(r) };
|
|
796
|
-
const rText = r.success ? r
|
|
828
|
+
const rText = r.success ? resultTextForLLM(r) : `ERROR: ${resultTextForLLM(r)}`;
|
|
797
829
|
messages.push({
|
|
798
830
|
role: 'tool',
|
|
799
831
|
content: formatToolResultContent(action, r),
|
|
@@ -809,7 +841,7 @@ export function createAgent(runtime: AgentRuntime) {
|
|
|
809
841
|
});
|
|
810
842
|
}
|
|
811
843
|
}
|
|
812
|
-
yield { type: 'step_end', step };
|
|
844
|
+
{ const u = getLoopUsage(); yield u ? { type: 'step_end' as const, step, usage: u } : { type: 'step_end' as const, step }; }
|
|
813
845
|
continue;
|
|
814
846
|
}
|
|
815
847
|
|
|
@@ -819,7 +851,7 @@ export function createAgent(runtime: AgentRuntime) {
|
|
|
819
851
|
const call = pendingTools[i]!;
|
|
820
852
|
const r = results[i]!;
|
|
821
853
|
yield { type: 'tool_end', name: call.name, result: toStreamResult(r) };
|
|
822
|
-
const rText = r.success ? r
|
|
854
|
+
const rText = r.success ? resultTextForLLM(r) : `ERROR: ${resultTextForLLM(r)}`;
|
|
823
855
|
messages.push({
|
|
824
856
|
role: 'tool',
|
|
825
857
|
content: formatToolResultContent(call, r),
|
|
@@ -837,7 +869,7 @@ export function createAgent(runtime: AgentRuntime) {
|
|
|
837
869
|
|
|
838
870
|
if (action.type === 'final') {
|
|
839
871
|
messages.push({ role: 'assistant', content: action.content });
|
|
840
|
-
yield { type: 'step_end', step };
|
|
872
|
+
{ const u = getLoopUsage(); yield u ? { type: 'step_end' as const, step, usage: u } : { type: 'step_end' as const, step }; }
|
|
841
873
|
yield { type: 'done', output: action.content, steps: step };
|
|
842
874
|
return;
|
|
843
875
|
}
|
|
@@ -861,7 +893,7 @@ export function createAgent(runtime: AgentRuntime) {
|
|
|
861
893
|
const call = calls[i]!;
|
|
862
894
|
const r = results[i]!;
|
|
863
895
|
yield { type: 'tool_end', name: call.name, result: toStreamResult(r) };
|
|
864
|
-
const rText = r.success ? r
|
|
896
|
+
const rText = r.success ? resultTextForLLM(r) : `ERROR: ${resultTextForLLM(r)}`;
|
|
865
897
|
messages.push({
|
|
866
898
|
role: 'tool',
|
|
867
899
|
content: formatToolResultContent(call, r),
|
|
@@ -875,7 +907,7 @@ export function createAgent(runtime: AgentRuntime) {
|
|
|
875
907
|
}
|
|
876
908
|
}
|
|
877
909
|
|
|
878
|
-
yield { type: 'step_end', step };
|
|
910
|
+
{ const u = getLoopUsage(); yield u ? { type: 'step_end' as const, step, usage: u } : { type: 'step_end' as const, step }; }
|
|
879
911
|
}
|
|
880
912
|
|
|
881
913
|
yield { type: 'done', output: 'ERROR: max steps exceeded', steps: maxSteps };
|