@bluecopa/harness 0.1.0-snapshot.59 → 0.1.0-snapshot.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +117 -212
- package/package.json +1 -2
- package/src/agent/create-agent.ts +17 -49
- package/src/agent/types.ts +2 -23
- package/src/interfaces/tool-provider.ts +0 -2
- package/src/loop/vercel-agent-loop.ts +18 -115
- package/src/skills/skill-router.ts +6 -12
- package/tests/integration/agent-skill-default-from-sandbox.spec.ts +2 -3
- package/tests/unit/structured-messages.spec.ts +1 -1
- package/vitest.config.ts +1 -1
- package/src/arc/agent-runner.ts +0 -697
- package/src/arc/arc-loop.ts +0 -818
- package/src/arc/arc-types.ts +0 -115
- package/src/arc/bridge-tools.ts +0 -170
- package/src/arc/bridged-tool-provider.ts +0 -80
- package/src/arc/consolidation.ts +0 -118
- package/src/arc/context-window.ts +0 -267
- package/src/arc/create-arc-agent.ts +0 -99
- package/src/arc/debug.ts +0 -62
- package/src/arc/episode-compressor.ts +0 -225
- package/src/arc/memory-manager.ts +0 -245
- package/src/arc/message-convert.ts +0 -111
- package/src/arc/object-store/fs-object-store.ts +0 -60
- package/src/arc/object-store/memory-object-store.ts +0 -41
- package/src/arc/object-store/object-store.ts +0 -12
- package/src/arc/profile-builder.ts +0 -157
- package/src/arc/resilience/bulkhead.ts +0 -110
- package/src/arc/resilience/circuit-breaker.ts +0 -112
- package/src/arc/resilience/fallback.ts +0 -27
- package/src/arc/resilience/index.ts +0 -21
- package/src/arc/resilience/pipeline.ts +0 -103
- package/src/arc/resilience/retry.ts +0 -90
- package/src/arc/resilience/timeout.ts +0 -60
- package/src/arc/resilience/types.ts +0 -71
- package/src/arc/sig.ts +0 -115
- package/src/arc/skill-resolver.ts +0 -78
- package/src/arc/stores/episode-store.ts +0 -120
- package/src/arc/stores/long-term-store.ts +0 -86
- package/src/arc/stores/rxdb-setup.ts +0 -113
- package/src/arc/stores/session-memo-store.ts +0 -58
- package/src/arc/tools.ts +0 -67
- package/src/arc/types.ts +0 -333
- package/src/arc/utils.ts +0 -37
- package/testing/index.ts +0 -22
- package/testing/scenario-replay.ts +0 -209
- package/testing/scenario-types.ts +0 -38
- package/testing/scripted-llm.ts +0 -230
- package/tests/arc/channel.test.ts +0 -170
- package/tests/arc/context-window.test.ts +0 -396
- package/tests/arc/e2e.test.ts +0 -353
- package/tests/arc/error-paths.test.ts +0 -402
- package/tests/arc/live-integration.test.ts +0 -357
- package/tests/arc/memory-manager.test.ts +0 -384
- package/tests/arc/process-interleaving.test.ts +0 -432
- package/tests/arc/process-profiles.test.ts +0 -364
- package/tests/arc/resilience-integration.test.ts +0 -381
- package/tests/arc/resilience.test.ts +0 -575
- package/tests/arc/scenario-driven.test.ts +0 -297
- package/tests/arc/tool-dispatch.test.ts +0 -340
- package/tests/arc/wasm-pbt.test.ts +0 -104
- package/verify/Cargo.lock +0 -637
- package/verify/Cargo.toml +0 -24
- package/verify/src/lib.rs +0 -5
- package/verify/src/main.rs +0 -165
- package/verify/src/model/context.rs +0 -100
- package/verify/src/model/mod.rs +0 -6
- package/verify/src/model/orchestrator.rs +0 -371
- package/verify/src/model/process.rs +0 -140
- package/verify/src/model/types.rs +0 -273
- package/verify/src/properties/liveness.rs +0 -32
- package/verify/src/properties/mod.rs +0 -4
- package/verify/src/properties/safety.rs +0 -78
- package/verify/src/trace/event.rs +0 -155
- package/verify/src/trace/mod.rs +0 -2
- package/verify/src/trace/validator.rs +0 -367
- package/verify/src/wasm/mod.rs +0 -3
- package/verify/src/wasm/scenario_generator.rs +0 -400
- package/verify/src/wasm/types.rs +0 -104
- package/verify/src/wasm/wasm_validator.rs +0 -107
- package/verify/tests/model_check.rs +0 -49
- package/verify/tests/trace_validation.rs +0 -147
package/README.md
CHANGED
|
@@ -2,17 +2,9 @@
|
|
|
2
2
|
|
|
3
3
|
Provider-agnostic TypeScript agent framework with Claude-code-compatible tool semantics.
|
|
4
4
|
|
|
5
|
-
|
|
5
|
+
The harness provides the core loop that drives an AI agent: send messages to an LLM, execute the tool calls it returns, feed results back, and repeat until the LLM produces a final text response.
|
|
6
6
|
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
## Install
|
|
10
|
-
|
|
11
|
-
```bash
|
|
12
|
-
pnpm add @bluecopa/harness
|
|
13
|
-
```
|
|
14
|
-
|
|
15
|
-
## Development
|
|
7
|
+
## Quickstart
|
|
16
8
|
|
|
17
9
|
```bash
|
|
18
10
|
pnpm install
|
|
@@ -21,11 +13,9 @@ pnpm test
|
|
|
21
13
|
|
|
22
14
|
## Architecture
|
|
23
15
|
|
|
24
|
-
### Single-Agent Loop
|
|
25
|
-
|
|
26
16
|
```
|
|
27
17
|
┌──────────────┐ ┌──────────────┐ ┌──────────────────┐
|
|
28
|
-
│ createAgent
|
|
18
|
+
│ createAgent │────▶│ AgentLoop │────▶│ LLM (Claude) │
|
|
29
19
|
│ (turn loop) │ │ (nextAction)│ │ │
|
|
30
20
|
└──────┬───────┘ └──────────────┘ └──────────────────┘
|
|
31
21
|
│ │
|
|
@@ -37,82 +27,20 @@ pnpm test
|
|
|
37
27
|
└──────────────┘
|
|
38
28
|
```
|
|
39
29
|
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
│ tools: Thread, Check, Cancel, Remember, ReadEpisode
|
|
45
|
-
│
|
|
46
|
-
│ Turn 1 (parallel):
|
|
47
|
-
├──► Process 0 ("read auth", model=fast) ─┐
|
|
48
|
-
├──► Process 1 ("read routes", model=fast) ─┼──► Episodes
|
|
49
|
-
├──► Process 2 ("read tests", model=fast) ─┘
|
|
50
|
-
│
|
|
51
|
-
│ Turn 2 (dispatch dependent work):
|
|
52
|
-
├──► Thread("fix bug", context=[ep0,ep1,ep2]) ──► Episode
|
|
53
|
-
│
|
|
54
|
-
│ Turn 3 (parallel):
|
|
55
|
-
├──► Thread("run tests", context=[ep3]) ─┐
|
|
56
|
-
├──► Thread("update docs", context=[ep3]) ─┘
|
|
57
|
-
│
|
|
58
|
-
└──► Final text response
|
|
59
|
-
```
|
|
60
|
-
|
|
61
|
-
Full architecture doc: [`docs/arc.md`](../docs/arc.md)
|
|
62
|
-
|
|
63
|
-
---
|
|
64
|
-
|
|
65
|
-
## ToolProvider
|
|
66
|
-
|
|
67
|
-
The contract for tool execution. All agent modes use this interface.
|
|
68
|
-
|
|
69
|
-
```typescript
|
|
70
|
-
interface ToolProvider {
|
|
71
|
-
bash(command: string, options?: BashOptions): Promise<ToolResult>;
|
|
72
|
-
readFile(path: string, options?: ReadOptions): Promise<ToolResult>;
|
|
73
|
-
writeFile(path: string, content: string): Promise<ToolResult>;
|
|
74
|
-
editFile(path: string, oldText: string, newText: string): Promise<ToolResult>;
|
|
75
|
-
glob(pattern: string, options?: GlobOptions): Promise<ToolResult>;
|
|
76
|
-
grep(pattern: string, path?: string, options?: GrepOptions): Promise<ToolResult>;
|
|
77
|
-
webFetch?(options: WebFetchOptions): Promise<ToolResult>;
|
|
78
|
-
webSearch?(query: string): Promise<ToolResult>;
|
|
79
|
-
capabilities(): ToolProviderCapabilities;
|
|
80
|
-
}
|
|
81
|
-
|
|
82
|
-
interface ToolResult {
|
|
83
|
-
success: boolean;
|
|
84
|
-
output: string;
|
|
85
|
-
error?: string;
|
|
86
|
-
}
|
|
87
|
-
```
|
|
88
|
-
|
|
89
|
-
Built-in implementations:
|
|
30
|
+
1. `createAgent` drives a deterministic step loop
|
|
31
|
+
2. Each step calls `loop.nextAction(messages)` to get the LLM's decision
|
|
32
|
+
3. If it's a tool call, the harness executes it via `ToolProvider` and appends the result
|
|
33
|
+
4. If it's a final action, the loop ends and returns the result
|
|
90
34
|
|
|
91
|
-
|
|
92
|
-
|----------|-------------|
|
|
93
|
-
| `LocalToolProvider` | Runs tools on the local filesystem |
|
|
94
|
-
| `E2BToolProvider` | Routes tools to a sandbox VM via `ControlPlaneE2BExecutor` |
|
|
95
|
-
| `CompositeToolProvider` | Combines multiple providers (e.g. local filesystem + sandbox bash) |
|
|
35
|
+
## Using with the sandbox
|
|
96
36
|
|
|
97
|
-
|
|
37
|
+
The most common setup connects the harness to a running sandbox service via `ControlPlaneE2BExecutor`:
|
|
98
38
|
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
```typescript
|
|
102
|
-
interface SandboxProvider {
|
|
103
|
-
exec(command: string, options?: SandboxExecOptions): Promise<SandboxExecResult>;
|
|
104
|
-
readSandboxFile(path: string): Promise<SandboxFileBlob>;
|
|
105
|
-
writeSandboxFile(path: string, content: SandboxFileBlob): Promise<void>;
|
|
106
|
-
}
|
|
107
|
-
```
|
|
108
|
-
|
|
109
|
-
Used by `SkillManager` for executing skill scripts in isolated VMs.
|
|
110
|
-
|
|
111
|
-
## Connecting to a Sandbox
|
|
112
|
-
|
|
113
|
-
```typescript
|
|
114
|
-
import { ControlPlaneE2BExecutor } from './src/providers/control-plane-e2b-executor';
|
|
39
|
+
```ts
|
|
40
|
+
import { createAgent } from './src/agent/create-agent';
|
|
115
41
|
import { E2BToolProvider } from './src/providers/e2b-tool-provider';
|
|
42
|
+
import { ControlPlaneE2BExecutor } from './src/providers/control-plane-e2b-executor';
|
|
43
|
+
import { VercelAgentLoop } from './src/loop/vercel-agent-loop';
|
|
116
44
|
|
|
117
45
|
// Connect to sandbox service
|
|
118
46
|
const executor = new ControlPlaneE2BExecutor({
|
|
@@ -122,174 +50,155 @@ const executor = new ControlPlaneE2BExecutor({
|
|
|
122
50
|
});
|
|
123
51
|
await executor.initialize(); // creates a Firecracker VM
|
|
124
52
|
|
|
125
|
-
|
|
53
|
+
// Build and run the agent
|
|
54
|
+
const agent = createAgent({
|
|
55
|
+
toolProvider: new E2BToolProvider(executor),
|
|
56
|
+
loop: new VercelAgentLoop(), // needs ANTHROPIC_API_KEY
|
|
57
|
+
});
|
|
126
58
|
|
|
127
|
-
|
|
59
|
+
const result = await agent.run('create a bar chart of sales data');
|
|
60
|
+
console.log(result.output); // LLM's final response
|
|
61
|
+
console.log(result.steps); // number of tool steps
|
|
128
62
|
|
|
129
|
-
await executor.destroy();
|
|
63
|
+
await executor.destroy(); // tears down the VM
|
|
130
64
|
```
|
|
131
65
|
|
|
132
|
-
|
|
66
|
+
For a complete working example, see [`examples/chat-assistant/src/chat.ts`](../examples/chat-assistant/src/chat.ts).
|
|
67
|
+
|
|
68
|
+
### From environment variables
|
|
133
69
|
|
|
134
|
-
|
|
70
|
+
`ControlPlaneE2BExecutor.fromEnv()` reads `SAMYX_BASE_URL` and `SAMYX_API_KEY` automatically:
|
|
135
71
|
|
|
136
|
-
|
|
72
|
+
```ts
|
|
73
|
+
const executor = ControlPlaneE2BExecutor.fromEnv();
|
|
74
|
+
```
|
|
137
75
|
|
|
138
|
-
|
|
76
|
+
## Using locally (no sandbox)
|
|
139
77
|
|
|
140
|
-
|
|
78
|
+
For development without a sandbox service, use `LocalToolProvider` which runs tools on the local machine:
|
|
79
|
+
|
|
80
|
+
```ts
|
|
141
81
|
import { createAgent } from './src/agent/create-agent';
|
|
142
82
|
import { LocalToolProvider } from './src/providers/local-tool-provider';
|
|
143
83
|
|
|
144
84
|
const agent = createAgent({
|
|
145
85
|
toolProvider: new LocalToolProvider(process.cwd()),
|
|
146
|
-
loop: new VercelAgentLoop(),
|
|
86
|
+
loop: new VercelAgentLoop(),
|
|
147
87
|
});
|
|
148
88
|
|
|
149
89
|
const result = await agent.run('list all TypeScript files');
|
|
150
|
-
console.log(result.output);
|
|
151
90
|
```
|
|
152
91
|
|
|
153
|
-
|
|
92
|
+
## Key modules
|
|
93
|
+
|
|
94
|
+
### Agent creation (`src/agent/create-agent.ts`)
|
|
154
95
|
|
|
155
|
-
|
|
156
|
-
|--------|------|---------|-------------|
|
|
157
|
-
| `toolProvider` | `ToolProvider` | required | Executes tool calls |
|
|
158
|
-
| `loop` | `AgentLoop` | `VercelAgentLoop` | LLM decision loop |
|
|
159
|
-
| `sandboxProvider` | `SandboxProvider` | — | Higher-level sandbox operations |
|
|
160
|
-
| `maxSteps` | `number` | 30 | Max tool steps per run |
|
|
161
|
-
| `telemetry` | `HarnessTelemetry` | — | OpenTelemetry-style tracing |
|
|
162
|
-
| `skillIndexPath` | `string` | — | Path to skill index JSON for routing |
|
|
96
|
+
`createAgent(options)` returns an agent with a `.run(prompt, options?)` method. Options:
|
|
163
97
|
|
|
164
|
-
|
|
98
|
+
| Option | Type | Description |
|
|
99
|
+
|--------|------|-------------|
|
|
100
|
+
| `toolProvider` | `ToolProvider` | Required. Executes tool calls |
|
|
101
|
+
| `loop` | `AgentLoop` | LLM decision loop (default: `VercelAgentLoop`) |
|
|
102
|
+
| `sandboxProvider` | `SandboxProvider` | Optional. Higher-level sandbox ops (file download, exec with env) |
|
|
103
|
+
| `maxSteps` | `number` | Max tool steps per run (default: 30) |
|
|
104
|
+
| `telemetry` | `HarnessTelemetry` | Optional. OpenTelemetry-style tracing |
|
|
105
|
+
| `skillIndexPath` | `string` | Optional. Path to skill index JSON |
|
|
165
106
|
|
|
166
|
-
|
|
107
|
+
### Agent loop (`src/loop/vercel-agent-loop.ts`)
|
|
167
108
|
|
|
168
|
-
|
|
109
|
+
`VercelAgentLoop` calls Claude via the Vercel AI SDK. It supports:
|
|
110
|
+
- Parallel tool calls (returns `ToolBatchAction` when the LLM requests multiple tools at once)
|
|
111
|
+
- Configurable system prompt
|
|
112
|
+
- Model selection via `HARNESS_MODEL` env var (default: `claude-sonnet-4-5`)
|
|
113
|
+
|
|
114
|
+
```ts
|
|
169
115
|
const loop = new VercelAgentLoop({
|
|
170
116
|
systemPrompt: 'You are a helpful coding assistant.',
|
|
171
|
-
model: 'claude-sonnet-4-5', // or HARNESS_MODEL env var
|
|
172
117
|
});
|
|
173
118
|
```
|
|
174
119
|
|
|
175
|
-
###
|
|
120
|
+
### Tool provider (`src/interfaces/tool-provider.ts`)
|
|
176
121
|
|
|
177
|
-
|
|
122
|
+
The contract for tool execution:
|
|
178
123
|
|
|
179
|
-
```
|
|
180
|
-
|
|
181
|
-
|
|
124
|
+
```ts
|
|
125
|
+
interface ToolProvider {
|
|
126
|
+
bash(command: string, options?: BashOptions): Promise<ToolResult>;
|
|
127
|
+
readFile(path: string, options?: ReadOptions): Promise<ToolResult>;
|
|
128
|
+
writeFile(path: string, content: string): Promise<ToolResult>;
|
|
129
|
+
editFile(path: string, oldText: string, newText: string): Promise<ToolResult>;
|
|
130
|
+
glob(pattern: string, options?: GlobOptions): Promise<ToolResult>;
|
|
131
|
+
grep(pattern: string, path?: string, options?: GrepOptions): Promise<ToolResult>;
|
|
132
|
+
webFetch?(options: WebFetchOptions): Promise<ToolResult>;
|
|
133
|
+
webSearch?(query: string): Promise<ToolResult>;
|
|
134
|
+
capabilities(): ToolProviderCapabilities;
|
|
135
|
+
}
|
|
182
136
|
|
|
183
|
-
|
|
184
|
-
|
|
185
|
-
|
|
186
|
-
|
|
187
|
-
|
|
188
|
-
onActivity: (entry) => console.log(entry),
|
|
189
|
-
onLlmRequest: async (prompt) => callLLM(prompt),
|
|
190
|
-
onWebFetchRequest: async (url) => fetch(url),
|
|
191
|
-
});
|
|
137
|
+
interface ToolResult {
|
|
138
|
+
success: boolean;
|
|
139
|
+
output: string;
|
|
140
|
+
error?: string;
|
|
141
|
+
}
|
|
192
142
|
```
|
|
193
143
|
|
|
194
|
-
|
|
195
|
-
|
|
196
|
-
**REPL mode**: When the LLM returns a Bash action with the REPL marker, the loop writes a Python script into the sandbox, injects the bridge module, runs the script, and polls for sub-requests (LLM, web_fetch, ask_user) that the harness fulfills.
|
|
197
|
-
|
|
198
|
-
---
|
|
144
|
+
Built-in implementations:
|
|
199
145
|
|
|
200
|
-
|
|
146
|
+
| Provider | Description |
|
|
147
|
+
|----------|-------------|
|
|
148
|
+
| `LocalToolProvider` | Runs tools on the local filesystem |
|
|
149
|
+
| `E2BToolProvider` | Routes tools to an E2B-compatible executor over HTTP |
|
|
150
|
+
| `CompositeToolProvider` | Combines multiple providers (e.g. sandbox + web) |
|
|
201
151
|
|
|
202
|
-
|
|
152
|
+
### Action types (`src/agent/types.ts`)
|
|
203
153
|
|
|
204
|
-
|
|
205
|
-
import { createArcAgent } from './src/arc/create-arc-agent';
|
|
154
|
+
The LLM returns one of these action types each turn:
|
|
206
155
|
|
|
207
|
-
|
|
208
|
-
|
|
209
|
-
|
|
210
|
-
|
|
211
|
-
|
|
212
|
-
|
|
213
|
-
|
|
214
|
-
});
|
|
156
|
+
```ts
|
|
157
|
+
// Single tool call
|
|
158
|
+
interface ToolCallAction {
|
|
159
|
+
type: 'tool';
|
|
160
|
+
name: 'Bash' | 'Read' | 'Write' | 'Edit' | 'Glob' | 'Grep' | ...;
|
|
161
|
+
args: Record<string, unknown>;
|
|
162
|
+
}
|
|
215
163
|
|
|
216
|
-
//
|
|
217
|
-
|
|
218
|
-
|
|
219
|
-
|
|
220
|
-
if (event.type === 'done') console.log(`Done in ${event.stats.durationMs}ms`);
|
|
164
|
+
// Multiple independent tool calls (executed in parallel)
|
|
165
|
+
interface ToolBatchAction {
|
|
166
|
+
type: 'tool_batch';
|
|
167
|
+
calls: ToolCallAction[];
|
|
221
168
|
}
|
|
222
169
|
|
|
223
|
-
//
|
|
224
|
-
|
|
170
|
+
// Final text response (ends the loop)
|
|
171
|
+
interface FinalAction {
|
|
172
|
+
type: 'final';
|
|
173
|
+
content: string;
|
|
174
|
+
}
|
|
225
175
|
```
|
|
226
176
|
|
|
227
|
-
###
|
|
228
|
-
|
|
229
|
-
| Option | Type | Default | Description |
|
|
230
|
-
|--------|------|---------|-------------|
|
|
231
|
-
| `model` | `string` | `'claude-opus-4-6'` | Orchestrator model (ID or tier name) |
|
|
232
|
-
| `modelMap` | `Record<ModelTier, string>` | haiku/sonnet/opus | Maps fast/medium/strong to model IDs |
|
|
233
|
-
| `apiKey` | `string` | — | Anthropic API key |
|
|
234
|
-
| `systemPrompt` | `string` | built-in | Custom orchestrator system prompt |
|
|
235
|
-
| `maxTurns` | `number` | 30 | Max orchestrator turns |
|
|
236
|
-
| `processTimeout` | `number` | 120_000 | Per-process timeout (ms) |
|
|
237
|
-
| `processMaxSteps` | `number` | 20 | Per-process max tool steps |
|
|
238
|
-
| `contextWindowSize` | `number` | 200_000 | Context window in tokens |
|
|
239
|
-
| `outputReserve` | `number` | 20_000 | Tokens reserved for output |
|
|
240
|
-
| `autoMemory` | `boolean` | true | Auto-detect patterns from episodes |
|
|
241
|
-
| `episodeStore` | `EpisodeStore` | required | Stores episode summaries + traces |
|
|
242
|
-
| `sessionMemoStore` | `SessionMemoStore` | required | Stores session memos |
|
|
243
|
-
| `longTermStore` | `LongTermStore` | required | Stores long-term memories |
|
|
244
|
-
| `taskId` | `string` | required | Task identifier |
|
|
245
|
-
| `sessionId` | `string` | required | Session identifier |
|
|
246
|
-
| `toolProvider` | `ToolProvider` | required | Tool execution |
|
|
247
|
-
| `processTools` | `Record<string, AnyTool>` | builtinTools | Tools available inside processes |
|
|
248
|
-
| `extraOrchestratorTools` | `Record<string, AnyTool>` | — | Custom orchestrator tools |
|
|
249
|
-
| `onOrchestratorTool` | `function` | — | Handler for custom orchestrator tools |
|
|
250
|
-
| `resilience` | `ResiliencePolicy` | — | Composable resilience pipeline |
|
|
251
|
-
| `traceWriter` | `function` | — | Callback for trace event emission |
|
|
252
|
-
|
|
253
|
-
### Resilience
|
|
254
|
-
|
|
255
|
-
```typescript
|
|
256
|
-
import { resilience } from './src/arc/resilience';
|
|
257
|
-
|
|
258
|
-
const pipeline = resilience()
|
|
259
|
-
.retry({ maxRetries: 2, baseDelay: 1000 })
|
|
260
|
-
.timeout({ durationMs: 30_000 })
|
|
261
|
-
.circuitBreaker({ failureThreshold: 5 })
|
|
262
|
-
.build();
|
|
263
|
-
|
|
264
|
-
const agent = await createArcAgent({
|
|
265
|
-
// ...config
|
|
266
|
-
resilience: pipeline,
|
|
267
|
-
});
|
|
268
|
-
```
|
|
177
|
+
### LCM tool loop (`src/loop/lcm-tool-loop.ts`)
|
|
269
178
|
|
|
270
|
-
|
|
179
|
+
`LCMToolLoop` wraps another loop to add LCM-based tool routing, REPL script execution, and bridge-based tool dispatch. Used in the chat-assistant example.
|
|
271
180
|
|
|
272
|
-
|
|
273
|
-
|
|
274
|
-
|
|
275
|
-
|
|
276
|
-
|
|
277
|
-
|
|
181
|
+
### Sandbox provider (`src/interfaces/sandbox-provider.ts`)
|
|
182
|
+
|
|
183
|
+
Higher-level sandbox operations beyond basic tool calls:
|
|
184
|
+
|
|
185
|
+
```ts
|
|
186
|
+
interface SandboxProvider {
|
|
187
|
+
exec(command: string, options?: SandboxExecOptions): Promise<SandboxExecResult>;
|
|
188
|
+
readSandboxFile(path: string): Promise<SandboxFileBlob>;
|
|
189
|
+
writeSandboxFile(path: string, content: SandboxFileBlob): Promise<void>;
|
|
190
|
+
}
|
|
278
191
|
```
|
|
279
192
|
|
|
280
|
-
|
|
193
|
+
### Observability (`src/observability/otel.ts`)
|
|
281
194
|
|
|
282
|
-
|
|
195
|
+
`HarnessTelemetry` provides OpenTelemetry-style spans and metrics for agent runs.
|
|
283
196
|
|
|
284
|
-
## Package
|
|
197
|
+
## Package layout
|
|
285
198
|
|
|
286
199
|
```
|
|
287
200
|
src/
|
|
288
201
|
├── agent/ # createAgent, step executor, types
|
|
289
|
-
├── arc/ # ArcLoop orchestrator, processes, memory, resilience
|
|
290
|
-
│ ├── resilience/ # Retry, circuit breaker, timeout, bulkhead, fallback
|
|
291
|
-
│ ├── stores/ # RxDB + in-memory store implementations
|
|
292
|
-
│ └── object-store/ # Pluggable cloud sync (fs, memory)
|
|
293
202
|
├── interfaces/ # ToolProvider, SandboxProvider, AgentLoop contracts
|
|
294
203
|
├── loop/ # VercelAgentLoop, LCMToolLoop
|
|
295
204
|
├── providers/ # LocalToolProvider, E2BToolProvider, ControlPlaneE2BExecutor
|
|
@@ -297,20 +206,16 @@ src/
|
|
|
297
206
|
├── hooks/ # Pre/post tool call hooks
|
|
298
207
|
├── permissions/ # Tool permission checks
|
|
299
208
|
├── sessions/ # Session persistence
|
|
300
|
-
├── subagents/ # Subagent spawning
|
|
209
|
+
├── subagents/ # Subagent spawning and task tools
|
|
301
210
|
├── skills/ # Skill index, routing, and management
|
|
302
211
|
├── optimization/ # Benchmark runner
|
|
303
212
|
└── observability/ # OpenTelemetry integration
|
|
304
|
-
|
|
305
|
-
verify/ # Rust formal verification (Stateright model checker)
|
|
306
|
-
testing/ # Adversarial scenario replay harness
|
|
307
|
-
tests/ # Vitest test suite
|
|
308
213
|
```
|
|
309
214
|
|
|
310
215
|
## Documentation
|
|
311
216
|
|
|
312
|
-
-
|
|
313
|
-
-
|
|
314
|
-
-
|
|
315
|
-
-
|
|
316
|
-
- [
|
|
217
|
+
- Provider guide: `docs/guides/providers.md`
|
|
218
|
+
- Skills guide: `docs/guides/skills.md`
|
|
219
|
+
- Observability guide: `docs/guides/observability.md`
|
|
220
|
+
- Release process: `../docs/RELEASE.md`
|
|
221
|
+
- Full example: [`../examples/chat-assistant/src/chat.ts`](../examples/chat-assistant/src/chat.ts)
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@bluecopa/harness",
|
|
3
|
-
"version": "0.1.0-snapshot.59",
|
|
3
|
+
"version": "0.1.0-snapshot.6",
|
|
4
4
|
"description": "Provider-agnostic TypeScript agent framework",
|
|
5
5
|
"license": "UNLICENSED",
|
|
6
6
|
"scripts": {
|
|
@@ -10,7 +10,6 @@
|
|
|
10
10
|
"dependencies": {
|
|
11
11
|
"@ai-sdk/anthropic": "^3.0.48",
|
|
12
12
|
"ai": "^6.0.101",
|
|
13
|
-
"rxdb": "^15.39.0",
|
|
14
13
|
"zod": "^4.1.11"
|
|
15
14
|
},
|
|
16
15
|
"devDependencies": {
|
|
@@ -7,14 +7,12 @@ import type { HarnessTelemetry } from '../observability/otel';
|
|
|
7
7
|
import { HookRunner } from '../hooks/hook-runner';
|
|
8
8
|
import { PermissionManager } from '../permissions/permission-manager';
|
|
9
9
|
import { VercelAgentLoop } from '../loop/vercel-agent-loop';
|
|
10
|
-
export type { SystemPromptBlock, VercelAgentLoopConfig } from '../loop/vercel-agent-loop';
|
|
11
|
-
export type { PrepareStepContext, PrepareStepResult } from './types';
|
|
12
10
|
import { SkillManager } from '../skills/skill-manager';
|
|
13
11
|
import { SkillRouter } from '../skills/skill-router';
|
|
14
12
|
import type { SkillSummary } from '../skills/skill-types';
|
|
15
13
|
import { SingleFlightStepExecutor } from './step-executor';
|
|
16
|
-
import type { AgentAction, AgentLoop, AgentMessage, AgentRunResult, AgentStreamEvent,
|
|
17
|
-
export type { AgentAction, AgentLoop, AgentMessage, AgentRunResult, AgentStreamEvent,
|
|
14
|
+
import type { AgentAction, AgentLoop, AgentMessage, AgentRunResult, AgentStreamEvent, ToolCallAction, ToolBatchAction, ToolCallInfo, ToolResultInfo } from './types';
|
|
15
|
+
export type { AgentAction, AgentLoop, AgentMessage, AgentRunResult, AgentStreamEvent, ToolCallAction, ToolBatchAction, ToolCallInfo, ToolResultInfo };
|
|
18
16
|
export { HookRunner } from '../hooks/hook-runner';
|
|
19
17
|
export { PermissionManager } from '../permissions/permission-manager';
|
|
20
18
|
export type { PermissionMode, PermissionResolver, PermissionRequest } from '../permissions/permission-manager';
|
|
@@ -39,8 +37,6 @@ export interface AgentRuntime {
|
|
|
39
37
|
/** Custom tool executor. Called for every tool action. Return null to fall through to built-in dispatch.
|
|
40
38
|
* When hookRunner/permissionManager are provided on the runtime, they are automatically applied before/after this callback — no manual wiring needed. */
|
|
41
39
|
executeToolAction?: (action: ToolCallAction) => Promise<ToolResult | null>;
|
|
42
|
-
/** Progress callback fired before/after each tool call during run(). */
|
|
43
|
-
onToolProgress?: (event: { type: 'tool_start'; name: string; args: Record<string, unknown> } | { type: 'tool_end'; name: string; success: boolean; durationMs: number }) => void;
|
|
44
40
|
}
|
|
45
41
|
|
|
46
42
|
/**
|
|
@@ -222,21 +218,9 @@ function toStreamResult(r: ToolResult): { success: boolean; output: string; erro
|
|
|
222
218
|
return base;
|
|
223
219
|
}
|
|
224
220
|
|
|
225
|
-
/**
|
|
226
|
-
* Success: prefer modelOutput (compact) over raw output.
|
|
227
|
-
* Failure: prefer modelOutput (structured fix guidance) → error → output → generic fallback.
|
|
228
|
-
* This ensures custom tools can feed actionable error feedback to the model via modelOutput
|
|
229
|
-
* so the agent can self-correct instead of stopping with "unknown failure". */
|
|
230
|
-
function resultTextForLLM(result: ToolResult): string {
|
|
231
|
-
if (result.success) return result.modelOutput ?? result.output;
|
|
232
|
-
return result.modelOutput ?? result.error ?? result.output ?? 'unknown failure';
|
|
233
|
-
}
|
|
234
|
-
|
|
235
|
-
/** Format content string for LLM context. Uses modelOutput (compact summary) when available. */
|
|
221
|
+
/** Format a display-friendly content string for tool results (used in content field). */
|
|
236
222
|
function formatToolResultContent(call: ToolCallAction, result: ToolResult): string {
|
|
237
|
-
const content = result.success
|
|
238
|
-
? resultTextForLLM(result)
|
|
239
|
-
: `ERROR: ${resultTextForLLM(result)}`;
|
|
223
|
+
const content = result.success ? result.output : `ERROR: ${result.error ?? 'unknown failure'}`;
|
|
240
224
|
switch (call.name) {
|
|
241
225
|
case 'Write':
|
|
242
226
|
return `Write(${call.args.path}): ${result.success ? 'ok' : content}`;
|
|
@@ -531,11 +515,6 @@ export function createAgent(runtime: AgentRuntime) {
|
|
|
531
515
|
? { nextAction: runtime.nextAction }
|
|
532
516
|
: new VercelAgentLoop());
|
|
533
517
|
|
|
534
|
-
/** Read lastUsage from the loop if it's a VercelAgentLoop. */
|
|
535
|
-
function getLoopUsage(): StepUsage | undefined {
|
|
536
|
-
return loop instanceof VercelAgentLoop ? loop.lastUsage : undefined;
|
|
537
|
-
}
|
|
538
|
-
|
|
539
518
|
async function resolveSkillContext(prompt: string): Promise<string> {
|
|
540
519
|
if (!skillManager || !skillIndexPath) return '';
|
|
541
520
|
|
|
@@ -617,18 +596,14 @@ export function createAgent(runtime: AgentRuntime) {
|
|
|
617
596
|
|
|
618
597
|
// Execute valid calls via batch (sequential sandbox ops) or parallel fallback
|
|
619
598
|
if (validCalls.length > 0) {
|
|
620
|
-
for (const c of validCalls) runtime.onToolProgress?.({ type: 'tool_start', name: c.name, args: c.args });
|
|
621
|
-
const batchStart = Date.now();
|
|
622
599
|
const results = await executeBatch(validCalls, runtime.toolProvider, runtime);
|
|
623
|
-
const batchMs = Date.now() - batchStart;
|
|
624
600
|
for (let i = 0; i < validCalls.length; i++) {
|
|
625
601
|
const call = validCalls[i]!;
|
|
626
602
|
const r = results[i]!;
|
|
627
|
-
runtime.onToolProgress?.({ type: 'tool_end', name: call.name, success: r.success, durationMs: batchMs });
|
|
628
603
|
if (!r.success) {
|
|
629
604
|
recordAgentError(runtime.telemetry);
|
|
630
605
|
}
|
|
631
|
-
const resultText = r.success ?
|
|
606
|
+
const resultText = r.success ? r.output : `ERROR: ${r.error ?? 'unknown failure'}`;
|
|
632
607
|
messages.push({
|
|
633
608
|
role: 'tool',
|
|
634
609
|
content: formatToolResultContent(call, r),
|
|
@@ -684,8 +659,6 @@ export function createAgent(runtime: AgentRuntime) {
|
|
|
684
659
|
} else {
|
|
685
660
|
consecutiveInvalid = 0;
|
|
686
661
|
}
|
|
687
|
-
runtime.onToolProgress?.({ type: 'tool_start', name: action.name, args: action.args });
|
|
688
|
-
const singleStart = Date.now();
|
|
689
662
|
const result = validationError
|
|
690
663
|
? ({ success: false, output: '', error: validationError } as ToolResult)
|
|
691
664
|
: await executor.run(async () => {
|
|
@@ -699,11 +672,10 @@ export function createAgent(runtime: AgentRuntime) {
|
|
|
699
672
|
};
|
|
700
673
|
}
|
|
701
674
|
});
|
|
702
|
-
runtime.onToolProgress?.({ type: 'tool_end', name: action.name, success: result.success, durationMs: Date.now() - singleStart });
|
|
703
675
|
if (!result.success) {
|
|
704
676
|
recordAgentError(runtime.telemetry);
|
|
705
677
|
}
|
|
706
|
-
const singleResultText = result.success ?
|
|
678
|
+
const singleResultText = result.success ? result.output : `ERROR: ${result.error ?? 'unknown failure'}`;
|
|
707
679
|
messages.push({
|
|
708
680
|
role: 'tool',
|
|
709
681
|
content: formatToolResultContent(action, result),
|
|
@@ -746,7 +718,8 @@ export function createAgent(runtime: AgentRuntime) {
|
|
|
746
718
|
if (event.type === 'text_delta') {
|
|
747
719
|
finalText += event.text;
|
|
748
720
|
yield event;
|
|
749
|
-
}
|
|
721
|
+
}
|
|
722
|
+
if (event.type === 'tool_start') {
|
|
750
723
|
pendingTools.push({
|
|
751
724
|
type: 'tool',
|
|
752
725
|
name: event.name,
|
|
@@ -754,18 +727,13 @@ export function createAgent(runtime: AgentRuntime) {
|
|
|
754
727
|
...(event.toolCallId != null ? { toolCallId: event.toolCallId } : {}),
|
|
755
728
|
});
|
|
756
729
|
yield event;
|
|
757
|
-
} else {
|
|
758
|
-
// Forward all other events (tool_end, step_start, step_end, done)
|
|
759
|
-
// from self-managing loops like ArcLoop
|
|
760
|
-
yield event;
|
|
761
|
-
if (event.type === 'done') return;
|
|
762
730
|
}
|
|
763
731
|
}
|
|
764
732
|
|
|
765
733
|
// If no tools → final response
|
|
766
734
|
if (pendingTools.length === 0) {
|
|
767
735
|
messages.push({ role: 'assistant', content: finalText });
|
|
768
|
-
|
|
736
|
+
yield { type: 'step_end', step };
|
|
769
737
|
yield { type: 'done', output: finalText, steps: step };
|
|
770
738
|
return;
|
|
771
739
|
}
|
|
@@ -791,7 +759,7 @@ export function createAgent(runtime: AgentRuntime) {
|
|
|
791
759
|
if (action.type === 'final') {
|
|
792
760
|
yield { type: 'text_delta', text: action.content };
|
|
793
761
|
messages.push({ role: 'assistant', content: action.content });
|
|
794
|
-
|
|
762
|
+
yield { type: 'step_end', step };
|
|
795
763
|
yield { type: 'done', output: action.content, steps: step };
|
|
796
764
|
return;
|
|
797
765
|
}
|
|
@@ -803,7 +771,7 @@ export function createAgent(runtime: AgentRuntime) {
|
|
|
803
771
|
try {
|
|
804
772
|
const r = await executeTool(runtime.toolProvider, call, runtime);
|
|
805
773
|
yield { type: 'tool_end', name: call.name, result: toStreamResult(r) };
|
|
806
|
-
const rText = r.success ?
|
|
774
|
+
const rText = r.success ? r.output : `ERROR: ${r.error ?? 'unknown failure'}`;
|
|
807
775
|
messages.push({
|
|
808
776
|
role: 'tool',
|
|
809
777
|
content: formatToolResultContent(call, r),
|
|
@@ -825,7 +793,7 @@ export function createAgent(runtime: AgentRuntime) {
|
|
|
825
793
|
try {
|
|
826
794
|
const r = await executeTool(runtime.toolProvider, action, runtime);
|
|
827
795
|
yield { type: 'tool_end', name: action.name, result: toStreamResult(r) };
|
|
828
|
-
const rText = r.success ?
|
|
796
|
+
const rText = r.success ? r.output : `ERROR: ${r.error ?? 'unknown failure'}`;
|
|
829
797
|
messages.push({
|
|
830
798
|
role: 'tool',
|
|
831
799
|
content: formatToolResultContent(action, r),
|
|
@@ -841,7 +809,7 @@ export function createAgent(runtime: AgentRuntime) {
|
|
|
841
809
|
});
|
|
842
810
|
}
|
|
843
811
|
}
|
|
844
|
-
|
|
812
|
+
yield { type: 'step_end', step };
|
|
845
813
|
continue;
|
|
846
814
|
}
|
|
847
815
|
|
|
@@ -851,7 +819,7 @@ export function createAgent(runtime: AgentRuntime) {
|
|
|
851
819
|
const call = pendingTools[i]!;
|
|
852
820
|
const r = results[i]!;
|
|
853
821
|
yield { type: 'tool_end', name: call.name, result: toStreamResult(r) };
|
|
854
|
-
const rText = r.success ?
|
|
822
|
+
const rText = r.success ? r.output : `ERROR: ${r.error ?? 'unknown failure'}`;
|
|
855
823
|
messages.push({
|
|
856
824
|
role: 'tool',
|
|
857
825
|
content: formatToolResultContent(call, r),
|
|
@@ -869,7 +837,7 @@ export function createAgent(runtime: AgentRuntime) {
|
|
|
869
837
|
|
|
870
838
|
if (action.type === 'final') {
|
|
871
839
|
messages.push({ role: 'assistant', content: action.content });
|
|
872
|
-
|
|
840
|
+
yield { type: 'step_end', step };
|
|
873
841
|
yield { type: 'done', output: action.content, steps: step };
|
|
874
842
|
return;
|
|
875
843
|
}
|
|
@@ -893,7 +861,7 @@ export function createAgent(runtime: AgentRuntime) {
|
|
|
893
861
|
const call = calls[i]!;
|
|
894
862
|
const r = results[i]!;
|
|
895
863
|
yield { type: 'tool_end', name: call.name, result: toStreamResult(r) };
|
|
896
|
-
const rText = r.success ?
|
|
864
|
+
const rText = r.success ? r.output : `ERROR: ${r.error ?? 'unknown failure'}`;
|
|
897
865
|
messages.push({
|
|
898
866
|
role: 'tool',
|
|
899
867
|
content: formatToolResultContent(call, r),
|
|
@@ -907,7 +875,7 @@ export function createAgent(runtime: AgentRuntime) {
|
|
|
907
875
|
}
|
|
908
876
|
}
|
|
909
877
|
|
|
910
|
-
|
|
878
|
+
yield { type: 'step_end', step };
|
|
911
879
|
}
|
|
912
880
|
|
|
913
881
|
yield { type: 'done', output: 'ERROR: max steps exceeded', steps: maxSteps };
|