@bluecopa/harness 0.1.0-snapshot.49 → 0.1.0-snapshot.5
This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.
- package/README.md +117 -212
- package/package.json +1 -2
- package/src/agent/create-agent.ts +27 -30
- package/src/agent/types.ts +24 -20
- package/src/interfaces/hooks.ts +1 -2
- package/src/interfaces/tool-provider.ts +2 -0
- package/src/loop/context-store.ts +9 -12
- package/src/loop/vercel-agent-loop.ts +118 -44
- package/src/skills/skill-router.ts +6 -12
- package/tests/integration/agent-skill-default-from-sandbox.spec.ts +2 -3
- package/tests/unit/structured-messages.spec.ts +1 -1
- package/vitest.config.ts +1 -1
- package/src/arc/agent-runner.ts +0 -947
- package/src/arc/arc-loop.ts +0 -845
- package/src/arc/arc-types.ts +0 -115
- package/src/arc/bridge-tools.ts +0 -170
- package/src/arc/bridged-tool-provider.ts +0 -80
- package/src/arc/consolidation.ts +0 -118
- package/src/arc/context-window.ts +0 -267
- package/src/arc/create-arc-agent.ts +0 -99
- package/src/arc/debug.ts +0 -62
- package/src/arc/episode-compressor.ts +0 -225
- package/src/arc/memory-manager.ts +0 -245
- package/src/arc/message-convert.ts +0 -123
- package/src/arc/multi-model.ts +0 -70
- package/src/arc/object-store/fs-object-store.ts +0 -60
- package/src/arc/object-store/memory-object-store.ts +0 -41
- package/src/arc/object-store/object-store.ts +0 -12
- package/src/arc/profile-builder.ts +0 -172
- package/src/arc/resilience/bulkhead.ts +0 -110
- package/src/arc/resilience/circuit-breaker.ts +0 -112
- package/src/arc/resilience/fallback.ts +0 -27
- package/src/arc/resilience/index.ts +0 -21
- package/src/arc/resilience/pipeline.ts +0 -103
- package/src/arc/resilience/retry.ts +0 -90
- package/src/arc/resilience/timeout.ts +0 -60
- package/src/arc/resilience/types.ts +0 -71
- package/src/arc/result-pager.ts +0 -77
- package/src/arc/sig.ts +0 -115
- package/src/arc/skill-resolver.ts +0 -81
- package/src/arc/stores/episode-store.ts +0 -120
- package/src/arc/stores/long-term-store.ts +0 -86
- package/src/arc/stores/rxdb-setup.ts +0 -113
- package/src/arc/stores/session-memo-store.ts +0 -58
- package/src/arc/tools.ts +0 -67
- package/src/arc/types.ts +0 -363
- package/src/arc/utils.ts +0 -37
- package/src/hooks/middleware.ts +0 -95
- package/testing/index.ts +0 -22
- package/testing/scenario-replay.ts +0 -209
- package/testing/scenario-types.ts +0 -38
- package/testing/scripted-llm.ts +0 -230
- package/tests/arc/channel.test.ts +0 -170
- package/tests/arc/context-window.test.ts +0 -396
- package/tests/arc/e2e.test.ts +0 -353
- package/tests/arc/error-paths.test.ts +0 -402
- package/tests/arc/live-integration.test.ts +0 -357
- package/tests/arc/memory-manager.test.ts +0 -384
- package/tests/arc/middleware.test.ts +0 -113
- package/tests/arc/process-interleaving.test.ts +0 -432
- package/tests/arc/process-profiles.test.ts +0 -366
- package/tests/arc/resilience-integration.test.ts +0 -381
- package/tests/arc/resilience.test.ts +0 -575
- package/tests/arc/result-paging.test.ts +0 -392
- package/tests/arc/scenario-driven.test.ts +0 -297
- package/tests/arc/tool-dispatch.test.ts +0 -340
- package/tests/arc/wasm-pbt.test.ts +0 -104
- package/verify/Cargo.lock +0 -637
- package/verify/Cargo.toml +0 -24
- package/verify/src/lib.rs +0 -5
- package/verify/src/main.rs +0 -165
- package/verify/src/model/context.rs +0 -100
- package/verify/src/model/mod.rs +0 -6
- package/verify/src/model/orchestrator.rs +0 -371
- package/verify/src/model/process.rs +0 -140
- package/verify/src/model/types.rs +0 -273
- package/verify/src/properties/liveness.rs +0 -32
- package/verify/src/properties/mod.rs +0 -4
- package/verify/src/properties/safety.rs +0 -78
- package/verify/src/trace/event.rs +0 -155
- package/verify/src/trace/mod.rs +0 -2
- package/verify/src/trace/validator.rs +0 -367
- package/verify/src/wasm/mod.rs +0 -3
- package/verify/src/wasm/scenario_generator.rs +0 -400
- package/verify/src/wasm/types.rs +0 -104
- package/verify/src/wasm/wasm_validator.rs +0 -107
- package/verify/tests/model_check.rs +0 -49
- package/verify/tests/trace_validation.rs +0 -147
package/README.md
CHANGED
|
@@ -2,17 +2,9 @@
|
|
|
2
2
|
|
|
3
3
|
Provider-agnostic TypeScript agent framework with Claude-code-compatible tool semantics.
|
|
4
4
|
|
|
5
|
-
|
|
5
|
+
The harness provides the core loop that drives an AI agent: send messages to an LLM, execute the tool calls it returns, feed results back, and repeat until the LLM produces a final text response.
|
|
6
6
|
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
## Install
|
|
10
|
-
|
|
11
|
-
```bash
|
|
12
|
-
pnpm add @bluecopa/harness
|
|
13
|
-
```
|
|
14
|
-
|
|
15
|
-
## Development
|
|
7
|
+
## Quickstart
|
|
16
8
|
|
|
17
9
|
```bash
|
|
18
10
|
pnpm install
|
|
@@ -21,11 +13,9 @@ pnpm test
|
|
|
21
13
|
|
|
22
14
|
## Architecture
|
|
23
15
|
|
|
24
|
-
### Single-Agent Loop
|
|
25
|
-
|
|
26
16
|
```
|
|
27
17
|
┌──────────────┐ ┌──────────────┐ ┌──────────────────┐
|
|
28
|
-
│ createAgent
|
|
18
|
+
│ createAgent │────▶│ AgentLoop │────▶│ LLM (Claude) │
|
|
29
19
|
│ (turn loop) │ │ (nextAction)│ │ │
|
|
30
20
|
└──────┬───────┘ └──────────────┘ └──────────────────┘
|
|
31
21
|
│ │
|
|
@@ -37,82 +27,20 @@ pnpm test
|
|
|
37
27
|
└──────────────┘
|
|
38
28
|
```
|
|
39
29
|
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
│ tools: Thread, Check, Cancel, Remember, ReadEpisode
|
|
45
|
-
│
|
|
46
|
-
│ Turn 1 (parallel):
|
|
47
|
-
├──► Process 0 ("read auth", model=fast) ─┐
|
|
48
|
-
├──► Process 1 ("read routes", model=fast) ─┼──► Episodes
|
|
49
|
-
├──► Process 2 ("read tests", model=fast) ─┘
|
|
50
|
-
│
|
|
51
|
-
│ Turn 2 (dispatch dependent work):
|
|
52
|
-
├──► Thread("fix bug", context=[ep0,ep1,ep2]) ──► Episode
|
|
53
|
-
│
|
|
54
|
-
│ Turn 3 (parallel):
|
|
55
|
-
├──► Thread("run tests", context=[ep3]) ─┐
|
|
56
|
-
├──► Thread("update docs", context=[ep3]) ─┘
|
|
57
|
-
│
|
|
58
|
-
└──► Final text response
|
|
59
|
-
```
|
|
60
|
-
|
|
61
|
-
Full architecture doc: [`docs/arc.md`](../docs/arc.md)
|
|
62
|
-
|
|
63
|
-
---
|
|
64
|
-
|
|
65
|
-
## ToolProvider
|
|
66
|
-
|
|
67
|
-
The contract for tool execution. All agent modes use this interface.
|
|
68
|
-
|
|
69
|
-
```typescript
|
|
70
|
-
interface ToolProvider {
|
|
71
|
-
bash(command: string, options?: BashOptions): Promise<ToolResult>;
|
|
72
|
-
readFile(path: string, options?: ReadOptions): Promise<ToolResult>;
|
|
73
|
-
writeFile(path: string, content: string): Promise<ToolResult>;
|
|
74
|
-
editFile(path: string, oldText: string, newText: string): Promise<ToolResult>;
|
|
75
|
-
glob(pattern: string, options?: GlobOptions): Promise<ToolResult>;
|
|
76
|
-
grep(pattern: string, path?: string, options?: GrepOptions): Promise<ToolResult>;
|
|
77
|
-
webFetch?(options: WebFetchOptions): Promise<ToolResult>;
|
|
78
|
-
webSearch?(query: string): Promise<ToolResult>;
|
|
79
|
-
capabilities(): ToolProviderCapabilities;
|
|
80
|
-
}
|
|
81
|
-
|
|
82
|
-
interface ToolResult {
|
|
83
|
-
success: boolean;
|
|
84
|
-
output: string;
|
|
85
|
-
error?: string;
|
|
86
|
-
}
|
|
87
|
-
```
|
|
88
|
-
|
|
89
|
-
Built-in implementations:
|
|
30
|
+
1. `createAgent` drives a deterministic step loop
|
|
31
|
+
2. Each step calls `loop.nextAction(messages)` to get the LLM's decision
|
|
32
|
+
3. If it's a tool call, the harness executes it via `ToolProvider` and appends the result
|
|
33
|
+
4. If it's a final action, the loop ends and returns the result
|
|
90
34
|
|
|
91
|
-
|
|
92
|
-
|----------|-------------|
|
|
93
|
-
| `LocalToolProvider` | Runs tools on the local filesystem |
|
|
94
|
-
| `E2BToolProvider` | Routes tools to a sandbox VM via `ControlPlaneE2BExecutor` |
|
|
95
|
-
| `CompositeToolProvider` | Combines multiple providers (e.g. local filesystem + sandbox bash) |
|
|
35
|
+
## Using with the sandbox
|
|
96
36
|
|
|
97
|
-
|
|
37
|
+
The most common setup connects the harness to a running sandbox service via `ControlPlaneE2BExecutor`:
|
|
98
38
|
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
```typescript
|
|
102
|
-
interface SandboxProvider {
|
|
103
|
-
exec(command: string, options?: SandboxExecOptions): Promise<SandboxExecResult>;
|
|
104
|
-
readSandboxFile(path: string): Promise<SandboxFileBlob>;
|
|
105
|
-
writeSandboxFile(path: string, content: SandboxFileBlob): Promise<void>;
|
|
106
|
-
}
|
|
107
|
-
```
|
|
108
|
-
|
|
109
|
-
Used by `SkillManager` for executing skill scripts in isolated VMs.
|
|
110
|
-
|
|
111
|
-
## Connecting to a Sandbox
|
|
112
|
-
|
|
113
|
-
```typescript
|
|
114
|
-
import { ControlPlaneE2BExecutor } from './src/providers/control-plane-e2b-executor';
|
|
39
|
+
```ts
|
|
40
|
+
import { createAgent } from './src/agent/create-agent';
|
|
115
41
|
import { E2BToolProvider } from './src/providers/e2b-tool-provider';
|
|
42
|
+
import { ControlPlaneE2BExecutor } from './src/providers/control-plane-e2b-executor';
|
|
43
|
+
import { VercelAgentLoop } from './src/loop/vercel-agent-loop';
|
|
116
44
|
|
|
117
45
|
// Connect to sandbox service
|
|
118
46
|
const executor = new ControlPlaneE2BExecutor({
|
|
@@ -122,174 +50,155 @@ const executor = new ControlPlaneE2BExecutor({
|
|
|
122
50
|
});
|
|
123
51
|
await executor.initialize(); // creates a Firecracker VM
|
|
124
52
|
|
|
125
|
-
|
|
53
|
+
// Build and run the agent
|
|
54
|
+
const agent = createAgent({
|
|
55
|
+
toolProvider: new E2BToolProvider(executor),
|
|
56
|
+
loop: new VercelAgentLoop(), // needs ANTHROPIC_API_KEY
|
|
57
|
+
});
|
|
126
58
|
|
|
127
|
-
|
|
59
|
+
const result = await agent.run('create a bar chart of sales data');
|
|
60
|
+
console.log(result.output); // LLM's final response
|
|
61
|
+
console.log(result.steps); // number of tool steps
|
|
128
62
|
|
|
129
|
-
await executor.destroy();
|
|
63
|
+
await executor.destroy(); // tears down the VM
|
|
130
64
|
```
|
|
131
65
|
|
|
132
|
-
|
|
66
|
+
For a complete working example, see [`examples/chat-assistant/src/chat.ts`](../examples/chat-assistant/src/chat.ts).
|
|
67
|
+
|
|
68
|
+
### From environment variables
|
|
133
69
|
|
|
134
|
-
|
|
70
|
+
`ControlPlaneE2BExecutor.fromEnv()` reads `SAMYX_BASE_URL` and `SAMYX_API_KEY` automatically:
|
|
135
71
|
|
|
136
|
-
|
|
72
|
+
```ts
|
|
73
|
+
const executor = ControlPlaneE2BExecutor.fromEnv();
|
|
74
|
+
```
|
|
137
75
|
|
|
138
|
-
|
|
76
|
+
## Using locally (no sandbox)
|
|
139
77
|
|
|
140
|
-
|
|
78
|
+
For development without a sandbox service, use `LocalToolProvider` which runs tools on the local machine:
|
|
79
|
+
|
|
80
|
+
```ts
|
|
141
81
|
import { createAgent } from './src/agent/create-agent';
|
|
142
82
|
import { LocalToolProvider } from './src/providers/local-tool-provider';
|
|
143
83
|
|
|
144
84
|
const agent = createAgent({
|
|
145
85
|
toolProvider: new LocalToolProvider(process.cwd()),
|
|
146
|
-
loop: new VercelAgentLoop(),
|
|
86
|
+
loop: new VercelAgentLoop(),
|
|
147
87
|
});
|
|
148
88
|
|
|
149
89
|
const result = await agent.run('list all TypeScript files');
|
|
150
|
-
console.log(result.output);
|
|
151
90
|
```
|
|
152
91
|
|
|
153
|
-
|
|
92
|
+
## Key modules
|
|
93
|
+
|
|
94
|
+
### Agent creation (`src/agent/create-agent.ts`)
|
|
154
95
|
|
|
155
|
-
|
|
156
|
-
|--------|------|---------|-------------|
|
|
157
|
-
| `toolProvider` | `ToolProvider` | required | Executes tool calls |
|
|
158
|
-
| `loop` | `AgentLoop` | `VercelAgentLoop` | LLM decision loop |
|
|
159
|
-
| `sandboxProvider` | `SandboxProvider` | — | Higher-level sandbox operations |
|
|
160
|
-
| `maxSteps` | `number` | 30 | Max tool steps per run |
|
|
161
|
-
| `telemetry` | `HarnessTelemetry` | — | OpenTelemetry-style tracing |
|
|
162
|
-
| `skillIndexPath` | `string` | — | Path to skill index JSON for routing |
|
|
96
|
+
`createAgent(options)` returns an agent with a `.run(prompt, options?)` method. Options:
|
|
163
97
|
|
|
164
|
-
|
|
98
|
+
| Option | Type | Description |
|
|
99
|
+
|--------|------|-------------|
|
|
100
|
+
| `toolProvider` | `ToolProvider` | Required. Executes tool calls |
|
|
101
|
+
| `loop` | `AgentLoop` | LLM decision loop (default: `VercelAgentLoop`) |
|
|
102
|
+
| `sandboxProvider` | `SandboxProvider` | Optional. Higher-level sandbox ops (file download, exec with env) |
|
|
103
|
+
| `maxSteps` | `number` | Max tool steps per run (default: 30) |
|
|
104
|
+
| `telemetry` | `HarnessTelemetry` | Optional. OpenTelemetry-style tracing |
|
|
105
|
+
| `skillIndexPath` | `string` | Optional. Path to skill index JSON |
|
|
165
106
|
|
|
166
|
-
|
|
107
|
+
### Agent loop (`src/loop/vercel-agent-loop.ts`)
|
|
167
108
|
|
|
168
|
-
|
|
109
|
+
`VercelAgentLoop` calls Claude via the Vercel AI SDK. It supports:
|
|
110
|
+
- Parallel tool calls (returns `ToolBatchAction` when the LLM requests multiple tools at once)
|
|
111
|
+
- Configurable system prompt
|
|
112
|
+
- Model selection via `HARNESS_MODEL` env var (default: `claude-sonnet-4-5`)
|
|
113
|
+
|
|
114
|
+
```ts
|
|
169
115
|
const loop = new VercelAgentLoop({
|
|
170
116
|
systemPrompt: 'You are a helpful coding assistant.',
|
|
171
|
-
model: 'claude-sonnet-4-5', // or HARNESS_MODEL env var
|
|
172
117
|
});
|
|
173
118
|
```
|
|
174
119
|
|
|
175
|
-
###
|
|
120
|
+
### Tool provider (`src/interfaces/tool-provider.ts`)
|
|
176
121
|
|
|
177
|
-
|
|
122
|
+
The contract for tool execution:
|
|
178
123
|
|
|
179
|
-
```
|
|
180
|
-
|
|
181
|
-
|
|
124
|
+
```ts
|
|
125
|
+
interface ToolProvider {
|
|
126
|
+
bash(command: string, options?: BashOptions): Promise<ToolResult>;
|
|
127
|
+
readFile(path: string, options?: ReadOptions): Promise<ToolResult>;
|
|
128
|
+
writeFile(path: string, content: string): Promise<ToolResult>;
|
|
129
|
+
editFile(path: string, oldText: string, newText: string): Promise<ToolResult>;
|
|
130
|
+
glob(pattern: string, options?: GlobOptions): Promise<ToolResult>;
|
|
131
|
+
grep(pattern: string, path?: string, options?: GrepOptions): Promise<ToolResult>;
|
|
132
|
+
webFetch?(options: WebFetchOptions): Promise<ToolResult>;
|
|
133
|
+
webSearch?(query: string): Promise<ToolResult>;
|
|
134
|
+
capabilities(): ToolProviderCapabilities;
|
|
135
|
+
}
|
|
182
136
|
|
|
183
|
-
|
|
184
|
-
|
|
185
|
-
|
|
186
|
-
|
|
187
|
-
|
|
188
|
-
onActivity: (entry) => console.log(entry),
|
|
189
|
-
onLlmRequest: async (prompt) => callLLM(prompt),
|
|
190
|
-
onWebFetchRequest: async (url) => fetch(url),
|
|
191
|
-
});
|
|
137
|
+
interface ToolResult {
|
|
138
|
+
success: boolean;
|
|
139
|
+
output: string;
|
|
140
|
+
error?: string;
|
|
141
|
+
}
|
|
192
142
|
```
|
|
193
143
|
|
|
194
|
-
|
|
195
|
-
|
|
196
|
-
**REPL mode**: When the LLM returns a Bash action with the REPL marker, the loop writes a Python script into the sandbox, injects the bridge module, runs the script, and polls for sub-requests (LLM, web_fetch, ask_user) that the harness fulfills.
|
|
197
|
-
|
|
198
|
-
---
|
|
144
|
+
Built-in implementations:
|
|
199
145
|
|
|
200
|
-
|
|
146
|
+
| Provider | Description |
|
|
147
|
+
|----------|-------------|
|
|
148
|
+
| `LocalToolProvider` | Runs tools on the local filesystem |
|
|
149
|
+
| `E2BToolProvider` | Routes tools to an E2B-compatible executor over HTTP |
|
|
150
|
+
| `CompositeToolProvider` | Combines multiple providers (e.g. sandbox + web) |
|
|
201
151
|
|
|
202
|
-
|
|
152
|
+
### Action types (`src/agent/types.ts`)
|
|
203
153
|
|
|
204
|
-
|
|
205
|
-
import { createArcAgent } from './src/arc/create-arc-agent';
|
|
154
|
+
The LLM returns one of these action types each turn:
|
|
206
155
|
|
|
207
|
-
|
|
208
|
-
|
|
209
|
-
|
|
210
|
-
|
|
211
|
-
|
|
212
|
-
|
|
213
|
-
|
|
214
|
-
});
|
|
156
|
+
```ts
|
|
157
|
+
// Single tool call
|
|
158
|
+
interface ToolCallAction {
|
|
159
|
+
type: 'tool';
|
|
160
|
+
name: 'Bash' | 'Read' | 'Write' | 'Edit' | 'Glob' | 'Grep' | ...;
|
|
161
|
+
args: Record<string, unknown>;
|
|
162
|
+
}
|
|
215
163
|
|
|
216
|
-
//
|
|
217
|
-
|
|
218
|
-
|
|
219
|
-
|
|
220
|
-
if (event.type === 'done') console.log(`Done in ${event.stats.durationMs}ms`);
|
|
164
|
+
// Multiple independent tool calls (executed in parallel)
|
|
165
|
+
interface ToolBatchAction {
|
|
166
|
+
type: 'tool_batch';
|
|
167
|
+
calls: ToolCallAction[];
|
|
221
168
|
}
|
|
222
169
|
|
|
223
|
-
//
|
|
224
|
-
|
|
170
|
+
// Final text response (ends the loop)
|
|
171
|
+
interface FinalAction {
|
|
172
|
+
type: 'final';
|
|
173
|
+
content: string;
|
|
174
|
+
}
|
|
225
175
|
```
|
|
226
176
|
|
|
227
|
-
###
|
|
228
|
-
|
|
229
|
-
| Option | Type | Default | Description |
|
|
230
|
-
|--------|------|---------|-------------|
|
|
231
|
-
| `model` | `string` | `'claude-opus-4-6'` | Orchestrator model (ID or tier name) |
|
|
232
|
-
| `modelMap` | `Record<ModelTier, string>` | haiku/sonnet/opus | Maps fast/medium/strong to model IDs |
|
|
233
|
-
| `apiKey` | `string` | — | Anthropic API key |
|
|
234
|
-
| `systemPrompt` | `string` | built-in | Custom orchestrator system prompt |
|
|
235
|
-
| `maxTurns` | `number` | 30 | Max orchestrator turns |
|
|
236
|
-
| `processTimeout` | `number` | 120_000 | Per-process timeout (ms) |
|
|
237
|
-
| `processMaxSteps` | `number` | 20 | Per-process max tool steps |
|
|
238
|
-
| `contextWindowSize` | `number` | 200_000 | Context window in tokens |
|
|
239
|
-
| `outputReserve` | `number` | 20_000 | Tokens reserved for output |
|
|
240
|
-
| `autoMemory` | `boolean` | true | Auto-detect patterns from episodes |
|
|
241
|
-
| `episodeStore` | `EpisodeStore` | required | Stores episode summaries + traces |
|
|
242
|
-
| `sessionMemoStore` | `SessionMemoStore` | required | Stores session memos |
|
|
243
|
-
| `longTermStore` | `LongTermStore` | required | Stores long-term memories |
|
|
244
|
-
| `taskId` | `string` | required | Task identifier |
|
|
245
|
-
| `sessionId` | `string` | required | Session identifier |
|
|
246
|
-
| `toolProvider` | `ToolProvider` | required | Tool execution |
|
|
247
|
-
| `processTools` | `Record<string, AnyTool>` | builtinTools | Tools available inside processes |
|
|
248
|
-
| `extraOrchestratorTools` | `Record<string, AnyTool>` | — | Custom orchestrator tools |
|
|
249
|
-
| `onOrchestratorTool` | `function` | — | Handler for custom orchestrator tools |
|
|
250
|
-
| `resilience` | `ResiliencePolicy` | — | Composable resilience pipeline |
|
|
251
|
-
| `traceWriter` | `function` | — | Callback for trace event emission |
|
|
252
|
-
|
|
253
|
-
### Resilience
|
|
254
|
-
|
|
255
|
-
```typescript
|
|
256
|
-
import { resilience } from './src/arc/resilience';
|
|
257
|
-
|
|
258
|
-
const pipeline = resilience()
|
|
259
|
-
.retry({ maxRetries: 2, baseDelay: 1000 })
|
|
260
|
-
.timeout({ durationMs: 30_000 })
|
|
261
|
-
.circuitBreaker({ failureThreshold: 5 })
|
|
262
|
-
.build();
|
|
263
|
-
|
|
264
|
-
const agent = await createArcAgent({
|
|
265
|
-
// ...config
|
|
266
|
-
resilience: pipeline,
|
|
267
|
-
});
|
|
268
|
-
```
|
|
177
|
+
### LCM tool loop (`src/loop/lcm-tool-loop.ts`)
|
|
269
178
|
|
|
270
|
-
|
|
179
|
+
`LCMToolLoop` wraps another loop to add LCM-based tool routing, REPL script execution, and bridge-based tool dispatch. Used in the chat-assistant example.
|
|
271
180
|
|
|
272
|
-
|
|
273
|
-
|
|
274
|
-
|
|
275
|
-
|
|
276
|
-
|
|
277
|
-
|
|
181
|
+
### Sandbox provider (`src/interfaces/sandbox-provider.ts`)
|
|
182
|
+
|
|
183
|
+
Higher-level sandbox operations beyond basic tool calls:
|
|
184
|
+
|
|
185
|
+
```ts
|
|
186
|
+
interface SandboxProvider {
|
|
187
|
+
exec(command: string, options?: SandboxExecOptions): Promise<SandboxExecResult>;
|
|
188
|
+
readSandboxFile(path: string): Promise<SandboxFileBlob>;
|
|
189
|
+
writeSandboxFile(path: string, content: SandboxFileBlob): Promise<void>;
|
|
190
|
+
}
|
|
278
191
|
```
|
|
279
192
|
|
|
280
|
-
|
|
193
|
+
### Observability (`src/observability/otel.ts`)
|
|
281
194
|
|
|
282
|
-
|
|
195
|
+
`HarnessTelemetry` provides OpenTelemetry-style spans and metrics for agent runs.
|
|
283
196
|
|
|
284
|
-
## Package
|
|
197
|
+
## Package layout
|
|
285
198
|
|
|
286
199
|
```
|
|
287
200
|
src/
|
|
288
201
|
├── agent/ # createAgent, step executor, types
|
|
289
|
-
├── arc/ # ArcLoop orchestrator, processes, memory, resilience
|
|
290
|
-
│ ├── resilience/ # Retry, circuit breaker, timeout, bulkhead, fallback
|
|
291
|
-
│ ├── stores/ # RxDB + in-memory store implementations
|
|
292
|
-
│ └── object-store/ # Pluggable cloud sync (fs, memory)
|
|
293
202
|
├── interfaces/ # ToolProvider, SandboxProvider, AgentLoop contracts
|
|
294
203
|
├── loop/ # VercelAgentLoop, LCMToolLoop
|
|
295
204
|
├── providers/ # LocalToolProvider, E2BToolProvider, ControlPlaneE2BExecutor
|
|
@@ -297,20 +206,16 @@ src/
|
|
|
297
206
|
├── hooks/ # Pre/post tool call hooks
|
|
298
207
|
├── permissions/ # Tool permission checks
|
|
299
208
|
├── sessions/ # Session persistence
|
|
300
|
-
├── subagents/ # Subagent spawning
|
|
209
|
+
├── subagents/ # Subagent spawning and task tools
|
|
301
210
|
├── skills/ # Skill index, routing, and management
|
|
302
211
|
├── optimization/ # Benchmark runner
|
|
303
212
|
└── observability/ # OpenTelemetry integration
|
|
304
|
-
|
|
305
|
-
verify/ # Rust formal verification (Stateright model checker)
|
|
306
|
-
testing/ # Adversarial scenario replay harness
|
|
307
|
-
tests/ # Vitest test suite
|
|
308
213
|
```
|
|
309
214
|
|
|
310
215
|
## Documentation
|
|
311
216
|
|
|
312
|
-
-
|
|
313
|
-
-
|
|
314
|
-
-
|
|
315
|
-
-
|
|
316
|
-
- [
|
|
217
|
+
- Provider guide: `docs/guides/providers.md`
|
|
218
|
+
- Skills guide: `docs/guides/skills.md`
|
|
219
|
+
- Observability guide: `docs/guides/observability.md`
|
|
220
|
+
- Release process: `../docs/RELEASE.md`
|
|
221
|
+
- Full example: [`../examples/chat-assistant/src/chat.ts`](../examples/chat-assistant/src/chat.ts)
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@bluecopa/harness",
|
|
3
|
-
"version": "0.1.0-snapshot.49",
|
|
3
|
+
"version": "0.1.0-snapshot.5",
|
|
4
4
|
"description": "Provider-agnostic TypeScript agent framework",
|
|
5
5
|
"license": "UNLICENSED",
|
|
6
6
|
"scripts": {
|
|
@@ -10,7 +10,6 @@
|
|
|
10
10
|
"dependencies": {
|
|
11
11
|
"@ai-sdk/anthropic": "^3.0.48",
|
|
12
12
|
"ai": "^6.0.101",
|
|
13
|
-
"rxdb": "^15.39.0",
|
|
14
13
|
"zod": "^4.1.11"
|
|
15
14
|
},
|
|
16
15
|
"devDependencies": {
|
|
@@ -7,12 +7,14 @@ import type { HarnessTelemetry } from '../observability/otel';
|
|
|
7
7
|
import { HookRunner } from '../hooks/hook-runner';
|
|
8
8
|
import { PermissionManager } from '../permissions/permission-manager';
|
|
9
9
|
import { VercelAgentLoop } from '../loop/vercel-agent-loop';
|
|
10
|
+
export type { SystemPromptBlock, VercelAgentLoopConfig } from '../loop/vercel-agent-loop';
|
|
11
|
+
export type { PrepareStepContext, PrepareStepResult } from './types';
|
|
10
12
|
import { SkillManager } from '../skills/skill-manager';
|
|
11
13
|
import { SkillRouter } from '../skills/skill-router';
|
|
12
14
|
import type { SkillSummary } from '../skills/skill-types';
|
|
13
15
|
import { SingleFlightStepExecutor } from './step-executor';
|
|
14
|
-
import type { AgentAction, AgentLoop, AgentMessage, AgentRunResult, AgentStreamEvent, ToolCallAction, ToolBatchAction, ToolCallInfo, ToolResultInfo } from './types';
|
|
15
|
-
export type { AgentAction, AgentLoop, AgentMessage, AgentRunResult, AgentStreamEvent, ToolCallAction, ToolBatchAction, ToolCallInfo, ToolResultInfo };
|
|
16
|
+
import type { AgentAction, AgentLoop, AgentMessage, AgentRunResult, AgentStreamEvent, StepUsage, ToolCallAction, ToolBatchAction, ToolCallInfo, ToolResultInfo } from './types';
|
|
17
|
+
export type { AgentAction, AgentLoop, AgentMessage, AgentRunResult, AgentStreamEvent, StepUsage, ToolCallAction, ToolBatchAction, ToolCallInfo, ToolResultInfo };
|
|
16
18
|
export { HookRunner } from '../hooks/hook-runner';
|
|
17
19
|
export { PermissionManager } from '../permissions/permission-manager';
|
|
18
20
|
export type { PermissionMode, PermissionResolver, PermissionRequest } from '../permissions/permission-manager';
|
|
@@ -37,8 +39,6 @@ export interface AgentRuntime {
|
|
|
37
39
|
/** Custom tool executor. Called for every tool action. Return null to fall through to built-in dispatch.
|
|
38
40
|
* When hookRunner/permissionManager are provided on the runtime, they are automatically applied before/after this callback — no manual wiring needed. */
|
|
39
41
|
executeToolAction?: (action: ToolCallAction) => Promise<ToolResult | null>;
|
|
40
|
-
/** Progress callback fired before/after each tool call during run(). */
|
|
41
|
-
onToolProgress?: (event: { type: 'tool_start'; name: string; args: Record<string, unknown> } | { type: 'tool_end'; name: string; success: boolean; durationMs: number }) => void;
|
|
42
42
|
}
|
|
43
43
|
|
|
44
44
|
/**
|
|
@@ -220,9 +220,12 @@ function toStreamResult(r: ToolResult): { success: boolean; output: string; erro
|
|
|
220
220
|
return base;
|
|
221
221
|
}
|
|
222
222
|
|
|
223
|
-
/** Format
|
|
223
|
+
/** Format content string for LLM context. Uses modelOutput (compact summary) when available. */
|
|
224
224
|
function formatToolResultContent(call: ToolCallAction, result: ToolResult): string {
|
|
225
|
-
|
|
225
|
+
// Use modelOutput for LLM context when available — keeps context compact
|
|
226
|
+
const content = result.success
|
|
227
|
+
? (result.modelOutput ?? result.output)
|
|
228
|
+
: `ERROR: ${result.error ?? 'unknown failure'}`;
|
|
226
229
|
switch (call.name) {
|
|
227
230
|
case 'Write':
|
|
228
231
|
return `Write(${call.args.path}): ${result.success ? 'ok' : content}`;
|
|
@@ -517,6 +520,11 @@ export function createAgent(runtime: AgentRuntime) {
|
|
|
517
520
|
? { nextAction: runtime.nextAction }
|
|
518
521
|
: new VercelAgentLoop());
|
|
519
522
|
|
|
523
|
+
/** Read lastUsage from the loop if it's a VercelAgentLoop. */
|
|
524
|
+
function getLoopUsage(): StepUsage | undefined {
|
|
525
|
+
return loop instanceof VercelAgentLoop ? loop.lastUsage : undefined;
|
|
526
|
+
}
|
|
527
|
+
|
|
520
528
|
async function resolveSkillContext(prompt: string): Promise<string> {
|
|
521
529
|
if (!skillManager || !skillIndexPath) return '';
|
|
522
530
|
|
|
@@ -598,18 +606,14 @@ export function createAgent(runtime: AgentRuntime) {
|
|
|
598
606
|
|
|
599
607
|
// Execute valid calls via batch (sequential sandbox ops) or parallel fallback
|
|
600
608
|
if (validCalls.length > 0) {
|
|
601
|
-
for (const c of validCalls) runtime.onToolProgress?.({ type: 'tool_start', name: c.name, args: c.args });
|
|
602
|
-
const batchStart = Date.now();
|
|
603
609
|
const results = await executeBatch(validCalls, runtime.toolProvider, runtime);
|
|
604
|
-
const batchMs = Date.now() - batchStart;
|
|
605
610
|
for (let i = 0; i < validCalls.length; i++) {
|
|
606
611
|
const call = validCalls[i]!;
|
|
607
612
|
const r = results[i]!;
|
|
608
|
-
runtime.onToolProgress?.({ type: 'tool_end', name: call.name, success: r.success, durationMs: batchMs });
|
|
609
613
|
if (!r.success) {
|
|
610
614
|
recordAgentError(runtime.telemetry);
|
|
611
615
|
}
|
|
612
|
-
const resultText = r.success ? r.output : `ERROR: ${r.error ?? 'unknown failure'}`;
|
|
616
|
+
const resultText = r.success ? (r.modelOutput ?? r.output) : `ERROR: ${r.error ?? 'unknown failure'}`;
|
|
613
617
|
messages.push({
|
|
614
618
|
role: 'tool',
|
|
615
619
|
content: formatToolResultContent(call, r),
|
|
@@ -665,8 +669,6 @@ export function createAgent(runtime: AgentRuntime) {
|
|
|
665
669
|
} else {
|
|
666
670
|
consecutiveInvalid = 0;
|
|
667
671
|
}
|
|
668
|
-
runtime.onToolProgress?.({ type: 'tool_start', name: action.name, args: action.args });
|
|
669
|
-
const singleStart = Date.now();
|
|
670
672
|
const result = validationError
|
|
671
673
|
? ({ success: false, output: '', error: validationError } as ToolResult)
|
|
672
674
|
: await executor.run(async () => {
|
|
@@ -680,11 +682,10 @@ export function createAgent(runtime: AgentRuntime) {
|
|
|
680
682
|
};
|
|
681
683
|
}
|
|
682
684
|
});
|
|
683
|
-
runtime.onToolProgress?.({ type: 'tool_end', name: action.name, success: result.success, durationMs: Date.now() - singleStart });
|
|
684
685
|
if (!result.success) {
|
|
685
686
|
recordAgentError(runtime.telemetry);
|
|
686
687
|
}
|
|
687
|
-
const singleResultText = result.success ? result.output : `ERROR: ${result.error ?? 'unknown failure'}`;
|
|
688
|
+
const singleResultText = result.success ? (result.modelOutput ?? result.output) : `ERROR: ${result.error ?? 'unknown failure'}`;
|
|
688
689
|
messages.push({
|
|
689
690
|
role: 'tool',
|
|
690
691
|
content: formatToolResultContent(action, result),
|
|
@@ -727,7 +728,8 @@ export function createAgent(runtime: AgentRuntime) {
|
|
|
727
728
|
if (event.type === 'text_delta') {
|
|
728
729
|
finalText += event.text;
|
|
729
730
|
yield event;
|
|
730
|
-
}
|
|
731
|
+
}
|
|
732
|
+
if (event.type === 'tool_start') {
|
|
731
733
|
pendingTools.push({
|
|
732
734
|
type: 'tool',
|
|
733
735
|
name: event.name,
|
|
@@ -735,18 +737,13 @@ export function createAgent(runtime: AgentRuntime) {
|
|
|
735
737
|
...(event.toolCallId != null ? { toolCallId: event.toolCallId } : {}),
|
|
736
738
|
});
|
|
737
739
|
yield event;
|
|
738
|
-
} else {
|
|
739
|
-
// Forward all other events (tool_end, step_start, step_end, done)
|
|
740
|
-
// from self-managing loops like ArcLoop
|
|
741
|
-
yield event;
|
|
742
|
-
if (event.type === 'done') return;
|
|
743
740
|
}
|
|
744
741
|
}
|
|
745
742
|
|
|
746
743
|
// If no tools → final response
|
|
747
744
|
if (pendingTools.length === 0) {
|
|
748
745
|
messages.push({ role: 'assistant', content: finalText });
|
|
749
|
-
yield { type: 'step_end', step };
|
|
746
|
+
{ const u = getLoopUsage(); yield u ? { type: 'step_end' as const, step, usage: u } : { type: 'step_end' as const, step }; }
|
|
750
747
|
yield { type: 'done', output: finalText, steps: step };
|
|
751
748
|
return;
|
|
752
749
|
}
|
|
@@ -772,7 +769,7 @@ export function createAgent(runtime: AgentRuntime) {
|
|
|
772
769
|
if (action.type === 'final') {
|
|
773
770
|
yield { type: 'text_delta', text: action.content };
|
|
774
771
|
messages.push({ role: 'assistant', content: action.content });
|
|
775
|
-
yield { type: 'step_end', step };
|
|
772
|
+
{ const u = getLoopUsage(); yield u ? { type: 'step_end' as const, step, usage: u } : { type: 'step_end' as const, step }; }
|
|
776
773
|
yield { type: 'done', output: action.content, steps: step };
|
|
777
774
|
return;
|
|
778
775
|
}
|
|
@@ -784,7 +781,7 @@ export function createAgent(runtime: AgentRuntime) {
|
|
|
784
781
|
try {
|
|
785
782
|
const r = await executeTool(runtime.toolProvider, call, runtime);
|
|
786
783
|
yield { type: 'tool_end', name: call.name, result: toStreamResult(r) };
|
|
787
|
-
const rText = r.success ? r.output : `ERROR: ${r.error ?? 'unknown failure'}`;
|
|
784
|
+
const rText = r.success ? (r.modelOutput ?? r.output) : `ERROR: ${r.error ?? 'unknown failure'}`;
|
|
788
785
|
messages.push({
|
|
789
786
|
role: 'tool',
|
|
790
787
|
content: formatToolResultContent(call, r),
|
|
@@ -806,7 +803,7 @@ export function createAgent(runtime: AgentRuntime) {
|
|
|
806
803
|
try {
|
|
807
804
|
const r = await executeTool(runtime.toolProvider, action, runtime);
|
|
808
805
|
yield { type: 'tool_end', name: action.name, result: toStreamResult(r) };
|
|
809
|
-
const rText = r.success ? r.output : `ERROR: ${r.error ?? 'unknown failure'}`;
|
|
806
|
+
const rText = r.success ? (r.modelOutput ?? r.output) : `ERROR: ${r.error ?? 'unknown failure'}`;
|
|
810
807
|
messages.push({
|
|
811
808
|
role: 'tool',
|
|
812
809
|
content: formatToolResultContent(action, r),
|
|
@@ -822,7 +819,7 @@ export function createAgent(runtime: AgentRuntime) {
|
|
|
822
819
|
});
|
|
823
820
|
}
|
|
824
821
|
}
|
|
825
|
-
yield { type: 'step_end', step };
|
|
822
|
+
{ const u = getLoopUsage(); yield u ? { type: 'step_end' as const, step, usage: u } : { type: 'step_end' as const, step }; }
|
|
826
823
|
continue;
|
|
827
824
|
}
|
|
828
825
|
|
|
@@ -832,7 +829,7 @@ export function createAgent(runtime: AgentRuntime) {
|
|
|
832
829
|
const call = pendingTools[i]!;
|
|
833
830
|
const r = results[i]!;
|
|
834
831
|
yield { type: 'tool_end', name: call.name, result: toStreamResult(r) };
|
|
835
|
-
const rText = r.success ? r.output : `ERROR: ${r.error ?? 'unknown failure'}`;
|
|
832
|
+
const rText = r.success ? (r.modelOutput ?? r.output) : `ERROR: ${r.error ?? 'unknown failure'}`;
|
|
836
833
|
messages.push({
|
|
837
834
|
role: 'tool',
|
|
838
835
|
content: formatToolResultContent(call, r),
|
|
@@ -850,7 +847,7 @@ export function createAgent(runtime: AgentRuntime) {
|
|
|
850
847
|
|
|
851
848
|
if (action.type === 'final') {
|
|
852
849
|
messages.push({ role: 'assistant', content: action.content });
|
|
853
|
-
yield { type: 'step_end', step };
|
|
850
|
+
{ const u = getLoopUsage(); yield u ? { type: 'step_end' as const, step, usage: u } : { type: 'step_end' as const, step }; }
|
|
854
851
|
yield { type: 'done', output: action.content, steps: step };
|
|
855
852
|
return;
|
|
856
853
|
}
|
|
@@ -874,7 +871,7 @@ export function createAgent(runtime: AgentRuntime) {
|
|
|
874
871
|
const call = calls[i]!;
|
|
875
872
|
const r = results[i]!;
|
|
876
873
|
yield { type: 'tool_end', name: call.name, result: toStreamResult(r) };
|
|
877
|
-
const rText = r.success ? r.output : `ERROR: ${r.error ?? 'unknown failure'}`;
|
|
874
|
+
const rText = r.success ? (r.modelOutput ?? r.output) : `ERROR: ${r.error ?? 'unknown failure'}`;
|
|
878
875
|
messages.push({
|
|
879
876
|
role: 'tool',
|
|
880
877
|
content: formatToolResultContent(call, r),
|
|
@@ -888,7 +885,7 @@ export function createAgent(runtime: AgentRuntime) {
|
|
|
888
885
|
}
|
|
889
886
|
}
|
|
890
887
|
|
|
891
|
-
yield { type: 'step_end', step };
|
|
888
|
+
{ const u = getLoopUsage(); yield u ? { type: 'step_end' as const, step, usage: u } : { type: 'step_end' as const, step }; }
|
|
892
889
|
}
|
|
893
890
|
|
|
894
891
|
yield { type: 'done', output: 'ERROR: max steps exceeded', steps: maxSteps };
|