@bluecopa/harness 0.1.0-snapshot.38 → 0.1.0-snapshot.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +117 -212
- package/package.json +1 -2
- package/src/agent/create-agent.ts +2 -15
- package/src/agent/types.ts +2 -15
- package/src/loop/context-store.ts +9 -12
- package/src/loop/vercel-agent-loop.ts +17 -28
- package/src/skills/skill-router.ts +6 -12
- package/tests/integration/agent-skill-default-from-sandbox.spec.ts +2 -3
- package/tests/unit/structured-messages.spec.ts +1 -1
- package/vitest.config.ts +1 -1
- package/src/arc/agent-runner.ts +0 -683
- package/src/arc/arc-loop.ts +0 -775
- package/src/arc/arc-types.ts +0 -115
- package/src/arc/bridge-tools.ts +0 -170
- package/src/arc/bridged-tool-provider.ts +0 -80
- package/src/arc/consolidation.ts +0 -118
- package/src/arc/context-window.ts +0 -267
- package/src/arc/create-arc-agent.ts +0 -99
- package/src/arc/debug.ts +0 -62
- package/src/arc/episode-compressor.ts +0 -225
- package/src/arc/memory-manager.ts +0 -245
- package/src/arc/message-convert.ts +0 -111
- package/src/arc/object-store/fs-object-store.ts +0 -60
- package/src/arc/object-store/memory-object-store.ts +0 -41
- package/src/arc/object-store/object-store.ts +0 -12
- package/src/arc/profile-builder.ts +0 -157
- package/src/arc/resilience/bulkhead.ts +0 -110
- package/src/arc/resilience/circuit-breaker.ts +0 -112
- package/src/arc/resilience/fallback.ts +0 -27
- package/src/arc/resilience/index.ts +0 -21
- package/src/arc/resilience/pipeline.ts +0 -103
- package/src/arc/resilience/retry.ts +0 -90
- package/src/arc/resilience/timeout.ts +0 -60
- package/src/arc/resilience/types.ts +0 -71
- package/src/arc/sig.ts +0 -115
- package/src/arc/skill-resolver.ts +0 -78
- package/src/arc/stores/episode-store.ts +0 -120
- package/src/arc/stores/long-term-store.ts +0 -86
- package/src/arc/stores/rxdb-setup.ts +0 -113
- package/src/arc/stores/session-memo-store.ts +0 -58
- package/src/arc/tools.ts +0 -67
- package/src/arc/types.ts +0 -324
- package/src/arc/utils.ts +0 -19
- package/testing/index.ts +0 -22
- package/testing/scenario-replay.ts +0 -209
- package/testing/scenario-types.ts +0 -38
- package/testing/scripted-llm.ts +0 -230
- package/tests/arc/channel.test.ts +0 -170
- package/tests/arc/context-window.test.ts +0 -396
- package/tests/arc/e2e.test.ts +0 -353
- package/tests/arc/error-paths.test.ts +0 -402
- package/tests/arc/live-integration.test.ts +0 -357
- package/tests/arc/memory-manager.test.ts +0 -384
- package/tests/arc/process-interleaving.test.ts +0 -432
- package/tests/arc/process-profiles.test.ts +0 -364
- package/tests/arc/resilience-integration.test.ts +0 -381
- package/tests/arc/resilience.test.ts +0 -575
- package/tests/arc/scenario-driven.test.ts +0 -297
- package/tests/arc/tool-dispatch.test.ts +0 -340
- package/tests/arc/wasm-pbt.test.ts +0 -104
- package/verify/Cargo.lock +0 -637
- package/verify/Cargo.toml +0 -24
- package/verify/src/lib.rs +0 -5
- package/verify/src/main.rs +0 -165
- package/verify/src/model/context.rs +0 -100
- package/verify/src/model/mod.rs +0 -6
- package/verify/src/model/orchestrator.rs +0 -371
- package/verify/src/model/process.rs +0 -140
- package/verify/src/model/types.rs +0 -273
- package/verify/src/properties/liveness.rs +0 -32
- package/verify/src/properties/mod.rs +0 -4
- package/verify/src/properties/safety.rs +0 -78
- package/verify/src/trace/event.rs +0 -155
- package/verify/src/trace/mod.rs +0 -2
- package/verify/src/trace/validator.rs +0 -367
- package/verify/src/wasm/mod.rs +0 -3
- package/verify/src/wasm/scenario_generator.rs +0 -400
- package/verify/src/wasm/types.rs +0 -104
- package/verify/src/wasm/wasm_validator.rs +0 -107
- package/verify/tests/model_check.rs +0 -49
- package/verify/tests/trace_validation.rs +0 -147
package/README.md
CHANGED
|
@@ -2,17 +2,9 @@
|
|
|
2
2
|
|
|
3
3
|
Provider-agnostic TypeScript agent framework with Claude-code-compatible tool semantics.
|
|
4
4
|
|
|
5
|
-
|
|
5
|
+
The harness provides the core loop that drives an AI agent: send messages to an LLM, execute the tool calls it returns, feed results back, and repeat until the LLM produces a final text response.
|
|
6
6
|
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
## Install
|
|
10
|
-
|
|
11
|
-
```bash
|
|
12
|
-
pnpm add @bluecopa/harness
|
|
13
|
-
```
|
|
14
|
-
|
|
15
|
-
## Development
|
|
7
|
+
## Quickstart
|
|
16
8
|
|
|
17
9
|
```bash
|
|
18
10
|
pnpm install
|
|
@@ -21,11 +13,9 @@ pnpm test
|
|
|
21
13
|
|
|
22
14
|
## Architecture
|
|
23
15
|
|
|
24
|
-
### Single-Agent Loop
|
|
25
|
-
|
|
26
16
|
```
|
|
27
17
|
┌──────────────┐ ┌──────────────┐ ┌──────────────────┐
|
|
28
|
-
│ createAgent
|
|
18
|
+
│ createAgent │────▶│ AgentLoop │────▶│ LLM (Claude) │
|
|
29
19
|
│ (turn loop) │ │ (nextAction)│ │ │
|
|
30
20
|
└──────┬───────┘ └──────────────┘ └──────────────────┘
|
|
31
21
|
│ │
|
|
@@ -37,82 +27,20 @@ pnpm test
|
|
|
37
27
|
└──────────────┘
|
|
38
28
|
```
|
|
39
29
|
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
│ tools: Thread, Check, Cancel, Remember, ReadEpisode
|
|
45
|
-
│
|
|
46
|
-
│ Turn 1 (parallel):
|
|
47
|
-
├──► Process 0 ("read auth", model=fast) ─┐
|
|
48
|
-
├──► Process 1 ("read routes", model=fast) ─┼──► Episodes
|
|
49
|
-
├──► Process 2 ("read tests", model=fast) ─┘
|
|
50
|
-
│
|
|
51
|
-
│ Turn 2 (dispatch dependent work):
|
|
52
|
-
├──► Thread("fix bug", context=[ep0,ep1,ep2]) ──► Episode
|
|
53
|
-
│
|
|
54
|
-
│ Turn 3 (parallel):
|
|
55
|
-
├──► Thread("run tests", context=[ep3]) ─┐
|
|
56
|
-
├──► Thread("update docs", context=[ep3]) ─┘
|
|
57
|
-
│
|
|
58
|
-
└──► Final text response
|
|
59
|
-
```
|
|
60
|
-
|
|
61
|
-
Full architecture doc: [`docs/arc.md`](../docs/arc.md)
|
|
62
|
-
|
|
63
|
-
---
|
|
64
|
-
|
|
65
|
-
## ToolProvider
|
|
66
|
-
|
|
67
|
-
The contract for tool execution. All agent modes use this interface.
|
|
68
|
-
|
|
69
|
-
```typescript
|
|
70
|
-
interface ToolProvider {
|
|
71
|
-
bash(command: string, options?: BashOptions): Promise<ToolResult>;
|
|
72
|
-
readFile(path: string, options?: ReadOptions): Promise<ToolResult>;
|
|
73
|
-
writeFile(path: string, content: string): Promise<ToolResult>;
|
|
74
|
-
editFile(path: string, oldText: string, newText: string): Promise<ToolResult>;
|
|
75
|
-
glob(pattern: string, options?: GlobOptions): Promise<ToolResult>;
|
|
76
|
-
grep(pattern: string, path?: string, options?: GrepOptions): Promise<ToolResult>;
|
|
77
|
-
webFetch?(options: WebFetchOptions): Promise<ToolResult>;
|
|
78
|
-
webSearch?(query: string): Promise<ToolResult>;
|
|
79
|
-
capabilities(): ToolProviderCapabilities;
|
|
80
|
-
}
|
|
81
|
-
|
|
82
|
-
interface ToolResult {
|
|
83
|
-
success: boolean;
|
|
84
|
-
output: string;
|
|
85
|
-
error?: string;
|
|
86
|
-
}
|
|
87
|
-
```
|
|
88
|
-
|
|
89
|
-
Built-in implementations:
|
|
30
|
+
1. `createAgent` drives a deterministic step loop
|
|
31
|
+
2. Each step calls `loop.nextAction(messages)` to get the LLM's decision
|
|
32
|
+
3. If it's a tool call, the harness executes it via `ToolProvider` and appends the result
|
|
33
|
+
4. If it's a final action, the loop ends and returns the result
|
|
90
34
|
|
|
91
|
-
|
|
92
|
-
|----------|-------------|
|
|
93
|
-
| `LocalToolProvider` | Runs tools on the local filesystem |
|
|
94
|
-
| `E2BToolProvider` | Routes tools to a sandbox VM via `ControlPlaneE2BExecutor` |
|
|
95
|
-
| `CompositeToolProvider` | Combines multiple providers (e.g. local filesystem + sandbox bash) |
|
|
35
|
+
## Using with the sandbox
|
|
96
36
|
|
|
97
|
-
|
|
37
|
+
The most common setup connects the harness to a running sandbox service via `ControlPlaneE2BExecutor`:
|
|
98
38
|
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
```typescript
|
|
102
|
-
interface SandboxProvider {
|
|
103
|
-
exec(command: string, options?: SandboxExecOptions): Promise<SandboxExecResult>;
|
|
104
|
-
readSandboxFile(path: string): Promise<SandboxFileBlob>;
|
|
105
|
-
writeSandboxFile(path: string, content: SandboxFileBlob): Promise<void>;
|
|
106
|
-
}
|
|
107
|
-
```
|
|
108
|
-
|
|
109
|
-
Used by `SkillManager` for executing skill scripts in isolated VMs.
|
|
110
|
-
|
|
111
|
-
## Connecting to a Sandbox
|
|
112
|
-
|
|
113
|
-
```typescript
|
|
114
|
-
import { ControlPlaneE2BExecutor } from './src/providers/control-plane-e2b-executor';
|
|
39
|
+
```ts
|
|
40
|
+
import { createAgent } from './src/agent/create-agent';
|
|
115
41
|
import { E2BToolProvider } from './src/providers/e2b-tool-provider';
|
|
42
|
+
import { ControlPlaneE2BExecutor } from './src/providers/control-plane-e2b-executor';
|
|
43
|
+
import { VercelAgentLoop } from './src/loop/vercel-agent-loop';
|
|
116
44
|
|
|
117
45
|
// Connect to sandbox service
|
|
118
46
|
const executor = new ControlPlaneE2BExecutor({
|
|
@@ -122,174 +50,155 @@ const executor = new ControlPlaneE2BExecutor({
|
|
|
122
50
|
});
|
|
123
51
|
await executor.initialize(); // creates a Firecracker VM
|
|
124
52
|
|
|
125
|
-
|
|
53
|
+
// Build and run the agent
|
|
54
|
+
const agent = createAgent({
|
|
55
|
+
toolProvider: new E2BToolProvider(executor),
|
|
56
|
+
loop: new VercelAgentLoop(), // needs ANTHROPIC_API_KEY
|
|
57
|
+
});
|
|
126
58
|
|
|
127
|
-
|
|
59
|
+
const result = await agent.run('create a bar chart of sales data');
|
|
60
|
+
console.log(result.output); // LLM's final response
|
|
61
|
+
console.log(result.steps); // number of tool steps
|
|
128
62
|
|
|
129
|
-
await executor.destroy();
|
|
63
|
+
await executor.destroy(); // tears down the VM
|
|
130
64
|
```
|
|
131
65
|
|
|
132
|
-
|
|
66
|
+
For a complete working example, see [`examples/chat-assistant/src/chat.ts`](../examples/chat-assistant/src/chat.ts).
|
|
67
|
+
|
|
68
|
+
### From environment variables
|
|
133
69
|
|
|
134
|
-
|
|
70
|
+
`ControlPlaneE2BExecutor.fromEnv()` reads `SAMYX_BASE_URL` and `SAMYX_API_KEY` automatically:
|
|
135
71
|
|
|
136
|
-
|
|
72
|
+
```ts
|
|
73
|
+
const executor = ControlPlaneE2BExecutor.fromEnv();
|
|
74
|
+
```
|
|
137
75
|
|
|
138
|
-
|
|
76
|
+
## Using locally (no sandbox)
|
|
139
77
|
|
|
140
|
-
|
|
78
|
+
For development without a sandbox service, use `LocalToolProvider` which runs tools on the local machine:
|
|
79
|
+
|
|
80
|
+
```ts
|
|
141
81
|
import { createAgent } from './src/agent/create-agent';
|
|
142
82
|
import { LocalToolProvider } from './src/providers/local-tool-provider';
|
|
143
83
|
|
|
144
84
|
const agent = createAgent({
|
|
145
85
|
toolProvider: new LocalToolProvider(process.cwd()),
|
|
146
|
-
loop: new VercelAgentLoop(),
|
|
86
|
+
loop: new VercelAgentLoop(),
|
|
147
87
|
});
|
|
148
88
|
|
|
149
89
|
const result = await agent.run('list all TypeScript files');
|
|
150
|
-
console.log(result.output);
|
|
151
90
|
```
|
|
152
91
|
|
|
153
|
-
|
|
92
|
+
## Key modules
|
|
93
|
+
|
|
94
|
+
### Agent creation (`src/agent/create-agent.ts`)
|
|
154
95
|
|
|
155
|
-
|
|
156
|
-
|--------|------|---------|-------------|
|
|
157
|
-
| `toolProvider` | `ToolProvider` | required | Executes tool calls |
|
|
158
|
-
| `loop` | `AgentLoop` | `VercelAgentLoop` | LLM decision loop |
|
|
159
|
-
| `sandboxProvider` | `SandboxProvider` | — | Higher-level sandbox operations |
|
|
160
|
-
| `maxSteps` | `number` | 30 | Max tool steps per run |
|
|
161
|
-
| `telemetry` | `HarnessTelemetry` | — | OpenTelemetry-style tracing |
|
|
162
|
-
| `skillIndexPath` | `string` | — | Path to skill index JSON for routing |
|
|
96
|
+
`createAgent(options)` returns an agent with a `.run(prompt, options?)` method. Options:
|
|
163
97
|
|
|
164
|
-
|
|
98
|
+
| Option | Type | Description |
|
|
99
|
+
|--------|------|-------------|
|
|
100
|
+
| `toolProvider` | `ToolProvider` | Required. Executes tool calls |
|
|
101
|
+
| `loop` | `AgentLoop` | LLM decision loop (default: `VercelAgentLoop`) |
|
|
102
|
+
| `sandboxProvider` | `SandboxProvider` | Optional. Higher-level sandbox ops (file download, exec with env) |
|
|
103
|
+
| `maxSteps` | `number` | Max tool steps per run (default: 30) |
|
|
104
|
+
| `telemetry` | `HarnessTelemetry` | Optional. OpenTelemetry-style tracing |
|
|
105
|
+
| `skillIndexPath` | `string` | Optional. Path to skill index JSON |
|
|
165
106
|
|
|
166
|
-
|
|
107
|
+
### Agent loop (`src/loop/vercel-agent-loop.ts`)
|
|
167
108
|
|
|
168
|
-
|
|
109
|
+
`VercelAgentLoop` calls Claude via the Vercel AI SDK. It supports:
|
|
110
|
+
- Parallel tool calls (returns `ToolBatchAction` when the LLM requests multiple tools at once)
|
|
111
|
+
- Configurable system prompt
|
|
112
|
+
- Model selection via `HARNESS_MODEL` env var (default: `claude-sonnet-4-5`)
|
|
113
|
+
|
|
114
|
+
```ts
|
|
169
115
|
const loop = new VercelAgentLoop({
|
|
170
116
|
systemPrompt: 'You are a helpful coding assistant.',
|
|
171
|
-
model: 'claude-sonnet-4-5', // or HARNESS_MODEL env var
|
|
172
117
|
});
|
|
173
118
|
```
|
|
174
119
|
|
|
175
|
-
###
|
|
120
|
+
### Tool provider (`src/interfaces/tool-provider.ts`)
|
|
176
121
|
|
|
177
|
-
|
|
122
|
+
The contract for tool execution:
|
|
178
123
|
|
|
179
|
-
```
|
|
180
|
-
|
|
181
|
-
|
|
124
|
+
```ts
|
|
125
|
+
interface ToolProvider {
|
|
126
|
+
bash(command: string, options?: BashOptions): Promise<ToolResult>;
|
|
127
|
+
readFile(path: string, options?: ReadOptions): Promise<ToolResult>;
|
|
128
|
+
writeFile(path: string, content: string): Promise<ToolResult>;
|
|
129
|
+
editFile(path: string, oldText: string, newText: string): Promise<ToolResult>;
|
|
130
|
+
glob(pattern: string, options?: GlobOptions): Promise<ToolResult>;
|
|
131
|
+
grep(pattern: string, path?: string, options?: GrepOptions): Promise<ToolResult>;
|
|
132
|
+
webFetch?(options: WebFetchOptions): Promise<ToolResult>;
|
|
133
|
+
webSearch?(query: string): Promise<ToolResult>;
|
|
134
|
+
capabilities(): ToolProviderCapabilities;
|
|
135
|
+
}
|
|
182
136
|
|
|
183
|
-
|
|
184
|
-
|
|
185
|
-
|
|
186
|
-
|
|
187
|
-
|
|
188
|
-
onActivity: (entry) => console.log(entry),
|
|
189
|
-
onLlmRequest: async (prompt) => callLLM(prompt),
|
|
190
|
-
onWebFetchRequest: async (url) => fetch(url),
|
|
191
|
-
});
|
|
137
|
+
interface ToolResult {
|
|
138
|
+
success: boolean;
|
|
139
|
+
output: string;
|
|
140
|
+
error?: string;
|
|
141
|
+
}
|
|
192
142
|
```
|
|
193
143
|
|
|
194
|
-
|
|
195
|
-
|
|
196
|
-
**REPL mode**: When the LLM returns a Bash action with the REPL marker, the loop writes a Python script into the sandbox, injects the bridge module, runs the script, and polls for sub-requests (LLM, web_fetch, ask_user) that the harness fulfills.
|
|
197
|
-
|
|
198
|
-
---
|
|
144
|
+
Built-in implementations:
|
|
199
145
|
|
|
200
|
-
|
|
146
|
+
| Provider | Description |
|
|
147
|
+
|----------|-------------|
|
|
148
|
+
| `LocalToolProvider` | Runs tools on the local filesystem |
|
|
149
|
+
| `E2BToolProvider` | Routes tools to an E2B-compatible executor over HTTP |
|
|
150
|
+
| `CompositeToolProvider` | Combines multiple providers (e.g. sandbox + web) |
|
|
201
151
|
|
|
202
|
-
|
|
152
|
+
### Action types (`src/agent/types.ts`)
|
|
203
153
|
|
|
204
|
-
|
|
205
|
-
import { createArcAgent } from './src/arc/create-arc-agent';
|
|
154
|
+
The LLM returns one of these action types each turn:
|
|
206
155
|
|
|
207
|
-
|
|
208
|
-
|
|
209
|
-
|
|
210
|
-
|
|
211
|
-
|
|
212
|
-
|
|
213
|
-
|
|
214
|
-
});
|
|
156
|
+
```ts
|
|
157
|
+
// Single tool call
|
|
158
|
+
interface ToolCallAction {
|
|
159
|
+
type: 'tool';
|
|
160
|
+
name: 'Bash' | 'Read' | 'Write' | 'Edit' | 'Glob' | 'Grep' | ...;
|
|
161
|
+
args: Record<string, unknown>;
|
|
162
|
+
}
|
|
215
163
|
|
|
216
|
-
//
|
|
217
|
-
|
|
218
|
-
|
|
219
|
-
|
|
220
|
-
if (event.type === 'done') console.log(`Done in ${event.stats.durationMs}ms`);
|
|
164
|
+
// Multiple independent tool calls (executed in parallel)
|
|
165
|
+
interface ToolBatchAction {
|
|
166
|
+
type: 'tool_batch';
|
|
167
|
+
calls: ToolCallAction[];
|
|
221
168
|
}
|
|
222
169
|
|
|
223
|
-
//
|
|
224
|
-
|
|
170
|
+
// Final text response (ends the loop)
|
|
171
|
+
interface FinalAction {
|
|
172
|
+
type: 'final';
|
|
173
|
+
content: string;
|
|
174
|
+
}
|
|
225
175
|
```
|
|
226
176
|
|
|
227
|
-
###
|
|
228
|
-
|
|
229
|
-
| Option | Type | Default | Description |
|
|
230
|
-
|--------|------|---------|-------------|
|
|
231
|
-
| `model` | `string` | `'claude-opus-4-6'` | Orchestrator model (ID or tier name) |
|
|
232
|
-
| `modelMap` | `Record<ModelTier, string>` | haiku/sonnet/opus | Maps fast/medium/strong to model IDs |
|
|
233
|
-
| `apiKey` | `string` | — | Anthropic API key |
|
|
234
|
-
| `systemPrompt` | `string` | built-in | Custom orchestrator system prompt |
|
|
235
|
-
| `maxTurns` | `number` | 30 | Max orchestrator turns |
|
|
236
|
-
| `processTimeout` | `number` | 120_000 | Per-process timeout (ms) |
|
|
237
|
-
| `processMaxSteps` | `number` | 20 | Per-process max tool steps |
|
|
238
|
-
| `contextWindowSize` | `number` | 200_000 | Context window in tokens |
|
|
239
|
-
| `outputReserve` | `number` | 20_000 | Tokens reserved for output |
|
|
240
|
-
| `autoMemory` | `boolean` | true | Auto-detect patterns from episodes |
|
|
241
|
-
| `episodeStore` | `EpisodeStore` | required | Stores episode summaries + traces |
|
|
242
|
-
| `sessionMemoStore` | `SessionMemoStore` | required | Stores session memos |
|
|
243
|
-
| `longTermStore` | `LongTermStore` | required | Stores long-term memories |
|
|
244
|
-
| `taskId` | `string` | required | Task identifier |
|
|
245
|
-
| `sessionId` | `string` | required | Session identifier |
|
|
246
|
-
| `toolProvider` | `ToolProvider` | required | Tool execution |
|
|
247
|
-
| `processTools` | `Record<string, AnyTool>` | builtinTools | Tools available inside processes |
|
|
248
|
-
| `extraOrchestratorTools` | `Record<string, AnyTool>` | — | Custom orchestrator tools |
|
|
249
|
-
| `onOrchestratorTool` | `function` | — | Handler for custom orchestrator tools |
|
|
250
|
-
| `resilience` | `ResiliencePolicy` | — | Composable resilience pipeline |
|
|
251
|
-
| `traceWriter` | `function` | — | Callback for trace event emission |
|
|
252
|
-
|
|
253
|
-
### Resilience
|
|
254
|
-
|
|
255
|
-
```typescript
|
|
256
|
-
import { resilience } from './src/arc/resilience';
|
|
257
|
-
|
|
258
|
-
const pipeline = resilience()
|
|
259
|
-
.retry({ maxRetries: 2, baseDelay: 1000 })
|
|
260
|
-
.timeout({ durationMs: 30_000 })
|
|
261
|
-
.circuitBreaker({ failureThreshold: 5 })
|
|
262
|
-
.build();
|
|
263
|
-
|
|
264
|
-
const agent = await createArcAgent({
|
|
265
|
-
// ...config
|
|
266
|
-
resilience: pipeline,
|
|
267
|
-
});
|
|
268
|
-
```
|
|
177
|
+
### LCM tool loop (`src/loop/lcm-tool-loop.ts`)
|
|
269
178
|
|
|
270
|
-
|
|
179
|
+
`LCMToolLoop` wraps another loop to add LCM-based tool routing, REPL script execution, and bridge-based tool dispatch. Used in the chat-assistant example.
|
|
271
180
|
|
|
272
|
-
|
|
273
|
-
|
|
274
|
-
|
|
275
|
-
|
|
276
|
-
|
|
277
|
-
|
|
181
|
+
### Sandbox provider (`src/interfaces/sandbox-provider.ts`)
|
|
182
|
+
|
|
183
|
+
Higher-level sandbox operations beyond basic tool calls:
|
|
184
|
+
|
|
185
|
+
```ts
|
|
186
|
+
interface SandboxProvider {
|
|
187
|
+
exec(command: string, options?: SandboxExecOptions): Promise<SandboxExecResult>;
|
|
188
|
+
readSandboxFile(path: string): Promise<SandboxFileBlob>;
|
|
189
|
+
writeSandboxFile(path: string, content: SandboxFileBlob): Promise<void>;
|
|
190
|
+
}
|
|
278
191
|
```
|
|
279
192
|
|
|
280
|
-
|
|
193
|
+
### Observability (`src/observability/otel.ts`)
|
|
281
194
|
|
|
282
|
-
|
|
195
|
+
`HarnessTelemetry` provides OpenTelemetry-style spans and metrics for agent runs.
|
|
283
196
|
|
|
284
|
-
## Package
|
|
197
|
+
## Package layout
|
|
285
198
|
|
|
286
199
|
```
|
|
287
200
|
src/
|
|
288
201
|
├── agent/ # createAgent, step executor, types
|
|
289
|
-
├── arc/ # ArcLoop orchestrator, processes, memory, resilience
|
|
290
|
-
│ ├── resilience/ # Retry, circuit breaker, timeout, bulkhead, fallback
|
|
291
|
-
│ ├── stores/ # RxDB + in-memory store implementations
|
|
292
|
-
│ └── object-store/ # Pluggable cloud sync (fs, memory)
|
|
293
202
|
├── interfaces/ # ToolProvider, SandboxProvider, AgentLoop contracts
|
|
294
203
|
├── loop/ # VercelAgentLoop, LCMToolLoop
|
|
295
204
|
├── providers/ # LocalToolProvider, E2BToolProvider, ControlPlaneE2BExecutor
|
|
@@ -297,20 +206,16 @@ src/
|
|
|
297
206
|
├── hooks/ # Pre/post tool call hooks
|
|
298
207
|
├── permissions/ # Tool permission checks
|
|
299
208
|
├── sessions/ # Session persistence
|
|
300
|
-
├── subagents/ # Subagent spawning
|
|
209
|
+
├── subagents/ # Subagent spawning and task tools
|
|
301
210
|
├── skills/ # Skill index, routing, and management
|
|
302
211
|
├── optimization/ # Benchmark runner
|
|
303
212
|
└── observability/ # OpenTelemetry integration
|
|
304
|
-
|
|
305
|
-
verify/ # Rust formal verification (Stateright model checker)
|
|
306
|
-
testing/ # Adversarial scenario replay harness
|
|
307
|
-
tests/ # Vitest test suite
|
|
308
213
|
```
|
|
309
214
|
|
|
310
215
|
## Documentation
|
|
311
216
|
|
|
312
|
-
-
|
|
313
|
-
-
|
|
314
|
-
-
|
|
315
|
-
-
|
|
316
|
-
- [
|
|
217
|
+
- Provider guide: `docs/guides/providers.md`
|
|
218
|
+
- Skills guide: `docs/guides/skills.md`
|
|
219
|
+
- Observability guide: `docs/guides/observability.md`
|
|
220
|
+
- Release process: `../docs/RELEASE.md`
|
|
221
|
+
- Full example: [`../examples/chat-assistant/src/chat.ts`](../examples/chat-assistant/src/chat.ts)
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@bluecopa/harness",
|
|
3
|
-
"version": "0.1.0-snapshot.38",
|
|
3
|
+
"version": "0.1.0-snapshot.4",
|
|
4
4
|
"description": "Provider-agnostic TypeScript agent framework",
|
|
5
5
|
"license": "UNLICENSED",
|
|
6
6
|
"scripts": {
|
|
@@ -10,7 +10,6 @@
|
|
|
10
10
|
"dependencies": {
|
|
11
11
|
"@ai-sdk/anthropic": "^3.0.48",
|
|
12
12
|
"ai": "^6.0.101",
|
|
13
|
-
"rxdb": "^15.39.0",
|
|
14
13
|
"zod": "^4.1.11"
|
|
15
14
|
},
|
|
16
15
|
"devDependencies": {
|
|
@@ -37,8 +37,6 @@ export interface AgentRuntime {
|
|
|
37
37
|
/** Custom tool executor. Called for every tool action. Return null to fall through to built-in dispatch.
|
|
38
38
|
* When hookRunner/permissionManager are provided on the runtime, they are automatically applied before/after this callback — no manual wiring needed. */
|
|
39
39
|
executeToolAction?: (action: ToolCallAction) => Promise<ToolResult | null>;
|
|
40
|
-
/** Progress callback fired before/after each tool call during run(). */
|
|
41
|
-
onToolProgress?: (event: { type: 'tool_start'; name: string; args: Record<string, unknown> } | { type: 'tool_end'; name: string; success: boolean; durationMs: number }) => void;
|
|
42
40
|
}
|
|
43
41
|
|
|
44
42
|
/**
|
|
@@ -598,14 +596,10 @@ export function createAgent(runtime: AgentRuntime) {
|
|
|
598
596
|
|
|
599
597
|
// Execute valid calls via batch (sequential sandbox ops) or parallel fallback
|
|
600
598
|
if (validCalls.length > 0) {
|
|
601
|
-
for (const c of validCalls) runtime.onToolProgress?.({ type: 'tool_start', name: c.name, args: c.args });
|
|
602
|
-
const batchStart = Date.now();
|
|
603
599
|
const results = await executeBatch(validCalls, runtime.toolProvider, runtime);
|
|
604
|
-
const batchMs = Date.now() - batchStart;
|
|
605
600
|
for (let i = 0; i < validCalls.length; i++) {
|
|
606
601
|
const call = validCalls[i]!;
|
|
607
602
|
const r = results[i]!;
|
|
608
|
-
runtime.onToolProgress?.({ type: 'tool_end', name: call.name, success: r.success, durationMs: batchMs });
|
|
609
603
|
if (!r.success) {
|
|
610
604
|
recordAgentError(runtime.telemetry);
|
|
611
605
|
}
|
|
@@ -665,8 +659,6 @@ export function createAgent(runtime: AgentRuntime) {
|
|
|
665
659
|
} else {
|
|
666
660
|
consecutiveInvalid = 0;
|
|
667
661
|
}
|
|
668
|
-
runtime.onToolProgress?.({ type: 'tool_start', name: action.name, args: action.args });
|
|
669
|
-
const singleStart = Date.now();
|
|
670
662
|
const result = validationError
|
|
671
663
|
? ({ success: false, output: '', error: validationError } as ToolResult)
|
|
672
664
|
: await executor.run(async () => {
|
|
@@ -680,7 +672,6 @@ export function createAgent(runtime: AgentRuntime) {
|
|
|
680
672
|
};
|
|
681
673
|
}
|
|
682
674
|
});
|
|
683
|
-
runtime.onToolProgress?.({ type: 'tool_end', name: action.name, success: result.success, durationMs: Date.now() - singleStart });
|
|
684
675
|
if (!result.success) {
|
|
685
676
|
recordAgentError(runtime.telemetry);
|
|
686
677
|
}
|
|
@@ -727,7 +718,8 @@ export function createAgent(runtime: AgentRuntime) {
|
|
|
727
718
|
if (event.type === 'text_delta') {
|
|
728
719
|
finalText += event.text;
|
|
729
720
|
yield event;
|
|
730
|
-
}
|
|
721
|
+
}
|
|
722
|
+
if (event.type === 'tool_start') {
|
|
731
723
|
pendingTools.push({
|
|
732
724
|
type: 'tool',
|
|
733
725
|
name: event.name,
|
|
@@ -735,11 +727,6 @@ export function createAgent(runtime: AgentRuntime) {
|
|
|
735
727
|
...(event.toolCallId != null ? { toolCallId: event.toolCallId } : {}),
|
|
736
728
|
});
|
|
737
729
|
yield event;
|
|
738
|
-
} else {
|
|
739
|
-
// Forward all other events (tool_end, step_start, step_end, done)
|
|
740
|
-
// from self-managing loops like ArcLoop
|
|
741
|
-
yield event;
|
|
742
|
-
if (event.type === 'done') return;
|
|
743
730
|
}
|
|
744
731
|
}
|
|
745
732
|
|
package/src/agent/types.ts
CHANGED
|
@@ -11,26 +11,13 @@ export interface ToolResultInfo {
|
|
|
11
11
|
isError?: boolean;
|
|
12
12
|
}
|
|
13
13
|
|
|
14
|
-
export type ContentPart =
|
|
15
|
-
| { type: 'text'; text: string }
|
|
16
|
-
| { type: 'image'; image: Buffer | Uint8Array; mimeType: string };
|
|
17
|
-
|
|
18
14
|
export interface AgentMessage {
|
|
19
15
|
role: 'system' | 'user' | 'assistant' | 'tool';
|
|
20
|
-
content: string | ContentPart[];
|
|
16
|
+
content: string;
|
|
21
17
|
toolCalls?: ToolCallInfo[]; // assistant messages: what tools were called
|
|
22
18
|
toolResults?: ToolResultInfo[]; // tool messages: results keyed by toolCallId
|
|
23
19
|
}
|
|
24
20
|
|
|
25
|
-
/** Extract plain text from content (string or ContentPart[]). */
|
|
26
|
-
export function getTextContent(content: string | ContentPart[]): string {
|
|
27
|
-
if (typeof content === 'string') return content;
|
|
28
|
-
return content
|
|
29
|
-
.filter((p): p is Extract<ContentPart, { type: 'text' }> => p.type === 'text')
|
|
30
|
-
.map((p) => p.text)
|
|
31
|
-
.join('\n');
|
|
32
|
-
}
|
|
33
|
-
|
|
34
21
|
export interface ToolCallAction {
|
|
35
22
|
type: 'tool';
|
|
36
23
|
name: string;
|
|
@@ -59,7 +46,7 @@ export interface AgentRunResult {
|
|
|
59
46
|
export type AgentStreamEvent =
|
|
60
47
|
| { type: 'text_delta'; text: string }
|
|
61
48
|
| { type: 'tool_start'; name: string; args: Record<string, unknown>; toolCallId?: string }
|
|
62
|
-
| { type: 'tool_end'; name: string; result: { success: boolean; output: string; error?: string
|
|
49
|
+
| { type: 'tool_end'; name: string; result: { success: boolean; output: string; error?: string } }
|
|
63
50
|
| { type: 'step_start'; step: number }
|
|
64
51
|
| { type: 'step_end'; step: number }
|
|
65
52
|
| { type: 'done'; output: string; steps: number };
|
|
@@ -1,5 +1,4 @@
|
|
|
1
1
|
import type { AgentMessage } from '../agent/types';
|
|
2
|
-
import { getTextContent } from '../agent/types';
|
|
3
2
|
|
|
4
3
|
export interface ContextStoreConfig {
|
|
5
4
|
/** Maximum token budget for the trimmed view. Default: 150_000 */
|
|
@@ -92,7 +91,7 @@ export class LosslessContextStore {
|
|
|
92
91
|
for (let i = 0; i < this.raw.length; i++) {
|
|
93
92
|
const existing = this.raw[i]!;
|
|
94
93
|
const candidate = incoming[i]!;
|
|
95
|
-
if (existing.role !== candidate.role ||
|
|
94
|
+
if (existing.role !== candidate.role || existing.content !== candidate.content) {
|
|
96
95
|
return false;
|
|
97
96
|
}
|
|
98
97
|
}
|
|
@@ -102,7 +101,7 @@ export class LosslessContextStore {
|
|
|
102
101
|
private estimateTokensFor(msgs: AgentMessage[]): number {
|
|
103
102
|
let chars = 0;
|
|
104
103
|
for (const m of msgs) {
|
|
105
|
-
chars +=
|
|
104
|
+
chars += m.content.length + m.role.length + 4; // role + separators
|
|
106
105
|
}
|
|
107
106
|
return Math.ceil(chars / CHARS_PER_TOKEN);
|
|
108
107
|
}
|
|
@@ -131,10 +130,9 @@ export class LosslessContextStore {
|
|
|
131
130
|
for (const m of hotZone) {
|
|
132
131
|
// Tool results are formatted as "ToolName: output" by create-agent
|
|
133
132
|
if (m.role === 'tool') {
|
|
134
|
-
const
|
|
135
|
-
const colonIdx = text.indexOf(':');
|
|
133
|
+
const colonIdx = m.content.indexOf(':');
|
|
136
134
|
if (colonIdx > 0) {
|
|
137
|
-
liveToolIds.add(
|
|
135
|
+
liveToolIds.add(m.content.slice(0, colonIdx));
|
|
138
136
|
}
|
|
139
137
|
}
|
|
140
138
|
}
|
|
@@ -166,10 +164,9 @@ export class LosslessContextStore {
|
|
|
166
164
|
|
|
167
165
|
// Tool results in cold zone
|
|
168
166
|
if (m.role === 'tool') {
|
|
169
|
-
const
|
|
170
|
-
const
|
|
171
|
-
const
|
|
172
|
-
const toolOutput = colonIdx > 0 ? rawText.slice(colonIdx + 2) : rawText;
|
|
167
|
+
const colonIdx = m.content.indexOf(':');
|
|
168
|
+
const toolName = colonIdx > 0 ? m.content.slice(0, colonIdx) : '';
|
|
169
|
+
const toolOutput = colonIdx > 0 ? m.content.slice(colonIdx + 2) : m.content;
|
|
173
170
|
|
|
174
171
|
// Pass 3a: drop orphaned tool results (tool not referenced in hot zone and output is large)
|
|
175
172
|
if (!liveToolIds.has(toolName) && toolOutput.length > this.stubThreshold * 2) {
|
|
@@ -177,7 +174,7 @@ export class LosslessContextStore {
|
|
|
177
174
|
}
|
|
178
175
|
|
|
179
176
|
// Pass 2: stub large tool outputs
|
|
180
|
-
let content =
|
|
177
|
+
let content = m.content;
|
|
181
178
|
if (toolOutput.length > this.stubThreshold) {
|
|
182
179
|
content = `${toolName}: [output truncated: ${toolOutput.length} chars]`;
|
|
183
180
|
}
|
|
@@ -197,7 +194,7 @@ export class LosslessContextStore {
|
|
|
197
194
|
}
|
|
198
195
|
|
|
199
196
|
private estimateMessageTokens(m: AgentMessage): number {
|
|
200
|
-
return Math.ceil((
|
|
197
|
+
return Math.ceil((m.content.length + m.role.length + 4) / CHARS_PER_TOKEN);
|
|
201
198
|
}
|
|
202
199
|
|
|
203
200
|
private stripMechanicalOverhead(content: string): string {
|