@bluecopa/harness 0.1.0-snapshot.5 → 0.1.0-snapshot.50
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +212 -117
- package/package.json +2 -1
- package/src/agent/create-agent.ts +30 -27
- package/src/agent/types.ts +20 -24
- package/src/arc/agent-runner.ts +955 -0
- package/src/arc/arc-loop.ts +845 -0
- package/src/arc/arc-types.ts +115 -0
- package/src/arc/bridge-tools.ts +170 -0
- package/src/arc/bridged-tool-provider.ts +80 -0
- package/src/arc/consolidation.ts +118 -0
- package/src/arc/context-window.ts +267 -0
- package/src/arc/create-arc-agent.ts +99 -0
- package/src/arc/debug.ts +62 -0
- package/src/arc/episode-compressor.ts +225 -0
- package/src/arc/memory-manager.ts +245 -0
- package/src/arc/message-convert.ts +123 -0
- package/src/arc/multi-model.ts +70 -0
- package/src/arc/object-store/fs-object-store.ts +60 -0
- package/src/arc/object-store/memory-object-store.ts +41 -0
- package/src/arc/object-store/object-store.ts +12 -0
- package/src/arc/profile-builder.ts +172 -0
- package/src/arc/resilience/bulkhead.ts +110 -0
- package/src/arc/resilience/circuit-breaker.ts +112 -0
- package/src/arc/resilience/fallback.ts +27 -0
- package/src/arc/resilience/index.ts +21 -0
- package/src/arc/resilience/pipeline.ts +103 -0
- package/src/arc/resilience/retry.ts +90 -0
- package/src/arc/resilience/timeout.ts +60 -0
- package/src/arc/resilience/types.ts +71 -0
- package/src/arc/result-pager.ts +77 -0
- package/src/arc/sig.ts +115 -0
- package/src/arc/skill-resolver.ts +81 -0
- package/src/arc/stores/episode-store.ts +120 -0
- package/src/arc/stores/long-term-store.ts +86 -0
- package/src/arc/stores/rxdb-setup.ts +113 -0
- package/src/arc/stores/session-memo-store.ts +58 -0
- package/src/arc/tools.ts +67 -0
- package/src/arc/types.ts +363 -0
- package/src/arc/utils.ts +37 -0
- package/src/hooks/middleware.ts +95 -0
- package/src/interfaces/hooks.ts +2 -1
- package/src/interfaces/tool-provider.ts +0 -2
- package/src/loop/context-store.ts +12 -9
- package/src/loop/vercel-agent-loop.ts +44 -118
- package/src/skills/skill-router.ts +12 -6
- package/testing/index.ts +22 -0
- package/testing/scenario-replay.ts +209 -0
- package/testing/scenario-types.ts +38 -0
- package/testing/scripted-llm.ts +230 -0
- package/tests/arc/channel.test.ts +170 -0
- package/tests/arc/context-window.test.ts +396 -0
- package/tests/arc/e2e.test.ts +353 -0
- package/tests/arc/error-paths.test.ts +402 -0
- package/tests/arc/live-integration.test.ts +357 -0
- package/tests/arc/memory-manager.test.ts +384 -0
- package/tests/arc/middleware.test.ts +113 -0
- package/tests/arc/process-interleaving.test.ts +432 -0
- package/tests/arc/process-profiles.test.ts +366 -0
- package/tests/arc/resilience-integration.test.ts +381 -0
- package/tests/arc/resilience.test.ts +575 -0
- package/tests/arc/result-paging.test.ts +392 -0
- package/tests/arc/scenario-driven.test.ts +297 -0
- package/tests/arc/tool-dispatch.test.ts +340 -0
- package/tests/arc/wasm-pbt.test.ts +104 -0
- package/tests/integration/agent-skill-default-from-sandbox.spec.ts +3 -2
- package/tests/unit/structured-messages.spec.ts +1 -1
- package/verify/Cargo.lock +637 -0
- package/verify/Cargo.toml +24 -0
- package/verify/src/lib.rs +5 -0
- package/verify/src/main.rs +165 -0
- package/verify/src/model/context.rs +100 -0
- package/verify/src/model/mod.rs +6 -0
- package/verify/src/model/orchestrator.rs +371 -0
- package/verify/src/model/process.rs +140 -0
- package/verify/src/model/types.rs +273 -0
- package/verify/src/properties/liveness.rs +32 -0
- package/verify/src/properties/mod.rs +4 -0
- package/verify/src/properties/safety.rs +78 -0
- package/verify/src/trace/event.rs +155 -0
- package/verify/src/trace/mod.rs +2 -0
- package/verify/src/trace/validator.rs +367 -0
- package/verify/src/wasm/mod.rs +3 -0
- package/verify/src/wasm/scenario_generator.rs +400 -0
- package/verify/src/wasm/types.rs +104 -0
- package/verify/src/wasm/wasm_validator.rs +107 -0
- package/verify/tests/model_check.rs +49 -0
- package/verify/tests/trace_validation.rs +147 -0
- package/vitest.config.ts +1 -1
package/README.md
CHANGED
|
@@ -2,9 +2,17 @@
|
|
|
2
2
|
|
|
3
3
|
Provider-agnostic TypeScript agent framework with Claude-code-compatible tool semantics.
|
|
4
4
|
|
|
5
|
-
|
|
5
|
+
Published on npm as **`@bluecopa/harness`**.
|
|
6
6
|
|
|
7
|
-
|
|
7
|
+
Two execution modes: a simple single-agent loop (`createAgent` + `VercelAgentLoop`) and a process-based orchestrator (`ArcLoop`) that dispatches parallel processes with context management, memory, and resilience.
|
|
8
|
+
|
|
9
|
+
## Install
|
|
10
|
+
|
|
11
|
+
```bash
|
|
12
|
+
pnpm add @bluecopa/harness
|
|
13
|
+
```
|
|
14
|
+
|
|
15
|
+
## Development
|
|
8
16
|
|
|
9
17
|
```bash
|
|
10
18
|
pnpm install
|
|
@@ -13,9 +21,11 @@ pnpm test
|
|
|
13
21
|
|
|
14
22
|
## Architecture
|
|
15
23
|
|
|
24
|
+
### Single-Agent Loop
|
|
25
|
+
|
|
16
26
|
```
|
|
17
27
|
┌──────────────┐ ┌──────────────┐ ┌──────────────────┐
|
|
18
|
-
│ createAgent
|
|
28
|
+
│ createAgent │────►│ AgentLoop │────►│ LLM (Claude) │
|
|
19
29
|
│ (turn loop) │ │ (nextAction)│ │ │
|
|
20
30
|
└──────┬───────┘ └──────────────┘ └──────────────────┘
|
|
21
31
|
│ │
|
|
@@ -27,20 +37,82 @@ pnpm test
|
|
|
27
37
|
└──────────────┘
|
|
28
38
|
```
|
|
29
39
|
|
|
30
|
-
|
|
31
|
-
2. Each step calls `loop.nextAction(messages)` to get the LLM's decision
|
|
32
|
-
3. If it's a tool call, the harness executes it via `ToolProvider` and appends the result
|
|
33
|
-
4. If it's a final action, the loop ends and returns the result
|
|
40
|
+
### ArcLoop Orchestrator
|
|
34
41
|
|
|
35
|
-
|
|
42
|
+
```
|
|
43
|
+
Orchestrator (ArcLoop — Opus 4.6 by default)
|
|
44
|
+
│ tools: Thread, Check, Cancel, Remember, ReadEpisode
|
|
45
|
+
│
|
|
46
|
+
│ Turn 1 (parallel):
|
|
47
|
+
├──► Process 0 ("read auth", model=fast) ─┐
|
|
48
|
+
├──► Process 1 ("read routes", model=fast) ─┼──► Episodes
|
|
49
|
+
├──► Process 2 ("read tests", model=fast) ─┘
|
|
50
|
+
│
|
|
51
|
+
│ Turn 2 (dispatch dependent work):
|
|
52
|
+
├──► Thread("fix bug", context=[ep0,ep1,ep2]) ──► Episode
|
|
53
|
+
│
|
|
54
|
+
│ Turn 3 (parallel):
|
|
55
|
+
├──► Thread("run tests", context=[ep3]) ─┐
|
|
56
|
+
├──► Thread("update docs", context=[ep3]) ─┘
|
|
57
|
+
│
|
|
58
|
+
└──► Final text response
|
|
59
|
+
```
|
|
36
60
|
|
|
37
|
-
|
|
61
|
+
Full architecture doc: [`docs/arc.md`](../docs/arc.md)
|
|
38
62
|
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
63
|
+
---
|
|
64
|
+
|
|
65
|
+
## ToolProvider
|
|
66
|
+
|
|
67
|
+
The contract for tool execution. All agent modes use this interface.
|
|
68
|
+
|
|
69
|
+
```typescript
|
|
70
|
+
interface ToolProvider {
|
|
71
|
+
bash(command: string, options?: BashOptions): Promise<ToolResult>;
|
|
72
|
+
readFile(path: string, options?: ReadOptions): Promise<ToolResult>;
|
|
73
|
+
writeFile(path: string, content: string): Promise<ToolResult>;
|
|
74
|
+
editFile(path: string, oldText: string, newText: string): Promise<ToolResult>;
|
|
75
|
+
glob(pattern: string, options?: GlobOptions): Promise<ToolResult>;
|
|
76
|
+
grep(pattern: string, path?: string, options?: GrepOptions): Promise<ToolResult>;
|
|
77
|
+
webFetch?(options: WebFetchOptions): Promise<ToolResult>;
|
|
78
|
+
webSearch?(query: string): Promise<ToolResult>;
|
|
79
|
+
capabilities(): ToolProviderCapabilities;
|
|
80
|
+
}
|
|
81
|
+
|
|
82
|
+
interface ToolResult {
|
|
83
|
+
success: boolean;
|
|
84
|
+
output: string;
|
|
85
|
+
error?: string;
|
|
86
|
+
}
|
|
87
|
+
```
|
|
88
|
+
|
|
89
|
+
Built-in implementations:
|
|
90
|
+
|
|
91
|
+
| Provider | Description |
|
|
92
|
+
|----------|-------------|
|
|
93
|
+
| `LocalToolProvider` | Runs tools on the local filesystem |
|
|
94
|
+
| `E2BToolProvider` | Routes tools to a sandbox VM via `ControlPlaneE2BExecutor` |
|
|
95
|
+
| `CompositeToolProvider` | Combines multiple providers (e.g. local filesystem + sandbox bash) |
|
|
96
|
+
|
|
97
|
+
## SandboxProvider
|
|
98
|
+
|
|
99
|
+
Higher-level sandbox operations beyond basic tool calls:
|
|
100
|
+
|
|
101
|
+
```typescript
|
|
102
|
+
interface SandboxProvider {
|
|
103
|
+
exec(command: string, options?: SandboxExecOptions): Promise<SandboxExecResult>;
|
|
104
|
+
readSandboxFile(path: string): Promise<SandboxFileBlob>;
|
|
105
|
+
writeSandboxFile(path: string, content: SandboxFileBlob): Promise<void>;
|
|
106
|
+
}
|
|
107
|
+
```
|
|
108
|
+
|
|
109
|
+
Used by `SkillManager` for executing skill scripts in isolated VMs.
|
|
110
|
+
|
|
111
|
+
## Connecting to a Sandbox
|
|
112
|
+
|
|
113
|
+
```typescript
|
|
42
114
|
import { ControlPlaneE2BExecutor } from './src/providers/control-plane-e2b-executor';
|
|
43
|
-
import {
|
|
115
|
+
import { E2BToolProvider } from './src/providers/e2b-tool-provider';
|
|
44
116
|
|
|
45
117
|
// Connect to sandbox service
|
|
46
118
|
const executor = new ControlPlaneE2BExecutor({
|
|
@@ -50,155 +122,174 @@ const executor = new ControlPlaneE2BExecutor({
|
|
|
50
122
|
});
|
|
51
123
|
await executor.initialize(); // creates a Firecracker VM
|
|
52
124
|
|
|
53
|
-
|
|
54
|
-
const agent = createAgent({
|
|
55
|
-
toolProvider: new E2BToolProvider(executor),
|
|
56
|
-
loop: new VercelAgentLoop(), // needs ANTHROPIC_API_KEY
|
|
57
|
-
});
|
|
125
|
+
const toolProvider = new E2BToolProvider(executor);
|
|
58
126
|
|
|
59
|
-
|
|
60
|
-
console.log(result.output); // LLM's final response
|
|
61
|
-
console.log(result.steps); // number of tool steps
|
|
127
|
+
// ... use with createAgent or ArcLoop
|
|
62
128
|
|
|
63
|
-
await executor.destroy();
|
|
129
|
+
await executor.destroy(); // tears down the VM
|
|
64
130
|
```
|
|
65
131
|
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
### From environment variables
|
|
132
|
+
To configure the executor from environment variables, use `ControlPlaneE2BExecutor.fromEnv()`, which reads `SAMYX_BASE_URL` and `SAMYX_API_KEY`.
|
|
69
133
|
|
|
70
|
-
|
|
134
|
+
---
|
|
71
135
|
|
|
72
|
-
|
|
73
|
-
const executor = ControlPlaneE2BExecutor.fromEnv();
|
|
74
|
-
```
|
|
136
|
+
## Single-Agent Mode (`createAgent`)
|
|
75
137
|
|
|
76
|
-
|
|
138
|
+
For simple tasks that don't need orchestration:
|
|
77
139
|
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
```ts
|
|
140
|
+
```typescript
|
|
81
141
|
import { createAgent } from './src/agent/create-agent';
|
|
82
142
|
import { LocalToolProvider } from './src/providers/local-tool-provider';
|
|
83
143
|
|
|
84
144
|
const agent = createAgent({
|
|
85
145
|
toolProvider: new LocalToolProvider(process.cwd()),
|
|
86
|
-
loop: new VercelAgentLoop(),
|
|
146
|
+
loop: new VercelAgentLoop(), // needs ANTHROPIC_API_KEY
|
|
87
147
|
});
|
|
88
148
|
|
|
89
149
|
const result = await agent.run('list all TypeScript files');
|
|
150
|
+
console.log(result.output);
|
|
90
151
|
```
|
|
91
152
|
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
### Agent creation (`src/agent/create-agent.ts`)
|
|
153
|
+
### Configuration
|
|
95
154
|
|
|
96
|
-
|
|
155
|
+
| Option | Type | Default | Description |
|
|
156
|
+
|--------|------|---------|-------------|
|
|
157
|
+
| `toolProvider` | `ToolProvider` | required | Executes tool calls |
|
|
158
|
+
| `loop` | `AgentLoop` | `VercelAgentLoop` | LLM decision loop |
|
|
159
|
+
| `sandboxProvider` | `SandboxProvider` | — | Higher-level sandbox operations |
|
|
160
|
+
| `maxSteps` | `number` | 30 | Max tool steps per run |
|
|
161
|
+
| `telemetry` | `HarnessTelemetry` | — | OpenTelemetry-style tracing |
|
|
162
|
+
| `skillIndexPath` | `string` | — | Path to skill index JSON for routing |
|
|
97
163
|
|
|
98
|
-
|
|
99
|
-
|--------|------|-------------|
|
|
100
|
-
| `toolProvider` | `ToolProvider` | Required. Executes tool calls |
|
|
101
|
-
| `loop` | `AgentLoop` | LLM decision loop (default: `VercelAgentLoop`) |
|
|
102
|
-
| `sandboxProvider` | `SandboxProvider` | Optional. Higher-level sandbox ops (file download, exec with env) |
|
|
103
|
-
| `maxSteps` | `number` | Max tool steps per run (default: 30) |
|
|
104
|
-
| `telemetry` | `HarnessTelemetry` | Optional. OpenTelemetry-style tracing |
|
|
105
|
-
| `skillIndexPath` | `string` | Optional. Path to skill index JSON |
|
|
164
|
+
### VercelAgentLoop
|
|
106
165
|
|
|
107
|
-
|
|
166
|
+
Calls Claude via the Vercel AI SDK. Supports parallel tool calls and a configurable system prompt.
|
|
108
167
|
|
|
109
|
-
|
|
110
|
-
- Parallel tool calls (returns `ToolBatchAction` when the LLM requests multiple tools at once)
|
|
111
|
-
- Configurable system prompt
|
|
112
|
-
- Model selection via `HARNESS_MODEL` env var (default: `claude-sonnet-4-5`)
|
|
113
|
-
|
|
114
|
-
```ts
|
|
168
|
+
```typescript
|
|
115
169
|
const loop = new VercelAgentLoop({
|
|
116
170
|
systemPrompt: 'You are a helpful coding assistant.',
|
|
171
|
+
model: 'claude-sonnet-4-5', // or HARNESS_MODEL env var
|
|
117
172
|
});
|
|
118
173
|
```
|
|
119
174
|
|
|
120
|
-
###
|
|
175
|
+
### LCMToolLoop
|
|
121
176
|
|
|
122
|
-
|
|
177
|
+
Wraps another loop to add Lossless Context Management (LCM) and optional REPL orchestration:
|
|
123
178
|
|
|
124
|
-
```
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
readFile(path: string, options?: ReadOptions): Promise<ToolResult>;
|
|
128
|
-
writeFile(path: string, content: string): Promise<ToolResult>;
|
|
129
|
-
editFile(path: string, oldText: string, newText: string): Promise<ToolResult>;
|
|
130
|
-
glob(pattern: string, options?: GlobOptions): Promise<ToolResult>;
|
|
131
|
-
grep(pattern: string, path?: string, options?: GrepOptions): Promise<ToolResult>;
|
|
132
|
-
webFetch?(options: WebFetchOptions): Promise<ToolResult>;
|
|
133
|
-
webSearch?(query: string): Promise<ToolResult>;
|
|
134
|
-
capabilities(): ToolProviderCapabilities;
|
|
135
|
-
}
|
|
179
|
+
```typescript
|
|
180
|
+
import { LCMToolLoop } from './src/loop/lcm-tool-loop';
|
|
181
|
+
import { VercelAgentLoop } from './src/loop/vercel-agent-loop';
|
|
136
182
|
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
|
|
183
|
+
const loop = new LCMToolLoop({
|
|
184
|
+
innerLoop: new VercelAgentLoop(),
|
|
185
|
+
toolProvider: mySandboxProvider,
|
|
186
|
+
enableRepl: true, // default: true
|
|
187
|
+
bridgeDir: '/var/run/bridge',
|
|
188
|
+
onActivity: (entry) => console.log(entry),
|
|
189
|
+
onLlmRequest: async (prompt) => callLLM(prompt),
|
|
190
|
+
onWebFetchRequest: async (url) => fetch(url),
|
|
191
|
+
});
|
|
142
192
|
```
|
|
143
193
|
|
|
144
|
-
|
|
194
|
+
**Standard mode**: Lossless context trimming — the LLM always sees a coherent, budget-fitting view of the full conversation.
|
|
145
195
|
|
|
146
|
-
|
|
147
|
-
|----------|-------------|
|
|
148
|
-
| `LocalToolProvider` | Runs tools on the local filesystem |
|
|
149
|
-
| `E2BToolProvider` | Routes tools to an E2B-compatible executor over HTTP |
|
|
150
|
-
| `CompositeToolProvider` | Combines multiple providers (e.g. sandbox + web) |
|
|
196
|
+
**REPL mode**: When the LLM returns a Bash action with the REPL marker, the loop writes a Python script into the sandbox, injects the bridge module, runs the script, and polls for sub-requests (LLM, web_fetch, ask_user) that the harness fulfills.
|
|
151
197
|
|
|
152
|
-
|
|
198
|
+
---
|
|
153
199
|
|
|
154
|
-
|
|
200
|
+
## ArcLoop (Orchestrator Mode)
|
|
155
201
|
|
|
156
|
-
|
|
157
|
-
// Single tool call
|
|
158
|
-
interface ToolCallAction {
|
|
159
|
-
type: 'tool';
|
|
160
|
-
name: 'Bash' | 'Read' | 'Write' | 'Edit' | 'Glob' | 'Grep' | ...;
|
|
161
|
-
args: Record<string, unknown>;
|
|
162
|
-
}
|
|
202
|
+
For complex tasks that benefit from parallel processes, context management, and memory:
|
|
163
203
|
|
|
164
|
-
|
|
165
|
-
|
|
166
|
-
type: 'tool_batch';
|
|
167
|
-
calls: ToolCallAction[];
|
|
168
|
-
}
|
|
204
|
+
```typescript
|
|
205
|
+
import { createArcAgent } from './src/arc/create-arc-agent';
|
|
169
206
|
|
|
170
|
-
|
|
171
|
-
|
|
172
|
-
|
|
173
|
-
|
|
174
|
-
|
|
175
|
-
|
|
207
|
+
const agent = await createArcAgent({
|
|
208
|
+
toolProvider: myToolProvider,
|
|
209
|
+
episodeStore: myEpisodeStore, // required
|
|
210
|
+
sessionMemoStore: mySessionMemoStore, // required
|
|
211
|
+
longTermStore: myLongTermStore, // required
|
|
212
|
+
taskId: 'task-1',
|
|
213
|
+
sessionId: 'session-1',
|
|
214
|
+
});
|
|
176
215
|
|
|
177
|
-
|
|
216
|
+
// Streaming
|
|
217
|
+
for await (const event of agent.stream(messages, signal)) {
|
|
218
|
+
if (event.type === 'text_delta') process.stdout.write(event.text);
|
|
219
|
+
if (event.type === 'process_dispatched') console.log(` → ${event.action}`);
|
|
220
|
+
if (event.type === 'done') console.log(`Done in ${event.stats.durationMs}ms`);
|
|
221
|
+
}
|
|
178
222
|
|
|
179
|
-
|
|
223
|
+
// Non-streaming
|
|
224
|
+
const result = await agent.run(messages, signal);
|
|
225
|
+
```
|
|
180
226
|
|
|
181
|
-
###
|
|
227
|
+
### ArcLoopConfig
|
|
228
|
+
|
|
229
|
+
| Option | Type | Default | Description |
|
|
230
|
+
|--------|------|---------|-------------|
|
|
231
|
+
| `model` | `string` | `'claude-opus-4-6'` | Orchestrator model (ID or tier name) |
|
|
232
|
+
| `modelMap` | `Record<ModelTier, string>` | haiku/sonnet/opus | Maps fast/medium/strong to model IDs |
|
|
233
|
+
| `apiKey` | `string` | — | Anthropic API key |
|
|
234
|
+
| `systemPrompt` | `string` | built-in | Custom orchestrator system prompt |
|
|
235
|
+
| `maxTurns` | `number` | 30 | Max orchestrator turns |
|
|
236
|
+
| `processTimeout` | `number` | 120_000 | Per-process timeout (ms) |
|
|
237
|
+
| `processMaxSteps` | `number` | 20 | Per-process max tool steps |
|
|
238
|
+
| `contextWindowSize` | `number` | 200_000 | Context window in tokens |
|
|
239
|
+
| `outputReserve` | `number` | 20_000 | Tokens reserved for output |
|
|
240
|
+
| `autoMemory` | `boolean` | true | Auto-detect patterns from episodes |
|
|
241
|
+
| `episodeStore` | `EpisodeStore` | required | Stores episode summaries + traces |
|
|
242
|
+
| `sessionMemoStore` | `SessionMemoStore` | required | Stores session memos |
|
|
243
|
+
| `longTermStore` | `LongTermStore` | required | Stores long-term memories |
|
|
244
|
+
| `taskId` | `string` | required | Task identifier |
|
|
245
|
+
| `sessionId` | `string` | required | Session identifier |
|
|
246
|
+
| `toolProvider` | `ToolProvider` | required | Tool execution |
|
|
247
|
+
| `processTools` | `Record<string, AnyTool>` | builtinTools | Tools available inside processes |
|
|
248
|
+
| `extraOrchestratorTools` | `Record<string, AnyTool>` | — | Custom orchestrator tools |
|
|
249
|
+
| `onOrchestratorTool` | `function` | — | Handler for custom orchestrator tools |
|
|
250
|
+
| `resilience` | `ResiliencePolicy` | — | Composable resilience pipeline |
|
|
251
|
+
| `traceWriter` | `function` | — | Callback for trace event emission |
|
|
252
|
+
|
|
253
|
+
### Resilience
|
|
254
|
+
|
|
255
|
+
```typescript
|
|
256
|
+
import { resilience } from './src/arc/resilience';
|
|
257
|
+
|
|
258
|
+
const pipeline = resilience()
|
|
259
|
+
.retry({ maxRetries: 2, baseDelay: 1000 })
|
|
260
|
+
.timeout({ durationMs: 30_000 })
|
|
261
|
+
.circuitBreaker({ failureThreshold: 5 })
|
|
262
|
+
.build();
|
|
263
|
+
|
|
264
|
+
const agent = await createArcAgent({
|
|
265
|
+
// ...config
|
|
266
|
+
resilience: pipeline,
|
|
267
|
+
});
|
|
268
|
+
```
|
|
182
269
|
|
|
183
|
-
|
|
270
|
+
### Trace Emission
|
|
184
271
|
|
|
185
|
-
```
|
|
186
|
-
|
|
187
|
-
|
|
188
|
-
|
|
189
|
-
|
|
190
|
-
}
|
|
272
|
+
```typescript
|
|
273
|
+
const traces: TraceEvent[] = [];
|
|
274
|
+
const agent = await createArcAgent({
|
|
275
|
+
// ...config
|
|
276
|
+
traceWriter: (event) => traces.push(event),
|
|
277
|
+
});
|
|
191
278
|
```
|
|
192
279
|
|
|
193
|
-
|
|
280
|
+
Traces can be validated against the formal model: `cd verify && cargo run -- trace file.ndjson`
|
|
194
281
|
|
|
195
|
-
|
|
282
|
+
---
|
|
196
283
|
|
|
197
|
-
## Package
|
|
284
|
+
## Package Layout
|
|
198
285
|
|
|
199
286
|
```
|
|
200
287
|
src/
|
|
201
288
|
├── agent/ # createAgent, step executor, types
|
|
289
|
+
├── arc/ # ArcLoop orchestrator, processes, memory, resilience
|
|
290
|
+
│ ├── resilience/ # Retry, circuit breaker, timeout, bulkhead, fallback
|
|
291
|
+
│ ├── stores/ # RxDB + in-memory store implementations
|
|
292
|
+
│ └── object-store/ # Pluggable cloud sync (fs, memory)
|
|
202
293
|
├── interfaces/ # ToolProvider, SandboxProvider, AgentLoop contracts
|
|
203
294
|
├── loop/ # VercelAgentLoop, LCMToolLoop
|
|
204
295
|
├── providers/ # LocalToolProvider, E2BToolProvider, ControlPlaneE2BExecutor
|
|
@@ -206,16 +297,20 @@ src/
|
|
|
206
297
|
├── hooks/ # Pre/post tool call hooks
|
|
207
298
|
├── permissions/ # Tool permission checks
|
|
208
299
|
├── sessions/ # Session persistence
|
|
209
|
-
├── subagents/ # Subagent spawning
|
|
300
|
+
├── subagents/ # Subagent spawning
|
|
210
301
|
├── skills/ # Skill index, routing, and management
|
|
211
302
|
├── optimization/ # Benchmark runner
|
|
212
303
|
└── observability/ # OpenTelemetry integration
|
|
304
|
+
|
|
305
|
+
verify/ # Rust formal verification (Stateright model checker)
|
|
306
|
+
testing/ # Adversarial scenario replay harness
|
|
307
|
+
tests/ # Vitest test suite
|
|
213
308
|
```
|
|
214
309
|
|
|
215
310
|
## Documentation
|
|
216
311
|
|
|
217
|
-
-
|
|
218
|
-
-
|
|
219
|
-
-
|
|
220
|
-
- Release process
|
|
221
|
-
-
|
|
312
|
+
- [Arc architecture](../docs/arc.md) — process model, context window, memory, resilience, verification
|
|
313
|
+
- [Testing](../docs/testing.md) — test layers, running tests, writing new tests
|
|
314
|
+
- [Sandbox setup](../docs/PUBLIC_SANDBOX.md) — deploying the sandbox service
|
|
315
|
+
- [Release process](../docs/RELEASE.md) — versioning and publishing
|
|
316
|
+
- [Example](../examples/chat-assistant/src/chat.ts) — complete working chat assistant
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@bluecopa/harness",
|
|
3
|
-
"version": "0.1.0-snapshot.
|
|
3
|
+
"version": "0.1.0-snapshot.50",
|
|
4
4
|
"description": "Provider-agnostic TypeScript agent framework",
|
|
5
5
|
"license": "UNLICENSED",
|
|
6
6
|
"scripts": {
|
|
@@ -10,6 +10,7 @@
|
|
|
10
10
|
"dependencies": {
|
|
11
11
|
"@ai-sdk/anthropic": "^3.0.48",
|
|
12
12
|
"ai": "^6.0.101",
|
|
13
|
+
"rxdb": "^15.39.0",
|
|
13
14
|
"zod": "^4.1.11"
|
|
14
15
|
},
|
|
15
16
|
"devDependencies": {
|
|
@@ -7,14 +7,12 @@ import type { HarnessTelemetry } from '../observability/otel';
|
|
|
7
7
|
import { HookRunner } from '../hooks/hook-runner';
|
|
8
8
|
import { PermissionManager } from '../permissions/permission-manager';
|
|
9
9
|
import { VercelAgentLoop } from '../loop/vercel-agent-loop';
|
|
10
|
-
export type { SystemPromptBlock, VercelAgentLoopConfig } from '../loop/vercel-agent-loop';
|
|
11
|
-
export type { PrepareStepContext, PrepareStepResult } from './types';
|
|
12
10
|
import { SkillManager } from '../skills/skill-manager';
|
|
13
11
|
import { SkillRouter } from '../skills/skill-router';
|
|
14
12
|
import type { SkillSummary } from '../skills/skill-types';
|
|
15
13
|
import { SingleFlightStepExecutor } from './step-executor';
|
|
16
|
-
import type { AgentAction, AgentLoop, AgentMessage, AgentRunResult, AgentStreamEvent,
|
|
17
|
-
export type { AgentAction, AgentLoop, AgentMessage, AgentRunResult, AgentStreamEvent,
|
|
14
|
+
import type { AgentAction, AgentLoop, AgentMessage, AgentRunResult, AgentStreamEvent, ToolCallAction, ToolBatchAction, ToolCallInfo, ToolResultInfo } from './types';
|
|
15
|
+
export type { AgentAction, AgentLoop, AgentMessage, AgentRunResult, AgentStreamEvent, ToolCallAction, ToolBatchAction, ToolCallInfo, ToolResultInfo };
|
|
18
16
|
export { HookRunner } from '../hooks/hook-runner';
|
|
19
17
|
export { PermissionManager } from '../permissions/permission-manager';
|
|
20
18
|
export type { PermissionMode, PermissionResolver, PermissionRequest } from '../permissions/permission-manager';
|
|
@@ -39,6 +37,8 @@ export interface AgentRuntime {
|
|
|
39
37
|
/** Custom tool executor. Called for every tool action. Return null to fall through to built-in dispatch.
|
|
40
38
|
* When hookRunner/permissionManager are provided on the runtime, they are automatically applied before/after this callback — no manual wiring needed. */
|
|
41
39
|
executeToolAction?: (action: ToolCallAction) => Promise<ToolResult | null>;
|
|
40
|
+
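/** Progress callback fired before/after each tool call during run(). For batched calls, all tool_start events fire before the batch executes, and every tool_end reports the wall-clock duration of the whole batch rather than of the individual call. */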
|
|
41
|
+
onToolProgress?: (event: { type: 'tool_start'; name: string; args: Record<string, unknown> } | { type: 'tool_end'; name: string; success: boolean; durationMs: number }) => void;
|
|
42
42
|
}
|
|
43
43
|
|
|
44
44
|
/**
|
|
@@ -220,12 +220,9 @@ function toStreamResult(r: ToolResult): { success: boolean; output: string; erro
|
|
|
220
220
|
return base;
|
|
221
221
|
}
|
|
222
222
|
|
|
223
|
-
/** Format content string for
|
|
223
|
+
/** Format a display-friendly content string for tool results (used in content field). */
|
|
224
224
|
function formatToolResultContent(call: ToolCallAction, result: ToolResult): string {
|
|
225
|
-
|
|
226
|
-
const content = result.success
|
|
227
|
-
? (result.modelOutput ?? result.output)
|
|
228
|
-
: `ERROR: ${result.error ?? 'unknown failure'}`;
|
|
225
|
+
const content = result.success ? result.output : `ERROR: ${result.error ?? 'unknown failure'}`;
|
|
229
226
|
switch (call.name) {
|
|
230
227
|
case 'Write':
|
|
231
228
|
return `Write(${call.args.path}): ${result.success ? 'ok' : content}`;
|
|
@@ -520,11 +517,6 @@ export function createAgent(runtime: AgentRuntime) {
|
|
|
520
517
|
? { nextAction: runtime.nextAction }
|
|
521
518
|
: new VercelAgentLoop());
|
|
522
519
|
|
|
523
|
-
/** Read lastUsage from the loop if it's a VercelAgentLoop. */
|
|
524
|
-
function getLoopUsage(): StepUsage | undefined {
|
|
525
|
-
return loop instanceof VercelAgentLoop ? loop.lastUsage : undefined;
|
|
526
|
-
}
|
|
527
|
-
|
|
528
520
|
async function resolveSkillContext(prompt: string): Promise<string> {
|
|
529
521
|
if (!skillManager || !skillIndexPath) return '';
|
|
530
522
|
|
|
@@ -606,14 +598,18 @@ export function createAgent(runtime: AgentRuntime) {
|
|
|
606
598
|
|
|
607
599
|
// Execute valid calls via batch (sequential sandbox ops) or parallel fallback
|
|
608
600
|
if (validCalls.length > 0) {
|
|
601
|
+
for (const c of validCalls) runtime.onToolProgress?.({ type: 'tool_start', name: c.name, args: c.args });
|
|
602
|
+
const batchStart = Date.now();
|
|
609
603
|
const results = await executeBatch(validCalls, runtime.toolProvider, runtime);
|
|
604
|
+
const batchMs = Date.now() - batchStart;
|
|
610
605
|
for (let i = 0; i < validCalls.length; i++) {
|
|
611
606
|
const call = validCalls[i]!;
|
|
612
607
|
const r = results[i]!;
|
|
608
|
+
runtime.onToolProgress?.({ type: 'tool_end', name: call.name, success: r.success, durationMs: batchMs });
|
|
613
609
|
if (!r.success) {
|
|
614
610
|
recordAgentError(runtime.telemetry);
|
|
615
611
|
}
|
|
616
|
-
const resultText = r.success ?
|
|
612
|
+
const resultText = r.success ? r.output : `ERROR: ${r.error ?? 'unknown failure'}`;
|
|
617
613
|
messages.push({
|
|
618
614
|
role: 'tool',
|
|
619
615
|
content: formatToolResultContent(call, r),
|
|
@@ -669,6 +665,8 @@ export function createAgent(runtime: AgentRuntime) {
|
|
|
669
665
|
} else {
|
|
670
666
|
consecutiveInvalid = 0;
|
|
671
667
|
}
|
|
668
|
+
runtime.onToolProgress?.({ type: 'tool_start', name: action.name, args: action.args });
|
|
669
|
+
const singleStart = Date.now();
|
|
672
670
|
const result = validationError
|
|
673
671
|
? ({ success: false, output: '', error: validationError } as ToolResult)
|
|
674
672
|
: await executor.run(async () => {
|
|
@@ -682,10 +680,11 @@ export function createAgent(runtime: AgentRuntime) {
|
|
|
682
680
|
};
|
|
683
681
|
}
|
|
684
682
|
});
|
|
683
|
+
runtime.onToolProgress?.({ type: 'tool_end', name: action.name, success: result.success, durationMs: Date.now() - singleStart });
|
|
685
684
|
if (!result.success) {
|
|
686
685
|
recordAgentError(runtime.telemetry);
|
|
687
686
|
}
|
|
688
|
-
const singleResultText = result.success ?
|
|
687
|
+
const singleResultText = result.success ? result.output : `ERROR: ${result.error ?? 'unknown failure'}`;
|
|
689
688
|
messages.push({
|
|
690
689
|
role: 'tool',
|
|
691
690
|
content: formatToolResultContent(action, result),
|
|
@@ -728,8 +727,7 @@ export function createAgent(runtime: AgentRuntime) {
|
|
|
728
727
|
if (event.type === 'text_delta') {
|
|
729
728
|
finalText += event.text;
|
|
730
729
|
yield event;
|
|
731
|
-
}
|
|
732
|
-
if (event.type === 'tool_start') {
|
|
730
|
+
} else if (event.type === 'tool_start') {
|
|
733
731
|
pendingTools.push({
|
|
734
732
|
type: 'tool',
|
|
735
733
|
name: event.name,
|
|
@@ -737,13 +735,18 @@ export function createAgent(runtime: AgentRuntime) {
|
|
|
737
735
|
...(event.toolCallId != null ? { toolCallId: event.toolCallId } : {}),
|
|
738
736
|
});
|
|
739
737
|
yield event;
|
|
738
|
+
} else {
|
|
739
|
+
// Forward all other events (tool_end, step_start, step_end, done)
|
|
740
|
+
// from self-managing loops like ArcLoop
|
|
741
|
+
yield event;
|
|
742
|
+
if (event.type === 'done') return;
|
|
740
743
|
}
|
|
741
744
|
}
|
|
742
745
|
|
|
743
746
|
// If no tools → final response
|
|
744
747
|
if (pendingTools.length === 0) {
|
|
745
748
|
messages.push({ role: 'assistant', content: finalText });
|
|
746
|
-
|
|
749
|
+
yield { type: 'step_end', step };
|
|
747
750
|
yield { type: 'done', output: finalText, steps: step };
|
|
748
751
|
return;
|
|
749
752
|
}
|
|
@@ -769,7 +772,7 @@ export function createAgent(runtime: AgentRuntime) {
|
|
|
769
772
|
if (action.type === 'final') {
|
|
770
773
|
yield { type: 'text_delta', text: action.content };
|
|
771
774
|
messages.push({ role: 'assistant', content: action.content });
|
|
772
|
-
|
|
775
|
+
yield { type: 'step_end', step };
|
|
773
776
|
yield { type: 'done', output: action.content, steps: step };
|
|
774
777
|
return;
|
|
775
778
|
}
|
|
@@ -781,7 +784,7 @@ export function createAgent(runtime: AgentRuntime) {
|
|
|
781
784
|
try {
|
|
782
785
|
const r = await executeTool(runtime.toolProvider, call, runtime);
|
|
783
786
|
yield { type: 'tool_end', name: call.name, result: toStreamResult(r) };
|
|
784
|
-
const rText = r.success ?
|
|
787
|
+
const rText = r.success ? r.output : `ERROR: ${r.error ?? 'unknown failure'}`;
|
|
785
788
|
messages.push({
|
|
786
789
|
role: 'tool',
|
|
787
790
|
content: formatToolResultContent(call, r),
|
|
@@ -803,7 +806,7 @@ export function createAgent(runtime: AgentRuntime) {
|
|
|
803
806
|
try {
|
|
804
807
|
const r = await executeTool(runtime.toolProvider, action, runtime);
|
|
805
808
|
yield { type: 'tool_end', name: action.name, result: toStreamResult(r) };
|
|
806
|
-
const rText = r.success ?
|
|
809
|
+
const rText = r.success ? r.output : `ERROR: ${r.error ?? 'unknown failure'}`;
|
|
807
810
|
messages.push({
|
|
808
811
|
role: 'tool',
|
|
809
812
|
content: formatToolResultContent(action, r),
|
|
@@ -819,7 +822,7 @@ export function createAgent(runtime: AgentRuntime) {
|
|
|
819
822
|
});
|
|
820
823
|
}
|
|
821
824
|
}
|
|
822
|
-
|
|
825
|
+
yield { type: 'step_end', step };
|
|
823
826
|
continue;
|
|
824
827
|
}
|
|
825
828
|
|
|
@@ -829,7 +832,7 @@ export function createAgent(runtime: AgentRuntime) {
|
|
|
829
832
|
const call = pendingTools[i]!;
|
|
830
833
|
const r = results[i]!;
|
|
831
834
|
yield { type: 'tool_end', name: call.name, result: toStreamResult(r) };
|
|
832
|
-
const rText = r.success ?
|
|
835
|
+
const rText = r.success ? r.output : `ERROR: ${r.error ?? 'unknown failure'}`;
|
|
833
836
|
messages.push({
|
|
834
837
|
role: 'tool',
|
|
835
838
|
content: formatToolResultContent(call, r),
|
|
@@ -847,7 +850,7 @@ export function createAgent(runtime: AgentRuntime) {
|
|
|
847
850
|
|
|
848
851
|
if (action.type === 'final') {
|
|
849
852
|
messages.push({ role: 'assistant', content: action.content });
|
|
850
|
-
|
|
853
|
+
yield { type: 'step_end', step };
|
|
851
854
|
yield { type: 'done', output: action.content, steps: step };
|
|
852
855
|
return;
|
|
853
856
|
}
|
|
@@ -871,7 +874,7 @@ export function createAgent(runtime: AgentRuntime) {
|
|
|
871
874
|
const call = calls[i]!;
|
|
872
875
|
const r = results[i]!;
|
|
873
876
|
yield { type: 'tool_end', name: call.name, result: toStreamResult(r) };
|
|
874
|
-
const rText = r.success ?
|
|
877
|
+
const rText = r.success ? r.output : `ERROR: ${r.error ?? 'unknown failure'}`;
|
|
875
878
|
messages.push({
|
|
876
879
|
role: 'tool',
|
|
877
880
|
content: formatToolResultContent(call, r),
|
|
@@ -885,7 +888,7 @@ export function createAgent(runtime: AgentRuntime) {
|
|
|
885
888
|
}
|
|
886
889
|
}
|
|
887
890
|
|
|
888
|
-
|
|
891
|
+
yield { type: 'step_end', step };
|
|
889
892
|
}
|
|
890
893
|
|
|
891
894
|
yield { type: 'done', output: 'ERROR: max steps exceeded', steps: maxSteps };
|