@bluecopa/harness 0.0.0-snapshot.137
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +316 -0
- package/dist/arc/index.d.ts +796 -0
- package/dist/arc/index.js +2863 -0
- package/dist/arc/index.js.map +1 -0
- package/dist/observability/otel.d.ts +36 -0
- package/dist/observability/otel.js +73 -0
- package/dist/observability/otel.js.map +1 -0
- package/dist/shared-types-DRxnerLT.d.ts +138 -0
- package/dist/skills/index.d.ts +67 -0
- package/dist/skills/index.js +282 -0
- package/dist/skills/index.js.map +1 -0
- package/package.json +48 -0
package/README.md
ADDED
|
@@ -0,0 +1,316 @@
|
|
|
1
|
+
# harness
|
|
2
|
+
|
|
3
|
+
Provider-agnostic TypeScript agent framework with Claude-code-compatible tool semantics.
|
|
4
|
+
|
|
5
|
+
Published on npm as **`@bluecopa/harness`**.
|
|
6
|
+
|
|
7
|
+
The framework offers two execution modes: a simple single-agent loop (`createAgent` + `VercelAgentLoop`) and a process-based orchestrator (`ArcLoop`) that dispatches parallel processes with context management, memory, and resilience.
|
|
8
|
+
|
|
9
|
+
## Install
|
|
10
|
+
|
|
11
|
+
```bash
|
|
12
|
+
pnpm add @bluecopa/harness
|
|
13
|
+
```
|
|
14
|
+
|
|
15
|
+
## Development
|
|
16
|
+
|
|
17
|
+
```bash
|
|
18
|
+
pnpm install
|
|
19
|
+
pnpm test
|
|
20
|
+
```
|
|
21
|
+
|
|
22
|
+
## Architecture
|
|
23
|
+
|
|
24
|
+
### Single-Agent Loop
|
|
25
|
+
|
|
26
|
+
```
|
|
27
|
+
┌──────────────┐ ┌──────────────┐ ┌──────────────────┐
|
|
28
|
+
│ createAgent │────►│ AgentLoop │────►│ LLM (Claude) │
|
|
29
|
+
│ (turn loop) │ │ (nextAction)│ │ │
|
|
30
|
+
└──────┬───────┘ └──────────────┘ └──────────────────┘
|
|
31
|
+
│ │
|
|
32
|
+
│ tool call returns ToolCallAction
|
|
33
|
+
▼ or FinalAction
|
|
34
|
+
┌──────────────┐
|
|
35
|
+
│ ToolProvider │──── bash, read, write, edit, glob, grep
|
|
36
|
+
│ (executor) │ runs in sandbox or local machine
|
|
37
|
+
└──────────────┘
|
|
38
|
+
```
|
|
39
|
+
|
|
40
|
+
### ArcLoop Orchestrator
|
|
41
|
+
|
|
42
|
+
```
|
|
43
|
+
Orchestrator (ArcLoop — Opus 4.6 by default)
|
|
44
|
+
│ tools: Thread, Check, Cancel, Remember, ReadEpisode
|
|
45
|
+
│
|
|
46
|
+
│ Turn 1 (parallel):
|
|
47
|
+
├──► Process 0 ("read auth", model=fast) ─┐
|
|
48
|
+
├──► Process 1 ("read routes", model=fast) ─┼──► Episodes
|
|
49
|
+
├──► Process 2 ("read tests", model=fast) ─┘
|
|
50
|
+
│
|
|
51
|
+
│ Turn 2 (dispatch dependent work):
|
|
52
|
+
├──► Thread("fix bug", context=[ep0,ep1,ep2]) ──► Episode
|
|
53
|
+
│
|
|
54
|
+
│ Turn 3 (parallel):
|
|
55
|
+
├──► Thread("run tests", context=[ep3]) ─┐
|
|
56
|
+
├──► Thread("update docs", context=[ep3]) ─┘
|
|
57
|
+
│
|
|
58
|
+
└──► Final text response
|
|
59
|
+
```
|
|
60
|
+
|
|
61
|
+
Full architecture doc: [`docs/arc.md`](../docs/arc.md)
|
|
62
|
+
|
|
63
|
+
---
|
|
64
|
+
|
|
65
|
+
## ToolProvider
|
|
66
|
+
|
|
67
|
+
The contract for tool execution. All agent modes use this interface.
|
|
68
|
+
|
|
69
|
+
```typescript
|
|
70
|
+
interface ToolProvider {
|
|
71
|
+
bash(command: string, options?: BashOptions): Promise<ToolResult>;
|
|
72
|
+
readFile(path: string, options?: ReadOptions): Promise<ToolResult>;
|
|
73
|
+
writeFile(path: string, content: string): Promise<ToolResult>;
|
|
74
|
+
editFile(path: string, oldText: string, newText: string): Promise<ToolResult>;
|
|
75
|
+
glob(pattern: string, options?: GlobOptions): Promise<ToolResult>;
|
|
76
|
+
grep(pattern: string, path?: string, options?: GrepOptions): Promise<ToolResult>;
|
|
77
|
+
webFetch?(options: WebFetchOptions): Promise<ToolResult>;
|
|
78
|
+
webSearch?(query: string): Promise<ToolResult>;
|
|
79
|
+
capabilities(): ToolProviderCapabilities;
|
|
80
|
+
}
|
|
81
|
+
|
|
82
|
+
interface ToolResult {
|
|
83
|
+
success: boolean;
|
|
84
|
+
output: string;
|
|
85
|
+
error?: string;
|
|
86
|
+
}
|
|
87
|
+
```
|
|
88
|
+
|
|
89
|
+
Built-in implementations:
|
|
90
|
+
|
|
91
|
+
| Provider | Description |
|
|
92
|
+
|----------|-------------|
|
|
93
|
+
| `LocalToolProvider` | Runs tools on the local filesystem |
|
|
94
|
+
| `E2BToolProvider` | Routes tools to a sandbox VM via `ControlPlaneE2BExecutor` |
|
|
95
|
+
| `CompositeToolProvider` | Combines multiple providers (e.g. local filesystem + sandbox bash) |
|
|
96
|
+
|
|
97
|
+
## SandboxProvider
|
|
98
|
+
|
|
99
|
+
Higher-level sandbox operations beyond basic tool calls:
|
|
100
|
+
|
|
101
|
+
```typescript
|
|
102
|
+
interface SandboxProvider {
|
|
103
|
+
exec(command: string, options?: SandboxExecOptions): Promise<SandboxExecResult>;
|
|
104
|
+
readSandboxFile(path: string): Promise<SandboxFileBlob>;
|
|
105
|
+
writeSandboxFile(path: string, content: SandboxFileBlob): Promise<void>;
|
|
106
|
+
}
|
|
107
|
+
```
|
|
108
|
+
|
|
109
|
+
`SandboxProvider` is used by `SkillManager` to execute skill scripts in isolated VMs.
|
|
110
|
+
|
|
111
|
+
## Connecting to a Sandbox
|
|
112
|
+
|
|
113
|
+
```typescript
|
|
114
|
+
import { ControlPlaneE2BExecutor } from './src/providers/control-plane-e2b-executor';
|
|
115
|
+
import { E2BToolProvider } from './src/providers/e2b-tool-provider';
|
|
116
|
+
|
|
117
|
+
// Connect to sandbox service
|
|
118
|
+
const executor = new ControlPlaneE2BExecutor({
|
|
119
|
+
baseUrl: process.env.SAMYX_BASE_URL!,
|
|
120
|
+
apiKey: process.env.SAMYX_API_KEY!,
|
|
121
|
+
templateId: 'polyglot-v1',
|
|
122
|
+
});
|
|
123
|
+
await executor.initialize(); // creates a Firecracker VM
|
|
124
|
+
|
|
125
|
+
const toolProvider = new E2BToolProvider(executor);
|
|
126
|
+
|
|
127
|
+
// ... use with createAgent or ArcLoop
|
|
128
|
+
|
|
129
|
+
await executor.destroy(); // tears down the VM
|
|
130
|
+
```
|
|
131
|
+
|
|
132
|
+
To configure the executor from environment variables instead, call `ControlPlaneE2BExecutor.fromEnv()`, which reads `SAMYX_BASE_URL` and `SAMYX_API_KEY`.
|
|
133
|
+
|
|
134
|
+
---
|
|
135
|
+
|
|
136
|
+
## Single-Agent Mode (`createAgent`)
|
|
137
|
+
|
|
138
|
+
For simple tasks that don't need orchestration:
|
|
139
|
+
|
|
140
|
+
```typescript
|
|
141
|
+
import { createAgent } from './src/agent/create-agent';
|
|
142
|
+
import { LocalToolProvider } from './src/providers/local-tool-provider';
|
|
143
|
+
|
|
144
|
+
const agent = createAgent({
|
|
145
|
+
toolProvider: new LocalToolProvider(process.cwd()),
|
|
146
|
+
loop: new VercelAgentLoop(), // import from './src/loop/vercel-agent-loop'; needs ANTHROPIC_API_KEY
|
|
147
|
+
});
|
|
148
|
+
|
|
149
|
+
const result = await agent.run('list all TypeScript files');
|
|
150
|
+
console.log(result.output);
|
|
151
|
+
```
|
|
152
|
+
|
|
153
|
+
### Configuration
|
|
154
|
+
|
|
155
|
+
| Option | Type | Default | Description |
|
|
156
|
+
|--------|------|---------|-------------|
|
|
157
|
+
| `toolProvider` | `ToolProvider` | required | Executes tool calls |
|
|
158
|
+
| `loop` | `AgentLoop` | `VercelAgentLoop` | LLM decision loop |
|
|
159
|
+
| `sandboxProvider` | `SandboxProvider` | — | Higher-level sandbox operations |
|
|
160
|
+
| `maxSteps` | `number` | 30 | Max tool steps per run |
|
|
161
|
+
| `telemetry` | `HarnessTelemetry` | — | OpenTelemetry-style tracing |
|
|
162
|
+
| `skillIndexPath` | `string` | — | Path to skill index JSON for routing |
|
|
163
|
+
|
|
164
|
+
### VercelAgentLoop
|
|
165
|
+
|
|
166
|
+
Calls Claude via the Vercel AI SDK. Supports parallel tool calls and a configurable system prompt.
|
|
167
|
+
|
|
168
|
+
```typescript
|
|
169
|
+
const loop = new VercelAgentLoop({
|
|
170
|
+
systemPrompt: 'You are a helpful coding assistant.',
|
|
171
|
+
model: 'claude-sonnet-4-5', // or HARNESS_MODEL env var
|
|
172
|
+
});
|
|
173
|
+
```
|
|
174
|
+
|
|
175
|
+
### LCMToolLoop
|
|
176
|
+
|
|
177
|
+
Wraps another loop to add Lossless Context Management (LCM) and optional REPL orchestration:
|
|
178
|
+
|
|
179
|
+
```typescript
|
|
180
|
+
import { LCMToolLoop } from './src/loop/lcm-tool-loop';
|
|
181
|
+
import { VercelAgentLoop } from './src/loop/vercel-agent-loop';
|
|
182
|
+
|
|
183
|
+
const loop = new LCMToolLoop({
|
|
184
|
+
innerLoop: new VercelAgentLoop(),
|
|
185
|
+
toolProvider: mySandboxProvider,
|
|
186
|
+
enableRepl: true, // default: true
|
|
187
|
+
bridgeDir: '/var/run/bridge',
|
|
188
|
+
onActivity: (entry) => console.log(entry),
|
|
189
|
+
onLlmRequest: async (prompt) => callLLM(prompt),
|
|
190
|
+
onWebFetchRequest: async (url) => fetch(url),
|
|
191
|
+
});
|
|
192
|
+
```
|
|
193
|
+
|
|
194
|
+
**Standard mode**: Lossless context trimming — the LLM always sees a coherent, budget-fitting view of the full conversation.
|
|
195
|
+
|
|
196
|
+
**REPL mode**: When the LLM returns a Bash action with the REPL marker, the loop writes a Python script into the sandbox, injects the bridge module, runs the script, and polls for sub-requests (LLM, web_fetch, ask_user) that the harness fulfills.
|
|
197
|
+
|
|
198
|
+
---
|
|
199
|
+
|
|
200
|
+
## ArcLoop (Orchestrator Mode)
|
|
201
|
+
|
|
202
|
+
For complex tasks that benefit from parallel processes, context management, and memory:
|
|
203
|
+
|
|
204
|
+
```typescript
|
|
205
|
+
import { createArcAgent } from './src/arc/create-arc-agent';
|
|
206
|
+
|
|
207
|
+
const agent = await createArcAgent({
|
|
208
|
+
toolProvider: myToolProvider,
|
|
209
|
+
episodeStore: myEpisodeStore, // required
|
|
210
|
+
sessionMemoStore: mySessionMemoStore, // required
|
|
211
|
+
longTermStore: myLongTermStore, // required
|
|
212
|
+
taskId: 'task-1',
|
|
213
|
+
sessionId: 'session-1',
|
|
214
|
+
});
|
|
215
|
+
|
|
216
|
+
// Streaming
|
|
217
|
+
for await (const event of agent.stream(messages, signal)) {
|
|
218
|
+
if (event.type === 'text_delta') process.stdout.write(event.text);
|
|
219
|
+
if (event.type === 'process_dispatched') console.log(` → ${event.action}`);
|
|
220
|
+
if (event.type === 'done') console.log(`Done in ${event.stats.durationMs}ms`);
|
|
221
|
+
}
|
|
222
|
+
|
|
223
|
+
// Non-streaming
|
|
224
|
+
const result = await agent.run(messages, signal);
|
|
225
|
+
```
|
|
226
|
+
|
|
227
|
+
### ArcLoopConfig
|
|
228
|
+
|
|
229
|
+
| Option | Type | Default | Description |
|
|
230
|
+
|--------|------|---------|-------------|
|
|
231
|
+
| `model` | `string` | `'claude-opus-4-6'` | Orchestrator model (ID or tier name) |
|
|
232
|
+
| `modelMap` | `Record<ModelTier, string>` | haiku/sonnet/opus | Maps fast/medium/strong to model IDs |
|
|
233
|
+
| `apiKey` | `string` | — | Anthropic API key |
|
|
234
|
+
| `systemPrompt` | `string` | built-in | Custom orchestrator system prompt |
|
|
235
|
+
| `maxTurns` | `number` | 30 | Max orchestrator turns |
|
|
236
|
+
| `processTimeout` | `number` | 120_000 | Per-process timeout (ms) |
|
|
237
|
+
| `processMaxSteps` | `number` | 20 | Per-process max tool steps |
|
|
238
|
+
| `contextWindowSize` | `number` | 200_000 | Context window in tokens |
|
|
239
|
+
| `outputReserve` | `number` | 20_000 | Tokens reserved for output |
|
|
240
|
+
| `autoMemory` | `boolean` | true | Auto-detect patterns from episodes |
|
|
241
|
+
| `episodeStore` | `EpisodeStore` | required | Stores episode summaries + traces |
|
|
242
|
+
| `sessionMemoStore` | `SessionMemoStore` | required | Stores session memos |
|
|
243
|
+
| `longTermStore` | `LongTermStore` | required | Stores long-term memories |
|
|
244
|
+
| `taskId` | `string` | required | Task identifier |
|
|
245
|
+
| `sessionId` | `string` | required | Session identifier |
|
|
246
|
+
| `toolProvider` | `ToolProvider` | required | Tool execution |
|
|
247
|
+
| `processTools` | `Record<string, AnyTool>` | builtinTools | Tools available inside processes |
|
|
248
|
+
| `extraOrchestratorTools` | `Record<string, AnyTool>` | — | Custom orchestrator tools |
|
|
249
|
+
| `onOrchestratorTool` | `function` | — | Handler for custom orchestrator tools |
|
|
250
|
+
| `resilience` | `ResiliencePolicy` | — | Composable resilience pipeline |
|
|
251
|
+
| `traceWriter` | `function` | — | Callback for trace event emission |
|
|
252
|
+
|
|
253
|
+
### Resilience
|
|
254
|
+
|
|
255
|
+
```typescript
|
|
256
|
+
import { resilience } from './src/arc/resilience';
|
|
257
|
+
|
|
258
|
+
const pipeline = resilience()
|
|
259
|
+
.retry({ maxRetries: 2, baseDelay: 1000 })
|
|
260
|
+
.timeout({ durationMs: 30_000 })
|
|
261
|
+
.circuitBreaker({ failureThreshold: 5 })
|
|
262
|
+
.build();
|
|
263
|
+
|
|
264
|
+
const agent = await createArcAgent({
|
|
265
|
+
// ...config
|
|
266
|
+
resilience: pipeline,
|
|
267
|
+
});
|
|
268
|
+
```
|
|
269
|
+
|
|
270
|
+
### Trace Emission
|
|
271
|
+
|
|
272
|
+
```typescript
|
|
273
|
+
const traces: TraceEvent[] = [];
|
|
274
|
+
const agent = await createArcAgent({
|
|
275
|
+
// ...config
|
|
276
|
+
traceWriter: (event) => traces.push(event),
|
|
277
|
+
});
|
|
278
|
+
```
|
|
279
|
+
|
|
280
|
+
Traces can be validated against the formal model: `cd verify && cargo run -- trace file.ndjson`
|
|
281
|
+
|
|
282
|
+
---
|
|
283
|
+
|
|
284
|
+
## Package Layout
|
|
285
|
+
|
|
286
|
+
```
|
|
287
|
+
src/
|
|
288
|
+
├── agent/ # createAgent, step executor, types
|
|
289
|
+
├── arc/ # ArcLoop orchestrator, processes, memory, resilience
|
|
290
|
+
│ ├── resilience/ # Retry, circuit breaker, timeout, bulkhead, fallback
|
|
291
|
+
│ ├── stores/ # RxDB + in-memory store implementations
|
|
292
|
+
│ └── object-store/ # Pluggable cloud sync (fs, memory)
|
|
293
|
+
├── interfaces/ # ToolProvider, SandboxProvider, AgentLoop contracts
|
|
294
|
+
├── loop/ # VercelAgentLoop, LCMToolLoop
|
|
295
|
+
├── providers/ # LocalToolProvider, E2BToolProvider, ControlPlaneE2BExecutor
|
|
296
|
+
├── context/ # Token tracking and compaction
|
|
297
|
+
├── hooks/ # Pre/post tool call hooks
|
|
298
|
+
├── permissions/ # Tool permission checks
|
|
299
|
+
├── sessions/ # Session persistence
|
|
300
|
+
├── subagents/ # Subagent spawning
|
|
301
|
+
├── skills/ # Skill index, routing, and management
|
|
302
|
+
├── optimization/ # Benchmark runner
|
|
303
|
+
└── observability/ # OpenTelemetry integration
|
|
304
|
+
|
|
305
|
+
verify/ # Rust formal verification (Stateright model checker)
|
|
306
|
+
testing/ # Adversarial scenario replay harness
|
|
307
|
+
tests/ # Vitest test suite
|
|
308
|
+
```
|
|
309
|
+
|
|
310
|
+
## Documentation
|
|
311
|
+
|
|
312
|
+
- [Arc architecture](../docs/arc.md) — process model, context window, memory, resilience, verification
|
|
313
|
+
- [Testing](../docs/testing.md) — test layers, running tests, writing new tests
|
|
314
|
+
- [Sandbox setup](../docs/PUBLIC_SANDBOX.md) — deploying the sandbox service
|
|
315
|
+
- [Release process](../docs/RELEASE.md) — versioning and publishing
|
|
316
|
+
- [Example](../examples/chat-assistant/src/chat.ts) — complete working chat assistant
|