@bluecopa/harness 0.1.0-snapshot.98 → 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/AGENTS.md +18 -0
- package/README.md +117 -212
- package/docs/guides/observability.md +32 -0
- package/docs/guides/providers.md +51 -0
- package/docs/guides/skills.md +25 -0
- package/docs/security/skill-sandbox-threat-model.md +20 -0
- package/package.json +1 -29
- package/src/agent/create-agent.ts +884 -0
- package/src/agent/create-tools.ts +33 -0
- package/src/agent/step-executor.ts +15 -0
- package/src/agent/types.ts +57 -0
- package/src/context/llm-compaction-strategy.ts +37 -0
- package/src/context/prepare-step.ts +65 -0
- package/src/context/token-tracker.ts +26 -0
- package/src/extracted/manifest.json +10 -0
- package/src/extracted/prompts/compaction.md +5 -0
- package/src/extracted/prompts/system.md +5 -0
- package/src/extracted/tools.json +82 -0
- package/src/hooks/hook-runner.ts +22 -0
- package/src/hooks/tool-wrappers.ts +64 -0
- package/src/interfaces/compaction-strategy.ts +18 -0
- package/src/interfaces/hooks.ts +24 -0
- package/src/interfaces/sandbox-provider.ts +29 -0
- package/src/interfaces/session-store.ts +48 -0
- package/src/interfaces/tool-provider.ts +70 -0
- package/src/loop/bridge.ts +363 -0
- package/src/loop/context-store.ts +207 -0
- package/src/loop/lcm-tool-loop.ts +163 -0
- package/src/loop/vercel-agent-loop.ts +279 -0
- package/src/observability/context.ts +17 -0
- package/src/observability/metrics.ts +27 -0
- package/src/observability/otel.ts +105 -0
- package/src/observability/tracing.ts +13 -0
- package/src/optimization/agent-evaluator.ts +40 -0
- package/src/optimization/config-serializer.ts +16 -0
- package/src/optimization/optimization-runner.ts +39 -0
- package/src/optimization/trace-collector.ts +33 -0
- package/src/permissions/permission-manager.ts +34 -0
- package/src/providers/composite-tool-provider.ts +72 -0
- package/src/providers/control-plane-e2b-executor.ts +218 -0
- package/src/providers/e2b-tool-provider.ts +68 -0
- package/src/providers/local-tool-provider.ts +190 -0
- package/src/providers/skill-sandbox-provider.ts +46 -0
- package/src/sessions/file-session-store.ts +61 -0
- package/src/sessions/in-memory-session-store.ts +39 -0
- package/src/sessions/session-manager.ts +44 -0
- package/src/skills/skill-loader.ts +52 -0
- package/src/skills/skill-manager.ts +175 -0
- package/src/skills/skill-router.ts +99 -0
- package/src/skills/skill-types.ts +26 -0
- package/src/subagents/subagent-manager.ts +22 -0
- package/src/subagents/task-tool.ts +13 -0
- package/tests/integration/agent-loop-basic.spec.ts +56 -0
- package/tests/integration/agent-skill-default-from-sandbox.spec.ts +66 -0
- package/tests/integration/concurrency-single-turn.spec.ts +35 -0
- package/tests/integration/otel-metrics-emission.spec.ts +62 -0
- package/tests/integration/otel-trace-propagation.spec.ts +48 -0
- package/tests/integration/parity-benchmark.spec.ts +45 -0
- package/tests/integration/provider-local-smoke.spec.ts +63 -0
- package/tests/integration/session-resume.spec.ts +30 -0
- package/tests/integration/skill-install-rollback.spec.ts +64 -0
- package/tests/integration/skill-sandbox-file-blob.spec.ts +54 -0
- package/tests/integration/skills-progressive-disclosure.spec.ts +61 -0
- package/tests/integration/streaming-compaction-boundary.spec.ts +43 -0
- package/tests/integration/structured-messages-agent.spec.ts +265 -0
- package/tests/integration/subagent-isolation.spec.ts +24 -0
- package/tests/security/skill-sandbox-isolation.spec.ts +51 -0
- package/tests/unit/create-tools-schema-parity.spec.ts +22 -0
- package/tests/unit/extracted-manifest.spec.ts +41 -0
- package/tests/unit/interfaces-contract.spec.ts +101 -0
- package/tests/unit/structured-messages.spec.ts +176 -0
- package/tests/unit/token-tracker.spec.ts +22 -0
- package/tsconfig.json +14 -0
- package/vitest.config.ts +7 -0
- package/dist/arc/app-adapter.d.ts +0 -101
- package/dist/arc/app-adapter.js +0 -312
- package/dist/arc/app-adapter.js.map +0 -1
- package/dist/arc/create-arc-agent.d.ts +0 -50
- package/dist/arc/create-arc-agent.js +0 -2926
- package/dist/arc/create-arc-agent.js.map +0 -1
- package/dist/arc/profile-builder.d.ts +0 -49
- package/dist/arc/profile-builder.js +0 -163
- package/dist/arc/profile-builder.js.map +0 -1
- package/dist/loop/vercel-agent-loop.d.ts +0 -99
- package/dist/loop/vercel-agent-loop.js +0 -308
- package/dist/loop/vercel-agent-loop.js.map +0 -1
- package/dist/types-g-3DvSSE.d.ts +0 -745
package/AGENTS.md
ADDED
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
# AGENTS.md
|
|
2
|
+
|
|
3
|
+
Guidance for agents working in `harness/`.
|
|
4
|
+
Reference: https://agents.md/
|
|
5
|
+
|
|
6
|
+
## Scope
|
|
7
|
+
`harness/` contains the TypeScript agent framework core.
|
|
8
|
+
|
|
9
|
+
## Rules
|
|
10
|
+
- Keep API changes explicit and typed.
|
|
11
|
+
- Maintain deterministic behavior in agent loop, compaction, and tool execution.
|
|
12
|
+
- Preserve compatibility of extracted tool schemas unless intentionally versioned.
|
|
13
|
+
|
|
14
|
+
## Commands
|
|
15
|
+
```bash
|
|
16
|
+
pnpm install
|
|
17
|
+
pnpm test
|
|
18
|
+
```
|
package/README.md
CHANGED
|
@@ -2,17 +2,9 @@
|
|
|
2
2
|
|
|
3
3
|
Provider-agnostic TypeScript agent framework with Claude-code-compatible tool semantics.
|
|
4
4
|
|
|
5
|
-
|
|
5
|
+
The harness provides the core loop that drives an AI agent: send messages to an LLM, execute the tool calls it returns, feed results back, and repeat until the LLM produces a final text response.
|
|
6
6
|
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
## Install
|
|
10
|
-
|
|
11
|
-
```bash
|
|
12
|
-
pnpm add @bluecopa/harness
|
|
13
|
-
```
|
|
14
|
-
|
|
15
|
-
## Development
|
|
7
|
+
## Quickstart
|
|
16
8
|
|
|
17
9
|
```bash
|
|
18
10
|
pnpm install
|
|
@@ -21,11 +13,9 @@ pnpm test
|
|
|
21
13
|
|
|
22
14
|
## Architecture
|
|
23
15
|
|
|
24
|
-
### Single-Agent Loop
|
|
25
|
-
|
|
26
16
|
```
|
|
27
17
|
┌──────────────┐ ┌──────────────┐ ┌──────────────────┐
|
|
28
|
-
│ createAgent
|
|
18
|
+
│ createAgent │────▶│ AgentLoop │────▶│ LLM (Claude) │
|
|
29
19
|
│ (turn loop) │ │ (nextAction)│ │ │
|
|
30
20
|
└──────┬───────┘ └──────────────┘ └──────────────────┘
|
|
31
21
|
│ │
|
|
@@ -37,82 +27,20 @@ pnpm test
|
|
|
37
27
|
└──────────────┘
|
|
38
28
|
```
|
|
39
29
|
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
│ tools: Thread, Check, Cancel, Remember, ReadEpisode
|
|
45
|
-
│
|
|
46
|
-
│ Turn 1 (parallel):
|
|
47
|
-
├──► Process 0 ("read auth", model=fast) ─┐
|
|
48
|
-
├──► Process 1 ("read routes", model=fast) ─┼──► Episodes
|
|
49
|
-
├──► Process 2 ("read tests", model=fast) ─┘
|
|
50
|
-
│
|
|
51
|
-
│ Turn 2 (dispatch dependent work):
|
|
52
|
-
├──► Thread("fix bug", context=[ep0,ep1,ep2]) ──► Episode
|
|
53
|
-
│
|
|
54
|
-
│ Turn 3 (parallel):
|
|
55
|
-
├──► Thread("run tests", context=[ep3]) ─┐
|
|
56
|
-
├──► Thread("update docs", context=[ep3]) ─┘
|
|
57
|
-
│
|
|
58
|
-
└──► Final text response
|
|
59
|
-
```
|
|
60
|
-
|
|
61
|
-
Full architecture doc: [`docs/arc.md`](../docs/arc.md)
|
|
62
|
-
|
|
63
|
-
---
|
|
64
|
-
|
|
65
|
-
## ToolProvider
|
|
66
|
-
|
|
67
|
-
The contract for tool execution. All agent modes use this interface.
|
|
68
|
-
|
|
69
|
-
```typescript
|
|
70
|
-
interface ToolProvider {
|
|
71
|
-
bash(command: string, options?: BashOptions): Promise<ToolResult>;
|
|
72
|
-
readFile(path: string, options?: ReadOptions): Promise<ToolResult>;
|
|
73
|
-
writeFile(path: string, content: string): Promise<ToolResult>;
|
|
74
|
-
editFile(path: string, oldText: string, newText: string): Promise<ToolResult>;
|
|
75
|
-
glob(pattern: string, options?: GlobOptions): Promise<ToolResult>;
|
|
76
|
-
grep(pattern: string, path?: string, options?: GrepOptions): Promise<ToolResult>;
|
|
77
|
-
webFetch?(options: WebFetchOptions): Promise<ToolResult>;
|
|
78
|
-
webSearch?(query: string): Promise<ToolResult>;
|
|
79
|
-
capabilities(): ToolProviderCapabilities;
|
|
80
|
-
}
|
|
81
|
-
|
|
82
|
-
interface ToolResult {
|
|
83
|
-
success: boolean;
|
|
84
|
-
output: string;
|
|
85
|
-
error?: string;
|
|
86
|
-
}
|
|
87
|
-
```
|
|
88
|
-
|
|
89
|
-
Built-in implementations:
|
|
30
|
+
1. `createAgent` drives a deterministic step loop
|
|
31
|
+
2. Each step calls `loop.nextAction(messages)` to get the LLM's decision
|
|
32
|
+
3. If it's a tool call, the harness executes it via `ToolProvider` and appends the result
|
|
33
|
+
4. If it's a final action, the loop ends and returns the result
|
|
90
34
|
|
|
91
|
-
|
|
92
|
-
|----------|-------------|
|
|
93
|
-
| `LocalToolProvider` | Runs tools on the local filesystem |
|
|
94
|
-
| `E2BToolProvider` | Routes tools to a sandbox VM via `ControlPlaneE2BExecutor` |
|
|
95
|
-
| `CompositeToolProvider` | Combines multiple providers (e.g. local filesystem + sandbox bash) |
|
|
35
|
+
## Using with the sandbox
|
|
96
36
|
|
|
97
|
-
|
|
37
|
+
The most common setup connects the harness to a running sandbox service via `ControlPlaneE2BExecutor`:
|
|
98
38
|
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
```typescript
|
|
102
|
-
interface SandboxProvider {
|
|
103
|
-
exec(command: string, options?: SandboxExecOptions): Promise<SandboxExecResult>;
|
|
104
|
-
readSandboxFile(path: string): Promise<SandboxFileBlob>;
|
|
105
|
-
writeSandboxFile(path: string, content: SandboxFileBlob): Promise<void>;
|
|
106
|
-
}
|
|
107
|
-
```
|
|
108
|
-
|
|
109
|
-
Used by `SkillManager` for executing skill scripts in isolated VMs.
|
|
110
|
-
|
|
111
|
-
## Connecting to a Sandbox
|
|
112
|
-
|
|
113
|
-
```typescript
|
|
114
|
-
import { ControlPlaneE2BExecutor } from './src/providers/control-plane-e2b-executor';
|
|
39
|
+
```ts
|
|
40
|
+
import { createAgent } from './src/agent/create-agent';
|
|
115
41
|
import { E2BToolProvider } from './src/providers/e2b-tool-provider';
|
|
42
|
+
import { ControlPlaneE2BExecutor } from './src/providers/control-plane-e2b-executor';
|
|
43
|
+
import { VercelAgentLoop } from './src/loop/vercel-agent-loop';
|
|
116
44
|
|
|
117
45
|
// Connect to sandbox service
|
|
118
46
|
const executor = new ControlPlaneE2BExecutor({
|
|
@@ -122,174 +50,155 @@ const executor = new ControlPlaneE2BExecutor({
|
|
|
122
50
|
});
|
|
123
51
|
await executor.initialize(); // creates a Firecracker VM
|
|
124
52
|
|
|
125
|
-
|
|
53
|
+
// Build and run the agent
|
|
54
|
+
const agent = createAgent({
|
|
55
|
+
toolProvider: new E2BToolProvider(executor),
|
|
56
|
+
loop: new VercelAgentLoop(), // needs ANTHROPIC_API_KEY
|
|
57
|
+
});
|
|
126
58
|
|
|
127
|
-
|
|
59
|
+
const result = await agent.run('create a bar chart of sales data');
|
|
60
|
+
console.log(result.output); // LLM's final response
|
|
61
|
+
console.log(result.steps); // number of tool steps
|
|
128
62
|
|
|
129
|
-
await executor.destroy();
|
|
63
|
+
await executor.destroy(); // tears down the VM
|
|
130
64
|
```
|
|
131
65
|
|
|
132
|
-
|
|
66
|
+
For a complete working example, see [`examples/chat-assistant/src/chat.ts`](../examples/chat-assistant/src/chat.ts).
|
|
67
|
+
|
|
68
|
+
### From environment variables
|
|
133
69
|
|
|
134
|
-
|
|
70
|
+
`ControlPlaneE2BExecutor.fromEnv()` reads `SAMYX_BASE_URL` and `SAMYX_API_KEY` automatically:
|
|
135
71
|
|
|
136
|
-
|
|
72
|
+
```ts
|
|
73
|
+
const executor = ControlPlaneE2BExecutor.fromEnv();
|
|
74
|
+
```
|
|
137
75
|
|
|
138
|
-
|
|
76
|
+
## Using locally (no sandbox)
|
|
139
77
|
|
|
140
|
-
|
|
78
|
+
For development without a sandbox service, use `LocalToolProvider`, which runs tools on the local machine:
|
|
79
|
+
|
|
80
|
+
```ts
|
|
141
81
|
import { createAgent } from './src/agent/create-agent';
|
|
142
82
|
import { LocalToolProvider } from './src/providers/local-tool-provider';
|
|
143
83
|
|
|
144
84
|
const agent = createAgent({
|
|
145
85
|
toolProvider: new LocalToolProvider(process.cwd()),
|
|
146
|
-
loop: new VercelAgentLoop(),
|
|
86
|
+
loop: new VercelAgentLoop(),
|
|
147
87
|
});
|
|
148
88
|
|
|
149
89
|
const result = await agent.run('list all TypeScript files');
|
|
150
|
-
console.log(result.output);
|
|
151
90
|
```
|
|
152
91
|
|
|
153
|
-
|
|
92
|
+
## Key modules
|
|
93
|
+
|
|
94
|
+
### Agent creation (`src/agent/create-agent.ts`)
|
|
154
95
|
|
|
155
|
-
|
|
156
|
-
|--------|------|---------|-------------|
|
|
157
|
-
| `toolProvider` | `ToolProvider` | required | Executes tool calls |
|
|
158
|
-
| `loop` | `AgentLoop` | `VercelAgentLoop` | LLM decision loop |
|
|
159
|
-
| `sandboxProvider` | `SandboxProvider` | — | Higher-level sandbox operations |
|
|
160
|
-
| `maxSteps` | `number` | 30 | Max tool steps per run |
|
|
161
|
-
| `telemetry` | `HarnessTelemetry` | — | OpenTelemetry-style tracing |
|
|
162
|
-
| `skillIndexPath` | `string` | — | Path to skill index JSON for routing |
|
|
96
|
+
`createAgent(options)` returns an agent with a `.run(prompt, options?)` method. Options:
|
|
163
97
|
|
|
164
|
-
|
|
98
|
+
| Option | Type | Description |
|
|
99
|
+
|--------|------|-------------|
|
|
100
|
+
| `toolProvider` | `ToolProvider` | Required. Executes tool calls |
|
|
101
|
+
| `loop` | `AgentLoop` | LLM decision loop (default: `VercelAgentLoop`) |
|
|
102
|
+
| `sandboxProvider` | `SandboxProvider` | Optional. Higher-level sandbox ops (file download, exec with env) |
|
|
103
|
+
| `maxSteps` | `number` | Max tool steps per run (default: 30) |
|
|
104
|
+
| `telemetry` | `HarnessTelemetry` | Optional. OpenTelemetry-style tracing |
|
|
105
|
+
| `skillIndexPath` | `string` | Optional. Path to skill index JSON |
|
|
165
106
|
|
|
166
|
-
|
|
107
|
+
### Agent loop (`src/loop/vercel-agent-loop.ts`)
|
|
167
108
|
|
|
168
|
-
|
|
109
|
+
`VercelAgentLoop` calls Claude via the Vercel AI SDK. It supports:
|
|
110
|
+
- Parallel tool calls (returns `ToolBatchAction` when the LLM requests multiple tools at once)
|
|
111
|
+
- Configurable system prompt
|
|
112
|
+
- Model selection via `HARNESS_MODEL` env var (default: `claude-sonnet-4-5`)
|
|
113
|
+
|
|
114
|
+
```ts
|
|
169
115
|
const loop = new VercelAgentLoop({
|
|
170
116
|
systemPrompt: 'You are a helpful coding assistant.',
|
|
171
|
-
model: 'claude-sonnet-4-5', // or HARNESS_MODEL env var
|
|
172
117
|
});
|
|
173
118
|
```
|
|
174
119
|
|
|
175
|
-
###
|
|
120
|
+
### Tool provider (`src/interfaces/tool-provider.ts`)
|
|
176
121
|
|
|
177
|
-
|
|
122
|
+
The contract for tool execution:
|
|
178
123
|
|
|
179
|
-
```
|
|
180
|
-
|
|
181
|
-
|
|
124
|
+
```ts
|
|
125
|
+
interface ToolProvider {
|
|
126
|
+
bash(command: string, options?: BashOptions): Promise<ToolResult>;
|
|
127
|
+
readFile(path: string, options?: ReadOptions): Promise<ToolResult>;
|
|
128
|
+
writeFile(path: string, content: string): Promise<ToolResult>;
|
|
129
|
+
editFile(path: string, oldText: string, newText: string): Promise<ToolResult>;
|
|
130
|
+
glob(pattern: string, options?: GlobOptions): Promise<ToolResult>;
|
|
131
|
+
grep(pattern: string, path?: string, options?: GrepOptions): Promise<ToolResult>;
|
|
132
|
+
webFetch?(options: WebFetchOptions): Promise<ToolResult>;
|
|
133
|
+
webSearch?(query: string): Promise<ToolResult>;
|
|
134
|
+
capabilities(): ToolProviderCapabilities;
|
|
135
|
+
}
|
|
182
136
|
|
|
183
|
-
|
|
184
|
-
|
|
185
|
-
|
|
186
|
-
|
|
187
|
-
|
|
188
|
-
onActivity: (entry) => console.log(entry),
|
|
189
|
-
onLlmRequest: async (prompt) => callLLM(prompt),
|
|
190
|
-
onWebFetchRequest: async (url) => fetch(url),
|
|
191
|
-
});
|
|
137
|
+
interface ToolResult {
|
|
138
|
+
success: boolean;
|
|
139
|
+
output: string;
|
|
140
|
+
error?: string;
|
|
141
|
+
}
|
|
192
142
|
```
|
|
193
143
|
|
|
194
|
-
|
|
195
|
-
|
|
196
|
-
**REPL mode**: When the LLM returns a Bash action with the REPL marker, the loop writes a Python script into the sandbox, injects the bridge module, runs the script, and polls for sub-requests (LLM, web_fetch, ask_user) that the harness fulfills.
|
|
197
|
-
|
|
198
|
-
---
|
|
144
|
+
Built-in implementations:
|
|
199
145
|
|
|
200
|
-
|
|
146
|
+
| Provider | Description |
|
|
147
|
+
|----------|-------------|
|
|
148
|
+
| `LocalToolProvider` | Runs tools on the local filesystem |
|
|
149
|
+
| `E2BToolProvider` | Routes tools to an E2B-compatible executor over HTTP |
|
|
150
|
+
| `CompositeToolProvider` | Combines multiple providers (e.g. sandbox + web) |
|
|
201
151
|
|
|
202
|
-
|
|
152
|
+
### Action types (`src/agent/types.ts`)
|
|
203
153
|
|
|
204
|
-
|
|
205
|
-
import { createArcAgent } from './src/arc/create-arc-agent';
|
|
154
|
+
The LLM returns one of these action types each turn:
|
|
206
155
|
|
|
207
|
-
|
|
208
|
-
|
|
209
|
-
|
|
210
|
-
|
|
211
|
-
|
|
212
|
-
|
|
213
|
-
|
|
214
|
-
});
|
|
156
|
+
```ts
|
|
157
|
+
// Single tool call
|
|
158
|
+
interface ToolCallAction {
|
|
159
|
+
type: 'tool';
|
|
160
|
+
name: 'Bash' | 'Read' | 'Write' | 'Edit' | 'Glob' | 'Grep' | ...;
|
|
161
|
+
args: Record<string, unknown>;
|
|
162
|
+
}
|
|
215
163
|
|
|
216
|
-
//
|
|
217
|
-
|
|
218
|
-
|
|
219
|
-
|
|
220
|
-
if (event.type === 'done') console.log(`Done in ${event.stats.durationMs}ms`);
|
|
164
|
+
// Multiple independent tool calls (executed in parallel)
|
|
165
|
+
interface ToolBatchAction {
|
|
166
|
+
type: 'tool_batch';
|
|
167
|
+
calls: ToolCallAction[];
|
|
221
168
|
}
|
|
222
169
|
|
|
223
|
-
//
|
|
224
|
-
|
|
170
|
+
// Final text response (ends the loop)
|
|
171
|
+
interface FinalAction {
|
|
172
|
+
type: 'final';
|
|
173
|
+
content: string;
|
|
174
|
+
}
|
|
225
175
|
```
|
|
226
176
|
|
|
227
|
-
###
|
|
228
|
-
|
|
229
|
-
| Option | Type | Default | Description |
|
|
230
|
-
|--------|------|---------|-------------|
|
|
231
|
-
| `model` | `string` | `'claude-opus-4-6'` | Orchestrator model (ID or tier name) |
|
|
232
|
-
| `modelMap` | `Record<ModelTier, string>` | haiku/sonnet/opus | Maps fast/medium/strong to model IDs |
|
|
233
|
-
| `apiKey` | `string` | — | Anthropic API key |
|
|
234
|
-
| `systemPrompt` | `string` | built-in | Custom orchestrator system prompt |
|
|
235
|
-
| `maxTurns` | `number` | 30 | Max orchestrator turns |
|
|
236
|
-
| `processTimeout` | `number` | 120_000 | Per-process timeout (ms) |
|
|
237
|
-
| `processMaxSteps` | `number` | 20 | Per-process max tool steps |
|
|
238
|
-
| `contextWindowSize` | `number` | 200_000 | Context window in tokens |
|
|
239
|
-
| `outputReserve` | `number` | 20_000 | Tokens reserved for output |
|
|
240
|
-
| `autoMemory` | `boolean` | true | Auto-detect patterns from episodes |
|
|
241
|
-
| `episodeStore` | `EpisodeStore` | required | Stores episode summaries + traces |
|
|
242
|
-
| `sessionMemoStore` | `SessionMemoStore` | required | Stores session memos |
|
|
243
|
-
| `longTermStore` | `LongTermStore` | required | Stores long-term memories |
|
|
244
|
-
| `taskId` | `string` | required | Task identifier |
|
|
245
|
-
| `sessionId` | `string` | required | Session identifier |
|
|
246
|
-
| `toolProvider` | `ToolProvider` | required | Tool execution |
|
|
247
|
-
| `processTools` | `Record<string, AnyTool>` | builtinTools | Tools available inside processes |
|
|
248
|
-
| `extraOrchestratorTools` | `Record<string, AnyTool>` | — | Custom orchestrator tools |
|
|
249
|
-
| `onOrchestratorTool` | `function` | — | Handler for custom orchestrator tools |
|
|
250
|
-
| `resilience` | `ResiliencePolicy` | — | Composable resilience pipeline |
|
|
251
|
-
| `traceWriter` | `function` | — | Callback for trace event emission |
|
|
252
|
-
|
|
253
|
-
### Resilience
|
|
254
|
-
|
|
255
|
-
```typescript
|
|
256
|
-
import { resilience } from './src/arc/resilience';
|
|
257
|
-
|
|
258
|
-
const pipeline = resilience()
|
|
259
|
-
.retry({ maxRetries: 2, baseDelay: 1000 })
|
|
260
|
-
.timeout({ durationMs: 30_000 })
|
|
261
|
-
.circuitBreaker({ failureThreshold: 5 })
|
|
262
|
-
.build();
|
|
263
|
-
|
|
264
|
-
const agent = await createArcAgent({
|
|
265
|
-
// ...config
|
|
266
|
-
resilience: pipeline,
|
|
267
|
-
});
|
|
268
|
-
```
|
|
177
|
+
### LCM tool loop (`src/loop/lcm-tool-loop.ts`)
|
|
269
178
|
|
|
270
|
-
|
|
179
|
+
`LCMToolLoop` wraps another loop to add LCM-based tool routing, REPL script execution, and bridge-based tool dispatch. Used in the chat-assistant example.
|
|
271
180
|
|
|
272
|
-
|
|
273
|
-
|
|
274
|
-
|
|
275
|
-
|
|
276
|
-
|
|
277
|
-
|
|
181
|
+
### Sandbox provider (`src/interfaces/sandbox-provider.ts`)
|
|
182
|
+
|
|
183
|
+
Higher-level sandbox operations beyond basic tool calls:
|
|
184
|
+
|
|
185
|
+
```ts
|
|
186
|
+
interface SandboxProvider {
|
|
187
|
+
exec(command: string, options?: SandboxExecOptions): Promise<SandboxExecResult>;
|
|
188
|
+
readSandboxFile(path: string): Promise<SandboxFileBlob>;
|
|
189
|
+
writeSandboxFile(path: string, content: SandboxFileBlob): Promise<void>;
|
|
190
|
+
}
|
|
278
191
|
```
|
|
279
192
|
|
|
280
|
-
|
|
193
|
+
### Observability (`src/observability/otel.ts`)
|
|
281
194
|
|
|
282
|
-
|
|
195
|
+
`HarnessTelemetry` provides OpenTelemetry-style spans and metrics for agent runs.
|
|
283
196
|
|
|
284
|
-
## Package
|
|
197
|
+
## Package layout
|
|
285
198
|
|
|
286
199
|
```
|
|
287
200
|
src/
|
|
288
201
|
├── agent/ # createAgent, step executor, types
|
|
289
|
-
├── arc/ # ArcLoop orchestrator, processes, memory, resilience
|
|
290
|
-
│ ├── resilience/ # Retry, circuit breaker, timeout, bulkhead, fallback
|
|
291
|
-
│ ├── stores/ # RxDB + in-memory store implementations
|
|
292
|
-
│ └── object-store/ # Pluggable cloud sync (fs, memory)
|
|
293
202
|
├── interfaces/ # ToolProvider, SandboxProvider, AgentLoop contracts
|
|
294
203
|
├── loop/ # VercelAgentLoop, LCMToolLoop
|
|
295
204
|
├── providers/ # LocalToolProvider, E2BToolProvider, ControlPlaneE2BExecutor
|
|
@@ -297,20 +206,16 @@ src/
|
|
|
297
206
|
├── hooks/ # Pre/post tool call hooks
|
|
298
207
|
├── permissions/ # Tool permission checks
|
|
299
208
|
├── sessions/ # Session persistence
|
|
300
|
-
├── subagents/ # Subagent spawning
|
|
209
|
+
├── subagents/ # Subagent spawning and task tools
|
|
301
210
|
├── skills/ # Skill index, routing, and management
|
|
302
211
|
├── optimization/ # Benchmark runner
|
|
303
212
|
└── observability/ # OpenTelemetry integration
|
|
304
|
-
|
|
305
|
-
verify/ # Rust formal verification (Stateright model checker)
|
|
306
|
-
testing/ # Adversarial scenario replay harness
|
|
307
|
-
tests/ # Vitest test suite
|
|
308
213
|
```
|
|
309
214
|
|
|
310
215
|
## Documentation
|
|
311
216
|
|
|
312
|
-
-
|
|
313
|
-
-
|
|
314
|
-
-
|
|
315
|
-
-
|
|
316
|
-
- [
|
|
217
|
+
- Provider guide: `docs/guides/providers.md`
|
|
218
|
+
- Skills guide: `docs/guides/skills.md`
|
|
219
|
+
- Observability guide: `docs/guides/observability.md`
|
|
220
|
+
- Release process: `../docs/RELEASE.md`
|
|
221
|
+
- Full example: [`../examples/chat-assistant/src/chat.ts`](../examples/chat-assistant/src/chat.ts)
|
|
@@ -0,0 +1,32 @@
|
|
|
1
|
+
# Observability Guide
|
|
2
|
+
|
|
3
|
+
Harness emits OpenTelemetry-style traces and metrics through `HarnessTelemetry`.
|
|
4
|
+
|
|
5
|
+
## Spans
|
|
6
|
+
- `agent.run`
|
|
7
|
+
- `agent.step`
|
|
8
|
+
- `tool.call`
|
|
9
|
+
- `context.compaction`
|
|
10
|
+
- `skill.exec`
|
|
11
|
+
- `subagent.run`
|
|
12
|
+
|
|
13
|
+
## Metrics
|
|
14
|
+
- `agent_steps_total`
|
|
15
|
+
- `tool_calls_total`
|
|
16
|
+
- `tool_call_duration_ms`
|
|
17
|
+
- `compactions_total`
|
|
18
|
+
- `agent_errors_total`
|
|
19
|
+
|
|
20
|
+
## Correlation Fields
|
|
21
|
+
Attach these fields to logs where available:
|
|
22
|
+
- `trace_id`
|
|
23
|
+
- `span_id`
|
|
24
|
+
- `run_id`
|
|
25
|
+
- `session_id`
|
|
26
|
+
|
|
27
|
+
## Disable Mode
|
|
28
|
+
Create telemetry in disabled mode for zero-impact execution:
|
|
29
|
+
|
|
30
|
+
```ts
|
|
31
|
+
const telemetry = new HarnessTelemetry(false);
|
|
32
|
+
```
|
|
@@ -0,0 +1,51 @@
|
|
|
1
|
+
# Providers Guide
|
|
2
|
+
|
|
3
|
+
## ToolProvider
|
|
4
|
+
Implement the `ToolProvider` interface to expose agent tools (`Bash`, `Read`, `Write`, `Edit`, `Glob`, `Grep`).
|
|
5
|
+
|
|
6
|
+
Included foundations:
|
|
7
|
+
- `LocalToolProvider`
|
|
8
|
+
- `CompositeToolProvider`
|
|
9
|
+
- `E2BToolProvider` (executor-backed adapter)
|
|
10
|
+
|
|
11
|
+
## SandboxProvider
|
|
12
|
+
Use `SandboxProvider` for infrastructure actions (skill execution, setup/install tasks). Keep it separate from `ToolProvider`.
|
|
13
|
+
|
|
14
|
+
The current sandbox file contract is binary-first:
|
|
15
|
+
|
|
16
|
+
```ts
|
|
17
|
+
type SandboxFileBlob = {
|
|
18
|
+
data: Uint8Array;
|
|
19
|
+
mimeType?: string;
|
|
20
|
+
filename?: string;
|
|
21
|
+
};
|
|
22
|
+
|
|
23
|
+
interface SandboxProvider {
|
|
24
|
+
exec(command: string, options?: SandboxExecOptions): Promise<SandboxExecResult>;
|
|
25
|
+
readSandboxFile(path: string): Promise<SandboxFileBlob>;
|
|
26
|
+
writeSandboxFile(path: string, content: SandboxFileBlob): Promise<void>;
|
|
27
|
+
}
|
|
28
|
+
```
|
|
29
|
+
|
|
30
|
+
Use `mimeType`/`filename` for transport metadata (for example raw download endpoints). Keep file contents in `data` as bytes.
|
|
31
|
+
|
|
32
|
+
## Capability Routing
|
|
33
|
+
`CompositeToolProvider` routes calls to the first provider that advertises each capability.
|
|
34
|
+
|
|
35
|
+
## Default Skill Sandbox
|
|
36
|
+
`SkillManager` now defaults to the harness-provided `SkillSandboxProvider`:
|
|
37
|
+
|
|
38
|
+
```ts
|
|
39
|
+
const skillManager = new SkillManager();
|
|
40
|
+
```
|
|
41
|
+
|
|
42
|
+
Default provider env vars:
|
|
43
|
+
- `SAMYX_BASE_URL` or `SANDBOX_BASE_URL`
|
|
44
|
+
- `SAMYX_API_KEY` or `SANDBOX_API_KEY`
|
|
45
|
+
- optional `SANDBOX_TEMPLATE` (default: `ubuntu-22.04`)
|
|
46
|
+
|
|
47
|
+
You can still override with a custom provider:
|
|
48
|
+
|
|
49
|
+
```ts
|
|
50
|
+
const skillManager = new SkillManager(customSandboxProvider);
|
|
51
|
+
```
|
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
# Skills Guide
|
|
2
|
+
|
|
3
|
+
## Progressive Disclosure
|
|
4
|
+
`SkillManager` stores only summary metadata for prompt injection and loads full `SKILL.md` instructions on invocation.
|
|
5
|
+
|
|
6
|
+
## Skill Routing
|
|
7
|
+
`createAgent` uses a `SkillRouter` before invocation:
|
|
8
|
+
- direct skill-name match (word boundary)
|
|
9
|
+
- alias match (for example `excel -> xlsx`, `word -> docx`, `powerpoint -> pptx`)
|
|
10
|
+
- Haiku model fallback for semantic matching
|
|
11
|
+
|
|
12
|
+
Environment knobs:
|
|
13
|
+
- `HARNESS_SKILL_ROUTER_MODEL` (default: `claude-3-5-haiku-latest`)
|
|
14
|
+
- `HARNESS_SKILL_ROUTER_THRESHOLD` (default: `0.55`)
|
|
15
|
+
|
|
16
|
+
## Install Lifecycle
|
|
17
|
+
Dependency install state transitions:
|
|
18
|
+
- `installing`
|
|
19
|
+
- `ready`
|
|
20
|
+
- `degraded`
|
|
21
|
+
|
|
22
|
+
If install fails, state becomes `degraded` and the error is surfaced.
|
|
23
|
+
|
|
24
|
+
## Security Baseline
|
|
25
|
+
See `docs/security/skill-sandbox-threat-model.md` for path traversal and sandbox boundary rules.
|
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
# Skill Sandbox Threat Model
|
|
2
|
+
|
|
3
|
+
## Scope
|
|
4
|
+
This document defines the baseline security assumptions for skill execution in harness.
|
|
5
|
+
|
|
6
|
+
## Trust Boundaries
|
|
7
|
+
- Skill scripts are untrusted input.
|
|
8
|
+
- Sandbox runtime is the security boundary.
|
|
9
|
+
- Host filesystem and host network are outside the trust boundary.
|
|
10
|
+
|
|
11
|
+
## Controls
|
|
12
|
+
- Deny host mounts by default.
|
|
13
|
+
- Deny outbound network by default unless explicitly allowed.
|
|
14
|
+
- Use tenant-scoped credentials and ephemeral filesystems.
|
|
15
|
+
- Disallow path traversal (`..`) in skill paths.
|
|
16
|
+
|
|
17
|
+
## Required Tests
|
|
18
|
+
- Sandbox escape attempts should fail.
|
|
19
|
+
- Cross-tenant path access should fail.
|
|
20
|
+
- Dependency install failures should degrade skill state and block execution until retry.
|
package/package.json
CHANGED
|
@@ -1,47 +1,19 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@bluecopa/harness",
|
|
3
|
-
"version": "
|
|
3
|
+
"version": "1.0.0",
|
|
4
4
|
"description": "Provider-agnostic TypeScript agent framework",
|
|
5
5
|
"license": "UNLICENSED",
|
|
6
|
-
"type": "module",
|
|
7
|
-
"files": [
|
|
8
|
-
"dist",
|
|
9
|
-
"README.md"
|
|
10
|
-
],
|
|
11
|
-
"exports": {
|
|
12
|
-
"./arc/app-adapter": {
|
|
13
|
-
"types": "./dist/arc/app-adapter.d.ts",
|
|
14
|
-
"import": "./dist/arc/app-adapter.js"
|
|
15
|
-
},
|
|
16
|
-
"./arc/create-arc-agent": {
|
|
17
|
-
"types": "./dist/arc/create-arc-agent.d.ts",
|
|
18
|
-
"import": "./dist/arc/create-arc-agent.js"
|
|
19
|
-
},
|
|
20
|
-
"./arc/profile-builder": {
|
|
21
|
-
"types": "./dist/arc/profile-builder.d.ts",
|
|
22
|
-
"import": "./dist/arc/profile-builder.js"
|
|
23
|
-
},
|
|
24
|
-
"./loop/vercel-agent-loop": {
|
|
25
|
-
"types": "./dist/loop/vercel-agent-loop.d.ts",
|
|
26
|
-
"import": "./dist/loop/vercel-agent-loop.js"
|
|
27
|
-
},
|
|
28
|
-
"./package.json": "./package.json"
|
|
29
|
-
},
|
|
30
6
|
"scripts": {
|
|
31
|
-
"build": "tsup",
|
|
32
|
-
"prepack": "pnpm run build",
|
|
33
7
|
"test": "vitest run",
|
|
34
8
|
"test:watch": "vitest"
|
|
35
9
|
},
|
|
36
10
|
"dependencies": {
|
|
37
11
|
"@ai-sdk/anthropic": "^3.0.48",
|
|
38
12
|
"ai": "^6.0.101",
|
|
39
|
-
"rxdb": "^15.39.0",
|
|
40
13
|
"zod": "^4.1.11"
|
|
41
14
|
},
|
|
42
15
|
"devDependencies": {
|
|
43
16
|
"@types/node": "^24.3.0",
|
|
44
|
-
"tsup": "^8.5.1",
|
|
45
17
|
"typescript": "^5.9.2",
|
|
46
18
|
"vitest": "^3.2.4"
|
|
47
19
|
},
|