@bluecopa/harness 0.1.0-snapshot.119 → 0.1.0-snapshot.12
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/AGENTS.md +18 -0
- package/README.md +148 -208
- package/docs/guides/observability.md +32 -0
- package/docs/guides/providers.md +51 -0
- package/docs/guides/skills.md +25 -0
- package/docs/security/skill-sandbox-threat-model.md +20 -0
- package/package.json +1 -28
- package/src/agent/create-agent.ts +893 -0
- package/src/agent/create-tools.ts +33 -0
- package/src/agent/step-executor.ts +15 -0
- package/src/agent/types.ts +70 -0
- package/src/arc/arc-loop.ts +396 -0
- package/src/arc/arc-types.ts +215 -0
- package/src/arc/bridge-tools.ts +170 -0
- package/src/arc/bridged-tool-provider.ts +80 -0
- package/src/arc/consolidation.ts +118 -0
- package/src/arc/create-arc-agent.ts +80 -0
- package/src/arc/debug.ts +62 -0
- package/src/arc/episode-compressor.ts +151 -0
- package/src/arc/object-store/fs-object-store.ts +60 -0
- package/src/arc/object-store/memory-object-store.ts +41 -0
- package/src/arc/object-store/object-store.ts +12 -0
- package/src/arc/stores/episode-store.ts +120 -0
- package/src/arc/stores/long-term-store.ts +86 -0
- package/src/arc/stores/rxdb-setup.ts +112 -0
- package/src/arc/stores/session-memo-store.ts +58 -0
- package/src/arc/thread-executor.ts +404 -0
- package/src/arc/thread-tool.ts +29 -0
- package/src/context/llm-compaction-strategy.ts +37 -0
- package/src/context/prepare-step.ts +65 -0
- package/src/context/token-tracker.ts +26 -0
- package/src/extracted/manifest.json +10 -0
- package/src/extracted/prompts/compaction.md +5 -0
- package/src/extracted/prompts/system.md +5 -0
- package/src/extracted/tools.json +82 -0
- package/src/hooks/hook-runner.ts +22 -0
- package/src/hooks/tool-wrappers.ts +64 -0
- package/src/interfaces/compaction-strategy.ts +18 -0
- package/src/interfaces/hooks.ts +24 -0
- package/src/interfaces/sandbox-provider.ts +29 -0
- package/src/interfaces/session-store.ts +48 -0
- package/src/interfaces/tool-provider.ts +70 -0
- package/src/loop/bridge.ts +363 -0
- package/src/loop/context-store.ts +210 -0
- package/src/loop/lcm-tool-loop.ts +163 -0
- package/src/loop/vercel-agent-loop.ts +285 -0
- package/src/observability/context.ts +17 -0
- package/src/observability/metrics.ts +27 -0
- package/src/observability/otel.ts +105 -0
- package/src/observability/tracing.ts +13 -0
- package/src/optimization/agent-evaluator.ts +40 -0
- package/src/optimization/config-serializer.ts +16 -0
- package/src/optimization/optimization-runner.ts +39 -0
- package/src/optimization/trace-collector.ts +33 -0
- package/src/permissions/permission-manager.ts +34 -0
- package/src/providers/composite-tool-provider.ts +72 -0
- package/src/providers/control-plane-e2b-executor.ts +218 -0
- package/src/providers/e2b-tool-provider.ts +68 -0
- package/src/providers/local-tool-provider.ts +190 -0
- package/src/providers/skill-sandbox-provider.ts +46 -0
- package/src/sessions/file-session-store.ts +61 -0
- package/src/sessions/in-memory-session-store.ts +39 -0
- package/src/sessions/session-manager.ts +44 -0
- package/src/skills/skill-loader.ts +52 -0
- package/src/skills/skill-manager.ts +175 -0
- package/src/skills/skill-router.ts +99 -0
- package/src/skills/skill-types.ts +26 -0
- package/src/subagents/subagent-manager.ts +22 -0
- package/src/subagents/task-tool.ts +13 -0
- package/tests/integration/agent-loop-basic.spec.ts +56 -0
- package/tests/integration/agent-skill-default-from-sandbox.spec.ts +67 -0
- package/tests/integration/concurrency-single-turn.spec.ts +35 -0
- package/tests/integration/otel-metrics-emission.spec.ts +62 -0
- package/tests/integration/otel-trace-propagation.spec.ts +48 -0
- package/tests/integration/parity-benchmark.spec.ts +45 -0
- package/tests/integration/provider-local-smoke.spec.ts +63 -0
- package/tests/integration/session-resume.spec.ts +30 -0
- package/tests/integration/skill-install-rollback.spec.ts +64 -0
- package/tests/integration/skill-sandbox-file-blob.spec.ts +54 -0
- package/tests/integration/skills-progressive-disclosure.spec.ts +61 -0
- package/tests/integration/streaming-compaction-boundary.spec.ts +43 -0
- package/tests/integration/structured-messages-agent.spec.ts +265 -0
- package/tests/integration/subagent-isolation.spec.ts +24 -0
- package/tests/security/skill-sandbox-isolation.spec.ts +51 -0
- package/tests/unit/create-tools-schema-parity.spec.ts +22 -0
- package/tests/unit/extracted-manifest.spec.ts +41 -0
- package/tests/unit/interfaces-contract.spec.ts +101 -0
- package/tests/unit/structured-messages.spec.ts +176 -0
- package/tests/unit/token-tracker.spec.ts +22 -0
- package/tsconfig.json +14 -0
- package/vitest.config.ts +7 -0
- package/dist/arc/app-adapter.d.ts +0 -108
- package/dist/arc/app-adapter.js +0 -423
- package/dist/arc/app-adapter.js.map +0 -1
- package/dist/arc/create-arc-agent.d.ts +0 -50
- package/dist/arc/create-arc-agent.js +0 -4317
- package/dist/arc/create-arc-agent.js.map +0 -1
- package/dist/arc/profile-builder.d.ts +0 -49
- package/dist/arc/profile-builder.js +0 -171
- package/dist/arc/profile-builder.js.map +0 -1
- package/dist/loop/vercel-agent-loop.d.ts +0 -125
- package/dist/loop/vercel-agent-loop.js +0 -345
- package/dist/loop/vercel-agent-loop.js.map +0 -1
- package/dist/types-HplqyDx-.d.ts +0 -873
package/AGENTS.md
ADDED
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
# AGENTS.md
|
|
2
|
+
|
|
3
|
+
Guidance for agents working in `harness/`.
|
|
4
|
+
Reference: https://agents.md/
|
|
5
|
+
|
|
6
|
+
## Scope
|
|
7
|
+
`harness/` contains the TypeScript agent framework core.
|
|
8
|
+
|
|
9
|
+
## Rules
|
|
10
|
+
- Keep API changes explicit and typed.
|
|
11
|
+
- Maintain deterministic behavior in agent loop, compaction, and tool execution.
|
|
12
|
+
- Preserve compatibility of extracted tool schemas unless intentionally versioned.
|
|
13
|
+
|
|
14
|
+
## Commands
|
|
15
|
+
```bash
|
|
16
|
+
pnpm install
|
|
17
|
+
pnpm test
|
|
18
|
+
```
|
package/README.md
CHANGED
|
@@ -2,17 +2,9 @@
|
|
|
2
2
|
|
|
3
3
|
Provider-agnostic TypeScript agent framework with Claude-code-compatible tool semantics.
|
|
4
4
|
|
|
5
|
-
|
|
5
|
+
The harness provides the core loop that drives an AI agent: send messages to an LLM, execute the tool calls it returns, feed results back, and repeat until the LLM produces a final text response.
|
|
6
6
|
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
## Install
|
|
10
|
-
|
|
11
|
-
```bash
|
|
12
|
-
pnpm add @bluecopa/harness
|
|
13
|
-
```
|
|
14
|
-
|
|
15
|
-
## Development
|
|
7
|
+
## Quickstart
|
|
16
8
|
|
|
17
9
|
```bash
|
|
18
10
|
pnpm install
|
|
@@ -21,11 +13,9 @@ pnpm test
|
|
|
21
13
|
|
|
22
14
|
## Architecture
|
|
23
15
|
|
|
24
|
-
### Single-Agent Loop
|
|
25
|
-
|
|
26
16
|
```
|
|
27
17
|
┌──────────────┐ ┌──────────────┐ ┌──────────────────┐
|
|
28
|
-
│ createAgent
|
|
18
|
+
│ createAgent │────▶│ AgentLoop │────▶│ LLM (Claude) │
|
|
29
19
|
│ (turn loop) │ │ (nextAction)│ │ │
|
|
30
20
|
└──────┬───────┘ └──────────────┘ └──────────────────┘
|
|
31
21
|
│ │
|
|
@@ -37,82 +27,20 @@ pnpm test
|
|
|
37
27
|
└──────────────┘
|
|
38
28
|
```
|
|
39
29
|
|
|
40
|
-
|
|
30
|
+
1. `createAgent` drives a deterministic step loop
|
|
31
|
+
2. Each step calls `loop.nextAction(messages)` to get the LLM's decision
|
|
32
|
+
3. If it's a tool call, the harness executes it via `ToolProvider` and appends the result
|
|
33
|
+
4. If it's a final action, the loop ends and returns the result
|
|
41
34
|
|
|
42
|
-
|
|
43
|
-
Orchestrator (ArcLoop — Opus 4.6 by default)
|
|
44
|
-
│ tools: Thread, Check, Cancel, Remember, ReadEpisode
|
|
45
|
-
│
|
|
46
|
-
│ Turn 1 (parallel):
|
|
47
|
-
├──► Process 0 ("read auth", model=fast) ─┐
|
|
48
|
-
├──► Process 1 ("read routes", model=fast) ─┼──► Episodes
|
|
49
|
-
├──► Process 2 ("read tests", model=fast) ─┘
|
|
50
|
-
│
|
|
51
|
-
│ Turn 2 (dispatch dependent work):
|
|
52
|
-
├──► Thread("fix bug", context=[ep0,ep1,ep2]) ──► Episode
|
|
53
|
-
│
|
|
54
|
-
│ Turn 3 (parallel):
|
|
55
|
-
├──► Thread("run tests", context=[ep3]) ─┐
|
|
56
|
-
├──► Thread("update docs", context=[ep3]) ─┘
|
|
57
|
-
│
|
|
58
|
-
└──► Final text response
|
|
59
|
-
```
|
|
35
|
+
## Using with the sandbox
|
|
60
36
|
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
---
|
|
64
|
-
|
|
65
|
-
## ToolProvider
|
|
66
|
-
|
|
67
|
-
The contract for tool execution. All agent modes use this interface.
|
|
37
|
+
The most common setup connects the harness to a running sandbox service via `ControlPlaneE2BExecutor`:
|
|
68
38
|
|
|
69
|
-
```
|
|
70
|
-
|
|
71
|
-
bash(command: string, options?: BashOptions): Promise<ToolResult>;
|
|
72
|
-
readFile(path: string, options?: ReadOptions): Promise<ToolResult>;
|
|
73
|
-
writeFile(path: string, content: string): Promise<ToolResult>;
|
|
74
|
-
editFile(path: string, oldText: string, newText: string): Promise<ToolResult>;
|
|
75
|
-
glob(pattern: string, options?: GlobOptions): Promise<ToolResult>;
|
|
76
|
-
grep(pattern: string, path?: string, options?: GrepOptions): Promise<ToolResult>;
|
|
77
|
-
webFetch?(options: WebFetchOptions): Promise<ToolResult>;
|
|
78
|
-
webSearch?(query: string): Promise<ToolResult>;
|
|
79
|
-
capabilities(): ToolProviderCapabilities;
|
|
80
|
-
}
|
|
81
|
-
|
|
82
|
-
interface ToolResult {
|
|
83
|
-
success: boolean;
|
|
84
|
-
output: string;
|
|
85
|
-
error?: string;
|
|
86
|
-
}
|
|
87
|
-
```
|
|
88
|
-
|
|
89
|
-
Built-in implementations:
|
|
90
|
-
|
|
91
|
-
| Provider | Description |
|
|
92
|
-
|----------|-------------|
|
|
93
|
-
| `LocalToolProvider` | Runs tools on the local filesystem |
|
|
94
|
-
| `E2BToolProvider` | Routes tools to a sandbox VM via `ControlPlaneE2BExecutor` |
|
|
95
|
-
| `CompositeToolProvider` | Combines multiple providers (e.g. local filesystem + sandbox bash) |
|
|
96
|
-
|
|
97
|
-
## SandboxProvider
|
|
98
|
-
|
|
99
|
-
Higher-level sandbox operations beyond basic tool calls:
|
|
100
|
-
|
|
101
|
-
```typescript
|
|
102
|
-
interface SandboxProvider {
|
|
103
|
-
exec(command: string, options?: SandboxExecOptions): Promise<SandboxExecResult>;
|
|
104
|
-
readSandboxFile(path: string): Promise<SandboxFileBlob>;
|
|
105
|
-
writeSandboxFile(path: string, content: SandboxFileBlob): Promise<void>;
|
|
106
|
-
}
|
|
107
|
-
```
|
|
108
|
-
|
|
109
|
-
Used by `SkillManager` for executing skill scripts in isolated VMs.
|
|
110
|
-
|
|
111
|
-
## Connecting to a Sandbox
|
|
112
|
-
|
|
113
|
-
```typescript
|
|
114
|
-
import { ControlPlaneE2BExecutor } from './src/providers/control-plane-e2b-executor';
|
|
39
|
+
```ts
|
|
40
|
+
import { createAgent } from './src/agent/create-agent';
|
|
115
41
|
import { E2BToolProvider } from './src/providers/e2b-tool-provider';
|
|
42
|
+
import { ControlPlaneE2BExecutor } from './src/providers/control-plane-e2b-executor';
|
|
43
|
+
import { VercelAgentLoop } from './src/loop/vercel-agent-loop';
|
|
116
44
|
|
|
117
45
|
// Connect to sandbox service
|
|
118
46
|
const executor = new ControlPlaneE2BExecutor({
|
|
@@ -122,172 +50,187 @@ const executor = new ControlPlaneE2BExecutor({
|
|
|
122
50
|
});
|
|
123
51
|
await executor.initialize(); // creates a Firecracker VM
|
|
124
52
|
|
|
125
|
-
|
|
53
|
+
// Build and run the agent
|
|
54
|
+
const agent = createAgent({
|
|
55
|
+
toolProvider: new E2BToolProvider(executor),
|
|
56
|
+
loop: new VercelAgentLoop(), // needs ANTHROPIC_API_KEY
|
|
57
|
+
});
|
|
126
58
|
|
|
127
|
-
|
|
59
|
+
const result = await agent.run('create a bar chart of sales data');
|
|
60
|
+
console.log(result.output); // LLM's final response
|
|
61
|
+
console.log(result.steps); // number of tool steps
|
|
128
62
|
|
|
129
|
-
await executor.destroy();
|
|
63
|
+
await executor.destroy(); // tears down the VM
|
|
130
64
|
```
|
|
131
65
|
|
|
132
|
-
|
|
66
|
+
For a complete working example, see [`examples/chat-assistant/src/chat.ts`](../examples/chat-assistant/src/chat.ts).
|
|
67
|
+
|
|
68
|
+
### From environment variables
|
|
133
69
|
|
|
134
|
-
|
|
70
|
+
`ControlPlaneE2BExecutor.fromEnv()` reads `SAMYX_BASE_URL` and `SAMYX_API_KEY` automatically:
|
|
135
71
|
|
|
136
|
-
|
|
72
|
+
```ts
|
|
73
|
+
const executor = ControlPlaneE2BExecutor.fromEnv();
|
|
74
|
+
```
|
|
75
|
+
|
|
76
|
+
## Using locally (no sandbox)
|
|
137
77
|
|
|
138
|
-
For
|
|
78
|
+
For development without a sandbox service, use `LocalToolProvider`, which runs tools on the local machine:
|
|
139
79
|
|
|
140
|
-
```
|
|
80
|
+
```ts
|
|
141
81
|
import { createAgent } from './src/agent/create-agent';
|
|
142
82
|
import { LocalToolProvider } from './src/providers/local-tool-provider';
|
|
143
83
|
|
|
144
84
|
const agent = createAgent({
|
|
145
85
|
toolProvider: new LocalToolProvider(process.cwd()),
|
|
146
|
-
loop: new VercelAgentLoop(),
|
|
86
|
+
loop: new VercelAgentLoop(),
|
|
147
87
|
});
|
|
148
88
|
|
|
149
89
|
const result = await agent.run('list all TypeScript files');
|
|
150
|
-
console.log(result.output);
|
|
151
90
|
```
|
|
152
91
|
|
|
153
|
-
|
|
92
|
+
## Key modules
|
|
93
|
+
|
|
94
|
+
### Agent creation (`src/agent/create-agent.ts`)
|
|
154
95
|
|
|
155
|
-
|
|
156
|
-
|--------|------|---------|-------------|
|
|
157
|
-
| `toolProvider` | `ToolProvider` | required | Executes tool calls |
|
|
158
|
-
| `loop` | `AgentLoop` | `VercelAgentLoop` | LLM decision loop |
|
|
159
|
-
| `sandboxProvider` | `SandboxProvider` | — | Higher-level sandbox operations |
|
|
160
|
-
| `maxSteps` | `number` | 30 | Max tool steps per run |
|
|
161
|
-
| `telemetry` | `HarnessTelemetry` | — | OpenTelemetry-style tracing |
|
|
162
|
-
| `skillIndexPath` | `string` | — | Path to skill index JSON for routing |
|
|
96
|
+
`createAgent(options)` returns an agent with a `.run(prompt, options?)` method. Options:
|
|
163
97
|
|
|
164
|
-
|
|
98
|
+
| Option | Type | Description |
|
|
99
|
+
|--------|------|-------------|
|
|
100
|
+
| `toolProvider` | `ToolProvider` | Required. Executes tool calls |
|
|
101
|
+
| `loop` | `AgentLoop` | LLM decision loop (default: `VercelAgentLoop`) |
|
|
102
|
+
| `sandboxProvider` | `SandboxProvider` | Optional. Higher-level sandbox ops (file download, exec with env) |
|
|
103
|
+
| `maxSteps` | `number` | Max tool steps per run (default: 30) |
|
|
104
|
+
| `telemetry` | `HarnessTelemetry` | Optional. OpenTelemetry-style tracing |
|
|
105
|
+
| `skillIndexPath` | `string` | Optional. Path to skill index JSON |
|
|
165
106
|
|
|
166
|
-
|
|
107
|
+
### Agent loop (`src/loop/vercel-agent-loop.ts`)
|
|
167
108
|
|
|
168
|
-
|
|
109
|
+
`VercelAgentLoop` calls Claude via the Vercel AI SDK. It supports:
|
|
110
|
+
- Parallel tool calls (returns `ToolBatchAction` when the LLM requests multiple tools at once)
|
|
111
|
+
- Configurable system prompt
|
|
112
|
+
- Model selection via `HARNESS_MODEL` env var (default: `claude-sonnet-4-5`)
|
|
113
|
+
|
|
114
|
+
```ts
|
|
169
115
|
const loop = new VercelAgentLoop({
|
|
170
116
|
systemPrompt: 'You are a helpful coding assistant.',
|
|
171
|
-
model: 'claude-sonnet-4-5', // or HARNESS_MODEL env var
|
|
172
117
|
});
|
|
173
118
|
```
|
|
174
119
|
|
|
175
|
-
###
|
|
120
|
+
### Tool provider (`src/interfaces/tool-provider.ts`)
|
|
176
121
|
|
|
177
|
-
|
|
122
|
+
The contract for tool execution:
|
|
178
123
|
|
|
179
|
-
```
|
|
180
|
-
|
|
181
|
-
|
|
124
|
+
```ts
|
|
125
|
+
interface ToolProvider {
|
|
126
|
+
bash(command: string, options?: BashOptions): Promise<ToolResult>;
|
|
127
|
+
readFile(path: string, options?: ReadOptions): Promise<ToolResult>;
|
|
128
|
+
writeFile(path: string, content: string): Promise<ToolResult>;
|
|
129
|
+
editFile(path: string, oldText: string, newText: string): Promise<ToolResult>;
|
|
130
|
+
glob(pattern: string, options?: GlobOptions): Promise<ToolResult>;
|
|
131
|
+
grep(pattern: string, path?: string, options?: GrepOptions): Promise<ToolResult>;
|
|
132
|
+
webFetch?(options: WebFetchOptions): Promise<ToolResult>;
|
|
133
|
+
webSearch?(query: string): Promise<ToolResult>;
|
|
134
|
+
capabilities(): ToolProviderCapabilities;
|
|
135
|
+
}
|
|
182
136
|
|
|
183
|
-
|
|
184
|
-
|
|
185
|
-
|
|
186
|
-
|
|
187
|
-
|
|
188
|
-
onActivity: (entry) => console.log(entry),
|
|
189
|
-
onLlmRequest: async (prompt) => callLLM(prompt),
|
|
190
|
-
onWebFetchRequest: async (url) => fetch(url),
|
|
191
|
-
});
|
|
137
|
+
interface ToolResult {
|
|
138
|
+
success: boolean;
|
|
139
|
+
output: string;
|
|
140
|
+
error?: string;
|
|
141
|
+
}
|
|
192
142
|
```
|
|
193
143
|
|
|
194
|
-
|
|
195
|
-
|
|
196
|
-
**REPL mode**: When the LLM returns a Bash action with the REPL marker, the loop writes a Python script into the sandbox, injects the bridge module, runs the script, and polls for sub-requests (LLM, web_fetch, ask_user) that the harness fulfills.
|
|
144
|
+
Built-in implementations:
|
|
197
145
|
|
|
198
|
-
|
|
146
|
+
| Provider | Description |
|
|
147
|
+
|----------|-------------|
|
|
148
|
+
| `LocalToolProvider` | Runs tools on the local filesystem |
|
|
149
|
+
| `E2BToolProvider` | Routes tools to an E2B-compatible executor over HTTP |
|
|
150
|
+
| `CompositeToolProvider` | Combines multiple providers (e.g. sandbox + web) |
|
|
199
151
|
|
|
200
|
-
|
|
152
|
+
### Action types (`src/agent/types.ts`)
|
|
201
153
|
|
|
202
|
-
|
|
154
|
+
The LLM returns one of these action types each turn:
|
|
203
155
|
|
|
204
|
-
```
|
|
205
|
-
|
|
206
|
-
|
|
207
|
-
|
|
208
|
-
|
|
209
|
-
|
|
210
|
-
|
|
211
|
-
longTermStore: myLongTermStore, // required
|
|
212
|
-
taskId: 'task-1',
|
|
213
|
-
sessionId: 'session-1',
|
|
214
|
-
});
|
|
156
|
+
```ts
|
|
157
|
+
// Single tool call
|
|
158
|
+
interface ToolCallAction {
|
|
159
|
+
type: 'tool';
|
|
160
|
+
name: 'Bash' | 'Read' | 'Write' | 'Edit' | 'Glob' | 'Grep' | ...;
|
|
161
|
+
args: Record<string, unknown>;
|
|
162
|
+
}
|
|
215
163
|
|
|
216
|
-
//
|
|
217
|
-
|
|
218
|
-
|
|
219
|
-
|
|
220
|
-
if (event.type === 'done') console.log(`Done in ${event.stats.durationMs}ms`);
|
|
164
|
+
// Multiple independent tool calls (executed in parallel)
|
|
165
|
+
interface ToolBatchAction {
|
|
166
|
+
type: 'tool_batch';
|
|
167
|
+
calls: ToolCallAction[];
|
|
221
168
|
}
|
|
222
169
|
|
|
223
|
-
//
|
|
224
|
-
|
|
170
|
+
// Final text response (ends the loop)
|
|
171
|
+
interface FinalAction {
|
|
172
|
+
type: 'final';
|
|
173
|
+
content: string;
|
|
174
|
+
}
|
|
225
175
|
```
|
|
226
176
|
|
|
227
|
-
###
|
|
228
|
-
|
|
229
|
-
|
|
230
|
-
|
|
231
|
-
|
|
232
|
-
|
|
233
|
-
|
|
234
|
-
|
|
235
|
-
|
|
236
|
-
|
|
237
|
-
|
|
238
|
-
|
|
239
|
-
|
|
240
|
-
|
|
241
|
-
| `episodeStore` | `EpisodeStore` | required | Stores episode summaries + traces |
|
|
242
|
-
| `sessionMemoStore` | `SessionMemoStore` | required | Stores session memos |
|
|
243
|
-
| `longTermStore` | `LongTermStore` | required | Stores long-term memories |
|
|
244
|
-
| `taskId` | `string` | required | Task identifier |
|
|
245
|
-
| `sessionId` | `string` | required | Session identifier |
|
|
246
|
-
| `toolProvider` | `ToolProvider` | required | Tool execution |
|
|
247
|
-
| `processTools` | `Record<string, AnyTool>` | builtinTools | Tools available inside processes |
|
|
248
|
-
| `extraOrchestratorTools` | `Record<string, AnyTool>` | — | Custom orchestrator tools |
|
|
249
|
-
| `onOrchestratorTool` | `function` | — | Handler for custom orchestrator tools |
|
|
250
|
-
| `resilience` | `ResiliencePolicy` | — | Composable resilience pipeline |
|
|
251
|
-
| `traceWriter` | `function` | — | Callback for trace event emission |
|
|
252
|
-
|
|
253
|
-
### Resilience
|
|
254
|
-
|
|
255
|
-
```typescript
|
|
256
|
-
import { resilience } from './src/arc/resilience';
|
|
257
|
-
|
|
258
|
-
const pipeline = resilience()
|
|
259
|
-
.retry({ maxRetries: 2, baseDelay: 1000 })
|
|
260
|
-
.timeout({ durationMs: 30_000 })
|
|
261
|
-
.circuitBreaker({ failureThreshold: 5 })
|
|
262
|
-
.build();
|
|
263
|
-
|
|
264
|
-
const agent = await createArcAgent({
|
|
265
|
-
// ...config
|
|
266
|
-
resilience: pipeline,
|
|
267
|
-
});
|
|
177
|
+
### LCM tool loop (`src/loop/lcm-tool-loop.ts`)
|
|
178
|
+
|
|
179
|
+
`LCMToolLoop` wraps another loop to add LCM-based tool routing, REPL script execution, and bridge-based tool dispatch. Used in the chat-assistant example.
|
|
180
|
+
|
|
181
|
+
### Sandbox provider (`src/interfaces/sandbox-provider.ts`)
|
|
182
|
+
|
|
183
|
+
Higher-level sandbox operations beyond basic tool calls:
|
|
184
|
+
|
|
185
|
+
```ts
|
|
186
|
+
interface SandboxProvider {
|
|
187
|
+
exec(command: string, options?: SandboxExecOptions): Promise<SandboxExecResult>;
|
|
188
|
+
readSandboxFile(path: string): Promise<SandboxFileBlob>;
|
|
189
|
+
writeSandboxFile(path: string, content: SandboxFileBlob): Promise<void>;
|
|
190
|
+
}
|
|
268
191
|
```
|
|
269
192
|
|
|
270
|
-
###
|
|
193
|
+
### Observability (`src/observability/otel.ts`)
|
|
194
|
+
|
|
195
|
+
`HarnessTelemetry` provides OpenTelemetry-style spans and metrics for agent runs.
|
|
196
|
+
|
|
197
|
+
### Arc: Orchestrator + Thread Architecture (`src/arc/`)
|
|
198
|
+
|
|
199
|
+
`ArcLoop` is an `AgentLoop` implementation where an orchestrator LLM dispatches bounded threads via a single `Thread` tool. Threads produce episodes (summary + full trace). The orchestrator only sees summaries, keeping its context small.
|
|
200
|
+
|
|
201
|
+
```ts
|
|
202
|
+
import { createArcAgent } from './src/arc/create-arc-agent';
|
|
203
|
+
import { InMemoryEpisodeStore } from './src/arc/stores/episode-store';
|
|
204
|
+
import { InMemorySessionMemoStore } from './src/arc/stores/session-memo-store';
|
|
205
|
+
import { InMemoryLongTermStore } from './src/arc/stores/long-term-store';
|
|
271
206
|
|
|
272
|
-
|
|
273
|
-
|
|
274
|
-
|
|
275
|
-
|
|
276
|
-
|
|
207
|
+
const agent = createArcAgent({
|
|
208
|
+
toolProvider: new LocalToolProvider(process.cwd()),
|
|
209
|
+
episodeStore: new InMemoryEpisodeStore(),
|
|
210
|
+
sessionMemoStore: new InMemorySessionMemoStore(),
|
|
211
|
+
longTermStore: new InMemoryLongTermStore(),
|
|
212
|
+
taskId: 'task-1',
|
|
213
|
+
sessionId: 'session-1',
|
|
277
214
|
});
|
|
215
|
+
|
|
216
|
+
const result = await agent.run('Fix the authentication bug');
|
|
278
217
|
```
|
|
279
218
|
|
|
280
|
-
|
|
219
|
+
Key features:
|
|
220
|
+
- **Parallel threads**: orchestrator calls Thread N times in one turn → all run concurrently
|
|
221
|
+
- **Four-tier memory**: thread context → episodes → session memos → long-term
|
|
222
|
+
- **Per-thread models**: Haiku for reads, Sonnet for implementation
|
|
223
|
+
- **Template compression**: zero-LLM-call episode summaries
|
|
224
|
+
- **Async consolidation**: non-blocking background distillation
|
|
281
225
|
|
|
282
|
-
|
|
226
|
+
Full architecture doc: [`docs/arc.md`](../docs/arc.md)
|
|
283
227
|
|
|
284
|
-
## Package
|
|
228
|
+
## Package layout
|
|
285
229
|
|
|
286
230
|
```
|
|
287
231
|
src/
|
|
288
232
|
├── agent/ # createAgent, step executor, types
|
|
289
|
-
├── arc/ # ArcLoop orchestrator,
|
|
290
|
-
│ ├── resilience/ # Retry, circuit breaker, timeout, bulkhead, fallback
|
|
233
|
+
├── arc/ # ArcLoop orchestrator, threads, memory hierarchy
|
|
291
234
|
│ ├── stores/ # RxDB + in-memory store implementations
|
|
292
235
|
│ └── object-store/ # Pluggable cloud sync (fs, memory)
|
|
293
236
|
├── interfaces/ # ToolProvider, SandboxProvider, AgentLoop contracts
|
|
@@ -297,20 +240,17 @@ src/
|
|
|
297
240
|
├── hooks/ # Pre/post tool call hooks
|
|
298
241
|
├── permissions/ # Tool permission checks
|
|
299
242
|
├── sessions/ # Session persistence
|
|
300
|
-
├── subagents/ # Subagent spawning
|
|
243
|
+
├── subagents/ # Subagent spawning and task tools
|
|
301
244
|
├── skills/ # Skill index, routing, and management
|
|
302
245
|
├── optimization/ # Benchmark runner
|
|
303
246
|
└── observability/ # OpenTelemetry integration
|
|
304
|
-
|
|
305
|
-
verify/ # Rust formal verification (Stateright model checker)
|
|
306
|
-
testing/ # Adversarial scenario replay harness
|
|
307
|
-
tests/ # Vitest test suite
|
|
308
247
|
```
|
|
309
248
|
|
|
310
249
|
## Documentation
|
|
311
250
|
|
|
312
|
-
-
|
|
313
|
-
-
|
|
314
|
-
-
|
|
315
|
-
-
|
|
316
|
-
-
|
|
251
|
+
- **Arc architecture**: [`docs/arc.md`](../docs/arc.md)
|
|
252
|
+
- Provider guide: `docs/guides/providers.md`
|
|
253
|
+
- Skills guide: `docs/guides/skills.md`
|
|
254
|
+
- Observability guide: `docs/guides/observability.md`
|
|
255
|
+
- Release process: `../docs/RELEASE.md`
|
|
256
|
+
- Full example: [`../examples/chat-assistant/src/chat.ts`](../examples/chat-assistant/src/chat.ts)
|
|
@@ -0,0 +1,32 @@
|
|
|
1
|
+
# Observability Guide
|
|
2
|
+
|
|
3
|
+
Harness emits OpenTelemetry-style traces and metrics through `HarnessTelemetry`.
|
|
4
|
+
|
|
5
|
+
## Spans
|
|
6
|
+
- `agent.run`
|
|
7
|
+
- `agent.step`
|
|
8
|
+
- `tool.call`
|
|
9
|
+
- `context.compaction`
|
|
10
|
+
- `skill.exec`
|
|
11
|
+
- `subagent.run`
|
|
12
|
+
|
|
13
|
+
## Metrics
|
|
14
|
+
- `agent_steps_total`
|
|
15
|
+
- `tool_calls_total`
|
|
16
|
+
- `tool_call_duration_ms`
|
|
17
|
+
- `compactions_total`
|
|
18
|
+
- `agent_errors_total`
|
|
19
|
+
|
|
20
|
+
## Correlation Fields
|
|
21
|
+
Attach these fields to logs where available:
|
|
22
|
+
- `trace_id`
|
|
23
|
+
- `span_id`
|
|
24
|
+
- `run_id`
|
|
25
|
+
- `session_id`
|
|
26
|
+
|
|
27
|
+
## Disable Mode
|
|
28
|
+
Create telemetry in disabled mode for zero-impact execution:
|
|
29
|
+
|
|
30
|
+
```ts
|
|
31
|
+
const telemetry = new HarnessTelemetry(false);
|
|
32
|
+
```
|
|
@@ -0,0 +1,51 @@
|
|
|
1
|
+
# Providers Guide
|
|
2
|
+
|
|
3
|
+
## ToolProvider
|
|
4
|
+
Implement the `ToolProvider` interface to expose agent tools (`Bash`, `Read`, `Write`, `Edit`, `Glob`, `Grep`).
|
|
5
|
+
|
|
6
|
+
Included foundations:
|
|
7
|
+
- `LocalToolProvider`
|
|
8
|
+
- `CompositeToolProvider`
|
|
9
|
+
- `E2BToolProvider` (executor-backed adapter)
|
|
10
|
+
|
|
11
|
+
## SandboxProvider
|
|
12
|
+
Use `SandboxProvider` for infrastructure actions (skill execution, setup/install tasks). Keep it separate from `ToolProvider`.
|
|
13
|
+
|
|
14
|
+
The current sandbox file contract is binary-first:
|
|
15
|
+
|
|
16
|
+
```ts
|
|
17
|
+
type SandboxFileBlob = {
|
|
18
|
+
data: Uint8Array;
|
|
19
|
+
mimeType?: string;
|
|
20
|
+
filename?: string;
|
|
21
|
+
};
|
|
22
|
+
|
|
23
|
+
interface SandboxProvider {
|
|
24
|
+
exec(command: string, options?: SandboxExecOptions): Promise<SandboxExecResult>;
|
|
25
|
+
readSandboxFile(path: string): Promise<SandboxFileBlob>;
|
|
26
|
+
writeSandboxFile(path: string, content: SandboxFileBlob): Promise<void>;
|
|
27
|
+
}
|
|
28
|
+
```
|
|
29
|
+
|
|
30
|
+
Use `mimeType`/`filename` for transport metadata (for example, raw download endpoints). Keep file contents in `data` as bytes.
|
|
31
|
+
|
|
32
|
+
## Capability Routing
|
|
33
|
+
`CompositeToolProvider` routes calls to the first provider that advertises each capability.
|
|
34
|
+
|
|
35
|
+
## Default Skill Sandbox
|
|
36
|
+
`SkillManager` now defaults to the harness-provided `SkillSandboxProvider`:
|
|
37
|
+
|
|
38
|
+
```ts
|
|
39
|
+
const skillManager = new SkillManager();
|
|
40
|
+
```
|
|
41
|
+
|
|
42
|
+
Default provider env vars:
|
|
43
|
+
- `SAMYX_BASE_URL` or `SANDBOX_BASE_URL`
|
|
44
|
+
- `SAMYX_API_KEY` or `SANDBOX_API_KEY`
|
|
45
|
+
- optional `SANDBOX_TEMPLATE` (default: `ubuntu-22.04`)
|
|
46
|
+
|
|
47
|
+
You can still override with a custom provider:
|
|
48
|
+
|
|
49
|
+
```ts
|
|
50
|
+
const skillManager = new SkillManager(customSandboxProvider);
|
|
51
|
+
```
|
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
# Skills Guide
|
|
2
|
+
|
|
3
|
+
## Progressive Disclosure
|
|
4
|
+
`SkillManager` stores only summary metadata for prompt injection and loads full `SKILL.md` instructions on invocation.
|
|
5
|
+
|
|
6
|
+
## Skill Routing
|
|
7
|
+
`createAgent` uses a `SkillRouter` before invocation:
|
|
8
|
+
- direct skill-name match (word boundary)
|
|
9
|
+
- alias match (for example `excel -> xlsx`, `word -> docx`, `powerpoint -> pptx`)
|
|
10
|
+
- Haiku model fallback for semantic matching
|
|
11
|
+
|
|
12
|
+
Environment knobs:
|
|
13
|
+
- `HARNESS_SKILL_ROUTER_MODEL` (default: `claude-3-5-haiku-latest`)
|
|
14
|
+
- `HARNESS_SKILL_ROUTER_THRESHOLD` (default: `0.55`)
|
|
15
|
+
|
|
16
|
+
## Install Lifecycle
|
|
17
|
+
Dependency install state transitions:
|
|
18
|
+
- `installing`
|
|
19
|
+
- `ready`
|
|
20
|
+
- `degraded`
|
|
21
|
+
|
|
22
|
+
If install fails, state becomes `degraded` and the error is surfaced.
|
|
23
|
+
|
|
24
|
+
## Security Baseline
|
|
25
|
+
See `docs/security/skill-sandbox-threat-model.md` for path traversal and sandbox boundary rules.
|
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
# Skill Sandbox Threat Model
|
|
2
|
+
|
|
3
|
+
## Scope
|
|
4
|
+
This document defines the baseline security assumptions for skill execution in harness.
|
|
5
|
+
|
|
6
|
+
## Trust Boundaries
|
|
7
|
+
- Skill scripts are untrusted input.
|
|
8
|
+
- Sandbox runtime is the security boundary.
|
|
9
|
+
- The host filesystem and host network are outside the trust boundary.
|
|
10
|
+
|
|
11
|
+
## Controls
|
|
12
|
+
- Deny host mounts by default.
|
|
13
|
+
- Deny outbound network by default unless explicitly allowed.
|
|
14
|
+
- Use tenant-scoped credentials and ephemeral filesystems.
|
|
15
|
+
- Disallow path traversal (`..`) in skill paths.
|
|
16
|
+
|
|
17
|
+
## Required Tests
|
|
18
|
+
- Sandbox escape attempt should fail.
|
|
19
|
+
- Cross-tenant path access should fail.
|
|
20
|
+
- Dependency install failures should degrade skill state and block execution until retry.
|
package/package.json
CHANGED
|
@@ -1,35 +1,9 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@bluecopa/harness",
|
|
3
|
-
"version": "0.1.0-snapshot.
|
|
3
|
+
"version": "0.1.0-snapshot.12",
|
|
4
4
|
"description": "Provider-agnostic TypeScript agent framework",
|
|
5
5
|
"license": "UNLICENSED",
|
|
6
|
-
"type": "module",
|
|
7
|
-
"files": [
|
|
8
|
-
"dist",
|
|
9
|
-
"README.md"
|
|
10
|
-
],
|
|
11
|
-
"exports": {
|
|
12
|
-
"./arc/app-adapter": {
|
|
13
|
-
"types": "./dist/arc/app-adapter.d.ts",
|
|
14
|
-
"import": "./dist/arc/app-adapter.js"
|
|
15
|
-
},
|
|
16
|
-
"./arc/create-arc-agent": {
|
|
17
|
-
"types": "./dist/arc/create-arc-agent.d.ts",
|
|
18
|
-
"import": "./dist/arc/create-arc-agent.js"
|
|
19
|
-
},
|
|
20
|
-
"./arc/profile-builder": {
|
|
21
|
-
"types": "./dist/arc/profile-builder.d.ts",
|
|
22
|
-
"import": "./dist/arc/profile-builder.js"
|
|
23
|
-
},
|
|
24
|
-
"./loop/vercel-agent-loop": {
|
|
25
|
-
"types": "./dist/loop/vercel-agent-loop.d.ts",
|
|
26
|
-
"import": "./dist/loop/vercel-agent-loop.js"
|
|
27
|
-
},
|
|
28
|
-
"./package.json": "./package.json"
|
|
29
|
-
},
|
|
30
6
|
"scripts": {
|
|
31
|
-
"build": "tsup",
|
|
32
|
-
"prepack": "pnpm run build",
|
|
33
7
|
"test": "vitest run",
|
|
34
8
|
"test:watch": "vitest"
|
|
35
9
|
},
|
|
@@ -41,7 +15,6 @@
|
|
|
41
15
|
},
|
|
42
16
|
"devDependencies": {
|
|
43
17
|
"@types/node": "^24.3.0",
|
|
44
|
-
"tsup": "^8.5.1",
|
|
45
18
|
"typescript": "^5.9.2",
|
|
46
19
|
"vitest": "^3.2.4"
|
|
47
20
|
},
|