@bluecopa/harness 0.1.0-snapshot.98 → 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (87) hide show
  1. package/AGENTS.md +18 -0
  2. package/README.md +117 -212
  3. package/docs/guides/observability.md +32 -0
  4. package/docs/guides/providers.md +51 -0
  5. package/docs/guides/skills.md +25 -0
  6. package/docs/security/skill-sandbox-threat-model.md +20 -0
  7. package/package.json +1 -29
  8. package/src/agent/create-agent.ts +884 -0
  9. package/src/agent/create-tools.ts +33 -0
  10. package/src/agent/step-executor.ts +15 -0
  11. package/src/agent/types.ts +57 -0
  12. package/src/context/llm-compaction-strategy.ts +37 -0
  13. package/src/context/prepare-step.ts +65 -0
  14. package/src/context/token-tracker.ts +26 -0
  15. package/src/extracted/manifest.json +10 -0
  16. package/src/extracted/prompts/compaction.md +5 -0
  17. package/src/extracted/prompts/system.md +5 -0
  18. package/src/extracted/tools.json +82 -0
  19. package/src/hooks/hook-runner.ts +22 -0
  20. package/src/hooks/tool-wrappers.ts +64 -0
  21. package/src/interfaces/compaction-strategy.ts +18 -0
  22. package/src/interfaces/hooks.ts +24 -0
  23. package/src/interfaces/sandbox-provider.ts +29 -0
  24. package/src/interfaces/session-store.ts +48 -0
  25. package/src/interfaces/tool-provider.ts +70 -0
  26. package/src/loop/bridge.ts +363 -0
  27. package/src/loop/context-store.ts +207 -0
  28. package/src/loop/lcm-tool-loop.ts +163 -0
  29. package/src/loop/vercel-agent-loop.ts +279 -0
  30. package/src/observability/context.ts +17 -0
  31. package/src/observability/metrics.ts +27 -0
  32. package/src/observability/otel.ts +105 -0
  33. package/src/observability/tracing.ts +13 -0
  34. package/src/optimization/agent-evaluator.ts +40 -0
  35. package/src/optimization/config-serializer.ts +16 -0
  36. package/src/optimization/optimization-runner.ts +39 -0
  37. package/src/optimization/trace-collector.ts +33 -0
  38. package/src/permissions/permission-manager.ts +34 -0
  39. package/src/providers/composite-tool-provider.ts +72 -0
  40. package/src/providers/control-plane-e2b-executor.ts +218 -0
  41. package/src/providers/e2b-tool-provider.ts +68 -0
  42. package/src/providers/local-tool-provider.ts +190 -0
  43. package/src/providers/skill-sandbox-provider.ts +46 -0
  44. package/src/sessions/file-session-store.ts +61 -0
  45. package/src/sessions/in-memory-session-store.ts +39 -0
  46. package/src/sessions/session-manager.ts +44 -0
  47. package/src/skills/skill-loader.ts +52 -0
  48. package/src/skills/skill-manager.ts +175 -0
  49. package/src/skills/skill-router.ts +99 -0
  50. package/src/skills/skill-types.ts +26 -0
  51. package/src/subagents/subagent-manager.ts +22 -0
  52. package/src/subagents/task-tool.ts +13 -0
  53. package/tests/integration/agent-loop-basic.spec.ts +56 -0
  54. package/tests/integration/agent-skill-default-from-sandbox.spec.ts +66 -0
  55. package/tests/integration/concurrency-single-turn.spec.ts +35 -0
  56. package/tests/integration/otel-metrics-emission.spec.ts +62 -0
  57. package/tests/integration/otel-trace-propagation.spec.ts +48 -0
  58. package/tests/integration/parity-benchmark.spec.ts +45 -0
  59. package/tests/integration/provider-local-smoke.spec.ts +63 -0
  60. package/tests/integration/session-resume.spec.ts +30 -0
  61. package/tests/integration/skill-install-rollback.spec.ts +64 -0
  62. package/tests/integration/skill-sandbox-file-blob.spec.ts +54 -0
  63. package/tests/integration/skills-progressive-disclosure.spec.ts +61 -0
  64. package/tests/integration/streaming-compaction-boundary.spec.ts +43 -0
  65. package/tests/integration/structured-messages-agent.spec.ts +265 -0
  66. package/tests/integration/subagent-isolation.spec.ts +24 -0
  67. package/tests/security/skill-sandbox-isolation.spec.ts +51 -0
  68. package/tests/unit/create-tools-schema-parity.spec.ts +22 -0
  69. package/tests/unit/extracted-manifest.spec.ts +41 -0
  70. package/tests/unit/interfaces-contract.spec.ts +101 -0
  71. package/tests/unit/structured-messages.spec.ts +176 -0
  72. package/tests/unit/token-tracker.spec.ts +22 -0
  73. package/tsconfig.json +14 -0
  74. package/vitest.config.ts +7 -0
  75. package/dist/arc/app-adapter.d.ts +0 -101
  76. package/dist/arc/app-adapter.js +0 -312
  77. package/dist/arc/app-adapter.js.map +0 -1
  78. package/dist/arc/create-arc-agent.d.ts +0 -50
  79. package/dist/arc/create-arc-agent.js +0 -2926
  80. package/dist/arc/create-arc-agent.js.map +0 -1
  81. package/dist/arc/profile-builder.d.ts +0 -49
  82. package/dist/arc/profile-builder.js +0 -163
  83. package/dist/arc/profile-builder.js.map +0 -1
  84. package/dist/loop/vercel-agent-loop.d.ts +0 -99
  85. package/dist/loop/vercel-agent-loop.js +0 -308
  86. package/dist/loop/vercel-agent-loop.js.map +0 -1
  87. package/dist/types-g-3DvSSE.d.ts +0 -745
package/AGENTS.md ADDED
@@ -0,0 +1,18 @@
1
+ # AGENTS.md
2
+
3
+ Guidance for agents working in `harness/`.
4
+ Reference: https://agents.md/
5
+
6
+ ## Scope
7
+ `harness/` contains the TypeScript agent framework core.
8
+
9
+ ## Rules
10
+ - Keep API changes explicit and typed.
11
+ - Maintain deterministic behavior in agent loop, compaction, and tool execution.
12
+ - Preserve compatibility of extracted tool schemas unless intentionally versioned.
13
+
14
+ ## Commands
15
+ ```bash
16
+ pnpm install
17
+ pnpm test
18
+ ```
package/README.md CHANGED
@@ -2,17 +2,9 @@
2
2
 
3
3
  Provider-agnostic TypeScript agent framework with Claude-code-compatible tool semantics.
4
4
 
5
- Published on npm as **`@bluecopa/harness`**.
5
+ The harness provides the core loop that drives an AI agent: send messages to an LLM, execute the tool calls it returns, feed results back, and repeat until the LLM produces a final text response.
6
6
 
7
- Two execution modes: a simple single-agent loop (`createAgent` + `VercelAgentLoop`) and a process-based orchestrator (`ArcLoop`) that dispatches parallel processes with context management, memory, and resilience.
8
-
9
- ## Install
10
-
11
- ```bash
12
- pnpm add @bluecopa/harness
13
- ```
14
-
15
- ## Development
7
+ ## Quickstart
16
8
 
17
9
  ```bash
18
10
  pnpm install
@@ -21,11 +13,9 @@ pnpm test
21
13
 
22
14
  ## Architecture
23
15
 
24
- ### Single-Agent Loop
25
-
26
16
  ```
27
17
  ┌──────────────┐ ┌──────────────┐ ┌──────────────────┐
28
- │ createAgent │────►│ AgentLoop │────►│ LLM (Claude) │
18
+ │ createAgent │────▶│ AgentLoop │────▶│ LLM (Claude) │
29
19
  │ (turn loop) │ │ (nextAction)│ │ │
30
20
  └──────┬───────┘ └──────────────┘ └──────────────────┘
31
21
  │ │
@@ -37,82 +27,20 @@ pnpm test
37
27
  └──────────────┘
38
28
  ```
39
29
 
40
- ### ArcLoop Orchestrator
41
-
42
- ```
43
- Orchestrator (ArcLoop Opus 4.6 by default)
44
- │ tools: Thread, Check, Cancel, Remember, ReadEpisode
45
-
46
- │ Turn 1 (parallel):
47
- ├──► Process 0 ("read auth", model=fast) ─┐
48
- ├──► Process 1 ("read routes", model=fast) ─┼──► Episodes
49
- ├──► Process 2 ("read tests", model=fast) ─┘
50
-
51
- │ Turn 2 (dispatch dependent work):
52
- ├──► Thread("fix bug", context=[ep0,ep1,ep2]) ──► Episode
53
-
54
- │ Turn 3 (parallel):
55
- ├──► Thread("run tests", context=[ep3]) ─┐
56
- ├──► Thread("update docs", context=[ep3]) ─┘
57
-
58
- └──► Final text response
59
- ```
60
-
61
- Full architecture doc: [`docs/arc.md`](../docs/arc.md)
62
-
63
- ---
64
-
65
- ## ToolProvider
66
-
67
- The contract for tool execution. All agent modes use this interface.
68
-
69
- ```typescript
70
- interface ToolProvider {
71
- bash(command: string, options?: BashOptions): Promise<ToolResult>;
72
- readFile(path: string, options?: ReadOptions): Promise<ToolResult>;
73
- writeFile(path: string, content: string): Promise<ToolResult>;
74
- editFile(path: string, oldText: string, newText: string): Promise<ToolResult>;
75
- glob(pattern: string, options?: GlobOptions): Promise<ToolResult>;
76
- grep(pattern: string, path?: string, options?: GrepOptions): Promise<ToolResult>;
77
- webFetch?(options: WebFetchOptions): Promise<ToolResult>;
78
- webSearch?(query: string): Promise<ToolResult>;
79
- capabilities(): ToolProviderCapabilities;
80
- }
81
-
82
- interface ToolResult {
83
- success: boolean;
84
- output: string;
85
- error?: string;
86
- }
87
- ```
88
-
89
- Built-in implementations:
30
+ 1. `createAgent` drives a deterministic step loop
31
+ 2. Each step calls `loop.nextAction(messages)` to get the LLM's decision
32
+ 3. If it's a tool call, the harness executes it via `ToolProvider` and appends the result
33
+ 4. If it's a final action, the loop ends and returns the result
90
34
 
91
- | Provider | Description |
92
- |----------|-------------|
93
- | `LocalToolProvider` | Runs tools on the local filesystem |
94
- | `E2BToolProvider` | Routes tools to a sandbox VM via `ControlPlaneE2BExecutor` |
95
- | `CompositeToolProvider` | Combines multiple providers (e.g. local filesystem + sandbox bash) |
35
+ ## Using with the sandbox
96
36
 
97
- ## SandboxProvider
37
+ The most common setup connects the harness to a running sandbox service via `ControlPlaneE2BExecutor`:
98
38
 
99
- Higher-level sandbox operations beyond basic tool calls:
100
-
101
- ```typescript
102
- interface SandboxProvider {
103
- exec(command: string, options?: SandboxExecOptions): Promise<SandboxExecResult>;
104
- readSandboxFile(path: string): Promise<SandboxFileBlob>;
105
- writeSandboxFile(path: string, content: SandboxFileBlob): Promise<void>;
106
- }
107
- ```
108
-
109
- Used by `SkillManager` for executing skill scripts in isolated VMs.
110
-
111
- ## Connecting to a Sandbox
112
-
113
- ```typescript
114
- import { ControlPlaneE2BExecutor } from './src/providers/control-plane-e2b-executor';
39
+ ```ts
40
+ import { createAgent } from './src/agent/create-agent';
115
41
  import { E2BToolProvider } from './src/providers/e2b-tool-provider';
42
+ import { ControlPlaneE2BExecutor } from './src/providers/control-plane-e2b-executor';
43
+ import { VercelAgentLoop } from './src/loop/vercel-agent-loop';
116
44
 
117
45
  // Connect to sandbox service
118
46
  const executor = new ControlPlaneE2BExecutor({
@@ -122,174 +50,155 @@ const executor = new ControlPlaneE2BExecutor({
122
50
  });
123
51
  await executor.initialize(); // creates a Firecracker VM
124
52
 
125
- const toolProvider = new E2BToolProvider(executor);
53
+ // Build and run the agent
54
+ const agent = createAgent({
55
+ toolProvider: new E2BToolProvider(executor),
56
+ loop: new VercelAgentLoop(), // needs ANTHROPIC_API_KEY
57
+ });
126
58
 
127
- // ... use with createAgent or ArcLoop
59
+ const result = await agent.run('create a bar chart of sales data');
60
+ console.log(result.output); // LLM's final response
61
+ console.log(result.steps); // number of tool steps
128
62
 
129
- await executor.destroy(); // tears down the VM
63
+ await executor.destroy(); // tears down the VM
130
64
  ```
131
65
 
132
- From environment variables: `ControlPlaneE2BExecutor.fromEnv()` reads `SAMYX_BASE_URL` and `SAMYX_API_KEY`.
66
+ For a complete working example, see [`examples/chat-assistant/src/chat.ts`](../examples/chat-assistant/src/chat.ts).
67
+
68
+ ### From environment variables
133
69
 
134
- ---
70
+ `ControlPlaneE2BExecutor.fromEnv()` reads `SAMYX_BASE_URL` and `SAMYX_API_KEY` automatically:
135
71
 
136
- ## Single-Agent Mode (`createAgent`)
72
+ ```ts
73
+ const executor = ControlPlaneE2BExecutor.fromEnv();
74
+ ```
137
75
 
138
- For simple tasks that don't need orchestration:
76
+ ## Using locally (no sandbox)
139
77
 
140
- ```typescript
78
+ For development without a sandbox service, use `LocalToolProvider` which runs tools on the local machine:
79
+
80
+ ```ts
141
81
  import { createAgent } from './src/agent/create-agent';
142
82
  import { LocalToolProvider } from './src/providers/local-tool-provider';
143
83
 
144
84
  const agent = createAgent({
145
85
  toolProvider: new LocalToolProvider(process.cwd()),
146
- loop: new VercelAgentLoop(), // needs ANTHROPIC_API_KEY
86
+ loop: new VercelAgentLoop(), // import from './src/loop/vercel-agent-loop'; needs ANTHROPIC_API_KEY
147
87
  });
148
88
 
149
89
  const result = await agent.run('list all TypeScript files');
150
- console.log(result.output);
151
90
  ```
152
91
 
153
- ### Configuration
92
+ ## Key modules
93
+
94
+ ### Agent creation (`src/agent/create-agent.ts`)
154
95
 
155
- | Option | Type | Default | Description |
156
- |--------|------|---------|-------------|
157
- | `toolProvider` | `ToolProvider` | required | Executes tool calls |
158
- | `loop` | `AgentLoop` | `VercelAgentLoop` | LLM decision loop |
159
- | `sandboxProvider` | `SandboxProvider` | — | Higher-level sandbox operations |
160
- | `maxSteps` | `number` | 30 | Max tool steps per run |
161
- | `telemetry` | `HarnessTelemetry` | — | OpenTelemetry-style tracing |
162
- | `skillIndexPath` | `string` | — | Path to skill index JSON for routing |
96
+ `createAgent(options)` returns an agent with a `.run(prompt, options?)` method. Options:
163
97
 
164
- ### VercelAgentLoop
98
+ | Option | Type | Description |
99
+ |--------|------|-------------|
100
+ | `toolProvider` | `ToolProvider` | Required. Executes tool calls |
101
+ | `loop` | `AgentLoop` | LLM decision loop (default: `VercelAgentLoop`) |
102
+ | `sandboxProvider` | `SandboxProvider` | Optional. Higher-level sandbox ops (file download, exec with env) |
103
+ | `maxSteps` | `number` | Max tool steps per run (default: 30) |
104
+ | `telemetry` | `HarnessTelemetry` | Optional. OpenTelemetry-style tracing |
105
+ | `skillIndexPath` | `string` | Optional. Path to skill index JSON |
165
106
 
166
- Calls Claude via the Vercel AI SDK. Supports parallel tool calls and configurable system prompt.
107
+ ### Agent loop (`src/loop/vercel-agent-loop.ts`)
167
108
 
168
- ```typescript
109
+ `VercelAgentLoop` calls Claude via the Vercel AI SDK. It supports:
110
+ - Parallel tool calls (returns `ToolBatchAction` when the LLM requests multiple tools at once)
111
+ - Configurable system prompt
112
+ - Model selection via `HARNESS_MODEL` env var (default: `claude-sonnet-4-5`)
113
+
114
+ ```ts
169
115
  const loop = new VercelAgentLoop({
170
116
  systemPrompt: 'You are a helpful coding assistant.',
171
- model: 'claude-sonnet-4-5', // or HARNESS_MODEL env var
172
117
  });
173
118
  ```
174
119
 
175
- ### LCMToolLoop
120
+ ### Tool provider (`src/interfaces/tool-provider.ts`)
176
121
 
177
- Wraps another loop to add Lossless Context Management and optional REPL orchestration:
122
+ The contract for tool execution:
178
123
 
179
- ```typescript
180
- import { LCMToolLoop } from './src/loop/lcm-tool-loop';
181
- import { VercelAgentLoop } from './src/loop/vercel-agent-loop';
124
+ ```ts
125
+ interface ToolProvider {
126
+ bash(command: string, options?: BashOptions): Promise<ToolResult>;
127
+ readFile(path: string, options?: ReadOptions): Promise<ToolResult>;
128
+ writeFile(path: string, content: string): Promise<ToolResult>;
129
+ editFile(path: string, oldText: string, newText: string): Promise<ToolResult>;
130
+ glob(pattern: string, options?: GlobOptions): Promise<ToolResult>;
131
+ grep(pattern: string, path?: string, options?: GrepOptions): Promise<ToolResult>;
132
+ webFetch?(options: WebFetchOptions): Promise<ToolResult>;
133
+ webSearch?(query: string): Promise<ToolResult>;
134
+ capabilities(): ToolProviderCapabilities;
135
+ }
182
136
 
183
- const loop = new LCMToolLoop({
184
- innerLoop: new VercelAgentLoop(),
185
- toolProvider: mySandboxProvider,
186
- enableRepl: true, // default: true
187
- bridgeDir: '/var/run/bridge',
188
- onActivity: (entry) => console.log(entry),
189
- onLlmRequest: async (prompt) => callLLM(prompt),
190
- onWebFetchRequest: async (url) => fetch(url),
191
- });
137
+ interface ToolResult {
138
+ success: boolean;
139
+ output: string;
140
+ error?: string;
141
+ }
192
142
  ```
193
143
 
194
- **Standard mode**: Lossless context trimming — the LLM always sees a coherent, budget-fitting view of the full conversation.
195
-
196
- **REPL mode**: When the LLM returns a Bash action with the REPL marker, the loop writes a Python script into the sandbox, injects the bridge module, runs the script, and polls for sub-requests (LLM, web_fetch, ask_user) that the harness fulfills.
197
-
198
- ---
144
+ Built-in implementations:
199
145
 
200
- ## ArcLoop (Orchestrator Mode)
146
+ | Provider | Description |
147
+ |----------|-------------|
148
+ | `LocalToolProvider` | Runs tools on the local filesystem |
149
+ | `E2BToolProvider` | Routes tools to an E2B-compatible executor over HTTP |
150
+ | `CompositeToolProvider` | Combines multiple providers (e.g. sandbox + web) |
201
151
 
202
- For complex tasks that benefit from parallel processes, context management, and memory:
152
+ ### Action types (`src/agent/types.ts`)
203
153
 
204
- ```typescript
205
- import { createArcAgent } from './src/arc/create-arc-agent';
154
+ The LLM returns one of these action types each turn:
206
155
 
207
- const agent = await createArcAgent({
208
- toolProvider: myToolProvider,
209
- episodeStore: myEpisodeStore, // required
210
- sessionMemoStore: mySessionMemoStore, // required
211
- longTermStore: myLongTermStore, // required
212
- taskId: 'task-1',
213
- sessionId: 'session-1',
214
- });
156
+ ```ts
157
+ // Single tool call
158
+ interface ToolCallAction {
159
+ type: 'tool';
160
+ name: 'Bash' | 'Read' | 'Write' | 'Edit' | 'Glob' | 'Grep' | ...;
161
+ args: Record<string, unknown>;
162
+ }
215
163
 
216
- // Streaming
217
- for await (const event of agent.stream(messages, signal)) {
218
- if (event.type === 'text_delta') process.stdout.write(event.text);
219
- if (event.type === 'process_dispatched') console.log(` → ${event.action}`);
220
- if (event.type === 'done') console.log(`Done in ${event.stats.durationMs}ms`);
164
+ // Multiple independent tool calls (executed in parallel)
165
+ interface ToolBatchAction {
166
+ type: 'tool_batch';
167
+ calls: ToolCallAction[];
221
168
  }
222
169
 
223
- // Non-streaming
224
- const result = await agent.run(messages, signal);
170
+ // Final text response (ends the loop)
171
+ interface FinalAction {
172
+ type: 'final';
173
+ content: string;
174
+ }
225
175
  ```
226
176
 
227
- ### ArcLoopConfig
228
-
229
- | Option | Type | Default | Description |
230
- |--------|------|---------|-------------|
231
- | `model` | `string` | `'claude-opus-4-6'` | Orchestrator model (ID or tier name) |
232
- | `modelMap` | `Record<ModelTier, string>` | haiku/sonnet/opus | Maps fast/medium/strong to model IDs |
233
- | `apiKey` | `string` | — | Anthropic API key |
234
- | `systemPrompt` | `string` | built-in | Custom orchestrator system prompt |
235
- | `maxTurns` | `number` | 30 | Max orchestrator turns |
236
- | `processTimeout` | `number` | 120_000 | Per-process timeout (ms) |
237
- | `processMaxSteps` | `number` | 20 | Per-process max tool steps |
238
- | `contextWindowSize` | `number` | 200_000 | Context window in tokens |
239
- | `outputReserve` | `number` | 20_000 | Tokens reserved for output |
240
- | `autoMemory` | `boolean` | true | Auto-detect patterns from episodes |
241
- | `episodeStore` | `EpisodeStore` | required | Stores episode summaries + traces |
242
- | `sessionMemoStore` | `SessionMemoStore` | required | Stores session memos |
243
- | `longTermStore` | `LongTermStore` | required | Stores long-term memories |
244
- | `taskId` | `string` | required | Task identifier |
245
- | `sessionId` | `string` | required | Session identifier |
246
- | `toolProvider` | `ToolProvider` | required | Tool execution |
247
- | `processTools` | `Record<string, AnyTool>` | builtinTools | Tools available inside processes |
248
- | `extraOrchestratorTools` | `Record<string, AnyTool>` | — | Custom orchestrator tools |
249
- | `onOrchestratorTool` | `function` | — | Handler for custom orchestrator tools |
250
- | `resilience` | `ResiliencePolicy` | — | Composable resilience pipeline |
251
- | `traceWriter` | `function` | — | Callback for trace event emission |
252
-
253
- ### Resilience
254
-
255
- ```typescript
256
- import { resilience } from './src/arc/resilience';
257
-
258
- const pipeline = resilience()
259
- .retry({ maxRetries: 2, baseDelay: 1000 })
260
- .timeout({ durationMs: 30_000 })
261
- .circuitBreaker({ failureThreshold: 5 })
262
- .build();
263
-
264
- const agent = await createArcAgent({
265
- // ...config
266
- resilience: pipeline,
267
- });
268
- ```
177
+ ### LCM tool loop (`src/loop/lcm-tool-loop.ts`)
269
178
 
270
- ### Trace Emission
179
+ `LCMToolLoop` wraps another loop to add Lossless Context Management (LCM)-based tool routing, REPL script execution, and bridge-based tool dispatch. It is used in the chat-assistant example.
271
180
 
272
- ```typescript
273
- const traces: TraceEvent[] = [];
274
- const agent = await createArcAgent({
275
- // ...config
276
- traceWriter: (event) => traces.push(event),
277
- });
181
+ ### Sandbox provider (`src/interfaces/sandbox-provider.ts`)
182
+
183
+ Higher-level sandbox operations beyond basic tool calls:
184
+
185
+ ```ts
186
+ interface SandboxProvider {
187
+ exec(command: string, options?: SandboxExecOptions): Promise<SandboxExecResult>;
188
+ readSandboxFile(path: string): Promise<SandboxFileBlob>;
189
+ writeSandboxFile(path: string, content: SandboxFileBlob): Promise<void>;
190
+ }
278
191
  ```
279
192
 
280
- Traces can be validated against the formal model: `cd verify && cargo run -- trace file.ndjson`
193
+ ### Observability (`src/observability/otel.ts`)
281
194
 
282
- ---
195
+ `HarnessTelemetry` provides OpenTelemetry-style spans and metrics for agent runs.
283
196
 
284
- ## Package Layout
197
+ ## Package layout
285
198
 
286
199
  ```
287
200
  src/
288
201
  ├── agent/ # createAgent, step executor, types
289
- ├── arc/ # ArcLoop orchestrator, processes, memory, resilience
290
- │ ├── resilience/ # Retry, circuit breaker, timeout, bulkhead, fallback
291
- │ ├── stores/ # RxDB + in-memory store implementations
292
- │ └── object-store/ # Pluggable cloud sync (fs, memory)
293
202
  ├── interfaces/ # ToolProvider, SandboxProvider, AgentLoop contracts
294
203
  ├── loop/ # VercelAgentLoop, LCMToolLoop
295
204
  ├── providers/ # LocalToolProvider, E2BToolProvider, ControlPlaneE2BExecutor
@@ -297,20 +206,16 @@ src/
297
206
  ├── hooks/ # Pre/post tool call hooks
298
207
  ├── permissions/ # Tool permission checks
299
208
  ├── sessions/ # Session persistence
300
- ├── subagents/ # Subagent spawning
209
+ ├── subagents/ # Subagent spawning and task tools
301
210
  ├── skills/ # Skill index, routing, and management
302
211
  ├── optimization/ # Benchmark runner
303
212
  └── observability/ # OpenTelemetry integration
304
-
305
- verify/ # Rust formal verification (Stateright model checker)
306
- testing/ # Adversarial scenario replay harness
307
- tests/ # Vitest test suite
308
213
  ```
309
214
 
310
215
  ## Documentation
311
216
 
312
- - [Arc architecture](../docs/arc.md) — process model, context window, memory, resilience, verification
313
- - [Testing](../docs/testing.md) — test layers, running tests, writing new tests
314
- - [Sandbox setup](../docs/PUBLIC_SANDBOX.md) — deploying the sandbox service
315
- - [Release process](../docs/RELEASE.md) — versioning and publishing
316
- - [Example](../examples/chat-assistant/src/chat.ts) — complete working chat assistant
217
+ - Provider guide: `docs/guides/providers.md`
218
+ - Skills guide: `docs/guides/skills.md`
219
+ - Observability guide: `docs/guides/observability.md`
220
+ - Release process: `../docs/RELEASE.md`
221
+ - Full example: [`../examples/chat-assistant/src/chat.ts`](../examples/chat-assistant/src/chat.ts)
@@ -0,0 +1,32 @@
1
+ # Observability Guide
2
+
3
+ Harness emits OpenTelemetry-style traces and metrics through `HarnessTelemetry`.
4
+
5
+ ## Spans
6
+ - `agent.run`
7
+ - `agent.step`
8
+ - `tool.call`
9
+ - `context.compaction`
10
+ - `skill.exec`
11
+ - `subagent.run`
12
+
13
+ ## Metrics
14
+ - `agent_steps_total`
15
+ - `tool_calls_total`
16
+ - `tool_call_duration_ms`
17
+ - `compactions_total`
18
+ - `agent_errors_total`
19
+
20
+ ## Correlation Fields
21
+ Attach these fields to logs where available:
22
+ - `trace_id`
23
+ - `span_id`
24
+ - `run_id`
25
+ - `session_id`
26
+
27
+ ## Disable Mode
28
+ Create telemetry in disabled mode (pass `false` to the constructor) for zero-impact execution:
29
+
30
+ ```ts
31
+ const telemetry = new HarnessTelemetry(false);
32
+ ```
@@ -0,0 +1,51 @@
1
+ # Providers Guide
2
+
3
+ ## ToolProvider
4
+ Implement the `ToolProvider` interface to expose agent tools (`Bash`, `Read`, `Write`, `Edit`, `Glob`, `Grep`).
5
+
6
+ Included foundations:
7
+ - `LocalToolProvider`
8
+ - `CompositeToolProvider`
9
+ - `E2BToolProvider` (executor-backed adapter)
10
+
11
+ ## SandboxProvider
12
+ Use `SandboxProvider` for infrastructure actions (skill execution, setup/install tasks). Keep it separate from `ToolProvider`.
13
+
14
+ The current sandbox file contract is binary-first:
15
+
16
+ ```ts
17
+ type SandboxFileBlob = {
18
+ data: Uint8Array;
19
+ mimeType?: string;
20
+ filename?: string;
21
+ };
22
+
23
+ interface SandboxProvider {
24
+ exec(command: string, options?: SandboxExecOptions): Promise<SandboxExecResult>;
25
+ readSandboxFile(path: string): Promise<SandboxFileBlob>;
26
+ writeSandboxFile(path: string, content: SandboxFileBlob): Promise<void>;
27
+ }
28
+ ```
29
+
30
+ Use `mimeType`/`filename` for transport metadata (for example, raw download endpoints). Keep file contents in `data` as bytes.
31
+
32
+ ## Capability Routing
33
+ `CompositeToolProvider` routes calls to the first provider that advertises each capability.
34
+
35
+ ## Default Skill Sandbox
36
+ `SkillManager` now defaults to the harness-provided `SkillSandboxProvider`:
37
+
38
+ ```ts
39
+ const skillManager = new SkillManager();
40
+ ```
41
+
42
+ Default provider env vars:
43
+ - `SAMYX_BASE_URL` or `SANDBOX_BASE_URL`
44
+ - `SAMYX_API_KEY` or `SANDBOX_API_KEY`
45
+ - optional `SANDBOX_TEMPLATE` (default: `ubuntu-22.04`)
46
+
47
+ You can still override with a custom provider:
48
+
49
+ ```ts
50
+ const skillManager = new SkillManager(customSandboxProvider);
51
+ ```
@@ -0,0 +1,25 @@
1
+ # Skills Guide
2
+
3
+ ## Progressive Disclosure
4
+ `SkillManager` stores only summary metadata for prompt injection and loads full `SKILL.md` instructions on invocation.
5
+
6
+ ## Skill Routing
7
+ `createAgent` uses a `SkillRouter` before invocation:
8
+ - direct skill-name match (word boundary)
9
+ - alias match (for example `excel -> xlsx`, `word -> docx`, `powerpoint -> pptx`)
10
+ - Haiku model fallback for semantic matching
11
+
12
+ Environment knobs:
13
+ - `HARNESS_SKILL_ROUTER_MODEL` (default: `claude-3-5-haiku-latest`)
14
+ - `HARNESS_SKILL_ROUTER_THRESHOLD` (default: `0.55`)
15
+
16
+ ## Install Lifecycle
17
+ Dependency install state transitions:
18
+ - `installing`
19
+ - `ready`
20
+ - `degraded`
21
+
22
+ If install fails, state becomes `degraded` and the error is surfaced.
23
+
24
+ ## Security Baseline
25
+ See `docs/security/skill-sandbox-threat-model.md` for path traversal and sandbox boundary rules.
@@ -0,0 +1,20 @@
1
+ # Skill Sandbox Threat Model
2
+
3
+ ## Scope
4
+ This document defines the baseline security assumptions for skill execution in harness.
5
+
6
+ ## Trust Boundaries
7
+ - Skill scripts are untrusted input.
8
+ - Sandbox runtime is the security boundary.
9
+ - Host filesystem and host network are outside the trust boundary.
10
+
11
+ ## Controls
12
+ - Deny host mounts by default.
13
+ - Deny outbound network by default unless explicitly allowed.
14
+ - Use tenant-scoped credentials and ephemeral filesystems.
15
+ - Disallow path traversal (`..`) in skill paths.
16
+
17
+ ## Required Tests
18
+ - Sandbox escape attempt should fail.
19
+ - Cross-tenant path access should fail.
20
+ - Dependency install failures should degrade skill state and block execution until retry.
package/package.json CHANGED
@@ -1,47 +1,19 @@
1
1
  {
2
2
  "name": "@bluecopa/harness",
3
- "version": "0.1.0-snapshot.98",
3
+ "version": "1.0.0",
4
4
  "description": "Provider-agnostic TypeScript agent framework",
5
5
  "license": "UNLICENSED",
6
- "type": "module",
7
- "files": [
8
- "dist",
9
- "README.md"
10
- ],
11
- "exports": {
12
- "./arc/app-adapter": {
13
- "types": "./dist/arc/app-adapter.d.ts",
14
- "import": "./dist/arc/app-adapter.js"
15
- },
16
- "./arc/create-arc-agent": {
17
- "types": "./dist/arc/create-arc-agent.d.ts",
18
- "import": "./dist/arc/create-arc-agent.js"
19
- },
20
- "./arc/profile-builder": {
21
- "types": "./dist/arc/profile-builder.d.ts",
22
- "import": "./dist/arc/profile-builder.js"
23
- },
24
- "./loop/vercel-agent-loop": {
25
- "types": "./dist/loop/vercel-agent-loop.d.ts",
26
- "import": "./dist/loop/vercel-agent-loop.js"
27
- },
28
- "./package.json": "./package.json"
29
- },
30
6
  "scripts": {
31
- "build": "tsup",
32
- "prepack": "pnpm run build",
33
7
  "test": "vitest run",
34
8
  "test:watch": "vitest"
35
9
  },
36
10
  "dependencies": {
37
11
  "@ai-sdk/anthropic": "^3.0.48",
38
12
  "ai": "^6.0.101",
39
- "rxdb": "^15.39.0",
40
13
  "zod": "^4.1.11"
41
14
  },
42
15
  "devDependencies": {
43
16
  "@types/node": "^24.3.0",
44
- "tsup": "^8.5.1",
45
17
  "typescript": "^5.9.2",
46
18
  "vitest": "^3.2.4"
47
19
  },