@bluecopa/harness 0.1.0-snapshot.38 → 0.1.0-snapshot.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (81)
  1. package/README.md +117 -212
  2. package/package.json +1 -2
  3. package/src/agent/create-agent.ts +2 -15
  4. package/src/agent/types.ts +2 -15
  5. package/src/loop/context-store.ts +9 -12
  6. package/src/loop/vercel-agent-loop.ts +17 -28
  7. package/src/skills/skill-router.ts +6 -12
  8. package/tests/integration/agent-skill-default-from-sandbox.spec.ts +2 -3
  9. package/tests/unit/structured-messages.spec.ts +1 -1
  10. package/vitest.config.ts +1 -1
  11. package/src/arc/agent-runner.ts +0 -683
  12. package/src/arc/arc-loop.ts +0 -775
  13. package/src/arc/arc-types.ts +0 -115
  14. package/src/arc/bridge-tools.ts +0 -170
  15. package/src/arc/bridged-tool-provider.ts +0 -80
  16. package/src/arc/consolidation.ts +0 -118
  17. package/src/arc/context-window.ts +0 -267
  18. package/src/arc/create-arc-agent.ts +0 -99
  19. package/src/arc/debug.ts +0 -62
  20. package/src/arc/episode-compressor.ts +0 -225
  21. package/src/arc/memory-manager.ts +0 -245
  22. package/src/arc/message-convert.ts +0 -111
  23. package/src/arc/object-store/fs-object-store.ts +0 -60
  24. package/src/arc/object-store/memory-object-store.ts +0 -41
  25. package/src/arc/object-store/object-store.ts +0 -12
  26. package/src/arc/profile-builder.ts +0 -157
  27. package/src/arc/resilience/bulkhead.ts +0 -110
  28. package/src/arc/resilience/circuit-breaker.ts +0 -112
  29. package/src/arc/resilience/fallback.ts +0 -27
  30. package/src/arc/resilience/index.ts +0 -21
  31. package/src/arc/resilience/pipeline.ts +0 -103
  32. package/src/arc/resilience/retry.ts +0 -90
  33. package/src/arc/resilience/timeout.ts +0 -60
  34. package/src/arc/resilience/types.ts +0 -71
  35. package/src/arc/sig.ts +0 -115
  36. package/src/arc/skill-resolver.ts +0 -78
  37. package/src/arc/stores/episode-store.ts +0 -120
  38. package/src/arc/stores/long-term-store.ts +0 -86
  39. package/src/arc/stores/rxdb-setup.ts +0 -113
  40. package/src/arc/stores/session-memo-store.ts +0 -58
  41. package/src/arc/tools.ts +0 -67
  42. package/src/arc/types.ts +0 -324
  43. package/src/arc/utils.ts +0 -19
  44. package/testing/index.ts +0 -22
  45. package/testing/scenario-replay.ts +0 -209
  46. package/testing/scenario-types.ts +0 -38
  47. package/testing/scripted-llm.ts +0 -230
  48. package/tests/arc/channel.test.ts +0 -170
  49. package/tests/arc/context-window.test.ts +0 -396
  50. package/tests/arc/e2e.test.ts +0 -353
  51. package/tests/arc/error-paths.test.ts +0 -402
  52. package/tests/arc/live-integration.test.ts +0 -357
  53. package/tests/arc/memory-manager.test.ts +0 -384
  54. package/tests/arc/process-interleaving.test.ts +0 -432
  55. package/tests/arc/process-profiles.test.ts +0 -364
  56. package/tests/arc/resilience-integration.test.ts +0 -381
  57. package/tests/arc/resilience.test.ts +0 -575
  58. package/tests/arc/scenario-driven.test.ts +0 -297
  59. package/tests/arc/tool-dispatch.test.ts +0 -340
  60. package/tests/arc/wasm-pbt.test.ts +0 -104
  61. package/verify/Cargo.lock +0 -637
  62. package/verify/Cargo.toml +0 -24
  63. package/verify/src/lib.rs +0 -5
  64. package/verify/src/main.rs +0 -165
  65. package/verify/src/model/context.rs +0 -100
  66. package/verify/src/model/mod.rs +0 -6
  67. package/verify/src/model/orchestrator.rs +0 -371
  68. package/verify/src/model/process.rs +0 -140
  69. package/verify/src/model/types.rs +0 -273
  70. package/verify/src/properties/liveness.rs +0 -32
  71. package/verify/src/properties/mod.rs +0 -4
  72. package/verify/src/properties/safety.rs +0 -78
  73. package/verify/src/trace/event.rs +0 -155
  74. package/verify/src/trace/mod.rs +0 -2
  75. package/verify/src/trace/validator.rs +0 -367
  76. package/verify/src/wasm/mod.rs +0 -3
  77. package/verify/src/wasm/scenario_generator.rs +0 -400
  78. package/verify/src/wasm/types.rs +0 -104
  79. package/verify/src/wasm/wasm_validator.rs +0 -107
  80. package/verify/tests/model_check.rs +0 -49
  81. package/verify/tests/trace_validation.rs +0 -147
package/README.md CHANGED
@@ -2,17 +2,9 @@
2
2
 
3
3
  Provider-agnostic TypeScript agent framework with Claude-code-compatible tool semantics.
4
4
 
5
- Published on npm as **`@bluecopa/harness`**.
5
+ The harness provides the core loop that drives an AI agent: send messages to an LLM, execute the tool calls it returns, feed results back, and repeat until the LLM produces a final text response.
6
6
 
7
- Two execution modes: a simple single-agent loop (`createAgent` + `VercelAgentLoop`) and a process-based orchestrator (`ArcLoop`) that dispatches parallel processes with context management, memory, and resilience.
8
-
9
- ## Install
10
-
11
- ```bash
12
- pnpm add @bluecopa/harness
13
- ```
14
-
15
- ## Development
7
+ ## Quickstart
16
8
 
17
9
  ```bash
18
10
  pnpm install
@@ -21,11 +13,9 @@ pnpm test
21
13
 
22
14
  ## Architecture
23
15
 
24
- ### Single-Agent Loop
25
-
26
16
  ```
27
17
  ┌──────────────┐ ┌──────────────┐ ┌──────────────────┐
28
- │ createAgent │────►│ AgentLoop │────►│ LLM (Claude) │
18
+ │ createAgent │────▶│ AgentLoop │────▶│ LLM (Claude) │
29
19
  │ (turn loop) │ │ (nextAction)│ │ │
30
20
  └──────┬───────┘ └──────────────┘ └──────────────────┘
31
21
  │ │
@@ -37,82 +27,20 @@ pnpm test
37
27
  └──────────────┘
38
28
  ```
39
29
 
40
- ### ArcLoop Orchestrator
41
-
42
- ```
43
- Orchestrator (ArcLoop Opus 4.6 by default)
44
- │ tools: Thread, Check, Cancel, Remember, ReadEpisode
45
-
46
- │ Turn 1 (parallel):
47
- ├──► Process 0 ("read auth", model=fast) ─┐
48
- ├──► Process 1 ("read routes", model=fast) ─┼──► Episodes
49
- ├──► Process 2 ("read tests", model=fast) ─┘
50
-
51
- │ Turn 2 (dispatch dependent work):
52
- ├──► Thread("fix bug", context=[ep0,ep1,ep2]) ──► Episode
53
-
54
- │ Turn 3 (parallel):
55
- ├──► Thread("run tests", context=[ep3]) ─┐
56
- ├──► Thread("update docs", context=[ep3]) ─┘
57
-
58
- └──► Final text response
59
- ```
60
-
61
- Full architecture doc: [`docs/arc.md`](../docs/arc.md)
62
-
63
- ---
64
-
65
- ## ToolProvider
66
-
67
- The contract for tool execution. All agent modes use this interface.
68
-
69
- ```typescript
70
- interface ToolProvider {
71
- bash(command: string, options?: BashOptions): Promise<ToolResult>;
72
- readFile(path: string, options?: ReadOptions): Promise<ToolResult>;
73
- writeFile(path: string, content: string): Promise<ToolResult>;
74
- editFile(path: string, oldText: string, newText: string): Promise<ToolResult>;
75
- glob(pattern: string, options?: GlobOptions): Promise<ToolResult>;
76
- grep(pattern: string, path?: string, options?: GrepOptions): Promise<ToolResult>;
77
- webFetch?(options: WebFetchOptions): Promise<ToolResult>;
78
- webSearch?(query: string): Promise<ToolResult>;
79
- capabilities(): ToolProviderCapabilities;
80
- }
81
-
82
- interface ToolResult {
83
- success: boolean;
84
- output: string;
85
- error?: string;
86
- }
87
- ```
88
-
89
- Built-in implementations:
30
+ 1. `createAgent` drives a deterministic step loop
31
+ 2. Each step calls `loop.nextAction(messages)` to get the LLM's decision
32
+ 3. If it's a tool call, the harness executes it via `ToolProvider` and appends the result
33
+ 4. If it's a final action, the loop ends and returns the result
90
34
 
91
- | Provider | Description |
92
- |----------|-------------|
93
- | `LocalToolProvider` | Runs tools on the local filesystem |
94
- | `E2BToolProvider` | Routes tools to a sandbox VM via `ControlPlaneE2BExecutor` |
95
- | `CompositeToolProvider` | Combines multiple providers (e.g. local filesystem + sandbox bash) |
35
+ ## Using with the sandbox
96
36
 
97
- ## SandboxProvider
37
+ The most common setup connects the harness to a running sandbox service via `ControlPlaneE2BExecutor`:
98
38
 
99
- Higher-level sandbox operations beyond basic tool calls:
100
-
101
- ```typescript
102
- interface SandboxProvider {
103
- exec(command: string, options?: SandboxExecOptions): Promise<SandboxExecResult>;
104
- readSandboxFile(path: string): Promise<SandboxFileBlob>;
105
- writeSandboxFile(path: string, content: SandboxFileBlob): Promise<void>;
106
- }
107
- ```
108
-
109
- Used by `SkillManager` for executing skill scripts in isolated VMs.
110
-
111
- ## Connecting to a Sandbox
112
-
113
- ```typescript
114
- import { ControlPlaneE2BExecutor } from './src/providers/control-plane-e2b-executor';
39
+ ```ts
40
+ import { createAgent } from './src/agent/create-agent';
115
41
  import { E2BToolProvider } from './src/providers/e2b-tool-provider';
42
+ import { ControlPlaneE2BExecutor } from './src/providers/control-plane-e2b-executor';
43
+ import { VercelAgentLoop } from './src/loop/vercel-agent-loop';
116
44
 
117
45
  // Connect to sandbox service
118
46
  const executor = new ControlPlaneE2BExecutor({
@@ -122,174 +50,155 @@ const executor = new ControlPlaneE2BExecutor({
122
50
  });
123
51
  await executor.initialize(); // creates a Firecracker VM
124
52
 
125
- const toolProvider = new E2BToolProvider(executor);
53
+ // Build and run the agent
54
+ const agent = createAgent({
55
+ toolProvider: new E2BToolProvider(executor),
56
+ loop: new VercelAgentLoop(), // needs ANTHROPIC_API_KEY
57
+ });
126
58
 
127
- // ... use with createAgent or ArcLoop
59
+ const result = await agent.run('create a bar chart of sales data');
60
+ console.log(result.output); // LLM's final response
61
+ console.log(result.steps); // number of tool steps
128
62
 
129
- await executor.destroy(); // tears down the VM
63
+ await executor.destroy(); // tears down the VM
130
64
  ```
131
65
 
132
- From environment variables: `ControlPlaneE2BExecutor.fromEnv()` reads `SAMYX_BASE_URL` and `SAMYX_API_KEY`.
66
+ For a complete working example, see [`examples/chat-assistant/src/chat.ts`](../examples/chat-assistant/src/chat.ts).
67
+
68
+ ### From environment variables
133
69
 
134
- ---
70
+ `ControlPlaneE2BExecutor.fromEnv()` reads `SAMYX_BASE_URL` and `SAMYX_API_KEY` automatically:
135
71
 
136
- ## Single-Agent Mode (`createAgent`)
72
+ ```ts
73
+ const executor = ControlPlaneE2BExecutor.fromEnv();
74
+ ```
137
75
 
138
- For simple tasks that don't need orchestration:
76
+ ## Using locally (no sandbox)
139
77
 
140
- ```typescript
78
+ For development without a sandbox service, use `LocalToolProvider` which runs tools on the local machine:
79
+
80
+ ```ts
141
81
  import { createAgent } from './src/agent/create-agent';
142
82
  import { LocalToolProvider } from './src/providers/local-tool-provider';
143
83
 
144
84
  const agent = createAgent({
145
85
  toolProvider: new LocalToolProvider(process.cwd()),
146
- loop: new VercelAgentLoop(), // needs ANTHROPIC_API_KEY
86
+ loop: new VercelAgentLoop(),
147
87
  });
148
88
 
149
89
  const result = await agent.run('list all TypeScript files');
150
- console.log(result.output);
151
90
  ```
152
91
 
153
- ### Configuration
92
+ ## Key modules
93
+
94
+ ### Agent creation (`src/agent/create-agent.ts`)
154
95
 
155
- | Option | Type | Default | Description |
156
- |--------|------|---------|-------------|
157
- | `toolProvider` | `ToolProvider` | required | Executes tool calls |
158
- | `loop` | `AgentLoop` | `VercelAgentLoop` | LLM decision loop |
159
- | `sandboxProvider` | `SandboxProvider` | — | Higher-level sandbox operations |
160
- | `maxSteps` | `number` | 30 | Max tool steps per run |
161
- | `telemetry` | `HarnessTelemetry` | — | OpenTelemetry-style tracing |
162
- | `skillIndexPath` | `string` | — | Path to skill index JSON for routing |
96
+ `createAgent(options)` returns an agent with a `.run(prompt, options?)` method. Options:
163
97
 
164
- ### VercelAgentLoop
98
+ | Option | Type | Description |
99
+ |--------|------|-------------|
100
+ | `toolProvider` | `ToolProvider` | Required. Executes tool calls |
101
+ | `loop` | `AgentLoop` | LLM decision loop (default: `VercelAgentLoop`) |
102
+ | `sandboxProvider` | `SandboxProvider` | Optional. Higher-level sandbox ops (file download, exec with env) |
103
+ | `maxSteps` | `number` | Max tool steps per run (default: 30) |
104
+ | `telemetry` | `HarnessTelemetry` | Optional. OpenTelemetry-style tracing |
105
+ | `skillIndexPath` | `string` | Optional. Path to skill index JSON |
165
106
 
166
- Calls Claude via the Vercel AI SDK. Supports parallel tool calls and configurable system prompt.
107
+ ### Agent loop (`src/loop/vercel-agent-loop.ts`)
167
108
 
168
- ```typescript
109
+ `VercelAgentLoop` calls Claude via the Vercel AI SDK. It supports:
110
+ - Parallel tool calls (returns `ToolBatchAction` when the LLM requests multiple tools at once)
111
+ - Configurable system prompt
112
+ - Model selection via `HARNESS_MODEL` env var (default: `claude-sonnet-4-5`)
113
+
114
+ ```ts
169
115
  const loop = new VercelAgentLoop({
170
116
  systemPrompt: 'You are a helpful coding assistant.',
171
- model: 'claude-sonnet-4-5', // or HARNESS_MODEL env var
172
117
  });
173
118
  ```
174
119
 
175
- ### LCMToolLoop
120
+ ### Tool provider (`src/interfaces/tool-provider.ts`)
176
121
 
177
- Wraps another loop to add Lossless Context Management and optional REPL orchestration:
122
+ The contract for tool execution:
178
123
 
179
- ```typescript
180
- import { LCMToolLoop } from './src/loop/lcm-tool-loop';
181
- import { VercelAgentLoop } from './src/loop/vercel-agent-loop';
124
+ ```ts
125
+ interface ToolProvider {
126
+ bash(command: string, options?: BashOptions): Promise<ToolResult>;
127
+ readFile(path: string, options?: ReadOptions): Promise<ToolResult>;
128
+ writeFile(path: string, content: string): Promise<ToolResult>;
129
+ editFile(path: string, oldText: string, newText: string): Promise<ToolResult>;
130
+ glob(pattern: string, options?: GlobOptions): Promise<ToolResult>;
131
+ grep(pattern: string, path?: string, options?: GrepOptions): Promise<ToolResult>;
132
+ webFetch?(options: WebFetchOptions): Promise<ToolResult>;
133
+ webSearch?(query: string): Promise<ToolResult>;
134
+ capabilities(): ToolProviderCapabilities;
135
+ }
182
136
 
183
- const loop = new LCMToolLoop({
184
- innerLoop: new VercelAgentLoop(),
185
- toolProvider: mySandboxProvider,
186
- enableRepl: true, // default: true
187
- bridgeDir: '/var/run/bridge',
188
- onActivity: (entry) => console.log(entry),
189
- onLlmRequest: async (prompt) => callLLM(prompt),
190
- onWebFetchRequest: async (url) => fetch(url),
191
- });
137
+ interface ToolResult {
138
+ success: boolean;
139
+ output: string;
140
+ error?: string;
141
+ }
192
142
  ```
193
143
 
194
- **Standard mode**: Lossless context trimming — the LLM always sees a coherent, budget-fitting view of the full conversation.
195
-
196
- **REPL mode**: When the LLM returns a Bash action with the REPL marker, the loop writes a Python script into the sandbox, injects the bridge module, runs the script, and polls for sub-requests (LLM, web_fetch, ask_user) that the harness fulfills.
197
-
198
- ---
144
+ Built-in implementations:
199
145
 
200
- ## ArcLoop (Orchestrator Mode)
146
+ | Provider | Description |
147
+ |----------|-------------|
148
+ | `LocalToolProvider` | Runs tools on the local filesystem |
149
+ | `E2BToolProvider` | Routes tools to an E2B-compatible executor over HTTP |
150
+ | `CompositeToolProvider` | Combines multiple providers (e.g. sandbox + web) |
201
151
 
202
- For complex tasks that benefit from parallel processes, context management, and memory:
152
+ ### Action types (`src/agent/types.ts`)
203
153
 
204
- ```typescript
205
- import { createArcAgent } from './src/arc/create-arc-agent';
154
+ The LLM returns one of these action types each turn:
206
155
 
207
- const agent = await createArcAgent({
208
- toolProvider: myToolProvider,
209
- episodeStore: myEpisodeStore, // required
210
- sessionMemoStore: mySessionMemoStore, // required
211
- longTermStore: myLongTermStore, // required
212
- taskId: 'task-1',
213
- sessionId: 'session-1',
214
- });
156
+ ```ts
157
+ // Single tool call
158
+ interface ToolCallAction {
159
+ type: 'tool';
160
+ name: 'Bash' | 'Read' | 'Write' | 'Edit' | 'Glob' | 'Grep' | ...;
161
+ args: Record<string, unknown>;
162
+ }
215
163
 
216
- // Streaming
217
- for await (const event of agent.stream(messages, signal)) {
218
- if (event.type === 'text_delta') process.stdout.write(event.text);
219
- if (event.type === 'process_dispatched') console.log(` → ${event.action}`);
220
- if (event.type === 'done') console.log(`Done in ${event.stats.durationMs}ms`);
164
+ // Multiple independent tool calls (executed in parallel)
165
+ interface ToolBatchAction {
166
+ type: 'tool_batch';
167
+ calls: ToolCallAction[];
221
168
  }
222
169
 
223
- // Non-streaming
224
- const result = await agent.run(messages, signal);
170
+ // Final text response (ends the loop)
171
+ interface FinalAction {
172
+ type: 'final';
173
+ content: string;
174
+ }
225
175
  ```
226
176
 
227
- ### ArcLoopConfig
228
-
229
- | Option | Type | Default | Description |
230
- |--------|------|---------|-------------|
231
- | `model` | `string` | `'claude-opus-4-6'` | Orchestrator model (ID or tier name) |
232
- | `modelMap` | `Record<ModelTier, string>` | haiku/sonnet/opus | Maps fast/medium/strong to model IDs |
233
- | `apiKey` | `string` | — | Anthropic API key |
234
- | `systemPrompt` | `string` | built-in | Custom orchestrator system prompt |
235
- | `maxTurns` | `number` | 30 | Max orchestrator turns |
236
- | `processTimeout` | `number` | 120_000 | Per-process timeout (ms) |
237
- | `processMaxSteps` | `number` | 20 | Per-process max tool steps |
238
- | `contextWindowSize` | `number` | 200_000 | Context window in tokens |
239
- | `outputReserve` | `number` | 20_000 | Tokens reserved for output |
240
- | `autoMemory` | `boolean` | true | Auto-detect patterns from episodes |
241
- | `episodeStore` | `EpisodeStore` | required | Stores episode summaries + traces |
242
- | `sessionMemoStore` | `SessionMemoStore` | required | Stores session memos |
243
- | `longTermStore` | `LongTermStore` | required | Stores long-term memories |
244
- | `taskId` | `string` | required | Task identifier |
245
- | `sessionId` | `string` | required | Session identifier |
246
- | `toolProvider` | `ToolProvider` | required | Tool execution |
247
- | `processTools` | `Record<string, AnyTool>` | builtinTools | Tools available inside processes |
248
- | `extraOrchestratorTools` | `Record<string, AnyTool>` | — | Custom orchestrator tools |
249
- | `onOrchestratorTool` | `function` | — | Handler for custom orchestrator tools |
250
- | `resilience` | `ResiliencePolicy` | — | Composable resilience pipeline |
251
- | `traceWriter` | `function` | — | Callback for trace event emission |
252
-
253
- ### Resilience
254
-
255
- ```typescript
256
- import { resilience } from './src/arc/resilience';
257
-
258
- const pipeline = resilience()
259
- .retry({ maxRetries: 2, baseDelay: 1000 })
260
- .timeout({ durationMs: 30_000 })
261
- .circuitBreaker({ failureThreshold: 5 })
262
- .build();
263
-
264
- const agent = await createArcAgent({
265
- // ...config
266
- resilience: pipeline,
267
- });
268
- ```
177
+ ### LCM tool loop (`src/loop/lcm-tool-loop.ts`)
269
178
 
270
- ### Trace Emission
179
+ `LCMToolLoop` wraps another loop to add LCM-based tool routing, REPL script execution, and bridge-based tool dispatch. Used in the chat-assistant example.
271
180
 
272
- ```typescript
273
- const traces: TraceEvent[] = [];
274
- const agent = await createArcAgent({
275
- // ...config
276
- traceWriter: (event) => traces.push(event),
277
- });
181
+ ### Sandbox provider (`src/interfaces/sandbox-provider.ts`)
182
+
183
+ Higher-level sandbox operations beyond basic tool calls:
184
+
185
+ ```ts
186
+ interface SandboxProvider {
187
+ exec(command: string, options?: SandboxExecOptions): Promise<SandboxExecResult>;
188
+ readSandboxFile(path: string): Promise<SandboxFileBlob>;
189
+ writeSandboxFile(path: string, content: SandboxFileBlob): Promise<void>;
190
+ }
278
191
  ```
279
192
 
280
- Traces can be validated against the formal model: `cd verify && cargo run -- trace file.ndjson`
193
+ ### Observability (`src/observability/otel.ts`)
281
194
 
282
- ---
195
+ `HarnessTelemetry` provides OpenTelemetry-style spans and metrics for agent runs.
283
196
 
284
- ## Package Layout
197
+ ## Package layout
285
198
 
286
199
  ```
287
200
  src/
288
201
  ├── agent/ # createAgent, step executor, types
289
- ├── arc/ # ArcLoop orchestrator, processes, memory, resilience
290
- │ ├── resilience/ # Retry, circuit breaker, timeout, bulkhead, fallback
291
- │ ├── stores/ # RxDB + in-memory store implementations
292
- │ └── object-store/ # Pluggable cloud sync (fs, memory)
293
202
  ├── interfaces/ # ToolProvider, SandboxProvider, AgentLoop contracts
294
203
  ├── loop/ # VercelAgentLoop, LCMToolLoop
295
204
  ├── providers/ # LocalToolProvider, E2BToolProvider, ControlPlaneE2BExecutor
@@ -297,20 +206,16 @@ src/
297
206
  ├── hooks/ # Pre/post tool call hooks
298
207
  ├── permissions/ # Tool permission checks
299
208
  ├── sessions/ # Session persistence
300
- ├── subagents/ # Subagent spawning
209
+ ├── subagents/ # Subagent spawning and task tools
301
210
  ├── skills/ # Skill index, routing, and management
302
211
  ├── optimization/ # Benchmark runner
303
212
  └── observability/ # OpenTelemetry integration
304
-
305
- verify/ # Rust formal verification (Stateright model checker)
306
- testing/ # Adversarial scenario replay harness
307
- tests/ # Vitest test suite
308
213
  ```
309
214
 
310
215
  ## Documentation
311
216
 
312
- - [Arc architecture](../docs/arc.md) — process model, context window, memory, resilience, verification
313
- - [Testing](../docs/testing.md) — test layers, running tests, writing new tests
314
- - [Sandbox setup](../docs/PUBLIC_SANDBOX.md) — deploying the sandbox service
315
- - [Release process](../docs/RELEASE.md) — versioning and publishing
316
- - [Example](../examples/chat-assistant/src/chat.ts) — complete working chat assistant
217
+ - Provider guide: `docs/guides/providers.md`
218
+ - Skills guide: `docs/guides/skills.md`
219
+ - Observability guide: `docs/guides/observability.md`
220
+ - Release process: `../docs/RELEASE.md`
221
+ - Full example: [`../examples/chat-assistant/src/chat.ts`](../examples/chat-assistant/src/chat.ts)
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@bluecopa/harness",
3
- "version": "0.1.0-snapshot.38",
3
+ "version": "0.1.0-snapshot.4",
4
4
  "description": "Provider-agnostic TypeScript agent framework",
5
5
  "license": "UNLICENSED",
6
6
  "scripts": {
@@ -10,7 +10,6 @@
10
10
  "dependencies": {
11
11
  "@ai-sdk/anthropic": "^3.0.48",
12
12
  "ai": "^6.0.101",
13
- "rxdb": "^15.39.0",
14
13
  "zod": "^4.1.11"
15
14
  },
16
15
  "devDependencies": {
package/src/agent/create-agent.ts CHANGED
@@ -37,8 +37,6 @@ export interface AgentRuntime {
37
37
  /** Custom tool executor. Called for every tool action. Return null to fall through to built-in dispatch.
38
38
  * When hookRunner/permissionManager are provided on the runtime, they are automatically applied before/after this callback — no manual wiring needed. */
39
39
  executeToolAction?: (action: ToolCallAction) => Promise<ToolResult | null>;
40
- /** Progress callback fired before/after each tool call during run(). */
41
- onToolProgress?: (event: { type: 'tool_start'; name: string; args: Record<string, unknown> } | { type: 'tool_end'; name: string; success: boolean; durationMs: number }) => void;
42
40
  }
43
41
 
44
42
  /**
@@ -598,14 +596,10 @@ export function createAgent(runtime: AgentRuntime) {
598
596
 
599
597
  // Execute valid calls via batch (sequential sandbox ops) or parallel fallback
600
598
  if (validCalls.length > 0) {
601
- for (const c of validCalls) runtime.onToolProgress?.({ type: 'tool_start', name: c.name, args: c.args });
602
- const batchStart = Date.now();
603
599
  const results = await executeBatch(validCalls, runtime.toolProvider, runtime);
604
- const batchMs = Date.now() - batchStart;
605
600
  for (let i = 0; i < validCalls.length; i++) {
606
601
  const call = validCalls[i]!;
607
602
  const r = results[i]!;
608
- runtime.onToolProgress?.({ type: 'tool_end', name: call.name, success: r.success, durationMs: batchMs });
609
603
  if (!r.success) {
610
604
  recordAgentError(runtime.telemetry);
611
605
  }
@@ -665,8 +659,6 @@ export function createAgent(runtime: AgentRuntime) {
665
659
  } else {
666
660
  consecutiveInvalid = 0;
667
661
  }
668
- runtime.onToolProgress?.({ type: 'tool_start', name: action.name, args: action.args });
669
- const singleStart = Date.now();
670
662
  const result = validationError
671
663
  ? ({ success: false, output: '', error: validationError } as ToolResult)
672
664
  : await executor.run(async () => {
@@ -680,7 +672,6 @@ export function createAgent(runtime: AgentRuntime) {
680
672
  };
681
673
  }
682
674
  });
683
- runtime.onToolProgress?.({ type: 'tool_end', name: action.name, success: result.success, durationMs: Date.now() - singleStart });
684
675
  if (!result.success) {
685
676
  recordAgentError(runtime.telemetry);
686
677
  }
@@ -727,7 +718,8 @@ export function createAgent(runtime: AgentRuntime) {
727
718
  if (event.type === 'text_delta') {
728
719
  finalText += event.text;
729
720
  yield event;
730
- } else if (event.type === 'tool_start') {
721
+ }
722
+ if (event.type === 'tool_start') {
731
723
  pendingTools.push({
732
724
  type: 'tool',
733
725
  name: event.name,
@@ -735,11 +727,6 @@ export function createAgent(runtime: AgentRuntime) {
735
727
  ...(event.toolCallId != null ? { toolCallId: event.toolCallId } : {}),
736
728
  });
737
729
  yield event;
738
- } else {
739
- // Forward all other events (tool_end, step_start, step_end, done)
740
- // from self-managing loops like ArcLoop
741
- yield event;
742
- if (event.type === 'done') return;
743
730
  }
744
731
  }
745
732
 
package/src/agent/types.ts CHANGED
@@ -11,26 +11,13 @@ export interface ToolResultInfo {
11
11
  isError?: boolean;
12
12
  }
13
13
 
14
- export type ContentPart =
15
- | { type: 'text'; text: string }
16
- | { type: 'image'; image: Buffer | Uint8Array; mimeType: string };
17
-
18
14
  export interface AgentMessage {
19
15
  role: 'system' | 'user' | 'assistant' | 'tool';
20
- content: string | ContentPart[];
16
+ content: string;
21
17
  toolCalls?: ToolCallInfo[]; // assistant messages: what tools were called
22
18
  toolResults?: ToolResultInfo[]; // tool messages: results keyed by toolCallId
23
19
  }
24
20
 
25
- /** Extract plain text from content (string or ContentPart[]). */
26
- export function getTextContent(content: string | ContentPart[]): string {
27
- if (typeof content === 'string') return content;
28
- return content
29
- .filter((p): p is Extract<ContentPart, { type: 'text' }> => p.type === 'text')
30
- .map((p) => p.text)
31
- .join('\n');
32
- }
33
-
34
21
  export interface ToolCallAction {
35
22
  type: 'tool';
36
23
  name: string;
@@ -59,7 +46,7 @@ export interface AgentRunResult {
59
46
  export type AgentStreamEvent =
60
47
  | { type: 'text_delta'; text: string }
61
48
  | { type: 'tool_start'; name: string; args: Record<string, unknown>; toolCallId?: string }
62
- | { type: 'tool_end'; name: string; result: { success: boolean; output: string; error?: string; [key: string]: unknown } }
49
+ | { type: 'tool_end'; name: string; result: { success: boolean; output: string; error?: string } }
63
50
  | { type: 'step_start'; step: number }
64
51
  | { type: 'step_end'; step: number }
65
52
  | { type: 'done'; output: string; steps: number };
package/src/loop/context-store.ts CHANGED
@@ -1,5 +1,4 @@
1
1
  import type { AgentMessage } from '../agent/types';
2
- import { getTextContent } from '../agent/types';
3
2
 
4
3
  export interface ContextStoreConfig {
5
4
  /** Maximum token budget for the trimmed view. Default: 150_000 */
@@ -92,7 +91,7 @@ export class LosslessContextStore {
92
91
  for (let i = 0; i < this.raw.length; i++) {
93
92
  const existing = this.raw[i]!;
94
93
  const candidate = incoming[i]!;
95
- if (existing.role !== candidate.role || getTextContent(existing.content) !== getTextContent(candidate.content)) {
94
+ if (existing.role !== candidate.role || existing.content !== candidate.content) {
96
95
  return false;
97
96
  }
98
97
  }
@@ -102,7 +101,7 @@ export class LosslessContextStore {
102
101
  private estimateTokensFor(msgs: AgentMessage[]): number {
103
102
  let chars = 0;
104
103
  for (const m of msgs) {
105
- chars += getTextContent(m.content).length + m.role.length + 4; // role + separators
104
+ chars += m.content.length + m.role.length + 4; // role + separators
106
105
  }
107
106
  return Math.ceil(chars / CHARS_PER_TOKEN);
108
107
  }
@@ -131,10 +130,9 @@ export class LosslessContextStore {
131
130
  for (const m of hotZone) {
132
131
  // Tool results are formatted as "ToolName: output" by create-agent
133
132
  if (m.role === 'tool') {
134
- const text = getTextContent(m.content);
135
- const colonIdx = text.indexOf(':');
133
+ const colonIdx = m.content.indexOf(':');
136
134
  if (colonIdx > 0) {
137
- liveToolIds.add(text.slice(0, colonIdx));
135
+ liveToolIds.add(m.content.slice(0, colonIdx));
138
136
  }
139
137
  }
140
138
  }
@@ -166,10 +164,9 @@ export class LosslessContextStore {
166
164
 
167
165
  // Tool results in cold zone
168
166
  if (m.role === 'tool') {
169
- const rawText = getTextContent(m.content);
170
- const colonIdx = rawText.indexOf(':');
171
- const toolName = colonIdx > 0 ? rawText.slice(0, colonIdx) : '';
172
- const toolOutput = colonIdx > 0 ? rawText.slice(colonIdx + 2) : rawText;
167
+ const colonIdx = m.content.indexOf(':');
168
+ const toolName = colonIdx > 0 ? m.content.slice(0, colonIdx) : '';
169
+ const toolOutput = colonIdx > 0 ? m.content.slice(colonIdx + 2) : m.content;
173
170
 
174
171
  // Pass 3a: drop orphaned tool results (tool not referenced in hot zone and output is large)
175
172
  if (!liveToolIds.has(toolName) && toolOutput.length > this.stubThreshold * 2) {
@@ -177,7 +174,7 @@ export class LosslessContextStore {
177
174
  }
178
175
 
179
176
  // Pass 2: stub large tool outputs
180
- let content = rawText;
177
+ let content = m.content;
181
178
  if (toolOutput.length > this.stubThreshold) {
182
179
  content = `${toolName}: [output truncated: ${toolOutput.length} chars]`;
183
180
  }
@@ -197,7 +194,7 @@ export class LosslessContextStore {
197
194
  }
198
195
 
199
196
  private estimateMessageTokens(m: AgentMessage): number {
200
- return Math.ceil((getTextContent(m.content).length + m.role.length + 4) / CHARS_PER_TOKEN);
197
+ return Math.ceil((m.content.length + m.role.length + 4) / CHARS_PER_TOKEN);
201
198
  }
202
199
 
203
200
  private stripMechanicalOverhead(content: string): string {