@bluecopa/harness 0.1.0-snapshot.20 → 0.1.0-snapshot.22
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +208 -148
- package/package.json +1 -1
- package/src/arc/agent-runner.ts +14 -1
- package/src/arc/arc-loop.ts +58 -3
- package/src/arc/tools.ts +1 -0
- package/src/arc/types.ts +18 -0
- package/tests/arc/process-profiles.test.ts +364 -0
package/README.md
CHANGED
|
@@ -2,9 +2,17 @@
|
|
|
2
2
|
|
|
3
3
|
Provider-agnostic TypeScript agent framework with Claude-code-compatible tool semantics.
|
|
4
4
|
|
|
5
|
-
|
|
5
|
+
Published on npm as **`@bluecopa/harness`**.
|
|
6
6
|
|
|
7
|
-
|
|
7
|
+
Two execution modes: a simple single-agent loop (`createAgent` + `VercelAgentLoop`) and a process-based orchestrator (`ArcLoop`) that dispatches parallel processes with context management, memory, and resilience.
|
|
8
|
+
|
|
9
|
+
## Install
|
|
10
|
+
|
|
11
|
+
```bash
|
|
12
|
+
pnpm add @bluecopa/harness
|
|
13
|
+
```
|
|
14
|
+
|
|
15
|
+
## Development
|
|
8
16
|
|
|
9
17
|
```bash
|
|
10
18
|
pnpm install
|
|
@@ -13,9 +21,11 @@ pnpm test
|
|
|
13
21
|
|
|
14
22
|
## Architecture
|
|
15
23
|
|
|
24
|
+
### Single-Agent Loop
|
|
25
|
+
|
|
16
26
|
```
|
|
17
27
|
┌──────────────┐ ┌──────────────┐ ┌──────────────────┐
|
|
18
|
-
│ createAgent
|
|
28
|
+
│ createAgent │────►│ AgentLoop │────►│ LLM (Claude) │
|
|
19
29
|
│ (turn loop) │ │ (nextAction)│ │ │
|
|
20
30
|
└──────┬───────┘ └──────────────┘ └──────────────────┘
|
|
21
31
|
│ │
|
|
@@ -27,20 +37,82 @@ pnpm test
|
|
|
27
37
|
└──────────────┘
|
|
28
38
|
```
|
|
29
39
|
|
|
30
|
-
|
|
31
|
-
2. Each step calls `loop.nextAction(messages)` to get the LLM's decision
|
|
32
|
-
3. If it's a tool call, the harness executes it via `ToolProvider` and appends the result
|
|
33
|
-
4. If it's a final action, the loop ends and returns the result
|
|
40
|
+
### ArcLoop Orchestrator
|
|
34
41
|
|
|
35
|
-
|
|
42
|
+
```
|
|
43
|
+
Orchestrator (ArcLoop — Opus 4.6 by default)
|
|
44
|
+
│ tools: Thread, Check, Cancel, Remember, ReadEpisode
|
|
45
|
+
│
|
|
46
|
+
│ Turn 1 (parallel):
|
|
47
|
+
├──► Process 0 ("read auth", model=fast) ─┐
|
|
48
|
+
├──► Process 1 ("read routes", model=fast) ─┼──► Episodes
|
|
49
|
+
├──► Process 2 ("read tests", model=fast) ─┘
|
|
50
|
+
│
|
|
51
|
+
│ Turn 2 (dispatch dependent work):
|
|
52
|
+
├──► Thread("fix bug", context=[ep0,ep1,ep2]) ──► Episode
|
|
53
|
+
│
|
|
54
|
+
│ Turn 3 (parallel):
|
|
55
|
+
├──► Thread("run tests", context=[ep3]) ─┐
|
|
56
|
+
├──► Thread("update docs", context=[ep3]) ─┘
|
|
57
|
+
│
|
|
58
|
+
└──► Final text response
|
|
59
|
+
```
|
|
36
60
|
|
|
37
|
-
|
|
61
|
+
Full architecture doc: [`docs/arc.md`](../docs/arc.md)
|
|
38
62
|
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
63
|
+
---
|
|
64
|
+
|
|
65
|
+
## ToolProvider
|
|
66
|
+
|
|
67
|
+
The contract for tool execution. All agent modes use this interface.
|
|
68
|
+
|
|
69
|
+
```typescript
|
|
70
|
+
interface ToolProvider {
|
|
71
|
+
bash(command: string, options?: BashOptions): Promise<ToolResult>;
|
|
72
|
+
readFile(path: string, options?: ReadOptions): Promise<ToolResult>;
|
|
73
|
+
writeFile(path: string, content: string): Promise<ToolResult>;
|
|
74
|
+
editFile(path: string, oldText: string, newText: string): Promise<ToolResult>;
|
|
75
|
+
glob(pattern: string, options?: GlobOptions): Promise<ToolResult>;
|
|
76
|
+
grep(pattern: string, path?: string, options?: GrepOptions): Promise<ToolResult>;
|
|
77
|
+
webFetch?(options: WebFetchOptions): Promise<ToolResult>;
|
|
78
|
+
webSearch?(query: string): Promise<ToolResult>;
|
|
79
|
+
capabilities(): ToolProviderCapabilities;
|
|
80
|
+
}
|
|
81
|
+
|
|
82
|
+
interface ToolResult {
|
|
83
|
+
success: boolean;
|
|
84
|
+
output: string;
|
|
85
|
+
error?: string;
|
|
86
|
+
}
|
|
87
|
+
```
|
|
88
|
+
|
|
89
|
+
Built-in implementations:
|
|
90
|
+
|
|
91
|
+
| Provider | Description |
|
|
92
|
+
|----------|-------------|
|
|
93
|
+
| `LocalToolProvider` | Runs tools on the local filesystem |
|
|
94
|
+
| `E2BToolProvider` | Routes tools to a sandbox VM via `ControlPlaneE2BExecutor` |
|
|
95
|
+
| `CompositeToolProvider` | Combines multiple providers (e.g. local filesystem + sandbox bash) |
|
|
96
|
+
|
|
97
|
+
## SandboxProvider
|
|
98
|
+
|
|
99
|
+
Higher-level sandbox operations beyond basic tool calls:
|
|
100
|
+
|
|
101
|
+
```typescript
|
|
102
|
+
interface SandboxProvider {
|
|
103
|
+
exec(command: string, options?: SandboxExecOptions): Promise<SandboxExecResult>;
|
|
104
|
+
readSandboxFile(path: string): Promise<SandboxFileBlob>;
|
|
105
|
+
writeSandboxFile(path: string, content: SandboxFileBlob): Promise<void>;
|
|
106
|
+
}
|
|
107
|
+
```
|
|
108
|
+
|
|
109
|
+
Used by `SkillManager` for executing skill scripts in isolated VMs.
|
|
110
|
+
|
|
111
|
+
## Connecting to a Sandbox
|
|
112
|
+
|
|
113
|
+
```typescript
|
|
42
114
|
import { ControlPlaneE2BExecutor } from './src/providers/control-plane-e2b-executor';
|
|
43
|
-
import {
|
|
115
|
+
import { E2BToolProvider } from './src/providers/e2b-tool-provider';
|
|
44
116
|
|
|
45
117
|
// Connect to sandbox service
|
|
46
118
|
const executor = new ControlPlaneE2BExecutor({
|
|
@@ -50,187 +122,172 @@ const executor = new ControlPlaneE2BExecutor({
|
|
|
50
122
|
});
|
|
51
123
|
await executor.initialize(); // creates a Firecracker VM
|
|
52
124
|
|
|
53
|
-
|
|
54
|
-
const agent = createAgent({
|
|
55
|
-
toolProvider: new E2BToolProvider(executor),
|
|
56
|
-
loop: new VercelAgentLoop(), // needs ANTHROPIC_API_KEY
|
|
57
|
-
});
|
|
125
|
+
const toolProvider = new E2BToolProvider(executor);
|
|
58
126
|
|
|
59
|
-
|
|
60
|
-
console.log(result.output); // LLM's final response
|
|
61
|
-
console.log(result.steps); // number of tool steps
|
|
127
|
+
// ... use with createAgent or ArcLoop
|
|
62
128
|
|
|
63
|
-
await executor.destroy();
|
|
129
|
+
await executor.destroy(); // tears down the VM
|
|
64
130
|
```
|
|
65
131
|
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
### From environment variables
|
|
132
|
+
From environment variables: `ControlPlaneE2BExecutor.fromEnv()` reads `SAMYX_BASE_URL` and `SAMYX_API_KEY`.
|
|
69
133
|
|
|
70
|
-
|
|
134
|
+
---
|
|
71
135
|
|
|
72
|
-
|
|
73
|
-
const executor = ControlPlaneE2BExecutor.fromEnv();
|
|
74
|
-
```
|
|
75
|
-
|
|
76
|
-
## Using locally (no sandbox)
|
|
136
|
+
## Single-Agent Mode (`createAgent`)
|
|
77
137
|
|
|
78
|
-
For
|
|
138
|
+
For simple tasks that don't need orchestration:
|
|
79
139
|
|
|
80
|
-
```
|
|
140
|
+
```typescript
|
|
81
141
|
import { createAgent } from './src/agent/create-agent';
|
|
82
142
|
import { LocalToolProvider } from './src/providers/local-tool-provider';
|
|
83
143
|
|
|
84
144
|
const agent = createAgent({
|
|
85
145
|
toolProvider: new LocalToolProvider(process.cwd()),
|
|
86
|
-
loop: new VercelAgentLoop(),
|
|
146
|
+
loop: new VercelAgentLoop(), // needs ANTHROPIC_API_KEY
|
|
87
147
|
});
|
|
88
148
|
|
|
89
149
|
const result = await agent.run('list all TypeScript files');
|
|
150
|
+
console.log(result.output);
|
|
90
151
|
```
|
|
91
152
|
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
### Agent creation (`src/agent/create-agent.ts`)
|
|
153
|
+
### Configuration
|
|
95
154
|
|
|
96
|
-
|
|
155
|
+
| Option | Type | Default | Description |
|
|
156
|
+
|--------|------|---------|-------------|
|
|
157
|
+
| `toolProvider` | `ToolProvider` | required | Executes tool calls |
|
|
158
|
+
| `loop` | `AgentLoop` | `VercelAgentLoop` | LLM decision loop |
|
|
159
|
+
| `sandboxProvider` | `SandboxProvider` | — | Higher-level sandbox operations |
|
|
160
|
+
| `maxSteps` | `number` | 30 | Max tool steps per run |
|
|
161
|
+
| `telemetry` | `HarnessTelemetry` | — | OpenTelemetry-style tracing |
|
|
162
|
+
| `skillIndexPath` | `string` | — | Path to skill index JSON for routing |
|
|
97
163
|
|
|
98
|
-
|
|
99
|
-
|--------|------|-------------|
|
|
100
|
-
| `toolProvider` | `ToolProvider` | Required. Executes tool calls |
|
|
101
|
-
| `loop` | `AgentLoop` | LLM decision loop (default: `VercelAgentLoop`) |
|
|
102
|
-
| `sandboxProvider` | `SandboxProvider` | Optional. Higher-level sandbox ops (file download, exec with env) |
|
|
103
|
-
| `maxSteps` | `number` | Max tool steps per run (default: 30) |
|
|
104
|
-
| `telemetry` | `HarnessTelemetry` | Optional. OpenTelemetry-style tracing |
|
|
105
|
-
| `skillIndexPath` | `string` | Optional. Path to skill index JSON |
|
|
164
|
+
### VercelAgentLoop
|
|
106
165
|
|
|
107
|
-
|
|
166
|
+
Calls Claude via the Vercel AI SDK. Supports parallel tool calls and a configurable system prompt.
|
|
108
167
|
|
|
109
|
-
|
|
110
|
-
- Parallel tool calls (returns `ToolBatchAction` when the LLM requests multiple tools at once)
|
|
111
|
-
- Configurable system prompt
|
|
112
|
-
- Model selection via `HARNESS_MODEL` env var (default: `claude-sonnet-4-5`)
|
|
113
|
-
|
|
114
|
-
```ts
|
|
168
|
+
```typescript
|
|
115
169
|
const loop = new VercelAgentLoop({
|
|
116
170
|
systemPrompt: 'You are a helpful coding assistant.',
|
|
171
|
+
model: 'claude-sonnet-4-5', // or HARNESS_MODEL env var
|
|
117
172
|
});
|
|
118
173
|
```
|
|
119
174
|
|
|
120
|
-
###
|
|
175
|
+
### LCMToolLoop
|
|
121
176
|
|
|
122
|
-
|
|
177
|
+
Wraps another loop to add Lossless Context Management and optional REPL orchestration:
|
|
123
178
|
|
|
124
|
-
```
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
readFile(path: string, options?: ReadOptions): Promise<ToolResult>;
|
|
128
|
-
writeFile(path: string, content: string): Promise<ToolResult>;
|
|
129
|
-
editFile(path: string, oldText: string, newText: string): Promise<ToolResult>;
|
|
130
|
-
glob(pattern: string, options?: GlobOptions): Promise<ToolResult>;
|
|
131
|
-
grep(pattern: string, path?: string, options?: GrepOptions): Promise<ToolResult>;
|
|
132
|
-
webFetch?(options: WebFetchOptions): Promise<ToolResult>;
|
|
133
|
-
webSearch?(query: string): Promise<ToolResult>;
|
|
134
|
-
capabilities(): ToolProviderCapabilities;
|
|
135
|
-
}
|
|
179
|
+
```typescript
|
|
180
|
+
import { LCMToolLoop } from './src/loop/lcm-tool-loop';
|
|
181
|
+
import { VercelAgentLoop } from './src/loop/vercel-agent-loop';
|
|
136
182
|
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
|
|
183
|
+
const loop = new LCMToolLoop({
|
|
184
|
+
innerLoop: new VercelAgentLoop(),
|
|
185
|
+
toolProvider: mySandboxProvider,
|
|
186
|
+
enableRepl: true, // default: true
|
|
187
|
+
bridgeDir: '/var/run/bridge',
|
|
188
|
+
onActivity: (entry) => console.log(entry),
|
|
189
|
+
onLlmRequest: async (prompt) => callLLM(prompt),
|
|
190
|
+
onWebFetchRequest: async (url) => fetch(url),
|
|
191
|
+
});
|
|
142
192
|
```
|
|
143
193
|
|
|
144
|
-
|
|
194
|
+
**Standard mode**: Lossless context trimming — the LLM always sees a coherent, budget-fitting view of the full conversation.
|
|
145
195
|
|
|
146
|
-
|
|
147
|
-
|----------|-------------|
|
|
148
|
-
| `LocalToolProvider` | Runs tools on the local filesystem |
|
|
149
|
-
| `E2BToolProvider` | Routes tools to an E2B-compatible executor over HTTP |
|
|
150
|
-
| `CompositeToolProvider` | Combines multiple providers (e.g. sandbox + web) |
|
|
196
|
+
**REPL mode**: When the LLM returns a Bash action with the REPL marker, the loop writes a Python script into the sandbox, injects the bridge module, runs the script, and polls for sub-requests (LLM, web_fetch, ask_user) that the harness fulfills.
|
|
151
197
|
|
|
152
|
-
|
|
198
|
+
---
|
|
153
199
|
|
|
154
|
-
|
|
200
|
+
## ArcLoop (Orchestrator Mode)
|
|
155
201
|
|
|
156
|
-
|
|
157
|
-
// Single tool call
|
|
158
|
-
interface ToolCallAction {
|
|
159
|
-
type: 'tool';
|
|
160
|
-
name: 'Bash' | 'Read' | 'Write' | 'Edit' | 'Glob' | 'Grep' | ...;
|
|
161
|
-
args: Record<string, unknown>;
|
|
162
|
-
}
|
|
202
|
+
For complex tasks that benefit from parallel processes, context management, and memory:
|
|
163
203
|
|
|
164
|
-
|
|
165
|
-
|
|
166
|
-
type: 'tool_batch';
|
|
167
|
-
calls: ToolCallAction[];
|
|
168
|
-
}
|
|
169
|
-
|
|
170
|
-
// Final text response (ends the loop)
|
|
171
|
-
interface FinalAction {
|
|
172
|
-
type: 'final';
|
|
173
|
-
content: string;
|
|
174
|
-
}
|
|
175
|
-
```
|
|
176
|
-
|
|
177
|
-
### LCM tool loop (`src/loop/lcm-tool-loop.ts`)
|
|
178
|
-
|
|
179
|
-
`LCMToolLoop` wraps another loop to add LCM-based tool routing, REPL script execution, and bridge-based tool dispatch. Used in the chat-assistant example.
|
|
180
|
-
|
|
181
|
-
### Sandbox provider (`src/interfaces/sandbox-provider.ts`)
|
|
204
|
+
```typescript
|
|
205
|
+
import { createArcAgent } from './src/arc/create-arc-agent';
|
|
182
206
|
|
|
183
|
-
|
|
207
|
+
const agent = await createArcAgent({
|
|
208
|
+
toolProvider: myToolProvider,
|
|
209
|
+
episodeStore: myEpisodeStore, // required
|
|
210
|
+
sessionMemoStore: mySessionMemoStore, // required
|
|
211
|
+
longTermStore: myLongTermStore, // required
|
|
212
|
+
taskId: 'task-1',
|
|
213
|
+
sessionId: 'session-1',
|
|
214
|
+
});
|
|
184
215
|
|
|
185
|
-
|
|
186
|
-
|
|
187
|
-
|
|
188
|
-
|
|
189
|
-
|
|
216
|
+
// Streaming
|
|
217
|
+
for await (const event of agent.stream(messages, signal)) {
|
|
218
|
+
if (event.type === 'text_delta') process.stdout.write(event.text);
|
|
219
|
+
if (event.type === 'process_dispatched') console.log(` → ${event.action}`);
|
|
220
|
+
if (event.type === 'done') console.log(`Done in ${event.stats.durationMs}ms`);
|
|
190
221
|
}
|
|
191
|
-
```
|
|
192
|
-
|
|
193
|
-
### Observability (`src/observability/otel.ts`)
|
|
194
|
-
|
|
195
|
-
`HarnessTelemetry` provides OpenTelemetry-style spans and metrics for agent runs.
|
|
196
222
|
|
|
197
|
-
|
|
223
|
+
// Non-streaming
|
|
224
|
+
const result = await agent.run(messages, signal);
|
|
225
|
+
```
|
|
198
226
|
|
|
199
|
-
|
|
227
|
+
### ArcLoopConfig
|
|
228
|
+
|
|
229
|
+
| Option | Type | Default | Description |
|
|
230
|
+
|--------|------|---------|-------------|
|
|
231
|
+
| `model` | `string` | `'claude-opus-4-6'` | Orchestrator model (ID or tier name) |
|
|
232
|
+
| `modelMap` | `Record<ModelTier, string>` | haiku/sonnet/opus | Maps fast/medium/strong to model IDs |
|
|
233
|
+
| `apiKey` | `string` | — | Anthropic API key |
|
|
234
|
+
| `systemPrompt` | `string` | built-in | Custom orchestrator system prompt |
|
|
235
|
+
| `maxTurns` | `number` | 30 | Max orchestrator turns |
|
|
236
|
+
| `processTimeout` | `number` | 120_000 | Per-process timeout (ms) |
|
|
237
|
+
| `processMaxSteps` | `number` | 20 | Per-process max tool steps |
|
|
238
|
+
| `contextWindowSize` | `number` | 200_000 | Context window in tokens |
|
|
239
|
+
| `outputReserve` | `number` | 20_000 | Tokens reserved for output |
|
|
240
|
+
| `autoMemory` | `boolean` | true | Auto-detect patterns from episodes |
|
|
241
|
+
| `episodeStore` | `EpisodeStore` | required | Stores episode summaries + traces |
|
|
242
|
+
| `sessionMemoStore` | `SessionMemoStore` | required | Stores session memos |
|
|
243
|
+
| `longTermStore` | `LongTermStore` | required | Stores long-term memories |
|
|
244
|
+
| `taskId` | `string` | required | Task identifier |
|
|
245
|
+
| `sessionId` | `string` | required | Session identifier |
|
|
246
|
+
| `toolProvider` | `ToolProvider` | required | Tool execution |
|
|
247
|
+
| `processTools` | `Record<string, AnyTool>` | builtinTools | Tools available inside processes |
|
|
248
|
+
| `extraOrchestratorTools` | `Record<string, AnyTool>` | — | Custom orchestrator tools |
|
|
249
|
+
| `onOrchestratorTool` | `function` | — | Handler for custom orchestrator tools |
|
|
250
|
+
| `resilience` | `ResiliencePolicy` | — | Composable resilience pipeline |
|
|
251
|
+
| `traceWriter` | `function` | — | Callback for trace event emission |
|
|
252
|
+
|
|
253
|
+
### Resilience
|
|
254
|
+
|
|
255
|
+
```typescript
|
|
256
|
+
import { resilience } from './src/arc/resilience';
|
|
257
|
+
|
|
258
|
+
const pipeline = resilience()
|
|
259
|
+
.retry({ maxRetries: 2, baseDelay: 1000 })
|
|
260
|
+
.timeout({ durationMs: 30_000 })
|
|
261
|
+
.circuitBreaker({ failureThreshold: 5 })
|
|
262
|
+
.build();
|
|
263
|
+
|
|
264
|
+
const agent = await createArcAgent({
|
|
265
|
+
// ...config
|
|
266
|
+
resilience: pipeline,
|
|
267
|
+
});
|
|
268
|
+
```
|
|
200
269
|
|
|
201
|
-
|
|
202
|
-
import { createArcAgent } from './src/arc/create-arc-agent';
|
|
203
|
-
import { InMemoryEpisodeStore } from './src/arc/stores/episode-store';
|
|
204
|
-
import { InMemorySessionMemoStore } from './src/arc/stores/session-memo-store';
|
|
205
|
-
import { InMemoryLongTermStore } from './src/arc/stores/long-term-store';
|
|
270
|
+
### Trace Emission
|
|
206
271
|
|
|
207
|
-
|
|
208
|
-
|
|
209
|
-
|
|
210
|
-
|
|
211
|
-
|
|
212
|
-
taskId: 'task-1',
|
|
213
|
-
sessionId: 'session-1',
|
|
272
|
+
```typescript
|
|
273
|
+
const traces: TraceEvent[] = [];
|
|
274
|
+
const agent = await createArcAgent({
|
|
275
|
+
// ...config
|
|
276
|
+
traceWriter: (event) => traces.push(event),
|
|
214
277
|
});
|
|
215
|
-
|
|
216
|
-
const result = await agent.run('Fix the authentication bug');
|
|
217
278
|
```
|
|
218
279
|
|
|
219
|
-
|
|
220
|
-
- **Parallel threads**: orchestrator calls Thread N times in one turn → all run concurrently
|
|
221
|
-
- **Four-tier memory**: thread context → episodes → session memos → long-term
|
|
222
|
-
- **Per-thread models**: Haiku for reads, Sonnet for implementation
|
|
223
|
-
- **Template compression**: zero-LLM-call episode summaries
|
|
224
|
-
- **Async consolidation**: non-blocking background distillation
|
|
280
|
+
Traces can be validated against the formal model: `cd verify && cargo run -- trace file.ndjson`
|
|
225
281
|
|
|
226
|
-
|
|
282
|
+
---
|
|
227
283
|
|
|
228
|
-
## Package
|
|
284
|
+
## Package Layout
|
|
229
285
|
|
|
230
286
|
```
|
|
231
287
|
src/
|
|
232
288
|
├── agent/ # createAgent, step executor, types
|
|
233
|
-
├── arc/ # ArcLoop orchestrator,
|
|
289
|
+
├── arc/ # ArcLoop orchestrator, processes, memory, resilience
|
|
290
|
+
│ ├── resilience/ # Retry, circuit breaker, timeout, bulkhead, fallback
|
|
234
291
|
│ ├── stores/ # RxDB + in-memory store implementations
|
|
235
292
|
│ └── object-store/ # Pluggable cloud sync (fs, memory)
|
|
236
293
|
├── interfaces/ # ToolProvider, SandboxProvider, AgentLoop contracts
|
|
@@ -240,17 +297,20 @@ src/
|
|
|
240
297
|
├── hooks/ # Pre/post tool call hooks
|
|
241
298
|
├── permissions/ # Tool permission checks
|
|
242
299
|
├── sessions/ # Session persistence
|
|
243
|
-
├── subagents/ # Subagent spawning
|
|
300
|
+
├── subagents/ # Subagent spawning
|
|
244
301
|
├── skills/ # Skill index, routing, and management
|
|
245
302
|
├── optimization/ # Benchmark runner
|
|
246
303
|
└── observability/ # OpenTelemetry integration
|
|
304
|
+
|
|
305
|
+
verify/ # Rust formal verification (Stateright model checker)
|
|
306
|
+
testing/ # Adversarial scenario replay harness
|
|
307
|
+
tests/ # Vitest test suite
|
|
247
308
|
```
|
|
248
309
|
|
|
249
310
|
## Documentation
|
|
250
311
|
|
|
251
|
-
-
|
|
252
|
-
-
|
|
253
|
-
-
|
|
254
|
-
-
|
|
255
|
-
-
|
|
256
|
-
- Full example: [`../examples/chat-assistant/src/chat.ts`](../examples/chat-assistant/src/chat.ts)
|
|
312
|
+
- [Arc architecture](../docs/arc.md) — process model, context window, memory, resilience, verification
|
|
313
|
+
- [Testing](../docs/testing.md) — test layers, running tests, writing new tests
|
|
314
|
+
- [Sandbox setup](../docs/PUBLIC_SANDBOX.md) — deploying the sandbox service
|
|
315
|
+
- [Release process](../docs/RELEASE.md) — versioning and publishing
|
|
316
|
+
- [Example](../examples/chat-assistant/src/chat.ts) — complete working chat assistant
|
package/package.json
CHANGED
package/src/arc/agent-runner.ts
CHANGED
|
@@ -364,6 +364,10 @@ export interface CreateProcessConfig {
|
|
|
364
364
|
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
|
365
365
|
processTools: Record<string, any>;
|
|
366
366
|
parentSignal: AbortSignal;
|
|
367
|
+
/** Custom system prompt for this process (overrides PROCESS_SYSTEM_PROMPT). */
|
|
368
|
+
processSystemPrompt?: string;
|
|
369
|
+
/** Async skill instructions appended to the system prompt under a "## Skill Instructions" heading (awaited during process startup; a null or empty result adds nothing). */
|
|
370
|
+
skillPromptPromise?: Promise<string | null>;
|
|
367
371
|
|
|
368
372
|
// Runtime extras
|
|
369
373
|
hookRunner?: HookRunner;
|
|
@@ -419,12 +423,21 @@ export function createProcess(
|
|
|
419
423
|
process.status = 'running';
|
|
420
424
|
const seed = await seedPromise;
|
|
421
425
|
|
|
426
|
+
// Build system prompt: base + optional skill instructions
|
|
427
|
+
let systemPrompt = config.processSystemPrompt ?? PROCESS_SYSTEM_PROMPT;
|
|
428
|
+
if (config.skillPromptPromise) {
|
|
429
|
+
const skillInstructions = await config.skillPromptPromise;
|
|
430
|
+
if (skillInstructions) {
|
|
431
|
+
systemPrompt += '\n\n## Skill Instructions\n' + skillInstructions;
|
|
432
|
+
}
|
|
433
|
+
}
|
|
434
|
+
|
|
422
435
|
const result = await Promise.race([
|
|
423
436
|
runner.run({
|
|
424
437
|
model,
|
|
425
438
|
prompt: request.action,
|
|
426
439
|
tools: config.processTools,
|
|
427
|
-
systemPrompt
|
|
440
|
+
systemPrompt,
|
|
428
441
|
toolProvider: config.toolProvider,
|
|
429
442
|
maxSteps,
|
|
430
443
|
signal: ac.signal,
|
package/src/arc/arc-loop.ts
CHANGED
|
@@ -27,6 +27,9 @@ import { createProcess, firstEvent } from './agent-runner';
|
|
|
27
27
|
import { EpisodeCompressor } from './episode-compressor';
|
|
28
28
|
import { runConsolidation } from './consolidation';
|
|
29
29
|
import { pickDefined } from './utils';
|
|
30
|
+
import { SkillRouter } from '../skills/skill-router';
|
|
31
|
+
import { loadSkillFromFile } from '../skills/skill-loader';
|
|
32
|
+
import type { SkillSummary } from '../skills/skill-types';
|
|
30
33
|
|
|
31
34
|
// ── Default orchestrator prompt ──
|
|
32
35
|
|
|
@@ -75,6 +78,9 @@ export class ArcLoop {
|
|
|
75
78
|
private readonly traceWriter: ((event: TraceEvent) => void) | undefined;
|
|
76
79
|
private readonly tracedRunning = new Set<string>();
|
|
77
80
|
private readonly processListeners: Promise<void>[] = [];
|
|
81
|
+
private readonly skillRouter: SkillRouter | undefined;
|
|
82
|
+
private skillSummaries: SkillSummary[] | null = null;
|
|
83
|
+
private skillSummariesPromise: Promise<SkillSummary[]> | null = null;
|
|
78
84
|
|
|
79
85
|
constructor(config: ArcLoopConfig) {
|
|
80
86
|
this.config = config;
|
|
@@ -114,6 +120,15 @@ export class ArcLoop {
|
|
|
114
120
|
|
|
115
121
|
this.resilience = config.resilience;
|
|
116
122
|
this.traceWriter = (config as ArcLoopConfig & { traceWriter?: (event: TraceEvent) => void }).traceWriter;
|
|
123
|
+
|
|
124
|
+
if (config.skillIndexPath) {
|
|
125
|
+
this.skillRouter = new SkillRouter();
|
|
126
|
+
// Start reading the skill index now (non-blocking); the promise is awaited on first dispatch, and a read/parse failure yields an empty list
|
|
127
|
+
this.skillSummariesPromise = import('node:fs/promises')
|
|
128
|
+
.then(fs => fs.readFile(config.skillIndexPath!, 'utf-8'))
|
|
129
|
+
.then(raw => JSON.parse(raw) as SkillSummary[])
|
|
130
|
+
.catch(() => []);
|
|
131
|
+
}
|
|
117
132
|
}
|
|
118
133
|
|
|
119
134
|
private trace(kind: TraceEvent['kind']): void {
|
|
@@ -449,7 +464,20 @@ export class ArcLoop {
|
|
|
449
464
|
// ── Process dispatch ──
|
|
450
465
|
|
|
451
466
|
private dispatch(request: ProcessRequest, parentSignal: AbortSignal): Process {
|
|
452
|
-
const
|
|
467
|
+
const profile = request.profile
|
|
468
|
+
? this.config.processProfiles?.[request.profile]
|
|
469
|
+
: undefined;
|
|
470
|
+
const defaultModel = resolveModel(
|
|
471
|
+
profile?.model ?? 'medium',
|
|
472
|
+
this.modelMap,
|
|
473
|
+
this.modelMap.medium,
|
|
474
|
+
);
|
|
475
|
+
|
|
476
|
+
// Resolve skill instructions only when skills are configured
|
|
477
|
+
const skillPromptPromise = this.skillRouter
|
|
478
|
+
? this.resolveSkillPrompt(request.action)
|
|
479
|
+
: undefined;
|
|
480
|
+
|
|
453
481
|
const proc = createProcess(request, {
|
|
454
482
|
toolProvider: this.config.toolProvider,
|
|
455
483
|
episodeStore: this.config.episodeStore,
|
|
@@ -457,10 +485,12 @@ export class ArcLoop {
|
|
|
457
485
|
sessionId: this.config.sessionId,
|
|
458
486
|
modelMap: this.modelMap,
|
|
459
487
|
defaultModel,
|
|
460
|
-
processMaxSteps: this.config.processMaxSteps ?? 20,
|
|
488
|
+
processMaxSteps: profile?.maxSteps ?? this.config.processMaxSteps ?? 20,
|
|
461
489
|
processTimeout: this.config.processTimeout ?? 120_000,
|
|
462
490
|
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
|
463
|
-
processTools: this.config.processTools ?? builtinTools as any,
|
|
491
|
+
processTools: (profile?.tools ?? this.config.processTools ?? builtinTools) as any,
|
|
492
|
+
processSystemPrompt: profile?.systemPrompt ?? this.config.processSystemPrompt,
|
|
493
|
+
skillPromptPromise,
|
|
464
494
|
parentSignal,
|
|
465
495
|
...pickDefined(this.config, [
|
|
466
496
|
'hookRunner',
|
|
@@ -475,6 +505,28 @@ export class ArcLoop {
|
|
|
475
505
|
return proc;
|
|
476
506
|
}
|
|
477
507
|
|
|
508
|
+
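/** Resolve skill instructions for a process action. Returns null if skills are not configured, the skill index is empty, no skill matched, or the matched skill fails to load or has no instructions. */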
|
|
509
|
+
private async resolveSkillPrompt(action: string): Promise<string | null> {
|
|
510
|
+
if (!this.skillRouter || !this.skillSummariesPromise) return null;
|
|
511
|
+
|
|
512
|
+
// Ensure summaries are loaded
|
|
513
|
+
if (!this.skillSummaries) {
|
|
514
|
+
this.skillSummaries = await this.skillSummariesPromise;
|
|
515
|
+
}
|
|
516
|
+
if (this.skillSummaries.length === 0) return null;
|
|
517
|
+
|
|
518
|
+
// Fast match only (keyword + alias, no LLM call)
|
|
519
|
+
const matched = await this.skillRouter.selectSkill(action, this.skillSummaries);
|
|
520
|
+
if (!matched) return null;
|
|
521
|
+
|
|
522
|
+
try {
|
|
523
|
+
const skill = await loadSkillFromFile(matched.path);
|
|
524
|
+
return skill.instructions || null;
|
|
525
|
+
} catch {
|
|
526
|
+
return null;
|
|
527
|
+
}
|
|
528
|
+
}
|
|
529
|
+
|
|
478
530
|
private traceProcessRunning(procId: string): void {
|
|
479
531
|
if (!this.tracedRunning.has(procId)) {
|
|
480
532
|
this.tracedRunning.add(procId);
|
|
@@ -546,6 +598,9 @@ export class ArcLoop {
|
|
|
546
598
|
if (typeof args.label === 'string') {
|
|
547
599
|
req.label = args.label;
|
|
548
600
|
}
|
|
601
|
+
if (typeof args.profile === 'string') {
|
|
602
|
+
req.profile = args.profile;
|
|
603
|
+
}
|
|
549
604
|
return req;
|
|
550
605
|
}
|
|
551
606
|
|
package/src/arc/tools.ts
CHANGED
|
@@ -11,6 +11,7 @@ export const Thread = tool({
|
|
|
11
11
|
model: z.enum(['fast', 'medium', 'strong']).optional().describe('Model tier (default: medium)'),
|
|
12
12
|
maxSteps: z.number().optional().describe('Max tool-call steps'),
|
|
13
13
|
label: z.string().optional().describe('Human-readable label'),
|
|
14
|
+
profile: z.string().optional().describe('Named process profile (e.g. "researcher", "builder")'),
|
|
14
15
|
}),
|
|
15
16
|
});
|
|
16
17
|
|
package/src/arc/types.ts
CHANGED
|
@@ -63,6 +63,10 @@ export interface ArcLoopConfig {
|
|
|
63
63
|
processTimeout?: number;
|
|
64
64
|
/** Per-process max steps (default: 20) */
|
|
65
65
|
processMaxSteps?: number;
|
|
66
|
+
/** Default system prompt for all processes (overrides the built-in default) */
|
|
67
|
+
processSystemPrompt?: string;
|
|
68
|
+
/** Named process profiles. The orchestrator selects a profile via the Thread tool's `profile` param. */
|
|
69
|
+
processProfiles?: Record<string, ProcessProfile>;
|
|
66
70
|
|
|
67
71
|
// Context
|
|
68
72
|
/** Context window size in tokens (default: 200_000) */
|
|
@@ -148,6 +152,20 @@ export interface ProcessRequest {
|
|
|
148
152
|
model?: import('./arc-types').ModelTier;
|
|
149
153
|
maxSteps?: number;
|
|
150
154
|
label?: string;
|
|
155
|
+
/** Named profile to use for this process (looked up from ArcLoopConfig.processProfiles). */
|
|
156
|
+
profile?: string;
|
|
157
|
+
}
|
|
158
|
+
|
|
159
|
+
/** A named process profile — provides defaults for system prompt, tools, model, and step limit. */
|
|
160
|
+
export interface ProcessProfile {
|
|
161
|
+
/** System prompt for processes using this profile. */
|
|
162
|
+
systemPrompt: string;
|
|
163
|
+
/** Tools available to processes using this profile (overrides processTools). */
|
|
164
|
+
tools?: Record<string, import('./arc-types').AnyTool>;
|
|
165
|
+
/** Default model tier for this profile (Thread tool's explicit model overrides this). */
|
|
166
|
+
model?: import('./arc-types').ModelTier;
|
|
167
|
+
/** Max steps for this profile (Thread tool's explicit maxSteps overrides this). */
|
|
168
|
+
maxSteps?: number;
|
|
151
169
|
}
|
|
152
170
|
|
|
153
171
|
export type Activity =
|
|
@@ -0,0 +1,364 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Tests for process profiles.
|
|
3
|
+
*
|
|
4
|
+
* Verifies that:
|
|
5
|
+
* 1. A process dispatched with a named profile uses the profile's system prompt
|
|
6
|
+
* 2. A process dispatched with a named profile uses the profile's tools/maxSteps/model
|
|
7
|
+
* 3. The default processSystemPrompt is used when no profile is specified
|
|
8
|
+
* 4. Profile model is overridden by explicit Thread model param
|
|
9
|
+
* 5. Unknown profile names fall back to defaults
|
|
10
|
+
*/
|
|
11
|
+
import { describe, it, expect, vi, beforeEach } from 'vitest';
|
|
12
|
+
import type { ToolProvider, ToolResult } from '../../src/interfaces/tool-provider';
|
|
13
|
+
import type { Episode, EpisodeTrace, SessionMemo, LongTermMemory } from '../../src/arc/arc-types';
|
|
14
|
+
|
|
15
|
+
// ── Capture what system prompt each process receives ──
|
|
16
|
+
|
|
17
|
+
const capturedSystemPrompts: string[] = [];
|
|
18
|
+
const capturedModels: string[] = [];
|
|
19
|
+
let orchestratorCallCount = 0;
|
|
20
|
+
let processCallCount = 0;
|
|
21
|
+
|
|
22
|
+
// Orchestrator (streamText): dispatches Thread calls based on test scenario
|
|
23
|
+
let orchestratorScript: Array<() => unknown> = [];
|
|
24
|
+
|
|
25
|
+
function mockStreamText() {
|
|
26
|
+
const callNum = orchestratorCallCount++;
|
|
27
|
+
if (callNum < orchestratorScript.length) {
|
|
28
|
+
return orchestratorScript[callNum]();
|
|
29
|
+
}
|
|
30
|
+
// Default: final text
|
|
31
|
+
return {
|
|
32
|
+
fullStream: (async function* () {
|
|
33
|
+
yield { type: 'text-delta', text: 'Done.' };
|
|
34
|
+
})(),
|
|
35
|
+
};
|
|
36
|
+
}
|
|
37
|
+
|
|
38
|
+
// Process (generateText): captures system prompt, returns immediate completion
|
|
39
|
+
function mockGenerateText(opts: Record<string, unknown>) {
|
|
40
|
+
processCallCount++;
|
|
41
|
+
// Capture the system prompt passed to this process
|
|
42
|
+
const system = opts.system as Array<{ content: string }> | undefined;
|
|
43
|
+
if (system?.[0]?.content) {
|
|
44
|
+
capturedSystemPrompts.push(system[0].content);
|
|
45
|
+
}
|
|
46
|
+
// Capture the model
|
|
47
|
+
capturedModels.push(String(opts.model ?? ''));
|
|
48
|
+
|
|
49
|
+
// Immediate completion — no tool calls
|
|
50
|
+
return Promise.resolve({
|
|
51
|
+
text: 'Process done.',
|
|
52
|
+
toolCalls: [],
|
|
53
|
+
});
|
|
54
|
+
}
|
|
55
|
+
|
|
56
|
+
vi.mock('ai', () => ({
|
|
57
|
+
streamText: (opts: Record<string, unknown>) => mockStreamText(),
|
|
58
|
+
generateText: (opts: Record<string, unknown>) => mockGenerateText(opts),
|
|
59
|
+
tool: (def: Record<string, unknown>) => def,
|
|
60
|
+
}));
|
|
61
|
+
|
|
62
|
+
vi.mock('@ai-sdk/anthropic', () => ({
|
|
63
|
+
anthropic: (model: string) => model,
|
|
64
|
+
}));
|
|
65
|
+
|
|
66
|
+
// ── In-memory stores ──
|
|
67
|
+
|
|
68
|
+
/**
 * Builds array-backed episode, session-memo and long-term-memory stores.
 *
 * Every call returns a fresh, independent set of stores. All methods are
 * async; lookups that miss resolve to `null`, and filters resolve to a new
 * array.
 *
 * @returns An object with `episodeStore`, `sessionMemoStore` and `longTermStore`.
 */
function createInMemoryStores() {
  const episodes: Episode[] = [];
  const traces: EpisodeTrace[] = [];
  const memos: SessionMemo[] = [];
  const memories: LongTermMemory[] = [];

  // Last `n` items in insertion order. A bare `slice(-n)` would return the
  // WHOLE array for n === 0 (because -0 === 0) and skip leading items for a
  // negative n, so non-positive counts are answered with an empty list.
  const lastN = <T>(items: readonly T[], n: number): T[] => (n > 0 ? items.slice(-n) : []);

  return {
    episodeStore: {
      async addEpisode(ep: Episode) { episodes.push(ep); },
      async addTrace(tr: EpisodeTrace) { traces.push(tr); },
      async getEpisode(id: string) { return episodes.find(e => e.id === id) ?? null; },
      async getTrace(id: string) { return traces.find(t => t.episodeId === id) ?? null; },
      async getEpisodesByTask(taskId: string) { return episodes.filter(e => e.taskId === taskId); },
      async getEpisodesBySession(sid: string) { return episodes.filter(e => e.sessionId === sid); },
      async getRecentEpisodes(n: number) { return lastN(episodes, n); },
      // Nothing is ever evicted by this in-memory store; it always reports zero.
      async evictTraces() { return 0; },
    },
    sessionMemoStore: {
      async addMemo(memo: SessionMemo) { memos.push(memo); },
      async getMemo(id: string) { return memos.find(m => m.id === id) ?? null; },
      async getMemosBySession(sid: string) { return memos.filter(m => m.sessionId === sid); },
      async getRecentMemos(n: number) { return lastN(memos, n); },
    },
    longTermStore: {
      async addMemory(mem: LongTermMemory) { memories.push(mem); },
      async getMemory(id: string) { return memories.find(m => m.id === id) ?? null; },
      // Returns a copy so callers cannot mutate the backing array.
      async getAllMemories() { return [...memories]; },
      async getMemoriesByCategory(cat: string) { return memories.filter(m => m.category === cat); },
      // Updates the stored object in place; an unknown id is silently ignored.
      async updateMemory(id: string, updates: Partial<Pick<LongTermMemory, 'content' | 'category' | 'updatedAt'>>) {
        const mem = memories.find(m => m.id === id);
        if (mem) Object.assign(mem, updates);
      },
      // Removes the first memory with a matching id; an unknown id is silently ignored.
      async deleteMemory(id: string) {
        const idx = memories.findIndex(m => m.id === id);
        if (idx >= 0) memories.splice(idx, 1);
      },
    },
  };
}
|
|
107
|
+
|
|
108
|
+
/**
 * Builds a tool provider whose every tool succeeds with a fixed output string.
 *
 * `capabilities()` reports `bash` and `fileSystem` as available and
 * everything else (web fetch/search, code execution, sandboxing) as off.
 */
function createMockToolProvider(): ToolProvider {
  // Factory for a tool that ignores its arguments and resolves to a canned success.
  const succeedWith = (output: string) => async (): Promise<ToolResult> => ({ success: true, output });

  const provider: ToolProvider = {
    bash: succeedWith('output'),
    readFile: succeedWith('content'),
    writeFile: succeedWith('written'),
    editFile: succeedWith('edited'),
    glob: succeedWith('files'),
    grep: succeedWith('matches'),
    capabilities: () => ({
      bash: true,
      fileSystem: true,
      webFetch: false,
      webSearch: false,
      codeExecution: false,
      sandboxed: false,
    }),
  };
  return provider;
}
|
|
122
|
+
|
|
123
|
+
/**
 * Builds a scripted streamText response that dispatches `Thread` tool calls.
 *
 * @param calls - One argument object per `Thread` call, emitted in the given order.
 * @returns A thunk producing `{ fullStream }`; the stream yields one
 *   `tool-call` chunk per entry, each with a freshly generated `toolCallId`.
 */
function threadCalls(...calls: Array<Record<string, unknown>>) {
  async function* emitToolCalls() {
    for (const args of calls) {
      const toolCallId = `tc-${Math.random()}`;
      yield { type: 'tool-call', toolName: 'Thread', toolCallId, args };
    }
  }

  // The generator is only started when the thunk is invoked, once per invocation.
  return () => ({ fullStream: emitToolCalls() });
}
|
|
133
|
+
|
|
134
|
+
/**
 * Builds a scripted streamText response that ends the turn with plain text.
 *
 * @param text - The text carried by the single `text-delta` chunk.
 * @returns A thunk producing `{ fullStream }` with exactly one chunk.
 */
function finalText(text: string) {
  async function* emitText() {
    yield { type: 'text-delta', text };
  }

  return () => ({ fullStream: emitText() });
}
|
|
142
|
+
|
|
143
|
+
// ── Import after mocks ──
|
|
144
|
+
import { ArcLoop } from '../../src/arc/arc-loop';
|
|
145
|
+
|
|
146
|
+
// Start every test from a clean slate: no scripted turns, zeroed call
// counters, and empty capture buffers (cleared in place so existing
// references to the arrays stay valid).
beforeEach(() => {
  orchestratorScript = [];
  orchestratorCallCount = 0;
  processCallCount = 0;
  capturedSystemPrompts.splice(0, capturedSystemPrompts.length);
  capturedModels.splice(0, capturedModels.length);
});
|
|
153
|
+
|
|
154
|
+
// Verifies how ArcLoop picks the system prompt and model for a dispatched
// process: named profile → configured processSystemPrompt → built-in default.
describe('Process Profiles', () => {
  /** Drains an event stream to completion and returns everything it yielded. */
  async function collectEvents<T>(stream: AsyncIterable<T>): Promise<T[]> {
    const events: T[] = [];
    for await (const event of stream) {
      events.push(event);
    }
    return events;
  }

  it('uses the profile system prompt when a named profile is specified', async () => {
    orchestratorScript = [
      threadCalls({ action: 'Do research', profile: 'researcher' }),
      finalText('Done.'),
    ];

    const stores = createInMemoryStores();
    const loop = new ArcLoop({
      ...stores,
      taskId: 'test-1',
      sessionId: 'sess-1',
      toolProvider: createMockToolProvider(),
      processProfiles: {
        researcher: {
          systemPrompt: 'You are a research specialist. Search the web and compile findings.',
        },
      },
    });

    await collectEvents(loop.stream([{ role: 'user', content: 'Research X' }], AbortSignal.timeout(10_000)));

    expect(capturedSystemPrompts).toHaveLength(1);
    expect(capturedSystemPrompts[0]).toBe('You are a research specialist. Search the web and compile findings.');
  });

  it('uses processSystemPrompt as default when no profile is specified', async () => {
    orchestratorScript = [
      threadCalls({ action: 'Do something' }),
      finalText('Done.'),
    ];

    const stores = createInMemoryStores();
    const loop = new ArcLoop({
      ...stores,
      taskId: 'test-2',
      sessionId: 'sess-2',
      toolProvider: createMockToolProvider(),
      processSystemPrompt: 'You are a custom default agent.',
    });

    await collectEvents(loop.stream([{ role: 'user', content: 'Do X' }], AbortSignal.timeout(10_000)));

    expect(capturedSystemPrompts).toHaveLength(1);
    expect(capturedSystemPrompts[0]).toBe('You are a custom default agent.');
  });

  it('falls back to built-in PROCESS_SYSTEM_PROMPT when no profile or default is set', async () => {
    orchestratorScript = [
      threadCalls({ action: 'Do something' }),
      finalText('Done.'),
    ];

    const stores = createInMemoryStores();
    const loop = new ArcLoop({
      ...stores,
      taskId: 'test-3',
      sessionId: 'sess-3',
      toolProvider: createMockToolProvider(),
    });

    await collectEvents(loop.stream([{ role: 'user', content: 'Do X' }], AbortSignal.timeout(10_000)));

    expect(capturedSystemPrompts).toHaveLength(1);
    expect(capturedSystemPrompts[0]).toContain('focused execution thread');
  });

  it('uses profile model as default but Thread explicit model overrides it', async () => {
    orchestratorScript = [
      // First thread: no explicit model → should use profile's 'strong'
      // Second thread: explicit 'fast' → should override profile's 'strong'
      threadCalls(
        { action: 'Synthesize report', profile: 'synthesizer' },
        { action: 'Quick lookup', profile: 'synthesizer', model: 'fast' },
      ),
      finalText('Done.'),
    ];

    const stores = createInMemoryStores();
    const loop = new ArcLoop({
      ...stores,
      taskId: 'test-4',
      sessionId: 'sess-4',
      toolProvider: createMockToolProvider(),
      processProfiles: {
        synthesizer: {
          systemPrompt: 'You are a synthesis expert.',
          model: 'strong',
        },
      },
    });

    await collectEvents(loop.stream([{ role: 'user', content: 'Do X' }], AbortSignal.timeout(10_000)));

    expect(capturedSystemPrompts).toHaveLength(2);
    expect(capturedSystemPrompts[0]).toBe('You are a synthesis expert.');
    expect(capturedSystemPrompts[1]).toBe('You are a synthesis expert.');

    // One process uses the profile default 'strong', the other the explicit
    // 'fast' override. Both threads are dispatched in the same turn, so the
    // capture order is not relied upon — only that each tier was used once.
    expect(capturedModels).toHaveLength(2);
    expect([...capturedModels].sort()).toEqual([
      'claude-haiku-4-5', // fast tier default
      'claude-opus-4-5', // strong tier default
    ]);
  });

  it('unknown profile falls back to processSystemPrompt or built-in default', async () => {
    orchestratorScript = [
      threadCalls({ action: 'Do something', profile: 'nonexistent' }),
      finalText('Done.'),
    ];

    const stores = createInMemoryStores();
    const loop = new ArcLoop({
      ...stores,
      taskId: 'test-5',
      sessionId: 'sess-5',
      toolProvider: createMockToolProvider(),
      processSystemPrompt: 'Custom default prompt.',
      processProfiles: {
        researcher: { systemPrompt: 'Research prompt.' },
      },
    });

    await collectEvents(loop.stream([{ role: 'user', content: 'Do X' }], AbortSignal.timeout(10_000)));

    expect(capturedSystemPrompts).toHaveLength(1);
    // Unknown profile → falls back to processSystemPrompt
    expect(capturedSystemPrompts[0]).toBe('Custom default prompt.');
  });

  it('profile maxSteps overrides config processMaxSteps', async () => {
    orchestratorScript = [
      threadCalls({ action: 'Quick task', profile: 'fast_worker' }),
      finalText('Done.'),
    ];

    const stores = createInMemoryStores();
    const loop = new ArcLoop({
      ...stores,
      taskId: 'test-6',
      sessionId: 'sess-6',
      toolProvider: createMockToolProvider(),
      processMaxSteps: 20,
      processProfiles: {
        fast_worker: {
          systemPrompt: 'Be fast.',
          maxSteps: 3,
        },
      },
    });

    const events = await collectEvents(
      loop.stream([{ role: 'user', content: 'Do X' }], AbortSignal.timeout(10_000)),
    );

    // The process completed in 1 step (immediate text, no tools).
    // We can't directly observe maxSteps from outside, but we can verify
    // the process dispatched and completed successfully with the profile.
    const dispatched = events.filter(e => e.type === 'process_dispatched');
    const completed = events.filter(e => e.type === 'process_completed');
    expect(dispatched).toHaveLength(1);
    expect(completed).toHaveLength(1);
    // Confirms the fast_worker profile (not the default prompt) was actually applied.
    expect(capturedSystemPrompts).toEqual(['Be fast.']);
  });

  it('different profiles in the same turn get different system prompts', async () => {
    orchestratorScript = [
      threadCalls(
        { action: 'Search the web', profile: 'researcher' },
        { action: 'Write the report', profile: 'writer' },
      ),
      finalText('Done.'),
    ];

    const stores = createInMemoryStores();
    const loop = new ArcLoop({
      ...stores,
      taskId: 'test-7',
      sessionId: 'sess-7',
      toolProvider: createMockToolProvider(),
      processProfiles: {
        researcher: { systemPrompt: 'You are a web researcher.' },
        writer: { systemPrompt: 'You are a technical writer.' },
      },
    });

    await collectEvents(loop.stream([{ role: 'user', content: 'Do X' }], AbortSignal.timeout(10_000)));

    expect(capturedSystemPrompts).toHaveLength(2);
    // Order may vary due to parallel execution, so check both are present
    expect(capturedSystemPrompts).toContain('You are a web researcher.');
    expect(capturedSystemPrompts).toContain('You are a technical writer.');
  });
});
|