@bluecopa/harness 0.1.0-snapshot.20 → 0.1.0-snapshot.22

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -2,9 +2,17 @@
2
2
 
3
3
  Provider-agnostic TypeScript agent framework with Claude-code-compatible tool semantics.
4
4
 
5
- The harness provides the core loop that drives an AI agent: send messages to an LLM, execute the tool calls it returns, feed results back, and repeat until the LLM produces a final text response.
5
+ Published on npm as **`@bluecopa/harness`**.
6
6
 
7
- ## Quickstart
7
+ Two execution modes: a simple single-agent loop (`createAgent` + `VercelAgentLoop`) and a process-based orchestrator (`ArcLoop`) that dispatches parallel processes with context management, memory, and resilience.
8
+
9
+ ## Install
10
+
11
+ ```bash
12
+ pnpm add @bluecopa/harness
13
+ ```
14
+
15
+ ## Development
8
16
 
9
17
  ```bash
10
18
  pnpm install
@@ -13,9 +21,11 @@ pnpm test
13
21
 
14
22
  ## Architecture
15
23
 
24
+ ### Single-Agent Loop
25
+
16
26
  ```
17
27
  ┌──────────────┐ ┌──────────────┐ ┌──────────────────┐
18
- │ createAgent │────▶│ AgentLoop │────▶│ LLM (Claude) │
28
+ │ createAgent │────►│ AgentLoop │────►│ LLM (Claude) │
19
29
  │ (turn loop) │ │ (nextAction)│ │ │
20
30
  └──────┬───────┘ └──────────────┘ └──────────────────┘
21
31
  │ │
@@ -27,20 +37,82 @@ pnpm test
27
37
  └──────────────┘
28
38
  ```
29
39
 
30
- 1. `createAgent` drives a deterministic step loop
31
- 2. Each step calls `loop.nextAction(messages)` to get the LLM's decision
32
- 3. If it's a tool call, the harness executes it via `ToolProvider` and appends the result
33
- 4. If it's a final action, the loop ends and returns the result
40
+ ### ArcLoop Orchestrator
34
41
 
35
- ## Using with the sandbox
42
+ ```
43
+ Orchestrator (ArcLoop — Opus 4.6 by default)
44
+ │ tools: Thread, Check, Cancel, Remember, ReadEpisode
45
+
46
+ │ Turn 1 (parallel):
47
+ ├──► Process 0 ("read auth", model=fast) ─┐
48
+ ├──► Process 1 ("read routes", model=fast) ─┼──► Episodes
49
+ ├──► Process 2 ("read tests", model=fast) ─┘
50
+
51
+ │ Turn 2 (dispatch dependent work):
52
+ ├──► Thread("fix bug", context=[ep0,ep1,ep2]) ──► Episode
53
+
54
+ │ Turn 3 (parallel):
55
+ ├──► Thread("run tests", context=[ep3]) ─┐
56
+ ├──► Thread("update docs", context=[ep3]) ─┘
57
+
58
+ └──► Final text response
59
+ ```
36
60
 
37
- The most common setup connects the harness to a running sandbox service via `ControlPlaneE2BExecutor`:
61
+ Full architecture doc: [`docs/arc.md`](../docs/arc.md)
38
62
 
39
- ```ts
40
- import { createAgent } from './src/agent/create-agent';
41
- import { E2BToolProvider } from './src/providers/e2b-tool-provider';
63
+ ---
64
+
65
+ ## ToolProvider
66
+
67
+ The contract for tool execution. All agent modes use this interface.
68
+
69
+ ```typescript
70
+ interface ToolProvider {
71
+ bash(command: string, options?: BashOptions): Promise<ToolResult>;
72
+ readFile(path: string, options?: ReadOptions): Promise<ToolResult>;
73
+ writeFile(path: string, content: string): Promise<ToolResult>;
74
+ editFile(path: string, oldText: string, newText: string): Promise<ToolResult>;
75
+ glob(pattern: string, options?: GlobOptions): Promise<ToolResult>;
76
+ grep(pattern: string, path?: string, options?: GrepOptions): Promise<ToolResult>;
77
+ webFetch?(options: WebFetchOptions): Promise<ToolResult>;
78
+ webSearch?(query: string): Promise<ToolResult>;
79
+ capabilities(): ToolProviderCapabilities;
80
+ }
81
+
82
+ interface ToolResult {
83
+ success: boolean;
84
+ output: string;
85
+ error?: string;
86
+ }
87
+ ```
88
+
89
+ Built-in implementations:
90
+
91
+ | Provider | Description |
92
+ |----------|-------------|
93
+ | `LocalToolProvider` | Runs tools on the local filesystem |
94
+ | `E2BToolProvider` | Routes tools to a sandbox VM via `ControlPlaneE2BExecutor` |
95
+ | `CompositeToolProvider` | Combines multiple providers (e.g. local filesystem + sandbox bash) |
96
+
97
+ ## SandboxProvider
98
+
99
+ Higher-level sandbox operations beyond basic tool calls:
100
+
101
+ ```typescript
102
+ interface SandboxProvider {
103
+ exec(command: string, options?: SandboxExecOptions): Promise<SandboxExecResult>;
104
+ readSandboxFile(path: string): Promise<SandboxFileBlob>;
105
+ writeSandboxFile(path: string, content: SandboxFileBlob): Promise<void>;
106
+ }
107
+ ```
108
+
109
+ Used by `SkillManager` for executing skill scripts in isolated VMs.
110
+
111
+ ## Connecting to a Sandbox
112
+
113
+ ```typescript
42
114
  import { ControlPlaneE2BExecutor } from './src/providers/control-plane-e2b-executor';
43
- import { VercelAgentLoop } from './src/loop/vercel-agent-loop';
115
+ import { E2BToolProvider } from './src/providers/e2b-tool-provider';
44
116
 
45
117
  // Connect to sandbox service
46
118
  const executor = new ControlPlaneE2BExecutor({
@@ -50,187 +122,172 @@ const executor = new ControlPlaneE2BExecutor({
50
122
  });
51
123
  await executor.initialize(); // creates a Firecracker VM
52
124
 
53
- // Build and run the agent
54
- const agent = createAgent({
55
- toolProvider: new E2BToolProvider(executor),
56
- loop: new VercelAgentLoop(), // needs ANTHROPIC_API_KEY
57
- });
125
+ const toolProvider = new E2BToolProvider(executor);
58
126
 
59
- const result = await agent.run('create a bar chart of sales data');
60
- console.log(result.output); // LLM's final response
61
- console.log(result.steps); // number of tool steps
127
+ // ... use with createAgent or ArcLoop
62
128
 
63
- await executor.destroy(); // tears down the VM
129
+ await executor.destroy(); // tears down the VM
64
130
  ```
65
131
 
66
- For a complete working example, see [`examples/chat-assistant/src/chat.ts`](../examples/chat-assistant/src/chat.ts).
67
-
68
- ### From environment variables
132
+ From environment variables: `ControlPlaneE2BExecutor.fromEnv()` reads `SAMYX_BASE_URL` and `SAMYX_API_KEY`.
69
133
 
70
- `ControlPlaneE2BExecutor.fromEnv()` reads `SAMYX_BASE_URL` and `SAMYX_API_KEY` automatically:
134
+ ---
71
135
 
72
- ```ts
73
- const executor = ControlPlaneE2BExecutor.fromEnv();
74
- ```
75
-
76
- ## Using locally (no sandbox)
136
+ ## Single-Agent Mode (`createAgent`)
77
137
 
78
- For development without a sandbox service, use `LocalToolProvider` which runs tools on the local machine:
138
+ For simple tasks that don't need orchestration:
79
139
 
80
- ```ts
140
+ ```typescript
81
141
  import { createAgent } from './src/agent/create-agent';
82
142
  import { LocalToolProvider } from './src/providers/local-tool-provider';
83
143
 
84
144
  const agent = createAgent({
85
145
  toolProvider: new LocalToolProvider(process.cwd()),
86
- loop: new VercelAgentLoop(),
146
+ loop: new VercelAgentLoop(), // needs ANTHROPIC_API_KEY
87
147
  });
88
148
 
89
149
  const result = await agent.run('list all TypeScript files');
150
+ console.log(result.output);
90
151
  ```
91
152
 
92
- ## Key modules
93
-
94
- ### Agent creation (`src/agent/create-agent.ts`)
153
+ ### Configuration
95
154
 
96
- `createAgent(options)` returns an agent with a `.run(prompt, options?)` method. Options:
155
+ | Option | Type | Default | Description |
156
+ |--------|------|---------|-------------|
157
+ | `toolProvider` | `ToolProvider` | required | Executes tool calls |
158
+ | `loop` | `AgentLoop` | `VercelAgentLoop` | LLM decision loop |
159
+ | `sandboxProvider` | `SandboxProvider` | — | Higher-level sandbox operations |
160
+ | `maxSteps` | `number` | 30 | Max tool steps per run |
161
+ | `telemetry` | `HarnessTelemetry` | — | OpenTelemetry-style tracing |
162
+ | `skillIndexPath` | `string` | — | Path to skill index JSON for routing |
97
163
 
98
- | Option | Type | Description |
99
- |--------|------|-------------|
100
- | `toolProvider` | `ToolProvider` | Required. Executes tool calls |
101
- | `loop` | `AgentLoop` | LLM decision loop (default: `VercelAgentLoop`) |
102
- | `sandboxProvider` | `SandboxProvider` | Optional. Higher-level sandbox ops (file download, exec with env) |
103
- | `maxSteps` | `number` | Max tool steps per run (default: 30) |
104
- | `telemetry` | `HarnessTelemetry` | Optional. OpenTelemetry-style tracing |
105
- | `skillIndexPath` | `string` | Optional. Path to skill index JSON |
164
+ ### VercelAgentLoop
106
165
 
107
- ### Agent loop (`src/loop/vercel-agent-loop.ts`)
166
+ Calls Claude via the Vercel AI SDK. Supports parallel tool calls and configurable system prompt.
108
167
 
109
- `VercelAgentLoop` calls Claude via the Vercel AI SDK. It supports:
110
- - Parallel tool calls (returns `ToolBatchAction` when the LLM requests multiple tools at once)
111
- - Configurable system prompt
112
- - Model selection via `HARNESS_MODEL` env var (default: `claude-sonnet-4-5`)
113
-
114
- ```ts
168
+ ```typescript
115
169
  const loop = new VercelAgentLoop({
116
170
  systemPrompt: 'You are a helpful coding assistant.',
171
+ model: 'claude-sonnet-4-5', // or HARNESS_MODEL env var
117
172
  });
118
173
  ```
119
174
 
120
- ### Tool provider (`src/interfaces/tool-provider.ts`)
175
+ ### LCMToolLoop
121
176
 
122
- The contract for tool execution:
177
+ Wraps another loop to add Lossless Context Management and optional REPL orchestration:
123
178
 
124
- ```ts
125
- interface ToolProvider {
126
- bash(command: string, options?: BashOptions): Promise<ToolResult>;
127
- readFile(path: string, options?: ReadOptions): Promise<ToolResult>;
128
- writeFile(path: string, content: string): Promise<ToolResult>;
129
- editFile(path: string, oldText: string, newText: string): Promise<ToolResult>;
130
- glob(pattern: string, options?: GlobOptions): Promise<ToolResult>;
131
- grep(pattern: string, path?: string, options?: GrepOptions): Promise<ToolResult>;
132
- webFetch?(options: WebFetchOptions): Promise<ToolResult>;
133
- webSearch?(query: string): Promise<ToolResult>;
134
- capabilities(): ToolProviderCapabilities;
135
- }
179
+ ```typescript
180
+ import { LCMToolLoop } from './src/loop/lcm-tool-loop';
181
+ import { VercelAgentLoop } from './src/loop/vercel-agent-loop';
136
182
 
137
- interface ToolResult {
138
- success: boolean;
139
- output: string;
140
- error?: string;
141
- }
183
+ const loop = new LCMToolLoop({
184
+ innerLoop: new VercelAgentLoop(),
185
+ toolProvider: mySandboxProvider,
186
+ enableRepl: true, // default: true
187
+ bridgeDir: '/var/run/bridge',
188
+ onActivity: (entry) => console.log(entry),
189
+ onLlmRequest: async (prompt) => callLLM(prompt),
190
+ onWebFetchRequest: async (url) => fetch(url),
191
+ });
142
192
  ```
143
193
 
144
- Built-in implementations:
194
+ **Standard mode**: Lossless context trimming — the LLM always sees a coherent, budget-fitting view of the full conversation.
145
195
 
146
- | Provider | Description |
147
- |----------|-------------|
148
- | `LocalToolProvider` | Runs tools on the local filesystem |
149
- | `E2BToolProvider` | Routes tools to an E2B-compatible executor over HTTP |
150
- | `CompositeToolProvider` | Combines multiple providers (e.g. sandbox + web) |
196
+ **REPL mode**: When the LLM returns a Bash action with the REPL marker, the loop writes a Python script into the sandbox, injects the bridge module, runs the script, and polls for sub-requests (LLM, web_fetch, ask_user) that the harness fulfills.
151
197
 
152
- ### Action types (`src/agent/types.ts`)
198
+ ---
153
199
 
154
- The LLM returns one of these action types each turn:
200
+ ## ArcLoop (Orchestrator Mode)
155
201
 
156
- ```ts
157
- // Single tool call
158
- interface ToolCallAction {
159
- type: 'tool';
160
- name: 'Bash' | 'Read' | 'Write' | 'Edit' | 'Glob' | 'Grep' | ...;
161
- args: Record<string, unknown>;
162
- }
202
+ For complex tasks that benefit from parallel processes, context management, and memory:
163
203
 
164
- // Multiple independent tool calls (executed in parallel)
165
- interface ToolBatchAction {
166
- type: 'tool_batch';
167
- calls: ToolCallAction[];
168
- }
169
-
170
- // Final text response (ends the loop)
171
- interface FinalAction {
172
- type: 'final';
173
- content: string;
174
- }
175
- ```
176
-
177
- ### LCM tool loop (`src/loop/lcm-tool-loop.ts`)
178
-
179
- `LCMToolLoop` wraps another loop to add LCM-based tool routing, REPL script execution, and bridge-based tool dispatch. Used in the chat-assistant example.
180
-
181
- ### Sandbox provider (`src/interfaces/sandbox-provider.ts`)
204
+ ```typescript
205
+ import { createArcAgent } from './src/arc/create-arc-agent';
182
206
 
183
- Higher-level sandbox operations beyond basic tool calls:
207
+ const agent = await createArcAgent({
208
+ toolProvider: myToolProvider,
209
+ episodeStore: myEpisodeStore, // required
210
+ sessionMemoStore: mySessionMemoStore, // required
211
+ longTermStore: myLongTermStore, // required
212
+ taskId: 'task-1',
213
+ sessionId: 'session-1',
214
+ });
184
215
 
185
- ```ts
186
- interface SandboxProvider {
187
- exec(command: string, options?: SandboxExecOptions): Promise<SandboxExecResult>;
188
- readSandboxFile(path: string): Promise<SandboxFileBlob>;
189
- writeSandboxFile(path: string, content: SandboxFileBlob): Promise<void>;
216
+ // Streaming
217
+ for await (const event of agent.stream(messages, signal)) {
218
+ if (event.type === 'text_delta') process.stdout.write(event.text);
219
+ if (event.type === 'process_dispatched') console.log(` → ${event.action}`);
220
+ if (event.type === 'done') console.log(`Done in ${event.stats.durationMs}ms`);
190
221
  }
191
- ```
192
-
193
- ### Observability (`src/observability/otel.ts`)
194
-
195
- `HarnessTelemetry` provides OpenTelemetry-style spans and metrics for agent runs.
196
222
 
197
- ### Arc: Orchestrator + Thread Architecture (`src/arc/`)
223
+ // Non-streaming
224
+ const result = await agent.run(messages, signal);
225
+ ```
198
226
 
199
- `ArcLoop` is an `AgentLoop` implementation where an orchestrator LLM dispatches bounded threads via a single `Thread` tool. Threads produce episodes (summary + full trace). The orchestrator only sees summaries, keeping its context small.
227
+ ### ArcLoopConfig
228
+
229
+ | Option | Type | Default | Description |
230
+ |--------|------|---------|-------------|
231
+ | `model` | `string` | `'claude-opus-4-6'` | Orchestrator model (ID or tier name) |
232
+ | `modelMap` | `Record<ModelTier, string>` | haiku/sonnet/opus | Maps fast/medium/strong to model IDs |
233
+ | `apiKey` | `string` | — | Anthropic API key |
234
+ | `systemPrompt` | `string` | built-in | Custom orchestrator system prompt |
235
+ | `maxTurns` | `number` | 30 | Max orchestrator turns |
236
+ | `processTimeout` | `number` | 120_000 | Per-process timeout (ms) |
237
+ | `processMaxSteps` | `number` | 20 | Per-process max tool steps |
238
+ | `contextWindowSize` | `number` | 200_000 | Context window in tokens |
239
+ | `outputReserve` | `number` | 20_000 | Tokens reserved for output |
240
+ | `autoMemory` | `boolean` | true | Auto-detect patterns from episodes |
241
+ | `episodeStore` | `EpisodeStore` | required | Stores episode summaries + traces |
242
+ | `sessionMemoStore` | `SessionMemoStore` | required | Stores session memos |
243
+ | `longTermStore` | `LongTermStore` | required | Stores long-term memories |
244
+ | `taskId` | `string` | required | Task identifier |
245
+ | `sessionId` | `string` | required | Session identifier |
246
+ | `toolProvider` | `ToolProvider` | required | Tool execution |
247
+ | `processTools` | `Record<string, AnyTool>` | builtinTools | Tools available inside processes |
248
+ | `extraOrchestratorTools` | `Record<string, AnyTool>` | — | Custom orchestrator tools |
249
+ | `onOrchestratorTool` | `function` | — | Handler for custom orchestrator tools |
250
+ | `resilience` | `ResiliencePolicy` | — | Composable resilience pipeline |
251
+ | `traceWriter` | `function` | — | Callback for trace event emission |
252
+
253
+ ### Resilience
254
+
255
+ ```typescript
256
+ import { resilience } from './src/arc/resilience';
257
+
258
+ const pipeline = resilience()
259
+ .retry({ maxRetries: 2, baseDelay: 1000 })
260
+ .timeout({ durationMs: 30_000 })
261
+ .circuitBreaker({ failureThreshold: 5 })
262
+ .build();
263
+
264
+ const agent = await createArcAgent({
265
+ // ...config
266
+ resilience: pipeline,
267
+ });
268
+ ```
200
269
 
201
- ```ts
202
- import { createArcAgent } from './src/arc/create-arc-agent';
203
- import { InMemoryEpisodeStore } from './src/arc/stores/episode-store';
204
- import { InMemorySessionMemoStore } from './src/arc/stores/session-memo-store';
205
- import { InMemoryLongTermStore } from './src/arc/stores/long-term-store';
270
+ ### Trace Emission
206
271
 
207
- const agent = createArcAgent({
208
- toolProvider: new LocalToolProvider(process.cwd()),
209
- episodeStore: new InMemoryEpisodeStore(),
210
- sessionMemoStore: new InMemorySessionMemoStore(),
211
- longTermStore: new InMemoryLongTermStore(),
212
- taskId: 'task-1',
213
- sessionId: 'session-1',
272
+ ```typescript
273
+ const traces: TraceEvent[] = [];
274
+ const agent = await createArcAgent({
275
+ // ...config
276
+ traceWriter: (event) => traces.push(event),
214
277
  });
215
-
216
- const result = await agent.run('Fix the authentication bug');
217
278
  ```
218
279
 
219
- Key features:
220
- - **Parallel threads**: orchestrator calls Thread N times in one turn → all run concurrently
221
- - **Four-tier memory**: thread context → episodes → session memos → long-term
222
- - **Per-thread models**: Haiku for reads, Sonnet for implementation
223
- - **Template compression**: zero-LLM-call episode summaries
224
- - **Async consolidation**: non-blocking background distillation
280
+ Traces can be validated against the formal model: `cd verify && cargo run -- trace file.ndjson`
225
281
 
226
- Full architecture doc: [`docs/arc.md`](../docs/arc.md)
282
+ ---
227
283
 
228
- ## Package layout
284
+ ## Package Layout
229
285
 
230
286
  ```
231
287
  src/
232
288
  ├── agent/ # createAgent, step executor, types
233
- ├── arc/ # ArcLoop orchestrator, threads, memory hierarchy
289
+ ├── arc/ # ArcLoop orchestrator, processes, memory, resilience
290
+ │ ├── resilience/ # Retry, circuit breaker, timeout, bulkhead, fallback
234
291
  │ ├── stores/ # RxDB + in-memory store implementations
235
292
  │ └── object-store/ # Pluggable cloud sync (fs, memory)
236
293
  ├── interfaces/ # ToolProvider, SandboxProvider, AgentLoop contracts
@@ -240,17 +297,20 @@ src/
240
297
  ├── hooks/ # Pre/post tool call hooks
241
298
  ├── permissions/ # Tool permission checks
242
299
  ├── sessions/ # Session persistence
243
- ├── subagents/ # Subagent spawning and task tools
300
+ ├── subagents/ # Subagent spawning
244
301
  ├── skills/ # Skill index, routing, and management
245
302
  ├── optimization/ # Benchmark runner
246
303
  └── observability/ # OpenTelemetry integration
304
+
305
+ verify/ # Rust formal verification (Stateright model checker)
306
+ testing/ # Adversarial scenario replay harness
307
+ tests/ # Vitest test suite
247
308
  ```
248
309
 
249
310
  ## Documentation
250
311
 
251
- - **Arc architecture**: [`docs/arc.md`](../docs/arc.md)
252
- - Provider guide: `docs/guides/providers.md`
253
- - Skills guide: `docs/guides/skills.md`
254
- - Observability guide: `docs/guides/observability.md`
255
- - Release process: `../docs/RELEASE.md`
256
- - Full example: [`../examples/chat-assistant/src/chat.ts`](../examples/chat-assistant/src/chat.ts)
312
+ - [Arc architecture](../docs/arc.md) — process model, context window, memory, resilience, verification
313
+ - [Testing](../docs/testing.md) — test layers, running tests, writing new tests
314
+ - [Sandbox setup](../docs/PUBLIC_SANDBOX.md) — deploying the sandbox service
315
+ - [Release process](../docs/RELEASE.md) — versioning and publishing
316
+ - [Example](../examples/chat-assistant/src/chat.ts) — complete working chat assistant
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@bluecopa/harness",
3
- "version": "0.1.0-snapshot.20",
3
+ "version": "0.1.0-snapshot.22",
4
4
  "description": "Provider-agnostic TypeScript agent framework",
5
5
  "license": "UNLICENSED",
6
6
  "scripts": {
@@ -364,6 +364,10 @@ export interface CreateProcessConfig {
364
364
  // eslint-disable-next-line @typescript-eslint/no-explicit-any
365
365
  processTools: Record<string, any>;
366
366
  parentSignal: AbortSignal;
367
+ /** Custom system prompt for this process (overrides PROCESS_SYSTEM_PROMPT). */
368
+ processSystemPrompt?: string;
369
+ /** Async skill instructions appended to the system prompt under a "## Skill Instructions" heading (awaited during process startup). */
370
+ skillPromptPromise?: Promise<string | null>;
367
371
 
368
372
  // Runtime extras
369
373
  hookRunner?: HookRunner;
@@ -419,12 +423,21 @@ export function createProcess(
419
423
  process.status = 'running';
420
424
  const seed = await seedPromise;
421
425
 
426
+ // Build system prompt: base + optional skill instructions
427
+ let systemPrompt = config.processSystemPrompt ?? PROCESS_SYSTEM_PROMPT;
428
+ if (config.skillPromptPromise) {
429
+ const skillInstructions = await config.skillPromptPromise;
430
+ if (skillInstructions) {
431
+ systemPrompt += '\n\n## Skill Instructions\n' + skillInstructions;
432
+ }
433
+ }
434
+
422
435
  const result = await Promise.race([
423
436
  runner.run({
424
437
  model,
425
438
  prompt: request.action,
426
439
  tools: config.processTools,
427
- systemPrompt: PROCESS_SYSTEM_PROMPT,
440
+ systemPrompt,
428
441
  toolProvider: config.toolProvider,
429
442
  maxSteps,
430
443
  signal: ac.signal,
@@ -27,6 +27,9 @@ import { createProcess, firstEvent } from './agent-runner';
27
27
  import { EpisodeCompressor } from './episode-compressor';
28
28
  import { runConsolidation } from './consolidation';
29
29
  import { pickDefined } from './utils';
30
+ import { SkillRouter } from '../skills/skill-router';
31
+ import { loadSkillFromFile } from '../skills/skill-loader';
32
+ import type { SkillSummary } from '../skills/skill-types';
30
33
 
31
34
  // ── Default orchestrator prompt ──
32
35
 
@@ -75,6 +78,9 @@ export class ArcLoop {
75
78
  private readonly traceWriter: ((event: TraceEvent) => void) | undefined;
76
79
  private readonly tracedRunning = new Set<string>();
77
80
  private readonly processListeners: Promise<void>[] = [];
81
+ private readonly skillRouter: SkillRouter | undefined;
82
+ private skillSummaries: SkillSummary[] | null = null;
83
+ private skillSummariesPromise: Promise<SkillSummary[]> | null = null;
78
84
 
79
85
  constructor(config: ArcLoopConfig) {
80
86
  this.config = config;
@@ -114,6 +120,15 @@ export class ArcLoop {
114
120
 
115
121
  this.resilience = config.resilience;
116
122
  this.traceWriter = (config as ArcLoopConfig & { traceWriter?: (event: TraceEvent) => void }).traceWriter;
123
+
124
+ if (config.skillIndexPath) {
125
+ this.skillRouter = new SkillRouter();
126
+ // Start reading the skill index now; the promise is awaited on first dispatch. Read/parse failures yield an empty list (no skills).
127
+ this.skillSummariesPromise = import('node:fs/promises')
128
+ .then(fs => fs.readFile(config.skillIndexPath!, 'utf-8'))
129
+ .then(raw => JSON.parse(raw) as SkillSummary[])
130
+ .catch(() => []);
131
+ }
117
132
  }
118
133
 
119
134
  private trace(kind: TraceEvent['kind']): void {
@@ -449,7 +464,20 @@ export class ArcLoop {
449
464
  // ── Process dispatch ──
450
465
 
451
466
  private dispatch(request: ProcessRequest, parentSignal: AbortSignal): Process {
452
- const defaultModel = resolveModel('medium', this.modelMap, this.modelMap.medium);
467
+ const profile = request.profile
468
+ ? this.config.processProfiles?.[request.profile]
469
+ : undefined;
470
+ const defaultModel = resolveModel(
471
+ profile?.model ?? 'medium',
472
+ this.modelMap,
473
+ this.modelMap.medium,
474
+ );
475
+
476
+ // Resolve skill instructions only when skills are configured
477
+ const skillPromptPromise = this.skillRouter
478
+ ? this.resolveSkillPrompt(request.action)
479
+ : undefined;
480
+
453
481
  const proc = createProcess(request, {
454
482
  toolProvider: this.config.toolProvider,
455
483
  episodeStore: this.config.episodeStore,
@@ -457,10 +485,12 @@ export class ArcLoop {
457
485
  sessionId: this.config.sessionId,
458
486
  modelMap: this.modelMap,
459
487
  defaultModel,
460
- processMaxSteps: this.config.processMaxSteps ?? 20,
488
+ processMaxSteps: profile?.maxSteps ?? this.config.processMaxSteps ?? 20,
461
489
  processTimeout: this.config.processTimeout ?? 120_000,
462
490
  // eslint-disable-next-line @typescript-eslint/no-explicit-any
463
- processTools: this.config.processTools ?? builtinTools as any,
491
+ processTools: (profile?.tools ?? this.config.processTools ?? builtinTools) as any,
492
+ processSystemPrompt: profile?.systemPrompt ?? this.config.processSystemPrompt,
493
+ skillPromptPromise,
464
494
  parentSignal,
465
495
  ...pickDefined(this.config, [
466
496
  'hookRunner',
@@ -475,6 +505,28 @@ export class ArcLoop {
475
505
  return proc;
476
506
  }
477
507
 
508
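+ /** Resolve skill instructions for a process action. Returns null if skills are not configured, the index is empty, no skill matched, or the matched skill fails to load or has no instructions. */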
509
+ private async resolveSkillPrompt(action: string): Promise<string | null> {
510
+ if (!this.skillRouter || !this.skillSummariesPromise) return null;
511
+
512
+ // Ensure summaries are loaded
513
+ if (!this.skillSummaries) {
514
+ this.skillSummaries = await this.skillSummariesPromise;
515
+ }
516
+ if (this.skillSummaries.length === 0) return null;
517
+
518
+ // Fast match only (keyword + alias, no LLM call)
519
+ const matched = await this.skillRouter.selectSkill(action, this.skillSummaries);
520
+ if (!matched) return null;
521
+
522
+ try {
523
+ const skill = await loadSkillFromFile(matched.path);
524
+ return skill.instructions || null;
525
+ } catch {
526
+ return null;
527
+ }
528
+ }
529
+
478
530
  private traceProcessRunning(procId: string): void {
479
531
  if (!this.tracedRunning.has(procId)) {
480
532
  this.tracedRunning.add(procId);
@@ -546,6 +598,9 @@ export class ArcLoop {
546
598
  if (typeof args.label === 'string') {
547
599
  req.label = args.label;
548
600
  }
601
+ if (typeof args.profile === 'string') {
602
+ req.profile = args.profile;
603
+ }
549
604
  return req;
550
605
  }
551
606
 
package/src/arc/tools.ts CHANGED
@@ -11,6 +11,7 @@ export const Thread = tool({
11
11
  model: z.enum(['fast', 'medium', 'strong']).optional().describe('Model tier (default: medium)'),
12
12
  maxSteps: z.number().optional().describe('Max tool-call steps'),
13
13
  label: z.string().optional().describe('Human-readable label'),
14
+ profile: z.string().optional().describe('Named process profile (e.g. "researcher", "builder")'),
14
15
  }),
15
16
  });
16
17
 
package/src/arc/types.ts CHANGED
@@ -63,6 +63,10 @@ export interface ArcLoopConfig {
63
63
  processTimeout?: number;
64
64
  /** Per-process max steps (default: 20) */
65
65
  processMaxSteps?: number;
66
+ /** Default system prompt for all processes (overrides the built-in default; a selected profile's systemPrompt takes precedence) */
67
+ processSystemPrompt?: string;
68
+ /** Named process profiles. The orchestrator selects a profile via the Thread tool's `profile` param. */
69
+ processProfiles?: Record<string, ProcessProfile>;
66
70
 
67
71
  // Context
68
72
  /** Context window size in tokens (default: 200_000) */
@@ -148,6 +152,20 @@ export interface ProcessRequest {
148
152
  model?: import('./arc-types').ModelTier;
149
153
  maxSteps?: number;
150
154
  label?: string;
155
+ /** Named profile to use for this process (looked up from ArcLoopConfig.processProfiles). */
156
+ profile?: string;
157
+ }
158
+
159
+ /** A named process profile — provides defaults for system prompt, tools, model, and step limit. */
160
+ export interface ProcessProfile {
161
+ /** System prompt for processes using this profile. */
162
+ systemPrompt: string;
163
+ /** Tools available to processes using this profile (overrides processTools). */
164
+ tools?: Record<string, import('./arc-types').AnyTool>;
165
+ /** Default model tier for this profile (Thread tool's explicit model overrides this). */
166
+ model?: import('./arc-types').ModelTier;
167
+ /** Max steps for this profile (Thread tool's explicit maxSteps overrides this). */
168
+ maxSteps?: number;
151
169
  }
152
170
 
153
171
  export type Activity =
@@ -0,0 +1,364 @@
1
+ /**
2
+ * Tests for process profiles.
3
+ *
4
+ * Verifies that:
5
+ * 1. A process dispatched with a named profile uses the profile's system prompt
6
+ * 2. A process dispatched with a named profile uses the profile's model (profile tools and maxSteps are not directly asserted here)
7
+ * 3. The default processSystemPrompt is used when no profile is specified
8
+ * 4. Profile model is overridden by explicit Thread model param
9
+ * 5. Unknown profile names fall back to defaults
10
+ */
11
+ import { describe, it, expect, vi, beforeEach } from 'vitest';
12
+ import type { ToolProvider, ToolResult } from '../../src/interfaces/tool-provider';
13
+ import type { Episode, EpisodeTrace, SessionMemo, LongTermMemory } from '../../src/arc/arc-types';
14
+
15
+ // ── Captured per-process inputs (system prompt, model) and mock call counters; all reset in beforeEach ──
16
+
17
+ const capturedSystemPrompts: string[] = [];
18
+ const capturedModels: string[] = [];
19
+ let orchestratorCallCount = 0;
20
+ let processCallCount = 0;
21
+
22
+ // Orchestrator (streamText): one response factory per orchestrator call, consumed in order by mockStreamText
23
+ let orchestratorScript: Array<() => unknown> = [];
24
+
25
+ function mockStreamText() {
26
+ const callNum = orchestratorCallCount++;
27
+ if (callNum < orchestratorScript.length) {
28
+ return orchestratorScript[callNum]();
29
+ }
30
+ // Default: final text
31
+ return {
32
+ fullStream: (async function* () {
33
+ yield { type: 'text-delta', text: 'Done.' };
34
+ })(),
35
+ };
36
+ }
37
+
38
+ // Process (generateText): captures system prompt, returns immediate completion
39
/**
 * Stand-in for the `ai` package's `generateText`, i.e. a process's LLM call.
 *
 * Records what the process was configured with and then reports an immediate
 * completion without any tool calls.
 *
 * @param opts - Options object handed to `generateText`; only `system` and
 *   `model` are inspected.
 * @returns A promise resolving to `{ text: 'Process done.', toolCalls: [] }`.
 */
function mockGenerateText(opts: Record<string, unknown>) {
  processCallCount += 1;

  // `system` is read as an array of message-like objects; only the first
  // entry's content is recorded, and only when it is a non-empty value.
  const systemMessages = opts.system as Array<{ content: string }> | undefined;
  const firstContent = systemMessages?.[0]?.content;
  if (firstContent) {
    capturedSystemPrompts.push(firstContent);
  }

  // The model is always recorded; a missing model is stored as ''.
  const model = opts.model ?? '';
  capturedModels.push(String(model));

  // Immediate completion — no tool calls
  const completion = {
    text: 'Process done.',
    toolCalls: [],
  };
  return Promise.resolve(completion);
}
55
+
56
// Replace the AI SDK entry points with the scripted mocks. The wrappers call
// mockStreamText / mockGenerateText lazily, at invocation time.
vi.mock('ai', () => {
  const streamText = (_opts: Record<string, unknown>) => mockStreamText();
  const generateText = (opts: Record<string, unknown>) => mockGenerateText(opts);
  // `tool` hands the definition back untouched.
  const tool = (def: Record<string, unknown>) => def;
  return { streamText, generateText, tool };
});

// The provider factory becomes the identity function, so whatever model ID is
// requested is passed through as a plain string.
vi.mock('@ai-sdk/anthropic', () => {
  const anthropic = (model: string) => model;
  return { anthropic };
});
65
+
66
+ // ── In-memory stores ──
67
+
68
/**
 * Builds array-backed, in-memory implementations of the episode, session-memo
 * and long-term-memory stores used by the tests.
 *
 * Each call creates fresh, independent backing arrays, so stores from separate
 * calls never share data.
 *
 * @returns An object with `episodeStore`, `sessionMemoStore` and `longTermStore`.
 */
function createInMemoryStores() {
  const episodes: Episode[] = [];
  const traces: EpisodeTrace[] = [];
  const memos: SessionMemo[] = [];
  const memories: LongTermMemory[] = [];

  // Last `n` items of a list. A bare `items.slice(-n)` returns the WHOLE list
  // when n is 0 (because -0 === 0), so non-positive counts are handled here.
  const lastN = <T>(items: T[], n: number): T[] => (n > 0 ? items.slice(-n) : []);

  return {
    episodeStore: {
      async addEpisode(ep: Episode) { episodes.push(ep); },
      async addTrace(tr: EpisodeTrace) { traces.push(tr); },
      async getEpisode(id: string) { return episodes.find(e => e.id === id) ?? null; },
      // Traces are looked up by the id of the episode they belong to.
      async getTrace(id: string) { return traces.find(t => t.episodeId === id) ?? null; },
      async getEpisodesByTask(taskId: string) { return episodes.filter(e => e.taskId === taskId); },
      async getEpisodesBySession(sid: string) { return episodes.filter(e => e.sessionId === sid); },
      async getRecentEpisodes(n: number) { return lastN(episodes, n); },
      // Nothing is ever evicted in this in-memory variant.
      async evictTraces() { return 0; },
    },
    sessionMemoStore: {
      async addMemo(memo: SessionMemo) { memos.push(memo); },
      async getMemo(id: string) { return memos.find(m => m.id === id) ?? null; },
      async getMemosBySession(sid: string) { return memos.filter(m => m.sessionId === sid); },
      async getRecentMemos(n: number) { return lastN(memos, n); },
    },
    longTermStore: {
      async addMemory(mem: LongTermMemory) { memories.push(mem); },
      async getMemory(id: string) { return memories.find(m => m.id === id) ?? null; },
      // Returns a copy so callers cannot mutate the backing array.
      async getAllMemories() { return [...memories]; },
      async getMemoriesByCategory(cat: string) { return memories.filter(m => m.category === cat); },
      // Unknown ids are ignored; known entries are patched in place.
      async updateMemory(id: string, updates: Partial<Pick<LongTermMemory, 'content' | 'category' | 'updatedAt'>>) {
        const mem = memories.find(m => m.id === id);
        if (mem) Object.assign(mem, updates);
      },
      async deleteMemory(id: string) {
        const idx = memories.findIndex(m => m.id === id);
        if (idx >= 0) memories.splice(idx, 1);
      },
    },
  };
}
107
+
108
/**
 * Creates a ToolProvider whose every tool succeeds immediately with a fixed,
 * canned output string. No tool inspects its arguments.
 *
 * @returns A provider reporting bash and file-system support only.
 */
function createMockToolProvider(): ToolProvider {
  // Wraps a canned output in a successful result; a new object per call.
  const succeed = (output: string): ToolResult => ({ success: true, output });

  const provider: ToolProvider = {
    bash: async () => succeed('output'),
    readFile: async () => succeed('content'),
    writeFile: async () => succeed('written'),
    editFile: async () => succeed('edited'),
    glob: async () => succeed('files'),
    grep: async () => succeed('matches'),
    capabilities: () => ({
      bash: true,
      fileSystem: true,
      webFetch: false,
      webSearch: false,
      codeExecution: false,
      sandboxed: false,
    }),
  };

  return provider;
}
122
+
123
+ // Helper: create a streamText response that dispatches Thread tool calls
124
/**
 * Builds a scripted orchestrator response that dispatches Thread tool calls.
 *
 * @param calls - One argument object per Thread call, emitted in order.
 * @returns A factory producing `{ fullStream }`, where the stream yields one
 *   'tool-call' part per entry in `calls`, each tagged with a random `tc-…` id.
 */
function threadCalls(...calls: Array<Record<string, unknown>>) {
  return () => {
    const emitToolCalls = async function* () {
      for (const args of calls) {
        const toolCallId = `tc-${Math.random()}`;
        yield { type: 'tool-call', toolName: 'Thread', toolCallId, args };
      }
    };
    // The generator is instantiated as soon as the factory is invoked.
    return { fullStream: emitToolCalls() };
  };
}
133
+
134
+ // Helper: create a final text response
135
/**
 * Builds a scripted orchestrator response that ends the turn with plain text.
 *
 * @param text - The text to emit.
 * @returns A factory producing `{ fullStream }`, where the stream yields a
 *   single 'text-delta' part carrying `text`.
 */
function finalText(text: string) {
  return () => {
    const emitText = async function* () {
      yield { type: 'text-delta', text };
    };
    return { fullStream: emitText() };
  };
}
142
+
143
+ // ── Import after mocks ──
144
+ import { ArcLoop } from '../../src/arc/arc-loop';
145
+
146
// Start every test from a clean slate: zeroed counters, an empty orchestrator
// script, and emptied capture arrays (cleared in place; they are `const`).
beforeEach(() => {
  orchestratorCallCount = 0;
  processCallCount = 0;
  orchestratorScript = [];

  capturedSystemPrompts.splice(0, capturedSystemPrompts.length);
  capturedModels.splice(0, capturedModels.length);
});
153
+
154
+ describe('Process Profiles', () => {
155
+ it('uses the profile system prompt when a named profile is specified', async () => {
156
+ orchestratorScript = [
157
+ threadCalls({ action: 'Do research', profile: 'researcher' }),
158
+ finalText('Done.'),
159
+ ];
160
+
161
+ const stores = createInMemoryStores();
162
+ const loop = new ArcLoop({
163
+ ...stores,
164
+ taskId: 'test-1',
165
+ sessionId: 'sess-1',
166
+ toolProvider: createMockToolProvider(),
167
+ processProfiles: {
168
+ researcher: {
169
+ systemPrompt: 'You are a research specialist. Search the web and compile findings.',
170
+ },
171
+ },
172
+ });
173
+
174
+ const events = [];
175
+ for await (const e of loop.stream([{ role: 'user', content: 'Research X' }], AbortSignal.timeout(10_000))) {
176
+ events.push(e);
177
+ }
178
+
179
+ expect(capturedSystemPrompts).toHaveLength(1);
180
+ expect(capturedSystemPrompts[0]).toBe('You are a research specialist. Search the web and compile findings.');
181
+ });
182
+
183
+ it('uses processSystemPrompt as default when no profile is specified', async () => {
184
+ orchestratorScript = [
185
+ threadCalls({ action: 'Do something' }),
186
+ finalText('Done.'),
187
+ ];
188
+
189
+ const stores = createInMemoryStores();
190
+ const loop = new ArcLoop({
191
+ ...stores,
192
+ taskId: 'test-2',
193
+ sessionId: 'sess-2',
194
+ toolProvider: createMockToolProvider(),
195
+ processSystemPrompt: 'You are a custom default agent.',
196
+ });
197
+
198
+ const events = [];
199
+ for await (const e of loop.stream([{ role: 'user', content: 'Do X' }], AbortSignal.timeout(10_000))) {
200
+ events.push(e);
201
+ }
202
+
203
+ expect(capturedSystemPrompts).toHaveLength(1);
204
+ expect(capturedSystemPrompts[0]).toBe('You are a custom default agent.');
205
+ });
206
+
207
+ it('falls back to built-in PROCESS_SYSTEM_PROMPT when no profile or default is set', async () => {
208
+ orchestratorScript = [
209
+ threadCalls({ action: 'Do something' }),
210
+ finalText('Done.'),
211
+ ];
212
+
213
+ const stores = createInMemoryStores();
214
+ const loop = new ArcLoop({
215
+ ...stores,
216
+ taskId: 'test-3',
217
+ sessionId: 'sess-3',
218
+ toolProvider: createMockToolProvider(),
219
+ });
220
+
221
+ const events = [];
222
+ for await (const e of loop.stream([{ role: 'user', content: 'Do X' }], AbortSignal.timeout(10_000))) {
223
+ events.push(e);
224
+ }
225
+
226
+ expect(capturedSystemPrompts).toHaveLength(1);
227
+ expect(capturedSystemPrompts[0]).toContain('focused execution thread');
228
+ });
229
+
230
+ it('uses profile model as default but Thread explicit model overrides it', async () => {
231
+ orchestratorScript = [
232
+ // First thread: no explicit model → should use profile's 'strong'
233
+ // Second thread: explicit 'fast' → should override profile's 'strong'
234
+ threadCalls(
235
+ { action: 'Synthesize report', profile: 'synthesizer' },
236
+ { action: 'Quick lookup', profile: 'synthesizer', model: 'fast' },
237
+ ),
238
+ finalText('Done.'),
239
+ ];
240
+
241
+ const stores = createInMemoryStores();
242
+ const loop = new ArcLoop({
243
+ ...stores,
244
+ taskId: 'test-4',
245
+ sessionId: 'sess-4',
246
+ toolProvider: createMockToolProvider(),
247
+ processProfiles: {
248
+ synthesizer: {
249
+ systemPrompt: 'You are a synthesis expert.',
250
+ model: 'strong',
251
+ },
252
+ },
253
+ });
254
+
255
+ const events = [];
256
+ for await (const e of loop.stream([{ role: 'user', content: 'Do X' }], AbortSignal.timeout(10_000))) {
257
+ events.push(e);
258
+ }
259
+
260
+ expect(capturedSystemPrompts).toHaveLength(2);
261
+ expect(capturedSystemPrompts[0]).toBe('You are a synthesis expert.');
262
+ expect(capturedSystemPrompts[1]).toBe('You are a synthesis expert.');
263
+
264
+ // First process: profile default 'strong' → resolved to model ID
265
+ // Second process: explicit 'fast' → resolved to Haiku model ID
266
+ expect(capturedModels[0]).toBe('claude-opus-4-5'); // strong tier default
267
+ expect(capturedModels[1]).toBe('claude-haiku-4-5'); // fast tier default
268
+ });
269
+
270
+ it('unknown profile falls back to processSystemPrompt or built-in default', async () => {
271
+ orchestratorScript = [
272
+ threadCalls({ action: 'Do something', profile: 'nonexistent' }),
273
+ finalText('Done.'),
274
+ ];
275
+
276
+ const stores = createInMemoryStores();
277
+ const loop = new ArcLoop({
278
+ ...stores,
279
+ taskId: 'test-5',
280
+ sessionId: 'sess-5',
281
+ toolProvider: createMockToolProvider(),
282
+ processSystemPrompt: 'Custom default prompt.',
283
+ processProfiles: {
284
+ researcher: { systemPrompt: 'Research prompt.' },
285
+ },
286
+ });
287
+
288
+ const events = [];
289
+ for await (const e of loop.stream([{ role: 'user', content: 'Do X' }], AbortSignal.timeout(10_000))) {
290
+ events.push(e);
291
+ }
292
+
293
+ expect(capturedSystemPrompts).toHaveLength(1);
294
+ // Unknown profile → falls back to processSystemPrompt
295
+ expect(capturedSystemPrompts[0]).toBe('Custom default prompt.');
296
+ });
297
+
298
+ it('profile maxSteps overrides config processMaxSteps', async () => {
299
+ orchestratorScript = [
300
+ threadCalls({ action: 'Quick task', profile: 'fast_worker' }),
301
+ finalText('Done.'),
302
+ ];
303
+
304
+ const stores = createInMemoryStores();
305
+ const loop = new ArcLoop({
306
+ ...stores,
307
+ taskId: 'test-6',
308
+ sessionId: 'sess-6',
309
+ toolProvider: createMockToolProvider(),
310
+ processMaxSteps: 20,
311
+ processProfiles: {
312
+ fast_worker: {
313
+ systemPrompt: 'Be fast.',
314
+ maxSteps: 3,
315
+ },
316
+ },
317
+ });
318
+
319
+ const events = [];
320
+ for await (const e of loop.stream([{ role: 'user', content: 'Do X' }], AbortSignal.timeout(10_000))) {
321
+ events.push(e);
322
+ }
323
+
324
+ // The process completed in 1 step (immediate text, no tools).
325
+ // We can't directly observe maxSteps from outside, but we can verify
326
+ // the process dispatched and completed successfully with the profile.
327
+ const dispatched = events.filter(e => e.type === 'process_dispatched');
328
+ const completed = events.filter(e => e.type === 'process_completed');
329
+ expect(dispatched).toHaveLength(1);
330
+ expect(completed).toHaveLength(1);
331
+ });
332
+
333
+ it('different profiles in the same turn get different system prompts', async () => {
334
+ orchestratorScript = [
335
+ threadCalls(
336
+ { action: 'Search the web', profile: 'researcher' },
337
+ { action: 'Write the report', profile: 'writer' },
338
+ ),
339
+ finalText('Done.'),
340
+ ];
341
+
342
+ const stores = createInMemoryStores();
343
+ const loop = new ArcLoop({
344
+ ...stores,
345
+ taskId: 'test-7',
346
+ sessionId: 'sess-7',
347
+ toolProvider: createMockToolProvider(),
348
+ processProfiles: {
349
+ researcher: { systemPrompt: 'You are a web researcher.' },
350
+ writer: { systemPrompt: 'You are a technical writer.' },
351
+ },
352
+ });
353
+
354
+ const events = [];
355
+ for await (const e of loop.stream([{ role: 'user', content: 'Do X' }], AbortSignal.timeout(10_000))) {
356
+ events.push(e);
357
+ }
358
+
359
+ expect(capturedSystemPrompts).toHaveLength(2);
360
+ // Order may vary due to parallel execution, so check both are present
361
+ expect(capturedSystemPrompts).toContain('You are a web researcher.');
362
+ expect(capturedSystemPrompts).toContain('You are a technical writer.');
363
+ });
364
+ });