@bluecopa/harness 0.1.0-snapshot.13 → 0.1.0-snapshot.130

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (110)
  1. package/README.md +208 -148
  2. package/dist/arc/app-adapter.d.ts +109 -0
  3. package/dist/arc/app-adapter.js +408 -0
  4. package/dist/arc/app-adapter.js.map +1 -0
  5. package/dist/arc/create-arc-agent.d.ts +51 -0
  6. package/dist/arc/create-arc-agent.js +4863 -0
  7. package/dist/arc/create-arc-agent.js.map +1 -0
  8. package/dist/arc/profile-builder.d.ts +51 -0
  9. package/dist/arc/profile-builder.js +310 -0
  10. package/dist/arc/profile-builder.js.map +1 -0
  11. package/dist/arc/profile-graph.d.ts +36 -0
  12. package/dist/arc/profile-graph.js +113 -0
  13. package/dist/arc/profile-graph.js.map +1 -0
  14. package/dist/arc/transition-table.d.ts +75 -0
  15. package/dist/arc/transition-table.js +25 -0
  16. package/dist/arc/transition-table.js.map +1 -0
  17. package/dist/loop/vercel-agent-loop.d.ts +116 -0
  18. package/dist/loop/vercel-agent-loop.js +330 -0
  19. package/dist/loop/vercel-agent-loop.js.map +1 -0
  20. package/dist/types-B3VY0xnE.d.ts +973 -0
  21. package/package.json +37 -2
  22. package/AGENTS.md +0 -18
  23. package/docs/guides/observability.md +0 -32
  24. package/docs/guides/providers.md +0 -51
  25. package/docs/guides/skills.md +0 -25
  26. package/docs/security/skill-sandbox-threat-model.md +0 -20
  27. package/src/agent/create-agent.ts +0 -893
  28. package/src/agent/create-tools.ts +0 -33
  29. package/src/agent/step-executor.ts +0 -15
  30. package/src/agent/types.ts +0 -70
  31. package/src/arc/arc-loop.ts +0 -430
  32. package/src/arc/arc-types.ts +0 -217
  33. package/src/arc/bridge-tools.ts +0 -170
  34. package/src/arc/bridged-tool-provider.ts +0 -80
  35. package/src/arc/consolidation.ts +0 -118
  36. package/src/arc/create-arc-agent.ts +0 -80
  37. package/src/arc/debug.ts +0 -62
  38. package/src/arc/episode-compressor.ts +0 -151
  39. package/src/arc/object-store/fs-object-store.ts +0 -60
  40. package/src/arc/object-store/memory-object-store.ts +0 -41
  41. package/src/arc/object-store/object-store.ts +0 -12
  42. package/src/arc/stores/episode-store.ts +0 -120
  43. package/src/arc/stores/long-term-store.ts +0 -86
  44. package/src/arc/stores/rxdb-setup.ts +0 -112
  45. package/src/arc/stores/session-memo-store.ts +0 -58
  46. package/src/arc/thread-executor.ts +0 -404
  47. package/src/arc/thread-tool.ts +0 -29
  48. package/src/context/llm-compaction-strategy.ts +0 -37
  49. package/src/context/prepare-step.ts +0 -65
  50. package/src/context/token-tracker.ts +0 -26
  51. package/src/extracted/manifest.json +0 -10
  52. package/src/extracted/prompts/compaction.md +0 -5
  53. package/src/extracted/prompts/system.md +0 -5
  54. package/src/extracted/tools.json +0 -82
  55. package/src/hooks/hook-runner.ts +0 -22
  56. package/src/hooks/tool-wrappers.ts +0 -64
  57. package/src/interfaces/compaction-strategy.ts +0 -18
  58. package/src/interfaces/hooks.ts +0 -24
  59. package/src/interfaces/sandbox-provider.ts +0 -29
  60. package/src/interfaces/session-store.ts +0 -48
  61. package/src/interfaces/tool-provider.ts +0 -70
  62. package/src/loop/bridge.ts +0 -363
  63. package/src/loop/context-store.ts +0 -210
  64. package/src/loop/lcm-tool-loop.ts +0 -163
  65. package/src/loop/vercel-agent-loop.ts +0 -285
  66. package/src/observability/context.ts +0 -17
  67. package/src/observability/metrics.ts +0 -27
  68. package/src/observability/otel.ts +0 -105
  69. package/src/observability/tracing.ts +0 -13
  70. package/src/optimization/agent-evaluator.ts +0 -40
  71. package/src/optimization/config-serializer.ts +0 -16
  72. package/src/optimization/optimization-runner.ts +0 -39
  73. package/src/optimization/trace-collector.ts +0 -33
  74. package/src/permissions/permission-manager.ts +0 -34
  75. package/src/providers/composite-tool-provider.ts +0 -72
  76. package/src/providers/control-plane-e2b-executor.ts +0 -218
  77. package/src/providers/e2b-tool-provider.ts +0 -68
  78. package/src/providers/local-tool-provider.ts +0 -190
  79. package/src/providers/skill-sandbox-provider.ts +0 -46
  80. package/src/sessions/file-session-store.ts +0 -61
  81. package/src/sessions/in-memory-session-store.ts +0 -39
  82. package/src/sessions/session-manager.ts +0 -44
  83. package/src/skills/skill-loader.ts +0 -52
  84. package/src/skills/skill-manager.ts +0 -175
  85. package/src/skills/skill-router.ts +0 -99
  86. package/src/skills/skill-types.ts +0 -26
  87. package/src/subagents/subagent-manager.ts +0 -22
  88. package/src/subagents/task-tool.ts +0 -13
  89. package/tests/integration/agent-loop-basic.spec.ts +0 -56
  90. package/tests/integration/agent-skill-default-from-sandbox.spec.ts +0 -67
  91. package/tests/integration/concurrency-single-turn.spec.ts +0 -35
  92. package/tests/integration/otel-metrics-emission.spec.ts +0 -62
  93. package/tests/integration/otel-trace-propagation.spec.ts +0 -48
  94. package/tests/integration/parity-benchmark.spec.ts +0 -45
  95. package/tests/integration/provider-local-smoke.spec.ts +0 -63
  96. package/tests/integration/session-resume.spec.ts +0 -30
  97. package/tests/integration/skill-install-rollback.spec.ts +0 -64
  98. package/tests/integration/skill-sandbox-file-blob.spec.ts +0 -54
  99. package/tests/integration/skills-progressive-disclosure.spec.ts +0 -61
  100. package/tests/integration/streaming-compaction-boundary.spec.ts +0 -43
  101. package/tests/integration/structured-messages-agent.spec.ts +0 -265
  102. package/tests/integration/subagent-isolation.spec.ts +0 -24
  103. package/tests/security/skill-sandbox-isolation.spec.ts +0 -51
  104. package/tests/unit/create-tools-schema-parity.spec.ts +0 -22
  105. package/tests/unit/extracted-manifest.spec.ts +0 -41
  106. package/tests/unit/interfaces-contract.spec.ts +0 -101
  107. package/tests/unit/structured-messages.spec.ts +0 -176
  108. package/tests/unit/token-tracker.spec.ts +0 -22
  109. package/tsconfig.json +0 -14
  110. package/vitest.config.ts +0 -7
package/README.md CHANGED
@@ -2,9 +2,17 @@
2
2
 
3
3
  Provider-agnostic TypeScript agent framework with Claude-code-compatible tool semantics.
4
4
 
5
- The harness provides the core loop that drives an AI agent: send messages to an LLM, execute the tool calls it returns, feed results back, and repeat until the LLM produces a final text response.
5
+ Published on npm as **`@bluecopa/harness`**.
6
6
 
7
- ## Quickstart
7
+ Two execution modes: a simple single-agent loop (`createAgent` + `VercelAgentLoop`) and a process-based orchestrator (`ArcLoop`) that dispatches parallel processes with context management, memory, and resilience.
8
+
9
+ ## Install
10
+
11
+ ```bash
12
+ pnpm add @bluecopa/harness
13
+ ```
14
+
15
+ ## Development
8
16
 
9
17
  ```bash
10
18
  pnpm install
@@ -13,9 +21,11 @@ pnpm test
13
21
 
14
22
  ## Architecture
15
23
 
24
+ ### Single-Agent Loop
25
+
16
26
  ```
17
27
  ┌──────────────┐ ┌──────────────┐ ┌──────────────────┐
18
- │ createAgent │────▶│ AgentLoop │────▶│ LLM (Claude) │
28
+ │ createAgent │────►│ AgentLoop │────►│ LLM (Claude) │
19
29
  │ (turn loop) │ │ (nextAction)│ │ │
20
30
  └──────┬───────┘ └──────────────┘ └──────────────────┘
21
31
  │ │
@@ -27,20 +37,82 @@ pnpm test
27
37
  └──────────────┘
28
38
  ```
29
39
 
30
- 1. `createAgent` drives a deterministic step loop
31
- 2. Each step calls `loop.nextAction(messages)` to get the LLM's decision
32
- 3. If it's a tool call, the harness executes it via `ToolProvider` and appends the result
33
- 4. If it's a final action, the loop ends and returns the result
40
+ ### ArcLoop Orchestrator
34
41
 
35
- ## Using with the sandbox
42
+ ```
43
+ Orchestrator (ArcLoop — Opus 4.6 by default)
44
+ │ tools: Thread, Check, Cancel, Remember, ReadEpisode
45
+
46
+ │ Turn 1 (parallel):
47
+ ├──► Process 0 ("read auth", model=fast) ─┐
48
+ ├──► Process 1 ("read routes", model=fast) ─┼──► Episodes
49
+ ├──► Process 2 ("read tests", model=fast) ─┘
50
+
51
+ │ Turn 2 (dispatch dependent work):
52
+ ├──► Thread("fix bug", context=[ep0,ep1,ep2]) ──► Episode
53
+
54
+ │ Turn 3 (parallel):
55
+ ├──► Thread("run tests", context=[ep3]) ─┐
56
+ ├──► Thread("update docs", context=[ep3]) ─┘
57
+
58
+ └──► Final text response
59
+ ```
36
60
 
37
- The most common setup connects the harness to a running sandbox service via `ControlPlaneE2BExecutor`:
61
+ Full architecture doc: [`docs/arc.md`](../docs/arc.md)
38
62
 
39
- ```ts
40
- import { createAgent } from './src/agent/create-agent';
41
- import { E2BToolProvider } from './src/providers/e2b-tool-provider';
63
+ ---
64
+
65
+ ## ToolProvider
66
+
67
+ The contract for tool execution. All agent modes use this interface.
68
+
69
+ ```typescript
70
+ interface ToolProvider {
71
+ bash(command: string, options?: BashOptions): Promise<ToolResult>;
72
+ readFile(path: string, options?: ReadOptions): Promise<ToolResult>;
73
+ writeFile(path: string, content: string): Promise<ToolResult>;
74
+ editFile(path: string, oldText: string, newText: string): Promise<ToolResult>;
75
+ glob(pattern: string, options?: GlobOptions): Promise<ToolResult>;
76
+ grep(pattern: string, path?: string, options?: GrepOptions): Promise<ToolResult>;
77
+ webFetch?(options: WebFetchOptions): Promise<ToolResult>;
78
+ webSearch?(query: string): Promise<ToolResult>;
79
+ capabilities(): ToolProviderCapabilities;
80
+ }
81
+
82
+ interface ToolResult {
83
+ success: boolean;
84
+ output: string;
85
+ error?: string;
86
+ }
87
+ ```
88
+
89
+ Built-in implementations:
90
+
91
+ | Provider | Description |
92
+ |----------|-------------|
93
+ | `LocalToolProvider` | Runs tools on the local filesystem |
94
+ | `E2BToolProvider` | Routes tools to a sandbox VM via `ControlPlaneE2BExecutor` |
95
+ | `CompositeToolProvider` | Combines multiple providers (e.g. local filesystem + sandbox bash) |
96
+
97
+ ## SandboxProvider
98
+
99
+ Higher-level sandbox operations beyond basic tool calls:
100
+
101
+ ```typescript
102
+ interface SandboxProvider {
103
+ exec(command: string, options?: SandboxExecOptions): Promise<SandboxExecResult>;
104
+ readSandboxFile(path: string): Promise<SandboxFileBlob>;
105
+ writeSandboxFile(path: string, content: SandboxFileBlob): Promise<void>;
106
+ }
107
+ ```
108
+
109
+ Used by `SkillManager` for executing skill scripts in isolated VMs.
110
+
111
+ ## Connecting to a Sandbox
112
+
113
+ ```typescript
42
114
  import { ControlPlaneE2BExecutor } from './src/providers/control-plane-e2b-executor';
43
- import { VercelAgentLoop } from './src/loop/vercel-agent-loop';
115
+ import { E2BToolProvider } from './src/providers/e2b-tool-provider';
44
116
 
45
117
  // Connect to sandbox service
46
118
  const executor = new ControlPlaneE2BExecutor({
@@ -50,187 +122,172 @@ const executor = new ControlPlaneE2BExecutor({
50
122
  });
51
123
  await executor.initialize(); // creates a Firecracker VM
52
124
 
53
- // Build and run the agent
54
- const agent = createAgent({
55
- toolProvider: new E2BToolProvider(executor),
56
- loop: new VercelAgentLoop(), // needs ANTHROPIC_API_KEY
57
- });
125
+ const toolProvider = new E2BToolProvider(executor);
58
126
 
59
- const result = await agent.run('create a bar chart of sales data');
60
- console.log(result.output); // LLM's final response
61
- console.log(result.steps); // number of tool steps
127
+ // ... use with createAgent or ArcLoop
62
128
 
63
- await executor.destroy(); // tears down the VM
129
+ await executor.destroy(); // tears down the VM
64
130
  ```
65
131
 
66
- For a complete working example, see [`examples/chat-assistant/src/chat.ts`](../examples/chat-assistant/src/chat.ts).
67
-
68
- ### From environment variables
132
+ From environment variables: `ControlPlaneE2BExecutor.fromEnv()` reads `SAMYX_BASE_URL` and `SAMYX_API_KEY`.
69
133
 
70
- `ControlPlaneE2BExecutor.fromEnv()` reads `SAMYX_BASE_URL` and `SAMYX_API_KEY` automatically:
134
+ ---
71
135
 
72
- ```ts
73
- const executor = ControlPlaneE2BExecutor.fromEnv();
74
- ```
75
-
76
- ## Using locally (no sandbox)
136
+ ## Single-Agent Mode (`createAgent`)
77
137
 
78
- For development without a sandbox service, use `LocalToolProvider` which runs tools on the local machine:
138
+ For simple tasks that don't need orchestration:
79
139
 
80
- ```ts
140
+ ```typescript
81
141
  import { createAgent } from './src/agent/create-agent';
82
142
  import { LocalToolProvider } from './src/providers/local-tool-provider';
83
143
 
84
144
  const agent = createAgent({
85
145
  toolProvider: new LocalToolProvider(process.cwd()),
86
- loop: new VercelAgentLoop(),
146
+ loop: new VercelAgentLoop(), // needs ANTHROPIC_API_KEY
87
147
  });
88
148
 
89
149
  const result = await agent.run('list all TypeScript files');
150
+ console.log(result.output);
90
151
  ```
91
152
 
92
- ## Key modules
93
-
94
- ### Agent creation (`src/agent/create-agent.ts`)
153
+ ### Configuration
95
154
 
96
- `createAgent(options)` returns an agent with a `.run(prompt, options?)` method. Options:
155
+ | Option | Type | Default | Description |
156
+ |--------|------|---------|-------------|
157
+ | `toolProvider` | `ToolProvider` | required | Executes tool calls |
158
+ | `loop` | `AgentLoop` | `VercelAgentLoop` | LLM decision loop |
159
+ | `sandboxProvider` | `SandboxProvider` | — | Higher-level sandbox operations |
160
+ | `maxSteps` | `number` | 30 | Max tool steps per run |
161
+ | `telemetry` | `HarnessTelemetry` | — | OpenTelemetry-style tracing |
162
+ | `skillIndexPath` | `string` | — | Path to skill index JSON for routing |
97
163
 
98
- | Option | Type | Description |
99
- |--------|------|-------------|
100
- | `toolProvider` | `ToolProvider` | Required. Executes tool calls |
101
- | `loop` | `AgentLoop` | LLM decision loop (default: `VercelAgentLoop`) |
102
- | `sandboxProvider` | `SandboxProvider` | Optional. Higher-level sandbox ops (file download, exec with env) |
103
- | `maxSteps` | `number` | Max tool steps per run (default: 30) |
104
- | `telemetry` | `HarnessTelemetry` | Optional. OpenTelemetry-style tracing |
105
- | `skillIndexPath` | `string` | Optional. Path to skill index JSON |
164
+ ### VercelAgentLoop
106
165
 
107
- ### Agent loop (`src/loop/vercel-agent-loop.ts`)
166
+ Calls Claude via the Vercel AI SDK. Supports parallel tool calls and a configurable system prompt.
108
167
 
109
- `VercelAgentLoop` calls Claude via the Vercel AI SDK. It supports:
110
- - Parallel tool calls (returns `ToolBatchAction` when the LLM requests multiple tools at once)
111
- - Configurable system prompt
112
- - Model selection via `HARNESS_MODEL` env var (default: `claude-sonnet-4-5`)
113
-
114
- ```ts
168
+ ```typescript
115
169
  const loop = new VercelAgentLoop({
116
170
  systemPrompt: 'You are a helpful coding assistant.',
171
+ model: 'claude-sonnet-4-5', // or HARNESS_MODEL env var
117
172
  });
118
173
  ```
119
174
 
120
- ### Tool provider (`src/interfaces/tool-provider.ts`)
175
+ ### LCMToolLoop
121
176
 
122
- The contract for tool execution:
177
+ Wraps another loop to add Lossless Context Management and optional REPL orchestration:
123
178
 
124
- ```ts
125
- interface ToolProvider {
126
- bash(command: string, options?: BashOptions): Promise<ToolResult>;
127
- readFile(path: string, options?: ReadOptions): Promise<ToolResult>;
128
- writeFile(path: string, content: string): Promise<ToolResult>;
129
- editFile(path: string, oldText: string, newText: string): Promise<ToolResult>;
130
- glob(pattern: string, options?: GlobOptions): Promise<ToolResult>;
131
- grep(pattern: string, path?: string, options?: GrepOptions): Promise<ToolResult>;
132
- webFetch?(options: WebFetchOptions): Promise<ToolResult>;
133
- webSearch?(query: string): Promise<ToolResult>;
134
- capabilities(): ToolProviderCapabilities;
135
- }
179
+ ```typescript
180
+ import { LCMToolLoop } from './src/loop/lcm-tool-loop';
181
+ import { VercelAgentLoop } from './src/loop/vercel-agent-loop';
136
182
 
137
- interface ToolResult {
138
- success: boolean;
139
- output: string;
140
- error?: string;
141
- }
183
+ const loop = new LCMToolLoop({
184
+ innerLoop: new VercelAgentLoop(),
185
+ toolProvider: mySandboxProvider,
186
+ enableRepl: true, // default: true
187
+ bridgeDir: '/var/run/bridge',
188
+ onActivity: (entry) => console.log(entry),
189
+ onLlmRequest: async (prompt) => callLLM(prompt),
190
+ onWebFetchRequest: async (url) => fetch(url),
191
+ });
142
192
  ```
143
193
 
144
- Built-in implementations:
194
+ **Standard mode**: Lossless context trimming — the LLM always sees a coherent, budget-fitting view of the full conversation.
145
195
 
146
- | Provider | Description |
147
- |----------|-------------|
148
- | `LocalToolProvider` | Runs tools on the local filesystem |
149
- | `E2BToolProvider` | Routes tools to an E2B-compatible executor over HTTP |
150
- | `CompositeToolProvider` | Combines multiple providers (e.g. sandbox + web) |
196
+ **REPL mode**: When the LLM returns a Bash action with the REPL marker, the loop writes a Python script into the sandbox, injects the bridge module, runs the script, and polls for sub-requests (LLM, web_fetch, ask_user) that the harness fulfills.
151
197
 
152
- ### Action types (`src/agent/types.ts`)
198
+ ---
153
199
 
154
- The LLM returns one of these action types each turn:
200
+ ## ArcLoop (Orchestrator Mode)
155
201
 
156
- ```ts
157
- // Single tool call
158
- interface ToolCallAction {
159
- type: 'tool';
160
- name: 'Bash' | 'Read' | 'Write' | 'Edit' | 'Glob' | 'Grep' | ...;
161
- args: Record<string, unknown>;
162
- }
202
+ For complex tasks that benefit from parallel processes, context management, and memory:
163
203
 
164
- // Multiple independent tool calls (executed in parallel)
165
- interface ToolBatchAction {
166
- type: 'tool_batch';
167
- calls: ToolCallAction[];
168
- }
169
-
170
- // Final text response (ends the loop)
171
- interface FinalAction {
172
- type: 'final';
173
- content: string;
174
- }
175
- ```
176
-
177
- ### LCM tool loop (`src/loop/lcm-tool-loop.ts`)
178
-
179
- `LCMToolLoop` wraps another loop to add LCM-based tool routing, REPL script execution, and bridge-based tool dispatch. Used in the chat-assistant example.
180
-
181
- ### Sandbox provider (`src/interfaces/sandbox-provider.ts`)
204
+ ```typescript
205
+ import { createArcAgent } from './src/arc/create-arc-agent';
182
206
 
183
- Higher-level sandbox operations beyond basic tool calls:
207
+ const agent = await createArcAgent({
208
+ toolProvider: myToolProvider,
209
+ episodeStore: myEpisodeStore, // required
210
+ sessionMemoStore: mySessionMemoStore, // required
211
+ longTermStore: myLongTermStore, // required
212
+ taskId: 'task-1',
213
+ sessionId: 'session-1',
214
+ });
184
215
 
185
- ```ts
186
- interface SandboxProvider {
187
- exec(command: string, options?: SandboxExecOptions): Promise<SandboxExecResult>;
188
- readSandboxFile(path: string): Promise<SandboxFileBlob>;
189
- writeSandboxFile(path: string, content: SandboxFileBlob): Promise<void>;
216
+ // Streaming
217
+ for await (const event of agent.stream(messages, signal)) {
218
+ if (event.type === 'text_delta') process.stdout.write(event.text);
219
+ if (event.type === 'process_dispatched') console.log(` → ${event.action}`);
220
+ if (event.type === 'done') console.log(`Done in ${event.stats.durationMs}ms`);
190
221
  }
191
- ```
192
-
193
- ### Observability (`src/observability/otel.ts`)
194
-
195
- `HarnessTelemetry` provides OpenTelemetry-style spans and metrics for agent runs.
196
222
 
197
- ### Arc: Orchestrator + Thread Architecture (`src/arc/`)
223
+ // Non-streaming
224
+ const result = await agent.run(messages, signal);
225
+ ```
198
226
 
199
- `ArcLoop` is an `AgentLoop` implementation where an orchestrator LLM dispatches bounded threads via a single `Thread` tool. Threads produce episodes (summary + full trace). The orchestrator only sees summaries, keeping its context small.
227
+ ### ArcLoopConfig
228
+
229
+ | Option | Type | Default | Description |
230
+ |--------|------|---------|-------------|
231
+ | `model` | `string` | `'claude-opus-4-6'` | Orchestrator model (ID or tier name) |
232
+ | `modelMap` | `Record<ModelTier, string>` | haiku/sonnet/opus | Maps fast/medium/strong to model IDs |
233
+ | `apiKey` | `string` | — | Anthropic API key |
234
+ | `systemPrompt` | `string` | built-in | Custom orchestrator system prompt |
235
+ | `maxTurns` | `number` | 30 | Max orchestrator turns |
236
+ | `processTimeout` | `number` | 120_000 | Per-process timeout (ms) |
237
+ | `processMaxSteps` | `number` | 20 | Per-process max tool steps |
238
+ | `contextWindowSize` | `number` | 200_000 | Context window in tokens |
239
+ | `outputReserve` | `number` | 20_000 | Tokens reserved for output |
240
+ | `autoMemory` | `boolean` | true | Auto-detect patterns from episodes |
241
+ | `episodeStore` | `EpisodeStore` | required | Stores episode summaries + traces |
242
+ | `sessionMemoStore` | `SessionMemoStore` | required | Stores session memos |
243
+ | `longTermStore` | `LongTermStore` | required | Stores long-term memories |
244
+ | `taskId` | `string` | required | Task identifier |
245
+ | `sessionId` | `string` | required | Session identifier |
246
+ | `toolProvider` | `ToolProvider` | required | Tool execution |
247
+ | `processTools` | `Record<string, AnyTool>` | builtinTools | Tools available inside processes |
248
+ | `extraOrchestratorTools` | `Record<string, AnyTool>` | — | Custom orchestrator tools |
249
+ | `onOrchestratorTool` | `function` | — | Handler for custom orchestrator tools |
250
+ | `resilience` | `ResiliencePolicy` | — | Composable resilience pipeline |
251
+ | `traceWriter` | `function` | — | Callback for trace event emission |
252
+
253
+ ### Resilience
254
+
255
+ ```typescript
256
+ import { resilience } from './src/arc/resilience';
257
+
258
+ const pipeline = resilience()
259
+ .retry({ maxRetries: 2, baseDelay: 1000 })
260
+ .timeout({ durationMs: 30_000 })
261
+ .circuitBreaker({ failureThreshold: 5 })
262
+ .build();
263
+
264
+ const agent = await createArcAgent({
265
+ // ...config
266
+ resilience: pipeline,
267
+ });
268
+ ```
200
269
 
201
- ```ts
202
- import { createArcAgent } from './src/arc/create-arc-agent';
203
- import { InMemoryEpisodeStore } from './src/arc/stores/episode-store';
204
- import { InMemorySessionMemoStore } from './src/arc/stores/session-memo-store';
205
- import { InMemoryLongTermStore } from './src/arc/stores/long-term-store';
270
+ ### Trace Emission
206
271
 
207
- const agent = createArcAgent({
208
- toolProvider: new LocalToolProvider(process.cwd()),
209
- episodeStore: new InMemoryEpisodeStore(),
210
- sessionMemoStore: new InMemorySessionMemoStore(),
211
- longTermStore: new InMemoryLongTermStore(),
212
- taskId: 'task-1',
213
- sessionId: 'session-1',
272
+ ```typescript
273
+ const traces: TraceEvent[] = [];
274
+ const agent = await createArcAgent({
275
+ // ...config
276
+ traceWriter: (event) => traces.push(event),
214
277
  });
215
-
216
- const result = await agent.run('Fix the authentication bug');
217
278
  ```
218
279
 
219
- Key features:
220
- - **Parallel threads**: orchestrator calls Thread N times in one turn → all run concurrently
221
- - **Four-tier memory**: thread context → episodes → session memos → long-term
222
- - **Per-thread models**: Haiku for reads, Sonnet for implementation
223
- - **Template compression**: zero-LLM-call episode summaries
224
- - **Async consolidation**: non-blocking background distillation
280
+ Traces can be validated against the formal model: `cd verify && cargo run -- trace file.ndjson`
225
281
 
226
- Full architecture doc: [`docs/arc.md`](../docs/arc.md)
282
+ ---
227
283
 
228
- ## Package layout
284
+ ## Package Layout
229
285
 
230
286
  ```
231
287
  src/
232
288
  ├── agent/ # createAgent, step executor, types
233
- ├── arc/ # ArcLoop orchestrator, threads, memory hierarchy
289
+ ├── arc/ # ArcLoop orchestrator, processes, memory, resilience
290
+ │ ├── resilience/ # Retry, circuit breaker, timeout, bulkhead, fallback
234
291
  │ ├── stores/ # RxDB + in-memory store implementations
235
292
  │ └── object-store/ # Pluggable cloud sync (fs, memory)
236
293
  ├── interfaces/ # ToolProvider, SandboxProvider, AgentLoop contracts
@@ -240,17 +297,20 @@ src/
240
297
  ├── hooks/ # Pre/post tool call hooks
241
298
  ├── permissions/ # Tool permission checks
242
299
  ├── sessions/ # Session persistence
243
- ├── subagents/ # Subagent spawning and task tools
300
+ ├── subagents/ # Subagent spawning
244
301
  ├── skills/ # Skill index, routing, and management
245
302
  ├── optimization/ # Benchmark runner
246
303
  └── observability/ # OpenTelemetry integration
304
+
305
+ verify/ # Rust formal verification (Stateright model checker)
306
+ testing/ # Adversarial scenario replay harness
307
+ tests/ # Vitest test suite
247
308
  ```
248
309
 
249
310
  ## Documentation
250
311
 
251
- - **Arc architecture**: [`docs/arc.md`](../docs/arc.md)
252
- - Provider guide: `docs/guides/providers.md`
253
- - Skills guide: `docs/guides/skills.md`
254
- - Observability guide: `docs/guides/observability.md`
255
- - Release process: `../docs/RELEASE.md`
256
- - Full example: [`../examples/chat-assistant/src/chat.ts`](../examples/chat-assistant/src/chat.ts)
312
+ - [Arc architecture](../docs/arc.md) — process model, context window, memory, resilience, verification
313
+ - [Testing](../docs/testing.md) — test layers, running tests, writing new tests
314
+ - [Sandbox setup](../docs/PUBLIC_SANDBOX.md) — deploying the sandbox service
315
+ - [Release process](../docs/RELEASE.md) — versioning and publishing
316
+ - [Example](../examples/chat-assistant/src/chat.ts) — complete working chat assistant
@@ -0,0 +1,109 @@
1
+ import { A as ArcEvent, S as StepUsage } from '../types-B3VY0xnE.js';
2
+ import 'zod';
3
+ import './profile-graph.js';
4
+ import 'ai';
5
+
6
+ type ArcAppRunStats = {
7
+ tools: number;
8
+ commands: number;
9
+ reads: number;
10
+ searches: number;
11
+ fileChanges: number;
12
+ failures: number;
13
+ inputTokens: number;
14
+ outputTokens: number;
15
+ };
16
+ type ArcAppProcessMeta = {
17
+ label?: string;
18
+ profile?: string;
19
+ model?: string;
20
+ dispatchedAt: number;
21
+ status: 'pending' | 'running' | 'completed' | 'failed' | 'cancelled';
22
+ step?: number;
23
+ usage?: StepUsage;
24
+ };
25
+ type ArcAppBackgroundTurn = {
26
+ id: string;
27
+ label?: string;
28
+ profile?: string;
29
+ model?: string;
30
+ dispatchedAt: number;
31
+ };
32
+ type ArcAppState = {
33
+ activeTurnId: string | null;
34
+ processes: Record<string, ArcAppProcessMeta>;
35
+ backgroundTurns: Record<string, ArcAppBackgroundTurn>;
36
+ runStats: ArcAppRunStats;
37
+ };
38
+ type ArcAppObservedActivity = {
39
+ phase: 'start' | 'end';
40
+ processId?: string;
41
+ toolName: string;
42
+ args?: Record<string, unknown>;
43
+ ok?: boolean;
44
+ preview?: string;
45
+ command?: string;
46
+ path?: string;
47
+ };
48
+ type ArcAdapterInputEvent = ArcEvent;
49
+ type ArcAppEvent = {
50
+ type: 'state_changed';
51
+ state: ArcAppState;
52
+ } | {
53
+ type: 'stats_changed';
54
+ stats: ArcAppRunStats;
55
+ } | {
56
+ type: 'activity_observed';
57
+ activity: ArcAppObservedActivity;
58
+ } | {
59
+ type: 'assistant_progress';
60
+ id?: string;
61
+ text: string;
62
+ } | {
63
+ type: 'process_progress';
64
+ id: string;
65
+ meta: ArcAppProcessMeta;
66
+ } | {
67
+ type: 'process_summary';
68
+ id: string;
69
+ meta?: ArcAppProcessMeta;
70
+ summary: string;
71
+ display: string;
72
+ } | {
73
+ type: 'process_failed';
74
+ id?: string;
75
+ error: string;
76
+ } | {
77
+ type: 'assistant_output';
78
+ text: string;
79
+ };
80
+ type Listener = (event: ArcAppEvent) => void;
81
+ declare function formatArcAppProcessSummary(summary: string): string;
82
+ declare function createEmptyArcAppRunStats(): ArcAppRunStats;
83
+ declare function formatArcAppRunStats(stats: ArcAppRunStats): string;
84
+ declare function buildArcAppRunSummary(stats: ArcAppRunStats, modelId: string): string | null;
85
+ declare function createInitialArcAppState(): ArcAppState;
86
+ declare class ArcAppAdapter {
87
+ private state;
88
+ private listeners;
89
+ private bashCommands;
90
+ private processTextBuffers;
91
+ constructor(initialState?: ArcAppState);
92
+ subscribe(listener: Listener): () => void;
93
+ getState(): ArcAppState;
94
+ reset(): void;
95
+ beginTurn(turnId: string): void;
96
+ finishTurn(turnId: string): void;
97
+ backgroundTurn(turnId: string, meta: {
98
+ label?: string;
99
+ profile?: string;
100
+ model?: string;
101
+ }): boolean;
102
+ handleArcEvent(event: ArcAdapterInputEvent): void;
103
+ private emit;
104
+ private handleProcessActivity;
105
+ private handleProcessTextDelta;
106
+ private flushProcessTextBuffer;
107
+ }
108
+
109
+ export { type ArcAdapterInputEvent, ArcAppAdapter, type ArcAppBackgroundTurn, type ArcAppEvent, type ArcAppObservedActivity, type ArcAppProcessMeta, type ArcAppRunStats, type ArcAppState, buildArcAppRunSummary, createEmptyArcAppRunStats, createInitialArcAppState, formatArcAppProcessSummary, formatArcAppRunStats };