@bluecopa/harness 1.0.0 → 2.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +212 -117
- package/dist/arc/index.d.ts +796 -0
- package/dist/arc/index.js +2863 -0
- package/dist/arc/index.js.map +1 -0
- package/dist/observability/otel.d.ts +36 -0
- package/dist/observability/otel.js +73 -0
- package/dist/observability/otel.js.map +1 -0
- package/dist/shared-types-DRxnerLT.d.ts +138 -0
- package/dist/skills/index.d.ts +67 -0
- package/dist/skills/index.js +282 -0
- package/dist/skills/index.js.map +1 -0
- package/package.json +26 -2
- package/AGENTS.md +0 -18
- package/docs/guides/observability.md +0 -32
- package/docs/guides/providers.md +0 -51
- package/docs/guides/skills.md +0 -25
- package/docs/security/skill-sandbox-threat-model.md +0 -20
- package/src/agent/create-agent.ts +0 -884
- package/src/agent/create-tools.ts +0 -33
- package/src/agent/step-executor.ts +0 -15
- package/src/agent/types.ts +0 -57
- package/src/context/llm-compaction-strategy.ts +0 -37
- package/src/context/prepare-step.ts +0 -65
- package/src/context/token-tracker.ts +0 -26
- package/src/extracted/manifest.json +0 -10
- package/src/extracted/prompts/compaction.md +0 -5
- package/src/extracted/prompts/system.md +0 -5
- package/src/extracted/tools.json +0 -82
- package/src/hooks/hook-runner.ts +0 -22
- package/src/hooks/tool-wrappers.ts +0 -64
- package/src/interfaces/compaction-strategy.ts +0 -18
- package/src/interfaces/hooks.ts +0 -24
- package/src/interfaces/sandbox-provider.ts +0 -29
- package/src/interfaces/session-store.ts +0 -48
- package/src/interfaces/tool-provider.ts +0 -70
- package/src/loop/bridge.ts +0 -363
- package/src/loop/context-store.ts +0 -207
- package/src/loop/lcm-tool-loop.ts +0 -163
- package/src/loop/vercel-agent-loop.ts +0 -279
- package/src/observability/context.ts +0 -17
- package/src/observability/metrics.ts +0 -27
- package/src/observability/otel.ts +0 -105
- package/src/observability/tracing.ts +0 -13
- package/src/optimization/agent-evaluator.ts +0 -40
- package/src/optimization/config-serializer.ts +0 -16
- package/src/optimization/optimization-runner.ts +0 -39
- package/src/optimization/trace-collector.ts +0 -33
- package/src/permissions/permission-manager.ts +0 -34
- package/src/providers/composite-tool-provider.ts +0 -72
- package/src/providers/control-plane-e2b-executor.ts +0 -218
- package/src/providers/e2b-tool-provider.ts +0 -68
- package/src/providers/local-tool-provider.ts +0 -190
- package/src/providers/skill-sandbox-provider.ts +0 -46
- package/src/sessions/file-session-store.ts +0 -61
- package/src/sessions/in-memory-session-store.ts +0 -39
- package/src/sessions/session-manager.ts +0 -44
- package/src/skills/skill-loader.ts +0 -52
- package/src/skills/skill-manager.ts +0 -175
- package/src/skills/skill-router.ts +0 -99
- package/src/skills/skill-types.ts +0 -26
- package/src/subagents/subagent-manager.ts +0 -22
- package/src/subagents/task-tool.ts +0 -13
- package/tests/integration/agent-loop-basic.spec.ts +0 -56
- package/tests/integration/agent-skill-default-from-sandbox.spec.ts +0 -66
- package/tests/integration/concurrency-single-turn.spec.ts +0 -35
- package/tests/integration/otel-metrics-emission.spec.ts +0 -62
- package/tests/integration/otel-trace-propagation.spec.ts +0 -48
- package/tests/integration/parity-benchmark.spec.ts +0 -45
- package/tests/integration/provider-local-smoke.spec.ts +0 -63
- package/tests/integration/session-resume.spec.ts +0 -30
- package/tests/integration/skill-install-rollback.spec.ts +0 -64
- package/tests/integration/skill-sandbox-file-blob.spec.ts +0 -54
- package/tests/integration/skills-progressive-disclosure.spec.ts +0 -61
- package/tests/integration/streaming-compaction-boundary.spec.ts +0 -43
- package/tests/integration/structured-messages-agent.spec.ts +0 -265
- package/tests/integration/subagent-isolation.spec.ts +0 -24
- package/tests/security/skill-sandbox-isolation.spec.ts +0 -51
- package/tests/unit/create-tools-schema-parity.spec.ts +0 -22
- package/tests/unit/extracted-manifest.spec.ts +0 -41
- package/tests/unit/interfaces-contract.spec.ts +0 -101
- package/tests/unit/structured-messages.spec.ts +0 -176
- package/tests/unit/token-tracker.spec.ts +0 -22
- package/tsconfig.json +0 -14
- package/vitest.config.ts +0 -7
|
@@ -0,0 +1,796 @@
|
|
|
1
|
+
import { A as AnyTool, T as ToolProvider, a as ToolResult, M as ModelFactory, b as ToolResultArtifact } from '../shared-types-DRxnerLT.js';
|
|
2
|
+
export { c as ActionType, B as BashOptions, d as BatchOp, e as BatchResult, G as GlobOptions, f as GrepOptions, R as ReadOptions, g as TextEditorRequest, h as ThreadStatus, i as ToolProviderCapabilities, W as WebFetchOptions } from '../shared-types-DRxnerLT.js';
|
|
3
|
+
import 'ai';
|
|
4
|
+
|
|
5
|
+
/**
 * Describes a single tool call: its id, the name of the tool, and the
 * argument object supplied to it.
 */
interface ToolCallInfo {
    /** Identifier of this call (ToolResultInfo declares a field of the same name). */
    toolCallId: string;
    /** Name of the tool being called. */
    toolName: string;
    /** Arguments for the call, keyed by argument name. */
    args: Record<string, unknown>;
    /** Provider-specific metadata preserved across round-trips (e.g., Gemini thought signatures). */
    providerMetadata?: Record<string, unknown>;
}
|
|
12
|
+
/**
 * Describes the textual outcome of a single tool call, with optional error
 * and timing information.
 */
interface ToolResultInfo {
    /** Id of the call this result belongs to — presumably matches ToolCallInfo.toolCallId; confirm against the producer. */
    toolCallId: string;
    /** Name of the tool that produced the result. */
    toolName: string;
    /** Tool output as a string. */
    result: string;
    /** Optional error flag. NOTE(review): an absent flag presumably means success — confirm. */
    isError?: boolean;
    /** Optional duration of the call; the field name indicates milliseconds. */
    durationMs?: number;
}
|
|
19
|
+
/**
 * One piece of multi-part message content, discriminated by `type`:
 * either plain text, or binary image data together with its MIME type.
 */
type ContentPart =
    | {
        type: "text";
        text: string;
    }
    | {
        type: "image";
        /** Raw image bytes. */
        image: Buffer | Uint8Array;
        mimeType: string;
    };
|
|
27
|
+
/**
 * A single message in an agent conversation. Content is either a plain
 * string or a list of ContentPart values; tool calls and tool results may be
 * attached to the message.
 */
interface AgentMessage {
    /** Speaker of the message. */
    role: "system" | "user" | "assistant" | "tool";
    /** Message body: plain text, or structured parts (text / image). */
    content: string | ContentPart[];
    /** Tool calls attached to this message, if any. */
    toolCalls?: ToolCallInfo[];
    /** Tool results attached to this message, if any. */
    toolResults?: ToolResultInfo[];
    /** Provider-specific metadata preserved across round-trips (e.g., Gemini thought signatures). */
    providerMetadata?: Record<string, unknown>;
}
|
|
35
|
+
|
|
36
|
+
/**
|
|
37
|
+
* Tool registry: the Tool contract and schema extraction.
|
|
38
|
+
*
|
|
39
|
+
* Agent tool definitions (Bash, Read, Write, etc.) live in the consumer
|
|
40
|
+
* (truecode). The harness only provides the Tool contract and helpers.
|
|
41
|
+
*/
|
|
42
|
+
|
|
43
|
+
/**
 * A registered tool: schema for the model, execute for the worker.
 *
 * Both `schema` and `execute` are optional; see the per-field notes for when
 * each may be omitted.
 */
interface Tool {
    /** Tool name. */
    name: string;
    /** AI SDK tool schema. Optional for ARC-internal tools (ReadEpisode, LCM_*, ScratchPad_*). */
    schema?: AnyTool | undefined;
    /**
     * Execute using the ToolProvider. If not set, tool is handled externally (e.g. ARC tools).
     *
     * Receives the provider, the call arguments, and a working directory, and
     * resolves to a ToolResult.
     */
    execute?: (provider: ToolProvider, args: Record<string, unknown>, workDir: string) => Promise<ToolResult>;
}
|
|
51
|
+
|
|
52
|
+
/**
 * A message as held by a MessageStore: string-only content plus the
 * conversation it belongs to, its index, and a timestamp.
 */
interface StoredMessage {
    /** Identifier of this message. */
    id: string;
    /** Identifier of the conversation the message belongs to. */
    conversationId: string;
    /** Numeric index of the message — presumably its position within the conversation; confirm. */
    index: number;
    /** Speaker of the message (no "system" role here, unlike AgentMessage). */
    role: "user" | "assistant" | "tool";
    /** Message body as plain text. */
    content: string;
    /** Tool calls attached to this message, if any. */
    toolCalls?: ToolCallInfo[];
    /** Tool results attached to this message, if any. */
    toolResults?: ToolResultInfo[];
    /** Numeric timestamp. NOTE(review): unit (ms vs s) is not visible here — confirm. */
    timestamp: number;
}
|
|
62
|
+
/**
 * One hit returned by MessageStore.grep: which message matched and text
 * describing the match.
 */
interface GrepResult {
    /** Id of the matching message. */
    messageId: string;
    /** Id of the conversation containing the matching message. */
    conversationId: string;
    /** Index of the matching message. */
    messageIndex: number;
    /** Excerpt text for the hit. */
    excerpt: string;
    /** Context text for the hit. NOTE(review): how this differs from `excerpt` is not visible here — confirm. */
    matchContext: string;
}
|
|
69
|
+
/**
 * Storage contract for StoredMessage records addressed by conversation id
 * and index. All methods are synchronous.
 */
interface MessageStore {
    /** Add a message to the store. */
    append(message: StoredMessage): void;
    /** Return the messages of one conversation. */
    getConversation(conversationId: string): StoredMessage[];
    /** Return the message at `index` in a conversation, or null. */
    getMessage(conversationId: string, index: number): StoredMessage | null;
    /**
     * Search stored messages for `pattern`.
     *
     * @param pattern Search pattern. NOTE(review): whether this is a regex or a literal is not visible here — confirm.
     * @param opts Optional conversation filter and result cap.
     */
    grep(pattern: string, opts?: {
        conversationId?: string;
        maxResults?: number;
    }): GrepResult[];
    /** Serialize all messages for session persistence. */
    serialize(): StoredMessage[];
    /** Load serialized messages into this store (for session hydration). */
    loadFrom(messages: StoredMessage[]): void;
}
|
|
82
|
+
/**
 * MessageStore implementation. The class name suggests messages are kept in
 * memory; the backing fields are private and untyped in this declaration.
 */
declare class MemoryMessageStore implements MessageStore {
    private messages;
    private byConversation;
    /** Add a message to the store. */
    append(message: StoredMessage): void;
    /** Return the messages of one conversation. */
    getConversation(conversationId: string): StoredMessage[];
    /** Return the message at `index` in a conversation, or null. */
    getMessage(conversationId: string, index: number): StoredMessage | null;
    /** Return the stored messages in serializable form (see MessageStore.serialize). */
    serialize(): StoredMessage[];
    /** Load previously serialized messages into this instance. */
    loadFrom(messages: StoredMessage[]): void;
    /** Static factory: build a MemoryMessageStore from serialized messages. */
    static hydrate(messages: StoredMessage[]): MemoryMessageStore;
    /** Search stored messages for `pattern`, optionally limited by conversation and result count. */
    grep(pattern: string, opts?: {
        conversationId?: string;
        maxResults?: number;
    }): GrepResult[];
}
|
|
96
|
+
|
|
97
|
+
/**
 * A node stored in a SummaryDAG: summary text plus the ids it was derived
 * from and bookkeeping numbers (depth, token count, creation time).
 */
interface SummaryNode {
    /** Identifier of this node. */
    id: string;
    /** Depth of the node. NOTE(review): presumably 0 for leaves and higher for compacted parents — confirm. */
    depth: number;
    /** Ids of the sources this node summarizes. */
    sourceIds: string[];
    /** Ids of the conversations this node draws from. */
    sourceConversationIds: string[];
    /** Summary text. */
    summary: string;
    /** Artifact descriptions as strings. */
    artifacts: string[];
    /** Operation descriptions as strings. */
    operations: string[];
    /** Outcome text. */
    outcome: string;
    /** Token count associated with this node. */
    tokenCount: number;
    /** Numeric creation timestamp. */
    createdAt: number;
}
|
|
109
|
+
/** Optional thresholds accepted by SummaryDAG.compact. */
interface CompactionOpts {
    /** Minimum number of uncovered children before compaction triggers (default: 4) */
    minChildren?: number;
    /** Soft token budget — compact when total tokens at a depth exceed this (default: 8000) */
    softTokenBudget?: number;
}
|
|
115
|
+
/**
 * Contract for a DAG of SummaryNode values: leaves are added, nodes are
 * compacted, and the structure can be queried and serialized. All methods
 * are synchronous.
 */
interface SummaryDAG {
    /** Add a leaf node. */
    addLeaf(node: SummaryNode): void;
    /** Run compaction with optional thresholds; returns SummaryNode values (presumably the newly created ones — confirm). */
    compact(opts?: CompactionOpts): SummaryNode[];
    /** Look up a node by id, or null. */
    getNode(id: string): SummaryNode | null;
    /**
     * Return the lineage of a node as a list of ids.
     *
     * @param visited Optional set of ids; NOTE(review): presumably a cycle/revisit guard for recursion — confirm.
     */
    getLineage(id: string, visited?: Set<string>): string[];
    /**
     * Return the frontier nodes for a numeric budget together with the set of
     * covered ids. NOTE(review): `budget` is presumably a token budget — confirm.
     */
    getFrontier(budget: number): {
        frontier: SummaryNode[];
        coveredIds: Set<string>;
    };
    /** Return every node. */
    getAllNodes(): SummaryNode[];
    /** Serialize DAG state for session persistence. */
    serialize(): {
        nodes: SummaryNode[];
        coveredBy: [string, string][];
    };
    /** Load serialized DAG state into this instance (for session hydration). */
    loadFrom(data: {
        nodes: SummaryNode[];
        coveredBy: [string, string][];
    }): void;
}
|
|
136
|
+
/**
 * SummaryDAG implementation. The class name suggests state is kept in
 * memory; the backing fields are private and untyped in this declaration.
 */
declare class MemorySummaryDAG implements SummaryDAG {
    private nodes;
    /** Tracks which source IDs have been covered by a parent node */
    private coveredBy;
    /** Return the DAG state (nodes plus covered-by pairs) in serializable form. */
    serialize(): {
        nodes: SummaryNode[];
        coveredBy: [string, string][];
    };
    /** Load previously serialized DAG state into this instance. */
    loadFrom(data: {
        nodes: SummaryNode[];
        coveredBy: [string, string][];
    }): void;
    /** Static factory: build a MemorySummaryDAG from serialized state. */
    static hydrate(data: {
        nodes: SummaryNode[];
        coveredBy: [string, string][];
    }): MemorySummaryDAG;
    /** Add a leaf node. */
    addLeaf(node: SummaryNode): void;
    /** Run compaction with optional thresholds; returns SummaryNode values. */
    compact(opts?: CompactionOpts): SummaryNode[];
    /** Look up a node by id, or null. */
    getNode(id: string): SummaryNode | null;
    /** Return the lineage of a node as a list of ids; `visited` is an optional set of ids. */
    getLineage(id: string, visited?: Set<string>): string[];
    /** Return the frontier nodes for a numeric budget together with the set of covered ids. */
    getFrontier(budget: number): {
        frontier: SummaryNode[];
        coveredIds: Set<string>;
    };
    /** Return every node. */
    getAllNodes(): SummaryNode[];
}
|
|
162
|
+
|
|
163
|
+
/**
 * A short labelled reference to a summary, carried in AssembledContext.
 * NOTE(review): presumably a pointer to content that is not inlined into the
 * context — confirm against the assembler.
 */
interface GhostCue {
    /** Id of the summary this cue refers to. */
    summaryId: string;
    /** Ids of the conversations associated with the summary. */
    conversationIds: string[];
    /** Depth value associated with the summary. */
    depth: number;
    /** Display label for the cue. */
    label: string;
}
|
|
169
|
+
/**
 * Result of context assembly: the messages to send, the ghost cues that
 * accompany them, and a token estimate, plus optional pre-rendered text
 * sections.
 */
interface AssembledContext {
    /** Assembled messages. */
    messages: AgentMessage[];
    /** Ghost cues accompanying the messages. */
    ghostCues: GhostCue[];
    /** Estimated token count for the assembled context. */
    tokenEstimate: number;
    /** Structured sections for downstream consumers (avoids string matching) */
    frontierText?: string;
    /** Text form of the ghost cues; see the note on `frontierText`. */
    ghostCueText?: string;
}
|
|
177
|
+
|
|
178
|
+
/** Names of the events a hook can be registered for. */
type HookEventName =
    | 'PreToolUse'
    | 'PostToolUse'
    | 'BeforeWorker'
    | 'AfterWorker';
|
|
179
|
+
/**
 * Payload passed to a hook callback: the event being fired plus optional
 * tool name, input, output, and free-form metadata.
 */
interface HookContext {
    /** Which event is being fired. */
    event: HookEventName;
    /** Name of the tool involved, when applicable. */
    toolName?: string;
    /** Input arguments, when applicable. */
    input?: Record<string, unknown>;
    /** Tool result, when applicable. NOTE(review): presumably only set for post-execution events — confirm. */
    output?: ToolResult;
    /** Free-form extra data. */
    metadata?: Record<string, unknown>;
}
|
|
186
|
+
/** Verdict produced by a hook: whether to allow, with an optional reason. */
interface HookDecision {
    /** True to allow, false to deny. */
    allow: boolean;
    /** Optional human-readable explanation. */
    reason?: string;
}
|
|
190
|
+
/**
 * Async hook function: receives the HookContext and resolves to a
 * HookDecision, or to nothing.
 */
type HookCallback = (
    context: HookContext,
) => Promise<HookDecision | void>;
|
|
191
|
+
|
|
192
|
+
/**
 * Registry and runner for hook callbacks. The combination rule used by
 * `run` when several callbacks are registered is not visible in this
 * declaration.
 */
declare class HookRunner {
    private readonly hooks;
    /** Register `callback` for the given event name. */
    register(event: HookEventName, callback: HookCallback): void;
    /** Run hooks for `context` and resolve to a HookDecision. */
    run(context: HookContext): Promise<HookDecision>;
}
|
|
197
|
+
|
|
198
|
+
/** Raw worker transcript, append-only storage */
interface Transcript {
    /** Identifier of this transcript. */
    id: string;
    /** Id of the tuple (worker dispatch) this transcript belongs to. */
    tupleId: string;
    /** Instruction text associated with the transcript. */
    instruction: string;
    /** Messages recorded in the transcript. */
    messages: AgentMessage[];
    /** Numeric timestamp. */
    timestamp: number;
}
|
|
206
|
+
/** Artifact produced by a worker dispatch */
interface Artifact {
    /** Identifier of this artifact. */
    id: string;
    /** Id of the tuple (dispatch) that produced the artifact. */
    tupleId: string;
    /** File path or returned value */
    output: string | null;
    /** Full textual worker result, preserved even when expected file artifacts are missing */
    textOutput?: string | undefined;
    /** Final status of the dispatch. */
    status: "complete" | "incomplete" | "failed" | "interrupted";
    /** One-line summary from worker's final message */
    summary: string;
    /** Number of steps the worker used. */
    stepsUsed: number;
    /** Tool calls with results (for orchestrator reasoning) */
    actions?: string[];
    /** The dispatch instruction that produced this artifact */
    instruction?: string;
}
|
|
223
|
+
/** One artifact a worker is expected to produce, as listed in ExpectedOutputContract. */
interface ExpectedArtifact {
    /** Kind of artifact expected. */
    type: "file" | "directory" | "value" | "unknown";
    /** Optional path. NOTE(review): presumably meaningful for "file"/"directory" kinds — confirm. */
    path?: string | undefined;
    /** Optional free-text description. */
    description?: string | undefined;
}
|
|
228
|
+
/**
 * Structured description of what a worker should produce: the expected
 * artifacts plus optional success criteria, verification text, and a
 * description.
 */
interface ExpectedOutputContract {
    /** Artifacts the worker is expected to produce. */
    artifacts: ExpectedArtifact[];
    /** Optional list of success criteria. */
    successCriteria?: string[] | undefined;
    /** Optional verification text. NOTE(review): presumably how to verify the output — confirm. */
    verification?: string | undefined;
    /** Optional free-text description. */
    description?: string | undefined;
}
|
|
234
|
+
/** Worker model tier for dispatch routing. */
type DispatchTier = "fast" | "strong";
|
|
236
|
+
/** Worker instruction tuple */
interface Tuple {
    /** Identifier of this tuple. */
    id: string;
    /** Instruction text for the worker. */
    instruction: string;
    /** Artifact IDs to provide as input */
    inputs: string[];
    /** Structured contract for what the worker should produce */
    expectedOutput: ExpectedOutputContract;
    /** Tool names available to worker */
    tools: string[];
    /** Step budget (1-10) */
    steps: number;
    /** Worker model tier — 'fast' for simple tasks, 'strong' (default) for complex reasoning. */
    tier?: DispatchTier | undefined;
    /** Public orchestrator rationale that preceded this dispatch */
    orchestratorContext?: string | undefined;
}
|
|
253
|
+
/**
 * Record of one dispatch: the tuple that was sent, the artifact that came
 * back, the progress events observed, and a reference to the transcript.
 */
interface DispatchRecord {
    /** The tuple that was dispatched. */
    tuple: Tuple;
    /** The artifact produced by the dispatch. */
    artifact: Artifact;
    /** Reference to transcript in TranscriptStore — NOT the full transcript body. */
    transcriptId: string;
    /** Progress events recorded for the worker. */
    progress: WorkerProgressEvent[];
    /** Numeric completion timestamp. */
    completedAt: number;
    /** Worker execution result (artifacts, actions, status) */
    workerResult?: WorkerResult | undefined;
}
|
|
263
|
+
/**
 * Snapshot of orchestrator reasoning state as lists of strings plus a
 * `decisionPressure` block of turn/dispatch counters.
 */
interface OodaSnapshot {
    /** Observation statements. */
    observations: string[];
    /** Belief statements. */
    beliefs: string[];
    /** Approaches recorded as disproven. */
    disprovenApproaches: string[];
    /** Blocker statements. */
    blockers: string[];
    /** Turn and dispatch counters; field names mirror those on OrchestratorContext. */
    decisionPressure: {
        turn: number;
        maxTurns: number;
        turnsRemaining: number;
        dispatchCount: number;
        allIncomplete: boolean;
    };
}
|
|
276
|
+
/** Level of detail that can be requested when reading an episode. */
type ReadEpisodeDetail =
    | "summary"
    | "trace"
    | "artifacts";
|
|
277
|
+
/** Arguments for reading an episode: its id plus optional detail level, artifact key, and token cap. */
interface ReadEpisodeArgs {
    /** Id of the episode to read. */
    id: string;
    /** Optional detail level. */
    detail?: ReadEpisodeDetail | undefined;
    /** Optional artifact key. NOTE(review): presumably used with detail "artifacts" — confirm. */
    artifactKey?: string | undefined;
    /** Optional cap on tokens returned. */
    maxTokens?: number | undefined;
}
|
|
283
|
+
/** Tool call as recorded in trace events; unlike ToolCallInfo, the call id is optional. */
interface TraceToolCall {
    /** Name of the tool called. */
    toolName: string;
    /** Id of the call, when available. */
    toolCallId?: string | undefined;
    /** Arguments for the call, keyed by argument name. */
    args: Record<string, unknown>;
}
|
|
288
|
+
/**
 * Trace event union, discriminated by `scope` ("orchestrator" | "worker")
 * together with `phase`. Orchestrator variants are keyed by `turn`; worker
 * variants are keyed by `tupleId` (and `step`, except for "worker_result").
 */
type ArcTraceEvent =
    | {
        scope: "orchestrator";
        phase: "model_input";
        turn: number;
        model: string;
        system: string;
        /** Full AgentMessage context before SDK conversion. */
        messages: AgentMessage[];
        /** Full SDK-facing messages after conversion. */
        modelMessages: unknown[];
        toolNames: string[];
    }
    | {
        scope: "orchestrator";
        phase: "model_output";
        turn: number;
        text: string;
        toolCalls: TraceToolCall[];
    }
    | {
        scope: "orchestrator";
        phase: "public_rationale_missing";
        turn: number;
        toolCalls: TraceToolCall[];
    }
    | {
        scope: "orchestrator";
        phase: "tool_call";
        turn: number;
        toolName: string;
        args: Record<string, unknown>;
    }
    | {
        scope: "orchestrator";
        phase: "tool_result";
        turn: number;
        toolName: string;
        args: Record<string, unknown>;
        result: unknown;
    }
    | {
        scope: "worker";
        phase: "model_input";
        tupleId: string;
        step: number;
        model: string;
        system: string;
        messages: AgentMessage[];
        toolNames: string[];
    }
    | {
        scope: "worker";
        phase: "model_output";
        tupleId: string;
        step: number;
        action: unknown;
    }
    | {
        scope: "worker";
        phase: "public_rationale_missing";
        tupleId: string;
        step: number;
        toolNames: string[];
    }
    | {
        scope: "worker";
        phase: "tool_call";
        tupleId: string;
        step: number;
        toolCallId: string;
        toolName: string;
        args: Record<string, unknown>;
    }
    | {
        scope: "worker";
        phase: "tool_result";
        tupleId: string;
        step: number;
        toolCallId: string;
        toolName: string;
        result: unknown;
        resultText: string;
    }
    | {
        scope: "worker";
        phase: "worker_result";
        tupleId: string;
        result: unknown;
    };
|
|
367
|
+
/** Async storage contract for Transcript records. */
interface TranscriptStore {
    /** Add a transcript to the store. */
    append(transcript: Transcript): Promise<void>;
    /** Resolve to every stored transcript. */
    getAll(): Promise<Transcript[]>;
    /** Resolve to the transcript with the given id, or null. */
    get(id: string): Promise<Transcript | null>;
}
|
|
372
|
+
/**
 * Async text index contract: entries are added by id and text, and searched
 * by a query string. How text is embedded or ranked is up to the
 * implementation and not visible here.
 */
interface VectorIndex {
    /** Add `text` under `id`. */
    add(id: string, text: string): Promise<void>;
    /** Search for `query`; resolves to a list of strings (presumably ids of up to `k` matches — confirm). */
    search(query: string, k: number): Promise<string[]>;
    /** Load index state. */
    load(): Promise<void>;
    /** Save index state. */
    save(): Promise<void>;
}
|
|
378
|
+
/** Async key-value storage contract for Artifact records. */
interface ArtifactStore {
    /** Store `artifact` under `id`. */
    set(id: string, artifact: Artifact): Promise<void>;
    /** Resolve to the artifact stored under `id`, or null. */
    get(id: string): Promise<Artifact | null>;
    /** Resolve to all stored artifacts, keyed by string. */
    getAll(): Promise<Record<string, Artifact>>;
}
|
|
383
|
+
/** Async string key-value contract for a scratch pad of notes. */
interface ScratchPad {
    /** Store `content` under `key`. */
    write(key: string, content: string): Promise<void>;
    /** Resolve to the content stored under `key`, or null. */
    read(key: string): Promise<string | null>;
    /** Resolve to a list of strings (presumably the stored keys — confirm). */
    list(): Promise<string[]>;
    /** Clear the scratch pad. */
    clear(): Promise<void>;
}
|
|
389
|
+
/** Serialized session state — everything needed to hydrate an ArcLoop. */
interface SessionSnapshot {
    /** Stored messages (same shape as MessageStore.serialize returns). */
    messages: StoredMessage[];
    /** Dispatch records. */
    dispatches: DispatchRecord[];
    /** Summary DAG nodes (same shape as the `nodes` field of SummaryDAG.serialize). */
    dagNodes: SummaryNode[];
    /** Summary DAG covered-by pairs (same shape as the `coveredBy` field of SummaryDAG.serialize). */
    dagCoveredBy: [string, string][];
    /** Saved turn counter. */
    turn: number;
    /** Saved dispatch counter. */
    dispatchCount: number;
    /** Saved orchestrator message index. */
    orchestratorMessageIndex: number;
}
|
|
399
|
+
/** Lightweight session metadata for listing/picking. */
interface SessionMeta {
    /** Identifier of the session. */
    id: string;
    /** Slug for the session. */
    slug: string;
    /** Numeric creation timestamp. */
    createdAt: number;
    /** Numeric timestamp of the last activity. */
    lastActiveAt: number;
    /** Optional number of tasks. */
    taskCount?: number;
    /** Summary text for the session. */
    summary: string;
}
|
|
408
|
+
/** Async persistence contract for session snapshots and their metadata. */
interface SessionStore {
    /** Resolve to the snapshot saved under `id`, or null. */
    load(id: string): Promise<SessionSnapshot | null>;
    /** Save `snapshot` under `id`. */
    save(id: string, snapshot: SessionSnapshot): Promise<void>;
    /** Resolve to the metadata saved under `id`, or null. */
    getMeta(id: string): Promise<SessionMeta | null>;
    /** Save `meta` under `id`. */
    saveMeta(id: string, meta: SessionMeta): Promise<void>;
    /** Resolve to the metadata of stored sessions. */
    list(): Promise<SessionMeta[]>;
}
|
|
415
|
+
/** What the orchestrator sees each turn */
interface OrchestratorContext {
    /** Task text. */
    task: string;
    /** Artifacts keyed by string. */
    artifacts: Record<string, Artifact>;
    /** Most recent artifact, or null. */
    lastResult: Artifact | null;
    /** Rolling window of recent orchestrator messages */
    recentTurns: AgentMessage[];
    /** Current turn number */
    turn: number;
    /** Max turns allowed */
    maxTurns: number;
    /** Turns remaining after the current turn */
    turnsRemaining: number;
    /** Number of dispatches so far */
    dispatchCount: number;
    /** Artifact status counts across all completed dispatches */
    artifactStatusCounts: Record<Artifact["status"], number>;
    /** True when no dispatch artifact has reached complete status */
    allIncomplete: boolean;
    /** Completed dispatches in chronological order */
    dispatches: DispatchRecord[];
    /** Current observe/orient state rendered into the orchestrator prompt */
    ooda: OodaSnapshot;
    /** LCM message store (all conversations) */
    messageStore?: MessageStore | undefined;
    /** LCM summary DAG */
    summaryDAG?: SummaryDAG | undefined;
}
|
|
443
|
+
|
|
444
|
+
/**
 * Configuration accepted by the ArcLoop constructor. Required fields are the
 * task, working directory, model ids, and tool provider; everything else is
 * optional, with defaults as noted per field.
 */
interface ArcConfig {
    /** Task text. */
    task: string;
    /** Working directory. */
    workDir: string;
    /** Model ID for the orchestrator */
    model: string;
    /** Model ID for workers */
    workerModel: string;
    /** Optional per-tier worker model overrides. Falls back to workerModel when a tier is missing. */
    workerModelMap?: Partial<Record<DispatchTier, string>> | undefined;
    /** Optional model factory. */
    createModel?: ModelFactory;
    /** Tool provider used to execute tools. */
    toolProvider: ToolProvider;
    /** Agent-provided tool definitions (Bash, Read, Write, etc.) with schemas, execute, and artifact metadata. Harness adds ARC framework tools internally. */
    tools?: Map<string, Tool> | undefined;
    /** Max orchestrator turns before stopping (default: 12) */
    maxTurns?: number;
    /** Max steps per worker (default: 5, max: 10) */
    maxStepsPerWorker?: number;
    /** Rolling window size for orchestrator context (default: 10) */
    orchestratorWindowSize?: number;
    /** Directory where agent memory lives (default: workDir/.arc) */
    memDir?: string;
    /** Injected vector index (default: in-memory for tests) */
    vectorIndex?: VectorIndex;
    /** Injected scratch pad for inter-worker note sharing (default: in-memory for tests) */
    scratchPad?: ScratchPad;
    /** Injected transcript store (default: in-memory) */
    transcriptStore?: TranscriptStore;
    /** Injected artifact store (default: in-memory) */
    artifactStore?: ArtifactStore;
    /** Custom worker system prompt (appended to default) */
    workerSystemPromptSuffix?: string | undefined;
    /** Custom messages after the core task/budget block */
    formatOrchestratorContext?: ((context: OrchestratorContext) => AgentMessage[]) | undefined;
    /** Provider options passed to generateText (e.g. reasoningEffort for OpenAI). */
    providerOptions?: Record<string, unknown> | undefined;
    /** Optional hook runner for PreToolUse/PostToolUse/BeforeWorker/AfterWorker events */
    hookRunner?: HookRunner | undefined;
    /** Callback for AskUser orchestrator tool. If provided, AskUser is available to the orchestrator. */
    askUser?: ((question: string, options?: string[]) => Promise<string>) | undefined;
    /** Session store for persistence across restarts. */
    sessionStore?: SessionStore | undefined;
    /** Session ID to resume. If provided with sessionStore, loop hydrates from saved state. */
    sessionId?: string | undefined;
}
|
|
488
|
+
/**
 * Event union yielded by ArcLoop.stream and collected in ArcRunResult.events,
 * discriminated by `type`.
 */
type ArcEvent =
    | {
        type: "orchestrator_turn";
        turn: number;
        contextTokens: number;
    }
    | {
        /** Wraps a full-fidelity ArcTraceEvent. */
        type: "trace";
        trace: ArcTraceEvent;
    }
    | {
        type: "dispatch";
        tupleId: string;
        instruction: string;
    }
    | {
        /** Carries the whole dispatched tuple rather than just its id and instruction. */
        type: "dispatch_full";
        tuple: Tuple;
    }
    | {
        type: "worker_progress";
        tupleId: string;
        progress: WorkerProgressEvent;
    }
    | {
        type: "worker_complete";
        tupleId: string;
        status: Artifact["status"];
        summary: string;
        stepsUsed: number;
        actions?: string[] | undefined;
    }
    | {
        type: "recall";
        query: string;
        answer: string;
    }
    | {
        type: "read_episode";
        id: string;
        detail: ReadEpisodeDetail;
        output: string;
    }
    | {
        type: "ask_user";
        question: string;
        options?: string[] | undefined;
    }
    | {
        type: "orchestrator_usage";
        turn: number;
        inputTokens?: number;
        outputTokens?: number;
    }
    | {
        type: "done";
        output: string;
    }
    | {
        type: "session_saved";
        sessionId: string;
    }
    | {
        type: "text_delta";
        text: string;
    };
|
|
541
|
+
/**
 * Progress event union for a worker, discriminated by `kind`. Model and tool
 * variants carry the `step` they belong to; "worker_result" carries the
 * final status instead.
 */
type WorkerProgressEvent =
    | {
        kind: "model_start";
        step: number;
        maxSteps: number;
    }
    | {
        kind: "model_complete";
        step: number;
        actionType: "final" | "tool" | "tool_batch";
        durationMs: number;
        toolNames?: string[] | undefined;
        publicRationale?: string | undefined;
        missingPublicRationale?: boolean | undefined;
        outputSummary?: string | undefined;
        inputTokens?: number | undefined;
        outputTokens?: number | undefined;
    }
    | {
        kind: "model_error";
        step: number;
        durationMs: number;
        error: string;
    }
    | {
        kind: "tool_start";
        step: number;
        toolCallId: string;
        toolName: string;
        argsSummary?: string | undefined;
    }
    | {
        kind: "tool_complete";
        step: number;
        toolCallId: string;
        toolName: string;
        success: boolean;
        durationMs: number;
        outputSummary: string;
        output?: string | undefined;
        /** Typed `unknown`; narrow before use. */
        exitCode?: unknown;
    }
    | {
        kind: "tool_error";
        step: number;
        toolCallId: string;
        toolName: string;
        durationMs: number;
        error: string;
    }
    | {
        kind: "worker_result";
        status: "complete" | "incomplete" | "failed" | "interrupted";
        stepsUsed: number;
        summary: string;
    };
|
|
590
|
+
/**
 * Configuration for running a single worker: the instruction and contract,
 * the tools and model to use, and optional hooks for progress, tracing, and
 * tool-use interception.
 */
interface RunWorkerConfig {
    /** Original top-level task */
    task?: string | undefined;
    /** Instruction text for the worker. */
    instruction: string;
    /** Structured contract for this dispatch */
    expectedOutput?: ExpectedOutputContract | undefined;
    /** LCM-assembled context for this worker */
    lcmContext?: AssembledContext | undefined;
    /** Artifact ID -> file content */
    inputArtifacts: Map<string, string>;
    /** Tool schemas keyed by string (presumably the tool name — confirm). */
    tools: Record<string, AnyTool>;
    /** Tool registry with execute/artifact metadata for dispatch and episode projection */
    toolRegistry: Map<string, Tool>;
    /** Maximum number of steps for the worker. */
    maxSteps: number;
    /** Tool provider used to execute tools. */
    toolProvider: ToolProvider;
    /** Model factory. */
    createModel: ModelFactory;
    /** Model ID. */
    model: string;
    /** Working directory. */
    workDir: string;
    /** Optional abort signal. */
    signal?: AbortSignal | undefined;
    /** Extra text prefixed to worker system prompt */
    systemPromptPrefix?: string | undefined;
    /** Extra text appended to worker system prompt */
    systemPromptSuffix?: string | undefined;
    /** Provider options passed to generateText (e.g. reasoning config). */
    providerOptions?: Record<string, unknown> | undefined;
    /** Public orchestrator rationale that preceded this dispatch */
    orchestratorContext?: string | undefined;
    /** Tuple id for full-fidelity trace events */
    tupleId?: string | undefined;
    /** Optional diagnostic hook for streaming worker internals to the caller */
    onProgress?: ((event: WorkerProgressEvent) => void) | undefined;
    /** Optional full-fidelity trace hook for raw model/tool IO */
    onTrace?: ((event: ArcTraceEvent) => void) | undefined;
    /** Optional hook runner for PreToolUse/PostToolUse events */
    hookRunner?: HookRunner | undefined;
}
|
|
626
|
+
/**
 * Result of a worker run: the transcript, final output and status, and the
 * actions and artifacts collected along the way.
 */
interface WorkerResult {
    /** Messages exchanged during the run. */
    transcript: AgentMessage[];
    /** Output of the run, or null. */
    output: string | null;
    /** Final status; same union as Artifact.status, referenced rather than repeated. */
    status: Artifact["status"];
    /** Number of steps the worker used. */
    stepsUsed: number;
    /** Last message content for summary */
    lastMessage: string;
    /** All tool calls with results for orchestrator visibility */
    actions: string[];
    /** Artifacts touched during execution (from ToolResult.artifact) */
    artifacts: ToolResultArtifact[];
}
|
|
638
|
+
|
|
639
|
+
/** Value resolved by ArcLoop.run: the final output text and the list of events. */
interface ArcRunResult {
    /** Final output text. */
    output: string;
    /** Events collected during the run. */
    events: ArcEvent[];
}
|
|
643
|
+
/**
 * Orchestration loop. Constructed from an ArcConfig; `stream` yields
 * ArcEvent values, `run` resolves to an ArcRunResult, and `interrupt` /
 * `pushTask` steer a loop that is already running. Private members are
 * untyped in this declaration.
 */
declare class ArcLoop {
    private config;
    private readonly transcriptStore;
    private readonly vectorIndex;
    private readonly scratchPad;
    private readonly artifactStore;
    private messageStore;
    private summaryDAG;
    private readonly createModel;
    private readonly windowSize;
    private readonly model;
    /** Orchestrator tool schemas (for the model) */
    private readonly orchestratorToolSchemas;
    /** Orchestrator tool registry (for execute) — excludes dispatch/done (control flow) */
    private readonly orchestratorToolRegistry;
    /** Dispatcher deps + mutable state — shared with dispatcher.ts functions */
    private readonly dispatchDeps;
    private readonly dispatchState;
    private orchestratorMessageIndex;
    private turn;
    private maxTurns;
    /** Per-turn abort controller — cancelled by interrupt(), refreshed each turn. */
    private turnController;
    /** Resolver for the next task — set when the loop is waiting between tasks. */
    private taskResolve;
    /** Create a loop from the given configuration. */
    constructor(config: ArcConfig);
    /**
     * Interrupt the current turn — cancels in-flight model calls and workers.
     * The orchestrator loop stays alive and will prompt for user steering.
     */
    interrupt(): void;
    /** True when the loop is waiting for the next task (between done boundaries). */
    get idle(): boolean;
    /**
     * Push a follow-up task into the loop. The orchestrator sees it as
     * a new user message with full conversational context from prior tasks.
     *
     * @returns A boolean. NOTE(review): presumably whether the task was accepted (e.g. only while idle) — confirm.
     */
    pushTask(task: string): boolean;
    private waitForNextTask;
    /** Save session snapshot + update meta if a session store is configured. */
    private saveSession;
    /** Reset per-task state while keeping full conversation history. */
    private resetForNewTask;
    /**
     * Stream events from the orchestration loop.
     *
     * @param signal Optional abort signal.
     */
    stream(signal?: AbortSignal): AsyncGenerator<ArcEvent>;
    /**
     * Run a single task to completion (for headless/test use).
     * Breaks after the first `done` event — does not wait for follow-up tasks.
     *
     * @param signal Optional abort signal.
     */
    run(signal?: AbortSignal): Promise<ArcRunResult>;
    /** Append a message to the LCM message store (single source of truth) */
    private appendOrchestratorMessage;
    private findEpisodeRecordBySummaryId;
    private buildContext;
    private buildOrchestratorMessages;
    private buildTaskContextText;
    private readEpisode;
    /**
     * Handle a turn interrupt: prompt user for steering, inject into context.
     */
    private handleInterrupt;
}
|
|
707
|
+
|
|
708
|
+
/**
|
|
709
|
+
* Episode projection: minimal formatting for orchestrator context.
|
|
710
|
+
*
|
|
711
|
+
* The worker collects artifacts and actions during execution.
|
|
712
|
+
* This module just formats DispatchRecords for the orchestrator prompt.
|
|
713
|
+
*/
|
|
714
|
+
|
|
715
|
+
/**
 * Format a single dispatch record for the orchestrator prompt.
 *
 * @param record - Dispatch record to format.
 * @param options - Optional settings object with two optional fields,
 *   `compact` (boolean) and `maxChars` (number). NOTE(review): their exact
 *   effect is not visible in this declaration — presumably a terser layout
 *   and a cap on output length; confirm against the implementation.
 * @returns The formatted text.
 */
|
|
716
|
+
declare function formatDispatchForPrompt(record: DispatchRecord, options?: {
|
|
717
|
+
compact?: boolean;
|
|
718
|
+
maxChars?: number;
|
|
719
|
+
}): string;
|
|
720
|
+
|
|
721
|
+
/**
 * Generic helper that takes a value of type T and returns a T.
 *
 * NOTE(review): the name suggests it copies the value for use in trace
 * events; copy depth and handling of non-plain values are not visible in
 * this declaration — confirm against the implementation.
 */
declare function cloneForTrace<T>(value: T): T;
|
|
722
|
+
|
|
723
|
+
/**
 * In-memory transcript store for testing.
 *
 * Implements TranscriptStore; every method is promise-based.
 */
|
|
724
|
+
declare class MemoryTranscriptStore implements TranscriptStore {
|
|
725
|
+
private transcripts;
|
|
726
|
+
private byId;
|
|
727
|
+
/** Add a transcript to the store. */
append(transcript: Transcript): Promise<void>;
|
|
728
|
+
/** Resolve to the stored transcripts as an array. */
getAll(): Promise<Transcript[]>;
|
|
729
|
+
/** Resolve to the transcript for `id`, or null (presumably when the id is unknown — confirm). */
get(id: string): Promise<Transcript | null>;
|
|
730
|
+
}
|
|
731
|
+
/**
 * In-memory vector index for testing (no actual embeddings).
 *
 * Implements VectorIndex; every method is promise-based.
 */
|
|
732
|
+
declare class MemoryVectorIndex implements VectorIndex {
|
|
733
|
+
private entries;
|
|
734
|
+
/** Register `text` under `id`. */
add(id: string, text: string): Promise<void>;
|
|
735
|
+
/**
 * Search with `query` and a numeric `k`; resolves to an array of strings.
 * NOTE(review): presumably at most `k` entry ids, matched without
 * embeddings — confirm against the implementation.
 */
search(query: string, k: number): Promise<string[]>;
|
|
736
|
+
/** NOTE(review): declared alongside save(); behavior for an in-memory index is not visible here — confirm. */
load(): Promise<void>;
|
|
737
|
+
/** NOTE(review): declared alongside load(); behavior for an in-memory index is not visible here — confirm. */
save(): Promise<void>;
|
|
738
|
+
}
|
|
739
|
+
/**
 * In-memory scratch pad for testing.
 *
 * Implements ScratchPad; every method is promise-based and works with
 * string keys and string content.
 */
|
|
740
|
+
declare class MemoryScratchPad implements ScratchPad {
|
|
741
|
+
private entries;
|
|
742
|
+
/** Store `content` under `key`. */
write(key: string, content: string): Promise<void>;
|
|
743
|
+
/** Resolve to the content for `key`, or null (presumably when the key is absent — confirm). */
read(key: string): Promise<string | null>;
|
|
744
|
+
/** Resolve to an array of strings. NOTE(review): presumably the stored keys — confirm. */
list(): Promise<string[]>;
|
|
745
|
+
/** NOTE(review): presumably removes all entries — confirm. */
clear(): Promise<void>;
|
|
746
|
+
}
|
|
747
|
+
/**
 * In-memory artifact store for testing.
 *
 * Implements ArtifactStore; every method is promise-based.
 */
|
|
748
|
+
declare class MemoryArtifactStore implements ArtifactStore {
|
|
749
|
+
private artifacts;
|
|
750
|
+
/** Store `artifact` under `id`. */
set(id: string, artifact: Artifact): Promise<void>;
|
|
751
|
+
/** Resolve to the artifact for `id`, or null (presumably when the id is unknown — confirm). */
get(id: string): Promise<Artifact | null>;
|
|
752
|
+
/** Resolve to a record of artifacts keyed by string (presumably the artifact id — confirm). */
getAll(): Promise<Record<string, Artifact>>;
|
|
753
|
+
}
|
|
754
|
+
/**
 * In-memory session store for testing.
 *
 * Implements SessionStore. Snapshots and metadata have separate load/save
 * method pairs, both addressed by a string id.
 */
|
|
755
|
+
declare class MemorySessionStore implements SessionStore {
|
|
756
|
+
private snapshots;
|
|
757
|
+
private metas;
|
|
758
|
+
/** Resolve to the snapshot for `id`, or null (presumably when none was saved — confirm). */
load(id: string): Promise<SessionSnapshot | null>;
|
|
759
|
+
/** Store `snapshot` under `id`. */
save(id: string, snapshot: SessionSnapshot): Promise<void>;
|
|
760
|
+
/** Resolve to the metadata for `id`, or null (presumably when none was saved — confirm). */
getMeta(id: string): Promise<SessionMeta | null>;
|
|
761
|
+
/** Store `meta` under `id`. */
saveMeta(id: string, meta: SessionMeta): Promise<void>;
|
|
762
|
+
/** Resolve to an array of SessionMeta entries. */
list(): Promise<SessionMeta[]>;
|
|
763
|
+
}
|
|
764
|
+
|
|
765
|
+
/**
|
|
766
|
+
* File-based transcript store.
|
|
767
|
+
* Stores transcripts as individual JSON files in a directory.
|
|
768
|
+
*
* Implements TranscriptStore with the same three methods as the in-memory
* variant (append / getAll / get).
*/
|
|
769
|
+
declare class FsTranscriptStore implements TranscriptStore {
|
|
770
|
+
private readonly dir;
|
|
771
|
+
private readonly indexPath;
|
|
772
|
+
private index;
|
|
773
|
+
private loaded;
|
|
774
|
+
/** @param dir - Directory path. NOTE(review): presumably where the transcript JSON files live — confirm. */
constructor(dir: string);
|
|
775
|
+
/** Add a transcript to the store. */
append(transcript: Transcript): Promise<void>;
|
|
776
|
+
/** Resolve to the stored transcripts as an array. */
getAll(): Promise<Transcript[]>;
|
|
777
|
+
/** Resolve to the transcript for `id`, or null (presumably when the id is unknown — confirm). */
get(id: string): Promise<Transcript | null>;
|
|
778
|
+
/** NOTE(review): body not visible; presumably loads the index once, tracked by `loaded` — confirm. */
private ensureLoaded;
|
|
779
|
+
}
|
|
780
|
+
/**
|
|
781
|
+
* File-based artifact store.
|
|
782
|
+
* Stores artifacts in a single JSON file.
|
|
783
|
+
*
* Implements ArtifactStore with the same three methods as the in-memory
* variant (set / get / getAll).
*/
|
|
784
|
+
declare class FsArtifactStore implements ArtifactStore {
|
|
785
|
+
private readonly filePath;
|
|
786
|
+
private artifacts;
|
|
787
|
+
private loaded;
|
|
788
|
+
/** @param filePath - File path. NOTE(review): presumably the single JSON file holding all artifacts — confirm. */
constructor(filePath: string);
|
|
789
|
+
/** Store `artifact` under `id`. */
set(id: string, artifact: Artifact): Promise<void>;
|
|
790
|
+
/** Resolve to the artifact for `id`, or null (presumably when the id is unknown — confirm). */
get(id: string): Promise<Artifact | null>;
|
|
791
|
+
/** Resolve to a record of artifacts keyed by string (presumably the artifact id — confirm). */
getAll(): Promise<Record<string, Artifact>>;
|
|
792
|
+
/** NOTE(review): body not visible; presumably reads the file once, tracked by `loaded` — confirm. */
private ensureLoaded;
|
|
793
|
+
/** NOTE(review): body not visible; presumably writes the artifacts back to `filePath` — confirm. */
private save;
|
|
794
|
+
}
|
|
795
|
+
|
|
796
|
+
export { AnyTool, type ArcConfig, type ArcEvent, ArcLoop, type ArcRunResult, type ArcTraceEvent, type Artifact, type ArtifactStore, type DispatchRecord, type DispatchTier, type ExpectedArtifact, type ExpectedOutputContract, FsArtifactStore, FsTranscriptStore, MemoryArtifactStore, MemoryMessageStore, MemoryScratchPad, MemorySessionStore, MemorySummaryDAG, MemoryTranscriptStore, MemoryVectorIndex, type MessageStore, ModelFactory, type OodaSnapshot, type OrchestratorContext, type ReadEpisodeArgs, type ReadEpisodeDetail, type RunWorkerConfig, type ScratchPad, type SessionMeta, type SessionSnapshot, type SessionStore, type StoredMessage, type SummaryDAG, type SummaryNode, type Tool, ToolProvider, ToolResult, ToolResultArtifact, type TraceToolCall, type Transcript, type TranscriptStore, type Tuple, type VectorIndex, type WorkerProgressEvent, type WorkerResult, cloneForTrace, formatDispatchForPrompt };
|