@bluecopa/harness 0.0.1 → 0.1.0-snapshot.10

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,215 @@
1
+ import type { AgentMessage, AgentAction } from '../agent/types';
2
+ import type { ToolProvider } from '../interfaces/tool-provider';
3
+ import type { SandboxProvider } from '../interfaces/sandbox-provider';
4
+ import type { Tool } from 'ai';
5
+ import type { HarnessTelemetry } from '../observability/otel';
6
+ import type { HookRunner } from '../hooks/hook-runner';
7
+ import type { PermissionManager } from '../permissions/permission-manager';
8
+ import type { SkillManager } from '../skills/skill-manager';
9
+ import type { ToolCallAction } from '../agent/types';
10
+ import type { ToolResult } from '../interfaces/tool-provider';
11
+
12
+ // ── Episode types ──
13
+
14
+ export interface Episode { // Record of one executed thread; ThreadResult.episode carries one.
15
+ id: string; // Collected into SessionMemo.sourceEpisodeIds during consolidation.
16
+ taskId: string; // NOTE(review): presumably the key used by EpisodeStore.getEpisodesByTask — confirm.
17
+ sessionId: string; // NOTE(review): presumably the key used by EpisodeStore.getEpisodesBySession — confirm.
18
+ index: number; // Printed as the item number in the consolidated task summary.
19
+ threadAction: string; // Printed next to the index in the task summary; presumably ThreadRequest.action — confirm.
20
+ summary: string;
21
+ toolCalls: string[]; // De-duplicated into the memo's "Tools used" line; presumably tool names — confirm.
22
+ filesRead: string[];
23
+ filesModified: string[]; // De-duplicated into the memo's "Files modified" line.
24
+ model: string;
25
+ steps: number;
26
+ success: boolean; // Rendered as 'ok' / 'failed' and counted in the memo header.
27
+ createdAt: number; // NOTE(review): presumably epoch milliseconds — confirm.
28
+ parentEpisodeIds: string[]; // NOTE(review): presumably mirrors ThreadRequest.contextEpisodeIds — confirm.
29
+ }
30
+
31
+ export interface EpisodeTrace { // Holds the AgentMessage list associated with one episode id.
32
+ episodeId: string; // NOTE(review): presumably Episode.id, as looked up by EpisodeStore.getTrace — confirm.
33
+ messages: AgentMessage[];
34
+ createdAt: number; // NOTE(review): presumably epoch milliseconds — confirm.
35
+ ttl?: number; // NOTE(review): units and who enforces expiry are not visible here — confirm.
36
+ }
37
+
38
+ // ── Session memo types ──
39
+
40
+ export interface SessionMemo { // Compact summary built from a set of episodes (see consolidateEpisodes).
41
+ id: string; // consolidateEpisodes assigns randomUUID(); later collected into LongTermMemory.sourceSessionMemoIds.
42
+ sessionId: string;
43
+ content: string; // Newline-joined summary text.
44
+ sourceEpisodeIds: string[]; // Ids of the episodes this memo was built from.
45
+ createdAt: number; // Date.now() at creation (epoch milliseconds).
46
+ }
47
+
48
+ // ── Long-term memory types ──
49
+
50
+ export interface LongTermMemory { // Durable memory entry built from session memos (see consolidateMemos).
51
+ id: string;
52
+ content: string;
53
+ category: string; // consolidateMemos writes 'session-summary'; other values are not visible here.
54
+ sourceSessionMemoIds: string[]; // Used by consolidateMemos to skip memos that were already consolidated.
55
+ createdAt: number; // Date.now() at creation (epoch milliseconds) when written by consolidateMemos.
56
+ updatedAt: number; // Equal to createdAt when first written by consolidateMemos.
57
+ }
58
+
59
+ // ── Store interfaces ──
60
+
61
+ export interface EpisodeStore { // Async persistence contract for episodes and their traces.
62
+ addEpisode(episode: Episode): Promise<void>;
63
+ addTrace(trace: EpisodeTrace): Promise<void>;
64
+ getEpisode(id: string): Promise<Episode | null>; // NOTE(review): presumably null when no such episode exists — confirm.
65
+ getTrace(episodeId: string): Promise<EpisodeTrace | null>; // NOTE(review): presumably null when missing or evicted — confirm.
66
+ getEpisodesByTask(taskId: string): Promise<Episode[]>; // Read by runConsolidation to build the task memo.
67
+ getEpisodesBySession(sessionId: string): Promise<Episode[]>;
68
+ getRecentEpisodes(limit: number): Promise<Episode[]>; // NOTE(review): ordering is implementation-defined here — confirm.
69
+ evictTraces(olderThan: number): Promise<number>; // NOTE(review): presumably a timestamp cutoff, resolving to the number evicted — confirm.
70
+ }
71
+
72
+ export interface SessionMemoStore { // Async persistence contract for session memos.
73
+ addMemo(memo: SessionMemo): Promise<void>; // Called by consolidateEpisodes after building a memo.
74
+ getMemo(id: string): Promise<SessionMemo | null>; // NOTE(review): presumably null when no such memo exists — confirm.
75
+ getMemosBySession(sessionId: string): Promise<SessionMemo[]>; // Read by runConsolidation before long-term consolidation.
76
+ getRecentMemos(limit: number): Promise<SessionMemo[]>; // NOTE(review): ordering is implementation-defined here — confirm.
77
+ }
78
+
79
+ export interface LongTermStore { // Async persistence contract for long-term memories.
80
+ addMemory(memory: LongTermMemory): Promise<void>; // Called by consolidateMemos after building an entry.
81
+ getMemory(id: string): Promise<LongTermMemory | null>; // NOTE(review): presumably null when no such memory exists — confirm.
82
+ getAllMemories(): Promise<LongTermMemory[]>; // Read in full by consolidateMemos for its duplicate check.
83
+ getMemoriesByCategory(category: string): Promise<LongTermMemory[]>;
84
+ updateMemory(id: string, updates: Partial<Pick<LongTermMemory, 'content' | 'category' | 'updatedAt'>>): Promise<void>; // Only content, category and updatedAt are patchable.
85
+ deleteMemory(id: string): Promise<void>;
86
+ }
87
+
88
+ // ── Thread types ──
89
+
90
+ export interface ThreadRequest { // Parameters for dispatching one thread.
91
+ action: string; // NOTE(review): presumably the instruction text recorded as Episode.threadAction — confirm.
92
+ contextEpisodeIds?: string[]; // NOTE(review): presumably earlier episodes supplied as context — confirm.
93
+ model?: string; // NOTE(review): presumably a model ID or ModelTier name resolved via resolveModel — confirm.
94
+ maxSteps?: number;
95
+ }
96
+
97
+ export interface ThreadResult { // Outcome of one thread: the episode record plus status metadata.
98
+ episode: Episode;
99
+ success: boolean;
100
+ error?: string; // NOTE(review): presumably set only when success is false — confirm.
101
+ /** Actual wall-clock duration of this individual thread in ms. */
102
+ durationMs?: number;
103
+ /** Resolved model ID (e.g. 'claude-haiku-4-5'). */
104
+ resolvedModel?: string;
105
+ }
106
+
107
+ // ── Model tiers ──
108
+
109
/** Named capability tiers that callers may use in place of a concrete model ID. */
export type ModelTier = 'fast' | 'medium' | 'strong';

/** Default model IDs for each tier. Override via ArcLoopConfig.modelMap. */
export const DEFAULT_MODEL_MAP: Record<ModelTier, string> = {
  fast: 'claude-haiku-4-5',
  medium: 'claude-sonnet-4-5',
  strong: 'claude-opus-4-5',
};

/**
 * Resolve a model tier name or raw model ID to a concrete model ID.
 *
 * @param modelOrTier - A tier name ('fast' | 'medium' | 'strong'), a raw model ID, or undefined/empty.
 * @param modelMap - Tier → model ID mapping to consult.
 * @param fallback - Returned when `modelOrTier` is undefined or an empty string.
 * @returns The mapped model ID for a tier name, otherwise `modelOrTier` unchanged.
 */
export function resolveModel(modelOrTier: string | undefined, modelMap: Record<ModelTier, string>, fallback: string): string {
  if (!modelOrTier) return fallback;
  // Own-property check on purpose: the `in` operator also matches inherited keys,
  // so IDs such as 'toString' or 'constructor' would resolve to Object.prototype members.
  if (Object.prototype.hasOwnProperty.call(modelMap, modelOrTier)) {
    return modelMap[modelOrTier as ModelTier];
  }
  return modelOrTier; // raw model ID passthrough
}
124
+
125
+ // ── ArcLoop config ──
126
+
127
+ // eslint-disable-next-line @typescript-eslint/no-explicit-any
128
+ type AnyTool = Tool<any, any>; // Tool with both type parameters left open, so tool maps can hold any tool.
129
+
130
+ export interface ArcLoopConfig { // Configuration consumed by ArcLoop (createArcAgent passes it to `new ArcLoop(config)`).
131
+ /** Orchestrator model (default: 'claude-opus-4-6'). Accepts a model ID or tier name. NOTE(review): DEFAULT_MODEL_MAP.strong is 'claude-opus-4-5' — confirm which default is current. */
132
+ model?: string;
133
+ /** Custom orchestrator system prompt */
134
+ systemPrompt?: string;
135
+ /** Anthropic API key */
136
+ apiKey?: string;
137
+ /** Default model for threads (default: 'medium' → claude-sonnet-4-6). Accepts a model ID or tier name. NOTE(review): DEFAULT_MODEL_MAP.medium is 'claude-sonnet-4-5' — confirm which default is current. */
138
+ threadModel?: string;
139
+ /** Model tier mapping. Override to use different models for fast/medium/strong. The type is a full Record, so an override must supply all three tiers. */
140
+ modelMap?: Record<ModelTier, string>;
141
+ /** Tools available to threads (default: builtinTools) */
142
+ threadTools?: Record<string, AnyTool>;
143
+ /** Thread concurrency limit (default: 3) */
144
+ maxConcurrency?: number;
145
+ /** Max orchestrator turns before stopping (default: 20) */
146
+ maxOrchestratorTurns?: number;
147
+ /** Per-thread timeout in ms (default: 120000) */
148
+ threadTimeout?: number;
149
+ /** Per-thread max steps (default: 20) */
150
+ threadMaxSteps?: number;
151
+
152
+ // Store dependencies (required; createArcAgent also hands these to runConsolidation)
153
+ episodeStore: EpisodeStore;
154
+ sessionMemoStore: SessionMemoStore;
155
+ longTermStore: LongTermStore;
156
+
157
+ // Task/session context (required; createArcAgent also hands these to runConsolidation)
158
+ taskId: string;
159
+ sessionId: string;
160
+
161
+ /** Episode compression strategy (default: 'template') */
162
+ compressor?: 'template' | 'llm';
163
+
164
+ /** Extra orchestrator tools beyond Thread (for dynadocs Task compatibility) */
165
+ extraOrchestratorTools?: Record<string, AnyTool>;
166
+ /** Handler for extra orchestrator tools. Return an AgentAction (typically FinalAction with directive). */
167
+ onOrchestratorTool?: (name: string, args: Record<string, unknown>) => Promise<AgentAction>;
168
+
169
+ /** Tool provider for thread execution (default — local filesystem, sandbox in browser). NOTE(review): the field is required, so "default" presumably describes the typical choice rather than a fallback — confirm. */
170
+ toolProvider: ToolProvider;
171
+ /** Tool provider for skill-matched threads (sandbox with packages pre-installed). If not set, all threads use toolProvider. */
172
+ skillToolProvider?: ToolProvider;
173
+ /** E2B executor instance — needed for binary file transfers (CopyToLocal for docx, xlsx, etc.) */
174
+ // eslint-disable-next-line @typescript-eslint/no-explicit-any
175
+ executor?: any;
176
+ /** Local directory to sync sandbox output artifacts to (default: './outputs') */
177
+ localOutputDir?: string;
178
+
179
+ // ── Thread runtime extras (passed through to each thread's createAgent) ──
180
+
181
+ /** Sandbox provider for skill execution within threads */
182
+ sandboxProvider?: SandboxProvider;
183
+ /** Skill manager for thread agents */
184
+ skillManager?: SkillManager;
185
+ /** Path to skill index JSON */
186
+ skillIndexPath?: string;
187
+ /** Callback for threads to ask the user a question */
188
+ askUser?: (question: string, options?: string[]) => Promise<string>;
189
+ /** Callback for threads to display a message to the user */
190
+ tellUser?: (message: string) => Promise<void>;
191
+ /** Callback for threads to download a file from sandbox */
192
+ downloadRawFile?: (path: string) => Promise<string>;
193
+ /** OpenTelemetry-style tracing for thread agents */
194
+ telemetry?: HarnessTelemetry;
195
+ /** Lifecycle hooks for thread tool calls */
196
+ hookRunner?: HookRunner;
197
+ /** Permission manager for thread tool access */
198
+ permissionManager?: PermissionManager;
199
+ /** Custom tool executor for threads. Return null to fall through to built-in dispatch. */
200
+ executeToolAction?: (action: ToolCallAction) => Promise<ToolResult | null>;
201
+ /** Progress callback fired for each tool call inside threads (tool_start/tool_end with thread context). The event is a union discriminated by `type`. */
202
+ onThreadToolProgress?: (event: { threadIndex: number; threadAction: string } & ({ type: 'tool_start'; name: string; args: Record<string, unknown> } | { type: 'tool_end'; name: string; success: boolean; durationMs: number })) => void;
203
+ }
204
+
205
+ // ── Debug export ──
206
+
207
+ export interface DebugExport { // Snapshot bundling episodes, traces, session memos and long-term memories.
208
+ taskId?: string; // NOTE(review): presumably the filter the export was scoped to, if any — confirm.
209
+ sessionId?: string; // NOTE(review): presumably the filter the export was scoped to, if any — confirm.
210
+ exportedAt: number; // NOTE(review): presumably epoch milliseconds — confirm.
211
+ episodes: Episode[];
212
+ traces: EpisodeTrace[];
213
+ sessionMemos: SessionMemo[];
214
+ longTermMemories: LongTermMemory[];
215
+ }
@@ -0,0 +1,170 @@
1
+ import { tool } from 'ai';
2
+ import { z } from 'zod';
3
+ import { readFile, writeFile, mkdir } from 'node:fs/promises';
4
+ import { dirname } from 'node:path';
5
+ import type { ToolProvider, ToolResult } from '../interfaces/tool-provider';
6
+ import type { ToolCallAction } from '../agent/types';
7
+
8
+ // ── Bridge tool definitions (LLM-facing schemas) ──
9
+
10
+ export const bridgeTools = { // Schema-only tool definitions (description + inputSchema); createBridgeExecutor dispatches on these key names.
11
+ ReadSandbox: tool({ // Handled by sandboxProvider.readFile.
12
+ description: 'Read a file from the sandbox filesystem (where code execution happens)',
13
+ inputSchema: z.object({
14
+ path: z.string().describe('Absolute path in the sandbox filesystem'),
15
+ }),
16
+ }),
17
+ WriteSandbox: tool({ // Handled by sandboxProvider.writeFile.
18
+ description: 'Write content to a file in the sandbox filesystem',
19
+ inputSchema: z.object({
20
+ path: z.string().describe('Absolute path in the sandbox filesystem'),
21
+ content: z.string().describe('Content to write'),
22
+ }),
23
+ }),
24
+ CopyToSandbox: tool({ // Local → sandbox; the executor transfers the file as text.
25
+ description: 'Copy a file from the local project to the sandbox. Use this to stage files for processing by sandbox tools (e.g. data files for a Python script).',
26
+ inputSchema: z.object({
27
+ localPath: z.string().describe('Path on the local filesystem (source)'),
28
+ sandboxPath: z.string().describe('Path in the sandbox (destination)'),
29
+ }),
30
+ }),
31
+ CopyToLocal: tool({ // Sandbox → local; byte transfer when the executor exposes readFileBytes, text otherwise.
32
+ description: 'Copy a file from the sandbox to the local project. Use this to retrieve generated output files (docx, xlsx, pdf, images, etc.).',
33
+ inputSchema: z.object({
34
+ sandboxPath: z.string().describe('Path in the sandbox (source)'),
35
+ localPath: z.string().describe('Path on the local filesystem (destination)'),
36
+ }),
37
+ }),
38
+ };
39
+
40
+ // ── Bridge tool executor ──
41
+
42
+ export interface BridgeExecutorConfig { // Dependencies for createBridgeExecutor.
43
+ localProvider: ToolProvider; // Source of CopyToSandbox reads (readFile).
44
+ sandboxProvider: ToolProvider; // Target of ReadSandbox/WriteSandbox and the text fallback of CopyToLocal.
45
+ /** E2B executor with readFileBytes/writeFileBytes for binary transfers. Only readFileBytes is called by createBridgeExecutor; when absent, CopyToLocal falls back to a text transfer. */
46
+ // eslint-disable-next-line @typescript-eslint/no-explicit-any
47
+ executor?: any;
48
+ }
49
+
50
/**
 * Creates an executeToolAction handler for bridge tools.
 * Returns null for non-bridge tools (falls through to built-in dispatch).
 *
 * Handled tool names: ReadSandbox, WriteSandbox, CopyToSandbox, CopyToLocal.
 * Path arguments must be non-empty strings; anything else yields a failed
 * ToolResult instead of being coerced to '' or '[object Object]' and passed on.
 *
 * @param config - Local and sandbox providers plus an optional byte-capable executor.
 * @returns An async handler resolving to a ToolResult, or null when the tool is not a bridge tool.
 */
export function createBridgeExecutor(config: BridgeExecutorConfig) {
  /** Build a failed ToolResult carrying the given message. */
  const failure = (error: string): ToolResult => ({ success: false, output: '', error });

  /** Render an unknown thrown value as text. */
  const describeError = (error: unknown): string =>
    error instanceof Error ? error.message : String(error);

  /** Accept only non-empty strings as paths. */
  const pathArg = (value: unknown): string | undefined =>
    typeof value === 'string' && value.length > 0 ? value : undefined;

  /** Failure returned when a required path argument is absent or malformed. */
  const missingArg = (toolName: string, key: string): ToolResult =>
    failure(`${toolName} requires a non-empty string "${key}" argument`);

  return async (action: ToolCallAction): Promise<ToolResult | null> => {
    const args: Record<string, unknown> = action.args ?? {};

    switch (action.name) {
      case 'ReadSandbox': {
        const path = pathArg(args.path);
        if (path === undefined) return missingArg(action.name, 'path');
        return config.sandboxProvider.readFile(path);
      }

      case 'WriteSandbox': {
        const path = pathArg(args.path);
        if (path === undefined) return missingArg(action.name, 'path');
        // Empty content is legitimate (creates an empty file), so only the path is validated.
        return config.sandboxProvider.writeFile(path, String(args.content ?? ''));
      }

      case 'CopyToSandbox': {
        const localPath = pathArg(args.localPath);
        if (localPath === undefined) return missingArg(action.name, 'localPath');
        const sandboxPath = pathArg(args.sandboxPath);
        if (sandboxPath === undefined) return missingArg(action.name, 'sandboxPath');
        try {
          // Read from local
          const localResult = await config.localProvider.readFile(localPath);
          if (!localResult.success) {
            return failure(`Failed to read local file: ${localResult.error}`);
          }
          // Write to sandbox (text transfer)
          const writeResult = await config.sandboxProvider.writeFile(sandboxPath, localResult.output);
          if (!writeResult.success) {
            return failure(`Failed to write to sandbox: ${writeResult.error}`);
          }
          return { success: true, output: `Copied ${localPath} → sandbox:${sandboxPath} (${localResult.output.length} chars)` };
        } catch (error) {
          return failure(`CopyToSandbox failed: ${describeError(error)}`);
        }
      }

      case 'CopyToLocal': {
        const sandboxPath = pathArg(args.sandboxPath);
        if (sandboxPath === undefined) return missingArg(action.name, 'sandboxPath');
        const localPath = pathArg(args.localPath);
        if (localPath === undefined) return missingArg(action.name, 'localPath');
        try {
          // Try binary transfer if executor supports readFileBytes
          if (config.executor?.readFileBytes) {
            const bytes: Uint8Array = await config.executor.readFileBytes(sandboxPath);
            await mkdir(dirname(localPath), { recursive: true });
            await writeFile(localPath, Buffer.from(bytes));
            return { success: true, output: `Copied sandbox:${sandboxPath} → ${localPath} (${bytes.byteLength} bytes)` };
          }

          // Fallback: text-based transfer via provider
          const sandboxResult = await config.sandboxProvider.readFile(sandboxPath);
          if (!sandboxResult.success) {
            return failure(`Failed to read sandbox file: ${sandboxResult.error}`);
          }
          await mkdir(dirname(localPath), { recursive: true });
          await writeFile(localPath, sandboxResult.output, 'utf-8');
          return { success: true, output: `Copied sandbox:${sandboxPath} → ${localPath} (${sandboxResult.output.length} chars)` };
        } catch (error) {
          return failure(`CopyToLocal failed: ${describeError(error)}`);
        }
      }

      default:
        // Not a bridge tool — fall through to built-in dispatch
        return null;
    }
  };
}
116
+
117
+ // ── Auto-sync: copy output artifacts from sandbox to local after thread completes ──
118
+
119
/** File extensions (case-insensitive) that count as output artifacts worth syncing. */
const ARTIFACT_EXTENSIONS = /\.(docx|xlsx|pptx|csv|pdf|png|jpg|jpeg|gif|zip|tar|gz)$/i;

/**
 * Scan sandbox output directory and copy any artifact files to local.
 * Called after a skill thread completes.
 *
 * Best-effort by design: listing failures and per-file copy failures are
 * skipped rather than thrown, so the function always resolves.
 *
 * @param sandboxProvider - Provider used to list the directory (bash) and, without an executor, to read files as text.
 * @param localOutputDir - Local directory that receives the files; created on first successful copy.
 * @param executor - Optional object exposing readFileBytes(path) for byte-exact transfers.
 * @param sandboxOutputDir - Directory inside the sandbox to scan (top level only, first 50 entries).
 * @returns Local paths of the files that were written.
 */
export async function syncArtifactsToLocal(
  sandboxProvider: ToolProvider,
  localOutputDir: string,
  // eslint-disable-next-line @typescript-eslint/no-explicit-any
  executor?: any,
  sandboxOutputDir = '/outputs',
): Promise<string[]> {
  const copied: string[] = [];

  // Single-quote the directory for the shell so spaces or metacharacters in the
  // path cannot split or inject into the command ('\'' closes, escapes, reopens).
  const quotedDir = `'${sandboxOutputDir.replace(/'/g, `'\\''`)}'`;

  // Create the local directory once, and only when there is something to write.
  let localDirReady = false;
  const ensureLocalDir = async (): Promise<void> => {
    if (localDirReady) return;
    await mkdir(localOutputDir, { recursive: true });
    localDirReady = true;
  };

  try {
    // List files in sandbox output directory
    const listResult = await sandboxProvider.bash(
      `find ${quotedDir} -maxdepth 1 -type f 2>/dev/null | head -50`,
    );
    if (!listResult.success || !listResult.output.trim()) return copied;

    const files = listResult.output.trim().split('\n').filter(Boolean);

    for (const sandboxPath of files) {
      // Use only the base name locally, so sandbox paths cannot escape localOutputDir.
      const filename = sandboxPath.split('/').pop();
      if (!filename || !ARTIFACT_EXTENSIONS.test(filename)) continue;

      const localPath = `${localOutputDir}/${filename}`;
      try {
        if (executor?.readFileBytes) {
          const bytes: Uint8Array = await executor.readFileBytes(sandboxPath);
          await ensureLocalDir();
          await writeFile(localPath, Buffer.from(bytes));
        } else {
          // Text fallback — NOTE(review): binary formats may not survive a text round-trip.
          const result = await sandboxProvider.readFile(sandboxPath);
          if (!result.success) continue;
          await ensureLocalDir();
          await writeFile(localPath, result.output, 'utf-8');
        }
        copied.push(localPath);
      } catch {
        // Skip files that fail to copy
      }
    }
  } catch {
    // Non-critical — artifacts are bonus
  }

  return copied;
}
@@ -0,0 +1,80 @@
1
+ import type {
2
+ BashOptions,
3
+ GlobOptions,
4
+ GrepOptions,
5
+ ReadOptions,
6
+ ToolProvider,
7
+ ToolProviderCapabilities,
8
+ ToolResult,
9
+ WebFetchOptions,
10
+ BatchOp,
11
+ BatchResult,
12
+ } from '../interfaces/tool-provider';
13
+
14
/**
 * A tool provider that routes operations to two underlying providers:
 * - Local provider: Read, Write, Edit, Glob, Grep (project filesystem)
 * - Sandbox provider: Bash (code execution in sandbox VM)
 *
 * Used for skill threads where code runs in sandbox (packages pre-installed)
 * but file operations target the local project.
 *
 * Web operations go to the local provider when it implements them, otherwise
 * to the sandbox provider. Batch is deliberately unsupported.
 */
export class BridgedToolProvider implements ToolProvider {
  /**
   * @param local - Provider that serves file operations.
   * @param sandbox - Provider that serves bash.
   */
  constructor(
    private readonly local: ToolProvider,
    private readonly sandbox: ToolProvider,
  ) {}

  /** Merge both providers' capabilities according to where each operation is routed. */
  capabilities(): ToolProviderCapabilities {
    const localCaps = this.local.capabilities();
    const sandboxCaps = this.sandbox.capabilities();
    return {
      bash: sandboxCaps.bash,
      fileSystem: localCaps.fileSystem,
      webFetch: localCaps.webFetch || sandboxCaps.webFetch,
      webSearch: localCaps.webSearch || sandboxCaps.webSearch,
      codeExecution: sandboxCaps.codeExecution,
      sandboxed: false, // mixed — not fully sandboxed
    };
  }

  // Bash → sandbox (where packages are installed)
  bash(command: string, options?: BashOptions): Promise<ToolResult> {
    return this.sandbox.bash(command, options);
  }

  // File operations → local project
  readFile(path: string, options?: ReadOptions): Promise<ToolResult> {
    return this.local.readFile(path, options);
  }

  writeFile(path: string, content: string): Promise<ToolResult> {
    return this.local.writeFile(path, content);
  }

  editFile(path: string, oldText: string, newText: string): Promise<ToolResult> {
    return this.local.editFile(path, oldText, newText);
  }

  glob(pattern: string, options?: GlobOptions): Promise<ToolResult> {
    return this.local.glob(pattern, options);
  }

  grep(pattern: string, path?: string, options?: GrepOptions): Promise<ToolResult> {
    return this.local.grep(pattern, path, options);
  }

  // Web operations → whichever provider has them.
  // The function is bound to its owning provider: handing back the raw method would
  // make `bridged.webFetch(...)` run it with `this` set to this bridge instead.
  get webFetch(): ((options: WebFetchOptions) => Promise<ToolResult>) | undefined {
    const owner = this.local.webFetch ? this.local : this.sandbox;
    return owner.webFetch?.bind(owner);
  }

  get webSearch(): ((query: string) => Promise<ToolResult>) | undefined {
    const owner = this.local.webSearch ? this.local : this.sandbox;
    return owner.webSearch?.bind(owner);
  }

  // No batch — mixed providers can't batch together
  get batch(): undefined {
    return undefined;
  }
}
@@ -0,0 +1,118 @@
1
+ import { randomUUID } from 'node:crypto';
2
+ import type { Episode, SessionMemo, LongTermMemory, EpisodeStore, SessionMemoStore, LongTermStore } from './arc-types';
3
+
4
/**
 * Build one session memo out of a batch of episodes and persist it.
 * Intended for task boundaries; the memo lists every thread with its outcome,
 * followed by the de-duplicated modified files and tools.
 *
 * @param episodes - Episodes to summarise; must not be empty.
 * @param sessionId - Session the memo is filed under.
 * @param sessionMemoStore - Store that receives the new memo.
 * @returns The memo that was stored.
 * @throws Error when `episodes` is empty.
 */
export async function consolidateEpisodes(
  episodes: Episode[],
  sessionId: string,
  sessionMemoStore: SessionMemoStore,
): Promise<SessionMemo> {
  if (episodes.length === 0) {
    throw new Error('Cannot consolidate zero episodes');
  }

  // Sets keep first-seen order while dropping repeats across episodes.
  const touchedFiles = new Set<string>(episodes.flatMap(episode => episode.filesModified));
  const seenTools = new Set<string>(episodes.flatMap(episode => episode.toolCalls));
  const okCount = episodes.filter(episode => episode.success).length;

  const lines: string[] = [
    `Task summary (${episodes.length} threads, ${okCount} succeeded):`,
    ...episodes.map(
      episode => `${episode.index}. ${episode.threadAction} (${episode.success ? 'ok' : 'failed'})`,
    ),
  ];
  if (touchedFiles.size > 0) {
    lines.push(`Files modified: ${Array.from(touchedFiles).join(', ')}`);
  }
  if (seenTools.size > 0) {
    lines.push(`Tools used: ${Array.from(seenTools).join(', ')}`);
  }

  const result: SessionMemo = {
    id: randomUUID(),
    sessionId,
    content: lines.join('\n'),
    sourceEpisodeIds: episodes.map(episode => episode.id),
    createdAt: Date.now(),
  };

  await sessionMemoStore.addMemo(result);
  return result;
}
54
+
55
/**
 * Fold session memos into a single long-term memory entry and persist it.
 * Intended for session boundaries. Memos whose ids already appear in any
 * stored memory's sourceSessionMemoIds are skipped, so repeated calls do not
 * store the same memo twice.
 *
 * @param memos - Candidate session memos.
 * @param longTermStore - Store consulted for duplicates and written to.
 * @returns A one-element array with the new memory, or an empty array when nothing new was found.
 */
export async function consolidateMemos(
  memos: SessionMemo[],
  longTermStore: LongTermStore,
): Promise<LongTermMemory[]> {
  if (memos.length === 0) return [];

  // Every memo id that some stored memory was already built from.
  const stored = await longTermStore.getAllMemories();
  const alreadyConsolidated = new Set<string>(stored.flatMap(entry => entry.sourceSessionMemoIds));

  const pending = memos.filter(memo => !alreadyConsolidated.has(memo.id));
  if (pending.length === 0) return [];

  // Template-based: one entry summarising all pending memos.
  const timestamp = Date.now();
  const joinedContent = pending.map(memo => memo.content).join('\n---\n');
  const entry: LongTermMemory = {
    id: randomUUID(),
    content: `Session work:\n${joinedContent}`,
    category: 'session-summary',
    sourceSessionMemoIds: pending.map(memo => memo.id),
    createdAt: timestamp,
    updatedAt: timestamp,
  };

  await longTermStore.addMemory(entry);
  return [entry];
}
95
+
96
/**
 * Full consolidation pipeline: episodes → memo → long-term.
 * Designed to be called as fire-and-forget at task/session boundaries.
 *
 * Safe to call repeatedly for the same task: episodes whose ids already appear
 * in a session memo of this session are not summarised again, mirroring the
 * duplicate check consolidateMemos performs for long-term memories. Without
 * this, every call would store another memo covering the same episodes.
 *
 * @param taskId - Task whose episodes are consolidated.
 * @param sessionId - Session the memo is filed under and whose memos feed long-term memory.
 * @param episodeStore - Source of the task's episodes.
 * @param sessionMemoStore - Store for session memos (read and written).
 * @param longTermStore - Store for long-term memories.
 * @returns Resolves once both stages have finished; store errors propagate to the caller.
 */
export async function runConsolidation(
  taskId: string,
  sessionId: string,
  episodeStore: EpisodeStore,
  sessionMemoStore: SessionMemoStore,
  longTermStore: LongTermStore,
): Promise<void> {
  // 1. Consolidate task episodes that no memo of this session covers yet
  const episodes = await episodeStore.getEpisodesByTask(taskId);
  if (episodes.length > 0) {
    const existingMemos = await sessionMemoStore.getMemosBySession(sessionId);
    const coveredEpisodeIds = new Set<string>(existingMemos.flatMap(memo => memo.sourceEpisodeIds));
    const freshEpisodes = episodes.filter(episode => !coveredEpisodeIds.has(episode.id));
    if (freshEpisodes.length > 0) {
      await consolidateEpisodes(freshEpisodes, sessionId, sessionMemoStore);
    }
  }

  // 2. Consolidate session memos into long-term memory
  //    (re-read so a memo created in step 1 is included)
  const memos = await sessionMemoStore.getMemosBySession(sessionId);
  if (memos.length > 0) {
    await consolidateMemos(memos, longTermStore);
  }
}
@@ -0,0 +1,80 @@
1
+ import { createAgent, type AgentRuntime } from '../agent/create-agent';
2
+ import type { AgentMessage, AgentStreamEvent } from '../agent/types';
3
+ import type { ArcLoopConfig } from './arc-types';
4
+ import { ArcLoop } from './arc-loop';
5
+ import { runConsolidation } from './consolidation';
6
+
7
+ export interface ArcAgentConfig extends ArcLoopConfig { // ArcLoopConfig plus one knob for the wrapping agent.
8
+ /** Max steps for the outer agent loop (default: 1, since ArcLoop runs internally). Passed to createAgent as maxSteps; it is not read by stream(). */
9
+ maxOuterSteps?: number;
10
+ }
11
+
12
/**
 * Create an agent powered by the ArcLoop orchestrator.
 *
 * The returned agent has the same interface as a regular `createAgent()` agent
 * (run/stream), but internally uses the orchestrator → thread architecture.
 *
 * Consolidation runs automatically in the background after each run() completes,
 * and after each stream() that finishes or is abandoned early by its consumer.
 * It is not fired when run() or stream() fails with an error.
 *
 * Note: `stream()` drives ArcLoop.streamAction() directly rather than going
 * through createAgent's stream(), because ArcLoop executes Thread tool calls
 * internally — createAgent would try to re-execute them via the toolProvider.
 *
 * @param config - ArcLoop configuration plus the optional outer step limit.
 * @returns An object exposing `run(prompt, options?)` and `stream(prompt, options?)`.
 */
export function createArcAgent(config: ArcAgentConfig) {
  const arcLoop = new ArcLoop(config);

  const runtime: AgentRuntime = {
    toolProvider: config.toolProvider,
    loop: arcLoop,
    // ArcLoop.nextAction() runs the full orchestration internally,
    // so the outer agent only needs 1 step.
    maxSteps: config.maxOuterSteps ?? 1,
  };

  // run() uses createAgent which calls arcLoop.nextAction() → returns FinalAction. Works.
  const agent = createAgent(runtime);

  /** Start consolidation without awaiting it; failures are deliberately ignored. */
  function fireConsolidation(): void {
    void runConsolidation(
      config.taskId,
      config.sessionId,
      config.episodeStore,
      config.sessionMemoStore,
      config.longTermStore,
    ).catch(() => {
      // Consolidation failure is non-critical
    });
  }

  return {
    /** Run the orchestration to completion, then kick off background consolidation. */
    async run(prompt: string, options?: { history?: AgentMessage[] }) {
      const result = await agent.run(prompt, options);
      fireConsolidation();
      return result;
    },

    /**
     * Stream orchestration events directly from ArcLoop.
     *
     * Yields the standard AgentStreamEvent types:
     * - text_delta: orchestrator reasoning
     * - tool_start/tool_end: thread dispatch & completion (name='Thread')
     * - step_start/step_end: orchestrator turns
     * - done: orchestration complete
     */
    async *stream(
      prompt: string,
      options?: { history?: AgentMessage[] },
    ): AsyncGenerator<AgentStreamEvent> {
      const history = options?.history ?? [];
      const messages: AgentMessage[] = [
        ...history,
        { role: 'user', content: prompt },
      ];

      // A consumer that stops iterating early (e.g. breaks on the `done` event)
      // finishes this generator via return(), which skips any code after the
      // `yield*`. The finally block still runs, so consolidation is fired there.
      let failed = false;
      try {
        yield* arcLoop.streamAction(messages);
      } catch (error) {
        // Match run(): no consolidation when the orchestration itself fails.
        failed = true;
        throw error;
      } finally {
        if (!failed) fireConsolidation();
      }
    },
  };
}