@bluecopa/harness 0.0.1 → 0.1.0-snapshot.10
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +35 -0
- package/package.json +2 -1
- package/src/agent/create-agent.ts +9 -0
- package/src/agent/types.ts +15 -2
- package/src/arc/arc-loop.ts +395 -0
- package/src/arc/arc-types.ts +215 -0
- package/src/arc/bridge-tools.ts +170 -0
- package/src/arc/bridged-tool-provider.ts +80 -0
- package/src/arc/consolidation.ts +118 -0
- package/src/arc/create-arc-agent.ts +80 -0
- package/src/arc/debug.ts +62 -0
- package/src/arc/episode-compressor.ts +151 -0
- package/src/arc/object-store/fs-object-store.ts +60 -0
- package/src/arc/object-store/memory-object-store.ts +41 -0
- package/src/arc/object-store/object-store.ts +12 -0
- package/src/arc/stores/episode-store.ts +120 -0
- package/src/arc/stores/long-term-store.ts +86 -0
- package/src/arc/stores/rxdb-setup.ts +112 -0
- package/src/arc/stores/session-memo-store.ts +58 -0
- package/src/arc/thread-executor.ts +365 -0
- package/src/arc/thread-tool.ts +29 -0
- package/src/loop/context-store.ts +12 -9
- package/src/loop/vercel-agent-loop.ts +12 -6
- package/tests/integration/agent-skill-default-from-sandbox.spec.ts +3 -2
- package/tests/unit/structured-messages.spec.ts +1 -1
|
@@ -0,0 +1,215 @@
|
|
|
1
|
+
import type { AgentMessage, AgentAction } from '../agent/types';
|
|
2
|
+
import type { ToolProvider } from '../interfaces/tool-provider';
|
|
3
|
+
import type { SandboxProvider } from '../interfaces/sandbox-provider';
|
|
4
|
+
import type { Tool } from 'ai';
|
|
5
|
+
import type { HarnessTelemetry } from '../observability/otel';
|
|
6
|
+
import type { HookRunner } from '../hooks/hook-runner';
|
|
7
|
+
import type { PermissionManager } from '../permissions/permission-manager';
|
|
8
|
+
import type { SkillManager } from '../skills/skill-manager';
|
|
9
|
+
import type { ToolCallAction } from '../agent/types';
|
|
10
|
+
import type { ToolResult } from '../interfaces/tool-provider';
|
|
11
|
+
|
|
12
|
+
// ── Episode types ──
|
|
13
|
+
|
|
14
|
+
/**
 * Compact record of the work done by one thread.
 *
 * Consolidation (see consolidation.ts) reads `index`, `threadAction`,
 * `success`, `filesModified` and `toolCalls` from each episode to build a
 * session memo, and records `id` in `SessionMemo.sourceEpisodeIds`.
 */
export interface Episode {
  /** Identifier used for store lookups and memo provenance. */
  id: string;
  /** Task this episode belongs to; key for `EpisodeStore.getEpisodesByTask`. */
  taskId: string;
  /** Session this episode belongs to; key for `EpisodeStore.getEpisodesBySession`. */
  sessionId: string;
  /** Ordinal printed in front of the action in consolidated memos. */
  index: number;
  /** Text of the action the thread carried out (presumably `ThreadRequest.action` — confirm). */
  threadAction: string;
  /** Compressed description of the outcome. */
  summary: string;
  /** Tool identifiers recorded for this episode; merged into "Tools used" by consolidation. */
  toolCalls: string[];
  /** Paths recorded as read. */
  filesRead: string[];
  /** Paths recorded as modified; merged into "Files modified" by consolidation. */
  filesModified: string[];
  /** Model identifier recorded for the thread. */
  model: string;
  /** Step count recorded for the thread. */
  steps: number;
  /** Rendered as "ok" / "failed" in consolidated memos. */
  success: boolean;
  /** Creation timestamp (presumably epoch milliseconds, like memo timestamps — confirm). */
  createdAt: number;
  /** Ids of episodes this one was derived from (presumably the context episodes — confirm). */
  parentEpisodeIds: string[];
}
|
|
30
|
+
|
|
31
|
+
/** Full message transcript stored alongside an episode. */
export interface EpisodeTrace {
  /** Id of the episode this transcript belongs to; key for `EpisodeStore.getTrace`. */
  episodeId: string;
  /** The recorded agent messages. */
  messages: AgentMessage[];
  /** Creation timestamp (presumably epoch milliseconds — confirm). */
  createdAt: number;
  /** Optional time-to-live; unit is not visible here — NOTE(review): confirm against the store. */
  ttl?: number;
}
|
|
37
|
+
|
|
38
|
+
// ── Session memo types ──
|
|
39
|
+
|
|
40
|
+
/**
 * Distilled summary of a set of episodes.
 *
 * Produced by `consolidateEpisodes`, which fills `content` with a plain-text
 * summary and `createdAt` with `Date.now()`.
 */
export interface SessionMemo {
  /** Identifier; recorded in `LongTermMemory.sourceSessionMemoIds` once consolidated. */
  id: string;
  /** Session the memo belongs to; key for `SessionMemoStore.getMemosBySession`. */
  sessionId: string;
  /** Plain-text summary. */
  content: string;
  /** Ids of the episodes that were folded into this memo. */
  sourceEpisodeIds: string[];
  /** Creation timestamp in epoch milliseconds. */
  createdAt: number;
}
|
|
47
|
+
|
|
48
|
+
// ── Long-term memory types ──
|
|
49
|
+
|
|
50
|
+
/**
 * Durable memory entry derived from session memos.
 *
 * `consolidateMemos` creates these with category `'session-summary'` and with
 * `createdAt === updatedAt` at creation time.
 */
export interface LongTermMemory {
  /** Identifier used by `LongTermStore` get/update/delete. */
  id: string;
  /** Memory text. */
  content: string;
  /** Grouping label; key for `LongTermStore.getMemoriesByCategory`. */
  category: string;
  /** Ids of the session memos this entry was built from; used to avoid consolidating a memo twice. */
  sourceSessionMemoIds: string[];
  /** Creation timestamp in epoch milliseconds. */
  createdAt: number;
  /** Last-update timestamp in epoch milliseconds. */
  updatedAt: number;
}
|
|
58
|
+
|
|
59
|
+
// ── Store interfaces ──
|
|
60
|
+
|
|
61
|
+
/** Asynchronous persistence contract for episodes and their traces. */
export interface EpisodeStore {
  /** Persist an episode. */
  addEpisode(episode: Episode): Promise<void>;
  /** Persist the transcript for an episode. */
  addTrace(trace: EpisodeTrace): Promise<void>;
  /** Look up one episode by id; resolves to `null` (presumably when no match exists). */
  getEpisode(id: string): Promise<Episode | null>;
  /** Look up the transcript for an episode; resolves to `null` (presumably when none is stored). */
  getTrace(episodeId: string): Promise<EpisodeTrace | null>;
  /** All episodes recorded for a task. Ordering is implementation-defined — confirm before relying on it. */
  getEpisodesByTask(taskId: string): Promise<Episode[]>;
  /** All episodes recorded for a session. */
  getEpisodesBySession(sessionId: string): Promise<Episode[]>;
  /** Up to `limit` episodes (presumably newest first — confirm against implementations). */
  getRecentEpisodes(limit: number): Promise<Episode[]>;
  /**
   * Evict traces relative to the `olderThan` cutoff and resolve to a count
   * (presumably the number of traces removed — confirm against implementations).
   */
  evictTraces(olderThan: number): Promise<number>;
}
|
|
71
|
+
|
|
72
|
+
/** Asynchronous persistence contract for session memos. */
export interface SessionMemoStore {
  /** Persist a memo. */
  addMemo(memo: SessionMemo): Promise<void>;
  /** Look up one memo by id; resolves to `null` (presumably when no match exists). */
  getMemo(id: string): Promise<SessionMemo | null>;
  /** All memos recorded for a session; consumed by the consolidation pipeline. */
  getMemosBySession(sessionId: string): Promise<SessionMemo[]>;
  /** Up to `limit` memos (presumably newest first — confirm against implementations). */
  getRecentMemos(limit: number): Promise<SessionMemo[]>;
}
|
|
78
|
+
|
|
79
|
+
/** Asynchronous persistence contract for long-term memories. */
export interface LongTermStore {
  /** Persist a memory entry. */
  addMemory(memory: LongTermMemory): Promise<void>;
  /** Look up one entry by id; resolves to `null` (presumably when no match exists). */
  getMemory(id: string): Promise<LongTermMemory | null>;
  /** Every stored entry; `consolidateMemos` scans this to detect already-consolidated memos. */
  getAllMemories(): Promise<LongTermMemory[]>;
  /** Entries whose `category` matches. */
  getMemoriesByCategory(category: string): Promise<LongTermMemory[]>;
  /** Patch an entry; only `content`, `category` and `updatedAt` may be supplied. */
  updateMemory(
    id: string,
    updates: Partial<Pick<LongTermMemory, 'content' | 'category' | 'updatedAt'>>,
  ): Promise<void>;
  /** Remove an entry by id. */
  deleteMemory(id: string): Promise<void>;
}
|
|
87
|
+
|
|
88
|
+
// ── Thread types ──
|
|
89
|
+
|
|
90
|
+
/** Parameters for dispatching one thread. */
export interface ThreadRequest {
  /** What the thread should do. */
  action: string;
  /** Ids of earlier episodes to hand to the thread as context. */
  contextEpisodeIds?: string[];
  /** Model override (presumably a tier name or raw model ID, as accepted by `resolveModel` — confirm). */
  model?: string;
  /** Step budget override for this thread. */
  maxSteps?: number;
}
|
|
96
|
+
|
|
97
|
+
/** Outcome of one thread execution. */
export interface ThreadResult {
  /** Episode recorded for the thread. */
  episode: Episode;
  /** Whether the thread succeeded. */
  success: boolean;
  /** Failure description, when one is available. */
  error?: string;
  /** Actual wall-clock duration of this individual thread in ms. */
  durationMs?: number;
  /** Resolved model ID (e.g. 'claude-haiku-4-5'). */
  resolvedModel?: string;
}
|
|
106
|
+
|
|
107
|
+
// ── Model tiers ──
|
|
108
|
+
|
|
109
|
+
/** Symbolic capability tiers that can be used in place of a concrete model ID. */
export type ModelTier = 'fast' | 'medium' | 'strong';

/** Default model IDs for each tier. Override via ArcLoopConfig.modelMap. */
export const DEFAULT_MODEL_MAP: Record<ModelTier, string> = {
  fast: 'claude-haiku-4-5',
  medium: 'claude-sonnet-4-5',
  strong: 'claude-opus-4-5',
};

/**
 * Resolve a model tier name or raw model ID to a concrete model ID.
 *
 * @param modelOrTier Tier name, raw model ID, or nothing.
 * @param modelMap Tier → model ID mapping to consult.
 * @param fallback Returned when `modelOrTier` is undefined or empty.
 * @returns The mapped ID for a tier name, otherwise `modelOrTier` unchanged.
 */
export function resolveModel(
  modelOrTier: string | undefined,
  modelMap: Record<ModelTier, string>,
  fallback: string,
): string {
  if (!modelOrTier) return fallback;

  // Own-property check on purpose: the `in` operator also matches inherited
  // keys such as 'toString' or 'constructor', which would return a function
  // from Object.prototype instead of passing the raw ID through.
  if (Object.prototype.hasOwnProperty.call(modelMap, modelOrTier)) {
    return modelMap[modelOrTier as ModelTier];
  }

  return modelOrTier; // raw model ID passthrough
}
|
|
124
|
+
|
|
125
|
+
// ── ArcLoop config ──
|
|
126
|
+
|
|
127
|
+
// eslint-disable-next-line @typescript-eslint/no-explicit-any
type AnyTool = Tool<any, any>;

/**
 * Configuration for the ArcLoop orchestrator and the threads it dispatches.
 *
 * Defaults quoted on individual fields are documentation only; this
 * declaration does not apply them.
 */
export interface ArcLoopConfig {
  /**
   * Orchestrator model. Accepts a model ID or tier name.
   * Documented default: 'claude-opus-4-6'.
   * NOTE(review): DEFAULT_MODEL_MAP.strong is 'claude-opus-4-5' — confirm which value the loop really uses.
   */
  model?: string;
  /** Custom orchestrator system prompt */
  systemPrompt?: string;
  /** Anthropic API key */
  apiKey?: string;
  /**
   * Default model for threads. Accepts a model ID or tier name.
   * Documented default: the 'medium' tier.
   * NOTE(review): this comment previously said 'medium' → claude-sonnet-4-6, but
   * DEFAULT_MODEL_MAP.medium is 'claude-sonnet-4-5' — confirm the intended version.
   */
  threadModel?: string;
  /** Model tier mapping. Override to use different models for fast/medium/strong. */
  modelMap?: Record<ModelTier, string>;
  /** Tools available to threads (default: builtinTools) */
  threadTools?: Record<string, AnyTool>;
  /** Thread concurrency limit (default: 3) */
  maxConcurrency?: number;
  /** Max orchestrator turns before stopping (default: 20) */
  maxOrchestratorTurns?: number;
  /** Per-thread timeout in ms (default: 120000) */
  threadTimeout?: number;
  /** Per-thread max steps (default: 20) */
  threadMaxSteps?: number;

  // Store dependencies (all required)
  /** Where episodes and traces are persisted. */
  episodeStore: EpisodeStore;
  /** Where session memos are persisted. */
  sessionMemoStore: SessionMemoStore;
  /** Where long-term memories are persisted. */
  longTermStore: LongTermStore;

  // Task/session context
  /** Task identifier; consolidation fetches episodes by this id. */
  taskId: string;
  /** Session identifier; consolidation fetches memos by this id. */
  sessionId: string;

  /** Episode compression strategy (default: 'template') */
  compressor?: 'template' | 'llm';

  /** Extra orchestrator tools beyond Thread (for dynadocs Task compatibility) */
  extraOrchestratorTools?: Record<string, AnyTool>;
  /** Handler for extra orchestrator tools. Return an AgentAction (typically FinalAction with directive). */
  onOrchestratorTool?: (name: string, args: Record<string, unknown>) => Promise<AgentAction>;

  /** Tool provider for thread execution (default — local filesystem, sandbox in browser) */
  toolProvider: ToolProvider;
  /** Tool provider for skill-matched threads (sandbox with packages pre-installed). If not set, all threads use toolProvider. */
  skillToolProvider?: ToolProvider;
  /** E2B executor instance — needed for binary file transfers (CopyToLocal for docx, xlsx, etc.) */
  // eslint-disable-next-line @typescript-eslint/no-explicit-any
  executor?: any;
  /** Local directory to sync sandbox output artifacts to (default: './outputs') */
  localOutputDir?: string;

  // ── Thread runtime extras (passed through to each thread's createAgent) ──

  /** Sandbox provider for skill execution within threads */
  sandboxProvider?: SandboxProvider;
  /** Skill manager for thread agents */
  skillManager?: SkillManager;
  /** Path to skill index JSON */
  skillIndexPath?: string;
  /** Callback for threads to ask the user a question */
  askUser?: (question: string, options?: string[]) => Promise<string>;
  /** Callback for threads to display a message to the user */
  tellUser?: (message: string) => Promise<void>;
  /** Callback for threads to download a file from sandbox */
  downloadRawFile?: (path: string) => Promise<string>;
  /** OpenTelemetry-style tracing for thread agents */
  telemetry?: HarnessTelemetry;
  /** Lifecycle hooks for thread tool calls */
  hookRunner?: HookRunner;
  /** Permission manager for thread tool access */
  permissionManager?: PermissionManager;
  /** Custom tool executor for threads. Return null to fall through to built-in dispatch. */
  executeToolAction?: (action: ToolCallAction) => Promise<ToolResult | null>;
  /** Progress callback fired for each tool call inside threads (tool_start/tool_end with thread context). */
  onThreadToolProgress?: (
    event: { threadIndex: number; threadAction: string } & (
      | { type: 'tool_start'; name: string; args: Record<string, unknown> }
      | { type: 'tool_end'; name: string; success: boolean; durationMs: number }
    ),
  ) => void;
}
|
|
204
|
+
|
|
205
|
+
// ── Debug export ──
|
|
206
|
+
|
|
207
|
+
/** Snapshot of all memory layers, bundled for debugging. */
export interface DebugExport {
  /** Task the export was scoped to, if any. */
  taskId?: string;
  /** Session the export was scoped to, if any. */
  sessionId?: string;
  /** Export timestamp (presumably epoch milliseconds — confirm against the exporter). */
  exportedAt: number;
  /** Exported episodes. */
  episodes: Episode[];
  /** Exported episode transcripts. */
  traces: EpisodeTrace[];
  /** Exported session memos. */
  sessionMemos: SessionMemo[];
  /** Exported long-term memories. */
  longTermMemories: LongTermMemory[];
}
|
|
@@ -0,0 +1,170 @@
|
|
|
1
|
+
import { tool } from 'ai';
|
|
2
|
+
import { z } from 'zod';
|
|
3
|
+
import { readFile, writeFile, mkdir } from 'node:fs/promises';
|
|
4
|
+
import { dirname } from 'node:path';
|
|
5
|
+
import type { ToolProvider, ToolResult } from '../interfaces/tool-provider';
|
|
6
|
+
import type { ToolCallAction } from '../agent/types';
|
|
7
|
+
|
|
8
|
+
// ── Bridge tool definitions (LLM-facing schemas) ──
|
|
9
|
+
|
|
10
|
+
/** Shorthand for `z.string().describe(description)`. */
const describedString = (description: string) => z.string().describe(description);

/**
 * Schemas for the four bridge tools. Only descriptions and input shapes are
 * declared here; execution is handled by `createBridgeExecutor`, which
 * dispatches on these exact tool names.
 */
export const bridgeTools = {
  // Sandbox-side read.
  ReadSandbox: tool({
    description: 'Read a file from the sandbox filesystem (where code execution happens)',
    inputSchema: z.object({
      path: describedString('Absolute path in the sandbox filesystem'),
    }),
  }),

  // Sandbox-side write.
  WriteSandbox: tool({
    description: 'Write content to a file in the sandbox filesystem',
    inputSchema: z.object({
      path: describedString('Absolute path in the sandbox filesystem'),
      content: describedString('Content to write'),
    }),
  }),

  // Local → sandbox transfer.
  CopyToSandbox: tool({
    description: 'Copy a file from the local project to the sandbox. Use this to stage files for processing by sandbox tools (e.g. data files for a Python script).',
    inputSchema: z.object({
      localPath: describedString('Path on the local filesystem (source)'),
      sandboxPath: describedString('Path in the sandbox (destination)'),
    }),
  }),

  // Sandbox → local transfer.
  CopyToLocal: tool({
    description: 'Copy a file from the sandbox to the local project. Use this to retrieve generated output files (docx, xlsx, pdf, images, etc.).',
    inputSchema: z.object({
      sandboxPath: describedString('Path in the sandbox (source)'),
      localPath: describedString('Path on the local filesystem (destination)'),
    }),
  }),
};
|
|
39
|
+
|
|
40
|
+
// ── Bridge tool executor ──
|
|
41
|
+
|
|
42
|
+
/** Dependencies required by `createBridgeExecutor`. */
export interface BridgeExecutorConfig {
  /** Provider used to read the source file for CopyToSandbox. */
  localProvider: ToolProvider;
  /** Provider used for ReadSandbox, WriteSandbox and the text fallback of CopyToLocal. */
  sandboxProvider: ToolProvider;
  /** E2B executor with readFileBytes/writeFileBytes for binary transfers */
  // eslint-disable-next-line @typescript-eslint/no-explicit-any
  executor?: any;
}
|
|
49
|
+
|
|
50
|
+
/**
 * Creates an executeToolAction handler for bridge tools.
 *
 * Handles `ReadSandbox`, `WriteSandbox`, `CopyToSandbox` and `CopyToLocal`.
 * Every failure, including an exception thrown by a provider, is reported as a
 * `ToolResult` with `success: false`; the returned promise does not reject
 * for bridge tools.
 *
 * @param config Providers (and optional byte-capable executor) to route through.
 * @returns An async handler that resolves to a `ToolResult` for bridge tools and
 *          to `null` for non-bridge tools (falls through to built-in dispatch).
 */
export function createBridgeExecutor(config: BridgeExecutorConfig) {
  /** Converts a thrown value into a failed ToolResult labelled with the tool name. */
  const failure = (toolName: string, error: unknown): ToolResult => ({
    success: false,
    output: '',
    error: `${toolName} failed: ${error instanceof Error ? error.message : String(error)}`,
  });

  return async (action: ToolCallAction): Promise<ToolResult | null> => {
    if (action.name === 'ReadSandbox') {
      try {
        // `await` inside the try so a provider rejection is caught here.
        return await config.sandboxProvider.readFile(String(action.args.path ?? ''));
      } catch (error) {
        return failure('ReadSandbox', error);
      }
    }

    if (action.name === 'WriteSandbox') {
      try {
        return await config.sandboxProvider.writeFile(
          String(action.args.path ?? ''),
          String(action.args.content ?? ''),
        );
      } catch (error) {
        return failure('WriteSandbox', error);
      }
    }

    if (action.name === 'CopyToSandbox') {
      const localPath = String(action.args.localPath ?? '');
      const sandboxPath = String(action.args.sandboxPath ?? '');
      try {
        // NOTE(review): this path is text-only (provider readFile → writeFile).
        // Binary local files will likely be corrupted; the executor's
        // writeFileBytes is not used here — confirm whether it should be.
        const localResult = await config.localProvider.readFile(localPath);
        if (!localResult.success) {
          return { success: false, output: '', error: `Failed to read local file: ${localResult.error}` };
        }
        const writeResult = await config.sandboxProvider.writeFile(sandboxPath, localResult.output);
        if (!writeResult.success) {
          return { success: false, output: '', error: `Failed to write to sandbox: ${writeResult.error}` };
        }
        return { success: true, output: `Copied ${localPath} → sandbox:${sandboxPath} (${localResult.output.length} chars)` };
      } catch (error) {
        return failure('CopyToSandbox', error);
      }
    }

    if (action.name === 'CopyToLocal') {
      const sandboxPath = String(action.args.sandboxPath ?? '');
      const localPath = String(action.args.localPath ?? '');
      try {
        // NOTE(review): localPath comes from tool arguments and is written with
        // node:fs directly, bypassing localProvider. No path confinement is
        // applied here — confirm that callers restrict where this may write.

        // Preferred: byte-exact transfer when the executor supports it.
        if (config.executor?.readFileBytes) {
          const bytes: Uint8Array = await config.executor.readFileBytes(sandboxPath);
          await mkdir(dirname(localPath), { recursive: true });
          await writeFile(localPath, Buffer.from(bytes));
          return { success: true, output: `Copied sandbox:${sandboxPath} → ${localPath} (${bytes.byteLength} bytes)` };
        }

        // Fallback: text-based transfer via provider
        const sandboxResult = await config.sandboxProvider.readFile(sandboxPath);
        if (!sandboxResult.success) {
          return { success: false, output: '', error: `Failed to read sandbox file: ${sandboxResult.error}` };
        }
        await mkdir(dirname(localPath), { recursive: true });
        await writeFile(localPath, sandboxResult.output, 'utf-8');
        return { success: true, output: `Copied sandbox:${sandboxPath} → ${localPath} (${sandboxResult.output.length} chars)` };
      } catch (error) {
        return failure('CopyToLocal', error);
      }
    }

    // Not a bridge tool — fall through to built-in dispatch
    return null;
  };
}
|
|
116
|
+
|
|
117
|
+
// ── Auto-sync: copy output artifacts from sandbox to local after thread completes ──
|
|
118
|
+
|
|
119
|
+
/** File extensions (case-insensitive) that count as output artifacts. */
const ARTIFACT_EXTENSIONS = /\.(docx|xlsx|pptx|csv|pdf|png|jpg|jpeg|gif|zip|tar|gz)$/i;

/**
 * Scan sandbox output directory and copy any artifact files to local.
 * Called after a skill thread completes.
 *
 * Best-effort: listing failures and per-file copy failures are swallowed, and
 * only successfully written files are reported.
 *
 * @param sandboxProvider Provider used to list the directory and, without a
 *        byte-capable executor, to read files as text.
 * @param localOutputDir Local directory to write into; created on the first
 *        successful read.
 * @param executor Optional object exposing `readFileBytes(path)` for
 *        byte-exact transfers.
 * @param sandboxOutputDir Sandbox directory to scan (top level only, first 50
 *        entries of the listing).
 * @returns Local paths of the files that were copied.
 */
export async function syncArtifactsToLocal(
  sandboxProvider: ToolProvider,
  localOutputDir: string,
  // eslint-disable-next-line @typescript-eslint/no-explicit-any
  executor?: any,
  sandboxOutputDir = '/outputs',
): Promise<string[]> {
  const copied: string[] = [];

  try {
    // Single-quote the directory for the shell so spaces or metacharacters in
    // the path cannot split the argument or inject commands.
    const quotedDir = `'${sandboxOutputDir.replace(/'/g, "'\\''")}'`;
    const listResult = await sandboxProvider.bash(
      `find ${quotedDir} -maxdepth 1 -type f 2>/dev/null | head -50`,
    );
    if (!listResult.success || !listResult.output.trim()) return copied;

    const files = listResult.output.trim().split('\n').filter(Boolean);

    // Create the local directory once, lazily, so nothing is created when no
    // artifact could be read.
    let outputDirReady = false;
    const ensureOutputDir = async (): Promise<void> => {
      if (outputDirReady) return;
      await mkdir(localOutputDir, { recursive: true });
      outputDirReady = true;
    };

    for (const sandboxPath of files) {
      const filename = sandboxPath.split('/').pop();
      if (!filename || !ARTIFACT_EXTENSIONS.test(filename)) continue;

      const localPath = `${localOutputDir}/${filename}`;
      try {
        if (executor?.readFileBytes) {
          const bytes: Uint8Array = await executor.readFileBytes(sandboxPath);
          await ensureOutputDir();
          await writeFile(localPath, Buffer.from(bytes));
        } else {
          // Text fallback.
          // NOTE(review): most artifact extensions are binary formats; a
          // text round-trip will likely corrupt them — confirm this is acceptable.
          const result = await sandboxProvider.readFile(sandboxPath);
          if (!result.success) continue;
          await ensureOutputDir();
          await writeFile(localPath, result.output, 'utf-8');
        }
        copied.push(localPath);
      } catch {
        // Skip files that fail to copy
      }
    }
  } catch {
    // Non-critical — artifacts are bonus
  }

  return copied;
}
|
|
@@ -0,0 +1,80 @@
|
|
|
1
|
+
import type {
|
|
2
|
+
BashOptions,
|
|
3
|
+
GlobOptions,
|
|
4
|
+
GrepOptions,
|
|
5
|
+
ReadOptions,
|
|
6
|
+
ToolProvider,
|
|
7
|
+
ToolProviderCapabilities,
|
|
8
|
+
ToolResult,
|
|
9
|
+
WebFetchOptions,
|
|
10
|
+
BatchOp,
|
|
11
|
+
BatchResult,
|
|
12
|
+
} from '../interfaces/tool-provider';
|
|
13
|
+
|
|
14
|
+
/**
 * A tool provider that routes operations to two underlying providers:
 * - Local provider: Read, Write, Edit, Glob, Grep (project filesystem)
 * - Sandbox provider: Bash (code execution in sandbox VM)
 *
 * Used for skill threads where code runs in sandbox (packages pre-installed)
 * but file operations target the local project.
 *
 * Web operations are served by the local provider when it implements them,
 * otherwise by the sandbox provider. Batching is not offered.
 */
export class BridgedToolProvider implements ToolProvider {
  /**
   * @param local Provider that serves file operations.
   * @param sandbox Provider that serves bash.
   */
  constructor(
    private readonly local: ToolProvider,
    private readonly sandbox: ToolProvider,
  ) {}

  /** Merge both providers' capabilities according to the routing above. */
  capabilities(): ToolProviderCapabilities {
    const localCaps = this.local.capabilities();
    const sandboxCaps = this.sandbox.capabilities();
    return {
      bash: sandboxCaps.bash,
      fileSystem: localCaps.fileSystem,
      webFetch: localCaps.webFetch || sandboxCaps.webFetch,
      webSearch: localCaps.webSearch || sandboxCaps.webSearch,
      codeExecution: sandboxCaps.codeExecution,
      sandboxed: false, // mixed — not fully sandboxed
    };
  }

  // Bash → sandbox (where packages are installed)
  bash(command: string, options?: BashOptions): Promise<ToolResult> {
    return this.sandbox.bash(command, options);
  }

  // File operations → local project
  readFile(path: string, options?: ReadOptions): Promise<ToolResult> {
    return this.local.readFile(path, options);
  }

  writeFile(path: string, content: string): Promise<ToolResult> {
    return this.local.writeFile(path, content);
  }

  editFile(path: string, oldText: string, newText: string): Promise<ToolResult> {
    return this.local.editFile(path, oldText, newText);
  }

  glob(pattern: string, options?: GlobOptions): Promise<ToolResult> {
    return this.local.glob(pattern, options);
  }

  grep(pattern: string, path?: string, options?: GrepOptions): Promise<ToolResult> {
    return this.local.grep(pattern, path, options);
  }

  // Web operations → whichever provider has them (local wins).
  // The method is bound to its owning provider: an unbound method invoked as
  // `bridged.webFetch(...)` would run with `this` set to this bridge, which
  // breaks providers whose implementation reads their own instance state.
  get webFetch(): ((options: WebFetchOptions) => Promise<ToolResult>) | undefined {
    const owner = this.local.webFetch != null ? this.local : this.sandbox;
    return owner.webFetch?.bind(owner);
  }

  get webSearch(): ((query: string) => Promise<ToolResult>) | undefined {
    const owner = this.local.webSearch != null ? this.local : this.sandbox;
    return owner.webSearch?.bind(owner);
  }

  // No batch — mixed providers can't batch together
  get batch(): undefined {
    return undefined;
  }
}
|
|
@@ -0,0 +1,118 @@
|
|
|
1
|
+
import { randomUUID } from 'node:crypto';
|
|
2
|
+
import type { Episode, SessionMemo, LongTermMemory, EpisodeStore, SessionMemoStore, LongTermStore } from './arc-types';
|
|
3
|
+
|
|
4
|
+
/**
 * Fold a set of episodes into one session memo and persist it.
 *
 * The memo text is template-based: a header with thread/success counts, one
 * line per episode (`<index>. <threadAction> (ok|failed)`), then the
 * de-duplicated lists of modified files and tools used, when non-empty.
 *
 * @param episodes Episodes to summarise; must not be empty.
 * @param sessionId Session id stamped on the memo.
 * @param sessionMemoStore Store the memo is written to.
 * @returns The memo that was stored.
 * @throws Error when `episodes` is empty.
 */
export async function consolidateEpisodes(
  episodes: Episode[],
  sessionId: string,
  sessionMemoStore: SessionMemoStore,
): Promise<SessionMemo> {
  if (episodes.length === 0) {
    throw new Error('Cannot consolidate zero episodes');
  }

  // Sets keep first-seen order, so the joined lists follow episode order.
  const modifiedFiles = new Set<string>(episodes.flatMap((episode) => episode.filesModified));
  const toolNames = new Set<string>(episodes.flatMap((episode) => episode.toolCalls));
  const succeeded = episodes.filter((episode) => episode.success).length;

  const lines: string[] = [`Task summary (${episodes.length} threads, ${succeeded} succeeded):`];
  for (const episode of episodes) {
    const outcome = episode.success ? 'ok' : 'failed';
    lines.push(`${episode.index}. ${episode.threadAction} (${outcome})`);
  }
  if (modifiedFiles.size > 0) {
    lines.push(`Files modified: ${Array.from(modifiedFiles).join(', ')}`);
  }
  if (toolNames.size > 0) {
    lines.push(`Tools used: ${Array.from(toolNames).join(', ')}`);
  }

  const memo: SessionMemo = {
    id: randomUUID(),
    sessionId,
    content: lines.join('\n'),
    sourceEpisodeIds: episodes.map((episode) => episode.id),
    createdAt: Date.now(),
  };

  await sessionMemoStore.addMemo(memo);
  return memo;
}
|
|
54
|
+
|
|
55
|
+
/**
 * Fold session memos into a single long-term memory and persist it.
 *
 * Memos whose id already appears in any stored memory's
 * `sourceSessionMemoIds` are skipped, so a memo is consolidated at most once.
 * The remaining memo contents are joined with `\n---\n` under a
 * "Session work:" heading and stored with category `'session-summary'`.
 *
 * @param memos Candidate memos.
 * @param longTermStore Store to read existing memories from and write to.
 * @returns A one-element array with the new memory, or an empty array when
 *          there is nothing new to consolidate.
 */
export async function consolidateMemos(
  memos: SessionMemo[],
  longTermStore: LongTermStore,
): Promise<LongTermMemory[]> {
  if (memos.length === 0) return [];

  // Collect every memo id that some stored memory already covers.
  const stored = await longTermStore.getAllMemories();
  const alreadyConsolidated = new Set<string>(
    stored.flatMap((existing) => existing.sourceSessionMemoIds),
  );

  const pending = memos.filter((memo) => !alreadyConsolidated.has(memo.id));
  if (pending.length === 0) return [];

  // Template-based: one long-term memory summarising the pending memos.
  const timestamp = Date.now();
  const joined = pending.map((memo) => memo.content).join('\n---\n');
  const memory: LongTermMemory = {
    id: randomUUID(),
    content: `Session work:\n${joined}`,
    category: 'session-summary',
    sourceSessionMemoIds: pending.map((memo) => memo.id),
    createdAt: timestamp,
    updatedAt: timestamp,
  };

  await longTermStore.addMemory(memory);
  return [memory];
}
|
|
95
|
+
|
|
96
|
+
/**
 * Full consolidation pipeline: episodes → memo → long-term.
 * Designed to be called as fire-and-forget at task/session boundaries.
 *
 * Step 1 only consolidates episodes that no existing memo of this session
 * already lists in `sourceEpisodeIds`. Without that filter, every call for the
 * same task would write another memo covering all earlier episodes again, and
 * the duplicated text would then be copied into long-term memory.
 *
 * @param taskId Task whose episodes are consolidated.
 * @param sessionId Session whose memos are consolidated.
 * @param episodeStore Source of episodes.
 * @param sessionMemoStore Source and destination of session memos.
 * @param longTermStore Destination of long-term memories.
 */
export async function runConsolidation(
  taskId: string,
  sessionId: string,
  episodeStore: EpisodeStore,
  sessionMemoStore: SessionMemoStore,
  longTermStore: LongTermStore,
): Promise<void> {
  // 1. Consolidate not-yet-covered task episodes into a session memo
  const episodes = await episodeStore.getEpisodesByTask(taskId);
  if (episodes.length > 0) {
    const priorMemos = await sessionMemoStore.getMemosBySession(sessionId);
    const coveredEpisodeIds = new Set<string>(
      priorMemos.flatMap((memo) => memo.sourceEpisodeIds),
    );
    const freshEpisodes = episodes.filter((episode) => !coveredEpisodeIds.has(episode.id));
    if (freshEpisodes.length > 0) {
      await consolidateEpisodes(freshEpisodes, sessionId, sessionMemoStore);
    }
  }

  // 2. Consolidate session memos into long-term memory
  //    (re-read so a memo written in step 1 is included).
  const memos = await sessionMemoStore.getMemosBySession(sessionId);
  if (memos.length > 0) {
    await consolidateMemos(memos, longTermStore);
  }
}
|
|
@@ -0,0 +1,80 @@
|
|
|
1
|
+
import { createAgent, type AgentRuntime } from '../agent/create-agent';
|
|
2
|
+
import type { AgentMessage, AgentStreamEvent } from '../agent/types';
|
|
3
|
+
import type { ArcLoopConfig } from './arc-types';
|
|
4
|
+
import { ArcLoop } from './arc-loop';
|
|
5
|
+
import { runConsolidation } from './consolidation';
|
|
6
|
+
|
|
7
|
+
/** `ArcLoopConfig` plus the settings that apply only to the outer agent wrapper. */
export interface ArcAgentConfig extends ArcLoopConfig {
  /** Max steps for the outer agent loop (default: 1, since ArcLoop runs internally) */
  maxOuterSteps?: number;
}
|
|
11
|
+
|
|
12
|
+
/**
 * Create an agent powered by the ArcLoop orchestrator.
 *
 * The returned object exposes `run` and `stream`, like a regular
 * `createAgent()` agent, but uses the orchestrator → thread architecture
 * internally.
 *
 * - `run()` delegates to `createAgent`, whose loop is the ArcLoop.
 * - `stream()` drives `ArcLoop.streamAction()` directly rather than going
 *   through createAgent's stream(), because ArcLoop executes Thread tool calls
 *   internally — createAgent would try to re-execute them via the toolProvider.
 *
 * Consolidation is started in the background (not awaited, failures ignored)
 * once `run()` resolves or once the stream has been consumed to the end.
 *
 * @param config Loop configuration plus the optional outer step limit.
 */
export function createArcAgent(config: ArcAgentConfig) {
  const arcLoop = new ArcLoop(config);

  // ArcLoop.nextAction() runs the full orchestration internally,
  // so the outer agent only needs 1 step unless overridden.
  const runtime: AgentRuntime = {
    toolProvider: config.toolProvider,
    loop: arcLoop,
    maxSteps: config.maxOuterSteps ?? 1,
  };

  // run() uses createAgent which calls arcLoop.nextAction() → returns FinalAction. Works.
  const agent = createAgent(runtime);

  /** Start consolidation without awaiting it; errors are deliberately dropped. */
  const fireConsolidation = (): void => {
    const { taskId, sessionId, episodeStore, sessionMemoStore, longTermStore } = config;
    runConsolidation(taskId, sessionId, episodeStore, sessionMemoStore, longTermStore).catch(() => {
      // Consolidation failure is non-critical
    });
  };

  return {
    /** Run the orchestration to completion, then kick off consolidation. */
    async run(prompt: string, options?: { history?: AgentMessage[] }) {
      const result = await agent.run(prompt, options);
      fireConsolidation();
      return result;
    },

    /**
     * Stream orchestration events directly from ArcLoop.
     *
     * Yields the standard AgentStreamEvent types:
     * - text_delta: orchestrator reasoning
     * - tool_start/tool_end: thread dispatch & completion (name='Thread')
     * - step_start/step_end: orchestrator turns
     * - done: orchestration complete
     *
     * Consolidation is only started when the underlying stream finishes
     * normally; it is skipped if the consumer stops early or the stream throws.
     */
    async *stream(
      prompt: string,
      options?: { history?: AgentMessage[] },
    ): AsyncGenerator<AgentStreamEvent> {
      const userTurn: AgentMessage = { role: 'user', content: prompt };
      const messages: AgentMessage[] = [...(options?.history ?? []), userTurn];

      yield* arcLoop.streamAction(messages);
      fireConsolidation();
    },
  };
}
|