npm - @bluecopa/harness - Versions diffs - 0.0.1 → 0.1.0-snapshot.10 - Mend

@bluecopa/harness 0.0.1 → 0.1.0-snapshot.10

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (25) hide show

package/README.md +35 -0
package/package.json +2 -1
package/src/agent/create-agent.ts +9 -0
package/src/agent/types.ts +15 -2
package/src/arc/arc-loop.ts +395 -0
package/src/arc/arc-types.ts +215 -0
package/src/arc/bridge-tools.ts +170 -0
package/src/arc/bridged-tool-provider.ts +80 -0
package/src/arc/consolidation.ts +118 -0
package/src/arc/create-arc-agent.ts +80 -0
package/src/arc/debug.ts +62 -0
package/src/arc/episode-compressor.ts +151 -0
package/src/arc/object-store/fs-object-store.ts +60 -0
package/src/arc/object-store/memory-object-store.ts +41 -0
package/src/arc/object-store/object-store.ts +12 -0
package/src/arc/stores/episode-store.ts +120 -0
package/src/arc/stores/long-term-store.ts +86 -0
package/src/arc/stores/rxdb-setup.ts +112 -0
package/src/arc/stores/session-memo-store.ts +58 -0
package/src/arc/thread-executor.ts +365 -0
package/src/arc/thread-tool.ts +29 -0
package/src/loop/context-store.ts +12 -9
package/src/loop/vercel-agent-loop.ts +12 -6
package/tests/integration/agent-skill-default-from-sandbox.spec.ts +3 -2
package/tests/unit/structured-messages.spec.ts +1 -1

package/README.md CHANGED Viewed

@@ -194,11 +194,45 @@ interface SandboxProvider {
 `HarnessTelemetry` provides OpenTelemetry-style spans and metrics for agent runs.
+### Arc: Orchestrator + Thread Architecture (`src/arc/`)
+`ArcLoop` is an `AgentLoop` implementation where an orchestrator LLM dispatches bounded threads via a single `Thread` tool. Threads produce episodes (summary + full trace). The orchestrator only sees summaries, keeping its context small.
+```ts
+import { createArcAgent } from './src/arc/create-arc-agent';
+import { InMemoryEpisodeStore } from './src/arc/stores/episode-store';
+import { InMemorySessionMemoStore } from './src/arc/stores/session-memo-store';
+import { InMemoryLongTermStore } from './src/arc/stores/long-term-store';
+const agent = createArcAgent({
+  toolProvider: new LocalToolProvider(process.cwd()),
+  episodeStore: new InMemoryEpisodeStore(),
+  sessionMemoStore: new InMemorySessionMemoStore(),
+  longTermStore: new InMemoryLongTermStore(),
+  taskId: 'task-1',
+  sessionId: 'session-1',
+});
+const result = await agent.run('Fix the authentication bug');
+```
+Key features:
+- **Parallel threads**: orchestrator calls Thread N times in one turn → all run concurrently
+- **Four-tier memory**: thread context → episodes → session memos → long-term
+- **Per-thread models**: Haiku for reads, Sonnet for implementation
+- **Template compression**: zero-LLM-call episode summaries
+- **Async consolidation**: non-blocking background distillation
+Full architecture doc: [`docs/arc.md`](../docs/arc.md)
 ## Package layout
 ```
 src/
 ├── agent/          # createAgent, step executor, types
+├── arc/            # ArcLoop orchestrator, threads, memory hierarchy
+│   ├── stores/     # RxDB + in-memory store implementations
+│   └── object-store/ # Pluggable cloud sync (fs, memory)
 ├── interfaces/     # ToolProvider, SandboxProvider, AgentLoop contracts
 ├── loop/           # VercelAgentLoop, LCMToolLoop
 ├── providers/      # LocalToolProvider, E2BToolProvider, ControlPlaneE2BExecutor
@@ -214,6 +248,7 @@ src/
 ## Documentation
+- **Arc architecture**: [`docs/arc.md`](../docs/arc.md)
 - Provider guide: `docs/guides/providers.md`
 - Skills guide: `docs/guides/skills.md`
 - Observability guide: `docs/guides/observability.md`

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@bluecopa/harness",
-  "version": "0.0.1",
+  "version": "0.1.0-snapshot.10",
   "description": "Provider-agnostic TypeScript agent framework",
   "license": "UNLICENSED",
   "scripts": {
@@ -10,6 +10,7 @@
   "dependencies": {
     "@ai-sdk/anthropic": "^3.0.48",
     "ai": "^6.0.101",
+    "rxdb": "^15.39.0",
     "zod": "^4.1.11"
   },
   "devDependencies": {

package/src/agent/create-agent.ts CHANGED Viewed

@@ -37,6 +37,8 @@ export interface AgentRuntime {
   /** Custom tool executor. Called for every tool action. Return null to fall through to built-in dispatch.
    *  When hookRunner/permissionManager are provided on the runtime, they are automatically applied before/after this callback — no manual wiring needed. */
   executeToolAction?: (action: ToolCallAction) => Promise<ToolResult | null>;
+  /** Progress callback fired before/after each tool call during run(). */
+  onToolProgress?: (event: { type: 'tool_start'; name: string; args: Record<string, unknown> } | { type: 'tool_end'; name: string; success: boolean; durationMs: number }) => void;
 }
 /**
@@ -596,10 +598,14 @@ export function createAgent(runtime: AgentRuntime) {
             // Execute valid calls via batch (sequential sandbox ops) or parallel fallback
             if (validCalls.length > 0) {
+              for (const c of validCalls) runtime.onToolProgress?.({ type: 'tool_start', name: c.name, args: c.args });
+              const batchStart = Date.now();
               const results = await executeBatch(validCalls, runtime.toolProvider, runtime);
+              const batchMs = Date.now() - batchStart;
               for (let i = 0; i < validCalls.length; i++) {
                 const call = validCalls[i]!;
                 const r = results[i]!;
+                runtime.onToolProgress?.({ type: 'tool_end', name: call.name, success: r.success, durationMs: batchMs });
                 if (!r.success) {
                   recordAgentError(runtime.telemetry);
                 }
@@ -659,6 +665,8 @@ export function createAgent(runtime: AgentRuntime) {
           } else {
             consecutiveInvalid = 0;
           }
+          runtime.onToolProgress?.({ type: 'tool_start', name: action.name, args: action.args });
+          const singleStart = Date.now();
           const result = validationError
             ? ({ success: false, output: '', error: validationError } as ToolResult)
             : await executor.run(async () => {
@@ -672,6 +680,7 @@ export function createAgent(runtime: AgentRuntime) {
                   };
                 }
               });
+          runtime.onToolProgress?.({ type: 'tool_end', name: action.name, success: result.success, durationMs: Date.now() - singleStart });
           if (!result.success) {
             recordAgentError(runtime.telemetry);
           }

package/src/agent/types.ts CHANGED Viewed

@@ -11,13 +11,26 @@ export interface ToolResultInfo {
   isError?: boolean;
 }
+export type ContentPart = // One segment of a multi-part message body, discriminated by `type`.
+  | { type: 'text'; text: string } // plain text segment
+  | { type: 'image'; image: Buffer | Uint8Array; mimeType: string }; // raw image bytes together with their MIME type
 export interface AgentMessage {
   role: 'system' | 'user' | 'assistant' | 'tool';
-  content: string;
+  content: string | ContentPart[];
   toolCalls?: ToolCallInfo[];      // assistant messages: what tools were called
   toolResults?: ToolResultInfo[];  // tool messages: results keyed by toolCallId
 }
+/**
+ * Collapse message content to plain text.
+ * A string is returned unchanged. For a ContentPart[] the `text` parts are
+ * joined with newlines and every non-text part (e.g. images) is ignored.
+ */
+export function getTextContent(content: string | ContentPart[]): string {
+  if (Array.isArray(content)) {
+    const texts: string[] = [];
+    for (const part of content) {
+      if (part.type === 'text') texts.push(part.text);
+    }
+    return texts.join('\n');
+  }
+  return content;
+}
 export interface ToolCallAction {
   type: 'tool';
   name: string;
@@ -46,7 +59,7 @@ export interface AgentRunResult {
 export type AgentStreamEvent =
   | { type: 'text_delta'; text: string }
   | { type: 'tool_start'; name: string; args: Record<string, unknown>; toolCallId?: string }
-  | { type: 'tool_end'; name: string; result: { success: boolean; output: string; error?: string } }
+  | { type: 'tool_end'; name: string; result: { success: boolean; output: string; error?: string; [key: string]: unknown } }
   | { type: 'step_start'; step: number }
   | { type: 'step_end'; step: number }
   | { type: 'done'; output: string; steps: number };

package/src/arc/arc-loop.ts ADDED Viewed

@@ -0,0 +1,395 @@
+import { randomUUID } from 'node:crypto';
+import type { AgentAction, AgentLoop, AgentMessage, AgentStreamEvent, ToolCallAction } from '../agent/types';
+import { getTextContent } from '../agent/types';
+import { VercelAgentLoop } from '../loop/vercel-agent-loop';
+import type { ArcLoopConfig, ThreadRequest, ThreadResult, ModelTier } from './arc-types';
+import { DEFAULT_MODEL_MAP, resolveModel } from './arc-types';
+import { threadTool } from './thread-tool';
+import { ThreadExecutor } from './thread-executor';
+import type { Tool } from 'ai';
+// eslint-disable-next-line @typescript-eslint/no-explicit-any
+type AnyTool = Tool<any, any>; // `Tool` from the 'ai' package with both type parameters left as `any`, so differently-typed tools can share one Record
+/*
+ * ── Orchestrator system prompt ──
+ * Default system prompt for the orchestrator LLM. The ArcLoop constructor uses
+ * it only when `config.systemPrompt` is not supplied. The lines are joined with
+ * '\n'; empty entries produce blank separator lines in the final prompt.
+ */
+const DEFAULT_ORCHESTRATOR_PROMPT = [
+  'You are an orchestrator agent. You accomplish tasks by dispatching focused threads.',
+  'Your ONLY tool is Thread — use it to delegate tactical work.',
+  '',
+  'Strategy:',
+  '- Break complex tasks into focused, bounded threads.',
+  '- Dispatch independent threads in the SAME turn for parallel execution.',
+  '- Use contextEpisodeIds to pass context from completed threads to dependent ones.',
+  '- Read before writing: dispatch read threads first, then use their episode IDs for implementation threads.',
+  '- Each thread gets full tool access (Bash, Read, Write, Edit, Glob, Grep, etc.).',
+  '',
+  'Model selection — choose the right tier for each thread:',
+  '- "fast" — file reads, searches, directory listing, simple checks',
+  '- "medium" (default) — implementation, writing code, running tests, standard tasks',
+  '- "strong" — complex refactoring, debugging subtle issues, architectural decisions, multi-file changes',
+  '',
+  'Thread results appear as episode summaries. If you need detail, dispatch a new thread seeded with the episode.',
+  'When the task is fully complete, respond with a text summary (no Thread call).',
+].join('\n');
+// ── ArcLoop ──
+/**
+ * AgentLoop implementation in which an orchestrator LLM delegates work by
+ * calling the `Thread` tool.
+ *
+ * A single call to nextAction() / streamAction() runs the whole orchestration
+ * internally: the orchestrator is queried, its Thread calls are executed by a
+ * ThreadExecutor, and the resulting episode summaries are fed back to it as
+ * tool results. This repeats until the orchestrator answers with plain text,
+ * an extra orchestrator tool produces a directive, or the turn limit
+ * (`config.maxOrchestratorTurns`, default 20) is reached.
+ */
+export class ArcLoop implements AgentLoop {
+  private readonly orchestratorLoop: VercelAgentLoop;
+  private readonly threadExecutor: ThreadExecutor;
+  private readonly config: ArcLoopConfig;
+  /**
+   * Wire up the orchestrator loop and the thread executor from `config`.
+   * Optional config fields are forwarded only when set (conditional spreads),
+   * so absent options are omitted rather than passed as `undefined`.
+   */
+  constructor(config: ArcLoopConfig) {
+    this.config = config;
+    // Build orchestrator tools: Thread + any extra tools
+    const orchestratorTools: Record<string, AnyTool> = {
+      // eslint-disable-next-line @typescript-eslint/no-explicit-any
+      Thread: threadTool as any,
+      ...config.extraOrchestratorTools,
+    };
+    // Caller-supplied tiers override the defaults; the orchestrator falls back
+    // to the "strong" tier and threads fall back to the "medium" tier.
+    const modelMap = { ...DEFAULT_MODEL_MAP, ...config.modelMap };
+    const orchestratorModel = resolveModel(config.model, modelMap, modelMap.strong);
+    this.orchestratorLoop = new VercelAgentLoop({
+      model: orchestratorModel,
+      systemPrompt: config.systemPrompt ?? DEFAULT_ORCHESTRATOR_PROMPT,
+      ...(config.apiKey != null ? { apiKey: config.apiKey } : {}),
+      tools: orchestratorTools,
+    });
+    const defaultThreadModel = resolveModel(config.threadModel, modelMap, modelMap.medium);
+    this.threadExecutor = new ThreadExecutor({
+      maxConcurrency: config.maxConcurrency ?? 3,
+      threadTimeout: config.threadTimeout ?? 120_000,
+      threadMaxSteps: config.threadMaxSteps ?? 20,
+      threadModel: defaultThreadModel,
+      modelMap,
+      // NOTE(review): the cast passes `undefined` through when no thread tools are configured — confirm ThreadExecutor tolerates that.
+      threadTools: config.threadTools ?? (undefined as unknown as Record<string, AnyTool>),
+      toolProvider: config.toolProvider,
+      ...(config.skillToolProvider != null ? { skillToolProvider: config.skillToolProvider } : {}),
+      ...(config.executor != null ? { executor: config.executor } : {}),
+      ...(config.localOutputDir != null ? { localOutputDir: config.localOutputDir } : {}),
+      episodeStore: config.episodeStore,
+      taskId: config.taskId,
+      sessionId: config.sessionId,
+      compressor: config.compressor ?? 'template',
+      // Pass through runtime extras for thread agents
+      ...(config.sandboxProvider != null ? { sandboxProvider: config.sandboxProvider } : {}),
+      ...(config.skillManager != null ? { skillManager: config.skillManager } : {}),
+      ...(config.skillIndexPath != null ? { skillIndexPath: config.skillIndexPath } : {}),
+      ...(config.askUser != null ? { askUser: config.askUser } : {}),
+      ...(config.tellUser != null ? { tellUser: config.tellUser } : {}),
+      ...(config.downloadRawFile != null ? { downloadRawFile: config.downloadRawFile } : {}),
+      ...(config.telemetry != null ? { telemetry: config.telemetry } : {}),
+      ...(config.hookRunner != null ? { hookRunner: config.hookRunner } : {}),
+      ...(config.permissionManager != null ? { permissionManager: config.permissionManager } : {}),
+      ...(config.executeToolAction != null ? { executeToolAction: config.executeToolAction } : {}),
+      ...(config.onThreadToolProgress != null ? { onThreadToolProgress: config.onThreadToolProgress } : {}),
+    });
+  }
+  /**
+   * Run the full orchestration loop internally.
+   * The outer createAgent() sees this as a single step that returns FinalAction.
+   *
+   * @param messages Outer conversation (user prompt and any history); it is
+   *   copied into a private orchestrator context and never mutated.
+   * @returns The orchestrator's final action, the action produced by
+   *   `config.onOrchestratorTool` for a non-Thread tool call, or a fallback
+   *   final action once the turn limit is exhausted.
+   */
+  async nextAction(messages: AgentMessage[]): Promise<AgentAction> {
+    const maxTurns = this.config.maxOrchestratorTurns ?? 20;
+    const orchestratorMessages = await this.buildOrchestratorContext(messages);
+    for (let turn = 0; turn < maxTurns; turn++) {
+      const action = await this.orchestratorLoop.nextAction(orchestratorMessages);
+      // Final response — pass through to outer agent
+      if (action.type === 'final') {
+        return action;
+      }
+      // Batch of thread calls (parallel dispatch)
+      if (action.type === 'tool_batch') {
+        // Give every call an ID up front so the assistant message and the tool
+        // results written below refer to the same toolCallId.
+        const calls = this.withToolCallIds(action.calls);
+        const { threadCalls, threadRequests, extraCalls } = this.partitionCalls(calls);
+        // Handle extra orchestrator tools first
+        if (extraCalls.length > 0 && this.config.onOrchestratorTool) {
+          // Extra tools produce directives — return immediately
+          const firstExtra = extraCalls[0]!;
+          return await this.config.onOrchestratorTool(firstExtra.name, firstExtra.args);
+        }
+        // Record assistant message with tool calls
+        this.appendAssistantMessage(orchestratorMessages, calls);
+        // Dispatch all threads in parallel
+        if (threadRequests.length > 0) {
+          const results = await this.threadExecutor.executeAll(threadRequests);
+          // Results line up with the Thread calls only, not with the full batch.
+          this.appendThreadResults(orchestratorMessages, threadCalls, results);
+        }
+        // Non-Thread calls with no handler still need a result for their call ID.
+        this.appendUnhandledToolResults(orchestratorMessages, extraCalls);
+        continue;
+      }
+      // Single tool call
+      if (action.type === 'tool') {
+        const call = this.withToolCallId(action);
+        if (call.name === 'Thread') {
+          const request = this.toThreadRequest(call.args);
+          // Record assistant message
+          this.appendAssistantMessage(orchestratorMessages, [call]);
+          const result = await this.threadExecutor.execute(request);
+          this.appendThreadResults(orchestratorMessages, [call], [result]);
+          continue;
+        }
+        // Extra orchestrator tool
+        if (this.config.onOrchestratorTool) {
+          return await this.config.onOrchestratorTool(call.name, call.args);
+        }
+        // No handler: record the call with an error result so the next turn sees
+        // new context instead of re-sending the identical conversation.
+        this.appendAssistantMessage(orchestratorMessages, [call]);
+        this.appendUnhandledToolResults(orchestratorMessages, [call]);
+        continue;
+      }
+    }
+    return { type: 'final', content: 'Orchestrator reached maximum turns.' };
+  }
+  /**
+   * Streaming version of the orchestration loop.
+   * Yields events throughout: text deltas during orchestrator reasoning,
+   * tool_start/tool_end for thread dispatch/completion.
+   *
+   * Each orchestrator turn is reported as one step (step_start … step_end).
+   * The stream always terminates with a `done` event.
+   * Note: when a non-Thread tool is called and `config.onOrchestratorTool` is
+   * set, this path only announces the call and ends the stream; the handler
+   * itself is not invoked here.
+   */
+  async *streamAction(messages: AgentMessage[]): AsyncGenerator<AgentStreamEvent> {
+    const maxTurns = this.config.maxOrchestratorTurns ?? 20;
+    const orchestratorMessages = await this.buildOrchestratorContext(messages);
+    let totalSteps = 0;
+    for (let turn = 0; turn < maxTurns; turn++) {
+      totalSteps++;
+      yield { type: 'step_start', step: totalSteps };
+      // Stream orchestrator LLM to get text deltas + tool calls
+      const pendingTools: ToolCallAction[] = [];
+      let finalText = '';
+      if (this.orchestratorLoop.streamAction) {
+        for await (const event of this.orchestratorLoop.streamAction(orchestratorMessages)) {
+          if (event.type === 'text_delta') {
+            finalText += event.text;
+            yield event;
+          }
+          // Inner tool_start events are collected, not forwarded; they are
+          // re-emitted below once the calls have been partitioned.
+          if (event.type === 'tool_start') {
+            pendingTools.push({
+              type: 'tool',
+              name: event.name,
+              args: event.args,
+              ...(event.toolCallId != null ? { toolCallId: event.toolCallId } : {}),
+            });
+          }
+        }
+      } else {
+        // Fallback to non-streaming
+        const action = await this.orchestratorLoop.nextAction(orchestratorMessages);
+        if (action.type === 'final') {
+          yield { type: 'step_end', step: totalSteps };
+          yield { type: 'done', output: action.content, steps: totalSteps };
+          return;
+        }
+        if (action.type === 'tool_batch') {
+          pendingTools.push(...action.calls);
+        } else if (action.type === 'tool') {
+          pendingTools.push(action);
+        }
+      }
+      // No tools → final response
+      if (pendingTools.length === 0) {
+        orchestratorMessages.push({ role: 'assistant', content: finalText });
+        yield { type: 'step_end', step: totalSteps };
+        yield { type: 'done', output: finalText, steps: totalSteps };
+        return;
+      }
+      // Give every call an ID so the assistant message, the emitted events and
+      // the tool results all refer to the same toolCallId.
+      const calls = this.withToolCallIds(pendingTools);
+      // Partition into thread calls and extra tool calls
+      const threadCalls = calls.filter(c => c.name === 'Thread');
+      const extraCalls = calls.filter(c => c.name !== 'Thread');
+      // Handle extra orchestrator tools
+      if (extraCalls.length > 0 && this.config.onOrchestratorTool) {
+        const firstExtra = extraCalls[0]!;
+        yield { type: 'tool_start', name: firstExtra.name, args: firstExtra.args };
+        // Extra tools produce directives — end the stream
+        yield { type: 'step_end', step: totalSteps };
+        yield { type: 'done', output: `Directive: ${firstExtra.name}`, steps: totalSteps };
+        return;
+      }
+      // Record assistant message
+      this.appendAssistantMessage(orchestratorMessages, calls, finalText);
+      // Yield tool_start for each thread
+      for (const call of threadCalls) {
+        yield {
+          type: 'tool_start',
+          name: 'Thread',
+          args: call.args,
+          ...(call.toolCallId != null ? { toolCallId: call.toolCallId } : {}),
+        };
+      }
+      // Execute all threads in parallel (shallow — no inner streaming)
+      const threadRequests = threadCalls.map(c => this.toThreadRequest(c.args));
+      const results = await this.threadExecutor.executeAll(threadRequests);
+      // Yield tool_end for each completed thread
+      for (let i = 0; i < results.length; i++) {
+        const result = results[i]!;
+        yield {
+          type: 'tool_end',
+          name: 'Thread',
+          result: {
+            success: result.success,
+            output: result.episode.summary,
+            episodeId: result.episode.id,
+            toolCalls: result.episode.toolCalls,
+            steps: result.episode.steps,
+            filesRead: result.episode.filesRead,
+            filesModified: result.episode.filesModified,
+            ...(result.durationMs != null ? { threadDurationMs: result.durationMs } : {}),
+            ...(result.resolvedModel != null ? { resolvedModel: result.resolvedModel } : {}),
+            ...(result.error != null ? { error: result.error } : {}),
+          },
+        };
+      }
+      // Append thread results to orchestrator context
+      this.appendThreadResults(orchestratorMessages, threadCalls, results);
+      // Non-Thread calls with no handler still need a result for their call ID.
+      this.appendUnhandledToolResults(orchestratorMessages, extraCalls);
+      yield { type: 'step_end', step: totalSteps };
+    }
+    yield { type: 'done', output: 'Orchestrator reached maximum turns.', steps: totalSteps };
+  }
+  // ── Private helpers ──
+  /**
+   * Build the orchestrator's private message list: system messages for
+   * long-term memories, session memos and prior episode summaries (each only
+   * when non-empty), followed by a copy of the outer messages.
+   */
+  private async buildOrchestratorContext(outerMessages: AgentMessage[]): Promise<AgentMessage[]> {
+    const messages: AgentMessage[] = [];
+    // Inject long-term memories
+    const longTermMemories = await this.config.longTermStore.getAllMemories();
+    if (longTermMemories.length > 0) {
+      const memoryText = longTermMemories
+        .map(m => `[${m.category}] ${m.content}`)
+        .join('\n');
+      messages.push({
+        role: 'system',
+        content: `Long-term memories:\n${memoryText}`,
+      });
+    }
+    // Inject session memos
+    const sessionMemos = await this.config.sessionMemoStore.getMemosBySession(this.config.sessionId);
+    if (sessionMemos.length > 0) {
+      const memoText = sessionMemos.map(m => m.content).join('\n---\n');
+      messages.push({
+        role: 'system',
+        content: `Session memos:\n${memoText}`,
+      });
+    }
+    // Inject existing episode summaries for this task
+    const existingEpisodes = await this.config.episodeStore.getEpisodesByTask(this.config.taskId);
+    if (existingEpisodes.length > 0) {
+      const episodeText = existingEpisodes
+        .map(e => `Episode ${e.index} [${e.id}]:\n${e.summary}`)
+        .join('\n\n');
+      messages.push({
+        role: 'system',
+        content: `Prior episodes for this task:\n${episodeText}`,
+      });
+    }
+    // Include outer messages (user prompt, any history)
+    messages.push(...outerMessages);
+    return messages;
+  }
+  /** Return `call` unchanged if it already has a toolCallId, otherwise a copy with a fresh UUID. */
+  private withToolCallId(call: ToolCallAction): ToolCallAction {
+    return call.toolCallId != null ? call : { ...call, toolCallId: randomUUID() };
+  }
+  /** Apply withToolCallId to every call, preserving order. The input array is not mutated. */
+  private withToolCallIds(calls: ToolCallAction[]): ToolCallAction[] {
+    return calls.map(c => this.withToolCallId(c));
+  }
+  /**
+   * Split calls into Thread calls and everything else, preserving order.
+   * `threadCalls[i]` and `threadRequests[i]` always describe the same call.
+   */
+  private partitionCalls(calls: ToolCallAction[]): {
+    threadCalls: ToolCallAction[];
+    threadRequests: ThreadRequest[];
+    extraCalls: ToolCallAction[];
+  } {
+    const threadCalls: ToolCallAction[] = [];
+    const threadRequests: ThreadRequest[] = [];
+    const extraCalls: ToolCallAction[] = [];
+    for (const call of calls) {
+      if (call.name === 'Thread') {
+        threadCalls.push(call);
+        threadRequests.push(this.toThreadRequest(call.args));
+      } else {
+        extraCalls.push(call);
+      }
+    }
+    return { threadCalls, threadRequests, extraCalls };
+  }
+  /**
+   * Convert raw Thread tool arguments into a ThreadRequest.
+   * `action` is always stringified (missing → ''); the optional fields are
+   * copied only when present and of the expected shape.
+   */
+  private toThreadRequest(args: Record<string, unknown>): ThreadRequest {
+    const req: ThreadRequest = { action: String(args.action ?? '') };
+    if (Array.isArray(args.contextEpisodeIds)) {
+      req.contextEpisodeIds = args.contextEpisodeIds.map(String);
+    }
+    if (args.model != null) {
+      req.model = String(args.model);
+    }
+    if (typeof args.maxSteps === 'number') {
+      req.maxSteps = args.maxSteps;
+    }
+    return req;
+  }
+  /**
+   * Append the assistant turn that issued `calls`.
+   * When `text` is undefined a readable one-line-per-call description is used
+   * as content; any provided string (including '') is used as-is.
+   */
+  private appendAssistantMessage(
+    messages: AgentMessage[],
+    calls: ToolCallAction[],
+    text?: string,
+  ): void {
+    const content = text ?? calls.map(c =>
+      c.name === 'Thread'
+        ? `Thread: ${String(c.args.action ?? '')}`
+        : `${c.name}: ${JSON.stringify(c.args)}`
+    ).join('\n');
+    messages.push({
+      role: 'assistant',
+      content,
+      toolCalls: calls.map(c => ({
+        // Callers normalise IDs first; the fallback only guards direct use.
+        toolCallId: c.toolCallId ?? randomUUID(),
+        toolName: c.name,
+        args: c.args,
+      })),
+    });
+  }
+  /**
+   * Append one tool message per thread result, using the episode summary as
+   * the result text. `calls[i]` must be the Thread call that produced
+   * `results[i]`.
+   */
+  private appendThreadResults(
+    messages: AgentMessage[],
+    calls: ToolCallAction[],
+    results: ThreadResult[],
+  ): void {
+    for (let i = 0; i < results.length; i++) {
+      const result = results[i]!;
+      const call = calls[i];
+      const callId = call?.toolCallId ?? '';
+      messages.push({
+        role: 'tool',
+        content: result.episode.summary,
+        toolResults: [{
+          toolCallId: callId,
+          toolName: 'Thread',
+          result: result.episode.summary,
+          isError: !result.success,
+        }],
+      });
+    }
+  }
+  /**
+   * Append an error tool result for each non-Thread call that could not be
+   * handled because no `onOrchestratorTool` handler is configured, so no
+   * recorded tool call is left without a result.
+   */
+  private appendUnhandledToolResults(messages: AgentMessage[], calls: ToolCallAction[]): void {
+    for (const call of calls) {
+      const message = `Tool "${call.name}" has no handler configured on the orchestrator and was not executed.`;
+      messages.push({
+        role: 'tool',
+        content: message,
+        toolResults: [{
+          toolCallId: call.toolCallId ?? '',
+          toolName: call.name,
+          result: message,
+          isError: true,
+        }],
+      });
+    }
+  }
+}