npm - @botbotgo/agent-harness - Versions diffs - 0.0.124 → 0.0.126 - Mend

@botbotgo/agent-harness 0.0.124 → 0.0.126

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (20) hide show

package/README.md +12 -12
package/README.zh.md +11 -11
package/dist/api.d.ts +49 -11
package/dist/api.js +131 -15
package/dist/benchmark/upstream-runtime-ab-benchmark.d.ts +32 -2
package/dist/benchmark/upstream-runtime-ab-benchmark.js +58 -2
package/dist/contracts/runtime.d.ts +46 -1
package/dist/index.d.ts +1 -1
package/dist/index.js +1 -1
package/dist/package-version.d.ts +1 -1
package/dist/package-version.js +1 -1
package/dist/runtime/agent-runtime-adapter.d.ts +3 -3
package/dist/runtime/agent-runtime-adapter.js +31 -7
package/dist/runtime/harness/run/helpers.d.ts +1 -1
package/dist/runtime/harness/run/helpers.js +1 -1
package/dist/runtime/harness/run/stream-run.js +1 -8
package/dist/runtime/harness/run/thread-records.d.ts +1 -1
package/dist/runtime/harness/run/thread-records.js +10 -7
package/dist/runtime/harness.js +2 -2
package/package.json +1 -1

package/README.md CHANGED Viewed

@@ -281,9 +281,9 @@ const result = await run(runtime, {
 });
 ```
-`run(runtime, { ... })` creates or continues a persisted thread and returns `threadId`, `runId`, `state`, and compact text `output`. Richer upstream result shapes stay available through `outputContent`, `contentBlocks`, and `structuredResponse`.
+`run(runtime, { ... })` creates or continues a persisted session and returns `sessionId`, `requestId`, `state`, and compact text `output`. Richer upstream result shapes stay available through `outputContent`, `contentBlocks`, and `structuredResponse`.
-Use `listRuns(runtime)` and `getRun(runtime, runId)` when a product needs a run-centric operations surface such as a review queue or execution dashboard.
+Use `listRequests(runtime)` and `getRequest(runtime, requestId)` when a product needs a request-centric operations surface such as a review queue or execution dashboard.
 Use `invocation` as the runtime-facing request envelope:
@@ -334,19 +334,19 @@ The runtime event stream includes:
 - `approval.resolved`
 - `output.delta`
-### Inspect Threads And Approvals
+### Inspect Sessions And Approvals
 ```ts
 import {
+  getSession,
   getApproval,
-  getThread,
+  listSessions,
   listApprovals,
-  listThreads,
   resolveApproval,
 } from "@botbotgo/agent-harness";
-const threads = await listThreads(runtime);
-const thread = await getThread(runtime, threads[0]!.threadId);
+const sessions = await listSessions(runtime);
+const session = await getSession(runtime, sessions[0]!.sessionId);
 const approvals = await listApprovals(runtime, { status: "pending" });
 const approval = approvals[0] ? await getApproval(runtime, approvals[0].approvalId) : null;
@@ -690,11 +690,11 @@ Primary exports:
 - `run`
 - `resolveApproval`
 - `subscribe`
-- `listRuns`
-- `getRun`
-- `listThreads`
-- `getThread`
-- `deleteThread`
+- `listRequests`
+- `getRequest`
+- `listSessions`
+- `getSession`
+- `deleteSession`
 - `listApprovals`
 - `getApproval`
 - `createToolMcpServer`

package/README.zh.md CHANGED Viewed

@@ -281,9 +281,9 @@ const result = await run(runtime, {
 });
 ```
-`run(runtime, { ... })` 会创建或延续持久化线程，并返回 `threadId`、`runId`、`state` 以及紧凑文本 `output`。更丰富的上游结果形态仍可通过 `outputContent`、`contentBlocks`、`structuredResponse` 等获得。
+`run(runtime, { ... })` 会创建或延续持久化会话，并返回 `sessionId`、`requestId`、`state` 以及紧凑文本 `output`。更丰富的上游结果形态仍可通过 `outputContent`、`contentBlocks`、`structuredResponse` 等获得。
-如果产品需要 run 视角的操作界面，例如 review queue 或执行看板，可使用 `listRuns(runtime)` 与 `getRun(runtime, runId)`。
+如果产品需要 request 视角的操作界面，例如 review queue 或执行看板，可使用 `listRequests(runtime)` 与 `getRequest(runtime, requestId)`。
 将 `invocation` 作为面向运行时的请求信封：
@@ -339,14 +339,14 @@ const result = await run(runtime, {
 ```ts
 import {
   getApproval,
-  getThread,
+  getSession,
   listApprovals,
-  listThreads,
+  listSessions,
   resolveApproval,
 } from "@botbotgo/agent-harness";
-const threads = await listThreads(runtime);
-const thread = await getThread(runtime, threads[0]!.threadId);
+const sessions = await listSessions(runtime);
+const session = await getSession(runtime, sessions[0]!.sessionId);
 const approvals = await listApprovals(runtime, { status: "pending" });
 const approval = approvals[0] ? await getApproval(runtime, approvals[0].approvalId) : null;
@@ -687,11 +687,11 @@ spec:
 - `run`
 - `resolveApproval`
 - `subscribe`
-- `listRuns`
-- `getRun`
-- `listThreads`
-- `getThread`
-- `deleteThread`
+- `listRequests`
+- `getRequest`
+- `listSessions`
+- `getSession`
+- `deleteSession`
 - `listApprovals`
 - `getApproval`
 - `createToolMcpServer`

package/dist/api.d.ts CHANGED Viewed

@@ -1,10 +1,39 @@
-import type { ApprovalRecord, CancelOptions, RunRecord, RunOptions, RunSummary, ResumeOptions, RuntimeHealthSnapshot, RuntimeAdapterOptions, ThreadSummary, ThreadRecord, WorkspaceLoadOptions } from "./contracts/types.js";
+import type { CancelOptions, RequestRecord, RequestSummary, ResumeOptions, RunDecisionOptions, RunResult, RunStartOptions, RuntimeHealthSnapshot, RuntimeAdapterOptions, SessionRecord, SessionSummary, WorkspaceLoadOptions } from "./contracts/types.js";
 import { AgentHarnessRuntime } from "./runtime/harness.js";
 import type { InventoryAgentRecord, InventorySkillRecord } from "./runtime/harness/system/inventory.js";
 import type { RequirementAssessmentOptions } from "./runtime/harness/system/skill-requirements.js";
 import type { ToolMcpServerOptions } from "./mcp.js";
 export { AgentHarnessRuntime } from "./runtime/harness.js";
 export { createUpstreamTimelineReducer } from "./upstream-events.js";
+type PublicApprovalRecord = {
+    approvalId: string;
+    pendingActionId: string;
+    sessionId: string;
+    requestId: string;
+    toolName: string;
+    status: "pending" | "approved" | "edited" | "rejected" | "expired";
+    requestedAt: string;
+    resolvedAt: string | null;
+    allowedDecisions: Array<"approve" | "edit" | "reject">;
+    inputPreview: Record<string, unknown>;
+};
+type PublicApprovalFilter = {
+    status?: PublicApprovalRecord["status"];
+    sessionId?: string;
+    requestId?: string;
+};
+type PublicRunStartOptions = Omit<RunStartOptions, "threadId"> & {
+    sessionId?: string;
+};
+type PublicRunDecisionOptions = Omit<RunDecisionOptions, "threadId" | "runId"> & {
+    sessionId: string;
+    requestId?: string;
+};
+type PublicRunOptions = PublicRunStartOptions | PublicRunDecisionOptions;
+type PublicRunResult = Omit<RunResult, "threadId" | "runId"> & {
+    sessionId: string;
+    requestId: string;
+};
 type CreateAgentHarnessOptions = {
     /**
      * Workspace loading behavior.
@@ -15,23 +44,32 @@ type CreateAgentHarnessOptions = {
 };
 export declare function createAgentHarness(): Promise<AgentHarnessRuntime>;
 export declare function createAgentHarness(workspaceRoot: string, options?: CreateAgentHarnessOptions): Promise<AgentHarnessRuntime>;
-export declare function run(runtime: AgentHarnessRuntime, options: RunOptions): Promise<import("./contracts/runtime.js").RunResult>;
+export declare function run(runtime: AgentHarnessRuntime, options: PublicRunOptions): Promise<PublicRunResult>;
 export declare function subscribe(runtime: AgentHarnessRuntime, listener: Parameters<AgentHarnessRuntime["subscribe"]>[0]): () => void;
-export declare function listThreads(runtime: AgentHarnessRuntime, filter?: Parameters<AgentHarnessRuntime["listThreads"]>[0]): Promise<ThreadSummary[]>;
-export declare function listRuns(runtime: AgentHarnessRuntime, filter?: Parameters<AgentHarnessRuntime["listRuns"]>[0]): Promise<RunSummary[]>;
-export declare function getThread(runtime: AgentHarnessRuntime, threadId: string): Promise<ThreadRecord | null>;
-export declare function getRun(runtime: AgentHarnessRuntime, runId: string): Promise<RunRecord | null>;
-export declare function deleteThread(runtime: AgentHarnessRuntime, threadId: string): Promise<boolean>;
-export declare function listApprovals(runtime: AgentHarnessRuntime, filter?: Parameters<AgentHarnessRuntime["listApprovals"]>[0]): Promise<ApprovalRecord[]>;
-export declare function getApproval(runtime: AgentHarnessRuntime, approvalId: string): Promise<ApprovalRecord | null>;
+export declare function listSessions(runtime: AgentHarnessRuntime, filter?: Parameters<AgentHarnessRuntime["listThreads"]>[0]): Promise<SessionSummary[]>;
+export declare function listRequests(runtime: AgentHarnessRuntime, filter?: {
+    agentId?: string;
+    sessionId?: string;
+    state?: RequestSummary["state"];
+}): Promise<RequestSummary[]>;
+export declare function getSession(runtime: AgentHarnessRuntime, sessionId: string): Promise<SessionRecord | null>;
+export declare function getRequest(runtime: AgentHarnessRuntime, requestId: string): Promise<RequestRecord | null>;
+export declare function deleteSession(runtime: AgentHarnessRuntime, sessionId: string): Promise<boolean>;
+export declare function listApprovals(runtime: AgentHarnessRuntime, filter?: PublicApprovalFilter): Promise<PublicApprovalRecord[]>;
+export declare function getApproval(runtime: AgentHarnessRuntime, approvalId: string): Promise<PublicApprovalRecord | null>;
 export declare function getHealth(runtime: AgentHarnessRuntime): Promise<RuntimeHealthSnapshot>;
 export declare function listAgentSkills(runtime: AgentHarnessRuntime, agentId: string, options?: RequirementAssessmentOptions): InventorySkillRecord[];
 export declare function describeInventory(runtime: AgentHarnessRuntime, options?: RequirementAssessmentOptions): {
     workspaceRoot: string;
     agents: InventoryAgentRecord[];
 };
-export declare function resolveApproval(runtime: AgentHarnessRuntime, options: ResumeOptions): Promise<import("./contracts/runtime.js").RunResult>;
-export declare function cancelRun(runtime: AgentHarnessRuntime, options: CancelOptions): Promise<import("./contracts/runtime.js").RunResult>;
+export declare function resolveApproval(runtime: AgentHarnessRuntime, options: ResumeOptions & {
+    sessionId?: string;
+    requestId?: string;
+}): Promise<PublicRunResult>;
+export declare function cancelRun(runtime: AgentHarnessRuntime, options: CancelOptions & {
+    requestId?: string;
+}): Promise<RunResult>;
 export declare function stop(runtime: AgentHarnessRuntime): Promise<void>;
 export declare function createToolMcpServer(runtime: AgentHarnessRuntime, options: ToolMcpServerOptions): Promise<import("@modelcontextprotocol/sdk/server/mcp.js").McpServer>;
 export declare function serveToolsOverStdio(runtime: AgentHarnessRuntime, options: ToolMcpServerOptions): Promise<import("@modelcontextprotocol/sdk/server/mcp.js").McpServer>;

package/dist/api.js CHANGED Viewed

@@ -2,6 +2,108 @@ import { AgentHarnessRuntime } from "./runtime/harness.js";
 import { loadWorkspace } from "./workspace/compile.js";
 export { AgentHarnessRuntime } from "./runtime/harness.js";
 export { createUpstreamTimelineReducer } from "./upstream-events.js";
+function toSessionSummary(summary) {
+    return {
+        agentId: summary.agentId,
+        sessionId: summary.threadId,
+        latestRequestId: summary.latestRunId,
+        createdAt: summary.createdAt,
+        updatedAt: summary.updatedAt,
+        status: summary.status,
+    };
+}
+function toRequestSummary(summary) {
+    return {
+        requestId: summary.runId,
+        sessionId: summary.threadId,
+        agentId: summary.agentId,
+        executionMode: summary.executionMode,
+        adapterKind: summary.adapterKind,
+        createdAt: summary.createdAt,
+        updatedAt: summary.updatedAt,
+        state: summary.state,
+        checkpointRef: summary.checkpointRef,
+        resumable: summary.resumable,
+    };
+}
+function toSessionRecord(record) {
+    return {
+        sessionId: record.threadId,
+        entryAgentId: record.entryAgentId,
+        currentState: record.currentState,
+        latestRequestId: record.latestRunId,
+        createdAt: record.createdAt,
+        updatedAt: record.updatedAt,
+        messages: record.messages,
+        requests: record.runs.map(toRequestSummary),
+        pendingDecision: record.pendingDecision,
+    };
+}
+function toRequestRecord(record) {
+    return toRequestSummary(record);
+}
+function toApprovalRecord(record) {
+    return {
+        approvalId: record.approvalId,
+        pendingActionId: record.pendingActionId,
+        sessionId: record.threadId,
+        requestId: record.runId,
+        toolName: record.toolName,
+        status: record.status,
+        requestedAt: record.requestedAt,
+        resolvedAt: record.resolvedAt,
+        allowedDecisions: record.allowedDecisions,
+        inputPreview: record.inputPreview,
+    };
+}
+function toPublicRunResult(result) {
+    return {
+        sessionId: result.threadId,
+        requestId: result.runId,
+        state: result.state,
+        output: result.output,
+        finalMessageText: result.finalMessageText,
+        outputContent: result.outputContent,
+        contentBlocks: result.contentBlocks,
+        structuredResponse: result.structuredResponse,
+        interruptContent: result.interruptContent,
+        agentId: result.agentId,
+        approvalId: result.approvalId,
+        pendingActionId: result.pendingActionId,
+        delegationId: result.delegationId,
+        artifacts: result.artifacts,
+        metadata: result.metadata,
+    };
+}
+function toInternalRunOptions(options) {
+    if ("decision" in options) {
+        return {
+            approvalId: options.approvalId,
+            decision: options.decision,
+            editedInput: options.editedInput,
+            listeners: options.listeners,
+            runId: options.requestId,
+            threadId: options.sessionId,
+        };
+    }
+    return {
+        agentId: options.agentId,
+        input: options.input,
+        invocation: options.invocation,
+        listeners: options.listeners,
+        priority: options.priority,
+        threadId: options.sessionId,
+    };
+}
+function toInternalResumeOptions(options) {
+    return {
+        approvalId: options.approvalId,
+        decision: options.decision,
+        editedInput: options.editedInput,
+        runId: options.requestId ?? options.runId,
+        threadId: options.sessionId ?? options.threadId,
+    };
+}
 export async function createAgentHarness(workspaceRoot = process.cwd(), options = {}) {
     const workspace = await loadWorkspace(workspaceRoot, options.load ?? {});
     const harness = new AgentHarnessRuntime(workspace, options.adapter ?? {});
@@ -9,31 +111,42 @@ export async function createAgentHarness(workspaceRoot = process.cwd(), options
     return harness;
 }
 export async function run(runtime, options) {
-    return runtime.run(options);
+    return toPublicRunResult(await runtime.run(toInternalRunOptions(options)));
 }
 export function subscribe(runtime, listener) {
     return runtime.subscribe(listener);
 }
-export async function listThreads(runtime, filter) {
-    return runtime.listThreads(filter);
+export async function listSessions(runtime, filter) {
+    return (await runtime.listThreads(filter)).map(toSessionSummary);
 }
-export async function listRuns(runtime, filter) {
-    return runtime.listRuns(filter);
+export async function listRequests(runtime, filter) {
+    return (await runtime.listRuns({
+        agentId: filter?.agentId,
+        state: filter?.state,
+        threadId: filter?.sessionId,
+    })).map(toRequestSummary);
 }
-export async function getThread(runtime, threadId) {
-    return runtime.getThread(threadId);
+export async function getSession(runtime, sessionId) {
+    const record = await runtime.getThread(sessionId);
+    return record ? toSessionRecord(record) : null;
 }
-export async function getRun(runtime, runId) {
-    return runtime.getRun(runId);
+export async function getRequest(runtime, requestId) {
+    const record = await runtime.getRun(requestId);
+    return record ? toRequestRecord(record) : null;
 }
-export async function deleteThread(runtime, threadId) {
-    return runtime.deleteThread(threadId);
+export async function deleteSession(runtime, sessionId) {
+    return runtime.deleteThread(sessionId);
 }
 export async function listApprovals(runtime, filter) {
-    return runtime.listApprovals(filter);
+    return (await runtime.listApprovals({
+        runId: filter?.requestId,
+        status: filter?.status,
+        threadId: filter?.sessionId,
+    })).map(toApprovalRecord);
 }
 export async function getApproval(runtime, approvalId) {
-    return runtime.getApproval(approvalId);
+    const record = await runtime.getApproval(approvalId);
+    return record ? toApprovalRecord(record) : null;
 }
 export async function getHealth(runtime) {
     return runtime.getHealth();
@@ -45,10 +158,13 @@ export function describeInventory(runtime, options) {
     return runtime.describeWorkspaceInventory(options);
 }
 export async function resolveApproval(runtime, options) {
-    return runtime.resume(options);
+    return toPublicRunResult(await runtime.resume(toInternalResumeOptions(options)));
 }
 export async function cancelRun(runtime, options) {
-    return runtime.cancelRun(options);
+    return runtime.cancelRun({
+        ...options,
+        runId: options.requestId ?? options.runId,
+    });
 }
 export async function stop(runtime) {
     return runtime.stop();

package/dist/benchmark/upstream-runtime-ab-benchmark.d.ts CHANGED Viewed

@@ -1,15 +1,23 @@
-export declare const DEFAULT_UPSTREAM_BENCHMARK_PATHS: readonly ["harness", "raw-langchain-v1", "raw-deepagent"];
+export declare const DEFAULT_UPSTREAM_BENCHMARK_PATHS: readonly ["harness", "harness-minimal-upstream", "raw-langchain-v1", "raw-deepagent"];
 export declare const DEFAULT_UPSTREAM_BENCHMARK_WORKLOAD: "tool";
+export declare const DEFAULT_UPSTREAM_BENCHMARK_SCENARIOS: readonly ["normal", "complex", "extreme"];
 export type UpstreamBenchmarkPath = (typeof DEFAULT_UPSTREAM_BENCHMARK_PATHS)[number];
 export type UpstreamBenchmarkWorkload = "tool" | "no-tool";
+export type UpstreamBenchmarkScenario = (typeof DEFAULT_UPSTREAM_BENCHMARK_SCENARIOS)[number];
 export type UpstreamBenchmarkRunSummary = {
     providerLabel: string;
     model: string;
+    scenario: UpstreamBenchmarkScenario;
     path: UpstreamBenchmarkPath;
     runNumber: number;
     status: "completed" | "failed";
     totalMs: number;
     firstTokenMs: number | null;
+    setupMs: number | null;
+    firstToolMs: number | null;
+    lastToolMs: number | null;
+    finalOutputMs: number | null;
+    cleanupMs: number | null;
     outputLength: number;
     normalizedOutputLength: number;
     toolCallCount: number;
@@ -19,6 +27,7 @@ export type UpstreamBenchmarkRunSummary = {
 export type UpstreamBenchmarkAggregateSummary = {
     providerLabel: string;
     model: string;
+    scenario: UpstreamBenchmarkScenario;
     path: UpstreamBenchmarkPath;
     repetitions: number;
     successCount: number;
@@ -31,6 +40,11 @@ export type UpstreamBenchmarkAggregateSummary = {
     trimmedAvgFirstTokenMs: number | null;
     medianFirstTokenMs: number | null;
     p95FirstTokenMs: number | null;
+    avgSetupMs: number | null;
+    avgFirstToolMs: number | null;
+    avgLastToolMs: number | null;
+    avgFinalOutputMs: number | null;
+    avgCleanupMs: number | null;
     avgOutputLength: number | null;
     avgNormalizedOutputLength: number | null;
     avgToolCallCount: number | null;
@@ -44,8 +58,24 @@ export type UpstreamBenchmarkComparison = {
     avgFirstTokenMsDelta: number | null;
     avgFirstTokenMsOverheadPct: number | null;
 };
+export type UpstreamBenchmarkPhaseCheckpoint = {
+    label: string;
+    atMs: number | null;
+};
+export type UpstreamBenchmarkTemperature = "cold" | "warm";
+export type UpstreamBenchmarkDurationSummary = {
+    count: number;
+    totalMs: number;
+    avgMs: number | null;
+    maxMs: number | null;
+};
 export declare function resolveUpstreamBenchmarkPaths(rawValue?: string): readonly UpstreamBenchmarkPath[];
 export declare function resolveUpstreamBenchmarkWorkload(rawValue?: string): UpstreamBenchmarkWorkload;
+export declare function resolveUpstreamBenchmarkScenarios(rawValue?: string): readonly UpstreamBenchmarkScenario[];
 export declare function extractLastMatchingToken(output: string, prefixes: readonly string[]): string;
-export declare function aggregateUpstreamBenchmarkRuns(providerLabel: string, model: string, path: UpstreamBenchmarkPath, runs: UpstreamBenchmarkRunSummary[]): UpstreamBenchmarkAggregateSummary;
+export declare function aggregateUpstreamBenchmarkRuns(providerLabel: string, model: string, scenario: UpstreamBenchmarkScenario, path: UpstreamBenchmarkPath, runs: UpstreamBenchmarkRunSummary[]): UpstreamBenchmarkAggregateSummary;
+export declare function withUpstreamBenchmarkCleanup(summary: UpstreamBenchmarkRunSummary, cleanupMs: number | null): UpstreamBenchmarkRunSummary;
+export declare function summarizeUpstreamBenchmarkPhases(checkpoints: readonly UpstreamBenchmarkPhaseCheckpoint[]): Record<string, number | null>;
+export declare function selectUpstreamBenchmarkRunsByTemperature(runs: readonly UpstreamBenchmarkRunSummary[], temperature: UpstreamBenchmarkTemperature): UpstreamBenchmarkRunSummary[];
+export declare function summarizeUpstreamBenchmarkDurations(values: readonly number[]): UpstreamBenchmarkDurationSummary;
 export declare function compareUpstreamBenchmarkPaths(baseline: UpstreamBenchmarkAggregateSummary, candidate: UpstreamBenchmarkAggregateSummary): UpstreamBenchmarkComparison;

package/dist/benchmark/upstream-runtime-ab-benchmark.js CHANGED Viewed

@@ -1,9 +1,15 @@
 export const DEFAULT_UPSTREAM_BENCHMARK_PATHS = Object.freeze([
     "harness",
+    "harness-minimal-upstream",
     "raw-langchain-v1",
     "raw-deepagent",
 ]);
 export const DEFAULT_UPSTREAM_BENCHMARK_WORKLOAD = "tool";
+export const DEFAULT_UPSTREAM_BENCHMARK_SCENARIOS = Object.freeze([
+    "normal",
+    "complex",
+    "extreme",
+]);
 function average(values) {
     return Number((values.reduce((sum, value) => sum + value, 0) / values.length).toFixed(2));
 }
@@ -47,12 +53,25 @@ export function resolveUpstreamBenchmarkPaths(rawValue) {
     const parsed = rawValue
         .split(",")
         .map((value) => value.trim().toLowerCase())
-        .filter((value) => value === "harness" || value === "raw-langchain-v1" || value === "raw-deepagent");
+        .filter((value) => value === "harness" ||
+        value === "harness-minimal-upstream" ||
+        value === "raw-langchain-v1" ||
+        value === "raw-deepagent");
     return parsed.length > 0 ? parsed : [...DEFAULT_UPSTREAM_BENCHMARK_PATHS];
 }
 export function resolveUpstreamBenchmarkWorkload(rawValue) {
     return rawValue?.trim().toLowerCase() === "no-tool" ? "no-tool" : DEFAULT_UPSTREAM_BENCHMARK_WORKLOAD;
 }
+export function resolveUpstreamBenchmarkScenarios(rawValue) {
+    if (!rawValue) {
+        return [...DEFAULT_UPSTREAM_BENCHMARK_SCENARIOS];
+    }
+    const parsed = rawValue
+        .split(",")
+        .map((value) => value.trim().toLowerCase())
+        .filter((value) => value === "normal" || value === "complex" || value === "extreme");
+    return parsed.length > 0 ? parsed : [...DEFAULT_UPSTREAM_BENCHMARK_SCENARIOS];
+}
 export function extractLastMatchingToken(output, prefixes) {
     const normalized = output.replace(/\s+/g, " ").trim();
     let matched = "";
@@ -69,7 +88,7 @@ export function extractLastMatchingToken(output, prefixes) {
     }
     return matched || normalized;
 }
-export function aggregateUpstreamBenchmarkRuns(providerLabel, model, path, runs) {
+export function aggregateUpstreamBenchmarkRuns(providerLabel, model, scenario, path, runs) {
     const successfulRuns = runs.filter((run) => run.status === "completed");
     const totalValues = successfulRuns.map((run) => run.totalMs);
     const firstTokenValues = successfulRuns
@@ -78,6 +97,7 @@ export function aggregateUpstreamBenchmarkRuns(providerLabel, model, path, runs)
     return {
         providerLabel,
         model,
+        scenario,
         path,
         repetitions: runs.length,
         successCount: successfulRuns.length,
@@ -90,12 +110,48 @@ export function aggregateUpstreamBenchmarkRuns(providerLabel, model, path, runs)
         trimmedAvgFirstTokenMs: trimmedAverageOrNull(firstTokenValues, 0.1),
         medianFirstTokenMs: medianOrNull(firstTokenValues),
         p95FirstTokenMs: percentileOrNull(firstTokenValues, 0.95),
+        avgSetupMs: averageOrNull(successfulRuns.map((run) => run.setupMs).filter((value) => value !== null)),
+        avgFirstToolMs: averageOrNull(successfulRuns.map((run) => run.firstToolMs).filter((value) => value !== null)),
+        avgLastToolMs: averageOrNull(successfulRuns.map((run) => run.lastToolMs).filter((value) => value !== null)),
+        avgFinalOutputMs: averageOrNull(successfulRuns.map((run) => run.finalOutputMs).filter((value) => value !== null)),
+        avgCleanupMs: averageOrNull(successfulRuns.map((run) => run.cleanupMs).filter((value) => value !== null)),
         avgOutputLength: averageOrNull(successfulRuns.map((run) => run.outputLength)),
         avgNormalizedOutputLength: averageOrNull(successfulRuns.map((run) => run.normalizedOutputLength)),
         avgToolCallCount: averageOrNull(successfulRuns.map((run) => run.toolCallCount)),
         exactOutputMatchCount: successfulRuns.filter((run) => run.exactOutputMatch).length,
     };
 }
+export function withUpstreamBenchmarkCleanup(summary, cleanupMs) {
+    return {
+        ...summary,
+        cleanupMs,
+    };
+}
+export function summarizeUpstreamBenchmarkPhases(checkpoints) {
+    let previousAtMs = 0;
+    const durations = {};
+    for (const checkpoint of checkpoints) {
+        const key = `${checkpoint.label}Ms`;
+        if (checkpoint.atMs === null) {
+            durations[key] = null;
+            continue;
+        }
+        durations[key] = Number((checkpoint.atMs - previousAtMs).toFixed(2));
+        previousAtMs = checkpoint.atMs;
+    }
+    return durations;
+}
+export function selectUpstreamBenchmarkRunsByTemperature(runs, temperature) {
+    return runs.filter((run) => (temperature === "cold" ? run.runNumber === 1 : run.runNumber > 1));
+}
+export function summarizeUpstreamBenchmarkDurations(values) {
+    return {
+        count: values.length,
+        totalMs: Number(values.reduce((sum, value) => sum + value, 0).toFixed(2)),
+        avgMs: values.length > 0 ? average(values) : null,
+        maxMs: values.length > 0 ? Number(Math.max(...values).toFixed(2)) : null,
+    };
+}
 function computeOverhead(candidate, baseline) {
     if (candidate === null || baseline === null) {
         return { delta: null, pct: null };

package/dist/contracts/runtime.d.ts CHANGED Viewed

@@ -1,5 +1,10 @@
 import type { RunState } from "./core.js";
 import type { CompiledAgentBinding, CompiledModel, CompiledTool, ParsedAgentObject, ParsedToolObject, WorkspaceBundle } from "./workspace.js";
+/**
+ * Persisted runtime summary for an inspectable conversation thread.
+ * This projects upstream session/thread execution state into a stable runtime
+ * inspection surface.
+ */
 export type ThreadSummary = {
     agentId: string;
     threadId: string;
@@ -8,9 +13,17 @@ export type ThreadSummary = {
     updatedAt: string;
     status: RunState;
 };
-export type SessionRecord = ThreadSummary;
+export type SessionSummary = Omit<ThreadSummary, "threadId" | "latestRunId"> & {
+    sessionId: string;
+    latestRequestId: string;
+};
 export type KnownHarnessEventType = "run.created" | "run.queued" | "run.dequeued" | "run.state.changed" | "run.resumed" | "approval.requested" | "approval.resolved" | "artifact.created" | "output.delta" | "runtime.health.changed" | "runtime.synthetic_fallback";
 export type HarnessEventType = KnownHarnessEventType | (string & {});
+/**
+ * Persisted runtime event recorded by the harness runtime.
+ * Event payload semantics should stay aligned with upstream/runtime behavior
+ * rather than introducing a second execution protocol.
+ */
 export type HarnessEvent = {
     eventId: string;
     eventType: HarnessEventType;
@@ -45,6 +58,10 @@ export type RuntimeHealthSymptom = {
     firstSeenAt: string;
     lastSeenAt: string;
 };
+/**
+ * Harness-operated operational state built from persisted records and runtime telemetry.
+ * This is runtime ops state, not an upstream execution semantic.
+ */
 export type RuntimeHealthSnapshot = {
     status: HealthStatus;
     updatedAt: string;
@@ -153,8 +170,20 @@ export type ThreadRunRecord = {
     checkpointRef: string | null;
     resumable: boolean;
 };
+/**
+ * Persisted run summary projected from upstream execution state plus runtime lifecycle metadata.
+ */
 export type RunSummary = ThreadRunRecord;
 export type RunRecord = RunSummary;
+export type RequestSummary = Omit<RunSummary, "threadId" | "runId"> & {
+    sessionId: string;
+    requestId: string;
+};
+export type RequestRecord = RequestSummary;
+/**
+ * Persisted thread inspection record assembled from runtime records.
+ * This is an inspectable projection, not a second thread semantic model.
+ */
 export type ThreadRecord = {
     threadId: string;
     entryAgentId: string;
@@ -172,6 +201,11 @@ export type ThreadRecord = {
         requestedAt: string;
     };
 };
+export type SessionRecord = Omit<ThreadRecord, "threadId" | "latestRunId" | "runs"> & {
+    sessionId: string;
+    latestRequestId: string;
+    requests: RequestSummary[];
+};
 export type ResumeOptions = {
     threadId?: string;
     runId?: string;
@@ -188,6 +222,11 @@ export type RestartConversationOptions = {
     mode: "restart-in-thread" | "restart-new-thread";
     input: string;
 };
+/**
+ * Persisted approval inspection record.
+ * Approval decision semantics should stay aligned with upstream interrupt/approval
+ * behavior even though the record is stored and resolved through harness persistence.
+ */
 export type ApprovalRecord = {
     approvalId: string;
     pendingActionId: string;
@@ -240,6 +279,12 @@ export type RuntimeAdapterOptions = {
     checkpointerResolver?: RuntimeCheckpointerResolver;
     storeResolver?: RuntimeStoreResolver;
     backendResolver?: RuntimeBackendResolver;
+    /**
+     * DeepAgent execution semantics stay upstream-owned.
+     * `minimal` keeps harness runtime persistence/ops active while only attaching
+     * upstream substrate objects when the binding explicitly needs them.
+     */
+    deepAgentUpstreamSubstrateMode?: "full" | "minimal";
 };
 export type ToolKindAdapter = {
     type: string;

package/dist/index.d.ts CHANGED Viewed

@@ -1,4 +1,4 @@
-export { AgentHarnessRuntime, cancelRun, createAgentHarness, createUpstreamTimelineReducer, createToolMcpServer, deleteThread, describeInventory, getApproval, getHealth, getRun, getThread, listAgentSkills, listApprovals, listRuns, listThreads, resolveApproval, run, serveToolsOverStdio, subscribe, stop, } from "./api.js";
+export { AgentHarnessRuntime, cancelRun, createAgentHarness, createUpstreamTimelineReducer, createToolMcpServer, deleteSession, describeInventory, getApproval, getRequest, getHealth, getSession, listAgentSkills, listApprovals, listRequests, listSessions, resolveApproval, run, serveToolsOverStdio, subscribe, stop, } from "./api.js";
 export type { ToolMcpServerOptions } from "./mcp.js";
 export { tool } from "./tools.js";
 export type { UpstreamTimelineProjection, UpstreamTimelineReducer } from "./upstream-events.js";

package/dist/index.js CHANGED Viewed

@@ -1,2 +1,2 @@
-export { AgentHarnessRuntime, cancelRun, createAgentHarness, createUpstreamTimelineReducer, createToolMcpServer, deleteThread, describeInventory, getApproval, getHealth, getRun, getThread, listAgentSkills, listApprovals, listRuns, listThreads, resolveApproval, run, serveToolsOverStdio, subscribe, stop, } from "./api.js";
+export { AgentHarnessRuntime, cancelRun, createAgentHarness, createUpstreamTimelineReducer, createToolMcpServer, deleteSession, describeInventory, getApproval, getRequest, getHealth, getSession, listAgentSkills, listApprovals, listRequests, listSessions, resolveApproval, run, serveToolsOverStdio, subscribe, stop, } from "./api.js";
 export { tool } from "./tools.js";

package/dist/package-version.d.ts CHANGED Viewed

	@@ -1 +1 @@
1	- export declare const AGENT_HARNESS_VERSION = "0.0.123";
1	+ export declare const AGENT_HARNESS_VERSION = "0.0.125";

package/dist/package-version.js CHANGED Viewed

	@@ -1 +1 @@
1	- export const AGENT_HARNESS_VERSION = "0.0.123";
1	+ export const AGENT_HARNESS_VERSION = "0.0.125";

package/dist/runtime/agent-runtime-adapter.d.ts CHANGED Viewed

@@ -32,9 +32,9 @@ export declare function buildDeepAgentCreateParams(input: {
     resolvedTools: unknown[];
     resolvedMiddleware: unknown[];
     resolvedSubagents: UpstreamSubagentConfig[];
-    resolvedCheckpointer: unknown;
-    resolvedStore: unknown;
-    resolvedBackend: unknown;
+    resolvedCheckpointer?: unknown;
+    resolvedStore?: unknown;
+    resolvedBackend?: unknown;
     resolvedInterruptOn?: Record<string, {
         allowedDecisions: Array<"approve" | "edit" | "reject">;
     }>;

package/dist/runtime/agent-runtime-adapter.js CHANGED Viewed

@@ -18,7 +18,7 @@ export { applyDeepAgentDelegationPromptCompatibility, materializeDeepAgentSkillS
 export { buildAuthOmittingFetch, normalizeOpenAICompatibleInit } from "./adapter/compat/openai-compatible.js";
 export { buildToolNameMapping, createModelFacingToolNameCandidates, createModelFacingToolNameLookupCandidates, resolveModelFacingToolName, sanitizeToolNameForModel, } from "./adapter/tool/tool-name-mapping.js";
 export { computeRemainingTimeoutMs, isRetryableProviderError, resolveBindingTimeout, resolveProviderRetryPolicy, resolveStreamIdleTimeout, resolveTimeoutMs, } from "./adapter/resilience.js";
-import { getBindingAdapterKind, getBindingExecutionKind, getBindingExecutionParams, getBindingFilesystemConfig, getBindingInterruptCompatibilityRules, getBindingPrimaryModel, getBindingSkills, getBindingSubagents, getBindingToolCount, getBindingPrimaryTools, getBindingSystemPrompt, isDeepAgentBinding, isLangChainBinding, } from "./support/compiled-binding.js";
+import { getBindingBackendConfig, getBindingAdapterKind, getBindingExecutionKind, getBindingExecutionParams, getBindingFilesystemConfig, getBindingInterruptCompatibilityRules, getBindingMemorySources, getBindingMiddlewareConfigs, getBindingPrimaryModel, getBindingSkills, getBindingStoreConfig, getBindingSubagents, getBindingToolCount, getBindingPrimaryTools, getBindingSystemPrompt, isDeepAgentBinding, isLangChainBinding, } from "./support/compiled-binding.js";
 const AGENT_INTERRUPT_SENTINEL_PREFIX = "__agent_harness_interrupt__:";
 const UPSTREAM_BUILTIN_MIDDLEWARE_TOOL_NAMES = Object.freeze([
     "write_todos",
@@ -116,13 +116,28 @@ export function buildDeepAgentCreateParams(input) {
         model: input.resolvedModel,
         tools: input.resolvedTools,
         middleware: input.resolvedMiddleware,
-        checkpointer: input.resolvedCheckpointer,
-        store: input.resolvedStore,
         subagents: input.resolvedSubagents,
-        backend: input.resolvedBackend,
         interruptOn: input.resolvedInterruptOn,
+        ...(input.resolvedCheckpointer !== undefined ? { checkpointer: input.resolvedCheckpointer } : {}),
+        ...(input.resolvedStore !== undefined ? { store: input.resolvedStore } : {}),
+        ...(input.resolvedBackend !== undefined ? { backend: input.resolvedBackend } : {}),
     };
 }
+function shouldAttachMinimalDeepAgentCheckpointer(binding, resolvedInterruptOn) {
+    if (binding.harnessRuntime.checkpointer !== undefined) {
+        return true;
+    }
+    return resolvedInterruptOn !== undefined && Object.keys(resolvedInterruptOn).length > 0;
+}
+function shouldAttachMinimalDeepAgentStore(binding) {
+    return getBindingStoreConfig(binding) !== undefined || getBindingMemorySources(binding).length > 0;
+}
+function shouldAttachMinimalDeepAgentBackend(binding) {
+    return (getBindingBackendConfig(binding) !== undefined ||
+        getBindingMemorySources(binding).length > 0 ||
+        getBindingSkills(binding).length > 0 ||
+        (getBindingMiddlewareConfigs(binding)?.length ?? 0) > 0);
+}
 export class AgentRuntimeAdapter {
     options;
     modelCache = new Map();
@@ -351,10 +366,19 @@ export class AgentRuntimeAdapter {
         const resolvedTools = this.resolveTools(primaryTools, binding);
         const resolvedMiddleware = await this.resolveMiddleware(binding);
         const resolvedSubagents = await this.resolveSubagents(getBindingSubagents(binding), binding);
-        const resolvedCheckpointer = resolveRunnableCheckpointer(this.options, binding);
-        const resolvedStore = this.options.storeResolver?.(binding);
-        const resolvedBackend = this.options.backendResolver?.(binding);
         const resolvedInterruptOn = resolveRunnableInterruptOn(binding);
+        const substrateMode = this.options.deepAgentUpstreamSubstrateMode ?? "minimal";
+        const resolvedCheckpointer = substrateMode === "minimal"
+            ? (shouldAttachMinimalDeepAgentCheckpointer(binding, resolvedInterruptOn)
+                ? resolveRunnableCheckpointer(this.options, binding)
+                : undefined)
+            : resolveRunnableCheckpointer(this.options, binding);
+        const resolvedStore = substrateMode === "minimal"
+            ? (shouldAttachMinimalDeepAgentStore(binding) ? this.options.storeResolver?.(binding) : undefined)
+            : this.options.storeResolver?.(binding);
+        const resolvedBackend = substrateMode === "minimal"
+            ? (shouldAttachMinimalDeepAgentBackend(binding) ? this.options.backendResolver?.(binding) : undefined)
+            : this.options.backendResolver?.(binding);
         const resolvedSkills = resolveDeepAgentSkillSourcePaths({
             workspaceRoot: binding.harnessRuntime.workspaceRoot,
             runRoot: binding.harnessRuntime.runRoot,

package/dist/runtime/harness/run/helpers.d.ts CHANGED Viewed

@@ -1,5 +1,5 @@
 import type { ApprovalRecord, HarnessEvent, InternalApprovalRecord, InvocationEnvelope, RunListeners, RunOptions, RunResult, RunStartOptions, MessageContent } from "../../../contracts/types.js";
-export declare function toPublicApprovalRecord(approval: InternalApprovalRecord): ApprovalRecord;
+export declare function toInspectableApprovalRecord(approval: InternalApprovalRecord): ApprovalRecord;
 export declare function normalizeInvocationEnvelope(options: RunStartOptions): {
     context?: Record<string, unknown>;
     state?: Record<string, unknown>;

package/dist/runtime/harness/run/helpers.js CHANGED Viewed

@@ -1,5 +1,5 @@
 import { normalizeMessageContent } from "../../../utils/message-content.js";
-export function toPublicApprovalRecord(approval) {
+export function toInspectableApprovalRecord(approval) {
     const { toolCallId: _toolCallId, checkpointRef: _checkpointRef, eventRefs: _eventRefs, ...publicApproval } = approval;
     return publicApproval;
 }

package/dist/runtime/harness/run/stream-run.js CHANGED Viewed

@@ -1,6 +1,6 @@
 import { AGENT_INTERRUPT_SENTINEL_PREFIX, RuntimeOperationTimeoutError } from "../../agent-runtime-adapter.js";
 import { renderRuntimeFailure, renderToolFailure } from "../../support/harness-support.js";
-import { createContentBlocksItem, createToolResultKey, emitOutputDeltaAndCreateItem, } from "../events/streaming.js";
+import { createContentBlocksItem, createToolResultKey, } from "../events/streaming.js";
 function normalizeStreamChunk(chunk) {
     if (typeof chunk === "string") {
         if (chunk.startsWith(AGENT_INTERRUPT_SENTINEL_PREFIX)) {
@@ -33,7 +33,6 @@ export async function* streamHarnessRun(options) {
     let releaseRunSlot = async () => undefined;
     let emitted = false;
     let streamActivityObserved = false;
-    const emitOutputDelta = (content) => emitOutputDeltaAndCreateItem(options.emit, options.threadId, options.runId, options.selectedAgentId, content);
     try {
         const [priorHistory, acquiredReleaseRunSlot] = await Promise.all([
             priorHistoryPromise,
@@ -114,12 +113,10 @@ export async function* streamHarnessRun(options) {
             }
             emitted = true;
             assistantOutput += normalizedChunk.content;
-            yield await emitOutputDelta(normalizedChunk.content);
         }
         if (!assistantOutput && toolErrors.length > 0) {
             assistantOutput = toolErrors.join("\n\n");
             emitted = true;
-            yield await emitOutputDelta(assistantOutput);
         }
         if (!assistantOutput) {
             const actual = await options.invokeWithHistory(options.binding, options.input, options.threadId, options.runId);
@@ -129,7 +126,6 @@ export async function* streamHarnessRun(options) {
             if (actual.output) {
                 assistantOutput = actual.output;
                 emitted = true;
-                yield await emitOutputDelta(actual.output);
             }
         }
         await options.appendAssistantMessage(options.threadId, options.runId, assistantOutput);
@@ -216,9 +212,6 @@ export async function* streamHarnessRun(options) {
             if (Array.isArray(actual.contentBlocks) && actual.contentBlocks.length > 0) {
                 yield createContentBlocksItem(options.threadId, options.runId, options.selectedAgentId, actual.contentBlocks);
             }
-            if (actual.output) {
-                yield await emitOutputDelta(actual.output);
-            }
             yield {
                 type: "result",
                 result: {

package/dist/runtime/harness/run/thread-records.d.ts CHANGED Viewed

@@ -1,6 +1,6 @@
 import type { ApprovalRecord, ThreadRecord, ThreadSummary } from "../../../contracts/types.js";
 import type { RuntimePersistence } from "../../../persistence/types.js";
-export declare function getThreadRecord(input: {
+export declare function buildThreadInspectionRecord(input: {
     persistence: RuntimePersistence;
     getSession: (threadId: string) => Promise<ThreadSummary | null>;
 }, threadId: string): Promise<ThreadRecord | null>;

package/dist/runtime/harness/run/thread-records.js CHANGED Viewed

@@ -1,5 +1,10 @@
-import { isTerminalRunState, toPublicApprovalRecord } from "./helpers.js";
-export async function getThreadRecord(input, threadId) {
+import { isTerminalRunState, toInspectableApprovalRecord } from "./helpers.js";
+function selectLatestPendingApproval(approvals) {
+    return approvals
+        .filter((approval) => approval.status === "pending")
+        .sort((left, right) => right.requestedAt.localeCompare(left.requestedAt))[0];
+}
+export async function buildThreadInspectionRecord(input, threadId) {
     const [threadSummary, meta, messages, runs] = await Promise.all([
         input.getSession(threadId),
         input.persistence.getThreadMeta(threadId),
@@ -11,9 +16,7 @@ export async function getThreadRecord(input, threadId) {
     }
     const latestRunId = threadSummary.latestRunId;
     const latestApprovals = await input.persistence.getRunApprovals(threadId, latestRunId);
-    const pendingApproval = latestApprovals
-        .filter((approval) => approval.status === "pending")
-        .sort((left, right) => right.requestedAt.localeCompare(left.requestedAt))[0];
+    const pendingApproval = selectLatestPendingApproval(latestApprovals);
     return {
         threadId,
         entryAgentId: meta.entryAgentId,
@@ -36,11 +39,11 @@ export async function getThreadRecord(input, threadId) {
 }
 export async function listPublicApprovals(input, filter) {
     const approvals = await input.persistence.listApprovals(filter);
-    return approvals.map((approval) => toPublicApprovalRecord(approval));
+    return approvals.map((approval) => toInspectableApprovalRecord(approval));
 }
 export async function getPublicApproval(input, approvalId) {
     const approval = await input.persistence.getApproval(approvalId);
-    return approval ? toPublicApprovalRecord(approval) : null;
+    return approval ? toInspectableApprovalRecord(approval) : null;
 }
 export async function deleteThreadRecord(input, threadId) {
     const thread = await input.getThread(threadId);

package/dist/runtime/harness.js CHANGED Viewed

@@ -29,7 +29,7 @@ import { resolveRuntimeAdapterOptions } from "./support/runtime-adapter-options.
 import { initializeHarnessRuntime, reclaimExpiredClaimedRuns as reclaimHarnessExpiredClaimedRuns, recoverStartupRuns as recoverHarnessStartupRuns, isStaleRunningRun as isHarnessStaleRunningRun, } from "./harness/run/startup-runtime.js";
 import { streamHarnessRun } from "./harness/run/stream-run.js";
 import { defaultRequestedAgentId, prepareRunStart } from "./harness/run/start-run.js";
-import { deleteThreadRecord, getPublicApproval, getThreadRecord, listPublicApprovals, } from "./harness/run/thread-records.js";
+import { buildThreadInspectionRecord, deleteThreadRecord, getPublicApproval, listPublicApprovals, } from "./harness/run/thread-records.js";
 export class AgentHarnessRuntime {
     workspace;
     runtimeAdapterOptions;
@@ -199,7 +199,7 @@ export class AgentHarnessRuntime {
         return this.persistence.getSession(threadId);
     }
     async getThread(threadId) {
-        return getThreadRecord({
+        return buildThreadInspectionRecord({
             persistence: this.persistence,
             getSession: (currentThreadId) => this.getSession(currentThreadId),
         }, threadId);

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@botbotgo/agent-harness",
-  "version": "0.0.124",
+  "version": "0.0.126",
   "description": "Workspace runtime for multi-agent applications",
   "type": "module",
   "packageManager": "npm@10.9.2",