@botbotgo/agent-harness 0.0.159 → 0.0.160

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -882,7 +882,9 @@ Primary exports:
882
882
  - `listArtifacts`
883
883
  - `getArtifact`
884
884
  - `exportEvaluationBundle`
885
+ - `replayEvaluationBundle`
885
886
  - `createAcpServer`
887
+ - `serveAcpStdio`
886
888
  - `createToolMcpServer`
887
889
  - `serveToolsOverStdio`
888
890
  - `stop`
package/README.zh.md CHANGED
@@ -841,7 +841,9 @@ spec:
841
841
  - `listArtifacts`
842
842
  - `getArtifact`
843
843
  - `exportEvaluationBundle`
844
+ - `replayEvaluationBundle`
844
845
  - `createAcpServer`
846
+ - `serveAcpStdio`
845
847
  - `createToolMcpServer`
846
848
  - `serveToolsOverStdio`
847
849
  - `stop`
package/dist/api.d.ts CHANGED
@@ -1,4 +1,4 @@
1
- import type { ArtifactListing, CancelOptions, InvocationEnvelope, ListMemoriesInput, ListMemoriesResult, MemoryRecord, MemorizeInput, MemorizeResult, MessageContent, RecallInput, RecallResult, RemoveMemoryInput, RequestRecord, RequestSummary, ResumeOptions, RunDecisionOptions, RunResult, RunStartOptions, RuntimeHealthSnapshot, RuntimeAdapterOptions, RuntimeEvaluationExport, RuntimeEvaluationExportInput, SessionRecord, SessionSummary, UpdateMemoryInput, WorkspaceLoadOptions } from "./contracts/types.js";
1
+ import type { ArtifactListing, CancelOptions, InvocationEnvelope, ListMemoriesInput, ListMemoriesResult, MemoryRecord, MemorizeInput, MemorizeResult, MessageContent, RecallInput, RecallResult, RemoveMemoryInput, RequestRecord, RequestSummary, ResumeOptions, RunDecisionOptions, RunResult, RunStartOptions, RuntimeHealthSnapshot, RuntimeAdapterOptions, RuntimeEvaluationExport, RuntimeEvaluationExportInput, RuntimeEvaluationReplayInput, RuntimeEvaluationReplayResult, SessionRecord, SessionSummary, UpdateMemoryInput, WorkspaceLoadOptions } from "./contracts/types.js";
2
2
  import { AgentHarnessRuntime } from "./runtime/harness.js";
3
3
  import type { InventoryAgentRecord, InventorySkillRecord } from "./runtime/harness/system/inventory.js";
4
4
  import type { RequirementAssessmentOptions } from "./runtime/harness/system/skill-requirements.js";
@@ -8,7 +8,8 @@ export type { AcpApproval, AcpArtifact, AcpEventNotification, AcpJsonRpcError, A
8
8
  export { AgentHarnessRuntime } from "./runtime/harness.js";
9
9
  export { buildFlowGraph, exportFlowGraphToMermaid, exportFlowGraphToSequenceMermaid } from "./flow/index.js";
10
10
  export { createUpstreamTimelineReducer } from "./upstream-events.js";
11
- export type { ListMemoriesInput, ListMemoriesResult, MemoryDecision, MemoryKind, MemoryRecord, MemoryScope, MemorizeInput, MemorizeResult, RecallInput, RecallResult, RemoveMemoryInput, RuntimeEvaluationExport, RuntimeEvaluationExportInput, UpdateMemoryInput, } from "./contracts/types.js";
11
+ export type { ListMemoriesInput, ListMemoriesResult, MemoryDecision, MemoryKind, MemoryRecord, MemoryScope, MemorizeInput, MemorizeResult, RecallInput, RecallResult, RemoveMemoryInput, RuntimeEvaluationExport, RuntimeEvaluationExportInput, RuntimeEvaluationReplayInput, RuntimeEvaluationReplayResult, UpdateMemoryInput, } from "./contracts/types.js";
12
+ export type { AcpStdioServer, AcpStdioServerOptions } from "./protocol/acp/stdio.js";
12
13
  type PublicApprovalRecord = {
13
14
  approvalId: string;
14
15
  pendingActionId: string;
@@ -86,6 +87,8 @@ export declare function getArtifact(runtime: AgentHarnessRuntime, input: {
86
87
  }): Promise<unknown>;
87
88
  export declare function getHealth(runtime: AgentHarnessRuntime): Promise<RuntimeHealthSnapshot>;
88
89
  export declare function exportEvaluationBundle(runtime: AgentHarnessRuntime, input: RuntimeEvaluationExportInput): Promise<RuntimeEvaluationExport>;
90
+ export declare function replayEvaluationBundle(runtime: AgentHarnessRuntime, input: RuntimeEvaluationReplayInput): Promise<RuntimeEvaluationReplayResult>;
91
+ export declare function serveAcpStdio(runtime: AgentHarnessRuntime, options?: import("./protocol/acp/stdio.js").AcpStdioServerOptions): import("./protocol/acp/stdio.js").AcpStdioServer;
89
92
  export declare function listAgentSkills(runtime: AgentHarnessRuntime, agentId: string, options?: RequirementAssessmentOptions): InventorySkillRecord[];
90
93
  export declare function getAgent(runtime: AgentHarnessRuntime, agentId: string, options?: RequirementAssessmentOptions): InventoryAgentRecord | null;
91
94
  export declare function describeInventory(runtime: AgentHarnessRuntime, options?: RequirementAssessmentOptions): {
package/dist/api.js CHANGED
@@ -1,4 +1,5 @@
1
1
  import { AgentHarnessRuntime } from "./runtime/harness.js";
2
+ import { serveAcpOverStdio } from "./protocol/acp/stdio.js";
2
3
  import { normalizeMessageContent } from "./utils/message-content.js";
3
4
  import { loadWorkspace } from "./workspace/compile.js";
4
5
  export { AgentHarnessAcpServer, createAcpServer } from "./acp.js";
@@ -192,6 +193,12 @@ export async function getHealth(runtime) {
192
193
  export async function exportEvaluationBundle(runtime, input) {
193
194
  return runtime.exportEvaluationBundle(input);
194
195
  }
196
/**
 * Replays a previously exported evaluation bundle on the given runtime.
 *
 * Functional facade over `runtime.replayEvaluationBundle`: the input is
 * forwarded untouched and the returned promise carries the runtime's result.
 */
export async function replayEvaluationBundle(runtime, input) {
    const pendingReplay = runtime.replayEvaluationBundle(input);
    return pendingReplay;
}
199
/**
 * Starts serving ACP for `runtime` over a stdio-style stream pair.
 *
 * Public alias for `serveAcpOverStdio`; `options` is passed through as-is and
 * the server handle it returns is handed back to the caller.
 */
export function serveAcpStdio(runtime, options) {
    const stdioServer = serveAcpOverStdio(runtime, options);
    return stdioServer;
}
195
202
  export function listAgentSkills(runtime, agentId, options) {
196
203
  return runtime.listAgentSkills(agentId, options);
197
204
  }
package/dist/cli.d.ts CHANGED
@@ -1,8 +1,14 @@
1
1
  #!/usr/bin/env node
2
+ import { createAgentHarness } from "./api.js";
3
+ import { serveAcpOverStdio } from "./protocol/acp/stdio.js";
2
4
  type CliIo = {
3
5
  cwd?: string;
4
6
  stdout?: (message: string) => void;
5
7
  stderr?: (message: string) => void;
6
8
  };
7
- export declare function runCli(argv: string[], io?: CliIo): Promise<number>;
9
+ type CliDeps = {
10
+ createAgentHarness?: typeof createAgentHarness;
11
+ serveAcpOverStdio?: typeof serveAcpOverStdio;
12
+ };
13
+ export declare function runCli(argv: string[], io?: CliIo, deps?: CliDeps): Promise<number>;
8
14
  export {};
package/dist/cli.js CHANGED
@@ -1,10 +1,13 @@
1
1
  #!/usr/bin/env node
2
2
  import path from "node:path";
3
3
  import { pathToFileURL } from "node:url";
4
+ import { createAgentHarness } from "./api.js";
4
5
  import { initProject } from "./init-project.js";
6
+ import { serveAcpOverStdio } from "./protocol/acp/stdio.js";
5
7
  function renderUsage() {
6
8
  return `Usage:
7
9
  agent-harness init <project-name> [--template deep-research|single-agent] [--provider <provider>] [--model <model>] [--with-web-search|--no-web-search]
10
+ agent-harness acp serve [--workspace <path>] [--transport stdio]
8
11
  `;
9
12
  }
10
13
  function isTemplate(value) {
@@ -46,11 +49,43 @@ function parseInitOptions(args) {
46
49
  }
47
50
  return { options };
48
51
  }
49
- export async function runCli(argv, io = {}) {
52
/**
 * Parses the arguments that follow `agent-harness acp serve`.
 *
 * Recognised options:
 *   --workspace <path>   workspace root; left `undefined` when omitted
 *   --transport <name>   only "stdio" is accepted, and it is the default
 *
 * An option's value is considered missing when it is absent, empty, or is
 * itself another `--option`. Without that last check, input such as
 * `--workspace --transport stdio` would silently use "--transport" as the
 * workspace path and then fail with a misleading "Unknown option: stdio".
 *
 * @param args Raw CLI arguments after the `serve` subcommand.
 * @returns `{ workspaceRoot, transport }` on success, or `{ transport, error }`
 *   describing the first problem encountered.
 */
function parseAcpServeOptions(args) {
    // "stdio" is the only supported transport, so the value never changes.
    const transport = "stdio";
    let workspaceRoot;
    // Reads the value that follows the option at `index`, or undefined if missing.
    const valueAfter = (index) => {
        const candidate = args[index + 1];
        return typeof candidate === "string" && candidate.length > 0 && !candidate.startsWith("--")
            ? candidate
            : undefined;
    };
    for (let index = 0; index < args.length; index += 1) {
        const arg = args[index];
        if (arg === "--workspace" || arg === "--transport") {
            const value = valueAfter(index);
            if (value === undefined) {
                return { transport, error: `Missing value for ${arg}` };
            }
            if (arg === "--workspace") {
                workspaceRoot = value;
            }
            else if (value !== "stdio") {
                return { transport, error: `Unsupported ACP transport: ${value}` };
            }
            // Skip over the value that was just consumed.
            index += 1;
            continue;
        }
        return { transport, error: `Unknown option: ${arg}` };
    }
    return { workspaceRoot, transport };
}
82
+ export async function runCli(argv, io = {}, deps = {}) {
50
83
  const cwd = io.cwd ?? process.cwd();
51
84
  const stdout = io.stdout ?? ((message) => process.stdout.write(message));
52
85
  const stderr = io.stderr ?? ((message) => process.stderr.write(message));
53
86
  const [command, projectName, ...rest] = argv;
87
+ const createHarness = deps.createAgentHarness ?? createAgentHarness;
88
+ const serveAcp = deps.serveAcpOverStdio ?? serveAcpOverStdio;
54
89
  if (command === "init") {
55
90
  if (!projectName?.trim()) {
56
91
  stderr(renderUsage());
@@ -81,6 +116,32 @@ export async function runCli(argv, io = {}) {
81
116
  return 1;
82
117
  }
83
118
  }
119
+ if (command === "acp") {
120
+ const [subcommand, ...subcommandArgs] = [projectName, ...rest];
121
+ if (subcommand !== "serve") {
122
+ stderr(renderUsage());
123
+ return 1;
124
+ }
125
+ const parsed = parseAcpServeOptions(subcommandArgs);
126
+ if (parsed.error) {
127
+ stderr(`${parsed.error}\n`);
128
+ stderr(renderUsage());
129
+ return 1;
130
+ }
131
+ try {
132
+ const runtime = await createHarness(path.resolve(cwd, parsed.workspaceRoot ?? "."));
133
+ stderr(`Serving ACP over ${parsed.transport} from ${path.resolve(cwd, parsed.workspaceRoot ?? ".")}\n`);
134
+ const server = serveAcp(runtime);
135
+ await server.completed;
136
+ await runtime.stop();
137
+ return 0;
138
+ }
139
+ catch (error) {
140
+ const message = error instanceof Error ? error.message : String(error);
141
+ stderr(`${message}\n`);
142
+ return 1;
143
+ }
144
+ }
84
145
  stderr(renderUsage());
85
146
  return 1;
86
147
  }
@@ -593,6 +593,11 @@ export type RuntimeEvaluationArtifact = ArtifactRecord & {
593
593
  export type RuntimeEvaluationExport = {
594
594
  session: SessionRecord | null;
595
595
  request: RequestRecord | null;
596
+ runRequest: {
597
+ input: MessageContent;
598
+ invocation?: InvocationEnvelope;
599
+ priority?: number;
600
+ } | null;
596
601
  approvals: ApprovalRecord[];
597
602
  transcript: TranscriptMessage[];
598
603
  events: HarnessEvent[];
@@ -603,6 +608,24 @@ export type RuntimeEvaluationExport = {
603
608
  tags: string[];
604
609
  metadata?: Record<string, unknown>;
605
610
  };
611
/** Input accepted by `replayEvaluationBundle`. */
export type RuntimeEvaluationReplayInput = {
    /** Bundle previously produced by `exportEvaluationBundle`. */
    bundle: RuntimeEvaluationExport;
    /** Agent to replay against; when omitted the replay falls back to the agent recorded in the bundle's request, then its session. */
    agentId?: string;
    /** Session to run the replay in; forwarded to the run only when provided. */
    sessionId?: string;
    /** Invocation envelope that takes precedence over the one recorded in the bundle. */
    invocation?: InvocationEnvelope;
};
617
/** Outcome of `replayEvaluationBundle`. */
export type RuntimeEvaluationReplayResult = {
    /** The request that was actually submitted for the replay. */
    request: {
        agentId: string;
        input: MessageContent;
        invocation?: InvocationEnvelope;
        sessionId?: string;
    };
    /** Result of the replayed run. */
    result: RunResult;
    /** Checks evaluated against the replayed result. */
    assertions: {
        /** Present only when the bundle carries a non-empty expected output; true when the replayed output contains it. */
        expectedOutputMatched?: boolean;
    };
};
606
629
  export type RuntimeInventoryContext = {
607
630
  workspace: WorkspaceBundle;
608
631
  };
@@ -234,6 +234,7 @@ export type CompiledAgentBinding = {
234
234
  workspaceRoot?: string;
235
235
  capabilities?: RuntimeCapabilities;
236
236
  resilience?: Record<string, unknown>;
237
+ governance?: Record<string, unknown>;
237
238
  deepagent?: {
238
239
  description?: string;
239
240
  passthrough?: Record<string, unknown>;
package/dist/index.d.ts CHANGED
@@ -1,7 +1,8 @@
1
- export { AgentHarnessAcpServer, AgentHarnessRuntime, buildFlowGraph, cancelRun, createAgentHarness, createAcpServer, createUpstreamTimelineReducer, createToolMcpServer, deleteSession, describeInventory, exportEvaluationBundle, getArtifact, getAgent, getApproval, getRequest, getHealth, listMemories, getSession, listAgentSkills, listArtifacts, listApprovals, listRequests, listSessions, memorize, normalizeUserChatInput, recall, removeMemory, resolveApproval, run, serveToolsOverStdio, subscribe, stop, updateMemory, exportFlowGraphToMermaid, exportFlowGraphToSequenceMermaid, } from "./api.js";
1
+ export { AgentHarnessAcpServer, AgentHarnessRuntime, buildFlowGraph, cancelRun, createAgentHarness, createAcpServer, createUpstreamTimelineReducer, createToolMcpServer, deleteSession, describeInventory, exportEvaluationBundle, replayEvaluationBundle, getArtifact, getAgent, getApproval, getRequest, getHealth, listMemories, getSession, listAgentSkills, listArtifacts, listApprovals, listRequests, listSessions, memorize, normalizeUserChatInput, recall, removeMemory, resolveApproval, run, serveAcpStdio, serveToolsOverStdio, subscribe, stop, updateMemory, exportFlowGraphToMermaid, exportFlowGraphToSequenceMermaid, } from "./api.js";
2
2
  export type { AcpApproval, AcpArtifact, AcpEventNotification, AcpJsonRpcError, AcpJsonRpcRequest, AcpJsonRpcResponse, AcpJsonRpcSuccess, AcpRequestRecord, AcpRunRequestParams, AcpServerCapabilities, AcpSessionRecord, } from "./acp.js";
3
- export type { ListMemoriesInput, ListMemoriesResult, MemoryDecision, MemoryKind, MemoryRecord, MemoryScope, MemorizeInput, MemorizeResult, NormalizeUserChatInputOptions, RecallInput, RecallResult, RemoveMemoryInput, RuntimeEvaluationExport, RuntimeEvaluationExportInput, UpdateMemoryInput, UserChatInput, UserChatMessage, } from "./api.js";
3
+ export type { ListMemoriesInput, ListMemoriesResult, MemoryDecision, MemoryKind, MemoryRecord, MemoryScope, MemorizeInput, MemorizeResult, NormalizeUserChatInputOptions, RecallInput, RecallResult, RemoveMemoryInput, RuntimeEvaluationExport, RuntimeEvaluationExportInput, RuntimeEvaluationReplayInput, RuntimeEvaluationReplayResult, UpdateMemoryInput, UserChatInput, UserChatMessage, } from "./api.js";
4
4
  export type { BuildFlowGraphInput, FlowEdge, FlowEdgeKind, FlowGraph, FlowGraphMermaidOptions, FlowGraphSequenceMermaidOptions, FlowGroup, FlowGroupKind, FlowNode, FlowNodeKind, FlowNodeLayer, FlowNodeStatus, } from "./flow/index.js";
5
+ export type { AcpStdioServer, AcpStdioServerOptions } from "./api.js";
5
6
  export type { ToolMcpServerOptions } from "./mcp.js";
6
7
  export { tool } from "./tools.js";
7
8
  export type { UpstreamTimelineProjection, UpstreamTimelineReducer } from "./upstream-events.js";
package/dist/index.js CHANGED
@@ -1,2 +1,2 @@
1
- export { AgentHarnessAcpServer, AgentHarnessRuntime, buildFlowGraph, cancelRun, createAgentHarness, createAcpServer, createUpstreamTimelineReducer, createToolMcpServer, deleteSession, describeInventory, exportEvaluationBundle, getArtifact, getAgent, getApproval, getRequest, getHealth, listMemories, getSession, listAgentSkills, listArtifacts, listApprovals, listRequests, listSessions, memorize, normalizeUserChatInput, recall, removeMemory, resolveApproval, run, serveToolsOverStdio, subscribe, stop, updateMemory, exportFlowGraphToMermaid, exportFlowGraphToSequenceMermaid, } from "./api.js";
1
+ export { AgentHarnessAcpServer, AgentHarnessRuntime, buildFlowGraph, cancelRun, createAgentHarness, createAcpServer, createUpstreamTimelineReducer, createToolMcpServer, deleteSession, describeInventory, exportEvaluationBundle, replayEvaluationBundle, getArtifact, getAgent, getApproval, getRequest, getHealth, listMemories, getSession, listAgentSkills, listArtifacts, listApprovals, listRequests, listSessions, memorize, normalizeUserChatInput, recall, removeMemory, resolveApproval, run, serveAcpStdio, serveToolsOverStdio, subscribe, stop, updateMemory, exportFlowGraphToMermaid, exportFlowGraphToSequenceMermaid, } from "./api.js";
2
2
  export { tool } from "./tools.js";
@@ -1 +1 @@
1
- export declare const AGENT_HARNESS_VERSION = "0.0.158";
1
+ export declare const AGENT_HARNESS_VERSION = "0.0.159";
@@ -1 +1 @@
1
- export const AGENT_HARNESS_VERSION = "0.0.158";
1
+ export const AGENT_HARNESS_VERSION = "0.0.159";
@@ -0,0 +1,11 @@
1
+ import type { Readable, Writable } from "node:stream";
2
+ import type { AgentHarnessRuntime } from "../../runtime/harness.js";
3
/** Stream overrides for `serveAcpOverStdio`. */
export type AcpStdioServerOptions = {
    /** Stream requests are read from, one JSON document per line; defaults to `process.stdin`. */
    input?: Readable;
    /** Stream responses and notifications are written to; defaults to `process.stdout`. */
    output?: Writable;
};
7
/** Handle returned by `serveAcpOverStdio`. */
export type AcpStdioServer = {
    /** Settles when the request-reading loop ends; rejects if handling a request or writing a response throws. */
    completed: Promise<void>;
    /** Stops the server: detaches notifications, closes the line reader, destroys the input when possible, then waits for `completed`. */
    close: () => Promise<void>;
};
11
/**
 * Serves ACP for `runtime` as newline-delimited JSON over a stream pair.
 *
 * @param runtime Harness runtime the ACP server is created for.
 * @param options Optional input/output streams; the process stdio streams are used when omitted.
 * @returns A handle exposing the completion promise and a `close` function.
 */
export declare function serveAcpOverStdio(runtime: AgentHarnessRuntime, options?: AcpStdioServerOptions): AcpStdioServer;
@@ -0,0 +1,69 @@
1
+ import { createInterface } from "node:readline";
2
+ import { createAcpServer } from "../../acp.js";
3
/**
 * Writes `payload` to `output` as a single newline-terminated JSON line.
 *
 * The returned promise resolves once the stream's write callback reports
 * success and rejects with the error that callback reports. Serialisation
 * happens inside the promise executor, so a `JSON.stringify` failure also
 * surfaces as a rejection.
 */
function writeJsonLine(output, payload) {
    return new Promise((resolve, reject) => {
        const settle = (error) => (error ? reject(error) : resolve());
        const line = `${JSON.stringify(payload)}\n`;
        output.write(line, settle);
    });
}
14
/**
 * Serves ACP for `runtime` as newline-delimited JSON-RPC over a stream pair.
 *
 * Each non-blank input line is parsed as JSON and passed to the ACP server's
 * `handle`; a truthy response is written back as one JSON line. Lines that are
 * not valid JSON are answered with a JSON-RPC parse error (-32700) and the
 * loop continues. Server notifications are written to the same output.
 *
 * @param runtime Harness runtime the ACP server is created for.
 * @param options Optional `input` / `output` streams; defaults to
 *   `process.stdin` / `process.stdout`.
 * @returns `completed`, which settles when the input loop ends (and rejects if
 *   handling a request or writing a response throws), and `close`, which tears
 *   the server down and waits for `completed`.
 */
export function serveAcpOverStdio(runtime, options = {}) {
    const input = options.input ?? process.stdin;
    const output = options.output ?? process.stdout;
    const server = createAcpServer(runtime);
    const unsubscribe = server.subscribe((notification) => {
        // Notifications are best-effort, but the write promise must still be
        // handled: left unhandled, a failed write (for example the client
        // closed the pipe) becomes an `unhandledRejection`, which terminates
        // the process by default. A broken output still surfaces through the
        // awaited response writes in the request loop below.
        writeJsonLine(output, notification).catch(() => undefined);
    });
    const lineReader = createInterface({
        input,
        // Treat "\r\n" as a single line break regardless of chunk timing.
        crlfDelay: Infinity,
    });
    const completed = (async () => {
        try {
            for await (const line of lineReader) {
                const trimmed = line.trim();
                if (trimmed.length === 0) {
                    continue;
                }
                let request;
                try {
                    request = JSON.parse(trimmed);
                }
                catch {
                    // The request id is unknown when the payload cannot be parsed.
                    await writeJsonLine(output, {
                        jsonrpc: "2.0",
                        id: null,
                        error: {
                            code: -32700,
                            message: "Invalid JSON payload.",
                        },
                    });
                    continue;
                }
                const response = await server.handle(request);
                if (response) {
                    await writeJsonLine(output, response);
                }
            }
        }
        finally {
            // Runs on normal end of input as well as on errors.
            unsubscribe();
            lineReader.close();
        }
    })();
    return {
        completed,
        close: async () => {
            unsubscribe();
            lineReader.close();
            if (typeof input.destroy === "function") {
                input.destroy();
            }
            // Closing must not throw because the loop failed earlier.
            await completed.catch(() => undefined);
        },
    };
}
@@ -43,8 +43,67 @@ function toCategory(toolType) {
43
43
  }
44
44
  return "local";
45
45
  }
46
/**
 * Returns `value` itself when it is a non-null, non-array object; otherwise `null`.
 */
function asObject(value) {
    if (value === null || typeof value !== "object" || Array.isArray(value)) {
        return null;
    }
    return value;
}
49
/**
 * Extracts the usable strings from `value`.
 *
 * Non-array input yields an empty array. Within an array, non-string items and
 * strings that are blank after trimming are dropped; the rest are returned trimmed.
 */
function readStringArray(value) {
    if (!Array.isArray(value)) {
        return [];
    }
    const collected = [];
    for (const item of value) {
        if (typeof item !== "string") {
            continue;
        }
        const trimmed = item.trim();
        if (trimmed.length > 0) {
            collected.push(trimmed);
        }
    }
    return collected;
}
54
/**
 * Returns `value` when it is one of the recognised risk levels
 * ("low", "medium", "high"); otherwise `undefined`.
 */
function readRisk(value) {
    switch (value) {
        case "low":
        case "medium":
        case "high":
            return value;
        default:
            return undefined;
    }
}
57
/**
 * Returns `value` when it is one of the recognised approval policies
 * ("explicit-hitl", "runtime-default", "none"); otherwise `undefined`.
 */
function readApprovalPolicy(value) {
    switch (value) {
        case "explicit-hitl":
        case "runtime-default":
        case "none":
            return value;
        default:
            return undefined;
    }
}
60
/**
 * Tells whether a governance rule applies to a tool policy.
 *
 * Criteria are read from `rule.match` when that is an object, otherwise from
 * the rule itself. Each of `toolName`, `category` and `toolType` constrains
 * the match only when it is a string that is non-empty after trimming; a rule
 * with no such criteria therefore matches every policy.
 */
function matchesToolPolicy(rule, policy) {
    const criteria = asObject(rule.match) ?? rule;
    const constrainedFields = ["toolName", "category", "toolType"];
    return constrainedFields.every((field) => {
        const raw = criteria[field];
        const expected = typeof raw === "string" ? raw.trim() : undefined;
        // An absent or blank criterion places no constraint on this field.
        return !expected || expected === policy[field];
    });
}
69
/**
 * Applies `governance.toolPolicies` overrides from the binding's harness
 * runtime to a list of tool policies.
 *
 * When no overrides are configured the input array is returned as-is.
 * Otherwise each policy is shallow-copied and every matching rule is applied
 * in order, so later rules win. A rule may override `risk`,
 * `requiresApproval` and `approvalPolicy`, and may append `inputRiskHints`
 * (de-duplicated). A rule that forces approval without naming a policy
 * upgrades an approval policy of "none" to "runtime-default".
 */
function applyGovernanceOverrides(binding, policies) {
    const governance = asObject(binding.harnessRuntime.governance);
    const rules = Array.isArray(governance?.toolPolicies) ? governance.toolPolicies : [];
    if (rules.length === 0) {
        return policies;
    }
    // Mutates `target` (a private copy) with whatever `candidate` overrides.
    const applyRule = (target, candidate) => {
        const rule = asObject(candidate);
        if (!rule || !matchesToolPolicy(rule, target)) {
            return;
        }
        const risk = readRisk(rule.risk);
        const approvalPolicy = readApprovalPolicy(rule.approvalPolicy);
        const requiresApproval = typeof rule.requiresApproval === "boolean" ? rule.requiresApproval : undefined;
        if (risk) {
            target.risk = risk;
        }
        if (requiresApproval !== undefined) {
            target.requiresApproval = requiresApproval;
        }
        if (approvalPolicy) {
            target.approvalPolicy = approvalPolicy;
        }
        else if (requiresApproval === true && target.approvalPolicy === "none") {
            target.approvalPolicy = "runtime-default";
        }
        const hints = readStringArray(rule.inputRiskHints);
        if (hints.length > 0) {
            target.inputRiskHints = Array.from(new Set([...target.inputRiskHints, ...hints]));
        }
    };
    return policies.map((policy) => {
        const merged = { ...policy };
        rules.forEach((candidate) => applyRule(merged, candidate));
        return merged;
    });
}
46
105
  export function buildRuntimeGovernanceBundles(binding) {
47
- const toolPolicies = getBindingPrimaryTools(binding).map((tool) => {
106
+ const toolPolicies = applyGovernanceOverrides(binding, getBindingPrimaryTools(binding).map((tool) => {
48
107
  const requiresApproval = toolRequiresRuntimeApproval(tool);
49
108
  return {
50
109
  toolName: tool.name,
@@ -63,7 +122,7 @@ export function buildRuntimeGovernanceBundles(binding) {
63
122
  hasInputSchema: typeof tool.inputSchemaRef === "string" && tool.inputSchemaRef.trim().length > 0,
64
123
  inputRiskHints: inputHints(binding, tool),
65
124
  };
66
- });
125
+ }));
67
126
  if (toolPolicies.length === 0) {
68
127
  return [];
69
128
  }
@@ -9,6 +9,35 @@ export class PolicyEngine {
9
9
  const reasons = [];
10
10
  const bundles = [];
11
11
  let allowed = true;
12
+ const governance = typeof binding.harnessRuntime.governance === "object" && binding.harnessRuntime.governance
13
+ ? binding.harnessRuntime.governance
14
+ : undefined;
15
+ const denyConfig = typeof governance?.deny === "object" && governance.deny
16
+ ? governance.deny
17
+ : undefined;
18
+ if (denyConfig) {
19
+ const deniedNames = new Set(Array.isArray(denyConfig.toolNames)
20
+ ? denyConfig.toolNames.filter((item) => typeof item === "string" && item.trim().length > 0).map((item) => item.trim())
21
+ : []);
22
+ const deniedCategories = new Set(Array.isArray(denyConfig.categories)
23
+ ? denyConfig.categories.filter((item) => typeof item === "string" && item.trim().length > 0).map((item) => item.trim())
24
+ : []);
25
+ const tools = binding.execution?.params?.tools ?? binding.langchainAgentParams?.tools ?? binding.deepAgentParams?.tools ?? [];
26
+ const blocked = tools.filter((tool) => {
27
+ const category = tool.type === "mcp"
28
+ ? "mcp"
29
+ : tool.type === "backend"
30
+ ? "backend"
31
+ : tool.type === "provider"
32
+ ? "provider-native"
33
+ : "local";
34
+ return deniedNames.has(tool.name) || deniedCategories.has(category);
35
+ });
36
+ if (blocked.length > 0) {
37
+ allowed = false;
38
+ reasons.push(`runtime governance denied tool access: ${blocked.map((tool) => tool.name).join(", ")}`);
39
+ }
40
+ }
12
41
  for (const evaluator of getPolicyEvaluators()) {
13
42
  const decision = evaluator.evaluate(binding);
14
43
  if (!decision) {
@@ -1,4 +1,4 @@
1
- import type { ApprovalRecord, ArtifactListing, CancelOptions, HarnessEvent, HarnessStreamItem, RuntimeHealthSnapshot, ListMemoriesInput, ListMemoriesResult, MessageContent, RemoveMemoryInput, RunRecord, RunStartOptions, RestartConversationOptions, RuntimeAdapterOptions, RuntimeEvaluationExport, RuntimeEvaluationExportInput, ResumeOptions, RunOptions, RunResult, RunSummary, MemoryRecord, MemorizeInput, MemorizeResult, RecallInput, RecallResult, UpdateMemoryInput, ThreadSummary, ThreadRecord, WorkspaceBundle } from "../contracts/types.js";
1
+ import type { ApprovalRecord, ArtifactListing, CancelOptions, HarnessEvent, HarnessStreamItem, RuntimeHealthSnapshot, ListMemoriesInput, ListMemoriesResult, MessageContent, RemoveMemoryInput, RunRecord, RunStartOptions, RestartConversationOptions, RuntimeAdapterOptions, RuntimeEvaluationExport, RuntimeEvaluationExportInput, RuntimeEvaluationReplayInput, RuntimeEvaluationReplayResult, ResumeOptions, RunOptions, RunResult, RunSummary, MemoryRecord, MemorizeInput, MemorizeResult, RecallInput, RecallResult, UpdateMemoryInput, ThreadSummary, ThreadRecord, WorkspaceBundle } from "../contracts/types.js";
2
2
  import { type ToolMcpServerOptions } from "../mcp.js";
3
3
  import { type InventoryAgentRecord, type InventorySkillRecord } from "./harness/system/inventory.js";
4
4
  import type { RequirementAssessmentOptions } from "./harness/system/skill-requirements.js";
@@ -86,6 +86,7 @@ export declare class AgentHarnessRuntime {
86
86
  readArtifact(threadId: string, runId: string, artifactPath: string): Promise<unknown>;
87
87
  listRunEvents(threadId: string, runId: string): Promise<HarnessEvent[]>;
88
88
  exportEvaluationBundle(input: RuntimeEvaluationExportInput): Promise<RuntimeEvaluationExport>;
89
+ replayEvaluationBundle(input: RuntimeEvaluationReplayInput): Promise<RuntimeEvaluationReplayResult>;
89
90
  listAgentSkills(agentId: string, options?: RequirementAssessmentOptions): InventorySkillRecord[];
90
91
  getAgent(agentId: string, options?: RequirementAssessmentOptions): InventoryAgentRecord | null;
91
92
  describeWorkspaceInventory(options?: RequirementAssessmentOptions): {
@@ -487,6 +487,7 @@ export class AgentHarnessRuntime {
487
487
  async exportEvaluationBundle(input) {
488
488
  const thread = await this.getThread(input.sessionId);
489
489
  const run = await this.getRun(input.requestId);
490
+ const runRequest = await this.persistence.getRunRequest(input.sessionId, input.requestId);
490
491
  const approvals = await this.listApprovals({ threadId: input.sessionId, runId: input.requestId });
491
492
  const transcript = await this.persistence.listThreadMessages(input.sessionId, 500);
492
493
  const events = await this.persistence.listRunEvents(input.sessionId, input.requestId);
@@ -503,6 +504,13 @@ export class AgentHarnessRuntime {
503
504
  return {
504
505
  session: thread ? toSessionRecord(thread) : null,
505
506
  request: run ? toRequestRecord(run) : null,
507
+ runRequest: runRequest
508
+ ? {
509
+ input: runRequest.input,
510
+ ...(runRequest.invocation ? { invocation: runRequest.invocation } : {}),
511
+ ...(typeof runRequest.priority === "number" ? { priority: runRequest.priority } : {}),
512
+ }
513
+ : (deriveRunRequestFromTranscript(transcript, input.requestId) ?? null),
506
514
  approvals,
507
515
  transcript,
508
516
  events,
@@ -520,6 +528,36 @@ export class AgentHarnessRuntime {
520
528
  ...(input.metadata ? { metadata: { ...input.metadata } } : {}),
521
529
  };
522
530
  }
531
/**
 * Re-runs the request captured in an evaluation bundle.
 *
 * The agent is taken from the replay input, then the bundle's request, then
 * the bundle's session. The request payload comes from `bundle.runRequest`,
 * falling back to the user message found in the bundle transcript. An
 * invocation on the replay input takes precedence over the recorded one.
 *
 * @returns The submitted request, the run result, and — only when the bundle
 *   has a non-empty `expectedOutput` — whether the output contains it.
 * @throws Error when no agent id or no replayable request can be determined.
 */
async replayEvaluationBundle(input) {
    const { bundle } = input;
    const agentId = input.agentId ?? bundle.request?.agentId ?? bundle.session?.entryAgentId;
    if (!agentId) {
        throw new Error("Evaluation replay requires an agentId on the replay input or exported bundle.");
    }
    const recorded = bundle.runRequest ?? deriveRunRequestFromTranscript(bundle.transcript, bundle.request?.requestId);
    if (!recorded) {
        throw new Error("Evaluation replay requires bundle.runRequest from exportEvaluationBundle.");
    }
    const invocation = input.invocation ?? recorded.invocation;
    const invocationPart = invocation ? { invocation } : {};
    const threadPart = input.sessionId ? { threadId: input.sessionId } : {};
    const result = await this.run({
        agentId,
        input: recorded.input,
        ...threadPart,
        ...invocationPart,
    });
    const expected = typeof bundle.expectedOutput === "string" ? bundle.expectedOutput.trim() : "";
    const assertions = expected.length > 0
        ? { expectedOutputMatched: result.output.includes(expected) }
        : {};
    const sessionPart = input.sessionId ? { sessionId: input.sessionId } : {};
    return {
        request: {
            agentId,
            input: recorded.input,
            ...invocationPart,
            ...sessionPart,
        },
        result,
        assertions,
    };
}
523
561
  listAgentSkills(agentId, options = {}) {
524
562
  return listWorkspaceAgentSkills(this.workspace, agentId, {
525
563
  assessRequirements: isInventoryEnabled(this.workspace),
@@ -1475,3 +1513,9 @@ function toSessionRecord(record) {
1475
1513
  function toRequestRecord(record) {
1476
1514
  return toRequestSummary(record);
1477
1515
  }
1516
/**
 * Recovers a run request from a transcript.
 *
 * Scans from the newest message backwards for a user message; when `runId` is
 * given, only messages carrying that run id qualify.
 *
 * @returns `{ input }` holding the message content, or `null` when no message qualifies.
 */
function deriveRunRequestFromTranscript(transcript, runId) {
    const messages = [...transcript];
    for (let index = messages.length - 1; index >= 0; index -= 1) {
        const message = messages[index];
        const belongsToRun = !runId || message.runId === runId;
        if (message.role === "user" && belongsToRun) {
            return { input: message.content };
        }
    }
    return null;
}
@@ -340,6 +340,7 @@ export function compileBinding(workspaceRoot, agent, agents, referencedSubagentI
340
340
  const runtimeFilesystemDefaults = agent.executionMode === "langchain-v1"
341
341
  ? asObject(runtimeDefaults?.filesystem)
342
342
  : undefined;
343
+ const runtimeGovernanceDefaults = asObject(runtimeDefaults?.governance);
343
344
  const compiledFilesystemConfig = agent.executionMode === "langchain-v1"
344
345
  ? mergeConfigObjects(runtimeFilesystemDefaults, getAgentExecutionObject(agent, "filesystem", { executionMode: "langchain-v1" }))
345
346
  : undefined;
@@ -355,6 +356,7 @@ export function compileBinding(workspaceRoot, agent, agents, referencedSubagentI
355
356
  workspaceRoot,
356
357
  capabilities: inferAgentCapabilities(agent),
357
358
  resilience,
359
+ ...(runtimeGovernanceDefaults ? { governance: runtimeGovernanceDefaults } : {}),
358
360
  ...(agent.executionMode === "deepagent"
359
361
  ? {
360
362
  deepagent: {
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@botbotgo/agent-harness",
3
- "version": "0.0.159",
3
+ "version": "0.0.160",
4
4
  "description": "Workspace runtime for multi-agent applications",
5
5
  "type": "module",
6
6
  "packageManager": "npm@10.9.2",