@botbotgo/agent-harness 0.0.158 → 0.0.160
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +2 -0
- package/README.zh.md +2 -0
- package/dist/api.d.ts +5 -2
- package/dist/api.js +7 -0
- package/dist/cli.d.ts +7 -1
- package/dist/cli.js +62 -1
- package/dist/contracts/runtime.d.ts +23 -0
- package/dist/contracts/workspace.d.ts +1 -0
- package/dist/index.d.ts +3 -2
- package/dist/index.js +1 -1
- package/dist/package-version.d.ts +1 -1
- package/dist/package-version.js +1 -1
- package/dist/protocol/acp/stdio.d.ts +11 -0
- package/dist/protocol/acp/stdio.js +69 -0
- package/dist/runtime/harness/run/governance.js +61 -2
- package/dist/runtime/harness/system/policy-engine.js +29 -0
- package/dist/runtime/harness.d.ts +2 -1
- package/dist/runtime/harness.js +44 -0
- package/dist/workspace/agent-binding-compiler.js +2 -0
- package/package.json +1 -1
package/README.md
CHANGED
package/README.zh.md
CHANGED
package/dist/api.d.ts
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import type { ArtifactListing, CancelOptions, InvocationEnvelope, ListMemoriesInput, ListMemoriesResult, MemoryRecord, MemorizeInput, MemorizeResult, MessageContent, RecallInput, RecallResult, RemoveMemoryInput, RequestRecord, RequestSummary, ResumeOptions, RunDecisionOptions, RunResult, RunStartOptions, RuntimeHealthSnapshot, RuntimeAdapterOptions, RuntimeEvaluationExport, RuntimeEvaluationExportInput, SessionRecord, SessionSummary, UpdateMemoryInput, WorkspaceLoadOptions } from "./contracts/types.js";
|
|
1
|
+
import type { ArtifactListing, CancelOptions, InvocationEnvelope, ListMemoriesInput, ListMemoriesResult, MemoryRecord, MemorizeInput, MemorizeResult, MessageContent, RecallInput, RecallResult, RemoveMemoryInput, RequestRecord, RequestSummary, ResumeOptions, RunDecisionOptions, RunResult, RunStartOptions, RuntimeHealthSnapshot, RuntimeAdapterOptions, RuntimeEvaluationExport, RuntimeEvaluationExportInput, RuntimeEvaluationReplayInput, RuntimeEvaluationReplayResult, SessionRecord, SessionSummary, UpdateMemoryInput, WorkspaceLoadOptions } from "./contracts/types.js";
|
|
2
2
|
import { AgentHarnessRuntime } from "./runtime/harness.js";
|
|
3
3
|
import type { InventoryAgentRecord, InventorySkillRecord } from "./runtime/harness/system/inventory.js";
|
|
4
4
|
import type { RequirementAssessmentOptions } from "./runtime/harness/system/skill-requirements.js";
|
|
@@ -8,7 +8,8 @@ export type { AcpApproval, AcpArtifact, AcpEventNotification, AcpJsonRpcError, A
|
|
|
8
8
|
export { AgentHarnessRuntime } from "./runtime/harness.js";
|
|
9
9
|
export { buildFlowGraph, exportFlowGraphToMermaid, exportFlowGraphToSequenceMermaid } from "./flow/index.js";
|
|
10
10
|
export { createUpstreamTimelineReducer } from "./upstream-events.js";
|
|
11
|
-
export type { ListMemoriesInput, ListMemoriesResult, MemoryDecision, MemoryKind, MemoryRecord, MemoryScope, MemorizeInput, MemorizeResult, RecallInput, RecallResult, RemoveMemoryInput, RuntimeEvaluationExport, RuntimeEvaluationExportInput, UpdateMemoryInput, } from "./contracts/types.js";
|
|
11
|
+
export type { ListMemoriesInput, ListMemoriesResult, MemoryDecision, MemoryKind, MemoryRecord, MemoryScope, MemorizeInput, MemorizeResult, RecallInput, RecallResult, RemoveMemoryInput, RuntimeEvaluationExport, RuntimeEvaluationExportInput, RuntimeEvaluationReplayInput, RuntimeEvaluationReplayResult, UpdateMemoryInput, } from "./contracts/types.js";
|
|
12
|
+
export type { AcpStdioServer, AcpStdioServerOptions } from "./protocol/acp/stdio.js";
|
|
12
13
|
type PublicApprovalRecord = {
|
|
13
14
|
approvalId: string;
|
|
14
15
|
pendingActionId: string;
|
|
@@ -86,6 +87,8 @@ export declare function getArtifact(runtime: AgentHarnessRuntime, input: {
|
|
|
86
87
|
}): Promise<unknown>;
|
|
87
88
|
export declare function getHealth(runtime: AgentHarnessRuntime): Promise<RuntimeHealthSnapshot>;
|
|
88
89
|
export declare function exportEvaluationBundle(runtime: AgentHarnessRuntime, input: RuntimeEvaluationExportInput): Promise<RuntimeEvaluationExport>;
|
|
90
|
+
export declare function replayEvaluationBundle(runtime: AgentHarnessRuntime, input: RuntimeEvaluationReplayInput): Promise<RuntimeEvaluationReplayResult>;
|
|
91
|
+
export declare function serveAcpStdio(runtime: AgentHarnessRuntime, options?: import("./protocol/acp/stdio.js").AcpStdioServerOptions): import("./protocol/acp/stdio.js").AcpStdioServer;
|
|
89
92
|
export declare function listAgentSkills(runtime: AgentHarnessRuntime, agentId: string, options?: RequirementAssessmentOptions): InventorySkillRecord[];
|
|
90
93
|
export declare function getAgent(runtime: AgentHarnessRuntime, agentId: string, options?: RequirementAssessmentOptions): InventoryAgentRecord | null;
|
|
91
94
|
export declare function describeInventory(runtime: AgentHarnessRuntime, options?: RequirementAssessmentOptions): {
|
package/dist/api.js
CHANGED
|
@@ -1,4 +1,5 @@
|
|
|
1
1
|
import { AgentHarnessRuntime } from "./runtime/harness.js";
|
|
2
|
+
import { serveAcpOverStdio } from "./protocol/acp/stdio.js";
|
|
2
3
|
import { normalizeMessageContent } from "./utils/message-content.js";
|
|
3
4
|
import { loadWorkspace } from "./workspace/compile.js";
|
|
4
5
|
export { AgentHarnessAcpServer, createAcpServer } from "./acp.js";
|
|
@@ -192,6 +193,12 @@ export async function getHealth(runtime) {
|
|
|
192
193
|
export async function exportEvaluationBundle(runtime, input) {
|
|
193
194
|
return runtime.exportEvaluationBundle(input);
|
|
194
195
|
}
|
|
196
|
+
export async function replayEvaluationBundle(runtime, input) {
|
|
197
|
+
return runtime.replayEvaluationBundle(input);
|
|
198
|
+
}
|
|
199
|
+
export function serveAcpStdio(runtime, options) {
|
|
200
|
+
return serveAcpOverStdio(runtime, options);
|
|
201
|
+
}
|
|
195
202
|
export function listAgentSkills(runtime, agentId, options) {
|
|
196
203
|
return runtime.listAgentSkills(agentId, options);
|
|
197
204
|
}
|
package/dist/cli.d.ts
CHANGED
|
@@ -1,8 +1,14 @@
|
|
|
1
1
|
#!/usr/bin/env node
|
|
2
|
+
import { createAgentHarness } from "./api.js";
|
|
3
|
+
import { serveAcpOverStdio } from "./protocol/acp/stdio.js";
|
|
2
4
|
type CliIo = {
|
|
3
5
|
cwd?: string;
|
|
4
6
|
stdout?: (message: string) => void;
|
|
5
7
|
stderr?: (message: string) => void;
|
|
6
8
|
};
|
|
7
|
-
|
|
9
|
+
type CliDeps = {
|
|
10
|
+
createAgentHarness?: typeof createAgentHarness;
|
|
11
|
+
serveAcpOverStdio?: typeof serveAcpOverStdio;
|
|
12
|
+
};
|
|
13
|
+
export declare function runCli(argv: string[], io?: CliIo, deps?: CliDeps): Promise<number>;
|
|
8
14
|
export {};
|
package/dist/cli.js
CHANGED
|
@@ -1,10 +1,13 @@
|
|
|
1
1
|
#!/usr/bin/env node
|
|
2
2
|
import path from "node:path";
|
|
3
3
|
import { pathToFileURL } from "node:url";
|
|
4
|
+
import { createAgentHarness } from "./api.js";
|
|
4
5
|
import { initProject } from "./init-project.js";
|
|
6
|
+
import { serveAcpOverStdio } from "./protocol/acp/stdio.js";
|
|
5
7
|
function renderUsage() {
|
|
6
8
|
return `Usage:
|
|
7
9
|
agent-harness init <project-name> [--template deep-research|single-agent] [--provider <provider>] [--model <model>] [--with-web-search|--no-web-search]
|
|
10
|
+
agent-harness acp serve [--workspace <path>] [--transport stdio]
|
|
8
11
|
`;
|
|
9
12
|
}
|
|
10
13
|
function isTemplate(value) {
|
|
@@ -46,11 +49,43 @@ function parseInitOptions(args) {
|
|
|
46
49
|
}
|
|
47
50
|
return { options };
|
|
48
51
|
}
|
|
49
|
-
|
|
52
|
+
function parseAcpServeOptions(args) {
|
|
53
|
+
let workspaceRoot;
|
|
54
|
+
let transport = "stdio";
|
|
55
|
+
for (let index = 0; index < args.length; index += 1) {
|
|
56
|
+
const arg = args[index];
|
|
57
|
+
if (arg === "--workspace") {
|
|
58
|
+
const value = args[index + 1];
|
|
59
|
+
if (!value) {
|
|
60
|
+
return { transport, error: "Missing value for --workspace" };
|
|
61
|
+
}
|
|
62
|
+
workspaceRoot = value;
|
|
63
|
+
index += 1;
|
|
64
|
+
continue;
|
|
65
|
+
}
|
|
66
|
+
if (arg === "--transport") {
|
|
67
|
+
const value = args[index + 1];
|
|
68
|
+
if (!value) {
|
|
69
|
+
return { transport, error: "Missing value for --transport" };
|
|
70
|
+
}
|
|
71
|
+
if (value !== "stdio") {
|
|
72
|
+
return { transport, error: `Unsupported ACP transport: ${value}` };
|
|
73
|
+
}
|
|
74
|
+
transport = "stdio";
|
|
75
|
+
index += 1;
|
|
76
|
+
continue;
|
|
77
|
+
}
|
|
78
|
+
return { transport, error: `Unknown option: ${arg}` };
|
|
79
|
+
}
|
|
80
|
+
return { workspaceRoot, transport };
|
|
81
|
+
}
|
|
82
|
+
export async function runCli(argv, io = {}, deps = {}) {
|
|
50
83
|
const cwd = io.cwd ?? process.cwd();
|
|
51
84
|
const stdout = io.stdout ?? ((message) => process.stdout.write(message));
|
|
52
85
|
const stderr = io.stderr ?? ((message) => process.stderr.write(message));
|
|
53
86
|
const [command, projectName, ...rest] = argv;
|
|
87
|
+
const createHarness = deps.createAgentHarness ?? createAgentHarness;
|
|
88
|
+
const serveAcp = deps.serveAcpOverStdio ?? serveAcpOverStdio;
|
|
54
89
|
if (command === "init") {
|
|
55
90
|
if (!projectName?.trim()) {
|
|
56
91
|
stderr(renderUsage());
|
|
@@ -81,6 +116,32 @@ export async function runCli(argv, io = {}) {
|
|
|
81
116
|
return 1;
|
|
82
117
|
}
|
|
83
118
|
}
|
|
119
|
+
if (command === "acp") {
|
|
120
|
+
const [subcommand, ...subcommandArgs] = [projectName, ...rest];
|
|
121
|
+
if (subcommand !== "serve") {
|
|
122
|
+
stderr(renderUsage());
|
|
123
|
+
return 1;
|
|
124
|
+
}
|
|
125
|
+
const parsed = parseAcpServeOptions(subcommandArgs);
|
|
126
|
+
if (parsed.error) {
|
|
127
|
+
stderr(`${parsed.error}\n`);
|
|
128
|
+
stderr(renderUsage());
|
|
129
|
+
return 1;
|
|
130
|
+
}
|
|
131
|
+
try {
|
|
132
|
+
const runtime = await createHarness(path.resolve(cwd, parsed.workspaceRoot ?? "."));
|
|
133
|
+
stderr(`Serving ACP over ${parsed.transport} from ${path.resolve(cwd, parsed.workspaceRoot ?? ".")}\n`);
|
|
134
|
+
const server = serveAcp(runtime);
|
|
135
|
+
await server.completed;
|
|
136
|
+
await runtime.stop();
|
|
137
|
+
return 0;
|
|
138
|
+
}
|
|
139
|
+
catch (error) {
|
|
140
|
+
const message = error instanceof Error ? error.message : String(error);
|
|
141
|
+
stderr(`${message}\n`);
|
|
142
|
+
return 1;
|
|
143
|
+
}
|
|
144
|
+
}
|
|
84
145
|
stderr(renderUsage());
|
|
85
146
|
return 1;
|
|
86
147
|
}
|
package/dist/contracts/runtime.d.ts
CHANGED
|
@@ -593,6 +593,11 @@ export type RuntimeEvaluationArtifact = ArtifactRecord & {
|
|
|
593
593
|
export type RuntimeEvaluationExport = {
|
|
594
594
|
session: SessionRecord | null;
|
|
595
595
|
request: RequestRecord | null;
|
|
596
|
+
runRequest: {
|
|
597
|
+
input: MessageContent;
|
|
598
|
+
invocation?: InvocationEnvelope;
|
|
599
|
+
priority?: number;
|
|
600
|
+
} | null;
|
|
596
601
|
approvals: ApprovalRecord[];
|
|
597
602
|
transcript: TranscriptMessage[];
|
|
598
603
|
events: HarnessEvent[];
|
|
@@ -603,6 +608,24 @@ export type RuntimeEvaluationExport = {
|
|
|
603
608
|
tags: string[];
|
|
604
609
|
metadata?: Record<string, unknown>;
|
|
605
610
|
};
|
|
611
|
+
export type RuntimeEvaluationReplayInput = {
|
|
612
|
+
bundle: RuntimeEvaluationExport;
|
|
613
|
+
agentId?: string;
|
|
614
|
+
sessionId?: string;
|
|
615
|
+
invocation?: InvocationEnvelope;
|
|
616
|
+
};
|
|
617
|
+
export type RuntimeEvaluationReplayResult = {
|
|
618
|
+
request: {
|
|
619
|
+
agentId: string;
|
|
620
|
+
input: MessageContent;
|
|
621
|
+
invocation?: InvocationEnvelope;
|
|
622
|
+
sessionId?: string;
|
|
623
|
+
};
|
|
624
|
+
result: RunResult;
|
|
625
|
+
assertions: {
|
|
626
|
+
expectedOutputMatched?: boolean;
|
|
627
|
+
};
|
|
628
|
+
};
|
|
606
629
|
export type RuntimeInventoryContext = {
|
|
607
630
|
workspace: WorkspaceBundle;
|
|
608
631
|
};
|
package/dist/contracts/workspace.d.ts
CHANGED
|
@@ -234,6 +234,7 @@ export type CompiledAgentBinding = {
|
|
|
234
234
|
workspaceRoot?: string;
|
|
235
235
|
capabilities?: RuntimeCapabilities;
|
|
236
236
|
resilience?: Record<string, unknown>;
|
|
237
|
+
governance?: Record<string, unknown>;
|
|
237
238
|
deepagent?: {
|
|
238
239
|
description?: string;
|
|
239
240
|
passthrough?: Record<string, unknown>;
|
package/dist/index.d.ts
CHANGED
|
@@ -1,7 +1,8 @@
|
|
|
1
|
-
export { AgentHarnessAcpServer, AgentHarnessRuntime, buildFlowGraph, cancelRun, createAgentHarness, createAcpServer, createUpstreamTimelineReducer, createToolMcpServer, deleteSession, describeInventory, exportEvaluationBundle, getArtifact, getAgent, getApproval, getRequest, getHealth, listMemories, getSession, listAgentSkills, listArtifacts, listApprovals, listRequests, listSessions, memorize, normalizeUserChatInput, recall, removeMemory, resolveApproval, run, serveToolsOverStdio, subscribe, stop, updateMemory, exportFlowGraphToMermaid, exportFlowGraphToSequenceMermaid, } from "./api.js";
|
|
1
|
+
export { AgentHarnessAcpServer, AgentHarnessRuntime, buildFlowGraph, cancelRun, createAgentHarness, createAcpServer, createUpstreamTimelineReducer, createToolMcpServer, deleteSession, describeInventory, exportEvaluationBundle, replayEvaluationBundle, getArtifact, getAgent, getApproval, getRequest, getHealth, listMemories, getSession, listAgentSkills, listArtifacts, listApprovals, listRequests, listSessions, memorize, normalizeUserChatInput, recall, removeMemory, resolveApproval, run, serveAcpStdio, serveToolsOverStdio, subscribe, stop, updateMemory, exportFlowGraphToMermaid, exportFlowGraphToSequenceMermaid, } from "./api.js";
|
|
2
2
|
export type { AcpApproval, AcpArtifact, AcpEventNotification, AcpJsonRpcError, AcpJsonRpcRequest, AcpJsonRpcResponse, AcpJsonRpcSuccess, AcpRequestRecord, AcpRunRequestParams, AcpServerCapabilities, AcpSessionRecord, } from "./acp.js";
|
|
3
|
-
export type { ListMemoriesInput, ListMemoriesResult, MemoryDecision, MemoryKind, MemoryRecord, MemoryScope, MemorizeInput, MemorizeResult, NormalizeUserChatInputOptions, RecallInput, RecallResult, RemoveMemoryInput, RuntimeEvaluationExport, RuntimeEvaluationExportInput, UpdateMemoryInput, UserChatInput, UserChatMessage, } from "./api.js";
|
|
3
|
+
export type { ListMemoriesInput, ListMemoriesResult, MemoryDecision, MemoryKind, MemoryRecord, MemoryScope, MemorizeInput, MemorizeResult, NormalizeUserChatInputOptions, RecallInput, RecallResult, RemoveMemoryInput, RuntimeEvaluationExport, RuntimeEvaluationExportInput, RuntimeEvaluationReplayInput, RuntimeEvaluationReplayResult, UpdateMemoryInput, UserChatInput, UserChatMessage, } from "./api.js";
|
|
4
4
|
export type { BuildFlowGraphInput, FlowEdge, FlowEdgeKind, FlowGraph, FlowGraphMermaidOptions, FlowGraphSequenceMermaidOptions, FlowGroup, FlowGroupKind, FlowNode, FlowNodeKind, FlowNodeLayer, FlowNodeStatus, } from "./flow/index.js";
|
|
5
|
+
export type { AcpStdioServer, AcpStdioServerOptions } from "./api.js";
|
|
5
6
|
export type { ToolMcpServerOptions } from "./mcp.js";
|
|
6
7
|
export { tool } from "./tools.js";
|
|
7
8
|
export type { UpstreamTimelineProjection, UpstreamTimelineReducer } from "./upstream-events.js";
|
package/dist/index.js
CHANGED
|
@@ -1,2 +1,2 @@
|
|
|
1
|
-
export { AgentHarnessAcpServer, AgentHarnessRuntime, buildFlowGraph, cancelRun, createAgentHarness, createAcpServer, createUpstreamTimelineReducer, createToolMcpServer, deleteSession, describeInventory, exportEvaluationBundle, getArtifact, getAgent, getApproval, getRequest, getHealth, listMemories, getSession, listAgentSkills, listArtifacts, listApprovals, listRequests, listSessions, memorize, normalizeUserChatInput, recall, removeMemory, resolveApproval, run, serveToolsOverStdio, subscribe, stop, updateMemory, exportFlowGraphToMermaid, exportFlowGraphToSequenceMermaid, } from "./api.js";
|
|
1
|
+
export { AgentHarnessAcpServer, AgentHarnessRuntime, buildFlowGraph, cancelRun, createAgentHarness, createAcpServer, createUpstreamTimelineReducer, createToolMcpServer, deleteSession, describeInventory, exportEvaluationBundle, replayEvaluationBundle, getArtifact, getAgent, getApproval, getRequest, getHealth, listMemories, getSession, listAgentSkills, listArtifacts, listApprovals, listRequests, listSessions, memorize, normalizeUserChatInput, recall, removeMemory, resolveApproval, run, serveAcpStdio, serveToolsOverStdio, subscribe, stop, updateMemory, exportFlowGraphToMermaid, exportFlowGraphToSequenceMermaid, } from "./api.js";
|
|
2
2
|
export { tool } from "./tools.js";
|
package/dist/package-version.d.ts
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
export declare const AGENT_HARNESS_VERSION = "0.0.
|
|
1
|
+
export declare const AGENT_HARNESS_VERSION = "0.0.159";
|
package/dist/package-version.js
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
export const AGENT_HARNESS_VERSION = "0.0.
|
|
1
|
+
export const AGENT_HARNESS_VERSION = "0.0.159";
|
package/dist/protocol/acp/stdio.d.ts
ADDED
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
import type { Readable, Writable } from "node:stream";
|
|
2
|
+
import type { AgentHarnessRuntime } from "../../runtime/harness.js";
|
|
3
|
+
export type AcpStdioServerOptions = {
|
|
4
|
+
input?: Readable;
|
|
5
|
+
output?: Writable;
|
|
6
|
+
};
|
|
7
|
+
export type AcpStdioServer = {
|
|
8
|
+
completed: Promise<void>;
|
|
9
|
+
close: () => Promise<void>;
|
|
10
|
+
};
|
|
11
|
+
export declare function serveAcpOverStdio(runtime: AgentHarnessRuntime, options?: AcpStdioServerOptions): AcpStdioServer;
|
package/dist/protocol/acp/stdio.js
ADDED
|
@@ -0,0 +1,69 @@
|
|
|
1
|
+
import { createInterface } from "node:readline";
|
|
2
|
+
import { createAcpServer } from "../../acp.js";
|
|
3
|
+
function writeJsonLine(output, payload) {
|
|
4
|
+
return new Promise((resolve, reject) => {
|
|
5
|
+
output.write(`${JSON.stringify(payload)}\n`, (error) => {
|
|
6
|
+
if (error) {
|
|
7
|
+
reject(error);
|
|
8
|
+
return;
|
|
9
|
+
}
|
|
10
|
+
resolve();
|
|
11
|
+
});
|
|
12
|
+
});
|
|
13
|
+
}
|
|
14
|
+
export function serveAcpOverStdio(runtime, options = {}) {
|
|
15
|
+
const input = options.input ?? process.stdin;
|
|
16
|
+
const output = options.output ?? process.stdout;
|
|
17
|
+
const server = createAcpServer(runtime);
|
|
18
|
+
const unsubscribe = server.subscribe((notification) => {
|
|
19
|
+
void writeJsonLine(output, notification);
|
|
20
|
+
});
|
|
21
|
+
const lineReader = createInterface({
|
|
22
|
+
input,
|
|
23
|
+
crlfDelay: Infinity,
|
|
24
|
+
});
|
|
25
|
+
const completed = (async () => {
|
|
26
|
+
try {
|
|
27
|
+
for await (const line of lineReader) {
|
|
28
|
+
const trimmed = line.trim();
|
|
29
|
+
if (trimmed.length === 0) {
|
|
30
|
+
continue;
|
|
31
|
+
}
|
|
32
|
+
let request;
|
|
33
|
+
try {
|
|
34
|
+
request = JSON.parse(trimmed);
|
|
35
|
+
}
|
|
36
|
+
catch {
|
|
37
|
+
await writeJsonLine(output, {
|
|
38
|
+
jsonrpc: "2.0",
|
|
39
|
+
id: null,
|
|
40
|
+
error: {
|
|
41
|
+
code: -32700,
|
|
42
|
+
message: "Invalid JSON payload.",
|
|
43
|
+
},
|
|
44
|
+
});
|
|
45
|
+
continue;
|
|
46
|
+
}
|
|
47
|
+
const response = await server.handle(request);
|
|
48
|
+
if (response) {
|
|
49
|
+
await writeJsonLine(output, response);
|
|
50
|
+
}
|
|
51
|
+
}
|
|
52
|
+
}
|
|
53
|
+
finally {
|
|
54
|
+
unsubscribe();
|
|
55
|
+
lineReader.close();
|
|
56
|
+
}
|
|
57
|
+
})();
|
|
58
|
+
return {
|
|
59
|
+
completed,
|
|
60
|
+
close: async () => {
|
|
61
|
+
unsubscribe();
|
|
62
|
+
lineReader.close();
|
|
63
|
+
if (typeof input.destroy === "function") {
|
|
64
|
+
input.destroy();
|
|
65
|
+
}
|
|
66
|
+
await completed.catch(() => undefined);
|
|
67
|
+
},
|
|
68
|
+
};
|
|
69
|
+
}
|
package/dist/runtime/harness/run/governance.js
CHANGED
|
@@ -43,8 +43,67 @@ function toCategory(toolType) {
|
|
|
43
43
|
}
|
|
44
44
|
return "local";
|
|
45
45
|
}
|
|
46
|
+
function asObject(value) {
|
|
47
|
+
return typeof value === "object" && value !== null && !Array.isArray(value) ? value : null;
|
|
48
|
+
}
|
|
49
|
+
function readStringArray(value) {
|
|
50
|
+
return Array.isArray(value)
|
|
51
|
+
? value.filter((item) => typeof item === "string" && item.trim().length > 0).map((item) => item.trim())
|
|
52
|
+
: [];
|
|
53
|
+
}
|
|
54
|
+
function readRisk(value) {
|
|
55
|
+
return value === "low" || value === "medium" || value === "high" ? value : undefined;
|
|
56
|
+
}
|
|
57
|
+
function readApprovalPolicy(value) {
|
|
58
|
+
return value === "explicit-hitl" || value === "runtime-default" || value === "none" ? value : undefined;
|
|
59
|
+
}
|
|
60
|
+
function matchesToolPolicy(rule, policy) {
|
|
61
|
+
const match = asObject(rule.match) ?? rule;
|
|
62
|
+
const toolName = typeof match.toolName === "string" ? match.toolName.trim() : undefined;
|
|
63
|
+
const category = typeof match.category === "string" ? match.category.trim() : undefined;
|
|
64
|
+
const toolType = typeof match.toolType === "string" ? match.toolType.trim() : undefined;
|
|
65
|
+
return (!toolName || toolName === policy.toolName)
|
|
66
|
+
&& (!category || category === policy.category)
|
|
67
|
+
&& (!toolType || toolType === policy.toolType);
|
|
68
|
+
}
|
|
69
|
+
function applyGovernanceOverrides(binding, policies) {
|
|
70
|
+
const governance = asObject(binding.harnessRuntime.governance);
|
|
71
|
+
const overrides = Array.isArray(governance?.toolPolicies) ? governance.toolPolicies : [];
|
|
72
|
+
if (overrides.length === 0) {
|
|
73
|
+
return policies;
|
|
74
|
+
}
|
|
75
|
+
return policies.map((policy) => {
|
|
76
|
+
const merged = { ...policy };
|
|
77
|
+
for (const rule of overrides) {
|
|
78
|
+
const typedRule = asObject(rule);
|
|
79
|
+
if (!typedRule || !matchesToolPolicy(typedRule, merged)) {
|
|
80
|
+
continue;
|
|
81
|
+
}
|
|
82
|
+
const overrideRisk = readRisk(typedRule.risk);
|
|
83
|
+
const overrideApprovalPolicy = readApprovalPolicy(typedRule.approvalPolicy);
|
|
84
|
+
const overrideRequiresApproval = typeof typedRule.requiresApproval === "boolean" ? typedRule.requiresApproval : undefined;
|
|
85
|
+
if (overrideRisk) {
|
|
86
|
+
merged.risk = overrideRisk;
|
|
87
|
+
}
|
|
88
|
+
if (overrideRequiresApproval !== undefined) {
|
|
89
|
+
merged.requiresApproval = overrideRequiresApproval;
|
|
90
|
+
}
|
|
91
|
+
if (overrideApprovalPolicy) {
|
|
92
|
+
merged.approvalPolicy = overrideApprovalPolicy;
|
|
93
|
+
}
|
|
94
|
+
else if (overrideRequiresApproval === true && merged.approvalPolicy === "none") {
|
|
95
|
+
merged.approvalPolicy = "runtime-default";
|
|
96
|
+
}
|
|
97
|
+
const extraHints = readStringArray(typedRule.inputRiskHints);
|
|
98
|
+
if (extraHints.length > 0) {
|
|
99
|
+
merged.inputRiskHints = Array.from(new Set([...merged.inputRiskHints, ...extraHints]));
|
|
100
|
+
}
|
|
101
|
+
}
|
|
102
|
+
return merged;
|
|
103
|
+
});
|
|
104
|
+
}
|
|
46
105
|
export function buildRuntimeGovernanceBundles(binding) {
|
|
47
|
-
const toolPolicies = getBindingPrimaryTools(binding).map((tool) => {
|
|
106
|
+
const toolPolicies = applyGovernanceOverrides(binding, getBindingPrimaryTools(binding).map((tool) => {
|
|
48
107
|
const requiresApproval = toolRequiresRuntimeApproval(tool);
|
|
49
108
|
return {
|
|
50
109
|
toolName: tool.name,
|
|
@@ -63,7 +122,7 @@ export function buildRuntimeGovernanceBundles(binding) {
|
|
|
63
122
|
hasInputSchema: typeof tool.inputSchemaRef === "string" && tool.inputSchemaRef.trim().length > 0,
|
|
64
123
|
inputRiskHints: inputHints(binding, tool),
|
|
65
124
|
};
|
|
66
|
-
});
|
|
125
|
+
}));
|
|
67
126
|
if (toolPolicies.length === 0) {
|
|
68
127
|
return [];
|
|
69
128
|
}
|
package/dist/runtime/harness/system/policy-engine.js
CHANGED
|
@@ -9,6 +9,35 @@ export class PolicyEngine {
|
|
|
9
9
|
const reasons = [];
|
|
10
10
|
const bundles = [];
|
|
11
11
|
let allowed = true;
|
|
12
|
+
const governance = typeof binding.harnessRuntime.governance === "object" && binding.harnessRuntime.governance
|
|
13
|
+
? binding.harnessRuntime.governance
|
|
14
|
+
: undefined;
|
|
15
|
+
const denyConfig = typeof governance?.deny === "object" && governance.deny
|
|
16
|
+
? governance.deny
|
|
17
|
+
: undefined;
|
|
18
|
+
if (denyConfig) {
|
|
19
|
+
const deniedNames = new Set(Array.isArray(denyConfig.toolNames)
|
|
20
|
+
? denyConfig.toolNames.filter((item) => typeof item === "string" && item.trim().length > 0).map((item) => item.trim())
|
|
21
|
+
: []);
|
|
22
|
+
const deniedCategories = new Set(Array.isArray(denyConfig.categories)
|
|
23
|
+
? denyConfig.categories.filter((item) => typeof item === "string" && item.trim().length > 0).map((item) => item.trim())
|
|
24
|
+
: []);
|
|
25
|
+
const tools = binding.execution?.params?.tools ?? binding.langchainAgentParams?.tools ?? binding.deepAgentParams?.tools ?? [];
|
|
26
|
+
const blocked = tools.filter((tool) => {
|
|
27
|
+
const category = tool.type === "mcp"
|
|
28
|
+
? "mcp"
|
|
29
|
+
: tool.type === "backend"
|
|
30
|
+
? "backend"
|
|
31
|
+
: tool.type === "provider"
|
|
32
|
+
? "provider-native"
|
|
33
|
+
: "local";
|
|
34
|
+
return deniedNames.has(tool.name) || deniedCategories.has(category);
|
|
35
|
+
});
|
|
36
|
+
if (blocked.length > 0) {
|
|
37
|
+
allowed = false;
|
|
38
|
+
reasons.push(`runtime governance denied tool access: ${blocked.map((tool) => tool.name).join(", ")}`);
|
|
39
|
+
}
|
|
40
|
+
}
|
|
12
41
|
for (const evaluator of getPolicyEvaluators()) {
|
|
13
42
|
const decision = evaluator.evaluate(binding);
|
|
14
43
|
if (!decision) {
|
package/dist/runtime/harness.d.ts
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import type { ApprovalRecord, ArtifactListing, CancelOptions, HarnessEvent, HarnessStreamItem, RuntimeHealthSnapshot, ListMemoriesInput, ListMemoriesResult, MessageContent, RemoveMemoryInput, RunRecord, RunStartOptions, RestartConversationOptions, RuntimeAdapterOptions, RuntimeEvaluationExport, RuntimeEvaluationExportInput, ResumeOptions, RunOptions, RunResult, RunSummary, MemoryRecord, MemorizeInput, MemorizeResult, RecallInput, RecallResult, UpdateMemoryInput, ThreadSummary, ThreadRecord, WorkspaceBundle } from "../contracts/types.js";
|
|
1
|
+
import type { ApprovalRecord, ArtifactListing, CancelOptions, HarnessEvent, HarnessStreamItem, RuntimeHealthSnapshot, ListMemoriesInput, ListMemoriesResult, MessageContent, RemoveMemoryInput, RunRecord, RunStartOptions, RestartConversationOptions, RuntimeAdapterOptions, RuntimeEvaluationExport, RuntimeEvaluationExportInput, RuntimeEvaluationReplayInput, RuntimeEvaluationReplayResult, ResumeOptions, RunOptions, RunResult, RunSummary, MemoryRecord, MemorizeInput, MemorizeResult, RecallInput, RecallResult, UpdateMemoryInput, ThreadSummary, ThreadRecord, WorkspaceBundle } from "../contracts/types.js";
|
|
2
2
|
import { type ToolMcpServerOptions } from "../mcp.js";
|
|
3
3
|
import { type InventoryAgentRecord, type InventorySkillRecord } from "./harness/system/inventory.js";
|
|
4
4
|
import type { RequirementAssessmentOptions } from "./harness/system/skill-requirements.js";
|
|
@@ -86,6 +86,7 @@ export declare class AgentHarnessRuntime {
|
|
|
86
86
|
readArtifact(threadId: string, runId: string, artifactPath: string): Promise<unknown>;
|
|
87
87
|
listRunEvents(threadId: string, runId: string): Promise<HarnessEvent[]>;
|
|
88
88
|
exportEvaluationBundle(input: RuntimeEvaluationExportInput): Promise<RuntimeEvaluationExport>;
|
|
89
|
+
replayEvaluationBundle(input: RuntimeEvaluationReplayInput): Promise<RuntimeEvaluationReplayResult>;
|
|
89
90
|
listAgentSkills(agentId: string, options?: RequirementAssessmentOptions): InventorySkillRecord[];
|
|
90
91
|
getAgent(agentId: string, options?: RequirementAssessmentOptions): InventoryAgentRecord | null;
|
|
91
92
|
describeWorkspaceInventory(options?: RequirementAssessmentOptions): {
|
package/dist/runtime/harness.js
CHANGED
|
@@ -487,6 +487,7 @@ export class AgentHarnessRuntime {
|
|
|
487
487
|
async exportEvaluationBundle(input) {
|
|
488
488
|
const thread = await this.getThread(input.sessionId);
|
|
489
489
|
const run = await this.getRun(input.requestId);
|
|
490
|
+
const runRequest = await this.persistence.getRunRequest(input.sessionId, input.requestId);
|
|
490
491
|
const approvals = await this.listApprovals({ threadId: input.sessionId, runId: input.requestId });
|
|
491
492
|
const transcript = await this.persistence.listThreadMessages(input.sessionId, 500);
|
|
492
493
|
const events = await this.persistence.listRunEvents(input.sessionId, input.requestId);
|
|
@@ -503,6 +504,13 @@ export class AgentHarnessRuntime {
|
|
|
503
504
|
return {
|
|
504
505
|
session: thread ? toSessionRecord(thread) : null,
|
|
505
506
|
request: run ? toRequestRecord(run) : null,
|
|
507
|
+
runRequest: runRequest
|
|
508
|
+
? {
|
|
509
|
+
input: runRequest.input,
|
|
510
|
+
...(runRequest.invocation ? { invocation: runRequest.invocation } : {}),
|
|
511
|
+
...(typeof runRequest.priority === "number" ? { priority: runRequest.priority } : {}),
|
|
512
|
+
}
|
|
513
|
+
: (deriveRunRequestFromTranscript(transcript, input.requestId) ?? null),
|
|
506
514
|
approvals,
|
|
507
515
|
transcript,
|
|
508
516
|
events,
|
|
@@ -520,6 +528,36 @@ export class AgentHarnessRuntime {
|
|
|
520
528
|
...(input.metadata ? { metadata: { ...input.metadata } } : {}),
|
|
521
529
|
};
|
|
522
530
|
}
|
|
531
|
+
async replayEvaluationBundle(input) {
|
|
532
|
+
const replayAgentId = input.agentId ?? input.bundle.request?.agentId ?? input.bundle.session?.entryAgentId;
|
|
533
|
+
if (!replayAgentId) {
|
|
534
|
+
throw new Error("Evaluation replay requires an agentId on the replay input or exported bundle.");
|
|
535
|
+
}
|
|
536
|
+
const replayRequest = input.bundle.runRequest ?? deriveRunRequestFromTranscript(input.bundle.transcript, input.bundle.request?.requestId);
|
|
537
|
+
if (!replayRequest) {
|
|
538
|
+
throw new Error("Evaluation replay requires bundle.runRequest from exportEvaluationBundle.");
|
|
539
|
+
}
|
|
540
|
+
const invocation = input.invocation ?? replayRequest.invocation;
|
|
541
|
+
const result = await this.run({
|
|
542
|
+
agentId: replayAgentId,
|
|
543
|
+
input: replayRequest.input,
|
|
544
|
+
...(input.sessionId ? { threadId: input.sessionId } : {}),
|
|
545
|
+
...(invocation ? { invocation } : {}),
|
|
546
|
+
});
|
|
547
|
+
const expected = typeof input.bundle.expectedOutput === "string" ? input.bundle.expectedOutput.trim() : "";
|
|
548
|
+
return {
|
|
549
|
+
request: {
|
|
550
|
+
agentId: replayAgentId,
|
|
551
|
+
input: replayRequest.input,
|
|
552
|
+
...(invocation ? { invocation } : {}),
|
|
553
|
+
...(input.sessionId ? { sessionId: input.sessionId } : {}),
|
|
554
|
+
},
|
|
555
|
+
result,
|
|
556
|
+
assertions: {
|
|
557
|
+
...(expected.length > 0 ? { expectedOutputMatched: result.output.includes(expected) } : {}),
|
|
558
|
+
},
|
|
559
|
+
};
|
|
560
|
+
}
|
|
523
561
|
listAgentSkills(agentId, options = {}) {
|
|
524
562
|
return listWorkspaceAgentSkills(this.workspace, agentId, {
|
|
525
563
|
assessRequirements: isInventoryEnabled(this.workspace),
|
|
@@ -1475,3 +1513,9 @@ function toSessionRecord(record) {
|
|
|
1475
1513
|
function toRequestRecord(record) {
|
|
1476
1514
|
return toRequestSummary(record);
|
|
1477
1515
|
}
|
|
1516
|
+
function deriveRunRequestFromTranscript(transcript, runId) {
|
|
1517
|
+
const candidate = [...transcript]
|
|
1518
|
+
.reverse()
|
|
1519
|
+
.find((message) => message.role === "user" && (!runId || message.runId === runId));
|
|
1520
|
+
return candidate ? { input: candidate.content } : null;
|
|
1521
|
+
}
|
package/dist/workspace/agent-binding-compiler.js
CHANGED
|
@@ -340,6 +340,7 @@ export function compileBinding(workspaceRoot, agent, agents, referencedSubagentI
|
|
|
340
340
|
const runtimeFilesystemDefaults = agent.executionMode === "langchain-v1"
|
|
341
341
|
? asObject(runtimeDefaults?.filesystem)
|
|
342
342
|
: undefined;
|
|
343
|
+
const runtimeGovernanceDefaults = asObject(runtimeDefaults?.governance);
|
|
343
344
|
const compiledFilesystemConfig = agent.executionMode === "langchain-v1"
|
|
344
345
|
? mergeConfigObjects(runtimeFilesystemDefaults, getAgentExecutionObject(agent, "filesystem", { executionMode: "langchain-v1" }))
|
|
345
346
|
: undefined;
|
|
@@ -355,6 +356,7 @@ export function compileBinding(workspaceRoot, agent, agents, referencedSubagentI
|
|
|
355
356
|
workspaceRoot,
|
|
356
357
|
capabilities: inferAgentCapabilities(agent),
|
|
357
358
|
resilience,
|
|
359
|
+
...(runtimeGovernanceDefaults ? { governance: runtimeGovernanceDefaults } : {}),
|
|
358
360
|
...(agent.executionMode === "deepagent"
|
|
359
361
|
? {
|
|
360
362
|
deepagent: {
|