@botbotgo/agent-harness 0.0.287 → 0.0.288
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +2 -2
- package/README.zh.md +2 -2
- package/dist/api.d.ts +4 -2
- package/dist/api.js +3 -0
- package/dist/contracts/runtime.d.ts +24 -0
- package/dist/index.d.ts +2 -2
- package/dist/index.js +1 -1
- package/dist/package-version.d.ts +1 -1
- package/dist/package-version.js +1 -1
- package/dist/runtime/harness/run/evaluation-artifacts.d.ts +8 -0
- package/dist/runtime/harness/run/evaluation-artifacts.js +108 -0
- package/dist/runtime/harness.d.ts +2 -1
- package/dist/runtime/harness.js +31 -6
- package/package.json +1 -1
package/README.md
CHANGED
|
@@ -143,8 +143,8 @@ Additional docs:
|
|
|
143
143
|
|
|
144
144
|
The public API spans a full product runtime—persistent records, memory and evidence, protocol surfaces, and governance—not only a thin bootstrap around YAML and tools.
|
|
145
145
|
|
|
146
|
-
- **Core runtime API:** `createAgentHarness`, `request`, `subscribe`, `resolveApproval`, inspection helpers, and stable persisted runtime records for `requests`, `sessions`, `approvals`, `events`, and artifacts.
|
|
147
|
-
- **Runtime memory and evidence:** `memorize`, `recall`, `listMemories`, memory policy hooks, `listArtifacts`, `getArtifact`, `exportEvaluationBundle`, `replayEvaluationBundle`, and request/session evidence export helpers.
|
|
146
|
+
- **Core runtime API:** `createAgentHarness`, `request`, `subscribe`, `resolveApproval`, `recordArtifact`, inspection helpers, and stable persisted runtime records for `requests`, `sessions`, `approvals`, `events`, and artifacts.
|
|
147
|
+
- **Runtime memory and evidence:** `memorize`, `recall`, `listMemories`, memory policy hooks, `recordArtifact`, `listArtifacts`, `getArtifact`, `exportEvaluationBundle`, `replayEvaluationBundle`, and request/session evidence export helpers.
|
|
148
148
|
- **Protocol and transport surfaces:** `createAcpServer`, `createAcpStdioClient`, `serveAcpStdio`, `serveAcpHttp`, `serveA2aHttp`, `serveAgUiHttp`, and `createRuntimeMcpServer` / `serveRuntimeMcpOverStdio`.
|
|
149
149
|
- **Governed workspace runtime:** YAML-owned routing, concurrency, maintenance, MCP policy, runtime governance bundles, and approval defaults for sensitive memory or write-like MCP side effects.
|
|
150
150
|
- **Policy-shaped approvals:** governed tools can stay on manual review, auto-approve, or auto-reject / deny-and-continue modes while the runtime keeps one inspectable governance decision surface.
|
package/README.zh.md
CHANGED
|
@@ -139,8 +139,8 @@ try {
|
|
|
139
139
|
|
|
140
140
|
若你想先看「今天能直接用到什么」,可从本节读起。`agent-harness` 提供完整的产品级运行时能力,而不只是「能启动」的脚手架。
|
|
141
141
|
|
|
142
|
-
- **核心 runtime API:** `createAgentHarness`、`request`、`subscribe`、`resolveApproval`、各类查询与检查辅助 API,以及稳定持久化的 `requests`、`sessions`、`approvals`、`events` 与 artifacts 记录。
|
|
143
|
-
- **运行时 memory 与证据能力:** `memorize`、`recall`、`listMemories`、memory policy hooks、`listArtifacts`、`getArtifact`、`exportEvaluationBundle`、`replayEvaluationBundle`,以及 request / session 级证据导出辅助函数。
|
|
142
|
+
- **核心 runtime API:** `createAgentHarness`、`request`、`subscribe`、`resolveApproval`、`recordArtifact`、各类查询与检查辅助 API,以及稳定持久化的 `requests`、`sessions`、`approvals`、`events` 与 artifacts 记录。
|
|
143
|
+
- **运行时 memory 与证据能力:** `memorize`、`recall`、`listMemories`、memory policy hooks、`recordArtifact`、`listArtifacts`、`getArtifact`、`exportEvaluationBundle`、`replayEvaluationBundle`,以及 request / session 级证据导出辅助函数。
|
|
144
144
|
- **协议与传输层:** `createAcpServer`、`createAcpStdioClient`、`serveAcpStdio`、`serveAcpHttp`、`serveA2aHttp`、`serveAgUiHttp`,以及 `createRuntimeMcpServer` / `serveRuntimeMcpOverStdio`。
|
|
145
145
|
- **受治理的工作区运行时:** 由 YAML 持有的路由、并发、维护、MCP 策略、runtime governance bundles,以及针对敏感 memory 或写类 MCP 副作用的默认审批门槛。
|
|
146
146
|
- **策略化审批:** 受治理工具现在既可以走人工审批,也可以走 `auto-approve`、`auto-reject` 或 `deny-and-continue`,同时继续保留统一可检查的治理决策面。
|
package/dist/api.d.ts
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import type { ArtifactListing, CancelOptions, InvocationEnvelope, ListMemoriesInput, ListMemoriesResult, MemoryRecord, MemorizeInput, MemorizeResult, MessageContent, RecallInput, RecallResult, RemoveMemoryInput, RequestRecord, RequestSummary, ResumeOptions, RunDecisionOptions, RunListeners, RunResult, RunStartOptions, RuntimeHealthSnapshot, RuntimeGovernanceEvidence, RuntimeGovernanceDiagnostics, RuntimeOperatorOverview, RuntimeQueueDiagnostics, RuntimeAdapterOptions, RuntimeEvaluationExport, RuntimeEvaluationExportInput, RuntimeEvaluationReplayInput, RuntimeEvaluationReplayResult as InternalRuntimeEvaluationReplayResult, RuntimeSessionPackage, RuntimeSessionPackageInput, SessionListSummary, SessionRecord, SessionSummary, TranscriptMessage, UpdateMemoryInput, WorkspaceLoadOptions } from "./contracts/types.js";
|
|
1
|
+
import type { ArtifactListing, CancelOptions, InvocationEnvelope, ListMemoriesInput, ListMemoriesResult, MemoryRecord, MemorizeInput, MemorizeResult, MessageContent, RecallInput, RecallResult, RemoveMemoryInput, RequestRecord, RequestSummary, RuntimeArtifactWriteInput, ResumeOptions, RunDecisionOptions, RunListeners, RunResult, RunStartOptions, RuntimeHealthSnapshot, RuntimeGovernanceEvidence, RuntimeGovernanceDiagnostics, RuntimeOperatorOverview, RuntimeQueueDiagnostics, RuntimeAdapterOptions, RuntimeEvaluationExport, RuntimeEvaluationExportInput, RuntimeEvaluationArtifact, RuntimeEvaluationReplayInput, RuntimeEvaluationReplayResult as InternalRuntimeEvaluationReplayResult, RuntimeSessionPackage, RuntimeSessionPackageInput, SessionListSummary, SessionRecord, SessionSummary, TranscriptMessage, UpdateMemoryInput, WorkspaceLoadOptions } from "./contracts/types.js";
|
|
2
2
|
import { AgentHarnessRuntime } from "./runtime/harness.js";
|
|
3
3
|
import type { InventoryAgentRecord, InventorySkillRecord } from "./runtime/harness/system/inventory.js";
|
|
4
4
|
import type { RequirementAssessmentOptions } from "./runtime/harness/system/skill-requirements.js";
|
|
@@ -72,10 +72,11 @@ export type RequestPackage = {
|
|
|
72
72
|
approvals: Approval[];
|
|
73
73
|
transcript: TranscriptMessage[];
|
|
74
74
|
events: RequestEvent[];
|
|
75
|
-
artifacts:
|
|
75
|
+
artifacts: RuntimeEvaluationArtifact[];
|
|
76
76
|
governance: RuntimeGovernanceEvidence;
|
|
77
77
|
runtimeHealth?: RuntimeHealthSnapshot;
|
|
78
78
|
};
|
|
79
|
+
export type RecordArtifactInput = RuntimeArtifactWriteInput;
|
|
79
80
|
export type RuntimeEvaluationReplayResult = Omit<InternalRuntimeEvaluationReplayResult, "result"> & {
|
|
80
81
|
result: PublicRunResult;
|
|
81
82
|
};
|
|
@@ -162,6 +163,7 @@ export declare function getArtifact(runtime: AgentHarnessRuntime, input: {
|
|
|
162
163
|
requestId: string;
|
|
163
164
|
artifactPath: string;
|
|
164
165
|
}): Promise<unknown>;
|
|
166
|
+
export declare function recordArtifact(runtime: AgentHarnessRuntime, input: RecordArtifactInput): Promise<import("./contracts/types.js").ArtifactRecord>;
|
|
165
167
|
export declare function listRequestEvents(runtime: AgentHarnessRuntime, input: {
|
|
166
168
|
sessionId: string;
|
|
167
169
|
requestId: string;
|
package/dist/api.js
CHANGED
|
@@ -285,6 +285,9 @@ export async function listArtifacts(runtime, input) {
|
|
|
285
285
|
export async function getArtifact(runtime, input) {
|
|
286
286
|
return runtime.readArtifact(input.sessionId, input.requestId, input.artifactPath);
|
|
287
287
|
}
|
|
288
|
+
export async function recordArtifact(runtime, input) {
|
|
289
|
+
return runtime.recordArtifact(input);
|
|
290
|
+
}
|
|
288
291
|
export async function listRequestEvents(runtime, input) {
|
|
289
292
|
return (await runtime.listRequestEvents(input.sessionId, input.requestId)).map(toPublicEvent);
|
|
290
293
|
}
|
package/dist/contracts/runtime.d.ts
CHANGED
|
@@ -699,8 +699,31 @@ export type RuntimeEvaluationExportInput = {
|
|
|
699
699
|
tags?: string[];
|
|
700
700
|
metadata?: Record<string, unknown>;
|
|
701
701
|
};
|
|
702
|
+
export type RuntimeArtifactWriteInput = {
|
|
703
|
+
sessionId: string;
|
|
704
|
+
requestId: string;
|
|
705
|
+
kind: string;
|
|
706
|
+
path: string;
|
|
707
|
+
content: unknown;
|
|
708
|
+
artifactId?: string;
|
|
709
|
+
createdAt?: string;
|
|
710
|
+
};
|
|
711
|
+
export type RuntimeEvaluationArtifactFormat = "json" | "markdown" | "text" | "unknown";
|
|
712
|
+
export type RuntimeEvaluationArtifactRole = "product-spec" | "sprint-contract" | "qa-report" | "handoff" | "other";
|
|
702
713
|
export type RuntimeEvaluationArtifact = ArtifactRecord & {
|
|
703
714
|
content?: unknown;
|
|
715
|
+
format: RuntimeEvaluationArtifactFormat;
|
|
716
|
+
role: RuntimeEvaluationArtifactRole;
|
|
717
|
+
};
|
|
718
|
+
export type RuntimeEvaluationArtifactSummary = {
|
|
719
|
+
productSpecPaths: string[];
|
|
720
|
+
sprintContractPaths: string[];
|
|
721
|
+
qaReportPaths: string[];
|
|
722
|
+
handoffPaths: string[];
|
|
723
|
+
qaVerdicts: Array<{
|
|
724
|
+
path: string;
|
|
725
|
+
verdict: string;
|
|
726
|
+
}>;
|
|
704
727
|
};
|
|
705
728
|
export type RuntimeEvaluationExport = {
|
|
706
729
|
session: SessionRecord | null;
|
|
@@ -714,6 +737,7 @@ export type RuntimeEvaluationExport = {
|
|
|
714
737
|
transcript: TranscriptMessage[];
|
|
715
738
|
events: HarnessEvent[];
|
|
716
739
|
artifacts: RuntimeEvaluationArtifact[];
|
|
740
|
+
artifactSummary: RuntimeEvaluationArtifactSummary;
|
|
717
741
|
runtimeHealth: RuntimeHealthSnapshot;
|
|
718
742
|
expectedOutput?: string;
|
|
719
743
|
rubric: string[];
|
package/dist/index.d.ts
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
|
-
export { AgentHarnessAcpServer, AgentHarnessRuntime, cancelRun, createAgentHarness, createAcpServer, createAcpStdioClient, createRuntimeMcpServer, createUpstreamTimelineReducer, createToolMcpServer, deleteSession, describeInventory, exportEvaluationBundle, exportFlow, exportSequence, exportRequestPackage, exportSessionPackage, replayEvaluationBundle, getArtifact, getAgent, getApproval, getOperatorOverview, getRequest, getHealth, listMemories, listRequestTraceItems, getSession, listAgentSkills, listArtifacts, listApprovals, listRequests, listRequestEvents, listSessionSummaries, listSessions, memorize, normalizeUserChatInput, request, recall, removeMemory, resolveApproval, serveA2aHttp, serveAcpHttp, serveAcpStdio, serveAgUiHttp, serveRuntimeMcpOverStdio, serveToolsOverStdio, subscribe, stop, updateMemory, } from "./api.js";
|
|
1
|
+
export { AgentHarnessAcpServer, AgentHarnessRuntime, cancelRun, createAgentHarness, createAcpServer, createAcpStdioClient, createRuntimeMcpServer, createUpstreamTimelineReducer, createToolMcpServer, deleteSession, describeInventory, exportEvaluationBundle, exportFlow, exportSequence, exportRequestPackage, exportSessionPackage, replayEvaluationBundle, getArtifact, getAgent, getApproval, getOperatorOverview, getRequest, getHealth, listMemories, listRequestTraceItems, getSession, listAgentSkills, listArtifacts, listApprovals, listRequests, listRequestEvents, listSessionSummaries, listSessions, memorize, normalizeUserChatInput, recordArtifact, request, recall, removeMemory, resolveApproval, serveA2aHttp, serveAcpHttp, serveAcpStdio, serveAgUiHttp, serveRuntimeMcpOverStdio, serveToolsOverStdio, subscribe, stop, updateMemory, } from "./api.js";
|
|
2
2
|
export { createKnowledgeModule, readKnowledgeRuntimeConfig } from "./knowledge/index.js";
|
|
3
3
|
export type { AcpApproval, AcpArtifact, AcpEventNotification, AcpJsonRpcError, AcpJsonRpcRequest, AcpJsonRpcResponse, AcpJsonRpcSuccess, AcpRequestRecord, AcpRunRequestParams, AcpServerCapabilities, AcpSessionRecord, AcpStdioClient, AcpStdioClientOptions, } from "./acp.js";
|
|
4
|
-
export type { Approval, ListMemoriesInput, ListMemoriesResult, MemoryDecision, MemoryKind, MemoryRecord, MemoryScope, MemorizeInput, MemorizeResult, NormalizeUserChatInputOptions, OperatorOverview, PublicRunListeners, RequestArtifactListing, RequestEvent, RequestEventType, RequestPackage, RequestPackageInput, RequestFlowGraphInput, RequestResult, RequestTraceItem, RecallInput, RecallResult, RemoveMemoryInput, RuntimeEvaluationExport, RuntimeEvaluationExportInput, RuntimeEvaluationReplayInput, RuntimeEvaluationReplayResult, SessionListSummary, RuntimeSessionPackage, RuntimeSessionPackageInput, UpdateMemoryInput, UserChatInput, UserChatMessage, } from "./api.js";
|
|
4
|
+
export type { Approval, ListMemoriesInput, ListMemoriesResult, MemoryDecision, MemoryKind, MemoryRecord, MemoryScope, MemorizeInput, MemorizeResult, NormalizeUserChatInputOptions, OperatorOverview, PublicRunListeners, RecordArtifactInput, RequestArtifactListing, RequestEvent, RequestEventType, RequestPackage, RequestPackageInput, RequestFlowGraphInput, RequestResult, RequestTraceItem, RecallInput, RecallResult, RemoveMemoryInput, RuntimeEvaluationExport, RuntimeEvaluationExportInput, RuntimeEvaluationReplayInput, RuntimeEvaluationReplayResult, SessionListSummary, RuntimeSessionPackage, RuntimeSessionPackageInput, UpdateMemoryInput, UserChatInput, UserChatMessage, } from "./api.js";
|
|
5
5
|
export type { KnowledgeListInput, KnowledgeMemorizeInput, KnowledgeModule, KnowledgeModuleDependencies, KnowledgeRecallInput, KnowledgeRuntimeConfig, KnowledgeRuntimeContext, } from "./knowledge/index.js";
|
|
6
6
|
export type { A2aAgentCard, A2aHttpServer, A2aHttpServerOptions, A2aTask, A2aTaskState, AcpHttpServer, AcpHttpServerOptions, AcpStdioServer, AcpStdioServerOptions, AgUiEvent, AgUiHttpServer, AgUiHttpServerOptions, AgUiRunAgentInput, } from "./api.js";
|
|
7
7
|
export type { RuntimeMcpServerOptions, ToolMcpServerOptions } from "./mcp.js";
|
package/dist/index.js
CHANGED
|
@@ -1,3 +1,3 @@
|
|
|
1
|
-
export { AgentHarnessAcpServer, AgentHarnessRuntime, cancelRun, createAgentHarness, createAcpServer, createAcpStdioClient, createRuntimeMcpServer, createUpstreamTimelineReducer, createToolMcpServer, deleteSession, describeInventory, exportEvaluationBundle, exportFlow, exportSequence, exportRequestPackage, exportSessionPackage, replayEvaluationBundle, getArtifact, getAgent, getApproval, getOperatorOverview, getRequest, getHealth, listMemories, listRequestTraceItems, getSession, listAgentSkills, listArtifacts, listApprovals, listRequests, listRequestEvents, listSessionSummaries, listSessions, memorize, normalizeUserChatInput, request, recall, removeMemory, resolveApproval, serveA2aHttp, serveAcpHttp, serveAcpStdio, serveAgUiHttp, serveRuntimeMcpOverStdio, serveToolsOverStdio, subscribe, stop, updateMemory, } from "./api.js";
|
|
1
|
+
export { AgentHarnessAcpServer, AgentHarnessRuntime, cancelRun, createAgentHarness, createAcpServer, createAcpStdioClient, createRuntimeMcpServer, createUpstreamTimelineReducer, createToolMcpServer, deleteSession, describeInventory, exportEvaluationBundle, exportFlow, exportSequence, exportRequestPackage, exportSessionPackage, replayEvaluationBundle, getArtifact, getAgent, getApproval, getOperatorOverview, getRequest, getHealth, listMemories, listRequestTraceItems, getSession, listAgentSkills, listArtifacts, listApprovals, listRequests, listRequestEvents, listSessionSummaries, listSessions, memorize, normalizeUserChatInput, recordArtifact, request, recall, removeMemory, resolveApproval, serveA2aHttp, serveAcpHttp, serveAcpStdio, serveAgUiHttp, serveRuntimeMcpOverStdio, serveToolsOverStdio, subscribe, stop, updateMemory, } from "./api.js";
|
|
2
2
|
export { createKnowledgeModule, readKnowledgeRuntimeConfig } from "./knowledge/index.js";
|
|
3
3
|
export { tool } from "./tools.js";
|
package/dist/package-version.d.ts
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
export declare const AGENT_HARNESS_VERSION = "0.0.
|
|
1
|
+
export declare const AGENT_HARNESS_VERSION = "0.0.287";
|
package/dist/package-version.js
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
export const AGENT_HARNESS_VERSION = "0.0.
|
|
1
|
+
export const AGENT_HARNESS_VERSION = "0.0.287";
|
package/dist/runtime/harness/run/evaluation-artifacts.d.ts
ADDED
|
@@ -0,0 +1,8 @@
|
|
|
1
|
+
import type { RuntimeEvaluationArtifact, RuntimeEvaluationArtifactSummary } from "../../../contracts/types.js";
|
|
2
|
+
export declare function normalizeRecordedArtifactPath(input: string): string;
|
|
3
|
+
export declare function enrichEvaluationArtifact<T extends {
|
|
4
|
+
path: string;
|
|
5
|
+
kind: string;
|
|
6
|
+
content?: unknown;
|
|
7
|
+
}>(artifact: T): T & Pick<RuntimeEvaluationArtifact, "format" | "role">;
|
|
8
|
+
export declare function summarizeEvaluationArtifacts(artifacts: RuntimeEvaluationArtifact[]): RuntimeEvaluationArtifactSummary;
|
package/dist/runtime/harness/run/evaluation-artifacts.js
ADDED
|
@@ -0,0 +1,108 @@
|
|
|
1
|
+
import path from "node:path";
|
|
2
|
+
function isObject(value) {
|
|
3
|
+
return Boolean(value) && typeof value === "object" && !Array.isArray(value);
|
|
4
|
+
}
|
|
5
|
+
function inferArtifactFormat(artifactPath, content) {
|
|
6
|
+
const normalized = artifactPath.toLowerCase();
|
|
7
|
+
if (normalized.endsWith(".json")) {
|
|
8
|
+
return "json";
|
|
9
|
+
}
|
|
10
|
+
if (normalized.endsWith(".md") || normalized.endsWith(".markdown")) {
|
|
11
|
+
return "markdown";
|
|
12
|
+
}
|
|
13
|
+
if (normalized.endsWith(".txt")) {
|
|
14
|
+
return "text";
|
|
15
|
+
}
|
|
16
|
+
if (typeof content === "string") {
|
|
17
|
+
return content.includes("#") || content.includes("##") ? "markdown" : "text";
|
|
18
|
+
}
|
|
19
|
+
if (content !== undefined) {
|
|
20
|
+
return "json";
|
|
21
|
+
}
|
|
22
|
+
return "unknown";
|
|
23
|
+
}
|
|
24
|
+
function inferArtifactRole(artifactPath, kind, content) {
|
|
25
|
+
const normalized = artifactPath.toLowerCase();
|
|
26
|
+
const loweredKind = kind.toLowerCase();
|
|
27
|
+
const contentObject = isObject(content) ? content : null;
|
|
28
|
+
if (normalized.includes("product-spec") || normalized.endsWith("/spec.md") || loweredKind === "product-spec") {
|
|
29
|
+
return "product-spec";
|
|
30
|
+
}
|
|
31
|
+
if (normalized.includes("sprint-contract")
|
|
32
|
+
|| loweredKind === "sprint-contract"
|
|
33
|
+
|| (contentObject && Array.isArray(contentObject.acceptanceCriteria))) {
|
|
34
|
+
return "sprint-contract";
|
|
35
|
+
}
|
|
36
|
+
if (normalized.includes("qa-report")
|
|
37
|
+
|| loweredKind === "qa-report"
|
|
38
|
+
|| (contentObject && (typeof contentObject.verdict === "string" || typeof contentObject.status === "string"))) {
|
|
39
|
+
return "qa-report";
|
|
40
|
+
}
|
|
41
|
+
if (normalized.includes("handoff") || loweredKind === "handoff") {
|
|
42
|
+
return "handoff";
|
|
43
|
+
}
|
|
44
|
+
return "other";
|
|
45
|
+
}
|
|
46
|
+
function extractQaVerdict(content) {
|
|
47
|
+
if (!isObject(content)) {
|
|
48
|
+
return null;
|
|
49
|
+
}
|
|
50
|
+
for (const key of ["verdict", "status", "decision"]) {
|
|
51
|
+
if (typeof content[key] === "string" && String(content[key]).trim().length > 0) {
|
|
52
|
+
return String(content[key]).trim();
|
|
53
|
+
}
|
|
54
|
+
}
|
|
55
|
+
return null;
|
|
56
|
+
}
|
|
57
|
+
export function normalizeRecordedArtifactPath(input) {
|
|
58
|
+
const trimmed = input.trim().replaceAll("\\", "/").replace(/^\.\/+/, "");
|
|
59
|
+
if (!trimmed) {
|
|
60
|
+
throw new Error("Artifact path must be a non-empty relative path.");
|
|
61
|
+
}
|
|
62
|
+
if (trimmed.startsWith("/") || /^[a-z]:\//i.test(trimmed)) {
|
|
63
|
+
throw new Error("Artifact path must stay relative to the persisted run artifact directory.");
|
|
64
|
+
}
|
|
65
|
+
const normalized = path.posix.normalize(trimmed);
|
|
66
|
+
if (normalized === "." || normalized.startsWith("../") || normalized.includes("/../")) {
|
|
67
|
+
throw new Error("Artifact path must stay inside the persisted run artifact directory.");
|
|
68
|
+
}
|
|
69
|
+
return normalized;
|
|
70
|
+
}
|
|
71
|
+
export function enrichEvaluationArtifact(artifact) {
|
|
72
|
+
return {
|
|
73
|
+
...artifact,
|
|
74
|
+
format: inferArtifactFormat(artifact.path, artifact.content),
|
|
75
|
+
role: inferArtifactRole(artifact.path, artifact.kind, artifact.content),
|
|
76
|
+
};
|
|
77
|
+
}
|
|
78
|
+
export function summarizeEvaluationArtifacts(artifacts) {
|
|
79
|
+
const summary = {
|
|
80
|
+
productSpecPaths: [],
|
|
81
|
+
sprintContractPaths: [],
|
|
82
|
+
qaReportPaths: [],
|
|
83
|
+
handoffPaths: [],
|
|
84
|
+
qaVerdicts: [],
|
|
85
|
+
};
|
|
86
|
+
for (const artifact of artifacts) {
|
|
87
|
+
if (artifact.role === "product-spec") {
|
|
88
|
+
summary.productSpecPaths.push(artifact.path);
|
|
89
|
+
continue;
|
|
90
|
+
}
|
|
91
|
+
if (artifact.role === "sprint-contract") {
|
|
92
|
+
summary.sprintContractPaths.push(artifact.path);
|
|
93
|
+
continue;
|
|
94
|
+
}
|
|
95
|
+
if (artifact.role === "qa-report") {
|
|
96
|
+
summary.qaReportPaths.push(artifact.path);
|
|
97
|
+
const verdict = extractQaVerdict(artifact.content);
|
|
98
|
+
if (verdict) {
|
|
99
|
+
summary.qaVerdicts.push({ path: artifact.path, verdict });
|
|
100
|
+
}
|
|
101
|
+
continue;
|
|
102
|
+
}
|
|
103
|
+
if (artifact.role === "handoff") {
|
|
104
|
+
summary.handoffPaths.push(artifact.path);
|
|
105
|
+
}
|
|
106
|
+
}
|
|
107
|
+
return summary;
|
|
108
|
+
}
|
package/dist/runtime/harness.d.ts
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import type { ApprovalRecord, ArtifactListing, CancelOptions, HarnessEvent, HarnessStreamItem, RuntimeHealthSnapshot, RuntimeOperatorOverview, ListMemoriesInput, ListMemoriesResult, MessageContent, RemoveMemoryInput, RunRecord, RunStartOptions, RestartConversationOptions, RuntimeAdapterOptions, RuntimeEvaluationExport, RuntimeEvaluationExportInput, RuntimeEvaluationReplayInput, RuntimeEvaluationReplayResult, RuntimeRunPackage, RuntimeRunPackageInput, RuntimeSessionPackage, RuntimeSessionPackageInput, ResumeOptions, RunOptions, RunResult, RunSummary, MemoryRecord, MemorizeInput, MemorizeResult, RecallInput, RecallResult, UpdateMemoryInput, SessionSummary, ThreadSummary, ThreadRecord, SessionRecord, RequestRecord, RequestSummary, SessionListSummary, WorkspaceBundle } from "../contracts/types.js";
|
|
1
|
+
import type { ApprovalRecord, ArtifactListing, CancelOptions, HarnessEvent, HarnessStreamItem, RuntimeHealthSnapshot, RuntimeOperatorOverview, ListMemoriesInput, ListMemoriesResult, MessageContent, RemoveMemoryInput, RunRecord, RunStartOptions, RestartConversationOptions, RuntimeAdapterOptions, RuntimeArtifactWriteInput, RuntimeEvaluationExport, RuntimeEvaluationExportInput, RuntimeEvaluationReplayInput, RuntimeEvaluationReplayResult, RuntimeRunPackage, RuntimeRunPackageInput, RuntimeSessionPackage, RuntimeSessionPackageInput, ResumeOptions, RunOptions, RunResult, RunSummary, MemoryRecord, MemorizeInput, MemorizeResult, RecallInput, RecallResult, UpdateMemoryInput, SessionSummary, ThreadSummary, ThreadRecord, SessionRecord, RequestRecord, RequestSummary, SessionListSummary, WorkspaceBundle } from "../contracts/types.js";
|
|
2
2
|
import { type RuntimeMcpServerOptions, type ToolMcpServerOptions } from "../mcp.js";
|
|
3
3
|
import { type InventoryAgentRecord, type InventorySkillRecord } from "./harness/system/inventory.js";
|
|
4
4
|
import type { RequirementAssessmentOptions } from "./harness/system/skill-requirements.js";
|
|
@@ -102,6 +102,7 @@ export declare class AgentHarnessRuntime {
|
|
|
102
102
|
getApproval(approvalId: string): Promise<ApprovalRecord | null>;
|
|
103
103
|
listArtifacts(threadId: string, runId: string): Promise<ArtifactListing>;
|
|
104
104
|
readArtifact(threadId: string, runId: string, artifactPath: string): Promise<unknown>;
|
|
105
|
+
recordArtifact(input: RuntimeArtifactWriteInput): Promise<import("../contracts/types.js").ArtifactRecord>;
|
|
105
106
|
listRequestEvents(sessionId: string, requestId: string): Promise<HarnessEvent[]>;
|
|
106
107
|
listRunEvents(threadId: string, runId: string): Promise<HarnessEvent[]>;
|
|
107
108
|
exportRequestPackage(input: RuntimeRunPackageInput): Promise<RuntimeRunPackage>;
|
package/dist/runtime/harness.js
CHANGED
|
@@ -3,6 +3,7 @@ import { SqlitePersistence } from "../persistence/sqlite-store.js";
|
|
|
3
3
|
import { createPersistentId } from "../utils/id.js";
|
|
4
4
|
import { extractMessageText } from "../utils/message-content.js";
|
|
5
5
|
import { AgentRuntimeAdapter } from "./agent-runtime-adapter.js";
|
|
6
|
+
import { enrichEvaluationArtifact, normalizeRecordedArtifactPath, summarizeEvaluationArtifacts, } from "./harness/run/evaluation-artifacts.js";
|
|
6
7
|
import { EventBus } from "./harness/events/event-bus.js";
|
|
7
8
|
import { createBackgroundEventRuntime } from "./harness/background-runtime.js";
|
|
8
9
|
import { PolicyEngine } from "./harness/system/policy-engine.js";
|
|
@@ -633,7 +634,28 @@ export class AgentHarnessRuntime {
|
|
|
633
634
|
};
|
|
634
635
|
}
|
|
635
636
|
async readArtifact(threadId, runId, artifactPath) {
|
|
636
|
-
return this.persistence.readArtifact(threadId, runId, artifactPath);
|
|
637
|
+
return this.persistence.readArtifact(threadId, runId, normalizeRecordedArtifactPath(artifactPath));
|
|
638
|
+
}
|
|
639
|
+
async recordArtifact(input) {
|
|
640
|
+
const session = await this.getSessionRecord(input.sessionId);
|
|
641
|
+
if (!session) {
|
|
642
|
+
throw new Error(`Cannot record artifact for unknown session: ${input.sessionId}`);
|
|
643
|
+
}
|
|
644
|
+
const request = await this.getRequest(input.requestId);
|
|
645
|
+
if (!request || request.sessionId !== input.sessionId) {
|
|
646
|
+
throw new Error(`Cannot record artifact for unknown request: ${input.requestId}`);
|
|
647
|
+
}
|
|
648
|
+
const artifact = {
|
|
649
|
+
artifactId: typeof input.artifactId === "string" && input.artifactId.trim().length > 0
|
|
650
|
+
? input.artifactId.trim()
|
|
651
|
+
: createPersistentId(),
|
|
652
|
+
kind: input.kind.trim(),
|
|
653
|
+
path: normalizeRecordedArtifactPath(input.path),
|
|
654
|
+
createdAt: typeof input.createdAt === "string" && input.createdAt.trim().length > 0
|
|
655
|
+
? input.createdAt.trim()
|
|
656
|
+
: new Date().toISOString(),
|
|
657
|
+
};
|
|
658
|
+
return this.persistence.createArtifact(input.sessionId, input.requestId, artifact, input.content);
|
|
637
659
|
}
|
|
638
660
|
async listRequestEvents(sessionId, requestId) {
|
|
639
661
|
return this.persistence.listRunEvents(sessionId, requestId);
|
|
@@ -650,7 +672,7 @@ export class AgentHarnessRuntime {
|
|
|
650
672
|
const artifactsListing = input.includeArtifacts === false
|
|
651
673
|
? { items: [] }
|
|
652
674
|
: await this.persistence.listArtifacts(input.sessionId, input.requestId);
|
|
653
|
-
const artifacts = await Promise.all(artifactsListing.items.map(async (artifact) => ({
|
|
675
|
+
const artifacts = await Promise.all(artifactsListing.items.map(async (artifact) => enrichEvaluationArtifact({
|
|
654
676
|
...artifact,
|
|
655
677
|
...(input.includeArtifactContents === true
|
|
656
678
|
? { content: await this.persistence.readArtifact(input.sessionId, input.requestId, artifact.path) }
|
|
@@ -727,10 +749,12 @@ export class AgentHarnessRuntime {
|
|
|
727
749
|
? { items: [] }
|
|
728
750
|
: await this.persistence.listArtifacts(input.sessionId, input.requestId);
|
|
729
751
|
const artifacts = await Promise.all(artifactsListing.items.map(async (artifact) => ({
|
|
730
|
-
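...artifact,
|
|
731
|
-
...(input.includeArtifactContents === true
|
|
732
|
-
? { content: await this.persistence.readArtifact(input.sessionId, input.requestId, artifact.path) }
|
|
733
|
-
: {}),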
|
|
752
|
+
...enrichEvaluationArtifact({
|
|
753
|
+
...artifact,
|
|
754
|
+
...(input.includeArtifactContents === true
|
|
755
|
+
? { content: await this.persistence.readArtifact(input.sessionId, input.requestId, artifact.path) }
|
|
756
|
+
: {}),
|
|
757
|
+
}),
|
|
734
758
|
})));
|
|
735
759
|
return {
|
|
736
760
|
session: thread ? toSessionRecord(thread) : null,
|
|
@@ -746,6 +770,7 @@ export class AgentHarnessRuntime {
|
|
|
746
770
|
transcript,
|
|
747
771
|
events,
|
|
748
772
|
artifacts,
|
|
773
|
+
artifactSummary: summarizeEvaluationArtifacts(artifacts),
|
|
749
774
|
runtimeHealth,
|
|
750
775
|
...(typeof input.expectedOutput === "string" && input.expectedOutput.trim().length > 0
|
|
751
776
|
? { expectedOutput: input.expectedOutput.trim() }
|