@botbotgo/agent-harness 0.0.351 → 0.0.353

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (32)
  1. package/README.md +2 -1
  2. package/README.zh.md +2 -1
  3. package/dist/api.d.ts +3 -0
  4. package/dist/api.js +3 -0
  5. package/dist/cli/options.js +1 -0
  6. package/dist/cli/runtime-commands.js +12 -1
  7. package/dist/cli/runtime-output.d.ts +1 -0
  8. package/dist/cli/runtime-output.js +31 -0
  9. package/dist/index.d.ts +2 -2
  10. package/dist/index.js +1 -1
  11. package/dist/package-version.d.ts +1 -1
  12. package/dist/package-version.js +1 -1
  13. package/dist/resources/prompts/runtime/execution-with-tool-evidence-retry.md +5 -1
  14. package/dist/runtime/adapter/flow/invocation-flow.js +79 -8
  15. package/dist/runtime/adapter/invocation-result.d.ts +7 -0
  16. package/dist/runtime/adapter/invocation-result.js +95 -7
  17. package/dist/runtime/adapter/local-tool-invocation.js +23 -5
  18. package/dist/runtime/adapter/middleware-assembly.js +29 -2
  19. package/dist/runtime/adapter/resilience.d.ts +1 -0
  20. package/dist/runtime/adapter/resilience.js +2 -1
  21. package/dist/runtime/adapter/terminal-status.js +2 -2
  22. package/dist/runtime/agent-runtime-adapter.js +13 -3
  23. package/dist/runtime/harness/events/event-sink.js +19 -2
  24. package/dist/runtime/harness/system/boundary-analysis.d.ts +42 -0
  25. package/dist/runtime/harness/system/boundary-analysis.js +234 -0
  26. package/dist/runtime/harness.d.ts +3 -0
  27. package/dist/runtime/harness.js +29 -8
  28. package/dist/runtime/parsing/output-content.js +7 -2
  29. package/dist/runtime/parsing/output-recovery.js +6 -2
  30. package/dist/runtime/parsing/output-tool-args.d.ts +4 -0
  31. package/dist/runtime/parsing/output-tool-args.js +114 -4
  32. package/package.json +1 -1
package/README.md CHANGED
@@ -1218,9 +1218,10 @@ ACP transport notes:
1218
1218
  - `serveAgUiHttp(runtime)` exposes an AG-UI-compatible HTTP SSE bridge that projects runtime lifecycle, safe progress commentary, text output, upstream thinking, step progress, and tool calls onto `RUN_*`, `STATUS_UPDATE`, `TEXT_MESSAGE_*`, `THINKING_TEXT_MESSAGE_*`, `STEP_*`, and `TOOL_CALL_*` events for UI clients. `botbotgo ag-ui start|stop` now manages that HTTP bridge in the same workspace-local service registry as ACP, A2A, and runtime MCP.
1219
1219
  - `createRuntimeMcpServer(runtime)`, `serveRuntimeMcpOverStdio(runtime)`, and `serveRuntimeMcpOverStreamableHttp(runtime)` expose the persisted runtime control surface itself as MCP tools, including sessions, requests, approvals, artifacts, events, and package export helpers. `botbotgo mcp serve --transport streamable-http --host 127.0.0.1 --port 8090` serves the same tool surface over Streamable HTTP, and `botbotgo mcp start|stop` manages that background endpoint for one workspace.
1220
1220
  - `listRequestEvents(...)`, `listRequestTraceItems(...)`, and `exportRequestPackage(...)` are the request-first inspection helpers.
1221
+ - `analyzeBoundaries(runtime)` returns a workspace boundary report covering agent, subagent, tool, and skill surfaces, including structural findings such as missing subagent references, unreferenced tools or skills, and skill allow-lists that do not match the owning agent's exposed tools.
1221
1222
  - `exportRequestPackage(...)` and `exportSessionPackage(...)` package stable runtime records, transcript, approvals, events, artifacts, and governance evidence for operator tooling without reaching into persistence internals.
1222
1223
  - `runtime/default.governance.remoteMcp` can now deny or allow specific MCP servers, raise approval requirements by transport, and stamp transport-based risk tiers into runtime governance bundles. MCP server catalogs can also declare trust tier, access mode, tenant scope, approval policy, prompt-injection risk, and OAuth scope metadata so governance bundles capture why one remote tool is treated as high-risk. Tool policy overrides can also set `decisionMode: manual | auto-approve | auto-reject | deny-and-continue` so operator evidence and execution behavior stay aligned.
1223
1224
  - Protocol responsibilities stay split on purpose: ACP is the primary editor/client runtime boundary, A2A is the streaming-capable agent-platform bridge with polling compatibility, AG-UI is the UI event surface, and runtime MCP is the operator-facing control plane exported as MCP tools.
1224
1225
  - `runtime/default.observability.tracing` can now describe exporter metadata such as OTLP endpoints and propagation mode, so frozen runtime snapshots keep trace-correlation plus operator-visible export context without exposing backend-private span internals.
1225
- - `agent-harness runtime overview`, `agent-harness runtime health`, `agent-harness runtime approvals list|watch`, `agent-harness runtime requests list|tail`, and `agent-harness runtime export request|session` provide a thin operator CLI over persisted runtime health, queue pressure, governance risk, approval queues, active request state, and audit-ready evidence packages.
1226
+ - `agent-harness runtime overview`, `agent-harness runtime boundaries`, `agent-harness runtime health`, `agent-harness runtime approvals list|watch`, `agent-harness runtime requests list|tail`, and `agent-harness runtime export request|session` provide a thin operator CLI over workspace boundary analysis, persisted runtime health, queue pressure, governance risk, approval queues, active request state, and audit-ready evidence packages.
1226
1227
  - detailed A2A adapter guidance lives in [`docs/a2a-bridge.md`](docs/a2a-bridge.md)
package/README.zh.md CHANGED
@@ -1175,9 +1175,10 @@ ACP transport 说明:
1175
1175
  - `serveAgUiHttp(runtime)` 提供 AG-UI HTTP SSE bridge,把 runtime 生命周期、安全进度播报、文本输出、upstream thinking、step 进度与 tool call 投影成 `RUN_*`、`STATUS_UPDATE`、`TEXT_MESSAGE_*`、`THINKING_TEXT_MESSAGE_*`、`STEP_*` 与 `TOOL_CALL_*` 事件,便于 UI 客户端直接接入。`botbotgo ag-ui start|stop` 现在也会把这条 HTTP bridge 托管进与 ACP、A2A、runtime MCP 相同的 workspace 本地服务注册表中。
1176
1176
  - `createRuntimeMcpServer(runtime)`、`serveRuntimeMcpOverStdio(runtime)` 与 `serveRuntimeMcpOverStreamableHttp(runtime)` 会把持久化 runtime 控制面本身暴露成 MCP tools,包括 sessions、requests、approvals、artifacts、events 与 package export helpers。`botbotgo mcp serve --transport streamable-http --host 127.0.0.1 --port 8090` 会把同一套控制面作为 Streamable HTTP 暴露出去,而 `botbotgo mcp start|stop` 可直接托管该后台 endpoint。
1177
1177
  - `listRequestEvents(...)`、`listRequestTraceItems(...)` 与 `exportRequestPackage(...)` 是 request-first 的检查 helper。
1178
+ - `analyzeBoundaries(runtime)` 会返回 workspace boundary report,覆盖 agent、subagent、tool 与 skill 的可见边界,并指出缺失 subagent 引用、未被引用的 tools/skills、以及 skill allow-list 与所属 agent 暴露工具不匹配等结构性问题。
1178
1179
  - `exportRequestPackage(...)` 与 `exportSessionPackage(...)` 可把稳定 runtime 记录、transcript、approvals、events、artifacts 与 governance evidence 一起打包给管理工具,而不必直接访问 persistence 内部实现。
1179
1180
  - `runtime/default.governance.remoteMcp` 现在可以按 MCP server 或 transport 做 allow/deny、审批升级,并把 transport 风险等级写进 runtime governance bundles。MCP server catalog 也可以声明 trust tier、access mode、tenant scope、approval policy、prompt-injection risk 与 OAuth scope 元数据,让治理快照能解释为什么某个远端工具被视为高风险。tool policy override 也可以声明 `decisionMode: manual | auto-approve | auto-reject | deny-and-continue`,让治理快照与实际执行路径保持一致。
1180
1181
  - 协议分工要继续保持清晰:ACP 是 editor / client 的主运行时边界,A2A 是支持 streaming 且兼容轮询的 agent-platform bridge,AG-UI 是 UI 事件面,runtime MCP 是以 MCP tools 暴露的 operator control plane。
1181
1182
  - `runtime/default.observability.tracing` 现在可描述 OTLP endpoint 和 propagation mode 这类 exporter 元数据,使冻结的 runtime snapshot 在保留 trace correlation 的同时,也能保留有用的导出上下文,而不暴露 backend 私有 span 细节。
1182
- - `agent-harness runtime overview`、`agent-harness runtime health`、`agent-harness runtime approvals list|watch`、`agent-harness runtime requests list|tail` 与 `agent-harness runtime export request|session` 提供了一层轻量 CLI,可直接查看 runtime health、queue pressure、governance risk、审批队列、运行状态与可审计证据包。
1183
+ - `agent-harness runtime overview`、`agent-harness runtime boundaries`、`agent-harness runtime health`、`agent-harness runtime approvals list|watch`、`agent-harness runtime requests list|tail` 与 `agent-harness runtime export request|session` 提供了一层轻量 CLI,可直接查看 workspace boundary analysis、runtime health、queue pressure、governance risk、审批队列、运行状态与可审计证据包。
1183
1184
  - 更详细的 A2A 适配层开发说明见 [`docs/a2a-bridge.md`](docs/a2a-bridge.md)
package/dist/api.d.ts CHANGED
@@ -1,6 +1,8 @@
1
1
  import type { ArtifactListing, CancelOptions, InvocationEnvelope, ListMemoriesInput, ListMemoriesResult, MemoryRecord, MemorizeInput, MemorizeResult, MessageContent, RequestDataEvent, RecallInput, RecallResult, RemoveMemoryInput, RequestEventSnapshot, RequestPlanState, RequestDecisionOptions, RequestRecord, RequestResult as InternalRequestResult, RequestStartOptions, RequestSummary, RuntimeArtifactWriteInput, ResumeOptions, RequestListeners, RuntimeHealthSnapshot, RuntimeGovernanceEvidence, RuntimeGovernanceDiagnostics, RuntimeOperatorOverview, RuntimeQueueDiagnostics, RuntimeAdapterOptions, RuntimeEvaluationExport, RuntimeEvaluationExportInput, RuntimeEvaluationArtifact, RuntimeEvaluationReplayInput, RuntimeEvaluationReplayResult as InternalRuntimeEvaluationReplayResult, RuntimeSessionPackage, RuntimeSessionPackageInput, SessionListSummary, SessionRecord, SessionSummary, TranscriptMessage, UpdateMemoryInput, WorkspaceLoadOptions } from "./contracts/types.js";
2
2
  import { AgentHarnessRuntime } from "./runtime/harness.js";
3
3
  import type { InventoryAgentRecord, InventorySkillRecord } from "./runtime/harness/system/inventory.js";
4
+ import type { BoundaryAnalysisOptions, WorkspaceBoundaryAnalysis } from "./runtime/harness/system/boundary-analysis.js";
5
+ export type { BoundaryAgentSurface, BoundaryAnalysisOptions, BoundaryAnalysisSummary, BoundaryFinding, BoundaryFindingSeverity, WorkspaceBoundaryAnalysis, } from "./runtime/harness/system/boundary-analysis.js";
4
6
  import type { RequirementAssessmentOptions } from "./runtime/harness/system/skill-requirements.js";
5
7
  import type { RuntimeMcpServerOptions, RuntimeMcpStreamableHttpServerOptions, ToolMcpServerOptions } from "./mcp.js";
6
8
  export { AgentHarnessAcpServer, createAcpServer } from "./acp.js";
@@ -202,6 +204,7 @@ export declare function describeInventory(runtime: AgentHarnessRuntime, options?
202
204
  workspaceRoot: string;
203
205
  agents: InventoryAgentRecord[];
204
206
  };
207
+ export declare function analyzeBoundaries(runtime: AgentHarnessRuntime, options?: BoundaryAnalysisOptions): WorkspaceBoundaryAnalysis;
205
208
  export declare function resolveApproval(runtime: AgentHarnessRuntime, options: ResumeOptions): Promise<PublicRequestResult>;
206
209
  export declare function cancelRequest(runtime: AgentHarnessRuntime, options: CancelOptions): Promise<PublicRequestResult>;
207
210
  export declare function stop(runtime: AgentHarnessRuntime): Promise<void>;
package/dist/api.js CHANGED
@@ -366,6 +366,9 @@ export function getAgent(runtime, agentId, options) {
366
366
  export function describeInventory(runtime, options) {
367
367
  return runtime.describeWorkspaceInventory(options);
368
368
  }
369
+ export function analyzeBoundaries(runtime, options) {
370
+ return runtime.analyzeWorkspaceBoundaries(options);
371
+ }
369
372
  export async function resolveApproval(runtime, options) {
370
373
  return toPublicRequestResult(await runtime.resume(toInternalResumeOptions(options)));
371
374
  }
package/dist/cli/options.js CHANGED
@@ -16,6 +16,7 @@ export function renderUsage() {
16
16
  agent-harness ag-ui start [--workspace <path>] [--host <hostname>] [--port <port>]
17
17
  agent-harness ag-ui stop [--workspace <path>]
18
18
  agent-harness runtime overview [--workspace <path>] [--limit <n>] [--json]
19
+ agent-harness runtime boundaries [--workspace <path>] [--json]
19
20
  agent-harness runtime health [--workspace <path>] [--json]
20
21
  agent-harness runtime approvals list [--workspace <path>] [--status <pending|approved|edited|rejected|expired>] [--json]
21
22
  agent-harness runtime approvals watch [--workspace <path>] [--status <pending|approved|edited|rejected|expired>] [--poll-ms <ms>] [--once] [--json]
package/dist/cli/runtime-commands.js CHANGED
@@ -1,4 +1,4 @@
1
- import { renderApprovalList, renderHealthSnapshot, renderJson, renderOperatorOverview, renderRequestList } from "./runtime-output.js";
1
+ import { renderApprovalList, renderBoundaryAnalysis, renderHealthSnapshot, renderJson, renderOperatorOverview, renderRequestList, } from "./runtime-output.js";
2
2
  import { parseRuntimeExportOptions, parseRuntimeInspectOptions, parseScheduledRunOptions, renderUsage } from "./options.js";
3
3
  import { resolveCliWorkspaceRoot, validateCliWorkspaceRoot } from "./workspace.js";
4
4
  function resolveValidatedWorkspace(cwd, workspaceRoot, stderr) {
@@ -128,6 +128,17 @@ export async function handleRuntimeCommand(subcommandAndArgs, io, deps) {
128
128
  if (!workspacePath) {
129
129
  return 1;
130
130
  }
131
+ if (subcommand === "boundaries") {
132
+ const runtime = await createHarness(workspacePath);
133
+ try {
134
+ const analysis = runtime.analyzeWorkspaceBoundaries();
135
+ stdout(parsed.json ? renderJson(analysis) : renderBoundaryAnalysis(analysis, workspacePath));
136
+ return analysis.summary.errorCount > 0 ? 2 : 0;
137
+ }
138
+ finally {
139
+ await runtime.stop();
140
+ }
141
+ }
131
142
  const shouldUseClient = subcommand === "health"
132
143
  || subcommand === "overview"
133
144
  || (subcommand === "approvals" && (nestedCommand === "list" || nestedCommand === "watch"))
package/dist/cli/runtime-output.d.ts CHANGED
@@ -3,3 +3,4 @@ export declare function renderHealthSnapshot(snapshot: Record<string, unknown>,
3
3
  export declare function renderApprovalList(approvals: Array<Record<string, unknown>>): string;
4
4
  export declare function renderRequestList(requests: Array<Record<string, unknown>>): string;
5
5
  export declare function renderOperatorOverview(overview: Record<string, unknown>, workspacePath: string): string;
6
+ export declare function renderBoundaryAnalysis(analysis: Record<string, unknown>, workspacePath: string): string;
package/dist/cli/runtime-output.js CHANGED
@@ -122,3 +122,34 @@ export function renderOperatorOverview(overview, workspacePath) {
122
122
  }
123
123
  return `${lines.join("\n")}\n`;
124
124
  }
125
+ export function renderBoundaryAnalysis(analysis, workspacePath) {
126
+ const lines = [];
127
+ const summary = isObject(analysis.summary) ? analysis.summary : {};
128
+ const findings = Array.isArray(analysis.findings) ? analysis.findings.filter(isObject) : [];
129
+ const errors = typeof summary.errorCount === "number" ? summary.errorCount : 0;
130
+ const warnings = typeof summary.warningCount === "number" ? summary.warningCount : 0;
131
+ const info = typeof summary.infoCount === "number" ? summary.infoCount : 0;
132
+ lines.push(`Runtime boundary analysis ${workspacePath}`);
133
+ lines.push(`Findings: errors=${errors} warnings=${warnings} info=${info}`);
134
+ const agents = typeof summary.agentCount === "number" ? summary.agentCount : undefined;
135
+ const tools = typeof summary.toolCount === "number" ? summary.toolCount : undefined;
136
+ const skills = typeof summary.skillCount === "number" ? summary.skillCount : undefined;
137
+ if (agents !== undefined || tools !== undefined || skills !== undefined) {
138
+ lines.push(`Inventory: agents=${agents ?? "unknown"} tools=${tools ?? "unknown"} skills=${skills ?? "unknown"}`);
139
+ }
140
+ if (findings.length === 0) {
141
+ lines.push("No boundary findings.");
142
+ return `${lines.join("\n")}\n`;
143
+ }
144
+ lines.push("Boundary findings:");
145
+ for (const finding of findings) {
146
+ const severity = typeof finding.severity === "string" ? finding.severity : "unknown";
147
+ const code = typeof finding.code === "string" ? finding.code : "unknown";
148
+ const message = typeof finding.message === "string" ? finding.message : "";
149
+ const agent = typeof finding.agentId === "string" ? ` agent=${finding.agentId}` : "";
150
+ const tool = typeof finding.toolName === "string" ? ` tool=${finding.toolName}` : "";
151
+ const skill = typeof finding.skillName === "string" ? ` skill=${finding.skillName}` : "";
152
+ lines.push(` - ${severity} ${code}:${agent}${tool}${skill}${message ? ` ${message}` : ""}`);
153
+ }
154
+ return `${lines.join("\n")}\n`;
155
+ }
package/dist/index.d.ts CHANGED
@@ -1,9 +1,9 @@
1
- export { AgentHarnessAcpServer, AgentHarnessRuntime, cancelRequest, createAgentHarness, createAcpServer, createAcpStdioClient, createRuntimeMcpServer, createUpstreamTimelineReducer, createToolMcpServer, deleteSession, describeInventory, exportEvaluationBundle, exportFlow, exportSequence, exportRequestPackage, exportSessionPackage, replayEvaluationBundle, getArtifact, getAgent, getApproval, getOperatorOverview, getRequestPlanState, getRequest, getHealth, listMemories, listRequestTraceItems, getSession, listAgentSkills, listRequestArtifacts, listApprovals, listRequests, listRequestEvents, listSessionSummaries, listSessions, memorize, normalizeUserChatInput, recordArtifact, request, recall, removeMemory, resolveApproval, serveA2aHttp, serveAcpHttp, serveAcpStdio, serveAgUiHttp, serveRuntimeMcpOverStdio, serveRuntimeMcpOverStreamableHttp, serveToolsOverStdio, subscribe, stop, updateMemory, } from "./api.js";
1
+ export { AgentHarnessAcpServer, AgentHarnessRuntime, cancelRequest, createAgentHarness, createAcpServer, createAcpStdioClient, createRuntimeMcpServer, createUpstreamTimelineReducer, createToolMcpServer, analyzeBoundaries, deleteSession, describeInventory, exportEvaluationBundle, exportFlow, exportSequence, exportRequestPackage, exportSessionPackage, replayEvaluationBundle, getArtifact, getAgent, getApproval, getOperatorOverview, getRequestPlanState, getRequest, getHealth, listMemories, listRequestTraceItems, getSession, listAgentSkills, listRequestArtifacts, listApprovals, listRequests, listRequestEvents, listSessionSummaries, listSessions, memorize, normalizeUserChatInput, recordArtifact, request, recall, removeMemory, resolveApproval, serveA2aHttp, serveAcpHttp, serveAcpStdio, serveAgUiHttp, serveRuntimeMcpOverStdio, serveRuntimeMcpOverStreamableHttp, serveToolsOverStdio, subscribe, stop, updateMemory, } from "./api.js";
2
2
  export { AcpHarnessClient, InProcessHarnessClient, createAcpHarnessClient, createAcpHttpHarnessClient, createAcpStdioHarnessClient, createAgentHarnessClient, createInProcessHarnessClient, } from "./client.js";
3
3
  export { createKnowledgeModule, readKnowledgeRuntimeConfig } from "./knowledge/index.js";
4
4
  export { readProceduralMemoryRuntimeConfig } from "./knowledge/procedural/index.js";
5
5
  export type { AcpApproval, AcpHttpClient, AcpHttpClientOptions, AcpArtifact, AcpEventNotification, AcpNotification, AcpJsonRpcError, AcpJsonRpcRequest, AcpJsonRpcResponse, AcpJsonRpcSuccess, AcpRequestRecord, AcpRequestParams, AcpServerCapabilities, AcpSessionRecord, AcpStreamNotification, AcpStdioClient, AcpStdioClientOptions, } from "./acp.js";
6
- export type { Approval, CreateAgentHarnessOptions, ListMemoriesInput, ListMemoriesResult, MemoryDecision, MemoryKind, MemoryRecord, MemoryScope, MemorizeInput, MemorizeResult, NormalizeUserChatInputOptions, OperatorOverview, RecordArtifactInput, PublicRequestListeners, RequestData, RequestArtifactListing, RequestEvent, RequestEventType, RequestSnapshot, RequestPlanState, RequestPackage, RequestPackageInput, RequestFlowGraphInput, RequestResult, RequestTraceItem, RecallInput, RecallResult, RemoveMemoryInput, RuntimeEvaluationExport, RuntimeEvaluationExportInput, RuntimeEvaluationReplayInput, RuntimeEvaluationReplayResult, SessionListSummary, RuntimeSessionPackage, RuntimeSessionPackageInput, UpdateMemoryInput, UserChatInput, UserChatMessage, } from "./api.js";
6
+ export type { Approval, BoundaryAgentSurface, BoundaryAnalysisOptions, BoundaryAnalysisSummary, BoundaryFinding, BoundaryFindingSeverity, CreateAgentHarnessOptions, ListMemoriesInput, ListMemoriesResult, MemoryDecision, MemoryKind, MemoryRecord, MemoryScope, MemorizeInput, MemorizeResult, NormalizeUserChatInputOptions, OperatorOverview, RecordArtifactInput, PublicRequestListeners, RequestData, RequestArtifactListing, RequestEvent, RequestEventType, RequestSnapshot, RequestPlanState, RequestPackage, RequestPackageInput, RequestFlowGraphInput, RequestResult, RequestTraceItem, RecallInput, RecallResult, RemoveMemoryInput, RuntimeEvaluationExport, RuntimeEvaluationExportInput, RuntimeEvaluationReplayInput, RuntimeEvaluationReplayResult, SessionListSummary, RuntimeSessionPackage, RuntimeSessionPackageInput, WorkspaceBoundaryAnalysis, UpdateMemoryInput, UserChatInput, UserChatMessage, } from "./api.js";
7
7
  export type { AcpHarnessTransport, HarnessClient, HarnessClientApprovalFilter, HarnessClientRequestFilter, HarnessClientRequestOptions, HarnessClientRequestResult, HarnessClientRequestStartOptions, } from "./client.js";
8
8
  export type { KnowledgeListInput, KnowledgeMemorizeInput, KnowledgeModule, KnowledgeModuleDependencies, KnowledgeRecallInput, KnowledgeRuntimeConfig, KnowledgeRuntimeContext, } from "./knowledge/index.js";
9
9
  export type { ProceduralMemoryBackgroundConfig, ProceduralMemoryFormationConfig, ProceduralMemoryMaintenanceConfig, ProceduralMemoryMaintenanceIdleConfig, ProceduralMemoryMaintenanceScheduleConfig, ProceduralMemoryProviderConfig, ProceduralMemoryRetrievalConfig, ProceduralMemoryRuntimeConfig, } from "./knowledge/procedural/index.js";
package/dist/index.js CHANGED
@@ -1,4 +1,4 @@
1
- export { AgentHarnessAcpServer, AgentHarnessRuntime, cancelRequest, createAgentHarness, createAcpServer, createAcpStdioClient, createRuntimeMcpServer, createUpstreamTimelineReducer, createToolMcpServer, deleteSession, describeInventory, exportEvaluationBundle, exportFlow, exportSequence, exportRequestPackage, exportSessionPackage, replayEvaluationBundle, getArtifact, getAgent, getApproval, getOperatorOverview, getRequestPlanState, getRequest, getHealth, listMemories, listRequestTraceItems, getSession, listAgentSkills, listRequestArtifacts, listApprovals, listRequests, listRequestEvents, listSessionSummaries, listSessions, memorize, normalizeUserChatInput, recordArtifact, request, recall, removeMemory, resolveApproval, serveA2aHttp, serveAcpHttp, serveAcpStdio, serveAgUiHttp, serveRuntimeMcpOverStdio, serveRuntimeMcpOverStreamableHttp, serveToolsOverStdio, subscribe, stop, updateMemory, } from "./api.js";
1
+ export { AgentHarnessAcpServer, AgentHarnessRuntime, cancelRequest, createAgentHarness, createAcpServer, createAcpStdioClient, createRuntimeMcpServer, createUpstreamTimelineReducer, createToolMcpServer, analyzeBoundaries, deleteSession, describeInventory, exportEvaluationBundle, exportFlow, exportSequence, exportRequestPackage, exportSessionPackage, replayEvaluationBundle, getArtifact, getAgent, getApproval, getOperatorOverview, getRequestPlanState, getRequest, getHealth, listMemories, listRequestTraceItems, getSession, listAgentSkills, listRequestArtifacts, listApprovals, listRequests, listRequestEvents, listSessionSummaries, listSessions, memorize, normalizeUserChatInput, recordArtifact, request, recall, removeMemory, resolveApproval, serveA2aHttp, serveAcpHttp, serveAcpStdio, serveAgUiHttp, serveRuntimeMcpOverStdio, serveRuntimeMcpOverStreamableHttp, serveToolsOverStdio, subscribe, stop, updateMemory, } from "./api.js";
2
2
  export { AcpHarnessClient, InProcessHarnessClient, createAcpHarnessClient, createAcpHttpHarnessClient, createAcpStdioHarnessClient, createAgentHarnessClient, createInProcessHarnessClient, } from "./client.js";
3
3
  export { createKnowledgeModule, readKnowledgeRuntimeConfig } from "./knowledge/index.js";
4
4
  export { readProceduralMemoryRuntimeConfig } from "./knowledge/procedural/index.js";
package/dist/package-version.d.ts CHANGED
@@ -1,2 +1,2 @@
1
- export declare const AGENT_HARNESS_VERSION = "0.0.351";
1
+ export declare const AGENT_HARNESS_VERSION = "0.0.353";
2
2
  export declare const AGENT_HARNESS_RELEASE_DATE = "2026-04-24";
package/dist/package-version.js CHANGED
@@ -1,2 +1,2 @@
1
- export const AGENT_HARNESS_VERSION = "0.0.351";
1
+ export const AGENT_HARNESS_VERSION = "0.0.353";
2
2
  export const AGENT_HARNESS_RELEASE_DATE = "2026-04-24";
package/dist/resources/prompts/runtime/execution-with-tool-evidence-retry.md CHANGED
@@ -1 +1,5 @@
1
- Your previous response was rejected because it claimed execution without any tool calls. Your next response must contain real tool calls only. If this is a multi-step task and the runtime todo board does not exist yet, call write_todos first. If the todo board already exists or prior tool evidence is already available, do not restart planning or ask the user for more details; continue with the next concrete tool call instead. Do not describe completed work until the tool calls have actually run.
1
+ Your previous response was rejected because it did not produce concrete execution evidence from the tools configured for this agent. Your next response must contain real execution tool calls only.
2
+
3
+ Do not call planning-only tools such as write_todos or read_todos in this retry. Do not restart the plan, ask for more details, or describe completed work. Select one configured non-planning tool that can advance the original request, call it with concrete arguments, and then continue from that tool result.
4
+
5
+ If this agent has no configured non-planning tool that can advance the original request, return a blocker instead of pretending the work is complete.
package/dist/runtime/adapter/flow/invocation-flow.js CHANGED
@@ -5,6 +5,8 @@ import { invokeRuntimeWithLocalTools } from "./invoke-runtime.js";
5
5
  import { buildInvocationRequest } from "../model/invocation-request.js";
6
6
  import { UPSTREAM_REQUEST_CONFIG_KEY, UPSTREAM_SESSION_CONFIG_KEY } from "../upstream-configurable-keys.js";
7
7
  import { extractVisibleOutput, tryParseJson } from "../../parsing/output-parsing.js";
8
+ import { salvageJsonToolCalls } from "../../parsing/output-tool-args.js";
9
+ import { isEmptyFinalAiMessageError } from "../resilience.js";
8
10
  function readBindingExecutionParams(binding) {
9
11
  const params = binding.execution?.params ?? binding.deepAgentParams ?? binding.langchainAgentParams;
10
12
  return {
@@ -48,6 +50,34 @@ function hasNativeTaskDelegationIntent(value) {
48
50
  return hasNativeTaskDelegationIntent(typed.tool_calls) || hasNativeTaskDelegationIntent(typed.messages);
49
51
  }
50
52
  function readStructuredToolCall(value) {
53
+ const salvaged = salvageJsonToolCalls(value)[0];
54
+ if (salvaged) {
55
+ return salvaged;
56
+ }
57
+ if (Array.isArray(value)) {
58
+ for (const item of value) {
59
+ const nested = readStructuredToolCall(item);
60
+ if (nested) {
61
+ return nested;
62
+ }
63
+ }
64
+ return null;
65
+ }
66
+ if (typeof value === "object" && value !== null) {
67
+ const typed = value;
68
+ const fromOutput = typed.output !== undefined ? readStructuredToolCall(typed.output) : null;
69
+ if (fromOutput) {
70
+ return fromOutput;
71
+ }
72
+ const fromContent = typed.content !== undefined ? readStructuredToolCall(typed.content) : null;
73
+ if (fromContent) {
74
+ return fromContent;
75
+ }
76
+ const fromMessages = typed.messages !== undefined ? readStructuredToolCall(typed.messages) : null;
77
+ if (fromMessages) {
78
+ return fromMessages;
79
+ }
80
+ }
51
81
  const text = typeof value === "string" ? value.trim() : "";
52
82
  const parsed = text ? (tryParseJson(text) ?? extractFirstJsonObject(text)) : value;
53
83
  if (typeof parsed !== "object" || parsed === null || Array.isArray(parsed)) {
@@ -116,7 +146,7 @@ async function replayStructuredTaskToolCall(input) {
116
146
  return input.invocation;
117
147
  }
118
148
  const visibleOutput = extractVisibleOutput(input.invocation.result);
119
- const toolCall = readStructuredToolCall(visibleOutput);
149
+ const toolCall = readStructuredToolCall(visibleOutput) ?? readStructuredToolCall(input.invocation.result);
120
150
  if (toolCall?.name !== "task") {
121
151
  return input.invocation;
122
152
  }
@@ -179,6 +209,20 @@ function buildDelegationOnlyRecoveryInstruction(binding, input) {
179
209
  JSON.stringify(input),
180
210
  ].join("\n");
181
211
  }
212
+ function buildEmptyAssistantRecoveryInstruction() {
213
+ return [
214
+ "/no_think",
215
+ "Runtime correction: your previous assistant response was empty.",
216
+ "Continue the original request now.",
217
+ "If an available tool is needed, call exactly one valid tool with complete arguments.",
218
+ "If no tool is needed, return a non-empty final answer.",
219
+ "Do not return empty content.",
220
+ ].join("\n");
221
+ }
222
+ function isEmptyAssistantOutputError(error) {
223
+ const message = error instanceof Error ? error.message : String(error);
224
+ return isEmptyFinalAiMessageError(error) || message === "empty_final_output";
225
+ }
182
226
  function appendUserRecoveryInstruction(input, instruction) {
183
227
  const content = [
184
228
  "Runtime correction:",
@@ -246,12 +290,39 @@ export async function executeRequestInvocation(options) {
246
290
  if (!result) {
247
291
  throw new Error("Agent invocation returned no result");
248
292
  }
249
- return finalizeRequestResult({
250
- bindingAgentId: options.binding.agent.id,
251
- sessionId: options.sessionId,
252
- requestId: options.requestId,
253
- result,
254
- executedToolResults,
255
- });
293
+ try {
294
+ return finalizeRequestResult({
295
+ bindingAgentId: options.binding.agent.id,
296
+ sessionId: options.sessionId,
297
+ requestId: options.requestId,
298
+ binding: options.binding,
299
+ result,
300
+ executedToolResults,
301
+ });
302
+ }
303
+ catch (error) {
304
+ if (options.resumePayload !== undefined || !isEmptyAssistantOutputError(error)) {
305
+ throw error;
306
+ }
307
+ const shouldUseDelegationRecovery = isDelegationOnlyBinding(options.binding)
308
+ && !hasTaskDelegationEvidence(executedToolResults)
309
+ && !hasNativeTaskDelegationIntent(result);
310
+ const recoveredRequest = appendUserRecoveryInstruction(request, shouldUseDelegationRecovery
311
+ ? buildDelegationOnlyRecoveryInstruction(options.binding, options.input)
312
+ : buildEmptyAssistantRecoveryInstruction());
313
+ const recoveredInvocation = await replayStructuredTaskToolCall({
314
+ invocation: await invokeOnce(recoveredRequest),
315
+ builtinExecutableTools: builtinExecutableTools,
316
+ toolRuntimeContext: invokeOptions.toolRuntimeContext,
317
+ });
318
+ return finalizeRequestResult({
319
+ bindingAgentId: options.binding.agent.id,
320
+ sessionId: options.sessionId,
321
+ requestId: options.requestId,
322
+ binding: options.binding,
323
+ result: recoveredInvocation.result,
324
+ executedToolResults: recoveredInvocation.executedToolResults,
325
+ });
326
+ }
256
327
  }
257
328
  export const executeRuntimeInvocation = executeRequestInvocation;
package/dist/runtime/adapter/invocation-result.d.ts CHANGED
@@ -16,6 +16,13 @@ export declare function finalizeRequestResult(params: {
16
16
  bindingAgentId: string;
17
17
  sessionId: string;
18
18
  requestId: string;
19
+ binding?: {
20
+ harnessRuntime?: {
21
+ executionContract?: {
22
+ requiresPlan?: boolean;
23
+ };
24
+ };
25
+ };
19
26
  result: Record<string, unknown>;
20
27
  executedToolResults: ExecutedToolResult[];
21
28
  }): RequestResult;
package/dist/runtime/adapter/invocation-result.js CHANGED
@@ -1,5 +1,5 @@
1
1
  import { containsLikelySkillDocument, extractContentBlocks, extractEmptyAssistantMessageFailure, extractOutputContent, extractToolFallbackContext, extractVisibleOutput, isLikelyToolArgsObject, sanitizeVisibleText, tryParseJson, } from "../parsing/output-parsing.js";
2
- import { salvageFunctionLikeToolCall } from "../parsing/output-tool-args.js";
2
+ import { salvageFunctionLikeToolCall, salvageJsonToolCalls } from "../parsing/output-tool-args.js";
3
3
  import { buildStateSnapshot } from "./model/message-assembly.js";
4
4
  import { asRecord } from "./tool/resolved-tool.js";
5
5
  import { renderToolFailure } from "../support/harness-support.js";
@@ -12,9 +12,30 @@ function looksLikeLeakedToolCallText(value) {
12
12
  if (salvageFunctionLikeToolCall(normalized)) {
13
13
  return true;
14
14
  }
15
+ if (salvageJsonToolCalls(normalized).length > 0) {
16
+ return true;
17
+ }
15
18
  const prefixedToolCallMatch = /^(?:\s*(?:Ready|Understood|Okay|Ok|Got it|Sure|All set|What is your request|Please provide a task for me to orchestrate)[.:?!]?\s*)+([A-Za-z_][A-Za-z0-9_]*\([\s\S]*\))\s*$/u.exec(normalized);
16
19
  return !!(prefixedToolCallMatch && salvageFunctionLikeToolCall(prefixedToolCallMatch[1]));
17
20
  }
21
/**
 * Reports whether a state snapshot still carries unfinished plan items.
 *
 * A snapshot counts as incomplete when its `todos` array holds at least one
 * object entry whose string `status`, once trimmed and lower-cased, equals
 * `pending` or `in_progress`. Non-object snapshots, a non-array `todos`
 * property, and malformed entries never count as incomplete.
 *
 * @param stateSnapshot - Candidate snapshot value; any shape is tolerated.
 * @returns `true` when an unfinished todo is present, otherwise `false`.
 */
function hasIncompleteStateSnapshotPlan(stateSnapshot) {
    const todoList = stateSnapshot !== null && typeof stateSnapshot === "object"
        ? stateSnapshot.todos
        : undefined;
    if (!Array.isArray(todoList)) {
        return false;
    }
    for (const entry of todoList) {
        // Entries that are not objects, or that lack a string status, are ignored.
        if (entry === null || typeof entry !== "object" || typeof entry.status !== "string") {
            continue;
        }
        const normalizedStatus = entry.status.trim().toLowerCase();
        if (normalizedStatus === "pending" || normalizedStatus === "in_progress") {
            return true;
        }
    }
    return false;
}
18
39
  function isPlaceholderTaskCompletion(value) {
19
40
  const normalized = sanitizeVisibleText(value).trim();
20
41
  return normalized === "Task completed";
@@ -113,6 +134,62 @@ function extractLatestSuccessfulNonTodoToolResultText(executedToolResults) {
113
134
  ?? candidates.at(-1)
114
135
  ?? "";
115
136
  }
137
/**
 * Builds a dotted type path from the `id` array of a serialized message.
 *
 * Only non-empty string segments of `id` contribute to the path; every other
 * element is dropped.
 *
 * @param value - Candidate serialized message; any shape is tolerated.
 * @returns The string segments joined with `.`, or `""` when `value` is not an
 *   object or its `id` is not an array.
 */
function readSerializedMessageType(value) {
    const idPath = value !== null && typeof value === "object" ? value.id : undefined;
    if (!Array.isArray(idPath)) {
        return "";
    }
    const segments = [];
    for (const segment of idPath) {
        if (typeof segment === "string" && segment) {
            segments.push(segment);
        }
    }
    return segments.join(".");
}
147
/**
 * Converts a message-like value into an executed-tool record when the value
 * represents a tool message.
 *
 * A value is treated as a tool message when any of the following holds:
 * - its `role` or `type` is `"tool"`;
 * - its serialized `id` path ends with `ToolMessage`;
 * - it has `kwargs.name` and `kwargs.content` AND its serialized `id` path
 *   does not name some other message class.
 *
 * The last guard matters because every serialized message carries
 * `kwargs.content` and any message may carry a `name`; without it a named
 * AI or human message would be reported as a tool result.
 *
 * @param value - Candidate message; any shape is tolerated.
 * @returns `{ toolName, output }` (plus `isError: true` when the status is
 *   `"error"` or the output text looks like a tool blocker), or `null` when
 *   the value is not a tool message or has no usable tool name.
 */
function readToolMessageRecord(value) {
    if (typeof value !== "object" || value === null) {
        return null;
    }
    const typed = value;
    const messageType = readSerializedMessageType(value);
    const kwargs = typeof typed.kwargs === "object" && typed.kwargs !== null ? typed.kwargs : undefined;
    // An explicit non-tool message class (e.g. AIMessage, HumanMessageChunk)
    // rules out the shape-based fallback below.
    const namesNonToolMessage = /Message(?:Chunk)?$/u.test(messageType)
        && !/ToolMessage(?:Chunk)?$/u.test(messageType);
    const hasNamedContentPayload = kwargs?.name !== undefined && kwargs?.content !== undefined;
    const isToolMessage = typed.role === "tool"
        || typed.type === "tool"
        || messageType.endsWith("ToolMessage")
        || (!namesNonToolMessage && hasNamedContentPayload);
    if (!isToolMessage) {
        return null;
    }
    const toolNameCandidate = kwargs?.name ?? typed.name;
    const toolName = typeof toolNameCandidate === "string" ? toolNameCandidate.trim() : "";
    if (!toolName) {
        return null;
    }
    const output = kwargs?.content ?? typed.content ?? "";
    const status = kwargs?.status ?? typed.status;
    // Non-string content is flattened to text only for the blocker check; the
    // record keeps the original `output` value.
    const outputText = typeof output === "string" ? output : extractVisibleOutput(output);
    return {
        toolName,
        output,
        ...(status === "error" || looksLikeToolBlocker(outputText) ? { isError: true } : {}),
    };
}
175
/**
 * Walks a result value depth-first and collects every tool-message record
 * found in it.
 *
 * Arrays are searched element by element. An object that is itself a tool
 * message yields a single record and is not searched further; any other
 * object is searched through its `messages` and then its `output` property.
 *
 * @param value - Result value to search; non-objects yield no records.
 * @param seen - Objects already visited, so shared or cyclic references are
 *   processed only once.
 * @returns The collected records in traversal order.
 */
function extractUpstreamToolResults(value, seen = new Set()) {
    if (value === null || typeof value !== "object" || seen.has(value)) {
        return [];
    }
    seen.add(value);
    if (Array.isArray(value)) {
        const collected = [];
        for (const element of value) {
            for (const record of extractUpstreamToolResults(element, seen)) {
                collected.push(record);
            }
        }
        return collected;
    }
    const ownRecord = readToolMessageRecord(value);
    if (ownRecord) {
        return [ownRecord];
    }
    const fromMessages = extractUpstreamToolResults(value.messages, seen);
    const fromOutput = extractUpstreamToolResults(value.output, seen);
    return fromMessages.concat(fromOutput);
}
116
193
  function hasDelegationBlocker(executedToolResults) {
117
194
  return executedToolResults.some((toolResult) => {
118
195
  if (toolResult.toolName !== "task") {
@@ -236,7 +313,11 @@ export function extractToolResultFindingsText(executedToolResults) {
236
313
  return extractLatestSuccessfulNonTodoToolResultText(executedToolResults);
237
314
  }
238
315
  export function finalizeRequestResult(params) {
239
- const { bindingAgentId, sessionId, requestId, result, executedToolResults } = params;
316
+ const { bindingAgentId, sessionId, requestId, binding, result, executedToolResults } = params;
317
+ const allExecutedToolResults = [
318
+ ...executedToolResults,
319
+ ...extractUpstreamToolResults(result),
320
+ ];
240
321
  const interruptContent = Array.isArray(result.__interrupt__) && result.__interrupt__.length > 0 ? JSON.stringify(result.__interrupt__) : undefined;
241
322
  const extractedOutput = extractVisibleOutput(result);
242
323
  const visibleOutput = extractedOutput && !isLikelyToolArgsObject(tryParseJson(extractedOutput)) ? extractedOutput : "";
@@ -257,7 +338,7 @@ export function finalizeRequestResult(params) {
257
338
  && contentBlocks.length === 0
258
339
  && structuredResponse === undefined
259
340
  && !files
260
- && executedToolResults.length === 0
341
+ && allExecutedToolResults.length === 0
261
342
  && hasEmptyFinalMessage(result)
262
343
  && !hasFinalMessageToolCalls(result)) {
263
344
  throw new Error("empty_final_output");
@@ -266,20 +347,27 @@ export function finalizeRequestResult(params) {
266
347
  const output = resolveDeterministicFinalOutput({
267
348
  visibleOutput,
268
349
  toolFallback,
269
- executedToolResults,
350
+ executedToolResults: allExecutedToolResults,
270
351
  })
271
352
  || (containsLikelySkillDocument(result) ? "" : serializedResult);
272
353
  const finalMessageText = sanitizeVisibleText(output);
273
354
  const terminalStatus = structuredTerminalStatus ?? readTerminalExecutionStatus(finalMessageText);
274
355
  const stateSnapshot = buildStateSnapshot(result);
275
- const memoryCandidates = executedToolResults.flatMap((toolResult) => toolResult.memoryCandidates ?? []);
356
+ const hasIncompleteRequiredPlan = binding?.harnessRuntime?.executionContract?.requiresPlan === true
357
+ && hasIncompleteStateSnapshotPlan(stateSnapshot);
358
+ const hasTerminalToolBlocker = looksLikeToolBlocker(finalMessageText);
359
+ const memoryCandidates = allExecutedToolResults.flatMap((toolResult) => toolResult.memoryCandidates ?? []);
276
360
  return {
277
361
  sessionId,
278
362
  requestId,
279
363
  agentId: bindingAgentId,
280
364
  state: Array.isArray(result.__interrupt__) && result.__interrupt__.length > 0
281
365
  ? "waiting_for_approval"
282
- : mapTerminalStatusToRequestState(terminalStatus),
366
+ : hasIncompleteRequiredPlan
367
+ ? "failed"
368
+ : hasTerminalToolBlocker
369
+ ? "failed"
370
+ : mapTerminalStatusToRequestState(terminalStatus),
283
371
  interruptContent,
284
372
  output: finalMessageText,
285
373
  finalMessageText,
@@ -287,7 +375,7 @@ export function finalizeRequestResult(params) {
287
375
  ...(contentBlocks.length > 0 ? { contentBlocks } : {}),
288
376
  ...(structuredResponse !== undefined ? { structuredResponse } : {}),
289
377
  metadata: {
290
- ...(executedToolResults.length > 0 ? { executedToolResults } : {}),
378
+ ...(allExecutedToolResults.length > 0 ? { executedToolResults: allExecutedToolResults } : {}),
291
379
  ...(memoryCandidates.length > 0 ? { memoryCandidates } : {}),
292
380
  ...(structuredResponse !== undefined ? { structuredResponse } : {}),
293
381
  ...(terminalStatus ? { terminalStatus } : {}),
@@ -4,7 +4,8 @@ import { canReplayToolCallsLocally } from "./tool/tool-replay.js";
4
4
  import { extractToolCallsFromResult, normalizeToolArgsForSchema, stringifyToolOutput } from "./tool/tool-arguments.js";
5
5
  import { extractMemoryCandidatesFromToolOutput } from "../harness/system/runtime-memory-candidates.js";
6
6
  import { maybePersistLargeToolOutput } from "./tool/tool-output-artifacts.js";
7
- import { appendToolRecoveryInstruction, extractVisibleOutput, resolveMissingPlanRecoveryInstruction, resolveExecutionWithoutToolEvidenceTextInstruction, sanitizeVisibleText, } from "../parsing/output-parsing.js";
7
+ import { appendToolRecoveryInstruction, extractVisibleOutput, resolveMissingPlanRecoveryInstruction, resolveExecutionWithoutToolEvidenceTextInstruction, resolveToolCallRecoveryInstruction, sanitizeVisibleText, STRICT_TOOL_JSON_INSTRUCTION, } from "../parsing/output-parsing.js";
8
+ import { salvageJsonToolCalls } from "../parsing/output-tool-args.js";
8
9
  import { AUTONOMOUS_INVESTIGATION_RECOVERY_INSTRUCTION } from "../prompts/runtime-prompts.js";
9
10
  const TOOL_FOLLOW_UP_INSTRUCTION = "One or more tool results are already available in this conversation. Answer the user's current request directly from the existing context and tool results. Do not ask the user to repeat inputs that are already present above.";
10
11
  function readPlanStateSummary(output) {
@@ -43,6 +44,17 @@ function hasNonTodoToolEvidence(executedToolResults) {
43
44
  function hasPlanStateEvidence(executedToolResults) {
44
45
  return executedToolResults.some((item) => item.toolName === "write_todos" || item.toolName === "read_todos" || readPlanStateSummary(item.output) !== null);
45
46
  }
47
/**
 * Picks a recovery instruction when the most recent tool result is an error.
 *
 * The error output is turned into text and passed, wrapped in an `Error`, to
 * `resolveToolCallRecoveryInstruction`. When that yields nothing, the generic
 * autonomous-investigation instruction is used instead.
 *
 * @param executedToolResults - Tool results recorded so far, oldest first.
 * @returns A recovery instruction, or `null` when there is no latest result
 *   or its `isError` flag is not exactly `true`.
 */
function latestToolErrorRecoveryInstruction(executedToolResults) {
    const latest = executedToolResults.at(-1);
    if (!latest || latest.isError !== true) {
        return null;
    }
    let message;
    if (typeof latest.output === "string") {
        message = latest.output;
    }
    else {
        try {
            // JSON.stringify returns undefined for values such as `undefined`.
            message = JSON.stringify(latest.output) ?? "";
        }
        catch {
            // Circular structures and BigInt make JSON.stringify throw; recovery
            // must still proceed, so fall back to an empty message.
            message = "";
        }
    }
    return resolveToolCallRecoveryInstruction(new Error(message)) ?? AUTONOMOUS_INVESTIGATION_RECOVERY_INSTRUCTION;
}
55
/**
 * Looks up a tool-call recovery instruction for the model's terminal text.
 *
 * @param terminalText - Final visible text, used as the message of the `Error`
 *   handed to `resolveToolCallRecoveryInstruction`.
 * @returns Whatever `resolveToolCallRecoveryInstruction` returns for that error.
 */
function terminalToolErrorRecoveryInstruction(terminalText) {
    const terminalFailure = new Error(terminalText);
    return resolveToolCallRecoveryInstruction(terminalFailure);
}
46
58
  function requiresPlanEvidence(binding) {
47
59
  return binding.harnessRuntime.executionContract?.requiresPlan === true;
48
60
  }
@@ -83,18 +95,24 @@ export async function runLocalToolInvocationLoop({ binding, request, primaryTool
83
95
  if (toolCalls.length === 0) {
84
96
  const terminalText = sanitizeVisibleText(extractVisibleOutput(result) || "");
85
97
  const hasIncompletePlanState = hasIncompleteExecutedPlan(executedToolResults);
98
+ const shouldEnforceIncompletePlan = requiresPlanEvidence(binding) && hasIncompletePlanState;
86
99
  const hasExecutionBeyondTodoPlanning = hasNonTodoToolEvidence(executedToolResults);
87
- const recoveryInstruction = terminalText
100
+ const toolErrorRecoveryInstruction = latestToolErrorRecoveryInstruction(executedToolResults)
101
+ ?? terminalToolErrorRecoveryInstruction(terminalText);
102
+ const leakedJsonToolCallRecoveryInstruction = terminalText && salvageJsonToolCalls(terminalText).length > 0
103
+ ? STRICT_TOOL_JSON_INSTRUCTION
104
+ : null;
105
+ const recoveryInstruction = toolErrorRecoveryInstruction ?? leakedJsonToolCallRecoveryInstruction ?? (terminalText
88
106
  ? resolveExecutionWithoutToolEvidenceTextInstruction(activeRequest, terminalText, false, {
89
107
  hasWriteTodosEvidence: executedToolResults.some((item) => item.toolName === "write_todos"),
90
108
  hasToolResultEvidence: hasExecutionBeyondTodoPlanning,
91
109
  hasPlanStateEvidence: hasPlanStateEvidence(executedToolResults),
92
- hasIncompletePlanState: hasExecutionBeyondTodoPlanning && hasIncompletePlanState,
110
+ hasIncompletePlanState: shouldEnforceIncompletePlan,
93
111
  requiresPlan: requiresPlanEvidence(binding),
94
112
  })
95
- : hasIncompletePlanState && hasExecutionBeyondTodoPlanning
113
+ : shouldEnforceIncompletePlan
96
114
  ? AUTONOMOUS_INVESTIGATION_RECOVERY_INSTRUCTION
97
- : null;
115
+ : null);
98
116
  if (recoveryInstruction) {
99
117
  if (iteration + 1 === maxToolIterations) {
100
118
  throw new Error(`Tool-calling loop exceeded the maximum of ${maxToolIterations} iterations`);
@@ -12,6 +12,32 @@ import { materializeDeepAgentSkillSourcePaths } from "./compat/deepagent-compat.
12
12
  import { DEFAULT_SUBAGENT_PROMPT, EXECUTION_WITH_TOOL_EVIDENCE_RETRY_INSTRUCTION, } from "../prompts/runtime-prompts.js";
13
13
  import { createContextHygieneMiddleware } from "./middleware/context-hygiene.js";
14
14
  const INVALID_TOOL_MESSAGE_BLOCK_TYPES = new Set(["tool_use", "thinking", "redacted_thinking"]);
15
/** Timeout applied to a delegated task when the model config supplies none. */
const DEFAULT_BUILTIN_TASK_TIMEOUT_MS = 180_000;
/**
 * Largest delay `setTimeout` honors (2^31 - 1 ms). Node coerces any delay
 * above this, or below 1, to 1 ms, which would fire the timeout immediately.
 */
const MAX_BUILTIN_TASK_TIMEOUT_MS = 2_147_483_647;
/**
 * Resolves the timeout, in milliseconds, for a delegated built-in task.
 *
 * Reads `model.init.timeout`. A finite positive number is floored and then
 * clamped to the range `setTimeout` can represent, so neither a sub-millisecond
 * value nor a very large one turns into an immediate timeout. Any other value
 * falls back to the default.
 *
 * @param model - Model configuration; may be `undefined` or lack `init`.
 * @returns An integer delay between 1 and 2147483647 inclusive.
 */
function resolveBuiltinTaskTimeoutMs(model) {
    const timeout = model?.init?.timeout;
    if (typeof timeout !== "number" || !Number.isFinite(timeout) || timeout <= 0) {
        return DEFAULT_BUILTIN_TASK_TIMEOUT_MS;
    }
    return Math.min(MAX_BUILTIN_TASK_TIMEOUT_MS, Math.max(1, Math.floor(timeout)));
}
22
/**
 * Runs `producer` and rejects if it has not settled within `timeoutMs`.
 *
 * The producer is started first and then raced against a timer. The timer is
 * unref'd where the runtime supports it and is always cleared once the race
 * settles. The producer itself is not cancelled when the timer wins.
 *
 * @param producer - Function that starts the work and returns its promise.
 * @param timeoutMs - Delay in milliseconds before the timeout rejection.
 * @param subagentName - Name included in the timeout error message.
 * @returns The value produced by `producer`.
 * @throws Error when the timer fires first, or whatever `producer` rejects with.
 */
async function withBuiltinTaskTimeout(producer, timeoutMs, subagentName) {
    let timer;
    const armTimeout = (_resolve, reject) => {
        const expire = () => {
            reject(new Error(`Delegated agent ${subagentName} timed out after ${timeoutMs}ms.`));
        };
        timer = setTimeout(expire, timeoutMs);
        timer.unref?.();
    };
    try {
        const work = producer();
        const deadline = new Promise(armTimeout);
        return await Promise.race([work, deadline]);
    }
    finally {
        if (timer) {
            clearTimeout(timer);
        }
    }
}
15
41
  function extractDeepAgentTaskContent(result) {
16
42
  if (typeof result !== "object" || result === null) {
17
43
  return undefined;
@@ -369,9 +395,10 @@ export async function invokeBuiltinTaskTool(input) {
369
395
  configurable: { [UPSTREAM_SESSION_CONFIG_KEY]: `${input.binding.agent.id}:builtin-task` },
370
396
  ...(input.options?.context ? { context: input.options.context } : {}),
371
397
  };
372
- const invokeSubagent = (content) => runnable.invoke({
398
+ const taskTimeoutMs = resolveBuiltinTaskTimeoutMs(selectedCompiledSubagent?.model ?? primaryModel);
399
+ const invokeSubagent = (content) => withBuiltinTaskTimeout(() => runnable.invoke({
373
400
  messages: [new HumanMessage({ content })],
374
- }, invokeConfig);
401
+ }, invokeConfig), taskTimeoutMs, selectedSubagent.name);
375
402
  let result = await invokeSubagent(description);
376
403
  if (!hasSubagentExecutionToolEvidence(result, resolvedSubagentTools, selectedCompiledSubagent?.tools)) {
377
404
  result = await invokeSubagent([description, EXECUTION_WITH_TOOL_EVIDENCE_RETRY_INSTRUCTION].filter(Boolean).join("\n\n"));
@@ -8,5 +8,6 @@ export type ProviderRetryPolicy = {
8
8
  backoffMs: number;
9
9
  retryableMessages: string[];
10
10
  };
11
+ export declare function isEmptyFinalAiMessageError(error: unknown): boolean;
11
12
  export declare function resolveProviderRetryPolicy(binding: CompiledAgentBinding): ProviderRetryPolicy;
12
13
  export declare function isRetryableProviderError(binding: CompiledAgentBinding, error: unknown): boolean;
@@ -31,13 +31,14 @@ export function resolveStreamIdleTimeout(binding) {
31
31
  return 60_000;
32
32
  }
33
33
  const BUILTIN_RETRYABLE_PROVIDER_MESSAGES = [
34
+ "eof",
34
35
  "unexpected eof",
35
36
  "other side closed",
36
37
  "socket hang up",
37
38
  "connection reset",
38
39
  "econnreset",
39
40
  ];
40
- function isEmptyFinalAiMessageError(error) {
41
/**
 * Reports whether an error denotes an empty final AI message.
 *
 * The message of an `Error` instance, or the string form of any other value,
 * is lower-cased and checked for the `empty_final_ai_message:` prefix.
 *
 * @param error - Thrown value of any type.
 * @returns `true` when the text starts with the prefix, ignoring case.
 */
export function isEmptyFinalAiMessageError(error) {
    let text;
    if (error instanceof Error) {
        text = error.message;
    }
    else {
        text = String(error);
    }
    const lowered = text.toLowerCase();
    return lowered.startsWith("empty_final_ai_message:");
}
@@ -11,10 +11,10 @@ function normalizeTerminalStatus(value) {
11
11
  function readStatusLine(value) {
12
12
  for (const line of value.split("\n")) {
13
13
  const [key, ...rest] = line.split(":");
14
- if (key?.trim().toLowerCase() !== "status") {
14
+ if (key?.trim().replaceAll("*", "").toLowerCase() !== "status") {
15
15
  continue;
16
16
  }
17
- const statusValue = rest.join(":").trim().split(/\s+/)[0];
17
+ const statusValue = rest.join(":").trim().replaceAll("*", "").split(/\s+/)[0];
18
18
  const status = normalizeTerminalStatus(statusValue);
19
19
  if (status) {
20
20
  return status;