@botbotgo/agent-harness 0.0.418 → 0.0.420

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (28)
  1. package/dist/cli/chat-interactive.js +1 -1
  2. package/dist/cli/chat-stream.js +9 -1
  3. package/dist/package-version.d.ts +2 -2
  4. package/dist/package-version.js +2 -2
  5. package/dist/runtime/adapter/compat/openai-compatible.js +12 -0
  6. package/dist/runtime/adapter/flow/invocation-flow.d.ts +2 -0
  7. package/dist/runtime/adapter/flow/invocation-flow.js +13 -5
  8. package/dist/runtime/adapter/flow/invoke-runtime.d.ts +1 -0
  9. package/dist/runtime/adapter/flow/invoke-runtime.js +1 -0
  10. package/dist/runtime/adapter/flow/stream-runtime.d.ts +4 -0
  11. package/dist/runtime/adapter/flow/stream-runtime.js +177 -14
  12. package/dist/runtime/adapter/invocation-result.js +17 -6
  13. package/dist/runtime/adapter/local-tool-invocation.d.ts +2 -1
  14. package/dist/runtime/adapter/local-tool-invocation.js +268 -21
  15. package/dist/runtime/adapter/model/model-providers.js +269 -58
  16. package/dist/runtime/adapter/model/prompted-json-tool-call-capture.d.ts +9 -0
  17. package/dist/runtime/adapter/model/prompted-json-tool-call-capture.js +40 -0
  18. package/dist/runtime/adapter/runtime-adapter-support.js +58 -12
  19. package/dist/runtime/adapter/runtime-shell.js +3 -2
  20. package/dist/runtime/adapter/stream-event-projection.js +22 -5
  21. package/dist/runtime/adapter/tool/tool-arguments.js +157 -67
  22. package/dist/runtime/adapter/tool/tool-replay.js +0 -4
  23. package/dist/runtime/agent-runtime-adapter.d.ts +3 -0
  24. package/dist/runtime/agent-runtime-adapter.js +217 -73
  25. package/dist/runtime/harness/run/stream-run.js +31 -3
  26. package/dist/runtime/parsing/output-tool-args.js +108 -0
  27. package/dist/workspace/resource-compilers.js +17 -4
  28. package/package.json +1 -1
@@ -1,6 +1,7 @@
1
1
  import path from "node:path";
2
+ import { createHash } from "node:crypto";
2
3
  import { createAsyncSubAgentMiddleware, createFilesystemMiddleware, createMemoryMiddleware, createPatchToolCallsMiddleware, createSkillsMiddleware, createSummarizationMiddleware, createSubAgentMiddleware, FilesystemBackend, StateBackend, } from "deepagents";
3
- import { AIMessage, createAgent, createMiddleware, humanInTheLoopMiddleware, todoListMiddleware, ToolMessage } from "langchain";
4
+ import { createAgent, createMiddleware, humanInTheLoopMiddleware, todoListMiddleware, ToolMessage } from "langchain";
4
5
  import { sanitizeVisibleText, tryParseJson, wrapResolvedModel, } from "./parsing/output-parsing.js";
5
6
  import { salvageJsonToolCalls } from "./parsing/output-tool-args.js";
6
7
  import { extractMessageText } from "../utils/message-content.js";
@@ -17,6 +18,7 @@ import { isEmptyFinalAiMessageError, resolveBindingTimeout, resolveStreamIdleTim
17
18
  import { createResolvedModel } from "./adapter/model/model-providers.js";
18
19
  import { renderDirectWorkspaceListing, shouldDirectlyListWorkspaceFiles } from "./adapter/direct-builtin-utility.js";
19
20
  import { appendProviderToolCallAliasTools, resolveAdapterTools } from "./adapter/tool-resolution.js";
21
+ import { normalizeToolArgsForSchema } from "./adapter/tool/tool-arguments.js";
20
22
  import { resolveRuntimeStreamExecutionContext, } from "./adapter/flow/execution-context.js";
21
23
  import { isRetryableProviderError } from "./adapter/resilience.js";
22
24
  import { UPSTREAM_REQUEST_CONFIG_KEY, UPSTREAM_SESSION_CONFIG_KEY } from "./adapter/upstream-configurable-keys.js";
@@ -73,7 +75,9 @@ function isPlanToolName(toolName) {
73
75
  return normalized === "write_todos"
74
76
  || normalized === "read_todos"
75
77
  || normalized === "tool_call_write_todos"
76
- || normalized === "tool_call_read_todos";
78
+ || normalized === "tool_call_read_todos"
79
+ || normalized === "call_write_todos"
80
+ || normalized === "call_read_todos";
77
81
  }
78
82
  function readConfiguredToolName(value) {
79
83
  if (typeof value !== "object" || value === null) {
@@ -82,26 +86,6 @@ function readConfiguredToolName(value) {
82
86
  const typed = value;
83
87
  return typeof typed.name === "string" ? typed.name.trim() : "";
84
88
  }
85
- function createBootstrapTodoPlan(toolNames) {
86
- const evidenceToolName = toolNames.find((toolName) => !isPlanToolName(toolName));
87
- const contents = evidenceToolName
88
- ? [
89
- `Run ${evidenceToolName} for the requested evidence`,
90
- `Inspect the ${evidenceToolName} result and extract concrete findings`,
91
- "Update TODO status from the observed evidence",
92
- "Return the final answer grounded in tool output",
93
- ]
94
- : [
95
- "Identify the concrete evidence needed for this request",
96
- "Collect and inspect the available evidence",
97
- "Update TODO status from the observed evidence",
98
- "Return the final answer grounded in evidence",
99
- ];
100
- return contents.map((content, index) => ({
101
- content,
102
- status: index === 0 ? "in_progress" : "pending",
103
- }));
104
- }
105
89
  function readMessageContentText(message) {
106
90
  if (typeof message !== "object" || message === null) {
107
91
  return "";
@@ -120,6 +104,12 @@ function readMessageContentText(message) {
120
104
  .join("")
121
105
  .trim();
122
106
  }
107
/**
 * Checks whether any message text contains one of the marker phrases
 * ("todo board already exists", "required todo board already exists",
 * "non-planning tool call"), matched case-insensitively.
 *
 * @param {unknown[]} messages - Messages whose text is read via readMessageContentText.
 * @returns {boolean} True when at least one message matches the marker pattern.
 */
function hasExternalPlanEvidenceInstruction(messages) {
    // No global/sticky flag, so the literal can be reused without lastIndex state.
    const markerPattern = /todo board already exists|required todo board already exists|non[-\s]?planning tool call/iu;
    const mentionsMarker = (entry) => markerPattern.test(readMessageContentText(entry));
    return messages.some(mentionsMarker);
}
123
113
  function parseToolCallArgs(value) {
124
114
  if (typeof value === "object" && value !== null && !Array.isArray(value)) {
125
115
  return value;
@@ -182,7 +172,49 @@ function todoToolCallIsTerminal(toolCall) {
182
172
  return status !== "pending" && status !== "in_progress";
183
173
  });
184
174
  }
175
/**
 * Extracts the tool name from a message that looks like a tool result.
 *
 * A message counts as a tool result when its type (the string `type` field,
 * otherwise the value returned by `_getType()`) is "tool", or when it carries
 * a string `tool_call_id`.
 *
 * @param {unknown} message - Candidate message object.
 * @returns {string} The message's string `name`, or "" when the message is not
 *   an object, is not a tool result, or has no string name.
 */
function readToolResultName(message) {
    if (message === null || typeof message !== "object") {
        return "";
    }
    let kind = "";
    if (typeof message.type === "string") {
        kind = message.type;
    }
    else if (typeof message._getType === "function") {
        kind = String(message._getType());
    }
    const looksLikeToolResult = kind === "tool" || typeof message.tool_call_id === "string";
    if (!looksLikeToolResult) {
        return "";
    }
    return typeof message.name === "string" ? message.name : "";
}
190
/**
 * Builds an error-status ToolMessage carrying the given content.
 *
 * @param {unknown} toolCallId - Tool call id to answer; used as-is when it is a string.
 * @param {string} content - Message content.
 * @param {string} fallbackPrefix - Prefix for the generated id when toolCallId is not a string.
 * @returns {ToolMessage} A ToolMessage with `status: "error"`.
 */
function createPlanGuardToolError(toolCallId, content, fallbackPrefix) {
    let resolvedId = toolCallId;
    if (typeof resolvedId !== "string") {
        // Synthesize an id of the form "<prefix>-<up to 8 base-36 chars>".
        const randomSuffix = Math.random().toString(36).slice(2, 10);
        resolvedId = `${fallbackPrefix}-${randomSuffix}`;
    }
    return new ToolMessage({
        content,
        tool_call_id: resolvedId,
        status: "error",
    });
}
197
/**
 * Tells whether a value is a non-null object exposing a callable `then`.
 *
 * @param {unknown} value - Value to inspect.
 * @returns {boolean} True for object thenables; false for everything else
 *   (including functions that happen to carry a `then` property).
 */
function isPromiseLike(value) {
    if (value === null || typeof value !== "object") {
        return false;
    }
    return typeof value.then === "function";
}
200
/**
 * Renders a thrown value as text: the error's own message followed, on a new
 * line, by the message of its `cause` when one is available.
 *
 * - Error instances contribute their `message`; any other thrown value is
 *   stringified.
 * - A `cause` that is an Error contributes its message; a string cause is used
 *   verbatim. Other cause types are ignored.
 * - Parts are trimmed, empty parts are dropped, and a cause that repeats the
 *   primary message is not printed twice.
 *
 * @param {unknown} error - The thrown value.
 * @returns {string} Newline-joined description; "" when nothing printable exists.
 */
function renderToolInvocationError(error) {
    // String() throws for prototype-less objects; fall back to the generic tag.
    const toText = (value) => {
        try {
            return String(value);
        }
        catch {
            return Object.prototype.toString.call(value);
        }
    };
    const readMessage = (value) => toText(value.message ?? "");
    const message = error instanceof Error ? readMessage(error) : toText(error);
    const cause = typeof error === "object" && error !== null && "cause" in error
        ? error.cause
        : undefined;
    let causeMessage = "";
    if (cause instanceof Error) {
        causeMessage = readMessage(cause);
    }
    else if (typeof cause === "string") {
        causeMessage = cause;
    }
    const parts = [];
    for (const candidate of [message, causeMessage]) {
        const trimmed = candidate.trim();
        if (trimmed && !parts.includes(trimmed)) {
            parts.push(trimmed);
        }
    }
    return parts.join("\n");
}
211
/**
 * Wraps a tool invocation failure in an error ToolMessage.
 *
 * @param {unknown} toolCallId - Id of the tool call being answered.
 * @param {string} toolName - Name of the tool that failed.
 * @param {unknown} error - The thrown value.
 * @returns {ToolMessage} Result of createPlanGuardToolError for the rendered text.
 */
function createToolInvocationErrorMessage(toolCallId, toolName, error) {
    // An empty rendering is replaced with a generic description.
    const detail = renderToolInvocationError(error) || "tool invocation failed";
    const content = `Error invoking tool '${toolName}' with error: ${detail}`;
    return createPlanGuardToolError(toolCallId, content, "tool-invocation-error");
}
185
215
  function createTodoPlanGuardMiddleware(options = {}) {
216
+ let observedPlanToolResult = false;
217
+ let observedNonPlanToolResult = false;
186
218
  return createMiddleware({
187
219
  name: "harnessTodoPlanGuard",
188
220
  wrapToolCall: ((request, handler) => {
@@ -192,34 +224,63 @@ function createTodoPlanGuardMiddleware(options = {}) {
192
224
  ? request.tool.name
193
225
  : "";
194
226
  const messages = Array.isArray(request.state?.messages) ? request.state.messages : [];
195
- const hasNonPlanToolResult = messages.some((message) => {
196
- if (typeof message !== "object" || message === null) {
197
- return false;
198
- }
199
- const typed = message;
200
- const messageType = typeof typed.type === "string"
201
- ? typed.type
202
- : typeof typed._getType === "function"
203
- ? String(typed._getType())
204
- : "";
205
- if (messageType !== "tool" && typeof typed.tool_call_id !== "string") {
206
- return false;
207
- }
208
- const resultToolName = typeof typed.name === "string" ? typed.name : "";
209
- return resultToolName.length > 0 && !isPlanToolName(resultToolName);
210
- });
227
+ const toolResultNames = messages.map(readToolResultName).filter((name) => name.length > 0);
228
+ const hasPlanToolResult = toolResultNames.some(isPlanToolName);
229
+ const hasNonPlanToolResult = toolResultNames.some((resultToolName) => !isPlanToolName(resultToolName));
230
+ const hasExternalPlanEvidence = hasExternalPlanEvidenceInstruction(messages);
231
+ if (options.requiresPlan === true
232
+ && !(observedPlanToolResult || hasPlanToolResult || hasExternalPlanEvidence)
233
+ && toolName.length > 0
234
+ && !isPlanToolName(toolName)) {
235
+ return createPlanGuardToolError(request.toolCall?.id, "Error: this agent has a required visible planning contract. Your first action must be write_todos with concrete task steps and statuses. Do not call non-planning tools before the initial todo board exists.", "todo-initial-plan-guard");
236
+ }
237
+ if (options.requiresPlan === true
238
+ && (observedPlanToolResult || hasPlanToolResult || hasExternalPlanEvidence)
239
+ && !(observedNonPlanToolResult || hasNonPlanToolResult)
240
+ && isPlanToolName(toolName)) {
241
+ return createPlanGuardToolError(request.toolCall?.id, "Error: the required todo board already exists, but no non-planning evidence tool has returned yet. Do not call write_todos or read_todos again. Your next action must be exactly one non-planning tool call selected from the available tool descriptions and schemas.", "todo-plan-evidence-guard");
242
+ }
211
243
  if (options.requiresPlan === true
212
- && !hasNonPlanToolResult
244
+ && !(observedNonPlanToolResult || hasNonPlanToolResult)
213
245
  && isPlanToolName(toolName)
214
246
  && normalizePlanToolName(toolName).includes("write_todos")
215
247
  && todoToolCallIsTerminal({ args: parseToolCallArgs(request.toolCall?.args) })) {
216
- return new ToolMessage({
217
- content: "Error: write_todos cannot mark every todo as terminal before any non-planning evidence tool returns. Keep one todo in_progress and the remaining todos pending until evidence tools return.",
218
- tool_call_id: typeof request.toolCall?.id === "string" ? request.toolCall.id : `write-todos-tool-guard-${Math.random().toString(36).slice(2, 10)}`,
219
- status: "error",
220
- });
248
+ return createPlanGuardToolError(request.toolCall?.id, "Error: write_todos cannot mark every todo as terminal before any non-planning evidence tool returns. Keep one todo in_progress and the remaining todos pending until evidence tools return.", "write-todos-tool-guard");
249
+ }
250
+ const markObservedToolResult = () => {
251
+ if (isPlanToolName(toolName)) {
252
+ observedPlanToolResult = true;
253
+ }
254
+ else if (toolName.length > 0) {
255
+ observedNonPlanToolResult = true;
256
+ }
257
+ };
258
+ const normalizedArgs = normalizeToolArgsForSchema(parseToolCallArgs(request.toolCall?.args), request.tool?.schema, request.toolCall?.args);
259
+ const normalizedRequest = {
260
+ ...request,
261
+ toolCall: request.toolCall
262
+ ? {
263
+ ...request.toolCall,
264
+ args: normalizedArgs,
265
+ }
266
+ : request.toolCall,
267
+ };
268
+ try {
269
+ const result = handler(normalizedRequest);
270
+ if (isPromiseLike(result)) {
271
+ return result
272
+ .then((value) => {
273
+ markObservedToolResult();
274
+ return value;
275
+ })
276
+ .catch((error) => createToolInvocationErrorMessage(request.toolCall?.id, toolName || "unknown", error));
277
+ }
278
+ markObservedToolResult();
279
+ return result;
280
+ }
281
+ catch (error) {
282
+ return createToolInvocationErrorMessage(request.toolCall?.id, toolName || "unknown", error);
221
283
  }
222
- return handler(request);
223
284
  }),
224
285
  afterModel: (state) => {
225
286
  if (!Array.isArray(state.messages) || state.messages.length === 0) {
@@ -246,22 +307,8 @@ function createTodoPlanGuardMiddleware(options = {}) {
246
307
  }
247
308
  const lastAiMessage = [...state.messages].reverse().find((message) => readMessageToolCalls(message).length > 0);
248
309
  const lastToolCalls = readMessageToolCalls(lastAiMessage);
249
- if (!lastAiMessage && options.requiresPlan === true) {
250
- const latestMessage = state.messages.at(-1);
251
- const hasVisibleContent = readMessageContentText(latestMessage).length > 0;
252
- if (!hasVisibleContent) {
253
- return {
254
- messages: [new AIMessage({
255
- content: "",
256
- tool_calls: [{
257
- id: `write-todos-bootstrap-${Math.random().toString(36).slice(2, 10)}`,
258
- name: "write_todos",
259
- args: { todos: createBootstrapTodoPlan(options.toolNames ?? []) },
260
- type: "tool_call",
261
- }],
262
- })],
263
- };
264
- }
310
+ if (!lastAiMessage) {
311
+ return;
265
312
  }
266
313
  const writeTodosCalls = lastToolCalls.filter((toolCall) => isPlanToolName(toolCall.name));
267
314
  const prematureCompletedCalls = writeTodosCalls.filter(todoToolCallIsTerminal);
@@ -442,6 +489,61 @@ function hasDelegatedPlanEvidence(result) {
442
489
  return Array.isArray(toolResults)
443
490
  && toolResults.some((item) => isPlanToolName(item.toolName));
444
491
  }
492
/**
 * Converts an upstream stream event into a tool-evidence record, if it is one.
 *
 * Recognized shapes (all require a non-empty string `name`):
 * - tool start ("on_tool_start", or "on_chain_start" with run_type "tool") for
 *   a planning tool: the evidence output is the event's `data.input`;
 * - tool end ("on_tool_end", or "on_chain_end" with run_type "tool"): the
 *   output is `data.output`;
 * - "on_tool_error": the output is `data.error`, falling back to `data.output`,
 *   and the record is flagged `isError: true`.
 *
 * @param {unknown} event - Upstream event object.
 * @returns {{toolName: string, output: unknown, isError?: boolean} | null}
 *   The evidence record, or null when the event is not a recognized tool event.
 */
function readUpstreamToolEvidence(event) {
    if (event === null || typeof event !== "object") {
        return null;
    }
    const toolName = typeof event.name === "string" ? event.name : "";
    if (toolName === "") {
        return null;
    }
    const eventName = typeof event.event === "string" ? event.event : "";
    const runType = typeof event.run_type === "string" ? event.run_type : "";
    const isToolChainEvent = (chainEventName) => eventName === chainEventName && runType === "tool";
    const started = eventName === "on_tool_start" || isToolChainEvent("on_chain_start");
    if (started && isPlanToolName(toolName)) {
        return { toolName, output: event.data?.input };
    }
    if (eventName === "on_tool_end" || isToolChainEvent("on_chain_end")) {
        return { toolName, output: event.data?.output };
    }
    if (eventName === "on_tool_error") {
        return { toolName, output: event.data?.error ?? event.data?.output, isError: true };
    }
    return null;
}
517
/**
 * Appends a tool-evidence record to the list unless an equivalent record is
 * already present. The list is mutated in place.
 *
 * Two records are equivalent when they name the same tool, agree on whether
 * they are errors (`isError === true` versus anything else, so `false` and a
 * missing flag match), and have equal outputs. Outputs are equal when they are
 * the same value or share a fingerprint:
 * - Error outputs are fingerprinted by name and message, because
 *   JSON.stringify renders every Error as "{}";
 * - other outputs are fingerprinted with JSON.stringify;
 * - outputs that cannot be serialized (circular structures, BigInt) only match
 *   by identity instead of throwing.
 *
 * @param {Array<{toolName: string, output?: unknown, isError?: boolean}>} executedToolResults
 *   Accumulated evidence records.
 * @param {{toolName: string, output?: unknown, isError?: boolean}} evidence
 *   Record to append when not already present.
 * @returns {void}
 */
function appendUniqueToolEvidence(executedToolResults, evidence) {
    const UNSERIALIZABLE = Symbol("unserializable");
    const fingerprint = (output) => {
        if (output instanceof Error) {
            // Cannot collide with JSON text, which never starts with "error:".
            return `error:${output.name}:${output.message}`;
        }
        try {
            return JSON.stringify(output);
        }
        catch {
            return UNSERIALIZABLE;
        }
    };
    // Computed once rather than once per existing record.
    const evidenceFingerprint = fingerprint(evidence.output);
    const evidenceFailed = evidence.isError === true;
    const exists = executedToolResults.some((item) => {
        if (item.toolName !== evidence.toolName || (item.isError === true) !== evidenceFailed) {
            return false;
        }
        if (item.output === evidence.output) {
            return true;
        }
        return evidenceFingerprint !== UNSERIALIZABLE
            && fingerprint(item.output) === evidenceFingerprint;
    });
    if (!exists) {
        executedToolResults.push(evidence);
    }
}
525
/**
 * Returns a copy of `result` whose `metadata.executedToolResults` combines the
 * tool evidence of a previous attempt with that of the current one.
 *
 * Entries from `previous` come first, then entries from `result`; duplicates
 * are dropped by appendUniqueToolEvidence. Entries that are not objects with a
 * string `toolName` are ignored. A missing (null/undefined) `previous` or
 * `result` is treated as carrying no evidence instead of throwing.
 *
 * @param {object | null | undefined} result - Result of the latest attempt.
 * @param {object | null | undefined} previous - Result of the earlier attempt.
 * @returns {object} Shallow copy of `result` with the merged evidence list;
 *   other metadata keys of `result` are preserved.
 */
function mergeDelegatedResultToolEvidence(result, previous) {
    const merged = [];
    for (const source of [previous, result]) {
        const toolResults = Array.isArray(source?.metadata?.executedToolResults)
            ? source.metadata.executedToolResults
            : [];
        for (const toolResult of toolResults) {
            const isEvidenceRecord = typeof toolResult === "object"
                && toolResult !== null
                && typeof toolResult.toolName === "string";
            if (isEvidenceRecord) {
                appendUniqueToolEvidence(merged, toolResult);
            }
        }
    }
    return {
        ...result,
        metadata: {
            ...(result?.metadata ?? {}),
            executedToolResults: merged,
        },
    };
}
445
547
  const DELEGATED_PLAN_EVIDENCE_RETRY_INSTRUCTION = [
446
548
  "The delegated task requires visible TODO planning evidence.",
447
549
  "Before any other tool call or final answer, call write_todos with concrete task steps and statuses.",
@@ -966,14 +1068,8 @@ export class AgentRuntimeAdapter {
966
1068
  const inlineSubagents = input.resolvedSubagents.filter((subagent) => !("graphId" in subagent));
967
1069
  const asyncSubagents = input.resolvedSubagents.filter((subagent) => "graphId" in subagent);
968
1070
  const subagents = inlineSubagents;
969
- const requiresPlan = binding.harnessRuntime.executionContract?.requiresPlan === true;
970
- const resolvedToolNames = input.resolvedTools.map(readConfiguredToolName).filter((name) => name.length > 0);
971
1071
  const middleware = [
972
1072
  ...(builtinTools.todos === false ? [] : [todoListMiddleware()]),
973
- ...(builtinTools.todos === false ? [] : [createTodoPlanGuardMiddleware({
974
- requiresPlan,
975
- toolNames: resolvedToolNames,
976
- })]),
977
1073
  ...(input.resolvedSkills.length > 0 ? [createSkillsMiddleware({
978
1074
  backend,
979
1075
  sources: resolveDeepAgentSkillSourceRootPaths({
@@ -1029,7 +1125,22 @@ export class AgentRuntimeAdapter {
1029
1125
  ? filesystemConfig.sessionStorage
1030
1126
  : undefined;
1031
1127
  const sessionScoped = sessionStorage?.enabled === true;
1032
- return `${binding.agent.sourcePath}::${sessionScoped ? (sessionId ?? "__default__") : "__binding__"}`;
1128
+ const executionParams = getBindingExecutionParams(binding);
1129
+ const primaryModel = getBindingPrimaryModel(binding);
1130
+ const runnableFingerprint = createHash("sha256").update(JSON.stringify({
1131
+ executionKind: getBindingExecutionKind(binding),
1132
+ systemPrompt: getBindingSystemPrompt(binding) ?? "",
1133
+ responseFormat: executionParams && "responseFormat" in executionParams ? executionParams.responseFormat : undefined,
1134
+ model: primaryModel
1135
+ ? {
1136
+ id: primaryModel.id,
1137
+ provider: primaryModel.provider,
1138
+ model: primaryModel.model,
1139
+ }
1140
+ : undefined,
1141
+ tools: getBindingPrimaryTools(binding).map((tool) => tool.name).filter(Boolean).sort(),
1142
+ })).digest("hex").slice(0, 16);
1143
+ return `${binding.agent.sourcePath}::${sessionScoped ? (sessionId ?? "__default__") : "__binding__"}::${runnableFingerprint}`;
1033
1144
  }
1034
1145
  async create(binding, options = {}) {
1035
1146
  const cacheKey = this.buildRunnableCacheKey(binding, options.sessionId ?? options.legacySessionId);
@@ -1280,11 +1391,12 @@ export class AgentRuntimeAdapter {
1280
1391
  if (!selectedBinding) {
1281
1392
  return null;
1282
1393
  }
1283
- const runDelegatedRequest = (text, requestSuffix = "") => this.invoke(selectedBinding, text, sessionId, `${requestId}:${subagentType}${requestSuffix}`, undefined, [], {
1394
+ const runDelegatedRequest = (text, requestSuffix = "", delegatedOptions = {}) => this.invoke(selectedBinding, text, sessionId, `${requestId}:${subagentType}${requestSuffix}`, undefined, [], {
1284
1395
  context: options.context,
1285
1396
  state: options.state,
1286
1397
  files: options.files,
1287
1398
  memoryContext: options.memoryContext,
1399
+ ...delegatedOptions,
1288
1400
  });
1289
1401
  let delegatedResult;
1290
1402
  try {
@@ -1349,7 +1461,12 @@ export class AgentRuntimeAdapter {
1349
1461
  }
1350
1462
  if (targetRequiresExecutionToolEvidence && !hasDelegatedExecutionToolEvidence(delegatedResult)) {
1351
1463
  try {
1352
- delegatedResult = await runDelegatedRequest([requestText, EXECUTION_WITH_TOOL_EVIDENCE_RETRY_INSTRUCTION].filter(Boolean).join("\n\n"), ":tool-evidence-retry");
1464
+ delegatedResult = await runDelegatedRequest([requestText, EXECUTION_WITH_TOOL_EVIDENCE_RETRY_INSTRUCTION].filter(Boolean).join("\n\n"), ":tool-evidence-retry", selectedBinding.harnessRuntime.executionContract?.requiresPlan === true
1465
+ ? {
1466
+ suppressInitialRequiredPlanInstruction: true,
1467
+ externalPlanEvidence: hasDelegatedPlanEvidence(delegatedResult),
1468
+ }
1469
+ : {});
1353
1470
  }
1354
1471
  catch (error) {
1355
1472
  const output = error instanceof Error ? error.message : String(error);
@@ -1648,12 +1765,18 @@ export class AgentRuntimeAdapter {
1648
1765
  continue;
1649
1766
  }
1650
1767
  if (chunk.kind === "tool-result") {
1651
- executedToolResults.push({
1768
+ appendUniqueToolEvidence(executedToolResults, {
1652
1769
  toolName: chunk.toolName,
1653
1770
  output: chunk.output,
1654
1771
  ...(chunk.isError !== undefined ? { isError: chunk.isError } : {}),
1655
1772
  });
1656
1773
  }
1774
+ if (chunk.kind === "upstream-event") {
1775
+ const streamedEvidence = readUpstreamToolEvidence(chunk.event);
1776
+ if (streamedEvidence) {
1777
+ appendUniqueToolEvidence(executedToolResults, streamedEvidence);
1778
+ }
1779
+ }
1657
1780
  yield { ...chunk, agentId: chunk.agentId ?? selectedBinding.agent.id };
1658
1781
  }
1659
1782
  }
@@ -1683,10 +1806,12 @@ export class AgentRuntimeAdapter {
1683
1806
  const targetRequiresExecutionToolEvidence = getBindingPrimaryTools(selectedBinding).length > 0;
1684
1807
  if (selectedBinding.harnessRuntime.executionContract?.requiresPlan === true
1685
1808
  && !hasDelegatedPlanEvidence(delegatedResult)) {
1686
- delegatedResult = yield* runDelegatedStreamAttempt([requestText, DELEGATED_PLAN_EVIDENCE_RETRY_INSTRUCTION].filter(Boolean).join("\n\n"), ":plan-evidence-retry");
1809
+ const previousDelegatedResult = delegatedResult;
1810
+ delegatedResult = mergeDelegatedResultToolEvidence(yield* runDelegatedStreamAttempt([requestText, DELEGATED_PLAN_EVIDENCE_RETRY_INSTRUCTION].filter(Boolean).join("\n\n"), ":plan-evidence-retry"), previousDelegatedResult);
1687
1811
  }
1688
1812
  if (targetRequiresExecutionToolEvidence && !hasDelegatedExecutionToolEvidence(delegatedResult)) {
1689
- delegatedResult = yield* runDelegatedStreamAttempt([requestText, EXECUTION_WITH_TOOL_EVIDENCE_RETRY_INSTRUCTION].filter(Boolean).join("\n\n"), ":tool-evidence-retry");
1813
+ const previousDelegatedResult = delegatedResult;
1814
+ delegatedResult = mergeDelegatedResultToolEvidence(yield* runDelegatedStreamAttempt([requestText, EXECUTION_WITH_TOOL_EVIDENCE_RETRY_INSTRUCTION].filter(Boolean).join("\n\n"), ":tool-evidence-retry"), previousDelegatedResult);
1690
1815
  }
1691
1816
  if (selectedBinding.harnessRuntime.executionContract?.requiresPlan === true
1692
1817
  && !hasDelegatedPlanEvidence(delegatedResult)) {
@@ -1707,6 +1832,25 @@ export class AgentRuntimeAdapter {
1707
1832
  finalMessageText: output,
1708
1833
  };
1709
1834
  }
1835
+ const delegatedToolResults = Array.isArray(delegatedResult.metadata?.executedToolResults)
1836
+ ? delegatedResult.metadata.executedToolResults
1837
+ : [];
1838
+ for (const toolResult of delegatedToolResults) {
1839
+ const toolName = typeof toolResult.toolName === "string" ? toolResult.toolName : "";
1840
+ if (!toolName || isPlanToolName(toolName)) {
1841
+ continue;
1842
+ }
1843
+ yield {
1844
+ kind: "commentary",
1845
+ content: `Running tool ${toolName}.`,
1846
+ agentId: selectedBinding.agent.id,
1847
+ };
1848
+ yield {
1849
+ kind: "commentary",
1850
+ content: `Tool ${toolName} ${toolResult.isError === true ? "failed" : "completed"}.`,
1851
+ agentId: selectedBinding.agent.id,
1852
+ };
1853
+ }
1710
1854
  return {
1711
1855
  toolOutput: resolveDelegatedResultOutput(delegatedResult),
1712
1856
  delegatedSubagentType: subagentType,
@@ -62,6 +62,12 @@ function inferPlanItemStatusFromTerminalAssistantOutput(value) {
62
62
  }
63
63
  return isSubstantiveTerminalAssistantOutput(value) ? "completed" : null;
64
64
  }
65
/**
 * Maps a terminal execution status to a plan item status, taking observed
 * tool results into account.
 *
 * A "blocked" status is reported as "completed" when a successful tool result
 * was seen; every other combination defers to mapTerminalStatusToPlanItemStatus.
 *
 * @param {string} status - Terminal execution status.
 * @param {boolean} sawSuccessfulToolResult - Whether a successful tool result was observed.
 * @returns {string} "completed" for the blocked-with-evidence case, otherwise
 *   whatever mapTerminalStatusToPlanItemStatus returns for `status`.
 */
function mapTerminalStatusToObservedPlanItemStatus(status, sawSuccessfulToolResult) {
    const blockedDespiteEvidence = status === "blocked" && sawSuccessfulToolResult;
    return blockedDespiteEvidence
        ? "completed"
        : mapTerminalStatusToPlanItemStatus(status);
}
65
71
  function reconcilePlanStateToTerminalStatus(planState, status, updatedAt) {
66
72
  const items = planState.items.map((item) => ({
67
73
  ...item,
@@ -545,6 +551,17 @@ function createProfileStepCommentary(step) {
545
551
  if (step.kind === "agent" && step.action === "startup") {
546
552
  return `Preparing ${name}.`;
547
553
  }
554
+ if (step.kind === "tool") {
555
+ if (step.status === "started") {
556
+ return `Running tool ${name}.`;
557
+ }
558
+ if (step.status === "completed") {
559
+ return `Tool ${name} completed.`;
560
+ }
561
+ if (step.status === "failed") {
562
+ return `Tool ${name} failed.`;
563
+ }
564
+ }
548
565
  return null;
549
566
  }
550
567
  function isOpenAICompatibleStreamingCompatibilityError(binding, error) {
@@ -1016,7 +1033,7 @@ export async function* streamHarnessRun(options) {
1016
1033
  currentPlanState = await refreshPlanStateFromPersistence(options, currentPlanState);
1017
1034
  const terminalStructuredStatus = readTerminalExecutionStatus(actual.structuredResponse);
1018
1035
  if (terminalStructuredStatus && currentPlanState && planStateHasActiveItems(currentPlanState)) {
1019
- const reconciledPlanState = reconcilePlanStateToTerminalStatus(currentPlanState, mapTerminalStatusToPlanItemStatus(terminalStructuredStatus), new Date().toISOString());
1036
+ const reconciledPlanState = reconcilePlanStateToTerminalStatus(currentPlanState, mapTerminalStatusToObservedPlanItemStatus(terminalStructuredStatus, sawSuccessfulToolResult), new Date().toISOString());
1020
1037
  const signature = buildPlanStateSignature(reconciledPlanState);
1021
1038
  if (signature !== lastPlanStateSignature) {
1022
1039
  const previousPlanState = currentPlanState;
@@ -1040,7 +1057,18 @@ export async function* streamHarnessRun(options) {
1040
1057
  }
1041
1058
  }
1042
1059
  currentPlanState = await refreshPlanStateFromPersistence(options, currentPlanState);
1043
- const terminalAssistantPlanItemStatus = inferPlanItemStatusFromTerminalAssistantOutput(assistantOutput);
1060
+ const explicitTerminalAssistantStatus = readTerminalExecutionStatus(assistantOutput);
1061
+ let terminalAssistantPlanItemStatus = inferPlanItemStatusFromTerminalAssistantOutput(assistantOutput);
1062
+ if (explicitTerminalAssistantStatus === "blocked" && sawSuccessfulToolResult) {
1063
+ terminalAssistantPlanItemStatus = "completed";
1064
+ }
1065
+ if (terminalAssistantPlanItemStatus === "failed"
1066
+ && sawSuccessfulToolResult
1067
+ && !explicitTerminalAssistantStatus
1068
+ && !/^\s*terminated\b|\bBlockers?:\b|(?:委托执行失败|未能完成|无法完成)/iu.test(assistantOutput)
1069
+ && assistantOutput.trim()) {
1070
+ terminalAssistantPlanItemStatus = "completed";
1071
+ }
1044
1072
  if (terminalAssistantPlanItemStatus && currentPlanState && planStateHasActiveItems(currentPlanState)) {
1045
1073
  const reconciledPlanState = reconcilePlanStateToTerminalStatus(currentPlanState, terminalAssistantPlanItemStatus, new Date().toISOString());
1046
1074
  const signature = buildPlanStateSignature(reconciledPlanState);
@@ -1116,7 +1144,7 @@ export async function* streamHarnessRun(options) {
1116
1144
  const canUseDeterministicToolEvidenceOutput = !currentPlanState || !planStateHasActiveItems(currentPlanState) || Boolean(terminalStructuredStatus);
1117
1145
  if (!assistantOutput && sawSuccessfulToolResult && deterministicToolEvidenceOutput && canUseDeterministicToolEvidenceOutput) {
1118
1146
  if (terminalStructuredStatus && currentPlanState && planStateHasActiveItems(currentPlanState)) {
1119
- const reconciledPlanState = reconcilePlanStateToTerminalStatus(currentPlanState, mapTerminalStatusToPlanItemStatus(terminalStructuredStatus), new Date().toISOString());
1147
+ const reconciledPlanState = reconcilePlanStateToTerminalStatus(currentPlanState, mapTerminalStatusToObservedPlanItemStatus(terminalStructuredStatus, sawSuccessfulToolResult), new Date().toISOString());
1120
1148
  const signature = buildPlanStateSignature(reconciledPlanState);
1121
1149
  if (signature !== lastPlanStateSignature) {
1122
1150
  const previousPlanState = currentPlanState;
@@ -309,6 +309,89 @@ function normalizePythonLikeJson(value) {
309
309
  }
310
310
  return output;
311
311
  }
312
/**
 * Repairs JSON-like text in which an object inside an array lost its opening
 * brace, e.g. `[{"a":1}, "b":2}]` becomes `[{"a":1}, {"b":2}]`.
 *
 * The text is scanned once while tracking string literals (with backslash
 * escapes) and a stack of open `{` / `[` containers. After a comma whose
 * innermost open container is an array, if the next non-whitespace token is a
 * string literal followed (after optional whitespace) by `:`, a `{` is
 * inserted before that string and pushed on the stack.
 *
 * @param {string} value - Text to repair.
 * @returns {string | null} The repaired text, or null when no brace was inserted.
 */
function repairMissingArrayObjectOpenBraces(value) {
    const isWhitespaceAt = (at) => /\s/u.test(value[at] ?? "");
    // Index of the quote closing the string opened at openQuoteAt, or
    // value.length when the string never terminates.
    const findClosingQuote = (openQuoteAt) => {
        let cursor = openQuoteAt + 1;
        let escaped = false;
        while (cursor < value.length) {
            const next = value[cursor];
            if (escaped) {
                escaped = false;
            }
            else if (next === "\\") {
                escaped = true;
            }
            else if (next === "\"") {
                break;
            }
            cursor += 1;
        }
        return cursor;
    };
    const pieces = [];
    const openContainers = [];
    let insertedBrace = false;
    let insideString = false;
    let afterBackslash = false;
    let position = 0;
    while (position < value.length) {
        const current = value[position];
        position += 1;
        // Every input character is copied through; only "{" is ever added.
        pieces.push(current);
        if (insideString) {
            if (afterBackslash) {
                afterBackslash = false;
            }
            else if (current === "\\") {
                afterBackslash = true;
            }
            else if (current === "\"") {
                insideString = false;
            }
            continue;
        }
        switch (current) {
            case "\"":
                insideString = true;
                continue;
            case "{":
            case "[":
                openContainers.push(current);
                continue;
            case "}":
                if (openContainers.at(-1) === "{") {
                    openContainers.pop();
                }
                continue;
            case "]":
                if (openContainers.at(-1) === "[") {
                    openContainers.pop();
                }
                continue;
            case ",":
                break;
            default:
                continue;
        }
        if (openContainers.at(-1) !== "[") {
            continue;
        }
        // Comma directly inside an array: copy the whitespace that follows it.
        while (position < value.length && isWhitespaceAt(position)) {
            pieces.push(value[position]);
            position += 1;
        }
        if (value[position] !== "\"") {
            continue;
        }
        // Peek past the upcoming string; a trailing ":" means it is an object
        // key whose "{" is missing. The string itself is consumed by the main
        // loop afterwards.
        let afterKey = findClosingQuote(position) + 1;
        while (afterKey < value.length && isWhitespaceAt(afterKey)) {
            afterKey += 1;
        }
        if (value[afterKey] === ":") {
            pieces.push("{");
            openContainers.push("{");
            insertedBrace = true;
        }
    }
    return insertedBrace ? pieces.join("") : null;
}
312
395
  export function salvageToolArgs(value) {
313
396
  if (typeof value === "object" && value && !Array.isArray(value)) {
314
397
  return value;
@@ -359,6 +442,13 @@ export function salvageJsonToolCalls(value) {
359
442
  if (direct) {
360
443
  return direct;
361
444
  }
445
+ const repairedArrayObjects = repairMissingArrayObjectOpenBraces(trimmed);
446
+ if (repairedArrayObjects) {
447
+ const parsed = tryParseJson(repairedArrayObjects);
448
+ if (parsed) {
449
+ return parsed;
450
+ }
451
+ }
362
452
  const pythonLike = normalizePythonLikeJson(trimmed);
363
453
  if (pythonLike) {
364
454
  const parsed = tryParseJson(pythonLike);
@@ -366,6 +456,15 @@ export function salvageJsonToolCalls(value) {
366
456
  return parsed;
367
457
  }
368
458
  }
459
+ if (pythonLike) {
460
+ const repairedPythonLike = repairMissingArrayObjectOpenBraces(pythonLike);
461
+ if (repairedPythonLike) {
462
+ const parsed = tryParseJson(repairedPythonLike);
463
+ if (parsed) {
464
+ return parsed;
465
+ }
466
+ }
467
+ }
369
468
  const closed = closeJsonContainerSuffix(trimmed);
370
469
  if (closed) {
371
470
  const parsed = tryParseJson(closed);
@@ -373,6 +472,15 @@ export function salvageJsonToolCalls(value) {
373
472
  return parsed;
374
473
  }
375
474
  }
475
+ if (repairedArrayObjects) {
476
+ const closedRepaired = closeJsonContainerSuffix(repairedArrayObjects);
477
+ if (closedRepaired) {
478
+ const parsed = tryParseJson(closedRepaired);
479
+ if (parsed) {
480
+ return parsed;
481
+ }
482
+ }
483
+ }
376
484
  const embeddedObject = extractBalancedJsonObject(trimmed);
377
485
  if (embeddedObject) {
378
486
  const parsed = tryParseJson(embeddedObject);