@botbotgo/agent-harness 0.0.443 → 0.0.445

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,2 +1,2 @@
1
- export declare const AGENT_HARNESS_VERSION = "0.0.443";
1
+ export declare const AGENT_HARNESS_VERSION = "0.0.445";
2
2
  export declare const AGENT_HARNESS_RELEASE_DATE = "2026-05-04";
@@ -1,2 +1,2 @@
1
- export const AGENT_HARNESS_VERSION = "0.0.443";
1
+ export const AGENT_HARNESS_VERSION = "0.0.445";
2
2
  export const AGENT_HARNESS_RELEASE_DATE = "2026-05-04";
@@ -1,7 +1,7 @@
1
1
  import path from "node:path";
2
2
  import { createHash } from "node:crypto";
3
3
  import { createAsyncSubAgentMiddleware, createFilesystemMiddleware, createMemoryMiddleware, createPatchToolCallsMiddleware, createSkillsMiddleware, createSummarizationMiddleware, createSubAgentMiddleware, FilesystemBackend, StateBackend, } from "deepagents";
4
- import { createAgent, createMiddleware, humanInTheLoopMiddleware, todoListMiddleware, ToolMessage } from "langchain";
4
+ import { createAgent, humanInTheLoopMiddleware, todoListMiddleware } from "langchain";
5
5
  import { sanitizeVisibleText, tryParseJson, wrapResolvedModel, } from "./parsing/output-parsing.js";
6
6
  import { salvageJsonToolCalls } from "./parsing/output-tool-args.js";
7
7
  import { extractMessageText } from "../utils/message-content.js";
@@ -17,7 +17,6 @@ import { extractSubagentRequestText, invokeBuiltinTaskTool as invokeBuiltinTaskT
17
17
  import { isEmptyFinalAiMessageError, resolveBindingTimeout, resolveStreamIdleTimeout, } from "./adapter/resilience.js";
18
18
  import { createResolvedModel } from "./adapter/model/model-providers.js";
19
19
  import { appendProviderToolCallAliasTools, resolveAdapterTools } from "./adapter/tool-resolution.js";
20
- import { normalizeToolArgsForSchema } from "./adapter/tool/tool-arguments.js";
21
20
  import { resolveRuntimeStreamExecutionContext, } from "./adapter/flow/execution-context.js";
22
21
  import { isRetryableProviderError } from "./adapter/resilience.js";
23
22
  import { UPSTREAM_REQUEST_CONFIG_KEY, UPSTREAM_SESSION_CONFIG_KEY } from "./adapter/upstream-configurable-keys.js";
@@ -39,6 +38,61 @@ function hasDelegatedExecutionToolEvidence(result) {
39
38
  return executedToolResults.some((toolResult) => (toolResult.isError !== true
40
39
  && !isPlanToolName(toolResult.toolName)));
41
40
  }
41
+ function normalizeEvidenceToolName(toolName) {
42
+ return typeof toolName === "string" ? toolName.trim().toLowerCase() : "";
43
+ }
44
+ function collectSuccessfulDelegatedExecutionToolNames(result) {
45
+ const executedToolResults = Array.isArray(result.metadata?.executedToolResults)
46
+ ? result.metadata.executedToolResults
47
+ : [];
48
+ return new Set(executedToolResults
49
+ .filter((toolResult) => toolResult.isError !== true && !isPlanToolName(toolResult.toolName))
50
+ .map((toolResult) => normalizeEvidenceToolName(toolResult.toolName))
51
+ .filter((toolName) => toolName.length > 0));
52
+ }
53
+ function textExplicitlyNamesConfiguredTool(text, toolName) {
54
+ const name = toolName.trim();
55
+ if (!name) {
56
+ return false;
57
+ }
58
+ const pattern = new RegExp(`(?:^|[^\\p{L}\\p{N}_-])${escapeRegExp(name)}(?:$|[^\\p{L}\\p{N}_-])`, "iu");
59
+ return pattern.test(text);
60
+ }
61
+ function resolveExplicitRequestedExecutionToolNames(binding, requestText) {
62
+ const text = requestText.trim();
63
+ if (!text) {
64
+ return [];
65
+ }
66
+ return getBindingPrimaryTools(binding)
67
+ .map((tool) => tool.name)
68
+ .filter((toolName) => typeof toolName === "string" && toolName.trim().length > 0)
69
+ .filter((toolName) => !isPlanToolName(toolName))
70
+ .filter((toolName) => textExplicitlyNamesConfiguredTool(text, toolName));
71
+ }
72
+ function listMissingDelegatedExecutionToolEvidence(result, requiredToolNames = []) {
73
+ if (requiredToolNames.length === 0) {
74
+ return hasDelegatedExecutionToolEvidence(result) ? [] : ["configured non-planning tools"];
75
+ }
76
+ const observed = collectSuccessfulDelegatedExecutionToolNames(result);
77
+ return requiredToolNames.filter((toolName) => !observed.has(normalizeEvidenceToolName(toolName)));
78
+ }
79
+ function hasRequiredDelegatedExecutionToolEvidence(result, requiredToolNames = []) {
80
+ return hasDelegatedExecutionToolEvidence(result)
81
+ && listMissingDelegatedExecutionToolEvidence(result, requiredToolNames).length === 0;
82
+ }
83
+ function buildExplicitExecutionToolRetryInstruction(missingToolNames) {
84
+ const tools = missingToolNames
85
+ .filter((toolName) => toolName !== "configured non-planning tools")
86
+ .join(", ");
87
+ if (!tools) {
88
+ return "";
89
+ }
90
+ return [
91
+ `The request explicitly named configured evidence tool(s): ${tools}.`,
92
+ "Before the final answer, call every listed non-planning tool that has not already produced a successful tool result.",
93
+ "Do not substitute a different evidence tool for an explicitly named configured tool unless that tool invocation itself fails and the blocker is reported.",
94
+ ].join("\n");
95
+ }
42
96
  function buildDelegatedPlanEvidenceBlocker(agentId) {
43
97
  return JSON.stringify({
44
98
  status: "blocked",
@@ -97,245 +151,6 @@ function readConfiguredToolName(value) {
97
151
  const typed = value;
98
152
  return typeof typed.name === "string" ? typed.name.trim() : "";
99
153
  }
100
- function readMessageContentText(message) {
101
- if (typeof message !== "object" || message === null) {
102
- return "";
103
- }
104
- const content = message.content;
105
- if (typeof content === "string") {
106
- return content.trim();
107
- }
108
- if (!Array.isArray(content)) {
109
- return "";
110
- }
111
- return content
112
- .map((part) => typeof part === "object" && part !== null && typeof part.text === "string"
113
- ? part.text
114
- : "")
115
- .join("")
116
- .trim();
117
- }
118
- function hasExternalPlanEvidenceInstruction(messages) {
119
- return messages.some((message) => {
120
- const text = readMessageContentText(message);
121
- return /todo board already exists|required todo board already exists|non[-\s]?planning tool call/iu.test(text);
122
- });
123
- }
124
- function parseToolCallArgs(value) {
125
- if (typeof value === "object" && value !== null && !Array.isArray(value)) {
126
- return value;
127
- }
128
- if (typeof value !== "string" || value.trim().length === 0) {
129
- return {};
130
- }
131
- try {
132
- const parsed = JSON.parse(value);
133
- return typeof parsed === "object" && parsed !== null && !Array.isArray(parsed)
134
- ? parsed
135
- : {};
136
- }
137
- catch {
138
- return {};
139
- }
140
- }
141
- function readMessageToolCalls(message) {
142
- if (typeof message !== "object" || message === null) {
143
- return [];
144
- }
145
- const typed = message;
146
- const raw = Array.isArray(typed.tool_calls) ? typed.tool_calls
147
- : Array.isArray(typed.kwargs?.tool_calls) ? typed.kwargs.tool_calls
148
- : Array.isArray(typed.additional_kwargs?.tool_calls) ? typed.additional_kwargs.tool_calls
149
- : Array.isArray(typed.kwargs?.additional_kwargs?.tool_calls) ? typed.kwargs.additional_kwargs.tool_calls
150
- : Array.isArray(typed.lc_kwargs?.tool_calls) ? typed.lc_kwargs.tool_calls
151
- : Array.isArray(typed.lc_kwargs?.additional_kwargs?.tool_calls) ? typed.lc_kwargs.additional_kwargs.tool_calls
152
- : [];
153
- return raw
154
- .map((toolCall) => {
155
- if (typeof toolCall !== "object" || toolCall === null) {
156
- return null;
157
- }
158
- const call = toolCall;
159
- const name = typeof call.name === "string"
160
- ? call.name
161
- : typeof call.function?.name === "string"
162
- ? call.function.name
163
- : undefined;
164
- const args = parseToolCallArgs(call.args ?? call.function?.arguments);
165
- return {
166
- ...(typeof call.id === "string" ? { id: call.id } : {}),
167
- ...(name ? { name } : {}),
168
- args,
169
- };
170
- })
171
- .filter((toolCall) => toolCall !== null);
172
- }
173
- function todoToolCallIsTerminal(toolCall) {
174
- const todos = toolCall.args?.todos;
175
- if (!Array.isArray(todos) || todos.length === 0) {
176
- return false;
177
- }
178
- return todos.every((todo) => {
179
- if (typeof todo !== "object" || todo === null || typeof todo.status !== "string") {
180
- return false;
181
- }
182
- const status = todo.status.trim().toLowerCase();
183
- return status !== "pending" && status !== "in_progress";
184
- });
185
- }
186
- function readToolResultName(message) {
187
- if (typeof message !== "object" || message === null) {
188
- return "";
189
- }
190
- const typed = message;
191
- const messageType = typeof typed.type === "string"
192
- ? typed.type
193
- : typeof typed._getType === "function"
194
- ? String(typed._getType())
195
- : "";
196
- if (messageType !== "tool" && typeof typed.tool_call_id !== "string") {
197
- return "";
198
- }
199
- return typeof typed.name === "string" ? typed.name : "";
200
- }
201
- function createPlanGuardToolError(toolCallId, content, fallbackPrefix) {
202
- return new ToolMessage({
203
- content,
204
- tool_call_id: typeof toolCallId === "string" ? toolCallId : `${fallbackPrefix}-${Math.random().toString(36).slice(2, 10)}`,
205
- status: "error",
206
- });
207
- }
208
- function isPromiseLike(value) {
209
- return typeof value === "object" && value !== null && typeof value.then === "function";
210
- }
211
- function renderToolInvocationError(error) {
212
- const message = error instanceof Error ? error.message : String(error);
213
- const cause = typeof error === "object" && error !== null && "cause" in error
214
- ? error.cause
215
- : undefined;
216
- const causeMessage = cause instanceof Error ? cause.message : "";
217
- return [message, causeMessage]
218
- .map((value) => value.trim())
219
- .filter(Boolean)
220
- .join("\n");
221
- }
222
- function createToolInvocationErrorMessage(toolCallId, toolName, error) {
223
- const rendered = renderToolInvocationError(error);
224
- return createPlanGuardToolError(toolCallId, `Error invoking tool '${toolName}' with error: ${rendered || "tool invocation failed"}`, "tool-invocation-error");
225
- }
226
- function createTodoPlanGuardMiddleware(options = {}) {
227
- let observedPlanToolResult = false;
228
- let observedNonPlanToolResult = false;
229
- return createMiddleware({
230
- name: "harnessTodoPlanGuard",
231
- wrapToolCall: ((request, handler) => {
232
- const toolName = typeof request.toolCall?.name === "string"
233
- ? request.toolCall.name
234
- : typeof request.tool?.name === "string"
235
- ? request.tool.name
236
- : "";
237
- const messages = Array.isArray(request.state?.messages) ? request.state.messages : [];
238
- const toolResultNames = messages.map(readToolResultName).filter((name) => name.length > 0);
239
- const hasPlanToolResult = toolResultNames.some(isPlanToolName);
240
- const hasNonPlanToolResult = toolResultNames.some((resultToolName) => !isPlanToolName(resultToolName));
241
- const hasExternalPlanEvidence = hasExternalPlanEvidenceInstruction(messages);
242
- if (options.requiresPlan === true
243
- && !(observedPlanToolResult || hasPlanToolResult || hasExternalPlanEvidence)
244
- && toolName.length > 0
245
- && !isPlanToolName(toolName)) {
246
- return createPlanGuardToolError(request.toolCall?.id, "Error: this agent has a required visible planning contract. Your first action must be write_todos with concrete task steps and statuses. Do not call non-planning tools before the initial todo board exists.", "todo-initial-plan-guard");
247
- }
248
- if (options.requiresPlan === true
249
- && (observedPlanToolResult || hasPlanToolResult || hasExternalPlanEvidence)
250
- && !(observedNonPlanToolResult || hasNonPlanToolResult)
251
- && isPlanToolName(toolName)) {
252
- return createPlanGuardToolError(request.toolCall?.id, "Error: the required todo board already exists, but no non-planning evidence tool has returned yet. Do not call write_todos or read_todos again. Your next action must be exactly one non-planning tool call selected from the available tool descriptions and schemas.", "todo-plan-evidence-guard");
253
- }
254
- if (options.requiresPlan === true
255
- && !(observedNonPlanToolResult || hasNonPlanToolResult)
256
- && isPlanToolName(toolName)
257
- && normalizePlanToolName(toolName).includes("write_todos")
258
- && todoToolCallIsTerminal({ args: parseToolCallArgs(request.toolCall?.args) })) {
259
- return createPlanGuardToolError(request.toolCall?.id, "Error: write_todos cannot mark every todo as terminal before any non-planning evidence tool returns. Keep one todo in_progress and the remaining todos pending until evidence tools return.", "write-todos-tool-guard");
260
- }
261
- const markObservedToolResult = () => {
262
- if (isPlanToolName(toolName)) {
263
- observedPlanToolResult = true;
264
- }
265
- else if (toolName.length > 0) {
266
- observedNonPlanToolResult = true;
267
- }
268
- };
269
- const normalizedArgs = normalizeToolArgsForSchema(parseToolCallArgs(request.toolCall?.args), request.tool?.schema, request.toolCall?.args);
270
- const normalizedRequest = {
271
- ...request,
272
- toolCall: request.toolCall
273
- ? {
274
- ...request.toolCall,
275
- args: normalizedArgs,
276
- }
277
- : request.toolCall,
278
- };
279
- try {
280
- const result = handler(normalizedRequest);
281
- if (isPromiseLike(result)) {
282
- return result
283
- .then((value) => {
284
- markObservedToolResult();
285
- return value;
286
- })
287
- .catch((error) => createToolInvocationErrorMessage(request.toolCall?.id, toolName || "unknown", error));
288
- }
289
- markObservedToolResult();
290
- return result;
291
- }
292
- catch (error) {
293
- return createToolInvocationErrorMessage(request.toolCall?.id, toolName || "unknown", error);
294
- }
295
- }),
296
- afterModel: (state) => {
297
- if (!Array.isArray(state.messages) || state.messages.length === 0) {
298
- return;
299
- }
300
- const hasNonPlanToolResult = state.messages.some((message) => {
301
- if (typeof message !== "object" || message === null) {
302
- return false;
303
- }
304
- const typed = message;
305
- const messageType = typeof typed.type === "string"
306
- ? typed.type
307
- : typeof typed._getType === "function"
308
- ? String(typed._getType())
309
- : "";
310
- if (messageType !== "tool" && typeof typed.tool_call_id !== "string") {
311
- return false;
312
- }
313
- const toolName = typeof typed.name === "string" ? typed.name : "";
314
- return toolName.length > 0 && !isPlanToolName(toolName);
315
- });
316
- if (hasNonPlanToolResult) {
317
- return;
318
- }
319
- const lastAiMessage = [...state.messages].reverse().find((message) => readMessageToolCalls(message).length > 0);
320
- const lastToolCalls = readMessageToolCalls(lastAiMessage);
321
- if (!lastAiMessage) {
322
- return;
323
- }
324
- const writeTodosCalls = lastToolCalls.filter((toolCall) => isPlanToolName(toolCall.name));
325
- const prematureCompletedCalls = writeTodosCalls.filter(todoToolCallIsTerminal);
326
- if (prematureCompletedCalls.length === 0) {
327
- return;
328
- }
329
- return {
330
- messages: prematureCompletedCalls.map((toolCall, index) => new ToolMessage({
331
- content: "Error: write_todos cannot mark every todo as terminal before any non-planning evidence tool returns. Keep one todo in_progress and the remaining todos pending until evidence tools return.",
332
- tool_call_id: toolCall.id ?? `write-todos-plan-guard-${index}`,
333
- status: "error",
334
- })),
335
- };
336
- },
337
- });
338
- }
339
154
  function shouldUseConfigurableDeepAgentAssembly(binding) {
340
155
  return getBindingExecutionKind(binding) === "deepagent";
341
156
  }
@@ -933,9 +748,15 @@ export class AgentRuntimeAdapter {
933
748
  const runDelegatedRequest = (text, requestSuffix = "") => this.invoke(targetBinding, text, childSessionId, `${childRequestId}${requestSuffix}`, undefined, [], invokeOptions);
934
749
  let result = await runDelegatedRequest(requestText);
935
750
  const targetRequiresExecutionToolEvidence = getBindingPrimaryTools(targetBinding).length > 0;
936
- if (targetRequiresExecutionToolEvidence && !hasDelegatedExecutionToolEvidence(result)) {
937
- result = await runDelegatedRequest([requestText, EXECUTION_WITH_TOOL_EVIDENCE_RETRY_INSTRUCTION].filter(Boolean).join("\n\n"), ":tool-evidence-retry");
938
- if (!hasDelegatedExecutionToolEvidence(result)) {
751
+ const requiredExecutionToolNames = resolveExplicitRequestedExecutionToolNames(targetBinding, requestText);
752
+ if (targetRequiresExecutionToolEvidence && !hasRequiredDelegatedExecutionToolEvidence(result, requiredExecutionToolNames)) {
753
+ const missingToolNames = listMissingDelegatedExecutionToolEvidence(result, requiredExecutionToolNames);
754
+ result = await runDelegatedRequest([
755
+ requestText,
756
+ EXECUTION_WITH_TOOL_EVIDENCE_RETRY_INSTRUCTION,
757
+ buildExplicitExecutionToolRetryInstruction(missingToolNames),
758
+ ].filter(Boolean).join("\n\n"), ":tool-evidence-retry");
759
+ if (!hasRequiredDelegatedExecutionToolEvidence(result, requiredExecutionToolNames)) {
939
760
  throw new DelegatedExecutionNoToolEvidenceError(targetBinding.agent.id);
940
761
  }
941
762
  }
@@ -1446,6 +1267,7 @@ export class AgentRuntimeAdapter {
1446
1267
  }
1447
1268
  }
1448
1269
  const targetRequiresExecutionToolEvidence = getBindingPrimaryTools(selectedBinding).length > 0;
1270
+ const requiredExecutionToolNames = resolveExplicitRequestedExecutionToolNames(selectedBinding, requestText);
1449
1271
  if (selectedBinding.harnessRuntime.executionContract?.requiresPlan === true
1450
1272
  && !hasDelegatedPlanEvidence(delegatedResult)) {
1451
1273
  try {
@@ -1465,9 +1287,14 @@ export class AgentRuntimeAdapter {
1465
1287
  };
1466
1288
  }
1467
1289
  }
1468
- if (targetRequiresExecutionToolEvidence && !hasDelegatedExecutionToolEvidence(delegatedResult)) {
1290
+ if (targetRequiresExecutionToolEvidence && !hasRequiredDelegatedExecutionToolEvidence(delegatedResult, requiredExecutionToolNames)) {
1291
+ const missingToolNames = listMissingDelegatedExecutionToolEvidence(delegatedResult, requiredExecutionToolNames);
1469
1292
  try {
1470
- delegatedResult = await runDelegatedRequest([requestText, EXECUTION_WITH_TOOL_EVIDENCE_RETRY_INSTRUCTION].filter(Boolean).join("\n\n"), ":tool-evidence-retry", selectedBinding.harnessRuntime.executionContract?.requiresPlan === true
1293
+ delegatedResult = await runDelegatedRequest([
1294
+ requestText,
1295
+ EXECUTION_WITH_TOOL_EVIDENCE_RETRY_INSTRUCTION,
1296
+ buildExplicitExecutionToolRetryInstruction(missingToolNames),
1297
+ ].filter(Boolean).join("\n\n"), ":tool-evidence-retry", selectedBinding.harnessRuntime.executionContract?.requiresPlan === true
1471
1298
  ? {
1472
1299
  suppressInitialRequiredPlanInstruction: true,
1473
1300
  externalPlanEvidence: hasDelegatedPlanEvidence(delegatedResult),
@@ -1502,8 +1329,10 @@ export class AgentRuntimeAdapter {
1502
1329
  },
1503
1330
  };
1504
1331
  }
1505
- if (targetRequiresExecutionToolEvidence && !hasDelegatedExecutionToolEvidence(delegatedResult)) {
1506
- const output = buildDelegatedExecutionEvidenceBlocker(selectedBinding.agent.id, getBindingPrimaryTools(selectedBinding).map((tool) => tool.name));
1332
+ if (targetRequiresExecutionToolEvidence && !hasRequiredDelegatedExecutionToolEvidence(delegatedResult, requiredExecutionToolNames)) {
1333
+ const output = buildDelegatedExecutionEvidenceBlocker(selectedBinding.agent.id, requiredExecutionToolNames.length > 0
1334
+ ? requiredExecutionToolNames
1335
+ : getBindingPrimaryTools(selectedBinding).map((tool) => tool.name));
1507
1336
  return {
1508
1337
  toolOutput: output,
1509
1338
  delegatedSubagentType: subagentType,
@@ -1898,9 +1727,17 @@ export class AgentRuntimeAdapter {
1898
1727
  delegatedResult = mergeDelegatedResultToolEvidence(yield* runPlannedDelegation(planned.subagentType, [delegatedText, DELEGATED_PLAN_EVIDENCE_RETRY_INSTRUCTION].filter(Boolean).join("\n\n"), ":plan-evidence-retry"), previousDelegatedResult);
1899
1728
  }
1900
1729
  const targetRequiresExecutionToolEvidence = selectedBinding ? getBindingPrimaryTools(selectedBinding).length > 0 : false;
1901
- if (targetRequiresExecutionToolEvidence && !hasDelegatedExecutionToolEvidence(delegatedResult)) {
1730
+ const requiredExecutionToolNames = selectedBinding
1731
+ ? resolveExplicitRequestedExecutionToolNames(selectedBinding, planned.description)
1732
+ : [];
1733
+ if (targetRequiresExecutionToolEvidence && !hasRequiredDelegatedExecutionToolEvidence(delegatedResult, requiredExecutionToolNames)) {
1734
+ const missingToolNames = listMissingDelegatedExecutionToolEvidence(delegatedResult, requiredExecutionToolNames);
1902
1735
  const previousDelegatedResult = delegatedResult;
1903
- delegatedResult = mergeDelegatedResultToolEvidence(yield* runPlannedDelegation(planned.subagentType, [delegatedText, EXECUTION_WITH_TOOL_EVIDENCE_RETRY_INSTRUCTION].filter(Boolean).join("\n\n"), ":tool-evidence-retry"), previousDelegatedResult);
1736
+ delegatedResult = mergeDelegatedResultToolEvidence(yield* runPlannedDelegation(planned.subagentType, [
1737
+ delegatedText,
1738
+ EXECUTION_WITH_TOOL_EVIDENCE_RETRY_INSTRUCTION,
1739
+ buildExplicitExecutionToolRetryInstruction(missingToolNames),
1740
+ ].filter(Boolean).join("\n\n"), ":tool-evidence-retry"), previousDelegatedResult);
1904
1741
  }
1905
1742
  if (selectedBinding?.harnessRuntime.executionContract?.requiresPlan === true && !hasDelegatedPlanEvidence(delegatedResult)) {
1906
1743
  const output = buildDelegatedPlanEvidenceBlocker(selectedBinding.agent.id);
@@ -1911,8 +1748,10 @@ export class AgentRuntimeAdapter {
1911
1748
  finalMessageText: output,
1912
1749
  };
1913
1750
  }
1914
- if (targetRequiresExecutionToolEvidence && !hasDelegatedExecutionToolEvidence(delegatedResult)) {
1915
- const output = buildDelegatedExecutionEvidenceBlocker(selectedBinding.agent.id, getBindingPrimaryTools(selectedBinding).map((tool) => tool.name));
1751
+ if (targetRequiresExecutionToolEvidence && !hasRequiredDelegatedExecutionToolEvidence(delegatedResult, requiredExecutionToolNames)) {
1752
+ const output = buildDelegatedExecutionEvidenceBlocker(selectedBinding.agent.id, requiredExecutionToolNames.length > 0
1753
+ ? requiredExecutionToolNames
1754
+ : getBindingPrimaryTools(selectedBinding).map((tool) => tool.name));
1916
1755
  delegatedResult = {
1917
1756
  ...delegatedResult,
1918
1757
  state: "failed",
@@ -2105,14 +1944,20 @@ export class AgentRuntimeAdapter {
2105
1944
  });
2106
1945
  let delegatedResult = yield* runDelegatedStreamAttempt(delegatedText);
2107
1946
  const targetRequiresExecutionToolEvidence = getBindingPrimaryTools(selectedBinding).length > 0;
1947
+ const requiredExecutionToolNames = resolveExplicitRequestedExecutionToolNames(selectedBinding, requestText);
2108
1948
  if (selectedBinding.harnessRuntime.executionContract?.requiresPlan === true
2109
1949
  && !hasDelegatedPlanEvidence(delegatedResult)) {
2110
1950
  const previousDelegatedResult = delegatedResult;
2111
1951
  delegatedResult = mergeDelegatedResultToolEvidence(yield* runDelegatedStreamAttempt([delegatedText, DELEGATED_PLAN_EVIDENCE_RETRY_INSTRUCTION].filter(Boolean).join("\n\n"), ":plan-evidence-retry"), previousDelegatedResult);
2112
1952
  }
2113
- if (targetRequiresExecutionToolEvidence && !hasDelegatedExecutionToolEvidence(delegatedResult)) {
1953
+ if (targetRequiresExecutionToolEvidence && !hasRequiredDelegatedExecutionToolEvidence(delegatedResult, requiredExecutionToolNames)) {
1954
+ const missingToolNames = listMissingDelegatedExecutionToolEvidence(delegatedResult, requiredExecutionToolNames);
2114
1955
  const previousDelegatedResult = delegatedResult;
2115
- delegatedResult = mergeDelegatedResultToolEvidence(yield* runDelegatedStreamAttempt([delegatedText, EXECUTION_WITH_TOOL_EVIDENCE_RETRY_INSTRUCTION].filter(Boolean).join("\n\n"), ":tool-evidence-retry"), previousDelegatedResult);
1956
+ delegatedResult = mergeDelegatedResultToolEvidence(yield* runDelegatedStreamAttempt([
1957
+ delegatedText,
1958
+ EXECUTION_WITH_TOOL_EVIDENCE_RETRY_INSTRUCTION,
1959
+ buildExplicitExecutionToolRetryInstruction(missingToolNames),
1960
+ ].filter(Boolean).join("\n\n"), ":tool-evidence-retry"), previousDelegatedResult);
2116
1961
  }
2117
1962
  if (selectedBinding.harnessRuntime.executionContract?.requiresPlan === true
2118
1963
  && !hasDelegatedPlanEvidence(delegatedResult)) {
@@ -2124,8 +1969,10 @@ export class AgentRuntimeAdapter {
2124
1969
  finalMessageText: output,
2125
1970
  };
2126
1971
  }
2127
- if (targetRequiresExecutionToolEvidence && !hasDelegatedExecutionToolEvidence(delegatedResult)) {
2128
- const output = buildDelegatedExecutionEvidenceBlocker(selectedBinding.agent.id, getBindingPrimaryTools(selectedBinding).map((tool) => tool.name));
1972
+ if (targetRequiresExecutionToolEvidence && !hasRequiredDelegatedExecutionToolEvidence(delegatedResult, requiredExecutionToolNames)) {
1973
+ const output = buildDelegatedExecutionEvidenceBlocker(selectedBinding.agent.id, requiredExecutionToolNames.length > 0
1974
+ ? requiredExecutionToolNames
1975
+ : getBindingPrimaryTools(selectedBinding).map((tool) => tool.name));
2129
1976
  delegatedResult = {
2130
1977
  ...delegatedResult,
2131
1978
  state: "failed",
@@ -35,17 +35,23 @@ function planStateHasUnfinishedItems(planState) {
35
35
  function planStateHasActiveItems(planState) {
36
36
  return planStateHasUnfinishedItems(planState);
37
37
  }
38
+ function isPlanToolName(toolName) {
39
+ const normalized = typeof toolName === "string" ? toolName.trim().toLowerCase().replace(/[\s-]+/gu, "_") : "";
40
+ return normalized === "write_todos"
41
+ || normalized === "read_todos"
42
+ || normalized === "tool_call_write_todos"
43
+ || normalized === "tool_call_read_todos"
44
+ || normalized === "call_write_todos"
45
+ || normalized === "call_read_todos";
46
+ }
38
47
  function isSubstantiveTerminalAssistantOutput(value) {
39
48
  const normalized = sanitizeVisibleText(value).trim();
40
49
  if (normalized.length < 80) {
41
50
  return false;
42
51
  }
43
- if (/\b(?:delegated|waiting|wait for|initiated)\b/i.test(normalized) && !/\b(?:finding|summary|root cause|evidence|completed|result|issue)\b/i.test(normalized)) {
44
- return false;
45
- }
46
52
  return true;
47
53
  }
48
- function inferPlanItemStatusFromTerminalAssistantOutput(value) {
54
+ function inferPlanItemStatusFromTerminalAssistantOutput(value, options = {}) {
49
55
  const terminalStatus = readTerminalExecutionStatus(value);
50
56
  if (terminalStatus) {
51
57
  return mapTerminalStatusToPlanItemStatus(terminalStatus);
@@ -57,6 +63,9 @@ function inferPlanItemStatusFromTerminalAssistantOutput(value) {
57
63
  if (normalized.startsWith("runtime_error=")) {
58
64
  return "failed";
59
65
  }
66
+ if (options.hasSuccessfulExecutionEvidence !== true) {
67
+ return null;
68
+ }
60
69
  return isSubstantiveTerminalAssistantOutput(value) ? "completed" : null;
61
70
  }
62
71
  function mapTerminalStatusToObservedPlanItemStatus(status, sawSuccessfulToolResult) {
@@ -675,6 +684,7 @@ export async function* streamHarnessRun(options) {
675
684
  let syntheticFallback;
676
685
  const toolErrors = [];
677
686
  let sawSuccessfulToolResult = false;
687
+ let sawSuccessfulExecutionToolResult = false;
678
688
  let lastToolResultKey = null;
679
689
  const executedToolResults = [];
680
690
  const emittedCommentary = new Set();
@@ -873,6 +883,9 @@ export async function* streamHarnessRun(options) {
873
883
  }
874
884
  else {
875
885
  sawSuccessfulToolResult = true;
886
+ if (!isPlanToolName(normalizedChunk.toolName) && normalizedChunk.toolName !== "task") {
887
+ sawSuccessfulExecutionToolResult = true;
888
+ }
876
889
  }
877
890
  yield {
878
891
  type: "tool-result",
@@ -1055,12 +1068,14 @@ export async function* streamHarnessRun(options) {
1055
1068
  }
1056
1069
  currentPlanState = await refreshPlanStateFromPersistence(options, currentPlanState);
1057
1070
  const explicitTerminalAssistantStatus = readTerminalExecutionStatus(assistantOutput);
1058
- let terminalAssistantPlanItemStatus = inferPlanItemStatusFromTerminalAssistantOutput(assistantOutput);
1059
- if (explicitTerminalAssistantStatus === "blocked" && sawSuccessfulToolResult) {
1071
+ let terminalAssistantPlanItemStatus = inferPlanItemStatusFromTerminalAssistantOutput(assistantOutput, {
1072
+ hasSuccessfulExecutionEvidence: sawSuccessfulExecutionToolResult,
1073
+ });
1074
+ if (explicitTerminalAssistantStatus === "blocked" && sawSuccessfulExecutionToolResult) {
1060
1075
  terminalAssistantPlanItemStatus = "completed";
1061
1076
  }
1062
1077
  if (terminalAssistantPlanItemStatus === "failed"
1063
- && sawSuccessfulToolResult
1078
+ && sawSuccessfulExecutionToolResult
1064
1079
  && !explicitTerminalAssistantStatus
1065
1080
  && !sanitizeVisibleText(assistantOutput).trim().toLowerCase().startsWith("runtime_error=")
1066
1081
  && assistantOutput.trim()) {
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@botbotgo/agent-harness",
3
- "version": "0.0.443",
3
+ "version": "0.0.445",
4
4
  "description": "Workspace runtime for multi-agent applications",
5
5
  "license": "MIT",
6
6
  "type": "module",