npm - @botbotgo/agent-harness - Versions diffs - 0.0.443 → 0.0.445 - Mend

@botbotgo/agent-harness 0.0.443 → 0.0.445

This diff shows the differences between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.

Files changed (5)

package/dist/package-version.d.ts +1 -1
package/dist/package-version.js +1 -1
package/dist/runtime/agent-runtime-adapter.js +103 -256
package/dist/runtime/harness/run/stream-run.js +22 -7
package/package.json +1 -1

package/dist/package-version.d.ts CHANGED Viewed

@@ -1,2 +1,2 @@
-export declare const AGENT_HARNESS_VERSION = "0.0.443";
+export declare const AGENT_HARNESS_VERSION = "0.0.445";
 export declare const AGENT_HARNESS_RELEASE_DATE = "2026-05-04";

package/dist/package-version.js CHANGED Viewed

@@ -1,2 +1,2 @@
-export const AGENT_HARNESS_VERSION = "0.0.443";
+export const AGENT_HARNESS_VERSION = "0.0.445";
 export const AGENT_HARNESS_RELEASE_DATE = "2026-05-04";

package/dist/runtime/agent-runtime-adapter.js CHANGED Viewed

@@ -1,7 +1,7 @@
 import path from "node:path";
 import { createHash } from "node:crypto";
 import { createAsyncSubAgentMiddleware, createFilesystemMiddleware, createMemoryMiddleware, createPatchToolCallsMiddleware, createSkillsMiddleware, createSummarizationMiddleware, createSubAgentMiddleware, FilesystemBackend, StateBackend, } from "deepagents";
-import { createAgent, createMiddleware, humanInTheLoopMiddleware, todoListMiddleware, ToolMessage } from "langchain";
+import { createAgent, humanInTheLoopMiddleware, todoListMiddleware } from "langchain";
 import { sanitizeVisibleText, tryParseJson, wrapResolvedModel, } from "./parsing/output-parsing.js";
 import { salvageJsonToolCalls } from "./parsing/output-tool-args.js";
 import { extractMessageText } from "../utils/message-content.js";
@@ -17,7 +17,6 @@ import { extractSubagentRequestText, invokeBuiltinTaskTool as invokeBuiltinTaskT
 import { isEmptyFinalAiMessageError, resolveBindingTimeout, resolveStreamIdleTimeout, } from "./adapter/resilience.js";
 import { createResolvedModel } from "./adapter/model/model-providers.js";
 import { appendProviderToolCallAliasTools, resolveAdapterTools } from "./adapter/tool-resolution.js";
-import { normalizeToolArgsForSchema } from "./adapter/tool/tool-arguments.js";
 import { resolveRuntimeStreamExecutionContext, } from "./adapter/flow/execution-context.js";
 import { isRetryableProviderError } from "./adapter/resilience.js";
 import { UPSTREAM_REQUEST_CONFIG_KEY, UPSTREAM_SESSION_CONFIG_KEY } from "./adapter/upstream-configurable-keys.js";
@@ -39,6 +38,61 @@ function hasDelegatedExecutionToolEvidence(result) {
     return executedToolResults.some((toolResult) => (toolResult.isError !== true
         && !isPlanToolName(toolResult.toolName)));
 }
+function normalizeEvidenceToolName(toolName) {
+    return typeof toolName === "string" ? toolName.trim().toLowerCase() : "";
+}
+function collectSuccessfulDelegatedExecutionToolNames(result) {
+    const executedToolResults = Array.isArray(result.metadata?.executedToolResults)
+        ? result.metadata.executedToolResults
+        : [];
+    return new Set(executedToolResults
+        .filter((toolResult) => toolResult.isError !== true && !isPlanToolName(toolResult.toolName))
+        .map((toolResult) => normalizeEvidenceToolName(toolResult.toolName))
+        .filter((toolName) => toolName.length > 0));
+}
+function textExplicitlyNamesConfiguredTool(text, toolName) {
+    const name = toolName.trim();
+    if (!name) {
+        return false;
+    }
+    const pattern = new RegExp(`(?:^|[^\\p{L}\\p{N}_-])${escapeRegExp(name)}(?:$|[^\\p{L}\\p{N}_-])`, "iu");
+    return pattern.test(text);
+}
+function resolveExplicitRequestedExecutionToolNames(binding, requestText) {
+    const text = requestText.trim();
+    if (!text) {
+        return [];
+    }
+    return getBindingPrimaryTools(binding)
+        .map((tool) => tool.name)
+        .filter((toolName) => typeof toolName === "string" && toolName.trim().length > 0)
+        .filter((toolName) => !isPlanToolName(toolName))
+        .filter((toolName) => textExplicitlyNamesConfiguredTool(text, toolName));
+}
+function listMissingDelegatedExecutionToolEvidence(result, requiredToolNames = []) {
+    if (requiredToolNames.length === 0) {
+        return hasDelegatedExecutionToolEvidence(result) ? [] : ["configured non-planning tools"];
+    }
+    const observed = collectSuccessfulDelegatedExecutionToolNames(result);
+    return requiredToolNames.filter((toolName) => !observed.has(normalizeEvidenceToolName(toolName)));
+}
+function hasRequiredDelegatedExecutionToolEvidence(result, requiredToolNames = []) {
+    return hasDelegatedExecutionToolEvidence(result)
+        && listMissingDelegatedExecutionToolEvidence(result, requiredToolNames).length === 0;
+}
+function buildExplicitExecutionToolRetryInstruction(missingToolNames) {
+    const tools = missingToolNames
+        .filter((toolName) => toolName !== "configured non-planning tools")
+        .join(", ");
+    if (!tools) {
+        return "";
+    }
+    return [
+        `The request explicitly named configured evidence tool(s): ${tools}.`,
+        "Before the final answer, call every listed non-planning tool that has not already produced a successful tool result.",
+        "Do not substitute a different evidence tool for an explicitly named configured tool unless that tool invocation itself fails and the blocker is reported.",
+    ].join("\n");
+}
 function buildDelegatedPlanEvidenceBlocker(agentId) {
     return JSON.stringify({
         status: "blocked",
@@ -97,245 +151,6 @@ function readConfiguredToolName(value) {
     const typed = value;
     return typeof typed.name === "string" ? typed.name.trim() : "";
 }
-function readMessageContentText(message) {
-    if (typeof message !== "object" || message === null) {
-        return "";
-    }
-    const content = message.content;
-    if (typeof content === "string") {
-        return content.trim();
-    }
-    if (!Array.isArray(content)) {
-        return "";
-    }
-    return content
-        .map((part) => typeof part === "object" && part !== null && typeof part.text === "string"
-        ? part.text
-        : "")
-        .join("")
-        .trim();
-}
-function hasExternalPlanEvidenceInstruction(messages) {
-    return messages.some((message) => {
-        const text = readMessageContentText(message);
-        return /todo board already exists|required todo board already exists|non[-\s]?planning tool call/iu.test(text);
-    });
-}
-function parseToolCallArgs(value) {
-    if (typeof value === "object" && value !== null && !Array.isArray(value)) {
-        return value;
-    }
-    if (typeof value !== "string" || value.trim().length === 0) {
-        return {};
-    }
-    try {
-        const parsed = JSON.parse(value);
-        return typeof parsed === "object" && parsed !== null && !Array.isArray(parsed)
-            ? parsed
-            : {};
-    }
-    catch {
-        return {};
-    }
-}
-function readMessageToolCalls(message) {
-    if (typeof message !== "object" || message === null) {
-        return [];
-    }
-    const typed = message;
-    const raw = Array.isArray(typed.tool_calls) ? typed.tool_calls
-        : Array.isArray(typed.kwargs?.tool_calls) ? typed.kwargs.tool_calls
-            : Array.isArray(typed.additional_kwargs?.tool_calls) ? typed.additional_kwargs.tool_calls
-                : Array.isArray(typed.kwargs?.additional_kwargs?.tool_calls) ? typed.kwargs.additional_kwargs.tool_calls
-                    : Array.isArray(typed.lc_kwargs?.tool_calls) ? typed.lc_kwargs.tool_calls
-                        : Array.isArray(typed.lc_kwargs?.additional_kwargs?.tool_calls) ? typed.lc_kwargs.additional_kwargs.tool_calls
-                            : [];
-    return raw
-        .map((toolCall) => {
-        if (typeof toolCall !== "object" || toolCall === null) {
-            return null;
-        }
-        const call = toolCall;
-        const name = typeof call.name === "string"
-            ? call.name
-            : typeof call.function?.name === "string"
-                ? call.function.name
-                : undefined;
-        const args = parseToolCallArgs(call.args ?? call.function?.arguments);
-        return {
-            ...(typeof call.id === "string" ? { id: call.id } : {}),
-            ...(name ? { name } : {}),
-            args,
-        };
-    })
-        .filter((toolCall) => toolCall !== null);
-}
-function todoToolCallIsTerminal(toolCall) {
-    const todos = toolCall.args?.todos;
-    if (!Array.isArray(todos) || todos.length === 0) {
-        return false;
-    }
-    return todos.every((todo) => {
-        if (typeof todo !== "object" || todo === null || typeof todo.status !== "string") {
-            return false;
-        }
-        const status = todo.status.trim().toLowerCase();
-        return status !== "pending" && status !== "in_progress";
-    });
-}
-function readToolResultName(message) {
-    if (typeof message !== "object" || message === null) {
-        return "";
-    }
-    const typed = message;
-    const messageType = typeof typed.type === "string"
-        ? typed.type
-        : typeof typed._getType === "function"
-            ? String(typed._getType())
-            : "";
-    if (messageType !== "tool" && typeof typed.tool_call_id !== "string") {
-        return "";
-    }
-    return typeof typed.name === "string" ? typed.name : "";
-}
-function createPlanGuardToolError(toolCallId, content, fallbackPrefix) {
-    return new ToolMessage({
-        content,
-        tool_call_id: typeof toolCallId === "string" ? toolCallId : `${fallbackPrefix}-${Math.random().toString(36).slice(2, 10)}`,
-        status: "error",
-    });
-}
-function isPromiseLike(value) {
-    return typeof value === "object" && value !== null && typeof value.then === "function";
-}
-function renderToolInvocationError(error) {
-    const message = error instanceof Error ? error.message : String(error);
-    const cause = typeof error === "object" && error !== null && "cause" in error
-        ? error.cause
-        : undefined;
-    const causeMessage = cause instanceof Error ? cause.message : "";
-    return [message, causeMessage]
-        .map((value) => value.trim())
-        .filter(Boolean)
-        .join("\n");
-}
-function createToolInvocationErrorMessage(toolCallId, toolName, error) {
-    const rendered = renderToolInvocationError(error);
-    return createPlanGuardToolError(toolCallId, `Error invoking tool '${toolName}' with error: ${rendered || "tool invocation failed"}`, "tool-invocation-error");
-}
-function createTodoPlanGuardMiddleware(options = {}) {
-    let observedPlanToolResult = false;
-    let observedNonPlanToolResult = false;
-    return createMiddleware({
-        name: "harnessTodoPlanGuard",
-        wrapToolCall: ((request, handler) => {
-            const toolName = typeof request.toolCall?.name === "string"
-                ? request.toolCall.name
-                : typeof request.tool?.name === "string"
-                    ? request.tool.name
-                    : "";
-            const messages = Array.isArray(request.state?.messages) ? request.state.messages : [];
-            const toolResultNames = messages.map(readToolResultName).filter((name) => name.length > 0);
-            const hasPlanToolResult = toolResultNames.some(isPlanToolName);
-            const hasNonPlanToolResult = toolResultNames.some((resultToolName) => !isPlanToolName(resultToolName));
-            const hasExternalPlanEvidence = hasExternalPlanEvidenceInstruction(messages);
-            if (options.requiresPlan === true
-                && !(observedPlanToolResult || hasPlanToolResult || hasExternalPlanEvidence)
-                && toolName.length > 0
-                && !isPlanToolName(toolName)) {
-                return createPlanGuardToolError(request.toolCall?.id, "Error: this agent has a required visible planning contract. Your first action must be write_todos with concrete task steps and statuses. Do not call non-planning tools before the initial todo board exists.", "todo-initial-plan-guard");
-            }
-            if (options.requiresPlan === true
-                && (observedPlanToolResult || hasPlanToolResult || hasExternalPlanEvidence)
-                && !(observedNonPlanToolResult || hasNonPlanToolResult)
-                && isPlanToolName(toolName)) {
-                return createPlanGuardToolError(request.toolCall?.id, "Error: the required todo board already exists, but no non-planning evidence tool has returned yet. Do not call write_todos or read_todos again. Your next action must be exactly one non-planning tool call selected from the available tool descriptions and schemas.", "todo-plan-evidence-guard");
-            }
-            if (options.requiresPlan === true
-                && !(observedNonPlanToolResult || hasNonPlanToolResult)
-                && isPlanToolName(toolName)
-                && normalizePlanToolName(toolName).includes("write_todos")
-                && todoToolCallIsTerminal({ args: parseToolCallArgs(request.toolCall?.args) })) {
-                return createPlanGuardToolError(request.toolCall?.id, "Error: write_todos cannot mark every todo as terminal before any non-planning evidence tool returns. Keep one todo in_progress and the remaining todos pending until evidence tools return.", "write-todos-tool-guard");
-            }
-            const markObservedToolResult = () => {
-                if (isPlanToolName(toolName)) {
-                    observedPlanToolResult = true;
-                }
-                else if (toolName.length > 0) {
-                    observedNonPlanToolResult = true;
-                }
-            };
-            const normalizedArgs = normalizeToolArgsForSchema(parseToolCallArgs(request.toolCall?.args), request.tool?.schema, request.toolCall?.args);
-            const normalizedRequest = {
-                ...request,
-                toolCall: request.toolCall
-                    ? {
-                        ...request.toolCall,
-                        args: normalizedArgs,
-                    }
-                    : request.toolCall,
-            };
-            try {
-                const result = handler(normalizedRequest);
-                if (isPromiseLike(result)) {
-                    return result
-                        .then((value) => {
-                        markObservedToolResult();
-                        return value;
-                    })
-                        .catch((error) => createToolInvocationErrorMessage(request.toolCall?.id, toolName || "unknown", error));
-                }
-                markObservedToolResult();
-                return result;
-            }
-            catch (error) {
-                return createToolInvocationErrorMessage(request.toolCall?.id, toolName || "unknown", error);
-            }
-        }),
-        afterModel: (state) => {
-            if (!Array.isArray(state.messages) || state.messages.length === 0) {
-                return;
-            }
-            const hasNonPlanToolResult = state.messages.some((message) => {
-                if (typeof message !== "object" || message === null) {
-                    return false;
-                }
-                const typed = message;
-                const messageType = typeof typed.type === "string"
-                    ? typed.type
-                    : typeof typed._getType === "function"
-                        ? String(typed._getType())
-                        : "";
-                if (messageType !== "tool" && typeof typed.tool_call_id !== "string") {
-                    return false;
-                }
-                const toolName = typeof typed.name === "string" ? typed.name : "";
-                return toolName.length > 0 && !isPlanToolName(toolName);
-            });
-            if (hasNonPlanToolResult) {
-                return;
-            }
-            const lastAiMessage = [...state.messages].reverse().find((message) => readMessageToolCalls(message).length > 0);
-            const lastToolCalls = readMessageToolCalls(lastAiMessage);
-            if (!lastAiMessage) {
-                return;
-            }
-            const writeTodosCalls = lastToolCalls.filter((toolCall) => isPlanToolName(toolCall.name));
-            const prematureCompletedCalls = writeTodosCalls.filter(todoToolCallIsTerminal);
-            if (prematureCompletedCalls.length === 0) {
-                return;
-            }
-            return {
-                messages: prematureCompletedCalls.map((toolCall, index) => new ToolMessage({
-                    content: "Error: write_todos cannot mark every todo as terminal before any non-planning evidence tool returns. Keep one todo in_progress and the remaining todos pending until evidence tools return.",
-                    tool_call_id: toolCall.id ?? `write-todos-plan-guard-${index}`,
-                    status: "error",
-                })),
-            };
-        },
-    });
-}
 function shouldUseConfigurableDeepAgentAssembly(binding) {
     return getBindingExecutionKind(binding) === "deepagent";
 }
@@ -933,9 +748,15 @@ export class AgentRuntimeAdapter {
                             const runDelegatedRequest = (text, requestSuffix = "") => this.invoke(targetBinding, text, childSessionId, `${childRequestId}${requestSuffix}`, undefined, [], invokeOptions);
                             let result = await runDelegatedRequest(requestText);
                             const targetRequiresExecutionToolEvidence = getBindingPrimaryTools(targetBinding).length > 0;
-                            if (targetRequiresExecutionToolEvidence && !hasDelegatedExecutionToolEvidence(result)) {
-                                result = await runDelegatedRequest([requestText, EXECUTION_WITH_TOOL_EVIDENCE_RETRY_INSTRUCTION].filter(Boolean).join("\n\n"), ":tool-evidence-retry");
-                                if (!hasDelegatedExecutionToolEvidence(result)) {
+                            const requiredExecutionToolNames = resolveExplicitRequestedExecutionToolNames(targetBinding, requestText);
+                            if (targetRequiresExecutionToolEvidence && !hasRequiredDelegatedExecutionToolEvidence(result, requiredExecutionToolNames)) {
+                                const missingToolNames = listMissingDelegatedExecutionToolEvidence(result, requiredExecutionToolNames);
+                                result = await runDelegatedRequest([
+                                    requestText,
+                                    EXECUTION_WITH_TOOL_EVIDENCE_RETRY_INSTRUCTION,
+                                    buildExplicitExecutionToolRetryInstruction(missingToolNames),
+                                ].filter(Boolean).join("\n\n"), ":tool-evidence-retry");
+                                if (!hasRequiredDelegatedExecutionToolEvidence(result, requiredExecutionToolNames)) {
                                     throw new DelegatedExecutionNoToolEvidenceError(targetBinding.agent.id);
                                 }
                             }
@@ -1446,6 +1267,7 @@ export class AgentRuntimeAdapter {
             }
         }
         const targetRequiresExecutionToolEvidence = getBindingPrimaryTools(selectedBinding).length > 0;
+        const requiredExecutionToolNames = resolveExplicitRequestedExecutionToolNames(selectedBinding, requestText);
         if (selectedBinding.harnessRuntime.executionContract?.requiresPlan === true
             && !hasDelegatedPlanEvidence(delegatedResult)) {
             try {
@@ -1465,9 +1287,14 @@ export class AgentRuntimeAdapter {
                 };
             }
         }
-        if (targetRequiresExecutionToolEvidence && !hasDelegatedExecutionToolEvidence(delegatedResult)) {
+        if (targetRequiresExecutionToolEvidence && !hasRequiredDelegatedExecutionToolEvidence(delegatedResult, requiredExecutionToolNames)) {
+            const missingToolNames = listMissingDelegatedExecutionToolEvidence(delegatedResult, requiredExecutionToolNames);
             try {
-                delegatedResult = await runDelegatedRequest([requestText, EXECUTION_WITH_TOOL_EVIDENCE_RETRY_INSTRUCTION].filter(Boolean).join("\n\n"), ":tool-evidence-retry", selectedBinding.harnessRuntime.executionContract?.requiresPlan === true
+                delegatedResult = await runDelegatedRequest([
+                    requestText,
+                    EXECUTION_WITH_TOOL_EVIDENCE_RETRY_INSTRUCTION,
+                    buildExplicitExecutionToolRetryInstruction(missingToolNames),
+                ].filter(Boolean).join("\n\n"), ":tool-evidence-retry", selectedBinding.harnessRuntime.executionContract?.requiresPlan === true
                     ? {
                         suppressInitialRequiredPlanInstruction: true,
                         externalPlanEvidence: hasDelegatedPlanEvidence(delegatedResult),
@@ -1502,8 +1329,10 @@ export class AgentRuntimeAdapter {
                 },
             };
         }
-        if (targetRequiresExecutionToolEvidence && !hasDelegatedExecutionToolEvidence(delegatedResult)) {
-            const output = buildDelegatedExecutionEvidenceBlocker(selectedBinding.agent.id, getBindingPrimaryTools(selectedBinding).map((tool) => tool.name));
+        if (targetRequiresExecutionToolEvidence && !hasRequiredDelegatedExecutionToolEvidence(delegatedResult, requiredExecutionToolNames)) {
+            const output = buildDelegatedExecutionEvidenceBlocker(selectedBinding.agent.id, requiredExecutionToolNames.length > 0
+                ? requiredExecutionToolNames
+                : getBindingPrimaryTools(selectedBinding).map((tool) => tool.name));
             return {
                 toolOutput: output,
                 delegatedSubagentType: subagentType,
@@ -1898,9 +1727,17 @@ export class AgentRuntimeAdapter {
                     delegatedResult = mergeDelegatedResultToolEvidence(yield* runPlannedDelegation(planned.subagentType, [delegatedText, DELEGATED_PLAN_EVIDENCE_RETRY_INSTRUCTION].filter(Boolean).join("\n\n"), ":plan-evidence-retry"), previousDelegatedResult);
                 }
                 const targetRequiresExecutionToolEvidence = selectedBinding ? getBindingPrimaryTools(selectedBinding).length > 0 : false;
-                if (targetRequiresExecutionToolEvidence && !hasDelegatedExecutionToolEvidence(delegatedResult)) {
+                const requiredExecutionToolNames = selectedBinding
+                    ? resolveExplicitRequestedExecutionToolNames(selectedBinding, planned.description)
+                    : [];
+                if (targetRequiresExecutionToolEvidence && !hasRequiredDelegatedExecutionToolEvidence(delegatedResult, requiredExecutionToolNames)) {
+                    const missingToolNames = listMissingDelegatedExecutionToolEvidence(delegatedResult, requiredExecutionToolNames);
                     const previousDelegatedResult = delegatedResult;
-                    delegatedResult = mergeDelegatedResultToolEvidence(yield* runPlannedDelegation(planned.subagentType, [delegatedText, EXECUTION_WITH_TOOL_EVIDENCE_RETRY_INSTRUCTION].filter(Boolean).join("\n\n"), ":tool-evidence-retry"), previousDelegatedResult);
+                    delegatedResult = mergeDelegatedResultToolEvidence(yield* runPlannedDelegation(planned.subagentType, [
+                        delegatedText,
+                        EXECUTION_WITH_TOOL_EVIDENCE_RETRY_INSTRUCTION,
+                        buildExplicitExecutionToolRetryInstruction(missingToolNames),
+                    ].filter(Boolean).join("\n\n"), ":tool-evidence-retry"), previousDelegatedResult);
                 }
                 if (selectedBinding?.harnessRuntime.executionContract?.requiresPlan === true && !hasDelegatedPlanEvidence(delegatedResult)) {
                     const output = buildDelegatedPlanEvidenceBlocker(selectedBinding.agent.id);
@@ -1911,8 +1748,10 @@ export class AgentRuntimeAdapter {
                         finalMessageText: output,
                     };
                 }
-                if (targetRequiresExecutionToolEvidence && !hasDelegatedExecutionToolEvidence(delegatedResult)) {
-                    const output = buildDelegatedExecutionEvidenceBlocker(selectedBinding.agent.id, getBindingPrimaryTools(selectedBinding).map((tool) => tool.name));
+                if (targetRequiresExecutionToolEvidence && !hasRequiredDelegatedExecutionToolEvidence(delegatedResult, requiredExecutionToolNames)) {
+                    const output = buildDelegatedExecutionEvidenceBlocker(selectedBinding.agent.id, requiredExecutionToolNames.length > 0
+                        ? requiredExecutionToolNames
+                        : getBindingPrimaryTools(selectedBinding).map((tool) => tool.name));
                     delegatedResult = {
                         ...delegatedResult,
                         state: "failed",
@@ -2105,14 +1944,20 @@ export class AgentRuntimeAdapter {
         });
         let delegatedResult = yield* runDelegatedStreamAttempt(delegatedText);
         const targetRequiresExecutionToolEvidence = getBindingPrimaryTools(selectedBinding).length > 0;
+        const requiredExecutionToolNames = resolveExplicitRequestedExecutionToolNames(selectedBinding, requestText);
         if (selectedBinding.harnessRuntime.executionContract?.requiresPlan === true
             && !hasDelegatedPlanEvidence(delegatedResult)) {
             const previousDelegatedResult = delegatedResult;
             delegatedResult = mergeDelegatedResultToolEvidence(yield* runDelegatedStreamAttempt([delegatedText, DELEGATED_PLAN_EVIDENCE_RETRY_INSTRUCTION].filter(Boolean).join("\n\n"), ":plan-evidence-retry"), previousDelegatedResult);
         }
-        if (targetRequiresExecutionToolEvidence && !hasDelegatedExecutionToolEvidence(delegatedResult)) {
+        if (targetRequiresExecutionToolEvidence && !hasRequiredDelegatedExecutionToolEvidence(delegatedResult, requiredExecutionToolNames)) {
+            const missingToolNames = listMissingDelegatedExecutionToolEvidence(delegatedResult, requiredExecutionToolNames);
             const previousDelegatedResult = delegatedResult;
-            delegatedResult = mergeDelegatedResultToolEvidence(yield* runDelegatedStreamAttempt([delegatedText, EXECUTION_WITH_TOOL_EVIDENCE_RETRY_INSTRUCTION].filter(Boolean).join("\n\n"), ":tool-evidence-retry"), previousDelegatedResult);
+            delegatedResult = mergeDelegatedResultToolEvidence(yield* runDelegatedStreamAttempt([
+                delegatedText,
+                EXECUTION_WITH_TOOL_EVIDENCE_RETRY_INSTRUCTION,
+                buildExplicitExecutionToolRetryInstruction(missingToolNames),
+            ].filter(Boolean).join("\n\n"), ":tool-evidence-retry"), previousDelegatedResult);
         }
         if (selectedBinding.harnessRuntime.executionContract?.requiresPlan === true
             && !hasDelegatedPlanEvidence(delegatedResult)) {
@@ -2124,8 +1969,10 @@ export class AgentRuntimeAdapter {
                 finalMessageText: output,
             };
         }
-        if (targetRequiresExecutionToolEvidence && !hasDelegatedExecutionToolEvidence(delegatedResult)) {
-            const output = buildDelegatedExecutionEvidenceBlocker(selectedBinding.agent.id, getBindingPrimaryTools(selectedBinding).map((tool) => tool.name));
+        if (targetRequiresExecutionToolEvidence && !hasRequiredDelegatedExecutionToolEvidence(delegatedResult, requiredExecutionToolNames)) {
+            const output = buildDelegatedExecutionEvidenceBlocker(selectedBinding.agent.id, requiredExecutionToolNames.length > 0
+                ? requiredExecutionToolNames
+                : getBindingPrimaryTools(selectedBinding).map((tool) => tool.name));
             delegatedResult = {
                 ...delegatedResult,
                 state: "failed",

package/dist/runtime/harness/run/stream-run.js CHANGED Viewed

@@ -35,17 +35,23 @@ function planStateHasUnfinishedItems(planState) {
 function planStateHasActiveItems(planState) {
     return planStateHasUnfinishedItems(planState);
 }
+function isPlanToolName(toolName) {
+    const normalized = typeof toolName === "string" ? toolName.trim().toLowerCase().replace(/[\s-]+/gu, "_") : "";
+    return normalized === "write_todos"
+        || normalized === "read_todos"
+        || normalized === "tool_call_write_todos"
+        || normalized === "tool_call_read_todos"
+        || normalized === "call_write_todos"
+        || normalized === "call_read_todos";
+}
 function isSubstantiveTerminalAssistantOutput(value) {
     const normalized = sanitizeVisibleText(value).trim();
     if (normalized.length < 80) {
         return false;
     }
-    if (/\b(?:delegated|waiting|wait for|initiated)\b/i.test(normalized) && !/\b(?:finding|summary|root cause|evidence|completed|result|issue)\b/i.test(normalized)) {
-        return false;
-    }
     return true;
 }
-function inferPlanItemStatusFromTerminalAssistantOutput(value) {
+function inferPlanItemStatusFromTerminalAssistantOutput(value, options = {}) {
     const terminalStatus = readTerminalExecutionStatus(value);
     if (terminalStatus) {
         return mapTerminalStatusToPlanItemStatus(terminalStatus);
@@ -57,6 +63,9 @@ function inferPlanItemStatusFromTerminalAssistantOutput(value) {
     if (normalized.startsWith("runtime_error=")) {
         return "failed";
     }
+    if (options.hasSuccessfulExecutionEvidence !== true) {
+        return null;
+    }
     return isSubstantiveTerminalAssistantOutput(value) ? "completed" : null;
 }
 function mapTerminalStatusToObservedPlanItemStatus(status, sawSuccessfulToolResult) {
@@ -675,6 +684,7 @@ export async function* streamHarnessRun(options) {
     let syntheticFallback;
     const toolErrors = [];
     let sawSuccessfulToolResult = false;
+    let sawSuccessfulExecutionToolResult = false;
     let lastToolResultKey = null;
     const executedToolResults = [];
     const emittedCommentary = new Set();
@@ -873,6 +883,9 @@ export async function* streamHarnessRun(options) {
                 }
                 else {
                     sawSuccessfulToolResult = true;
+                    if (!isPlanToolName(normalizedChunk.toolName) && normalizedChunk.toolName !== "task") {
+                        sawSuccessfulExecutionToolResult = true;
+                    }
                 }
                 yield {
                     type: "tool-result",
@@ -1055,12 +1068,14 @@ export async function* streamHarnessRun(options) {
         }
         currentPlanState = await refreshPlanStateFromPersistence(options, currentPlanState);
         const explicitTerminalAssistantStatus = readTerminalExecutionStatus(assistantOutput);
-        let terminalAssistantPlanItemStatus = inferPlanItemStatusFromTerminalAssistantOutput(assistantOutput);
-        if (explicitTerminalAssistantStatus === "blocked" && sawSuccessfulToolResult) {
+        let terminalAssistantPlanItemStatus = inferPlanItemStatusFromTerminalAssistantOutput(assistantOutput, {
+            hasSuccessfulExecutionEvidence: sawSuccessfulExecutionToolResult,
+        });
+        if (explicitTerminalAssistantStatus === "blocked" && sawSuccessfulExecutionToolResult) {
             terminalAssistantPlanItemStatus = "completed";
         }
         if (terminalAssistantPlanItemStatus === "failed"
-            && sawSuccessfulToolResult
+            && sawSuccessfulExecutionToolResult
             && !explicitTerminalAssistantStatus
             && !sanitizeVisibleText(assistantOutput).trim().toLowerCase().startsWith("runtime_error=")
             && assistantOutput.trim()) {

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@botbotgo/agent-harness",
-  "version": "0.0.443",
+  "version": "0.0.445",
   "description": "Workspace runtime for multi-agent applications",
   "license": "MIT",
   "type": "module",