npm - @botbotgo/agent-harness - Versions diffs - 0.0.463 → 0.0.465 - Mend

@botbotgo/agent-harness 0.0.463 → 0.0.465

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (18)

package/dist/package-version.d.ts +1 -1
package/dist/package-version.js +1 -1
package/dist/resources/prompts/runtime/write-todos-full-entry.md +1 -1
package/dist/runtime/adapter/flow/stream-runtime.js +101 -6
package/dist/runtime/adapter/invocation-result.js +15 -2
package/dist/runtime/adapter/local-tool-invocation.js +49 -1
package/dist/runtime/adapter/runtime-adapter-support.d.ts +0 -1
package/dist/runtime/adapter/runtime-adapter-support.js +10 -7
package/dist/runtime/adapter/stream-event-projection.d.ts +1 -0
package/dist/runtime/adapter/stream-event-projection.js +75 -16
package/dist/runtime/adapter/tool/builtin-middleware-tools.js +1 -9
package/dist/runtime/adapter/tool/tool-arguments.js +145 -10
package/dist/runtime/agent-runtime-adapter.d.ts +12 -0
package/dist/runtime/agent-runtime-adapter.js +217 -29
package/dist/runtime/parsing/output-recovery.js +2 -1
package/dist/runtime/parsing/output-tool-args.js +20 -1
package/dist/runtime/parsing/stream-event-parsing.js +0 -32
package/package.json +1 -1

package/dist/package-version.d.ts CHANGED Viewed

@@ -1,2 +1,2 @@
-export declare const AGENT_HARNESS_VERSION = "0.0.463";
+export declare const AGENT_HARNESS_VERSION = "0.0.465";
 export declare const AGENT_HARNESS_RELEASE_DATE = "2026-05-04";

package/dist/package-version.js CHANGED Viewed

@@ -1,2 +1,2 @@
-export const AGENT_HARNESS_VERSION = "0.0.463";
+export const AGENT_HARNESS_VERSION = "0.0.465";
 export const AGENT_HARNESS_RELEASE_DATE = "2026-05-04";

package/dist/resources/prompts/runtime/write-todos-full-entry.md CHANGED Viewed

@@ -1 +1 @@
- When calling write_todos, every todo item must include both content and status. Do not send status-only updates. Retry by resending the full todo entry with the original content preserved.
+ When calling write_todos, every todo item must include both content and status. Use only these status values: pending, in_progress, completed. Do not send aliases such as not_started, open, active, done, blocked, failed, or cancelled. Do not send status-only updates. Retry by resending the full todo entry with the original content preserved.

package/dist/runtime/adapter/flow/stream-runtime.js CHANGED Viewed

@@ -1,5 +1,5 @@
 import { extractVisibleOutput, isToolCallRecoveryFailure, isRetrySafeInvalidToolSelectionError, appendToolRecoveryInstruction, resolveMissingPlanRecoveryInstruction, resolveExecutionWithoutToolEvidenceTextInstruction, shouldValidateExecutionWithoutToolEvidence, resolveToolCallRecoveryInstruction, sanitizeVisibleText, EXECUTION_WITH_TOOL_EVIDENCE_RETRY_INSTRUCTION, INVALID_TOOL_SELECTION_RECOVERY_INSTRUCTION, } from "../../parsing/output-parsing.js";
-import { DELEGATED_TASK_FAILURE_RECOVERY_INSTRUCTION, DELEGATION_ONLY_RECOVERY_INSTRUCTION, } from "../../prompts/runtime-prompts.js";
+import { DELEGATED_TASK_FAILURE_RECOVERY_INSTRUCTION, DELEGATION_ONLY_RECOVERY_INSTRUCTION, REQUIRED_EXECUTION_CONTINUATION_INSTRUCTION, } from "../../prompts/runtime-prompts.js";
 import { buildInvocationRequest } from "../model/invocation-request.js";
 import { PROMPTED_JSON_TOOL_POLICY_KEY, withPromptedJsonToolPolicy } from "../model/prompted-json-tool-policy.js";
 import { buildRawModelMessages } from "../model/message-assembly.js";
@@ -137,6 +137,26 @@ function hasIncompletePlanOutput(value) {
     }
     return null;
 }
+function hasFailedTodos(value) {
+    if (Array.isArray(value)) {
+        return value.some((todo) => hasFailedTodos(todo));
+    }
+    if (typeof value !== "object" || value === null) {
+        return false;
+    }
+    const typed = value;
+    if (typeof typed.status === "string" && typed.status.trim().toLowerCase() === "failed") {
+        return true;
+    }
+    return hasFailedTodos(typed.todos)
+        || hasFailedTodos(typed.update)
+        || hasFailedTodos(typed.data)
+        || hasFailedTodos(typed.output)
+        || hasFailedTodos(typed.summary);
+}
+function hasFailedPlanStateInExecutedToolResults(executedToolResults) {
+    return executedToolResults.some((item) => isPlanToolName(item.toolName) && hasFailedTodos(item.output));
+}
 function normalizePlanToolName(toolName) {
     return typeof toolName === "string" ? toolName.trim().toLowerCase().replace(/[\s-]+/gu, "_") : "";
 }
@@ -199,6 +219,9 @@ function buildExecutionRecoveryEvidence(params) {
         hasNonTodoToolResultEvidence: hasNonTodoToolEvidence(executedToolResults) || projectionState.emittedNonTodoToolResult || projectionState.emittedToolError,
         hasSuccessfulNonTodoToolResultEvidence: hasSuccessfulNonTodoToolEvidence(executedToolResults) || projectionState.emittedSuccessfulNonTodoToolResult,
         hasIncompletePlanState: projectionState.hasIncompletePlanState || hasIncompletePlanStateInExecutedToolResults(executedToolResults),
+        hasFailedPlanState: (projectionState.hasFailedPlanState || hasFailedPlanStateInExecutedToolResults(executedToolResults))
+            && !projectionState.emittedSuccessfulNonTodoToolResult
+            && !hasSuccessfulNonTodoToolEvidence(executedToolResults),
         hasPlanStateEvidence: projectionState.sawPlanState || hasIncompletePlanStateInExecutedToolResults(executedToolResults),
         hasOpenTaskDelegation: projectionState.openTaskDelegations > 0,
         hasFailedTaskDelegation: projectionState.hasFailedTaskDelegation
@@ -213,6 +236,7 @@ function buildExecutionRecoveryEvidence(params) {
 }
 function hasUnresolvedExecution(evidence) {
     return (evidence.hasIncompletePlanState
+        || evidence.hasFailedPlanState
         || evidence.hasFailedTaskDelegation
         || evidence.hasOpenTaskDelegation);
 }
@@ -257,6 +281,14 @@ function buildDeterministicFinalFromStreamToolEvidence(executedToolResults) {
         evidence.length > 0 ? evidence.join("\n\n") : "(no non-planning tool evidence captured)",
     ].join("\n");
 }
+function latestStreamToolErrorRecoveryInstruction(executedToolResults) {
+    const latest = [...executedToolResults].reverse().find((item) => item.isError === true);
+    if (!latest) {
+        return null;
+    }
+    const message = typeof latest.output === "string" ? latest.output : JSON.stringify(latest.output);
+    return resolveToolCallRecoveryInstruction(new Error(message)) ?? REQUIRED_EXECUTION_CONTINUATION_INSTRUCTION;
+}
 function hasUsefulVisibleSynthesis(value) {
     const trimmed = value.trim();
     if (trimmed.length < 80) {
@@ -381,6 +413,9 @@ function createUnresolvedExecutionError(evidence) {
     if (evidence.hasIncompletePlanState) {
         reasons.push("plan state still has unfinished work");
     }
+    if (evidence.hasFailedPlanState) {
+        reasons.push("plan state failed before non-TODO evidence returned");
+    }
     if (evidence.hasFailedTaskDelegation) {
         reasons.push("delegated task failed before surfacing final findings");
     }
@@ -463,7 +498,8 @@ function projectLocalToolExecutionProfileChunks(executedToolResults, prefix) {
 }
 export async function* streamRuntimeExecution(options) {
     let request = buildInvocationRequest(options.binding, options.history, options.input, options.runtimeOptions);
-    if (requiresPlanEvidence(options.binding)) {
+    if (requiresPlanEvidence(options.binding)
+        && options.runtimeOptions.suppressInitialRequiredPlanInstruction !== true) {
         request = withPromptedJsonToolPolicy(appendToolRecoveryInstruction(request, buildInitialRequiredPlanInstruction(options.primaryTools)), "planning");
     }
     let emittedUnsafeStreamSideEffects = false;
@@ -472,6 +508,14 @@ export async function* streamRuntimeExecution(options) {
     const deferredStreamContent = [];
     let sawRetrySafeInvalidToolSelectionError = false;
     const projectionState = createStreamEventProjectionState();
+    if (options.runtimeOptions.externalPlanEvidence === true) {
+        projectionState.sawPlanState = true;
+        yield {
+            kind: "commentary",
+            content: `${options.binding.agent.id}: TODO evidence observed.`,
+            agentId: options.binding.agent.id,
+        };
+    }
     const requestId = options.runtimeOptions.requestId ?? options.sessionId;
     const buildRunnableConfig = (extra) => ({
         ...(options.resolveInvocationConfig
@@ -515,6 +559,7 @@ export async function* streamRuntimeExecution(options) {
             try {
                 const rawStreamInput = buildRawModelMessages(options.binding, options.getSystemPrompt(options.binding), options.history, options.input, options.runtimeOptions.memoryContext);
                 const streamInput = requiresPlanEvidence(options.binding)
+                    && options.runtimeOptions.suppressInitialRequiredPlanInstruction !== true
                     ? withPromptedJsonToolPolicy(rawStreamInput, "planning")
                     : rawStreamInput;
                 stream = await options.withTimeout(() => options.langChainStreamModel.stream(streamInput), computeRemainingTimeoutMs(options.streamDeadlineAt, options.invokeTimeoutMs), "model stream start", "stream");
@@ -699,6 +744,7 @@ export async function* streamRuntimeExecution(options) {
                         const eventContainsNonRetrySafeChunk = projectedChunks.some((chunk) => chunk.kind !== "upstream-event"
                             && chunk.kind !== "content"
                             && !(chunk.kind === "tool-result" && isPlanToolName(chunk.toolName))
+                            && !(chunk.kind === "tool-result" && chunk.isError === true)
                             && !(chunk.kind === "tool-result" && chunk.isError === true && isRetrySafeInvalidToolSelectionError(chunk.output)));
                         const hadPriorPlanToolResult = completedPlanToolResultCount > 0;
                         const repeatedPlanToolResultBeforeEvidence = requiresPlanEvidence(options.binding)
@@ -796,10 +842,23 @@ export async function* streamRuntimeExecution(options) {
                                 })
                                 : null;
                             const terminalDelegationOnlyRecoveryInstruction = resolveDelegationOnlyRecoveryInstruction(options.binding, terminalExecutionEvidence);
+                            const terminalPrematurePlanCloseRecoveryInstruction = requiresPlanEvidence(options.binding)
+                                && terminalExecutionEvidence.hasPlanStateEvidence
+                                && !terminalExecutionEvidence.hasSuccessfulNonTodoToolResultEvidence
+                                ? buildRunEvidenceAfterPlanInstruction(options.primaryTools)
+                                : null;
                             if (!emittedUnsafeStreamSideEffects
-                                && (terminalMissingPlanRecoveryInstruction || terminalDelegationOnlyRecoveryInstruction)) {
+                                && (terminalMissingPlanRecoveryInstruction
+                                    || terminalPrematurePlanCloseRecoveryInstruction
+                                    || terminalDelegationOnlyRecoveryInstruction)) {
                                 earlyStreamRecoveryInstruction =
-                                    terminalMissingPlanRecoveryInstruction ?? terminalDelegationOnlyRecoveryInstruction;
+                                    terminalMissingPlanRecoveryInstruction
+                                        ?? terminalPrematurePlanCloseRecoveryInstruction
+                                        ?? terminalDelegationOnlyRecoveryInstruction;
+                                earlyStreamRecoverySuppressInitialPlan = terminalPrematurePlanCloseRecoveryInstruction !== null;
+                                if (terminalPrematurePlanCloseRecoveryInstruction) {
+                                    earlyStreamExternalPlanEvidenceTools = resolveSingleConfiguredPlanEvidenceTool(options.primaryTools);
+                                }
                                 break;
                             }
                             if (requiresPlanEvidence(options.binding) && hasCompletedPlanWithEvidence(terminalExecutionEvidence)) {
@@ -820,6 +879,7 @@ export async function* streamRuntimeExecution(options) {
                                 && !hasMissingDelegatedExecutionEvidence(terminalExecutionEvidence)
                                 && !hasMissingDelegatedFindings(terminalExecutionEvidence)
                                 && !terminalMissingPlanRecoveryInstruction
+                                && !terminalPrematurePlanCloseRecoveryInstruction
                                 && !terminalDelegationOnlyRecoveryInstruction) {
                                 if (deferredStreamContent.length > 0) {
                                     yield* flushDeferredStreamContent();
@@ -842,6 +902,35 @@ export async function* streamRuntimeExecution(options) {
                         const recoveredToolResults = Array.isArray(recovered.metadata?.executedToolResults)
                             ? recovered.metadata.executedToolResults
                             : [];
+                        const originalExecutionEvidence = buildExecutionRecoveryEvidence({ projectionState });
+                        const recoveredExecutionEvidence = buildExecutionRecoveryEvidence({
+                            projectionState: createStreamEventProjectionState(),
+                            executedToolResults: recoveredToolResults,
+                        });
+                        const recoveredCarriesExecutionEvidence = recoveredExecutionEvidence.hasSuccessfulNonTodoToolResultEvidence
+                            || recoveredExecutionEvidence.hasOpenTaskDelegation
+                            || recoveredExecutionEvidence.hasDelegatedExecutionToolEvidence;
+                        const recoveredHasUnresolvedExecution = hasUnresolvedExecution(recoveredExecutionEvidence)
+                            || hasMissingDelegatedExecutionEvidence(recoveredExecutionEvidence)
+                            || hasMissingDelegatedFindings(recoveredExecutionEvidence)
+                            || (!recoveredCarriesExecutionEvidence
+                                && (hasUnresolvedExecution(originalExecutionEvidence)
+                                    || hasMissingDelegatedExecutionEvidence(originalExecutionEvidence)
+                                    || hasMissingDelegatedFindings(originalExecutionEvidence)));
+                        if (recoveredHasUnresolvedExecution) {
+                            const effectiveRecoveryEvidence = recoveredCarriesExecutionEvidence
+                                ? recoveredExecutionEvidence
+                                : {
+                                    ...recoveredExecutionEvidence,
+                                    hasIncompletePlanState: originalExecutionEvidence.hasIncompletePlanState,
+                                    hasFailedPlanState: originalExecutionEvidence.hasFailedPlanState,
+                                    hasFailedTaskDelegation: originalExecutionEvidence.hasFailedTaskDelegation,
+                                    hasOpenTaskDelegation: originalExecutionEvidence.hasOpenTaskDelegation,
+                                    hasDelegatedAgentWithConfiguredTools: originalExecutionEvidence.hasDelegatedAgentWithConfiguredTools,
+                                    hasDelegatedExecutionToolEvidence: originalExecutionEvidence.hasDelegatedExecutionToolEvidence,
+                                };
+                            throw createUnresolvedExecutionError(effectiveRecoveryEvidence);
+                        }
                         for (const toolResult of recoveredToolResults) {
                             yield {
                                 kind: "tool-result",
@@ -931,6 +1020,9 @@ export async function* streamRuntimeExecution(options) {
             const streamedRuntimeFailureRecoveryInstruction = projectionState.emittedOutput
                 ? resolveStreamedRuntimeFailureRecoveryInstruction(projectionState.emittedOutput, streamedExecutionEvidence)
                 : null;
+            const streamedToolErrorRecoveryInstruction = !emittedUnsafeStreamSideEffects
+                ? latestStreamToolErrorRecoveryInstruction(streamedToolResults)
+                : null;
             const missingPlanRecoveryInstruction = !hasUnresolvedExecution(streamedExecutionEvidence) && !delegatedExecutionRecoveryInstruction
                 ? resolveMissingPlanRecoveryInstruction({
                     request,
@@ -943,7 +1035,8 @@ export async function* streamRuntimeExecution(options) {
                 : null;
             const retryInstruction = !emittedUnsafeStreamSideEffects && sawRetrySafeInvalidToolSelectionError
                 ? INVALID_TOOL_SELECTION_RECOVERY_INSTRUCTION
-                : delegatedExecutionRecoveryInstruction
+                : streamedToolErrorRecoveryInstruction
+                    ?? delegatedExecutionRecoveryInstruction
                     ?? streamedIncompletePlanRecoveryInstruction
                     ?? streamedPrematurePlanCloseRecoveryInstruction
                     ?? streamedRuntimeFailureRecoveryInstruction
@@ -970,7 +1063,7 @@ export async function* streamRuntimeExecution(options) {
                 }
                 const retriedVisibleOutput = retried.output ? toVisibleContent(retried.output) : "";
                 const retriedIsDelegationFailureFinalReport = isDelegationFailureFinalReport(originalExecutionEvidence, executedToolResults, retriedVisibleOutput);
-                const retriedCarriesExecutionEvidence = retriedExecutionEvidence.hasToolResultEvidence
+                const retriedCarriesExecutionEvidence = retriedExecutionEvidence.hasSuccessfulNonTodoToolResultEvidence
                     || retriedExecutionEvidence.hasOpenTaskDelegation
                     || retriedExecutionEvidence.hasDelegatedExecutionToolEvidence;
                 const retriedHasUnresolvedExecution = !retriedIsDelegationFailureFinalReport
@@ -986,6 +1079,7 @@ export async function* streamRuntimeExecution(options) {
                     : {
                         ...retriedExecutionEvidence,
                         hasIncompletePlanState: originalExecutionEvidence.hasIncompletePlanState,
+                        hasFailedPlanState: originalExecutionEvidence.hasFailedPlanState,
                         hasFailedTaskDelegation: originalExecutionEvidence.hasFailedTaskDelegation,
                         hasOpenTaskDelegation: originalExecutionEvidence.hasOpenTaskDelegation,
                         hasDelegatedAgentWithConfiguredTools: originalExecutionEvidence.hasDelegatedAgentWithConfiguredTools,
@@ -1227,6 +1321,7 @@ export async function* streamRuntimeExecution(options) {
                 : {
                     ...recoveredExecutionEvidence,
                     hasIncompletePlanState: originalExecutionEvidence.hasIncompletePlanState,
+                    hasFailedPlanState: originalExecutionEvidence.hasFailedPlanState,
                     hasFailedTaskDelegation: originalExecutionEvidence.hasFailedTaskDelegation,
                     hasOpenTaskDelegation: originalExecutionEvidence.hasOpenTaskDelegation,
                     hasDelegatedAgentWithConfiguredTools: originalExecutionEvidence.hasDelegatedAgentWithConfiguredTools,

package/dist/runtime/adapter/invocation-result.js CHANGED Viewed

@@ -39,7 +39,8 @@ function hasIncompleteStateSnapshotPlan(stateSnapshot) {
 function hasStateSnapshotPlan(stateSnapshot) {
     return typeof stateSnapshot === "object"
         && stateSnapshot !== null
-        && Array.isArray(stateSnapshot.todos);
+        && Array.isArray(stateSnapshot.todos)
+        && (stateSnapshot.todos).length > 0;
 }
 function normalizePlanToolName(toolName) {
     return typeof toolName === "string" ? toolName.trim().toLowerCase().replace(/[\s-]+/gu, "_") : "";
@@ -54,7 +55,19 @@ function isPlanToolName(toolName) {
         || normalized === "call_read_todos";
 }
 function hasPlanToolEvidence(executedToolResults) {
-    return executedToolResults.some((item) => isPlanToolName(item.toolName));
+    return executedToolResults.some((item) => {
+        if (!isPlanToolName(item.toolName)) {
+            return false;
+        }
+        const output = typeof item.output === "object" && item.output !== null ? item.output : null;
+        const summaryContainer = typeof output?.summary === "object" && output.summary !== null
+            ? output.summary
+            : null;
+        const counts = typeof summaryContainer?.summary === "object" && summaryContainer.summary !== null
+            ? summaryContainer.summary
+            : null;
+        return !(typeof counts?.total === "number" && counts.total <= 0);
+    });
 }
 function hasExecutionToolEvidence(executedToolResults) {
     return executedToolResults.some((item) => item.isError !== true && !isPlanToolName(item.toolName));

package/dist/runtime/adapter/local-tool-invocation.js CHANGED Viewed

@@ -13,8 +13,10 @@ const TOOL_FOLLOW_UP_INSTRUCTION = "One or more tool results are already availab
 const DEFAULT_MAX_TOOL_ITERATIONS = 10_000;
 const MAX_REPEATED_RECOVERY_WITHOUT_PROGRESS = 2;
 const MAX_REPEATED_PLAN_ONLY_AFTER_PLAN = 2;
+const MAX_REPEATED_INVALID_EXTERNAL_PLAN_EVIDENCE_SELECTION = 2;
 const REQUIRED_PLAN_CONTRACT_MARKER = "This agent has a required visible planning contract.";
 const INITIAL_WRITE_TODOS_MARKER = "Your first action for this request must be write_todos";
+const REQUIRED_SINGLE_EXTERNAL_PLAN_EVIDENCE_TOOL_INSTRUCTION = "The TODO board already exists. The next action must be exactly one non-planning evidence tool call. Do not call write_todos or read_todos, and do not call multiple tools in the same response.";
 function resolveSingleBootstrapEvidenceTool(primaryTools) {
     const evidenceTools = primaryTools
         .map((tool) => typeof tool.name === "string" ? tool.name.trim() : "")
@@ -108,7 +110,11 @@ function readPlanStateSummary(output) {
         return null;
     }
     const typedCounts = counts;
+    if (typeof typedCounts.total === "number" && typedCounts.total <= 0) {
+        return null;
+    }
     return {
+        ...(typeof typedCounts.total === "number" ? { total: typedCounts.total } : {}),
         pending: typeof typedCounts.pending === "number" ? typedCounts.pending : 0,
         inProgress: typeof typedCounts.inProgress === "number" ? typedCounts.inProgress : 0,
     };
@@ -138,6 +144,10 @@ function isPlanToolName(toolName) {
         || normalized === "call_write_todos"
         || normalized === "call_read_todos";
 }
+function isPlanToolCall(toolCall, toolNameMapping, primaryTools) {
+    const resolvedToolName = resolveModelFacingToolName(String(toolCall.name ?? ""), toolNameMapping, primaryTools);
+    return isPlanToolName(toolCall.name) || isPlanToolName(resolvedToolName);
+}
 function isFallbackTodoCompletionToolCall(toolCall) {
     return typeof toolCall.id === "string"
         && toolCall.id.startsWith("fallback-complete-")
@@ -218,7 +228,11 @@ function extractLatestUserInput(request) {
     const messages = Array.isArray(typedRequest.messages) ? typedRequest.messages : [];
     for (let index = messages.length - 1; index >= 0; index -= 1) {
         const candidate = messages[index];
-        if (candidate?.role !== "user" || typeof candidate.content !== "string") {
+        const role = typeof candidate?.role === "string" ? candidate.role.trim().toLowerCase() : "";
+        const messageType = typeof candidate?._getType === "function" ? String(candidate._getType()).trim().toLowerCase() : "";
+        const constructorType = Array.isArray(candidate?.id) ? String(candidate.id.at(-1)).trim().toLowerCase() : "";
+        const isUserMessage = role === "user" || role === "human" || messageType === "human" || constructorType === "humanmessage";
+        if (!isUserMessage || typeof candidate?.content !== "string") {
             continue;
         }
         const normalized = candidate.content.trim();
@@ -280,6 +294,7 @@ export async function runLocalToolInvocationLoop({ binding, request, primaryTool
     let lastRecoveryExecutedCount = -1;
     let repeatedRecoveryWithoutProgress = 0;
     let repeatedPlanOnlyAfterPlan = 0;
+    let repeatedInvalidExternalPlanEvidenceSelection = 0;
     let pendingResult;
     let result;
     const toolCatalog = new Map();
@@ -418,6 +433,26 @@ export async function runLocalToolInvocationLoop({ binding, request, primaryTool
             activeRequest = appendToolRecoveryInstruction(activeRequest, missingPlanRecoveryInstruction);
             continue;
         }
+        if (requiresPlanEvidence(binding)
+            && externalPlanEvidence === true
+            && hasPlanStateEvidence(executedToolResults, externalPlanEvidence)
+            && !hasNonTodoToolEvidence(executedToolResults)
+            && toolCalls.length > 0
+            && (toolCalls.length !== 1 || isPlanToolCall(toolCalls[0], toolNameMapping, primaryTools))) {
+            repeatedInvalidExternalPlanEvidenceSelection += 1;
+            if (repeatedInvalidExternalPlanEvidenceSelection >= MAX_REPEATED_INVALID_EXTERNAL_PLAN_EVIDENCE_SELECTION) {
+                throw createToolLoopError({
+                    reason: "model did not select exactly one non-planning evidence tool during delegated plan evidence recovery",
+                    iteration,
+                    maxToolIterations,
+                    toolCalls,
+                    executedToolResults,
+                });
+            }
+            activeRequest = appendToolRecoveryInstruction(activeRequest, REQUIRED_SINGLE_EXTERNAL_PLAN_EVIDENCE_TOOL_INSTRUCTION);
+            pendingResult = undefined;
+            continue;
+        }
         if (requiresPlanEvidence(binding)
             && hasPlanStateEvidence(executedToolResults, externalPlanEvidence)
             && !hasNonTodoToolEvidence(executedToolResults)
@@ -425,6 +460,12 @@ export async function runLocalToolInvocationLoop({ binding, request, primaryTool
             && toolCalls.every((toolCall) => isPlanToolName(toolCall.name))) {
             repeatedPlanOnlyAfterPlan += 1;
             if (repeatedPlanOnlyAfterPlan >= MAX_REPEATED_PLAN_ONLY_AFTER_PLAN) {
+                if (hasNonTodoToolEvidence(executedToolResults)) {
+                    return {
+                        result: buildDeterministicFinalFromToolEvidence(executedToolResults),
+                        executedToolResults,
+                    };
+                }
                 throw createToolLoopError({
                     reason: "model repeatedly selected only planning tools after the todo board already existed and before any non-planning evidence tool returned",
                     iteration,
@@ -434,6 +475,12 @@ export async function runLocalToolInvocationLoop({ binding, request, primaryTool
                 });
             }
             if (iteration + 1 === maxToolIterations) {
+                if (hasNonTodoToolEvidence(executedToolResults)) {
+                    return {
+                        result: buildDeterministicFinalFromToolEvidence(executedToolResults),
+                        executedToolResults,
+                    };
+                }
                 throw createToolLoopError({
                     reason: "maximum iterations reached",
                     iteration,
@@ -448,6 +495,7 @@ export async function runLocalToolInvocationLoop({ binding, request, primaryTool
         }
         repeatedRecoveryWithoutProgress = 0;
         repeatedPlanOnlyAfterPlan = 0;
+        repeatedInvalidExternalPlanEvidenceSelection = 0;
         const canReplayToolCalls = usedExternalPlanEvidenceToolThisIteration
             || canReplayToolCallsLocally(binding, toolCalls, primaryTools, toolNameMapping, executableTools, builtinExecutableTools);
         debugLocalToolReplay({

package/dist/runtime/adapter/runtime-adapter-support.d.ts CHANGED Viewed

@@ -5,7 +5,6 @@ export type BuiltinTodoSnapshot = {
 };
 export declare function truncateLines(lines: string[], maxChars?: number): string;
 export declare function summarizeBuiltinWriteTodosArgs(args: Record<string, unknown>): BuiltinTodoSnapshot;
-export declare function isLowSignalTodoContent(content: string): boolean;
 export declare function formatBuiltinTodoSnapshot(snapshot: BuiltinTodoSnapshot): string;
 export declare function buildRequestPlanState(input: {
     sessionId: string;

package/dist/runtime/adapter/runtime-adapter-support.js CHANGED Viewed

@@ -69,6 +69,13 @@ function readTodoContent(todo) {
     }
     return "";
 }
+function isLowSignalTodoContent(content) {
+    const normalized = content.trim().toLowerCase();
+    if (!normalized) {
+        return true;
+    }
+    return /^\d+$/.test(normalized) || /^step\s*\d+$/.test(normalized) || /^todo\s*\d+$/.test(normalized);
+}
 function normalizeTodoStatus(value) {
     if (typeof value !== "string") {
         return "pending";
@@ -90,6 +97,9 @@ export function summarizeBuiltinWriteTodosArgs(args) {
             return [];
         }
         const content = readTodoContent(todo);
+        if (isLowSignalTodoContent(content)) {
+            return [];
+        }
         const status = normalizeTodoStatus(todo.status);
         const metadata = isRecord(todo.metadata) ? todo.metadata : undefined;
         return content ? [{
@@ -119,13 +129,6 @@ export function summarizeBuiltinWriteTodosArgs(args) {
         summary,
     };
 }
-export function isLowSignalTodoContent(content) {
-    const normalized = content.trim().toLowerCase();
-    if (!normalized) {
-        return true;
-    }
-    return /^\d+$/.test(normalized) || /^step\s*\d+$/.test(normalized) || /^todo\s*\d+$/.test(normalized);
-}
 export function formatBuiltinTodoSnapshot(snapshot) {
     if (snapshot.summary.total === 0) {
         return "No todos tracked.";

package/dist/runtime/adapter/stream-event-projection.d.ts CHANGED Viewed

@@ -17,6 +17,7 @@ export type StreamEventProjectionState = {
     emittedDelegatedTerminalOutput: boolean;
     sawPlanState: boolean;
     hasIncompletePlanState: boolean;
+    hasFailedPlanState: boolean;
     openTaskDelegations: number;
     openToolCapableTaskDelegations: number;
     taskDelegationHasToolsStack: boolean[];

package/dist/runtime/adapter/stream-event-projection.js CHANGED Viewed

@@ -1,6 +1,5 @@
 import { extractToolFallbackContext, extractVisibleOutput, readTextContent, sanitizeVisibleText } from "../parsing/output-parsing.js";
 import { salvageFunctionLikeToolCall } from "../parsing/output-tool-args.js";
-import { isLowSignalTodoContent, summarizeBuiltinWriteTodosArgs } from "./runtime-adapter-support.js";
 import { computeIncrementalOutput, extractInterruptPayload, extractReasoningStreamOutput, sanitizeRetainedUpstreamEvent, extractStateStreamOutput, extractTerminalStreamOutput, extractToolResult, extractVisibleStreamOutput, normalizeTerminalOutputKey, } from "../parsing/stream-event-parsing.js";
 import { resolveModelFacingToolName } from "./tool/tool-name-mapping.js";
 export function createStreamEventProjectionState() {
@@ -20,6 +19,7 @@ export function createStreamEventProjectionState() {
         emittedDelegatedTerminalOutput: false,
         sawPlanState: false,
         hasIncompletePlanState: false,
+        hasFailedPlanState: false,
         openTaskDelegations: 0,
         openToolCapableTaskDelegations: 0,
         taskDelegationHasToolsStack: [],
@@ -141,6 +141,9 @@ function readSummaryCounts(summary) {
         return null;
     }
     const typed = summary;
+    if (typeof typed.total === "number" && typed.total <= 0) {
+        return null;
+    }
     const hasAnyCountField = typeof typed.pending === "number"
         || typeof typed.inProgress === "number";
     if (!hasAnyCountField) {
@@ -151,10 +154,23 @@ function readSummaryCounts(summary) {
         inProgress: typeof typed.inProgress === "number" ? typed.inProgress : 0,
     };
 }
+function readSummaryFailureCount(summary) {
+    if (typeof summary !== "object" || summary === null) {
+        return null;
+    }
+    const typed = summary;
+    if (typeof typed.total === "number" && typed.total <= 0) {
+        return null;
+    }
+    return typeof typed.failed === "number" ? typed.failed : null;
+}
 function hasIncompleteTodosArray(value) {
     if (!Array.isArray(value)) {
         return null;
     }
+    if (value.length === 0) {
+        return null;
+    }
     return value.some((item) => {
         if (typeof item !== "object" || item === null) {
             return false;
@@ -163,6 +179,20 @@ function hasIncompleteTodosArray(value) {
         return status === "pending" || status === "in_progress";
     });
 }
+function hasFailedTodosArray(value) {
+    if (!Array.isArray(value)) {
+        return null;
+    }
+    if (value.length === 0) {
+        return null;
+    }
+    return value.some((item) => {
+        if (typeof item !== "object" || item === null) {
+            return false;
+        }
+        return item.status === "failed";
+    });
+}
 function getPlanStateCompleteness(value) {
     if (typeof value !== "object" || value === null) {
         return null;
@@ -202,6 +232,45 @@ function getPlanStateCompleteness(value) {
     }
     return null;
 }
+function getPlanStateFailure(value) {
+    if (typeof value !== "object" || value === null) {
+        return null;
+    }
+    const typed = value;
+    const summaryFailed = readSummaryFailureCount(typed.summary);
+    if (summaryFailed !== null) {
+        return summaryFailed > 0;
+    }
+    if (typeof typed.summary === "object" && typed.summary !== null) {
+        const nestedSummary = getPlanStateFailure(typed.summary);
+        if (nestedSummary !== null) {
+            return nestedSummary;
+        }
+    }
+    const directTodos = hasFailedTodosArray(typed.todos);
+    if (directTodos !== null) {
+        return directTodos;
+    }
+    if (typeof typed.update === "object" && typed.update !== null) {
+        const nestedTodos = hasFailedTodosArray(typed.update.todos);
+        if (nestedTodos !== null) {
+            return nestedTodos;
+        }
+    }
+    if (typeof typed.output === "object" && typed.output !== null) {
+        const nestedOutput = getPlanStateFailure(typed.output);
+        if (nestedOutput !== null) {
+            return nestedOutput;
+        }
+    }
+    if (typeof typed.data === "object" && typed.data !== null) {
+        const nestedData = getPlanStateFailure(typed.data);
+        if (nestedData !== null) {
+            return nestedData;
+        }
+    }
+    return null;
+}
 function parseMaybeJsonString(value) {
     const trimmed = value.trim();
     if (!trimmed || (!trimmed.startsWith("{") && !trimmed.startsWith("["))) {
@@ -394,12 +463,6 @@ function isPlanToolName(toolName) {
         || normalized === "call_write_todos"
         || normalized === "call_read_todos";
 }
-function isWriteTodosToolName(toolName) {
-    const normalized = normalizePlanToolName(toolName);
-    return normalized === "write_todos"
-        || normalized === "tool_call_write_todos"
-        || normalized === "call_write_todos";
-}
 function extractTodoToolStart(event) {
     if (typeof event !== "object" || event === null) {
         return null;
@@ -413,15 +476,6 @@ function extractTodoToolStart(event) {
         return null;
     }
     const input = unwrapPossibleToolInput(typed.data?.input);
-    if (isWriteTodosToolName(toolName) && typeof input === "object" && input !== null && !Array.isArray(input)) {
-        const summary = summarizeBuiltinWriteTodosArgs(input);
-        if (summary.summary.total === 0) {
-            throw new Error("Error invoking tool 'write_todos' with kwargs {\"todos\":[]} with error: Error: Initial write_todos call cannot use an empty todo list. Send the concrete task steps with both content and status.");
-        }
-        if (summary.items.every((item) => isLowSignalTodoContent(item.content))) {
-            throw new Error("Error invoking tool 'write_todos' with placeholder todo content with error: Error: Initial write_todos call must use descriptive task content. Do not use placeholder entries like '1', '2', or 'step 1'.");
-        }
-    }
     return { toolName, input };
 }
 export function projectRuntimeStreamEvent(params) {
@@ -445,6 +499,11 @@ export function projectRuntimeStreamEvent(params) {
         state.sawPlanState = true;
         state.hasIncompletePlanState = planStateCompleteness;
     }
+    const planStateFailure = getPlanStateFailure(event);
+    if (planStateFailure !== null) {
+        state.sawPlanState = true;
+        state.hasFailedPlanState = planStateFailure;
+    }
     const eventAgentId = typeof event === "object" && event !== null && typeof event.agentId === "string"
         ? event.agentId.trim()
         : "";

package/dist/runtime/adapter/tool/builtin-middleware-tools.js CHANGED Viewed

@@ -2,7 +2,7 @@ import path from "node:path";
 import { z } from "zod";
 import { isSandboxBackend } from "deepagents";
 import { isRecord } from "../../../utils/object.js";
-import { formatBuiltinTodoSnapshot, isLowSignalTodoContent, summarizeBuiltinWriteTodosArgs, truncateLines } from "../runtime-adapter-support.js";
+import { formatBuiltinTodoSnapshot, summarizeBuiltinWriteTodosArgs, truncateLines } from "../runtime-adapter-support.js";
 import { maybePersistLargeToolOutput, resolveToolRuntimeContext } from "./tool-output-artifacts.js";
 function buildTaskToolDescription(subagents) {
     const lines = [
@@ -272,14 +272,6 @@ export async function createBuiltinMiddlewareTools(backend, options) {
         invoke: async (input) => {
             const args = isRecord(input) ? input : {};
             const summary = summarizeBuiltinWriteTodosArgs(args);
-            if (summary.summary.total === 0 && todoSnapshot.summary.total === 0) {
-                throw new Error("Initial write_todos call cannot use an empty todo list. Send the concrete task steps with both content and status.");
-            }
-            if (summary.summary.total > 0 &&
-                todoSnapshot.summary.total === 0 &&
-                summary.items.every((item) => isLowSignalTodoContent(item.content))) {
-                throw new Error("Initial write_todos call must use descriptive task content. Do not use placeholder entries like '1', '2', or 'step 1'.");
-            }
             todoSnapshot = summary;
             return {
                 ok: true,