npm - @botbotgo/agent-harness - Versions diffs - 0.0.418 → 0.0.420 - Mend

@botbotgo/agent-harness 0.0.418 → 0.0.420

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (28) hide show

package/dist/cli/chat-interactive.js +1 -1
package/dist/cli/chat-stream.js +9 -1
package/dist/package-version.d.ts +2 -2
package/dist/package-version.js +2 -2
package/dist/runtime/adapter/compat/openai-compatible.js +12 -0
package/dist/runtime/adapter/flow/invocation-flow.d.ts +2 -0
package/dist/runtime/adapter/flow/invocation-flow.js +13 -5
package/dist/runtime/adapter/flow/invoke-runtime.d.ts +1 -0
package/dist/runtime/adapter/flow/invoke-runtime.js +1 -0
package/dist/runtime/adapter/flow/stream-runtime.d.ts +4 -0
package/dist/runtime/adapter/flow/stream-runtime.js +177 -14
package/dist/runtime/adapter/invocation-result.js +17 -6
package/dist/runtime/adapter/local-tool-invocation.d.ts +2 -1
package/dist/runtime/adapter/local-tool-invocation.js +268 -21
package/dist/runtime/adapter/model/model-providers.js +269 -58
package/dist/runtime/adapter/model/prompted-json-tool-call-capture.d.ts +9 -0
package/dist/runtime/adapter/model/prompted-json-tool-call-capture.js +40 -0
package/dist/runtime/adapter/runtime-adapter-support.js +58 -12
package/dist/runtime/adapter/runtime-shell.js +3 -2
package/dist/runtime/adapter/stream-event-projection.js +22 -5
package/dist/runtime/adapter/tool/tool-arguments.js +157 -67
package/dist/runtime/adapter/tool/tool-replay.js +0 -4
package/dist/runtime/agent-runtime-adapter.d.ts +3 -0
package/dist/runtime/agent-runtime-adapter.js +217 -73
package/dist/runtime/harness/run/stream-run.js +31 -3
package/dist/runtime/parsing/output-tool-args.js +108 -0
package/dist/workspace/resource-compilers.js +17 -4
package/package.json +1 -1

package/dist/cli/chat-interactive.js CHANGED Viewed

@@ -166,7 +166,7 @@ export async function runInteractiveChatLoop(input) {
                 }
                 activeSessionId = chatCommand.arg;
                 latestRequestId = session.latestRequestId;
-                activeAgentId = session.currentAgentId ?? session.entryAgentId ?? activeAgentId;
+                activeAgentId = session.entryAgentId ?? session.currentAgentId ?? activeAgentId;
                 input.stdout(`session=${activeSessionId}\n`);
                 continue;
             }

package/dist/cli/chat-stream.js CHANGED Viewed

@@ -10,6 +10,7 @@ export async function streamChatMessage(input) {
     let latestSessionId = input.sessionId;
     let latestRequestId;
     let latestAgentId = input.agentId;
+    let entryAgentId = input.agentId;
     let wroteContent = false;
     let wroteRenderableBlocks = false;
     let renderedAssistantOutput = "";
@@ -348,6 +349,7 @@ export async function streamChatMessage(input) {
         writeChatStderr(lines.join(""));
     };
     const renderContentBlocks = (contentBlocks, agentId) => {
+        entryAgentId ??= agentId;
         latestAgentId = agentId || latestAgentId;
         const rendered = contentBlocks
             .map((block) => {
@@ -373,6 +375,7 @@ export async function streamChatMessage(input) {
             markRuntimeProgress();
             latestSessionId = snapshot.sessionId || latestSessionId;
             latestRequestId = snapshot.requestId || latestRequestId;
+            entryAgentId ??= snapshot.agentId;
             latestAgentId = snapshot.agentId || latestAgentId;
             latestSnapshot = snapshot;
             firstSnapshotAt ??= Date.now();
@@ -421,16 +424,19 @@ export async function streamChatMessage(input) {
             latestRequestId = delta.requestId || latestRequestId;
             firstDataAt ??= Date.now();
             if (delta.type === "output.text.delta") {
+                entryAgentId ??= delta.agentId;
                 latestAgentId = delta.agentId || latestAgentId;
                 writeAssistantOutput(delta.text);
                 return;
             }
             if (delta.type === "output.content-blocks") {
+                entryAgentId ??= delta.agentId;
                 suspendRequestTreeRendering();
                 renderContentBlocks(delta.contentBlocks, delta.agentId);
                 return;
             }
             if (delta.type === "plan.step") {
+                entryAgentId ??= delta.agentId;
                 latestAgentId = delta.agentId || latestAgentId;
                 const item = delta.item;
                 const status = typeof item?.status === "string" ? item.status : "unknown";
@@ -441,6 +447,7 @@ export async function streamChatMessage(input) {
                 return;
             }
             if (delta.type === "tool.result") {
+                entryAgentId ??= delta.agentId;
                 latestAgentId = delta.agentId || latestAgentId;
                 if ((input.showToolResults ?? true) && !input.requestEvents) {
                     writeChatStderr(`\n[${formatPerfClock(Date.now())} +${formatElapsed(Date.now())}]${formatAgentProgressLabel(delta.agentId)} [tool:${delta.toolName}] ${summarizeChatToolResult(delta.output, delta.isError === true)}${delta.isError ? " (error)" : ""}\n`);
@@ -448,6 +455,7 @@ export async function streamChatMessage(input) {
                 return;
             }
             if (delta.type === "progress.commentary") {
+                entryAgentId ??= delta.agentId;
                 latestAgentId = delta.agentId || latestAgentId;
                 if (wroteContent || wroteRenderableBlocks) {
                     return;
@@ -500,5 +508,5 @@ export async function streamChatMessage(input) {
         writeChatStdout("\n");
     }
     await Promise.allSettled([stdoutWriteChain, stderrWriteChain]);
-    return { sessionId: latestSessionId, requestId: latestRequestId, agentId: latestAgentId };
+    return { sessionId: latestSessionId, requestId: latestRequestId, agentId: entryAgentId ?? latestAgentId };
 }

package/dist/package-version.d.ts CHANGED Viewed

@@ -1,2 +1,2 @@
-export declare const AGENT_HARNESS_VERSION = "0.0.418";
-export declare const AGENT_HARNESS_RELEASE_DATE = "2026-05-02";
+export declare const AGENT_HARNESS_VERSION = "0.0.420";
+export declare const AGENT_HARNESS_RELEASE_DATE = "2026-05-03";

package/dist/package-version.js CHANGED Viewed

@@ -1,2 +1,2 @@
-export const AGENT_HARNESS_VERSION = "0.0.418";
-export const AGENT_HARNESS_RELEASE_DATE = "2026-05-02";
+export const AGENT_HARNESS_VERSION = "0.0.420";
+export const AGENT_HARNESS_RELEASE_DATE = "2026-05-03";

package/dist/runtime/adapter/compat/openai-compatible.js CHANGED Viewed

@@ -27,6 +27,7 @@ export function buildAuthOmittingFetch(baseFetch = fetch) {
 export function normalizeOpenAICompatibleInit(init) {
     const normalized = { ...init };
     const configuration = asObject(init.configuration) ?? {};
+    const modelKwargs = asObject(init.modelKwargs) ?? {};
     const baseUrl = typeof init.baseUrl === "string" && init.baseUrl.trim() ? init.baseUrl.trim() : undefined;
     const omitAuthHeader = init.omitAuthHeader === true || isPlaceholderApiKey(init.apiKey);
     const nextConfiguration = { ...configuration };
@@ -36,8 +37,19 @@ export function normalizeOpenAICompatibleInit(init) {
     if (omitAuthHeader) {
         nextConfiguration.fetch = buildAuthOmittingFetch(typeof configuration.fetch === "function" ? configuration.fetch : fetch);
     }
+    if (typeof init.numPredict === "number" && typeof normalized.maxTokens !== "number") {
+        normalized.maxTokens = init.numPredict;
+    }
+    if (typeof init.numCtx === "number" && typeof modelKwargs.num_ctx !== "number") {
+        normalized.modelKwargs = {
+            ...modelKwargs,
+            num_ctx: init.numCtx,
+        };
+    }
     normalized.configuration = nextConfiguration;
     delete normalized.baseUrl;
     delete normalized.omitAuthHeader;
+    delete normalized.numPredict;
+    delete normalized.numCtx;
     return normalized;
 }

package/dist/runtime/adapter/flow/invocation-flow.d.ts CHANGED Viewed

@@ -14,6 +14,8 @@ export declare function executeRequestInvocation(options: {
         files?: Record<string, unknown>;
         memoryContext?: string;
         toolRuntimeContext?: Record<string, unknown>;
+        suppressInitialRequiredPlanInstruction?: boolean;
+        externalPlanEvidence?: boolean;
     };
     resolveTools: (tools: CompiledTool[], binding?: CompiledAgentBinding) => unknown[];
     getToolNameMapping: (binding: CompiledAgentBinding) => ToolNameMapping;

package/dist/runtime/adapter/flow/invocation-flow.js CHANGED Viewed

@@ -50,11 +50,17 @@ function isDelegationOnlyBinding(binding) {
 function hasTaskDelegationEvidence(executedToolResults) {
     return executedToolResults.some((item) => item.toolName === "task");
 }
+function normalizePlanToolName(toolName) {
+    return typeof toolName === "string" ? toolName.trim().toLowerCase().replace(/[\s-]+/gu, "_") : "";
+}
 function isPlanToolName(toolName) {
-    return toolName === "write_todos"
-        || toolName === "read_todos"
-        || toolName === "tool_call_write_todos"
-        || toolName === "tool_call_read_todos";
+    const normalized = normalizePlanToolName(toolName);
+    return normalized === "write_todos"
+        || normalized === "read_todos"
+        || normalized === "tool_call_write_todos"
+        || normalized === "tool_call_read_todos"
+        || normalized === "call_write_todos"
+        || normalized === "call_read_todos";
 }
 function hasPlanToolEvidence(executedToolResults) {
     return executedToolResults.some((item) => isPlanToolName(item.toolName));
@@ -303,7 +309,8 @@ export async function executeRequestInvocation(options) {
         ? buildInvocationRequest(options.binding, history, options.input, invokeOptions)
         : new Command({ resume: options.resumePayload });
     if (options.resumePayload === undefined
-        && options.binding.harnessRuntime.executionContract?.requiresPlan === true) {
+        && options.binding.harnessRuntime.executionContract?.requiresPlan === true
+        && invokeOptions.suppressInitialRequiredPlanInstruction !== true) {
         request = appendToolRecoveryInstruction(request, INITIAL_REQUIRED_PLAN_INSTRUCTION);
     }
     const { primaryTools, toolNameMapping, executableTools, defersToUpstreamHitlExecution, } = buildBindingToolExecutionContext({
@@ -331,6 +338,7 @@ export async function executeRequestInvocation(options) {
         builtinExecutableTools: builtinExecutableTools,
         callRuntimeWithToolParseRecovery: options.callRuntimeWithToolParseRecovery,
         toolRuntimeContext: invokeOptions.toolRuntimeContext,
+        externalPlanEvidence: invokeOptions.externalPlanEvidence,
     });
     let localOrUpstreamInvocation = await invokeOnce(request);
     if (options.resumePayload === undefined

package/dist/runtime/adapter/flow/invoke-runtime.d.ts CHANGED Viewed

@@ -17,6 +17,7 @@ export declare function invokeRuntimeWithLocalTools(options: {
     builtinExecutableTools: Map<string, ExecutableTool>;
     callRuntimeWithToolParseRecovery: (request: unknown) => Promise<Record<string, unknown>>;
     toolRuntimeContext?: Record<string, unknown>;
+    externalPlanEvidence?: boolean;
 }): Promise<{
     result: Record<string, unknown>;
     executedToolResults: ExecutedToolResult[];

package/dist/runtime/adapter/flow/invoke-runtime.js CHANGED Viewed

@@ -15,5 +15,6 @@ export async function invokeRuntimeWithLocalTools(options) {
         builtinExecutableTools: options.builtinExecutableTools,
         callRuntimeWithToolParseRecovery: options.callRuntimeWithToolParseRecovery,
         toolRuntimeContext: options.toolRuntimeContext,
+        externalPlanEvidence: options.externalPlanEvidence,
     });
 }

package/dist/runtime/adapter/flow/stream-runtime.d.ts CHANGED Viewed

@@ -21,6 +21,8 @@ export declare function streamRuntimeExecution(options: {
         memoryContext?: string;
         profiling?: boolean;
         toolRuntimeContext?: Record<string, unknown>;
+        suppressInitialRequiredPlanInstruction?: boolean;
+        externalPlanEvidence?: boolean;
     };
     primaryTools: CompiledTool[];
     toolNameMapping: ToolNameMapping;
@@ -47,6 +49,8 @@ export declare function streamRuntimeExecution(options: {
         files?: Record<string, unknown>;
         memoryContext?: string;
         toolRuntimeContext?: Record<string, unknown>;
+        suppressInitialRequiredPlanInstruction?: boolean;
+        externalPlanEvidence?: boolean;
     }) => Promise<{
         output: string;
         metadata?: Record<string, unknown>;

package/dist/runtime/adapter/flow/stream-runtime.js CHANGED Viewed

@@ -26,13 +26,43 @@ const RUN_EVIDENCE_AFTER_PREMATURE_PLAN_CLOSE_INSTRUCTION = [
     "Your next action must be exactly one non-TODO evidence tool call selected from the available tool descriptions and schemas.",
     "After that evidence tool returns, update the todo board and then provide the final answer required by the agent response format.",
 ].join("\n");
+function readPrimaryToolName(tool) {
+    return typeof tool.name === "string" ? tool.name.trim() : "";
+}
+function buildRunEvidenceAfterPlanInstruction(primaryTools) {
+    const toolNames = primaryTools
+        .map(readPrimaryToolName)
+        .filter((name) => name.length > 0 && !isPlanToolName(name));
+    if (toolNames.length === 0) {
+        return RUN_EVIDENCE_AFTER_PREMATURE_PLAN_CLOSE_INSTRUCTION;
+    }
+    return [
+        RUN_EVIDENCE_AFTER_PREMATURE_PLAN_CLOSE_INSTRUCTION,
+        "",
+        `Available non-planning tool names: ${toolNames.join(", ")}.`,
+    ].join("\n");
+}
 const INITIAL_REQUIRED_PLAN_INSTRUCTION = [
     "This agent has a required visible planning contract.",
     "Your first action for this request must be write_todos with concrete task steps and statuses.",
     "Do not call any domain/evidence tool and do not provide a final answer before the initial write_todos call succeeds.",
+    "After write_todos succeeds, do not call write_todos or read_todos again until one non-planning evidence tool returns.",
     "Do not use placeholders like '1', '2', '3', 'step 1', or generic labels. Each todo must name the concrete work it represents.",
     "After each evidence step, update the todo board. Before the final answer, close every todo as completed or failed.",
 ].join("\n");
+function buildInitialRequiredPlanInstruction(primaryTools) {
+    const toolNames = primaryTools
+        .map(readPrimaryToolName)
+        .filter((name) => name.length > 0 && !isPlanToolName(name));
+    if (toolNames.length === 0) {
+        return INITIAL_REQUIRED_PLAN_INSTRUCTION;
+    }
+    return [
+        INITIAL_REQUIRED_PLAN_INSTRUCTION,
+        "",
+        `After the initial todo board, select the next non-planning tool from these declared tool names: ${toolNames.join(", ")}.`,
+    ].join("\n");
+}
 function toVisibleContent(value) {
     const extracted = extractVisibleOutput(value);
     return extracted ? sanitizeVisibleText(extracted) : "";
@@ -98,11 +128,17 @@ function hasIncompletePlanOutput(value) {
     }
     return null;
 }
+function normalizePlanToolName(toolName) {
+    return typeof toolName === "string" ? toolName.trim().toLowerCase().replace(/[\s-]+/gu, "_") : "";
+}
 function isPlanToolName(toolName) {
-    return toolName === "write_todos"
-        || toolName === "read_todos"
-        || toolName === "tool_call_write_todos"
-        || toolName === "tool_call_read_todos";
+    const normalized = normalizePlanToolName(toolName);
+    return normalized === "write_todos"
+        || normalized === "read_todos"
+        || normalized === "tool_call_write_todos"
+        || normalized === "tool_call_read_todos"
+        || normalized === "call_write_todos"
+        || normalized === "call_read_todos";
 }
 function isCompletedPlanToolResultChunk(chunk) {
     if (chunk.kind !== "tool-result" || !isPlanToolName(chunk.toolName)) {
@@ -122,6 +158,12 @@ function hasSuccessfulTaskToolEvidence(executedToolResults) {
 function requiresPlanEvidence(binding) {
     return binding.harnessRuntime?.executionContract?.requiresPlan === true;
 }
+function withSuppressedInitialRequiredPlanInstruction(options) {
+    return {
+        ...options,
+        suppressInitialRequiredPlanInstruction: true,
+    };
+}
 function hasParentLocalToolExecutionAfterDelegationFailure(originalEvidence, executedToolResults) {
     return originalEvidence.hasFailedTaskDelegation
         && executedToolResults.some((item) => item.toolName !== "task");
@@ -269,6 +311,14 @@ function isOpenAICompatibleStreamingCompatibilityError(binding, error) {
     const message = error instanceof Error ? error.message : String(error);
     return message.toLowerCase().includes("received empty response from chat model call");
 }
+function isGraphRecursionLimitError(error) {
+    const code = typeof error === "object" && error !== null && "lc_error_code" in error
+        ? String(error.lc_error_code ?? "")
+        : "";
+    const message = error instanceof Error ? error.message : String(error);
+    return code === "GRAPH_RECURSION_LIMIT"
+        || /Recursion limit .* without hitting a stop condition|GRAPH_RECURSION_LIMIT/i.test(message);
+}
 function hasDelegationEvidence(evidence) {
     return (evidence.hasSuccessfulTaskToolEvidence
         || evidence.hasOpenTaskDelegation
@@ -358,10 +408,45 @@ function finishProfileStep(input) {
         ...(input.error !== undefined ? { error: input.error instanceof Error ? input.error.message : String(input.error) } : {}),
     });
 }
+function projectLocalToolExecutionProfileChunks(executedToolResults, prefix) {
+    const chunks = [];
+    executedToolResults.forEach((toolResult, index) => {
+        if (isPlanToolName(toolResult.toolName)) {
+            return;
+        }
+        const id = `${prefix}:${index + 1}:${toolResult.toolName}`;
+        const startedAt = new Date().toISOString();
+        chunks.push({
+            kind: "profile",
+            step: {
+                id,
+                kind: "tool",
+                name: toolResult.toolName,
+                action: "invoke",
+                status: "started",
+                startedAt,
+            },
+        });
+        chunks.push({
+            kind: "profile",
+            step: {
+                id,
+                kind: "tool",
+                name: toolResult.toolName,
+                action: "invoke",
+                status: toolResult.isError === true ? "failed" : "completed",
+                startedAt,
+                endedAt: new Date().toISOString(),
+                ...(toolResult.isError === true ? { isError: true } : {}),
+            },
+        });
+    });
+    return chunks;
+}
 export async function* streamRuntimeExecution(options) {
     let request = buildInvocationRequest(options.binding, options.history, options.input, options.runtimeOptions);
     if (requiresPlanEvidence(options.binding)) {
-        request = appendToolRecoveryInstruction(request, INITIAL_REQUIRED_PLAN_INSTRUCTION);
+        request = appendToolRecoveryInstruction(request, buildInitialRequiredPlanInstruction(options.primaryTools));
     }
     let emittedUnsafeStreamSideEffects = false;
     const shouldProfile = options.runtimeOptions.profiling === true;
@@ -549,13 +634,14 @@ export async function* streamRuntimeExecution(options) {
                         error,
                     });
                 if (!emittedUnsafeStreamSideEffects
-                    && isOpenAICompatibleStreamingCompatibilityError(options.binding, error)) {
+                    && (isOpenAICompatibleStreamingCompatibilityError(options.binding, error) || isGraphRecursionLimitError(error))) {
                     deferredStreamContent.length = 0;
                 }
                 else {
                     throw error;
                 }
             }
+            const streamedToolResults = [];
             if (events) {
                 const streamEventsConsume = startProfileStep({
                     id: "profile:agent:stream-events-consume",
@@ -568,7 +654,9 @@ export async function* streamRuntimeExecution(options) {
                 try {
                     let sawCompletedPlanToolResult = false;
                     let sawSuccessfulNonTodoToolResult = false;
-                    const streamedToolResults = [];
+                    let earlyStreamRecoveryInstruction = null;
+                    let earlyStreamRecoverySuppressInitialPlan = false;
+                    let completedPlanToolResultCount = 0;
                     for await (const event of options.iterateWithTimeout(events, options.streamIdleTimeoutMs, "agent streamEvents", options.streamDeadlineAt, options.invokeTimeoutMs)) {
                         const projectedChunks = projectRuntimeStreamEvent({
                             event,
@@ -589,6 +677,15 @@ export async function* streamRuntimeExecution(options) {
                             && chunk.kind !== "content"
                             && !(chunk.kind === "tool-result" && isPlanToolName(chunk.toolName))
                             && !(chunk.kind === "tool-result" && chunk.isError === true && isRetrySafeInvalidToolSelectionError(chunk.output)));
+                        const repeatedPlanToolResultBeforeEvidence = requiresPlanEvidence(options.binding)
+                            && !sawSuccessfulNonTodoToolResult
+                            && completedPlanToolResultCount > 0
+                            && projectedChunks.some((chunk) => chunk.kind === "tool-result" && isPlanToolName(chunk.toolName));
+                        if (repeatedPlanToolResultBeforeEvidence) {
+                            earlyStreamRecoveryInstruction = buildRunEvidenceAfterPlanInstruction(options.primaryTools);
+                            earlyStreamRecoverySuppressInitialPlan = true;
+                            break;
+                        }
                         for (const chunk of projectedChunks) {
                             if (chunk.kind === "tool-result" && chunk.isError === true && isRetrySafeInvalidToolSelectionError(chunk.output)) {
                                 sawRetrySafeInvalidToolSelectionError = true;
@@ -606,6 +703,9 @@ export async function* streamRuntimeExecution(options) {
                             if (isCompletedPlanToolResultChunk(chunk)) {
                                 sawCompletedPlanToolResult = true;
                             }
+                            if (chunk.kind === "tool-result" && isPlanToolName(chunk.toolName)) {
+                                completedPlanToolResultCount += 1;
+                            }
                             if ((eventContainsNonTodoToolResult || eventContainsNonRetrySafeChunk) && deferredStreamContent.length > 0) {
                                 yield* flushDeferredStreamContent();
                             }
@@ -660,6 +760,23 @@ export async function* streamRuntimeExecution(options) {
                                 })
                                 : null;
                             const terminalDelegationOnlyRecoveryInstruction = resolveDelegationOnlyRecoveryInstruction(options.binding, terminalExecutionEvidence);
+                            if (!emittedUnsafeStreamSideEffects
+                                && (terminalMissingPlanRecoveryInstruction || terminalDelegationOnlyRecoveryInstruction)) {
+                                earlyStreamRecoveryInstruction =
+                                    terminalMissingPlanRecoveryInstruction ?? terminalDelegationOnlyRecoveryInstruction;
+                                break;
+                            }
+                            if (requiresPlanEvidence(options.binding) && hasCompletedPlanWithEvidence(terminalExecutionEvidence)) {
+                                if (hasUsefulVisibleSynthesis(projectionState.emittedOutput)) {
+                                    if (deferredStreamContent.length > 0) {
+                                        yield* flushDeferredStreamContent();
+                                    }
+                                    return;
+                                }
+                                deferredStreamContent.length = 0;
+                                yield { kind: "content", content: buildDeterministicFinalFromStreamToolEvidence(streamedToolResults) };
+                                return;
+                            }
                             if (!shouldDeferStreamContent()
                                 && !terminalExecutionEvidence.hasIncompletePlanState
                                 && !terminalExecutionEvidence.hasFailedTaskDelegation
@@ -675,6 +792,30 @@ export async function* streamRuntimeExecution(options) {
                             }
                         }
                     }
+                    if (earlyStreamRecoveryInstruction) {
+                        const earlyRecoveryRuntimeOptions = earlyStreamRecoverySuppressInitialPlan
+                            ? withSuppressedInitialRequiredPlanInstruction(options.runtimeOptions)
+                            : options.runtimeOptions;
+                        const recovered = await options.invoke(options.applyToolRecoveryInstruction(options.binding, earlyStreamRecoveryInstruction), options.input, options.sessionId, options.runtimeOptions.requestId ?? options.sessionId, undefined, options.history, earlyRecoveryRuntimeOptions);
+                        const recoveredToolResults = Array.isArray(recovered.metadata?.executedToolResults)
+                            ? recovered.metadata.executedToolResults
+                            : [];
+                        for (const toolResult of recoveredToolResults) {
+                            yield {
+                                kind: "tool-result",
+                                toolName: toolResult.toolName,
+                                output: toolResult.output,
+                                isError: toolResult.isError,
+                            };
+                        }
+                        if (recovered.output) {
+                            const visible = toVisibleContent(recovered.output);
+                            if (visible) {
+                                yield { kind: "content", content: visible };
+                            }
+                        }
+                        return;
+                    }
                     if (shouldProfile)
                         yield finishProfileStep({
                             id: "profile:agent:stream-events-consume",
@@ -697,7 +838,7 @@ export async function* streamRuntimeExecution(options) {
                             error,
                         });
                     if (!emittedUnsafeStreamSideEffects
-                        && isOpenAICompatibleStreamingCompatibilityError(options.binding, error)) {
+                        && (isOpenAICompatibleStreamingCompatibilityError(options.binding, error) || isGraphRecursionLimitError(error))) {
                         deferredStreamContent.length = 0;
                     }
                     else {
@@ -707,9 +848,14 @@ export async function* streamRuntimeExecution(options) {
             }
             const streamedExecutionEvidence = buildExecutionRecoveryEvidence({ projectionState });
             if (requiresPlanEvidence(options.binding) && hasCompletedPlanWithEvidence(streamedExecutionEvidence)) {
-                if (deferredStreamContent.length > 0) {
-                    yield* flushDeferredStreamContent();
+                if (hasUsefulVisibleSynthesis(projectionState.emittedOutput)) {
+                    if (deferredStreamContent.length > 0) {
+                        yield* flushDeferredStreamContent();
+                    }
+                    return;
                 }
+                deferredStreamContent.length = 0;
+                yield { kind: "content", content: buildDeterministicFinalFromStreamToolEvidence(streamedToolResults) };
                 return;
             }
             const streamedDelegatedRecoveryInstruction = resolveDelegatedExecutionRecoveryInstruction(streamedExecutionEvidence);
@@ -722,7 +868,7 @@ export async function* streamRuntimeExecution(options) {
             const streamedPrematurePlanCloseRecoveryInstruction = requiresPlanEvidence(options.binding)
                 && streamedExecutionEvidence.hasPlanStateEvidence
                 && !streamedExecutionEvidence.hasSuccessfulNonTodoToolResultEvidence
-                ? RUN_EVIDENCE_AFTER_PREMATURE_PLAN_CLOSE_INSTRUCTION
+                ? buildRunEvidenceAfterPlanInstruction(options.primaryTools)
                 : null;
             const delegatedExecutionRecoveryInstruction = !emittedUnsafeStreamSideEffects || streamedDelegatedRecoveryInstruction
                 ? streamedDelegatedRecoveryInstruction
@@ -763,8 +909,12 @@ export async function* streamRuntimeExecution(options) {
                     ?? streamedDelegationOnlyRecoveryInstruction
                     ?? executionWithoutToolEvidenceInstruction;
             if (retryInstruction) {
+                const retryRuntimeOptions = retryInstruction === streamedIncompletePlanRecoveryInstruction
+                    || retryInstruction === streamedPrematurePlanCloseRecoveryInstruction
+                    ? withSuppressedInitialRequiredPlanInstruction(options.runtimeOptions)
+                    : options.runtimeOptions;
                 let retried;
-                retried = await options.invoke(options.applyToolRecoveryInstruction(options.binding, retryInstruction), options.input, options.sessionId, options.runtimeOptions.requestId ?? options.sessionId, undefined, options.history, options.runtimeOptions);
+                retried = await options.invoke(options.applyToolRecoveryInstruction(options.binding, retryInstruction), options.input, options.sessionId, options.runtimeOptions.requestId ?? options.sessionId, undefined, options.history, retryRuntimeOptions);
                 const executedToolResults = Array.isArray(retried.metadata?.executedToolResults)
                     ? retried.metadata.executedToolResults
                     : [];
@@ -954,6 +1104,9 @@ export async function* streamRuntimeExecution(options) {
         const executedToolResults = Array.isArray(result.metadata?.executedToolResults)
             ? result.metadata.executedToolResults
             : [];
+        for (const chunk of projectLocalToolExecutionProfileChunks(executedToolResults, "local-tool:invoke-fallback")) {
+            yield chunk;
+        }
         const invokeExecutionEvidence = buildExecutionRecoveryEvidence({ projectionState, executedToolResults });
         if (hasUnresolvedExecution(invokeExecutionEvidence)) {
             throw createUnresolvedExecutionError(invokeExecutionEvidence);
@@ -984,7 +1137,7 @@ export async function* streamRuntimeExecution(options) {
         const invokeFallbackPlanWithoutEvidenceRecoveryInstruction = requiresPlanEvidence(options.binding)
             && invokeExecutionEvidence.hasPlanStateEvidence
             && !invokeExecutionEvidence.hasSuccessfulNonTodoToolResultEvidence
-            ? RUN_EVIDENCE_AFTER_PREMATURE_PLAN_CLOSE_INSTRUCTION
+            ? buildRunEvidenceAfterPlanInstruction(options.primaryTools)
             : null;
         const effectiveInvokeFallbackRecoveryInstruction = invokeFallbackIncompletePlanRecoveryInstruction
             ?? invokeFallbackPlanWithoutEvidenceRecoveryInstruction
@@ -992,10 +1145,20 @@ export async function* streamRuntimeExecution(options) {
             ?? invokeFallbackDelegationOnlyRecoveryInstruction
             ?? invokeFallbackRecoveryInstruction;
         if (effectiveInvokeFallbackRecoveryInstruction) {
-            const recovered = await options.invoke(options.applyToolRecoveryInstruction(options.binding, effectiveInvokeFallbackRecoveryInstruction), options.input, options.sessionId, options.runtimeOptions.requestId ?? options.sessionId, undefined, options.history, options.runtimeOptions);
+            const invokeFallbackRuntimeOptions = effectiveInvokeFallbackRecoveryInstruction === invokeFallbackIncompletePlanRecoveryInstruction
+                || effectiveInvokeFallbackRecoveryInstruction === invokeFallbackPlanWithoutEvidenceRecoveryInstruction
+                ? {
+                    ...withSuppressedInitialRequiredPlanInstruction(options.runtimeOptions),
+                    externalPlanEvidence: true,
+                }
+                : options.runtimeOptions;
+            const recovered = await options.invoke(options.applyToolRecoveryInstruction(options.binding, effectiveInvokeFallbackRecoveryInstruction), options.input, options.sessionId, options.runtimeOptions.requestId ?? options.sessionId, undefined, options.history, invokeFallbackRuntimeOptions);
             const recoveredToolResults = Array.isArray(recovered.metadata?.executedToolResults)
                 ? recovered.metadata.executedToolResults
                 : [];
+            for (const chunk of projectLocalToolExecutionProfileChunks(recoveredToolResults, "local-tool:invoke-fallback-recovery")) {
+                yield chunk;
+            }
             const originalExecutionEvidence = buildExecutionRecoveryEvidence({ projectionState, executedToolResults });
             const recoveredExecutionEvidence = buildExecutionRecoveryEvidence({
                 projectionState: createStreamEventProjectionState(),

package/dist/runtime/adapter/invocation-result.js CHANGED Viewed

@@ -41,11 +41,23 @@ function hasStateSnapshotPlan(stateSnapshot) {
         && stateSnapshot !== null
         && Array.isArray(stateSnapshot.todos);
 }
+function normalizePlanToolName(toolName) {
+    return typeof toolName === "string" ? toolName.trim().toLowerCase().replace(/[\s-]+/gu, "_") : "";
+}
+function isPlanToolName(toolName) {
+    const normalized = normalizePlanToolName(toolName);
+    return normalized === "write_todos"
+        || normalized === "read_todos"
+        || normalized === "tool_call_write_todos"
+        || normalized === "tool_call_read_todos"
+        || normalized === "call_write_todos"
+        || normalized === "call_read_todos";
+}
 function hasPlanToolEvidence(executedToolResults) {
-    return executedToolResults.some((item) => item.toolName === "write_todos" || item.toolName === "read_todos");
+    return executedToolResults.some((item) => isPlanToolName(item.toolName));
 }
 function hasExecutionToolEvidence(executedToolResults) {
-    return executedToolResults.some((item) => item.isError !== true && item.toolName !== "write_todos" && item.toolName !== "read_todos");
+    return executedToolResults.some((item) => item.isError !== true && !isPlanToolName(item.toolName));
 }
 function isPlaceholderTaskCompletion(value) {
     const normalized = sanitizeVisibleText(value).trim();
@@ -143,7 +155,7 @@ function extractLatestSuccessfulNonTodoToolResultText(executedToolResults) {
         if (toolResult.isError === true) {
             continue;
         }
-        if (toolResult.toolName === "task" || toolResult.toolName === "write_todos" || toolResult.toolName === "read_todos") {
+        if (toolResult.toolName === "task" || isPlanToolName(toolResult.toolName)) {
             continue;
         }
         const normalized = normalizeToolOutputText(toolResult.output);
@@ -258,8 +270,7 @@ function looksLikeContradictedToolExecutionFailure(value) {
 }
 function extractDeterministicToolFailureReport(executedToolResults) {
     const hasSuccessfulSubstantiveTool = executedToolResults.some((toolResult) => (toolResult.isError !== true
-        && toolResult.toolName !== "write_todos"
-        && toolResult.toolName !== "read_todos"));
+        && !isPlanToolName(toolResult.toolName)));
     if (hasSuccessfulSubstantiveTool) {
         return "";
     }
@@ -401,7 +412,7 @@ export function finalizeRequestResult(params) {
         && !visibleOutput
         && !preliminaryTerminalStatus
         && !output.trim()
-        && allExecutedToolResults.some((toolResult) => toolResult.isError !== true && toolResult.toolName !== "write_todos" && toolResult.toolName !== "read_todos");
+        && allExecutedToolResults.some((toolResult) => toolResult.isError !== true && !isPlanToolName(toolResult.toolName));
     if (hasMissingRequiredPlanEvidence) {
         output = "runtime_error=Agent ended before producing required plan evidence.";
     }

package/dist/runtime/adapter/local-tool-invocation.d.ts CHANGED Viewed

@@ -15,10 +15,11 @@ type LocalToolInvocationParams = {
     builtinExecutableTools: Map<string, ExecutableTool>;
     callRuntimeWithToolParseRecovery: (request: unknown) => Promise<Record<string, unknown>>;
     toolRuntimeContext?: Record<string, unknown>;
+    externalPlanEvidence?: boolean;
 };
 type LocalToolInvocationResult = {
     result: Record<string, unknown>;
     executedToolResults: ExecutedToolResult[];
 };
-export declare function runLocalToolInvocationLoop({ binding, request, primaryTools, toolNameMapping, executableTools, builtinExecutableTools, callRuntimeWithToolParseRecovery, toolRuntimeContext, }: LocalToolInvocationParams): Promise<LocalToolInvocationResult>;
+export declare function runLocalToolInvocationLoop({ binding, request, primaryTools, toolNameMapping, executableTools, builtinExecutableTools, callRuntimeWithToolParseRecovery, toolRuntimeContext, externalPlanEvidence, }: LocalToolInvocationParams): Promise<LocalToolInvocationResult>;
 export {};