npm - @botbotgo/agent-harness - Versions diffs - 0.0.420 → 0.0.422 - Mend

@botbotgo/agent-harness 0.0.420 → 0.0.422

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (15)

package/dist/package-version.d.ts +1 -1
package/dist/package-version.js +1 -1
package/dist/runtime/adapter/flow/invocation-flow.d.ts +10 -0
package/dist/runtime/adapter/flow/invocation-flow.js +2 -0
package/dist/runtime/adapter/flow/invoke-runtime.d.ts +10 -0
package/dist/runtime/adapter/flow/invoke-runtime.js +2 -0
package/dist/runtime/adapter/flow/stream-runtime.d.ts +20 -0
package/dist/runtime/adapter/flow/stream-runtime.js +201 -3
package/dist/runtime/adapter/local-tool-invocation.d.ts +11 -1
package/dist/runtime/adapter/local-tool-invocation.js +221 -9
package/dist/runtime/adapter/middleware-assembly.js +294 -20
package/dist/runtime/agent-runtime-adapter.js +657 -48
package/dist/runtime/agent-runtime-assembly.js +1 -1
package/dist/runtime/harness/run/inspection.js +9 -1
package/package.json +1 -1

package/dist/package-version.d.ts CHANGED Viewed

@@ -1,2 +1,2 @@
-export declare const AGENT_HARNESS_VERSION = "0.0.420";
+export declare const AGENT_HARNESS_VERSION = "0.0.422";
 export declare const AGENT_HARNESS_RELEASE_DATE = "2026-05-03";

package/dist/package-version.js CHANGED Viewed

@@ -1,2 +1,2 @@
-export const AGENT_HARNESS_VERSION = "0.0.420";
+export const AGENT_HARNESS_VERSION = "0.0.422";
 export const AGENT_HARNESS_RELEASE_DATE = "2026-05-03";

package/dist/runtime/adapter/flow/invocation-flow.d.ts CHANGED Viewed

@@ -16,6 +16,16 @@ export declare function executeRequestInvocation(options: {
         toolRuntimeContext?: Record<string, unknown>;
         suppressInitialRequiredPlanInstruction?: boolean;
         externalPlanEvidence?: boolean;
+        externalPlanEvidenceTool?: {
+            name: string;
+            args?: Record<string, unknown>;
+            id?: string;
+        };
+        externalPlanEvidenceTools?: Array<{
+            name: string;
+            args?: Record<string, unknown>;
+            id?: string;
+        }>;
     };
     resolveTools: (tools: CompiledTool[], binding?: CompiledAgentBinding) => unknown[];
     getToolNameMapping: (binding: CompiledAgentBinding) => ToolNameMapping;

package/dist/runtime/adapter/flow/invocation-flow.js CHANGED Viewed

@@ -339,6 +339,8 @@ export async function executeRequestInvocation(options) {
         callRuntimeWithToolParseRecovery: options.callRuntimeWithToolParseRecovery,
         toolRuntimeContext: invokeOptions.toolRuntimeContext,
         externalPlanEvidence: invokeOptions.externalPlanEvidence,
+        externalPlanEvidenceTool: invokeOptions.externalPlanEvidenceTool,
+        externalPlanEvidenceTools: invokeOptions.externalPlanEvidenceTools,
     });
     let localOrUpstreamInvocation = await invokeOnce(request);
     if (options.resumePayload === undefined

package/dist/runtime/adapter/flow/invoke-runtime.d.ts CHANGED Viewed

@@ -18,6 +18,16 @@ export declare function invokeRuntimeWithLocalTools(options: {
     callRuntimeWithToolParseRecovery: (request: unknown) => Promise<Record<string, unknown>>;
     toolRuntimeContext?: Record<string, unknown>;
     externalPlanEvidence?: boolean;
+    externalPlanEvidenceTool?: {
+        name: string;
+        args?: Record<string, unknown>;
+        id?: string;
+    };
+    externalPlanEvidenceTools?: Array<{
+        name: string;
+        args?: Record<string, unknown>;
+        id?: string;
+    }>;
 }): Promise<{
     result: Record<string, unknown>;
     executedToolResults: ExecutedToolResult[];

package/dist/runtime/adapter/flow/invoke-runtime.js CHANGED Viewed

@@ -16,5 +16,7 @@ export async function invokeRuntimeWithLocalTools(options) {
         callRuntimeWithToolParseRecovery: options.callRuntimeWithToolParseRecovery,
         toolRuntimeContext: options.toolRuntimeContext,
         externalPlanEvidence: options.externalPlanEvidence,
+        externalPlanEvidenceTool: options.externalPlanEvidenceTool,
+        externalPlanEvidenceTools: options.externalPlanEvidenceTools,
     });
 }

package/dist/runtime/adapter/flow/stream-runtime.d.ts CHANGED Viewed

@@ -23,6 +23,16 @@ export declare function streamRuntimeExecution(options: {
         toolRuntimeContext?: Record<string, unknown>;
         suppressInitialRequiredPlanInstruction?: boolean;
         externalPlanEvidence?: boolean;
+        externalPlanEvidenceTool?: {
+            name: string;
+            args?: Record<string, unknown>;
+            id?: string;
+        };
+        externalPlanEvidenceTools?: Array<{
+            name: string;
+            args?: Record<string, unknown>;
+            id?: string;
+        }>;
     };
     primaryTools: CompiledTool[];
     toolNameMapping: ToolNameMapping;
@@ -51,6 +61,16 @@ export declare function streamRuntimeExecution(options: {
         toolRuntimeContext?: Record<string, unknown>;
         suppressInitialRequiredPlanInstruction?: boolean;
         externalPlanEvidence?: boolean;
+        externalPlanEvidenceTool?: {
+            name: string;
+            args?: Record<string, unknown>;
+            id?: string;
+        };
+        externalPlanEvidenceTools?: Array<{
+            name: string;
+            args?: Record<string, unknown>;
+            id?: string;
+        }>;
     }) => Promise<{
         output: string;
         metadata?: Record<string, unknown>;

package/dist/runtime/adapter/flow/stream-runtime.js CHANGED Viewed

@@ -128,6 +128,182 @@ function hasIncompletePlanOutput(value) {
     }
     return null;
 }
+function extractInProgressTodoContents(value) {
+    if (typeof value !== "object" || value === null) {
+        return [];
+    }
+    const typed = value;
+    const arrays = [typed.todos, typed.items];
+    const contents = [];
+    for (const candidate of arrays) {
+        if (!Array.isArray(candidate)) {
+            continue;
+        }
+        for (const todo of candidate) {
+            if (typeof todo !== "object" || todo === null) {
+                continue;
+            }
+            const item = todo;
+            if (item.status !== "in_progress") {
+                continue;
+            }
+            const content = [item.content, item.description, item.title, item.name, item.text]
+                .find((value) => typeof value === "string" && value.trim().length > 0);
+            if (content) {
+                contents.push(content.trim());
+            }
+        }
+    }
+    for (const nested of [typed.summary, typed.update, typed.data, typed.output]) {
+        contents.push(...extractInProgressTodoContents(nested));
+    }
+    return [...new Set(contents)];
+}
+function extractTodoContentsForEvidenceResolution(value) {
+    const inProgress = extractTodoContentsByStatus(value, "in_progress");
+    if (inProgress.length > 0) {
+        return inProgress;
+    }
+    return extractTodoContentsByStatus(value, "pending");
+}
+function extractTodoContentsByStatus(value, status) {
+    if (Array.isArray(value)) {
+        return value.flatMap((item) => extractTodoContentsByStatus(item, status));
+    }
+    if (typeof value !== "object" || value === null) {
+        return [];
+    }
+    const typed = value;
+    const arrays = [typed.todos, typed.items];
+    const contents = [];
+    for (const array of arrays) {
+        if (!Array.isArray(array)) {
+            continue;
+        }
+        for (const item of array) {
+            if (typeof item !== "object" || item === null) {
+                continue;
+            }
+            const typedItem = item;
+            if (typedItem.status !== status) {
+                continue;
+            }
+            const content = [
+                typedItem.content,
+                typedItem.description,
+                typedItem.title,
+                typedItem.name,
+                typedItem.text,
+            ].find((value) => typeof value === "string" && value.trim().length > 0);
+            if (content) {
+                contents.push(content.trim());
+            }
+        }
+    }
+    for (const nested of [typed.summary, typed.update, typed.data, typed.output]) {
+        contents.push(...extractTodoContentsByStatus(nested, status));
+    }
+    return [...new Set(contents)];
+}
+function buildRunCommittedTodoEvidenceInstruction(primaryTools, planToolOutput) {
+    const todoContents = extractInProgressTodoContents(planToolOutput);
+    const todoText = todoContents.length > 0
+        ? todoContents.map((content, index) => `${index + 1}. ${content}`).join("\n")
+        : "(no in-progress todo content was readable)";
+    return [
+        buildRunEvidenceAfterPlanInstruction(primaryTools),
+        "",
+        "The completed write_todos result contains these in-progress evidence commitments:",
+        todoText,
+        "",
+        "Your next action must execute the non-planning tool named by the in-progress TODO. If every TODO is pending, execute the non-planning tool named by the first pending TODO. Do not call write_todos or read_todos again before that evidence tool returns.",
+    ].join("\n");
+}
+function resolveCommittedPlanEvidenceTool(primaryTools, planToolOutput) {
+    return resolveCommittedPlanEvidenceTools(primaryTools, planToolOutput)[0];
+}
+function resolveCommittedPlanEvidenceTools(primaryTools, planToolOutput) {
+    const availableToolNames = primaryTools
+        .map(readPrimaryToolName)
+        .filter((name) => name.length > 0 && !isPlanToolName(name));
+    const todoContents = extractTodoContentsForEvidenceResolution(planToolOutput);
+    if (todoContents.length === 0) {
+        return [];
+    }
+    const toolsByName = new Map(primaryTools.map((tool) => [tool.name, tool]));
+    const resolved = [];
+    const seen = new Set();
+    for (const content of todoContents) {
+        const todoText = content.toLowerCase();
+        const matches = availableToolNames.filter((name) => todoText.includes(name.toLowerCase()));
+        const selectedNames = matches.length === 1
+            ? [matches[0]]
+            : resolveBestScoredToolNames(availableToolNames, toolsByName, todoText);
+        for (const selectedName of selectedNames) {
+            if (seen.has(selectedName)) {
+                continue;
+            }
+            seen.add(selectedName);
+            const matchedTool = toolsByName.get(selectedName);
+            const args = buildCommittedPlanEvidenceToolArgs(matchedTool, content);
+            resolved.push({
+                name: selectedName,
+                args,
+                id: `stream-committed-plan-evidence-tool-${resolved.length + 1}`,
+            });
+        }
+    }
+    return resolved;
+}
+function extractSelectionTokens(value) {
+    const tokens = new Set();
+    for (const match of value.matchAll(/[\p{L}\p{N}_-]+/gu)) {
+        const token = match[0].toLowerCase();
+        if (token.length >= 2) {
+            tokens.add(token);
+        }
+        for (const part of token.split(/[_-]+/u)) {
+            if (part.length >= 2) {
+                tokens.add(part);
+            }
+        }
+    }
+    return tokens;
+}
+function resolveBestScoredToolNames(availableToolNames, toolsByName, todoText) {
+    const requestTokens = extractSelectionTokens(todoText);
+    const scored = availableToolNames
+        .map((name) => {
+        const tool = toolsByName.get(name);
+        const toolTokens = extractSelectionTokens(`${name} ${tool?.description ?? ""}`);
+        const toolNameTokens = extractSelectionTokens(name);
+        let score = 0;
+        for (const token of requestTokens) {
+            if (toolNameTokens.has(token)) {
+                score += 10;
+                continue;
+            }
+            if (toolTokens.has(token)) {
+                score += token.length > 3 ? 2 : 1;
+            }
+        }
+        return { name, score };
+    })
+        .filter((item) => item.score > 0)
+        .sort((left, right) => right.score - left.score);
+    const topScore = scored[0]?.score ?? 0;
+    return topScore > 0 ? scored.filter((item) => item.score === topScore).map((item) => item.name) : [];
+}
+function buildCommittedPlanEvidenceToolArgs(tool, todoText) {
+    const properties = typeof tool?.modelSchema === "object" && tool.modelSchema !== null
+        ? tool.modelSchema.properties
+        : undefined;
+    if (typeof properties !== "object" || properties === null) {
+        return {};
+    }
+    const queryLikeField = ["query", "question", "prompt", "input", "text"].find((field) => Object.prototype.hasOwnProperty.call(properties, field));
+    return queryLikeField ? { [queryLikeField]: todoText } : {};
+}
 function normalizePlanToolName(toolName) {
     return typeof toolName === "string" ? toolName.trim().toLowerCase().replace(/[\s-]+/gu, "_") : "";
 }
@@ -655,6 +831,7 @@ export async function* streamRuntimeExecution(options) {
                     let sawCompletedPlanToolResult = false;
                     let sawSuccessfulNonTodoToolResult = false;
                     let earlyStreamRecoveryInstruction = null;
+                    let earlyStreamExternalPlanEvidenceTools;
                     let earlyStreamRecoverySuppressInitialPlan = false;
                     let completedPlanToolResultCount = 0;
                     for await (const event of options.iterateWithTimeout(events, options.streamIdleTimeoutMs, "agent streamEvents", options.streamDeadlineAt, options.invokeTimeoutMs)) {
@@ -677,12 +854,15 @@ export async function* streamRuntimeExecution(options) {
                             && chunk.kind !== "content"
                             && !(chunk.kind === "tool-result" && isPlanToolName(chunk.toolName))
                             && !(chunk.kind === "tool-result" && chunk.isError === true && isRetrySafeInvalidToolSelectionError(chunk.output)));
+                        const hadPriorPlanToolResult = completedPlanToolResultCount > 0;
                         const repeatedPlanToolResultBeforeEvidence = requiresPlanEvidence(options.binding)
                             && !sawSuccessfulNonTodoToolResult
-                            && completedPlanToolResultCount > 0
+                            && hadPriorPlanToolResult
                             && projectedChunks.some((chunk) => chunk.kind === "tool-result" && isPlanToolName(chunk.toolName));
                         if (repeatedPlanToolResultBeforeEvidence) {
-                            earlyStreamRecoveryInstruction = buildRunEvidenceAfterPlanInstruction(options.primaryTools);
+                            const planToolResult = projectedChunks.find((chunk) => chunk.kind === "tool-result" && isPlanToolName(chunk.toolName) && chunk.isError !== true);
+                            earlyStreamExternalPlanEvidenceTools = resolveCommittedPlanEvidenceTools(options.primaryTools, planToolResult?.kind === "tool-result" ? planToolResult.output : undefined);
+                            earlyStreamRecoveryInstruction = buildRunCommittedTodoEvidenceInstruction(options.primaryTools, planToolResult?.kind === "tool-result" ? planToolResult.output : undefined);
                             earlyStreamRecoverySuppressInitialPlan = true;
                             break;
                         }
@@ -721,6 +901,18 @@ export async function* streamRuntimeExecution(options) {
                             }
                             yield chunk;
                         }
+                        const eventContainsPlanToolResult = projectedChunks.some((chunk) => chunk.kind === "tool-result" && isPlanToolName(chunk.toolName) && chunk.isError !== true);
+                        if (requiresPlanEvidence(options.binding)
+                            && eventContainsPlanToolResult
+                            && (hadPriorPlanToolResult
+                                || projectedChunks.some((chunk) => isCompletedPlanToolResultChunk(chunk)))
+                            && !sawSuccessfulNonTodoToolResult) {
+                            const planToolResult = projectedChunks.find((chunk) => chunk.kind === "tool-result" && isPlanToolName(chunk.toolName) && chunk.isError !== true);
+                            earlyStreamExternalPlanEvidenceTools = resolveCommittedPlanEvidenceTools(options.primaryTools, planToolResult?.kind === "tool-result" ? planToolResult.output : undefined);
+                            earlyStreamRecoveryInstruction = buildRunCommittedTodoEvidenceInstruction(options.primaryTools, planToolResult?.kind === "tool-result" ? planToolResult.output : undefined);
+                            earlyStreamRecoverySuppressInitialPlan = true;
+                            break;
+                        }
                         if (requiresPlanEvidence(options.binding) && sawCompletedPlanToolResult && sawSuccessfulNonTodoToolResult) {
                             if (hasUsefulVisibleSynthesis(projectionState.emittedOutput)) {
                                 if (deferredStreamContent.length > 0) {
@@ -794,7 +986,13 @@ export async function* streamRuntimeExecution(options) {
                     }
                     if (earlyStreamRecoveryInstruction) {
                         const earlyRecoveryRuntimeOptions = earlyStreamRecoverySuppressInitialPlan
-                            ? withSuppressedInitialRequiredPlanInstruction(options.runtimeOptions)
+                            ? {
+                                ...withSuppressedInitialRequiredPlanInstruction(options.runtimeOptions),
+                                externalPlanEvidence: true,
+                                ...(earlyStreamExternalPlanEvidenceTools && earlyStreamExternalPlanEvidenceTools.length > 0
+                                    ? { externalPlanEvidenceTools: earlyStreamExternalPlanEvidenceTools }
+                                    : {}),
+                            }
                             : options.runtimeOptions;
                         const recovered = await options.invoke(options.applyToolRecoveryInstruction(options.binding, earlyStreamRecoveryInstruction), options.input, options.sessionId, options.runtimeOptions.requestId ?? options.sessionId, undefined, options.history, earlyRecoveryRuntimeOptions);
                         const recoveredToolResults = Array.isArray(recovered.metadata?.executedToolResults)

package/dist/runtime/adapter/local-tool-invocation.d.ts CHANGED Viewed

@@ -16,10 +16,20 @@ type LocalToolInvocationParams = {
     callRuntimeWithToolParseRecovery: (request: unknown) => Promise<Record<string, unknown>>;
     toolRuntimeContext?: Record<string, unknown>;
     externalPlanEvidence?: boolean;
+    externalPlanEvidenceTool?: {
+        name: string;
+        args?: Record<string, unknown>;
+        id?: string;
+    };
+    externalPlanEvidenceTools?: Array<{
+        name: string;
+        args?: Record<string, unknown>;
+        id?: string;
+    }>;
 };
 type LocalToolInvocationResult = {
     result: Record<string, unknown>;
     executedToolResults: ExecutedToolResult[];
 };
-export declare function runLocalToolInvocationLoop({ binding, request, primaryTools, toolNameMapping, executableTools, builtinExecutableTools, callRuntimeWithToolParseRecovery, toolRuntimeContext, externalPlanEvidence, }: LocalToolInvocationParams): Promise<LocalToolInvocationResult>;
+export declare function runLocalToolInvocationLoop({ binding, request, primaryTools, toolNameMapping, executableTools, builtinExecutableTools, callRuntimeWithToolParseRecovery, toolRuntimeContext, externalPlanEvidence, externalPlanEvidenceTool, externalPlanEvidenceTools, }: LocalToolInvocationParams): Promise<LocalToolInvocationResult>;
 export {};

package/dist/runtime/adapter/local-tool-invocation.js CHANGED Viewed

@@ -27,20 +27,57 @@ function stringifyRequestForToolSelection(request) {
         return "";
     }
 }
+function extractSelectionTokens(value) {
+    const tokens = new Set();
+    for (const match of value.matchAll(/[\p{L}\p{N}_-]+/gu)) {
+        const token = match[0].toLowerCase();
+        if (token.length >= 2) {
+            tokens.add(token);
+        }
+    }
+    for (const match of value.matchAll(/[\p{Script=Han}]{2,}/gu)) {
+        const sequence = match[0];
+        for (let size = 2; size <= Math.min(4, sequence.length); size += 1) {
+            for (let index = 0; index <= sequence.length - size; index += 1) {
+                tokens.add(sequence.slice(index, index + size).toLowerCase());
+            }
+        }
+    }
+    return tokens;
+}
 function prioritizeBootstrapEvidenceTools(primaryTools, request) {
     const requestText = stringifyRequestForToolSelection(request);
+    const requestTokens = extractSelectionTokens(requestText);
     const isFinanceRequest = /\b(?:stock|ticker|finance|market|valuation|quote)\b|股票|股价|行情|估值|财报/iu.test(requestText);
     const evidenceTools = primaryTools
-        .map((tool) => typeof tool.name === "string" ? tool.name.trim() : "")
-        .filter((name) => name.length > 0 && !isPlanToolName(name))
+        .map((tool) => {
+        const name = typeof tool.name === "string" ? tool.name.trim() : "";
+        const description = typeof tool.description === "string" ? tool.description : "";
+        const toolTokens = extractSelectionTokens(`${name} ${description}`);
+        let score = 0;
+        for (const token of requestTokens) {
+            if (toolTokens.has(token)) {
+                score += token.length > 3 ? 2 : 1;
+            }
+        }
+        if (requestText.toLowerCase().includes(name.toLowerCase())) {
+            score += 6;
+        }
+        return { name, score };
+    })
+        .filter((tool) => tool.name.length > 0 && !isPlanToolName(tool.name))
         .sort((left, right) => {
+        if (right.score !== left.score) {
+            return right.score - left.score;
+        }
         if (!isFinanceRequest) {
             return 0;
         }
-        const leftFinance = left.includes("finance") ? 0 : 1;
-        const rightFinance = right.includes("finance") ? 0 : 1;
+        const leftFinance = left.name.includes("finance") ? 0 : 1;
+        const rightFinance = right.name.includes("finance") ? 0 : 1;
         return leftFinance - rightFinance;
-    });
+    })
+        .map((tool) => tool.name);
     return evidenceTools.slice(0, 4);
 }
 function createBootstrapTodoPlan(primaryTools, request) {
@@ -90,6 +127,19 @@ function buildBootstrapPlanToolResult(primaryTools, request) {
             })],
     };
 }
+function buildExternalPlanEvidenceToolResult(tools) {
+    return {
+        messages: [{
+                content: "",
+                tool_calls: tools.map((tool, index) => ({
+                    id: tool.id ?? `external-plan-evidence-${index + 1}-${Math.random().toString(36).slice(2, 10)}`,
+                    name: tool.name,
+                    args: tool.args ?? {},
+                    type: "tool_call",
+                })),
+            }],
+    };
+}
 function readPlanStateSummary(output) {
     if (typeof output !== "object" || output === null) {
         return null;
@@ -210,6 +260,76 @@ function terminalToolErrorRecoveryInstruction(terminalText) {
 function requiresPlanEvidence(binding) {
     return binding.harnessRuntime.executionContract?.requiresPlan === true;
 }
+function resolveCommittedTodoEvidenceTool(executedToolResults, primaryTools) {
+    const availableTools = primaryTools
+        .filter((tool) => typeof tool.name === "string" && tool.name.length > 0 && !isPlanToolName(tool.name));
+    if (availableTools.length === 0) {
+        return null;
+    }
+    for (let index = executedToolResults.length - 1; index >= 0; index -= 1) {
+        const result = executedToolResults[index];
+        if (!result || result.isError === true || !isPlanToolName(result.toolName)) {
+            continue;
+        }
+        const output = result.output;
+        const summary = typeof output === "object" && output !== null
+            ? output.summary
+            : undefined;
+        const items = typeof summary === "object" && summary !== null && Array.isArray(summary.items)
+            ? summary.items
+            : [];
+        const activeItems = items.filter((item) => item.status === "in_progress");
+        const candidateItems = activeItems.length > 0
+            ? activeItems
+            : items.filter((item) => item.status === "pending").slice(0, 1);
+        for (const item of candidateItems) {
+            const content = [
+                item.content,
+                item.description,
+                item.title,
+                item.name,
+                item.text,
+            ].filter((value) => typeof value === "string").join(" ").toLowerCase();
+            const matched = availableTools.map((tool) => tool.name).filter((toolName) => content.includes(toolName.toLowerCase()));
+            if (matched.length === 1) {
+                return {
+                    name: matched[0],
+                    args: {},
+                    id: `todo-committed-evidence-${index}`,
+                };
+            }
+            const requestTokens = extractSelectionTokens(content);
+            const scored = availableTools
+                .map((tool) => {
+                const toolTokens = extractSelectionTokens(`${tool.name} ${tool.description ?? ""}`);
+                let score = 0;
+                for (const token of requestTokens) {
+                    if (toolTokens.has(token)) {
+                        score += token.length > 3 ? 2 : 1;
+                    }
+                }
+                return { name: tool.name, score };
+            })
+                .filter((item) => item.score > 0)
+                .sort((left, right) => right.score - left.score);
+            if (scored[0] && (!scored[1] || scored[0].score > scored[1].score)) {
+                return {
+                    name: scored[0].name,
+                    args: {},
+                    id: `todo-committed-evidence-${index}`,
+                };
+            }
+            if (matched.length === 1) {
+                return {
+                    name: matched[0],
+                    args: {},
+                    id: `todo-committed-evidence-${index}`,
+                };
+            }
+        }
+    }
+    return null;
+}
 function extractLatestUserInput(request) {
     const typedRequest = request;
     const messages = Array.isArray(typedRequest.messages) ? typedRequest.messages : [];
@@ -268,7 +388,7 @@ function summarizeResultMessages(result) {
         };
     });
 }
-export async function runLocalToolInvocationLoop({ binding, request, primaryTools, toolNameMapping, executableTools, builtinExecutableTools, callRuntimeWithToolParseRecovery, toolRuntimeContext, externalPlanEvidence, }) {
+export async function runLocalToolInvocationLoop({ binding, request, primaryTools, toolNameMapping, executableTools, builtinExecutableTools, callRuntimeWithToolParseRecovery, toolRuntimeContext, externalPlanEvidence, externalPlanEvidenceTool, externalPlanEvidenceTools, }) {
     const executedToolResults = [];
     let activeRequest = request;
     let currentMessages = Array.isArray(activeRequest.messages) ? [...activeRequest.messages] : [];
@@ -289,9 +409,39 @@ export async function runLocalToolInvocationLoop({ binding, request, primaryTool
     }
     for (let iteration = 0; iteration < maxToolIterations; iteration += 1) {
         const isFinalIteration = iteration + 1 === maxToolIterations;
-        result = pendingResult ?? await callRuntimeWithToolParseRecovery(activeRequest);
+        const externalPlanEvidenceToolCalls = externalPlanEvidenceTools && externalPlanEvidenceTools.length > 0
+            ? externalPlanEvidenceTools
+            : externalPlanEvidenceTool
+                ? [externalPlanEvidenceTool]
+                : [];
+        const shouldRunExternalPlanEvidenceTool = pendingResult === undefined
+            && requiresPlanEvidence(binding)
+            && externalPlanEvidence === true
+            && externalPlanEvidenceToolCalls.length > 0
+            && !hasNonTodoToolEvidence(executedToolResults);
+        const usedExternalPlanEvidenceToolThisIteration = shouldRunExternalPlanEvidenceTool;
+        result = pendingResult
+            ?? (shouldRunExternalPlanEvidenceTool
+                ? buildExternalPlanEvidenceToolResult(externalPlanEvidenceToolCalls)
+                : await callRuntimeWithToolParseRecovery(activeRequest));
         pendingResult = undefined;
-        const toolCalls = extractToolCallsFromResult(result);
+        let toolCalls = extractToolCallsFromResult(result);
+        const committedTodoEvidenceTool = requiresPlanEvidence(binding)
+            && hasPlanStateEvidence(executedToolResults, externalPlanEvidence)
+            && !hasNonTodoToolEvidence(executedToolResults)
+            && (externalPlanEvidenceTool !== undefined || !hasIncompleteExecutedPlan(executedToolResults, externalPlanEvidence))
+            && (toolCalls.length === 0 || toolCalls.every((toolCall) => isPlanToolName(toolCall.name)))
+            ? externalPlanEvidenceTool
+                ? {
+                    name: externalPlanEvidenceTool.name,
+                    args: externalPlanEvidenceTool.args ?? {},
+                    id: externalPlanEvidenceTool.id ?? "external-plan-evidence-tool",
+                }
+                : resolveCommittedTodoEvidenceTool(executedToolResults, primaryTools)
+            : null;
+        if (committedTodoEvidenceTool) {
+            toolCalls = [committedTodoEvidenceTool];
+        }
         if (toolCalls.length === 0) {
             const terminalText = sanitizeVisibleText(extractVisibleOutput(result) || "");
             const hasIncompletePlanState = hasIncompleteExecutedPlan(executedToolResults, externalPlanEvidence);
@@ -415,7 +565,8 @@ export async function runLocalToolInvocationLoop({ binding, request, primaryTool
         }
         repeatedRecoveryWithoutProgress = 0;
         repeatedPlanOnlyAfterPlan = 0;
-        const canReplayToolCalls = canReplayToolCallsLocally(binding, toolCalls, primaryTools, toolNameMapping, executableTools, builtinExecutableTools);
+        const canReplayToolCalls = usedExternalPlanEvidenceToolThisIteration
+            || canReplayToolCallsLocally(binding, toolCalls, primaryTools, toolNameMapping, executableTools, builtinExecutableTools);
         debugLocalToolReplay({
             toolCalls,
             result,
@@ -502,6 +653,67 @@ export async function runLocalToolInvocationLoop({ binding, request, primaryTool
                 content: stringifyToolOutput(safeToolResult),
             }));
         }
+        const committedEvidenceTool = requiresPlanEvidence(binding)
+            && !hadNonTodoEvidenceBeforeToolReplay
+            && !hasNonTodoToolEvidence(executedToolResults)
+            && !hasIncompleteExecutedPlan(executedToolResults, externalPlanEvidence)
+            ? resolveCommittedTodoEvidenceTool(executedToolResults, primaryTools)
+            : null;
+        if (committedEvidenceTool) {
+            const resolvedToolName = resolveModelFacingToolName(committedEvidenceTool.name, toolNameMapping, primaryTools);
+            const executable = executableTools.get(committedEvidenceTool.name) ?? executableTools.get(resolvedToolName);
+            if (executable) {
+                const compiledTool = toolCatalog.get(committedEvidenceTool.name) ?? toolCatalog.get(resolvedToolName);
+                const normalizedArgs = normalizeToolArgsForSchema(committedEvidenceTool.args, executable.schema, undefined, {
+                    latestUserInput,
+                });
+                const gateway = validateToolGatewayInput({
+                    toolName: executable.name,
+                    schema: executable.schema,
+                    args: normalizedArgs,
+                    requiresApproval: compiledTool ? toolRequiresRuntimeApproval(compiledTool) : false,
+                });
+                if (gateway.ok) {
+                    const toolResult = toolRuntimeContext
+                        ? await executable.invoke(gateway.input, { toolRuntimeContext })
+                        : await executable.invoke(gateway.input);
+                    const memoryCandidates = compiledTool ? extractMemoryCandidatesFromToolOutput(compiledTool, toolResult) : [];
+                    const safeToolResult = await maybePersistLargeToolOutput({
+                        toolName: executable.name,
+                        output: toolResult,
+                        toolRuntimeContext,
+                    });
+                    executedToolResults.push({
+                        toolName: executable.name,
+                        output: safeToolResult,
+                        ...(memoryCandidates.length > 0 ? { memoryCandidates } : {}),
+                    });
+                    nextMessages.push(new ToolMessage({
+                        name: executable.name,
+                        tool_call_id: committedEvidenceTool.id,
+                        content: stringifyToolOutput(safeToolResult),
+                    }));
+                }
+                else {
+                    executedToolResults.push({
+                        toolName: executable.name,
+                        output: gateway.error,
+                        isError: true,
+                    });
+                    nextMessages.push(new ToolMessage({
+                        name: executable.name,
+                        tool_call_id: committedEvidenceTool.id,
+                        content: stringifyToolOutput(gateway.error),
+                    }));
+                }
+            }
+        }
+        if (usedExternalPlanEvidenceToolThisIteration && hasNonTodoToolEvidence(executedToolResults)) {
+            return {
+                result: buildDeterministicFinalFromToolEvidence(executedToolResults),
+                executedToolResults,
+            };
+        }
         if (requiresPlanEvidence(binding)
             && toolCalls.length > 0
             && toolCalls.every((toolCall) => isPlanToolName(toolCall.name))