npm - @botbotgo/agent-harness - Versions diffs - 0.0.326 → 0.0.328 - Mend

@botbotgo/agent-harness 0.0.326 → 0.0.328

This diff shows the differences between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (56)

package/dist/cli/chat-stream.js +33 -27
package/dist/cli/main.js +30 -3
package/dist/contracts/runtime-requests.d.ts +1 -2
package/dist/contracts/runtime-scheduling.d.ts +1 -1
package/dist/flow/flow-graph-upstream.js +3 -7
package/dist/package-version.d.ts +1 -1
package/dist/package-version.js +1 -1
package/dist/projections/request-events.js +0 -1
package/dist/resource/isolation.js +51 -10
package/dist/resources/toolkit.mjs +183 -0
package/dist/resources/tools/cancel_request.mjs +1 -1
package/dist/resources/tools/fetch_url.mjs +1 -1
package/dist/resources/tools/http_request.mjs +1 -1
package/dist/resources/tools/inspect_approvals.mjs +1 -1
package/dist/resources/tools/inspect_artifacts.mjs +1 -1
package/dist/resources/tools/inspect_events.mjs +1 -1
package/dist/resources/tools/inspect_requests.mjs +1 -1
package/dist/resources/tools/inspect_sessions.mjs +1 -1
package/dist/resources/tools/list_files.mjs +1 -1
package/dist/resources/tools/read_artifact.mjs +1 -1
package/dist/resources/tools/request_approval.mjs +1 -1
package/dist/resources/tools/run_command.mjs +1 -1
package/dist/resources/tools/schedule_task.mjs +1 -1
package/dist/resources/tools/search_files.mjs +1 -1
package/dist/resources/tools/send_message.mjs +1 -1
package/dist/runtime/adapter/compat/deepagent-compat.d.ts +0 -9
package/dist/runtime/adapter/compat/deepagent-compat.js +0 -22
package/dist/runtime/adapter/flow/stream-runtime.d.ts +4 -0
package/dist/runtime/adapter/flow/stream-runtime.js +239 -8
package/dist/runtime/adapter/local-tool-invocation.js +53 -0
package/dist/runtime/adapter/middleware-assembly.js +174 -29
package/dist/runtime/adapter/runtime-adapter-support.js +1 -2
package/dist/runtime/adapter/stream-event-projection.d.ts +17 -0
package/dist/runtime/adapter/stream-event-projection.js +217 -4
package/dist/runtime/adapter/tool/builtin-middleware-tools.d.ts +0 -3
package/dist/runtime/adapter/tool/builtin-middleware-tools.js +37 -17
package/dist/runtime/adapter/tool/resolved-tool.js +29 -3
package/dist/runtime/agent-runtime-adapter.d.ts +3 -3
package/dist/runtime/agent-runtime-adapter.js +12 -33
package/dist/runtime/agent-runtime-assembly.d.ts +3 -21
package/dist/runtime/agent-runtime-assembly.js +4 -56
package/dist/runtime/harness/run/inspection.js +21 -5
package/dist/runtime/harness/run/run-operations.js +2 -1
package/dist/runtime/harness/run/stream-run.d.ts +3 -1
package/dist/runtime/harness/run/stream-run.js +206 -30
package/dist/runtime/harness.js +3 -0
package/dist/runtime/parsing/output-content.js +11 -4
package/dist/runtime/parsing/output-recovery.d.ts +3 -0
package/dist/runtime/parsing/output-recovery.js +57 -11
package/dist/runtime/parsing/output-tool-args.d.ts +4 -0
package/dist/runtime/parsing/output-tool-args.js +122 -0
package/dist/runtime/parsing/stream-event-parsing.js +37 -3
package/dist/runtime/support/harness-support.d.ts +1 -0
package/dist/runtime/support/harness-support.js +44 -2
package/dist/tools.js +34 -4
package/package.json +8 -8

package/dist/runtime/adapter/compat/deepagent-compat.d.ts CHANGED

@@ -1,9 +1,3 @@
-import type { CompiledModel, CompiledSubAgent } from "../../../contracts/types.js";
-type DelegationPromptCompatibilityParams = {
-    subagents?: CompiledSubAgent[];
-    generalPurposeAgent?: boolean;
-    taskDescription?: string;
-};
 export declare function relativizeDeepAgentSkillSourcePaths(workspaceRoot: string | undefined, skillPaths: string[] | undefined): string[] | undefined;
 export declare function materializeDeepAgentSkillSourcePaths(options: {
     workspaceRoot?: string;
@@ -17,6 +11,3 @@ export declare function resolveDeepAgentSkillSourcePaths(options: {
     ownerId: string;
     skillPaths?: string[];
 }): string[] | undefined;
-export declare function shouldRelaxDeepAgentDelegationPrompt(model: CompiledModel | undefined, params: DelegationPromptCompatibilityParams): boolean;
-export declare function applyDeepAgentDelegationPromptCompatibility<T extends DelegationPromptCompatibilityParams>(model: CompiledModel | undefined, params: T): T;
-export {};

package/dist/runtime/adapter/compat/deepagent-compat.js CHANGED

@@ -1,7 +1,4 @@
 import path from "node:path";
-function isOpenAICompatibleGptOssModel(model) {
-    return model?.provider === "openai-compatible" && model.model.trim().toLowerCase().startsWith("gpt-oss");
-}
 export function relativizeDeepAgentSkillSourcePaths(workspaceRoot, skillPaths) {
     if (!workspaceRoot || !skillPaths) {
         return skillPaths;
@@ -27,22 +24,3 @@ export function resolveDeepAgentSkillSourcePaths(options) {
     }
     return relativizeDeepAgentSkillSourcePaths(workspaceRoot, skillPaths) ?? skillPaths;
 }
-export function shouldRelaxDeepAgentDelegationPrompt(model, params) {
-    if (!isOpenAICompatibleGptOssModel(model)) {
-        return false;
-    }
-    if ((params.subagents?.length ?? 0) === 0) {
-        return false;
-    }
-    return params.generalPurposeAgent === true || Boolean(params.taskDescription?.trim());
-}
-export function applyDeepAgentDelegationPromptCompatibility(model, params) {
-    if (!shouldRelaxDeepAgentDelegationPrompt(model, params)) {
-        return params;
-    }
-    return {
-        ...params,
-        generalPurposeAgent: undefined,
-        taskDescription: undefined,
-    };
-}

package/dist/runtime/adapter/flow/stream-runtime.d.ts CHANGED

@@ -5,6 +5,9 @@ type RunnableLike = {
     stream?: (input: unknown, config?: Record<string, unknown>) => Promise<AsyncIterable<unknown>>;
     streamEvents?: (input: unknown, config?: Record<string, unknown>) => Promise<AsyncIterable<unknown>>;
 };
+export declare class ExecutionReconciliationError extends Error {
+    constructor(message: string);
+}
 export declare function streamRuntimeExecution(options: {
     binding: CompiledAgentBinding;
     input: MessageContent;
@@ -48,5 +51,6 @@ export declare function streamRuntimeExecution(options: {
     isLangChainBinding: (binding: CompiledAgentBinding) => boolean;
     isDeepAgentBinding: (binding: CompiledAgentBinding) => boolean;
     countConfiguredTools: (binding: CompiledAgentBinding) => number;
+    countConfiguredToolsForAgentId?: (agentId: string) => number;
 }): AsyncGenerator<RuntimeStreamChunk>;
 export {};

package/dist/runtime/adapter/flow/stream-runtime.js CHANGED

@@ -1,14 +1,109 @@
-import { extractVisibleOutput, isToolCallRecoveryFailure, isRetrySafeInvalidToolSelectionError, shouldValidateExecutionWithoutToolEvidence, resolveToolCallRecoveryInstruction, sanitizeVisibleText, INVALID_TOOL_SELECTION_RECOVERY_INSTRUCTION, } from "../../parsing/output-parsing.js";
+import { extractVisibleOutput, isToolCallRecoveryFailure, isRetrySafeInvalidToolSelectionError, resolveExecutionWithoutToolEvidenceTextInstruction, shouldValidateExecutionWithoutToolEvidence, resolveToolCallRecoveryInstruction, sanitizeVisibleText, INVALID_TOOL_SELECTION_RECOVERY_INSTRUCTION, } from "../../parsing/output-parsing.js";
 import { buildInvocationRequest } from "../model/invocation-request.js";
 import { buildRawModelMessages } from "../model/message-assembly.js";
 import { projectRuntimeStreamEvent, createStreamEventProjectionState } from "../stream-event-projection.js";
 import { projectTextStreamChunks } from "../stream-text-consumption.js";
 import { computeRemainingTimeoutMs } from "../resilience.js";
 import { UPSTREAM_REQUEST_CONFIG_KEY, UPSTREAM_SESSION_CONFIG_KEY } from "../upstream-configurable-keys.js";
+export class ExecutionReconciliationError extends Error {
+    constructor(message) {
+        super(message);
+        this.name = "ExecutionReconciliationError";
+    }
+}
 function toVisibleContent(value) {
     const extracted = extractVisibleOutput(value);
     return extracted ? sanitizeVisibleText(extracted) : "";
 }
+function readTerminalEventVisibleOutput(event) {
+    if (typeof event !== "object" || event === null) {
+        return "";
+    }
+    const typed = event;
+    const eventName = typeof typed.event === "string" ? typed.event : "";
+    if (eventName !== "on_chat_model_end" && eventName !== "on_chain_end") {
+        return "";
+    }
+    return toVisibleContent(typed.data?.output);
+}
+function hasIncompletePlanStateInExecutedToolResults(executedToolResults) {
+    for (const latest of [...executedToolResults].reverse()) {
+        if (typeof latest.output !== "object" || latest.output === null) {
+            continue;
+        }
+        const summaryContainer = latest.output.summary;
+        if (typeof summaryContainer !== "object" || summaryContainer === null) {
+            continue;
+        }
+        const summary = summaryContainer.summary;
+        if (typeof summary !== "object" || summary === null) {
+            continue;
+        }
+        const typedSummary = summary;
+        const pending = typeof typedSummary.pending === "number" ? typedSummary.pending : 0;
+        const inProgress = typeof typedSummary.inProgress === "number" ? typedSummary.inProgress : 0;
+        return pending > 0 || inProgress > 0;
+    }
+    return false;
+}
+function hasNonTodoToolEvidence(executedToolResults) {
+    return executedToolResults.some((item) => item.toolName !== "write_todos" && item.toolName !== "read_todos");
+}
+function hasSuccessfulNonTodoToolEvidence(executedToolResults) {
+    return executedToolResults.some((item) => item.isError !== true && item.toolName !== "write_todos" && item.toolName !== "read_todos");
+}
+function hasSuccessfulTaskToolEvidence(executedToolResults) {
+    return executedToolResults.some((item) => item.isError !== true && item.toolName === "task");
+}
+function buildExecutionRecoveryEvidence(params) {
+    const { projectionState, executedToolResults = [] } = params;
+    return {
+        hasToolResultEvidence: executedToolResults.length > 0 || projectionState.emittedToolResult || projectionState.emittedToolError,
+        hasSuccessfulToolResultEvidence: executedToolResults.some((item) => item.isError !== true) || projectionState.emittedSuccessfulToolResult,
+        hasNonTodoToolResultEvidence: hasNonTodoToolEvidence(executedToolResults) || projectionState.emittedNonTodoToolResult || projectionState.emittedToolError,
+        hasSuccessfulNonTodoToolResultEvidence: hasSuccessfulNonTodoToolEvidence(executedToolResults) || projectionState.emittedSuccessfulNonTodoToolResult,
+        hasIncompletePlanState: projectionState.hasIncompletePlanState || hasIncompletePlanStateInExecutedToolResults(executedToolResults),
+        hasPlanStateEvidence: projectionState.sawPlanState || hasIncompletePlanStateInExecutedToolResults(executedToolResults),
+        hasOpenTaskDelegation: projectionState.openTaskDelegations > 0,
+        hasFailedTaskDelegation: projectionState.hasFailedTaskDelegation,
+        hasDelegatedAgentWithConfiguredTools: projectionState.sawDelegatedAgentWithConfiguredTools,
+        hasDelegatedExecutionToolEvidence: projectionState.emittedDelegatedExecutionToolResult,
+        hasOnlyPlaceholderTaskCompletion: projectionState.emittedSuccessfulTaskResult
+            && projectionState.emittedPlaceholderTaskResult
+            && !projectionState.emittedDelegatedTerminalOutput,
+    };
+}
+function hasUnresolvedExecution(evidence) {
+    return (evidence.hasIncompletePlanState
+        || evidence.hasFailedTaskDelegation
+        || evidence.hasOpenTaskDelegation);
+}
+function hasMissingDelegatedExecutionEvidence(evidence) {
+    return evidence.hasDelegatedAgentWithConfiguredTools && !evidence.hasDelegatedExecutionToolEvidence;
+}
+function hasMissingDelegatedFindings(evidence) {
+    return evidence.hasDelegatedAgentWithConfiguredTools && evidence.hasOnlyPlaceholderTaskCompletion;
+}
+function createUnresolvedExecutionError(evidence) {
+    const reasons = [];
+    if (evidence.hasIncompletePlanState) {
+        reasons.push("plan state still has unfinished work");
+    }
+    if (evidence.hasFailedTaskDelegation) {
+        reasons.push("delegated task failed before surfacing final findings");
+    }
+    if (evidence.hasOpenTaskDelegation) {
+        reasons.push("delegated task has not finished");
+    }
+    if (hasMissingDelegatedExecutionEvidence(evidence)) {
+        reasons.push("delegated agent ended without surfacing any real tool execution evidence");
+    }
+    if (hasMissingDelegatedFindings(evidence)) {
+        reasons.push("delegated task returned only the upstream placeholder result without surfaced final findings");
+    }
+    const detail = reasons.length > 0 ? `: ${reasons.join("; ")}` : "";
+    return new ExecutionReconciliationError(`Agent ended before execution was fully reconciled${detail}.`);
+}
 function createProfileStep(id, kind, name, action, status, detail) {
     return {
         kind: "profile",
@@ -49,6 +144,7 @@ export async function* streamRuntimeExecution(options) {
     const shouldValidateStreamOutput = shouldValidateExecutionWithoutToolEvidence(request);
     const deferredStreamContent = [];
     let sawRetrySafeInvalidToolSelectionError = false;
+    const projectionState = createStreamEventProjectionState();
     const shouldDeferStreamContent = () => shouldValidateStreamOutput && !emittedUnsafeStreamSideEffects;
     const flushDeferredStreamContent = async function* () {
         while (deferredStreamContent.length > 0) {
@@ -212,7 +308,6 @@ export async function* streamRuntimeExecution(options) {
                     });
                 throw error;
             }
-            const projectionState = createStreamEventProjectionState();
             const streamEventsConsume = startProfileStep({
                 id: "profile:agent:stream-events-consume",
                 kind: "agent",
@@ -225,8 +320,12 @@ export async function* streamRuntimeExecution(options) {
                 for await (const event of options.iterateWithTimeout(events, options.streamIdleTimeoutMs, "agent streamEvents", options.streamDeadlineAt, options.invokeTimeoutMs)) {
                     const projectedChunks = projectRuntimeStreamEvent({
                         event,
-                        allowVisibleStreamDeltas: options.isLangChainBinding(options.binding),
+                        allowVisibleStreamDeltas: true,
                         includeStateStreamOutput: options.isDeepAgentBinding(options.binding),
+                        rootAgentId: typeof options.binding.agent?.id === "string"
+                            ? options.binding.agent.id
+                            : undefined,
+                        countConfiguredToolsForAgentId: options.countConfiguredToolsForAgentId,
                         toolNameMapping: options.toolNameMapping,
                         primaryTools: options.primaryTools,
                         state: projectionState,
@@ -248,12 +347,27 @@ export async function* streamRuntimeExecution(options) {
                         if (eventContainsNonTodoToolResult || eventContainsNonRetrySafeChunk) {
                             emittedUnsafeStreamSideEffects = true;
                         }
-                        if (chunk.kind === "content" && shouldDeferStreamContent()) {
+                        if (chunk.kind === "content" && (shouldDeferStreamContent() || projectionState.hasFailedTaskDelegation)) {
                             deferredStreamContent.push(chunk);
                             continue;
                         }
                         yield chunk;
                     }
+                    const terminalVisibleOutput = readTerminalEventVisibleOutput(event);
+                    if (terminalVisibleOutput) {
+                        const terminalExecutionEvidence = buildExecutionRecoveryEvidence({ projectionState });
+                        if (!shouldDeferStreamContent()
+                            && !terminalExecutionEvidence.hasIncompletePlanState
+                            && !terminalExecutionEvidence.hasFailedTaskDelegation
+                            && !terminalExecutionEvidence.hasOpenTaskDelegation
+                            && !hasMissingDelegatedExecutionEvidence(terminalExecutionEvidence)
+                            && !hasMissingDelegatedFindings(terminalExecutionEvidence)) {
+                            if (deferredStreamContent.length > 0) {
+                                yield* flushDeferredStreamContent();
+                            }
+                            return;
+                        }
+                    }
                 }
                 if (shouldProfile)
                     yield finishProfileStep({
@@ -278,13 +392,55 @@ export async function* streamRuntimeExecution(options) {
                     });
                 throw error;
             }
-            const terminalRecoveryInstruction = !emittedUnsafeStreamSideEffects && sawRetrySafeInvalidToolSelectionError ? INVALID_TOOL_SELECTION_RECOVERY_INSTRUCTION : null;
-            if (terminalRecoveryInstruction) {
+            const streamedExecutionEvidence = buildExecutionRecoveryEvidence({ projectionState });
+            if (hasUnresolvedExecution(streamedExecutionEvidence)) {
+                throw createUnresolvedExecutionError(streamedExecutionEvidence);
+            }
+            const executionWithoutToolEvidenceInstruction = projectionState.emittedOutput
+                ? resolveExecutionWithoutToolEvidenceTextInstruction(request, projectionState.emittedOutput, false, {
+                    ...streamedExecutionEvidence,
+                    hasMissingDelegatedExecutionEvidence: hasMissingDelegatedExecutionEvidence(streamedExecutionEvidence),
+                })
+                : null;
+            const retryInstruction = !emittedUnsafeStreamSideEffects && sawRetrySafeInvalidToolSelectionError
+                ? INVALID_TOOL_SELECTION_RECOVERY_INSTRUCTION
+                : executionWithoutToolEvidenceInstruction;
+            if (retryInstruction) {
                 let retried;
-                retried = await options.invoke(options.applyToolRecoveryInstruction(options.binding, terminalRecoveryInstruction), options.input, options.sessionId, options.runtimeOptions.requestId ?? options.sessionId, undefined, options.history, options.runtimeOptions);
+                retried = await options.invoke(options.applyToolRecoveryInstruction(options.binding, retryInstruction), options.input, options.sessionId, options.runtimeOptions.requestId ?? options.sessionId, undefined, options.history, options.runtimeOptions);
                 const executedToolResults = Array.isArray(retried.metadata?.executedToolResults)
                     ? retried.metadata.executedToolResults
                     : [];
+                const originalExecutionEvidence = buildExecutionRecoveryEvidence({ projectionState });
+                const retriedExecutionEvidence = buildExecutionRecoveryEvidence({
+                    projectionState: createStreamEventProjectionState(),
+                    executedToolResults,
+                });
+                const retriedVisibleOutput = retried.output ? toVisibleContent(retried.output) : "";
+                const retriedCarriesExecutionEvidence = retriedExecutionEvidence.hasToolResultEvidence
+                    || retriedExecutionEvidence.hasOpenTaskDelegation
+                    || retriedExecutionEvidence.hasDelegatedExecutionToolEvidence;
+                const retriedHasUnresolvedExecution = hasUnresolvedExecution(retriedExecutionEvidence)
+                    || hasMissingDelegatedExecutionEvidence(retriedExecutionEvidence)
+                    || hasMissingDelegatedFindings(retriedExecutionEvidence)
+                    || (!retriedCarriesExecutionEvidence
+                        && (hasUnresolvedExecution(originalExecutionEvidence)
+                            || hasMissingDelegatedExecutionEvidence(originalExecutionEvidence)
+                            || hasMissingDelegatedFindings(originalExecutionEvidence)));
+                const effectiveRecoveryEvidence = retriedCarriesExecutionEvidence
+                    ? retriedExecutionEvidence
+                    : {
+                        ...retriedExecutionEvidence,
+                        hasIncompletePlanState: originalExecutionEvidence.hasIncompletePlanState,
+                        hasFailedTaskDelegation: originalExecutionEvidence.hasFailedTaskDelegation,
+                        hasOpenTaskDelegation: originalExecutionEvidence.hasOpenTaskDelegation,
+                        hasDelegatedAgentWithConfiguredTools: originalExecutionEvidence.hasDelegatedAgentWithConfiguredTools,
+                        hasDelegatedExecutionToolEvidence: originalExecutionEvidence.hasDelegatedExecutionToolEvidence,
+                    };
+                if (retriedHasUnresolvedExecution
+                    || (retriedHasUnresolvedExecution && retriedExecutionEvidence.hasToolResultEvidence && !retriedVisibleOutput)) {
+                    throw createUnresolvedExecutionError(effectiveRecoveryEvidence);
+                }
                 for (const toolResult of executedToolResults) {
                     yield {
                         kind: "tool-result",
@@ -303,7 +459,15 @@ export async function* streamRuntimeExecution(options) {
             if (deferredStreamContent.length > 0) {
                 yield* flushDeferredStreamContent();
             }
-            if (projectionState.emittedOutput || projectionState.emittedToolResult || projectionState.emittedToolError) {
+            if (hasMissingDelegatedExecutionEvidence(streamedExecutionEvidence)) {
+                throw createUnresolvedExecutionError(streamedExecutionEvidence);
+            }
+            if (hasMissingDelegatedFindings(streamedExecutionEvidence)) {
+                throw createUnresolvedExecutionError(streamedExecutionEvidence);
+            }
+            const hasUnresolvedStreamExecution = hasUnresolvedExecution(streamedExecutionEvidence);
+            if (projectionState.emittedOutput
+                || ((projectionState.emittedToolResult || projectionState.emittedToolError) && !hasUnresolvedStreamExecution)) {
                 return;
             }
         }
@@ -429,6 +593,73 @@ export async function* streamRuntimeExecution(options) {
         const executedToolResults = Array.isArray(result.metadata?.executedToolResults)
             ? result.metadata.executedToolResults
             : [];
+        const invokeExecutionEvidence = buildExecutionRecoveryEvidence({ projectionState, executedToolResults });
+        if (hasUnresolvedExecution(invokeExecutionEvidence)) {
+            throw createUnresolvedExecutionError(invokeExecutionEvidence);
+        }
+        const invokeFallbackRecoveryInstruction = result.output
+            ? resolveExecutionWithoutToolEvidenceTextInstruction(request, result.output, false, {
+                ...invokeExecutionEvidence,
+                hasMissingDelegatedExecutionEvidence: hasMissingDelegatedExecutionEvidence(invokeExecutionEvidence),
+            })
+            : null;
+        if (invokeFallbackRecoveryInstruction) {
+            const recovered = await options.invoke(options.applyToolRecoveryInstruction(options.binding, invokeFallbackRecoveryInstruction), options.input, options.sessionId, options.runtimeOptions.requestId ?? options.sessionId, undefined, options.history, options.runtimeOptions);
+            const recoveredToolResults = Array.isArray(recovered.metadata?.executedToolResults)
+                ? recovered.metadata.executedToolResults
+                : [];
+            const originalExecutionEvidence = buildExecutionRecoveryEvidence({ projectionState, executedToolResults });
+            const recoveredExecutionEvidence = buildExecutionRecoveryEvidence({
+                projectionState: createStreamEventProjectionState(),
+                executedToolResults: recoveredToolResults,
+            });
+            const recoveredVisibleOutput = recovered.output ? toVisibleContent(recovered.output) : "";
+            const recoveredCarriesExecutionEvidence = recoveredExecutionEvidence.hasToolResultEvidence
+                || recoveredExecutionEvidence.hasOpenTaskDelegation
+                || recoveredExecutionEvidence.hasDelegatedExecutionToolEvidence;
+            const recoveredHasUnresolvedExecution = hasUnresolvedExecution(recoveredExecutionEvidence)
+                || hasMissingDelegatedExecutionEvidence(recoveredExecutionEvidence)
+                || hasMissingDelegatedFindings(recoveredExecutionEvidence)
+                || (!recoveredCarriesExecutionEvidence
+                    && (hasUnresolvedExecution(originalExecutionEvidence)
+                        || hasMissingDelegatedExecutionEvidence(originalExecutionEvidence)
+                        || hasMissingDelegatedFindings(originalExecutionEvidence)));
+            const effectiveRecoveredEvidence = recoveredCarriesExecutionEvidence
+                ? recoveredExecutionEvidence
+                : {
+                    ...recoveredExecutionEvidence,
+                    hasIncompletePlanState: originalExecutionEvidence.hasIncompletePlanState,
+                    hasFailedTaskDelegation: originalExecutionEvidence.hasFailedTaskDelegation,
+                    hasOpenTaskDelegation: originalExecutionEvidence.hasOpenTaskDelegation,
+                    hasDelegatedAgentWithConfiguredTools: originalExecutionEvidence.hasDelegatedAgentWithConfiguredTools,
+                    hasDelegatedExecutionToolEvidence: originalExecutionEvidence.hasDelegatedExecutionToolEvidence,
+                };
+            if (recoveredHasUnresolvedExecution
+                || (recoveredHasUnresolvedExecution && recoveredExecutionEvidence.hasToolResultEvidence && !recoveredVisibleOutput)) {
+                throw createUnresolvedExecutionError(effectiveRecoveredEvidence);
+            }
+            for (const toolResult of recoveredToolResults) {
+                yield {
+                    kind: "tool-result",
+                    toolName: toolResult.toolName,
+                    output: toolResult.output,
+                    isError: toolResult.isError,
+                };
+            }
+            if (recovered.output) {
+                const visible = toVisibleContent(recovered.output);
+                if (visible) {
+                    yield { kind: "content", content: visible };
+                }
+            }
+            return;
+        }
+        if (hasMissingDelegatedExecutionEvidence(invokeExecutionEvidence)) {
+            throw createUnresolvedExecutionError(invokeExecutionEvidence);
+        }
+        if (hasMissingDelegatedFindings(invokeExecutionEvidence)) {
+            throw createUnresolvedExecutionError(invokeExecutionEvidence);
+        }
         for (const toolResult of executedToolResults) {
             yield {
                 kind: "tool-result",

package/dist/runtime/adapter/local-tool-invocation.js CHANGED

@@ -4,7 +4,42 @@ import { canReplayToolCallsLocally } from "./tool/tool-replay.js";
 import { extractToolCallsFromResult, normalizeToolArgsForSchema, stringifyToolOutput } from "./tool/tool-arguments.js";
 import { extractMemoryCandidatesFromToolOutput } from "../harness/system/runtime-memory-candidates.js";
 import { maybePersistLargeToolOutput } from "./tool/tool-output-artifacts.js";
+import { appendToolRecoveryInstruction, extractVisibleOutput, resolveExecutionWithoutToolEvidenceTextInstruction, sanitizeVisibleText, } from "../parsing/output-parsing.js";
+import { AUTONOMOUS_INVESTIGATION_RECOVERY_INSTRUCTION } from "../prompts/runtime-prompts.js";
 const TOOL_FOLLOW_UP_INSTRUCTION = "One or more tool results are already available in this conversation. Answer the user's current request directly from the existing context and tool results. Do not ask the user to repeat inputs that are already present above.";
+function readPlanStateSummary(output) {
+    if (typeof output !== "object" || output === null) {
+        return null;
+    }
+    const typed = output;
+    const summaryContainer = typed.summary;
+    if (typeof summaryContainer !== "object" || summaryContainer === null) {
+        return null;
+    }
+    const nested = summaryContainer;
+    const counts = nested.summary;
+    if (typeof counts !== "object" || counts === null) {
+        return null;
+    }
+    const typedCounts = counts;
+    return {
+        pending: typeof typedCounts.pending === "number" ? typedCounts.pending : 0,
+        inProgress: typeof typedCounts.inProgress === "number" ? typedCounts.inProgress : 0,
+    };
+}
+function hasIncompleteExecutedPlan(executedToolResults) {
+    for (const latest of [...executedToolResults].reverse()) {
+        const summary = readPlanStateSummary(latest.output);
+        if (!summary) {
+            continue;
+        }
+        return summary.pending > 0 || summary.inProgress > 0;
+    }
+    return false;
+}
+function hasNonTodoToolEvidence(executedToolResults) {
+    return executedToolResults.some((item) => item.toolName !== "write_todos" && item.toolName !== "read_todos");
+}
 function extractLatestUserInput(request) {
     const typedRequest = request;
     const messages = Array.isArray(typedRequest.messages) ? typedRequest.messages : [];
@@ -40,6 +75,24 @@ export async function runLocalToolInvocationLoop({ binding, request, primaryTool
         pendingResult = undefined;
         const toolCalls = extractToolCallsFromResult(result);
         if (toolCalls.length === 0) {
+            const terminalText = sanitizeVisibleText(extractVisibleOutput(result) || "");
+            const hasIncompletePlanState = hasIncompleteExecutedPlan(executedToolResults);
+            const hasExecutionBeyondTodoPlanning = hasNonTodoToolEvidence(executedToolResults);
+            const recoveryInstruction = terminalText
+                ? resolveExecutionWithoutToolEvidenceTextInstruction(activeRequest, terminalText, false, {
+                    hasToolResultEvidence: hasExecutionBeyondTodoPlanning,
+                    hasIncompletePlanState: hasExecutionBeyondTodoPlanning && hasIncompletePlanState,
+                })
+                : hasIncompletePlanState && hasExecutionBeyondTodoPlanning
+                    ? AUTONOMOUS_INVESTIGATION_RECOVERY_INSTRUCTION
+                    : null;
+            if (recoveryInstruction) {
+                if (iteration + 1 === maxToolIterations) {
+                    throw new Error(`Tool-calling loop exceeded the maximum of ${maxToolIterations} iterations`);
+                }
+                activeRequest = appendToolRecoveryInstruction(activeRequest, recoveryInstruction);
+                continue;
+            }
             break;
         }
         if (!canReplayToolCallsLocally(binding, toolCalls, primaryTools, toolNameMapping, executableTools, builtinExecutableTools)) {