npm - @botbotgo/agent-harness - Versions diffs - 0.0.418 → 0.0.420 - Mend

@botbotgo/agent-harness 0.0.418 → 0.0.420

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (28)

package/dist/cli/chat-interactive.js +1 -1
package/dist/cli/chat-stream.js +9 -1
package/dist/package-version.d.ts +2 -2
package/dist/package-version.js +2 -2
package/dist/runtime/adapter/compat/openai-compatible.js +12 -0
package/dist/runtime/adapter/flow/invocation-flow.d.ts +2 -0
package/dist/runtime/adapter/flow/invocation-flow.js +13 -5
package/dist/runtime/adapter/flow/invoke-runtime.d.ts +1 -0
package/dist/runtime/adapter/flow/invoke-runtime.js +1 -0
package/dist/runtime/adapter/flow/stream-runtime.d.ts +4 -0
package/dist/runtime/adapter/flow/stream-runtime.js +177 -14
package/dist/runtime/adapter/invocation-result.js +17 -6
package/dist/runtime/adapter/local-tool-invocation.d.ts +2 -1
package/dist/runtime/adapter/local-tool-invocation.js +268 -21
package/dist/runtime/adapter/model/model-providers.js +269 -58
package/dist/runtime/adapter/model/prompted-json-tool-call-capture.d.ts +9 -0
package/dist/runtime/adapter/model/prompted-json-tool-call-capture.js +40 -0
package/dist/runtime/adapter/runtime-adapter-support.js +58 -12
package/dist/runtime/adapter/runtime-shell.js +3 -2
package/dist/runtime/adapter/stream-event-projection.js +22 -5
package/dist/runtime/adapter/tool/tool-arguments.js +157 -67
package/dist/runtime/adapter/tool/tool-replay.js +0 -4
package/dist/runtime/agent-runtime-adapter.d.ts +3 -0
package/dist/runtime/agent-runtime-adapter.js +217 -73
package/dist/runtime/harness/run/stream-run.js +31 -3
package/dist/runtime/parsing/output-tool-args.js +108 -0
package/dist/workspace/resource-compilers.js +17 -4
package/package.json +1 -1

package/dist/runtime/agent-runtime-adapter.js CHANGED Viewed

@@ -1,6 +1,7 @@
 import path from "node:path";
+import { createHash } from "node:crypto";
 import { createAsyncSubAgentMiddleware, createFilesystemMiddleware, createMemoryMiddleware, createPatchToolCallsMiddleware, createSkillsMiddleware, createSummarizationMiddleware, createSubAgentMiddleware, FilesystemBackend, StateBackend, } from "deepagents";
-import { AIMessage, createAgent, createMiddleware, humanInTheLoopMiddleware, todoListMiddleware, ToolMessage } from "langchain";
+import { createAgent, createMiddleware, humanInTheLoopMiddleware, todoListMiddleware, ToolMessage } from "langchain";
 import { sanitizeVisibleText, tryParseJson, wrapResolvedModel, } from "./parsing/output-parsing.js";
 import { salvageJsonToolCalls } from "./parsing/output-tool-args.js";
 import { extractMessageText } from "../utils/message-content.js";
@@ -17,6 +18,7 @@ import { isEmptyFinalAiMessageError, resolveBindingTimeout, resolveStreamIdleTim
 import { createResolvedModel } from "./adapter/model/model-providers.js";
 import { renderDirectWorkspaceListing, shouldDirectlyListWorkspaceFiles } from "./adapter/direct-builtin-utility.js";
 import { appendProviderToolCallAliasTools, resolveAdapterTools } from "./adapter/tool-resolution.js";
+import { normalizeToolArgsForSchema } from "./adapter/tool/tool-arguments.js";
 import { resolveRuntimeStreamExecutionContext, } from "./adapter/flow/execution-context.js";
 import { isRetryableProviderError } from "./adapter/resilience.js";
 import { UPSTREAM_REQUEST_CONFIG_KEY, UPSTREAM_SESSION_CONFIG_KEY } from "./adapter/upstream-configurable-keys.js";
@@ -73,7 +75,9 @@ function isPlanToolName(toolName) {
     return normalized === "write_todos"
         || normalized === "read_todos"
         || normalized === "tool_call_write_todos"
-        || normalized === "tool_call_read_todos";
+        || normalized === "tool_call_read_todos"
+        || normalized === "call_write_todos"
+        || normalized === "call_read_todos";
 }
 function readConfiguredToolName(value) {
     if (typeof value !== "object" || value === null) {
@@ -82,26 +86,6 @@ function readConfiguredToolName(value) {
     const typed = value;
     return typeof typed.name === "string" ? typed.name.trim() : "";
 }
-function createBootstrapTodoPlan(toolNames) {
-    const evidenceToolName = toolNames.find((toolName) => !isPlanToolName(toolName));
-    const contents = evidenceToolName
-        ? [
-            `Run ${evidenceToolName} for the requested evidence`,
-            `Inspect the ${evidenceToolName} result and extract concrete findings`,
-            "Update TODO status from the observed evidence",
-            "Return the final answer grounded in tool output",
-        ]
-        : [
-            "Identify the concrete evidence needed for this request",
-            "Collect and inspect the available evidence",
-            "Update TODO status from the observed evidence",
-            "Return the final answer grounded in evidence",
-        ];
-    return contents.map((content, index) => ({
-        content,
-        status: index === 0 ? "in_progress" : "pending",
-    }));
-}
 function readMessageContentText(message) {
     if (typeof message !== "object" || message === null) {
         return "";
@@ -120,6 +104,12 @@ function readMessageContentText(message) {
         .join("")
         .trim();
 }
+function hasExternalPlanEvidenceInstruction(messages) {
+    return messages.some((message) => {
+        const text = readMessageContentText(message);
+        return /todo board already exists|required todo board already exists|non[-\s]?planning tool call/iu.test(text);
+    });
+}
 function parseToolCallArgs(value) {
     if (typeof value === "object" && value !== null && !Array.isArray(value)) {
         return value;
@@ -182,7 +172,49 @@ function todoToolCallIsTerminal(toolCall) {
         return status !== "pending" && status !== "in_progress";
     });
 }
+function readToolResultName(message) {
+    if (typeof message !== "object" || message === null) {
+        return "";
+    }
+    const typed = message;
+    const messageType = typeof typed.type === "string"
+        ? typed.type
+        : typeof typed._getType === "function"
+            ? String(typed._getType())
+            : "";
+    if (messageType !== "tool" && typeof typed.tool_call_id !== "string") {
+        return "";
+    }
+    return typeof typed.name === "string" ? typed.name : "";
+}
+function createPlanGuardToolError(toolCallId, content, fallbackPrefix) {
+    return new ToolMessage({
+        content,
+        tool_call_id: typeof toolCallId === "string" ? toolCallId : `${fallbackPrefix}-${Math.random().toString(36).slice(2, 10)}`,
+        status: "error",
+    });
+}
+function isPromiseLike(value) {
+    return typeof value === "object" && value !== null && typeof value.then === "function";
+}
+function renderToolInvocationError(error) {
+    const message = error instanceof Error ? error.message : String(error);
+    const cause = typeof error === "object" && error !== null && "cause" in error
+        ? error.cause
+        : undefined;
+    const causeMessage = cause instanceof Error ? cause.message : "";
+    return [message, causeMessage]
+        .map((value) => value.trim())
+        .filter(Boolean)
+        .join("\n");
+}
+function createToolInvocationErrorMessage(toolCallId, toolName, error) {
+    const rendered = renderToolInvocationError(error);
+    return createPlanGuardToolError(toolCallId, `Error invoking tool '${toolName}' with error: ${rendered || "tool invocation failed"}`, "tool-invocation-error");
+}
 function createTodoPlanGuardMiddleware(options = {}) {
+    let observedPlanToolResult = false;
+    let observedNonPlanToolResult = false;
     return createMiddleware({
         name: "harnessTodoPlanGuard",
         wrapToolCall: ((request, handler) => {
@@ -192,34 +224,63 @@ function createTodoPlanGuardMiddleware(options = {}) {
                     ? request.tool.name
                     : "";
             const messages = Array.isArray(request.state?.messages) ? request.state.messages : [];
-            const hasNonPlanToolResult = messages.some((message) => {
-                if (typeof message !== "object" || message === null) {
-                    return false;
-                }
-                const typed = message;
-                const messageType = typeof typed.type === "string"
-                    ? typed.type
-                    : typeof typed._getType === "function"
-                        ? String(typed._getType())
-                        : "";
-                if (messageType !== "tool" && typeof typed.tool_call_id !== "string") {
-                    return false;
-                }
-                const resultToolName = typeof typed.name === "string" ? typed.name : "";
-                return resultToolName.length > 0 && !isPlanToolName(resultToolName);
-            });
+            const toolResultNames = messages.map(readToolResultName).filter((name) => name.length > 0);
+            const hasPlanToolResult = toolResultNames.some(isPlanToolName);
+            const hasNonPlanToolResult = toolResultNames.some((resultToolName) => !isPlanToolName(resultToolName));
+            const hasExternalPlanEvidence = hasExternalPlanEvidenceInstruction(messages);
+            if (options.requiresPlan === true
+                && !(observedPlanToolResult || hasPlanToolResult || hasExternalPlanEvidence)
+                && toolName.length > 0
+                && !isPlanToolName(toolName)) {
+                return createPlanGuardToolError(request.toolCall?.id, "Error: this agent has a required visible planning contract. Your first action must be write_todos with concrete task steps and statuses. Do not call non-planning tools before the initial todo board exists.", "todo-initial-plan-guard");
+            }
+            if (options.requiresPlan === true
+                && (observedPlanToolResult || hasPlanToolResult || hasExternalPlanEvidence)
+                && !(observedNonPlanToolResult || hasNonPlanToolResult)
+                && isPlanToolName(toolName)) {
+                return createPlanGuardToolError(request.toolCall?.id, "Error: the required todo board already exists, but no non-planning evidence tool has returned yet. Do not call write_todos or read_todos again. Your next action must be exactly one non-planning tool call selected from the available tool descriptions and schemas.", "todo-plan-evidence-guard");
+            }
             if (options.requiresPlan === true
-                && !hasNonPlanToolResult
+                && !(observedNonPlanToolResult || hasNonPlanToolResult)
                 && isPlanToolName(toolName)
                 && normalizePlanToolName(toolName).includes("write_todos")
                 && todoToolCallIsTerminal({ args: parseToolCallArgs(request.toolCall?.args) })) {
-                return new ToolMessage({
-                    content: "Error: write_todos cannot mark every todo as terminal before any non-planning evidence tool returns. Keep one todo in_progress and the remaining todos pending until evidence tools return.",
-                    tool_call_id: typeof request.toolCall?.id === "string" ? request.toolCall.id : `write-todos-tool-guard-${Math.random().toString(36).slice(2, 10)}`,
-                    status: "error",
-                });
+                return createPlanGuardToolError(request.toolCall?.id, "Error: write_todos cannot mark every todo as terminal before any non-planning evidence tool returns. Keep one todo in_progress and the remaining todos pending until evidence tools return.", "write-todos-tool-guard");
+            }
+            const markObservedToolResult = () => {
+                if (isPlanToolName(toolName)) {
+                    observedPlanToolResult = true;
+                }
+                else if (toolName.length > 0) {
+                    observedNonPlanToolResult = true;
+                }
+            };
+            const normalizedArgs = normalizeToolArgsForSchema(parseToolCallArgs(request.toolCall?.args), request.tool?.schema, request.toolCall?.args);
+            const normalizedRequest = {
+                ...request,
+                toolCall: request.toolCall
+                    ? {
+                        ...request.toolCall,
+                        args: normalizedArgs,
+                    }
+                    : request.toolCall,
+            };
+            try {
+                const result = handler(normalizedRequest);
+                if (isPromiseLike(result)) {
+                    return result
+                        .then((value) => {
+                        markObservedToolResult();
+                        return value;
+                    })
+                        .catch((error) => createToolInvocationErrorMessage(request.toolCall?.id, toolName || "unknown", error));
+                }
+                markObservedToolResult();
+                return result;
+            }
+            catch (error) {
+                return createToolInvocationErrorMessage(request.toolCall?.id, toolName || "unknown", error);
             }
-            return handler(request);
         }),
         afterModel: (state) => {
             if (!Array.isArray(state.messages) || state.messages.length === 0) {
@@ -246,22 +307,8 @@ function createTodoPlanGuardMiddleware(options = {}) {
             }
             const lastAiMessage = [...state.messages].reverse().find((message) => readMessageToolCalls(message).length > 0);
             const lastToolCalls = readMessageToolCalls(lastAiMessage);
-            if (!lastAiMessage && options.requiresPlan === true) {
-                const latestMessage = state.messages.at(-1);
-                const hasVisibleContent = readMessageContentText(latestMessage).length > 0;
-                if (!hasVisibleContent) {
-                    return {
-                        messages: [new AIMessage({
-                                content: "",
-                                tool_calls: [{
-                                        id: `write-todos-bootstrap-${Math.random().toString(36).slice(2, 10)}`,
-                                        name: "write_todos",
-                                        args: { todos: createBootstrapTodoPlan(options.toolNames ?? []) },
-                                        type: "tool_call",
-                                    }],
-                            })],
-                    };
-                }
+            if (!lastAiMessage) {
+                return;
             }
             const writeTodosCalls = lastToolCalls.filter((toolCall) => isPlanToolName(toolCall.name));
             const prematureCompletedCalls = writeTodosCalls.filter(todoToolCallIsTerminal);
@@ -442,6 +489,61 @@ function hasDelegatedPlanEvidence(result) {
     return Array.isArray(toolResults)
         && toolResults.some((item) => isPlanToolName(item.toolName));
 }
+function readUpstreamToolEvidence(event) {
+    if (typeof event !== "object" || event === null) {
+        return null;
+    }
+    const typed = event;
+    const eventName = typeof typed.event === "string" ? typed.event : "";
+    const runType = typeof typed.run_type === "string" ? typed.run_type : "";
+    const toolName = typeof typed.name === "string" ? typed.name : "";
+    if (!toolName) {
+        return null;
+    }
+    const isToolStart = eventName === "on_tool_start" || (eventName === "on_chain_start" && runType === "tool");
+    if (isToolStart && isPlanToolName(toolName)) {
+        return { toolName, output: typed.data?.input };
+    }
+    const isToolEnd = eventName === "on_tool_end" || (eventName === "on_chain_end" && runType === "tool");
+    if (isToolEnd) {
+        return { toolName, output: typed.data?.output };
+    }
+    const isToolError = eventName === "on_tool_error";
+    if (isToolError) {
+        return { toolName, output: typed.data?.error ?? typed.data?.output, isError: true };
+    }
+    return null;
+}
+function appendUniqueToolEvidence(executedToolResults, evidence) {
+    const exists = executedToolResults.some((item) => item.toolName === evidence.toolName
+        && item.isError === evidence.isError
+        && JSON.stringify(item.output) === JSON.stringify(evidence.output));
+    if (!exists) {
+        executedToolResults.push(evidence);
+    }
+}
+function mergeDelegatedResultToolEvidence(result, previous) {
+    const merged = [];
+    for (const source of [previous, result]) {
+        const toolResults = Array.isArray(source.metadata?.executedToolResults)
+            ? source.metadata.executedToolResults
+            : [];
+        for (const toolResult of toolResults) {
+            if (typeof toolResult === "object"
+                && toolResult !== null
+                && typeof toolResult.toolName === "string") {
+                appendUniqueToolEvidence(merged, toolResult);
+            }
+        }
+    }
+    return {
+        ...result,
+        metadata: {
+            ...(result.metadata ?? {}),
+            executedToolResults: merged,
+        },
+    };
+}
 const DELEGATED_PLAN_EVIDENCE_RETRY_INSTRUCTION = [
     "The delegated task requires visible TODO planning evidence.",
     "Before any other tool call or final answer, call write_todos with concrete task steps and statuses.",
@@ -966,14 +1068,8 @@ export class AgentRuntimeAdapter {
         const inlineSubagents = input.resolvedSubagents.filter((subagent) => !("graphId" in subagent));
         const asyncSubagents = input.resolvedSubagents.filter((subagent) => "graphId" in subagent);
         const subagents = inlineSubagents;
-        const requiresPlan = binding.harnessRuntime.executionContract?.requiresPlan === true;
-        const resolvedToolNames = input.resolvedTools.map(readConfiguredToolName).filter((name) => name.length > 0);
         const middleware = [
             ...(builtinTools.todos === false ? [] : [todoListMiddleware()]),
-            ...(builtinTools.todos === false ? [] : [createTodoPlanGuardMiddleware({
-                    requiresPlan,
-                    toolNames: resolvedToolNames,
-                })]),
             ...(input.resolvedSkills.length > 0 ? [createSkillsMiddleware({
                     backend,
                     sources: resolveDeepAgentSkillSourceRootPaths({
@@ -1029,7 +1125,22 @@ export class AgentRuntimeAdapter {
             ? filesystemConfig.sessionStorage
             : undefined;
         const sessionScoped = sessionStorage?.enabled === true;
-        return `${binding.agent.sourcePath}::${sessionScoped ? (sessionId ?? "__default__") : "__binding__"}`;
+        const executionParams = getBindingExecutionParams(binding);
+        const primaryModel = getBindingPrimaryModel(binding);
+        const runnableFingerprint = createHash("sha256").update(JSON.stringify({
+            executionKind: getBindingExecutionKind(binding),
+            systemPrompt: getBindingSystemPrompt(binding) ?? "",
+            responseFormat: executionParams && "responseFormat" in executionParams ? executionParams.responseFormat : undefined,
+            model: primaryModel
+                ? {
+                    id: primaryModel.id,
+                    provider: primaryModel.provider,
+                    model: primaryModel.model,
+                }
+                : undefined,
+            tools: getBindingPrimaryTools(binding).map((tool) => tool.name).filter(Boolean).sort(),
+        })).digest("hex").slice(0, 16);
+        return `${binding.agent.sourcePath}::${sessionScoped ? (sessionId ?? "__default__") : "__binding__"}::${runnableFingerprint}`;
     }
     async create(binding, options = {}) {
         const cacheKey = this.buildRunnableCacheKey(binding, options.sessionId ?? options.legacySessionId);
@@ -1280,11 +1391,12 @@ export class AgentRuntimeAdapter {
         if (!selectedBinding) {
             return null;
         }
-        const runDelegatedRequest = (text, requestSuffix = "") => this.invoke(selectedBinding, text, sessionId, `${requestId}:${subagentType}${requestSuffix}`, undefined, [], {
+        const runDelegatedRequest = (text, requestSuffix = "", delegatedOptions = {}) => this.invoke(selectedBinding, text, sessionId, `${requestId}:${subagentType}${requestSuffix}`, undefined, [], {
             context: options.context,
             state: options.state,
             files: options.files,
             memoryContext: options.memoryContext,
+            ...delegatedOptions,
         });
         let delegatedResult;
         try {
@@ -1349,7 +1461,12 @@ export class AgentRuntimeAdapter {
         }
         if (targetRequiresExecutionToolEvidence && !hasDelegatedExecutionToolEvidence(delegatedResult)) {
             try {
-                delegatedResult = await runDelegatedRequest([requestText, EXECUTION_WITH_TOOL_EVIDENCE_RETRY_INSTRUCTION].filter(Boolean).join("\n\n"), ":tool-evidence-retry");
+                delegatedResult = await runDelegatedRequest([requestText, EXECUTION_WITH_TOOL_EVIDENCE_RETRY_INSTRUCTION].filter(Boolean).join("\n\n"), ":tool-evidence-retry", selectedBinding.harnessRuntime.executionContract?.requiresPlan === true
+                    ? {
+                        suppressInitialRequiredPlanInstruction: true,
+                        externalPlanEvidence: hasDelegatedPlanEvidence(delegatedResult),
+                    }
+                    : {});
             }
             catch (error) {
                 const output = error instanceof Error ? error.message : String(error);
@@ -1648,12 +1765,18 @@ export class AgentRuntimeAdapter {
                         continue;
                     }
                     if (chunk.kind === "tool-result") {
-                        executedToolResults.push({
+                        appendUniqueToolEvidence(executedToolResults, {
                             toolName: chunk.toolName,
                             output: chunk.output,
                             ...(chunk.isError !== undefined ? { isError: chunk.isError } : {}),
                         });
                     }
+                    if (chunk.kind === "upstream-event") {
+                        const streamedEvidence = readUpstreamToolEvidence(chunk.event);
+                        if (streamedEvidence) {
+                            appendUniqueToolEvidence(executedToolResults, streamedEvidence);
+                        }
+                    }
                     yield { ...chunk, agentId: chunk.agentId ?? selectedBinding.agent.id };
                 }
             }
@@ -1683,10 +1806,12 @@ export class AgentRuntimeAdapter {
         const targetRequiresExecutionToolEvidence = getBindingPrimaryTools(selectedBinding).length > 0;
         if (selectedBinding.harnessRuntime.executionContract?.requiresPlan === true
             && !hasDelegatedPlanEvidence(delegatedResult)) {
-            delegatedResult = yield* runDelegatedStreamAttempt([requestText, DELEGATED_PLAN_EVIDENCE_RETRY_INSTRUCTION].filter(Boolean).join("\n\n"), ":plan-evidence-retry");
+            const previousDelegatedResult = delegatedResult;
+            delegatedResult = mergeDelegatedResultToolEvidence(yield* runDelegatedStreamAttempt([requestText, DELEGATED_PLAN_EVIDENCE_RETRY_INSTRUCTION].filter(Boolean).join("\n\n"), ":plan-evidence-retry"), previousDelegatedResult);
         }
         if (targetRequiresExecutionToolEvidence && !hasDelegatedExecutionToolEvidence(delegatedResult)) {
-            delegatedResult = yield* runDelegatedStreamAttempt([requestText, EXECUTION_WITH_TOOL_EVIDENCE_RETRY_INSTRUCTION].filter(Boolean).join("\n\n"), ":tool-evidence-retry");
+            const previousDelegatedResult = delegatedResult;
+            delegatedResult = mergeDelegatedResultToolEvidence(yield* runDelegatedStreamAttempt([requestText, EXECUTION_WITH_TOOL_EVIDENCE_RETRY_INSTRUCTION].filter(Boolean).join("\n\n"), ":tool-evidence-retry"), previousDelegatedResult);
         }
         if (selectedBinding.harnessRuntime.executionContract?.requiresPlan === true
             && !hasDelegatedPlanEvidence(delegatedResult)) {
@@ -1707,6 +1832,25 @@ export class AgentRuntimeAdapter {
                 finalMessageText: output,
             };
         }
+        const delegatedToolResults = Array.isArray(delegatedResult.metadata?.executedToolResults)
+            ? delegatedResult.metadata.executedToolResults
+            : [];
+        for (const toolResult of delegatedToolResults) {
+            const toolName = typeof toolResult.toolName === "string" ? toolResult.toolName : "";
+            if (!toolName || isPlanToolName(toolName)) {
+                continue;
+            }
+            yield {
+                kind: "commentary",
+                content: `Running tool ${toolName}.`,
+                agentId: selectedBinding.agent.id,
+            };
+            yield {
+                kind: "commentary",
+                content: `Tool ${toolName} ${toolResult.isError === true ? "failed" : "completed"}.`,
+                agentId: selectedBinding.agent.id,
+            };
+        }
         return {
             toolOutput: resolveDelegatedResultOutput(delegatedResult),
             delegatedSubagentType: subagentType,

package/dist/runtime/harness/run/stream-run.js CHANGED Viewed

@@ -62,6 +62,12 @@ function inferPlanItemStatusFromTerminalAssistantOutput(value) {
     }
     return isSubstantiveTerminalAssistantOutput(value) ? "completed" : null;
 }
+function mapTerminalStatusToObservedPlanItemStatus(status, sawSuccessfulToolResult) {
+    if (status === "blocked" && sawSuccessfulToolResult) {
+        return "completed";
+    }
+    return mapTerminalStatusToPlanItemStatus(status);
+}
 function reconcilePlanStateToTerminalStatus(planState, status, updatedAt) {
     const items = planState.items.map((item) => ({
         ...item,
@@ -545,6 +551,17 @@ function createProfileStepCommentary(step) {
     if (step.kind === "agent" && step.action === "startup") {
         return `Preparing ${name}.`;
     }
+    if (step.kind === "tool") {
+        if (step.status === "started") {
+            return `Running tool ${name}.`;
+        }
+        if (step.status === "completed") {
+            return `Tool ${name} completed.`;
+        }
+        if (step.status === "failed") {
+            return `Tool ${name} failed.`;
+        }
+    }
     return null;
 }
 function isOpenAICompatibleStreamingCompatibilityError(binding, error) {
@@ -1016,7 +1033,7 @@ export async function* streamHarnessRun(options) {
             currentPlanState = await refreshPlanStateFromPersistence(options, currentPlanState);
             const terminalStructuredStatus = readTerminalExecutionStatus(actual.structuredResponse);
             if (terminalStructuredStatus && currentPlanState && planStateHasActiveItems(currentPlanState)) {
-                const reconciledPlanState = reconcilePlanStateToTerminalStatus(currentPlanState, mapTerminalStatusToPlanItemStatus(terminalStructuredStatus), new Date().toISOString());
+                const reconciledPlanState = reconcilePlanStateToTerminalStatus(currentPlanState, mapTerminalStatusToObservedPlanItemStatus(terminalStructuredStatus, sawSuccessfulToolResult), new Date().toISOString());
                 const signature = buildPlanStateSignature(reconciledPlanState);
                 if (signature !== lastPlanStateSignature) {
                     const previousPlanState = currentPlanState;
@@ -1040,7 +1057,18 @@ export async function* streamHarnessRun(options) {
             }
         }
         currentPlanState = await refreshPlanStateFromPersistence(options, currentPlanState);
-        const terminalAssistantPlanItemStatus = inferPlanItemStatusFromTerminalAssistantOutput(assistantOutput);
+        const explicitTerminalAssistantStatus = readTerminalExecutionStatus(assistantOutput);
+        let terminalAssistantPlanItemStatus = inferPlanItemStatusFromTerminalAssistantOutput(assistantOutput);
+        if (explicitTerminalAssistantStatus === "blocked" && sawSuccessfulToolResult) {
+            terminalAssistantPlanItemStatus = "completed";
+        }
+        if (terminalAssistantPlanItemStatus === "failed"
+            && sawSuccessfulToolResult
+            && !explicitTerminalAssistantStatus
+            && !/^\s*terminated\b|\bBlockers?:\b|(?:委托执行失败|未能完成|无法完成)/iu.test(assistantOutput)
+            && assistantOutput.trim()) {
+            terminalAssistantPlanItemStatus = "completed";
+        }
         if (terminalAssistantPlanItemStatus && currentPlanState && planStateHasActiveItems(currentPlanState)) {
             const reconciledPlanState = reconcilePlanStateToTerminalStatus(currentPlanState, terminalAssistantPlanItemStatus, new Date().toISOString());
             const signature = buildPlanStateSignature(reconciledPlanState);
@@ -1116,7 +1144,7 @@ export async function* streamHarnessRun(options) {
         const canUseDeterministicToolEvidenceOutput = !currentPlanState || !planStateHasActiveItems(currentPlanState) || Boolean(terminalStructuredStatus);
         if (!assistantOutput && sawSuccessfulToolResult && deterministicToolEvidenceOutput && canUseDeterministicToolEvidenceOutput) {
             if (terminalStructuredStatus && currentPlanState && planStateHasActiveItems(currentPlanState)) {
-                const reconciledPlanState = reconcilePlanStateToTerminalStatus(currentPlanState, mapTerminalStatusToPlanItemStatus(terminalStructuredStatus), new Date().toISOString());
+                const reconciledPlanState = reconcilePlanStateToTerminalStatus(currentPlanState, mapTerminalStatusToObservedPlanItemStatus(terminalStructuredStatus, sawSuccessfulToolResult), new Date().toISOString());
                 const signature = buildPlanStateSignature(reconciledPlanState);
                 if (signature !== lastPlanStateSignature) {
                     const previousPlanState = currentPlanState;

package/dist/runtime/parsing/output-tool-args.js CHANGED Viewed

@@ -309,6 +309,89 @@ function normalizePythonLikeJson(value) {
     }
     return output;
 }
+function repairMissingArrayObjectOpenBraces(value) {
+    let output = "";
+    let changed = false;
+    let inString = false;
+    let escaping = false;
+    const stack = [];
+    for (let index = 0; index < value.length; index += 1) {
+        const char = value[index];
+        if (inString) {
+            output += char;
+            if (escaping) {
+                escaping = false;
+                continue;
+            }
+            if (char === "\\") {
+                escaping = true;
+                continue;
+            }
+            if (char === "\"") {
+                inString = false;
+            }
+            continue;
+        }
+        if (char === "\"") {
+            output += char;
+            inString = true;
+            continue;
+        }
+        if (char === "{" || char === "[") {
+            stack.push(char);
+            output += char;
+            continue;
+        }
+        if (char === "}" || char === "]") {
+            const expectedOpen = char === "}" ? "{" : "[";
+            if (stack.at(-1) === expectedOpen) {
+                stack.pop();
+            }
+            output += char;
+            continue;
+        }
+        if (char !== "," || stack.at(-1) !== "[") {
+            output += char;
+            continue;
+        }
+        output += char;
+        let lookahead = index + 1;
+        while (lookahead < value.length && /\s/u.test(value[lookahead] ?? "")) {
+            output += value[lookahead];
+            lookahead += 1;
+        }
+        if (value[lookahead] !== "\"") {
+            index = lookahead - 1;
+            continue;
+        }
+        let cursor = lookahead + 1;
+        let keyEscaping = false;
+        while (cursor < value.length) {
+            const next = value[cursor];
+            if (keyEscaping) {
+                keyEscaping = false;
+            }
+            else if (next === "\\") {
+                keyEscaping = true;
+            }
+            else if (next === "\"") {
+                break;
+            }
+            cursor += 1;
+        }
+        let colonCursor = cursor + 1;
+        while (colonCursor < value.length && /\s/u.test(value[colonCursor] ?? "")) {
+            colonCursor += 1;
+        }
+        if (value[colonCursor] === ":") {
+            output += "{";
+            stack.push("{");
+            changed = true;
+        }
+        index = lookahead - 1;
+    }
+    return changed ? output : null;
+}
 export function salvageToolArgs(value) {
     if (typeof value === "object" && value && !Array.isArray(value)) {
         return value;
@@ -359,6 +442,13 @@ export function salvageJsonToolCalls(value) {
             if (direct) {
                 return direct;
             }
+            const repairedArrayObjects = repairMissingArrayObjectOpenBraces(trimmed);
+            if (repairedArrayObjects) {
+                const parsed = tryParseJson(repairedArrayObjects);
+                if (parsed) {
+                    return parsed;
+                }
+            }
             const pythonLike = normalizePythonLikeJson(trimmed);
             if (pythonLike) {
                 const parsed = tryParseJson(pythonLike);
@@ -366,6 +456,15 @@ export function salvageJsonToolCalls(value) {
                     return parsed;
                 }
             }
+            if (pythonLike) {
+                const repairedPythonLike = repairMissingArrayObjectOpenBraces(pythonLike);
+                if (repairedPythonLike) {
+                    const parsed = tryParseJson(repairedPythonLike);
+                    if (parsed) {
+                        return parsed;
+                    }
+                }
+            }
             const closed = closeJsonContainerSuffix(trimmed);
             if (closed) {
                 const parsed = tryParseJson(closed);
@@ -373,6 +472,15 @@ export function salvageJsonToolCalls(value) {
                     return parsed;
                 }
             }
+            if (repairedArrayObjects) {
+                const closedRepaired = closeJsonContainerSuffix(repairedArrayObjects);
+                if (closedRepaired) {
+                    const parsed = tryParseJson(closedRepaired);
+                    if (parsed) {
+                        return parsed;
+                    }
+                }
+            }
             const embeddedObject = extractBalancedJsonObject(trimmed);
             if (embeddedObject) {
                 const parsed = tryParseJson(embeddedObject);