npm - @botbotgo/agent-harness - Versions diffs - 0.0.359 → 0.0.362 - Mend

@botbotgo/agent-harness 0.0.359 → 0.0.362

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (29)

package/README.md +2 -0
package/README.zh.md +2 -0
package/dist/config/catalogs/response-formats.yaml +43 -0
package/dist/config/runtime/workspace.yaml +8 -0
package/dist/contracts/runtime-requests.d.ts +19 -0
package/dist/contracts/workspace.d.ts +6 -0
package/dist/package-version.d.ts +2 -2
package/dist/package-version.js +2 -2
package/dist/projections/request-events.d.ts +1 -0
package/dist/projections/request-events.js +97 -45
package/dist/protocol/acp/harness-client.js +2 -3
package/dist/runtime/adapter/flow/invocation-flow.js +26 -1
package/dist/runtime/adapter/flow/stream-runtime.js +117 -94
package/dist/runtime/adapter/invocation-result.js +15 -0
package/dist/runtime/adapter/middleware-assembly.js +25 -3
package/dist/runtime/adapter/tool/builtin-middleware-tools.d.ts +5 -0
package/dist/runtime/adapter/tool/builtin-middleware-tools.js +30 -6
package/dist/runtime/agent-runtime-adapter.d.ts +1 -0
package/dist/runtime/agent-runtime-adapter.js +174 -28
package/dist/runtime/harness/events/streaming.js +2 -3
package/dist/workspace/agent-binding-compiler.js +90 -12
package/dist/workspace/compile.js +1 -0
package/dist/workspace/framework-contract-validation.d.ts +2 -1
package/dist/workspace/framework-contract-validation.js +77 -5
package/dist/workspace/object-loader.js +9 -0
package/dist/workspace/support/workspace-ref-utils.d.ts +1 -0
package/dist/workspace/support/workspace-ref-utils.js +40 -0
package/dist/workspace/yaml-object-reader.js +13 -9
package/package.json +1 -1

package/README.md CHANGED Viewed

@@ -94,6 +94,8 @@ agent-harness "Inspect this workspace and explain the main entry points."
 When the runtime emits request-scoped `plan-state` updates and safe `progress.commentary` deltas, the chat shell now renders live todo-board and progress status updates directly in the terminal, so you can watch planning and execution status change during the run instead of waiting for the final response.
 Those progress callouts now stay tied to stable runtime surfaces such as plan-state, tool start/completion, memory recall, and agent delegation, so the operator sees Codex-style intermediate status without exposing private model reasoning.
+Streaming data listeners also receive structured `plan.state`, per-item `plan.step`, and normalized `execution.step` events, so applications can render every planning and execution transition without parsing assistant text or raw upstream debug events.
+The bundled runtime now also provides a generic `response-format/default-report` structured-output default for agents; workspaces can replace it through `Runtime.spec.defaults.agent.config.responseFormatRef`, individual agents can extend it with inline `responseFormat`, replace it with `responseFormatRef`, or set `responseFormat: null`.
 The repository default `orchestra` host is also instructed to start real multi-step execution from the task you already gave it, call `write_todos` before non-trivial tool work, and keep that todo board updated while it runs.
 Durable-memory writes now also retrieve related existing records through the configured vector store before model reconciliation, then merge those semantic hits with deterministic matching so updates and deletes can target the right knowledge identity instead of creating nearby duplicate facts.

package/README.zh.md CHANGED Viewed

@@ -92,6 +92,8 @@ agent-harness "Inspect this workspace and explain the main entry points."
 当 runtime 发出 request 级 `plan-state` 更新以及安全的 `progress.commentary` 增量时，chat shell 现在会直接在终端里渲染实时 todo board 和进度播报，因此你可以在执行过程中看到规划和状态变化，而不必等到最终回复。
 这些进度播报现在会继续绑定在稳定的 runtime surface 上，例如 plan-state、tool 开始/完成、memory recall 与 agent delegation，因此 operator 可以看到类似 Codex 的清晰中间状态，但不会暴露私有模型推理。
+Streaming data listener 也会收到结构化的 `plan.state`、逐条 `plan.step` 和归一化的 `execution.step` 事件，因此应用可以渲染所有 plan / execution 转折，而不必解析 assistant 文本或 raw upstream debug event。
+随包 runtime 现在也提供通用的 `response-format/default-report` 作为 agent structured-output 默认值；workspace 可以通过 `Runtime.spec.defaults.agent.config.responseFormatRef` 替换它，单个 agent 可以用 inline `responseFormat` 扩展、用 `responseFormatRef` 替换，或设置 `responseFormat: null` 关闭。
 仓库默认的 `orchestra` host 现在也会被明确要求：对已经给清楚的多步任务不要再反问，而是直接开始执行；在非平凡工具工作前先调用 `write_todos`，并在运行过程中持续维护这块 todo board。
 durable memory 的写入现在也会在模型做 mutation reconciliation 之前，先通过配置好的 vector store 检索相关旧知识，再和确定性匹配结果合并，因此 update / delete 更容易命中正确的 knowledge identity，而不是生成几条相近但彼此独立的 fact。

package/dist/config/catalogs/response-formats.yaml ADDED Viewed

@@ -0,0 +1,43 @@
+# agent-harness feature: schema version for reusable response-format presets.
+apiVersion: agent-harness/v1alpha1
+# agent-harness feature: object type for named structured-output response format presets.
+kind: ResponseFormats
+spec:
+  - kind: ResponseFormat
+    name: default-report
+    description: Generic structured report for agent results. Workspaces and agents can override or disable it.
+    format:
+      type: object
+      properties:
+        status:
+          type: string
+          enum:
+            - completed
+            - blocked
+            - failed
+            - refused
+        summary:
+          type: array
+          items:
+            type: string
+        findings:
+          type: array
+          items:
+            type: string
+        blockers:
+          type: array
+          items:
+            type: string
+        nextActions:
+          type: array
+          items:
+            type: string
+        report:
+          type: string
+      required:
+        - status
+        - summary
+        - findings
+        - blockers
+        - nextActions
+        - report

package/dist/config/runtime/workspace.yaml CHANGED Viewed

@@ -45,6 +45,14 @@ spec:
     skills:
       - file://./resources/skills
+  # agent-harness feature: default agent execution config used when a workspace or agent does not override it.
+  # The bundled default keeps first-run agent outputs parseable while still allowing projects and individual agents
+  # to replace it with their own responseFormatRef, inline responseFormat, or `responseFormat: null`.
+  defaults:
+    agent:
+      config:
+        responseFormatRef: response-format/default-report
   # agent-harness feature: runtime-level task queue and maximum number of concurrent requests.
   # Additional requests wait in the harness queue until a slot becomes available.
   concurrency:

package/dist/contracts/runtime-requests.d.ts CHANGED Viewed

@@ -121,6 +121,25 @@ export type RequestDataEvent = {
     requestId: string;
     agentId: string;
     text: string;
+} | {
+    type: "plan.state";
+    sessionId: string;
+    requestId: string;
+    agentId: string;
+    planState: RequestPlanState;
+} | {
+    type: "plan.step";
+    sessionId: string;
+    requestId: string;
+    agentId: string;
+    planStateVersion: number;
+    index: number;
+    item: RequestPlanItem;
+} | {
+    type: "execution.step";
+    sessionId: string;
+    requestId: string;
+    step: RequestExecutionStep;
 } | {
     type: "output.content-blocks";
     sessionId: string;

package/dist/contracts/workspace.d.ts CHANGED Viewed

@@ -74,6 +74,12 @@ export type ParsedVectorStoreObject = {
     metadata?: Record<string, unknown>;
     sourcePath: string;
 };
+export type ParsedResponseFormatObject = {
+    id: string;
+    description?: string;
+    format: unknown;
+    sourcePath: string;
+};
 export type ParsedMcpServerObject = {
     id: string;
     transport: "stdio" | "http" | "sse" | "websocket";

package/dist/package-version.d.ts CHANGED Viewed

@@ -1,2 +1,2 @@
-export declare const AGENT_HARNESS_VERSION = "0.0.359";
-export declare const AGENT_HARNESS_RELEASE_DATE = "2026-04-25";
+export declare const AGENT_HARNESS_VERSION = "0.0.362";
+export declare const AGENT_HARNESS_RELEASE_DATE = "2026-04-27";

package/dist/package-version.js CHANGED Viewed

@@ -1,2 +1,2 @@
-export const AGENT_HARNESS_VERSION = "0.0.359";
-export const AGENT_HARNESS_RELEASE_DATE = "2026-04-25";
+export const AGENT_HARNESS_VERSION = "0.0.362";
+export const AGENT_HARNESS_RELEASE_DATE = "2026-04-27";

package/dist/projections/request-events.d.ts CHANGED Viewed

@@ -59,5 +59,6 @@ export type RequestSnapshotStreamItem = StreamEventItem | {
 } | StreamContentItem | StreamContentBlocksItem | StreamToolResultItem | StreamPlanStateItem | StreamUpstreamEventItem | StreamProfileStepItem | StreamResultItem;
 export declare function createInitialRequestEventSnapshot(): RequestEventSnapshot;
 export declare function applyRequestStreamItemToSnapshot(snapshot: RequestEventSnapshot, item: RequestSnapshotStreamItem): RequestEventSnapshot;
+export declare function toRequestDataEvents(item: RequestSnapshotStreamItem): RequestDataEvent[];
 export declare function toRequestDataEvent(item: RequestSnapshotStreamItem): RequestDataEvent | null;
 export {};

package/dist/projections/request-events.js CHANGED Viewed

@@ -393,58 +393,110 @@ export function applyRequestStreamItemToSnapshot(snapshot, item) {
             };
     }
 }
-export function toRequestDataEvent(item) {
+function createSurfaceExecutionStep(surfaceItem) {
+    return {
+        id: surfaceItem.id,
+        kind: surfaceItem.kind,
+        name: surfaceItem.name,
+        action: surfaceItem.action,
+        status: surfaceItem.status,
+        ...(surfaceItem.agentId ? { agentId: surfaceItem.agentId } : {}),
+        ...(surfaceItem.agentName ? { agentName: surfaceItem.agentName } : {}),
+        ...(surfaceItem.ownerAgentId ? { ownerAgentId: surfaceItem.ownerAgentId } : {}),
+        ...(surfaceItem.ownerAgentName ? { ownerAgentName: surfaceItem.ownerAgentName } : {}),
+        ...(surfaceItem.sourceEventId ? { sourceEventId: surfaceItem.sourceEventId } : {}),
+        ...(surfaceItem.detail ? { detail: surfaceItem.detail } : {}),
+    };
+}
+export function toRequestDataEvents(item) {
     switch (item.type) {
         case "commentary":
-            return {
-                type: "progress.commentary",
-                sessionId: item.sessionId,
-                requestId: item.requestId,
-                agentId: item.agentId,
-                text: item.content,
-            };
+            return [{
+                    type: "progress.commentary",
+                    sessionId: item.sessionId,
+                    requestId: item.requestId,
+                    agentId: item.agentId,
+                    text: item.content,
+                }];
         case "content":
-            return {
-                type: "output.text.delta",
-                sessionId: item.sessionId,
-                requestId: item.requestId,
-                agentId: item.agentId,
-                text: item.content,
-            };
+            return [{
+                    type: "output.text.delta",
+                    sessionId: item.sessionId,
+                    requestId: item.requestId,
+                    agentId: item.agentId,
+                    text: item.content,
+                }];
         case "content-blocks":
-            return {
-                type: "output.content-blocks",
-                sessionId: item.sessionId,
-                requestId: item.requestId,
-                agentId: item.agentId,
-                contentBlocks: item.contentBlocks,
-            };
+            return [{
+                    type: "output.content-blocks",
+                    sessionId: item.sessionId,
+                    requestId: item.requestId,
+                    agentId: item.agentId,
+                    contentBlocks: item.contentBlocks,
+                }];
         case "tool-result":
-            return {
-                type: "tool.result",
-                sessionId: item.sessionId,
-                requestId: item.requestId,
-                agentId: item.agentId,
-                toolName: item.toolName,
-                output: summarizeLargeDataEventOutput(item.output),
-                ...(item.isError !== undefined ? { isError: item.isError } : {}),
-            };
+            return [{
+                    type: "tool.result",
+                    sessionId: item.sessionId,
+                    requestId: item.requestId,
+                    agentId: item.agentId,
+                    toolName: item.toolName,
+                    output: summarizeLargeDataEventOutput(item.output),
+                    ...(item.isError !== undefined ? { isError: item.isError } : {}),
+                }];
+        case "plan-state":
+            return [
+                {
+                    type: "plan.state",
+                    sessionId: item.sessionId,
+                    requestId: item.requestId,
+                    agentId: item.agentId,
+                    planState: item.planState,
+                },
+                ...item.planState.items.map((planItem, index) => ({
+                    type: "plan.step",
+                    sessionId: item.sessionId,
+                    requestId: item.requestId,
+                    agentId: item.agentId,
+                    planStateVersion: item.planState.version,
+                    index,
+                    item: planItem,
+                })),
+            ];
         case "upstream-event":
-            return {
-                type: "debug.upstream",
-                sessionId: item.sessionId,
-                requestId: item.requestId,
-                ...(item.surfaceItem ? { surfaceItem: item.surfaceItem } : {}),
-                event: item.event,
-            };
+            return [
+                ...(item.surfaceItem
+                    ? [{
+                            type: "execution.step",
+                            sessionId: item.sessionId,
+                            requestId: item.requestId,
+                            step: createSurfaceExecutionStep(item.surfaceItem),
+                        }]
+                    : []),
+                {
+                    type: "debug.upstream",
+                    sessionId: item.sessionId,
+                    requestId: item.requestId,
+                    ...(item.surfaceItem ? { surfaceItem: item.surfaceItem } : {}),
+                    event: item.event,
+                },
+            ];
         case "profile-step":
-            return {
-                type: "debug.profile",
-                sessionId: item.sessionId,
-                requestId: item.requestId,
-                step: item.step,
-            };
+            return [{
+                    type: "execution.step",
+                    sessionId: item.sessionId,
+                    requestId: item.requestId,
+                    step: item.step,
+                }, {
+                    type: "debug.profile",
+                    sessionId: item.sessionId,
+                    requestId: item.requestId,
+                    step: item.step,
+                }];
         default:
-            return null;
+            return [];
     }
 }
+export function toRequestDataEvent(item) {
+    return toRequestDataEvents(item)[0] ?? null;
+}

package/dist/protocol/acp/harness-client.js CHANGED Viewed

@@ -1,5 +1,5 @@
 import { createAcpHttpClient, createAcpStdioClient, } from "./client.js";
-import { applyRequestStreamItemToSnapshot, createInitialRequestEventSnapshot, toRequestDataEvent, } from "../../projections/request-events.js";
+import { applyRequestStreamItemToSnapshot, createInitialRequestEventSnapshot, toRequestDataEvents, } from "../../projections/request-events.js";
 function toEvent(notification) {
     return notification.params.event;
 }
@@ -144,8 +144,7 @@ export class AcpHarnessClient {
             else if (item.type === "result") {
                 finalResult = item.result;
             }
-            const dataEvent = toRequestDataEvent(item);
-            if (dataEvent) {
+            for (const dataEvent of toRequestDataEvents(item)) {
                 await dataListener?.(dataEvent);
             }
             await eventListener?.(snapshot);

package/dist/runtime/adapter/flow/invocation-flow.js CHANGED Viewed

@@ -7,7 +7,7 @@ import { UPSTREAM_REQUEST_CONFIG_KEY, UPSTREAM_SESSION_CONFIG_KEY } from "../ups
 import { appendToolRecoveryInstruction, extractVisibleOutput, tryParseJson } from "../../parsing/output-parsing.js";
 import { salvageJsonToolCalls } from "../../parsing/output-tool-args.js";
 import { isEmptyFinalAiMessageError } from "../resilience.js";
-import { AUTONOMOUS_INVESTIGATION_RECOVERY_INSTRUCTION } from "../../prompts/runtime-prompts.js";
+import { AUTONOMOUS_INVESTIGATION_RECOVERY_INSTRUCTION, EXECUTION_WITH_TOOL_EVIDENCE_RETRY_INSTRUCTION, } from "../../prompts/runtime-prompts.js";
 function readBindingExecutionParams(binding) {
     const params = binding.execution?.params ?? binding.deepAgentParams ?? binding.langchainAgentParams;
     return {
@@ -76,6 +76,17 @@ function hasNativeTaskDelegationIntent(value) {
     }
     return hasNativeTaskDelegationIntent(typed.tool_calls) || hasNativeTaskDelegationIntent(typed.messages);
 }
+function looksLikeCapabilityRefusalWithoutEvidence(value) {
+    const text = extractVisibleOutput(value).trim();
+    if (!text) {
+        return false;
+    }
+    const refusalSignal = /(?:cannot|can't|unable to|do not have|don't have|not support|does not support|missing capabilities|tool limitation|skill limitation|capability limitation|out of scope|无法|不能|不支持|缺少能力|能力不足|超出范围)/iu.test(text);
+    if (!refusalSignal) {
+        return false;
+    }
+    return /(?:tool|tools|skill|skills|capabilit|scope|工具|技能|能力|范围)/iu.test(text);
+}
 function readStructuredToolCall(value) {
     const salvaged = salvageJsonToolCalls(value)[0];
     if (salvaged) {
@@ -344,6 +355,20 @@ export async function executeRequestInvocation(options) {
         result = recoveredInvocation.result;
         executedToolResults.splice(0, executedToolResults.length, ...recoveredInvocation.executedToolResults);
     }
+    if (options.resumePayload === undefined
+        && primaryTools.length > 0
+        && executedToolResults.length === 0
+        && looksLikeCapabilityRefusalWithoutEvidence(result)) {
+        const messages = Array.isArray(result.messages)
+            ? result.messages
+            : undefined;
+        const recoveryBase = messages ? { messages } : request;
+        const recoveredRequest = appendToolRecoveryInstruction(recoveryBase, EXECUTION_WITH_TOOL_EVIDENCE_RETRY_INSTRUCTION);
+        const recoveredInvocation = await invokeOnce(recoveredRequest);
+        localOrUpstreamInvocation = recoveredInvocation;
+        result = recoveredInvocation.result;
+        executedToolResults.splice(0, executedToolResults.length, ...recoveredInvocation.executedToolResults);
+    }
     try {
         return finalizeRequestResult({
             bindingAgentId: options.binding.agent.id,

package/dist/runtime/adapter/flow/stream-runtime.js CHANGED Viewed

@@ -125,6 +125,15 @@ function isDelegationOnlyBinding(binding) {
     const skillRefs = agent?.skillPathRefs ?? [];
     return configuredSubagents.length > 0 && configuredTools.length === 0 && skillRefs.length === 0;
 }
+function isOpenAICompatibleStreamingCompatibilityError(binding, error) {
+    const params = binding.execution?.params ?? binding.deepAgentParams ?? binding.langchainAgentParams;
+    const model = params?.model;
+    if (model?.provider !== "openai-compatible") {
+        return false;
+    }
+    const message = error instanceof Error ? error.message : String(error);
+    return message.toLowerCase().includes("received empty response from chat model call");
+}
 function hasDelegationEvidence(evidence) {
     return (evidence.hasSuccessfulTaskToolEvidence
         || evidence.hasOpenTaskDelegation
@@ -401,106 +410,120 @@ export async function* streamRuntimeExecution(options) {
                         status: "failed",
                         error,
                     });
-                throw error;
+                if (!emittedUnsafeStreamSideEffects
+                    && isOpenAICompatibleStreamingCompatibilityError(options.binding, error)) {
+                    deferredStreamContent.length = 0;
+                }
+                else {
+                    throw error;
+                }
             }
-            const streamEventsConsume = startProfileStep({
-                id: "profile:agent:stream-events-consume",
-                kind: "agent",
-                name: "streamEvents",
-                action: "consume",
-            });
-            if (shouldProfile)
-                yield streamEventsConsume.chunk;
-            try {
-                for await (const event of options.iterateWithTimeout(events, options.streamIdleTimeoutMs, "agent streamEvents", options.streamDeadlineAt, options.invokeTimeoutMs)) {
-                    const projectedChunks = projectRuntimeStreamEvent({
-                        event,
-                        allowVisibleStreamDeltas: true,
-                        includeStateStreamOutput: options.isDeepAgentBinding(options.binding),
-                        rootAgentId: typeof options.binding.agent?.id === "string"
-                            ? options.binding.agent.id
-                            : undefined,
-                        countConfiguredToolsForAgentId: options.countConfiguredToolsForAgentId,
-                        toolNameMapping: options.toolNameMapping,
-                        primaryTools: options.primaryTools,
-                        state: projectionState,
-                    });
-                    const eventContainsNonTodoToolResult = projectedChunks.some((chunk) => chunk.kind === "tool-result"
-                        && chunk.toolName !== "write_todos"
-                        && !(chunk.isError === true && isRetrySafeInvalidToolSelectionError(chunk.output)));
-                    const eventContainsNonRetrySafeChunk = projectedChunks.some((chunk) => chunk.kind !== "upstream-event"
-                        && chunk.kind !== "content"
-                        && !(chunk.kind === "tool-result" && chunk.toolName === "write_todos")
-                        && !(chunk.kind === "tool-result" && chunk.isError === true && isRetrySafeInvalidToolSelectionError(chunk.output)));
-                    for (const chunk of projectedChunks) {
-                        if (chunk.kind === "tool-result" && chunk.isError === true && isRetrySafeInvalidToolSelectionError(chunk.output)) {
-                            sawRetrySafeInvalidToolSelectionError = true;
-                        }
-                        if ((eventContainsNonTodoToolResult || eventContainsNonRetrySafeChunk) && deferredStreamContent.length > 0) {
-                            yield* flushDeferredStreamContent();
-                        }
-                        if (eventContainsNonTodoToolResult || eventContainsNonRetrySafeChunk) {
-                            emittedUnsafeStreamSideEffects = true;
-                        }
-                        if (chunk.kind === "content" && (shouldDeferStreamContent() || projectionState.hasFailedTaskDelegation)) {
-                            deferredStreamContent.push(chunk);
-                            continue;
-                        }
-                        yield chunk;
-                    }
-                    const terminalVisibleOutput = readTerminalEventVisibleOutput(event);
-                    if (terminalVisibleOutput) {
-                        const terminalExecutionEvidence = buildExecutionRecoveryEvidence({ projectionState });
-                        const terminalMissingPlanRecoveryInstruction = !terminalExecutionEvidence.hasDelegatedAgentWithConfiguredTools
-                            && !terminalExecutionEvidence.hasOpenTaskDelegation
-                            && !projectionState.emittedSuccessfulTaskResult
-                            ? resolveMissingPlanRecoveryInstruction({
-                                request,
-                                assistantText: terminalVisibleOutput,
-                                requiresPlan: requiresPlanEvidence(options.binding),
-                                hasPlanStateEvidence: terminalExecutionEvidence.hasPlanStateEvidence,
-                                hasWriteTodosEvidence: terminalExecutionEvidence.hasPlanStateEvidence,
-                                hasToolResultEvidence: terminalExecutionEvidence.hasSuccessfulNonTodoToolResultEvidence,
-                            })
-                            : null;
-                        const terminalDelegationOnlyRecoveryInstruction = resolveDelegationOnlyRecoveryInstruction(options.binding, terminalExecutionEvidence);
-                        if (!shouldDeferStreamContent()
-                            && !terminalExecutionEvidence.hasIncompletePlanState
-                            && !terminalExecutionEvidence.hasFailedTaskDelegation
-                            && !terminalExecutionEvidence.hasOpenTaskDelegation
-                            && !hasMissingDelegatedExecutionEvidence(terminalExecutionEvidence)
-                            && !hasMissingDelegatedFindings(terminalExecutionEvidence)
-                            && !terminalMissingPlanRecoveryInstruction
-                            && !terminalDelegationOnlyRecoveryInstruction) {
-                            if (deferredStreamContent.length > 0) {
+            if (events) {
+                const streamEventsConsume = startProfileStep({
+                    id: "profile:agent:stream-events-consume",
+                    kind: "agent",
+                    name: "streamEvents",
+                    action: "consume",
+                });
+                if (shouldProfile)
+                    yield streamEventsConsume.chunk;
+                try {
+                    for await (const event of options.iterateWithTimeout(events, options.streamIdleTimeoutMs, "agent streamEvents", options.streamDeadlineAt, options.invokeTimeoutMs)) {
+                        const projectedChunks = projectRuntimeStreamEvent({
+                            event,
+                            allowVisibleStreamDeltas: true,
+                            includeStateStreamOutput: options.isDeepAgentBinding(options.binding),
+                            rootAgentId: typeof options.binding.agent?.id === "string"
+                                ? options.binding.agent.id
+                                : undefined,
+                            countConfiguredToolsForAgentId: options.countConfiguredToolsForAgentId,
+                            toolNameMapping: options.toolNameMapping,
+                            primaryTools: options.primaryTools,
+                            state: projectionState,
+                        });
+                        const eventContainsNonTodoToolResult = projectedChunks.some((chunk) => chunk.kind === "tool-result"
+                            && chunk.toolName !== "write_todos"
+                            && !(chunk.isError === true && isRetrySafeInvalidToolSelectionError(chunk.output)));
+                        const eventContainsNonRetrySafeChunk = projectedChunks.some((chunk) => chunk.kind !== "upstream-event"
+                            && chunk.kind !== "content"
+                            && !(chunk.kind === "tool-result" && chunk.toolName === "write_todos")
+                            && !(chunk.kind === "tool-result" && chunk.isError === true && isRetrySafeInvalidToolSelectionError(chunk.output)));
+                        for (const chunk of projectedChunks) {
+                            if (chunk.kind === "tool-result" && chunk.isError === true && isRetrySafeInvalidToolSelectionError(chunk.output)) {
+                                sawRetrySafeInvalidToolSelectionError = true;
+                            }
+                            if ((eventContainsNonTodoToolResult || eventContainsNonRetrySafeChunk) && deferredStreamContent.length > 0) {
                                 yield* flushDeferredStreamContent();
                             }
-                            return;
+                            if (eventContainsNonTodoToolResult || eventContainsNonRetrySafeChunk) {
+                                emittedUnsafeStreamSideEffects = true;
+                            }
+                            if (chunk.kind === "content" && (shouldDeferStreamContent() || projectionState.hasFailedTaskDelegation)) {
+                                deferredStreamContent.push(chunk);
+                                continue;
+                            }
+                            yield chunk;
+                        }
+                        const terminalVisibleOutput = readTerminalEventVisibleOutput(event);
+                        if (terminalVisibleOutput) {
+                            const terminalExecutionEvidence = buildExecutionRecoveryEvidence({ projectionState });
+                            const terminalMissingPlanRecoveryInstruction = !terminalExecutionEvidence.hasDelegatedAgentWithConfiguredTools
+                                && !terminalExecutionEvidence.hasOpenTaskDelegation
+                                && !projectionState.emittedSuccessfulTaskResult
+                                ? resolveMissingPlanRecoveryInstruction({
+                                    request,
+                                    assistantText: terminalVisibleOutput,
+                                    requiresPlan: requiresPlanEvidence(options.binding),
+                                    hasPlanStateEvidence: terminalExecutionEvidence.hasPlanStateEvidence,
+                                    hasWriteTodosEvidence: terminalExecutionEvidence.hasPlanStateEvidence,
+                                    hasToolResultEvidence: terminalExecutionEvidence.hasSuccessfulNonTodoToolResultEvidence,
+                                })
+                                : null;
+                            const terminalDelegationOnlyRecoveryInstruction = resolveDelegationOnlyRecoveryInstruction(options.binding, terminalExecutionEvidence);
+                            if (!shouldDeferStreamContent()
+                                && !terminalExecutionEvidence.hasIncompletePlanState
+                                && !terminalExecutionEvidence.hasFailedTaskDelegation
+                                && !terminalExecutionEvidence.hasOpenTaskDelegation
+                                && !hasMissingDelegatedExecutionEvidence(terminalExecutionEvidence)
+                                && !hasMissingDelegatedFindings(terminalExecutionEvidence)
+                                && !terminalMissingPlanRecoveryInstruction
+                                && !terminalDelegationOnlyRecoveryInstruction) {
+                                if (deferredStreamContent.length > 0) {
+                                    yield* flushDeferredStreamContent();
+                                }
+                                return;
+                            }
                         }
                     }
+                    if (shouldProfile)
+                        yield finishProfileStep({
+                            id: "profile:agent:stream-events-consume",
+                            kind: "agent",
+                            name: "streamEvents",
+                            action: "consume",
+                            startedAt: streamEventsConsume.startedAt,
+                            status: "completed",
+                        });
+                }
+                catch (error) {
+                    if (shouldProfile)
+                        yield finishProfileStep({
+                            id: "profile:agent:stream-events-consume",
+                            kind: "agent",
+                            name: "streamEvents",
+                            action: "consume",
+                            startedAt: streamEventsConsume.startedAt,
+                            status: "failed",
+                            error,
+                        });
+                    if (!emittedUnsafeStreamSideEffects
+                        && isOpenAICompatibleStreamingCompatibilityError(options.binding, error)) {
+                        deferredStreamContent.length = 0;
+                    }
+                    else {
+                        throw error;
+                    }
                 }
-                if (shouldProfile)
-                    yield finishProfileStep({
-                        id: "profile:agent:stream-events-consume",
-                        kind: "agent",
-                        name: "streamEvents",
-                        action: "consume",
-                        startedAt: streamEventsConsume.startedAt,
-                        status: "completed",
-                    });
-            }
-            catch (error) {
-                if (shouldProfile)
-                    yield finishProfileStep({
-                        id: "profile:agent:stream-events-consume",
-                        kind: "agent",
-                        name: "streamEvents",
-                        action: "consume",
-                        startedAt: streamEventsConsume.startedAt,
-                        status: "failed",
-                        error,
-                    });
-                throw error;
             }
             const streamedExecutionEvidence = buildExecutionRecoveryEvidence({ projectionState });
             const streamedDelegatedRecoveryInstruction = resolveDelegatedExecutionRecoveryInstruction(streamedExecutionEvidence);

package/dist/runtime/adapter/invocation-result.js CHANGED Viewed

@@ -242,6 +242,17 @@ function looksLikeNonEvidenceApology(value) {
         || /(?:system limitation|technical limitation|internal limitation|recursion limit)/iu.test(normalized)
         || /(?:抱歉|对不起)[\s\S]*(?:无法|不能|未能)(?:完成|继续|处理)/u.test(normalized);
 }
+function looksLikeContradictedToolExecutionFailure(value) {
+    const normalized = sanitizeVisibleText(value).trim();
+    if (!normalized) {
+        return false;
+    }
+    const mentionsToolExecution = /(?:\btool\b|\bfunction\b|\bexecute\b|\binvoke\b|\bcall\b|工具|函数|调用|执行)/iu.test(normalized);
+    if (!mentionsToolExecution) {
+        return false;
+    }
+    return /(?:cancelled|canceled|timeout|timed out|race condition|cannot execute|can't execute|unable to execute|could not execute|failed to execute|被取消|超时|无法执行|不能执行|未能执行)/iu.test(normalized);
+}
 function extractDeterministicToolFailureReport(executedToolResults) {
     const hasSuccessfulSubstantiveTool = executedToolResults.some((toolResult) => (toolResult.isError !== true
         && toolResult.toolName !== "write_todos"
@@ -311,6 +322,9 @@ export function resolveDeterministicFinalOutput(params) {
         && (looksLikeClarificationQuestion(sanitizedVisibleOutput) || looksLikeNonEvidenceApology(sanitizedVisibleOutput))) {
         return deterministicFailureReport || delegatedTaskOutput || successfulToolOutput || sanitizedVisibleOutput;
     }
+    if (sanitizedVisibleOutput && successfulToolOutput && looksLikeContradictedToolExecutionFailure(sanitizedVisibleOutput)) {
+        return delegatedTaskOutput || successfulToolOutput;
+    }
     if (sanitizedVisibleOutput && !isLowSignalStructuredCompletion(sanitizedVisibleOutput)) {
         return sanitizedVisibleOutput;
     }
@@ -383,6 +397,7 @@ export function finalizeRequestResult(params) {
     const hasMissingRequiredFinalAnswer = binding?.harnessRuntime?.executionContract?.requiresPlan === true
         && !visibleOutput
         && !preliminaryTerminalStatus
+        && !output.trim()
         && allExecutedToolResults.some((toolResult) => toolResult.isError !== true && toolResult.toolName !== "write_todos" && toolResult.toolName !== "read_todos");
     if (hasMissingRequiredPlanEvidence) {
         output = "runtime_error=Agent ended before producing required plan evidence.";