@botbotgo/agent-harness 0.0.359 → 0.0.362

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (29)
  1. package/README.md +2 -0
  2. package/README.zh.md +2 -0
  3. package/dist/config/catalogs/response-formats.yaml +43 -0
  4. package/dist/config/runtime/workspace.yaml +8 -0
  5. package/dist/contracts/runtime-requests.d.ts +19 -0
  6. package/dist/contracts/workspace.d.ts +6 -0
  7. package/dist/package-version.d.ts +2 -2
  8. package/dist/package-version.js +2 -2
  9. package/dist/projections/request-events.d.ts +1 -0
  10. package/dist/projections/request-events.js +97 -45
  11. package/dist/protocol/acp/harness-client.js +2 -3
  12. package/dist/runtime/adapter/flow/invocation-flow.js +26 -1
  13. package/dist/runtime/adapter/flow/stream-runtime.js +117 -94
  14. package/dist/runtime/adapter/invocation-result.js +15 -0
  15. package/dist/runtime/adapter/middleware-assembly.js +25 -3
  16. package/dist/runtime/adapter/tool/builtin-middleware-tools.d.ts +5 -0
  17. package/dist/runtime/adapter/tool/builtin-middleware-tools.js +30 -6
  18. package/dist/runtime/agent-runtime-adapter.d.ts +1 -0
  19. package/dist/runtime/agent-runtime-adapter.js +174 -28
  20. package/dist/runtime/harness/events/streaming.js +2 -3
  21. package/dist/workspace/agent-binding-compiler.js +90 -12
  22. package/dist/workspace/compile.js +1 -0
  23. package/dist/workspace/framework-contract-validation.d.ts +2 -1
  24. package/dist/workspace/framework-contract-validation.js +77 -5
  25. package/dist/workspace/object-loader.js +9 -0
  26. package/dist/workspace/support/workspace-ref-utils.d.ts +1 -0
  27. package/dist/workspace/support/workspace-ref-utils.js +40 -0
  28. package/dist/workspace/yaml-object-reader.js +13 -9
  29. package/package.json +1 -1
package/README.md CHANGED
@@ -94,6 +94,8 @@ agent-harness "Inspect this workspace and explain the main entry points."
94
94
 
95
95
  When the runtime emits request-scoped `plan-state` updates and safe `progress.commentary` deltas, the chat shell now renders live todo-board and progress status updates directly in the terminal, so you can watch planning and execution status change during the run instead of waiting for the final response.
96
96
  Those progress callouts now stay tied to stable runtime surfaces such as plan-state, tool start/completion, memory recall, and agent delegation, so the operator sees Codex-style intermediate status without exposing private model reasoning.
97
+ Streaming data listeners also receive structured `plan.state`, per-item `plan.step`, and normalized `execution.step` events, so applications can render every planning and execution transition without parsing assistant text or raw upstream debug events.
98
+ The bundled runtime now also provides a generic `response-format/default-report` structured-output default for agents. Workspaces can replace it through `Runtime.spec.defaults.agent.config.responseFormatRef`, while individual agents can extend it with inline `responseFormat`, replace it with `responseFormatRef`, or disable it by setting `responseFormat: null`.
97
99
  The repository default `orchestra` host is also instructed to start real multi-step execution from the task you already gave it, call `write_todos` before non-trivial tool work, and keep that todo board updated while it runs.
98
100
  Durable-memory writes now also retrieve related existing records through the configured vector store before model reconciliation, then merge those semantic hits with deterministic matching so updates and deletes can target the right knowledge identity instead of creating nearby duplicate facts.
99
101
 
package/README.zh.md CHANGED
@@ -92,6 +92,8 @@ agent-harness "Inspect this workspace and explain the main entry points."
92
92
 
93
93
  当 runtime 发出 request 级 `plan-state` 更新以及安全的 `progress.commentary` 增量时,chat shell 现在会直接在终端里渲染实时 todo board 和进度播报,因此你可以在执行过程中看到规划和状态变化,而不必等到最终回复。
94
94
  这些进度播报现在会继续绑定在稳定的 runtime surface 上,例如 plan-state、tool 开始/完成、memory recall 与 agent delegation,因此 operator 可以看到类似 Codex 的清晰中间状态,但不会暴露私有模型推理。
95
+ Streaming data listener 也会收到结构化的 `plan.state`、逐条 `plan.step` 和归一化的 `execution.step` 事件,因此应用可以渲染所有 plan / execution 转折,而不必解析 assistant 文本或 raw upstream debug event。
96
+ 随包 runtime 现在也提供通用的 `response-format/default-report` 作为 agent structured-output 默认值;workspace 可以通过 `Runtime.spec.defaults.agent.config.responseFormatRef` 替换它,单个 agent 可以用 inline `responseFormat` 扩展、用 `responseFormatRef` 替换,或设置 `responseFormat: null` 关闭。
95
97
  仓库默认的 `orchestra` host 现在也会被明确要求:对已经给清楚的多步任务不要再反问,而是直接开始执行;在非平凡工具工作前先调用 `write_todos`,并在运行过程中持续维护这块 todo board。
96
98
  durable memory 的写入现在也会在模型做 mutation reconciliation 之前,先通过配置好的 vector store 检索相关旧知识,再和确定性匹配结果合并,因此 update / delete 更容易命中正确的 knowledge identity,而不是生成几条相近但彼此独立的 fact。
97
99
 
@@ -0,0 +1,43 @@
1
+ # agent-harness feature: schema version for reusable response-format presets.
2
+ apiVersion: agent-harness/v1alpha1
3
+ # agent-harness feature: object type for named structured-output response format presets.
4
+ kind: ResponseFormats
5
+ spec:
6
+ - kind: ResponseFormat
7
+ name: default-report
8
+ description: Generic structured report for agent results. Workspaces and agents can override or disable it.
9
+ format:
10
+ type: object
11
+ properties:
12
+ status:
13
+ type: string
14
+ enum:
15
+ - completed
16
+ - blocked
17
+ - failed
18
+ - refused
19
+ summary:
20
+ type: array
21
+ items:
22
+ type: string
23
+ findings:
24
+ type: array
25
+ items:
26
+ type: string
27
+ blockers:
28
+ type: array
29
+ items:
30
+ type: string
31
+ nextActions:
32
+ type: array
33
+ items:
34
+ type: string
35
+ report:
36
+ type: string
37
+ required:
38
+ - status
39
+ - summary
40
+ - findings
41
+ - blockers
42
+ - nextActions
43
+ - report
@@ -45,6 +45,14 @@ spec:
45
45
  skills:
46
46
  - file://./resources/skills
47
47
 
48
+ # agent-harness feature: default agent execution config used when a workspace or agent does not override it.
49
+ # The bundled default keeps first-run agent outputs parseable while still allowing projects and individual agents
50
+ # to replace it with their own responseFormatRef, inline responseFormat, or `responseFormat: null`.
51
+ defaults:
52
+ agent:
53
+ config:
54
+ responseFormatRef: response-format/default-report
55
+
48
56
  # agent-harness feature: runtime-level task queue and maximum number of concurrent requests.
49
57
  # Additional requests wait in the harness queue until a slot becomes available.
50
58
  concurrency:
@@ -121,6 +121,25 @@ export type RequestDataEvent = {
121
121
  requestId: string;
122
122
  agentId: string;
123
123
  text: string;
124
+ } | {
125
+ type: "plan.state";
126
+ sessionId: string;
127
+ requestId: string;
128
+ agentId: string;
129
+ planState: RequestPlanState;
130
+ } | {
131
+ type: "plan.step";
132
+ sessionId: string;
133
+ requestId: string;
134
+ agentId: string;
135
+ planStateVersion: number;
136
+ index: number;
137
+ item: RequestPlanItem;
138
+ } | {
139
+ type: "execution.step";
140
+ sessionId: string;
141
+ requestId: string;
142
+ step: RequestExecutionStep;
124
143
  } | {
125
144
  type: "output.content-blocks";
126
145
  sessionId: string;
@@ -74,6 +74,12 @@ export type ParsedVectorStoreObject = {
74
74
  metadata?: Record<string, unknown>;
75
75
  sourcePath: string;
76
76
  };
77
+ export type ParsedResponseFormatObject = {
78
+ id: string;
79
+ description?: string;
80
+ format: unknown;
81
+ sourcePath: string;
82
+ };
77
83
  export type ParsedMcpServerObject = {
78
84
  id: string;
79
85
  transport: "stdio" | "http" | "sse" | "websocket";
@@ -1,2 +1,2 @@
1
- export declare const AGENT_HARNESS_VERSION = "0.0.359";
2
- export declare const AGENT_HARNESS_RELEASE_DATE = "2026-04-25";
1
+ export declare const AGENT_HARNESS_VERSION = "0.0.362";
2
+ export declare const AGENT_HARNESS_RELEASE_DATE = "2026-04-27";
@@ -1,2 +1,2 @@
1
- export const AGENT_HARNESS_VERSION = "0.0.359";
2
- export const AGENT_HARNESS_RELEASE_DATE = "2026-04-25";
1
+ export const AGENT_HARNESS_VERSION = "0.0.362";
2
+ export const AGENT_HARNESS_RELEASE_DATE = "2026-04-27";
@@ -59,5 +59,6 @@ export type RequestSnapshotStreamItem = StreamEventItem | {
59
59
  } | StreamContentItem | StreamContentBlocksItem | StreamToolResultItem | StreamPlanStateItem | StreamUpstreamEventItem | StreamProfileStepItem | StreamResultItem;
60
60
  export declare function createInitialRequestEventSnapshot(): RequestEventSnapshot;
61
61
  export declare function applyRequestStreamItemToSnapshot(snapshot: RequestEventSnapshot, item: RequestSnapshotStreamItem): RequestEventSnapshot;
62
+ export declare function toRequestDataEvents(item: RequestSnapshotStreamItem): RequestDataEvent[];
62
63
  export declare function toRequestDataEvent(item: RequestSnapshotStreamItem): RequestDataEvent | null;
63
64
  export {};
@@ -393,58 +393,110 @@ export function applyRequestStreamItemToSnapshot(snapshot, item) {
393
393
  };
394
394
  }
395
395
  }
396
- export function toRequestDataEvent(item) {
396
+ function createSurfaceExecutionStep(surfaceItem) {
397
+ return {
398
+ id: surfaceItem.id,
399
+ kind: surfaceItem.kind,
400
+ name: surfaceItem.name,
401
+ action: surfaceItem.action,
402
+ status: surfaceItem.status,
403
+ ...(surfaceItem.agentId ? { agentId: surfaceItem.agentId } : {}),
404
+ ...(surfaceItem.agentName ? { agentName: surfaceItem.agentName } : {}),
405
+ ...(surfaceItem.ownerAgentId ? { ownerAgentId: surfaceItem.ownerAgentId } : {}),
406
+ ...(surfaceItem.ownerAgentName ? { ownerAgentName: surfaceItem.ownerAgentName } : {}),
407
+ ...(surfaceItem.sourceEventId ? { sourceEventId: surfaceItem.sourceEventId } : {}),
408
+ ...(surfaceItem.detail ? { detail: surfaceItem.detail } : {}),
409
+ };
410
+ }
411
+ export function toRequestDataEvents(item) {
397
412
  switch (item.type) {
398
413
  case "commentary":
399
- return {
400
- type: "progress.commentary",
401
- sessionId: item.sessionId,
402
- requestId: item.requestId,
403
- agentId: item.agentId,
404
- text: item.content,
405
- };
414
+ return [{
415
+ type: "progress.commentary",
416
+ sessionId: item.sessionId,
417
+ requestId: item.requestId,
418
+ agentId: item.agentId,
419
+ text: item.content,
420
+ }];
406
421
  case "content":
407
- return {
408
- type: "output.text.delta",
409
- sessionId: item.sessionId,
410
- requestId: item.requestId,
411
- agentId: item.agentId,
412
- text: item.content,
413
- };
422
+ return [{
423
+ type: "output.text.delta",
424
+ sessionId: item.sessionId,
425
+ requestId: item.requestId,
426
+ agentId: item.agentId,
427
+ text: item.content,
428
+ }];
414
429
  case "content-blocks":
415
- return {
416
- type: "output.content-blocks",
417
- sessionId: item.sessionId,
418
- requestId: item.requestId,
419
- agentId: item.agentId,
420
- contentBlocks: item.contentBlocks,
421
- };
430
+ return [{
431
+ type: "output.content-blocks",
432
+ sessionId: item.sessionId,
433
+ requestId: item.requestId,
434
+ agentId: item.agentId,
435
+ contentBlocks: item.contentBlocks,
436
+ }];
422
437
  case "tool-result":
423
- return {
424
- type: "tool.result",
425
- sessionId: item.sessionId,
426
- requestId: item.requestId,
427
- agentId: item.agentId,
428
- toolName: item.toolName,
429
- output: summarizeLargeDataEventOutput(item.output),
430
- ...(item.isError !== undefined ? { isError: item.isError } : {}),
431
- };
438
+ return [{
439
+ type: "tool.result",
440
+ sessionId: item.sessionId,
441
+ requestId: item.requestId,
442
+ agentId: item.agentId,
443
+ toolName: item.toolName,
444
+ output: summarizeLargeDataEventOutput(item.output),
445
+ ...(item.isError !== undefined ? { isError: item.isError } : {}),
446
+ }];
447
+ case "plan-state":
448
+ return [
449
+ {
450
+ type: "plan.state",
451
+ sessionId: item.sessionId,
452
+ requestId: item.requestId,
453
+ agentId: item.agentId,
454
+ planState: item.planState,
455
+ },
456
+ ...item.planState.items.map((planItem, index) => ({
457
+ type: "plan.step",
458
+ sessionId: item.sessionId,
459
+ requestId: item.requestId,
460
+ agentId: item.agentId,
461
+ planStateVersion: item.planState.version,
462
+ index,
463
+ item: planItem,
464
+ })),
465
+ ];
432
466
  case "upstream-event":
433
- return {
434
- type: "debug.upstream",
435
- sessionId: item.sessionId,
436
- requestId: item.requestId,
437
- ...(item.surfaceItem ? { surfaceItem: item.surfaceItem } : {}),
438
- event: item.event,
439
- };
467
+ return [
468
+ ...(item.surfaceItem
469
+ ? [{
470
+ type: "execution.step",
471
+ sessionId: item.sessionId,
472
+ requestId: item.requestId,
473
+ step: createSurfaceExecutionStep(item.surfaceItem),
474
+ }]
475
+ : []),
476
+ {
477
+ type: "debug.upstream",
478
+ sessionId: item.sessionId,
479
+ requestId: item.requestId,
480
+ ...(item.surfaceItem ? { surfaceItem: item.surfaceItem } : {}),
481
+ event: item.event,
482
+ },
483
+ ];
440
484
  case "profile-step":
441
- return {
442
- type: "debug.profile",
443
- sessionId: item.sessionId,
444
- requestId: item.requestId,
445
- step: item.step,
446
- };
485
+ return [{
486
+ type: "execution.step",
487
+ sessionId: item.sessionId,
488
+ requestId: item.requestId,
489
+ step: item.step,
490
+ }, {
491
+ type: "debug.profile",
492
+ sessionId: item.sessionId,
493
+ requestId: item.requestId,
494
+ step: item.step,
495
+ }];
447
496
  default:
448
- return null;
497
+ return [];
449
498
  }
450
499
  }
500
+ export function toRequestDataEvent(item) {
501
+ return toRequestDataEvents(item)[0] ?? null;
502
+ }
@@ -1,5 +1,5 @@
1
1
  import { createAcpHttpClient, createAcpStdioClient, } from "./client.js";
2
- import { applyRequestStreamItemToSnapshot, createInitialRequestEventSnapshot, toRequestDataEvent, } from "../../projections/request-events.js";
2
+ import { applyRequestStreamItemToSnapshot, createInitialRequestEventSnapshot, toRequestDataEvents, } from "../../projections/request-events.js";
3
3
  function toEvent(notification) {
4
4
  return notification.params.event;
5
5
  }
@@ -144,8 +144,7 @@ export class AcpHarnessClient {
144
144
  else if (item.type === "result") {
145
145
  finalResult = item.result;
146
146
  }
147
- const dataEvent = toRequestDataEvent(item);
148
- if (dataEvent) {
147
+ for (const dataEvent of toRequestDataEvents(item)) {
149
148
  await dataListener?.(dataEvent);
150
149
  }
151
150
  await eventListener?.(snapshot);
@@ -7,7 +7,7 @@ import { UPSTREAM_REQUEST_CONFIG_KEY, UPSTREAM_SESSION_CONFIG_KEY } from "../ups
7
7
  import { appendToolRecoveryInstruction, extractVisibleOutput, tryParseJson } from "../../parsing/output-parsing.js";
8
8
  import { salvageJsonToolCalls } from "../../parsing/output-tool-args.js";
9
9
  import { isEmptyFinalAiMessageError } from "../resilience.js";
10
- import { AUTONOMOUS_INVESTIGATION_RECOVERY_INSTRUCTION } from "../../prompts/runtime-prompts.js";
10
+ import { AUTONOMOUS_INVESTIGATION_RECOVERY_INSTRUCTION, EXECUTION_WITH_TOOL_EVIDENCE_RETRY_INSTRUCTION, } from "../../prompts/runtime-prompts.js";
11
11
  function readBindingExecutionParams(binding) {
12
12
  const params = binding.execution?.params ?? binding.deepAgentParams ?? binding.langchainAgentParams;
13
13
  return {
@@ -76,6 +76,17 @@ function hasNativeTaskDelegationIntent(value) {
76
76
  }
77
77
  return hasNativeTaskDelegationIntent(typed.tool_calls) || hasNativeTaskDelegationIntent(typed.messages);
78
78
  }
79
+ function looksLikeCapabilityRefusalWithoutEvidence(value) {
80
+ const text = extractVisibleOutput(value).trim();
81
+ if (!text) {
82
+ return false;
83
+ }
84
+ const refusalSignal = /(?:cannot|can't|unable to|do not have|don't have|not support|does not support|missing capabilities|tool limitation|skill limitation|capability limitation|out of scope|无法|不能|不支持|缺少能力|能力不足|超出范围)/iu.test(text);
85
+ if (!refusalSignal) {
86
+ return false;
87
+ }
88
+ return /(?:tool|tools|skill|skills|capabilit|scope|工具|技能|能力|范围)/iu.test(text);
89
+ }
79
90
  function readStructuredToolCall(value) {
80
91
  const salvaged = salvageJsonToolCalls(value)[0];
81
92
  if (salvaged) {
@@ -344,6 +355,20 @@ export async function executeRequestInvocation(options) {
344
355
  result = recoveredInvocation.result;
345
356
  executedToolResults.splice(0, executedToolResults.length, ...recoveredInvocation.executedToolResults);
346
357
  }
358
+ if (options.resumePayload === undefined
359
+ && primaryTools.length > 0
360
+ && executedToolResults.length === 0
361
+ && looksLikeCapabilityRefusalWithoutEvidence(result)) {
362
+ const messages = Array.isArray(result.messages)
363
+ ? result.messages
364
+ : undefined;
365
+ const recoveryBase = messages ? { messages } : request;
366
+ const recoveredRequest = appendToolRecoveryInstruction(recoveryBase, EXECUTION_WITH_TOOL_EVIDENCE_RETRY_INSTRUCTION);
367
+ const recoveredInvocation = await invokeOnce(recoveredRequest);
368
+ localOrUpstreamInvocation = recoveredInvocation;
369
+ result = recoveredInvocation.result;
370
+ executedToolResults.splice(0, executedToolResults.length, ...recoveredInvocation.executedToolResults);
371
+ }
347
372
  try {
348
373
  return finalizeRequestResult({
349
374
  bindingAgentId: options.binding.agent.id,
@@ -125,6 +125,15 @@ function isDelegationOnlyBinding(binding) {
125
125
  const skillRefs = agent?.skillPathRefs ?? [];
126
126
  return configuredSubagents.length > 0 && configuredTools.length === 0 && skillRefs.length === 0;
127
127
  }
128
+ function isOpenAICompatibleStreamingCompatibilityError(binding, error) {
129
+ const params = binding.execution?.params ?? binding.deepAgentParams ?? binding.langchainAgentParams;
130
+ const model = params?.model;
131
+ if (model?.provider !== "openai-compatible") {
132
+ return false;
133
+ }
134
+ const message = error instanceof Error ? error.message : String(error);
135
+ return message.toLowerCase().includes("received empty response from chat model call");
136
+ }
128
137
  function hasDelegationEvidence(evidence) {
129
138
  return (evidence.hasSuccessfulTaskToolEvidence
130
139
  || evidence.hasOpenTaskDelegation
@@ -401,106 +410,120 @@ export async function* streamRuntimeExecution(options) {
401
410
  status: "failed",
402
411
  error,
403
412
  });
404
- throw error;
413
+ if (!emittedUnsafeStreamSideEffects
414
+ && isOpenAICompatibleStreamingCompatibilityError(options.binding, error)) {
415
+ deferredStreamContent.length = 0;
416
+ }
417
+ else {
418
+ throw error;
419
+ }
405
420
  }
406
- const streamEventsConsume = startProfileStep({
407
- id: "profile:agent:stream-events-consume",
408
- kind: "agent",
409
- name: "streamEvents",
410
- action: "consume",
411
- });
412
- if (shouldProfile)
413
- yield streamEventsConsume.chunk;
414
- try {
415
- for await (const event of options.iterateWithTimeout(events, options.streamIdleTimeoutMs, "agent streamEvents", options.streamDeadlineAt, options.invokeTimeoutMs)) {
416
- const projectedChunks = projectRuntimeStreamEvent({
417
- event,
418
- allowVisibleStreamDeltas: true,
419
- includeStateStreamOutput: options.isDeepAgentBinding(options.binding),
420
- rootAgentId: typeof options.binding.agent?.id === "string"
421
- ? options.binding.agent.id
422
- : undefined,
423
- countConfiguredToolsForAgentId: options.countConfiguredToolsForAgentId,
424
- toolNameMapping: options.toolNameMapping,
425
- primaryTools: options.primaryTools,
426
- state: projectionState,
427
- });
428
- const eventContainsNonTodoToolResult = projectedChunks.some((chunk) => chunk.kind === "tool-result"
429
- && chunk.toolName !== "write_todos"
430
- && !(chunk.isError === true && isRetrySafeInvalidToolSelectionError(chunk.output)));
431
- const eventContainsNonRetrySafeChunk = projectedChunks.some((chunk) => chunk.kind !== "upstream-event"
432
- && chunk.kind !== "content"
433
- && !(chunk.kind === "tool-result" && chunk.toolName === "write_todos")
434
- && !(chunk.kind === "tool-result" && chunk.isError === true && isRetrySafeInvalidToolSelectionError(chunk.output)));
435
- for (const chunk of projectedChunks) {
436
- if (chunk.kind === "tool-result" && chunk.isError === true && isRetrySafeInvalidToolSelectionError(chunk.output)) {
437
- sawRetrySafeInvalidToolSelectionError = true;
438
- }
439
- if ((eventContainsNonTodoToolResult || eventContainsNonRetrySafeChunk) && deferredStreamContent.length > 0) {
440
- yield* flushDeferredStreamContent();
441
- }
442
- if (eventContainsNonTodoToolResult || eventContainsNonRetrySafeChunk) {
443
- emittedUnsafeStreamSideEffects = true;
444
- }
445
- if (chunk.kind === "content" && (shouldDeferStreamContent() || projectionState.hasFailedTaskDelegation)) {
446
- deferredStreamContent.push(chunk);
447
- continue;
448
- }
449
- yield chunk;
450
- }
451
- const terminalVisibleOutput = readTerminalEventVisibleOutput(event);
452
- if (terminalVisibleOutput) {
453
- const terminalExecutionEvidence = buildExecutionRecoveryEvidence({ projectionState });
454
- const terminalMissingPlanRecoveryInstruction = !terminalExecutionEvidence.hasDelegatedAgentWithConfiguredTools
455
- && !terminalExecutionEvidence.hasOpenTaskDelegation
456
- && !projectionState.emittedSuccessfulTaskResult
457
- ? resolveMissingPlanRecoveryInstruction({
458
- request,
459
- assistantText: terminalVisibleOutput,
460
- requiresPlan: requiresPlanEvidence(options.binding),
461
- hasPlanStateEvidence: terminalExecutionEvidence.hasPlanStateEvidence,
462
- hasWriteTodosEvidence: terminalExecutionEvidence.hasPlanStateEvidence,
463
- hasToolResultEvidence: terminalExecutionEvidence.hasSuccessfulNonTodoToolResultEvidence,
464
- })
465
- : null;
466
- const terminalDelegationOnlyRecoveryInstruction = resolveDelegationOnlyRecoveryInstruction(options.binding, terminalExecutionEvidence);
467
- if (!shouldDeferStreamContent()
468
- && !terminalExecutionEvidence.hasIncompletePlanState
469
- && !terminalExecutionEvidence.hasFailedTaskDelegation
470
- && !terminalExecutionEvidence.hasOpenTaskDelegation
471
- && !hasMissingDelegatedExecutionEvidence(terminalExecutionEvidence)
472
- && !hasMissingDelegatedFindings(terminalExecutionEvidence)
473
- && !terminalMissingPlanRecoveryInstruction
474
- && !terminalDelegationOnlyRecoveryInstruction) {
475
- if (deferredStreamContent.length > 0) {
421
+ if (events) {
422
+ const streamEventsConsume = startProfileStep({
423
+ id: "profile:agent:stream-events-consume",
424
+ kind: "agent",
425
+ name: "streamEvents",
426
+ action: "consume",
427
+ });
428
+ if (shouldProfile)
429
+ yield streamEventsConsume.chunk;
430
+ try {
431
+ for await (const event of options.iterateWithTimeout(events, options.streamIdleTimeoutMs, "agent streamEvents", options.streamDeadlineAt, options.invokeTimeoutMs)) {
432
+ const projectedChunks = projectRuntimeStreamEvent({
433
+ event,
434
+ allowVisibleStreamDeltas: true,
435
+ includeStateStreamOutput: options.isDeepAgentBinding(options.binding),
436
+ rootAgentId: typeof options.binding.agent?.id === "string"
437
+ ? options.binding.agent.id
438
+ : undefined,
439
+ countConfiguredToolsForAgentId: options.countConfiguredToolsForAgentId,
440
+ toolNameMapping: options.toolNameMapping,
441
+ primaryTools: options.primaryTools,
442
+ state: projectionState,
443
+ });
444
+ const eventContainsNonTodoToolResult = projectedChunks.some((chunk) => chunk.kind === "tool-result"
445
+ && chunk.toolName !== "write_todos"
446
+ && !(chunk.isError === true && isRetrySafeInvalidToolSelectionError(chunk.output)));
447
+ const eventContainsNonRetrySafeChunk = projectedChunks.some((chunk) => chunk.kind !== "upstream-event"
448
+ && chunk.kind !== "content"
449
+ && !(chunk.kind === "tool-result" && chunk.toolName === "write_todos")
450
+ && !(chunk.kind === "tool-result" && chunk.isError === true && isRetrySafeInvalidToolSelectionError(chunk.output)));
451
+ for (const chunk of projectedChunks) {
452
+ if (chunk.kind === "tool-result" && chunk.isError === true && isRetrySafeInvalidToolSelectionError(chunk.output)) {
453
+ sawRetrySafeInvalidToolSelectionError = true;
454
+ }
455
+ if ((eventContainsNonTodoToolResult || eventContainsNonRetrySafeChunk) && deferredStreamContent.length > 0) {
476
456
  yield* flushDeferredStreamContent();
477
457
  }
478
- return;
458
+ if (eventContainsNonTodoToolResult || eventContainsNonRetrySafeChunk) {
459
+ emittedUnsafeStreamSideEffects = true;
460
+ }
461
+ if (chunk.kind === "content" && (shouldDeferStreamContent() || projectionState.hasFailedTaskDelegation)) {
462
+ deferredStreamContent.push(chunk);
463
+ continue;
464
+ }
465
+ yield chunk;
466
+ }
467
+ const terminalVisibleOutput = readTerminalEventVisibleOutput(event);
468
+ if (terminalVisibleOutput) {
469
+ const terminalExecutionEvidence = buildExecutionRecoveryEvidence({ projectionState });
470
+ const terminalMissingPlanRecoveryInstruction = !terminalExecutionEvidence.hasDelegatedAgentWithConfiguredTools
471
+ && !terminalExecutionEvidence.hasOpenTaskDelegation
472
+ && !projectionState.emittedSuccessfulTaskResult
473
+ ? resolveMissingPlanRecoveryInstruction({
474
+ request,
475
+ assistantText: terminalVisibleOutput,
476
+ requiresPlan: requiresPlanEvidence(options.binding),
477
+ hasPlanStateEvidence: terminalExecutionEvidence.hasPlanStateEvidence,
478
+ hasWriteTodosEvidence: terminalExecutionEvidence.hasPlanStateEvidence,
479
+ hasToolResultEvidence: terminalExecutionEvidence.hasSuccessfulNonTodoToolResultEvidence,
480
+ })
481
+ : null;
482
+ const terminalDelegationOnlyRecoveryInstruction = resolveDelegationOnlyRecoveryInstruction(options.binding, terminalExecutionEvidence);
483
+ if (!shouldDeferStreamContent()
484
+ && !terminalExecutionEvidence.hasIncompletePlanState
485
+ && !terminalExecutionEvidence.hasFailedTaskDelegation
486
+ && !terminalExecutionEvidence.hasOpenTaskDelegation
487
+ && !hasMissingDelegatedExecutionEvidence(terminalExecutionEvidence)
488
+ && !hasMissingDelegatedFindings(terminalExecutionEvidence)
489
+ && !terminalMissingPlanRecoveryInstruction
490
+ && !terminalDelegationOnlyRecoveryInstruction) {
491
+ if (deferredStreamContent.length > 0) {
492
+ yield* flushDeferredStreamContent();
493
+ }
494
+ return;
495
+ }
479
496
  }
480
497
  }
498
+ if (shouldProfile)
499
+ yield finishProfileStep({
500
+ id: "profile:agent:stream-events-consume",
501
+ kind: "agent",
502
+ name: "streamEvents",
503
+ action: "consume",
504
+ startedAt: streamEventsConsume.startedAt,
505
+ status: "completed",
506
+ });
507
+ }
508
+ catch (error) {
509
+ if (shouldProfile)
510
+ yield finishProfileStep({
511
+ id: "profile:agent:stream-events-consume",
512
+ kind: "agent",
513
+ name: "streamEvents",
514
+ action: "consume",
515
+ startedAt: streamEventsConsume.startedAt,
516
+ status: "failed",
517
+ error,
518
+ });
519
+ if (!emittedUnsafeStreamSideEffects
520
+ && isOpenAICompatibleStreamingCompatibilityError(options.binding, error)) {
521
+ deferredStreamContent.length = 0;
522
+ }
523
+ else {
524
+ throw error;
525
+ }
481
526
  }
482
- if (shouldProfile)
483
- yield finishProfileStep({
484
- id: "profile:agent:stream-events-consume",
485
- kind: "agent",
486
- name: "streamEvents",
487
- action: "consume",
488
- startedAt: streamEventsConsume.startedAt,
489
- status: "completed",
490
- });
491
- }
492
- catch (error) {
493
- if (shouldProfile)
494
- yield finishProfileStep({
495
- id: "profile:agent:stream-events-consume",
496
- kind: "agent",
497
- name: "streamEvents",
498
- action: "consume",
499
- startedAt: streamEventsConsume.startedAt,
500
- status: "failed",
501
- error,
502
- });
503
- throw error;
504
527
  }
505
528
  const streamedExecutionEvidence = buildExecutionRecoveryEvidence({ projectionState });
506
529
  const streamedDelegatedRecoveryInstruction = resolveDelegatedExecutionRecoveryInstruction(streamedExecutionEvidence);
@@ -242,6 +242,17 @@ function looksLikeNonEvidenceApology(value) {
242
242
  || /(?:system limitation|technical limitation|internal limitation|recursion limit)/iu.test(normalized)
243
243
  || /(?:抱歉|对不起)[\s\S]*(?:无法|不能|未能)(?:完成|继续|处理)/u.test(normalized);
244
244
  }
245
+ function looksLikeContradictedToolExecutionFailure(value) {
246
+ const normalized = sanitizeVisibleText(value).trim();
247
+ if (!normalized) {
248
+ return false;
249
+ }
250
+ const mentionsToolExecution = /(?:\btool\b|\bfunction\b|\bexecute\b|\binvoke\b|\bcall\b|工具|函数|调用|执行)/iu.test(normalized);
251
+ if (!mentionsToolExecution) {
252
+ return false;
253
+ }
254
+ return /(?:cancelled|canceled|timeout|timed out|race condition|cannot execute|can't execute|unable to execute|could not execute|failed to execute|被取消|超时|无法执行|不能执行|未能执行)/iu.test(normalized);
255
+ }
245
256
  function extractDeterministicToolFailureReport(executedToolResults) {
246
257
  const hasSuccessfulSubstantiveTool = executedToolResults.some((toolResult) => (toolResult.isError !== true
247
258
  && toolResult.toolName !== "write_todos"
@@ -311,6 +322,9 @@ export function resolveDeterministicFinalOutput(params) {
311
322
  && (looksLikeClarificationQuestion(sanitizedVisibleOutput) || looksLikeNonEvidenceApology(sanitizedVisibleOutput))) {
312
323
  return deterministicFailureReport || delegatedTaskOutput || successfulToolOutput || sanitizedVisibleOutput;
313
324
  }
325
+ if (sanitizedVisibleOutput && successfulToolOutput && looksLikeContradictedToolExecutionFailure(sanitizedVisibleOutput)) {
326
+ return delegatedTaskOutput || successfulToolOutput;
327
+ }
314
328
  if (sanitizedVisibleOutput && !isLowSignalStructuredCompletion(sanitizedVisibleOutput)) {
315
329
  return sanitizedVisibleOutput;
316
330
  }
@@ -383,6 +397,7 @@ export function finalizeRequestResult(params) {
383
397
  const hasMissingRequiredFinalAnswer = binding?.harnessRuntime?.executionContract?.requiresPlan === true
384
398
  && !visibleOutput
385
399
  && !preliminaryTerminalStatus
400
+ && !output.trim()
386
401
  && allExecutedToolResults.some((toolResult) => toolResult.isError !== true && toolResult.toolName !== "write_todos" && toolResult.toolName !== "read_todos");
387
402
  if (hasMissingRequiredPlanEvidence) {
388
403
  output = "runtime_error=Agent ended before producing required plan evidence.";