@botbotgo/agent-harness 0.0.418 → 0.0.420

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (28)
  1. package/dist/cli/chat-interactive.js +1 -1
  2. package/dist/cli/chat-stream.js +9 -1
  3. package/dist/package-version.d.ts +2 -2
  4. package/dist/package-version.js +2 -2
  5. package/dist/runtime/adapter/compat/openai-compatible.js +12 -0
  6. package/dist/runtime/adapter/flow/invocation-flow.d.ts +2 -0
  7. package/dist/runtime/adapter/flow/invocation-flow.js +13 -5
  8. package/dist/runtime/adapter/flow/invoke-runtime.d.ts +1 -0
  9. package/dist/runtime/adapter/flow/invoke-runtime.js +1 -0
  10. package/dist/runtime/adapter/flow/stream-runtime.d.ts +4 -0
  11. package/dist/runtime/adapter/flow/stream-runtime.js +177 -14
  12. package/dist/runtime/adapter/invocation-result.js +17 -6
  13. package/dist/runtime/adapter/local-tool-invocation.d.ts +2 -1
  14. package/dist/runtime/adapter/local-tool-invocation.js +268 -21
  15. package/dist/runtime/adapter/model/model-providers.js +269 -58
  16. package/dist/runtime/adapter/model/prompted-json-tool-call-capture.d.ts +9 -0
  17. package/dist/runtime/adapter/model/prompted-json-tool-call-capture.js +40 -0
  18. package/dist/runtime/adapter/runtime-adapter-support.js +58 -12
  19. package/dist/runtime/adapter/runtime-shell.js +3 -2
  20. package/dist/runtime/adapter/stream-event-projection.js +22 -5
  21. package/dist/runtime/adapter/tool/tool-arguments.js +157 -67
  22. package/dist/runtime/adapter/tool/tool-replay.js +0 -4
  23. package/dist/runtime/agent-runtime-adapter.d.ts +3 -0
  24. package/dist/runtime/agent-runtime-adapter.js +217 -73
  25. package/dist/runtime/harness/run/stream-run.js +31 -3
  26. package/dist/runtime/parsing/output-tool-args.js +108 -0
  27. package/dist/workspace/resource-compilers.js +17 -4
  28. package/package.json +1 -1
@@ -166,7 +166,7 @@ export async function runInteractiveChatLoop(input) {
166
166
  }
167
167
  activeSessionId = chatCommand.arg;
168
168
  latestRequestId = session.latestRequestId;
169
- activeAgentId = session.currentAgentId ?? session.entryAgentId ?? activeAgentId;
169
+ activeAgentId = session.entryAgentId ?? session.currentAgentId ?? activeAgentId;
170
170
  input.stdout(`session=${activeSessionId}\n`);
171
171
  continue;
172
172
  }
@@ -10,6 +10,7 @@ export async function streamChatMessage(input) {
10
10
  let latestSessionId = input.sessionId;
11
11
  let latestRequestId;
12
12
  let latestAgentId = input.agentId;
13
+ let entryAgentId = input.agentId;
13
14
  let wroteContent = false;
14
15
  let wroteRenderableBlocks = false;
15
16
  let renderedAssistantOutput = "";
@@ -348,6 +349,7 @@ export async function streamChatMessage(input) {
348
349
  writeChatStderr(lines.join(""));
349
350
  };
350
351
  const renderContentBlocks = (contentBlocks, agentId) => {
352
+ entryAgentId ??= agentId;
351
353
  latestAgentId = agentId || latestAgentId;
352
354
  const rendered = contentBlocks
353
355
  .map((block) => {
@@ -373,6 +375,7 @@ export async function streamChatMessage(input) {
373
375
  markRuntimeProgress();
374
376
  latestSessionId = snapshot.sessionId || latestSessionId;
375
377
  latestRequestId = snapshot.requestId || latestRequestId;
378
+ entryAgentId ??= snapshot.agentId;
376
379
  latestAgentId = snapshot.agentId || latestAgentId;
377
380
  latestSnapshot = snapshot;
378
381
  firstSnapshotAt ??= Date.now();
@@ -421,16 +424,19 @@ export async function streamChatMessage(input) {
421
424
  latestRequestId = delta.requestId || latestRequestId;
422
425
  firstDataAt ??= Date.now();
423
426
  if (delta.type === "output.text.delta") {
427
+ entryAgentId ??= delta.agentId;
424
428
  latestAgentId = delta.agentId || latestAgentId;
425
429
  writeAssistantOutput(delta.text);
426
430
  return;
427
431
  }
428
432
  if (delta.type === "output.content-blocks") {
433
+ entryAgentId ??= delta.agentId;
429
434
  suspendRequestTreeRendering();
430
435
  renderContentBlocks(delta.contentBlocks, delta.agentId);
431
436
  return;
432
437
  }
433
438
  if (delta.type === "plan.step") {
439
+ entryAgentId ??= delta.agentId;
434
440
  latestAgentId = delta.agentId || latestAgentId;
435
441
  const item = delta.item;
436
442
  const status = typeof item?.status === "string" ? item.status : "unknown";
@@ -441,6 +447,7 @@ export async function streamChatMessage(input) {
441
447
  return;
442
448
  }
443
449
  if (delta.type === "tool.result") {
450
+ entryAgentId ??= delta.agentId;
444
451
  latestAgentId = delta.agentId || latestAgentId;
445
452
  if ((input.showToolResults ?? true) && !input.requestEvents) {
446
453
  writeChatStderr(`\n[${formatPerfClock(Date.now())} +${formatElapsed(Date.now())}]${formatAgentProgressLabel(delta.agentId)} [tool:${delta.toolName}] ${summarizeChatToolResult(delta.output, delta.isError === true)}${delta.isError ? " (error)" : ""}\n`);
@@ -448,6 +455,7 @@ export async function streamChatMessage(input) {
448
455
  return;
449
456
  }
450
457
  if (delta.type === "progress.commentary") {
458
+ entryAgentId ??= delta.agentId;
451
459
  latestAgentId = delta.agentId || latestAgentId;
452
460
  if (wroteContent || wroteRenderableBlocks) {
453
461
  return;
@@ -500,5 +508,5 @@ export async function streamChatMessage(input) {
500
508
  writeChatStdout("\n");
501
509
  }
502
510
  await Promise.allSettled([stdoutWriteChain, stderrWriteChain]);
503
- return { sessionId: latestSessionId, requestId: latestRequestId, agentId: latestAgentId };
511
+ return { sessionId: latestSessionId, requestId: latestRequestId, agentId: entryAgentId ?? latestAgentId };
504
512
  }
@@ -1,2 +1,2 @@
1
- export declare const AGENT_HARNESS_VERSION = "0.0.418";
2
- export declare const AGENT_HARNESS_RELEASE_DATE = "2026-05-02";
1
+ export declare const AGENT_HARNESS_VERSION = "0.0.420";
2
+ export declare const AGENT_HARNESS_RELEASE_DATE = "2026-05-03";
@@ -1,2 +1,2 @@
1
- export const AGENT_HARNESS_VERSION = "0.0.418";
2
- export const AGENT_HARNESS_RELEASE_DATE = "2026-05-02";
1
+ export const AGENT_HARNESS_VERSION = "0.0.420";
2
+ export const AGENT_HARNESS_RELEASE_DATE = "2026-05-03";
@@ -27,6 +27,7 @@ export function buildAuthOmittingFetch(baseFetch = fetch) {
27
27
  export function normalizeOpenAICompatibleInit(init) {
28
28
  const normalized = { ...init };
29
29
  const configuration = asObject(init.configuration) ?? {};
30
+ const modelKwargs = asObject(init.modelKwargs) ?? {};
30
31
  const baseUrl = typeof init.baseUrl === "string" && init.baseUrl.trim() ? init.baseUrl.trim() : undefined;
31
32
  const omitAuthHeader = init.omitAuthHeader === true || isPlaceholderApiKey(init.apiKey);
32
33
  const nextConfiguration = { ...configuration };
@@ -36,8 +37,19 @@ export function normalizeOpenAICompatibleInit(init) {
36
37
  if (omitAuthHeader) {
37
38
  nextConfiguration.fetch = buildAuthOmittingFetch(typeof configuration.fetch === "function" ? configuration.fetch : fetch);
38
39
  }
40
+ if (typeof init.numPredict === "number" && typeof normalized.maxTokens !== "number") {
41
+ normalized.maxTokens = init.numPredict;
42
+ }
43
+ if (typeof init.numCtx === "number" && typeof modelKwargs.num_ctx !== "number") {
44
+ normalized.modelKwargs = {
45
+ ...modelKwargs,
46
+ num_ctx: init.numCtx,
47
+ };
48
+ }
39
49
  normalized.configuration = nextConfiguration;
40
50
  delete normalized.baseUrl;
41
51
  delete normalized.omitAuthHeader;
52
+ delete normalized.numPredict;
53
+ delete normalized.numCtx;
42
54
  return normalized;
43
55
  }
@@ -14,6 +14,8 @@ export declare function executeRequestInvocation(options: {
14
14
  files?: Record<string, unknown>;
15
15
  memoryContext?: string;
16
16
  toolRuntimeContext?: Record<string, unknown>;
17
+ suppressInitialRequiredPlanInstruction?: boolean;
18
+ externalPlanEvidence?: boolean;
17
19
  };
18
20
  resolveTools: (tools: CompiledTool[], binding?: CompiledAgentBinding) => unknown[];
19
21
  getToolNameMapping: (binding: CompiledAgentBinding) => ToolNameMapping;
@@ -50,11 +50,17 @@ function isDelegationOnlyBinding(binding) {
50
50
  function hasTaskDelegationEvidence(executedToolResults) {
51
51
  return executedToolResults.some((item) => item.toolName === "task");
52
52
  }
53
+ function normalizePlanToolName(toolName) {
54
+ return typeof toolName === "string" ? toolName.trim().toLowerCase().replace(/[\s-]+/gu, "_") : "";
55
+ }
53
56
  function isPlanToolName(toolName) {
54
- return toolName === "write_todos"
55
- || toolName === "read_todos"
56
- || toolName === "tool_call_write_todos"
57
- || toolName === "tool_call_read_todos";
57
+ const normalized = normalizePlanToolName(toolName);
58
+ return normalized === "write_todos"
59
+ || normalized === "read_todos"
60
+ || normalized === "tool_call_write_todos"
61
+ || normalized === "tool_call_read_todos"
62
+ || normalized === "call_write_todos"
63
+ || normalized === "call_read_todos";
58
64
  }
59
65
  function hasPlanToolEvidence(executedToolResults) {
60
66
  return executedToolResults.some((item) => isPlanToolName(item.toolName));
@@ -303,7 +309,8 @@ export async function executeRequestInvocation(options) {
303
309
  ? buildInvocationRequest(options.binding, history, options.input, invokeOptions)
304
310
  : new Command({ resume: options.resumePayload });
305
311
  if (options.resumePayload === undefined
306
- && options.binding.harnessRuntime.executionContract?.requiresPlan === true) {
312
+ && options.binding.harnessRuntime.executionContract?.requiresPlan === true
313
+ && invokeOptions.suppressInitialRequiredPlanInstruction !== true) {
307
314
  request = appendToolRecoveryInstruction(request, INITIAL_REQUIRED_PLAN_INSTRUCTION);
308
315
  }
309
316
  const { primaryTools, toolNameMapping, executableTools, defersToUpstreamHitlExecution, } = buildBindingToolExecutionContext({
@@ -331,6 +338,7 @@ export async function executeRequestInvocation(options) {
331
338
  builtinExecutableTools: builtinExecutableTools,
332
339
  callRuntimeWithToolParseRecovery: options.callRuntimeWithToolParseRecovery,
333
340
  toolRuntimeContext: invokeOptions.toolRuntimeContext,
341
+ externalPlanEvidence: invokeOptions.externalPlanEvidence,
334
342
  });
335
343
  let localOrUpstreamInvocation = await invokeOnce(request);
336
344
  if (options.resumePayload === undefined
@@ -17,6 +17,7 @@ export declare function invokeRuntimeWithLocalTools(options: {
17
17
  builtinExecutableTools: Map<string, ExecutableTool>;
18
18
  callRuntimeWithToolParseRecovery: (request: unknown) => Promise<Record<string, unknown>>;
19
19
  toolRuntimeContext?: Record<string, unknown>;
20
+ externalPlanEvidence?: boolean;
20
21
  }): Promise<{
21
22
  result: Record<string, unknown>;
22
23
  executedToolResults: ExecutedToolResult[];
@@ -15,5 +15,6 @@ export async function invokeRuntimeWithLocalTools(options) {
15
15
  builtinExecutableTools: options.builtinExecutableTools,
16
16
  callRuntimeWithToolParseRecovery: options.callRuntimeWithToolParseRecovery,
17
17
  toolRuntimeContext: options.toolRuntimeContext,
18
+ externalPlanEvidence: options.externalPlanEvidence,
18
19
  });
19
20
  }
@@ -21,6 +21,8 @@ export declare function streamRuntimeExecution(options: {
21
21
  memoryContext?: string;
22
22
  profiling?: boolean;
23
23
  toolRuntimeContext?: Record<string, unknown>;
24
+ suppressInitialRequiredPlanInstruction?: boolean;
25
+ externalPlanEvidence?: boolean;
24
26
  };
25
27
  primaryTools: CompiledTool[];
26
28
  toolNameMapping: ToolNameMapping;
@@ -47,6 +49,8 @@ export declare function streamRuntimeExecution(options: {
47
49
  files?: Record<string, unknown>;
48
50
  memoryContext?: string;
49
51
  toolRuntimeContext?: Record<string, unknown>;
52
+ suppressInitialRequiredPlanInstruction?: boolean;
53
+ externalPlanEvidence?: boolean;
50
54
  }) => Promise<{
51
55
  output: string;
52
56
  metadata?: Record<string, unknown>;
@@ -26,13 +26,43 @@ const RUN_EVIDENCE_AFTER_PREMATURE_PLAN_CLOSE_INSTRUCTION = [
26
26
  "Your next action must be exactly one non-TODO evidence tool call selected from the available tool descriptions and schemas.",
27
27
  "After that evidence tool returns, update the todo board and then provide the final answer required by the agent response format.",
28
28
  ].join("\n");
29
+ function readPrimaryToolName(tool) {
30
+ return typeof tool.name === "string" ? tool.name.trim() : "";
31
+ }
32
+ function buildRunEvidenceAfterPlanInstruction(primaryTools) {
33
+ const toolNames = primaryTools
34
+ .map(readPrimaryToolName)
35
+ .filter((name) => name.length > 0 && !isPlanToolName(name));
36
+ if (toolNames.length === 0) {
37
+ return RUN_EVIDENCE_AFTER_PREMATURE_PLAN_CLOSE_INSTRUCTION;
38
+ }
39
+ return [
40
+ RUN_EVIDENCE_AFTER_PREMATURE_PLAN_CLOSE_INSTRUCTION,
41
+ "",
42
+ `Available non-planning tool names: ${toolNames.join(", ")}.`,
43
+ ].join("\n");
44
+ }
29
45
  const INITIAL_REQUIRED_PLAN_INSTRUCTION = [
30
46
  "This agent has a required visible planning contract.",
31
47
  "Your first action for this request must be write_todos with concrete task steps and statuses.",
32
48
  "Do not call any domain/evidence tool and do not provide a final answer before the initial write_todos call succeeds.",
49
+ "After write_todos succeeds, do not call write_todos or read_todos again until one non-planning evidence tool returns.",
33
50
  "Do not use placeholders like '1', '2', '3', 'step 1', or generic labels. Each todo must name the concrete work it represents.",
34
51
  "After each evidence step, update the todo board. Before the final answer, close every todo as completed or failed.",
35
52
  ].join("\n");
53
+ function buildInitialRequiredPlanInstruction(primaryTools) {
54
+ const toolNames = primaryTools
55
+ .map(readPrimaryToolName)
56
+ .filter((name) => name.length > 0 && !isPlanToolName(name));
57
+ if (toolNames.length === 0) {
58
+ return INITIAL_REQUIRED_PLAN_INSTRUCTION;
59
+ }
60
+ return [
61
+ INITIAL_REQUIRED_PLAN_INSTRUCTION,
62
+ "",
63
+ `After the initial todo board, select the next non-planning tool from these declared tool names: ${toolNames.join(", ")}.`,
64
+ ].join("\n");
65
+ }
36
66
  function toVisibleContent(value) {
37
67
  const extracted = extractVisibleOutput(value);
38
68
  return extracted ? sanitizeVisibleText(extracted) : "";
@@ -98,11 +128,17 @@ function hasIncompletePlanOutput(value) {
98
128
  }
99
129
  return null;
100
130
  }
131
+ function normalizePlanToolName(toolName) {
132
+ return typeof toolName === "string" ? toolName.trim().toLowerCase().replace(/[\s-]+/gu, "_") : "";
133
+ }
101
134
  function isPlanToolName(toolName) {
102
- return toolName === "write_todos"
103
- || toolName === "read_todos"
104
- || toolName === "tool_call_write_todos"
105
- || toolName === "tool_call_read_todos";
135
+ const normalized = normalizePlanToolName(toolName);
136
+ return normalized === "write_todos"
137
+ || normalized === "read_todos"
138
+ || normalized === "tool_call_write_todos"
139
+ || normalized === "tool_call_read_todos"
140
+ || normalized === "call_write_todos"
141
+ || normalized === "call_read_todos";
106
142
  }
107
143
  function isCompletedPlanToolResultChunk(chunk) {
108
144
  if (chunk.kind !== "tool-result" || !isPlanToolName(chunk.toolName)) {
@@ -122,6 +158,12 @@ function hasSuccessfulTaskToolEvidence(executedToolResults) {
122
158
  function requiresPlanEvidence(binding) {
123
159
  return binding.harnessRuntime?.executionContract?.requiresPlan === true;
124
160
  }
161
+ function withSuppressedInitialRequiredPlanInstruction(options) {
162
+ return {
163
+ ...options,
164
+ suppressInitialRequiredPlanInstruction: true,
165
+ };
166
+ }
125
167
  function hasParentLocalToolExecutionAfterDelegationFailure(originalEvidence, executedToolResults) {
126
168
  return originalEvidence.hasFailedTaskDelegation
127
169
  && executedToolResults.some((item) => item.toolName !== "task");
@@ -269,6 +311,14 @@ function isOpenAICompatibleStreamingCompatibilityError(binding, error) {
269
311
  const message = error instanceof Error ? error.message : String(error);
270
312
  return message.toLowerCase().includes("received empty response from chat model call");
271
313
  }
314
+ function isGraphRecursionLimitError(error) {
315
+ const code = typeof error === "object" && error !== null && "lc_error_code" in error
316
+ ? String(error.lc_error_code ?? "")
317
+ : "";
318
+ const message = error instanceof Error ? error.message : String(error);
319
+ return code === "GRAPH_RECURSION_LIMIT"
320
+ || /Recursion limit .* without hitting a stop condition|GRAPH_RECURSION_LIMIT/i.test(message);
321
+ }
272
322
  function hasDelegationEvidence(evidence) {
273
323
  return (evidence.hasSuccessfulTaskToolEvidence
274
324
  || evidence.hasOpenTaskDelegation
@@ -358,10 +408,45 @@ function finishProfileStep(input) {
358
408
  ...(input.error !== undefined ? { error: input.error instanceof Error ? input.error.message : String(input.error) } : {}),
359
409
  });
360
410
  }
411
+ function projectLocalToolExecutionProfileChunks(executedToolResults, prefix) {
412
+ const chunks = [];
413
+ executedToolResults.forEach((toolResult, index) => {
414
+ if (isPlanToolName(toolResult.toolName)) {
415
+ return;
416
+ }
417
+ const id = `${prefix}:${index + 1}:${toolResult.toolName}`;
418
+ const startedAt = new Date().toISOString();
419
+ chunks.push({
420
+ kind: "profile",
421
+ step: {
422
+ id,
423
+ kind: "tool",
424
+ name: toolResult.toolName,
425
+ action: "invoke",
426
+ status: "started",
427
+ startedAt,
428
+ },
429
+ });
430
+ chunks.push({
431
+ kind: "profile",
432
+ step: {
433
+ id,
434
+ kind: "tool",
435
+ name: toolResult.toolName,
436
+ action: "invoke",
437
+ status: toolResult.isError === true ? "failed" : "completed",
438
+ startedAt,
439
+ endedAt: new Date().toISOString(),
440
+ ...(toolResult.isError === true ? { isError: true } : {}),
441
+ },
442
+ });
443
+ });
444
+ return chunks;
445
+ }
361
446
  export async function* streamRuntimeExecution(options) {
362
447
  let request = buildInvocationRequest(options.binding, options.history, options.input, options.runtimeOptions);
363
448
  if (requiresPlanEvidence(options.binding)) {
364
- request = appendToolRecoveryInstruction(request, INITIAL_REQUIRED_PLAN_INSTRUCTION);
449
+ request = appendToolRecoveryInstruction(request, buildInitialRequiredPlanInstruction(options.primaryTools));
365
450
  }
366
451
  let emittedUnsafeStreamSideEffects = false;
367
452
  const shouldProfile = options.runtimeOptions.profiling === true;
@@ -549,13 +634,14 @@ export async function* streamRuntimeExecution(options) {
549
634
  error,
550
635
  });
551
636
  if (!emittedUnsafeStreamSideEffects
552
- && isOpenAICompatibleStreamingCompatibilityError(options.binding, error)) {
637
+ && (isOpenAICompatibleStreamingCompatibilityError(options.binding, error) || isGraphRecursionLimitError(error))) {
553
638
  deferredStreamContent.length = 0;
554
639
  }
555
640
  else {
556
641
  throw error;
557
642
  }
558
643
  }
644
+ const streamedToolResults = [];
559
645
  if (events) {
560
646
  const streamEventsConsume = startProfileStep({
561
647
  id: "profile:agent:stream-events-consume",
@@ -568,7 +654,9 @@ export async function* streamRuntimeExecution(options) {
568
654
  try {
569
655
  let sawCompletedPlanToolResult = false;
570
656
  let sawSuccessfulNonTodoToolResult = false;
571
- const streamedToolResults = [];
657
+ let earlyStreamRecoveryInstruction = null;
658
+ let earlyStreamRecoverySuppressInitialPlan = false;
659
+ let completedPlanToolResultCount = 0;
572
660
  for await (const event of options.iterateWithTimeout(events, options.streamIdleTimeoutMs, "agent streamEvents", options.streamDeadlineAt, options.invokeTimeoutMs)) {
573
661
  const projectedChunks = projectRuntimeStreamEvent({
574
662
  event,
@@ -589,6 +677,15 @@ export async function* streamRuntimeExecution(options) {
589
677
  && chunk.kind !== "content"
590
678
  && !(chunk.kind === "tool-result" && isPlanToolName(chunk.toolName))
591
679
  && !(chunk.kind === "tool-result" && chunk.isError === true && isRetrySafeInvalidToolSelectionError(chunk.output)));
680
+ const repeatedPlanToolResultBeforeEvidence = requiresPlanEvidence(options.binding)
681
+ && !sawSuccessfulNonTodoToolResult
682
+ && completedPlanToolResultCount > 0
683
+ && projectedChunks.some((chunk) => chunk.kind === "tool-result" && isPlanToolName(chunk.toolName));
684
+ if (repeatedPlanToolResultBeforeEvidence) {
685
+ earlyStreamRecoveryInstruction = buildRunEvidenceAfterPlanInstruction(options.primaryTools);
686
+ earlyStreamRecoverySuppressInitialPlan = true;
687
+ break;
688
+ }
592
689
  for (const chunk of projectedChunks) {
593
690
  if (chunk.kind === "tool-result" && chunk.isError === true && isRetrySafeInvalidToolSelectionError(chunk.output)) {
594
691
  sawRetrySafeInvalidToolSelectionError = true;
@@ -606,6 +703,9 @@ export async function* streamRuntimeExecution(options) {
606
703
  if (isCompletedPlanToolResultChunk(chunk)) {
607
704
  sawCompletedPlanToolResult = true;
608
705
  }
706
+ if (chunk.kind === "tool-result" && isPlanToolName(chunk.toolName)) {
707
+ completedPlanToolResultCount += 1;
708
+ }
609
709
  if ((eventContainsNonTodoToolResult || eventContainsNonRetrySafeChunk) && deferredStreamContent.length > 0) {
610
710
  yield* flushDeferredStreamContent();
611
711
  }
@@ -660,6 +760,23 @@ export async function* streamRuntimeExecution(options) {
660
760
  })
661
761
  : null;
662
762
  const terminalDelegationOnlyRecoveryInstruction = resolveDelegationOnlyRecoveryInstruction(options.binding, terminalExecutionEvidence);
763
+ if (!emittedUnsafeStreamSideEffects
764
+ && (terminalMissingPlanRecoveryInstruction || terminalDelegationOnlyRecoveryInstruction)) {
765
+ earlyStreamRecoveryInstruction =
766
+ terminalMissingPlanRecoveryInstruction ?? terminalDelegationOnlyRecoveryInstruction;
767
+ break;
768
+ }
769
+ if (requiresPlanEvidence(options.binding) && hasCompletedPlanWithEvidence(terminalExecutionEvidence)) {
770
+ if (hasUsefulVisibleSynthesis(projectionState.emittedOutput)) {
771
+ if (deferredStreamContent.length > 0) {
772
+ yield* flushDeferredStreamContent();
773
+ }
774
+ return;
775
+ }
776
+ deferredStreamContent.length = 0;
777
+ yield { kind: "content", content: buildDeterministicFinalFromStreamToolEvidence(streamedToolResults) };
778
+ return;
779
+ }
663
780
  if (!shouldDeferStreamContent()
664
781
  && !terminalExecutionEvidence.hasIncompletePlanState
665
782
  && !terminalExecutionEvidence.hasFailedTaskDelegation
@@ -675,6 +792,30 @@ export async function* streamRuntimeExecution(options) {
675
792
  }
676
793
  }
677
794
  }
795
+ if (earlyStreamRecoveryInstruction) {
796
+ const earlyRecoveryRuntimeOptions = earlyStreamRecoverySuppressInitialPlan
797
+ ? withSuppressedInitialRequiredPlanInstruction(options.runtimeOptions)
798
+ : options.runtimeOptions;
799
+ const recovered = await options.invoke(options.applyToolRecoveryInstruction(options.binding, earlyStreamRecoveryInstruction), options.input, options.sessionId, options.runtimeOptions.requestId ?? options.sessionId, undefined, options.history, earlyRecoveryRuntimeOptions);
800
+ const recoveredToolResults = Array.isArray(recovered.metadata?.executedToolResults)
801
+ ? recovered.metadata.executedToolResults
802
+ : [];
803
+ for (const toolResult of recoveredToolResults) {
804
+ yield {
805
+ kind: "tool-result",
806
+ toolName: toolResult.toolName,
807
+ output: toolResult.output,
808
+ isError: toolResult.isError,
809
+ };
810
+ }
811
+ if (recovered.output) {
812
+ const visible = toVisibleContent(recovered.output);
813
+ if (visible) {
814
+ yield { kind: "content", content: visible };
815
+ }
816
+ }
817
+ return;
818
+ }
678
819
  if (shouldProfile)
679
820
  yield finishProfileStep({
680
821
  id: "profile:agent:stream-events-consume",
@@ -697,7 +838,7 @@ export async function* streamRuntimeExecution(options) {
697
838
  error,
698
839
  });
699
840
  if (!emittedUnsafeStreamSideEffects
700
- && isOpenAICompatibleStreamingCompatibilityError(options.binding, error)) {
841
+ && (isOpenAICompatibleStreamingCompatibilityError(options.binding, error) || isGraphRecursionLimitError(error))) {
701
842
  deferredStreamContent.length = 0;
702
843
  }
703
844
  else {
@@ -707,9 +848,14 @@ export async function* streamRuntimeExecution(options) {
707
848
  }
708
849
  const streamedExecutionEvidence = buildExecutionRecoveryEvidence({ projectionState });
709
850
  if (requiresPlanEvidence(options.binding) && hasCompletedPlanWithEvidence(streamedExecutionEvidence)) {
710
- if (deferredStreamContent.length > 0) {
711
- yield* flushDeferredStreamContent();
851
+ if (hasUsefulVisibleSynthesis(projectionState.emittedOutput)) {
852
+ if (deferredStreamContent.length > 0) {
853
+ yield* flushDeferredStreamContent();
854
+ }
855
+ return;
712
856
  }
857
+ deferredStreamContent.length = 0;
858
+ yield { kind: "content", content: buildDeterministicFinalFromStreamToolEvidence(streamedToolResults) };
713
859
  return;
714
860
  }
715
861
  const streamedDelegatedRecoveryInstruction = resolveDelegatedExecutionRecoveryInstruction(streamedExecutionEvidence);
@@ -722,7 +868,7 @@ export async function* streamRuntimeExecution(options) {
722
868
  const streamedPrematurePlanCloseRecoveryInstruction = requiresPlanEvidence(options.binding)
723
869
  && streamedExecutionEvidence.hasPlanStateEvidence
724
870
  && !streamedExecutionEvidence.hasSuccessfulNonTodoToolResultEvidence
725
- ? RUN_EVIDENCE_AFTER_PREMATURE_PLAN_CLOSE_INSTRUCTION
871
+ ? buildRunEvidenceAfterPlanInstruction(options.primaryTools)
726
872
  : null;
727
873
  const delegatedExecutionRecoveryInstruction = !emittedUnsafeStreamSideEffects || streamedDelegatedRecoveryInstruction
728
874
  ? streamedDelegatedRecoveryInstruction
@@ -763,8 +909,12 @@ export async function* streamRuntimeExecution(options) {
763
909
  ?? streamedDelegationOnlyRecoveryInstruction
764
910
  ?? executionWithoutToolEvidenceInstruction;
765
911
  if (retryInstruction) {
912
+ const retryRuntimeOptions = retryInstruction === streamedIncompletePlanRecoveryInstruction
913
+ || retryInstruction === streamedPrematurePlanCloseRecoveryInstruction
914
+ ? withSuppressedInitialRequiredPlanInstruction(options.runtimeOptions)
915
+ : options.runtimeOptions;
766
916
  let retried;
767
- retried = await options.invoke(options.applyToolRecoveryInstruction(options.binding, retryInstruction), options.input, options.sessionId, options.runtimeOptions.requestId ?? options.sessionId, undefined, options.history, options.runtimeOptions);
917
+ retried = await options.invoke(options.applyToolRecoveryInstruction(options.binding, retryInstruction), options.input, options.sessionId, options.runtimeOptions.requestId ?? options.sessionId, undefined, options.history, retryRuntimeOptions);
768
918
  const executedToolResults = Array.isArray(retried.metadata?.executedToolResults)
769
919
  ? retried.metadata.executedToolResults
770
920
  : [];
@@ -954,6 +1104,9 @@ export async function* streamRuntimeExecution(options) {
954
1104
  const executedToolResults = Array.isArray(result.metadata?.executedToolResults)
955
1105
  ? result.metadata.executedToolResults
956
1106
  : [];
1107
+ for (const chunk of projectLocalToolExecutionProfileChunks(executedToolResults, "local-tool:invoke-fallback")) {
1108
+ yield chunk;
1109
+ }
957
1110
  const invokeExecutionEvidence = buildExecutionRecoveryEvidence({ projectionState, executedToolResults });
958
1111
  if (hasUnresolvedExecution(invokeExecutionEvidence)) {
959
1112
  throw createUnresolvedExecutionError(invokeExecutionEvidence);
@@ -984,7 +1137,7 @@ export async function* streamRuntimeExecution(options) {
984
1137
  const invokeFallbackPlanWithoutEvidenceRecoveryInstruction = requiresPlanEvidence(options.binding)
985
1138
  && invokeExecutionEvidence.hasPlanStateEvidence
986
1139
  && !invokeExecutionEvidence.hasSuccessfulNonTodoToolResultEvidence
987
- ? RUN_EVIDENCE_AFTER_PREMATURE_PLAN_CLOSE_INSTRUCTION
1140
+ ? buildRunEvidenceAfterPlanInstruction(options.primaryTools)
988
1141
  : null;
989
1142
  const effectiveInvokeFallbackRecoveryInstruction = invokeFallbackIncompletePlanRecoveryInstruction
990
1143
  ?? invokeFallbackPlanWithoutEvidenceRecoveryInstruction
@@ -992,10 +1145,20 @@ export async function* streamRuntimeExecution(options) {
992
1145
  ?? invokeFallbackDelegationOnlyRecoveryInstruction
993
1146
  ?? invokeFallbackRecoveryInstruction;
994
1147
  if (effectiveInvokeFallbackRecoveryInstruction) {
995
- const recovered = await options.invoke(options.applyToolRecoveryInstruction(options.binding, effectiveInvokeFallbackRecoveryInstruction), options.input, options.sessionId, options.runtimeOptions.requestId ?? options.sessionId, undefined, options.history, options.runtimeOptions);
1148
+ const invokeFallbackRuntimeOptions = effectiveInvokeFallbackRecoveryInstruction === invokeFallbackIncompletePlanRecoveryInstruction
1149
+ || effectiveInvokeFallbackRecoveryInstruction === invokeFallbackPlanWithoutEvidenceRecoveryInstruction
1150
+ ? {
1151
+ ...withSuppressedInitialRequiredPlanInstruction(options.runtimeOptions),
1152
+ externalPlanEvidence: true,
1153
+ }
1154
+ : options.runtimeOptions;
1155
+ const recovered = await options.invoke(options.applyToolRecoveryInstruction(options.binding, effectiveInvokeFallbackRecoveryInstruction), options.input, options.sessionId, options.runtimeOptions.requestId ?? options.sessionId, undefined, options.history, invokeFallbackRuntimeOptions);
996
1156
  const recoveredToolResults = Array.isArray(recovered.metadata?.executedToolResults)
997
1157
  ? recovered.metadata.executedToolResults
998
1158
  : [];
1159
+ for (const chunk of projectLocalToolExecutionProfileChunks(recoveredToolResults, "local-tool:invoke-fallback-recovery")) {
1160
+ yield chunk;
1161
+ }
999
1162
  const originalExecutionEvidence = buildExecutionRecoveryEvidence({ projectionState, executedToolResults });
1000
1163
  const recoveredExecutionEvidence = buildExecutionRecoveryEvidence({
1001
1164
  projectionState: createStreamEventProjectionState(),
@@ -41,11 +41,23 @@ function hasStateSnapshotPlan(stateSnapshot) {
41
41
  && stateSnapshot !== null
42
42
  && Array.isArray(stateSnapshot.todos);
43
43
  }
44
+ function normalizePlanToolName(toolName) {
45
+ return typeof toolName === "string" ? toolName.trim().toLowerCase().replace(/[\s-]+/gu, "_") : "";
46
+ }
47
+ function isPlanToolName(toolName) {
48
+ const normalized = normalizePlanToolName(toolName);
49
+ return normalized === "write_todos"
50
+ || normalized === "read_todos"
51
+ || normalized === "tool_call_write_todos"
52
+ || normalized === "tool_call_read_todos"
53
+ || normalized === "call_write_todos"
54
+ || normalized === "call_read_todos";
55
+ }
44
56
  function hasPlanToolEvidence(executedToolResults) {
45
- return executedToolResults.some((item) => item.toolName === "write_todos" || item.toolName === "read_todos");
57
+ return executedToolResults.some((item) => isPlanToolName(item.toolName));
46
58
  }
47
59
  function hasExecutionToolEvidence(executedToolResults) {
48
- return executedToolResults.some((item) => item.isError !== true && item.toolName !== "write_todos" && item.toolName !== "read_todos");
60
+ return executedToolResults.some((item) => item.isError !== true && !isPlanToolName(item.toolName));
49
61
  }
50
62
  function isPlaceholderTaskCompletion(value) {
51
63
  const normalized = sanitizeVisibleText(value).trim();
@@ -143,7 +155,7 @@ function extractLatestSuccessfulNonTodoToolResultText(executedToolResults) {
143
155
  if (toolResult.isError === true) {
144
156
  continue;
145
157
  }
146
- if (toolResult.toolName === "task" || toolResult.toolName === "write_todos" || toolResult.toolName === "read_todos") {
158
+ if (toolResult.toolName === "task" || isPlanToolName(toolResult.toolName)) {
147
159
  continue;
148
160
  }
149
161
  const normalized = normalizeToolOutputText(toolResult.output);
@@ -258,8 +270,7 @@ function looksLikeContradictedToolExecutionFailure(value) {
258
270
  }
259
271
  function extractDeterministicToolFailureReport(executedToolResults) {
260
272
  const hasSuccessfulSubstantiveTool = executedToolResults.some((toolResult) => (toolResult.isError !== true
261
- && toolResult.toolName !== "write_todos"
262
- && toolResult.toolName !== "read_todos"));
273
+ && !isPlanToolName(toolResult.toolName)));
263
274
  if (hasSuccessfulSubstantiveTool) {
264
275
  return "";
265
276
  }
@@ -401,7 +412,7 @@ export function finalizeRequestResult(params) {
401
412
  && !visibleOutput
402
413
  && !preliminaryTerminalStatus
403
414
  && !output.trim()
404
- && allExecutedToolResults.some((toolResult) => toolResult.isError !== true && toolResult.toolName !== "write_todos" && toolResult.toolName !== "read_todos");
415
+ && allExecutedToolResults.some((toolResult) => toolResult.isError !== true && !isPlanToolName(toolResult.toolName));
405
416
  if (hasMissingRequiredPlanEvidence) {
406
417
  output = "runtime_error=Agent ended before producing required plan evidence.";
407
418
  }
@@ -15,10 +15,11 @@ type LocalToolInvocationParams = {
15
15
  builtinExecutableTools: Map<string, ExecutableTool>;
16
16
  callRuntimeWithToolParseRecovery: (request: unknown) => Promise<Record<string, unknown>>;
17
17
  toolRuntimeContext?: Record<string, unknown>;
18
+ externalPlanEvidence?: boolean;
18
19
  };
19
20
  type LocalToolInvocationResult = {
20
21
  result: Record<string, unknown>;
21
22
  executedToolResults: ExecutedToolResult[];
22
23
  };
23
- export declare function runLocalToolInvocationLoop({ binding, request, primaryTools, toolNameMapping, executableTools, builtinExecutableTools, callRuntimeWithToolParseRecovery, toolRuntimeContext, }: LocalToolInvocationParams): Promise<LocalToolInvocationResult>;
24
+ export declare function runLocalToolInvocationLoop({ binding, request, primaryTools, toolNameMapping, executableTools, builtinExecutableTools, callRuntimeWithToolParseRecovery, toolRuntimeContext, externalPlanEvidence, }: LocalToolInvocationParams): Promise<LocalToolInvocationResult>;
24
25
  export {};