@botbotgo/agent-harness 0.0.463 → 0.0.465

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,2 +1,2 @@
1
- export declare const AGENT_HARNESS_VERSION = "0.0.463";
1
+ export declare const AGENT_HARNESS_VERSION = "0.0.465";
2
2
  export declare const AGENT_HARNESS_RELEASE_DATE = "2026-05-04";
@@ -1,2 +1,2 @@
1
- export const AGENT_HARNESS_VERSION = "0.0.463";
1
+ export const AGENT_HARNESS_VERSION = "0.0.465";
2
2
  export const AGENT_HARNESS_RELEASE_DATE = "2026-05-04";
@@ -1 +1 @@
1
- When calling write_todos, every todo item must include both content and status. Do not send status-only updates. Retry by resending the full todo entry with the original content preserved.
1
+ When calling write_todos, every todo item must include both content and status. Use only these status values: pending, in_progress, completed. Do not send aliases such as not_started, open, active, done, blocked, failed, or cancelled. Do not send status-only updates. Retry by resending the full todo entry with the original content preserved.
@@ -1,5 +1,5 @@
1
1
  import { extractVisibleOutput, isToolCallRecoveryFailure, isRetrySafeInvalidToolSelectionError, appendToolRecoveryInstruction, resolveMissingPlanRecoveryInstruction, resolveExecutionWithoutToolEvidenceTextInstruction, shouldValidateExecutionWithoutToolEvidence, resolveToolCallRecoveryInstruction, sanitizeVisibleText, EXECUTION_WITH_TOOL_EVIDENCE_RETRY_INSTRUCTION, INVALID_TOOL_SELECTION_RECOVERY_INSTRUCTION, } from "../../parsing/output-parsing.js";
2
- import { DELEGATED_TASK_FAILURE_RECOVERY_INSTRUCTION, DELEGATION_ONLY_RECOVERY_INSTRUCTION, } from "../../prompts/runtime-prompts.js";
2
+ import { DELEGATED_TASK_FAILURE_RECOVERY_INSTRUCTION, DELEGATION_ONLY_RECOVERY_INSTRUCTION, REQUIRED_EXECUTION_CONTINUATION_INSTRUCTION, } from "../../prompts/runtime-prompts.js";
3
3
  import { buildInvocationRequest } from "../model/invocation-request.js";
4
4
  import { PROMPTED_JSON_TOOL_POLICY_KEY, withPromptedJsonToolPolicy } from "../model/prompted-json-tool-policy.js";
5
5
  import { buildRawModelMessages } from "../model/message-assembly.js";
@@ -137,6 +137,26 @@ function hasIncompletePlanOutput(value) {
137
137
  }
138
138
  return null;
139
139
  }
140
+ function hasFailedTodos(value) {
141
+ if (Array.isArray(value)) {
142
+ return value.some((todo) => hasFailedTodos(todo));
143
+ }
144
+ if (typeof value !== "object" || value === null) {
145
+ return false;
146
+ }
147
+ const typed = value;
148
+ if (typeof typed.status === "string" && typed.status.trim().toLowerCase() === "failed") {
149
+ return true;
150
+ }
151
+ return hasFailedTodos(typed.todos)
152
+ || hasFailedTodos(typed.update)
153
+ || hasFailedTodos(typed.data)
154
+ || hasFailedTodos(typed.output)
155
+ || hasFailedTodos(typed.summary);
156
+ }
157
+ function hasFailedPlanStateInExecutedToolResults(executedToolResults) {
158
+ return executedToolResults.some((item) => isPlanToolName(item.toolName) && hasFailedTodos(item.output));
159
+ }
140
160
  function normalizePlanToolName(toolName) {
141
161
  return typeof toolName === "string" ? toolName.trim().toLowerCase().replace(/[\s-]+/gu, "_") : "";
142
162
  }
@@ -199,6 +219,9 @@ function buildExecutionRecoveryEvidence(params) {
199
219
  hasNonTodoToolResultEvidence: hasNonTodoToolEvidence(executedToolResults) || projectionState.emittedNonTodoToolResult || projectionState.emittedToolError,
200
220
  hasSuccessfulNonTodoToolResultEvidence: hasSuccessfulNonTodoToolEvidence(executedToolResults) || projectionState.emittedSuccessfulNonTodoToolResult,
201
221
  hasIncompletePlanState: projectionState.hasIncompletePlanState || hasIncompletePlanStateInExecutedToolResults(executedToolResults),
222
+ hasFailedPlanState: (projectionState.hasFailedPlanState || hasFailedPlanStateInExecutedToolResults(executedToolResults))
223
+ && !projectionState.emittedSuccessfulNonTodoToolResult
224
+ && !hasSuccessfulNonTodoToolEvidence(executedToolResults),
202
225
  hasPlanStateEvidence: projectionState.sawPlanState || hasIncompletePlanStateInExecutedToolResults(executedToolResults),
203
226
  hasOpenTaskDelegation: projectionState.openTaskDelegations > 0,
204
227
  hasFailedTaskDelegation: projectionState.hasFailedTaskDelegation
@@ -213,6 +236,7 @@ function buildExecutionRecoveryEvidence(params) {
213
236
  }
214
237
  function hasUnresolvedExecution(evidence) {
215
238
  return (evidence.hasIncompletePlanState
239
+ || evidence.hasFailedPlanState
216
240
  || evidence.hasFailedTaskDelegation
217
241
  || evidence.hasOpenTaskDelegation);
218
242
  }
@@ -257,6 +281,14 @@ function buildDeterministicFinalFromStreamToolEvidence(executedToolResults) {
257
281
  evidence.length > 0 ? evidence.join("\n\n") : "(no non-planning tool evidence captured)",
258
282
  ].join("\n");
259
283
  }
284
+ function latestStreamToolErrorRecoveryInstruction(executedToolResults) {
285
+ const latest = [...executedToolResults].reverse().find((item) => item.isError === true);
286
+ if (!latest) {
287
+ return null;
288
+ }
289
+ const message = typeof latest.output === "string" ? latest.output : JSON.stringify(latest.output);
290
+ return resolveToolCallRecoveryInstruction(new Error(message)) ?? REQUIRED_EXECUTION_CONTINUATION_INSTRUCTION;
291
+ }
260
292
  function hasUsefulVisibleSynthesis(value) {
261
293
  const trimmed = value.trim();
262
294
  if (trimmed.length < 80) {
@@ -381,6 +413,9 @@ function createUnresolvedExecutionError(evidence) {
381
413
  if (evidence.hasIncompletePlanState) {
382
414
  reasons.push("plan state still has unfinished work");
383
415
  }
416
+ if (evidence.hasFailedPlanState) {
417
+ reasons.push("plan state failed before non-TODO evidence returned");
418
+ }
384
419
  if (evidence.hasFailedTaskDelegation) {
385
420
  reasons.push("delegated task failed before surfacing final findings");
386
421
  }
@@ -463,7 +498,8 @@ function projectLocalToolExecutionProfileChunks(executedToolResults, prefix) {
463
498
  }
464
499
  export async function* streamRuntimeExecution(options) {
465
500
  let request = buildInvocationRequest(options.binding, options.history, options.input, options.runtimeOptions);
466
- if (requiresPlanEvidence(options.binding)) {
501
+ if (requiresPlanEvidence(options.binding)
502
+ && options.runtimeOptions.suppressInitialRequiredPlanInstruction !== true) {
467
503
  request = withPromptedJsonToolPolicy(appendToolRecoveryInstruction(request, buildInitialRequiredPlanInstruction(options.primaryTools)), "planning");
468
504
  }
469
505
  let emittedUnsafeStreamSideEffects = false;
@@ -472,6 +508,14 @@ export async function* streamRuntimeExecution(options) {
472
508
  const deferredStreamContent = [];
473
509
  let sawRetrySafeInvalidToolSelectionError = false;
474
510
  const projectionState = createStreamEventProjectionState();
511
+ if (options.runtimeOptions.externalPlanEvidence === true) {
512
+ projectionState.sawPlanState = true;
513
+ yield {
514
+ kind: "commentary",
515
+ content: `${options.binding.agent.id}: TODO evidence observed.`,
516
+ agentId: options.binding.agent.id,
517
+ };
518
+ }
475
519
  const requestId = options.runtimeOptions.requestId ?? options.sessionId;
476
520
  const buildRunnableConfig = (extra) => ({
477
521
  ...(options.resolveInvocationConfig
@@ -515,6 +559,7 @@ export async function* streamRuntimeExecution(options) {
515
559
  try {
516
560
  const rawStreamInput = buildRawModelMessages(options.binding, options.getSystemPrompt(options.binding), options.history, options.input, options.runtimeOptions.memoryContext);
517
561
  const streamInput = requiresPlanEvidence(options.binding)
562
+ && options.runtimeOptions.suppressInitialRequiredPlanInstruction !== true
518
563
  ? withPromptedJsonToolPolicy(rawStreamInput, "planning")
519
564
  : rawStreamInput;
520
565
  stream = await options.withTimeout(() => options.langChainStreamModel.stream(streamInput), computeRemainingTimeoutMs(options.streamDeadlineAt, options.invokeTimeoutMs), "model stream start", "stream");
@@ -699,6 +744,7 @@ export async function* streamRuntimeExecution(options) {
699
744
  const eventContainsNonRetrySafeChunk = projectedChunks.some((chunk) => chunk.kind !== "upstream-event"
700
745
  && chunk.kind !== "content"
701
746
  && !(chunk.kind === "tool-result" && isPlanToolName(chunk.toolName))
747
+ && !(chunk.kind === "tool-result" && chunk.isError === true)
702
748
  && !(chunk.kind === "tool-result" && chunk.isError === true && isRetrySafeInvalidToolSelectionError(chunk.output)));
703
749
  const hadPriorPlanToolResult = completedPlanToolResultCount > 0;
704
750
  const repeatedPlanToolResultBeforeEvidence = requiresPlanEvidence(options.binding)
@@ -796,10 +842,23 @@ export async function* streamRuntimeExecution(options) {
796
842
  })
797
843
  : null;
798
844
  const terminalDelegationOnlyRecoveryInstruction = resolveDelegationOnlyRecoveryInstruction(options.binding, terminalExecutionEvidence);
845
+ const terminalPrematurePlanCloseRecoveryInstruction = requiresPlanEvidence(options.binding)
846
+ && terminalExecutionEvidence.hasPlanStateEvidence
847
+ && !terminalExecutionEvidence.hasSuccessfulNonTodoToolResultEvidence
848
+ ? buildRunEvidenceAfterPlanInstruction(options.primaryTools)
849
+ : null;
799
850
  if (!emittedUnsafeStreamSideEffects
800
- && (terminalMissingPlanRecoveryInstruction || terminalDelegationOnlyRecoveryInstruction)) {
851
+ && (terminalMissingPlanRecoveryInstruction
852
+ || terminalPrematurePlanCloseRecoveryInstruction
853
+ || terminalDelegationOnlyRecoveryInstruction)) {
801
854
  earlyStreamRecoveryInstruction =
802
- terminalMissingPlanRecoveryInstruction ?? terminalDelegationOnlyRecoveryInstruction;
855
+ terminalMissingPlanRecoveryInstruction
856
+ ?? terminalPrematurePlanCloseRecoveryInstruction
857
+ ?? terminalDelegationOnlyRecoveryInstruction;
858
+ earlyStreamRecoverySuppressInitialPlan = terminalPrematurePlanCloseRecoveryInstruction !== null;
859
+ if (terminalPrematurePlanCloseRecoveryInstruction) {
860
+ earlyStreamExternalPlanEvidenceTools = resolveSingleConfiguredPlanEvidenceTool(options.primaryTools);
861
+ }
803
862
  break;
804
863
  }
805
864
  if (requiresPlanEvidence(options.binding) && hasCompletedPlanWithEvidence(terminalExecutionEvidence)) {
@@ -820,6 +879,7 @@ export async function* streamRuntimeExecution(options) {
820
879
  && !hasMissingDelegatedExecutionEvidence(terminalExecutionEvidence)
821
880
  && !hasMissingDelegatedFindings(terminalExecutionEvidence)
822
881
  && !terminalMissingPlanRecoveryInstruction
882
+ && !terminalPrematurePlanCloseRecoveryInstruction
823
883
  && !terminalDelegationOnlyRecoveryInstruction) {
824
884
  if (deferredStreamContent.length > 0) {
825
885
  yield* flushDeferredStreamContent();
@@ -842,6 +902,35 @@ export async function* streamRuntimeExecution(options) {
842
902
  const recoveredToolResults = Array.isArray(recovered.metadata?.executedToolResults)
843
903
  ? recovered.metadata.executedToolResults
844
904
  : [];
905
+ const originalExecutionEvidence = buildExecutionRecoveryEvidence({ projectionState });
906
+ const recoveredExecutionEvidence = buildExecutionRecoveryEvidence({
907
+ projectionState: createStreamEventProjectionState(),
908
+ executedToolResults: recoveredToolResults,
909
+ });
910
+ const recoveredCarriesExecutionEvidence = recoveredExecutionEvidence.hasSuccessfulNonTodoToolResultEvidence
911
+ || recoveredExecutionEvidence.hasOpenTaskDelegation
912
+ || recoveredExecutionEvidence.hasDelegatedExecutionToolEvidence;
913
+ const recoveredHasUnresolvedExecution = hasUnresolvedExecution(recoveredExecutionEvidence)
914
+ || hasMissingDelegatedExecutionEvidence(recoveredExecutionEvidence)
915
+ || hasMissingDelegatedFindings(recoveredExecutionEvidence)
916
+ || (!recoveredCarriesExecutionEvidence
917
+ && (hasUnresolvedExecution(originalExecutionEvidence)
918
+ || hasMissingDelegatedExecutionEvidence(originalExecutionEvidence)
919
+ || hasMissingDelegatedFindings(originalExecutionEvidence)));
920
+ if (recoveredHasUnresolvedExecution) {
921
+ const effectiveRecoveryEvidence = recoveredCarriesExecutionEvidence
922
+ ? recoveredExecutionEvidence
923
+ : {
924
+ ...recoveredExecutionEvidence,
925
+ hasIncompletePlanState: originalExecutionEvidence.hasIncompletePlanState,
926
+ hasFailedPlanState: originalExecutionEvidence.hasFailedPlanState,
927
+ hasFailedTaskDelegation: originalExecutionEvidence.hasFailedTaskDelegation,
928
+ hasOpenTaskDelegation: originalExecutionEvidence.hasOpenTaskDelegation,
929
+ hasDelegatedAgentWithConfiguredTools: originalExecutionEvidence.hasDelegatedAgentWithConfiguredTools,
930
+ hasDelegatedExecutionToolEvidence: originalExecutionEvidence.hasDelegatedExecutionToolEvidence,
931
+ };
932
+ throw createUnresolvedExecutionError(effectiveRecoveryEvidence);
933
+ }
845
934
  for (const toolResult of recoveredToolResults) {
846
935
  yield {
847
936
  kind: "tool-result",
@@ -931,6 +1020,9 @@ export async function* streamRuntimeExecution(options) {
931
1020
  const streamedRuntimeFailureRecoveryInstruction = projectionState.emittedOutput
932
1021
  ? resolveStreamedRuntimeFailureRecoveryInstruction(projectionState.emittedOutput, streamedExecutionEvidence)
933
1022
  : null;
1023
+ const streamedToolErrorRecoveryInstruction = !emittedUnsafeStreamSideEffects
1024
+ ? latestStreamToolErrorRecoveryInstruction(streamedToolResults)
1025
+ : null;
934
1026
  const missingPlanRecoveryInstruction = !hasUnresolvedExecution(streamedExecutionEvidence) && !delegatedExecutionRecoveryInstruction
935
1027
  ? resolveMissingPlanRecoveryInstruction({
936
1028
  request,
@@ -943,7 +1035,8 @@ export async function* streamRuntimeExecution(options) {
943
1035
  : null;
944
1036
  const retryInstruction = !emittedUnsafeStreamSideEffects && sawRetrySafeInvalidToolSelectionError
945
1037
  ? INVALID_TOOL_SELECTION_RECOVERY_INSTRUCTION
946
- : delegatedExecutionRecoveryInstruction
1038
+ : streamedToolErrorRecoveryInstruction
1039
+ ?? delegatedExecutionRecoveryInstruction
947
1040
  ?? streamedIncompletePlanRecoveryInstruction
948
1041
  ?? streamedPrematurePlanCloseRecoveryInstruction
949
1042
  ?? streamedRuntimeFailureRecoveryInstruction
@@ -970,7 +1063,7 @@ export async function* streamRuntimeExecution(options) {
970
1063
  }
971
1064
  const retriedVisibleOutput = retried.output ? toVisibleContent(retried.output) : "";
972
1065
  const retriedIsDelegationFailureFinalReport = isDelegationFailureFinalReport(originalExecutionEvidence, executedToolResults, retriedVisibleOutput);
973
- const retriedCarriesExecutionEvidence = retriedExecutionEvidence.hasToolResultEvidence
1066
+ const retriedCarriesExecutionEvidence = retriedExecutionEvidence.hasSuccessfulNonTodoToolResultEvidence
974
1067
  || retriedExecutionEvidence.hasOpenTaskDelegation
975
1068
  || retriedExecutionEvidence.hasDelegatedExecutionToolEvidence;
976
1069
  const retriedHasUnresolvedExecution = !retriedIsDelegationFailureFinalReport
@@ -986,6 +1079,7 @@ export async function* streamRuntimeExecution(options) {
986
1079
  : {
987
1080
  ...retriedExecutionEvidence,
988
1081
  hasIncompletePlanState: originalExecutionEvidence.hasIncompletePlanState,
1082
+ hasFailedPlanState: originalExecutionEvidence.hasFailedPlanState,
989
1083
  hasFailedTaskDelegation: originalExecutionEvidence.hasFailedTaskDelegation,
990
1084
  hasOpenTaskDelegation: originalExecutionEvidence.hasOpenTaskDelegation,
991
1085
  hasDelegatedAgentWithConfiguredTools: originalExecutionEvidence.hasDelegatedAgentWithConfiguredTools,
@@ -1227,6 +1321,7 @@ export async function* streamRuntimeExecution(options) {
1227
1321
  : {
1228
1322
  ...recoveredExecutionEvidence,
1229
1323
  hasIncompletePlanState: originalExecutionEvidence.hasIncompletePlanState,
1324
+ hasFailedPlanState: originalExecutionEvidence.hasFailedPlanState,
1230
1325
  hasFailedTaskDelegation: originalExecutionEvidence.hasFailedTaskDelegation,
1231
1326
  hasOpenTaskDelegation: originalExecutionEvidence.hasOpenTaskDelegation,
1232
1327
  hasDelegatedAgentWithConfiguredTools: originalExecutionEvidence.hasDelegatedAgentWithConfiguredTools,
@@ -39,7 +39,8 @@ function hasIncompleteStateSnapshotPlan(stateSnapshot) {
39
39
  function hasStateSnapshotPlan(stateSnapshot) {
40
40
  return typeof stateSnapshot === "object"
41
41
  && stateSnapshot !== null
42
- && Array.isArray(stateSnapshot.todos);
42
+ && Array.isArray(stateSnapshot.todos)
43
+ && (stateSnapshot.todos).length > 0;
43
44
  }
44
45
  function normalizePlanToolName(toolName) {
45
46
  return typeof toolName === "string" ? toolName.trim().toLowerCase().replace(/[\s-]+/gu, "_") : "";
@@ -54,7 +55,19 @@ function isPlanToolName(toolName) {
54
55
  || normalized === "call_read_todos";
55
56
  }
56
57
  function hasPlanToolEvidence(executedToolResults) {
57
- return executedToolResults.some((item) => isPlanToolName(item.toolName));
58
+ return executedToolResults.some((item) => {
59
+ if (!isPlanToolName(item.toolName)) {
60
+ return false;
61
+ }
62
+ const output = typeof item.output === "object" && item.output !== null ? item.output : null;
63
+ const summaryContainer = typeof output?.summary === "object" && output.summary !== null
64
+ ? output.summary
65
+ : null;
66
+ const counts = typeof summaryContainer?.summary === "object" && summaryContainer.summary !== null
67
+ ? summaryContainer.summary
68
+ : null;
69
+ return !(typeof counts?.total === "number" && counts.total <= 0);
70
+ });
58
71
  }
59
72
  function hasExecutionToolEvidence(executedToolResults) {
60
73
  return executedToolResults.some((item) => item.isError !== true && !isPlanToolName(item.toolName));
@@ -13,8 +13,10 @@ const TOOL_FOLLOW_UP_INSTRUCTION = "One or more tool results are already availab
13
13
  const DEFAULT_MAX_TOOL_ITERATIONS = 10_000;
14
14
  const MAX_REPEATED_RECOVERY_WITHOUT_PROGRESS = 2;
15
15
  const MAX_REPEATED_PLAN_ONLY_AFTER_PLAN = 2;
16
+ const MAX_REPEATED_INVALID_EXTERNAL_PLAN_EVIDENCE_SELECTION = 2;
16
17
  const REQUIRED_PLAN_CONTRACT_MARKER = "This agent has a required visible planning contract.";
17
18
  const INITIAL_WRITE_TODOS_MARKER = "Your first action for this request must be write_todos";
19
+ const REQUIRED_SINGLE_EXTERNAL_PLAN_EVIDENCE_TOOL_INSTRUCTION = "The TODO board already exists. The next action must be exactly one non-planning evidence tool call. Do not call write_todos or read_todos, and do not call multiple tools in the same response.";
18
20
  function resolveSingleBootstrapEvidenceTool(primaryTools) {
19
21
  const evidenceTools = primaryTools
20
22
  .map((tool) => typeof tool.name === "string" ? tool.name.trim() : "")
@@ -108,7 +110,11 @@ function readPlanStateSummary(output) {
108
110
  return null;
109
111
  }
110
112
  const typedCounts = counts;
113
+ if (typeof typedCounts.total === "number" && typedCounts.total <= 0) {
114
+ return null;
115
+ }
111
116
  return {
117
+ ...(typeof typedCounts.total === "number" ? { total: typedCounts.total } : {}),
112
118
  pending: typeof typedCounts.pending === "number" ? typedCounts.pending : 0,
113
119
  inProgress: typeof typedCounts.inProgress === "number" ? typedCounts.inProgress : 0,
114
120
  };
@@ -138,6 +144,10 @@ function isPlanToolName(toolName) {
138
144
  || normalized === "call_write_todos"
139
145
  || normalized === "call_read_todos";
140
146
  }
147
+ function isPlanToolCall(toolCall, toolNameMapping, primaryTools) {
148
+ const resolvedToolName = resolveModelFacingToolName(String(toolCall.name ?? ""), toolNameMapping, primaryTools);
149
+ return isPlanToolName(toolCall.name) || isPlanToolName(resolvedToolName);
150
+ }
141
151
  function isFallbackTodoCompletionToolCall(toolCall) {
142
152
  return typeof toolCall.id === "string"
143
153
  && toolCall.id.startsWith("fallback-complete-")
@@ -218,7 +228,11 @@ function extractLatestUserInput(request) {
218
228
  const messages = Array.isArray(typedRequest.messages) ? typedRequest.messages : [];
219
229
  for (let index = messages.length - 1; index >= 0; index -= 1) {
220
230
  const candidate = messages[index];
221
- if (candidate?.role !== "user" || typeof candidate.content !== "string") {
231
+ const role = typeof candidate?.role === "string" ? candidate.role.trim().toLowerCase() : "";
232
+ const messageType = typeof candidate?._getType === "function" ? String(candidate._getType()).trim().toLowerCase() : "";
233
+ const constructorType = Array.isArray(candidate?.id) ? String(candidate.id.at(-1)).trim().toLowerCase() : "";
234
+ const isUserMessage = role === "user" || role === "human" || messageType === "human" || constructorType === "humanmessage";
235
+ if (!isUserMessage || typeof candidate?.content !== "string") {
222
236
  continue;
223
237
  }
224
238
  const normalized = candidate.content.trim();
@@ -280,6 +294,7 @@ export async function runLocalToolInvocationLoop({ binding, request, primaryTool
280
294
  let lastRecoveryExecutedCount = -1;
281
295
  let repeatedRecoveryWithoutProgress = 0;
282
296
  let repeatedPlanOnlyAfterPlan = 0;
297
+ let repeatedInvalidExternalPlanEvidenceSelection = 0;
283
298
  let pendingResult;
284
299
  let result;
285
300
  const toolCatalog = new Map();
@@ -418,6 +433,26 @@ export async function runLocalToolInvocationLoop({ binding, request, primaryTool
418
433
  activeRequest = appendToolRecoveryInstruction(activeRequest, missingPlanRecoveryInstruction);
419
434
  continue;
420
435
  }
436
+ if (requiresPlanEvidence(binding)
437
+ && externalPlanEvidence === true
438
+ && hasPlanStateEvidence(executedToolResults, externalPlanEvidence)
439
+ && !hasNonTodoToolEvidence(executedToolResults)
440
+ && toolCalls.length > 0
441
+ && (toolCalls.length !== 1 || isPlanToolCall(toolCalls[0], toolNameMapping, primaryTools))) {
442
+ repeatedInvalidExternalPlanEvidenceSelection += 1;
443
+ if (repeatedInvalidExternalPlanEvidenceSelection >= MAX_REPEATED_INVALID_EXTERNAL_PLAN_EVIDENCE_SELECTION) {
444
+ throw createToolLoopError({
445
+ reason: "model did not select exactly one non-planning evidence tool during delegated plan evidence recovery",
446
+ iteration,
447
+ maxToolIterations,
448
+ toolCalls,
449
+ executedToolResults,
450
+ });
451
+ }
452
+ activeRequest = appendToolRecoveryInstruction(activeRequest, REQUIRED_SINGLE_EXTERNAL_PLAN_EVIDENCE_TOOL_INSTRUCTION);
453
+ pendingResult = undefined;
454
+ continue;
455
+ }
421
456
  if (requiresPlanEvidence(binding)
422
457
  && hasPlanStateEvidence(executedToolResults, externalPlanEvidence)
423
458
  && !hasNonTodoToolEvidence(executedToolResults)
@@ -425,6 +460,12 @@ export async function runLocalToolInvocationLoop({ binding, request, primaryTool
425
460
  && toolCalls.every((toolCall) => isPlanToolName(toolCall.name))) {
426
461
  repeatedPlanOnlyAfterPlan += 1;
427
462
  if (repeatedPlanOnlyAfterPlan >= MAX_REPEATED_PLAN_ONLY_AFTER_PLAN) {
463
+ if (hasNonTodoToolEvidence(executedToolResults)) {
464
+ return {
465
+ result: buildDeterministicFinalFromToolEvidence(executedToolResults),
466
+ executedToolResults,
467
+ };
468
+ }
428
469
  throw createToolLoopError({
429
470
  reason: "model repeatedly selected only planning tools after the todo board already existed and before any non-planning evidence tool returned",
430
471
  iteration,
@@ -434,6 +475,12 @@ export async function runLocalToolInvocationLoop({ binding, request, primaryTool
434
475
  });
435
476
  }
436
477
  if (iteration + 1 === maxToolIterations) {
478
+ if (hasNonTodoToolEvidence(executedToolResults)) {
479
+ return {
480
+ result: buildDeterministicFinalFromToolEvidence(executedToolResults),
481
+ executedToolResults,
482
+ };
483
+ }
437
484
  throw createToolLoopError({
438
485
  reason: "maximum iterations reached",
439
486
  iteration,
@@ -448,6 +495,7 @@ export async function runLocalToolInvocationLoop({ binding, request, primaryTool
448
495
  }
449
496
  repeatedRecoveryWithoutProgress = 0;
450
497
  repeatedPlanOnlyAfterPlan = 0;
498
+ repeatedInvalidExternalPlanEvidenceSelection = 0;
451
499
  const canReplayToolCalls = usedExternalPlanEvidenceToolThisIteration
452
500
  || canReplayToolCallsLocally(binding, toolCalls, primaryTools, toolNameMapping, executableTools, builtinExecutableTools);
453
501
  debugLocalToolReplay({
@@ -5,7 +5,6 @@ export type BuiltinTodoSnapshot = {
5
5
  };
6
6
  export declare function truncateLines(lines: string[], maxChars?: number): string;
7
7
  export declare function summarizeBuiltinWriteTodosArgs(args: Record<string, unknown>): BuiltinTodoSnapshot;
8
- export declare function isLowSignalTodoContent(content: string): boolean;
9
8
  export declare function formatBuiltinTodoSnapshot(snapshot: BuiltinTodoSnapshot): string;
10
9
  export declare function buildRequestPlanState(input: {
11
10
  sessionId: string;
@@ -69,6 +69,13 @@ function readTodoContent(todo) {
69
69
  }
70
70
  return "";
71
71
  }
72
+ function isLowSignalTodoContent(content) {
73
+ const normalized = content.trim().toLowerCase();
74
+ if (!normalized) {
75
+ return true;
76
+ }
77
+ return /^\d+$/.test(normalized) || /^step\s*\d+$/.test(normalized) || /^todo\s*\d+$/.test(normalized);
78
+ }
72
79
  function normalizeTodoStatus(value) {
73
80
  if (typeof value !== "string") {
74
81
  return "pending";
@@ -90,6 +97,9 @@ export function summarizeBuiltinWriteTodosArgs(args) {
90
97
  return [];
91
98
  }
92
99
  const content = readTodoContent(todo);
100
+ if (isLowSignalTodoContent(content)) {
101
+ return [];
102
+ }
93
103
  const status = normalizeTodoStatus(todo.status);
94
104
  const metadata = isRecord(todo.metadata) ? todo.metadata : undefined;
95
105
  return content ? [{
@@ -119,13 +129,6 @@ export function summarizeBuiltinWriteTodosArgs(args) {
119
129
  summary,
120
130
  };
121
131
  }
122
- export function isLowSignalTodoContent(content) {
123
- const normalized = content.trim().toLowerCase();
124
- if (!normalized) {
125
- return true;
126
- }
127
- return /^\d+$/.test(normalized) || /^step\s*\d+$/.test(normalized) || /^todo\s*\d+$/.test(normalized);
128
- }
129
132
  export function formatBuiltinTodoSnapshot(snapshot) {
130
133
  if (snapshot.summary.total === 0) {
131
134
  return "No todos tracked.";
@@ -17,6 +17,7 @@ export type StreamEventProjectionState = {
17
17
  emittedDelegatedTerminalOutput: boolean;
18
18
  sawPlanState: boolean;
19
19
  hasIncompletePlanState: boolean;
20
+ hasFailedPlanState: boolean;
20
21
  openTaskDelegations: number;
21
22
  openToolCapableTaskDelegations: number;
22
23
  taskDelegationHasToolsStack: boolean[];
@@ -1,6 +1,5 @@
1
1
  import { extractToolFallbackContext, extractVisibleOutput, readTextContent, sanitizeVisibleText } from "../parsing/output-parsing.js";
2
2
  import { salvageFunctionLikeToolCall } from "../parsing/output-tool-args.js";
3
- import { isLowSignalTodoContent, summarizeBuiltinWriteTodosArgs } from "./runtime-adapter-support.js";
4
3
  import { computeIncrementalOutput, extractInterruptPayload, extractReasoningStreamOutput, sanitizeRetainedUpstreamEvent, extractStateStreamOutput, extractTerminalStreamOutput, extractToolResult, extractVisibleStreamOutput, normalizeTerminalOutputKey, } from "../parsing/stream-event-parsing.js";
5
4
  import { resolveModelFacingToolName } from "./tool/tool-name-mapping.js";
6
5
  export function createStreamEventProjectionState() {
@@ -20,6 +19,7 @@ export function createStreamEventProjectionState() {
20
19
  emittedDelegatedTerminalOutput: false,
21
20
  sawPlanState: false,
22
21
  hasIncompletePlanState: false,
22
+ hasFailedPlanState: false,
23
23
  openTaskDelegations: 0,
24
24
  openToolCapableTaskDelegations: 0,
25
25
  taskDelegationHasToolsStack: [],
@@ -141,6 +141,9 @@ function readSummaryCounts(summary) {
141
141
  return null;
142
142
  }
143
143
  const typed = summary;
144
+ if (typeof typed.total === "number" && typed.total <= 0) {
145
+ return null;
146
+ }
144
147
  const hasAnyCountField = typeof typed.pending === "number"
145
148
  || typeof typed.inProgress === "number";
146
149
  if (!hasAnyCountField) {
@@ -151,10 +154,23 @@ function readSummaryCounts(summary) {
151
154
  inProgress: typeof typed.inProgress === "number" ? typed.inProgress : 0,
152
155
  };
153
156
  }
157
+ function readSummaryFailureCount(summary) {
158
+ if (typeof summary !== "object" || summary === null) {
159
+ return null;
160
+ }
161
+ const typed = summary;
162
+ if (typeof typed.total === "number" && typed.total <= 0) {
163
+ return null;
164
+ }
165
+ return typeof typed.failed === "number" ? typed.failed : null;
166
+ }
154
167
  function hasIncompleteTodosArray(value) {
155
168
  if (!Array.isArray(value)) {
156
169
  return null;
157
170
  }
171
+ if (value.length === 0) {
172
+ return null;
173
+ }
158
174
  return value.some((item) => {
159
175
  if (typeof item !== "object" || item === null) {
160
176
  return false;
@@ -163,6 +179,20 @@ function hasIncompleteTodosArray(value) {
163
179
  return status === "pending" || status === "in_progress";
164
180
  });
165
181
  }
182
+ function hasFailedTodosArray(value) {
183
+ if (!Array.isArray(value)) {
184
+ return null;
185
+ }
186
+ if (value.length === 0) {
187
+ return null;
188
+ }
189
+ return value.some((item) => {
190
+ if (typeof item !== "object" || item === null) {
191
+ return false;
192
+ }
193
+ return item.status === "failed";
194
+ });
195
+ }
166
196
  function getPlanStateCompleteness(value) {
167
197
  if (typeof value !== "object" || value === null) {
168
198
  return null;
@@ -202,6 +232,45 @@ function getPlanStateCompleteness(value) {
202
232
  }
203
233
  return null;
204
234
  }
235
+ function getPlanStateFailure(value) {
236
+ if (typeof value !== "object" || value === null) {
237
+ return null;
238
+ }
239
+ const typed = value;
240
+ const summaryFailed = readSummaryFailureCount(typed.summary);
241
+ if (summaryFailed !== null) {
242
+ return summaryFailed > 0;
243
+ }
244
+ if (typeof typed.summary === "object" && typed.summary !== null) {
245
+ const nestedSummary = getPlanStateFailure(typed.summary);
246
+ if (nestedSummary !== null) {
247
+ return nestedSummary;
248
+ }
249
+ }
250
+ const directTodos = hasFailedTodosArray(typed.todos);
251
+ if (directTodos !== null) {
252
+ return directTodos;
253
+ }
254
+ if (typeof typed.update === "object" && typed.update !== null) {
255
+ const nestedTodos = hasFailedTodosArray(typed.update.todos);
256
+ if (nestedTodos !== null) {
257
+ return nestedTodos;
258
+ }
259
+ }
260
+ if (typeof typed.output === "object" && typed.output !== null) {
261
+ const nestedOutput = getPlanStateFailure(typed.output);
262
+ if (nestedOutput !== null) {
263
+ return nestedOutput;
264
+ }
265
+ }
266
+ if (typeof typed.data === "object" && typed.data !== null) {
267
+ const nestedData = getPlanStateFailure(typed.data);
268
+ if (nestedData !== null) {
269
+ return nestedData;
270
+ }
271
+ }
272
+ return null;
273
+ }
205
274
  function parseMaybeJsonString(value) {
206
275
  const trimmed = value.trim();
207
276
  if (!trimmed || (!trimmed.startsWith("{") && !trimmed.startsWith("["))) {
@@ -394,12 +463,6 @@ function isPlanToolName(toolName) {
394
463
  || normalized === "call_write_todos"
395
464
  || normalized === "call_read_todos";
396
465
  }
397
- function isWriteTodosToolName(toolName) {
398
- const normalized = normalizePlanToolName(toolName);
399
- return normalized === "write_todos"
400
- || normalized === "tool_call_write_todos"
401
- || normalized === "call_write_todos";
402
- }
403
466
  function extractTodoToolStart(event) {
404
467
  if (typeof event !== "object" || event === null) {
405
468
  return null;
@@ -413,15 +476,6 @@ function extractTodoToolStart(event) {
413
476
  return null;
414
477
  }
415
478
  const input = unwrapPossibleToolInput(typed.data?.input);
416
- if (isWriteTodosToolName(toolName) && typeof input === "object" && input !== null && !Array.isArray(input)) {
417
- const summary = summarizeBuiltinWriteTodosArgs(input);
418
- if (summary.summary.total === 0) {
419
- throw new Error("Error invoking tool 'write_todos' with kwargs {\"todos\":[]} with error: Error: Initial write_todos call cannot use an empty todo list. Send the concrete task steps with both content and status.");
420
- }
421
- if (summary.items.every((item) => isLowSignalTodoContent(item.content))) {
422
- throw new Error("Error invoking tool 'write_todos' with placeholder todo content with error: Error: Initial write_todos call must use descriptive task content. Do not use placeholder entries like '1', '2', or 'step 1'.");
423
- }
424
- }
425
479
  return { toolName, input };
426
480
  }
427
481
  export function projectRuntimeStreamEvent(params) {
@@ -445,6 +499,11 @@ export function projectRuntimeStreamEvent(params) {
445
499
  state.sawPlanState = true;
446
500
  state.hasIncompletePlanState = planStateCompleteness;
447
501
  }
502
+ const planStateFailure = getPlanStateFailure(event);
503
+ if (planStateFailure !== null) {
504
+ state.sawPlanState = true;
505
+ state.hasFailedPlanState = planStateFailure;
506
+ }
448
507
  const eventAgentId = typeof event === "object" && event !== null && typeof event.agentId === "string"
449
508
  ? event.agentId.trim()
450
509
  : "";
@@ -2,7 +2,7 @@ import path from "node:path";
2
2
  import { z } from "zod";
3
3
  import { isSandboxBackend } from "deepagents";
4
4
  import { isRecord } from "../../../utils/object.js";
5
- import { formatBuiltinTodoSnapshot, isLowSignalTodoContent, summarizeBuiltinWriteTodosArgs, truncateLines } from "../runtime-adapter-support.js";
5
+ import { formatBuiltinTodoSnapshot, summarizeBuiltinWriteTodosArgs, truncateLines } from "../runtime-adapter-support.js";
6
6
  import { maybePersistLargeToolOutput, resolveToolRuntimeContext } from "./tool-output-artifacts.js";
7
7
  function buildTaskToolDescription(subagents) {
8
8
  const lines = [
@@ -272,14 +272,6 @@ export async function createBuiltinMiddlewareTools(backend, options) {
272
272
  invoke: async (input) => {
273
273
  const args = isRecord(input) ? input : {};
274
274
  const summary = summarizeBuiltinWriteTodosArgs(args);
275
- if (summary.summary.total === 0 && todoSnapshot.summary.total === 0) {
276
- throw new Error("Initial write_todos call cannot use an empty todo list. Send the concrete task steps with both content and status.");
277
- }
278
- if (summary.summary.total > 0 &&
279
- todoSnapshot.summary.total === 0 &&
280
- summary.items.every((item) => isLowSignalTodoContent(item.content))) {
281
- throw new Error("Initial write_todos call must use descriptive task content. Do not use placeholder entries like '1', '2', or 'step 1'.");
282
- }
283
275
  todoSnapshot = summary;
284
276
  return {
285
277
  ok: true,