@botbotgo/agent-harness 0.0.464 → 0.0.466

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -211,6 +211,10 @@ export type CompiledBuiltinToolsConfig = {
211
211
  };
212
212
  export type CompiledExecutionContract = {
213
213
  requiresPlan?: boolean;
214
+ recoveryEvidenceTool?: {
215
+ name: string;
216
+ args?: Record<string, unknown>;
217
+ };
214
218
  };
215
219
  export type LangChainAgentParams = {
216
220
  model: CompiledModel;
@@ -1,2 +1,2 @@
1
- export declare const AGENT_HARNESS_VERSION = "0.0.464";
1
+ export declare const AGENT_HARNESS_VERSION = "0.0.466";
2
2
  export declare const AGENT_HARNESS_RELEASE_DATE = "2026-05-04";
@@ -1,2 +1,2 @@
1
- export const AGENT_HARNESS_VERSION = "0.0.464";
1
+ export const AGENT_HARNESS_VERSION = "0.0.466";
2
2
  export const AGENT_HARNESS_RELEASE_DATE = "2026-05-04";
@@ -137,6 +137,26 @@ function hasIncompletePlanOutput(value) {
137
137
  }
138
138
  return null;
139
139
  }
140
+ function hasFailedTodos(value) {
141
+ if (Array.isArray(value)) {
142
+ return value.some((todo) => hasFailedTodos(todo));
143
+ }
144
+ if (typeof value !== "object" || value === null) {
145
+ return false;
146
+ }
147
+ const typed = value;
148
+ if (typeof typed.status === "string" && typed.status.trim().toLowerCase() === "failed") {
149
+ return true;
150
+ }
151
+ return hasFailedTodos(typed.todos)
152
+ || hasFailedTodos(typed.update)
153
+ || hasFailedTodos(typed.data)
154
+ || hasFailedTodos(typed.output)
155
+ || hasFailedTodos(typed.summary);
156
+ }
157
+ function hasFailedPlanStateInExecutedToolResults(executedToolResults) {
158
+ return executedToolResults.some((item) => isPlanToolName(item.toolName) && hasFailedTodos(item.output));
159
+ }
140
160
  function normalizePlanToolName(toolName) {
141
161
  return typeof toolName === "string" ? toolName.trim().toLowerCase().replace(/[\s-]+/gu, "_") : "";
142
162
  }
@@ -199,6 +219,9 @@ function buildExecutionRecoveryEvidence(params) {
199
219
  hasNonTodoToolResultEvidence: hasNonTodoToolEvidence(executedToolResults) || projectionState.emittedNonTodoToolResult || projectionState.emittedToolError,
200
220
  hasSuccessfulNonTodoToolResultEvidence: hasSuccessfulNonTodoToolEvidence(executedToolResults) || projectionState.emittedSuccessfulNonTodoToolResult,
201
221
  hasIncompletePlanState: projectionState.hasIncompletePlanState || hasIncompletePlanStateInExecutedToolResults(executedToolResults),
222
+ hasFailedPlanState: (projectionState.hasFailedPlanState || hasFailedPlanStateInExecutedToolResults(executedToolResults))
223
+ && !projectionState.emittedSuccessfulNonTodoToolResult
224
+ && !hasSuccessfulNonTodoToolEvidence(executedToolResults),
202
225
  hasPlanStateEvidence: projectionState.sawPlanState || hasIncompletePlanStateInExecutedToolResults(executedToolResults),
203
226
  hasOpenTaskDelegation: projectionState.openTaskDelegations > 0,
204
227
  hasFailedTaskDelegation: projectionState.hasFailedTaskDelegation
@@ -213,6 +236,7 @@ function buildExecutionRecoveryEvidence(params) {
213
236
  }
214
237
  function hasUnresolvedExecution(evidence) {
215
238
  return (evidence.hasIncompletePlanState
239
+ || evidence.hasFailedPlanState
216
240
  || evidence.hasFailedTaskDelegation
217
241
  || evidence.hasOpenTaskDelegation);
218
242
  }
@@ -389,6 +413,9 @@ function createUnresolvedExecutionError(evidence) {
389
413
  if (evidence.hasIncompletePlanState) {
390
414
  reasons.push("plan state still has unfinished work");
391
415
  }
416
+ if (evidence.hasFailedPlanState) {
417
+ reasons.push("plan state failed before non-TODO evidence returned");
418
+ }
392
419
  if (evidence.hasFailedTaskDelegation) {
393
420
  reasons.push("delegated task failed before surfacing final findings");
394
421
  }
@@ -471,7 +498,8 @@ function projectLocalToolExecutionProfileChunks(executedToolResults, prefix) {
471
498
  }
472
499
  export async function* streamRuntimeExecution(options) {
473
500
  let request = buildInvocationRequest(options.binding, options.history, options.input, options.runtimeOptions);
474
- if (requiresPlanEvidence(options.binding)) {
501
+ if (requiresPlanEvidence(options.binding)
502
+ && options.runtimeOptions.suppressInitialRequiredPlanInstruction !== true) {
475
503
  request = withPromptedJsonToolPolicy(appendToolRecoveryInstruction(request, buildInitialRequiredPlanInstruction(options.primaryTools)), "planning");
476
504
  }
477
505
  let emittedUnsafeStreamSideEffects = false;
@@ -480,6 +508,14 @@ export async function* streamRuntimeExecution(options) {
480
508
  const deferredStreamContent = [];
481
509
  let sawRetrySafeInvalidToolSelectionError = false;
482
510
  const projectionState = createStreamEventProjectionState();
511
+ if (options.runtimeOptions.externalPlanEvidence === true) {
512
+ projectionState.sawPlanState = true;
513
+ yield {
514
+ kind: "commentary",
515
+ content: `${options.binding.agent.id}: TODO evidence observed.`,
516
+ agentId: options.binding.agent.id,
517
+ };
518
+ }
483
519
  const requestId = options.runtimeOptions.requestId ?? options.sessionId;
484
520
  const buildRunnableConfig = (extra) => ({
485
521
  ...(options.resolveInvocationConfig
@@ -523,6 +559,7 @@ export async function* streamRuntimeExecution(options) {
523
559
  try {
524
560
  const rawStreamInput = buildRawModelMessages(options.binding, options.getSystemPrompt(options.binding), options.history, options.input, options.runtimeOptions.memoryContext);
525
561
  const streamInput = requiresPlanEvidence(options.binding)
562
+ && options.runtimeOptions.suppressInitialRequiredPlanInstruction !== true
526
563
  ? withPromptedJsonToolPolicy(rawStreamInput, "planning")
527
564
  : rawStreamInput;
528
565
  stream = await options.withTimeout(() => options.langChainStreamModel.stream(streamInput), computeRemainingTimeoutMs(options.streamDeadlineAt, options.invokeTimeoutMs), "model stream start", "stream");
@@ -865,6 +902,35 @@ export async function* streamRuntimeExecution(options) {
865
902
  const recoveredToolResults = Array.isArray(recovered.metadata?.executedToolResults)
866
903
  ? recovered.metadata.executedToolResults
867
904
  : [];
905
+ const originalExecutionEvidence = buildExecutionRecoveryEvidence({ projectionState });
906
+ const recoveredExecutionEvidence = buildExecutionRecoveryEvidence({
907
+ projectionState: createStreamEventProjectionState(),
908
+ executedToolResults: recoveredToolResults,
909
+ });
910
+ const recoveredCarriesExecutionEvidence = recoveredExecutionEvidence.hasSuccessfulNonTodoToolResultEvidence
911
+ || recoveredExecutionEvidence.hasOpenTaskDelegation
912
+ || recoveredExecutionEvidence.hasDelegatedExecutionToolEvidence;
913
+ const recoveredHasUnresolvedExecution = hasUnresolvedExecution(recoveredExecutionEvidence)
914
+ || hasMissingDelegatedExecutionEvidence(recoveredExecutionEvidence)
915
+ || hasMissingDelegatedFindings(recoveredExecutionEvidence)
916
+ || (!recoveredCarriesExecutionEvidence
917
+ && (hasUnresolvedExecution(originalExecutionEvidence)
918
+ || hasMissingDelegatedExecutionEvidence(originalExecutionEvidence)
919
+ || hasMissingDelegatedFindings(originalExecutionEvidence)));
920
+ if (recoveredHasUnresolvedExecution) {
921
+ const effectiveRecoveryEvidence = recoveredCarriesExecutionEvidence
922
+ ? recoveredExecutionEvidence
923
+ : {
924
+ ...recoveredExecutionEvidence,
925
+ hasIncompletePlanState: originalExecutionEvidence.hasIncompletePlanState,
926
+ hasFailedPlanState: originalExecutionEvidence.hasFailedPlanState,
927
+ hasFailedTaskDelegation: originalExecutionEvidence.hasFailedTaskDelegation,
928
+ hasOpenTaskDelegation: originalExecutionEvidence.hasOpenTaskDelegation,
929
+ hasDelegatedAgentWithConfiguredTools: originalExecutionEvidence.hasDelegatedAgentWithConfiguredTools,
930
+ hasDelegatedExecutionToolEvidence: originalExecutionEvidence.hasDelegatedExecutionToolEvidence,
931
+ };
932
+ throw createUnresolvedExecutionError(effectiveRecoveryEvidence);
933
+ }
868
934
  for (const toolResult of recoveredToolResults) {
869
935
  yield {
870
936
  kind: "tool-result",
@@ -997,7 +1063,7 @@ export async function* streamRuntimeExecution(options) {
997
1063
  }
998
1064
  const retriedVisibleOutput = retried.output ? toVisibleContent(retried.output) : "";
999
1065
  const retriedIsDelegationFailureFinalReport = isDelegationFailureFinalReport(originalExecutionEvidence, executedToolResults, retriedVisibleOutput);
1000
- const retriedCarriesExecutionEvidence = retriedExecutionEvidence.hasToolResultEvidence
1066
+ const retriedCarriesExecutionEvidence = retriedExecutionEvidence.hasSuccessfulNonTodoToolResultEvidence
1001
1067
  || retriedExecutionEvidence.hasOpenTaskDelegation
1002
1068
  || retriedExecutionEvidence.hasDelegatedExecutionToolEvidence;
1003
1069
  const retriedHasUnresolvedExecution = !retriedIsDelegationFailureFinalReport
@@ -1013,6 +1079,7 @@ export async function* streamRuntimeExecution(options) {
1013
1079
  : {
1014
1080
  ...retriedExecutionEvidence,
1015
1081
  hasIncompletePlanState: originalExecutionEvidence.hasIncompletePlanState,
1082
+ hasFailedPlanState: originalExecutionEvidence.hasFailedPlanState,
1016
1083
  hasFailedTaskDelegation: originalExecutionEvidence.hasFailedTaskDelegation,
1017
1084
  hasOpenTaskDelegation: originalExecutionEvidence.hasOpenTaskDelegation,
1018
1085
  hasDelegatedAgentWithConfiguredTools: originalExecutionEvidence.hasDelegatedAgentWithConfiguredTools,
@@ -1254,6 +1321,7 @@ export async function* streamRuntimeExecution(options) {
1254
1321
  : {
1255
1322
  ...recoveredExecutionEvidence,
1256
1323
  hasIncompletePlanState: originalExecutionEvidence.hasIncompletePlanState,
1324
+ hasFailedPlanState: originalExecutionEvidence.hasFailedPlanState,
1257
1325
  hasFailedTaskDelegation: originalExecutionEvidence.hasFailedTaskDelegation,
1258
1326
  hasOpenTaskDelegation: originalExecutionEvidence.hasOpenTaskDelegation,
1259
1327
  hasDelegatedAgentWithConfiguredTools: originalExecutionEvidence.hasDelegatedAgentWithConfiguredTools,
@@ -13,8 +13,10 @@ const TOOL_FOLLOW_UP_INSTRUCTION = "One or more tool results are already availab
13
13
  const DEFAULT_MAX_TOOL_ITERATIONS = 10_000;
14
14
  const MAX_REPEATED_RECOVERY_WITHOUT_PROGRESS = 2;
15
15
  const MAX_REPEATED_PLAN_ONLY_AFTER_PLAN = 2;
16
+ const MAX_REPEATED_INVALID_EXTERNAL_PLAN_EVIDENCE_SELECTION = 2;
16
17
  const REQUIRED_PLAN_CONTRACT_MARKER = "This agent has a required visible planning contract.";
17
18
  const INITIAL_WRITE_TODOS_MARKER = "Your first action for this request must be write_todos";
19
+ const REQUIRED_SINGLE_EXTERNAL_PLAN_EVIDENCE_TOOL_INSTRUCTION = "The TODO board already exists. The next action must be exactly one non-planning evidence tool call. Do not call write_todos or read_todos, and do not call multiple tools in the same response.";
18
20
  function resolveSingleBootstrapEvidenceTool(primaryTools) {
19
21
  const evidenceTools = primaryTools
20
22
  .map((tool) => typeof tool.name === "string" ? tool.name.trim() : "")
@@ -142,6 +144,10 @@ function isPlanToolName(toolName) {
142
144
  || normalized === "call_write_todos"
143
145
  || normalized === "call_read_todos";
144
146
  }
147
+ function isPlanToolCall(toolCall, toolNameMapping, primaryTools) {
148
+ const resolvedToolName = resolveModelFacingToolName(String(toolCall.name ?? ""), toolNameMapping, primaryTools);
149
+ return isPlanToolName(toolCall.name) || isPlanToolName(resolvedToolName);
150
+ }
145
151
  function isFallbackTodoCompletionToolCall(toolCall) {
146
152
  return typeof toolCall.id === "string"
147
153
  && toolCall.id.startsWith("fallback-complete-")
@@ -222,7 +228,11 @@ function extractLatestUserInput(request) {
222
228
  const messages = Array.isArray(typedRequest.messages) ? typedRequest.messages : [];
223
229
  for (let index = messages.length - 1; index >= 0; index -= 1) {
224
230
  const candidate = messages[index];
225
- if (candidate?.role !== "user" || typeof candidate.content !== "string") {
231
+ const role = typeof candidate?.role === "string" ? candidate.role.trim().toLowerCase() : "";
232
+ const messageType = typeof candidate?._getType === "function" ? String(candidate._getType()).trim().toLowerCase() : "";
233
+ const constructorType = Array.isArray(candidate?.id) ? String(candidate.id.at(-1)).trim().toLowerCase() : "";
234
+ const isUserMessage = role === "user" || role === "human" || messageType === "human" || constructorType === "humanmessage";
235
+ if (!isUserMessage || typeof candidate?.content !== "string") {
226
236
  continue;
227
237
  }
228
238
  const normalized = candidate.content.trim();
@@ -284,6 +294,7 @@ export async function runLocalToolInvocationLoop({ binding, request, primaryTool
284
294
  let lastRecoveryExecutedCount = -1;
285
295
  let repeatedRecoveryWithoutProgress = 0;
286
296
  let repeatedPlanOnlyAfterPlan = 0;
297
+ let repeatedInvalidExternalPlanEvidenceSelection = 0;
287
298
  let pendingResult;
288
299
  let result;
289
300
  const toolCatalog = new Map();
@@ -422,6 +433,26 @@ export async function runLocalToolInvocationLoop({ binding, request, primaryTool
422
433
  activeRequest = appendToolRecoveryInstruction(activeRequest, missingPlanRecoveryInstruction);
423
434
  continue;
424
435
  }
436
+ if (requiresPlanEvidence(binding)
437
+ && externalPlanEvidence === true
438
+ && hasPlanStateEvidence(executedToolResults, externalPlanEvidence)
439
+ && !hasNonTodoToolEvidence(executedToolResults)
440
+ && toolCalls.length > 0
441
+ && (toolCalls.length !== 1 || isPlanToolCall(toolCalls[0], toolNameMapping, primaryTools))) {
442
+ repeatedInvalidExternalPlanEvidenceSelection += 1;
443
+ if (repeatedInvalidExternalPlanEvidenceSelection >= MAX_REPEATED_INVALID_EXTERNAL_PLAN_EVIDENCE_SELECTION) {
444
+ throw createToolLoopError({
445
+ reason: "model did not select exactly one non-planning evidence tool during delegated plan evidence recovery",
446
+ iteration,
447
+ maxToolIterations,
448
+ toolCalls,
449
+ executedToolResults,
450
+ });
451
+ }
452
+ activeRequest = appendToolRecoveryInstruction(activeRequest, REQUIRED_SINGLE_EXTERNAL_PLAN_EVIDENCE_TOOL_INSTRUCTION);
453
+ pendingResult = undefined;
454
+ continue;
455
+ }
425
456
  if (requiresPlanEvidence(binding)
426
457
  && hasPlanStateEvidence(executedToolResults, externalPlanEvidence)
427
458
  && !hasNonTodoToolEvidence(executedToolResults)
@@ -429,6 +460,12 @@ export async function runLocalToolInvocationLoop({ binding, request, primaryTool
429
460
  && toolCalls.every((toolCall) => isPlanToolName(toolCall.name))) {
430
461
  repeatedPlanOnlyAfterPlan += 1;
431
462
  if (repeatedPlanOnlyAfterPlan >= MAX_REPEATED_PLAN_ONLY_AFTER_PLAN) {
463
+ if (hasNonTodoToolEvidence(executedToolResults)) {
464
+ return {
465
+ result: buildDeterministicFinalFromToolEvidence(executedToolResults),
466
+ executedToolResults,
467
+ };
468
+ }
432
469
  throw createToolLoopError({
433
470
  reason: "model repeatedly selected only planning tools after the todo board already existed and before any non-planning evidence tool returned",
434
471
  iteration,
@@ -438,6 +475,12 @@ export async function runLocalToolInvocationLoop({ binding, request, primaryTool
438
475
  });
439
476
  }
440
477
  if (iteration + 1 === maxToolIterations) {
478
+ if (hasNonTodoToolEvidence(executedToolResults)) {
479
+ return {
480
+ result: buildDeterministicFinalFromToolEvidence(executedToolResults),
481
+ executedToolResults,
482
+ };
483
+ }
441
484
  throw createToolLoopError({
442
485
  reason: "maximum iterations reached",
443
486
  iteration,
@@ -452,6 +495,7 @@ export async function runLocalToolInvocationLoop({ binding, request, primaryTool
452
495
  }
453
496
  repeatedRecoveryWithoutProgress = 0;
454
497
  repeatedPlanOnlyAfterPlan = 0;
498
+ repeatedInvalidExternalPlanEvidenceSelection = 0;
455
499
  const canReplayToolCalls = usedExternalPlanEvidenceToolThisIteration
456
500
  || canReplayToolCallsLocally(binding, toolCalls, primaryTools, toolNameMapping, executableTools, builtinExecutableTools);
457
501
  debugLocalToolReplay({
@@ -69,6 +69,13 @@ function readTodoContent(todo) {
69
69
  }
70
70
  return "";
71
71
  }
72
+ function isLowSignalTodoContent(content) {
73
+ const normalized = content.trim().toLowerCase();
74
+ if (!normalized) {
75
+ return true;
76
+ }
77
+ return /^\d+$/.test(normalized) || /^step\s*\d+$/.test(normalized) || /^todo\s*\d+$/.test(normalized);
78
+ }
72
79
  function normalizeTodoStatus(value) {
73
80
  if (typeof value !== "string") {
74
81
  return "pending";
@@ -90,6 +97,9 @@ export function summarizeBuiltinWriteTodosArgs(args) {
90
97
  return [];
91
98
  }
92
99
  const content = readTodoContent(todo);
100
+ if (isLowSignalTodoContent(content)) {
101
+ return [];
102
+ }
93
103
  const status = normalizeTodoStatus(todo.status);
94
104
  const metadata = isRecord(todo.metadata) ? todo.metadata : undefined;
95
105
  return content ? [{
@@ -17,6 +17,7 @@ export type StreamEventProjectionState = {
17
17
  emittedDelegatedTerminalOutput: boolean;
18
18
  sawPlanState: boolean;
19
19
  hasIncompletePlanState: boolean;
20
+ hasFailedPlanState: boolean;
20
21
  openTaskDelegations: number;
21
22
  openToolCapableTaskDelegations: number;
22
23
  taskDelegationHasToolsStack: boolean[];
@@ -19,6 +19,7 @@ export function createStreamEventProjectionState() {
19
19
  emittedDelegatedTerminalOutput: false,
20
20
  sawPlanState: false,
21
21
  hasIncompletePlanState: false,
22
+ hasFailedPlanState: false,
22
23
  openTaskDelegations: 0,
23
24
  openToolCapableTaskDelegations: 0,
24
25
  taskDelegationHasToolsStack: [],
@@ -153,6 +154,16 @@ function readSummaryCounts(summary) {
153
154
  inProgress: typeof typed.inProgress === "number" ? typed.inProgress : 0,
154
155
  };
155
156
  }
157
+ function readSummaryFailureCount(summary) {
158
+ if (typeof summary !== "object" || summary === null) {
159
+ return null;
160
+ }
161
+ const typed = summary;
162
+ if (typeof typed.total === "number" && typed.total <= 0) {
163
+ return null;
164
+ }
165
+ return typeof typed.failed === "number" ? typed.failed : null;
166
+ }
156
167
  function hasIncompleteTodosArray(value) {
157
168
  if (!Array.isArray(value)) {
158
169
  return null;
@@ -168,6 +179,20 @@ function hasIncompleteTodosArray(value) {
168
179
  return status === "pending" || status === "in_progress";
169
180
  });
170
181
  }
182
+ function hasFailedTodosArray(value) {
183
+ if (!Array.isArray(value)) {
184
+ return null;
185
+ }
186
+ if (value.length === 0) {
187
+ return null;
188
+ }
189
+ return value.some((item) => {
190
+ if (typeof item !== "object" || item === null) {
191
+ return false;
192
+ }
193
+ return item.status === "failed";
194
+ });
195
+ }
171
196
  function getPlanStateCompleteness(value) {
172
197
  if (typeof value !== "object" || value === null) {
173
198
  return null;
@@ -207,6 +232,45 @@ function getPlanStateCompleteness(value) {
207
232
  }
208
233
  return null;
209
234
  }
235
+ function getPlanStateFailure(value) {
236
+ if (typeof value !== "object" || value === null) {
237
+ return null;
238
+ }
239
+ const typed = value;
240
+ const summaryFailed = readSummaryFailureCount(typed.summary);
241
+ if (summaryFailed !== null) {
242
+ return summaryFailed > 0;
243
+ }
244
+ if (typeof typed.summary === "object" && typed.summary !== null) {
245
+ const nestedSummary = getPlanStateFailure(typed.summary);
246
+ if (nestedSummary !== null) {
247
+ return nestedSummary;
248
+ }
249
+ }
250
+ const directTodos = hasFailedTodosArray(typed.todos);
251
+ if (directTodos !== null) {
252
+ return directTodos;
253
+ }
254
+ if (typeof typed.update === "object" && typed.update !== null) {
255
+ const nestedTodos = hasFailedTodosArray(typed.update.todos);
256
+ if (nestedTodos !== null) {
257
+ return nestedTodos;
258
+ }
259
+ }
260
+ if (typeof typed.output === "object" && typed.output !== null) {
261
+ const nestedOutput = getPlanStateFailure(typed.output);
262
+ if (nestedOutput !== null) {
263
+ return nestedOutput;
264
+ }
265
+ }
266
+ if (typeof typed.data === "object" && typed.data !== null) {
267
+ const nestedData = getPlanStateFailure(typed.data);
268
+ if (nestedData !== null) {
269
+ return nestedData;
270
+ }
271
+ }
272
+ return null;
273
+ }
210
274
  function parseMaybeJsonString(value) {
211
275
  const trimmed = value.trim();
212
276
  if (!trimmed || (!trimmed.startsWith("{") && !trimmed.startsWith("["))) {
@@ -435,6 +499,11 @@ export function projectRuntimeStreamEvent(params) {
435
499
  state.sawPlanState = true;
436
500
  state.hasIncompletePlanState = planStateCompleteness;
437
501
  }
502
+ const planStateFailure = getPlanStateFailure(event);
503
+ if (planStateFailure !== null) {
504
+ state.sawPlanState = true;
505
+ state.hasFailedPlanState = planStateFailure;
506
+ }
438
507
  const eventAgentId = typeof event === "object" && event !== null && typeof event.agentId === "string"
439
508
  ? event.agentId.trim()
440
509
  : "";
@@ -86,6 +86,19 @@ function readSchemaDescription(schemaPart) {
86
86
  }
87
87
  return readSchemaDescription(def?.innerType);
88
88
  }
89
+ function schemaPartExpectsString(schemaPart) {
90
+ if (!isObject(schemaPart)) {
91
+ return false;
92
+ }
93
+ if (schemaPart.type === "string") {
94
+ return true;
95
+ }
96
+ const def = schemaPart._def ?? schemaPart.def;
97
+ if (def?.typeName === "ZodString" || def?.type === "string") {
98
+ return true;
99
+ }
100
+ return schemaPartExpectsString(def?.innerType);
101
+ }
89
102
  function fillLatestUserInputForQueryLikeFields(args, shape, latestUserInput) {
90
103
  const userInput = typeof latestUserInput === "string" ? latestUserInput.trim() : "";
91
104
  if (!userInput) {
@@ -100,7 +113,7 @@ function fillLatestUserInputForQueryLikeFields(args, shape, latestUserInput) {
100
113
  const description = readSchemaDescription(schemaPart);
101
114
  const keyIsQueryLike = ["query", "question", "prompt", "input", "text"].includes(normalizedKey);
102
115
  const descriptionIsQueryLike = /\b(?:query|question|prompt|input|text)\b/iu.test(description);
103
- if (!keyIsQueryLike && !descriptionIsQueryLike) {
116
+ if ((!keyIsQueryLike && !descriptionIsQueryLike) || !schemaPartExpectsString(schemaPart)) {
104
117
  continue;
105
118
  }
106
119
  next = {
@@ -110,6 +123,27 @@ function fillLatestUserInputForQueryLikeFields(args, shape, latestUserInput) {
110
123
  }
111
124
  return next;
112
125
  }
126
+ function fillLatestUserInputForResourceArrayFields(args, shape, latestUserInput) {
127
+ const userInput = typeof latestUserInput === "string" ? latestUserInput.trim() : "";
128
+ if (!userInput || !hasExplicitResourceReference(userInput)) {
129
+ return args;
130
+ }
131
+ const resourceRefs = extractExplicitResourceReferences(userInput);
132
+ if (resourceRefs.length === 0) {
133
+ return args;
134
+ }
135
+ let next = args;
136
+ for (const [key, schemaPart] of Object.entries(shape)) {
137
+ if (key in next || !schemaPartExpectsArray(schemaPart)) {
138
+ continue;
139
+ }
140
+ const mapped = resourceRefs.map((ref) => inferDiscriminatedArrayObjectFromString(schemaPart, ref));
141
+ if (mapped.length > 0 && mapped.every((item) => item !== null)) {
142
+ next = { ...next, [key]: mapped };
143
+ }
144
+ }
145
+ return next;
146
+ }
113
147
  function mapCommonArgumentAliases(args, shape) {
114
148
  let next = args;
115
149
  if ("args" in shape && !("args" in next) && Array.isArray(next.argv)) {
@@ -133,10 +167,96 @@ function schemaPartExpectsArray(schemaPart) {
133
167
  }
134
168
  return schemaPartExpectsArray(def?.innerType);
135
169
  }
170
+ function readObjectShape(schemaPart) {
171
+ if (!isObject(schemaPart)) {
172
+ return undefined;
173
+ }
174
+ const def = schemaPart._def ?? schemaPart.def;
175
+ if (!def) {
176
+ return undefined;
177
+ }
178
+ const shape = typeof def.shape === "function" ? def.shape() : def.shape;
179
+ return isRecord(shape) ? shape : readObjectShape(def.innerType);
180
+ }
181
+ function readLiteralValue(schemaPart) {
182
+ if (!isObject(schemaPart)) {
183
+ return undefined;
184
+ }
185
+ const def = schemaPart._def ?? schemaPart.def;
186
+ if ((def?.typeName === "ZodLiteral" || def?.type === "literal") && typeof def.value === "string") {
187
+ return def.value;
188
+ }
189
+ if ((def?.typeName === "ZodLiteral" || def?.type === "literal") && Array.isArray(def.values) && typeof def.values[0] === "string") {
190
+ return def.values[0];
191
+ }
192
+ return readLiteralValue(def?.innerType);
193
+ }
194
+ function readArrayElementSchema(schemaPart) {
195
+ if (!isObject(schemaPart)) {
196
+ return undefined;
197
+ }
198
+ const def = schemaPart._def ?? schemaPart.def;
199
+ if (def?.typeName === "ZodArray" || def?.type === "array") {
200
+ return def.element ?? def.type;
201
+ }
202
+ return readArrayElementSchema(def?.innerType);
203
+ }
204
+ function inferDiscriminatedArrayObjectFromString(schemaPart, value) {
205
+ const raw = value.trim();
206
+ if (!raw) {
207
+ return null;
208
+ }
209
+ const elementSchema = readArrayElementSchema(schemaPart);
210
+ if (!isObject(elementSchema)) {
211
+ return null;
212
+ }
213
+ const def = elementSchema._def ?? elementSchema.def;
214
+ const options = Array.isArray(def?.options)
215
+ ? def.options
216
+ : Array.isArray(elementSchema.options)
217
+ ? elementSchema.options
218
+ : undefined;
219
+ if (def?.typeName !== "ZodDiscriminatedUnion"
220
+ && !(def?.type === "union" && typeof def.discriminator === "string")) {
221
+ return null;
222
+ }
223
+ if (typeof def.discriminator !== "string" || !options) {
224
+ return null;
225
+ }
226
+ const looksLikeUrl = /^https?:\/\/\S+$/iu.test(raw);
227
+ for (const option of options) {
228
+ const optionShape = readObjectShape(option);
229
+ if (!optionShape) {
230
+ continue;
231
+ }
232
+ const discriminatorValue = readLiteralValue(optionShape[def.discriminator]);
233
+ if (!discriminatorValue) {
234
+ continue;
235
+ }
236
+ if (looksLikeUrl && "url" in optionShape) {
237
+ return { [def.discriminator]: discriminatorValue, url: raw };
238
+ }
239
+ const looksLikePath = /^(?:\.{1,2}\/|\/|~\/|[A-Za-z]:[\\/])|\\|\.[A-Za-z0-9]{1,12}$/u.test(raw);
240
+ if (!looksLikeUrl && looksLikePath && "path" in optionShape && !raw.includes("\n")) {
241
+ return { [def.discriminator]: discriminatorValue, path: raw };
242
+ }
243
+ if ("text" in optionShape) {
244
+ return { [def.discriminator]: discriminatorValue, text: raw };
245
+ }
246
+ }
247
+ return null;
248
+ }
136
249
  function mapStringArrayFields(args, shape) {
137
250
  let next = args;
138
251
  for (const [key, schemaPart] of Object.entries(shape)) {
139
252
  const value = next[key];
253
+ if (Array.isArray(value) && value.every((item) => typeof item === "string")) {
254
+ const mapped = value.map((item) => inferDiscriminatedArrayObjectFromString(schemaPart, item));
255
+ if (mapped.length > 0 && mapped.every((item) => item !== null)) {
256
+ next = { ...next, [key]: mapped };
257
+ }
258
+ continue;
259
+ }
140
260
  if (typeof value !== "string") {
141
261
  continue;
142
262
  }
@@ -175,6 +295,9 @@ function mapDelimitedListLikeArgs(args) {
175
295
  }
176
296
  return next;
177
297
  }
298
+ function stripArgsToShape(args, shape) {
299
+ return Object.fromEntries(Object.entries(args).filter(([key]) => key in shape));
300
+ }
178
301
  function dropDelimitedScalarPathArgs(args, shape) {
179
302
  let next = args;
180
303
  for (const [key, schemaPart] of Object.entries(shape)) {
@@ -199,7 +322,10 @@ function dropDelimitedScalarPathArgs(args, shape) {
199
322
  return next;
200
323
  }
201
324
  export function normalizeToolArgsForSchema(args, schema, rawArgsInput, options = {}) {
202
- const schemaDef = isObject(schema) ? schema._def : undefined;
325
+ const schemaDef = isObject(schema)
326
+ ? (schema._def
327
+ ?? schema.def)
328
+ : undefined;
203
329
  const zodShape = schemaDef
204
330
  ? isRecord(schemaDef.shape)
205
331
  ? schemaDef.shape
@@ -210,38 +336,47 @@ export function normalizeToolArgsForSchema(args, schema, rawArgsInput, options =
210
336
  const jsonShape = isObject(schema) && isRecord(schema.properties)
211
337
  ? (schema.properties ?? undefined)
212
338
  : undefined;
213
- const shape = zodShape && isRecord(zodShape) ? zodShape : jsonShape;
339
+ const plainShape = isObject(schema)
340
+ && !("_def" in schema)
341
+ && !("properties" in schema)
342
+ && Object.values(schema).every((value) => isObject(value))
343
+ ? schema
344
+ : undefined;
345
+ const shape = zodShape && isRecord(zodShape) ? zodShape : jsonShape ?? plainShape;
214
346
  if (!shape || !isRecord(shape)) {
215
347
  return mapDelimitedListLikeArgs(args);
216
348
  }
217
- const aliasMappedArgs = dropDelimitedScalarPathArgs(mapStringArrayFields(mapCommonArgumentAliases(args, shape), shape), shape);
349
+ const stripUnknownArgs = plainShape !== undefined && !zodShape && !jsonShape;
350
+ const aliasMappedArgs = dropDelimitedScalarPathArgs(fillLatestUserInputForResourceArrayFields(mapStringArrayFields(mapCommonArgumentAliases(args, shape), shape), shape, options.latestUserInput), shape);
218
351
  const keys = Object.keys(shape);
219
352
  if (keys.length !== 1) {
220
- return fillLatestUserInputForQueryLikeFields(aliasMappedArgs, shape, options.latestUserInput);
353
+ const filled = fillLatestUserInputForQueryLikeFields(aliasMappedArgs, shape, options.latestUserInput);
354
+ return stripUnknownArgs ? stripArgsToShape(filled, shape) : filled;
221
355
  }
222
356
  const [expectedKey] = keys;
223
357
  if (expectedKey in aliasMappedArgs) {
224
- return aliasMappedArgs;
358
+ return stripUnknownArgs ? stripArgsToShape(aliasMappedArgs, shape) : aliasMappedArgs;
225
359
  }
226
360
  const scalarMappedArgs = mapSingleFieldScalarArg(aliasMappedArgs, expectedKey, rawArgsInput);
227
361
  if (expectedKey in scalarMappedArgs) {
228
- return scalarMappedArgs;
362
+ return stripUnknownArgs ? stripArgsToShape(scalarMappedArgs, shape) : scalarMappedArgs;
229
363
  }
230
364
  if (Object.keys(scalarMappedArgs).length === 0 && typeof options.latestUserInput === "string") {
231
365
  const userInput = options.latestUserInput.trim();
232
366
  const resourceRefs = extractExplicitResourceReferences(userInput);
233
367
  if (resourceRefs.length === 1 && hasExplicitResourceReference(userInput)) {
234
- return {
368
+ const filled = {
235
369
  ...scalarMappedArgs,
236
370
  [expectedKey]: resourceRefs[0],
237
371
  };
372
+ return stripUnknownArgs ? stripArgsToShape(filled, shape) : filled;
238
373
  }
239
374
  }
240
375
  const genericScalarMappedArgs = mapSingleRemainingScalarArg(scalarMappedArgs, expectedKey);
241
376
  if (expectedKey in genericScalarMappedArgs) {
242
- return genericScalarMappedArgs;
377
+ return stripUnknownArgs ? stripArgsToShape(genericScalarMappedArgs, shape) : genericScalarMappedArgs;
243
378
  }
244
- return genericScalarMappedArgs;
379
+ return stripUnknownArgs ? stripArgsToShape(genericScalarMappedArgs, shape) : genericScalarMappedArgs;
245
380
  }
246
381
  export function extractToolCallsFromResult(result) {
247
382
  const capturedToolCalls = readCapturedPromptedJsonToolCalls(result);
@@ -62,6 +62,11 @@ export declare class AgentRuntimeAdapter {
62
62
  toolRuntimeContext?: Record<string, unknown>;
63
63
  suppressInitialRequiredPlanInstruction?: boolean;
64
64
  externalPlanEvidence?: boolean;
65
+ externalPlanEvidenceTools?: Array<{
66
+ name: string;
67
+ args?: Record<string, unknown>;
68
+ id?: string;
69
+ }>;
65
70
  }): Promise<RequestResult>;
66
71
  private tryDelegateWithCompactRouter;
67
72
  private buildCompactDelegationReport;
@@ -75,6 +80,13 @@ export declare class AgentRuntimeAdapter {
75
80
  memoryContext?: string;
76
81
  profiling?: boolean;
77
82
  toolRuntimeContext?: Record<string, unknown>;
83
+ suppressInitialRequiredPlanInstruction?: boolean;
84
+ externalPlanEvidence?: boolean;
85
+ externalPlanEvidenceTools?: Array<{
86
+ name: string;
87
+ args?: Record<string, unknown>;
88
+ id?: string;
89
+ }>;
78
90
  }): AsyncGenerator<RuntimeStreamChunk | string>;
79
91
  }
80
92
  export { AgentRuntimeAdapter as RuntimeAdapter, AGENT_INTERRUPT_SENTINEL_PREFIX, AGENT_INTERRUPT_SENTINEL_PREFIX as INTERRUPT_SENTINEL_PREFIX, buildDeepAgentCreateParams, buildLangChainCreateParams, DEFAULT_DEEPAGENT_RECURSION_LIMIT, materializeModelExposedBuiltinMiddlewareTools, resolveLangChainInvocationConfig, resolveRunnableCheckpointer, resolveRunnableInterruptOn, RuntimeOperationTimeoutError, };
@@ -8,7 +8,9 @@ import { extractMessageText } from "../utils/message-content.js";
8
8
  import { AGENT_INTERRUPT_SENTINEL_PREFIX, buildDeepAgentCreateParams, buildDeepAgentSystemPromptWithCapabilityCatalog, buildLangChainCreateParams, DEFAULT_DEEPAGENT_RECURSION_LIMIT, materializeModelExposedBuiltinMiddlewareTools, resolveLangChainInvocationConfig, resolveRunnableCheckpointer, resolveRunnableInterruptOn, shouldAttachDeepAgentBackend, shouldAttachDeepAgentCheckpointer, shouldAttachDeepAgentStore, } from "./agent-runtime-assembly.js";
9
9
  import { resolveDeepAgentSkillSourcePaths, resolveDeepAgentSkillSourceRootPaths, } from "./adapter/compat/deepagent-compat.js";
10
10
  import { buildToolNameMapping, } from "./adapter/tool/tool-name-mapping.js";
11
+ import { PROMPTED_JSON_TOOL_POLICY_KEY } from "./adapter/model/prompted-json-tool-policy.js";
11
12
  import { executeRequestInvocation } from "./adapter/flow/invocation-flow.js";
13
+ import { extractExplicitResourceReferences } from "./harness/system/runtime-memory-policy.js";
12
14
  import { streamRuntimeExecution } from "./adapter/flow/stream-runtime.js";
13
15
  import { resolveDeterministicFinalOutput } from "./adapter/invocation-result.js";
14
16
  import { applyToolRecoveryInstruction as applyToolRecoveryInstructionHelper, applyStrictToolJsonInstruction as applyStrictToolJsonInstructionHelper, callRuntimeWithToolParseRecovery as callRuntimeWithToolParseRecoveryHelper, createModelFallbackRunnable as createModelFallbackRunnableHelper, invokeWithProviderRetry as invokeWithProviderRetryHelper, iterateWithTimeout as iterateWithTimeoutHelper, materializeModelStream as materializeModelStreamHelper, RuntimeOperationTimeoutError, withRuntimeTimeout, } from "./adapter/runtime-shell.js";
@@ -243,6 +245,9 @@ function isDelegationOnlyDeepAgentBinding(binding) {
243
245
  && getBindingSkills(binding).length === 0;
244
246
  }
245
247
  function hasDelegatedPlanEvidence(result) {
248
+ if (result?.metadata?.externalPlanEvidence === true) {
249
+ return true;
250
+ }
246
251
  const toolResults = result?.metadata?.executedToolResults;
247
252
  return Array.isArray(toolResults)
248
253
  && toolResults.some((item) => isPlanToolName(item.toolName));
@@ -264,14 +269,62 @@ function hasIncompleteDelegatedTodos(value) {
264
269
  || hasIncompleteDelegatedTodos(record.stateSnapshot)
265
270
  || hasIncompleteDelegatedTodos(record.metadata);
266
271
  }
272
+ function hasFailedDelegatedTodos(value) {
273
+ if (Array.isArray(value)) {
274
+ return value.some((item) => hasFailedDelegatedTodos(item));
275
+ }
276
+ if (typeof value !== "object" || value === null) {
277
+ return false;
278
+ }
279
+ const record = value;
280
+ const status = typeof record.status === "string" ? record.status.trim().toLowerCase() : "";
281
+ if (status === "failed") {
282
+ return true;
283
+ }
284
+ return hasFailedDelegatedTodos(record.todos)
285
+ || hasFailedDelegatedTodos(record.update)
286
+ || hasFailedDelegatedTodos(record.stateSnapshot)
287
+ || hasFailedDelegatedTodos(record.metadata);
288
+ }
267
289
  function hasIncompleteDelegatedPlanState(result) {
268
290
  const toolResults = result?.metadata?.executedToolResults;
269
291
  return Array.isArray(toolResults)
270
292
  && toolResults.some((item) => isPlanToolName(item.toolName) && hasIncompleteDelegatedTodos(item.output));
271
293
  }
294
+ function hasFailedDelegatedPlanState(result) {
295
+ const toolResults = result?.metadata?.executedToolResults;
296
+ return Array.isArray(toolResults)
297
+ && toolResults.some((item) => isPlanToolName(item.toolName) && hasFailedDelegatedTodos(item.output));
298
+ }
299
+ function hasDelegatedNonPlanToolEvidence(result) {
300
+ const toolResults = result?.metadata?.executedToolResults;
301
+ return Array.isArray(toolResults)
302
+ && toolResults.some((item) => typeof item.toolName === "string" && !isPlanToolName(item.toolName) && item.isError !== true);
303
+ }
304
+ function hasNonPlanToolEvidenceItems(items) {
305
+ return items.some((item) => typeof item.toolName === "string" && !isPlanToolName(item.toolName) && item.isError !== true);
306
+ }
307
+ function buildDelegatedExternalPlanEvidenceSummary(items) {
308
+ const evidence = items
309
+ .filter((item) => typeof item.toolName === "string" && !isPlanToolName(item.toolName) && item.isError !== true)
310
+ .map((item) => {
311
+ const output = typeof item.output === "string" ? item.output : JSON.stringify(item.output ?? "");
312
+ return `## ${String(item.toolName)}\n${output}`;
313
+ });
314
+ return [
315
+ "Status: completed",
316
+ "Summary:",
317
+ "- Completed delegated recovery after collecting non-planning tool evidence.",
318
+ "",
319
+ "Evidence:",
320
+ evidence.length > 0 ? evidence.join("\n\n") : "(no non-planning tool evidence captured)",
321
+ ].join("\n");
322
+ }
272
323
  function needsDelegatedPlanRecovery(binding, result) {
273
324
  return binding?.harnessRuntime.executionContract?.requiresPlan === true
274
- && (!hasDelegatedPlanEvidence(result) || hasIncompleteDelegatedPlanState(result));
325
+ && (!hasDelegatedPlanEvidence(result)
326
+ || hasIncompleteDelegatedPlanState(result)
327
+ || (hasFailedDelegatedPlanState(result) && !hasDelegatedNonPlanToolEvidence(result)));
275
328
  }
276
329
  function readUpstreamToolEvidence(event) {
277
330
  if (typeof event !== "object" || event === null) {
@@ -334,11 +387,78 @@ const DELEGATED_PLAN_EVIDENCE_RETRY_INSTRUCTION = [
334
387
  "Then continue the task to completion, update TODO statuses after evidence steps, and close every TODO as completed or failed before the final answer.",
335
388
  ].join("\n");
336
389
  const DELEGATED_PLAN_EVIDENCE_FINAL_RETRY_INSTRUCTION = [
337
- "The delegated task still has no visible TODO planning evidence.",
338
- "Use the actual write_todos tool interface now. Do not print JSON, markdown, or a tool-call transcript as text.",
339
- "The next runtime event must be the write_todos tool call result, not an assistant message describing the call.",
340
- "After write_todos succeeds, continue the delegated task and close every TODO as completed or failed.",
390
+ "The delegated task has already attempted TODO planning but still has no successful non-planning tool evidence.",
391
+ "Do not call write_todos or read_todos next.",
392
+ "The next runtime event must be one available non-planning evidence tool call.",
393
+ "After that tool returns, update or close the TODO board and provide the final answer required by the agent response format.",
341
394
  ].join("\n");
395
+ function buildDelegatedPlanEvidenceRecoveryOptions(binding, baseOptions, requestText = "") {
396
+ const nonPlanningTools = binding
397
+ ? getBindingPrimaryTools(binding).filter((tool) => !isPlanToolName(tool.name))
398
+ : [];
399
+ const configuredRecoveryTool = resolveConfiguredRecoveryEvidenceTool(binding, nonPlanningTools, requestText);
400
+ const externalPlanEvidenceTools = configuredRecoveryTool
401
+ ? [configuredRecoveryTool]
402
+ : nonPlanningTools.length === 1
403
+ ? [{
404
+ name: nonPlanningTools[0].name,
405
+ args: buildExternalPlanEvidenceArgs(nonPlanningTools[0], requestText),
406
+ id: "delegated-plan-evidence-tool-1",
407
+ }]
408
+ : undefined;
409
+ return {
410
+ ...baseOptions,
411
+ suppressInitialRequiredPlanInstruction: true,
412
+ externalPlanEvidence: true,
413
+ ...(externalPlanEvidenceTools ? { externalPlanEvidenceTools } : {}),
414
+ state: {
415
+ ...(typeof baseOptions.state === "object" && baseOptions.state !== null ? baseOptions.state : {}),
416
+ [PROMPTED_JSON_TOOL_POLICY_KEY]: "nonPlanningEvidence",
417
+ },
418
+ };
419
+ }
420
+ function resolveConfiguredRecoveryEvidenceTool(binding, nonPlanningTools, requestText) {
421
+ const configured = binding?.harnessRuntime.executionContract?.recoveryEvidenceTool;
422
+ const configuredName = typeof configured?.name === "string" ? configured.name.trim() : "";
423
+ if (!configuredName) {
424
+ return undefined;
425
+ }
426
+ const tool = nonPlanningTools.find((candidate) => candidate.name === configuredName);
427
+ if (!tool) {
428
+ return undefined;
429
+ }
430
+ return {
431
+ name: tool.name,
432
+ args: configured?.args ?? buildExternalPlanEvidenceArgs(tool, requestText),
433
+ id: "delegated-plan-evidence-tool-1",
434
+ };
435
+ }
436
+ function buildExternalPlanEvidenceArgs(tool, requestText) {
437
+ const properties = tool?.modelSchema && typeof tool.modelSchema === "object"
438
+ && tool.modelSchema !== null
439
+ && typeof tool.modelSchema.properties === "object"
440
+ && tool.modelSchema.properties !== null
441
+ ? tool.modelSchema.properties
442
+ : {};
443
+ const refs = extractExplicitResourceReferences(requestText);
444
+ const args = {};
445
+ const firstUrl = refs.find((ref) => /^https?:\/\//iu.test(ref));
446
+ if ("url" in properties && firstUrl) {
447
+ args.url = firstUrl;
448
+ }
449
+ if ("sources" in properties && refs.length > 0) {
450
+ args.sources = refs.map((ref) => /^https?:\/\//iu.test(ref)
451
+ ? { type: "url", url: ref }
452
+ : { type: "text", text: ref });
453
+ }
454
+ if ("question" in properties) {
455
+ args.question = requestText;
456
+ }
457
+ else if ("query" in properties) {
458
+ args.query = requestText;
459
+ }
460
+ return args;
461
+ }
342
462
  function looksLikeRawCommandTranscript(value) {
343
463
  const normalized = value.trim();
344
464
  return /^(?:stdout|stderr)\s*:/iu.test(normalized)
@@ -505,18 +625,21 @@ export class AgentRuntimeAdapter {
505
625
  if (!this.options.functionToolContextResolver) {
506
626
  return undefined;
507
627
  }
628
+ const publicRequestId = typeof options.requestId === "string" && options.requestId.includes(":")
629
+ ? options.requestId.split(":")[0]
630
+ : options.requestId;
508
631
  const backend = this.resolveBuiltinMiddlewareBackend(binding, options);
509
632
  return {
510
633
  ...this.options.functionToolContextResolver({
511
634
  binding,
512
635
  sessionId: options.sessionId,
513
- requestId: options.requestId,
636
+ requestId: publicRequestId,
514
637
  }),
515
638
  backend,
516
639
  invocation: {
517
640
  ...(options.context ? { context: options.context } : {}),
518
641
  ...(options.sessionId ? { sessionId: options.sessionId } : {}),
519
- ...(options.requestId ? { requestId: options.requestId } : {}),
642
+ ...(publicRequestId ? { requestId: publicRequestId } : {}),
520
643
  },
521
644
  };
522
645
  }
@@ -1195,13 +1318,25 @@ export class AgentRuntimeAdapter {
1195
1318
  if (!selectedBinding) {
1196
1319
  return null;
1197
1320
  }
1198
- const runDelegatedRequest = (text, requestSuffix = "", delegatedOptions = {}) => this.invoke(selectedBinding, text, sessionId, `${requestId}:${subagentType}${requestSuffix}`, undefined, [], {
1199
- context: options.context,
1200
- state: options.state,
1201
- files: options.files,
1202
- memoryContext: options.memoryContext,
1203
- ...delegatedOptions,
1204
- });
1321
+ const runDelegatedRequest = async (text, requestSuffix = "", delegatedOptions = {}) => {
1322
+ const result = await this.invoke(selectedBinding, text, sessionId, `${requestId}:${subagentType}${requestSuffix}`, undefined, [], {
1323
+ context: options.context,
1324
+ state: options.state,
1325
+ files: options.files,
1326
+ memoryContext: options.memoryContext,
1327
+ ...delegatedOptions,
1328
+ });
1329
+ if (delegatedOptions.externalPlanEvidence === true) {
1330
+ return {
1331
+ ...result,
1332
+ metadata: {
1333
+ ...(result.metadata ?? {}),
1334
+ externalPlanEvidence: true,
1335
+ },
1336
+ };
1337
+ }
1338
+ return result;
1339
+ };
1205
1340
  let delegatedResult;
1206
1341
  try {
1207
1342
  delegatedResult = await runDelegatedRequest(requestText);
@@ -1261,6 +1396,25 @@ export class AgentRuntimeAdapter {
1261
1396
  };
1262
1397
  }
1263
1398
  }
1399
+ if (needsDelegatedPlanRecovery(selectedBinding, delegatedResult)) {
1400
+ const previousDelegatedResult = delegatedResult;
1401
+ try {
1402
+ delegatedResult = mergeDelegatedResultToolEvidence(await runDelegatedRequest([requestText, DELEGATED_PLAN_EVIDENCE_FINAL_RETRY_INSTRUCTION].filter(Boolean).join("\n\n"), ":plan-evidence-final-retry", buildDelegatedPlanEvidenceRecoveryOptions(selectedBinding, options, requestText)), previousDelegatedResult);
1403
+ }
1404
+ catch (error) {
1405
+ const output = error instanceof Error ? error.message : String(error);
1406
+ return {
1407
+ toolOutput: output,
1408
+ delegatedSubagentType: subagentType,
1409
+ delegatedResult: {
1410
+ ...delegatedResult,
1411
+ state: "failed",
1412
+ output,
1413
+ finalMessageText: output,
1414
+ },
1415
+ };
1416
+ }
1417
+ }
1264
1418
  if (needsDelegatedPlanRecovery(selectedBinding, delegatedResult)) {
1265
1419
  const output = buildDelegatedPlanEvidenceBlocker(selectedBinding.agent.id);
1266
1420
  return {
@@ -1318,9 +1472,10 @@ export class AgentRuntimeAdapter {
1318
1472
  const state = compactDelegation.delegatedResult?.state === "failed" ? "failed" : "completed";
1319
1473
  const uniqueToolNames = [...new Set(delegatedToolNames)];
1320
1474
  const toolEvidence = uniqueToolNames.length > 0 ? uniqueToolNames.join(", ") : "none";
1475
+ const hasPlanEvidence = hasDelegatedPlanEvidence(compactDelegation.delegatedResult);
1321
1476
  const fallbackTodoTrace = [
1322
1477
  `1) TODO observed: delegated to ${delegatedSubagentType}.`,
1323
- uniqueToolNames.some((toolName) => isPlanToolName(toolName))
1478
+ hasPlanEvidence
1324
1479
  ? `2) ${delegatedSubagentType}: TODO evidence observed; delegated specialist invoked write_todos.`
1325
1480
  : `2) ${delegatedSubagentType}: TODO evidence missing; delegated specialist did not expose write_todos in returned metadata.`,
1326
1481
  state === "failed"
@@ -1563,7 +1718,7 @@ export class AgentRuntimeAdapter {
1563
1718
  content: `Planned delegation tree: ${executableDelegations.map((item) => item.subagentType).join(" -> ")}.`,
1564
1719
  agentId: binding.agent.id,
1565
1720
  };
1566
- const runPlannedDelegation = async function* (subagentType, text, requestIdSuffix = "") {
1721
+ const runPlannedDelegation = async function* (subagentType, text, requestIdSuffix = "", delegatedOptions = {}) {
1567
1722
  const selectedBinding = this.options.bindingResolver?.(subagentType);
1568
1723
  if (!selectedBinding) {
1569
1724
  const output = `Configured subagent '${subagentType}' could not be resolved.`;
@@ -1582,11 +1737,14 @@ export class AgentRuntimeAdapter {
1582
1737
  try {
1583
1738
  for await (const chunk of this.stream(selectedBinding, text, sessionId, [], {
1584
1739
  context: options.context,
1585
- state: options.state,
1740
+ state: delegatedOptions.state ?? options.state,
1586
1741
  files: options.files,
1587
1742
  requestId: `${requestId}:${subagentType}${requestIdSuffix}`,
1588
1743
  memoryContext: options.memoryContext,
1589
1744
  profiling: options.profiling,
1745
+ suppressInitialRequiredPlanInstruction: delegatedOptions.suppressInitialRequiredPlanInstruction,
1746
+ externalPlanEvidence: delegatedOptions.externalPlanEvidence,
1747
+ externalPlanEvidenceTools: delegatedOptions.externalPlanEvidenceTools,
1590
1748
  })) {
1591
1749
  if (typeof chunk === "string") {
1592
1750
  output += chunk;
@@ -1621,17 +1779,26 @@ export class AgentRuntimeAdapter {
1621
1779
  state: "failed",
1622
1780
  output,
1623
1781
  finalMessageText: output,
1624
- metadata: { executedToolResults },
1782
+ metadata: {
1783
+ executedToolResults,
1784
+ ...(delegatedOptions.externalPlanEvidence === true ? { externalPlanEvidence: true } : {}),
1785
+ },
1625
1786
  };
1626
1787
  }
1788
+ const finalOutput = delegatedOptions.externalPlanEvidence === true && hasNonPlanToolEvidenceItems(executedToolResults)
1789
+ ? buildDelegatedExternalPlanEvidenceSummary(executedToolResults)
1790
+ : sanitizeVisibleText(output);
1627
1791
  return {
1628
1792
  sessionId,
1629
1793
  requestId: `${requestId}:${subagentType}${requestIdSuffix}`,
1630
1794
  agentId: selectedBinding.agent.id,
1631
1795
  state: "completed",
1632
- output: sanitizeVisibleText(output),
1633
- finalMessageText: sanitizeVisibleText(output),
1634
- metadata: { executedToolResults },
1796
+ output: finalOutput,
1797
+ finalMessageText: finalOutput,
1798
+ metadata: {
1799
+ executedToolResults,
1800
+ ...(delegatedOptions.externalPlanEvidence === true ? { externalPlanEvidence: true } : {}),
1801
+ },
1635
1802
  };
1636
1803
  }.bind(this);
1637
1804
  for (const [index, planned] of executableDelegations.entries()) {
@@ -1658,7 +1825,7 @@ export class AgentRuntimeAdapter {
1658
1825
  }
1659
1826
  if (needsDelegatedPlanRecovery(selectedBinding, delegatedResult)) {
1660
1827
  const previousDelegatedResult = delegatedResult;
1661
- delegatedResult = mergeDelegatedResultToolEvidence(yield* runPlannedDelegation(planned.subagentType, [delegatedText, DELEGATED_PLAN_EVIDENCE_FINAL_RETRY_INSTRUCTION].filter(Boolean).join("\n\n"), ":plan-evidence-final-retry"), previousDelegatedResult);
1828
+ delegatedResult = mergeDelegatedResultToolEvidence(yield* runPlannedDelegation(planned.subagentType, [delegatedText, DELEGATED_PLAN_EVIDENCE_FINAL_RETRY_INSTRUCTION].filter(Boolean).join("\n\n"), ":plan-evidence-final-retry", buildDelegatedPlanEvidenceRecoveryOptions(selectedBinding, options, delegatedText)), previousDelegatedResult);
1662
1829
  }
1663
1830
  if (needsDelegatedPlanRecovery(selectedBinding, delegatedResult)) {
1664
1831
  const output = buildDelegatedPlanEvidenceBlocker(selectedBinding?.agent.id ?? planned.subagentType);
@@ -1789,17 +1956,20 @@ export class AgentRuntimeAdapter {
1789
1956
  agentId: selectedBinding.agent.id,
1790
1957
  };
1791
1958
  const childRequestId = `${requestId}:${subagentType}`;
1792
- const runDelegatedStreamAttempt = async function* (text, requestIdSuffix = "") {
1959
+ const runDelegatedStreamAttempt = async function* (text, requestIdSuffix = "", delegatedOptions = {}) {
1793
1960
  const executedToolResults = [];
1794
1961
  let output = "";
1795
1962
  try {
1796
1963
  for await (const chunk of this.stream(selectedBinding, text, sessionId, [], {
1797
1964
  context: options.context,
1798
- state: options.state,
1965
+ state: delegatedOptions.state ?? options.state,
1799
1966
  files: options.files,
1800
1967
  requestId: `${childRequestId}${requestIdSuffix}`,
1801
1968
  memoryContext: options.memoryContext,
1802
1969
  profiling: options.profiling,
1970
+ suppressInitialRequiredPlanInstruction: delegatedOptions.suppressInitialRequiredPlanInstruction,
1971
+ externalPlanEvidence: delegatedOptions.externalPlanEvidence,
1972
+ externalPlanEvidenceTools: delegatedOptions.externalPlanEvidenceTools,
1803
1973
  })) {
1804
1974
  if (typeof chunk === "string") {
1805
1975
  output += chunk;
@@ -1834,17 +2004,26 @@ export class AgentRuntimeAdapter {
1834
2004
  state: "failed",
1835
2005
  output,
1836
2006
  finalMessageText: output,
1837
- metadata: { executedToolResults },
2007
+ metadata: {
2008
+ executedToolResults,
2009
+ ...(delegatedOptions.externalPlanEvidence === true ? { externalPlanEvidence: true } : {}),
2010
+ },
1838
2011
  };
1839
2012
  }
2013
+ const finalOutput = delegatedOptions.externalPlanEvidence === true && hasNonPlanToolEvidenceItems(executedToolResults)
2014
+ ? buildDelegatedExternalPlanEvidenceSummary(executedToolResults)
2015
+ : sanitizeVisibleText(output);
1840
2016
  return {
1841
2017
  sessionId,
1842
2018
  requestId: `${childRequestId}${requestIdSuffix}`,
1843
2019
  agentId: selectedBinding.agent.id,
1844
2020
  state: "completed",
1845
- output: sanitizeVisibleText(output),
1846
- finalMessageText: sanitizeVisibleText(output),
1847
- metadata: { executedToolResults },
2021
+ output: finalOutput,
2022
+ finalMessageText: finalOutput,
2023
+ metadata: {
2024
+ executedToolResults,
2025
+ ...(delegatedOptions.externalPlanEvidence === true ? { externalPlanEvidence: true } : {}),
2026
+ },
1848
2027
  };
1849
2028
  }.bind(this);
1850
2029
  const delegatedText = buildDelegatedOwnedTaskInstruction({
@@ -1859,7 +2038,7 @@ export class AgentRuntimeAdapter {
1859
2038
  }
1860
2039
  if (needsDelegatedPlanRecovery(selectedBinding, delegatedResult)) {
1861
2040
  const previousDelegatedResult = delegatedResult;
1862
- delegatedResult = mergeDelegatedResultToolEvidence(yield* runDelegatedStreamAttempt([delegatedText, DELEGATED_PLAN_EVIDENCE_FINAL_RETRY_INSTRUCTION].filter(Boolean).join("\n\n"), ":plan-evidence-final-retry"), previousDelegatedResult);
2041
+ delegatedResult = mergeDelegatedResultToolEvidence(yield* runDelegatedStreamAttempt([delegatedText, DELEGATED_PLAN_EVIDENCE_FINAL_RETRY_INSTRUCTION].filter(Boolean).join("\n\n"), ":plan-evidence-final-retry", buildDelegatedPlanEvidenceRecoveryOptions(selectedBinding, options, delegatedText)), previousDelegatedResult);
1863
2042
  }
1864
2043
  if (needsDelegatedPlanRecovery(selectedBinding, delegatedResult)) {
1865
2044
  const output = buildDelegatedPlanEvidenceBlocker(selectedBinding.agent.id);
@@ -282,8 +282,30 @@ function resolveExecutionContract(agent) {
282
282
  if (!value) {
283
283
  return undefined;
284
284
  }
285
+ const recoveryEvidenceTool = typeof value.recoveryEvidenceTool === "string" && value.recoveryEvidenceTool.trim().length > 0
286
+ ? { name: value.recoveryEvidenceTool.trim() }
287
+ : typeof value.recoveryEvidenceTool === "object" && value.recoveryEvidenceTool && !Array.isArray(value.recoveryEvidenceTool)
288
+ ? value.recoveryEvidenceTool
289
+ : undefined;
290
+ const recoveryEvidenceToolName = recoveryEvidenceTool && typeof recoveryEvidenceTool.name === "string" && recoveryEvidenceTool.name.trim().length > 0
291
+ ? recoveryEvidenceTool.name.trim()
292
+ : undefined;
293
+ const recoveryEvidenceToolArgs = recoveryEvidenceTool
294
+ && typeof recoveryEvidenceTool.args === "object"
295
+ && recoveryEvidenceTool.args
296
+ && !Array.isArray(recoveryEvidenceTool.args)
297
+ ? recoveryEvidenceTool.args
298
+ : undefined;
285
299
  return {
286
300
  ...(value.requiresPlan === true ? { requiresPlan: true } : {}),
301
+ ...(recoveryEvidenceToolName
302
+ ? {
303
+ recoveryEvidenceTool: {
304
+ name: recoveryEvidenceToolName,
305
+ ...(recoveryEvidenceToolArgs ? { args: recoveryEvidenceToolArgs } : {}),
306
+ },
307
+ }
308
+ : {}),
287
309
  };
288
310
  }
289
311
  function resolveCompiledMiddleware(agent, models) {
@@ -50,6 +50,16 @@ function readExecutionContractConfig(agent) {
50
50
  ? value
51
51
  : undefined;
52
52
  }
53
+ function readRecoveryEvidenceToolName(value) {
54
+ if (typeof value === "string") {
55
+ return value.trim();
56
+ }
57
+ if (typeof value !== "object" || value === null || Array.isArray(value)) {
58
+ return "";
59
+ }
60
+ const name = value.name;
61
+ return typeof name === "string" ? name.trim() : "";
62
+ }
53
63
  function collectAgentToolNames(agent, tools, ownsDelegation) {
54
64
  const names = new Set(FRAMEWORK_EXECUTION_TOOL_NAMES);
55
65
  if (ownsDelegation) {
@@ -208,6 +218,13 @@ function validateAgentContract(agent, referencedSubagentIds, tools, refs, issues
208
218
  if (executionContract?.requiresPlan === true && builtinTools?.todos === false) {
209
219
  addIssue(issues, "agent.execution_contract.plan_without_todos", `Agent ${agent.id} requires plan evidence but disables todo tools. Enable todo tools or remove config.executionContract.requiresPlan.`);
210
220
  }
221
+ const recoveryEvidenceToolName = readRecoveryEvidenceToolName(executionContract?.recoveryEvidenceTool);
222
+ if (recoveryEvidenceToolName) {
223
+ const toolNames = collectAgentToolNames(agent, tools, ownsDelegation);
224
+ if (FRAMEWORK_EXECUTION_TOOL_NAMES.has(recoveryEvidenceToolName) || !toolNames.has(recoveryEvidenceToolName)) {
225
+ addIssue(issues, "agent.execution_contract.invalid_recovery_evidence_tool", `Agent ${agent.id} config.executionContract.recoveryEvidenceTool must name one declared non-planning tool.`);
226
+ }
227
+ }
211
228
  if (ownsDelegation) {
212
229
  if (hasTools) {
213
230
  addIssue(issues, "agent.orchestrator.mixed_tool_surface", `Delegating agent ${agent.id} defines both subagents and direct tools. Keep routing agents focused on delegation, and move execution tools to specialist agents.`);
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@botbotgo/agent-harness",
3
- "version": "0.0.464",
3
+ "version": "0.0.466",
4
4
  "description": "Workspace runtime for multi-agent applications",
5
5
  "license": "MIT",
6
6
  "type": "module",