@botbotgo/agent-harness 0.0.464 → 0.0.465

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,2 +1,2 @@
1
- export declare const AGENT_HARNESS_VERSION = "0.0.464";
1
+ export declare const AGENT_HARNESS_VERSION = "0.0.465";
2
2
  export declare const AGENT_HARNESS_RELEASE_DATE = "2026-05-04";
@@ -1,2 +1,2 @@
1
- export const AGENT_HARNESS_VERSION = "0.0.464";
1
+ export const AGENT_HARNESS_VERSION = "0.0.465";
2
2
  export const AGENT_HARNESS_RELEASE_DATE = "2026-05-04";
@@ -137,6 +137,26 @@ function hasIncompletePlanOutput(value) {
137
137
  }
138
138
  return null;
139
139
  }
140
/**
 * Reports whether a plan/TODO payload contains an entry whose status is "failed".
 *
 * Arrays are searched element by element. For objects, `status` is compared
 * after trimming and lower-casing, and the `todos`, `update`, `data`, `output`
 * and `summary` fields are searched as nested payloads. Values that are not
 * objects or arrays never count as failed.
 *
 * The walk is iterative and remembers visited objects, so a self-referential
 * payload terminates instead of overflowing the call stack.
 *
 * @param {unknown} value - Tool output, or any fragment of one.
 * @returns {boolean} True when a failed status is reachable from `value`.
 */
function hasFailedTodos(value) {
    const visited = new WeakSet();
    const pending = [value];
    while (pending.length > 0) {
        const current = pending.pop();
        if (typeof current !== "object" || current === null || visited.has(current)) {
            continue;
        }
        visited.add(current);
        if (Array.isArray(current)) {
            // Only the elements of an array are inspected, never the array's own fields.
            for (const entry of current) {
                pending.push(entry);
            }
            continue;
        }
        if (typeof current.status === "string" && current.status.trim().toLowerCase() === "failed") {
            return true;
        }
        pending.push(current.todos, current.update, current.data, current.output, current.summary);
    }
    return false;
}
157
/**
 * Reports whether any executed planning-tool result carries a failed TODO.
 *
 * An entry counts when `isPlanToolName` accepts its `toolName` and
 * `hasFailedTodos` finds a failure in its `output`.
 *
 * @param {Array<{ toolName?: unknown, output?: unknown }>} executedToolResults
 *   Tool results gathered so far. A non-array value is treated as "no results",
 *   and null entries are skipped, so the function never throws on them.
 * @returns {boolean} True when at least one planning-tool result reports failure.
 */
function hasFailedPlanStateInExecutedToolResults(executedToolResults) {
    if (!Array.isArray(executedToolResults)) {
        return false;
    }
    return executedToolResults.some((item) => item != null
        && isPlanToolName(item.toolName)
        && hasFailedTodos(item.output));
}
140
160
  function normalizePlanToolName(toolName) {
141
161
  return typeof toolName === "string" ? toolName.trim().toLowerCase().replace(/[\s-]+/gu, "_") : "";
142
162
  }
@@ -199,6 +219,9 @@ function buildExecutionRecoveryEvidence(params) {
199
219
  hasNonTodoToolResultEvidence: hasNonTodoToolEvidence(executedToolResults) || projectionState.emittedNonTodoToolResult || projectionState.emittedToolError,
200
220
  hasSuccessfulNonTodoToolResultEvidence: hasSuccessfulNonTodoToolEvidence(executedToolResults) || projectionState.emittedSuccessfulNonTodoToolResult,
201
221
  hasIncompletePlanState: projectionState.hasIncompletePlanState || hasIncompletePlanStateInExecutedToolResults(executedToolResults),
222
+ hasFailedPlanState: (projectionState.hasFailedPlanState || hasFailedPlanStateInExecutedToolResults(executedToolResults))
223
+ && !projectionState.emittedSuccessfulNonTodoToolResult
224
+ && !hasSuccessfulNonTodoToolEvidence(executedToolResults),
202
225
  hasPlanStateEvidence: projectionState.sawPlanState || hasIncompletePlanStateInExecutedToolResults(executedToolResults),
203
226
  hasOpenTaskDelegation: projectionState.openTaskDelegations > 0,
204
227
  hasFailedTaskDelegation: projectionState.hasFailedTaskDelegation
@@ -213,6 +236,7 @@ function buildExecutionRecoveryEvidence(params) {
213
236
  }
214
237
  function hasUnresolvedExecution(evidence) {
215
238
  return (evidence.hasIncompletePlanState
239
+ || evidence.hasFailedPlanState
216
240
  || evidence.hasFailedTaskDelegation
217
241
  || evidence.hasOpenTaskDelegation);
218
242
  }
@@ -389,6 +413,9 @@ function createUnresolvedExecutionError(evidence) {
389
413
  if (evidence.hasIncompletePlanState) {
390
414
  reasons.push("plan state still has unfinished work");
391
415
  }
416
+ if (evidence.hasFailedPlanState) {
417
+ reasons.push("plan state failed before non-TODO evidence returned");
418
+ }
392
419
  if (evidence.hasFailedTaskDelegation) {
393
420
  reasons.push("delegated task failed before surfacing final findings");
394
421
  }
@@ -471,7 +498,8 @@ function projectLocalToolExecutionProfileChunks(executedToolResults, prefix) {
471
498
  }
472
499
  export async function* streamRuntimeExecution(options) {
473
500
  let request = buildInvocationRequest(options.binding, options.history, options.input, options.runtimeOptions);
474
- if (requiresPlanEvidence(options.binding)) {
501
+ if (requiresPlanEvidence(options.binding)
502
+ && options.runtimeOptions.suppressInitialRequiredPlanInstruction !== true) {
475
503
  request = withPromptedJsonToolPolicy(appendToolRecoveryInstruction(request, buildInitialRequiredPlanInstruction(options.primaryTools)), "planning");
476
504
  }
477
505
  let emittedUnsafeStreamSideEffects = false;
@@ -480,6 +508,14 @@ export async function* streamRuntimeExecution(options) {
480
508
  const deferredStreamContent = [];
481
509
  let sawRetrySafeInvalidToolSelectionError = false;
482
510
  const projectionState = createStreamEventProjectionState();
511
+ if (options.runtimeOptions.externalPlanEvidence === true) {
512
+ projectionState.sawPlanState = true;
513
+ yield {
514
+ kind: "commentary",
515
+ content: `${options.binding.agent.id}: TODO evidence observed.`,
516
+ agentId: options.binding.agent.id,
517
+ };
518
+ }
483
519
  const requestId = options.runtimeOptions.requestId ?? options.sessionId;
484
520
  const buildRunnableConfig = (extra) => ({
485
521
  ...(options.resolveInvocationConfig
@@ -523,6 +559,7 @@ export async function* streamRuntimeExecution(options) {
523
559
  try {
524
560
  const rawStreamInput = buildRawModelMessages(options.binding, options.getSystemPrompt(options.binding), options.history, options.input, options.runtimeOptions.memoryContext);
525
561
  const streamInput = requiresPlanEvidence(options.binding)
562
+ && options.runtimeOptions.suppressInitialRequiredPlanInstruction !== true
526
563
  ? withPromptedJsonToolPolicy(rawStreamInput, "planning")
527
564
  : rawStreamInput;
528
565
  stream = await options.withTimeout(() => options.langChainStreamModel.stream(streamInput), computeRemainingTimeoutMs(options.streamDeadlineAt, options.invokeTimeoutMs), "model stream start", "stream");
@@ -865,6 +902,35 @@ export async function* streamRuntimeExecution(options) {
865
902
  const recoveredToolResults = Array.isArray(recovered.metadata?.executedToolResults)
866
903
  ? recovered.metadata.executedToolResults
867
904
  : [];
905
+ const originalExecutionEvidence = buildExecutionRecoveryEvidence({ projectionState });
906
+ const recoveredExecutionEvidence = buildExecutionRecoveryEvidence({
907
+ projectionState: createStreamEventProjectionState(),
908
+ executedToolResults: recoveredToolResults,
909
+ });
910
+ const recoveredCarriesExecutionEvidence = recoveredExecutionEvidence.hasSuccessfulNonTodoToolResultEvidence
911
+ || recoveredExecutionEvidence.hasOpenTaskDelegation
912
+ || recoveredExecutionEvidence.hasDelegatedExecutionToolEvidence;
913
+ const recoveredHasUnresolvedExecution = hasUnresolvedExecution(recoveredExecutionEvidence)
914
+ || hasMissingDelegatedExecutionEvidence(recoveredExecutionEvidence)
915
+ || hasMissingDelegatedFindings(recoveredExecutionEvidence)
916
+ || (!recoveredCarriesExecutionEvidence
917
+ && (hasUnresolvedExecution(originalExecutionEvidence)
918
+ || hasMissingDelegatedExecutionEvidence(originalExecutionEvidence)
919
+ || hasMissingDelegatedFindings(originalExecutionEvidence)));
920
+ if (recoveredHasUnresolvedExecution) {
921
+ const effectiveRecoveryEvidence = recoveredCarriesExecutionEvidence
922
+ ? recoveredExecutionEvidence
923
+ : {
924
+ ...recoveredExecutionEvidence,
925
+ hasIncompletePlanState: originalExecutionEvidence.hasIncompletePlanState,
926
+ hasFailedPlanState: originalExecutionEvidence.hasFailedPlanState,
927
+ hasFailedTaskDelegation: originalExecutionEvidence.hasFailedTaskDelegation,
928
+ hasOpenTaskDelegation: originalExecutionEvidence.hasOpenTaskDelegation,
929
+ hasDelegatedAgentWithConfiguredTools: originalExecutionEvidence.hasDelegatedAgentWithConfiguredTools,
930
+ hasDelegatedExecutionToolEvidence: originalExecutionEvidence.hasDelegatedExecutionToolEvidence,
931
+ };
932
+ throw createUnresolvedExecutionError(effectiveRecoveryEvidence);
933
+ }
868
934
  for (const toolResult of recoveredToolResults) {
869
935
  yield {
870
936
  kind: "tool-result",
@@ -997,7 +1063,7 @@ export async function* streamRuntimeExecution(options) {
997
1063
  }
998
1064
  const retriedVisibleOutput = retried.output ? toVisibleContent(retried.output) : "";
999
1065
  const retriedIsDelegationFailureFinalReport = isDelegationFailureFinalReport(originalExecutionEvidence, executedToolResults, retriedVisibleOutput);
1000
- const retriedCarriesExecutionEvidence = retriedExecutionEvidence.hasToolResultEvidence
1066
+ const retriedCarriesExecutionEvidence = retriedExecutionEvidence.hasSuccessfulNonTodoToolResultEvidence
1001
1067
  || retriedExecutionEvidence.hasOpenTaskDelegation
1002
1068
  || retriedExecutionEvidence.hasDelegatedExecutionToolEvidence;
1003
1069
  const retriedHasUnresolvedExecution = !retriedIsDelegationFailureFinalReport
@@ -1013,6 +1079,7 @@ export async function* streamRuntimeExecution(options) {
1013
1079
  : {
1014
1080
  ...retriedExecutionEvidence,
1015
1081
  hasIncompletePlanState: originalExecutionEvidence.hasIncompletePlanState,
1082
+ hasFailedPlanState: originalExecutionEvidence.hasFailedPlanState,
1016
1083
  hasFailedTaskDelegation: originalExecutionEvidence.hasFailedTaskDelegation,
1017
1084
  hasOpenTaskDelegation: originalExecutionEvidence.hasOpenTaskDelegation,
1018
1085
  hasDelegatedAgentWithConfiguredTools: originalExecutionEvidence.hasDelegatedAgentWithConfiguredTools,
@@ -1254,6 +1321,7 @@ export async function* streamRuntimeExecution(options) {
1254
1321
  : {
1255
1322
  ...recoveredExecutionEvidence,
1256
1323
  hasIncompletePlanState: originalExecutionEvidence.hasIncompletePlanState,
1324
+ hasFailedPlanState: originalExecutionEvidence.hasFailedPlanState,
1257
1325
  hasFailedTaskDelegation: originalExecutionEvidence.hasFailedTaskDelegation,
1258
1326
  hasOpenTaskDelegation: originalExecutionEvidence.hasOpenTaskDelegation,
1259
1327
  hasDelegatedAgentWithConfiguredTools: originalExecutionEvidence.hasDelegatedAgentWithConfiguredTools,
@@ -13,8 +13,10 @@ const TOOL_FOLLOW_UP_INSTRUCTION = "One or more tool results are already availab
13
13
  const DEFAULT_MAX_TOOL_ITERATIONS = 10_000;
14
14
  const MAX_REPEATED_RECOVERY_WITHOUT_PROGRESS = 2;
15
15
  const MAX_REPEATED_PLAN_ONLY_AFTER_PLAN = 2;
16
+ const MAX_REPEATED_INVALID_EXTERNAL_PLAN_EVIDENCE_SELECTION = 2;
16
17
  const REQUIRED_PLAN_CONTRACT_MARKER = "This agent has a required visible planning contract.";
17
18
  const INITIAL_WRITE_TODOS_MARKER = "Your first action for this request must be write_todos";
19
+ const REQUIRED_SINGLE_EXTERNAL_PLAN_EVIDENCE_TOOL_INSTRUCTION = "The TODO board already exists. The next action must be exactly one non-planning evidence tool call. Do not call write_todos or read_todos, and do not call multiple tools in the same response.";
18
20
  function resolveSingleBootstrapEvidenceTool(primaryTools) {
19
21
  const evidenceTools = primaryTools
20
22
  .map((tool) => typeof tool.name === "string" ? tool.name.trim() : "")
@@ -142,6 +144,10 @@ function isPlanToolName(toolName) {
142
144
  || normalized === "call_write_todos"
143
145
  || normalized === "call_read_todos";
144
146
  }
147
/**
 * Decides whether a tool call targets a planning tool.
 *
 * Both the name on the call and the name produced by
 * `resolveModelFacingToolName` are tested with `isPlanToolName`; either
 * match is enough.
 *
 * @param {{ name?: unknown }} toolCall - Tool call emitted by the model.
 * @param {unknown} toolNameMapping - Passed through to `resolveModelFacingToolName`.
 * @param {unknown} primaryTools - Passed through to `resolveModelFacingToolName`.
 * @returns {boolean} Result of the `isPlanToolName` checks.
 */
function isPlanToolCall(toolCall, toolNameMapping, primaryTools) {
    const rawName = toolCall.name;
    // A missing name is resolved as the empty string.
    const modelFacingName = resolveModelFacingToolName(String(rawName ?? ""), toolNameMapping, primaryTools);
    return isPlanToolName(rawName) || isPlanToolName(modelFacingName);
}
145
151
  function isFallbackTodoCompletionToolCall(toolCall) {
146
152
  return typeof toolCall.id === "string"
147
153
  && toolCall.id.startsWith("fallback-complete-")
@@ -222,7 +228,11 @@ function extractLatestUserInput(request) {
222
228
  const messages = Array.isArray(typedRequest.messages) ? typedRequest.messages : [];
223
229
  for (let index = messages.length - 1; index >= 0; index -= 1) {
224
230
  const candidate = messages[index];
225
- if (candidate?.role !== "user" || typeof candidate.content !== "string") {
231
+ const role = typeof candidate?.role === "string" ? candidate.role.trim().toLowerCase() : "";
232
+ const messageType = typeof candidate?._getType === "function" ? String(candidate._getType()).trim().toLowerCase() : "";
233
+ const constructorType = Array.isArray(candidate?.id) ? String(candidate.id.at(-1)).trim().toLowerCase() : "";
234
+ const isUserMessage = role === "user" || role === "human" || messageType === "human" || constructorType === "humanmessage";
235
+ if (!isUserMessage || typeof candidate?.content !== "string") {
226
236
  continue;
227
237
  }
228
238
  const normalized = candidate.content.trim();
@@ -284,6 +294,7 @@ export async function runLocalToolInvocationLoop({ binding, request, primaryTool
284
294
  let lastRecoveryExecutedCount = -1;
285
295
  let repeatedRecoveryWithoutProgress = 0;
286
296
  let repeatedPlanOnlyAfterPlan = 0;
297
+ let repeatedInvalidExternalPlanEvidenceSelection = 0;
287
298
  let pendingResult;
288
299
  let result;
289
300
  const toolCatalog = new Map();
@@ -422,6 +433,26 @@ export async function runLocalToolInvocationLoop({ binding, request, primaryTool
422
433
  activeRequest = appendToolRecoveryInstruction(activeRequest, missingPlanRecoveryInstruction);
423
434
  continue;
424
435
  }
436
+ if (requiresPlanEvidence(binding)
437
+ && externalPlanEvidence === true
438
+ && hasPlanStateEvidence(executedToolResults, externalPlanEvidence)
439
+ && !hasNonTodoToolEvidence(executedToolResults)
440
+ && toolCalls.length > 0
441
+ && (toolCalls.length !== 1 || isPlanToolCall(toolCalls[0], toolNameMapping, primaryTools))) {
442
+ repeatedInvalidExternalPlanEvidenceSelection += 1;
443
+ if (repeatedInvalidExternalPlanEvidenceSelection >= MAX_REPEATED_INVALID_EXTERNAL_PLAN_EVIDENCE_SELECTION) {
444
+ throw createToolLoopError({
445
+ reason: "model did not select exactly one non-planning evidence tool during delegated plan evidence recovery",
446
+ iteration,
447
+ maxToolIterations,
448
+ toolCalls,
449
+ executedToolResults,
450
+ });
451
+ }
452
+ activeRequest = appendToolRecoveryInstruction(activeRequest, REQUIRED_SINGLE_EXTERNAL_PLAN_EVIDENCE_TOOL_INSTRUCTION);
453
+ pendingResult = undefined;
454
+ continue;
455
+ }
425
456
  if (requiresPlanEvidence(binding)
426
457
  && hasPlanStateEvidence(executedToolResults, externalPlanEvidence)
427
458
  && !hasNonTodoToolEvidence(executedToolResults)
@@ -429,6 +460,12 @@ export async function runLocalToolInvocationLoop({ binding, request, primaryTool
429
460
  && toolCalls.every((toolCall) => isPlanToolName(toolCall.name))) {
430
461
  repeatedPlanOnlyAfterPlan += 1;
431
462
  if (repeatedPlanOnlyAfterPlan >= MAX_REPEATED_PLAN_ONLY_AFTER_PLAN) {
463
+ if (hasNonTodoToolEvidence(executedToolResults)) {
464
+ return {
465
+ result: buildDeterministicFinalFromToolEvidence(executedToolResults),
466
+ executedToolResults,
467
+ };
468
+ }
432
469
  throw createToolLoopError({
433
470
  reason: "model repeatedly selected only planning tools after the todo board already existed and before any non-planning evidence tool returned",
434
471
  iteration,
@@ -438,6 +475,12 @@ export async function runLocalToolInvocationLoop({ binding, request, primaryTool
438
475
  });
439
476
  }
440
477
  if (iteration + 1 === maxToolIterations) {
478
+ if (hasNonTodoToolEvidence(executedToolResults)) {
479
+ return {
480
+ result: buildDeterministicFinalFromToolEvidence(executedToolResults),
481
+ executedToolResults,
482
+ };
483
+ }
441
484
  throw createToolLoopError({
442
485
  reason: "maximum iterations reached",
443
486
  iteration,
@@ -452,6 +495,7 @@ export async function runLocalToolInvocationLoop({ binding, request, primaryTool
452
495
  }
453
496
  repeatedRecoveryWithoutProgress = 0;
454
497
  repeatedPlanOnlyAfterPlan = 0;
498
+ repeatedInvalidExternalPlanEvidenceSelection = 0;
455
499
  const canReplayToolCalls = usedExternalPlanEvidenceToolThisIteration
456
500
  || canReplayToolCallsLocally(binding, toolCalls, primaryTools, toolNameMapping, executableTools, builtinExecutableTools);
457
501
  debugLocalToolReplay({
@@ -69,6 +69,13 @@ function readTodoContent(todo) {
69
69
  }
70
70
  return "";
71
71
  }
72
/**
 * Flags TODO text that carries no real description.
 *
 * After trimming and lower-casing, content is low-signal when it is empty,
 * a bare number, or "step"/"todo" followed by optional whitespace and a number.
 *
 * @param {string} content - TODO text to classify.
 * @returns {boolean} True when the text is empty or a bare placeholder label.
 */
function isLowSignalTodoContent(content) {
    const label = content.trim().toLowerCase();
    // One pattern covers "12", "step 3" and "todo7".
    return label === "" || /^(?:(?:step|todo)\s*)?\d+$/.test(label);
}
72
79
  function normalizeTodoStatus(value) {
73
80
  if (typeof value !== "string") {
74
81
  return "pending";
@@ -90,6 +97,9 @@ export function summarizeBuiltinWriteTodosArgs(args) {
90
97
  return [];
91
98
  }
92
99
  const content = readTodoContent(todo);
100
+ if (isLowSignalTodoContent(content)) {
101
+ return [];
102
+ }
93
103
  const status = normalizeTodoStatus(todo.status);
94
104
  const metadata = isRecord(todo.metadata) ? todo.metadata : undefined;
95
105
  return content ? [{
@@ -17,6 +17,7 @@ export type StreamEventProjectionState = {
17
17
  emittedDelegatedTerminalOutput: boolean;
18
18
  sawPlanState: boolean;
19
19
  hasIncompletePlanState: boolean;
20
+ hasFailedPlanState: boolean;
20
21
  openTaskDelegations: number;
21
22
  openToolCapableTaskDelegations: number;
22
23
  taskDelegationHasToolsStack: boolean[];
@@ -19,6 +19,7 @@ export function createStreamEventProjectionState() {
19
19
  emittedDelegatedTerminalOutput: false,
20
20
  sawPlanState: false,
21
21
  hasIncompletePlanState: false,
22
+ hasFailedPlanState: false,
22
23
  openTaskDelegations: 0,
23
24
  openToolCapableTaskDelegations: 0,
24
25
  taskDelegationHasToolsStack: [],
@@ -153,6 +154,16 @@ function readSummaryCounts(summary) {
153
154
  inProgress: typeof typed.inProgress === "number" ? typed.inProgress : 0,
154
155
  };
155
156
  }
157
/**
 * Extracts the `failed` counter from a plan summary.
 *
 * @param {unknown} summary - Candidate summary object.
 * @returns {number | null} The numeric `failed` value, or null when `summary`
 *   is not an object, when its numeric `total` is zero or negative, or when
 *   `failed` is not a number.
 */
function readSummaryFailureCount(summary) {
    const isUsable = typeof summary === "object" && summary !== null;
    if (!isUsable) {
        return null;
    }
    const { total, failed } = summary;
    // A summary that explicitly reports no TODOs gives no failure signal.
    const reportsNoTodos = typeof total === "number" && total <= 0;
    if (reportsNoTodos || typeof failed !== "number") {
        return null;
    }
    return failed;
}
156
167
  function hasIncompleteTodosArray(value) {
157
168
  if (!Array.isArray(value)) {
158
169
  return null;
@@ -168,6 +179,20 @@ function hasIncompleteTodosArray(value) {
168
179
  return status === "pending" || status === "in_progress";
169
180
  });
170
181
  }
182
/**
 * Checks a TODO list for entries whose status is "failed".
 *
 * The status is compared after trimming and lower-casing, so values such as
 * "Failed" or " failed " are recognised as well as the exact string.
 *
 * @param {unknown} value - Candidate TODO array.
 * @returns {boolean | null} Null when `value` is not an array or is empty
 *   (no signal); otherwise whether any object entry has a failed status.
 */
function hasFailedTodosArray(value) {
    if (!Array.isArray(value) || value.length === 0) {
        return null;
    }
    return value.some((item) => typeof item === "object"
        && item !== null
        && typeof item.status === "string"
        && item.status.trim().toLowerCase() === "failed");
}
171
196
  function getPlanStateCompleteness(value) {
172
197
  if (typeof value !== "object" || value === null) {
173
198
  return null;
@@ -207,6 +232,45 @@ function getPlanStateCompleteness(value) {
207
232
  }
208
233
  return null;
209
234
  }
235
/**
 * Derives a failed/not-failed verdict for a plan-state payload.
 *
 * Sources are consulted in a fixed order and the first one that yields a
 * non-null answer wins:
 *   1. the `failed` counter read by `readSummaryFailureCount(value.summary)`,
 *   2. the summary object itself, treated as a nested payload,
 *   3. `value.todos`,
 *   4. `value.update.todos`,
 *   5. `value.output`, then `value.data`, each treated as a nested payload.
 *
 * @param {unknown} value - Event or payload that may describe plan state.
 * @returns {boolean | null} True/false when a source gives a verdict, null
 *   when `value` is not an object or no source gives one.
 */
function getPlanStateFailure(value) {
    const isNonNullObject = (candidate) => typeof candidate === "object" && candidate !== null;
    if (!isNonNullObject(value)) {
        return null;
    }
    const { summary, todos, update, output, data } = value;
    const failedCount = readSummaryFailureCount(summary);
    if (failedCount !== null) {
        return failedCount > 0;
    }
    if (isNonNullObject(summary)) {
        const fromSummary = getPlanStateFailure(summary);
        if (fromSummary !== null) {
            return fromSummary;
        }
    }
    const fromTodos = hasFailedTodosArray(todos);
    if (fromTodos !== null) {
        return fromTodos;
    }
    if (isNonNullObject(update)) {
        const fromUpdate = hasFailedTodosArray(update.todos);
        if (fromUpdate !== null) {
            return fromUpdate;
        }
    }
    // `output` is consulted before `data`.
    for (const nested of [output, data]) {
        if (!isNonNullObject(nested)) {
            continue;
        }
        const verdict = getPlanStateFailure(nested);
        if (verdict !== null) {
            return verdict;
        }
    }
    return null;
}
210
274
  function parseMaybeJsonString(value) {
211
275
  const trimmed = value.trim();
212
276
  if (!trimmed || (!trimmed.startsWith("{") && !trimmed.startsWith("["))) {
@@ -435,6 +499,11 @@ export function projectRuntimeStreamEvent(params) {
435
499
  state.sawPlanState = true;
436
500
  state.hasIncompletePlanState = planStateCompleteness;
437
501
  }
502
+ const planStateFailure = getPlanStateFailure(event);
503
+ if (planStateFailure !== null) {
504
+ state.sawPlanState = true;
505
+ state.hasFailedPlanState = planStateFailure;
506
+ }
438
507
  const eventAgentId = typeof event === "object" && event !== null && typeof event.agentId === "string"
439
508
  ? event.agentId.trim()
440
509
  : "";
@@ -86,6 +86,19 @@ function readSchemaDescription(schemaPart) {
86
86
  }
87
87
  return readSchemaDescription(def?.innerType);
88
88
  }
89
/**
 * Determines whether a schema fragment describes a string value.
 *
 * A fragment qualifies when its own `type` is "string", or when its
 * definition (`_def`, falling back to `def`) has `typeName` "ZodString" or
 * `type` "string". Otherwise the definition's `innerType` is examined the
 * same way.
 *
 * @param {unknown} schemaPart - Schema fragment for a single field.
 * @returns {boolean} True when the fragment, or a nested inner type, is a string schema.
 */
function schemaPartExpectsString(schemaPart) {
    if (!isObject(schemaPart)) {
        return false;
    }
    const definition = schemaPart._def ?? schemaPart.def;
    const declaresString = schemaPart.type === "string"
        || definition?.typeName === "ZodString"
        || definition?.type === "string";
    return declaresString || schemaPartExpectsString(definition?.innerType);
}
89
102
  function fillLatestUserInputForQueryLikeFields(args, shape, latestUserInput) {
90
103
  const userInput = typeof latestUserInput === "string" ? latestUserInput.trim() : "";
91
104
  if (!userInput) {
@@ -100,7 +113,7 @@ function fillLatestUserInputForQueryLikeFields(args, shape, latestUserInput) {
100
113
  const description = readSchemaDescription(schemaPart);
101
114
  const keyIsQueryLike = ["query", "question", "prompt", "input", "text"].includes(normalizedKey);
102
115
  const descriptionIsQueryLike = /\b(?:query|question|prompt|input|text)\b/iu.test(description);
103
- if (!keyIsQueryLike && !descriptionIsQueryLike) {
116
+ if ((!keyIsQueryLike && !descriptionIsQueryLike) || !schemaPartExpectsString(schemaPart)) {
104
117
  continue;
105
118
  }
106
119
  next = {
@@ -110,6 +123,27 @@ function fillLatestUserInputForQueryLikeFields(args, shape, latestUserInput) {
110
123
  }
111
124
  return next;
112
125
  }
126
/**
 * Fills missing array arguments from resource references in the user's latest message.
 *
 * Nothing changes unless the trimmed input is non-empty, passes
 * `hasExplicitResourceReference`, and `extractExplicitResourceReferences`
 * returns at least one reference. For every field of `shape` that is absent
 * from the arguments and expects an array, each reference is converted with
 * `inferDiscriminatedArrayObjectFromString`; the field is filled only when
 * every reference converts.
 *
 * @param {object} args - Tool arguments collected so far; never mutated.
 * @param {object} shape - Field name to schema fragment mapping.
 * @param {unknown} latestUserInput - Latest user message; non-strings are ignored.
 * @returns {object} `args` itself when nothing was filled, otherwise a copy with the added fields.
 */
function fillLatestUserInputForResourceArrayFields(args, shape, latestUserInput) {
    if (typeof latestUserInput !== "string") {
        return args;
    }
    const userInput = latestUserInput.trim();
    if (!userInput || !hasExplicitResourceReference(userInput)) {
        return args;
    }
    const resourceRefs = extractExplicitResourceReferences(userInput);
    if (resourceRefs.length === 0) {
        return args;
    }
    return Object.entries(shape).reduce((filled, [key, schemaPart]) => {
        if (key in filled || !schemaPartExpectsArray(schemaPart)) {
            return filled;
        }
        const candidates = resourceRefs.map((ref) => inferDiscriminatedArrayObjectFromString(schemaPart, ref));
        const allConverted = candidates.length > 0 && candidates.every((candidate) => candidate !== null);
        return allConverted ? { ...filled, [key]: candidates } : filled;
    }, args);
}
113
147
  function mapCommonArgumentAliases(args, shape) {
114
148
  let next = args;
115
149
  if ("args" in shape && !("args" in next) && Array.isArray(next.argv)) {
@@ -133,10 +167,96 @@ function schemaPartExpectsArray(schemaPart) {
133
167
  }
134
168
  return schemaPartExpectsArray(def?.innerType);
135
169
  }
170
/**
 * Reads the field map of an object schema.
 *
 * The definition is taken from `_def`, falling back to `def`. Its `shape` may
 * be a value or a function returning the value. When the result is not a
 * record, the definition's `innerType` is tried instead.
 *
 * @param {unknown} schemaPart - Schema fragment to inspect.
 * @returns {object | undefined} The shape record, or undefined when none is found.
 */
function readObjectShape(schemaPart) {
    const definition = isObject(schemaPart) ? (schemaPart._def ?? schemaPart.def) : undefined;
    if (!definition) {
        return undefined;
    }
    // Invoke through the definition so a function-valued `shape` keeps its receiver.
    const resolvedShape = typeof definition.shape === "function" ? definition.shape() : definition.shape;
    if (isRecord(resolvedShape)) {
        return resolvedShape;
    }
    return readObjectShape(definition.innerType);
}
181
/**
 * Reads the string value of a literal schema.
 *
 * A definition is a literal when its `typeName` is "ZodLiteral" or its `type`
 * is "literal". The value comes from `value` when that is a string, otherwise
 * from the first entry of `values` when that is a string. In every other case
 * the definition's `innerType` is examined.
 *
 * @param {unknown} schemaPart - Schema fragment to inspect.
 * @returns {string | undefined} The literal string, or undefined when none is found.
 */
function readLiteralValue(schemaPart) {
    if (!isObject(schemaPart)) {
        return undefined;
    }
    const definition = schemaPart._def ?? schemaPart.def;
    const isLiteral = definition?.typeName === "ZodLiteral" || definition?.type === "literal";
    if (isLiteral) {
        if (typeof definition.value === "string") {
            return definition.value;
        }
        if (Array.isArray(definition.values) && typeof definition.values[0] === "string") {
            return definition.values[0];
        }
    }
    return readLiteralValue(definition?.innerType);
}
194
/**
 * Finds the element schema of an array schema.
 *
 * A definition is an array when its `typeName` is "ZodArray" or its `type` is
 * "array"; the element is then `element`, falling back to `type`. Otherwise
 * the definition's `innerType` is examined.
 *
 * @param {unknown} schemaPart - Schema fragment to inspect.
 * @returns {unknown} The element schema, or undefined when no array definition is found.
 */
function readArrayElementSchema(schemaPart) {
    if (!isObject(schemaPart)) {
        return undefined;
    }
    const definition = schemaPart._def ?? schemaPart.def;
    const isArrayDefinition = definition?.typeName === "ZodArray" || definition?.type === "array";
    return isArrayDefinition
        ? (definition.element ?? definition.type)
        : readArrayElementSchema(definition?.innerType);
}
204
/**
 * Converts a plain string into one variant of a discriminated-union array element.
 *
 * The array's element schema must be a discriminated union (`typeName`
 * "ZodDiscriminatedUnion", or `type` "union" with a string `discriminator`).
 * Each option that exposes an object shape and a string literal discriminator
 * is considered:
 *   - an http(s) URL maps to an option with a `url` field;
 *   - a path-looking single-line string maps to an option with a `path` field;
 *   - anything else maps to the first option with a `text` field.
 *
 * A `url` or `path` match anywhere in the union takes precedence over the
 * `text` fallback, so the result does not depend on whether the text variant
 * is declared before the more specific one.
 *
 * @param {unknown} schemaPart - Schema fragment of the array field.
 * @param {string} value - Raw string to convert; it is trimmed first.
 * @returns {object | null} `{ [discriminator]: literal, url | path | text: value }`,
 *   or null when the string is blank, the schema is not a discriminated-union
 *   array, or no option fits.
 */
function inferDiscriminatedArrayObjectFromString(schemaPart, value) {
    const raw = value.trim();
    if (!raw) {
        return null;
    }
    const elementSchema = readArrayElementSchema(schemaPart);
    if (!isObject(elementSchema)) {
        return null;
    }
    const def = elementSchema._def ?? elementSchema.def;
    const isDiscriminatedUnion = def?.typeName === "ZodDiscriminatedUnion"
        || (def?.type === "union" && typeof def.discriminator === "string");
    if (!isDiscriminatedUnion || typeof def.discriminator !== "string") {
        return null;
    }
    let options;
    if (Array.isArray(def.options)) {
        options = def.options;
    }
    else if (Array.isArray(elementSchema.options)) {
        options = elementSchema.options;
    }
    if (!options) {
        return null;
    }
    // These classifications depend only on the input, so compute them once.
    const looksLikeUrl = /^https?:\/\/\S+$/iu.test(raw);
    const looksLikePath = !looksLikeUrl
        && !raw.includes("\n")
        && /^(?:\.{1,2}\/|\/|~\/|[A-Za-z]:[\\/])|\\|\.[A-Za-z0-9]{1,12}$/u.test(raw);
    let textFallback = null;
    for (const option of options) {
        const optionShape = readObjectShape(option);
        if (!optionShape) {
            continue;
        }
        const discriminatorValue = readLiteralValue(optionShape[def.discriminator]);
        if (!discriminatorValue) {
            continue;
        }
        if (looksLikeUrl && "url" in optionShape) {
            return { [def.discriminator]: discriminatorValue, url: raw };
        }
        if (looksLikePath && "path" in optionShape) {
            return { [def.discriminator]: discriminatorValue, path: raw };
        }
        // Remember the first text variant, but keep looking for a specific match.
        if (textFallback === null && "text" in optionShape) {
            textFallback = { [def.discriminator]: discriminatorValue, text: raw };
        }
    }
    return textFallback;
}
+ }
136
249
  function mapStringArrayFields(args, shape) {
137
250
  let next = args;
138
251
  for (const [key, schemaPart] of Object.entries(shape)) {
139
252
  const value = next[key];
253
+ if (Array.isArray(value) && value.every((item) => typeof item === "string")) {
254
+ const mapped = value.map((item) => inferDiscriminatedArrayObjectFromString(schemaPart, item));
255
+ if (mapped.length > 0 && mapped.every((item) => item !== null)) {
256
+ next = { ...next, [key]: mapped };
257
+ }
258
+ continue;
259
+ }
140
260
  if (typeof value !== "string") {
141
261
  continue;
142
262
  }
@@ -175,6 +295,9 @@ function mapDelimitedListLikeArgs(args) {
175
295
  }
176
296
  return next;
177
297
  }
298
/**
 * Drops every argument whose key is not declared in the schema shape.
 *
 * Only the shape's own properties count as declared. Names inherited from
 * `Object.prototype` (for example `constructor` or `toString`) are therefore
 * stripped like any other unknown argument.
 *
 * @param {object} args - Tool arguments to filter; never mutated.
 * @param {object} shape - Field name to schema fragment mapping.
 * @returns {object} A new object holding only the declared arguments.
 */
function stripArgsToShape(args, shape) {
    return Object.fromEntries(Object.entries(args).filter(([key]) => Object.hasOwn(shape, key)));
}
178
301
  function dropDelimitedScalarPathArgs(args, shape) {
179
302
  let next = args;
180
303
  for (const [key, schemaPart] of Object.entries(shape)) {
@@ -199,7 +322,10 @@ function dropDelimitedScalarPathArgs(args, shape) {
199
322
  return next;
200
323
  }
201
324
  export function normalizeToolArgsForSchema(args, schema, rawArgsInput, options = {}) {
202
- const schemaDef = isObject(schema) ? schema._def : undefined;
325
+ const schemaDef = isObject(schema)
326
+ ? (schema._def
327
+ ?? schema.def)
328
+ : undefined;
203
329
  const zodShape = schemaDef
204
330
  ? isRecord(schemaDef.shape)
205
331
  ? schemaDef.shape
@@ -210,38 +336,47 @@ export function normalizeToolArgsForSchema(args, schema, rawArgsInput, options =
210
336
  const jsonShape = isObject(schema) && isRecord(schema.properties)
211
337
  ? (schema.properties ?? undefined)
212
338
  : undefined;
213
- const shape = zodShape && isRecord(zodShape) ? zodShape : jsonShape;
339
+ const plainShape = isObject(schema)
340
+ && !("_def" in schema)
341
+ && !("properties" in schema)
342
+ && Object.values(schema).every((value) => isObject(value))
343
+ ? schema
344
+ : undefined;
345
+ const shape = zodShape && isRecord(zodShape) ? zodShape : jsonShape ?? plainShape;
214
346
  if (!shape || !isRecord(shape)) {
215
347
  return mapDelimitedListLikeArgs(args);
216
348
  }
217
- const aliasMappedArgs = dropDelimitedScalarPathArgs(mapStringArrayFields(mapCommonArgumentAliases(args, shape), shape), shape);
349
+ const stripUnknownArgs = plainShape !== undefined && !zodShape && !jsonShape;
350
+ const aliasMappedArgs = dropDelimitedScalarPathArgs(fillLatestUserInputForResourceArrayFields(mapStringArrayFields(mapCommonArgumentAliases(args, shape), shape), shape, options.latestUserInput), shape);
218
351
  const keys = Object.keys(shape);
219
352
  if (keys.length !== 1) {
220
- return fillLatestUserInputForQueryLikeFields(aliasMappedArgs, shape, options.latestUserInput);
353
+ const filled = fillLatestUserInputForQueryLikeFields(aliasMappedArgs, shape, options.latestUserInput);
354
+ return stripUnknownArgs ? stripArgsToShape(filled, shape) : filled;
221
355
  }
222
356
  const [expectedKey] = keys;
223
357
  if (expectedKey in aliasMappedArgs) {
224
- return aliasMappedArgs;
358
+ return stripUnknownArgs ? stripArgsToShape(aliasMappedArgs, shape) : aliasMappedArgs;
225
359
  }
226
360
  const scalarMappedArgs = mapSingleFieldScalarArg(aliasMappedArgs, expectedKey, rawArgsInput);
227
361
  if (expectedKey in scalarMappedArgs) {
228
- return scalarMappedArgs;
362
+ return stripUnknownArgs ? stripArgsToShape(scalarMappedArgs, shape) : scalarMappedArgs;
229
363
  }
230
364
  if (Object.keys(scalarMappedArgs).length === 0 && typeof options.latestUserInput === "string") {
231
365
  const userInput = options.latestUserInput.trim();
232
366
  const resourceRefs = extractExplicitResourceReferences(userInput);
233
367
  if (resourceRefs.length === 1 && hasExplicitResourceReference(userInput)) {
234
- return {
368
+ const filled = {
235
369
  ...scalarMappedArgs,
236
370
  [expectedKey]: resourceRefs[0],
237
371
  };
372
+ return stripUnknownArgs ? stripArgsToShape(filled, shape) : filled;
238
373
  }
239
374
  }
240
375
  const genericScalarMappedArgs = mapSingleRemainingScalarArg(scalarMappedArgs, expectedKey);
241
376
  if (expectedKey in genericScalarMappedArgs) {
242
- return genericScalarMappedArgs;
377
+ return stripUnknownArgs ? stripArgsToShape(genericScalarMappedArgs, shape) : genericScalarMappedArgs;
243
378
  }
244
- return genericScalarMappedArgs;
379
+ return stripUnknownArgs ? stripArgsToShape(genericScalarMappedArgs, shape) : genericScalarMappedArgs;
245
380
  }
246
381
  export function extractToolCallsFromResult(result) {
247
382
  const capturedToolCalls = readCapturedPromptedJsonToolCalls(result);
@@ -62,6 +62,11 @@ export declare class AgentRuntimeAdapter {
62
62
  toolRuntimeContext?: Record<string, unknown>;
63
63
  suppressInitialRequiredPlanInstruction?: boolean;
64
64
  externalPlanEvidence?: boolean;
65
+ externalPlanEvidenceTools?: Array<{
66
+ name: string;
67
+ args?: Record<string, unknown>;
68
+ id?: string;
69
+ }>;
65
70
  }): Promise<RequestResult>;
66
71
  private tryDelegateWithCompactRouter;
67
72
  private buildCompactDelegationReport;
@@ -75,6 +80,13 @@ export declare class AgentRuntimeAdapter {
75
80
  memoryContext?: string;
76
81
  profiling?: boolean;
77
82
  toolRuntimeContext?: Record<string, unknown>;
83
+ suppressInitialRequiredPlanInstruction?: boolean;
84
+ externalPlanEvidence?: boolean;
85
+ externalPlanEvidenceTools?: Array<{
86
+ name: string;
87
+ args?: Record<string, unknown>;
88
+ id?: string;
89
+ }>;
78
90
  }): AsyncGenerator<RuntimeStreamChunk | string>;
79
91
  }
80
92
  export { AgentRuntimeAdapter as RuntimeAdapter, AGENT_INTERRUPT_SENTINEL_PREFIX, AGENT_INTERRUPT_SENTINEL_PREFIX as INTERRUPT_SENTINEL_PREFIX, buildDeepAgentCreateParams, buildLangChainCreateParams, DEFAULT_DEEPAGENT_RECURSION_LIMIT, materializeModelExposedBuiltinMiddlewareTools, resolveLangChainInvocationConfig, resolveRunnableCheckpointer, resolveRunnableInterruptOn, RuntimeOperationTimeoutError, };
@@ -8,7 +8,9 @@ import { extractMessageText } from "../utils/message-content.js";
8
8
  import { AGENT_INTERRUPT_SENTINEL_PREFIX, buildDeepAgentCreateParams, buildDeepAgentSystemPromptWithCapabilityCatalog, buildLangChainCreateParams, DEFAULT_DEEPAGENT_RECURSION_LIMIT, materializeModelExposedBuiltinMiddlewareTools, resolveLangChainInvocationConfig, resolveRunnableCheckpointer, resolveRunnableInterruptOn, shouldAttachDeepAgentBackend, shouldAttachDeepAgentCheckpointer, shouldAttachDeepAgentStore, } from "./agent-runtime-assembly.js";
9
9
  import { resolveDeepAgentSkillSourcePaths, resolveDeepAgentSkillSourceRootPaths, } from "./adapter/compat/deepagent-compat.js";
10
10
  import { buildToolNameMapping, } from "./adapter/tool/tool-name-mapping.js";
11
+ import { PROMPTED_JSON_TOOL_POLICY_KEY } from "./adapter/model/prompted-json-tool-policy.js";
11
12
  import { executeRequestInvocation } from "./adapter/flow/invocation-flow.js";
13
+ import { extractExplicitResourceReferences } from "./harness/system/runtime-memory-policy.js";
12
14
  import { streamRuntimeExecution } from "./adapter/flow/stream-runtime.js";
13
15
  import { resolveDeterministicFinalOutput } from "./adapter/invocation-result.js";
14
16
  import { applyToolRecoveryInstruction as applyToolRecoveryInstructionHelper, applyStrictToolJsonInstruction as applyStrictToolJsonInstructionHelper, callRuntimeWithToolParseRecovery as callRuntimeWithToolParseRecoveryHelper, createModelFallbackRunnable as createModelFallbackRunnableHelper, invokeWithProviderRetry as invokeWithProviderRetryHelper, iterateWithTimeout as iterateWithTimeoutHelper, materializeModelStream as materializeModelStreamHelper, RuntimeOperationTimeoutError, withRuntimeTimeout, } from "./adapter/runtime-shell.js";
@@ -243,6 +245,9 @@ function isDelegationOnlyDeepAgentBinding(binding) {
243
245
  && getBindingSkills(binding).length === 0;
244
246
  }
245
247
  function hasDelegatedPlanEvidence(result) {
248
+ if (result?.metadata?.externalPlanEvidence === true) {
249
+ return true;
250
+ }
246
251
  const toolResults = result?.metadata?.executedToolResults;
247
252
  return Array.isArray(toolResults)
248
253
  && toolResults.some((item) => isPlanToolName(item.toolName));
@@ -264,14 +269,62 @@ function hasIncompleteDelegatedTodos(value) {
264
269
  || hasIncompleteDelegatedTodos(record.stateSnapshot)
265
270
  || hasIncompleteDelegatedTodos(record.metadata);
266
271
  }
272
+ function hasFailedDelegatedTodos(value) {
273
+ if (Array.isArray(value)) {
274
+ return value.some((item) => hasFailedDelegatedTodos(item));
275
+ }
276
+ if (typeof value !== "object" || value === null) {
277
+ return false;
278
+ }
279
+ const record = value;
280
+ const status = typeof record.status === "string" ? record.status.trim().toLowerCase() : "";
281
+ if (status === "failed") {
282
+ return true;
283
+ }
284
+ return hasFailedDelegatedTodos(record.todos)
285
+ || hasFailedDelegatedTodos(record.update)
286
+ || hasFailedDelegatedTodos(record.stateSnapshot)
287
+ || hasFailedDelegatedTodos(record.metadata);
288
+ }
267
289
  function hasIncompleteDelegatedPlanState(result) {
268
290
  const toolResults = result?.metadata?.executedToolResults;
269
291
  return Array.isArray(toolResults)
270
292
  && toolResults.some((item) => isPlanToolName(item.toolName) && hasIncompleteDelegatedTodos(item.output));
271
293
  }
294
+ function hasFailedDelegatedPlanState(result) {
295
+ const toolResults = result?.metadata?.executedToolResults;
296
+ return Array.isArray(toolResults)
297
+ && toolResults.some((item) => isPlanToolName(item.toolName) && hasFailedDelegatedTodos(item.output));
298
+ }
299
+ function hasDelegatedNonPlanToolEvidence(result) {
300
+ const toolResults = result?.metadata?.executedToolResults;
301
+ return Array.isArray(toolResults)
302
+ && toolResults.some((item) => typeof item.toolName === "string" && !isPlanToolName(item.toolName) && item.isError !== true);
303
+ }
304
+ function hasNonPlanToolEvidenceItems(items) {
305
+ return items.some((item) => typeof item.toolName === "string" && !isPlanToolName(item.toolName) && item.isError !== true);
306
+ }
307
+ function buildDelegatedExternalPlanEvidenceSummary(items) {
308
+ const evidence = items
309
+ .filter((item) => typeof item.toolName === "string" && !isPlanToolName(item.toolName) && item.isError !== true)
310
+ .map((item) => {
311
+ const output = typeof item.output === "string" ? item.output : JSON.stringify(item.output ?? "");
312
+ return `## ${String(item.toolName)}\n${output}`;
313
+ });
314
+ return [
315
+ "Status: completed",
316
+ "Summary:",
317
+ "- Completed delegated recovery after collecting non-planning tool evidence.",
318
+ "",
319
+ "Evidence:",
320
+ evidence.length > 0 ? evidence.join("\n\n") : "(no non-planning tool evidence captured)",
321
+ ].join("\n");
322
+ }
272
323
  function needsDelegatedPlanRecovery(binding, result) {
273
324
  return binding?.harnessRuntime.executionContract?.requiresPlan === true
274
- && (!hasDelegatedPlanEvidence(result) || hasIncompleteDelegatedPlanState(result));
325
+ && (!hasDelegatedPlanEvidence(result)
326
+ || hasIncompleteDelegatedPlanState(result)
327
+ || (hasFailedDelegatedPlanState(result) && !hasDelegatedNonPlanToolEvidence(result)));
275
328
  }
276
329
  function readUpstreamToolEvidence(event) {
277
330
  if (typeof event !== "object" || event === null) {
@@ -334,11 +387,87 @@ const DELEGATED_PLAN_EVIDENCE_RETRY_INSTRUCTION = [
334
387
  "Then continue the task to completion, update TODO statuses after evidence steps, and close every TODO as completed or failed before the final answer.",
335
388
  ].join("\n");
336
389
  const DELEGATED_PLAN_EVIDENCE_FINAL_RETRY_INSTRUCTION = [
337
- "The delegated task still has no visible TODO planning evidence.",
338
- "Use the actual write_todos tool interface now. Do not print JSON, markdown, or a tool-call transcript as text.",
339
- "The next runtime event must be the write_todos tool call result, not an assistant message describing the call.",
340
- "After write_todos succeeds, continue the delegated task and close every TODO as completed or failed.",
390
+ "The delegated task has already attempted TODO planning but still has no successful non-planning tool evidence.",
391
+ "Do not call write_todos or read_todos next.",
392
+ "The next runtime event must be one available non-planning evidence tool call.",
393
+ "After that tool returns, update or close the TODO board and provide the final answer required by the agent response format.",
341
394
  ].join("\n");
395
+ function buildDelegatedPlanEvidenceRecoveryOptions(binding, baseOptions, requestText = "") {
396
+ const nonPlanningTools = binding
397
+ ? getBindingPrimaryTools(binding).filter((tool) => !isPlanToolName(tool.name))
398
+ : [];
399
+ const explicitlyRequestedTools = resolveExplicitlyRequestedToolNames(requestText, nonPlanningTools.map((tool) => tool.name));
400
+ const externalPlanEvidenceTools = explicitlyRequestedTools.length === 1
401
+ ? [{
402
+ name: explicitlyRequestedTools[0],
403
+ args: buildExternalPlanEvidenceArgs(nonPlanningTools.find((tool) => tool.name === explicitlyRequestedTools[0]), requestText),
404
+ id: "delegated-plan-evidence-tool-1",
405
+ }]
406
+ : nonPlanningTools.length === 1
407
+ ? [{
408
+ name: nonPlanningTools[0].name,
409
+ args: buildExternalPlanEvidenceArgs(nonPlanningTools[0], requestText),
410
+ id: "delegated-plan-evidence-tool-1",
411
+ }]
412
+ : nonPlanningTools.length > 1
413
+ ? [{
414
+ name: nonPlanningTools[0].name,
415
+ args: buildExternalPlanEvidenceArgs(nonPlanningTools[0], requestText),
416
+ id: "delegated-plan-evidence-tool-1",
417
+ }]
418
+ : undefined;
419
+ return {
420
+ ...baseOptions,
421
+ suppressInitialRequiredPlanInstruction: true,
422
+ externalPlanEvidence: true,
423
+ ...(externalPlanEvidenceTools ? { externalPlanEvidenceTools } : {}),
424
+ state: {
425
+ ...(typeof baseOptions.state === "object" && baseOptions.state !== null ? baseOptions.state : {}),
426
+ [PROMPTED_JSON_TOOL_POLICY_KEY]: "nonPlanningEvidence",
427
+ },
428
+ };
429
+ }
430
+ function buildExternalPlanEvidenceArgs(tool, requestText) {
431
+ const properties = tool?.modelSchema && typeof tool.modelSchema === "object"
432
+ && tool.modelSchema !== null
433
+ && typeof tool.modelSchema.properties === "object"
434
+ && tool.modelSchema.properties !== null
435
+ ? tool.modelSchema.properties
436
+ : {};
437
+ const refs = extractExplicitResourceReferences(requestText);
438
+ const args = {};
439
+ const firstUrl = refs.find((ref) => /^https?:\/\//iu.test(ref));
440
+ if ("url" in properties && firstUrl) {
441
+ args.url = firstUrl;
442
+ }
443
+ if ("sources" in properties && refs.length > 0) {
444
+ args.sources = refs.map((ref) => /^https?:\/\//iu.test(ref)
445
+ ? { type: "url", url: ref }
446
+ : { type: "text", text: ref });
447
+ }
448
+ if ("question" in properties) {
449
+ args.question = requestText;
450
+ }
451
+ else if ("query" in properties) {
452
+ args.query = requestText;
453
+ }
454
+ return args;
455
+ }
456
+ function escapeRegExp(value) {
457
+ return value.replace(/[.*+?^${}()|[\]\\]/gu, "\\$&");
458
+ }
459
+ function resolveExplicitlyRequestedToolNames(text, availableToolNames) {
460
+ if (!text.trim() || availableToolNames.length === 0) {
461
+ return [];
462
+ }
463
+ return availableToolNames.filter((toolName) => {
464
+ const trimmed = toolName.trim();
465
+ if (!trimmed) {
466
+ return false;
467
+ }
468
+ return new RegExp(`(^|[^A-Za-z0-9_])${escapeRegExp(trimmed)}([^A-Za-z0-9_]|$)`, "u").test(text);
469
+ });
470
+ }
342
471
  function looksLikeRawCommandTranscript(value) {
343
472
  const normalized = value.trim();
344
473
  return /^(?:stdout|stderr)\s*:/iu.test(normalized)
@@ -505,18 +634,21 @@ export class AgentRuntimeAdapter {
505
634
  if (!this.options.functionToolContextResolver) {
506
635
  return undefined;
507
636
  }
637
+ const publicRequestId = typeof options.requestId === "string" && options.requestId.includes(":")
638
+ ? options.requestId.split(":")[0]
639
+ : options.requestId;
508
640
  const backend = this.resolveBuiltinMiddlewareBackend(binding, options);
509
641
  return {
510
642
  ...this.options.functionToolContextResolver({
511
643
  binding,
512
644
  sessionId: options.sessionId,
513
- requestId: options.requestId,
645
+ requestId: publicRequestId,
514
646
  }),
515
647
  backend,
516
648
  invocation: {
517
649
  ...(options.context ? { context: options.context } : {}),
518
650
  ...(options.sessionId ? { sessionId: options.sessionId } : {}),
519
- ...(options.requestId ? { requestId: options.requestId } : {}),
651
+ ...(publicRequestId ? { requestId: publicRequestId } : {}),
520
652
  },
521
653
  };
522
654
  }
@@ -1195,13 +1327,25 @@ export class AgentRuntimeAdapter {
1195
1327
  if (!selectedBinding) {
1196
1328
  return null;
1197
1329
  }
1198
- const runDelegatedRequest = (text, requestSuffix = "", delegatedOptions = {}) => this.invoke(selectedBinding, text, sessionId, `${requestId}:${subagentType}${requestSuffix}`, undefined, [], {
1199
- context: options.context,
1200
- state: options.state,
1201
- files: options.files,
1202
- memoryContext: options.memoryContext,
1203
- ...delegatedOptions,
1204
- });
1330
+ const runDelegatedRequest = async (text, requestSuffix = "", delegatedOptions = {}) => {
1331
+ const result = await this.invoke(selectedBinding, text, sessionId, `${requestId}:${subagentType}${requestSuffix}`, undefined, [], {
1332
+ context: options.context,
1333
+ state: options.state,
1334
+ files: options.files,
1335
+ memoryContext: options.memoryContext,
1336
+ ...delegatedOptions,
1337
+ });
1338
+ if (delegatedOptions.externalPlanEvidence === true) {
1339
+ return {
1340
+ ...result,
1341
+ metadata: {
1342
+ ...(result.metadata ?? {}),
1343
+ externalPlanEvidence: true,
1344
+ },
1345
+ };
1346
+ }
1347
+ return result;
1348
+ };
1205
1349
  let delegatedResult;
1206
1350
  try {
1207
1351
  delegatedResult = await runDelegatedRequest(requestText);
@@ -1261,6 +1405,25 @@ export class AgentRuntimeAdapter {
1261
1405
  };
1262
1406
  }
1263
1407
  }
1408
+ if (needsDelegatedPlanRecovery(selectedBinding, delegatedResult)) {
1409
+ const previousDelegatedResult = delegatedResult;
1410
+ try {
1411
+ delegatedResult = mergeDelegatedResultToolEvidence(await runDelegatedRequest([requestText, DELEGATED_PLAN_EVIDENCE_FINAL_RETRY_INSTRUCTION].filter(Boolean).join("\n\n"), ":plan-evidence-final-retry", buildDelegatedPlanEvidenceRecoveryOptions(selectedBinding, options, requestText)), previousDelegatedResult);
1412
+ }
1413
+ catch (error) {
1414
+ const output = error instanceof Error ? error.message : String(error);
1415
+ return {
1416
+ toolOutput: output,
1417
+ delegatedSubagentType: subagentType,
1418
+ delegatedResult: {
1419
+ ...delegatedResult,
1420
+ state: "failed",
1421
+ output,
1422
+ finalMessageText: output,
1423
+ },
1424
+ };
1425
+ }
1426
+ }
1264
1427
  if (needsDelegatedPlanRecovery(selectedBinding, delegatedResult)) {
1265
1428
  const output = buildDelegatedPlanEvidenceBlocker(selectedBinding.agent.id);
1266
1429
  return {
@@ -1318,9 +1481,10 @@ export class AgentRuntimeAdapter {
1318
1481
  const state = compactDelegation.delegatedResult?.state === "failed" ? "failed" : "completed";
1319
1482
  const uniqueToolNames = [...new Set(delegatedToolNames)];
1320
1483
  const toolEvidence = uniqueToolNames.length > 0 ? uniqueToolNames.join(", ") : "none";
1484
+ const hasPlanEvidence = hasDelegatedPlanEvidence(compactDelegation.delegatedResult);
1321
1485
  const fallbackTodoTrace = [
1322
1486
  `1) TODO observed: delegated to ${delegatedSubagentType}.`,
1323
- uniqueToolNames.some((toolName) => isPlanToolName(toolName))
1487
+ hasPlanEvidence
1324
1488
  ? `2) ${delegatedSubagentType}: TODO evidence observed; delegated specialist invoked write_todos.`
1325
1489
  : `2) ${delegatedSubagentType}: TODO evidence missing; delegated specialist did not expose write_todos in returned metadata.`,
1326
1490
  state === "failed"
@@ -1563,7 +1727,7 @@ export class AgentRuntimeAdapter {
1563
1727
  content: `Planned delegation tree: ${executableDelegations.map((item) => item.subagentType).join(" -> ")}.`,
1564
1728
  agentId: binding.agent.id,
1565
1729
  };
1566
- const runPlannedDelegation = async function* (subagentType, text, requestIdSuffix = "") {
1730
+ const runPlannedDelegation = async function* (subagentType, text, requestIdSuffix = "", delegatedOptions = {}) {
1567
1731
  const selectedBinding = this.options.bindingResolver?.(subagentType);
1568
1732
  if (!selectedBinding) {
1569
1733
  const output = `Configured subagent '${subagentType}' could not be resolved.`;
@@ -1582,11 +1746,14 @@ export class AgentRuntimeAdapter {
1582
1746
  try {
1583
1747
  for await (const chunk of this.stream(selectedBinding, text, sessionId, [], {
1584
1748
  context: options.context,
1585
- state: options.state,
1749
+ state: delegatedOptions.state ?? options.state,
1586
1750
  files: options.files,
1587
1751
  requestId: `${requestId}:${subagentType}${requestIdSuffix}`,
1588
1752
  memoryContext: options.memoryContext,
1589
1753
  profiling: options.profiling,
1754
+ suppressInitialRequiredPlanInstruction: delegatedOptions.suppressInitialRequiredPlanInstruction,
1755
+ externalPlanEvidence: delegatedOptions.externalPlanEvidence,
1756
+ externalPlanEvidenceTools: delegatedOptions.externalPlanEvidenceTools,
1590
1757
  })) {
1591
1758
  if (typeof chunk === "string") {
1592
1759
  output += chunk;
@@ -1621,17 +1788,26 @@ export class AgentRuntimeAdapter {
1621
1788
  state: "failed",
1622
1789
  output,
1623
1790
  finalMessageText: output,
1624
- metadata: { executedToolResults },
1791
+ metadata: {
1792
+ executedToolResults,
1793
+ ...(delegatedOptions.externalPlanEvidence === true ? { externalPlanEvidence: true } : {}),
1794
+ },
1625
1795
  };
1626
1796
  }
1797
+ const finalOutput = delegatedOptions.externalPlanEvidence === true && hasNonPlanToolEvidenceItems(executedToolResults)
1798
+ ? buildDelegatedExternalPlanEvidenceSummary(executedToolResults)
1799
+ : sanitizeVisibleText(output);
1627
1800
  return {
1628
1801
  sessionId,
1629
1802
  requestId: `${requestId}:${subagentType}${requestIdSuffix}`,
1630
1803
  agentId: selectedBinding.agent.id,
1631
1804
  state: "completed",
1632
- output: sanitizeVisibleText(output),
1633
- finalMessageText: sanitizeVisibleText(output),
1634
- metadata: { executedToolResults },
1805
+ output: finalOutput,
1806
+ finalMessageText: finalOutput,
1807
+ metadata: {
1808
+ executedToolResults,
1809
+ ...(delegatedOptions.externalPlanEvidence === true ? { externalPlanEvidence: true } : {}),
1810
+ },
1635
1811
  };
1636
1812
  }.bind(this);
1637
1813
  for (const [index, planned] of executableDelegations.entries()) {
@@ -1658,7 +1834,7 @@ export class AgentRuntimeAdapter {
1658
1834
  }
1659
1835
  if (needsDelegatedPlanRecovery(selectedBinding, delegatedResult)) {
1660
1836
  const previousDelegatedResult = delegatedResult;
1661
- delegatedResult = mergeDelegatedResultToolEvidence(yield* runPlannedDelegation(planned.subagentType, [delegatedText, DELEGATED_PLAN_EVIDENCE_FINAL_RETRY_INSTRUCTION].filter(Boolean).join("\n\n"), ":plan-evidence-final-retry"), previousDelegatedResult);
1837
+ delegatedResult = mergeDelegatedResultToolEvidence(yield* runPlannedDelegation(planned.subagentType, [delegatedText, DELEGATED_PLAN_EVIDENCE_FINAL_RETRY_INSTRUCTION].filter(Boolean).join("\n\n"), ":plan-evidence-final-retry", buildDelegatedPlanEvidenceRecoveryOptions(selectedBinding, options, delegatedText)), previousDelegatedResult);
1662
1838
  }
1663
1839
  if (needsDelegatedPlanRecovery(selectedBinding, delegatedResult)) {
1664
1840
  const output = buildDelegatedPlanEvidenceBlocker(selectedBinding?.agent.id ?? planned.subagentType);
@@ -1789,17 +1965,20 @@ export class AgentRuntimeAdapter {
1789
1965
  agentId: selectedBinding.agent.id,
1790
1966
  };
1791
1967
  const childRequestId = `${requestId}:${subagentType}`;
1792
- const runDelegatedStreamAttempt = async function* (text, requestIdSuffix = "") {
1968
+ const runDelegatedStreamAttempt = async function* (text, requestIdSuffix = "", delegatedOptions = {}) {
1793
1969
  const executedToolResults = [];
1794
1970
  let output = "";
1795
1971
  try {
1796
1972
  for await (const chunk of this.stream(selectedBinding, text, sessionId, [], {
1797
1973
  context: options.context,
1798
- state: options.state,
1974
+ state: delegatedOptions.state ?? options.state,
1799
1975
  files: options.files,
1800
1976
  requestId: `${childRequestId}${requestIdSuffix}`,
1801
1977
  memoryContext: options.memoryContext,
1802
1978
  profiling: options.profiling,
1979
+ suppressInitialRequiredPlanInstruction: delegatedOptions.suppressInitialRequiredPlanInstruction,
1980
+ externalPlanEvidence: delegatedOptions.externalPlanEvidence,
1981
+ externalPlanEvidenceTools: delegatedOptions.externalPlanEvidenceTools,
1803
1982
  })) {
1804
1983
  if (typeof chunk === "string") {
1805
1984
  output += chunk;
@@ -1834,17 +2013,26 @@ export class AgentRuntimeAdapter {
1834
2013
  state: "failed",
1835
2014
  output,
1836
2015
  finalMessageText: output,
1837
- metadata: { executedToolResults },
2016
+ metadata: {
2017
+ executedToolResults,
2018
+ ...(delegatedOptions.externalPlanEvidence === true ? { externalPlanEvidence: true } : {}),
2019
+ },
1838
2020
  };
1839
2021
  }
2022
+ const finalOutput = delegatedOptions.externalPlanEvidence === true && hasNonPlanToolEvidenceItems(executedToolResults)
2023
+ ? buildDelegatedExternalPlanEvidenceSummary(executedToolResults)
2024
+ : sanitizeVisibleText(output);
1840
2025
  return {
1841
2026
  sessionId,
1842
2027
  requestId: `${childRequestId}${requestIdSuffix}`,
1843
2028
  agentId: selectedBinding.agent.id,
1844
2029
  state: "completed",
1845
- output: sanitizeVisibleText(output),
1846
- finalMessageText: sanitizeVisibleText(output),
1847
- metadata: { executedToolResults },
2030
+ output: finalOutput,
2031
+ finalMessageText: finalOutput,
2032
+ metadata: {
2033
+ executedToolResults,
2034
+ ...(delegatedOptions.externalPlanEvidence === true ? { externalPlanEvidence: true } : {}),
2035
+ },
1848
2036
  };
1849
2037
  }.bind(this);
1850
2038
  const delegatedText = buildDelegatedOwnedTaskInstruction({
@@ -1859,7 +2047,7 @@ export class AgentRuntimeAdapter {
1859
2047
  }
1860
2048
  if (needsDelegatedPlanRecovery(selectedBinding, delegatedResult)) {
1861
2049
  const previousDelegatedResult = delegatedResult;
1862
- delegatedResult = mergeDelegatedResultToolEvidence(yield* runDelegatedStreamAttempt([delegatedText, DELEGATED_PLAN_EVIDENCE_FINAL_RETRY_INSTRUCTION].filter(Boolean).join("\n\n"), ":plan-evidence-final-retry"), previousDelegatedResult);
2050
+ delegatedResult = mergeDelegatedResultToolEvidence(yield* runDelegatedStreamAttempt([delegatedText, DELEGATED_PLAN_EVIDENCE_FINAL_RETRY_INSTRUCTION].filter(Boolean).join("\n\n"), ":plan-evidence-final-retry", buildDelegatedPlanEvidenceRecoveryOptions(selectedBinding, options, delegatedText)), previousDelegatedResult);
1863
2051
  }
1864
2052
  if (needsDelegatedPlanRecovery(selectedBinding, delegatedResult)) {
1865
2053
  const output = buildDelegatedPlanEvidenceBlocker(selectedBinding.agent.id);
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@botbotgo/agent-harness",
3
- "version": "0.0.464",
3
+ "version": "0.0.465",
4
4
  "description": "Workspace runtime for multi-agent applications",
5
5
  "license": "MIT",
6
6
  "type": "module",