@botbotgo/agent-harness 0.0.462 → 0.0.464

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,2 +1,2 @@
1
- export declare const AGENT_HARNESS_VERSION = "0.0.462";
1
+ export declare const AGENT_HARNESS_VERSION = "0.0.464";
2
2
  export declare const AGENT_HARNESS_RELEASE_DATE = "2026-05-04";
@@ -1,2 +1,2 @@
1
- export const AGENT_HARNESS_VERSION = "0.0.462";
1
+ export const AGENT_HARNESS_VERSION = "0.0.464";
2
2
  export const AGENT_HARNESS_RELEASE_DATE = "2026-05-04";
@@ -1 +1 @@
1
- When calling write_todos, every todo item must include both content and status. Do not send status-only updates. Retry by resending the full todo entry with the original content preserved.
1
+ When calling write_todos, every todo item must include both content and status. Use only these status values: pending, in_progress, completed. Do not send aliases such as not_started, open, active, done, blocked, failed, or cancelled. Do not send status-only updates. Retry by resending the full todo entry with the original content preserved.
@@ -1,5 +1,5 @@
1
1
  import { extractVisibleOutput, isToolCallRecoveryFailure, isRetrySafeInvalidToolSelectionError, appendToolRecoveryInstruction, resolveMissingPlanRecoveryInstruction, resolveExecutionWithoutToolEvidenceTextInstruction, shouldValidateExecutionWithoutToolEvidence, resolveToolCallRecoveryInstruction, sanitizeVisibleText, EXECUTION_WITH_TOOL_EVIDENCE_RETRY_INSTRUCTION, INVALID_TOOL_SELECTION_RECOVERY_INSTRUCTION, } from "../../parsing/output-parsing.js";
2
- import { DELEGATED_TASK_FAILURE_RECOVERY_INSTRUCTION, DELEGATION_ONLY_RECOVERY_INSTRUCTION, } from "../../prompts/runtime-prompts.js";
2
+ import { DELEGATED_TASK_FAILURE_RECOVERY_INSTRUCTION, DELEGATION_ONLY_RECOVERY_INSTRUCTION, REQUIRED_EXECUTION_CONTINUATION_INSTRUCTION, } from "../../prompts/runtime-prompts.js";
3
3
  import { buildInvocationRequest } from "../model/invocation-request.js";
4
4
  import { PROMPTED_JSON_TOOL_POLICY_KEY, withPromptedJsonToolPolicy } from "../model/prompted-json-tool-policy.js";
5
5
  import { buildRawModelMessages } from "../model/message-assembly.js";
@@ -25,6 +25,8 @@ const RUN_EVIDENCE_AFTER_PREMATURE_PLAN_CLOSE_INSTRUCTION = [
25
25
  "The required todo board was closed before any non-TODO evidence tool returned.",
26
26
  "Do not call write_todos again yet.",
27
27
  "Your next action must be exactly one non-TODO evidence tool call selected from the available tool descriptions and schemas.",
28
+ "If the current request or todo board explicitly names one available non-TODO tool, call that named tool.",
29
+ "Do not substitute a neighboring, broader, narrower, or similarly named tool when an exact available tool name is present.",
28
30
  "After that evidence tool returns, update the todo board and then provide the final answer required by the agent response format.",
29
31
  ].join("\n");
30
32
  function readPrimaryToolName(tool) {
@@ -43,11 +45,11 @@ function buildRunEvidenceAfterPlanInstruction(primaryTools) {
43
45
  `Available non-planning tool names: ${toolNames.join(", ")}.`,
44
46
  ].join("\n");
45
47
  }
46
- function resolveConfiguredPlanEvidenceTool(primaryTools) {
47
- const toolName = primaryTools
48
+ function resolveSingleConfiguredPlanEvidenceTool(primaryTools) {
49
+ const toolNames = primaryTools
48
50
  .map(readPrimaryToolName)
49
- .find((name) => name.length > 0 && !isPlanToolName(name));
50
- return toolName ? [{ name: toolName, args: {}, id: "stream-configured-plan-evidence-tool-1" }] : [];
51
+ .filter((name) => name.length > 0 && !isPlanToolName(name));
52
+ return toolNames.length === 1 ? [{ name: toolNames[0], args: {}, id: "stream-single-plan-evidence-tool-1" }] : [];
51
53
  }
52
54
  const INITIAL_REQUIRED_PLAN_INSTRUCTION = [
53
55
  "This agent has a required visible planning contract.",
@@ -255,6 +257,14 @@ function buildDeterministicFinalFromStreamToolEvidence(executedToolResults) {
255
257
  evidence.length > 0 ? evidence.join("\n\n") : "(no non-planning tool evidence captured)",
256
258
  ].join("\n");
257
259
  }
260
+ function latestStreamToolErrorRecoveryInstruction(executedToolResults) {
261
+ const latest = [...executedToolResults].reverse().find((item) => item.isError === true);
262
+ if (!latest) {
263
+ return null;
264
+ }
265
+ const message = typeof latest.output === "string" ? latest.output : JSON.stringify(latest.output);
266
+ return resolveToolCallRecoveryInstruction(new Error(message)) ?? REQUIRED_EXECUTION_CONTINUATION_INSTRUCTION;
267
+ }
258
268
  function hasUsefulVisibleSynthesis(value) {
259
269
  const trimmed = value.trim();
260
270
  if (trimmed.length < 80) {
@@ -697,6 +707,7 @@ export async function* streamRuntimeExecution(options) {
697
707
  const eventContainsNonRetrySafeChunk = projectedChunks.some((chunk) => chunk.kind !== "upstream-event"
698
708
  && chunk.kind !== "content"
699
709
  && !(chunk.kind === "tool-result" && isPlanToolName(chunk.toolName))
710
+ && !(chunk.kind === "tool-result" && chunk.isError === true)
700
711
  && !(chunk.kind === "tool-result" && chunk.isError === true && isRetrySafeInvalidToolSelectionError(chunk.output)));
701
712
  const hadPriorPlanToolResult = completedPlanToolResultCount > 0;
702
713
  const repeatedPlanToolResultBeforeEvidence = requiresPlanEvidence(options.binding)
@@ -704,7 +715,7 @@ export async function* streamRuntimeExecution(options) {
704
715
  && hadPriorPlanToolResult
705
716
  && projectedChunks.some((chunk) => chunk.kind === "tool-result" && isPlanToolName(chunk.toolName));
706
717
  if (repeatedPlanToolResultBeforeEvidence) {
707
- earlyStreamExternalPlanEvidenceTools = resolveConfiguredPlanEvidenceTool(options.primaryTools);
718
+ earlyStreamExternalPlanEvidenceTools = resolveSingleConfiguredPlanEvidenceTool(options.primaryTools);
708
719
  earlyStreamRecoveryInstruction = buildRunEvidenceAfterPlanInstruction(options.primaryTools);
709
720
  earlyStreamRecoverySuppressInitialPlan = true;
710
721
  break;
@@ -750,7 +761,7 @@ export async function* streamRuntimeExecution(options) {
750
761
  && (hadPriorPlanToolResult
751
762
  || projectedChunks.some((chunk) => isCompletedPlanToolResultChunk(chunk)))
752
763
  && !sawSuccessfulNonTodoToolResult) {
753
- earlyStreamExternalPlanEvidenceTools = resolveConfiguredPlanEvidenceTool(options.primaryTools);
764
+ earlyStreamExternalPlanEvidenceTools = resolveSingleConfiguredPlanEvidenceTool(options.primaryTools);
754
765
  earlyStreamRecoveryInstruction = buildRunEvidenceAfterPlanInstruction(options.primaryTools);
755
766
  earlyStreamRecoverySuppressInitialPlan = true;
756
767
  break;
@@ -794,10 +805,23 @@ export async function* streamRuntimeExecution(options) {
794
805
  })
795
806
  : null;
796
807
  const terminalDelegationOnlyRecoveryInstruction = resolveDelegationOnlyRecoveryInstruction(options.binding, terminalExecutionEvidence);
808
+ const terminalPrematurePlanCloseRecoveryInstruction = requiresPlanEvidence(options.binding)
809
+ && terminalExecutionEvidence.hasPlanStateEvidence
810
+ && !terminalExecutionEvidence.hasSuccessfulNonTodoToolResultEvidence
811
+ ? buildRunEvidenceAfterPlanInstruction(options.primaryTools)
812
+ : null;
797
813
  if (!emittedUnsafeStreamSideEffects
798
- && (terminalMissingPlanRecoveryInstruction || terminalDelegationOnlyRecoveryInstruction)) {
814
+ && (terminalMissingPlanRecoveryInstruction
815
+ || terminalPrematurePlanCloseRecoveryInstruction
816
+ || terminalDelegationOnlyRecoveryInstruction)) {
799
817
  earlyStreamRecoveryInstruction =
800
- terminalMissingPlanRecoveryInstruction ?? terminalDelegationOnlyRecoveryInstruction;
818
+ terminalMissingPlanRecoveryInstruction
819
+ ?? terminalPrematurePlanCloseRecoveryInstruction
820
+ ?? terminalDelegationOnlyRecoveryInstruction;
821
+ earlyStreamRecoverySuppressInitialPlan = terminalPrematurePlanCloseRecoveryInstruction !== null;
822
+ if (terminalPrematurePlanCloseRecoveryInstruction) {
823
+ earlyStreamExternalPlanEvidenceTools = resolveSingleConfiguredPlanEvidenceTool(options.primaryTools);
824
+ }
801
825
  break;
802
826
  }
803
827
  if (requiresPlanEvidence(options.binding) && hasCompletedPlanWithEvidence(terminalExecutionEvidence)) {
@@ -818,6 +842,7 @@ export async function* streamRuntimeExecution(options) {
818
842
  && !hasMissingDelegatedExecutionEvidence(terminalExecutionEvidence)
819
843
  && !hasMissingDelegatedFindings(terminalExecutionEvidence)
820
844
  && !terminalMissingPlanRecoveryInstruction
845
+ && !terminalPrematurePlanCloseRecoveryInstruction
821
846
  && !terminalDelegationOnlyRecoveryInstruction) {
822
847
  if (deferredStreamContent.length > 0) {
823
848
  yield* flushDeferredStreamContent();
@@ -929,6 +954,9 @@ export async function* streamRuntimeExecution(options) {
929
954
  const streamedRuntimeFailureRecoveryInstruction = projectionState.emittedOutput
930
955
  ? resolveStreamedRuntimeFailureRecoveryInstruction(projectionState.emittedOutput, streamedExecutionEvidence)
931
956
  : null;
957
+ const streamedToolErrorRecoveryInstruction = !emittedUnsafeStreamSideEffects
958
+ ? latestStreamToolErrorRecoveryInstruction(streamedToolResults)
959
+ : null;
932
960
  const missingPlanRecoveryInstruction = !hasUnresolvedExecution(streamedExecutionEvidence) && !delegatedExecutionRecoveryInstruction
933
961
  ? resolveMissingPlanRecoveryInstruction({
934
962
  request,
@@ -941,7 +969,8 @@ export async function* streamRuntimeExecution(options) {
941
969
  : null;
942
970
  const retryInstruction = !emittedUnsafeStreamSideEffects && sawRetrySafeInvalidToolSelectionError
943
971
  ? INVALID_TOOL_SELECTION_RECOVERY_INSTRUCTION
944
- : delegatedExecutionRecoveryInstruction
972
+ : streamedToolErrorRecoveryInstruction
973
+ ?? delegatedExecutionRecoveryInstruction
945
974
  ?? streamedIncompletePlanRecoveryInstruction
946
975
  ?? streamedPrematurePlanCloseRecoveryInstruction
947
976
  ?? streamedRuntimeFailureRecoveryInstruction
@@ -39,7 +39,8 @@ function hasIncompleteStateSnapshotPlan(stateSnapshot) {
39
39
  function hasStateSnapshotPlan(stateSnapshot) {
40
40
  return typeof stateSnapshot === "object"
41
41
  && stateSnapshot !== null
42
- && Array.isArray(stateSnapshot.todos);
42
+ && Array.isArray(stateSnapshot.todos)
43
+ && (stateSnapshot.todos).length > 0;
43
44
  }
44
45
  function normalizePlanToolName(toolName) {
45
46
  return typeof toolName === "string" ? toolName.trim().toLowerCase().replace(/[\s-]+/gu, "_") : "";
@@ -54,7 +55,19 @@ function isPlanToolName(toolName) {
54
55
  || normalized === "call_read_todos";
55
56
  }
56
57
  function hasPlanToolEvidence(executedToolResults) {
57
- return executedToolResults.some((item) => isPlanToolName(item.toolName));
58
+ return executedToolResults.some((item) => {
59
+ if (!isPlanToolName(item.toolName)) {
60
+ return false;
61
+ }
62
+ const output = typeof item.output === "object" && item.output !== null ? item.output : null;
63
+ const summaryContainer = typeof output?.summary === "object" && output.summary !== null
64
+ ? output.summary
65
+ : null;
66
+ const counts = typeof summaryContainer?.summary === "object" && summaryContainer.summary !== null
67
+ ? summaryContainer.summary
68
+ : null;
69
+ return !(typeof counts?.total === "number" && counts.total <= 0);
70
+ });
58
71
  }
59
72
  function hasExecutionToolEvidence(executedToolResults) {
60
73
  return executedToolResults.some((item) => item.isError !== true && !isPlanToolName(item.toolName));
@@ -13,15 +13,21 @@ const TOOL_FOLLOW_UP_INSTRUCTION = "One or more tool results are already availab
13
13
  const DEFAULT_MAX_TOOL_ITERATIONS = 10_000;
14
14
  const MAX_REPEATED_RECOVERY_WITHOUT_PROGRESS = 2;
15
15
  const MAX_REPEATED_PLAN_ONLY_AFTER_PLAN = 2;
16
- function prioritizeBootstrapEvidenceTools(primaryTools) {
16
+ const REQUIRED_PLAN_CONTRACT_MARKER = "This agent has a required visible planning contract.";
17
+ const INITIAL_WRITE_TODOS_MARKER = "Your first action for this request must be write_todos";
18
+ function resolveSingleBootstrapEvidenceTool(primaryTools) {
17
19
  const evidenceTools = primaryTools
18
20
  .map((tool) => typeof tool.name === "string" ? tool.name.trim() : "")
19
21
  .filter((name) => name.length > 0 && !isPlanToolName(name));
20
- return evidenceTools.slice(0, 4);
22
+ return evidenceTools.length === 1 ? evidenceTools[0] : undefined;
21
23
  }
22
24
  function createBootstrapTodoPlan(primaryTools) {
23
- const evidenceTool = prioritizeBootstrapEvidenceTools(primaryTools)[0];
24
- if (!evidenceTool) {
25
+ const evidenceTool = resolveSingleBootstrapEvidenceTool(primaryTools);
26
+ const evidenceToolCount = primaryTools
27
+ .map((tool) => typeof tool.name === "string" ? tool.name.trim() : "")
28
+ .filter((name) => name.length > 0 && !isPlanToolName(name))
29
+ .length;
30
+ if (evidenceToolCount === 0) {
25
31
  return [
26
32
  {
27
33
  content: "Establish the required visible plan for this request",
@@ -35,7 +41,9 @@ function createBootstrapTodoPlan(primaryTools) {
35
41
  }
36
42
  return [
37
43
  {
38
- content: `Run the configured non-planning evidence tool: ${evidenceTool}`,
44
+ content: evidenceTool
45
+ ? `Run the only configured non-planning evidence tool: ${evidenceTool}`
46
+ : "Select and run the appropriate non-planning evidence tool from the declared tool surface",
39
47
  status: "in_progress",
40
48
  },
41
49
  {
@@ -76,6 +84,15 @@ function buildExternalPlanEvidenceToolResult(tools) {
76
84
  }],
77
85
  };
78
86
  }
87
+ function stripSatisfiedInitialPlanInstruction(messages) {
88
+ return messages.filter((message) => {
89
+ const typed = typeof message === "object" && message !== null ? message : {};
90
+ if (typeof typed.content !== "string") {
91
+ return true;
92
+ }
93
+ return !(typed.content.includes(REQUIRED_PLAN_CONTRACT_MARKER) && typed.content.includes(INITIAL_WRITE_TODOS_MARKER));
94
+ });
95
+ }
79
96
  function readPlanStateSummary(output) {
80
97
  if (typeof output !== "object" || output === null) {
81
98
  return null;
@@ -91,7 +108,11 @@ function readPlanStateSummary(output) {
91
108
  return null;
92
109
  }
93
110
  const typedCounts = counts;
111
+ if (typeof typedCounts.total === "number" && typedCounts.total <= 0) {
112
+ return null;
113
+ }
94
114
  return {
115
+ ...(typeof typedCounts.total === "number" ? { total: typedCounts.total } : {}),
95
116
  pending: typeof typedCounts.pending === "number" ? typedCounts.pending : 0,
96
117
  inProgress: typeof typedCounts.inProgress === "number" ? typedCounts.inProgress : 0,
97
118
  };
@@ -217,7 +238,7 @@ function debugLocalToolReplay(input) {
217
238
  }
218
239
  console.error(JSON.stringify({
219
240
  type: "local-tool-replay",
220
- toolCallNames: input.toolCalls.map((toolCall) => toolCall.name),
241
+ toolCalls: input.toolCalls.map((toolCall) => ({ name: toolCall.name, args: toolCall.args })),
221
242
  resultMessages: summarizeResultMessages(input.result),
222
243
  executableToolNames: input.executableToolNames,
223
244
  builtinToolNames: input.builtinToolNames,
@@ -534,7 +555,9 @@ export async function runLocalToolInvocationLoop({ binding, request, primaryTool
534
555
  executedToolResults,
535
556
  };
536
557
  }
537
- currentMessages = nextMessages;
558
+ currentMessages = hasPlanStateEvidence(executedToolResults, externalPlanEvidence)
559
+ ? stripSatisfiedInitialPlanInstruction(nextMessages)
560
+ : nextMessages;
538
561
  activeRequest = {
539
562
  ...activeRequest,
540
563
  messages: currentMessages,
@@ -6,7 +6,7 @@ import { ChatOpenAI } from "@langchain/openai";
6
6
  import { AIMessage } from "langchain";
7
7
  import { initChatModel } from "langchain";
8
8
  import { salvageToolArgs, tryParseJson } from "../../parsing/output-parsing.js";
9
- import { salvageJsonToolCalls } from "../../parsing/output-tool-args.js";
9
+ import { normalizeKnownToolArgs, salvageJsonToolCalls } from "../../parsing/output-tool-args.js";
10
10
  import { normalizeModelFacingToolSchema } from "../tool/resolved-tool.js";
11
11
  import { normalizeOpenAICompatibleInit } from "../compat/openai-compatible.js";
12
12
  import { recordPromptedJsonToolCall } from "./prompted-json-tool-call-capture.js";
@@ -640,7 +640,7 @@ function normalizeParsedToolCall(payload) {
640
640
  const args = Array.isArray(argsCandidate)
641
641
  ? { args: argsCandidate }
642
642
  : salvageToolArgs(argsCandidate) ?? {};
643
- return { name, args };
643
+ return { name, args: normalizeKnownToolArgs(name, args) };
644
644
  }
645
645
  function buildFallbackTodoContents() {
646
646
  return [
@@ -770,6 +770,8 @@ function withPromptedJsonToolPrompt(input, tools, options = {}) {
770
770
  ? [
771
771
  "Required evidence tool call:",
772
772
  "A todo board already exists. Your next action must be exactly one non-planning tool call chosen from the available tool descriptions and schemas.",
773
+ "If the current request or todo board explicitly names one available non-planning tool, call that named tool.",
774
+ "Do not substitute a neighboring, broader, narrower, or similarly named tool when an exact available tool name is present.",
773
775
  "Do not call write_todos or read_todos now.",
774
776
  "Do not write prose, markdown, analysis, or a plain-text plan.",
775
777
  ].join("\n")
@@ -5,7 +5,6 @@ export type BuiltinTodoSnapshot = {
5
5
  };
6
6
  export declare function truncateLines(lines: string[], maxChars?: number): string;
7
7
  export declare function summarizeBuiltinWriteTodosArgs(args: Record<string, unknown>): BuiltinTodoSnapshot;
8
- export declare function isLowSignalTodoContent(content: string): boolean;
9
8
  export declare function formatBuiltinTodoSnapshot(snapshot: BuiltinTodoSnapshot): string;
10
9
  export declare function buildRequestPlanState(input: {
11
10
  sessionId: string;
@@ -119,13 +119,6 @@ export function summarizeBuiltinWriteTodosArgs(args) {
119
119
  summary,
120
120
  };
121
121
  }
122
- export function isLowSignalTodoContent(content) {
123
- const normalized = content.trim().toLowerCase();
124
- if (!normalized) {
125
- return true;
126
- }
127
- return /^\d+$/.test(normalized) || /^step\s*\d+$/.test(normalized) || /^todo\s*\d+$/.test(normalized);
128
- }
129
122
  export function formatBuiltinTodoSnapshot(snapshot) {
130
123
  if (snapshot.summary.total === 0) {
131
124
  return "No todos tracked.";
@@ -1,6 +1,5 @@
1
1
  import { extractToolFallbackContext, extractVisibleOutput, readTextContent, sanitizeVisibleText } from "../parsing/output-parsing.js";
2
2
  import { salvageFunctionLikeToolCall } from "../parsing/output-tool-args.js";
3
- import { isLowSignalTodoContent, summarizeBuiltinWriteTodosArgs } from "./runtime-adapter-support.js";
4
3
  import { computeIncrementalOutput, extractInterruptPayload, extractReasoningStreamOutput, sanitizeRetainedUpstreamEvent, extractStateStreamOutput, extractTerminalStreamOutput, extractToolResult, extractVisibleStreamOutput, normalizeTerminalOutputKey, } from "../parsing/stream-event-parsing.js";
5
4
  import { resolveModelFacingToolName } from "./tool/tool-name-mapping.js";
6
5
  export function createStreamEventProjectionState() {
@@ -141,6 +140,9 @@ function readSummaryCounts(summary) {
141
140
  return null;
142
141
  }
143
142
  const typed = summary;
143
+ if (typeof typed.total === "number" && typed.total <= 0) {
144
+ return null;
145
+ }
144
146
  const hasAnyCountField = typeof typed.pending === "number"
145
147
  || typeof typed.inProgress === "number";
146
148
  if (!hasAnyCountField) {
@@ -155,6 +157,9 @@ function hasIncompleteTodosArray(value) {
155
157
  if (!Array.isArray(value)) {
156
158
  return null;
157
159
  }
160
+ if (value.length === 0) {
161
+ return null;
162
+ }
158
163
  return value.some((item) => {
159
164
  if (typeof item !== "object" || item === null) {
160
165
  return false;
@@ -394,12 +399,6 @@ function isPlanToolName(toolName) {
394
399
  || normalized === "call_write_todos"
395
400
  || normalized === "call_read_todos";
396
401
  }
397
- function isWriteTodosToolName(toolName) {
398
- const normalized = normalizePlanToolName(toolName);
399
- return normalized === "write_todos"
400
- || normalized === "tool_call_write_todos"
401
- || normalized === "call_write_todos";
402
- }
403
402
  function extractTodoToolStart(event) {
404
403
  if (typeof event !== "object" || event === null) {
405
404
  return null;
@@ -413,15 +412,6 @@ function extractTodoToolStart(event) {
413
412
  return null;
414
413
  }
415
414
  const input = unwrapPossibleToolInput(typed.data?.input);
416
- if (isWriteTodosToolName(toolName) && typeof input === "object" && input !== null && !Array.isArray(input)) {
417
- const summary = summarizeBuiltinWriteTodosArgs(input);
418
- if (summary.summary.total === 0) {
419
- throw new Error("Error invoking tool 'write_todos' with kwargs {\"todos\":[]} with error: Error: Initial write_todos call cannot use an empty todo list. Send the concrete task steps with both content and status.");
420
- }
421
- if (summary.items.every((item) => isLowSignalTodoContent(item.content))) {
422
- throw new Error("Error invoking tool 'write_todos' with placeholder todo content with error: Error: Initial write_todos call must use descriptive task content. Do not use placeholder entries like '1', '2', or 'step 1'.");
423
- }
424
- }
425
415
  return { toolName, input };
426
416
  }
427
417
  export function projectRuntimeStreamEvent(params) {
@@ -2,7 +2,7 @@ import path from "node:path";
2
2
  import { z } from "zod";
3
3
  import { isSandboxBackend } from "deepagents";
4
4
  import { isRecord } from "../../../utils/object.js";
5
- import { formatBuiltinTodoSnapshot, isLowSignalTodoContent, summarizeBuiltinWriteTodosArgs, truncateLines } from "../runtime-adapter-support.js";
5
+ import { formatBuiltinTodoSnapshot, summarizeBuiltinWriteTodosArgs, truncateLines } from "../runtime-adapter-support.js";
6
6
  import { maybePersistLargeToolOutput, resolveToolRuntimeContext } from "./tool-output-artifacts.js";
7
7
  function buildTaskToolDescription(subagents) {
8
8
  const lines = [
@@ -272,14 +272,6 @@ export async function createBuiltinMiddlewareTools(backend, options) {
272
272
  invoke: async (input) => {
273
273
  const args = isRecord(input) ? input : {};
274
274
  const summary = summarizeBuiltinWriteTodosArgs(args);
275
- if (summary.summary.total === 0 && todoSnapshot.summary.total === 0) {
276
- throw new Error("Initial write_todos call cannot use an empty todo list. Send the concrete task steps with both content and status.");
277
- }
278
- if (summary.summary.total > 0 &&
279
- todoSnapshot.summary.total === 0 &&
280
- summary.items.every((item) => isLowSignalTodoContent(item.content))) {
281
- throw new Error("Initial write_todos call must use descriptive task content. Do not use placeholder entries like '1', '2', or 'step 1'.");
282
- }
283
275
  todoSnapshot = summary;
284
276
  return {
285
277
  ok: true,
@@ -1,5 +1,5 @@
1
1
  import { salvageToolArgs } from "../../parsing/output-parsing.js";
2
- import { salvageJsonToolCalls } from "../../parsing/output-tool-args.js";
2
+ import { normalizeKnownToolArgs, salvageJsonToolCalls, salvageResultLabeledToolCall } from "../../parsing/output-tool-args.js";
3
3
  import { isRecord } from "../../../utils/object.js";
4
4
  import { extractExplicitResourceReferences, hasExplicitResourceReference } from "../../harness/system/runtime-memory-policy.js";
5
5
  import { readCapturedPromptedJsonToolCalls } from "../model/prompted-json-tool-call-capture.js";
@@ -175,6 +175,29 @@ function mapDelimitedListLikeArgs(args) {
175
175
  }
176
176
  return next;
177
177
  }
178
+ function dropDelimitedScalarPathArgs(args, shape) {
179
+ let next = args;
180
+ for (const [key, schemaPart] of Object.entries(shape)) {
181
+ const value = next[key];
182
+ if (typeof value !== "string") {
183
+ continue;
184
+ }
185
+ const normalizedKey = key.trim().toLowerCase();
186
+ if (!/(?:^path$|path$|^filepath$|^targetpath$)/u.test(normalizedKey)) {
187
+ continue;
188
+ }
189
+ if (schemaPartExpectsArray(schemaPart)) {
190
+ continue;
191
+ }
192
+ const raw = value.trim();
193
+ if (!/[,;\n]/u.test(raw)) {
194
+ continue;
195
+ }
196
+ const { [key]: _dropped, ...rest } = next;
197
+ next = rest;
198
+ }
199
+ return next;
200
+ }
178
201
  export function normalizeToolArgsForSchema(args, schema, rawArgsInput, options = {}) {
179
202
  const schemaDef = isObject(schema) ? schema._def : undefined;
180
203
  const zodShape = schemaDef
@@ -191,7 +214,7 @@ export function normalizeToolArgsForSchema(args, schema, rawArgsInput, options =
191
214
  if (!shape || !isRecord(shape)) {
192
215
  return mapDelimitedListLikeArgs(args);
193
216
  }
194
- const aliasMappedArgs = mapStringArrayFields(mapCommonArgumentAliases(args, shape), shape);
217
+ const aliasMappedArgs = dropDelimitedScalarPathArgs(mapStringArrayFields(mapCommonArgumentAliases(args, shape), shape), shape);
195
218
  const keys = Object.keys(shape);
196
219
  if (keys.length !== 1) {
197
220
  return fillLatestUserInputForQueryLikeFields(aliasMappedArgs, shape, options.latestUserInput);
@@ -270,7 +293,7 @@ export function extractToolCallsFromResult(result) {
270
293
  if (id && answeredToolCallIds.has(id)) {
271
294
  return null;
272
295
  }
273
- return { id, name, args: rawArgs, rawArgsInput };
296
+ return { id, name, args: normalizeKnownToolArgs(name, rawArgs), rawArgsInput };
274
297
  })
275
298
  .filter((item) => item !== null);
276
299
  if (extracted.length > 0) {
@@ -296,6 +319,15 @@ export function extractToolCallsFromResult(result) {
296
319
  if (!content.trim()) {
297
320
  continue;
298
321
  }
322
+ const resultLabeledToolCall = salvageResultLabeledToolCall(content);
323
+ if (resultLabeledToolCall) {
324
+ return [{
325
+ id: "salvaged-result-label-1",
326
+ name: resultLabeledToolCall.name,
327
+ args: resultLabeledToolCall.args,
328
+ rawArgsInput: content,
329
+ }];
330
+ }
299
331
  const salvaged = salvageJsonToolCalls(content);
300
332
  if (salvaged.length > 0) {
301
333
  return salvaged.map((toolCall, salvageIndex) => ({
@@ -247,6 +247,32 @@ function hasDelegatedPlanEvidence(result) {
247
247
  return Array.isArray(toolResults)
248
248
  && toolResults.some((item) => isPlanToolName(item.toolName));
249
249
  }
250
+ function hasIncompleteDelegatedTodos(value) {
251
+ if (Array.isArray(value)) {
252
+ return value.some((item) => hasIncompleteDelegatedTodos(item));
253
+ }
254
+ if (typeof value !== "object" || value === null) {
255
+ return false;
256
+ }
257
+ const record = value;
258
+ const status = typeof record.status === "string" ? record.status.trim().toLowerCase() : "";
259
+ if (status === "pending" || status === "in_progress") {
260
+ return true;
261
+ }
262
+ return hasIncompleteDelegatedTodos(record.todos)
263
+ || hasIncompleteDelegatedTodos(record.update)
264
+ || hasIncompleteDelegatedTodos(record.stateSnapshot)
265
+ || hasIncompleteDelegatedTodos(record.metadata);
266
+ }
267
+ function hasIncompleteDelegatedPlanState(result) {
268
+ const toolResults = result?.metadata?.executedToolResults;
269
+ return Array.isArray(toolResults)
270
+ && toolResults.some((item) => isPlanToolName(item.toolName) && hasIncompleteDelegatedTodos(item.output));
271
+ }
272
+ function needsDelegatedPlanRecovery(binding, result) {
273
+ return binding?.harnessRuntime.executionContract?.requiresPlan === true
274
+ && (!hasDelegatedPlanEvidence(result) || hasIncompleteDelegatedPlanState(result));
275
+ }
250
276
  function readUpstreamToolEvidence(event) {
251
277
  if (typeof event !== "object" || event === null) {
252
278
  return null;
@@ -839,6 +865,28 @@ export class AgentRuntimeAdapter {
839
865
  const inlineSubagents = input.resolvedSubagents.filter((subagent) => !("graphId" in subagent));
840
866
  const asyncSubagents = input.resolvedSubagents.filter((subagent) => "graphId" in subagent);
841
867
  const subagents = inlineSubagents;
868
+ const subagentDefaultMiddleware = [
869
+ ...(builtinTools.todos === false ? [] : [todoListMiddleware()]),
870
+ ...(builtinTools.filesystem === false ? [] : [createFilesystemMiddleware({ backend })]),
871
+ createSummarizationMiddleware({
872
+ model: input.resolvedModel,
873
+ backend,
874
+ }),
875
+ createPatchToolCallsMiddleware(),
876
+ ];
877
+ const generalPurposeMiddleware = [
878
+ ...subagentDefaultMiddleware,
879
+ ...(input.resolvedSkills.length > 0 ? [createSkillsMiddleware({
880
+ backend,
881
+ sources: resolveDeepAgentSkillSourceRootPaths({
882
+ workspaceRoot: binding.harnessRuntime.workspaceRoot,
883
+ runtimeRoot: binding.harnessRuntime.runtimeRoot,
884
+ ownerId: binding.agent.id,
885
+ skillPaths: input.resolvedSkills,
886
+ }) ?? input.resolvedSkills,
887
+ })] : []),
888
+ ];
889
+ const hasGeneralPurposeOverride = subagents.some((subagent) => subagent.name === "general-purpose");
842
890
  const middleware = [
843
891
  ...(builtinTools.todos === false ? [] : [todoListMiddleware()]),
844
892
  ...(input.resolvedSkills.length > 0 ? [createSkillsMiddleware({
@@ -851,15 +899,15 @@ export class AgentRuntimeAdapter {
851
899
  }) ?? input.resolvedSkills,
852
900
  })] : []),
853
901
  ...(builtinTools.filesystem === false ? [] : [createFilesystemMiddleware({ backend })]),
854
- ...(subagents.length > 0
855
- ? [createSubAgentMiddleware({
856
- defaultModel: input.resolvedModel,
857
- defaultTools: input.resolvedTools,
858
- defaultInterruptOn: input.resolvedInterruptOn,
859
- subagents: subagents,
860
- generalPurposeAgent: false,
861
- })]
862
- : []),
902
+ createSubAgentMiddleware({
903
+ defaultModel: input.resolvedModel,
904
+ defaultTools: input.resolvedTools,
905
+ defaultMiddleware: subagentDefaultMiddleware,
906
+ generalPurposeMiddleware: generalPurposeMiddleware,
907
+ defaultInterruptOn: input.resolvedInterruptOn,
908
+ subagents: subagents,
909
+ generalPurposeAgent: !hasGeneralPurposeOverride,
910
+ }),
863
911
  createSummarizationMiddleware({
864
912
  model: input.resolvedModel,
865
913
  backend,
@@ -1195,8 +1243,7 @@ export class AgentRuntimeAdapter {
1195
1243
  };
1196
1244
  }
1197
1245
  }
1198
- if (selectedBinding.harnessRuntime.executionContract?.requiresPlan === true
1199
- && !hasDelegatedPlanEvidence(delegatedResult)) {
1246
+ if (needsDelegatedPlanRecovery(selectedBinding, delegatedResult)) {
1200
1247
  try {
1201
1248
  delegatedResult = await runDelegatedRequest([requestText, DELEGATED_PLAN_EVIDENCE_RETRY_INSTRUCTION].filter(Boolean).join("\n\n"), ":plan-evidence-retry");
1202
1249
  }
@@ -1214,8 +1261,7 @@ export class AgentRuntimeAdapter {
1214
1261
  };
1215
1262
  }
1216
1263
  }
1217
- if (selectedBinding.harnessRuntime.executionContract?.requiresPlan === true
1218
- && !hasDelegatedPlanEvidence(delegatedResult)) {
1264
+ if (needsDelegatedPlanRecovery(selectedBinding, delegatedResult)) {
1219
1265
  const output = buildDelegatedPlanEvidenceBlocker(selectedBinding.agent.id);
1220
1266
  return {
1221
1267
  toolOutput: output,
@@ -1606,12 +1652,16 @@ export class AgentRuntimeAdapter {
1606
1652
  agentId: selectedBinding?.agent.id ?? planned.subagentType,
1607
1653
  };
1608
1654
  let delegatedResult = yield* runPlannedDelegation(planned.subagentType, delegatedText);
1609
- if (selectedBinding?.harnessRuntime.executionContract?.requiresPlan === true && !hasDelegatedPlanEvidence(delegatedResult)) {
1655
+ if (needsDelegatedPlanRecovery(selectedBinding, delegatedResult)) {
1610
1656
  const previousDelegatedResult = delegatedResult;
1611
1657
  delegatedResult = mergeDelegatedResultToolEvidence(yield* runPlannedDelegation(planned.subagentType, [delegatedText, DELEGATED_PLAN_EVIDENCE_RETRY_INSTRUCTION].filter(Boolean).join("\n\n"), ":plan-evidence-retry"), previousDelegatedResult);
1612
1658
  }
1613
- if (selectedBinding?.harnessRuntime.executionContract?.requiresPlan === true && !hasDelegatedPlanEvidence(delegatedResult)) {
1614
- const output = buildDelegatedPlanEvidenceBlocker(selectedBinding.agent.id);
1659
+ if (needsDelegatedPlanRecovery(selectedBinding, delegatedResult)) {
1660
+ const previousDelegatedResult = delegatedResult;
1661
+ delegatedResult = mergeDelegatedResultToolEvidence(yield* runPlannedDelegation(planned.subagentType, [delegatedText, DELEGATED_PLAN_EVIDENCE_FINAL_RETRY_INSTRUCTION].filter(Boolean).join("\n\n"), ":plan-evidence-final-retry"), previousDelegatedResult);
1662
+ }
1663
+ if (needsDelegatedPlanRecovery(selectedBinding, delegatedResult)) {
1664
+ const output = buildDelegatedPlanEvidenceBlocker(selectedBinding?.agent.id ?? planned.subagentType);
1615
1665
  delegatedResult = {
1616
1666
  ...delegatedResult,
1617
1667
  state: "failed",
@@ -1803,18 +1853,15 @@ export class AgentRuntimeAdapter {
1803
1853
  originalRequest: requestText,
1804
1854
  });
1805
1855
  let delegatedResult = yield* runDelegatedStreamAttempt(delegatedText);
1806
- if (selectedBinding.harnessRuntime.executionContract?.requiresPlan === true
1807
- && !hasDelegatedPlanEvidence(delegatedResult)) {
1856
+ if (needsDelegatedPlanRecovery(selectedBinding, delegatedResult)) {
1808
1857
  const previousDelegatedResult = delegatedResult;
1809
1858
  delegatedResult = mergeDelegatedResultToolEvidence(yield* runDelegatedStreamAttempt([delegatedText, DELEGATED_PLAN_EVIDENCE_RETRY_INSTRUCTION].filter(Boolean).join("\n\n"), ":plan-evidence-retry"), previousDelegatedResult);
1810
1859
  }
1811
- if (selectedBinding.harnessRuntime.executionContract?.requiresPlan === true
1812
- && !hasDelegatedPlanEvidence(delegatedResult)) {
1860
+ if (needsDelegatedPlanRecovery(selectedBinding, delegatedResult)) {
1813
1861
  const previousDelegatedResult = delegatedResult;
1814
1862
  delegatedResult = mergeDelegatedResultToolEvidence(yield* runDelegatedStreamAttempt([delegatedText, DELEGATED_PLAN_EVIDENCE_FINAL_RETRY_INSTRUCTION].filter(Boolean).join("\n\n"), ":plan-evidence-final-retry"), previousDelegatedResult);
1815
1863
  }
1816
- if (selectedBinding.harnessRuntime.executionContract?.requiresPlan === true
1817
- && !hasDelegatedPlanEvidence(delegatedResult)) {
1864
+ if (needsDelegatedPlanRecovery(selectedBinding, delegatedResult)) {
1818
1865
  const output = buildDelegatedPlanEvidenceBlocker(selectedBinding.agent.id);
1819
1866
  delegatedResult = {
1820
1867
  ...delegatedResult,
@@ -59,7 +59,8 @@ export function isToolCallValidationFailure(error) {
59
59
  if (/Invalid input:\s*expected .* received undefined/i.test(message) && /"path"\s*:\s*\[/.test(message)) {
60
60
  return true;
61
61
  }
62
- return /Received tool input did not match expected schema/i.test(message) && /(?:→\s*at|at)\s+[\w[\].]+/i.test(message);
62
+ return /Received tool input did not match expected schema/i.test(message)
63
+ && (/(?:→\s*at|at)\s+[\w[\].]+/i.test(message) || /\bDetails:/i.test(message));
63
64
  }
64
65
  function collectErrorMessages(error) {
65
66
  if (!error)
@@ -7,6 +7,10 @@ export declare function salvageLabeledToolCall(value: unknown): {
7
7
  name: string;
8
8
  args: Record<string, unknown>;
9
9
  } | null;
10
+ export declare function salvageResultLabeledToolCall(value: unknown): {
11
+ name: string;
12
+ args: Record<string, unknown>;
13
+ } | null;
10
14
  export declare function salvageToolArgs(value: unknown): Record<string, unknown> | null;
11
15
  export declare function salvageJsonToolCalls(value: unknown): Array<{
12
16
  name: string;
@@ -179,6 +179,22 @@ export function salvageLabeledToolCall(value) {
179
179
  }
180
180
  return null;
181
181
  }
182
+ export function salvageResultLabeledToolCall(value) {
183
+ if (typeof value !== "string") {
184
+ return null;
185
+ }
186
+ const lines = value
187
+ .split("\n")
188
+ .map((line) => line.trim())
189
+ .filter(Boolean);
190
+ const label = lines[0]?.replace(/[*`#]/gu, "").trim() ?? "";
191
+ const match = /^([A-Za-z_][A-Za-z0-9_]*)\s+result\b/iu.exec(label);
192
+ if (!match || !isToolName(match[1])) {
193
+ return null;
194
+ }
195
+ const args = salvageToolArgs(lines.slice(1).join("\n")) ?? {};
196
+ return { name: match[1], args: normalizeKnownToolArgs(match[1], args) };
197
+ }
182
198
  function extractBalancedJsonValue(value, openChar, closeChar) {
183
199
  const start = value.indexOf(openChar);
184
200
  if (start < 0)
@@ -514,12 +530,26 @@ function normalizeWriteTodosArgs(args) {
514
530
  if (Array.isArray(args.items) && !Array.isArray(args.todos)) {
515
531
  return normalizeWriteTodosArgs({ ...args, todos: args.items });
516
532
  }
533
+ if (Array.isArray(args.tasks) && !Array.isArray(args.todos)) {
534
+ return normalizeWriteTodosArgs({ ...args, todos: args.tasks });
535
+ }
536
+ if (Array.isArray(args.todo) && !Array.isArray(args.todos)) {
537
+ return normalizeWriteTodosArgs({ ...args, todos: args.todo });
538
+ }
517
539
  if (!Array.isArray(args.todos)) {
518
540
  return args;
519
541
  }
542
+ const { items: _items, tasks: _tasks, todo: _todo, ...rest } = args;
520
543
  return {
521
- ...args,
544
+ ...rest,
522
545
  todos: args.todos.map((todo, index) => {
546
+ if (typeof todo === "string") {
547
+ const content = todo.trim();
548
+ return {
549
+ content: content.length > 0 ? content : `Step ${index + 1}`,
550
+ status: index === 0 ? "in_progress" : "pending",
551
+ };
552
+ }
523
553
  if (typeof todo !== "object" || !todo || Array.isArray(todo)) {
524
554
  return todo;
525
555
  }
@@ -534,15 +564,40 @@ function normalizeWriteTodosArgs(args) {
534
564
  ? record.name
535
565
  : typeof record.text === "string" && record.text.trim().length > 0
536
566
  ? record.text
537
- : `Step ${index + 1}`;
567
+ : typeof record.task === "string" && record.task.trim().length > 0
568
+ ? record.task
569
+ : typeof record.action === "string" && record.action.trim().length > 0
570
+ ? record.action
571
+ : typeof record.step === "string" && record.step.trim().length > 0
572
+ ? record.step
573
+ : `Step ${index + 1}`;
538
574
  const normalized = {};
539
575
  if (content !== undefined)
540
576
  normalized.content = content;
541
- normalized.status = typeof record.status === "string" && record.status.trim().length > 0 ? record.status : "pending";
577
+ normalized.status = normalizeWriteTodoStatus(record.status);
542
578
  return Object.keys(normalized).length > 0 ? normalized : todo;
543
579
  }),
544
580
  };
545
581
  }
582
+ function normalizeWriteTodoStatus(value) {
583
+ if (typeof value !== "string") {
584
+ return "pending";
585
+ }
586
+ const normalized = value.trim().toLowerCase().replace(/[\s-]+/gu, "_");
587
+ if (!normalized) {
588
+ return "pending";
589
+ }
590
+ if (normalized === "not_started" || normalized === "todo" || normalized === "open") {
591
+ return "pending";
592
+ }
593
+ if (normalized === "started" || normalized === "active" || normalized === "in_progress") {
594
+ return "in_progress";
595
+ }
596
+ if (normalized === "done" || normalized === "complete" || normalized === "completed") {
597
+ return "completed";
598
+ }
599
+ return normalized;
600
+ }
546
601
  function normalizeTaskArgs(args) {
547
602
  const description = typeof args.description === "string" && args.description.trim().length > 0
548
603
  ? args.description
@@ -1,4 +1,3 @@
1
- import { isLowSignalTodoContent } from "../adapter/runtime-adapter-support.js";
2
1
  import { extractReasoningText, extractVisibleOutput, hasToolCalls, readTextContent } from "./output-parsing.js";
3
2
  const MAX_STREAM_INLINE_TEXT_CHARS = 12_000;
4
3
  const STREAM_PREVIEW_TEXT_CHARS = 2_000;
@@ -205,31 +204,6 @@ function isErrorLikeToolOutput(value) {
205
204
  || /^command failed:/i.test(firstNonEmptyLine)
206
205
  || /^stderr:/i.test(firstNonEmptyLine);
207
206
  }
208
- function isEmptyInitialWriteTodosResult(value) {
209
- if (typeof value !== "object" || !value || Array.isArray(value)) {
210
- return false;
211
- }
212
- const typed = value;
213
- if (typeof typed.summary === "object" && typed.summary && Array.isArray(typed.summary.items)) {
214
- return (typed.summary.items).length === 0;
215
- }
216
- if (typeof typed.update === "object" && typed.update && Array.isArray(typed.update.todos)) {
217
- return (typed.update.todos).length === 0;
218
- }
219
- return false;
220
- }
221
- function hasLowSignalInitialWriteTodos(value) {
222
- if (typeof value !== "object" || !value || Array.isArray(value)) {
223
- return false;
224
- }
225
- const typed = value;
226
- const items = typeof typed.summary === "object" && typed.summary && Array.isArray(typed.summary.items)
227
- ? (typed.summary.items ?? [])
228
- : typeof typed.update === "object" && typed.update && Array.isArray(typed.update.todos)
229
- ? (typed.update.todos ?? [])
230
- : [];
231
- return items.length > 0 && items.every((item) => typeof item?.content === "string" && isLowSignalTodoContent(item.content));
232
- }
233
207
  function isToolMessageLike(value) {
234
208
  if (typeof value !== "object" || value === null) {
235
209
  return false;
@@ -326,12 +300,6 @@ export function extractToolResult(event) {
326
300
  const normalizedOutput = typeof rawOutput === "string"
327
301
  ? parseMaybeJson(rawOutput)
328
302
  : unwrapToolMessageOutput(rawOutput);
329
- if (!isToolError && toolName === "write_todos" && isEmptyInitialWriteTodosResult(normalizedOutput)) {
330
- throw new Error("Error invoking tool 'write_todos' with kwargs {\"todos\":[]} with error: Error: Initial write_todos call cannot use an empty todo list. Send the concrete task steps with both content and status.");
331
- }
332
- if (!isToolError && toolName === "write_todos" && hasLowSignalInitialWriteTodos(normalizedOutput)) {
333
- throw new Error("Error invoking tool 'write_todos' with placeholder todo content with error: Error: Initial write_todos call must use descriptive task content. Do not use placeholder entries like '1', '2', or 'step 1'.");
334
- }
335
303
  return {
336
304
  toolName,
337
305
  output: sanitizeStreamPayload(normalizedOutput),
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@botbotgo/agent-harness",
3
- "version": "0.0.462",
3
+ "version": "0.0.464",
4
4
  "description": "Workspace runtime for multi-agent applications",
5
5
  "license": "MIT",
6
6
  "type": "module",