@botbotgo/agent-harness 0.0.463 → 0.0.464

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,2 +1,2 @@
1
- export declare const AGENT_HARNESS_VERSION = "0.0.463";
1
+ export declare const AGENT_HARNESS_VERSION = "0.0.464";
2
2
  export declare const AGENT_HARNESS_RELEASE_DATE = "2026-05-04";
@@ -1,2 +1,2 @@
1
- export const AGENT_HARNESS_VERSION = "0.0.463";
1
+ export const AGENT_HARNESS_VERSION = "0.0.464";
2
2
  export const AGENT_HARNESS_RELEASE_DATE = "2026-05-04";
@@ -1 +1 @@
1
- When calling write_todos, every todo item must include both content and status. Do not send status-only updates. Retry by resending the full todo entry with the original content preserved.
1
+ When calling write_todos, every todo item must include both content and status. Use only these status values: pending, in_progress, completed. Do not send aliases such as not_started, open, active, done, blocked, failed, or cancelled. Do not send status-only updates. Retry by resending the full todo entry with the original content preserved.
@@ -1,5 +1,5 @@
1
1
  import { extractVisibleOutput, isToolCallRecoveryFailure, isRetrySafeInvalidToolSelectionError, appendToolRecoveryInstruction, resolveMissingPlanRecoveryInstruction, resolveExecutionWithoutToolEvidenceTextInstruction, shouldValidateExecutionWithoutToolEvidence, resolveToolCallRecoveryInstruction, sanitizeVisibleText, EXECUTION_WITH_TOOL_EVIDENCE_RETRY_INSTRUCTION, INVALID_TOOL_SELECTION_RECOVERY_INSTRUCTION, } from "../../parsing/output-parsing.js";
2
- import { DELEGATED_TASK_FAILURE_RECOVERY_INSTRUCTION, DELEGATION_ONLY_RECOVERY_INSTRUCTION, } from "../../prompts/runtime-prompts.js";
2
+ import { DELEGATED_TASK_FAILURE_RECOVERY_INSTRUCTION, DELEGATION_ONLY_RECOVERY_INSTRUCTION, REQUIRED_EXECUTION_CONTINUATION_INSTRUCTION, } from "../../prompts/runtime-prompts.js";
3
3
  import { buildInvocationRequest } from "../model/invocation-request.js";
4
4
  import { PROMPTED_JSON_TOOL_POLICY_KEY, withPromptedJsonToolPolicy } from "../model/prompted-json-tool-policy.js";
5
5
  import { buildRawModelMessages } from "../model/message-assembly.js";
@@ -257,6 +257,14 @@ function buildDeterministicFinalFromStreamToolEvidence(executedToolResults) {
257
257
  evidence.length > 0 ? evidence.join("\n\n") : "(no non-planning tool evidence captured)",
258
258
  ].join("\n");
259
259
  }
260
/**
 * Resolves a recovery instruction from the most recent failed tool result.
 *
 * Walks `executedToolResults` from newest to oldest looking for an entry whose
 * `isError` is exactly `true`, renders its `output` as text, and hands that
 * text (wrapped in an `Error`) to `resolveToolCallRecoveryInstruction`.
 *
 * @param {Iterable<{ isError?: boolean, output?: unknown }>} executedToolResults
 *   Tool results in execution order.
 * @returns `null` when no entry is flagged as an error; otherwise the value
 *   returned by `resolveToolCallRecoveryInstruction`, or
 *   `REQUIRED_EXECUTION_CONTINUATION_INSTRUCTION` when that value is nullish.
 */
function latestStreamToolErrorRecoveryInstruction(executedToolResults) {
  // Copy before reversing so the caller's collection is never mutated.
  const latest = [...executedToolResults].reverse().find((item) => item.isError === true);
  if (!latest) {
    return null;
  }
  const message = describeStreamToolErrorOutput(latest.output);
  return resolveToolCallRecoveryInstruction(new Error(message)) ?? REQUIRED_EXECUTION_CONTINUATION_INSTRUCTION;
}

/**
 * Renders a tool error payload as text for use as an `Error` message.
 *
 * Strings pass through untouched and everything else is JSON-encoded. Payloads
 * that JSON cannot encode fall back to a type tag instead of throwing, so a
 * malformed error payload cannot abort the recovery path itself.
 *
 * @param {unknown} output Raw `output` of the failed tool result.
 * @returns {string} Text form of the payload.
 */
function describeStreamToolErrorOutput(output) {
  if (typeof output === "string") {
    return output;
  }
  try {
    // JSON.stringify returns undefined for undefined/function/symbol input;
    // "" matches the message `new Error(undefined)` would have carried.
    return JSON.stringify(output) ?? "";
  }
  catch {
    // Cyclic structures and BigInt values make JSON.stringify throw.
    return typeof output === "bigint" ? output.toString() : Object.prototype.toString.call(output);
  }
}
260
268
  function hasUsefulVisibleSynthesis(value) {
261
269
  const trimmed = value.trim();
262
270
  if (trimmed.length < 80) {
@@ -699,6 +707,7 @@ export async function* streamRuntimeExecution(options) {
699
707
  const eventContainsNonRetrySafeChunk = projectedChunks.some((chunk) => chunk.kind !== "upstream-event"
700
708
  && chunk.kind !== "content"
701
709
  && !(chunk.kind === "tool-result" && isPlanToolName(chunk.toolName))
710
+ && !(chunk.kind === "tool-result" && chunk.isError === true)
702
711
  && !(chunk.kind === "tool-result" && chunk.isError === true && isRetrySafeInvalidToolSelectionError(chunk.output)));
703
712
  const hadPriorPlanToolResult = completedPlanToolResultCount > 0;
704
713
  const repeatedPlanToolResultBeforeEvidence = requiresPlanEvidence(options.binding)
@@ -796,10 +805,23 @@ export async function* streamRuntimeExecution(options) {
796
805
  })
797
806
  : null;
798
807
  const terminalDelegationOnlyRecoveryInstruction = resolveDelegationOnlyRecoveryInstruction(options.binding, terminalExecutionEvidence);
808
+ const terminalPrematurePlanCloseRecoveryInstruction = requiresPlanEvidence(options.binding)
809
+ && terminalExecutionEvidence.hasPlanStateEvidence
810
+ && !terminalExecutionEvidence.hasSuccessfulNonTodoToolResultEvidence
811
+ ? buildRunEvidenceAfterPlanInstruction(options.primaryTools)
812
+ : null;
799
813
  if (!emittedUnsafeStreamSideEffects
800
- && (terminalMissingPlanRecoveryInstruction || terminalDelegationOnlyRecoveryInstruction)) {
814
+ && (terminalMissingPlanRecoveryInstruction
815
+ || terminalPrematurePlanCloseRecoveryInstruction
816
+ || terminalDelegationOnlyRecoveryInstruction)) {
801
817
  earlyStreamRecoveryInstruction =
802
- terminalMissingPlanRecoveryInstruction ?? terminalDelegationOnlyRecoveryInstruction;
818
+ terminalMissingPlanRecoveryInstruction
819
+ ?? terminalPrematurePlanCloseRecoveryInstruction
820
+ ?? terminalDelegationOnlyRecoveryInstruction;
821
+ earlyStreamRecoverySuppressInitialPlan = terminalPrematurePlanCloseRecoveryInstruction !== null;
822
+ if (terminalPrematurePlanCloseRecoveryInstruction) {
823
+ earlyStreamExternalPlanEvidenceTools = resolveSingleConfiguredPlanEvidenceTool(options.primaryTools);
824
+ }
803
825
  break;
804
826
  }
805
827
  if (requiresPlanEvidence(options.binding) && hasCompletedPlanWithEvidence(terminalExecutionEvidence)) {
@@ -820,6 +842,7 @@ export async function* streamRuntimeExecution(options) {
820
842
  && !hasMissingDelegatedExecutionEvidence(terminalExecutionEvidence)
821
843
  && !hasMissingDelegatedFindings(terminalExecutionEvidence)
822
844
  && !terminalMissingPlanRecoveryInstruction
845
+ && !terminalPrematurePlanCloseRecoveryInstruction
823
846
  && !terminalDelegationOnlyRecoveryInstruction) {
824
847
  if (deferredStreamContent.length > 0) {
825
848
  yield* flushDeferredStreamContent();
@@ -931,6 +954,9 @@ export async function* streamRuntimeExecution(options) {
931
954
  const streamedRuntimeFailureRecoveryInstruction = projectionState.emittedOutput
932
955
  ? resolveStreamedRuntimeFailureRecoveryInstruction(projectionState.emittedOutput, streamedExecutionEvidence)
933
956
  : null;
957
+ const streamedToolErrorRecoveryInstruction = !emittedUnsafeStreamSideEffects
958
+ ? latestStreamToolErrorRecoveryInstruction(streamedToolResults)
959
+ : null;
934
960
  const missingPlanRecoveryInstruction = !hasUnresolvedExecution(streamedExecutionEvidence) && !delegatedExecutionRecoveryInstruction
935
961
  ? resolveMissingPlanRecoveryInstruction({
936
962
  request,
@@ -943,7 +969,8 @@ export async function* streamRuntimeExecution(options) {
943
969
  : null;
944
970
  const retryInstruction = !emittedUnsafeStreamSideEffects && sawRetrySafeInvalidToolSelectionError
945
971
  ? INVALID_TOOL_SELECTION_RECOVERY_INSTRUCTION
946
- : delegatedExecutionRecoveryInstruction
972
+ : streamedToolErrorRecoveryInstruction
973
+ ?? delegatedExecutionRecoveryInstruction
947
974
  ?? streamedIncompletePlanRecoveryInstruction
948
975
  ?? streamedPrematurePlanCloseRecoveryInstruction
949
976
  ?? streamedRuntimeFailureRecoveryInstruction
@@ -39,7 +39,8 @@ function hasIncompleteStateSnapshotPlan(stateSnapshot) {
39
39
  function hasStateSnapshotPlan(stateSnapshot) {
40
40
  return typeof stateSnapshot === "object"
41
41
  && stateSnapshot !== null
42
- && Array.isArray(stateSnapshot.todos);
42
+ && Array.isArray(stateSnapshot.todos)
43
+ && (stateSnapshot.todos).length > 0;
43
44
  }
44
45
  function normalizePlanToolName(toolName) {
45
46
  return typeof toolName === "string" ? toolName.trim().toLowerCase().replace(/[\s-]+/gu, "_") : "";
@@ -54,7 +55,19 @@ function isPlanToolName(toolName) {
54
55
  || normalized === "call_read_todos";
55
56
  }
56
57
  function hasPlanToolEvidence(executedToolResults) {
57
- return executedToolResults.some((item) => isPlanToolName(item.toolName));
58
+ return executedToolResults.some((item) => {
59
+ if (!isPlanToolName(item.toolName)) {
60
+ return false;
61
+ }
62
+ const output = typeof item.output === "object" && item.output !== null ? item.output : null;
63
+ const summaryContainer = typeof output?.summary === "object" && output.summary !== null
64
+ ? output.summary
65
+ : null;
66
+ const counts = typeof summaryContainer?.summary === "object" && summaryContainer.summary !== null
67
+ ? summaryContainer.summary
68
+ : null;
69
+ return !(typeof counts?.total === "number" && counts.total <= 0);
70
+ });
58
71
  }
59
72
  function hasExecutionToolEvidence(executedToolResults) {
60
73
  return executedToolResults.some((item) => item.isError !== true && !isPlanToolName(item.toolName));
@@ -108,7 +108,11 @@ function readPlanStateSummary(output) {
108
108
  return null;
109
109
  }
110
110
  const typedCounts = counts;
111
+ if (typeof typedCounts.total === "number" && typedCounts.total <= 0) {
112
+ return null;
113
+ }
111
114
  return {
115
+ ...(typeof typedCounts.total === "number" ? { total: typedCounts.total } : {}),
112
116
  pending: typeof typedCounts.pending === "number" ? typedCounts.pending : 0,
113
117
  inProgress: typeof typedCounts.inProgress === "number" ? typedCounts.inProgress : 0,
114
118
  };
@@ -5,7 +5,6 @@ export type BuiltinTodoSnapshot = {
5
5
  };
6
6
  export declare function truncateLines(lines: string[], maxChars?: number): string;
7
7
  export declare function summarizeBuiltinWriteTodosArgs(args: Record<string, unknown>): BuiltinTodoSnapshot;
8
- export declare function isLowSignalTodoContent(content: string): boolean;
9
8
  export declare function formatBuiltinTodoSnapshot(snapshot: BuiltinTodoSnapshot): string;
10
9
  export declare function buildRequestPlanState(input: {
11
10
  sessionId: string;
@@ -119,13 +119,6 @@ export function summarizeBuiltinWriteTodosArgs(args) {
119
119
  summary,
120
120
  };
121
121
  }
122
- export function isLowSignalTodoContent(content) {
123
- const normalized = content.trim().toLowerCase();
124
- if (!normalized) {
125
- return true;
126
- }
127
- return /^\d+$/.test(normalized) || /^step\s*\d+$/.test(normalized) || /^todo\s*\d+$/.test(normalized);
128
- }
129
122
  export function formatBuiltinTodoSnapshot(snapshot) {
130
123
  if (snapshot.summary.total === 0) {
131
124
  return "No todos tracked.";
@@ -1,6 +1,5 @@
1
1
  import { extractToolFallbackContext, extractVisibleOutput, readTextContent, sanitizeVisibleText } from "../parsing/output-parsing.js";
2
2
  import { salvageFunctionLikeToolCall } from "../parsing/output-tool-args.js";
3
- import { isLowSignalTodoContent, summarizeBuiltinWriteTodosArgs } from "./runtime-adapter-support.js";
4
3
  import { computeIncrementalOutput, extractInterruptPayload, extractReasoningStreamOutput, sanitizeRetainedUpstreamEvent, extractStateStreamOutput, extractTerminalStreamOutput, extractToolResult, extractVisibleStreamOutput, normalizeTerminalOutputKey, } from "../parsing/stream-event-parsing.js";
5
4
  import { resolveModelFacingToolName } from "./tool/tool-name-mapping.js";
6
5
  export function createStreamEventProjectionState() {
@@ -141,6 +140,9 @@ function readSummaryCounts(summary) {
141
140
  return null;
142
141
  }
143
142
  const typed = summary;
143
+ if (typeof typed.total === "number" && typed.total <= 0) {
144
+ return null;
145
+ }
144
146
  const hasAnyCountField = typeof typed.pending === "number"
145
147
  || typeof typed.inProgress === "number";
146
148
  if (!hasAnyCountField) {
@@ -155,6 +157,9 @@ function hasIncompleteTodosArray(value) {
155
157
  if (!Array.isArray(value)) {
156
158
  return null;
157
159
  }
160
+ if (value.length === 0) {
161
+ return null;
162
+ }
158
163
  return value.some((item) => {
159
164
  if (typeof item !== "object" || item === null) {
160
165
  return false;
@@ -394,12 +399,6 @@ function isPlanToolName(toolName) {
394
399
  || normalized === "call_write_todos"
395
400
  || normalized === "call_read_todos";
396
401
  }
397
- function isWriteTodosToolName(toolName) {
398
- const normalized = normalizePlanToolName(toolName);
399
- return normalized === "write_todos"
400
- || normalized === "tool_call_write_todos"
401
- || normalized === "call_write_todos";
402
- }
403
402
  function extractTodoToolStart(event) {
404
403
  if (typeof event !== "object" || event === null) {
405
404
  return null;
@@ -413,15 +412,6 @@ function extractTodoToolStart(event) {
413
412
  return null;
414
413
  }
415
414
  const input = unwrapPossibleToolInput(typed.data?.input);
416
- if (isWriteTodosToolName(toolName) && typeof input === "object" && input !== null && !Array.isArray(input)) {
417
- const summary = summarizeBuiltinWriteTodosArgs(input);
418
- if (summary.summary.total === 0) {
419
- throw new Error("Error invoking tool 'write_todos' with kwargs {\"todos\":[]} with error: Error: Initial write_todos call cannot use an empty todo list. Send the concrete task steps with both content and status.");
420
- }
421
- if (summary.items.every((item) => isLowSignalTodoContent(item.content))) {
422
- throw new Error("Error invoking tool 'write_todos' with placeholder todo content with error: Error: Initial write_todos call must use descriptive task content. Do not use placeholder entries like '1', '2', or 'step 1'.");
423
- }
424
- }
425
415
  return { toolName, input };
426
416
  }
427
417
  export function projectRuntimeStreamEvent(params) {
@@ -2,7 +2,7 @@ import path from "node:path";
2
2
  import { z } from "zod";
3
3
  import { isSandboxBackend } from "deepagents";
4
4
  import { isRecord } from "../../../utils/object.js";
5
- import { formatBuiltinTodoSnapshot, isLowSignalTodoContent, summarizeBuiltinWriteTodosArgs, truncateLines } from "../runtime-adapter-support.js";
5
+ import { formatBuiltinTodoSnapshot, summarizeBuiltinWriteTodosArgs, truncateLines } from "../runtime-adapter-support.js";
6
6
  import { maybePersistLargeToolOutput, resolveToolRuntimeContext } from "./tool-output-artifacts.js";
7
7
  function buildTaskToolDescription(subagents) {
8
8
  const lines = [
@@ -272,14 +272,6 @@ export async function createBuiltinMiddlewareTools(backend, options) {
272
272
  invoke: async (input) => {
273
273
  const args = isRecord(input) ? input : {};
274
274
  const summary = summarizeBuiltinWriteTodosArgs(args);
275
- if (summary.summary.total === 0 && todoSnapshot.summary.total === 0) {
276
- throw new Error("Initial write_todos call cannot use an empty todo list. Send the concrete task steps with both content and status.");
277
- }
278
- if (summary.summary.total > 0 &&
279
- todoSnapshot.summary.total === 0 &&
280
- summary.items.every((item) => isLowSignalTodoContent(item.content))) {
281
- throw new Error("Initial write_todos call must use descriptive task content. Do not use placeholder entries like '1', '2', or 'step 1'.");
282
- }
283
275
  todoSnapshot = summary;
284
276
  return {
285
277
  ok: true,
@@ -59,7 +59,8 @@ export function isToolCallValidationFailure(error) {
59
59
  if (/Invalid input:\s*expected .* received undefined/i.test(message) && /"path"\s*:\s*\[/.test(message)) {
60
60
  return true;
61
61
  }
62
- return /Received tool input did not match expected schema/i.test(message) && /(?:→\s*at|at)\s+[\w[\].]+/i.test(message);
62
+ return /Received tool input did not match expected schema/i.test(message)
63
+ && (/(?:→\s*at|at)\s+[\w[\].]+/i.test(message) || /\bDetails:/i.test(message));
63
64
  }
64
65
  function collectErrorMessages(error) {
65
66
  if (!error)
@@ -574,11 +574,30 @@ function normalizeWriteTodosArgs(args) {
574
574
  const normalized = {};
575
575
  if (content !== undefined)
576
576
  normalized.content = content;
577
- normalized.status = typeof record.status === "string" && record.status.trim().length > 0 ? record.status : "pending";
577
+ normalized.status = normalizeWriteTodoStatus(record.status);
578
578
  return Object.keys(normalized).length > 0 ? normalized : todo;
579
579
  }),
580
580
  };
581
581
  }
582
/**
 * Alias spellings of todo statuses mapped to the three canonical values
 * (`pending`, `in_progress`, `completed`). Keys are already lowercased with
 * whitespace/hyphen runs collapsed to `_`. The separator-less entries cover
 * camelCase input such as `inProgress`, which lowercases to `inprogress`.
 */
const WRITE_TODO_STATUS_ALIASES = new Map([
  ["not_started", "pending"],
  ["notstarted", "pending"],
  ["todo", "pending"],
  ["open", "pending"],
  ["started", "in_progress"],
  ["active", "in_progress"],
  ["in_progress", "in_progress"],
  ["inprogress", "in_progress"],
  ["done", "completed"],
  ["complete", "completed"],
  ["completed", "completed"],
]);

/**
 * Normalizes a `write_todos` status value.
 *
 * Non-string and blank values default to `"pending"`. Strings are trimmed,
 * lowercased, and have whitespace/hyphen runs replaced by `_`; known aliases
 * are then mapped to their canonical status. Any other value is returned in
 * its normalized form rather than being guessed at.
 *
 * @param {unknown} value Raw status taken from a todo entry.
 * @returns {string} Canonical status for known spellings, `"pending"` for
 *   missing/blank input, otherwise the normalized input string.
 */
function normalizeWriteTodoStatus(value) {
  if (typeof value !== "string") {
    return "pending";
  }
  const normalized = value.trim().toLowerCase().replace(/[\s-]+/gu, "_");
  if (!normalized) {
    return "pending";
  }
  return WRITE_TODO_STATUS_ALIASES.get(normalized) ?? normalized;
}
582
601
  function normalizeTaskArgs(args) {
583
602
  const description = typeof args.description === "string" && args.description.trim().length > 0
584
603
  ? args.description
@@ -1,4 +1,3 @@
1
- import { isLowSignalTodoContent } from "../adapter/runtime-adapter-support.js";
2
1
  import { extractReasoningText, extractVisibleOutput, hasToolCalls, readTextContent } from "./output-parsing.js";
3
2
  const MAX_STREAM_INLINE_TEXT_CHARS = 12_000;
4
3
  const STREAM_PREVIEW_TEXT_CHARS = 2_000;
@@ -205,31 +204,6 @@ function isErrorLikeToolOutput(value) {
205
204
  || /^command failed:/i.test(firstNonEmptyLine)
206
205
  || /^stderr:/i.test(firstNonEmptyLine);
207
206
  }
208
- function isEmptyInitialWriteTodosResult(value) {
209
- if (typeof value !== "object" || !value || Array.isArray(value)) {
210
- return false;
211
- }
212
- const typed = value;
213
- if (typeof typed.summary === "object" && typed.summary && Array.isArray(typed.summary.items)) {
214
- return (typed.summary.items).length === 0;
215
- }
216
- if (typeof typed.update === "object" && typed.update && Array.isArray(typed.update.todos)) {
217
- return (typed.update.todos).length === 0;
218
- }
219
- return false;
220
- }
221
- function hasLowSignalInitialWriteTodos(value) {
222
- if (typeof value !== "object" || !value || Array.isArray(value)) {
223
- return false;
224
- }
225
- const typed = value;
226
- const items = typeof typed.summary === "object" && typed.summary && Array.isArray(typed.summary.items)
227
- ? (typed.summary.items ?? [])
228
- : typeof typed.update === "object" && typed.update && Array.isArray(typed.update.todos)
229
- ? (typed.update.todos ?? [])
230
- : [];
231
- return items.length > 0 && items.every((item) => typeof item?.content === "string" && isLowSignalTodoContent(item.content));
232
- }
233
207
  function isToolMessageLike(value) {
234
208
  if (typeof value !== "object" || value === null) {
235
209
  return false;
@@ -326,12 +300,6 @@ export function extractToolResult(event) {
326
300
  const normalizedOutput = typeof rawOutput === "string"
327
301
  ? parseMaybeJson(rawOutput)
328
302
  : unwrapToolMessageOutput(rawOutput);
329
- if (!isToolError && toolName === "write_todos" && isEmptyInitialWriteTodosResult(normalizedOutput)) {
330
- throw new Error("Error invoking tool 'write_todos' with kwargs {\"todos\":[]} with error: Error: Initial write_todos call cannot use an empty todo list. Send the concrete task steps with both content and status.");
331
- }
332
- if (!isToolError && toolName === "write_todos" && hasLowSignalInitialWriteTodos(normalizedOutput)) {
333
- throw new Error("Error invoking tool 'write_todos' with placeholder todo content with error: Error: Initial write_todos call must use descriptive task content. Do not use placeholder entries like '1', '2', or 'step 1'.");
334
- }
335
303
  return {
336
304
  toolName,
337
305
  output: sanitizeStreamPayload(normalizedOutput),
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@botbotgo/agent-harness",
3
- "version": "0.0.463",
3
+ "version": "0.0.464",
4
4
  "description": "Workspace runtime for multi-agent applications",
5
5
  "license": "MIT",
6
6
  "type": "module",