@botbotgo/agent-harness 0.0.463 → 0.0.465
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/package-version.d.ts +1 -1
- package/dist/package-version.js +1 -1
- package/dist/resources/prompts/runtime/write-todos-full-entry.md +1 -1
- package/dist/runtime/adapter/flow/stream-runtime.js +101 -6
- package/dist/runtime/adapter/invocation-result.js +15 -2
- package/dist/runtime/adapter/local-tool-invocation.js +49 -1
- package/dist/runtime/adapter/runtime-adapter-support.d.ts +0 -1
- package/dist/runtime/adapter/runtime-adapter-support.js +10 -7
- package/dist/runtime/adapter/stream-event-projection.d.ts +1 -0
- package/dist/runtime/adapter/stream-event-projection.js +75 -16
- package/dist/runtime/adapter/tool/builtin-middleware-tools.js +1 -9
- package/dist/runtime/adapter/tool/tool-arguments.js +145 -10
- package/dist/runtime/agent-runtime-adapter.d.ts +12 -0
- package/dist/runtime/agent-runtime-adapter.js +217 -29
- package/dist/runtime/parsing/output-recovery.js +2 -1
- package/dist/runtime/parsing/output-tool-args.js +20 -1
- package/dist/runtime/parsing/stream-event-parsing.js +0 -32
- package/package.json +1 -1
package/dist/package-version.d.ts
CHANGED
|
@@ -1,2 +1,2 @@
|
|
|
1
|
-
export declare const AGENT_HARNESS_VERSION = "0.0.463";
|
|
1
|
+
export declare const AGENT_HARNESS_VERSION = "0.0.465";
|
|
2
2
|
export declare const AGENT_HARNESS_RELEASE_DATE = "2026-05-04";
|
package/dist/package-version.js
CHANGED
|
@@ -1,2 +1,2 @@
|
|
|
1
|
-
export const AGENT_HARNESS_VERSION = "0.0.463";
|
|
1
|
+
export const AGENT_HARNESS_VERSION = "0.0.465";
|
|
2
2
|
export const AGENT_HARNESS_RELEASE_DATE = "2026-05-04";
|
package/dist/resources/prompts/runtime/write-todos-full-entry.md
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
When calling write_todos, every todo item must include both content and status. Do not send status-only updates. Retry by resending the full todo entry with the original content preserved.
|
|
1
|
+
When calling write_todos, every todo item must include both content and status. Use only these status values: pending, in_progress, completed. Do not send aliases such as not_started, open, active, done, blocked, failed, or cancelled. Do not send status-only updates. Retry by resending the full todo entry with the original content preserved.
|
package/dist/runtime/adapter/flow/stream-runtime.js
CHANGED
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
import { extractVisibleOutput, isToolCallRecoveryFailure, isRetrySafeInvalidToolSelectionError, appendToolRecoveryInstruction, resolveMissingPlanRecoveryInstruction, resolveExecutionWithoutToolEvidenceTextInstruction, shouldValidateExecutionWithoutToolEvidence, resolveToolCallRecoveryInstruction, sanitizeVisibleText, EXECUTION_WITH_TOOL_EVIDENCE_RETRY_INSTRUCTION, INVALID_TOOL_SELECTION_RECOVERY_INSTRUCTION, } from "../../parsing/output-parsing.js";
|
|
2
|
-
import { DELEGATED_TASK_FAILURE_RECOVERY_INSTRUCTION, DELEGATION_ONLY_RECOVERY_INSTRUCTION, } from "../../prompts/runtime-prompts.js";
|
|
2
|
+
import { DELEGATED_TASK_FAILURE_RECOVERY_INSTRUCTION, DELEGATION_ONLY_RECOVERY_INSTRUCTION, REQUIRED_EXECUTION_CONTINUATION_INSTRUCTION, } from "../../prompts/runtime-prompts.js";
|
|
3
3
|
import { buildInvocationRequest } from "../model/invocation-request.js";
|
|
4
4
|
import { PROMPTED_JSON_TOOL_POLICY_KEY, withPromptedJsonToolPolicy } from "../model/prompted-json-tool-policy.js";
|
|
5
5
|
import { buildRawModelMessages } from "../model/message-assembly.js";
|
|
@@ -137,6 +137,26 @@ function hasIncompletePlanOutput(value) {
|
|
|
137
137
|
}
|
|
138
138
|
return null;
|
|
139
139
|
}
|
|
140
|
+
function hasFailedTodos(value) {
|
|
141
|
+
if (Array.isArray(value)) {
|
|
142
|
+
return value.some((todo) => hasFailedTodos(todo));
|
|
143
|
+
}
|
|
144
|
+
if (typeof value !== "object" || value === null) {
|
|
145
|
+
return false;
|
|
146
|
+
}
|
|
147
|
+
const typed = value;
|
|
148
|
+
if (typeof typed.status === "string" && typed.status.trim().toLowerCase() === "failed") {
|
|
149
|
+
return true;
|
|
150
|
+
}
|
|
151
|
+
return hasFailedTodos(typed.todos)
|
|
152
|
+
|| hasFailedTodos(typed.update)
|
|
153
|
+
|| hasFailedTodos(typed.data)
|
|
154
|
+
|| hasFailedTodos(typed.output)
|
|
155
|
+
|| hasFailedTodos(typed.summary);
|
|
156
|
+
}
|
|
157
|
+
function hasFailedPlanStateInExecutedToolResults(executedToolResults) {
|
|
158
|
+
return executedToolResults.some((item) => isPlanToolName(item.toolName) && hasFailedTodos(item.output));
|
|
159
|
+
}
|
|
140
160
|
function normalizePlanToolName(toolName) {
|
|
141
161
|
return typeof toolName === "string" ? toolName.trim().toLowerCase().replace(/[\s-]+/gu, "_") : "";
|
|
142
162
|
}
|
|
@@ -199,6 +219,9 @@ function buildExecutionRecoveryEvidence(params) {
|
|
|
199
219
|
hasNonTodoToolResultEvidence: hasNonTodoToolEvidence(executedToolResults) || projectionState.emittedNonTodoToolResult || projectionState.emittedToolError,
|
|
200
220
|
hasSuccessfulNonTodoToolResultEvidence: hasSuccessfulNonTodoToolEvidence(executedToolResults) || projectionState.emittedSuccessfulNonTodoToolResult,
|
|
201
221
|
hasIncompletePlanState: projectionState.hasIncompletePlanState || hasIncompletePlanStateInExecutedToolResults(executedToolResults),
|
|
222
|
+
hasFailedPlanState: (projectionState.hasFailedPlanState || hasFailedPlanStateInExecutedToolResults(executedToolResults))
|
|
223
|
+
&& !projectionState.emittedSuccessfulNonTodoToolResult
|
|
224
|
+
&& !hasSuccessfulNonTodoToolEvidence(executedToolResults),
|
|
202
225
|
hasPlanStateEvidence: projectionState.sawPlanState || hasIncompletePlanStateInExecutedToolResults(executedToolResults),
|
|
203
226
|
hasOpenTaskDelegation: projectionState.openTaskDelegations > 0,
|
|
204
227
|
hasFailedTaskDelegation: projectionState.hasFailedTaskDelegation
|
|
@@ -213,6 +236,7 @@ function buildExecutionRecoveryEvidence(params) {
|
|
|
213
236
|
}
|
|
214
237
|
function hasUnresolvedExecution(evidence) {
|
|
215
238
|
return (evidence.hasIncompletePlanState
|
|
239
|
+
|| evidence.hasFailedPlanState
|
|
216
240
|
|| evidence.hasFailedTaskDelegation
|
|
217
241
|
|| evidence.hasOpenTaskDelegation);
|
|
218
242
|
}
|
|
@@ -257,6 +281,14 @@ function buildDeterministicFinalFromStreamToolEvidence(executedToolResults) {
|
|
|
257
281
|
evidence.length > 0 ? evidence.join("\n\n") : "(no non-planning tool evidence captured)",
|
|
258
282
|
].join("\n");
|
|
259
283
|
}
|
|
284
|
+
function latestStreamToolErrorRecoveryInstruction(executedToolResults) {
|
|
285
|
+
const latest = [...executedToolResults].reverse().find((item) => item.isError === true);
|
|
286
|
+
if (!latest) {
|
|
287
|
+
return null;
|
|
288
|
+
}
|
|
289
|
+
const message = typeof latest.output === "string" ? latest.output : JSON.stringify(latest.output);
|
|
290
|
+
return resolveToolCallRecoveryInstruction(new Error(message)) ?? REQUIRED_EXECUTION_CONTINUATION_INSTRUCTION;
|
|
291
|
+
}
|
|
260
292
|
function hasUsefulVisibleSynthesis(value) {
|
|
261
293
|
const trimmed = value.trim();
|
|
262
294
|
if (trimmed.length < 80) {
|
|
@@ -381,6 +413,9 @@ function createUnresolvedExecutionError(evidence) {
|
|
|
381
413
|
if (evidence.hasIncompletePlanState) {
|
|
382
414
|
reasons.push("plan state still has unfinished work");
|
|
383
415
|
}
|
|
416
|
+
if (evidence.hasFailedPlanState) {
|
|
417
|
+
reasons.push("plan state failed before non-TODO evidence returned");
|
|
418
|
+
}
|
|
384
419
|
if (evidence.hasFailedTaskDelegation) {
|
|
385
420
|
reasons.push("delegated task failed before surfacing final findings");
|
|
386
421
|
}
|
|
@@ -463,7 +498,8 @@ function projectLocalToolExecutionProfileChunks(executedToolResults, prefix) {
|
|
|
463
498
|
}
|
|
464
499
|
export async function* streamRuntimeExecution(options) {
|
|
465
500
|
let request = buildInvocationRequest(options.binding, options.history, options.input, options.runtimeOptions);
|
|
466
|
-
if (requiresPlanEvidence(options.binding)) {
|
|
501
|
+
if (requiresPlanEvidence(options.binding)
|
|
502
|
+
&& options.runtimeOptions.suppressInitialRequiredPlanInstruction !== true) {
|
|
467
503
|
request = withPromptedJsonToolPolicy(appendToolRecoveryInstruction(request, buildInitialRequiredPlanInstruction(options.primaryTools)), "planning");
|
|
468
504
|
}
|
|
469
505
|
let emittedUnsafeStreamSideEffects = false;
|
|
@@ -472,6 +508,14 @@ export async function* streamRuntimeExecution(options) {
|
|
|
472
508
|
const deferredStreamContent = [];
|
|
473
509
|
let sawRetrySafeInvalidToolSelectionError = false;
|
|
474
510
|
const projectionState = createStreamEventProjectionState();
|
|
511
|
+
if (options.runtimeOptions.externalPlanEvidence === true) {
|
|
512
|
+
projectionState.sawPlanState = true;
|
|
513
|
+
yield {
|
|
514
|
+
kind: "commentary",
|
|
515
|
+
content: `${options.binding.agent.id}: TODO evidence observed.`,
|
|
516
|
+
agentId: options.binding.agent.id,
|
|
517
|
+
};
|
|
518
|
+
}
|
|
475
519
|
const requestId = options.runtimeOptions.requestId ?? options.sessionId;
|
|
476
520
|
const buildRunnableConfig = (extra) => ({
|
|
477
521
|
...(options.resolveInvocationConfig
|
|
@@ -515,6 +559,7 @@ export async function* streamRuntimeExecution(options) {
|
|
|
515
559
|
try {
|
|
516
560
|
const rawStreamInput = buildRawModelMessages(options.binding, options.getSystemPrompt(options.binding), options.history, options.input, options.runtimeOptions.memoryContext);
|
|
517
561
|
const streamInput = requiresPlanEvidence(options.binding)
|
|
562
|
+
&& options.runtimeOptions.suppressInitialRequiredPlanInstruction !== true
|
|
518
563
|
? withPromptedJsonToolPolicy(rawStreamInput, "planning")
|
|
519
564
|
: rawStreamInput;
|
|
520
565
|
stream = await options.withTimeout(() => options.langChainStreamModel.stream(streamInput), computeRemainingTimeoutMs(options.streamDeadlineAt, options.invokeTimeoutMs), "model stream start", "stream");
|
|
@@ -699,6 +744,7 @@ export async function* streamRuntimeExecution(options) {
|
|
|
699
744
|
const eventContainsNonRetrySafeChunk = projectedChunks.some((chunk) => chunk.kind !== "upstream-event"
|
|
700
745
|
&& chunk.kind !== "content"
|
|
701
746
|
&& !(chunk.kind === "tool-result" && isPlanToolName(chunk.toolName))
|
|
747
|
+
&& !(chunk.kind === "tool-result" && chunk.isError === true)
|
|
702
748
|
&& !(chunk.kind === "tool-result" && chunk.isError === true && isRetrySafeInvalidToolSelectionError(chunk.output)));
|
|
703
749
|
const hadPriorPlanToolResult = completedPlanToolResultCount > 0;
|
|
704
750
|
const repeatedPlanToolResultBeforeEvidence = requiresPlanEvidence(options.binding)
|
|
@@ -796,10 +842,23 @@ export async function* streamRuntimeExecution(options) {
|
|
|
796
842
|
})
|
|
797
843
|
: null;
|
|
798
844
|
const terminalDelegationOnlyRecoveryInstruction = resolveDelegationOnlyRecoveryInstruction(options.binding, terminalExecutionEvidence);
|
|
845
|
+
const terminalPrematurePlanCloseRecoveryInstruction = requiresPlanEvidence(options.binding)
|
|
846
|
+
&& terminalExecutionEvidence.hasPlanStateEvidence
|
|
847
|
+
&& !terminalExecutionEvidence.hasSuccessfulNonTodoToolResultEvidence
|
|
848
|
+
? buildRunEvidenceAfterPlanInstruction(options.primaryTools)
|
|
849
|
+
: null;
|
|
799
850
|
if (!emittedUnsafeStreamSideEffects
|
|
800
|
-
&& (terminalMissingPlanRecoveryInstruction || terminalDelegationOnlyRecoveryInstruction)) {
|
|
851
|
+
&& (terminalMissingPlanRecoveryInstruction
|
|
852
|
+
|| terminalPrematurePlanCloseRecoveryInstruction
|
|
853
|
+
|| terminalDelegationOnlyRecoveryInstruction)) {
|
|
801
854
|
earlyStreamRecoveryInstruction =
|
|
802
|
-
terminalMissingPlanRecoveryInstruction ?? terminalDelegationOnlyRecoveryInstruction;
|
|
855
|
+
terminalMissingPlanRecoveryInstruction
|
|
856
|
+
?? terminalPrematurePlanCloseRecoveryInstruction
|
|
857
|
+
?? terminalDelegationOnlyRecoveryInstruction;
|
|
858
|
+
earlyStreamRecoverySuppressInitialPlan = terminalPrematurePlanCloseRecoveryInstruction !== null;
|
|
859
|
+
if (terminalPrematurePlanCloseRecoveryInstruction) {
|
|
860
|
+
earlyStreamExternalPlanEvidenceTools = resolveSingleConfiguredPlanEvidenceTool(options.primaryTools);
|
|
861
|
+
}
|
|
803
862
|
break;
|
|
804
863
|
}
|
|
805
864
|
if (requiresPlanEvidence(options.binding) && hasCompletedPlanWithEvidence(terminalExecutionEvidence)) {
|
|
@@ -820,6 +879,7 @@ export async function* streamRuntimeExecution(options) {
|
|
|
820
879
|
&& !hasMissingDelegatedExecutionEvidence(terminalExecutionEvidence)
|
|
821
880
|
&& !hasMissingDelegatedFindings(terminalExecutionEvidence)
|
|
822
881
|
&& !terminalMissingPlanRecoveryInstruction
|
|
882
|
+
&& !terminalPrematurePlanCloseRecoveryInstruction
|
|
823
883
|
&& !terminalDelegationOnlyRecoveryInstruction) {
|
|
824
884
|
if (deferredStreamContent.length > 0) {
|
|
825
885
|
yield* flushDeferredStreamContent();
|
|
@@ -842,6 +902,35 @@ export async function* streamRuntimeExecution(options) {
|
|
|
842
902
|
const recoveredToolResults = Array.isArray(recovered.metadata?.executedToolResults)
|
|
843
903
|
? recovered.metadata.executedToolResults
|
|
844
904
|
: [];
|
|
905
|
+
const originalExecutionEvidence = buildExecutionRecoveryEvidence({ projectionState });
|
|
906
|
+
const recoveredExecutionEvidence = buildExecutionRecoveryEvidence({
|
|
907
|
+
projectionState: createStreamEventProjectionState(),
|
|
908
|
+
executedToolResults: recoveredToolResults,
|
|
909
|
+
});
|
|
910
|
+
const recoveredCarriesExecutionEvidence = recoveredExecutionEvidence.hasSuccessfulNonTodoToolResultEvidence
|
|
911
|
+
|| recoveredExecutionEvidence.hasOpenTaskDelegation
|
|
912
|
+
|| recoveredExecutionEvidence.hasDelegatedExecutionToolEvidence;
|
|
913
|
+
const recoveredHasUnresolvedExecution = hasUnresolvedExecution(recoveredExecutionEvidence)
|
|
914
|
+
|| hasMissingDelegatedExecutionEvidence(recoveredExecutionEvidence)
|
|
915
|
+
|| hasMissingDelegatedFindings(recoveredExecutionEvidence)
|
|
916
|
+
|| (!recoveredCarriesExecutionEvidence
|
|
917
|
+
&& (hasUnresolvedExecution(originalExecutionEvidence)
|
|
918
|
+
|| hasMissingDelegatedExecutionEvidence(originalExecutionEvidence)
|
|
919
|
+
|| hasMissingDelegatedFindings(originalExecutionEvidence)));
|
|
920
|
+
if (recoveredHasUnresolvedExecution) {
|
|
921
|
+
const effectiveRecoveryEvidence = recoveredCarriesExecutionEvidence
|
|
922
|
+
? recoveredExecutionEvidence
|
|
923
|
+
: {
|
|
924
|
+
...recoveredExecutionEvidence,
|
|
925
|
+
hasIncompletePlanState: originalExecutionEvidence.hasIncompletePlanState,
|
|
926
|
+
hasFailedPlanState: originalExecutionEvidence.hasFailedPlanState,
|
|
927
|
+
hasFailedTaskDelegation: originalExecutionEvidence.hasFailedTaskDelegation,
|
|
928
|
+
hasOpenTaskDelegation: originalExecutionEvidence.hasOpenTaskDelegation,
|
|
929
|
+
hasDelegatedAgentWithConfiguredTools: originalExecutionEvidence.hasDelegatedAgentWithConfiguredTools,
|
|
930
|
+
hasDelegatedExecutionToolEvidence: originalExecutionEvidence.hasDelegatedExecutionToolEvidence,
|
|
931
|
+
};
|
|
932
|
+
throw createUnresolvedExecutionError(effectiveRecoveryEvidence);
|
|
933
|
+
}
|
|
845
934
|
for (const toolResult of recoveredToolResults) {
|
|
846
935
|
yield {
|
|
847
936
|
kind: "tool-result",
|
|
@@ -931,6 +1020,9 @@ export async function* streamRuntimeExecution(options) {
|
|
|
931
1020
|
const streamedRuntimeFailureRecoveryInstruction = projectionState.emittedOutput
|
|
932
1021
|
? resolveStreamedRuntimeFailureRecoveryInstruction(projectionState.emittedOutput, streamedExecutionEvidence)
|
|
933
1022
|
: null;
|
|
1023
|
+
const streamedToolErrorRecoveryInstruction = !emittedUnsafeStreamSideEffects
|
|
1024
|
+
? latestStreamToolErrorRecoveryInstruction(streamedToolResults)
|
|
1025
|
+
: null;
|
|
934
1026
|
const missingPlanRecoveryInstruction = !hasUnresolvedExecution(streamedExecutionEvidence) && !delegatedExecutionRecoveryInstruction
|
|
935
1027
|
? resolveMissingPlanRecoveryInstruction({
|
|
936
1028
|
request,
|
|
@@ -943,7 +1035,8 @@ export async function* streamRuntimeExecution(options) {
|
|
|
943
1035
|
: null;
|
|
944
1036
|
const retryInstruction = !emittedUnsafeStreamSideEffects && sawRetrySafeInvalidToolSelectionError
|
|
945
1037
|
? INVALID_TOOL_SELECTION_RECOVERY_INSTRUCTION
|
|
946
|
-
: delegatedExecutionRecoveryInstruction
|
|
1038
|
+
: streamedToolErrorRecoveryInstruction
|
|
1039
|
+
?? delegatedExecutionRecoveryInstruction
|
|
947
1040
|
?? streamedIncompletePlanRecoveryInstruction
|
|
948
1041
|
?? streamedPrematurePlanCloseRecoveryInstruction
|
|
949
1042
|
?? streamedRuntimeFailureRecoveryInstruction
|
|
@@ -970,7 +1063,7 @@ export async function* streamRuntimeExecution(options) {
|
|
|
970
1063
|
}
|
|
971
1064
|
const retriedVisibleOutput = retried.output ? toVisibleContent(retried.output) : "";
|
|
972
1065
|
const retriedIsDelegationFailureFinalReport = isDelegationFailureFinalReport(originalExecutionEvidence, executedToolResults, retriedVisibleOutput);
|
|
973
|
-
const retriedCarriesExecutionEvidence = retriedExecutionEvidence.
|
|
1066
|
+
const retriedCarriesExecutionEvidence = retriedExecutionEvidence.hasSuccessfulNonTodoToolResultEvidence
|
|
974
1067
|
|| retriedExecutionEvidence.hasOpenTaskDelegation
|
|
975
1068
|
|| retriedExecutionEvidence.hasDelegatedExecutionToolEvidence;
|
|
976
1069
|
const retriedHasUnresolvedExecution = !retriedIsDelegationFailureFinalReport
|
|
@@ -986,6 +1079,7 @@ export async function* streamRuntimeExecution(options) {
|
|
|
986
1079
|
: {
|
|
987
1080
|
...retriedExecutionEvidence,
|
|
988
1081
|
hasIncompletePlanState: originalExecutionEvidence.hasIncompletePlanState,
|
|
1082
|
+
hasFailedPlanState: originalExecutionEvidence.hasFailedPlanState,
|
|
989
1083
|
hasFailedTaskDelegation: originalExecutionEvidence.hasFailedTaskDelegation,
|
|
990
1084
|
hasOpenTaskDelegation: originalExecutionEvidence.hasOpenTaskDelegation,
|
|
991
1085
|
hasDelegatedAgentWithConfiguredTools: originalExecutionEvidence.hasDelegatedAgentWithConfiguredTools,
|
|
@@ -1227,6 +1321,7 @@ export async function* streamRuntimeExecution(options) {
|
|
|
1227
1321
|
: {
|
|
1228
1322
|
...recoveredExecutionEvidence,
|
|
1229
1323
|
hasIncompletePlanState: originalExecutionEvidence.hasIncompletePlanState,
|
|
1324
|
+
hasFailedPlanState: originalExecutionEvidence.hasFailedPlanState,
|
|
1230
1325
|
hasFailedTaskDelegation: originalExecutionEvidence.hasFailedTaskDelegation,
|
|
1231
1326
|
hasOpenTaskDelegation: originalExecutionEvidence.hasOpenTaskDelegation,
|
|
1232
1327
|
hasDelegatedAgentWithConfiguredTools: originalExecutionEvidence.hasDelegatedAgentWithConfiguredTools,
|
package/dist/runtime/adapter/invocation-result.js
CHANGED
|
@@ -39,7 +39,8 @@ function hasIncompleteStateSnapshotPlan(stateSnapshot) {
|
|
|
39
39
|
function hasStateSnapshotPlan(stateSnapshot) {
|
|
40
40
|
return typeof stateSnapshot === "object"
|
|
41
41
|
&& stateSnapshot !== null
|
|
42
|
-
&& Array.isArray(stateSnapshot.todos);
|
|
42
|
+
&& Array.isArray(stateSnapshot.todos)
|
|
43
|
+
&& (stateSnapshot.todos).length > 0;
|
|
43
44
|
}
|
|
44
45
|
function normalizePlanToolName(toolName) {
|
|
45
46
|
return typeof toolName === "string" ? toolName.trim().toLowerCase().replace(/[\s-]+/gu, "_") : "";
|
|
@@ -54,7 +55,19 @@ function isPlanToolName(toolName) {
|
|
|
54
55
|
|| normalized === "call_read_todos";
|
|
55
56
|
}
|
|
56
57
|
function hasPlanToolEvidence(executedToolResults) {
|
|
57
|
-
return executedToolResults.some((item) =>
|
|
58
|
+
return executedToolResults.some((item) => {
|
|
59
|
+
if (!isPlanToolName(item.toolName)) {
|
|
60
|
+
return false;
|
|
61
|
+
}
|
|
62
|
+
const output = typeof item.output === "object" && item.output !== null ? item.output : null;
|
|
63
|
+
const summaryContainer = typeof output?.summary === "object" && output.summary !== null
|
|
64
|
+
? output.summary
|
|
65
|
+
: null;
|
|
66
|
+
const counts = typeof summaryContainer?.summary === "object" && summaryContainer.summary !== null
|
|
67
|
+
? summaryContainer.summary
|
|
68
|
+
: null;
|
|
69
|
+
return !(typeof counts?.total === "number" && counts.total <= 0);
|
|
70
|
+
});
|
|
58
71
|
}
|
|
59
72
|
function hasExecutionToolEvidence(executedToolResults) {
|
|
60
73
|
return executedToolResults.some((item) => item.isError !== true && !isPlanToolName(item.toolName));
|
package/dist/runtime/adapter/local-tool-invocation.js
CHANGED
|
@@ -13,8 +13,10 @@ const TOOL_FOLLOW_UP_INSTRUCTION = "One or more tool results are already availab
|
|
|
13
13
|
const DEFAULT_MAX_TOOL_ITERATIONS = 10_000;
|
|
14
14
|
const MAX_REPEATED_RECOVERY_WITHOUT_PROGRESS = 2;
|
|
15
15
|
const MAX_REPEATED_PLAN_ONLY_AFTER_PLAN = 2;
|
|
16
|
+
const MAX_REPEATED_INVALID_EXTERNAL_PLAN_EVIDENCE_SELECTION = 2;
|
|
16
17
|
const REQUIRED_PLAN_CONTRACT_MARKER = "This agent has a required visible planning contract.";
|
|
17
18
|
const INITIAL_WRITE_TODOS_MARKER = "Your first action for this request must be write_todos";
|
|
19
|
+
const REQUIRED_SINGLE_EXTERNAL_PLAN_EVIDENCE_TOOL_INSTRUCTION = "The TODO board already exists. The next action must be exactly one non-planning evidence tool call. Do not call write_todos or read_todos, and do not call multiple tools in the same response.";
|
|
18
20
|
function resolveSingleBootstrapEvidenceTool(primaryTools) {
|
|
19
21
|
const evidenceTools = primaryTools
|
|
20
22
|
.map((tool) => typeof tool.name === "string" ? tool.name.trim() : "")
|
|
@@ -108,7 +110,11 @@ function readPlanStateSummary(output) {
|
|
|
108
110
|
return null;
|
|
109
111
|
}
|
|
110
112
|
const typedCounts = counts;
|
|
113
|
+
if (typeof typedCounts.total === "number" && typedCounts.total <= 0) {
|
|
114
|
+
return null;
|
|
115
|
+
}
|
|
111
116
|
return {
|
|
117
|
+
...(typeof typedCounts.total === "number" ? { total: typedCounts.total } : {}),
|
|
112
118
|
pending: typeof typedCounts.pending === "number" ? typedCounts.pending : 0,
|
|
113
119
|
inProgress: typeof typedCounts.inProgress === "number" ? typedCounts.inProgress : 0,
|
|
114
120
|
};
|
|
@@ -138,6 +144,10 @@ function isPlanToolName(toolName) {
|
|
|
138
144
|
|| normalized === "call_write_todos"
|
|
139
145
|
|| normalized === "call_read_todos";
|
|
140
146
|
}
|
|
147
|
+
function isPlanToolCall(toolCall, toolNameMapping, primaryTools) {
|
|
148
|
+
const resolvedToolName = resolveModelFacingToolName(String(toolCall.name ?? ""), toolNameMapping, primaryTools);
|
|
149
|
+
return isPlanToolName(toolCall.name) || isPlanToolName(resolvedToolName);
|
|
150
|
+
}
|
|
141
151
|
function isFallbackTodoCompletionToolCall(toolCall) {
|
|
142
152
|
return typeof toolCall.id === "string"
|
|
143
153
|
&& toolCall.id.startsWith("fallback-complete-")
|
|
@@ -218,7 +228,11 @@ function extractLatestUserInput(request) {
|
|
|
218
228
|
const messages = Array.isArray(typedRequest.messages) ? typedRequest.messages : [];
|
|
219
229
|
for (let index = messages.length - 1; index >= 0; index -= 1) {
|
|
220
230
|
const candidate = messages[index];
|
|
221
|
-
|
|
231
|
+
const role = typeof candidate?.role === "string" ? candidate.role.trim().toLowerCase() : "";
|
|
232
|
+
const messageType = typeof candidate?._getType === "function" ? String(candidate._getType()).trim().toLowerCase() : "";
|
|
233
|
+
const constructorType = Array.isArray(candidate?.id) ? String(candidate.id.at(-1)).trim().toLowerCase() : "";
|
|
234
|
+
const isUserMessage = role === "user" || role === "human" || messageType === "human" || constructorType === "humanmessage";
|
|
235
|
+
if (!isUserMessage || typeof candidate?.content !== "string") {
|
|
222
236
|
continue;
|
|
223
237
|
}
|
|
224
238
|
const normalized = candidate.content.trim();
|
|
@@ -280,6 +294,7 @@ export async function runLocalToolInvocationLoop({ binding, request, primaryTool
|
|
|
280
294
|
let lastRecoveryExecutedCount = -1;
|
|
281
295
|
let repeatedRecoveryWithoutProgress = 0;
|
|
282
296
|
let repeatedPlanOnlyAfterPlan = 0;
|
|
297
|
+
let repeatedInvalidExternalPlanEvidenceSelection = 0;
|
|
283
298
|
let pendingResult;
|
|
284
299
|
let result;
|
|
285
300
|
const toolCatalog = new Map();
|
|
@@ -418,6 +433,26 @@ export async function runLocalToolInvocationLoop({ binding, request, primaryTool
|
|
|
418
433
|
activeRequest = appendToolRecoveryInstruction(activeRequest, missingPlanRecoveryInstruction);
|
|
419
434
|
continue;
|
|
420
435
|
}
|
|
436
|
+
if (requiresPlanEvidence(binding)
|
|
437
|
+
&& externalPlanEvidence === true
|
|
438
|
+
&& hasPlanStateEvidence(executedToolResults, externalPlanEvidence)
|
|
439
|
+
&& !hasNonTodoToolEvidence(executedToolResults)
|
|
440
|
+
&& toolCalls.length > 0
|
|
441
|
+
&& (toolCalls.length !== 1 || isPlanToolCall(toolCalls[0], toolNameMapping, primaryTools))) {
|
|
442
|
+
repeatedInvalidExternalPlanEvidenceSelection += 1;
|
|
443
|
+
if (repeatedInvalidExternalPlanEvidenceSelection >= MAX_REPEATED_INVALID_EXTERNAL_PLAN_EVIDENCE_SELECTION) {
|
|
444
|
+
throw createToolLoopError({
|
|
445
|
+
reason: "model did not select exactly one non-planning evidence tool during delegated plan evidence recovery",
|
|
446
|
+
iteration,
|
|
447
|
+
maxToolIterations,
|
|
448
|
+
toolCalls,
|
|
449
|
+
executedToolResults,
|
|
450
|
+
});
|
|
451
|
+
}
|
|
452
|
+
activeRequest = appendToolRecoveryInstruction(activeRequest, REQUIRED_SINGLE_EXTERNAL_PLAN_EVIDENCE_TOOL_INSTRUCTION);
|
|
453
|
+
pendingResult = undefined;
|
|
454
|
+
continue;
|
|
455
|
+
}
|
|
421
456
|
if (requiresPlanEvidence(binding)
|
|
422
457
|
&& hasPlanStateEvidence(executedToolResults, externalPlanEvidence)
|
|
423
458
|
&& !hasNonTodoToolEvidence(executedToolResults)
|
|
@@ -425,6 +460,12 @@ export async function runLocalToolInvocationLoop({ binding, request, primaryTool
|
|
|
425
460
|
&& toolCalls.every((toolCall) => isPlanToolName(toolCall.name))) {
|
|
426
461
|
repeatedPlanOnlyAfterPlan += 1;
|
|
427
462
|
if (repeatedPlanOnlyAfterPlan >= MAX_REPEATED_PLAN_ONLY_AFTER_PLAN) {
|
|
463
|
+
if (hasNonTodoToolEvidence(executedToolResults)) {
|
|
464
|
+
return {
|
|
465
|
+
result: buildDeterministicFinalFromToolEvidence(executedToolResults),
|
|
466
|
+
executedToolResults,
|
|
467
|
+
};
|
|
468
|
+
}
|
|
428
469
|
throw createToolLoopError({
|
|
429
470
|
reason: "model repeatedly selected only planning tools after the todo board already existed and before any non-planning evidence tool returned",
|
|
430
471
|
iteration,
|
|
@@ -434,6 +475,12 @@ export async function runLocalToolInvocationLoop({ binding, request, primaryTool
|
|
|
434
475
|
});
|
|
435
476
|
}
|
|
436
477
|
if (iteration + 1 === maxToolIterations) {
|
|
478
|
+
if (hasNonTodoToolEvidence(executedToolResults)) {
|
|
479
|
+
return {
|
|
480
|
+
result: buildDeterministicFinalFromToolEvidence(executedToolResults),
|
|
481
|
+
executedToolResults,
|
|
482
|
+
};
|
|
483
|
+
}
|
|
437
484
|
throw createToolLoopError({
|
|
438
485
|
reason: "maximum iterations reached",
|
|
439
486
|
iteration,
|
|
@@ -448,6 +495,7 @@ export async function runLocalToolInvocationLoop({ binding, request, primaryTool
|
|
|
448
495
|
}
|
|
449
496
|
repeatedRecoveryWithoutProgress = 0;
|
|
450
497
|
repeatedPlanOnlyAfterPlan = 0;
|
|
498
|
+
repeatedInvalidExternalPlanEvidenceSelection = 0;
|
|
451
499
|
const canReplayToolCalls = usedExternalPlanEvidenceToolThisIteration
|
|
452
500
|
|| canReplayToolCallsLocally(binding, toolCalls, primaryTools, toolNameMapping, executableTools, builtinExecutableTools);
|
|
453
501
|
debugLocalToolReplay({
|
package/dist/runtime/adapter/runtime-adapter-support.d.ts
CHANGED
|
@@ -5,7 +5,6 @@ export type BuiltinTodoSnapshot = {
|
|
|
5
5
|
};
|
|
6
6
|
export declare function truncateLines(lines: string[], maxChars?: number): string;
|
|
7
7
|
export declare function summarizeBuiltinWriteTodosArgs(args: Record<string, unknown>): BuiltinTodoSnapshot;
|
|
8
|
-
export declare function isLowSignalTodoContent(content: string): boolean;
|
|
9
8
|
export declare function formatBuiltinTodoSnapshot(snapshot: BuiltinTodoSnapshot): string;
|
|
10
9
|
export declare function buildRequestPlanState(input: {
|
|
11
10
|
sessionId: string;
|
package/dist/runtime/adapter/runtime-adapter-support.js
CHANGED
|
@@ -69,6 +69,13 @@ function readTodoContent(todo) {
|
|
|
69
69
|
}
|
|
70
70
|
return "";
|
|
71
71
|
}
|
|
72
|
+
function isLowSignalTodoContent(content) {
|
|
73
|
+
const normalized = content.trim().toLowerCase();
|
|
74
|
+
if (!normalized) {
|
|
75
|
+
return true;
|
|
76
|
+
}
|
|
77
|
+
return /^\d+$/.test(normalized) || /^step\s*\d+$/.test(normalized) || /^todo\s*\d+$/.test(normalized);
|
|
78
|
+
}
|
|
72
79
|
function normalizeTodoStatus(value) {
|
|
73
80
|
if (typeof value !== "string") {
|
|
74
81
|
return "pending";
|
|
@@ -90,6 +97,9 @@ export function summarizeBuiltinWriteTodosArgs(args) {
|
|
|
90
97
|
return [];
|
|
91
98
|
}
|
|
92
99
|
const content = readTodoContent(todo);
|
|
100
|
+
if (isLowSignalTodoContent(content)) {
|
|
101
|
+
return [];
|
|
102
|
+
}
|
|
93
103
|
const status = normalizeTodoStatus(todo.status);
|
|
94
104
|
const metadata = isRecord(todo.metadata) ? todo.metadata : undefined;
|
|
95
105
|
return content ? [{
|
|
@@ -119,13 +129,6 @@ export function summarizeBuiltinWriteTodosArgs(args) {
|
|
|
119
129
|
summary,
|
|
120
130
|
};
|
|
121
131
|
}
|
|
122
|
-
export function isLowSignalTodoContent(content) {
|
|
123
|
-
const normalized = content.trim().toLowerCase();
|
|
124
|
-
if (!normalized) {
|
|
125
|
-
return true;
|
|
126
|
-
}
|
|
127
|
-
return /^\d+$/.test(normalized) || /^step\s*\d+$/.test(normalized) || /^todo\s*\d+$/.test(normalized);
|
|
128
|
-
}
|
|
129
132
|
export function formatBuiltinTodoSnapshot(snapshot) {
|
|
130
133
|
if (snapshot.summary.total === 0) {
|
|
131
134
|
return "No todos tracked.";
|
package/dist/runtime/adapter/stream-event-projection.d.ts
CHANGED
|
@@ -17,6 +17,7 @@ export type StreamEventProjectionState = {
|
|
|
17
17
|
emittedDelegatedTerminalOutput: boolean;
|
|
18
18
|
sawPlanState: boolean;
|
|
19
19
|
hasIncompletePlanState: boolean;
|
|
20
|
+
hasFailedPlanState: boolean;
|
|
20
21
|
openTaskDelegations: number;
|
|
21
22
|
openToolCapableTaskDelegations: number;
|
|
22
23
|
taskDelegationHasToolsStack: boolean[];
|
package/dist/runtime/adapter/stream-event-projection.js
CHANGED
|
@@ -1,6 +1,5 @@
|
|
|
1
1
|
import { extractToolFallbackContext, extractVisibleOutput, readTextContent, sanitizeVisibleText } from "../parsing/output-parsing.js";
|
|
2
2
|
import { salvageFunctionLikeToolCall } from "../parsing/output-tool-args.js";
|
|
3
|
-
import { isLowSignalTodoContent, summarizeBuiltinWriteTodosArgs } from "./runtime-adapter-support.js";
|
|
4
3
|
import { computeIncrementalOutput, extractInterruptPayload, extractReasoningStreamOutput, sanitizeRetainedUpstreamEvent, extractStateStreamOutput, extractTerminalStreamOutput, extractToolResult, extractVisibleStreamOutput, normalizeTerminalOutputKey, } from "../parsing/stream-event-parsing.js";
|
|
5
4
|
import { resolveModelFacingToolName } from "./tool/tool-name-mapping.js";
|
|
6
5
|
export function createStreamEventProjectionState() {
|
|
@@ -20,6 +19,7 @@ export function createStreamEventProjectionState() {
|
|
|
20
19
|
emittedDelegatedTerminalOutput: false,
|
|
21
20
|
sawPlanState: false,
|
|
22
21
|
hasIncompletePlanState: false,
|
|
22
|
+
hasFailedPlanState: false,
|
|
23
23
|
openTaskDelegations: 0,
|
|
24
24
|
openToolCapableTaskDelegations: 0,
|
|
25
25
|
taskDelegationHasToolsStack: [],
|
|
@@ -141,6 +141,9 @@ function readSummaryCounts(summary) {
|
|
|
141
141
|
return null;
|
|
142
142
|
}
|
|
143
143
|
const typed = summary;
|
|
144
|
+
if (typeof typed.total === "number" && typed.total <= 0) {
|
|
145
|
+
return null;
|
|
146
|
+
}
|
|
144
147
|
const hasAnyCountField = typeof typed.pending === "number"
|
|
145
148
|
|| typeof typed.inProgress === "number";
|
|
146
149
|
if (!hasAnyCountField) {
|
|
@@ -151,10 +154,23 @@ function readSummaryCounts(summary) {
|
|
|
151
154
|
inProgress: typeof typed.inProgress === "number" ? typed.inProgress : 0,
|
|
152
155
|
};
|
|
153
156
|
}
|
|
157
|
+
function readSummaryFailureCount(summary) {
|
|
158
|
+
if (typeof summary !== "object" || summary === null) {
|
|
159
|
+
return null;
|
|
160
|
+
}
|
|
161
|
+
const typed = summary;
|
|
162
|
+
if (typeof typed.total === "number" && typed.total <= 0) {
|
|
163
|
+
return null;
|
|
164
|
+
}
|
|
165
|
+
return typeof typed.failed === "number" ? typed.failed : null;
|
|
166
|
+
}
|
|
154
167
|
function hasIncompleteTodosArray(value) {
|
|
155
168
|
if (!Array.isArray(value)) {
|
|
156
169
|
return null;
|
|
157
170
|
}
|
|
171
|
+
if (value.length === 0) {
|
|
172
|
+
return null;
|
|
173
|
+
}
|
|
158
174
|
return value.some((item) => {
|
|
159
175
|
if (typeof item !== "object" || item === null) {
|
|
160
176
|
return false;
|
|
@@ -163,6 +179,20 @@ function hasIncompleteTodosArray(value) {
|
|
|
163
179
|
return status === "pending" || status === "in_progress";
|
|
164
180
|
});
|
|
165
181
|
}
|
|
182
|
+
/**
 * Checks whether a todos array contains an entry whose status is "failed".
 *
 * @param {unknown} value - Candidate todos array.
 * @returns {boolean|null} `null` when `value` is not an array or is an empty
 *   array; otherwise `true` if any non-null object entry has
 *   `status === "failed"`, else `false`.
 */
function hasFailedTodosArray(value) {
    if (!Array.isArray(value) || value.length === 0) {
        return null;
    }
    for (const entry of value) {
        // Entries that are not non-null objects never count as failed.
        const isObjectEntry = entry !== null && typeof entry === "object";
        if (isObjectEntry && entry.status === "failed") {
            return true;
        }
    }
    return false;
}
|
|
166
196
|
function getPlanStateCompleteness(value) {
|
|
167
197
|
if (typeof value !== "object" || value === null) {
|
|
168
198
|
return null;
|
|
@@ -202,6 +232,45 @@ function getPlanStateCompleteness(value) {
|
|
|
202
232
|
}
|
|
203
233
|
return null;
|
|
204
234
|
}
|
|
235
|
+
/**
 * Determines whether a plan-state payload reports any failed todo.
 *
 * Sources are probed in a fixed order and the first one that yields a
 * non-null answer wins:
 *   1. the failed count read from `value.summary`,
 *   2. `value.summary` itself, inspected recursively,
 *   3. the `value.todos` array,
 *   4. the `value.update.todos` array,
 *   5. `value.output`, inspected recursively,
 *   6. `value.data`, inspected recursively.
 *
 * @param {unknown} value - Candidate event or payload.
 * @returns {boolean|null} `true` or `false` once a source answers; `null`
 *   when `value` is not a non-null object or no source answers.
 */
function getPlanStateFailure(value) {
    if (value === null || typeof value !== "object") {
        return null;
    }
    const failedCount = readSummaryFailureCount(value.summary);
    if (failedCount !== null) {
        return failedCount > 0;
    }
    // The recursive call itself returns null for anything that is not a
    // non-null object, so no separate type guard is needed before it.
    const fromSummary = getPlanStateFailure(value.summary);
    if (fromSummary !== null) {
        return fromSummary;
    }
    const fromTodos = hasFailedTodosArray(value.todos);
    if (fromTodos !== null) {
        return fromTodos;
    }
    const update = value.update;
    if (update !== null && typeof update === "object") {
        const fromUpdate = hasFailedTodosArray(update.todos);
        if (fromUpdate !== null) {
            return fromUpdate;
        }
    }
    // Wrapper payloads: probe `output` first, then `data`.
    for (const key of ["output", "data"]) {
        const verdict = getPlanStateFailure(value[key]);
        if (verdict !== null) {
            return verdict;
        }
    }
    return null;
}
|
|
205
274
|
function parseMaybeJsonString(value) {
|
|
206
275
|
const trimmed = value.trim();
|
|
207
276
|
if (!trimmed || (!trimmed.startsWith("{") && !trimmed.startsWith("["))) {
|
|
@@ -394,12 +463,6 @@ function isPlanToolName(toolName) {
|
|
|
394
463
|
|| normalized === "call_write_todos"
|
|
395
464
|
|| normalized === "call_read_todos";
|
|
396
465
|
}
|
|
397
|
-
function isWriteTodosToolName(toolName) {
|
|
398
|
-
const normalized = normalizePlanToolName(toolName);
|
|
399
|
-
return normalized === "write_todos"
|
|
400
|
-
|| normalized === "tool_call_write_todos"
|
|
401
|
-
|| normalized === "call_write_todos";
|
|
402
|
-
}
|
|
403
466
|
function extractTodoToolStart(event) {
|
|
404
467
|
if (typeof event !== "object" || event === null) {
|
|
405
468
|
return null;
|
|
@@ -413,15 +476,6 @@ function extractTodoToolStart(event) {
|
|
|
413
476
|
return null;
|
|
414
477
|
}
|
|
415
478
|
const input = unwrapPossibleToolInput(typed.data?.input);
|
|
416
|
-
if (isWriteTodosToolName(toolName) && typeof input === "object" && input !== null && !Array.isArray(input)) {
|
|
417
|
-
const summary = summarizeBuiltinWriteTodosArgs(input);
|
|
418
|
-
if (summary.summary.total === 0) {
|
|
419
|
-
throw new Error("Error invoking tool 'write_todos' with kwargs {\"todos\":[]} with error: Error: Initial write_todos call cannot use an empty todo list. Send the concrete task steps with both content and status.");
|
|
420
|
-
}
|
|
421
|
-
if (summary.items.every((item) => isLowSignalTodoContent(item.content))) {
|
|
422
|
-
throw new Error("Error invoking tool 'write_todos' with placeholder todo content with error: Error: Initial write_todos call must use descriptive task content. Do not use placeholder entries like '1', '2', or 'step 1'.");
|
|
423
|
-
}
|
|
424
|
-
}
|
|
425
479
|
return { toolName, input };
|
|
426
480
|
}
|
|
427
481
|
export function projectRuntimeStreamEvent(params) {
|
|
@@ -445,6 +499,11 @@ export function projectRuntimeStreamEvent(params) {
|
|
|
445
499
|
state.sawPlanState = true;
|
|
446
500
|
state.hasIncompletePlanState = planStateCompleteness;
|
|
447
501
|
}
|
|
502
|
+
const planStateFailure = getPlanStateFailure(event);
|
|
503
|
+
if (planStateFailure !== null) {
|
|
504
|
+
state.sawPlanState = true;
|
|
505
|
+
state.hasFailedPlanState = planStateFailure;
|
|
506
|
+
}
|
|
448
507
|
const eventAgentId = typeof event === "object" && event !== null && typeof event.agentId === "string"
|
|
449
508
|
? event.agentId.trim()
|
|
450
509
|
: "";
|
|
@@ -2,7 +2,7 @@ import path from "node:path";
|
|
|
2
2
|
import { z } from "zod";
|
|
3
3
|
import { isSandboxBackend } from "deepagents";
|
|
4
4
|
import { isRecord } from "../../../utils/object.js";
|
|
5
|
-
import { formatBuiltinTodoSnapshot,
|
|
5
|
+
import { formatBuiltinTodoSnapshot, summarizeBuiltinWriteTodosArgs, truncateLines } from "../runtime-adapter-support.js";
|
|
6
6
|
import { maybePersistLargeToolOutput, resolveToolRuntimeContext } from "./tool-output-artifacts.js";
|
|
7
7
|
function buildTaskToolDescription(subagents) {
|
|
8
8
|
const lines = [
|
|
@@ -272,14 +272,6 @@ export async function createBuiltinMiddlewareTools(backend, options) {
|
|
|
272
272
|
invoke: async (input) => {
|
|
273
273
|
const args = isRecord(input) ? input : {};
|
|
274
274
|
const summary = summarizeBuiltinWriteTodosArgs(args);
|
|
275
|
-
if (summary.summary.total === 0 && todoSnapshot.summary.total === 0) {
|
|
276
|
-
throw new Error("Initial write_todos call cannot use an empty todo list. Send the concrete task steps with both content and status.");
|
|
277
|
-
}
|
|
278
|
-
if (summary.summary.total > 0 &&
|
|
279
|
-
todoSnapshot.summary.total === 0 &&
|
|
280
|
-
summary.items.every((item) => isLowSignalTodoContent(item.content))) {
|
|
281
|
-
throw new Error("Initial write_todos call must use descriptive task content. Do not use placeholder entries like '1', '2', or 'step 1'.");
|
|
282
|
-
}
|
|
283
275
|
todoSnapshot = summary;
|
|
284
276
|
return {
|
|
285
277
|
ok: true,
|