@botbotgo/agent-harness 0.0.444 → 0.0.446
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/package-version.d.ts +1 -1
- package/dist/package-version.js +1 -1
- package/dist/resources/prompts/runtime/required-execution-continuation.md +8 -0
- package/dist/runtime/adapter/flow/invocation-flow.js +2 -2
- package/dist/runtime/adapter/local-tool-invocation.js +4 -4
- package/dist/runtime/agent-runtime-adapter.js +103 -256
- package/dist/runtime/parsing/output-recovery.js +2 -2
- package/dist/runtime/prompts/runtime-prompts.d.ts +1 -1
- package/dist/runtime/prompts/runtime-prompts.js +1 -1
- package/package.json +1 -1
- package/dist/resources/prompts/runtime/autonomous-investigation-recovery.md +0 -10
|
@@ -1,2 +1,2 @@
|
|
|
1
|
-
export declare const AGENT_HARNESS_VERSION = "0.0.444";
|
|
1
|
+
export declare const AGENT_HARNESS_VERSION = "0.0.446";
|
|
2
2
|
export declare const AGENT_HARNESS_RELEASE_DATE = "2026-05-04";
|
package/dist/package-version.js
CHANGED
|
@@ -1,2 +1,2 @@
|
|
|
1
|
-
export const AGENT_HARNESS_VERSION = "0.0.444";
|
|
1
|
+
export const AGENT_HARNESS_VERSION = "0.0.446";
|
|
2
2
|
export const AGENT_HARNESS_RELEASE_DATE = "2026-05-04";
|
|
@@ -0,0 +1,8 @@
|
|
|
1
|
+
The runtime has already observed planning state, tool state, or delegated execution state for this request. Continue from that state instead of restarting the task, broadening the scope, or asking the user to choose the next step.
|
|
2
|
+
|
|
3
|
+
Use the current todo board, prior tool results, and available configured tools to choose one terminally useful action:
|
|
4
|
+
- If a todo board is incomplete and existing evidence is enough, update the remaining todo items to completed, blocked, or failed as appropriate, then provide the final answer grounded in the observed evidence.
|
|
5
|
+
- If more evidence is required and an available non-planning tool can get it, call that concrete tool next. Do not call write_todos or read_todos again before the evidence tool returns.
|
|
6
|
+
- If no available tool can resolve the remaining work, update the todo board to blocked or failed and return a blocker report with the evidence already gathered.
|
|
7
|
+
|
|
8
|
+
Never print a tool-call JSON object, function call, or tool name as prose when you intend to use a tool. Actually call the tool. Ask a blocking clarification question only after the available runtime evidence and configured tools are genuinely insufficient to continue.
|
|
@@ -8,7 +8,7 @@ import { UPSTREAM_REQUEST_CONFIG_KEY, UPSTREAM_SESSION_CONFIG_KEY } from "../ups
|
|
|
8
8
|
import { appendToolRecoveryInstruction, extractVisibleOutput, tryParseJson } from "../../parsing/output-parsing.js";
|
|
9
9
|
import { salvageJsonToolCalls } from "../../parsing/output-tool-args.js";
|
|
10
10
|
import { isEmptyFinalAiMessageError } from "../resilience.js";
|
|
11
|
-
import {
|
|
11
|
+
import { EXECUTION_WITH_TOOL_EVIDENCE_RETRY_INSTRUCTION, REQUIRED_EXECUTION_CONTINUATION_INSTRUCTION, WRITE_TODOS_REQUIRED_PLAN_INSTRUCTION, } from "../../prompts/runtime-prompts.js";
|
|
12
12
|
const CLOSE_REQUIRED_PLAN_RECOVERY_INSTRUCTION = [
|
|
13
13
|
"The current required todo board still has unfinished work.",
|
|
14
14
|
"Do not broaden the investigation, restart planning, or ask the user what to do next.",
|
|
@@ -381,7 +381,7 @@ export async function executeRequestInvocation(options) {
|
|
|
381
381
|
? result.messages
|
|
382
382
|
: undefined;
|
|
383
383
|
const recoveryBase = messages ? { messages } : request;
|
|
384
|
-
const recoveredRequest = appendToolRecoveryInstruction(recoveryBase,
|
|
384
|
+
const recoveredRequest = appendToolRecoveryInstruction(recoveryBase, REQUIRED_EXECUTION_CONTINUATION_INSTRUCTION);
|
|
385
385
|
const recoveredInvocation = await invokeOnce(recoveredRequest);
|
|
386
386
|
result = recoveredInvocation.result;
|
|
387
387
|
executedToolResults.splice(0, executedToolResults.length, ...recoveredInvocation.executedToolResults);
|
|
@@ -8,7 +8,7 @@ import { toolRequiresRuntimeApproval } from "./tool/tool-hitl.js";
|
|
|
8
8
|
import { validateToolGatewayInput } from "../harness/tool-gateway/index.js";
|
|
9
9
|
import { appendToolRecoveryInstruction, extractVisibleOutput, resolveMissingPlanRecoveryInstruction, resolveExecutionWithoutToolEvidenceTextInstruction, resolveToolCallRecoveryInstruction, sanitizeVisibleText, STRICT_TOOL_JSON_INSTRUCTION, } from "../parsing/output-parsing.js";
|
|
10
10
|
import { salvageJsonToolCalls } from "../parsing/output-tool-args.js";
|
|
11
|
-
import {
|
|
11
|
+
import { REQUIRED_EXECUTION_CONTINUATION_INSTRUCTION } from "../prompts/runtime-prompts.js";
|
|
12
12
|
const TOOL_FOLLOW_UP_INSTRUCTION = "One or more tool results are already available in this conversation. Answer the user's current request directly from the existing context and tool results. Do not ask the user to repeat inputs that are already present above.";
|
|
13
13
|
const DEFAULT_MAX_TOOL_ITERATIONS = 10_000;
|
|
14
14
|
const MAX_REPEATED_RECOVERY_WITHOUT_PROGRESS = 2;
|
|
@@ -188,7 +188,7 @@ function latestToolErrorRecoveryInstruction(executedToolResults) {
|
|
|
188
188
|
return null;
|
|
189
189
|
}
|
|
190
190
|
const message = typeof latest.output === "string" ? latest.output : JSON.stringify(latest.output);
|
|
191
|
-
return resolveToolCallRecoveryInstruction(new Error(message)) ??
|
|
191
|
+
return resolveToolCallRecoveryInstruction(new Error(message)) ?? REQUIRED_EXECUTION_CONTINUATION_INSTRUCTION;
|
|
192
192
|
}
|
|
193
193
|
function terminalToolErrorRecoveryInstruction(terminalText) {
|
|
194
194
|
return resolveToolCallRecoveryInstruction(new Error(terminalText));
|
|
@@ -386,7 +386,7 @@ export async function runLocalToolInvocationLoop({ binding, request, primaryTool
|
|
|
386
386
|
requiresPlan: requiresPlanEvidence(binding),
|
|
387
387
|
})
|
|
388
388
|
: shouldEnforceIncompletePlan
|
|
389
|
-
?
|
|
389
|
+
? REQUIRED_EXECUTION_CONTINUATION_INSTRUCTION
|
|
390
390
|
: null);
|
|
391
391
|
if (requiresPlanEvidence(binding)
|
|
392
392
|
&& !hasPlanStateEvidence(executedToolResults, externalPlanEvidence)
|
|
@@ -477,7 +477,7 @@ export async function runLocalToolInvocationLoop({ binding, request, primaryTool
|
|
|
477
477
|
executedToolResults,
|
|
478
478
|
});
|
|
479
479
|
}
|
|
480
|
-
activeRequest = appendToolRecoveryInstruction(activeRequest,
|
|
480
|
+
activeRequest = appendToolRecoveryInstruction(activeRequest, REQUIRED_EXECUTION_CONTINUATION_INSTRUCTION);
|
|
481
481
|
pendingResult = undefined;
|
|
482
482
|
continue;
|
|
483
483
|
}
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
import path from "node:path";
|
|
2
2
|
import { createHash } from "node:crypto";
|
|
3
3
|
import { createAsyncSubAgentMiddleware, createFilesystemMiddleware, createMemoryMiddleware, createPatchToolCallsMiddleware, createSkillsMiddleware, createSummarizationMiddleware, createSubAgentMiddleware, FilesystemBackend, StateBackend, } from "deepagents";
|
|
4
|
-
import { createAgent,
|
|
4
|
+
import { createAgent, humanInTheLoopMiddleware, todoListMiddleware } from "langchain";
|
|
5
5
|
import { sanitizeVisibleText, tryParseJson, wrapResolvedModel, } from "./parsing/output-parsing.js";
|
|
6
6
|
import { salvageJsonToolCalls } from "./parsing/output-tool-args.js";
|
|
7
7
|
import { extractMessageText } from "../utils/message-content.js";
|
|
@@ -17,7 +17,6 @@ import { extractSubagentRequestText, invokeBuiltinTaskTool as invokeBuiltinTaskT
|
|
|
17
17
|
import { isEmptyFinalAiMessageError, resolveBindingTimeout, resolveStreamIdleTimeout, } from "./adapter/resilience.js";
|
|
18
18
|
import { createResolvedModel } from "./adapter/model/model-providers.js";
|
|
19
19
|
import { appendProviderToolCallAliasTools, resolveAdapterTools } from "./adapter/tool-resolution.js";
|
|
20
|
-
import { normalizeToolArgsForSchema } from "./adapter/tool/tool-arguments.js";
|
|
21
20
|
import { resolveRuntimeStreamExecutionContext, } from "./adapter/flow/execution-context.js";
|
|
22
21
|
import { isRetryableProviderError } from "./adapter/resilience.js";
|
|
23
22
|
import { UPSTREAM_REQUEST_CONFIG_KEY, UPSTREAM_SESSION_CONFIG_KEY } from "./adapter/upstream-configurable-keys.js";
|
|
@@ -39,6 +38,61 @@ function hasDelegatedExecutionToolEvidence(result) {
|
|
|
39
38
|
return executedToolResults.some((toolResult) => (toolResult.isError !== true
|
|
40
39
|
&& !isPlanToolName(toolResult.toolName)));
|
|
41
40
|
}
|
|
41
|
+
function normalizeEvidenceToolName(toolName) {
|
|
42
|
+
return typeof toolName === "string" ? toolName.trim().toLowerCase() : "";
|
|
43
|
+
}
|
|
44
|
+
function collectSuccessfulDelegatedExecutionToolNames(result) {
|
|
45
|
+
const executedToolResults = Array.isArray(result.metadata?.executedToolResults)
|
|
46
|
+
? result.metadata.executedToolResults
|
|
47
|
+
: [];
|
|
48
|
+
return new Set(executedToolResults
|
|
49
|
+
.filter((toolResult) => toolResult.isError !== true && !isPlanToolName(toolResult.toolName))
|
|
50
|
+
.map((toolResult) => normalizeEvidenceToolName(toolResult.toolName))
|
|
51
|
+
.filter((toolName) => toolName.length > 0));
|
|
52
|
+
}
|
|
53
|
+
function textExplicitlyNamesConfiguredTool(text, toolName) {
|
|
54
|
+
const name = toolName.trim();
|
|
55
|
+
if (!name) {
|
|
56
|
+
return false;
|
|
57
|
+
}
|
|
58
|
+
const pattern = new RegExp(`(?:^|[^\\p{L}\\p{N}_-])${escapeRegExp(name)}(?:$|[^\\p{L}\\p{N}_-])`, "iu");
|
|
59
|
+
return pattern.test(text);
|
|
60
|
+
}
|
|
61
|
+
function resolveExplicitRequestedExecutionToolNames(binding, requestText) {
|
|
62
|
+
const text = requestText.trim();
|
|
63
|
+
if (!text) {
|
|
64
|
+
return [];
|
|
65
|
+
}
|
|
66
|
+
return getBindingPrimaryTools(binding)
|
|
67
|
+
.map((tool) => tool.name)
|
|
68
|
+
.filter((toolName) => typeof toolName === "string" && toolName.trim().length > 0)
|
|
69
|
+
.filter((toolName) => !isPlanToolName(toolName))
|
|
70
|
+
.filter((toolName) => textExplicitlyNamesConfiguredTool(text, toolName));
|
|
71
|
+
}
|
|
72
|
+
function listMissingDelegatedExecutionToolEvidence(result, requiredToolNames = []) {
|
|
73
|
+
if (requiredToolNames.length === 0) {
|
|
74
|
+
return hasDelegatedExecutionToolEvidence(result) ? [] : ["configured non-planning tools"];
|
|
75
|
+
}
|
|
76
|
+
const observed = collectSuccessfulDelegatedExecutionToolNames(result);
|
|
77
|
+
return requiredToolNames.filter((toolName) => !observed.has(normalizeEvidenceToolName(toolName)));
|
|
78
|
+
}
|
|
79
|
+
function hasRequiredDelegatedExecutionToolEvidence(result, requiredToolNames = []) {
|
|
80
|
+
return hasDelegatedExecutionToolEvidence(result)
|
|
81
|
+
&& listMissingDelegatedExecutionToolEvidence(result, requiredToolNames).length === 0;
|
|
82
|
+
}
|
|
83
|
+
function buildExplicitExecutionToolRetryInstruction(missingToolNames) {
|
|
84
|
+
const tools = missingToolNames
|
|
85
|
+
.filter((toolName) => toolName !== "configured non-planning tools")
|
|
86
|
+
.join(", ");
|
|
87
|
+
if (!tools) {
|
|
88
|
+
return "";
|
|
89
|
+
}
|
|
90
|
+
return [
|
|
91
|
+
`The request explicitly named configured evidence tool(s): ${tools}.`,
|
|
92
|
+
"Before the final answer, call every listed non-planning tool that has not already produced a successful tool result.",
|
|
93
|
+
"Do not substitute a different evidence tool for an explicitly named configured tool unless that tool invocation itself fails and the blocker is reported.",
|
|
94
|
+
].join("\n");
|
|
95
|
+
}
|
|
42
96
|
function buildDelegatedPlanEvidenceBlocker(agentId) {
|
|
43
97
|
return JSON.stringify({
|
|
44
98
|
status: "blocked",
|
|
@@ -97,245 +151,6 @@ function readConfiguredToolName(value) {
|
|
|
97
151
|
const typed = value;
|
|
98
152
|
return typeof typed.name === "string" ? typed.name.trim() : "";
|
|
99
153
|
}
|
|
100
|
-
function readMessageContentText(message) {
|
|
101
|
-
if (typeof message !== "object" || message === null) {
|
|
102
|
-
return "";
|
|
103
|
-
}
|
|
104
|
-
const content = message.content;
|
|
105
|
-
if (typeof content === "string") {
|
|
106
|
-
return content.trim();
|
|
107
|
-
}
|
|
108
|
-
if (!Array.isArray(content)) {
|
|
109
|
-
return "";
|
|
110
|
-
}
|
|
111
|
-
return content
|
|
112
|
-
.map((part) => typeof part === "object" && part !== null && typeof part.text === "string"
|
|
113
|
-
? part.text
|
|
114
|
-
: "")
|
|
115
|
-
.join("")
|
|
116
|
-
.trim();
|
|
117
|
-
}
|
|
118
|
-
function hasExternalPlanEvidenceInstruction(messages) {
|
|
119
|
-
return messages.some((message) => {
|
|
120
|
-
const text = readMessageContentText(message);
|
|
121
|
-
return /todo board already exists|required todo board already exists|non[-\s]?planning tool call/iu.test(text);
|
|
122
|
-
});
|
|
123
|
-
}
|
|
124
|
-
function parseToolCallArgs(value) {
|
|
125
|
-
if (typeof value === "object" && value !== null && !Array.isArray(value)) {
|
|
126
|
-
return value;
|
|
127
|
-
}
|
|
128
|
-
if (typeof value !== "string" || value.trim().length === 0) {
|
|
129
|
-
return {};
|
|
130
|
-
}
|
|
131
|
-
try {
|
|
132
|
-
const parsed = JSON.parse(value);
|
|
133
|
-
return typeof parsed === "object" && parsed !== null && !Array.isArray(parsed)
|
|
134
|
-
? parsed
|
|
135
|
-
: {};
|
|
136
|
-
}
|
|
137
|
-
catch {
|
|
138
|
-
return {};
|
|
139
|
-
}
|
|
140
|
-
}
|
|
141
|
-
function readMessageToolCalls(message) {
|
|
142
|
-
if (typeof message !== "object" || message === null) {
|
|
143
|
-
return [];
|
|
144
|
-
}
|
|
145
|
-
const typed = message;
|
|
146
|
-
const raw = Array.isArray(typed.tool_calls) ? typed.tool_calls
|
|
147
|
-
: Array.isArray(typed.kwargs?.tool_calls) ? typed.kwargs.tool_calls
|
|
148
|
-
: Array.isArray(typed.additional_kwargs?.tool_calls) ? typed.additional_kwargs.tool_calls
|
|
149
|
-
: Array.isArray(typed.kwargs?.additional_kwargs?.tool_calls) ? typed.kwargs.additional_kwargs.tool_calls
|
|
150
|
-
: Array.isArray(typed.lc_kwargs?.tool_calls) ? typed.lc_kwargs.tool_calls
|
|
151
|
-
: Array.isArray(typed.lc_kwargs?.additional_kwargs?.tool_calls) ? typed.lc_kwargs.additional_kwargs.tool_calls
|
|
152
|
-
: [];
|
|
153
|
-
return raw
|
|
154
|
-
.map((toolCall) => {
|
|
155
|
-
if (typeof toolCall !== "object" || toolCall === null) {
|
|
156
|
-
return null;
|
|
157
|
-
}
|
|
158
|
-
const call = toolCall;
|
|
159
|
-
const name = typeof call.name === "string"
|
|
160
|
-
? call.name
|
|
161
|
-
: typeof call.function?.name === "string"
|
|
162
|
-
? call.function.name
|
|
163
|
-
: undefined;
|
|
164
|
-
const args = parseToolCallArgs(call.args ?? call.function?.arguments);
|
|
165
|
-
return {
|
|
166
|
-
...(typeof call.id === "string" ? { id: call.id } : {}),
|
|
167
|
-
...(name ? { name } : {}),
|
|
168
|
-
args,
|
|
169
|
-
};
|
|
170
|
-
})
|
|
171
|
-
.filter((toolCall) => toolCall !== null);
|
|
172
|
-
}
|
|
173
|
-
function todoToolCallIsTerminal(toolCall) {
|
|
174
|
-
const todos = toolCall.args?.todos;
|
|
175
|
-
if (!Array.isArray(todos) || todos.length === 0) {
|
|
176
|
-
return false;
|
|
177
|
-
}
|
|
178
|
-
return todos.every((todo) => {
|
|
179
|
-
if (typeof todo !== "object" || todo === null || typeof todo.status !== "string") {
|
|
180
|
-
return false;
|
|
181
|
-
}
|
|
182
|
-
const status = todo.status.trim().toLowerCase();
|
|
183
|
-
return status !== "pending" && status !== "in_progress";
|
|
184
|
-
});
|
|
185
|
-
}
|
|
186
|
-
function readToolResultName(message) {
|
|
187
|
-
if (typeof message !== "object" || message === null) {
|
|
188
|
-
return "";
|
|
189
|
-
}
|
|
190
|
-
const typed = message;
|
|
191
|
-
const messageType = typeof typed.type === "string"
|
|
192
|
-
? typed.type
|
|
193
|
-
: typeof typed._getType === "function"
|
|
194
|
-
? String(typed._getType())
|
|
195
|
-
: "";
|
|
196
|
-
if (messageType !== "tool" && typeof typed.tool_call_id !== "string") {
|
|
197
|
-
return "";
|
|
198
|
-
}
|
|
199
|
-
return typeof typed.name === "string" ? typed.name : "";
|
|
200
|
-
}
|
|
201
|
-
function createPlanGuardToolError(toolCallId, content, fallbackPrefix) {
|
|
202
|
-
return new ToolMessage({
|
|
203
|
-
content,
|
|
204
|
-
tool_call_id: typeof toolCallId === "string" ? toolCallId : `${fallbackPrefix}-${Math.random().toString(36).slice(2, 10)}`,
|
|
205
|
-
status: "error",
|
|
206
|
-
});
|
|
207
|
-
}
|
|
208
|
-
function isPromiseLike(value) {
|
|
209
|
-
return typeof value === "object" && value !== null && typeof value.then === "function";
|
|
210
|
-
}
|
|
211
|
-
function renderToolInvocationError(error) {
|
|
212
|
-
const message = error instanceof Error ? error.message : String(error);
|
|
213
|
-
const cause = typeof error === "object" && error !== null && "cause" in error
|
|
214
|
-
? error.cause
|
|
215
|
-
: undefined;
|
|
216
|
-
const causeMessage = cause instanceof Error ? cause.message : "";
|
|
217
|
-
return [message, causeMessage]
|
|
218
|
-
.map((value) => value.trim())
|
|
219
|
-
.filter(Boolean)
|
|
220
|
-
.join("\n");
|
|
221
|
-
}
|
|
222
|
-
function createToolInvocationErrorMessage(toolCallId, toolName, error) {
|
|
223
|
-
const rendered = renderToolInvocationError(error);
|
|
224
|
-
return createPlanGuardToolError(toolCallId, `Error invoking tool '${toolName}' with error: ${rendered || "tool invocation failed"}`, "tool-invocation-error");
|
|
225
|
-
}
|
|
226
|
-
function createTodoPlanGuardMiddleware(options = {}) {
|
|
227
|
-
let observedPlanToolResult = false;
|
|
228
|
-
let observedNonPlanToolResult = false;
|
|
229
|
-
return createMiddleware({
|
|
230
|
-
name: "harnessTodoPlanGuard",
|
|
231
|
-
wrapToolCall: ((request, handler) => {
|
|
232
|
-
const toolName = typeof request.toolCall?.name === "string"
|
|
233
|
-
? request.toolCall.name
|
|
234
|
-
: typeof request.tool?.name === "string"
|
|
235
|
-
? request.tool.name
|
|
236
|
-
: "";
|
|
237
|
-
const messages = Array.isArray(request.state?.messages) ? request.state.messages : [];
|
|
238
|
-
const toolResultNames = messages.map(readToolResultName).filter((name) => name.length > 0);
|
|
239
|
-
const hasPlanToolResult = toolResultNames.some(isPlanToolName);
|
|
240
|
-
const hasNonPlanToolResult = toolResultNames.some((resultToolName) => !isPlanToolName(resultToolName));
|
|
241
|
-
const hasExternalPlanEvidence = hasExternalPlanEvidenceInstruction(messages);
|
|
242
|
-
if (options.requiresPlan === true
|
|
243
|
-
&& !(observedPlanToolResult || hasPlanToolResult || hasExternalPlanEvidence)
|
|
244
|
-
&& toolName.length > 0
|
|
245
|
-
&& !isPlanToolName(toolName)) {
|
|
246
|
-
return createPlanGuardToolError(request.toolCall?.id, "Error: this agent has a required visible planning contract. Your first action must be write_todos with concrete task steps and statuses. Do not call non-planning tools before the initial todo board exists.", "todo-initial-plan-guard");
|
|
247
|
-
}
|
|
248
|
-
if (options.requiresPlan === true
|
|
249
|
-
&& (observedPlanToolResult || hasPlanToolResult || hasExternalPlanEvidence)
|
|
250
|
-
&& !(observedNonPlanToolResult || hasNonPlanToolResult)
|
|
251
|
-
&& isPlanToolName(toolName)) {
|
|
252
|
-
return createPlanGuardToolError(request.toolCall?.id, "Error: the required todo board already exists, but no non-planning evidence tool has returned yet. Do not call write_todos or read_todos again. Your next action must be exactly one non-planning tool call selected from the available tool descriptions and schemas.", "todo-plan-evidence-guard");
|
|
253
|
-
}
|
|
254
|
-
if (options.requiresPlan === true
|
|
255
|
-
&& !(observedNonPlanToolResult || hasNonPlanToolResult)
|
|
256
|
-
&& isPlanToolName(toolName)
|
|
257
|
-
&& normalizePlanToolName(toolName).includes("write_todos")
|
|
258
|
-
&& todoToolCallIsTerminal({ args: parseToolCallArgs(request.toolCall?.args) })) {
|
|
259
|
-
return createPlanGuardToolError(request.toolCall?.id, "Error: write_todos cannot mark every todo as terminal before any non-planning evidence tool returns. Keep one todo in_progress and the remaining todos pending until evidence tools return.", "write-todos-tool-guard");
|
|
260
|
-
}
|
|
261
|
-
const markObservedToolResult = () => {
|
|
262
|
-
if (isPlanToolName(toolName)) {
|
|
263
|
-
observedPlanToolResult = true;
|
|
264
|
-
}
|
|
265
|
-
else if (toolName.length > 0) {
|
|
266
|
-
observedNonPlanToolResult = true;
|
|
267
|
-
}
|
|
268
|
-
};
|
|
269
|
-
const normalizedArgs = normalizeToolArgsForSchema(parseToolCallArgs(request.toolCall?.args), request.tool?.schema, request.toolCall?.args);
|
|
270
|
-
const normalizedRequest = {
|
|
271
|
-
...request,
|
|
272
|
-
toolCall: request.toolCall
|
|
273
|
-
? {
|
|
274
|
-
...request.toolCall,
|
|
275
|
-
args: normalizedArgs,
|
|
276
|
-
}
|
|
277
|
-
: request.toolCall,
|
|
278
|
-
};
|
|
279
|
-
try {
|
|
280
|
-
const result = handler(normalizedRequest);
|
|
281
|
-
if (isPromiseLike(result)) {
|
|
282
|
-
return result
|
|
283
|
-
.then((value) => {
|
|
284
|
-
markObservedToolResult();
|
|
285
|
-
return value;
|
|
286
|
-
})
|
|
287
|
-
.catch((error) => createToolInvocationErrorMessage(request.toolCall?.id, toolName || "unknown", error));
|
|
288
|
-
}
|
|
289
|
-
markObservedToolResult();
|
|
290
|
-
return result;
|
|
291
|
-
}
|
|
292
|
-
catch (error) {
|
|
293
|
-
return createToolInvocationErrorMessage(request.toolCall?.id, toolName || "unknown", error);
|
|
294
|
-
}
|
|
295
|
-
}),
|
|
296
|
-
afterModel: (state) => {
|
|
297
|
-
if (!Array.isArray(state.messages) || state.messages.length === 0) {
|
|
298
|
-
return;
|
|
299
|
-
}
|
|
300
|
-
const hasNonPlanToolResult = state.messages.some((message) => {
|
|
301
|
-
if (typeof message !== "object" || message === null) {
|
|
302
|
-
return false;
|
|
303
|
-
}
|
|
304
|
-
const typed = message;
|
|
305
|
-
const messageType = typeof typed.type === "string"
|
|
306
|
-
? typed.type
|
|
307
|
-
: typeof typed._getType === "function"
|
|
308
|
-
? String(typed._getType())
|
|
309
|
-
: "";
|
|
310
|
-
if (messageType !== "tool" && typeof typed.tool_call_id !== "string") {
|
|
311
|
-
return false;
|
|
312
|
-
}
|
|
313
|
-
const toolName = typeof typed.name === "string" ? typed.name : "";
|
|
314
|
-
return toolName.length > 0 && !isPlanToolName(toolName);
|
|
315
|
-
});
|
|
316
|
-
if (hasNonPlanToolResult) {
|
|
317
|
-
return;
|
|
318
|
-
}
|
|
319
|
-
const lastAiMessage = [...state.messages].reverse().find((message) => readMessageToolCalls(message).length > 0);
|
|
320
|
-
const lastToolCalls = readMessageToolCalls(lastAiMessage);
|
|
321
|
-
if (!lastAiMessage) {
|
|
322
|
-
return;
|
|
323
|
-
}
|
|
324
|
-
const writeTodosCalls = lastToolCalls.filter((toolCall) => isPlanToolName(toolCall.name));
|
|
325
|
-
const prematureCompletedCalls = writeTodosCalls.filter(todoToolCallIsTerminal);
|
|
326
|
-
if (prematureCompletedCalls.length === 0) {
|
|
327
|
-
return;
|
|
328
|
-
}
|
|
329
|
-
return {
|
|
330
|
-
messages: prematureCompletedCalls.map((toolCall, index) => new ToolMessage({
|
|
331
|
-
content: "Error: write_todos cannot mark every todo as terminal before any non-planning evidence tool returns. Keep one todo in_progress and the remaining todos pending until evidence tools return.",
|
|
332
|
-
tool_call_id: toolCall.id ?? `write-todos-plan-guard-${index}`,
|
|
333
|
-
status: "error",
|
|
334
|
-
})),
|
|
335
|
-
};
|
|
336
|
-
},
|
|
337
|
-
});
|
|
338
|
-
}
|
|
339
154
|
function shouldUseConfigurableDeepAgentAssembly(binding) {
|
|
340
155
|
return getBindingExecutionKind(binding) === "deepagent";
|
|
341
156
|
}
|
|
@@ -933,9 +748,15 @@ export class AgentRuntimeAdapter {
|
|
|
933
748
|
const runDelegatedRequest = (text, requestSuffix = "") => this.invoke(targetBinding, text, childSessionId, `${childRequestId}${requestSuffix}`, undefined, [], invokeOptions);
|
|
934
749
|
let result = await runDelegatedRequest(requestText);
|
|
935
750
|
const targetRequiresExecutionToolEvidence = getBindingPrimaryTools(targetBinding).length > 0;
|
|
936
|
-
|
|
937
|
-
|
|
938
|
-
|
|
751
|
+
const requiredExecutionToolNames = resolveExplicitRequestedExecutionToolNames(targetBinding, requestText);
|
|
752
|
+
if (targetRequiresExecutionToolEvidence && !hasRequiredDelegatedExecutionToolEvidence(result, requiredExecutionToolNames)) {
|
|
753
|
+
const missingToolNames = listMissingDelegatedExecutionToolEvidence(result, requiredExecutionToolNames);
|
|
754
|
+
result = await runDelegatedRequest([
|
|
755
|
+
requestText,
|
|
756
|
+
EXECUTION_WITH_TOOL_EVIDENCE_RETRY_INSTRUCTION,
|
|
757
|
+
buildExplicitExecutionToolRetryInstruction(missingToolNames),
|
|
758
|
+
].filter(Boolean).join("\n\n"), ":tool-evidence-retry");
|
|
759
|
+
if (!hasRequiredDelegatedExecutionToolEvidence(result, requiredExecutionToolNames)) {
|
|
939
760
|
throw new DelegatedExecutionNoToolEvidenceError(targetBinding.agent.id);
|
|
940
761
|
}
|
|
941
762
|
}
|
|
@@ -1446,6 +1267,7 @@ export class AgentRuntimeAdapter {
|
|
|
1446
1267
|
}
|
|
1447
1268
|
}
|
|
1448
1269
|
const targetRequiresExecutionToolEvidence = getBindingPrimaryTools(selectedBinding).length > 0;
|
|
1270
|
+
const requiredExecutionToolNames = resolveExplicitRequestedExecutionToolNames(selectedBinding, requestText);
|
|
1449
1271
|
if (selectedBinding.harnessRuntime.executionContract?.requiresPlan === true
|
|
1450
1272
|
&& !hasDelegatedPlanEvidence(delegatedResult)) {
|
|
1451
1273
|
try {
|
|
@@ -1465,9 +1287,14 @@ export class AgentRuntimeAdapter {
|
|
|
1465
1287
|
};
|
|
1466
1288
|
}
|
|
1467
1289
|
}
|
|
1468
|
-
if (targetRequiresExecutionToolEvidence && !
|
|
1290
|
+
if (targetRequiresExecutionToolEvidence && !hasRequiredDelegatedExecutionToolEvidence(delegatedResult, requiredExecutionToolNames)) {
|
|
1291
|
+
const missingToolNames = listMissingDelegatedExecutionToolEvidence(delegatedResult, requiredExecutionToolNames);
|
|
1469
1292
|
try {
|
|
1470
|
-
delegatedResult = await runDelegatedRequest([
|
|
1293
|
+
delegatedResult = await runDelegatedRequest([
|
|
1294
|
+
requestText,
|
|
1295
|
+
EXECUTION_WITH_TOOL_EVIDENCE_RETRY_INSTRUCTION,
|
|
1296
|
+
buildExplicitExecutionToolRetryInstruction(missingToolNames),
|
|
1297
|
+
].filter(Boolean).join("\n\n"), ":tool-evidence-retry", selectedBinding.harnessRuntime.executionContract?.requiresPlan === true
|
|
1471
1298
|
? {
|
|
1472
1299
|
suppressInitialRequiredPlanInstruction: true,
|
|
1473
1300
|
externalPlanEvidence: hasDelegatedPlanEvidence(delegatedResult),
|
|
@@ -1502,8 +1329,10 @@ export class AgentRuntimeAdapter {
|
|
|
1502
1329
|
},
|
|
1503
1330
|
};
|
|
1504
1331
|
}
|
|
1505
|
-
if (targetRequiresExecutionToolEvidence && !
|
|
1506
|
-
const output = buildDelegatedExecutionEvidenceBlocker(selectedBinding.agent.id,
|
|
1332
|
+
if (targetRequiresExecutionToolEvidence && !hasRequiredDelegatedExecutionToolEvidence(delegatedResult, requiredExecutionToolNames)) {
|
|
1333
|
+
const output = buildDelegatedExecutionEvidenceBlocker(selectedBinding.agent.id, requiredExecutionToolNames.length > 0
|
|
1334
|
+
? requiredExecutionToolNames
|
|
1335
|
+
: getBindingPrimaryTools(selectedBinding).map((tool) => tool.name));
|
|
1507
1336
|
return {
|
|
1508
1337
|
toolOutput: output,
|
|
1509
1338
|
delegatedSubagentType: subagentType,
|
|
@@ -1898,9 +1727,17 @@ export class AgentRuntimeAdapter {
|
|
|
1898
1727
|
delegatedResult = mergeDelegatedResultToolEvidence(yield* runPlannedDelegation(planned.subagentType, [delegatedText, DELEGATED_PLAN_EVIDENCE_RETRY_INSTRUCTION].filter(Boolean).join("\n\n"), ":plan-evidence-retry"), previousDelegatedResult);
|
|
1899
1728
|
}
|
|
1900
1729
|
const targetRequiresExecutionToolEvidence = selectedBinding ? getBindingPrimaryTools(selectedBinding).length > 0 : false;
|
|
1901
|
-
|
|
1730
|
+
const requiredExecutionToolNames = selectedBinding
|
|
1731
|
+
? resolveExplicitRequestedExecutionToolNames(selectedBinding, planned.description)
|
|
1732
|
+
: [];
|
|
1733
|
+
if (targetRequiresExecutionToolEvidence && !hasRequiredDelegatedExecutionToolEvidence(delegatedResult, requiredExecutionToolNames)) {
|
|
1734
|
+
const missingToolNames = listMissingDelegatedExecutionToolEvidence(delegatedResult, requiredExecutionToolNames);
|
|
1902
1735
|
const previousDelegatedResult = delegatedResult;
|
|
1903
|
-
delegatedResult = mergeDelegatedResultToolEvidence(yield* runPlannedDelegation(planned.subagentType, [
|
|
1736
|
+
delegatedResult = mergeDelegatedResultToolEvidence(yield* runPlannedDelegation(planned.subagentType, [
|
|
1737
|
+
delegatedText,
|
|
1738
|
+
EXECUTION_WITH_TOOL_EVIDENCE_RETRY_INSTRUCTION,
|
|
1739
|
+
buildExplicitExecutionToolRetryInstruction(missingToolNames),
|
|
1740
|
+
].filter(Boolean).join("\n\n"), ":tool-evidence-retry"), previousDelegatedResult);
|
|
1904
1741
|
}
|
|
1905
1742
|
if (selectedBinding?.harnessRuntime.executionContract?.requiresPlan === true && !hasDelegatedPlanEvidence(delegatedResult)) {
|
|
1906
1743
|
const output = buildDelegatedPlanEvidenceBlocker(selectedBinding.agent.id);
|
|
@@ -1911,8 +1748,10 @@ export class AgentRuntimeAdapter {
|
|
|
1911
1748
|
finalMessageText: output,
|
|
1912
1749
|
};
|
|
1913
1750
|
}
|
|
1914
|
-
if (targetRequiresExecutionToolEvidence && !
|
|
1915
|
-
const output = buildDelegatedExecutionEvidenceBlocker(selectedBinding.agent.id,
|
|
1751
|
+
if (targetRequiresExecutionToolEvidence && !hasRequiredDelegatedExecutionToolEvidence(delegatedResult, requiredExecutionToolNames)) {
|
|
1752
|
+
const output = buildDelegatedExecutionEvidenceBlocker(selectedBinding.agent.id, requiredExecutionToolNames.length > 0
|
|
1753
|
+
? requiredExecutionToolNames
|
|
1754
|
+
: getBindingPrimaryTools(selectedBinding).map((tool) => tool.name));
|
|
1916
1755
|
delegatedResult = {
|
|
1917
1756
|
...delegatedResult,
|
|
1918
1757
|
state: "failed",
|
|
@@ -2105,14 +1944,20 @@ export class AgentRuntimeAdapter {
|
|
|
2105
1944
|
});
|
|
2106
1945
|
let delegatedResult = yield* runDelegatedStreamAttempt(delegatedText);
|
|
2107
1946
|
const targetRequiresExecutionToolEvidence = getBindingPrimaryTools(selectedBinding).length > 0;
|
|
1947
|
+
const requiredExecutionToolNames = resolveExplicitRequestedExecutionToolNames(selectedBinding, requestText);
|
|
2108
1948
|
if (selectedBinding.harnessRuntime.executionContract?.requiresPlan === true
|
|
2109
1949
|
&& !hasDelegatedPlanEvidence(delegatedResult)) {
|
|
2110
1950
|
const previousDelegatedResult = delegatedResult;
|
|
2111
1951
|
delegatedResult = mergeDelegatedResultToolEvidence(yield* runDelegatedStreamAttempt([delegatedText, DELEGATED_PLAN_EVIDENCE_RETRY_INSTRUCTION].filter(Boolean).join("\n\n"), ":plan-evidence-retry"), previousDelegatedResult);
|
|
2112
1952
|
}
|
|
2113
|
-
if (targetRequiresExecutionToolEvidence && !
|
|
1953
|
+
if (targetRequiresExecutionToolEvidence && !hasRequiredDelegatedExecutionToolEvidence(delegatedResult, requiredExecutionToolNames)) {
|
|
1954
|
+
const missingToolNames = listMissingDelegatedExecutionToolEvidence(delegatedResult, requiredExecutionToolNames);
|
|
2114
1955
|
const previousDelegatedResult = delegatedResult;
|
|
2115
|
-
delegatedResult = mergeDelegatedResultToolEvidence(yield* runDelegatedStreamAttempt([
|
|
1956
|
+
delegatedResult = mergeDelegatedResultToolEvidence(yield* runDelegatedStreamAttempt([
|
|
1957
|
+
delegatedText,
|
|
1958
|
+
EXECUTION_WITH_TOOL_EVIDENCE_RETRY_INSTRUCTION,
|
|
1959
|
+
buildExplicitExecutionToolRetryInstruction(missingToolNames),
|
|
1960
|
+
].filter(Boolean).join("\n\n"), ":tool-evidence-retry"), previousDelegatedResult);
|
|
2116
1961
|
}
|
|
2117
1962
|
if (selectedBinding.harnessRuntime.executionContract?.requiresPlan === true
|
|
2118
1963
|
&& !hasDelegatedPlanEvidence(delegatedResult)) {
|
|
@@ -2124,8 +1969,10 @@ export class AgentRuntimeAdapter {
|
|
|
2124
1969
|
finalMessageText: output,
|
|
2125
1970
|
};
|
|
2126
1971
|
}
|
|
2127
|
-
if (targetRequiresExecutionToolEvidence && !
|
|
2128
|
-
const output = buildDelegatedExecutionEvidenceBlocker(selectedBinding.agent.id,
|
|
1972
|
+
if (targetRequiresExecutionToolEvidence && !hasRequiredDelegatedExecutionToolEvidence(delegatedResult, requiredExecutionToolNames)) {
|
|
1973
|
+
const output = buildDelegatedExecutionEvidenceBlocker(selectedBinding.agent.id, requiredExecutionToolNames.length > 0
|
|
1974
|
+
? requiredExecutionToolNames
|
|
1975
|
+
: getBindingPrimaryTools(selectedBinding).map((tool) => tool.name));
|
|
2129
1976
|
delegatedResult = {
|
|
2130
1977
|
...delegatedResult,
|
|
2131
1978
|
state: "failed",
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import {
|
|
1
|
+
import { EXECUTION_WITH_TOOL_EVIDENCE_RETRY_INSTRUCTION, INTERNAL_RUNTIME_SPILL_PATH_INSTRUCTION, REQUIRED_EXECUTION_CONTINUATION_INSTRUCTION, STRICT_TOOL_JSON_INSTRUCTION, WORKSPACE_RELATIVE_PATH_INSTRUCTION, WRITE_TODOS_DESCRIPTIVE_CONTENT_INSTRUCTION, WRITE_TODOS_FULL_ENTRY_INSTRUCTION, WRITE_TODOS_NON_EMPTY_INITIAL_LIST_INSTRUCTION, WRITE_TODOS_REQUIRED_PLAN_INSTRUCTION, } from "../prompts/runtime-prompts.js";
|
|
2
2
|
import { wrapNormalizedMessage, readTextContent } from "./output-content.js";
|
|
3
3
|
import { salvageJsonToolCalls } from "./output-tool-args.js";
|
|
4
4
|
function collectRequestMessages(request) {
|
|
@@ -153,7 +153,7 @@ export function resolveExecutionWithoutToolEvidenceTextInstruction(request, assi
|
|
|
153
153
|
return null;
|
|
154
154
|
}
|
|
155
155
|
return hasExecutionEvidence
|
|
156
|
-
?
|
|
156
|
+
? REQUIRED_EXECUTION_CONTINUATION_INSTRUCTION
|
|
157
157
|
: EXECUTION_WITH_TOOL_EVIDENCE_RETRY_INSTRUCTION;
|
|
158
158
|
}
|
|
159
159
|
export function resolveToolCallRecoveryInstruction(error) {
|
|
@@ -10,7 +10,7 @@ export declare const WRITE_TODOS_DESCRIPTIVE_CONTENT_INSTRUCTION: string;
|
|
|
10
10
|
export declare const WRITE_TODOS_REQUIRED_PLAN_INSTRUCTION: string;
|
|
11
11
|
export declare const EXECUTION_WITH_TOOL_EVIDENCE_INSTRUCTION: string;
|
|
12
12
|
export declare const EXECUTION_WITH_TOOL_EVIDENCE_RETRY_INSTRUCTION: string;
|
|
13
|
-
export declare const
|
|
13
|
+
export declare const REQUIRED_EXECUTION_CONTINUATION_INSTRUCTION: string;
|
|
14
14
|
export declare const DELEGATED_TASK_FAILURE_RECOVERY_INSTRUCTION: string;
|
|
15
15
|
export declare const DELEGATION_ONLY_RECOVERY_INSTRUCTION: string;
|
|
16
16
|
export declare const INTERNAL_RUNTIME_SPILL_PATH_INSTRUCTION: string;
|
|
@@ -13,7 +13,7 @@ export const WRITE_TODOS_DESCRIPTIVE_CONTENT_INSTRUCTION = readRuntimePrompt("wr
|
|
|
13
13
|
export const WRITE_TODOS_REQUIRED_PLAN_INSTRUCTION = readRuntimePrompt("write-todos-required-plan");
|
|
14
14
|
export const EXECUTION_WITH_TOOL_EVIDENCE_INSTRUCTION = readRuntimePrompt("execution-with-tool-evidence");
|
|
15
15
|
export const EXECUTION_WITH_TOOL_EVIDENCE_RETRY_INSTRUCTION = readRuntimePrompt("execution-with-tool-evidence-retry");
|
|
16
|
-
export const
|
|
16
|
+
export const REQUIRED_EXECUTION_CONTINUATION_INSTRUCTION = readRuntimePrompt("required-execution-continuation");
|
|
17
17
|
export const DELEGATED_TASK_FAILURE_RECOVERY_INSTRUCTION = readRuntimePrompt("delegated-task-failure-recovery");
|
|
18
18
|
export const DELEGATION_ONLY_RECOVERY_INSTRUCTION = readRuntimePrompt("delegation-only-recovery");
|
|
19
19
|
export const INTERNAL_RUNTIME_SPILL_PATH_INSTRUCTION = readRuntimePrompt("internal-runtime-spill-path");
|
package/package.json
CHANGED
|
@@ -1,10 +0,0 @@
|
|
|
1
|
-
Do not stop at a plan or ask the user to choose the next obvious diagnostic step when the request is for deep investigation, root-cause analysis, or step-by-step execution. Do not ask for more background, scope, logs, or environment details until you have first exhausted the context and tools already available in this runtime.
|
|
2
|
-
|
|
3
|
-
If no concrete execution has happened yet, start from the current workspace, shell, and attached runtime/tool context by default, then continue the investigation yourself with the next concrete tool call. If the user explicitly asked for a plan, or if the task is clearly non-trivial and multi-step, call write_todos first with concrete investigation/execution steps before any other tool call or final answer.
|
|
4
|
-
|
|
5
|
-
If a todo board already exists, do not restart planning and do not repeat the same clarification request. Use the current todo board and prior tool results to choose one of these terminally useful actions:
|
|
6
|
-
- If more evidence is genuinely needed and an available tool can get it, make the next concrete tool call.
|
|
7
|
-
- If the existing evidence is enough to answer, update the todo board to completed or blocked as appropriate, then provide the final answer grounded in the tool results.
|
|
8
|
-
- If the available tools cannot resolve the remaining work, update the todo board to blocked or failed and provide a blocker report with the evidence.
|
|
9
|
-
|
|
10
|
-
Never print a tool-call JSON object, function call, or tool name as prose when you intend to use a tool. Actually call the tool. Ask a blocking clarification question only after the available evidence is genuinely insufficient to continue.
|