@botbotgo/agent-harness 0.0.443 → 0.0.445
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
|
@@ -1,2 +1,2 @@
|
|
|
1
|
-
export declare const AGENT_HARNESS_VERSION = "0.0.443";
|
|
1
|
+
export declare const AGENT_HARNESS_VERSION = "0.0.445";
|
|
2
2
|
export declare const AGENT_HARNESS_RELEASE_DATE = "2026-05-04";
|
package/dist/package-version.js
CHANGED
|
@@ -1,2 +1,2 @@
|
|
|
1
|
-
export const AGENT_HARNESS_VERSION = "0.0.443";
|
|
1
|
+
export const AGENT_HARNESS_VERSION = "0.0.445";
|
|
2
2
|
export const AGENT_HARNESS_RELEASE_DATE = "2026-05-04";
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
import path from "node:path";
|
|
2
2
|
import { createHash } from "node:crypto";
|
|
3
3
|
import { createAsyncSubAgentMiddleware, createFilesystemMiddleware, createMemoryMiddleware, createPatchToolCallsMiddleware, createSkillsMiddleware, createSummarizationMiddleware, createSubAgentMiddleware, FilesystemBackend, StateBackend, } from "deepagents";
|
|
4
|
-
import { createAgent,
|
|
4
|
+
import { createAgent, humanInTheLoopMiddleware, todoListMiddleware } from "langchain";
|
|
5
5
|
import { sanitizeVisibleText, tryParseJson, wrapResolvedModel, } from "./parsing/output-parsing.js";
|
|
6
6
|
import { salvageJsonToolCalls } from "./parsing/output-tool-args.js";
|
|
7
7
|
import { extractMessageText } from "../utils/message-content.js";
|
|
@@ -17,7 +17,6 @@ import { extractSubagentRequestText, invokeBuiltinTaskTool as invokeBuiltinTaskT
|
|
|
17
17
|
import { isEmptyFinalAiMessageError, resolveBindingTimeout, resolveStreamIdleTimeout, } from "./adapter/resilience.js";
|
|
18
18
|
import { createResolvedModel } from "./adapter/model/model-providers.js";
|
|
19
19
|
import { appendProviderToolCallAliasTools, resolveAdapterTools } from "./adapter/tool-resolution.js";
|
|
20
|
-
import { normalizeToolArgsForSchema } from "./adapter/tool/tool-arguments.js";
|
|
21
20
|
import { resolveRuntimeStreamExecutionContext, } from "./adapter/flow/execution-context.js";
|
|
22
21
|
import { isRetryableProviderError } from "./adapter/resilience.js";
|
|
23
22
|
import { UPSTREAM_REQUEST_CONFIG_KEY, UPSTREAM_SESSION_CONFIG_KEY } from "./adapter/upstream-configurable-keys.js";
|
|
@@ -39,6 +38,61 @@ function hasDelegatedExecutionToolEvidence(result) {
|
|
|
39
38
|
return executedToolResults.some((toolResult) => (toolResult.isError !== true
|
|
40
39
|
&& !isPlanToolName(toolResult.toolName)));
|
|
41
40
|
}
|
|
41
|
+
function normalizeEvidenceToolName(toolName) {
|
|
42
|
+
return typeof toolName === "string" ? toolName.trim().toLowerCase() : "";
|
|
43
|
+
}
|
|
44
|
+
function collectSuccessfulDelegatedExecutionToolNames(result) {
|
|
45
|
+
const executedToolResults = Array.isArray(result.metadata?.executedToolResults)
|
|
46
|
+
? result.metadata.executedToolResults
|
|
47
|
+
: [];
|
|
48
|
+
return new Set(executedToolResults
|
|
49
|
+
.filter((toolResult) => toolResult.isError !== true && !isPlanToolName(toolResult.toolName))
|
|
50
|
+
.map((toolResult) => normalizeEvidenceToolName(toolResult.toolName))
|
|
51
|
+
.filter((toolName) => toolName.length > 0));
|
|
52
|
+
}
|
|
53
|
+
function textExplicitlyNamesConfiguredTool(text, toolName) {
|
|
54
|
+
const name = toolName.trim();
|
|
55
|
+
if (!name) {
|
|
56
|
+
return false;
|
|
57
|
+
}
|
|
58
|
+
const pattern = new RegExp(`(?:^|[^\\p{L}\\p{N}_-])${escapeRegExp(name)}(?:$|[^\\p{L}\\p{N}_-])`, "iu");
|
|
59
|
+
return pattern.test(text);
|
|
60
|
+
}
|
|
61
|
+
function resolveExplicitRequestedExecutionToolNames(binding, requestText) {
|
|
62
|
+
const text = requestText.trim();
|
|
63
|
+
if (!text) {
|
|
64
|
+
return [];
|
|
65
|
+
}
|
|
66
|
+
return getBindingPrimaryTools(binding)
|
|
67
|
+
.map((tool) => tool.name)
|
|
68
|
+
.filter((toolName) => typeof toolName === "string" && toolName.trim().length > 0)
|
|
69
|
+
.filter((toolName) => !isPlanToolName(toolName))
|
|
70
|
+
.filter((toolName) => textExplicitlyNamesConfiguredTool(text, toolName));
|
|
71
|
+
}
|
|
72
|
+
function listMissingDelegatedExecutionToolEvidence(result, requiredToolNames = []) {
|
|
73
|
+
if (requiredToolNames.length === 0) {
|
|
74
|
+
return hasDelegatedExecutionToolEvidence(result) ? [] : ["configured non-planning tools"];
|
|
75
|
+
}
|
|
76
|
+
const observed = collectSuccessfulDelegatedExecutionToolNames(result);
|
|
77
|
+
return requiredToolNames.filter((toolName) => !observed.has(normalizeEvidenceToolName(toolName)));
|
|
78
|
+
}
|
|
79
|
+
function hasRequiredDelegatedExecutionToolEvidence(result, requiredToolNames = []) {
|
|
80
|
+
return hasDelegatedExecutionToolEvidence(result)
|
|
81
|
+
&& listMissingDelegatedExecutionToolEvidence(result, requiredToolNames).length === 0;
|
|
82
|
+
}
|
|
83
|
+
function buildExplicitExecutionToolRetryInstruction(missingToolNames) {
|
|
84
|
+
const tools = missingToolNames
|
|
85
|
+
.filter((toolName) => toolName !== "configured non-planning tools")
|
|
86
|
+
.join(", ");
|
|
87
|
+
if (!tools) {
|
|
88
|
+
return "";
|
|
89
|
+
}
|
|
90
|
+
return [
|
|
91
|
+
`The request explicitly named configured evidence tool(s): ${tools}.`,
|
|
92
|
+
"Before the final answer, call every listed non-planning tool that has not already produced a successful tool result.",
|
|
93
|
+
"Do not substitute a different evidence tool for an explicitly named configured tool unless that tool invocation itself fails and the blocker is reported.",
|
|
94
|
+
].join("\n");
|
|
95
|
+
}
|
|
42
96
|
function buildDelegatedPlanEvidenceBlocker(agentId) {
|
|
43
97
|
return JSON.stringify({
|
|
44
98
|
status: "blocked",
|
|
@@ -97,245 +151,6 @@ function readConfiguredToolName(value) {
|
|
|
97
151
|
const typed = value;
|
|
98
152
|
return typeof typed.name === "string" ? typed.name.trim() : "";
|
|
99
153
|
}
|
|
100
|
-
function readMessageContentText(message) {
|
|
101
|
-
if (typeof message !== "object" || message === null) {
|
|
102
|
-
return "";
|
|
103
|
-
}
|
|
104
|
-
const content = message.content;
|
|
105
|
-
if (typeof content === "string") {
|
|
106
|
-
return content.trim();
|
|
107
|
-
}
|
|
108
|
-
if (!Array.isArray(content)) {
|
|
109
|
-
return "";
|
|
110
|
-
}
|
|
111
|
-
return content
|
|
112
|
-
.map((part) => typeof part === "object" && part !== null && typeof part.text === "string"
|
|
113
|
-
? part.text
|
|
114
|
-
: "")
|
|
115
|
-
.join("")
|
|
116
|
-
.trim();
|
|
117
|
-
}
|
|
118
|
-
function hasExternalPlanEvidenceInstruction(messages) {
|
|
119
|
-
return messages.some((message) => {
|
|
120
|
-
const text = readMessageContentText(message);
|
|
121
|
-
return /todo board already exists|required todo board already exists|non[-\s]?planning tool call/iu.test(text);
|
|
122
|
-
});
|
|
123
|
-
}
|
|
124
|
-
function parseToolCallArgs(value) {
|
|
125
|
-
if (typeof value === "object" && value !== null && !Array.isArray(value)) {
|
|
126
|
-
return value;
|
|
127
|
-
}
|
|
128
|
-
if (typeof value !== "string" || value.trim().length === 0) {
|
|
129
|
-
return {};
|
|
130
|
-
}
|
|
131
|
-
try {
|
|
132
|
-
const parsed = JSON.parse(value);
|
|
133
|
-
return typeof parsed === "object" && parsed !== null && !Array.isArray(parsed)
|
|
134
|
-
? parsed
|
|
135
|
-
: {};
|
|
136
|
-
}
|
|
137
|
-
catch {
|
|
138
|
-
return {};
|
|
139
|
-
}
|
|
140
|
-
}
|
|
141
|
-
function readMessageToolCalls(message) {
|
|
142
|
-
if (typeof message !== "object" || message === null) {
|
|
143
|
-
return [];
|
|
144
|
-
}
|
|
145
|
-
const typed = message;
|
|
146
|
-
const raw = Array.isArray(typed.tool_calls) ? typed.tool_calls
|
|
147
|
-
: Array.isArray(typed.kwargs?.tool_calls) ? typed.kwargs.tool_calls
|
|
148
|
-
: Array.isArray(typed.additional_kwargs?.tool_calls) ? typed.additional_kwargs.tool_calls
|
|
149
|
-
: Array.isArray(typed.kwargs?.additional_kwargs?.tool_calls) ? typed.kwargs.additional_kwargs.tool_calls
|
|
150
|
-
: Array.isArray(typed.lc_kwargs?.tool_calls) ? typed.lc_kwargs.tool_calls
|
|
151
|
-
: Array.isArray(typed.lc_kwargs?.additional_kwargs?.tool_calls) ? typed.lc_kwargs.additional_kwargs.tool_calls
|
|
152
|
-
: [];
|
|
153
|
-
return raw
|
|
154
|
-
.map((toolCall) => {
|
|
155
|
-
if (typeof toolCall !== "object" || toolCall === null) {
|
|
156
|
-
return null;
|
|
157
|
-
}
|
|
158
|
-
const call = toolCall;
|
|
159
|
-
const name = typeof call.name === "string"
|
|
160
|
-
? call.name
|
|
161
|
-
: typeof call.function?.name === "string"
|
|
162
|
-
? call.function.name
|
|
163
|
-
: undefined;
|
|
164
|
-
const args = parseToolCallArgs(call.args ?? call.function?.arguments);
|
|
165
|
-
return {
|
|
166
|
-
...(typeof call.id === "string" ? { id: call.id } : {}),
|
|
167
|
-
...(name ? { name } : {}),
|
|
168
|
-
args,
|
|
169
|
-
};
|
|
170
|
-
})
|
|
171
|
-
.filter((toolCall) => toolCall !== null);
|
|
172
|
-
}
|
|
173
|
-
function todoToolCallIsTerminal(toolCall) {
|
|
174
|
-
const todos = toolCall.args?.todos;
|
|
175
|
-
if (!Array.isArray(todos) || todos.length === 0) {
|
|
176
|
-
return false;
|
|
177
|
-
}
|
|
178
|
-
return todos.every((todo) => {
|
|
179
|
-
if (typeof todo !== "object" || todo === null || typeof todo.status !== "string") {
|
|
180
|
-
return false;
|
|
181
|
-
}
|
|
182
|
-
const status = todo.status.trim().toLowerCase();
|
|
183
|
-
return status !== "pending" && status !== "in_progress";
|
|
184
|
-
});
|
|
185
|
-
}
|
|
186
|
-
function readToolResultName(message) {
|
|
187
|
-
if (typeof message !== "object" || message === null) {
|
|
188
|
-
return "";
|
|
189
|
-
}
|
|
190
|
-
const typed = message;
|
|
191
|
-
const messageType = typeof typed.type === "string"
|
|
192
|
-
? typed.type
|
|
193
|
-
: typeof typed._getType === "function"
|
|
194
|
-
? String(typed._getType())
|
|
195
|
-
: "";
|
|
196
|
-
if (messageType !== "tool" && typeof typed.tool_call_id !== "string") {
|
|
197
|
-
return "";
|
|
198
|
-
}
|
|
199
|
-
return typeof typed.name === "string" ? typed.name : "";
|
|
200
|
-
}
|
|
201
|
-
function createPlanGuardToolError(toolCallId, content, fallbackPrefix) {
|
|
202
|
-
return new ToolMessage({
|
|
203
|
-
content,
|
|
204
|
-
tool_call_id: typeof toolCallId === "string" ? toolCallId : `${fallbackPrefix}-${Math.random().toString(36).slice(2, 10)}`,
|
|
205
|
-
status: "error",
|
|
206
|
-
});
|
|
207
|
-
}
|
|
208
|
-
function isPromiseLike(value) {
|
|
209
|
-
return typeof value === "object" && value !== null && typeof value.then === "function";
|
|
210
|
-
}
|
|
211
|
-
function renderToolInvocationError(error) {
|
|
212
|
-
const message = error instanceof Error ? error.message : String(error);
|
|
213
|
-
const cause = typeof error === "object" && error !== null && "cause" in error
|
|
214
|
-
? error.cause
|
|
215
|
-
: undefined;
|
|
216
|
-
const causeMessage = cause instanceof Error ? cause.message : "";
|
|
217
|
-
return [message, causeMessage]
|
|
218
|
-
.map((value) => value.trim())
|
|
219
|
-
.filter(Boolean)
|
|
220
|
-
.join("\n");
|
|
221
|
-
}
|
|
222
|
-
function createToolInvocationErrorMessage(toolCallId, toolName, error) {
|
|
223
|
-
const rendered = renderToolInvocationError(error);
|
|
224
|
-
return createPlanGuardToolError(toolCallId, `Error invoking tool '${toolName}' with error: ${rendered || "tool invocation failed"}`, "tool-invocation-error");
|
|
225
|
-
}
|
|
226
|
-
function createTodoPlanGuardMiddleware(options = {}) {
|
|
227
|
-
let observedPlanToolResult = false;
|
|
228
|
-
let observedNonPlanToolResult = false;
|
|
229
|
-
return createMiddleware({
|
|
230
|
-
name: "harnessTodoPlanGuard",
|
|
231
|
-
wrapToolCall: ((request, handler) => {
|
|
232
|
-
const toolName = typeof request.toolCall?.name === "string"
|
|
233
|
-
? request.toolCall.name
|
|
234
|
-
: typeof request.tool?.name === "string"
|
|
235
|
-
? request.tool.name
|
|
236
|
-
: "";
|
|
237
|
-
const messages = Array.isArray(request.state?.messages) ? request.state.messages : [];
|
|
238
|
-
const toolResultNames = messages.map(readToolResultName).filter((name) => name.length > 0);
|
|
239
|
-
const hasPlanToolResult = toolResultNames.some(isPlanToolName);
|
|
240
|
-
const hasNonPlanToolResult = toolResultNames.some((resultToolName) => !isPlanToolName(resultToolName));
|
|
241
|
-
const hasExternalPlanEvidence = hasExternalPlanEvidenceInstruction(messages);
|
|
242
|
-
if (options.requiresPlan === true
|
|
243
|
-
&& !(observedPlanToolResult || hasPlanToolResult || hasExternalPlanEvidence)
|
|
244
|
-
&& toolName.length > 0
|
|
245
|
-
&& !isPlanToolName(toolName)) {
|
|
246
|
-
return createPlanGuardToolError(request.toolCall?.id, "Error: this agent has a required visible planning contract. Your first action must be write_todos with concrete task steps and statuses. Do not call non-planning tools before the initial todo board exists.", "todo-initial-plan-guard");
|
|
247
|
-
}
|
|
248
|
-
if (options.requiresPlan === true
|
|
249
|
-
&& (observedPlanToolResult || hasPlanToolResult || hasExternalPlanEvidence)
|
|
250
|
-
&& !(observedNonPlanToolResult || hasNonPlanToolResult)
|
|
251
|
-
&& isPlanToolName(toolName)) {
|
|
252
|
-
return createPlanGuardToolError(request.toolCall?.id, "Error: the required todo board already exists, but no non-planning evidence tool has returned yet. Do not call write_todos or read_todos again. Your next action must be exactly one non-planning tool call selected from the available tool descriptions and schemas.", "todo-plan-evidence-guard");
|
|
253
|
-
}
|
|
254
|
-
if (options.requiresPlan === true
|
|
255
|
-
&& !(observedNonPlanToolResult || hasNonPlanToolResult)
|
|
256
|
-
&& isPlanToolName(toolName)
|
|
257
|
-
&& normalizePlanToolName(toolName).includes("write_todos")
|
|
258
|
-
&& todoToolCallIsTerminal({ args: parseToolCallArgs(request.toolCall?.args) })) {
|
|
259
|
-
return createPlanGuardToolError(request.toolCall?.id, "Error: write_todos cannot mark every todo as terminal before any non-planning evidence tool returns. Keep one todo in_progress and the remaining todos pending until evidence tools return.", "write-todos-tool-guard");
|
|
260
|
-
}
|
|
261
|
-
const markObservedToolResult = () => {
|
|
262
|
-
if (isPlanToolName(toolName)) {
|
|
263
|
-
observedPlanToolResult = true;
|
|
264
|
-
}
|
|
265
|
-
else if (toolName.length > 0) {
|
|
266
|
-
observedNonPlanToolResult = true;
|
|
267
|
-
}
|
|
268
|
-
};
|
|
269
|
-
const normalizedArgs = normalizeToolArgsForSchema(parseToolCallArgs(request.toolCall?.args), request.tool?.schema, request.toolCall?.args);
|
|
270
|
-
const normalizedRequest = {
|
|
271
|
-
...request,
|
|
272
|
-
toolCall: request.toolCall
|
|
273
|
-
? {
|
|
274
|
-
...request.toolCall,
|
|
275
|
-
args: normalizedArgs,
|
|
276
|
-
}
|
|
277
|
-
: request.toolCall,
|
|
278
|
-
};
|
|
279
|
-
try {
|
|
280
|
-
const result = handler(normalizedRequest);
|
|
281
|
-
if (isPromiseLike(result)) {
|
|
282
|
-
return result
|
|
283
|
-
.then((value) => {
|
|
284
|
-
markObservedToolResult();
|
|
285
|
-
return value;
|
|
286
|
-
})
|
|
287
|
-
.catch((error) => createToolInvocationErrorMessage(request.toolCall?.id, toolName || "unknown", error));
|
|
288
|
-
}
|
|
289
|
-
markObservedToolResult();
|
|
290
|
-
return result;
|
|
291
|
-
}
|
|
292
|
-
catch (error) {
|
|
293
|
-
return createToolInvocationErrorMessage(request.toolCall?.id, toolName || "unknown", error);
|
|
294
|
-
}
|
|
295
|
-
}),
|
|
296
|
-
afterModel: (state) => {
|
|
297
|
-
if (!Array.isArray(state.messages) || state.messages.length === 0) {
|
|
298
|
-
return;
|
|
299
|
-
}
|
|
300
|
-
const hasNonPlanToolResult = state.messages.some((message) => {
|
|
301
|
-
if (typeof message !== "object" || message === null) {
|
|
302
|
-
return false;
|
|
303
|
-
}
|
|
304
|
-
const typed = message;
|
|
305
|
-
const messageType = typeof typed.type === "string"
|
|
306
|
-
? typed.type
|
|
307
|
-
: typeof typed._getType === "function"
|
|
308
|
-
? String(typed._getType())
|
|
309
|
-
: "";
|
|
310
|
-
if (messageType !== "tool" && typeof typed.tool_call_id !== "string") {
|
|
311
|
-
return false;
|
|
312
|
-
}
|
|
313
|
-
const toolName = typeof typed.name === "string" ? typed.name : "";
|
|
314
|
-
return toolName.length > 0 && !isPlanToolName(toolName);
|
|
315
|
-
});
|
|
316
|
-
if (hasNonPlanToolResult) {
|
|
317
|
-
return;
|
|
318
|
-
}
|
|
319
|
-
const lastAiMessage = [...state.messages].reverse().find((message) => readMessageToolCalls(message).length > 0);
|
|
320
|
-
const lastToolCalls = readMessageToolCalls(lastAiMessage);
|
|
321
|
-
if (!lastAiMessage) {
|
|
322
|
-
return;
|
|
323
|
-
}
|
|
324
|
-
const writeTodosCalls = lastToolCalls.filter((toolCall) => isPlanToolName(toolCall.name));
|
|
325
|
-
const prematureCompletedCalls = writeTodosCalls.filter(todoToolCallIsTerminal);
|
|
326
|
-
if (prematureCompletedCalls.length === 0) {
|
|
327
|
-
return;
|
|
328
|
-
}
|
|
329
|
-
return {
|
|
330
|
-
messages: prematureCompletedCalls.map((toolCall, index) => new ToolMessage({
|
|
331
|
-
content: "Error: write_todos cannot mark every todo as terminal before any non-planning evidence tool returns. Keep one todo in_progress and the remaining todos pending until evidence tools return.",
|
|
332
|
-
tool_call_id: toolCall.id ?? `write-todos-plan-guard-${index}`,
|
|
333
|
-
status: "error",
|
|
334
|
-
})),
|
|
335
|
-
};
|
|
336
|
-
},
|
|
337
|
-
});
|
|
338
|
-
}
|
|
339
154
|
function shouldUseConfigurableDeepAgentAssembly(binding) {
|
|
340
155
|
return getBindingExecutionKind(binding) === "deepagent";
|
|
341
156
|
}
|
|
@@ -933,9 +748,15 @@ export class AgentRuntimeAdapter {
|
|
|
933
748
|
const runDelegatedRequest = (text, requestSuffix = "") => this.invoke(targetBinding, text, childSessionId, `${childRequestId}${requestSuffix}`, undefined, [], invokeOptions);
|
|
934
749
|
let result = await runDelegatedRequest(requestText);
|
|
935
750
|
const targetRequiresExecutionToolEvidence = getBindingPrimaryTools(targetBinding).length > 0;
|
|
936
|
-
|
|
937
|
-
|
|
938
|
-
|
|
751
|
+
const requiredExecutionToolNames = resolveExplicitRequestedExecutionToolNames(targetBinding, requestText);
|
|
752
|
+
if (targetRequiresExecutionToolEvidence && !hasRequiredDelegatedExecutionToolEvidence(result, requiredExecutionToolNames)) {
|
|
753
|
+
const missingToolNames = listMissingDelegatedExecutionToolEvidence(result, requiredExecutionToolNames);
|
|
754
|
+
result = await runDelegatedRequest([
|
|
755
|
+
requestText,
|
|
756
|
+
EXECUTION_WITH_TOOL_EVIDENCE_RETRY_INSTRUCTION,
|
|
757
|
+
buildExplicitExecutionToolRetryInstruction(missingToolNames),
|
|
758
|
+
].filter(Boolean).join("\n\n"), ":tool-evidence-retry");
|
|
759
|
+
if (!hasRequiredDelegatedExecutionToolEvidence(result, requiredExecutionToolNames)) {
|
|
939
760
|
throw new DelegatedExecutionNoToolEvidenceError(targetBinding.agent.id);
|
|
940
761
|
}
|
|
941
762
|
}
|
|
@@ -1446,6 +1267,7 @@ export class AgentRuntimeAdapter {
|
|
|
1446
1267
|
}
|
|
1447
1268
|
}
|
|
1448
1269
|
const targetRequiresExecutionToolEvidence = getBindingPrimaryTools(selectedBinding).length > 0;
|
|
1270
|
+
const requiredExecutionToolNames = resolveExplicitRequestedExecutionToolNames(selectedBinding, requestText);
|
|
1449
1271
|
if (selectedBinding.harnessRuntime.executionContract?.requiresPlan === true
|
|
1450
1272
|
&& !hasDelegatedPlanEvidence(delegatedResult)) {
|
|
1451
1273
|
try {
|
|
@@ -1465,9 +1287,14 @@ export class AgentRuntimeAdapter {
|
|
|
1465
1287
|
};
|
|
1466
1288
|
}
|
|
1467
1289
|
}
|
|
1468
|
-
if (targetRequiresExecutionToolEvidence && !hasDelegatedExecutionToolEvidence(delegatedResult)) {
|
|
1290
|
+
if (targetRequiresExecutionToolEvidence && !hasRequiredDelegatedExecutionToolEvidence(delegatedResult, requiredExecutionToolNames)) {
|
|
1291
|
+
const missingToolNames = listMissingDelegatedExecutionToolEvidence(delegatedResult, requiredExecutionToolNames);
|
|
1469
1292
|
try {
|
|
1470
|
-
delegatedResult = await runDelegatedRequest([
|
|
1293
|
+
delegatedResult = await runDelegatedRequest([
|
|
1294
|
+
requestText,
|
|
1295
|
+
EXECUTION_WITH_TOOL_EVIDENCE_RETRY_INSTRUCTION,
|
|
1296
|
+
buildExplicitExecutionToolRetryInstruction(missingToolNames),
|
|
1297
|
+
].filter(Boolean).join("\n\n"), ":tool-evidence-retry", selectedBinding.harnessRuntime.executionContract?.requiresPlan === true
|
|
1471
1298
|
? {
|
|
1472
1299
|
suppressInitialRequiredPlanInstruction: true,
|
|
1473
1300
|
externalPlanEvidence: hasDelegatedPlanEvidence(delegatedResult),
|
|
@@ -1502,8 +1329,10 @@ export class AgentRuntimeAdapter {
|
|
|
1502
1329
|
},
|
|
1503
1330
|
};
|
|
1504
1331
|
}
|
|
1505
|
-
if (targetRequiresExecutionToolEvidence && !hasDelegatedExecutionToolEvidence(delegatedResult)) {
|
|
1506
|
-
const output = buildDelegatedExecutionEvidenceBlocker(selectedBinding.agent.id,
|
|
1332
|
+
if (targetRequiresExecutionToolEvidence && !hasRequiredDelegatedExecutionToolEvidence(delegatedResult, requiredExecutionToolNames)) {
|
|
1333
|
+
const output = buildDelegatedExecutionEvidenceBlocker(selectedBinding.agent.id, requiredExecutionToolNames.length > 0
|
|
1334
|
+
? requiredExecutionToolNames
|
|
1335
|
+
: getBindingPrimaryTools(selectedBinding).map((tool) => tool.name));
|
|
1507
1336
|
return {
|
|
1508
1337
|
toolOutput: output,
|
|
1509
1338
|
delegatedSubagentType: subagentType,
|
|
@@ -1898,9 +1727,17 @@ export class AgentRuntimeAdapter {
|
|
|
1898
1727
|
delegatedResult = mergeDelegatedResultToolEvidence(yield* runPlannedDelegation(planned.subagentType, [delegatedText, DELEGATED_PLAN_EVIDENCE_RETRY_INSTRUCTION].filter(Boolean).join("\n\n"), ":plan-evidence-retry"), previousDelegatedResult);
|
|
1899
1728
|
}
|
|
1900
1729
|
const targetRequiresExecutionToolEvidence = selectedBinding ? getBindingPrimaryTools(selectedBinding).length > 0 : false;
|
|
1901
|
-
|
|
1730
|
+
const requiredExecutionToolNames = selectedBinding
|
|
1731
|
+
? resolveExplicitRequestedExecutionToolNames(selectedBinding, planned.description)
|
|
1732
|
+
: [];
|
|
1733
|
+
if (targetRequiresExecutionToolEvidence && !hasRequiredDelegatedExecutionToolEvidence(delegatedResult, requiredExecutionToolNames)) {
|
|
1734
|
+
const missingToolNames = listMissingDelegatedExecutionToolEvidence(delegatedResult, requiredExecutionToolNames);
|
|
1902
1735
|
const previousDelegatedResult = delegatedResult;
|
|
1903
|
-
delegatedResult = mergeDelegatedResultToolEvidence(yield* runPlannedDelegation(planned.subagentType, [
|
|
1736
|
+
delegatedResult = mergeDelegatedResultToolEvidence(yield* runPlannedDelegation(planned.subagentType, [
|
|
1737
|
+
delegatedText,
|
|
1738
|
+
EXECUTION_WITH_TOOL_EVIDENCE_RETRY_INSTRUCTION,
|
|
1739
|
+
buildExplicitExecutionToolRetryInstruction(missingToolNames),
|
|
1740
|
+
].filter(Boolean).join("\n\n"), ":tool-evidence-retry"), previousDelegatedResult);
|
|
1904
1741
|
}
|
|
1905
1742
|
if (selectedBinding?.harnessRuntime.executionContract?.requiresPlan === true && !hasDelegatedPlanEvidence(delegatedResult)) {
|
|
1906
1743
|
const output = buildDelegatedPlanEvidenceBlocker(selectedBinding.agent.id);
|
|
@@ -1911,8 +1748,10 @@ export class AgentRuntimeAdapter {
|
|
|
1911
1748
|
finalMessageText: output,
|
|
1912
1749
|
};
|
|
1913
1750
|
}
|
|
1914
|
-
if (targetRequiresExecutionToolEvidence && !hasDelegatedExecutionToolEvidence(delegatedResult)) {
|
|
1915
|
-
const output = buildDelegatedExecutionEvidenceBlocker(selectedBinding.agent.id,
|
|
1751
|
+
if (targetRequiresExecutionToolEvidence && !hasRequiredDelegatedExecutionToolEvidence(delegatedResult, requiredExecutionToolNames)) {
|
|
1752
|
+
const output = buildDelegatedExecutionEvidenceBlocker(selectedBinding.agent.id, requiredExecutionToolNames.length > 0
|
|
1753
|
+
? requiredExecutionToolNames
|
|
1754
|
+
: getBindingPrimaryTools(selectedBinding).map((tool) => tool.name));
|
|
1916
1755
|
delegatedResult = {
|
|
1917
1756
|
...delegatedResult,
|
|
1918
1757
|
state: "failed",
|
|
@@ -2105,14 +1944,20 @@ export class AgentRuntimeAdapter {
|
|
|
2105
1944
|
});
|
|
2106
1945
|
let delegatedResult = yield* runDelegatedStreamAttempt(delegatedText);
|
|
2107
1946
|
const targetRequiresExecutionToolEvidence = getBindingPrimaryTools(selectedBinding).length > 0;
|
|
1947
|
+
const requiredExecutionToolNames = resolveExplicitRequestedExecutionToolNames(selectedBinding, requestText);
|
|
2108
1948
|
if (selectedBinding.harnessRuntime.executionContract?.requiresPlan === true
|
|
2109
1949
|
&& !hasDelegatedPlanEvidence(delegatedResult)) {
|
|
2110
1950
|
const previousDelegatedResult = delegatedResult;
|
|
2111
1951
|
delegatedResult = mergeDelegatedResultToolEvidence(yield* runDelegatedStreamAttempt([delegatedText, DELEGATED_PLAN_EVIDENCE_RETRY_INSTRUCTION].filter(Boolean).join("\n\n"), ":plan-evidence-retry"), previousDelegatedResult);
|
|
2112
1952
|
}
|
|
2113
|
-
if (targetRequiresExecutionToolEvidence && !hasDelegatedExecutionToolEvidence(delegatedResult)) {
|
|
1953
|
+
if (targetRequiresExecutionToolEvidence && !hasRequiredDelegatedExecutionToolEvidence(delegatedResult, requiredExecutionToolNames)) {
|
|
1954
|
+
const missingToolNames = listMissingDelegatedExecutionToolEvidence(delegatedResult, requiredExecutionToolNames);
|
|
2114
1955
|
const previousDelegatedResult = delegatedResult;
|
|
2115
|
-
delegatedResult = mergeDelegatedResultToolEvidence(yield* runDelegatedStreamAttempt([
|
|
1956
|
+
delegatedResult = mergeDelegatedResultToolEvidence(yield* runDelegatedStreamAttempt([
|
|
1957
|
+
delegatedText,
|
|
1958
|
+
EXECUTION_WITH_TOOL_EVIDENCE_RETRY_INSTRUCTION,
|
|
1959
|
+
buildExplicitExecutionToolRetryInstruction(missingToolNames),
|
|
1960
|
+
].filter(Boolean).join("\n\n"), ":tool-evidence-retry"), previousDelegatedResult);
|
|
2116
1961
|
}
|
|
2117
1962
|
if (selectedBinding.harnessRuntime.executionContract?.requiresPlan === true
|
|
2118
1963
|
&& !hasDelegatedPlanEvidence(delegatedResult)) {
|
|
@@ -2124,8 +1969,10 @@ export class AgentRuntimeAdapter {
|
|
|
2124
1969
|
finalMessageText: output,
|
|
2125
1970
|
};
|
|
2126
1971
|
}
|
|
2127
|
-
if (targetRequiresExecutionToolEvidence && !hasDelegatedExecutionToolEvidence(delegatedResult)) {
|
|
2128
|
-
const output = buildDelegatedExecutionEvidenceBlocker(selectedBinding.agent.id,
|
|
1972
|
+
if (targetRequiresExecutionToolEvidence && !hasRequiredDelegatedExecutionToolEvidence(delegatedResult, requiredExecutionToolNames)) {
|
|
1973
|
+
const output = buildDelegatedExecutionEvidenceBlocker(selectedBinding.agent.id, requiredExecutionToolNames.length > 0
|
|
1974
|
+
? requiredExecutionToolNames
|
|
1975
|
+
: getBindingPrimaryTools(selectedBinding).map((tool) => tool.name));
|
|
2129
1976
|
delegatedResult = {
|
|
2130
1977
|
...delegatedResult,
|
|
2131
1978
|
state: "failed",
|
|
@@ -35,17 +35,23 @@ function planStateHasUnfinishedItems(planState) {
|
|
|
35
35
|
function planStateHasActiveItems(planState) {
|
|
36
36
|
return planStateHasUnfinishedItems(planState);
|
|
37
37
|
}
|
|
38
|
+
function isPlanToolName(toolName) {
|
|
39
|
+
const normalized = typeof toolName === "string" ? toolName.trim().toLowerCase().replace(/[\s-]+/gu, "_") : "";
|
|
40
|
+
return normalized === "write_todos"
|
|
41
|
+
|| normalized === "read_todos"
|
|
42
|
+
|| normalized === "tool_call_write_todos"
|
|
43
|
+
|| normalized === "tool_call_read_todos"
|
|
44
|
+
|| normalized === "call_write_todos"
|
|
45
|
+
|| normalized === "call_read_todos";
|
|
46
|
+
}
|
|
38
47
|
function isSubstantiveTerminalAssistantOutput(value) {
|
|
39
48
|
const normalized = sanitizeVisibleText(value).trim();
|
|
40
49
|
if (normalized.length < 80) {
|
|
41
50
|
return false;
|
|
42
51
|
}
|
|
43
|
-
if (/\b(?:delegated|waiting|wait for|initiated)\b/i.test(normalized) && !/\b(?:finding|summary|root cause|evidence|completed|result|issue)\b/i.test(normalized)) {
|
|
44
|
-
return false;
|
|
45
|
-
}
|
|
46
52
|
return true;
|
|
47
53
|
}
|
|
48
|
-
function inferPlanItemStatusFromTerminalAssistantOutput(value) {
|
|
54
|
+
function inferPlanItemStatusFromTerminalAssistantOutput(value, options = {}) {
|
|
49
55
|
const terminalStatus = readTerminalExecutionStatus(value);
|
|
50
56
|
if (terminalStatus) {
|
|
51
57
|
return mapTerminalStatusToPlanItemStatus(terminalStatus);
|
|
@@ -57,6 +63,9 @@ function inferPlanItemStatusFromTerminalAssistantOutput(value) {
|
|
|
57
63
|
if (normalized.startsWith("runtime_error=")) {
|
|
58
64
|
return "failed";
|
|
59
65
|
}
|
|
66
|
+
if (options.hasSuccessfulExecutionEvidence !== true) {
|
|
67
|
+
return null;
|
|
68
|
+
}
|
|
60
69
|
return isSubstantiveTerminalAssistantOutput(value) ? "completed" : null;
|
|
61
70
|
}
|
|
62
71
|
function mapTerminalStatusToObservedPlanItemStatus(status, sawSuccessfulToolResult) {
|
|
@@ -675,6 +684,7 @@ export async function* streamHarnessRun(options) {
|
|
|
675
684
|
let syntheticFallback;
|
|
676
685
|
const toolErrors = [];
|
|
677
686
|
let sawSuccessfulToolResult = false;
|
|
687
|
+
let sawSuccessfulExecutionToolResult = false;
|
|
678
688
|
let lastToolResultKey = null;
|
|
679
689
|
const executedToolResults = [];
|
|
680
690
|
const emittedCommentary = new Set();
|
|
@@ -873,6 +883,9 @@ export async function* streamHarnessRun(options) {
|
|
|
873
883
|
}
|
|
874
884
|
else {
|
|
875
885
|
sawSuccessfulToolResult = true;
|
|
886
|
+
if (!isPlanToolName(normalizedChunk.toolName) && normalizedChunk.toolName !== "task") {
|
|
887
|
+
sawSuccessfulExecutionToolResult = true;
|
|
888
|
+
}
|
|
876
889
|
}
|
|
877
890
|
yield {
|
|
878
891
|
type: "tool-result",
|
|
@@ -1055,12 +1068,14 @@ export async function* streamHarnessRun(options) {
|
|
|
1055
1068
|
}
|
|
1056
1069
|
currentPlanState = await refreshPlanStateFromPersistence(options, currentPlanState);
|
|
1057
1070
|
const explicitTerminalAssistantStatus = readTerminalExecutionStatus(assistantOutput);
|
|
1058
|
-
let terminalAssistantPlanItemStatus = inferPlanItemStatusFromTerminalAssistantOutput(assistantOutput);
|
|
1059
|
-
|
|
1071
|
+
let terminalAssistantPlanItemStatus = inferPlanItemStatusFromTerminalAssistantOutput(assistantOutput, {
|
|
1072
|
+
hasSuccessfulExecutionEvidence: sawSuccessfulExecutionToolResult,
|
|
1073
|
+
});
|
|
1074
|
+
if (explicitTerminalAssistantStatus === "blocked" && sawSuccessfulExecutionToolResult) {
|
|
1060
1075
|
terminalAssistantPlanItemStatus = "completed";
|
|
1061
1076
|
}
|
|
1062
1077
|
if (terminalAssistantPlanItemStatus === "failed"
|
|
1063
|
-
&&
|
|
1078
|
+
&& sawSuccessfulExecutionToolResult
|
|
1064
1079
|
&& !explicitTerminalAssistantStatus
|
|
1065
1080
|
&& !sanitizeVisibleText(assistantOutput).trim().toLowerCase().startsWith("runtime_error=")
|
|
1066
1081
|
&& assistantOutput.trim()) {
|