@botbotgo/agent-harness 0.0.418 → 0.0.420
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cli/chat-interactive.js +1 -1
- package/dist/cli/chat-stream.js +9 -1
- package/dist/package-version.d.ts +2 -2
- package/dist/package-version.js +2 -2
- package/dist/runtime/adapter/compat/openai-compatible.js +12 -0
- package/dist/runtime/adapter/flow/invocation-flow.d.ts +2 -0
- package/dist/runtime/adapter/flow/invocation-flow.js +13 -5
- package/dist/runtime/adapter/flow/invoke-runtime.d.ts +1 -0
- package/dist/runtime/adapter/flow/invoke-runtime.js +1 -0
- package/dist/runtime/adapter/flow/stream-runtime.d.ts +4 -0
- package/dist/runtime/adapter/flow/stream-runtime.js +177 -14
- package/dist/runtime/adapter/invocation-result.js +17 -6
- package/dist/runtime/adapter/local-tool-invocation.d.ts +2 -1
- package/dist/runtime/adapter/local-tool-invocation.js +268 -21
- package/dist/runtime/adapter/model/model-providers.js +269 -58
- package/dist/runtime/adapter/model/prompted-json-tool-call-capture.d.ts +9 -0
- package/dist/runtime/adapter/model/prompted-json-tool-call-capture.js +40 -0
- package/dist/runtime/adapter/runtime-adapter-support.js +58 -12
- package/dist/runtime/adapter/runtime-shell.js +3 -2
- package/dist/runtime/adapter/stream-event-projection.js +22 -5
- package/dist/runtime/adapter/tool/tool-arguments.js +157 -67
- package/dist/runtime/adapter/tool/tool-replay.js +0 -4
- package/dist/runtime/agent-runtime-adapter.d.ts +3 -0
- package/dist/runtime/agent-runtime-adapter.js +217 -73
- package/dist/runtime/harness/run/stream-run.js +31 -3
- package/dist/runtime/parsing/output-tool-args.js +108 -0
- package/dist/workspace/resource-compilers.js +17 -4
- package/package.json +1 -1
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
import path from "node:path";
|
|
2
|
+
import { createHash } from "node:crypto";
|
|
2
3
|
import { createAsyncSubAgentMiddleware, createFilesystemMiddleware, createMemoryMiddleware, createPatchToolCallsMiddleware, createSkillsMiddleware, createSummarizationMiddleware, createSubAgentMiddleware, FilesystemBackend, StateBackend, } from "deepagents";
|
|
3
|
-
import {
|
|
4
|
+
import { createAgent, createMiddleware, humanInTheLoopMiddleware, todoListMiddleware, ToolMessage } from "langchain";
|
|
4
5
|
import { sanitizeVisibleText, tryParseJson, wrapResolvedModel, } from "./parsing/output-parsing.js";
|
|
5
6
|
import { salvageJsonToolCalls } from "./parsing/output-tool-args.js";
|
|
6
7
|
import { extractMessageText } from "../utils/message-content.js";
|
|
@@ -17,6 +18,7 @@ import { isEmptyFinalAiMessageError, resolveBindingTimeout, resolveStreamIdleTim
|
|
|
17
18
|
import { createResolvedModel } from "./adapter/model/model-providers.js";
|
|
18
19
|
import { renderDirectWorkspaceListing, shouldDirectlyListWorkspaceFiles } from "./adapter/direct-builtin-utility.js";
|
|
19
20
|
import { appendProviderToolCallAliasTools, resolveAdapterTools } from "./adapter/tool-resolution.js";
|
|
21
|
+
import { normalizeToolArgsForSchema } from "./adapter/tool/tool-arguments.js";
|
|
20
22
|
import { resolveRuntimeStreamExecutionContext, } from "./adapter/flow/execution-context.js";
|
|
21
23
|
import { isRetryableProviderError } from "./adapter/resilience.js";
|
|
22
24
|
import { UPSTREAM_REQUEST_CONFIG_KEY, UPSTREAM_SESSION_CONFIG_KEY } from "./adapter/upstream-configurable-keys.js";
|
|
@@ -73,7 +75,9 @@ function isPlanToolName(toolName) {
|
|
|
73
75
|
return normalized === "write_todos"
|
|
74
76
|
|| normalized === "read_todos"
|
|
75
77
|
|| normalized === "tool_call_write_todos"
|
|
76
|
-
|| normalized === "tool_call_read_todos"
|
|
78
|
+
|| normalized === "tool_call_read_todos"
|
|
79
|
+
|| normalized === "call_write_todos"
|
|
80
|
+
|| normalized === "call_read_todos";
|
|
77
81
|
}
|
|
78
82
|
function readConfiguredToolName(value) {
|
|
79
83
|
if (typeof value !== "object" || value === null) {
|
|
@@ -82,26 +86,6 @@ function readConfiguredToolName(value) {
|
|
|
82
86
|
const typed = value;
|
|
83
87
|
return typeof typed.name === "string" ? typed.name.trim() : "";
|
|
84
88
|
}
|
|
85
|
-
function createBootstrapTodoPlan(toolNames) {
|
|
86
|
-
const evidenceToolName = toolNames.find((toolName) => !isPlanToolName(toolName));
|
|
87
|
-
const contents = evidenceToolName
|
|
88
|
-
? [
|
|
89
|
-
`Run ${evidenceToolName} for the requested evidence`,
|
|
90
|
-
`Inspect the ${evidenceToolName} result and extract concrete findings`,
|
|
91
|
-
"Update TODO status from the observed evidence",
|
|
92
|
-
"Return the final answer grounded in tool output",
|
|
93
|
-
]
|
|
94
|
-
: [
|
|
95
|
-
"Identify the concrete evidence needed for this request",
|
|
96
|
-
"Collect and inspect the available evidence",
|
|
97
|
-
"Update TODO status from the observed evidence",
|
|
98
|
-
"Return the final answer grounded in evidence",
|
|
99
|
-
];
|
|
100
|
-
return contents.map((content, index) => ({
|
|
101
|
-
content,
|
|
102
|
-
status: index === 0 ? "in_progress" : "pending",
|
|
103
|
-
}));
|
|
104
|
-
}
|
|
105
89
|
function readMessageContentText(message) {
|
|
106
90
|
if (typeof message !== "object" || message === null) {
|
|
107
91
|
return "";
|
|
@@ -120,6 +104,12 @@ function readMessageContentText(message) {
|
|
|
120
104
|
.join("")
|
|
121
105
|
.trim();
|
|
122
106
|
}
|
|
107
|
+
/**
 * Reports whether any message's text matches one of the phrases this module
 * treats as an instruction that planning evidence already exists
 * ("todo board already exists", "non-planning tool call", ...).
 *
 * @param {Array<unknown>} messages - Messages whose text is read via `readMessageContentText`.
 * @returns {boolean} `true` when at least one message matches, otherwise `false`.
 */
function hasExternalPlanEvidenceInstruction(messages) {
    // No `g`/`y` flag, so `.test()` keeps no state between calls.
    const externalEvidencePattern = /todo board already exists|required todo board already exists|non[-\s]?planning tool call/iu;
    const mentionsExternalEvidence = (message) => externalEvidencePattern.test(readMessageContentText(message));
    return messages.some(mentionsExternalEvidence);
}
|
|
123
113
|
function parseToolCallArgs(value) {
|
|
124
114
|
if (typeof value === "object" && value !== null && !Array.isArray(value)) {
|
|
125
115
|
return value;
|
|
@@ -182,7 +172,49 @@ function todoToolCallIsTerminal(toolCall) {
|
|
|
182
172
|
return status !== "pending" && status !== "in_progress";
|
|
183
173
|
});
|
|
184
174
|
}
|
|
175
|
+
/**
 * Extracts the tool name from a message that looks like a tool result.
 *
 * A message qualifies when its type is "tool" (taken from a string `type`
 * property, otherwise from `_getType()`), or when it carries a string
 * `tool_call_id`.
 *
 * @param {unknown} message - Candidate message object.
 * @returns {string} The message's string `name`, or "" when the message does
 *   not qualify or has no string name.
 */
function readToolResultName(message) {
    if (message === null || typeof message !== "object") {
        return "";
    }
    // A string `type` property wins over the `_getType()` accessor.
    let kind = "";
    if (typeof message.type === "string") {
        kind = message.type;
    }
    else if (typeof message._getType === "function") {
        kind = String(message._getType());
    }
    const looksLikeToolResult = kind === "tool" || typeof message.tool_call_id === "string";
    if (!looksLikeToolResult) {
        return "";
    }
    const { name } = message;
    return typeof name === "string" ? name : "";
}
|
|
190
|
+
/**
 * Builds an error-status `ToolMessage` used to reject or report a tool call.
 *
 * @param {unknown} toolCallId - Id of the tool call being answered; used as-is when it is a string.
 * @param {string} content - Message content.
 * @param {string} fallbackPrefix - Prefix for the generated id when `toolCallId` is not a string.
 * @returns {ToolMessage} A message with `status: "error"`.
 */
function createPlanGuardToolError(toolCallId, content, fallbackPrefix) {
    let resolvedToolCallId = toolCallId;
    if (typeof resolvedToolCallId !== "string") {
        // Fallback id: the prefix plus up to eight base-36 characters from Math.random().
        const randomSuffix = Math.random().toString(36).slice(2, 10);
        resolvedToolCallId = `${fallbackPrefix}-${randomSuffix}`;
    }
    return new ToolMessage({
        content,
        tool_call_id: resolvedToolCallId,
        status: "error",
    });
}
|
|
197
|
+
/**
 * Checks whether a value is a non-null object exposing a callable `then`.
 *
 * @param {unknown} value - Value to inspect.
 * @returns {boolean} `true` for objects with a function-valued `then`; `false`
 *   for everything else, including functions that happen to carry a `then`.
 */
function isPromiseLike(value) {
    if (value === null || typeof value !== "object") {
        return false;
    }
    return typeof value.then === "function";
}
|
|
200
|
+
/**
 * Renders a thrown value (and its `cause`, if any) as trimmed text, one line
 * per non-empty part.
 *
 * The cause is reported when it is an `Error`, a string, or an object with a
 * string `message`. A cause whose text equals the primary message is omitted
 * so the same line is not printed twice.
 *
 * @param {unknown} error - The thrown value.
 * @returns {string} The primary message, followed by "\n" and the cause text
 *   when a distinct cause exists; "" when nothing printable is available.
 */
function renderToolInvocationError(error) {
    // Coerce defensively: `message` is normally a string, but subclasses may override it.
    const toText = (value) => (typeof value === "string" ? value : String(value ?? "")).trim();
    const describeCause = (cause) => {
        if (typeof cause === "string") {
            return cause.trim();
        }
        if (typeof cause === "object" && cause !== null && typeof cause.message === "string") {
            return cause.message.trim();
        }
        return "";
    };
    const message = error instanceof Error ? toText(error.message) : toText(String(error));
    const hasCause = typeof error === "object" && error !== null && "cause" in error;
    const causeMessage = hasCause ? describeCause(error.cause) : "";
    const parts = [message];
    if (causeMessage !== message) {
        parts.push(causeMessage);
    }
    return parts.filter(Boolean).join("\n");
}
|
|
211
|
+
/**
 * Wraps a tool invocation failure in an error tool message.
 *
 * @param {unknown} toolCallId - Id of the failed tool call, forwarded to `createPlanGuardToolError`.
 * @param {string} toolName - Name of the tool, embedded in the message text.
 * @param {unknown} error - The thrown value, rendered with `renderToolInvocationError`.
 * @returns The message produced by `createPlanGuardToolError`.
 */
function createToolInvocationErrorMessage(toolCallId, toolName, error) {
    // Fall back to a generic reason when the error renders to an empty string.
    const reason = renderToolInvocationError(error) || "tool invocation failed";
    const content = `Error invoking tool '${toolName}' with error: ${reason}`;
    return createPlanGuardToolError(toolCallId, content, "tool-invocation-error");
}
|
|
185
215
|
function createTodoPlanGuardMiddleware(options = {}) {
|
|
216
|
+
let observedPlanToolResult = false;
|
|
217
|
+
let observedNonPlanToolResult = false;
|
|
186
218
|
return createMiddleware({
|
|
187
219
|
name: "harnessTodoPlanGuard",
|
|
188
220
|
wrapToolCall: ((request, handler) => {
|
|
@@ -192,34 +224,63 @@ function createTodoPlanGuardMiddleware(options = {}) {
|
|
|
192
224
|
? request.tool.name
|
|
193
225
|
: "";
|
|
194
226
|
const messages = Array.isArray(request.state?.messages) ? request.state.messages : [];
|
|
195
|
-
const
|
|
196
|
-
|
|
197
|
-
|
|
198
|
-
|
|
199
|
-
|
|
200
|
-
|
|
201
|
-
|
|
202
|
-
|
|
203
|
-
|
|
204
|
-
|
|
205
|
-
|
|
206
|
-
|
|
207
|
-
|
|
208
|
-
|
|
209
|
-
return
|
|
210
|
-
}
|
|
227
|
+
const toolResultNames = messages.map(readToolResultName).filter((name) => name.length > 0);
|
|
228
|
+
const hasPlanToolResult = toolResultNames.some(isPlanToolName);
|
|
229
|
+
const hasNonPlanToolResult = toolResultNames.some((resultToolName) => !isPlanToolName(resultToolName));
|
|
230
|
+
const hasExternalPlanEvidence = hasExternalPlanEvidenceInstruction(messages);
|
|
231
|
+
if (options.requiresPlan === true
|
|
232
|
+
&& !(observedPlanToolResult || hasPlanToolResult || hasExternalPlanEvidence)
|
|
233
|
+
&& toolName.length > 0
|
|
234
|
+
&& !isPlanToolName(toolName)) {
|
|
235
|
+
return createPlanGuardToolError(request.toolCall?.id, "Error: this agent has a required visible planning contract. Your first action must be write_todos with concrete task steps and statuses. Do not call non-planning tools before the initial todo board exists.", "todo-initial-plan-guard");
|
|
236
|
+
}
|
|
237
|
+
if (options.requiresPlan === true
|
|
238
|
+
&& (observedPlanToolResult || hasPlanToolResult || hasExternalPlanEvidence)
|
|
239
|
+
&& !(observedNonPlanToolResult || hasNonPlanToolResult)
|
|
240
|
+
&& isPlanToolName(toolName)) {
|
|
241
|
+
return createPlanGuardToolError(request.toolCall?.id, "Error: the required todo board already exists, but no non-planning evidence tool has returned yet. Do not call write_todos or read_todos again. Your next action must be exactly one non-planning tool call selected from the available tool descriptions and schemas.", "todo-plan-evidence-guard");
|
|
242
|
+
}
|
|
211
243
|
if (options.requiresPlan === true
|
|
212
|
-
&& !hasNonPlanToolResult
|
|
244
|
+
&& !(observedNonPlanToolResult || hasNonPlanToolResult)
|
|
213
245
|
&& isPlanToolName(toolName)
|
|
214
246
|
&& normalizePlanToolName(toolName).includes("write_todos")
|
|
215
247
|
&& todoToolCallIsTerminal({ args: parseToolCallArgs(request.toolCall?.args) })) {
|
|
216
|
-
return
|
|
217
|
-
|
|
218
|
-
|
|
219
|
-
|
|
220
|
-
|
|
248
|
+
return createPlanGuardToolError(request.toolCall?.id, "Error: write_todos cannot mark every todo as terminal before any non-planning evidence tool returns. Keep one todo in_progress and the remaining todos pending until evidence tools return.", "write-todos-tool-guard");
|
|
249
|
+
}
|
|
250
|
+
const markObservedToolResult = () => {
|
|
251
|
+
if (isPlanToolName(toolName)) {
|
|
252
|
+
observedPlanToolResult = true;
|
|
253
|
+
}
|
|
254
|
+
else if (toolName.length > 0) {
|
|
255
|
+
observedNonPlanToolResult = true;
|
|
256
|
+
}
|
|
257
|
+
};
|
|
258
|
+
const normalizedArgs = normalizeToolArgsForSchema(parseToolCallArgs(request.toolCall?.args), request.tool?.schema, request.toolCall?.args);
|
|
259
|
+
const normalizedRequest = {
|
|
260
|
+
...request,
|
|
261
|
+
toolCall: request.toolCall
|
|
262
|
+
? {
|
|
263
|
+
...request.toolCall,
|
|
264
|
+
args: normalizedArgs,
|
|
265
|
+
}
|
|
266
|
+
: request.toolCall,
|
|
267
|
+
};
|
|
268
|
+
try {
|
|
269
|
+
const result = handler(normalizedRequest);
|
|
270
|
+
if (isPromiseLike(result)) {
|
|
271
|
+
return result
|
|
272
|
+
.then((value) => {
|
|
273
|
+
markObservedToolResult();
|
|
274
|
+
return value;
|
|
275
|
+
})
|
|
276
|
+
.catch((error) => createToolInvocationErrorMessage(request.toolCall?.id, toolName || "unknown", error));
|
|
277
|
+
}
|
|
278
|
+
markObservedToolResult();
|
|
279
|
+
return result;
|
|
280
|
+
}
|
|
281
|
+
catch (error) {
|
|
282
|
+
return createToolInvocationErrorMessage(request.toolCall?.id, toolName || "unknown", error);
|
|
221
283
|
}
|
|
222
|
-
return handler(request);
|
|
223
284
|
}),
|
|
224
285
|
afterModel: (state) => {
|
|
225
286
|
if (!Array.isArray(state.messages) || state.messages.length === 0) {
|
|
@@ -246,22 +307,8 @@ function createTodoPlanGuardMiddleware(options = {}) {
|
|
|
246
307
|
}
|
|
247
308
|
const lastAiMessage = [...state.messages].reverse().find((message) => readMessageToolCalls(message).length > 0);
|
|
248
309
|
const lastToolCalls = readMessageToolCalls(lastAiMessage);
|
|
249
|
-
if (!lastAiMessage
|
|
250
|
-
|
|
251
|
-
const hasVisibleContent = readMessageContentText(latestMessage).length > 0;
|
|
252
|
-
if (!hasVisibleContent) {
|
|
253
|
-
return {
|
|
254
|
-
messages: [new AIMessage({
|
|
255
|
-
content: "",
|
|
256
|
-
tool_calls: [{
|
|
257
|
-
id: `write-todos-bootstrap-${Math.random().toString(36).slice(2, 10)}`,
|
|
258
|
-
name: "write_todos",
|
|
259
|
-
args: { todos: createBootstrapTodoPlan(options.toolNames ?? []) },
|
|
260
|
-
type: "tool_call",
|
|
261
|
-
}],
|
|
262
|
-
})],
|
|
263
|
-
};
|
|
264
|
-
}
|
|
310
|
+
if (!lastAiMessage) {
|
|
311
|
+
return;
|
|
265
312
|
}
|
|
266
313
|
const writeTodosCalls = lastToolCalls.filter((toolCall) => isPlanToolName(toolCall.name));
|
|
267
314
|
const prematureCompletedCalls = writeTodosCalls.filter(todoToolCallIsTerminal);
|
|
@@ -442,6 +489,61 @@ function hasDelegatedPlanEvidence(result) {
|
|
|
442
489
|
return Array.isArray(toolResults)
|
|
443
490
|
&& toolResults.some((item) => isPlanToolName(item.toolName));
|
|
444
491
|
}
|
|
492
|
+
/**
 * Converts an upstream stream event into a tool-evidence record, when it
 * describes a tool run.
 *
 * - Tool start (`on_tool_start`, or `on_chain_start` with `run_type` "tool"):
 *   recorded only for plan tools, with the tool input as `output`.
 * - Tool end (`on_tool_end`, or `on_chain_end` with `run_type` "tool"):
 *   recorded with the tool output.
 * - `on_tool_error`: recorded with the error (or the output when no error is
 *   present) and `isError: true`.
 *
 * @param {unknown} event - Upstream event object.
 * @returns {{ toolName: string, output: unknown, isError?: boolean } | null}
 *   The evidence record, or `null` when the event is not an object, has no
 *   string `name`, or is not one of the event kinds above.
 */
function readUpstreamToolEvidence(event) {
    if (event === null || typeof event !== "object") {
        return null;
    }
    const asText = (candidate) => (typeof candidate === "string" ? candidate : "");
    const toolName = asText(event.name);
    if (toolName === "") {
        return null;
    }
    const eventName = asText(event.event);
    const isToolRun = asText(event.run_type) === "tool";
    const started = eventName === "on_tool_start" || (eventName === "on_chain_start" && isToolRun);
    if (started && isPlanToolName(toolName)) {
        return { toolName, output: event.data?.input };
    }
    const ended = eventName === "on_tool_end" || (eventName === "on_chain_end" && isToolRun);
    if (ended) {
        return { toolName, output: event.data?.output };
    }
    if (eventName === "on_tool_error") {
        return { toolName, output: event.data?.error ?? event.data?.output, isError: true };
    }
    return null;
}
|
|
517
|
+
/**
 * Appends a tool-evidence record to `executedToolResults` unless an
 * equivalent record is already present. The array is mutated in place.
 *
 * Two records are equivalent when they share the same `toolName`, the same
 * failure state, and the same serialized `output`. Failure state is
 * `isError === true`, so `isError: false` and a missing `isError` count as
 * the same (successful) state; the streaming path can record either form for
 * the same tool result.
 *
 * @param {Array<{ toolName: string, output?: unknown, isError?: boolean }>} executedToolResults
 *   Accumulated evidence.
 * @param {{ toolName: string, output?: unknown, isError?: boolean }} evidence
 *   Record to add.
 * @returns {void}
 */
function appendUniqueToolEvidence(executedToolResults, evidence) {
    const fingerprintOutput = (output) => {
        // JSON.stringify renders every Error as "{}", which would merge distinct failures.
        if (output instanceof Error) {
            return `${output.name}: ${output.message}`;
        }
        try {
            return JSON.stringify(output);
        }
        catch {
            // Cyclic structures and BigInt values cannot be serialized; fall back to String().
            return String(output);
        }
    };
    const evidenceFailed = evidence.isError === true;
    const evidenceFingerprint = fingerprintOutput(evidence.output);
    const exists = executedToolResults.some((item) => item.toolName === evidence.toolName
        && (item.isError === true) === evidenceFailed
        && fingerprintOutput(item.output) === evidenceFingerprint);
    if (!exists) {
        executedToolResults.push(evidence);
    }
}
|
|
525
|
+
/**
 * Returns a copy of `result` whose `metadata.executedToolResults` combines the
 * tool evidence of `previous` and `result`, in that order, with duplicates
 * removed by `appendUniqueToolEvidence`.
 *
 * Entries that are not objects with a string `toolName` are ignored. Neither
 * input is mutated.
 *
 * @param {object} result - The latest delegated result.
 * @param {object} previous - The earlier delegated result whose evidence is kept.
 * @returns {object} `result` with the merged `metadata.executedToolResults`.
 */
function mergeDelegatedResultToolEvidence(result, previous) {
    const isNamedToolResult = (candidate) => typeof candidate === "object"
        && candidate !== null
        && typeof candidate.toolName === "string";
    const combined = [];
    // Earlier evidence first, so it keeps its position when the retry repeats it.
    [previous, result].forEach((source) => {
        const recorded = source.metadata?.executedToolResults;
        if (!Array.isArray(recorded)) {
            return;
        }
        recorded
            .filter(isNamedToolResult)
            .forEach((entry) => appendUniqueToolEvidence(combined, entry));
    });
    const metadata = {
        ...(result.metadata ?? {}),
        executedToolResults: combined,
    };
    return { ...result, metadata };
}
|
|
445
547
|
const DELEGATED_PLAN_EVIDENCE_RETRY_INSTRUCTION = [
|
|
446
548
|
"The delegated task requires visible TODO planning evidence.",
|
|
447
549
|
"Before any other tool call or final answer, call write_todos with concrete task steps and statuses.",
|
|
@@ -966,14 +1068,8 @@ export class AgentRuntimeAdapter {
|
|
|
966
1068
|
const inlineSubagents = input.resolvedSubagents.filter((subagent) => !("graphId" in subagent));
|
|
967
1069
|
const asyncSubagents = input.resolvedSubagents.filter((subagent) => "graphId" in subagent);
|
|
968
1070
|
const subagents = inlineSubagents;
|
|
969
|
-
const requiresPlan = binding.harnessRuntime.executionContract?.requiresPlan === true;
|
|
970
|
-
const resolvedToolNames = input.resolvedTools.map(readConfiguredToolName).filter((name) => name.length > 0);
|
|
971
1071
|
const middleware = [
|
|
972
1072
|
...(builtinTools.todos === false ? [] : [todoListMiddleware()]),
|
|
973
|
-
...(builtinTools.todos === false ? [] : [createTodoPlanGuardMiddleware({
|
|
974
|
-
requiresPlan,
|
|
975
|
-
toolNames: resolvedToolNames,
|
|
976
|
-
})]),
|
|
977
1073
|
...(input.resolvedSkills.length > 0 ? [createSkillsMiddleware({
|
|
978
1074
|
backend,
|
|
979
1075
|
sources: resolveDeepAgentSkillSourceRootPaths({
|
|
@@ -1029,7 +1125,22 @@ export class AgentRuntimeAdapter {
|
|
|
1029
1125
|
? filesystemConfig.sessionStorage
|
|
1030
1126
|
: undefined;
|
|
1031
1127
|
const sessionScoped = sessionStorage?.enabled === true;
|
|
1032
|
-
|
|
1128
|
+
const executionParams = getBindingExecutionParams(binding);
|
|
1129
|
+
const primaryModel = getBindingPrimaryModel(binding);
|
|
1130
|
+
const runnableFingerprint = createHash("sha256").update(JSON.stringify({
|
|
1131
|
+
executionKind: getBindingExecutionKind(binding),
|
|
1132
|
+
systemPrompt: getBindingSystemPrompt(binding) ?? "",
|
|
1133
|
+
responseFormat: executionParams && "responseFormat" in executionParams ? executionParams.responseFormat : undefined,
|
|
1134
|
+
model: primaryModel
|
|
1135
|
+
? {
|
|
1136
|
+
id: primaryModel.id,
|
|
1137
|
+
provider: primaryModel.provider,
|
|
1138
|
+
model: primaryModel.model,
|
|
1139
|
+
}
|
|
1140
|
+
: undefined,
|
|
1141
|
+
tools: getBindingPrimaryTools(binding).map((tool) => tool.name).filter(Boolean).sort(),
|
|
1142
|
+
})).digest("hex").slice(0, 16);
|
|
1143
|
+
return `${binding.agent.sourcePath}::${sessionScoped ? (sessionId ?? "__default__") : "__binding__"}::${runnableFingerprint}`;
|
|
1033
1144
|
}
|
|
1034
1145
|
async create(binding, options = {}) {
|
|
1035
1146
|
const cacheKey = this.buildRunnableCacheKey(binding, options.sessionId ?? options.legacySessionId);
|
|
@@ -1280,11 +1391,12 @@ export class AgentRuntimeAdapter {
|
|
|
1280
1391
|
if (!selectedBinding) {
|
|
1281
1392
|
return null;
|
|
1282
1393
|
}
|
|
1283
|
-
const runDelegatedRequest = (text, requestSuffix = "") => this.invoke(selectedBinding, text, sessionId, `${requestId}:${subagentType}${requestSuffix}`, undefined, [], {
|
|
1394
|
+
const runDelegatedRequest = (text, requestSuffix = "", delegatedOptions = {}) => this.invoke(selectedBinding, text, sessionId, `${requestId}:${subagentType}${requestSuffix}`, undefined, [], {
|
|
1284
1395
|
context: options.context,
|
|
1285
1396
|
state: options.state,
|
|
1286
1397
|
files: options.files,
|
|
1287
1398
|
memoryContext: options.memoryContext,
|
|
1399
|
+
...delegatedOptions,
|
|
1288
1400
|
});
|
|
1289
1401
|
let delegatedResult;
|
|
1290
1402
|
try {
|
|
@@ -1349,7 +1461,12 @@ export class AgentRuntimeAdapter {
|
|
|
1349
1461
|
}
|
|
1350
1462
|
if (targetRequiresExecutionToolEvidence && !hasDelegatedExecutionToolEvidence(delegatedResult)) {
|
|
1351
1463
|
try {
|
|
1352
|
-
delegatedResult = await runDelegatedRequest([requestText, EXECUTION_WITH_TOOL_EVIDENCE_RETRY_INSTRUCTION].filter(Boolean).join("\n\n"), ":tool-evidence-retry"
|
|
1464
|
+
delegatedResult = await runDelegatedRequest([requestText, EXECUTION_WITH_TOOL_EVIDENCE_RETRY_INSTRUCTION].filter(Boolean).join("\n\n"), ":tool-evidence-retry", selectedBinding.harnessRuntime.executionContract?.requiresPlan === true
|
|
1465
|
+
? {
|
|
1466
|
+
suppressInitialRequiredPlanInstruction: true,
|
|
1467
|
+
externalPlanEvidence: hasDelegatedPlanEvidence(delegatedResult),
|
|
1468
|
+
}
|
|
1469
|
+
: {});
|
|
1353
1470
|
}
|
|
1354
1471
|
catch (error) {
|
|
1355
1472
|
const output = error instanceof Error ? error.message : String(error);
|
|
@@ -1648,12 +1765,18 @@ export class AgentRuntimeAdapter {
|
|
|
1648
1765
|
continue;
|
|
1649
1766
|
}
|
|
1650
1767
|
if (chunk.kind === "tool-result") {
|
|
1651
|
-
executedToolResults
|
|
1768
|
+
appendUniqueToolEvidence(executedToolResults, {
|
|
1652
1769
|
toolName: chunk.toolName,
|
|
1653
1770
|
output: chunk.output,
|
|
1654
1771
|
...(chunk.isError !== undefined ? { isError: chunk.isError } : {}),
|
|
1655
1772
|
});
|
|
1656
1773
|
}
|
|
1774
|
+
if (chunk.kind === "upstream-event") {
|
|
1775
|
+
const streamedEvidence = readUpstreamToolEvidence(chunk.event);
|
|
1776
|
+
if (streamedEvidence) {
|
|
1777
|
+
appendUniqueToolEvidence(executedToolResults, streamedEvidence);
|
|
1778
|
+
}
|
|
1779
|
+
}
|
|
1657
1780
|
yield { ...chunk, agentId: chunk.agentId ?? selectedBinding.agent.id };
|
|
1658
1781
|
}
|
|
1659
1782
|
}
|
|
@@ -1683,10 +1806,12 @@ export class AgentRuntimeAdapter {
|
|
|
1683
1806
|
const targetRequiresExecutionToolEvidence = getBindingPrimaryTools(selectedBinding).length > 0;
|
|
1684
1807
|
if (selectedBinding.harnessRuntime.executionContract?.requiresPlan === true
|
|
1685
1808
|
&& !hasDelegatedPlanEvidence(delegatedResult)) {
|
|
1686
|
-
|
|
1809
|
+
const previousDelegatedResult = delegatedResult;
|
|
1810
|
+
delegatedResult = mergeDelegatedResultToolEvidence(yield* runDelegatedStreamAttempt([requestText, DELEGATED_PLAN_EVIDENCE_RETRY_INSTRUCTION].filter(Boolean).join("\n\n"), ":plan-evidence-retry"), previousDelegatedResult);
|
|
1687
1811
|
}
|
|
1688
1812
|
if (targetRequiresExecutionToolEvidence && !hasDelegatedExecutionToolEvidence(delegatedResult)) {
|
|
1689
|
-
|
|
1813
|
+
const previousDelegatedResult = delegatedResult;
|
|
1814
|
+
delegatedResult = mergeDelegatedResultToolEvidence(yield* runDelegatedStreamAttempt([requestText, EXECUTION_WITH_TOOL_EVIDENCE_RETRY_INSTRUCTION].filter(Boolean).join("\n\n"), ":tool-evidence-retry"), previousDelegatedResult);
|
|
1690
1815
|
}
|
|
1691
1816
|
if (selectedBinding.harnessRuntime.executionContract?.requiresPlan === true
|
|
1692
1817
|
&& !hasDelegatedPlanEvidence(delegatedResult)) {
|
|
@@ -1707,6 +1832,25 @@ export class AgentRuntimeAdapter {
|
|
|
1707
1832
|
finalMessageText: output,
|
|
1708
1833
|
};
|
|
1709
1834
|
}
|
|
1835
|
+
const delegatedToolResults = Array.isArray(delegatedResult.metadata?.executedToolResults)
|
|
1836
|
+
? delegatedResult.metadata.executedToolResults
|
|
1837
|
+
: [];
|
|
1838
|
+
for (const toolResult of delegatedToolResults) {
|
|
1839
|
+
const toolName = typeof toolResult.toolName === "string" ? toolResult.toolName : "";
|
|
1840
|
+
if (!toolName || isPlanToolName(toolName)) {
|
|
1841
|
+
continue;
|
|
1842
|
+
}
|
|
1843
|
+
yield {
|
|
1844
|
+
kind: "commentary",
|
|
1845
|
+
content: `Running tool ${toolName}.`,
|
|
1846
|
+
agentId: selectedBinding.agent.id,
|
|
1847
|
+
};
|
|
1848
|
+
yield {
|
|
1849
|
+
kind: "commentary",
|
|
1850
|
+
content: `Tool ${toolName} ${toolResult.isError === true ? "failed" : "completed"}.`,
|
|
1851
|
+
agentId: selectedBinding.agent.id,
|
|
1852
|
+
};
|
|
1853
|
+
}
|
|
1710
1854
|
return {
|
|
1711
1855
|
toolOutput: resolveDelegatedResultOutput(delegatedResult),
|
|
1712
1856
|
delegatedSubagentType: subagentType,
|
|
@@ -62,6 +62,12 @@ function inferPlanItemStatusFromTerminalAssistantOutput(value) {
|
|
|
62
62
|
}
|
|
63
63
|
return isSubstantiveTerminalAssistantOutput(value) ? "completed" : null;
|
|
64
64
|
}
|
|
65
|
+
/**
 * Maps a terminal execution status to a plan item status, taking observed
 * tool results into account.
 *
 * A "blocked" status becomes "completed" when a successful tool result was
 * seen; every other case defers to `mapTerminalStatusToPlanItemStatus`.
 *
 * @param {string} status - Terminal execution status.
 * @param {boolean} sawSuccessfulToolResult - Whether a successful tool result was observed.
 * @returns The plan item status.
 */
function mapTerminalStatusToObservedPlanItemStatus(status, sawSuccessfulToolResult) {
    const blockedDespiteEvidence = status === "blocked" && sawSuccessfulToolResult;
    return blockedDespiteEvidence
        ? "completed"
        : mapTerminalStatusToPlanItemStatus(status);
}
|
|
65
71
|
function reconcilePlanStateToTerminalStatus(planState, status, updatedAt) {
|
|
66
72
|
const items = planState.items.map((item) => ({
|
|
67
73
|
...item,
|
|
@@ -545,6 +551,17 @@ function createProfileStepCommentary(step) {
|
|
|
545
551
|
if (step.kind === "agent" && step.action === "startup") {
|
|
546
552
|
return `Preparing ${name}.`;
|
|
547
553
|
}
|
|
554
|
+
if (step.kind === "tool") {
|
|
555
|
+
if (step.status === "started") {
|
|
556
|
+
return `Running tool ${name}.`;
|
|
557
|
+
}
|
|
558
|
+
if (step.status === "completed") {
|
|
559
|
+
return `Tool ${name} completed.`;
|
|
560
|
+
}
|
|
561
|
+
if (step.status === "failed") {
|
|
562
|
+
return `Tool ${name} failed.`;
|
|
563
|
+
}
|
|
564
|
+
}
|
|
548
565
|
return null;
|
|
549
566
|
}
|
|
550
567
|
function isOpenAICompatibleStreamingCompatibilityError(binding, error) {
|
|
@@ -1016,7 +1033,7 @@ export async function* streamHarnessRun(options) {
|
|
|
1016
1033
|
currentPlanState = await refreshPlanStateFromPersistence(options, currentPlanState);
|
|
1017
1034
|
const terminalStructuredStatus = readTerminalExecutionStatus(actual.structuredResponse);
|
|
1018
1035
|
if (terminalStructuredStatus && currentPlanState && planStateHasActiveItems(currentPlanState)) {
|
|
1019
|
-
const reconciledPlanState = reconcilePlanStateToTerminalStatus(currentPlanState,
|
|
1036
|
+
const reconciledPlanState = reconcilePlanStateToTerminalStatus(currentPlanState, mapTerminalStatusToObservedPlanItemStatus(terminalStructuredStatus, sawSuccessfulToolResult), new Date().toISOString());
|
|
1020
1037
|
const signature = buildPlanStateSignature(reconciledPlanState);
|
|
1021
1038
|
if (signature !== lastPlanStateSignature) {
|
|
1022
1039
|
const previousPlanState = currentPlanState;
|
|
@@ -1040,7 +1057,18 @@ export async function* streamHarnessRun(options) {
|
|
|
1040
1057
|
}
|
|
1041
1058
|
}
|
|
1042
1059
|
currentPlanState = await refreshPlanStateFromPersistence(options, currentPlanState);
|
|
1043
|
-
const
|
|
1060
|
+
const explicitTerminalAssistantStatus = readTerminalExecutionStatus(assistantOutput);
|
|
1061
|
+
let terminalAssistantPlanItemStatus = inferPlanItemStatusFromTerminalAssistantOutput(assistantOutput);
|
|
1062
|
+
if (explicitTerminalAssistantStatus === "blocked" && sawSuccessfulToolResult) {
|
|
1063
|
+
terminalAssistantPlanItemStatus = "completed";
|
|
1064
|
+
}
|
|
1065
|
+
if (terminalAssistantPlanItemStatus === "failed"
|
|
1066
|
+
&& sawSuccessfulToolResult
|
|
1067
|
+
&& !explicitTerminalAssistantStatus
|
|
1068
|
+
&& !/^\s*terminated\b|\bBlockers?:\b|(?:委托执行失败|未能完成|无法完成)/iu.test(assistantOutput)
|
|
1069
|
+
&& assistantOutput.trim()) {
|
|
1070
|
+
terminalAssistantPlanItemStatus = "completed";
|
|
1071
|
+
}
|
|
1044
1072
|
if (terminalAssistantPlanItemStatus && currentPlanState && planStateHasActiveItems(currentPlanState)) {
|
|
1045
1073
|
const reconciledPlanState = reconcilePlanStateToTerminalStatus(currentPlanState, terminalAssistantPlanItemStatus, new Date().toISOString());
|
|
1046
1074
|
const signature = buildPlanStateSignature(reconciledPlanState);
|
|
@@ -1116,7 +1144,7 @@ export async function* streamHarnessRun(options) {
|
|
|
1116
1144
|
const canUseDeterministicToolEvidenceOutput = !currentPlanState || !planStateHasActiveItems(currentPlanState) || Boolean(terminalStructuredStatus);
|
|
1117
1145
|
if (!assistantOutput && sawSuccessfulToolResult && deterministicToolEvidenceOutput && canUseDeterministicToolEvidenceOutput) {
|
|
1118
1146
|
if (terminalStructuredStatus && currentPlanState && planStateHasActiveItems(currentPlanState)) {
|
|
1119
|
-
const reconciledPlanState = reconcilePlanStateToTerminalStatus(currentPlanState,
|
|
1147
|
+
const reconciledPlanState = reconcilePlanStateToTerminalStatus(currentPlanState, mapTerminalStatusToObservedPlanItemStatus(terminalStructuredStatus, sawSuccessfulToolResult), new Date().toISOString());
|
|
1120
1148
|
const signature = buildPlanStateSignature(reconciledPlanState);
|
|
1121
1149
|
if (signature !== lastPlanStateSignature) {
|
|
1122
1150
|
const previousPlanState = currentPlanState;
|
|
@@ -309,6 +309,89 @@ function normalizePythonLikeJson(value) {
|
|
|
309
309
|
}
|
|
310
310
|
return output;
|
|
311
311
|
}
|
|
312
|
+
/**
 * Repairs JSON-like text in which an object inside an array lost its opening
 * brace, e.g. `[{"a":1},"b":2}]` becomes `[{"a":1},{"b":2}]`.
 *
 * The text is scanned once while tracking string literals and the stack of
 * open `{` / `[` containers. After a comma directly inside an array, a quoted
 * string followed by `:` can only be an object key, so a `{` is inserted in
 * front of it. Only elements that follow a comma are repaired.
 *
 * @param {string} value - Text to repair.
 * @returns {string | null} The repaired text, or `null` when no brace was inserted.
 */
function repairMissingArrayObjectOpenBraces(value) {
    const isWhitespaceAt = (offset) => /\s/u.test(value[offset] ?? "");
    // Offset of the quote closing the string opened at `openQuote`; past the
    // last scanned character when the string is unterminated.
    const findClosingQuote = (openQuote) => {
        let offset = openQuote + 1;
        let skipNext = false;
        for (; offset < value.length; offset += 1) {
            const ch = value[offset];
            if (skipNext) {
                skipNext = false;
            }
            else if (ch === "\\") {
                skipNext = true;
            }
            else if (ch === "\"") {
                break;
            }
        }
        return offset;
    };
    // True when the string opened at `openQuote` is followed (after optional
    // whitespace) by a colon, i.e. it is being used as an object key.
    const startsObjectKey = (openQuote) => {
        let offset = findClosingQuote(openQuote) + 1;
        while (offset < value.length && isWhitespaceAt(offset)) {
            offset += 1;
        }
        return value[offset] === ":";
    };
    const openContainers = [];
    let repaired = "";
    let insertedBrace = false;
    let insideString = false;
    let afterBackslash = false;
    let position = 0;
    while (position < value.length) {
        const ch = value[position];
        position += 1;
        // Every input character is copied through; braces are only ever added.
        repaired += ch;
        if (insideString) {
            if (afterBackslash) {
                afterBackslash = false;
            }
            else if (ch === "\\") {
                afterBackslash = true;
            }
            else if (ch === "\"") {
                insideString = false;
            }
            continue;
        }
        if (ch === "\"") {
            insideString = true;
            continue;
        }
        if (ch === "{" || ch === "[") {
            openContainers.push(ch);
            continue;
        }
        if (ch === "}" || ch === "]") {
            const opener = ch === "}" ? "{" : "[";
            // Unbalanced closers are copied through without touching the stack.
            if (openContainers.at(-1) === opener) {
                openContainers.pop();
            }
            continue;
        }
        if (ch !== "," || openContainers.at(-1) !== "[") {
            continue;
        }
        // Element separator directly inside an array: copy the whitespace that
        // follows, then look at how the next element starts.
        while (position < value.length && isWhitespaceAt(position)) {
            repaired += value[position];
            position += 1;
        }
        if (value[position] === "\"" && startsObjectKey(position)) {
            repaired += "{";
            openContainers.push("{");
            insertedBrace = true;
        }
    }
    return insertedBrace ? repaired : null;
}
|
|
312
395
|
export function salvageToolArgs(value) {
|
|
313
396
|
if (typeof value === "object" && value && !Array.isArray(value)) {
|
|
314
397
|
return value;
|
|
@@ -359,6 +442,13 @@ export function salvageJsonToolCalls(value) {
|
|
|
359
442
|
if (direct) {
|
|
360
443
|
return direct;
|
|
361
444
|
}
|
|
445
|
+
const repairedArrayObjects = repairMissingArrayObjectOpenBraces(trimmed);
|
|
446
|
+
if (repairedArrayObjects) {
|
|
447
|
+
const parsed = tryParseJson(repairedArrayObjects);
|
|
448
|
+
if (parsed) {
|
|
449
|
+
return parsed;
|
|
450
|
+
}
|
|
451
|
+
}
|
|
362
452
|
const pythonLike = normalizePythonLikeJson(trimmed);
|
|
363
453
|
if (pythonLike) {
|
|
364
454
|
const parsed = tryParseJson(pythonLike);
|
|
@@ -366,6 +456,15 @@ export function salvageJsonToolCalls(value) {
|
|
|
366
456
|
return parsed;
|
|
367
457
|
}
|
|
368
458
|
}
|
|
459
|
+
if (pythonLike) {
|
|
460
|
+
const repairedPythonLike = repairMissingArrayObjectOpenBraces(pythonLike);
|
|
461
|
+
if (repairedPythonLike) {
|
|
462
|
+
const parsed = tryParseJson(repairedPythonLike);
|
|
463
|
+
if (parsed) {
|
|
464
|
+
return parsed;
|
|
465
|
+
}
|
|
466
|
+
}
|
|
467
|
+
}
|
|
369
468
|
const closed = closeJsonContainerSuffix(trimmed);
|
|
370
469
|
if (closed) {
|
|
371
470
|
const parsed = tryParseJson(closed);
|
|
@@ -373,6 +472,15 @@ export function salvageJsonToolCalls(value) {
|
|
|
373
472
|
return parsed;
|
|
374
473
|
}
|
|
375
474
|
}
|
|
475
|
+
if (repairedArrayObjects) {
|
|
476
|
+
const closedRepaired = closeJsonContainerSuffix(repairedArrayObjects);
|
|
477
|
+
if (closedRepaired) {
|
|
478
|
+
const parsed = tryParseJson(closedRepaired);
|
|
479
|
+
if (parsed) {
|
|
480
|
+
return parsed;
|
|
481
|
+
}
|
|
482
|
+
}
|
|
483
|
+
}
|
|
376
484
|
const embeddedObject = extractBalancedJsonObject(trimmed);
|
|
377
485
|
if (embeddedObject) {
|
|
378
486
|
const parsed = tryParseJson(embeddedObject);
|