@botbotgo/agent-harness 0.0.418 → 0.0.420
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cli/chat-interactive.js +1 -1
- package/dist/cli/chat-stream.js +9 -1
- package/dist/package-version.d.ts +2 -2
- package/dist/package-version.js +2 -2
- package/dist/runtime/adapter/compat/openai-compatible.js +12 -0
- package/dist/runtime/adapter/flow/invocation-flow.d.ts +2 -0
- package/dist/runtime/adapter/flow/invocation-flow.js +13 -5
- package/dist/runtime/adapter/flow/invoke-runtime.d.ts +1 -0
- package/dist/runtime/adapter/flow/invoke-runtime.js +1 -0
- package/dist/runtime/adapter/flow/stream-runtime.d.ts +4 -0
- package/dist/runtime/adapter/flow/stream-runtime.js +177 -14
- package/dist/runtime/adapter/invocation-result.js +17 -6
- package/dist/runtime/adapter/local-tool-invocation.d.ts +2 -1
- package/dist/runtime/adapter/local-tool-invocation.js +268 -21
- package/dist/runtime/adapter/model/model-providers.js +269 -58
- package/dist/runtime/adapter/model/prompted-json-tool-call-capture.d.ts +9 -0
- package/dist/runtime/adapter/model/prompted-json-tool-call-capture.js +40 -0
- package/dist/runtime/adapter/runtime-adapter-support.js +58 -12
- package/dist/runtime/adapter/runtime-shell.js +3 -2
- package/dist/runtime/adapter/stream-event-projection.js +22 -5
- package/dist/runtime/adapter/tool/tool-arguments.js +157 -67
- package/dist/runtime/adapter/tool/tool-replay.js +0 -4
- package/dist/runtime/agent-runtime-adapter.d.ts +3 -0
- package/dist/runtime/agent-runtime-adapter.js +217 -73
- package/dist/runtime/harness/run/stream-run.js +31 -3
- package/dist/runtime/parsing/output-tool-args.js +108 -0
- package/dist/workspace/resource-compilers.js +17 -4
- package/package.json +1 -1
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import { ToolMessage } from "@langchain/core/messages";
|
|
1
|
+
import { AIMessage, ToolMessage } from "@langchain/core/messages";
|
|
2
2
|
import { createModelFacingToolNameLookupCandidates, resolveModelFacingToolName } from "./tool/tool-name-mapping.js";
|
|
3
3
|
import { canReplayToolCallsLocally } from "./tool/tool-replay.js";
|
|
4
4
|
import { extractToolCallsFromResult, normalizeToolArgsForSchema, stringifyToolOutput } from "./tool/tool-arguments.js";
|
|
@@ -10,6 +10,86 @@ import { appendToolRecoveryInstruction, extractVisibleOutput, resolveMissingPlan
|
|
|
10
10
|
import { salvageJsonToolCalls } from "../parsing/output-tool-args.js";
|
|
11
11
|
import { AUTONOMOUS_INVESTIGATION_RECOVERY_INSTRUCTION } from "../prompts/runtime-prompts.js";
|
|
12
12
|
const TOOL_FOLLOW_UP_INSTRUCTION = "One or more tool results are already available in this conversation. Answer the user's current request directly from the existing context and tool results. Do not ask the user to repeat inputs that are already present above.";
|
|
13
|
+
// Fallback cap on tool-loop iterations, used by resolveMaxToolIterations() when
// AGENT_HARNESS_MAX_TOOL_ITERATIONS is unset or does not parse to a positive number.
const DEFAULT_MAX_TOOL_ITERATIONS = 10_000;
|
|
14
|
+
// How many times in a row the same recovery instruction may recur with no new
// executed tool results before the tool loop stops retrying that recovery path.
const MAX_REPEATED_RECOVERY_WITHOUT_PROGRESS = 2;
|
|
15
|
+
// How many plan-only tool-call rounds are tolerated once plan evidence exists
// but no non-planning tool evidence has been produced; reaching it throws.
const MAX_REPEATED_PLAN_ONLY_AFTER_PLAN = 2;
|
|
16
|
+
/**
 * Renders a request as plain text so it can be scanned for tool-selection keywords.
 *
 * @param {unknown} request - A string request or a JSON-serializable object.
 * @returns {string} The request itself when it is a string; its JSON form when
 *   it is a non-null object; "" for every other input, when serialization
 *   throws, or when serialization produces no text.
 */
function stringifyRequestForToolSelection(request) {
    if (typeof request === "string") {
        return request;
    }
    if (typeof request !== "object" || request === null) {
        return "";
    }
    try {
        // JSON.stringify yields undefined (not a string) when toJSON() returns
        // undefined; coalesce so the return type is always a string.
        return JSON.stringify(request) ?? "";
    }
    catch {
        // Deliberate best effort: values such as circular structures cannot be
        // serialized, and keyword matching can simply proceed on empty text.
        return "";
    }
}
|
|
30
|
+
/**
 * Chooses up to four non-planning tool names to offer in a bootstrap todo plan.
 *
 * Names are taken from `primaryTools` in their given order after trimming;
 * tools without a string name, with a blank name, or recognised as planning
 * tools are dropped. When the request text looks finance-related, tools whose
 * name contains "finance" (in any letter case) are moved to the front.
 *
 * @param {Array<{name?: unknown}>} primaryTools - Candidate tools.
 * @param {unknown} request - Request used to detect finance-related wording.
 * @returns {string[]} At most four tool names.
 */
function prioritizeBootstrapEvidenceTools(primaryTools, request) {
    const requestText = stringifyRequestForToolSelection(request);
    const isFinanceRequest = /\b(?:stock|ticker|finance|market|valuation|quote)\b|股票|股价|行情|估值|财报/iu.test(requestText);
    const evidenceTools = primaryTools
        .map((tool) => typeof tool.name === "string" ? tool.name.trim() : "")
        .filter((name) => name.length > 0 && !isPlanToolName(name));
    if (isFinanceRequest) {
        // Compare case-insensitively so camelCase names such as "yahooFinance"
        // are prioritised too, matching the case-insensitive request check.
        const financeRank = (name) => name.toLowerCase().includes("finance") ? 0 : 1;
        // Array.prototype.sort is stable, so the original relative order is
        // kept within the finance and non-finance groups.
        evidenceTools.sort((left, right) => financeRank(left) - financeRank(right));
    }
    return evidenceTools.slice(0, 4);
}
|
|
46
|
+
/**
 * Builds the todo list used to bootstrap a visible plan for a request.
 *
 * @param {Array<{name?: unknown}>} primaryTools - Tools offered for the request.
 * @param {unknown} request - The request the plan is for.
 * @returns {Array<{content: string, status: string}>} Two already-completed
 *   items when no non-planning evidence tool is available; otherwise three
 *   items (run a tool, inspect evidence, answer) with the first in progress.
 */
function createBootstrapTodoPlan(primaryTools, request) {
    const evidenceTools = prioritizeBootstrapEvidenceTools(primaryTools, request);
    if (evidenceTools.length === 0) {
        // No evidence tool to run, so the plan is complete as soon as it exists.
        return [
            {
                content: "Establish the required visible plan for this request",
                status: "completed",
            },
            {
                content: "Return the final answer from the available conversation context",
                status: "completed",
            },
        ];
    }
    // The list is known to be non-empty here, so no placeholder label is needed.
    return [
        {
            content: `Select and run an appropriate non-planning evidence tool from: ${evidenceTools.join(", ")}`,
            status: "in_progress",
        },
        {
            content: "Inspect the returned tool evidence and update the todo board",
            status: "pending",
        },
        {
            content: "Return the final answer grounded in observed tool output",
            status: "pending",
        },
    ];
}
|
|
78
|
+
/**
 * Fabricates a runtime-shaped result holding a single `write_todos` tool call
 * whose arguments are the bootstrap todo plan for the request.
 *
 * @param {Array<{name?: unknown}>} primaryTools - Tools offered for the request.
 * @param {unknown} request - The request the plan is for.
 * @returns {{messages: AIMessage[]}} A result with one empty-content AI message
 *   carrying the synthetic tool call.
 */
function buildBootstrapPlanToolResult(primaryTools, request) {
    const bootstrapCall = {
        // Random base-36 suffix distinguishes one synthetic call id from another.
        id: `write-todos-bootstrap-${Math.random().toString(36).slice(2, 10)}`,
        name: "write_todos",
        args: {
            todos: createBootstrapTodoPlan(primaryTools, request),
        },
        type: "tool_call",
    };
    const planMessage = new AIMessage({
        content: "",
        tool_calls: [bootstrapCall],
    });
    return { messages: [planMessage] };
}
|
|
13
93
|
function readPlanStateSummary(output) {
|
|
14
94
|
if (typeof output !== "object" || output === null) {
|
|
15
95
|
return null;
|
|
@@ -30,7 +110,7 @@ function readPlanStateSummary(output) {
|
|
|
30
110
|
inProgress: typeof typedCounts.inProgress === "number" ? typedCounts.inProgress : 0,
|
|
31
111
|
};
|
|
32
112
|
}
|
|
33
|
-
function hasIncompleteExecutedPlan(executedToolResults) {
|
|
113
|
+
function hasIncompleteExecutedPlan(executedToolResults, externalPlanEvidence = false) {
|
|
34
114
|
for (const latest of [...executedToolResults].reverse()) {
|
|
35
115
|
const summary = readPlanStateSummary(latest.output);
|
|
36
116
|
if (!summary) {
|
|
@@ -38,7 +118,7 @@ function hasIncompleteExecutedPlan(executedToolResults) {
|
|
|
38
118
|
}
|
|
39
119
|
return summary.pending > 0 || summary.inProgress > 0;
|
|
40
120
|
}
|
|
41
|
-
return
|
|
121
|
+
return externalPlanEvidence;
|
|
42
122
|
}
|
|
43
123
|
function normalizeToolName(value) {
|
|
44
124
|
return typeof value === "string" ? value.trim().toLowerCase().replace(/[\s-]+/gu, "_") : "";
|
|
@@ -51,19 +131,45 @@ function isPlanToolName(toolName) {
|
|
|
51
131
|
return normalized === "write_todos"
|
|
52
132
|
|| normalized === "read_todos"
|
|
53
133
|
|| normalized === "tool_call_write_todos"
|
|
54
|
-
|| normalized === "tool_call_read_todos"
|
|
134
|
+
|| normalized === "tool_call_read_todos"
|
|
135
|
+
|| normalized === "call_write_todos"
|
|
136
|
+
|| normalized === "call_read_todos";
|
|
55
137
|
}
|
|
56
138
|
/**
 * Tells whether a tool call is a synthetic "fallback-complete-" todo write.
 *
 * @param {{id?: unknown, name?: unknown}} toolCall - Tool call to inspect.
 * @returns {boolean} True only when the id is a string starting with
 *   "fallback-complete-", the name is a planning tool name, and the normalized
 *   name contains "write_todos".
 */
function isFallbackTodoCompletionToolCall(toolCall) {
    const callId = toolCall.id;
    if (typeof callId !== "string" || !callId.startsWith("fallback-complete-")) {
        return false;
    }
    if (!isPlanToolName(toolCall.name)) {
        return false;
    }
    // Planning tools include the read variants; only the write variants qualify.
    return normalizeToolName(toolCall.name).includes("write_todos");
}
|
|
144
|
+
/**
 * Resolves the iteration cap for the local tool-invocation loop.
 *
 * Reads AGENT_HARNESS_MAX_TOOL_ITERATIONS from the environment. The value must
 * be a whole decimal integer (optional leading "+", surrounding whitespace
 * allowed) greater than zero; anything else falls back to the default.
 *
 * @returns {number} The configured cap, or DEFAULT_MAX_TOOL_ITERATIONS.
 */
function resolveMaxToolIterations() {
    const raw = process.env.AGENT_HARNESS_MAX_TOOL_ITERATIONS?.trim();
    // parseInt accepts a numeric prefix and ignores the rest ("1e3" -> 1,
    // "10_000" -> 10), which would silently shrink the cap. Require the whole
    // value to be digits so malformed settings fall back to the default.
    if (!raw || !/^\+?\d+$/u.test(raw)) {
        return DEFAULT_MAX_TOOL_ITERATIONS;
    }
    const parsed = Number.parseInt(raw, 10);
    // isFinite rejects digit strings long enough to overflow to Infinity.
    return Number.isFinite(parsed) && parsed > 0 ? parsed : DEFAULT_MAX_TOOL_ITERATIONS;
}
|
|
152
|
+
/**
 * Formats a one-line diagnostic describing why the tool-calling loop stopped.
 *
 * @param {object} input
 * @param {string} input.reason - Why the loop stopped.
 * @param {number} input.iteration - Zero-based iteration index (reported 1-based).
 * @param {number} input.maxToolIterations - Iteration cap in effect.
 * @param {Array<{name?: string}>} [input.toolCalls] - Tool calls of the last round.
 * @param {Array<{toolName: string, isError?: boolean}>} [input.executedToolResults] -
 *   Tools executed so far; failed ones are suffixed with ":error".
 * @param {string} [input.terminalText] - Last visible model output; trimmed and
 *   clipped to 500 characters.
 * @returns {string} Space-joined summary; empty sections are omitted.
 */
function summarizeToolLoopState(input) {
    const toolCallNames = input.toolCalls?.map((toolCall) => toolCall.name).filter(Boolean) ?? [];
    // Treated as optional, like toolCalls and terminalText, so that building
    // the diagnostic can never throw and hide the failure being reported.
    const executedNames = input.executedToolResults?.map((item) => `${item.toolName}${item.isError ? ":error" : ""}`) ?? [];
    const visibleText = input.terminalText?.trim();
    return [
        `Tool-calling loop stopped: ${input.reason}.`,
        `iteration=${input.iteration + 1}/${input.maxToolIterations}.`,
        toolCallNames.length > 0 ? `toolCalls=${toolCallNames.join(",")}.` : "",
        executedNames.length > 0 ? `executedTools=${executedNames.join(",")}.` : "",
        visibleText ? `lastVisibleOutput=${visibleText.slice(0, 500)}` : "",
    ].filter(Boolean).join(" ");
}
|
|
164
|
+
/**
 * Wraps the tool-loop state summary in an Error.
 *
 * @param {object} input - Loop state, passed unchanged to summarizeToolLoopState.
 * @returns {Error} Error whose message is the formatted loop-state summary.
 */
function createToolLoopError(input) {
    const message = summarizeToolLoopState(input);
    return new Error(message);
}
|
|
61
167
|
// Keep deterministic evidence summaries bounded for prompt/log readability while
|
|
62
168
|
// still preserving meaningful tool context; 4000 chars is a conservative cap.
|
|
63
169
|
const TOOL_OUTPUT_TRUNCATION_LIMIT = 4000;
|
|
64
170
|
function buildDeterministicFinalFromToolEvidence(executedToolResults) {
|
|
65
171
|
const evidence = executedToolResults
|
|
66
|
-
.filter((item) => item.isError !== true && item.toolName
|
|
172
|
+
.filter((item) => item.isError !== true && !isPlanToolName(item.toolName))
|
|
67
173
|
.map((item) => {
|
|
68
174
|
const output = stringifyToolOutput(item.output).trim();
|
|
69
175
|
const clipped = output.length > TOOL_OUTPUT_TRUNCATION_LIMIT
|
|
@@ -82,8 +188,8 @@ function buildDeterministicFinalFromToolEvidence(executedToolResults) {
|
|
|
82
188
|
].join("\n");
|
|
83
189
|
return { output };
|
|
84
190
|
}
|
|
85
|
-
/**
 * Reports whether any plan-state evidence exists for the current run.
 *
 * @param {Array<{toolName?: unknown, output?: unknown}>} executedToolResults -
 *   Tool results executed so far.
 * @param {*} [externalPlanEvidence=false] - Plan evidence supplied by the
 *   caller; when truthy it is returned as-is without inspecting the results.
 * @returns {*} The truthy `externalPlanEvidence`, otherwise a boolean that is
 *   true when some result came from a planning tool or carries a readable
 *   plan-state summary.
 */
function hasPlanStateEvidence(executedToolResults, externalPlanEvidence = false) {
    if (externalPlanEvidence) {
        return externalPlanEvidence;
    }
    const carriesPlanState = (entry) => {
        if (isPlanToolName(entry.toolName)) {
            return true;
        }
        return readPlanStateSummary(entry.output) !== null;
    };
    return executedToolResults.some(carriesPlanState);
}
|
|
88
194
|
function latestToolErrorRecoveryInstruction(executedToolResults) {
|
|
89
195
|
const latest = executedToolResults.at(-1);
|
|
@@ -119,11 +225,59 @@ function extractLatestUserInput(request) {
|
|
|
119
225
|
}
|
|
120
226
|
return undefined;
|
|
121
227
|
}
|
|
122
|
-
|
|
228
|
+
/**
 * Emits one JSON diagnostics line about a local tool-replay decision to stderr.
 * Does nothing unless AGENT_HARNESS_PROMPTED_JSON_DEBUG is exactly "1".
 *
 * @param {object} input
 * @param {Array<{name?: string}>} input.toolCalls - Tool calls being considered.
 * @param {unknown} input.result - Runtime result whose messages are summarised.
 * @param {string[]} input.executableToolNames - Names of executable tools.
 * @param {string[]} input.builtinToolNames - Names of builtin executable tools.
 * @param {boolean} input.canReplay - Whether the calls can be replayed locally.
 * @returns {void}
 */
function debugLocalToolReplay(input) {
    const debugEnabled = process.env.AGENT_HARNESS_PROMPTED_JSON_DEBUG === "1";
    if (!debugEnabled) {
        return;
    }
    const payload = {
        type: "local-tool-replay",
        toolCallNames: input.toolCalls.map((toolCall) => toolCall.name),
        resultMessages: summarizeResultMessages(input.result),
        executableToolNames: input.executableToolNames,
        builtinToolNames: input.builtinToolNames,
        canReplay: input.canReplay,
    };
    console.error(JSON.stringify(payload));
}
|
|
241
|
+
/**
 * Produces compact, log-friendly summaries of the last eight messages of a result.
 *
 * Fields are read from the message itself, falling back to `message.kwargs`
 * for `tool_calls` and `content`.
 *
 * @param {unknown} result - Value expected to expose a `messages` array; any
 *   other shape yields an empty list.
 * @returns {Array<{role: (string|undefined), type: (string|undefined),
 *   name: (string|undefined), toolCallId: (string|undefined),
 *   toolCallNames: string[], contentHead: string}>} One summary per message;
 *   `contentHead` is the first 120 characters of string content, and tool
 *   calls without a string name are reported as "".
 */
function summarizeResultMessages(result) {
    const asRecord = (value) => (value !== null && typeof value === "object" ? value : null);
    const textOrUndefined = (value) => (typeof value === "string" ? value : undefined);
    const source = asRecord(result);
    const recentMessages = source !== null && Array.isArray(source.messages)
        ? source.messages.slice(-8)
        : [];
    return recentMessages.map((message) => {
        const typed = asRecord(message) ?? {};
        const kwargs = asRecord(typed.kwargs) ?? {};
        let toolCalls = [];
        if (Array.isArray(typed.tool_calls)) {
            toolCalls = typed.tool_calls;
        }
        else if (Array.isArray(kwargs.tool_calls)) {
            toolCalls = kwargs.tool_calls;
        }
        const summary = {
            role: textOrUndefined(typed.role),
            type: typeof typed._getType === "function" ? String(typed._getType()) : undefined,
            name: textOrUndefined(typed.name),
            toolCallId: textOrUndefined(typed.tool_call_id),
            toolCallNames: toolCalls.map((toolCall) => textOrUndefined(asRecord(toolCall)?.name) ?? ""),
            contentHead: "",
        };
        if (typeof typed.content === "string") {
            summary.contentHead = typed.content.slice(0, 120);
        }
        else if (typeof kwargs.content === "string") {
            summary.contentHead = kwargs.content.slice(0, 120);
        }
        return summary;
    });
}
|
|
271
|
+
export async function runLocalToolInvocationLoop({ binding, request, primaryTools, toolNameMapping, executableTools, builtinExecutableTools, callRuntimeWithToolParseRecovery, toolRuntimeContext, externalPlanEvidence, }) {
|
|
123
272
|
const executedToolResults = [];
|
|
124
273
|
let activeRequest = request;
|
|
125
274
|
let currentMessages = Array.isArray(activeRequest.messages) ? [...activeRequest.messages] : [];
|
|
126
|
-
const maxToolIterations =
|
|
275
|
+
const maxToolIterations = resolveMaxToolIterations();
|
|
276
|
+
let lastRecoveryInstruction = "";
|
|
277
|
+
let lastRecoveryExecutedCount = -1;
|
|
278
|
+
let repeatedRecoveryWithoutProgress = 0;
|
|
279
|
+
let repeatedPlanOnlyAfterPlan = 0;
|
|
280
|
+
let pendingResult;
|
|
127
281
|
let result;
|
|
128
282
|
const toolCatalog = new Map();
|
|
129
283
|
for (const tool of primaryTools) {
|
|
@@ -135,13 +289,21 @@ export async function runLocalToolInvocationLoop({ binding, request, primaryTool
|
|
|
135
289
|
}
|
|
136
290
|
for (let iteration = 0; iteration < maxToolIterations; iteration += 1) {
|
|
137
291
|
const isFinalIteration = iteration + 1 === maxToolIterations;
|
|
138
|
-
result = await callRuntimeWithToolParseRecovery(activeRequest);
|
|
292
|
+
result = pendingResult ?? await callRuntimeWithToolParseRecovery(activeRequest);
|
|
293
|
+
pendingResult = undefined;
|
|
139
294
|
const toolCalls = extractToolCallsFromResult(result);
|
|
140
295
|
if (toolCalls.length === 0) {
|
|
141
296
|
const terminalText = sanitizeVisibleText(extractVisibleOutput(result) || "");
|
|
142
|
-
const hasIncompletePlanState = hasIncompleteExecutedPlan(executedToolResults);
|
|
297
|
+
const hasIncompletePlanState = hasIncompleteExecutedPlan(executedToolResults, externalPlanEvidence);
|
|
143
298
|
const shouldEnforceIncompletePlan = requiresPlanEvidence(binding) && hasIncompletePlanState;
|
|
144
299
|
const hasExecutionBeyondTodoPlanning = hasNonTodoToolEvidence(executedToolResults);
|
|
300
|
+
const hasAvailableNonPlanningTool = primaryTools.some((tool) => !isPlanToolName(tool.name));
|
|
301
|
+
if (requiresPlanEvidence(binding)
|
|
302
|
+
&& hasPlanStateEvidence(executedToolResults, externalPlanEvidence)
|
|
303
|
+
&& !hasIncompletePlanState
|
|
304
|
+
&& !hasAvailableNonPlanningTool) {
|
|
305
|
+
break;
|
|
306
|
+
}
|
|
145
307
|
const toolErrorRecoveryInstruction = latestToolErrorRecoveryInstruction(executedToolResults)
|
|
146
308
|
?? terminalToolErrorRecoveryInstruction(terminalText);
|
|
147
309
|
const leakedJsonToolCallRecoveryInstruction = terminalText && salvageJsonToolCalls(terminalText).length > 0
|
|
@@ -149,29 +311,70 @@ export async function runLocalToolInvocationLoop({ binding, request, primaryTool
|
|
|
149
311
|
: null;
|
|
150
312
|
const recoveryInstruction = toolErrorRecoveryInstruction ?? leakedJsonToolCallRecoveryInstruction ?? (terminalText
|
|
151
313
|
? resolveExecutionWithoutToolEvidenceTextInstruction(activeRequest, terminalText, false, {
|
|
152
|
-
hasWriteTodosEvidence: executedToolResults.some((item) => item.toolName
|
|
314
|
+
hasWriteTodosEvidence: externalPlanEvidence === true || executedToolResults.some((item) => isPlanToolName(item.toolName)),
|
|
153
315
|
hasToolResultEvidence: hasExecutionBeyondTodoPlanning,
|
|
154
|
-
hasPlanStateEvidence: hasPlanStateEvidence(executedToolResults),
|
|
316
|
+
hasPlanStateEvidence: hasPlanStateEvidence(executedToolResults, externalPlanEvidence),
|
|
155
317
|
hasIncompletePlanState: shouldEnforceIncompletePlan,
|
|
156
318
|
requiresPlan: requiresPlanEvidence(binding),
|
|
157
319
|
})
|
|
158
320
|
: shouldEnforceIncompletePlan
|
|
159
321
|
? AUTONOMOUS_INVESTIGATION_RECOVERY_INSTRUCTION
|
|
160
322
|
: null);
|
|
323
|
+
if (requiresPlanEvidence(binding)
|
|
324
|
+
&& !hasPlanStateEvidence(executedToolResults, externalPlanEvidence)
|
|
325
|
+
&& builtinExecutableTools.has("write_todos")) {
|
|
326
|
+
pendingResult = buildBootstrapPlanToolResult(primaryTools, activeRequest);
|
|
327
|
+
continue;
|
|
328
|
+
}
|
|
161
329
|
if (recoveryInstruction) {
|
|
162
|
-
|
|
163
|
-
|
|
330
|
+
const executedCount = executedToolResults.length;
|
|
331
|
+
if (recoveryInstruction === lastRecoveryInstruction && executedCount === lastRecoveryExecutedCount) {
|
|
332
|
+
repeatedRecoveryWithoutProgress += 1;
|
|
333
|
+
}
|
|
334
|
+
else {
|
|
335
|
+
repeatedRecoveryWithoutProgress = 0;
|
|
336
|
+
lastRecoveryInstruction = recoveryInstruction;
|
|
337
|
+
lastRecoveryExecutedCount = executedCount;
|
|
338
|
+
}
|
|
339
|
+
if (repeatedRecoveryWithoutProgress >= MAX_REPEATED_RECOVERY_WITHOUT_PROGRESS) {
|
|
340
|
+
if (hasNonTodoToolEvidence(executedToolResults)) {
|
|
341
|
+
return {
|
|
342
|
+
result: buildDeterministicFinalFromToolEvidence(executedToolResults),
|
|
343
|
+
executedToolResults,
|
|
344
|
+
};
|
|
345
|
+
}
|
|
346
|
+
if (!hasAvailableNonPlanningTool && !hasIncompletePlanState && result) {
|
|
347
|
+
return { result, executedToolResults };
|
|
348
|
+
}
|
|
349
|
+
throw createToolLoopError({
|
|
350
|
+
reason: "model repeated the same recovery path without producing a tool call or new tool evidence",
|
|
351
|
+
iteration,
|
|
352
|
+
maxToolIterations,
|
|
353
|
+
terminalText,
|
|
354
|
+
executedToolResults,
|
|
355
|
+
});
|
|
356
|
+
}
|
|
357
|
+
if (iteration + 1 === maxToolIterations) {
|
|
358
|
+
throw createToolLoopError({
|
|
359
|
+
reason: "maximum iterations reached",
|
|
360
|
+
iteration,
|
|
361
|
+
maxToolIterations,
|
|
362
|
+
terminalText,
|
|
363
|
+
executedToolResults,
|
|
364
|
+
});
|
|
164
365
|
}
|
|
165
366
|
activeRequest = appendToolRecoveryInstruction(activeRequest, recoveryInstruction);
|
|
166
367
|
continue;
|
|
167
368
|
}
|
|
369
|
+
repeatedRecoveryWithoutProgress = 0;
|
|
370
|
+
repeatedPlanOnlyAfterPlan = 0;
|
|
168
371
|
break;
|
|
169
372
|
}
|
|
170
373
|
const missingPlanRecoveryInstruction = resolveMissingPlanRecoveryInstruction({
|
|
171
374
|
request: activeRequest,
|
|
172
375
|
requiresPlan: requiresPlanEvidence(binding),
|
|
173
|
-
hasPlanStateEvidence: hasPlanStateEvidence(executedToolResults),
|
|
174
|
-
hasWriteTodosEvidence: executedToolResults.some((item) => item.toolName
|
|
376
|
+
hasPlanStateEvidence: hasPlanStateEvidence(executedToolResults, externalPlanEvidence),
|
|
377
|
+
hasWriteTodosEvidence: externalPlanEvidence === true || executedToolResults.some((item) => isPlanToolName(item.toolName)),
|
|
175
378
|
hasToolResultEvidence: executedToolResults.length > 0 || toolCalls.length > 0,
|
|
176
379
|
});
|
|
177
380
|
if (missingPlanRecoveryInstruction
|
|
@@ -182,11 +385,55 @@ export async function runLocalToolInvocationLoop({ binding, request, primaryTool
|
|
|
182
385
|
activeRequest = appendToolRecoveryInstruction(activeRequest, missingPlanRecoveryInstruction);
|
|
183
386
|
continue;
|
|
184
387
|
}
|
|
185
|
-
if (
|
|
388
|
+
if (requiresPlanEvidence(binding)
|
|
389
|
+
&& hasPlanStateEvidence(executedToolResults, externalPlanEvidence)
|
|
390
|
+
&& !hasNonTodoToolEvidence(executedToolResults)
|
|
391
|
+
&& toolCalls.length > 0
|
|
392
|
+
&& toolCalls.every((toolCall) => isPlanToolName(toolCall.name))) {
|
|
393
|
+
repeatedPlanOnlyAfterPlan += 1;
|
|
394
|
+
if (repeatedPlanOnlyAfterPlan >= MAX_REPEATED_PLAN_ONLY_AFTER_PLAN) {
|
|
395
|
+
throw createToolLoopError({
|
|
396
|
+
reason: "model repeatedly selected only planning tools after the todo board already existed and before any non-planning evidence tool returned",
|
|
397
|
+
iteration,
|
|
398
|
+
maxToolIterations,
|
|
399
|
+
toolCalls,
|
|
400
|
+
executedToolResults,
|
|
401
|
+
});
|
|
402
|
+
}
|
|
403
|
+
if (iteration + 1 === maxToolIterations) {
|
|
404
|
+
throw createToolLoopError({
|
|
405
|
+
reason: "maximum iterations reached",
|
|
406
|
+
iteration,
|
|
407
|
+
maxToolIterations,
|
|
408
|
+
toolCalls,
|
|
409
|
+
executedToolResults,
|
|
410
|
+
});
|
|
411
|
+
}
|
|
412
|
+
activeRequest = appendToolRecoveryInstruction(activeRequest, AUTONOMOUS_INVESTIGATION_RECOVERY_INSTRUCTION);
|
|
413
|
+
pendingResult = undefined;
|
|
414
|
+
continue;
|
|
415
|
+
}
|
|
416
|
+
repeatedRecoveryWithoutProgress = 0;
|
|
417
|
+
repeatedPlanOnlyAfterPlan = 0;
|
|
418
|
+
const canReplayToolCalls = canReplayToolCallsLocally(binding, toolCalls, primaryTools, toolNameMapping, executableTools, builtinExecutableTools);
|
|
419
|
+
debugLocalToolReplay({
|
|
420
|
+
toolCalls,
|
|
421
|
+
result,
|
|
422
|
+
executableToolNames: [...executableTools.keys()],
|
|
423
|
+
builtinToolNames: [...builtinExecutableTools.keys()],
|
|
424
|
+
canReplay: canReplayToolCalls,
|
|
425
|
+
});
|
|
426
|
+
if (!canReplayToolCalls) {
|
|
186
427
|
break;
|
|
187
428
|
}
|
|
188
429
|
if (iteration + 1 === maxToolIterations) {
|
|
189
|
-
throw
|
|
430
|
+
throw createToolLoopError({
|
|
431
|
+
reason: "maximum iterations reached",
|
|
432
|
+
iteration,
|
|
433
|
+
maxToolIterations,
|
|
434
|
+
toolCalls,
|
|
435
|
+
executedToolResults,
|
|
436
|
+
});
|
|
190
437
|
}
|
|
191
438
|
const resultMessages = result.messages;
|
|
192
439
|
const nextMessages = [...currentMessages];
|
|
@@ -258,7 +505,7 @@ export async function runLocalToolInvocationLoop({ binding, request, primaryTool
|
|
|
258
505
|
if (requiresPlanEvidence(binding)
|
|
259
506
|
&& toolCalls.length > 0
|
|
260
507
|
&& toolCalls.every((toolCall) => isPlanToolName(toolCall.name))
|
|
261
|
-
&& !hasIncompleteExecutedPlan(executedToolResults)
|
|
508
|
+
&& !hasIncompleteExecutedPlan(executedToolResults, externalPlanEvidence)
|
|
262
509
|
&& hasNonTodoToolEvidence(executedToolResults)) {
|
|
263
510
|
return {
|
|
264
511
|
result: buildDeterministicFinalFromToolEvidence(executedToolResults),
|