@botbotgo/agent-harness 0.0.418 → 0.0.419

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,4 +1,4 @@
1
- import { ToolMessage } from "@langchain/core/messages";
1
+ import { AIMessage, ToolMessage } from "@langchain/core/messages";
2
2
  import { createModelFacingToolNameLookupCandidates, resolveModelFacingToolName } from "./tool/tool-name-mapping.js";
3
3
  import { canReplayToolCallsLocally } from "./tool/tool-replay.js";
4
4
  import { extractToolCallsFromResult, normalizeToolArgsForSchema, stringifyToolOutput } from "./tool/tool-arguments.js";
@@ -10,6 +10,59 @@ import { appendToolRecoveryInstruction, extractVisibleOutput, resolveMissingPlan
10
10
  import { salvageJsonToolCalls } from "../parsing/output-tool-args.js";
11
11
  import { AUTONOMOUS_INVESTIGATION_RECOVERY_INSTRUCTION } from "../prompts/runtime-prompts.js";
12
12
  const TOOL_FOLLOW_UP_INSTRUCTION = "One or more tool results are already available in this conversation. Answer the user's current request directly from the existing context and tool results. Do not ask the user to repeat inputs that are already present above.";
13
+ const DEFAULT_MAX_TOOL_ITERATIONS = 10_000;
14
+ const MAX_REPEATED_RECOVERY_WITHOUT_PROGRESS = 2;
15
+ const MAX_REPEATED_PLAN_ONLY_AFTER_PLAN = 2;
16
+ function createBootstrapTodoPlan(primaryTools) {
17
+ const evidenceTools = primaryTools
18
+ .map((tool) => typeof tool.name === "string" ? tool.name.trim() : "")
19
+ .filter((name) => name.length > 0 && !isPlanToolName(name))
20
+ .slice(0, 3);
21
+ if (evidenceTools.length === 0) {
22
+ return [
23
+ {
24
+ content: "Establish the required visible plan for this request",
25
+ status: "completed",
26
+ },
27
+ {
28
+ content: "Return the final answer from the available conversation context",
29
+ status: "completed",
30
+ },
31
+ ];
32
+ }
33
+ const evidenceLabel = evidenceTools.length > 0
34
+ ? evidenceTools.join(", ")
35
+ : "the selected non-planning evidence tool";
36
+ return [
37
+ {
38
+ content: `Select and run an appropriate non-planning evidence tool from: ${evidenceLabel}`,
39
+ status: "in_progress",
40
+ },
41
+ {
42
+ content: "Inspect the returned tool evidence and update the todo board",
43
+ status: "pending",
44
+ },
45
+ {
46
+ content: "Return the final answer grounded in observed tool output",
47
+ status: "pending",
48
+ },
49
+ ];
50
+ }
51
+ function buildBootstrapPlanToolResult(primaryTools) {
52
+ return {
53
+ messages: [new AIMessage({
54
+ content: "",
55
+ tool_calls: [{
56
+ id: `write-todos-bootstrap-${Math.random().toString(36).slice(2, 10)}`,
57
+ name: "write_todos",
58
+ args: {
59
+ todos: createBootstrapTodoPlan(primaryTools),
60
+ },
61
+ type: "tool_call",
62
+ }],
63
+ })],
64
+ };
65
+ }
13
66
  function readPlanStateSummary(output) {
14
67
  if (typeof output !== "object" || output === null) {
15
68
  return null;
@@ -30,7 +83,7 @@ function readPlanStateSummary(output) {
30
83
  inProgress: typeof typedCounts.inProgress === "number" ? typedCounts.inProgress : 0,
31
84
  };
32
85
  }
33
- function hasIncompleteExecutedPlan(executedToolResults) {
86
+ function hasIncompleteExecutedPlan(executedToolResults, externalPlanEvidence = false) {
34
87
  for (const latest of [...executedToolResults].reverse()) {
35
88
  const summary = readPlanStateSummary(latest.output);
36
89
  if (!summary) {
@@ -38,7 +91,7 @@ function hasIncompleteExecutedPlan(executedToolResults) {
38
91
  }
39
92
  return summary.pending > 0 || summary.inProgress > 0;
40
93
  }
41
- return false;
94
+ return externalPlanEvidence;
42
95
  }
43
96
  function normalizeToolName(value) {
44
97
  return typeof value === "string" ? value.trim().toLowerCase().replace(/[\s-]+/gu, "_") : "";
@@ -51,19 +104,45 @@ function isPlanToolName(toolName) {
51
104
  return normalized === "write_todos"
52
105
  || normalized === "read_todos"
53
106
  || normalized === "tool_call_write_todos"
54
- || normalized === "tool_call_read_todos";
107
+ || normalized === "tool_call_read_todos"
108
+ || normalized === "call_write_todos"
109
+ || normalized === "call_read_todos";
55
110
  }
56
111
  function isFallbackTodoCompletionToolCall(toolCall) {
57
112
  return typeof toolCall.id === "string"
58
113
  && toolCall.id.startsWith("fallback-complete-")
59
- && (toolCall.name === "write_todos" || toolCall.name === "tool_call_write_todos");
114
+ && isPlanToolName(toolCall.name)
115
+ && normalizeToolName(toolCall.name).includes("write_todos");
116
+ }
117
+ function resolveMaxToolIterations() {
118
+ const raw = process.env.AGENT_HARNESS_MAX_TOOL_ITERATIONS;
119
+ if (!raw) {
120
+ return DEFAULT_MAX_TOOL_ITERATIONS;
121
+ }
122
+ const parsed = Number.parseInt(raw, 10);
123
+ return Number.isFinite(parsed) && parsed > 0 ? parsed : DEFAULT_MAX_TOOL_ITERATIONS;
124
+ }
125
+ function summarizeToolLoopState(input) {
126
+ const toolCallNames = input.toolCalls?.map((toolCall) => toolCall.name).filter(Boolean) ?? [];
127
+ const executedNames = input.executedToolResults.map((item) => `${item.toolName}${item.isError ? ":error" : ""}`);
128
+ const visibleText = input.terminalText?.trim();
129
+ return [
130
+ `Tool-calling loop stopped: ${input.reason}.`,
131
+ `iteration=${input.iteration + 1}/${input.maxToolIterations}.`,
132
+ toolCallNames.length > 0 ? `toolCalls=${toolCallNames.join(",")}.` : "",
133
+ executedNames.length > 0 ? `executedTools=${executedNames.join(",")}.` : "",
134
+ visibleText ? `lastVisibleOutput=${visibleText.slice(0, 500)}` : "",
135
+ ].filter(Boolean).join(" ");
136
+ }
137
+ function createToolLoopError(input) {
138
+ return new Error(summarizeToolLoopState(input));
60
139
  }
61
140
  // Keep deterministic evidence summaries bounded for prompt/log readability while
62
141
  // still preserving meaningful tool context; 4000 chars is a conservative cap.
63
142
  const TOOL_OUTPUT_TRUNCATION_LIMIT = 4000;
64
143
  function buildDeterministicFinalFromToolEvidence(executedToolResults) {
65
144
  const evidence = executedToolResults
66
- .filter((item) => item.isError !== true && item.toolName !== "write_todos" && item.toolName !== "read_todos")
145
+ .filter((item) => item.isError !== true && !isPlanToolName(item.toolName))
67
146
  .map((item) => {
68
147
  const output = stringifyToolOutput(item.output).trim();
69
148
  const clipped = output.length > TOOL_OUTPUT_TRUNCATION_LIMIT
@@ -82,8 +161,8 @@ function buildDeterministicFinalFromToolEvidence(executedToolResults) {
82
161
  ].join("\n");
83
162
  return { output };
84
163
  }
85
- function hasPlanStateEvidence(executedToolResults) {
86
- return executedToolResults.some((item) => item.toolName === "write_todos" || item.toolName === "read_todos" || readPlanStateSummary(item.output) !== null);
164
+ function hasPlanStateEvidence(executedToolResults, externalPlanEvidence = false) {
165
+ return externalPlanEvidence || executedToolResults.some((item) => isPlanToolName(item.toolName) || readPlanStateSummary(item.output) !== null);
87
166
  }
88
167
  function latestToolErrorRecoveryInstruction(executedToolResults) {
89
168
  const latest = executedToolResults.at(-1);
@@ -119,11 +198,59 @@ function extractLatestUserInput(request) {
119
198
  }
120
199
  return undefined;
121
200
  }
122
- export async function runLocalToolInvocationLoop({ binding, request, primaryTools, toolNameMapping, executableTools, builtinExecutableTools, callRuntimeWithToolParseRecovery, toolRuntimeContext, }) {
201
+ function debugLocalToolReplay(input) {
202
+ if (process.env.AGENT_HARNESS_PROMPTED_JSON_DEBUG !== "1") {
203
+ return;
204
+ }
205
+ console.error(JSON.stringify({
206
+ type: "local-tool-replay",
207
+ toolCallNames: input.toolCalls.map((toolCall) => toolCall.name),
208
+ resultMessages: summarizeResultMessages(input.result),
209
+ executableToolNames: input.executableToolNames,
210
+ builtinToolNames: input.builtinToolNames,
211
+ canReplay: input.canReplay,
212
+ }));
213
+ }
214
+ function summarizeResultMessages(result) {
215
+ const messages = typeof result === "object" && result !== null && Array.isArray(result.messages)
216
+ ? result.messages
217
+ : [];
218
+ return messages.slice(-8).map((message) => {
219
+ const typed = typeof message === "object" && message !== null ? message : {};
220
+ const kwargs = typeof typed.kwargs === "object" && typed.kwargs !== null ? typed.kwargs : {};
221
+ const toolCalls = Array.isArray(typed.tool_calls)
222
+ ? typed.tool_calls
223
+ : Array.isArray(kwargs.tool_calls)
224
+ ? kwargs.tool_calls
225
+ : [];
226
+ return {
227
+ role: typeof typed.role === "string" ? typed.role : undefined,
228
+ type: typeof typed._getType === "function"
229
+ ? String(typed._getType())
230
+ : undefined,
231
+ name: typeof typed.name === "string" ? typed.name : undefined,
232
+ toolCallId: typeof typed.tool_call_id === "string" ? typed.tool_call_id : undefined,
233
+ toolCallNames: toolCalls.map((toolCall) => typeof toolCall === "object" && toolCall !== null && typeof toolCall.name === "string"
234
+ ? toolCall.name
235
+ : ""),
236
+ contentHead: typeof typed.content === "string"
237
+ ? typed.content.slice(0, 120)
238
+ : typeof kwargs.content === "string"
239
+ ? kwargs.content.slice(0, 120)
240
+ : "",
241
+ };
242
+ });
243
+ }
244
+ export async function runLocalToolInvocationLoop({ binding, request, primaryTools, toolNameMapping, executableTools, builtinExecutableTools, callRuntimeWithToolParseRecovery, toolRuntimeContext, externalPlanEvidence, }) {
123
245
  const executedToolResults = [];
124
246
  let activeRequest = request;
125
247
  let currentMessages = Array.isArray(activeRequest.messages) ? [...activeRequest.messages] : [];
126
- const maxToolIterations = 8;
248
+ const maxToolIterations = resolveMaxToolIterations();
249
+ let lastRecoveryInstruction = "";
250
+ let lastRecoveryExecutedCount = -1;
251
+ let repeatedRecoveryWithoutProgress = 0;
252
+ let repeatedPlanOnlyAfterPlan = 0;
253
+ let pendingResult;
127
254
  let result;
128
255
  const toolCatalog = new Map();
129
256
  for (const tool of primaryTools) {
@@ -135,13 +262,21 @@ export async function runLocalToolInvocationLoop({ binding, request, primaryTool
135
262
  }
136
263
  for (let iteration = 0; iteration < maxToolIterations; iteration += 1) {
137
264
  const isFinalIteration = iteration + 1 === maxToolIterations;
138
- result = await callRuntimeWithToolParseRecovery(activeRequest);
265
+ result = pendingResult ?? await callRuntimeWithToolParseRecovery(activeRequest);
266
+ pendingResult = undefined;
139
267
  const toolCalls = extractToolCallsFromResult(result);
140
268
  if (toolCalls.length === 0) {
141
269
  const terminalText = sanitizeVisibleText(extractVisibleOutput(result) || "");
142
- const hasIncompletePlanState = hasIncompleteExecutedPlan(executedToolResults);
270
+ const hasIncompletePlanState = hasIncompleteExecutedPlan(executedToolResults, externalPlanEvidence);
143
271
  const shouldEnforceIncompletePlan = requiresPlanEvidence(binding) && hasIncompletePlanState;
144
272
  const hasExecutionBeyondTodoPlanning = hasNonTodoToolEvidence(executedToolResults);
273
+ const hasAvailableNonPlanningTool = primaryTools.some((tool) => !isPlanToolName(tool.name));
274
+ if (requiresPlanEvidence(binding)
275
+ && hasPlanStateEvidence(executedToolResults, externalPlanEvidence)
276
+ && !hasIncompletePlanState
277
+ && !hasAvailableNonPlanningTool) {
278
+ break;
279
+ }
145
280
  const toolErrorRecoveryInstruction = latestToolErrorRecoveryInstruction(executedToolResults)
146
281
  ?? terminalToolErrorRecoveryInstruction(terminalText);
147
282
  const leakedJsonToolCallRecoveryInstruction = terminalText && salvageJsonToolCalls(terminalText).length > 0
@@ -149,29 +284,70 @@ export async function runLocalToolInvocationLoop({ binding, request, primaryTool
149
284
  : null;
150
285
  const recoveryInstruction = toolErrorRecoveryInstruction ?? leakedJsonToolCallRecoveryInstruction ?? (terminalText
151
286
  ? resolveExecutionWithoutToolEvidenceTextInstruction(activeRequest, terminalText, false, {
152
- hasWriteTodosEvidence: executedToolResults.some((item) => item.toolName === "write_todos"),
287
+ hasWriteTodosEvidence: externalPlanEvidence === true || executedToolResults.some((item) => isPlanToolName(item.toolName)),
153
288
  hasToolResultEvidence: hasExecutionBeyondTodoPlanning,
154
- hasPlanStateEvidence: hasPlanStateEvidence(executedToolResults),
289
+ hasPlanStateEvidence: hasPlanStateEvidence(executedToolResults, externalPlanEvidence),
155
290
  hasIncompletePlanState: shouldEnforceIncompletePlan,
156
291
  requiresPlan: requiresPlanEvidence(binding),
157
292
  })
158
293
  : shouldEnforceIncompletePlan
159
294
  ? AUTONOMOUS_INVESTIGATION_RECOVERY_INSTRUCTION
160
295
  : null);
296
+ if (requiresPlanEvidence(binding)
297
+ && !hasPlanStateEvidence(executedToolResults, externalPlanEvidence)
298
+ && builtinExecutableTools.has("write_todos")) {
299
+ pendingResult = buildBootstrapPlanToolResult(primaryTools);
300
+ continue;
301
+ }
161
302
  if (recoveryInstruction) {
162
- if (isFinalIteration) {
163
- throw new Error(`Tool-calling loop exceeded the maximum of ${maxToolIterations} iterations`);
303
+ const executedCount = executedToolResults.length;
304
+ if (recoveryInstruction === lastRecoveryInstruction && executedCount === lastRecoveryExecutedCount) {
305
+ repeatedRecoveryWithoutProgress += 1;
306
+ }
307
+ else {
308
+ repeatedRecoveryWithoutProgress = 0;
309
+ lastRecoveryInstruction = recoveryInstruction;
310
+ lastRecoveryExecutedCount = executedCount;
311
+ }
312
+ if (repeatedRecoveryWithoutProgress >= MAX_REPEATED_RECOVERY_WITHOUT_PROGRESS) {
313
+ if (hasNonTodoToolEvidence(executedToolResults)) {
314
+ return {
315
+ result: buildDeterministicFinalFromToolEvidence(executedToolResults),
316
+ executedToolResults,
317
+ };
318
+ }
319
+ if (!hasAvailableNonPlanningTool && !hasIncompletePlanState && result) {
320
+ return { result, executedToolResults };
321
+ }
322
+ throw createToolLoopError({
323
+ reason: "model repeated the same recovery path without producing a tool call or new tool evidence",
324
+ iteration,
325
+ maxToolIterations,
326
+ terminalText,
327
+ executedToolResults,
328
+ });
329
+ }
330
+ if (iteration + 1 === maxToolIterations) {
331
+ throw createToolLoopError({
332
+ reason: "maximum iterations reached",
333
+ iteration,
334
+ maxToolIterations,
335
+ terminalText,
336
+ executedToolResults,
337
+ });
164
338
  }
165
339
  activeRequest = appendToolRecoveryInstruction(activeRequest, recoveryInstruction);
166
340
  continue;
167
341
  }
342
+ repeatedRecoveryWithoutProgress = 0;
343
+ repeatedPlanOnlyAfterPlan = 0;
168
344
  break;
169
345
  }
170
346
  const missingPlanRecoveryInstruction = resolveMissingPlanRecoveryInstruction({
171
347
  request: activeRequest,
172
348
  requiresPlan: requiresPlanEvidence(binding),
173
- hasPlanStateEvidence: hasPlanStateEvidence(executedToolResults),
174
- hasWriteTodosEvidence: executedToolResults.some((item) => item.toolName === "write_todos"),
349
+ hasPlanStateEvidence: hasPlanStateEvidence(executedToolResults, externalPlanEvidence),
350
+ hasWriteTodosEvidence: externalPlanEvidence === true || executedToolResults.some((item) => isPlanToolName(item.toolName)),
175
351
  hasToolResultEvidence: executedToolResults.length > 0 || toolCalls.length > 0,
176
352
  });
177
353
  if (missingPlanRecoveryInstruction
@@ -182,11 +358,55 @@ export async function runLocalToolInvocationLoop({ binding, request, primaryTool
182
358
  activeRequest = appendToolRecoveryInstruction(activeRequest, missingPlanRecoveryInstruction);
183
359
  continue;
184
360
  }
185
- if (!canReplayToolCallsLocally(binding, toolCalls, primaryTools, toolNameMapping, executableTools, builtinExecutableTools)) {
361
+ if (requiresPlanEvidence(binding)
362
+ && hasPlanStateEvidence(executedToolResults, externalPlanEvidence)
363
+ && !hasNonTodoToolEvidence(executedToolResults)
364
+ && toolCalls.length > 0
365
+ && toolCalls.every((toolCall) => isPlanToolName(toolCall.name))) {
366
+ repeatedPlanOnlyAfterPlan += 1;
367
+ if (repeatedPlanOnlyAfterPlan >= MAX_REPEATED_PLAN_ONLY_AFTER_PLAN) {
368
+ throw createToolLoopError({
369
+ reason: "model repeatedly selected only planning tools after the todo board already existed and before any non-planning evidence tool returned",
370
+ iteration,
371
+ maxToolIterations,
372
+ toolCalls,
373
+ executedToolResults,
374
+ });
375
+ }
376
+ if (iteration + 1 === maxToolIterations) {
377
+ throw createToolLoopError({
378
+ reason: "maximum iterations reached",
379
+ iteration,
380
+ maxToolIterations,
381
+ toolCalls,
382
+ executedToolResults,
383
+ });
384
+ }
385
+ activeRequest = appendToolRecoveryInstruction(activeRequest, AUTONOMOUS_INVESTIGATION_RECOVERY_INSTRUCTION);
386
+ pendingResult = undefined;
387
+ continue;
388
+ }
389
+ repeatedRecoveryWithoutProgress = 0;
390
+ repeatedPlanOnlyAfterPlan = 0;
391
+ const canReplayToolCalls = canReplayToolCallsLocally(binding, toolCalls, primaryTools, toolNameMapping, executableTools, builtinExecutableTools);
392
+ debugLocalToolReplay({
393
+ toolCalls,
394
+ result,
395
+ executableToolNames: [...executableTools.keys()],
396
+ builtinToolNames: [...builtinExecutableTools.keys()],
397
+ canReplay: canReplayToolCalls,
398
+ });
399
+ if (!canReplayToolCalls) {
186
400
  break;
187
401
  }
188
402
  if (iteration + 1 === maxToolIterations) {
189
- throw new Error(`Tool-calling loop exceeded the maximum of ${maxToolIterations} iterations`);
403
+ throw createToolLoopError({
404
+ reason: "maximum iterations reached",
405
+ iteration,
406
+ maxToolIterations,
407
+ toolCalls,
408
+ executedToolResults,
409
+ });
190
410
  }
191
411
  const resultMessages = result.messages;
192
412
  const nextMessages = [...currentMessages];
@@ -258,7 +478,7 @@ export async function runLocalToolInvocationLoop({ binding, request, primaryTool
258
478
  if (requiresPlanEvidence(binding)
259
479
  && toolCalls.length > 0
260
480
  && toolCalls.every((toolCall) => isPlanToolName(toolCall.name))
261
- && !hasIncompleteExecutedPlan(executedToolResults)
481
+ && !hasIncompleteExecutedPlan(executedToolResults, externalPlanEvidence)
262
482
  && hasNonTodoToolEvidence(executedToolResults)) {
263
483
  return {
264
484
  result: buildDeterministicFinalFromToolEvidence(executedToolResults),