@botbotgo/agent-harness 0.0.418 → 0.0.420

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (28):
  1. package/dist/cli/chat-interactive.js +1 -1
  2. package/dist/cli/chat-stream.js +9 -1
  3. package/dist/package-version.d.ts +2 -2
  4. package/dist/package-version.js +2 -2
  5. package/dist/runtime/adapter/compat/openai-compatible.js +12 -0
  6. package/dist/runtime/adapter/flow/invocation-flow.d.ts +2 -0
  7. package/dist/runtime/adapter/flow/invocation-flow.js +13 -5
  8. package/dist/runtime/adapter/flow/invoke-runtime.d.ts +1 -0
  9. package/dist/runtime/adapter/flow/invoke-runtime.js +1 -0
  10. package/dist/runtime/adapter/flow/stream-runtime.d.ts +4 -0
  11. package/dist/runtime/adapter/flow/stream-runtime.js +177 -14
  12. package/dist/runtime/adapter/invocation-result.js +17 -6
  13. package/dist/runtime/adapter/local-tool-invocation.d.ts +2 -1
  14. package/dist/runtime/adapter/local-tool-invocation.js +268 -21
  15. package/dist/runtime/adapter/model/model-providers.js +269 -58
  16. package/dist/runtime/adapter/model/prompted-json-tool-call-capture.d.ts +9 -0
  17. package/dist/runtime/adapter/model/prompted-json-tool-call-capture.js +40 -0
  18. package/dist/runtime/adapter/runtime-adapter-support.js +58 -12
  19. package/dist/runtime/adapter/runtime-shell.js +3 -2
  20. package/dist/runtime/adapter/stream-event-projection.js +22 -5
  21. package/dist/runtime/adapter/tool/tool-arguments.js +157 -67
  22. package/dist/runtime/adapter/tool/tool-replay.js +0 -4
  23. package/dist/runtime/agent-runtime-adapter.d.ts +3 -0
  24. package/dist/runtime/agent-runtime-adapter.js +217 -73
  25. package/dist/runtime/harness/run/stream-run.js +31 -3
  26. package/dist/runtime/parsing/output-tool-args.js +108 -0
  27. package/dist/workspace/resource-compilers.js +17 -4
  28. package/package.json +1 -1
@@ -1,4 +1,4 @@
1
- import { ToolMessage } from "@langchain/core/messages";
1
+ import { AIMessage, ToolMessage } from "@langchain/core/messages";
2
2
  import { createModelFacingToolNameLookupCandidates, resolveModelFacingToolName } from "./tool/tool-name-mapping.js";
3
3
  import { canReplayToolCallsLocally } from "./tool/tool-replay.js";
4
4
  import { extractToolCallsFromResult, normalizeToolArgsForSchema, stringifyToolOutput } from "./tool/tool-arguments.js";
@@ -10,6 +10,86 @@ import { appendToolRecoveryInstruction, extractVisibleOutput, resolveMissingPlan
10
10
  import { salvageJsonToolCalls } from "../parsing/output-tool-args.js";
11
11
  import { AUTONOMOUS_INVESTIGATION_RECOVERY_INSTRUCTION } from "../prompts/runtime-prompts.js";
12
12
  const TOOL_FOLLOW_UP_INSTRUCTION = "One or more tool results are already available in this conversation. Answer the user's current request directly from the existing context and tool results. Do not ask the user to repeat inputs that are already present above.";
13
+ const DEFAULT_MAX_TOOL_ITERATIONS = 10_000;
14
+ const MAX_REPEATED_RECOVERY_WITHOUT_PROGRESS = 2;
15
+ const MAX_REPEATED_PLAN_ONLY_AFTER_PLAN = 2;
16
+ function stringifyRequestForToolSelection(request) {
17
+ if (typeof request === "string") {
18
+ return request;
19
+ }
20
+ if (typeof request !== "object" || request === null) {
21
+ return "";
22
+ }
23
+ try {
24
+ return JSON.stringify(request);
25
+ }
26
+ catch {
27
+ return "";
28
+ }
29
+ }
30
+ function prioritizeBootstrapEvidenceTools(primaryTools, request) {
31
+ const requestText = stringifyRequestForToolSelection(request);
32
+ const isFinanceRequest = /\b(?:stock|ticker|finance|market|valuation|quote)\b|股票|股价|行情|估值|财报/iu.test(requestText);
33
+ const evidenceTools = primaryTools
34
+ .map((tool) => typeof tool.name === "string" ? tool.name.trim() : "")
35
+ .filter((name) => name.length > 0 && !isPlanToolName(name))
36
+ .sort((left, right) => {
37
+ if (!isFinanceRequest) {
38
+ return 0;
39
+ }
40
+ const leftFinance = left.includes("finance") ? 0 : 1;
41
+ const rightFinance = right.includes("finance") ? 0 : 1;
42
+ return leftFinance - rightFinance;
43
+ });
44
+ return evidenceTools.slice(0, 4);
45
+ }
46
+ function createBootstrapTodoPlan(primaryTools, request) {
47
+ const evidenceTools = prioritizeBootstrapEvidenceTools(primaryTools, request);
48
+ if (evidenceTools.length === 0) {
49
+ return [
50
+ {
51
+ content: "Establish the required visible plan for this request",
52
+ status: "completed",
53
+ },
54
+ {
55
+ content: "Return the final answer from the available conversation context",
56
+ status: "completed",
57
+ },
58
+ ];
59
+ }
60
+ const evidenceLabel = evidenceTools.length > 0
61
+ ? evidenceTools.join(", ")
62
+ : "the selected non-planning evidence tool";
63
+ return [
64
+ {
65
+ content: `Select and run an appropriate non-planning evidence tool from: ${evidenceLabel}`,
66
+ status: "in_progress",
67
+ },
68
+ {
69
+ content: "Inspect the returned tool evidence and update the todo board",
70
+ status: "pending",
71
+ },
72
+ {
73
+ content: "Return the final answer grounded in observed tool output",
74
+ status: "pending",
75
+ },
76
+ ];
77
+ }
78
+ function buildBootstrapPlanToolResult(primaryTools, request) {
79
+ return {
80
+ messages: [new AIMessage({
81
+ content: "",
82
+ tool_calls: [{
83
+ id: `write-todos-bootstrap-${Math.random().toString(36).slice(2, 10)}`,
84
+ name: "write_todos",
85
+ args: {
86
+ todos: createBootstrapTodoPlan(primaryTools, request),
87
+ },
88
+ type: "tool_call",
89
+ }],
90
+ })],
91
+ };
92
+ }
13
93
  function readPlanStateSummary(output) {
14
94
  if (typeof output !== "object" || output === null) {
15
95
  return null;
@@ -30,7 +110,7 @@ function readPlanStateSummary(output) {
30
110
  inProgress: typeof typedCounts.inProgress === "number" ? typedCounts.inProgress : 0,
31
111
  };
32
112
  }
33
- function hasIncompleteExecutedPlan(executedToolResults) {
113
+ function hasIncompleteExecutedPlan(executedToolResults, externalPlanEvidence = false) {
34
114
  for (const latest of [...executedToolResults].reverse()) {
35
115
  const summary = readPlanStateSummary(latest.output);
36
116
  if (!summary) {
@@ -38,7 +118,7 @@ function hasIncompleteExecutedPlan(executedToolResults) {
38
118
  }
39
119
  return summary.pending > 0 || summary.inProgress > 0;
40
120
  }
41
- return false;
121
+ return externalPlanEvidence;
42
122
  }
43
123
  function normalizeToolName(value) {
44
124
  return typeof value === "string" ? value.trim().toLowerCase().replace(/[\s-]+/gu, "_") : "";
@@ -51,19 +131,45 @@ function isPlanToolName(toolName) {
51
131
  return normalized === "write_todos"
52
132
  || normalized === "read_todos"
53
133
  || normalized === "tool_call_write_todos"
54
- || normalized === "tool_call_read_todos";
134
+ || normalized === "tool_call_read_todos"
135
+ || normalized === "call_write_todos"
136
+ || normalized === "call_read_todos";
55
137
  }
56
138
  function isFallbackTodoCompletionToolCall(toolCall) {
57
139
  return typeof toolCall.id === "string"
58
140
  && toolCall.id.startsWith("fallback-complete-")
59
- && (toolCall.name === "write_todos" || toolCall.name === "tool_call_write_todos");
141
+ && isPlanToolName(toolCall.name)
142
+ && normalizeToolName(toolCall.name).includes("write_todos");
143
+ }
144
+ function resolveMaxToolIterations() {
145
+ const raw = process.env.AGENT_HARNESS_MAX_TOOL_ITERATIONS;
146
+ if (!raw) {
147
+ return DEFAULT_MAX_TOOL_ITERATIONS;
148
+ }
149
+ const parsed = Number.parseInt(raw, 10);
150
+ return Number.isFinite(parsed) && parsed > 0 ? parsed : DEFAULT_MAX_TOOL_ITERATIONS;
151
+ }
152
+ function summarizeToolLoopState(input) {
153
+ const toolCallNames = input.toolCalls?.map((toolCall) => toolCall.name).filter(Boolean) ?? [];
154
+ const executedNames = input.executedToolResults.map((item) => `${item.toolName}${item.isError ? ":error" : ""}`);
155
+ const visibleText = input.terminalText?.trim();
156
+ return [
157
+ `Tool-calling loop stopped: ${input.reason}.`,
158
+ `iteration=${input.iteration + 1}/${input.maxToolIterations}.`,
159
+ toolCallNames.length > 0 ? `toolCalls=${toolCallNames.join(",")}.` : "",
160
+ executedNames.length > 0 ? `executedTools=${executedNames.join(",")}.` : "",
161
+ visibleText ? `lastVisibleOutput=${visibleText.slice(0, 500)}` : "",
162
+ ].filter(Boolean).join(" ");
163
+ }
164
+ function createToolLoopError(input) {
165
+ return new Error(summarizeToolLoopState(input));
60
166
  }
61
167
  // Keep deterministic evidence summaries bounded for prompt/log readability while
62
168
  // still preserving meaningful tool context; 4000 chars is a conservative cap.
63
169
  const TOOL_OUTPUT_TRUNCATION_LIMIT = 4000;
64
170
  function buildDeterministicFinalFromToolEvidence(executedToolResults) {
65
171
  const evidence = executedToolResults
66
- .filter((item) => item.isError !== true && item.toolName !== "write_todos" && item.toolName !== "read_todos")
172
+ .filter((item) => item.isError !== true && !isPlanToolName(item.toolName))
67
173
  .map((item) => {
68
174
  const output = stringifyToolOutput(item.output).trim();
69
175
  const clipped = output.length > TOOL_OUTPUT_TRUNCATION_LIMIT
@@ -82,8 +188,8 @@ function buildDeterministicFinalFromToolEvidence(executedToolResults) {
82
188
  ].join("\n");
83
189
  return { output };
84
190
  }
85
- function hasPlanStateEvidence(executedToolResults) {
86
- return executedToolResults.some((item) => item.toolName === "write_todos" || item.toolName === "read_todos" || readPlanStateSummary(item.output) !== null);
191
+ function hasPlanStateEvidence(executedToolResults, externalPlanEvidence = false) {
192
+ return externalPlanEvidence || executedToolResults.some((item) => isPlanToolName(item.toolName) || readPlanStateSummary(item.output) !== null);
87
193
  }
88
194
  function latestToolErrorRecoveryInstruction(executedToolResults) {
89
195
  const latest = executedToolResults.at(-1);
@@ -119,11 +225,59 @@ function extractLatestUserInput(request) {
119
225
  }
120
226
  return undefined;
121
227
  }
122
- export async function runLocalToolInvocationLoop({ binding, request, primaryTools, toolNameMapping, executableTools, builtinExecutableTools, callRuntimeWithToolParseRecovery, toolRuntimeContext, }) {
228
+ function debugLocalToolReplay(input) {
229
+ if (process.env.AGENT_HARNESS_PROMPTED_JSON_DEBUG !== "1") {
230
+ return;
231
+ }
232
+ console.error(JSON.stringify({
233
+ type: "local-tool-replay",
234
+ toolCallNames: input.toolCalls.map((toolCall) => toolCall.name),
235
+ resultMessages: summarizeResultMessages(input.result),
236
+ executableToolNames: input.executableToolNames,
237
+ builtinToolNames: input.builtinToolNames,
238
+ canReplay: input.canReplay,
239
+ }));
240
+ }
241
+ function summarizeResultMessages(result) {
242
+ const messages = typeof result === "object" && result !== null && Array.isArray(result.messages)
243
+ ? result.messages
244
+ : [];
245
+ return messages.slice(-8).map((message) => {
246
+ const typed = typeof message === "object" && message !== null ? message : {};
247
+ const kwargs = typeof typed.kwargs === "object" && typed.kwargs !== null ? typed.kwargs : {};
248
+ const toolCalls = Array.isArray(typed.tool_calls)
249
+ ? typed.tool_calls
250
+ : Array.isArray(kwargs.tool_calls)
251
+ ? kwargs.tool_calls
252
+ : [];
253
+ return {
254
+ role: typeof typed.role === "string" ? typed.role : undefined,
255
+ type: typeof typed._getType === "function"
256
+ ? String(typed._getType())
257
+ : undefined,
258
+ name: typeof typed.name === "string" ? typed.name : undefined,
259
+ toolCallId: typeof typed.tool_call_id === "string" ? typed.tool_call_id : undefined,
260
+ toolCallNames: toolCalls.map((toolCall) => typeof toolCall === "object" && toolCall !== null && typeof toolCall.name === "string"
261
+ ? toolCall.name
262
+ : ""),
263
+ contentHead: typeof typed.content === "string"
264
+ ? typed.content.slice(0, 120)
265
+ : typeof kwargs.content === "string"
266
+ ? kwargs.content.slice(0, 120)
267
+ : "",
268
+ };
269
+ });
270
+ }
271
+ export async function runLocalToolInvocationLoop({ binding, request, primaryTools, toolNameMapping, executableTools, builtinExecutableTools, callRuntimeWithToolParseRecovery, toolRuntimeContext, externalPlanEvidence, }) {
123
272
  const executedToolResults = [];
124
273
  let activeRequest = request;
125
274
  let currentMessages = Array.isArray(activeRequest.messages) ? [...activeRequest.messages] : [];
126
- const maxToolIterations = 8;
275
+ const maxToolIterations = resolveMaxToolIterations();
276
+ let lastRecoveryInstruction = "";
277
+ let lastRecoveryExecutedCount = -1;
278
+ let repeatedRecoveryWithoutProgress = 0;
279
+ let repeatedPlanOnlyAfterPlan = 0;
280
+ let pendingResult;
127
281
  let result;
128
282
  const toolCatalog = new Map();
129
283
  for (const tool of primaryTools) {
@@ -135,13 +289,21 @@ export async function runLocalToolInvocationLoop({ binding, request, primaryTool
135
289
  }
136
290
  for (let iteration = 0; iteration < maxToolIterations; iteration += 1) {
137
291
  const isFinalIteration = iteration + 1 === maxToolIterations;
138
- result = await callRuntimeWithToolParseRecovery(activeRequest);
292
+ result = pendingResult ?? await callRuntimeWithToolParseRecovery(activeRequest);
293
+ pendingResult = undefined;
139
294
  const toolCalls = extractToolCallsFromResult(result);
140
295
  if (toolCalls.length === 0) {
141
296
  const terminalText = sanitizeVisibleText(extractVisibleOutput(result) || "");
142
- const hasIncompletePlanState = hasIncompleteExecutedPlan(executedToolResults);
297
+ const hasIncompletePlanState = hasIncompleteExecutedPlan(executedToolResults, externalPlanEvidence);
143
298
  const shouldEnforceIncompletePlan = requiresPlanEvidence(binding) && hasIncompletePlanState;
144
299
  const hasExecutionBeyondTodoPlanning = hasNonTodoToolEvidence(executedToolResults);
300
+ const hasAvailableNonPlanningTool = primaryTools.some((tool) => !isPlanToolName(tool.name));
301
+ if (requiresPlanEvidence(binding)
302
+ && hasPlanStateEvidence(executedToolResults, externalPlanEvidence)
303
+ && !hasIncompletePlanState
304
+ && !hasAvailableNonPlanningTool) {
305
+ break;
306
+ }
145
307
  const toolErrorRecoveryInstruction = latestToolErrorRecoveryInstruction(executedToolResults)
146
308
  ?? terminalToolErrorRecoveryInstruction(terminalText);
147
309
  const leakedJsonToolCallRecoveryInstruction = terminalText && salvageJsonToolCalls(terminalText).length > 0
@@ -149,29 +311,70 @@ export async function runLocalToolInvocationLoop({ binding, request, primaryTool
149
311
  : null;
150
312
  const recoveryInstruction = toolErrorRecoveryInstruction ?? leakedJsonToolCallRecoveryInstruction ?? (terminalText
151
313
  ? resolveExecutionWithoutToolEvidenceTextInstruction(activeRequest, terminalText, false, {
152
- hasWriteTodosEvidence: executedToolResults.some((item) => item.toolName === "write_todos"),
314
+ hasWriteTodosEvidence: externalPlanEvidence === true || executedToolResults.some((item) => isPlanToolName(item.toolName)),
153
315
  hasToolResultEvidence: hasExecutionBeyondTodoPlanning,
154
- hasPlanStateEvidence: hasPlanStateEvidence(executedToolResults),
316
+ hasPlanStateEvidence: hasPlanStateEvidence(executedToolResults, externalPlanEvidence),
155
317
  hasIncompletePlanState: shouldEnforceIncompletePlan,
156
318
  requiresPlan: requiresPlanEvidence(binding),
157
319
  })
158
320
  : shouldEnforceIncompletePlan
159
321
  ? AUTONOMOUS_INVESTIGATION_RECOVERY_INSTRUCTION
160
322
  : null);
323
+ if (requiresPlanEvidence(binding)
324
+ && !hasPlanStateEvidence(executedToolResults, externalPlanEvidence)
325
+ && builtinExecutableTools.has("write_todos")) {
326
+ pendingResult = buildBootstrapPlanToolResult(primaryTools, activeRequest);
327
+ continue;
328
+ }
161
329
  if (recoveryInstruction) {
162
- if (isFinalIteration) {
163
- throw new Error(`Tool-calling loop exceeded the maximum of ${maxToolIterations} iterations`);
330
+ const executedCount = executedToolResults.length;
331
+ if (recoveryInstruction === lastRecoveryInstruction && executedCount === lastRecoveryExecutedCount) {
332
+ repeatedRecoveryWithoutProgress += 1;
333
+ }
334
+ else {
335
+ repeatedRecoveryWithoutProgress = 0;
336
+ lastRecoveryInstruction = recoveryInstruction;
337
+ lastRecoveryExecutedCount = executedCount;
338
+ }
339
+ if (repeatedRecoveryWithoutProgress >= MAX_REPEATED_RECOVERY_WITHOUT_PROGRESS) {
340
+ if (hasNonTodoToolEvidence(executedToolResults)) {
341
+ return {
342
+ result: buildDeterministicFinalFromToolEvidence(executedToolResults),
343
+ executedToolResults,
344
+ };
345
+ }
346
+ if (!hasAvailableNonPlanningTool && !hasIncompletePlanState && result) {
347
+ return { result, executedToolResults };
348
+ }
349
+ throw createToolLoopError({
350
+ reason: "model repeated the same recovery path without producing a tool call or new tool evidence",
351
+ iteration,
352
+ maxToolIterations,
353
+ terminalText,
354
+ executedToolResults,
355
+ });
356
+ }
357
+ if (iteration + 1 === maxToolIterations) {
358
+ throw createToolLoopError({
359
+ reason: "maximum iterations reached",
360
+ iteration,
361
+ maxToolIterations,
362
+ terminalText,
363
+ executedToolResults,
364
+ });
164
365
  }
165
366
  activeRequest = appendToolRecoveryInstruction(activeRequest, recoveryInstruction);
166
367
  continue;
167
368
  }
369
+ repeatedRecoveryWithoutProgress = 0;
370
+ repeatedPlanOnlyAfterPlan = 0;
168
371
  break;
169
372
  }
170
373
  const missingPlanRecoveryInstruction = resolveMissingPlanRecoveryInstruction({
171
374
  request: activeRequest,
172
375
  requiresPlan: requiresPlanEvidence(binding),
173
- hasPlanStateEvidence: hasPlanStateEvidence(executedToolResults),
174
- hasWriteTodosEvidence: executedToolResults.some((item) => item.toolName === "write_todos"),
376
+ hasPlanStateEvidence: hasPlanStateEvidence(executedToolResults, externalPlanEvidence),
377
+ hasWriteTodosEvidence: externalPlanEvidence === true || executedToolResults.some((item) => isPlanToolName(item.toolName)),
175
378
  hasToolResultEvidence: executedToolResults.length > 0 || toolCalls.length > 0,
176
379
  });
177
380
  if (missingPlanRecoveryInstruction
@@ -182,11 +385,55 @@ export async function runLocalToolInvocationLoop({ binding, request, primaryTool
182
385
  activeRequest = appendToolRecoveryInstruction(activeRequest, missingPlanRecoveryInstruction);
183
386
  continue;
184
387
  }
185
- if (!canReplayToolCallsLocally(binding, toolCalls, primaryTools, toolNameMapping, executableTools, builtinExecutableTools)) {
388
+ if (requiresPlanEvidence(binding)
389
+ && hasPlanStateEvidence(executedToolResults, externalPlanEvidence)
390
+ && !hasNonTodoToolEvidence(executedToolResults)
391
+ && toolCalls.length > 0
392
+ && toolCalls.every((toolCall) => isPlanToolName(toolCall.name))) {
393
+ repeatedPlanOnlyAfterPlan += 1;
394
+ if (repeatedPlanOnlyAfterPlan >= MAX_REPEATED_PLAN_ONLY_AFTER_PLAN) {
395
+ throw createToolLoopError({
396
+ reason: "model repeatedly selected only planning tools after the todo board already existed and before any non-planning evidence tool returned",
397
+ iteration,
398
+ maxToolIterations,
399
+ toolCalls,
400
+ executedToolResults,
401
+ });
402
+ }
403
+ if (iteration + 1 === maxToolIterations) {
404
+ throw createToolLoopError({
405
+ reason: "maximum iterations reached",
406
+ iteration,
407
+ maxToolIterations,
408
+ toolCalls,
409
+ executedToolResults,
410
+ });
411
+ }
412
+ activeRequest = appendToolRecoveryInstruction(activeRequest, AUTONOMOUS_INVESTIGATION_RECOVERY_INSTRUCTION);
413
+ pendingResult = undefined;
414
+ continue;
415
+ }
416
+ repeatedRecoveryWithoutProgress = 0;
417
+ repeatedPlanOnlyAfterPlan = 0;
418
+ const canReplayToolCalls = canReplayToolCallsLocally(binding, toolCalls, primaryTools, toolNameMapping, executableTools, builtinExecutableTools);
419
+ debugLocalToolReplay({
420
+ toolCalls,
421
+ result,
422
+ executableToolNames: [...executableTools.keys()],
423
+ builtinToolNames: [...builtinExecutableTools.keys()],
424
+ canReplay: canReplayToolCalls,
425
+ });
426
+ if (!canReplayToolCalls) {
186
427
  break;
187
428
  }
188
429
  if (iteration + 1 === maxToolIterations) {
189
- throw new Error(`Tool-calling loop exceeded the maximum of ${maxToolIterations} iterations`);
430
+ throw createToolLoopError({
431
+ reason: "maximum iterations reached",
432
+ iteration,
433
+ maxToolIterations,
434
+ toolCalls,
435
+ executedToolResults,
436
+ });
190
437
  }
191
438
  const resultMessages = result.messages;
192
439
  const nextMessages = [...currentMessages];
@@ -258,7 +505,7 @@ export async function runLocalToolInvocationLoop({ binding, request, primaryTool
258
505
  if (requiresPlanEvidence(binding)
259
506
  && toolCalls.length > 0
260
507
  && toolCalls.every((toolCall) => isPlanToolName(toolCall.name))
261
- && !hasIncompleteExecutedPlan(executedToolResults)
508
+ && !hasIncompleteExecutedPlan(executedToolResults, externalPlanEvidence)
262
509
  && hasNonTodoToolEvidence(executedToolResults)) {
263
510
  return {
264
511
  result: buildDeterministicFinalFromToolEvidence(executedToolResults),