@botbotgo/agent-harness 0.0.419 → 0.0.421

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -13,11 +13,75 @@ const TOOL_FOLLOW_UP_INSTRUCTION = "One or more tool results are already availab
13
13
  const DEFAULT_MAX_TOOL_ITERATIONS = 10_000;
14
14
  const MAX_REPEATED_RECOVERY_WITHOUT_PROGRESS = 2;
15
15
  const MAX_REPEATED_PLAN_ONLY_AFTER_PLAN = 2;
16
- function createBootstrapTodoPlan(primaryTools) {
16
/**
 * Produces a plain-text view of a request for keyword-based tool selection.
 *
 * Strings pass through unchanged, non-null objects are serialized with
 * `JSON.stringify`, and every other input yields an empty string.
 *
 * @param {unknown} request - The value to flatten into text.
 * @returns {string} Always a string (possibly empty), so callers can apply
 *   string methods to the result without further checks.
 */
function stringifyRequestForToolSelection(request) {
    if (typeof request === "string") {
        return request;
    }
    if (typeof request !== "object" || request === null) {
        return "";
    }
    try {
        // JSON.stringify returns undefined rather than a string when the
        // value's toJSON() produces undefined, a function or a symbol, so the
        // result is normalized to keep the "always a string" contract.
        const serialized = JSON.stringify(request);
        return typeof serialized === "string" ? serialized : "";
    }
    catch {
        // Deliberate best effort: values JSON.stringify cannot serialize
        // (circular references, BigInt members) contribute no text.
        return "";
    }
}
30
/**
 * Extracts a set of lower-cased matching tokens from free text.
 *
 * Two kinds of tokens are collected:
 *  - every run of letters, digits, `_` and `-` that is at least two UTF-16
 *    code units long;
 *  - for every run of two or more Han characters, all contiguous n-grams of
 *    2 to 4 characters, so text without word separators can still overlap
 *    with other text on shared fragments.
 *
 * @param {string} value - Text to tokenize.
 * @returns {Set<string>} The distinct tokens found in `value`.
 */
function extractSelectionTokens(value) {
    const tokens = new Set();
    for (const match of value.matchAll(/[\p{L}\p{N}_-]+/gu)) {
        const token = match[0].toLowerCase();
        if (token.length >= 2) {
            tokens.add(token);
        }
    }
    for (const match of value.matchAll(/[\p{Script=Han}]{2,}/gu)) {
        // Work on code points, not UTF-16 code units: the regex counts code
        // points (u flag), and Han characters outside the BMP occupy two code
        // units, so slicing the raw string would cut surrogate pairs in half.
        const characters = [...match[0]];
        const largestSize = Math.min(4, characters.length);
        for (let size = 2; size <= largestSize; size += 1) {
            for (let index = 0; index + size <= characters.length; index += 1) {
                tokens.add(characters.slice(index, index + size).join("").toLowerCase());
            }
        }
    }
    return tokens;
}
48
/**
 * Ranks the non-plan tools by how well they match the request and returns
 * the names of the best candidates.
 *
 * Scoring per tool:
 *  - each request token that also appears in the tool's name/description
 *    adds 2 points when the token is longer than 3 characters, else 1;
 *  - the tool's name occurring verbatim (case-insensitively) in the request
 *    text adds 6 points.
 *
 * Ties keep the order of `primaryTools`, except that for requests matching
 * the finance keyword pattern, tools whose name contains "finance" are
 * placed ahead of the other tools with the same score.
 *
 * @param {Array<{name?: unknown, description?: unknown}>} primaryTools -
 *   Candidate tools; entries without a non-empty string name, or for which
 *   `isPlanToolName` returns true, are ignored.
 * @param {unknown} request - The request whose text drives the ranking.
 * @returns {string[]} Up to four trimmed tool names, best match first.
 */
function prioritizeBootstrapEvidenceTools(primaryTools, request) {
    const requestText = stringifyRequestForToolSelection(request);
    const requestTokens = extractSelectionTokens(requestText);
    const isFinanceRequest = /\b(?:stock|ticker|finance|market|valuation|quote)\b|股票|股价|行情|估值|财报/iu.test(requestText);
    // Loop-invariant: lower-case the request once instead of once per tool.
    const normalizedRequestText = requestText.toLowerCase();
    const evidenceTools = primaryTools
        .map((tool) => ({
            name: typeof tool.name === "string" ? tool.name.trim() : "",
            description: typeof tool.description === "string" ? tool.description : "",
        }))
        // Discard unusable tools before scoring so they are never tokenized.
        .filter((tool) => tool.name.length > 0 && !isPlanToolName(tool.name))
        .map(({ name, description }) => {
            const toolTokens = extractSelectionTokens(`${name} ${description}`);
            let score = 0;
            for (const token of requestTokens) {
                if (toolTokens.has(token)) {
                    score += token.length > 3 ? 2 : 1;
                }
            }
            if (normalizedRequestText.includes(name.toLowerCase())) {
                score += 6;
            }
            return { name, score };
        })
        .sort((left, right) => {
            if (right.score !== left.score) {
                return right.score - left.score;
            }
            if (!isFinanceRequest) {
                // Equal scores: returning 0 keeps the input order, because
                // Array.prototype.sort is stable.
                return 0;
            }
            const leftFinance = left.name.includes("finance") ? 0 : 1;
            const rightFinance = right.name.includes("finance") ? 0 : 1;
            return leftFinance - rightFinance;
        })
        .map((tool) => tool.name);
    return evidenceTools.slice(0, 4);
}
83
+ function createBootstrapTodoPlan(primaryTools, request) {
84
+ const evidenceTools = prioritizeBootstrapEvidenceTools(primaryTools, request);
21
85
  if (evidenceTools.length === 0) {
22
86
  return [
23
87
  {
@@ -48,7 +112,7 @@ function createBootstrapTodoPlan(primaryTools) {
48
112
  },
49
113
  ];
50
114
  }
51
- function buildBootstrapPlanToolResult(primaryTools) {
115
+ function buildBootstrapPlanToolResult(primaryTools, request) {
52
116
  return {
53
117
  messages: [new AIMessage({
54
118
  content: "",
@@ -56,13 +120,26 @@ function buildBootstrapPlanToolResult(primaryTools) {
56
120
  id: `write-todos-bootstrap-${Math.random().toString(36).slice(2, 10)}`,
57
121
  name: "write_todos",
58
122
  args: {
59
- todos: createBootstrapTodoPlan(primaryTools),
123
+ todos: createBootstrapTodoPlan(primaryTools, request),
60
124
  },
61
125
  type: "tool_call",
62
126
  }],
63
127
  })],
64
128
  };
65
129
  }
130
/**
 * Wraps a list of tool descriptors in a synthetic, model-shaped result: one
 * message with empty content whose `tool_calls` request each given tool.
 *
 * @param {Array<{id?: string, name: string, args?: object}>} tools - Tools
 *   to request. A missing `id` is replaced by a generated
 *   `external-plan-evidence-<position>-<random>` identifier and missing
 *   `args` by an empty object.
 * @returns {{messages: Array<{content: string, tool_calls: object[]}>}}
 *   A result object holding the single synthetic message.
 */
function buildExternalPlanEvidenceToolResult(tools) {
    const toToolCall = ({ id, name, args }, position) => {
        // The random suffix is generated only when the caller supplied no id.
        const callId = id ?? `external-plan-evidence-${position + 1}-${Math.random().toString(36).slice(2, 10)}`;
        return {
            id: callId,
            name,
            args: args ?? {},
            type: "tool_call",
        };
    };
    const syntheticMessage = {
        content: "",
        tool_calls: tools.map(toToolCall),
    };
    return { messages: [syntheticMessage] };
}
66
143
  function readPlanStateSummary(output) {
67
144
  if (typeof output !== "object" || output === null) {
68
145
  return null;
@@ -183,6 +260,76 @@ function terminalToolErrorRecoveryInstruction(terminalText) {
183
260
  function requiresPlanEvidence(binding) {
184
261
  return binding.harnessRuntime.executionContract?.requiresPlan === true;
185
262
  }
263
/**
 * Picks the tool that the most recent successful plan-tool result commits to.
 *
 * Executed results are scanned from newest to oldest. For each non-error
 * result produced by a plan tool (per `isPlanToolName`), the todo items in
 * `output.summary.items` are examined: all items with status "in_progress",
 * or, when there are none, only the first item with status "pending". For
 * each candidate item the tool is chosen by:
 *  1. exact mention: exactly one available tool name occurs in the item's
 *     text (case-insensitive);
 *  2. otherwise token overlap: the tool with the strictly highest overlap
 *     score between the item's text and the tool's name/description.
 * Ambiguous items (several mentions, tied best score, no overlap) are
 * skipped and the search continues with the next item or older result.
 *
 * @param {Array<object>} executedToolResults - Tool results in execution
 *   order; entries expose `toolName`, `output` and optionally `isError`.
 * @param {Array<{name?: unknown, description?: unknown}>} primaryTools -
 *   Tools to choose from; plan tools and tools without a non-empty string
 *   name are excluded.
 * @returns {{name: string, args: object, id: string} | null} A tool call
 *   descriptor with empty args and an id derived from the index of the plan
 *   result, or null when no tool can be determined unambiguously.
 */
function resolveCommittedTodoEvidenceTool(executedToolResults, primaryTools) {
    const availableTools = primaryTools
        .filter((tool) => typeof tool.name === "string" && tool.name.length > 0 && !isPlanToolName(tool.name));
    if (availableTools.length === 0) {
        return null;
    }
    // Token sets depend only on the tools, never on the todo item, so they
    // are built lazily and at most once for the whole search.
    let toolTokenIndex = null;
    const getToolTokenIndex = () => {
        if (toolTokenIndex === null) {
            toolTokenIndex = availableTools.map((tool) => ({
                name: tool.name,
                tokens: extractSelectionTokens(`${tool.name} ${tool.description ?? ""}`),
            }));
        }
        return toolTokenIndex;
    };
    for (let index = executedToolResults.length - 1; index >= 0; index -= 1) {
        const result = executedToolResults[index];
        if (!result || result.isError === true || !isPlanToolName(result.toolName)) {
            continue;
        }
        const output = result.output;
        const summary = typeof output === "object" && output !== null
            ? output.summary
            : undefined;
        // Ignore malformed entries (null, primitives) instead of throwing on
        // the property accesses below.
        const items = typeof summary === "object" && summary !== null && Array.isArray(summary.items)
            ? summary.items.filter((item) => typeof item === "object" && item !== null)
            : [];
        const activeItems = items.filter((item) => item.status === "in_progress");
        const candidateItems = activeItems.length > 0
            ? activeItems
            : items.filter((item) => item.status === "pending").slice(0, 1);
        for (const item of candidateItems) {
            const content = [
                item.content,
                item.description,
                item.title,
                item.name,
                item.text,
            ].filter((value) => typeof value === "string").join(" ").toLowerCase();
            const matched = availableTools
                .map((tool) => tool.name)
                .filter((toolName) => content.includes(toolName.toLowerCase()));
            if (matched.length === 1) {
                return {
                    name: matched[0],
                    args: {},
                    id: `todo-committed-evidence-${index}`,
                };
            }
            const itemTokens = extractSelectionTokens(content);
            const scored = getToolTokenIndex()
                .map(({ name, tokens }) => {
                    let score = 0;
                    for (const token of itemTokens) {
                        if (tokens.has(token)) {
                            score += token.length > 3 ? 2 : 1;
                        }
                    }
                    return { name, score };
                })
                .filter((entry) => entry.score > 0)
                .sort((left, right) => right.score - left.score);
            // Accept the best-scoring tool only when it is a strict winner.
            if (scored[0] && (!scored[1] || scored[0].score > scored[1].score)) {
                return {
                    name: scored[0].name,
                    args: {},
                    id: `todo-committed-evidence-${index}`,
                };
            }
        }
    }
    return null;
}
186
333
  function extractLatestUserInput(request) {
187
334
  const typedRequest = request;
188
335
  const messages = Array.isArray(typedRequest.messages) ? typedRequest.messages : [];
@@ -241,7 +388,7 @@ function summarizeResultMessages(result) {
241
388
  };
242
389
  });
243
390
  }
244
- export async function runLocalToolInvocationLoop({ binding, request, primaryTools, toolNameMapping, executableTools, builtinExecutableTools, callRuntimeWithToolParseRecovery, toolRuntimeContext, externalPlanEvidence, }) {
391
+ export async function runLocalToolInvocationLoop({ binding, request, primaryTools, toolNameMapping, executableTools, builtinExecutableTools, callRuntimeWithToolParseRecovery, toolRuntimeContext, externalPlanEvidence, externalPlanEvidenceTool, externalPlanEvidenceTools, }) {
245
392
  const executedToolResults = [];
246
393
  let activeRequest = request;
247
394
  let currentMessages = Array.isArray(activeRequest.messages) ? [...activeRequest.messages] : [];
@@ -262,9 +409,39 @@ export async function runLocalToolInvocationLoop({ binding, request, primaryTool
262
409
  }
263
410
  for (let iteration = 0; iteration < maxToolIterations; iteration += 1) {
264
411
  const isFinalIteration = iteration + 1 === maxToolIterations;
265
- result = pendingResult ?? await callRuntimeWithToolParseRecovery(activeRequest);
412
+ const externalPlanEvidenceToolCalls = externalPlanEvidenceTools && externalPlanEvidenceTools.length > 0
413
+ ? externalPlanEvidenceTools
414
+ : externalPlanEvidenceTool
415
+ ? [externalPlanEvidenceTool]
416
+ : [];
417
+ const shouldRunExternalPlanEvidenceTool = pendingResult === undefined
418
+ && requiresPlanEvidence(binding)
419
+ && externalPlanEvidence === true
420
+ && externalPlanEvidenceToolCalls.length > 0
421
+ && !hasNonTodoToolEvidence(executedToolResults);
422
+ const usedExternalPlanEvidenceToolThisIteration = shouldRunExternalPlanEvidenceTool;
423
+ result = pendingResult
424
+ ?? (shouldRunExternalPlanEvidenceTool
425
+ ? buildExternalPlanEvidenceToolResult(externalPlanEvidenceToolCalls)
426
+ : await callRuntimeWithToolParseRecovery(activeRequest));
266
427
  pendingResult = undefined;
267
- const toolCalls = extractToolCallsFromResult(result);
428
+ let toolCalls = extractToolCallsFromResult(result);
429
+ const committedTodoEvidenceTool = requiresPlanEvidence(binding)
430
+ && hasPlanStateEvidence(executedToolResults, externalPlanEvidence)
431
+ && !hasNonTodoToolEvidence(executedToolResults)
432
+ && (externalPlanEvidenceTool !== undefined || !hasIncompleteExecutedPlan(executedToolResults, externalPlanEvidence))
433
+ && (toolCalls.length === 0 || toolCalls.every((toolCall) => isPlanToolName(toolCall.name)))
434
+ ? externalPlanEvidenceTool
435
+ ? {
436
+ name: externalPlanEvidenceTool.name,
437
+ args: externalPlanEvidenceTool.args ?? {},
438
+ id: externalPlanEvidenceTool.id ?? "external-plan-evidence-tool",
439
+ }
440
+ : resolveCommittedTodoEvidenceTool(executedToolResults, primaryTools)
441
+ : null;
442
+ if (committedTodoEvidenceTool) {
443
+ toolCalls = [committedTodoEvidenceTool];
444
+ }
268
445
  if (toolCalls.length === 0) {
269
446
  const terminalText = sanitizeVisibleText(extractVisibleOutput(result) || "");
270
447
  const hasIncompletePlanState = hasIncompleteExecutedPlan(executedToolResults, externalPlanEvidence);
@@ -296,7 +473,7 @@ export async function runLocalToolInvocationLoop({ binding, request, primaryTool
296
473
  if (requiresPlanEvidence(binding)
297
474
  && !hasPlanStateEvidence(executedToolResults, externalPlanEvidence)
298
475
  && builtinExecutableTools.has("write_todos")) {
299
- pendingResult = buildBootstrapPlanToolResult(primaryTools);
476
+ pendingResult = buildBootstrapPlanToolResult(primaryTools, activeRequest);
300
477
  continue;
301
478
  }
302
479
  if (recoveryInstruction) {
@@ -388,7 +565,8 @@ export async function runLocalToolInvocationLoop({ binding, request, primaryTool
388
565
  }
389
566
  repeatedRecoveryWithoutProgress = 0;
390
567
  repeatedPlanOnlyAfterPlan = 0;
391
- const canReplayToolCalls = canReplayToolCallsLocally(binding, toolCalls, primaryTools, toolNameMapping, executableTools, builtinExecutableTools);
568
+ const canReplayToolCalls = usedExternalPlanEvidenceToolThisIteration
569
+ || canReplayToolCallsLocally(binding, toolCalls, primaryTools, toolNameMapping, executableTools, builtinExecutableTools);
392
570
  debugLocalToolReplay({
393
571
  toolCalls,
394
572
  result,
@@ -475,6 +653,67 @@ export async function runLocalToolInvocationLoop({ binding, request, primaryTool
475
653
  content: stringifyToolOutput(safeToolResult),
476
654
  }));
477
655
  }
656
+ const committedEvidenceTool = requiresPlanEvidence(binding)
657
+ && !hadNonTodoEvidenceBeforeToolReplay
658
+ && !hasNonTodoToolEvidence(executedToolResults)
659
+ && !hasIncompleteExecutedPlan(executedToolResults, externalPlanEvidence)
660
+ ? resolveCommittedTodoEvidenceTool(executedToolResults, primaryTools)
661
+ : null;
662
+ if (committedEvidenceTool) {
663
+ const resolvedToolName = resolveModelFacingToolName(committedEvidenceTool.name, toolNameMapping, primaryTools);
664
+ const executable = executableTools.get(committedEvidenceTool.name) ?? executableTools.get(resolvedToolName);
665
+ if (executable) {
666
+ const compiledTool = toolCatalog.get(committedEvidenceTool.name) ?? toolCatalog.get(resolvedToolName);
667
+ const normalizedArgs = normalizeToolArgsForSchema(committedEvidenceTool.args, executable.schema, undefined, {
668
+ latestUserInput,
669
+ });
670
+ const gateway = validateToolGatewayInput({
671
+ toolName: executable.name,
672
+ schema: executable.schema,
673
+ args: normalizedArgs,
674
+ requiresApproval: compiledTool ? toolRequiresRuntimeApproval(compiledTool) : false,
675
+ });
676
+ if (gateway.ok) {
677
+ const toolResult = toolRuntimeContext
678
+ ? await executable.invoke(gateway.input, { toolRuntimeContext })
679
+ : await executable.invoke(gateway.input);
680
+ const memoryCandidates = compiledTool ? extractMemoryCandidatesFromToolOutput(compiledTool, toolResult) : [];
681
+ const safeToolResult = await maybePersistLargeToolOutput({
682
+ toolName: executable.name,
683
+ output: toolResult,
684
+ toolRuntimeContext,
685
+ });
686
+ executedToolResults.push({
687
+ toolName: executable.name,
688
+ output: safeToolResult,
689
+ ...(memoryCandidates.length > 0 ? { memoryCandidates } : {}),
690
+ });
691
+ nextMessages.push(new ToolMessage({
692
+ name: executable.name,
693
+ tool_call_id: committedEvidenceTool.id,
694
+ content: stringifyToolOutput(safeToolResult),
695
+ }));
696
+ }
697
+ else {
698
+ executedToolResults.push({
699
+ toolName: executable.name,
700
+ output: gateway.error,
701
+ isError: true,
702
+ });
703
+ nextMessages.push(new ToolMessage({
704
+ name: executable.name,
705
+ tool_call_id: committedEvidenceTool.id,
706
+ content: stringifyToolOutput(gateway.error),
707
+ }));
708
+ }
709
+ }
710
+ }
711
+ if (usedExternalPlanEvidenceToolThisIteration && hasNonTodoToolEvidence(executedToolResults)) {
712
+ return {
713
+ result: buildDeterministicFinalFromToolEvidence(executedToolResults),
714
+ executedToolResults,
715
+ };
716
+ }
478
717
  if (requiresPlanEvidence(binding)
479
718
  && toolCalls.length > 0
480
719
  && toolCalls.every((toolCall) => isPlanToolName(toolCall.name))