npm - @botbotgo/agent-harness - Versions diffs - 0.0.418 → 0.0.419 - Mend

@botbotgo/agent-harness 0.0.418 → 0.0.419

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (25) hide show

package/dist/package-version.d.ts +1 -1
package/dist/package-version.js +1 -1
package/dist/runtime/adapter/compat/openai-compatible.js +12 -0
package/dist/runtime/adapter/flow/invocation-flow.d.ts +2 -0
package/dist/runtime/adapter/flow/invocation-flow.js +13 -5
package/dist/runtime/adapter/flow/invoke-runtime.d.ts +1 -0
package/dist/runtime/adapter/flow/invoke-runtime.js +1 -0
package/dist/runtime/adapter/flow/stream-runtime.d.ts +4 -0
package/dist/runtime/adapter/flow/stream-runtime.js +177 -14
package/dist/runtime/adapter/invocation-result.js +17 -6
package/dist/runtime/adapter/local-tool-invocation.d.ts +2 -1
package/dist/runtime/adapter/local-tool-invocation.js +241 -21
package/dist/runtime/adapter/model/model-providers.js +261 -58
package/dist/runtime/adapter/model/prompted-json-tool-call-capture.d.ts +9 -0
package/dist/runtime/adapter/model/prompted-json-tool-call-capture.js +40 -0
package/dist/runtime/adapter/runtime-adapter-support.js +58 -12
package/dist/runtime/adapter/runtime-shell.js +3 -2
package/dist/runtime/adapter/stream-event-projection.js +22 -5
package/dist/runtime/adapter/tool/tool-arguments.js +157 -67
package/dist/runtime/adapter/tool/tool-replay.js +0 -4
package/dist/runtime/agent-runtime-adapter.d.ts +3 -0
package/dist/runtime/agent-runtime-adapter.js +217 -73
package/dist/runtime/harness/run/stream-run.js +20 -1
package/dist/workspace/resource-compilers.js +17 -4
package/package.json +1 -1

package/dist/runtime/adapter/local-tool-invocation.js CHANGED Viewed

@@ -1,4 +1,4 @@
-import { ToolMessage } from "@langchain/core/messages";
+import { AIMessage, ToolMessage } from "@langchain/core/messages";
 import { createModelFacingToolNameLookupCandidates, resolveModelFacingToolName } from "./tool/tool-name-mapping.js";
 import { canReplayToolCallsLocally } from "./tool/tool-replay.js";
 import { extractToolCallsFromResult, normalizeToolArgsForSchema, stringifyToolOutput } from "./tool/tool-arguments.js";
@@ -10,6 +10,59 @@ import { appendToolRecoveryInstruction, extractVisibleOutput, resolveMissingPlan
 import { salvageJsonToolCalls } from "../parsing/output-tool-args.js";
 import { AUTONOMOUS_INVESTIGATION_RECOVERY_INSTRUCTION } from "../prompts/runtime-prompts.js";
 const TOOL_FOLLOW_UP_INSTRUCTION = "One or more tool results are already available in this conversation. Answer the user's current request directly from the existing context and tool results. Do not ask the user to repeat inputs that are already present above.";
+const DEFAULT_MAX_TOOL_ITERATIONS = 10_000; // Fallback loop cap returned by resolveMaxToolIterations when the env override is missing or invalid.
+const MAX_REPEATED_RECOVERY_WITHOUT_PROGRESS = 2; // Threshold the loop's repeatedRecoveryWithoutProgress counter is compared against before it stops retrying.
+const MAX_REPEATED_PLAN_ONLY_AFTER_PLAN = 2; // Threshold the loop's repeatedPlanOnlyAfterPlan counter is compared against before it throws.
+function createBootstrapTodoPlan(primaryTools) { // Builds a canned todo list (array of { content, status }) from the names of the supplied tools.
+    const evidenceTools = primaryTools
+        .map((tool) => typeof tool.name === "string" ? tool.name.trim() : "") // Non-string names become "" and are dropped by the filter below.
+        .filter((name) => name.length > 0 && !isPlanToolName(name)) // Keep only non-empty names that isPlanToolName rejects.
+        .slice(0, 3); // At most three tool names are mentioned in the plan text.
+    if (evidenceTools.length === 0) { // No usable non-planning tool name: return a two-step plan already marked completed.
+        return [
+            {
+                content: "Establish the required visible plan for this request",
+                status: "completed",
+            },
+            {
+                content: "Return the final answer from the available conversation context",
+                status: "completed",
+            },
+        ];
+    }
+    const evidenceLabel = evidenceTools.length > 0 // NOTE(review): always true here because the empty case returned above; the fallback string looks unreachable.
+        ? evidenceTools.join(", ")
+        : "the selected non-planning evidence tool";
+    return [ // Three-step plan: the first step is in progress, the remaining two are pending.
+        {
+            content: `Select and run an appropriate non-planning evidence tool from: ${evidenceLabel}`,
+            status: "in_progress",
+        },
+        {
+            content: "Inspect the returned tool evidence and update the todo board",
+            status: "pending",
+        },
+        {
+            content: "Return the final answer grounded in observed tool output",
+            status: "pending",
+        },
+    ];
+}
+function buildBootstrapPlanToolResult(primaryTools) { // Returns a synthetic { messages } result holding one AIMessage that calls write_todos with the bootstrap plan.
+    return {
+        messages: [new AIMessage({
+                content: "", // No visible text; the message only carries the tool call.
+                tool_calls: [{
+                        id: `write-todos-bootstrap-${Math.random().toString(36).slice(2, 10)}`, // Random base-36 suffix of up to 8 characters; Math.random is not cryptographically secure.
+                        name: "write_todos",
+                        args: {
+                            todos: createBootstrapTodoPlan(primaryTools), // Plan content is derived from the same tool list.
+                        },
+                        type: "tool_call",
+                    }],
+            })],
+    };
+}
 function readPlanStateSummary(output) {
     if (typeof output !== "object" || output === null) {
         return null;
@@ -30,7 +83,7 @@ function readPlanStateSummary(output) {
         inProgress: typeof typedCounts.inProgress === "number" ? typedCounts.inProgress : 0,
     };
 }
-function hasIncompleteExecutedPlan(executedToolResults) {
+function hasIncompleteExecutedPlan(executedToolResults, externalPlanEvidence = false) {
     for (const latest of [...executedToolResults].reverse()) {
         const summary = readPlanStateSummary(latest.output);
         if (!summary) {
@@ -38,7 +91,7 @@ function hasIncompleteExecutedPlan(executedToolResults) {
         }
         return summary.pending > 0 || summary.inProgress > 0;
     }
-    return false;
+    return externalPlanEvidence;
 }
 function normalizeToolName(value) {
     return typeof value === "string" ? value.trim().toLowerCase().replace(/[\s-]+/gu, "_") : "";
@@ -51,19 +104,45 @@ function isPlanToolName(toolName) {
     return normalized === "write_todos"
         || normalized === "read_todos"
         || normalized === "tool_call_write_todos"
-        || normalized === "tool_call_read_todos";
+        || normalized === "tool_call_read_todos"
+        || normalized === "call_write_todos"
+        || normalized === "call_read_todos";
 }
 function isFallbackTodoCompletionToolCall(toolCall) {
     return typeof toolCall.id === "string"
         && toolCall.id.startsWith("fallback-complete-")
-        && (toolCall.name === "write_todos" || toolCall.name === "tool_call_write_todos");
+        && isPlanToolName(toolCall.name) // Replaces the removed exact-name comparison with the shared plan-tool check.
+        && normalizeToolName(toolCall.name).includes("write_todos"); // Among plan tools, accept only names whose normalized form contains "write_todos".
+}
+function resolveMaxToolIterations() { // Returns the loop iteration cap: AGENT_HARNESS_MAX_TOOL_ITERATIONS when it parses to a positive integer, otherwise the default.
+    const raw = process.env.AGENT_HARNESS_MAX_TOOL_ITERATIONS;
+    if (!raw) { // Unset or empty string.
+        return DEFAULT_MAX_TOOL_ITERATIONS;
+    }
+    const parsed = Number.parseInt(raw, 10); // parseInt ignores trailing non-digit characters, so "12abc" yields 12.
+    return Number.isFinite(parsed) && parsed > 0 ? parsed : DEFAULT_MAX_TOOL_ITERATIONS; // NaN, zero and negative values fall back to the default.
+}
+function summarizeToolLoopState(input) { // Formats a single-line diagnostic from input.reason, iteration, maxToolIterations, toolCalls, executedToolResults and terminalText.
+    const toolCallNames = input.toolCalls?.map((toolCall) => toolCall.name).filter(Boolean) ?? []; // toolCalls is optional; falsy names are dropped.
+    const executedNames = input.executedToolResults.map((item) => `${item.toolName}${item.isError ? ":error" : ""}`); // Results flagged isError get an ":error" suffix.
+    const visibleText = input.terminalText?.trim(); // terminalText is optional as well.
+    return [
+        `Tool-calling loop stopped: ${input.reason}.`,
+        `iteration=${input.iteration + 1}/${input.maxToolIterations}.`, // One is added to input.iteration before printing.
+        toolCallNames.length > 0 ? `toolCalls=${toolCallNames.join(",")}.` : "",
+        executedNames.length > 0 ? `executedTools=${executedNames.join(",")}.` : "",
+        visibleText ? `lastVisibleOutput=${visibleText.slice(0, 500)}` : "", // Visible output is capped at 500 characters.
+    ].filter(Boolean).join(" "); // Empty segments are removed before joining with single spaces.
+}
+function createToolLoopError(input) { // Wraps the summarizeToolLoopState(input) text in a plain Error; the caller is responsible for throwing it.
+    return new Error(summarizeToolLoopState(input));
 }
 // Keep deterministic evidence summaries bounded for prompt/log readability while
 // still preserving meaningful tool context; 4000 chars is a conservative cap.
 const TOOL_OUTPUT_TRUNCATION_LIMIT = 4000;
 function buildDeterministicFinalFromToolEvidence(executedToolResults) {
     const evidence = executedToolResults
-        .filter((item) => item.isError !== true && item.toolName !== "write_todos" && item.toolName !== "read_todos")
+        .filter((item) => item.isError !== true && !isPlanToolName(item.toolName))
         .map((item) => {
         const output = stringifyToolOutput(item.output).trim();
         const clipped = output.length > TOOL_OUTPUT_TRUNCATION_LIMIT
@@ -82,8 +161,8 @@ function buildDeterministicFinalFromToolEvidence(executedToolResults) {
     ].join("\n");
     return { output };
 }
-function hasPlanStateEvidence(executedToolResults) {
-    return executedToolResults.some((item) => item.toolName === "write_todos" || item.toolName === "read_todos" || readPlanStateSummary(item.output) !== null);
+function hasPlanStateEvidence(executedToolResults, externalPlanEvidence = false) { // Truthy when externalPlanEvidence is set, or when any executed result comes from a plan tool or yields a plan-state summary.
+    return externalPlanEvidence || executedToolResults.some((item) => isPlanToolName(item.toolName) || readPlanStateSummary(item.output) !== null); // Short-circuits on the external flag before scanning results.
 }
 function latestToolErrorRecoveryInstruction(executedToolResults) {
     const latest = executedToolResults.at(-1);
@@ -119,11 +198,59 @@ function extractLatestUserInput(request) {
     }
     return undefined;
 }
-export async function runLocalToolInvocationLoop({ binding, request, primaryTools, toolNameMapping, executableTools, builtinExecutableTools, callRuntimeWithToolParseRecovery, toolRuntimeContext, }) {
+function debugLocalToolReplay(input) { // Writes one JSON diagnostic line via console.error, only when AGENT_HARNESS_PROMPTED_JSON_DEBUG is exactly "1".
+    if (process.env.AGENT_HARNESS_PROMPTED_JSON_DEBUG !== "1") {
+        return;
+    }
+    console.error(JSON.stringify({
+        type: "local-tool-replay",
+        toolCallNames: input.toolCalls.map((toolCall) => toolCall.name),
+        resultMessages: summarizeResultMessages(input.result), // Condensed view of the trailing result messages.
+        executableToolNames: input.executableToolNames,
+        builtinToolNames: input.builtinToolNames,
+        canReplay: input.canReplay,
+    }));
+}
+function summarizeResultMessages(result) { // Returns compact summaries of the last eight entries of result.messages, or [] when result has no messages array.
+    const messages = typeof result === "object" && result !== null && Array.isArray(result.messages)
+        ? result.messages
+        : [];
+    return messages.slice(-8).map((message) => { // Only the trailing eight messages are summarized.
+        const typed = typeof message === "object" && message !== null ? message : {}; // Non-object entries are treated as empty objects.
+        const kwargs = typeof typed.kwargs === "object" && typed.kwargs !== null ? typed.kwargs : {}; // Fallback source for tool_calls and content.
+        const toolCalls = Array.isArray(typed.tool_calls) // Prefer tool_calls on the message itself, then on kwargs.
+            ? typed.tool_calls
+            : Array.isArray(kwargs.tool_calls)
+                ? kwargs.tool_calls
+                : [];
+        return {
+            role: typeof typed.role === "string" ? typed.role : undefined,
+            type: typeof typed._getType === "function" // Filled in only when the message exposes a _getType method.
+                ? String(typed._getType())
+                : undefined,
+            name: typeof typed.name === "string" ? typed.name : undefined,
+            toolCallId: typeof typed.tool_call_id === "string" ? typed.tool_call_id : undefined,
+            toolCallNames: toolCalls.map((toolCall) => typeof toolCall === "object" && toolCall !== null && typeof toolCall.name === "string" // Entries without a string name map to "".
+                ? toolCall.name
+                : ""),
+            contentHead: typeof typed.content === "string" // First 120 characters of string content; "" when neither source holds a string.
+                ? typed.content.slice(0, 120)
+                : typeof kwargs.content === "string"
+                    ? kwargs.content.slice(0, 120)
+                    : "",
+        };
+    });
+}
+export async function runLocalToolInvocationLoop({ binding, request, primaryTools, toolNameMapping, executableTools, builtinExecutableTools, callRuntimeWithToolParseRecovery, toolRuntimeContext, externalPlanEvidence, }) {
     const executedToolResults = [];
     let activeRequest = request;
     let currentMessages = Array.isArray(activeRequest.messages) ? [...activeRequest.messages] : [];
-    const maxToolIterations = 8;
+    const maxToolIterations = resolveMaxToolIterations();
+    let lastRecoveryInstruction = "";
+    let lastRecoveryExecutedCount = -1;
+    let repeatedRecoveryWithoutProgress = 0;
+    let repeatedPlanOnlyAfterPlan = 0;
+    let pendingResult;
     let result;
     const toolCatalog = new Map();
     for (const tool of primaryTools) {
@@ -135,13 +262,21 @@ export async function runLocalToolInvocationLoop({ binding, request, primaryTool
     }
     for (let iteration = 0; iteration < maxToolIterations; iteration += 1) {
         const isFinalIteration = iteration + 1 === maxToolIterations;
-        result = await callRuntimeWithToolParseRecovery(activeRequest);
+        result = pendingResult ?? await callRuntimeWithToolParseRecovery(activeRequest);
+        pendingResult = undefined;
         const toolCalls = extractToolCallsFromResult(result);
         if (toolCalls.length === 0) {
             const terminalText = sanitizeVisibleText(extractVisibleOutput(result) || "");
-            const hasIncompletePlanState = hasIncompleteExecutedPlan(executedToolResults);
+            const hasIncompletePlanState = hasIncompleteExecutedPlan(executedToolResults, externalPlanEvidence);
             const shouldEnforceIncompletePlan = requiresPlanEvidence(binding) && hasIncompletePlanState;
             const hasExecutionBeyondTodoPlanning = hasNonTodoToolEvidence(executedToolResults);
+            const hasAvailableNonPlanningTool = primaryTools.some((tool) => !isPlanToolName(tool.name));
+            if (requiresPlanEvidence(binding)
+                && hasPlanStateEvidence(executedToolResults, externalPlanEvidence)
+                && !hasIncompletePlanState
+                && !hasAvailableNonPlanningTool) {
+                break;
+            }
             const toolErrorRecoveryInstruction = latestToolErrorRecoveryInstruction(executedToolResults)
                 ?? terminalToolErrorRecoveryInstruction(terminalText);
             const leakedJsonToolCallRecoveryInstruction = terminalText && salvageJsonToolCalls(terminalText).length > 0
@@ -149,29 +284,70 @@ export async function runLocalToolInvocationLoop({ binding, request, primaryTool
                 : null;
             const recoveryInstruction = toolErrorRecoveryInstruction ?? leakedJsonToolCallRecoveryInstruction ?? (terminalText
                 ? resolveExecutionWithoutToolEvidenceTextInstruction(activeRequest, terminalText, false, {
-                    hasWriteTodosEvidence: executedToolResults.some((item) => item.toolName === "write_todos"),
+                    hasWriteTodosEvidence: externalPlanEvidence === true || executedToolResults.some((item) => isPlanToolName(item.toolName)),
                     hasToolResultEvidence: hasExecutionBeyondTodoPlanning,
-                    hasPlanStateEvidence: hasPlanStateEvidence(executedToolResults),
+                    hasPlanStateEvidence: hasPlanStateEvidence(executedToolResults, externalPlanEvidence),
                     hasIncompletePlanState: shouldEnforceIncompletePlan,
                     requiresPlan: requiresPlanEvidence(binding),
                 })
                 : shouldEnforceIncompletePlan
                     ? AUTONOMOUS_INVESTIGATION_RECOVERY_INSTRUCTION
                     : null);
+            if (requiresPlanEvidence(binding)
+                && !hasPlanStateEvidence(executedToolResults, externalPlanEvidence)
+                && builtinExecutableTools.has("write_todos")) {
+                pendingResult = buildBootstrapPlanToolResult(primaryTools);
+                continue;
+            }
             if (recoveryInstruction) {
-                if (isFinalIteration) {
-                    throw new Error(`Tool-calling loop exceeded the maximum of ${maxToolIterations} iterations`);
+                const executedCount = executedToolResults.length;
+                if (recoveryInstruction === lastRecoveryInstruction && executedCount === lastRecoveryExecutedCount) {
+                    repeatedRecoveryWithoutProgress += 1;
+                }
+                else {
+                    repeatedRecoveryWithoutProgress = 0;
+                    lastRecoveryInstruction = recoveryInstruction;
+                    lastRecoveryExecutedCount = executedCount;
+                }
+                if (repeatedRecoveryWithoutProgress >= MAX_REPEATED_RECOVERY_WITHOUT_PROGRESS) {
+                    if (hasNonTodoToolEvidence(executedToolResults)) {
+                        return {
+                            result: buildDeterministicFinalFromToolEvidence(executedToolResults),
+                            executedToolResults,
+                        };
+                    }
+                    if (!hasAvailableNonPlanningTool && !hasIncompletePlanState && result) {
+                        return { result, executedToolResults };
+                    }
+                    throw createToolLoopError({
+                        reason: "model repeated the same recovery path without producing a tool call or new tool evidence",
+                        iteration,
+                        maxToolIterations,
+                        terminalText,
+                        executedToolResults,
+                    });
+                }
+                if (iteration + 1 === maxToolIterations) {
+                    throw createToolLoopError({
+                        reason: "maximum iterations reached",
+                        iteration,
+                        maxToolIterations,
+                        terminalText,
+                        executedToolResults,
+                    });
                 }
                 activeRequest = appendToolRecoveryInstruction(activeRequest, recoveryInstruction);
                 continue;
             }
+            repeatedRecoveryWithoutProgress = 0;
+            repeatedPlanOnlyAfterPlan = 0;
             break;
         }
         const missingPlanRecoveryInstruction = resolveMissingPlanRecoveryInstruction({
             request: activeRequest,
             requiresPlan: requiresPlanEvidence(binding),
-            hasPlanStateEvidence: hasPlanStateEvidence(executedToolResults),
-            hasWriteTodosEvidence: executedToolResults.some((item) => item.toolName === "write_todos"),
+            hasPlanStateEvidence: hasPlanStateEvidence(executedToolResults, externalPlanEvidence),
+            hasWriteTodosEvidence: externalPlanEvidence === true || executedToolResults.some((item) => isPlanToolName(item.toolName)),
             hasToolResultEvidence: executedToolResults.length > 0 || toolCalls.length > 0,
         });
         if (missingPlanRecoveryInstruction
@@ -182,11 +358,55 @@ export async function runLocalToolInvocationLoop({ binding, request, primaryTool
             activeRequest = appendToolRecoveryInstruction(activeRequest, missingPlanRecoveryInstruction);
             continue;
         }
-        if (!canReplayToolCallsLocally(binding, toolCalls, primaryTools, toolNameMapping, executableTools, builtinExecutableTools)) {
+        if (requiresPlanEvidence(binding)
+            && hasPlanStateEvidence(executedToolResults, externalPlanEvidence)
+            && !hasNonTodoToolEvidence(executedToolResults)
+            && toolCalls.length > 0
+            && toolCalls.every((toolCall) => isPlanToolName(toolCall.name))) {
+            repeatedPlanOnlyAfterPlan += 1;
+            if (repeatedPlanOnlyAfterPlan >= MAX_REPEATED_PLAN_ONLY_AFTER_PLAN) {
+                throw createToolLoopError({
+                    reason: "model repeatedly selected only planning tools after the todo board already existed and before any non-planning evidence tool returned",
+                    iteration,
+                    maxToolIterations,
+                    toolCalls,
+                    executedToolResults,
+                });
+            }
+            if (iteration + 1 === maxToolIterations) {
+                throw createToolLoopError({
+                    reason: "maximum iterations reached",
+                    iteration,
+                    maxToolIterations,
+                    toolCalls,
+                    executedToolResults,
+                });
+            }
+            activeRequest = appendToolRecoveryInstruction(activeRequest, AUTONOMOUS_INVESTIGATION_RECOVERY_INSTRUCTION);
+            pendingResult = undefined;
+            continue;
+        }
+        repeatedRecoveryWithoutProgress = 0;
+        repeatedPlanOnlyAfterPlan = 0;
+        const canReplayToolCalls = canReplayToolCallsLocally(binding, toolCalls, primaryTools, toolNameMapping, executableTools, builtinExecutableTools);
+        debugLocalToolReplay({
+            toolCalls,
+            result,
+            executableToolNames: [...executableTools.keys()],
+            builtinToolNames: [...builtinExecutableTools.keys()],
+            canReplay: canReplayToolCalls,
+        });
+        if (!canReplayToolCalls) {
             break;
         }
         if (iteration + 1 === maxToolIterations) {
-            throw new Error(`Tool-calling loop exceeded the maximum of ${maxToolIterations} iterations`);
+            throw createToolLoopError({
+                reason: "maximum iterations reached",
+                iteration,
+                maxToolIterations,
+                toolCalls,
+                executedToolResults,
+            });
         }
         const resultMessages = result.messages;
         const nextMessages = [...currentMessages];
@@ -258,7 +478,7 @@ export async function runLocalToolInvocationLoop({ binding, request, primaryTool
         if (requiresPlanEvidence(binding)
             && toolCalls.length > 0
             && toolCalls.every((toolCall) => isPlanToolName(toolCall.name))
-            && !hasIncompleteExecutedPlan(executedToolResults)
+            && !hasIncompleteExecutedPlan(executedToolResults, externalPlanEvidence)
             && hasNonTodoToolEvidence(executedToolResults)) {
             return {
                 result: buildDeterministicFinalFromToolEvidence(executedToolResults),