@botbotgo/agent-harness 0.0.475 → 0.0.476
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +3 -1234
- package/README.zh.md +3 -1191
- package/dist/acp.js +1 -1
- package/dist/api.js +1 -404
- package/dist/benchmark/checkpoint-resume-cost-benchmark.js +1 -55
- package/dist/benchmark/deepagent-local-model-benchmark.js +2 -35
- package/dist/benchmark/upstream-runtime-ab-benchmark.js +1 -179
- package/dist/cli/chat-interactive.js +25 -244
- package/dist/cli/chat-rendering.js +6 -100
- package/dist/cli/chat-stream.js +23 -512
- package/dist/cli/chat-ui.js +21 -199
- package/dist/cli/chat-workspace.js +2 -210
- package/dist/cli/main.js +21 -428
- package/dist/cli/managed-service-commands.js +9 -63
- package/dist/cli/managed-service.js +2 -137
- package/dist/cli/options-init-chat.js +1 -108
- package/dist/cli/options-runtime.js +1 -158
- package/dist/cli/options-serve.js +1 -282
- package/dist/cli/options.js +2 -19
- package/dist/cli/process-guards.js +1 -139
- package/dist/cli/request-tree.js +7 -296
- package/dist/cli/runtime-commands.js +12 -258
- package/dist/cli/runtime-output.js +16 -155
- package/dist/cli/server-commands.js +16 -270
- package/dist/cli/workspace.js +1 -67
- package/dist/cli.js +1 -7
- package/dist/client/acp.js +1 -1
- package/dist/client/in-process.js +1 -67
- package/dist/client/index.js +1 -2
- package/dist/client/types.js +0 -1
- package/dist/client.js +1 -1
- package/dist/contracts/core.js +1 -1
- package/dist/contracts/runtime-evaluation.js +0 -1
- package/dist/contracts/runtime-memory.js +0 -1
- package/dist/contracts/runtime-observability.js +0 -1
- package/dist/contracts/runtime-requests.js +0 -1
- package/dist/contracts/runtime-scheduling.js +0 -1
- package/dist/contracts/runtime.js +1 -27
- package/dist/contracts/types.js +1 -3
- package/dist/contracts/workspace.js +0 -1
- package/dist/flow/build-flow-graph.js +1 -50
- package/dist/flow/export-mermaid.js +2 -464
- package/dist/flow/export-sequence-mermaid.js +2 -325
- package/dist/flow/flow-graph-normalization.js +1 -214
- package/dist/flow/flow-graph-runtime.js +1 -107
- package/dist/flow/flow-graph-upstream.js +1 -494
- package/dist/flow/index.js +1 -3
- package/dist/flow/types.js +0 -1
- package/dist/index.js +1 -5
- package/dist/init-project.js +1 -1
- package/dist/knowledge/config.js +1 -32
- package/dist/knowledge/contracts.js +0 -1
- package/dist/knowledge/index.js +1 -2
- package/dist/knowledge/module.js +12 -909
- package/dist/knowledge/procedural/config.js +1 -125
- package/dist/knowledge/procedural/index.js +1 -2
- package/dist/knowledge/procedural/manager.js +9 -345
- package/dist/mcp.js +1 -2
- package/dist/package-version.d.ts +1 -1
- package/dist/package-version.js +1 -2
- package/dist/persistence/file-store.js +3 -758
- package/dist/persistence/sqlite-request-context-store.js +5 -54
- package/dist/persistence/sqlite-request-queue-store.js +10 -108
- package/dist/persistence/sqlite-runtime.js +1 -86
- package/dist/persistence/sqlite-store.js +62 -810
- package/dist/persistence/types.js +0 -1
- package/dist/projections/presentation.js +37 -206
- package/dist/projections/request-events.js +2 -502
- package/dist/projections/upstream-events.js +1 -201
- package/dist/protocol/a2a/http-discovery.js +1 -178
- package/dist/protocol/a2a/http-rpc.js +6 -622
- package/dist/protocol/a2a/http.js +1 -138
- package/dist/protocol/a2a/task-state.js +3 -317
- package/dist/protocol/acp/client.js +8 -294
- package/dist/protocol/acp/harness-client.js +1 -218
- package/dist/protocol/acp/http.js +5 -130
- package/dist/protocol/acp/server.js +1 -310
- package/dist/protocol/acp/stdio.js +2 -69
- package/dist/protocol/ag-ui/http.js +3 -378
- package/dist/protocol/mcp/server.js +1 -428
- package/dist/resource/backend/workspace-scoped-backend.js +1 -319
- package/dist/resource/isolation.js +1 -237
- package/dist/resource/mcp/tool-support.js +3 -296
- package/dist/resource/mcp-tool-support.js +1 -2
- package/dist/resource/providers/resource-provider.js +1 -215
- package/dist/resource/resource-impl.js +1 -3
- package/dist/resource/resource-types.js +0 -1
- package/dist/resource/resource.js +1 -1
- package/dist/resource/sources.js +1 -247
- package/dist/resource/tools/function-tool-resolver.js +2 -272
- package/dist/runtime/adapter/compat/deepagent-compat.js +1 -29
- package/dist/runtime/adapter/compat/openai-compatible.js +1 -55
- package/dist/runtime/adapter/direct-builtin-utility.js +2 -90
- package/dist/runtime/adapter/flow/execution-context.js +1 -71
- package/dist/runtime/adapter/flow/invocation-flow.js +8 -425
- package/dist/runtime/adapter/flow/invoke-runtime.js +1 -20
- package/dist/runtime/adapter/flow/stream-runtime.js +11 -1395
- package/dist/runtime/adapter/invocation-result.js +2 -473
- package/dist/runtime/adapter/local-tool-invocation.js +6 -638
- package/dist/runtime/adapter/middleware/context-hygiene.js +1 -83
- package/dist/runtime/adapter/middleware-assembly.js +5 -477
- package/dist/runtime/adapter/model/invocation-request.js +3 -183
- package/dist/runtime/adapter/model/message-assembly.js +1 -28
- package/dist/runtime/adapter/model/model-providers.js +23 -1115
- package/dist/runtime/adapter/model/prompted-json-tool-call-capture.js +1 -40
- package/dist/runtime/adapter/model/prompted-json-tool-policy.js +1 -22
- package/dist/runtime/adapter/resilience.js +1 -104
- package/dist/runtime/adapter/runtime-adapter-support.js +3 -141
- package/dist/runtime/adapter/runtime-shell.js +5 -166
- package/dist/runtime/adapter/stream-event-projection.js +2 -622
- package/dist/runtime/adapter/stream-text-consumption.js +1 -18
- package/dist/runtime/adapter/terminal-status.js +2 -67
- package/dist/runtime/adapter/tool/builtin-middleware-tools.js +6 -627
- package/dist/runtime/adapter/tool/declared-middleware.js +1 -154
- package/dist/runtime/adapter/tool/interrupt-policy.js +1 -34
- package/dist/runtime/adapter/tool/provider-tool.js +1 -25
- package/dist/runtime/adapter/tool/resolved-tool.js +1 -225
- package/dist/runtime/adapter/tool/tool-arguments.js +3 -486
- package/dist/runtime/adapter/tool/tool-hitl.js +1 -346
- package/dist/runtime/adapter/tool/tool-name-mapping.js +1 -128
- package/dist/runtime/adapter/tool/tool-output-artifacts.js +2 -88
- package/dist/runtime/adapter/tool/tool-replay.js +1 -37
- package/dist/runtime/adapter/tool-resolution.js +1 -86
- package/dist/runtime/adapter/upstream-configurable-keys.js +1 -2
- package/dist/runtime/agent-runtime-adapter.js +60 -2338
- package/dist/runtime/agent-runtime-assembly.js +7 -249
- package/dist/runtime/env/runtime-env.js +1 -62
- package/dist/runtime/harness/background-runtime.js +1 -8
- package/dist/runtime/harness/bindings.js +1 -58
- package/dist/runtime/harness/events/event-bus.js +1 -16
- package/dist/runtime/harness/events/event-sink.js +1 -61
- package/dist/runtime/harness/events/events.js +1 -80
- package/dist/runtime/harness/events/listener-runtime.js +1 -13
- package/dist/runtime/harness/events/runtime-event-operations.js +1 -9
- package/dist/runtime/harness/events/streaming.js +1 -100
- package/dist/runtime/harness/events/timeline.js +1 -52
- package/dist/runtime/harness/public-shapes.js +1 -186
- package/dist/runtime/harness/run/artifact-paths.js +1 -15
- package/dist/runtime/harness/run/governance.js +1 -295
- package/dist/runtime/harness/run/helpers.js +1 -71
- package/dist/runtime/harness/run/inspection.js +1 -409
- package/dist/runtime/harness/run/operator-overview.js +1 -80
- package/dist/runtime/harness/run/queue-diagnostics.js +1 -15
- package/dist/runtime/harness/run/recovery.js +1 -162
- package/dist/runtime/harness/run/resources.js +1 -60
- package/dist/runtime/harness/run/resume.js +1 -56
- package/dist/runtime/harness/run/routing.js +1 -48
- package/dist/runtime/harness/run/run-lifecycle.js +1 -66
- package/dist/runtime/harness/run/run-operations.js +1 -217
- package/dist/runtime/harness/run/run-queue.js +1 -43
- package/dist/runtime/harness/run/run-slot-acquisition.js +1 -157
- package/dist/runtime/harness/run/session-records.js +1 -97
- package/dist/runtime/harness/run/start-run.js +1 -120
- package/dist/runtime/harness/run/startup-runtime.js +1 -69
- package/dist/runtime/harness/run/stream-run.js +8 -1418
- package/dist/runtime/harness/run/surface-semantics.js +1 -79
- package/dist/runtime/harness/runtime-defaults.js +1 -39
- package/dist/runtime/harness/system/boundary-analysis.js +1 -234
- package/dist/runtime/harness/system/health-monitor.js +1 -258
- package/dist/runtime/harness/system/inventory.js +1 -129
- package/dist/runtime/harness/system/mem0-ingestion-sync.js +5 -345
- package/dist/runtime/harness/system/policy-engine.js +1 -175
- package/dist/runtime/harness/system/runtime-memory-candidates.js +4 -110
- package/dist/runtime/harness/system/runtime-memory-consolidation.js +1 -51
- package/dist/runtime/harness/system/runtime-memory-manager.js +10 -693
- package/dist/runtime/harness/system/runtime-memory-policy.js +1 -155
- package/dist/runtime/harness/system/runtime-memory-records.js +11 -577
- package/dist/runtime/harness/system/runtime-memory-sync.js +5 -206
- package/dist/runtime/harness/system/session-memory-sync.js +3 -113
- package/dist/runtime/harness/system/skill-requirements.js +1 -112
- package/dist/runtime/harness/system/store.js +9 -365
- package/dist/runtime/harness/tool-gateway/index.js +1 -2
- package/dist/runtime/harness/tool-gateway/policy.js +1 -45
- package/dist/runtime/harness/tool-gateway/validation.js +1 -176
- package/dist/runtime/harness/tool-schema.js +1 -3
- package/dist/runtime/harness.js +3 -1490
- package/dist/runtime/index.js +1 -3
- package/dist/runtime/layout/runtime-layout.js +1 -31
- package/dist/runtime/maintenance/checkpoint-maintenance.js +2 -178
- package/dist/runtime/maintenance/file-checkpoint-saver.js +1 -106
- package/dist/runtime/maintenance/runtime-record-maintenance.js +2 -169
- package/dist/runtime/maintenance/sqlite-checkpoint-saver.js +4 -289
- package/dist/runtime/parsing/output-content.js +10 -550
- package/dist/runtime/parsing/output-parsing.js +1 -4
- package/dist/runtime/parsing/output-recovery.js +3 -213
- package/dist/runtime/parsing/output-tool-args.js +7 -663
- package/dist/runtime/parsing/stream-event-parsing.js +3 -362
- package/dist/runtime/prompts/runtime-prompts.js +4 -73
- package/dist/runtime/scheduling/system-schedule-manager.js +11 -532
- package/dist/runtime/skills/skill-metadata.js +1 -197
- package/dist/runtime/startup-tracing.js +2 -37
- package/dist/runtime/support/compiled-binding.js +1 -290
- package/dist/runtime/support/embedding-models.js +1 -118
- package/dist/runtime/support/harness-support.js +5 -137
- package/dist/runtime/support/llamaindex.js +1 -108
- package/dist/runtime/support/runtime-adapter-options.js +1 -29
- package/dist/runtime/support/runtime-factories.js +1 -51
- package/dist/runtime/support/vector-stores.js +9 -270
- package/dist/scaffold/init-project.js +54 -233
- package/dist/tooling/extensions.js +1 -311
- package/dist/tooling/module-loader.js +1 -55
- package/dist/tools.js +1 -176
- package/dist/utils/agent-display.js +1 -18
- package/dist/utils/bundled-text.js +4 -39
- package/dist/utils/compiled-binding.js +1 -33
- package/dist/utils/fs.js +2 -45
- package/dist/utils/id.js +1 -9
- package/dist/utils/message-content.js +1 -30
- package/dist/utils/object.js +1 -6
- package/dist/workspace/agent-binding-compiler.js +3 -613
- package/dist/workspace/compile.js +1 -472
- package/dist/workspace/framework-contract-validation.js +2 -322
- package/dist/workspace/index.js +1 -1
- package/dist/workspace/object-loader-paths.js +1 -71
- package/dist/workspace/object-loader-readers.js +1 -187
- package/dist/workspace/object-loader.js +1 -754
- package/dist/workspace/resource-compilers.js +1 -374
- package/dist/workspace/support/agent-capabilities.js +1 -37
- package/dist/workspace/support/agent-execution-config.js +1 -44
- package/dist/workspace/support/discovery.js +1 -147
- package/dist/workspace/support/source-collectors.js +1 -30
- package/dist/workspace/support/source-protocols.js +2 -192
- package/dist/workspace/support/workspace-ref-utils.js +1 -362
- package/dist/workspace/tool-hydration.js +1 -280
- package/dist/workspace/validate.js +1 -99
- package/dist/workspace/yaml-object-reader.js +1 -285
- package/package.json +7 -3
|
@@ -1,638 +1,6 @@
|
|
|
1
|
-
import
|
|
2
|
-
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
import { toolRequiresRuntimeApproval } from "./tool/tool-hitl.js";
|
|
8
|
-
import { validateToolGatewayInput } from "../harness/tool-gateway/index.js";
|
|
9
|
-
import { appendToolRecoveryInstruction, extractVisibleOutput, resolveMissingPlanRecoveryInstruction, resolveExecutionWithoutToolEvidenceTextInstruction, resolveToolCallRecoveryInstruction, sanitizeVisibleText, STRICT_TOOL_JSON_INSTRUCTION, } from "../parsing/output-parsing.js";
|
|
10
|
-
import { salvageJsonToolCalls } from "../parsing/output-tool-args.js";
|
|
11
|
-
import { REQUIRED_EXECUTION_CONTINUATION_INSTRUCTION } from "../prompts/runtime-prompts.js";
|
|
12
|
-
/** Prompt text telling the model to answer from tool results that are already in the conversation. */
const TOOL_FOLLOW_UP_INSTRUCTION = "One or more tool results are already available in this conversation. Answer the user's current request directly from the existing context and tool results. Do not ask the user to repeat inputs that are already present above.";

/** Iteration cap used when AGENT_HARNESS_MAX_TOOL_ITERATIONS is unset or unusable. */
const DEFAULT_MAX_TOOL_ITERATIONS = 10_000;

// Repetition thresholds. NOTE(review): their consumers are outside this excerpt;
// presumably they bound retries inside the tool invocation loop - confirm.
const MAX_REPEATED_RECOVERY_WITHOUT_PROGRESS = 2;
const MAX_REPEATED_PLAN_ONLY_AFTER_PLAN = 2;
const MAX_REPEATED_INVALID_EXTERNAL_PLAN_EVIDENCE_SELECTION = 2;

// Substrings that together identify the "initial planning" instruction message.
const REQUIRED_PLAN_CONTRACT_MARKER = "This agent has a required visible planning contract.";
const INITIAL_WRITE_TODOS_MARKER = "Your first action for this request must be write_todos";

/** Prompt text demanding exactly one non-planning tool call once a TODO board exists. */
const REQUIRED_SINGLE_EXTERNAL_PLAN_EVIDENCE_TOOL_INSTRUCTION = "The TODO board already exists. The next action must be exactly one non-planning evidence tool call. Do not call write_todos or read_todos, and do not call multiple tools in the same response.";
|
|
20
|
-
/**
 * Finds the one non-planning tool in the tool list, if there is exactly one.
 *
 * @param {Array<{name?: unknown}>} primaryTools - Declared tools.
 * @returns {string|undefined} The trimmed tool name, or `undefined` when zero
 *   or more than one non-planning tool is present.
 */
function resolveSingleBootstrapEvidenceTool(primaryTools) {
    const candidateNames = primaryTools.reduce((names, tool) => {
        const trimmedName = typeof tool.name === "string" ? tool.name.trim() : "";
        // Unnamed tools and planning tools never count as evidence tools.
        if (trimmedName.length > 0 && !isPlanToolName(trimmedName)) {
            names.push(trimmedName);
        }
        return names;
    }, []);
    if (candidateNames.length !== 1) {
        return undefined;
    }
    return candidateNames[0];
}
|
|
26
|
-
/**
 * Builds the TODO list used for a synthesized `write_todos` call.
 *
 * With no non-planning tool available the plan is two already-completed
 * items. Otherwise it is a three-step plan whose first step is in progress
 * and names the evidence tool when exactly one exists.
 *
 * @param {Array<{name?: unknown}>} primaryTools - Declared tools.
 * @returns {Array<{content: string, status: string}>} The todo entries.
 */
function createBootstrapTodoPlan(primaryTools) {
    const todo = (content, status) => ({ content, status });
    const evidenceTool = resolveSingleBootstrapEvidenceTool(primaryTools);
    const hasEvidenceTool = primaryTools.some((tool) => {
        const trimmedName = typeof tool.name === "string" ? tool.name.trim() : "";
        return trimmedName.length > 0 && !isPlanToolName(trimmedName);
    });
    if (!hasEvidenceTool) {
        return [
            todo("Establish the required visible plan for this request", "completed"),
            todo("Return the final answer from the available conversation context", "completed"),
        ];
    }
    const firstStep = evidenceTool
        ? `Run the only configured non-planning evidence tool: ${evidenceTool}`
        : "Select and run the appropriate non-planning evidence tool from the declared tool surface";
    return [
        todo(firstStep, "in_progress"),
        todo("Inspect the returned tool evidence and update the todo board", "pending"),
        todo("Return the final answer grounded in observed tool output", "pending"),
    ];
}
|
|
61
|
-
/**
 * Wraps a bootstrap TODO plan in a result object holding a single AI message
 * with one `write_todos` tool call.
 *
 * @param {Array<{name?: unknown}>} primaryTools - Declared tools, used to shape the plan.
 * @returns {{messages: Array}} A result with one `AIMessage`.
 */
function buildBootstrapPlanToolResult(primaryTools) {
    // The random suffix only keeps ids distinct between synthesized calls.
    const randomSuffix = Math.random().toString(36).slice(2, 10);
    const bootstrapCall = {
        id: `write-todos-bootstrap-${randomSuffix}`,
        name: "write_todos",
        args: {
            todos: createBootstrapTodoPlan(primaryTools),
        },
        type: "tool_call",
    };
    const bootstrapMessage = new AIMessage({
        content: "",
        tool_calls: [bootstrapCall],
    });
    return { messages: [bootstrapMessage] };
}
|
|
76
|
-
/**
 * Drops messages whose string content contains both the plan-contract marker
 * and the initial `write_todos` marker. Every other message is kept.
 *
 * @param {Array<unknown>} messages - Conversation messages.
 * @returns {Array<unknown>} A new array without the matching messages.
 */
function stripSatisfiedInitialPlanInstruction(messages) {
    const isInitialPlanInstruction = (message) => {
        if (typeof message !== "object" || message === null) {
            return false;
        }
        const { content } = message;
        return typeof content === "string"
            && content.includes(REQUIRED_PLAN_CONTRACT_MARKER)
            && content.includes(INITIAL_WRITE_TODOS_MARKER);
    };
    return messages.filter((message) => !isInitialPlanInstruction(message));
}
|
|
85
|
-
/**
 * Reads TODO counters from a tool output shaped like
 * `{ summary: { summary: { total, pending, inProgress } } }`.
 *
 * @param {unknown} output - Tool output to inspect.
 * @returns {{total?: number, pending: number, inProgress: number}|null}
 *   The counters, or `null` when the nested shape is missing or `total` is a
 *   number that is zero or negative. Non-numeric `pending` and `inProgress`
 *   are reported as 0; `total` is included only when it is a number.
 */
function readPlanStateSummary(output) {
    const isRecord = (candidate) => typeof candidate === "object" && candidate !== null;
    const counts = isRecord(output) && isRecord(output.summary)
        ? output.summary.summary
        : undefined;
    if (!isRecord(counts)) {
        return null;
    }
    const { total, pending, inProgress } = counts;
    const hasTotal = typeof total === "number";
    if (hasTotal && total <= 0) {
        return null;
    }
    const planSummary = hasTotal ? { total } : {};
    planSummary.pending = typeof pending === "number" ? pending : 0;
    planSummary.inProgress = typeof inProgress === "number" ? inProgress : 0;
    return planSummary;
}
|
|
109
|
-
/**
 * Reports whether the most recent tool result carrying a plan summary still
 * has pending or in-progress items.
 *
 * @param {Array<{output?: unknown}>} executedToolResults - Results, oldest first.
 * @param {boolean} [externalPlanEvidence=false] - Returned as-is when no
 *   result carries a plan summary.
 * @returns {boolean} Whether the plan is incomplete.
 */
function hasIncompleteExecutedPlan(executedToolResults, externalPlanEvidence = false) {
    // Walk newest-to-oldest; the first readable summary decides the answer.
    for (let index = executedToolResults.length - 1; index >= 0; index -= 1) {
        const planSummary = readPlanStateSummary(executedToolResults[index].output);
        if (planSummary) {
            return planSummary.pending > 0 || planSummary.inProgress > 0;
        }
    }
    return externalPlanEvidence;
}
|
|
119
|
-
/**
 * Canonicalizes a tool name: trims it, lower-cases it, and replaces each run
 * of whitespace or hyphens with a single underscore.
 *
 * @param {unknown} value - Candidate tool name.
 * @returns {string} The normalized name, or `""` for non-string input.
 */
function normalizeToolName(value) {
    if (typeof value !== "string") {
        return "";
    }
    const lowered = value.trim().toLowerCase();
    return lowered.replace(/[\s-]+/gu, "_");
}
|
|
122
|
-
/**
 * Reports whether at least one executed tool result came from a tool that is
 * not a planning (todo) tool.
 *
 * @param {Array<{toolName?: unknown}>} executedToolResults - Executed results.
 * @returns {boolean} `true` when any result is from a non-planning tool.
 */
function hasNonTodoToolEvidence(executedToolResults) {
    const onlyPlanTools = executedToolResults.every((item) => isPlanToolName(item.toolName));
    return !onlyPlanTools;
}
|
|
125
|
-
/**
 * Reports whether a tool name, once normalized, is one of the known spellings
 * of the todo planning tools.
 *
 * @param {unknown} toolName - Candidate tool name.
 * @returns {boolean} `true` for a planning tool name.
 */
function isPlanToolName(toolName) {
    const planToolAliases = [
        "write_todos",
        "read_todos",
        "tool_call_write_todos",
        "tool_call_read_todos",
        "call_write_todos",
        "call_read_todos",
    ];
    return planToolAliases.includes(normalizeToolName(toolName));
}
|
|
134
|
-
/**
 * Reports whether a tool call targets a planning tool, checking both the name
 * as emitted and the name returned by `resolveModelFacingToolName`.
 *
 * @param {{name?: unknown}} toolCall - Tool call to classify.
 * @param {unknown} toolNameMapping - Mapping forwarded to the resolver.
 * @param {Array<unknown>} primaryTools - Declared tools forwarded to the resolver.
 * @returns {boolean} `true` when either name is a planning tool name.
 */
function isPlanToolCall(toolCall, toolNameMapping, primaryTools) {
    const emittedName = toolCall.name;
    const resolvedToolName = resolveModelFacingToolName(String(emittedName ?? ""), toolNameMapping, primaryTools);
    if (isPlanToolName(emittedName)) {
        return true;
    }
    return isPlanToolName(resolvedToolName);
}
|
|
138
|
-
/**
 * Reports whether a tool call is a `write_todos`-style call whose id starts
 * with the `fallback-complete-` prefix.
 *
 * @param {{id?: unknown, name?: unknown}} toolCall - Tool call to classify.
 * @returns {boolean} `true` for a fallback todo-completion call.
 */
function isFallbackTodoCompletionToolCall(toolCall) {
    const { id, name } = toolCall;
    if (typeof id !== "string" || !id.startsWith("fallback-complete-")) {
        return false;
    }
    if (!isPlanToolName(name)) {
        return false;
    }
    // Planning tools include read_todos variants; only write variants qualify.
    return normalizeToolName(name).includes("write_todos");
}
|
|
144
|
-
/**
 * Resolves the tool-loop iteration cap from the
 * `AGENT_HARNESS_MAX_TOOL_ITERATIONS` environment variable.
 *
 * The value must be a plain positive decimal integer (surrounding whitespace
 * and a leading `+` are tolerated). Anything else falls back to the default.
 * `parseInt` is deliberately not used: it accepts a numeric prefix, so `"1e3"`
 * would become 1 and `"12abc"` would become 12.
 *
 * @returns {number} The configured cap, or `DEFAULT_MAX_TOOL_ITERATIONS`.
 */
function resolveMaxToolIterations() {
    const raw = process.env.AGENT_HARNESS_MAX_TOOL_ITERATIONS?.trim();
    if (!raw || !/^\+?\d+$/u.test(raw)) {
        return DEFAULT_MAX_TOOL_ITERATIONS;
    }
    const parsed = Number(raw);
    return Number.isSafeInteger(parsed) && parsed > 0 ? parsed : DEFAULT_MAX_TOOL_ITERATIONS;
}
|
|
152
|
-
/**
 * Renders a one-line, space-separated description of why the tool loop
 * stopped.
 *
 * @param {object} input
 * @param {string} input.reason - Why the loop stopped.
 * @param {number} input.iteration - Zero-based iteration index (printed one-based).
 * @param {number} input.maxToolIterations - Iteration cap.
 * @param {Array<{name?: string}>} [input.toolCalls] - Tool calls seen; falsy names are dropped.
 * @param {Array<{toolName: string, isError?: boolean}>} input.executedToolResults - Executed
 *   tools; errored ones get an `:error` suffix.
 * @param {string} [input.terminalText] - Last visible output, trimmed and capped at 500 characters.
 * @returns {string} The summary text.
 */
function summarizeToolLoopState(input) {
    const requestedNames = (input.toolCalls ?? [])
        .map((toolCall) => toolCall.name)
        .filter(Boolean);
    const executedNames = input.executedToolResults.map((item) => {
        const errorSuffix = item.isError ? ":error" : "";
        return `${item.toolName}${errorSuffix}`;
    });
    const visibleText = input.terminalText?.trim();
    const parts = [
        `Tool-calling loop stopped: ${input.reason}.`,
        `iteration=${input.iteration + 1}/${input.maxToolIterations}.`,
    ];
    if (requestedNames.length > 0) {
        parts.push(`toolCalls=${requestedNames.join(",")}.`);
    }
    if (executedNames.length > 0) {
        parts.push(`executedTools=${executedNames.join(",")}.`);
    }
    if (visibleText) {
        parts.push(`lastVisibleOutput=${visibleText.slice(0, 500)}`);
    }
    return parts.join(" ");
}
|
|
164
|
-
/**
 * Creates an `Error` whose message is the tool-loop state summary.
 *
 * @param {object} input - Same shape as accepted by `summarizeToolLoopState`.
 * @returns {Error} The error to throw or report.
 */
function createToolLoopError(input) {
    const message = summarizeToolLoopState(input);
    return new Error(message);
}
|
|
167
|
-
// Cap on each tool's output inside the deterministic evidence summary, so
// prompts and logs stay readable while keeping useful context. 4000 characters
// is a conservative limit.
const TOOL_OUTPUT_TRUNCATION_LIMIT = 4000;
/**
 * Builds a fixed-format final answer from collected tool evidence, for use
 * when no model-written synthesis is available.
 *
 * Errored results and planning-tool results are left out. Each remaining
 * output is stringified, trimmed and clipped to `TOOL_OUTPUT_TRUNCATION_LIMIT`.
 *
 * @param {Array<{toolName: string, output: unknown, isError?: boolean}>} executedToolResults
 * @returns {{output: string}} The rendered summary.
 */
function buildDeterministicFinalFromToolEvidence(executedToolResults) {
    const usableResults = executedToolResults.filter((item) => {
        if (item.isError === true) {
            return false;
        }
        return !isPlanToolName(item.toolName);
    });
    const sections = usableResults.map((item) => {
        const text = stringifyToolOutput(item.output).trim();
        const body = text.length > TOOL_OUTPUT_TRUNCATION_LIMIT
            ? `${text.slice(0, TOOL_OUTPUT_TRUNCATION_LIMIT)}\n... [truncated]`
            : text;
        return `## ${item.toolName}\n${body}`;
    });
    const evidenceBody = sections.length > 0
        ? sections.join("\n\n")
        : "(no non-planning tool evidence captured)";
    const headerLines = [
        "Status: completed",
        "Summary:",
        "- Completed the required TODO burn down after collecting tool evidence.",
        "- Returning deterministic evidence summary because the model did not provide a timely final synthesis after tool completion.",
        "",
        "Evidence:",
    ];
    return { output: [...headerLines, evidenceBody].join("\n") };
}
|
|
191
|
-
/**
 * Reports whether any plan evidence exists: supplied externally, produced by
 * a planning tool, or visible as a plan summary in some tool output.
 *
 * @param {Array<{toolName?: unknown, output?: unknown}>} executedToolResults - Executed results.
 * @param {boolean} [externalPlanEvidence=false] - Evidence supplied by the caller.
 * @returns {boolean} Whether plan evidence exists.
 */
function hasPlanStateEvidence(executedToolResults, externalPlanEvidence = false) {
    if (externalPlanEvidence) {
        return externalPlanEvidence;
    }
    return executedToolResults.some((item) => {
        if (isPlanToolName(item.toolName)) {
            return true;
        }
        return readPlanStateSummary(item.output) !== null;
    });
}
|
|
194
|
-
/**
 * Reports whether a value is an object (arrays included) and neither `null`,
 * a primitive, nor a function.
 *
 * @param {unknown} value - Value to test.
 * @returns {boolean} `true` for non-null, non-function objects.
 */
function isNonFunctionObject(value) {
    return value !== null && typeof value === "object";
}
|
|
197
|
-
/**
 * Joins the non-empty string entries of an object's array-valued `id` with
 * dots.
 *
 * @param {unknown} value - Candidate message object.
 * @returns {string} The dotted path, or `""` when `value` is not an object or
 *   its `id` is not an array.
 */
function readSerializedMessageType(value) {
    const idPath = isNonFunctionObject(value) ? value.id : undefined;
    if (!Array.isArray(idPath)) {
        return "";
    }
    const segments = [];
    for (const segment of idPath) {
        // Non-string and empty entries contribute nothing to the path.
        if (typeof segment === "string" && segment) {
            segments.push(segment);
        }
    }
    return segments.join(".");
}
|
|
207
|
-
/**
 * Interprets a value as a tool message and extracts its tool result.
 *
 * A value counts as a tool message when its `role` or `type` is `"tool"`, its
 * serialized type path ends with `ToolMessage`, or a string `tool_call_id` is
 * present on the value, its `kwargs`, or its `lc_kwargs`. Name, content and
 * status are read from `kwargs` first, then `lc_kwargs`, then the value.
 *
 * @param {unknown} value - Candidate message.
 * @returns {{toolName: string, output: unknown, isError?: true}|null}
 *   The tool result, or `null` when the value is not a tool message or has no
 *   usable name.
 */
function readUpstreamToolMessage(value) {
    if (!isNonFunctionObject(value)) {
        return null;
    }
    const kwargs = isNonFunctionObject(value.kwargs) ? value.kwargs : {};
    const lcKwargs = isNonFunctionObject(value.lc_kwargs) ? value.lc_kwargs : {};
    const messageType = readSerializedMessageType(value);
    const carriesToolCallId = [value, kwargs, lcKwargs]
        .some((holder) => typeof holder.tool_call_id === "string");
    const looksLikeToolMessage = value.role === "tool"
        || value.type === "tool"
        || messageType.endsWith("ToolMessage")
        || carriesToolCallId;
    if (!looksLikeToolMessage) {
        return null;
    }
    // Shared precedence for every field: kwargs, then lc_kwargs, then the value.
    const pick = (key) => kwargs[key] ?? lcKwargs[key] ?? value[key];
    const rawName = pick("name");
    const toolName = typeof rawName === "string" ? rawName.trim() : "";
    if (!toolName) {
        return null;
    }
    const toolResult = { toolName, output: pick("content") ?? "" };
    if (pick("status") === "error") {
        toolResult.isError = true;
    }
    return toolResult;
}
|
|
237
|
-
/**
 * Recursively collects tool results from a runtime result.
 *
 * Arrays are walked element by element. An object that reads as a tool
 * message yields that single result. Any other object is searched through
 * its `messages` and then its `output` properties.
 *
 * @param {unknown} value - Result, message list, or message to search.
 * @param {Set<object>} [seen] - Objects already visited, to stop on cycles.
 * @returns {Array<{toolName: string, output: unknown, isError?: true}>} Results in traversal order.
 */
function extractUpstreamToolMessages(value, seen = new Set()) {
    const isTraversable = typeof value === "object" && value !== null;
    if (!isTraversable || seen.has(value)) {
        return [];
    }
    seen.add(value);
    if (Array.isArray(value)) {
        const collected = [];
        for (const item of value) {
            for (const found of extractUpstreamToolMessages(item, seen)) {
                collected.push(found);
            }
        }
        return collected;
    }
    const direct = readUpstreamToolMessage(value);
    if (direct) {
        return [direct];
    }
    const fromMessages = extractUpstreamToolMessages(value.messages, seen);
    const fromOutput = extractUpstreamToolMessages(value.output, seen);
    return fromMessages.concat(fromOutput);
}
|
|
255
|
-
/**
 * Appends tool results found in `result` to `executedToolResults`, skipping
 * any with the same tool name, error flag and stringified output as an entry
 * already present. Mutates `executedToolResults` in place.
 *
 * @param {Array<{toolName: string, output: unknown, isError?: boolean}>} executedToolResults
 * @param {unknown} result - Runtime result to scan for tool messages.
 * @returns {void}
 */
function mergeUpstreamToolMessages(executedToolResults, result) {
    // NUL separators keep the three key parts from running into each other.
    const fingerprint = (entry) => {
        const state = entry.isError === true ? "error" : "ok";
        return `${entry.toolName}\u0000${state}\u0000${stringifyToolOutput(entry.output)}`;
    };
    const known = new Set(executedToolResults.map((item) => fingerprint(item)));
    for (const upstream of extractUpstreamToolMessages(result)) {
        const key = fingerprint(upstream);
        if (!known.has(key)) {
            known.add(key);
            executedToolResults.push(upstream);
        }
    }
}
|
|
266
|
-
/**
 * Chooses a recovery instruction for the most recent tool result when that
 * result is an error.
 *
 * Returns `null` when there is no latest result, when it is not an error, or
 * when its output is an object with `code === "INVALID_ARGUMENTS"`. Otherwise
 * the output is turned into text and passed to
 * `resolveToolCallRecoveryInstruction`, falling back to
 * `REQUIRED_EXECUTION_CONTINUATION_INSTRUCTION`.
 *
 * Serialization is guarded: `JSON.stringify` throws on cyclic structures and
 * BigInt values, and a failure here must not abort the recovery path.
 *
 * @param {Array<{output?: unknown, isError?: boolean}>} executedToolResults - Results, oldest first.
 * @returns {string|null} The instruction to send, or `null` when none applies.
 */
function latestToolErrorRecoveryInstruction(executedToolResults) {
    const latest = executedToolResults.at(-1);
    if (!latest || latest.isError !== true) {
        return null;
    }
    const { output } = latest;
    if (typeof output === "object" && output !== null && output.code === "INVALID_ARGUMENTS") {
        return null;
    }
    let message;
    if (typeof output === "string") {
        message = output;
    }
    else {
        try {
            message = JSON.stringify(output);
        }
        catch {
            // Unserializable payload: use a coarse but safe description.
            message = typeof output === "object"
                ? Object.prototype.toString.call(output)
                : String(output);
        }
    }
    return resolveToolCallRecoveryInstruction(new Error(message)) ?? REQUIRED_EXECUTION_CONTINUATION_INSTRUCTION;
}
|
|
279
|
-
/**
 * Passes the terminal text, wrapped in an `Error`, to
 * `resolveToolCallRecoveryInstruction` and returns its result.
 *
 * @param {string} terminalText - Visible text from the last model turn.
 * @returns {*} Whatever `resolveToolCallRecoveryInstruction` returns.
 */
function terminalToolErrorRecoveryInstruction(terminalText) {
    const syntheticError = new Error(terminalText);
    return resolveToolCallRecoveryInstruction(syntheticError);
}
|
|
282
|
-
/**
 * Reports whether the binding's execution contract sets `requiresPlan` to
 * exactly `true`.
 *
 * @param {{harnessRuntime: {executionContract?: {requiresPlan?: unknown}}}} binding
 * @returns {boolean} `true` only when `requiresPlan === true`.
 */
function requiresPlanEvidence(binding) {
    const { executionContract } = binding.harnessRuntime;
    return executionContract?.requiresPlan === true;
}
|
|
285
|
-
/**
 * Returns the trimmed text of the most recent user message that has
 * non-empty string content.
 *
 * A message counts as user-authored when its `role` is `user` or `human`, its
 * `_getType()` returns `human`, or the last entry of its array-valued `id` is
 * `HumanMessage`; each comparison ignores case and surrounding whitespace.
 * Messages whose content is not a string are skipped.
 *
 * @param {{messages?: unknown}} request - Request carrying the conversation.
 * @returns {string|undefined} The text, or `undefined` when none qualifies.
 */
function extractLatestUserInput(request) {
    const messages = Array.isArray(request.messages) ? request.messages : [];
    const lower = (text) => String(text).trim().toLowerCase();
    const isUserAuthored = (candidate) => {
        const role = typeof candidate?.role === "string" ? lower(candidate.role) : "";
        const messageType = typeof candidate?._getType === "function" ? lower(candidate._getType()) : "";
        const constructorType = Array.isArray(candidate?.id) ? lower(candidate.id.at(-1)) : "";
        return role === "user"
            || role === "human"
            || messageType === "human"
            || constructorType === "humanmessage";
    };
    let position = messages.length;
    while (position > 0) {
        position -= 1;
        const candidate = messages[position];
        if (!isUserAuthored(candidate) || typeof candidate?.content !== "string") {
            continue;
        }
        const text = candidate.content.trim();
        if (text.length > 0) {
            return text;
        }
    }
    return undefined;
}
|
|
304
|
-
/**
 * Writes a JSON diagnostic record about a local tool replay to stderr.
 * Does nothing unless `AGENT_HARNESS_PROMPTED_JSON_DEBUG` is exactly `"1"`.
 *
 * @param {object} input
 * @param {Array<{name: unknown, args: unknown}>} input.toolCalls - Tool calls being replayed.
 * @param {unknown} input.result - Runtime result whose messages are summarized.
 * @param {unknown} input.executableToolNames - Logged as given.
 * @param {unknown} input.builtinToolNames - Logged as given.
 * @param {unknown} input.canReplay - Logged as given.
 * @returns {void}
 */
function debugLocalToolReplay(input) {
    const debugEnabled = process.env.AGENT_HARNESS_PROMPTED_JSON_DEBUG === "1";
    if (!debugEnabled) {
        return;
    }
    const payload = {
        type: "local-tool-replay",
        toolCalls: input.toolCalls.map(({ name, args }) => ({ name, args })),
        resultMessages: summarizeResultMessages(input.result),
        executableToolNames: input.executableToolNames,
        builtinToolNames: input.builtinToolNames,
        canReplay: input.canReplay,
    };
    console.error(JSON.stringify(payload));
}
|
|
317
|
-
/**
 * Produces compact, log-friendly summaries of the last eight messages in a
 * runtime result.
 *
 * Tool calls are read from the message's `tool_calls`, else from
 * `kwargs.tool_calls`. Content is read from the message's `content`, else
 * from `kwargs.content`, and cut to its first 120 characters.
 *
 * @param {unknown} result - Runtime result that may carry a `messages` array.
 * @returns {Array<{role: (string|undefined), type: (string|undefined),
 *   name: (string|undefined), toolCallId: (string|undefined),
 *   toolCallNames: string[], contentHead: string}>} One summary per message.
 */
function summarizeResultMessages(result) {
    const isRecord = (candidate) => typeof candidate === "object" && candidate !== null;
    const stringOrUndefined = (candidate) => (typeof candidate === "string" ? candidate : undefined);
    const messages = isRecord(result) && Array.isArray(result.messages) ? result.messages : [];
    return messages.slice(-8).map((message) => {
        const typed = isRecord(message) ? message : {};
        const kwargs = isRecord(typed.kwargs) ? typed.kwargs : {};
        let toolCalls = [];
        if (Array.isArray(typed.tool_calls)) {
            toolCalls = typed.tool_calls;
        }
        else if (Array.isArray(kwargs.tool_calls)) {
            toolCalls = kwargs.tool_calls;
        }
        let contentHead = "";
        if (typeof typed.content === "string") {
            contentHead = typed.content.slice(0, 120);
        }
        else if (typeof kwargs.content === "string") {
            contentHead = kwargs.content.slice(0, 120);
        }
        return {
            role: stringOrUndefined(typed.role),
            type: typeof typed._getType === "function" ? String(typed._getType()) : undefined,
            name: stringOrUndefined(typed.name),
            toolCallId: stringOrUndefined(typed.tool_call_id),
            toolCallNames: toolCalls.map((toolCall) => (isRecord(toolCall) && typeof toolCall.name === "string" ? toolCall.name : "")),
            contentHead,
        };
    });
}
|
|
347
|
-
/**
 * Drives the local tool-calling loop: calls the runtime, executes any tool
 * calls it returns, feeds the tool output back, and repeats until the model
 * produces a terminal answer, the tool calls cannot be replayed locally, or
 * a stall/iteration guard fires.
 *
 * @param {object} options
 * @param {object} options.binding - Agent binding; passed to `requiresPlanEvidence` and `canReplayToolCallsLocally`.
 * @param {object} options.request - Initial runtime request. Its `messages` array (when present) is copied, never mutated.
 * @param {Iterable<object>} options.primaryTools - Declared tools; each is indexed by `name` and by its model-facing alias.
 * @param {object} options.toolNameMapping - Must expose an `originalToModelFacing` Map.
 * @param {Map<string, object>} options.executableTools - Executables looked up by raw or resolved tool name.
 * @param {Map<string, object>} options.builtinExecutableTools - Built-in executables; also probed for `write_todos`.
 * @param {(request: object) => Promise<object>} options.callRuntimeWithToolParseRecovery - Performs one runtime call.
 * @param {object} [options.toolRuntimeContext] - Forwarded to `invoke` and to large-output persistence when provided.
 * @param {boolean} [options.externalPlanEvidence] - Forwarded to the plan-state helpers; `true` also enables the
 *   single-evidence-tool enforcement branch.
 * @returns {Promise<{ result: object, executedToolResults: object[] }>} The last runtime result (or a deterministic
 *   summary built from tool evidence) plus every tool result collected along the way.
 * @throws {Error} When a stall guard or the iteration limit fires (via `createToolLoopError`), when a requested tool
 *   has no executable, or when no result was ever produced.
 */
export async function runLocalToolInvocationLoop({ binding, request, primaryTools, toolNameMapping, executableTools, builtinExecutableTools, callRuntimeWithToolParseRecovery, toolRuntimeContext, externalPlanEvidence, }) {
    const executedToolResults = [];
    let activeRequest = request;
    let currentMessages = Array.isArray(activeRequest.messages) ? [...activeRequest.messages] : [];
    const maxToolIterations = resolveMaxToolIterations();
    let lastRecoveryInstruction = "";
    let lastRecoveryExecutedCount = -1;
    let repeatedRecoveryWithoutProgress = 0;
    let repeatedPlanOnlyAfterPlan = 0;
    let repeatedInvalidExternalPlanEvidenceSelection = 0;
    let pendingResult;
    let result;

    // Index tools under both their original and model-facing names.
    const toolCatalog = new Map();
    for (const tool of primaryTools) {
        toolCatalog.set(tool.name, tool);
        const modelFacingName = toolNameMapping.originalToModelFacing.get(tool.name);
        if (modelFacingName) {
            toolCatalog.set(modelFacingName, tool);
        }
    }

    for (let iteration = 0; iteration < maxToolIterations; iteration += 1) {
        const isFinalIteration = iteration + 1 === maxToolIterations;
        // A synthetic (bootstrap) result queued by the previous pass takes priority over a runtime call.
        result = pendingResult ?? await callRuntimeWithToolParseRecovery(activeRequest);
        pendingResult = undefined;
        mergeUpstreamToolMessages(executedToolResults, result);
        const toolCalls = extractToolCallsFromResult(result);

        // Snapshot of the evidence state before any tool of this pass runs.
        // executedToolResults is not mutated again until the execution loop below.
        const requiresPlan = requiresPlanEvidence(binding);
        const hasPlanState = hasPlanStateEvidence(executedToolResults, externalPlanEvidence);
        const hasNonPlanningEvidence = hasNonTodoToolEvidence(executedToolResults);

        if (toolCalls.length === 0) {
            const terminalText = sanitizeVisibleText(extractVisibleOutput(result) || "");
            const hasIncompletePlanState = hasIncompleteExecutedPlan(executedToolResults, externalPlanEvidence);
            const shouldEnforceIncompletePlan = requiresPlan && hasIncompletePlanState;
            const hasAvailableNonPlanningTool = primaryTools.some((tool) => !isPlanToolName(tool.name));
            if (requiresPlan && hasPlanState && !hasIncompletePlanState && !hasAvailableNonPlanningTool) {
                // Plan exists, is complete, and there is nothing but planning tools to run: accept the answer.
                break;
            }
            const toolErrorRecoveryInstruction = latestToolErrorRecoveryInstruction(executedToolResults)
                ?? terminalToolErrorRecoveryInstruction(terminalText);
            const leakedJsonToolCallRecoveryInstruction = terminalText && salvageJsonToolCalls(terminalText).length > 0
                ? STRICT_TOOL_JSON_INSTRUCTION
                : null;
            const recoveryInstruction = toolErrorRecoveryInstruction ?? leakedJsonToolCallRecoveryInstruction ?? (terminalText
                ? resolveExecutionWithoutToolEvidenceTextInstruction(activeRequest, terminalText, false, {
                    hasWriteTodosEvidence: externalPlanEvidence === true || executedToolResults.some((item) => isPlanToolName(item.toolName)),
                    hasToolResultEvidence: hasNonPlanningEvidence,
                    hasPlanStateEvidence: hasPlanState,
                    hasIncompletePlanState: shouldEnforceIncompletePlan,
                    requiresPlan,
                })
                : shouldEnforceIncompletePlan
                    ? REQUIRED_EXECUTION_CONTINUATION_INSTRUCTION
                    : null);
            if (requiresPlan && !hasPlanState && builtinExecutableTools.has("write_todos")) {
                // No plan yet: synthesize a write_todos call instead of asking the model again.
                pendingResult = buildBootstrapPlanToolResult(primaryTools);
                continue;
            }
            if (!recoveryInstruction) {
                // Genuine terminal answer. (The counter resets that used to precede this break were
                // dead stores: nothing reads the counters after the loop exits.)
                break;
            }
            const executedCount = executedToolResults.length;
            if (recoveryInstruction === lastRecoveryInstruction && executedCount === lastRecoveryExecutedCount) {
                repeatedRecoveryWithoutProgress += 1;
            }
            else {
                repeatedRecoveryWithoutProgress = 0;
                lastRecoveryInstruction = recoveryInstruction;
                lastRecoveryExecutedCount = executedCount;
            }
            if (repeatedRecoveryWithoutProgress >= MAX_REPEATED_RECOVERY_WITHOUT_PROGRESS) {
                if (hasNonPlanningEvidence) {
                    return {
                        result: buildDeterministicFinalFromToolEvidence(executedToolResults),
                        executedToolResults,
                    };
                }
                if (!hasAvailableNonPlanningTool && !hasIncompletePlanState && result) {
                    return { result, executedToolResults };
                }
                throw createToolLoopError({
                    reason: "model repeated the same recovery path without producing a tool call or new tool evidence",
                    iteration,
                    maxToolIterations,
                    terminalText,
                    executedToolResults,
                });
            }
            if (isFinalIteration) {
                throw createToolLoopError({
                    reason: "maximum iterations reached",
                    iteration,
                    maxToolIterations,
                    terminalText,
                    executedToolResults,
                });
            }
            activeRequest = appendToolRecoveryInstruction(activeRequest, recoveryInstruction);
            continue;
        }

        // From here on toolCalls is non-empty, so the former `toolCalls.length > 0` re-checks are implied.
        const missingPlanRecoveryInstruction = resolveMissingPlanRecoveryInstruction({
            request: activeRequest,
            requiresPlan,
            hasPlanStateEvidence: hasPlanState,
            hasWriteTodosEvidence: externalPlanEvidence === true || executedToolResults.some((item) => isPlanToolName(item.toolName)),
            hasToolResultEvidence: executedToolResults.length > 0 || toolCalls.length > 0,
        });
        if (missingPlanRecoveryInstruction
            && toolCalls.some((toolCall) => {
                const resolvedToolName = resolveModelFacingToolName(toolCall.name, toolNameMapping, primaryTools);
                return resolvedToolName !== "write_todos" && resolvedToolName !== "read_todos" && toolCall.name !== "write_todos" && toolCall.name !== "read_todos";
            })) {
            activeRequest = appendToolRecoveryInstruction(activeRequest, missingPlanRecoveryInstruction);
            continue;
        }

        if (requiresPlan
            && externalPlanEvidence === true
            && hasPlanState
            && !hasNonPlanningEvidence
            && (toolCalls.length !== 1 || isPlanToolCall(toolCalls[0], toolNameMapping, primaryTools))) {
            repeatedInvalidExternalPlanEvidenceSelection += 1;
            if (repeatedInvalidExternalPlanEvidenceSelection >= MAX_REPEATED_INVALID_EXTERNAL_PLAN_EVIDENCE_SELECTION) {
                throw createToolLoopError({
                    reason: "model did not select exactly one non-planning evidence tool during delegated plan evidence recovery",
                    iteration,
                    maxToolIterations,
                    toolCalls,
                    executedToolResults,
                });
            }
            activeRequest = appendToolRecoveryInstruction(activeRequest, REQUIRED_SINGLE_EXTERNAL_PLAN_EVIDENCE_TOOL_INSTRUCTION);
            continue;
        }

        const onlyPlanToolCalls = toolCalls.every((toolCall) => isPlanToolName(toolCall.name));
        if (requiresPlan && hasPlanState && !hasNonPlanningEvidence && onlyPlanToolCalls) {
            // This branch is only entered when no non-planning evidence exists, so the old
            // "return a deterministic summary from evidence" fallbacks here could never run.
            repeatedPlanOnlyAfterPlan += 1;
            if (repeatedPlanOnlyAfterPlan >= MAX_REPEATED_PLAN_ONLY_AFTER_PLAN) {
                throw createToolLoopError({
                    reason: "model repeatedly selected only planning tools after the todo board already existed and before any non-planning evidence tool returned",
                    iteration,
                    maxToolIterations,
                    toolCalls,
                    executedToolResults,
                });
            }
            if (isFinalIteration) {
                throw createToolLoopError({
                    reason: "maximum iterations reached",
                    iteration,
                    maxToolIterations,
                    toolCalls,
                    executedToolResults,
                });
            }
            activeRequest = appendToolRecoveryInstruction(activeRequest, REQUIRED_EXECUTION_CONTINUATION_INSTRUCTION);
            continue;
        }

        repeatedRecoveryWithoutProgress = 0;
        repeatedPlanOnlyAfterPlan = 0;
        repeatedInvalidExternalPlanEvidenceSelection = 0;

        const canReplayToolCalls = canReplayToolCallsLocally(binding, toolCalls, primaryTools, toolNameMapping, executableTools, builtinExecutableTools);
        debugLocalToolReplay({
            toolCalls,
            result,
            executableToolNames: [...executableTools.keys()],
            builtinToolNames: [...builtinExecutableTools.keys()],
            canReplay: canReplayToolCalls,
        });
        if (!canReplayToolCalls) {
            // Hand the unexecuted tool calls back to the caller as-is.
            break;
        }
        if (isFinalIteration) {
            throw createToolLoopError({
                reason: "maximum iterations reached",
                iteration,
                maxToolIterations,
                toolCalls,
                executedToolResults,
            });
        }

        const resultMessages = result.messages;
        const nextMessages = [...currentMessages];
        if (Array.isArray(resultMessages) && resultMessages.length > 0) {
            nextMessages.push(...resultMessages);
        }
        const latestUserInput = extractLatestUserInput(activeRequest);
        nextMessages.push({
            role: "system",
            content: TOOL_FOLLOW_UP_INSTRUCTION,
        });

        for (let toolIndex = 0; toolIndex < toolCalls.length; toolIndex += 1) {
            const toolCall = toolCalls[toolIndex];
            const resolvedToolName = resolveModelFacingToolName(toolCall.name, toolNameMapping, primaryTools);
            const executable = executableTools.get(toolCall.name) ?? executableTools.get(resolvedToolName);
            const builtinExecutable = builtinExecutableTools.get(toolCall.name)
                ?? builtinExecutableTools.get(resolvedToolName)
                ?? createModelFacingToolNameLookupCandidates(toolCall.name)
                    .map((candidate) => builtinExecutableTools.get(candidate))
                    .find((candidate) => candidate !== undefined);
            const activeExecutable = executable ?? builtinExecutable;
            if (!activeExecutable) {
                throw new Error(`Tool ${toolCall.name} is not configured for this agent.`);
            }
            // Fall back to a positional id when the model did not supply one.
            const toolCallId = toolCall.id ?? `tool-${iteration + 1}-${toolIndex + 1}`;
            const compiledTool = toolCatalog.get(toolCall.name) ?? toolCatalog.get(resolvedToolName);
            const normalizedArgs = normalizeToolArgsForSchema(toolCall.args, activeExecutable.schema, toolCall.rawArgsInput, {
                latestUserInput,
            });
            const gateway = validateToolGatewayInput({
                toolName: activeExecutable.name,
                schema: activeExecutable.schema,
                args: normalizedArgs,
                requiresApproval: compiledTool ? toolRequiresRuntimeApproval(compiledTool) : false,
            });
            if (!gateway.ok) {
                // Rejected input is recorded as an error result and reported to the model; the tool is not invoked.
                executedToolResults.push({
                    toolName: activeExecutable.name,
                    output: gateway.error,
                    isError: true,
                });
                nextMessages.push(new ToolMessage({
                    name: activeExecutable.name,
                    tool_call_id: toolCallId,
                    content: stringifyToolOutput(gateway.error),
                }));
                continue;
            }
            const toolResult = toolRuntimeContext
                ? await activeExecutable.invoke(gateway.input, { toolRuntimeContext })
                : await activeExecutable.invoke(gateway.input);
            const memoryCandidates = compiledTool ? extractMemoryCandidatesFromToolOutput(compiledTool, toolResult) : [];
            const safeToolResult = await maybePersistLargeToolOutput({
                toolName: activeExecutable.name,
                output: toolResult,
                toolRuntimeContext,
            });
            executedToolResults.push({
                toolName: activeExecutable.name,
                output: safeToolResult,
                ...(memoryCandidates.length > 0 ? { memoryCandidates } : {}),
            });
            nextMessages.push(new ToolMessage({
                name: activeExecutable.name,
                tool_call_id: toolCallId,
                content: stringifyToolOutput(safeToolResult),
            }));
        }

        // Evidence state must be re-read here: the loop above appended to executedToolResults.
        if (requiresPlan
            && onlyPlanToolCalls
            && !hasIncompleteExecutedPlan(executedToolResults, externalPlanEvidence)
            && hasNonTodoToolEvidence(executedToolResults)) {
            return {
                result: buildDeterministicFinalFromToolEvidence(executedToolResults),
                executedToolResults,
            };
        }
        currentMessages = hasPlanStateEvidence(executedToolResults, externalPlanEvidence)
            ? stripSatisfiedInitialPlanInstruction(nextMessages)
            : nextMessages;
        activeRequest = {
            ...activeRequest,
            messages: currentMessages,
        };
    }

    if (!result) {
        throw new Error("Agent invocation returned no result");
    }
    return { result, executedToolResults };
}
|
|
1
|
+
import { AIMessage, ToolMessage } from "@langchain/core/messages";
import { createModelFacingToolNameLookupCandidates, resolveModelFacingToolName } from "./tool/tool-name-mapping.js";
import { canReplayToolCallsLocally } from "./tool/tool-replay.js";
import { extractToolCallsFromResult, normalizeToolArgsForSchema, stringifyToolOutput } from "./tool/tool-arguments.js";
import { extractMemoryCandidatesFromToolOutput } from "../harness/system/runtime-memory-candidates.js";
import { maybePersistLargeToolOutput } from "./tool/tool-output-artifacts.js";
import { toolRequiresRuntimeApproval } from "./tool/tool-hitl.js";
import { validateToolGatewayInput } from "../harness/tool-gateway/index.js";
import { appendToolRecoveryInstruction, extractVisibleOutput, resolveMissingPlanRecoveryInstruction, resolveExecutionWithoutToolEvidenceTextInstruction, resolveToolCallRecoveryInstruction, sanitizeVisibleText, STRICT_TOOL_JSON_INSTRUCTION } from "../parsing/output-parsing.js";
import { salvageJsonToolCalls } from "../parsing/output-tool-args.js";
import { REQUIRED_EXECUTION_CONTINUATION_INSTRUCTION } from "../prompts/runtime-prompts.js";

// System message appended before tool results are fed back to the model.
const TOOL_FOLLOW_UP_INSTRUCTION = "One or more tool results are already available in this conversation. Answer the user's current request directly from the existing context and tool results. Do not ask the user to repeat inputs that are already present above.";
// Iteration cap used when AGENT_HARNESS_MAX_TOOL_ITERATIONS is unset or invalid.
const DEFAULT_MAX_TOOL_ITERATIONS = 10000;
// Stall-guard thresholds for the three repeated-behaviour counters in the loop.
const MAX_REPEATED_RECOVERY_WITHOUT_PROGRESS = 2;
const MAX_REPEATED_PLAN_ONLY_AFTER_PLAN = 2;
const MAX_REPEATED_INVALID_EXTERNAL_PLAN_EVIDENCE_SELECTION = 2;
// A message containing BOTH markers is treated as the initial "plan first" instruction.
const INITIAL_PLAN_CONTRACT_MARKER = "This agent has a required visible planning contract.";
const INITIAL_PLAN_FIRST_ACTION_MARKER = "Your first action for this request must be write_todos";
const REQUIRED_SINGLE_EXTERNAL_PLAN_EVIDENCE_TOOL_INSTRUCTION = "The TODO board already exists. The next action must be exactly one non-planning evidence tool call. Do not call write_todos or read_todos, and do not call multiple tools in the same response.";
// Per-tool character budget in the deterministic evidence summary.
const MAX_EVIDENCE_CHARS_PER_TOOL = 4000;
// Normalized names that count as planning (todo-board) tools.
const PLAN_TOOL_NAMES = new Set([
    "write_todos",
    "read_todos",
    "tool_call_write_todos",
    "tool_call_read_todos",
    "call_write_todos",
    "call_read_todos",
]);

/** Trimmed, non-empty tool names that are not planning tools. */
function listNonPlanningToolNames(tools) {
    return tools
        .map((tool) => (typeof tool.name === "string" ? tool.name.trim() : ""))
        .filter((name) => name.length > 0 && !isPlanToolName(name));
}

/** Returns the non-planning tool name when exactly one is configured, else undefined. */
function resolveSoleNonPlanningToolName(tools) {
    const names = listNonPlanningToolNames(tools);
    return names.length === 1 ? names[0] : undefined;
}

/** Builds the todo list used by the synthetic write_todos bootstrap call. */
function buildBootstrapTodos(tools) {
    const soleToolName = resolveSoleNonPlanningToolName(tools);
    if (listNonPlanningToolNames(tools).length === 0) {
        // Nothing but planning tools: the plan is trivially complete.
        return [
            { content: "Establish the required visible plan for this request", status: "completed" },
            { content: "Return the final answer from the available conversation context", status: "completed" },
        ];
    }
    const firstStep = soleToolName
        ? `Run the only configured non-planning evidence tool: ${soleToolName}`
        : "Select and run the appropriate non-planning evidence tool from the declared tool surface";
    return [
        { content: firstStep, status: "in_progress" },
        { content: "Inspect the returned tool evidence and update the todo board", status: "pending" },
        { content: "Return the final answer grounded in observed tool output", status: "pending" },
    ];
}

/** Fabricates a runtime-shaped result whose only message is an AI write_todos tool call. */
function buildBootstrapPlanToolResult(tools) {
    const bootstrapId = `write-todos-bootstrap-${Math.random().toString(36).slice(2, 10)}`;
    const bootstrapCall = {
        id: bootstrapId,
        name: "write_todos",
        args: { todos: buildBootstrapTodos(tools) },
        type: "tool_call",
    };
    return { messages: [new AIMessage({ content: "", tool_calls: [bootstrapCall] })] };
}

/** Drops messages whose string content carries both initial-plan markers. */
function stripSatisfiedInitialPlanInstruction(messages) {
    return messages.filter((message) => {
        const typed = typeof message === "object" && message !== null ? message : {};
        if (typeof typed.content !== "string") {
            return true;
        }
        const isInitialPlanInstruction = typed.content.includes(INITIAL_PLAN_CONTRACT_MARKER)
            && typed.content.includes(INITIAL_PLAN_FIRST_ACTION_MARKER);
        return !isInitialPlanInstruction;
    });
}

/**
 * Reads todo counters from `output.summary.summary`.
 * Returns null when that path is not an object or when a numeric `total` is <= 0.
 */
function readTodoSummary(output) {
    if (typeof output !== "object" || output === null) {
        return null;
    }
    const outerSummary = output.summary;
    if (typeof outerSummary !== "object" || outerSummary === null) {
        return null;
    }
    const counts = outerSummary.summary;
    if (typeof counts !== "object" || counts === null) {
        return null;
    }
    const hasNumericTotal = typeof counts.total === "number";
    if (hasNumericTotal && counts.total <= 0) {
        return null;
    }
    return {
        ...(hasNumericTotal ? { total: counts.total } : {}),
        pending: typeof counts.pending === "number" ? counts.pending : 0,
        inProgress: typeof counts.inProgress === "number" ? counts.inProgress : 0,
    };
}

/**
 * Checks the most recent tool result that carries a todo summary and reports whether it
 * still has pending or in-progress items. Falls back to `externalPlanEvidence` when no
 * result carries a summary.
 */
function hasIncompleteExecutedPlan(executedToolResults, externalPlanEvidence = false) {
    for (let index = executedToolResults.length - 1; index >= 0; index -= 1) {
        const summary = readTodoSummary(executedToolResults[index].output);
        if (summary) {
            return summary.pending > 0 || summary.inProgress > 0;
        }
    }
    return externalPlanEvidence;
}

/** Trims, lower-cases, and collapses whitespace/hyphen runs to "_"; non-strings become "". */
function normalizeToolName(name) {
    if (typeof name !== "string") {
        return "";
    }
    return name.trim().toLowerCase().replace(/[\s-]+/gu, "_");
}

/** True when at least one executed result came from a non-planning tool. */
function hasNonTodoToolEvidence(executedToolResults) {
    return executedToolResults.some((item) => !isPlanToolName(item.toolName));
}

/** True when the normalized name is one of the known planning-tool aliases. */
function isPlanToolName(name) {
    return PLAN_TOOL_NAMES.has(normalizeToolName(name));
}

/** True when either the raw or the resolved model-facing name of the call is a planning tool. */
function isPlanToolCall(toolCall, toolNameMapping, primaryTools) {
    const resolvedName = resolveModelFacingToolName(String(toolCall.name ?? ""), toolNameMapping, primaryTools);
    return isPlanToolName(toolCall.name) || isPlanToolName(resolvedName);
}

// NOTE(review): not referenced anywhere in this module and not exported - confirm whether it is still needed.
function isFallbackCompleteWriteTodosCall(toolCall) {
    return typeof toolCall.id === "string"
        && toolCall.id.startsWith("fallback-complete-")
        && isPlanToolName(toolCall.name)
        && normalizeToolName(toolCall.name).includes("write_todos");
}

/** Reads the iteration cap from AGENT_HARNESS_MAX_TOOL_ITERATIONS (positive integer), else the default. */
function resolveMaxToolIterations() {
    const raw = process.env.AGENT_HARNESS_MAX_TOOL_ITERATIONS;
    if (!raw) {
        return DEFAULT_MAX_TOOL_ITERATIONS;
    }
    const parsed = Number.parseInt(raw, 10);
    if (Number.isFinite(parsed) && parsed > 0) {
        return parsed;
    }
    return DEFAULT_MAX_TOOL_ITERATIONS;
}

/** Renders the single-line diagnostic message for a tool-loop failure. */
function formatToolLoopError(details) {
    const toolCallNames = details.toolCalls?.map((toolCall) => toolCall.name).filter(Boolean) ?? [];
    const executedTools = details.executedToolResults.map((item) => `${item.toolName}${item.isError ? ":error" : ""}`);
    const lastVisibleOutput = details.terminalText?.trim();
    const parts = [
        `Tool-calling loop stopped: ${details.reason}.`,
        `iteration=${details.iteration + 1}/${details.maxToolIterations}.`,
    ];
    if (toolCallNames.length > 0) {
        parts.push(`toolCalls=${toolCallNames.join(",")}.`);
    }
    if (executedTools.length > 0) {
        parts.push(`executedTools=${executedTools.join(",")}.`);
    }
    if (lastVisibleOutput) {
        parts.push(`lastVisibleOutput=${lastVisibleOutput.slice(0, 500)}`);
    }
    return parts.join(" ");
}

function createToolLoopError(details) {
    return new Error(formatToolLoopError(details));
}

/**
 * Builds a final `{ output }` result directly from successful non-planning tool results,
 * truncating each tool's stringified output to MAX_EVIDENCE_CHARS_PER_TOOL characters.
 */
function buildDeterministicFinalFromToolEvidence(executedToolResults) {
    const evidenceSections = [];
    for (const item of executedToolResults) {
        if (item.isError === true || isPlanToolName(item.toolName)) {
            continue;
        }
        const text = stringifyToolOutput(item.output).trim();
        const body = text.length > MAX_EVIDENCE_CHARS_PER_TOOL
            ? `${text.slice(0, MAX_EVIDENCE_CHARS_PER_TOOL)}\n... [truncated]`
            : text;
        evidenceSections.push(`## ${item.toolName}\n${body}`);
    }
    const lines = [
        "Status: completed",
        "Summary:",
        "- Completed the required TODO burn down after collecting tool evidence.",
        "- Returning deterministic evidence summary because the model did not provide a timely final synthesis after tool completion.",
        "",
        "Evidence:",
        evidenceSections.length > 0 ? evidenceSections.join("\n\n") : "(no non-planning tool evidence captured)",
    ];
    return { output: lines.join("\n") };
}

/** Truthy when external evidence is supplied, or any result is a plan tool / carries a todo summary. */
function hasPlanStateEvidence(executedToolResults, externalPlanEvidence = false) {
    if (externalPlanEvidence) {
        return externalPlanEvidence;
    }
    return executedToolResults.some((item) => isPlanToolName(item.toolName) || readTodoSummary(item.output) !== null);
}

/** True for non-null objects (including arrays), false for primitives and functions. */
function isObjectLike(value) {
    return Object(value) === value && typeof value !== "function";
}

/** Joins the string segments of an array-valued `id` with "."; anything else yields "". */
function readSerializedIdPath(value) {
    if (!isObjectLike(value)) {
        return "";
    }
    const id = value.id;
    if (!Array.isArray(id)) {
        return "";
    }
    return id
        .map((segment) => (typeof segment === "string" ? segment : ""))
        .filter(Boolean)
        .join(".");
}

/** Converts a tool-message-like value into an executed-tool record, or null when it is not one. */
function toUpstreamToolResult(value) {
    if (!isObjectLike(value)) {
        return null;
    }
    const kwargs = isObjectLike(value.kwargs) ? value.kwargs : {};
    const lcKwargs = isObjectLike(value.lc_kwargs) ? value.lc_kwargs : {};
    const idPath = readSerializedIdPath(value);
    const looksLikeToolMessage = value.role === "tool"
        || value.type === "tool"
        || idPath.endsWith("ToolMessage")
        || typeof value.tool_call_id === "string"
        || typeof kwargs.tool_call_id === "string"
        || typeof lcKwargs.tool_call_id === "string";
    if (!looksLikeToolMessage) {
        return null;
    }
    const rawName = kwargs.name ?? lcKwargs.name ?? value.name;
    const toolName = typeof rawName === "string" ? rawName.trim() : "";
    if (!toolName) {
        return null;
    }
    const output = kwargs.content ?? lcKwargs.content ?? value.content ?? "";
    const status = kwargs.status ?? lcKwargs.status ?? value.status;
    return {
        toolName,
        output,
        ...(status === "error" ? { isError: true } : {}),
    };
}

/** Recursively gathers tool results from arrays and from `messages` / `output` properties. */
function collectUpstreamToolResults(value, visited = new Set()) {
    if (typeof value !== "object" || value === null || visited.has(value)) {
        return [];
    }
    // Track visited nodes so cyclic structures terminate.
    visited.add(value);
    if (Array.isArray(value)) {
        return value.flatMap((entry) => collectUpstreamToolResults(entry, visited));
    }
    const asToolResult = toUpstreamToolResult(value);
    if (asToolResult) {
        return [asToolResult];
    }
    return [
        ...collectUpstreamToolResults(value.messages, visited),
        ...collectUpstreamToolResults(value.output, visited),
    ];
}

/** Identity key used to de-duplicate tool results: name, error flag, stringified output. */
function fingerprintToolResult(item) {
    return [item.toolName, item.isError === true ? "error" : "ok", stringifyToolOutput(item.output)].join("\0");
}

/** Appends tool results found in `result` to `executedToolResults`, skipping ones already recorded. */
function mergeUpstreamToolMessages(executedToolResults, result) {
    const seen = new Set(executedToolResults.map(fingerprintToolResult));
    for (const candidate of collectUpstreamToolResults(result)) {
        const key = fingerprintToolResult(candidate);
        if (seen.has(key)) {
            continue;
        }
        seen.add(key);
        executedToolResults.push(candidate);
    }
}

/**
 * Derives a recovery instruction from the most recent tool result when it is an error,
 * except for outputs whose `code` is "INVALID_ARGUMENTS". Returns null otherwise.
 */
function latestToolErrorRecoveryInstruction(executedToolResults) {
    const latest = executedToolResults.at(-1);
    if (!latest || latest.isError !== true) {
        return null;
    }
    const output = latest.output;
    if (typeof output === "object" && output !== null && output.code === "INVALID_ARGUMENTS") {
        return null;
    }
    const message = typeof output === "string" ? output : JSON.stringify(output);
    return resolveToolCallRecoveryInstruction(new Error(message)) ?? REQUIRED_EXECUTION_CONTINUATION_INSTRUCTION;
}

function terminalToolErrorRecoveryInstruction(terminalText) {
    return resolveToolCallRecoveryInstruction(new Error(terminalText));
}

/** True only when the binding's execution contract sets `requiresPlan` to exactly true. */
function requiresPlanEvidence(binding) {
    return binding.harnessRuntime.executionContract?.requiresPlan === true;
}

/** Returns the trimmed content of the last non-empty user/human message, or undefined. */
function extractLatestUserInput(request) {
    const messages = Array.isArray(request.messages) ? request.messages : [];
    for (let index = messages.length - 1; index >= 0; index -= 1) {
        const message = messages[index];
        const role = typeof message?.role === "string" ? message.role.trim().toLowerCase() : "";
        const messageType = typeof message?._getType === "function" ? String(message._getType()).trim().toLowerCase() : "";
        const serializedKind = Array.isArray(message?.id) ? String(message.id.at(-1)).trim().toLowerCase() : "";
        const isHumanMessage = role === "user"
            || role === "human"
            || messageType === "human"
            || serializedKind === "humanmessage";
        if (!isHumanMessage || typeof message?.content !== "string") {
            continue;
        }
        const text = message.content.trim();
        if (text.length > 0) {
            return text;
        }
    }
    return undefined;
}

/** Emits a JSON replay trace on stderr when AGENT_HARNESS_PROMPTED_JSON_DEBUG is "1". */
function debugLocalToolReplay(details) {
    if (process.env.AGENT_HARNESS_PROMPTED_JSON_DEBUG !== "1") {
        return;
    }
    console.error(JSON.stringify({
        type: "local-tool-replay",
        toolCalls: details.toolCalls.map((toolCall) => ({ name: toolCall.name, args: toolCall.args })),
        resultMessages: summarizeResultMessages(details.result),
        executableToolNames: details.executableToolNames,
        builtinToolNames: details.builtinToolNames,
        canReplay: details.canReplay,
    }));
}

/** Compact description of the last eight messages of a result, for debug output. */
function summarizeResultMessages(result) {
    const messages = typeof result === "object" && result !== null && Array.isArray(result.messages)
        ? result.messages
        : [];
    return messages.slice(-8).map((message) => {
        const typed = typeof message === "object" && message !== null ? message : {};
        const kwargs = typeof typed.kwargs === "object" && typed.kwargs !== null ? typed.kwargs : {};
        let toolCalls = [];
        if (Array.isArray(typed.tool_calls)) {
            toolCalls = typed.tool_calls;
        }
        else if (Array.isArray(kwargs.tool_calls)) {
            toolCalls = kwargs.tool_calls;
        }
        let contentHead = "";
        if (typeof typed.content === "string") {
            contentHead = typed.content.slice(0, 120);
        }
        else if (typeof kwargs.content === "string") {
            contentHead = kwargs.content.slice(0, 120);
        }
        return {
            role: typeof typed.role === "string" ? typed.role : undefined,
            type: typeof typed._getType === "function" ? String(typed._getType()) : undefined,
            name: typeof typed.name === "string" ? typed.name : undefined,
            toolCallId: typeof typed.tool_call_id === "string" ? typed.tool_call_id : undefined,
            toolCallNames: toolCalls.map((toolCall) => (typeof toolCall === "object" && toolCall !== null && typeof toolCall.name === "string" ? toolCall.name : "")),
            contentHead,
        };
    });
}

/**
 * Runs the local tool-calling loop: call the runtime, execute returned tool calls,
 * feed results back, and repeat until a terminal answer, a non-replayable tool call,
 * or a stall/iteration guard.
 *
 * @returns {Promise<{ result: object, executedToolResults: object[] }>}
 * @throws {Error} On stall guards, iteration exhaustion, unconfigured tools, or a missing result.
 */
export async function runLocalToolInvocationLoop({ binding, request, primaryTools, toolNameMapping, executableTools, builtinExecutableTools, callRuntimeWithToolParseRecovery, toolRuntimeContext, externalPlanEvidence }) {
    const executedToolResults = [];
    let activeRequest = request;
    let currentMessages = Array.isArray(activeRequest.messages) ? [...activeRequest.messages] : [];
    const maxToolIterations = resolveMaxToolIterations();
    let lastRecoveryInstruction = "";
    let lastRecoveryExecutedCount = -1;
    let repeatedRecoveryWithoutProgress = 0;
    let repeatedPlanOnlyAfterPlan = 0;
    let repeatedInvalidExternalPlanEvidenceSelection = 0;
    let pendingResult;
    let result;

    // Tools are reachable by original name and by model-facing alias.
    const toolCatalog = new Map();
    for (const tool of primaryTools) {
        toolCatalog.set(tool.name, tool);
        const modelFacingName = toolNameMapping.originalToModelFacing.get(tool.name);
        if (modelFacingName) {
            toolCatalog.set(modelFacingName, tool);
        }
    }

    for (let iteration = 0; iteration < maxToolIterations; iteration += 1) {
        const isFinalIteration = iteration + 1 === maxToolIterations;
        result = pendingResult ?? await callRuntimeWithToolParseRecovery(activeRequest);
        pendingResult = undefined;
        mergeUpstreamToolMessages(executedToolResults, result);
        const toolCalls = extractToolCallsFromResult(result);

        if (toolCalls.length === 0) {
            const terminalText = sanitizeVisibleText(extractVisibleOutput(result) || "");
            const hasIncompletePlanState = hasIncompleteExecutedPlan(executedToolResults, externalPlanEvidence);
            const shouldEnforceIncompletePlan = requiresPlanEvidence(binding) && hasIncompletePlanState;
            const hasExecutionBeyondTodoPlanning = hasNonTodoToolEvidence(executedToolResults);
            const hasAvailableNonPlanningTool = primaryTools.some((tool) => !isPlanToolName(tool.name));
            if (requiresPlanEvidence(binding)
                && hasPlanStateEvidence(executedToolResults, externalPlanEvidence)
                && !hasIncompletePlanState
                && !hasAvailableNonPlanningTool) {
                break;
            }
            const toolErrorRecoveryInstruction = latestToolErrorRecoveryInstruction(executedToolResults)
                ?? terminalToolErrorRecoveryInstruction(terminalText);
            const leakedJsonToolCallRecoveryInstruction = terminalText && salvageJsonToolCalls(terminalText).length > 0
                ? STRICT_TOOL_JSON_INSTRUCTION
                : null;
            const recoveryInstruction = toolErrorRecoveryInstruction ?? leakedJsonToolCallRecoveryInstruction ?? (terminalText
                ? resolveExecutionWithoutToolEvidenceTextInstruction(activeRequest, terminalText, false, {
                    hasWriteTodosEvidence: externalPlanEvidence === true || executedToolResults.some((item) => isPlanToolName(item.toolName)),
                    hasToolResultEvidence: hasExecutionBeyondTodoPlanning,
                    hasPlanStateEvidence: hasPlanStateEvidence(executedToolResults, externalPlanEvidence),
                    hasIncompletePlanState: shouldEnforceIncompletePlan,
                    requiresPlan: requiresPlanEvidence(binding),
                })
                : shouldEnforceIncompletePlan
                    ? REQUIRED_EXECUTION_CONTINUATION_INSTRUCTION
                    : null);
            if (requiresPlanEvidence(binding)
                && !hasPlanStateEvidence(executedToolResults, externalPlanEvidence)
                && builtinExecutableTools.has("write_todos")) {
                // Queue a synthetic write_todos call; the next pass consumes it instead of calling the runtime.
                pendingResult = buildBootstrapPlanToolResult(primaryTools);
                continue;
            }
            if (recoveryInstruction) {
                const executedCount = executedToolResults.length;
                const isSameRecoveryWithoutProgress = recoveryInstruction === lastRecoveryInstruction
                    && executedCount === lastRecoveryExecutedCount;
                if (isSameRecoveryWithoutProgress) {
                    repeatedRecoveryWithoutProgress += 1;
                }
                else {
                    repeatedRecoveryWithoutProgress = 0;
                    lastRecoveryInstruction = recoveryInstruction;
                    lastRecoveryExecutedCount = executedCount;
                }
                if (repeatedRecoveryWithoutProgress >= MAX_REPEATED_RECOVERY_WITHOUT_PROGRESS) {
                    if (hasNonTodoToolEvidence(executedToolResults)) {
                        return {
                            result: buildDeterministicFinalFromToolEvidence(executedToolResults),
                            executedToolResults,
                        };
                    }
                    if (!hasAvailableNonPlanningTool && !hasIncompletePlanState && result) {
                        return { result, executedToolResults };
                    }
                    throw createToolLoopError({
                        reason: "model repeated the same recovery path without producing a tool call or new tool evidence",
                        iteration,
                        maxToolIterations,
                        terminalText,
                        executedToolResults,
                    });
                }
                if (isFinalIteration) {
                    throw createToolLoopError({
                        reason: "maximum iterations reached",
                        iteration,
                        maxToolIterations,
                        terminalText,
                        executedToolResults,
                    });
                }
                activeRequest = appendToolRecoveryInstruction(activeRequest, recoveryInstruction);
                continue;
            }
            repeatedRecoveryWithoutProgress = 0;
            repeatedPlanOnlyAfterPlan = 0;
            break;
        }

        const missingPlanRecoveryInstruction = resolveMissingPlanRecoveryInstruction({
            request: activeRequest,
            requiresPlan: requiresPlanEvidence(binding),
            hasPlanStateEvidence: hasPlanStateEvidence(executedToolResults, externalPlanEvidence),
            hasWriteTodosEvidence: externalPlanEvidence === true || executedToolResults.some((item) => isPlanToolName(item.toolName)),
            hasToolResultEvidence: executedToolResults.length > 0 || toolCalls.length > 0,
        });
        if (missingPlanRecoveryInstruction
            && toolCalls.some((toolCall) => {
                const resolvedToolName = resolveModelFacingToolName(toolCall.name, toolNameMapping, primaryTools);
                return resolvedToolName !== "write_todos"
                    && resolvedToolName !== "read_todos"
                    && toolCall.name !== "write_todos"
                    && toolCall.name !== "read_todos";
            })) {
            activeRequest = appendToolRecoveryInstruction(activeRequest, missingPlanRecoveryInstruction);
            continue;
        }

        if (requiresPlanEvidence(binding)
            && externalPlanEvidence === true
            && hasPlanStateEvidence(executedToolResults, externalPlanEvidence)
            && !hasNonTodoToolEvidence(executedToolResults)
            && toolCalls.length > 0
            && (toolCalls.length !== 1 || isPlanToolCall(toolCalls[0], toolNameMapping, primaryTools))) {
            repeatedInvalidExternalPlanEvidenceSelection += 1;
            if (repeatedInvalidExternalPlanEvidenceSelection >= MAX_REPEATED_INVALID_EXTERNAL_PLAN_EVIDENCE_SELECTION) {
                throw createToolLoopError({
                    reason: "model did not select exactly one non-planning evidence tool during delegated plan evidence recovery",
                    iteration,
                    maxToolIterations,
                    toolCalls,
                    executedToolResults,
                });
            }
            activeRequest = appendToolRecoveryInstruction(activeRequest, REQUIRED_SINGLE_EXTERNAL_PLAN_EVIDENCE_TOOL_INSTRUCTION);
            pendingResult = undefined;
            continue;
        }

        if (requiresPlanEvidence(binding)
            && hasPlanStateEvidence(executedToolResults, externalPlanEvidence)
            && !hasNonTodoToolEvidence(executedToolResults)
            && toolCalls.length > 0
            && toolCalls.every((toolCall) => isPlanToolName(toolCall.name))) {
            repeatedPlanOnlyAfterPlan += 1;
            if (repeatedPlanOnlyAfterPlan >= MAX_REPEATED_PLAN_ONLY_AFTER_PLAN) {
                // NOTE(review): the guard above already requires that no non-planning evidence exists,
                // so this early return looks unreachable - confirm before removing.
                if (hasNonTodoToolEvidence(executedToolResults)) {
                    return {
                        result: buildDeterministicFinalFromToolEvidence(executedToolResults),
                        executedToolResults,
                    };
                }
                throw createToolLoopError({
                    reason: "model repeatedly selected only planning tools after the todo board already existed and before any non-planning evidence tool returned",
                    iteration,
                    maxToolIterations,
                    toolCalls,
                    executedToolResults,
                });
            }
            if (isFinalIteration) {
                // NOTE(review): same observation as above - this return appears unreachable.
                if (hasNonTodoToolEvidence(executedToolResults)) {
                    return {
                        result: buildDeterministicFinalFromToolEvidence(executedToolResults),
                        executedToolResults,
                    };
                }
                throw createToolLoopError({
                    reason: "maximum iterations reached",
                    iteration,
                    maxToolIterations,
                    toolCalls,
                    executedToolResults,
                });
            }
            activeRequest = appendToolRecoveryInstruction(activeRequest, REQUIRED_EXECUTION_CONTINUATION_INSTRUCTION);
            pendingResult = undefined;
            continue;
        }

        repeatedRecoveryWithoutProgress = 0;
        repeatedPlanOnlyAfterPlan = 0;
        repeatedInvalidExternalPlanEvidenceSelection = 0;

        const canReplayToolCalls = canReplayToolCallsLocally(binding, toolCalls, primaryTools, toolNameMapping, executableTools, builtinExecutableTools);
        debugLocalToolReplay({
            toolCalls,
            result,
            executableToolNames: [...executableTools.keys()],
            builtinToolNames: [...builtinExecutableTools.keys()],
            canReplay: canReplayToolCalls,
        });
        if (!canReplayToolCalls) {
            break;
        }
        if (isFinalIteration) {
            throw createToolLoopError({
                reason: "maximum iterations reached",
                iteration,
                maxToolIterations,
                toolCalls,
                executedToolResults,
            });
        }

        const resultMessages = result.messages;
        const nextMessages = [...currentMessages];
        if (Array.isArray(resultMessages) && resultMessages.length > 0) {
            nextMessages.push(...resultMessages);
        }
        const latestUserInput = extractLatestUserInput(activeRequest);
        nextMessages.push({ role: "system", content: TOOL_FOLLOW_UP_INSTRUCTION });

        for (let toolIndex = 0; toolIndex < toolCalls.length; toolIndex += 1) {
            const toolCall = toolCalls[toolIndex];
            const resolvedToolName = resolveModelFacingToolName(toolCall.name, toolNameMapping, primaryTools);
            const executable = executableTools.get(toolCall.name) ?? executableTools.get(resolvedToolName);
            const builtinExecutable = builtinExecutableTools.get(toolCall.name)
                ?? builtinExecutableTools.get(resolvedToolName)
                ?? createModelFacingToolNameLookupCandidates(toolCall.name)
                    .map((candidate) => builtinExecutableTools.get(candidate))
                    .find((candidate) => candidate !== undefined);
            const activeExecutable = executable ?? builtinExecutable;
            if (!activeExecutable) {
                throw new Error(`Tool ${toolCall.name} is not configured for this agent.`);
            }
            const compiledTool = toolCatalog.get(toolCall.name) ?? toolCatalog.get(resolvedToolName);
            const normalizedArgs = normalizeToolArgsForSchema(toolCall.args, activeExecutable.schema, toolCall.rawArgsInput, { latestUserInput });
            const gateway = validateToolGatewayInput({
                toolName: activeExecutable.name,
                schema: activeExecutable.schema,
                args: normalizedArgs,
                requiresApproval: compiledTool ? toolRequiresRuntimeApproval(compiledTool) : false,
            });
            if (!gateway.ok) {
                // Rejected input: record the error and report it to the model without invoking the tool.
                executedToolResults.push({
                    toolName: activeExecutable.name,
                    output: gateway.error,
                    isError: true,
                });
                nextMessages.push(new ToolMessage({
                    name: activeExecutable.name,
                    tool_call_id: toolCall.id ?? `tool-${iteration + 1}-${toolIndex + 1}`,
                    content: stringifyToolOutput(gateway.error),
                }));
                continue;
            }
            const toolResult = toolRuntimeContext
                ? await activeExecutable.invoke(gateway.input, { toolRuntimeContext })
                : await activeExecutable.invoke(gateway.input);
            const memoryCandidates = compiledTool ? extractMemoryCandidatesFromToolOutput(compiledTool, toolResult) : [];
            const safeToolResult = await maybePersistLargeToolOutput({
                toolName: activeExecutable.name,
                output: toolResult,
                toolRuntimeContext,
            });
            executedToolResults.push({
                toolName: activeExecutable.name,
                output: safeToolResult,
                ...(memoryCandidates.length > 0 ? { memoryCandidates } : {}),
            });
            nextMessages.push(new ToolMessage({
                name: activeExecutable.name,
                tool_call_id: toolCall.id ?? `tool-${iteration + 1}-${toolIndex + 1}`,
                content: stringifyToolOutput(safeToolResult),
            }));
        }

        if (requiresPlanEvidence(binding)
            && toolCalls.length > 0
            && toolCalls.every((toolCall) => isPlanToolName(toolCall.name))
            && !hasIncompleteExecutedPlan(executedToolResults, externalPlanEvidence)
            && hasNonTodoToolEvidence(executedToolResults)) {
            return {
                result: buildDeterministicFinalFromToolEvidence(executedToolResults),
                executedToolResults,
            };
        }
        currentMessages = hasPlanStateEvidence(executedToolResults, externalPlanEvidence)
            ? stripSatisfiedInitialPlanInstruction(nextMessages)
            : nextMessages;
        activeRequest = { ...activeRequest, messages: currentMessages };
    }

    if (!result) {
        throw new Error("Agent invocation returned no result");
    }
    return { result, executedToolResults };
}
|