@botbotgo/agent-harness 0.0.400 → 0.0.402
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +17 -0
- package/README.zh.md +17 -0
- package/dist/contracts/runtime-observability.d.ts +25 -0
- package/dist/package-version.d.ts +1 -1
- package/dist/package-version.js +1 -1
- package/dist/runtime/adapter/flow/stream-runtime.js +16 -1
- package/dist/runtime/adapter/local-tool-invocation.js +242 -14
- package/dist/runtime/adapter/middleware-assembly.js +1 -1
- package/dist/runtime/adapter/model/invocation-request.js +1 -1
- package/dist/runtime/adapter/model/model-providers.js +178 -119
- package/dist/runtime/adapter/stream-event-projection.js +70 -5
- package/dist/runtime/adapter/tool/tool-hitl.js +49 -6
- package/dist/runtime/agent-runtime-adapter.d.ts +2 -0
- package/dist/runtime/agent-runtime-adapter.js +329 -36
- package/dist/runtime/harness/bindings.js +2 -0
- package/dist/runtime/harness/tool-gateway/index.d.ts +2 -0
- package/dist/runtime/harness/tool-gateway/index.js +2 -0
- package/dist/runtime/harness/tool-gateway/policy.d.ts +2 -0
- package/dist/runtime/harness/tool-gateway/policy.js +45 -0
- package/dist/runtime/harness/tool-gateway/validation.d.ts +33 -0
- package/dist/runtime/harness/tool-gateway/validation.js +176 -0
- package/dist/runtime/parsing/output-recovery.js +1 -4
- package/package.json +15 -15
package/README.md
CHANGED
|
@@ -390,6 +390,22 @@ botbotgo -w /path/to/another-workspace "Summarize this project."
|
|
|
390
390
|
|
|
391
391
|
Development tip: repository-owned Ollama workspaces now default to `http://127.0.0.1:11434` for release-friendly local behavior. During development, point them at a shared remote Ollama by exporting `AGENT_HARNESS_OLLAMA_BASE_URL=https://ollama-rtx-4070.easynet.world` or `AGENT_HARNESS_OPENAI_COMPATIBLE_BASE_URL=https://ollama-rtx-4070.easynet.world/v1` before starting the runtime.
|
|
392
392
|
|
|
393
|
+
For CPU-only hosts with large RAM, run `llama.cpp` as an OpenAI-compatible server and use the existing `openai-compatible` provider:
|
|
394
|
+
|
|
395
|
+
```yaml
|
|
396
|
+
apiVersion: agent-harness/v1alpha1
|
|
397
|
+
kind: Models
|
|
398
|
+
spec:
|
|
399
|
+
- name: default
|
|
400
|
+
provider: openai-compatible
|
|
401
|
+
model: local-model
|
|
402
|
+
baseUrl: ${env:AGENT_HARNESS_LLAMA_CPP_BASE_URL:-http://127.0.0.1:8080/v1}
|
|
403
|
+
apiKey: dummy
|
|
404
|
+
toolCallingMode: prompted-json
|
|
405
|
+
```
|
|
406
|
+
|
|
407
|
+
Start the model separately with `llama-server -m /path/to/model.gguf --host 127.0.0.1 --port 8080`. `apiKey: dummy` uses the existing OpenAI-compatible auth-omission path, so the runtime does not send bearer auth to local `llama-server`.
|
|
408
|
+
|
|
393
409
|
Workspace layout:
|
|
394
410
|
|
|
395
411
|
```text
|
|
@@ -847,6 +863,7 @@ Practical guidance:
|
|
|
847
863
|
Local GGUF note:
|
|
848
864
|
|
|
849
865
|
- `provider: node-llama-cpp` now exposes a LangChain-style tool-binding shim, so local GGUF models can enter the standard tool-calling path without an app-owned model wrapper
|
|
866
|
+
- `provider: openai-compatible` targets an external `llama-server` endpoint when the model process should be tuned or supervised outside Node.js
|
|
850
867
|
- `backend: langchain-v1` is the straightforward local GGUF path and is the currently verified default for `node-llama-cpp` tool use
|
|
851
868
|
- `backend: deepagent` can also reach the same tool-calling path, but final reliability still depends on the selected model following upstream tool schemas correctly
|
|
852
869
|
- `agent-harness` does not try to normalize every model-specific argument drift or malformed tool payload; once the runtime hands a call to upstream tools, schema fidelity is a model responsibility
|
package/README.zh.md
CHANGED
|
@@ -386,6 +386,22 @@ botbotgo -w /path/to/another-workspace "Summarize this project."
|
|
|
386
386
|
|
|
387
387
|
开发时如果要把仓库自带的 Ollama workspace 切到共享远端,只需要在启动前设置环境变量即可:发布默认仍会回到 `http://127.0.0.1:11434` 这种本地 endpoint,而开发阶段可以通过 `AGENT_HARNESS_OLLAMA_BASE_URL=https://ollama-rtx-4070.easynet.world` 或 `AGENT_HARNESS_OPENAI_COMPATIBLE_BASE_URL=https://ollama-rtx-4070.easynet.world/v1` 覆盖到远端。
|
|
388
388
|
|
|
389
|
+
如果目标机器没有 GPU 但内存很大,可以单独启动 `llama.cpp` 的 OpenAI-compatible server,并继续使用已有的 `openai-compatible` provider:
|
|
390
|
+
|
|
391
|
+
```yaml
|
|
392
|
+
apiVersion: agent-harness/v1alpha1
|
|
393
|
+
kind: Models
|
|
394
|
+
spec:
|
|
395
|
+
- name: default
|
|
396
|
+
provider: openai-compatible
|
|
397
|
+
model: local-model
|
|
398
|
+
baseUrl: ${env:AGENT_HARNESS_LLAMA_CPP_BASE_URL:-http://127.0.0.1:8080/v1}
|
|
399
|
+
apiKey: dummy
|
|
400
|
+
toolCallingMode: prompted-json
|
|
401
|
+
```
|
|
402
|
+
|
|
403
|
+
模型进程用 `llama-server -m /path/to/model.gguf --host 127.0.0.1 --port 8080` 单独启动。`apiKey: dummy` 会复用现有 OpenAI-compatible 的 auth omission 路径,因此 runtime 不会向本地 `llama-server` 发送 bearer auth。
|
|
404
|
+
|
|
389
405
|
工作区布局:
|
|
390
406
|
|
|
391
407
|
```text
|
|
@@ -804,6 +820,7 @@ await stop(runtime);
|
|
|
804
820
|
本地 GGUF 补充说明:
|
|
805
821
|
|
|
806
822
|
- `provider: node-llama-cpp` 现在带有一层 LangChain 风格的 tool-binding shim,因此本地 GGUF 模型可以进入标准 tool-calling 路径,而不需要应用自己包一层 model wrapper
|
|
823
|
+
- `provider: openai-compatible` 可以指向外部 `llama-server` endpoint;当模型进程需要在 Node.js 外部单独调参、守护或部署时,继续复用这条已有路径
|
|
807
824
|
- 对 `node-llama-cpp` 来说,`backend: langchain-v1` 仍然是更直接、当前已验证的本地 tool use 路径
|
|
808
825
|
- `backend: deepagent` 也可以走到同一条 tool-calling 路径,但最终稳定性仍取决于所选模型是否能正确遵守 upstream tool schema
|
|
809
826
|
- `agent-harness` 不会为每个模型的参数漂移或畸形 tool payload 做无限兼容;runtime 把调用交给 upstream tools 之后,schema fidelity 就属于模型责任
|
package/dist/contracts/runtime-observability.d.ts
CHANGED
|
@@ -126,6 +126,11 @@ export type RuntimeToolExecutionToolPolicy = {
|
|
|
126
126
|
hasInputSchema: boolean;
|
|
127
127
|
requiresApproval: boolean;
|
|
128
128
|
};
|
|
129
|
+
export type RuntimeToolGatewayToolPolicy = RuntimeToolExecutionToolPolicy & {
|
|
130
|
+
gatewayMode: "schema-first" | "approval-gated" | "best-effort";
|
|
131
|
+
modelRole: "propose";
|
|
132
|
+
runtimeRole: "validate-and-execute" | "request-approval" | "execute-with-runtime-checks";
|
|
133
|
+
};
|
|
129
134
|
export type RuntimeSnapshotModel = {
|
|
130
135
|
id: string;
|
|
131
136
|
provider: string;
|
|
@@ -188,6 +193,26 @@ export type RuntimeSnapshot = {
|
|
|
188
193
|
};
|
|
189
194
|
export type RuntimeToolExecutionPolicy = {
|
|
190
195
|
agentId: string;
|
|
196
|
+
gateway: {
|
|
197
|
+
layer: "tool-gateway";
|
|
198
|
+
toolScope: {
|
|
199
|
+
source: "agent-binding";
|
|
200
|
+
exposedToolCount: number;
|
|
201
|
+
schemaBoundToolCount: number;
|
|
202
|
+
approvalRequiredToolCount: number;
|
|
203
|
+
};
|
|
204
|
+
validation: {
|
|
205
|
+
strategy: "schema-first";
|
|
206
|
+
runtimeValidationRequired: boolean;
|
|
207
|
+
strictProviderSchemaPreferred: boolean;
|
|
208
|
+
};
|
|
209
|
+
correction: {
|
|
210
|
+
invalidArguments: "structured-error-retry";
|
|
211
|
+
maxModelRetries: number;
|
|
212
|
+
highRiskInvalidArguments: "approval-or-deny";
|
|
213
|
+
};
|
|
214
|
+
tools: RuntimeToolGatewayToolPolicy[];
|
|
215
|
+
};
|
|
191
216
|
invokeTimeoutMs?: number;
|
|
192
217
|
streamIdleTimeoutMs: number;
|
|
193
218
|
providerRetries: {
|
package/dist/package-version.d.ts
CHANGED
|
@@ -1,2 +1,2 @@
|
|
|
1
|
-
export declare const AGENT_HARNESS_VERSION = "0.0.400";
|
|
1
|
+
export declare const AGENT_HARNESS_VERSION = "0.0.402";
|
|
2
2
|
export declare const AGENT_HARNESS_RELEASE_DATE = "2026-05-02";
|
package/dist/package-version.js
CHANGED
|
@@ -1,2 +1,2 @@
|
|
|
1
|
-
export const AGENT_HARNESS_VERSION = "0.0.400";
|
|
1
|
+
export const AGENT_HARNESS_VERSION = "0.0.402";
|
|
2
2
|
export const AGENT_HARNESS_RELEASE_DATE = "2026-05-02";
|
package/dist/runtime/adapter/flow/stream-runtime.js
CHANGED
|
@@ -20,6 +20,12 @@ const CLOSE_REQUIRED_PLAN_RECOVERY_INSTRUCTION = [
|
|
|
20
20
|
"Your next action must be write_todos: update every remaining pending or in_progress item to completed if evidence was gathered, or failed if it cannot be completed with the available tools.",
|
|
21
21
|
"After that write_todos call, provide the final answer required by the agent response format.",
|
|
22
22
|
].join("\n");
|
|
23
|
+
const RUN_EVIDENCE_AFTER_PREMATURE_PLAN_CLOSE_INSTRUCTION = [
|
|
24
|
+
"The required todo board was closed before any non-TODO evidence tool returned.",
|
|
25
|
+
"Do not call write_todos again yet.",
|
|
26
|
+
"Your next action must be exactly one non-TODO evidence tool call selected from the available tool descriptions and schemas.",
|
|
27
|
+
"After that evidence tool returns, update the todo board and then provide the final answer required by the agent response format.",
|
|
28
|
+
].join("\n");
|
|
23
29
|
const INITIAL_REQUIRED_PLAN_INSTRUCTION = [
|
|
24
30
|
"This agent has a required visible planning contract.",
|
|
25
31
|
"Your first action for this request must be write_todos with concrete task steps and statuses.",
|
|
@@ -193,10 +199,13 @@ function hasUsefulVisibleSynthesis(value) {
|
|
|
193
199
|
if (/^(?:model_request|tool_call|call_tool)/iu.test(trimmed)) {
|
|
194
200
|
return false;
|
|
195
201
|
}
|
|
202
|
+
if (/^(?:name|tool_call_id)\s*=/iu.test(trimmed)) {
|
|
203
|
+
return false;
|
|
204
|
+
}
|
|
196
205
|
if (/^(?:we\s+need\s+to|so\s+next\s+step\b)/iu.test(trimmed)) {
|
|
197
206
|
return false;
|
|
198
207
|
}
|
|
199
|
-
if (/^\{\s*"(?:name|arguments|todos|symbol|query|market|count)"\s*:/iu.test(trimmed)) {
|
|
208
|
+
if (/^\{\s*"(?:name|arguments|args|argv|todos|symbol|query|market|count|stdout|stderr|exitCode)"\s*:/iu.test(trimmed)) {
|
|
200
209
|
return false;
|
|
201
210
|
}
|
|
202
211
|
if (/^(?:stdout|stderr|exitCode)\s*:/iu.test(trimmed)) {
|
|
@@ -702,6 +711,11 @@ export async function* streamRuntimeExecution(options) {
|
|
|
702
711
|
const streamedIncompletePlanRecoveryInstruction = requiresPlanEvidence(options.binding) && streamedExecutionEvidence.hasIncompletePlanState
|
|
703
712
|
? CLOSE_REQUIRED_PLAN_RECOVERY_INSTRUCTION
|
|
704
713
|
: null;
|
|
714
|
+
const streamedPrematurePlanCloseRecoveryInstruction = requiresPlanEvidence(options.binding)
|
|
715
|
+
&& streamedExecutionEvidence.hasPlanStateEvidence
|
|
716
|
+
&& !streamedExecutionEvidence.hasSuccessfulNonTodoToolResultEvidence
|
|
717
|
+
? RUN_EVIDENCE_AFTER_PREMATURE_PLAN_CLOSE_INSTRUCTION
|
|
718
|
+
: null;
|
|
705
719
|
const delegatedExecutionRecoveryInstruction = !emittedUnsafeStreamSideEffects || streamedDelegatedRecoveryInstruction
|
|
706
720
|
? streamedDelegatedRecoveryInstruction
|
|
707
721
|
: null;
|
|
@@ -734,6 +748,7 @@ export async function* streamRuntimeExecution(options) {
|
|
|
734
748
|
? INVALID_TOOL_SELECTION_RECOVERY_INSTRUCTION
|
|
735
749
|
: delegatedExecutionRecoveryInstruction
|
|
736
750
|
?? streamedIncompletePlanRecoveryInstruction
|
|
751
|
+
?? streamedPrematurePlanCloseRecoveryInstruction
|
|
737
752
|
?? streamedRuntimeFailureRecoveryInstruction
|
|
738
753
|
?? missingPlanRecoveryInstruction
|
|
739
754
|
?? streamedDelegationOnlyRecoveryInstruction
|
package/dist/runtime/adapter/local-tool-invocation.js
CHANGED
|
@@ -4,10 +4,15 @@ import { canReplayToolCallsLocally } from "./tool/tool-replay.js";
|
|
|
4
4
|
import { extractToolCallsFromResult, normalizeToolArgsForSchema, stringifyToolOutput } from "./tool/tool-arguments.js";
|
|
5
5
|
import { extractMemoryCandidatesFromToolOutput } from "../harness/system/runtime-memory-candidates.js";
|
|
6
6
|
import { maybePersistLargeToolOutput } from "./tool/tool-output-artifacts.js";
|
|
7
|
+
import { toolRequiresRuntimeApproval } from "./tool/tool-hitl.js";
|
|
8
|
+
import { validateToolGatewayInput } from "../harness/tool-gateway/index.js";
|
|
7
9
|
import { appendToolRecoveryInstruction, extractVisibleOutput, resolveMissingPlanRecoveryInstruction, resolveExecutionWithoutToolEvidenceTextInstruction, resolveToolCallRecoveryInstruction, sanitizeVisibleText, STRICT_TOOL_JSON_INSTRUCTION, } from "../parsing/output-parsing.js";
|
|
8
10
|
import { salvageJsonToolCalls } from "../parsing/output-tool-args.js";
|
|
9
11
|
import { AUTONOMOUS_INVESTIGATION_RECOVERY_INSTRUCTION } from "../prompts/runtime-prompts.js";
|
|
10
12
|
const TOOL_FOLLOW_UP_INSTRUCTION = "One or more tool results are already available in this conversation. Answer the user's current request directly from the existing context and tool results. Do not ask the user to repeat inputs that are already present above.";
|
|
13
|
+
function isObject(value) {
|
|
14
|
+
return typeof value === "object" && value !== null && !Array.isArray(value);
|
|
15
|
+
}
|
|
11
16
|
function readPlanStateSummary(output) {
|
|
12
17
|
if (typeof output !== "object" || output === null) {
|
|
13
18
|
return null;
|
|
@@ -38,32 +43,176 @@ function hasIncompleteExecutedPlan(executedToolResults) {
|
|
|
38
43
|
}
|
|
39
44
|
return false;
|
|
40
45
|
}
|
|
46
|
+
function normalizeToolName(value) {
|
|
47
|
+
return typeof value === "string" ? value.trim().toLowerCase().replace(/[\s-]+/gu, "_") : "";
|
|
48
|
+
}
|
|
41
49
|
function hasNonTodoToolEvidence(executedToolResults) {
|
|
42
|
-
return executedToolResults.some((item) => item.toolName
|
|
50
|
+
return executedToolResults.some((item) => !isPlanToolName(item.toolName));
|
|
43
51
|
}
|
|
44
52
|
function isPlanToolName(toolName) {
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
||
|
|
48
|
-
||
|
|
53
|
+
const normalized = normalizeToolName(toolName);
|
|
54
|
+
return normalized === "write_todos"
|
|
55
|
+
|| normalized === "read_todos"
|
|
56
|
+
|| normalized === "tool_call_write_todos"
|
|
57
|
+
|| normalized === "tool_call_read_todos";
|
|
49
58
|
}
|
|
50
59
|
function isFallbackTodoCompletionToolCall(toolCall) {
|
|
51
60
|
return typeof toolCall.id === "string"
|
|
52
61
|
&& toolCall.id.startsWith("fallback-complete-")
|
|
53
62
|
&& (toolCall.name === "write_todos" || toolCall.name === "tool_call_write_todos");
|
|
54
63
|
}
|
|
55
|
-
function
|
|
56
|
-
if (toolCall.name
|
|
64
|
+
function isTerminalTodoUpdateToolCall(toolCall) {
|
|
65
|
+
if (!isPlanToolName(toolCall.name) || normalizeToolName(toolCall.name).includes("read_todos")) {
|
|
57
66
|
return false;
|
|
58
67
|
}
|
|
59
68
|
if (typeof toolCall.args !== "object" || toolCall.args === null || !Array.isArray(toolCall.args.todos)) {
|
|
60
69
|
return false;
|
|
61
70
|
}
|
|
62
71
|
const todos = toolCall.args.todos;
|
|
63
|
-
return todos.length > 0 && todos.every((todo) =>
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
72
|
+
return todos.length > 0 && todos.every((todo) => {
|
|
73
|
+
if (typeof todo !== "object" || todo === null || typeof todo.status !== "string") {
|
|
74
|
+
return false;
|
|
75
|
+
}
|
|
76
|
+
const status = todo.status.trim().toLowerCase();
|
|
77
|
+
return status !== "pending" && status !== "in_progress";
|
|
78
|
+
});
|
|
79
|
+
}
|
|
80
|
+
function readSchemaShape(schema) {
|
|
81
|
+
if (!isObject(schema)) {
|
|
82
|
+
return null;
|
|
83
|
+
}
|
|
84
|
+
if (isObject(schema.properties)) {
|
|
85
|
+
return schema.properties;
|
|
86
|
+
}
|
|
87
|
+
if (isObject(schema.shape)) {
|
|
88
|
+
return schema.shape;
|
|
89
|
+
}
|
|
90
|
+
const def = schema._def;
|
|
91
|
+
if (!def) {
|
|
92
|
+
return null;
|
|
93
|
+
}
|
|
94
|
+
const shape = typeof def.shape === "function" ? def.shape() : def.shape;
|
|
95
|
+
return isObject(shape) ? shape : null;
|
|
96
|
+
}
|
|
97
|
+
function readSchemaDescription(schemaPart) {
|
|
98
|
+
if (!isObject(schemaPart)) {
|
|
99
|
+
return "";
|
|
100
|
+
}
|
|
101
|
+
const direct = schemaPart.description;
|
|
102
|
+
if (typeof direct === "string") {
|
|
103
|
+
return direct;
|
|
104
|
+
}
|
|
105
|
+
const nested = schemaPart._def;
|
|
106
|
+
if (typeof nested?.description === "string") {
|
|
107
|
+
return nested.description;
|
|
108
|
+
}
|
|
109
|
+
return readSchemaDescription(nested?.innerType);
|
|
110
|
+
}
|
|
111
|
+
function readSchemaDefault(schemaPart) {
|
|
112
|
+
if (!isObject(schemaPart)) {
|
|
113
|
+
return undefined;
|
|
114
|
+
}
|
|
115
|
+
const typed = schemaPart;
|
|
116
|
+
const hasJsonDefault = Object.prototype.hasOwnProperty.call(schemaPart, "default") && typeof typed.default !== "function";
|
|
117
|
+
if (hasJsonDefault) {
|
|
118
|
+
return typed.default;
|
|
119
|
+
}
|
|
120
|
+
if (Object.prototype.hasOwnProperty.call(schemaPart, "const")) {
|
|
121
|
+
return typed.const;
|
|
122
|
+
}
|
|
123
|
+
const def = schemaPart._def;
|
|
124
|
+
if (!def) {
|
|
125
|
+
return undefined;
|
|
126
|
+
}
|
|
127
|
+
if (def.defaultValue !== undefined) {
|
|
128
|
+
return typeof def.defaultValue === "function" ? def.defaultValue() : def.defaultValue;
|
|
129
|
+
}
|
|
130
|
+
return readSchemaDefault(def.innerType);
|
|
131
|
+
}
|
|
132
|
+
function parseFirstStringArrayExample(description) {
|
|
133
|
+
const arrayMatch = description.match(/\[[^\]]+\]/u);
|
|
134
|
+
if (!arrayMatch) {
|
|
135
|
+
return null;
|
|
136
|
+
}
|
|
137
|
+
const values = [...arrayMatch[0].matchAll(/["']([^"']+)["']/gu)].map((match) => match[1]).filter(Boolean);
|
|
138
|
+
return values.length > 0 ? values : null;
|
|
139
|
+
}
|
|
140
|
+
function buildGenericFallbackArgsFromSchema(schema, latestUserInput) {
|
|
141
|
+
const shape = readSchemaShape(schema);
|
|
142
|
+
if (!shape) {
|
|
143
|
+
return {};
|
|
144
|
+
}
|
|
145
|
+
const args = {};
|
|
146
|
+
for (const [key, schemaPart] of Object.entries(shape)) {
|
|
147
|
+
const defaultValue = readSchemaDefault(schemaPart);
|
|
148
|
+
if (defaultValue !== undefined) {
|
|
149
|
+
args[key] = defaultValue;
|
|
150
|
+
continue;
|
|
151
|
+
}
|
|
152
|
+
const description = readSchemaDescription(schemaPart);
|
|
153
|
+
const arrayExample = parseFirstStringArrayExample(description);
|
|
154
|
+
if (arrayExample) {
|
|
155
|
+
args[key] = arrayExample;
|
|
156
|
+
continue;
|
|
157
|
+
}
|
|
158
|
+
if (latestUserInput
|
|
159
|
+
&& !args[key]
|
|
160
|
+
&& /(?:query|question|prompt|input|text)/iu.test(`${key} ${description}`)) {
|
|
161
|
+
args[key] = latestUserInput;
|
|
162
|
+
}
|
|
163
|
+
}
|
|
164
|
+
return args;
|
|
165
|
+
}
|
|
166
|
+
function readTodoPlanTextFromToolCalls(toolCalls) {
|
|
167
|
+
const fragments = [];
|
|
168
|
+
for (const toolCall of toolCalls) {
|
|
169
|
+
if (typeof toolCall.args !== "object" || toolCall.args === null) {
|
|
170
|
+
continue;
|
|
171
|
+
}
|
|
172
|
+
const todos = toolCall.args.todos;
|
|
173
|
+
if (!Array.isArray(todos)) {
|
|
174
|
+
continue;
|
|
175
|
+
}
|
|
176
|
+
for (const todo of todos) {
|
|
177
|
+
if (typeof todo === "object" && todo !== null && typeof todo.content === "string") {
|
|
178
|
+
fragments.push(todo.content);
|
|
179
|
+
}
|
|
180
|
+
}
|
|
181
|
+
}
|
|
182
|
+
return fragments.join("\n");
|
|
183
|
+
}
|
|
184
|
+
function selectGenericFallbackEvidenceTool(params) {
|
|
185
|
+
const candidates = [];
|
|
186
|
+
const appendCandidate = (name) => {
|
|
187
|
+
if (isPlanToolName(name)) {
|
|
188
|
+
return;
|
|
189
|
+
}
|
|
190
|
+
const resolved = resolveModelFacingToolName(name, params.toolNameMapping, params.primaryTools);
|
|
191
|
+
const executable = params.executableTools.get(name)
|
|
192
|
+
?? params.executableTools.get(resolved)
|
|
193
|
+
?? params.builtinExecutableTools.get(name)
|
|
194
|
+
?? params.builtinExecutableTools.get(resolved);
|
|
195
|
+
if (!executable || candidates.some((candidate) => candidate.executable.name === executable.name)) {
|
|
196
|
+
return;
|
|
197
|
+
}
|
|
198
|
+
candidates.push({ requestedName: name, executable });
|
|
199
|
+
};
|
|
200
|
+
for (const tool of params.primaryTools) {
|
|
201
|
+
appendCandidate(tool.name);
|
|
202
|
+
const modelFacing = params.toolNameMapping.originalToModelFacing.get(tool.name);
|
|
203
|
+
if (modelFacing) {
|
|
204
|
+
appendCandidate(modelFacing);
|
|
205
|
+
}
|
|
206
|
+
}
|
|
207
|
+
for (const name of [...params.executableTools.keys(), ...params.builtinExecutableTools.keys()]) {
|
|
208
|
+
appendCandidate(name);
|
|
209
|
+
}
|
|
210
|
+
if (candidates.length === 0) {
|
|
211
|
+
return null;
|
|
212
|
+
}
|
|
213
|
+
const normalizedPlanText = params.planText.toLowerCase();
|
|
214
|
+
return candidates.find((candidate) => normalizedPlanText.includes(candidate.requestedName.toLowerCase())
|
|
215
|
+
|| normalizedPlanText.includes(candidate.executable.name.toLowerCase())) ?? candidates[0];
|
|
67
216
|
}
|
|
68
217
|
function buildDeterministicFinalFromToolEvidence(executedToolResults) {
|
|
69
218
|
const evidence = executedToolResults
|
|
@@ -92,6 +241,11 @@ function latestToolErrorRecoveryInstruction(executedToolResults) {
|
|
|
92
241
|
if (!latest || latest.isError !== true) {
|
|
93
242
|
return null;
|
|
94
243
|
}
|
|
244
|
+
if (typeof latest.output === "object" &&
|
|
245
|
+
latest.output !== null &&
|
|
246
|
+
latest.output.code === "INVALID_ARGUMENTS") {
|
|
247
|
+
return null;
|
|
248
|
+
}
|
|
95
249
|
const message = typeof latest.output === "string" ? latest.output : JSON.stringify(latest.output);
|
|
96
250
|
return resolveToolCallRecoveryInstruction(new Error(message)) ?? AUTONOMOUS_INVESTIGATION_RECOVERY_INSTRUCTION;
|
|
97
251
|
}
|
|
@@ -140,12 +294,19 @@ export async function runLocalToolInvocationLoop({ binding, request, primaryTool
|
|
|
140
294
|
const hasIncompletePlanState = hasIncompleteExecutedPlan(executedToolResults);
|
|
141
295
|
const shouldEnforceIncompletePlan = requiresPlanEvidence(binding) && hasIncompletePlanState;
|
|
142
296
|
const hasExecutionBeyondTodoPlanning = hasNonTodoToolEvidence(executedToolResults);
|
|
297
|
+
const missingInitialPlanRecoveryInstruction = resolveMissingPlanRecoveryInstruction({
|
|
298
|
+
request: activeRequest,
|
|
299
|
+
requiresPlan: requiresPlanEvidence(binding),
|
|
300
|
+
hasPlanStateEvidence: hasPlanStateEvidence(executedToolResults),
|
|
301
|
+
hasWriteTodosEvidence: executedToolResults.some((item) => item.toolName === "write_todos"),
|
|
302
|
+
hasToolResultEvidence: hasExecutionBeyondTodoPlanning,
|
|
303
|
+
});
|
|
143
304
|
const toolErrorRecoveryInstruction = latestToolErrorRecoveryInstruction(executedToolResults)
|
|
144
305
|
?? terminalToolErrorRecoveryInstruction(terminalText);
|
|
145
306
|
const leakedJsonToolCallRecoveryInstruction = terminalText && salvageJsonToolCalls(terminalText).length > 0
|
|
146
307
|
? STRICT_TOOL_JSON_INSTRUCTION
|
|
147
308
|
: null;
|
|
148
|
-
const recoveryInstruction = toolErrorRecoveryInstruction ?? leakedJsonToolCallRecoveryInstruction ?? (terminalText
|
|
309
|
+
const recoveryInstruction = toolErrorRecoveryInstruction ?? leakedJsonToolCallRecoveryInstruction ?? missingInitialPlanRecoveryInstruction ?? (terminalText
|
|
149
310
|
? resolveExecutionWithoutToolEvidenceTextInstruction(activeRequest, terminalText, false, {
|
|
150
311
|
hasWriteTodosEvidence: executedToolResults.some((item) => item.toolName === "write_todos"),
|
|
151
312
|
hasToolResultEvidence: hasExecutionBeyondTodoPlanning,
|
|
@@ -197,6 +358,7 @@ export async function runLocalToolInvocationLoop({ binding, request, primaryTool
|
|
|
197
358
|
role: "system",
|
|
198
359
|
content: TOOL_FOLLOW_UP_INSTRUCTION,
|
|
199
360
|
});
|
|
361
|
+
const hadNonTodoEvidenceBeforeToolReplay = hasNonTodoToolEvidence(executedToolResults);
|
|
200
362
|
for (let toolIndex = 0; toolIndex < toolCalls.length; toolIndex += 1) {
|
|
201
363
|
const toolCall = toolCalls[toolIndex];
|
|
202
364
|
const resolvedToolName = resolveModelFacingToolName(toolCall.name, toolNameMapping, primaryTools);
|
|
@@ -214,9 +376,28 @@ export async function runLocalToolInvocationLoop({ binding, request, primaryTool
|
|
|
214
376
|
const normalizedArgs = normalizeToolArgsForSchema(toolCall.args, activeExecutable.schema, toolCall.rawArgsInput, {
|
|
215
377
|
latestUserInput,
|
|
216
378
|
});
|
|
379
|
+
const gateway = validateToolGatewayInput({
|
|
380
|
+
toolName: activeExecutable.name,
|
|
381
|
+
schema: activeExecutable.schema,
|
|
382
|
+
args: normalizedArgs,
|
|
383
|
+
requiresApproval: compiledTool ? toolRequiresRuntimeApproval(compiledTool) : false,
|
|
384
|
+
});
|
|
385
|
+
if (!gateway.ok) {
|
|
386
|
+
executedToolResults.push({
|
|
387
|
+
toolName: activeExecutable.name,
|
|
388
|
+
output: gateway.error,
|
|
389
|
+
isError: true,
|
|
390
|
+
});
|
|
391
|
+
nextMessages.push(new ToolMessage({
|
|
392
|
+
name: activeExecutable.name,
|
|
393
|
+
tool_call_id: toolCall.id ?? `tool-${iteration + 1}-${toolIndex + 1}`,
|
|
394
|
+
content: stringifyToolOutput(gateway.error),
|
|
395
|
+
}));
|
|
396
|
+
continue;
|
|
397
|
+
}
|
|
217
398
|
const toolResult = toolRuntimeContext
|
|
218
|
-
? await activeExecutable.invoke(
|
|
219
|
-
: await activeExecutable.invoke(
|
|
399
|
+
? await activeExecutable.invoke(gateway.input, { toolRuntimeContext })
|
|
400
|
+
: await activeExecutable.invoke(gateway.input);
|
|
220
401
|
const memoryCandidates = compiledTool ? extractMemoryCandidatesFromToolOutput(compiledTool, toolResult) : [];
|
|
221
402
|
const safeToolResult = await maybePersistLargeToolOutput({
|
|
222
403
|
toolName: activeExecutable.name,
|
|
@@ -234,6 +415,53 @@ export async function runLocalToolInvocationLoop({ binding, request, primaryTool
|
|
|
234
415
|
content: stringifyToolOutput(safeToolResult),
|
|
235
416
|
}));
|
|
236
417
|
}
|
|
418
|
+
if (requiresPlanEvidence(binding)
|
|
419
|
+
&& !hadNonTodoEvidenceBeforeToolReplay
|
|
420
|
+
&& !hasNonTodoToolEvidence(executedToolResults)
|
|
421
|
+
&& toolCalls.length > 0
|
|
422
|
+
&& toolCalls.every((toolCall) => isPlanToolName(toolCall.name))
|
|
423
|
+
&& toolCalls.some(isTerminalTodoUpdateToolCall)) {
|
|
424
|
+
const fallbackEvidenceTool = selectGenericFallbackEvidenceTool({
|
|
425
|
+
planText: readTodoPlanTextFromToolCalls(toolCalls),
|
|
426
|
+
primaryTools,
|
|
427
|
+
toolNameMapping,
|
|
428
|
+
executableTools,
|
|
429
|
+
builtinExecutableTools,
|
|
430
|
+
});
|
|
431
|
+
if (fallbackEvidenceTool) {
|
|
432
|
+
const fallbackArgs = buildGenericFallbackArgsFromSchema(fallbackEvidenceTool.executable.schema, latestUserInput);
|
|
433
|
+
const normalizedArgs = normalizeToolArgsForSchema(fallbackArgs, fallbackEvidenceTool.executable.schema, undefined, {
|
|
434
|
+
latestUserInput,
|
|
435
|
+
});
|
|
436
|
+
const compiledTool = toolCatalog.get(fallbackEvidenceTool.requestedName) ?? toolCatalog.get(fallbackEvidenceTool.executable.name);
|
|
437
|
+
const gateway = validateToolGatewayInput({
|
|
438
|
+
toolName: fallbackEvidenceTool.executable.name,
|
|
439
|
+
schema: fallbackEvidenceTool.executable.schema,
|
|
440
|
+
args: normalizedArgs,
|
|
441
|
+
requiresApproval: compiledTool ? toolRequiresRuntimeApproval(compiledTool) : false,
|
|
442
|
+
});
|
|
443
|
+
if (gateway.ok) {
|
|
444
|
+
const toolResult = toolRuntimeContext
|
|
445
|
+
? await fallbackEvidenceTool.executable.invoke(gateway.input, { toolRuntimeContext })
|
|
446
|
+
: await fallbackEvidenceTool.executable.invoke(gateway.input);
|
|
447
|
+
const memoryCandidates = compiledTool ? extractMemoryCandidatesFromToolOutput(compiledTool, toolResult) : [];
|
|
448
|
+
const safeToolResult = await maybePersistLargeToolOutput({
|
|
449
|
+
toolName: fallbackEvidenceTool.executable.name,
|
|
450
|
+
output: toolResult,
|
|
451
|
+
toolRuntimeContext: toolRuntimeContext,
|
|
452
|
+
});
|
|
453
|
+
executedToolResults.push({
|
|
454
|
+
toolName: fallbackEvidenceTool.executable.name,
|
|
455
|
+
output: safeToolResult,
|
|
456
|
+
...(memoryCandidates.length > 0 ? { memoryCandidates } : {}),
|
|
457
|
+
});
|
|
458
|
+
return {
|
|
459
|
+
result: buildDeterministicFinalFromToolEvidence(executedToolResults),
|
|
460
|
+
executedToolResults,
|
|
461
|
+
};
|
|
462
|
+
}
|
|
463
|
+
}
|
|
464
|
+
}
|
|
237
465
|
if (requiresPlanEvidence(binding)
|
|
238
466
|
&& toolCalls.length > 0
|
|
239
467
|
&& toolCalls.every((toolCall) => isPlanToolName(toolCall.name))
|
package/dist/runtime/adapter/middleware-assembly.js
CHANGED
|
@@ -418,7 +418,7 @@ export async function invokeBuiltinTaskTool(input) {
|
|
|
418
418
|
if (!hasSubagentExecutionToolEvidence(result, resolvedSubagentTools, selectedCompiledSubagent?.tools)) {
|
|
419
419
|
result = await invokeSubagent([description, EXECUTION_WITH_TOOL_EVIDENCE_RETRY_INSTRUCTION].filter(Boolean).join("\n\n"));
|
|
420
420
|
if (!hasSubagentExecutionToolEvidence(result, resolvedSubagentTools, selectedCompiledSubagent?.tools)) {
|
|
421
|
-
throw new Error(`Delegated agent ${selectedSubagent.name} completed without tool execution evidence.`);
|
|
421
|
+
throw new Error(`Delegated agent ${selectedSubagent.name} completed without tool execution evidence: lacked non-planning tool evidence.`);
|
|
422
422
|
}
|
|
423
423
|
}
|
|
424
424
|
const structuredResponse = typeof result === "object" && result !== null && "structuredResponse" in result
|
package/dist/runtime/adapter/model/invocation-request.js
CHANGED
|
@@ -130,7 +130,7 @@ function isIncidentFollowUpTurn(inputText) {
|
|
|
130
130
|
if (!normalized || hasExplicitResourceReference(normalized)) {
|
|
131
131
|
return false;
|
|
132
132
|
}
|
|
133
|
-
return /(the rca|deep research.*rca|root cause|go deeper|those issues|these issues|that issue|current incident
|
|
133
|
+
return /(the rca|deep research.*rca|root cause|go deeper|those issues|these issues|that issue|current incident)/i.test(normalized);
|
|
134
134
|
}
|
|
135
135
|
function findLastAssistantText(history) {
|
|
136
136
|
for (let index = history.length - 1; index >= 0; index -= 1) {
|