@botbotgo/agent-harness 0.0.400 → 0.0.402

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -390,6 +390,22 @@ botbotgo -w /path/to/another-workspace "Summarize this project."
390
390
 
391
391
  Development tip: repository-owned Ollama workspaces now default to `http://127.0.0.1:11434` for release-friendly local behavior. During development, point them at a shared remote Ollama by exporting `AGENT_HARNESS_OLLAMA_BASE_URL=https://ollama-rtx-4070.easynet.world` or `AGENT_HARNESS_OPENAI_COMPATIBLE_BASE_URL=https://ollama-rtx-4070.easynet.world/v1` before starting the runtime.
392
392
 
393
+ For CPU-only hosts with plenty of RAM, run `llama.cpp`'s `llama-server` as an OpenAI-compatible server and point the existing `openai-compatible` provider at it:
394
+
395
+ ```yaml
396
+ apiVersion: agent-harness/v1alpha1
397
+ kind: Models
398
+ spec:
399
+   - name: default
400
+     provider: openai-compatible
401
+     model: local-model
402
+     baseUrl: ${env:AGENT_HARNESS_LLAMA_CPP_BASE_URL:-http://127.0.0.1:8080/v1}
403
+     apiKey: dummy
404
+     toolCallingMode: prompted-json
405
+ ```
406
+
407
+ Start the model separately with `llama-server -m /path/to/model.gguf --host 127.0.0.1 --port 8080`; this host and port match the default `baseUrl` above. Setting `apiKey: dummy` uses the existing OpenAI-compatible auth-omission path, so the runtime does not send bearer auth to the local `llama-server`.
408
+
393
409
  Workspace layout:
394
410
 
395
411
  ```text
@@ -847,6 +863,7 @@ Practical guidance:
847
863
  Local GGUF note:
848
864
 
849
865
  - `provider: node-llama-cpp` now exposes a LangChain-style tool-binding shim, so local GGUF models can enter the standard tool-calling path without an app-owned model wrapper
866
+ - `provider: openai-compatible` can target an external `llama-server` endpoint; use it when the model process should be tuned or supervised outside Node.js
850
867
  - `backend: langchain-v1` is the straightforward local GGUF path and is the currently verified default for `node-llama-cpp` tool use
851
868
  - `backend: deepagent` can also reach the same tool-calling path, but final reliability still depends on the selected model following upstream tool schemas correctly
852
869
  - `agent-harness` does not try to normalize every model-specific argument drift or malformed tool payload; once the runtime hands a call to upstream tools, schema fidelity is a model responsibility
package/README.zh.md CHANGED
@@ -386,6 +386,22 @@ botbotgo -w /path/to/another-workspace "Summarize this project."
386
386
 
387
387
  开发时如果要把仓库自带的 Ollama workspace 切到共享远端,只需要在启动前设置环境变量即可:发布默认仍会回到 `http://127.0.0.1:11434` 这种本地 endpoint,而开发阶段可以通过 `AGENT_HARNESS_OLLAMA_BASE_URL=https://ollama-rtx-4070.easynet.world` 或 `AGENT_HARNESS_OPENAI_COMPATIBLE_BASE_URL=https://ollama-rtx-4070.easynet.world/v1` 覆盖到远端。
388
388
 
389
+ 如果目标机器没有 GPU 但内存很大,可以单独启动 `llama.cpp` 的 OpenAI-compatible server,并继续使用已有的 `openai-compatible` provider:
390
+
391
+ ```yaml
392
+ apiVersion: agent-harness/v1alpha1
393
+ kind: Models
394
+ spec:
395
+   - name: default
396
+     provider: openai-compatible
397
+     model: local-model
398
+     baseUrl: ${env:AGENT_HARNESS_LLAMA_CPP_BASE_URL:-http://127.0.0.1:8080/v1}
399
+     apiKey: dummy
400
+     toolCallingMode: prompted-json
401
+ ```
402
+
403
+ 模型进程用 `llama-server -m /path/to/model.gguf --host 127.0.0.1 --port 8080` 单独启动。`apiKey: dummy` 会复用现有 OpenAI-compatible 的 auth omission 路径,因此 runtime 不会向本地 `llama-server` 发送 bearer auth。
404
+
389
405
  工作区布局:
390
406
 
391
407
  ```text
@@ -804,6 +820,7 @@ await stop(runtime);
804
820
  本地 GGUF 补充说明:
805
821
 
806
822
  - `provider: node-llama-cpp` 现在带有一层 LangChain 风格的 tool-binding shim,因此本地 GGUF 模型可以进入标准 tool-calling 路径,而不需要应用自己包一层 model wrapper
823
+ - `provider: openai-compatible` 可以指向外部 `llama-server` endpoint;当模型进程需要在 Node.js 外部单独调参、守护或部署时,继续复用这条已有路径
807
824
  - 对 `node-llama-cpp` 来说,`backend: langchain-v1` 仍然是更直接、当前已验证的本地 tool use 路径
808
825
  - `backend: deepagent` 也可以走到同一条 tool-calling 路径,但最终稳定性仍取决于所选模型是否能正确遵守 upstream tool schema
809
826
  - `agent-harness` 不会为每个模型的参数漂移或畸形 tool payload 做无限兼容;runtime 把调用交给 upstream tools 之后,schema fidelity 就属于模型责任
@@ -126,6 +126,11 @@ export type RuntimeToolExecutionToolPolicy = {
126
126
  hasInputSchema: boolean;
127
127
  requiresApproval: boolean;
128
128
  };
129
+ export type RuntimeToolGatewayToolPolicy = RuntimeToolExecutionToolPolicy & {
130
+ gatewayMode: "schema-first" | "approval-gated" | "best-effort";
131
+ modelRole: "propose";
132
+ runtimeRole: "validate-and-execute" | "request-approval" | "execute-with-runtime-checks";
133
+ };
129
134
  export type RuntimeSnapshotModel = {
130
135
  id: string;
131
136
  provider: string;
@@ -188,6 +193,26 @@ export type RuntimeSnapshot = {
188
193
  };
189
194
  export type RuntimeToolExecutionPolicy = {
190
195
  agentId: string;
196
+ gateway: {
197
+ layer: "tool-gateway";
198
+ toolScope: {
199
+ source: "agent-binding";
200
+ exposedToolCount: number;
201
+ schemaBoundToolCount: number;
202
+ approvalRequiredToolCount: number;
203
+ };
204
+ validation: {
205
+ strategy: "schema-first";
206
+ runtimeValidationRequired: boolean;
207
+ strictProviderSchemaPreferred: boolean;
208
+ };
209
+ correction: {
210
+ invalidArguments: "structured-error-retry";
211
+ maxModelRetries: number;
212
+ highRiskInvalidArguments: "approval-or-deny";
213
+ };
214
+ tools: RuntimeToolGatewayToolPolicy[];
215
+ };
191
216
  invokeTimeoutMs?: number;
192
217
  streamIdleTimeoutMs: number;
193
218
  providerRetries: {
@@ -1,2 +1,2 @@
1
- export declare const AGENT_HARNESS_VERSION = "0.0.400";
1
+ export declare const AGENT_HARNESS_VERSION = "0.0.402";
2
2
  export declare const AGENT_HARNESS_RELEASE_DATE = "2026-05-02";
@@ -1,2 +1,2 @@
1
- export const AGENT_HARNESS_VERSION = "0.0.400";
1
+ export const AGENT_HARNESS_VERSION = "0.0.402";
2
2
  export const AGENT_HARNESS_RELEASE_DATE = "2026-05-02";
@@ -20,6 +20,12 @@ const CLOSE_REQUIRED_PLAN_RECOVERY_INSTRUCTION = [
20
20
  "Your next action must be write_todos: update every remaining pending or in_progress item to completed if evidence was gathered, or failed if it cannot be completed with the available tools.",
21
21
  "After that write_todos call, provide the final answer required by the agent response format.",
22
22
  ].join("\n");
23
+ const RUN_EVIDENCE_AFTER_PREMATURE_PLAN_CLOSE_INSTRUCTION = [
24
+ "The required todo board was closed before any non-TODO evidence tool returned.",
25
+ "Do not call write_todos again yet.",
26
+ "Your next action must be exactly one non-TODO evidence tool call selected from the available tool descriptions and schemas.",
27
+ "After that evidence tool returns, update the todo board and then provide the final answer required by the agent response format.",
28
+ ].join("\n");
23
29
  const INITIAL_REQUIRED_PLAN_INSTRUCTION = [
24
30
  "This agent has a required visible planning contract.",
25
31
  "Your first action for this request must be write_todos with concrete task steps and statuses.",
@@ -193,10 +199,13 @@ function hasUsefulVisibleSynthesis(value) {
193
199
  if (/^(?:model_request|tool_call|call_tool)/iu.test(trimmed)) {
194
200
  return false;
195
201
  }
202
+ if (/^(?:name|tool_call_id)\s*=/iu.test(trimmed)) {
203
+ return false;
204
+ }
196
205
  if (/^(?:we\s+need\s+to|so\s+next\s+step\b)/iu.test(trimmed)) {
197
206
  return false;
198
207
  }
199
- if (/^\{\s*"(?:name|arguments|todos|symbol|query|market|count)"\s*:/iu.test(trimmed)) {
208
+ if (/^\{\s*"(?:name|arguments|args|argv|todos|symbol|query|market|count|stdout|stderr|exitCode)"\s*:/iu.test(trimmed)) {
200
209
  return false;
201
210
  }
202
211
  if (/^(?:stdout|stderr|exitCode)\s*:/iu.test(trimmed)) {
@@ -702,6 +711,11 @@ export async function* streamRuntimeExecution(options) {
702
711
  const streamedIncompletePlanRecoveryInstruction = requiresPlanEvidence(options.binding) && streamedExecutionEvidence.hasIncompletePlanState
703
712
  ? CLOSE_REQUIRED_PLAN_RECOVERY_INSTRUCTION
704
713
  : null;
714
+ const streamedPrematurePlanCloseRecoveryInstruction = requiresPlanEvidence(options.binding)
715
+ && streamedExecutionEvidence.hasPlanStateEvidence
716
+ && !streamedExecutionEvidence.hasSuccessfulNonTodoToolResultEvidence
717
+ ? RUN_EVIDENCE_AFTER_PREMATURE_PLAN_CLOSE_INSTRUCTION
718
+ : null;
705
719
  const delegatedExecutionRecoveryInstruction = !emittedUnsafeStreamSideEffects || streamedDelegatedRecoveryInstruction
706
720
  ? streamedDelegatedRecoveryInstruction
707
721
  : null;
@@ -734,6 +748,7 @@ export async function* streamRuntimeExecution(options) {
734
748
  ? INVALID_TOOL_SELECTION_RECOVERY_INSTRUCTION
735
749
  : delegatedExecutionRecoveryInstruction
736
750
  ?? streamedIncompletePlanRecoveryInstruction
751
+ ?? streamedPrematurePlanCloseRecoveryInstruction
737
752
  ?? streamedRuntimeFailureRecoveryInstruction
738
753
  ?? missingPlanRecoveryInstruction
739
754
  ?? streamedDelegationOnlyRecoveryInstruction
@@ -4,10 +4,15 @@ import { canReplayToolCallsLocally } from "./tool/tool-replay.js";
4
4
  import { extractToolCallsFromResult, normalizeToolArgsForSchema, stringifyToolOutput } from "./tool/tool-arguments.js";
5
5
  import { extractMemoryCandidatesFromToolOutput } from "../harness/system/runtime-memory-candidates.js";
6
6
  import { maybePersistLargeToolOutput } from "./tool/tool-output-artifacts.js";
7
+ import { toolRequiresRuntimeApproval } from "./tool/tool-hitl.js";
8
+ import { validateToolGatewayInput } from "../harness/tool-gateway/index.js";
7
9
  import { appendToolRecoveryInstruction, extractVisibleOutput, resolveMissingPlanRecoveryInstruction, resolveExecutionWithoutToolEvidenceTextInstruction, resolveToolCallRecoveryInstruction, sanitizeVisibleText, STRICT_TOOL_JSON_INSTRUCTION, } from "../parsing/output-parsing.js";
8
10
  import { salvageJsonToolCalls } from "../parsing/output-tool-args.js";
9
11
  import { AUTONOMOUS_INVESTIGATION_RECOVERY_INSTRUCTION } from "../prompts/runtime-prompts.js";
10
12
  const TOOL_FOLLOW_UP_INSTRUCTION = "One or more tool results are already available in this conversation. Answer the user's current request directly from the existing context and tool results. Do not ask the user to repeat inputs that are already present above.";
13
+ function isObject(value) {
14
+ return typeof value === "object" && value !== null && !Array.isArray(value);
15
+ }
11
16
  function readPlanStateSummary(output) {
12
17
  if (typeof output !== "object" || output === null) {
13
18
  return null;
@@ -38,32 +43,176 @@ function hasIncompleteExecutedPlan(executedToolResults) {
38
43
  }
39
44
  return false;
40
45
  }
46
+ function normalizeToolName(value) {
47
+ return typeof value === "string" ? value.trim().toLowerCase().replace(/[\s-]+/gu, "_") : "";
48
+ }
41
49
  function hasNonTodoToolEvidence(executedToolResults) {
42
- return executedToolResults.some((item) => item.toolName !== "write_todos" && item.toolName !== "read_todos");
50
+ return executedToolResults.some((item) => !isPlanToolName(item.toolName));
43
51
  }
44
52
  function isPlanToolName(toolName) {
45
- return toolName === "write_todos"
46
- || toolName === "read_todos"
47
- || toolName === "tool_call_write_todos"
48
- || toolName === "tool_call_read_todos";
53
+ const normalized = normalizeToolName(toolName);
54
+ return normalized === "write_todos"
55
+ || normalized === "read_todos"
56
+ || normalized === "tool_call_write_todos"
57
+ || normalized === "tool_call_read_todos";
49
58
  }
50
59
  function isFallbackTodoCompletionToolCall(toolCall) {
51
60
  return typeof toolCall.id === "string"
52
61
  && toolCall.id.startsWith("fallback-complete-")
53
62
  && (toolCall.name === "write_todos" || toolCall.name === "tool_call_write_todos");
54
63
  }
55
- function isCompletedTodoUpdateToolCall(toolCall) {
56
- if (toolCall.name !== "write_todos" && toolCall.name !== "tool_call_write_todos") {
64
+ function isTerminalTodoUpdateToolCall(toolCall) {
65
+ if (!isPlanToolName(toolCall.name) || normalizeToolName(toolCall.name).includes("read_todos")) {
57
66
  return false;
58
67
  }
59
68
  if (typeof toolCall.args !== "object" || toolCall.args === null || !Array.isArray(toolCall.args.todos)) {
60
69
  return false;
61
70
  }
62
71
  const todos = toolCall.args.todos;
63
- return todos.length > 0 && todos.every((todo) => typeof todo === "object"
64
- && todo !== null
65
- && typeof todo.status === "string"
66
- && todo.status.trim().toLowerCase() === "completed");
72
+ return todos.length > 0 && todos.every((todo) => {
73
+ if (typeof todo !== "object" || todo === null || typeof todo.status !== "string") {
74
+ return false;
75
+ }
76
+ const status = todo.status.trim().toLowerCase();
77
+ return status !== "pending" && status !== "in_progress";
78
+ });
79
+ }
80
+ function readSchemaShape(schema) {
81
+ if (!isObject(schema)) {
82
+ return null;
83
+ }
84
+ if (isObject(schema.properties)) {
85
+ return schema.properties;
86
+ }
87
+ if (isObject(schema.shape)) {
88
+ return schema.shape;
89
+ }
90
+ const def = schema._def;
91
+ if (!def) {
92
+ return null;
93
+ }
94
+ const shape = typeof def.shape === "function" ? def.shape() : def.shape;
95
+ return isObject(shape) ? shape : null;
96
+ }
97
+ function readSchemaDescription(schemaPart) {
98
+ if (!isObject(schemaPart)) {
99
+ return "";
100
+ }
101
+ const direct = schemaPart.description;
102
+ if (typeof direct === "string") {
103
+ return direct;
104
+ }
105
+ const nested = schemaPart._def;
106
+ if (typeof nested?.description === "string") {
107
+ return nested.description;
108
+ }
109
+ return readSchemaDescription(nested?.innerType);
110
+ }
111
+ function readSchemaDefault(schemaPart) {
112
+ if (!isObject(schemaPart)) {
113
+ return undefined;
114
+ }
115
+ const typed = schemaPart;
116
+ const hasJsonDefault = Object.prototype.hasOwnProperty.call(schemaPart, "default") && typeof typed.default !== "function";
117
+ if (hasJsonDefault) {
118
+ return typed.default;
119
+ }
120
+ if (Object.prototype.hasOwnProperty.call(schemaPart, "const")) {
121
+ return typed.const;
122
+ }
123
+ const def = schemaPart._def;
124
+ if (!def) {
125
+ return undefined;
126
+ }
127
+ if (def.defaultValue !== undefined) {
128
+ return typeof def.defaultValue === "function" ? def.defaultValue() : def.defaultValue;
129
+ }
130
+ return readSchemaDefault(def.innerType);
131
+ }
132
+ function parseFirstStringArrayExample(description) {
133
+ const arrayMatch = description.match(/\[[^\]]+\]/u);
134
+ if (!arrayMatch) {
135
+ return null;
136
+ }
137
+ const values = [...arrayMatch[0].matchAll(/["']([^"']+)["']/gu)].map((match) => match[1]).filter(Boolean);
138
+ return values.length > 0 ? values : null;
139
+ }
140
+ function buildGenericFallbackArgsFromSchema(schema, latestUserInput) {
141
+ const shape = readSchemaShape(schema);
142
+ if (!shape) {
143
+ return {};
144
+ }
145
+ const args = {};
146
+ for (const [key, schemaPart] of Object.entries(shape)) {
147
+ const defaultValue = readSchemaDefault(schemaPart);
148
+ if (defaultValue !== undefined) {
149
+ args[key] = defaultValue;
150
+ continue;
151
+ }
152
+ const description = readSchemaDescription(schemaPart);
153
+ const arrayExample = parseFirstStringArrayExample(description);
154
+ if (arrayExample) {
155
+ args[key] = arrayExample;
156
+ continue;
157
+ }
158
+ if (latestUserInput
159
+ && !args[key]
160
+ && /(?:query|question|prompt|input|text)/iu.test(`${key} ${description}`)) {
161
+ args[key] = latestUserInput;
162
+ }
163
+ }
164
+ return args;
165
+ }
166
+ function readTodoPlanTextFromToolCalls(toolCalls) {
167
+ const fragments = [];
168
+ for (const toolCall of toolCalls) {
169
+ if (typeof toolCall.args !== "object" || toolCall.args === null) {
170
+ continue;
171
+ }
172
+ const todos = toolCall.args.todos;
173
+ if (!Array.isArray(todos)) {
174
+ continue;
175
+ }
176
+ for (const todo of todos) {
177
+ if (typeof todo === "object" && todo !== null && typeof todo.content === "string") {
178
+ fragments.push(todo.content);
179
+ }
180
+ }
181
+ }
182
+ return fragments.join("\n");
183
+ }
184
+ function selectGenericFallbackEvidenceTool(params) {
185
+ const candidates = [];
186
+ const appendCandidate = (name) => {
187
+ if (isPlanToolName(name)) {
188
+ return;
189
+ }
190
+ const resolved = resolveModelFacingToolName(name, params.toolNameMapping, params.primaryTools);
191
+ const executable = params.executableTools.get(name)
192
+ ?? params.executableTools.get(resolved)
193
+ ?? params.builtinExecutableTools.get(name)
194
+ ?? params.builtinExecutableTools.get(resolved);
195
+ if (!executable || candidates.some((candidate) => candidate.executable.name === executable.name)) {
196
+ return;
197
+ }
198
+ candidates.push({ requestedName: name, executable });
199
+ };
200
+ for (const tool of params.primaryTools) {
201
+ appendCandidate(tool.name);
202
+ const modelFacing = params.toolNameMapping.originalToModelFacing.get(tool.name);
203
+ if (modelFacing) {
204
+ appendCandidate(modelFacing);
205
+ }
206
+ }
207
+ for (const name of [...params.executableTools.keys(), ...params.builtinExecutableTools.keys()]) {
208
+ appendCandidate(name);
209
+ }
210
+ if (candidates.length === 0) {
211
+ return null;
212
+ }
213
+ const normalizedPlanText = params.planText.toLowerCase();
214
+ return candidates.find((candidate) => normalizedPlanText.includes(candidate.requestedName.toLowerCase())
215
+ || normalizedPlanText.includes(candidate.executable.name.toLowerCase())) ?? candidates[0];
67
216
  }
68
217
  function buildDeterministicFinalFromToolEvidence(executedToolResults) {
69
218
  const evidence = executedToolResults
@@ -92,6 +241,11 @@ function latestToolErrorRecoveryInstruction(executedToolResults) {
92
241
  if (!latest || latest.isError !== true) {
93
242
  return null;
94
243
  }
244
+ if (typeof latest.output === "object" &&
245
+ latest.output !== null &&
246
+ latest.output.code === "INVALID_ARGUMENTS") {
247
+ return null;
248
+ }
95
249
  const message = typeof latest.output === "string" ? latest.output : JSON.stringify(latest.output);
96
250
  return resolveToolCallRecoveryInstruction(new Error(message)) ?? AUTONOMOUS_INVESTIGATION_RECOVERY_INSTRUCTION;
97
251
  }
@@ -140,12 +294,19 @@ export async function runLocalToolInvocationLoop({ binding, request, primaryTool
140
294
  const hasIncompletePlanState = hasIncompleteExecutedPlan(executedToolResults);
141
295
  const shouldEnforceIncompletePlan = requiresPlanEvidence(binding) && hasIncompletePlanState;
142
296
  const hasExecutionBeyondTodoPlanning = hasNonTodoToolEvidence(executedToolResults);
297
+ const missingInitialPlanRecoveryInstruction = resolveMissingPlanRecoveryInstruction({
298
+ request: activeRequest,
299
+ requiresPlan: requiresPlanEvidence(binding),
300
+ hasPlanStateEvidence: hasPlanStateEvidence(executedToolResults),
301
+ hasWriteTodosEvidence: executedToolResults.some((item) => item.toolName === "write_todos"),
302
+ hasToolResultEvidence: hasExecutionBeyondTodoPlanning,
303
+ });
143
304
  const toolErrorRecoveryInstruction = latestToolErrorRecoveryInstruction(executedToolResults)
144
305
  ?? terminalToolErrorRecoveryInstruction(terminalText);
145
306
  const leakedJsonToolCallRecoveryInstruction = terminalText && salvageJsonToolCalls(terminalText).length > 0
146
307
  ? STRICT_TOOL_JSON_INSTRUCTION
147
308
  : null;
148
- const recoveryInstruction = toolErrorRecoveryInstruction ?? leakedJsonToolCallRecoveryInstruction ?? (terminalText
309
+ const recoveryInstruction = toolErrorRecoveryInstruction ?? leakedJsonToolCallRecoveryInstruction ?? missingInitialPlanRecoveryInstruction ?? (terminalText
149
310
  ? resolveExecutionWithoutToolEvidenceTextInstruction(activeRequest, terminalText, false, {
150
311
  hasWriteTodosEvidence: executedToolResults.some((item) => item.toolName === "write_todos"),
151
312
  hasToolResultEvidence: hasExecutionBeyondTodoPlanning,
@@ -197,6 +358,7 @@ export async function runLocalToolInvocationLoop({ binding, request, primaryTool
197
358
  role: "system",
198
359
  content: TOOL_FOLLOW_UP_INSTRUCTION,
199
360
  });
361
+ const hadNonTodoEvidenceBeforeToolReplay = hasNonTodoToolEvidence(executedToolResults);
200
362
  for (let toolIndex = 0; toolIndex < toolCalls.length; toolIndex += 1) {
201
363
  const toolCall = toolCalls[toolIndex];
202
364
  const resolvedToolName = resolveModelFacingToolName(toolCall.name, toolNameMapping, primaryTools);
@@ -214,9 +376,28 @@ export async function runLocalToolInvocationLoop({ binding, request, primaryTool
214
376
  const normalizedArgs = normalizeToolArgsForSchema(toolCall.args, activeExecutable.schema, toolCall.rawArgsInput, {
215
377
  latestUserInput,
216
378
  });
379
+ const gateway = validateToolGatewayInput({
380
+ toolName: activeExecutable.name,
381
+ schema: activeExecutable.schema,
382
+ args: normalizedArgs,
383
+ requiresApproval: compiledTool ? toolRequiresRuntimeApproval(compiledTool) : false,
384
+ });
385
+ if (!gateway.ok) {
386
+ executedToolResults.push({
387
+ toolName: activeExecutable.name,
388
+ output: gateway.error,
389
+ isError: true,
390
+ });
391
+ nextMessages.push(new ToolMessage({
392
+ name: activeExecutable.name,
393
+ tool_call_id: toolCall.id ?? `tool-${iteration + 1}-${toolIndex + 1}`,
394
+ content: stringifyToolOutput(gateway.error),
395
+ }));
396
+ continue;
397
+ }
217
398
  const toolResult = toolRuntimeContext
218
- ? await activeExecutable.invoke(normalizedArgs, { toolRuntimeContext })
219
- : await activeExecutable.invoke(normalizedArgs);
399
+ ? await activeExecutable.invoke(gateway.input, { toolRuntimeContext })
400
+ : await activeExecutable.invoke(gateway.input);
220
401
  const memoryCandidates = compiledTool ? extractMemoryCandidatesFromToolOutput(compiledTool, toolResult) : [];
221
402
  const safeToolResult = await maybePersistLargeToolOutput({
222
403
  toolName: activeExecutable.name,
@@ -234,6 +415,53 @@ export async function runLocalToolInvocationLoop({ binding, request, primaryTool
234
415
  content: stringifyToolOutput(safeToolResult),
235
416
  }));
236
417
  }
418
+ if (requiresPlanEvidence(binding)
419
+ && !hadNonTodoEvidenceBeforeToolReplay
420
+ && !hasNonTodoToolEvidence(executedToolResults)
421
+ && toolCalls.length > 0
422
+ && toolCalls.every((toolCall) => isPlanToolName(toolCall.name))
423
+ && toolCalls.some(isTerminalTodoUpdateToolCall)) {
424
+ const fallbackEvidenceTool = selectGenericFallbackEvidenceTool({
425
+ planText: readTodoPlanTextFromToolCalls(toolCalls),
426
+ primaryTools,
427
+ toolNameMapping,
428
+ executableTools,
429
+ builtinExecutableTools,
430
+ });
431
+ if (fallbackEvidenceTool) {
432
+ const fallbackArgs = buildGenericFallbackArgsFromSchema(fallbackEvidenceTool.executable.schema, latestUserInput);
433
+ const normalizedArgs = normalizeToolArgsForSchema(fallbackArgs, fallbackEvidenceTool.executable.schema, undefined, {
434
+ latestUserInput,
435
+ });
436
+ const compiledTool = toolCatalog.get(fallbackEvidenceTool.requestedName) ?? toolCatalog.get(fallbackEvidenceTool.executable.name);
437
+ const gateway = validateToolGatewayInput({
438
+ toolName: fallbackEvidenceTool.executable.name,
439
+ schema: fallbackEvidenceTool.executable.schema,
440
+ args: normalizedArgs,
441
+ requiresApproval: compiledTool ? toolRequiresRuntimeApproval(compiledTool) : false,
442
+ });
443
+ if (gateway.ok) {
444
+ const toolResult = toolRuntimeContext
445
+ ? await fallbackEvidenceTool.executable.invoke(gateway.input, { toolRuntimeContext })
446
+ : await fallbackEvidenceTool.executable.invoke(gateway.input);
447
+ const memoryCandidates = compiledTool ? extractMemoryCandidatesFromToolOutput(compiledTool, toolResult) : [];
448
+ const safeToolResult = await maybePersistLargeToolOutput({
449
+ toolName: fallbackEvidenceTool.executable.name,
450
+ output: toolResult,
451
+ toolRuntimeContext: toolRuntimeContext,
452
+ });
453
+ executedToolResults.push({
454
+ toolName: fallbackEvidenceTool.executable.name,
455
+ output: safeToolResult,
456
+ ...(memoryCandidates.length > 0 ? { memoryCandidates } : {}),
457
+ });
458
+ return {
459
+ result: buildDeterministicFinalFromToolEvidence(executedToolResults),
460
+ executedToolResults,
461
+ };
462
+ }
463
+ }
464
+ }
237
465
  if (requiresPlanEvidence(binding)
238
466
  && toolCalls.length > 0
239
467
  && toolCalls.every((toolCall) => isPlanToolName(toolCall.name))
@@ -418,7 +418,7 @@ export async function invokeBuiltinTaskTool(input) {
418
418
  if (!hasSubagentExecutionToolEvidence(result, resolvedSubagentTools, selectedCompiledSubagent?.tools)) {
419
419
  result = await invokeSubagent([description, EXECUTION_WITH_TOOL_EVIDENCE_RETRY_INSTRUCTION].filter(Boolean).join("\n\n"));
420
420
  if (!hasSubagentExecutionToolEvidence(result, resolvedSubagentTools, selectedCompiledSubagent?.tools)) {
421
- throw new Error(`Delegated agent ${selectedSubagent.name} completed without tool execution evidence.`);
421
+ throw new Error(`Delegated agent ${selectedSubagent.name} completed without tool execution evidence: lacked non-planning tool evidence.`);
422
422
  }
423
423
  }
424
424
  const structuredResponse = typeof result === "object" && result !== null && "structuredResponse" in result
@@ -130,7 +130,7 @@ function isIncidentFollowUpTurn(inputText) {
130
130
  if (!normalized || hasExplicitResourceReference(normalized)) {
131
131
  return false;
132
132
  }
133
- return /(the rca|deep research.*rca|root cause|go deeper|those issues|these issues|that issue|current incident|kubernetes issues)/i.test(normalized);
133
+ return /(the rca|deep research.*rca|root cause|go deeper|those issues|these issues|that issue|current incident)/i.test(normalized);
134
134
  }
135
135
  function findLastAssistantText(history) {
136
136
  for (let index = history.length - 1; index >= 0; index -= 1) {