@botbotgo/agent-harness 0.0.441 → 0.0.443
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/package-version.d.ts +1 -1
- package/dist/package-version.js +1 -1
- package/dist/runtime/adapter/flow/invocation-flow.js +3 -2
- package/dist/runtime/adapter/flow/stream-runtime.js +19 -5
- package/dist/runtime/adapter/model/model-providers.js +3 -5
- package/dist/runtime/adapter/model/prompted-json-tool-policy.d.ts +4 -0
- package/dist/runtime/adapter/model/prompted-json-tool-policy.js +22 -0
- package/dist/runtime/agent-runtime-adapter.js +6 -6
- package/dist/runtime/harness/run/stream-run.js +2 -5
- package/package.json +1 -1
package/dist/package-version.d.ts
CHANGED
|
@@ -1,2 +1,2 @@
|
|
|
1
|
-
export declare const AGENT_HARNESS_VERSION = "0.0.441";
|
|
1
|
+
export declare const AGENT_HARNESS_VERSION = "0.0.443";
|
|
2
2
|
export declare const AGENT_HARNESS_RELEASE_DATE = "2026-05-04";
|
package/dist/package-version.js
CHANGED
|
@@ -1,2 +1,2 @@
|
|
|
1
|
-
export const AGENT_HARNESS_VERSION = "0.0.441";
|
|
1
|
+
export const AGENT_HARNESS_VERSION = "0.0.443";
|
|
2
2
|
export const AGENT_HARNESS_RELEASE_DATE = "2026-05-04";
|
package/dist/runtime/adapter/flow/invocation-flow.js
CHANGED
|
@@ -3,6 +3,7 @@ import { buildBindingToolExecutionContext } from "./execution-context.js";
|
|
|
3
3
|
import { finalizeRequestResult } from "../invocation-result.js";
|
|
4
4
|
import { invokeRuntimeWithLocalTools } from "./invoke-runtime.js";
|
|
5
5
|
import { buildInvocationRequest } from "../model/invocation-request.js";
|
|
6
|
+
import { withPromptedJsonToolPolicy } from "../model/prompted-json-tool-policy.js";
|
|
6
7
|
import { UPSTREAM_REQUEST_CONFIG_KEY, UPSTREAM_SESSION_CONFIG_KEY } from "../upstream-configurable-keys.js";
|
|
7
8
|
import { appendToolRecoveryInstruction, extractVisibleOutput, tryParseJson } from "../../parsing/output-parsing.js";
|
|
8
9
|
import { salvageJsonToolCalls } from "../../parsing/output-tool-args.js";
|
|
@@ -311,7 +312,7 @@ export async function executeRequestInvocation(options) {
|
|
|
311
312
|
if (options.resumePayload === undefined
|
|
312
313
|
&& options.binding.harnessRuntime.executionContract?.requiresPlan === true
|
|
313
314
|
&& invokeOptions.suppressInitialRequiredPlanInstruction !== true) {
|
|
314
|
-
request = appendToolRecoveryInstruction(request, INITIAL_REQUIRED_PLAN_INSTRUCTION);
|
|
315
|
+
request = withPromptedJsonToolPolicy(appendToolRecoveryInstruction(request, INITIAL_REQUIRED_PLAN_INSTRUCTION), "planning");
|
|
315
316
|
}
|
|
316
317
|
const { primaryTools, toolNameMapping, executableTools, defersToUpstreamHitlExecution, } = buildBindingToolExecutionContext({
|
|
317
318
|
binding: options.binding,
|
|
@@ -368,7 +369,7 @@ export async function executeRequestInvocation(options) {
|
|
|
368
369
|
? result.messages
|
|
369
370
|
: undefined;
|
|
370
371
|
const recoveryBase = messages ? { messages } : request;
|
|
371
|
-
const recoveredRequest = appendToolRecoveryInstruction(recoveryBase, WRITE_TODOS_REQUIRED_PLAN_INSTRUCTION);
|
|
372
|
+
const recoveredRequest = withPromptedJsonToolPolicy(appendToolRecoveryInstruction(recoveryBase, WRITE_TODOS_REQUIRED_PLAN_INSTRUCTION), "planning");
|
|
372
373
|
const recoveredInvocation = await invokeOnce(recoveredRequest);
|
|
373
374
|
result = recoveredInvocation.result;
|
|
374
375
|
executedToolResults.splice(0, executedToolResults.length, ...recoveredInvocation.executedToolResults);
|
package/dist/runtime/adapter/flow/stream-runtime.js
CHANGED
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
import { extractVisibleOutput, isToolCallRecoveryFailure, isRetrySafeInvalidToolSelectionError, appendToolRecoveryInstruction, resolveMissingPlanRecoveryInstruction, resolveExecutionWithoutToolEvidenceTextInstruction, shouldValidateExecutionWithoutToolEvidence, resolveToolCallRecoveryInstruction, sanitizeVisibleText, EXECUTION_WITH_TOOL_EVIDENCE_RETRY_INSTRUCTION, INVALID_TOOL_SELECTION_RECOVERY_INSTRUCTION, } from "../../parsing/output-parsing.js";
|
|
2
2
|
import { DELEGATED_TASK_FAILURE_RECOVERY_INSTRUCTION, DELEGATION_ONLY_RECOVERY_INSTRUCTION, } from "../../prompts/runtime-prompts.js";
|
|
3
3
|
import { buildInvocationRequest } from "../model/invocation-request.js";
|
|
4
|
+
import { PROMPTED_JSON_TOOL_POLICY_KEY, withPromptedJsonToolPolicy } from "../model/prompted-json-tool-policy.js";
|
|
4
5
|
import { buildRawModelMessages } from "../model/message-assembly.js";
|
|
5
6
|
import { projectRuntimeStreamEvent, createStreamEventProjectionState } from "../stream-event-projection.js";
|
|
6
7
|
import { projectTextStreamChunks } from "../stream-text-consumption.js";
|
|
@@ -293,6 +294,15 @@ function withSuppressedInitialRequiredPlanInstruction(options) {
|
|
|
293
294
|
suppressInitialRequiredPlanInstruction: true,
|
|
294
295
|
};
|
|
295
296
|
}
|
|
297
|
+
function withNonPlanningEvidenceToolPolicy(options) {
|
|
298
|
+
return {
|
|
299
|
+
...options,
|
|
300
|
+
state: {
|
|
301
|
+
...(typeof options.state === "object" && options.state !== null ? options.state : {}),
|
|
302
|
+
[PROMPTED_JSON_TOOL_POLICY_KEY]: "nonPlanningEvidence",
|
|
303
|
+
},
|
|
304
|
+
};
|
|
305
|
+
}
|
|
296
306
|
function hasParentLocalToolExecutionAfterDelegationFailure(originalEvidence, executedToolResults) {
|
|
297
307
|
return originalEvidence.hasFailedTaskDelegation
|
|
298
308
|
&& executedToolResults.some((item) => item.toolName !== "task");
|
|
@@ -575,7 +585,7 @@ function projectLocalToolExecutionProfileChunks(executedToolResults, prefix) {
|
|
|
575
585
|
export async function* streamRuntimeExecution(options) {
|
|
576
586
|
let request = buildInvocationRequest(options.binding, options.history, options.input, options.runtimeOptions);
|
|
577
587
|
if (requiresPlanEvidence(options.binding)) {
|
|
578
|
-
request = appendToolRecoveryInstruction(request, buildInitialRequiredPlanInstruction(options.primaryTools));
|
|
588
|
+
request = withPromptedJsonToolPolicy(appendToolRecoveryInstruction(request, buildInitialRequiredPlanInstruction(options.primaryTools)), "planning");
|
|
579
589
|
}
|
|
580
590
|
let emittedUnsafeStreamSideEffects = false;
|
|
581
591
|
const shouldProfile = options.runtimeOptions.profiling === true;
|
|
@@ -624,7 +634,11 @@ export async function* streamRuntimeExecution(options) {
|
|
|
624
634
|
yield modelStreamStart.chunk;
|
|
625
635
|
let stream;
|
|
626
636
|
try {
|
|
627
|
-
|
|
637
|
+
const rawStreamInput = buildRawModelMessages(options.binding, options.getSystemPrompt(options.binding), options.history, options.input, options.runtimeOptions.memoryContext);
|
|
638
|
+
const streamInput = requiresPlanEvidence(options.binding)
|
|
639
|
+
? withPromptedJsonToolPolicy(rawStreamInput, "planning")
|
|
640
|
+
: rawStreamInput;
|
|
641
|
+
stream = await options.withTimeout(() => options.langChainStreamModel.stream(streamInput), computeRemainingTimeoutMs(options.streamDeadlineAt, options.invokeTimeoutMs), "model stream start", "stream");
|
|
628
642
|
if (shouldProfile)
|
|
629
643
|
yield finishProfileStep({
|
|
630
644
|
id: "profile:agent:model-stream-start",
|
|
@@ -940,7 +954,7 @@ export async function* streamRuntimeExecution(options) {
|
|
|
940
954
|
if (earlyStreamRecoveryInstruction) {
|
|
941
955
|
const earlyRecoveryRuntimeOptions = earlyStreamRecoverySuppressInitialPlan
|
|
942
956
|
? {
|
|
943
|
-
...withSuppressedInitialRequiredPlanInstruction(options.runtimeOptions),
|
|
957
|
+
...withNonPlanningEvidenceToolPolicy(withSuppressedInitialRequiredPlanInstruction(options.runtimeOptions)),
|
|
944
958
|
externalPlanEvidence: true,
|
|
945
959
|
...(earlyStreamExternalPlanEvidenceTools && earlyStreamExternalPlanEvidenceTools.length > 0
|
|
946
960
|
? { externalPlanEvidenceTools: earlyStreamExternalPlanEvidenceTools }
|
|
@@ -1062,7 +1076,7 @@ export async function* streamRuntimeExecution(options) {
|
|
|
1062
1076
|
if (retryInstruction) {
|
|
1063
1077
|
const retryRuntimeOptions = retryInstruction === streamedIncompletePlanRecoveryInstruction
|
|
1064
1078
|
|| retryInstruction === streamedPrematurePlanCloseRecoveryInstruction
|
|
1065
|
-
? withSuppressedInitialRequiredPlanInstruction(options.runtimeOptions)
|
|
1079
|
+
? withNonPlanningEvidenceToolPolicy(withSuppressedInitialRequiredPlanInstruction(options.runtimeOptions))
|
|
1066
1080
|
: options.runtimeOptions;
|
|
1067
1081
|
let retried;
|
|
1068
1082
|
retried = await options.invoke(options.applyToolRecoveryInstruction(options.binding, retryInstruction), options.input, options.sessionId, options.runtimeOptions.requestId ?? options.sessionId, undefined, options.history, retryRuntimeOptions);
|
|
@@ -1299,7 +1313,7 @@ export async function* streamRuntimeExecution(options) {
|
|
|
1299
1313
|
const invokeFallbackRuntimeOptions = effectiveInvokeFallbackRecoveryInstruction === invokeFallbackIncompletePlanRecoveryInstruction
|
|
1300
1314
|
|| effectiveInvokeFallbackRecoveryInstruction === invokeFallbackPlanWithoutEvidenceRecoveryInstruction
|
|
1301
1315
|
? {
|
|
1302
|
-
...withSuppressedInitialRequiredPlanInstruction(options.runtimeOptions),
|
|
1316
|
+
...withNonPlanningEvidenceToolPolicy(withSuppressedInitialRequiredPlanInstruction(options.runtimeOptions)),
|
|
1303
1317
|
externalPlanEvidence: true,
|
|
1304
1318
|
}
|
|
1305
1319
|
: options.runtimeOptions;
|
package/dist/runtime/adapter/model/model-providers.js
CHANGED
|
@@ -10,6 +10,7 @@ import { salvageJsonToolCalls } from "../../parsing/output-tool-args.js";
|
|
|
10
10
|
import { normalizeModelFacingToolSchema } from "../tool/resolved-tool.js";
|
|
11
11
|
import { normalizeOpenAICompatibleInit } from "../compat/openai-compatible.js";
|
|
12
12
|
import { recordPromptedJsonToolCall } from "./prompted-json-tool-call-capture.js";
|
|
13
|
+
import { readPromptedJsonToolPolicy } from "./prompted-json-tool-policy.js";
|
|
13
14
|
const NODE_LLAMA_CPP_TOOL_CALL_INSTRUCTION = [
|
|
14
15
|
"Available tools are listed below.",
|
|
15
16
|
"If you need a tool, respond with only one JSON object.",
|
|
@@ -342,8 +343,7 @@ function hasPriorPlanningToolResult(input) {
|
|
|
342
343
|
|| hasPriorToolResultForToolName(input, "call_read_todos");
|
|
343
344
|
}
|
|
344
345
|
function shouldLimitToolsToPlanning(input, boundTools) {
|
|
345
|
-
|
|
346
|
-
return text.includes("required visible planning contract")
|
|
346
|
+
return readPromptedJsonToolPolicy(input) === "planning"
|
|
347
347
|
&& !hasPriorToolResultForToolName(input, "write_todos")
|
|
348
348
|
&& !hasPriorToolResultForToolName(input, "tool_call_write_todos")
|
|
349
349
|
&& !hasPriorToolResultForToolName(input, "call_write_todos")
|
|
@@ -357,9 +357,7 @@ function selectPlanningToolsForTurn(input, boundTools) {
|
|
|
357
357
|
return planningTools.length > 0 ? planningTools : boundTools;
|
|
358
358
|
}
|
|
359
359
|
function shouldLimitToolsToNonPlanningEvidence(input, boundTools) {
|
|
360
|
-
|
|
361
|
-
const hasNonPlanningEvidenceInstruction = /non[-\s]?planning (?:evidence )?tool call|non[-\s]?TODO evidence tool|Do not call write_todos|Do not call write_todos or read_todos/i.test(text);
|
|
362
|
-
return (hasPriorPlanningToolResult(input) || hasNonPlanningEvidenceInstruction)
|
|
360
|
+
return (hasPriorPlanningToolResult(input) || readPromptedJsonToolPolicy(input) === "nonPlanningEvidence")
|
|
363
361
|
&& !hasPriorNonPlanningToolResult(input, boundTools);
|
|
364
362
|
}
|
|
365
363
|
function selectNonPlanningToolsForTurn(boundTools) {
|
package/dist/runtime/adapter/model/prompted-json-tool-policy.d.ts
ADDED
|
@@ -0,0 +1,4 @@
|
|
|
1
|
+
export declare const PROMPTED_JSON_TOOL_POLICY_KEY = "__agentHarnessPromptedJsonToolPolicy";
|
|
2
|
+
export type PromptedJsonToolPolicy = "planning" | "nonPlanningEvidence";
|
|
3
|
+
export declare function readPromptedJsonToolPolicy(input: unknown): PromptedJsonToolPolicy | undefined;
|
|
4
|
+
export declare function withPromptedJsonToolPolicy<T>(input: T, policy: PromptedJsonToolPolicy): T;
|
package/dist/runtime/adapter/model/prompted-json-tool-policy.js
ADDED
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
export const PROMPTED_JSON_TOOL_POLICY_KEY = "__agentHarnessPromptedJsonToolPolicy";
|
|
2
|
+
export function readPromptedJsonToolPolicy(input) {
|
|
3
|
+
if (typeof input !== "object" || input === null) {
|
|
4
|
+
return undefined;
|
|
5
|
+
}
|
|
6
|
+
const value = input[PROMPTED_JSON_TOOL_POLICY_KEY];
|
|
7
|
+
return value === "planning" || value === "nonPlanningEvidence" ? value : undefined;
|
|
8
|
+
}
|
|
9
|
+
export function withPromptedJsonToolPolicy(input, policy) {
|
|
10
|
+
if (typeof input !== "object" || input === null) {
|
|
11
|
+
return input;
|
|
12
|
+
}
|
|
13
|
+
if (Array.isArray(input)) {
|
|
14
|
+
return Object.assign([...input], {
|
|
15
|
+
[PROMPTED_JSON_TOOL_POLICY_KEY]: policy,
|
|
16
|
+
});
|
|
17
|
+
}
|
|
18
|
+
return {
|
|
19
|
+
...input,
|
|
20
|
+
[PROMPTED_JSON_TOOL_POLICY_KEY]: policy,
|
|
21
|
+
};
|
|
22
|
+
}
|
package/dist/runtime/agent-runtime-adapter.js
CHANGED
|
@@ -45,13 +45,13 @@ function buildDelegatedPlanEvidenceBlocker(agentId) {
|
|
|
45
45
|
routing: [`delegated agent ${agentId}`],
|
|
46
46
|
plan: ["delegate to specialist", "require visible TODO planning evidence", "return blocker when planning evidence is absent"],
|
|
47
47
|
execution: [`task delegated to ${agentId}`, `delegated agent ${agentId} ended before producing required TODO plan evidence`],
|
|
48
|
-
todoTrace: [
|
|
48
|
+
todoTrace: [`${agentId}: TODO evidence missing; delegated agent ended before producing required planning evidence.`],
|
|
49
49
|
stepResults: ["delegated planning evidence was not observed"],
|
|
50
50
|
summary: [`Delegated agent ${agentId} ended before producing the required TODO plan evidence.`],
|
|
51
51
|
findings: ["The delegated run did not expose a valid planning trace, so the framework cannot treat the task as complete."],
|
|
52
52
|
blockers: ["missing delegated TODO planning evidence"],
|
|
53
53
|
nextActions: ["Retry with the same request or inspect the delegated agent configuration and model/tool-call behavior."],
|
|
54
|
-
report: `routing delegated to ${agentId}; todoTrace missing; stepResults blocked; summary missing planning evidence; findings require retry; blockers missing TODO planning evidence; nextActions inspect delegated model/tool behavior; report task delegated to ${agentId}.`,
|
|
54
|
+
report: `routing delegated to ${agentId}; todoTrace ${agentId}: TODO evidence missing; stepResults blocked; summary missing planning evidence; findings require retry; blockers missing TODO planning evidence; nextActions inspect delegated model/tool behavior; report task delegated to ${agentId}.`,
|
|
55
55
|
});
|
|
56
56
|
}
|
|
57
57
|
function buildDelegatedExecutionEvidenceBlocker(agentId, expectedToolNames = []) {
|
|
@@ -65,7 +65,7 @@ function buildDelegatedExecutionEvidenceBlocker(agentId, expectedToolNames = [])
|
|
|
65
65
|
`expected evidence tools: ${expectedTools}`,
|
|
66
66
|
`delegated agent ${agentId} did not return any non-planning tool evidence after retry`,
|
|
67
67
|
],
|
|
68
|
-
todoTrace: [
|
|
68
|
+
todoTrace: [`${agentId}: TODO evidence observed; delegated planning board did not produce completed non-planning evidence.`],
|
|
69
69
|
stepResults: ["delegated execution evidence was not observed"],
|
|
70
70
|
summary: [`Delegated agent ${agentId} did not return any non-planning tool evidence after retry.`],
|
|
71
71
|
findings: [
|
|
@@ -75,7 +75,7 @@ function buildDelegatedExecutionEvidenceBlocker(agentId, expectedToolNames = [])
|
|
|
75
75
|
],
|
|
76
76
|
blockers: ["missing delegated non-planning tool evidence"],
|
|
77
77
|
nextActions: ["Retry the request or inspect the delegated agent's model/tool-call behavior."],
|
|
78
|
-
report: `routing delegated to ${agentId}; todoTrace observed planning
|
|
78
|
+
report: `routing delegated to ${agentId}; todoTrace ${agentId}: TODO evidence observed but non-planning evidence missing; stepResults blocked; summary missing non-planning tool evidence; findings expected evidence tools ${expectedTools}; blockers missing execution evidence; nextActions inspect delegated model/tool behavior; report task delegated to ${agentId}.`,
|
|
79
79
|
});
|
|
80
80
|
}
|
|
81
81
|
function normalizePlanToolName(toolName) {
|
|
@@ -1562,8 +1562,8 @@ export class AgentRuntimeAdapter {
|
|
|
1562
1562
|
const fallbackTodoTrace = [
|
|
1563
1563
|
`1) TODO observed: delegated to ${delegatedSubagentType}.`,
|
|
1564
1564
|
uniqueToolNames.some((toolName) => isPlanToolName(toolName))
|
|
1565
|
-
?
|
|
1566
|
-
:
|
|
1565
|
+
? `2) ${delegatedSubagentType}: TODO evidence observed; delegated specialist invoked write_todos.`
|
|
1566
|
+
: `2) ${delegatedSubagentType}: TODO evidence missing; delegated specialist did not expose write_todos in returned metadata.`,
|
|
1567
1567
|
state === "failed"
|
|
1568
1568
|
? "3) TODO closed: delegated execution failed; blocker reported."
|
|
1569
1569
|
: "3) TODO closed: delegated execution completed; synthesis returned.",
|
package/dist/runtime/harness/run/stream-run.js
CHANGED
|
@@ -54,10 +54,7 @@ function inferPlanItemStatusFromTerminalAssistantOutput(value) {
|
|
|
54
54
|
if (!normalized) {
|
|
55
55
|
return null;
|
|
56
56
|
}
|
|
57
|
-
if (normalized.startsWith("runtime_error=")
|
|
58
|
-
|| /\bterminated\b/i.test(normalized)
|
|
59
|
-
|| /\b(?:blocked|blocker|failed|failure|refused|unable to complete|could not complete)\b/i.test(normalized)
|
|
60
|
-
|| /(?:执行失败|未能完成|无法完成|阻塞|失败)/u.test(normalized)) {
|
|
57
|
+
if (normalized.startsWith("runtime_error=")) {
|
|
61
58
|
return "failed";
|
|
62
59
|
}
|
|
63
60
|
return isSubstantiveTerminalAssistantOutput(value) ? "completed" : null;
|
|
@@ -1065,7 +1062,7 @@ export async function* streamHarnessRun(options) {
|
|
|
1065
1062
|
if (terminalAssistantPlanItemStatus === "failed"
|
|
1066
1063
|
&& sawSuccessfulToolResult
|
|
1067
1064
|
&& !explicitTerminalAssistantStatus
|
|
1068
|
-
&&
|
|
1065
|
+
&& !sanitizeVisibleText(assistantOutput).trim().toLowerCase().startsWith("runtime_error=")
|
|
1069
1066
|
&& assistantOutput.trim()) {
|
|
1070
1067
|
terminalAssistantPlanItemStatus = "completed";
|
|
1071
1068
|
}
|