@botbotgo/agent-harness 0.0.418 → 0.0.419
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/package-version.d.ts +1 -1
- package/dist/package-version.js +1 -1
- package/dist/runtime/adapter/compat/openai-compatible.js +12 -0
- package/dist/runtime/adapter/flow/invocation-flow.d.ts +2 -0
- package/dist/runtime/adapter/flow/invocation-flow.js +13 -5
- package/dist/runtime/adapter/flow/invoke-runtime.d.ts +1 -0
- package/dist/runtime/adapter/flow/invoke-runtime.js +1 -0
- package/dist/runtime/adapter/flow/stream-runtime.d.ts +4 -0
- package/dist/runtime/adapter/flow/stream-runtime.js +177 -14
- package/dist/runtime/adapter/invocation-result.js +17 -6
- package/dist/runtime/adapter/local-tool-invocation.d.ts +2 -1
- package/dist/runtime/adapter/local-tool-invocation.js +241 -21
- package/dist/runtime/adapter/model/model-providers.js +261 -58
- package/dist/runtime/adapter/model/prompted-json-tool-call-capture.d.ts +9 -0
- package/dist/runtime/adapter/model/prompted-json-tool-call-capture.js +40 -0
- package/dist/runtime/adapter/runtime-adapter-support.js +58 -12
- package/dist/runtime/adapter/runtime-shell.js +3 -2
- package/dist/runtime/adapter/stream-event-projection.js +22 -5
- package/dist/runtime/adapter/tool/tool-arguments.js +157 -67
- package/dist/runtime/adapter/tool/tool-replay.js +0 -4
- package/dist/runtime/agent-runtime-adapter.d.ts +3 -0
- package/dist/runtime/agent-runtime-adapter.js +217 -73
- package/dist/runtime/harness/run/stream-run.js +20 -1
- package/dist/workspace/resource-compilers.js +17 -4
- package/package.json +1 -1
|
@@ -1,2 +1,2 @@
|
|
|
1
|
-
export declare const AGENT_HARNESS_VERSION = "0.0.418";
|
|
1
|
+
export declare const AGENT_HARNESS_VERSION = "0.0.419";
|
|
2
2
|
export declare const AGENT_HARNESS_RELEASE_DATE = "2026-05-02";
|
package/dist/package-version.js
CHANGED
|
@@ -1,2 +1,2 @@
|
|
|
1
|
-
export const AGENT_HARNESS_VERSION = "0.0.418";
|
|
1
|
+
export const AGENT_HARNESS_VERSION = "0.0.419";
|
|
2
2
|
export const AGENT_HARNESS_RELEASE_DATE = "2026-05-02";
|
|
@@ -27,6 +27,7 @@ export function buildAuthOmittingFetch(baseFetch = fetch) {
|
|
|
27
27
|
export function normalizeOpenAICompatibleInit(init) {
|
|
28
28
|
const normalized = { ...init };
|
|
29
29
|
const configuration = asObject(init.configuration) ?? {};
|
|
30
|
+
const modelKwargs = asObject(init.modelKwargs) ?? {};
|
|
30
31
|
const baseUrl = typeof init.baseUrl === "string" && init.baseUrl.trim() ? init.baseUrl.trim() : undefined;
|
|
31
32
|
const omitAuthHeader = init.omitAuthHeader === true || isPlaceholderApiKey(init.apiKey);
|
|
32
33
|
const nextConfiguration = { ...configuration };
|
|
@@ -36,8 +37,19 @@ export function normalizeOpenAICompatibleInit(init) {
|
|
|
36
37
|
if (omitAuthHeader) {
|
|
37
38
|
nextConfiguration.fetch = buildAuthOmittingFetch(typeof configuration.fetch === "function" ? configuration.fetch : fetch);
|
|
38
39
|
}
|
|
40
|
+
if (typeof init.numPredict === "number" && typeof normalized.maxTokens !== "number") {
|
|
41
|
+
normalized.maxTokens = init.numPredict;
|
|
42
|
+
}
|
|
43
|
+
if (typeof init.numCtx === "number" && typeof modelKwargs.num_ctx !== "number") {
|
|
44
|
+
normalized.modelKwargs = {
|
|
45
|
+
...modelKwargs,
|
|
46
|
+
num_ctx: init.numCtx,
|
|
47
|
+
};
|
|
48
|
+
}
|
|
39
49
|
normalized.configuration = nextConfiguration;
|
|
40
50
|
delete normalized.baseUrl;
|
|
41
51
|
delete normalized.omitAuthHeader;
|
|
52
|
+
delete normalized.numPredict;
|
|
53
|
+
delete normalized.numCtx;
|
|
42
54
|
return normalized;
|
|
43
55
|
}
|
|
@@ -14,6 +14,8 @@ export declare function executeRequestInvocation(options: {
|
|
|
14
14
|
files?: Record<string, unknown>;
|
|
15
15
|
memoryContext?: string;
|
|
16
16
|
toolRuntimeContext?: Record<string, unknown>;
|
|
17
|
+
suppressInitialRequiredPlanInstruction?: boolean;
|
|
18
|
+
externalPlanEvidence?: boolean;
|
|
17
19
|
};
|
|
18
20
|
resolveTools: (tools: CompiledTool[], binding?: CompiledAgentBinding) => unknown[];
|
|
19
21
|
getToolNameMapping: (binding: CompiledAgentBinding) => ToolNameMapping;
|
|
@@ -50,11 +50,17 @@ function isDelegationOnlyBinding(binding) {
|
|
|
50
50
|
function hasTaskDelegationEvidence(executedToolResults) {
|
|
51
51
|
return executedToolResults.some((item) => item.toolName === "task");
|
|
52
52
|
}
|
|
53
|
+
function normalizePlanToolName(toolName) {
|
|
54
|
+
return typeof toolName === "string" ? toolName.trim().toLowerCase().replace(/[\s-]+/gu, "_") : "";
|
|
55
|
+
}
|
|
53
56
|
function isPlanToolName(toolName) {
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
||
|
|
57
|
-
||
|
|
57
|
+
const normalized = normalizePlanToolName(toolName);
|
|
58
|
+
return normalized === "write_todos"
|
|
59
|
+
|| normalized === "read_todos"
|
|
60
|
+
|| normalized === "tool_call_write_todos"
|
|
61
|
+
|| normalized === "tool_call_read_todos"
|
|
62
|
+
|| normalized === "call_write_todos"
|
|
63
|
+
|| normalized === "call_read_todos";
|
|
58
64
|
}
|
|
59
65
|
function hasPlanToolEvidence(executedToolResults) {
|
|
60
66
|
return executedToolResults.some((item) => isPlanToolName(item.toolName));
|
|
@@ -303,7 +309,8 @@ export async function executeRequestInvocation(options) {
|
|
|
303
309
|
? buildInvocationRequest(options.binding, history, options.input, invokeOptions)
|
|
304
310
|
: new Command({ resume: options.resumePayload });
|
|
305
311
|
if (options.resumePayload === undefined
|
|
306
|
-
&& options.binding.harnessRuntime.executionContract?.requiresPlan === true
|
|
312
|
+
&& options.binding.harnessRuntime.executionContract?.requiresPlan === true
|
|
313
|
+
&& invokeOptions.suppressInitialRequiredPlanInstruction !== true) {
|
|
307
314
|
request = appendToolRecoveryInstruction(request, INITIAL_REQUIRED_PLAN_INSTRUCTION);
|
|
308
315
|
}
|
|
309
316
|
const { primaryTools, toolNameMapping, executableTools, defersToUpstreamHitlExecution, } = buildBindingToolExecutionContext({
|
|
@@ -331,6 +338,7 @@ export async function executeRequestInvocation(options) {
|
|
|
331
338
|
builtinExecutableTools: builtinExecutableTools,
|
|
332
339
|
callRuntimeWithToolParseRecovery: options.callRuntimeWithToolParseRecovery,
|
|
333
340
|
toolRuntimeContext: invokeOptions.toolRuntimeContext,
|
|
341
|
+
externalPlanEvidence: invokeOptions.externalPlanEvidence,
|
|
334
342
|
});
|
|
335
343
|
let localOrUpstreamInvocation = await invokeOnce(request);
|
|
336
344
|
if (options.resumePayload === undefined
|
|
@@ -17,6 +17,7 @@ export declare function invokeRuntimeWithLocalTools(options: {
|
|
|
17
17
|
builtinExecutableTools: Map<string, ExecutableTool>;
|
|
18
18
|
callRuntimeWithToolParseRecovery: (request: unknown) => Promise<Record<string, unknown>>;
|
|
19
19
|
toolRuntimeContext?: Record<string, unknown>;
|
|
20
|
+
externalPlanEvidence?: boolean;
|
|
20
21
|
}): Promise<{
|
|
21
22
|
result: Record<string, unknown>;
|
|
22
23
|
executedToolResults: ExecutedToolResult[];
|
|
@@ -15,5 +15,6 @@ export async function invokeRuntimeWithLocalTools(options) {
|
|
|
15
15
|
builtinExecutableTools: options.builtinExecutableTools,
|
|
16
16
|
callRuntimeWithToolParseRecovery: options.callRuntimeWithToolParseRecovery,
|
|
17
17
|
toolRuntimeContext: options.toolRuntimeContext,
|
|
18
|
+
externalPlanEvidence: options.externalPlanEvidence,
|
|
18
19
|
});
|
|
19
20
|
}
|
|
@@ -21,6 +21,8 @@ export declare function streamRuntimeExecution(options: {
|
|
|
21
21
|
memoryContext?: string;
|
|
22
22
|
profiling?: boolean;
|
|
23
23
|
toolRuntimeContext?: Record<string, unknown>;
|
|
24
|
+
suppressInitialRequiredPlanInstruction?: boolean;
|
|
25
|
+
externalPlanEvidence?: boolean;
|
|
24
26
|
};
|
|
25
27
|
primaryTools: CompiledTool[];
|
|
26
28
|
toolNameMapping: ToolNameMapping;
|
|
@@ -47,6 +49,8 @@ export declare function streamRuntimeExecution(options: {
|
|
|
47
49
|
files?: Record<string, unknown>;
|
|
48
50
|
memoryContext?: string;
|
|
49
51
|
toolRuntimeContext?: Record<string, unknown>;
|
|
52
|
+
suppressInitialRequiredPlanInstruction?: boolean;
|
|
53
|
+
externalPlanEvidence?: boolean;
|
|
50
54
|
}) => Promise<{
|
|
51
55
|
output: string;
|
|
52
56
|
metadata?: Record<string, unknown>;
|
|
@@ -26,13 +26,43 @@ const RUN_EVIDENCE_AFTER_PREMATURE_PLAN_CLOSE_INSTRUCTION = [
|
|
|
26
26
|
"Your next action must be exactly one non-TODO evidence tool call selected from the available tool descriptions and schemas.",
|
|
27
27
|
"After that evidence tool returns, update the todo board and then provide the final answer required by the agent response format.",
|
|
28
28
|
].join("\n");
|
|
29
|
+
function readPrimaryToolName(tool) {
|
|
30
|
+
return typeof tool.name === "string" ? tool.name.trim() : "";
|
|
31
|
+
}
|
|
32
|
+
function buildRunEvidenceAfterPlanInstruction(primaryTools) {
|
|
33
|
+
const toolNames = primaryTools
|
|
34
|
+
.map(readPrimaryToolName)
|
|
35
|
+
.filter((name) => name.length > 0 && !isPlanToolName(name));
|
|
36
|
+
if (toolNames.length === 0) {
|
|
37
|
+
return RUN_EVIDENCE_AFTER_PREMATURE_PLAN_CLOSE_INSTRUCTION;
|
|
38
|
+
}
|
|
39
|
+
return [
|
|
40
|
+
RUN_EVIDENCE_AFTER_PREMATURE_PLAN_CLOSE_INSTRUCTION,
|
|
41
|
+
"",
|
|
42
|
+
`Available non-planning tool names: ${toolNames.join(", ")}.`,
|
|
43
|
+
].join("\n");
|
|
44
|
+
}
|
|
29
45
|
const INITIAL_REQUIRED_PLAN_INSTRUCTION = [
|
|
30
46
|
"This agent has a required visible planning contract.",
|
|
31
47
|
"Your first action for this request must be write_todos with concrete task steps and statuses.",
|
|
32
48
|
"Do not call any domain/evidence tool and do not provide a final answer before the initial write_todos call succeeds.",
|
|
49
|
+
"After write_todos succeeds, do not call write_todos or read_todos again until one non-planning evidence tool returns.",
|
|
33
50
|
"Do not use placeholders like '1', '2', '3', 'step 1', or generic labels. Each todo must name the concrete work it represents.",
|
|
34
51
|
"After each evidence step, update the todo board. Before the final answer, close every todo as completed or failed.",
|
|
35
52
|
].join("\n");
|
|
53
|
+
function buildInitialRequiredPlanInstruction(primaryTools) {
|
|
54
|
+
const toolNames = primaryTools
|
|
55
|
+
.map(readPrimaryToolName)
|
|
56
|
+
.filter((name) => name.length > 0 && !isPlanToolName(name));
|
|
57
|
+
if (toolNames.length === 0) {
|
|
58
|
+
return INITIAL_REQUIRED_PLAN_INSTRUCTION;
|
|
59
|
+
}
|
|
60
|
+
return [
|
|
61
|
+
INITIAL_REQUIRED_PLAN_INSTRUCTION,
|
|
62
|
+
"",
|
|
63
|
+
`After the initial todo board, select the next non-planning tool from these declared tool names: ${toolNames.join(", ")}.`,
|
|
64
|
+
].join("\n");
|
|
65
|
+
}
|
|
36
66
|
function toVisibleContent(value) {
|
|
37
67
|
const extracted = extractVisibleOutput(value);
|
|
38
68
|
return extracted ? sanitizeVisibleText(extracted) : "";
|
|
@@ -98,11 +128,17 @@ function hasIncompletePlanOutput(value) {
|
|
|
98
128
|
}
|
|
99
129
|
return null;
|
|
100
130
|
}
|
|
131
|
+
function normalizePlanToolName(toolName) {
|
|
132
|
+
return typeof toolName === "string" ? toolName.trim().toLowerCase().replace(/[\s-]+/gu, "_") : "";
|
|
133
|
+
}
|
|
101
134
|
function isPlanToolName(toolName) {
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
||
|
|
105
|
-
||
|
|
135
|
+
const normalized = normalizePlanToolName(toolName);
|
|
136
|
+
return normalized === "write_todos"
|
|
137
|
+
|| normalized === "read_todos"
|
|
138
|
+
|| normalized === "tool_call_write_todos"
|
|
139
|
+
|| normalized === "tool_call_read_todos"
|
|
140
|
+
|| normalized === "call_write_todos"
|
|
141
|
+
|| normalized === "call_read_todos";
|
|
106
142
|
}
|
|
107
143
|
function isCompletedPlanToolResultChunk(chunk) {
|
|
108
144
|
if (chunk.kind !== "tool-result" || !isPlanToolName(chunk.toolName)) {
|
|
@@ -122,6 +158,12 @@ function hasSuccessfulTaskToolEvidence(executedToolResults) {
|
|
|
122
158
|
function requiresPlanEvidence(binding) {
|
|
123
159
|
return binding.harnessRuntime?.executionContract?.requiresPlan === true;
|
|
124
160
|
}
|
|
161
|
+
function withSuppressedInitialRequiredPlanInstruction(options) {
|
|
162
|
+
return {
|
|
163
|
+
...options,
|
|
164
|
+
suppressInitialRequiredPlanInstruction: true,
|
|
165
|
+
};
|
|
166
|
+
}
|
|
125
167
|
function hasParentLocalToolExecutionAfterDelegationFailure(originalEvidence, executedToolResults) {
|
|
126
168
|
return originalEvidence.hasFailedTaskDelegation
|
|
127
169
|
&& executedToolResults.some((item) => item.toolName !== "task");
|
|
@@ -269,6 +311,14 @@ function isOpenAICompatibleStreamingCompatibilityError(binding, error) {
|
|
|
269
311
|
const message = error instanceof Error ? error.message : String(error);
|
|
270
312
|
return message.toLowerCase().includes("received empty response from chat model call");
|
|
271
313
|
}
|
|
314
|
+
function isGraphRecursionLimitError(error) {
|
|
315
|
+
const code = typeof error === "object" && error !== null && "lc_error_code" in error
|
|
316
|
+
? String(error.lc_error_code ?? "")
|
|
317
|
+
: "";
|
|
318
|
+
const message = error instanceof Error ? error.message : String(error);
|
|
319
|
+
return code === "GRAPH_RECURSION_LIMIT"
|
|
320
|
+
|| /Recursion limit .* without hitting a stop condition|GRAPH_RECURSION_LIMIT/i.test(message);
|
|
321
|
+
}
|
|
272
322
|
function hasDelegationEvidence(evidence) {
|
|
273
323
|
return (evidence.hasSuccessfulTaskToolEvidence
|
|
274
324
|
|| evidence.hasOpenTaskDelegation
|
|
@@ -358,10 +408,45 @@ function finishProfileStep(input) {
|
|
|
358
408
|
...(input.error !== undefined ? { error: input.error instanceof Error ? input.error.message : String(input.error) } : {}),
|
|
359
409
|
});
|
|
360
410
|
}
|
|
411
|
+
function projectLocalToolExecutionProfileChunks(executedToolResults, prefix) {
|
|
412
|
+
const chunks = [];
|
|
413
|
+
executedToolResults.forEach((toolResult, index) => {
|
|
414
|
+
if (isPlanToolName(toolResult.toolName)) {
|
|
415
|
+
return;
|
|
416
|
+
}
|
|
417
|
+
const id = `${prefix}:${index + 1}:${toolResult.toolName}`;
|
|
418
|
+
const startedAt = new Date().toISOString();
|
|
419
|
+
chunks.push({
|
|
420
|
+
kind: "profile",
|
|
421
|
+
step: {
|
|
422
|
+
id,
|
|
423
|
+
kind: "tool",
|
|
424
|
+
name: toolResult.toolName,
|
|
425
|
+
action: "invoke",
|
|
426
|
+
status: "started",
|
|
427
|
+
startedAt,
|
|
428
|
+
},
|
|
429
|
+
});
|
|
430
|
+
chunks.push({
|
|
431
|
+
kind: "profile",
|
|
432
|
+
step: {
|
|
433
|
+
id,
|
|
434
|
+
kind: "tool",
|
|
435
|
+
name: toolResult.toolName,
|
|
436
|
+
action: "invoke",
|
|
437
|
+
status: toolResult.isError === true ? "failed" : "completed",
|
|
438
|
+
startedAt,
|
|
439
|
+
endedAt: new Date().toISOString(),
|
|
440
|
+
...(toolResult.isError === true ? { isError: true } : {}),
|
|
441
|
+
},
|
|
442
|
+
});
|
|
443
|
+
});
|
|
444
|
+
return chunks;
|
|
445
|
+
}
|
|
361
446
|
export async function* streamRuntimeExecution(options) {
|
|
362
447
|
let request = buildInvocationRequest(options.binding, options.history, options.input, options.runtimeOptions);
|
|
363
448
|
if (requiresPlanEvidence(options.binding)) {
|
|
364
|
-
request = appendToolRecoveryInstruction(request,
|
|
449
|
+
request = appendToolRecoveryInstruction(request, buildInitialRequiredPlanInstruction(options.primaryTools));
|
|
365
450
|
}
|
|
366
451
|
let emittedUnsafeStreamSideEffects = false;
|
|
367
452
|
const shouldProfile = options.runtimeOptions.profiling === true;
|
|
@@ -549,13 +634,14 @@ export async function* streamRuntimeExecution(options) {
|
|
|
549
634
|
error,
|
|
550
635
|
});
|
|
551
636
|
if (!emittedUnsafeStreamSideEffects
|
|
552
|
-
&& isOpenAICompatibleStreamingCompatibilityError(options.binding, error)) {
|
|
637
|
+
&& (isOpenAICompatibleStreamingCompatibilityError(options.binding, error) || isGraphRecursionLimitError(error))) {
|
|
553
638
|
deferredStreamContent.length = 0;
|
|
554
639
|
}
|
|
555
640
|
else {
|
|
556
641
|
throw error;
|
|
557
642
|
}
|
|
558
643
|
}
|
|
644
|
+
const streamedToolResults = [];
|
|
559
645
|
if (events) {
|
|
560
646
|
const streamEventsConsume = startProfileStep({
|
|
561
647
|
id: "profile:agent:stream-events-consume",
|
|
@@ -568,7 +654,9 @@ export async function* streamRuntimeExecution(options) {
|
|
|
568
654
|
try {
|
|
569
655
|
let sawCompletedPlanToolResult = false;
|
|
570
656
|
let sawSuccessfulNonTodoToolResult = false;
|
|
571
|
-
|
|
657
|
+
let earlyStreamRecoveryInstruction = null;
|
|
658
|
+
let earlyStreamRecoverySuppressInitialPlan = false;
|
|
659
|
+
let completedPlanToolResultCount = 0;
|
|
572
660
|
for await (const event of options.iterateWithTimeout(events, options.streamIdleTimeoutMs, "agent streamEvents", options.streamDeadlineAt, options.invokeTimeoutMs)) {
|
|
573
661
|
const projectedChunks = projectRuntimeStreamEvent({
|
|
574
662
|
event,
|
|
@@ -589,6 +677,15 @@ export async function* streamRuntimeExecution(options) {
|
|
|
589
677
|
&& chunk.kind !== "content"
|
|
590
678
|
&& !(chunk.kind === "tool-result" && isPlanToolName(chunk.toolName))
|
|
591
679
|
&& !(chunk.kind === "tool-result" && chunk.isError === true && isRetrySafeInvalidToolSelectionError(chunk.output)));
|
|
680
|
+
const repeatedPlanToolResultBeforeEvidence = requiresPlanEvidence(options.binding)
|
|
681
|
+
&& !sawSuccessfulNonTodoToolResult
|
|
682
|
+
&& completedPlanToolResultCount > 0
|
|
683
|
+
&& projectedChunks.some((chunk) => chunk.kind === "tool-result" && isPlanToolName(chunk.toolName));
|
|
684
|
+
if (repeatedPlanToolResultBeforeEvidence) {
|
|
685
|
+
earlyStreamRecoveryInstruction = buildRunEvidenceAfterPlanInstruction(options.primaryTools);
|
|
686
|
+
earlyStreamRecoverySuppressInitialPlan = true;
|
|
687
|
+
break;
|
|
688
|
+
}
|
|
592
689
|
for (const chunk of projectedChunks) {
|
|
593
690
|
if (chunk.kind === "tool-result" && chunk.isError === true && isRetrySafeInvalidToolSelectionError(chunk.output)) {
|
|
594
691
|
sawRetrySafeInvalidToolSelectionError = true;
|
|
@@ -606,6 +703,9 @@ export async function* streamRuntimeExecution(options) {
|
|
|
606
703
|
if (isCompletedPlanToolResultChunk(chunk)) {
|
|
607
704
|
sawCompletedPlanToolResult = true;
|
|
608
705
|
}
|
|
706
|
+
if (chunk.kind === "tool-result" && isPlanToolName(chunk.toolName)) {
|
|
707
|
+
completedPlanToolResultCount += 1;
|
|
708
|
+
}
|
|
609
709
|
if ((eventContainsNonTodoToolResult || eventContainsNonRetrySafeChunk) && deferredStreamContent.length > 0) {
|
|
610
710
|
yield* flushDeferredStreamContent();
|
|
611
711
|
}
|
|
@@ -660,6 +760,23 @@ export async function* streamRuntimeExecution(options) {
|
|
|
660
760
|
})
|
|
661
761
|
: null;
|
|
662
762
|
const terminalDelegationOnlyRecoveryInstruction = resolveDelegationOnlyRecoveryInstruction(options.binding, terminalExecutionEvidence);
|
|
763
|
+
if (!emittedUnsafeStreamSideEffects
|
|
764
|
+
&& (terminalMissingPlanRecoveryInstruction || terminalDelegationOnlyRecoveryInstruction)) {
|
|
765
|
+
earlyStreamRecoveryInstruction =
|
|
766
|
+
terminalMissingPlanRecoveryInstruction ?? terminalDelegationOnlyRecoveryInstruction;
|
|
767
|
+
break;
|
|
768
|
+
}
|
|
769
|
+
if (requiresPlanEvidence(options.binding) && hasCompletedPlanWithEvidence(terminalExecutionEvidence)) {
|
|
770
|
+
if (hasUsefulVisibleSynthesis(projectionState.emittedOutput)) {
|
|
771
|
+
if (deferredStreamContent.length > 0) {
|
|
772
|
+
yield* flushDeferredStreamContent();
|
|
773
|
+
}
|
|
774
|
+
return;
|
|
775
|
+
}
|
|
776
|
+
deferredStreamContent.length = 0;
|
|
777
|
+
yield { kind: "content", content: buildDeterministicFinalFromStreamToolEvidence(streamedToolResults) };
|
|
778
|
+
return;
|
|
779
|
+
}
|
|
663
780
|
if (!shouldDeferStreamContent()
|
|
664
781
|
&& !terminalExecutionEvidence.hasIncompletePlanState
|
|
665
782
|
&& !terminalExecutionEvidence.hasFailedTaskDelegation
|
|
@@ -675,6 +792,30 @@ export async function* streamRuntimeExecution(options) {
|
|
|
675
792
|
}
|
|
676
793
|
}
|
|
677
794
|
}
|
|
795
|
+
if (earlyStreamRecoveryInstruction) {
|
|
796
|
+
const earlyRecoveryRuntimeOptions = earlyStreamRecoverySuppressInitialPlan
|
|
797
|
+
? withSuppressedInitialRequiredPlanInstruction(options.runtimeOptions)
|
|
798
|
+
: options.runtimeOptions;
|
|
799
|
+
const recovered = await options.invoke(options.applyToolRecoveryInstruction(options.binding, earlyStreamRecoveryInstruction), options.input, options.sessionId, options.runtimeOptions.requestId ?? options.sessionId, undefined, options.history, earlyRecoveryRuntimeOptions);
|
|
800
|
+
const recoveredToolResults = Array.isArray(recovered.metadata?.executedToolResults)
|
|
801
|
+
? recovered.metadata.executedToolResults
|
|
802
|
+
: [];
|
|
803
|
+
for (const toolResult of recoveredToolResults) {
|
|
804
|
+
yield {
|
|
805
|
+
kind: "tool-result",
|
|
806
|
+
toolName: toolResult.toolName,
|
|
807
|
+
output: toolResult.output,
|
|
808
|
+
isError: toolResult.isError,
|
|
809
|
+
};
|
|
810
|
+
}
|
|
811
|
+
if (recovered.output) {
|
|
812
|
+
const visible = toVisibleContent(recovered.output);
|
|
813
|
+
if (visible) {
|
|
814
|
+
yield { kind: "content", content: visible };
|
|
815
|
+
}
|
|
816
|
+
}
|
|
817
|
+
return;
|
|
818
|
+
}
|
|
678
819
|
if (shouldProfile)
|
|
679
820
|
yield finishProfileStep({
|
|
680
821
|
id: "profile:agent:stream-events-consume",
|
|
@@ -697,7 +838,7 @@ export async function* streamRuntimeExecution(options) {
|
|
|
697
838
|
error,
|
|
698
839
|
});
|
|
699
840
|
if (!emittedUnsafeStreamSideEffects
|
|
700
|
-
&& isOpenAICompatibleStreamingCompatibilityError(options.binding, error)) {
|
|
841
|
+
&& (isOpenAICompatibleStreamingCompatibilityError(options.binding, error) || isGraphRecursionLimitError(error))) {
|
|
701
842
|
deferredStreamContent.length = 0;
|
|
702
843
|
}
|
|
703
844
|
else {
|
|
@@ -707,9 +848,14 @@ export async function* streamRuntimeExecution(options) {
|
|
|
707
848
|
}
|
|
708
849
|
const streamedExecutionEvidence = buildExecutionRecoveryEvidence({ projectionState });
|
|
709
850
|
if (requiresPlanEvidence(options.binding) && hasCompletedPlanWithEvidence(streamedExecutionEvidence)) {
|
|
710
|
-
if (
|
|
711
|
-
|
|
851
|
+
if (hasUsefulVisibleSynthesis(projectionState.emittedOutput)) {
|
|
852
|
+
if (deferredStreamContent.length > 0) {
|
|
853
|
+
yield* flushDeferredStreamContent();
|
|
854
|
+
}
|
|
855
|
+
return;
|
|
712
856
|
}
|
|
857
|
+
deferredStreamContent.length = 0;
|
|
858
|
+
yield { kind: "content", content: buildDeterministicFinalFromStreamToolEvidence(streamedToolResults) };
|
|
713
859
|
return;
|
|
714
860
|
}
|
|
715
861
|
const streamedDelegatedRecoveryInstruction = resolveDelegatedExecutionRecoveryInstruction(streamedExecutionEvidence);
|
|
@@ -722,7 +868,7 @@ export async function* streamRuntimeExecution(options) {
|
|
|
722
868
|
const streamedPrematurePlanCloseRecoveryInstruction = requiresPlanEvidence(options.binding)
|
|
723
869
|
&& streamedExecutionEvidence.hasPlanStateEvidence
|
|
724
870
|
&& !streamedExecutionEvidence.hasSuccessfulNonTodoToolResultEvidence
|
|
725
|
-
?
|
|
871
|
+
? buildRunEvidenceAfterPlanInstruction(options.primaryTools)
|
|
726
872
|
: null;
|
|
727
873
|
const delegatedExecutionRecoveryInstruction = !emittedUnsafeStreamSideEffects || streamedDelegatedRecoveryInstruction
|
|
728
874
|
? streamedDelegatedRecoveryInstruction
|
|
@@ -763,8 +909,12 @@ export async function* streamRuntimeExecution(options) {
|
|
|
763
909
|
?? streamedDelegationOnlyRecoveryInstruction
|
|
764
910
|
?? executionWithoutToolEvidenceInstruction;
|
|
765
911
|
if (retryInstruction) {
|
|
912
|
+
const retryRuntimeOptions = retryInstruction === streamedIncompletePlanRecoveryInstruction
|
|
913
|
+
|| retryInstruction === streamedPrematurePlanCloseRecoveryInstruction
|
|
914
|
+
? withSuppressedInitialRequiredPlanInstruction(options.runtimeOptions)
|
|
915
|
+
: options.runtimeOptions;
|
|
766
916
|
let retried;
|
|
767
|
-
retried = await options.invoke(options.applyToolRecoveryInstruction(options.binding, retryInstruction), options.input, options.sessionId, options.runtimeOptions.requestId ?? options.sessionId, undefined, options.history,
|
|
917
|
+
retried = await options.invoke(options.applyToolRecoveryInstruction(options.binding, retryInstruction), options.input, options.sessionId, options.runtimeOptions.requestId ?? options.sessionId, undefined, options.history, retryRuntimeOptions);
|
|
768
918
|
const executedToolResults = Array.isArray(retried.metadata?.executedToolResults)
|
|
769
919
|
? retried.metadata.executedToolResults
|
|
770
920
|
: [];
|
|
@@ -954,6 +1104,9 @@ export async function* streamRuntimeExecution(options) {
|
|
|
954
1104
|
const executedToolResults = Array.isArray(result.metadata?.executedToolResults)
|
|
955
1105
|
? result.metadata.executedToolResults
|
|
956
1106
|
: [];
|
|
1107
|
+
for (const chunk of projectLocalToolExecutionProfileChunks(executedToolResults, "local-tool:invoke-fallback")) {
|
|
1108
|
+
yield chunk;
|
|
1109
|
+
}
|
|
957
1110
|
const invokeExecutionEvidence = buildExecutionRecoveryEvidence({ projectionState, executedToolResults });
|
|
958
1111
|
if (hasUnresolvedExecution(invokeExecutionEvidence)) {
|
|
959
1112
|
throw createUnresolvedExecutionError(invokeExecutionEvidence);
|
|
@@ -984,7 +1137,7 @@ export async function* streamRuntimeExecution(options) {
|
|
|
984
1137
|
const invokeFallbackPlanWithoutEvidenceRecoveryInstruction = requiresPlanEvidence(options.binding)
|
|
985
1138
|
&& invokeExecutionEvidence.hasPlanStateEvidence
|
|
986
1139
|
&& !invokeExecutionEvidence.hasSuccessfulNonTodoToolResultEvidence
|
|
987
|
-
?
|
|
1140
|
+
? buildRunEvidenceAfterPlanInstruction(options.primaryTools)
|
|
988
1141
|
: null;
|
|
989
1142
|
const effectiveInvokeFallbackRecoveryInstruction = invokeFallbackIncompletePlanRecoveryInstruction
|
|
990
1143
|
?? invokeFallbackPlanWithoutEvidenceRecoveryInstruction
|
|
@@ -992,10 +1145,20 @@ export async function* streamRuntimeExecution(options) {
|
|
|
992
1145
|
?? invokeFallbackDelegationOnlyRecoveryInstruction
|
|
993
1146
|
?? invokeFallbackRecoveryInstruction;
|
|
994
1147
|
if (effectiveInvokeFallbackRecoveryInstruction) {
|
|
995
|
-
const
|
|
1148
|
+
const invokeFallbackRuntimeOptions = effectiveInvokeFallbackRecoveryInstruction === invokeFallbackIncompletePlanRecoveryInstruction
|
|
1149
|
+
|| effectiveInvokeFallbackRecoveryInstruction === invokeFallbackPlanWithoutEvidenceRecoveryInstruction
|
|
1150
|
+
? {
|
|
1151
|
+
...withSuppressedInitialRequiredPlanInstruction(options.runtimeOptions),
|
|
1152
|
+
externalPlanEvidence: true,
|
|
1153
|
+
}
|
|
1154
|
+
: options.runtimeOptions;
|
|
1155
|
+
const recovered = await options.invoke(options.applyToolRecoveryInstruction(options.binding, effectiveInvokeFallbackRecoveryInstruction), options.input, options.sessionId, options.runtimeOptions.requestId ?? options.sessionId, undefined, options.history, invokeFallbackRuntimeOptions);
|
|
996
1156
|
const recoveredToolResults = Array.isArray(recovered.metadata?.executedToolResults)
|
|
997
1157
|
? recovered.metadata.executedToolResults
|
|
998
1158
|
: [];
|
|
1159
|
+
for (const chunk of projectLocalToolExecutionProfileChunks(recoveredToolResults, "local-tool:invoke-fallback-recovery")) {
|
|
1160
|
+
yield chunk;
|
|
1161
|
+
}
|
|
999
1162
|
const originalExecutionEvidence = buildExecutionRecoveryEvidence({ projectionState, executedToolResults });
|
|
1000
1163
|
const recoveredExecutionEvidence = buildExecutionRecoveryEvidence({
|
|
1001
1164
|
projectionState: createStreamEventProjectionState(),
|
|
@@ -41,11 +41,23 @@ function hasStateSnapshotPlan(stateSnapshot) {
|
|
|
41
41
|
&& stateSnapshot !== null
|
|
42
42
|
&& Array.isArray(stateSnapshot.todos);
|
|
43
43
|
}
|
|
44
|
+
function normalizePlanToolName(toolName) {
|
|
45
|
+
return typeof toolName === "string" ? toolName.trim().toLowerCase().replace(/[\s-]+/gu, "_") : "";
|
|
46
|
+
}
|
|
47
|
+
function isPlanToolName(toolName) {
|
|
48
|
+
const normalized = normalizePlanToolName(toolName);
|
|
49
|
+
return normalized === "write_todos"
|
|
50
|
+
|| normalized === "read_todos"
|
|
51
|
+
|| normalized === "tool_call_write_todos"
|
|
52
|
+
|| normalized === "tool_call_read_todos"
|
|
53
|
+
|| normalized === "call_write_todos"
|
|
54
|
+
|| normalized === "call_read_todos";
|
|
55
|
+
}
|
|
44
56
|
function hasPlanToolEvidence(executedToolResults) {
|
|
45
|
-
return executedToolResults.some((item) => item.toolName
|
|
57
|
+
return executedToolResults.some((item) => isPlanToolName(item.toolName));
|
|
46
58
|
}
|
|
47
59
|
function hasExecutionToolEvidence(executedToolResults) {
|
|
48
|
-
return executedToolResults.some((item) => item.isError !== true && item.toolName
|
|
60
|
+
return executedToolResults.some((item) => item.isError !== true && !isPlanToolName(item.toolName));
|
|
49
61
|
}
|
|
50
62
|
function isPlaceholderTaskCompletion(value) {
|
|
51
63
|
const normalized = sanitizeVisibleText(value).trim();
|
|
@@ -143,7 +155,7 @@ function extractLatestSuccessfulNonTodoToolResultText(executedToolResults) {
|
|
|
143
155
|
if (toolResult.isError === true) {
|
|
144
156
|
continue;
|
|
145
157
|
}
|
|
146
|
-
if (toolResult.toolName === "task" || toolResult.toolName
|
|
158
|
+
if (toolResult.toolName === "task" || isPlanToolName(toolResult.toolName)) {
|
|
147
159
|
continue;
|
|
148
160
|
}
|
|
149
161
|
const normalized = normalizeToolOutputText(toolResult.output);
|
|
@@ -258,8 +270,7 @@ function looksLikeContradictedToolExecutionFailure(value) {
|
|
|
258
270
|
}
|
|
259
271
|
function extractDeterministicToolFailureReport(executedToolResults) {
|
|
260
272
|
const hasSuccessfulSubstantiveTool = executedToolResults.some((toolResult) => (toolResult.isError !== true
|
|
261
|
-
&& toolResult.toolName
|
|
262
|
-
&& toolResult.toolName !== "read_todos"));
|
|
273
|
+
&& !isPlanToolName(toolResult.toolName)));
|
|
263
274
|
if (hasSuccessfulSubstantiveTool) {
|
|
264
275
|
return "";
|
|
265
276
|
}
|
|
@@ -401,7 +412,7 @@ export function finalizeRequestResult(params) {
|
|
|
401
412
|
&& !visibleOutput
|
|
402
413
|
&& !preliminaryTerminalStatus
|
|
403
414
|
&& !output.trim()
|
|
404
|
-
&& allExecutedToolResults.some((toolResult) => toolResult.isError !== true && toolResult.toolName
|
|
415
|
+
&& allExecutedToolResults.some((toolResult) => toolResult.isError !== true && !isPlanToolName(toolResult.toolName));
|
|
405
416
|
if (hasMissingRequiredPlanEvidence) {
|
|
406
417
|
output = "runtime_error=Agent ended before producing required plan evidence.";
|
|
407
418
|
}
|
|
@@ -15,10 +15,11 @@ type LocalToolInvocationParams = {
|
|
|
15
15
|
builtinExecutableTools: Map<string, ExecutableTool>;
|
|
16
16
|
callRuntimeWithToolParseRecovery: (request: unknown) => Promise<Record<string, unknown>>;
|
|
17
17
|
toolRuntimeContext?: Record<string, unknown>;
|
|
18
|
+
externalPlanEvidence?: boolean;
|
|
18
19
|
};
|
|
19
20
|
type LocalToolInvocationResult = {
|
|
20
21
|
result: Record<string, unknown>;
|
|
21
22
|
executedToolResults: ExecutedToolResult[];
|
|
22
23
|
};
|
|
23
|
-
export declare function runLocalToolInvocationLoop({ binding, request, primaryTools, toolNameMapping, executableTools, builtinExecutableTools, callRuntimeWithToolParseRecovery, toolRuntimeContext, }: LocalToolInvocationParams): Promise<LocalToolInvocationResult>;
|
|
24
|
+
export declare function runLocalToolInvocationLoop({ binding, request, primaryTools, toolNameMapping, executableTools, builtinExecutableTools, callRuntimeWithToolParseRecovery, toolRuntimeContext, externalPlanEvidence, }: LocalToolInvocationParams): Promise<LocalToolInvocationResult>;
|
|
24
25
|
export {};
|