@botbotgo/agent-harness 0.0.361 → 0.0.362
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +2 -0
- package/README.zh.md +2 -0
- package/dist/config/catalogs/response-formats.yaml +43 -0
- package/dist/config/runtime/workspace.yaml +8 -0
- package/dist/contracts/runtime-requests.d.ts +19 -0
- package/dist/contracts/workspace.d.ts +6 -0
- package/dist/package-version.d.ts +2 -2
- package/dist/package-version.js +2 -2
- package/dist/projections/request-events.d.ts +1 -0
- package/dist/projections/request-events.js +97 -45
- package/dist/protocol/acp/harness-client.js +2 -3
- package/dist/runtime/adapter/flow/stream-runtime.js +117 -94
- package/dist/runtime/adapter/middleware-assembly.js +25 -3
- package/dist/runtime/adapter/tool/builtin-middleware-tools.d.ts +5 -0
- package/dist/runtime/adapter/tool/builtin-middleware-tools.js +30 -6
- package/dist/runtime/agent-runtime-adapter.d.ts +1 -0
- package/dist/runtime/agent-runtime-adapter.js +108 -17
- package/dist/runtime/harness/events/streaming.js +2 -3
- package/dist/workspace/agent-binding-compiler.js +90 -12
- package/dist/workspace/compile.js +1 -0
- package/dist/workspace/framework-contract-validation.d.ts +2 -1
- package/dist/workspace/framework-contract-validation.js +77 -5
- package/dist/workspace/object-loader.js +9 -0
- package/dist/workspace/support/workspace-ref-utils.d.ts +1 -0
- package/dist/workspace/support/workspace-ref-utils.js +40 -0
- package/dist/workspace/yaml-object-reader.js +13 -9
- package/package.json +1 -1
package/README.md
CHANGED
|
@@ -94,6 +94,8 @@ agent-harness "Inspect this workspace and explain the main entry points."
|
|
|
94
94
|
|
|
95
95
|
When the runtime emits request-scoped `plan-state` updates and safe `progress.commentary` deltas, the chat shell now renders live todo-board and progress status updates directly in the terminal, so you can watch planning and execution status change during the run instead of waiting for the final response.
|
|
96
96
|
Those progress callouts now stay tied to stable runtime surfaces such as plan-state, tool start/completion, memory recall, and agent delegation, so the operator sees Codex-style intermediate status without exposing private model reasoning.
|
|
97
|
+
Streaming data listeners also receive structured `plan.state`, per-item `plan.step`, and normalized `execution.step` events, so applications can render every planning and execution transition without parsing assistant text or raw upstream debug events.
|
|
98
|
+
The bundled runtime now also provides a generic `response-format/default-report` structured-output default for agents. Workspaces can replace it through `Runtime.spec.defaults.agent.config.responseFormatRef`; individual agents can extend it with an inline `responseFormat`, replace it with `responseFormatRef`, or disable it by setting `responseFormat: null`.
|
|
97
99
|
The repository default `orchestra` host is also instructed to start real multi-step execution from the task you already gave it, call `write_todos` before non-trivial tool work, and keep that todo board updated while it runs.
|
|
98
100
|
Durable-memory writes now also retrieve related existing records through the configured vector store before model reconciliation, then merge those semantic hits with deterministic matching so updates and deletes can target the right knowledge identity instead of creating nearby duplicate facts.
|
|
99
101
|
|
package/README.zh.md
CHANGED
|
@@ -92,6 +92,8 @@ agent-harness "Inspect this workspace and explain the main entry points."
|
|
|
92
92
|
|
|
93
93
|
当 runtime 发出 request 级 `plan-state` 更新以及安全的 `progress.commentary` 增量时,chat shell 现在会直接在终端里渲染实时 todo board 和进度播报,因此你可以在执行过程中看到规划和状态变化,而不必等到最终回复。
|
|
94
94
|
这些进度播报现在会继续绑定在稳定的 runtime surface 上,例如 plan-state、tool 开始/完成、memory recall 与 agent delegation,因此 operator 可以看到类似 Codex 的清晰中间状态,但不会暴露私有模型推理。
|
|
95
|
+
Streaming data listener 也会收到结构化的 `plan.state`、逐条 `plan.step` 和归一化的 `execution.step` 事件,因此应用可以渲染所有 plan / execution 转折,而不必解析 assistant 文本或 raw upstream debug event。
|
|
96
|
+
随包 runtime 现在也提供通用的 `response-format/default-report` 作为 agent structured-output 默认值;workspace 可以通过 `Runtime.spec.defaults.agent.config.responseFormatRef` 替换它,单个 agent 可以用 inline `responseFormat` 扩展、用 `responseFormatRef` 替换,或设置 `responseFormat: null` 关闭。
|
|
95
97
|
仓库默认的 `orchestra` host 现在也会被明确要求:对已经给清楚的多步任务不要再反问,而是直接开始执行;在非平凡工具工作前先调用 `write_todos`,并在运行过程中持续维护这块 todo board。
|
|
96
98
|
durable memory 的写入现在也会在模型做 mutation reconciliation 之前,先通过配置好的 vector store 检索相关旧知识,再和确定性匹配结果合并,因此 update / delete 更容易命中正确的 knowledge identity,而不是生成几条相近但彼此独立的 fact。
|
|
97
99
|
|
|
@@ -0,0 +1,43 @@
|
|
|
1
|
+
# agent-harness feature: schema version for reusable response-format presets.
|
|
2
|
+
apiVersion: agent-harness/v1alpha1
|
|
3
|
+
# agent-harness feature: object type for named structured-output response format presets.
|
|
4
|
+
kind: ResponseFormats
|
|
5
|
+
spec:
|
|
6
|
+
- kind: ResponseFormat
|
|
7
|
+
name: default-report
|
|
8
|
+
description: Generic structured report for agent results. Workspaces and agents can override or disable it.
|
|
9
|
+
format:
|
|
10
|
+
type: object
|
|
11
|
+
properties:
|
|
12
|
+
status:
|
|
13
|
+
type: string
|
|
14
|
+
enum:
|
|
15
|
+
- completed
|
|
16
|
+
- blocked
|
|
17
|
+
- failed
|
|
18
|
+
- refused
|
|
19
|
+
summary:
|
|
20
|
+
type: array
|
|
21
|
+
items:
|
|
22
|
+
type: string
|
|
23
|
+
findings:
|
|
24
|
+
type: array
|
|
25
|
+
items:
|
|
26
|
+
type: string
|
|
27
|
+
blockers:
|
|
28
|
+
type: array
|
|
29
|
+
items:
|
|
30
|
+
type: string
|
|
31
|
+
nextActions:
|
|
32
|
+
type: array
|
|
33
|
+
items:
|
|
34
|
+
type: string
|
|
35
|
+
report:
|
|
36
|
+
type: string
|
|
37
|
+
required:
|
|
38
|
+
- status
|
|
39
|
+
- summary
|
|
40
|
+
- findings
|
|
41
|
+
- blockers
|
|
42
|
+
- nextActions
|
|
43
|
+
- report
|
|
@@ -45,6 +45,14 @@ spec:
|
|
|
45
45
|
skills:
|
|
46
46
|
- file://./resources/skills
|
|
47
47
|
|
|
48
|
+
# agent-harness feature: default agent execution config used when a workspace or agent does not override it.
|
|
49
|
+
# The bundled default keeps first-run agent outputs parseable while still allowing projects and individual agents
|
|
50
|
+
# to replace it with their own responseFormatRef, inline responseFormat, or `responseFormat: null`.
|
|
51
|
+
defaults:
|
|
52
|
+
agent:
|
|
53
|
+
config:
|
|
54
|
+
responseFormatRef: response-format/default-report
|
|
55
|
+
|
|
48
56
|
# agent-harness feature: runtime-level task queue and maximum number of concurrent requests.
|
|
49
57
|
# Additional requests wait in the harness queue until a slot becomes available.
|
|
50
58
|
concurrency:
|
|
@@ -121,6 +121,25 @@ export type RequestDataEvent = {
|
|
|
121
121
|
requestId: string;
|
|
122
122
|
agentId: string;
|
|
123
123
|
text: string;
|
|
124
|
+
} | {
|
|
125
|
+
type: "plan.state";
|
|
126
|
+
sessionId: string;
|
|
127
|
+
requestId: string;
|
|
128
|
+
agentId: string;
|
|
129
|
+
planState: RequestPlanState;
|
|
130
|
+
} | {
|
|
131
|
+
type: "plan.step";
|
|
132
|
+
sessionId: string;
|
|
133
|
+
requestId: string;
|
|
134
|
+
agentId: string;
|
|
135
|
+
planStateVersion: number;
|
|
136
|
+
index: number;
|
|
137
|
+
item: RequestPlanItem;
|
|
138
|
+
} | {
|
|
139
|
+
type: "execution.step";
|
|
140
|
+
sessionId: string;
|
|
141
|
+
requestId: string;
|
|
142
|
+
step: RequestExecutionStep;
|
|
124
143
|
} | {
|
|
125
144
|
type: "output.content-blocks";
|
|
126
145
|
sessionId: string;
|
|
@@ -74,6 +74,12 @@ export type ParsedVectorStoreObject = {
|
|
|
74
74
|
metadata?: Record<string, unknown>;
|
|
75
75
|
sourcePath: string;
|
|
76
76
|
};
|
|
77
|
+
export type ParsedResponseFormatObject = {
|
|
78
|
+
id: string;
|
|
79
|
+
description?: string;
|
|
80
|
+
format: unknown;
|
|
81
|
+
sourcePath: string;
|
|
82
|
+
};
|
|
77
83
|
export type ParsedMcpServerObject = {
|
|
78
84
|
id: string;
|
|
79
85
|
transport: "stdio" | "http" | "sse" | "websocket";
|
|
@@ -1,2 +1,2 @@
|
|
|
1
|
-
export declare const AGENT_HARNESS_VERSION = "0.0.361";
|
|
2
|
-
export declare const AGENT_HARNESS_RELEASE_DATE = "2026-04-
|
|
1
|
+
export declare const AGENT_HARNESS_VERSION = "0.0.362";
|
|
2
|
+
export declare const AGENT_HARNESS_RELEASE_DATE = "2026-04-27";
|
package/dist/package-version.js
CHANGED
|
@@ -1,2 +1,2 @@
|
|
|
1
|
-
export const AGENT_HARNESS_VERSION = "0.0.361";
|
|
2
|
-
export const AGENT_HARNESS_RELEASE_DATE = "2026-04-
|
|
1
|
+
export const AGENT_HARNESS_VERSION = "0.0.362";
|
|
2
|
+
export const AGENT_HARNESS_RELEASE_DATE = "2026-04-27";
|
|
@@ -59,5 +59,6 @@ export type RequestSnapshotStreamItem = StreamEventItem | {
|
|
|
59
59
|
} | StreamContentItem | StreamContentBlocksItem | StreamToolResultItem | StreamPlanStateItem | StreamUpstreamEventItem | StreamProfileStepItem | StreamResultItem;
|
|
60
60
|
export declare function createInitialRequestEventSnapshot(): RequestEventSnapshot;
|
|
61
61
|
export declare function applyRequestStreamItemToSnapshot(snapshot: RequestEventSnapshot, item: RequestSnapshotStreamItem): RequestEventSnapshot;
|
|
62
|
+
export declare function toRequestDataEvents(item: RequestSnapshotStreamItem): RequestDataEvent[];
|
|
62
63
|
export declare function toRequestDataEvent(item: RequestSnapshotStreamItem): RequestDataEvent | null;
|
|
63
64
|
export {};
|
|
@@ -393,58 +393,110 @@ export function applyRequestStreamItemToSnapshot(snapshot, item) {
|
|
|
393
393
|
};
|
|
394
394
|
}
|
|
395
395
|
}
|
|
396
|
-
|
|
396
|
+
function createSurfaceExecutionStep(surfaceItem) {
|
|
397
|
+
return {
|
|
398
|
+
id: surfaceItem.id,
|
|
399
|
+
kind: surfaceItem.kind,
|
|
400
|
+
name: surfaceItem.name,
|
|
401
|
+
action: surfaceItem.action,
|
|
402
|
+
status: surfaceItem.status,
|
|
403
|
+
...(surfaceItem.agentId ? { agentId: surfaceItem.agentId } : {}),
|
|
404
|
+
...(surfaceItem.agentName ? { agentName: surfaceItem.agentName } : {}),
|
|
405
|
+
...(surfaceItem.ownerAgentId ? { ownerAgentId: surfaceItem.ownerAgentId } : {}),
|
|
406
|
+
...(surfaceItem.ownerAgentName ? { ownerAgentName: surfaceItem.ownerAgentName } : {}),
|
|
407
|
+
...(surfaceItem.sourceEventId ? { sourceEventId: surfaceItem.sourceEventId } : {}),
|
|
408
|
+
...(surfaceItem.detail ? { detail: surfaceItem.detail } : {}),
|
|
409
|
+
};
|
|
410
|
+
}
|
|
411
|
+
export function toRequestDataEvents(item) {
|
|
397
412
|
switch (item.type) {
|
|
398
413
|
case "commentary":
|
|
399
|
-
return {
|
|
400
|
-
|
|
401
|
-
|
|
402
|
-
|
|
403
|
-
|
|
404
|
-
|
|
405
|
-
|
|
414
|
+
return [{
|
|
415
|
+
type: "progress.commentary",
|
|
416
|
+
sessionId: item.sessionId,
|
|
417
|
+
requestId: item.requestId,
|
|
418
|
+
agentId: item.agentId,
|
|
419
|
+
text: item.content,
|
|
420
|
+
}];
|
|
406
421
|
case "content":
|
|
407
|
-
return {
|
|
408
|
-
|
|
409
|
-
|
|
410
|
-
|
|
411
|
-
|
|
412
|
-
|
|
413
|
-
|
|
422
|
+
return [{
|
|
423
|
+
type: "output.text.delta",
|
|
424
|
+
sessionId: item.sessionId,
|
|
425
|
+
requestId: item.requestId,
|
|
426
|
+
agentId: item.agentId,
|
|
427
|
+
text: item.content,
|
|
428
|
+
}];
|
|
414
429
|
case "content-blocks":
|
|
415
|
-
return {
|
|
416
|
-
|
|
417
|
-
|
|
418
|
-
|
|
419
|
-
|
|
420
|
-
|
|
421
|
-
|
|
430
|
+
return [{
|
|
431
|
+
type: "output.content-blocks",
|
|
432
|
+
sessionId: item.sessionId,
|
|
433
|
+
requestId: item.requestId,
|
|
434
|
+
agentId: item.agentId,
|
|
435
|
+
contentBlocks: item.contentBlocks,
|
|
436
|
+
}];
|
|
422
437
|
case "tool-result":
|
|
423
|
-
return {
|
|
424
|
-
|
|
425
|
-
|
|
426
|
-
|
|
427
|
-
|
|
428
|
-
|
|
429
|
-
|
|
430
|
-
|
|
431
|
-
|
|
438
|
+
return [{
|
|
439
|
+
type: "tool.result",
|
|
440
|
+
sessionId: item.sessionId,
|
|
441
|
+
requestId: item.requestId,
|
|
442
|
+
agentId: item.agentId,
|
|
443
|
+
toolName: item.toolName,
|
|
444
|
+
output: summarizeLargeDataEventOutput(item.output),
|
|
445
|
+
...(item.isError !== undefined ? { isError: item.isError } : {}),
|
|
446
|
+
}];
|
|
447
|
+
case "plan-state":
|
|
448
|
+
return [
|
|
449
|
+
{
|
|
450
|
+
type: "plan.state",
|
|
451
|
+
sessionId: item.sessionId,
|
|
452
|
+
requestId: item.requestId,
|
|
453
|
+
agentId: item.agentId,
|
|
454
|
+
planState: item.planState,
|
|
455
|
+
},
|
|
456
|
+
...item.planState.items.map((planItem, index) => ({
|
|
457
|
+
type: "plan.step",
|
|
458
|
+
sessionId: item.sessionId,
|
|
459
|
+
requestId: item.requestId,
|
|
460
|
+
agentId: item.agentId,
|
|
461
|
+
planStateVersion: item.planState.version,
|
|
462
|
+
index,
|
|
463
|
+
item: planItem,
|
|
464
|
+
})),
|
|
465
|
+
];
|
|
432
466
|
case "upstream-event":
|
|
433
|
-
return
|
|
434
|
-
|
|
435
|
-
|
|
436
|
-
|
|
437
|
-
|
|
438
|
-
|
|
439
|
-
|
|
467
|
+
return [
|
|
468
|
+
...(item.surfaceItem
|
|
469
|
+
? [{
|
|
470
|
+
type: "execution.step",
|
|
471
|
+
sessionId: item.sessionId,
|
|
472
|
+
requestId: item.requestId,
|
|
473
|
+
step: createSurfaceExecutionStep(item.surfaceItem),
|
|
474
|
+
}]
|
|
475
|
+
: []),
|
|
476
|
+
{
|
|
477
|
+
type: "debug.upstream",
|
|
478
|
+
sessionId: item.sessionId,
|
|
479
|
+
requestId: item.requestId,
|
|
480
|
+
...(item.surfaceItem ? { surfaceItem: item.surfaceItem } : {}),
|
|
481
|
+
event: item.event,
|
|
482
|
+
},
|
|
483
|
+
];
|
|
440
484
|
case "profile-step":
|
|
441
|
-
return {
|
|
442
|
-
|
|
443
|
-
|
|
444
|
-
|
|
445
|
-
|
|
446
|
-
|
|
485
|
+
return [{
|
|
486
|
+
type: "execution.step",
|
|
487
|
+
sessionId: item.sessionId,
|
|
488
|
+
requestId: item.requestId,
|
|
489
|
+
step: item.step,
|
|
490
|
+
}, {
|
|
491
|
+
type: "debug.profile",
|
|
492
|
+
sessionId: item.sessionId,
|
|
493
|
+
requestId: item.requestId,
|
|
494
|
+
step: item.step,
|
|
495
|
+
}];
|
|
447
496
|
default:
|
|
448
|
-
return
|
|
497
|
+
return [];
|
|
449
498
|
}
|
|
450
499
|
}
|
|
500
|
+
export function toRequestDataEvent(item) {
|
|
501
|
+
return toRequestDataEvents(item)[0] ?? null;
|
|
502
|
+
}
|
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
import { createAcpHttpClient, createAcpStdioClient, } from "./client.js";
|
|
2
|
-
import { applyRequestStreamItemToSnapshot, createInitialRequestEventSnapshot, toRequestDataEvent, } from "../../projections/request-events.js";
|
|
2
|
+
import { applyRequestStreamItemToSnapshot, createInitialRequestEventSnapshot, toRequestDataEvents, } from "../../projections/request-events.js";
|
|
3
3
|
function toEvent(notification) {
|
|
4
4
|
return notification.params.event;
|
|
5
5
|
}
|
|
@@ -144,8 +144,7 @@ export class AcpHarnessClient {
|
|
|
144
144
|
else if (item.type === "result") {
|
|
145
145
|
finalResult = item.result;
|
|
146
146
|
}
|
|
147
|
-
const dataEvent
|
|
148
|
-
if (dataEvent) {
|
|
147
|
+
for (const dataEvent of toRequestDataEvents(item)) {
|
|
149
148
|
await dataListener?.(dataEvent);
|
|
150
149
|
}
|
|
151
150
|
await eventListener?.(snapshot);
|
|
@@ -125,6 +125,15 @@ function isDelegationOnlyBinding(binding) {
|
|
|
125
125
|
const skillRefs = agent?.skillPathRefs ?? [];
|
|
126
126
|
return configuredSubagents.length > 0 && configuredTools.length === 0 && skillRefs.length === 0;
|
|
127
127
|
}
|
|
128
|
+
function isOpenAICompatibleStreamingCompatibilityError(binding, error) {
|
|
129
|
+
const params = binding.execution?.params ?? binding.deepAgentParams ?? binding.langchainAgentParams;
|
|
130
|
+
const model = params?.model;
|
|
131
|
+
if (model?.provider !== "openai-compatible") {
|
|
132
|
+
return false;
|
|
133
|
+
}
|
|
134
|
+
const message = error instanceof Error ? error.message : String(error);
|
|
135
|
+
return message.toLowerCase().includes("received empty response from chat model call");
|
|
136
|
+
}
|
|
128
137
|
function hasDelegationEvidence(evidence) {
|
|
129
138
|
return (evidence.hasSuccessfulTaskToolEvidence
|
|
130
139
|
|| evidence.hasOpenTaskDelegation
|
|
@@ -401,106 +410,120 @@ export async function* streamRuntimeExecution(options) {
|
|
|
401
410
|
status: "failed",
|
|
402
411
|
error,
|
|
403
412
|
});
|
|
404
|
-
|
|
413
|
+
if (!emittedUnsafeStreamSideEffects
|
|
414
|
+
&& isOpenAICompatibleStreamingCompatibilityError(options.binding, error)) {
|
|
415
|
+
deferredStreamContent.length = 0;
|
|
416
|
+
}
|
|
417
|
+
else {
|
|
418
|
+
throw error;
|
|
419
|
+
}
|
|
405
420
|
}
|
|
406
|
-
|
|
407
|
-
|
|
408
|
-
|
|
409
|
-
|
|
410
|
-
|
|
411
|
-
|
|
412
|
-
|
|
413
|
-
|
|
414
|
-
|
|
415
|
-
|
|
416
|
-
const
|
|
417
|
-
|
|
418
|
-
|
|
419
|
-
|
|
420
|
-
|
|
421
|
-
|
|
422
|
-
|
|
423
|
-
|
|
424
|
-
|
|
425
|
-
|
|
426
|
-
|
|
427
|
-
|
|
428
|
-
|
|
429
|
-
|
|
430
|
-
|
|
431
|
-
|
|
432
|
-
|
|
433
|
-
|
|
434
|
-
|
|
435
|
-
|
|
436
|
-
|
|
437
|
-
|
|
438
|
-
|
|
439
|
-
|
|
440
|
-
|
|
441
|
-
}
|
|
442
|
-
if (eventContainsNonTodoToolResult || eventContainsNonRetrySafeChunk) {
|
|
443
|
-
emittedUnsafeStreamSideEffects = true;
|
|
444
|
-
}
|
|
445
|
-
if (chunk.kind === "content" && (shouldDeferStreamContent() || projectionState.hasFailedTaskDelegation)) {
|
|
446
|
-
deferredStreamContent.push(chunk);
|
|
447
|
-
continue;
|
|
448
|
-
}
|
|
449
|
-
yield chunk;
|
|
450
|
-
}
|
|
451
|
-
const terminalVisibleOutput = readTerminalEventVisibleOutput(event);
|
|
452
|
-
if (terminalVisibleOutput) {
|
|
453
|
-
const terminalExecutionEvidence = buildExecutionRecoveryEvidence({ projectionState });
|
|
454
|
-
const terminalMissingPlanRecoveryInstruction = !terminalExecutionEvidence.hasDelegatedAgentWithConfiguredTools
|
|
455
|
-
&& !terminalExecutionEvidence.hasOpenTaskDelegation
|
|
456
|
-
&& !projectionState.emittedSuccessfulTaskResult
|
|
457
|
-
? resolveMissingPlanRecoveryInstruction({
|
|
458
|
-
request,
|
|
459
|
-
assistantText: terminalVisibleOutput,
|
|
460
|
-
requiresPlan: requiresPlanEvidence(options.binding),
|
|
461
|
-
hasPlanStateEvidence: terminalExecutionEvidence.hasPlanStateEvidence,
|
|
462
|
-
hasWriteTodosEvidence: terminalExecutionEvidence.hasPlanStateEvidence,
|
|
463
|
-
hasToolResultEvidence: terminalExecutionEvidence.hasSuccessfulNonTodoToolResultEvidence,
|
|
464
|
-
})
|
|
465
|
-
: null;
|
|
466
|
-
const terminalDelegationOnlyRecoveryInstruction = resolveDelegationOnlyRecoveryInstruction(options.binding, terminalExecutionEvidence);
|
|
467
|
-
if (!shouldDeferStreamContent()
|
|
468
|
-
&& !terminalExecutionEvidence.hasIncompletePlanState
|
|
469
|
-
&& !terminalExecutionEvidence.hasFailedTaskDelegation
|
|
470
|
-
&& !terminalExecutionEvidence.hasOpenTaskDelegation
|
|
471
|
-
&& !hasMissingDelegatedExecutionEvidence(terminalExecutionEvidence)
|
|
472
|
-
&& !hasMissingDelegatedFindings(terminalExecutionEvidence)
|
|
473
|
-
&& !terminalMissingPlanRecoveryInstruction
|
|
474
|
-
&& !terminalDelegationOnlyRecoveryInstruction) {
|
|
475
|
-
if (deferredStreamContent.length > 0) {
|
|
421
|
+
if (events) {
|
|
422
|
+
const streamEventsConsume = startProfileStep({
|
|
423
|
+
id: "profile:agent:stream-events-consume",
|
|
424
|
+
kind: "agent",
|
|
425
|
+
name: "streamEvents",
|
|
426
|
+
action: "consume",
|
|
427
|
+
});
|
|
428
|
+
if (shouldProfile)
|
|
429
|
+
yield streamEventsConsume.chunk;
|
|
430
|
+
try {
|
|
431
|
+
for await (const event of options.iterateWithTimeout(events, options.streamIdleTimeoutMs, "agent streamEvents", options.streamDeadlineAt, options.invokeTimeoutMs)) {
|
|
432
|
+
const projectedChunks = projectRuntimeStreamEvent({
|
|
433
|
+
event,
|
|
434
|
+
allowVisibleStreamDeltas: true,
|
|
435
|
+
includeStateStreamOutput: options.isDeepAgentBinding(options.binding),
|
|
436
|
+
rootAgentId: typeof options.binding.agent?.id === "string"
|
|
437
|
+
? options.binding.agent.id
|
|
438
|
+
: undefined,
|
|
439
|
+
countConfiguredToolsForAgentId: options.countConfiguredToolsForAgentId,
|
|
440
|
+
toolNameMapping: options.toolNameMapping,
|
|
441
|
+
primaryTools: options.primaryTools,
|
|
442
|
+
state: projectionState,
|
|
443
|
+
});
|
|
444
|
+
const eventContainsNonTodoToolResult = projectedChunks.some((chunk) => chunk.kind === "tool-result"
|
|
445
|
+
&& chunk.toolName !== "write_todos"
|
|
446
|
+
&& !(chunk.isError === true && isRetrySafeInvalidToolSelectionError(chunk.output)));
|
|
447
|
+
const eventContainsNonRetrySafeChunk = projectedChunks.some((chunk) => chunk.kind !== "upstream-event"
|
|
448
|
+
&& chunk.kind !== "content"
|
|
449
|
+
&& !(chunk.kind === "tool-result" && chunk.toolName === "write_todos")
|
|
450
|
+
&& !(chunk.kind === "tool-result" && chunk.isError === true && isRetrySafeInvalidToolSelectionError(chunk.output)));
|
|
451
|
+
for (const chunk of projectedChunks) {
|
|
452
|
+
if (chunk.kind === "tool-result" && chunk.isError === true && isRetrySafeInvalidToolSelectionError(chunk.output)) {
|
|
453
|
+
sawRetrySafeInvalidToolSelectionError = true;
|
|
454
|
+
}
|
|
455
|
+
if ((eventContainsNonTodoToolResult || eventContainsNonRetrySafeChunk) && deferredStreamContent.length > 0) {
|
|
476
456
|
yield* flushDeferredStreamContent();
|
|
477
457
|
}
|
|
478
|
-
|
|
458
|
+
if (eventContainsNonTodoToolResult || eventContainsNonRetrySafeChunk) {
|
|
459
|
+
emittedUnsafeStreamSideEffects = true;
|
|
460
|
+
}
|
|
461
|
+
if (chunk.kind === "content" && (shouldDeferStreamContent() || projectionState.hasFailedTaskDelegation)) {
|
|
462
|
+
deferredStreamContent.push(chunk);
|
|
463
|
+
continue;
|
|
464
|
+
}
|
|
465
|
+
yield chunk;
|
|
466
|
+
}
|
|
467
|
+
const terminalVisibleOutput = readTerminalEventVisibleOutput(event);
|
|
468
|
+
if (terminalVisibleOutput) {
|
|
469
|
+
const terminalExecutionEvidence = buildExecutionRecoveryEvidence({ projectionState });
|
|
470
|
+
const terminalMissingPlanRecoveryInstruction = !terminalExecutionEvidence.hasDelegatedAgentWithConfiguredTools
|
|
471
|
+
&& !terminalExecutionEvidence.hasOpenTaskDelegation
|
|
472
|
+
&& !projectionState.emittedSuccessfulTaskResult
|
|
473
|
+
? resolveMissingPlanRecoveryInstruction({
|
|
474
|
+
request,
|
|
475
|
+
assistantText: terminalVisibleOutput,
|
|
476
|
+
requiresPlan: requiresPlanEvidence(options.binding),
|
|
477
|
+
hasPlanStateEvidence: terminalExecutionEvidence.hasPlanStateEvidence,
|
|
478
|
+
hasWriteTodosEvidence: terminalExecutionEvidence.hasPlanStateEvidence,
|
|
479
|
+
hasToolResultEvidence: terminalExecutionEvidence.hasSuccessfulNonTodoToolResultEvidence,
|
|
480
|
+
})
|
|
481
|
+
: null;
|
|
482
|
+
const terminalDelegationOnlyRecoveryInstruction = resolveDelegationOnlyRecoveryInstruction(options.binding, terminalExecutionEvidence);
|
|
483
|
+
if (!shouldDeferStreamContent()
|
|
484
|
+
&& !terminalExecutionEvidence.hasIncompletePlanState
|
|
485
|
+
&& !terminalExecutionEvidence.hasFailedTaskDelegation
|
|
486
|
+
&& !terminalExecutionEvidence.hasOpenTaskDelegation
|
|
487
|
+
&& !hasMissingDelegatedExecutionEvidence(terminalExecutionEvidence)
|
|
488
|
+
&& !hasMissingDelegatedFindings(terminalExecutionEvidence)
|
|
489
|
+
&& !terminalMissingPlanRecoveryInstruction
|
|
490
|
+
&& !terminalDelegationOnlyRecoveryInstruction) {
|
|
491
|
+
if (deferredStreamContent.length > 0) {
|
|
492
|
+
yield* flushDeferredStreamContent();
|
|
493
|
+
}
|
|
494
|
+
return;
|
|
495
|
+
}
|
|
479
496
|
}
|
|
480
497
|
}
|
|
498
|
+
if (shouldProfile)
|
|
499
|
+
yield finishProfileStep({
|
|
500
|
+
id: "profile:agent:stream-events-consume",
|
|
501
|
+
kind: "agent",
|
|
502
|
+
name: "streamEvents",
|
|
503
|
+
action: "consume",
|
|
504
|
+
startedAt: streamEventsConsume.startedAt,
|
|
505
|
+
status: "completed",
|
|
506
|
+
});
|
|
507
|
+
}
|
|
508
|
+
catch (error) {
|
|
509
|
+
if (shouldProfile)
|
|
510
|
+
yield finishProfileStep({
|
|
511
|
+
id: "profile:agent:stream-events-consume",
|
|
512
|
+
kind: "agent",
|
|
513
|
+
name: "streamEvents",
|
|
514
|
+
action: "consume",
|
|
515
|
+
startedAt: streamEventsConsume.startedAt,
|
|
516
|
+
status: "failed",
|
|
517
|
+
error,
|
|
518
|
+
});
|
|
519
|
+
if (!emittedUnsafeStreamSideEffects
|
|
520
|
+
&& isOpenAICompatibleStreamingCompatibilityError(options.binding, error)) {
|
|
521
|
+
deferredStreamContent.length = 0;
|
|
522
|
+
}
|
|
523
|
+
else {
|
|
524
|
+
throw error;
|
|
525
|
+
}
|
|
481
526
|
}
|
|
482
|
-
if (shouldProfile)
|
|
483
|
-
yield finishProfileStep({
|
|
484
|
-
id: "profile:agent:stream-events-consume",
|
|
485
|
-
kind: "agent",
|
|
486
|
-
name: "streamEvents",
|
|
487
|
-
action: "consume",
|
|
488
|
-
startedAt: streamEventsConsume.startedAt,
|
|
489
|
-
status: "completed",
|
|
490
|
-
});
|
|
491
|
-
}
|
|
492
|
-
catch (error) {
|
|
493
|
-
if (shouldProfile)
|
|
494
|
-
yield finishProfileStep({
|
|
495
|
-
id: "profile:agent:stream-events-consume",
|
|
496
|
-
kind: "agent",
|
|
497
|
-
name: "streamEvents",
|
|
498
|
-
action: "consume",
|
|
499
|
-
startedAt: streamEventsConsume.startedAt,
|
|
500
|
-
status: "failed",
|
|
501
|
-
error,
|
|
502
|
-
});
|
|
503
|
-
throw error;
|
|
504
527
|
}
|
|
505
528
|
const streamedExecutionEvidence = buildExecutionRecoveryEvidence({ projectionState });
|
|
506
529
|
const streamedDelegatedRecoveryInstruction = resolveDelegatedExecutionRecoveryInstruction(streamedExecutionEvidence);
|
|
@@ -362,17 +362,32 @@ export async function invokeBuiltinTaskTool(input) {
|
|
|
362
362
|
const typedInput = isRecord(input.toolInput) ? input.toolInput : {};
|
|
363
363
|
const description = typeof typedInput.description === "string"
|
|
364
364
|
? typedInput.description
|
|
365
|
-
: ""
|
|
365
|
+
: typeof typedInput.instruction === "string"
|
|
366
|
+
? typedInput.instruction
|
|
367
|
+
: typeof typedInput.task === "string"
|
|
368
|
+
? typedInput.task
|
|
369
|
+
: typeof typedInput.prompt === "string"
|
|
370
|
+
? typedInput.prompt
|
|
371
|
+
: "";
|
|
366
372
|
const subagentType = typeof typedInput.subagent_type === "string"
|
|
367
373
|
? typedInput.subagent_type
|
|
368
|
-
: ""
|
|
374
|
+
: typeof typedInput.agentId === "string"
|
|
375
|
+
? typedInput.agentId
|
|
376
|
+
: typeof typedInput.agent_id === "string"
|
|
377
|
+
? typedInput.agent_id
|
|
378
|
+
: typeof typedInput.subagent === "string"
|
|
379
|
+
? typedInput.subagent
|
|
380
|
+
: "";
|
|
369
381
|
const builtinBackend = input.resolveBuiltinMiddlewareBackend(input.binding, input.options);
|
|
370
382
|
const resolvedSubagents = await input.resolveSubagents(compiledSubagents, input.binding);
|
|
371
383
|
const selectedSubagent = resolvedSubagents.find((subagent) => subagent.name === subagentType);
|
|
372
384
|
const selectedCompiledSubagent = compiledSubagents.find((subagent) => subagent.name === subagentType);
|
|
373
385
|
if (!selectedSubagent) {
|
|
374
386
|
const allowed = resolvedSubagents.map((subagent) => subagent.name);
|
|
375
|
-
|
|
387
|
+
const available = resolvedSubagents
|
|
388
|
+
.map((subagent) => `- ${subagent.name}: ${subagent.description}`)
|
|
389
|
+
.join("\n");
|
|
390
|
+
throw new Error(`Error: invoked agent of type ${subagentType}, the only allowed types are ${allowed.map((name) => `\`${name}\``).join(", ")}.${available ? `\nAvailable subagents:\n${available}` : ""}`);
|
|
376
391
|
}
|
|
377
392
|
const resolvedHostModel = selectedSubagent.model ? undefined : await input.resolveModel(primaryModel);
|
|
378
393
|
const summarizationModel = selectedSubagent.model ?? resolvedHostModel;
|
|
@@ -446,8 +461,15 @@ export async function resolveBuiltinMiddlewareTools(input) {
|
|
|
446
461
|
...(input.binding.agent.asyncSubagents ?? []),
|
|
447
462
|
];
|
|
448
463
|
const includeTaskTool = configuredSubagents.length > 0;
|
|
464
|
+
const taskSubagents = getBindingDeepAgentSubagents(input.binding)
|
|
465
|
+
.filter((subagent) => !("graphId" in subagent))
|
|
466
|
+
.map((subagent) => ({
|
|
467
|
+
name: subagent.name,
|
|
468
|
+
description: subagent.description,
|
|
469
|
+
}));
|
|
449
470
|
const tools = (await createBuiltinMiddlewareTools(backend, {
|
|
450
471
|
includeTaskTool,
|
|
472
|
+
taskSubagents,
|
|
451
473
|
workspaceRoot: input.binding.harnessRuntime.workspaceRoot,
|
|
452
474
|
toolRuntimeContext: input.options?.toolRuntimeContext,
|
|
453
475
|
invokeTaskTool: includeTaskTool
|
|
@@ -188,6 +188,10 @@ export type BuiltinExecutableTool = {
|
|
|
188
188
|
schema: unknown;
|
|
189
189
|
invoke: (input: unknown, config?: Record<string, unknown>) => Promise<unknown>;
|
|
190
190
|
};
|
|
191
|
+
export type BuiltinTaskSubagentDescriptor = {
|
|
192
|
+
name: string;
|
|
193
|
+
description: string;
|
|
194
|
+
};
|
|
191
195
|
export declare const BUILTIN_MIDDLEWARE_TOOL_DESCRIPTORS: readonly [{
|
|
192
196
|
readonly name: "write_todos";
|
|
193
197
|
readonly description: "Create and update the runtime todo board for multi-step work.";
|
|
@@ -254,6 +258,7 @@ export declare function filterBuiltinMiddlewareToolDescriptors(options?: {
|
|
|
254
258
|
export declare function createBuiltinMiddlewareTools(backend: BuiltinMiddlewareBackend, options: {
|
|
255
259
|
includeTaskTool: boolean;
|
|
256
260
|
invokeTaskTool?: (input: unknown) => Promise<unknown>;
|
|
261
|
+
taskSubagents?: BuiltinTaskSubagentDescriptor[];
|
|
257
262
|
workspaceRoot?: string;
|
|
258
263
|
toolRuntimeContext?: Record<string, unknown>;
|
|
259
264
|
}): Promise<Map<string, BuiltinExecutableTool>>;
|