@namzu/sdk 0.5.0 → 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +393 -0
- package/dist/advisory/executor.d.ts.map +1 -1
- package/dist/advisory/executor.js +9 -2
- package/dist/advisory/executor.js.map +1 -1
- package/dist/advisory/executor.test.d.ts +2 -1
- package/dist/advisory/executor.test.d.ts.map +1 -1
- package/dist/advisory/executor.test.js +7 -4
- package/dist/advisory/executor.test.js.map +1 -1
- package/dist/agents/ReactiveAgent.d.ts.map +1 -1
- package/dist/agents/ReactiveAgent.js +2 -0
- package/dist/agents/ReactiveAgent.js.map +1 -1
- package/dist/agents/SupervisorAgent.d.ts.map +1 -1
- package/dist/agents/SupervisorAgent.js +13 -0
- package/dist/agents/SupervisorAgent.js.map +1 -1
- package/dist/bridge/sse/mapper.test.js +2 -2
- package/dist/constants/compaction/index.d.ts.map +1 -1
- package/dist/constants/compaction/index.js +8 -3
- package/dist/constants/compaction/index.js.map +1 -1
- package/dist/constants/sandbox/index.d.ts +21 -0
- package/dist/constants/sandbox/index.d.ts.map +1 -1
- package/dist/constants/sandbox/index.js +30 -0
- package/dist/constants/sandbox/index.js.map +1 -1
- package/dist/constants/tools/index.d.ts.map +1 -1
- package/dist/constants/tools/index.js +33 -2
- package/dist/constants/tools/index.js.map +1 -1
- package/dist/manager/run/persistence.d.ts.map +1 -1
- package/dist/manager/run/persistence.js +35 -5
- package/dist/manager/run/persistence.js.map +1 -1
- package/dist/persona/assembler.d.ts +1 -0
- package/dist/persona/assembler.d.ts.map +1 -1
- package/dist/persona/assembler.js +28 -6
- package/dist/persona/assembler.js.map +1 -1
- package/dist/provider/collect.test.js +2 -2
- package/dist/public-runtime.d.ts +5 -4
- package/dist/public-runtime.d.ts.map +1 -1
- package/dist/public-runtime.js +5 -4
- package/dist/public-runtime.js.map +1 -1
- package/dist/public-tools.d.ts +2 -0
- package/dist/public-tools.d.ts.map +1 -1
- package/dist/public-tools.js +2 -0
- package/dist/public-tools.js.map +1 -1
- package/dist/public-types.d.ts +3 -0
- package/dist/public-types.d.ts.map +1 -1
- package/dist/registry/index.d.ts +2 -0
- package/dist/registry/index.d.ts.map +1 -1
- package/dist/registry/index.js +1 -0
- package/dist/registry/index.js.map +1 -1
- package/dist/registry/tool/execute.d.ts.map +1 -1
- package/dist/registry/tool/execute.js +87 -5
- package/dist/registry/tool/execute.js.map +1 -1
- package/dist/registry/tool/execute.test.d.ts +4 -2
- package/dist/registry/tool/execute.test.d.ts.map +1 -1
- package/dist/registry/tool/execute.test.js +112 -3
- package/dist/registry/tool/execute.test.js.map +1 -1
- package/dist/registry/toolset/catalog.d.ts +42 -0
- package/dist/registry/toolset/catalog.d.ts.map +1 -0
- package/dist/registry/toolset/catalog.js +217 -0
- package/dist/registry/toolset/catalog.js.map +1 -0
- package/dist/registry/toolset/catalog.test.d.ts +2 -0
- package/dist/registry/toolset/catalog.test.d.ts.map +1 -0
- package/dist/registry/toolset/catalog.test.js +85 -0
- package/dist/registry/toolset/catalog.test.js.map +1 -0
- package/dist/runtime/query/__tests__/deferred-tools.test.d.ts +2 -0
- package/dist/runtime/query/__tests__/deferred-tools.test.d.ts.map +1 -0
- package/dist/runtime/query/__tests__/deferred-tools.test.js +147 -0
- package/dist/runtime/query/__tests__/deferred-tools.test.js.map +1 -0
- package/dist/runtime/query/__tests__/executor-concurrency.test.d.ts +2 -0
- package/dist/runtime/query/__tests__/executor-concurrency.test.d.ts.map +1 -0
- package/dist/runtime/query/__tests__/executor-concurrency.test.js +98 -0
- package/dist/runtime/query/__tests__/executor-concurrency.test.js.map +1 -0
- package/dist/runtime/query/__tests__/executor-plugin-hooks.test.js +38 -3
- package/dist/runtime/query/__tests__/executor-plugin-hooks.test.js.map +1 -1
- package/dist/runtime/query/__tests__/prompt.test.js +47 -2
- package/dist/runtime/query/__tests__/prompt.test.js.map +1 -1
- package/dist/runtime/query/__tests__/stream-recovery.test.d.ts +2 -0
- package/dist/runtime/query/__tests__/stream-recovery.test.d.ts.map +1 -0
- package/dist/runtime/query/__tests__/stream-recovery.test.js +126 -0
- package/dist/runtime/query/__tests__/stream-recovery.test.js.map +1 -0
- package/dist/runtime/query/continuation.d.ts +16 -0
- package/dist/runtime/query/continuation.d.ts.map +1 -0
- package/dist/runtime/query/continuation.js +16 -0
- package/dist/runtime/query/continuation.js.map +1 -0
- package/dist/runtime/query/executor.d.ts +3 -0
- package/dist/runtime/query/executor.d.ts.map +1 -1
- package/dist/runtime/query/executor.js +71 -3
- package/dist/runtime/query/executor.js.map +1 -1
- package/dist/runtime/query/index.d.ts.map +1 -1
- package/dist/runtime/query/index.js +19 -3
- package/dist/runtime/query/index.js.map +1 -1
- package/dist/runtime/query/iteration/index.d.ts +22 -0
- package/dist/runtime/query/iteration/index.d.ts.map +1 -1
- package/dist/runtime/query/iteration/index.js +227 -60
- package/dist/runtime/query/iteration/index.js.map +1 -1
- package/dist/runtime/query/iteration/phases/context.d.ts +10 -0
- package/dist/runtime/query/iteration/phases/context.d.ts.map +1 -1
- package/dist/runtime/query/iteration/phases/context.js.map +1 -1
- package/dist/runtime/query/prompt.d.ts.map +1 -1
- package/dist/runtime/query/prompt.js +21 -1
- package/dist/runtime/query/prompt.js.map +1 -1
- package/dist/runtime/query/tooling.d.ts +1 -0
- package/dist/runtime/query/tooling.d.ts.map +1 -1
- package/dist/runtime/query/tooling.js +1 -0
- package/dist/runtime/query/tooling.js.map +1 -1
- package/dist/sandbox/provider/local.d.ts.map +1 -1
- package/dist/sandbox/provider/local.js +32 -1
- package/dist/sandbox/provider/local.js.map +1 -1
- package/dist/session/workspace/__tests__/shared-run.test.d.ts +2 -0
- package/dist/session/workspace/__tests__/shared-run.test.d.ts.map +1 -0
- package/dist/session/workspace/__tests__/shared-run.test.js +147 -0
- package/dist/session/workspace/__tests__/shared-run.test.js.map +1 -0
- package/dist/session/workspace/index.d.ts +2 -0
- package/dist/session/workspace/index.d.ts.map +1 -1
- package/dist/session/workspace/index.js +1 -0
- package/dist/session/workspace/index.js.map +1 -1
- package/dist/session/workspace/shared-run.d.ts +81 -0
- package/dist/session/workspace/shared-run.d.ts.map +1 -0
- package/dist/session/workspace/shared-run.js +251 -0
- package/dist/session/workspace/shared-run.js.map +1 -0
- package/dist/skills/loader.d.ts.map +1 -1
- package/dist/skills/loader.js +36 -6
- package/dist/skills/loader.js.map +1 -1
- package/dist/skills/loader.test.d.ts +2 -0
- package/dist/skills/loader.test.d.ts.map +1 -0
- package/dist/skills/loader.test.js +65 -0
- package/dist/skills/loader.test.js.map +1 -0
- package/dist/streaming/coalesce.test.js +1 -1
- package/dist/tools/builtins/__tests__/edit.test.d.ts +2 -0
- package/dist/tools/builtins/__tests__/edit.test.d.ts.map +1 -0
- package/dist/tools/builtins/__tests__/edit.test.js +38 -0
- package/dist/tools/builtins/__tests__/edit.test.js.map +1 -0
- package/dist/tools/builtins/__tests__/payload-budget.test.d.ts +2 -0
- package/dist/tools/builtins/__tests__/payload-budget.test.d.ts.map +1 -0
- package/dist/tools/builtins/__tests__/payload-budget.test.js +22 -0
- package/dist/tools/builtins/__tests__/payload-budget.test.js.map +1 -0
- package/dist/tools/builtins/__tests__/read-file.test.d.ts +2 -0
- package/dist/tools/builtins/__tests__/read-file.test.d.ts.map +1 -0
- package/dist/tools/builtins/__tests__/read-file.test.js +24 -0
- package/dist/tools/builtins/__tests__/read-file.test.js.map +1 -0
- package/dist/tools/builtins/__tests__/verify-outputs.test.d.ts +2 -0
- package/dist/tools/builtins/__tests__/verify-outputs.test.d.ts.map +1 -0
- package/dist/tools/builtins/__tests__/verify-outputs.test.js +52 -0
- package/dist/tools/builtins/__tests__/verify-outputs.test.js.map +1 -0
- package/dist/tools/builtins/__tests__/write-file.test.d.ts +2 -0
- package/dist/tools/builtins/__tests__/write-file.test.d.ts.map +1 -0
- package/dist/tools/builtins/__tests__/write-file.test.js +74 -0
- package/dist/tools/builtins/__tests__/write-file.test.js.map +1 -0
- package/dist/tools/builtins/bash.d.ts.map +1 -1
- package/dist/tools/builtins/bash.js +40 -7
- package/dist/tools/builtins/bash.js.map +1 -1
- package/dist/tools/builtins/edit.d.ts +5 -2
- package/dist/tools/builtins/edit.d.ts.map +1 -1
- package/dist/tools/builtins/edit.js +114 -18
- package/dist/tools/builtins/edit.js.map +1 -1
- package/dist/tools/builtins/index.d.ts +1 -0
- package/dist/tools/builtins/index.d.ts.map +1 -1
- package/dist/tools/builtins/index.js +13 -13
- package/dist/tools/builtins/index.js.map +1 -1
- package/dist/tools/builtins/read-file.d.ts +1 -0
- package/dist/tools/builtins/read-file.d.ts.map +1 -1
- package/dist/tools/builtins/read-file.js +23 -8
- package/dist/tools/builtins/read-file.js.map +1 -1
- package/dist/tools/builtins/search-tools.d.ts.map +1 -1
- package/dist/tools/builtins/search-tools.js +4 -1
- package/dist/tools/builtins/search-tools.js.map +1 -1
- package/dist/tools/builtins/verify-outputs.d.ts +5 -0
- package/dist/tools/builtins/verify-outputs.d.ts.map +1 -0
- package/dist/tools/builtins/verify-outputs.js +103 -0
- package/dist/tools/builtins/verify-outputs.js.map +1 -0
- package/dist/tools/builtins/write-file.d.ts +3 -2
- package/dist/tools/builtins/write-file.d.ts.map +1 -1
- package/dist/tools/builtins/write-file.js +72 -12
- package/dist/tools/builtins/write-file.js.map +1 -1
- package/dist/tools/coordinator/__tests__/agent.test.d.ts +15 -0
- package/dist/tools/coordinator/__tests__/agent.test.d.ts.map +1 -0
- package/dist/tools/coordinator/__tests__/agent.test.js +142 -0
- package/dist/tools/coordinator/__tests__/agent.test.js.map +1 -0
- package/dist/tools/coordinator/__tests__/task-list.test.d.ts +13 -0
- package/dist/tools/coordinator/__tests__/task-list.test.d.ts.map +1 -0
- package/dist/tools/coordinator/__tests__/task-list.test.js +162 -0
- package/dist/tools/coordinator/__tests__/task-list.test.js.map +1 -0
- package/dist/tools/coordinator/agent.d.ts +34 -0
- package/dist/tools/coordinator/agent.d.ts.map +1 -0
- package/dist/tools/coordinator/agent.js +107 -0
- package/dist/tools/coordinator/agent.js.map +1 -0
- package/dist/tools/coordinator/index.d.ts +7 -0
- package/dist/tools/coordinator/index.d.ts.map +1 -1
- package/dist/tools/coordinator/index.js +111 -21
- package/dist/tools/coordinator/index.js.map +1 -1
- package/dist/types/agent/base.d.ts +8 -0
- package/dist/types/agent/base.d.ts.map +1 -1
- package/dist/types/agent/reactive.d.ts +23 -0
- package/dist/types/agent/reactive.d.ts.map +1 -1
- package/dist/types/agent/supervisor.d.ts +41 -0
- package/dist/types/agent/supervisor.d.ts.map +1 -1
- package/dist/types/message/index.d.ts +22 -1
- package/dist/types/message/index.d.ts.map +1 -1
- package/dist/types/message/index.js +7 -2
- package/dist/types/message/index.js.map +1 -1
- package/dist/types/provider/chat.d.ts +2 -9
- package/dist/types/provider/chat.d.ts.map +1 -1
- package/dist/types/run/events.d.ts +6 -0
- package/dist/types/run/events.d.ts.map +1 -1
- package/dist/types/run/events.js.map +1 -1
- package/dist/types/sandbox/index.d.ts +193 -0
- package/dist/types/sandbox/index.d.ts.map +1 -1
- package/dist/types/sandbox/index.js.map +1 -1
- package/dist/types/skills/index.d.ts +2 -0
- package/dist/types/skills/index.d.ts.map +1 -1
- package/dist/types/tool/index.d.ts +22 -0
- package/dist/types/tool/index.d.ts.map +1 -1
- package/dist/types/toolset/index.d.ts +71 -0
- package/dist/types/toolset/index.d.ts.map +1 -0
- package/dist/types/toolset/index.js +2 -0
- package/dist/types/toolset/index.js.map +1 -0
- package/dist/types/workspace/index.d.ts +1 -0
- package/dist/types/workspace/index.d.ts.map +1 -1
- package/dist/types/workspace/shared-run.d.ts +61 -0
- package/dist/types/workspace/shared-run.d.ts.map +1 -0
- package/dist/types/workspace/shared-run.js +2 -0
- package/dist/types/workspace/shared-run.js.map +1 -0
- package/dist/verification/index.d.ts +1 -0
- package/dist/verification/index.d.ts.map +1 -1
- package/dist/verification/index.js +1 -0
- package/dist/verification/index.js.map +1 -1
- package/dist/verification/presets.d.ts +53 -0
- package/dist/verification/presets.d.ts.map +1 -0
- package/dist/verification/presets.js +70 -0
- package/dist/verification/presets.js.map +1 -0
- package/dist/verification/presets.test.d.ts +16 -0
- package/dist/verification/presets.test.d.ts.map +1 -0
- package/dist/verification/presets.test.js +79 -0
- package/dist/verification/presets.test.js.map +1 -0
- package/package.json +3 -2
- package/src/advisory/executor.test.ts +7 -4
- package/src/advisory/executor.ts +11 -2
- package/src/agents/ReactiveAgent.ts +2 -0
- package/src/agents/SupervisorAgent.ts +13 -0
- package/src/bridge/sse/mapper.test.ts +2 -2
- package/src/constants/compaction/index.ts +8 -3
- package/src/constants/sandbox/index.ts +37 -0
- package/src/constants/tools/index.ts +33 -2
- package/src/manager/run/persistence.ts +34 -6
- package/src/persona/assembler.ts +31 -8
- package/src/provider/collect.test.ts +2 -2
- package/src/public-runtime.ts +14 -1
- package/src/public-tools.ts +2 -0
- package/src/public-types.ts +7 -0
- package/src/registry/index.ts +7 -0
- package/src/registry/tool/execute.test.ts +132 -3
- package/src/registry/tool/execute.ts +94 -9
- package/src/registry/toolset/catalog.test.ts +97 -0
- package/src/registry/toolset/catalog.ts +283 -0
- package/src/runtime/query/__tests__/deferred-tools.test.ts +183 -0
- package/src/runtime/query/__tests__/executor-concurrency.test.ts +122 -0
- package/src/runtime/query/__tests__/executor-plugin-hooks.test.ts +48 -3
- package/src/runtime/query/__tests__/prompt.test.ts +51 -2
- package/src/runtime/query/__tests__/stream-recovery.test.ts +156 -0
- package/src/runtime/query/continuation.ts +16 -0
- package/src/runtime/query/executor.ts +82 -13
- package/src/runtime/query/index.ts +24 -3
- package/src/runtime/query/iteration/index.ts +263 -68
- package/src/runtime/query/iteration/phases/context.ts +10 -0
- package/src/runtime/query/prompt.ts +17 -1
- package/src/runtime/query/tooling.ts +2 -0
- package/src/sandbox/provider/local.ts +33 -0
- package/src/session/workspace/__tests__/shared-run.test.ts +181 -0
- package/src/session/workspace/index.ts +6 -0
- package/src/session/workspace/shared-run.ts +316 -0
- package/src/skills/loader.test.ts +89 -0
- package/src/skills/loader.ts +37 -6
- package/src/streaming/coalesce.test.ts +1 -1
- package/src/tools/builtins/__tests__/edit.test.ts +57 -0
- package/src/tools/builtins/__tests__/payload-budget.test.ts +29 -0
- package/src/tools/builtins/__tests__/read-file.test.ts +31 -0
- package/src/tools/builtins/__tests__/verify-outputs.test.ts +71 -0
- package/src/tools/builtins/__tests__/write-file.test.ts +97 -0
- package/src/tools/builtins/bash.ts +48 -7
- package/src/tools/builtins/edit.ts +162 -27
- package/src/tools/builtins/index.ts +13 -13
- package/src/tools/builtins/read-file.ts +31 -8
- package/src/tools/builtins/search-tools.ts +5 -1
- package/src/tools/builtins/verify-outputs.ts +126 -0
- package/src/tools/builtins/write-file.ts +83 -14
- package/src/tools/coordinator/__tests__/agent.test.ts +172 -0
- package/src/tools/coordinator/__tests__/task-list.test.ts +182 -0
- package/src/tools/coordinator/agent.ts +157 -0
- package/src/tools/coordinator/index.ts +128 -22
- package/src/types/agent/base.ts +8 -0
- package/src/types/agent/reactive.ts +25 -0
- package/src/types/agent/supervisor.ts +45 -0
- package/src/types/message/index.ts +32 -2
- package/src/types/provider/chat.ts +2 -9
- package/src/types/run/events.ts +6 -0
- package/src/types/sandbox/index.ts +219 -0
- package/src/types/skills/index.ts +4 -0
- package/src/types/tool/index.ts +24 -0
- package/src/types/toolset/index.ts +86 -0
- package/src/types/workspace/index.ts +9 -0
- package/src/types/workspace/shared-run.ts +65 -0
- package/src/verification/index.ts +1 -0
- package/src/verification/presets.test.ts +112 -0
- package/src/verification/presets.ts +72 -0
|
@@ -0,0 +1,182 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Behavioural contract for the `agent_task_list` coordinator tool:
|
|
3
|
+
*
|
|
4
|
+
* - Returns every task the gateway knows about, with state + timing.
|
|
5
|
+
* - Filters by state when the input narrows it.
|
|
6
|
+
* - Emits a per-state summary in the data payload — what the supervisor
|
|
7
|
+
* reads to decide "done vs not done" before calling verify_outputs.
|
|
8
|
+
* - Distinct from the plan-task store's `task_list` (subject/blockedBy);
|
|
9
|
+
* listing them under different names avoids ToolRegistry collisions when
|
|
10
|
+
* both surfaces are wired into the same agent.
|
|
11
|
+
*/
|
|
12
|
+
|
|
13
|
+
import { describe, expect, it } from 'vitest'
|
|
14
|
+
|
|
15
|
+
import type { TaskGateway, TaskHandle } from '../../../types/agent/gateway.js'
|
|
16
|
+
import type { TaskId } from '../../../types/ids/index.js'
|
|
17
|
+
import type { ToolContext } from '../../../types/tool/index.js'
|
|
18
|
+
import { buildCoordinatorTools } from '../index.js'
|
|
19
|
+
|
|
20
|
+
/**
 * Minimal `ToolContext` stub handed to `tool.execute` in these tests: a fixed
 * run id and working directory, a signal from a fresh `AbortController`, an
 * empty environment, and a no-op logger.
 */
function makeContext(): ToolContext {
	const { signal } = new AbortController()
	const context: ToolContext = {
		runId: 'run_test' as never,
		workingDirectory: '/tmp/test',
		abortSignal: signal,
		env: {},
		log: () => {},
	}
	return context
}
|
|
29
|
+
|
|
30
|
+
/**
 * Stub `TaskGateway` backed by a fixed list of handles.
 *
 * Only the read paths (`getTask`, `listTasks`) return data from `handles`.
 * `createTask` and `waitForTask` reject with "not used"; the remaining
 * members are no-ops.
 */
function gatewayWith(handles: TaskHandle[]): TaskGateway {
	// Shared rejection for the members these tests never expect to be called.
	const notUsed = async (): Promise<never> => {
		throw new Error('not used')
	}

	const gateway: TaskGateway = {
		createTask: notUsed,
		waitForTask: notUsed,
		continueTask: async () => {},
		cancelTask: () => {},
		getTask: (id) => handles.find((candidate) => candidate.taskId === id),
		listTasks: () => handles,
		// Returns an unsubscribe function that does nothing.
		onTaskCompleted: () => () => {},
	}
	return gateway
}
|
|
51
|
+
|
|
52
|
+
/**
 * Fixture factory for a `TaskHandle`.
 *
 * A `result` payload is attached only when `lastError` is provided; its
 * `status` is 'failed' when the task state is 'failed' and 'completed'
 * otherwise. Without `lastError` the handle's `result` is `undefined`.
 */
function handle(input: {
	id: string
	agentId: string
	state: TaskHandle['state']
	createdAt: number
	completedAt?: number
	lastError?: string
}): TaskHandle {
	const { id, agentId, state, createdAt, completedAt, lastError } = input

	let result: TaskHandle['result'] = undefined
	if (lastError) {
		const runStatus = state === 'failed' ? 'failed' : 'completed'
		result = {
			runId: 'run_x' as never,
			status: runStatus,
			usage: { inputTokens: 0, outputTokens: 0, totalTokens: 0 } as never,
			cost: { inputCostUsd: 0, outputCostUsd: 0, totalCostUsd: 0 } as never,
			iterations: 1,
			durationMs: 0,
			messages: [],
			result: '',
			lastError,
		} as never
	}

	return {
		taskId: id as TaskId,
		agentId,
		state,
		createdAt,
		completedAt,
		result,
	}
}
|
|
81
|
+
|
|
82
|
+
/**
 * Build the coordinator tools against `gateway` and return the one named
 * `agent_task_list`.
 *
 * Throws when the builder did not produce that tool, so a missing tool fails
 * the test with a clear message.
 */
function findAgentTaskList(gateway: TaskGateway) {
	const coordinatorTools = buildCoordinatorTools({
		gateway,
		workingDirectory: '/tmp/test',
		allowedAgentIds: ['solution-architecture', 'enterprise-architecture'],
	})

	for (const candidate of coordinatorTools) {
		if (candidate.name === 'agent_task_list') return candidate
	}
	throw new Error('agent_task_list tool missing from coordinator builder')
}
|
|
92
|
+
|
|
93
|
+
// Behavioural suite for `agent_task_list`: full listing, state filter,
// empty gateway, and the tool-name regression guard.
describe('coordinator agent_task_list tool', () => {
	it('lists every task with state, agent, and timing', async () => {
		const completedTask = handle({
			id: 'task_a',
			agentId: 'solution-architecture',
			state: 'completed',
			createdAt: 0,
			completedAt: 5000,
		})
		const runningTask = handle({
			id: 'task_b',
			agentId: 'enterprise-architecture',
			state: 'running',
			createdAt: 1000,
		})
		const failedTask = handle({
			id: 'task_c',
			agentId: 'solution-architecture',
			state: 'failed',
			createdAt: 2000,
			completedAt: 4000,
			lastError: 'bash exit 1',
		})

		const tool = findAgentTaskList(gatewayWith([completedTask, runningTask, failedTask]))
		const result = await tool.execute({}, makeContext())

		expect(result.success).toBe(true)

		// Human-readable output: totals, per-state counts, then per-task lines.
		const expectedPatterns = [
			/Tasks: 3 total/,
			/1 running/,
			/1 completed/,
			/1 failed/,
			/task_a → solution-architecture \[completed\]/,
			/task_c .* error: bash exit 1/,
		]
		for (const pattern of expectedPatterns) {
			expect(result.output).toMatch(pattern)
		}

		// Structured payload mirrors the text output.
		const { items, summary } = result.data as { items: unknown[]; summary: { total: number } }
		expect(summary.total).toBe(3)
		expect(items).toHaveLength(3)
	})

	it('filters by state', async () => {
		const completedTask = handle({
			id: 'task_a',
			agentId: 'solution-architecture',
			state: 'completed',
			createdAt: 0,
			completedAt: 5000,
		})
		const runningTask = handle({
			id: 'task_b',
			agentId: 'enterprise-architecture',
			state: 'running',
			createdAt: 1000,
		})

		const tool = findAgentTaskList(gatewayWith([completedTask, runningTask]))
		const result = await tool.execute({ state: 'running' }, makeContext())

		expect(result.success).toBe(true)
		const { items } = result.data as { items: Array<{ task_id: string }> }
		expect(items).toHaveLength(1)
		expect(items[0]?.task_id).toBe('task_b')
		expect(result.output).not.toMatch(/task_a/)
	})

	it('handles an empty gateway', async () => {
		const emptyGateway = gatewayWith([])
		const tool = findAgentTaskList(emptyGateway)
		const result = await tool.execute({}, makeContext())

		expect(result.success).toBe(true)
		for (const pattern of [/Tasks: 0 total/, /no tasks launched yet/]) {
			expect(result.output).toMatch(pattern)
		}
	})

	it('does not collide with the plan-task store `task_list` tool name', async () => {
		// Regression guard: the agent-task gateway inspector was once
		// registered as `task_list`, the same name as the plan-task store's
		// list tool, so one shadowed the other in any agent wiring both
		// surfaces. It is now named `agent_task_list`.
		const toolNames = buildCoordinatorTools({
			gateway: gatewayWith([]),
			workingDirectory: '/tmp/test',
			allowedAgentIds: ['solution-architecture'],
		}).map((tool) => tool.name)

		expect(toolNames).toContain('agent_task_list')
		expect(toolNames).not.toContain('task_list')
	})
})
|
|
@@ -0,0 +1,157 @@
|
|
|
1
|
+
import { z } from 'zod'
|
|
2
|
+
|
|
3
|
+
import type { AgentRuntimeContext } from '../../types/agent/base.js'
|
|
4
|
+
import type { TaskGateway } from '../../types/agent/gateway.js'
|
|
5
|
+
import type { ToolDefinition } from '../../types/tool/index.js'
|
|
6
|
+
import { defineTool } from '../defineTool.js'
|
|
7
|
+
|
|
8
|
+
import type { TaskLaunchedCallback } from './index.js'
|
|
9
|
+
|
|
10
|
+
/**
|
|
11
|
+
* Build the canonical Claude Code `Agent` tool — synchronous subagent
|
|
12
|
+
* delegation that mirrors what Claude is trained against in
|
|
13
|
+
* `code.claude.com/docs/en/sub-agents`.
|
|
14
|
+
*
|
|
15
|
+
* Semantics: parent calls `Agent({ description, prompt, subagent_type })`,
|
|
16
|
+
* the runtime spawns the chosen subagent with its own context window,
|
|
17
|
+
* the parent's tool call BLOCKS until the subagent finishes, and the
|
|
18
|
+
* subagent's final text comes back as the tool result. Intermediate
|
|
19
|
+
* subagent tool calls are isolated — only the summary surfaces to
|
|
20
|
+
* the parent.
|
|
21
|
+
*
|
|
22
|
+
* This is **NOT** the same shape as the legacy `create_task` /
|
|
23
|
+
* `continue_task` / `cancel_task` trio that this package ships
|
|
24
|
+
* alongside it: those are non-blocking and use a `<task-notification>`
|
|
25
|
+
* callback model. The async pattern is useful for hosts that want a
|
|
26
|
+
* work-queue surface, but it is not what Claude Code trained against.
|
|
27
|
+
* For free agentic alignment, prefer the canonical `Agent` tool; keep
|
|
28
|
+
* the legacy coordinator tools only when you genuinely need
|
|
29
|
+
* fire-and-forget multi-task fan-out.
|
|
30
|
+
*/
|
|
31
|
+
export interface AgentToolOptions {
	/** Gateway used to create the subagent task and to wait for it. */
	gateway: TaskGateway
	/** Passed as `workingDirectory` on every task the tool creates. */
	workingDirectory: string
	/** Forwarded as-is on task creation when provided. */
	runtimeContext?: AgentRuntimeContext
	/**
	 * Subagent ids accepted as `subagent_type`. When exactly one id is
	 * listed, `subagent_type` becomes optional and defaults to that id.
	 */
	allowedAgentIds: string[]

	/** Called once the task has been created, before the tool awaits it. */
	onTaskLaunched?: TaskLaunchedCallback
}
|
|
39
|
+
|
|
40
|
+
/**
 * Build the blocking `Agent` tool definition.
 *
 * On execution the tool creates a task on `opts.gateway` for the chosen
 * subagent, notifies `opts.onTaskLaunched` (if set), then awaits
 * `gateway.waitForTask` and maps the finished handle to a tool result:
 *
 * - success: `output` is the subagent's result text (or a placeholder when
 *   the text is empty);
 * - failure: `error` is `Subagent <id> <state|status>: <detail>`, where
 *   detail is the first non-empty of `lastError`, the result text, or a
 *   fixed placeholder.
 *
 * @param opts - Gateway, working directory, allowed subagent ids and
 *   optional runtime context / launch callback.
 * @returns The `Agent` tool definition produced by `defineTool`.
 */
export function buildAgentTool(opts: AgentToolOptions): ToolDefinition {
	const { gateway, allowedAgentIds: agentIds, onTaskLaunched } = opts
	const cwd = opts.workingDirectory

	// z.enum needs a non-empty tuple; fall back to a free-form string when
	// no agent ids were supplied.
	const subagentTypeEnum =
		agentIds.length > 0 ? z.enum(agentIds as [string, ...string[]]) : z.string()

	return defineTool({
		name: 'Agent',
		description: `Delegate a task to a specialized subagent. BLOCKING: returns when the subagent has finished, with the subagent's final text as the tool result. The subagent runs in its own context window and cannot see your conversation — include all necessary context in the prompt. Available subagents: ${agentIds.join(', ')}. To run multiple subagents in parallel, call this tool multiple times in a single response.`,
		inputSchema: z.object({
			description: z.string().describe('Short label for tracking (shown to the user)'),
			prompt: z
				.string()
				.describe('Self-contained task description with all context the subagent needs'),
			subagent_type:
				agentIds.length === 1
					? subagentTypeEnum
							.optional()
							.describe(`Which subagent to run (defaults to the only one: ${agentIds[0]})`)
					: subagentTypeEnum.describe('Which subagent to run'),
		}),
		category: 'custom',
		permissions: [],
		readOnly: false,
		destructive: false,
		concurrencySafe: true,
		async execute({ description, prompt, subagent_type }, context) {
			// With a single registered subagent the type is optional — default to
			// it so the model can't trip the "subagent_type required" validation.
			const agentId = subagent_type ?? (agentIds.length === 1 ? agentIds[0] : undefined)
			if (!agentId) {
				return {
					success: false,
					output: '',
					error: `subagent_type is required — choose one of: ${agentIds.join(', ')}`,
				}
			}

			const handle = await gateway.createTask({
				agentId,
				prompt,
				workingDirectory: cwd,
				runtimeContext: opts.runtimeContext,
			})

			onTaskLaunched?.(handle.taskId, {
				agentId,
				description,
				// Same canonical-envelope plumbing as coordinator/index.ts
				// (ses_009-task-notification-envelope). For Agent-tool path
				// the subagent run is awaited synchronously below, so this
				// id is only used if a probe / hook unexpectedly forks the
				// completion to the background notification channel.
				originalToolUseId: context.toolUseId,
			})

			const completed = await gateway.waitForTask(handle.taskId)

			// Two layers can disagree on whether the subagent succeeded:
			//
			//   1. `TaskHandle.state` — the gateway's terminal task state.
			//      Some gateways (e.g. vandal's) explicitly map
			//      `result.status !== 'completed'` to `state = 'failed'`,
			//      others (e.g. SDK's `LocalTaskGateway`) just forward
			//      whatever the AgentManager set, which does not always
			//      reflect run-level failure.
			//   2. `BaseAgentResult.status` — the run's own status. The
			//      canonical source of truth for whether the agent actually
			//      finished its work; `lastError` carries the failure
			//      message when set.
			//
			// Treat the subagent as successful only when BOTH agree.
			// Reporting a failed subagent as successful would silently
			// hand the parent garbage output and make debugging
			// impossible, which is what Codex flagged on the first cut.
			const runStatus = completed.result?.status
			const succeeded =
				completed.state === 'completed' && (runStatus === undefined || runStatus === 'completed')

			// Normalise the subagent's result to text. JSON.stringify can
			// yield `undefined` at runtime for non-serialisable values, so
			// coerce that to '' to keep `resultText` a real string.
			const rawResult = completed.result?.result
			const resultText =
				typeof rawResult === 'string'
					? rawResult
					: rawResult !== undefined
						? (JSON.stringify(rawResult) ?? '')
						: ''

			if (!succeeded) {
				const failureLabel =
					completed.state !== 'completed' ? completed.state : (runStatus ?? 'failed')
				// `||` rather than `??`: `resultText` is always a string, so a
				// nullish check would never reach the placeholder and an empty
				// detail would produce a dangling "…failed: " message.
				const detail =
					completed.result?.lastError || resultText || '(subagent provided no failure detail)'
				return {
					success: false,
					output: '',
					error: `Subagent ${agentId} ${failureLabel}: ${detail}`,
					data: {
						task_id: handle.taskId,
						subagent_type: agentId,
						state: completed.state,
						status: runStatus,
						lastError: completed.result?.lastError,
					},
				}
			}

			return {
				success: true,
				output: resultText || '(subagent returned no text)',
				data: {
					task_id: handle.taskId,
					subagent_type: agentId,
					state: completed.state,
					status: runStatus,
				},
			}
		},
	})
}
|
|
@@ -13,6 +13,13 @@ export type TaskLaunchedCallback = (
|
|
|
13
13
|
agentId: string
|
|
14
14
|
description: string
|
|
15
15
|
planTaskId?: string
|
|
16
|
+
/**
|
|
17
|
+
* The assistant `tool_use_id` that dispatched this task.
|
|
18
|
+
* Threaded from `ToolContext.toolUseId` so the runtime can
|
|
19
|
+
* later emit a canonical `tool_result` content block bound
|
|
20
|
+
* to the same id when the background task completes.
|
|
21
|
+
*/
|
|
22
|
+
originalToolUseId?: string
|
|
16
23
|
},
|
|
17
24
|
) => void
|
|
18
25
|
|
|
@@ -38,21 +45,29 @@ export function buildCoordinatorTools(opts: CoordinatorToolsOptions): ToolDefini
|
|
|
38
45
|
taskStore,
|
|
39
46
|
runId,
|
|
40
47
|
getPlanManager,
|
|
41
|
-
onTaskLaunched
|
|
48
|
+
// `onTaskLaunched` was the entry point for the old
|
|
49
|
+
// non-blocking + envelope-injection flow. create_task is now
|
|
50
|
+
// blocking, so the callback is no longer wired here.
|
|
51
|
+
// Intentionally not destructured to keep the unused-binding
|
|
52
|
+
// lint clean; callers can still pass it for backwards
|
|
53
|
+
// compatibility (Agent tool consumes it from its own path).
|
|
42
54
|
} = opts
|
|
43
55
|
const cwd = opts.workingDirectory
|
|
56
|
+
void opts.onTaskLaunched
|
|
44
57
|
|
|
45
58
|
const agentIdEnum = agentIds.length > 0 ? z.enum(agentIds as [string, ...string[]]) : z.string()
|
|
46
59
|
|
|
47
60
|
const createTask = defineTool({
|
|
48
61
|
name: 'create_task',
|
|
49
|
-
description: `Launch a task on a specialized agent.
|
|
62
|
+
description: `Launch a task on a specialized agent and await its result. BLOCKING: returns the agent's final output as this call's tool_result. Available agents: ${agentIds.join(', ')}. Prefer compact assignments; for large context, write/read shared workspace files and pass filenames or references. To launch multiple tasks in parallel, call this tool multiple times in a single assistant turn — the runtime executes every tool_use block from one response concurrently and delivers all tool_results together, so 'fan out 8 specialists' is one assistant message with 8 create_task blocks.`,
|
|
50
63
|
inputSchema: z.object({
|
|
51
64
|
agent_id: agentIdEnum.describe('Which agent to run'),
|
|
52
65
|
prompt: z
|
|
53
66
|
.string()
|
|
54
|
-
.describe(
|
|
55
|
-
|
|
67
|
+
.describe(
|
|
68
|
+
'Self-contained assignment for the agent. For large generated content, prefer workspace file references so provider output-token limits do not cut off the tool call.',
|
|
69
|
+
),
|
|
70
|
+
description: z.string().describe('Short summary for tracking, shown to the user.'),
|
|
56
71
|
plan_task_id: z
|
|
57
72
|
.string()
|
|
58
73
|
.optional()
|
|
@@ -65,7 +80,7 @@ export function buildCoordinatorTools(opts: CoordinatorToolsOptions): ToolDefini
|
|
|
65
80
|
readOnly: false,
|
|
66
81
|
destructive: false,
|
|
67
82
|
concurrencySafe: true,
|
|
68
|
-
async execute({ agent_id, prompt, description, plan_task_id }) {
|
|
83
|
+
async execute({ agent_id, prompt, description, plan_task_id }, _context) {
|
|
69
84
|
let resolvedPlanTaskId = plan_task_id
|
|
70
85
|
|
|
71
86
|
if (taskStore) {
|
|
@@ -93,22 +108,36 @@ export function buildCoordinatorTools(opts: CoordinatorToolsOptions): ToolDefini
|
|
|
93
108
|
runtimeContext: opts.runtimeContext,
|
|
94
109
|
})
|
|
95
110
|
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
111
|
+
// Industry-standard Anthropic tool pattern: tool returns
|
|
112
|
+
// its real result as the tool_result for the dispatching
|
|
113
|
+
// tool_use. Parallel fan-out happens at the executor layer
|
|
114
|
+
// — when the supervisor emits N create_task blocks in one
|
|
115
|
+
// assistant turn, the runtime runs them with Promise.all
|
|
116
|
+
// and delivers all N tool_results together. No async
|
|
117
|
+
// envelope injection, no second tool_result for the same
|
|
118
|
+
// tool_use_id (which Anthropic rejects with 400).
|
|
119
|
+
const completed = await gateway.waitForTask(handle.taskId)
|
|
120
|
+
const success = completed.state === 'completed'
|
|
121
|
+
const resultText =
|
|
122
|
+
completed.result?.result ??
|
|
123
|
+
completed.result?.lastError ??
|
|
124
|
+
`Task finished with state: ${completed.state}`
|
|
125
|
+
|
|
126
|
+
if (resolvedPlanTaskId && taskStore) {
|
|
127
|
+
await taskStore.update(resolvedPlanTaskId as `task_${string}`, {
|
|
128
|
+
status: 'completed',
|
|
129
|
+
description: success ? undefined : `Failed: ${resultText.substring(0, 200)}`,
|
|
101
130
|
})
|
|
102
131
|
}
|
|
103
132
|
|
|
104
133
|
return {
|
|
105
|
-
success
|
|
106
|
-
output:
|
|
134
|
+
success,
|
|
135
|
+
output: resultText,
|
|
107
136
|
data: {
|
|
108
137
|
task_id: handle.taskId,
|
|
109
138
|
agent_id,
|
|
110
139
|
description,
|
|
111
|
-
state:
|
|
140
|
+
state: completed.state,
|
|
112
141
|
plan_task_id: resolvedPlanTaskId,
|
|
113
142
|
},
|
|
114
143
|
}
|
|
@@ -118,11 +147,9 @@ export function buildCoordinatorTools(opts: CoordinatorToolsOptions): ToolDefini
|
|
|
118
147
|
const continueTask = defineTool({
|
|
119
148
|
name: 'continue_task',
|
|
120
149
|
description:
|
|
121
|
-
|
|
150
|
+
"Send a follow-up message to a previously completed task and await the agent's next reply. BLOCKING: returns the agent's new output as this call's tool_result, the same shape as create_task. Only use this with a task_id from a previous create_task. To run multiple follow-ups in parallel, call this tool multiple times in a single assistant turn.",
|
|
122
151
|
inputSchema: z.object({
|
|
123
|
-
task_id: z
|
|
124
|
-
.string()
|
|
125
|
-
.describe('Agent task ID from a previous create_task or task-notification'),
|
|
152
|
+
task_id: z.string().describe('Agent task ID from a previous create_task'),
|
|
126
153
|
message: z.string().describe('Follow-up instruction for the agent'),
|
|
127
154
|
}),
|
|
128
155
|
category: 'custom',
|
|
@@ -132,11 +159,22 @@ export function buildCoordinatorTools(opts: CoordinatorToolsOptions): ToolDefini
|
|
|
132
159
|
concurrencySafe: true,
|
|
133
160
|
async execute({ task_id, message }) {
|
|
134
161
|
await gateway.continueTask(task_id as TaskId, message)
|
|
135
|
-
|
|
162
|
+
// Mirror create_task's blocking pattern: await the new
|
|
163
|
+
// completion and return the agent's output inline. The
|
|
164
|
+
// previous non-blocking shape ('You will receive a
|
|
165
|
+
// task-notification…') relied on a global
|
|
166
|
+
// onTaskCompleted listener that the iteration loop
|
|
167
|
+
// no longer registers (envelope path is dead).
|
|
168
|
+
const completed = await gateway.waitForTask(task_id as TaskId)
|
|
169
|
+
const success = completed.state === 'completed'
|
|
170
|
+
const resultText =
|
|
171
|
+
completed.result?.result ??
|
|
172
|
+
completed.result?.lastError ??
|
|
173
|
+
`Task finished with state: ${completed.state}`
|
|
136
174
|
return {
|
|
137
|
-
success
|
|
138
|
-
output:
|
|
139
|
-
data: { task_id, state:
|
|
175
|
+
success,
|
|
176
|
+
output: resultText,
|
|
177
|
+
data: { task_id, state: completed.state },
|
|
140
178
|
}
|
|
141
179
|
},
|
|
142
180
|
})
|
|
@@ -163,7 +201,75 @@ export function buildCoordinatorTools(opts: CoordinatorToolsOptions): ToolDefini
|
|
|
163
201
|
},
|
|
164
202
|
})
|
|
165
203
|
|
|
166
|
-
const
|
|
204
|
+
const agentTaskList = defineTool({
|
|
205
|
+
name: 'agent_task_list',
|
|
206
|
+
description:
|
|
207
|
+
"Inspect the live state of every agent task launched on this gateway via create_task: returns each task's id, agent, state (pending/running/completed/failed/canceled), and timing. Distinct from the plan-task store's `task_list` (which lists planning tasks): this tool lists running/completed worker invocations. Use it BEFORE declaring multi-worker work done — confirm every launched task reached `completed`, none still `running` or `failed`. Read-only and safe to call repeatedly.",
|
|
208
|
+
inputSchema: z.object({
|
|
209
|
+
state: z
|
|
210
|
+
.enum(['pending', 'running', 'completed', 'failed', 'canceled'])
|
|
211
|
+
.optional()
|
|
212
|
+
.describe('Filter by terminal/non-terminal state. Omit to list every task.'),
|
|
213
|
+
}),
|
|
214
|
+
category: 'custom',
|
|
215
|
+
permissions: [],
|
|
216
|
+
readOnly: true,
|
|
217
|
+
destructive: false,
|
|
218
|
+
concurrencySafe: true,
|
|
219
|
+
async execute({ state }) {
|
|
220
|
+
const handles = gateway.listTasks()
|
|
221
|
+
const filtered = state ? handles.filter((h) => h.state === state) : handles
|
|
222
|
+
const items = filtered.map((h) => {
|
|
223
|
+
const runStatus = h.result?.status
|
|
224
|
+
const lastError = h.result?.lastError ?? undefined
|
|
225
|
+
return {
|
|
226
|
+
task_id: h.taskId,
|
|
227
|
+
agent_id: h.agentId,
|
|
228
|
+
state: h.state,
|
|
229
|
+
run_status: runStatus,
|
|
230
|
+
created_at: new Date(h.createdAt).toISOString(),
|
|
231
|
+
completed_at: h.completedAt ? new Date(h.completedAt).toISOString() : null,
|
|
232
|
+
duration_ms: h.completedAt ? h.completedAt - h.createdAt : null,
|
|
233
|
+
last_error: lastError,
|
|
234
|
+
}
|
|
235
|
+
})
|
|
236
|
+
const summary = {
|
|
237
|
+
total: handles.length,
|
|
238
|
+
running: handles.filter((h) => h.state === 'running').length,
|
|
239
|
+
completed: handles.filter((h) => h.state === 'completed').length,
|
|
240
|
+
failed: handles.filter((h) => h.state === 'failed').length,
|
|
241
|
+
canceled: handles.filter((h) => h.state === 'canceled').length,
|
|
242
|
+
}
|
|
243
|
+
const lines = items.length
|
|
244
|
+
? items.map(
|
|
245
|
+
(i) =>
|
|
246
|
+
`- ${i.task_id} → ${i.agent_id} [${i.state}${i.run_status && i.run_status !== i.state ? ` / ${i.run_status}` : ''}]${
|
|
247
|
+
i.duration_ms !== null ? ` (${Math.round(i.duration_ms / 1000)}s)` : ''
|
|
248
|
+
}${i.last_error ? ` — error: ${i.last_error.slice(0, 200)}` : ''}`,
|
|
249
|
+
)
|
|
250
|
+
: ['(no tasks launched yet)']
|
|
251
|
+
const header = `Tasks: ${summary.total} total — ${summary.running} running, ${summary.completed} completed, ${summary.failed} failed, ${summary.canceled} canceled`
|
|
252
|
+
return {
|
|
253
|
+
success: true,
|
|
254
|
+
output: [header, '', ...lines].join('\n'),
|
|
255
|
+
data: { items, summary },
|
|
256
|
+
}
|
|
257
|
+
},
|
|
258
|
+
})
|
|
259
|
+
|
|
260
|
+
// `continue_task` was a follow-up channel for a still-alive worker
|
|
261
|
+
// task. With `create_task` now blocking + tool_result returning
|
|
262
|
+
// the worker's final output, every worker reaches a terminal
|
|
263
|
+
// state by the time the supervisor wants to follow up — and the
|
|
264
|
+
// agent manager rejects `continue` on terminal tasks. The
|
|
265
|
+
// industry-standard pattern is to issue a fresh `create_task` that
|
|
266
|
+
// references the prior worker's output path, so we drop
|
|
267
|
+
// `continue_task` from the registered surface entirely. The
|
|
268
|
+
// definition stays in this file for now in case a future
|
|
269
|
+
// non-default gateway (one that keeps the worker process alive
|
|
270
|
+
// for follow-ups) wants to re-register it.
|
|
271
|
+
void continueTask
|
|
272
|
+
const tools: ToolDefinition[] = [createTask, cancelTask, agentTaskList]
|
|
167
273
|
|
|
168
274
|
if (getPlanManager) {
|
|
169
275
|
const approvePlan = defineTool({
|
package/src/types/agent/base.ts
CHANGED
|
@@ -65,6 +65,14 @@ export type RuntimeToolOverrides = Record<string, ToolAvailability | 'disabled'>
|
|
|
65
65
|
export interface AgentRuntimeContext {
|
|
66
66
|
label?: string
|
|
67
67
|
outputDirectory?: string
|
|
68
|
+
/**
|
|
69
|
+
* Optional working/scratch directory the runtime exposes to the
|
|
70
|
+
* agent — sibling to `outputDirectory`, invisible to the
|
|
71
|
+
* output collector. Mirrors the Anthropic Cowork pattern
|
|
72
|
+
* where `/home/claude` is scratch and `/mnt/user-data/outputs` is
|
|
73
|
+
* user-visible.
|
|
74
|
+
*/
|
|
75
|
+
scratchDirectory?: string
|
|
68
76
|
outputFileMarker?: string
|
|
69
77
|
notes?: readonly string[]
|
|
70
78
|
}
|
|
@@ -1,8 +1,10 @@
|
|
|
1
1
|
import type { AdvisoryConfig } from '../advisory/index.js'
|
|
2
2
|
import type { AgentPersona } from '../persona/index.js'
|
|
3
3
|
import type { LLMProvider } from '../provider/index.js'
|
|
4
|
+
import type { SandboxProvider } from '../sandbox/index.js'
|
|
4
5
|
import type { Skill } from '../skills/index.js'
|
|
5
6
|
import type { ToolRegistryContract } from '../tool/index.js'
|
|
7
|
+
import type { VerificationGateConfig } from '../verification/index.js'
|
|
6
8
|
import type { BaseAgentConfig, BaseAgentResult } from './base.js'
|
|
7
9
|
|
|
8
10
|
export interface ReactiveAgentConfig extends BaseAgentConfig {
|
|
@@ -17,6 +19,29 @@ export interface ReactiveAgentConfig extends BaseAgentConfig {
|
|
|
17
19
|
tools: ToolRegistryContract
|
|
18
20
|
|
|
19
21
|
advisory?: AdvisoryConfig
|
|
22
|
+
|
|
23
|
+
/**
|
|
24
|
+
* Optional capability-aware deny/allow gate for child tool calls.
|
|
25
|
+
* Mirrors the same field on `SupervisorAgentConfig`; when omitted,
|
|
26
|
+
* `drainQuery` falls back to its `autoApproveHandler` default
|
|
27
|
+
* (every tool call auto-approves, no policy applied). Hosts that
|
|
28
|
+
* trust their sandbox should still pass at least
|
|
29
|
+
* `{ enabled: true, denyDangerousPatterns: true, ... }` so the
|
|
30
|
+
* canonical brick patterns hard-deny instead of executing
|
|
31
|
+
* silently.
|
|
32
|
+
*/
|
|
33
|
+
verificationGate?: VerificationGateConfig
|
|
34
|
+
|
|
35
|
+
/**
|
|
36
|
+
* Optional ephemeral sandbox provider. When set, drainQuery creates
|
|
37
|
+
* a sandbox via `provider.create()` before the iteration loop and
|
|
38
|
+
* routes filesystem / shell tool calls through it; on run end the
|
|
39
|
+
* SDK calls `sandbox.destroy()`. Hosts that want a per-task
|
|
40
|
+
* container shared across supervisor + every child specialist run
|
|
41
|
+
* pass the SAME provider instance to all of them — caching layered
|
|
42
|
+
* on top of the provider keeps the underlying container alive.
|
|
43
|
+
*/
|
|
44
|
+
sandboxProvider?: SandboxProvider
|
|
20
45
|
}
|
|
21
46
|
|
|
22
47
|
export interface ReactiveAgentResult extends BaseAgentResult {
|
|
@@ -1,6 +1,11 @@
|
|
|
1
1
|
import type { AdvisoryConfig } from '../advisory/index.js'
|
|
2
|
+
import type { ResumeHandler } from '../hitl/index.js'
|
|
2
3
|
import type { LLMProvider } from '../provider/index.js'
|
|
3
4
|
import type { TaskRouterConfig } from '../router/index.js'
|
|
5
|
+
import type { SandboxProvider } from '../sandbox/index.js'
|
|
6
|
+
import type { Skill } from '../skills/index.js'
|
|
7
|
+
import type { ToolRegistryContract } from '../tool/index.js'
|
|
8
|
+
import type { VerificationGateConfig } from '../verification/index.js'
|
|
4
9
|
import type { BaseAgentConfig, BaseAgentResult } from './base.js'
|
|
5
10
|
import type { AgentFactoryOptions } from './factory.js'
|
|
6
11
|
import type { TaskGateway } from './gateway.js'
|
|
@@ -13,9 +18,12 @@ export interface SupervisorAgentConfig extends BaseAgentConfig {
|
|
|
13
18
|
|
|
14
19
|
gateway?: TaskGateway
|
|
15
20
|
agentManager?: AgentManagerContract
|
|
21
|
+
tools?: ToolRegistryContract
|
|
16
22
|
|
|
17
23
|
systemPrompt: string
|
|
18
24
|
|
|
25
|
+
skills?: Skill[]
|
|
26
|
+
|
|
19
27
|
maxDepth?: number
|
|
20
28
|
|
|
21
29
|
taskRouter?: TaskRouterConfig
|
|
@@ -23,6 +31,43 @@ export interface SupervisorAgentConfig extends BaseAgentConfig {
|
|
|
23
31
|
factoryOptions?: AgentFactoryOptions
|
|
24
32
|
|
|
25
33
|
advisory?: AdvisoryConfig
|
|
34
|
+
|
|
35
|
+
/**
|
|
36
|
+
* Optional human-in-the-loop hook for tool review and run-pause
|
|
37
|
+
* decisions. When omitted, the supervisor delegates to drainQuery's
|
|
38
|
+
* built-in `autoApproveHandler`, which approves every tool call
|
|
39
|
+
* without prompting — matching Anthropic's "Act without asking"
|
|
40
|
+
* cowork mode.
|
|
41
|
+
*
|
|
42
|
+
* Hosts that want "Ask before acting" behaviour pass a custom
|
|
43
|
+
* handler that surfaces the `tool_review_requested` RunEvent to
|
|
44
|
+
* the user and resolves the returned promise once the user
|
|
45
|
+
* approves, rejects, or modifies the call.
|
|
46
|
+
*/
|
|
47
|
+
resumeHandler?: ResumeHandler
|
|
48
|
+
|
|
49
|
+
/**
|
|
50
|
+
* Optional declarative gate evaluated before tool execution. When
|
|
51
|
+
* the gate marks all calls in a batch as `allow`, they execute
|
|
52
|
+
* without round-tripping through the resumeHandler. Mixed or all-
|
|
53
|
+
* deny outcomes fall through to review (and the resumeHandler).
|
|
54
|
+
*
|
|
55
|
+
* Use it to express deterministic policy (e.g. "internal
|
|
56
|
+
* read-only tools always allow; destructive shell calls always
|
|
57
|
+
* review") so the resumeHandler only fires for the truly
|
|
58
|
+
* non-deterministic cases.
|
|
59
|
+
*/
|
|
60
|
+
verificationGate?: VerificationGateConfig
|
|
61
|
+
|
|
62
|
+
/**
|
|
63
|
+
* Optional ephemeral sandbox provider. When set, drainQuery creates
|
|
64
|
+
* a sandbox via `provider.create()` before the supervisor's own
|
|
65
|
+
* iteration loop and routes filesystem / shell tool calls through
|
|
66
|
+
* it. Multi-agent hosts thread the SAME provider instance into
|
|
67
|
+
* every child `ReactiveAgentConfig.sandboxProvider` so supervisor
|
|
68
|
+
* + children share one ephemeral container per task.
|
|
69
|
+
*/
|
|
70
|
+
sandboxProvider?: SandboxProvider
|
|
26
71
|
}
|
|
27
72
|
|
|
28
73
|
export interface AgentTaskResult {
|