@namzu/sdk 0.5.0 → 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (302) hide show
  1. package/CHANGELOG.md +393 -0
  2. package/dist/advisory/executor.d.ts.map +1 -1
  3. package/dist/advisory/executor.js +9 -2
  4. package/dist/advisory/executor.js.map +1 -1
  5. package/dist/advisory/executor.test.d.ts +2 -1
  6. package/dist/advisory/executor.test.d.ts.map +1 -1
  7. package/dist/advisory/executor.test.js +7 -4
  8. package/dist/advisory/executor.test.js.map +1 -1
  9. package/dist/agents/ReactiveAgent.d.ts.map +1 -1
  10. package/dist/agents/ReactiveAgent.js +2 -0
  11. package/dist/agents/ReactiveAgent.js.map +1 -1
  12. package/dist/agents/SupervisorAgent.d.ts.map +1 -1
  13. package/dist/agents/SupervisorAgent.js +13 -0
  14. package/dist/agents/SupervisorAgent.js.map +1 -1
  15. package/dist/bridge/sse/mapper.test.js +2 -2
  16. package/dist/constants/compaction/index.d.ts.map +1 -1
  17. package/dist/constants/compaction/index.js +8 -3
  18. package/dist/constants/compaction/index.js.map +1 -1
  19. package/dist/constants/sandbox/index.d.ts +21 -0
  20. package/dist/constants/sandbox/index.d.ts.map +1 -1
  21. package/dist/constants/sandbox/index.js +30 -0
  22. package/dist/constants/sandbox/index.js.map +1 -1
  23. package/dist/constants/tools/index.d.ts.map +1 -1
  24. package/dist/constants/tools/index.js +33 -2
  25. package/dist/constants/tools/index.js.map +1 -1
  26. package/dist/manager/run/persistence.d.ts.map +1 -1
  27. package/dist/manager/run/persistence.js +35 -5
  28. package/dist/manager/run/persistence.js.map +1 -1
  29. package/dist/persona/assembler.d.ts +1 -0
  30. package/dist/persona/assembler.d.ts.map +1 -1
  31. package/dist/persona/assembler.js +28 -6
  32. package/dist/persona/assembler.js.map +1 -1
  33. package/dist/provider/collect.test.js +2 -2
  34. package/dist/public-runtime.d.ts +5 -4
  35. package/dist/public-runtime.d.ts.map +1 -1
  36. package/dist/public-runtime.js +5 -4
  37. package/dist/public-runtime.js.map +1 -1
  38. package/dist/public-tools.d.ts +2 -0
  39. package/dist/public-tools.d.ts.map +1 -1
  40. package/dist/public-tools.js +2 -0
  41. package/dist/public-tools.js.map +1 -1
  42. package/dist/public-types.d.ts +3 -0
  43. package/dist/public-types.d.ts.map +1 -1
  44. package/dist/registry/index.d.ts +2 -0
  45. package/dist/registry/index.d.ts.map +1 -1
  46. package/dist/registry/index.js +1 -0
  47. package/dist/registry/index.js.map +1 -1
  48. package/dist/registry/tool/execute.d.ts.map +1 -1
  49. package/dist/registry/tool/execute.js +87 -5
  50. package/dist/registry/tool/execute.js.map +1 -1
  51. package/dist/registry/tool/execute.test.d.ts +4 -2
  52. package/dist/registry/tool/execute.test.d.ts.map +1 -1
  53. package/dist/registry/tool/execute.test.js +112 -3
  54. package/dist/registry/tool/execute.test.js.map +1 -1
  55. package/dist/registry/toolset/catalog.d.ts +42 -0
  56. package/dist/registry/toolset/catalog.d.ts.map +1 -0
  57. package/dist/registry/toolset/catalog.js +217 -0
  58. package/dist/registry/toolset/catalog.js.map +1 -0
  59. package/dist/registry/toolset/catalog.test.d.ts +2 -0
  60. package/dist/registry/toolset/catalog.test.d.ts.map +1 -0
  61. package/dist/registry/toolset/catalog.test.js +85 -0
  62. package/dist/registry/toolset/catalog.test.js.map +1 -0
  63. package/dist/runtime/query/__tests__/deferred-tools.test.d.ts +2 -0
  64. package/dist/runtime/query/__tests__/deferred-tools.test.d.ts.map +1 -0
  65. package/dist/runtime/query/__tests__/deferred-tools.test.js +147 -0
  66. package/dist/runtime/query/__tests__/deferred-tools.test.js.map +1 -0
  67. package/dist/runtime/query/__tests__/executor-concurrency.test.d.ts +2 -0
  68. package/dist/runtime/query/__tests__/executor-concurrency.test.d.ts.map +1 -0
  69. package/dist/runtime/query/__tests__/executor-concurrency.test.js +98 -0
  70. package/dist/runtime/query/__tests__/executor-concurrency.test.js.map +1 -0
  71. package/dist/runtime/query/__tests__/executor-plugin-hooks.test.js +38 -3
  72. package/dist/runtime/query/__tests__/executor-plugin-hooks.test.js.map +1 -1
  73. package/dist/runtime/query/__tests__/prompt.test.js +47 -2
  74. package/dist/runtime/query/__tests__/prompt.test.js.map +1 -1
  75. package/dist/runtime/query/__tests__/stream-recovery.test.d.ts +2 -0
  76. package/dist/runtime/query/__tests__/stream-recovery.test.d.ts.map +1 -0
  77. package/dist/runtime/query/__tests__/stream-recovery.test.js +126 -0
  78. package/dist/runtime/query/__tests__/stream-recovery.test.js.map +1 -0
  79. package/dist/runtime/query/continuation.d.ts +16 -0
  80. package/dist/runtime/query/continuation.d.ts.map +1 -0
  81. package/dist/runtime/query/continuation.js +16 -0
  82. package/dist/runtime/query/continuation.js.map +1 -0
  83. package/dist/runtime/query/executor.d.ts +3 -0
  84. package/dist/runtime/query/executor.d.ts.map +1 -1
  85. package/dist/runtime/query/executor.js +71 -3
  86. package/dist/runtime/query/executor.js.map +1 -1
  87. package/dist/runtime/query/index.d.ts.map +1 -1
  88. package/dist/runtime/query/index.js +19 -3
  89. package/dist/runtime/query/index.js.map +1 -1
  90. package/dist/runtime/query/iteration/index.d.ts +22 -0
  91. package/dist/runtime/query/iteration/index.d.ts.map +1 -1
  92. package/dist/runtime/query/iteration/index.js +227 -60
  93. package/dist/runtime/query/iteration/index.js.map +1 -1
  94. package/dist/runtime/query/iteration/phases/context.d.ts +10 -0
  95. package/dist/runtime/query/iteration/phases/context.d.ts.map +1 -1
  96. package/dist/runtime/query/iteration/phases/context.js.map +1 -1
  97. package/dist/runtime/query/prompt.d.ts.map +1 -1
  98. package/dist/runtime/query/prompt.js +21 -1
  99. package/dist/runtime/query/prompt.js.map +1 -1
  100. package/dist/runtime/query/tooling.d.ts +1 -0
  101. package/dist/runtime/query/tooling.d.ts.map +1 -1
  102. package/dist/runtime/query/tooling.js +1 -0
  103. package/dist/runtime/query/tooling.js.map +1 -1
  104. package/dist/sandbox/provider/local.d.ts.map +1 -1
  105. package/dist/sandbox/provider/local.js +32 -1
  106. package/dist/sandbox/provider/local.js.map +1 -1
  107. package/dist/session/workspace/__tests__/shared-run.test.d.ts +2 -0
  108. package/dist/session/workspace/__tests__/shared-run.test.d.ts.map +1 -0
  109. package/dist/session/workspace/__tests__/shared-run.test.js +147 -0
  110. package/dist/session/workspace/__tests__/shared-run.test.js.map +1 -0
  111. package/dist/session/workspace/index.d.ts +2 -0
  112. package/dist/session/workspace/index.d.ts.map +1 -1
  113. package/dist/session/workspace/index.js +1 -0
  114. package/dist/session/workspace/index.js.map +1 -1
  115. package/dist/session/workspace/shared-run.d.ts +81 -0
  116. package/dist/session/workspace/shared-run.d.ts.map +1 -0
  117. package/dist/session/workspace/shared-run.js +251 -0
  118. package/dist/session/workspace/shared-run.js.map +1 -0
  119. package/dist/skills/loader.d.ts.map +1 -1
  120. package/dist/skills/loader.js +36 -6
  121. package/dist/skills/loader.js.map +1 -1
  122. package/dist/skills/loader.test.d.ts +2 -0
  123. package/dist/skills/loader.test.d.ts.map +1 -0
  124. package/dist/skills/loader.test.js +65 -0
  125. package/dist/skills/loader.test.js.map +1 -0
  126. package/dist/streaming/coalesce.test.js +1 -1
  127. package/dist/tools/builtins/__tests__/edit.test.d.ts +2 -0
  128. package/dist/tools/builtins/__tests__/edit.test.d.ts.map +1 -0
  129. package/dist/tools/builtins/__tests__/edit.test.js +38 -0
  130. package/dist/tools/builtins/__tests__/edit.test.js.map +1 -0
  131. package/dist/tools/builtins/__tests__/payload-budget.test.d.ts +2 -0
  132. package/dist/tools/builtins/__tests__/payload-budget.test.d.ts.map +1 -0
  133. package/dist/tools/builtins/__tests__/payload-budget.test.js +22 -0
  134. package/dist/tools/builtins/__tests__/payload-budget.test.js.map +1 -0
  135. package/dist/tools/builtins/__tests__/read-file.test.d.ts +2 -0
  136. package/dist/tools/builtins/__tests__/read-file.test.d.ts.map +1 -0
  137. package/dist/tools/builtins/__tests__/read-file.test.js +24 -0
  138. package/dist/tools/builtins/__tests__/read-file.test.js.map +1 -0
  139. package/dist/tools/builtins/__tests__/verify-outputs.test.d.ts +2 -0
  140. package/dist/tools/builtins/__tests__/verify-outputs.test.d.ts.map +1 -0
  141. package/dist/tools/builtins/__tests__/verify-outputs.test.js +52 -0
  142. package/dist/tools/builtins/__tests__/verify-outputs.test.js.map +1 -0
  143. package/dist/tools/builtins/__tests__/write-file.test.d.ts +2 -0
  144. package/dist/tools/builtins/__tests__/write-file.test.d.ts.map +1 -0
  145. package/dist/tools/builtins/__tests__/write-file.test.js +74 -0
  146. package/dist/tools/builtins/__tests__/write-file.test.js.map +1 -0
  147. package/dist/tools/builtins/bash.d.ts.map +1 -1
  148. package/dist/tools/builtins/bash.js +40 -7
  149. package/dist/tools/builtins/bash.js.map +1 -1
  150. package/dist/tools/builtins/edit.d.ts +5 -2
  151. package/dist/tools/builtins/edit.d.ts.map +1 -1
  152. package/dist/tools/builtins/edit.js +114 -18
  153. package/dist/tools/builtins/edit.js.map +1 -1
  154. package/dist/tools/builtins/index.d.ts +1 -0
  155. package/dist/tools/builtins/index.d.ts.map +1 -1
  156. package/dist/tools/builtins/index.js +13 -13
  157. package/dist/tools/builtins/index.js.map +1 -1
  158. package/dist/tools/builtins/read-file.d.ts +1 -0
  159. package/dist/tools/builtins/read-file.d.ts.map +1 -1
  160. package/dist/tools/builtins/read-file.js +23 -8
  161. package/dist/tools/builtins/read-file.js.map +1 -1
  162. package/dist/tools/builtins/search-tools.d.ts.map +1 -1
  163. package/dist/tools/builtins/search-tools.js +4 -1
  164. package/dist/tools/builtins/search-tools.js.map +1 -1
  165. package/dist/tools/builtins/verify-outputs.d.ts +5 -0
  166. package/dist/tools/builtins/verify-outputs.d.ts.map +1 -0
  167. package/dist/tools/builtins/verify-outputs.js +103 -0
  168. package/dist/tools/builtins/verify-outputs.js.map +1 -0
  169. package/dist/tools/builtins/write-file.d.ts +3 -2
  170. package/dist/tools/builtins/write-file.d.ts.map +1 -1
  171. package/dist/tools/builtins/write-file.js +72 -12
  172. package/dist/tools/builtins/write-file.js.map +1 -1
  173. package/dist/tools/coordinator/__tests__/agent.test.d.ts +15 -0
  174. package/dist/tools/coordinator/__tests__/agent.test.d.ts.map +1 -0
  175. package/dist/tools/coordinator/__tests__/agent.test.js +142 -0
  176. package/dist/tools/coordinator/__tests__/agent.test.js.map +1 -0
  177. package/dist/tools/coordinator/__tests__/task-list.test.d.ts +13 -0
  178. package/dist/tools/coordinator/__tests__/task-list.test.d.ts.map +1 -0
  179. package/dist/tools/coordinator/__tests__/task-list.test.js +162 -0
  180. package/dist/tools/coordinator/__tests__/task-list.test.js.map +1 -0
  181. package/dist/tools/coordinator/agent.d.ts +34 -0
  182. package/dist/tools/coordinator/agent.d.ts.map +1 -0
  183. package/dist/tools/coordinator/agent.js +107 -0
  184. package/dist/tools/coordinator/agent.js.map +1 -0
  185. package/dist/tools/coordinator/index.d.ts +7 -0
  186. package/dist/tools/coordinator/index.d.ts.map +1 -1
  187. package/dist/tools/coordinator/index.js +111 -21
  188. package/dist/tools/coordinator/index.js.map +1 -1
  189. package/dist/types/agent/base.d.ts +8 -0
  190. package/dist/types/agent/base.d.ts.map +1 -1
  191. package/dist/types/agent/reactive.d.ts +23 -0
  192. package/dist/types/agent/reactive.d.ts.map +1 -1
  193. package/dist/types/agent/supervisor.d.ts +41 -0
  194. package/dist/types/agent/supervisor.d.ts.map +1 -1
  195. package/dist/types/message/index.d.ts +22 -1
  196. package/dist/types/message/index.d.ts.map +1 -1
  197. package/dist/types/message/index.js +7 -2
  198. package/dist/types/message/index.js.map +1 -1
  199. package/dist/types/provider/chat.d.ts +2 -9
  200. package/dist/types/provider/chat.d.ts.map +1 -1
  201. package/dist/types/run/events.d.ts +6 -0
  202. package/dist/types/run/events.d.ts.map +1 -1
  203. package/dist/types/run/events.js.map +1 -1
  204. package/dist/types/sandbox/index.d.ts +193 -0
  205. package/dist/types/sandbox/index.d.ts.map +1 -1
  206. package/dist/types/sandbox/index.js.map +1 -1
  207. package/dist/types/skills/index.d.ts +2 -0
  208. package/dist/types/skills/index.d.ts.map +1 -1
  209. package/dist/types/tool/index.d.ts +22 -0
  210. package/dist/types/tool/index.d.ts.map +1 -1
  211. package/dist/types/toolset/index.d.ts +71 -0
  212. package/dist/types/toolset/index.d.ts.map +1 -0
  213. package/dist/types/toolset/index.js +2 -0
  214. package/dist/types/toolset/index.js.map +1 -0
  215. package/dist/types/workspace/index.d.ts +1 -0
  216. package/dist/types/workspace/index.d.ts.map +1 -1
  217. package/dist/types/workspace/shared-run.d.ts +61 -0
  218. package/dist/types/workspace/shared-run.d.ts.map +1 -0
  219. package/dist/types/workspace/shared-run.js +2 -0
  220. package/dist/types/workspace/shared-run.js.map +1 -0
  221. package/dist/verification/index.d.ts +1 -0
  222. package/dist/verification/index.d.ts.map +1 -1
  223. package/dist/verification/index.js +1 -0
  224. package/dist/verification/index.js.map +1 -1
  225. package/dist/verification/presets.d.ts +53 -0
  226. package/dist/verification/presets.d.ts.map +1 -0
  227. package/dist/verification/presets.js +70 -0
  228. package/dist/verification/presets.js.map +1 -0
  229. package/dist/verification/presets.test.d.ts +16 -0
  230. package/dist/verification/presets.test.d.ts.map +1 -0
  231. package/dist/verification/presets.test.js +79 -0
  232. package/dist/verification/presets.test.js.map +1 -0
  233. package/package.json +3 -2
  234. package/src/advisory/executor.test.ts +7 -4
  235. package/src/advisory/executor.ts +11 -2
  236. package/src/agents/ReactiveAgent.ts +2 -0
  237. package/src/agents/SupervisorAgent.ts +13 -0
  238. package/src/bridge/sse/mapper.test.ts +2 -2
  239. package/src/constants/compaction/index.ts +8 -3
  240. package/src/constants/sandbox/index.ts +37 -0
  241. package/src/constants/tools/index.ts +33 -2
  242. package/src/manager/run/persistence.ts +34 -6
  243. package/src/persona/assembler.ts +31 -8
  244. package/src/provider/collect.test.ts +2 -2
  245. package/src/public-runtime.ts +14 -1
  246. package/src/public-tools.ts +2 -0
  247. package/src/public-types.ts +7 -0
  248. package/src/registry/index.ts +7 -0
  249. package/src/registry/tool/execute.test.ts +132 -3
  250. package/src/registry/tool/execute.ts +94 -9
  251. package/src/registry/toolset/catalog.test.ts +97 -0
  252. package/src/registry/toolset/catalog.ts +283 -0
  253. package/src/runtime/query/__tests__/deferred-tools.test.ts +183 -0
  254. package/src/runtime/query/__tests__/executor-concurrency.test.ts +122 -0
  255. package/src/runtime/query/__tests__/executor-plugin-hooks.test.ts +48 -3
  256. package/src/runtime/query/__tests__/prompt.test.ts +51 -2
  257. package/src/runtime/query/__tests__/stream-recovery.test.ts +156 -0
  258. package/src/runtime/query/continuation.ts +16 -0
  259. package/src/runtime/query/executor.ts +82 -13
  260. package/src/runtime/query/index.ts +24 -3
  261. package/src/runtime/query/iteration/index.ts +263 -68
  262. package/src/runtime/query/iteration/phases/context.ts +10 -0
  263. package/src/runtime/query/prompt.ts +17 -1
  264. package/src/runtime/query/tooling.ts +2 -0
  265. package/src/sandbox/provider/local.ts +33 -0
  266. package/src/session/workspace/__tests__/shared-run.test.ts +181 -0
  267. package/src/session/workspace/index.ts +6 -0
  268. package/src/session/workspace/shared-run.ts +316 -0
  269. package/src/skills/loader.test.ts +89 -0
  270. package/src/skills/loader.ts +37 -6
  271. package/src/streaming/coalesce.test.ts +1 -1
  272. package/src/tools/builtins/__tests__/edit.test.ts +57 -0
  273. package/src/tools/builtins/__tests__/payload-budget.test.ts +29 -0
  274. package/src/tools/builtins/__tests__/read-file.test.ts +31 -0
  275. package/src/tools/builtins/__tests__/verify-outputs.test.ts +71 -0
  276. package/src/tools/builtins/__tests__/write-file.test.ts +97 -0
  277. package/src/tools/builtins/bash.ts +48 -7
  278. package/src/tools/builtins/edit.ts +162 -27
  279. package/src/tools/builtins/index.ts +13 -13
  280. package/src/tools/builtins/read-file.ts +31 -8
  281. package/src/tools/builtins/search-tools.ts +5 -1
  282. package/src/tools/builtins/verify-outputs.ts +126 -0
  283. package/src/tools/builtins/write-file.ts +83 -14
  284. package/src/tools/coordinator/__tests__/agent.test.ts +172 -0
  285. package/src/tools/coordinator/__tests__/task-list.test.ts +182 -0
  286. package/src/tools/coordinator/agent.ts +157 -0
  287. package/src/tools/coordinator/index.ts +128 -22
  288. package/src/types/agent/base.ts +8 -0
  289. package/src/types/agent/reactive.ts +25 -0
  290. package/src/types/agent/supervisor.ts +45 -0
  291. package/src/types/message/index.ts +32 -2
  292. package/src/types/provider/chat.ts +2 -9
  293. package/src/types/run/events.ts +6 -0
  294. package/src/types/sandbox/index.ts +219 -0
  295. package/src/types/skills/index.ts +4 -0
  296. package/src/types/tool/index.ts +24 -0
  297. package/src/types/toolset/index.ts +86 -0
  298. package/src/types/workspace/index.ts +9 -0
  299. package/src/types/workspace/shared-run.ts +65 -0
  300. package/src/verification/index.ts +1 -0
  301. package/src/verification/presets.test.ts +112 -0
  302. package/src/verification/presets.ts +72 -0
@@ -0,0 +1,182 @@
1
+ /**
2
+ * Behavioural contract for the `agent_task_list` coordinator tool:
3
+ *
4
+ * - Returns every task the gateway knows about, with state + timing.
5
+ * - Filters by state when the input narrows it.
6
+ * - Emits a per-state summary in the data payload — what the supervisor
7
+ * reads to decide "done vs not done" before calling verify_outputs.
8
+ * - Distinct from the plan-task store's `task_list` (subject/blockedBy);
9
+ * listing them under different names avoids ToolRegistry collisions when
10
+ * both surfaces are wired into the same agent.
11
+ */
12
+
13
+ import { describe, expect, it } from 'vitest'
14
+
15
+ import type { TaskGateway, TaskHandle } from '../../../types/agent/gateway.js'
16
+ import type { TaskId } from '../../../types/ids/index.js'
17
+ import type { ToolContext } from '../../../types/tool/index.js'
18
+ import { buildCoordinatorTools } from '../index.js'
19
+
20
/**
 * Build a minimal `ToolContext` stub for invoking a tool in these tests:
 * fixed run id and working directory, a signal from a fresh
 * `AbortController` (never aborted here), an empty env, and a no-op logger.
 */
function makeContext(): ToolContext {
  const controller = new AbortController()
  const context: ToolContext = {
    runId: 'run_test' as never,
    workingDirectory: '/tmp/test',
    abortSignal: controller.signal,
    env: {},
    log() {},
  }
  return context
}
29
+
30
/**
 * Build a read-only `TaskGateway` stub backed by a fixed list of handles.
 *
 * Only the lookup surface (`getTask`, `listTasks`) is functional. The
 * creation and wait entry points reject with `'not used'`, and the remaining
 * methods are no-ops.
 */
function gatewayWith(handles: TaskHandle[]): TaskGateway {
  // Shared rejecting stub for the entry points these tests never exercise.
  const rejectUnused = async (): Promise<never> => {
    throw new Error('not used')
  }

  const stub: TaskGateway = {
    createTask: rejectUnused,
    waitForTask: rejectUnused,
    continueTask: async () => {},
    cancelTask: () => {},
    getTask: (id) => handles.find((candidate) => candidate.taskId === id),
    listTasks: () => handles,
    // Subscription is a no-op; the returned unsubscribe is a no-op too.
    onTaskCompleted: () => () => {},
  }
  return stub
}
51
+
52
/**
 * Build a `TaskHandle` fixture from a compact description.
 *
 * A `result` payload is attached only when `lastError` is a non-empty
 * string; its `status` is `'failed'` when the handle state is `'failed'`
 * and `'completed'` otherwise. Without `lastError` the handle carries
 * `result: undefined`.
 */
function handle(input: {
  id: string
  agentId: string
  state: TaskHandle['state']
  createdAt: number
  completedAt?: number
  lastError?: string
}): TaskHandle {
  const { id, agentId, state, createdAt, completedAt, lastError } = input

  let result: TaskHandle['result'] = undefined
  if (lastError) {
    const runStatus = state === 'failed' ? 'failed' : 'completed'
    result = {
      runId: 'run_x' as never,
      status: runStatus,
      usage: { inputTokens: 0, outputTokens: 0, totalTokens: 0 } as never,
      cost: { inputCostUsd: 0, outputCostUsd: 0, totalCostUsd: 0 } as never,
      iterations: 1,
      durationMs: 0,
      messages: [],
      result: '',
      lastError,
    } as never
  }

  return {
    taskId: id as TaskId,
    agentId,
    state,
    createdAt,
    completedAt,
    result,
  }
}
81
+
82
/**
 * Build the coordinator tool set against `gateway` and return the
 * `agent_task_list` tool from it.
 *
 * @throws Error when the builder did not produce an `agent_task_list` tool.
 */
function findAgentTaskList(gateway: TaskGateway) {
  const coordinatorTools = buildCoordinatorTools({
    gateway,
    workingDirectory: '/tmp/test',
    allowedAgentIds: ['solution-architecture', 'enterprise-architecture'],
  })

  for (const candidate of coordinatorTools) {
    if (candidate.name === 'agent_task_list') return candidate
  }
  throw new Error('agent_task_list tool missing from coordinator builder')
}
92
+
93
describe('coordinator agent_task_list tool', () => {
  // Fixture factories: each call builds a fresh handle so no test shares
  // objects with another.
  const completedTaskA = () =>
    handle({
      id: 'task_a',
      agentId: 'solution-architecture',
      state: 'completed',
      createdAt: 0,
      completedAt: 5000,
    })
  const runningTaskB = () =>
    handle({
      id: 'task_b',
      agentId: 'enterprise-architecture',
      state: 'running',
      createdAt: 1000,
    })
  const failedTaskC = () =>
    handle({
      id: 'task_c',
      agentId: 'solution-architecture',
      state: 'failed',
      createdAt: 2000,
      completedAt: 4000,
      lastError: 'bash exit 1',
    })

  it('lists every task with state, agent, and timing', async () => {
    const gateway = gatewayWith([completedTaskA(), runningTaskB(), failedTaskC()])
    const tool = findAgentTaskList(gateway)

    const outcome = await tool.execute({}, makeContext())
    expect(outcome.success).toBe(true)

    // Header counts first, then per-task lines, in the original order.
    const expectedPatterns = [
      /Tasks: 3 total/,
      /1 running/,
      /1 completed/,
      /1 failed/,
      /task_a → solution-architecture \[completed\]/,
      /task_c .* error: bash exit 1/,
    ]
    for (const pattern of expectedPatterns) {
      expect(outcome.output).toMatch(pattern)
    }

    const payload = outcome.data as { items: unknown[]; summary: { total: number } }
    expect(payload.summary.total).toBe(3)
    expect(payload.items).toHaveLength(3)
  })

  it('filters by state', async () => {
    const gateway = gatewayWith([completedTaskA(), runningTaskB()])
    const tool = findAgentTaskList(gateway)

    const outcome = await tool.execute({ state: 'running' }, makeContext())
    expect(outcome.success).toBe(true)

    const payload = outcome.data as { items: Array<{ task_id: string }> }
    expect(payload.items).toHaveLength(1)
    expect(payload.items[0]?.task_id).toBe('task_b')
    expect(outcome.output).not.toMatch(/task_a/)
  })

  it('handles an empty gateway', async () => {
    const emptyGateway = gatewayWith([])
    const tool = findAgentTaskList(emptyGateway)

    const outcome = await tool.execute({}, makeContext())
    expect(outcome.success).toBe(true)
    expect(outcome.output).toMatch(/Tasks: 0 total/)
    expect(outcome.output).toMatch(/no tasks launched yet/)
  })

  it('does not collide with the plan-task store `task_list` tool name', async () => {
    // Regression guard for the rename: an earlier cut registered the
    // agent-task gateway inspector as `task_list`, the same name as the
    // plan-task store list tool, so one would shadow the other in any agent
    // wiring both surfaces. The inspector now lives under `agent_task_list`.
    const toolNames = buildCoordinatorTools({
      gateway: gatewayWith([]),
      workingDirectory: '/tmp/test',
      allowedAgentIds: ['solution-architecture'],
    }).map((tool) => tool.name)

    expect(toolNames).toContain('agent_task_list')
    expect(toolNames).not.toContain('task_list')
  })
})
@@ -0,0 +1,157 @@
1
+ import { z } from 'zod'
2
+
3
+ import type { AgentRuntimeContext } from '../../types/agent/base.js'
4
+ import type { TaskGateway } from '../../types/agent/gateway.js'
5
+ import type { ToolDefinition } from '../../types/tool/index.js'
6
+ import { defineTool } from '../defineTool.js'
7
+
8
+ import type { TaskLaunchedCallback } from './index.js'
9
+
10
+ /**
11
+ * Build the canonical Claude Code `Agent` tool — synchronous subagent
12
+ * delegation that mirrors what Claude is trained against in
13
+ * `code.claude.com/docs/en/sub-agents`.
14
+ *
15
+ * Semantics: parent calls `Agent({ description, prompt, subagent_type })`,
16
+ * the runtime spawns the chosen subagent with its own context window,
17
+ * the parent's tool call BLOCKS until the subagent finishes, and the
18
+ * subagent's final text comes back as the tool result. Intermediate
19
+ * subagent tool calls are isolated — only the summary surfaces to
20
+ * the parent.
21
+ *
22
+ * This is **NOT** the same shape as the legacy `create_task` /
23
+ * `continue_task` / `cancel_task` trio that this package ships
24
+ * alongside it: those are non-blocking and use a `<task-notification>`
25
+ * callback model. The async pattern is useful for hosts that want a
26
+ * work-queue surface, but it is not what Claude Code trained against.
27
+ * For free agentic alignment, prefer the canonical `Agent` tool; keep
28
+ * the legacy coordinator tools only when you genuinely need
29
+ * fire-and-forget multi-task fan-out.
30
+ */
31
+ export interface AgentToolOptions {
32
+ gateway: TaskGateway
33
+ workingDirectory: string
34
+ runtimeContext?: AgentRuntimeContext
35
+ allowedAgentIds: string[]
36
+
37
+ onTaskLaunched?: TaskLaunchedCallback
38
+ }
39
+
40
+ export function buildAgentTool(opts: AgentToolOptions): ToolDefinition {
41
+ const { gateway, allowedAgentIds: agentIds, onTaskLaunched } = opts
42
+ const cwd = opts.workingDirectory
43
+
44
+ const subagentTypeEnum =
45
+ agentIds.length > 0 ? z.enum(agentIds as [string, ...string[]]) : z.string()
46
+
47
+ return defineTool({
48
+ name: 'Agent',
49
+ description: `Delegate a task to a specialized subagent. BLOCKING: returns when the subagent has finished, with the subagent's final text as the tool result. The subagent runs in its own context window and cannot see your conversation — include all necessary context in the prompt. Available subagents: ${agentIds.join(', ')}. To run multiple subagents in parallel, call this tool multiple times in a single response.`,
50
+ inputSchema: z.object({
51
+ description: z.string().describe('Short label for tracking (shown to the user)'),
52
+ prompt: z
53
+ .string()
54
+ .describe('Self-contained task description with all context the subagent needs'),
55
+ subagent_type:
56
+ agentIds.length === 1
57
+ ? subagentTypeEnum
58
+ .optional()
59
+ .describe(`Which subagent to run (defaults to the only one: ${agentIds[0]})`)
60
+ : subagentTypeEnum.describe('Which subagent to run'),
61
+ }),
62
+ category: 'custom',
63
+ permissions: [],
64
+ readOnly: false,
65
+ destructive: false,
66
+ concurrencySafe: true,
67
+ async execute({ description, prompt, subagent_type }, context) {
68
+ // With a single registered subagent the type is optional — default to
69
+ // it so the model can't trip the "subagent_type required" validation.
70
+ const agentId = subagent_type ?? (agentIds.length === 1 ? agentIds[0] : undefined)
71
+ if (!agentId) {
72
+ return {
73
+ success: false,
74
+ output: '',
75
+ error: `subagent_type is required — choose one of: ${agentIds.join(', ')}`,
76
+ }
77
+ }
78
+ const handle = await gateway.createTask({
79
+ agentId,
80
+ prompt,
81
+ workingDirectory: cwd,
82
+ runtimeContext: opts.runtimeContext,
83
+ })
84
+
85
+ onTaskLaunched?.(handle.taskId, {
86
+ agentId,
87
+ description,
88
+ // Same canonical-envelope plumbing as coordinator/index.ts
89
+ // (ses_009-task-notification-envelope). For Agent-tool path
90
+ // the subagent run is awaited synchronously below, so this
91
+ // id is only used if a probe / hook unexpectedly forks the
92
+ // completion to the background notification channel.
93
+ originalToolUseId: context.toolUseId,
94
+ })
95
+
96
+ const completed = await gateway.waitForTask(handle.taskId)
97
+
98
+ // Two layers can disagree on whether the subagent succeeded:
99
+ //
100
+ // 1. `TaskHandle.state` — the gateway's terminal task state.
101
+ // Some gateways (e.g. vandal's) explicitly map
102
+ // `result.status !== 'completed'` to `state = 'failed'`,
103
+ // others (e.g. SDK's `LocalTaskGateway`) just forward
104
+ // whatever the AgentManager set, which does not always
105
+ // reflect run-level failure.
106
+ // 2. `BaseAgentResult.status` — the run's own status. The
107
+ // canonical source of truth for whether the agent actually
108
+ // finished its work; `lastError` carries the failure
109
+ // message when set.
110
+ //
111
+ // Treat the subagent as successful only when BOTH agree.
112
+ // Reporting a failed subagent as successful would silently
113
+ // hand the parent garbage output and make debugging
114
+ // impossible, which is what Codex flagged on the first cut.
115
+ const runStatus = completed.result?.status
116
+ const succeeded =
117
+ completed.state === 'completed' && (runStatus === undefined || runStatus === 'completed')
118
+
119
+ const resultText =
120
+ typeof completed.result?.result === 'string'
121
+ ? completed.result.result
122
+ : completed.result?.result !== undefined
123
+ ? JSON.stringify(completed.result.result)
124
+ : ''
125
+
126
+ if (!succeeded) {
127
+ const failureLabel =
128
+ completed.state !== 'completed' ? completed.state : (runStatus ?? 'failed')
129
+ const detail =
130
+ completed.result?.lastError ?? resultText ?? '(subagent provided no failure detail)'
131
+ return {
132
+ success: false,
133
+ output: '',
134
+ error: `Subagent ${agentId} ${failureLabel}: ${detail}`,
135
+ data: {
136
+ task_id: handle.taskId,
137
+ subagent_type: agentId,
138
+ state: completed.state,
139
+ status: runStatus,
140
+ lastError: completed.result?.lastError,
141
+ },
142
+ }
143
+ }
144
+
145
+ return {
146
+ success: true,
147
+ output: resultText || '(subagent returned no text)',
148
+ data: {
149
+ task_id: handle.taskId,
150
+ subagent_type: agentId,
151
+ state: completed.state,
152
+ status: runStatus,
153
+ },
154
+ }
155
+ },
156
+ })
157
+ }
@@ -13,6 +13,13 @@ export type TaskLaunchedCallback = (
13
13
  agentId: string
14
14
  description: string
15
15
  planTaskId?: string
16
+ /**
17
+ * The assistant `tool_use_id` that dispatched this task.
18
+ * Threaded from `ToolContext.toolUseId` so the runtime can
19
+ * later emit a canonical `tool_result` content block bound
20
+ * to the same id when the background task completes.
21
+ */
22
+ originalToolUseId?: string
16
23
  },
17
24
  ) => void
18
25
 
@@ -38,21 +45,29 @@ export function buildCoordinatorTools(opts: CoordinatorToolsOptions): ToolDefini
38
45
  taskStore,
39
46
  runId,
40
47
  getPlanManager,
41
- onTaskLaunched,
48
+ // `onTaskLaunched` was the entry point for the old
49
+ // non-blocking + envelope-injection flow. create_task is now
50
+ // blocking, so the callback is no longer wired here.
51
+ // Intentionally not destructured to keep the unused-binding
52
+ // lint clean; callers can still pass it for backwards
53
+ // compatibility (Agent tool consumes it from its own path).
42
54
  } = opts
43
55
  const cwd = opts.workingDirectory
56
+ void opts.onTaskLaunched
44
57
 
45
58
  const agentIdEnum = agentIds.length > 0 ? z.enum(agentIds as [string, ...string[]]) : z.string()
46
59
 
47
60
  const createTask = defineTool({
48
61
  name: 'create_task',
49
- description: `Launch a task on a specialized agent. NON-BLOCKING: returns immediately. You will receive a <task-notification> message when the agent finishes. Available agents: ${agentIds.join(', ')}. The agent cannot see your conversation include ALL necessary context in the prompt. To launch multiple tasks in parallel, call this tool multiple times in a single response. After launching, briefly tell the user what you launched and end your turn do NOT predict or fabricate results.`,
62
+ description: `Launch a task on a specialized agent and await its result. BLOCKING: returns the agent's final output as this call's tool_result. Available agents: ${agentIds.join(', ')}. Prefer compact assignments; for large context, write/read shared workspace files and pass filenames or references. To launch multiple tasks in parallel, call this tool multiple times in a single assistant turn the runtime executes every tool_use block from one response concurrently and delivers all tool_results together, so 'fan out 8 specialists' is one assistant message with 8 create_task blocks.`,
50
63
  inputSchema: z.object({
51
64
  agent_id: agentIdEnum.describe('Which agent to run'),
52
65
  prompt: z
53
66
  .string()
54
- .describe('Self-contained task description with all context the agent needs'),
55
- description: z.string().describe('Short summary for tracking (shown to user)'),
67
+ .describe(
68
+ 'Self-contained assignment for the agent. For large generated content, prefer workspace file references so provider output-token limits do not cut off the tool call.',
69
+ ),
70
+ description: z.string().describe('Short summary for tracking, shown to the user.'),
56
71
  plan_task_id: z
57
72
  .string()
58
73
  .optional()
@@ -65,7 +80,7 @@ export function buildCoordinatorTools(opts: CoordinatorToolsOptions): ToolDefini
65
80
  readOnly: false,
66
81
  destructive: false,
67
82
  concurrencySafe: true,
68
- async execute({ agent_id, prompt, description, plan_task_id }) {
83
+ async execute({ agent_id, prompt, description, plan_task_id }, _context) {
69
84
  let resolvedPlanTaskId = plan_task_id
70
85
 
71
86
  if (taskStore) {
@@ -93,22 +108,36 @@ export function buildCoordinatorTools(opts: CoordinatorToolsOptions): ToolDefini
93
108
  runtimeContext: opts.runtimeContext,
94
109
  })
95
110
 
96
- if (onTaskLaunched) {
97
- onTaskLaunched(handle.taskId, {
98
- agentId: agent_id,
99
- description,
100
- planTaskId: resolvedPlanTaskId,
111
+ // Industry-standard Anthropic tool pattern: the tool returns
112
+ // its real result as the tool_result for the dispatching
113
+ // tool_use. Parallel fan-out happens at the executor layer
114
+ // — when the supervisor emits N create_task blocks in one
115
+ // assistant turn, the runtime runs them with Promise.all
116
+ // and delivers all N tool_results together. No async
117
+ // envelope injection, no second tool_result for the same
118
+ // tool_use_id (which Anthropic rejects with 400).
119
+ const completed = await gateway.waitForTask(handle.taskId)
120
+ const success = completed.state === 'completed'
121
+ const resultText =
122
+ completed.result?.result ??
123
+ completed.result?.lastError ??
124
+ `Task finished with state: ${completed.state}`
125
+
126
+ if (resolvedPlanTaskId && taskStore) {
127
+ await taskStore.update(resolvedPlanTaskId as `task_${string}`, {
128
+ status: 'completed',
129
+ description: success ? undefined : `Failed: ${resultText.substring(0, 200)}`,
101
130
  })
102
131
  }
103
132
 
104
133
  return {
105
- success: true,
106
- output: `Task launched: ${handle.taskId} → ${agent_id} ("${description}"). You will receive a task-notification when it completes.`,
134
+ success,
135
+ output: resultText,
107
136
  data: {
108
137
  task_id: handle.taskId,
109
138
  agent_id,
110
139
  description,
111
- state: 'running',
140
+ state: completed.state,
112
141
  plan_task_id: resolvedPlanTaskId,
113
142
  },
114
143
  }
@@ -118,11 +147,9 @@ export function buildCoordinatorTools(opts: CoordinatorToolsOptions): ToolDefini
118
147
  const continueTask = defineTool({
119
148
  name: 'continue_task',
120
149
  description:
121
- 'Send a follow-up message to a previously completed task. NON-BLOCKING: the agent resumes in the background with full prior context. You will receive a task-notification when it finishes. Only use this with a task_id from a previous create_task or task-notification.',
150
+ "Send a follow-up message to a previously completed task and await the agent's next reply. BLOCKING: returns the agent's new output as this call's tool_result, the same shape as create_task. Only use this with a task_id from a previous create_task. To run multiple follow-ups in parallel, call this tool multiple times in a single assistant turn.",
122
151
  inputSchema: z.object({
123
- task_id: z
124
- .string()
125
- .describe('Agent task ID from a previous create_task or task-notification'),
152
+ task_id: z.string().describe('Agent task ID from a previous create_task'),
126
153
  message: z.string().describe('Follow-up instruction for the agent'),
127
154
  }),
128
155
  category: 'custom',
@@ -132,11 +159,22 @@ export function buildCoordinatorTools(opts: CoordinatorToolsOptions): ToolDefini
132
159
  concurrencySafe: true,
133
160
  async execute({ task_id, message }) {
134
161
  await gateway.continueTask(task_id as TaskId, message)
135
-
162
+ // Mirror create_task's blocking pattern: await the new
163
+ // completion and return the agent's output inline. The
164
+ // previous non-blocking shape ('You will receive a
165
+ // task-notification…') relied on a global
166
+ // onTaskCompleted listener that the iteration loop
167
+ // no longer registers (envelope path is dead).
168
+ const completed = await gateway.waitForTask(task_id as TaskId)
169
+ const success = completed.state === 'completed'
170
+ const resultText =
171
+ completed.result?.result ??
172
+ completed.result?.lastError ??
173
+ `Task finished with state: ${completed.state}`
136
174
  return {
137
- success: true,
138
- output: `Follow-up sent to ${task_id}. You will receive a task-notification when it finishes.`,
139
- data: { task_id, state: 'running' },
175
+ success,
176
+ output: resultText,
177
+ data: { task_id, state: completed.state },
140
178
  }
141
179
  },
142
180
  })
@@ -163,7 +201,75 @@ export function buildCoordinatorTools(opts: CoordinatorToolsOptions): ToolDefini
163
201
  },
164
202
  })
165
203
 
166
- const tools: ToolDefinition[] = [createTask, continueTask, cancelTask]
204
+ const agentTaskList = defineTool({
205
+ name: 'agent_task_list',
206
+ description:
207
+ "Inspect the live state of every agent task launched on this gateway via create_task: returns each task's id, agent, state (pending/running/completed/failed/canceled), and timing. Distinct from the plan-task store's `task_list` (which lists planning tasks): this tool lists running/completed worker invocations. Use it BEFORE declaring multi-worker work done — confirm every launched task reached `completed`, none still `running` or `failed`. Read-only and safe to call repeatedly.",
208
+ inputSchema: z.object({
209
+ state: z
210
+ .enum(['pending', 'running', 'completed', 'failed', 'canceled'])
211
+ .optional()
212
+ .describe('Filter by terminal/non-terminal state. Omit to list every task.'),
213
+ }),
214
+ category: 'custom',
215
+ permissions: [],
216
+ readOnly: true,
217
+ destructive: false,
218
+ concurrencySafe: true,
219
+ async execute({ state }) {
220
+ const handles = gateway.listTasks()
221
+ const filtered = state ? handles.filter((h) => h.state === state) : handles
222
+ const items = filtered.map((h) => {
223
+ const runStatus = h.result?.status
224
+ const lastError = h.result?.lastError ?? undefined
225
+ return {
226
+ task_id: h.taskId,
227
+ agent_id: h.agentId,
228
+ state: h.state,
229
+ run_status: runStatus,
230
+ created_at: new Date(h.createdAt).toISOString(),
231
+ completed_at: h.completedAt ? new Date(h.completedAt).toISOString() : null,
232
+ duration_ms: h.completedAt ? h.completedAt - h.createdAt : null,
233
+ last_error: lastError,
234
+ }
235
+ })
236
+ const summary = {
237
+ total: handles.length,
238
+ running: handles.filter((h) => h.state === 'running').length,
239
+ completed: handles.filter((h) => h.state === 'completed').length,
240
+ failed: handles.filter((h) => h.state === 'failed').length,
241
+ canceled: handles.filter((h) => h.state === 'canceled').length,
242
+ }
243
+ const lines = items.length
244
+ ? items.map(
245
+ (i) =>
246
+ `- ${i.task_id} → ${i.agent_id} [${i.state}${i.run_status && i.run_status !== i.state ? ` / ${i.run_status}` : ''}]${
247
+ i.duration_ms !== null ? ` (${Math.round(i.duration_ms / 1000)}s)` : ''
248
+ }${i.last_error ? ` — error: ${i.last_error.slice(0, 200)}` : ''}`,
249
+ )
250
+ : ['(no tasks launched yet)']
251
+ const header = `Tasks: ${summary.total} total — ${summary.running} running, ${summary.completed} completed, ${summary.failed} failed, ${summary.canceled} canceled`
252
+ return {
253
+ success: true,
254
+ output: [header, '', ...lines].join('\n'),
255
+ data: { items, summary },
256
+ }
257
+ },
258
+ })
259
+
260
+ // `continue_task` was a follow-up channel for a still-alive worker
261
+ // task. With `create_task` now blocking + tool_result returning
262
+ // the worker's final output, every worker reaches a terminal
263
+ // state by the time the supervisor wants to follow up — and the
264
+ // agent manager rejects `continue` on terminal tasks. The
265
+ // industry-standard pattern is to issue a fresh `create_task` that
266
+ // references the prior worker's output path, so we drop
267
+ // `continue_task` from the registered surface entirely. The
268
+ // definition stays in this file for now in case a future
269
+ // non-default gateway (one that keeps the worker process alive
270
+ // for follow-ups) wants to re-register it.
271
+ void continueTask
272
+ const tools: ToolDefinition[] = [createTask, cancelTask, agentTaskList]
167
273
 
168
274
  if (getPlanManager) {
169
275
  const approvePlan = defineTool({
@@ -65,6 +65,14 @@ export type RuntimeToolOverrides = Record<string, ToolAvailability | 'disabled'>
65
65
  export interface AgentRuntimeContext {
66
66
  label?: string
67
67
  outputDirectory?: string
68
+ /**
69
+ * Optional working/scratch directory the runtime exposes to the
70
+ * agent — sibling to `outputDirectory`, invisible to the
71
+ * output collector. Mirrors the Anthropic Cowork pattern
72
+ * where `/home/claude` is scratch and `/mnt/user-data/outputs` is
73
+ * user-visible.
74
+ */
75
+ scratchDirectory?: string
68
76
  outputFileMarker?: string
69
77
  notes?: readonly string[]
70
78
  }
@@ -1,8 +1,10 @@
1
1
  import type { AdvisoryConfig } from '../advisory/index.js'
2
2
  import type { AgentPersona } from '../persona/index.js'
3
3
  import type { LLMProvider } from '../provider/index.js'
4
+ import type { SandboxProvider } from '../sandbox/index.js'
4
5
  import type { Skill } from '../skills/index.js'
5
6
  import type { ToolRegistryContract } from '../tool/index.js'
7
+ import type { VerificationGateConfig } from '../verification/index.js'
6
8
  import type { BaseAgentConfig, BaseAgentResult } from './base.js'
7
9
 
8
10
  export interface ReactiveAgentConfig extends BaseAgentConfig {
@@ -17,6 +19,29 @@ export interface ReactiveAgentConfig extends BaseAgentConfig {
17
19
  tools: ToolRegistryContract
18
20
 
19
21
  advisory?: AdvisoryConfig
22
+
23
+ /**
24
+ * Optional capability-aware deny/allow gate for child tool calls.
25
+ * Mirrors the same field on `SupervisorAgentConfig`; when omitted,
26
+ * `drainQuery` falls back to its `autoApproveHandler` default
27
+ * (every tool call auto-approves, no policy applied). Hosts that
28
+ * trust their sandbox should still pass at least
29
+ * `{ enabled: true, denyDangerousPatterns: true, ... }` so the
30
+ * canonical brick patterns hard-deny instead of executing
31
+ * silently.
32
+ */
33
+ verificationGate?: VerificationGateConfig
34
+
35
+ /**
36
+ * Optional ephemeral sandbox provider. When set, drainQuery creates
37
+ * a sandbox via `provider.create()` before the iteration loop and
38
+ * routes filesystem / shell tool calls through it; on run end the
39
+ * SDK calls `sandbox.destroy()`. Hosts that want a per-task
40
+ * container shared across supervisor + every child specialist run
41
+ * pass the SAME provider instance to all of them — caching layered
42
+ * on top of the provider keeps the underlying container alive.
43
+ */
44
+ sandboxProvider?: SandboxProvider
20
45
  }
21
46
 
22
47
  export interface ReactiveAgentResult extends BaseAgentResult {
@@ -1,6 +1,11 @@
1
1
  import type { AdvisoryConfig } from '../advisory/index.js'
2
+ import type { ResumeHandler } from '../hitl/index.js'
2
3
  import type { LLMProvider } from '../provider/index.js'
3
4
  import type { TaskRouterConfig } from '../router/index.js'
5
+ import type { SandboxProvider } from '../sandbox/index.js'
6
+ import type { Skill } from '../skills/index.js'
7
+ import type { ToolRegistryContract } from '../tool/index.js'
8
+ import type { VerificationGateConfig } from '../verification/index.js'
4
9
  import type { BaseAgentConfig, BaseAgentResult } from './base.js'
5
10
  import type { AgentFactoryOptions } from './factory.js'
6
11
  import type { TaskGateway } from './gateway.js'
@@ -13,9 +18,12 @@ export interface SupervisorAgentConfig extends BaseAgentConfig {
13
18
 
14
19
  gateway?: TaskGateway
15
20
  agentManager?: AgentManagerContract
21
+ tools?: ToolRegistryContract
16
22
 
17
23
  systemPrompt: string
18
24
 
25
+ skills?: Skill[]
26
+
19
27
  maxDepth?: number
20
28
 
21
29
  taskRouter?: TaskRouterConfig
@@ -23,6 +31,43 @@ export interface SupervisorAgentConfig extends BaseAgentConfig {
23
31
  factoryOptions?: AgentFactoryOptions
24
32
 
25
33
  advisory?: AdvisoryConfig
34
+
35
+ /**
36
+ * Optional human-in-the-loop hook for tool review and run-pause
37
+ * decisions. When omitted, the supervisor delegates to drainQuery's
38
+ * built-in `autoApproveHandler`, which approves every tool call
39
+ * without prompting — matching Anthropic's "Act without asking"
40
+ * cowork mode.
41
+ *
42
+ * Hosts that want "Ask before acting" behaviour pass a custom
43
+ * handler that surfaces the `tool_review_requested` RunEvent to
44
+ * the user and resolves the returned promise once the user
45
+ * approves, rejects, or modifies the call.
46
+ */
47
+ resumeHandler?: ResumeHandler
48
+
49
+ /**
50
+ * Optional declarative gate evaluated before tool execution. When
51
+ * the gate marks all calls in a batch as `allow`, they execute
52
+ * without round-tripping through the resumeHandler. Mixed or all-
53
+ * deny outcomes fall through to review (and the resumeHandler).
54
+ *
55
+ * Use it to express deterministic policy (e.g. "internal
56
+ * read-only tools always allow; destructive shell calls always
57
+ * review") so the resumeHandler only fires for the truly
58
+ * non-deterministic cases.
59
+ */
60
+ verificationGate?: VerificationGateConfig
61
+
62
+ /**
63
+ * Optional ephemeral sandbox provider. When set, drainQuery creates
64
+ * a sandbox via `provider.create()` before the supervisor's own
65
+ * iteration loop and routes filesystem / shell tool calls through
66
+ * it. Multi-agent hosts thread the SAME provider instance into
67
+ * every child `ReactiveAgentConfig.sandboxProvider` so supervisor
68
+ * + children share one ephemeral container per task.
69
+ */
70
+ sandboxProvider?: SandboxProvider
26
71
  }
27
72
 
28
73
  export interface AgentTaskResult {