npm - @namzu/sdk - Versions diffs - 0.5.0 → 1.0.0 - Mend

@namzu/sdk 0.5.0 → 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (302) hide show

package/CHANGELOG.md +393 -0
package/dist/advisory/executor.d.ts.map +1 -1
package/dist/advisory/executor.js +9 -2
package/dist/advisory/executor.js.map +1 -1
package/dist/advisory/executor.test.d.ts +2 -1
package/dist/advisory/executor.test.d.ts.map +1 -1
package/dist/advisory/executor.test.js +7 -4
package/dist/advisory/executor.test.js.map +1 -1
package/dist/agents/ReactiveAgent.d.ts.map +1 -1
package/dist/agents/ReactiveAgent.js +2 -0
package/dist/agents/ReactiveAgent.js.map +1 -1
package/dist/agents/SupervisorAgent.d.ts.map +1 -1
package/dist/agents/SupervisorAgent.js +13 -0
package/dist/agents/SupervisorAgent.js.map +1 -1
package/dist/bridge/sse/mapper.test.js +2 -2
package/dist/constants/compaction/index.d.ts.map +1 -1
package/dist/constants/compaction/index.js +8 -3
package/dist/constants/compaction/index.js.map +1 -1
package/dist/constants/sandbox/index.d.ts +21 -0
package/dist/constants/sandbox/index.d.ts.map +1 -1
package/dist/constants/sandbox/index.js +30 -0
package/dist/constants/sandbox/index.js.map +1 -1
package/dist/constants/tools/index.d.ts.map +1 -1
package/dist/constants/tools/index.js +33 -2
package/dist/constants/tools/index.js.map +1 -1
package/dist/manager/run/persistence.d.ts.map +1 -1
package/dist/manager/run/persistence.js +35 -5
package/dist/manager/run/persistence.js.map +1 -1
package/dist/persona/assembler.d.ts +1 -0
package/dist/persona/assembler.d.ts.map +1 -1
package/dist/persona/assembler.js +28 -6
package/dist/persona/assembler.js.map +1 -1
package/dist/provider/collect.test.js +2 -2
package/dist/public-runtime.d.ts +5 -4
package/dist/public-runtime.d.ts.map +1 -1
package/dist/public-runtime.js +5 -4
package/dist/public-runtime.js.map +1 -1
package/dist/public-tools.d.ts +2 -0
package/dist/public-tools.d.ts.map +1 -1
package/dist/public-tools.js +2 -0
package/dist/public-tools.js.map +1 -1
package/dist/public-types.d.ts +3 -0
package/dist/public-types.d.ts.map +1 -1
package/dist/registry/index.d.ts +2 -0
package/dist/registry/index.d.ts.map +1 -1
package/dist/registry/index.js +1 -0
package/dist/registry/index.js.map +1 -1
package/dist/registry/tool/execute.d.ts.map +1 -1
package/dist/registry/tool/execute.js +87 -5
package/dist/registry/tool/execute.js.map +1 -1
package/dist/registry/tool/execute.test.d.ts +4 -2
package/dist/registry/tool/execute.test.d.ts.map +1 -1
package/dist/registry/tool/execute.test.js +112 -3
package/dist/registry/tool/execute.test.js.map +1 -1
package/dist/registry/toolset/catalog.d.ts +42 -0
package/dist/registry/toolset/catalog.d.ts.map +1 -0
package/dist/registry/toolset/catalog.js +217 -0
package/dist/registry/toolset/catalog.js.map +1 -0
package/dist/registry/toolset/catalog.test.d.ts +2 -0
package/dist/registry/toolset/catalog.test.d.ts.map +1 -0
package/dist/registry/toolset/catalog.test.js +85 -0
package/dist/registry/toolset/catalog.test.js.map +1 -0
package/dist/runtime/query/__tests__/deferred-tools.test.d.ts +2 -0
package/dist/runtime/query/__tests__/deferred-tools.test.d.ts.map +1 -0
package/dist/runtime/query/__tests__/deferred-tools.test.js +147 -0
package/dist/runtime/query/__tests__/deferred-tools.test.js.map +1 -0
package/dist/runtime/query/__tests__/executor-concurrency.test.d.ts +2 -0
package/dist/runtime/query/__tests__/executor-concurrency.test.d.ts.map +1 -0
package/dist/runtime/query/__tests__/executor-concurrency.test.js +98 -0
package/dist/runtime/query/__tests__/executor-concurrency.test.js.map +1 -0
package/dist/runtime/query/__tests__/executor-plugin-hooks.test.js +38 -3
package/dist/runtime/query/__tests__/executor-plugin-hooks.test.js.map +1 -1
package/dist/runtime/query/__tests__/prompt.test.js +47 -2
package/dist/runtime/query/__tests__/prompt.test.js.map +1 -1
package/dist/runtime/query/__tests__/stream-recovery.test.d.ts +2 -0
package/dist/runtime/query/__tests__/stream-recovery.test.d.ts.map +1 -0
package/dist/runtime/query/__tests__/stream-recovery.test.js +126 -0
package/dist/runtime/query/__tests__/stream-recovery.test.js.map +1 -0
package/dist/runtime/query/continuation.d.ts +16 -0
package/dist/runtime/query/continuation.d.ts.map +1 -0
package/dist/runtime/query/continuation.js +16 -0
package/dist/runtime/query/continuation.js.map +1 -0
package/dist/runtime/query/executor.d.ts +3 -0
package/dist/runtime/query/executor.d.ts.map +1 -1
package/dist/runtime/query/executor.js +71 -3
package/dist/runtime/query/executor.js.map +1 -1
package/dist/runtime/query/index.d.ts.map +1 -1
package/dist/runtime/query/index.js +19 -3
package/dist/runtime/query/index.js.map +1 -1
package/dist/runtime/query/iteration/index.d.ts +22 -0
package/dist/runtime/query/iteration/index.d.ts.map +1 -1
package/dist/runtime/query/iteration/index.js +227 -60
package/dist/runtime/query/iteration/index.js.map +1 -1
package/dist/runtime/query/iteration/phases/context.d.ts +10 -0
package/dist/runtime/query/iteration/phases/context.d.ts.map +1 -1
package/dist/runtime/query/iteration/phases/context.js.map +1 -1
package/dist/runtime/query/prompt.d.ts.map +1 -1
package/dist/runtime/query/prompt.js +21 -1
package/dist/runtime/query/prompt.js.map +1 -1
package/dist/runtime/query/tooling.d.ts +1 -0
package/dist/runtime/query/tooling.d.ts.map +1 -1
package/dist/runtime/query/tooling.js +1 -0
package/dist/runtime/query/tooling.js.map +1 -1
package/dist/sandbox/provider/local.d.ts.map +1 -1
package/dist/sandbox/provider/local.js +32 -1
package/dist/sandbox/provider/local.js.map +1 -1
package/dist/session/workspace/__tests__/shared-run.test.d.ts +2 -0
package/dist/session/workspace/__tests__/shared-run.test.d.ts.map +1 -0
package/dist/session/workspace/__tests__/shared-run.test.js +147 -0
package/dist/session/workspace/__tests__/shared-run.test.js.map +1 -0
package/dist/session/workspace/index.d.ts +2 -0
package/dist/session/workspace/index.d.ts.map +1 -1
package/dist/session/workspace/index.js +1 -0
package/dist/session/workspace/index.js.map +1 -1
package/dist/session/workspace/shared-run.d.ts +81 -0
package/dist/session/workspace/shared-run.d.ts.map +1 -0
package/dist/session/workspace/shared-run.js +251 -0
package/dist/session/workspace/shared-run.js.map +1 -0
package/dist/skills/loader.d.ts.map +1 -1
package/dist/skills/loader.js +36 -6
package/dist/skills/loader.js.map +1 -1
package/dist/skills/loader.test.d.ts +2 -0
package/dist/skills/loader.test.d.ts.map +1 -0
package/dist/skills/loader.test.js +65 -0
package/dist/skills/loader.test.js.map +1 -0
package/dist/streaming/coalesce.test.js +1 -1
package/dist/tools/builtins/__tests__/edit.test.d.ts +2 -0
package/dist/tools/builtins/__tests__/edit.test.d.ts.map +1 -0
package/dist/tools/builtins/__tests__/edit.test.js +38 -0
package/dist/tools/builtins/__tests__/edit.test.js.map +1 -0
package/dist/tools/builtins/__tests__/payload-budget.test.d.ts +2 -0
package/dist/tools/builtins/__tests__/payload-budget.test.d.ts.map +1 -0
package/dist/tools/builtins/__tests__/payload-budget.test.js +22 -0
package/dist/tools/builtins/__tests__/payload-budget.test.js.map +1 -0
package/dist/tools/builtins/__tests__/read-file.test.d.ts +2 -0
package/dist/tools/builtins/__tests__/read-file.test.d.ts.map +1 -0
package/dist/tools/builtins/__tests__/read-file.test.js +24 -0
package/dist/tools/builtins/__tests__/read-file.test.js.map +1 -0
package/dist/tools/builtins/__tests__/verify-outputs.test.d.ts +2 -0
package/dist/tools/builtins/__tests__/verify-outputs.test.d.ts.map +1 -0
package/dist/tools/builtins/__tests__/verify-outputs.test.js +52 -0
package/dist/tools/builtins/__tests__/verify-outputs.test.js.map +1 -0
package/dist/tools/builtins/__tests__/write-file.test.d.ts +2 -0
package/dist/tools/builtins/__tests__/write-file.test.d.ts.map +1 -0
package/dist/tools/builtins/__tests__/write-file.test.js +74 -0
package/dist/tools/builtins/__tests__/write-file.test.js.map +1 -0
package/dist/tools/builtins/bash.d.ts.map +1 -1
package/dist/tools/builtins/bash.js +40 -7
package/dist/tools/builtins/bash.js.map +1 -1
package/dist/tools/builtins/edit.d.ts +5 -2
package/dist/tools/builtins/edit.d.ts.map +1 -1
package/dist/tools/builtins/edit.js +114 -18
package/dist/tools/builtins/edit.js.map +1 -1
package/dist/tools/builtins/index.d.ts +1 -0
package/dist/tools/builtins/index.d.ts.map +1 -1
package/dist/tools/builtins/index.js +13 -13
package/dist/tools/builtins/index.js.map +1 -1
package/dist/tools/builtins/read-file.d.ts +1 -0
package/dist/tools/builtins/read-file.d.ts.map +1 -1
package/dist/tools/builtins/read-file.js +23 -8
package/dist/tools/builtins/read-file.js.map +1 -1
package/dist/tools/builtins/search-tools.d.ts.map +1 -1
package/dist/tools/builtins/search-tools.js +4 -1
package/dist/tools/builtins/search-tools.js.map +1 -1
package/dist/tools/builtins/verify-outputs.d.ts +5 -0
package/dist/tools/builtins/verify-outputs.d.ts.map +1 -0
package/dist/tools/builtins/verify-outputs.js +103 -0
package/dist/tools/builtins/verify-outputs.js.map +1 -0
package/dist/tools/builtins/write-file.d.ts +3 -2
package/dist/tools/builtins/write-file.d.ts.map +1 -1
package/dist/tools/builtins/write-file.js +72 -12
package/dist/tools/builtins/write-file.js.map +1 -1
package/dist/tools/coordinator/__tests__/agent.test.d.ts +15 -0
package/dist/tools/coordinator/__tests__/agent.test.d.ts.map +1 -0
package/dist/tools/coordinator/__tests__/agent.test.js +142 -0
package/dist/tools/coordinator/__tests__/agent.test.js.map +1 -0
package/dist/tools/coordinator/__tests__/task-list.test.d.ts +13 -0
package/dist/tools/coordinator/__tests__/task-list.test.d.ts.map +1 -0
package/dist/tools/coordinator/__tests__/task-list.test.js +162 -0
package/dist/tools/coordinator/__tests__/task-list.test.js.map +1 -0
package/dist/tools/coordinator/agent.d.ts +34 -0
package/dist/tools/coordinator/agent.d.ts.map +1 -0
package/dist/tools/coordinator/agent.js +107 -0
package/dist/tools/coordinator/agent.js.map +1 -0
package/dist/tools/coordinator/index.d.ts +7 -0
package/dist/tools/coordinator/index.d.ts.map +1 -1
package/dist/tools/coordinator/index.js +111 -21
package/dist/tools/coordinator/index.js.map +1 -1
package/dist/types/agent/base.d.ts +8 -0
package/dist/types/agent/base.d.ts.map +1 -1
package/dist/types/agent/reactive.d.ts +23 -0
package/dist/types/agent/reactive.d.ts.map +1 -1
package/dist/types/agent/supervisor.d.ts +41 -0
package/dist/types/agent/supervisor.d.ts.map +1 -1
package/dist/types/message/index.d.ts +22 -1
package/dist/types/message/index.d.ts.map +1 -1
package/dist/types/message/index.js +7 -2
package/dist/types/message/index.js.map +1 -1
package/dist/types/provider/chat.d.ts +2 -9
package/dist/types/provider/chat.d.ts.map +1 -1
package/dist/types/run/events.d.ts +6 -0
package/dist/types/run/events.d.ts.map +1 -1
package/dist/types/run/events.js.map +1 -1
package/dist/types/sandbox/index.d.ts +193 -0
package/dist/types/sandbox/index.d.ts.map +1 -1
package/dist/types/sandbox/index.js.map +1 -1
package/dist/types/skills/index.d.ts +2 -0
package/dist/types/skills/index.d.ts.map +1 -1
package/dist/types/tool/index.d.ts +22 -0
package/dist/types/tool/index.d.ts.map +1 -1
package/dist/types/toolset/index.d.ts +71 -0
package/dist/types/toolset/index.d.ts.map +1 -0
package/dist/types/toolset/index.js +2 -0
package/dist/types/toolset/index.js.map +1 -0
package/dist/types/workspace/index.d.ts +1 -0
package/dist/types/workspace/index.d.ts.map +1 -1
package/dist/types/workspace/shared-run.d.ts +61 -0
package/dist/types/workspace/shared-run.d.ts.map +1 -0
package/dist/types/workspace/shared-run.js +2 -0
package/dist/types/workspace/shared-run.js.map +1 -0
package/dist/verification/index.d.ts +1 -0
package/dist/verification/index.d.ts.map +1 -1
package/dist/verification/index.js +1 -0
package/dist/verification/index.js.map +1 -1
package/dist/verification/presets.d.ts +53 -0
package/dist/verification/presets.d.ts.map +1 -0
package/dist/verification/presets.js +70 -0
package/dist/verification/presets.js.map +1 -0
package/dist/verification/presets.test.d.ts +16 -0
package/dist/verification/presets.test.d.ts.map +1 -0
package/dist/verification/presets.test.js +79 -0
package/dist/verification/presets.test.js.map +1 -0
package/package.json +3 -2
package/src/advisory/executor.test.ts +7 -4
package/src/advisory/executor.ts +11 -2
package/src/agents/ReactiveAgent.ts +2 -0
package/src/agents/SupervisorAgent.ts +13 -0
package/src/bridge/sse/mapper.test.ts +2 -2
package/src/constants/compaction/index.ts +8 -3
package/src/constants/sandbox/index.ts +37 -0
package/src/constants/tools/index.ts +33 -2
package/src/manager/run/persistence.ts +34 -6
package/src/persona/assembler.ts +31 -8
package/src/provider/collect.test.ts +2 -2
package/src/public-runtime.ts +14 -1
package/src/public-tools.ts +2 -0
package/src/public-types.ts +7 -0
package/src/registry/index.ts +7 -0
package/src/registry/tool/execute.test.ts +132 -3
package/src/registry/tool/execute.ts +94 -9
package/src/registry/toolset/catalog.test.ts +97 -0
package/src/registry/toolset/catalog.ts +283 -0
package/src/runtime/query/__tests__/deferred-tools.test.ts +183 -0
package/src/runtime/query/__tests__/executor-concurrency.test.ts +122 -0
package/src/runtime/query/__tests__/executor-plugin-hooks.test.ts +48 -3
package/src/runtime/query/__tests__/prompt.test.ts +51 -2
package/src/runtime/query/__tests__/stream-recovery.test.ts +156 -0
package/src/runtime/query/continuation.ts +16 -0
package/src/runtime/query/executor.ts +82 -13
package/src/runtime/query/index.ts +24 -3
package/src/runtime/query/iteration/index.ts +263 -68
package/src/runtime/query/iteration/phases/context.ts +10 -0
package/src/runtime/query/prompt.ts +17 -1
package/src/runtime/query/tooling.ts +2 -0
package/src/sandbox/provider/local.ts +33 -0
package/src/session/workspace/__tests__/shared-run.test.ts +181 -0
package/src/session/workspace/index.ts +6 -0
package/src/session/workspace/shared-run.ts +316 -0
package/src/skills/loader.test.ts +89 -0
package/src/skills/loader.ts +37 -6
package/src/streaming/coalesce.test.ts +1 -1
package/src/tools/builtins/__tests__/edit.test.ts +57 -0
package/src/tools/builtins/__tests__/payload-budget.test.ts +29 -0
package/src/tools/builtins/__tests__/read-file.test.ts +31 -0
package/src/tools/builtins/__tests__/verify-outputs.test.ts +71 -0
package/src/tools/builtins/__tests__/write-file.test.ts +97 -0
package/src/tools/builtins/bash.ts +48 -7
package/src/tools/builtins/edit.ts +162 -27
package/src/tools/builtins/index.ts +13 -13
package/src/tools/builtins/read-file.ts +31 -8
package/src/tools/builtins/search-tools.ts +5 -1
package/src/tools/builtins/verify-outputs.ts +126 -0
package/src/tools/builtins/write-file.ts +83 -14
package/src/tools/coordinator/__tests__/agent.test.ts +172 -0
package/src/tools/coordinator/__tests__/task-list.test.ts +182 -0
package/src/tools/coordinator/agent.ts +157 -0
package/src/tools/coordinator/index.ts +128 -22
package/src/types/agent/base.ts +8 -0
package/src/types/agent/reactive.ts +25 -0
package/src/types/agent/supervisor.ts +45 -0
package/src/types/message/index.ts +32 -2
package/src/types/provider/chat.ts +2 -9
package/src/types/run/events.ts +6 -0
package/src/types/sandbox/index.ts +219 -0
package/src/types/skills/index.ts +4 -0
package/src/types/tool/index.ts +24 -0
package/src/types/toolset/index.ts +86 -0
package/src/types/workspace/index.ts +9 -0
package/src/types/workspace/shared-run.ts +65 -0
package/src/verification/index.ts +1 -0
package/src/verification/presets.test.ts +112 -0
package/src/verification/presets.ts +72 -0

package/src/tools/coordinator/__tests__/task-list.test.ts ADDED Viewed

@@ -0,0 +1,182 @@
+/**
+ * Behavioural contract for the `agent_task_list` coordinator tool:
+ *
+ * - Returns every task the gateway knows about, with state + timing.
+ * - Filters by state when the input narrows it.
+ * - Emits a per-state summary in the data payload — what the supervisor
+ *   reads to decide "done vs not done" before calling verify_outputs.
+ * - Distinct from the plan-task store's `task_list` (subject/blockedBy);
+ *   listing them under different names avoids ToolRegistry collisions when
+ *   both surfaces are wired into the same agent.
+ */
+import { describe, expect, it } from 'vitest'
+import type { TaskGateway, TaskHandle } from '../../../types/agent/gateway.js'
+import type { TaskId } from '../../../types/ids/index.js'
+import type { ToolContext } from '../../../types/tool/index.js'
+import { buildCoordinatorTools } from '../index.js'
+/**
+ * Builds a throwaway ToolContext for invoking a tool's `execute` in these
+ * tests: fixed run id and working directory, a never-aborted signal, an empty
+ * env, and a logger that discards everything.
+ */
+function makeContext(): ToolContext {
+	const controller = new AbortController()
+	const discardLog = (): void => {}
+	const context: ToolContext = {
+		runId: 'run_test' as never,
+		workingDirectory: '/tmp/test',
+		abortSignal: controller.signal,
+		env: {},
+		log: discardLog,
+	}
+	return context
+}
+/**
+ * In-memory TaskGateway stub backed by a fixed list of handles.
+ *
+ * Only the lookup methods return data: `getTask` finds a handle by task id and
+ * `listTasks` returns the list as given. `createTask` and `waitForTask` reject
+ * with "not used"; the remaining methods are no-ops.
+ */
+function gatewayWith(handles: TaskHandle[]): TaskGateway {
+	const notUsed = async (): Promise<never> => {
+		throw new Error('not used')
+	}
+	const stub: TaskGateway = {
+		createTask: notUsed,
+		waitForTask: notUsed,
+		continueTask: async () => {},
+		cancelTask: () => {},
+		getTask: (id) => handles.find((candidate) => candidate.taskId === id),
+		listTasks: () => handles,
+		// Subscribing returns an unsubscribe function that does nothing.
+		onTaskCompleted: () => () => {},
+	}
+	return stub
+}
+/**
+ * Builds a TaskHandle fixture from a compact description.
+ *
+ * A run result is attached only when `lastError` is provided (and non-empty);
+ * its status is 'failed' when the task state is 'failed' and 'completed'
+ * otherwise. Without `lastError` the handle carries `result: undefined`.
+ */
+function handle(input: {
+	id: string
+	agentId: string
+	state: TaskHandle['state']
+	createdAt: number
+	completedAt?: number
+	lastError?: string
+}): TaskHandle {
+	const { id, agentId, state, createdAt, completedAt, lastError } = input
+	// Zeroed usage/cost and empty messages: only status and lastError vary per fixture.
+	const result = lastError
+		? ({
+				runId: 'run_x' as never,
+				status: state === 'failed' ? 'failed' : 'completed',
+				usage: { inputTokens: 0, outputTokens: 0, totalTokens: 0 } as never,
+				cost: { inputCostUsd: 0, outputCostUsd: 0, totalCostUsd: 0 } as never,
+				iterations: 1,
+				durationMs: 0,
+				messages: [],
+				result: '',
+				lastError,
+			} as never)
+		: undefined
+	return {
+		taskId: id as TaskId,
+		agentId,
+		state,
+		createdAt,
+		completedAt,
+		result,
+	}
+}
+/**
+ * Builds the coordinator tool set against `gateway` and returns the tool
+ * named `agent_task_list`, throwing if the builder did not produce one.
+ */
+function findAgentTaskList(gateway: TaskGateway) {
+	const coordinatorTools = buildCoordinatorTools({
+		gateway,
+		workingDirectory: '/tmp/test',
+		allowedAgentIds: ['solution-architecture', 'enterprise-architecture'],
+	})
+	for (const candidate of coordinatorTools) {
+		if (candidate.name === 'agent_task_list') return candidate
+	}
+	throw new Error('agent_task_list tool missing from coordinator builder')
+}
+describe('coordinator agent_task_list tool', () => {
+	// Fixture factories reused by the listing and filtering cases; each call
+	// builds a fresh handle so the tests never share objects.
+	const completedTaskA = () =>
+		handle({
+			id: 'task_a',
+			agentId: 'solution-architecture',
+			state: 'completed',
+			createdAt: 0,
+			completedAt: 5000,
+		})
+	const runningTaskB = () =>
+		handle({
+			id: 'task_b',
+			agentId: 'enterprise-architecture',
+			state: 'running',
+			createdAt: 1000,
+		})
+	it('lists every task with state, agent, and timing', async () => {
+		const failedTaskC = handle({
+			id: 'task_c',
+			agentId: 'solution-architecture',
+			state: 'failed',
+			createdAt: 2000,
+			completedAt: 4000,
+			lastError: 'bash exit 1',
+		})
+		const listTool = findAgentTaskList(
+			gatewayWith([completedTaskA(), runningTaskB(), failedTaskC]),
+		)
+		const outcome = await listTool.execute({}, makeContext())
+		expect(outcome.success).toBe(true)
+		// Every fragment the rendered text output is required to contain.
+		const expectedPatterns = [
+			/Tasks: 3 total/,
+			/1 running/,
+			/1 completed/,
+			/1 failed/,
+			/task_a → solution-architecture \[completed\]/,
+			/task_c .* error: bash exit 1/,
+		]
+		for (const pattern of expectedPatterns) {
+			expect(outcome.output).toMatch(pattern)
+		}
+		const payload = outcome.data as { items: unknown[]; summary: { total: number } }
+		expect(payload.summary.total).toBe(3)
+		expect(payload.items).toHaveLength(3)
+	})
+	it('filters by state', async () => {
+		const listTool = findAgentTaskList(gatewayWith([completedTaskA(), runningTaskB()]))
+		const outcome = await listTool.execute({ state: 'running' }, makeContext())
+		expect(outcome.success).toBe(true)
+		// Only the running task may survive the filter, in both data and text.
+		const payload = outcome.data as { items: Array<{ task_id: string }> }
+		expect(payload.items).toHaveLength(1)
+		expect(payload.items[0]?.task_id).toBe('task_b')
+		expect(outcome.output).not.toMatch(/task_a/)
+	})
+	it('handles an empty gateway', async () => {
+		const emptyGateway = gatewayWith([])
+		const listTool = findAgentTaskList(emptyGateway)
+		const outcome = await listTool.execute({}, makeContext())
+		expect(outcome.success).toBe(true)
+		expect(outcome.output).toMatch(/Tasks: 0 total/)
+		expect(outcome.output).toMatch(/no tasks launched yet/)
+	})
+	it('does not collide with the plan-task store `task_list` tool name', async () => {
+		// Regression guard for a rename: the agent-task gateway inspector was
+		// once registered as `task_list`, the same name as the plan-task store
+		// list tool, so one would shadow the other in any agent wiring both
+		// surfaces. The inspector must now be exposed as `agent_task_list`
+		// and the coordinator builder must not emit `task_list` at all.
+		const registeredNames = buildCoordinatorTools({
+			gateway: gatewayWith([]),
+			workingDirectory: '/tmp/test',
+			allowedAgentIds: ['solution-architecture'],
+		}).map((tool) => tool.name)
+		expect(registeredNames).toContain('agent_task_list')
+		expect(registeredNames).not.toContain('task_list')
+	})
+})

package/src/tools/coordinator/agent.ts ADDED Viewed

@@ -0,0 +1,157 @@
+import { z } from 'zod'
+import type { AgentRuntimeContext } from '../../types/agent/base.js'
+import type { TaskGateway } from '../../types/agent/gateway.js'
+import type { ToolDefinition } from '../../types/tool/index.js'
+import { defineTool } from '../defineTool.js'
+import type { TaskLaunchedCallback } from './index.js'
+/**
+ * Build the canonical Claude Code `Agent` tool — synchronous subagent
+ * delegation that mirrors what Claude is trained against in
+ * `code.claude.com/docs/en/sub-agents`.
+ *
+ * Semantics: parent calls `Agent({ description, prompt, subagent_type })`,
+ * the runtime spawns the chosen subagent with its own context window,
+ * the parent's tool call BLOCKS until the subagent finishes, and the
+ * subagent's final text comes back as the tool result. Intermediate
+ * subagent tool calls are isolated — only the summary surfaces to
+ * the parent.
+ *
+ * This is **NOT** the same shape as the legacy `create_task` /
+ * `continue_task` / `cancel_task` trio that this package ships
+ * alongside it: those are non-blocking and use a `<task-notification>`
+ * callback model. The async pattern is useful for hosts that want a
+ * work-queue surface, but it is not what Claude Code trained against.
+ * For free agentic alignment, prefer the canonical `Agent` tool; keep
+ * the legacy coordinator tools only when you genuinely need
+ * fire-and-forget multi-task fan-out.
+ *
+ * The interface declared directly below is the options bag consumed by
+ * `buildAgentTool`, which constructs the tool described here.
+ */
+export interface AgentToolOptions {
+	/** Gateway used to create the subagent task and then wait for it to finish. */
+	gateway: TaskGateway
+	/** Working directory passed to the gateway when the task is created. */
+	workingDirectory: string
+	/** Optional runtime context forwarded unchanged to `gateway.createTask`. */
+	runtimeContext?: AgentRuntimeContext
+	/**
+	 * Subagent ids the tool may dispatch to. When non-empty they constrain
+	 * `subagent_type` to an enum; with exactly one id, `subagent_type` becomes
+	 * optional and defaults to that id.
+	 */
+	allowedAgentIds: string[]
+	/**
+	 * Optional callback invoked right after the task is created and before
+	 * the tool starts waiting on it.
+	 */
+	onTaskLaunched?: TaskLaunchedCallback
+}
+/**
+ * Creates the blocking `Agent` tool definition.
+ *
+ * The tool creates a task on `opts.gateway`, notifies `opts.onTaskLaunched`
+ * (if provided), waits for the task to finish, and returns the subagent's
+ * final text as the tool output.
+ *
+ * @param opts - Gateway, working directory, optional runtime context, the
+ *   allowed subagent ids, and an optional launch callback.
+ * @returns A tool definition named `Agent`. Its `execute` resolves to
+ *   `success: true` with the subagent's text (or a placeholder when the text
+ *   is empty), or to `success: false` with an `error` string and diagnostic
+ *   `data` when the task state or the run status is not 'completed', or when
+ *   no subagent id could be resolved.
+ */
+export function buildAgentTool(opts: AgentToolOptions): ToolDefinition {
+	const { gateway, allowedAgentIds: agentIds, onTaskLaunched } = opts
+	const cwd = opts.workingDirectory
+	// With no configured ids there is nothing to enumerate, so accept any string.
+	const subagentTypeEnum =
+		agentIds.length > 0 ? z.enum(agentIds as [string, ...string[]]) : z.string()
+	return defineTool({
+		name: 'Agent',
+		description: `Delegate a task to a specialized subagent. BLOCKING: returns when the subagent has finished, with the subagent's final text as the tool result. The subagent runs in its own context window and cannot see your conversation — include all necessary context in the prompt. Available subagents: ${agentIds.join(', ')}. To run multiple subagents in parallel, call this tool multiple times in a single response.`,
+		inputSchema: z.object({
+			description: z.string().describe('Short label for tracking (shown to the user)'),
+			prompt: z
+				.string()
+				.describe('Self-contained task description with all context the subagent needs'),
+			subagent_type:
+				agentIds.length === 1
+					? subagentTypeEnum
+							.optional()
+							.describe(`Which subagent to run (defaults to the only one: ${agentIds[0]})`)
+					: subagentTypeEnum.describe('Which subagent to run'),
+		}),
+		category: 'custom',
+		permissions: [],
+		readOnly: false,
+		destructive: false,
+		concurrencySafe: true,
+		async execute({ description, prompt, subagent_type }, context) {
+			// With a single registered subagent the type is optional — default to
+			// it so the model can't trip the "subagent_type required" validation.
+			const agentId = subagent_type ?? (agentIds.length === 1 ? agentIds[0] : undefined)
+			if (!agentId) {
+				return {
+					success: false,
+					output: '',
+					error: `subagent_type is required — choose one of: ${agentIds.join(', ')}`,
+				}
+			}
+			const handle = await gateway.createTask({
+				agentId,
+				prompt,
+				workingDirectory: cwd,
+				runtimeContext: opts.runtimeContext,
+			})
+			onTaskLaunched?.(handle.taskId, {
+				agentId,
+				description,
+				// Same canonical-envelope plumbing as coordinator/index.ts
+				// (ses_009-task-notification-envelope). For Agent-tool path
+				// the subagent run is awaited synchronously below, so this
+				// id is only used if a probe / hook unexpectedly forks the
+				// completion to the background notification channel.
+				originalToolUseId: context.toolUseId,
+			})
+			const completed = await gateway.waitForTask(handle.taskId)
+			// Two layers can disagree on whether the subagent succeeded:
+			//
+			// 1. `TaskHandle.state` — the gateway's terminal task state.
+			//    Some gateways (e.g. vandal's) explicitly map
+			//    `result.status !== 'completed'` to `state = 'failed'`,
+			//    others (e.g. SDK's `LocalTaskGateway`) just forward
+			//    whatever the AgentManager set, which does not always
+			//    reflect run-level failure.
+			// 2. `BaseAgentResult.status` — the run's own status. The
+			//    canonical source of truth for whether the agent actually
+			//    finished its work; `lastError` carries the failure
+			//    message when set.
+			//
+			// Treat the subagent as successful only when BOTH agree.
+			// Reporting a failed subagent as successful would silently
+			// hand the parent garbage output and make debugging
+			// impossible, which is what Codex flagged on the first cut.
+			const runStatus = completed.result?.status
+			const succeeded =
+				completed.state === 'completed' && (runStatus === undefined || runStatus === 'completed')
+			// Non-string results are serialised so the parent always receives text.
+			const resultText =
+				typeof completed.result?.result === 'string'
+					? completed.result.result
+					: completed.result?.result !== undefined
+						? JSON.stringify(completed.result.result)
+						: ''
+			if (!succeeded) {
+				const failureLabel =
+					completed.state !== 'completed' ? completed.state : (runStatus ?? 'failed')
+				// `resultText` is always a string, so `??` could never reach the
+				// placeholder and an empty result produced a blank detail. Use
+				// `||` so empty strings fall through to the next candidate.
+				const detail =
+					completed.result?.lastError || resultText || '(subagent provided no failure detail)'
+				return {
+					success: false,
+					output: '',
+					error: `Subagent ${agentId} ${failureLabel}: ${detail}`,
+					data: {
+						task_id: handle.taskId,
+						subagent_type: agentId,
+						state: completed.state,
+						status: runStatus,
+						lastError: completed.result?.lastError,
+					},
+				}
+			}
+			return {
+				success: true,
+				output: resultText || '(subagent returned no text)',
+				data: {
+					task_id: handle.taskId,
+					subagent_type: agentId,
+					state: completed.state,
+					status: runStatus,
+				},
+			}
+		},
+	})
+}

package/src/tools/coordinator/index.ts CHANGED Viewed

@@ -13,6 +13,13 @@ export type TaskLaunchedCallback = (
 		agentId: string
 		description: string
 		planTaskId?: string
+		/**
+		 * The assistant `tool_use_id` that dispatched this task.
+		 * Threaded from `ToolContext.toolUseId` so the runtime can
+		 * later emit a canonical `tool_result` content block bound
+		 * to the same id when the background task completes.
+		 */
+		originalToolUseId?: string
 	},
 ) => void
@@ -38,21 +45,29 @@ export function buildCoordinatorTools(opts: CoordinatorToolsOptions): ToolDefini
 		taskStore,
 		runId,
 		getPlanManager,
-		onTaskLaunched,
+		// `onTaskLaunched` was the entry point for the old
+		// non-blocking + envelope-injection flow. create_task is now
+		// blocking, so the callback is no longer wired here.
+		// Intentionally not destructured to keep the unused-binding
+		// lint clean; callers can still pass it for backwards
+		// compatibility (Agent tool consumes it from its own path).
 	} = opts
 	const cwd = opts.workingDirectory
+	void opts.onTaskLaunched
 	const agentIdEnum = agentIds.length > 0 ? z.enum(agentIds as [string, ...string[]]) : z.string()
 	const createTask = defineTool({
 		name: 'create_task',
-		description: `Launch a task on a specialized agent. NON-BLOCKING: returns immediately. You will receive a <task-notification> message when the agent finishes. Available agents: ${agentIds.join(', ')}. The agent cannot see your conversation — include ALL necessary context in the prompt. To launch multiple tasks in parallel, call this tool multiple times in a single response. After launching, briefly tell the user what you launched and end your turn — do NOT predict or fabricate results.`,
+		description: `Launch a task on a specialized agent and await its result. BLOCKING: returns the agent's final output as this call's tool_result. Available agents: ${agentIds.join(', ')}. Prefer compact assignments; for large context, write/read shared workspace files and pass filenames or references. To launch multiple tasks in parallel, call this tool multiple times in a single assistant turn — the runtime executes every tool_use block from one response concurrently and delivers all tool_results together, so 'fan out 8 specialists' is one assistant message with 8 create_task blocks.`,
 		inputSchema: z.object({
 			agent_id: agentIdEnum.describe('Which agent to run'),
 			prompt: z
 				.string()
-				.describe('Self-contained task description with all context the agent needs'),
-			description: z.string().describe('Short summary for tracking (shown to user)'),
+				.describe(
+					'Self-contained assignment for the agent. For large generated content, prefer workspace file references so provider output-token limits do not cut off the tool call.',
+				),
+			description: z.string().describe('Short summary for tracking, shown to the user.'),
 			plan_task_id: z
 				.string()
 				.optional()
@@ -65,7 +80,7 @@ export function buildCoordinatorTools(opts: CoordinatorToolsOptions): ToolDefini
 		readOnly: false,
 		destructive: false,
 		concurrencySafe: true,
-		async execute({ agent_id, prompt, description, plan_task_id }) {
+		async execute({ agent_id, prompt, description, plan_task_id }, _context) {
 			let resolvedPlanTaskId = plan_task_id
 			if (taskStore) {
@@ -93,22 +108,36 @@ export function buildCoordinatorTools(opts: CoordinatorToolsOptions): ToolDefini
 				runtimeContext: opts.runtimeContext,
 			})
-			if (onTaskLaunched) {
-				onTaskLaunched(handle.taskId, {
-					agentId: agent_id,
-					description,
-					planTaskId: resolvedPlanTaskId,
+			// Industry-standard Anthropic tool pattern: tool returns
+			// its real result as the tool_result for the dispatching
+			// tool_use. Parallel fan-out happens at the executor layer
+			// — when the supervisor emits N create_task blocks in one
+			// assistant turn, the runtime runs them with Promise.all
+			// and delivers all N tool_results together. No async
+			// envelope injection, no second tool_result for the same
+			// tool_use_id (which Anthropic rejects with 400).
+			const completed = await gateway.waitForTask(handle.taskId)
+			const success = completed.state === 'completed'
+			const resultText =
+				completed.result?.result ??
+				completed.result?.lastError ??
+				`Task finished with state: ${completed.state}`
+			if (resolvedPlanTaskId && taskStore) {
+				await taskStore.update(resolvedPlanTaskId as `task_${string}`, {
+					status: 'completed',
+					description: success ? undefined : `Failed: ${resultText.substring(0, 200)}`,
 				})
 			}
 			return {
-				success: true,
-				output: `Task launched: ${handle.taskId} → ${agent_id} ("${description}"). You will receive a task-notification when it completes.`,
+				success,
+				output: resultText,
 				data: {
 					task_id: handle.taskId,
 					agent_id,
 					description,
-					state: 'running',
+					state: completed.state,
 					plan_task_id: resolvedPlanTaskId,
 				},
 			}
@@ -118,11 +147,9 @@ export function buildCoordinatorTools(opts: CoordinatorToolsOptions): ToolDefini
 	const continueTask = defineTool({
 		name: 'continue_task',
 		description:
-			'Send a follow-up message to a previously completed task. NON-BLOCKING: the agent resumes in the background with full prior context. You will receive a task-notification when it finishes. Only use this with a task_id from a previous create_task or task-notification.',
+			"Send a follow-up message to a previously completed task and await the agent's next reply. BLOCKING: returns the agent's new output as this call's tool_result, the same shape as create_task. Only use this with a task_id from a previous create_task. To run multiple follow-ups in parallel, call this tool multiple times in a single assistant turn.",
 		inputSchema: z.object({
-			task_id: z
-				.string()
-				.describe('Agent task ID from a previous create_task or task-notification'),
+			task_id: z.string().describe('Agent task ID from a previous create_task'),
 			message: z.string().describe('Follow-up instruction for the agent'),
 		}),
 		category: 'custom',
@@ -132,11 +159,22 @@ export function buildCoordinatorTools(opts: CoordinatorToolsOptions): ToolDefini
 		concurrencySafe: true,
 		async execute({ task_id, message }) {
 			await gateway.continueTask(task_id as TaskId, message)
+			// Mirror create_task's blocking pattern: await the new
+			// completion and return the agent's output inline. The
+			// previous non-blocking shape ('You will receive a
+			// task-notification…') relied on a global
+			// onTaskCompleted listener that the iteration loop
+			// no longer registers (envelope path is dead).
+			const completed = await gateway.waitForTask(task_id as TaskId)
+			const success = completed.state === 'completed'
+			const resultText =
+				completed.result?.result ??
+				completed.result?.lastError ??
+				`Task finished with state: ${completed.state}`
 			return {
-				success: true,
-				output: `Follow-up sent to ${task_id}. You will receive a task-notification when it finishes.`,
-				data: { task_id, state: 'running' },
+				success,
+				output: resultText,
+				data: { task_id, state: completed.state },
 			}
 		},
 	})
@@ -163,7 +201,75 @@ export function buildCoordinatorTools(opts: CoordinatorToolsOptions): ToolDefini
 		},
 	})
-	const tools: ToolDefinition[] = [createTask, continueTask, cancelTask]
+	// Read-only coordinator tool that reports every agent task known to
+	// the gateway (`gateway.listTasks()`), optionally filtered by state.
+	// `data.items` holds one record per listed task; `data.summary`
+	// always counts ALL tasks, regardless of the `state` filter.
+	const agentTaskList = defineTool({
+		name: 'agent_task_list',
+		description:
+			"Inspect the live state of every agent task launched on this gateway via create_task: returns each task's id, agent, state (pending/running/completed/failed/canceled), and timing. Distinct from the plan-task store's `task_list` (which lists planning tasks): this tool lists running/completed worker invocations. Use it BEFORE declaring multi-worker work done — confirm every launched task reached `completed`, none still `running` or `failed`. Read-only and safe to call repeatedly.",
+		inputSchema: z.object({
+			state: z
+				.enum(['pending', 'running', 'completed', 'failed', 'canceled'])
+				.optional()
+				.describe('Filter by terminal/non-terminal state. Omit to list every task.'),
+		}),
+		category: 'custom',
+		permissions: [],
+		readOnly: true,
+		destructive: false,
+		concurrencySafe: true,
+		async execute({ state }) {
+			const handles = gateway.listTasks()
+			const filtered = state ? handles.filter((h) => h.state === state) : handles
+			const items = filtered.map((h) => {
+				const runStatus = h.result?.status
+				// Normalise a `null` error to `undefined` so the field is simply absent.
+				const lastError = h.result?.lastError ?? undefined
+				return {
+					task_id: h.taskId,
+					agent_id: h.agentId,
+					state: h.state,
+					run_status: runStatus,
+					created_at: new Date(h.createdAt).toISOString(),
+					completed_at: h.completedAt ? new Date(h.completedAt).toISOString() : null,
+					duration_ms: h.completedAt ? h.completedAt - h.createdAt : null,
+					last_error: lastError,
+				}
+			})
+			// Count over the unfiltered list so the header reflects the whole
+			// gateway. `pending` is tallied too: it is a valid task state (see
+			// the input enum), and omitting it made the breakdown fail to add
+			// up to `total` whenever a task had not started yet.
+			const countByState = (s: string): number =>
+				handles.filter((h) => h.state === s).length
+			const summary = {
+				total: handles.length,
+				pending: countByState('pending'),
+				running: countByState('running'),
+				completed: countByState('completed'),
+				failed: countByState('failed'),
+				canceled: countByState('canceled'),
+			}
+			// One bullet per listed task: id → agent [state / run status] (duration) — error.
+			const lines = items.length
+				? items.map(
+						(i) =>
+							`- ${i.task_id} → ${i.agent_id} [${i.state}${i.run_status && i.run_status !== i.state ? ` / ${i.run_status}` : ''}]${
+								i.duration_ms !== null ? ` (${Math.round(i.duration_ms / 1000)}s)` : ''
+							}${i.last_error ? ` — error: ${i.last_error.slice(0, 200)}` : ''}`,
+					)
+				: ['(no tasks launched yet)']
+			const header = `Tasks: ${summary.total} total — ${summary.pending} pending, ${summary.running} running, ${summary.completed} completed, ${summary.failed} failed, ${summary.canceled} canceled`
+			return {
+				success: true,
+				output: [header, '', ...lines].join('\n'),
+				data: { items, summary },
+			}
+		},
+	})
+	// `continue_task` was a follow-up channel for a still-alive worker
+	// task. With `create_task` now blocking + tool_result returning
+	// the worker's final output, every worker reaches a terminal
+	// state by the time the supervisor wants to follow up — and the
+	// agent manager rejects `continue` on terminal tasks. The
+	// industry-standard pattern is to issue a fresh `create_task` that
+	// references the prior worker's output path, so we drop
+	// `continue_task` from the registered surface entirely. The
+	// definition stays in this file for now in case a future
+	// non-default gateway (one that keeps the worker process alive
+	// for follow-ups) wants to re-register it.
+	void continueTask
+	const tools: ToolDefinition[] = [createTask, cancelTask, agentTaskList]
 	if (getPlanManager) {
 		const approvePlan = defineTool({

package/src/types/agent/base.ts CHANGED Viewed

@@ -65,6 +65,14 @@ export type RuntimeToolOverrides = Record<string, ToolAvailability | 'disabled'>
 export interface AgentRuntimeContext {
 	label?: string
 	outputDirectory?: string
+	/**
+	 * Optional working/scratch directory the runtime exposes to the
+	 * agent. It is a sibling of `outputDirectory` but, unlike it, is
+	 * invisible to the output collector. Mirrors the Anthropic Cowork
+	 * pattern where `/home/claude` is scratch and
+	 * `/mnt/user-data/outputs` is user-visible.
+	 */
+	scratchDirectory?: string
 	outputFileMarker?: string
 	notes?: readonly string[]
 }

package/src/types/agent/reactive.ts CHANGED Viewed

@@ -1,8 +1,10 @@
 import type { AdvisoryConfig } from '../advisory/index.js'
 import type { AgentPersona } from '../persona/index.js'
 import type { LLMProvider } from '../provider/index.js'
+import type { SandboxProvider } from '../sandbox/index.js'
 import type { Skill } from '../skills/index.js'
 import type { ToolRegistryContract } from '../tool/index.js'
+import type { VerificationGateConfig } from '../verification/index.js'
 import type { BaseAgentConfig, BaseAgentResult } from './base.js'
 export interface ReactiveAgentConfig extends BaseAgentConfig {
@@ -17,6 +19,29 @@ export interface ReactiveAgentConfig extends BaseAgentConfig {
 	tools: ToolRegistryContract
 	advisory?: AdvisoryConfig
+	/**
+	 * Optional capability-aware deny/allow gate for child tool calls.
+	 * Mirrors the same field on `SupervisorAgentConfig`; when omitted,
+	 * `drainQuery` falls back to its `autoApproveHandler` default
+	 * (every tool call auto-approves, no policy applied). Hosts that
+	 * trust their sandbox should still pass at least
+	 * `{ enabled: true, denyDangerousPatterns: true, ... }` so the
+	 * canonical brick patterns hard-deny instead of executing
+	 * silently.
+	 */
+	verificationGate?: VerificationGateConfig
+	/**
+	 * Optional ephemeral sandbox provider. When set, drainQuery creates
+	 * a sandbox via `provider.create()` before the iteration loop and
+	 * routes filesystem / shell tool calls through it; on run end the
+	 * SDK calls `sandbox.destroy()`. Hosts that want a per-task
+	 * container shared across supervisor + every child specialist run
+	 * pass the SAME provider instance to all of them — caching layered
+	 * on top of the provider keeps the underlying container alive.
+	 */
+	sandboxProvider?: SandboxProvider
 }
 export interface ReactiveAgentResult extends BaseAgentResult {

package/src/types/agent/supervisor.ts CHANGED Viewed

@@ -1,6 +1,11 @@
 import type { AdvisoryConfig } from '../advisory/index.js'
+import type { ResumeHandler } from '../hitl/index.js'
 import type { LLMProvider } from '../provider/index.js'
 import type { TaskRouterConfig } from '../router/index.js'
+import type { SandboxProvider } from '../sandbox/index.js'
+import type { Skill } from '../skills/index.js'
+import type { ToolRegistryContract } from '../tool/index.js'
+import type { VerificationGateConfig } from '../verification/index.js'
 import type { BaseAgentConfig, BaseAgentResult } from './base.js'
 import type { AgentFactoryOptions } from './factory.js'
 import type { TaskGateway } from './gateway.js'
@@ -13,9 +18,12 @@ export interface SupervisorAgentConfig extends BaseAgentConfig {
 	gateway?: TaskGateway
 	agentManager?: AgentManagerContract
+	tools?: ToolRegistryContract
 	systemPrompt: string
+	skills?: Skill[]
 	maxDepth?: number
 	taskRouter?: TaskRouterConfig
@@ -23,6 +31,43 @@ export interface SupervisorAgentConfig extends BaseAgentConfig {
 	factoryOptions?: AgentFactoryOptions
 	advisory?: AdvisoryConfig
+	/**
+	 * Optional human-in-the-loop hook for tool review and run-pause
+	 * decisions. When omitted, the supervisor delegates to drainQuery's
+	 * built-in `autoApproveHandler`, which approves every tool call
+	 * without prompting — matching Anthropic's "Act without asking"
+	 * cowork mode.
+	 *
+	 * Hosts that want "Ask before acting" behaviour pass a custom
+	 * handler that surfaces the `tool_review_requested` RunEvent to
+	 * the user and resolves the returned promise once the user
+	 * approves, rejects, or modifies the call.
+	 */
+	resumeHandler?: ResumeHandler
+	/**
+	 * Optional declarative gate evaluated before tool execution. When
+	 * the gate marks all calls in a batch as `allow`, they execute
+	 * without round-tripping through the resumeHandler. Mixed or all-
+	 * deny outcomes fall through to review (and the resumeHandler).
+	 *
+	 * Use it to express deterministic policy (e.g. "internal
+	 * read-only tools always allow; destructive shell calls always
+	 * review") so the resumeHandler only fires for the truly
+	 * non-deterministic cases.
+	 */
+	verificationGate?: VerificationGateConfig
+	/**
+	 * Optional ephemeral sandbox provider. When set, drainQuery creates
+	 * a sandbox via `provider.create()` before the supervisor's own
+	 * iteration loop and routes filesystem / shell tool calls through
+	 * it. Multi-agent hosts thread the SAME provider instance into
+	 * every child `ReactiveAgentConfig.sandboxProvider` so supervisor
+	 * + children share one ephemeral container per task.
+	 */
+	sandboxProvider?: SandboxProvider
 }
 export interface AgentTaskResult {