npm - @roj-ai/sdk - Version diff - 0.1.11 → 0.1.13 - Mend

@roj-ai/sdk 0.1.11 → 0.1.13

This diff compares the contents of two publicly available versions of the package, as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions exactly as they appear in their respective public registries.

Files changed (46)

package/dist/bootstrap.d.ts +6 -0
package/dist/bootstrap.d.ts.map +1 -1
package/dist/core/agents/agent.d.ts +2 -0
package/dist/core/agents/agent.d.ts.map +1 -1
package/dist/core/agents/agent.js +46 -10
package/dist/core/agents/agent.js.map +1 -1
package/dist/core/sessions/session-manager.d.ts.map +1 -1
package/dist/core/sessions/session-manager.js +13 -5
package/dist/core/sessions/session-manager.js.map +1 -1
package/dist/index.d.ts +1 -1
package/dist/index.d.ts.map +1 -1
package/dist/plugins/agents/plugin.d.ts +20 -0
package/dist/plugins/agents/plugin.d.ts.map +1 -1
package/dist/plugins/agents/plugin.js +189 -2
package/dist/plugins/agents/plugin.js.map +1 -1
package/dist/plugins/agents/supervision.integration.test.d.ts +2 -0
package/dist/plugins/agents/supervision.integration.test.d.ts.map +1 -0
package/dist/plugins/agents/supervision.integration.test.js +215 -0
package/dist/plugins/agents/supervision.integration.test.js.map +1 -0
package/dist/plugins/mailbox/mailbox.integration.test.js +80 -0
package/dist/plugins/mailbox/mailbox.integration.test.js.map +1 -1
package/dist/plugins/mailbox/plugin.d.ts +1 -0
package/dist/plugins/mailbox/plugin.d.ts.map +1 -1
package/dist/plugins/mailbox/plugin.js +17 -0
package/dist/plugins/mailbox/plugin.js.map +1 -1
package/dist/plugins/mailbox/schema.d.ts +1 -1
package/dist/plugins/mailbox/schema.d.ts.map +1 -1
package/dist/plugins/mailbox/state.d.ts +2 -1
package/dist/plugins/mailbox/state.d.ts.map +1 -1
package/dist/plugins/mailbox/state.js +1 -1
package/dist/plugins/mailbox/state.js.map +1 -1
package/dist/transport/http/middleware/error-handler.d.ts +1 -1
package/dist/user-config.d.ts +32 -0
package/dist/user-config.d.ts.map +1 -1
package/dist/user-config.js.map +1 -1
package/package.json +2 -2
package/src/core/agents/agent.ts +52 -14
package/src/core/sessions/session-manager.ts +14 -5
package/src/index.ts +1 -1
package/src/plugins/agents/plugin.ts +228 -3
package/src/plugins/agents/supervision.integration.test.ts +249 -0
package/src/plugins/mailbox/mailbox.integration.test.ts +95 -0
package/src/plugins/mailbox/plugin.ts +20 -0
package/src/plugins/mailbox/schema.ts +1 -0
package/src/plugins/mailbox/state.ts +2 -1
package/src/user-config.ts +34 -0

package/src/plugins/agents/plugin.ts CHANGED Viewed

@@ -12,11 +12,12 @@
 import z from 'zod/v4'
 import { AgentId, agentIdSchema, generateAgentId } from '~/core/agents/schema.js'
-import { agentEvents } from '~/core/agents/state.js'
+import { type AgentState, agentEvents } from '~/core/agents/state.js'
 import { AgentErrors, ValidationErrors } from '~/core/errors.js'
 import { definePlugin } from '~/core/plugins/index.js'
 import { getNextAgentSeq } from '~/core/sessions/state.js'
 import { createTool } from '~/core/tools/definition.js'
+import type { Logger } from '~/lib/logger/logger.js'
 import { Err, Ok } from '~/lib/utils/result.js'
 import { mailboxPlugin } from '~/plugins/mailbox/plugin.js'
@@ -36,6 +37,132 @@ export interface SpawnableAgentInfo {
 export interface AgentsPluginConfig {
 	/** Map of agent name → spawn info for generating typed tools */
 	agentDefinitions: Map<string, SpawnableAgentInfo>
+	/**
+	 * Supervision tick interval (ms) for parent agents. When set, parent agents
+	 * with active children receive a periodic <children-status> snapshot via
+	 * mailbox so they stay aware of long-running sub-agents and prompt cache
+	 * stays warm.
+	 *
+	 * Default: undefined (disabled). Recommended: 240000 (4 min, just under
+	 * the 5 min prompt cache TTL — see SUPERVISION_INTERVAL_CACHE_FRIENDLY).
+	 */
+	superviseChildrenIntervalMs?: number
+}
+/**
+ * Recommended supervision interval — 4 min, just under prompt cache TTL.
+ * Each tick triggers a parent inference, keeping the prompt cache warm.
+ */
+export const SUPERVISION_INTERVAL_CACHE_FRIENDLY = 240_000
+/** Per-session runtime state held in plugin context — timers + trigger callback. */
+interface AgentsPluginContext {
+	timers: Map<AgentId, ReturnType<typeof setTimeout>>
+	/** Set in onSessionReady — calls agents._supervisionTick via callPluginMethod (fresh ctx). */
+	triggerTick: ((agentId: AgentId) => Promise<unknown>) | null
+	/** null = supervision disabled for this session. */
+	intervalMs: number | null
+	logger: Logger | null
+}
+/**
+ * Get all direct children of an agent.
+ */
+function getDirectChildren(sessionAgents: Map<AgentId, AgentState>, parentId: AgentId): AgentState[] {
+	const out: AgentState[] = []
+	for (const agent of sessionAgents.values()) {
+		if (agent.parentId === parentId) out.push(agent)
+	}
+	return out
+}
+/**
+ * Count assistant tool calls across conversation history + currently pending.
+ */
+function countToolCalls(state: AgentState): number {
+	let total = state.pendingToolCalls.length
+	for (const m of state.conversationHistory) {
+		if (m.role === 'assistant' && m.toolCalls) total += m.toolCalls.length
+	}
+	return total
+}
+/**
+ * Count completed LLM inferences (= assistant turns in history).
+ */
+function countLLMCalls(state: AgentState): number {
+	let total = 0
+	for (const m of state.conversationHistory) {
+		if (m.role === 'assistant') total++
+	}
+	return total
+}
+/**
+ * Build a compact "first N words..last M words" preview of the agent's most
+ * recent assistant message (skipping empty turns). Returns null if none.
+ */
+function previewLastAssistant(state: AgentState, headWords = 5, tailWords = 5): string | null {
+	for (let i = state.conversationHistory.length - 1; i >= 0; i--) {
+		const m = state.conversationHistory[i]
+		if (m.role !== 'assistant') continue
+		const text = m.content?.trim()
+		if (!text) continue
+		const words = text.split(/\s+/)
+		if (words.length <= headWords + tailWords + 1) return text
+		return `${words.slice(0, headWords).join(' ')}..${words.slice(-tailWords).join(' ')}`
+	}
+	return null
+}
+/**
+ * Build a compact children-status snapshot for the given parent agent.
+ */
+function buildChildrenStatus(sessionAgents: Map<AgentId, AgentState>, parentId: AgentId): string {
+	const children = getDirectChildren(sessionAgents, parentId)
+	const lines = children.map((c) => {
+		const tools = countToolCalls(c)
+		const llm = countLLMCalls(c)
+		const subs = getDirectChildren(sessionAgents, c.id).length
+		const last = previewLastAssistant(c)
+		const parts: string[] = [c.id, c.status]
+		parts.push(`${tools} tools`)
+		parts.push(`${llm} llm`)
+		if (subs > 0) parts.push(`${subs} sub${subs === 1 ? '' : 's'}`)
+		if (last) parts.push(`last "${last.replaceAll('"', "'")}"`)
+		return parts.join(', ')
+	})
+	return `<children-status>\n${lines.join('\n')}\n</children-status>`
+}
+/**
+ * (Re)schedule a supervision tick for an agent. Any existing timer is cleared first.
+ */
+function scheduleSupervisionTick(
+	pluginContext: AgentsPluginContext,
+	agentId: AgentId,
+	delayMs: number,
+): void {
+	const existing = pluginContext.timers.get(agentId)
+	if (existing) clearTimeout(existing)
+	const timer = setTimeout(() => {
+		pluginContext.timers.delete(agentId)
+		const trigger = pluginContext.triggerTick
+		if (!trigger) return
+		trigger(agentId).catch((err) => {
+			pluginContext.logger?.error(
+				'Supervision tick failed',
+				err instanceof Error ? err : undefined,
+				{ agentId },
+			)
+		})
+	}, delayMs)
+	pluginContext.timers.set(agentId, timer)
 }
 /**
@@ -57,6 +184,12 @@ function createStartAgentSchema(agent: SpawnableAgentInfo) {
 export const agentsPlugin = definePlugin('agents')
 	.pluginConfig<AgentsPluginConfig>()
 	.dependencies([mailboxPlugin])
+	.context(async (): Promise<AgentsPluginContext> => ({
+		timers: new Map(),
+		triggerTick: null,
+		intervalMs: null,
+		logger: null,
+	}))
 	.isEnabled((ctx) => {
 		return ctx.agentConfig.spawnableAgents.length > 0
 	})
@@ -108,6 +241,11 @@ export const agentsPlugin = definePlugin('agents')
 				parentId: input.parentId,
 			})
+			// Ensure parent has a supervision tick running now that it has a child.
+			if (ctx.pluginContext.intervalMs !== null) {
+				scheduleSupervisionTick(ctx.pluginContext, parentId, ctx.pluginContext.intervalMs)
+			}
 			return Ok({ agentId })
 		},
 	})
@@ -196,12 +334,99 @@ export const agentsPlugin = definePlugin('agents')
 			return Ok({})
 		},
 	})
-	.systemPrompt(() => {
-		return `## Working with Child Agents
+	.method('_supervisionTick', {
+		input: z.object({ agentId: agentIdSchema }),
+		output: z.object({}),
+		handler: async (ctx, input) => {
+			const agentId = AgentId(input.agentId)
+			// Self may already be gone (terminated mid-tick); just stop.
+			if (!ctx.sessionState.agents.has(agentId)) return Ok({})
+			const children = getDirectChildren(ctx.sessionState.agents, agentId)
+			if (children.length === 0) {
+				// No active children → don't reschedule. spawn() will re-arm if/when needed.
+				return Ok({})
+			}
+			const snapshot = buildChildrenStatus(ctx.sessionState.agents, agentId)
+			const sendResult = await ctx.deps.mailbox.send({
+				toAgentId: agentId,
+				content: snapshot,
+				fromSupervisor: true,
+			})
+			if (!sendResult.ok) {
+				ctx.logger.warn('Supervision snapshot send failed', {
+					agentId,
+					error: sendResult.error.message,
+				})
+			}
+			// Reschedule the next tick from now (rolling).
+			if (ctx.pluginContext.intervalMs !== null) {
+				scheduleSupervisionTick(ctx.pluginContext, agentId, ctx.pluginContext.intervalMs)
+			}
+			return Ok({})
+		},
+	})
+	.sessionHook('onSessionReady', async (ctx) => {
+		const intervalMs = ctx.pluginConfig.superviseChildrenIntervalMs
+		if (intervalMs === undefined) {
+			// Supervision disabled (default). No timer wiring; spawn() and
+			// afterInference() check intervalMs === null and skip too.
+			ctx.pluginContext.intervalMs = null
+			return
+		}
+		ctx.pluginContext.intervalMs = intervalMs
+		ctx.pluginContext.logger = ctx.logger
+		// Wire the trigger callback — calls back via self.* so each tick gets a
+		// fresh ctx (live sessionState/pluginState/deps).
+		ctx.pluginContext.triggerTick = (agentId) => ctx.self._supervisionTick({ agentId })
+		// (Re-)schedule timers for every agent that currently has direct children.
+		// Covers initial session creation AND server-restart reload (onSessionReady
+		// fires in both paths). Worst-case drift after restart = intervalMs.
+		for (const agent of ctx.sessionState.agents.values()) {
+			if (getDirectChildren(ctx.sessionState.agents, agent.id).length > 0) {
+				scheduleSupervisionTick(ctx.pluginContext, agent.id, intervalMs)
+			}
+		}
+	})
+	.sessionHook('onSessionClose', async (ctx) => {
+		for (const t of ctx.pluginContext.timers.values()) clearTimeout(t)
+		ctx.pluginContext.timers.clear()
+		ctx.pluginContext.triggerTick = null
+	})
+	.hook('afterInference', async (ctx) => {
+		// Natural inference warmed the cache — push the next tick out by intervalMs
+		// so we don't double-charge for parents who are already actively interacting.
+		if (ctx.pluginContext.intervalMs !== null) {
+			if (getDirectChildren(ctx.sessionState.agents, ctx.agentId).length > 0) {
+				scheduleSupervisionTick(ctx.pluginContext, ctx.agentId, ctx.pluginContext.intervalMs)
+			}
+		}
+		return null
+	})
+	.systemPrompt((ctx) => {
+		const base = `## Working with Child Agents
 - **New task** → spawn a new agent using \`start_<agent_name>\`. You will receive the agent's ID in the result — use it with \`send_message\` for follow-up communication.
 - **Follow-up on an existing task** → send a message to the existing agent via \`send_message\` with the agent's ID. Do NOT spawn a new agent for feedback, corrections, or additional instructions on a task already assigned.
 - Spawned agents communicate back to you via \`send_message\`. Check your incoming messages for their results and progress updates.`
+		// Only include supervision instructions if supervision is actually enabled
+		// for this session — otherwise the section is misleading bloat.
+		if (ctx.pluginContext.intervalMs === null) return base
+		return `${base}
+### Supervision messages
+You will periodically receive a \`<children-status>\` message from \`from="supervisor"\`. It is a status snapshot of your direct children — purely informational. Per child you'll see status, cumulative tool/llm call counts, sub-agent count, and a "first words..last words" preview of their last assistant turn.
+Do NOT act on a supervision tick unless something is genuinely wrong (a child has been errored or stuck for a long time, you have a deadline, etc.). Most of the time you should just wait. Never reply to the supervisor.`
 	})
 	.tools((ctx) => {
 		const spawnableAgents = ctx.agentConfig.spawnableAgents

package/src/plugins/agents/supervision.integration.test.ts ADDED Viewed

@@ -0,0 +1,249 @@
+import { describe, expect, it } from 'bun:test'
+import { AgentId } from '~/core/agents/schema.js'
+import { MockLLMProvider } from '~/core/llm/mock.js'
+import { ToolCallId } from '~/core/tools/schema.js'
+import { agentsPlugin } from '~/plugins/agents/plugin.js'
+import { mailboxEvents } from '~/plugins/mailbox/index.js'
+import { createMultiAgentPreset, TestHarness, type TestSession } from '~/testing/index.js'
+/**
+ * Helper — wait until at least one supervision message has landed in the parent's
+ * mailbox (or timeout). We poll because supervision ticks fire on real timers.
+ */
+async function waitForSupervisorMessage(
+	session: TestSession,
+	toAgentId: AgentId,
+	timeoutMs = 2000,
+): Promise<{ message: { from: unknown; content: string } } | undefined> {
+	const deadline = Date.now() + timeoutMs
+	while (Date.now() < deadline) {
+		const events = await session.getEventsByType(mailboxEvents, 'mailbox_message')
+		const found = events.find((e) =>
+			e.message.from === 'supervisor'
+			&& e.toAgentId === toAgentId
+			&& typeof e.message.content === 'string',
+		)
+		if (found) return found
+		await new Promise((r) => setTimeout(r, 25))
+	}
+	return undefined
+}
+describe('agents plugin supervision', () => {
+	it('parent with active children receives a periodic <children-status> snapshot', async () => {
+		let orchestratorCalls = 0
+		let workerCalls = 0
+		const harness = new TestHarness({
+			presets: [{
+				...createMultiAgentPreset([
+					{ name: 'worker', system: 'Worker agent.', tools: [], agents: [] },
+				], { orchestratorSystem: 'Orchestrator agent.' }),
+				plugins: [{ pluginName: 'agents', definition: agentsPlugin, config: { superviseChildrenIntervalMs: 100 } }],
+			}],
+			mockHandler: (request) => {
+				if (request.systemPrompt.includes('Orchestrator')) {
+					orchestratorCalls++
+					if (orchestratorCalls === 1) {
+						return {
+							content: null,
+							toolCalls: [{ id: ToolCallId('tc1'), name: 'start_worker', input: { message: 'Long-running task' } }],
+							finishReason: 'stop',
+							metrics: MockLLMProvider.defaultMetrics(),
+						}
+					}
+					// Subsequent calls: orchestrator does nothing more, just acknowledges.
+					return { content: 'noted', toolCalls: [], finishReason: 'stop', metrics: MockLLMProvider.defaultMetrics() }
+				}
+				// Worker: takes a long time — say something but never reports back.
+				workerCalls++
+				return { content: `Working on step ${workerCalls}`, toolCalls: [], finishReason: 'stop', metrics: MockLLMProvider.defaultMetrics() }
+			},
+		})
+		const session = await harness.createSession('test')
+		await session.sendMessage('Start')
+		// Orchestrator is the entry agent in this preset. Wait for a tick.
+		const orchestratorId = session.getEntryAgentId()!
+		const supervisorMsg = await waitForSupervisorMessage(session as never, orchestratorId)
+		expect(supervisorMsg).toBeDefined()
+		expect(supervisorMsg!.message.content).toContain('<children-status>')
+		expect(supervisorMsg!.message.content).toContain('worker_1')
+		// Cumulative LLM call count should be present
+		expect(supervisorMsg!.message.content).toMatch(/worker_1[^,\n]*,[^,\n]*,\s*\d+ tools,\s*\d+ llm/)
+		await harness.shutdown()
+	})
+	it('default (no config) → supervision disabled, no tick fires', async () => {
+		const harness = new TestHarness({
+			presets: [createMultiAgentPreset([
+				{ name: 'worker', system: 'Worker agent.', tools: [], agents: [] },
+			], { orchestratorSystem: 'Orchestrator agent.' })],
+			mockHandler: (request) => {
+				if (request.systemPrompt.includes('Orchestrator')) {
+					return {
+						content: null,
+						toolCalls: [{ id: ToolCallId('tc1'), name: 'start_worker', input: { message: 'Do work' } }],
+						finishReason: 'stop',
+						metrics: MockLLMProvider.defaultMetrics(),
+					}
+				}
+				return { content: 'Working', toolCalls: [], finishReason: 'stop', metrics: MockLLMProvider.defaultMetrics() }
+			},
+		})
+		const session = await harness.createSession('test')
+		await session.sendMessage('Start')
+		// Wait long enough for ticks if they were enabled (they shouldn't).
+		await new Promise((r) => setTimeout(r, 300))
+		const events = await session.getEventsByType(mailboxEvents, 'mailbox_message')
+		const supervisorMessages = events.filter(e => e.message.from === 'supervisor')
+		expect(supervisorMessages).toHaveLength(0)
+		await harness.shutdown()
+	})
+	it('parent without children → no tick fires', async () => {
+		const harness = new TestHarness({
+			presets: [{
+				...createMultiAgentPreset([
+					{ name: 'worker', system: 'Worker agent.', tools: [], agents: [] },
+				], { orchestratorSystem: 'Orchestrator agent.' }),
+				plugins: [{ pluginName: 'agents', definition: agentsPlugin, config: { superviseChildrenIntervalMs: 100 } }],
+			}],
+			mockHandler: (request) => {
+				// Orchestrator never spawns anyone.
+				if (request.systemPrompt.includes('Orchestrator')) {
+					return { content: 'Done without spawning', toolCalls: [], finishReason: 'stop', metrics: MockLLMProvider.defaultMetrics() }
+				}
+				return { content: 'unused', toolCalls: [], finishReason: 'stop', metrics: MockLLMProvider.defaultMetrics() }
+			},
+		})
+		const session = await harness.createSession('test')
+		await session.sendAndWaitForIdle('Start')
+		// Give supervision plenty of room to fire (it shouldn't).
+		await new Promise((r) => setTimeout(r, 300))
+		const events = await session.getEventsByType(mailboxEvents, 'mailbox_message')
+		const supervisorMessages = events.filter(e => e.message.from === 'supervisor')
+		expect(supervisorMessages).toHaveLength(0)
+		await harness.shutdown()
+	})
+	it('snapshot includes "first words..last words" preview of last assistant turn', async () => {
+		let orchestratorCalls = 0
+		const harness = new TestHarness({
+			presets: [{
+				...createMultiAgentPreset([
+					{ name: 'worker', system: 'Worker agent.', tools: [], agents: [] },
+				], { orchestratorSystem: 'Orchestrator agent.' }),
+				plugins: [{ pluginName: 'agents', definition: agentsPlugin, config: { superviseChildrenIntervalMs: 100 } }],
+			}],
+			mockHandler: (request) => {
+				if (request.systemPrompt.includes('Orchestrator')) {
+					orchestratorCalls++
+					if (orchestratorCalls === 1) {
+						return {
+							content: null,
+							toolCalls: [{ id: ToolCallId('tc1'), name: 'start_worker', input: { message: 'Long task' } }],
+							finishReason: 'stop',
+							metrics: MockLLMProvider.defaultMetrics(),
+						}
+					}
+					return { content: 'ack', toolCalls: [], finishReason: 'stop', metrics: MockLLMProvider.defaultMetrics() }
+				}
+				// Worker says a long sentence that should be truncated to first..last words
+				return {
+					content: 'Started fetching data and now I am running through the pipeline analyzing the response carefully',
+					toolCalls: [],
+					finishReason: 'stop',
+					metrics: MockLLMProvider.defaultMetrics(),
+				}
+			},
+		})
+		const session = await harness.createSession('test')
+		await session.sendMessage('Start')
+		const orchestratorId = session.getEntryAgentId()!
+		const msg = await waitForSupervisorMessage(session as never, orchestratorId)
+		expect(msg).toBeDefined()
+		// Should contain both head (first 5 words) and tail (last 5 words), joined by ".."
+		expect(msg!.message.content).toContain('Started fetching data and now')
+		expect(msg!.message.content).toContain('pipeline analyzing the response carefully')
+		expect(msg!.message.content).toMatch(/\.\.pipeline/)
+		await harness.shutdown()
+	})
+	it('server restart re-establishes timers via onSessionReady', async () => {
+		const sharedEventStore = new (await import('~/core/events/memory.js')).MemoryEventStore()
+		// Counter shared across phases — phase 1 spawns once, then orchestrator goes idle;
+		// phase 2 just acknowledges any wake-up triggered by the supervision tick.
+		let orchestratorCalls = 0
+		const buildHarness = (intervalMs: number | undefined) => new TestHarness({
+			eventStore: sharedEventStore,
+			presets: [{
+				...createMultiAgentPreset([
+					{ name: 'worker', system: 'Worker agent.', tools: [], agents: [] },
+				], { orchestratorSystem: 'Orchestrator agent.' }),
+				...(intervalMs !== undefined && {
+					plugins: [{ pluginName: 'agents', definition: agentsPlugin, config: { superviseChildrenIntervalMs: intervalMs } }],
+				}),
+			}],
+			mockHandler: (request) => {
+				if (request.systemPrompt.includes('Orchestrator')) {
+					orchestratorCalls++
+					if (orchestratorCalls === 1) {
+						return {
+							content: null,
+							toolCalls: [{ id: ToolCallId('tc1'), name: 'start_worker', input: { message: 'Long task' } }],
+							finishReason: 'stop',
+							metrics: MockLLMProvider.defaultMetrics(),
+						}
+					}
+					return { content: 'noted', toolCalls: [], finishReason: 'stop', metrics: MockLLMProvider.defaultMetrics() }
+				}
+				return { content: 'still working', toolCalls: [], finishReason: 'stop', metrics: MockLLMProvider.defaultMetrics() }
+			},
+		})
+		// Phase 1: create session with supervision DISABLED (default) so no ticks pre-restart.
+		const harness1 = buildHarness(undefined)
+		const session1 = await harness1.createSession('test')
+		await session1.sendAndWaitForIdle('Start')
+		const hasWorker = () => {
+			for (const agent of session1.state.agents.values()) {
+				if (agent.definitionName === 'worker') return true
+			}
+			return false
+		}
+		expect(hasWorker()).toBe(true)
+		const sessionId = session1.sessionId
+		await harness1.shutdown()
+		// Phase 2: restart with supervision enabled. onSessionReady should
+		// re-arm the orchestrator's tick because it has a child.
+		const harness2 = buildHarness(100)
+		const session2 = await harness2.openSession(sessionId)
+		const orchestratorId = session2.getEntryAgentId()!
+		const msg = await waitForSupervisorMessage(session2, orchestratorId, 1500)
+		expect(msg).toBeDefined()
+		expect(msg!.message.content).toContain('worker_1')
+		await harness2.shutdown()
+	})
+})

package/src/plugins/mailbox/mailbox.integration.test.ts CHANGED Viewed

@@ -581,6 +581,101 @@ describe('mailbox plugin', () => {
 			await harness.shutdown()
 		})
+		it('empty-stop LLM response → agent retries; persistent empty → onError reports to parent', async () => {
+			let workerCalls = 0
+			let orchestratorCalls = 0
+			const harness = new TestHarness({
+				presets: [createMultiAgentPreset([
+					{ name: 'worker', system: 'Worker agent.', tools: [], agents: [] },
+				], { orchestratorSystem: 'Orchestrator agent.' })],
+				mockHandler: (request) => {
+					if (request.systemPrompt.includes('Orchestrator')) {
+						orchestratorCalls++
+						if (orchestratorCalls === 1) {
+							return {
+								content: null,
+								toolCalls: [{ id: ToolCallId('tc1'), name: 'start_worker', input: { message: 'Do work' } }],
+								finishReason: 'stop',
+								metrics: MockLLMProvider.defaultMetrics(),
+							}
+						}
+						return { content: 'Done', toolCalls: [], finishReason: 'stop', metrics: MockLLMProvider.defaultMetrics() }
+					}
+					workerCalls++
+					// Always empty-stop → triggers retry until exhausted, then onError
+					return { content: null, toolCalls: [], finishReason: 'stop', metrics: MockLLMProvider.defaultMetrics() }
+				},
+			})
+			const session = await harness.createSession('test')
+			await session.sendMessage('Start')
+			// Worker ends up errored (not idle); poll for the error message to parent.
+			const deadline = Date.now() + 5000
+			let errMsg: { message: { content: string; from: unknown } } | undefined
+			while (Date.now() < deadline) {
+				const events = await session.getEventsByType(mailboxEvents, 'mailbox_message')
+				errMsg = events.find(e =>
+					e.message.from === AgentId('worker_1')
+					&& typeof e.message.content === 'string'
+					&& e.message.content.startsWith('Agent encountered an error:'),
+				)
+				if (errMsg) break
+				await new Promise((r) => setTimeout(r, 50))
+			}
+			// Initial + 2 retries = 3 worker LLM calls
+			expect(workerCalls).toBe(3)
+			expect(errMsg).toBeDefined()
+			await harness.shutdown()
+		})
+		it('empty-stop LLM response → recovers on retry, no error sent', async () => {
+			let workerCalls = 0
+			let orchestratorCalls = 0
+			const harness = new TestHarness({
+				presets: [createMultiAgentPreset([
+					{ name: 'worker', system: 'Worker agent.', tools: [], agents: [] },
+				], { orchestratorSystem: 'Orchestrator agent.' })],
+				mockHandler: (request) => {
+					if (request.systemPrompt.includes('Orchestrator')) {
+						orchestratorCalls++
+						if (orchestratorCalls === 1) {
+							return {
+								content: null,
+								toolCalls: [{ id: ToolCallId('tc1'), name: 'start_worker', input: { message: 'Do work' } }],
+								finishReason: 'stop',
+								metrics: MockLLMProvider.defaultMetrics(),
+							}
+						}
+						return { content: 'Done', toolCalls: [], finishReason: 'stop', metrics: MockLLMProvider.defaultMetrics() }
+					}
+					workerCalls++
+					if (workerCalls === 1) {
+						return { content: null, toolCalls: [], finishReason: 'stop', metrics: MockLLMProvider.defaultMetrics() }
+					}
+					return { content: 'Recovered', toolCalls: [], finishReason: 'stop', metrics: MockLLMProvider.defaultMetrics() }
+				},
+			})
+			const session = await harness.createSession('test')
+			await session.sendAndWaitForIdle('Start')
+			expect(workerCalls).toBe(2)
+			const events = await session.getEventsByType(mailboxEvents, 'mailbox_message')
+			const errMsg = events.find(e =>
+				typeof e.message.content === 'string'
+				&& e.message.content.startsWith('Agent encountered an error:'),
+			)
+			expect(errMsg).toBeUndefined()
+			await harness.shutdown()
+		})
 		it('agent without parent → no completion message even with flag true', async () => {
 			const harness = new TestHarness({
 				presets: [createTestPreset({

package/src/plugins/mailbox/plugin.ts CHANGED Viewed

@@ -90,11 +90,31 @@ export const mailboxPlugin = definePlugin("mailbox")
 			toAgentId: agentIdSchema,
 			content: z.string(),
 			debug: z.boolean().optional(),
+			fromSupervisor: z.boolean().optional(),
 		}),
 		output: z.object({ messageId: z.string() }),
 		handler: async (ctx, input) => {
 			const { toAgentId, content } = input;
+			if (input.fromSupervisor) {
+				// System-emitted supervision status — bypasses communication validation.
+				const messageId = generateMessageId(getNextMessageSeq(ctx.pluginState));
+				await ctx.emitEvent(
+					mailboxEvents.create("mailbox_message", {
+						toAgentId,
+						message: {
+							id: messageId,
+							from: "supervisor",
+							content,
+							timestamp: Date.now(),
+							consumed: false,
+						},
+					}),
+				);
+				ctx.scheduleAgent(toAgentId);
+				return Ok({ messageId });
+			}
 			if (input.debug) {
 				// Debug messages bypass communication validation
 				const messageId = generateMessageId(getNextMessageSeq(ctx.pluginState));

package/src/plugins/mailbox/schema.ts CHANGED Viewed

@@ -58,6 +58,7 @@ export type MailboxMessageSender =
 	| WorkerId
 	| 'user'
 	| 'debug'
+	| 'supervisor'
 	| typeof ORCHESTRATOR_ROLE
 	| typeof COMMUNICATOR_ROLE

package/src/plugins/mailbox/state.ts CHANGED Viewed

@@ -11,6 +11,7 @@ export type MailboxMessageSender =
 	| WorkerId
 	| 'user'
 	| 'debug'
+	| 'supervisor'
 	| typeof ORCHESTRATOR_ROLE
 	| typeof COMMUNICATOR_ROLE
@@ -23,7 +24,7 @@ export const mailboxEvents = createEventsFactory({
 				from: z4.union([
 					agentIdSchema,
 					workerIdSchema,
-					z4.enum(['user', 'debug', COMMUNICATOR_ROLE, ORCHESTRATOR_ROLE]),
+					z4.enum(['user', 'debug', 'supervisor', COMMUNICATOR_ROLE, ORCHESTRATOR_ROLE]),
 				]),
 				content: z4.string(),
 				timestamp: z4.number(),