@roj-ai/sdk 0.1.11 → 0.1.13

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the differences between package versions as they appear in their respective public registries.
Files changed (46)
  1. package/dist/bootstrap.d.ts +6 -0
  2. package/dist/bootstrap.d.ts.map +1 -1
  3. package/dist/core/agents/agent.d.ts +2 -0
  4. package/dist/core/agents/agent.d.ts.map +1 -1
  5. package/dist/core/agents/agent.js +46 -10
  6. package/dist/core/agents/agent.js.map +1 -1
  7. package/dist/core/sessions/session-manager.d.ts.map +1 -1
  8. package/dist/core/sessions/session-manager.js +13 -5
  9. package/dist/core/sessions/session-manager.js.map +1 -1
  10. package/dist/index.d.ts +1 -1
  11. package/dist/index.d.ts.map +1 -1
  12. package/dist/plugins/agents/plugin.d.ts +20 -0
  13. package/dist/plugins/agents/plugin.d.ts.map +1 -1
  14. package/dist/plugins/agents/plugin.js +189 -2
  15. package/dist/plugins/agents/plugin.js.map +1 -1
  16. package/dist/plugins/agents/supervision.integration.test.d.ts +2 -0
  17. package/dist/plugins/agents/supervision.integration.test.d.ts.map +1 -0
  18. package/dist/plugins/agents/supervision.integration.test.js +215 -0
  19. package/dist/plugins/agents/supervision.integration.test.js.map +1 -0
  20. package/dist/plugins/mailbox/mailbox.integration.test.js +80 -0
  21. package/dist/plugins/mailbox/mailbox.integration.test.js.map +1 -1
  22. package/dist/plugins/mailbox/plugin.d.ts +1 -0
  23. package/dist/plugins/mailbox/plugin.d.ts.map +1 -1
  24. package/dist/plugins/mailbox/plugin.js +17 -0
  25. package/dist/plugins/mailbox/plugin.js.map +1 -1
  26. package/dist/plugins/mailbox/schema.d.ts +1 -1
  27. package/dist/plugins/mailbox/schema.d.ts.map +1 -1
  28. package/dist/plugins/mailbox/state.d.ts +2 -1
  29. package/dist/plugins/mailbox/state.d.ts.map +1 -1
  30. package/dist/plugins/mailbox/state.js +1 -1
  31. package/dist/plugins/mailbox/state.js.map +1 -1
  32. package/dist/transport/http/middleware/error-handler.d.ts +1 -1
  33. package/dist/user-config.d.ts +32 -0
  34. package/dist/user-config.d.ts.map +1 -1
  35. package/dist/user-config.js.map +1 -1
  36. package/package.json +2 -2
  37. package/src/core/agents/agent.ts +52 -14
  38. package/src/core/sessions/session-manager.ts +14 -5
  39. package/src/index.ts +1 -1
  40. package/src/plugins/agents/plugin.ts +228 -3
  41. package/src/plugins/agents/supervision.integration.test.ts +249 -0
  42. package/src/plugins/mailbox/mailbox.integration.test.ts +95 -0
  43. package/src/plugins/mailbox/plugin.ts +20 -0
  44. package/src/plugins/mailbox/schema.ts +1 -0
  45. package/src/plugins/mailbox/state.ts +2 -1
  46. package/src/user-config.ts +34 -0
@@ -12,11 +12,12 @@
12
12
 
13
13
  import z from 'zod/v4'
14
14
  import { AgentId, agentIdSchema, generateAgentId } from '~/core/agents/schema.js'
15
- import { agentEvents } from '~/core/agents/state.js'
15
+ import { type AgentState, agentEvents } from '~/core/agents/state.js'
16
16
  import { AgentErrors, ValidationErrors } from '~/core/errors.js'
17
17
  import { definePlugin } from '~/core/plugins/index.js'
18
18
  import { getNextAgentSeq } from '~/core/sessions/state.js'
19
19
  import { createTool } from '~/core/tools/definition.js'
20
+ import type { Logger } from '~/lib/logger/logger.js'
20
21
  import { Err, Ok } from '~/lib/utils/result.js'
21
22
  import { mailboxPlugin } from '~/plugins/mailbox/plugin.js'
22
23
 
@@ -36,6 +37,132 @@ export interface SpawnableAgentInfo {
36
37
  export interface AgentsPluginConfig {
37
38
  /** Map of agent name → spawn info for generating typed tools */
38
39
  agentDefinitions: Map<string, SpawnableAgentInfo>
40
+ /**
41
+ * Supervision tick interval (ms) for parent agents. When set, parent agents
42
+ * with active children receive a periodic <children-status> snapshot via
43
+ * mailbox so they stay aware of long-running sub-agents and the prompt cache
44
+ * stays warm.
45
+ *
46
+ * Default: undefined (disabled). Recommended: 240000 (4 min, just under
47
+ * the 5 min prompt cache TTL — see SUPERVISION_INTERVAL_CACHE_FRIENDLY).
48
+ */
49
+ superviseChildrenIntervalMs?: number
50
+ }
51
+
52
+ /**
53
+ * Recommended supervision interval — 4 min, just under prompt cache TTL.
54
+ * Each tick triggers a parent inference, keeping the prompt cache warm.
55
+ */
56
+ export const SUPERVISION_INTERVAL_CACHE_FRIENDLY = 240_000
57
+
58
+ /** Per-session runtime state held in plugin context — timers + trigger callback. */
59
+ interface AgentsPluginContext {
60
+ timers: Map<AgentId, ReturnType<typeof setTimeout>>
61
+ /** Set in onSessionReady — calls agents._supervisionTick via callPluginMethod (fresh ctx). */
62
+ triggerTick: ((agentId: AgentId) => Promise<unknown>) | null
63
+ /** null = supervision disabled for this session. */
64
+ intervalMs: number | null
65
+ logger: Logger | null
66
+ }
67
+
68
+ /**
69
+ * Get all direct children of an agent.
70
+ */
71
+ function getDirectChildren(sessionAgents: Map<AgentId, AgentState>, parentId: AgentId): AgentState[] {
72
+ const out: AgentState[] = []
73
+ for (const agent of sessionAgents.values()) {
74
+ if (agent.parentId === parentId) out.push(agent)
75
+ }
76
+ return out
77
+ }
78
+
79
+ /**
80
+ * Count assistant tool calls across conversation history + currently pending.
81
+ */
82
+ function countToolCalls(state: AgentState): number {
83
+ let total = state.pendingToolCalls.length
84
+ for (const m of state.conversationHistory) {
85
+ if (m.role === 'assistant' && m.toolCalls) total += m.toolCalls.length
86
+ }
87
+ return total
88
+ }
89
+
90
+ /**
91
+ * Count completed LLM inferences (= assistant turns in history).
92
+ */
93
+ function countLLMCalls(state: AgentState): number {
94
+ let total = 0
95
+ for (const m of state.conversationHistory) {
96
+ if (m.role === 'assistant') total++
97
+ }
98
+ return total
99
+ }
100
+
101
+ /**
102
+ * Build a compact "first N words..last M words" preview of the agent's most
103
+ * recent assistant message (skipping empty turns). Returns null if none.
104
+ */
105
+ function previewLastAssistant(state: AgentState, headWords = 5, tailWords = 5): string | null {
106
+ for (let i = state.conversationHistory.length - 1; i >= 0; i--) {
107
+ const m = state.conversationHistory[i]
108
+ if (m.role !== 'assistant') continue
109
+ const text = m.content?.trim()
110
+ if (!text) continue
111
+ const words = text.split(/\s+/)
112
+ if (words.length <= headWords + tailWords + 1) return text
113
+ return `${words.slice(0, headWords).join(' ')}..${words.slice(-tailWords).join(' ')}`
114
+ }
115
+ return null
116
+ }
117
+
118
+ /**
119
+ * Build a compact children-status snapshot for the given parent agent.
120
+ */
121
+ function buildChildrenStatus(sessionAgents: Map<AgentId, AgentState>, parentId: AgentId): string {
122
+ const children = getDirectChildren(sessionAgents, parentId)
123
+ const lines = children.map((c) => {
124
+ const tools = countToolCalls(c)
125
+ const llm = countLLMCalls(c)
126
+ const subs = getDirectChildren(sessionAgents, c.id).length
127
+ const last = previewLastAssistant(c)
128
+
129
+ const parts: string[] = [c.id, c.status]
130
+ parts.push(`${tools} tools`)
131
+ parts.push(`${llm} llm`)
132
+ if (subs > 0) parts.push(`${subs} sub${subs === 1 ? '' : 's'}`)
133
+ if (last) parts.push(`last "${last.replaceAll('"', "'")}"`)
134
+
135
+ return parts.join(', ')
136
+ })
137
+
138
+ return `<children-status>\n${lines.join('\n')}\n</children-status>`
139
+ }
140
+
141
+ /**
142
+ * (Re)schedule a supervision tick for an agent. Any existing timer is cleared first.
143
+ */
144
+ function scheduleSupervisionTick(
145
+ pluginContext: AgentsPluginContext,
146
+ agentId: AgentId,
147
+ delayMs: number,
148
+ ): void {
149
+ const existing = pluginContext.timers.get(agentId)
150
+ if (existing) clearTimeout(existing)
151
+
152
+ const timer = setTimeout(() => {
153
+ pluginContext.timers.delete(agentId)
154
+ const trigger = pluginContext.triggerTick
155
+ if (!trigger) return
156
+ trigger(agentId).catch((err) => {
157
+ pluginContext.logger?.error(
158
+ 'Supervision tick failed',
159
+ err instanceof Error ? err : undefined,
160
+ { agentId },
161
+ )
162
+ })
163
+ }, delayMs)
164
+
165
+ pluginContext.timers.set(agentId, timer)
39
166
  }
40
167
 
41
168
  /**
@@ -57,6 +184,12 @@ function createStartAgentSchema(agent: SpawnableAgentInfo) {
57
184
  export const agentsPlugin = definePlugin('agents')
58
185
  .pluginConfig<AgentsPluginConfig>()
59
186
  .dependencies([mailboxPlugin])
187
+ .context(async (): Promise<AgentsPluginContext> => ({
188
+ timers: new Map(),
189
+ triggerTick: null,
190
+ intervalMs: null,
191
+ logger: null,
192
+ }))
60
193
  .isEnabled((ctx) => {
61
194
  return ctx.agentConfig.spawnableAgents.length > 0
62
195
  })
@@ -108,6 +241,11 @@ export const agentsPlugin = definePlugin('agents')
108
241
  parentId: input.parentId,
109
242
  })
110
243
 
244
+ // Ensure parent has a supervision tick running now that it has a child.
245
+ if (ctx.pluginContext.intervalMs !== null) {
246
+ scheduleSupervisionTick(ctx.pluginContext, parentId, ctx.pluginContext.intervalMs)
247
+ }
248
+
111
249
  return Ok({ agentId })
112
250
  },
113
251
  })
@@ -196,12 +334,99 @@ export const agentsPlugin = definePlugin('agents')
196
334
  return Ok({})
197
335
  },
198
336
  })
199
- .systemPrompt(() => {
200
- return `## Working with Child Agents
337
+ .method('_supervisionTick', {
338
+ input: z.object({ agentId: agentIdSchema }),
339
+ output: z.object({}),
340
+ handler: async (ctx, input) => {
341
+ const agentId = AgentId(input.agentId)
342
+
343
+ // Self may already be gone (terminated mid-tick); just stop.
344
+ if (!ctx.sessionState.agents.has(agentId)) return Ok({})
345
+
346
+ const children = getDirectChildren(ctx.sessionState.agents, agentId)
347
+ if (children.length === 0) {
348
+ // No direct children → don't reschedule. spawn() will re-arm if/when needed.
349
+ return Ok({})
350
+ }
351
+
352
+ const snapshot = buildChildrenStatus(ctx.sessionState.agents, agentId)
353
+ const sendResult = await ctx.deps.mailbox.send({
354
+ toAgentId: agentId,
355
+ content: snapshot,
356
+ fromSupervisor: true,
357
+ })
358
+ if (!sendResult.ok) {
359
+ ctx.logger.warn('Supervision snapshot send failed', {
360
+ agentId,
361
+ error: sendResult.error.message,
362
+ })
363
+ }
364
+
365
+ // Reschedule the next tick from now (rolling).
366
+ if (ctx.pluginContext.intervalMs !== null) {
367
+ scheduleSupervisionTick(ctx.pluginContext, agentId, ctx.pluginContext.intervalMs)
368
+ }
369
+
370
+ return Ok({})
371
+ },
372
+ })
373
+ .sessionHook('onSessionReady', async (ctx) => {
374
+ const intervalMs = ctx.pluginConfig.superviseChildrenIntervalMs
375
+ if (intervalMs === undefined) {
376
+ // Supervision disabled (default). No timer wiring; spawn() and
377
+ // afterInference() check intervalMs === null and skip too.
378
+ ctx.pluginContext.intervalMs = null
379
+ return
380
+ }
381
+ ctx.pluginContext.intervalMs = intervalMs
382
+ ctx.pluginContext.logger = ctx.logger
383
+
384
+ // Wire the trigger callback — calls back via self.* so each tick gets a
385
+ // fresh ctx (live sessionState/pluginState/deps).
386
+ ctx.pluginContext.triggerTick = (agentId) => ctx.self._supervisionTick({ agentId })
387
+
388
+ // (Re-)schedule timers for every agent that currently has direct children.
389
+ // Covers initial session creation AND server-restart reload (onSessionReady
390
+ // fires in both paths). Worst-case drift after restart = intervalMs.
391
+ for (const agent of ctx.sessionState.agents.values()) {
392
+ if (getDirectChildren(ctx.sessionState.agents, agent.id).length > 0) {
393
+ scheduleSupervisionTick(ctx.pluginContext, agent.id, intervalMs)
394
+ }
395
+ }
396
+ })
397
+ .sessionHook('onSessionClose', async (ctx) => {
398
+ for (const t of ctx.pluginContext.timers.values()) clearTimeout(t)
399
+ ctx.pluginContext.timers.clear()
400
+ ctx.pluginContext.triggerTick = null
401
+ })
402
+ .hook('afterInference', async (ctx) => {
403
+ // Natural inference warmed the cache — push the next tick out by intervalMs
404
+ // so we don't double-charge for parents who are already actively interacting.
405
+ if (ctx.pluginContext.intervalMs !== null) {
406
+ if (getDirectChildren(ctx.sessionState.agents, ctx.agentId).length > 0) {
407
+ scheduleSupervisionTick(ctx.pluginContext, ctx.agentId, ctx.pluginContext.intervalMs)
408
+ }
409
+ }
410
+ return null
411
+ })
412
+ .systemPrompt((ctx) => {
413
+ const base = `## Working with Child Agents
201
414
 
202
415
  - **New task** → spawn a new agent using \`start_<agent_name>\`. You will receive the agent's ID in the result — use it with \`send_message\` for follow-up communication.
203
416
  - **Follow-up on an existing task** → send a message to the existing agent via \`send_message\` with the agent's ID. Do NOT spawn a new agent for feedback, corrections, or additional instructions on a task already assigned.
204
417
  - Spawned agents communicate back to you via \`send_message\`. Check your incoming messages for their results and progress updates.`
418
+
419
+ // Only include supervision instructions if supervision is actually enabled
420
+ // for this session — otherwise the section is misleading bloat.
421
+ if (ctx.pluginContext.intervalMs === null) return base
422
+
423
+ return `${base}
424
+
425
+ ### Supervision messages
426
+
427
+ You will periodically receive a \`<children-status>\` message from \`from="supervisor"\`. It is a status snapshot of your direct children — purely informational. Per child you'll see status, cumulative tool/llm call counts, sub-agent count, and a "first words..last words" preview of their last assistant turn.
428
+
429
+ Do NOT act on a supervision tick unless something is genuinely wrong (a child has been errored or stuck for a long time, you have a deadline, etc.). Most of the time you should just wait. Never reply to the supervisor.`
205
430
  })
206
431
  .tools((ctx) => {
207
432
  const spawnableAgents = ctx.agentConfig.spawnableAgents
@@ -0,0 +1,249 @@
1
+ import { describe, expect, it } from 'bun:test'
2
+ import { AgentId } from '~/core/agents/schema.js'
3
+ import { MockLLMProvider } from '~/core/llm/mock.js'
4
+ import { ToolCallId } from '~/core/tools/schema.js'
5
+ import { agentsPlugin } from '~/plugins/agents/plugin.js'
6
+ import { mailboxEvents } from '~/plugins/mailbox/index.js'
7
+ import { createMultiAgentPreset, TestHarness, type TestSession } from '~/testing/index.js'
8
+
9
+ /**
10
+ * Helper — wait until at least one supervision message has landed in the parent's
11
+ * mailbox (or timeout). We poll because supervision ticks fire on real timers.
12
+ */
13
+ async function waitForSupervisorMessage(
14
+ session: TestSession,
15
+ toAgentId: AgentId,
16
+ timeoutMs = 2000,
17
+ ): Promise<{ message: { from: unknown; content: string } } | undefined> {
18
+ const deadline = Date.now() + timeoutMs
19
+ while (Date.now() < deadline) {
20
+ const events = await session.getEventsByType(mailboxEvents, 'mailbox_message')
21
+ const found = events.find((e) =>
22
+ e.message.from === 'supervisor'
23
+ && e.toAgentId === toAgentId
24
+ && typeof e.message.content === 'string',
25
+ )
26
+ if (found) return found
27
+ await new Promise((r) => setTimeout(r, 25))
28
+ }
29
+ return undefined
30
+ }
31
+
32
+ describe('agents plugin supervision', () => {
33
+ it('parent with active children receives a periodic <children-status> snapshot', async () => {
34
+ let orchestratorCalls = 0
35
+ let workerCalls = 0
36
+
37
+ const harness = new TestHarness({
38
+ presets: [{
39
+ ...createMultiAgentPreset([
40
+ { name: 'worker', system: 'Worker agent.', tools: [], agents: [] },
41
+ ], { orchestratorSystem: 'Orchestrator agent.' }),
42
+ plugins: [{ pluginName: 'agents', definition: agentsPlugin, config: { superviseChildrenIntervalMs: 100 } }],
43
+ }],
44
+ mockHandler: (request) => {
45
+ if (request.systemPrompt.includes('Orchestrator')) {
46
+ orchestratorCalls++
47
+ if (orchestratorCalls === 1) {
48
+ return {
49
+ content: null,
50
+ toolCalls: [{ id: ToolCallId('tc1'), name: 'start_worker', input: { message: 'Long-running task' } }],
51
+ finishReason: 'stop',
52
+ metrics: MockLLMProvider.defaultMetrics(),
53
+ }
54
+ }
55
+ // Subsequent calls: orchestrator does nothing more, just acknowledges.
56
+ return { content: 'noted', toolCalls: [], finishReason: 'stop', metrics: MockLLMProvider.defaultMetrics() }
57
+ }
58
+ // Worker: takes a long time — says something but never reports back.
59
+ workerCalls++
60
+ return { content: `Working on step ${workerCalls}`, toolCalls: [], finishReason: 'stop', metrics: MockLLMProvider.defaultMetrics() }
61
+ },
62
+ })
63
+
64
+ const session = await harness.createSession('test')
65
+ await session.sendMessage('Start')
66
+
67
+ // Orchestrator is the entry agent in this preset. Wait for a tick.
68
+ const orchestratorId = session.getEntryAgentId()!
69
+ const supervisorMsg = await waitForSupervisorMessage(session as never, orchestratorId)
70
+
71
+ expect(supervisorMsg).toBeDefined()
72
+ expect(supervisorMsg!.message.content).toContain('<children-status>')
73
+ expect(supervisorMsg!.message.content).toContain('worker_1')
74
+ // Cumulative LLM call count should be present
75
+ expect(supervisorMsg!.message.content).toMatch(/worker_1[^,\n]*,[^,\n]*,\s*\d+ tools,\s*\d+ llm/)
76
+
77
+ await harness.shutdown()
78
+ })
79
+
80
+ it('default (no config) → supervision disabled, no tick fires', async () => {
81
+ const harness = new TestHarness({
82
+ presets: [createMultiAgentPreset([
83
+ { name: 'worker', system: 'Worker agent.', tools: [], agents: [] },
84
+ ], { orchestratorSystem: 'Orchestrator agent.' })],
85
+ mockHandler: (request) => {
86
+ if (request.systemPrompt.includes('Orchestrator')) {
87
+ return {
88
+ content: null,
89
+ toolCalls: [{ id: ToolCallId('tc1'), name: 'start_worker', input: { message: 'Do work' } }],
90
+ finishReason: 'stop',
91
+ metrics: MockLLMProvider.defaultMetrics(),
92
+ }
93
+ }
94
+ return { content: 'Working', toolCalls: [], finishReason: 'stop', metrics: MockLLMProvider.defaultMetrics() }
95
+ },
96
+ })
97
+
98
+ const session = await harness.createSession('test')
99
+ await session.sendMessage('Start')
100
+
101
+ // Wait long enough for ticks if they were enabled (they shouldn't).
102
+ await new Promise((r) => setTimeout(r, 300))
103
+
104
+ const events = await session.getEventsByType(mailboxEvents, 'mailbox_message')
105
+ const supervisorMessages = events.filter(e => e.message.from === 'supervisor')
106
+ expect(supervisorMessages).toHaveLength(0)
107
+
108
+ await harness.shutdown()
109
+ })
110
+
111
+ it('parent without children → no tick fires', async () => {
112
+ const harness = new TestHarness({
113
+ presets: [{
114
+ ...createMultiAgentPreset([
115
+ { name: 'worker', system: 'Worker agent.', tools: [], agents: [] },
116
+ ], { orchestratorSystem: 'Orchestrator agent.' }),
117
+ plugins: [{ pluginName: 'agents', definition: agentsPlugin, config: { superviseChildrenIntervalMs: 100 } }],
118
+ }],
119
+ mockHandler: (request) => {
120
+ // Orchestrator never spawns anyone.
121
+ if (request.systemPrompt.includes('Orchestrator')) {
122
+ return { content: 'Done without spawning', toolCalls: [], finishReason: 'stop', metrics: MockLLMProvider.defaultMetrics() }
123
+ }
124
+ return { content: 'unused', toolCalls: [], finishReason: 'stop', metrics: MockLLMProvider.defaultMetrics() }
125
+ },
126
+ })
127
+
128
+ const session = await harness.createSession('test')
129
+ await session.sendAndWaitForIdle('Start')
130
+
131
+ // Give supervision plenty of room to fire (it shouldn't).
132
+ await new Promise((r) => setTimeout(r, 300))
133
+
134
+ const events = await session.getEventsByType(mailboxEvents, 'mailbox_message')
135
+ const supervisorMessages = events.filter(e => e.message.from === 'supervisor')
136
+ expect(supervisorMessages).toHaveLength(0)
137
+
138
+ await harness.shutdown()
139
+ })
140
+
141
+ it('snapshot includes "first words..last words" preview of last assistant turn', async () => {
142
+ let orchestratorCalls = 0
143
+
144
+ const harness = new TestHarness({
145
+ presets: [{
146
+ ...createMultiAgentPreset([
147
+ { name: 'worker', system: 'Worker agent.', tools: [], agents: [] },
148
+ ], { orchestratorSystem: 'Orchestrator agent.' }),
149
+ plugins: [{ pluginName: 'agents', definition: agentsPlugin, config: { superviseChildrenIntervalMs: 100 } }],
150
+ }],
151
+ mockHandler: (request) => {
152
+ if (request.systemPrompt.includes('Orchestrator')) {
153
+ orchestratorCalls++
154
+ if (orchestratorCalls === 1) {
155
+ return {
156
+ content: null,
157
+ toolCalls: [{ id: ToolCallId('tc1'), name: 'start_worker', input: { message: 'Long task' } }],
158
+ finishReason: 'stop',
159
+ metrics: MockLLMProvider.defaultMetrics(),
160
+ }
161
+ }
162
+ return { content: 'ack', toolCalls: [], finishReason: 'stop', metrics: MockLLMProvider.defaultMetrics() }
163
+ }
164
+ // Worker says a long sentence that should be truncated to first..last words
165
+ return {
166
+ content: 'Started fetching data and now I am running through the pipeline analyzing the response carefully',
167
+ toolCalls: [],
168
+ finishReason: 'stop',
169
+ metrics: MockLLMProvider.defaultMetrics(),
170
+ }
171
+ },
172
+ })
173
+
174
+ const session = await harness.createSession('test')
175
+ await session.sendMessage('Start')
176
+
177
+ const orchestratorId = session.getEntryAgentId()!
178
+ const msg = await waitForSupervisorMessage(session as never, orchestratorId)
179
+
180
+ expect(msg).toBeDefined()
181
+ // Should contain both head (first 5 words) and tail (last 5 words), joined by ".."
182
+ expect(msg!.message.content).toContain('Started fetching data and now')
183
+ expect(msg!.message.content).toContain('pipeline analyzing the response carefully')
184
+ expect(msg!.message.content).toMatch(/\.\.pipeline/)
185
+
186
+ await harness.shutdown()
187
+ })
188
+
189
+ it('server restart re-establishes timers via onSessionReady', async () => {
190
+ const sharedEventStore = new (await import('~/core/events/memory.js')).MemoryEventStore()
191
+
192
+ // Counter shared across phases — phase 1 spawns once, then orchestrator goes idle;
193
+ // phase 2 just acknowledges any wake-up triggered by the supervision tick.
194
+ let orchestratorCalls = 0
195
+
196
+ const buildHarness = (intervalMs: number | undefined) => new TestHarness({
197
+ eventStore: sharedEventStore,
198
+ presets: [{
199
+ ...createMultiAgentPreset([
200
+ { name: 'worker', system: 'Worker agent.', tools: [], agents: [] },
201
+ ], { orchestratorSystem: 'Orchestrator agent.' }),
202
+ ...(intervalMs !== undefined && {
203
+ plugins: [{ pluginName: 'agents', definition: agentsPlugin, config: { superviseChildrenIntervalMs: intervalMs } }],
204
+ }),
205
+ }],
206
+ mockHandler: (request) => {
207
+ if (request.systemPrompt.includes('Orchestrator')) {
208
+ orchestratorCalls++
209
+ if (orchestratorCalls === 1) {
210
+ return {
211
+ content: null,
212
+ toolCalls: [{ id: ToolCallId('tc1'), name: 'start_worker', input: { message: 'Long task' } }],
213
+ finishReason: 'stop',
214
+ metrics: MockLLMProvider.defaultMetrics(),
215
+ }
216
+ }
217
+ return { content: 'noted', toolCalls: [], finishReason: 'stop', metrics: MockLLMProvider.defaultMetrics() }
218
+ }
219
+ return { content: 'still working', toolCalls: [], finishReason: 'stop', metrics: MockLLMProvider.defaultMetrics() }
220
+ },
221
+ })
222
+
223
+ // Phase 1: create session with supervision DISABLED (default) so no ticks pre-restart.
224
+ const harness1 = buildHarness(undefined)
225
+ const session1 = await harness1.createSession('test')
226
+ await session1.sendAndWaitForIdle('Start')
227
+ const hasWorker = () => {
228
+ for (const agent of session1.state.agents.values()) {
229
+ if (agent.definitionName === 'worker') return true
230
+ }
231
+ return false
232
+ }
233
+ expect(hasWorker()).toBe(true)
234
+ const sessionId = session1.sessionId
235
+ await harness1.shutdown()
236
+
237
+ // Phase 2: restart with supervision enabled. onSessionReady should
238
+ // re-arm the orchestrator's tick because it has a child.
239
+ const harness2 = buildHarness(100)
240
+ const session2 = await harness2.openSession(sessionId)
241
+
242
+ const orchestratorId = session2.getEntryAgentId()!
243
+ const msg = await waitForSupervisorMessage(session2, orchestratorId, 1500)
244
+ expect(msg).toBeDefined()
245
+ expect(msg!.message.content).toContain('worker_1')
246
+
247
+ await harness2.shutdown()
248
+ })
249
+ })
@@ -581,6 +581,101 @@ describe('mailbox plugin', () => {
581
581
  await harness.shutdown()
582
582
  })
583
583
 
584
+ it('empty-stop LLM response → agent retries; persistent empty → onError reports to parent', async () => {
585
+ let workerCalls = 0
586
+ let orchestratorCalls = 0
587
+
588
+ const harness = new TestHarness({
589
+ presets: [createMultiAgentPreset([
590
+ { name: 'worker', system: 'Worker agent.', tools: [], agents: [] },
591
+ ], { orchestratorSystem: 'Orchestrator agent.' })],
592
+ mockHandler: (request) => {
593
+ if (request.systemPrompt.includes('Orchestrator')) {
594
+ orchestratorCalls++
595
+ if (orchestratorCalls === 1) {
596
+ return {
597
+ content: null,
598
+ toolCalls: [{ id: ToolCallId('tc1'), name: 'start_worker', input: { message: 'Do work' } }],
599
+ finishReason: 'stop',
600
+ metrics: MockLLMProvider.defaultMetrics(),
601
+ }
602
+ }
603
+ return { content: 'Done', toolCalls: [], finishReason: 'stop', metrics: MockLLMProvider.defaultMetrics() }
604
+ }
605
+ workerCalls++
606
+ // Always empty-stop → triggers retry until exhausted, then onError
607
+ return { content: null, toolCalls: [], finishReason: 'stop', metrics: MockLLMProvider.defaultMetrics() }
608
+ },
609
+ })
610
+
611
+ const session = await harness.createSession('test')
612
+ await session.sendMessage('Start')
613
+
614
+ // Worker ends up errored (not idle); poll for the error message to parent.
615
+ const deadline = Date.now() + 5000
616
+ let errMsg: { message: { content: string; from: unknown } } | undefined
617
+ while (Date.now() < deadline) {
618
+ const events = await session.getEventsByType(mailboxEvents, 'mailbox_message')
619
+ errMsg = events.find(e =>
620
+ e.message.from === AgentId('worker_1')
621
+ && typeof e.message.content === 'string'
622
+ && e.message.content.startsWith('Agent encountered an error:'),
623
+ )
624
+ if (errMsg) break
625
+ await new Promise((r) => setTimeout(r, 50))
626
+ }
627
+
628
+ // Initial + 2 retries = 3 worker LLM calls
629
+ expect(workerCalls).toBe(3)
630
+ expect(errMsg).toBeDefined()
631
+
632
+ await harness.shutdown()
633
+ })
634
+
635
+ it('empty-stop LLM response → recovers on retry, no error sent', async () => {
636
+ let workerCalls = 0
637
+ let orchestratorCalls = 0
638
+
639
+ const harness = new TestHarness({
640
+ presets: [createMultiAgentPreset([
641
+ { name: 'worker', system: 'Worker agent.', tools: [], agents: [] },
642
+ ], { orchestratorSystem: 'Orchestrator agent.' })],
643
+ mockHandler: (request) => {
644
+ if (request.systemPrompt.includes('Orchestrator')) {
645
+ orchestratorCalls++
646
+ if (orchestratorCalls === 1) {
647
+ return {
648
+ content: null,
649
+ toolCalls: [{ id: ToolCallId('tc1'), name: 'start_worker', input: { message: 'Do work' } }],
650
+ finishReason: 'stop',
651
+ metrics: MockLLMProvider.defaultMetrics(),
652
+ }
653
+ }
654
+ return { content: 'Done', toolCalls: [], finishReason: 'stop', metrics: MockLLMProvider.defaultMetrics() }
655
+ }
656
+ workerCalls++
657
+ if (workerCalls === 1) {
658
+ return { content: null, toolCalls: [], finishReason: 'stop', metrics: MockLLMProvider.defaultMetrics() }
659
+ }
660
+ return { content: 'Recovered', toolCalls: [], finishReason: 'stop', metrics: MockLLMProvider.defaultMetrics() }
661
+ },
662
+ })
663
+
664
+ const session = await harness.createSession('test')
665
+ await session.sendAndWaitForIdle('Start')
666
+
667
+ expect(workerCalls).toBe(2)
668
+
669
+ const events = await session.getEventsByType(mailboxEvents, 'mailbox_message')
670
+ const errMsg = events.find(e =>
671
+ typeof e.message.content === 'string'
672
+ && e.message.content.startsWith('Agent encountered an error:'),
673
+ )
674
+ expect(errMsg).toBeUndefined()
675
+
676
+ await harness.shutdown()
677
+ })
678
+
584
679
  it('agent without parent → no completion message even with flag true', async () => {
585
680
  const harness = new TestHarness({
586
681
  presets: [createTestPreset({
@@ -90,11 +90,31 @@ export const mailboxPlugin = definePlugin("mailbox")
90
90
  toAgentId: agentIdSchema,
91
91
  content: z.string(),
92
92
  debug: z.boolean().optional(),
93
+ fromSupervisor: z.boolean().optional(),
93
94
  }),
94
95
  output: z.object({ messageId: z.string() }),
95
96
  handler: async (ctx, input) => {
96
97
  const { toAgentId, content } = input;
97
98
 
99
+ if (input.fromSupervisor) {
100
+ // System-emitted supervision status — bypasses communication validation.
101
+ const messageId = generateMessageId(getNextMessageSeq(ctx.pluginState));
102
+ await ctx.emitEvent(
103
+ mailboxEvents.create("mailbox_message", {
104
+ toAgentId,
105
+ message: {
106
+ id: messageId,
107
+ from: "supervisor",
108
+ content,
109
+ timestamp: Date.now(),
110
+ consumed: false,
111
+ },
112
+ }),
113
+ );
114
+ ctx.scheduleAgent(toAgentId);
115
+ return Ok({ messageId });
116
+ }
117
+
98
118
  if (input.debug) {
99
119
  // Debug messages bypass communication validation
100
120
  const messageId = generateMessageId(getNextMessageSeq(ctx.pluginState));
@@ -58,6 +58,7 @@ export type MailboxMessageSender =
58
58
  | WorkerId
59
59
  | 'user'
60
60
  | 'debug'
61
+ | 'supervisor'
61
62
  | typeof ORCHESTRATOR_ROLE
62
63
  | typeof COMMUNICATOR_ROLE
63
64
 
@@ -11,6 +11,7 @@ export type MailboxMessageSender =
11
11
  | WorkerId
12
12
  | 'user'
13
13
  | 'debug'
14
+ | 'supervisor'
14
15
  | typeof ORCHESTRATOR_ROLE
15
16
  | typeof COMMUNICATOR_ROLE
16
17
 
@@ -23,7 +24,7 @@ export const mailboxEvents = createEventsFactory({
23
24
  from: z4.union([
24
25
  agentIdSchema,
25
26
  workerIdSchema,
26
- z4.enum(['user', 'debug', COMMUNICATOR_ROLE, ORCHESTRATOR_ROLE]),
27
+ z4.enum(['user', 'debug', 'supervisor', COMMUNICATOR_ROLE, ORCHESTRATOR_ROLE]),
27
28
  ]),
28
29
  content: z4.string(),
29
30
  timestamp: z4.number(),