@katyella/legio 0.1.3 → 0.2.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (112)
  1. package/CHANGELOG.md +61 -3
  2. package/README.md +21 -10
  3. package/agents/builder.md +11 -10
  4. package/agents/coordinator.md +36 -27
  5. package/agents/cto.md +9 -8
  6. package/agents/gateway.md +28 -12
  7. package/agents/lead.md +45 -30
  8. package/agents/merger.md +4 -4
  9. package/agents/monitor.md +10 -9
  10. package/agents/reviewer.md +8 -8
  11. package/agents/scout.md +10 -10
  12. package/agents/supervisor.md +60 -45
  13. package/package.json +2 -2
  14. package/src/agents/hooks-deployer.test.ts +46 -41
  15. package/src/agents/hooks-deployer.ts +10 -9
  16. package/src/agents/manifest.test.ts +6 -2
  17. package/src/agents/overlay.test.ts +9 -7
  18. package/src/agents/overlay.ts +29 -7
  19. package/src/commands/agents.test.ts +1 -5
  20. package/src/commands/clean.test.ts +2 -5
  21. package/src/commands/clean.ts +25 -1
  22. package/src/commands/completions.test.ts +1 -1
  23. package/src/commands/completions.ts +26 -7
  24. package/src/commands/coordinator.test.ts +87 -82
  25. package/src/commands/coordinator.ts +94 -48
  26. package/src/commands/costs.test.ts +2 -6
  27. package/src/commands/dashboard.test.ts +2 -5
  28. package/src/commands/doctor.test.ts +2 -6
  29. package/src/commands/down.ts +3 -3
  30. package/src/commands/errors.test.ts +2 -6
  31. package/src/commands/feed.test.ts +2 -6
  32. package/src/commands/gateway.test.ts +43 -17
  33. package/src/commands/gateway.ts +101 -11
  34. package/src/commands/hooks.test.ts +2 -5
  35. package/src/commands/init.test.ts +4 -13
  36. package/src/commands/inspect.test.ts +2 -6
  37. package/src/commands/log.test.ts +2 -6
  38. package/src/commands/logs.test.ts +2 -9
  39. package/src/commands/mail.test.ts +76 -215
  40. package/src/commands/mail.ts +43 -187
  41. package/src/commands/metrics.test.ts +3 -10
  42. package/src/commands/nudge.ts +15 -0
  43. package/src/commands/prime.test.ts +4 -11
  44. package/src/commands/replay.test.ts +2 -6
  45. package/src/commands/server.test.ts +1 -5
  46. package/src/commands/server.ts +1 -1
  47. package/src/commands/sling.test.ts +6 -1
  48. package/src/commands/sling.ts +42 -17
  49. package/src/commands/spec.test.ts +2 -5
  50. package/src/commands/status.test.ts +2 -4
  51. package/src/commands/stop.test.ts +2 -5
  52. package/src/commands/supervisor.ts +6 -6
  53. package/src/commands/trace.test.ts +2 -6
  54. package/src/commands/up.test.ts +43 -9
  55. package/src/commands/up.ts +15 -11
  56. package/src/commands/watchman.ts +327 -0
  57. package/src/commands/worktree.test.ts +2 -6
  58. package/src/config.test.ts +34 -104
  59. package/src/config.ts +120 -32
  60. package/src/doctor/agents.test.ts +52 -2
  61. package/src/doctor/agents.ts +4 -2
  62. package/src/doctor/config-check.test.ts +7 -2
  63. package/src/doctor/consistency.test.ts +7 -2
  64. package/src/doctor/databases.test.ts +6 -2
  65. package/src/doctor/dependencies.test.ts +18 -13
  66. package/src/doctor/dependencies.ts +23 -94
  67. package/src/doctor/logs.test.ts +7 -2
  68. package/src/doctor/merge-queue.test.ts +6 -2
  69. package/src/doctor/structure.test.ts +7 -2
  70. package/src/doctor/version.test.ts +7 -2
  71. package/src/e2e/init-sling-lifecycle.test.ts +2 -5
  72. package/src/index.ts +7 -7
  73. package/src/mail/pending.ts +120 -0
  74. package/src/mail/store.test.ts +89 -0
  75. package/src/mail/store.ts +11 -0
  76. package/src/merge/resolver.test.ts +518 -489
  77. package/src/server/index.ts +33 -2
  78. package/src/server/public/app.js +3 -3
  79. package/src/server/public/components/message-bubble.js +11 -1
  80. package/src/server/public/components/terminal-panel.js +66 -74
  81. package/src/server/public/views/chat.js +18 -2
  82. package/src/server/public/views/costs.js +5 -5
  83. package/src/server/public/views/dashboard.js +80 -51
  84. package/src/server/public/views/gateway-chat.js +37 -131
  85. package/src/server/public/views/inspect.js +16 -4
  86. package/src/server/public/views/issues.js +16 -12
  87. package/src/server/routes.test.ts +55 -39
  88. package/src/server/routes.ts +38 -26
  89. package/src/test-helpers.ts +6 -3
  90. package/src/tracker/beads.ts +159 -0
  91. package/src/tracker/exec.ts +44 -0
  92. package/src/tracker/factory.test.ts +283 -0
  93. package/src/tracker/factory.ts +59 -0
  94. package/src/tracker/seeds.ts +156 -0
  95. package/src/tracker/types.ts +46 -0
  96. package/src/types.ts +11 -2
  97. package/src/{watchdog → watchman}/daemon.test.ts +421 -515
  98. package/src/watchman/daemon.ts +940 -0
  99. package/src/worktree/tmux.test.ts +2 -1
  100. package/src/worktree/tmux.ts +4 -4
  101. package/templates/hooks.json.tmpl +17 -17
  102. package/src/beads/client.test.ts +0 -210
  103. package/src/commands/merge.test.ts +0 -676
  104. package/src/commands/watch.test.ts +0 -152
  105. package/src/commands/watch.ts +0 -238
  106. package/src/test-helpers.test.ts +0 -97
  107. package/src/watchdog/daemon.ts +0 -533
  108. package/src/watchdog/health.test.ts +0 -371
  109. package/src/watchdog/triage.test.ts +0 -162
  110. package/src/worktree/manager.test.ts +0 -444
  111. package/src/{watchdog → watchman}/health.ts +0 -0
  112. package/src/{watchdog → watchman}/triage.ts +0 -0
@@ -1,12 +1,13 @@
1
1
  /**
2
- * Integration tests for the watchdog daemon tick loop.
2
+ * Integration tests for the unified watchman daemon (health + mail + beacon).
3
3
  *
4
4
  * Uses real filesystem (temp directories via mkdtemp) and real SessionStore
5
5
  * (better-sqlite3) for session persistence, plus real health evaluation logic.
6
+ * Uses real SQLite for MailStore in mail tick tests.
6
7
  *
7
- * Only tmux operations (isSessionAlive, killSession) are mocked via dependency
8
- * injection (_tmux params) because real tmux interferes with developer sessions
9
- * and is fragile in CI.
8
+ * Only tmux operations (isSessionAlive, killSession, capturePaneContent, sendKeys)
9
+ * are mocked via dependency injection (_tmux params) because real tmux interferes
10
+ * with developer sessions and is fragile in CI.
10
11
  *
11
12
  * Does NOT use mock.module() — it leaks across test files. See mulch record
12
13
  * mx-56558b for background.
@@ -17,9 +18,10 @@ import { tmpdir } from "node:os";
17
18
  import { join } from "node:path";
18
19
  import { afterEach, beforeEach, describe, expect, test } from "vitest";
19
20
  import { createEventStore } from "../events/store.ts";
21
+ import { createMailStore, type MailStore } from "../mail/store.ts";
20
22
  import { createSessionStore } from "../sessions/store.ts";
21
23
  import type { AgentSession, HealthCheck, SessionCheckpoint, StoredEvent } from "../types.ts";
22
- import { runDaemonTick } from "./daemon.ts";
24
+ import { type AgentMailState, runDaemonTick, runMailTick, type WatchmanOptions } from "./daemon.ts";
23
25
 
24
26
  // === Test constants ===
25
27
 
@@ -119,7 +121,7 @@ function tmuxWithLiveness(aliveMap: Record<string, boolean>): {
119
121
  };
120
122
  }
121
123
 
122
- // === Tests ===
124
+ // === Health tick tests ===
123
125
 
124
126
  let tempRoot: string;
125
127
 
@@ -131,11 +133,8 @@ afterEach(async () => {
131
133
  await rm(tempRoot, { recursive: true, force: true });
132
134
  });
133
135
 
134
- describe("daemon tick", () => {
135
- // --- Test 1: tick with no sessions file ---
136
-
136
+ describe("daemon health tick", () => {
137
137
  test("tick with no sessions is a graceful no-op", async () => {
138
- // No sessions in the store — daemon should not crash
139
138
  const checks: HealthCheck[] = [];
140
139
 
141
140
  await runDaemonTick({
@@ -145,12 +144,9 @@ describe("daemon tick", () => {
145
144
  _tmux: tmuxAllAlive(),
146
145
  });
147
146
 
148
- // No health checks should have been produced (no sessions to check)
149
147
  expect(checks).toHaveLength(0);
150
148
  });
151
149
 
152
- // --- Test 2: tick with healthy sessions ---
153
-
154
150
  test("tick with healthy sessions produces no state changes", async () => {
155
151
  const session = makeSession({
156
152
  state: "working",
@@ -174,14 +170,11 @@ describe("daemon tick", () => {
174
170
  expect(check?.state).toBe("working");
175
171
  expect(check?.action).toBe("none");
176
172
 
177
- // Session state should be unchanged because state didn't change.
178
173
  const reloaded = readSessionsFromStore(tempRoot);
179
174
  expect(reloaded).toHaveLength(1);
180
175
  expect(reloaded[0]?.state).toBe("working");
181
176
  });
182
177
 
183
- // --- Test 3: tick with dead tmux -> zombie transition ---
184
-
185
178
  test("tick with dead tmux transitions session to zombie and fires terminate", async () => {
186
179
  const session = makeSession({
187
180
  agentName: "dead-agent",
@@ -202,23 +195,18 @@ describe("daemon tick", () => {
202
195
  _tmux: tmuxMock,
203
196
  });
204
197
 
205
- // Health check should detect zombie with terminate action
206
198
  expect(checks).toHaveLength(1);
207
199
  expect(checks[0]?.state).toBe("zombie");
208
200
  expect(checks[0]?.action).toBe("terminate");
209
201
 
210
- // tmux is dead so killSession should NOT be called (only kills if tmuxAlive)
211
202
  expect(tmuxMock.killed).toHaveLength(0);
212
203
 
213
- // Session state should be persisted as zombie
214
204
  const reloaded = readSessionsFromStore(tempRoot);
215
205
  expect(reloaded).toHaveLength(1);
216
206
  expect(reloaded[0]?.state).toBe("zombie");
217
207
  });
218
208
 
219
209
  test("tick with alive tmux but zombie-old activity calls killSession", async () => {
220
- // tmux IS alive but time-based zombie threshold is exceeded,
221
- // causing a terminate action — killSession SHOULD be called.
222
210
  const oldActivity = new Date(Date.now() - 200_000).toISOString();
223
211
  const session = makeSession({
224
212
  agentName: "zombie-agent",
@@ -242,16 +230,12 @@ describe("daemon tick", () => {
242
230
  expect(checks).toHaveLength(1);
243
231
  expect(checks[0]?.action).toBe("terminate");
244
232
 
245
- // tmux was alive, so killSession SHOULD have been called
246
233
  expect(tmuxMock.killed).toContain("legio-zombie-agent");
247
234
 
248
- // Session persisted as zombie
249
235
  const reloaded = readSessionsFromStore(tempRoot);
250
236
  expect(reloaded[0]?.state).toBe("zombie");
251
237
  });
252
238
 
253
- // --- Test 4: session persistence round-trip ---
254
-
255
239
  test("session persistence round-trip: load, modify, save, reload", async () => {
256
240
  const sessions: AgentSession[] = [
257
241
  makeSession({
@@ -266,7 +250,6 @@ describe("daemon tick", () => {
266
250
  agentName: "agent-beta",
267
251
  tmuxSession: "legio-agent-beta",
268
252
  state: "working",
269
- // Make beta's tmux dead so it transitions to zombie
270
253
  lastActivity: new Date().toISOString(),
271
254
  }),
272
255
  makeSession({
@@ -282,8 +265,8 @@ describe("daemon tick", () => {
282
265
 
283
266
  const tmuxMock = tmuxWithLiveness({
284
267
  "legio-agent-alpha": true,
285
- "legio-agent-beta": false, // Dead — should become zombie
286
- "legio-agent-gamma": true, // Doesn't matter — completed is skipped
268
+ "legio-agent-beta": false,
269
+ "legio-agent-gamma": true,
287
270
  });
288
271
 
289
272
  const checks: HealthCheck[] = [];
@@ -295,10 +278,8 @@ describe("daemon tick", () => {
295
278
  _tmux: tmuxMock,
296
279
  });
297
280
 
298
- // Completed sessions are skipped — only 2 health checks
299
281
  expect(checks).toHaveLength(2);
300
282
 
301
- // Reload and verify persistence
302
283
  const reloaded = readSessionsFromStore(tempRoot);
303
284
  expect(reloaded).toHaveLength(3);
304
285
 
@@ -306,42 +287,11 @@ describe("daemon tick", () => {
306
287
  const beta = reloaded.find((s) => s.agentName === "agent-beta");
307
288
  const gamma = reloaded.find((s) => s.agentName === "agent-gamma");
308
289
 
309
- expect(alpha).toBeDefined();
310
- expect(beta).toBeDefined();
311
- expect(gamma).toBeDefined();
312
-
313
- // Alpha: tmux alive + recent activity — stays working
314
290
  expect(alpha?.state).toBe("working");
315
-
316
- // Beta: tmux dead — zombie (ZFC rule 1)
317
291
  expect(beta?.state).toBe("zombie");
318
-
319
- // Gamma: completed — unchanged (skipped by daemon)
320
292
  expect(gamma?.state).toBe("completed");
321
293
  });
322
294
 
323
- test("session persistence: state unchanged when nothing changes", async () => {
324
- const session = makeSession({
325
- state: "working",
326
- lastActivity: new Date().toISOString(),
327
- });
328
-
329
- writeSessionsToStore(tempRoot, [session]);
330
-
331
- await runDaemonTick({
332
- root: tempRoot,
333
- ...THRESHOLDS,
334
- _tmux: tmuxAllAlive(),
335
- });
336
-
337
- // Session state should remain unchanged since nothing triggered a transition
338
- const reloaded = readSessionsFromStore(tempRoot);
339
- expect(reloaded).toHaveLength(1);
340
- expect(reloaded[0]?.state).toBe("working");
341
- });
342
-
343
- // --- Edge cases ---
344
-
345
295
  test("completed sessions are skipped entirely", async () => {
346
296
  const session = makeSession({ state: "completed" });
347
297
 
@@ -353,99 +303,15 @@ describe("daemon tick", () => {
353
303
  root: tempRoot,
354
304
  ...THRESHOLDS,
355
305
  onHealthCheck: (c) => checks.push(c),
356
- _tmux: tmuxAllDead(), // Would be zombie if not skipped
306
+ _tmux: tmuxAllDead(),
357
307
  });
358
308
 
359
- // No health checks emitted for completed sessions
360
309
  expect(checks).toHaveLength(0);
361
310
 
362
- // State unchanged
363
311
  const reloaded = readSessionsFromStore(tempRoot);
364
312
  expect(reloaded[0]?.state).toBe("completed");
365
313
  });
366
314
 
367
- test("multiple sessions with mixed states are all processed", async () => {
368
- const now = Date.now();
369
- const sessions: AgentSession[] = [
370
- makeSession({
371
- id: "s1",
372
- agentName: "healthy",
373
- tmuxSession: "legio-healthy",
374
- state: "working",
375
- lastActivity: new Date(now).toISOString(),
376
- }),
377
- makeSession({
378
- id: "s2",
379
- agentName: "dying",
380
- tmuxSession: "legio-dying",
381
- state: "working",
382
- lastActivity: new Date(now).toISOString(),
383
- }),
384
- makeSession({
385
- id: "s3",
386
- agentName: "stale",
387
- tmuxSession: "legio-stale",
388
- state: "working",
389
- lastActivity: new Date(now - 60_000).toISOString(),
390
- }),
391
- makeSession({
392
- id: "s4",
393
- agentName: "done",
394
- tmuxSession: "legio-done",
395
- state: "completed",
396
- }),
397
- ];
398
-
399
- writeSessionsToStore(tempRoot, sessions);
400
-
401
- const tmuxMock = tmuxWithLiveness({
402
- "legio-healthy": true,
403
- "legio-dying": false,
404
- "legio-stale": true,
405
- "legio-done": false,
406
- });
407
-
408
- const checks: HealthCheck[] = [];
409
-
410
- await runDaemonTick({
411
- root: tempRoot,
412
- ...THRESHOLDS,
413
- onHealthCheck: (c) => checks.push(c),
414
- _tmux: tmuxMock,
415
- });
416
-
417
- // 3 non-completed sessions processed
418
- expect(checks).toHaveLength(3);
419
-
420
- const reloaded = readSessionsFromStore(tempRoot);
421
-
422
- const healthy = reloaded.find((s) => s.agentName === "healthy");
423
- const dying = reloaded.find((s) => s.agentName === "dying");
424
- const stale = reloaded.find((s) => s.agentName === "stale");
425
- const done = reloaded.find((s) => s.agentName === "done");
426
-
427
- expect(healthy?.state).toBe("working");
428
- expect(dying?.state).toBe("zombie");
429
- // 60s old activity is below zombieMs (120s) — session stays working
430
- expect(stale?.state).toBe("working");
431
- expect(done?.state).toBe("completed");
432
- });
433
-
434
- test("empty sessions array is a no-op", async () => {
435
- writeSessionsToStore(tempRoot, []);
436
-
437
- const checks: HealthCheck[] = [];
438
-
439
- await runDaemonTick({
440
- root: tempRoot,
441
- ...THRESHOLDS,
442
- onHealthCheck: (c) => checks.push(c),
443
- _tmux: tmuxAllAlive(),
444
- });
445
-
446
- expect(checks).toHaveLength(0);
447
- });
448
-
449
315
  test("booting session with recent activity transitions to working", async () => {
450
316
  const session = makeSession({
451
317
  state: "booting",
@@ -470,24 +336,8 @@ describe("daemon tick", () => {
470
336
  expect(reloaded[0]?.state).toBe("working");
471
337
  });
472
338
 
473
- // --- Backward compatibility ---
474
-
475
- test("sessions with default escalation fields are processed correctly", async () => {
476
- // Write a session with default (zero) escalation fields
477
- const session = makeSession({
478
- id: "session-old",
479
- agentName: "old-agent",
480
- worktreePath: "/tmp/test",
481
- branchName: "legio/old-agent/task",
482
- beadId: "task",
483
- tmuxSession: "legio-old-agent",
484
- state: "working",
485
- pid: process.pid,
486
- escalationLevel: 0,
487
- stalledSince: null,
488
- });
489
-
490
- writeSessionsToStore(tempRoot, [session]);
339
+ test("empty sessions array is a no-op", async () => {
340
+ writeSessionsToStore(tempRoot, []);
491
341
 
492
342
  const checks: HealthCheck[] = [];
493
343
 
@@ -498,21 +348,17 @@ describe("daemon tick", () => {
498
348
  _tmux: tmuxAllAlive(),
499
349
  });
500
350
 
501
- // Should process without errors
502
- expect(checks).toHaveLength(1);
503
- expect(checks[0]?.state).toBe("working");
351
+ expect(checks).toHaveLength(0);
504
352
  });
505
353
  });
506
354
 
507
355
  // === Event recording tests ===
508
356
 
509
357
  describe("daemon event recording", () => {
510
- /** Open the events.db in the temp root and return all events. */
511
358
  function readEvents(root: string): StoredEvent[] {
512
359
  const dbPath = join(root, ".legio", "events.db");
513
360
  const store = createEventStore(dbPath);
514
361
  try {
515
- // Get all events (no agent filter — use a broad timeline)
516
362
  return store.getTimeline({ since: "2000-01-01T00:00:00Z" });
517
363
  } finally {
518
364
  store.close();
@@ -520,7 +366,6 @@ describe("daemon event recording", () => {
520
366
  }
521
367
 
522
368
  test("run_id is included in events when current-run.txt exists", async () => {
523
- // Use zombie-old activity to trigger terminate + recovery attempt events
524
369
  const oldActivity = new Date(Date.now() - 200_000).toISOString();
525
370
  const session = makeSession({
526
371
  agentName: "zombie-agent",
@@ -534,7 +379,6 @@ describe("daemon event recording", () => {
534
379
 
535
380
  writeSessionsToStore(tempRoot, [session]);
536
381
 
537
- // Write a current-run.txt
538
382
  const runId = "run-2026-02-13T10-00-00-000Z";
539
383
  await writeFile(join(tempRoot, ".legio", "current-run.txt"), runId, "utf-8");
540
384
 
@@ -591,7 +435,6 @@ describe("daemon event recording", () => {
591
435
 
592
436
  const checks: HealthCheck[] = [];
593
437
 
594
- // Inject null EventStore — daemon should still work fine
595
438
  await runDaemonTick({
596
439
  root: tempRoot,
597
440
  ...THRESHOLDS,
@@ -600,112 +443,11 @@ describe("daemon event recording", () => {
600
443
  _eventStore: null,
601
444
  });
602
445
 
603
- // Daemon should still produce health checks even without EventStore
604
446
  expect(checks).toHaveLength(1);
605
447
  expect(checks[0]?.action).toBe("none");
606
448
  });
607
449
  });
608
450
 
609
- // === Mulch failure recording tests ===
610
-
611
- describe("daemon mulch failure recording", () => {
612
- let tempRoot: string;
613
-
614
- beforeEach(async () => {
615
- tempRoot = await createTempRoot();
616
- });
617
-
618
- afterEach(async () => {
619
- await rm(tempRoot, { recursive: true, force: true });
620
- });
621
-
622
- /** Track calls to the recordFailure mock. */
623
- interface FailureRecord {
624
- root: string;
625
- session: AgentSession;
626
- reason: string;
627
- tier: 0 | 1;
628
- triageSuggestion?: string;
629
- }
630
-
631
- function failureTracker(): {
632
- calls: FailureRecord[];
633
- recordFailure: (
634
- root: string,
635
- session: AgentSession,
636
- reason: string,
637
- tier: 0 | 1,
638
- triageSuggestion?: string,
639
- ) => Promise<void>;
640
- } {
641
- const calls: FailureRecord[] = [];
642
- return {
643
- calls,
644
- async recordFailure(root, session, reason, tier, triageSuggestion) {
645
- calls.push({ root, session, reason, tier, triageSuggestion });
646
- },
647
- };
648
- }
649
-
650
- test("Tier 0: recordFailure called when action=terminate (process death)", async () => {
651
- const session = makeSession({
652
- agentName: "dying-agent",
653
- capability: "builder",
654
- beadId: "task-123",
655
- tmuxSession: "legio-dying-agent",
656
- state: "working",
657
- lastActivity: new Date().toISOString(),
658
- });
659
-
660
- writeSessionsToStore(tempRoot, [session]);
661
-
662
- const tmuxMock = tmuxWithLiveness({ "legio-dying-agent": false });
663
- const failureMock = failureTracker();
664
-
665
- await runDaemonTick({
666
- root: tempRoot,
667
- ...THRESHOLDS,
668
- _tmux: tmuxMock,
669
- _recordFailure: failureMock.recordFailure,
670
- });
671
-
672
- // recordFailure should be called with Tier 0
673
- expect(failureMock.calls).toHaveLength(1);
674
- expect(failureMock.calls[0]?.tier).toBe(0);
675
- expect(failureMock.calls[0]?.session.agentName).toBe("dying-agent");
676
- expect(failureMock.calls[0]?.session.capability).toBe("builder");
677
- expect(failureMock.calls[0]?.session.beadId).toBe("task-123");
678
- // Reason should be either the reconciliationNote or default "Process terminated"
679
- expect(failureMock.calls[0]?.reason).toBeDefined();
680
- });
681
-
682
- test("recordFailure includes evidenceBead when beadId is present", async () => {
683
- const session = makeSession({
684
- agentName: "beaded-agent",
685
- capability: "builder",
686
- beadId: "task-789",
687
- tmuxSession: "legio-beaded-agent",
688
- state: "working",
689
- lastActivity: new Date().toISOString(),
690
- });
691
-
692
- writeSessionsToStore(tempRoot, [session]);
693
-
694
- const tmuxMock = tmuxWithLiveness({ "legio-beaded-agent": false });
695
- const failureMock = failureTracker();
696
-
697
- await runDaemonTick({
698
- root: tempRoot,
699
- ...THRESHOLDS,
700
- _tmux: tmuxMock,
701
- _recordFailure: failureMock.recordFailure,
702
- });
703
-
704
- expect(failureMock.calls).toHaveLength(1);
705
- expect(failureMock.calls[0]?.session.beadId).toBe("task-789");
706
- });
707
- });
708
-
709
451
  // === Recovery tests ===
710
452
 
711
453
  describe("daemon recovery", () => {
@@ -719,7 +461,6 @@ describe("daemon recovery", () => {
719
461
  await rm(tempRoot, { recursive: true, force: true });
720
462
  });
721
463
 
722
- /** Open the events.db and return all events. */
723
464
  function readEvents(root: string): StoredEvent[] {
724
465
  const dbPath = join(root, ".legio", "events.db");
725
466
  const store = createEventStore(dbPath);
@@ -730,7 +471,6 @@ describe("daemon recovery", () => {
730
471
  }
731
472
  }
732
473
 
733
- /** Build a minimal SessionCheckpoint for a session. */
734
474
  function makeCheckpoint(agentName: string, beadId: string): SessionCheckpoint {
735
475
  return {
736
476
  agentName,
@@ -745,7 +485,6 @@ describe("daemon recovery", () => {
745
485
  };
746
486
  }
747
487
 
748
- /** Create a fake _sling that tracks calls and returns a given exit code. */
749
488
  function slingTracker(exitCode = 0): {
750
489
  sling: (args: string[]) => Promise<{ exitCode: number; stderr: string }>;
751
490
  calls: string[][];
@@ -760,7 +499,6 @@ describe("daemon recovery", () => {
760
499
  };
761
500
  }
762
501
 
763
- /** Create a fake _sendRecoveryMail that tracks calls. */
764
502
  function mailTracker(): {
765
503
  sendRecoveryMail: (args: string[]) => Promise<void>;
766
504
  calls: string[][];
@@ -774,7 +512,6 @@ describe("daemon recovery", () => {
774
512
  };
775
513
  }
776
514
 
777
- /** Read recovery count from disk. */
778
515
  async function readRecoveryCountFromDisk(root: string, agentName: string): Promise<number> {
779
516
  try {
780
517
  const text = await readFile(
@@ -787,7 +524,6 @@ describe("daemon recovery", () => {
787
524
  }
788
525
  }
789
526
 
790
- /** Write recovery count to disk to simulate prior attempts. */
791
527
  async function writeRecoveryCountToDisk(
792
528
  root: string,
793
529
  agentName: string,
@@ -798,8 +534,6 @@ describe("daemon recovery", () => {
798
534
  await writeFile(join(dir, "recovery-count"), String(count), "utf-8");
799
535
  }
800
536
 
801
- // --- Direct terminate path (tmux dead) ---
802
-
803
537
  test("no checkpoint → no recovery, agent marked zombie", async () => {
804
538
  const session = makeSession({
805
539
  agentName: "dead-agent",
@@ -824,16 +558,13 @@ describe("daemon recovery", () => {
824
558
  _recordFailure: async () => {},
825
559
  });
826
560
 
827
- // No sling attempted (no checkpoint)
828
561
  expect(slingMock.calls).toHaveLength(0);
829
- // No mail sent
830
562
  expect(mailMock.calls).toHaveLength(0);
831
- // Agent is zombie (existing behavior)
832
563
  const reloaded = readSessionsFromStore(tempRoot);
833
564
  expect(reloaded[0]?.state).toBe("zombie");
834
565
  });
835
566
 
836
- test("checkpoint exists, sling succeeds → sling called, recovery events recorded", async () => {
567
+ test("checkpoint exists, sling succeeds → recovery events recorded", async () => {
837
568
  const session = makeSession({
838
569
  agentName: "dead-agent",
839
570
  tmuxSession: "legio-dead-agent",
@@ -869,13 +600,10 @@ describe("daemon recovery", () => {
869
600
  eventStore.close();
870
601
  }
871
602
 
872
- // Sling was called
873
603
  expect(slingMock.calls).toHaveLength(1);
874
- // Mail sent to parent
875
604
  expect(mailMock.calls).toHaveLength(1);
876
605
  expect(mailMock.calls[0]).toContain("my-lead");
877
606
 
878
- // recovery_attempt and recovery_success events recorded
879
607
  const events = readEvents(tempRoot);
880
608
  const attemptEvent = events.find((e) => {
881
609
  if (!e.data) return false;
@@ -883,39 +611,107 @@ describe("daemon recovery", () => {
883
611
  return d.type === "recovery_attempt";
884
612
  });
885
613
  expect(attemptEvent).toBeDefined();
886
- expect(attemptEvent?.level).toBe("info");
887
- expect(attemptEvent?.agentName).toBe("dead-agent");
888
614
 
889
- const successEvent = events.find((e) => {
890
- if (!e.data) return false;
891
- const d = JSON.parse(e.data) as Record<string, unknown>;
892
- return d.type === "recovery_success";
893
- });
894
- expect(successEvent).toBeDefined();
895
- expect(successEvent?.level).toBe("info");
896
-
897
- // State must be "completed" after successful recovery, not "zombie"
898
615
  const reloaded = readSessionsFromStore(tempRoot);
899
- expect(reloaded[0]?.state).not.toBe("zombie");
900
616
  expect(reloaded[0]?.state).toBe("completed");
901
617
  });
902
618
 
903
- test("checkpoint exists, sling fails → sling called, agent stays zombie, recovery_failed event", async () => {
619
+ test("recovery count exhaustedno sling, agent zombified", async () => {
904
620
  const session = makeSession({
905
621
  agentName: "dead-agent",
906
622
  tmuxSession: "legio-dead-agent",
907
623
  state: "working",
908
624
  lastActivity: new Date().toISOString(),
909
625
  parentAgent: "my-lead",
910
- beadId: "task-abc",
911
- capability: "builder",
912
626
  });
913
627
 
914
628
  writeSessionsToStore(tempRoot, [session]);
629
+ await writeRecoveryCountToDisk(tempRoot, "dead-agent", 1);
915
630
 
916
631
  const checkpoint = makeCheckpoint("dead-agent", "task-abc");
917
- const slingMock = slingTracker(1); // Non-zero exit code
632
+ const slingMock = slingTracker(0);
633
+ const mailMock = mailTracker();
634
+
635
+ await runDaemonTick({
636
+ root: tempRoot,
637
+ ...THRESHOLDS,
638
+ _tmux: tmuxWithLiveness({ "legio-dead-agent": false }),
639
+ _loadCheckpoint: async () => checkpoint,
640
+ _sling: slingMock.sling,
641
+ _sendRecoveryMail: mailMock.sendRecoveryMail,
642
+ _recordFailure: async () => {},
643
+ });
918
644
 
645
+ expect(slingMock.calls).toHaveLength(0);
646
+ expect(mailMock.calls).toHaveLength(1);
647
+ const reloaded = readSessionsFromStore(tempRoot);
648
+ expect(reloaded[0]?.state).toBe("zombie");
649
+ });
650
+
651
+ test("recovery count increments after successful attempt", async () => {
652
+ const session = makeSession({
653
+ agentName: "dead-agent",
654
+ tmuxSession: "legio-dead-agent",
655
+ state: "working",
656
+ lastActivity: new Date().toISOString(),
657
+ });
658
+
659
+ writeSessionsToStore(tempRoot, [session]);
660
+
661
+ const checkpoint = makeCheckpoint("dead-agent", "task-abc");
662
+
663
+ await runDaemonTick({
664
+ root: tempRoot,
665
+ ...THRESHOLDS,
666
+ _tmux: tmuxWithLiveness({ "legio-dead-agent": false }),
667
+ _loadCheckpoint: async () => checkpoint,
668
+ _sling: slingTracker(0).sling,
669
+ _sendRecoveryMail: async () => {},
670
+ _recordFailure: async () => {},
671
+ });
672
+
673
+ const count = await readRecoveryCountFromDisk(tempRoot, "dead-agent");
674
+ expect(count).toBe(1);
675
+ });
676
+ });
677
+
678
+ // === Beacon safety net tests ===
679
+
680
+ describe("beacon safety net", () => {
681
+ let tempRoot: string;
682
+
683
+ beforeEach(async () => {
684
+ tempRoot = await createTempRoot();
685
+ });
686
+
687
+ afterEach(async () => {
688
+ await rm(tempRoot, { recursive: true, force: true });
689
+ });
690
+
691
+ function readEvents(root: string): StoredEvent[] {
692
+ const dbPath = join(root, ".legio", "events.db");
693
+ const store = createEventStore(dbPath);
694
+ try {
695
+ return store.getTimeline({ since: "2000-01-01T00:00:00Z" });
696
+ } finally {
697
+ store.close();
698
+ }
699
+ }
700
+
701
+ test("sends follow-up Enter when booting agent has no activity markers", async () => {
702
+ // Agent has been booting for 25s (past beaconNudgeMs=20s but before bootTimeoutMs=90s)
703
+ const startedAt = new Date(Date.now() - 25_000).toISOString();
704
+ const session = makeSession({
705
+ agentName: "stuck-agent",
706
+ tmuxSession: "legio-stuck-agent",
707
+ state: "booting",
708
+ startedAt,
709
+ lastActivity: startedAt,
710
+ });
711
+
712
+ writeSessionsToStore(tempRoot, [session]);
713
+
714
+ const sendKeysCalls: Array<{ session: string; keys: string }> = [];
919
715
  const eventsDbPath = join(tempRoot, ".legio", "events.db");
920
716
  const eventStore = createEventStore(eventsDbPath);
921
717
 
@@ -923,311 +719,421 @@ describe("daemon recovery", () => {
923
719
  await runDaemonTick({
924
720
  root: tempRoot,
925
721
  ...THRESHOLDS,
926
- _tmux: tmuxWithLiveness({ "legio-dead-agent": false }),
927
- _loadCheckpoint: async () => checkpoint,
928
- _sling: slingMock.sling,
929
- _sendRecoveryMail: async () => {},
930
- _recordFailure: async () => {},
722
+ beaconNudgeMs: 20_000,
723
+ _tmux: tmuxWithLiveness({ "legio-stuck-agent": true }),
724
+ _capturePaneContent: async () => "Some prompt text sitting in buffer",
725
+ _sendKeys: async (sessionName, keys) => {
726
+ sendKeysCalls.push({ session: sessionName, keys });
727
+ },
931
728
  _eventStore: eventStore,
932
729
  });
933
730
  } finally {
934
731
  eventStore.close();
935
732
  }
936
733
 
937
- // Sling was called
938
- expect(slingMock.calls).toHaveLength(1);
939
- // Agent should be zombie (sling failed)
940
- const reloaded = readSessionsFromStore(tempRoot);
941
- expect(reloaded[0]?.state).toBe("zombie");
734
+ // Follow-up Enter should have been sent
735
+ expect(sendKeysCalls).toHaveLength(1);
736
+ expect(sendKeysCalls[0]?.session).toBe("legio-stuck-agent");
737
+ expect(sendKeysCalls[0]?.keys).toBe("");
942
738
 
943
- // recovery_failed event recorded
739
+ // beacon_nudge event should be recorded
944
740
  const events = readEvents(tempRoot);
945
- const failedEvent = events.find((e) => {
741
+ const beaconEvent = events.find((e) => {
946
742
  if (!e.data) return false;
947
743
  const d = JSON.parse(e.data) as Record<string, unknown>;
948
- return d.type === "recovery_failed";
744
+ return d.type === "beacon_nudge";
949
745
  });
950
- expect(failedEvent).toBeDefined();
951
- expect(failedEvent?.level).toBe("error");
746
+ expect(beaconEvent).toBeDefined();
952
747
  });
953
748
 
954
- test("sling args include capability, name, spec path, files, parent, depth", async () => {
749
+ test("does NOT send Enter when activity markers are present", async () => {
750
+ const startedAt = new Date(Date.now() - 25_000).toISOString();
955
751
  const session = makeSession({
956
- agentName: "dead-agent",
957
- tmuxSession: "legio-dead-agent",
958
- state: "working",
959
- lastActivity: new Date().toISOString(),
960
- parentAgent: "my-lead",
961
- beadId: "task-abc",
962
- capability: "builder",
963
- depth: 2,
752
+ agentName: "active-agent",
753
+ tmuxSession: "legio-active-agent",
754
+ state: "booting",
755
+ startedAt,
756
+ lastActivity: startedAt,
964
757
  });
965
758
 
966
759
  writeSessionsToStore(tempRoot, [session]);
967
760
 
968
- const checkpoint: SessionCheckpoint = {
969
- ...makeCheckpoint("dead-agent", "task-abc"),
970
- filesModified: ["src/foo.ts", "src/bar.ts"],
971
- };
972
- const slingMock = slingTracker(0);
761
+ const sendKeysCalls: Array<{ session: string; keys: string }> = [];
973
762
 
974
763
  await runDaemonTick({
975
764
  root: tempRoot,
976
765
  ...THRESHOLDS,
977
- _tmux: tmuxWithLiveness({ "legio-dead-agent": false }),
978
- _loadCheckpoint: async () => checkpoint,
979
- _sling: slingMock.sling,
980
- _sendRecoveryMail: async () => {},
981
- _recordFailure: async () => {},
766
+ beaconNudgeMs: 20_000,
767
+ _tmux: tmuxWithLiveness({ "legio-active-agent": true }),
768
+ _capturePaneContent: async () => "⏺ Claude is thinking...",
769
+ _sendKeys: async (sessionName, keys) => {
770
+ sendKeysCalls.push({ session: sessionName, keys });
771
+ },
982
772
  });
983
773
 
984
- expect(slingMock.calls).toHaveLength(1);
985
- const args = slingMock.calls[0] ?? [];
986
- expect(args).toContain("task-abc");
987
- expect(args).toContain("--capability");
988
- expect(args).toContain("builder");
989
- expect(args).toContain("--name");
990
- expect(args).toContain("dead-agent");
991
- expect(args).toContain("--spec");
992
- expect(args).toContain("--files");
993
- expect(args).toContain("src/foo.ts,src/bar.ts");
994
- expect(args).toContain("--parent");
995
- expect(args).toContain("my-lead");
996
- expect(args).toContain("--depth");
997
- expect(args).toContain("2");
774
+ // No follow-up Enter — agent shows activity
775
+ expect(sendKeysCalls).toHaveLength(0);
998
776
  });
999
777
 
1000
- test("no files modified --files arg omitted from sling", async () => {
778
+ test("does NOT send Enter when pane content is empty", async () => {
779
+ const startedAt = new Date(Date.now() - 25_000).toISOString();
1001
780
  const session = makeSession({
1002
- agentName: "dead-agent",
1003
- tmuxSession: "legio-dead-agent",
1004
- state: "working",
1005
- lastActivity: new Date().toISOString(),
1006
- capability: "builder",
781
+ agentName: "empty-agent",
782
+ tmuxSession: "legio-empty-agent",
783
+ state: "booting",
784
+ startedAt,
785
+ lastActivity: startedAt,
1007
786
  });
1008
787
 
1009
788
  writeSessionsToStore(tempRoot, [session]);
1010
789
 
1011
- const checkpoint: SessionCheckpoint = {
1012
- ...makeCheckpoint("dead-agent", "task-abc"),
1013
- filesModified: [], // No files
1014
- };
1015
- const slingMock = slingTracker(0);
790
+ const sendKeysCalls: Array<{ session: string; keys: string }> = [];
1016
791
 
1017
792
  await runDaemonTick({
1018
793
  root: tempRoot,
1019
794
  ...THRESHOLDS,
1020
- _tmux: tmuxWithLiveness({ "legio-dead-agent": false }),
1021
- _loadCheckpoint: async () => checkpoint,
1022
- _sling: slingMock.sling,
1023
- _sendRecoveryMail: async () => {},
1024
- _recordFailure: async () => {},
795
+ beaconNudgeMs: 20_000,
796
+ _tmux: tmuxWithLiveness({ "legio-empty-agent": true }),
797
+ _capturePaneContent: async () => " ",
798
+ _sendKeys: async (sessionName, keys) => {
799
+ sendKeysCalls.push({ session: sessionName, keys });
800
+ },
1025
801
  });
1026
802
 
1027
- expect(slingMock.calls).toHaveLength(1);
1028
- const args = slingMock.calls[0] ?? [];
1029
- expect(args).not.toContain("--files");
803
+ // No follow-up Enter — pane is empty (agent hasn't started yet)
804
+ expect(sendKeysCalls).toHaveLength(0);
1030
805
  });
1031
806
 
1032
- test("recovery count increments after successful attempt", async () => {
807
+ test("does NOT send Enter before beaconNudgeMs", async () => {
808
+ // Agent has been booting for only 5s (well before beaconNudgeMs=20s)
809
+ const startedAt = new Date(Date.now() - 5_000).toISOString();
1033
810
  const session = makeSession({
1034
- agentName: "dead-agent",
1035
- tmuxSession: "legio-dead-agent",
1036
- state: "working",
1037
- lastActivity: new Date().toISOString(),
811
+ agentName: "new-agent",
812
+ tmuxSession: "legio-new-agent",
813
+ state: "booting",
814
+ startedAt,
815
+ lastActivity: startedAt,
1038
816
  });
1039
817
 
1040
818
  writeSessionsToStore(tempRoot, [session]);
1041
819
 
1042
- const checkpoint = makeCheckpoint("dead-agent", "task-abc");
820
+ const sendKeysCalls: Array<{ session: string; keys: string }> = [];
1043
821
 
1044
822
  await runDaemonTick({
1045
823
  root: tempRoot,
1046
824
  ...THRESHOLDS,
1047
- _tmux: tmuxWithLiveness({ "legio-dead-agent": false }),
1048
- _loadCheckpoint: async () => checkpoint,
1049
- _sling: slingTracker(0).sling,
1050
- _sendRecoveryMail: async () => {},
1051
- _recordFailure: async () => {},
825
+ beaconNudgeMs: 20_000,
826
+ _tmux: tmuxWithLiveness({ "legio-new-agent": true }),
827
+ _capturePaneContent: async () => "Some text",
828
+ _sendKeys: async (sessionName, keys) => {
829
+ sendKeysCalls.push({ session: sessionName, keys });
830
+ },
1052
831
  });
1053
832
 
1054
- const count = await readRecoveryCountFromDisk(tempRoot, "dead-agent");
1055
- expect(count).toBe(1);
833
+ // No follow-up Enter too early
834
+ expect(sendKeysCalls).toHaveLength(0);
1056
835
  });
1057
836
 
1058
- test("recovery count exhausted → no sling, agent zombified, escalation mail sent to parent", async () => {
837
+ test("capturePaneContent failure is non-fatal", async () => {
838
+ const startedAt = new Date(Date.now() - 25_000).toISOString();
1059
839
  const session = makeSession({
1060
- agentName: "dead-agent",
1061
- tmuxSession: "legio-dead-agent",
1062
- state: "working",
1063
- lastActivity: new Date().toISOString(),
1064
- parentAgent: "my-lead",
840
+ agentName: "error-agent",
841
+ tmuxSession: "legio-error-agent",
842
+ state: "booting",
843
+ startedAt,
844
+ lastActivity: startedAt,
1065
845
  });
1066
846
 
1067
847
  writeSessionsToStore(tempRoot, [session]);
1068
848
 
1069
- // Pre-write recovery count = 1 (default maxRecoveryAttempts=1, so exhausted)
1070
- await writeRecoveryCountToDisk(tempRoot, "dead-agent", 1);
1071
-
1072
- const checkpoint = makeCheckpoint("dead-agent", "task-abc");
1073
- const slingMock = slingTracker(0);
1074
- const mailMock = mailTracker();
1075
-
849
+ // Should not throw
1076
850
  await runDaemonTick({
1077
851
  root: tempRoot,
1078
852
  ...THRESHOLDS,
1079
- _tmux: tmuxWithLiveness({ "legio-dead-agent": false }),
1080
- _loadCheckpoint: async () => checkpoint,
1081
- _sling: slingMock.sling,
1082
- _sendRecoveryMail: mailMock.sendRecoveryMail,
1083
- _recordFailure: async () => {},
853
+ beaconNudgeMs: 20_000,
854
+ _tmux: tmuxWithLiveness({ "legio-error-agent": true }),
855
+ _capturePaneContent: async () => {
856
+ throw new Error("tmux capture failed");
857
+ },
858
+ _sendKeys: async () => {},
1084
859
  });
1085
860
 
1086
- // No sling attempted (exhausted)
1087
- expect(slingMock.calls).toHaveLength(0);
1088
- // Exhaustion error mail sent to parent
1089
- expect(mailMock.calls).toHaveLength(1);
1090
- const mailArgs = mailMock.calls[0] ?? [];
1091
- expect(mailArgs).toContain("my-lead");
1092
- expect(mailArgs).toContain("error");
1093
- // Agent marked zombie
1094
- const reloaded = readSessionsFromStore(tempRoot);
1095
- expect(reloaded[0]?.state).toBe("zombie");
861
+ // Daemon should continue without crashing
1096
862
  });
863
+ });
1097
864
 
1098
- test("maxRecoveryAttempts=2: second attempt allowed when count=1", async () => {
1099
- const session = makeSession({
1100
- agentName: "dead-agent",
1101
- tmuxSession: "legio-dead-agent",
1102
- state: "working",
1103
- lastActivity: new Date().toISOString(),
1104
- parentAgent: "my-lead",
865
+ // === Mail tick tests ===
866
+
867
+ describe("mail tick", () => {
868
+ let tempDir: string;
869
+ let store: MailStore;
870
+ let nudgeCalls: Array<{ agentName: string; message: string }>;
871
+ let pendingNudgeCalls: Array<{ agentName: string }>;
872
+
873
+ function makeMailOptions(overrides?: Partial<WatchmanOptions>): WatchmanOptions {
874
+ return {
875
+ root: tempDir,
876
+ zombieThresholdMs: 120_000,
877
+ _mailStore: store,
878
+ _nudge: async (_root, agentName, message) => {
879
+ nudgeCalls.push({ agentName, message });
880
+ return { delivered: true };
881
+ },
882
+ _isAgentIdle: async () => true,
883
+ _writePendingNudge: async (_cwd, agentName) => {
884
+ pendingNudgeCalls.push({ agentName });
885
+ },
886
+ ...overrides,
887
+ };
888
+ }
889
+
890
+ beforeEach(async () => {
891
+ tempDir = await mkdtemp(join(tmpdir(), "legio-watchman-test-"));
892
+ store = createMailStore(join(tempDir, "mail.db"));
893
+ nudgeCalls = [];
894
+ pendingNudgeCalls = [];
895
+ });
896
+
897
+ afterEach(async () => {
898
+ store.close();
899
+ await rm(tempDir, { recursive: true, force: true });
900
+ });
901
+
902
+ test("no-op when no unread messages", async () => {
903
+ const state = new Map<string, AgentMailState>();
904
+ await runMailTick(makeMailOptions(), state);
905
+
906
+ expect(nudgeCalls).toHaveLength(0);
907
+ expect(pendingNudgeCalls).toHaveLength(0);
908
+ expect(state.size).toBe(0);
909
+ });
910
+
911
+ test("nudges agent with unread mail on first tick", async () => {
912
+ store.insert({
913
+ id: "",
914
+ from: "agent-a",
915
+ to: "builder-1",
916
+ subject: "Build this",
917
+ body: "Please build feature X",
918
+ type: "status",
919
+ priority: "normal",
920
+ threadId: null,
1105
921
  });
1106
922
 
1107
- writeSessionsToStore(tempRoot, [session]);
923
+ const state = new Map<string, AgentMailState>();
924
+ await runMailTick(makeMailOptions(), state);
1108
925
 
1109
- // count=1 but max=2, so one more attempt is allowed
1110
- await writeRecoveryCountToDisk(tempRoot, "dead-agent", 1);
926
+ expect(nudgeCalls).toHaveLength(1);
927
+ expect(nudgeCalls[0]?.agentName).toBe("builder-1");
928
+ expect(pendingNudgeCalls).toHaveLength(1);
929
+ expect(pendingNudgeCalls[0]?.agentName).toBe("builder-1");
930
+ expect(state.size).toBe(1);
931
+ expect(state.get("builder-1")?.nudgeCount).toBe(1);
932
+ });
1111
933
 
1112
- const checkpoint = makeCheckpoint("dead-agent", "task-abc");
1113
- const slingMock = slingTracker(0);
934
+ test("does not re-nudge before reNudgeIntervalMs", async () => {
935
+ store.insert({
936
+ id: "",
937
+ from: "agent-a",
938
+ to: "builder-1",
939
+ subject: "Build this",
940
+ body: "body",
941
+ type: "status",
942
+ priority: "normal",
943
+ threadId: null,
944
+ });
1114
945
 
1115
- await runDaemonTick({
1116
- root: tempRoot,
1117
- ...THRESHOLDS,
1118
- maxRecoveryAttempts: 2,
1119
- _tmux: tmuxWithLiveness({ "legio-dead-agent": false }),
1120
- _loadCheckpoint: async () => checkpoint,
1121
- _sling: slingMock.sling,
1122
- _sendRecoveryMail: async () => {},
1123
- _recordFailure: async () => {},
946
+ const state = new Map<string, AgentMailState>();
947
+ const opts = makeMailOptions({ reNudgeIntervalMs: 60_000 });
948
+
949
+ await runMailTick(opts, state);
950
+ expect(nudgeCalls).toHaveLength(1);
951
+
952
+ await runMailTick(opts, state);
953
+ expect(nudgeCalls).toHaveLength(1);
954
+ });
955
+
956
+ test("re-nudges after reNudgeIntervalMs", async () => {
957
+ store.insert({
958
+ id: "",
959
+ from: "agent-a",
960
+ to: "builder-1",
961
+ subject: "Build this",
962
+ body: "body",
963
+ type: "status",
964
+ priority: "normal",
965
+ threadId: null,
1124
966
  });
1125
967
 
1126
- // Second attempt was made
1127
- expect(slingMock.calls).toHaveLength(1);
1128
- // Count now 2
1129
- const count = await readRecoveryCountFromDisk(tempRoot, "dead-agent");
1130
- expect(count).toBe(2);
968
+ const state = new Map<string, AgentMailState>();
969
+ const opts = makeMailOptions({ reNudgeIntervalMs: 100 });
970
+
971
+ await runMailTick(opts, state);
972
+ expect(nudgeCalls).toHaveLength(1);
973
+
974
+ await new Promise<void>((resolve) => setTimeout(resolve, 150));
975
+
976
+ await runMailTick(opts, state);
977
+ expect(nudgeCalls).toHaveLength(2);
978
+ expect(state.get("builder-1")?.nudgeCount).toBe(2);
1131
979
  });
1132
980
 
1133
- test("no parent agent no mail, recovery still attempted", async () => {
1134
- const session = makeSession({
1135
- agentName: "dead-agent",
1136
- tmuxSession: "legio-dead-agent",
1137
- state: "working",
1138
- lastActivity: new Date().toISOString(),
1139
- parentAgent: null,
981
+ test("clears state when agent reads mail", async () => {
982
+ const msg = store.insert({
983
+ id: "",
984
+ from: "agent-a",
985
+ to: "builder-1",
986
+ subject: "Build this",
987
+ body: "body",
988
+ type: "status",
989
+ priority: "normal",
990
+ threadId: null,
1140
991
  });
1141
992
 
1142
- writeSessionsToStore(tempRoot, [session]);
993
+ const state = new Map<string, AgentMailState>();
994
+ await runMailTick(makeMailOptions(), state);
995
+ expect(state.size).toBe(1);
1143
996
 
1144
- const checkpoint = makeCheckpoint("dead-agent", "task-abc");
1145
- const slingMock = slingTracker(0);
1146
- const mailMock = mailTracker();
997
+ store.markRead(msg.id);
1147
998
 
1148
- await runDaemonTick({
1149
- root: tempRoot,
1150
- ...THRESHOLDS,
1151
- _tmux: tmuxWithLiveness({ "legio-dead-agent": false }),
1152
- _loadCheckpoint: async () => checkpoint,
1153
- _sling: slingMock.sling,
1154
- _sendRecoveryMail: mailMock.sendRecoveryMail,
1155
- _recordFailure: async () => {},
999
+ await runMailTick(makeMailOptions(), state);
1000
+ expect(state.size).toBe(0);
1001
+ });
1002
+
1003
+ test("nudge failure is non-fatal", async () => {
1004
+ store.insert({
1005
+ id: "",
1006
+ from: "agent-a",
1007
+ to: "builder-1",
1008
+ subject: "Build this",
1009
+ body: "body",
1010
+ type: "status",
1011
+ priority: "normal",
1012
+ threadId: null,
1156
1013
  });
1157
1014
 
1158
- // Sling still attempted
1159
- expect(slingMock.calls).toHaveLength(1);
1160
- // No mail (no parent)
1161
- expect(mailMock.calls).toHaveLength(0);
1015
+ const state = new Map<string, AgentMailState>();
1016
+ const opts = makeMailOptions({
1017
+ _nudge: async () => {
1018
+ throw new Error("tmux dead");
1019
+ },
1020
+ });
1021
+
1022
+ await runMailTick(opts, state);
1023
+ expect(state.size).toBe(1);
1024
+ expect(state.get("builder-1")?.nudgeCount).toBe(1);
1162
1025
  });
1163
1026
 
1164
- test("recovery_attempt event includes attempt number and maxAttempts", async () => {
1165
- const session = makeSession({
1166
- agentName: "dead-agent",
1167
- tmuxSession: "legio-dead-agent",
1168
- state: "working",
1169
- lastActivity: new Date().toISOString(),
1027
+ test("skips tmux nudge when agent is busy", async () => {
1028
+ store.insert({
1029
+ id: "",
1030
+ from: "agent-a",
1031
+ to: "builder-1",
1032
+ subject: "Build this",
1033
+ body: "body",
1034
+ type: "status",
1035
+ priority: "normal",
1036
+ threadId: null,
1170
1037
  });
1171
1038
 
1172
- writeSessionsToStore(tempRoot, [session]);
1039
+ const state = new Map<string, AgentMailState>();
1040
+ const opts = makeMailOptions({
1041
+ _isAgentIdle: async () => false,
1042
+ });
1173
1043
 
1174
- const checkpoint = makeCheckpoint("dead-agent", "task-abc");
1044
+ await runMailTick(opts, state);
1175
1045
 
1176
- const eventsDbPath = join(tempRoot, ".legio", "events.db");
1177
- const eventStore = createEventStore(eventsDbPath);
1046
+ expect(pendingNudgeCalls).toHaveLength(1);
1047
+ expect(nudgeCalls).toHaveLength(0);
1048
+ });
1178
1049
 
1179
- try {
1180
- await runDaemonTick({
1181
- root: tempRoot,
1182
- ...THRESHOLDS,
1183
- maxRecoveryAttempts: 3,
1184
- _tmux: tmuxWithLiveness({ "legio-dead-agent": false }),
1185
- _loadCheckpoint: async () => checkpoint,
1186
- _sling: slingTracker(0).sling,
1187
- _sendRecoveryMail: async () => {},
1188
- _recordFailure: async () => {},
1189
- _eventStore: eventStore,
1190
- });
1191
- } finally {
1192
- eventStore.close();
1193
- }
1050
+ test("calls onWarn after warnAfterMs", async () => {
1051
+ store.insert({
1052
+ id: "",
1053
+ from: "agent-a",
1054
+ to: "builder-1",
1055
+ subject: "Build this",
1056
+ body: "body",
1057
+ type: "status",
1058
+ priority: "normal",
1059
+ threadId: null,
1060
+ });
1194
1061
 
1195
- const events = readEvents(tempRoot);
1196
- const attemptEvent = events.find((e) => {
1197
- if (!e.data) return false;
1198
- const d = JSON.parse(e.data) as Record<string, unknown>;
1199
- return d.type === "recovery_attempt";
1062
+ const warnings: Array<{ agentName: string; durationMs: number }> = [];
1063
+ const state = new Map<string, AgentMailState>();
1064
+ const opts = makeMailOptions({
1065
+ warnAfterMs: 50,
1066
+ reNudgeIntervalMs: 10,
1067
+ onWarn: (agentName, durationMs) => {
1068
+ warnings.push({ agentName, durationMs });
1069
+ },
1200
1070
  });
1201
- expect(attemptEvent).toBeDefined();
1202
- const data = JSON.parse(attemptEvent?.data ?? "{}") as Record<string, unknown>;
1203
- expect(data.attempt).toBe(1);
1204
- expect(data.maxAttempts).toBe(3);
1071
+
1072
+ await runMailTick(opts, state);
1073
+ expect(warnings).toHaveLength(0);
1074
+
1075
+ await new Promise<void>((resolve) => setTimeout(resolve, 100));
1076
+
1077
+ await runMailTick(opts, state);
1078
+ expect(warnings).toHaveLength(1);
1079
+ expect(warnings[0]?.agentName).toBe("builder-1");
1205
1080
  });
1206
1081
 
1207
- test("existing tests unchanged: dead tmux without recovery DI still zombifies", async () => {
1208
- // Verify that omitting recovery DI (no _loadCheckpoint) uses default behavior —
1209
- // since the real loadCheckpoint would find no file, agent should still be zombified.
1210
- const session = makeSession({
1211
- agentName: "dead-agent",
1212
- tmuxSession: "legio-dead-agent",
1213
- state: "working",
1214
- lastActivity: new Date().toISOString(),
1082
+ test("handles multiple agents with unread mail", async () => {
1083
+ store.insert({
1084
+ id: "",
1085
+ from: "orchestrator",
1086
+ to: "builder-1",
1087
+ subject: "task 1",
1088
+ body: "body",
1089
+ type: "status",
1090
+ priority: "normal",
1091
+ threadId: null,
1092
+ });
1093
+ store.insert({
1094
+ id: "",
1095
+ from: "orchestrator",
1096
+ to: "builder-2",
1097
+ subject: "task 2",
1098
+ body: "body",
1099
+ type: "status",
1100
+ priority: "normal",
1101
+ threadId: null,
1215
1102
  });
1216
1103
 
1217
- writeSessionsToStore(tempRoot, [session]);
1104
+ const state = new Map<string, AgentMailState>();
1105
+ await runMailTick(makeMailOptions(), state);
1218
1106
 
1219
- // Use a _loadCheckpoint that returns null (as the real impl would for no file)
1220
- await runDaemonTick({
1221
- root: tempRoot,
1222
- ...THRESHOLDS,
1223
- _tmux: tmuxWithLiveness({ "legio-dead-agent": false }),
1224
- _loadCheckpoint: async () => null,
1225
- _sling: async () => ({ exitCode: 0, stderr: "" }),
1226
- _sendRecoveryMail: async () => {},
1227
- _recordFailure: async () => {},
1107
+ expect(nudgeCalls).toHaveLength(2);
1108
+ const nudgedAgents = nudgeCalls.map((c) => c.agentName).sort();
1109
+ expect(nudgedAgents).toEqual(["builder-1", "builder-2"]);
1110
+ expect(state.size).toBe(2);
1111
+ });
1112
+
1113
+ test("calls onNudge callback", async () => {
1114
+ store.insert({
1115
+ id: "",
1116
+ from: "agent-a",
1117
+ to: "builder-1",
1118
+ subject: "Build this",
1119
+ body: "body",
1120
+ type: "status",
1121
+ priority: "normal",
1122
+ threadId: null,
1228
1123
  });
1229
1124
 
1230
- const reloaded = readSessionsFromStore(tempRoot);
1231
- expect(reloaded[0]?.state).toBe("zombie");
1125
+ const nudgeEvents: Array<{ agentName: string; count: number }> = [];
1126
+ const state = new Map<string, AgentMailState>();
1127
+ const opts = makeMailOptions({
1128
+ onNudge: (agentName, count) => {
1129
+ nudgeEvents.push({ agentName, count });
1130
+ },
1131
+ });
1132
+
1133
+ await runMailTick(opts, state);
1134
+
1135
+ expect(nudgeEvents).toHaveLength(1);
1136
+ expect(nudgeEvents[0]?.agentName).toBe("builder-1");
1137
+ expect(nudgeEvents[0]?.count).toBe(1);
1232
1138
  });
1233
1139
  });