@os-eco/overstory-cli 0.9.1 → 0.9.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (101)
  1. package/README.md +21 -6
  2. package/agents/coordinator.md +34 -10
  3. package/agents/lead.md +11 -1
  4. package/package.json +1 -1
  5. package/src/agents/copilot-hooks-deployer.test.ts +162 -0
  6. package/src/agents/copilot-hooks-deployer.ts +93 -0
  7. package/src/agents/hooks-deployer.test.ts +9 -1
  8. package/src/agents/hooks-deployer.ts +2 -1
  9. package/src/agents/overlay.test.ts +26 -0
  10. package/src/agents/overlay.ts +18 -4
  11. package/src/beads/client.ts +31 -3
  12. package/src/commands/agents.ts +1 -1
  13. package/src/commands/clean.test.ts +3 -0
  14. package/src/commands/clean.ts +1 -58
  15. package/src/commands/completions.test.ts +18 -6
  16. package/src/commands/completions.ts +40 -1
  17. package/src/commands/coordinator.test.ts +77 -4
  18. package/src/commands/coordinator.ts +228 -125
  19. package/src/commands/dashboard.ts +50 -10
  20. package/src/commands/doctor.ts +3 -1
  21. package/src/commands/ecosystem.test.ts +126 -1
  22. package/src/commands/ecosystem.ts +7 -53
  23. package/src/commands/feed.test.ts +117 -2
  24. package/src/commands/feed.ts +46 -30
  25. package/src/commands/group.test.ts +274 -155
  26. package/src/commands/group.ts +11 -5
  27. package/src/commands/init.ts +50 -0
  28. package/src/commands/inspect.ts +8 -4
  29. package/src/commands/log.test.ts +35 -0
  30. package/src/commands/log.ts +10 -6
  31. package/src/commands/logs.test.ts +423 -1
  32. package/src/commands/logs.ts +99 -104
  33. package/src/commands/monitor.ts +8 -2
  34. package/src/commands/orchestrator.ts +42 -0
  35. package/src/commands/prime.test.ts +177 -2
  36. package/src/commands/prime.ts +4 -2
  37. package/src/commands/sling.ts +8 -3
  38. package/src/commands/upgrade.test.ts +2 -0
  39. package/src/commands/upgrade.ts +1 -17
  40. package/src/commands/watch.test.ts +67 -1
  41. package/src/commands/watch.ts +4 -79
  42. package/src/config.test.ts +250 -0
  43. package/src/config.ts +43 -0
  44. package/src/doctor/agents.test.ts +72 -5
  45. package/src/doctor/agents.ts +10 -10
  46. package/src/doctor/consistency.test.ts +35 -0
  47. package/src/doctor/consistency.ts +7 -3
  48. package/src/doctor/dependencies.test.ts +58 -1
  49. package/src/doctor/dependencies.ts +4 -2
  50. package/src/doctor/providers.test.ts +41 -5
  51. package/src/doctor/types.ts +2 -1
  52. package/src/doctor/version.test.ts +106 -2
  53. package/src/doctor/version.ts +4 -2
  54. package/src/doctor/watchdog.test.ts +167 -0
  55. package/src/doctor/watchdog.ts +158 -0
  56. package/src/e2e/init-sling-lifecycle.test.ts +2 -1
  57. package/src/errors.test.ts +350 -0
  58. package/src/events/tailer.test.ts +25 -0
  59. package/src/events/tailer.ts +8 -1
  60. package/src/index.ts +4 -1
  61. package/src/mail/store.test.ts +110 -0
  62. package/src/runtimes/aider.test.ts +124 -0
  63. package/src/runtimes/aider.ts +147 -0
  64. package/src/runtimes/amp.test.ts +164 -0
  65. package/src/runtimes/amp.ts +154 -0
  66. package/src/runtimes/claude.test.ts +4 -2
  67. package/src/runtimes/codex.test.ts +38 -1
  68. package/src/runtimes/codex.ts +22 -3
  69. package/src/runtimes/copilot.test.ts +213 -13
  70. package/src/runtimes/copilot.ts +93 -11
  71. package/src/runtimes/goose.test.ts +133 -0
  72. package/src/runtimes/goose.ts +157 -0
  73. package/src/runtimes/pi-guards.ts +2 -1
  74. package/src/runtimes/pi.test.ts +33 -9
  75. package/src/runtimes/pi.ts +10 -10
  76. package/src/runtimes/registry.test.ts +1 -1
  77. package/src/runtimes/registry.ts +13 -4
  78. package/src/runtimes/sapling.ts +2 -1
  79. package/src/runtimes/types.ts +9 -2
  80. package/src/tracker/factory.test.ts +10 -0
  81. package/src/tracker/factory.ts +3 -2
  82. package/src/types.ts +4 -0
  83. package/src/utils/bin.test.ts +10 -0
  84. package/src/utils/bin.ts +37 -0
  85. package/src/utils/fs.test.ts +119 -0
  86. package/src/utils/fs.ts +62 -0
  87. package/src/utils/pid.test.ts +68 -0
  88. package/src/utils/pid.ts +45 -0
  89. package/src/utils/time.test.ts +43 -0
  90. package/src/utils/time.ts +37 -0
  91. package/src/utils/version.test.ts +33 -0
  92. package/src/utils/version.ts +70 -0
  93. package/src/watchdog/daemon.test.ts +255 -1
  94. package/src/watchdog/daemon.ts +46 -9
  95. package/src/watchdog/health.test.ts +15 -1
  96. package/src/watchdog/health.ts +1 -1
  97. package/src/watchdog/triage.test.ts +49 -9
  98. package/src/watchdog/triage.ts +21 -5
  99. package/src/worktree/tmux.test.ts +166 -49
  100. package/src/worktree/tmux.ts +36 -37
  101. package/templates/copilot-hooks.json.tmpl +13 -0
@@ -37,7 +37,7 @@ import { openSessionStore } from "../sessions/compat.ts";
37
37
  import type { AgentSession, EventStore, HealthCheck } from "../types.ts";
38
38
  import { isProcessAlive, isSessionAlive, killProcessTree, killSession } from "../worktree/tmux.ts";
39
39
  import { evaluateHealth, transitionState } from "./health.ts";
40
- import { triageAgent } from "./triage.ts";
40
+ import { type TriageResult, triageAgent } from "./triage.ts";
41
41
 
42
42
  /** Maximum escalation level (terminate). */
43
43
  const MAX_ESCALATION_LEVEL = 3;
@@ -46,7 +46,7 @@ const MAX_ESCALATION_LEVEL = 3;
46
46
  * Persistent agent capabilities that are excluded from run-level completion checks.
47
47
  * These agents are long-running and should not count toward "all workers done".
48
48
  */
49
- const PERSISTENT_CAPABILITIES = new Set(["coordinator", "monitor"]);
49
+ const PERSISTENT_CAPABILITIES = new Set(["coordinator", "orchestrator", "monitor"]);
50
50
 
51
51
  /**
52
52
  * Module-level registry of active event tailers for headless agents.
@@ -280,7 +280,9 @@ export interface DaemonOptions {
280
280
  agentName: string;
281
281
  root: string;
282
282
  lastActivity: string;
283
- }) => Promise<"retry" | "terminate" | "extend">;
283
+ }) => Promise<TriageResult | "retry" | "terminate" | "extend">;
284
+ /** Max triage calls per daemon tick (prevents runaway AI usage). Default: 3. */
285
+ _maxTriagePerTick?: number;
284
286
  /** Dependency injection for testing. Uses real nudgeAgent when omitted. */
285
287
  _nudge?: (
286
288
  projectRoot: string,
@@ -340,6 +342,7 @@ export interface DaemonOptions {
340
342
  */
341
343
  export function startDaemon(options: DaemonOptions & { intervalMs: number }): { stop: () => void } {
342
344
  const { intervalMs } = options;
345
+ const tailerRegistry = options._tailerRegistry ?? _defaultTailerRegistry;
343
346
 
344
347
  // Run the first tick immediately, then on interval
345
348
  runDaemonTick(options).catch(() => {
@@ -355,6 +358,10 @@ export function startDaemon(options: DaemonOptions & { intervalMs: number }): {
355
358
  return {
356
359
  stop(): void {
357
360
  clearInterval(interval);
361
+ for (const [name, handle] of tailerRegistry) {
362
+ handle.stop();
363
+ tailerRegistry.delete(name);
364
+ }
358
365
  },
359
366
  };
360
367
  }
@@ -416,6 +423,8 @@ export async function runDaemonTick(options: DaemonOptions): Promise<void> {
416
423
  const tailerRegistry = options._tailerRegistry ?? _defaultTailerRegistry;
417
424
  const tailerFactory = options._tailerFactory ?? startEventTailer;
418
425
  const findStdoutLog = options._findLatestStdoutLog ?? findLatestStdoutLog;
426
+ const maxTriagePerTick = options._maxTriagePerTick ?? 3;
427
+ const triageCount = { value: 0 };
419
428
 
420
429
  const overstoryDir = join(root, ".overstory");
421
430
  const { store } = openSessionStore(overstoryDir);
@@ -624,6 +633,8 @@ export async function runDaemonTick(options: DaemonOptions): Promise<void> {
624
633
  eventStore,
625
634
  runId,
626
635
  recordFailure: recordFailureFn,
636
+ triageCount,
637
+ maxTriagePerTick,
627
638
  });
628
639
 
629
640
  if (actionResult.terminated) {
@@ -710,7 +721,11 @@ async function executeEscalationAction(ctx: {
710
721
  agentName: string;
711
722
  root: string;
712
723
  lastActivity: string;
713
- }) => Promise<"retry" | "terminate" | "extend">;
724
+ }) => Promise<TriageResult | "retry" | "terminate" | "extend">;
725
+ /** Shared counter across escalation calls in a single tick — enforces maxTriagePerTick. */
726
+ triageCount: { value: number };
727
+ /** Maximum number of triage calls allowed in one daemon tick. Default: 3. */
728
+ maxTriagePerTick: number;
714
729
  nudge: (
715
730
  projectRoot: string,
716
731
  agentName: string,
@@ -739,6 +754,8 @@ async function executeEscalationAction(ctx: {
739
754
  eventStore,
740
755
  runId,
741
756
  recordFailure,
757
+ triageCount,
758
+ maxTriagePerTick,
742
759
  } = ctx;
743
760
 
744
761
  switch (session.escalationLevel) {
@@ -785,29 +802,49 @@ async function executeEscalationAction(ctx: {
785
802
  return { terminated: false, stateChanged: false };
786
803
  }
787
804
 
788
- const verdict = await triage({
805
+ // Concurrency guard: limit triage calls per tick to avoid runaway AI usage
806
+ if (triageCount.value >= maxTriagePerTick) {
807
+ return { terminated: false, stateChanged: false };
808
+ }
809
+ triageCount.value++;
810
+
811
+ const raw = await triage({
789
812
  agentName: session.agentName,
790
813
  root,
791
814
  lastActivity: session.lastActivity,
792
815
  });
816
+ // Normalize: accept bare string (backward compat) or TriageResult
817
+ const result: TriageResult =
818
+ typeof raw === "string" ? { verdict: raw, fallback: false } : raw;
793
819
 
794
820
  recordEvent(eventStore, {
795
821
  runId,
796
822
  agentName: session.agentName,
797
823
  eventType: "custom",
798
824
  level: "warn",
799
- data: { type: "triage", escalationLevel: 2, verdict },
825
+ data: {
826
+ type: "triage",
827
+ escalationLevel: 2,
828
+ verdict: result.verdict,
829
+ triageFailed: result.fallback,
830
+ },
800
831
  });
801
832
 
802
- if (verdict === "terminate") {
833
+ if (result.verdict === "terminate") {
803
834
  // Record the failure via mulch (Tier 1 AI triage)
804
- await recordFailure(root, session, "AI triage classified as terminal failure", 1, verdict);
835
+ await recordFailure(
836
+ root,
837
+ session,
838
+ "AI triage classified as terminal failure",
839
+ 1,
840
+ result.verdict,
841
+ );
805
842
 
806
843
  await killAgent({ session, tmuxAlive, tmux, process: proc });
807
844
  return { terminated: true, stateChanged: true };
808
845
  }
809
846
 
810
- if (verdict === "retry") {
847
+ if (result.verdict === "retry") {
811
848
  // Send a nudge with a recovery message
812
849
  try {
813
850
  await nudge(
@@ -196,7 +196,7 @@ describe("evaluateHealth", () => {
196
196
  expect(check.action).toBe("escalate");
197
197
  });
198
198
 
199
- // --- Persistent capabilities (coordinator, monitor) ---
199
+ // --- Persistent capabilities (coordinator, orchestrator, monitor) ---
200
200
 
201
201
  test("persistent capability: coordinator with stale activity → still working, no escalation", () => {
202
202
  const staleActivity = new Date(Date.now() - 60_000).toISOString();
@@ -237,6 +237,20 @@ describe("evaluateHealth", () => {
237
237
  expect(check.action).toBe("none");
238
238
  });
239
239
 
240
+ test("persistent capability: orchestrator with stale activity → still working", () => {
241
+ const staleActivity = new Date(Date.now() - 60_000).toISOString();
242
+ const session = makeSession({
243
+ agentName: "orchestrator",
244
+ capability: "orchestrator",
245
+ state: "working",
246
+ lastActivity: staleActivity,
247
+ });
248
+ const check = evaluateHealth(session, true, THRESHOLDS);
249
+
250
+ expect(check.state).toBe("working");
251
+ expect(check.action).toBe("none");
252
+ });
253
+
240
254
  test("persistent capability: coordinator booting → transitions to working", () => {
241
255
  const session = makeSession({
242
256
  capability: "coordinator",
@@ -40,7 +40,7 @@ import type { AgentSession, AgentState, HealthCheck } from "../types.ts";
40
40
  *
41
41
  * Shared concept with src/commands/log.ts:PERSISTENT_CAPABILITIES.
42
42
  */
43
- const PERSISTENT_CAPABILITIES = new Set(["coordinator", "monitor"]);
43
+ const PERSISTENT_CAPABILITIES = new Set(["coordinator", "orchestrator", "monitor"]);
44
44
 
45
45
  /** Numeric ordering for forward-only state transitions. */
46
46
  const STATE_ORDER: Record<AgentState, number> = {
@@ -6,7 +6,7 @@
6
6
  * spawnClaude is NOT mocked — we rely on it failing naturally in tests.
7
7
  */
8
8
 
9
- import { afterEach, beforeEach, describe, expect, test } from "bun:test";
9
+ import { afterEach, beforeEach, describe, expect, spyOn, test } from "bun:test";
10
10
  import { mkdir, mkdtemp } from "node:fs/promises";
11
11
  import { tmpdir } from "node:os";
12
12
  import { join } from "node:path";
@@ -101,16 +101,18 @@ describe("triageAgent", () => {
101
101
  await cleanupTempDir(tempRoot);
102
102
  });
103
103
 
104
- test("returns 'extend' when no logs directory exists", async () => {
104
+ test("returns fallback TriageResult when no logs directory exists", async () => {
105
105
  const result = await triageAgent({
106
106
  agentName: "test-agent",
107
107
  root: tempRoot,
108
108
  lastActivity: "2026-02-13T10:00:00Z",
109
109
  });
110
- expect(result).toBe("extend");
110
+ expect(result.verdict).toBe("extend");
111
+ expect(result.fallback).toBe(true);
112
+ expect(result.reason).toBe("No logs available");
111
113
  });
112
114
 
113
- test("returns 'extend' when logs directory exists but is empty", async () => {
115
+ test("returns fallback TriageResult when logs directory exists but is empty", async () => {
114
116
  const logsDir = join(tempRoot, ".overstory", "logs", "test-agent");
115
117
  await mkdir(logsDir, { recursive: true });
116
118
 
@@ -119,10 +121,11 @@ describe("triageAgent", () => {
119
121
  root: tempRoot,
120
122
  lastActivity: "2026-02-13T10:00:00Z",
121
123
  });
122
- expect(result).toBe("extend");
124
+ expect(result.verdict).toBe("extend");
125
+ expect(result.fallback).toBe(true);
123
126
  });
124
127
 
125
- test("returns 'extend' when logs directory has session dir but no session.log", async () => {
128
+ test("returns fallback TriageResult when logs directory has session dir but no session.log", async () => {
126
129
  const logsDir = join(tempRoot, ".overstory", "logs", "test-agent", "2026-02-13T10-00-00");
127
130
  await Bun.write(join(logsDir, ".gitkeep"), "");
128
131
 
@@ -131,10 +134,11 @@ describe("triageAgent", () => {
131
134
  root: tempRoot,
132
135
  lastActivity: "2026-02-13T10:00:00Z",
133
136
  });
134
- expect(result).toBe("extend");
137
+ expect(result.verdict).toBe("extend");
138
+ expect(result.fallback).toBe(true);
135
139
  });
136
140
 
137
- test("returns 'extend' when session.log exists but claude binary fails", async () => {
141
+ test("returns fallback TriageResult when session.log exists but claude binary fails", async () => {
138
142
  const timestamp = "2026-02-13T10-00-00";
139
143
  const sessionLogPath = join(
140
144
  tempRoot,
@@ -160,6 +164,42 @@ describe("triageAgent", () => {
160
164
  lastActivity: "2026-02-13T10:00:00Z",
161
165
  timeoutMs: 500,
162
166
  });
163
- expect(result).toBe("extend");
167
+ expect(result.verdict).toBe("extend");
168
+ expect(result.fallback).toBe(true);
169
+ expect(result.reason).toBe("Claude unavailable");
170
+ });
171
+
172
+ test("writes stderr warning when claude is unavailable (fallback path)", async () => {
173
+ const timestamp = "2026-02-13T10-00-00";
174
+ const sessionLogPath = join(
175
+ tempRoot,
176
+ ".overstory",
177
+ "logs",
178
+ "test-agent",
179
+ timestamp,
180
+ "session.log",
181
+ );
182
+ await Bun.write(sessionLogPath, "some log content\n");
183
+
184
+ const written: string[] = [];
185
+ const spy = spyOn(process.stderr, "write").mockImplementation((chunk: unknown) => {
186
+ written.push(String(chunk));
187
+ return true;
188
+ });
189
+
190
+ try {
191
+ await triageAgent({
192
+ agentName: "test-agent",
193
+ root: tempRoot,
194
+ lastActivity: "2026-02-13T10:00:00Z",
195
+ timeoutMs: 500,
196
+ });
197
+ } finally {
198
+ spy.mockRestore();
199
+ }
200
+
201
+ expect(written.some((s) => s.includes("triage fallback") && s.includes("test-agent"))).toBe(
202
+ true,
203
+ );
164
204
  });
165
205
  });
@@ -12,6 +12,19 @@ import { AgentError } from "../errors.ts";
12
12
  import { getRuntime } from "../runtimes/registry.ts";
13
13
  import type { OverstoryConfig } from "../types.ts";
14
14
 
15
+ /**
16
+ * Structured result returned by triageAgent.
17
+ * Replaces bare string for observability — callers can distinguish AI verdicts from fallbacks.
18
+ */
19
+ export interface TriageResult {
20
+ /** Classification: "retry", "terminate", or "extend". */
21
+ verdict: "retry" | "terminate" | "extend";
22
+ /** True when the verdict is a safe default (no AI analysis performed). */
23
+ fallback: boolean;
24
+ /** Optional human-readable reason, set on fallback paths. */
25
+ reason?: string;
26
+ }
27
+
15
28
  /**
16
29
  * Triage a stalled agent by analyzing its recent log output with Claude.
17
30
  *
@@ -24,7 +37,7 @@ import type { OverstoryConfig } from "../types.ts";
24
37
  * @param options.agentName - Name of the agent to triage
25
38
  * @param options.root - Project root directory (contains .overstory/)
26
39
  * @param options.lastActivity - ISO timestamp of the agent's last recorded activity
27
- * @returns "retry" if recoverable, "terminate" if fatal, "extend" if likely long-running
40
+ * @returns TriageResult with verdict ("retry" | "terminate" | "extend"), fallback flag, and optional reason
28
41
  */
29
42
  export async function triageAgent(options: {
30
43
  agentName: string;
@@ -34,7 +47,7 @@ export async function triageAgent(options: {
34
47
  timeoutMs?: number;
35
48
  /** Overstory config for runtime resolution. */
36
49
  config?: OverstoryConfig;
37
- }): Promise<"retry" | "terminate" | "extend"> {
50
+ }): Promise<TriageResult> {
38
51
  const { agentName, root, lastActivity, timeoutMs, config } = options;
39
52
  const logsDir = join(root, ".overstory", "logs", agentName);
40
53
 
@@ -43,17 +56,20 @@ export async function triageAgent(options: {
43
56
  logContent = await readRecentLog(logsDir);
44
57
  } catch {
45
58
  // No logs available — assume long-running operation
46
- return "extend";
59
+ return { verdict: "extend", fallback: true, reason: "No logs available" };
47
60
  }
48
61
 
49
62
  const prompt = buildTriagePrompt(agentName, lastActivity, logContent);
50
63
 
51
64
  try {
52
65
  const response = await spawnClaude(prompt, timeoutMs, config);
53
- return classifyResponse(response);
66
+ return { verdict: classifyResponse(response), fallback: false };
54
67
  } catch {
55
68
  // Claude not available — default to extend (safe fallback)
56
- return "extend";
69
+ process.stderr.write(
70
+ `[watchdog] triage fallback for ${agentName}: Claude unavailable, defaulting to extend\n`,
71
+ );
72
+ return { verdict: "extend", fallback: true, reason: "Claude unavailable" };
57
73
  }
58
74
  }
59
75