@os-eco/overstory-cli 0.7.0 → 0.7.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (91)
  1. package/README.md +7 -6
  2. package/agents/builder.md +1 -1
  3. package/agents/coordinator.md +12 -11
  4. package/agents/lead.md +6 -6
  5. package/agents/monitor.md +4 -4
  6. package/agents/reviewer.md +1 -1
  7. package/agents/scout.md +5 -5
  8. package/agents/supervisor.md +36 -32
  9. package/package.json +1 -1
  10. package/src/agents/guard-rules.ts +97 -0
  11. package/src/agents/hooks-deployer.test.ts +6 -5
  12. package/src/agents/hooks-deployer.ts +7 -90
  13. package/src/agents/identity.test.ts +3 -2
  14. package/src/agents/manifest.test.ts +4 -3
  15. package/src/agents/overlay.test.ts +10 -9
  16. package/src/agents/overlay.ts +5 -5
  17. package/src/commands/agents.test.ts +10 -4
  18. package/src/commands/clean.test.ts +3 -0
  19. package/src/commands/completions.test.ts +8 -5
  20. package/src/commands/completions.ts +38 -2
  21. package/src/commands/coordinator.test.ts +1 -0
  22. package/src/commands/coordinator.ts +15 -11
  23. package/src/commands/costs.test.ts +9 -3
  24. package/src/commands/dashboard.test.ts +265 -6
  25. package/src/commands/dashboard.ts +367 -64
  26. package/src/commands/doctor.test.ts +3 -2
  27. package/src/commands/errors.test.ts +3 -2
  28. package/src/commands/feed.test.ts +3 -2
  29. package/src/commands/feed.ts +2 -29
  30. package/src/commands/init.test.ts +1 -2
  31. package/src/commands/init.ts +1 -8
  32. package/src/commands/inspect.test.ts +17 -2
  33. package/src/commands/log.test.ts +262 -8
  34. package/src/commands/log.ts +232 -110
  35. package/src/commands/logs.test.ts +3 -2
  36. package/src/commands/mail.test.ts +8 -2
  37. package/src/commands/metrics.test.ts +4 -3
  38. package/src/commands/monitor.ts +15 -11
  39. package/src/commands/nudge.test.ts +4 -2
  40. package/src/commands/prime.test.ts +4 -2
  41. package/src/commands/prime.ts +6 -2
  42. package/src/commands/replay.test.ts +3 -2
  43. package/src/commands/run.test.ts +3 -1
  44. package/src/commands/sling.test.ts +142 -1
  45. package/src/commands/sling.ts +145 -24
  46. package/src/commands/status.test.ts +9 -8
  47. package/src/commands/stop.test.ts +1 -0
  48. package/src/commands/supervisor.ts +19 -12
  49. package/src/commands/trace.test.ts +4 -2
  50. package/src/commands/watch.test.ts +3 -2
  51. package/src/commands/worktree.test.ts +9 -0
  52. package/src/config.test.ts +3 -3
  53. package/src/config.ts +29 -0
  54. package/src/doctor/agents.test.ts +3 -2
  55. package/src/doctor/consistency.test.ts +14 -0
  56. package/src/doctor/logs.test.ts +3 -2
  57. package/src/doctor/structure.test.ts +3 -2
  58. package/src/e2e/init-sling-lifecycle.test.ts +3 -5
  59. package/src/index.ts +3 -1
  60. package/src/logging/color.ts +1 -1
  61. package/src/logging/format.test.ts +110 -0
  62. package/src/logging/format.ts +42 -1
  63. package/src/logging/logger.test.ts +3 -2
  64. package/src/mail/broadcast.test.ts +1 -0
  65. package/src/mail/client.test.ts +3 -2
  66. package/src/mail/store.test.ts +3 -2
  67. package/src/merge/queue.test.ts +3 -2
  68. package/src/merge/resolver.test.ts +39 -0
  69. package/src/merge/resolver.ts +24 -5
  70. package/src/mulch/client.test.ts +63 -2
  71. package/src/mulch/client.ts +62 -1
  72. package/src/runtimes/claude.test.ts +5 -4
  73. package/src/runtimes/pi-guards.test.ts +457 -0
  74. package/src/runtimes/pi-guards.ts +349 -0
  75. package/src/runtimes/pi.test.ts +620 -0
  76. package/src/runtimes/pi.ts +244 -0
  77. package/src/runtimes/registry.test.ts +33 -0
  78. package/src/runtimes/registry.ts +15 -2
  79. package/src/runtimes/types.ts +63 -0
  80. package/src/schema-consistency.test.ts +5 -2
  81. package/src/sessions/compat.test.ts +3 -2
  82. package/src/sessions/compat.ts +1 -0
  83. package/src/sessions/store.test.ts +34 -2
  84. package/src/sessions/store.ts +37 -4
  85. package/src/test-helpers.ts +20 -1
  86. package/src/types.ts +17 -0
  87. package/src/watchdog/daemon.test.ts +11 -7
  88. package/src/watchdog/daemon.ts +1 -1
  89. package/src/watchdog/health.test.ts +1 -0
  90. package/src/watchdog/triage.test.ts +3 -2
  91. package/src/watchdog/triage.ts +14 -4
package/src/types.ts CHANGED
@@ -22,6 +22,14 @@ export interface ResolvedModel {
22
22
  env?: Record<string, string>;
23
23
  }
24
24
 
25
+ /** Configuration for the Pi runtime's model alias expansion. */
26
+ export interface PiRuntimeConfig {
27
+ /** Provider prefix for unqualified model aliases (e.g., "anthropic", "amazon-bedrock"). */
28
+ provider: string;
29
+ /** Maps short aliases (e.g., "opus") to provider-qualified model IDs. */
30
+ modelMap: Record<string, string>;
31
+ }
32
+
25
33
  // === Task Tracker ===
26
34
 
27
35
  /** Backend for the task tracker. Defined here for use in OverstoryConfig. */
@@ -89,6 +97,14 @@ export interface OverstoryConfig {
89
97
  runtime?: {
90
98
  /** Default runtime adapter name (default: "claude"). */
91
99
  default: string;
100
+ /**
101
+ * Runtime adapter for headless one-shot AI calls (--print mode).
102
+ * Used by merge/resolver.ts and watchdog/triage.ts.
103
+ * Falls back to runtime.default when omitted.
104
+ */
105
+ printCommand?: string;
106
+ /** Pi runtime configuration for model alias expansion. */
107
+ pi?: PiRuntimeConfig;
92
108
  };
93
109
  }
94
110
 
@@ -145,6 +161,7 @@ export interface AgentSession {
145
161
  lastActivity: string;
146
162
  escalationLevel: number; // Progressive nudge stage: 0=warn, 1=nudge, 2=escalate, 3=terminate
147
163
  stalledSince: string | null; // ISO timestamp when agent first entered stalled state
164
+ transcriptPath: string | null; // Runtime-provided transcript JSONL path (decoupled from ~/.claude/)
148
165
  }
149
166
 
150
167
  // === Agent Identity ===
package/src/watchdog/daemon.test.ts CHANGED
@@ -15,11 +15,12 @@
15
15
  */
16
16
 
17
17
  import { afterEach, beforeEach, describe, expect, test } from "bun:test";
18
- import { mkdir, mkdtemp, rm } from "node:fs/promises";
18
+ import { mkdir, mkdtemp } from "node:fs/promises";
19
19
  import { tmpdir } from "node:os";
20
20
  import { join } from "node:path";
21
21
  import { createEventStore } from "../events/store.ts";
22
22
  import { createSessionStore } from "../sessions/store.ts";
23
+ import { cleanupTempDir } from "../test-helpers.ts";
23
24
  import type { AgentSession, HealthCheck, StoredEvent } from "../types.ts";
24
25
  import { buildCompletionMessage, runDaemonTick } from "./daemon.ts";
25
26
 
@@ -75,6 +76,7 @@ function makeSession(overrides: Partial<AgentSession> = {}): AgentSession {
75
76
  runId: null,
76
77
  escalationLevel: 0,
77
78
  stalledSince: null,
79
+ transcriptPath: null,
78
80
  startedAt: new Date().toISOString(),
79
81
  lastActivity: new Date().toISOString(),
80
82
  ...overrides,
@@ -162,7 +164,7 @@ beforeEach(async () => {
162
164
  });
163
165
 
164
166
  afterEach(async () => {
165
- await rm(tempRoot, { recursive: true, force: true });
167
+ await cleanupTempDir(tempRoot);
166
168
  });
167
169
 
168
170
  describe("daemon tick", () => {
@@ -804,6 +806,7 @@ describe("daemon tick", () => {
804
806
  pid: process.pid,
805
807
  escalationLevel: 0,
806
808
  stalledSince: null,
809
+ transcriptPath: null,
807
810
  });
808
811
 
809
812
  writeSessionsToStore(tempRoot, [session]);
@@ -1098,7 +1101,7 @@ describe("daemon mulch failure recording", () => {
1098
1101
  });
1099
1102
 
1100
1103
  afterEach(async () => {
1101
- await rm(tempRoot, { recursive: true, force: true });
1104
+ await cleanupTempDir(tempRoot);
1102
1105
  });
1103
1106
 
1104
1107
  /** Track calls to the recordFailure mock. */
@@ -1389,7 +1392,7 @@ describe("run completion detection", () => {
1389
1392
  expect(coordinatorNudges).toHaveLength(1);
1390
1393
  // The test creates builders, so the message should be builder-specific
1391
1394
  expect(coordinatorNudges[0]?.message).toContain("builder");
1392
- expect(coordinatorNudges[0]?.message).toContain("merge/cleanup");
1395
+ expect(coordinatorNudges[0]?.message).toContain("Awaiting lead verification");
1393
1396
  });
1394
1397
 
1395
1398
  test("does not nudge when some workers still active", async () => {
@@ -1581,7 +1584,7 @@ describe("run completion detection", () => {
1581
1584
  expect(coordinatorNudges).toHaveLength(1);
1582
1585
  // The test creates builders, so the message should be builder-specific
1583
1586
  expect(coordinatorNudges[0]?.message).toContain("builder");
1584
- expect(coordinatorNudges[0]?.message).toContain("merge/cleanup");
1587
+ expect(coordinatorNudges[0]?.message).toContain("Awaiting lead verification");
1585
1588
  });
1586
1589
 
1587
1590
  test("does not nudge when no worker sessions in run", async () => {
@@ -1916,14 +1919,15 @@ describe("buildCompletionMessage", () => {
1916
1919
  expect(msg).not.toContain("merge/cleanup");
1917
1920
  });
1918
1921
 
1919
- test("all builders → contains 'builder' and 'Ready for merge/cleanup'", () => {
1922
+ test("all builders → contains 'builder' and 'Awaiting lead verification' (not merge authorization)", () => {
1920
1923
  const sessions = [
1921
1924
  makeSession({ capability: "builder", agentName: "builder-1" }),
1922
1925
  makeSession({ capability: "builder", agentName: "builder-2" }),
1923
1926
  ];
1924
1927
  const msg = buildCompletionMessage(sessions, testRunId);
1925
1928
  expect(msg).toContain("builder");
1926
- expect(msg).toContain("Ready for merge/cleanup");
1929
+ expect(msg).toContain("Awaiting lead verification");
1930
+ expect(msg).not.toContain("merge/cleanup");
1927
1931
  });
1928
1932
 
1929
1933
  test("all reviewers → contains 'reviewer' and 'Reviews done'", () => {
package/src/watchdog/daemon.ts CHANGED
@@ -148,7 +148,7 @@ export function buildCompletionMessage(
148
148
  return `[WATCHDOG] All ${count} scout(s) in run ${runId} have completed. Ready for next phase.`;
149
149
  }
150
150
  if (capabilities.has("builder")) {
151
- return `[WATCHDOG] All ${count} builder(s) in run ${runId} have completed. Ready for merge/cleanup.`;
151
+ return `[WATCHDOG] All ${count} builder(s) in run ${runId} have completed. Awaiting lead verification.`;
152
152
  }
153
153
  if (capabilities.has("reviewer")) {
154
154
  return `[WATCHDOG] All ${count} reviewer(s) in run ${runId} have completed. Reviews done.`;
package/src/watchdog/health.test.ts CHANGED
@@ -47,6 +47,7 @@ function makeSession(overrides: Partial<AgentSession> = {}): AgentSession {
47
47
  lastActivity: new Date().toISOString(),
48
48
  escalationLevel: 0,
49
49
  stalledSince: null,
50
+ transcriptPath: null,
50
51
  ...overrides,
51
52
  };
52
53
  }
package/src/watchdog/triage.test.ts CHANGED
@@ -7,9 +7,10 @@
7
7
  */
8
8
 
9
9
  import { afterEach, beforeEach, describe, expect, test } from "bun:test";
10
- import { mkdir, mkdtemp, rm } from "node:fs/promises";
10
+ import { mkdir, mkdtemp } from "node:fs/promises";
11
11
  import { tmpdir } from "node:os";
12
12
  import { join } from "node:path";
13
+ import { cleanupTempDir } from "../test-helpers.ts";
13
14
  import { buildTriagePrompt, classifyResponse, triageAgent } from "./triage.ts";
14
15
 
15
16
  describe("classifyResponse", () => {
@@ -97,7 +98,7 @@ describe("triageAgent", () => {
97
98
  });
98
99
 
99
100
  afterEach(async () => {
100
- await rm(tempRoot, { recursive: true, force: true });
101
+ await cleanupTempDir(tempRoot);
101
102
  });
102
103
 
103
104
  test("returns 'extend' when no logs directory exists", async () => {
package/src/watchdog/triage.ts CHANGED
@@ -9,6 +9,8 @@
9
9
  import { readdir } from "node:fs/promises";
10
10
  import { join } from "node:path";
11
11
  import { AgentError } from "../errors.ts";
12
+ import { getRuntime } from "../runtimes/registry.ts";
13
+ import type { OverstoryConfig } from "../types.ts";
12
14
 
13
15
  /**
14
16
  * Triage a stalled agent by analyzing its recent log output with Claude.
@@ -30,8 +32,10 @@ export async function triageAgent(options: {
30
32
  lastActivity: string;
31
33
  /** Timeout in ms for the Claude subprocess. Defaults to 30_000 (30s). */
32
34
  timeoutMs?: number;
35
+ /** Overstory config for runtime resolution. */
36
+ config?: OverstoryConfig;
33
37
  }): Promise<"retry" | "terminate" | "extend"> {
34
- const { agentName, root, lastActivity, timeoutMs } = options;
38
+ const { agentName, root, lastActivity, timeoutMs, config } = options;
35
39
  const logsDir = join(root, ".overstory", "logs", agentName);
36
40
 
37
41
  let logContent: string;
@@ -45,7 +49,7 @@ export async function triageAgent(options: {
45
49
  const prompt = buildTriagePrompt(agentName, lastActivity, logContent);
46
50
 
47
51
  try {
48
- const response = await spawnClaude(prompt, timeoutMs);
52
+ const response = await spawnClaude(prompt, timeoutMs, config);
49
53
  return classifyResponse(response);
50
54
  } catch {
51
55
  // Claude not available — default to extend (safe fallback)
@@ -130,10 +134,16 @@ const DEFAULT_TRIAGE_TIMEOUT_MS = 30_000;
130
134
  * @returns Claude's response text
131
135
  * @throws Error if claude is not installed, the process fails, or the timeout is reached
132
136
  */
133
- async function spawnClaude(prompt: string, timeoutMs?: number): Promise<string> {
137
+ async function spawnClaude(
138
+ prompt: string,
139
+ timeoutMs?: number,
140
+ config?: OverstoryConfig,
141
+ ): Promise<string> {
134
142
  const timeout = timeoutMs ?? DEFAULT_TRIAGE_TIMEOUT_MS;
135
143
 
136
- const proc = Bun.spawn(["claude", "--print", "-p", prompt], {
144
+ const runtime = getRuntime(config?.runtime?.printCommand ?? config?.runtime?.default, config);
145
+ const argv = runtime.buildPrintCommand(prompt);
146
+ const proc = Bun.spawn(argv, {
137
147
  stdout: "pipe",
138
148
  stderr: "pipe",
139
149
  });