@os-eco/overstory-cli 0.9.4 → 0.10.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +47 -18
- package/agents/builder.md +9 -8
- package/agents/coordinator.md +6 -6
- package/agents/lead.md +98 -82
- package/agents/merger.md +25 -14
- package/agents/reviewer.md +22 -16
- package/agents/scout.md +17 -12
- package/package.json +6 -3
- package/src/agents/capabilities.test.ts +85 -0
- package/src/agents/capabilities.ts +125 -0
- package/src/agents/headless-mail-injector.test.ts +448 -0
- package/src/agents/headless-mail-injector.ts +211 -0
- package/src/agents/headless-prompt.test.ts +102 -0
- package/src/agents/headless-prompt.ts +68 -0
- package/src/agents/hooks-deployer.test.ts +514 -14
- package/src/agents/hooks-deployer.ts +141 -0
- package/src/agents/overlay.test.ts +4 -4
- package/src/agents/overlay.ts +30 -8
- package/src/agents/turn-lock.test.ts +181 -0
- package/src/agents/turn-lock.ts +235 -0
- package/src/agents/turn-runner-dispatch.test.ts +182 -0
- package/src/agents/turn-runner-dispatch.ts +105 -0
- package/src/agents/turn-runner.test.ts +1450 -0
- package/src/agents/turn-runner.ts +1166 -0
- package/src/commands/clean.ts +54 -0
- package/src/commands/coordinator.test.ts +127 -0
- package/src/commands/coordinator.ts +203 -5
- package/src/commands/dashboard.test.ts +188 -0
- package/src/commands/dashboard.ts +13 -3
- package/src/commands/doctor.ts +3 -1
- package/src/commands/group.test.ts +94 -0
- package/src/commands/group.ts +49 -20
- package/src/commands/init.test.ts +8 -0
- package/src/commands/init.ts +8 -1
- package/src/commands/log.test.ts +56 -11
- package/src/commands/log.ts +134 -69
- package/src/commands/mail.test.ts +162 -0
- package/src/commands/mail.ts +64 -9
- package/src/commands/merge.test.ts +112 -1
- package/src/commands/merge.ts +17 -4
- package/src/commands/nudge.test.ts +351 -4
- package/src/commands/nudge.ts +356 -34
- package/src/commands/run.test.ts +43 -7
- package/src/commands/serve/build.test.ts +202 -0
- package/src/commands/serve/build.ts +206 -0
- package/src/commands/serve/coordinator-actions.test.ts +339 -0
- package/src/commands/serve/coordinator-actions.ts +408 -0
- package/src/commands/serve/dev.test.ts +168 -0
- package/src/commands/serve/dev.ts +117 -0
- package/src/commands/serve/mail-actions.test.ts +312 -0
- package/src/commands/serve/mail-actions.ts +167 -0
- package/src/commands/serve/rest.test.ts +1323 -0
- package/src/commands/serve/rest.ts +708 -0
- package/src/commands/serve/static.ts +51 -0
- package/src/commands/serve/ws.test.ts +361 -0
- package/src/commands/serve/ws.ts +332 -0
- package/src/commands/serve.test.ts +459 -0
- package/src/commands/serve.ts +565 -0
- package/src/commands/sling.test.ts +73 -1
- package/src/commands/sling.ts +149 -64
- package/src/commands/status.test.ts +9 -0
- package/src/commands/status.ts +12 -4
- package/src/commands/stop.test.ts +174 -1
- package/src/commands/stop.ts +107 -8
- package/src/commands/watch.test.ts +43 -0
- package/src/commands/watch.ts +153 -28
- package/src/config.ts +23 -0
- package/src/doctor/consistency.test.ts +106 -0
- package/src/doctor/consistency.ts +48 -1
- package/src/doctor/serve.test.ts +95 -0
- package/src/doctor/serve.ts +86 -0
- package/src/doctor/types.ts +2 -1
- package/src/doctor/watchdog.ts +57 -1
- package/src/events/tailer.test.ts +234 -1
- package/src/events/tailer.ts +90 -0
- package/src/index.ts +53 -6
- package/src/json.ts +29 -0
- package/src/mail/client.ts +15 -2
- package/src/mail/store.test.ts +82 -0
- package/src/mail/store.ts +41 -4
- package/src/merge/lock.test.ts +149 -0
- package/src/merge/lock.ts +140 -0
- package/src/runtimes/__fixtures__/claude-stream-fixture.ts +22 -0
- package/src/runtimes/claude.test.ts +791 -1
- package/src/runtimes/claude.ts +323 -1
- package/src/runtimes/connections.test.ts +141 -1
- package/src/runtimes/connections.ts +73 -4
- package/src/runtimes/headless-connection.test.ts +264 -0
- package/src/runtimes/headless-connection.ts +158 -0
- package/src/runtimes/types.ts +10 -0
- package/src/schema-consistency.test.ts +1 -0
- package/src/sessions/store.test.ts +390 -24
- package/src/sessions/store.ts +184 -19
- package/src/test-setup.test.ts +31 -0
- package/src/test-setup.ts +28 -0
- package/src/types.ts +56 -1
- package/src/utils/pid.test.ts +85 -1
- package/src/utils/pid.ts +86 -1
- package/src/utils/process-scan.test.ts +53 -0
- package/src/utils/process-scan.ts +76 -0
- package/src/watchdog/daemon.test.ts +1520 -411
- package/src/watchdog/daemon.ts +442 -83
- package/src/watchdog/health.test.ts +157 -0
- package/src/watchdog/health.ts +92 -25
- package/src/worktree/process.test.ts +71 -0
- package/src/worktree/process.ts +25 -5
- package/src/worktree/tmux.test.ts +3 -0
- package/src/worktree/tmux.ts +10 -3
- package/templates/CLAUDE.md.tmpl +19 -8
- package/templates/overlay.md.tmpl +3 -2
package/src/doctor/consistency.ts CHANGED

@@ -212,7 +212,9 @@ export async function checkConsistency(

   // 8. Check for SessionStore entries with missing tmux sessions
   const existingTmuxNames = new Set(tmuxSessions.map((s) => s.name));
-  const missingTmux = liveSessions.filter(
+  const missingTmux = liveSessions.filter(
+    (s) => s.tmuxSession.length > 0 && !existingTmuxNames.has(s.tmuxSession),
+  );

   if (missingTmux.length > 0) {
     checks.push({

@@ -232,6 +234,51 @@ export async function checkConsistency(
     });
   }

+  // 8b. Check for orphaned claude spawn PIDs (overstory-505d).
+  //
+  // An orphan is a session whose pid is still alive but should not be:
+  // - the session reached a terminal state (completed/zombie) yet the
+  //   spawn didn't exit, or
+  // - the tmux container is gone but the claude child survived (was
+  //   reparented to init when its bash wrapper got SIGHUP).
+  // Run `ov clean --all` to reap. Distinct from `dead-pids` (the inverse:
+  // session is live but its pid already died).
+  const orphanedSpawns: Array<{ session: AgentSession; reason: string }> = [];
+  for (const s of storeSessions) {
+    if (s.pid === null || !isProcessAliveFn(s.pid)) continue;
+    if (s.state === "completed" || s.state === "zombie") {
+      orphanedSpawns.push({
+        session: s,
+        reason: `state=${s.state} but pid ${s.pid} still alive`,
+      });
+      continue;
+    }
+    if (s.tmuxSession.length > 0 && !existingTmuxNames.has(s.tmuxSession)) {
+      orphanedSpawns.push({
+        session: s,
+        reason: `tmux session "${s.tmuxSession}" missing but pid ${s.pid} alive`,
+      });
+    }
+  }
+
+  if (orphanedSpawns.length > 0) {
+    checks.push({
+      name: "orphan-spawns",
+      category: "consistency",
+      status: "warn",
+      message: `Found ${orphanedSpawns.length} orphaned spawn process(es) — run "ov clean --all" to reap`,
+      details: orphanedSpawns.map(({ session, reason }) => `${session.agentName}: ${reason}`),
+      fixable: true,
+    });
+  } else {
+    checks.push({
+      name: "orphan-spawns",
+      category: "consistency",
+      status: "pass",
+      message: "No orphaned spawn processes detected",
+    });
+  }
+
   // 9. Check reviewer-to-builder ratio per lead
   const parentGroups = new Map<string, { builders: number; reviewers: number }>();
   for (const session of storeSessions) {
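The orphan classification in check 8b reduces to two predicates over a session row. Here is a minimal standalone sketch of that logic; the `SessionLike` shape and the `isAlive` probe are assumptions distilled from the fields this hunk actually touches, not part of the package's API:

```ts
// Sketch only — mirrors the two orphan conditions from check 8b above.
type SessionLike = {
  agentName: string;
  pid: number | null;
  state: string;
  tmuxSession: string;
};

function classifyOrphan(
  s: SessionLike,
  isAlive: (pid: number) => boolean,
  liveTmuxNames: Set<string>,
): string | null {
  // A dead (or never-recorded) pid cannot be an orphan.
  if (s.pid === null || !isAlive(s.pid)) return null;
  if (s.state === "completed" || s.state === "zombie") {
    return `state=${s.state} but pid ${s.pid} still alive`;
  }
  if (s.tmuxSession.length > 0 && !liveTmuxNames.has(s.tmuxSession)) {
    return `tmux session "${s.tmuxSession}" missing but pid ${s.pid} alive`;
  }
  return null;
}
```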
package/src/doctor/serve.test.ts ADDED

@@ -0,0 +1,95 @@
+import { afterEach, beforeEach, describe, expect, test } from "bun:test";
+import { mkdirSync, mkdtempSync, rmSync, writeFileSync } from "node:fs";
+import { tmpdir } from "node:os";
+import { join } from "node:path";
+import type { OverstoryConfig } from "../types.ts";
+import { checkServe } from "./serve.ts";
+
+describe("checkServe", () => {
+  let tempDir: string;
+  let mockConfig: OverstoryConfig;
+
+  beforeEach(() => {
+    tempDir = mkdtempSync(join(tmpdir(), "overstory-serve-doctor-test-"));
+    mockConfig = {
+      project: { name: "test", root: tempDir, canonicalBranch: "main" },
+      agents: {
+        manifestPath: "",
+        baseDir: "",
+        maxConcurrent: 5,
+        staggerDelayMs: 100,
+        maxDepth: 2,
+        maxSessionsPerRun: 0,
+        maxAgentsPerLead: 5,
+      },
+      worktrees: { baseDir: "" },
+      taskTracker: { backend: "auto", enabled: true },
+      mulch: { enabled: true, domains: [], primeFormat: "markdown" },
+      merge: { aiResolveEnabled: false, reimagineEnabled: false },
+      providers: {
+        anthropic: { type: "native" },
+      },
+      watchdog: {
+        tier0Enabled: false,
+        tier0IntervalMs: 30000,
+        tier1Enabled: false,
+        tier2Enabled: false,
+        staleThresholdMs: 300000,
+        zombieThresholdMs: 600000,
+        nudgeIntervalMs: 60000,
+      },
+      models: {},
+      logging: { verbose: false, redactSecrets: true },
+    };
+  });
+
+  afterEach(() => {
+    rmSync(tempDir, { recursive: true, force: true });
+  });
+
+  test("ui/dist missing — returns warn about missing build", async () => {
+    const checks = await checkServe(mockConfig, tempDir);
+    const distCheck = checks.find((c) => c.name === "serve ui/dist");
+
+    expect(distCheck).toBeDefined();
+    expect(distCheck?.status).toBe("warn");
+    expect(distCheck?.message).toContain("ui/dist not found");
+    expect(distCheck?.details?.some((d) => d.includes("ui/dist"))).toBe(true);
+  });
+
+  test("ui/dist exists but index.html missing — returns warn about incomplete build", async () => {
+    mkdirSync(join(tempDir, "ui", "dist"), { recursive: true });
+    const checks = await checkServe(mockConfig, tempDir);
+    const distCheck = checks.find((c) => c.name === "serve ui/dist");
+
+    expect(distCheck).toBeDefined();
+    expect(distCheck?.status).toBe("warn");
+    expect(distCheck?.message).toContain("index.html is missing");
+  });
+
+  test("ui/dist with index.html — returns pass", async () => {
+    mkdirSync(join(tempDir, "ui", "dist"), { recursive: true });
+    writeFileSync(join(tempDir, "ui", "dist", "index.html"), "<html></html>");
+    const checks = await checkServe(mockConfig, tempDir);
+    const distCheck = checks.find((c) => c.name === "serve ui/dist");
+
+    expect(distCheck).toBeDefined();
+    expect(distCheck?.status).toBe("pass");
+    expect(distCheck?.message).toContain("index.html");
+  });
+
+  test("port check included in results", async () => {
+    const checks = await checkServe(mockConfig, tempDir);
+    const portCheck = checks.find((c) => c.name === "serve port");
+
+    expect(portCheck).toBeDefined();
+    // Server not running — should warn (or pass if something happens to be on the default port)
+    expect(portCheck?.status === "warn" || portCheck?.status === "pass").toBe(true);
+  });
+
+  test("returns exactly 2 checks (ui/dist + port)", async () => {
+    const checks = await checkServe(mockConfig, tempDir);
+    expect(checks).toHaveLength(2);
+    expect(checks.map((c) => c.category).every((cat) => cat === "serve")).toBe(true);
+  });
+});
package/src/doctor/serve.ts ADDED

@@ -0,0 +1,86 @@
+import { existsSync } from "node:fs";
+import { join } from "node:path";
+import { DEFAULT_SERVE_PORT } from "../commands/serve.ts";
+import type { DoctorCheck, DoctorCheckFn } from "./types.ts";
+
+/**
+ * ov serve subsystem health checks.
+ * Validates ui/dist build output and port reachability.
+ */
+export const checkServe: DoctorCheckFn = async (config, _overstoryDir): Promise<DoctorCheck[]> => {
+  const checks: DoctorCheck[] = [];
+
+  // Check 1: ui/dist directory exists (only relevant if a UI has been built)
+  const uiDistPath = join(config.project.root, "ui", "dist");
+  const uiDistExists = existsSync(uiDistPath);
+  const indexHtmlExists = uiDistExists && existsSync(join(uiDistPath, "index.html"));
+
+  if (!uiDistExists) {
+    checks.push({
+      name: "serve ui/dist",
+      category: "serve",
+      status: "warn",
+      message: "ui/dist not found — run the UI build before starting ov serve",
+      details: [`Expected: ${uiDistPath}`],
+    });
+  } else if (!indexHtmlExists) {
+    checks.push({
+      name: "serve ui/dist",
+      category: "serve",
+      status: "warn",
+      message: "ui/dist exists but index.html is missing — UI build may be incomplete",
+      details: [`Expected: ${join(uiDistPath, "index.html")}`],
+    });
+  } else {
+    checks.push({
+      name: "serve ui/dist",
+      category: "serve",
+      status: "pass",
+      message: "ui/dist is present with index.html",
+    });
+  }
+
+  // Check 2: default port reachability (non-blocking probe)
+  const port = DEFAULT_SERVE_PORT;
+  const host = "127.0.0.1";
+  const reachable = await probePort(host, port);
+  if (reachable) {
+    checks.push({
+      name: "serve port",
+      category: "serve",
+      status: "pass",
+      message: `ov serve is reachable on ${host}:${port}`,
+    });
+  } else {
+    checks.push({
+      name: "serve port",
+      category: "serve",
+      status: "warn",
+      message: `ov serve is not running on ${host}:${port}`,
+      details: [`Start with: ov serve --port ${port}`],
+    });
+  }
+
+  return checks;
+};
+
+/**
+ * Probe whether a TCP port is open by attempting an HTTP connection.
+ * Returns true if the server responds, false on any error.
+ */
+async function probePort(host: string, port: number): Promise<boolean> {
+  try {
+    const controller = new AbortController();
+    const timeout = setTimeout(() => controller.abort(), 1000);
+    try {
+      const res = await fetch(`http://${host}:${port}/healthz`, {
+        signal: controller.signal,
+      });
+      return res.ok || res.status < 500;
+    } finally {
+      clearTimeout(timeout);
+    }
+  } catch {
+    return false;
+  }
+}
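For orientation, here is a hedged sketch of consuming these checks; the `reportServe` driver is hypothetical (doctor's real reporting pipeline is not shown in this diff), while `checkServe`, `OverstoryConfig`, and the `DoctorCheck` fields come straight from the files above:

```ts
// Hypothetical driver — not part of the package; shows the shape of checkServe's output.
import type { OverstoryConfig } from "../types.ts";
import type { DoctorCheck } from "./types.ts";
import { checkServe } from "./serve.ts";

async function reportServe(config: OverstoryConfig, overstoryDir: string): Promise<void> {
  const checks: DoctorCheck[] = await checkServe(config, overstoryDir);
  for (const check of checks) {
    // Each check carries name/category/status plus optional message/details.
    const detail = check.details?.length ? ` (${check.details.join("; ")})` : "";
    console.log(`[${check.status}] ${check.name}: ${check.message}${detail}`);
  }
}
```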
package/src/doctor/types.ts CHANGED

package/src/doctor/watchdog.ts CHANGED
@@ -3,6 +3,7 @@ import { stat, unlink } from "node:fs/promises";
 import { join } from "node:path";
 import { getRuntime } from "../runtimes/registry.ts";
 import { openSessionStore } from "../sessions/compat.ts";
+import { findRunningWatchdogProcesses } from "../utils/process-scan.ts";
 import { isProcessRunning } from "../watchdog/health.ts";
 import type { DoctorCheck, DoctorCheckFn } from "./types.ts";

@@ -134,7 +135,62 @@ export const checkWatchdog: DoctorCheckFn = async (
     }
   }

-  // Check 6:
+  // Check 6: multi-daemon detection (overstory-8ef6).
+  // Earlier releases had no exclusion lock, so multiple `ov watch` daemons
+  // could run simultaneously. We scan the process table for `ov watch`
+  // processes and flag any case with more than one. This is observational —
+  // even with the lock now in place, a corrupted/missing PID file could
+  // still let a foreign daemon slip past, and we want doctor to catch it.
+  try {
+    const watchProcs = await findRunningWatchdogProcesses();
+    if (watchProcs.length > 1) {
+      const lockOwner = existsSync(pidFilePath)
+        ? Number.parseInt((await Bun.file(pidFilePath).text()).trim(), 10)
+        : Number.NaN;
+      const lockOwnerLabel = Number.isFinite(lockOwner) ? `${lockOwner}` : "(none)";
+      const pidList = watchProcs.map((p) => p.pid).join(", ");
+      checks.push({
+        name: "watchdog multi-daemon",
+        category: "watchdog",
+        status: "fail",
+        message: `${watchProcs.length} 'ov watch' daemons running concurrently — only one should be live`,
+        details: [
+          `Live PIDs: ${pidList}`,
+          `PID-file owner: ${lockOwnerLabel}`,
+          "Run 'ov watch --kill-others' to terminate the foreign daemons.",
+        ],
+        fixable: true,
+        fix: async () => {
+          const ownerPid = Number.isFinite(lockOwner) ? lockOwner : null;
+          const messages: string[] = [];
+          for (const proc of watchProcs) {
+            if (proc.pid === ownerPid) continue;
+            try {
+              process.kill(proc.pid, "SIGTERM");
+              messages.push(`Killed foreign watchdog PID ${proc.pid}`);
+            } catch {
+              messages.push(`PID ${proc.pid} already gone`);
+            }
+          }
+          if (messages.length === 0) {
+            messages.push("No foreign watchdogs to kill — fix is a no-op");
+          }
+          return messages;
+        },
+      });
+    }
+  } catch {
+    // Process scan failure is non-fatal — leave a soft warning instead of
+    // failing the whole doctor run.
+    checks.push({
+      name: "watchdog multi-daemon",
+      category: "watchdog",
+      status: "warn",
+      message: "Could not scan process table for foreign 'ov watch' daemons",
+    });
+  }
+
+  // Check 7: Tier 1 triage available if tier1Enabled
   if (config.watchdog.tier1Enabled) {
     try {
       getRuntime(config?.runtime?.printCommand ?? config?.runtime?.default, config);
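The helper `findRunningWatchdogProcesses` lives in the new `src/utils/process-scan.ts`, whose body is not included in this excerpt. A rough sketch of one plausible implementation, assuming it shells out to `ps` under Bun and returns `{ pid }` records as the check above expects; treat every detail here as an assumption:

```ts
// Assumed shape only — process-scan.ts itself is not shown in this diff.
export interface ScannedProcess {
  pid: number;
  command: string;
}

export async function findRunningWatchdogProcesses(): Promise<ScannedProcess[]> {
  // `ps -eo pid=,args=` prints every process as "<pid> <full command line>".
  const proc = Bun.spawn(["ps", "-eo", "pid=,args="], { stdout: "pipe" });
  const out = await new Response(proc.stdout).text();
  return out
    .split("\n")
    .map((line) => line.trim())
    .filter((line) => line.length > 0)
    .map((line) => {
      const space = line.indexOf(" ");
      return {
        pid: Number.parseInt(line.slice(0, space), 10),
        command: line.slice(space + 1),
      };
    })
    .filter((p) => Number.isFinite(p.pid) && /\bov +watch\b/.test(p.command));
}
```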
package/src/events/tailer.test.ts CHANGED

@@ -13,8 +13,9 @@ import { afterEach, beforeEach, describe, expect, test } from "bun:test";
 import { mkdir, mkdtemp, writeFile } from "node:fs/promises";
 import { tmpdir } from "node:os";
 import { join } from "node:path";
+import { createSessionStore, type SessionStore } from "../sessions/store.ts";
 import { cleanupTempDir } from "../test-helpers.ts";
-import type { EventStore } from "../types.ts";
+import type { AgentSession, EventStore } from "../types.ts";
 import { createEventStore } from "./store.ts";
 import type { TailerHandle, TailerOptions } from "./tailer.ts";
 import { findLatestStdoutLog, startEventTailer } from "./tailer.ts";

@@ -484,3 +485,235 @@ describe("daemon tailer integration", () => {
     await cleanupTempDir(tmpDir);
   });
 });
+
+// === session_id capture (overstory-7b8c Phase 1) ===
+
+describe("startEventTailer session_id capture", () => {
+  let tmpDir: string;
+  let eventStore: EventStore;
+  let eventsDbPath: string;
+  let sessionStore: SessionStore;
+  let sessionsDbPath: string;
+
+  function makeSession(agentName: string): AgentSession {
+    const now = new Date().toISOString();
+    return {
+      id: `id-${agentName}`,
+      agentName,
+      capability: "builder",
+      worktreePath: "/tmp/wt",
+      branchName: "test-branch",
+      taskId: "task-1",
+      tmuxSession: "",
+      state: "working",
+      pid: 12345,
+      parentAgent: null,
+      depth: 0,
+      runId: null,
+      startedAt: now,
+      lastActivity: now,
+      escalationLevel: 0,
+      stalledSince: null,
+      transcriptPath: null,
+    };
+  }
+
+  beforeEach(async () => {
+    tmpDir = await createTempDir();
+    eventsDbPath = join(tmpDir, "events.db");
+    eventStore = createEventStore(eventsDbPath);
+    sessionsDbPath = join(tmpDir, "sessions.db");
+    sessionStore = createSessionStore(sessionsDbPath);
+  });
+
+  afterEach(async () => {
+    eventStore.close();
+    sessionStore.close();
+    await cleanupTempDir(tmpDir);
+  });
+
+  test("parses system event session_id and calls updateClaudeSessionId once", async () => {
+    const agentName = "agent-sid-1";
+    sessionStore.upsert(makeSession(agentName));
+    const logPath = await createAgentLogDir(tmpDir, agentName);
+
+    const sysLine = JSON.stringify({
+      type: "system",
+      subtype: "init",
+      session_id: "sess-first-pin",
+      timestamp: new Date().toISOString(),
+    });
+    await writeFile(logPath, `${sysLine}\n`);
+
+    const handle = startEventTailer({
+      stdoutLogPath: logPath,
+      agentName,
+      runId: null,
+      eventsDbPath,
+      pollIntervalMs: 50,
+      _eventStore: eventStore,
+      _sessionStore: sessionStore,
+    });
+
+    try {
+      await waitFor(() => sessionStore.getByName(agentName)?.claudeSessionId === "sess-first-pin");
+      expect(sessionStore.getByName(agentName)?.claudeSessionId).toBe("sess-first-pin");
+    } finally {
+      handle.stop();
+    }
+  });
+
+  test("ignores subsequent system events with the same session_id (single-fire)", async () => {
+    const agentName = "agent-sid-2";
+    sessionStore.upsert(makeSession(agentName));
+    const logPath = await createAgentLogDir(tmpDir, agentName);
+
+    // Three system events all carrying the same session_id.
+    const lines = [
+      JSON.stringify({
+        type: "system",
+        subtype: "init",
+        session_id: "sess-stable",
+        timestamp: new Date().toISOString(),
+      }),
+      JSON.stringify({
+        type: "system",
+        subtype: "ping",
+        session_id: "sess-stable",
+        timestamp: new Date().toISOString(),
+      }),
+      JSON.stringify({
+        type: "system",
+        subtype: "ping",
+        session_id: "sess-stable",
+        timestamp: new Date().toISOString(),
+      }),
+    ].join("\n");
+    await writeFile(logPath, `${lines}\n`);
+
+    // Wrap the SessionStore so we can count update calls without altering behaviour.
+    let updateCalls = 0;
+    const proxy: SessionStore = {
+      ...sessionStore,
+      upsert: (s) => sessionStore.upsert(s),
+      getByName: (n) => sessionStore.getByName(n),
+      getActive: () => sessionStore.getActive(),
+      getAll: () => sessionStore.getAll(),
+      count: () => sessionStore.count(),
+      getByRun: (r) => sessionStore.getByRun(r),
+      updateState: (n, s) => sessionStore.updateState(n, s),
+      updateLastActivity: (n) => sessionStore.updateLastActivity(n),
+      updateEscalation: (n, l, s) => sessionStore.updateEscalation(n, l, s),
+      updateTranscriptPath: (n, p) => sessionStore.updateTranscriptPath(n, p),
+      updateClaudeSessionId: (n, s) => {
+        updateCalls++;
+        sessionStore.updateClaudeSessionId(n, s);
+      },
+      remove: (n) => sessionStore.remove(n),
+      purge: (o) => sessionStore.purge(o),
+      close: () => {
+        /* owned by outer test */
+      },
+    };
+
+    const handle = startEventTailer({
+      stdoutLogPath: logPath,
+      agentName,
+      runId: null,
+      eventsDbPath,
+      pollIntervalMs: 50,
+      _eventStore: eventStore,
+      _sessionStore: proxy,
+    });
+
+    try {
+      // Wait until events.db has all three lines processed.
+      await waitFor(() => eventStore.getByAgent(agentName).length >= 3);
+      // Allow extra poll cycles to confirm no late updates sneak in.
+      await new Promise((resolve) => setTimeout(resolve, 150));
+      expect(updateCalls).toBe(1);
+      expect(sessionStore.getByName(agentName)?.claudeSessionId).toBe("sess-stable");
+    } finally {
+      handle.stop();
+    }
+  });
+
+  test("detects resume mismatch and invokes _onResumeMismatch DI hook (observed wins)", async () => {
+    const agentName = "agent-sid-3";
+    const session = makeSession(agentName);
+    session.claudeSessionId = "sess-requested-OLD";
+    sessionStore.upsert(session);
+    const logPath = await createAgentLogDir(tmpDir, agentName);
+
+    const sysLine = JSON.stringify({
+      type: "system",
+      subtype: "init",
+      session_id: "sess-observed-NEW",
+      timestamp: new Date().toISOString(),
+    });
+    await writeFile(logPath, `${sysLine}\n`);
+
+    const mismatches: Array<{ agent: string; requested: string; observed: string }> = [];
+    const handle = startEventTailer({
+      stdoutLogPath: logPath,
+      agentName,
+      runId: null,
+      eventsDbPath,
+      pollIntervalMs: 50,
+      _eventStore: eventStore,
+      _sessionStore: sessionStore,
+      _onResumeMismatch: (agent, requested, observed) =>
+        mismatches.push({ agent, requested, observed }),
+    });
+
+    try {
+      await waitFor(
+        () => sessionStore.getByName(agentName)?.claudeSessionId === "sess-observed-NEW",
+      );
+      expect(mismatches).toHaveLength(1);
+      expect(mismatches[0]).toEqual({
+        agent: agentName,
+        requested: "sess-requested-OLD",
+        observed: "sess-observed-NEW",
+      });
+      // observed wins — SessionStore is overwritten with the new id.
+      expect(sessionStore.getByName(agentName)?.claudeSessionId).toBe("sess-observed-NEW");
+    } finally {
+      handle.stop();
+    }
+  });
+
+  test("backward compat: tailer with no sessionsDbPath performs no SessionStore writes", async () => {
+    const agentName = "agent-sid-4";
+    sessionStore.upsert(makeSession(agentName));
+    const logPath = await createAgentLogDir(tmpDir, agentName);
+
+    const sysLine = JSON.stringify({
+      type: "system",
+      subtype: "init",
+      session_id: "sess-should-not-pin",
+      timestamp: new Date().toISOString(),
+    });
+    await writeFile(logPath, `${sysLine}\n`);
+
+    // No sessionsDbPath, no _sessionStore — tailer must still process events.
+    const handle = startEventTailer({
+      stdoutLogPath: logPath,
+      agentName,
+      runId: null,
+      eventsDbPath,
+      pollIntervalMs: 50,
+      _eventStore: eventStore,
+    });
+
+    try {
+      await waitFor(() => eventStore.getByAgent(agentName).length >= 1);
+      // Give the tailer extra time to confirm no late writes occur.
+      await new Promise((resolve) => setTimeout(resolve, 150));
+      // SessionStore must remain untouched.
+      expect(sessionStore.getByName(agentName)?.claudeSessionId ?? null).toBeNull();
+    } finally {
+      handle.stop();
+    }
+  });
+});
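These tests also call `createTempDir`, `createAgentLogDir`, and `waitFor`, helpers defined earlier in the same test file and not part of this hunk. For orientation, a minimal `waitFor` consistent with the call sites above might look like the sketch below; the timeout and poll interval values are assumptions:

```ts
// Sketch of a polling helper matching the call sites above (assumed defaults).
async function waitFor(
  predicate: () => boolean,
  timeoutMs = 5000,
  intervalMs = 25,
): Promise<void> {
  const deadline = Date.now() + timeoutMs;
  while (Date.now() < deadline) {
    if (predicate()) return;
    await new Promise((resolve) => setTimeout(resolve, intervalMs));
  }
  throw new Error("waitFor: condition not met before timeout");
}
```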
package/src/events/tailer.ts CHANGED
@@ -14,6 +14,7 @@

 import { readdir } from "node:fs/promises";
 import { join } from "node:path";
+import { createSessionStore, type SessionStore } from "../sessions/store.ts";
 import type { EventStore, EventType } from "../types.ts";
 import { createEventStore } from "./store.ts";

@@ -66,10 +67,26 @@ export interface TailerOptions {
   runId: string | null;
   /** Absolute path to events.db. The tailer opens its own connection. */
   eventsDbPath: string;
+  /**
+   * Absolute path to sessions.db. When present and not equal to ":memory:",
+   * the tailer opens a dedicated SessionStore to persist the runtime-provided
+   * session_id (e.g. Claude stream-json `session_id`). Omit (or set to
+   * ":memory:") for tailers that should not write to SessionStore.
+   */
+  sessionsDbPath?: string;
   /** Poll interval in milliseconds (default: 500). */
   pollIntervalMs?: number;
   /** DI: injected EventStore for testing (overrides eventsDbPath). */
   _eventStore?: EventStore;
+  /** DI: injected SessionStore for testing (overrides sessionsDbPath). */
+  _sessionStore?: SessionStore;
+  /**
+   * DI: invoked exactly once per tailer when an observed session_id differs
+   * from the prior claudeSessionId stored in SessionStore. Receives the agent
+   * name, the prior (requested) id, and the newly observed id. Production
+   * code logs a warning to stderr instead.
+   */
+  _onResumeMismatch?: (agentName: string, requested: string, observed: string) => void;
 }

 /**

@@ -109,6 +126,28 @@ export function startEventTailer(opts: TailerOptions): TailerHandle {
     }
   }

+  // Open a dedicated SessionStore for this tailer's lifetime when a real
+  // sessionsDbPath is provided. Tailers that omit sessionsDbPath (or pass
+  // ":memory:") skip session_id persistence entirely — backward compat for
+  // callers that don't yet route through the watchdog wiring.
+  let sessionStore: SessionStore | null = opts._sessionStore ?? null;
+  let ownedSessionStore = false;
+  if (!sessionStore && opts.sessionsDbPath && opts.sessionsDbPath !== ":memory:") {
+    try {
+      sessionStore = createSessionStore(opts.sessionsDbPath);
+      ownedSessionStore = true;
+    } catch {
+      // SessionStore failure is non-fatal — events still flow.
+      sessionStore = null;
+    }
+  }
+
+  // Single-fire guard for session_id pinning. Mirrors claude.ts:312
+  // `sessionIdPinned` so that updateClaudeSessionId is called at most once
+  // per tailer lifetime, even if many system events stream by.
+  let sessionIdPinned = false;
+  const onResumeMismatch = opts._onResumeMismatch;
+
   let stopped = false;
   let byteOffset = 0;
   let timer: ReturnType<typeof setTimeout> | null = null;

@@ -154,6 +193,48 @@ export function startEventTailer(opts: TailerOptions): TailerHandle {

       const toolDurationMs = typeof event.duration_ms === "number" ? event.duration_ms : null;

+      // Extract session_id from stream-json system events (e.g. Claude Code
+      // emits `{type:"system", subtype:"init", session_id:"sess-..."}` on
+      // every spawn — including --resume spawns, which assign a fresh id).
+      // The "result" event also carries session_id; treat both as authoritative.
+      // Single-fire per tailer lifetime so we don't churn writes.
+      if (!sessionIdPinned && sessionStore !== null) {
+        const sid =
+          typeof event.session_id === "string" && event.session_id.length > 0
+            ? event.session_id
+            : null;
+        if (sid !== null && (type === "system" || type === "result")) {
+          sessionIdPinned = true;
+          let prior: string | null = null;
+          try {
+            prior = sessionStore.getByName(agentName)?.claudeSessionId ?? null;
+          } catch {
+            prior = null;
+          }
+          try {
+            sessionStore.updateClaudeSessionId(agentName, sid);
+          } catch {
+            // Non-fatal: SessionStore write failure must not break tailing.
+          }
+          // Resume mismatch: requested != observed. The observed id wins
+          // (claude assigns fresh ids on --resume), but operators need to
+          // know — log a warning, and call DI hook for tests.
+          if (prior !== null && prior !== sid) {
+            if (onResumeMismatch) {
+              try {
+                onResumeMismatch(agentName, prior, sid);
+              } catch {
+                // DI hook errors must not crash the tailer.
+              }
+            } else {
+              process.stderr.write(
+                `[tailer] resume mismatch for ${agentName}: requested=${prior} observed=${sid}\n`,
+              );
+            }
+          }
+        }
+      }
+
       try {
         eventStore?.insert({
           runId,

@@ -201,6 +282,15 @@ export function startEventTailer(opts: TailerOptions): TailerHandle {
        }
        eventStore = null;
      }
+      // Close only the SessionStore this tailer owns.
+      if (ownedSessionStore && sessionStore) {
+        try {
+          sessionStore.close();
+        } catch {
+          // Non-fatal.
+        }
+        sessionStore = null;
+      }
     },
   };
 }