@os-eco/overstory-cli 0.10.3 → 0.11.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +4 -2
- package/agents/builder.md +10 -1
- package/agents/lead.md +106 -5
- package/package.json +1 -1
- package/src/agents/headless-mail-injector.ts +8 -0
- package/src/agents/mail-poll-detect.test.ts +153 -0
- package/src/agents/mail-poll-detect.ts +73 -0
- package/src/agents/overlay.test.ts +56 -0
- package/src/agents/overlay.ts +33 -0
- package/src/agents/scope-detect.test.ts +190 -0
- package/src/agents/scope-detect.ts +146 -0
- package/src/agents/turn-runner.test.ts +862 -0
- package/src/agents/turn-runner.ts +225 -8
- package/src/commands/agents.ts +9 -0
- package/src/commands/coordinator.test.ts +127 -0
- package/src/commands/coordinator.ts +71 -4
- package/src/commands/dashboard.ts +1 -1
- package/src/commands/log.test.ts +131 -0
- package/src/commands/log.ts +37 -2
- package/src/commands/merge.test.ts +118 -0
- package/src/commands/merge.ts +51 -8
- package/src/commands/sling.test.ts +104 -0
- package/src/commands/sling.ts +95 -8
- package/src/commands/stop.test.ts +81 -0
- package/src/index.ts +5 -1
- package/src/insights/quality-gates.test.ts +141 -0
- package/src/insights/quality-gates.ts +156 -0
- package/src/logging/theme.ts +4 -0
- package/src/merge/predict.test.ts +387 -0
- package/src/merge/predict.ts +249 -0
- package/src/merge/resolver.ts +1 -1
- package/src/mulch/client.ts +3 -3
- package/src/sessions/store.test.ts +267 -5
- package/src/sessions/store.ts +105 -7
- package/src/types.ts +51 -1
- package/src/watchdog/daemon.test.ts +124 -2
- package/src/watchdog/daemon.ts +27 -12
- package/src/watchdog/health.test.ts +133 -8
- package/src/watchdog/health.ts +37 -5
- package/src/worktree/manager.test.ts +218 -1
- package/src/worktree/manager.ts +55 -0
- package/src/worktree/tmux.test.ts +25 -0
- package/src/worktree/tmux.ts +17 -0
- package/templates/overlay.md.tmpl +2 -0
package/src/commands/sling.ts
CHANGED
|
@@ -158,6 +158,27 @@ export interface SlingOptions {
|
|
|
158
158
|
profile?: string;
|
|
159
159
|
headless?: boolean;
|
|
160
160
|
recover?: boolean;
|
|
161
|
+
/**
|
|
162
|
+
* Comma-separated list of sibling agent names dispatched in parallel that
|
|
163
|
+
* may share file scope with this agent (overstory-f76a). Plumbed through
|
|
164
|
+
* to `OverlayConfig.siblings` so the overlay renders rebase-before-merge_ready
|
|
165
|
+
* guidance.
|
|
166
|
+
*/
|
|
167
|
+
siblings?: string;
|
|
168
|
+
}
|
|
169
|
+
|
|
170
|
+
/**
|
|
171
|
+
* Parse the `--siblings <names>` argument into a normalized string array.
|
|
172
|
+
* Trims whitespace, drops empty entries. Empty / undefined input → `[]`.
|
|
173
|
+
*
|
|
174
|
+
* Exported for unit-testing.
|
|
175
|
+
*/
|
|
176
|
+
export function parseSiblings(raw: string | undefined): string[] {
|
|
177
|
+
if (!raw) return [];
|
|
178
|
+
return raw
|
|
179
|
+
.split(",")
|
|
180
|
+
.map((s) => s.trim())
|
|
181
|
+
.filter((s) => s.length > 0);
|
|
161
182
|
}
|
|
162
183
|
|
|
163
184
|
const WORKABLE_STATUSES = ["open", "in_progress"] as const;
|
|
@@ -173,12 +194,51 @@ export function isTaskWorkable(status: string, recover: boolean): boolean {
|
|
|
173
194
|
return (WORKABLE_STATUSES as readonly string[]).includes(status);
|
|
174
195
|
}
|
|
175
196
|
|
|
197
|
+
/**
|
|
198
|
+
* Resolve the effective `parentAgent` for a sling invocation, preserving the
|
|
199
|
+
* prior session's link on a re-spawn (`--recover`) when `--parent` was not
|
|
200
|
+
* explicitly passed.
|
|
201
|
+
*
|
|
202
|
+
* Pre-fix, sling always read `opts.parent ?? null` and upserted that into the
|
|
203
|
+
* session row, overwriting the prior `parent_agent` with null whenever a
|
|
204
|
+
* coordinator/lead invoked `ov sling --recover --name <existing>` without
|
|
205
|
+
* threading `--parent`. The runner then read `parentAgent === null` and
|
|
206
|
+
* skipped its in-band `worker_died` notify on a resumed-turn parser stall —
|
|
207
|
+
* the lead waited forever on a signal that never came (overstory-de3c).
|
|
208
|
+
*
|
|
209
|
+
* Resolution rules:
|
|
210
|
+
* - **Explicit caller intent wins.** If `opts.parent` is defined (including
|
|
211
|
+
* an empty string), use it verbatim. The caller may legitimately want to
|
|
212
|
+
* change or clear the parent on re-spawn.
|
|
213
|
+
* - **Caller silence preserves linkage.** If `opts.parent` is undefined and
|
|
214
|
+
* a prior session row exists with a non-null `parentAgent`, fall back to
|
|
215
|
+
* the prior value. Otherwise return null.
|
|
216
|
+
*
|
|
217
|
+
* Pure function so the regression test in `sling.test.ts` can assert behavior
|
|
218
|
+
* without spinning up the full sling command pipeline.
|
|
219
|
+
*/
|
|
220
|
+
export function resolveParentAgent(
|
|
221
|
+
optsParent: string | undefined,
|
|
222
|
+
existingSession: { parentAgent: string | null } | null,
|
|
223
|
+
): string | null {
|
|
224
|
+
if (optsParent !== undefined) {
|
|
225
|
+
return optsParent;
|
|
226
|
+
}
|
|
227
|
+
return existingSession?.parentAgent ?? null;
|
|
228
|
+
}
|
|
229
|
+
|
|
176
230
|
export interface AutoDispatchOptions {
|
|
177
231
|
agentName: string;
|
|
178
232
|
taskId: string;
|
|
179
233
|
capability: string;
|
|
180
234
|
specPath: string | null;
|
|
181
235
|
parentAgent: string | null;
|
|
236
|
+
/**
|
|
237
|
+
* The agent who invoked `ov sling` (from `OVERSTORY_AGENT_NAME` env var);
|
|
238
|
+
* takes precedence over `parentAgent` for the mail `from` field, since
|
|
239
|
+
* `--parent` describes the new agent's hierarchical parent, not the slinger.
|
|
240
|
+
*/
|
|
241
|
+
slingerName: string | null;
|
|
182
242
|
instructionPath: string;
|
|
183
243
|
}
|
|
184
244
|
|
|
@@ -195,7 +255,7 @@ export function buildAutoDispatch(opts: AutoDispatchOptions): {
|
|
|
195
255
|
subject: string;
|
|
196
256
|
body: string;
|
|
197
257
|
} {
|
|
198
|
-
const from = opts.parentAgent ?? "orchestrator";
|
|
258
|
+
const from = opts.slingerName ?? opts.parentAgent ?? "orchestrator";
|
|
199
259
|
const specLine = opts.specPath
|
|
200
260
|
? `Spec file: ${opts.specPath}`
|
|
201
261
|
: "No spec file provided. Check your overlay for task details.";
|
|
@@ -537,7 +597,9 @@ export async function slingCommand(taskId: string, opts: SlingOptions): Promise<
|
|
|
537
597
|
let name = nameWasAutoGenerated ? `${capability}-${taskId}` : rawName;
|
|
538
598
|
const specPath = opts.spec ?? null;
|
|
539
599
|
const filesRaw = opts.files;
|
|
540
|
-
const parentAgent = opts.parent ?? null;
|
600
|
+
// Reassigned later when re-spawning an existing agent to preserve the prior
|
|
601
|
+
// row's parentAgent — see overstory-de3c at the existingSession lookup below.
|
|
602
|
+
let parentAgent = opts.parent ?? null;
|
|
541
603
|
const depthStr = opts.depth;
|
|
542
604
|
const depth = depthStr !== undefined ? Number.parseInt(depthStr, 10) : 0;
|
|
543
605
|
const forceHierarchy = opts.forceHierarchy ?? false;
|
|
@@ -614,6 +676,8 @@ export async function slingCommand(taskId: string, opts: SlingOptions): Promise<
|
|
|
614
676
|
.filter((f) => f.length > 0)
|
|
615
677
|
: [];
|
|
616
678
|
|
|
679
|
+
const siblings = parseSiblings(opts.siblings);
|
|
680
|
+
|
|
617
681
|
// 1. Load config
|
|
618
682
|
const cwd = process.cwd();
|
|
619
683
|
const config = await loadConfig(cwd);
|
|
@@ -715,18 +779,35 @@ export async function slingCommand(taskId: string, opts: SlingOptions): Promise<
|
|
|
715
779
|
);
|
|
716
780
|
}
|
|
717
781
|
|
|
782
|
+
// Track the prior session row when re-spawning against an existing agent
|
|
783
|
+
// name so downstream code can preserve linkage (parentAgent, claudeSessionId)
|
|
784
|
+
// that the upsert would otherwise erase. Auto-generated names are unique
|
|
785
|
+
// so there is never a prior row to preserve.
|
|
786
|
+
let existingSession: AgentSession | null = null;
|
|
718
787
|
if (nameWasAutoGenerated) {
|
|
719
788
|
const takenNames = activeSessions.map((s) => s.agentName);
|
|
720
789
|
name = generateAgentName(capability, taskId, takenNames);
|
|
721
790
|
} else {
|
|
722
|
-
|
|
723
|
-
if (
|
|
724
|
-
|
|
725
|
-
|
|
726
|
-
|
|
791
|
+
existingSession = store.getByName(name);
|
|
792
|
+
if (
|
|
793
|
+
existingSession &&
|
|
794
|
+
existingSession.state !== "zombie" &&
|
|
795
|
+
existingSession.state !== "completed"
|
|
796
|
+
) {
|
|
797
|
+
throw new AgentError(
|
|
798
|
+
`Agent name "${name}" is already in use (state: ${existingSession.state})`,
|
|
799
|
+
{
|
|
800
|
+
agentName: name,
|
|
801
|
+
},
|
|
802
|
+
);
|
|
727
803
|
}
|
|
728
804
|
}
|
|
729
805
|
|
|
806
|
+
// Preserve the prior session's parentAgent on re-spawn when --parent was
|
|
807
|
+
// not explicitly passed (overstory-de3c). See `resolveParentAgent` for the
|
|
808
|
+
// full rationale and resolution rules.
|
|
809
|
+
parentAgent = resolveParentAgent(opts.parent, existingSession);
|
|
810
|
+
|
|
730
811
|
// 5d. Task-level locking: prevent concurrent agents on the same task ID.
|
|
731
812
|
// Exception: the parent agent may delegate its own task to a child.
|
|
732
813
|
const lockHolder = checkTaskLock(activeSessions, taskId);
|
|
@@ -897,6 +978,7 @@ export async function slingCommand(taskId: string, opts: SlingOptions): Promise<
|
|
|
897
978
|
trackerCli: trackerCliName(resolvedBackend),
|
|
898
979
|
trackerName: resolvedBackend,
|
|
899
980
|
instructionPath: runtime.instructionPath,
|
|
981
|
+
siblings,
|
|
900
982
|
};
|
|
901
983
|
|
|
902
984
|
await writeOverlay(worktreePath, overlayConfig, config.project.root, runtime.instructionPath);
|
|
@@ -922,12 +1004,14 @@ export async function slingCommand(taskId: string, opts: SlingOptions): Promise<
|
|
|
922
1004
|
|
|
923
1005
|
// 9b. Send auto-dispatch mail so it exists when SessionStart hook fires.
|
|
924
1006
|
// This eliminates the race where coordinator sends dispatch AFTER agent boots.
|
|
1007
|
+
const slingerName = process.env.OVERSTORY_AGENT_NAME?.trim() || null;
|
|
925
1008
|
const dispatch = buildAutoDispatch({
|
|
926
1009
|
agentName: name,
|
|
927
1010
|
taskId,
|
|
928
1011
|
capability,
|
|
929
1012
|
specPath: absoluteSpecPath,
|
|
930
1013
|
parentAgent,
|
|
1014
|
+
slingerName,
|
|
931
1015
|
instructionPath: runtime.instructionPath,
|
|
932
1016
|
});
|
|
933
1017
|
const mailStore = createMailStore(join(overstoryDir, "mail.db"));
|
|
@@ -996,7 +1080,10 @@ export async function slingCommand(taskId: string, opts: SlingOptions): Promise<
|
|
|
996
1080
|
// (or "completed" if terminal mail observed), and exits. No persistent
|
|
997
1081
|
// process remains after this returns; subsequent turns are driven by
|
|
998
1082
|
// `ov serve` (mail) or `ov nudge`.
|
|
999
|
-
|
|
1083
|
+
// `existingSession` was captured during the name-collision check (above).
|
|
1084
|
+
// Re-using it here keeps re-spawn linkage (parentAgent + claudeSessionId)
|
|
1085
|
+
// resolved from the same row.
|
|
1086
|
+
const priorClaudeSessionId = existingSession?.claudeSessionId ?? null;
|
|
1000
1087
|
|
|
1001
1088
|
// Build the initial prompt (mulch expertise + pending mail + beacon)
|
|
1002
1089
|
// as the first user turn.
|
|
package/src/commands/stop.test.ts
CHANGED
|
@@ -634,6 +634,87 @@ describe("stopCommand stop behavior", () => {
|
|
|
634
634
|
);
|
|
635
635
|
});
|
|
636
636
|
|
|
637
|
+
test("lead falls back to historical subject when mail store cannot be opened (overstory-7291)", async () => {
|
|
638
|
+
const session = makeAgentSession({
|
|
639
|
+
agentName: "lead-zeta",
|
|
640
|
+
capability: "lead",
|
|
641
|
+
state: "working",
|
|
642
|
+
tmuxSession: "overstory-lead-zeta",
|
|
643
|
+
});
|
|
644
|
+
saveSessionsToDb([session]);
|
|
645
|
+
|
|
646
|
+
// Make mail.db un-openable by creating a directory at that path. SQLite
|
|
647
|
+
// cannot open a directory as a database, so createMailStore() throws and
|
|
648
|
+
// buildLeadCompletedSubject hits its outer-catch fallback.
|
|
649
|
+
const mailDbPath = join(overstoryDir, "mail.db");
|
|
650
|
+
await mkdir(mailDbPath, { recursive: true });
|
|
651
|
+
|
|
652
|
+
const { deps } = makeDeps({ [session.tmuxSession]: true });
|
|
653
|
+
await stopCommand("lead-zeta", {}, deps);
|
|
654
|
+
|
|
655
|
+
const markerPath = join(overstoryDir, "pending-nudges", "coordinator.json");
|
|
656
|
+
const marker = JSON.parse(await Bun.file(markerPath).text());
|
|
657
|
+
expect(marker.subject).toBe(
|
|
658
|
+
"Lead lead-zeta completed — check mail for merge_ready/worker_done",
|
|
659
|
+
);
|
|
660
|
+
});
|
|
661
|
+
|
|
662
|
+
test("lead with malformed merge_ready payload skips that message (overstory-7291)", async () => {
|
|
663
|
+
const session = makeAgentSession({
|
|
664
|
+
agentName: "lead-eta",
|
|
665
|
+
capability: "lead",
|
|
666
|
+
state: "working",
|
|
667
|
+
tmuxSession: "overstory-lead-eta",
|
|
668
|
+
});
|
|
669
|
+
saveSessionsToDb([session]);
|
|
670
|
+
|
|
671
|
+
// Insert two merge_ready rows directly via the store: one with a valid
|
|
672
|
+
// MergeReadyPayload, one with a non-JSON payload string. sendProtocol
|
|
673
|
+
// would JSON.stringify any payload, so it cannot produce a malformed
|
|
674
|
+
// row — the low-level store accepts the payload column verbatim. The
|
|
675
|
+
// loop must skip the malformed one (inner catch + continue) and use
|
|
676
|
+
// the valid one, yielding the single-branch subject variant.
|
|
677
|
+
const mailStore = createMailStore(join(overstoryDir, "mail.db"));
|
|
678
|
+
const validPayload: MergeReadyPayload = {
|
|
679
|
+
branch: "overstory/lead-eta/bead-99",
|
|
680
|
+
taskId: "bead-99",
|
|
681
|
+
agentName: "lead-eta",
|
|
682
|
+
filesModified: ["src/x.ts"],
|
|
683
|
+
};
|
|
684
|
+
mailStore.insert({
|
|
685
|
+
id: "msg-valid",
|
|
686
|
+
from: "lead-eta",
|
|
687
|
+
to: "coordinator",
|
|
688
|
+
subject: "merge_ready: bead-99",
|
|
689
|
+
body: "ready",
|
|
690
|
+
type: "merge_ready",
|
|
691
|
+
priority: "normal",
|
|
692
|
+
threadId: null,
|
|
693
|
+
payload: JSON.stringify(validPayload),
|
|
694
|
+
});
|
|
695
|
+
mailStore.insert({
|
|
696
|
+
id: "msg-malformed",
|
|
697
|
+
from: "lead-eta",
|
|
698
|
+
to: "coordinator",
|
|
699
|
+
subject: "merge_ready: broken",
|
|
700
|
+
body: "ready",
|
|
701
|
+
type: "merge_ready",
|
|
702
|
+
priority: "normal",
|
|
703
|
+
threadId: null,
|
|
704
|
+
payload: "not-json{",
|
|
705
|
+
});
|
|
706
|
+
mailStore.close();
|
|
707
|
+
|
|
708
|
+
const { deps } = makeDeps({ [session.tmuxSession]: true });
|
|
709
|
+
await stopCommand("lead-eta", {}, deps);
|
|
710
|
+
|
|
711
|
+
const markerPath = join(overstoryDir, "pending-nudges", "coordinator.json");
|
|
712
|
+
const marker = JSON.parse(await Bun.file(markerPath).text());
|
|
713
|
+
expect(marker.subject).toBe(
|
|
714
|
+
"Lead lead-eta sent merge_ready for branch overstory/lead-eta/bead-99",
|
|
715
|
+
);
|
|
716
|
+
});
|
|
717
|
+
|
|
637
718
|
test("stopping a non-lead agent does NOT write lead_completed pending-nudge", async () => {
|
|
638
719
|
const session = makeAgentSession({ state: "working", capability: "builder" });
|
|
639
720
|
saveSessionsToDb([session]);
|
package/src/index.ts
CHANGED
|
@@ -52,7 +52,7 @@ import { ConfigError, OverstoryError, WorktreeError } from "./errors.ts";
|
|
|
52
52
|
import { jsonError } from "./json.ts";
|
|
53
53
|
import { brand, chalk, muted, setQuiet } from "./logging/color.ts";
|
|
54
54
|
|
|
55
|
-
export const VERSION = "0.10.3";
|
|
55
|
+
export const VERSION = "0.11.0";
|
|
56
56
|
|
|
57
57
|
const rawArgs = process.argv.slice(2);
|
|
58
58
|
|
|
@@ -287,6 +287,10 @@ program
|
|
|
287
287
|
.option("--name <name>", "Unique agent name (auto-generated if omitted)")
|
|
288
288
|
.option("--spec <path>", "Path to task spec file")
|
|
289
289
|
.option("--files <list>", "Exclusive file scope (comma-separated)")
|
|
290
|
+
.option(
|
|
291
|
+
"--siblings <names>",
|
|
292
|
+
"Comma-separated names of parallel sibling agents that may share file scope. Renders rebase-before-merge_ready guidance into the overlay.",
|
|
293
|
+
)
|
|
290
294
|
.option("--parent <agent>", "Parent agent for hierarchy tracking")
|
|
291
295
|
.option("--depth <n>", "Current hierarchy depth", "0")
|
|
292
296
|
.option("--skip-scout", "Skip scout phase for lead agents")
|
|
package/src/insights/quality-gates.test.ts
ADDED
|
@@ -0,0 +1,141 @@
|
|
|
1
|
+
import { afterEach, beforeEach, describe, expect, test } from "bun:test";
|
|
2
|
+
import { mkdtemp } from "node:fs/promises";
|
|
3
|
+
import { tmpdir } from "node:os";
|
|
4
|
+
import { join } from "node:path";
|
|
5
|
+
import {
|
|
6
|
+
cleanupTempDir,
|
|
7
|
+
commitFile,
|
|
8
|
+
createTempGitRepo,
|
|
9
|
+
getDefaultBranch,
|
|
10
|
+
} from "../test-helpers.ts";
|
|
11
|
+
import type { QualityGate } from "../types.ts";
|
|
12
|
+
import { hasWorkToVerify, runQualityGates } from "./quality-gates.ts";
|
|
13
|
+
|
|
14
|
+
describe("runQualityGates", () => {
|
|
15
|
+
let tempDir: string;
|
|
16
|
+
|
|
17
|
+
beforeEach(async () => {
|
|
18
|
+
tempDir = await mkdtemp(join(tmpdir(), "qg-test-"));
|
|
19
|
+
});
|
|
20
|
+
|
|
21
|
+
afterEach(async () => {
|
|
22
|
+
await cleanupTempDir(tempDir);
|
|
23
|
+
});
|
|
24
|
+
|
|
25
|
+
test("returns null when gates list is empty", async () => {
|
|
26
|
+
const result = await runQualityGates([], tempDir);
|
|
27
|
+
expect(result).toBeNull();
|
|
28
|
+
});
|
|
29
|
+
|
|
30
|
+
test("status is 'success' when all gates exit 0", async () => {
|
|
31
|
+
const gates: QualityGate[] = [
|
|
32
|
+
{ name: "True", command: "true", description: "always passes" },
|
|
33
|
+
{ name: "Echo", command: "echo ok", description: "always passes" },
|
|
34
|
+
];
|
|
35
|
+
const result = await runQualityGates(gates, tempDir);
|
|
36
|
+
expect(result).not.toBeNull();
|
|
37
|
+
expect(result?.status).toBe("success");
|
|
38
|
+
expect(result?.results).toHaveLength(2);
|
|
39
|
+
expect(result?.results.every((r) => r.passed)).toBe(true);
|
|
40
|
+
});
|
|
41
|
+
|
|
42
|
+
test("status is 'failure' when no gates exit 0", async () => {
|
|
43
|
+
const gates: QualityGate[] = [
|
|
44
|
+
{ name: "False1", command: "false", description: "always fails" },
|
|
45
|
+
{ name: "False2", command: "false", description: "always fails" },
|
|
46
|
+
];
|
|
47
|
+
const result = await runQualityGates(gates, tempDir);
|
|
48
|
+
expect(result).not.toBeNull();
|
|
49
|
+
expect(result?.status).toBe("failure");
|
|
50
|
+
expect(result?.results.every((r) => !r.passed)).toBe(true);
|
|
51
|
+
});
|
|
52
|
+
|
|
53
|
+
test("status is 'partial' on mixed exit codes", async () => {
|
|
54
|
+
const gates: QualityGate[] = [
|
|
55
|
+
{ name: "Pass", command: "true", description: "passes" },
|
|
56
|
+
{ name: "Fail", command: "false", description: "fails" },
|
|
57
|
+
];
|
|
58
|
+
const result = await runQualityGates(gates, tempDir);
|
|
59
|
+
expect(result).not.toBeNull();
|
|
60
|
+
expect(result?.status).toBe("partial");
|
|
61
|
+
expect(result?.results.filter((r) => r.passed)).toHaveLength(1);
|
|
62
|
+
expect(result?.results.filter((r) => !r.passed)).toHaveLength(1);
|
|
63
|
+
});
|
|
64
|
+
|
|
65
|
+
test("a gate that hangs past the timeout is treated as failed", async () => {
|
|
66
|
+
const gates: QualityGate[] = [
|
|
67
|
+
{ name: "Sleeper", command: "sleep 5", description: "intentionally slow" },
|
|
68
|
+
];
|
|
69
|
+
const result = await runQualityGates(gates, tempDir, { timeoutMs: 200 });
|
|
70
|
+
expect(result).not.toBeNull();
|
|
71
|
+
expect(result?.status).toBe("failure");
|
|
72
|
+
expect(result?.results[0]?.passed).toBe(false);
|
|
73
|
+
expect(result?.results[0]?.exitCode).toBe(-1);
|
|
74
|
+
// Should return well before the 5s the gate would otherwise take
|
|
75
|
+
expect(result?.totalDurationMs).toBeLessThan(2_000);
|
|
76
|
+
});
|
|
77
|
+
|
|
78
|
+
test("captures per-gate duration and exit code", async () => {
|
|
79
|
+
const gates: QualityGate[] = [{ name: "Quick", command: "true", description: "passes fast" }];
|
|
80
|
+
const result = await runQualityGates(gates, tempDir);
|
|
81
|
+
expect(result?.results[0]?.exitCode).toBe(0);
|
|
82
|
+
expect(result?.results[0]?.durationMs).toBeGreaterThanOrEqual(0);
|
|
83
|
+
expect(result?.totalDurationMs).toBeGreaterThanOrEqual(result?.results[0]?.durationMs ?? 0);
|
|
84
|
+
});
|
|
85
|
+
});
|
|
86
|
+
|
|
87
|
+
describe("hasWorkToVerify", () => {
|
|
88
|
+
let repoDir: string;
|
|
89
|
+
|
|
90
|
+
beforeEach(async () => {
|
|
91
|
+
repoDir = await createTempGitRepo();
|
|
92
|
+
});
|
|
93
|
+
|
|
94
|
+
afterEach(async () => {
|
|
95
|
+
await cleanupTempDir(repoDir);
|
|
96
|
+
});
|
|
97
|
+
|
|
98
|
+
test("returns false on a fresh repo with no commits past base and a clean tree", async () => {
|
|
99
|
+
const branch = await getDefaultBranch(repoDir);
|
|
100
|
+
const result = await hasWorkToVerify(repoDir, branch);
|
|
101
|
+
expect(result).toBe(false);
|
|
102
|
+
});
|
|
103
|
+
|
|
104
|
+
test("returns true when worktree has uncommitted changes", async () => {
|
|
105
|
+
const branch = await getDefaultBranch(repoDir);
|
|
106
|
+
await Bun.write(join(repoDir, "dirty.txt"), "uncommitted content");
|
|
107
|
+
const result = await hasWorkToVerify(repoDir, branch);
|
|
108
|
+
expect(result).toBe(true);
|
|
109
|
+
});
|
|
110
|
+
|
|
111
|
+
test("returns true when there are commits past base", async () => {
|
|
112
|
+
// Pin a "base-ref" branch at the initial commit, then add a new commit
|
|
113
|
+
// on the working branch so HEAD is one commit ahead of base-ref.
|
|
114
|
+
const proc = Bun.spawn(["git", "branch", "base-ref", "HEAD"], { cwd: repoDir });
|
|
115
|
+
await proc.exited;
|
|
116
|
+
await commitFile(repoDir, "new-file.txt", "second commit", "second commit");
|
|
117
|
+
|
|
118
|
+
const result = await hasWorkToVerify(repoDir, "base-ref");
|
|
119
|
+
expect(result).toBe(true);
|
|
120
|
+
});
|
|
121
|
+
|
|
122
|
+
test("returns true when base ref cannot be resolved (fail open)", async () => {
|
|
123
|
+
const result = await hasWorkToVerify(repoDir, "definitely-not-a-real-ref");
|
|
124
|
+
expect(result).toBe(true);
|
|
125
|
+
});
|
|
126
|
+
|
|
127
|
+
test("defaults baseRef to 'main' when not provided", async () => {
|
|
128
|
+
// On a clean repo with default branch 'main' the function should resolve
|
|
129
|
+
// 'main' successfully and report no work to verify.
|
|
130
|
+
const branch = await getDefaultBranch(repoDir);
|
|
131
|
+
// Skip this assertion if the default branch isn't 'main' (e.g., master on
|
|
132
|
+
// some CI runners) — fall back to passing the explicit branch.
|
|
133
|
+
if (branch === "main") {
|
|
134
|
+
const result = await hasWorkToVerify(repoDir);
|
|
135
|
+
expect(result).toBe(false);
|
|
136
|
+
} else {
|
|
137
|
+
const result = await hasWorkToVerify(repoDir, branch);
|
|
138
|
+
expect(result).toBe(false);
|
|
139
|
+
}
|
|
140
|
+
});
|
|
141
|
+
});
|
|
package/src/insights/quality-gates.ts
ADDED
|
@@ -0,0 +1,156 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Quality-gate runner used at session-end to determine the outcome status
|
|
3
|
+
* threaded into mulch record writes (success / partial / failure).
|
|
4
|
+
*
|
|
5
|
+
* Used by `src/commands/log.ts` session-end handler. Cheap precheck via
|
|
6
|
+
* `hasWorkToVerify()` lets read-only agents (scout/reviewer) skip gate
|
|
7
|
+
* execution entirely when no commits or uncommitted changes exist.
|
|
8
|
+
*/
|
|
9
|
+
|
|
10
|
+
import type { QualityGate } from "../types.ts";
|
|
11
|
+
|
|
12
|
+
export interface QualityGateResult {
|
|
13
|
+
name: string;
|
|
14
|
+
command: string;
|
|
15
|
+
passed: boolean;
|
|
16
|
+
durationMs: number;
|
|
17
|
+
exitCode: number;
|
|
18
|
+
}
|
|
19
|
+
|
|
20
|
+
export interface QualityGateOutcome {
|
|
21
|
+
status: "success" | "partial" | "failure";
|
|
22
|
+
results: QualityGateResult[];
|
|
23
|
+
totalDurationMs: number;
|
|
24
|
+
}
|
|
25
|
+
|
|
26
|
+
const DEFAULT_TIMEOUT_MS = 300_000;
|
|
27
|
+
|
|
28
|
+
/**
|
|
29
|
+
* Run each configured quality gate against `cwd` and aggregate the result.
|
|
30
|
+
*
|
|
31
|
+
* Returns null when `gates` is empty.
|
|
32
|
+
*
|
|
33
|
+
* - all passed -> "success"
|
|
34
|
+
* - none passed -> "failure"
|
|
35
|
+
* - mixed -> "partial"
|
|
36
|
+
*/
|
|
37
|
+
export async function runQualityGates(
|
|
38
|
+
gates: QualityGate[],
|
|
39
|
+
cwd: string,
|
|
40
|
+
options?: { timeoutMs?: number },
|
|
41
|
+
): Promise<QualityGateOutcome | null> {
|
|
42
|
+
if (gates.length === 0) return null;
|
|
43
|
+
|
|
44
|
+
const timeoutMs = options?.timeoutMs ?? DEFAULT_TIMEOUT_MS;
|
|
45
|
+
const results: QualityGateResult[] = [];
|
|
46
|
+
const totalStart = Date.now();
|
|
47
|
+
|
|
48
|
+
for (const gate of gates) {
|
|
49
|
+
const argv = gate.command.split(/\s+/).filter((s) => s.length > 0);
|
|
50
|
+
if (argv.length === 0) {
|
|
51
|
+
results.push({
|
|
52
|
+
name: gate.name,
|
|
53
|
+
command: gate.command,
|
|
54
|
+
passed: false,
|
|
55
|
+
durationMs: 0,
|
|
56
|
+
exitCode: -1,
|
|
57
|
+
});
|
|
58
|
+
continue;
|
|
59
|
+
}
|
|
60
|
+
|
|
61
|
+
const start = Date.now();
|
|
62
|
+
let proc: ReturnType<typeof Bun.spawn> | undefined;
|
|
63
|
+
let timer: ReturnType<typeof setTimeout> | undefined;
|
|
64
|
+
let timedOut = false;
|
|
65
|
+
try {
|
|
66
|
+
proc = Bun.spawn(argv, {
|
|
67
|
+
cwd,
|
|
68
|
+
stdout: "ignore",
|
|
69
|
+
stderr: "ignore",
|
|
70
|
+
});
|
|
71
|
+
timer = setTimeout(() => {
|
|
72
|
+
timedOut = true;
|
|
73
|
+
try {
|
|
74
|
+
proc?.kill();
|
|
75
|
+
} catch {
|
|
76
|
+
// best-effort kill
|
|
77
|
+
}
|
|
78
|
+
}, timeoutMs);
|
|
79
|
+
const exitCode = await proc.exited;
|
|
80
|
+
const durationMs = Date.now() - start;
|
|
81
|
+
results.push({
|
|
82
|
+
name: gate.name,
|
|
83
|
+
command: gate.command,
|
|
84
|
+
passed: !timedOut && exitCode === 0,
|
|
85
|
+
durationMs,
|
|
86
|
+
exitCode: timedOut ? -1 : exitCode,
|
|
87
|
+
});
|
|
88
|
+
} catch {
|
|
89
|
+
results.push({
|
|
90
|
+
name: gate.name,
|
|
91
|
+
command: gate.command,
|
|
92
|
+
passed: false,
|
|
93
|
+
durationMs: Date.now() - start,
|
|
94
|
+
exitCode: -1,
|
|
95
|
+
});
|
|
96
|
+
} finally {
|
|
97
|
+
if (timer) clearTimeout(timer);
|
|
98
|
+
}
|
|
99
|
+
}
|
|
100
|
+
|
|
101
|
+
const passedCount = results.filter((r) => r.passed).length;
|
|
102
|
+
let status: "success" | "partial" | "failure";
|
|
103
|
+
if (passedCount === results.length) {
|
|
104
|
+
status = "success";
|
|
105
|
+
} else if (passedCount === 0) {
|
|
106
|
+
status = "failure";
|
|
107
|
+
} else {
|
|
108
|
+
status = "partial";
|
|
109
|
+
}
|
|
110
|
+
|
|
111
|
+
return {
|
|
112
|
+
status,
|
|
113
|
+
results,
|
|
114
|
+
totalDurationMs: Date.now() - totalStart,
|
|
115
|
+
};
|
|
116
|
+
}
|
|
117
|
+
|
|
118
|
+
/**
|
|
119
|
+
* Cheap precheck: returns true when the worktree has commits beyond `baseRef`
|
|
120
|
+
* or any uncommitted modifications. Used to skip gate execution for read-only
|
|
121
|
+
* agents that produced no work.
|
|
122
|
+
*
|
|
123
|
+
* Fails open: if HEAD or `baseRef` cannot be resolved, returns true so that
|
|
124
|
+
* gates still run rather than silently skipping.
|
|
125
|
+
*/
|
|
126
|
+
export async function hasWorkToVerify(cwd: string, baseRef = "main"): Promise<boolean> {
|
|
127
|
+
const head = await runGit(cwd, ["rev-parse", "--verify", "HEAD"]);
|
|
128
|
+
const base = await runGit(cwd, ["rev-parse", "--verify", baseRef]);
|
|
129
|
+
if (head.exitCode !== 0 || base.exitCode !== 0) return true;
|
|
130
|
+
|
|
131
|
+
const ahead = await runGit(cwd, ["rev-list", "--count", `${baseRef}..HEAD`]);
|
|
132
|
+
if (ahead.exitCode === 0) {
|
|
133
|
+
const count = Number.parseInt(ahead.stdout.trim(), 10);
|
|
134
|
+
if (Number.isFinite(count) && count > 0) return true;
|
|
135
|
+
}
|
|
136
|
+
|
|
137
|
+
const status = await runGit(cwd, ["status", "--porcelain"]);
|
|
138
|
+
if (status.exitCode === 0 && status.stdout.trim().length > 0) return true;
|
|
139
|
+
|
|
140
|
+
return false;
|
|
141
|
+
}
|
|
142
|
+
|
|
143
|
+
async function runGit(cwd: string, args: string[]): Promise<{ stdout: string; exitCode: number }> {
|
|
144
|
+
try {
|
|
145
|
+
const proc = Bun.spawn(["git", ...args], {
|
|
146
|
+
cwd,
|
|
147
|
+
stdout: "pipe",
|
|
148
|
+
stderr: "ignore",
|
|
149
|
+
});
|
|
150
|
+
const stdout = await new Response(proc.stdout).text();
|
|
151
|
+
const exitCode = await proc.exited;
|
|
152
|
+
return { stdout, exitCode };
|
|
153
|
+
} catch {
|
|
154
|
+
return { stdout: "", exitCode: -1 };
|
|
155
|
+
}
|
|
156
|
+
}
|
package/src/logging/theme.ts
CHANGED
|
@@ -14,6 +14,8 @@ import { brand, color, noColor, visibleLength } from "./color.ts";
|
|
|
14
14
|
/** Maps agent states to their visual color functions. */
|
|
15
15
|
const STATE_COLORS: Record<AgentState, ColorFn> = {
|
|
16
16
|
working: color.green,
|
|
17
|
+
in_turn: color.green,
|
|
18
|
+
between_turns: color.cyan,
|
|
17
19
|
booting: color.yellow,
|
|
18
20
|
stalled: color.red,
|
|
19
21
|
zombie: color.dim,
|
|
@@ -23,6 +25,8 @@ const STATE_COLORS: Record<AgentState, ColorFn> = {
|
|
|
23
25
|
/** Maps agent states to their icon characters. */
|
|
24
26
|
const STATE_ICONS: Record<AgentState, string> = {
|
|
25
27
|
working: ">",
|
|
28
|
+
in_turn: ">",
|
|
29
|
+
between_turns: "~",
|
|
26
30
|
booting: "~",
|
|
27
31
|
stalled: "!",
|
|
28
32
|
zombie: "x",
|