pi-crew 0.7.5 → 0.7.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +71 -0
- package/README.md +11 -11
- package/docs/commands-reference.md +14 -10
- package/docs/troubleshooting.md +131 -0
- package/docs/usage.md +9 -4
- package/package.json +1 -1
- package/src/config/config.ts +11 -4
- package/src/extension/action-suggestions.ts +71 -0
- package/src/extension/context-status-injection.ts +32 -1
- package/src/extension/register.ts +71 -65
- package/src/extension/team-tool/api.ts +3 -2
- package/src/extension/team-tool/cancel.ts +5 -4
- package/src/extension/team-tool/explain.ts +2 -1
- package/src/extension/team-tool/failure-patterns.ts +124 -0
- package/src/extension/team-tool/inspect.ts +10 -6
- package/src/extension/team-tool/lifecycle-actions.ts +5 -4
- package/src/extension/team-tool/respond.ts +4 -3
- package/src/extension/team-tool/run-not-found.ts +54 -0
- package/src/extension/team-tool/run.ts +26 -4
- package/src/extension/team-tool/status.ts +58 -4
- package/src/extension/team-tool.ts +5 -3
- package/src/runtime/async-runner.ts +7 -0
- package/src/runtime/background-runner.ts +7 -1
- package/src/runtime/chain-parser.ts +13 -5
- package/src/runtime/checkpoint.ts +13 -1
- package/src/runtime/child-pi.ts +9 -1
- package/src/runtime/crash-recovery.ts +21 -1
- package/src/runtime/live-session-runtime.ts +15 -1
- package/src/runtime/parent-guard.ts +2 -2
- package/src/runtime/pi-spawn.ts +66 -0
- package/src/runtime/stale-reconciler.ts +38 -3
- package/src/runtime/task-runner.ts +10 -1
- package/src/runtime/team-runner.ts +19 -2
- package/src/runtime/verification-gates.ts +21 -1
- package/src/schema/team-tool-schema.ts +9 -0
- package/src/state/blob-store.ts +12 -10
- package/src/state/event-log-rotation.ts +114 -93
- package/src/state/event-log.ts +79 -20
- package/src/state/health-store.ts +6 -1
- package/src/state/locks.ts +66 -16
- package/src/state/state-store.ts +14 -1
- package/src/ui/card-colors.ts +7 -3
- package/src/ui/dashboard-panes/agents-pane.ts +15 -2
- package/src/ui/live-duration.ts +58 -0
- package/src/ui/tool-render.ts +7 -11
- package/src/ui/tool-renderers/index.ts +6 -3
- package/src/ui/widget/widget-formatters.ts +2 -13
- package/src/utils/fs-watch.ts +11 -60
- package/src/utils/run-watcher-registry.ts +164 -0
- package/src/workflows/discover-workflows.ts +2 -1
- package/src/workflows/workflow-config.ts +5 -0
- package/src/runtime/dynamic-script-runner.ts +0 -497
- package/src/runtime/sandbox.ts +0 -335
package/src/ui/card-colors.ts
CHANGED
|
@@ -14,6 +14,7 @@
|
|
|
14
14
|
* 30-35% — word-level emphasis (prominent)
|
|
15
15
|
*/
|
|
16
16
|
import type { CrewTheme } from "./theme-adapter.ts";
|
|
17
|
+
import { visibleWidth as visualWidth } from "../utils/visual.ts";
|
|
17
18
|
|
|
18
19
|
// ── ANSI parsing ────────────────────────────────────────────────────────
|
|
19
20
|
|
|
@@ -96,12 +97,15 @@ export function deriveCardBackground(
|
|
|
96
97
|
|
|
97
98
|
// ── Helpers for padding lines with a background ─────────────────────────
|
|
98
99
|
|
|
99
|
-
const ANSI_SGR_RE = /\x1b\[[0-9;]*m/g;
|
|
100
100
|
const RESET = "\x1b[0m";
|
|
101
101
|
|
|
102
|
-
/** Strip ANSI SGR codes
|
|
102
|
+
/** Strip ANSI SGR codes then compute the VISUAL width (Unicode-aware).
|
|
103
|
+
* Round 23 (BUG 2): previously this used `.length` (UTF-16 code units), which
|
|
104
|
+
* under-counts CJK/emoji → wrong padding → broken frame borders in crew cards.
|
|
105
|
+
* Delegate to the canonical Unicode-aware visualWidth from utils/visual.ts
|
|
106
|
+
* used by every other renderer. */
|
|
103
107
|
export function visibleWidth(text: string): number {
|
|
104
|
-
return text
|
|
108
|
+
return visualWidth(text);
|
|
105
109
|
}
|
|
106
110
|
|
|
107
111
|
/**
|
|
@@ -3,7 +3,9 @@ import { iconForStatus } from "../status-colors.ts";
|
|
|
3
3
|
import type { RunUiSnapshot } from "../snapshot-types.ts";
|
|
4
4
|
import { spinnerFrame } from "../spinner.ts";
|
|
5
5
|
import type { CrewAgentRecord } from "../../runtime/crew-agent-runtime.ts";
|
|
6
|
+
import { formatCost } from "../../state/usage.ts";
|
|
6
7
|
import { listLiveAgents, listLiveAgentsByWorkspace, type LiveAgentHandle } from "../../runtime/live-agent-manager.ts";
|
|
8
|
+
import { computeLiveDurationMs } from "../live-duration.ts";
|
|
7
9
|
|
|
8
10
|
/**
|
|
9
11
|
* Returns true if this agent did real work (LLM call, tool use, or non-trivial duration).
|
|
@@ -82,15 +84,26 @@ export function renderAgentsPane(snapshot: RunUiSnapshot | undefined, options: R
|
|
|
82
84
|
: agent.status === "failed" ? (agent.error ?? "failed")
|
|
83
85
|
: "done";
|
|
84
86
|
|
|
85
|
-
// Stats: tokens + duration
|
|
87
|
+
// Stats: tokens + cost + duration
|
|
86
88
|
const stats: string[] = [];
|
|
87
89
|
const tokenTotal = (agent.usage?.input ?? 0) + (agent.usage?.output ?? 0) + (agent.usage?.cacheRead ?? 0) + (agent.usage?.cacheWrite ?? 0);
|
|
88
90
|
if (tokenTotal > 0) {
|
|
89
91
|
const tok = tokenTotal >= 1000 ? `${(tokenTotal / 1000).toFixed(1)}k` : `${tokenTotal}`;
|
|
90
92
|
stats.push(tok);
|
|
91
93
|
}
|
|
94
|
+
// Per-agent cost (Round 17 BS-1): the data is already on task.usage.cost;
|
|
95
|
+
// surface it live so the user sees $ burn per agent during a run.
|
|
96
|
+
if (agent.usage?.cost && agent.usage.cost > 0) {
|
|
97
|
+
stats.push(formatCost(agent.usage.cost));
|
|
98
|
+
}
|
|
92
99
|
if (liveHandle) {
|
|
93
|
-
|
|
100
|
+
// Round 23 (BUG 1): the duration math here was naive —
|
|
101
|
+
// (completedAtMs ?? Date.now()) - startedAtMs
|
|
102
|
+
// which produced a giant NEGATIVE duration whenever startedAtMs was
|
|
103
|
+
// 0/undefined/bad, or a race set completedAtMs < startedAtMs. This
|
|
104
|
+
// fired for EVERY running live agent in the dashboard. Use the shared,
|
|
105
|
+
// validated computeLiveDurationMs (mirrors widget-formatters.ts).
|
|
106
|
+
const ms = computeLiveDurationMs(liveHandle.activity);
|
|
94
107
|
stats.push(`${(ms / 1000).toFixed(1)}s`);
|
|
95
108
|
if (options.showModel !== false && liveHandle.modelName && liveHandle.modelName !== "default") {
|
|
96
109
|
stats.push(liveHandle.modelName);
|
|
@@ -0,0 +1,58 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Round 23 (BUG 1 fix): live-agent duration computation.
|
|
3
|
+
*
|
|
4
|
+
* The naive `(completedAtMs ?? Date.now()) - startedAtMs` produced giant
|
|
5
|
+
* NEGATIVE durations for every running live agent whenever startedAtMs was
|
|
6
|
+
* 0/undefined/out-of-range, or a race set completedAtMs < startedAtMs.
|
|
7
|
+
*
|
|
8
|
+
* This module consolidates the validated duration math (previously duplicated
|
|
9
|
+
* between widget-formatters.ts and agents-pane.ts) into one pure, fully
|
|
10
|
+
* testable function: it normalizes seconds-vs-ms, sanity-checks the start
|
|
11
|
+
* timestamp against the current time, and never returns a negative value.
|
|
12
|
+
*/
|
|
13
|
+
|
|
14
|
+
/** Start/end timestamps of a live agent activity. Both fields are optional. */
export interface LiveActivity {
  /** When the activity started; epoch seconds or milliseconds (see `toMs`). */
  startedAtMs?: number;
  /** When the activity finished; absent or 0 while it is still running. */
  completedAtMs?: number;
}

/**
 * Normalize a raw timestamp that may be epoch seconds or epoch milliseconds.
 *
 * - Non-positive and NaN inputs map to 0 ("no timestamp").
 * - Values strictly between 1e9 and 1e10 are treated as seconds and scaled
 *   to milliseconds.
 * - Every other value is returned unchanged.
 */
function toMs(v: number): number {
  // `!(v > 0)` rejects NaN as well as zero and negative values.
  if (!(v > 0)) return 0;
  if (v > 1_000_000_000 && v < 10_000_000_000) return v * 1000;
  return v;
}

/**
 * Compute the live elapsed duration in milliseconds for an agent activity.
 *
 * - Never negative (clamped to >= 0).
 * - Returns 0 if the start timestamp is missing or implausible: not positive,
 *   more than 1 minute in the future, or more than ~10 years before `nowMs`.
 * - Uses `completedAtMs` when present and not more than 1 minute in the
 *   future; otherwise `nowMs` (still running).
 *
 * @param activity the live agent activity handle
 * @param nowMs    optional override for `Date.now()` (tests / determinism)
 * @returns elapsed milliseconds, always finite and >= 0
 */
export function computeLiveDurationMs(activity: LiveActivity, nowMs: number = Date.now()): number {
  // Tolerated clock skew for timestamps that lie slightly in the future.
  const futureSkewMs = 60_000;
  // Oldest start still considered real (~10 years). The window must be shorter
  // than the time elapsed since the Unix epoch, otherwise `nowMs - window` is
  // negative and the lower bound can never reject a small garbage value.
  const maxPlausibleAgeMs = 315_569_260_000;

  // `|| 0` folds undefined, 0 and NaN into "no timestamp".
  const startedMs = toMs(activity.startedAtMs || 0);
  const completedMs = toMs(activity.completedAtMs || 0);

  const isValidStarted =
    startedMs > 0 &&
    startedMs < nowMs + futureSkewMs &&
    startedMs > nowMs - maxPlausibleAgeMs;
  // Without a trustworthy start there is nothing to measure. Returning here
  // also stops a slightly-future completion time from surfacing as a duration.
  if (!isValidStarted) return 0;

  const hasSaneCompletion = completedMs > 0 && completedMs < nowMs + futureSkewMs;
  const endMs = hasSaneCompletion ? completedMs : nowMs;
  const elapsedMs = endMs - startedMs;
  // Covers completedAtMs < startedAtMs races and a non-finite `nowMs`.
  return Number.isFinite(elapsedMs) && elapsedMs >= 0 ? elapsedMs : 0;
}

/**
 * Format a live duration in seconds with one decimal, e.g. `12.3s`.
 * Returns `0.0s` when the computed duration is 0.
 *
 * @param activity the live agent activity handle
 * @param nowMs    optional override for `Date.now()` (tests / determinism)
 */
export function formatLiveDuration(activity: LiveActivity, nowMs: number = Date.now()): string {
  return `${(computeLiveDurationMs(activity, nowMs) / 1000).toFixed(1)}s`;
}
|
package/src/ui/tool-render.ts
CHANGED
|
@@ -10,6 +10,7 @@
|
|
|
10
10
|
import { Container, Spacer, Text, visibleWidth } from "@earendil-works/pi-tui";
|
|
11
11
|
import type { CrewAgentRecord } from "../runtime/crew-agent-runtime.ts";
|
|
12
12
|
import { replaceTabs } from "./render-diff.ts";
|
|
13
|
+
import { truncateToWidth } from "../utils/visual.ts";
|
|
13
14
|
|
|
14
15
|
// ── Types ──────────────────────────────────────────────────────────────
|
|
15
16
|
export interface Theme {
|
|
@@ -68,17 +69,12 @@ function formatContextUsage(tokens: number, contextWindow: number | undefined):
|
|
|
68
69
|
|
|
69
70
|
export function truncLine(text: string, maxWidth: number): string {
|
|
70
71
|
if (text.includes("\n") || text.includes("\r")) text = text.replace(/\r?\n/g, "↵ ");
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
}
|
|
78
|
-
if (width >= maxWidth - 1) return result + "…";
|
|
79
|
-
result += text[i]; width++;
|
|
80
|
-
}
|
|
81
|
-
return result;
|
|
72
|
+
// Round 23 (BUG 4): previously this loop counted 1 visual column per UTF-16
|
|
73
|
+
// code unit and indexed text[i], so for CJK it emitted up to 2x the visual
|
|
74
|
+
// width (frame overflow) and for emoji it split surrogate pairs (U+FFFD).
|
|
75
|
+
// Delegate to the grapheme/ANSI-aware truncateToWidth (keeps ANSI codes,
|
|
76
|
+
// respects double-wide CJK + surrogate pairs, adds the '…' ellipsis).
|
|
77
|
+
return truncateToWidth(text, maxWidth);
|
|
82
78
|
}
|
|
83
79
|
|
|
84
80
|
export function formatToolPreview(name: string, args: Record<string, unknown>): string {
|
|
@@ -12,6 +12,7 @@ import type { CrewTheme } from "../theme-adapter.ts";
|
|
|
12
12
|
import { truncLine, formatTokens, formatDuration } from "../tool-render.ts";
|
|
13
13
|
import type { CrewAgentRecord } from "../../runtime/crew-agent-runtime.ts";
|
|
14
14
|
import { isBrief, briefToolResult } from "./brief-mode.ts";
|
|
15
|
+
import { truncateToWidth } from "../../utils/visual.ts";
|
|
15
16
|
|
|
16
17
|
// ── Types ──────────────────────────────────────────────────────────────
|
|
17
18
|
|
|
@@ -42,9 +43,11 @@ function padVisual(str: string, targetWidth: number): string {
|
|
|
42
43
|
/** Truncate a string (which may contain ANSI codes) to a target VISUAL width. */
|
|
43
44
|
function truncVisual(str: string, maxWidth: number): string {
|
|
44
45
|
if (visibleWidth(str) <= maxWidth) return str;
|
|
45
|
-
//
|
|
46
|
-
|
|
47
|
-
|
|
46
|
+
// Round 23 (BUG 3): previously used String.slice(0, maxWidth) which counts
|
|
47
|
+
// UTF-16 code units — for CJK that overflows the card by up to 2x, and for
|
|
48
|
+
// emoji it splits a surrogate pair (U+FFFD). Use the grapheme/ANSI-aware
|
|
49
|
+
// truncateToWidth with empty ellipsis (the caller appends its own '…').
|
|
50
|
+
return truncateToWidth(str, maxWidth, "");
|
|
48
51
|
}
|
|
49
52
|
|
|
50
53
|
// ── Visual primitives ──────────────────────────────────────────────────
|
|
@@ -7,6 +7,7 @@
|
|
|
7
7
|
import type { CrewAgentRecord } from "../../runtime/crew-agent-runtime.ts";
|
|
8
8
|
import type { LiveAgentHandle } from "../../runtime/live-agent-manager.ts";
|
|
9
9
|
import { getTaskUsage } from "../../runtime/usage-tracker.ts";
|
|
10
|
+
import { computeLiveDurationMs } from "../live-duration.ts";
|
|
10
11
|
|
|
11
12
|
// ── Token formatting ──────────────────────────────────────────────────
|
|
12
13
|
|
|
@@ -115,19 +116,7 @@ export function agentStats(agent: CrewAgentRecord, liveHandle?: LiveAgentHandle)
|
|
|
115
116
|
const ctxPct = stats?.contextUsage?.percent;
|
|
116
117
|
if (ctxPct != null) parts.push(`${Math.round(ctxPct)}% ctx`);
|
|
117
118
|
} catch { /* ignore */ }
|
|
118
|
-
const
|
|
119
|
-
const rawCompleted = act.completedAtMs || 0;
|
|
120
|
-
const nowMs = Date.now();
|
|
121
|
-
const toMs = (v: number): number => {
|
|
122
|
-
if (v <= 0) return 0;
|
|
123
|
-
if (v > 1000000000 && v < 10000000000) return v * 1000;
|
|
124
|
-
if (v > 100000000000 && v < 10000000000000) return v;
|
|
125
|
-
return v;
|
|
126
|
-
};
|
|
127
|
-
const startedMs = toMs(rawStarted);
|
|
128
|
-
const completedMs = rawCompleted > 0 ? toMs(rawCompleted) : 0;
|
|
129
|
-
const isValidStarted = startedMs > 0 && startedMs < nowMs + 60000 && startedMs > nowMs - 3155692600000;
|
|
130
|
-
const ms = (completedMs > 0 && completedMs < nowMs + 60000 ? completedMs : nowMs) - (isValidStarted ? startedMs : nowMs);
|
|
119
|
+
const ms = computeLiveDurationMs(act);
|
|
131
120
|
parts.push(`${(ms / 1000).toFixed(1)}s`);
|
|
132
121
|
} else {
|
|
133
122
|
if (agent.toolUses) parts.push(`${agent.toolUses} tools`);
|
package/src/utils/fs-watch.ts
CHANGED
|
@@ -1,9 +1,17 @@
|
|
|
1
1
|
import * as fs from "node:fs";
|
|
2
|
-
import * as path from "node:path";
|
|
3
2
|
import type { FSWatcher, WatchListener } from "node:fs";
|
|
4
3
|
|
|
5
|
-
/**
|
|
6
|
-
|
|
4
|
+
/**
|
|
5
|
+
* Filesystem watcher helpers (slimmed down — pts/2 hang fix 2026-06-16).
|
|
6
|
+
*
|
|
7
|
+
* The recursive-watcher helpers (createRecursiveWatcher / watchCrewState /
|
|
8
|
+
* runIdFromStateRelativePath) were REMOVED: a recursive fs.watch on the run
|
|
9
|
+
* state tree exploded to O(total run history) inotify watches on Linux and
|
|
10
|
+
* caused a permanent interactive-session busy-loop. The bounded
|
|
11
|
+
* {@link RunWatcherRegistry} (one non-recursive watcher per ACTIVE run) now
|
|
12
|
+
* replaces them. Only the two primitives below survive — they are still used by
|
|
13
|
+
* manifest-cache, result-watcher, and run-watcher-registry.
|
|
14
|
+
*/
|
|
7
15
|
|
|
8
16
|
export function closeWatcher(watcher: FSWatcher | null | undefined): void {
|
|
9
17
|
if (!watcher) {
|
|
@@ -31,60 +39,3 @@ export function watchWithErrorHandler(
|
|
|
31
39
|
return null;
|
|
32
40
|
}
|
|
33
41
|
}
|
|
34
|
-
|
|
35
|
-
/**
|
|
36
|
-
* 1.3 — Watch a directory recursively and invoke `onChange` when any file
|
|
37
|
-
* inside changes. Falls back to `null` on systems where `fs.watch` rejects
|
|
38
|
-
* recursive mode (e.g., Linux when running on older kernels via FUSE/network FS).
|
|
39
|
-
*
|
|
40
|
-
* Callers MUST handle null and fall back to polling. The watcher emits the
|
|
41
|
-
* filename relative to `rootDir` (forward-slash normalised on Windows).
|
|
42
|
-
*/
|
|
43
|
-
export function createRecursiveWatcher(
|
|
44
|
-
rootDir: string,
|
|
45
|
-
onChange: (relativePath: string) => void,
|
|
46
|
-
onError: (error: unknown) => void,
|
|
47
|
-
): FSWatcher | null {
|
|
48
|
-
try {
|
|
49
|
-
if (!fs.existsSync(rootDir)) fs.mkdirSync(rootDir, { recursive: true });
|
|
50
|
-
const watcher = fs.watch(rootDir, { recursive: true }, (_eventType, filename) => {
|
|
51
|
-
if (typeof filename !== "string" || filename.length === 0) return;
|
|
52
|
-
onChange(filename.replace(/\\/g, "/"));
|
|
53
|
-
});
|
|
54
|
-
watcher.on("error", (error) => {
|
|
55
|
-
try { watcher.close(); } catch { /* ignore */ }
|
|
56
|
-
onError(error);
|
|
57
|
-
});
|
|
58
|
-
return watcher;
|
|
59
|
-
} catch (error) {
|
|
60
|
-
onError(error);
|
|
61
|
-
return null;
|
|
62
|
-
}
|
|
63
|
-
}
|
|
64
|
-
|
|
65
|
-
/**
|
|
66
|
-
* Given a path relative to `<crewRoot>/state`, return the runId that owns
|
|
67
|
-
* the change, or undefined if the path doesn't match any tracked run layout.
|
|
68
|
-
*/
|
|
69
|
-
export function runIdFromStateRelativePath(relativePath: string): string | undefined {
|
|
70
|
-
const parts = relativePath.split("/");
|
|
71
|
-
// Layout is `runs/{runId}/...` — see docs/architecture.md state layer.
|
|
72
|
-
if (parts.length >= 2 && parts[0] === "runs" && parts[1]) return parts[1];
|
|
73
|
-
return undefined;
|
|
74
|
-
}
|
|
75
|
-
|
|
76
|
-
/** Convenience: combine the two helpers for `<crewRoot>/state` watching. */
|
|
77
|
-
export function watchCrewState(
|
|
78
|
-
stateDir: string,
|
|
79
|
-
onRunChange: (runId: string) => void,
|
|
80
|
-
onError: (error: unknown) => void,
|
|
81
|
-
): FSWatcher | null {
|
|
82
|
-
return createRecursiveWatcher(stateDir, (relativePath) => {
|
|
83
|
-
const runId = runIdFromStateRelativePath(relativePath);
|
|
84
|
-
if (runId) onRunChange(runId);
|
|
85
|
-
}, onError);
|
|
86
|
-
}
|
|
87
|
-
|
|
88
|
-
// Re-export path helper so callers don't pull node:path just for join.
|
|
89
|
-
/** @internal */
|
|
90
|
-
const joinPath = path.join;
|
|
@@ -0,0 +1,164 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* RunWatcherRegistry — bounded per-run filesystem watcher registry.
|
|
3
|
+
*
|
|
4
|
+
* PROBLEM (pts/2 interactive-session hang, /home/bom/pts2-hang-investigation-2026-06-16.md):
|
|
5
|
+
* `watchCrewState` used `fs.watch(<crewRoot>/state, { recursive: true })`. On
|
|
6
|
+
* Linux, Node implements "recursive" by creating ONE inotify watch PER
|
|
7
|
+
* SUBDIRECTORY. With many historical runs under `.crew/state/runs/`, this
|
|
8
|
+
* ballooned to hundreds of watches (109→339 observed) — one per run dir ever —
|
|
9
|
+
* and the resulting event volume + render amplification produced a permanent
|
|
10
|
+
* busy-loop (71% CPU, 400KB/s read) even with no active work.
|
|
11
|
+
*
|
|
12
|
+
* FIX: instead of recursively watching the whole history, watch a SINGLE
|
|
13
|
+
* non-recursive watcher on the `runs/` root (to detect new run dirs appearing)
|
|
14
|
+
* PLUS one non-recursive watcher PER ACTIVE RUN. Total inotify cost is now
|
|
15
|
+
* O(active runs) — typically 1–5 — not O(total history). Completed runs stop
|
|
16
|
+
* being watched as soon as they leave the active set (reconciled by buildFrame,
|
|
17
|
+
* which reads manifest statuses each preload tick).
|
|
18
|
+
*
|
|
19
|
+
* The registry is intentionally small and directly unit-testable (a Map of
|
|
20
|
+
* watchers with add/remove/reconcile/close semantics).
|
|
21
|
+
*/
|
|
22
|
+
import type { FSWatcher } from "node:fs";
|
|
23
|
+
import { closeWatcher, watchWithErrorHandler } from "./fs-watch.ts";
|
|
24
|
+
|
|
25
|
+
/** Outcome of one {@link RunWatcherRegistry.reconcile} pass. */
export interface ReconcileResult {
  /** Run ids for which a watcher was created during this pass. */
  added: string[];
  /** Run ids whose watcher was closed because the run left the active set. */
  removed: string[];
}

/** A run that should currently be watched. */
export interface ActiveRun {
  runId: string;
  /** Directory handed to the per-run watcher. */
  runDir: string;
}

export type RunChangeCallback = (runId: string) => void;
export type ErrorCallback = (error: unknown) => void;

/**
 * Keeps one optional watcher on the `runs/` root plus one watcher per run id.
 * All watchers are created through `watchWithErrorHandler` and released
 * through `closeWatcher`. Once {@link closeAll} has been called the registry
 * refuses to create new watchers.
 */
export class RunWatcherRegistry {
  private readonly runWatchers = new Map<string, FSWatcher>();
  private rootWatcher: FSWatcher | undefined;
  private closed = false;

  /**
   * Watch the `runs/` root directory and forward entry names to `onNewRun`.
   *
   * `onNewRun` is invoked for EVERY event the watcher reports with a
   * non-empty filename, receiving the first path segment of that filename.
   * No check is made that the entry is a directory, is new, or looks like a
   * run id, so callers must validate the candidate themselves.
   *
   * Any previously installed root watcher is closed first. If the new watcher
   * later reports an error it is closed and forgotten before `onError` runs,
   * so a subsequent call can install a fresh one.
   *
   * @param runsDir  directory whose direct entries are run directories
   * @param onNewRun receives the candidate run id (first path segment)
   * @param onError  optional sink for watcher errors
   */
  setRootWatcher(
    runsDir: string,
    onNewRun: RunChangeCallback,
    onError?: ErrorCallback,
  ): void {
    if (this.closed) return;
    // Replace any prior root watcher.
    closeWatcher(this.rootWatcher);
    this.rootWatcher = undefined;

    let watcher: FSWatcher | null = null;
    watcher = watchWithErrorHandler(
      runsDir,
      (_eventType, filename) => {
        if (typeof filename !== "string" || filename.length === 0) return;
        // Normalise Windows separators and keep only the first path segment.
        const candidate = filename.replace(/\\/g, "/").split("/")[0];
        if (!candidate) return;
        onNewRun(candidate);
      },
      (error) => {
        // NOTE(review): assumes a watcher that reported an error no longer
        // delivers events — confirm against watchWithErrorHandler. The identity
        // check keeps a late error from tearing down a newer replacement.
        if (watcher && this.rootWatcher === watcher) {
          closeWatcher(watcher);
          this.rootWatcher = undefined;
        }
        if (onError) onError(error);
      },
    );
    this.rootWatcher = watcher ?? undefined;
  }

  /**
   * Install a watcher on a single run directory, replacing any watcher that
   * is already registered for `runId`.
   *
   * If the watcher later reports an error, its registry entry is removed
   * before `onError` runs, so {@link hasWatcher} stays accurate and
   * {@link reconcile} can re-create the watcher while the run is still active.
   *
   * @param runId    key under which the watcher is registered
   * @param runDir   directory to watch
   * @param onChange invoked with `runId` on every watcher event
   * @param onError  optional sink for watcher errors
   * @returns true if a watcher is now active for this runId
   */
  addRunWatcher(
    runId: string,
    runDir: string,
    onChange: RunChangeCallback,
    onError?: ErrorCallback,
  ): boolean {
    if (this.closed) return false;
    // Close + forget the previous watcher up front, so a failed replacement
    // cannot leave a closed handle in the map.
    this.removeRunWatcher(runId);

    let watcher: FSWatcher | null = null;
    watcher = watchWithErrorHandler(
      runDir,
      () => onChange(runId),
      (error) => {
        // Only drop the entry if it still refers to THIS watcher; a late error
        // from an already-replaced watcher must not remove its successor.
        if (watcher && this.runWatchers.get(runId) === watcher) {
          this.removeRunWatcher(runId);
        }
        if (onError) onError(error);
      },
    );
    // watchWithErrorHandler returned null: nothing is registered for runId.
    if (!watcher) return false;
    this.runWatchers.set(runId, watcher);
    return true;
  }

  /** Remove and close a specific run's watcher. No-op if not watched. */
  removeRunWatcher(runId: string): void {
    const watcher = this.runWatchers.get(runId);
    if (!watcher) return;
    closeWatcher(watcher);
    this.runWatchers.delete(runId);
  }

  /** Is a run currently being watched? */
  hasWatcher(runId: string): boolean {
    return this.runWatchers.has(runId);
  }

  /**
   * Reconcile against the current active-run set: close watchers for runs
   * that are no longer listed, then add watchers for listed runs that are not
   * yet watched.
   *
   * @param activeRuns runs that should be watched after this call
   * @param onChange   change callback for newly created watchers
   * @param onError    optional error sink for newly created watchers
   * @returns the run ids whose watcher was added / removed; both lists are
   *          empty once the registry has been closed
   */
  reconcile(
    activeRuns: ActiveRun[],
    onChange: RunChangeCallback,
    onError?: ErrorCallback,
  ): ReconcileResult {
    const added: string[] = [];
    const removed: string[] = [];
    if (this.closed) return { added, removed };

    const activeIds = new Set(activeRuns.map((run) => run.runId));
    // Iterate over a snapshot of the keys because removal mutates the map.
    for (const runId of [...this.runWatchers.keys()]) {
      if (activeIds.has(runId)) continue;
      this.removeRunWatcher(runId);
      removed.push(runId);
    }
    for (const { runId, runDir } of activeRuns) {
      if (this.runWatchers.has(runId)) continue;
      if (this.addRunWatcher(runId, runDir, onChange, onError)) added.push(runId);
    }
    return { added, removed };
  }

  /**
   * Close ALL watchers (per-run + root) and mark the registry closed, after
   * which the set/add/reconcile methods become no-ops. Safe to call multiple
   * times.
   */
  closeAll(): void {
    this.closed = true;
    for (const watcher of this.runWatchers.values()) closeWatcher(watcher);
    this.runWatchers.clear();
    closeWatcher(this.rootWatcher);
    this.rootWatcher = undefined;
  }

  /** Number of active PER-RUN watchers (excludes the root watcher). */
  get size(): number {
    return this.runWatchers.size;
  }
}
|
|
@@ -11,7 +11,7 @@ export interface WorkflowDiscoveryResult {
|
|
|
11
11
|
project: WorkflowConfig[];
|
|
12
12
|
}
|
|
13
13
|
|
|
14
|
-
const STEP_CONFIG_KEYS = new Set(["role", "dependsOn", "parallelGroup", "output", "reads", "model", "skills", "progress", "worktree", "verify", "task", "seedPaths", "preStepScript", "preStepArgs", "preStepTimeout"]);
|
|
14
|
+
const STEP_CONFIG_KEYS = new Set(["role", "dependsOn", "parallelGroup", "output", "reads", "model", "skills", "progress", "worktree", "verify", "task", "seedPaths", "preStepScript", "preStepArgs", "preStepTimeout", "preStepOptional"]);
|
|
15
15
|
|
|
16
16
|
function parseStepSection(id: string, body: string): WorkflowStep | undefined {
|
|
17
17
|
const lines = body.trim().split("\n");
|
|
@@ -54,6 +54,7 @@ function parseStepSection(id: string, body: string): WorkflowStep | undefined {
|
|
|
54
54
|
preStepScript: config.preStepScript || undefined,
|
|
55
55
|
preStepArgs: parseCsv(config.preStepArgs) || undefined,
|
|
56
56
|
preStepTimeout: parseOptionalInteger(config.preStepTimeout) ?? undefined,
|
|
57
|
+
preStepOptional: config.preStepOptional === "true" || config.preStepOptional === "1",
|
|
57
58
|
};
|
|
58
59
|
}
|
|
59
60
|
|
|
@@ -25,6 +25,11 @@ export interface WorkflowStep {
|
|
|
25
25
|
preStepArgs?: string[];
|
|
26
26
|
/** Timeout in ms for preStepScript. Default: 30000. */
|
|
27
27
|
preStepTimeout?: number;
|
|
28
|
+
/** Round 21 (E4): if true, a failing preStepScript does NOT abort the task.
|
|
29
|
+
* The failure is logged as a warning and the task proceeds without the
|
|
30
|
+
* pre-step output. Use for advisory hooks (e.g. optional test runs) whose
|
|
31
|
+
* failure shouldn't block the workflow. Default: false (fail-fast). */
|
|
32
|
+
preStepOptional?: boolean;
|
|
28
33
|
}
|
|
29
34
|
|
|
30
35
|
export interface WorkflowConfig {
|