pi-crew 0.1.45 → 0.1.46
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +5 -5
- package/agents/analyst.md +1 -1
- package/agents/critic.md +1 -1
- package/agents/executor.md +1 -1
- package/agents/explorer.md +1 -1
- package/agents/planner.md +1 -1
- package/agents/reviewer.md +1 -1
- package/agents/security-reviewer.md +1 -1
- package/agents/test-engineer.md +1 -1
- package/agents/verifier.md +1 -1
- package/agents/writer.md +1 -1
- package/docs/next-upgrade-roadmap.md +733 -0
- package/docs/refactor-tasks-phase3.md +394 -394
- package/docs/refactor-tasks-phase4.md +564 -564
- package/docs/refactor-tasks-phase5.md +402 -402
- package/docs/refactor-tasks-phase6.md +662 -662
- package/docs/research-awesome-agent-skills-distillation.md +100 -0
- package/docs/research-extension-examples.md +297 -297
- package/docs/research-extension-system.md +324 -324
- package/docs/research-oh-my-pi-distillation.md +322 -0
- package/docs/research-optimization-plan.md +548 -548
- package/docs/research-phase10-distillation.md +198 -198
- package/docs/research-phase11-distillation.md +201 -201
- package/docs/research-pi-coding-agent.md +357 -357
- package/docs/research-source-pi-crew-reference.md +174 -174
- package/docs/runtime-flow.md +148 -148
- package/docs/source-runtime-refactor-map.md +107 -83
- package/docs/usage.md +3 -3
- package/index.ts +6 -6
- package/install.mjs +52 -8
- package/package.json +1 -1
- package/schema.json +2 -1
- package/skills/async-worker-recovery/SKILL.md +42 -0
- package/skills/context-artifact-hygiene/SKILL.md +52 -0
- package/skills/delegation-patterns/SKILL.md +54 -0
- package/skills/mailbox-interactive/SKILL.md +40 -0
- package/skills/model-routing-context/SKILL.md +39 -0
- package/skills/multi-perspective-review/SKILL.md +58 -0
- package/skills/observability-reliability/SKILL.md +41 -0
- package/skills/ownership-session-security/SKILL.md +41 -0
- package/skills/pi-extension-lifecycle/SKILL.md +39 -0
- package/skills/requirements-to-task-packet/SKILL.md +63 -0
- package/skills/resource-discovery-config/SKILL.md +41 -0
- package/skills/runtime-state-reader/SKILL.md +44 -0
- package/skills/secure-agent-orchestration-review/SKILL.md +45 -0
- package/skills/state-mutation-locking/SKILL.md +42 -0
- package/skills/systematic-debugging/SKILL.md +67 -0
- package/skills/ui-render-performance/SKILL.md +39 -0
- package/skills/verification-before-done/SKILL.md +57 -0
- package/skills/worktree-isolation/SKILL.md +39 -0
- package/src/agents/agent-serializer.ts +34 -34
- package/src/agents/discover-agents.ts +12 -11
- package/src/config/config.ts +48 -24
- package/src/config/defaults.ts +14 -0
- package/src/extension/cross-extension-rpc.ts +82 -82
- package/src/extension/project-init.ts +62 -2
- package/src/extension/register.ts +11 -9
- package/src/extension/registration/commands.ts +32 -25
- package/src/extension/registration/compaction-guard.ts +125 -125
- package/src/extension/registration/subagent-helpers.ts +8 -0
- package/src/extension/registration/subagent-tools.ts +149 -148
- package/src/extension/registration/team-tool.ts +8 -6
- package/src/extension/run-bundle-schema.ts +89 -89
- package/src/extension/run-index.ts +13 -5
- package/src/extension/run-maintenance.ts +62 -43
- package/src/extension/team-tool/api.ts +25 -8
- package/src/extension/team-tool/cancel.ts +33 -4
- package/src/extension/team-tool/context.ts +5 -0
- package/src/extension/team-tool/handle-settings.ts +188 -188
- package/src/extension/team-tool/inspect.ts +41 -41
- package/src/extension/team-tool/lifecycle-actions.ts +91 -79
- package/src/extension/team-tool/plan.ts +19 -19
- package/src/extension/team-tool/respond.ts +37 -17
- package/src/extension/team-tool/run.ts +52 -10
- package/src/extension/team-tool/status.ts +12 -1
- package/src/extension/team-tool-types.ts +2 -0
- package/src/extension/team-tool.ts +32 -11
- package/src/i18n.ts +184 -184
- package/src/observability/event-to-metric.ts +8 -1
- package/src/observability/exporters/otlp-exporter.ts +77 -77
- package/src/prompt/prompt-runtime.ts +72 -72
- package/src/runtime/agent-control.ts +63 -63
- package/src/runtime/agent-memory.ts +72 -72
- package/src/runtime/agent-observability.ts +114 -114
- package/src/runtime/async-marker.ts +26 -26
- package/src/runtime/attention-events.ts +28 -28
- package/src/runtime/background-runner.ts +59 -53
- package/src/runtime/cancellation.ts +51 -0
- package/src/runtime/child-pi.ts +457 -444
- package/src/runtime/completion-guard.ts +190 -190
- package/src/runtime/crash-recovery.ts +1 -0
- package/src/runtime/crew-agent-records.ts +38 -6
- package/src/runtime/deadletter.ts +1 -0
- package/src/runtime/delivery-coordinator.ts +46 -25
- package/src/runtime/direct-run.ts +35 -35
- package/src/runtime/effectiveness.ts +76 -0
- package/src/runtime/foreground-control.ts +82 -82
- package/src/runtime/green-contract.ts +46 -46
- package/src/runtime/group-join.ts +106 -106
- package/src/runtime/heartbeat-gradient.ts +28 -28
- package/src/runtime/heartbeat-watcher.ts +124 -124
- package/src/runtime/live-agent-control.ts +88 -87
- package/src/runtime/live-agent-manager.ts +103 -85
- package/src/runtime/live-control-realtime.ts +36 -36
- package/src/runtime/live-session-runtime.ts +309 -305
- package/src/runtime/manifest-cache.ts +17 -2
- package/src/runtime/model-fallback.ts +6 -4
- package/src/runtime/parallel-research.ts +44 -44
- package/src/runtime/pi-args.ts +18 -3
- package/src/runtime/pi-json-output.ts +111 -111
- package/src/runtime/policy-engine.ts +79 -79
- package/src/runtime/process-status.ts +5 -1
- package/src/runtime/progress-event-coalescer.ts +43 -43
- package/src/runtime/recovery-recipes.ts +74 -74
- package/src/runtime/retry-executor.ts +81 -64
- package/src/runtime/role-permission.ts +39 -39
- package/src/runtime/runtime-resolver.ts +22 -6
- package/src/runtime/session-resources.ts +25 -25
- package/src/runtime/session-snapshot.ts +59 -59
- package/src/runtime/session-usage.ts +79 -79
- package/src/runtime/sidechain-output.ts +29 -29
- package/src/runtime/skill-instructions.ts +222 -0
- package/src/runtime/stale-reconciler.ts +4 -14
- package/src/runtime/subagent-manager.ts +3 -0
- package/src/runtime/supervisor-contact.ts +59 -59
- package/src/runtime/task-display.ts +38 -38
- package/src/runtime/task-output-context.ts +127 -127
- package/src/runtime/task-runner/capabilities.ts +78 -0
- package/src/runtime/task-runner/live-executor.ts +105 -101
- package/src/runtime/task-runner/progress.ts +119 -119
- package/src/runtime/task-runner/prompt-builder.ts +3 -1
- package/src/runtime/task-runner/prompt-pipeline.ts +64 -0
- package/src/runtime/task-runner/result-utils.ts +14 -14
- package/src/runtime/task-runner/state-helpers.ts +22 -22
- package/src/runtime/task-runner.ts +44 -5
- package/src/runtime/team-runner.ts +78 -15
- package/src/runtime/worker-heartbeat.ts +21 -21
- package/src/runtime/worker-startup.ts +57 -57
- package/src/schema/config-schema.ts +1 -0
- package/src/schema/team-tool-schema.ts +3 -3
- package/src/state/active-run-registry.ts +165 -0
- package/src/state/contracts.ts +1 -1
- package/src/state/mailbox.ts +44 -4
- package/src/state/state-store.ts +8 -1
- package/src/state/task-claims.ts +44 -44
- package/src/state/types.ts +44 -2
- package/src/state/usage.ts +29 -29
- package/src/subagents/async-entry.ts +1 -1
- package/src/subagents/index.ts +3 -3
- package/src/subagents/live/control.ts +1 -1
- package/src/subagents/live/manager.ts +1 -1
- package/src/subagents/live/realtime.ts +1 -1
- package/src/subagents/live/session-runtime.ts +1 -1
- package/src/subagents/manager.ts +1 -1
- package/src/subagents/spawn.ts +1 -1
- package/src/teams/team-config.ts +1 -0
- package/src/teams/team-serializer.ts +38 -38
- package/src/types/diff.d.ts +18 -18
- package/src/ui/crew-footer.ts +101 -101
- package/src/ui/crew-select-list.ts +111 -111
- package/src/ui/crew-widget.ts +4 -3
- package/src/ui/dashboard-panes/metrics-pane.ts +34 -34
- package/src/ui/dashboard-panes/progress-pane.ts +2 -0
- package/src/ui/dynamic-border.ts +25 -25
- package/src/ui/layout-primitives.ts +106 -106
- package/src/ui/loaders.ts +158 -158
- package/src/ui/render-diff.ts +119 -119
- package/src/ui/render-scheduler.ts +143 -143
- package/src/ui/run-snapshot-cache.ts +10 -2
- package/src/ui/snapshot-types.ts +2 -0
- package/src/ui/spinner.ts +17 -17
- package/src/ui/status-colors.ts +58 -58
- package/src/ui/syntax-highlight.ts +116 -116
- package/src/utils/atomic-write.ts +33 -33
- package/src/utils/completion-dedupe.ts +63 -63
- package/src/utils/frontmatter.ts +68 -68
- package/src/utils/git.ts +262 -262
- package/src/utils/ids.ts +12 -12
- package/src/utils/names.ts +27 -27
- package/src/utils/paths.ts +4 -2
- package/src/utils/redaction.ts +44 -44
- package/src/utils/safe-paths.ts +47 -47
- package/src/utils/sleep.ts +32 -32
- package/src/workflows/validate-workflow.ts +40 -40
- package/src/workflows/workflow-config.ts +1 -0
- package/src/worktree/branch-freshness.ts +45 -45
- package/teams/default.team.md +12 -12
- package/teams/fast-fix.team.md +11 -11
- package/teams/implementation.team.md +18 -18
- package/teams/parallel-research.team.md +14 -14
- package/teams/research.team.md +11 -11
- package/teams/review.team.md +12 -12
- package/workflows/default.workflow.md +29 -29
- package/workflows/fast-fix.workflow.md +22 -22
- package/workflows/implementation.workflow.md +38 -38
- package/workflows/parallel-research.workflow.md +46 -46
- package/workflows/research.workflow.md +22 -22
- package/workflows/review.workflow.md +30 -30
|
@@ -1,124 +1,124 @@
|
|
|
1
|
-
import type { NotificationDescriptor } from "../extension/notification-router.ts";
|
|
2
|
-
import type { MetricRegistry } from "../observability/metric-registry.ts";
|
|
3
|
-
import { appendEvent } from "../state/event-log.ts";
|
|
4
|
-
import { loadRunManifestById } from "../state/state-store.ts";
|
|
5
|
-
import type { TeamRunManifest } from "../state/types.ts";
|
|
6
|
-
import { logInternalError } from "../utils/internal-error.ts";
|
|
7
|
-
import type { ManifestCache } from "./manifest-cache.ts";
|
|
8
|
-
import { classifyHeartbeat, DEFAULT_GRADIENT_THRESHOLDS, heartbeatAgeMs, type GradientThresholds, type HeartbeatLevel } from "./heartbeat-gradient.ts";
|
|
9
|
-
|
|
10
|
-
export interface HeartbeatWatcherRouter {
|
|
11
|
-
enqueue(notification: NotificationDescriptor): boolean;
|
|
12
|
-
}
|
|
13
|
-
|
|
14
|
-
export interface HeartbeatWatcherOptions {
|
|
15
|
-
cwd: string;
|
|
16
|
-
pollIntervalMs?: number;
|
|
17
|
-
thresholds?: GradientThresholds;
|
|
18
|
-
manifestCache: ManifestCache;
|
|
19
|
-
registry: MetricRegistry;
|
|
20
|
-
router: HeartbeatWatcherRouter;
|
|
21
|
-
deadletterTickThreshold?: number;
|
|
22
|
-
onDead?: (runId: string, taskId: string, elapsed: number) => void;
|
|
23
|
-
onDeadletterTrigger?: (manifest: TeamRunManifest, taskId: string) => void;
|
|
24
|
-
}
|
|
25
|
-
|
|
26
|
-
/**
|
|
27
|
-
* Polls running runs for heartbeat staleness.
|
|
28
|
-
*
|
|
29
|
-
* Uses recursive setTimeout to avoid timer storms.
|
|
30
|
-
* Cleanup is done in the same pass — no second scan over manifests.
|
|
31
|
-
* Keys for runs that disappear from the cache are cleaned via staleness-age policy
|
|
32
|
-
* rather than being leaked forever.
|
|
33
|
-
*/
|
|
34
|
-
export class HeartbeatWatcher {
|
|
35
|
-
private timer?: ReturnType<typeof setTimeout>;
|
|
36
|
-
private lastLevel = new Map<string, HeartbeatLevel>();
|
|
37
|
-
private consecutiveDead = new Map<string, number>();
|
|
38
|
-
private lastSeen = new Map<string, number>(); // key → last time it was active
|
|
39
|
-
/** Max age (ms) to retain a stale key before garbage-collecting it. */
|
|
40
|
-
private readonly maxKeyAgeMs = 600_000; // 10 minutes
|
|
41
|
-
private readonly opts: HeartbeatWatcherOptions;
|
|
42
|
-
|
|
43
|
-
constructor(opts: HeartbeatWatcherOptions) {
|
|
44
|
-
this.opts = opts;
|
|
45
|
-
}
|
|
46
|
-
|
|
47
|
-
start(): void {
|
|
48
|
-
this.dispose();
|
|
49
|
-
this.scheduleTick();
|
|
50
|
-
}
|
|
51
|
-
|
|
52
|
-
private scheduleTick(): void {
|
|
53
|
-
this.timer = setTimeout(() => this.tick(), this.opts.pollIntervalMs ?? 5000);
|
|
54
|
-
this.timer.unref();
|
|
55
|
-
}
|
|
56
|
-
|
|
57
|
-
tick(now = Date.now()): void {
|
|
58
|
-
try {
|
|
59
|
-
this.tickUnsafe(now);
|
|
60
|
-
} catch (error) {
|
|
61
|
-
logInternalError("heartbeat-watcher.tick", error);
|
|
62
|
-
} finally {
|
|
63
|
-
this.scheduleTick();
|
|
64
|
-
}
|
|
65
|
-
}
|
|
66
|
-
|
|
67
|
-
private tickUnsafe(now: number): void {
|
|
68
|
-
const thresholds = this.opts.thresholds ?? DEFAULT_GRADIENT_THRESHOLDS;
|
|
69
|
-
const tickThreshold = this.opts.deadletterTickThreshold ?? 3;
|
|
70
|
-
const activeKeys = new Set<string>();
|
|
71
|
-
|
|
72
|
-
for (const run of this.opts.manifestCache.list(50)) {
|
|
73
|
-
if (run.status !== "running") continue;
|
|
74
|
-
const loaded = loadRunManifestById(this.opts.cwd, run.runId);
|
|
75
|
-
if (!loaded) continue;
|
|
76
|
-
for (const task of loaded.tasks) {
|
|
77
|
-
if (task.status !== "running") continue;
|
|
78
|
-
const key = `${run.runId}:${task.id}`;
|
|
79
|
-
activeKeys.add(key);
|
|
80
|
-
this.lastSeen.set(key, now);
|
|
81
|
-
|
|
82
|
-
const elapsed = heartbeatAgeMs(task.heartbeat, now);
|
|
83
|
-
const level = classifyHeartbeat(task.heartbeat, thresholds, now);
|
|
84
|
-
this.opts.registry.gauge("crew.heartbeat.staleness_ms", "Heartbeat elapsed since last seen, milliseconds").set({ runId: run.runId, taskId: task.id }, Number.isFinite(elapsed) ? elapsed : thresholds.deadMs);
|
|
85
|
-
this.opts.registry.counter("crew.heartbeat.level_total", "Heartbeat classifications by level").inc({ runId: run.runId, level });
|
|
86
|
-
const previous = this.lastLevel.get(key);
|
|
87
|
-
this.lastLevel.set(key, level);
|
|
88
|
-
if (level === "dead" && previous !== "dead") {
|
|
89
|
-
this.opts.registry.counter("crew.heartbeat.dead_total", "Dead heartbeat detections").inc({ runId: run.runId });
|
|
90
|
-
appendEvent(loaded.manifest.eventsPath, { type: "crew.task.heartbeat_dead", runId: run.runId, taskId: task.id, message: `Task ${task.id} heartbeat dead.`, data: { elapsedMs: Number.isFinite(elapsed) ? elapsed : undefined } });
|
|
91
|
-
this.opts.router.enqueue({ id: `dead_${run.runId}_${task.id}`, severity: "warning", source: "heartbeat-watcher", runId: run.runId, title: `Task ${task.id} heartbeat dead`, body: "Background watcher detected a stuck worker." });
|
|
92
|
-
this.opts.onDead?.(run.runId, task.id, Number.isFinite(elapsed) ? elapsed : thresholds.deadMs);
|
|
93
|
-
}
|
|
94
|
-
if (level === "dead") {
|
|
95
|
-
const count = (this.consecutiveDead.get(key) ?? 0) + 1;
|
|
96
|
-
this.consecutiveDead.set(key, count);
|
|
97
|
-
if (count === tickThreshold) this.opts.onDeadletterTrigger?.(loaded.manifest, task.id);
|
|
98
|
-
} else {
|
|
99
|
-
this.consecutiveDead.delete(key);
|
|
100
|
-
}
|
|
101
|
-
}
|
|
102
|
-
}
|
|
103
|
-
|
|
104
|
-
// Cleanup: drop keys that were NOT in this tick's active set AND
|
|
105
|
-
// haven't been seen for > maxKeyAgeMs. This covers runs that
|
|
106
|
-
// completed or fell out of the manifest cache's top-50 window.
|
|
107
|
-
const cutoff = now - this.maxKeyAgeMs;
|
|
108
|
-
for (const [key, ts] of this.lastSeen) {
|
|
109
|
-
if (!activeKeys.has(key) && ts < cutoff) {
|
|
110
|
-
this.lastLevel.delete(key);
|
|
111
|
-
this.consecutiveDead.delete(key);
|
|
112
|
-
this.lastSeen.delete(key);
|
|
113
|
-
}
|
|
114
|
-
}
|
|
115
|
-
}
|
|
116
|
-
|
|
117
|
-
dispose(): void {
|
|
118
|
-
if (this.timer) clearTimeout(this.timer);
|
|
119
|
-
this.timer = undefined;
|
|
120
|
-
this.lastLevel.clear();
|
|
121
|
-
this.consecutiveDead.clear();
|
|
122
|
-
this.lastSeen.clear();
|
|
123
|
-
}
|
|
124
|
-
}
|
|
1
|
+
import type { NotificationDescriptor } from "../extension/notification-router.ts";
|
|
2
|
+
import type { MetricRegistry } from "../observability/metric-registry.ts";
|
|
3
|
+
import { appendEvent } from "../state/event-log.ts";
|
|
4
|
+
import { loadRunManifestById } from "../state/state-store.ts";
|
|
5
|
+
import type { TeamRunManifest } from "../state/types.ts";
|
|
6
|
+
import { logInternalError } from "../utils/internal-error.ts";
|
|
7
|
+
import type { ManifestCache } from "./manifest-cache.ts";
|
|
8
|
+
import { classifyHeartbeat, DEFAULT_GRADIENT_THRESHOLDS, heartbeatAgeMs, type GradientThresholds, type HeartbeatLevel } from "./heartbeat-gradient.ts";
|
|
9
|
+
|
|
10
|
+
export interface HeartbeatWatcherRouter {
|
|
11
|
+
enqueue(notification: NotificationDescriptor): boolean;
|
|
12
|
+
}
|
|
13
|
+
|
|
14
|
+
export interface HeartbeatWatcherOptions {
|
|
15
|
+
cwd: string;
|
|
16
|
+
pollIntervalMs?: number;
|
|
17
|
+
thresholds?: GradientThresholds;
|
|
18
|
+
manifestCache: ManifestCache;
|
|
19
|
+
registry: MetricRegistry;
|
|
20
|
+
router: HeartbeatWatcherRouter;
|
|
21
|
+
deadletterTickThreshold?: number;
|
|
22
|
+
onDead?: (runId: string, taskId: string, elapsed: number) => void;
|
|
23
|
+
onDeadletterTrigger?: (manifest: TeamRunManifest, taskId: string) => void;
|
|
24
|
+
}
|
|
25
|
+
|
|
26
|
+
/**
|
|
27
|
+
* Polls running runs for heartbeat staleness.
|
|
28
|
+
*
|
|
29
|
+
* Uses recursive setTimeout to avoid timer storms.
|
|
30
|
+
* Cleanup is done in the same pass — no second scan over manifests.
|
|
31
|
+
* Keys for runs that disappear from the cache are cleaned via staleness-age policy
|
|
32
|
+
* rather than being leaked forever.
|
|
33
|
+
*/
|
|
34
|
+
export class HeartbeatWatcher {
|
|
35
|
+
private timer?: ReturnType<typeof setTimeout>;
|
|
36
|
+
private lastLevel = new Map<string, HeartbeatLevel>();
|
|
37
|
+
private consecutiveDead = new Map<string, number>();
|
|
38
|
+
private lastSeen = new Map<string, number>(); // key → last time it was active
|
|
39
|
+
/** Max age (ms) to retain a stale key before garbage-collecting it. */
|
|
40
|
+
private readonly maxKeyAgeMs = 600_000; // 10 minutes
|
|
41
|
+
private readonly opts: HeartbeatWatcherOptions;
|
|
42
|
+
|
|
43
|
+
constructor(opts: HeartbeatWatcherOptions) {
|
|
44
|
+
this.opts = opts;
|
|
45
|
+
}
|
|
46
|
+
|
|
47
|
+
start(): void {
|
|
48
|
+
this.dispose();
|
|
49
|
+
this.scheduleTick();
|
|
50
|
+
}
|
|
51
|
+
|
|
52
|
+
private scheduleTick(): void {
|
|
53
|
+
this.timer = setTimeout(() => this.tick(), this.opts.pollIntervalMs ?? 5000);
|
|
54
|
+
this.timer.unref();
|
|
55
|
+
}
|
|
56
|
+
|
|
57
|
+
tick(now = Date.now()): void {
|
|
58
|
+
try {
|
|
59
|
+
this.tickUnsafe(now);
|
|
60
|
+
} catch (error) {
|
|
61
|
+
logInternalError("heartbeat-watcher.tick", error);
|
|
62
|
+
} finally {
|
|
63
|
+
this.scheduleTick();
|
|
64
|
+
}
|
|
65
|
+
}
|
|
66
|
+
|
|
67
|
+
private tickUnsafe(now: number): void {
|
|
68
|
+
const thresholds = this.opts.thresholds ?? DEFAULT_GRADIENT_THRESHOLDS;
|
|
69
|
+
const tickThreshold = this.opts.deadletterTickThreshold ?? 3;
|
|
70
|
+
const activeKeys = new Set<string>();
|
|
71
|
+
|
|
72
|
+
for (const run of this.opts.manifestCache.list(50)) {
|
|
73
|
+
if (run.status !== "running") continue;
|
|
74
|
+
const loaded = loadRunManifestById(this.opts.cwd, run.runId);
|
|
75
|
+
if (!loaded) continue;
|
|
76
|
+
for (const task of loaded.tasks) {
|
|
77
|
+
if (task.status !== "running") continue;
|
|
78
|
+
const key = `${run.runId}:${task.id}`;
|
|
79
|
+
activeKeys.add(key);
|
|
80
|
+
this.lastSeen.set(key, now);
|
|
81
|
+
|
|
82
|
+
const elapsed = heartbeatAgeMs(task.heartbeat, now);
|
|
83
|
+
const level = classifyHeartbeat(task.heartbeat, thresholds, now);
|
|
84
|
+
this.opts.registry.gauge("crew.heartbeat.staleness_ms", "Heartbeat elapsed since last seen, milliseconds").set({ runId: run.runId, taskId: task.id }, Number.isFinite(elapsed) ? elapsed : thresholds.deadMs);
|
|
85
|
+
this.opts.registry.counter("crew.heartbeat.level_total", "Heartbeat classifications by level").inc({ runId: run.runId, level });
|
|
86
|
+
const previous = this.lastLevel.get(key);
|
|
87
|
+
this.lastLevel.set(key, level);
|
|
88
|
+
if (level === "dead" && previous !== "dead") {
|
|
89
|
+
this.opts.registry.counter("crew.heartbeat.dead_total", "Dead heartbeat detections").inc({ runId: run.runId });
|
|
90
|
+
appendEvent(loaded.manifest.eventsPath, { type: "crew.task.heartbeat_dead", runId: run.runId, taskId: task.id, message: `Task ${task.id} heartbeat dead.`, data: { elapsedMs: Number.isFinite(elapsed) ? elapsed : undefined } });
|
|
91
|
+
this.opts.router.enqueue({ id: `dead_${run.runId}_${task.id}`, severity: "warning", source: "heartbeat-watcher", runId: run.runId, title: `Task ${task.id} heartbeat dead`, body: "Background watcher detected a stuck worker." });
|
|
92
|
+
this.opts.onDead?.(run.runId, task.id, Number.isFinite(elapsed) ? elapsed : thresholds.deadMs);
|
|
93
|
+
}
|
|
94
|
+
if (level === "dead") {
|
|
95
|
+
const count = (this.consecutiveDead.get(key) ?? 0) + 1;
|
|
96
|
+
this.consecutiveDead.set(key, count);
|
|
97
|
+
if (count === tickThreshold) this.opts.onDeadletterTrigger?.(loaded.manifest, task.id);
|
|
98
|
+
} else {
|
|
99
|
+
this.consecutiveDead.delete(key);
|
|
100
|
+
}
|
|
101
|
+
}
|
|
102
|
+
}
|
|
103
|
+
|
|
104
|
+
// Cleanup: drop keys that were NOT in this tick's active set AND
|
|
105
|
+
// haven't been seen for > maxKeyAgeMs. This covers runs that
|
|
106
|
+
// completed or fell out of the manifest cache's top-50 window.
|
|
107
|
+
const cutoff = now - this.maxKeyAgeMs;
|
|
108
|
+
for (const [key, ts] of this.lastSeen) {
|
|
109
|
+
if (!activeKeys.has(key) && ts < cutoff) {
|
|
110
|
+
this.lastLevel.delete(key);
|
|
111
|
+
this.consecutiveDead.delete(key);
|
|
112
|
+
this.lastSeen.delete(key);
|
|
113
|
+
}
|
|
114
|
+
}
|
|
115
|
+
}
|
|
116
|
+
|
|
117
|
+
dispose(): void {
|
|
118
|
+
if (this.timer) clearTimeout(this.timer);
|
|
119
|
+
this.timer = undefined;
|
|
120
|
+
this.lastLevel.clear();
|
|
121
|
+
this.consecutiveDead.clear();
|
|
122
|
+
this.lastSeen.clear();
|
|
123
|
+
}
|
|
124
|
+
}
|
|
@@ -1,87 +1,88 @@
|
|
|
1
|
-
import * as fs from "node:fs";
|
|
2
|
-
import * as path from "node:path";
|
|
3
|
-
import type { TeamRunManifest } from "../state/types.ts";
|
|
4
|
-
import { agentStateFile, ensureAgentStateDir } from "./crew-agent-records.ts";
|
|
5
|
-
|
|
6
|
-
export type LiveAgentControlOperation = "steer" | "stop" | "resume";
|
|
7
|
-
|
|
8
|
-
export interface LiveAgentControlRequest {
|
|
9
|
-
id: string;
|
|
10
|
-
runId: string;
|
|
11
|
-
taskId: string;
|
|
12
|
-
agentId?: string;
|
|
13
|
-
operation: LiveAgentControlOperation;
|
|
14
|
-
message?: string;
|
|
15
|
-
createdAt: string;
|
|
16
|
-
processedAt?: string;
|
|
17
|
-
error?: string;
|
|
18
|
-
}
|
|
19
|
-
|
|
20
|
-
export interface LiveAgentControlCursor {
|
|
21
|
-
offset: number;
|
|
22
|
-
}
|
|
23
|
-
|
|
24
|
-
export function liveAgentControlPath(manifest: TeamRunManifest, taskId: string): string {
|
|
25
|
-
return path.join(ensureAgentStateDir(manifest, taskId), "live-control.jsonl");
|
|
26
|
-
}
|
|
27
|
-
|
|
28
|
-
function liveAgentControlFile(manifest: TeamRunManifest, taskId: string): string {
|
|
29
|
-
return agentStateFile(manifest, taskId, "live-control.jsonl");
|
|
30
|
-
}
|
|
31
|
-
|
|
32
|
-
function requestId(): string {
|
|
33
|
-
return `ctrl_${Date.now().toString(36)}_${Math.random().toString(16).slice(2, 10)}`;
|
|
34
|
-
}
|
|
35
|
-
|
|
36
|
-
export function appendLiveAgentControlRequest(manifest: TeamRunManifest, input: { taskId: string; agentId?: string; operation: LiveAgentControlOperation; message?: string }): LiveAgentControlRequest {
|
|
37
|
-
const request: LiveAgentControlRequest = {
|
|
38
|
-
id: requestId(),
|
|
39
|
-
runId: manifest.runId,
|
|
40
|
-
taskId: input.taskId,
|
|
41
|
-
agentId: input.agentId,
|
|
42
|
-
operation: input.operation,
|
|
43
|
-
message: input.message,
|
|
44
|
-
createdAt: new Date().toISOString(),
|
|
45
|
-
};
|
|
46
|
-
const filePath = liveAgentControlFile(manifest, input.taskId);
|
|
47
|
-
fs.appendFileSync(filePath, `${JSON.stringify(request)}\n`, "utf-8");
|
|
48
|
-
return request;
|
|
49
|
-
}
|
|
50
|
-
|
|
51
|
-
export function readLiveAgentControlRequests(manifest: TeamRunManifest, taskId: string, cursor: LiveAgentControlCursor = { offset: 0 }): { requests: LiveAgentControlRequest[]; cursor: LiveAgentControlCursor } {
|
|
52
|
-
let filePath: string;
|
|
53
|
-
try {
|
|
54
|
-
filePath = liveAgentControlFile(manifest, taskId);
|
|
55
|
-
} catch {
|
|
56
|
-
return { requests: [], cursor };
|
|
57
|
-
}
|
|
58
|
-
if (!fs.existsSync(filePath)) return { requests: [], cursor };
|
|
59
|
-
const text = fs.readFileSync(filePath, "utf-8");
|
|
60
|
-
const lines = text.split(/\r?\n/).filter(Boolean);
|
|
61
|
-
const requests = lines.slice(cursor.offset).flatMap((line) => {
|
|
62
|
-
try {
|
|
63
|
-
const parsed = JSON.parse(line) as LiveAgentControlRequest;
|
|
64
|
-
return parsed && parsed.runId === manifest.runId && parsed.taskId === taskId ? [parsed] : [];
|
|
65
|
-
} catch {
|
|
66
|
-
return [];
|
|
67
|
-
}
|
|
68
|
-
});
|
|
69
|
-
return { requests, cursor: { offset: lines.length } };
|
|
70
|
-
}
|
|
71
|
-
|
|
72
|
-
export async function applyLiveAgentControlRequest(input: { request: LiveAgentControlRequest; taskId: string; agentId: string; session: { steer?: (text: string) => Promise<void>; prompt?: (text: string, options?: Record<string, unknown>) => Promise<void>; abort?: () => Promise<void> | void }; seenRequestIds?: Set<string> }): Promise<boolean> {
|
|
73
|
-
const { request, taskId, agentId, session, seenRequestIds } = input;
|
|
74
|
-
if (seenRequestIds?.has(request.id)) return false;
|
|
75
|
-
if (request.agentId && request.agentId !== agentId && request.agentId !== taskId) return false;
|
|
76
|
-
seenRequestIds?.add(request.id);
|
|
77
|
-
if (request.operation === "steer") await session.steer?.(request.message ?? "Please report current status and wrap up if possible.");
|
|
78
|
-
else if (request.operation === "
|
|
79
|
-
else if (request.operation === "
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
1
|
+
import * as fs from "node:fs";
|
|
2
|
+
import * as path from "node:path";
|
|
3
|
+
import type { TeamRunManifest } from "../state/types.ts";
|
|
4
|
+
import { agentStateFile, ensureAgentStateDir } from "./crew-agent-records.ts";
|
|
5
|
+
|
|
6
|
+
export type LiveAgentControlOperation = "steer" | "follow-up" | "stop" | "resume";
|
|
7
|
+
|
|
8
|
+
export interface LiveAgentControlRequest {
|
|
9
|
+
id: string;
|
|
10
|
+
runId: string;
|
|
11
|
+
taskId: string;
|
|
12
|
+
agentId?: string;
|
|
13
|
+
operation: LiveAgentControlOperation;
|
|
14
|
+
message?: string;
|
|
15
|
+
createdAt: string;
|
|
16
|
+
processedAt?: string;
|
|
17
|
+
error?: string;
|
|
18
|
+
}
|
|
19
|
+
|
|
20
|
+
export interface LiveAgentControlCursor {
|
|
21
|
+
offset: number;
|
|
22
|
+
}
|
|
23
|
+
|
|
24
|
+
export function liveAgentControlPath(manifest: TeamRunManifest, taskId: string): string {
|
|
25
|
+
return path.join(ensureAgentStateDir(manifest, taskId), "live-control.jsonl");
|
|
26
|
+
}
|
|
27
|
+
|
|
28
|
+
function liveAgentControlFile(manifest: TeamRunManifest, taskId: string): string {
|
|
29
|
+
return agentStateFile(manifest, taskId, "live-control.jsonl");
|
|
30
|
+
}
|
|
31
|
+
|
|
32
|
+
function requestId(): string {
|
|
33
|
+
return `ctrl_${Date.now().toString(36)}_${Math.random().toString(16).slice(2, 10)}`;
|
|
34
|
+
}
|
|
35
|
+
|
|
36
|
+
export function appendLiveAgentControlRequest(manifest: TeamRunManifest, input: { taskId: string; agentId?: string; operation: LiveAgentControlOperation; message?: string }): LiveAgentControlRequest {
|
|
37
|
+
const request: LiveAgentControlRequest = {
|
|
38
|
+
id: requestId(),
|
|
39
|
+
runId: manifest.runId,
|
|
40
|
+
taskId: input.taskId,
|
|
41
|
+
agentId: input.agentId,
|
|
42
|
+
operation: input.operation,
|
|
43
|
+
message: input.message,
|
|
44
|
+
createdAt: new Date().toISOString(),
|
|
45
|
+
};
|
|
46
|
+
const filePath = liveAgentControlFile(manifest, input.taskId);
|
|
47
|
+
fs.appendFileSync(filePath, `${JSON.stringify(request)}\n`, "utf-8");
|
|
48
|
+
return request;
|
|
49
|
+
}
|
|
50
|
+
|
|
51
|
+
export function readLiveAgentControlRequests(manifest: TeamRunManifest, taskId: string, cursor: LiveAgentControlCursor = { offset: 0 }): { requests: LiveAgentControlRequest[]; cursor: LiveAgentControlCursor } {
|
|
52
|
+
let filePath: string;
|
|
53
|
+
try {
|
|
54
|
+
filePath = liveAgentControlFile(manifest, taskId);
|
|
55
|
+
} catch {
|
|
56
|
+
return { requests: [], cursor };
|
|
57
|
+
}
|
|
58
|
+
if (!fs.existsSync(filePath)) return { requests: [], cursor };
|
|
59
|
+
const text = fs.readFileSync(filePath, "utf-8");
|
|
60
|
+
const lines = text.split(/\r?\n/).filter(Boolean);
|
|
61
|
+
const requests = lines.slice(cursor.offset).flatMap((line) => {
|
|
62
|
+
try {
|
|
63
|
+
const parsed = JSON.parse(line) as LiveAgentControlRequest;
|
|
64
|
+
return parsed && parsed.runId === manifest.runId && parsed.taskId === taskId ? [parsed] : [];
|
|
65
|
+
} catch {
|
|
66
|
+
return [];
|
|
67
|
+
}
|
|
68
|
+
});
|
|
69
|
+
return { requests, cursor: { offset: lines.length } };
|
|
70
|
+
}
|
|
71
|
+
|
|
72
|
+
export async function applyLiveAgentControlRequest(input: { request: LiveAgentControlRequest; taskId: string; agentId: string; session: { steer?: (text: string) => Promise<void>; prompt?: (text: string, options?: Record<string, unknown>) => Promise<void>; abort?: () => Promise<void> | void }; seenRequestIds?: Set<string> }): Promise<boolean> {
|
|
73
|
+
const { request, taskId, agentId, session, seenRequestIds } = input;
|
|
74
|
+
if (seenRequestIds?.has(request.id)) return false;
|
|
75
|
+
if (request.agentId && request.agentId !== agentId && request.agentId !== taskId) return false;
|
|
76
|
+
seenRequestIds?.add(request.id);
|
|
77
|
+
if (request.operation === "steer") await session.steer?.(request.message ?? "Please report current status and wrap up if possible.");
|
|
78
|
+
else if (request.operation === "follow-up") await session.prompt?.(request.message ?? "Please continue with the follow-up request.", { source: "api", expandPromptTemplates: false });
|
|
79
|
+
else if (request.operation === "resume") await session.prompt?.(request.message ?? "Please resume and report final status.", { source: "api", expandPromptTemplates: false });
|
|
80
|
+
else if (request.operation === "stop") await session.abort?.();
|
|
81
|
+
return true;
|
|
82
|
+
}
|
|
83
|
+
|
|
84
|
+
export async function applyLiveAgentControlRequests(input: { manifest: TeamRunManifest; taskId: string; agentId: string; session: { steer?: (text: string) => Promise<void>; prompt?: (text: string, options?: Record<string, unknown>) => Promise<void>; abort?: () => Promise<void> | void }; cursor: LiveAgentControlCursor; seenRequestIds?: Set<string> }): Promise<LiveAgentControlCursor> {
|
|
85
|
+
const batch = readLiveAgentControlRequests(input.manifest, input.taskId, input.cursor);
|
|
86
|
+
for (const request of batch.requests) await applyLiveAgentControlRequest({ request, taskId: input.taskId, agentId: input.agentId, session: input.session, seenRequestIds: input.seenRequestIds });
|
|
87
|
+
return batch.cursor;
|
|
88
|
+
}
|
|
@@ -1,85 +1,103 @@
|
|
|
1
|
-
import type { CrewAgentRecord } from "./crew-agent-runtime.ts";
|
|
2
|
-
|
|
3
|
-
type LiveSessionHandle = {
|
|
4
|
-
steer?: (text: string) => Promise<void>;
|
|
5
|
-
prompt?: (text: string, options?: Record<string, unknown>) => Promise<void>;
|
|
6
|
-
abort?: () => Promise<void> | void;
|
|
7
|
-
};
|
|
8
|
-
|
|
9
|
-
export interface LiveAgentHandle {
|
|
10
|
-
agentId: string;
|
|
11
|
-
taskId: string;
|
|
12
|
-
runId: string;
|
|
13
|
-
session: LiveSessionHandle;
|
|
14
|
-
createdAt: string;
|
|
15
|
-
updatedAt: string;
|
|
16
|
-
status: CrewAgentRecord["status"];
|
|
17
|
-
pendingSteers: string[];
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
const
|
|
25
|
-
const
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
handle.pendingSteers
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
handle
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
handle
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
}
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
handle.
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
1
|
+
import type { CrewAgentRecord } from "./crew-agent-runtime.ts";
|
|
2
|
+
|
|
3
|
+
/**
 * Minimal view of a live session as used by this module.
 * Every capability is optional; callers check for a function before invoking it.
 */
type LiveSessionHandle = {
	/** Sends a steering message to the session. */
	steer?: (text: string) => Promise<void>;
	/** Sends a prompt to the session, with optional caller-defined options. */
	prompt?: (text: string, options?: Record<string, unknown>) => Promise<void>;
	/** Aborts the session; may complete synchronously or asynchronously. */
	abort?: () => Promise<void> | void;
};
|
|
8
|
+
|
|
9
|
+
/** In-process record describing one registered live agent and its session. */
export interface LiveAgentHandle {
	/** Identifier of the agent; used as the registry key. */
	agentId: string;
	/** Identifier of the task the agent is working on; usable as a lookup fallback. */
	taskId: string;
	/** Identifier of the run this agent belongs to. */
	runId: string;
	/** Session object through which the agent is steered, prompted, or aborted. */
	session: LiveSessionHandle;
	/** ISO-8601 timestamp of the first registration; carried over on re-registration. */
	createdAt: string;
	/** ISO-8601 timestamp of the most recent registration or update. */
	updatedAt: string;
	/** Current agent status. */
	status: CrewAgentRecord["status"];
	/** Steering messages queued while the session exposed no `steer` function. */
	pendingSteers: string[];
	/** Follow-up prompts queued while the session exposed no `prompt` function. */
	pendingFollowUps: string[];
}
|
|
20
|
+
|
|
21
|
+
// Module-level registry of live agent handles, keyed by agentId.
const liveAgents = new Map<string, LiveAgentHandle>();
|
|
22
|
+
|
|
23
|
+
/**
 * Registers (or re-registers) a live agent under its `agentId`.
 *
 * When a handle already exists for the same `agentId`, its `createdAt` and its
 * pending message queues are carried over to the new handle. Any queued steers
 * or follow-ups are then dispatched to the new session if it exposes the
 * matching function. Dispatch is fire-and-forget: the calls are not awaited and
 * their rejections are deliberately ignored.
 *
 * @param input - Handle fields supplied by the caller (everything except timestamps and queues).
 * @returns The handle now stored in the registry.
 */
export function registerLiveAgent(input: Omit<LiveAgentHandle, "createdAt" | "updatedAt" | "pendingSteers" | "pendingFollowUps">): LiveAgentHandle {
	const timestamp = new Date().toISOString();
	const previous = liveAgents.get(input.agentId);
	const handle: LiveAgentHandle = {
		...input,
		createdAt: previous?.createdAt ?? timestamp,
		updatedAt: timestamp,
		pendingSteers: previous?.pendingSteers ?? [],
		pendingFollowUps: previous?.pendingFollowUps ?? [],
	};
	liveAgents.set(input.agentId, handle);

	const { session, pendingSteers, pendingFollowUps } = handle;

	if (pendingSteers.length && typeof session.steer === "function") {
		// Drain the queue in place before dispatching so nothing is sent twice.
		const queuedSteers = pendingSteers.splice(0);
		for (const message of queuedSteers) {
			void session.steer(message).catch(() => {});
		}
	}

	if (pendingFollowUps.length && typeof session.prompt === "function") {
		const queuedFollowUps = pendingFollowUps.splice(0);
		for (const message of queuedFollowUps) {
			void session.prompt(message, { source: "api", expandPromptTemplates: false }).catch(() => {});
		}
	}

	return handle;
}
|
|
40
|
+
|
|
41
|
+
/**
 * Sets the status of the agent registered under `agentId` and refreshes its
 * `updatedAt` timestamp. Does nothing when no such agent is registered.
 *
 * @param agentId - Registry key of the agent (task ids are not resolved here).
 * @param status - New status to store on the handle.
 */
export function updateLiveAgentStatus(agentId: string, status: CrewAgentRecord["status"]): void {
	const target = liveAgents.get(agentId);
	if (target) {
		target.status = status;
		target.updatedAt = new Date().toISOString();
	}
}
|
|
47
|
+
|
|
48
|
+
/**
 * Looks up a live agent by agent id, falling back to a task id match.
 *
 * @param agentIdOrTaskId - Either an agent id (checked first) or a task id.
 * @returns The matching handle, or `undefined` when nothing matches. For a task
 *   id, the first matching handle in registry iteration order is returned.
 */
export function getLiveAgent(agentIdOrTaskId: string): LiveAgentHandle | undefined {
	const byAgentId = liveAgents.get(agentIdOrTaskId);
	if (byAgentId != null) return byAgentId;

	for (const candidate of liveAgents.values()) {
		if (candidate.taskId === agentIdOrTaskId) return candidate;
	}
	return undefined;
}
|
|
51
|
+
|
|
52
|
+
/**
 * Lists all registered live agents, most recently updated first.
 *
 * @returns A new array of handles sorted by `updatedAt` in descending order
 *   (compared with `localeCompare`); the registry itself is not reordered.
 */
export function listLiveAgents(): LiveAgentHandle[] {
	const snapshot = Array.from(liveAgents.values());
	snapshot.sort((left, right) => right.updatedAt.localeCompare(left.updatedAt));
	return snapshot;
}
|
|
55
|
+
|
|
56
|
+
/**
 * Delivers a steering message to a live agent.
 *
 * If the agent's session exposes no `steer` function, the message is queued on
 * the handle's `pendingSteers` instead and `updatedAt` is left unchanged.
 *
 * @param agentIdOrTaskId - Agent id or task id used to resolve the handle.
 * @param message - Steering text to send or queue.
 * @returns The resolved handle.
 * @throws Error when no matching agent is registered in this process.
 */
export async function steerLiveAgent(agentIdOrTaskId: string, message: string): Promise<LiveAgentHandle> {
	const handle = getLiveAgent(agentIdOrTaskId);
	if (!handle) {
		throw new Error(`Live agent '${agentIdOrTaskId}' is not registered in this process.`);
	}

	const { session } = handle;
	if (typeof session.steer === "function") {
		await session.steer(message);
		handle.updatedAt = new Date().toISOString();
	} else {
		handle.pendingSteers.push(message);
	}
	return handle;
}
|
|
67
|
+
|
|
68
|
+
/**
 * Sends a follow-up prompt to a live agent.
 *
 * If the agent's session exposes no `prompt` function, the text is queued on
 * the handle's `pendingFollowUps` instead and `updatedAt` is left unchanged.
 *
 * @param agentIdOrTaskId - Agent id or task id used to resolve the handle.
 * @param prompt - Follow-up text to send or queue.
 * @returns The resolved handle.
 * @throws Error when no matching agent is registered in this process.
 */
export async function followUpLiveAgent(agentIdOrTaskId: string, prompt: string): Promise<LiveAgentHandle> {
	const handle = getLiveAgent(agentIdOrTaskId);
	if (!handle) {
		throw new Error(`Live agent '${agentIdOrTaskId}' is not registered in this process.`);
	}

	const { session } = handle;
	if (typeof session.prompt === "function") {
		await session.prompt(prompt, { source: "api", expandPromptTemplates: false });
		handle.updatedAt = new Date().toISOString();
	} else {
		handle.pendingFollowUps.push(prompt);
	}
	return handle;
}
|
|
79
|
+
|
|
80
|
+
/**
 * Aborts a live agent's session and marks the handle as stopped.
 *
 * The status and `updatedAt` are only changed after `abort()` has completed;
 * if `abort()` rejects, the handle is left as it was.
 *
 * @param agentIdOrTaskId - Agent id or task id used to resolve the handle.
 * @returns The resolved handle with status "stopped".
 * @throws Error when no matching agent is registered, or when its session
 *   does not expose `abort()`.
 */
export async function stopLiveAgent(agentIdOrTaskId: string): Promise<LiveAgentHandle> {
	const target = getLiveAgent(agentIdOrTaskId);
	if (!target) {
		throw new Error(`Live agent '${agentIdOrTaskId}' is not registered in this process.`);
	}

	const { session } = target;
	if (typeof session.abort !== "function") {
		throw new Error(`Live agent '${agentIdOrTaskId}' does not expose abort().`);
	}

	await session.abort();
	target.status = "stopped";
	target.updatedAt = new Date().toISOString();
	return target;
}
|
|
89
|
+
|
|
90
|
+
/**
 * Resumes a live agent by sending it a prompt.
 *
 * The handle is marked "running" while the prompt is in flight and "completed"
 * once `prompt()` resolves. If `prompt()` rejects, the handle is rolled back to
 * the status it had before the call (so it is not left reporting "running"
 * forever), `updatedAt` is refreshed, and the original error is rethrown.
 *
 * @param agentIdOrTaskId - Agent id or task id used to resolve the handle.
 * @param prompt - Text sent to the session.
 * @returns The resolved handle with status "completed".
 * @throws Error when no matching agent is registered, or when its session does
 *   not expose `prompt()`; also rethrows whatever `prompt()` rejects with.
 */
export async function resumeLiveAgent(agentIdOrTaskId: string, prompt: string): Promise<LiveAgentHandle> {
	const handle = getLiveAgent(agentIdOrTaskId);
	if (!handle) throw new Error(`Live agent '${agentIdOrTaskId}' is not registered in this process.`);
	if (typeof handle.session.prompt !== "function") throw new Error(`Live agent '${agentIdOrTaskId}' does not expose prompt().`);

	const previousStatus = handle.status;
	handle.status = "running";
	try {
		await handle.session.prompt(prompt, { source: "api", expandPromptTemplates: false });
	} catch (error: unknown) {
		// Roll back only our own "running" marker; a status written by someone
		// else while the prompt was in flight (e.g. a stop) must not be clobbered.
		if (handle.status === "running") handle.status = previousStatus;
		handle.updatedAt = new Date().toISOString();
		throw error;
	}
	handle.status = "completed";
	handle.updatedAt = new Date().toISOString();
	return handle;
}
|
|
100
|
+
|
|
101
|
+
/**
 * Removes every handle from the in-process live agent registry.
 * Sessions are not aborted; only the registry entries are dropped.
 */
export function clearLiveAgentsForTest(): void {
	liveAgents.clear();
}
|