pi-crew 0.2.3 → 0.2.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (348) hide show
  1. package/AGENTS.md +57 -32
  2. package/CHANGELOG.md +466 -448
  3. package/LICENSE +21 -21
  4. package/NOTICE.md +16 -16
  5. package/README.md +323 -323
  6. package/docs/FEATURE_INTAKE.md +126 -0
  7. package/docs/HARNESS.md +86 -0
  8. package/docs/HARNESS_BACKLOG.md +41 -0
  9. package/docs/TEST_MATRIX.md +49 -0
  10. package/docs/actions-reference.md +595 -595
  11. package/docs/architecture.md +180 -180
  12. package/docs/code-review-2026-05-11.md +592 -592
  13. package/docs/commands-reference.md +347 -347
  14. package/docs/comparison-pi-subagents-vs-pi-crew.md +303 -0
  15. package/docs/decisions/0001-durable-state.md +41 -0
  16. package/docs/decisions/0002-child-process-for-async.md +42 -0
  17. package/docs/decisions/0003-depth-guard.md +36 -0
  18. package/docs/decisions/0004-execfile-over-exec.md +34 -0
  19. package/docs/decisions/0005-no-parameter-properties.md +49 -0
  20. package/docs/decisions/0006-publish-bundled-esm.md +63 -0
  21. package/docs/decisions/0007-active-run-binary-index.md +54 -0
  22. package/docs/decisions/0008-child-pi-warm-pool.md +61 -0
  23. package/docs/decisions/README.md +23 -0
  24. package/docs/followup-review-round4-2026-05-13.md +107 -0
  25. package/docs/implementation-plan-top3.md +333 -0
  26. package/docs/live-mailbox-runtime.md +36 -36
  27. package/docs/next-upgrade-roadmap.md +808 -808
  28. package/docs/oh-my-pi-research.md +509 -0
  29. package/docs/perf/baseline-2026-05.md +113 -0
  30. package/docs/perf/final-report-2026-05.md +206 -0
  31. package/docs/perf/sprint-1-report.md +71 -0
  32. package/docs/perf/sprint-2-report.md +81 -0
  33. package/docs/perf/sprint-2.5-report.md +53 -0
  34. package/docs/perf/sprint-3-report.md +36 -0
  35. package/docs/perf/sprint-4-report.md +47 -0
  36. package/docs/perf/sprint-5-report.md +51 -0
  37. package/docs/perf/sprint-6-report.md +94 -0
  38. package/docs/perf/sprint-7-report.md +74 -0
  39. package/docs/perf/upgrade-plan-2026-05.md +147 -0
  40. package/docs/pi-subagents3-deep-analysis.md +508 -0
  41. package/docs/product/README.md +31 -0
  42. package/docs/product/platform.md +27 -0
  43. package/docs/product/runtime-safety.md +37 -0
  44. package/docs/product/team-run.md +39 -0
  45. package/docs/product/team-tool.md +37 -0
  46. package/docs/publishing.md +65 -65
  47. package/docs/resource-formats.md +134 -134
  48. package/docs/runtime-analysis-child-vs-live.md +171 -0
  49. package/docs/runtime-flow.md +148 -148
  50. package/docs/runtime-migration-in-process-analysis.md +250 -0
  51. package/docs/stories/README.md +30 -0
  52. package/docs/stories/backlog.md +36 -0
  53. package/docs/templates/decision.md +27 -0
  54. package/docs/templates/story.md +44 -0
  55. package/docs/templates/validation-report.md +32 -0
  56. package/docs/usage.md +238 -238
  57. package/index.ts +7 -6
  58. package/install.mjs +65 -65
  59. package/package.json +107 -100
  60. package/schema.json +222 -222
  61. package/skills/child-pi-spawning/SKILL.md +213 -0
  62. package/skills/context-artifact-hygiene/SKILL.md +32 -0
  63. package/skills/event-log-tracing/SKILL.md +299 -0
  64. package/skills/git-master/SKILL.md +225 -24
  65. package/skills/live-agent-lifecycle/SKILL.md +192 -0
  66. package/skills/mailbox-interactive/SKILL.md +300 -19
  67. package/skills/model-routing-context/SKILL.md +94 -0
  68. package/skills/multi-perspective-review/SKILL.md +88 -0
  69. package/skills/read-only-explorer/SKILL.md +250 -26
  70. package/skills/safe-bash/SKILL.md +307 -21
  71. package/skills/verification-before-done/SKILL.md +11 -2
  72. package/skills/widget-rendering/SKILL.md +258 -0
  73. package/skills/workspace-isolation/SKILL.md +202 -0
  74. package/skills/worktree-isolation/SKILL.md +202 -18
  75. package/src/adapters/claude-adapter.ts +25 -25
  76. package/src/adapters/codex-adapter.ts +21 -21
  77. package/src/adapters/cursor-adapter.ts +17 -17
  78. package/src/adapters/export-util.ts +137 -137
  79. package/src/adapters/index.ts +15 -15
  80. package/src/adapters/registry.ts +18 -18
  81. package/src/adapters/types.ts +23 -23
  82. package/src/agents/agent-config.ts +38 -38
  83. package/src/agents/agent-serializer.ts +38 -38
  84. package/src/agents/discover-agents.ts +121 -118
  85. package/src/config/config.ts +740 -858
  86. package/src/config/defaults.ts +96 -96
  87. package/src/config/drift-detector.ts +211 -211
  88. package/src/config/markers.ts +327 -327
  89. package/src/config/resilient-parser.ts +109 -108
  90. package/src/config/suggestions.ts +74 -74
  91. package/src/config/types.ts +199 -0
  92. package/src/extension/async-notifier.ts +123 -89
  93. package/src/extension/autonomous-policy.ts +169 -169
  94. package/src/extension/cross-extension-rpc.ts +104 -104
  95. package/src/extension/help.ts +47 -47
  96. package/src/extension/import-index.ts +69 -69
  97. package/src/extension/management.ts +395 -382
  98. package/src/extension/notification-router.ts +116 -116
  99. package/src/extension/notification-sink.ts +51 -51
  100. package/src/extension/project-init.ts +168 -168
  101. package/src/extension/register.ts +859 -668
  102. package/src/extension/registration/artifact-cleanup.ts +15 -15
  103. package/src/extension/registration/command-utils.ts +54 -54
  104. package/src/extension/registration/commands.ts +559 -452
  105. package/src/extension/registration/compaction-guard.ts +125 -125
  106. package/src/extension/registration/subagent-helpers.ts +102 -102
  107. package/src/extension/registration/subagent-tools.ts +220 -159
  108. package/src/extension/registration/team-tool.ts +159 -99
  109. package/src/extension/registration/viewers.ts +29 -0
  110. package/src/extension/result-watcher.ts +128 -128
  111. package/src/extension/run-bundle-schema.ts +89 -89
  112. package/src/extension/run-export.ts +73 -73
  113. package/src/extension/run-import.ts +84 -84
  114. package/src/extension/run-index.ts +94 -94
  115. package/src/extension/run-maintenance.ts +142 -142
  116. package/src/extension/session-summary.ts +8 -8
  117. package/src/extension/team-manager-command.ts +96 -96
  118. package/src/extension/team-recommendation.ts +188 -188
  119. package/src/extension/team-tool/api.ts +5 -2
  120. package/src/extension/team-tool/cancel.ts +224 -209
  121. package/src/extension/team-tool/config-patch.ts +36 -36
  122. package/src/extension/team-tool/context.ts +60 -60
  123. package/src/extension/team-tool/doctor.ts +242 -242
  124. package/src/extension/team-tool/handle-settings.ts +421 -195
  125. package/src/extension/team-tool/inspect.ts +41 -41
  126. package/src/extension/team-tool/lifecycle-actions.ts +139 -139
  127. package/src/extension/team-tool/parallel-dispatch.ts +156 -156
  128. package/src/extension/team-tool/plan.ts +19 -19
  129. package/src/extension/team-tool/respond.ts +112 -111
  130. package/src/extension/team-tool/run.ts +246 -229
  131. package/src/extension/team-tool/status.ts +110 -110
  132. package/src/extension/team-tool-types.ts +13 -13
  133. package/src/extension/team-tool.ts +344 -344
  134. package/src/extension/tool-result.ts +16 -16
  135. package/src/extension/validate-resources.ts +77 -77
  136. package/src/hooks/registry.ts +61 -61
  137. package/src/hooks/types.ts +40 -40
  138. package/src/i18n.ts +184 -184
  139. package/src/observability/correlation.ts +35 -35
  140. package/src/observability/event-to-metric.ts +68 -68
  141. package/src/observability/exporters/adapter.ts +30 -30
  142. package/src/observability/exporters/otlp-exporter.ts +106 -92
  143. package/src/observability/exporters/prometheus-exporter.ts +54 -54
  144. package/src/observability/metric-registry.ts +87 -87
  145. package/src/observability/metric-retention.ts +54 -54
  146. package/src/observability/metric-sink.ts +81 -56
  147. package/src/observability/metrics-primitives.ts +167 -167
  148. package/src/prompt/prompt-runtime.ts +72 -72
  149. package/src/runtime/adaptive-plan.ts +338 -0
  150. package/src/runtime/agent-control.ts +169 -169
  151. package/src/runtime/agent-memory.ts +72 -72
  152. package/src/runtime/agent-observability.ts +114 -114
  153. package/src/runtime/async-marker.ts +26 -26
  154. package/src/runtime/async-runner.ts +153 -153
  155. package/src/runtime/attention-events.ts +28 -28
  156. package/src/runtime/auto-resume.ts +100 -100
  157. package/src/runtime/background-runner.ts +122 -89
  158. package/src/runtime/cancellation.ts +61 -61
  159. package/src/runtime/capability-inventory.ts +116 -116
  160. package/src/runtime/child-pi-pool.ts +68 -0
  161. package/src/runtime/child-pi.ts +541 -461
  162. package/src/runtime/code-summary.ts +247 -247
  163. package/src/runtime/compaction-summary.ts +271 -271
  164. package/src/runtime/concurrency.ts +58 -58
  165. package/src/runtime/crash-recovery.ts +317 -301
  166. package/src/runtime/crew-agent-records.ts +379 -281
  167. package/src/runtime/crew-agent-runtime.ts +60 -60
  168. package/src/runtime/cross-extension-rpc.ts +72 -0
  169. package/src/runtime/custom-tools/irc-tool.ts +201 -201
  170. package/src/runtime/custom-tools/submit-result-tool.ts +90 -90
  171. package/src/runtime/deadletter.ts +47 -47
  172. package/src/runtime/delivery-coordinator.ts +176 -176
  173. package/src/runtime/delta-conflict.ts +360 -360
  174. package/src/runtime/diagnostic-export.ts +102 -102
  175. package/src/runtime/direct-run.ts +35 -35
  176. package/src/runtime/effectiveness.ts +82 -81
  177. package/src/runtime/errors/crew-errors.ts +166 -0
  178. package/src/runtime/event-stream-bridge.ts +92 -92
  179. package/src/runtime/foreground-control.ts +82 -82
  180. package/src/runtime/green-contract.ts +46 -46
  181. package/src/runtime/group-join.ts +234 -106
  182. package/src/runtime/heartbeat-watcher.ts +145 -124
  183. package/src/runtime/iteration-hooks.ts +267 -267
  184. package/src/runtime/live-agent-control.ts +88 -88
  185. package/src/runtime/live-agent-manager.ts +377 -179
  186. package/src/runtime/live-control-realtime.ts +36 -36
  187. package/src/runtime/live-session-runtime.ts +676 -600
  188. package/src/runtime/loop-gates.ts +129 -129
  189. package/src/runtime/manifest-cache.ts +263 -263
  190. package/src/runtime/mcp-proxy.ts +113 -113
  191. package/src/runtime/metric-parser.ts +40 -40
  192. package/src/runtime/model-fallback.ts +282 -274
  193. package/src/runtime/model-resolver.ts +118 -0
  194. package/src/runtime/output-validator.ts +187 -187
  195. package/src/runtime/overflow-recovery.ts +175 -175
  196. package/src/runtime/parallel-research.ts +44 -44
  197. package/src/runtime/parallel-utils.ts +156 -156
  198. package/src/runtime/parent-guard.ts +80 -80
  199. package/src/runtime/phase-progress.ts +217 -217
  200. package/src/runtime/pi-args.ts +165 -165
  201. package/src/runtime/pi-json-output.ts +111 -111
  202. package/src/runtime/pi-spawn.ts +167 -167
  203. package/src/runtime/policy-engine.ts +79 -79
  204. package/src/runtime/post-checks.ts +125 -125
  205. package/src/runtime/post-exit-stdio-guard.ts +86 -86
  206. package/src/runtime/process-status.ts +97 -73
  207. package/src/runtime/progress-event-coalescer.ts +43 -43
  208. package/src/runtime/recovery-recipes.ts +74 -74
  209. package/src/runtime/retry-executor.ts +81 -81
  210. package/src/runtime/role-permission.ts +39 -39
  211. package/src/runtime/run-tracker.ts +99 -0
  212. package/src/runtime/runtime-policy.ts +21 -0
  213. package/src/runtime/runtime-resolver.ts +94 -91
  214. package/src/runtime/scheduler.ts +294 -0
  215. package/src/runtime/semaphore.ts +131 -131
  216. package/src/runtime/sensitive-paths.ts +92 -92
  217. package/src/runtime/session-usage.ts +79 -79
  218. package/src/runtime/settings-store.ts +103 -0
  219. package/src/runtime/sidechain-output.ts +29 -29
  220. package/src/runtime/skill-instructions.ts +222 -222
  221. package/src/runtime/stale-reconciler.ts +198 -189
  222. package/src/runtime/streaming-output.ts +47 -0
  223. package/src/runtime/subagent-manager.ts +404 -400
  224. package/src/runtime/subprocess-tool-registry.ts +67 -67
  225. package/src/runtime/task-display.ts +38 -38
  226. package/src/runtime/task-graph-scheduler.ts +122 -122
  227. package/src/runtime/task-graph.ts +207 -207
  228. package/src/runtime/task-output-context.ts +177 -177
  229. package/src/runtime/task-packet.ts +93 -93
  230. package/src/runtime/task-quality.ts +207 -207
  231. package/src/runtime/task-runner/capabilities.ts +78 -78
  232. package/src/runtime/task-runner/live-executor.ts +131 -113
  233. package/src/runtime/task-runner/progress.ts +119 -119
  234. package/src/runtime/task-runner/prompt-builder.ts +139 -139
  235. package/src/runtime/task-runner/prompt-pipeline.ts +64 -64
  236. package/src/runtime/task-runner/result-utils.ts +14 -14
  237. package/src/runtime/task-runner/run-projection.ts +103 -103
  238. package/src/runtime/task-runner/state-helpers.ts +22 -22
  239. package/src/runtime/task-runner.ts +469 -459
  240. package/src/runtime/team-runner.ts +693 -945
  241. package/src/runtime/usage-tracker.ts +71 -0
  242. package/src/runtime/worker-heartbeat.ts +21 -21
  243. package/src/runtime/worker-startup.ts +57 -57
  244. package/src/runtime/workflow-state.ts +187 -187
  245. package/src/runtime/yield-handler.ts +190 -190
  246. package/src/schema/config-schema.ts +172 -168
  247. package/src/schema/team-tool-schema.ts +126 -126
  248. package/src/schema/validation-types.ts +151 -148
  249. package/src/skills/discover-skills.ts +67 -67
  250. package/src/skills/skill-templates.ts +374 -374
  251. package/src/state/active-run-registry.ts +227 -191
  252. package/src/state/artifact-store.ts +130 -129
  253. package/src/state/atomic-write.ts +262 -195
  254. package/src/state/blob-store.ts +116 -116
  255. package/src/state/contracts.ts +111 -111
  256. package/src/state/event-log-rotation.ts +161 -158
  257. package/src/state/event-log.ts +383 -303
  258. package/src/state/event-reconstructor.ts +217 -217
  259. package/src/state/jsonl-writer.ts +82 -82
  260. package/src/state/locks.ts +146 -146
  261. package/src/state/mailbox.ts +446 -405
  262. package/src/state/state-store.ts +364 -351
  263. package/src/state/task-claims.ts +44 -44
  264. package/src/state/types.ts +285 -285
  265. package/src/state/usage.ts +29 -29
  266. package/src/subagents/async-entry.ts +1 -1
  267. package/src/subagents/index.ts +3 -3
  268. package/src/subagents/live/control.ts +1 -1
  269. package/src/subagents/live/manager.ts +1 -1
  270. package/src/subagents/live/realtime.ts +1 -1
  271. package/src/subagents/live/session-runtime.ts +1 -1
  272. package/src/subagents/manager.ts +1 -1
  273. package/src/subagents/spawn.ts +1 -1
  274. package/src/teams/discover-teams.ts +116 -116
  275. package/src/teams/team-config.ts +27 -27
  276. package/src/teams/team-serializer.ts +38 -38
  277. package/src/types/diff.d.ts +18 -18
  278. package/src/ui/agent-management-overlay.ts +144 -144
  279. package/src/ui/crew-widget.ts +487 -370
  280. package/src/ui/dashboard-panes/agents-pane.ts +109 -28
  281. package/src/ui/dashboard-panes/cancellation-pane.ts +42 -42
  282. package/src/ui/dashboard-panes/capability-pane.ts +59 -59
  283. package/src/ui/dashboard-panes/health-pane.ts +30 -30
  284. package/src/ui/dashboard-panes/mailbox-pane.ts +35 -35
  285. package/src/ui/dashboard-panes/progress-pane.ts +30 -30
  286. package/src/ui/dashboard-panes/transcript-pane.ts +10 -10
  287. package/src/ui/heartbeat-aggregator.ts +63 -63
  288. package/src/ui/keybinding-map.ts +97 -94
  289. package/src/ui/live-conversation-overlay.ts +152 -0
  290. package/src/ui/live-run-sidebar.ts +180 -180
  291. package/src/ui/mascot.ts +442 -442
  292. package/src/ui/overlays/agent-picker-overlay.ts +57 -57
  293. package/src/ui/overlays/confirm-overlay.ts +58 -58
  294. package/src/ui/overlays/mailbox-compose-overlay.ts +144 -144
  295. package/src/ui/overlays/mailbox-compose-preview.ts +63 -63
  296. package/src/ui/overlays/mailbox-detail-overlay.ts +122 -122
  297. package/src/ui/pi-ui-compat.ts +57 -57
  298. package/src/ui/powerbar-publisher.ts +221 -197
  299. package/src/ui/render-scheduler.ts +216 -143
  300. package/src/ui/run-action-dispatcher.ts +118 -118
  301. package/src/ui/run-dashboard.ts +526 -464
  302. package/src/ui/run-event-bus.ts +208 -208
  303. package/src/ui/run-snapshot-cache.ts +826 -777
  304. package/src/ui/settings-overlay.ts +721 -0
  305. package/src/ui/snapshot-types.ts +86 -70
  306. package/src/ui/theme-adapter.ts +190 -190
  307. package/src/ui/tool-progress-formatter.ts +89 -0
  308. package/src/ui/transcript-cache.ts +94 -94
  309. package/src/ui/transcript-viewer.ts +335 -335
  310. package/src/utils/conflict-detect.ts +662 -0
  311. package/src/utils/file-coalescer.ts +86 -86
  312. package/src/utils/frontmatter.ts +68 -68
  313. package/src/utils/fs-watch.ts +88 -31
  314. package/src/utils/gh-protocol.ts +479 -0
  315. package/src/utils/ids.ts +17 -17
  316. package/src/utils/incremental-reader.ts +104 -104
  317. package/src/utils/internal-error.ts +6 -6
  318. package/src/utils/names.ts +27 -27
  319. package/src/utils/paths.ts +102 -63
  320. package/src/utils/redaction.ts +44 -44
  321. package/src/utils/safe-paths.ts +47 -47
  322. package/src/utils/scan-cache.ts +136 -136
  323. package/src/utils/sse-parser.ts +134 -134
  324. package/src/utils/task-name-generator.ts +337 -337
  325. package/src/utils/timings.ts +33 -33
  326. package/src/utils/visual.ts +243 -198
  327. package/src/workflows/discover-workflows.ts +139 -139
  328. package/src/workflows/validate-workflow.ts +40 -40
  329. package/src/workflows/workflow-config.ts +26 -26
  330. package/src/workflows/workflow-serializer.ts +32 -32
  331. package/src/worktree/branch-freshness.ts +45 -45
  332. package/src/worktree/cleanup.ts +75 -75
  333. package/src/worktree/worktree-manager.ts +188 -188
  334. package/teams/default.team.md +12 -12
  335. package/teams/fast-fix.team.md +11 -11
  336. package/teams/implementation.team.md +18 -18
  337. package/teams/parallel-research.team.md +14 -14
  338. package/teams/research.team.md +11 -11
  339. package/teams/review.team.md +12 -12
  340. package/tsconfig.json +19 -19
  341. package/workflows/default.workflow.md +30 -30
  342. package/workflows/fast-fix.workflow.md +23 -23
  343. package/workflows/implementation.workflow.md +43 -43
  344. package/workflows/parallel-research.workflow.md +46 -46
  345. package/workflows/research.workflow.md +22 -22
  346. package/workflows/review.workflow.md +30 -30
  347. package/skills/task-packet/SKILL.md +0 -28
  348. package/skills/verify-evidence/SKILL.md +0 -27
@@ -1,106 +1,234 @@
1
- import type { CrewRuntimeConfig } from "../config/config.ts";
2
- import { writeArtifact } from "../state/artifact-store.ts";
3
- import { appendEvent } from "../state/event-log.ts";
4
- import { appendMailboxMessage, findMailboxMessageByRequestId, readDeliveryState } from "../state/mailbox.ts";
5
- import type { ArtifactDescriptor, TeamRunManifest, TeamTaskState } from "../state/types.ts";
6
- import { aggregateTaskOutputs } from "./task-output-context.ts";
7
-
8
- export type CrewGroupJoinMode = "off" | "group" | "smart";
9
-
10
- export interface CrewGroupJoinDelivery {
11
- batchId: string;
12
- mode: CrewGroupJoinMode;
13
- partial: boolean;
14
- taskIds: string[];
15
- completed: string[];
16
- failed: string[];
17
- skipped: string[];
18
- remaining: string[];
19
- artifact?: ArtifactDescriptor;
20
- messageId?: string;
21
- requestId?: string;
22
- ackRequired?: boolean;
23
- ackStatus?: "pending" | "acknowledged";
24
- }
25
-
26
- export function resolveGroupJoinMode(runtime?: CrewRuntimeConfig): CrewGroupJoinMode {
27
- return runtime?.groupJoin ?? "smart";
28
- }
29
-
30
- export function shouldGroupJoin(mode: CrewGroupJoinMode, batch: TeamTaskState[]): boolean {
31
- if (mode === "off") return false;
32
- if (mode === "group") return batch.length > 0;
33
- return batch.length > 1;
34
- }
35
-
36
- function batchIdFor(runId: string, taskIds: string[]): string {
37
- return `${runId}_${taskIds.join("+").replace(/[^a-zA-Z0-9_+-]/g, "_")}`;
38
- }
39
-
40
- function requestIdFor(runId: string, batchId: string, partial: boolean): string {
41
- return `${runId}:group-join:${partial ? "partial" : "completed"}:${batchId}`;
42
- }
43
-
44
- function statusList(tasks: TeamTaskState[], status: TeamTaskState["status"]): string[] {
45
- return tasks.filter((task) => task.status === status).map((task) => task.id);
46
- }
47
-
48
- export function deliverGroupJoin(input: {
49
- manifest: TeamRunManifest;
50
- mode: CrewGroupJoinMode;
51
- batch: TeamTaskState[];
52
- allTasks: TeamTaskState[];
53
- partial?: boolean;
54
- }): CrewGroupJoinDelivery | undefined {
55
- if (!shouldGroupJoin(input.mode, input.batch)) return undefined;
56
- const taskIds = input.batch.map((task) => task.id);
57
- const latest = taskIds.map((id) => input.allTasks.find((task) => task.id === id)).filter((task): task is TeamTaskState => Boolean(task));
58
- const completed = statusList(latest, "completed");
59
- const failed = statusList(latest, "failed");
60
- const skipped = statusList(latest, "skipped");
61
- const remaining = latest.filter((task) => task.status === "queued" || task.status === "running").map((task) => task.id);
62
- const partial = input.partial ?? remaining.length > 0;
63
- const batchId = batchIdFor(input.manifest.runId, taskIds);
64
- const summary = aggregateTaskOutputs(latest, input.manifest);
65
- const requestId = requestIdFor(input.manifest.runId, batchId, partial);
66
- const existingMailbox = findMailboxMessageByRequestId(input.manifest, requestId);
67
- const existingStatus = existingMailbox ? readDeliveryState(input.manifest).messages[existingMailbox.id] ?? existingMailbox.status : undefined;
68
- const delivery: CrewGroupJoinDelivery = { batchId, mode: input.mode, partial, taskIds, completed, failed, skipped, remaining, requestId, ackRequired: true, ackStatus: existingStatus === "acknowledged" ? "acknowledged" : "pending" };
69
- const content = `${JSON.stringify({ ...delivery, createdAt: new Date().toISOString() }, null, 2)}\n`;
70
- const artifact = writeArtifact(input.manifest.artifactsRoot, {
71
- kind: "metadata",
72
- relativePath: `metadata/group-joins/${batchId}.json`,
73
- producer: "group-join",
74
- content,
75
- });
76
- const mailbox = existingMailbox ?? appendMailboxMessage(input.manifest, {
77
- direction: "outbox",
78
- from: "group-join",
79
- to: "leader",
80
- body: [
81
- `Group join ${partial ? "partial" : "completed"}: ${taskIds.join(", ")}`,
82
- `Request: ${requestId}`,
83
- `Completed: ${completed.join(", ") || "none"}`,
84
- `Failed: ${failed.join(", ") || "none"}`,
85
- `Skipped: ${skipped.join(", ") || "none"}`,
86
- `Remaining: ${remaining.join(", ") || "none"}`,
87
- "",
88
- summary,
89
- ].join("\n"),
90
- status: "delivered",
91
- data: { kind: "group_join", requestId, batchId, partial, ackRequired: true, taskIds, completed, failed, skipped, remaining },
92
- });
93
- appendEvent(input.manifest.eventsPath, {
94
- type: partial ? "agent.group_join.partial" : "agent.group_join.completed",
95
- runId: input.manifest.runId,
96
- message: `Group join ${partial ? "partial" : "completed"} for ${taskIds.length} task(s).`,
97
- data: { ...delivery, artifactPath: artifact.path, messageId: mailbox.id, fallback: "mailbox-delivered", reused: Boolean(existingMailbox) },
98
- });
99
- if (existingMailbox) appendEvent(input.manifest.eventsPath, {
100
- type: "agent.group_join.delivery_reused",
101
- runId: input.manifest.runId,
102
- message: `Reused group join mailbox delivery for ${taskIds.length} task(s).`,
103
- data: { requestId, messageId: mailbox.id, batchId, partial },
104
- });
105
- return { ...delivery, artifact, messageId: mailbox.id };
106
- }
1
+ import type { CrewRuntimeConfig } from "../config/config.ts";
2
+ import { writeArtifact } from "../state/artifact-store.ts";
3
+ import { appendEvent } from "../state/event-log.ts";
4
+ import { appendMailboxMessage, findMailboxMessageByRequestId, readDeliveryState } from "../state/mailbox.ts";
5
+ import type { ArtifactDescriptor, TeamRunManifest, TeamTaskState } from "../state/types.ts";
6
+ import { aggregateTaskOutputs } from "./task-output-context.ts";
7
+
8
/**
 * Group-join policy for a batch of tasks.
 *
 * - `"off"`   — never group.
 * - `"group"` — group any non-empty batch.
 * - `"smart"` — group only batches with more than one task.
 */
export type CrewGroupJoinMode = "off" | "group" | "smart";

/** Summary of a single group-join delivery, as produced by `deliverGroupJoin`. */
export interface CrewGroupJoinDelivery {
	/** Identifier derived from the run id and the batch's task ids. */
	batchId: string;
	/** Policy that was in effect when the delivery was produced. */
	mode: CrewGroupJoinMode;
	/** True when the delivery was made while some tasks had not yet finished. */
	partial: boolean;

	/** Ids of every task in the batch. */
	taskIds: string[];
	/** Ids of batch tasks whose status is `completed`. */
	completed: string[];
	/** Ids of batch tasks whose status is `failed`. */
	failed: string[];
	/** Ids of batch tasks whose status is `skipped`. */
	skipped: string[];
	/** Ids of batch tasks that are still `queued` or `running`. */
	remaining: string[];

	/** Descriptor of the metadata artifact written for this delivery. */
	artifact?: ArtifactDescriptor;
	/** Id of the mailbox message carrying the delivery. */
	messageId?: string;
	/** Request id used to look up an earlier mailbox message for the same delivery. */
	requestId?: string;
	/** Whether the recipient is expected to acknowledge the delivery. */
	ackRequired?: boolean;
	/** Acknowledgement state of the mailbox message. */
	ackStatus?: "pending" | "acknowledged";
}
25
+
26
/**
 * Picks the group-join mode from the runtime configuration.
 *
 * @param runtime Optional runtime configuration.
 * @returns The configured `groupJoin` value, or `"smart"` when the
 *          configuration or the setting is absent.
 */
export function resolveGroupJoinMode(runtime?: CrewRuntimeConfig): CrewGroupJoinMode {
	const configured = runtime?.groupJoin;
	return configured ?? "smart";
}
29
+
30
/**
 * Decides whether a batch should be delivered as a group join.
 *
 * @param mode  Active group-join policy.
 * @param batch Tasks that make up the batch.
 * @returns `false` for `"off"`; for `"group"` true when the batch is
 *          non-empty; otherwise true only when the batch has more than one task.
 */
export function shouldGroupJoin(mode: CrewGroupJoinMode, batch: TeamTaskState[]): boolean {
	const size = batch.length;
	switch (mode) {
		case "off":
			return false;
		case "group":
			return size > 0;
		default:
			return size > 1;
	}
}
35
+
36
/**
 * Builds the batch identifier for a set of tasks.
 *
 * The task ids are joined with `+`, and every character outside
 * `[a-zA-Z0-9_+-]` in that joined string is replaced with `_`.
 * The run id is prepended as-is.
 */
function batchIdFor(runId: string, taskIds: string[]): string {
	const joinedIds = taskIds.join("+");
	const safeIds = joinedIds.replace(/[^a-zA-Z0-9_+-]/g, "_");
	return `${runId}_${safeIds}`;
}
39
+
40
/**
 * Builds the request id for a group-join delivery. Partial and completed
 * deliveries of the same batch get different ids.
 */
function requestIdFor(runId: string, batchId: string, partial: boolean): string {
	const phase = partial ? "partial" : "completed";
	return `${runId}:group-join:${phase}:${batchId}`;
}
43
+
44
/** Returns the ids of the tasks whose status equals `status`, in input order. */
function statusList(tasks: TeamTaskState[], status: TeamTaskState["status"]): string[] {
	const ids: string[] = [];
	for (const task of tasks) {
		if (task.status === status) ids.push(task.id);
	}
	return ids;
}
47
+
48
/**
 * Delivers a group-join summary for a batch of tasks.
 *
 * Steps, in order:
 *  1. Bail out (returning `undefined`) when the mode does not call for grouping.
 *  2. Look up each batch task in `allTasks` and bucket the ids by status.
 *  3. Write a JSON metadata artifact under `metadata/group-joins/`.
 *  4. Reuse the mailbox message already stored for this request id, or append
 *     a new outbox message addressed to `leader`.
 *  5. Append a `partial`/`completed` event, plus a `delivery_reused` event when
 *     an existing mailbox message was reused.
 *
 * @param input.manifest Run manifest supplying the run id, artifact root and event log path.
 * @param input.mode     Active group-join policy.
 * @param input.batch    Tasks that form the batch.
 * @param input.allTasks Task list used to look up the current state of each batch task.
 * @param input.partial  Overrides the partial flag; when omitted the delivery is
 *                       partial if any batch task is still queued or running.
 * @returns The delivery summary including artifact and message id, or `undefined`
 *          when no group join applies.
 */
export function deliverGroupJoin(input: {
	manifest: TeamRunManifest;
	mode: CrewGroupJoinMode;
	batch: TeamTaskState[];
	allTasks: TeamTaskState[];
	partial?: boolean;
}): CrewGroupJoinDelivery | undefined {
	const { manifest, mode, batch, allTasks } = input;
	if (!shouldGroupJoin(mode, batch)) return undefined;

	const taskIds = batch.map((task) => task.id);

	// Batch ids with no match in `allTasks` are dropped.
	const latest: TeamTaskState[] = [];
	for (const id of taskIds) {
		const current = allTasks.find((task) => task.id === id);
		if (current) latest.push(current);
	}

	const completed = statusList(latest, "completed");
	const failed = statusList(latest, "failed");
	const skipped = statusList(latest, "skipped");
	const remaining: string[] = [];
	for (const task of latest) {
		if (task.status === "queued" || task.status === "running") remaining.push(task.id);
	}

	const partial = input.partial ?? remaining.length > 0;
	const phase = partial ? "partial" : "completed";
	const batchId = batchIdFor(manifest.runId, taskIds);
	const summary = aggregateTaskOutputs(latest, manifest);
	const requestId = requestIdFor(manifest.runId, batchId, partial);

	// The request id makes the delivery idempotent: an earlier mailbox message is reused.
	const existingMailbox = findMailboxMessageByRequestId(manifest, requestId);
	const existingStatus = existingMailbox
		? (readDeliveryState(manifest).messages[existingMailbox.id] ?? existingMailbox.status)
		: undefined;

	const delivery: CrewGroupJoinDelivery = {
		batchId,
		mode,
		partial,
		taskIds,
		completed,
		failed,
		skipped,
		remaining,
		requestId,
		ackRequired: true,
		ackStatus: existingStatus === "acknowledged" ? "acknowledged" : "pending",
	};

	// The artifact is written on every call, including when the mailbox message is reused.
	const content = `${JSON.stringify({ ...delivery, createdAt: new Date().toISOString() }, null, 2)}\n`;
	const artifact = writeArtifact(manifest.artifactsRoot, {
		kind: "metadata",
		relativePath: `metadata/group-joins/${batchId}.json`,
		producer: "group-join",
		content,
	});

	const listOrNone = (ids: string[]): string => ids.join(", ") || "none";
	const composeBody = (): string =>
		[
			`Group join ${phase}: ${taskIds.join(", ")}`,
			`Request: ${requestId}`,
			`Completed: ${listOrNone(completed)}`,
			`Failed: ${listOrNone(failed)}`,
			`Skipped: ${listOrNone(skipped)}`,
			`Remaining: ${listOrNone(remaining)}`,
			"",
			summary,
		].join("\n");

	const mailbox =
		existingMailbox ??
		appendMailboxMessage(manifest, {
			direction: "outbox",
			from: "group-join",
			to: "leader",
			body: composeBody(),
			status: "delivered",
			data: { kind: "group_join", requestId, batchId, partial, ackRequired: true, taskIds, completed, failed, skipped, remaining },
		});

	appendEvent(manifest.eventsPath, {
		type: partial ? "agent.group_join.partial" : "agent.group_join.completed",
		runId: manifest.runId,
		message: `Group join ${phase} for ${taskIds.length} task(s).`,
		data: {
			...delivery,
			artifactPath: artifact.path,
			messageId: mailbox.id,
			fallback: "mailbox-delivered",
			reused: Boolean(existingMailbox),
		},
	});

	if (existingMailbox) {
		appendEvent(manifest.eventsPath, {
			type: "agent.group_join.delivery_reused",
			runId: manifest.runId,
			message: `Reused group join mailbox delivery for ${taskIds.length} task(s).`,
			data: { requestId, messageId: mailbox.id, batchId, partial },
		});
	}

	return { ...delivery, artifact, messageId: mailbox.id };
}
107
+
108
+ import type { CrewAgentRecord } from "./crew-agent-runtime.ts";
109
+
110
/** Join strategy names for agent-level grouping. */
export type JoinMode = "async" | "group" | "smart";

/**
 * Receives the records collected for a group.
 *
 * @param records Records of the agents that have completed since the last delivery.
 * @param partial True when the delivery was triggered by a timeout while other
 *                agents of the group were still outstanding.
 */
export type DeliveryCallback = (records: CrewAgentRecord[], partial: boolean) => void;

/** Book-keeping for one registered group of agents. */
interface AgentGroup {
	groupId: string;
	/** Agents the group is still waiting on (shrinks after a partial delivery). */
	agentIds: Set<string>;
	/** Completed records that have not been delivered yet, keyed by task id. */
	completedRecords: Map<string, CrewAgentRecord>;
	/** Pending timeout, if one is armed. */
	timeoutHandle?: ReturnType<typeof setTimeout>;
	/** Set once the final (non-partial) delivery has been made. */
	delivered: boolean;
	/** Set after the first partial delivery; selects the shorter straggler timeout. */
	isStraggler: boolean;
}

/** Default time (ms) to wait for the rest of a group after its first completion. */
const DEFAULT_TIMEOUT = 30_000;
/** Time (ms) to wait for stragglers after a partial delivery. */
const STRAGGLER_TIMEOUT = 15_000;

/**
 * Collects agent completions per group and hands them to a callback in batches.
 *
 * A group is delivered in full as soon as every outstanding agent has completed.
 * If the group's timeout elapses first, the records collected so far are delivered
 * as a partial batch and the group keeps waiting for the remaining agents.
 */
export class GroupJoinManager {
	private groups = new Map<string, AgentGroup>();
	private agentToGroup = new Map<string, string>();

	private deliverCb: DeliveryCallback;
	private groupTimeout: number;

	/**
	 * @param deliverCb    Invoked for every full or partial delivery.
	 * @param groupTimeout Milliseconds to wait after a group's first completion
	 *                     before delivering a partial batch.
	 */
	constructor(
		deliverCb: DeliveryCallback,
		groupTimeout = DEFAULT_TIMEOUT,
	) {
		this.deliverCb = deliverCb;
		this.groupTimeout = groupTimeout;
	}

	/**
	 * Registers a group and maps each agent id to it.
	 *
	 * An empty id list is ignored: such a group could never complete and would
	 * otherwise stay in the manager until `dispose()`.
	 */
	registerGroup(groupId: string, agentIds: string[]): void {
		if (agentIds.length === 0) return;

		const group: AgentGroup = {
			groupId,
			agentIds: new Set(agentIds),
			completedRecords: new Map(),
			delivered: false,
			isStraggler: false,
		};
		this.groups.set(groupId, group);
		for (const id of agentIds) {
			this.agentToGroup.set(id, groupId);
		}
	}

	/**
	 * Records an agent completion.
	 *
	 * @returns `"pass"` when the agent is not part of a live group,
	 *          `"delivered"` when this completion finished the group and triggered
	 *          the final delivery, `"held"` when the record is being held for a
	 *          later delivery.
	 */
	onAgentComplete(record: CrewAgentRecord): "delivered" | "held" | "pass" {
		const groupId = this.agentToGroup.get(record.taskId);
		if (!groupId) return "pass";

		const group = this.groups.get(groupId);
		if (!group || group.delivered) return "pass";

		group.completedRecords.set(record.taskId, record);

		if (group.completedRecords.size >= group.agentIds.size) {
			this.deliver(group, false);
			return "delivered";
		}

		// Arm the timeout on the first held record; later records reuse it.
		if (!group.timeoutHandle) {
			const timeout = group.isStraggler ? STRAGGLER_TIMEOUT : this.groupTimeout;
			group.timeoutHandle = setTimeout(() => {
				this.onTimeout(group);
			}, timeout);
		}

		return "held";
	}

	/**
	 * Timeout handler: delivers whatever has completed so far as a partial batch
	 * and keeps the group alive for the agents that are still outstanding.
	 */
	private onTimeout(group: AgentGroup): void {
		if (group.delivered) return;
		group.timeoutHandle = undefined;

		// Idle tick: nothing completed since the last partial delivery. Do not hand
		// an empty batch to the callback and do not keep polling; the next
		// completion re-arms the timer through onAgentComplete().
		if (group.completedRecords.size === 0) return;

		const finished = [...group.completedRecords.values()];
		const remaining = new Set<string>();
		for (const id of group.agentIds) {
			if (!group.completedRecords.has(id)) remaining.add(id);
		}

		// Update all state before invoking the callback so a throwing callback
		// cannot leave already-delivered records queued for a second delivery.
		for (const id of group.completedRecords.keys()) {
			this.agentToGroup.delete(id);
		}
		group.completedRecords.clear();
		group.agentIds = remaining;
		group.isStraggler = true;

		// Re-arm timer for remaining stragglers so they aren't silently abandoned.
		// The timer is unref'd so it does not keep the process alive on its own.
		if (remaining.size > 0) {
			group.timeoutHandle = setTimeout(() => this.onTimeout(group), STRAGGLER_TIMEOUT);
			group.timeoutHandle.unref();
		}

		this.deliverCb(finished, true);
	}

	/** Makes the final delivery for a group and removes it from the manager. */
	private deliver(group: AgentGroup, partial: boolean): void {
		if (group.timeoutHandle) {
			clearTimeout(group.timeoutHandle);
			group.timeoutHandle = undefined;
		}
		group.delivered = true;
		try {
			this.deliverCb([...group.completedRecords.values()], partial);
		} finally {
			// Always release the group, even when the callback throws.
			this.cleanupGroup(group.groupId);
		}
	}

	/** Drops a group and the agent mappings that point at it. */
	private cleanupGroup(groupId: string): void {
		const group = this.groups.get(groupId);
		if (!group) return;
		for (const id of group.agentIds) {
			this.agentToGroup.delete(id);
		}
		this.groups.delete(groupId);
	}

	/** True while the agent is mapped to a group that has not delivered it yet. */
	isGrouped(agentId: string): boolean {
		return this.agentToGroup.has(agentId);
	}

	/** Cancels all pending timers and forgets every group without delivering. */
	dispose(): void {
		for (const group of this.groups.values()) {
			if (group.timeoutHandle) clearTimeout(group.timeoutHandle);
		}
		this.groups.clear();
		this.agentToGroup.clear();
	}
}
@@ -1,124 +1,145 @@
1
- import type { NotificationDescriptor } from "../extension/notification-router.ts";
2
- import type { MetricRegistry } from "../observability/metric-registry.ts";
3
- import { appendEvent } from "../state/event-log.ts";
4
- import { loadRunManifestById } from "../state/state-store.ts";
5
- import type { TeamRunManifest } from "../state/types.ts";
6
- import { logInternalError } from "../utils/internal-error.ts";
7
- import type { ManifestCache } from "./manifest-cache.ts";
8
- import { classifyHeartbeat, DEFAULT_GRADIENT_THRESHOLDS, heartbeatAgeMs, type GradientThresholds, type HeartbeatLevel } from "./heartbeat-gradient.ts";
9
-
10
- export interface HeartbeatWatcherRouter {
11
- enqueue(notification: NotificationDescriptor): boolean;
12
- }
13
-
14
- export interface HeartbeatWatcherOptions {
15
- cwd: string;
16
- pollIntervalMs?: number;
17
- thresholds?: GradientThresholds;
18
- manifestCache: ManifestCache;
19
- registry: MetricRegistry;
20
- router: HeartbeatWatcherRouter;
21
- deadletterTickThreshold?: number;
22
- onDead?: (runId: string, taskId: string, elapsed: number) => void;
23
- onDeadletterTrigger?: (manifest: TeamRunManifest, taskId: string) => void;
24
- }
25
-
26
- /**
27
- * Polls running runs for heartbeat staleness.
28
- *
29
- * Uses recursive setTimeout to avoid timer storms.
30
- * Cleanup is done in the same pass — no second scan over manifests.
31
- * Keys for runs that disappear from the cache are cleaned via staleness-age policy
32
- * rather than being leaked forever.
33
- */
34
- export class HeartbeatWatcher {
35
- private timer?: ReturnType<typeof setTimeout>;
36
- private lastLevel = new Map<string, HeartbeatLevel>();
37
- private consecutiveDead = new Map<string, number>();
38
- private lastSeen = new Map<string, number>(); // key last time it was active
39
- /** Max age (ms) to retain a stale key before garbage-collecting it. */
40
- private readonly maxKeyAgeMs = 600_000; // 10 minutes
41
- private readonly opts: HeartbeatWatcherOptions;
42
-
43
- constructor(opts: HeartbeatWatcherOptions) {
44
- this.opts = opts;
45
- }
46
-
47
- start(): void {
48
- this.dispose();
49
- this.scheduleTick();
50
- }
51
-
52
- private scheduleTick(): void {
53
- this.timer = setTimeout(() => this.tick(), this.opts.pollIntervalMs ?? 5000);
54
- this.timer.unref();
55
- }
56
-
57
- tick(now = Date.now()): void {
58
- try {
59
- this.tickUnsafe(now);
60
- } catch (error) {
61
- logInternalError("heartbeat-watcher.tick", error);
62
- } finally {
63
- this.scheduleTick();
64
- }
65
- }
66
-
67
- private tickUnsafe(now: number): void {
68
- const thresholds = this.opts.thresholds ?? DEFAULT_GRADIENT_THRESHOLDS;
69
- const tickThreshold = this.opts.deadletterTickThreshold ?? 3;
70
- const activeKeys = new Set<string>();
71
-
72
- for (const run of this.opts.manifestCache.list(50)) {
73
- if (run.status !== "running") continue;
74
- const loaded = loadRunManifestById(this.opts.cwd, run.runId);
75
- if (!loaded) continue;
76
- for (const task of loaded.tasks) {
77
- if (task.status !== "running") continue;
78
- const key = `${run.runId}:${task.id}`;
79
- activeKeys.add(key);
80
- this.lastSeen.set(key, now);
81
-
82
- const elapsed = heartbeatAgeMs(task.heartbeat, now);
83
- const level = classifyHeartbeat(task.heartbeat, thresholds, now);
84
- this.opts.registry.gauge("crew.heartbeat.staleness_ms", "Heartbeat elapsed since last seen, milliseconds").set({ runId: run.runId, taskId: task.id }, Number.isFinite(elapsed) ? elapsed : thresholds.deadMs);
85
- this.opts.registry.counter("crew.heartbeat.level_total", "Heartbeat classifications by level").inc({ runId: run.runId, level });
86
- const previous = this.lastLevel.get(key);
87
- this.lastLevel.set(key, level);
88
- if (level === "dead" && previous !== "dead") {
89
- this.opts.registry.counter("crew.heartbeat.dead_total", "Dead heartbeat detections").inc({ runId: run.runId });
90
- appendEvent(loaded.manifest.eventsPath, { type: "crew.task.heartbeat_dead", runId: run.runId, taskId: task.id, message: `Task ${task.id} heartbeat dead.`, data: { elapsedMs: Number.isFinite(elapsed) ? elapsed : undefined } });
91
- this.opts.router.enqueue({ id: `dead_${run.runId}_${task.id}`, severity: "warning", source: "heartbeat-watcher", runId: run.runId, title: `Task ${task.id} heartbeat dead`, body: "Background watcher detected a stuck worker." });
92
- this.opts.onDead?.(run.runId, task.id, Number.isFinite(elapsed) ? elapsed : thresholds.deadMs);
93
- }
94
- if (level === "dead") {
95
- const count = (this.consecutiveDead.get(key) ?? 0) + 1;
96
- this.consecutiveDead.set(key, count);
97
- if (count === tickThreshold) this.opts.onDeadletterTrigger?.(loaded.manifest, task.id);
98
- } else {
99
- this.consecutiveDead.delete(key);
100
- }
101
- }
102
- }
103
-
104
- // Cleanup: drop keys that were NOT in this tick's active set AND
105
- // haven't been seen for > maxKeyAgeMs. This covers runs that
106
- // completed or fell out of the manifest cache's top-50 window.
107
- const cutoff = now - this.maxKeyAgeMs;
108
- for (const [key, ts] of this.lastSeen) {
109
- if (!activeKeys.has(key) && ts < cutoff) {
110
- this.lastLevel.delete(key);
111
- this.consecutiveDead.delete(key);
112
- this.lastSeen.delete(key);
113
- }
114
- }
115
- }
116
-
117
- dispose(): void {
118
- if (this.timer) clearTimeout(this.timer);
119
- this.timer = undefined;
120
- this.lastLevel.clear();
121
- this.consecutiveDead.clear();
122
- this.lastSeen.clear();
123
- }
124
- }
1
+ import type { NotificationDescriptor } from "../extension/notification-router.ts";
2
+ import type { MetricRegistry } from "../observability/metric-registry.ts";
3
+ import { appendEvent } from "../state/event-log.ts";
4
+ import { loadRunManifestById } from "../state/state-store.ts";
5
+ import type { TeamRunManifest } from "../state/types.ts";
6
+ import { logInternalError } from "../utils/internal-error.ts";
7
+ import type { ManifestCache } from "./manifest-cache.ts";
8
+ import { classifyHeartbeat, DEFAULT_GRADIENT_THRESHOLDS, heartbeatAgeMs, type GradientThresholds, type HeartbeatLevel } from "./heartbeat-gradient.ts";
9
+
10
/**
 * Minimal notification sink the heartbeat watcher depends on.
 */
export interface HeartbeatWatcherRouter {
  /**
   * Submits a notification for routing.
   *
   * @param notification - Descriptor of the notification to route.
   * @returns A boolean defined by the implementation — presumably whether the
   *   notification was accepted (TODO confirm); the watcher does not inspect it.
   */
  enqueue(notification: NotificationDescriptor): boolean;
}
13
+
14
/**
 * Construction options for the heartbeat watcher.
 * Required collaborators are listed first, optional tuning knobs and hooks after.
 */
export interface HeartbeatWatcherOptions {
  /** Working directory passed to the manifest loader when resolving a run by id. */
  cwd: string;
  /** Source of the runs to inspect; the watcher reads up to 50 entries per tick. */
  manifestCache: ManifestCache;
  /** Registry that receives the heartbeat staleness gauge and the level/dead counters. */
  registry: MetricRegistry;
  /** Sink for the warning raised when a task's heartbeat is first classified as dead. */
  router: HeartbeatWatcherRouter;

  /** Delay between ticks in milliseconds. Default 5000. */
  pollIntervalMs?: number;
  /** Heartbeat classification thresholds. Default `DEFAULT_GRADIENT_THRESHOLDS`. */
  thresholds?: GradientThresholds;
  /** Number of consecutive dead ticks at which the deadletter hook fires. Default 3. */
  deadletterTickThreshold?: number;
  /**
   * 3.6 — minimum interval between repeated deadletter triggers for the same
   * runId+taskId. Without this, a flaky worker (dead → alive → dead) can
   * fire deadletter entries faster than the operator can respond.
   * Default 60_000 ms.
   */
  deadletterCooldownMs?: number;

  /**
   * Called when a task's level changes to dead from any other level.
   * `elapsed` is the heartbeat age in milliseconds, or the dead threshold when
   * the age is not a finite number.
   */
  onDead?: (runId: string, taskId: string, elapsed: number) => void;
  /**
   * Called when a task reaches `deadletterTickThreshold` consecutive dead ticks
   * and the cooldown for that task has elapsed.
   */
  onDeadletterTrigger?: (manifest: TeamRunManifest, taskId: string) => void;
}
32
+
33
/**
 * Polls running runs for heartbeat staleness.
 *
 * Uses recursive setTimeout to avoid timer storms; at most one timer is pending
 * at any time. Per-task bookkeeping is keyed by `${runId}:${taskId}` and is
 * garbage-collected in the same pass — no second scan over manifests. Keys for
 * runs that disappear from the cache are cleaned via a staleness-age policy
 * rather than being leaked forever.
 */
export class HeartbeatWatcher {
  /** Handle of the single pending poll timer, if any. */
  private timer?: ReturnType<typeof setTimeout>;
  /** key → level assigned on the most recent tick that saw the task running. */
  private lastLevel = new Map<string, HeartbeatLevel>();
  /** key → number of ticks in a row on which the task was classified as dead. */
  private consecutiveDead = new Map<string, number>();
  /** key → last time it was active. */
  private lastSeen = new Map<string, number>();
  /** key → time of the last deadletter trigger (3.6 cooldown gate). */
  private lastDeadletterTriggerAt = new Map<string, number>();
  /** Max age (ms) to retain a stale key before garbage-collecting it. */
  private readonly maxKeyAgeMs = 600_000; // 10 minutes
  private readonly opts: HeartbeatWatcherOptions;

  constructor(opts: HeartbeatWatcherOptions) {
    this.opts = opts;
  }

  /** Resets all state and schedules the first tick. Safe to call repeatedly. */
  start(): void {
    this.dispose();
    this.scheduleTick();
  }

  /**
   * Arms the next poll.
   *
   * 3.2 — when at least one dead-streak entry exists, poll faster (1s at most)
   * so operators get notified quickly. Healthy state stays at the configured
   * interval (default 5s) to keep idle CPU near zero.
   *
   * NOTE(review): a streak entry for a task that stopped running while dead
   * lingers until the stale-key sweep removes it (up to `maxKeyAgeMs`), so fast
   * polling can outlive the task — confirm whether that is intended.
   */
  private scheduleTick(): void {
    // Cancel any timer that is still pending. `tick()` is public, so it can be
    // called while a timer is armed; without this the old timer would keep
    // firing as a second, untracked polling chain that dispose() cannot stop.
    if (this.timer) clearTimeout(this.timer);

    const baseInterval = this.opts.pollIntervalMs ?? 5000;
    const interval = this.consecutiveDead.size > 0 ? Math.min(1000, baseInterval) : baseInterval;
    this.timer = setTimeout(() => this.tick(), interval);
    // Do not let the poll timer keep the process alive on its own.
    this.timer.unref();
  }

  /**
   * Runs one polling pass and always schedules the next one.
   * Errors from the pass are logged, never thrown to the caller.
   *
   * @param now - Clock value in milliseconds; injectable for tests.
   */
  tick(now = Date.now()): void {
    try {
      this.tickUnsafe(now);
    } catch (error) {
      logInternalError("heartbeat-watcher.tick", error);
    } finally {
      this.scheduleTick();
    }
  }

  /**
   * One polling pass: classifies the heartbeat of every running task of every
   * running run, records metrics, raises dead notifications and deadletter
   * triggers, then sweeps bookkeeping for keys that have been inactive too long.
   */
  private tickUnsafe(now: number): void {
    const thresholds = this.opts.thresholds ?? DEFAULT_GRADIENT_THRESHOLDS;
    const tickThreshold = this.opts.deadletterTickThreshold ?? 3;
    const cooldown = this.opts.deadletterCooldownMs ?? 60_000;
    const activeKeys = new Set<string>();

    for (const run of this.opts.manifestCache.list(50)) {
      if (run.status !== "running") continue;
      const loaded = loadRunManifestById(this.opts.cwd, run.runId);
      if (!loaded) continue;
      for (const task of loaded.tasks) {
        if (task.status !== "running") continue;
        const key = `${run.runId}:${task.id}`;
        activeKeys.add(key);
        this.lastSeen.set(key, now);

        const elapsed = heartbeatAgeMs(task.heartbeat, now);
        const level = classifyHeartbeat(task.heartbeat, thresholds, now);
        // A non-finite age is reported as the dead threshold in metrics and hooks.
        const elapsedOrDead = Number.isFinite(elapsed) ? elapsed : thresholds.deadMs;
        this.opts.registry.gauge("crew.heartbeat.staleness_ms", "Heartbeat elapsed since last seen, milliseconds").set({ runId: run.runId, taskId: task.id }, elapsedOrDead);
        this.opts.registry.counter("crew.heartbeat.level_total", "Heartbeat classifications by level").inc({ runId: run.runId, level });
        const previous = this.lastLevel.get(key);
        this.lastLevel.set(key, level);

        // Edge-triggered: announce only on the transition into "dead".
        if (level === "dead" && previous !== "dead") {
          this.opts.registry.counter("crew.heartbeat.dead_total", "Dead heartbeat detections").inc({ runId: run.runId });
          appendEvent(loaded.manifest.eventsPath, { type: "crew.task.heartbeat_dead", runId: run.runId, taskId: task.id, message: `Task ${task.id} heartbeat dead.`, data: { elapsedMs: Number.isFinite(elapsed) ? elapsed : undefined } });
          this.opts.router.enqueue({ id: `dead_${run.runId}_${task.id}`, severity: "warning", source: "heartbeat-watcher", runId: run.runId, title: `Task ${task.id} heartbeat dead`, body: "Background watcher detected a stuck worker." });
          this.opts.onDead?.(run.runId, task.id, elapsedOrDead);
        }

        if (level === "dead") {
          const count = (this.consecutiveDead.get(key) ?? 0) + 1;
          this.consecutiveDead.set(key, count);
          // Fires at most once per streak: only on the tick that reaches the threshold.
          if (count === tickThreshold) {
            // 3.6 cooldown gate. "Never triggered" is tracked as undefined rather
            // than time 0, so the first trigger is never suppressed when `now` is
            // an injected clock smaller than the cooldown.
            const lastTrigger = this.lastDeadletterTriggerAt.get(key);
            if (lastTrigger === undefined || now - lastTrigger >= cooldown) {
              this.lastDeadletterTriggerAt.set(key, now);
              this.opts.onDeadletterTrigger?.(loaded.manifest, task.id);
            }
          }
        } else {
          this.consecutiveDead.delete(key);
        }
      }
    }

    // Cleanup: drop keys that were NOT in this tick's active set AND
    // haven't been seen for > maxKeyAgeMs. This covers runs that
    // completed or fell out of the manifest cache's top-50 window.
    const cutoff = now - this.maxKeyAgeMs;
    for (const [key, ts] of this.lastSeen) {
      if (!activeKeys.has(key) && ts < cutoff) {
        this.lastLevel.delete(key);
        this.consecutiveDead.delete(key);
        // The cooldown timestamp is swept with the rest so this map cannot grow forever.
        this.lastDeadletterTriggerAt.delete(key);
        this.lastSeen.delete(key);
      }
    }
  }

  /** Cancels the pending poll timer and clears all per-task bookkeeping. */
  dispose(): void {
    if (this.timer) clearTimeout(this.timer);
    this.timer = undefined;
    this.lastLevel.clear();
    this.consecutiveDead.clear();
    this.lastSeen.clear();
    this.lastDeadletterTriggerAt.clear();
  }
}