pi-crew 0.2.2 → 0.2.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (354) hide show
  1. package/AGENTS.md +57 -32
  2. package/CHANGELOG.md +466 -413
  3. package/LICENSE +21 -21
  4. package/NOTICE.md +16 -16
  5. package/README.md +323 -323
  6. package/docs/FEATURE_INTAKE.md +126 -0
  7. package/docs/HARNESS.md +86 -0
  8. package/docs/HARNESS_BACKLOG.md +41 -0
  9. package/docs/TEST_MATRIX.md +49 -0
  10. package/docs/actions-reference.md +595 -595
  11. package/docs/architecture.md +180 -180
  12. package/docs/code-review-2026-05-11.md +592 -0
  13. package/docs/commands-reference.md +347 -347
  14. package/docs/comparison-pi-subagents-vs-pi-crew.md +303 -0
  15. package/docs/decisions/0001-durable-state.md +41 -0
  16. package/docs/decisions/0002-child-process-for-async.md +42 -0
  17. package/docs/decisions/0003-depth-guard.md +36 -0
  18. package/docs/decisions/0004-execfile-over-exec.md +34 -0
  19. package/docs/decisions/0005-no-parameter-properties.md +49 -0
  20. package/docs/decisions/0006-publish-bundled-esm.md +63 -0
  21. package/docs/decisions/0007-active-run-binary-index.md +54 -0
  22. package/docs/decisions/0008-child-pi-warm-pool.md +61 -0
  23. package/docs/decisions/README.md +23 -0
  24. package/docs/followup-plan-2026-05-12.md +463 -0
  25. package/docs/followup-review-2026-05-12.md +297 -0
  26. package/docs/followup-review-round3-2026-05-12.md +342 -0
  27. package/docs/followup-review-round4-2026-05-13.md +107 -0
  28. package/docs/implementation-plan-top3.md +333 -0
  29. package/docs/live-mailbox-runtime.md +36 -36
  30. package/docs/next-upgrade-roadmap.md +808 -808
  31. package/docs/oh-my-pi-research.md +509 -0
  32. package/docs/perf/baseline-2026-05.md +113 -0
  33. package/docs/perf/final-report-2026-05.md +206 -0
  34. package/docs/perf/sprint-1-report.md +71 -0
  35. package/docs/perf/sprint-2-report.md +81 -0
  36. package/docs/perf/sprint-2.5-report.md +53 -0
  37. package/docs/perf/sprint-3-report.md +36 -0
  38. package/docs/perf/sprint-4-report.md +47 -0
  39. package/docs/perf/sprint-5-report.md +51 -0
  40. package/docs/perf/sprint-6-report.md +94 -0
  41. package/docs/perf/sprint-7-report.md +74 -0
  42. package/docs/perf/upgrade-plan-2026-05.md +147 -0
  43. package/docs/pi-subagents3-deep-analysis.md +508 -0
  44. package/docs/product/README.md +31 -0
  45. package/docs/product/platform.md +27 -0
  46. package/docs/product/runtime-safety.md +37 -0
  47. package/docs/product/team-run.md +39 -0
  48. package/docs/product/team-tool.md +37 -0
  49. package/docs/publishing.md +65 -65
  50. package/docs/resource-formats.md +134 -134
  51. package/docs/runtime-analysis-child-vs-live.md +171 -0
  52. package/docs/runtime-flow.md +148 -148
  53. package/docs/runtime-migration-in-process-analysis.md +250 -0
  54. package/docs/stories/README.md +30 -0
  55. package/docs/stories/backlog.md +36 -0
  56. package/docs/templates/decision.md +27 -0
  57. package/docs/templates/story.md +44 -0
  58. package/docs/templates/validation-report.md +32 -0
  59. package/docs/usage.md +238 -238
  60. package/index.ts +7 -6
  61. package/install.mjs +65 -65
  62. package/package.json +107 -99
  63. package/schema.json +222 -222
  64. package/skills/child-pi-spawning/SKILL.md +213 -0
  65. package/skills/context-artifact-hygiene/SKILL.md +32 -0
  66. package/skills/event-log-tracing/SKILL.md +299 -0
  67. package/skills/git-master/SKILL.md +225 -24
  68. package/skills/live-agent-lifecycle/SKILL.md +192 -0
  69. package/skills/mailbox-interactive/SKILL.md +300 -19
  70. package/skills/model-routing-context/SKILL.md +94 -0
  71. package/skills/multi-perspective-review/SKILL.md +88 -0
  72. package/skills/read-only-explorer/SKILL.md +250 -26
  73. package/skills/safe-bash/SKILL.md +307 -21
  74. package/skills/verification-before-done/SKILL.md +11 -2
  75. package/skills/widget-rendering/SKILL.md +258 -0
  76. package/skills/workspace-isolation/SKILL.md +202 -0
  77. package/skills/worktree-isolation/SKILL.md +202 -18
  78. package/src/adapters/claude-adapter.ts +25 -25
  79. package/src/adapters/codex-adapter.ts +21 -21
  80. package/src/adapters/cursor-adapter.ts +17 -17
  81. package/src/adapters/export-util.ts +137 -137
  82. package/src/adapters/index.ts +15 -15
  83. package/src/adapters/registry.ts +18 -18
  84. package/src/adapters/types.ts +23 -23
  85. package/src/agents/agent-config.ts +38 -38
  86. package/src/agents/agent-serializer.ts +38 -38
  87. package/src/agents/discover-agents.ts +121 -118
  88. package/src/config/config.ts +740 -858
  89. package/src/config/defaults.ts +96 -96
  90. package/src/config/drift-detector.ts +211 -211
  91. package/src/config/markers.ts +327 -327
  92. package/src/config/resilient-parser.ts +109 -108
  93. package/src/config/suggestions.ts +74 -74
  94. package/src/config/types.ts +199 -0
  95. package/src/extension/async-notifier.ts +123 -89
  96. package/src/extension/autonomous-policy.ts +169 -169
  97. package/src/extension/cross-extension-rpc.ts +104 -103
  98. package/src/extension/help.ts +47 -47
  99. package/src/extension/import-index.ts +69 -69
  100. package/src/extension/management.ts +395 -382
  101. package/src/extension/notification-router.ts +116 -116
  102. package/src/extension/notification-sink.ts +51 -51
  103. package/src/extension/project-init.ts +168 -168
  104. package/src/extension/register.ts +859 -668
  105. package/src/extension/registration/artifact-cleanup.ts +15 -15
  106. package/src/extension/registration/command-utils.ts +54 -54
  107. package/src/extension/registration/commands.ts +559 -452
  108. package/src/extension/registration/compaction-guard.ts +125 -125
  109. package/src/extension/registration/subagent-helpers.ts +102 -102
  110. package/src/extension/registration/subagent-tools.ts +220 -158
  111. package/src/extension/registration/team-tool.ts +159 -98
  112. package/src/extension/registration/viewers.ts +29 -0
  113. package/src/extension/result-watcher.ts +128 -128
  114. package/src/extension/run-bundle-schema.ts +89 -89
  115. package/src/extension/run-export.ts +73 -73
  116. package/src/extension/run-import.ts +84 -84
  117. package/src/extension/run-index.ts +94 -94
  118. package/src/extension/run-maintenance.ts +142 -142
  119. package/src/extension/session-summary.ts +8 -8
  120. package/src/extension/team-manager-command.ts +96 -95
  121. package/src/extension/team-recommendation.ts +188 -188
  122. package/src/extension/team-tool/api.ts +5 -2
  123. package/src/extension/team-tool/cancel.ts +224 -209
  124. package/src/extension/team-tool/config-patch.ts +36 -36
  125. package/src/extension/team-tool/context.ts +60 -60
  126. package/src/extension/team-tool/doctor.ts +242 -242
  127. package/src/extension/team-tool/handle-settings.ts +421 -195
  128. package/src/extension/team-tool/inspect.ts +41 -41
  129. package/src/extension/team-tool/lifecycle-actions.ts +139 -139
  130. package/src/extension/team-tool/parallel-dispatch.ts +156 -156
  131. package/src/extension/team-tool/plan.ts +19 -19
  132. package/src/extension/team-tool/respond.ts +112 -111
  133. package/src/extension/team-tool/run.ts +246 -228
  134. package/src/extension/team-tool/status.ts +110 -110
  135. package/src/extension/team-tool-types.ts +13 -13
  136. package/src/extension/team-tool.ts +16 -4
  137. package/src/extension/tool-result.ts +16 -16
  138. package/src/extension/validate-resources.ts +77 -77
  139. package/src/hooks/registry.ts +61 -61
  140. package/src/hooks/types.ts +40 -40
  141. package/src/i18n.ts +184 -184
  142. package/src/observability/correlation.ts +35 -35
  143. package/src/observability/event-to-metric.ts +68 -68
  144. package/src/observability/exporters/adapter.ts +30 -30
  145. package/src/observability/exporters/otlp-exporter.ts +106 -92
  146. package/src/observability/exporters/prometheus-exporter.ts +54 -54
  147. package/src/observability/metric-registry.ts +87 -87
  148. package/src/observability/metric-retention.ts +54 -54
  149. package/src/observability/metric-sink.ts +81 -56
  150. package/src/observability/metrics-primitives.ts +167 -167
  151. package/src/prompt/prompt-runtime.ts +72 -72
  152. package/src/runtime/adaptive-plan.ts +338 -0
  153. package/src/runtime/agent-control.ts +169 -169
  154. package/src/runtime/agent-memory.ts +72 -72
  155. package/src/runtime/agent-observability.ts +114 -114
  156. package/src/runtime/async-marker.ts +26 -26
  157. package/src/runtime/async-runner.ts +153 -79
  158. package/src/runtime/attention-events.ts +28 -28
  159. package/src/runtime/auto-resume.ts +100 -100
  160. package/src/runtime/background-runner.ts +122 -88
  161. package/src/runtime/cancellation.ts +61 -61
  162. package/src/runtime/capability-inventory.ts +116 -116
  163. package/src/runtime/child-pi-pool.ts +68 -0
  164. package/src/runtime/child-pi.ts +541 -463
  165. package/src/runtime/code-summary.ts +247 -247
  166. package/src/runtime/compaction-summary.ts +271 -271
  167. package/src/runtime/concurrency.ts +58 -58
  168. package/src/runtime/crash-recovery.ts +317 -301
  169. package/src/runtime/crew-agent-records.ts +379 -281
  170. package/src/runtime/crew-agent-runtime.ts +60 -60
  171. package/src/runtime/cross-extension-rpc.ts +72 -0
  172. package/src/runtime/custom-tools/irc-tool.ts +201 -201
  173. package/src/runtime/custom-tools/submit-result-tool.ts +90 -90
  174. package/src/runtime/deadletter.ts +47 -47
  175. package/src/runtime/delivery-coordinator.ts +176 -176
  176. package/src/runtime/delta-conflict.ts +360 -360
  177. package/src/runtime/diagnostic-export.ts +102 -102
  178. package/src/runtime/direct-run.ts +35 -35
  179. package/src/runtime/effectiveness.ts +82 -81
  180. package/src/runtime/errors/crew-errors.ts +166 -0
  181. package/src/runtime/event-stream-bridge.ts +92 -92
  182. package/src/runtime/foreground-control.ts +82 -82
  183. package/src/runtime/green-contract.ts +46 -46
  184. package/src/runtime/group-join.ts +234 -106
  185. package/src/runtime/heartbeat-watcher.ts +145 -124
  186. package/src/runtime/iteration-hooks.ts +267 -264
  187. package/src/runtime/live-agent-control.ts +88 -88
  188. package/src/runtime/live-agent-manager.ts +377 -179
  189. package/src/runtime/live-control-realtime.ts +36 -36
  190. package/src/runtime/live-session-runtime.ts +676 -599
  191. package/src/runtime/loop-gates.ts +129 -129
  192. package/src/runtime/manifest-cache.ts +263 -263
  193. package/src/runtime/mcp-proxy.ts +113 -113
  194. package/src/runtime/metric-parser.ts +40 -40
  195. package/src/runtime/model-fallback.ts +282 -274
  196. package/src/runtime/model-resolver.ts +118 -0
  197. package/src/runtime/output-validator.ts +187 -187
  198. package/src/runtime/overflow-recovery.ts +175 -175
  199. package/src/runtime/parallel-research.ts +44 -44
  200. package/src/runtime/parallel-utils.ts +156 -156
  201. package/src/runtime/parent-guard.ts +80 -80
  202. package/src/runtime/phase-progress.ts +217 -217
  203. package/src/runtime/pi-args.ts +165 -165
  204. package/src/runtime/pi-json-output.ts +111 -111
  205. package/src/runtime/pi-spawn.ts +167 -167
  206. package/src/runtime/policy-engine.ts +79 -79
  207. package/src/runtime/post-checks.ts +125 -122
  208. package/src/runtime/post-exit-stdio-guard.ts +86 -86
  209. package/src/runtime/process-status.ts +97 -73
  210. package/src/runtime/progress-event-coalescer.ts +43 -43
  211. package/src/runtime/recovery-recipes.ts +74 -74
  212. package/src/runtime/retry-executor.ts +81 -81
  213. package/src/runtime/role-permission.ts +39 -39
  214. package/src/runtime/run-tracker.ts +99 -0
  215. package/src/runtime/runtime-policy.ts +21 -0
  216. package/src/runtime/runtime-resolver.ts +94 -90
  217. package/src/runtime/scheduler.ts +294 -0
  218. package/src/runtime/semaphore.ts +131 -131
  219. package/src/runtime/sensitive-paths.ts +92 -92
  220. package/src/runtime/session-usage.ts +79 -79
  221. package/src/runtime/settings-store.ts +103 -0
  222. package/src/runtime/sidechain-output.ts +29 -29
  223. package/src/runtime/skill-instructions.ts +222 -222
  224. package/src/runtime/stale-reconciler.ts +198 -189
  225. package/src/runtime/streaming-output.ts +47 -0
  226. package/src/runtime/subagent-manager.ts +404 -395
  227. package/src/runtime/subprocess-tool-registry.ts +67 -67
  228. package/src/runtime/task-display.ts +38 -38
  229. package/src/runtime/task-graph-scheduler.ts +122 -122
  230. package/src/runtime/task-graph.ts +207 -207
  231. package/src/runtime/task-output-context.ts +177 -177
  232. package/src/runtime/task-packet.ts +93 -93
  233. package/src/runtime/task-quality.ts +207 -207
  234. package/src/runtime/task-runner/capabilities.ts +78 -78
  235. package/src/runtime/task-runner/live-executor.ts +131 -113
  236. package/src/runtime/task-runner/progress.ts +119 -119
  237. package/src/runtime/task-runner/prompt-builder.ts +139 -139
  238. package/src/runtime/task-runner/prompt-pipeline.ts +64 -64
  239. package/src/runtime/task-runner/result-utils.ts +14 -14
  240. package/src/runtime/task-runner/run-projection.ts +103 -103
  241. package/src/runtime/task-runner/state-helpers.ts +22 -22
  242. package/src/runtime/task-runner.ts +469 -458
  243. package/src/runtime/team-runner.ts +693 -945
  244. package/src/runtime/usage-tracker.ts +71 -0
  245. package/src/runtime/worker-heartbeat.ts +21 -21
  246. package/src/runtime/worker-startup.ts +57 -57
  247. package/src/runtime/workflow-state.ts +187 -187
  248. package/src/runtime/yield-handler.ts +190 -189
  249. package/src/schema/config-schema.ts +172 -168
  250. package/src/schema/team-tool-schema.ts +126 -125
  251. package/src/schema/validation-types.ts +151 -148
  252. package/src/skills/discover-skills.ts +67 -67
  253. package/src/skills/skill-templates.ts +374 -374
  254. package/src/state/active-run-registry.ts +227 -191
  255. package/src/state/artifact-store.ts +130 -129
  256. package/src/state/atomic-write.ts +262 -178
  257. package/src/state/blob-store.ts +116 -116
  258. package/src/state/contracts.ts +111 -111
  259. package/src/state/event-log-rotation.ts +161 -158
  260. package/src/state/event-log.ts +383 -240
  261. package/src/state/event-reconstructor.ts +217 -217
  262. package/src/state/jsonl-writer.ts +82 -82
  263. package/src/state/locks.ts +146 -148
  264. package/src/state/mailbox.ts +446 -405
  265. package/src/state/state-store.ts +364 -351
  266. package/src/state/task-claims.ts +44 -44
  267. package/src/state/types.ts +285 -285
  268. package/src/state/usage.ts +29 -29
  269. package/src/subagents/async-entry.ts +1 -1
  270. package/src/subagents/index.ts +3 -3
  271. package/src/subagents/live/control.ts +1 -1
  272. package/src/subagents/live/manager.ts +1 -1
  273. package/src/subagents/live/realtime.ts +1 -1
  274. package/src/subagents/live/session-runtime.ts +1 -1
  275. package/src/subagents/manager.ts +1 -1
  276. package/src/subagents/spawn.ts +1 -1
  277. package/src/teams/discover-teams.ts +116 -116
  278. package/src/teams/team-config.ts +27 -27
  279. package/src/teams/team-serializer.ts +38 -38
  280. package/src/types/diff.d.ts +18 -18
  281. package/src/ui/agent-management-overlay.ts +144 -144
  282. package/src/ui/crew-widget.ts +487 -370
  283. package/src/ui/dashboard-panes/agents-pane.ts +109 -28
  284. package/src/ui/dashboard-panes/cancellation-pane.ts +42 -42
  285. package/src/ui/dashboard-panes/capability-pane.ts +59 -59
  286. package/src/ui/dashboard-panes/health-pane.ts +30 -30
  287. package/src/ui/dashboard-panes/mailbox-pane.ts +35 -35
  288. package/src/ui/dashboard-panes/progress-pane.ts +30 -30
  289. package/src/ui/dashboard-panes/transcript-pane.ts +10 -10
  290. package/src/ui/heartbeat-aggregator.ts +63 -63
  291. package/src/ui/keybinding-map.ts +97 -94
  292. package/src/ui/live-conversation-overlay.ts +152 -0
  293. package/src/ui/live-run-sidebar.ts +180 -180
  294. package/src/ui/mascot.ts +442 -442
  295. package/src/ui/overlays/agent-picker-overlay.ts +57 -57
  296. package/src/ui/overlays/confirm-overlay.ts +58 -58
  297. package/src/ui/overlays/mailbox-compose-overlay.ts +144 -144
  298. package/src/ui/overlays/mailbox-compose-preview.ts +63 -63
  299. package/src/ui/overlays/mailbox-detail-overlay.ts +122 -122
  300. package/src/ui/pi-ui-compat.ts +57 -57
  301. package/src/ui/powerbar-publisher.ts +221 -197
  302. package/src/ui/render-scheduler.ts +216 -143
  303. package/src/ui/run-action-dispatcher.ts +118 -117
  304. package/src/ui/run-dashboard.ts +526 -464
  305. package/src/ui/run-event-bus.ts +208 -208
  306. package/src/ui/run-snapshot-cache.ts +826 -777
  307. package/src/ui/settings-overlay.ts +721 -0
  308. package/src/ui/snapshot-types.ts +86 -70
  309. package/src/ui/theme-adapter.ts +190 -190
  310. package/src/ui/tool-progress-formatter.ts +89 -0
  311. package/src/ui/transcript-cache.ts +94 -94
  312. package/src/ui/transcript-viewer.ts +335 -335
  313. package/src/utils/conflict-detect.ts +662 -0
  314. package/src/utils/env-filter.ts +30 -0
  315. package/src/utils/file-coalescer.ts +86 -86
  316. package/src/utils/frontmatter.ts +68 -68
  317. package/src/utils/fs-watch.ts +88 -31
  318. package/src/utils/gh-protocol.ts +479 -0
  319. package/src/utils/ids.ts +17 -17
  320. package/src/utils/incremental-reader.ts +104 -104
  321. package/src/utils/internal-error.ts +6 -6
  322. package/src/utils/names.ts +27 -27
  323. package/src/utils/paths.ts +102 -63
  324. package/src/utils/redaction.ts +44 -44
  325. package/src/utils/resolve-shell.ts +34 -0
  326. package/src/utils/safe-paths.ts +47 -47
  327. package/src/utils/scan-cache.ts +136 -136
  328. package/src/utils/sleep.ts +2 -1
  329. package/src/utils/sse-parser.ts +134 -134
  330. package/src/utils/task-name-generator.ts +337 -337
  331. package/src/utils/timings.ts +33 -33
  332. package/src/utils/visual.ts +243 -198
  333. package/src/workflows/discover-workflows.ts +139 -139
  334. package/src/workflows/validate-workflow.ts +40 -40
  335. package/src/workflows/workflow-config.ts +26 -26
  336. package/src/workflows/workflow-serializer.ts +32 -32
  337. package/src/worktree/branch-freshness.ts +45 -45
  338. package/src/worktree/cleanup.ts +75 -72
  339. package/src/worktree/worktree-manager.ts +188 -146
  340. package/teams/default.team.md +12 -12
  341. package/teams/fast-fix.team.md +11 -11
  342. package/teams/implementation.team.md +18 -18
  343. package/teams/parallel-research.team.md +14 -14
  344. package/teams/research.team.md +11 -11
  345. package/teams/review.team.md +12 -12
  346. package/tsconfig.json +19 -19
  347. package/workflows/default.workflow.md +30 -30
  348. package/workflows/fast-fix.workflow.md +23 -23
  349. package/workflows/implementation.workflow.md +43 -43
  350. package/workflows/parallel-research.workflow.md +46 -46
  351. package/workflows/research.workflow.md +22 -22
  352. package/workflows/review.workflow.md +30 -30
  353. package/skills/task-packet/SKILL.md +0 -28
  354. package/skills/verify-evidence/SKILL.md +0 -27
@@ -1,106 +1,234 @@
1
- import type { CrewRuntimeConfig } from "../config/config.ts";
2
- import { writeArtifact } from "../state/artifact-store.ts";
3
- import { appendEvent } from "../state/event-log.ts";
4
- import { appendMailboxMessage, findMailboxMessageByRequestId, readDeliveryState } from "../state/mailbox.ts";
5
- import type { ArtifactDescriptor, TeamRunManifest, TeamTaskState } from "../state/types.ts";
6
- import { aggregateTaskOutputs } from "./task-output-context.ts";
7
-
8
- export type CrewGroupJoinMode = "off" | "group" | "smart";
9
-
10
- export interface CrewGroupJoinDelivery {
11
- batchId: string;
12
- mode: CrewGroupJoinMode;
13
- partial: boolean;
14
- taskIds: string[];
15
- completed: string[];
16
- failed: string[];
17
- skipped: string[];
18
- remaining: string[];
19
- artifact?: ArtifactDescriptor;
20
- messageId?: string;
21
- requestId?: string;
22
- ackRequired?: boolean;
23
- ackStatus?: "pending" | "acknowledged";
24
- }
25
-
26
- export function resolveGroupJoinMode(runtime?: CrewRuntimeConfig): CrewGroupJoinMode {
27
- return runtime?.groupJoin ?? "smart";
28
- }
29
-
30
- export function shouldGroupJoin(mode: CrewGroupJoinMode, batch: TeamTaskState[]): boolean {
31
- if (mode === "off") return false;
32
- if (mode === "group") return batch.length > 0;
33
- return batch.length > 1;
34
- }
35
-
36
- function batchIdFor(runId: string, taskIds: string[]): string {
37
- return `${runId}_${taskIds.join("+").replace(/[^a-zA-Z0-9_+-]/g, "_")}`;
38
- }
39
-
40
- function requestIdFor(runId: string, batchId: string, partial: boolean): string {
41
- return `${runId}:group-join:${partial ? "partial" : "completed"}:${batchId}`;
42
- }
43
-
44
- function statusList(tasks: TeamTaskState[], status: TeamTaskState["status"]): string[] {
45
- return tasks.filter((task) => task.status === status).map((task) => task.id);
46
- }
47
-
48
- export function deliverGroupJoin(input: {
49
- manifest: TeamRunManifest;
50
- mode: CrewGroupJoinMode;
51
- batch: TeamTaskState[];
52
- allTasks: TeamTaskState[];
53
- partial?: boolean;
54
- }): CrewGroupJoinDelivery | undefined {
55
- if (!shouldGroupJoin(input.mode, input.batch)) return undefined;
56
- const taskIds = input.batch.map((task) => task.id);
57
- const latest = taskIds.map((id) => input.allTasks.find((task) => task.id === id)).filter((task): task is TeamTaskState => Boolean(task));
58
- const completed = statusList(latest, "completed");
59
- const failed = statusList(latest, "failed");
60
- const skipped = statusList(latest, "skipped");
61
- const remaining = latest.filter((task) => task.status === "queued" || task.status === "running").map((task) => task.id);
62
- const partial = input.partial ?? remaining.length > 0;
63
- const batchId = batchIdFor(input.manifest.runId, taskIds);
64
- const summary = aggregateTaskOutputs(latest, input.manifest);
65
- const requestId = requestIdFor(input.manifest.runId, batchId, partial);
66
- const existingMailbox = findMailboxMessageByRequestId(input.manifest, requestId);
67
- const existingStatus = existingMailbox ? readDeliveryState(input.manifest).messages[existingMailbox.id] ?? existingMailbox.status : undefined;
68
- const delivery: CrewGroupJoinDelivery = { batchId, mode: input.mode, partial, taskIds, completed, failed, skipped, remaining, requestId, ackRequired: true, ackStatus: existingStatus === "acknowledged" ? "acknowledged" : "pending" };
69
- const content = `${JSON.stringify({ ...delivery, createdAt: new Date().toISOString() }, null, 2)}\n`;
70
- const artifact = writeArtifact(input.manifest.artifactsRoot, {
71
- kind: "metadata",
72
- relativePath: `metadata/group-joins/${batchId}.json`,
73
- producer: "group-join",
74
- content,
75
- });
76
- const mailbox = existingMailbox ?? appendMailboxMessage(input.manifest, {
77
- direction: "outbox",
78
- from: "group-join",
79
- to: "leader",
80
- body: [
81
- `Group join ${partial ? "partial" : "completed"}: ${taskIds.join(", ")}`,
82
- `Request: ${requestId}`,
83
- `Completed: ${completed.join(", ") || "none"}`,
84
- `Failed: ${failed.join(", ") || "none"}`,
85
- `Skipped: ${skipped.join(", ") || "none"}`,
86
- `Remaining: ${remaining.join(", ") || "none"}`,
87
- "",
88
- summary,
89
- ].join("\n"),
90
- status: "delivered",
91
- data: { kind: "group_join", requestId, batchId, partial, ackRequired: true, taskIds, completed, failed, skipped, remaining },
92
- });
93
- appendEvent(input.manifest.eventsPath, {
94
- type: partial ? "agent.group_join.partial" : "agent.group_join.completed",
95
- runId: input.manifest.runId,
96
- message: `Group join ${partial ? "partial" : "completed"} for ${taskIds.length} task(s).`,
97
- data: { ...delivery, artifactPath: artifact.path, messageId: mailbox.id, fallback: "mailbox-delivered", reused: Boolean(existingMailbox) },
98
- });
99
- if (existingMailbox) appendEvent(input.manifest.eventsPath, {
100
- type: "agent.group_join.delivery_reused",
101
- runId: input.manifest.runId,
102
- message: `Reused group join mailbox delivery for ${taskIds.length} task(s).`,
103
- data: { requestId, messageId: mailbox.id, batchId, partial },
104
- });
105
- return { ...delivery, artifact, messageId: mailbox.id };
106
- }
1
+ import type { CrewRuntimeConfig } from "../config/config.ts";
2
+ import { writeArtifact } from "../state/artifact-store.ts";
3
+ import { appendEvent } from "../state/event-log.ts";
4
+ import { appendMailboxMessage, findMailboxMessageByRequestId, readDeliveryState } from "../state/mailbox.ts";
5
+ import type { ArtifactDescriptor, TeamRunManifest, TeamTaskState } from "../state/types.ts";
6
+ import { aggregateTaskOutputs } from "./task-output-context.ts";
7
+
8
+ export type CrewGroupJoinMode = "off" | "group" | "smart";
9
+
10
+ export interface CrewGroupJoinDelivery {
11
+ batchId: string;
12
+ mode: CrewGroupJoinMode;
13
+ partial: boolean;
14
+ taskIds: string[];
15
+ completed: string[];
16
+ failed: string[];
17
+ skipped: string[];
18
+ remaining: string[];
19
+ artifact?: ArtifactDescriptor;
20
+ messageId?: string;
21
+ requestId?: string;
22
+ ackRequired?: boolean;
23
+ ackStatus?: "pending" | "acknowledged";
24
+ }
25
+
26
+ export function resolveGroupJoinMode(runtime?: CrewRuntimeConfig): CrewGroupJoinMode {
27
+ return runtime?.groupJoin ?? "smart";
28
+ }
29
+
30
+ export function shouldGroupJoin(mode: CrewGroupJoinMode, batch: TeamTaskState[]): boolean {
31
+ if (mode === "off") return false;
32
+ if (mode === "group") return batch.length > 0;
33
+ return batch.length > 1;
34
+ }
35
+
36
+ function batchIdFor(runId: string, taskIds: string[]): string {
37
+ return `${runId}_${taskIds.join("+").replace(/[^a-zA-Z0-9_+-]/g, "_")}`;
38
+ }
39
+
40
+ function requestIdFor(runId: string, batchId: string, partial: boolean): string {
41
+ return `${runId}:group-join:${partial ? "partial" : "completed"}:${batchId}`;
42
+ }
43
+
44
+ function statusList(tasks: TeamTaskState[], status: TeamTaskState["status"]): string[] {
45
+ return tasks.filter((task) => task.status === status).map((task) => task.id);
46
+ }
47
+
48
+ export function deliverGroupJoin(input: {
49
+ manifest: TeamRunManifest;
50
+ mode: CrewGroupJoinMode;
51
+ batch: TeamTaskState[];
52
+ allTasks: TeamTaskState[];
53
+ partial?: boolean;
54
+ }): CrewGroupJoinDelivery | undefined {
55
+ if (!shouldGroupJoin(input.mode, input.batch)) return undefined;
56
+ const taskIds = input.batch.map((task) => task.id);
57
+ const latest = taskIds.map((id) => input.allTasks.find((task) => task.id === id)).filter((task): task is TeamTaskState => Boolean(task));
58
+ const completed = statusList(latest, "completed");
59
+ const failed = statusList(latest, "failed");
60
+ const skipped = statusList(latest, "skipped");
61
+ const remaining = latest.filter((task) => task.status === "queued" || task.status === "running").map((task) => task.id);
62
+ const partial = input.partial ?? remaining.length > 0;
63
+ const batchId = batchIdFor(input.manifest.runId, taskIds);
64
+ const summary = aggregateTaskOutputs(latest, input.manifest);
65
+ const requestId = requestIdFor(input.manifest.runId, batchId, partial);
66
+ const existingMailbox = findMailboxMessageByRequestId(input.manifest, requestId);
67
+ const existingStatus = existingMailbox ? readDeliveryState(input.manifest).messages[existingMailbox.id] ?? existingMailbox.status : undefined;
68
+ const delivery: CrewGroupJoinDelivery = { batchId, mode: input.mode, partial, taskIds, completed, failed, skipped, remaining, requestId, ackRequired: true, ackStatus: existingStatus === "acknowledged" ? "acknowledged" : "pending" };
69
+ const content = `${JSON.stringify({ ...delivery, createdAt: new Date().toISOString() }, null, 2)}\n`;
70
+ const artifact = writeArtifact(input.manifest.artifactsRoot, {
71
+ kind: "metadata",
72
+ relativePath: `metadata/group-joins/${batchId}.json`,
73
+ producer: "group-join",
74
+ content,
75
+ });
76
+ const mailbox = existingMailbox ?? appendMailboxMessage(input.manifest, {
77
+ direction: "outbox",
78
+ from: "group-join",
79
+ to: "leader",
80
+ body: [
81
+ `Group join ${partial ? "partial" : "completed"}: ${taskIds.join(", ")}`,
82
+ `Request: ${requestId}`,
83
+ `Completed: ${completed.join(", ") || "none"}`,
84
+ `Failed: ${failed.join(", ") || "none"}`,
85
+ `Skipped: ${skipped.join(", ") || "none"}`,
86
+ `Remaining: ${remaining.join(", ") || "none"}`,
87
+ "",
88
+ summary,
89
+ ].join("\n"),
90
+ status: "delivered",
91
+ data: { kind: "group_join", requestId, batchId, partial, ackRequired: true, taskIds, completed, failed, skipped, remaining },
92
+ });
93
+ appendEvent(input.manifest.eventsPath, {
94
+ type: partial ? "agent.group_join.partial" : "agent.group_join.completed",
95
+ runId: input.manifest.runId,
96
+ message: `Group join ${partial ? "partial" : "completed"} for ${taskIds.length} task(s).`,
97
+ data: { ...delivery, artifactPath: artifact.path, messageId: mailbox.id, fallback: "mailbox-delivered", reused: Boolean(existingMailbox) },
98
+ });
99
+ if (existingMailbox) appendEvent(input.manifest.eventsPath, {
100
+ type: "agent.group_join.delivery_reused",
101
+ runId: input.manifest.runId,
102
+ message: `Reused group join mailbox delivery for ${taskIds.length} task(s).`,
103
+ data: { requestId, messageId: mailbox.id, batchId, partial },
104
+ });
105
+ return { ...delivery, artifact, messageId: mailbox.id };
106
+ }
107
+
108
+ import type { CrewAgentRecord } from "./crew-agent-runtime.ts";
109
+
110
+ export type JoinMode = "async" | "group" | "smart";
111
+ export type DeliveryCallback = (records: CrewAgentRecord[], partial: boolean) => void;
112
+
113
+ interface AgentGroup {
114
+ groupId: string;
115
+ agentIds: Set<string>;
116
+ completedRecords: Map<string, CrewAgentRecord>;
117
+ timeoutHandle?: ReturnType<typeof setTimeout>;
118
+ delivered: boolean;
119
+ isStraggler: boolean;
120
+ }
121
+
122
+ const DEFAULT_TIMEOUT = 30_000;
123
+ const STRAGGLER_TIMEOUT = 15_000;
124
+
125
+ export class GroupJoinManager {
126
+ private groups = new Map<string, AgentGroup>();
127
+ private agentToGroup = new Map<string, string>();
128
+
129
+ private deliverCb: DeliveryCallback;
130
+ private groupTimeout: number;
131
+
132
+ constructor(
133
+ deliverCb: DeliveryCallback,
134
+ groupTimeout = DEFAULT_TIMEOUT,
135
+ ) {
136
+ this.deliverCb = deliverCb;
137
+ this.groupTimeout = groupTimeout;
138
+ }
139
+
140
+ registerGroup(groupId: string, agentIds: string[]): void {
141
+ const group: AgentGroup = {
142
+ groupId,
143
+ agentIds: new Set(agentIds),
144
+ completedRecords: new Map(),
145
+ delivered: false,
146
+ isStraggler: false,
147
+ };
148
+ this.groups.set(groupId, group);
149
+ for (const id of agentIds) {
150
+ this.agentToGroup.set(id, groupId);
151
+ }
152
+ }
153
+
154
+ onAgentComplete(record: CrewAgentRecord): "delivered" | "held" | "pass" {
155
+ const groupId = this.agentToGroup.get(record.taskId);
156
+ if (!groupId) return "pass";
157
+
158
+ const group = this.groups.get(groupId);
159
+ if (!group || group.delivered) return "pass";
160
+
161
+ group.completedRecords.set(record.taskId, record);
162
+
163
+ if (group.completedRecords.size >= group.agentIds.size) {
164
+ this.deliver(group, false);
165
+ return "delivered";
166
+ }
167
+
168
+ if (!group.timeoutHandle) {
169
+ const timeout = group.isStraggler ? STRAGGLER_TIMEOUT : this.groupTimeout;
170
+ group.timeoutHandle = setTimeout(() => {
171
+ this.onTimeout(group);
172
+ }, timeout);
173
+ }
174
+
175
+ return "held";
176
+ }
177
+
178
+ private onTimeout(group: AgentGroup): void {
179
+ if (group.delivered) return;
180
+ group.timeoutHandle = undefined;
181
+
182
+ const remaining = new Set<string>();
183
+ for (const id of group.agentIds) {
184
+ if (!group.completedRecords.has(id)) remaining.add(id);
185
+ }
186
+
187
+ for (const id of group.completedRecords.keys()) {
188
+ this.agentToGroup.delete(id);
189
+ }
190
+
191
+ this.deliverCb([...group.completedRecords.values()], true);
192
+
193
+ group.completedRecords.clear();
194
+ group.agentIds = remaining;
195
+ group.isStraggler = true;
196
+
197
+ // Re-arm timer for remaining stragglers so they aren't silently abandoned
198
+ if (remaining.size > 0) {
199
+ group.timeoutHandle = setTimeout(() => this.onTimeout(group), STRAGGLER_TIMEOUT);
200
+ group.timeoutHandle.unref();
201
+ }
202
+ }
203
+
204
+ private deliver(group: AgentGroup, partial: boolean): void {
205
+ if (group.timeoutHandle) {
206
+ clearTimeout(group.timeoutHandle);
207
+ group.timeoutHandle = undefined;
208
+ }
209
+ group.delivered = true;
210
+ this.deliverCb([...group.completedRecords.values()], partial);
211
+ this.cleanupGroup(group.groupId);
212
+ }
213
+
214
+ private cleanupGroup(groupId: string): void {
215
+ const group = this.groups.get(groupId);
216
+ if (!group) return;
217
+ for (const id of group.agentIds) {
218
+ this.agentToGroup.delete(id);
219
+ }
220
+ this.groups.delete(groupId);
221
+ }
222
+
223
+ isGrouped(agentId: string): boolean {
224
+ return this.agentToGroup.has(agentId);
225
+ }
226
+
227
+ dispose(): void {
228
+ for (const group of this.groups.values()) {
229
+ if (group.timeoutHandle) clearTimeout(group.timeoutHandle);
230
+ }
231
+ this.groups.clear();
232
+ this.agentToGroup.clear();
233
+ }
234
+ }
@@ -1,124 +1,145 @@
1
- import type { NotificationDescriptor } from "../extension/notification-router.ts";
2
- import type { MetricRegistry } from "../observability/metric-registry.ts";
3
- import { appendEvent } from "../state/event-log.ts";
4
- import { loadRunManifestById } from "../state/state-store.ts";
5
- import type { TeamRunManifest } from "../state/types.ts";
6
- import { logInternalError } from "../utils/internal-error.ts";
7
- import type { ManifestCache } from "./manifest-cache.ts";
8
- import { classifyHeartbeat, DEFAULT_GRADIENT_THRESHOLDS, heartbeatAgeMs, type GradientThresholds, type HeartbeatLevel } from "./heartbeat-gradient.ts";
9
-
10
- export interface HeartbeatWatcherRouter {
11
- enqueue(notification: NotificationDescriptor): boolean;
12
- }
13
-
14
- export interface HeartbeatWatcherOptions {
15
- cwd: string;
16
- pollIntervalMs?: number;
17
- thresholds?: GradientThresholds;
18
- manifestCache: ManifestCache;
19
- registry: MetricRegistry;
20
- router: HeartbeatWatcherRouter;
21
- deadletterTickThreshold?: number;
22
- onDead?: (runId: string, taskId: string, elapsed: number) => void;
23
- onDeadletterTrigger?: (manifest: TeamRunManifest, taskId: string) => void;
24
- }
25
-
26
- /**
27
- * Polls running runs for heartbeat staleness.
28
- *
29
- * Uses recursive setTimeout to avoid timer storms.
30
- * Cleanup is done in the same pass — no second scan over manifests.
31
- * Keys for runs that disappear from the cache are cleaned via staleness-age policy
32
- * rather than being leaked forever.
33
- */
34
- export class HeartbeatWatcher {
35
- private timer?: ReturnType<typeof setTimeout>;
36
- private lastLevel = new Map<string, HeartbeatLevel>();
37
- private consecutiveDead = new Map<string, number>();
38
- private lastSeen = new Map<string, number>(); // key last time it was active
39
- /** Max age (ms) to retain a stale key before garbage-collecting it. */
40
- private readonly maxKeyAgeMs = 600_000; // 10 minutes
41
- private readonly opts: HeartbeatWatcherOptions;
42
-
43
- constructor(opts: HeartbeatWatcherOptions) {
44
- this.opts = opts;
45
- }
46
-
47
- start(): void {
48
- this.dispose();
49
- this.scheduleTick();
50
- }
51
-
52
- private scheduleTick(): void {
53
- this.timer = setTimeout(() => this.tick(), this.opts.pollIntervalMs ?? 5000);
54
- this.timer.unref();
55
- }
56
-
57
- tick(now = Date.now()): void {
58
- try {
59
- this.tickUnsafe(now);
60
- } catch (error) {
61
- logInternalError("heartbeat-watcher.tick", error);
62
- } finally {
63
- this.scheduleTick();
64
- }
65
- }
66
-
67
- private tickUnsafe(now: number): void {
68
- const thresholds = this.opts.thresholds ?? DEFAULT_GRADIENT_THRESHOLDS;
69
- const tickThreshold = this.opts.deadletterTickThreshold ?? 3;
70
- const activeKeys = new Set<string>();
71
-
72
- for (const run of this.opts.manifestCache.list(50)) {
73
- if (run.status !== "running") continue;
74
- const loaded = loadRunManifestById(this.opts.cwd, run.runId);
75
- if (!loaded) continue;
76
- for (const task of loaded.tasks) {
77
- if (task.status !== "running") continue;
78
- const key = `${run.runId}:${task.id}`;
79
- activeKeys.add(key);
80
- this.lastSeen.set(key, now);
81
-
82
- const elapsed = heartbeatAgeMs(task.heartbeat, now);
83
- const level = classifyHeartbeat(task.heartbeat, thresholds, now);
84
- this.opts.registry.gauge("crew.heartbeat.staleness_ms", "Heartbeat elapsed since last seen, milliseconds").set({ runId: run.runId, taskId: task.id }, Number.isFinite(elapsed) ? elapsed : thresholds.deadMs);
85
- this.opts.registry.counter("crew.heartbeat.level_total", "Heartbeat classifications by level").inc({ runId: run.runId, level });
86
- const previous = this.lastLevel.get(key);
87
- this.lastLevel.set(key, level);
88
- if (level === "dead" && previous !== "dead") {
89
- this.opts.registry.counter("crew.heartbeat.dead_total", "Dead heartbeat detections").inc({ runId: run.runId });
90
- appendEvent(loaded.manifest.eventsPath, { type: "crew.task.heartbeat_dead", runId: run.runId, taskId: task.id, message: `Task ${task.id} heartbeat dead.`, data: { elapsedMs: Number.isFinite(elapsed) ? elapsed : undefined } });
91
- this.opts.router.enqueue({ id: `dead_${run.runId}_${task.id}`, severity: "warning", source: "heartbeat-watcher", runId: run.runId, title: `Task ${task.id} heartbeat dead`, body: "Background watcher detected a stuck worker." });
92
- this.opts.onDead?.(run.runId, task.id, Number.isFinite(elapsed) ? elapsed : thresholds.deadMs);
93
- }
94
- if (level === "dead") {
95
- const count = (this.consecutiveDead.get(key) ?? 0) + 1;
96
- this.consecutiveDead.set(key, count);
97
- if (count === tickThreshold) this.opts.onDeadletterTrigger?.(loaded.manifest, task.id);
98
- } else {
99
- this.consecutiveDead.delete(key);
100
- }
101
- }
102
- }
103
-
104
- // Cleanup: drop keys that were NOT in this tick's active set AND
105
- // haven't been seen for > maxKeyAgeMs. This covers runs that
106
- // completed or fell out of the manifest cache's top-50 window.
107
- const cutoff = now - this.maxKeyAgeMs;
108
- for (const [key, ts] of this.lastSeen) {
109
- if (!activeKeys.has(key) && ts < cutoff) {
110
- this.lastLevel.delete(key);
111
- this.consecutiveDead.delete(key);
112
- this.lastSeen.delete(key);
113
- }
114
- }
115
- }
116
-
117
- dispose(): void {
118
- if (this.timer) clearTimeout(this.timer);
119
- this.timer = undefined;
120
- this.lastLevel.clear();
121
- this.consecutiveDead.clear();
122
- this.lastSeen.clear();
123
- }
124
- }
1
+ import type { NotificationDescriptor } from "../extension/notification-router.ts";
2
+ import type { MetricRegistry } from "../observability/metric-registry.ts";
3
+ import { appendEvent } from "../state/event-log.ts";
4
+ import { loadRunManifestById } from "../state/state-store.ts";
5
+ import type { TeamRunManifest } from "../state/types.ts";
6
+ import { logInternalError } from "../utils/internal-error.ts";
7
+ import type { ManifestCache } from "./manifest-cache.ts";
8
+ import { classifyHeartbeat, DEFAULT_GRADIENT_THRESHOLDS, heartbeatAgeMs, type GradientThresholds, type HeartbeatLevel } from "./heartbeat-gradient.ts";
9
+
10
/**
 * Notification sink the heartbeat watcher writes to.
 *
 * Only `enqueue` is required. The watcher calls it when a task is first
 * classified dead and does not inspect the boolean it returns.
 */
export interface HeartbeatWatcherRouter {
	/** Queues a single notification. */
	enqueue(notification: NotificationDescriptor): boolean;
}
13
+
14
/** Construction options for `HeartbeatWatcher`. */
export interface HeartbeatWatcherOptions {
	// --- required collaborators ---

	/** Working directory passed to `loadRunManifestById` when loading a run. */
	cwd: string;
	/** Source of candidate runs; the watcher inspects `list(50)` on every tick. */
	manifestCache: ManifestCache;
	/** Receives the heartbeat staleness gauge and the level / dead counters. */
	registry: MetricRegistry;
	/** Receives a warning notification when a task is first classified dead. */
	router: HeartbeatWatcherRouter;

	// --- optional tuning ---

	/**
	 * Delay between ticks in milliseconds. Default 5000. The watcher may poll
	 * faster (at most every 1000 ms) while it is tracking a dead streak.
	 */
	pollIntervalMs?: number;
	/** Classification thresholds. Defaults to `DEFAULT_GRADIENT_THRESHOLDS`. */
	thresholds?: GradientThresholds;
	/**
	 * Consecutive-dead tick count at which `onDeadletterTrigger` fires.
	 * Default 3.
	 */
	deadletterTickThreshold?: number;
	/**
	 * 3.6 minimum interval between repeated deadletter triggers for the same
	 * runId+taskId. Without this, a flaky worker (dead → alive → dead) can
	 * fire deadletter entries faster than the operator can respond. Default
	 * 60_000 ms.
	 */
	deadletterCooldownMs?: number;

	// --- optional callbacks ---

	/**
	 * Called when a running task is classified dead and its previous
	 * classification was not dead. `elapsed` is the heartbeat age in
	 * milliseconds, or `thresholds.deadMs` when that age is not finite.
	 */
	onDead?: (runId: string, taskId: string, elapsed: number) => void;
	/**
	 * Called when a task's consecutive-dead tick count reaches
	 * `deadletterTickThreshold`, subject to `deadletterCooldownMs`.
	 */
	onDeadletterTrigger?: (manifest: TeamRunManifest, taskId: string) => void;
}
32
+
33
/**
 * Polls running runs for heartbeat staleness.
 *
 * Uses recursive setTimeout to avoid timer storms; at most one timer is
 * pending at any time, even when `tick()` is invoked manually.
 * Cleanup is done in the same pass — no second scan over manifests.
 * Keys for runs that disappear from the cache are cleaned via staleness-age policy
 * rather than being leaked forever.
 */
export class HeartbeatWatcher {
	private timer?: ReturnType<typeof setTimeout>;
	private lastLevel = new Map<string, HeartbeatLevel>();
	private consecutiveDead = new Map<string, number>();
	private lastSeen = new Map<string, number>(); // key → last time it was active
	private lastDeadletterTriggerAt = new Map<string, number>(); // 3.6 cooldown gate
	/** True when the last completed tick saw at least one running task classified dead. */
	private deadStreakActive = false;
	/** Max age (ms) to retain a stale key before garbage-collecting it. */
	private readonly maxKeyAgeMs = 600_000; // 10 minutes
	private readonly opts: HeartbeatWatcherOptions;

	/**
	 * @param opts - Collaborators and tuning values; stored as-is and read on every tick.
	 */
	constructor(opts: HeartbeatWatcherOptions) {
		this.opts = opts;
	}

	/** Resets all tracked state and schedules the first tick. */
	start(): void {
		this.dispose();
		this.scheduleTick();
	}

	/** Arms the single pending poll timer, replacing any timer that is still pending. */
	private scheduleTick(): void {
		// tick() is public and always reschedules; without this guard a manual
		// tick() on a started watcher would fork a second polling chain that
		// dispose() could no longer cancel.
		if (this.timer) clearTimeout(this.timer);

		// 3.2 — when at least one running task is in a dead streak, poll faster
		// (1s) so operators get notified quickly. Healthy state stays at the
		// configured interval (default 5s) to keep idle CPU near zero.
		const baseInterval = this.opts.pollIntervalMs ?? 5000;
		const interval = this.deadStreakActive ? Math.min(1000, baseInterval) : baseInterval;
		this.timer = setTimeout(() => this.tick(), interval);
		this.timer.unref();
	}

	/**
	 * Runs one polling pass and schedules the next one.
	 * Errors from the pass are logged via `logInternalError`, never thrown.
	 *
	 * @param now - Timestamp in milliseconds used for all age calculations; defaults to `Date.now()`.
	 */
	tick(now = Date.now()): void {
		try {
			this.tickUnsafe(now);
		} catch (error) {
			logInternalError("heartbeat-watcher.tick", error);
		} finally {
			this.scheduleTick();
		}
	}

	/**
	 * Single polling pass: classifies every running task of every running run,
	 * publishes metrics, emits dead-heartbeat events/notifications, applies the
	 * deadletter threshold and cooldown, then garbage-collects stale keys.
	 *
	 * NOTE(review): a trigger suppressed by the cooldown is dropped for the rest
	 * of that dead streak (the count only equals the threshold once) — confirm
	 * that dropping rather than deferring is intended.
	 */
	private tickUnsafe(now: number): void {
		const thresholds = this.opts.thresholds ?? DEFAULT_GRADIENT_THRESHOLDS;
		const tickThreshold = this.opts.deadletterTickThreshold ?? 3;
		const cooldown = this.opts.deadletterCooldownMs ?? 60_000;
		const activeKeys = new Set<string>();
		let sawDead = false;

		for (const run of this.opts.manifestCache.list(50)) {
			if (run.status !== "running") continue;
			const loaded = loadRunManifestById(this.opts.cwd, run.runId);
			if (!loaded) continue;
			for (const task of loaded.tasks) {
				if (task.status !== "running") continue;
				const key = `${run.runId}:${task.id}`;
				activeKeys.add(key);
				this.lastSeen.set(key, now);

				const elapsed = heartbeatAgeMs(task.heartbeat, now);
				const level = classifyHeartbeat(task.heartbeat, thresholds, now);
				this.opts.registry.gauge("crew.heartbeat.staleness_ms", "Heartbeat elapsed since last seen, milliseconds").set({ runId: run.runId, taskId: task.id }, Number.isFinite(elapsed) ? elapsed : thresholds.deadMs);
				this.opts.registry.counter("crew.heartbeat.level_total", "Heartbeat classifications by level").inc({ runId: run.runId, level });
				const previous = this.lastLevel.get(key);
				this.lastLevel.set(key, level);
				// Transition into "dead": report once per transition, not once per tick.
				if (level === "dead" && previous !== "dead") {
					this.opts.registry.counter("crew.heartbeat.dead_total", "Dead heartbeat detections").inc({ runId: run.runId });
					appendEvent(loaded.manifest.eventsPath, { type: "crew.task.heartbeat_dead", runId: run.runId, taskId: task.id, message: `Task ${task.id} heartbeat dead.`, data: { elapsedMs: Number.isFinite(elapsed) ? elapsed : undefined } });
					this.opts.router.enqueue({ id: `dead_${run.runId}_${task.id}`, severity: "warning", source: "heartbeat-watcher", runId: run.runId, title: `Task ${task.id} heartbeat dead`, body: "Background watcher detected a stuck worker." });
					this.opts.onDead?.(run.runId, task.id, Number.isFinite(elapsed) ? elapsed : thresholds.deadMs);
				}
				if (level === "dead") {
					sawDead = true;
					const count = (this.consecutiveDead.get(key) ?? 0) + 1;
					this.consecutiveDead.set(key, count);
					if (count === tickThreshold) {
						// 3.6 cooldown gate. "Never triggered" is represented by a
						// missing entry rather than timestamp 0, so the first trigger
						// is not suppressed when `now` is smaller than the cooldown.
						const lastTrigger = this.lastDeadletterTriggerAt.get(key);
						if (lastTrigger === undefined || now - lastTrigger >= cooldown) {
							this.lastDeadletterTriggerAt.set(key, now);
							this.opts.onDeadletterTrigger?.(loaded.manifest, task.id);
						}
					}
				} else {
					this.consecutiveDead.delete(key);
				}
			}
		}

		// Only reached when the scan completed; a failed pass keeps the previous cadence.
		this.deadStreakActive = sawDead;

		// Cleanup: drop keys that were NOT in this tick's active set AND
		// haven't been seen for > maxKeyAgeMs. This covers runs that
		// completed or fell out of the manifest cache's top-50 window.
		const cutoff = now - this.maxKeyAgeMs;
		for (const [key, ts] of this.lastSeen) {
			if (!activeKeys.has(key) && ts < cutoff) {
				this.lastLevel.delete(key);
				this.consecutiveDead.delete(key);
				this.lastSeen.delete(key);
			}
		}

		// An expired cooldown stamp gates nothing (it behaves exactly like a
		// missing one), so drop it instead of letting the map grow forever.
		for (const [key, triggeredAt] of this.lastDeadletterTriggerAt) {
			if (now - triggeredAt >= cooldown) this.lastDeadletterTriggerAt.delete(key);
		}
	}

	/** Cancels the pending timer and clears every piece of per-task state. */
	dispose(): void {
		if (this.timer) clearTimeout(this.timer);
		this.timer = undefined;
		this.lastLevel.clear();
		this.consecutiveDead.clear();
		this.lastSeen.clear();
		this.lastDeadletterTriggerAt.clear();
		this.deadStreakActive = false;
	}
}