pi-crew 0.1.49 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (249)
  1. package/CHANGELOG.md +74 -1
  2. package/README.md +176 -781
  3. package/agents/analyst.md +11 -11
  4. package/agents/critic.md +11 -11
  5. package/agents/executor.md +11 -11
  6. package/agents/explorer.md +11 -11
  7. package/agents/planner.md +11 -11
  8. package/agents/reviewer.md +11 -11
  9. package/agents/security-reviewer.md +11 -11
  10. package/agents/test-engineer.md +11 -11
  11. package/agents/verifier.md +70 -11
  12. package/agents/writer.md +11 -11
  13. package/docs/actions-reference.md +595 -0
  14. package/docs/commands-reference.md +347 -0
  15. package/docs/runtime-flow.md +148 -148
  16. package/index.ts +6 -6
  17. package/package.json +99 -99
  18. package/skills/async-worker-recovery/SKILL.md +42 -42
  19. package/skills/context-artifact-hygiene/SKILL.md +52 -52
  20. package/skills/delegation-patterns/SKILL.md +54 -54
  21. package/skills/mailbox-interactive/SKILL.md +40 -40
  22. package/skills/model-routing-context/SKILL.md +39 -39
  23. package/skills/multi-perspective-review/SKILL.md +58 -58
  24. package/skills/observability-reliability/SKILL.md +41 -41
  25. package/skills/orchestration/SKILL.md +157 -157
  26. package/skills/ownership-session-security/SKILL.md +41 -41
  27. package/skills/pi-extension-lifecycle/SKILL.md +39 -39
  28. package/skills/requirements-to-task-packet/SKILL.md +63 -63
  29. package/skills/resource-discovery-config/SKILL.md +41 -41
  30. package/skills/runtime-state-reader/SKILL.md +44 -44
  31. package/skills/secure-agent-orchestration-review/SKILL.md +45 -45
  32. package/skills/state-mutation-locking/SKILL.md +42 -42
  33. package/skills/systematic-debugging/SKILL.md +67 -67
  34. package/skills/ui-render-performance/SKILL.md +39 -39
  35. package/skills/verification-before-done/SKILL.md +57 -57
  36. package/skills/worktree-isolation/SKILL.md +39 -39
  37. package/src/adapters/claude-adapter.ts +25 -0
  38. package/src/adapters/codex-adapter.ts +21 -0
  39. package/src/adapters/cursor-adapter.ts +17 -0
  40. package/src/adapters/export-util.ts +137 -0
  41. package/src/adapters/index.ts +15 -0
  42. package/src/adapters/registry.ts +18 -0
  43. package/src/adapters/types.ts +23 -0
  44. package/src/agents/agent-config.ts +2 -0
  45. package/src/agents/agent-search.ts +98 -98
  46. package/src/agents/discover-agents.ts +2 -1
  47. package/src/config/config.ts +14 -1
  48. package/src/config/defaults.ts +5 -5
  49. package/src/config/drift-detector.ts +211 -0
  50. package/src/config/markers.ts +327 -0
  51. package/src/config/resilient-parser.ts +108 -0
  52. package/src/config/suggestions.ts +74 -0
  53. package/src/extension/cross-extension-rpc.ts +103 -82
  54. package/src/extension/project-init.ts +36 -4
  55. package/src/extension/register.ts +67 -22
  56. package/src/extension/registration/commands.ts +77 -8
  57. package/src/extension/registration/subagent-tools.ts +10 -1
  58. package/src/extension/registration/team-tool.ts +10 -1
  59. package/src/extension/registration/viewers.ts +48 -34
  60. package/src/extension/run-bundle-schema.ts +89 -89
  61. package/src/extension/run-export.ts +26 -12
  62. package/src/extension/run-import.ts +25 -1
  63. package/src/extension/run-index.ts +5 -1
  64. package/src/extension/run-maintenance.ts +142 -68
  65. package/src/extension/team-manager-command.ts +10 -1
  66. package/src/extension/team-tool/context.ts +1 -1
  67. package/src/extension/team-tool/doctor.ts +28 -3
  68. package/src/extension/team-tool/handle-settings.ts +195 -188
  69. package/src/extension/team-tool/inspect.ts +41 -41
  70. package/src/extension/team-tool/intent-policy.ts +42 -42
  71. package/src/extension/team-tool/lifecycle-actions.ts +27 -8
  72. package/src/extension/team-tool/plan.ts +19 -19
  73. package/src/extension/team-tool/run.ts +12 -1
  74. package/src/extension/team-tool.ts +14 -3
  75. package/src/i18n.ts +184 -184
  76. package/src/observability/exporters/otlp-exporter.ts +92 -77
  77. package/src/prompt/prompt-runtime.ts +72 -72
  78. package/src/runtime/agent-memory.ts +72 -72
  79. package/src/runtime/agent-observability.ts +114 -114
  80. package/src/runtime/async-marker.ts +26 -26
  81. package/src/runtime/attention-events.ts +28 -28
  82. package/src/runtime/auto-resume.ts +100 -0
  83. package/src/runtime/background-runner.ts +11 -1
  84. package/src/runtime/cancellation-token.ts +89 -89
  85. package/src/runtime/cancellation.ts +61 -61
  86. package/src/runtime/capability-inventory.ts +116 -116
  87. package/src/runtime/child-pi.ts +7 -2
  88. package/src/runtime/compaction-summary.ts +271 -0
  89. package/src/runtime/completion-guard.ts +190 -190
  90. package/src/runtime/concurrency.ts +3 -1
  91. package/src/runtime/crash-recovery.ts +33 -0
  92. package/src/runtime/delta-conflict.ts +360 -0
  93. package/src/runtime/diagnostic-export.ts +3 -1
  94. package/src/runtime/direct-run.ts +35 -35
  95. package/src/runtime/event-stream-bridge.ts +3 -1
  96. package/src/runtime/foreground-control.ts +82 -82
  97. package/src/runtime/green-contract.ts +46 -46
  98. package/src/runtime/group-join.ts +106 -106
  99. package/src/runtime/heartbeat-gradient.ts +28 -28
  100. package/src/runtime/heartbeat-watcher.ts +124 -124
  101. package/src/runtime/iteration-hooks.ts +262 -0
  102. package/src/runtime/live-agent-control.ts +88 -88
  103. package/src/runtime/live-control-realtime.ts +36 -36
  104. package/src/runtime/live-extension-bridge.ts +150 -150
  105. package/src/runtime/live-irc.ts +92 -92
  106. package/src/runtime/live-session-health.ts +100 -100
  107. package/src/runtime/loop-gates.ts +129 -0
  108. package/src/runtime/metric-parser.ts +40 -0
  109. package/src/runtime/notebook-helpers.ts +90 -90
  110. package/src/runtime/orphan-sentinel.ts +7 -7
  111. package/src/runtime/parallel-research.ts +44 -44
  112. package/src/runtime/phase-progress.ts +217 -0
  113. package/src/runtime/pi-args.ts +38 -2
  114. package/src/runtime/pi-json-output.ts +111 -111
  115. package/src/runtime/pi-spawn.ts +74 -6
  116. package/src/runtime/policy-engine.ts +79 -79
  117. package/src/runtime/post-checks.ts +122 -0
  118. package/src/runtime/process-status.ts +14 -1
  119. package/src/runtime/progress-event-coalescer.ts +43 -43
  120. package/src/runtime/prose-compressor.ts +164 -164
  121. package/src/runtime/recovery-recipes.ts +74 -74
  122. package/src/runtime/result-extractor.ts +121 -121
  123. package/src/runtime/role-permission.ts +39 -39
  124. package/src/runtime/sensitive-paths.ts +3 -3
  125. package/src/runtime/session-resources.ts +25 -25
  126. package/src/runtime/session-snapshot.ts +59 -59
  127. package/src/runtime/session-usage.ts +79 -79
  128. package/src/runtime/sidechain-output.ts +29 -29
  129. package/src/runtime/stream-preview.ts +177 -177
  130. package/src/runtime/supervisor-contact.ts +59 -59
  131. package/src/runtime/task-display.ts +38 -38
  132. package/src/runtime/task-graph.ts +207 -0
  133. package/src/runtime/task-quality.ts +207 -0
  134. package/src/runtime/task-runner/capabilities.ts +78 -78
  135. package/src/runtime/task-runner/live-executor.ts +7 -1
  136. package/src/runtime/task-runner/progress.ts +119 -119
  137. package/src/runtime/task-runner/prompt-builder.ts +1 -1
  138. package/src/runtime/task-runner/prompt-pipeline.ts +64 -64
  139. package/src/runtime/task-runner/result-utils.ts +14 -14
  140. package/src/runtime/task-runner/run-projection.ts +103 -103
  141. package/src/runtime/task-runner/state-helpers.ts +22 -22
  142. package/src/runtime/team-runner.ts +126 -7
  143. package/src/runtime/worker-heartbeat.ts +21 -21
  144. package/src/runtime/worker-startup.ts +57 -57
  145. package/src/runtime/workflow-state.ts +187 -0
  146. package/src/runtime/workspace-tree.ts +298 -298
  147. package/src/schema/config-schema.ts +12 -0
  148. package/src/schema/validation-types.ts +148 -0
  149. package/src/skills/skill-templates.ts +374 -0
  150. package/src/state/active-run-registry.ts +35 -11
  151. package/src/state/atomic-write.ts +33 -26
  152. package/src/state/contracts.ts +1 -0
  153. package/src/state/event-reconstructor.ts +217 -0
  154. package/src/state/locks.ts +2 -11
  155. package/src/state/mailbox.ts +4 -3
  156. package/src/state/state-store.ts +32 -14
  157. package/src/state/task-claims.ts +44 -44
  158. package/src/state/types.ts +9 -0
  159. package/src/state/usage.ts +29 -29
  160. package/src/subagents/async-entry.ts +1 -1
  161. package/src/subagents/index.ts +3 -3
  162. package/src/subagents/live/control.ts +1 -1
  163. package/src/subagents/live/manager.ts +1 -1
  164. package/src/subagents/live/realtime.ts +1 -1
  165. package/src/subagents/live/session-runtime.ts +1 -1
  166. package/src/subagents/manager.ts +1 -1
  167. package/src/subagents/spawn.ts +1 -1
  168. package/src/teams/team-serializer.ts +38 -38
  169. package/src/types/diff.d.ts +18 -18
  170. package/src/ui/crew-footer.ts +101 -101
  171. package/src/ui/crew-select-list.ts +111 -111
  172. package/src/ui/crew-widget.ts +9 -4
  173. package/src/ui/dashboard-panes/cancellation-pane.ts +42 -42
  174. package/src/ui/dashboard-panes/capability-pane.ts +59 -59
  175. package/src/ui/dashboard-panes/mailbox-pane.ts +35 -35
  176. package/src/ui/dashboard-panes/metrics-pane.ts +34 -34
  177. package/src/ui/dashboard-panes/progress-pane.ts +11 -0
  178. package/src/ui/dynamic-border.ts +25 -25
  179. package/src/ui/layout-primitives.ts +106 -106
  180. package/src/ui/loaders.ts +158 -158
  181. package/src/ui/powerbar-publisher.ts +6 -0
  182. package/src/ui/render-coalescer.ts +51 -51
  183. package/src/ui/render-diff.ts +119 -119
  184. package/src/ui/render-scheduler.ts +143 -143
  185. package/src/ui/run-action-dispatcher.ts +10 -1
  186. package/src/ui/spinner.ts +17 -17
  187. package/src/ui/status-colors.ts +58 -58
  188. package/src/ui/syntax-highlight.ts +116 -116
  189. package/src/ui/transcript-entries.ts +258 -258
  190. package/src/utils/completion-dedupe.ts +63 -63
  191. package/src/utils/frontmatter.ts +68 -68
  192. package/src/utils/git.ts +262 -262
  193. package/src/utils/ids.ts +17 -17
  194. package/src/utils/incremental-reader.ts +104 -104
  195. package/src/utils/names.ts +27 -27
  196. package/src/utils/redaction.ts +44 -44
  197. package/src/utils/safe-paths.ts +47 -47
  198. package/src/utils/scan-cache.ts +136 -136
  199. package/src/utils/sleep.ts +40 -26
  200. package/src/utils/task-name-generator.ts +337 -337
  201. package/src/workflows/validate-workflow.ts +40 -40
  202. package/src/worktree/branch-freshness.ts +45 -45
  203. package/src/worktree/worktree-manager.ts +11 -3
  204. package/teams/default.team.md +12 -12
  205. package/teams/fast-fix.team.md +11 -11
  206. package/teams/implementation.team.md +18 -18
  207. package/teams/parallel-research.team.md +14 -14
  208. package/teams/research.team.md +11 -11
  209. package/teams/review.team.md +12 -12
  210. package/workflows/default.workflow.md +30 -29
  211. package/workflows/fast-fix.workflow.md +23 -22
  212. package/workflows/implementation.workflow.md +43 -38
  213. package/workflows/parallel-research.workflow.md +46 -46
  214. package/workflows/research.workflow.md +22 -22
  215. package/workflows/review.workflow.md +30 -30
  216. package/docs/refactor-tasks-phase3.md +0 -394
  217. package/docs/refactor-tasks-phase4.md +0 -564
  218. package/docs/refactor-tasks-phase5.md +0 -402
  219. package/docs/refactor-tasks-phase6.md +0 -662
  220. package/docs/refactor-tasks.md +0 -1484
  221. package/docs/research/AGENT-EXECUTION-ARCHITECTURE.md +0 -261
  222. package/docs/research/AGENT-LIFECYCLE-COMPARISON.md +0 -111
  223. package/docs/research/AUDIT_OH_MY_PI.md +0 -261
  224. package/docs/research/AUDIT_PI_CREW.md +0 -457
  225. package/docs/research/CAVEMAN-DEEP-RESEARCH.md +0 -281
  226. package/docs/research/COMPARISON_OH_MY_PI_VS_PI_CREW.md +0 -264
  227. package/docs/research/DEEP-RESEARCH-PI-POWERBAR.md +0 -343
  228. package/docs/research/DEEP_RESEARCH_SUBAGENT_ARCHITECTURE.md +0 -480
  229. package/docs/research/GAP_CLOSURE_IMPLEMENTATION_PLAN.md +0 -354
  230. package/docs/research/IMPLEMENTATION_PLAN.md +0 -385
  231. package/docs/research/LIVE-SESSION-PRODUCTION-READY-PLAN.md +0 -502
  232. package/docs/research/OH-MY-PI-DEEP-RESEARCH-v14.7.6.md +0 -266
  233. package/docs/research/REMAINING-GAPS-PLAN.md +0 -363
  234. package/docs/research/SESSION-SUMMARY-2026-05-08.md +0 -146
  235. package/docs/research/UI-RESPONSIVENESS-AUDIT.md +0 -173
  236. package/docs/research-awesome-agent-skills-distillation.md +0 -100
  237. package/docs/research-extension-examples.md +0 -297
  238. package/docs/research-extension-system.md +0 -324
  239. package/docs/research-oh-my-pi-distillation.md +0 -369
  240. package/docs/research-optimization-plan.md +0 -548
  241. package/docs/research-phase10-distillation.md +0 -199
  242. package/docs/research-phase11-distillation.md +0 -201
  243. package/docs/research-phase8-operator-experience-plan.md +0 -819
  244. package/docs/research-phase9-observability-reliability-plan.md +0 -1190
  245. package/docs/research-pi-coding-agent.md +0 -357
  246. package/docs/research-source-pi-crew-reference.md +0 -174
  247. package/docs/research-ui-optimization-plan.md +0 -480
  248. package/docs/source-runtime-refactor-map.md +0 -107
  249. package/src/utils/atomic-write.ts +0 -33
@@ -1,124 +1,124 @@
1
- import type { NotificationDescriptor } from "../extension/notification-router.ts";
2
- import type { MetricRegistry } from "../observability/metric-registry.ts";
3
- import { appendEvent } from "../state/event-log.ts";
4
- import { loadRunManifestById } from "../state/state-store.ts";
5
- import type { TeamRunManifest } from "../state/types.ts";
6
- import { logInternalError } from "../utils/internal-error.ts";
7
- import type { ManifestCache } from "./manifest-cache.ts";
8
- import { classifyHeartbeat, DEFAULT_GRADIENT_THRESHOLDS, heartbeatAgeMs, type GradientThresholds, type HeartbeatLevel } from "./heartbeat-gradient.ts";
9
-
10
- export interface HeartbeatWatcherRouter {
11
- enqueue(notification: NotificationDescriptor): boolean;
12
- }
13
-
14
- export interface HeartbeatWatcherOptions {
15
- cwd: string;
16
- pollIntervalMs?: number;
17
- thresholds?: GradientThresholds;
18
- manifestCache: ManifestCache;
19
- registry: MetricRegistry;
20
- router: HeartbeatWatcherRouter;
21
- deadletterTickThreshold?: number;
22
- onDead?: (runId: string, taskId: string, elapsed: number) => void;
23
- onDeadletterTrigger?: (manifest: TeamRunManifest, taskId: string) => void;
24
- }
25
-
26
- /**
27
- * Polls running runs for heartbeat staleness.
28
- *
29
- * Uses recursive setTimeout to avoid timer storms.
30
- * Cleanup is done in the same pass — no second scan over manifests.
31
- * Keys for runs that disappear from the cache are cleaned via staleness-age policy
32
- * rather than being leaked forever.
33
- */
34
- export class HeartbeatWatcher {
35
- private timer?: ReturnType<typeof setTimeout>;
36
- private lastLevel = new Map<string, HeartbeatLevel>();
37
- private consecutiveDead = new Map<string, number>();
38
- private lastSeen = new Map<string, number>(); // key → last time it was active
39
- /** Max age (ms) to retain a stale key before garbage-collecting it. */
40
- private readonly maxKeyAgeMs = 600_000; // 10 minutes
41
- private readonly opts: HeartbeatWatcherOptions;
42
-
43
- constructor(opts: HeartbeatWatcherOptions) {
44
- this.opts = opts;
45
- }
46
-
47
- start(): void {
48
- this.dispose();
49
- this.scheduleTick();
50
- }
51
-
52
- private scheduleTick(): void {
53
- this.timer = setTimeout(() => this.tick(), this.opts.pollIntervalMs ?? 5000);
54
- this.timer.unref();
55
- }
56
-
57
- tick(now = Date.now()): void {
58
- try {
59
- this.tickUnsafe(now);
60
- } catch (error) {
61
- logInternalError("heartbeat-watcher.tick", error);
62
- } finally {
63
- this.scheduleTick();
64
- }
65
- }
66
-
67
- private tickUnsafe(now: number): void {
68
- const thresholds = this.opts.thresholds ?? DEFAULT_GRADIENT_THRESHOLDS;
69
- const tickThreshold = this.opts.deadletterTickThreshold ?? 3;
70
- const activeKeys = new Set<string>();
71
-
72
- for (const run of this.opts.manifestCache.list(50)) {
73
- if (run.status !== "running") continue;
74
- const loaded = loadRunManifestById(this.opts.cwd, run.runId);
75
- if (!loaded) continue;
76
- for (const task of loaded.tasks) {
77
- if (task.status !== "running") continue;
78
- const key = `${run.runId}:${task.id}`;
79
- activeKeys.add(key);
80
- this.lastSeen.set(key, now);
81
-
82
- const elapsed = heartbeatAgeMs(task.heartbeat, now);
83
- const level = classifyHeartbeat(task.heartbeat, thresholds, now);
84
- this.opts.registry.gauge("crew.heartbeat.staleness_ms", "Heartbeat elapsed since last seen, milliseconds").set({ runId: run.runId, taskId: task.id }, Number.isFinite(elapsed) ? elapsed : thresholds.deadMs);
85
- this.opts.registry.counter("crew.heartbeat.level_total", "Heartbeat classifications by level").inc({ runId: run.runId, level });
86
- const previous = this.lastLevel.get(key);
87
- this.lastLevel.set(key, level);
88
- if (level === "dead" && previous !== "dead") {
89
- this.opts.registry.counter("crew.heartbeat.dead_total", "Dead heartbeat detections").inc({ runId: run.runId });
90
- appendEvent(loaded.manifest.eventsPath, { type: "crew.task.heartbeat_dead", runId: run.runId, taskId: task.id, message: `Task ${task.id} heartbeat dead.`, data: { elapsedMs: Number.isFinite(elapsed) ? elapsed : undefined } });
91
- this.opts.router.enqueue({ id: `dead_${run.runId}_${task.id}`, severity: "warning", source: "heartbeat-watcher", runId: run.runId, title: `Task ${task.id} heartbeat dead`, body: "Background watcher detected a stuck worker." });
92
- this.opts.onDead?.(run.runId, task.id, Number.isFinite(elapsed) ? elapsed : thresholds.deadMs);
93
- }
94
- if (level === "dead") {
95
- const count = (this.consecutiveDead.get(key) ?? 0) + 1;
96
- this.consecutiveDead.set(key, count);
97
- if (count === tickThreshold) this.opts.onDeadletterTrigger?.(loaded.manifest, task.id);
98
- } else {
99
- this.consecutiveDead.delete(key);
100
- }
101
- }
102
- }
103
-
104
- // Cleanup: drop keys that were NOT in this tick's active set AND
105
- // haven't been seen for > maxKeyAgeMs. This covers runs that
106
- // completed or fell out of the manifest cache's top-50 window.
107
- const cutoff = now - this.maxKeyAgeMs;
108
- for (const [key, ts] of this.lastSeen) {
109
- if (!activeKeys.has(key) && ts < cutoff) {
110
- this.lastLevel.delete(key);
111
- this.consecutiveDead.delete(key);
112
- this.lastSeen.delete(key);
113
- }
114
- }
115
- }
116
-
117
- dispose(): void {
118
- if (this.timer) clearTimeout(this.timer);
119
- this.timer = undefined;
120
- this.lastLevel.clear();
121
- this.consecutiveDead.clear();
122
- this.lastSeen.clear();
123
- }
124
- }
1
+ import type { NotificationDescriptor } from "../extension/notification-router.ts";
2
+ import type { MetricRegistry } from "../observability/metric-registry.ts";
3
+ import { appendEvent } from "../state/event-log.ts";
4
+ import { loadRunManifestById } from "../state/state-store.ts";
5
+ import type { TeamRunManifest } from "../state/types.ts";
6
+ import { logInternalError } from "../utils/internal-error.ts";
7
+ import type { ManifestCache } from "./manifest-cache.ts";
8
+ import { classifyHeartbeat, DEFAULT_GRADIENT_THRESHOLDS, heartbeatAgeMs, type GradientThresholds, type HeartbeatLevel } from "./heartbeat-gradient.ts";
9
+
10
/** Notification sink used by the watcher; only `enqueue` is ever called. */
export interface HeartbeatWatcherRouter {
	/** Queue a notification. The watcher ignores the boolean result. */
	enqueue(notification: NotificationDescriptor): boolean;
}

/** Construction options for {@link HeartbeatWatcher}. */
export interface HeartbeatWatcherOptions {
	/** Directory passed to `loadRunManifestById` when reloading a run. */
	cwd: string;
	/** Delay between polls in milliseconds. Defaults to 5000. */
	pollIntervalMs?: number;
	/** Staleness thresholds. Defaults to `DEFAULT_GRADIENT_THRESHOLDS`. */
	thresholds?: GradientThresholds;
	/** Source of candidate runs; the watcher reads `list(50)` on every tick. */
	manifestCache: ManifestCache;
	/** Registry that receives the `crew.heartbeat.*` gauge and counters. */
	registry: MetricRegistry;
	/** Receives one warning notification per transition into "dead". */
	router: HeartbeatWatcherRouter;
	/** Consecutive dead ticks needed to fire `onDeadletterTrigger`. Defaults to 3. */
	deadletterTickThreshold?: number;
	/**
	 * Called once per transition into "dead". `elapsed` is the heartbeat age in
	 * milliseconds, or `thresholds.deadMs` when the age is not finite.
	 */
	onDead?: (runId: string, taskId: string, elapsed: number) => void;
	/** Called on the tick where the consecutive-dead count equals the threshold. */
	onDeadletterTrigger?: (manifest: TeamRunManifest, taskId: string) => void;
}

/**
 * Polls running runs for heartbeat staleness.
 *
 * Every tick lists up to 50 runs from the manifest cache, reloads each run
 * whose status is "running", and classifies the heartbeat of each running
 * task. A transition into "dead" appends a `crew.task.heartbeat_dead` event,
 * enqueues a warning notification and calls `onDead`. Reaching
 * `deadletterTickThreshold` consecutive dead ticks calls `onDeadletterTrigger`.
 *
 * Scheduling uses a self-rearming `setTimeout`. At most one timer is pending
 * at any time, and nothing is re-armed once `dispose()` has been called — even
 * when `dispose()` is invoked from inside one of the callbacks during a tick.
 *
 * Per-task bookkeeping for keys that stop appearing is garbage-collected once
 * the key has not been seen for `maxKeyAgeMs`, so it is not leaked forever.
 */
export class HeartbeatWatcher {
	private timer?: ReturnType<typeof setTimeout>;
	/** True between `start()` and `dispose()`; gates every re-arm of the timer. */
	private running = false;
	private lastLevel = new Map<string, HeartbeatLevel>();
	private consecutiveDead = new Map<string, number>();
	private lastSeen = new Map<string, number>(); // key → last time it was active
	/** Max age (ms) to retain a stale key before garbage-collecting it. */
	private readonly maxKeyAgeMs = 600_000; // 10 minutes
	private readonly opts: HeartbeatWatcherOptions;

	constructor(opts: HeartbeatWatcherOptions) {
		this.opts = opts;
	}

	/** Reset all state and begin polling. Safe to call repeatedly. */
	start(): void {
		this.dispose();
		this.running = true;
		this.scheduleTick();
	}

	/** Arm the next poll, replacing any timer that is still pending. */
	private scheduleTick(): void {
		// A disposed (or never started) watcher must not keep itself alive.
		if (!this.running) return;
		// A manual tick() may run while a timer is pending; never stack chains.
		if (this.timer) clearTimeout(this.timer);
		this.timer = setTimeout(() => this.tick(), this.opts.pollIntervalMs ?? 5000);
		this.timer.unref();
	}

	/**
	 * Run one polling pass. Errors are logged, never thrown.
	 *
	 * May be called manually; the next poll is only scheduled while the watcher
	 * is started.
	 *
	 * @param now - Timestamp (ms) used for all age calculations in this pass.
	 */
	tick(now = Date.now()): void {
		try {
			this.tickUnsafe(now);
		} catch (error) {
			logInternalError("heartbeat-watcher.tick", error);
		} finally {
			this.scheduleTick();
		}
	}

	private tickUnsafe(now: number): void {
		const thresholds = this.opts.thresholds ?? DEFAULT_GRADIENT_THRESHOLDS;
		const tickThreshold = this.opts.deadletterTickThreshold ?? 3;
		const activeKeys = new Set<string>();

		for (const run of this.opts.manifestCache.list(50)) {
			if (run.status !== "running") continue;
			// Isolate failures per run: one unreadable manifest or failing
			// callback must not hide stale heartbeats in the remaining runs
			// or skip the cleanup pass below.
			try {
				const loaded = loadRunManifestById(this.opts.cwd, run.runId);
				if (!loaded) continue;
				for (const task of loaded.tasks) {
					if (task.status !== "running") continue;
					const key = `${run.runId}:${task.id}`;
					activeKeys.add(key);
					this.lastSeen.set(key, now);

					const elapsed = heartbeatAgeMs(task.heartbeat, now);
					const level = classifyHeartbeat(task.heartbeat, thresholds, now);
					// A non-finite age is reported as the dead threshold.
					const reportedElapsed = Number.isFinite(elapsed) ? elapsed : thresholds.deadMs;
					this.opts.registry.gauge("crew.heartbeat.staleness_ms", "Heartbeat elapsed since last seen, milliseconds").set({ runId: run.runId, taskId: task.id }, reportedElapsed);
					this.opts.registry.counter("crew.heartbeat.level_total", "Heartbeat classifications by level").inc({ runId: run.runId, level });

					const previous = this.lastLevel.get(key);
					this.lastLevel.set(key, level);
					// Notify only on the transition into "dead", not on every dead tick.
					if (level === "dead" && previous !== "dead") {
						this.opts.registry.counter("crew.heartbeat.dead_total", "Dead heartbeat detections").inc({ runId: run.runId });
						appendEvent(loaded.manifest.eventsPath, { type: "crew.task.heartbeat_dead", runId: run.runId, taskId: task.id, message: `Task ${task.id} heartbeat dead.`, data: { elapsedMs: Number.isFinite(elapsed) ? elapsed : undefined } });
						this.opts.router.enqueue({ id: `dead_${run.runId}_${task.id}`, severity: "warning", source: "heartbeat-watcher", runId: run.runId, title: `Task ${task.id} heartbeat dead`, body: "Background watcher detected a stuck worker." });
						this.opts.onDead?.(run.runId, task.id, reportedElapsed);
					}

					if (level === "dead") {
						const count = (this.consecutiveDead.get(key) ?? 0) + 1;
						this.consecutiveDead.set(key, count);
						// Strict equality: fires on the tick that reaches the threshold only.
						if (count === tickThreshold) this.opts.onDeadletterTrigger?.(loaded.manifest, task.id);
					} else {
						this.consecutiveDead.delete(key);
					}
				}
			} catch (error) {
				logInternalError("heartbeat-watcher.tick", error);
			}
		}

		// Cleanup: drop keys that were NOT in this tick's active set AND
		// haven't been seen for > maxKeyAgeMs. This covers runs that
		// completed or fell out of the manifest cache's top-50 window.
		const cutoff = now - this.maxKeyAgeMs;
		for (const [key, ts] of this.lastSeen) {
			if (!activeKeys.has(key) && ts < cutoff) {
				this.lastLevel.delete(key);
				this.consecutiveDead.delete(key);
				this.lastSeen.delete(key);
			}
		}
	}

	/** Stop polling and forget all per-task state. */
	dispose(): void {
		this.running = false;
		if (this.timer) clearTimeout(this.timer);
		this.timer = undefined;
		this.lastLevel.clear();
		this.consecutiveDead.clear();
		this.lastSeen.clear();
	}
}
@@ -0,0 +1,262 @@
1
+ /**
2
+ * Transparent iteration hooks — runs user-supplied before/after task scripts
3
+ * with structured JSON payload on stdin.
4
+ *
5
+ * Distilled from pi-autoresearch's iteration hook pattern.
6
+ */
7
+ import { spawn } from "node:child_process";
8
+ import * as fs from "node:fs";
9
+ import { DENIED_METRIC_NAMES } from "./metric-parser.ts";
10
+
11
/** Which side of a task the hook runs on. */
export type HookStage = "before" | "after";

/** Shape of `HookPayload.lastResult` when it is present. */
interface HookLastResult {
	status: string;
	description: string;
	/** Optional free-form diagnostics map. */
	diagnostics?: Record<string, unknown>;
}

/** Shape of `HookPayload.session`. */
interface HookSessionInfo {
	teamName: string;
	workflowName: string;
	goal: string;
	completedTasks: number;
	totalTasks: number;
}

/** JSON document written to the hook script's stdin. */
export interface HookPayload {
	/** Stage the hook is being invoked for. */
	event: HookStage;
	/** Also used as the working directory of the spawned hook process. */
	cwd: string;
	taskId: string;
	runId: string;
	taskRole: string;
	/** May be omitted or explicitly `null`. */
	lastResult?: HookLastResult | null;
	session: HookSessionInfo;
}

/** Outcome of one hook invocation. */
export interface HookResult {
	/** Whether the hook script was actually executed. */
	fired: boolean;
	/** Captured stdout (truncated to 8KB). */
	stdout: string;
	/** Captured stderr. */
	stderr: string;
	/** Exit code of the hook process, or `null` when none is available. */
	exitCode: number | null;
	/** Whether the hook was killed for exceeding the timeout. */
	timedOut: boolean;
	/** Wall-clock duration in milliseconds. */
	durationMs: number;
}
50
+
51
/** Upper bound on retained hook stdout: 8 KiB. */
const MAX_STDOUT_BYTES = 8 * 1024;

/** How long a hook may run before it is killed: 30 seconds, in milliseconds. */
const HOOK_TIMEOUT_MS = 30 * 1000;
56
+
57
/**
 * Build the placeholder result returned when no hook was run: `fired` is
 * false, both output streams are empty, there is no exit code, and the
 * duration is zero. A fresh object is returned on every call.
 */
function notFiredResult(): HookResult {
	const skipped: HookResult = {
		fired: false,
		stdout: "",
		stderr: "",
		exitCode: null,
		timedOut: false,
		durationMs: 0,
	};
	return skipped;
}
70
+
71
/**
 * Truncate a buffer to at most `limit` bytes without producing a broken
 * UTF-8 sequence at the end.
 *
 * - A buffer that already fits is returned unchanged.
 * - Otherwise the cut snaps back to the last newline inside the first `limit`
 *   bytes (the newline itself is dropped), so output ends on a whole line.
 * - When there is no newline in range, the cut is moved back past any UTF-8
 *   continuation bytes so a multi-byte character is never split in half.
 *
 * @param buf - Raw bytes to truncate
 * @param limit - Maximum number of bytes to keep; negative or non-finite
 *   values are treated as 0
 * @returns A view onto `buf` (no copy) of at most `limit` bytes
 */
function truncateToLimit(buf: Buffer, limit: number): Buffer {
	if (buf.byteLength <= limit) return buf;

	const cap = Number.isFinite(limit) ? Math.max(0, Math.trunc(limit)) : 0;
	const head = buf.subarray(0, cap);

	// 0x0A never occurs inside a multi-byte UTF-8 sequence, so cutting at a
	// newline is always a safe character boundary.
	const lastNewline = head.lastIndexOf("\n");
	if (lastNewline >= 0) {
		return head.subarray(0, lastNewline);
	}

	// No newline: buf[end] is the first byte that would be dropped. While it
	// is a continuation byte (0b10xxxxxx) the cut sits inside a character, so
	// back up to that character's lead byte. A UTF-8 character has at most
	// three continuation bytes, which bounds the walk for non-UTF-8 data.
	let end = cap;
	while (end > 0 && cap - end < 3 && ((buf[end] ?? 0) & 0xc0) === 0x80) {
		end--;
	}
	return buf.subarray(0, end);
}
87
+
88
/**
 * Check whether `scriptPath` points at a regular file that may be executed.
 *
 * Directories and other non-file entries are rejected: they can pass an
 * existence + X_OK check, but are not runnable scripts. Any filesystem error
 * (missing path, permission denied, ...) is reported as "not runnable".
 *
 * @param scriptPath - Path to test; relative paths resolve against the
 *   current process working directory
 * @returns true when the path is a regular file and, on non-Windows
 *   platforms, carries execute permission for the current process
 */
function isScriptRunnable(scriptPath: string): boolean {
	try {
		// statSync throws for a missing path; the catch below maps that to false.
		if (!fs.statSync(scriptPath).isFile()) return false;

		// On Windows, X_OK is unreliable — just check F_OK (file exists).
		// On Unix, check both F_OK and X_OK.
		const mode =
			process.platform === "win32"
				? fs.constants.F_OK
				: fs.constants.F_OK | fs.constants.X_OK;
		fs.accessSync(scriptPath, mode);
		return true;
	} catch {
		return false;
	}
}
107
+
108
/**
 * Run an iteration hook script with JSON payload on stdin.
 *
 * Spawns `bash <script>` in `payload.cwd` with the hook payload as JSON on
 * stdin. Stdout is capped at 8KB (see `truncateToLimit`); stderr is capped at
 * roughly 64 KiB. Output beyond the caps is read and discarded so the hook
 * never blocks on a full pipe and memory use stays bounded. Enforces a
 * 30-second timeout by sending SIGKILL to the bash process.
 *
 * The returned promise never rejects: spawn failures are reported as a
 * result with `exitCode: null` and the error message in `stderr`.
 *
 * **Security note:** The script path is user-configurable and executed with
 * minimal environment (PATH, HOME, USER, LANG). Only use with trusted script
 * paths from workspace-owned configuration. No path containment validation
 * is performed.
 *
 * **Path note:** the runnable check resolves a relative `hookScriptPath`
 * against the process working directory, while bash resolves it against
 * `payload.cwd`. Prefer absolute paths when the two directories can differ.
 *
 * @param payload - Structured hook payload
 * @param hookScriptPath - Absolute or relative path to the hook script
 * @returns HookResult indicating whether the hook fired and its output
 */
export async function runIterationHook(
	payload: HookPayload,
	hookScriptPath: string,
): Promise<HookResult> {
	if (!isScriptRunnable(hookScriptPath)) {
		return notFiredResult();
	}

	// Retention cap for stderr. Whole chunks are kept until the cap is
	// crossed, so the retained size may exceed it by at most one chunk.
	const MAX_STDERR_BYTES = 64 * 1024;

	const startTime = Date.now();
	const stdinJson = JSON.stringify(payload);
	const stdoutChunks: Buffer[] = [];
	const stderrChunks: Buffer[] = [];
	let stdoutBytes = 0;
	let stderrBytes = 0;

	return new Promise<HookResult>((resolve) => {
		const child = spawn("bash", [hookScriptPath], {
			cwd: payload.cwd,
			env: { PATH: process.env.PATH ?? "/usr/bin:/bin", HOME: process.env.HOME ?? "/tmp", USER: process.env.USER, LANG: process.env.LANG, PI_CREW_HOOK: "1" },
			stdio: ["pipe", "pipe", "pipe"],
		});

		let killed = false;
		let settled = false;
		// NOTE(review): SIGKILL reaches only the bash process. Descendants that
		// keep the stdout/stderr pipes open could delay "close" — confirm
		// whether hooks are expected to spawn long-lived children.
		const timeout = setTimeout(() => {
			killed = true;
			child.kill("SIGKILL");
		}, HOOK_TIMEOUT_MS);

		// "error" and "close" can both fire for one child; report the first only.
		const settle = (result: HookResult): void => {
			if (settled) return;
			settled = true;
			clearTimeout(timeout);
			resolve(result);
		};

		child.stdout.on("data", (chunk: Buffer) => {
			// Keep collecting until the total is strictly past the cap so the
			// final truncation still sees an over-limit buffer; drop the rest.
			if (stdoutBytes > MAX_STDOUT_BYTES) return;
			stdoutChunks.push(chunk);
			stdoutBytes += chunk.byteLength;
		});

		child.stderr.on("data", (chunk: Buffer) => {
			if (stderrBytes > MAX_STDERR_BYTES) return;
			stderrChunks.push(chunk);
			stderrBytes += chunk.byteLength;
		});

		child.on("close", (code: number | null) => {
			const rawStdout = Buffer.concat(stdoutChunks);
			const rawStderr = Buffer.concat(stderrChunks);
			settle({
				fired: true,
				stdout: truncateToLimit(rawStdout, MAX_STDOUT_BYTES).toString("utf-8"),
				stderr: rawStderr.toString("utf-8"),
				exitCode: code,
				timedOut: killed,
				durationMs: Date.now() - startTime,
			});
		});

		child.on("error", (err: Error) => {
			settle({
				fired: true,
				stdout: "",
				stderr: err.message,
				exitCode: null,
				timedOut: false,
				durationMs: Date.now() - startTime,
			});
		});

		// A hook that exits without reading stdin makes this write fail with
		// EPIPE. Without a listener that stream error would surface as an
		// uncaught exception; the hook's own exit status already tells the story.
		child.stdin.on("error", () => {
			/* intentionally ignored */
		});

		// Write payload to stdin and close it
		child.stdin.end(stdinJson, "utf-8");
	});
}
193
+
194
/**
 * Derive a steer message from the hook result.
 *
 * Checks are applied in this order:
 * - Hook did not fire → null
 * - Timeout → timeout steer message
 * - Non-zero exit → error steer message (with up to 200 chars of stderr)
 * - Empty stdout → null (no steer)
 * - Otherwise → trimmed stdout with denied `CREW_METRIC` lines removed, or
 *   null when nothing is left after that filtering
 *
 * @param stage - Stage label used as the `[<stage>-hook]` message prefix
 * @param result - Result returned by the hook run
 * @returns The steer text, or null when there is nothing to say
 */
export function steerMessageFromHook(
	stage: HookStage,
	result: HookResult,
): string | null {
	if (!result.fired) return null;

	if (result.timedOut) {
		return `[${stage}-hook] Hook timed out after ${result.durationMs}ms`;
	}

	if (result.exitCode !== null && result.exitCode !== 0) {
		const stderrSnippet = result.stderr.trim().slice(0, 200);
		return `[${stage}-hook] Hook exited with code ${result.exitCode}${stderrSnippet ? `: ${stderrSnippet}` : ""}`;
	}

	const trimmed = result.stdout.trim();
	if (trimmed.length === 0) return null;

	// Filter out prototype-polluting metric names from hook output.
	// The pattern has no "g" flag, so it is stateless and safe to reuse.
	const metricLine = /^CREW_METRIC\s+(\w+)=/;
	const safeLines = trimmed.split("\n").filter((line) => {
		const name = metricLine.exec(line)?.[1];
		return name === undefined || !DENIED_METRIC_NAMES.has(name);
	});

	// Dropping lines can leave nothing (or only blank edges) behind; honour
	// the "empty → null" contract instead of returning an empty steer message.
	const steer = safeLines.join("\n").trim();
	return steer.length > 0 ? steer : null;
}
233
+
234
/**
 * Produce the record describing one hook invocation for events.jsonl.
 *
 * The record always carries `type`, `stage`, `fired` and `durationMs`. When
 * the hook fired it also carries `exitCode` and `timedOut`, plus
 * `stdoutPreview` / `stderrPreview` (first 512 characters) for whichever
 * stream is non-empty.
 */
export function hookLogEntry(
	stage: HookStage,
	result: HookResult,
): Record<string, unknown> {
	const { fired, durationMs, exitCode, timedOut, stdout, stderr } = result;

	const record: Record<string, unknown> = { type: "iteration-hook", stage, fired, durationMs };
	if (!fired) return record;

	record.exitCode = exitCode;
	record.timedOut = timedOut;

	// Short previews only — enough for diagnostics.
	if (stdout.length > 0) record.stdoutPreview = stdout.slice(0, 512);
	if (stderr.length > 0) record.stderrPreview = stderr.slice(0, 512);

	return record;
}