pi-crew 0.2.3 → 0.2.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (348) hide show
  1. package/AGENTS.md +57 -32
  2. package/CHANGELOG.md +466 -448
  3. package/LICENSE +21 -21
  4. package/NOTICE.md +16 -16
  5. package/README.md +323 -323
  6. package/docs/FEATURE_INTAKE.md +126 -0
  7. package/docs/HARNESS.md +86 -0
  8. package/docs/HARNESS_BACKLOG.md +41 -0
  9. package/docs/TEST_MATRIX.md +49 -0
  10. package/docs/actions-reference.md +595 -595
  11. package/docs/architecture.md +180 -180
  12. package/docs/code-review-2026-05-11.md +592 -592
  13. package/docs/commands-reference.md +347 -347
  14. package/docs/comparison-pi-subagents-vs-pi-crew.md +303 -0
  15. package/docs/decisions/0001-durable-state.md +41 -0
  16. package/docs/decisions/0002-child-process-for-async.md +42 -0
  17. package/docs/decisions/0003-depth-guard.md +36 -0
  18. package/docs/decisions/0004-execfile-over-exec.md +34 -0
  19. package/docs/decisions/0005-no-parameter-properties.md +49 -0
  20. package/docs/decisions/0006-publish-bundled-esm.md +63 -0
  21. package/docs/decisions/0007-active-run-binary-index.md +54 -0
  22. package/docs/decisions/0008-child-pi-warm-pool.md +61 -0
  23. package/docs/decisions/README.md +23 -0
  24. package/docs/followup-review-round4-2026-05-13.md +107 -0
  25. package/docs/implementation-plan-top3.md +333 -0
  26. package/docs/live-mailbox-runtime.md +36 -36
  27. package/docs/next-upgrade-roadmap.md +808 -808
  28. package/docs/oh-my-pi-research.md +509 -0
  29. package/docs/perf/baseline-2026-05.md +113 -0
  30. package/docs/perf/final-report-2026-05.md +206 -0
  31. package/docs/perf/sprint-1-report.md +71 -0
  32. package/docs/perf/sprint-2-report.md +81 -0
  33. package/docs/perf/sprint-2.5-report.md +53 -0
  34. package/docs/perf/sprint-3-report.md +36 -0
  35. package/docs/perf/sprint-4-report.md +47 -0
  36. package/docs/perf/sprint-5-report.md +51 -0
  37. package/docs/perf/sprint-6-report.md +94 -0
  38. package/docs/perf/sprint-7-report.md +74 -0
  39. package/docs/perf/upgrade-plan-2026-05.md +147 -0
  40. package/docs/pi-subagents3-deep-analysis.md +508 -0
  41. package/docs/product/README.md +31 -0
  42. package/docs/product/platform.md +27 -0
  43. package/docs/product/runtime-safety.md +37 -0
  44. package/docs/product/team-run.md +39 -0
  45. package/docs/product/team-tool.md +37 -0
  46. package/docs/publishing.md +65 -65
  47. package/docs/resource-formats.md +134 -134
  48. package/docs/runtime-analysis-child-vs-live.md +171 -0
  49. package/docs/runtime-flow.md +148 -148
  50. package/docs/runtime-migration-in-process-analysis.md +250 -0
  51. package/docs/stories/README.md +30 -0
  52. package/docs/stories/backlog.md +36 -0
  53. package/docs/templates/decision.md +27 -0
  54. package/docs/templates/story.md +44 -0
  55. package/docs/templates/validation-report.md +32 -0
  56. package/docs/usage.md +238 -238
  57. package/index.ts +7 -6
  58. package/install.mjs +65 -65
  59. package/package.json +107 -100
  60. package/schema.json +222 -222
  61. package/skills/child-pi-spawning/SKILL.md +213 -0
  62. package/skills/context-artifact-hygiene/SKILL.md +32 -0
  63. package/skills/event-log-tracing/SKILL.md +299 -0
  64. package/skills/git-master/SKILL.md +225 -24
  65. package/skills/live-agent-lifecycle/SKILL.md +192 -0
  66. package/skills/mailbox-interactive/SKILL.md +300 -19
  67. package/skills/model-routing-context/SKILL.md +94 -0
  68. package/skills/multi-perspective-review/SKILL.md +88 -0
  69. package/skills/read-only-explorer/SKILL.md +250 -26
  70. package/skills/safe-bash/SKILL.md +307 -21
  71. package/skills/verification-before-done/SKILL.md +11 -2
  72. package/skills/widget-rendering/SKILL.md +258 -0
  73. package/skills/workspace-isolation/SKILL.md +202 -0
  74. package/skills/worktree-isolation/SKILL.md +202 -18
  75. package/src/adapters/claude-adapter.ts +25 -25
  76. package/src/adapters/codex-adapter.ts +21 -21
  77. package/src/adapters/cursor-adapter.ts +17 -17
  78. package/src/adapters/export-util.ts +137 -137
  79. package/src/adapters/index.ts +15 -15
  80. package/src/adapters/registry.ts +18 -18
  81. package/src/adapters/types.ts +23 -23
  82. package/src/agents/agent-config.ts +38 -38
  83. package/src/agents/agent-serializer.ts +38 -38
  84. package/src/agents/discover-agents.ts +121 -118
  85. package/src/config/config.ts +740 -858
  86. package/src/config/defaults.ts +96 -96
  87. package/src/config/drift-detector.ts +211 -211
  88. package/src/config/markers.ts +327 -327
  89. package/src/config/resilient-parser.ts +109 -108
  90. package/src/config/suggestions.ts +74 -74
  91. package/src/config/types.ts +199 -0
  92. package/src/extension/async-notifier.ts +123 -89
  93. package/src/extension/autonomous-policy.ts +169 -169
  94. package/src/extension/cross-extension-rpc.ts +104 -104
  95. package/src/extension/help.ts +47 -47
  96. package/src/extension/import-index.ts +69 -69
  97. package/src/extension/management.ts +395 -382
  98. package/src/extension/notification-router.ts +116 -116
  99. package/src/extension/notification-sink.ts +51 -51
  100. package/src/extension/project-init.ts +168 -168
  101. package/src/extension/register.ts +859 -668
  102. package/src/extension/registration/artifact-cleanup.ts +15 -15
  103. package/src/extension/registration/command-utils.ts +54 -54
  104. package/src/extension/registration/commands.ts +559 -452
  105. package/src/extension/registration/compaction-guard.ts +125 -125
  106. package/src/extension/registration/subagent-helpers.ts +102 -102
  107. package/src/extension/registration/subagent-tools.ts +220 -159
  108. package/src/extension/registration/team-tool.ts +159 -99
  109. package/src/extension/registration/viewers.ts +29 -0
  110. package/src/extension/result-watcher.ts +128 -128
  111. package/src/extension/run-bundle-schema.ts +89 -89
  112. package/src/extension/run-export.ts +73 -73
  113. package/src/extension/run-import.ts +84 -84
  114. package/src/extension/run-index.ts +94 -94
  115. package/src/extension/run-maintenance.ts +142 -142
  116. package/src/extension/session-summary.ts +8 -8
  117. package/src/extension/team-manager-command.ts +96 -96
  118. package/src/extension/team-recommendation.ts +188 -188
  119. package/src/extension/team-tool/api.ts +5 -2
  120. package/src/extension/team-tool/cancel.ts +224 -209
  121. package/src/extension/team-tool/config-patch.ts +36 -36
  122. package/src/extension/team-tool/context.ts +60 -60
  123. package/src/extension/team-tool/doctor.ts +242 -242
  124. package/src/extension/team-tool/handle-settings.ts +421 -195
  125. package/src/extension/team-tool/inspect.ts +41 -41
  126. package/src/extension/team-tool/lifecycle-actions.ts +139 -139
  127. package/src/extension/team-tool/parallel-dispatch.ts +156 -156
  128. package/src/extension/team-tool/plan.ts +19 -19
  129. package/src/extension/team-tool/respond.ts +112 -111
  130. package/src/extension/team-tool/run.ts +246 -229
  131. package/src/extension/team-tool/status.ts +110 -110
  132. package/src/extension/team-tool-types.ts +13 -13
  133. package/src/extension/team-tool.ts +344 -344
  134. package/src/extension/tool-result.ts +16 -16
  135. package/src/extension/validate-resources.ts +77 -77
  136. package/src/hooks/registry.ts +61 -61
  137. package/src/hooks/types.ts +40 -40
  138. package/src/i18n.ts +184 -184
  139. package/src/observability/correlation.ts +35 -35
  140. package/src/observability/event-to-metric.ts +68 -68
  141. package/src/observability/exporters/adapter.ts +30 -30
  142. package/src/observability/exporters/otlp-exporter.ts +106 -92
  143. package/src/observability/exporters/prometheus-exporter.ts +54 -54
  144. package/src/observability/metric-registry.ts +87 -87
  145. package/src/observability/metric-retention.ts +54 -54
  146. package/src/observability/metric-sink.ts +81 -56
  147. package/src/observability/metrics-primitives.ts +167 -167
  148. package/src/prompt/prompt-runtime.ts +72 -72
  149. package/src/runtime/adaptive-plan.ts +338 -0
  150. package/src/runtime/agent-control.ts +169 -169
  151. package/src/runtime/agent-memory.ts +72 -72
  152. package/src/runtime/agent-observability.ts +114 -114
  153. package/src/runtime/async-marker.ts +26 -26
  154. package/src/runtime/async-runner.ts +153 -153
  155. package/src/runtime/attention-events.ts +28 -28
  156. package/src/runtime/auto-resume.ts +100 -100
  157. package/src/runtime/background-runner.ts +122 -89
  158. package/src/runtime/cancellation.ts +61 -61
  159. package/src/runtime/capability-inventory.ts +116 -116
  160. package/src/runtime/child-pi-pool.ts +68 -0
  161. package/src/runtime/child-pi.ts +541 -461
  162. package/src/runtime/code-summary.ts +247 -247
  163. package/src/runtime/compaction-summary.ts +271 -271
  164. package/src/runtime/concurrency.ts +58 -58
  165. package/src/runtime/crash-recovery.ts +317 -301
  166. package/src/runtime/crew-agent-records.ts +379 -281
  167. package/src/runtime/crew-agent-runtime.ts +60 -60
  168. package/src/runtime/cross-extension-rpc.ts +72 -0
  169. package/src/runtime/custom-tools/irc-tool.ts +201 -201
  170. package/src/runtime/custom-tools/submit-result-tool.ts +90 -90
  171. package/src/runtime/deadletter.ts +47 -47
  172. package/src/runtime/delivery-coordinator.ts +176 -176
  173. package/src/runtime/delta-conflict.ts +360 -360
  174. package/src/runtime/diagnostic-export.ts +102 -102
  175. package/src/runtime/direct-run.ts +35 -35
  176. package/src/runtime/effectiveness.ts +82 -81
  177. package/src/runtime/errors/crew-errors.ts +166 -0
  178. package/src/runtime/event-stream-bridge.ts +92 -92
  179. package/src/runtime/foreground-control.ts +82 -82
  180. package/src/runtime/green-contract.ts +46 -46
  181. package/src/runtime/group-join.ts +234 -106
  182. package/src/runtime/heartbeat-watcher.ts +145 -124
  183. package/src/runtime/iteration-hooks.ts +267 -267
  184. package/src/runtime/live-agent-control.ts +88 -88
  185. package/src/runtime/live-agent-manager.ts +377 -179
  186. package/src/runtime/live-control-realtime.ts +36 -36
  187. package/src/runtime/live-session-runtime.ts +676 -600
  188. package/src/runtime/loop-gates.ts +129 -129
  189. package/src/runtime/manifest-cache.ts +263 -263
  190. package/src/runtime/mcp-proxy.ts +113 -113
  191. package/src/runtime/metric-parser.ts +40 -40
  192. package/src/runtime/model-fallback.ts +282 -274
  193. package/src/runtime/model-resolver.ts +118 -0
  194. package/src/runtime/output-validator.ts +187 -187
  195. package/src/runtime/overflow-recovery.ts +175 -175
  196. package/src/runtime/parallel-research.ts +44 -44
  197. package/src/runtime/parallel-utils.ts +156 -156
  198. package/src/runtime/parent-guard.ts +80 -80
  199. package/src/runtime/phase-progress.ts +217 -217
  200. package/src/runtime/pi-args.ts +165 -165
  201. package/src/runtime/pi-json-output.ts +111 -111
  202. package/src/runtime/pi-spawn.ts +167 -167
  203. package/src/runtime/policy-engine.ts +79 -79
  204. package/src/runtime/post-checks.ts +125 -125
  205. package/src/runtime/post-exit-stdio-guard.ts +86 -86
  206. package/src/runtime/process-status.ts +97 -73
  207. package/src/runtime/progress-event-coalescer.ts +43 -43
  208. package/src/runtime/recovery-recipes.ts +74 -74
  209. package/src/runtime/retry-executor.ts +81 -81
  210. package/src/runtime/role-permission.ts +39 -39
  211. package/src/runtime/run-tracker.ts +99 -0
  212. package/src/runtime/runtime-policy.ts +21 -0
  213. package/src/runtime/runtime-resolver.ts +94 -91
  214. package/src/runtime/scheduler.ts +294 -0
  215. package/src/runtime/semaphore.ts +131 -131
  216. package/src/runtime/sensitive-paths.ts +92 -92
  217. package/src/runtime/session-usage.ts +79 -79
  218. package/src/runtime/settings-store.ts +103 -0
  219. package/src/runtime/sidechain-output.ts +29 -29
  220. package/src/runtime/skill-instructions.ts +222 -222
  221. package/src/runtime/stale-reconciler.ts +198 -189
  222. package/src/runtime/streaming-output.ts +47 -0
  223. package/src/runtime/subagent-manager.ts +404 -400
  224. package/src/runtime/subprocess-tool-registry.ts +67 -67
  225. package/src/runtime/task-display.ts +38 -38
  226. package/src/runtime/task-graph-scheduler.ts +122 -122
  227. package/src/runtime/task-graph.ts +207 -207
  228. package/src/runtime/task-output-context.ts +177 -177
  229. package/src/runtime/task-packet.ts +93 -93
  230. package/src/runtime/task-quality.ts +207 -207
  231. package/src/runtime/task-runner/capabilities.ts +78 -78
  232. package/src/runtime/task-runner/live-executor.ts +131 -113
  233. package/src/runtime/task-runner/progress.ts +119 -119
  234. package/src/runtime/task-runner/prompt-builder.ts +139 -139
  235. package/src/runtime/task-runner/prompt-pipeline.ts +64 -64
  236. package/src/runtime/task-runner/result-utils.ts +14 -14
  237. package/src/runtime/task-runner/run-projection.ts +103 -103
  238. package/src/runtime/task-runner/state-helpers.ts +22 -22
  239. package/src/runtime/task-runner.ts +469 -459
  240. package/src/runtime/team-runner.ts +693 -945
  241. package/src/runtime/usage-tracker.ts +71 -0
  242. package/src/runtime/worker-heartbeat.ts +21 -21
  243. package/src/runtime/worker-startup.ts +57 -57
  244. package/src/runtime/workflow-state.ts +187 -187
  245. package/src/runtime/yield-handler.ts +190 -190
  246. package/src/schema/config-schema.ts +172 -168
  247. package/src/schema/team-tool-schema.ts +126 -126
  248. package/src/schema/validation-types.ts +151 -148
  249. package/src/skills/discover-skills.ts +67 -67
  250. package/src/skills/skill-templates.ts +374 -374
  251. package/src/state/active-run-registry.ts +227 -191
  252. package/src/state/artifact-store.ts +130 -129
  253. package/src/state/atomic-write.ts +262 -195
  254. package/src/state/blob-store.ts +116 -116
  255. package/src/state/contracts.ts +111 -111
  256. package/src/state/event-log-rotation.ts +161 -158
  257. package/src/state/event-log.ts +383 -303
  258. package/src/state/event-reconstructor.ts +217 -217
  259. package/src/state/jsonl-writer.ts +82 -82
  260. package/src/state/locks.ts +146 -146
  261. package/src/state/mailbox.ts +446 -405
  262. package/src/state/state-store.ts +364 -351
  263. package/src/state/task-claims.ts +44 -44
  264. package/src/state/types.ts +285 -285
  265. package/src/state/usage.ts +29 -29
  266. package/src/subagents/async-entry.ts +1 -1
  267. package/src/subagents/index.ts +3 -3
  268. package/src/subagents/live/control.ts +1 -1
  269. package/src/subagents/live/manager.ts +1 -1
  270. package/src/subagents/live/realtime.ts +1 -1
  271. package/src/subagents/live/session-runtime.ts +1 -1
  272. package/src/subagents/manager.ts +1 -1
  273. package/src/subagents/spawn.ts +1 -1
  274. package/src/teams/discover-teams.ts +116 -116
  275. package/src/teams/team-config.ts +27 -27
  276. package/src/teams/team-serializer.ts +38 -38
  277. package/src/types/diff.d.ts +18 -18
  278. package/src/ui/agent-management-overlay.ts +144 -144
  279. package/src/ui/crew-widget.ts +487 -370
  280. package/src/ui/dashboard-panes/agents-pane.ts +109 -28
  281. package/src/ui/dashboard-panes/cancellation-pane.ts +42 -42
  282. package/src/ui/dashboard-panes/capability-pane.ts +59 -59
  283. package/src/ui/dashboard-panes/health-pane.ts +30 -30
  284. package/src/ui/dashboard-panes/mailbox-pane.ts +35 -35
  285. package/src/ui/dashboard-panes/progress-pane.ts +30 -30
  286. package/src/ui/dashboard-panes/transcript-pane.ts +10 -10
  287. package/src/ui/heartbeat-aggregator.ts +63 -63
  288. package/src/ui/keybinding-map.ts +97 -94
  289. package/src/ui/live-conversation-overlay.ts +152 -0
  290. package/src/ui/live-run-sidebar.ts +180 -180
  291. package/src/ui/mascot.ts +442 -442
  292. package/src/ui/overlays/agent-picker-overlay.ts +57 -57
  293. package/src/ui/overlays/confirm-overlay.ts +58 -58
  294. package/src/ui/overlays/mailbox-compose-overlay.ts +144 -144
  295. package/src/ui/overlays/mailbox-compose-preview.ts +63 -63
  296. package/src/ui/overlays/mailbox-detail-overlay.ts +122 -122
  297. package/src/ui/pi-ui-compat.ts +57 -57
  298. package/src/ui/powerbar-publisher.ts +221 -197
  299. package/src/ui/render-scheduler.ts +216 -143
  300. package/src/ui/run-action-dispatcher.ts +118 -118
  301. package/src/ui/run-dashboard.ts +526 -464
  302. package/src/ui/run-event-bus.ts +208 -208
  303. package/src/ui/run-snapshot-cache.ts +826 -777
  304. package/src/ui/settings-overlay.ts +721 -0
  305. package/src/ui/snapshot-types.ts +86 -70
  306. package/src/ui/theme-adapter.ts +190 -190
  307. package/src/ui/tool-progress-formatter.ts +89 -0
  308. package/src/ui/transcript-cache.ts +94 -94
  309. package/src/ui/transcript-viewer.ts +335 -335
  310. package/src/utils/conflict-detect.ts +662 -0
  311. package/src/utils/file-coalescer.ts +86 -86
  312. package/src/utils/frontmatter.ts +68 -68
  313. package/src/utils/fs-watch.ts +88 -31
  314. package/src/utils/gh-protocol.ts +479 -0
  315. package/src/utils/ids.ts +17 -17
  316. package/src/utils/incremental-reader.ts +104 -104
  317. package/src/utils/internal-error.ts +6 -6
  318. package/src/utils/names.ts +27 -27
  319. package/src/utils/paths.ts +102 -63
  320. package/src/utils/redaction.ts +44 -44
  321. package/src/utils/safe-paths.ts +47 -47
  322. package/src/utils/scan-cache.ts +136 -136
  323. package/src/utils/sse-parser.ts +134 -134
  324. package/src/utils/task-name-generator.ts +337 -337
  325. package/src/utils/timings.ts +33 -33
  326. package/src/utils/visual.ts +243 -198
  327. package/src/workflows/discover-workflows.ts +139 -139
  328. package/src/workflows/validate-workflow.ts +40 -40
  329. package/src/workflows/workflow-config.ts +26 -26
  330. package/src/workflows/workflow-serializer.ts +32 -32
  331. package/src/worktree/branch-freshness.ts +45 -45
  332. package/src/worktree/cleanup.ts +75 -75
  333. package/src/worktree/worktree-manager.ts +188 -188
  334. package/teams/default.team.md +12 -12
  335. package/teams/fast-fix.team.md +11 -11
  336. package/teams/implementation.team.md +18 -18
  337. package/teams/parallel-research.team.md +14 -14
  338. package/teams/research.team.md +11 -11
  339. package/teams/review.team.md +12 -12
  340. package/tsconfig.json +19 -19
  341. package/workflows/default.workflow.md +30 -30
  342. package/workflows/fast-fix.workflow.md +23 -23
  343. package/workflows/implementation.workflow.md +43 -43
  344. package/workflows/parallel-research.workflow.md +46 -46
  345. package/workflows/research.workflow.md +22 -22
  346. package/workflows/review.workflow.md +30 -30
  347. package/skills/task-packet/SKILL.md +0 -28
  348. package/skills/verify-evidence/SKILL.md +0 -27
@@ -1,808 +1,808 @@
1
- # pi-crew Next Upgrade Roadmap
2
-
3
- Date: 2026-05-05
4
- Source inputs:
5
-
6
- - `docs/research-oh-my-pi-distillation.md`
7
- - `docs/source-runtime-refactor-map.md`
8
- - Recent runtime hardening commits through `f5d47aa feat: surface run effectiveness evidence`
9
-
10
- This document tracks the next practical upgrades after the current scaffold/no-op subagent fix, runtime safety classification, cancellation provenance, intent audit trail, prompt pipeline artifacts, capability inventory artifacts, and run effectiveness reporting.
11
-
12
- ## Current Baseline
13
-
14
- Already implemented and pushed:
15
-
16
- - Real child worker execution is the default.
17
- - Implicit scaffold/no-op runs are blocked when worker execution is disabled by config/env.
18
- - Explicit `runtime.mode=scaffold` remains available for dry-run prompt/artifact generation.
19
- - Run `summary.md`, `progress.md`, and `status` now expose effectiveness evidence.
20
- - Structured cancellation reasons flow through retry/cancel/team-runner/run events/metrics/UI snapshot.
21
- - `cancel`, `cleanup`, `forget`, and `prune` accept audit intent metadata.
22
- - Live-agent control distinguishes `steer` from `follow-up` at live-control/API level.
23
- - Retry attempts have `attemptId`; max-retry deadletters link to the final `attemptId`.
24
- - Worker prompt pipeline and capability inventory metadata artifacts are written per task.
25
- - P0.1: effectiveness guard escalates `warn` to `blocked` for mutating-role tasks with no observable worker activity.
26
- - P1.1: mailbox `readMailbox` accepts `kind` filter; API `read-mailbox` supports `config.kind`.
27
- - P1.5: `TeamEventMetadata` extended with `parentEventId`, `attemptId`, `branchId`, `causationId`, `correlationId`.
28
- - P1.6: `buildSyntheticTerminalEvidence()` produces `"worker"`/`"cancelled"` terminal records for cancelled in-flight tasks.
29
- - P1.7: `buildCapabilityInventory(cwd)` normalizes teams/workflows/agents; API `operation=inventory`.
30
- - P2.1: typed hook lifecycle — `registerHook`/`executeHook` registry; `before_run_start` and `before_task_start` wired.
31
- - P2.4: `AbortSignal` wired into `collectRuns`, `validateMailbox`, `readAllMailboxMessages`, `pruneFinishedRuns`, `cleanupRunWorktrees`, etc.
32
- - Resumed scaffold runs preserve scaffold mode from the original manifest when workers are not disabled.
33
-
34
- ## Implementation Status as of `v0.1.46`
35
-
36
- This roadmap is **not complete overall**. The `v0.1.46` release completed several vertical slices, but multiple roadmap items remain partial or unimplemented.
37
-
38
- ### Implemented / mostly implemented
39
-
40
- - Baseline worker behavior: real child-process execution by default, explicit scaffold dry-runs, and blocked implicit scaffold/no-op runs.
41
- - P0.1 ✅ effectiveness policy enforcement: default guard escalates `warn` to `blocked` for mutating-role tasks.
42
- - P0.2 ✅ runtime safety persistence: manifests persist `runtimeResolution`; `runtime.resolved` event emitted; status shows safety; blocked runs persist evidence.
43
- - Effectiveness reporting: summary/progress/status expose no-observed-work evidence and policy outcome.
44
- - Structured cancellation basics: cancellation reasons flow through retry/backoff/team-runner paths and run/task events.
45
- - Retry attempt evidence: retry attempts and max-retry deadletters carry/link `attemptId` data.
46
- - Prompt pipeline artifacts and per-task capability metadata artifacts are written.
47
- - P1.3 worker teardown evidence vertical slice: `WorkerExitStatus` and terminal worker cancellation evidence exist.
48
-
49
- ### Completed in this upgrade cycle (after v0.1.46)
50
-
51
- - P0.1 effectiveness policy enforcement: default guard now escalates `warn` to `blocked` for mutating-role tasks with no observable worker activity; read-only roles remain `warning`.
52
- - P0.2 runtime safety persistence: manifests persist `runtimeResolution`; `runtime.resolved` event emitted; status shows safety; blocked runs persist evidence.
53
- - P1.1 durable steering/follow-up queues: `readMailbox` accepts `kind` filter; API `read-mailbox` supports `config.kind`; steering and follow-up are isolatable by kind.
54
- - P1.2 respond vs follow-up UX: `/team-follow-up` command added for continuation prompts; `/team-respond` remains for waiting-task replies.
55
- - P1.3 two-phase child process teardown: `WorkerExitStatus` populated from graceful SIGTERM → grace window → hard kill pipeline.
56
- - P1.5 event-tree provenance: `TeamEventMetadata` extended with `parentEventId`, `attemptId`, `branchId`, `causationId`, `correlationId`; retry and cancel events carry `attemptId`.
57
- - P1.6 synthetic terminal results: `buildSyntheticTerminalEvidence()` in `cancellation.ts`; cancelled in-flight tasks receive `"worker"`/`"cancelled"` terminal evidence records.
58
- - P1.7 unified capability inventory: `buildCapabilityInventory(cwd)` normalizes teams/workflows/agents into `CapabilityItem[]`; API `operation=inventory` returns sorted JSON.
59
- - P1.8 capability disable by stable ID: `disabledCapabilities` in `CrewPolicyConfig`; inventory marks disabled items with reason.
60
- - P2.1 typed hook lifecycle: `HookName`, `HookMode`, `HookOutcome`, `HookContext`, `HookResult`, `HookExecutionReport` types; `registerHook`/`executeHook`/`clearHooks` registry; `before_run_start` and `before_task_start` wired into team-runner.
61
- - P2.2 intent gates for destructive actions: `enforceDestructiveIntent` wired in cancel/cleanup/forget/prune/delete; configurable via `policy.requireIntentForDestructiveActions`.
62
- - P2.3 durable history projection: `transformRunContextBeforeWorkerStart()` and `convertRunHistoryToWorkerPrompt()` bounded projection functions.
63
- - P2.4 CancellationToken wired into long scans: `AbortSignal` passed to `collectRuns`/`validateMailbox`/`readAllMailboxMessages`/`pruneFinishedRuns`/`cleanupRunWorktrees`.
64
- - P2.5 content-addressed blob store: `writeBlob`/`readBlob`/`readBlobMetadata` with SHA-256 dedup and metadata sidecars.
65
- - P2.6 dashboard panes for capability and cancellation: `renderCapabilityPane` and `renderCancellationPane`.
66
- - Resume scaffold run fix: resumed runs preserve scaffold mode from the original manifest when workers are not disabled.
67
-
68
- ### Partial / not safe to mark complete
69
-
70
- - P1.4 reserve worker control channel before spawn: controller metadata persistence during startup not yet implemented.
71
- - P2.7 event-first UI: render coalescing and snapshot caches exist, but live UI still relies on durable file polling as a primary source in several panes.
72
- - P2.8 shared raw scan-entry cache: not yet implemented.
73
-
74
- ### Completed / no longer backlog
75
-
76
- - P2.7 event-first UI — RunEventBus wired into appendEvent; dashboard, widget, sidebar auto-invalidate on events; snapshot cache invalidates on events.
77
- - P2.8 shared raw scan-entry cache — SharedScanCache implemented and wired into manifest reads (run-index) and active-run-registry (active manifest reads).
78
- - P3.1 tarball-install smoke — `scripts/release-smoke.mjs` verified; `npm run smoke:release` added.
79
- - Hook lifecycle — All hooks wired: `before_run_start`, `before_task_start`, `before_cancel`, `before_forget`, `before_cleanup`, `before_publish`, `task_result`, `run_recovery`. Only `session_before_switch` remains (no cwd switch mechanism in current codebase).
80
-
81
- ### Remaining items
82
-
83
- - `session_before_switch` hook — no cwd/session switch mechanism in current codebase; placeholder for future.
84
- - P3.2 CI gate — integrate `smoke:release` into CI pipeline (requires CI config).
85
-
86
- ## Priority Legend
87
-
88
- - **P0**: correctness/safety issue; should be addressed before next release if feasible.
89
- - **P1**: high user-visible value or reliability gain; good patch-release candidates.
90
- - **P2**: larger subsystem work; should be planned and sequenced.
91
- - **P3**: polish/UX/longer-term architecture.
92
-
93
- ## P0 — Prevent Ineffective Completed Runs
94
-
95
- ### P0.1 Enforce effectiveness policy for non-scaffold workers
96
-
97
- **Problem**
98
-
99
- `summary/status` now surface effectiveness evidence, but non-scaffold `child-process`/`live-session` runs can still end `completed` when task evidence is weak unless the existing mutation guard fires.
100
-
101
- **Target behavior**
102
-
103
- - For real workers, a run with completed tasks but no observable worker activity should be `blocked` or `failed`, not silently `completed`.
104
- - Keep explicit scaffold dry-runs allowed, but label them as dry-runs.
105
- - Policy should be configurable:
106
- - `runtime.effectivenessGuard = "off" | "warn" | "block" | "fail"`
107
- - default candidate: `warn` for read-only roles, `block` for mutating roles.
108
-
109
- **Suggested files**
110
-
111
- - `src/runtime/team-runner.ts`
112
- - `src/runtime/completion-guard.ts`
113
- - `src/state/types.ts` if storing guard result on manifest/tasks
114
- - `src/schema/config-schema.ts`
115
- - `src/config/config.ts`
116
- - `test/unit/summary.test.ts`
117
- - `test/unit/team-runner-merge.test.ts` or new `test/unit/effectiveness-guard.test.ts`
118
-
119
- **Implementation sketch**
120
-
121
- 1. Extract run effectiveness calculation into a reusable exported helper, e.g.:
122
-
123
- ```ts
124
- export interface RunEffectivenessSummary {
125
- completed: number;
126
- observable: number;
127
- noObservedWorkTaskIds: string[];
128
- needsAttentionTaskIds: string[];
129
- workerExecution: "enabled" | "disabled/scaffold";
130
- severity: "ok" | "warning" | "blocked" | "failed";
131
- }
132
- ```
133
-
134
- 2. Use this helper for:
135
- - `progress.md`
136
- - `summary.md`
137
- - `status`
138
- - policy enforcement before `run.completed`.
139
-
140
- 3. For non-scaffold runs, if mutating tasks have no mutation/tool/model/transcript evidence:
141
- - append `policy.action` with `reason: "ineffective_worker"`;
142
- - set run `blocked` or `failed` depending on config;
143
- - include task IDs in `data`.
144
-
145
- **Acceptance criteria** ✅
146
-
147
- - ✅ A mocked child-process run with no tool/model/transcript evidence does not report clean `completed` by default.
148
- - ✅ Scaffold run still completes as explicit dry-run and displays `Worker execution: disabled/scaffold`.
149
- - ✅ `status` clearly lists `noObservedWork` and `needsAttention` task IDs.
150
- - ✅ Unit tests cover warn/block/fail modes.
151
- - ✅ Default guard escalates `warn` to `blocked` for mutating-role tasks.
152
-
153
- **Verification**
154
-
155
- ```bash
156
- npx tsc --noEmit
157
- node --experimental-strip-types --test --test-concurrency=1 --test-timeout=30000 test/unit/effectiveness-guard.test.ts test/unit/summary.test.ts
158
- npm run test:unit
159
- ```
160
-
161
- ### P0.2 Make runtime safety visible in manifest and run events
162
-
163
- **Problem**
164
-
165
- `runtime.safety` exists in runtime resolution, but it is not persisted as first-class run metadata. Debugging currently requires reading events or inferred artifacts.
166
-
167
- **Target behavior**
168
-
169
- - Manifest records resolved runtime:
170
-
171
- ```json
172
- {
173
- "runtimeResolution": {
174
- "kind": "child-process",
175
- "requestedMode": "auto",
176
- "safety": "trusted",
177
- "fallback": "child-process",
178
- "reason": "..."
179
- }
180
- }
181
- ```
182
-
183
- - `run.running` or `run.blocked` event includes the same resolution.
184
-
185
- **Suggested files**
186
-
187
- - `src/state/types.ts`
188
- - `src/extension/team-tool/run.ts`
189
- - `src/runtime/background-runner.ts`
190
- - `src/extension/team-tool/status.ts`
191
- - `test/unit/team-run.test.ts`
192
- - `test/unit/runtime-resolver.test.ts`
193
-
194
- **Acceptance criteria** ✅
195
-
196
- - ✅ `status` shows `Runtime safety: trusted|explicit_dry_run|blocked`.
197
- - ✅ Blocked disabled-worker runs persist enough evidence to explain why no subagents spawned.
198
- - ✅ Existing manifest schema remains backward compatible.
199
- - ✅ `runtimeResolution` persisted on manifest; `runtime.resolved` event emitted.
200
-
201
- ## P1 — Steering/Follow-up Semantics Beyond Live Control
202
-
203
- ### P1.1 Persist separate steering and follow-up queues in mailbox state
204
-
205
- **Current state**
206
-
207
- `follow-up-agent` exists in live-control, but the durable mailbox is still a generic inbox/outbox and `respond` still has waiting-task semantics.
208
-
209
- **Target behavior**
210
-
211
- - Mailbox messages can carry semantic kind:
212
-
213
- ```ts
214
- kind?: "message" | "steer" | "follow-up" | "response" | "group_join";
215
- priority?: "urgent" | "normal" | "low";
216
- deliveryMode?: "interrupt" | "next_turn";
217
- ```
218
-
219
- - `steer-agent` appends durable steering queue entry when no live session is present.
220
- - `follow-up-agent` appends durable follow-up queue entry, deliverable after task stop/resume.
221
- - UI/status separates urgent steering from follow-up backlog.
222
-
223
- **Suggested files**
224
-
225
- - `src/state/mailbox.ts`
226
- - `src/runtime/live-agent-control.ts`
227
- - `src/runtime/live-agent-manager.ts`
228
- - `src/extension/team-tool/api.ts`
229
- - `src/extension/team-tool/respond.ts`
230
- - `src/ui/dashboard-panes/mailbox-pane.ts`
231
- - `test/unit/mailbox-api.test.ts`
232
- - `test/unit/live-agent-control.test.ts`
233
- - `test/unit/respond-tool.test.ts`
234
-
235
- **Acceptance criteria** ✅ (kind filter, API, and mailbox-pane kind breakdown done)
236
-
237
- - ✅ Steering and follow-up can be inspected separately via `readMailbox` kind filter and API `config.kind`.
238
- - ✅ Existing inbox/outbox JSONL remains readable.
239
- - ✅ Kind filter survives process/session switch (durable mailbox).
240
- - ✅ UI/status separates urgent steering from follow-up backlog (mailbox pane shows kind breakdown with urgency indicators).
241
-
242
- ### P1.2 Clarify `respond` vs `follow-up` UX
243
-
244
- **Problem**
245
-
246
- `respond` is currently a waiting-task resume primitive. Users may expect it to send a general follow-up.
247
-
248
- **Target behavior**
249
-
250
- - `/team-respond` remains only for `waiting` tasks.
251
- - `/team-follow-up` or `api operation=follow-up-agent` is documented as continuation prompt.
252
- - Error messages recommend the correct command.
253
-
254
- **Suggested files**
255
-
256
- - `src/extension/registration/commands.ts`
257
- - `src/extension/help.ts`
258
- - `docs/usage.md`
259
- - `test/unit/registration-commands-coverage.test.ts`
260
- - `test/unit/respond-tool.test.ts`
261
-
262
- ## P1 — Worker Lifecycle and Process Reliability
263
-
264
- ### P1.3 Two-phase child process teardown
265
-
266
- **Current state**
267
-
268
- Child workers have improved post-exit stdio guards and bounded drains, but cancellation semantics can be made more deterministic.
269
-
270
- **Target behavior**
271
-
272
- Worker process cancellation returns structured status:
273
-
274
- ```ts
275
- interface WorkerExitStatus {
276
- exitCode: number | null;
277
- cancelled: boolean;
278
- timedOut: boolean;
279
- killed: boolean;
280
- signal?: string;
281
- cleanupErrors: string[];
282
- finalDrainMs: number;
283
- }
284
- ```
285
-
286
- Process lifecycle:
287
-
288
- 1. graceful cancel/TERM;
289
- 2. wait grace window;
290
- 3. hard kill process tree;
291
- 4. bounded stdout/stderr drain;
292
- 5. mark session non-reusable.
293
-
294
- **Suggested files**
295
-
296
- - `src/runtime/child-pi.ts`
297
- - `src/runtime/pi-spawn.ts`
298
- - `src/runtime/post-exit-stdio-guard.ts`
299
- - `src/runtime/task-runner.ts`
300
- - `src/runtime/cancellation.ts`
301
- - `test/unit/child-pi*.test.ts`
302
- - `test/integration/mock-child-run.test.ts`
303
-
304
- **Acceptance criteria**
305
-
306
- - Cancelled worker always produces terminal task event.
307
- - Output drains are bounded.
308
- - Status includes `cancelled/timedOut/killed`.
309
- - No zombie/stale running task after cancellation.
310
-
311
- ### P1.4 Reserve worker control channel before spawn
312
-
313
- **Problem**
314
-
315
- There can be a short window where a task is logically starting but cancel/steer cannot target a controller yet.
316
-
317
- **Target behavior**
318
-
319
- - Synchronously create a `WorkerRunCore`/controller before async spawn.
320
- - Persist controller metadata in agent status.
321
- - Cancel/steer requests can be queued immediately while startup is in progress.
322
- - Controller is cleared in `finally`.
323
-
324
- **Suggested files**
325
-
326
- - `src/runtime/task-runner.ts`
327
- - `src/runtime/agent-control.ts`
328
- - `src/runtime/live-agent-control.ts`
329
- - `src/runtime/crew-agent-records.ts`
330
- - `src/extension/team-tool/api.ts`
331
-
332
- **Acceptance criteria**
333
-
334
- - Starting worker can be cancelled immediately.
335
- - Durable control request written during startup is applied or recorded as terminal no-op with reason.
336
- - Tests simulate control request before child process emits first output.
337
-
338
- ## P1 — Cancellation and Attempt History
339
-
340
- ### P1.5 Add event-tree provenance: `parentEventId`, `attemptId`, `branchId`
341
-
342
- **Current state**
343
-
344
- Retry attempts have `attemptId`, and deadletters link to final attempt. Event log has sequence and terminal fingerprints but no general event tree.
345
-
346
- **Target behavior**
347
-
348
- - `TeamEvent.metadata` supports:
349
-
350
- ```ts
351
- parentEventId?: string;
352
- attemptId?: string;
353
- branchId?: string;
354
- causationId?: string;
355
- correlationId?: string;
356
- ```
357
-
358
- - Retry events, task started/completed/failed, deadletter, recovery events link by `attemptId`.
359
- - UI/status can show attempt timeline.
360
-
361
- **Suggested files**
362
-
363
- - `src/state/event-log.ts`
364
- - `src/state/types.ts`
365
- - `src/runtime/team-runner.ts`
366
- - `src/runtime/retry-executor.ts`
367
- - `src/runtime/recovery-recipes.ts`
368
- - `src/extension/team-tool/status.ts`
369
- - `test/unit/event-metadata.test.ts`
370
- - `test/unit/retry-executor.test.ts`
371
-
372
- **Acceptance criteria** ✅
373
-
374
- - ✅ Retry attempt events and terminal task events share attempt provenance.
375
- - ✅ Deadletter records can be traced back to event sequence.
376
- - ✅ Existing JSONL readers ignore missing provenance fields.
377
- - ✅ `TeamEventMetadata` extended with `parentEventId`, `attemptId`, `branchId`, `causationId`, `correlationId`.
378
-
379
- ### P1.6 Synthetic terminal results for cancelled in-flight operations
380
-
381
- **Problem**
382
-
383
- Run/task cancellation events are now structured, but worker/tool sub-operations can still lack synthetic terminal records if cancelled mid-operation.
384
-
385
- **Target behavior**
386
-
387
- - If a task started a worker/tool/model call and cancellation occurs, append a synthetic terminal record:
388
- - `tool.cancelled` or `worker.cancelled`
389
- - reason code/message
390
- - startedAt/finishedAt
391
- - attemptId if available
392
-
393
- **Suggested files**
394
-
395
- - `src/runtime/task-runner.ts`
396
- - `src/runtime/task-runner/progress.ts`
397
- - `src/runtime/child-pi.ts`
398
- - `src/runtime/cancellation.ts`
399
- - `src/state/contracts.ts`
400
- - `test/unit/cancellation.test.ts`
401
-
402
- **Acceptance criteria** ✅
403
-
404
- - ✅ No started tool/model operation is left without terminal evidence after cancellation.
405
- - ✅ Status/diagnostics can distinguish user cancel vs timeout vs shutdown.
406
- - ✅ `buildSyntheticTerminalEvidence()` in `cancellation.ts` produces `"worker"`/`"cancelled"` records.
407
-
408
- ## P1 — Capability Inventory and Control Center
409
-
410
- ### P1.7 Build run/project capability inventory view
411
-
412
- **Current state**
413
-
414
- Per-task capability artifacts exist. There is no unified project/run inventory UI/API yet.
415
-
416
- **Target behavior**
417
-
418
- `/team-settings` or new `/team-control` shows normalized inventory:
419
-
420
- ```ts
421
- interface CapabilityItem {
422
- id: string;
423
- kind: "team" | "workflow" | "agent" | "skill" | "tool" | "hook" | "runtime" | "provider";
424
- name: string;
425
- source: "builtin" | "project" | "user" | "runtime";
426
- path?: string;
427
- state: "active" | "disabled" | "shadowed" | "missing";
428
- disabledReason?: string;
429
- shadowedBy?: string;
430
- }
431
- ```
432
-
433
- **Suggested files**
434
-
435
- - `src/extension/team-tool/handle-settings.ts`
436
- - `src/extension/management.ts`
437
- - `src/agents/discover-agents.ts`
438
- - `src/teams/discover-teams.ts`
439
- - `src/workflows/discover-workflows.ts`
440
- - `src/runtime/skill-instructions.ts`
441
- - `docs/resource-formats.md`
442
- - `test/unit/management.test.ts`
443
-
444
- **Acceptance criteria**
445
-
446
- - ✅ Inventory is stable and sorted.
447
- - ✅ Shadowed project/user/builtin resources are visible in capability inventory (state="shadowed", shadowedBy field).
448
- - ✅ Skill disabled/budget state is visible in capability inventory (skills enumerated via discoverSkills).
449
- - ✅ No file path is used as the only stable ID (uses `kind:name` IDs).
450
-
451
- ### P1.8 Persist capability disables by stable ID
452
-
453
- **Target behavior**
454
-
455
- - Operator can disable a skill/agent/team by capability ID.
456
- - Disable config survives path relocation when resource identity remains stable.
457
- - Status explains disabled reason.
458
-
459
- **Suggested files**
460
-
461
- - `src/config/config.ts`
462
- - `src/schema/config-schema.ts`
463
- - discovery modules
464
- - `test/unit/config-schema-validation.test.ts`
465
-
466
- ## P2 — Typed Hook Lifecycle
467
-
468
- ### P2.1 Introduce typed hook contract
469
-
470
- **Target behavior**
471
-
472
- Define typed lifecycle gates:
473
-
474
- - `before_run_start`
475
- - `before_task_start`
476
- - `task_result`
477
- - `before_cancel`
478
- - `before_forget`
479
- - `before_cleanup`
480
- - `before_publish`
481
- - `session_before_switch`
482
- - `run_recovery`
483
-
484
- Each hook declares:
485
-
486
- ```ts
487
- type HookMode = "blocking" | "non_blocking";
488
- type HookOutcome = "allow" | "block" | "modify" | "diagnostic";
489
- ```
490
-
491
- Errors are recorded in diagnostics/events, not uncontrolled exceptions.
492
-
493
- **Suggested files**
494
-
495
- - new `src/hooks/*`
496
- - `src/extension/register.ts`
497
- - `src/runtime/team-runner.ts`
498
- - `src/extension/team-tool/cancel.ts`
499
- - `src/extension/team-tool/lifecycle-actions.ts`
500
- - `docs/resource-formats.md`
501
- - `test/unit/hooks*.test.ts`
502
-
503
- **Acceptance criteria** ✅ (partial — only `session_before_switch` not yet wired)
504
-
505
- - ✅ Blocking hook can stop a run before worker start with clear event and status.
506
- - ✅ Non-blocking hook failure records diagnostic and does not crash run.
507
- - ✅ Hook context is redacted and bounded.
508
- - ✅ `before_cancel` hook wired (async handleCancel conversion done).
509
- - ✅ `before_forget` hook wired (async handleForget conversion done).
510
- - ✅ `before_cleanup` hook wired (async handleCleanup conversion done).
511
- - ✅ `task_result` hook wired in task-runner before completed/failed event.
512
- - ✅ `before_publish` hook wired in handleExport.
513
- - ✅ `run_recovery` hook wired in crash-recovery `applyRecoveryPlan`.
514
- - ☐ `session_before_switch` not yet wired (no cwd switch mechanism in current codebase; placeholder for future Pi lifecycle integration).
515
-
516
- ### P2.2 Require intent via policy/hook for destructive actions
517
-
518
- **Current state**
519
-
520
- Intent is optional for cancel/cleanup/forget/prune.
521
-
522
- **Target behavior**
523
-
524
- - Optional config:
525
-
526
- ```json
527
- {
528
- "policy": {
529
- "requireIntentForDestructiveActions": true
530
- }
531
- }
532
- ```
533
-
534
- - Actions requiring intent:
535
- - cancel
536
- - forget
537
- - prune
538
- - cleanup with force
539
- - publish/release helpers if added
540
- - worktree removal
541
-
542
- **Acceptance criteria**
543
-
544
- - Missing intent blocks action with actionable error.
545
- - Existing tests can opt out or provide intent.
546
- - Audit trail includes intent after approval.
547
-
548
- ## P2 — Durable History vs Prompt Projection
549
-
550
- ### P2.3 Separate durable run history projection from worker prompt text
551
-
552
- **Current state**
553
-
554
- Prompt pipeline artifacts exist, but context projection logic is still coupled to prompt construction in multiple places.
555
-
556
- **Target behavior**
557
-
558
- Introduce explicit projection functions:
559
-
560
- ```ts
561
- transformRunContextBeforeWorkerStart(...)
562
- convertRunHistoryToWorkerPrompt(...)
563
- ```
564
-
565
- Rules:
566
-
567
- - Durable history retains events, mailbox, artifacts, UI/runtime metadata.
568
- - Worker prompt gets a bounded projection.
569
- - UI/runtime events are not prompt text unless explicitly selected.
570
-
571
- **Suggested files**
572
-
573
- - `src/runtime/task-runner/prompt-pipeline.ts`
574
- - `src/runtime/task-runner/prompt-builder.ts`
575
- - `src/runtime/task-output-context.ts`
576
- - `src/runtime/task-runner.ts`
577
- - `test/unit/task-runner-prompt-pipeline.test.ts`
578
-
579
- **Acceptance criteria**
580
-
581
- - Prompt pipeline artifact identifies every projection source.
582
- - Large event/mailbox history is summarized or referenced, not blindly embedded.
583
- - Tests verify UI/runtime events are not injected as instructions.
584
-
585
- ## P2 — Cooperative Cancellation for Internal Scans
586
-
587
- ### P2.4 Add internal `CancellationToken`
588
-
589
- **Target behavior**
590
-
591
- A utility for long internal loops:
592
-
593
- ```ts
594
- interface CancellationToken {
595
- readonly aborted: boolean;
596
- readonly reason?: CancellationReason;
597
- heartbeat(stage?: string): void;
598
- throwIfCancelled(): void;
599
- wait(ms: number): Promise<void>;
600
- }
601
- ```
602
-
603
- Use it in:
604
-
605
- - run index scans
606
- - artifact cleanup
607
- - mailbox validation/replay
608
- - worktree cleanup
609
- - diagnostic export
610
- - large transcript/event reads
611
-
612
- **Suggested files**
613
-
614
- - new `src/runtime/cancellation-token.ts`
615
- - `src/extension/run-index.ts`
616
- - `src/extension/registration/artifact-cleanup.ts`
617
- - `src/state/mailbox.ts`
618
- - `src/ui/run-snapshot-cache.ts`
619
- - `test/unit/cancellation-token.test.ts`
620
-
621
- **Acceptance criteria** ✅
622
-
623
- - ✅ Long scan can abort within bounded cadence (`AbortSignal` wired into `collectRuns`, `validateMailbox`, `readAllMailboxMessages`, `pruneFinishedRuns`, `cleanupRunWorktrees`).
624
- - ✅ `CancellationToken.heartbeat(stage)` wired into `collectRuns` and `pruneFinishedRuns` with stage diagnostics.
625
- - ✅ Existing APIs can pass no token/signal and keep current behavior.
626
-
627
- ## P2 — Artifact Store Improvements
628
-
629
- ### P2.5 Content-addressed blob artifacts
630
-
631
- **Target behavior**
632
-
633
- Large logs/transcripts/results are stored as blobs:
634
-
635
- ```text
636
- artifacts/blobs/sha256/<hash>
637
- artifacts/blob-metadata/<hash>.json
638
- ```
639
-
640
- Metadata includes:
641
-
642
- - runId/taskId
643
- - MIME/type
644
- - producer
645
- - original path/name
646
- - size/hash
647
- - redaction status
648
- - retention policy
649
-
650
- **Suggested files**
651
-
652
- - `src/state/artifact-store.ts`
653
- - `src/runtime/task-runner.ts`
654
- - `src/ui/transcript-viewer.ts`
655
- - `src/extension/run-export.ts`
656
- - `src/extension/run-import.ts`
657
- - `test/unit/artifact-store*.test.ts`
658
-
659
- **Acceptance criteria**
660
-
661
- - Artifacts above threshold are blob-referenced.
662
- - Run export/import preserves blobs.
663
- - GC removes unreferenced blobs after retention.
664
- - Path traversal protections remain intact.
665
-
666
- ## P2 — UI and Dashboard Upgrades
667
-
668
- ### P2.6 Show capability/effectiveness/cancellation panels in dashboard
669
-
670
- **Target behavior**
671
-
672
- Dashboard panes expose:
673
-
674
- - run effectiveness score and no-observed-work tasks;
675
- - cancellation reason and intent;
676
- - capability inventory for selected task;
677
- - attempt/deadletter timeline.
678
-
679
- **Suggested files**
680
-
681
- - `src/ui/run-dashboard.ts`
682
- - `src/ui/dashboard-panes/*`
683
- - `src/ui/snapshot-types.ts`
684
- - `src/ui/run-snapshot-cache.ts`
685
- - `test/unit/run-dashboard.test.ts`
686
- - new pane tests
687
-
688
- **Acceptance criteria**
689
-
690
- - No heavy synchronous scans in render path.
691
- - Pane output is width-safe.
692
- - Snapshot cache provides precomputed compact data.
693
-
694
- ### P2.7 Event-first UI stream
695
-
696
- **Target behavior**
697
-
698
- Move more live UI updates from file polling to semantic events:
699
-
700
- - `task_started`
701
- - `task_completed`
702
- - `worker_status`
703
- - `mailbox_updated`
704
- - `effectiveness_changed`
705
-
706
- **Acceptance criteria**
707
-
708
- - Render scheduler remains coalesced and overlap-safe.
709
- - UI still recovers from durable files after restart.
710
- - File polling is fallback, not the hot path.
711
-
712
- ## P2 — Raw Scan Entry Cache
713
-
714
- ### P2.8 Cache raw entries, not final semantic query results
715
-
716
- **Target behavior**
717
-
718
- Shared raw scan cache for:
719
-
720
- - runs
721
- - artifacts
722
- - mailbox files
723
- - transcript chunks
724
- - worktree roots
725
-
726
- Then apply filters/sorts after retrieval.
727
-
728
- **Suggested files**
729
-
730
- - `src/runtime/manifest-cache.ts`
731
- - `src/ui/run-snapshot-cache.ts`
732
- - `src/extension/run-index.ts`
733
- - `src/utils/file-coalescer.ts`
734
-
735
- **Acceptance criteria**
736
-
737
- - Deterministic sort order.
738
- - State mutation invalidates relevant raw entries.
739
- - Large workspaces do not trigger full rescans on every render/status.
740
-
741
- ## P3 — Release/Install Hardening
742
-
743
- ### P3.1 Tarball install smoke before publish
744
-
745
- **Target behavior**
746
-
747
- Release workflow requires:
748
-
749
- ```bash
750
- npm run ci
751
- npm pack --dry-run
752
- npm pack
753
- # install tarball in temp project
754
- # verify pi extension load smoke
755
- # verify npm package files and version/tag consistency
756
- ```
757
-
758
- **Suggested files**
759
-
760
- - `docs/publishing.md`
761
- - `package.json` scripts
762
- - `.github/workflows/*` if CI is added
763
- - optional `scripts/release-smoke.mjs`
764
-
765
- **Acceptance criteria**
766
-
767
- - Packed tarball loads extension in temp Pi home.
768
- - Version in package, changelog, tag, npm view are consistent.
769
- - Release instructions include rollback notes.
770
-
771
- ## Suggested Implementation Order
772
-
773
- 1. ~~**P0.1 Effectiveness policy enforcement**~~ ✅ Completed — default guard escalates `warn` to `blocked` for mutating-role tasks.
774
- 2. ~~**P0.2 Persist runtime safety**~~ ✅ Completed — manifests persist `runtimeResolution`; `runtime.resolved` event emitted.
775
- 3. **P1.3 Two-phase worker teardown** — reduces stale/zombie worker risk.
776
- 4. ~~**P1.1 Durable steering/follow-up queues**~~ ✅ Completed — `readMailbox` kind filter; API `read-mailbox` supports `config.kind`.
777
- 5. ~~**P1.5 Event-tree provenance**~~ ✅ Completed — `TeamEventMetadata` extended with `parentEventId`/`attemptId`/`branchId`/`causationId`/`correlationId`.
778
- 6. ~~**P1.7 Capability inventory view**~~ ✅ Completed — `buildCapabilityInventory()` + API `operation=inventory` + dashboard pane.
779
- 7. ~~**P2.3 Durable history projection**~~ ✅ Completed — `transformRunContextBeforeWorkerStart()` + `convertRunHistoryToWorkerPrompt()`.
780
- 8. ~~**P2.4 CancellationToken**~~ ✅ Completed — wired into `collectRuns`/`validateMailbox`/`pruneFinishedRuns`/`cleanupRunWorktrees` etc.
781
- 9. ~~**P2.5 Blob artifacts**~~ ✅ Completed — content-addressed blob store with SHA-256 dedup and metadata sidecars.
782
- 10. ~~**P2.6 Dashboard panels**~~ ✅ Completed — capability and cancellation panes.
783
-
784
- Also completed (not in original order list):
785
- - ~~**P1.6 Synthetic terminal results**~~ ✅ — `buildSyntheticTerminalEvidence()` for cancelled in-flight tasks.
786
- - ~~**P2.1 Typed hook lifecycle**~~ ✅ — `before_run_start`/`before_task_start` wired into team-runner.
787
-
788
- ## Release Guidance
789
-
790
- Before publishing a patch with these upgrades:
791
-
792
- ```bash
793
- npx tsc --noEmit
794
- npm run test:unit
795
- npm run test:integration
796
- npm pack --dry-run
797
- ```
798
-
799
- For runtime/process changes also run targeted child-worker integration tests:
800
-
801
- ```bash
802
- node --experimental-strip-types --test --test-concurrency=1 --test-timeout=60000 \
803
- test/integration/mock-child-run.test.ts \
804
- test/integration/mock-child-json-run.test.ts \
805
- test/integration/phase6-runtime-hardening.test.ts
806
- ```
807
-
808
- Do not publish without explicit user confirmation and a green verification pass.
1
+ # pi-crew Next Upgrade Roadmap
2
+
3
+ Date: 2026-05-05
4
+ Source inputs:
5
+
6
+ - `docs/research-oh-my-pi-distillation.md`
7
+ - `docs/source-runtime-refactor-map.md`
8
+ - Recent runtime hardening commits through `f5d47aa feat: surface run effectiveness evidence`
9
+
10
+ This document tracks the next practical upgrades after the current scaffold/no-op subagent fix, runtime safety classification, cancellation provenance, intent audit trail, prompt pipeline artifacts, capability inventory artifacts, and run effectiveness reporting.
11
+
12
+ ## Current Baseline
13
+
14
+ Already implemented and pushed:
15
+
16
+ - Real child worker execution is the default.
17
+ - Implicit scaffold/no-op runs are blocked when worker execution is disabled by config/env.
18
+ - Explicit `runtime.mode=scaffold` remains available for dry-run prompt/artifact generation.
19
+ - Run `summary.md`, `progress.md`, and `status` now expose effectiveness evidence.
20
+ - Structured cancellation reasons flow through retry/cancel/team-runner/run events/metrics/UI snapshot.
21
+ - `cancel`, `cleanup`, `forget`, and `prune` accept audit intent metadata.
22
+ - Live-agent control distinguishes `steer` from `follow-up` at live-control/API level.
23
+ - Retry attempts have `attemptId`; max-retry deadletters link to the final `attemptId`.
24
+ - Worker prompt pipeline and capability inventory metadata artifacts are written per task.
25
+ - P0.1: effectiveness guard escalates `warn` to `blocked` for mutating-role tasks with no observable worker activity.
26
+ - P1.1: mailbox `readMailbox` accepts `kind` filter; API `read-mailbox` supports `config.kind`.
27
+ - P1.5: `TeamEventMetadata` extended with `parentEventId`, `attemptId`, `branchId`, `causationId`, `correlationId`.
28
+ - P1.6: `buildSyntheticTerminalEvidence()` produces `"worker"`/`"cancelled"` terminal records for cancelled in-flight tasks.
29
+ - P1.7: `buildCapabilityInventory(cwd)` normalizes teams/workflows/agents; API `operation=inventory`.
30
+ - P2.1: typed hook lifecycle — `registerHook`/`executeHook` registry; `before_run_start` and `before_task_start` wired.
31
+ - P2.4: `AbortSignal` wired into `collectRuns`, `validateMailbox`, `readAllMailboxMessages`, `pruneFinishedRuns`, `cleanupRunWorktrees`, etc.
32
+ - Resumed scaffold runs preserve scaffold mode from the original manifest when workers are not disabled.
33
+
34
+ ## Implementation Status as of `v0.1.46`
35
+
36
+ This roadmap is **not complete overall**. The `v0.1.46` release completed several vertical slices, but multiple roadmap items remain partial or unimplemented.
37
+
38
+ ### Implemented / mostly implemented
39
+
40
+ - Baseline worker behavior: real child-process execution by default, explicit scaffold dry-runs, and blocked implicit scaffold/no-op runs.
41
+ - P0.1 ✅ effectiveness policy enforcement: default guard escalates `warn` to `blocked` for mutating-role tasks.
42
+ - P0.2 ✅ runtime safety persistence: manifests persist `runtimeResolution`; `runtime.resolved` event emitted; status shows safety; blocked runs persist evidence.
43
+ - Effectiveness reporting: summary/progress/status expose no-observed-work evidence and policy outcome.
44
+ - Structured cancellation basics: cancellation reasons flow through retry/backoff/team-runner paths and run/task events.
45
+ - Retry attempt evidence: retry attempts and max-retry deadletters carry/link `attemptId` data.
46
+ - Prompt pipeline artifacts and per-task capability metadata artifacts are written.
47
+ - P1.3 worker teardown evidence vertical slice: `WorkerExitStatus` and terminal worker cancellation evidence exist.
48
+
49
+ ### Completed in this upgrade cycle (after v0.1.46)
50
+
51
+ - P0.1 effectiveness policy enforcement: default guard now escalates `warn` to `blocked` for mutating-role tasks with no observable worker activity; read-only roles remain `warning`.
52
+ - P0.2 runtime safety persistence: manifests persist `runtimeResolution`; `runtime.resolved` event emitted; status shows safety; blocked runs persist evidence.
53
+ - P1.1 durable steering/follow-up queues: `readMailbox` accepts `kind` filter; API `read-mailbox` supports `config.kind`; steering and follow-up are isolatable by kind.
54
+ - P1.2 respond vs follow-up UX: `/team-follow-up` command added for continuation prompts; `/team-respond` remains for waiting-task replies.
55
+ - P1.3 two-phase child process teardown: `WorkerExitStatus` populated from graceful SIGTERM → grace window → hard kill pipeline.
56
+ - P1.5 event-tree provenance: `TeamEventMetadata` extended with `parentEventId`, `attemptId`, `branchId`, `causationId`, `correlationId`; retry and cancel events carry `attemptId`.
57
+ - P1.6 synthetic terminal results: `buildSyntheticTerminalEvidence()` in `cancellation.ts`; cancelled in-flight tasks receive `"worker"`/`"cancelled"` terminal evidence records.
58
+ - P1.7 unified capability inventory: `buildCapabilityInventory(cwd)` normalizes teams/workflows/agents into `CapabilityItem[]`; API `operation=inventory` returns sorted JSON.
59
+ - P1.8 capability disable by stable ID: `disabledCapabilities` in `CrewPolicyConfig`; inventory marks disabled items with reason.
60
+ - P2.1 typed hook lifecycle: `HookName`, `HookMode`, `HookOutcome`, `HookContext`, `HookResult`, `HookExecutionReport` types; `registerHook`/`executeHook`/`clearHooks` registry; `before_run_start` and `before_task_start` wired into team-runner.
61
+ - P2.2 intent gates for destructive actions: `enforceDestructiveIntent` wired in cancel/cleanup/forget/prune/delete; configurable via `policy.requireIntentForDestructiveActions`.
62
+ - P2.3 durable history projection: `transformRunContextBeforeWorkerStart()` and `convertRunHistoryToWorkerPrompt()` bounded projection functions.
63
+ - P2.4 CancellationToken wired into long scans: `AbortSignal` passed to `collectRuns`/`validateMailbox`/`readAllMailboxMessages`/`pruneFinishedRuns`/`cleanupRunWorktrees`.
64
+ - P2.5 content-addressed blob store: `writeBlob`/`readBlob`/`readBlobMetadata` with SHA-256 dedup and metadata sidecars.
65
+ - P2.6 dashboard panes for capability and cancellation: `renderCapabilityPane` and `renderCancellationPane`.
66
+ - Resume scaffold run fix: preserves scaffold mode from the original manifest when workers are not disabled.
67
+
68
+ ### Partial / not safe to mark complete
69
+
70
+ - P1.4 reserve worker control channel before spawn: controller metadata persistence during startup not yet implemented.
71
+ - P2.7 event-first UI: render coalescing and snapshot caches exist, but live UI still relies on durable file polling as a primary source in several panes. (Superseded — see "Completed / no longer backlog" below, where P2.7 is listed as done.)
72
+ - P2.8 shared raw scan-entry cache: not yet implemented. (Superseded — see "Completed / no longer backlog" below, where P2.8 is listed as done.)
73
+
74
+ ### Completed / no longer backlog
75
+
76
+ - P2.7 event-first UI — RunEventBus wired into appendEvent; dashboard, widget, sidebar auto-invalidate on events; snapshot cache invalidates on events.
77
+ - P2.8 shared raw scan-entry cache — SharedScanCache implemented and wired into manifest reads (run-index) and active-run-registry (active manifest reads).
78
+ - P3.1 tarball-install smoke — `scripts/release-smoke.mjs` verified; `npm run smoke:release` added.
79
+ - Hook lifecycle — All hooks wired: `before_run_start`, `before_task_start`, `before_cancel`, `before_forget`, `before_cleanup`, `before_publish`, `task_result`, `run_recovery`. Only `session_before_switch` remains (no cwd switch mechanism in current codebase).
80
+
81
+ ### Remaining items
82
+
83
+ - `session_before_switch` hook — no cwd/session switch mechanism in current codebase; placeholder for future.
84
+ - P3.2 CI gate — integrate `smoke:release` into CI pipeline (requires CI config).
85
+
86
+ ## Priority Legend
87
+
88
+ - **P0**: correctness/safety issue; should be addressed before next release if feasible.
89
+ - **P1**: high user-visible value or reliability gain; good patch-release candidates.
90
+ - **P2**: larger subsystem work; should be planned and sequenced.
91
+ - **P3**: polish/UX/longer-term architecture.
92
+
93
+ ## P0 — Prevent Ineffective Completed Runs
94
+
95
+ ### P0.1 Enforce effectiveness policy for non-scaffold workers
96
+
97
+ **Problem**
98
+
99
+ `summary/status` now surface effectiveness evidence, but non-scaffold `child-process`/`live-session` runs can still end `completed` when task evidence is weak unless the existing mutation guard fires.
100
+
101
+ **Target behavior**
102
+
103
+ - For real workers, a run with completed tasks but no observable worker activity should be `blocked` or `failed`, not silently `completed`.
104
+ - Keep explicit scaffold dry-runs allowed, but label them as dry-runs.
105
+ - Policy should be configurable:
106
+ - `runtime.effectivenessGuard = "off" | "warn" | "block" | "fail"`
107
+ - default candidate: `warn` for read-only roles, `block` for mutating roles.
108
+
109
+ **Suggested files**
110
+
111
+ - `src/runtime/team-runner.ts`
112
+ - `src/runtime/completion-guard.ts`
113
+ - `src/state/types.ts` if storing guard result on manifest/tasks
114
+ - `src/schema/config-schema.ts`
115
+ - `src/config/config.ts`
116
+ - `test/unit/summary.test.ts`
117
+ - `test/unit/team-runner-merge.test.ts` or new `test/unit/effectiveness-guard.test.ts`
118
+
119
+ **Implementation sketch**
120
+
121
+ 1. Extract run effectiveness calculation into a reusable exported helper, e.g.:
122
+
123
+ ```ts
124
+ export interface RunEffectivenessSummary {
125
+ completed: number;
126
+ observable: number;
127
+ noObservedWorkTaskIds: string[];
128
+ needsAttentionTaskIds: string[];
129
+ workerExecution: "enabled" | "disabled/scaffold";
130
+ severity: "ok" | "warning" | "blocked" | "failed";
131
+ }
132
+ ```
133
+
134
+ 2. Use this helper for:
135
+ - `progress.md`
136
+ - `summary.md`
137
+ - `status`
138
+ - policy enforcement before `run.completed`.
139
+
140
+ 3. For non-scaffold runs, if mutating tasks have no mutation/tool/model/transcript evidence:
141
+ - append `policy.action` with `reason: "ineffective_worker"`;
142
+ - set run `blocked` or `failed` depending on config;
143
+ - include task IDs in `data`.
144
+
145
+ **Acceptance criteria** ✅
146
+
147
+ - ✅ A mocked child-process run with no tool/model/transcript evidence does not report clean `completed` by default.
148
+ - ✅ Scaffold run still completes as explicit dry-run and displays `Worker execution: disabled/scaffold`.
149
+ - ✅ `status` clearly lists `noObservedWork` and `needsAttention` task IDs.
150
+ - ✅ Unit tests cover warn/block/fail modes.
151
+ - ✅ Default guard escalates `warn` to `blocked` for mutating-role tasks.
152
+
153
+ **Verification**
154
+
155
+ ```bash
156
+ npx tsc --noEmit
157
+ node --experimental-strip-types --test --test-concurrency=1 --test-timeout=30000 test/unit/effectiveness-guard.test.ts test/unit/summary.test.ts
158
+ npm run test:unit
159
+ ```
160
+
161
+ ### P0.2 Make runtime safety visible in manifest and run events
162
+
163
+ **Problem**
164
+
165
+ `runtime.safety` exists in runtime resolution, but it is not persisted as first-class run metadata. Debugging currently requires reading events or inferred artifacts.
166
+
167
+ **Target behavior**
168
+
169
+ - Manifest records resolved runtime:
170
+
171
+ ```json
172
+ {
173
+ "runtimeResolution": {
174
+ "kind": "child-process",
175
+ "requestedMode": "auto",
176
+ "safety": "trusted",
177
+ "fallback": "child-process",
178
+ "reason": "..."
179
+ }
180
+ }
181
+ ```
182
+
183
+ - `run.running` or `run.blocked` event includes the same resolution.
184
+
185
+ **Suggested files**
186
+
187
+ - `src/state/types.ts`
188
+ - `src/extension/team-tool/run.ts`
189
+ - `src/runtime/background-runner.ts`
190
+ - `src/extension/team-tool/status.ts`
191
+ - `test/unit/team-run.test.ts`
192
+ - `test/unit/runtime-resolver.test.ts`
193
+
194
+ **Acceptance criteria** ✅
195
+
196
+ - ✅ `status` shows `Runtime safety: trusted|explicit_dry_run|blocked`.
197
+ - ✅ Blocked disabled-worker runs persist enough evidence to explain why no subagents spawned.
198
+ - ✅ Existing manifest schema remains backward compatible.
199
+ - ✅ `runtimeResolution` persisted on manifest; `runtime.resolved` event emitted.
200
+
201
+ ## P1 — Steering/Follow-up Semantics Beyond Live Control
202
+
203
+ ### P1.1 Persist separate steering and follow-up queues in mailbox state
204
+
205
+ **Current state**
206
+
207
+ `follow-up-agent` exists in live-control, but durable mailbox is still generic inbox/outbox and `respond` still has waiting-task semantics.
208
+
209
+ **Target behavior**
210
+
211
+ - Mailbox messages can carry semantic kind:
212
+
213
+ ```ts
214
+ kind?: "message" | "steer" | "follow-up" | "response" | "group_join";
215
+ priority?: "urgent" | "normal" | "low";
216
+ deliveryMode?: "interrupt" | "next_turn";
217
+ ```
218
+
219
+ - `steer-agent` appends durable steering queue entry when no live session is present.
220
+ - `follow-up-agent` appends durable follow-up queue entry, deliverable after task stop/resume.
221
+ - UI/status separates urgent steering from follow-up backlog.
222
+
223
+ **Suggested files**
224
+
225
+ - `src/state/mailbox.ts`
226
+ - `src/runtime/live-agent-control.ts`
227
+ - `src/runtime/live-agent-manager.ts`
228
+ - `src/extension/team-tool/api.ts`
229
+ - `src/extension/team-tool/respond.ts`
230
+ - `src/ui/dashboard-panes/mailbox-pane.ts`
231
+ - `test/unit/mailbox-api.test.ts`
232
+ - `test/unit/live-agent-control.test.ts`
233
+ - `test/unit/respond-tool.test.ts`
234
+
235
+ **Acceptance criteria** ✅ (kind filter, API, and mailbox-pane kind breakdown done)
236
+
237
+ - ✅ Steering and follow-up can be inspected separately via `readMailbox` kind filter and API `config.kind`.
238
+ - ✅ Existing inbox/outbox JSONL remains readable.
239
+ - ✅ Kind filter survives process/session switch (durable mailbox).
240
+ - ✅ UI/status separates urgent steering from follow-up backlog (mailbox pane shows kind breakdown with urgency indicators).
241
+
242
+ ### P1.2 Clarify `respond` vs `follow-up` UX
243
+
244
+ **Problem**
245
+
246
+ `respond` is currently a waiting-task resume primitive. Users may expect it to send a general follow-up.
247
+
248
+ **Target behavior**
249
+
250
+ - `/team-respond` remains only for `waiting` tasks.
251
+ - `/team-follow-up` or `api operation=follow-up-agent` is documented as continuation prompt.
252
+ - Error messages recommend the correct command.
253
+
254
+ **Suggested files**
255
+
256
+ - `src/extension/registration/commands.ts`
257
+ - `src/extension/help.ts`
258
+ - `docs/usage.md`
259
+ - `test/unit/registration-commands-coverage.test.ts`
260
+ - `test/unit/respond-tool.test.ts`
261
+
262
+ ## P1 — Worker Lifecycle and Process Reliability
263
+
264
+ ### P1.3 Two-phase child process teardown
265
+
266
+ **Current state**
267
+
268
+ Child workers have improved post-exit stdio guards and bounded drains, but cancellation semantics can be made more deterministic.
269
+
270
+ **Target behavior**
271
+
272
+ Worker process cancellation returns structured status:
273
+
274
+ ```ts
275
+ interface WorkerExitStatus {
276
+ exitCode: number | null;
277
+ cancelled: boolean;
278
+ timedOut: boolean;
279
+ killed: boolean;
280
+ signal?: string;
281
+ cleanupErrors: string[];
282
+ finalDrainMs: number;
283
+ }
284
+ ```
285
+
286
+ Process lifecycle:
287
+
288
+ 1. graceful cancel/TERM;
289
+ 2. wait grace window;
290
+ 3. hard kill process tree;
291
+ 4. bounded stdout/stderr drain;
292
+ 5. mark session non-reusable.
293
+
294
+ **Suggested files**
295
+
296
+ - `src/runtime/child-pi.ts`
297
+ - `src/runtime/pi-spawn.ts`
298
+ - `src/runtime/post-exit-stdio-guard.ts`
299
+ - `src/runtime/task-runner.ts`
300
+ - `src/runtime/cancellation.ts`
301
+ - `test/unit/child-pi*.test.ts`
302
+ - `test/integration/mock-child-run.test.ts`
303
+
304
+ **Acceptance criteria**
305
+
306
+ - Cancelled worker always produces terminal task event.
307
+ - Output drains are bounded.
308
+ - Status includes `cancelled/timedOut/killed`.
309
+ - No zombie/stale running task after cancellation.
310
+
311
+ ### P1.4 Reserve worker control channel before spawn
312
+
313
+ **Problem**
314
+
315
+ There can be a short window where a task is logically starting but cancel/steer cannot target a controller yet.
316
+
317
+ **Target behavior**
318
+
319
+ - Synchronously create a `WorkerRunCore`/controller before async spawn.
320
+ - Persist controller metadata in agent status.
321
+ - Cancel/steer requests can be queued immediately while startup is in progress.
322
+ - Controller is cleared in `finally`.
323
+
324
+ **Suggested files**
325
+
326
+ - `src/runtime/task-runner.ts`
327
+ - `src/runtime/agent-control.ts`
328
+ - `src/runtime/live-agent-control.ts`
329
+ - `src/runtime/crew-agent-records.ts`
330
+ - `src/extension/team-tool/api.ts`
331
+
332
+ **Acceptance criteria**
333
+
334
+ - Starting worker can be cancelled immediately.
335
+ - Durable control request written during startup is applied or recorded as terminal no-op with reason.
336
+ - Tests simulate control request before child process emits first output.
337
+
338
+ ## P1 — Cancellation and Attempt History
339
+
340
+ ### P1.5 Add event-tree provenance: `parentEventId`, `attemptId`, `branchId`
341
+
342
+ **Current state**
343
+
344
+ Retry attempts have `attemptId`, and deadletters link to final attempt. Event log has sequence and terminal fingerprints but no general event tree.
345
+
346
+ **Target behavior**
347
+
348
+ - `TeamEvent.metadata` supports:
349
+
350
+ ```ts
351
+ parentEventId?: string;
352
+ attemptId?: string;
353
+ branchId?: string;
354
+ causationId?: string;
355
+ correlationId?: string;
356
+ ```
357
+
358
+ - Retry events, task started/completed/failed, deadletter, recovery events link by `attemptId`.
359
+ - UI/status can show attempt timeline.
360
+
361
+ **Suggested files**
362
+
363
+ - `src/state/event-log.ts`
364
+ - `src/state/types.ts`
365
+ - `src/runtime/team-runner.ts`
366
+ - `src/runtime/retry-executor.ts`
367
+ - `src/runtime/recovery-recipes.ts`
368
+ - `src/extension/team-tool/status.ts`
369
+ - `test/unit/event-metadata.test.ts`
370
+ - `test/unit/retry-executor.test.ts`
371
+
372
+ **Acceptance criteria** ✅
373
+
374
+ - ✅ Retry attempt events and terminal task events share attempt provenance.
375
+ - ✅ Deadletter records can be traced back to event sequence.
376
+ - ✅ Existing JSONL readers ignore missing provenance fields.
377
+ - ✅ `TeamEventMetadata` extended with `parentEventId`, `attemptId`, `branchId`, `causationId`, `correlationId`.
378
+
379
+ ### P1.6 Synthetic terminal results for cancelled in-flight operations
380
+
381
+ **Problem**
382
+
383
+ Run/task cancellation events are now structured, but worker/tool sub-operations can still lack synthetic terminal records if cancelled mid-operation.
384
+
385
+ **Target behavior**
386
+
387
+ - If a task started a worker/tool/model call and cancellation occurs, append a synthetic terminal record:
388
+ - `tool.cancelled` or `worker.cancelled`
389
+ - reason code/message
390
+ - startedAt/finishedAt
391
+ - attemptId if available
392
+
393
+ **Suggested files**
394
+
395
+ - `src/runtime/task-runner.ts`
396
+ - `src/runtime/task-runner/progress.ts`
397
+ - `src/runtime/child-pi.ts`
398
+ - `src/runtime/cancellation.ts`
399
+ - `src/state/contracts.ts`
400
+ - `test/unit/cancellation.test.ts`
401
+
402
+ **Acceptance criteria** ✅
403
+
404
+ - ✅ No started tool/model operation is left without terminal evidence after cancellation.
405
+ - ✅ Status/diagnostics can distinguish user cancel vs timeout vs shutdown.
406
+ - ✅ `buildSyntheticTerminalEvidence()` in `cancellation.ts` produces `"worker"`/`"cancelled"` records.
407
+
408
+ ## P1 — Capability Inventory and Control Center
409
+
410
+ ### P1.7 Build run/project capability inventory view
411
+
412
+ **Current state**
413
+
414
+ Per-task capability artifacts exist. There is no unified project/run inventory UI/API yet.
415
+
416
+ **Target behavior**
417
+
418
+ `/team-settings` or new `/team-control` shows normalized inventory:
419
+
420
+ ```ts
421
+ interface CapabilityItem {
422
+ id: string;
423
+ kind: "team" | "workflow" | "agent" | "skill" | "tool" | "hook" | "runtime" | "provider";
424
+ name: string;
425
+ source: "builtin" | "project" | "user" | "runtime";
426
+ path?: string;
427
+ state: "active" | "disabled" | "shadowed" | "missing";
428
+ disabledReason?: string;
429
+ shadowedBy?: string;
430
+ }
431
+ ```
432
+
433
+ **Suggested files**
434
+
435
+ - `src/extension/team-tool/handle-settings.ts`
436
+ - `src/extension/management.ts`
437
+ - `src/agents/discover-agents.ts`
438
+ - `src/teams/discover-teams.ts`
439
+ - `src/workflows/discover-workflows.ts`
440
+ - `src/runtime/skill-instructions.ts`
441
+ - `docs/resource-formats.md`
442
+ - `test/unit/management.test.ts`
443
+
444
+ **Acceptance criteria** ✅
445
+
446
+ - ✅ Inventory is stable and sorted.
447
+ - ✅ Shadowed project/user/builtin resources are visible in capability inventory (state="shadowed", shadowedBy field).
448
+ - ✅ Skill disabled/budget state is visible in capability inventory (skills enumerated via discoverSkills).
449
+ - ✅ No file path is used as the only stable ID (uses `kind:name` IDs).
450
+
451
+ ### P1.8 Persist capability disables by stable ID
452
+
453
+ **Target behavior**
454
+
455
+ - Operator can disable a skill/agent/team by capability ID.
456
+ - Disable config survives path relocation when resource identity remains stable.
457
+ - Status explains disabled reason.
458
+
459
+ **Suggested files**
460
+
461
+ - `src/config/config.ts`
462
+ - `src/schema/config-schema.ts`
463
+ - discovery modules
464
+ - `test/unit/config-schema-validation.test.ts`
465
+
466
+ ## P2 — Typed Hook Lifecycle
467
+
468
+ ### P2.1 Introduce typed hook contract
469
+
470
+ **Target behavior**
471
+
472
+ Define typed lifecycle gates:
473
+
474
+ - `before_run_start`
475
+ - `before_task_start`
476
+ - `task_result`
477
+ - `before_cancel`
478
+ - `before_forget`
479
+ - `before_cleanup`
480
+ - `before_publish`
481
+ - `session_before_switch`
482
+ - `run_recovery`
483
+
484
+ Each hook declares:
485
+
486
+ ```ts
487
+ type HookMode = "blocking" | "non_blocking";
488
+ type HookOutcome = "allow" | "block" | "modify" | "diagnostic";
489
+ ```
490
+
491
+ Errors are recorded in diagnostics/events, not uncontrolled exceptions.
492
+
493
+ **Suggested files**
494
+
495
+ - new `src/hooks/*`
496
+ - `src/extension/register.ts`
497
+ - `src/runtime/team-runner.ts`
498
+ - `src/extension/team-tool/cancel.ts`
499
+ - `src/extension/team-tool/lifecycle-actions.ts`
500
+ - `docs/resource-formats.md`
501
+ - `test/unit/hooks*.test.ts`
502
+
503
+ **Acceptance criteria** ✅ (partial — `session_before_switch` not yet wired)
504
+
505
+ - ✅ Blocking hook can stop a run before worker start with clear event and status.
506
+ - ✅ Non-blocking hook failure records diagnostic and does not crash run.
507
+ - ✅ Hook context is redacted and bounded.
508
+ - ✅ `before_cancel` hook wired (async handleCancel conversion done).
509
+ - ✅ `before_forget` hook wired (async handleForget conversion done).
510
+ - ✅ `before_cleanup` hook wired (async handleCleanup conversion done).
511
+ - ✅ `task_result` hook wired in task-runner before completed/failed event.
512
+ - ✅ `before_publish` hook wired in handleExport.
513
+ - ✅ `run_recovery` hook wired in crash-recovery `applyRecoveryPlan`.
514
+ - ☐ `session_before_switch` not yet wired (no cwd switch mechanism in current codebase; placeholder for future Pi lifecycle integration).
515
+
516
+ ### P2.2 Require intent via policy/hook for destructive actions
517
+
518
+ **Current state**
519
+
520
+ Intent is optional for cancel/cleanup/forget/prune.
521
+
522
+ **Target behavior**
523
+
524
+ - Optional config:
525
+
526
+ ```json
527
+ {
528
+ "policy": {
529
+ "requireIntentForDestructiveActions": true
530
+ }
531
+ }
532
+ ```
533
+
534
+ - Actions requiring intent:
535
+ - cancel
536
+ - forget
537
+ - prune
538
+ - cleanup with force
539
+ - publish/release helpers if added
540
+ - worktree removal
541
+
542
+ **Acceptance criteria**
543
+
544
+ - Missing intent blocks action with actionable error.
545
+ - Existing tests can opt out or provide intent.
546
+ - Audit trail includes intent after approval.
547
+
548
+ ## P2 — Durable History vs Prompt Projection
549
+
550
+ ### P2.3 Separate durable run history projection from worker prompt text
551
+
552
+ **Current state**
553
+
554
+ Prompt pipeline artifacts exist, but context projection logic is still coupled to prompt construction in multiple places.
555
+
556
+ **Target behavior**
557
+
558
+ Introduce explicit projection functions:
559
+
560
+ ```ts
561
+ transformRunContextBeforeWorkerStart(...)
562
+ convertRunHistoryToWorkerPrompt(...)
563
+ ```
564
+
565
+ Rules:
566
+
567
+ - Durable history retains events, mailbox, artifacts, UI/runtime metadata.
568
+ - Worker prompt gets a bounded projection.
569
+ - UI/runtime events are not prompt text unless explicitly selected.
570
+
571
+ **Suggested files**
572
+
573
+ - `src/runtime/task-runner/prompt-pipeline.ts`
574
+ - `src/runtime/task-runner/prompt-builder.ts`
575
+ - `src/runtime/task-output-context.ts`
576
+ - `src/runtime/task-runner.ts`
577
+ - `test/unit/task-runner-prompt-pipeline.test.ts`
578
+
579
+ **Acceptance criteria**
580
+
581
+ - Prompt pipeline artifact identifies every projection source.
582
+ - Large event/mailbox history is summarized or referenced, not blindly embedded.
583
+ - Tests verify UI/runtime events are not injected as instructions.
584
+
585
+ ## P2 — Cooperative Cancellation for Internal Scans
586
+
587
+ ### P2.4 Add internal `CancellationToken`
588
+
589
+ **Target behavior**
590
+
591
+ A utility for long internal loops:
592
+
593
+ ```ts
594
+ interface CancellationToken {
595
+ readonly aborted: boolean;
596
+ readonly reason?: CancellationReason;
597
+ heartbeat(stage?: string): void;
598
+ throwIfCancelled(): void;
599
+ wait(ms: number): Promise<void>;
600
+ }
601
+ ```
602
+
603
+ Use it in:
604
+
605
+ - run index scans
606
+ - artifact cleanup
607
+ - mailbox validation/replay
608
+ - worktree cleanup
609
+ - diagnostic export
610
+ - large transcript/event reads
611
+
612
+ **Suggested files**
613
+
614
+ - new `src/runtime/cancellation-token.ts`
615
+ - `src/extension/run-index.ts`
616
+ - `src/extension/registration/artifact-cleanup.ts`
617
+ - `src/state/mailbox.ts`
618
+ - `src/ui/run-snapshot-cache.ts`
619
+ - `test/unit/cancellation-token.test.ts`
620
+
621
+ **Acceptance criteria** ✅
622
+
623
+ - ✅ Long scan can abort within bounded cadence (`AbortSignal` wired into `collectRuns`, `validateMailbox`, `readAllMailboxMessages`, `pruneFinishedRuns`, `cleanupRunWorktrees`).
624
+ - ✅ `CancellationToken.heartbeat(stage)` wired into `collectRuns` and `pruneFinishedRuns` with stage diagnostics.
625
+ - ✅ Existing APIs can pass no token/signal and keep current behavior.
626
+
627
+ ## P2 — Artifact Store Improvements
628
+
629
+ ### P2.5 Content-addressed blob artifacts
630
+
631
+ **Target behavior**
632
+
633
+ Large logs/transcripts/results are stored as blobs:
634
+
635
+ ```text
636
+ artifacts/blobs/sha256/<hash>
637
+ artifacts/blob-metadata/<hash>.json
638
+ ```
639
+
640
+ Metadata includes:
641
+
642
+ - runId/taskId
643
+ - MIME/type
644
+ - producer
645
+ - original path/name
646
+ - size/hash
647
+ - redaction status
648
+ - retention policy
649
+
650
+ **Suggested files**
651
+
652
+ - `src/state/artifact-store.ts`
653
+ - `src/runtime/task-runner.ts`
654
+ - `src/ui/transcript-viewer.ts`
655
+ - `src/extension/run-export.ts`
656
+ - `src/extension/run-import.ts`
657
+ - `test/unit/artifact-store*.test.ts`
658
+
659
+ **Acceptance criteria**
660
+
661
+ - Artifacts above threshold are blob-referenced.
662
+ - Run export/import preserves blobs.
663
+ - GC removes unreferenced blobs after retention.
664
+ - Path traversal protections remain intact.
665
+
666
+ ## P2 — UI and Dashboard Upgrades
667
+
668
+ ### P2.6 Show capability/effectiveness/cancellation panels in dashboard
669
+
670
+ **Target behavior**
671
+
672
+ Dashboard panes expose:
673
+
674
+ - run effectiveness score and no-observed-work tasks;
675
+ - cancellation reason and intent;
676
+ - capability inventory for selected task;
677
+ - attempt/deadletter timeline.
678
+
679
+ **Suggested files**
680
+
681
+ - `src/ui/run-dashboard.ts`
682
+ - `src/ui/dashboard-panes/*`
683
+ - `src/ui/snapshot-types.ts`
684
+ - `src/ui/run-snapshot-cache.ts`
685
+ - `test/unit/run-dashboard.test.ts`
686
+ - new pane tests
687
+
688
+ **Acceptance criteria**
689
+
690
+ - No heavy synchronous scans in render path.
691
+ - Pane output is width-safe.
692
+ - Snapshot cache provides precomputed compact data.
693
+
694
+ ### P2.7 Event-first UI stream
695
+
696
+ **Target behavior**
697
+
698
+ Move more live UI updates from file polling to semantic events:
699
+
700
+ - `task_started`
701
+ - `task_completed`
702
+ - `worker_status`
703
+ - `mailbox_updated`
704
+ - `effectiveness_changed`
705
+
706
+ **Acceptance criteria**
707
+
708
+ - Render scheduler remains coalesced and overlap-safe.
709
+ - UI still recovers from durable files after restart.
710
+ - File polling is fallback, not the hot path.
711
+
712
+ ## P2 — Raw Scan Entry Cache
713
+
714
+ ### P2.8 Cache raw entries, not final semantic query results
715
+
716
+ **Target behavior**
717
+
718
+ Shared raw scan cache for:
719
+
720
+ - runs
721
+ - artifacts
722
+ - mailbox files
723
+ - transcript chunks
724
+ - worktree roots
725
+
726
+ Then apply filters/sorts after retrieval.
727
+
728
+ **Suggested files**
729
+
730
+ - `src/runtime/manifest-cache.ts`
731
+ - `src/ui/run-snapshot-cache.ts`
732
+ - `src/extension/run-index.ts`
733
+ - `src/utils/file-coalescer.ts`
734
+
735
+ **Acceptance criteria**
736
+
737
+ - Deterministic sort order.
738
+ - State mutation invalidates relevant raw entries.
739
+ - Large workspaces do not trigger full rescans on every render/status.
740
+
741
+ ## P3 — Release/Install Hardening
742
+
743
+ ### P3.1 Tarball install smoke before publish
744
+
745
+ **Target behavior**
746
+
747
+ Release workflow requires:
748
+
749
+ ```bash
750
+ npm run ci
751
+ npm pack --dry-run
752
+ npm pack
753
+ # install tarball in temp project
754
+ # verify pi extension load smoke
755
+ # verify npm package files and version/tag consistency
756
+ ```
757
+
758
+ **Suggested files**
759
+
760
+ - `docs/publishing.md`
761
+ - `package.json` scripts
762
+ - `.github/workflows/*` if CI is added
763
+ - optional `scripts/release-smoke.mjs`
764
+
765
+ **Acceptance criteria**
766
+
767
+ - Packed tarball loads extension in temp Pi home.
768
+ - Version in package, changelog, tag, and `npm view` output are consistent.
769
+ - Release instructions include rollback notes.
770
+
771
+ ## Suggested Implementation Order
772
+
773
+ 1. ~~**P0.1 Effectiveness policy enforcement**~~ ✅ Completed — default guard escalates `warn` to `blocked` for mutating-role tasks.
774
+ 2. ~~**P0.2 Persist runtime safety**~~ ✅ Completed — manifests persist `runtimeResolution`; `runtime.resolved` event emitted.
775
+ 3. **P1.3 Two-phase worker teardown** — reduces stale/zombie worker risk.
776
+ 4. ~~**P1.1 Durable steering/follow-up queues**~~ ✅ Completed — `readMailbox` kind filter; API `read-mailbox` supports `config.kind`.
777
+ 5. ~~**P1.5 Event-tree provenance**~~ ✅ Completed — `TeamEventMetadata` extended with `parentEventId`/`attemptId`/`branchId`/`causationId`/`correlationId`.
778
+ 6. ~~**P1.7 Capability inventory view**~~ ✅ Completed — `buildCapabilityInventory()` + API `operation=inventory` + dashboard pane.
779
+ 7. ~~**P2.3 Durable history projection**~~ ✅ Completed — `transformRunContextBeforeWorkerStart()` + `convertRunHistoryToWorkerPrompt()`.
780
+ 8. ~~**P2.4 CancellationToken**~~ ✅ Completed — wired into `collectRuns`/`validateMailbox`/`pruneFinishedRuns`/`cleanupRunWorktrees` etc.
781
+ 9. ~~**P2.5 Blob artifacts**~~ ✅ Completed — content-addressed blob store with SHA-256 dedup and metadata sidecars.
782
+ 10. ~~**P2.6 Dashboard panels**~~ ✅ Completed — capability and cancellation panes.
783
+
784
+ Also completed (not in original order list):
785
+ - ~~**P1.6 Synthetic terminal results**~~ ✅ — `buildSyntheticTerminalEvidence()` for cancelled in-flight tasks.
786
+ - ~~**P2.1 Typed hook lifecycle**~~ ✅ — `before_run_start`/`before_task_start` wired into team-runner.
787
+
788
+ ## Release Guidance
789
+
790
+ Before publishing a patch with these upgrades:
791
+
792
+ ```bash
793
+ npx tsc --noEmit
794
+ npm run test:unit
795
+ npm run test:integration
796
+ npm pack --dry-run
797
+ ```
798
+
799
+ For runtime/process changes also run targeted child-worker integration tests:
800
+
801
+ ```bash
802
+ node --experimental-strip-types --test --test-concurrency=1 --test-timeout=60000 \
803
+ test/integration/mock-child-run.test.ts \
804
+ test/integration/mock-child-json-run.test.ts \
805
+ test/integration/phase6-runtime-hardening.test.ts
806
+ ```
807
+
808
+ Do not publish without explicit user confirmation and a green verification pass.