pi-crew 0.1.51 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (239) hide show
  1. package/CHANGELOG.md +56 -1
  2. package/README.md +176 -781
  3. package/agents/analyst.md +11 -11
  4. package/agents/critic.md +11 -11
  5. package/agents/executor.md +11 -11
  6. package/agents/explorer.md +11 -11
  7. package/agents/planner.md +11 -11
  8. package/agents/reviewer.md +11 -11
  9. package/agents/security-reviewer.md +11 -11
  10. package/agents/test-engineer.md +11 -11
  11. package/agents/verifier.md +70 -11
  12. package/agents/writer.md +11 -11
  13. package/docs/actions-reference.md +595 -0
  14. package/docs/commands-reference.md +347 -0
  15. package/docs/runtime-flow.md +148 -148
  16. package/index.ts +6 -6
  17. package/package.json +99 -99
  18. package/skills/async-worker-recovery/SKILL.md +42 -42
  19. package/skills/context-artifact-hygiene/SKILL.md +52 -52
  20. package/skills/delegation-patterns/SKILL.md +54 -54
  21. package/skills/mailbox-interactive/SKILL.md +40 -40
  22. package/skills/model-routing-context/SKILL.md +39 -39
  23. package/skills/multi-perspective-review/SKILL.md +58 -58
  24. package/skills/observability-reliability/SKILL.md +41 -41
  25. package/skills/orchestration/SKILL.md +157 -157
  26. package/skills/ownership-session-security/SKILL.md +41 -41
  27. package/skills/pi-extension-lifecycle/SKILL.md +39 -39
  28. package/skills/requirements-to-task-packet/SKILL.md +63 -63
  29. package/skills/resource-discovery-config/SKILL.md +41 -41
  30. package/skills/runtime-state-reader/SKILL.md +44 -44
  31. package/skills/secure-agent-orchestration-review/SKILL.md +45 -45
  32. package/skills/state-mutation-locking/SKILL.md +42 -42
  33. package/skills/systematic-debugging/SKILL.md +67 -67
  34. package/skills/ui-render-performance/SKILL.md +39 -39
  35. package/skills/verification-before-done/SKILL.md +57 -57
  36. package/skills/worktree-isolation/SKILL.md +39 -39
  37. package/src/adapters/claude-adapter.ts +25 -0
  38. package/src/adapters/codex-adapter.ts +21 -0
  39. package/src/adapters/cursor-adapter.ts +17 -0
  40. package/src/adapters/export-util.ts +137 -0
  41. package/src/adapters/index.ts +15 -0
  42. package/src/adapters/registry.ts +18 -0
  43. package/src/adapters/types.ts +23 -0
  44. package/src/agents/agent-config.ts +2 -0
  45. package/src/agents/agent-search.ts +98 -98
  46. package/src/agents/discover-agents.ts +2 -1
  47. package/src/config/config.ts +13 -1
  48. package/src/config/drift-detector.ts +211 -0
  49. package/src/config/markers.ts +327 -0
  50. package/src/config/resilient-parser.ts +108 -0
  51. package/src/config/suggestions.ts +74 -0
  52. package/src/extension/cross-extension-rpc.ts +103 -94
  53. package/src/extension/project-init.ts +21 -1
  54. package/src/extension/register.ts +45 -14
  55. package/src/extension/registration/commands.ts +77 -8
  56. package/src/extension/registration/subagent-tools.ts +10 -1
  57. package/src/extension/registration/team-tool.ts +10 -1
  58. package/src/extension/registration/viewers.ts +48 -34
  59. package/src/extension/run-bundle-schema.ts +89 -89
  60. package/src/extension/run-import.ts +25 -1
  61. package/src/extension/run-index.ts +5 -1
  62. package/src/extension/run-maintenance.ts +142 -68
  63. package/src/extension/team-manager-command.ts +10 -1
  64. package/src/extension/team-tool/doctor.ts +28 -3
  65. package/src/extension/team-tool/handle-settings.ts +195 -188
  66. package/src/extension/team-tool/inspect.ts +41 -41
  67. package/src/extension/team-tool/intent-policy.ts +42 -42
  68. package/src/extension/team-tool/lifecycle-actions.ts +27 -8
  69. package/src/extension/team-tool/plan.ts +19 -19
  70. package/src/extension/team-tool/run.ts +12 -1
  71. package/src/extension/team-tool.ts +11 -1
  72. package/src/i18n.ts +184 -184
  73. package/src/observability/exporters/otlp-exporter.ts +92 -77
  74. package/src/prompt/prompt-runtime.ts +72 -72
  75. package/src/runtime/agent-memory.ts +72 -72
  76. package/src/runtime/agent-observability.ts +114 -114
  77. package/src/runtime/async-marker.ts +26 -26
  78. package/src/runtime/attention-events.ts +28 -28
  79. package/src/runtime/auto-resume.ts +100 -0
  80. package/src/runtime/background-runner.ts +11 -1
  81. package/src/runtime/cancellation-token.ts +89 -89
  82. package/src/runtime/cancellation.ts +61 -61
  83. package/src/runtime/capability-inventory.ts +116 -116
  84. package/src/runtime/child-pi.ts +7 -2
  85. package/src/runtime/compaction-summary.ts +271 -0
  86. package/src/runtime/completion-guard.ts +190 -190
  87. package/src/runtime/crash-recovery.ts +33 -0
  88. package/src/runtime/delta-conflict.ts +360 -0
  89. package/src/runtime/direct-run.ts +35 -35
  90. package/src/runtime/foreground-control.ts +82 -82
  91. package/src/runtime/green-contract.ts +46 -46
  92. package/src/runtime/group-join.ts +106 -106
  93. package/src/runtime/heartbeat-gradient.ts +28 -28
  94. package/src/runtime/heartbeat-watcher.ts +124 -124
  95. package/src/runtime/iteration-hooks.ts +262 -0
  96. package/src/runtime/live-agent-control.ts +88 -88
  97. package/src/runtime/live-control-realtime.ts +36 -36
  98. package/src/runtime/live-extension-bridge.ts +150 -150
  99. package/src/runtime/live-irc.ts +92 -92
  100. package/src/runtime/live-session-health.ts +100 -100
  101. package/src/runtime/loop-gates.ts +129 -0
  102. package/src/runtime/metric-parser.ts +40 -0
  103. package/src/runtime/notebook-helpers.ts +90 -90
  104. package/src/runtime/orphan-sentinel.ts +7 -7
  105. package/src/runtime/parallel-research.ts +44 -44
  106. package/src/runtime/phase-progress.ts +217 -0
  107. package/src/runtime/pi-args.ts +38 -11
  108. package/src/runtime/pi-json-output.ts +111 -111
  109. package/src/runtime/pi-spawn.ts +57 -7
  110. package/src/runtime/policy-engine.ts +79 -79
  111. package/src/runtime/post-checks.ts +122 -0
  112. package/src/runtime/progress-event-coalescer.ts +43 -43
  113. package/src/runtime/prose-compressor.ts +164 -164
  114. package/src/runtime/recovery-recipes.ts +74 -74
  115. package/src/runtime/result-extractor.ts +121 -121
  116. package/src/runtime/role-permission.ts +39 -39
  117. package/src/runtime/sensitive-paths.ts +2 -2
  118. package/src/runtime/session-resources.ts +25 -25
  119. package/src/runtime/session-snapshot.ts +59 -59
  120. package/src/runtime/session-usage.ts +79 -79
  121. package/src/runtime/sidechain-output.ts +29 -29
  122. package/src/runtime/stream-preview.ts +177 -177
  123. package/src/runtime/supervisor-contact.ts +59 -59
  124. package/src/runtime/task-display.ts +38 -38
  125. package/src/runtime/task-graph.ts +207 -0
  126. package/src/runtime/task-quality.ts +207 -0
  127. package/src/runtime/task-runner/capabilities.ts +78 -78
  128. package/src/runtime/task-runner/live-executor.ts +7 -1
  129. package/src/runtime/task-runner/progress.ts +119 -119
  130. package/src/runtime/task-runner/prompt-pipeline.ts +64 -64
  131. package/src/runtime/task-runner/result-utils.ts +14 -14
  132. package/src/runtime/task-runner/run-projection.ts +103 -103
  133. package/src/runtime/task-runner/state-helpers.ts +22 -22
  134. package/src/runtime/team-runner.ts +117 -7
  135. package/src/runtime/worker-heartbeat.ts +21 -21
  136. package/src/runtime/worker-startup.ts +57 -57
  137. package/src/runtime/workflow-state.ts +187 -0
  138. package/src/runtime/workspace-tree.ts +298 -298
  139. package/src/schema/config-schema.ts +11 -0
  140. package/src/schema/validation-types.ts +148 -0
  141. package/src/skills/skill-templates.ts +374 -0
  142. package/src/state/active-run-registry.ts +35 -11
  143. package/src/state/atomic-write.ts +33 -26
  144. package/src/state/contracts.ts +1 -0
  145. package/src/state/event-reconstructor.ts +217 -0
  146. package/src/state/locks.ts +2 -13
  147. package/src/state/mailbox.ts +4 -3
  148. package/src/state/state-store.ts +32 -14
  149. package/src/state/task-claims.ts +44 -44
  150. package/src/state/types.ts +9 -0
  151. package/src/state/usage.ts +29 -29
  152. package/src/subagents/async-entry.ts +1 -1
  153. package/src/subagents/index.ts +3 -3
  154. package/src/subagents/live/control.ts +1 -1
  155. package/src/subagents/live/manager.ts +1 -1
  156. package/src/subagents/live/realtime.ts +1 -1
  157. package/src/subagents/live/session-runtime.ts +1 -1
  158. package/src/subagents/manager.ts +1 -1
  159. package/src/subagents/spawn.ts +1 -1
  160. package/src/teams/team-serializer.ts +38 -38
  161. package/src/types/diff.d.ts +18 -18
  162. package/src/ui/crew-footer.ts +101 -101
  163. package/src/ui/crew-select-list.ts +111 -111
  164. package/src/ui/crew-widget.ts +5 -2
  165. package/src/ui/dashboard-panes/cancellation-pane.ts +42 -42
  166. package/src/ui/dashboard-panes/capability-pane.ts +59 -59
  167. package/src/ui/dashboard-panes/mailbox-pane.ts +35 -35
  168. package/src/ui/dashboard-panes/metrics-pane.ts +34 -34
  169. package/src/ui/dashboard-panes/progress-pane.ts +11 -0
  170. package/src/ui/dynamic-border.ts +25 -25
  171. package/src/ui/layout-primitives.ts +106 -106
  172. package/src/ui/loaders.ts +158 -158
  173. package/src/ui/render-coalescer.ts +51 -51
  174. package/src/ui/render-diff.ts +119 -119
  175. package/src/ui/render-scheduler.ts +143 -143
  176. package/src/ui/run-action-dispatcher.ts +10 -1
  177. package/src/ui/spinner.ts +17 -17
  178. package/src/ui/status-colors.ts +58 -58
  179. package/src/ui/syntax-highlight.ts +116 -116
  180. package/src/ui/transcript-entries.ts +258 -258
  181. package/src/utils/completion-dedupe.ts +63 -63
  182. package/src/utils/frontmatter.ts +68 -68
  183. package/src/utils/git.ts +262 -262
  184. package/src/utils/ids.ts +17 -17
  185. package/src/utils/incremental-reader.ts +104 -104
  186. package/src/utils/names.ts +27 -27
  187. package/src/utils/redaction.ts +44 -44
  188. package/src/utils/safe-paths.ts +47 -47
  189. package/src/utils/scan-cache.ts +136 -136
  190. package/src/utils/sleep.ts +40 -26
  191. package/src/utils/task-name-generator.ts +337 -337
  192. package/src/workflows/validate-workflow.ts +40 -40
  193. package/src/worktree/branch-freshness.ts +45 -45
  194. package/teams/default.team.md +12 -12
  195. package/teams/fast-fix.team.md +11 -11
  196. package/teams/implementation.team.md +18 -18
  197. package/teams/parallel-research.team.md +14 -14
  198. package/teams/research.team.md +11 -11
  199. package/teams/review.team.md +12 -12
  200. package/workflows/default.workflow.md +30 -29
  201. package/workflows/fast-fix.workflow.md +23 -22
  202. package/workflows/implementation.workflow.md +43 -43
  203. package/workflows/parallel-research.workflow.md +46 -46
  204. package/workflows/research.workflow.md +22 -22
  205. package/workflows/review.workflow.md +30 -30
  206. package/docs/refactor-tasks-phase3.md +0 -394
  207. package/docs/refactor-tasks-phase4.md +0 -564
  208. package/docs/refactor-tasks-phase5.md +0 -402
  209. package/docs/refactor-tasks-phase6.md +0 -662
  210. package/docs/refactor-tasks.md +0 -1484
  211. package/docs/research/AGENT-EXECUTION-ARCHITECTURE.md +0 -261
  212. package/docs/research/AGENT-LIFECYCLE-COMPARISON.md +0 -111
  213. package/docs/research/AUDIT_OH_MY_PI.md +0 -261
  214. package/docs/research/AUDIT_PI_CREW.md +0 -457
  215. package/docs/research/CAVEMAN-DEEP-RESEARCH.md +0 -281
  216. package/docs/research/COMPARISON_OH_MY_PI_VS_PI_CREW.md +0 -264
  217. package/docs/research/DEEP-RESEARCH-PI-POWERBAR.md +0 -343
  218. package/docs/research/DEEP_RESEARCH_SUBAGENT_ARCHITECTURE.md +0 -480
  219. package/docs/research/GAP_CLOSURE_IMPLEMENTATION_PLAN.md +0 -354
  220. package/docs/research/IMPLEMENTATION_PLAN.md +0 -385
  221. package/docs/research/LIVE-SESSION-PRODUCTION-READY-PLAN.md +0 -502
  222. package/docs/research/OH-MY-PI-DEEP-RESEARCH-v14.7.6.md +0 -266
  223. package/docs/research/REMAINING-GAPS-PLAN.md +0 -363
  224. package/docs/research/SESSION-SUMMARY-2026-05-08.md +0 -146
  225. package/docs/research/UI-RESPONSIVENESS-AUDIT.md +0 -173
  226. package/docs/research-awesome-agent-skills-distillation.md +0 -100
  227. package/docs/research-extension-examples.md +0 -297
  228. package/docs/research-extension-system.md +0 -324
  229. package/docs/research-oh-my-pi-distillation.md +0 -369
  230. package/docs/research-optimization-plan.md +0 -548
  231. package/docs/research-phase10-distillation.md +0 -199
  232. package/docs/research-phase11-distillation.md +0 -201
  233. package/docs/research-phase8-operator-experience-plan.md +0 -819
  234. package/docs/research-phase9-observability-reliability-plan.md +0 -1190
  235. package/docs/research-pi-coding-agent.md +0 -357
  236. package/docs/research-source-pi-crew-reference.md +0 -174
  237. package/docs/research-ui-optimization-plan.md +0 -480
  238. package/docs/source-runtime-refactor-map.md +0 -107
  239. package/src/utils/atomic-write.ts +0 -33
@@ -1,6 +1,7 @@
1
1
  import * as fs from "node:fs";
2
2
  import * as path from "node:path";
3
3
  import { logInternalError } from "../utils/internal-error.ts";
4
+ import { sleepSync } from "../utils/sleep.ts";
4
5
 
5
6
  const RETRYABLE_RENAME_CODES = new Set(["EPERM", "EBUSY", "EACCES"]);
6
7
 
@@ -40,25 +41,7 @@ function isSymlinkSafePath(filePath: string): boolean {
40
41
  }
41
42
  }
42
43
 
43
- /**
44
- * Synchronous sleep using Atomics.wait (non-busy) with busy-wait fallback.
45
- *
46
- * WARNING: This blocks the Node.js main thread. Only used in atomic-write
47
- * rename retry path where sync I/O is required by the caller.
48
- * NOT safe to call from Pi extension async code paths.
49
- */
50
- function sleepSync(ms: number): void {
51
- try {
52
- const buffer = new SharedArrayBuffer(4);
53
- Atomics.wait(new Int32Array(buffer), 0, 0, ms);
54
- } catch {
55
- // Fallback for environments without SharedArrayBuffer / Atomics.wait support.
56
- const deadline = Date.now() + ms;
57
- while (Date.now() < deadline) {
58
- // Busy-wait — only used as last-resort, retry counts are capped.
59
- }
60
- }
61
- }
44
+
62
45
 
63
46
  function sleep(ms: number): Promise<void> {
64
47
  return new Promise((resolve) => setTimeout(resolve, ms));
@@ -68,7 +51,7 @@ function isRetryableRenameError(error: unknown): boolean {
68
51
  return Boolean(error && typeof error === "object" && "code" in error && RETRYABLE_RENAME_CODES.has(String((error as NodeJS.ErrnoException).code)));
69
52
  }
70
53
 
71
- export function __test__renameWithRetry(tempPath: string, filePath: string, retries = 5, rename: (oldPath: string, newPath: string) => void = fs.renameSync): void {
54
+ export function __test__renameWithRetry(tempPath: string, filePath: string, retries = 10, rename: (oldPath: string, newPath: string) => void = fs.renameSync): void {
72
55
  let lastError: unknown;
73
56
  for (let attempt = 0; attempt <= retries; attempt++) {
74
57
  try {
@@ -77,13 +60,15 @@ export function __test__renameWithRetry(tempPath: string, filePath: string, retr
77
60
  } catch (error) {
78
61
  lastError = error;
79
62
  if (!isRetryableRenameError(error) || attempt === retries) break;
80
- sleepSync(Math.min(250, 10 * 2 ** attempt));
63
+ // Exponential backoff: 10ms, 20ms, 40ms, ..., capped at 500ms
64
+ // Windows EPERM on rename can take longer when multiple processes contend
65
+ sleepSync(Math.min(500, 10 * 2 ** attempt));
81
66
  }
82
67
  }
83
68
  throw lastError;
84
69
  }
85
70
 
86
- export async function __test__renameWithRetryAsync(tempPath: string, filePath: string, retries = 5, rename: (oldPath: string, newPath: string) => Promise<void> = (source, destination) => fs.promises.rename(source, destination)): Promise<void> {
71
+ export async function __test__renameWithRetryAsync(tempPath: string, filePath: string, retries = 10, rename: (oldPath: string, newPath: string) => Promise<void> = (source, destination) => fs.promises.rename(source, destination)): Promise<void> {
87
72
  let lastError: unknown;
88
73
  for (let attempt = 0; attempt <= retries; attempt++) {
89
74
  try {
@@ -92,7 +77,7 @@ export async function __test__renameWithRetryAsync(tempPath: string, filePath: s
92
77
  } catch (error) {
93
78
  lastError = error;
94
79
  if (!isRetryableRenameError(error) || attempt === retries) break;
95
- await sleep(Math.min(250, 10 * 2 ** attempt));
80
+ await sleep(Math.min(500, 10 * 2 ** attempt));
96
81
  }
97
82
  }
98
83
  throw lastError;
@@ -106,6 +91,12 @@ export function atomicWriteFile(filePath: string, content: string): void {
106
91
  const O_NOFOLLOW = typeof fs.constants.O_NOFOLLOW === "number" ? fs.constants.O_NOFOLLOW : 0;
107
92
  try {
108
93
  const fd = fs.openSync(tempPath, fs.constants.O_WRONLY | fs.constants.O_CREAT | fs.constants.O_EXCL | O_NOFOLLOW, 0o644);
94
+ // Post-open verification: on Windows O_NOFOLLOW is 0, so verify FD is a regular file
95
+ const openedStat = fs.fstatSync(fd);
96
+ if (!openedStat.isFile()) {
97
+ fs.closeSync(fd);
98
+ throw new Error(`Refusing to write: opened path is not a regular file: ${tempPath}`);
99
+ }
109
100
  fs.writeSync(fd, content, undefined, "utf-8");
110
101
  fs.closeSync(fd);
111
102
  __test__renameWithRetry(tempPath, filePath);
@@ -125,7 +116,16 @@ export async function atomicWriteFileAsync(filePath: string, content: string): P
125
116
  await fs.promises.mkdir(path.dirname(filePath), { recursive: true });
126
117
  const tempPath = `${filePath}.${process.pid}.${Date.now()}.${Math.random().toString(36).slice(2)}.tmp`;
127
118
  try {
128
- await fs.promises.writeFile(tempPath, content, "utf-8");
119
+ const O_NOFOLLOW = typeof fs.constants.O_NOFOLLOW === "number" ? fs.constants.O_NOFOLLOW : 0;
120
+ const fd = await fs.promises.open(tempPath, fs.constants.O_WRONLY | fs.constants.O_CREAT | fs.constants.O_EXCL | O_NOFOLLOW, 0o644);
121
+ // Post-open verification: on Windows O_NOFOLLOW is 0, so verify FD is a regular file
122
+ const openedStat = await fd.stat();
123
+ if (!openedStat.isFile()) {
124
+ await fd.close();
125
+ throw new Error(`Refusing to write: opened path is not a regular file: ${tempPath}`);
126
+ }
127
+ await fd.writeFile(content, "utf-8");
128
+ await fd.close();
129
129
  try {
130
130
  await __test__renameWithRetryAsync(tempPath, filePath);
131
131
  } catch (renameError) {
@@ -166,6 +166,13 @@ export async function atomicWriteJsonAsync<T>(filePath: string, value: T): Promi
166
166
  }
167
167
 
168
168
  export function readJsonFile<T>(filePath: string): T | undefined {
169
- if (!fs.existsSync(filePath)) return undefined;
170
- return JSON.parse(fs.readFileSync(filePath, "utf-8")) as T;
169
+ try {
170
+ return JSON.parse(fs.readFileSync(filePath, "utf-8")) as T;
171
+ } catch (err) {
172
+ const code = (err as NodeJS.ErrnoException).code;
173
+ if (code !== "ENOENT" && code !== "ENOTDIR") {
174
+ logInternalError("readJsonFile", err, `filePath=${filePath}`);
175
+ }
176
+ return undefined;
177
+ }
171
178
  }
@@ -36,6 +36,7 @@ export const TEAM_EVENT_TYPES = [
36
36
  "run.completed",
37
37
  "run.failed",
38
38
  "run.cancelled",
39
+ "task.created",
39
40
  "task.started",
40
41
  "task.progress",
41
42
  "task.blocked",
@@ -0,0 +1,217 @@
1
+ /**
2
+ * Event reconstructor — rebuilds task state from the append-only event log.
3
+ *
4
+ * Primary use-case: crash recovery when tasks.json is corrupted or missing.
5
+ * The materialized tasks.json view is the primary source of truth; this
6
+ * module provides a fallback reconstruction path from events.jsonl.
7
+ *
8
+ * Distilled from pi-autoresearch's append-only event log pattern.
9
+ */
10
+ import type { TeamEvent } from "./event-log.ts";
11
+ import { readEvents } from "./event-log.ts";
12
+
13
/** Every status value a task can be assigned while replaying the event log. */
const RECONSTRUCTABLE_STATUSES = new Set([
  "created",
  "queued",
  "running",
  "completed",
  "failed",
  "cancelled",
  "skipped",
  "waiting",
]);

/**
 * Event types considered part of a task's lifecycle.
 * Events whose type is not listed here are ignored during reconstruction.
 */
const TASK_LIFECYCLE_EVENT_TYPES = new Set([
  "task.created",
  "task.started",
  "task.completed",
  "task.failed",
  "task.skipped",
  "task.cancelled",
  "task.waiting",
  "task.resumed",
  "task.retried",
  "task.blocked",
  "task.progress",
  "task.green",
  "task.red",
]);

/** Event types that end a task attempt; their timestamp becomes finishedAt. */
const TERMINAL_EVENTS = new Set([
  "task.completed",
  "task.failed",
  "task.cancelled",
  "task.skipped",
]);

/**
 * Lookup table: lifecycle event type -> status the task takes on.
 * Lifecycle events without an entry (blocked/progress/green/red) leave the
 * status untouched.
 */
const EVENT_STATUS_MAP: Readonly<Record<string, string>> = {
  "task.created": "created",
  "task.started": "running",
  "task.completed": "completed",
  "task.failed": "failed",
  "task.skipped": "skipped",
  "task.cancelled": "cancelled",
  "task.waiting": "waiting",
  "task.resumed": "running",
  "task.retried": "queued",
};
48
+
49
/** Snapshot of a single task as rebuilt from event log entries alone. */
export interface ReconstructedTaskState {
  /** Identifier of the task (the taskId carried by its events). */
  id: string;
  /** Status implied by the most recent status-bearing lifecycle event. */
  status: string;
  /** Time of the latest task.started event seen for this task, if any. */
  startedAt?: string;
  /** Time of the latest terminal event (completed/failed/cancelled/skipped), if any. */
  finishedAt?: string;
  /** Message taken from a task.failed event. */
  error?: string;
  /** Segment number read from event data (used for retry isolation). */
  segment?: number;
  /** Structured diagnostics object read from event data. */
  diagnostics?: Record<string, unknown>;
  /** Finite numeric metrics read from event data. */
  metrics?: Record<string, number>;
}

/** Outcome of replaying an event log into per-task state. */
export interface ReconstructionResult {
  /** Reconstructed state keyed by task id. */
  tasks: Map<string, ReconstructedTaskState>;
  /** How many events were processed in total. */
  eventCount: number;
  /** How many malformed or unparseable events were skipped. */
  corruptedCount: number;
}
78
+
79
/** Where events come from: an already-loaded event array, or a path to an events file. */
export type EventSource = TeamEvent[] | string;
81
+
82
/** True when the event's type is one of the known task lifecycle event types. */
function isTaskLifecycleEvent(event: TeamEvent): boolean {
  const { type } = event;
  return TASK_LIFECYCLE_EVENT_TYPES.has(type);
}
85
+
86
/**
 * Look up the task status implied by an event type.
 *
 * @returns the mapped status, or undefined when the event type has no entry
 *          in EVENT_STATUS_MAP.
 */
function statusFromEventType(eventType: string): string | undefined {
  const mappedStatus = EVENT_STATUS_MAP[eventType];
  return mappedStatus;
}
89
+
90
/**
 * Accept a value only if it is a finite number.
 *
 * @returns the number itself, or undefined for non-numbers, NaN and ±Infinity.
 */
function safeNumber(value: unknown): number | undefined {
  if (typeof value !== "number") return undefined;
  return Number.isFinite(value) ? value : undefined;
}
93
+
94
/**
 * Accept a value only if it is a non-null, non-array object.
 *
 * @returns the same object typed as a string-keyed record, otherwise undefined.
 */
function safeRecord(value: unknown): Record<string, unknown> | undefined {
  const isObjectLike = typeof value === "object" && value !== null && !Array.isArray(value);
  return isObjectLike ? (value as Record<string, unknown>) : undefined;
}
100
+
101
/**
 * Extract the finite-number entries of an object.
 *
 * @returns a new record holding only the entries whose value is a finite
 *          number, or undefined when the input is not a non-array object or
 *          no such entry exists.
 */
function safeNumericRecord(value: unknown): Record<string, number> | undefined {
  if (value === null || typeof value !== "object" || Array.isArray(value)) {
    return undefined;
  }
  const source = value as Record<string, unknown>;
  const numeric: Record<string, number> = {};
  for (const key of Object.keys(source)) {
    const candidate = source[key];
    if (typeof candidate !== "number" || !Number.isFinite(candidate)) continue;
    numeric[key] = candidate;
  }
  // Decide on the keys actually stored, not on how many candidates matched.
  return Object.keys(numeric).length > 0 ? numeric : undefined;
}
116
+
117
/**
 * Parse one raw JSONL line into an event.
 *
 * @returns the parsed object when it is valid JSON, is a non-null object and
 *          has string `type` and `runId` fields; undefined for blank lines,
 *          invalid JSON, or objects missing those fields.
 */
function parseEventLine(line: string): TeamEvent | undefined {
  const text = line.trim();
  if (text === "") return undefined;

  let candidate: unknown;
  try {
    candidate = JSON.parse(text);
  } catch {
    // Malformed JSON is reported to the caller as "no event".
    return undefined;
  }

  if (candidate === null || typeof candidate !== "object") return undefined;
  const fields = candidate as { type?: unknown; runId?: unknown };
  const hasRequiredFields = typeof fields.type === "string" && typeof fields.runId === "string";
  return hasRequiredFields ? (candidate as TeamEvent) : undefined;
}
129
+
130
/**
 * Fold already-validated events into per-task reconstructed state.
 * Used by both the event-array/file entry point and the raw-line entry point.
 *
 * Events without a non-empty string taskId, and events whose type is not a
 * task lifecycle type, are ignored. Events are applied in iteration order, so
 * later events overwrite fields set by earlier ones.
 *
 * @param events - events to replay, in log order
 * @param eventCount - passed through unchanged into the result
 * @param corruptedCount - passed through unchanged into the result
 * @returns the task map together with the two supplied counters
 */
function processEvents(events: Iterable<TeamEvent>, eventCount: number, corruptedCount: number): ReconstructionResult {
  const tasks = new Map<string, ReconstructedTaskState>();

  for (const event of events) {
    const { taskId, type } = event;
    if (typeof taskId !== "string" || taskId.length === 0 || !isTaskLifecycleEvent(event)) continue;

    // First lifecycle event for a task creates its entry with status "created".
    let state = tasks.get(taskId);
    if (state === undefined) {
      state = { id: taskId, status: "created" };
      tasks.set(taskId, state);
    }

    // Only event types with a status mapping change the status.
    const mappedStatus = statusFromEventType(type);
    if (mappedStatus && RECONSTRUCTABLE_STATUSES.has(mappedStatus)) {
      state.status = mappedStatus;
    }

    // NOTE(review): finishedAt and error are never cleared, so they remain set
    // when a later task.retried / task.resumed moves the task back to a
    // non-terminal status — confirm that consumers expect this.
    if (type === "task.started") state.startedAt = event.time;
    if (TERMINAL_EVENTS.has(type)) state.finishedAt = event.time;
    if (type === "task.failed" && event.message) state.error = event.message;

    const payload = event.data;
    if (!payload) continue;

    // Optional payload fields are copied only when they pass validation.
    const segment = safeNumber(payload.segment);
    if (segment !== undefined) state.segment = segment;

    const diagnostics = safeRecord(payload.diagnostics);
    if (diagnostics !== undefined) state.diagnostics = diagnostics;

    const metrics = safeNumericRecord(payload.metrics);
    if (metrics !== undefined) state.metrics = metrics;
  }

  return { tasks, eventCount, corruptedCount };
}
179
+
180
/**
 * Rebuild task states from an append-only event log.
 *
 * A string source is passed to readEvents to obtain the events; an array
 * source is used as-is. The result's eventCount is the array length and its
 * corruptedCount is always 0 on this path.
 *
 * @param source - path to an events.jsonl file, or an array of TeamEvent objects
 * @returns the reconstructed task map plus counters
 */
export function reconstructTasksFromEvents(source: EventSource): ReconstructionResult {
  let events: TeamEvent[];
  if (typeof source === "string") {
    events = readEvents(source);
  } else {
    events = source;
  }
  return processEvents(events, events.length, 0);
}
190
+
191
/**
 * Rebuild task states from raw JSONL text lines.
 * Handy in tests because no file has to be created.
 *
 * Blank lines are skipped and not counted. Every other line counts toward
 * eventCount; lines that fail to parse additionally count toward
 * corruptedCount and contribute no event.
 *
 * @param lines - raw JSONL lines
 * @returns the reconstructed task map plus counters
 */
export function reconstructTasksFromLines(lines: string[]): ReconstructionResult {
  const parsedEvents: TeamEvent[] = [];
  let corruptedCount = 0;
  let eventCount = 0;

  for (const rawLine of lines) {
    const text = rawLine.trim();
    if (text.length === 0) continue;

    eventCount += 1;
    const event = parseEventLine(text);
    if (event === undefined) {
      corruptedCount += 1;
    } else {
      parsedEvents.push(event);
    }
  }

  return processEvents(parsedEvents, eventCount, corruptedCount);
}
@@ -2,6 +2,7 @@ import * as fs from "node:fs";
2
2
  import * as path from "node:path";
3
3
  import type { TeamRunManifest } from "./types.ts";
4
4
  import { DEFAULT_LOCKS } from "../config/defaults.ts";
5
+ import { sleepSync } from "../utils/sleep.ts";
5
6
 
6
7
  export interface RunLockOptions {
7
8
  staleMs?: number;
@@ -13,19 +14,7 @@ function lockPath(manifest: TeamRunManifest): string {
13
14
  return path.join(manifest.stateRoot, "run.lock");
14
15
  }
15
16
 
16
- function sleepSync(ms: number): void {
17
- try {
18
- Atomics.wait(new Int32Array(new SharedArrayBuffer(4)), 0, 0, ms);
19
- } catch {
20
- // Fallback for environments without SharedArrayBuffer / Atomics.wait support.
21
- // Use a short busy-wait with yielding intervals instead of continuous spin.
22
- const deadline = Date.now() + ms;
23
- while (Date.now() < deadline) {
24
- // Yield to event loop periodically — reduces CPU from 100% to ~1%
25
- for (let i = 0; i < 1e6; i++) { /* busy micro-yield */ }
26
- }
27
- }
28
- }
17
+
29
18
 
30
19
  function parseCreatedAtFromLock(raw: string): number | undefined {
31
20
  try {
@@ -3,6 +3,7 @@ import * as path from "node:path";
3
3
  import type { TeamRunManifest } from "./types.ts";
4
4
  import { resolveRealContainedPath } from "../utils/safe-paths.ts";
5
5
  import { redactSecrets } from "../utils/redaction.ts";
6
+ import { atomicWriteFile } from "./atomic-write.ts";
6
7
 
7
8
  export type MailboxDirection = "inbox" | "outbox";
8
9
  export type MailboxMessageStatus = "queued" | "delivered" | "acknowledged";
@@ -232,7 +233,7 @@ export function readDeliveryState(manifest: TeamRunManifest): MailboxDeliverySta
232
233
 
233
234
  function writeDeliveryState(manifest: TeamRunManifest, state: MailboxDeliveryState): void {
234
235
  ensureRunMailbox(manifest);
235
- fs.writeFileSync(deliveryFile(manifest, true), `${JSON.stringify(redactSecrets(state), null, 2)}\n`, "utf-8");
236
+ atomicWriteFile(deliveryFile(manifest, true), `${JSON.stringify(redactSecrets(state), null, 2)}\n`);
236
237
  }
237
238
 
238
239
  export function appendMailboxMessage(manifest: TeamRunManifest, message: Omit<MailboxMessage, "id" | "runId" | "createdAt" | "status"> & { id?: string; status?: MailboxMessageStatus }): MailboxMessage {
@@ -342,7 +343,7 @@ export function updateMailboxMessageReply(manifest: TeamRunManifest, originalMes
342
343
  }
343
344
  }
344
345
  if (found) {
345
- fs.writeFileSync(filePath, `${updatedLines.join("\n")}\n`, "utf-8");
346
+ atomicWriteFile(filePath, `${updatedLines.join("\n")}\n`);
346
347
  return;
347
348
  }
348
349
  }
@@ -384,7 +385,7 @@ export function validateMailbox(manifest: TeamRunManifest, options: { repair?: b
384
385
  }
385
386
  }
386
387
  if (options.repair && validLines.length !== lines.length) {
387
- fs.writeFileSync(filePath, `${validLines.join("\n")}${validLines.length ? "\n" : ""}`, "utf-8");
388
+ atomicWriteFile(filePath, `${validLines.join("\n")}${validLines.length ? "\n" : ""}`);
388
389
  repaired.push(filePath);
389
390
  }
390
391
  }
@@ -8,6 +8,7 @@ import { DEFAULT_CACHE, DEFAULT_PATHS } from "../config/defaults.ts";
8
8
  import { createRunId, createTaskId } from "../utils/ids.ts";
9
9
  import { findRepoRoot, projectCrewRoot, userCrewRoot } from "../utils/paths.ts";
10
10
  import { assertSafePathId, resolveContainedRelativePath, resolveRealContainedPath } from "../utils/safe-paths.ts";
11
+ import { withRunLockSync, withRunLock } from "./locks.ts";
11
12
  import type { TeamConfig } from "../teams/team-config.ts";
12
13
  import type { WorkflowConfig } from "../workflows/workflow-config.ts";
13
14
 
@@ -180,32 +181,49 @@ export function createRunManifest(params: {
180
181
  }
181
182
 
182
183
  export function saveRunManifest(manifest: TeamRunManifest): void {
183
- atomicWriteJson(path.join(manifest.stateRoot, "manifest.json"), manifest);
184
- invalidateRunCache(manifest.stateRoot);
184
+ withRunLockSync(manifest, () => {
185
+ atomicWriteJson(path.join(manifest.stateRoot, "manifest.json"), manifest);
186
+ invalidateRunCache(manifest.stateRoot);
187
+ });
185
188
  }
186
189
 
187
190
  export async function saveRunManifestAsync(manifest: TeamRunManifest): Promise<void> {
188
- await atomicWriteJsonAsync(path.join(manifest.stateRoot, "manifest.json"), manifest);
189
- invalidateRunCache(manifest.stateRoot);
191
+ await withRunLock(manifest, async () => {
192
+ await atomicWriteJsonAsync(path.join(manifest.stateRoot, "manifest.json"), manifest);
193
+ invalidateRunCache(manifest.stateRoot);
194
+ });
190
195
  }
191
196
 
192
197
  export function saveRunTasks(manifest: TeamRunManifest, tasks: TeamTaskState[]): void {
193
- atomicWriteJson(manifest.tasksPath, tasks);
194
- invalidateRunCache(manifest.stateRoot);
198
+ withRunLockSync(manifest, () => {
199
+ atomicWriteJson(manifest.tasksPath, tasks);
200
+ invalidateRunCache(manifest.stateRoot);
201
+ });
195
202
  }
196
203
 
197
204
  export async function saveRunTasksAsync(manifest: TeamRunManifest, tasks: TeamTaskState[]): Promise<void> {
198
- await atomicWriteJsonAsync(manifest.tasksPath, tasks);
199
- invalidateRunCache(manifest.stateRoot);
205
+ await withRunLock(manifest, async () => {
206
+ await atomicWriteJsonAsync(manifest.tasksPath, tasks);
207
+ invalidateRunCache(manifest.stateRoot);
208
+ });
200
209
  }
201
210
 
202
- /** M8: Atomically save manifest + tasks and invalidate cache once to prevent stale reads between saves */
211
+ /**
212
+ * Save manifest and tasks files with individual atomic writes.
213
+ *
214
+ * Note: The two writes are individually atomic (via rename) but not
215
+ * jointly atomic — a crash between writes can leave them inconsistent.
216
+ * This is acceptable because crash recovery detects and repairs
217
+ * inconsistent state on next session start.
218
+ */
203
219
  export async function saveManifestAndTasksAtomic(manifest: TeamRunManifest, tasks: TeamTaskState[]): Promise<void> {
204
- await Promise.all([
205
- atomicWriteJsonAsync(path.join(manifest.stateRoot, "manifest.json"), manifest),
206
- atomicWriteJsonAsync(manifest.tasksPath, tasks),
207
- ]);
208
- invalidateRunCache(manifest.stateRoot);
220
+ await withRunLock(manifest, async () => {
221
+ await Promise.all([
222
+ atomicWriteJsonAsync(path.join(manifest.stateRoot, "manifest.json"), manifest),
223
+ atomicWriteJsonAsync(manifest.tasksPath, tasks),
224
+ ]);
225
+ invalidateRunCache(manifest.stateRoot);
226
+ });
209
227
  }
210
228
 
211
229
  export interface UpdateRunStatusOptions {
@@ -1,44 +1,44 @@
1
- import { randomUUID } from "node:crypto";
2
- import type { TeamTaskState } from "./types.ts";
3
-
4
- export interface TaskClaimState {
5
- owner: string;
6
- token: string;
7
- leasedUntil: string;
8
- }
9
-
10
- export function createTaskClaim(owner: string, leaseMs = 5 * 60_000, now = new Date()): TaskClaimState {
11
- return { owner, token: randomUUID(), leasedUntil: new Date(now.getTime() + leaseMs).toISOString() };
12
- }
13
-
14
- export function isTaskClaimExpired(claim: TaskClaimState | undefined, now = new Date()): boolean {
15
- if (!claim) return false;
16
- const parsed = Date.parse(claim.leasedUntil);
17
- // Corrupt or invalid date strings produce NaN — treat as expired immediately.
18
- return Number.isFinite(parsed) ? parsed <= now.getTime() : true;
19
- }
20
-
21
- export function canUseTaskClaim(task: Pick<TeamTaskState, "claim">, owner: string, token: string, now = new Date()): boolean {
22
- return task.claim?.owner === owner && task.claim.token === token && !isTaskClaimExpired(task.claim, now);
23
- }
24
-
25
- export function claimTask<T extends TeamTaskState>(task: T, owner: string, leaseMs?: number, now = new Date()): T {
26
- if (task.claim && !isTaskClaimExpired(task.claim, now)) {
27
- throw new Error(`Task '${task.id}' is already claimed by '${task.claim.owner}'.`);
28
- }
29
- return { ...task, claim: createTaskClaim(owner, leaseMs, now) };
30
- }
31
-
32
- export function releaseTaskClaim<T extends TeamTaskState>(task: T, owner: string, token: string, now = new Date()): T {
33
- if (!canUseTaskClaim(task, owner, token, now)) {
34
- throw new Error(`Task '${task.id}' claim is not held by '${owner}' or has expired.`);
35
- }
36
- return { ...task, claim: undefined };
37
- }
38
-
39
- export function transitionClaimedTaskStatus<T extends TeamTaskState>(task: T, owner: string, token: string, status: T["status"], now = new Date()): T {
40
- if (!canUseTaskClaim(task, owner, token, now)) {
41
- throw new Error(`Task '${task.id}' claim is not held by '${owner}' or has expired.`);
42
- }
43
- return { ...task, status };
44
- }
1
+ import { randomUUID } from "node:crypto";
2
+ import type { TeamTaskState } from "./types.ts";
3
+
4
+ export interface TaskClaimState {
5
+ owner: string;
6
+ token: string;
7
+ leasedUntil: string;
8
+ }
9
+
10
+ export function createTaskClaim(owner: string, leaseMs = 5 * 60_000, now = new Date()): TaskClaimState {
11
+ return { owner, token: randomUUID(), leasedUntil: new Date(now.getTime() + leaseMs).toISOString() };
12
+ }
13
+
14
+ export function isTaskClaimExpired(claim: TaskClaimState | undefined, now = new Date()): boolean {
15
+ if (!claim) return false;
16
+ const parsed = Date.parse(claim.leasedUntil);
17
+ // Corrupt or invalid date strings produce NaN — treat as expired immediately.
18
+ return Number.isFinite(parsed) ? parsed <= now.getTime() : true;
19
+ }
20
+
21
+ export function canUseTaskClaim(task: Pick<TeamTaskState, "claim">, owner: string, token: string, now = new Date()): boolean {
22
+ return task.claim?.owner === owner && task.claim.token === token && !isTaskClaimExpired(task.claim, now);
23
+ }
24
+
25
+ export function claimTask<T extends TeamTaskState>(task: T, owner: string, leaseMs?: number, now = new Date()): T {
26
+ if (task.claim && !isTaskClaimExpired(task.claim, now)) {
27
+ throw new Error(`Task '${task.id}' is already claimed by '${task.claim.owner}'.`);
28
+ }
29
+ return { ...task, claim: createTaskClaim(owner, leaseMs, now) };
30
+ }
31
+
32
+ export function releaseTaskClaim<T extends TeamTaskState>(task: T, owner: string, token: string, now = new Date()): T {
33
+ if (!canUseTaskClaim(task, owner, token, now)) {
34
+ throw new Error(`Task '${task.id}' claim is not held by '${owner}' or has expired.`);
35
+ }
36
+ return { ...task, claim: undefined };
37
+ }
38
+
39
+ export function transitionClaimedTaskStatus<T extends TeamTaskState>(task: T, owner: string, token: string, status: T["status"], now = new Date()): T {
40
+ if (!canUseTaskClaim(task, owner, token, now)) {
41
+ throw new Error(`Task '${task.id}' claim is not held by '${owner}' or has expired.`);
42
+ }
43
+ return { ...task, status };
44
+ }
@@ -267,6 +267,15 @@ export interface TeamTaskState {
267
267
  lastDecision?: PolicyDecision;
268
268
  };
269
269
  controlReservation?: ControlReservation;
270
+
271
+ /** Structured diagnostics per task (ASI pattern from pi-autoresearch). */
272
+ diagnostics?: Record<string, unknown>;
273
+
274
+ /** Segment counter for task retry isolation. Default 0 (first attempt). Incremented on retry. */
275
+ segment?: number;
276
+
277
+ /** Parsed metric key-values from worker output (CREW_METRIC lines). */
278
+ metrics?: Record<string, number>;
270
279
  }
271
280
 
272
281
  export interface ControlReservation {
@@ -1,29 +1,29 @@
1
- import type { TeamTaskState, UsageState } from "./types.ts";
2
-
3
- export function aggregateUsage(tasks: TeamTaskState[]): UsageState | undefined {
4
- const total: UsageState = {};
5
- let found = false;
6
- for (const task of tasks) {
7
- if (!task.usage) continue;
8
- found = true;
9
- total.input = (total.input ?? 0) + (task.usage.input ?? 0);
10
- total.output = (total.output ?? 0) + (task.usage.output ?? 0);
11
- total.cacheRead = (total.cacheRead ?? 0) + (task.usage.cacheRead ?? 0);
12
- total.cacheWrite = (total.cacheWrite ?? 0) + (task.usage.cacheWrite ?? 0);
13
- total.cost = (total.cost ?? 0) + (task.usage.cost ?? 0);
14
- total.turns = (total.turns ?? 0) + (task.usage.turns ?? 0);
15
- }
16
- return found ? total : undefined;
17
- }
18
-
19
- export function formatUsage(usage: UsageState | undefined): string {
20
- if (!usage) return "(none)";
21
- const parts: string[] = [];
22
- if (usage.input !== undefined) parts.push(`input=${usage.input}`);
23
- if (usage.output !== undefined) parts.push(`output=${usage.output}`);
24
- if (usage.cacheRead !== undefined) parts.push(`cacheRead=${usage.cacheRead}`);
25
- if (usage.cacheWrite !== undefined) parts.push(`cacheWrite=${usage.cacheWrite}`);
26
- if (usage.cost !== undefined && Number.isFinite(usage.cost)) parts.push(`cost=${usage.cost.toFixed(6)}`);
27
- if (usage.turns !== undefined) parts.push(`turns=${usage.turns}`);
28
- return parts.join(", ") || "(none)";
29
- }
1
+ import type { TeamTaskState, UsageState } from "./types.ts";
2
+
3
+ export function aggregateUsage(tasks: TeamTaskState[]): UsageState | undefined {
4
+ const total: UsageState = {};
5
+ let found = false;
6
+ for (const task of tasks) {
7
+ if (!task.usage) continue;
8
+ found = true;
9
+ total.input = (total.input ?? 0) + (task.usage.input ?? 0);
10
+ total.output = (total.output ?? 0) + (task.usage.output ?? 0);
11
+ total.cacheRead = (total.cacheRead ?? 0) + (task.usage.cacheRead ?? 0);
12
+ total.cacheWrite = (total.cacheWrite ?? 0) + (task.usage.cacheWrite ?? 0);
13
+ total.cost = (total.cost ?? 0) + (task.usage.cost ?? 0);
14
+ total.turns = (total.turns ?? 0) + (task.usage.turns ?? 0);
15
+ }
16
+ return found ? total : undefined;
17
+ }
18
+
19
+ export function formatUsage(usage: UsageState | undefined): string {
20
+ if (!usage) return "(none)";
21
+ const parts: string[] = [];
22
+ if (usage.input !== undefined) parts.push(`input=${usage.input}`);
23
+ if (usage.output !== undefined) parts.push(`output=${usage.output}`);
24
+ if (usage.cacheRead !== undefined) parts.push(`cacheRead=${usage.cacheRead}`);
25
+ if (usage.cacheWrite !== undefined) parts.push(`cacheWrite=${usage.cacheWrite}`);
26
+ if (usage.cost !== undefined && Number.isFinite(usage.cost)) parts.push(`cost=${usage.cost.toFixed(6)}`);
27
+ if (usage.turns !== undefined) parts.push(`turns=${usage.turns}`);
28
+ return parts.join(", ") || "(none)";
29
+ }
@@ -1 +1 @@
1
- export * from "../runtime/async-runner.ts";
1
+ export * from "../runtime/async-runner.ts";
@@ -1,3 +1,3 @@
1
- export * from "./spawn.ts";
2
- export * from "./manager.ts";
3
- export * from "./async-entry.ts";
1
+ export * from "./spawn.ts";
2
+ export * from "./manager.ts";
3
+ export * from "./async-entry.ts";
@@ -1 +1 @@
1
- export * from "../../runtime/live-agent-control.ts";
1
+ export * from "../../runtime/live-agent-control.ts";
@@ -1 +1 @@
1
- export * from "../../runtime/live-agent-manager.ts";
1
+ export * from "../../runtime/live-agent-manager.ts";
@@ -1 +1 @@
1
- export * from "../../runtime/live-control-realtime.ts";
1
+ export * from "../../runtime/live-control-realtime.ts";