pi-crew 0.5.2 → 0.5.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (137) hide show
  1. package/CHANGELOG.md +183 -0
  2. package/README.md +17 -1
  3. package/docs/architecture.md +2 -0
  4. package/docs/bugs/cross-session-notification-leakage.md +82 -0
  5. package/docs/coding-agent-optimization.md +268 -0
  6. package/docs/deep-review-report.md +384 -0
  7. package/docs/distillation/cybersecurity-patterns.md +294 -0
  8. package/docs/migration-v0.4-v0.5.md +208 -0
  9. package/docs/optimization-plan.md +642 -0
  10. package/docs/pi-crew-v0.5.5-audit-fix-plan.md +133 -0
  11. package/docs/pi-mono-opportunities.md +969 -0
  12. package/docs/pi-mono-review.md +291 -0
  13. package/docs/skills/REFERENCE.md +144 -0
  14. package/package.json +12 -9
  15. package/skills/artifact-analysis-loop/SKILL.md +302 -0
  16. package/skills/async-worker-recovery/SKILL.md +19 -1
  17. package/skills/child-pi-spawning/SKILL.md +19 -6
  18. package/skills/context-artifact-hygiene/SKILL.md +19 -2
  19. package/skills/delegation-patterns/SKILL.md +68 -3
  20. package/skills/detection-pipeline-design/SKILL.md +285 -0
  21. package/skills/event-log-tracing/SKILL.md +20 -6
  22. package/skills/git-master/SKILL.md +20 -6
  23. package/skills/hunting-investigation-loop/SKILL.md +401 -0
  24. package/skills/incident-playbook-construction/SKILL.md +383 -0
  25. package/skills/live-agent-lifecycle/SKILL.md +20 -6
  26. package/skills/mailbox-interactive/SKILL.md +19 -6
  27. package/skills/model-routing-context/SKILL.md +19 -1
  28. package/skills/multi-perspective-review/SKILL.md +19 -4
  29. package/skills/observability-reliability/SKILL.md +19 -2
  30. package/skills/orchestration/SKILL.md +20 -2
  31. package/skills/ownership-session-security/SKILL.md +20 -2
  32. package/skills/pi-extension-lifecycle/SKILL.md +20 -2
  33. package/skills/post-mortem/SKILL.md +7 -2
  34. package/skills/read-only-explorer/SKILL.md +20 -6
  35. package/skills/requirements-to-task-packet/SKILL.md +23 -3
  36. package/skills/resource-discovery-config/SKILL.md +20 -2
  37. package/skills/runtime-state-reader/SKILL.md +20 -2
  38. package/skills/safe-bash/SKILL.md +21 -6
  39. package/skills/scrutinize/SKILL.md +20 -2
  40. package/skills/secure-agent-orchestration-review/SKILL.md +29 -2
  41. package/skills/security-review/SKILL.md +560 -0
  42. package/skills/state-mutation-locking/SKILL.md +22 -2
  43. package/skills/systematic-debugging/SKILL.md +8 -6
  44. package/skills/threat-hypothesis-framework/SKILL.md +175 -0
  45. package/skills/ui-render-performance/SKILL.md +20 -2
  46. package/skills/verification-before-done/SKILL.md +17 -2
  47. package/skills/widget-rendering/SKILL.md +21 -6
  48. package/skills/workspace-isolation/SKILL.md +20 -6
  49. package/skills/worktree-isolation/SKILL.md +20 -6
  50. package/src/agents/agent-config.ts +40 -1
  51. package/src/benchmark/benchmark-runner.ts +45 -0
  52. package/src/benchmark/feedback-loop.ts +5 -0
  53. package/src/config/config.ts +32 -5
  54. package/src/config/role-tools.ts +82 -0
  55. package/src/config/suggestions.ts +8 -0
  56. package/src/config/types.ts +4 -0
  57. package/src/extension/async-notifier.ts +10 -1
  58. package/src/extension/crew-cleanup.ts +114 -0
  59. package/src/extension/cross-extension-rpc.ts +1 -1
  60. package/src/extension/notification-router.ts +18 -0
  61. package/src/extension/register.ts +27 -19
  62. package/src/extension/registration/subagent-tools.ts +1 -1
  63. package/src/extension/team-tool/anchor.ts +201 -0
  64. package/src/extension/team-tool/api.ts +2 -1
  65. package/src/extension/team-tool/auto-summarize.ts +154 -0
  66. package/src/extension/team-tool/run.ts +42 -7
  67. package/src/extension/team-tool.ts +44 -2
  68. package/src/hooks/registry.ts +1 -3
  69. package/src/observability/event-bus.ts +69 -0
  70. package/src/observability/event-to-metric.ts +0 -2
  71. package/src/runtime/anchor-manager.ts +473 -0
  72. package/src/runtime/async-runner.ts +8 -4
  73. package/src/runtime/auto-summarize.ts +350 -0
  74. package/src/runtime/background-runner.ts +10 -3
  75. package/src/runtime/budget-tracker.ts +354 -0
  76. package/src/runtime/chain-runner.ts +507 -0
  77. package/src/runtime/child-pi.ts +123 -35
  78. package/src/runtime/crash-recovery.ts +5 -4
  79. package/src/runtime/crew-agent-runtime.ts +1 -0
  80. package/src/runtime/custom-tools/irc-tool.ts +13 -0
  81. package/src/runtime/custom-tools/submit-result-tool.ts +3 -2
  82. package/src/runtime/delivery-coordinator.ts +10 -3
  83. package/src/runtime/dynamic-script-runner.ts +482 -0
  84. package/src/runtime/foreground-control.ts +87 -17
  85. package/src/runtime/handoff-manager.ts +589 -0
  86. package/src/runtime/hidden-handoff.ts +424 -0
  87. package/src/runtime/live-agent-manager.ts +20 -4
  88. package/src/runtime/live-session-runtime.ts +39 -4
  89. package/src/runtime/manifest-cache.ts +2 -1
  90. package/src/runtime/model-resolver.ts +16 -4
  91. package/src/runtime/phase-tracker.ts +373 -0
  92. package/src/runtime/pi-args.ts +11 -1
  93. package/src/runtime/pi-json-output.ts +31 -0
  94. package/src/runtime/pipeline-runner.ts +514 -0
  95. package/src/runtime/progress-tracker.ts +124 -0
  96. package/src/runtime/retry-runner.ts +354 -0
  97. package/src/runtime/sandbox.ts +252 -0
  98. package/src/runtime/scheduler.ts +7 -2
  99. package/src/runtime/skill-effectiveness.ts +473 -0
  100. package/src/runtime/skill-instructions.ts +37 -3
  101. package/src/runtime/subagent-manager.ts +1 -1
  102. package/src/runtime/task-graph.ts +11 -1
  103. package/src/runtime/task-runner.ts +92 -18
  104. package/src/runtime/team-runner.ts +13 -12
  105. package/src/runtime/tool-progress.ts +10 -3
  106. package/src/runtime/verification-gates.ts +367 -0
  107. package/src/schema/team-tool-schema.ts +37 -0
  108. package/src/skills/discover-skills.ts +5 -0
  109. package/src/state/active-run-registry.ts +9 -2
  110. package/src/state/contracts.ts +9 -0
  111. package/src/state/crew-init.ts +3 -3
  112. package/src/state/decision-ledger.ts +98 -55
  113. package/src/state/event-log-rotation.ts +2 -2
  114. package/src/state/event-log.ts +144 -10
  115. package/src/state/hook-instinct-bridge.ts +5 -5
  116. package/src/state/mailbox.ts +10 -0
  117. package/src/state/run-cache.ts +18 -8
  118. package/src/state/state-store.ts +3 -1
  119. package/src/state/types.ts +4 -0
  120. package/src/tools/safe-bash-extension.ts +1 -0
  121. package/src/tools/safe-bash.ts +152 -20
  122. package/src/types/new-api-types.ts +34 -0
  123. package/src/ui/agent-management-overlay.ts +5 -1
  124. package/src/ui/crew-widget.ts +29 -15
  125. package/src/ui/overlays/mailbox-detail-overlay.ts +13 -2
  126. package/src/ui/powerbar-publisher.ts +101 -7
  127. package/src/ui/tool-render.ts +15 -15
  128. package/src/ui/transcript-cache.ts +13 -0
  129. package/src/utils/bm25-search.ts +16 -8
  130. package/src/utils/env-filter.ts +8 -5
  131. package/src/utils/redaction.ts +169 -15
  132. package/src/utils/session-utils.ts +52 -0
  133. package/src/utils/sse-parser.ts +10 -1
  134. package/src/worktree/cleanup.ts +6 -1
  135. package/src/worktree/worktree-manager.ts +32 -13
  136. package/workflows/chain.workflow.md +252 -0
  137. package/workflows/pipeline.workflow.md +27 -0
@@ -1,5 +1,6 @@
1
1
  import { readFileSync, writeFileSync, existsSync, mkdirSync } from "fs";
2
2
  import { dirname } from "path";
3
+ import { atomicWriteFile } from "./atomic-write.ts";
3
4
 
4
5
  export interface CoherenceMark {
5
6
  matchesPrior: boolean;
@@ -21,9 +22,12 @@ export interface RolloutEntry {
21
22
 
22
23
  /**
23
24
  * Get the ledger file path for a given run ID.
25
+ * SECURITY: Accepts an optional stateRoot that is used as the base directory
26
+ * instead of the hardcoded default path; no containment check is performed here.
24
27
  */
25
- function getLedgerPath(runId: string): string {
26
- return `.crew/state/runs/${runId}/decision-ledger.jsonl`;
28
+ function getLedgerPath(runId: string, stateRoot?: string): string {
29
+ const base = stateRoot ?? `.crew/state/runs/${runId}`;
30
+ return `${base}/decision-ledger.jsonl`;
27
31
  }
28
32
 
29
33
  /**
@@ -44,19 +48,19 @@ function computeCoherence(entry: RolloutEntry, ledger: RolloutEntry[]): Coherenc
44
48
  entry.decisionMark === previousEntry.decisionMark ||
45
49
  Boolean(entry.priorWinner && entry.topCandidates.includes(entry.priorWinner));
46
50
 
47
- // Check last 3 entries for recursive pattern
48
- const recentEntries = ledger.slice(-3);
51
+ // Check last 10 entries for recursive pattern
52
+ const recentEntries = ledger.slice(-10);
49
53
  const recentDecisions = recentEntries.map((e) => e.decisionMark);
50
54
  const currentDecision = entry.decisionMark;
51
55
 
52
56
  const recursiveMatches = recentDecisions.filter((d) => d === currentDecision).length;
53
- const matchesRecursive = recursiveMatches >= 2;
57
+ const matchesRecursive = recursiveMatches >= Math.ceil(recentDecisions.length / 2); // At least half match
54
58
 
55
59
  const promotionAllowed = matchesPrior || matchesRecursive;
56
60
 
57
61
  let reason: string;
58
62
  if (matchesPrior && matchesRecursive) {
59
- reason = `Matches prior winner and recursive pattern (${recursiveMatches}/3 recent decisions)`;
63
+ reason = `Matches prior winner and recursive pattern (${recursiveMatches}/${recentDecisions.length} recent decisions)`;
60
64
  } else if (matchesPrior) {
61
65
  reason = `Matches prior winner decision`;
62
66
  } else if (matchesRecursive) {
@@ -94,29 +98,31 @@ export function initLedger(runId: string): void {
94
98
  /**
95
99
  * Append a new entry to the decision ledger.
96
100
  * Automatically computes and adds coherence marks.
101
+ * FIX: Uses atomic write to prevent partial writes on crash.
97
102
  */
98
103
  export function appendEntry(runId: string, entry: RolloutEntry): RolloutEntry {
99
- const ledgerPath = getLedgerPath(runId);
100
-
101
104
  // Ensure directory exists
105
+ const ledgerPath = getLedgerPath(runId);
102
106
  const dir = dirname(ledgerPath);
103
107
  if (!existsSync(dir)) {
104
108
  mkdirSync(dir, { recursive: true });
105
109
  }
106
110
 
107
- // Get existing entries to compute coherence
111
+ // Get existing entries to compute coherence (and use same result for write)
108
112
  const ledger = getLedger(runId);
109
113
 
110
- // Compute coherence marks
114
+ // Compute coherence
111
115
  const coherenceMark = computeCoherence(entry, ledger);
112
116
  const entryWithCoherence: RolloutEntry = {
113
117
  ...entry,
114
118
  coherenceMark,
115
119
  };
116
120
 
117
- // Append to JSONL file
121
+ // Append to JSONL file using atomic write to prevent corruption
122
+ // Use the already-loaded ledger content (no double-read)
118
123
  const line = JSON.stringify(entryWithCoherence) + "\n";
119
- writeFileSync(ledgerPath, line, { flag: "a", encoding: "utf-8" });
124
+ const existingContent = ledger.length > 0 ? ledger.map((e) => JSON.stringify(e)).join("\n") + "\n" : "";
125
+ atomicWriteFile(ledgerPath, existingContent + line);
120
126
  return entryWithCoherence;
121
127
  }
122
128
 
@@ -218,78 +224,115 @@ export function summarizeLedger(runId: string): string {
218
224
  return lines.join("\n");
219
225
  }
220
226
 
227
+ /**
228
+ * Override the coherence mark of the last entry in the ledger.
229
+ * FIX: This preserves all previous entries while updating just the last one.
230
+ * Previously this would truncate the entire ledger!
231
+ */
232
+ function overrideLastEntry(runId: string, coherenceMark: import("./types.js").CoherenceMark): RolloutEntry {
233
+ const ledger = getLedger(runId);
234
+ if (ledger.length === 0) {
235
+ throw new Error(`No ledger entries found for run ${runId}`);
236
+ }
237
+ // Update the last entry with the new coherence mark
238
+ const lastIndex = ledger.length - 1;
239
+ ledger[lastIndex] = { ...ledger[lastIndex], coherenceMark };
240
+ // Rewrite entire ledger to preserve all entries
241
+ const ledgerPath = getLedgerPath(runId);
242
+ atomicWriteFile(ledgerPath, ledger.map((e) => JSON.stringify(e)).join("\n") + "\n");
243
+ return ledger[lastIndex];
244
+ }
245
+
221
246
  /**
222
247
  * Promote a candidate by marking it as accepted with proper coherence.
223
248
  */
224
249
  export function promoteCandidate(runId: string, candidate: string): RolloutEntry {
225
250
  const latestDecision = getLatestDecision(runId);
226
251
 
227
- const entry: RolloutEntry = {
252
+ // Get existing entries to compute proper coherence
253
+ const ledger = getLedger(runId);
254
+
255
+ // Create entry without coherence first
256
+ const entryWithoutCoherence = {
228
257
  rolloutId: `promote-${Date.now()}`,
229
258
  timestamp: new Date().toISOString(),
230
259
  priorWinner: latestDecision?.topCandidates[0],
231
260
  searchSpace: latestDecision?.searchSpace || "unknown",
232
261
  trialCount: (latestDecision?.trialCount || 0) + 1,
233
262
  topCandidates: [candidate],
234
- decisionMark: "accept",
235
- coherenceMark: {
236
- matchesPrior: false,
237
- matchesRecursive: false,
238
- promotionAllowed: true,
239
- reason: "Manual promotion by user",
240
- },
263
+ decisionMark: "accept" as const,
241
264
  };
242
265
 
243
- // Persist via appendEntry so ledger is consistent.
244
- appendEntry(runId, entry);
245
- const manualCoherence: import("./types.js").CoherenceMark = {
246
- matchesPrior: false,
247
- matchesRecursive: false,
248
- promotionAllowed: true,
249
- reason: "Manual promotion by user",
266
+ // Compute coherence (empty ledger = no matches)
267
+ const coherenceMark = computeCoherence(entryWithoutCoherence as RolloutEntry, ledger);
268
+
269
+ // Manual promotion always allows further promotion
270
+ coherenceMark.promotionAllowed = true;
271
+ coherenceMark.reason = "Manual promotion - promotion allowed";
272
+
273
+ // Create full entry with coherence
274
+ const entry: RolloutEntry = {
275
+ ...entryWithoutCoherence,
276
+ coherenceMark,
250
277
  };
251
- // Manually override the last line in the JSONL to reflect the coherent
252
- // decision we want, bypassing appendEntry's auto-compute for the returned value.
253
- const lastLine = readFileSync(getLedgerPath(runId), "utf-8").trim().split("\n").filter(Boolean).at(-1)!;
254
- const overridden: RolloutEntry = { ...JSON.parse(lastLine), coherenceMark: manualCoherence };
255
- writeFileSync(getLedgerPath(runId), JSON.stringify(overridden) + "\n", "utf-8");
256
- return overridden;
278
+
279
+ // Always push new entry (append-only pattern)
280
+ ledger.push(entry);
281
+
282
+ // Rewrite entire ledger atomically to preserve all entries
283
+ const ledgerPath = getLedgerPath(runId);
284
+ const dir = dirname(ledgerPath);
285
+ if (!existsSync(dir)) {
286
+ mkdirSync(dir, { recursive: true });
287
+ }
288
+ atomicWriteFile(ledgerPath, ledger.map((e) => JSON.stringify(e)).join("\n") + "\n");
289
+
290
+ return entry;
257
291
  }
258
292
 
259
293
  /**
260
- * Decay a candidate by marking it as decayed with proper coherence.
294
+ * Decay a candidate by marking it as decayed with proper coherence.
261
295
  */
262
296
  export function decayCandidate(runId: string, candidate: string): RolloutEntry {
263
297
  const latestDecision = getLatestDecision(runId);
264
298
 
265
- const entry: RolloutEntry = {
299
+ // Get existing entries to compute proper coherence
300
+ const ledger = getLedger(runId);
301
+
302
+ // Create entry without coherence first
303
+ const entryWithoutCoherence = {
266
304
  rolloutId: `decay-${Date.now()}`,
267
305
  timestamp: new Date().toISOString(),
268
306
  priorWinner: latestDecision?.topCandidates[0],
269
307
  searchSpace: latestDecision?.searchSpace || "unknown",
270
308
  trialCount: (latestDecision?.trialCount || 0) + 1,
271
309
  topCandidates: [candidate],
272
- decisionMark: "decay",
273
- coherenceMark: {
274
- matchesPrior: false,
275
- matchesRecursive: false,
276
- promotionAllowed: false,
277
- reason: "Manual decay by user",
278
- },
310
+ decisionMark: "decay" as const,
279
311
  };
280
312
 
281
- // Persist via appendEntry so ledger is consistent.
282
- appendEntry(runId, entry);
283
- const manualCoherence: import("./types.js").CoherenceMark = {
284
- matchesPrior: false,
285
- matchesRecursive: false,
286
- promotionAllowed: false,
287
- reason: "Manual decay by user",
313
+ // Compute coherence (empty ledger = no matches)
314
+ const coherenceMark = computeCoherence(entryWithoutCoherence as RolloutEntry, ledger);
315
+
316
+ // Manual decay never allows promotion
317
+ coherenceMark.promotionAllowed = false;
318
+ coherenceMark.reason = "Manual decay - promotion not allowed";
319
+
320
+ // Create full entry with coherence
321
+ const entry: RolloutEntry = {
322
+ ...entryWithoutCoherence,
323
+ coherenceMark,
288
324
  };
289
- // Manually override the last line in the JSONL to reflect the coherent
290
- // decision we want, bypassing appendEntry's auto-compute for the returned value.
291
- const lastLine = readFileSync(getLedgerPath(runId), "utf-8").trim().split("\n").filter(Boolean).at(-1)!;
292
- const overridden: RolloutEntry = { ...JSON.parse(lastLine), coherenceMark: manualCoherence };
293
- writeFileSync(getLedgerPath(runId), JSON.stringify(overridden) + "\n", "utf-8");
294
- return overridden;
325
+
326
+ // Always push new entry (append-only pattern)
327
+ ledger.push(entry);
328
+
329
+ // Rewrite entire ledger to preserve all entries
330
+ const ledgerPath = getLedgerPath(runId);
331
+ const dir = dirname(ledgerPath);
332
+ if (!existsSync(dir)) {
333
+ mkdirSync(dir, { recursive: true });
334
+ }
335
+ atomicWriteFile(ledgerPath, ledger.map((e) => JSON.stringify(e)).join("\n") + "\n");
336
+
337
+ return entry;
295
338
  }
@@ -209,9 +209,9 @@ export function getEventLogStats(eventsPath: string): EventLogStats | undefined
209
209
  if (newlineCount === 0) firstLineBytes = offset + i + 1;
210
210
  newlineCount++;
211
211
  }
212
- }
213
- offset += bytesRead;
214
212
  }
213
+ offset += bytesRead;
214
+ }
215
215
  } finally {
216
216
  fs.closeSync(scanFd);
217
217
  }
@@ -63,12 +63,17 @@ let appendCounter = 0;
63
63
 
64
64
  /** Simple cross-process lock for an eventsPath to prevent JSONL interleave on concurrent append.
65
65
  * Detects stale locks by checking the owner PID written inside the lock directory.
66
+ *
67
+ * @deprecated Prefer `appendEventAsync()` for callers in async contexts. The sync lock
68
+ * uses `sleepSync` which blocks the event loop and prevents AbortSignal handlers from firing.
66
69
  */
67
70
  export function withEventLogLockSync<T>(eventsPath: string, fn: () => T): T {
71
+ // Ensure parent directory exists before attempting lock
72
+ fs.mkdirSync(path.dirname(eventsPath), { recursive: true });
68
73
  const lockDir = `${eventsPath}.lock`;
69
74
  const pidFile = path.join(lockDir, "pid");
70
75
  const start = Date.now();
71
- const timeout = 5000;
76
+ const timeout = 120000; // 120s timeout for slow CI environments
72
77
  const staleMs = 10000;
73
78
  let acquired = false;
74
79
  while (true) {
@@ -79,6 +84,8 @@ export function withEventLogLockSync<T>(eventsPath: string, fn: () => T): T {
79
84
  break;
80
85
  } catch {
81
86
  if (Date.now() - start > timeout) {
87
+ // Log error and continue without lock — lock is held by live process.
88
+ // Stale detection will clean up dead locks on next attempt.
82
89
  logInternalError("event-log.lock-timeout", new Error(`Event log lock timeout for ${eventsPath}`), `lockDir=${lockDir}`);
83
90
  break;
84
91
  }
@@ -112,9 +119,15 @@ export function withEventLogLockSync<T>(eventsPath: string, fn: () => T): T {
112
119
  }
113
120
  }
114
121
 
115
- function evictOldestSequenceCacheEntry(): void {
116
- const first = sequenceCache.keys().next().value;
117
- if (first !== undefined) sequenceCache.delete(first);
122
+ function evictOldestSequenceCacheEntries(): void {
123
+ // Batch evict oldest 50% of entries when cache is full
124
+ const toEvict = Math.ceil(MAX_SEQUENCE_CACHE_ENTRIES / 2);
125
+ let evicted = 0;
126
+ for (const key of sequenceCache.keys()) {
127
+ if (evicted >= toEvict) break;
128
+ sequenceCache.delete(key);
129
+ evicted++;
130
+ }
118
131
  }
119
132
 
120
133
  export function sequencePath(eventsPath: string): string {
@@ -174,10 +187,116 @@ export function computeEventFingerprint(event: Pick<TeamEvent, "type" | "runId"
174
187
  return createHash("sha256").update(JSON.stringify({ type: event.type, runId: event.runId, taskId: event.taskId, data: event.data ?? null })).digest("hex").slice(0, 16);
175
188
  }
176
189
 
190
+ /**
191
+ * @deprecated Prefer `appendEventAsync()` in async contexts. The sync lock uses
192
+ * `sleepSync` which blocks the Node.js event loop, preventing AbortSignal handlers
193
+ * from firing and degrading live-agent responsiveness.
194
+ */
177
195
  export function appendEvent(eventsPath: string, event: AppendTeamEvent): TeamEvent {
178
196
  return withEventLogLockSync(eventsPath, () => appendEventInsideLock(eventsPath, event));
179
197
  }
180
198
 
199
+ // --- Async write queue (non-blocking alternative to withEventLogLockSync) ---
200
+ const asyncQueues = new Map<string, Promise<unknown>>();
201
+
202
+ /**
203
+ * Append an event to the event log using non-blocking async I/O.
204
+ *
205
+ * Uses a per-eventsPath promise-chain queue to ensure sequential writes without
206
+ * blocking the Node.js event loop. This allows AbortSignal handlers and other
207
+ * async operations to proceed while events are being persisted.
208
+ *
209
+ * For callers that are already in an async context (team-runner, task-runner,
210
+ * foreground-control, etc.), prefer this over the sync `appendEvent()`.
211
+ */
212
+ export async function appendEventAsync(eventsPath: string, event: AppendTeamEvent): Promise<TeamEvent> {
213
+ const queueKey = eventsPath;
214
+ const prev = asyncQueues.get(queueKey) ?? Promise.resolve();
215
+ const next = prev.then(async (): Promise<TeamEvent> => {
216
+ // Ensure directory exists
217
+ await fs.promises.mkdir(path.dirname(eventsPath), { recursive: true });
218
+
219
+ // Build metadata (same logic as appendEventInsideLock)
220
+ const baseMetadata = event.metadata;
221
+ let metadata: TeamEventMetadata = {
222
+ seq: baseMetadata?.seq ?? nextSequence(eventsPath),
223
+ provenance: baseMetadata?.provenance ?? "team_runner",
224
+ ...(baseMetadata?.parentEventId ? { parentEventId: baseMetadata.parentEventId } : {}),
225
+ ...(baseMetadata?.attemptId ? { attemptId: baseMetadata.attemptId } : {}),
226
+ ...(baseMetadata?.branchId ? { branchId: baseMetadata.branchId } : {}),
227
+ ...(baseMetadata?.causationId ? { causationId: baseMetadata.causationId } : {}),
228
+ ...(baseMetadata?.correlationId ? { correlationId: baseMetadata.correlationId } : {}),
229
+ ...(baseMetadata?.sessionIdentity ? { sessionIdentity: baseMetadata.sessionIdentity } : {}),
230
+ ...(baseMetadata?.ownership ? { ownership: baseMetadata.ownership } : {}),
231
+ ...(baseMetadata?.nudgeId ? { nudgeId: baseMetadata.nudgeId } : {}),
232
+ ...(baseMetadata?.confidence ? { confidence: baseMetadata.confidence } : {}),
233
+ };
234
+ const fullEvent: TeamEvent = {
235
+ time: new Date().toISOString(),
236
+ ...event,
237
+ metadata,
238
+ };
239
+ if (baseMetadata?.fingerprint || TERMINAL_EVENT_TYPES.has(fullEvent.type)) {
240
+ metadata = { ...metadata, fingerprint: baseMetadata?.fingerprint ?? computeEventFingerprint(fullEvent) };
241
+ fullEvent.metadata = metadata;
242
+ }
243
+
244
+ // Overflow handling: same logic as sync path
245
+ const isTerminal = TERMINAL_EVENT_TYPES.has(fullEvent.type);
246
+ let skippedDueToSize = false;
247
+ if (!isTerminal && fs.existsSync(eventsPath)) {
248
+ const stat = fs.statSync(eventsPath);
249
+ if (stat.size > MAX_EVENTS_BYTES) {
250
+ try {
251
+ compactEventLog(eventsPath);
252
+ } catch (error) {
253
+ logInternalError("event-log.immediate-compact", error, `eventsPath=${eventsPath}`);
254
+ }
255
+ if (fs.existsSync(eventsPath)) {
256
+ const afterCompact = fs.statSync(eventsPath);
257
+ if (afterCompact.size > MAX_EVENTS_BYTES) {
258
+ rotateEventLog(eventsPath);
259
+ }
260
+ }
261
+ }
262
+ }
263
+ try {
264
+ if (fs.existsSync(eventsPath) && fs.statSync(eventsPath).size > MAX_EVENTS_BYTES) {
265
+ logInternalError("event-log.size-limit", new Error(`events file ${eventsPath} exceeds ${MAX_EVENTS_BYTES} bytes after compaction`), `eventsPath=${eventsPath}`);
266
+ skippedDueToSize = true;
267
+ }
268
+ } catch (error) {
269
+ logInternalError("event-log.size-check", error, `eventsPath=${eventsPath}`);
270
+ }
271
+
272
+ if (!skippedDueToSize) {
273
+ const line = JSON.stringify(redactSecrets(fullEvent)) + "\n";
274
+ await fs.promises.appendFile(eventsPath, line, { encoding: "utf-8" });
275
+ }
276
+
277
+ appendCounter++;
278
+ if (appendCounter % 100 === 0 && needsRotation(eventsPath)) {
279
+ try { compactEventLog(eventsPath); } catch (error) { logInternalError("event-log.rotation", error, `eventsPath=${eventsPath}`); }
280
+ }
281
+ try { emitFromTeamEvent(fullEvent); } catch (error) { logInternalError("event-log.emit", error); }
282
+
283
+ const seq = fullEvent.metadata?.seq ?? 0;
284
+ try {
285
+ const stat = fs.statSync(eventsPath);
286
+ if (sequenceCache.size >= MAX_SEQUENCE_CACHE_ENTRIES) {
287
+ evictOldestSequenceCacheEntries();
288
+ }
289
+ sequenceCache.set(eventsPath, { size: stat.size, mtimeMs: stat.mtimeMs, seq });
290
+ persistSequence(eventsPath, seq);
291
+ } catch (error) {
292
+ logInternalError("event-log.persist-sequence", error, `eventsPath=${eventsPath}`);
293
+ }
294
+ return fullEvent;
295
+ });
296
+ asyncQueues.set(queueKey, next.catch((error) => { logInternalError("event-log.async-queue", error, eventsPath); asyncQueues.delete(queueKey); }));
297
+ return next;
298
+ }
299
+
181
300
  /**
182
301
  * Body of `appendEvent` assuming the caller already holds
183
302
  * `withEventLogLockSync` for `eventsPath`. Used by `appendEventBuffered` to
@@ -254,7 +373,7 @@ function appendEventInsideLock(eventsPath: string, event: AppendTeamEvent): Team
254
373
  try {
255
374
  const stat = fs.statSync(eventsPath);
256
375
  if (sequenceCache.size >= MAX_SEQUENCE_CACHE_ENTRIES) {
257
- evictOldestSequenceCacheEntry();
376
+ evictOldestSequenceCacheEntries();
258
377
  }
259
378
  sequenceCache.set(eventsPath, { size: stat.size, mtimeMs: stat.mtimeMs, seq });
260
379
  persistSequence(eventsPath, seq);
@@ -283,6 +402,12 @@ const bufferedTimers = new Map<string, ReturnType<typeof setTimeout>>();
283
402
  const DEFAULT_BUFFER_MS = 20;
284
403
 
285
404
  export function appendEventBuffered(eventsPath: string, event: AppendTeamEvent, bufferMs = DEFAULT_BUFFER_MS): Promise<TeamEvent> {
405
+ // FIX: Terminal events must bypass buffer to ensure they're written immediately.
406
+ // Previously, terminal events like task.failed could be lost on process crash.
407
+ if (TERMINAL_EVENT_TYPES.has(event.type)) {
408
+ // For terminal events, write synchronously to ensure durability
409
+ return Promise.resolve(appendEvent(eventsPath, event));
410
+ }
286
411
  return new Promise<TeamEvent>((resolve, reject) => {
287
412
  const queue = bufferedQueues.get(eventsPath) ?? [];
288
413
  queue.push({ event, resolve, reject });
@@ -300,8 +425,16 @@ function flushOneEventLogBuffer(eventsPath: string): void {
300
425
  bufferedQueues.delete(eventsPath);
301
426
  const timer = bufferedTimers.get(eventsPath);
302
427
  if (timer) clearTimeout(timer);
428
+ // MEDIUM-13: The timer entry is deleted here, before the flush below is attempted
303
429
  bufferedTimers.delete(eventsPath);
304
430
  if (!queue || queue.length === 0) return;
431
+
432
+ // HIGH-10: Clean up queue if it exceeds limit to prevent unbounded growth
433
+ if (queue.length > 1000) {
434
+ // Keep only the newest 500 entries; the dropped items are removed without calling resolve or reject
435
+ queue.splice(0, queue.length - 500);
436
+ }
437
+
305
438
  try {
306
439
  withEventLogLockSync(eventsPath, () => {
307
440
  for (const item of queue) {
@@ -325,12 +458,13 @@ export function flushEventLogBuffer(): void {
325
458
  }
326
459
 
327
460
  /**
328
- * 2.2 caller-migration helper schedule a buffered append but do not return
329
- * the resulting Promise. Use only for events whose return value is ignored
330
- * (high-frequency `task.progress`). Errors are logged via logInternalError.
461
+ * Schedule an async event append without waiting for the result.
462
+ * Uses the non-blocking async queue to avoid blocking the event loop.
463
+ * Use only for events whose return value is ignored (high-frequency `task.progress`).
464
+ * Errors are logged via logInternalError.
331
465
  */
332
- export function appendEventFireAndForget(eventsPath: string, event: AppendTeamEvent, bufferMs = DEFAULT_BUFFER_MS): void {
333
- appendEventBuffered(eventsPath, event, bufferMs).catch((error) => logInternalError("event-log.fire-and-forget", error, eventsPath));
466
+ export function appendEventFireAndForget(eventsPath: string, event: AppendTeamEvent, _bufferMs = DEFAULT_BUFFER_MS): void {
467
+ appendEventAsync(eventsPath, event).catch((error) => logInternalError("event-log.fire-and-forget", error, eventsPath));
334
468
  }
335
469
 
336
470
  // Auto-flush on process exit so buffered events do not silently leak.
@@ -6,13 +6,13 @@
6
6
  import { crewHooks } from "../runtime/crew-hooks.ts";
7
7
 
8
8
  // Lazy-initialized store and paths
9
- let storeInstance: import("./instinct-store").InstinctStore | null = null;
10
- let pathsInstance: typeof import("../utils/paths") | null = null;
9
+ let storeInstance: import("./instinct-store.js").InstinctStore | null = null;
10
+ let pathsInstance: typeof import("../utils/paths.js") | null = null;
11
11
 
12
12
  async function getStore() {
13
13
  if (!storeInstance) {
14
- const { InstinctStore } = await import("./instinct-store");
15
- const paths = await import("../utils/paths");
14
+ const { InstinctStore } = await import("./instinct-store.js");
15
+ const paths = await import("../utils/paths.js");
16
16
  storeInstance = new InstinctStore(paths.projectCrewRoot(process.cwd()));
17
17
  }
18
18
  return storeInstance;
@@ -20,7 +20,7 @@ async function getStore() {
20
20
 
21
21
  async function getPaths() {
22
22
  if (!pathsInstance) {
23
- pathsInstance = await import("../utils/paths");
23
+ pathsInstance = await import("../utils/paths.js");
24
24
  }
25
25
  return pathsInstance;
26
26
  }
@@ -289,6 +289,16 @@ export function readDeliveryState(manifest: TeamRunManifest): MailboxDeliverySta
289
289
 
290
290
  function writeDeliveryState(manifest: TeamRunManifest, state: MailboxDeliveryState): void {
291
291
  ensureRunMailbox(manifest);
292
+ // Prune when over the cap: keep queued entries first, then delivered, then acknowledged
293
+ const MAX_DELIVERY_MESSAGES = 10000;
294
+ if (Object.keys(state.messages).length > MAX_DELIVERY_MESSAGES) {
295
+ const sorted = Object.entries(state.messages).sort(([, a], [, b]) => {
296
+ const order = { queued: 0, delivered: 1, acknowledged: 2 };
297
+ return (order[a] ?? 3) - (order[b] ?? 3);
298
+ });
299
+ const trimmed = sorted.slice(0, MAX_DELIVERY_MESSAGES);
300
+ state.messages = Object.fromEntries(trimmed);
301
+ }
292
302
  atomicWriteFile(deliveryFile(manifest, true), `${JSON.stringify(redactSecrets(state), null, 2)}\n`);
293
303
  }
294
304
 
@@ -3,6 +3,8 @@ import * as path from "node:path";
3
3
  import * as crypto from "node:crypto";
4
4
  import { projectCrewRoot } from "../utils/paths.ts";
5
5
  import type { TeamTaskState } from "./types.ts";
6
+ import { atomicWriteFile } from "./atomic-write.ts";
7
+ import { withFileLockSync } from "./locks.ts";
6
8
 
7
9
  const DEFAULT_CACHE_TTL_MS = 60 * 60 * 1000; // 1 hour
8
10
 
@@ -31,6 +33,7 @@ export function computeRunCacheKey(goal: string, team: string, workflow: string,
31
33
  .update(normalized)
32
34
  .update(team)
33
35
  .update(workflow)
36
+ .update(_cwd)
34
37
  .digest("hex")
35
38
  .slice(0, 16);
36
39
  }
@@ -61,12 +64,15 @@ export function getCachedRun(cwd: string, cacheKey: string): CacheEntry | null {
61
64
  const entry = JSON.parse(fs.readFileSync(entryPath, "utf-8")) as CacheEntry;
62
65
 
63
66
  if (Date.now() > entry.expiresAt) {
64
- // Remove expired entry
65
- try {
66
- fs.unlinkSync(entryPath);
67
- } catch { /* ignore */ }
68
- delete index[cacheKey];
69
- fs.writeFileSync(indexPath, JSON.stringify(index), "utf-8");
67
+ // Remove expired entry — use lock + atomic write to prevent index corruption
68
+ withFileLockSync(indexPath, () => {
69
+ try {
70
+ fs.unlinkSync(entryPath);
71
+ } catch { /* ignore */ }
72
+ const updatedIndex = JSON.parse(fs.readFileSync(indexPath, "utf-8")) as CacheIndex;
73
+ delete updatedIndex[cacheKey];
74
+ atomicWriteFile(indexPath, JSON.stringify(updatedIndex));
75
+ });
70
76
  return null;
71
77
  }
72
78
 
@@ -109,14 +115,18 @@ export function saveRunToCache(
109
115
  const entryPath = path.join(dir, `${cacheKey}.json`);
110
116
  fs.writeFileSync(entryPath, JSON.stringify(entry), "utf-8");
111
117
 
112
- // Update index
118
+ // Update index with atomic write: write to temp file then rename
113
119
  const indexPath = path.join(dir, "index.json");
114
120
  const index: CacheIndex = fs.existsSync(indexPath)
115
121
  ? JSON.parse(fs.readFileSync(indexPath, "utf-8"))
116
122
  : {};
117
123
 
118
124
  index[cacheKey] = entryPath;
119
- fs.writeFileSync(indexPath, JSON.stringify(index), "utf-8");
125
+
126
+ // Atomic write: write to temp file first, then rename
127
+ const tempPath = path.join(dir, "index.json.tmp");
128
+ fs.writeFileSync(tempPath, JSON.stringify(index), "utf-8");
129
+ fs.renameSync(tempPath, indexPath);
120
130
  }
121
131
 
122
132
  /**
@@ -12,6 +12,7 @@ import { assertSafePathId, resolveContainedRelativePath, resolveRealContainedPat
12
12
  import { withRunLock } from "./locks.ts";
13
13
  import type { TeamConfig } from "../teams/team-config.ts";
14
14
  import type { WorkflowConfig } from "../workflows/workflow-config.ts";
15
+ import { toPiSessionId } from "../utils/session-utils.ts";
15
16
 
16
17
  export interface RunPaths {
17
18
  runId: string;
@@ -32,7 +33,7 @@ interface ManifestCacheEntry {
32
33
  cachedAt?: number;
33
34
  }
34
35
 
35
- const MANIFEST_CACHE_TTL_MS = 5 * 60 * 1000; // 5 minutes
36
+ const MANIFEST_CACHE_TTL_MS = 30 * 1000; // 30 seconds (FIX: reduced from 5 minutes for faster state updates)
36
37
  const manifestCache = new Map<string, ManifestCacheEntry>();
37
38
 
38
39
  function setManifestCache(stateRoot: string, entry: ManifestCacheEntry): void {
@@ -148,6 +149,7 @@ export function createRunManifest(params: {
148
149
  const manifest: TeamRunManifest = {
149
150
  schemaVersion: 1,
150
151
  runId: paths.runId,
152
+ sessionId: toPiSessionId(paths.runId),
151
153
  team: params.team.name,
152
154
  workflow: params.workflow?.name,
153
155
  goal: params.goal,
@@ -4,6 +4,7 @@ import type { WorkerHeartbeatState } from "../runtime/worker-heartbeat.ts";
4
4
  import type { CrewAgentProgress } from "../runtime/crew-agent-runtime.ts";
5
5
  import type { RolloutEntry, CoherenceMark } from "./decision-ledger.ts";
6
6
  export type { RolloutEntry, CoherenceMark };
7
+ export type { CrewAgentProgress };
7
8
 
8
9
  export type { TeamRunStatus, TeamTaskStatus } from "./contracts.ts";
9
10
 
@@ -25,6 +26,7 @@ export interface VerificationCommandResult {
25
26
  cmd: string;
26
27
  status: "passed" | "failed" | "not_run";
27
28
  exitCode?: number | null;
29
+ durationMs?: number;
28
30
  outputArtifact?: ArtifactDescriptor;
29
31
  }
30
32
 
@@ -156,6 +158,8 @@ export interface CrewAttentionEventData {
156
158
  export interface TeamRunManifest {
157
159
  schemaVersion: 1;
158
160
  runId: string;
161
+ /** pi session ID aligned with run ID for cross-referencing (e.g., "crew-team20260528") */
162
+ sessionId?: string;
159
163
  team: string;
160
164
  workflow?: string;
161
165
  goal: string;
@@ -68,6 +68,7 @@ export default function safeBashExtension(pi: ExtensionAPI): void {
68
68
  "Execute a bash command safely. Blocks dangerous commands like `rm -rf /`, `sudo`, `curl | sh`, etc.",
69
69
  parameters: Type.Object({
70
70
  command: Type.String({ description: "Bash command to execute" }),
71
+ /** Timeout in seconds (optional). Default: no timeout. If exceeded, the command is killed. */
71
72
  timeout: Type.Optional(
72
73
  Type.Number({ description: "Timeout in seconds (optional)" }),
73
74
  ),