pi-crew 0.5.2 → 0.5.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +183 -0
- package/README.md +17 -1
- package/docs/architecture.md +2 -0
- package/docs/bugs/cross-session-notification-leakage.md +82 -0
- package/docs/coding-agent-optimization.md +268 -0
- package/docs/deep-review-report.md +384 -0
- package/docs/distillation/cybersecurity-patterns.md +294 -0
- package/docs/migration-v0.4-v0.5.md +208 -0
- package/docs/optimization-plan.md +642 -0
- package/docs/pi-crew-v0.5.5-audit-fix-plan.md +133 -0
- package/docs/pi-mono-opportunities.md +969 -0
- package/docs/pi-mono-review.md +291 -0
- package/docs/skills/REFERENCE.md +144 -0
- package/package.json +12 -9
- package/skills/artifact-analysis-loop/SKILL.md +302 -0
- package/skills/async-worker-recovery/SKILL.md +19 -1
- package/skills/child-pi-spawning/SKILL.md +19 -6
- package/skills/context-artifact-hygiene/SKILL.md +19 -2
- package/skills/delegation-patterns/SKILL.md +68 -3
- package/skills/detection-pipeline-design/SKILL.md +285 -0
- package/skills/event-log-tracing/SKILL.md +20 -6
- package/skills/git-master/SKILL.md +20 -6
- package/skills/hunting-investigation-loop/SKILL.md +401 -0
- package/skills/incident-playbook-construction/SKILL.md +383 -0
- package/skills/live-agent-lifecycle/SKILL.md +20 -6
- package/skills/mailbox-interactive/SKILL.md +19 -6
- package/skills/model-routing-context/SKILL.md +19 -1
- package/skills/multi-perspective-review/SKILL.md +19 -4
- package/skills/observability-reliability/SKILL.md +19 -2
- package/skills/orchestration/SKILL.md +20 -2
- package/skills/ownership-session-security/SKILL.md +20 -2
- package/skills/pi-extension-lifecycle/SKILL.md +20 -2
- package/skills/post-mortem/SKILL.md +7 -2
- package/skills/read-only-explorer/SKILL.md +20 -6
- package/skills/requirements-to-task-packet/SKILL.md +23 -3
- package/skills/resource-discovery-config/SKILL.md +20 -2
- package/skills/runtime-state-reader/SKILL.md +20 -2
- package/skills/safe-bash/SKILL.md +21 -6
- package/skills/scrutinize/SKILL.md +20 -2
- package/skills/secure-agent-orchestration-review/SKILL.md +29 -2
- package/skills/security-review/SKILL.md +560 -0
- package/skills/state-mutation-locking/SKILL.md +22 -2
- package/skills/systematic-debugging/SKILL.md +8 -6
- package/skills/threat-hypothesis-framework/SKILL.md +175 -0
- package/skills/ui-render-performance/SKILL.md +20 -2
- package/skills/verification-before-done/SKILL.md +17 -2
- package/skills/widget-rendering/SKILL.md +21 -6
- package/skills/workspace-isolation/SKILL.md +20 -6
- package/skills/worktree-isolation/SKILL.md +20 -6
- package/src/agents/agent-config.ts +40 -1
- package/src/benchmark/benchmark-runner.ts +45 -0
- package/src/benchmark/feedback-loop.ts +5 -0
- package/src/config/config.ts +32 -5
- package/src/config/role-tools.ts +82 -0
- package/src/config/suggestions.ts +8 -0
- package/src/config/types.ts +4 -0
- package/src/extension/async-notifier.ts +10 -1
- package/src/extension/crew-cleanup.ts +114 -0
- package/src/extension/cross-extension-rpc.ts +1 -1
- package/src/extension/notification-router.ts +18 -0
- package/src/extension/register.ts +27 -19
- package/src/extension/registration/subagent-tools.ts +1 -1
- package/src/extension/team-tool/anchor.ts +201 -0
- package/src/extension/team-tool/api.ts +2 -1
- package/src/extension/team-tool/auto-summarize.ts +154 -0
- package/src/extension/team-tool/run.ts +42 -7
- package/src/extension/team-tool.ts +44 -2
- package/src/hooks/registry.ts +1 -3
- package/src/observability/event-bus.ts +69 -0
- package/src/observability/event-to-metric.ts +0 -2
- package/src/runtime/anchor-manager.ts +473 -0
- package/src/runtime/async-runner.ts +8 -4
- package/src/runtime/auto-summarize.ts +350 -0
- package/src/runtime/background-runner.ts +10 -3
- package/src/runtime/budget-tracker.ts +354 -0
- package/src/runtime/chain-runner.ts +507 -0
- package/src/runtime/child-pi.ts +123 -35
- package/src/runtime/crash-recovery.ts +5 -4
- package/src/runtime/crew-agent-runtime.ts +1 -0
- package/src/runtime/custom-tools/irc-tool.ts +13 -0
- package/src/runtime/custom-tools/submit-result-tool.ts +3 -2
- package/src/runtime/delivery-coordinator.ts +10 -3
- package/src/runtime/dynamic-script-runner.ts +482 -0
- package/src/runtime/foreground-control.ts +87 -17
- package/src/runtime/handoff-manager.ts +589 -0
- package/src/runtime/hidden-handoff.ts +424 -0
- package/src/runtime/live-agent-manager.ts +20 -4
- package/src/runtime/live-session-runtime.ts +39 -4
- package/src/runtime/manifest-cache.ts +2 -1
- package/src/runtime/model-resolver.ts +16 -4
- package/src/runtime/phase-tracker.ts +373 -0
- package/src/runtime/pi-args.ts +11 -1
- package/src/runtime/pi-json-output.ts +31 -0
- package/src/runtime/pipeline-runner.ts +514 -0
- package/src/runtime/progress-tracker.ts +124 -0
- package/src/runtime/retry-runner.ts +354 -0
- package/src/runtime/sandbox.ts +252 -0
- package/src/runtime/scheduler.ts +7 -2
- package/src/runtime/skill-effectiveness.ts +473 -0
- package/src/runtime/skill-instructions.ts +37 -3
- package/src/runtime/subagent-manager.ts +1 -1
- package/src/runtime/task-graph.ts +11 -1
- package/src/runtime/task-runner.ts +92 -18
- package/src/runtime/team-runner.ts +13 -12
- package/src/runtime/tool-progress.ts +10 -3
- package/src/runtime/verification-gates.ts +367 -0
- package/src/schema/team-tool-schema.ts +37 -0
- package/src/skills/discover-skills.ts +5 -0
- package/src/state/active-run-registry.ts +9 -2
- package/src/state/contracts.ts +9 -0
- package/src/state/crew-init.ts +3 -3
- package/src/state/decision-ledger.ts +98 -55
- package/src/state/event-log-rotation.ts +2 -2
- package/src/state/event-log.ts +144 -10
- package/src/state/hook-instinct-bridge.ts +5 -5
- package/src/state/mailbox.ts +10 -0
- package/src/state/run-cache.ts +18 -8
- package/src/state/state-store.ts +3 -1
- package/src/state/types.ts +4 -0
- package/src/tools/safe-bash-extension.ts +1 -0
- package/src/tools/safe-bash.ts +152 -20
- package/src/types/new-api-types.ts +34 -0
- package/src/ui/agent-management-overlay.ts +5 -1
- package/src/ui/crew-widget.ts +29 -15
- package/src/ui/overlays/mailbox-detail-overlay.ts +13 -2
- package/src/ui/powerbar-publisher.ts +101 -7
- package/src/ui/tool-render.ts +15 -15
- package/src/ui/transcript-cache.ts +13 -0
- package/src/utils/bm25-search.ts +16 -8
- package/src/utils/env-filter.ts +8 -5
- package/src/utils/redaction.ts +169 -15
- package/src/utils/session-utils.ts +52 -0
- package/src/utils/sse-parser.ts +10 -1
- package/src/worktree/cleanup.ts +6 -1
- package/src/worktree/worktree-manager.ts +32 -13
- package/workflows/chain.workflow.md +252 -0
- package/workflows/pipeline.workflow.md +27 -0
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
import { readFileSync, writeFileSync, existsSync, mkdirSync } from "fs";
|
|
2
2
|
import { dirname } from "path";
|
|
3
|
+
import { atomicWriteFile } from "./atomic-write.ts";
|
|
3
4
|
|
|
4
5
|
export interface CoherenceMark {
|
|
5
6
|
matchesPrior: boolean;
|
|
@@ -21,9 +22,12 @@ export interface RolloutEntry {
|
|
|
21
22
|
|
|
22
23
|
/**
|
|
23
24
|
* Get the ledger file path for a given run ID.
|
|
25
|
+
* SECURITY: Accept stateRoot param to use it for path computation
|
|
26
|
+
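* instead of the hardcoded path; falls back to `.crew/state/runs/<runId>` when omitted. The path is not validated here, so callers must ensure stateRoot containment.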
|
|
24
27
|
*/
|
|
25
|
-
function getLedgerPath(runId: string): string {
|
|
26
|
-
|
|
28
|
+
function getLedgerPath(runId: string, stateRoot?: string): string {
|
|
29
|
+
const base = stateRoot ?? `.crew/state/runs/${runId}`;
|
|
30
|
+
return `${base}/decision-ledger.jsonl`;
|
|
27
31
|
}
|
|
28
32
|
|
|
29
33
|
/**
|
|
@@ -44,19 +48,19 @@ function computeCoherence(entry: RolloutEntry, ledger: RolloutEntry[]): Coherenc
|
|
|
44
48
|
entry.decisionMark === previousEntry.decisionMark ||
|
|
45
49
|
Boolean(entry.priorWinner && entry.topCandidates.includes(entry.priorWinner));
|
|
46
50
|
|
|
47
|
-
// Check last
|
|
48
|
-
const recentEntries = ledger.slice(-
|
|
51
|
+
// Check last 10 entries for recursive pattern
|
|
52
|
+
const recentEntries = ledger.slice(-10);
|
|
49
53
|
const recentDecisions = recentEntries.map((e) => e.decisionMark);
|
|
50
54
|
const currentDecision = entry.decisionMark;
|
|
51
55
|
|
|
52
56
|
const recursiveMatches = recentDecisions.filter((d) => d === currentDecision).length;
|
|
53
|
-
const matchesRecursive = recursiveMatches >= 2;
|
|
57
|
+
const matchesRecursive = recursiveMatches >= Math.ceil(recentDecisions.length / 2); // At least half match
|
|
54
58
|
|
|
55
59
|
const promotionAllowed = matchesPrior || matchesRecursive;
|
|
56
60
|
|
|
57
61
|
let reason: string;
|
|
58
62
|
if (matchesPrior && matchesRecursive) {
|
|
59
|
-
reason = `Matches prior winner and recursive pattern (${recursiveMatches}
|
|
63
|
+
reason = `Matches prior winner and recursive pattern (${recursiveMatches}/${recentDecisions.length} recent decisions)`;
|
|
60
64
|
} else if (matchesPrior) {
|
|
61
65
|
reason = `Matches prior winner decision`;
|
|
62
66
|
} else if (matchesRecursive) {
|
|
@@ -94,29 +98,31 @@ export function initLedger(runId: string): void {
|
|
|
94
98
|
/**
|
|
95
99
|
* Append a new entry to the decision ledger.
|
|
96
100
|
* Automatically computes and adds coherence marks.
|
|
101
|
+
* FIX: Uses atomic write to prevent partial writes on crash.
|
|
97
102
|
*/
|
|
98
103
|
export function appendEntry(runId: string, entry: RolloutEntry): RolloutEntry {
|
|
99
|
-
const ledgerPath = getLedgerPath(runId);
|
|
100
|
-
|
|
101
104
|
// Ensure directory exists
|
|
105
|
+
const ledgerPath = getLedgerPath(runId);
|
|
102
106
|
const dir = dirname(ledgerPath);
|
|
103
107
|
if (!existsSync(dir)) {
|
|
104
108
|
mkdirSync(dir, { recursive: true });
|
|
105
109
|
}
|
|
106
110
|
|
|
107
|
-
// Get existing entries to compute coherence
|
|
111
|
+
// Get existing entries to compute coherence (and use same result for write)
|
|
108
112
|
const ledger = getLedger(runId);
|
|
109
113
|
|
|
110
|
-
// Compute coherence
|
|
114
|
+
// Compute coherence
|
|
111
115
|
const coherenceMark = computeCoherence(entry, ledger);
|
|
112
116
|
const entryWithCoherence: RolloutEntry = {
|
|
113
117
|
...entry,
|
|
114
118
|
coherenceMark,
|
|
115
119
|
};
|
|
116
120
|
|
|
117
|
-
// Append to JSONL file
|
|
121
|
+
// Append to JSONL file using atomic write to prevent corruption
|
|
122
|
+
// Use the already-loaded ledger content (no double-read)
|
|
118
123
|
const line = JSON.stringify(entryWithCoherence) + "\n";
|
|
119
|
-
|
|
124
|
+
const existingContent = ledger.length > 0 ? ledger.map((e) => JSON.stringify(e)).join("\n") + "\n" : "";
|
|
125
|
+
atomicWriteFile(ledgerPath, existingContent + line);
|
|
120
126
|
return entryWithCoherence;
|
|
121
127
|
}
|
|
122
128
|
|
|
@@ -218,78 +224,115 @@ export function summarizeLedger(runId: string): string {
|
|
|
218
224
|
return lines.join("\n");
|
|
219
225
|
}
|
|
220
226
|
|
|
227
|
+
/**
|
|
228
|
+
* Override the coherence mark of the last entry in the ledger.
|
|
229
|
+
* FIX: This preserves all previous entries while updating just the last one.
|
|
230
|
+
* Previously this would truncate the entire ledger!
|
|
231
|
+
*/
|
|
232
|
+
function overrideLastEntry(runId: string, coherenceMark: import("./types.js").CoherenceMark): RolloutEntry {
|
|
233
|
+
const ledger = getLedger(runId);
|
|
234
|
+
if (ledger.length === 0) {
|
|
235
|
+
throw new Error(`No ledger entries found for run ${runId}`);
|
|
236
|
+
}
|
|
237
|
+
// Update the last entry with the new coherence mark
|
|
238
|
+
const lastIndex = ledger.length - 1;
|
|
239
|
+
ledger[lastIndex] = { ...ledger[lastIndex], coherenceMark };
|
|
240
|
+
// Rewrite entire ledger to preserve all entries
|
|
241
|
+
const ledgerPath = getLedgerPath(runId);
|
|
242
|
+
atomicWriteFile(ledgerPath, ledger.map((e) => JSON.stringify(e)).join("\n") + "\n");
|
|
243
|
+
return ledger[lastIndex];
|
|
244
|
+
}
|
|
245
|
+
|
|
221
246
|
/**
|
|
222
247
|
* Promote a candidate by marking it as accepted with proper coherence.
|
|
223
248
|
*/
|
|
224
249
|
export function promoteCandidate(runId: string, candidate: string): RolloutEntry {
|
|
225
250
|
const latestDecision = getLatestDecision(runId);
|
|
226
251
|
|
|
227
|
-
|
|
252
|
+
// Get existing entries to compute proper coherence
|
|
253
|
+
const ledger = getLedger(runId);
|
|
254
|
+
|
|
255
|
+
// Create entry without coherence first
|
|
256
|
+
const entryWithoutCoherence = {
|
|
228
257
|
rolloutId: `promote-${Date.now()}`,
|
|
229
258
|
timestamp: new Date().toISOString(),
|
|
230
259
|
priorWinner: latestDecision?.topCandidates[0],
|
|
231
260
|
searchSpace: latestDecision?.searchSpace || "unknown",
|
|
232
261
|
trialCount: (latestDecision?.trialCount || 0) + 1,
|
|
233
262
|
topCandidates: [candidate],
|
|
234
|
-
decisionMark: "accept",
|
|
235
|
-
coherenceMark: {
|
|
236
|
-
matchesPrior: false,
|
|
237
|
-
matchesRecursive: false,
|
|
238
|
-
promotionAllowed: true,
|
|
239
|
-
reason: "Manual promotion by user",
|
|
240
|
-
},
|
|
263
|
+
decisionMark: "accept" as const,
|
|
241
264
|
};
|
|
242
265
|
|
|
243
|
-
//
|
|
244
|
-
|
|
245
|
-
|
|
246
|
-
|
|
247
|
-
|
|
248
|
-
|
|
249
|
-
|
|
266
|
+
// Compute coherence (empty ledger = no matches)
|
|
267
|
+
const coherenceMark = computeCoherence(entryWithoutCoherence as RolloutEntry, ledger);
|
|
268
|
+
|
|
269
|
+
// Manual promotion always allows further promotion
|
|
270
|
+
coherenceMark.promotionAllowed = true;
|
|
271
|
+
coherenceMark.reason = "Manual promotion - promotion allowed";
|
|
272
|
+
|
|
273
|
+
// Create full entry with coherence
|
|
274
|
+
const entry: RolloutEntry = {
|
|
275
|
+
...entryWithoutCoherence,
|
|
276
|
+
coherenceMark,
|
|
250
277
|
};
|
|
251
|
-
|
|
252
|
-
//
|
|
253
|
-
|
|
254
|
-
|
|
255
|
-
|
|
256
|
-
|
|
278
|
+
|
|
279
|
+
// Always push new entry (append-only pattern)
|
|
280
|
+
ledger.push(entry);
|
|
281
|
+
|
|
282
|
+
// Rewrite entire ledger atomically to preserve all entries
|
|
283
|
+
const ledgerPath = getLedgerPath(runId);
|
|
284
|
+
const dir = dirname(ledgerPath);
|
|
285
|
+
if (!existsSync(dir)) {
|
|
286
|
+
mkdirSync(dir, { recursive: true });
|
|
287
|
+
}
|
|
288
|
+
atomicWriteFile(ledgerPath, ledger.map((e) => JSON.stringify(e)).join("\n") + "\n");
|
|
289
|
+
|
|
290
|
+
return entry;
|
|
257
291
|
}
|
|
258
292
|
|
|
259
293
|
/**
|
|
260
|
-
* Decay a candidate by marking it as
|
|
294
|
+
* Decay a candidate by recording a "decay" decision with computed coherence; promotion is never allowed for the resulting entry.
|
|
261
295
|
*/
|
|
262
296
|
export function decayCandidate(runId: string, candidate: string): RolloutEntry {
|
|
263
297
|
const latestDecision = getLatestDecision(runId);
|
|
264
298
|
|
|
265
|
-
|
|
299
|
+
// Get existing entries to compute proper coherence
|
|
300
|
+
const ledger = getLedger(runId);
|
|
301
|
+
|
|
302
|
+
// Create entry without coherence first
|
|
303
|
+
const entryWithoutCoherence = {
|
|
266
304
|
rolloutId: `decay-${Date.now()}`,
|
|
267
305
|
timestamp: new Date().toISOString(),
|
|
268
306
|
priorWinner: latestDecision?.topCandidates[0],
|
|
269
307
|
searchSpace: latestDecision?.searchSpace || "unknown",
|
|
270
308
|
trialCount: (latestDecision?.trialCount || 0) + 1,
|
|
271
309
|
topCandidates: [candidate],
|
|
272
|
-
decisionMark: "decay",
|
|
273
|
-
coherenceMark: {
|
|
274
|
-
matchesPrior: false,
|
|
275
|
-
matchesRecursive: false,
|
|
276
|
-
promotionAllowed: false,
|
|
277
|
-
reason: "Manual decay by user",
|
|
278
|
-
},
|
|
310
|
+
decisionMark: "decay" as const,
|
|
279
311
|
};
|
|
280
312
|
|
|
281
|
-
//
|
|
282
|
-
|
|
283
|
-
|
|
284
|
-
|
|
285
|
-
|
|
286
|
-
|
|
287
|
-
|
|
313
|
+
// Compute coherence (empty ledger = no matches)
|
|
314
|
+
const coherenceMark = computeCoherence(entryWithoutCoherence as RolloutEntry, ledger);
|
|
315
|
+
|
|
316
|
+
// Manual decay never allows promotion
|
|
317
|
+
coherenceMark.promotionAllowed = false;
|
|
318
|
+
coherenceMark.reason = "Manual decay - promotion not allowed";
|
|
319
|
+
|
|
320
|
+
// Create full entry with coherence
|
|
321
|
+
const entry: RolloutEntry = {
|
|
322
|
+
...entryWithoutCoherence,
|
|
323
|
+
coherenceMark,
|
|
288
324
|
};
|
|
289
|
-
|
|
290
|
-
//
|
|
291
|
-
|
|
292
|
-
|
|
293
|
-
|
|
294
|
-
|
|
325
|
+
|
|
326
|
+
// Always push new entry (append-only pattern)
|
|
327
|
+
ledger.push(entry);
|
|
328
|
+
|
|
329
|
+
// Rewrite entire ledger to preserve all entries
|
|
330
|
+
const ledgerPath = getLedgerPath(runId);
|
|
331
|
+
const dir = dirname(ledgerPath);
|
|
332
|
+
if (!existsSync(dir)) {
|
|
333
|
+
mkdirSync(dir, { recursive: true });
|
|
334
|
+
}
|
|
335
|
+
atomicWriteFile(ledgerPath, ledger.map((e) => JSON.stringify(e)).join("\n") + "\n");
|
|
336
|
+
|
|
337
|
+
return entry;
|
|
295
338
|
}
|
|
@@ -209,9 +209,9 @@ export function getEventLogStats(eventsPath: string): EventLogStats | undefined
|
|
|
209
209
|
if (newlineCount === 0) firstLineBytes = offset + i + 1;
|
|
210
210
|
newlineCount++;
|
|
211
211
|
}
|
|
212
|
-
}
|
|
213
|
-
offset += bytesRead;
|
|
214
212
|
}
|
|
213
|
+
offset += bytesRead;
|
|
214
|
+
}
|
|
215
215
|
} finally {
|
|
216
216
|
fs.closeSync(scanFd);
|
|
217
217
|
}
|
package/src/state/event-log.ts
CHANGED
|
@@ -63,12 +63,17 @@ let appendCounter = 0;
|
|
|
63
63
|
|
|
64
64
|
/** Simple cross-process lock for an eventsPath to prevent JSONL interleave on concurrent append.
|
|
65
65
|
* Detects stale locks by checking the owner PID written inside the lock directory.
|
|
66
|
+
*
|
|
67
|
+
* @deprecated Prefer `appendEventAsync()` for callers in async contexts. The sync lock
|
|
68
|
+
* uses `sleepSync` which blocks the event loop and prevents AbortSignal handlers from firing.
|
|
66
69
|
*/
|
|
67
70
|
export function withEventLogLockSync<T>(eventsPath: string, fn: () => T): T {
|
|
71
|
+
// Ensure parent directory exists before attempting lock
|
|
72
|
+
fs.mkdirSync(path.dirname(eventsPath), { recursive: true });
|
|
68
73
|
const lockDir = `${eventsPath}.lock`;
|
|
69
74
|
const pidFile = path.join(lockDir, "pid");
|
|
70
75
|
const start = Date.now();
|
|
71
|
-
const timeout =
|
|
76
|
+
const timeout = 120000; // 120s timeout for slow CI environments
|
|
72
77
|
const staleMs = 10000;
|
|
73
78
|
let acquired = false;
|
|
74
79
|
while (true) {
|
|
@@ -79,6 +84,8 @@ export function withEventLogLockSync<T>(eventsPath: string, fn: () => T): T {
|
|
|
79
84
|
break;
|
|
80
85
|
} catch {
|
|
81
86
|
if (Date.now() - start > timeout) {
|
|
87
|
+
// Log error and continue without lock — lock is held by live process.
|
|
88
|
+
// Stale detection will clean up dead locks on next attempt.
|
|
82
89
|
logInternalError("event-log.lock-timeout", new Error(`Event log lock timeout for ${eventsPath}`), `lockDir=${lockDir}`);
|
|
83
90
|
break;
|
|
84
91
|
}
|
|
@@ -112,9 +119,15 @@ export function withEventLogLockSync<T>(eventsPath: string, fn: () => T): T {
|
|
|
112
119
|
}
|
|
113
120
|
}
|
|
114
121
|
|
|
115
|
-
function
|
|
116
|
-
|
|
117
|
-
|
|
122
|
+
function evictOldestSequenceCacheEntries(): void {
|
|
123
|
+
// Batch evict oldest 50% of entries when cache is full
|
|
124
|
+
const toEvict = Math.ceil(MAX_SEQUENCE_CACHE_ENTRIES / 2);
|
|
125
|
+
let evicted = 0;
|
|
126
|
+
for (const key of sequenceCache.keys()) {
|
|
127
|
+
if (evicted >= toEvict) break;
|
|
128
|
+
sequenceCache.delete(key);
|
|
129
|
+
evicted++;
|
|
130
|
+
}
|
|
118
131
|
}
|
|
119
132
|
|
|
120
133
|
export function sequencePath(eventsPath: string): string {
|
|
@@ -174,10 +187,116 @@ export function computeEventFingerprint(event: Pick<TeamEvent, "type" | "runId"
|
|
|
174
187
|
return createHash("sha256").update(JSON.stringify({ type: event.type, runId: event.runId, taskId: event.taskId, data: event.data ?? null })).digest("hex").slice(0, 16);
|
|
175
188
|
}
|
|
176
189
|
|
|
190
|
+
/**
|
|
191
|
+
* @deprecated Prefer `appendEventAsync()` in async contexts. The sync lock uses
|
|
192
|
+
* `sleepSync` which blocks the Node.js event loop, preventing AbortSignal handlers
|
|
193
|
+
* from firing and degrading live-agent responsiveness.
|
|
194
|
+
*/
|
|
177
195
|
export function appendEvent(eventsPath: string, event: AppendTeamEvent): TeamEvent {
|
|
178
196
|
return withEventLogLockSync(eventsPath, () => appendEventInsideLock(eventsPath, event));
|
|
179
197
|
}
|
|
180
198
|
|
|
199
|
+
// --- Async write queue (non-blocking alternative to withEventLogLockSync) ---
|
|
200
|
+
const asyncQueues = new Map<string, Promise<unknown>>();
|
|
201
|
+
|
|
202
|
+
/**
|
|
203
|
+
* Append an event to the event log using non-blocking async I/O.
|
|
204
|
+
*
|
|
205
|
+
* Uses a per-eventsPath promise-chain queue to ensure sequential writes without
|
|
206
|
+
* blocking the Node.js event loop. This allows AbortSignal handlers and other
|
|
207
|
+
* async operations to proceed while events are being persisted.
|
|
208
|
+
*
|
|
209
|
+
* For callers that are already in an async context (team-runner, task-runner,
|
|
210
|
+
* foreground-control, etc.), prefer this over the sync `appendEvent()`.
|
|
211
|
+
*/
|
|
212
|
+
export async function appendEventAsync(eventsPath: string, event: AppendTeamEvent): Promise<TeamEvent> {
|
|
213
|
+
const queueKey = eventsPath;
|
|
214
|
+
const prev = asyncQueues.get(queueKey) ?? Promise.resolve();
|
|
215
|
+
const next = prev.then(async (): Promise<TeamEvent> => {
|
|
216
|
+
// Ensure directory exists
|
|
217
|
+
await fs.promises.mkdir(path.dirname(eventsPath), { recursive: true });
|
|
218
|
+
|
|
219
|
+
// Build metadata (same logic as appendEventInsideLock)
|
|
220
|
+
const baseMetadata = event.metadata;
|
|
221
|
+
let metadata: TeamEventMetadata = {
|
|
222
|
+
seq: baseMetadata?.seq ?? nextSequence(eventsPath),
|
|
223
|
+
provenance: baseMetadata?.provenance ?? "team_runner",
|
|
224
|
+
...(baseMetadata?.parentEventId ? { parentEventId: baseMetadata.parentEventId } : {}),
|
|
225
|
+
...(baseMetadata?.attemptId ? { attemptId: baseMetadata.attemptId } : {}),
|
|
226
|
+
...(baseMetadata?.branchId ? { branchId: baseMetadata.branchId } : {}),
|
|
227
|
+
...(baseMetadata?.causationId ? { causationId: baseMetadata.causationId } : {}),
|
|
228
|
+
...(baseMetadata?.correlationId ? { correlationId: baseMetadata.correlationId } : {}),
|
|
229
|
+
...(baseMetadata?.sessionIdentity ? { sessionIdentity: baseMetadata.sessionIdentity } : {}),
|
|
230
|
+
...(baseMetadata?.ownership ? { ownership: baseMetadata.ownership } : {}),
|
|
231
|
+
...(baseMetadata?.nudgeId ? { nudgeId: baseMetadata.nudgeId } : {}),
|
|
232
|
+
...(baseMetadata?.confidence ? { confidence: baseMetadata.confidence } : {}),
|
|
233
|
+
};
|
|
234
|
+
const fullEvent: TeamEvent = {
|
|
235
|
+
time: new Date().toISOString(),
|
|
236
|
+
...event,
|
|
237
|
+
metadata,
|
|
238
|
+
};
|
|
239
|
+
if (baseMetadata?.fingerprint || TERMINAL_EVENT_TYPES.has(fullEvent.type)) {
|
|
240
|
+
metadata = { ...metadata, fingerprint: baseMetadata?.fingerprint ?? computeEventFingerprint(fullEvent) };
|
|
241
|
+
fullEvent.metadata = metadata;
|
|
242
|
+
}
|
|
243
|
+
|
|
244
|
+
// Overflow handling: same logic as sync path
|
|
245
|
+
const isTerminal = TERMINAL_EVENT_TYPES.has(fullEvent.type);
|
|
246
|
+
let skippedDueToSize = false;
|
|
247
|
+
if (!isTerminal && fs.existsSync(eventsPath)) {
|
|
248
|
+
const stat = fs.statSync(eventsPath);
|
|
249
|
+
if (stat.size > MAX_EVENTS_BYTES) {
|
|
250
|
+
try {
|
|
251
|
+
compactEventLog(eventsPath);
|
|
252
|
+
} catch (error) {
|
|
253
|
+
logInternalError("event-log.immediate-compact", error, `eventsPath=${eventsPath}`);
|
|
254
|
+
}
|
|
255
|
+
if (fs.existsSync(eventsPath)) {
|
|
256
|
+
const afterCompact = fs.statSync(eventsPath);
|
|
257
|
+
if (afterCompact.size > MAX_EVENTS_BYTES) {
|
|
258
|
+
rotateEventLog(eventsPath);
|
|
259
|
+
}
|
|
260
|
+
}
|
|
261
|
+
}
|
|
262
|
+
}
|
|
263
|
+
try {
|
|
264
|
+
if (fs.existsSync(eventsPath) && fs.statSync(eventsPath).size > MAX_EVENTS_BYTES) {
|
|
265
|
+
logInternalError("event-log.size-limit", new Error(`events file ${eventsPath} exceeds ${MAX_EVENTS_BYTES} bytes after compaction`), `eventsPath=${eventsPath}`);
|
|
266
|
+
skippedDueToSize = true;
|
|
267
|
+
}
|
|
268
|
+
} catch (error) {
|
|
269
|
+
logInternalError("event-log.size-check", error, `eventsPath=${eventsPath}`);
|
|
270
|
+
}
|
|
271
|
+
|
|
272
|
+
if (!skippedDueToSize) {
|
|
273
|
+
const line = JSON.stringify(redactSecrets(fullEvent)) + "\n";
|
|
274
|
+
await fs.promises.appendFile(eventsPath, line, { encoding: "utf-8" });
|
|
275
|
+
}
|
|
276
|
+
|
|
277
|
+
appendCounter++;
|
|
278
|
+
if (appendCounter % 100 === 0 && needsRotation(eventsPath)) {
|
|
279
|
+
try { compactEventLog(eventsPath); } catch (error) { logInternalError("event-log.rotation", error, `eventsPath=${eventsPath}`); }
|
|
280
|
+
}
|
|
281
|
+
try { emitFromTeamEvent(fullEvent); } catch (error) { logInternalError("event-log.emit", error); }
|
|
282
|
+
|
|
283
|
+
const seq = fullEvent.metadata?.seq ?? 0;
|
|
284
|
+
try {
|
|
285
|
+
const stat = fs.statSync(eventsPath);
|
|
286
|
+
if (sequenceCache.size >= MAX_SEQUENCE_CACHE_ENTRIES) {
|
|
287
|
+
evictOldestSequenceCacheEntries();
|
|
288
|
+
}
|
|
289
|
+
sequenceCache.set(eventsPath, { size: stat.size, mtimeMs: stat.mtimeMs, seq });
|
|
290
|
+
persistSequence(eventsPath, seq);
|
|
291
|
+
} catch (error) {
|
|
292
|
+
logInternalError("event-log.persist-sequence", error, `eventsPath=${eventsPath}`);
|
|
293
|
+
}
|
|
294
|
+
return fullEvent;
|
|
295
|
+
});
|
|
296
|
+
asyncQueues.set(queueKey, next.catch((error) => { logInternalError("event-log.async-queue", error, eventsPath); asyncQueues.delete(queueKey); }));
|
|
297
|
+
return next;
|
|
298
|
+
}
|
|
299
|
+
|
|
181
300
|
/**
|
|
182
301
|
* Body of `appendEvent` assuming the caller already holds
|
|
183
302
|
* `withEventLogLockSync` for `eventsPath`. Used by `appendEventBuffered` to
|
|
@@ -254,7 +373,7 @@ function appendEventInsideLock(eventsPath: string, event: AppendTeamEvent): Team
|
|
|
254
373
|
try {
|
|
255
374
|
const stat = fs.statSync(eventsPath);
|
|
256
375
|
if (sequenceCache.size >= MAX_SEQUENCE_CACHE_ENTRIES) {
|
|
257
|
-
|
|
376
|
+
evictOldestSequenceCacheEntries();
|
|
258
377
|
}
|
|
259
378
|
sequenceCache.set(eventsPath, { size: stat.size, mtimeMs: stat.mtimeMs, seq });
|
|
260
379
|
persistSequence(eventsPath, seq);
|
|
@@ -283,6 +402,12 @@ const bufferedTimers = new Map<string, ReturnType<typeof setTimeout>>();
|
|
|
283
402
|
const DEFAULT_BUFFER_MS = 20;
|
|
284
403
|
|
|
285
404
|
export function appendEventBuffered(eventsPath: string, event: AppendTeamEvent, bufferMs = DEFAULT_BUFFER_MS): Promise<TeamEvent> {
|
|
405
|
+
// FIX: Terminal events must bypass buffer to ensure they're written immediately.
|
|
406
|
+
// Previously, terminal events like task.failed could be lost on process crash.
|
|
407
|
+
if (TERMINAL_EVENT_TYPES.has(event.type)) {
|
|
408
|
+
// For terminal events, write synchronously to ensure durability
|
|
409
|
+
return Promise.resolve(appendEvent(eventsPath, event));
|
|
410
|
+
}
|
|
286
411
|
return new Promise<TeamEvent>((resolve, reject) => {
|
|
287
412
|
const queue = bufferedQueues.get(eventsPath) ?? [];
|
|
288
413
|
queue.push({ event, resolve, reject });
|
|
@@ -300,8 +425,16 @@ function flushOneEventLogBuffer(eventsPath: string): void {
|
|
|
300
425
|
bufferedQueues.delete(eventsPath);
|
|
301
426
|
const timer = bufferedTimers.get(eventsPath);
|
|
302
427
|
if (timer) clearTimeout(timer);
|
|
428
|
+
// MEDIUM-13: NOTE(review): the timer entry is deleted here, before the flush runs — confirm whether the deletion was meant to move into a finally block after a successful flush
|
|
303
429
|
bufferedTimers.delete(eventsPath);
|
|
304
430
|
if (!queue || queue.length === 0) return;
|
|
431
|
+
|
|
432
|
+
// HIGH-10: Clean up queue if it exceeds limit to prevent unbounded growth
|
|
433
|
+
if (queue.length > 1000) {
|
|
434
|
+
// Keep only the last 500 entries
|
|
435
|
+
queue.splice(0, queue.length - 500);
|
|
436
|
+
}
|
|
437
|
+
|
|
305
438
|
try {
|
|
306
439
|
withEventLogLockSync(eventsPath, () => {
|
|
307
440
|
for (const item of queue) {
|
|
@@ -325,12 +458,13 @@ export function flushEventLogBuffer(): void {
|
|
|
325
458
|
}
|
|
326
459
|
|
|
327
460
|
/**
|
|
328
|
-
*
|
|
329
|
-
* the
|
|
330
|
-
* (high-frequency `task.progress`).
|
|
461
|
+
* Schedule an async event append without waiting for the result.
|
|
462
|
+
* Uses the non-blocking async queue to avoid blocking the event loop.
|
|
463
|
+
* Use only for events whose return value is ignored (high-frequency `task.progress`).
|
|
464
|
+
* Errors are logged via logInternalError.
|
|
331
465
|
*/
|
|
332
|
-
export function appendEventFireAndForget(eventsPath: string, event: AppendTeamEvent,
|
|
333
|
-
|
|
466
|
+
export function appendEventFireAndForget(eventsPath: string, event: AppendTeamEvent, _bufferMs = DEFAULT_BUFFER_MS): void {
|
|
467
|
+
appendEventAsync(eventsPath, event).catch((error) => logInternalError("event-log.fire-and-forget", error, eventsPath));
|
|
334
468
|
}
|
|
335
469
|
|
|
336
470
|
// Auto-flush on process exit so buffered events do not silently leak.
|
|
@@ -6,13 +6,13 @@
|
|
|
6
6
|
import { crewHooks } from "../runtime/crew-hooks.ts";
|
|
7
7
|
|
|
8
8
|
// Lazy-initialized store and paths
|
|
9
|
-
let storeInstance: import("./instinct-store").InstinctStore | null = null;
|
|
10
|
-
let pathsInstance: typeof import("../utils/paths") | null = null;
|
|
9
|
+
let storeInstance: import("./instinct-store.js").InstinctStore | null = null;
|
|
10
|
+
let pathsInstance: typeof import("../utils/paths.js") | null = null;
|
|
11
11
|
|
|
12
12
|
async function getStore() {
|
|
13
13
|
if (!storeInstance) {
|
|
14
|
-
const { InstinctStore } = await import("./instinct-store");
|
|
15
|
-
const paths = await import("../utils/paths");
|
|
14
|
+
const { InstinctStore } = await import("./instinct-store.js");
|
|
15
|
+
const paths = await import("../utils/paths.js");
|
|
16
16
|
storeInstance = new InstinctStore(paths.projectCrewRoot(process.cwd()));
|
|
17
17
|
}
|
|
18
18
|
return storeInstance;
|
|
@@ -20,7 +20,7 @@ async function getStore() {
|
|
|
20
20
|
|
|
21
21
|
async function getPaths() {
|
|
22
22
|
if (!pathsInstance) {
|
|
23
|
-
pathsInstance = await import("../utils/paths");
|
|
23
|
+
pathsInstance = await import("../utils/paths.js");
|
|
24
24
|
}
|
|
25
25
|
return pathsInstance;
|
|
26
26
|
}
|
package/src/state/mailbox.ts
CHANGED
|
@@ -289,6 +289,16 @@ export function readDeliveryState(manifest: TeamRunManifest): MailboxDeliverySta
|
|
|
289
289
|
|
|
290
290
|
function writeDeliveryState(manifest: TeamRunManifest, state: MailboxDeliveryState): void {
|
|
291
291
|
ensureRunMailbox(manifest);
|
|
292
|
+
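// Prune when over the cap: entries are ranked by the status order below (queued, delivered, acknowledged) and only the first MAX_DELIVERY_MESSAGES are kept (not age-based)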
|
|
293
|
+
const MAX_DELIVERY_MESSAGES = 10000;
|
|
294
|
+
if (Object.keys(state.messages).length > MAX_DELIVERY_MESSAGES) {
|
|
295
|
+
const sorted = Object.entries(state.messages).sort(([, a], [, b]) => {
|
|
296
|
+
const order = { queued: 0, delivered: 1, acknowledged: 2 };
|
|
297
|
+
return (order[a] ?? 3) - (order[b] ?? 3);
|
|
298
|
+
});
|
|
299
|
+
const trimmed = sorted.slice(0, MAX_DELIVERY_MESSAGES);
|
|
300
|
+
state.messages = Object.fromEntries(trimmed);
|
|
301
|
+
}
|
|
292
302
|
atomicWriteFile(deliveryFile(manifest, true), `${JSON.stringify(redactSecrets(state), null, 2)}\n`);
|
|
293
303
|
}
|
|
294
304
|
|
package/src/state/run-cache.ts
CHANGED
|
@@ -3,6 +3,8 @@ import * as path from "node:path";
|
|
|
3
3
|
import * as crypto from "node:crypto";
|
|
4
4
|
import { projectCrewRoot } from "../utils/paths.ts";
|
|
5
5
|
import type { TeamTaskState } from "./types.ts";
|
|
6
|
+
import { atomicWriteFile } from "./atomic-write.ts";
|
|
7
|
+
import { withFileLockSync } from "./locks.ts";
|
|
6
8
|
|
|
7
9
|
const DEFAULT_CACHE_TTL_MS = 60 * 60 * 1000; // 1 hour
|
|
8
10
|
|
|
@@ -31,6 +33,7 @@ export function computeRunCacheKey(goal: string, team: string, workflow: string,
|
|
|
31
33
|
.update(normalized)
|
|
32
34
|
.update(team)
|
|
33
35
|
.update(workflow)
|
|
36
|
+
.update(_cwd)
|
|
34
37
|
.digest("hex")
|
|
35
38
|
.slice(0, 16);
|
|
36
39
|
}
|
|
@@ -61,12 +64,15 @@ export function getCachedRun(cwd: string, cacheKey: string): CacheEntry | null {
|
|
|
61
64
|
const entry = JSON.parse(fs.readFileSync(entryPath, "utf-8")) as CacheEntry;
|
|
62
65
|
|
|
63
66
|
if (Date.now() > entry.expiresAt) {
|
|
64
|
-
// Remove expired entry
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
67
|
+
// Remove expired entry — use lock + atomic write to prevent index corruption
|
|
68
|
+
withFileLockSync(indexPath, () => {
|
|
69
|
+
try {
|
|
70
|
+
fs.unlinkSync(entryPath);
|
|
71
|
+
} catch { /* ignore */ }
|
|
72
|
+
const updatedIndex = JSON.parse(fs.readFileSync(indexPath, "utf-8")) as CacheIndex;
|
|
73
|
+
delete updatedIndex[cacheKey];
|
|
74
|
+
atomicWriteFile(indexPath, JSON.stringify(updatedIndex));
|
|
75
|
+
});
|
|
70
76
|
return null;
|
|
71
77
|
}
|
|
72
78
|
|
|
@@ -109,14 +115,18 @@ export function saveRunToCache(
|
|
|
109
115
|
const entryPath = path.join(dir, `${cacheKey}.json`);
|
|
110
116
|
fs.writeFileSync(entryPath, JSON.stringify(entry), "utf-8");
|
|
111
117
|
|
|
112
|
-
// Update index
|
|
118
|
+
// Update index with atomic write: write to temp file then rename
|
|
113
119
|
const indexPath = path.join(dir, "index.json");
|
|
114
120
|
const index: CacheIndex = fs.existsSync(indexPath)
|
|
115
121
|
? JSON.parse(fs.readFileSync(indexPath, "utf-8"))
|
|
116
122
|
: {};
|
|
117
123
|
|
|
118
124
|
index[cacheKey] = entryPath;
|
|
119
|
-
|
|
125
|
+
|
|
126
|
+
// Atomic write: write to temp file first, then rename
|
|
127
|
+
const tempPath = path.join(dir, "index.json.tmp");
|
|
128
|
+
fs.writeFileSync(tempPath, JSON.stringify(index), "utf-8");
|
|
129
|
+
fs.renameSync(tempPath, indexPath);
|
|
120
130
|
}
|
|
121
131
|
|
|
122
132
|
/**
|
package/src/state/state-store.ts
CHANGED
|
@@ -12,6 +12,7 @@ import { assertSafePathId, resolveContainedRelativePath, resolveRealContainedPat
|
|
|
12
12
|
import { withRunLock } from "./locks.ts";
|
|
13
13
|
import type { TeamConfig } from "../teams/team-config.ts";
|
|
14
14
|
import type { WorkflowConfig } from "../workflows/workflow-config.ts";
|
|
15
|
+
import { toPiSessionId } from "../utils/session-utils.ts";
|
|
15
16
|
|
|
16
17
|
export interface RunPaths {
|
|
17
18
|
runId: string;
|
|
@@ -32,7 +33,7 @@ interface ManifestCacheEntry {
|
|
|
32
33
|
cachedAt?: number;
|
|
33
34
|
}
|
|
34
35
|
|
|
35
|
-
const MANIFEST_CACHE_TTL_MS =
|
|
36
|
+
const MANIFEST_CACHE_TTL_MS = 30 * 1000; // 30 seconds (FIX: reduced from 5 minutes for faster state updates)
|
|
36
37
|
const manifestCache = new Map<string, ManifestCacheEntry>();
|
|
37
38
|
|
|
38
39
|
function setManifestCache(stateRoot: string, entry: ManifestCacheEntry): void {
|
|
@@ -148,6 +149,7 @@ export function createRunManifest(params: {
|
|
|
148
149
|
const manifest: TeamRunManifest = {
|
|
149
150
|
schemaVersion: 1,
|
|
150
151
|
runId: paths.runId,
|
|
152
|
+
sessionId: toPiSessionId(paths.runId),
|
|
151
153
|
team: params.team.name,
|
|
152
154
|
workflow: params.workflow?.name,
|
|
153
155
|
goal: params.goal,
|
package/src/state/types.ts
CHANGED
|
@@ -4,6 +4,7 @@ import type { WorkerHeartbeatState } from "../runtime/worker-heartbeat.ts";
|
|
|
4
4
|
import type { CrewAgentProgress } from "../runtime/crew-agent-runtime.ts";
|
|
5
5
|
import type { RolloutEntry, CoherenceMark } from "./decision-ledger.ts";
|
|
6
6
|
export type { RolloutEntry, CoherenceMark };
|
|
7
|
+
export type { CrewAgentProgress };
|
|
7
8
|
|
|
8
9
|
export type { TeamRunStatus, TeamTaskStatus } from "./contracts.ts";
|
|
9
10
|
|
|
@@ -25,6 +26,7 @@ export interface VerificationCommandResult {
|
|
|
25
26
|
cmd: string;
|
|
26
27
|
status: "passed" | "failed" | "not_run";
|
|
27
28
|
exitCode?: number | null;
|
|
29
|
+
durationMs?: number;
|
|
28
30
|
outputArtifact?: ArtifactDescriptor;
|
|
29
31
|
}
|
|
30
32
|
|
|
@@ -156,6 +158,8 @@ export interface CrewAttentionEventData {
|
|
|
156
158
|
export interface TeamRunManifest {
|
|
157
159
|
schemaVersion: 1;
|
|
158
160
|
runId: string;
|
|
161
|
+
/** pi session ID aligned with run ID for cross-referencing (e.g., "crew-team20260528") */
|
|
162
|
+
sessionId?: string;
|
|
159
163
|
team: string;
|
|
160
164
|
workflow?: string;
|
|
161
165
|
goal: string;
|
|
@@ -68,6 +68,7 @@ export default function safeBashExtension(pi: ExtensionAPI): void {
|
|
|
68
68
|
"Execute a bash command safely. Blocks dangerous commands like `rm -rf /`, `sudo`, `curl | sh`, etc.",
|
|
69
69
|
parameters: Type.Object({
|
|
70
70
|
command: Type.String({ description: "Bash command to execute" }),
|
|
71
|
+
/** Timeout in seconds (optional). Default: no timeout. If exceeded, the command is killed. */
|
|
71
72
|
timeout: Type.Optional(
|
|
72
73
|
Type.Number({ description: "Timeout in seconds (optional)" }),
|
|
73
74
|
),
|