pi-crew 0.5.2 → 0.5.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (137) hide show
  1. package/CHANGELOG.md +183 -0
  2. package/README.md +17 -1
  3. package/docs/architecture.md +2 -0
  4. package/docs/bugs/cross-session-notification-leakage.md +82 -0
  5. package/docs/coding-agent-optimization.md +268 -0
  6. package/docs/deep-review-report.md +384 -0
  7. package/docs/distillation/cybersecurity-patterns.md +294 -0
  8. package/docs/migration-v0.4-v0.5.md +208 -0
  9. package/docs/optimization-plan.md +642 -0
  10. package/docs/pi-crew-v0.5.5-audit-fix-plan.md +133 -0
  11. package/docs/pi-mono-opportunities.md +969 -0
  12. package/docs/pi-mono-review.md +291 -0
  13. package/docs/skills/REFERENCE.md +144 -0
  14. package/package.json +12 -9
  15. package/skills/artifact-analysis-loop/SKILL.md +302 -0
  16. package/skills/async-worker-recovery/SKILL.md +19 -1
  17. package/skills/child-pi-spawning/SKILL.md +19 -6
  18. package/skills/context-artifact-hygiene/SKILL.md +19 -2
  19. package/skills/delegation-patterns/SKILL.md +68 -3
  20. package/skills/detection-pipeline-design/SKILL.md +285 -0
  21. package/skills/event-log-tracing/SKILL.md +20 -6
  22. package/skills/git-master/SKILL.md +20 -6
  23. package/skills/hunting-investigation-loop/SKILL.md +401 -0
  24. package/skills/incident-playbook-construction/SKILL.md +383 -0
  25. package/skills/live-agent-lifecycle/SKILL.md +20 -6
  26. package/skills/mailbox-interactive/SKILL.md +19 -6
  27. package/skills/model-routing-context/SKILL.md +19 -1
  28. package/skills/multi-perspective-review/SKILL.md +19 -4
  29. package/skills/observability-reliability/SKILL.md +19 -2
  30. package/skills/orchestration/SKILL.md +20 -2
  31. package/skills/ownership-session-security/SKILL.md +20 -2
  32. package/skills/pi-extension-lifecycle/SKILL.md +20 -2
  33. package/skills/post-mortem/SKILL.md +7 -2
  34. package/skills/read-only-explorer/SKILL.md +20 -6
  35. package/skills/requirements-to-task-packet/SKILL.md +23 -3
  36. package/skills/resource-discovery-config/SKILL.md +20 -2
  37. package/skills/runtime-state-reader/SKILL.md +20 -2
  38. package/skills/safe-bash/SKILL.md +21 -6
  39. package/skills/scrutinize/SKILL.md +20 -2
  40. package/skills/secure-agent-orchestration-review/SKILL.md +29 -2
  41. package/skills/security-review/SKILL.md +560 -0
  42. package/skills/state-mutation-locking/SKILL.md +22 -2
  43. package/skills/systematic-debugging/SKILL.md +8 -6
  44. package/skills/threat-hypothesis-framework/SKILL.md +175 -0
  45. package/skills/ui-render-performance/SKILL.md +20 -2
  46. package/skills/verification-before-done/SKILL.md +17 -2
  47. package/skills/widget-rendering/SKILL.md +21 -6
  48. package/skills/workspace-isolation/SKILL.md +20 -6
  49. package/skills/worktree-isolation/SKILL.md +20 -6
  50. package/src/agents/agent-config.ts +40 -1
  51. package/src/benchmark/benchmark-runner.ts +45 -0
  52. package/src/benchmark/feedback-loop.ts +5 -0
  53. package/src/config/config.ts +32 -5
  54. package/src/config/role-tools.ts +82 -0
  55. package/src/config/suggestions.ts +8 -0
  56. package/src/config/types.ts +4 -0
  57. package/src/extension/async-notifier.ts +10 -1
  58. package/src/extension/crew-cleanup.ts +114 -0
  59. package/src/extension/cross-extension-rpc.ts +1 -1
  60. package/src/extension/notification-router.ts +18 -0
  61. package/src/extension/register.ts +27 -19
  62. package/src/extension/registration/subagent-tools.ts +1 -1
  63. package/src/extension/team-tool/anchor.ts +201 -0
  64. package/src/extension/team-tool/api.ts +2 -1
  65. package/src/extension/team-tool/auto-summarize.ts +154 -0
  66. package/src/extension/team-tool/run.ts +42 -7
  67. package/src/extension/team-tool.ts +44 -2
  68. package/src/hooks/registry.ts +1 -3
  69. package/src/observability/event-bus.ts +69 -0
  70. package/src/observability/event-to-metric.ts +0 -2
  71. package/src/runtime/anchor-manager.ts +473 -0
  72. package/src/runtime/async-runner.ts +8 -4
  73. package/src/runtime/auto-summarize.ts +350 -0
  74. package/src/runtime/background-runner.ts +10 -3
  75. package/src/runtime/budget-tracker.ts +354 -0
  76. package/src/runtime/chain-runner.ts +507 -0
  77. package/src/runtime/child-pi.ts +123 -35
  78. package/src/runtime/crash-recovery.ts +5 -4
  79. package/src/runtime/crew-agent-runtime.ts +1 -0
  80. package/src/runtime/custom-tools/irc-tool.ts +13 -0
  81. package/src/runtime/custom-tools/submit-result-tool.ts +3 -2
  82. package/src/runtime/delivery-coordinator.ts +10 -3
  83. package/src/runtime/dynamic-script-runner.ts +482 -0
  84. package/src/runtime/foreground-control.ts +87 -17
  85. package/src/runtime/handoff-manager.ts +589 -0
  86. package/src/runtime/hidden-handoff.ts +424 -0
  87. package/src/runtime/live-agent-manager.ts +20 -4
  88. package/src/runtime/live-session-runtime.ts +39 -4
  89. package/src/runtime/manifest-cache.ts +2 -1
  90. package/src/runtime/model-resolver.ts +16 -4
  91. package/src/runtime/phase-tracker.ts +373 -0
  92. package/src/runtime/pi-args.ts +11 -1
  93. package/src/runtime/pi-json-output.ts +31 -0
  94. package/src/runtime/pipeline-runner.ts +514 -0
  95. package/src/runtime/progress-tracker.ts +124 -0
  96. package/src/runtime/retry-runner.ts +354 -0
  97. package/src/runtime/sandbox.ts +252 -0
  98. package/src/runtime/scheduler.ts +7 -2
  99. package/src/runtime/skill-effectiveness.ts +473 -0
  100. package/src/runtime/skill-instructions.ts +37 -3
  101. package/src/runtime/subagent-manager.ts +1 -1
  102. package/src/runtime/task-graph.ts +11 -1
  103. package/src/runtime/task-runner.ts +92 -18
  104. package/src/runtime/team-runner.ts +13 -12
  105. package/src/runtime/tool-progress.ts +10 -3
  106. package/src/runtime/verification-gates.ts +367 -0
  107. package/src/schema/team-tool-schema.ts +37 -0
  108. package/src/skills/discover-skills.ts +5 -0
  109. package/src/state/active-run-registry.ts +9 -2
  110. package/src/state/contracts.ts +9 -0
  111. package/src/state/crew-init.ts +3 -3
  112. package/src/state/decision-ledger.ts +98 -55
  113. package/src/state/event-log-rotation.ts +2 -2
  114. package/src/state/event-log.ts +144 -10
  115. package/src/state/hook-instinct-bridge.ts +5 -5
  116. package/src/state/mailbox.ts +10 -0
  117. package/src/state/run-cache.ts +18 -8
  118. package/src/state/state-store.ts +3 -1
  119. package/src/state/types.ts +4 -0
  120. package/src/tools/safe-bash-extension.ts +1 -0
  121. package/src/tools/safe-bash.ts +152 -20
  122. package/src/types/new-api-types.ts +34 -0
  123. package/src/ui/agent-management-overlay.ts +5 -1
  124. package/src/ui/crew-widget.ts +29 -15
  125. package/src/ui/overlays/mailbox-detail-overlay.ts +13 -2
  126. package/src/ui/powerbar-publisher.ts +101 -7
  127. package/src/ui/tool-render.ts +15 -15
  128. package/src/ui/transcript-cache.ts +13 -0
  129. package/src/utils/bm25-search.ts +16 -8
  130. package/src/utils/env-filter.ts +8 -5
  131. package/src/utils/redaction.ts +169 -15
  132. package/src/utils/session-utils.ts +52 -0
  133. package/src/utils/sse-parser.ts +10 -1
  134. package/src/worktree/cleanup.ts +6 -1
  135. package/src/worktree/worktree-manager.ts +32 -13
  136. package/workflows/chain.workflow.md +252 -0
  137. package/workflows/pipeline.workflow.md +27 -0
@@ -0,0 +1,367 @@
1
+ /**
2
+ * Verification Gates — ECC VERIFICATION_LOOP Pattern Implementation
3
+ *
4
+ * Implements RED/GREEN phase gates for task verification.
5
+ * Sequential execution: cannot skip to Phase N+1 without Phase N passing.
6
+ *
7
+ * Based on: docs/distillation/ECC-10-skills.md §2 (verification-loop)
8
+ *
9
+ * @module verification-gates
10
+ */
11
+
12
+ import { spawn } from "node:child_process";
13
+ import * as fs from "node:fs";
14
+ import * as path from "node:path";
15
+ import { writeArtifact } from "../state/artifact-store.ts";
16
+ import type { VerificationContract, VerificationCommandResult, GreenLevel, ArtifactDescriptor } from "../state/types.ts";
17
+
/**
 * Outcome of a single phase gate.
 */
export interface PhaseGateResult {
  /** 1-based position of the gate within the sequence that was run. */
  phase: number;
  /** Gate name, copied from the gate definition. */
  name: string;
  /** Gate outcome; "skipped" is recorded for a gate that was not executed because the run was aborted. */
  status: "passed" | "failed" | "skipped";
  /** Shell command associated with this gate. */
  command: string;
  /** Exit code reported for the command; not set on skipped gates. */
  exitCode?: number | null;
  /** Captured command output, when the command was executed. */
  output?: string;
  /** Time spent on this gate in milliseconds (0 for skipped gates). */
  durationMs: number;
  /** Short failure/skip reason; left undefined when the gate passed. */
  error?: string;
}
28
+
/**
 * Aggregate outcome of a sequential phase gate run.
 */
export interface PhaseGateBundle {
  /** Per-gate results, in execution order. */
  results: PhaseGateResult[];
  /** Wall-clock time for the whole run, in milliseconds. */
  totalDurationMs: number;
  /** True only when every recorded result has status "passed". */
  allPassed: boolean;
  /** 1-based phase number at which the run stopped early, if it did. */
  stoppedAt?: number;
}
35
+
/**
 * Standard phase gate definitions for npm/TypeScript projects.
 * Sequential enforcement: each phase must pass before proceeding.
 *
 * The commands deliberately do NOT end in `|| true`: a gate is judged by the
 * shell's exit code, and `|| true` would force that code to 0 so no gate
 * could ever fail. `2>&1` is kept so stderr is interleaved with stdout.
 */
export const NPM_TYPESCRIPT_GATES: Array<{ name: string; command: string; critical: boolean }> = [
  { name: "build", command: "npm run build 2>&1", critical: true },
  { name: "typecheck", command: "npx tsc --noEmit 2>&1", critical: true },
  { name: "lint", command: "npm run lint 2>&1", critical: false },
  { name: "tests", command: "npm test 2>&1", critical: true },
];
46
+
/**
 * Cargo/Rust project phase gates.
 *
 * As with any gate, pass/fail is decided by the command's exit code, so the
 * commands must not mask it with `|| true`.
 */
export const CARGO_RUST_GATES: Array<{ name: string; command: string; critical: boolean }> = [
  { name: "check", command: "cargo check 2>&1", critical: true },
  { name: "test", command: "cargo test 2>&1", critical: true },
  { name: "clippy", command: "cargo clippy 2>&1", critical: false },
];
55
+
/**
 * Execute a single shell command and capture its combined stdout/stderr.
 *
 * The returned promise never rejects: spawn failures and timeouts are
 * reported through `exitCode: -1` plus an explanatory message in `output`.
 *
 * @param command - Command line handed to `sh -c` (compound commands allowed)
 * @param cwd - Working directory for the command
 * @param timeoutMs - Time limit in milliseconds before the shell is killed
 * @returns Exit code (`null` if the shell ended without one), the last
 *          100000 characters of output, and the elapsed time in milliseconds
 */
async function executeCommand(
  command: string,
  cwd: string,
  timeoutMs: number = 120000,
): Promise<{ exitCode: number | null; output: string; durationMs: number }> {
  const maxOutputChars = 100000; // Cap at 100KB
  const start = Date.now();
  let output = "";

  return new Promise((resolve) => {
    let settled = false;
    let timer: ReturnType<typeof setTimeout> | undefined;

    // Use shell to handle compound commands
    const child = spawn("sh", ["-c", command], {
      cwd,
      env: { ...process.env, FORCE_COLOR: "0" },
    });

    // Resolve at most once and always release the timer, so a finished
    // command does not keep the event loop alive until the timeout elapses.
    const finish = (exitCode: number | null, text: string): void => {
      if (settled) return;
      settled = true;
      clearTimeout(timer);
      resolve({ exitCode, output: text, durationMs: Date.now() - start });
    };

    const append = (chunk: string): void => {
      output += chunk;
      // Trim while accumulating so a very chatty command cannot grow the buffer without bound.
      if (output.length > maxOutputChars * 2) {
        output = output.slice(-maxOutputChars);
      }
    };

    // Decode at the stream level so multi-byte characters split across chunks stay intact.
    child.stdout?.setEncoding("utf8");
    child.stderr?.setEncoding("utf8");
    child.stdout?.on("data", append);
    child.stderr?.on("data", append);

    child.on("close", (code) => {
      finish(code, output.slice(-maxOutputChars));
    });

    child.on("error", (err) => {
      finish(-1, `Execution error: ${err.message}`);
    });

    // Single timeout mechanism: kill the shell and report immediately.
    // The pipes are destroyed because a surviving grandchild could otherwise
    // hold them open and delay both "close" and process exit.
    timer = setTimeout(() => {
      child.kill("SIGKILL");
      child.stdout?.destroy();
      child.stderr?.destroy();
      finish(-1, output.slice(-maxOutputChars) + "\n[TIMEOUT: Command exceeded limit]");
    }, timeoutMs);
  });
}
112
+
/**
 * Run phase gates sequentially, stopping on first critical failure.
 *
 * A gate passes only when its command exits with code 0. A failing
 * non-critical gate does not stop the run, but it still makes `allPassed`
 * false. The abort signal is checked before each gate starts; when it is set,
 * the gate is recorded as "skipped" (without invoking `onPhase`) and the run
 * stops there.
 *
 * @param gates - Array of phase gate definitions
 * @param cwd - Working directory to execute commands in
 * @param signal - Optional abort signal
 * @param onPhase - Optional callback for each executed phase
 * @param timeoutMs - Per-gate time limit in milliseconds (default: 2 minutes)
 * @returns Phase gate bundle with all results
 */
export async function runPhaseGates(
  gates: Array<{ name: string; command: string; critical: boolean }>,
  cwd: string,
  signal?: AbortSignal,
  onPhase?: (result: PhaseGateResult) => void,
  timeoutMs: number = 120000,
): Promise<PhaseGateBundle> {
  const results: PhaseGateResult[] = [];
  const startTime = Date.now();
  let stoppedAt: number | undefined;

  for (const [index, gate] of gates.entries()) {
    const phase = index + 1;

    // Check abort signal
    if (signal?.aborted) {
      results.push({
        phase,
        name: gate.name,
        status: "skipped",
        command: gate.command,
        durationMs: 0,
        error: "Aborted",
      });
      stoppedAt = phase;
      break;
    }

    // Execute the gate command
    const { exitCode, output, durationMs } = await executeCommand(gate.command, cwd, timeoutMs);

    const passed = exitCode === 0;
    const result: PhaseGateResult = {
      phase,
      name: gate.name,
      status: passed ? "passed" : "failed",
      command: gate.command,
      exitCode,
      output,
      durationMs,
      error: passed ? undefined : `Exit code: ${exitCode}`,
    };

    results.push(result);
    onPhase?.(result);

    // Stop on critical failure
    if (!passed && gate.critical) {
      stoppedAt = phase;
      break;
    }
  }

  return {
    results,
    totalDurationMs: Date.now() - startTime,
    allPassed: results.every((r) => r.status === "passed"),
    stoppedAt,
  };
}
186
+
/**
 * Execute verification commands from a task's verification contract.
 * Maps the contract commands to phase gates and runs them sequentially.
 *
 * Every command is treated as critical, so the first failing command stops
 * the run; commands that never executed are reported with status "not_run".
 * A log artifact is written per executed command and a JSON summary artifact
 * is written once the run ends.
 *
 * @param contract - Verification contract with commands to execute
 * @param cwd - Working directory
 * @param runId - Run ID (accepted for interface compatibility; not used here)
 * @param taskId - Task ID for artifact naming
 * @param artifactsRoot - Artifacts root directory
 * @param signal - Optional abort signal
 * @returns Array of verification command results, one per contract command
 */
export async function executeVerificationCommands(
  contract: VerificationContract,
  cwd: string,
  runId: string,
  taskId: string,
  artifactsRoot: string,
  signal?: AbortSignal,
): Promise<VerificationCommandResult[]> {
  if (!contract.commands || contract.commands.length === 0) {
    return [];
  }

  const results: VerificationCommandResult[] = [];

  // Map commands to phase gates
  const gates = contract.commands.map((cmd, index) => ({
    name: `verification-${index + 1}`,
    command: cmd,
    critical: true, // All verification commands are critical by default
  }));

  // Create artifacts directory. A recursive mkdir succeeds when the directory
  // already exists, so no separate existence check (and no check-then-create
  // race) is needed.
  fs.mkdirSync(path.join(artifactsRoot, "verification-gates"), { recursive: true });

  // Run phase gates
  const bundle = await runPhaseGates(gates, cwd, signal, (phaseResult) => {
    // Write phase artifact immediately for observability
    const phaseArtifact = writeArtifact(artifactsRoot, {
      kind: "log",
      relativePath: `verification-gates/${taskId}-phase-${phaseResult.phase}-${phaseResult.name}.log`,
      content: [
        `# Phase ${phaseResult.phase}: ${phaseResult.name}`,
        `Status: ${phaseResult.status.toUpperCase()}`,
        `Command: ${phaseResult.command}`,
        `Duration: ${phaseResult.durationMs}ms`,
        phaseResult.exitCode != null ? `Exit Code: ${phaseResult.exitCode}` : "",
        phaseResult.error ? `Error: ${phaseResult.error}` : "",
        "",
        "## Output",
        phaseResult.output || "(no output)",
      ].join("\n"),
      producer: taskId,
    });

    results.push({
      cmd: phaseResult.command,
      status: phaseResult.status === "passed" ? "passed" : "failed",
      exitCode: phaseResult.exitCode,
      outputArtifact: phaseArtifact,
    });
  });

  // Write summary artifact (called for its side effect; the descriptor is not needed here)
  writeArtifact(artifactsRoot, {
    kind: "metadata",
    relativePath: `verification-gates/${taskId}-summary.json`,
    content: JSON.stringify(bundle, null, 2),
    producer: taskId,
  });

  // Fill in any remaining results (in case of early exit)
  for (const gate of gates.slice(results.length)) {
    results.push({
      cmd: gate.command,
      status: "not_run",
    });
  }

  return results;
}
272
+
/**
 * Compute observed green level from verification results.
 * Maps verification outcomes to green levels per ECC pattern.
 *
 * Rules, in order:
 * - no commands, or any failed command: "none"
 * - every command passed: the required level, or "targeted" when the
 *   required level is "none"
 * - some (but not all) commands passed: "targeted"
 * - nothing passed: "none"
 *
 * @param commands - Array of verification command results
 * @param requiredLevel - Required green level from contract
 * @returns Observed green level
 */
export function computeGreenLevelFromResults(
  commands: VerificationCommandResult[],
  requiredLevel: GreenLevel,
): GreenLevel {
  if (commands.length === 0) {
    return "none";
  }

  // Any failed verification invalidates the whole set.
  if (commands.some((c) => c.status === "failed")) {
    return "none";
  }

  const passedCount = commands.filter((c) => c.status === "passed").length;

  // Everything passed: report the level the contract asked for.
  if (passedCount === commands.length) {
    return requiredLevel === "none" ? "targeted" : requiredLevel;
  }

  // Partial pass (the rest did not run) counts as targeted; nothing run counts as none.
  return passedCount > 0 ? "targeted" : "none";
}
311
+
/**
 * Create a verification gate report artifact.
 * Formatted for human review per ECC verification-loop pattern.
 *
 * @param taskId - Task ID shown in the report title
 * @param contract - Verification contract the results were produced for
 * @param results - Per-command results, rendered as one table row each
 * @param bundle - Phase gate bundle used for the summary section
 * @returns The report as a Markdown string
 */
export function createVerificationGateReport(
  taskId: string,
  contract: VerificationContract,
  results: VerificationCommandResult[],
  bundle: PhaseGateBundle,
): string {
  const countWithStatus = (status: string): number =>
    bundle.results.filter((r) => r.status === status).length;

  const lines = [
    `# Verification Gate Report: ${taskId}`,
    "",
    `## Contract`,
    `- Required Green Level: ${contract.requiredGreenLevel}`,
    `- Allow Manual Evidence: ${contract.allowManualEvidence}`,
    // A contract without commands is valid elsewhere in this module, so do not throw here.
    `- Commands: ${contract.commands?.length ?? 0}`,
    "",
    `## Results`,
    "",
    `| Phase | Command | Status | Exit Code | Duration |`,
    `|-------|---------|--------|-----------|----------|`,
  ];

  // Number rows by position: a lookup by identity is quadratic and repeats
  // the first index when the same result object appears more than once.
  results.forEach((result, index) => {
    const statusIcon = result.status === "passed" ? "✅" : result.status === "failed" ? "❌" : "⏭️";
    const commandCell = escapeTableCell(truncate(result.cmd, 40));
    // NOTE(review): results built by executeVerificationCommands carry no
    // durationMs, so this column shows 0ms for them — confirm whether the
    // duration should come from the bundle instead.
    lines.push(
      `| ${index + 1} | \`${commandCell}\` | ${statusIcon} ${result.status} | ${result.exitCode ?? "-"} | ${result.durationMs ?? 0}ms |`,
    );
  });

  lines.push("");
  lines.push(`## Summary`);
  lines.push(`- Total Phases: ${bundle.results.length}`);
  lines.push(`- Passed: ${countWithStatus("passed")}`);
  lines.push(`- Failed: ${countWithStatus("failed")}`);
  lines.push(`- Skipped: ${countWithStatus("skipped")}`);
  lines.push(`- Total Duration: ${bundle.totalDurationMs}ms`);
  lines.push(`- All Passed: ${bundle.allPassed ? "YES ✅" : "NO ❌"}`);

  if (bundle.stoppedAt !== undefined) {
    lines.push(`- Stopped At: Phase ${bundle.stoppedAt}`);
  }

  lines.push("");
  lines.push("## VERIFICATION");
  lines.push(bundle.allPassed ? "**PASSED** - All gates green ✅" : "**FAILED** - One or more gates red ❌");

  return lines.join("\n");
}

/**
 * Make text safe for a single Markdown table cell: line breaks would end the
 * row and unescaped pipes would start a new column.
 */
function escapeTableCell(text: string): string {
  return text.replace(/\r?\n/g, " ").replace(/\|/g, "\\|");
}

/**
 * Shorten `str` to at most `maxLen` characters, marking the cut with "...".
 * When `maxLen` is too small to hold the ellipsis the string is cut hard.
 */
function truncate(str: string, maxLen: number): string {
  if (str.length <= maxLen) return str;
  if (maxLen < 3) return str.slice(0, Math.max(0, maxLen));
  return str.slice(0, maxLen - 3) + "...";
}
@@ -68,6 +68,9 @@ export const TeamToolParams = Type.Object({
68
68
  Type.Literal("orchestrate"),
69
69
  Type.Literal("schedule"),
70
70
  Type.Literal("scheduled"),
71
+ Type.Literal("anchor"),
72
+ Type.Literal("auto-summarize"),
73
+ Type.Literal("auto_boomerang"),
71
74
  ],
72
75
  { description: "Team action. Defaults to 'list' when omitted." },
73
76
  ),
@@ -204,6 +207,32 @@ export const TeamToolParams = Type.Object({
204
207
  once: Type.Optional(
205
208
  Type.Union([Type.String(), Type.Number()], { description: "ISO timestamp or epoch ms for a one-time scheduled run." }),
206
209
  ),
210
+ excludeContextBash: Type.Optional(
211
+ Type.Boolean({
212
+ description: "Mark certain bash commands as excludeFromContext to reduce context tokens (default: false).",
213
+ }),
214
+ ),
215
+ // Budget tracking options
216
+ budgetTotal: Type.Optional(
217
+ Type.Number({
218
+ description: "Total token budget for the run. When set, enables budget tracking with default 80% warning and 95% abort thresholds.",
219
+ minimum: 1,
220
+ }),
221
+ ),
222
+ budgetWarning: Type.Optional(
223
+ Type.Number({
224
+ description: "Budget warning threshold as a fraction (0-1). Default: 0.8 (80%). Emits warning event when this threshold is crossed.",
225
+ minimum: 0,
226
+ maximum: 1,
227
+ }),
228
+ ),
229
+ budgetAbort: Type.Optional(
230
+ Type.Number({
231
+ description: "Budget abort threshold as a fraction (0-1). Default: 0.95 (95%). Aborts further execution when this threshold is crossed.",
232
+ minimum: 0,
233
+ maximum: 1,
234
+ }),
235
+ ),
207
236
  });
208
237
 
209
238
  export interface TeamToolParamsValue {
@@ -287,4 +316,12 @@ export interface TeamToolParamsValue {
287
316
  cron?: string;
288
317
  interval?: number;
289
318
  once?: string | number;
319
+ /** Mark certain bash commands as excludeFromContext to reduce context tokens (default: false). */
320
+ excludeContextBash?: boolean;
321
+ /** Total token budget for the run. When set, enables budget tracking. */
322
+ budgetTotal?: number;
323
+ /** Budget warning threshold as a fraction (0-1). Default: 0.8. */
324
+ budgetWarning?: number;
325
+ /** Budget abort threshold as a fraction (0-1). Default: 0.95. */
326
+ budgetAbort?: number;
290
327
  }
@@ -6,6 +6,9 @@ import { isSafePathId, resolveContainedPath, resolveRealContainedPath } from "..
6
6
 
7
7
  const PACKAGE_SKILLS_DIR = path.resolve(path.dirname(fileURLToPath(import.meta.url)), "..", "..", "skills");
8
8
 
9
+ const CACHE_TTL_MS = 30_000; // 30 seconds
10
+ let cache: { skills: SkillDescriptor[]; cachedAt: number; cwd: string } | null = null;
11
+
9
12
  export interface SkillDescriptor {
10
13
  name: string;
11
14
  description: string;
@@ -28,6 +31,7 @@ function frontmatterDescription(content: string): string | undefined {
28
31
  }
29
32
 
30
33
  export function discoverSkills(cwd: string): SkillDescriptor[] {
34
+ if (cache && cache.cwd === cwd && Date.now() - cache.cachedAt < CACHE_TTL_MS) return cache.skills;
31
35
  const results: SkillDescriptor[] = [];
32
36
  for (const dir of listSkillDirs(cwd)) {
33
37
  if (!fs.existsSync(dir.root)) continue;
@@ -63,5 +67,6 @@ export function discoverSkills(cwd: string): SkillDescriptor[] {
63
67
  logInternalError("discoverSkills.readdir", error, `root=${dir.root}`);
64
68
  }
65
69
  }
70
+ cache = { skills: results, cachedAt: Date.now(), cwd };
66
71
  return results;
67
72
  }
@@ -10,6 +10,9 @@ import { sharedScanCache } from "../utils/scan-cache.ts";
10
10
  import { sleepSync } from "../utils/sleep.ts";
11
11
  import { logInternalError } from "../utils/internal-error.ts";
12
12
 
13
+ /** Magic bytes prefix for binary registry to prevent deserialization of hostile files. */
14
+ const BINARY_MAGIC = Buffer.from("PICREW2BIN", "utf-8");
15
+
13
16
  export interface ActiveRunRegistryEntry {
14
17
  runId: string;
15
18
  cwd: string;
@@ -111,7 +114,11 @@ export function readActiveRunRegistry(maxEntries = DEFAULT_CACHE.manifestMaxEntr
111
114
  // corrupt; this lets a 2-release migration co-exist with old readers.
112
115
  try {
113
116
  const buf = fs.readFileSync(registryBinaryPath());
114
- parsed = deserialize(buf);
117
+ // Security: verify magic bytes before deserializing to prevent RCE from hostile files
118
+ if (buf.length < BINARY_MAGIC.length || !buf.slice(0, BINARY_MAGIC.length).equals(BINARY_MAGIC)) {
119
+ throw new Error("Invalid binary registry: missing magic bytes");
120
+ }
121
+ parsed = deserialize(buf.slice(BINARY_MAGIC.length));
115
122
  } catch {
116
123
  try {
117
124
  parsed = JSON.parse(fs.readFileSync(registryPath(), "utf-8"));
@@ -135,7 +142,7 @@ function writeEntries(entries: ActiveRunRegistryEntry[]): void {
135
142
  atomicWriteJson(registryPath(), trimmed);
136
143
  try {
137
144
  const tempBin = `${registryBinaryPath()}.${process.pid}.${Date.now()}.tmp`;
138
- fs.writeFileSync(tempBin, serialize(trimmed));
145
+ fs.writeFileSync(tempBin, Buffer.concat([BINARY_MAGIC, serialize(trimmed)]));
139
146
  fs.renameSync(tempBin, registryBinaryPath());
140
147
  } catch (error) {
141
148
  logInternalError("active-run-registry.binary-write", error);
@@ -67,6 +67,15 @@ export const TEAM_EVENT_TYPES = [
67
67
  "task.resumed",
68
68
  "task.retried",
69
69
  "supervisor.contact",
70
+ // Budget tracking events
71
+ "budget.initialized",
72
+ "budget.warning",
73
+ "budget.exhausted",
74
+ // Phase tracking events
75
+ "phase.started",
76
+ "phase.completed",
77
+ "phase.skipped",
78
+ "phase.failed",
70
79
  ] as const;
71
80
  export type TeamEventType = typeof TEAM_EVENT_TYPES[number];
72
81
 
@@ -108,9 +108,9 @@ export async function ensureCrewDirectory(cwd: string): Promise<void> {
108
108
  ];
109
109
 
110
110
  for (const dir of dirs) {
111
- if (!fs.existsSync(dir)) {
112
- fs.mkdirSync(dir, { recursive: true });
113
- }
111
+ // Use mkdirSync directly with recursive:true to avoid TOCTOU race.
112
+ // This is atomic and doesn't require existsSync check.
113
+ fs.mkdirSync(dir, { recursive: true });
114
114
  }
115
115
 
116
116
  // 2. Create .gitkeep placeholders in directories that should be tracked