pi-crew 0.5.13 → 0.5.16

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (45) hide show
  1. package/CHANGELOG.md +139 -0
  2. package/README.md +1 -1
  3. package/docs/pi-crew-v0.5.14-audit-fix-plan.md +75 -0
  4. package/docs/pi-crew-v0.5.16-audit-fix-plan.md +35 -0
  5. package/docs/pi-crew-v0.5.17-audit-fix-plan.md +80 -0
  6. package/docs/skills/REFERENCE.md +11 -0
  7. package/package.json +1 -1
  8. package/skills/iterative-audit/SKILL.md +330 -0
  9. package/src/extension/management.ts +1 -1
  10. package/src/extension/plan-orchestrate.ts +0 -1
  11. package/src/extension/register.ts +16 -7
  12. package/src/extension/registration/viewers.ts +1 -1
  13. package/src/extension/run-index.ts +1 -1
  14. package/src/extension/team-tool/explain.ts +0 -1
  15. package/src/extension/team-tool/handle-schedule.ts +0 -1
  16. package/src/extension/team-tool/health-monitor.ts +0 -1
  17. package/src/extension/team-tool/run.ts +2 -2
  18. package/src/extension/team-tool/status.ts +1 -1
  19. package/src/extension/team-tool.ts +2 -30
  20. package/src/observability/exporters/otlp-exporter.ts +11 -1
  21. package/src/runtime/checkpoint.ts +19 -0
  22. package/src/runtime/child-pi.ts +1 -1
  23. package/src/runtime/crash-recovery.ts +1 -1
  24. package/src/runtime/crew-agent-records.ts +23 -3
  25. package/src/runtime/crew-hooks.ts +1 -1
  26. package/src/runtime/handoff-manager.ts +0 -1
  27. package/src/runtime/heartbeat-watcher.ts +1 -1
  28. package/src/runtime/live-session-runtime.ts +0 -1
  29. package/src/runtime/loop-gates.ts +0 -1
  30. package/src/runtime/mcp-proxy.ts +2 -2
  31. package/src/runtime/pipeline-runner.ts +1 -2
  32. package/src/runtime/task-runner/live-executor.ts +1 -2
  33. package/src/runtime/task-runner.ts +1 -1
  34. package/src/state/jsonl-writer.ts +24 -0
  35. package/src/state/locks.ts +66 -35
  36. package/src/state/run-metrics.ts +1 -2
  37. package/src/state/schedule.ts +13 -5
  38. package/src/state/state-store.ts +1 -1
  39. package/src/tools/safe-bash.ts +0 -1
  40. package/src/ui/crew-widget.ts +2 -2
  41. package/src/ui/render-diff.ts +1 -1
  42. package/src/ui/run-dashboard.ts +1 -2
  43. package/src/ui/tool-render.ts +20 -3
  44. package/src/utils/conflict-detect.ts +0 -1
  45. package/src/utils/gh-protocol.ts +0 -2
@@ -0,0 +1,330 @@
1
+ ---
2
+ name: iterative-audit
3
+ description: "Iterative multi-round codebase audit with diminishing-returns detection. Run 5-20+ rounds, each focusing on one specific area. Built from 19 rounds of dogfooding pi-crew on itself."
4
+ triggers:
5
+ - "audit this codebase"
6
+ - "review everything"
7
+ - "find all bugs"
8
+ - "deep audit"
9
+ - "harden this"
10
+ - "iterate audit rounds"
11
+ - "multi-round review"
12
+ ---
13
+
14
+ # Iterative Audit
15
+
16
+ > Distilled from 19 rounds of auditing pi-crew on itself (v0.5.5 → v0.5.14):
17
+ > ~70 issues fixed, 286 tests added, 9 security improvements, 2 performance improvements.
18
+
19
+ The core insight: **a single round of audit finds the easy 30% of bugs**. The remaining 70% only surfaces through 5-20+ targeted rounds, each with a specific focus. After round 5+ you find HIGH severity bugs that round 1 missed. After round 10+ you find issues that no human reviewer would catch in a single pass.
20
+
21
+ ## Operating Stance
22
+
23
+ - **One focus per round.** Each round targets one of the 7 patterns below. Don't try to fix everything in one pass.
24
+ - **Source verification is mandatory.** Never trust audit docs or previous round reports — always read the actual code. ~30% of issues from prior rounds are false positives or already fixed.
25
+ - **Document every finding with file:line.** "Sandbox env allow-list" is useless. "src/runtime/sandbox.ts:70 — process.env full leak" is actionable.
26
+ - **Verify the team actually applied changes.** After any team run, run `git diff` and inspect. ~20% of team runs silently fail to apply changes.
27
+ - **Don't publish without explicit user confirmation.** Audit work compounds; releasing in the middle of a round leaves the codebase in a half-hardened state.
28
+
29
+ ## The 7 Patterns (rotate through these)
30
+
31
+ After 19 rounds, every issue found falls into one of these 7 categories. Use this to plan each round's focus.
32
+
33
+ ### 1. L1 Cleanup (decoration, low value, easy)
34
+ **What**: Replace `console.error` / `console.warn` / `process.stderr.write` with `logInternalError()` from `utils/internal-error.ts`.
35
+
36
+ **Why**: `console.error` may not be visible in JSON-RPC mode or when stderr is redirected. `logInternalError` is the project-wide pattern; missing it means errors are silently dropped.
37
+
38
+ **How to find them**:
39
+ ```bash
40
+ rg -n 'console\.(error|warn|log)' src/
41
+ rg -n 'process\.stderr\.write' src/
42
+ ```
43
+
44
+ **Rule**: Skip `internal-error.ts:5` itself (it's the implementation). Skip `background-runner.ts:146` (overrides `console.error` for testing). Skip `parent-guard.ts:37` (exit-time log must fire synchronously).
45
+
46
+ **Time per round**: 30 min for 5-10 callsites. Diminishing returns after round 1.
47
+
48
+ ### 2. Defensive Caps (memory safety, medium value, medium effort)
49
+ **What**: Find Maps, Sets, Arrays, and Queues that grow unboundedly. Add `MAX_*` constants and eviction logic.
50
+
51
+ **Why**: Long-running processes (background runners, extension reloads) accumulate state. Without caps, a busy period causes OOM.
52
+
53
+ **How to find them**:
54
+ ```bash
55
+ rg -n 'new Map\(' src/ # look for ones that are .set() repeatedly
56
+ rg -n 'new Set\(' src/
57
+ rg -n 'this\.\w+\.push\(' src/ # look for unbounded arrays
58
+ ```
59
+
60
+ **Common patterns**:
61
+ - `Semaphore.#queue` → add `MAX_QUEUE` cap (pi-crew: 10,000)
62
+ - `liveAgentManager.liveAgents` Map → add `MAX_LIVE_AGENTS` cap (pi-crew: 5,000)
63
+ - `OverflowRecoveryTracker.states` Map → add `MAX_TRACKED_STATES` cap (pi-crew: 5,000)
64
+ - `NotificationRouter.seen` Map → add `SEEN_MAP_MAX_SIZE` cap (pi-crew: 10,000)
65
+
66
+ **Eviction strategies** (in order of preference):
67
+ 1. **LRU by access time** — track `lastAccessAt` per entry
68
+ 2. **Oldest insertion** — Map's natural insertion order works (delete first key)
69
+ 3. **Terminal-state priority** — protect live entries, evict completed/failed/cancelled first
70
+
71
+ **Test pattern**: Verify cap by inserting 1.5× the max, confirm old entries are gone.
72
+
73
+ ### 3. Test Coverage Gaps (good value, low effort)
74
+ **What**: Find source files with zero direct unit tests.
75
+
76
+ **How to find them**:
77
+ ```bash
78
+ # For each src file, check whether a test file with a matching basename exists
79
+ for f in src/runtime/*.ts src/extension/*.ts; do
80
+ basename=$(basename "$f" .ts)
81
+ count=$(ls test/unit/${basename}*.test.ts 2>/dev/null | wc -l)
82
+ [ "$count" = "0" ] && echo "NO TEST: $f"
83
+ done
84
+ ```
85
+
86
+ **Prioritize**:
87
+ - Security-critical: `sandbox.ts`, `child-pi.ts`, `pi-spawn.ts`, `crew-cleanup.ts`
88
+ - Resource-management: `live-agent-manager.ts`, `semaphore.ts`, `overflow-recovery.ts`
89
+ - Public APIs: anything with `export class` or `export function`
90
+
91
+ **Don't test**: internal helpers, generated code, pure re-exports.
92
+
93
+ **Test categories** (in order of importance):
94
+ 1. **Path validation** (security) — `assertSafePathId`, path traversal rejection
95
+ 2. **Resource cleanup** — `dispose()` clears everything, listeners don't stack
96
+ 3. **Boundary conditions** — empty input, max-size, overflow
97
+ 4. **Callback lifecycle** — sync/async error handling, `resultConsumed` flag
98
+
99
+ ### 4. Security Hardening (high value, high effort)
100
+ **What**: Find places where untrusted input reaches dangerous sinks.
101
+
102
+ **Common sinks to audit**:
103
+ - `execSync(command)` → switch to `execFileSync(program, args[])`
104
+ - `eval()` / `Function()` / `vm.runInNewContext()` → avoid entirely
105
+ - `path.join(base, userInput)` → use `assertSafePathId(userInput)` first
106
+ - `process.env` access → use sanitized env with allow-list
107
+ - File writes to user-controlled paths → validate path is within allowed roots
108
+ - Child process spawn → use `cwd: knownDir`, sanitize env
109
+
110
+ **How to find them**:
111
+ ```bash
112
+ rg -n 'execSync\(' src/
113
+ rg -n 'exec\(' src/
114
+ rg -n 'eval\(|Function\(' src/
115
+ rg -n 'spawn\(' src/
116
+ rg -n 'path\.join\(' src/ | rg 'record\.|task\.|runId|agent\.'
117
+ ```
118
+
119
+ **Round 1**: Find all `execSync` and `exec`. Switch to `execFileSync(program, args)` (no shell).
120
+ **Round 2**: Audit env handling. Look for `process.env` access in hot paths. Add allow-list.
121
+ **Round 3**: Path traversal. For every `path.join(base, userInput)`, add `assertSafePathId()`.
122
+ **Round 4**: Subprocess safety. Verify all `spawn()` calls have: validated args, sanitized env, `cwd` set, signal handling, timeout.
123
+
124
+ ### 5. Performance (medium value, medium effort)
125
+ **What**: Find O(N²) or worse algorithms, especially in hot paths.
126
+
127
+ **Common patterns**:
128
+ - Recomputing document frequency in search loops → precompute at construction
129
+ - `array.filter().map().filter()` in a loop → fuse into one pass
130
+ - `JSON.parse` of the same file repeatedly → cache
131
+ - `fs.statSync` per file in a directory scan → batch with `Dirent.isDirectory()`
132
+ - `setTimeout` busy-polling for state changes → use `fs.watch` or events
133
+
134
+ **How to find them**:
135
+ ```bash
136
+ # Look for nested loops over the same data
137
+ rg -nB 1 -A 5 'for.*of.*for' src/
138
+ # Look for polls
139
+ rg -n 'setTimeout.*poll' src/
140
+ rg -n 'pollIntervalMs' src/
141
+ ```
142
+
143
+ **Test pattern**: For precomputation fixes, write a perf test that creates 1000 docs, runs search, and asserts completion under 100ms.
144
+
145
+ ### 6. Code Quality (low value, easy)
146
+ **What**: Remove dead code, fix type misuse, add missing JSDoc.
147
+
148
+ **Common patterns**:
149
+ - Fields declared but never used (e.g., `seenCleanupCounter`)
150
+ - Unused imports
151
+ - Type assertions (`as any`, `as unknown as T`) that hide real issues
152
+ - Functions that always return the same value
153
+ - Catch blocks that swallow errors silently
154
+
155
+ **How to find them**:
156
+ ```bash
157
+ # Find fields/methods declared but never used
158
+ rg -n 'private \w+\s*=\s*' src/ | while read line; do
159
+ field=$(echo "$line" | grep -oP 'private \K\w+')
160
+ count=$(rg -ow --no-filename "$field" src/ 2>/dev/null | wc -l | tr -d ' ')
161
+ [ "$count" = "1" ] && echo "DEAD: $line"
162
+ done
163
+ ```
164
+
165
+ ### 7. Resource Cleanup (medium value, medium effort)
166
+ **What**: Find places where listeners, timers, file handles, or other resources can leak.
167
+
168
+ **Common patterns**:
169
+ - `process.on('SIGTERM', ...)` registered multiple times → use module-level flag
170
+ - `setInterval` / `setTimeout` not cleared on shutdown → `dispose()` method
171
+ - `AbortController` not aborted in cleanup
172
+ - File watchers (`fs.watch`) not closed
173
+ - Event listeners (`emitter.on`) not removed
174
+
175
+ **How to find them**:
176
+ ```bash
177
+ rg -n 'process\.on\(' src/
178
+ rg -n 'setInterval\(' src/
179
+ rg -n 'setTimeout\(' src/ | rg -v 'setTimeout.*resolve' # filter out poll sleeps
180
+ rg -n 'fs\.watch\(' src/
181
+ ```
182
+
183
+ **Test pattern**: Call the registration function N times, verify listener count is 1.
184
+
185
+ ## Round Workflow (use this for EVERY round)
186
+
187
+ ### Step 1: Pick a focus
188
+ Choose ONE of the 7 patterns above. Don't try to do multiple patterns in one round.
189
+
190
+ ### Step 2: Explore (read 3-5 files)
191
+ Read the actual source for the focus area. Don't trust prior audit docs.
192
+
193
+ ### Step 3: Verify from source
194
+ For each candidate issue:
195
+ - Read the file at the cited line
196
+ - Check if the issue is real (not a false positive)
197
+ - Check if it's already fixed
198
+ - Note the exact file:line and code snippet
199
+
200
+ ### Step 4: Create a plan doc
201
+ ```markdown
202
+ # Round N Audit Fix Plan
203
+ ## Findings
204
+ ### Issue 1: <file>:<line> — <title> (severity)
205
+ <File path and line numbers>
206
+ <Code snippet showing the issue>
207
+ <Rationale>
208
+
209
+ ## Plan (5 phases)
210
+ ### Phase 1: <action>
211
+ ### Phase 2: <action>
212
+ ...
213
+ ```
214
+
215
+ ### Step 5: Implement
216
+ - Make the fix
217
+ - Add tests (if applicable)
218
+ - Run typecheck: `npx tsc --noEmit`
219
+ - Run tests: `npm test`
220
+
221
+ ### Step 6: Commit + Release
222
+ - Commit with conventional message: `fix: round N - <summary>`
223
+ - Update CHANGELOG.md
224
+ - Bump version (patch)
225
+ - Push + npm publish (only after explicit user confirmation — see Operating Stance)
226
+ - Create GitHub release
227
+
228
+ ### Step 7: Decide: continue or stop?
229
+ After 5-10 rounds, evaluate:
230
+
231
+ **Continue if**:
232
+ - Last 2 rounds found HIGH or MEDIUM severity issues
233
+ - Test coverage is < 80% of modules
234
+ - User explicitly wants more
235
+
236
+ **Stop if**:
237
+ - Last 2 rounds found only LOW severity or L1 cleanup
238
+ - All patterns exhausted (you've done each at least once)
239
+ - Diminishing returns: more time spent planning than implementing
240
+
241
+ ## When to Use Teams vs. Do It Yourself
242
+
243
+ **Use teams** (via `team action='run', team='review'`) for:
244
+ - Initial broad audit (round 1)
245
+ - Security reviews (specialized `security-reviewer` agent)
246
+ - When you need 3+ perspectives (multi-explorer)
247
+
248
+ **Do it yourself** for:
249
+ - Round 2+ (you have context from prior rounds)
250
+ - Focused single-pattern work (L1 cleanup, test coverage)
251
+ - Small fixes (< 5 file edits)
252
+
253
+ **Teams often fail because**:
254
+ - 5-min heartbeat timeout for long-running runs (add `startTeamRunHeartbeat` if needed)
255
+ - Agent cancellations
256
+ - Hallucinated file:line references (always verify from source)
257
+
258
+ ## Common False Positives (audit findings to reject)
259
+
260
+ After 19 rounds, ~30% of audit findings are false positives. Common patterns:
261
+
262
+ 1. **"Double-merge in config"** — looks like a bug, but project config + user config merge is intentional
263
+ 2. **"as unknown as T in error handling"** — necessary for TypeScript's strict mode
264
+ 3. **"Auto-repair timer race"** — there's a guard like `cleanedUp || !currentCtx` you missed
265
+ 4. **"Already-validated input"** — validation is in the caller, not the callee
266
+ 5. **"Redundant null check"** — TypeScript narrowing doesn't always work for closures
267
+
268
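+ **Always verify against source** before acting. If you're not sure, write a test that exercises the alleged bug path and asserts the correct behavior. If that test passes, the finding is a false positive; if it fails, the bug is real and the test becomes its regression test.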
269
+
270
+ ## Success Metrics
271
+
272
+ After each round, record:
273
+ - Issues found (real vs. false positive)
274
+ - Tests added
275
+ - Typecheck clean?
276
+ - Total test count delta
277
+
278
+ **Healthy round**: 3-8 real issues found, +20 to +50 tests added, all pass.
279
+
280
+ **Exhausted round**: 0-1 real issues found, 0 tests added, mostly L1 cleanup.
281
+
282
+ When you hit 2+ exhausted rounds in a row, **stop**.
283
+
284
+ ## Real Examples from 19 Rounds
285
+
286
+ | Round | Focus | Issues Found | Severity Range |
287
+ |-------|-------|--------------|----------------|
288
+ | 1-3 | Broad security audit | 11 | CRITICAL, HIGH |
289
+ | 4-6 | Race conditions, locks | 5 | HIGH |
290
+ | 7-9 | L1 cleanup, dead code | 12 | LOW |
291
+ | 10-12 | Defensive caps | 3 | MEDIUM |
292
+ | 13-15 | Security: execSync, sandbox | 9 | CRITICAL, HIGH |
293
+ | 16-18 | Test coverage, L1 | 30+ | LOW |
294
+ | 19 | Path validation, tests | 5 | MEDIUM |
295
+
296
+ **Pattern**: First 3 rounds find the most impactful issues. Rounds 4-15 find the rest. Rounds 16+ are diminishing returns (mostly test coverage and L1 cleanup).
297
+
298
+ ## Anti-Patterns to Avoid
299
+
300
+ - **Mega-rounds** (10+ files, 5+ categories) — too broad, low quality findings
301
+ - **Trusting audit docs** — always verify from source
302
+ - **Skipping typecheck** — type errors compound and become hard to debug later
303
+ - **Releasing mid-round** — leaves the codebase in a half-hardened state
304
+ - **No test for the fix** — every fix needs a test that would have caught the bug
305
+ - **Committing too late** — commit after each phase, not at the end of the round
306
+
307
+ ## Enforcement — Iterative Audit Gate
308
+
309
+ **Before reporting round findings, verify:**
310
+
311
+ - [ ] Round focus is ONE of the 7 patterns (not multiple)
312
+ - [ ] Each finding has a verified `file:line` reference (read the actual source)
313
+ - [ ] False positives filtered out (consult "Common False Positives" section)
314
+ - [ ] Severity assigned using the standard scale (CRITICAL / HIGH / MEDIUM / LOW)
315
+ - [ ] Plan doc created with phases and file:line evidence
316
+ - [ ] Typecheck clean: `npx tsc --noEmit` returns 0 errors
317
+ - [ ] All tests pass: `npm test` shows 0 failures
318
+ - [ ] Tests added for the fix (if applicable)
319
+ - [ ] Round results recorded: issues found, tests added, delta
320
+ - [ ] Decision logged: continue to next round or stop (with reason)
321
+
322
+ **If ANY box is unchecked → Stop. Complete the audit requirements before reporting round results.**
323
+
324
+ ## Related Skills
325
+
326
+ - `scrutinize` — Quick outsider-perspective review of a single change
327
+ - `multi-perspective-review` — 8-pass deep review for a single change
328
+ - `security-review` — Security-focused audit with detection authoring
329
+ - `verification-before-done` — Evidence before claim (use per round)
330
+ - `systematic-debugging` — When a finding reveals a real bug that needs deeper investigation
@@ -3,7 +3,7 @@ import * as fs from "node:fs";
3
3
  import * as path from "node:path";
4
4
  import type { AgentConfig, ResourceSource, RoutingMetadata } from "../agents/agent-config.ts";
5
5
  import { serializeAgent } from "../agents/agent-serializer.ts";
6
- import { allAgents, discoverAgents } from "../agents/discover-agents.ts";
6
+ import { discoverAgents } from "../agents/discover-agents.ts";
7
7
  import type { TeamToolDetails } from "./team-tool-types.ts";
8
8
  import { toolResult, type PiTeamsToolResult } from "./tool-result.ts";
9
9
  import type { TeamToolParamsValue } from "../schema/team-tool-schema.ts";
@@ -6,7 +6,6 @@
6
6
  */
7
7
 
8
8
  import * as fs from "node:fs";
9
- import * as path from "node:path";
10
9
 
11
10
  /**
12
11
  * Tag → Agent chain mapping from ECC recommendations.
@@ -6,7 +6,7 @@ import type {
6
6
  ExtensionContext,
7
7
  } from "@earendil-works/pi-coding-agent";
8
8
  import { loadConfig } from "../config/config.ts";
9
- import { applyCrewSettingsToConfig, loadCrewSettings, saveCrewSettings } from "../runtime/settings-store.ts";
9
+ import { applyCrewSettingsToConfig, loadCrewSettings } from "../runtime/settings-store.ts";
10
10
  // 2.7: Lazy-load LiveRunSidebar — only constructed when the user actually opens
11
11
  // a live run sidebar overlay. The class pulls in transcript-viewer and other
12
12
  // heavy UI modules.
@@ -47,12 +47,9 @@ import {
47
47
  createMetricFileSink,
48
48
  type MetricSink,
49
49
  } from "../observability/metric-sink.ts";
50
- import { killProcessPid } from "../runtime/child-pi.ts";
51
50
  import { listLiveAgents } from "../runtime/live-agent-manager.ts";
52
51
  import { createManifestCache } from "../runtime/manifest-cache.ts";
53
- import { checkProcessLiveness } from "../runtime/process-status.ts";
54
52
  import { CrewScheduler } from "../runtime/scheduler.ts";
55
- import { appendEvent } from "../state/event-log.ts";
56
53
  import { loadRunManifestById, updateRunStatus } from "../state/state-store.ts";
57
54
  import type { TeamRunManifest } from "../state/types.ts";
58
55
  import { SubagentManager } from "../subagents/manager.ts";
@@ -128,9 +125,6 @@ import type {
128
125
  // deferred cleanup and cleanupRuntime. Each function is awaited inside an
129
126
  // async context that already runs after registration completes.
130
127
  import {
131
- cancelOrphanedRuns,
132
- detectInterruptedRuns,
133
- purgeStaleActiveRunIndex,
134
128
  reconcileAllStaleRuns,
135
129
  } from "../runtime/crash-recovery.ts";
136
130
  import { appendDeadletter } from "../runtime/deadletter.ts";
@@ -482,6 +476,13 @@ export function registerPiTeams(pi: ExtensionAPI): void {
482
476
  }
483
477
  };
484
478
  const autoRecoveryLast = new Map<string, number>();
479
+ // FIX (Round 22, defensive cap): Bound the cooldown-gate Map. Each run
480
+ // contributes up to 4 keys (one per maybeNotifyHealth kind). Without a cap,
481
+ // a long-running pi session that runs thousands of teams accumulates
482
+ // thousands of entries. Eviction: oldest insertion first — matches the
483
+ // 5-minute cooldown gate semantics, since once the gate has expired the
484
+ // entry is irrelevant.
485
+ const AUTO_RECOVERY_LAST_MAX_ENTRIES = 1000;
485
486
  const configureDeliveryCoordinator = (): void => {
486
487
  deliveryCoordinator?.dispose();
487
488
  deliveryCoordinator = undefined;
@@ -1531,6 +1532,14 @@ export function registerPiTeams(pi: ExtensionAPI): void {
1531
1532
  now - previous < 5 * 60_000
1532
1533
  )
1533
1534
  return;
1535
+ // Defensive cap: evict oldest entries before inserting
1536
+ // when size reaches the limit. Map's natural insertion
1537
+ // order means the first key is the oldest.
1538
+ while (autoRecoveryLast.size >= AUTO_RECOVERY_LAST_MAX_ENTRIES) {
1539
+ const oldest = autoRecoveryLast.keys().next().value;
1540
+ if (oldest === undefined) break;
1541
+ autoRecoveryLast.delete(oldest);
1542
+ }
1534
1543
  autoRecoveryLast.set(key, now);
1535
1544
  notifyOperator({
1536
1545
  id: key,
@@ -2,7 +2,7 @@ import type { ExtensionCommandContext } from "@earendil-works/pi-coding-agent";
2
2
  import { loadRunManifestById } from "../../state/state-store.ts";
3
3
  import { readCrewAgents } from "../../runtime/crew-agent-records.ts";
4
4
  import { loadConfig } from "../../config/config.ts";
5
- import { listLiveAgents, type LiveAgentHandle } from "../../runtime/live-agent-manager.ts";
5
+ import { listLiveAgents } from "../../runtime/live-agent-manager.ts";
6
6
  import { LiveConversationOverlay } from "../../ui/live-conversation-overlay.ts";
7
7
  import { asCrewTheme } from "../../ui/theme-adapter.ts";
8
8
  // Lazy-loaded: DurableTranscriptViewer is 658ms — only needed for /crew transcript command
@@ -7,7 +7,7 @@ import { findRepoRoot, projectCrewRoot, userCrewRoot } from "../utils/paths.ts";
7
7
  import { activeRunEntries } from "../state/active-run-registry.ts";
8
8
  import { isSafePathId, resolveRealContainedPath } from "../utils/safe-paths.ts";
9
9
  import { sharedScanCache } from "../utils/scan-cache.ts";
10
- import { CancellationToken, createCancellationToken } from "../runtime/cancellation-token.ts";
10
+ import { createCancellationToken } from "../runtime/cancellation-token.ts";
11
11
 
12
12
  function readManifest(filePath: string): TeamRunManifest | undefined {
13
13
  const cached = sharedScanCache.readAndCache("manifests", filePath, filePath);
@@ -1,6 +1,5 @@
1
1
  import * as fs from "node:fs";
2
2
  import * as path from "node:path";
3
- import { toolResult } from "../tool-result.ts";
4
3
  import { loadRunManifestById } from "../../state/state-store.ts";
5
4
  import type { TeamRunManifest, TeamTaskState } from "../../state/types.ts";
6
5
 
@@ -3,7 +3,6 @@ import type { PiTeamsToolResult } from "../tool-result.ts";
3
3
  import type { TeamToolParamsValue } from "../../schema/team-tool-schema.ts";
4
4
  import { result, type TeamContext } from "./context.ts";
5
5
  import { humanizeSchedule, nextRunTime, parseSchedule } from "../../runtime/scheduler.ts";
6
- import { loadConfig } from "../../config/config.ts";
7
6
  import { loadCrewSettings, saveCrewSettings } from "../../runtime/settings-store.ts";
8
7
 
9
8
  // Global key for cross-module scheduler access.
@@ -8,7 +8,6 @@ import { listRuns } from "../run-index.ts";
8
8
  import { readCrewAgents } from "../../runtime/crew-agent-records.ts";
9
9
  import {
10
10
  isActiveRunStatus,
11
- isFinishedRunStatus,
12
11
  hasStaleAsyncProcess,
13
12
  isLikelyOrphanedActiveRun,
14
13
  } from "../../runtime/process-status.ts";
@@ -8,7 +8,7 @@ import { registerActiveRun, unregisterActiveRun } from "../../state/active-run-r
8
8
  import { createRunManifest, loadRunManifestById, updateRunStatus } from "../../state/state-store.ts";
9
9
  import { atomicWriteJson } from "../../state/atomic-write.ts";
10
10
  import { validateWorkflowForTeam } from "../../workflows/validate-workflow.ts";
11
- import { PipelineRunner, type PipelineWorkflow, type PipelineStage } from "../../runtime/pipeline-runner.ts";
11
+ import { PipelineRunner, type PipelineWorkflow } from "../../runtime/pipeline-runner.ts";
12
12
  // Heavy runtime — lazy-loaded to avoid 1.4s import cost at extension registration.
13
13
  import type { executeTeamRun as ExecuteTeamRunFn } from "../../runtime/team-runner.ts";
14
14
  // eslint-disable-next-line @typescript-eslint/no-unused-vars -- type-only import for TS inference
@@ -24,7 +24,7 @@ async function executeTeamRun(...args: Parameters<typeof ExecuteTeamRunFn>): Pro
24
24
  return _cachedExecuteTeamRun(...args);
25
25
  }
26
26
  import { spawnBackgroundTeamRun } from "../../subagents/async-entry.ts";
27
- import { appendEvent, appendEventAsync, readEvents } from "../../state/event-log.ts";
27
+ import { appendEventAsync, readEvents } from "../../state/event-log.ts";
28
28
  import { resolveCrewRuntime, runtimeResolutionState } from "../../runtime/runtime-resolver.ts";
29
29
  import { normalizeSkillOverride } from "../../runtime/skill-instructions.ts";
30
30
  import { expandParallelResearchWorkflow } from "../../runtime/parallel-research.ts";
@@ -8,7 +8,7 @@ import { applyAttentionState, formatActivityAge, resolveCrewControlConfig } from
8
8
  import { readCrewAgents } from "../../runtime/crew-agent-records.ts";
9
9
  import { checkProcessLiveness, isActiveRunStatus } from "../../runtime/process-status.ts";
10
10
  import { formatTaskGraphLines, waitingReason } from "../../runtime/task-display.ts";
11
- import { verifyTaskCompletion, formatOutputPreview } from "../../runtime/completion-guard.ts";
11
+ import { verifyTaskCompletion } from "../../runtime/completion-guard.ts";
12
12
  import { evaluateRunEffectiveness } from "../../runtime/effectiveness.ts";
13
13
  import type { PiTeamsToolResult } from "../tool-result.ts";
14
14
  import { locateRunCwd } from "../team-tool.ts";
@@ -4,7 +4,6 @@ import type { AgentConfig } from "../agents/agent-config.ts";
4
4
  import {
5
5
  allAgents,
6
6
  discoverAgents,
7
- invalidateAgentDiscoveryCache,
8
7
  listDynamicAgents,
9
8
  registerDynamicAgent,
10
9
  unregisterDynamicAgent,
@@ -19,8 +18,8 @@ import {
19
18
  import type { executeTeamRun as _executeTeamRunFn } from "../runtime/team-runner.ts";
20
19
  import type { TeamToolParamsValue } from "../schema/team-tool-schema.ts";
21
20
  import { writeArtifact } from "../state/artifact-store.ts";
22
- import { appendEvent, readEvents } from "../state/event-log.ts";
23
- import { withRunLock, withRunLockSync } from "../state/locks.ts";
21
+ import { appendEvent } from "../state/event-log.ts";
22
+ import { withRunLock } from "../state/locks.ts";
24
23
  import { replayPendingMailboxMessages } from "../state/mailbox.ts";
25
24
  import {
26
25
  loadRunManifestById,
@@ -33,22 +32,15 @@ import type {
33
32
  TeamRunManifest,
34
33
  TeamTaskState,
35
34
  } from "../state/types.ts";
36
- import { aggregateUsage, formatUsage } from "../state/usage.ts";
37
35
  import { allTeams, discoverTeams } from "../teams/discover-teams.ts";
38
36
  import {
39
37
  allWorkflows,
40
38
  discoverWorkflows,
41
39
  } from "../workflows/discover-workflows.ts";
42
- import { validateWorkflowForTeam } from "../workflows/validate-workflow.ts";
43
- import { cleanupRunWorktrees } from "../worktree/cleanup.ts";
44
40
  import { piTeamsHelp } from "./help.ts";
45
- import { listImportedRuns } from "./import-index.ts";
46
41
  import { handleCreate, handleDelete, handleUpdate } from "./management.ts";
47
42
  import { initializeProject } from "./project-init.ts";
48
- import { exportRunBundle } from "./run-export.ts";
49
- import { importRunBundle } from "./run-import.ts";
50
43
  import { listRuns } from "./run-index.ts";
51
- import { pruneFinishedRuns } from "./run-maintenance.ts";
52
44
  import { formatRecommendation, recommendTeam } from "./team-recommendation.ts";
53
45
  import { handleSettings } from "./team-tool/handle-settings.ts";
54
46
  import type { PiTeamsToolResult } from "./tool-result.ts";
@@ -70,31 +62,12 @@ async function executeTeamRun(
70
62
  return _cachedExecuteTeamRun(...args);
71
63
  }
72
64
 
73
- import {
74
- applyAttentionState,
75
- formatActivityAge,
76
- resolveCrewControlConfig,
77
- } from "../runtime/agent-control.ts";
78
- import {
79
- readCrewAgents,
80
- recordFromTask,
81
- saveCrewAgents,
82
- } from "../runtime/crew-agent-records.ts";
83
65
  import { directTeamAndWorkflowFromRun } from "../runtime/direct-run.ts";
84
- import { writeForegroundInterruptRequest } from "../runtime/foreground-control.ts";
85
66
  import { parsePiJsonOutput } from "../runtime/pi-json-output.ts";
86
- import {
87
- checkProcessLiveness,
88
- isActiveRunStatus,
89
- } from "../runtime/process-status.ts";
90
67
  import {
91
68
  resolveCrewRuntime,
92
69
  runtimeResolutionState,
93
70
  } from "../runtime/runtime-resolver.ts";
94
- import {
95
- formatTaskGraphLines,
96
- waitingReason,
97
- } from "../runtime/task-display.ts";
98
71
  import { handleApi } from "./team-tool/api.ts";
99
72
  import {
100
73
  autonomousPatchFromConfig,
@@ -128,7 +101,6 @@ async function handleRun(
128
101
 
129
102
  import { waitForRun } from "../runtime/run-tracker.ts";
130
103
  import { normalizeSkillOverride } from "../runtime/skill-instructions.ts";
131
- import { logInternalError } from "../utils/internal-error.ts";
132
104
  import { searchAgents, searchTeams } from "../utils/bm25-search.ts";
133
105
  import { projectCrewRoot } from "../utils/paths.ts";
134
106
  import {
@@ -124,8 +124,18 @@ export class OTLPExporter implements MetricExporter {
124
124
  }
125
125
  }
126
126
 
127
- dispose(): void {
127
+ /**
128
+ * FIX (Round 23, resource cleanup): Make dispose() async and await the
129
+ * in-flight push so it completes (or aborts) before we return. The push
130
+ * itself is bounded by the 10s fetch timeout, so this won't hang
131
+ * indefinitely. Without this, dispose() would orphan an in-flight
132
+ * network request whose result is then discarded.
133
+ */
134
+ async dispose(): Promise<void> {
128
135
  if (this.timer) clearInterval(this.timer);
129
136
  this.timer = undefined;
137
+ if (this.inFlight) {
138
+ try { await this.inFlight; } catch { /* swallow — push() already logs errors */ }
139
+ }
130
140
  }
131
141
  }
@@ -1,5 +1,6 @@
1
1
  import * as fs from "node:fs";
2
2
  import * as path from "node:path";
3
+ import { assertSafePathId } from "../utils/safe-paths.ts";
3
4
 
4
5
  export interface Checkpoint {
5
6
  runId: string;
@@ -51,12 +52,18 @@ export class FileCheckpointStore implements CheckpointStore {
51
52
  }
52
53
 
53
54
  save(checkpoint: Checkpoint): void {
55
+ // Validate taskId to prevent path traversal: the taskId is used to
56
+ // build a file path under this.checkpointDir(). Without validation, a
57
+ // malicious or buggy taskId like "../../../etc/passwd" could escape
58
+ // the checkpoints directory.
59
+ assertSafePathId("taskId", checkpoint.taskId);
54
60
  this.ensureDir();
55
61
  const p = this.checkpointPath(checkpoint.taskId);
56
62
  fs.writeFileSync(p, JSON.stringify(checkpoint, null, 2), "utf-8");
57
63
  }
58
64
 
59
65
  load(runId: string, taskId: string): Checkpoint | null {
66
+ assertSafePathId("taskId", taskId);
60
67
  const p = this.checkpointPath(taskId);
61
68
  if (!fs.existsSync(p)) return null;
62
69
 
@@ -71,6 +78,7 @@ export class FileCheckpointStore implements CheckpointStore {
71
78
  }
72
79
 
73
80
  delete(runId: string, taskId: string): void {
81
+ assertSafePathId("taskId", taskId);
74
82
  const p = this.checkpointPath(taskId);
75
83
  if (fs.existsSync(p)) {
76
84
  try {
@@ -139,6 +147,10 @@ export function saveCheckpoint(
139
147
  agentId: string,
140
148
  agentModel?: string,
141
149
  ): void {
150
+ // Validate both runId and taskId to prevent path traversal: these are
151
+ // used to build the file path under .crew/state/runs/<runId>/checkpoints/<taskId>.json.
152
+ assertSafePathId("runId", runId);
153
+ assertSafePathId("taskId", taskId);
142
154
  const checkpoint: Checkpoint = {
143
155
  runId,
144
156
  taskId,
@@ -160,6 +172,8 @@ export function saveCheckpoint(
160
172
  * Load a checkpoint for resuming.
161
173
  */
162
174
  export function loadCheckpoint(runId: string, taskId: string): Checkpoint | null {
175
+ assertSafePathId("runId", runId);
176
+ assertSafePathId("taskId", taskId);
163
177
  const stateRoot = path.join(process.cwd(), ".crew/state/runs", runId);
164
178
  const store = getCheckpointStore(stateRoot);
165
179
  return store.load(runId, taskId);
@@ -169,6 +183,8 @@ export function loadCheckpoint(runId: string, taskId: string): Checkpoint | null
169
183
  * Delete a checkpoint after successful completion.
170
184
  */
171
185
  export function clearCheckpoint(runId: string, taskId: string): void {
186
+ assertSafePathId("runId", runId);
187
+ assertSafePathId("taskId", taskId);
172
188
  const stateRoot = path.join(process.cwd(), ".crew/state/runs", runId);
173
189
  const store = getCheckpointStore(stateRoot);
174
190
  store.delete(runId, taskId);
@@ -178,6 +194,8 @@ export function clearCheckpoint(runId: string, taskId: string): void {
178
194
  * Check if a checkpoint exists for a task.
179
195
  */
180
196
  export function hasCheckpoint(runId: string, taskId: string): boolean {
197
+ assertSafePathId("runId", runId);
198
+ assertSafePathId("taskId", taskId);
181
199
  const stateRoot = path.join(process.cwd(), ".crew/state/runs", runId);
182
200
  const store = getCheckpointStore(stateRoot);
183
201
  return store.hasCheckpoint(runId, taskId);
@@ -187,6 +205,7 @@ export function hasCheckpoint(runId: string, taskId: string): boolean {
187
205
  * List all checkpoints for a run.
188
206
  */
189
207
  export function listCheckpoints(runId: string): Checkpoint[] {
208
+ assertSafePathId("runId", runId);
190
209
  const stateRoot = path.join(process.cwd(), ".crew/state/runs", runId);
191
210
  const store = getCheckpointStore(stateRoot);
192
211
  return store.list(runId);