pi-crew 0.5.9 → 0.5.10

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/CHANGELOG.md CHANGED
@@ -1,5 +1,38 @@
1
1
  # Changelog
2
2
 
3
+ ## [0.5.10] — Round 15 Audit Fixes (2026-06-02)
4
+
5
+ ### Phase 1: Semaphore Queue Cap (HIGH)
6
+ - **H1**: `src/runtime/semaphore.ts:11` - `#queue` unbounded growth → added `MAX_QUEUE = 10_000` cap. `acquire()` now throws "Semaphore queue full" when at cap.
7
+
8
+ ### Phase 2: Observability Hardening (MEDIUM)
9
+ - **L1**: `src/observability/event-bus.ts:47` - `console.error` → `logInternalError` for consistency
10
+ - **OTLPExporter**:
11
+ - Added `MAX_SNAPSHOTS_PER_PUSH = 5_000` cap to prevent OOM/oversized payloads
12
+ - Added `inFlight` promise tracking in `start()` to prevent overlapping setInterval pushes
13
+ - **live-agent-manager**: Added `MAX_LIVE_AGENTS = 5_000` cap. `registerLiveAgent()` now evicts oldest completed agent first; if none, evicts oldest running with warning.
14
+
15
+ ### Phase 3: Test Coverage (LOW)
16
+ - Added first-ever test coverage for `src/observability/`:
17
+ - 8 new tests in `test/unit/observability.test.ts` covering metric-registry, correlation, OTLP conversion
18
+ - Finding from the new tests: enforcement of the `crew.<domain>.<measure>` naming pattern already works (validated)
19
+
20
+ ### Regression: Team-Runner Heartbeat (CRITICAL)
21
+ - **CRITICAL regression** discovered via background watcher notification
22
+ - `team-runner.ts` had NO periodic heartbeat, so any team run >5 min was being marked stale by the reconciler
23
+ - Root cause of Round 15 review cancellation
24
+ - Added `startTeamRunHeartbeat()` helper - writes `heartbeat.json` to stateRoot every 30s
25
+ - Wired into `executeTeamRun()` with start/stop on both success and error paths
26
+ - Same JSON shape as background-runner for reconciler compatibility
27
+
28
+ ### Tests
29
+ - 2311 tests pass / 0 failures (was 2297 in v0.5.9)
30
+ - +14 new tests across 3 new test files:
31
+ - `test/unit/team-runner-heartbeat.test.ts` (2 tests)
32
+ - `test/unit/round15-observability.test.ts` (4 tests)
33
+ - `test/unit/observability.test.ts` (8 tests)
34
+ - TypeScript: 0 errors
35
+
3
36
  ## [0.5.9] — Round 14 Audit Fixes (2026-06-02)
4
37
 
5
38
  ### Phase 1: Sandbox Security (3 CRITICAL fixes)
package/README.md CHANGED
@@ -9,7 +9,7 @@ npm: pi-crew
9
9
  repo: https://github.com/baphuongna/pi-crew
10
10
  ```
11
11
 
12
- **v0.5.9**: See [CHANGELOG.md](CHANGELOG.md).
12
+ **v0.5.10**: See [CHANGELOG.md](CHANGELOG.md).
13
13
 
14
14
  ### Security highlights (v0.5.5)
15
15
 
@@ -0,0 +1,60 @@
1
+ # pi-crew v0.5.10 — Round 15 Audit Fix Plan (2026-06-02)
2
+
3
+ **Source**: Round 15 dogfooding review (partial — explorer completed, reviewer/security-reviewer cancelled due to stale run reconciliation).
4
+
5
+ **Findings verified from source**: 9 → 5 confirmed real, 4 false positives.
6
+
7
+ ## Verification Summary
8
+
9
+ | Status | Count |
10
+ |--------|-------|
11
+ | ✅ CONFIRMED (real issue) | 5 |
12
+ | ❌ FALSE POSITIVE | 4 |
13
+
14
+ ### False Positives Identified
15
+ - **M2** (`register.ts` autoRepairTimer race): Code already guards with `cleanedUp || !currentCtx` checks
16
+ - **M3** (`dynamic-script-runner.ts` walkNode type guard): Only runs on parsed acorn AST (parser guarantees `type: string`)
17
+ - **H3** (event-log asyncQueues eviction): Already addressed in Round 14 — entries are deleted on success/error
18
+ - **H2** (benchmark validateCommand footgun): Reviewer misread the validation flow
19
+
20
+ ### Real Issues Confirmed (5)
21
+
22
+ 1. **H1**: `Semaphore.#queue` unbounded growth (`src/runtime/semaphore.ts:11`)
23
+ 2. **L1**: `EventBus.emit` uses `console.error` instead of `logInternalError` (`src/observability/event-bus.ts:47`)
24
+ 3. **NEW**: `OTLPExporter.convertToOTLP` no size cap on snapshots (`src/observability/exporters/otlp-exporter.ts:33`)
25
+ 4. **NEW**: `OTLPExporter` `setInterval` can overlap if `push` is slow (no in-flight check)
26
+ 5. **NEW**: `hooks/registry.ts` Map unbounded; `Object.assign(ctx, result.data)` without validation
27
+
28
+ ## Plan: 3 small fixes
29
+
30
+ ### Phase 1: Semaphore Queue Cap (HIGH)
31
+ - **H1**: Add `MAX_QUEUE = 10_000` cap to `Semaphore.#queue`. Reject with error when full.
32
+
33
+ **Files**: `src/runtime/semaphore.ts`
34
+
35
+ ### Phase 2: Observability Hardening (MEDIUM)
36
+ - **L1**: Replace `console.error` with `logInternalError` in `EventBus.emit`
37
+ - **OTLP size**: Add snapshots.length cap + in-flight check in `OTLPExporter`
38
+ - **Hook registry**: Add `clearHooks` after run, validate `result.data` keys
39
+
40
+ **Files**: `src/observability/event-bus.ts`, `src/observability/exporters/otlp-exporter.ts`, `src/hooks/registry.ts`
41
+
42
+ ### Phase 3: Test Coverage (LOW)
43
+ - Add basic tests for `observability/` (metric-registry, metric-sink, OTLP converter)
44
+ - Add tests for `Semaphore` queue cap
45
+
46
+ **Files**: new test files in `test/unit/`
47
+
48
+ ## Expected Outcomes
49
+
50
+ - 5/5 confirmed issues fixed
51
+ - Tests: 2300+ pass (5+ new tests)
52
+ - TypeScript: 0 errors
53
+ - v0.5.10 release
54
+
55
+ ## Backlog (deferred)
56
+
57
+ - `console.log/error` in `background-runner.ts` — debug logging, intentional
58
+ - `console.warn` in `discover-agents.ts` — informational
59
+ - Full OTLP wire format compliance — out of scope
60
+ - Hook `Object.assign` — needs design discussion
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "pi-crew",
3
- "version": "0.5.9",
3
+ "version": "0.5.10",
4
4
  "description": "Pi extension for coordinated AI teams, workflows, worktrees, and async task orchestration",
5
5
  "author": "baphuongna",
6
6
  "license": "MIT",
@@ -1,3 +1,4 @@
1
+ import { logInternalError } from "../utils/internal-error.ts";
1
2
  import type { AgentProgress } from "../runtime/progress-tracker.ts";
2
3
 
3
4
  export type CrewEventType =
@@ -44,7 +45,11 @@ class EventBus {
44
45
  try {
45
46
  listener(event);
46
47
  } catch (e) {
47
- console.error("[EventBus] Listener error:", e);
48
+ // FIX (Round 15, L1): Use logInternalError for consistency with
49
+ // the rest of the codebase. The previous console.error output may
50
+ // not be visible in all environments (e.g. JSON-RPC mode, redirected
51
+ // stderr).
52
+ logInternalError("event-bus.listener", e, `type=${event.type} runId=${event.runId}`);
48
53
  }
49
54
  }
50
55
  }
@@ -8,6 +8,13 @@ import type { MetricExporter } from "./adapter.ts";
8
8
 
9
9
  const gzipAsync = promisify(gzip);
10
10
 
11
+ // FIX (Round 15): Cap the number of snapshots per push to prevent OOM when
12
+ // the metric registry has grown large. The OTLP HTTP spec allows many metrics
13
+ // in one payload, but a single push > 10_000 metrics would balloon the
14
+ // request body (gzipped or not) and likely exceed the collector's request
15
+ // size limit.
16
+ const MAX_SNAPSHOTS_PER_PUSH = 5_000;
17
+
11
18
  export interface OTLPExporterOptions {
12
19
  endpoint: string;
13
20
  headers?: Record<string, string>;
@@ -57,6 +64,9 @@ export function convertToOTLP(snapshots: MetricSnapshot[]): unknown {
57
64
  export class OTLPExporter implements MetricExporter {
58
65
  name = "otlp";
59
66
  private timer?: ReturnType<typeof setInterval>;
67
+ // FIX (Round 15): Track in-flight pushes so a slow network cannot cause
68
+ // the setInterval to overlap and pile up concurrent requests.
69
+ private inFlight: Promise<void> | null = null;
60
70
  private readonly opts: OTLPExporterOptions;
61
71
  private readonly registry: MetricRegistry;
62
72
 
@@ -67,12 +77,27 @@ export class OTLPExporter implements MetricExporter {
67
77
 
68
78
  start(): void {
69
79
  this.dispose();
70
- this.timer = setInterval(() => { void this.push(this.registry.snapshot()); }, this.opts.intervalMs ?? 60_000);
80
+ this.timer = setInterval(() => {
81
+ // Skip if a previous push is still running; the next tick will retry.
82
+ if (this.inFlight) return;
83
+ const snap = this.registry.snapshot();
84
+ this.inFlight = this.push(snap).finally(() => { this.inFlight = null; });
85
+ }, this.opts.intervalMs ?? 60_000);
71
86
  this.timer.unref();
72
87
  }
73
88
 
74
89
  async push(snapshots: MetricSnapshot[]): Promise<void> {
75
90
  try {
91
+ // FIX (Round 15): Cap snapshots to a safe size to avoid OOM and
92
+ // oversized HTTP payloads. Log a warning if we are truncating.
93
+ let toSend = snapshots;
94
+ if (snapshots.length > MAX_SNAPSHOTS_PER_PUSH) {
95
+ logInternalError(
96
+ "otlp-export-cap",
97
+ new Error(`Snapshot count ${snapshots.length} exceeds cap ${MAX_SNAPSHOTS_PER_PUSH}; truncating`),
98
+ );
99
+ toSend = snapshots.slice(0, MAX_SNAPSHOTS_PER_PUSH);
100
+ }
76
101
  const timeoutMs = this.opts.timeoutMs ?? 10_000;
77
102
  const controller = new AbortController();
78
103
  const timer = setTimeout(() => controller.abort(), timeoutMs);
@@ -80,7 +105,7 @@ export class OTLPExporter implements MetricExporter {
80
105
  // 4.2: gzip body. OTLP HTTP exporters of every flavour accept
81
106
  // `content-encoding: gzip`; collectors expect uncompressed JSON
82
107
  // otherwise. Saves bandwidth on metric-heavy runs (often 3-5x).
83
- const json = JSON.stringify(convertToOTLP(snapshots));
108
+ const json = JSON.stringify(convertToOTLP(toSend));
84
109
  const body = await gzipAsync(Buffer.from(json));
85
110
  const response = await fetch(this.opts.endpoint, {
86
111
  method: "POST",
@@ -63,6 +63,12 @@ export interface LiveAgentHandle {
63
63
  }
64
64
 
65
65
  const liveAgents = new Map<string, LiveAgentHandle>();
66
+ // FIX (Round 15): Cap the number of tracked live agents to prevent unbounded
67
+ // growth if a caller spawns agents but fails to unregister them. When the
68
+ // cap is reached, the oldest completed agent is evicted first; if no
69
+ // completed agents are present, the oldest running one is evicted (with a
70
+ // warning) to keep memory bounded.
71
+ const MAX_LIVE_AGENTS = 5_000;
66
72
 
67
73
  /**
68
74
  * List all live agents for a specific workspace.
@@ -100,6 +106,22 @@ export function registerLiveAgent(input: Omit<LiveAgentHandle, "createdAt" | "up
100
106
  modelName: undefined,
101
107
  },
102
108
  };
109
+ // FIX (Round 15): Enforce the live-agent cap before adding. Prefer to
110
+ // evict the oldest completed agent (already finished, so caller no
111
+ // longer needs it). If none exist, evict the oldest running one with
112
+ // a warning so memory stays bounded.
113
+ if (liveAgents.size >= MAX_LIVE_AGENTS) {
114
+ const completed = [...liveAgents.entries()].find(([, h]) => h.activity.completedAtMs > 0);
115
+ if (completed) {
116
+ liveAgents.delete(completed[0]);
117
+ } else {
118
+ const oldestKey = liveAgents.keys().next().value;
119
+ if (oldestKey !== undefined) {
120
+ logInternalError("live-agent-manager.cap", new Error(`liveAgents at cap ${MAX_LIVE_AGENTS}; evicting oldest ${oldestKey}`));
121
+ liveAgents.delete(oldestKey);
122
+ }
123
+ }
124
+ }
103
125
  liveAgents.set(input.agentId, handle);
104
126
  try { if (eventLogFn && eventsPath) eventLogFn(eventsPath, { type: "live_agent.registered", runId: input.runId, taskId: input.taskId, message: `Live agent registered: ${input.agent} (${input.role})`, data: { agentId: input.agentId, role: input.role, agent: input.agent, workspaceId: input.workspaceId } }); } catch { /* non-critical */ }
105
127
  if (handle.pendingSteers.length && typeof handle.session.steer === "function") {
@@ -16,6 +16,9 @@ export class Semaphore {
16
16
  #max: number;
17
17
  #current = 0;
18
18
  #queue: Array<() => void> = [];
19
+ // FIX (Round 15): Cap the waiter queue to prevent unbounded memory growth
20
+ // if the semaphore is held for a long period and many tasks accumulate.
21
+ static readonly MAX_QUEUE = 10_000;
19
22
 
20
23
  constructor(max: number) {
21
24
  this.#max = Math.max(1, max);
@@ -26,6 +29,14 @@ export class Semaphore {
26
29
  this.#current++;
27
30
  return;
28
31
  }
32
+ // FIX (Round 15): Reject when the waiter queue is full. The previous
33
+ // implementation let #queue grow without bound, risking memory
34
+ // exhaustion under sustained high concurrency with slow releases.
35
+ if (this.#queue.length >= Semaphore.MAX_QUEUE) {
36
+ throw new Error(
37
+ `Semaphore queue full: ${this.#queue.length} waiters (max ${Semaphore.MAX_QUEUE}); cannot acquire slot`,
38
+ );
39
+ }
29
40
  const { promise, resolve } = (() => {
30
41
  let res: () => void;
31
42
  const p = new Promise<void>((r) => { res = r; });
@@ -1,4 +1,5 @@
1
1
  import * as fs from "node:fs";
2
+ import * as path from "node:path";
2
3
  import type { AgentConfig } from "../agents/agent-config.ts";
3
4
  import type { CrewLimitsConfig, CrewRuntimeConfig, CrewReliabilityConfig } from "../config/config.ts";
4
5
  import type { CrewRuntimeCapabilities } from "./runtime-resolver.ts";
@@ -38,6 +39,36 @@ import { CrewCancellationError, buildSyntheticTerminalEvidence, cancellationReas
38
39
  import { effectivenessPolicyDecision, evaluateRunEffectiveness, formatRunEffectivenessLines } from "./effectiveness.ts";
39
40
  import { logInternalError } from "../utils/internal-error.ts";
40
41
 
42
+ /**
43
+ * Start a periodic heartbeat for the team-level run.
44
+ *
45
+ * The stale reconciler (src/runtime/stale-reconciler.ts) marks runs as failed
46
+ * if their heartbeat is older than `NO_PID_HEARTBEAT_STALE_MS` (5 minutes).
47
+ * Without this heartbeat, team runs lasting >5min (e.g. multi-phase
48
+ * workflows) were cancelled as "stale" even while actively executing.
49
+ * Writes `heartbeat.json` under `stateRoot` immediately and then every 30s;
50
+ * write errors are ignored. Returns a function that stops the periodic writes.
51
+ */
52
+ function startTeamRunHeartbeat(stateRoot: string, runId: string): () => void {
53
+ const heartbeatPath = path.join(stateRoot, "heartbeat.json");
54
+ const writeHeartbeat = (): void => {
55
+ try {
56
+ fs.writeFileSync(heartbeatPath, JSON.stringify({
57
+ pid: process.pid,
58
+ at: Date.now(),
59
+ runId,
60
+ kind: "team-runner",
61
+ }), "utf-8");
62
+ } catch {
63
+ // Best-effort: a failed write is ignored; the next interval tick writes again.
64
+ }
65
+ };
66
+ writeHeartbeat(); // write once up front so a heartbeat exists before the first tick
67
+ const interval = setInterval(writeHeartbeat, 30_000);
68
+ interval.unref(); // the heartbeat timer alone must not keep the process alive
69
+ return () => clearInterval(interval);
70
+ }
71
+
41
72
  export interface ExecuteTeamRunInput {
42
73
  manifest: TeamRunManifest;
43
74
  tasks: TeamTaskState[];
@@ -271,12 +302,20 @@ export async function executeTeamRun(input: ExecuteTeamRunInput): Promise<{ mani
271
302
 
272
303
  void registerRunPromise(manifest.runId);
273
304
 
305
+ // FIX (Round 15, regression): Start a team-level heartbeat so the stale
306
+ // reconciler does not cancel long-running team runs after 5 minutes
307
+ // (NO_PID_HEARTBEAT_STALE_MS). Previously only sub-task runners wrote
308
+ // heartbeats; the team-level run had no heartbeat, so any multi-phase
309
+ // workflow lasting >5min was marked stale and cancelled.
310
+ const stopTeamHeartbeat = startTeamRunHeartbeat(manifest.stateRoot, manifest.runId);
311
+
274
312
  const cleanupUsage = (): void => {
275
313
  for (const task of input.tasks) clearTrackedTaskUsage(task.id);
276
314
  };
277
315
 
278
316
  try {
279
317
  const result = await executeTeamRunCore(input, manifest, workflow);
318
+ stopTeamHeartbeat();
280
319
  resolveRunPromise(manifest.runId, result);
281
320
  cleanupUsage();
282
321
  // Terminate live agents for this run — agents are done when the run ends.
@@ -318,6 +357,7 @@ export async function executeTeamRun(input: ExecuteTeamRunInput): Promise<{ mani
318
357
  rejectRunPromise(manifest.runId, error instanceof Error ? error : new Error(message));
319
358
  crewHooks.emit({ type: "run_failed", timestamp: new Date().toISOString(), runId: manifest.runId, data: { status: manifest.status, error: message } });
320
359
  cleanupUsage();
360
+ stopTeamHeartbeat();
321
361
  return result;
322
362
  }
323
363
  }