pi-crew 0.5.2 → 0.5.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (137) hide show
  1. package/CHANGELOG.md +183 -0
  2. package/README.md +17 -1
  3. package/docs/architecture.md +2 -0
  4. package/docs/bugs/cross-session-notification-leakage.md +82 -0
  5. package/docs/coding-agent-optimization.md +268 -0
  6. package/docs/deep-review-report.md +384 -0
  7. package/docs/distillation/cybersecurity-patterns.md +294 -0
  8. package/docs/migration-v0.4-v0.5.md +208 -0
  9. package/docs/optimization-plan.md +642 -0
  10. package/docs/pi-crew-v0.5.5-audit-fix-plan.md +133 -0
  11. package/docs/pi-mono-opportunities.md +969 -0
  12. package/docs/pi-mono-review.md +291 -0
  13. package/docs/skills/REFERENCE.md +144 -0
  14. package/package.json +12 -9
  15. package/skills/artifact-analysis-loop/SKILL.md +302 -0
  16. package/skills/async-worker-recovery/SKILL.md +19 -1
  17. package/skills/child-pi-spawning/SKILL.md +19 -6
  18. package/skills/context-artifact-hygiene/SKILL.md +19 -2
  19. package/skills/delegation-patterns/SKILL.md +68 -3
  20. package/skills/detection-pipeline-design/SKILL.md +285 -0
  21. package/skills/event-log-tracing/SKILL.md +20 -6
  22. package/skills/git-master/SKILL.md +20 -6
  23. package/skills/hunting-investigation-loop/SKILL.md +401 -0
  24. package/skills/incident-playbook-construction/SKILL.md +383 -0
  25. package/skills/live-agent-lifecycle/SKILL.md +20 -6
  26. package/skills/mailbox-interactive/SKILL.md +19 -6
  27. package/skills/model-routing-context/SKILL.md +19 -1
  28. package/skills/multi-perspective-review/SKILL.md +19 -4
  29. package/skills/observability-reliability/SKILL.md +19 -2
  30. package/skills/orchestration/SKILL.md +20 -2
  31. package/skills/ownership-session-security/SKILL.md +20 -2
  32. package/skills/pi-extension-lifecycle/SKILL.md +20 -2
  33. package/skills/post-mortem/SKILL.md +7 -2
  34. package/skills/read-only-explorer/SKILL.md +20 -6
  35. package/skills/requirements-to-task-packet/SKILL.md +23 -3
  36. package/skills/resource-discovery-config/SKILL.md +20 -2
  37. package/skills/runtime-state-reader/SKILL.md +20 -2
  38. package/skills/safe-bash/SKILL.md +21 -6
  39. package/skills/scrutinize/SKILL.md +20 -2
  40. package/skills/secure-agent-orchestration-review/SKILL.md +29 -2
  41. package/skills/security-review/SKILL.md +560 -0
  42. package/skills/state-mutation-locking/SKILL.md +22 -2
  43. package/skills/systematic-debugging/SKILL.md +8 -6
  44. package/skills/threat-hypothesis-framework/SKILL.md +175 -0
  45. package/skills/ui-render-performance/SKILL.md +20 -2
  46. package/skills/verification-before-done/SKILL.md +17 -2
  47. package/skills/widget-rendering/SKILL.md +21 -6
  48. package/skills/workspace-isolation/SKILL.md +20 -6
  49. package/skills/worktree-isolation/SKILL.md +20 -6
  50. package/src/agents/agent-config.ts +40 -1
  51. package/src/benchmark/benchmark-runner.ts +45 -0
  52. package/src/benchmark/feedback-loop.ts +5 -0
  53. package/src/config/config.ts +32 -5
  54. package/src/config/role-tools.ts +82 -0
  55. package/src/config/suggestions.ts +8 -0
  56. package/src/config/types.ts +4 -0
  57. package/src/extension/async-notifier.ts +10 -1
  58. package/src/extension/crew-cleanup.ts +114 -0
  59. package/src/extension/cross-extension-rpc.ts +1 -1
  60. package/src/extension/notification-router.ts +18 -0
  61. package/src/extension/register.ts +27 -19
  62. package/src/extension/registration/subagent-tools.ts +1 -1
  63. package/src/extension/team-tool/anchor.ts +201 -0
  64. package/src/extension/team-tool/api.ts +2 -1
  65. package/src/extension/team-tool/auto-summarize.ts +154 -0
  66. package/src/extension/team-tool/run.ts +42 -7
  67. package/src/extension/team-tool.ts +44 -2
  68. package/src/hooks/registry.ts +1 -3
  69. package/src/observability/event-bus.ts +69 -0
  70. package/src/observability/event-to-metric.ts +0 -2
  71. package/src/runtime/anchor-manager.ts +473 -0
  72. package/src/runtime/async-runner.ts +8 -4
  73. package/src/runtime/auto-summarize.ts +350 -0
  74. package/src/runtime/background-runner.ts +10 -3
  75. package/src/runtime/budget-tracker.ts +354 -0
  76. package/src/runtime/chain-runner.ts +507 -0
  77. package/src/runtime/child-pi.ts +123 -35
  78. package/src/runtime/crash-recovery.ts +5 -4
  79. package/src/runtime/crew-agent-runtime.ts +1 -0
  80. package/src/runtime/custom-tools/irc-tool.ts +13 -0
  81. package/src/runtime/custom-tools/submit-result-tool.ts +3 -2
  82. package/src/runtime/delivery-coordinator.ts +10 -3
  83. package/src/runtime/dynamic-script-runner.ts +482 -0
  84. package/src/runtime/foreground-control.ts +87 -17
  85. package/src/runtime/handoff-manager.ts +589 -0
  86. package/src/runtime/hidden-handoff.ts +424 -0
  87. package/src/runtime/live-agent-manager.ts +20 -4
  88. package/src/runtime/live-session-runtime.ts +39 -4
  89. package/src/runtime/manifest-cache.ts +2 -1
  90. package/src/runtime/model-resolver.ts +16 -4
  91. package/src/runtime/phase-tracker.ts +373 -0
  92. package/src/runtime/pi-args.ts +11 -1
  93. package/src/runtime/pi-json-output.ts +31 -0
  94. package/src/runtime/pipeline-runner.ts +514 -0
  95. package/src/runtime/progress-tracker.ts +124 -0
  96. package/src/runtime/retry-runner.ts +354 -0
  97. package/src/runtime/sandbox.ts +252 -0
  98. package/src/runtime/scheduler.ts +7 -2
  99. package/src/runtime/skill-effectiveness.ts +473 -0
  100. package/src/runtime/skill-instructions.ts +37 -3
  101. package/src/runtime/subagent-manager.ts +1 -1
  102. package/src/runtime/task-graph.ts +11 -1
  103. package/src/runtime/task-runner.ts +92 -18
  104. package/src/runtime/team-runner.ts +13 -12
  105. package/src/runtime/tool-progress.ts +10 -3
  106. package/src/runtime/verification-gates.ts +367 -0
  107. package/src/schema/team-tool-schema.ts +37 -0
  108. package/src/skills/discover-skills.ts +5 -0
  109. package/src/state/active-run-registry.ts +9 -2
  110. package/src/state/contracts.ts +9 -0
  111. package/src/state/crew-init.ts +3 -3
  112. package/src/state/decision-ledger.ts +98 -55
  113. package/src/state/event-log-rotation.ts +2 -2
  114. package/src/state/event-log.ts +144 -10
  115. package/src/state/hook-instinct-bridge.ts +5 -5
  116. package/src/state/mailbox.ts +10 -0
  117. package/src/state/run-cache.ts +18 -8
  118. package/src/state/state-store.ts +3 -1
  119. package/src/state/types.ts +4 -0
  120. package/src/tools/safe-bash-extension.ts +1 -0
  121. package/src/tools/safe-bash.ts +152 -20
  122. package/src/types/new-api-types.ts +34 -0
  123. package/src/ui/agent-management-overlay.ts +5 -1
  124. package/src/ui/crew-widget.ts +29 -15
  125. package/src/ui/overlays/mailbox-detail-overlay.ts +13 -2
  126. package/src/ui/powerbar-publisher.ts +101 -7
  127. package/src/ui/tool-render.ts +15 -15
  128. package/src/ui/transcript-cache.ts +13 -0
  129. package/src/utils/bm25-search.ts +16 -8
  130. package/src/utils/env-filter.ts +8 -5
  131. package/src/utils/redaction.ts +169 -15
  132. package/src/utils/session-utils.ts +52 -0
  133. package/src/utils/sse-parser.ts +10 -1
  134. package/src/worktree/cleanup.ts +6 -1
  135. package/src/worktree/worktree-manager.ts +32 -13
  136. package/workflows/chain.workflow.md +252 -0
  137. package/workflows/pipeline.workflow.md +27 -0
@@ -8,8 +8,9 @@ import { getPiSpawnCommand } from "./pi-spawn.ts";
8
8
  import { DEFAULT_CHILD_PI } from "../config/defaults.ts";
9
9
  import { logInternalError } from "../utils/internal-error.ts";
10
10
  import { attachPostExitStdioGuard, trySignalChild } from "./post-exit-stdio-guard.ts";
11
- import { redactJsonLine, SECRET_KEY_PATTERN } from "../utils/redaction.ts";
11
+ import { redactJsonLine, isSecretKey } from "../utils/redaction.ts";
12
12
  import { sanitizeEnvSecrets } from "../utils/env-filter.ts";
13
+ import { registerChildProcess, unregisterChildProcess } from "../extension/crew-cleanup.ts";
13
14
 
14
15
  const POST_EXIT_STDIO_GUARD_MS = DEFAULT_CHILD_PI.postExitStdioGuardMs;
15
16
  const FINAL_DRAIN_MS = DEFAULT_CHILD_PI.finalDrainMs;
@@ -117,6 +118,8 @@ export interface ChildPiLifecycleEvent {
117
118
  error?: string;
118
119
  /** Stderr captured at timeout moment (for response_timeout events). */
119
120
  stderr?: string;
121
+ /** Last N chars of stderr for error context (exit/error events). */
122
+ stderrExcerpt?: string;
120
123
  /** Timestamp (ISO). */
121
124
  ts: string;
122
125
  }
@@ -146,6 +149,16 @@ export interface ChildPiRunInput {
146
149
  parentContext?: string;
147
150
  /** When true, prepend parentContext to the task prompt. */
148
151
  inheritContext?: boolean;
152
+ /** Pass to pi to mark certain commands as context-excluded. Default: false */
153
+ excludeContextBash?: boolean;
154
+ /** pi session ID for session naming (aligns with pi-crew run ID) */
155
+ sessionId?: string;
156
+ /** Run ID for cleanup tracking */
157
+ runId?: string;
158
+ /** Agent ID for cleanup tracking */
159
+ agentId?: string;
160
+ /** Role for tool restrictions (from role-tools.ts) */
161
+ role?: string;
149
162
  }
150
163
 
151
164
  export interface ChildPiRunResult {
@@ -168,18 +181,24 @@ export function buildChildPiSpawnOptions(cwd: string, env: NodeJS.ProcessEnv): S
168
181
  // Bug #12 fix: essential env vars (PATH, HOME, etc.) are always preserved so child can find npm/node.
169
182
  const filteredEnv = sanitizeEnvSecrets(env, {
170
183
  allowList: [
171
- // Model provider API keys (these are safe to pass — they're meant for API calls)
172
- "MINIMAX_*",
173
- "OPENAI_*",
174
- "ANTHROPIC_*",
175
- "GOOGLE_*",
176
- "AZURE_*",
177
- "AWS_*",
178
- "ZEU_*",
179
- "ZERODEV_*",
180
- "*_API_KEY",
181
- "*_TOKEN",
182
- "*_SECRET",
184
+ // Model provider API keys (explicit list — do NOT use wildcards)
185
+ "MINIMAX_API_KEY",
186
+ "MINIMAX_GROUP_ID",
187
+ "OPENAI_API_KEY",
188
+ "OPENAI_ORG_ID",
189
+ "ANTHROPIC_API_KEY",
190
+ "GOOGLE_API_KEY",
191
+ "GOOGLE_GENERATIVE_LANGUAGE_API_KEY",
192
+ "AZURE_OPENAI_API_KEY",
193
+ "AZURE_OPENAI_ENDPOINT",
194
+ "AWS_ACCESS_KEY_ID",
195
+ "AWS_SECRET_ACCESS_KEY",
196
+ "AWS_REGION",
197
+ "ZEU_API_KEY",
198
+ "ZERODEV_API_KEY",
199
+ // SECURITY FIX: Removed dangerous wildcards "*_API_KEY", "*_TOKEN", "*_SECRET"
200
+ // These patterns would leak ALL secrets matching the pattern to child processes.
201
+ // Only add specific, intended provider keys above.
183
202
  // Essential non-secret vars for child process to function
184
203
  "PATH",
185
204
  "HOME",
@@ -368,23 +387,31 @@ export async function runChildPi(input: ChildPiRunInput): Promise<ChildPiRunResu
368
387
  if (depth.blocked) return { exitCode: 1, stdout: "", stderr: `pi-crew depth guard blocked child worker: depth ${depth.depth} >= max ${depth.maxDepth}` };
369
388
  const mock = process.env.PI_TEAMS_MOCK_CHILD_PI;
370
389
  if (mock) {
390
+ // SECURITY: Log mock mode activation prominently for audit trail
391
+ console.warn(`[⚠️ PI_CREW_MOCK_MODE] Mock mode active: ${mock} — NOT running real agents!`);
392
+ // SECURITY FIX: Require PI_CREW_ALLOW_MOCK alongside PI_TEAMS_MOCK_CHILD_PI
393
+ const allowMock = process.env.PI_CREW_ALLOW_MOCK === "1" || process.env.PI_CREW_ALLOW_MOCK === "true";
394
+ if (!allowMock) {
395
+ console.error(`[🚨 PI_CREW_MOCK_MODE] SECURITY: PI_TEAMS_MOCK_CHILD_PI is set but PI_CREW_ALLOW_MOCK is not "1". Ignoring mock request for safety.`);
396
+ return { exitCode: 1, stdout: "", stderr: "Mock mode requires PI_CREW_ALLOW_MOCK=1 alongside PI_TEAMS_MOCK_CHILD_PI" };
397
+ }
371
398
  if (mock === "success") {
372
- const stdout = `Mock child Pi success for ${input.agent.name}\n`;
399
+ const stdout = `[MOCK] Success for ${input.agent.name}\n`;
373
400
  observeStdoutChunk(input, stdout);
374
401
  return { exitCode: 0, stdout, stderr: "" };
375
402
  }
376
403
  if (mock === "json-success" || mock === "adaptive-plan") {
377
404
  const text = mock === "adaptive-plan" && effectiveTask.includes("ADAPTIVE_PLAN_JSON_START")
378
- ? `Adaptive mock plan\nADAPTIVE_PLAN_JSON_START\n${JSON.stringify({ phases: [{ name: "research", tasks: [{ role: "explorer", task: "Explore adaptive target" }, { role: "analyst", task: "Analyze adaptive target" }, { role: "planner", task: "Plan adaptive target" }] }, { name: "build", tasks: [{ role: "executor", task: "Implement adaptive target" }] }, { name: "check", tasks: [{ role: "reviewer", task: "Review adaptive target" }, { role: "test-engineer", task: "Test adaptive target" }, { role: "writer", task: "Summarize adaptive target" }] }] })}\nADAPTIVE_PLAN_JSON_END`
379
- : `Mock JSON success for ${input.agent.name}`;
405
+ ? `[MOCK] Adaptive plan\nADAPTIVE_PLAN_JSON_START\n${JSON.stringify({ phases: [{ name: "research", tasks: [{ role: "explorer", task: "Explore adaptive target" }, { role: "analyst", task: "Analyze adaptive target" }, { role: "planner", task: "Plan adaptive target" }] }, { name: "build", tasks: [{ role: "executor", task: "Implement adaptive target" }] }, { name: "check", tasks: [{ role: "reviewer", task: "Review adaptive target" }, { role: "test-engineer", task: "Test adaptive target" }, { role: "writer", task: "Summarize adaptive target" }] }] })}\nADAPTIVE_PLAN_JSON_END`
406
+ : `[MOCK] JSON success for ${input.agent.name}`;
380
407
  const stdout = `${JSON.stringify({ type: "message", message: { role: "assistant", content: [{ type: "text", text }] } })}\n${JSON.stringify({ type: "message_end", usage: { input: 10, output: 5, cost: 0.001, turns: 1 } })}\n`;
381
408
  observeStdoutChunk(input, stdout);
382
409
  return { exitCode: 0, stdout, stderr: "" };
383
410
  }
384
- if (mock === "retryable-failure") return { exitCode: 1, stdout: "", stderr: "rate limit: mock failure" };
385
- return { exitCode: 1, stdout: "", stderr: `mock failure: ${mock}` };
411
+ if (mock === "retryable-failure") return { exitCode: 1, stdout: "", stderr: "[MOCK] rate limit: mock failure" };
412
+ return { exitCode: 1, stdout: "", stderr: `[MOCK] failure: ${mock}` };
386
413
  }
387
- const built = buildPiWorkerArgs({ task: effectiveTask, agent: input.agent, model: input.model, sessionEnabled: true, maxDepth: input.maxDepth, skillPaths: input.skillPaths });
414
+ const built = buildPiWorkerArgs({ task: effectiveTask, agent: input.agent, model: input.model, sessionEnabled: true, maxDepth: input.maxDepth, skillPaths: input.skillPaths, role: input.role });
388
415
  const spawnSpec = getPiSpawnCommand(built.args);
389
416
  try {
390
417
  return await new Promise<ChildPiRunResult>((resolve) => {
@@ -393,6 +420,10 @@ export async function runChildPi(input: ChildPiRunInput): Promise<ChildPiRunResu
393
420
  activeChildProcesses.set(child.pid, child);
394
421
  input.onSpawn?.(child.pid);
395
422
  input.onLifecycleEvent?.({ type: "spawned", pid: child.pid, ts: new Date().toISOString() });
423
+ // Register with cleanup handler for graceful shutdown
424
+ if (input.runId && input.agentId) {
425
+ registerChildProcess(child.pid, input.runId, input.agentId);
426
+ }
396
427
  } else {
397
428
  input.onLifecycleEvent?.({ type: "spawn_error", error: "spawn returned no pid", ts: new Date().toISOString() });
398
429
  }
@@ -414,6 +445,36 @@ export async function runChildPi(input: ChildPiRunInput): Promise<ChildPiRunResu
414
445
  let hardKilled = false;
415
446
  const cleanupErrors: string[] = [];
416
447
  let turnCount = 0;
448
+ // Track in-flight operations for proper rejection on unexpected exit
449
+ interface PendingOperation {
450
+ id: string;
451
+ type: "prompt" | "steer" | "json_event";
452
+ startedAt: number;
453
+ }
454
+ const pendingOperations = new Map<string, PendingOperation>();
455
+ let operationIdCounter = 0;
456
+
457
+ const startOperation = (type: PendingOperation["type"]): string => {
458
+ const id = `op-${++operationIdCounter}`;
459
+ pendingOperations.set(id, { id, type, startedAt: Date.now() });
460
+ return id;
461
+ };
462
+
463
+ const completeOperation = (id: string): void => {
464
+ pendingOperations.delete(id);
465
+ };
466
+
467
+ const rejectPendingOperations = (error: Error): void => {
468
+ pendingOperations.forEach((op, id) => {
469
+ logInternalError(
470
+ "child-pi.pending-operation-rejected",
471
+ error,
472
+ `opId=${id} type=${op.type} elapsed=${Date.now() - op.startedAt}ms`,
473
+ );
474
+ });
475
+ pendingOperations.clear();
476
+ };
477
+
417
478
  let softLimitReached = false;
418
479
  const maxTurns = input.maxTurns;
419
480
  const graceTurns = input.graceTurns;
@@ -450,20 +511,27 @@ export async function runChildPi(input: ChildPiRunInput): Promise<ChildPiRunResu
450
511
  },
451
512
  onJsonEvent: (event) => {
452
513
  restartNoResponseTimer();
453
- // Turn-count-based steering: soft limit steer + hard abort after graceTurns
454
- if (event && typeof event === "object" && !Array.isArray(event)) {
455
- const obj = event as Record<string, unknown>;
456
- if (obj.type === "turn_end") {
457
- turnCount += 1;
458
- if (maxTurns !== undefined && !softLimitReached && turnCount >= maxTurns) {
459
- softLimitReached = true;
460
- // Inject steer via stdin to tell child to wrap up
461
- child.stdin?.write(JSON.stringify({ type: "steer", message: "You have reached your turn limit. Wrap up immediately — provide your final answer now." }) + "\n");
462
- } else if (maxTurns !== undefined && softLimitReached && turnCount >= maxTurns + (graceTurns ?? 5)) {
463
- // Hard abort — terminate after grace turns
464
- try { child.kill(process.platform === "win32" ? undefined : "SIGTERM"); } catch { /* best-effort */ }
514
+ const eventOpId = startOperation("json_event");
515
+ try {
516
+ // Turn-count-based steering: soft limit steer + hard abort after graceTurns
517
+ if (event && typeof event === "object" && !Array.isArray(event)) {
518
+ const obj = event as Record<string, unknown>;
519
+ if (obj.type === "turn_end") {
520
+ turnCount += 1;
521
+ if (maxTurns !== undefined && !softLimitReached && turnCount >= maxTurns) {
522
+ softLimitReached = true;
523
+ // Inject steer via stdin to tell child to wrap up
524
+ child.stdin?.write(JSON.stringify({ type: "steer", message: "You have reached your turn limit. Wrap up immediately — provide your final answer now." }) + "\n");
525
+ } else if (maxTurns !== undefined && softLimitReached && turnCount >= maxTurns + (graceTurns ?? 5)) {
526
+ // Hard abort — terminate after grace turns
527
+ try { child.kill(process.platform === "win32" ? undefined : "SIGTERM"); } catch { /* best-effort */ }
528
+ }
465
529
  }
466
530
  }
531
+ completeOperation(eventOpId);
532
+ } catch (err) {
533
+ completeOperation(eventOpId);
534
+ throw err;
467
535
  }
468
536
  input.onJsonEvent?.(event);
469
537
  if (!isFinalAssistantEvent(event) || childExited || settled || finalDrainTimer) return;
@@ -587,20 +655,38 @@ export async function runChildPi(input: ChildPiRunInput): Promise<ChildPiRunResu
587
655
  stderr = appendBoundedTail(stderr, chunk.toString("utf-8"));
588
656
  });
589
657
  child.on("error", (error) => {
658
+ // Reject pending operations with process error context
659
+ const processError = new Error(
660
+ `Child Pi process error: ${error.message}. Stderr: ${stderr.slice(-500) || "(none)"}`,
661
+ );
662
+ rejectPendingOperations(processError);
590
663
  try {
591
- input.onLifecycleEvent?.({ type: "spawn_error", pid: child.pid, error: error.message, ts: new Date().toISOString() });
664
+ input.onLifecycleEvent?.({ type: "spawn_error", pid: child.pid, error: processError.message, ts: new Date().toISOString(), stderrExcerpt: stderr.slice(-500) || undefined });
592
665
  } catch (err) {
593
666
  logInternalError("child-pi.on-lifecycle-event", err, `event=error, pid=${child.pid}`);
594
667
  }
595
- settle({ exitCode: null, stdout, stderr, error: error.message });
668
+ settle({ exitCode: null, stdout, stderr, error: processError.message });
596
669
  });
597
- child.on("exit", (code) => {
670
+ child.on("exit", (code, signal) => {
598
671
  if (child.pid) {
599
672
  activeChildProcesses.delete(child.pid);
600
673
  clearHardKillTimer(child.pid);
674
+ // Unregister from cleanup handler
675
+ unregisterChildProcess(child.pid);
676
+ }
677
+ // Build comprehensive exit error for unexpected exits
678
+ const isUnexpectedExit = !childExited && !settled && !responseTimeoutHit && !abortRequested;
679
+ const exitError = isUnexpectedExit
680
+ ? new Error(
681
+ `Child Pi process exited unexpectedly (code=${code ?? "null"} signal=${signal ?? "null"}). `
682
+ + `Stderr: ${stderr.slice(-1000) || "(none)"}`,
683
+ )
684
+ : null;
685
+ if (exitError) {
686
+ rejectPendingOperations(exitError);
601
687
  }
602
688
  try {
603
- input.onLifecycleEvent?.({ type: "exit", pid: child.pid, exitCode: code, ts: new Date().toISOString() });
689
+ input.onLifecycleEvent?.({ type: "exit", pid: child.pid, exitCode: code, ts: new Date().toISOString(), error: exitError?.message, stderrExcerpt: isUnexpectedExit ? stderr.slice(-1000) || undefined : undefined });
604
690
  } catch (err) {
605
691
  logInternalError("child-pi.on-lifecycle-event", err, `event=exit, pid=${child.pid}`);
606
692
  }
@@ -618,6 +704,8 @@ export async function runChildPi(input: ChildPiRunInput): Promise<ChildPiRunResu
618
704
  if (child.pid) {
619
705
  activeChildProcesses.delete(child.pid);
620
706
  clearHardKillTimer(child.pid);
707
+ // Unregister from cleanup handler
708
+ unregisterChildProcess(child.pid);
621
709
  }
622
710
  try {
623
711
  input.onLifecycleEvent?.({ type: "close", pid: child.pid, exitCode, ts: new Date().toISOString() });
@@ -15,6 +15,7 @@ import { activeRunEntries, unregisterActiveRun, readActiveRunRegistry } from "..
15
15
  import { resolveRealContainedPath } from "../utils/safe-paths.ts";
16
16
  import { projectCrewRoot, userCrewRoot } from "../utils/paths.ts";
17
17
  import { terminateLiveAgentsForRun } from "./live-agent-manager.ts";
18
+ import { logInternalError } from "../utils/internal-error.ts";
18
19
 
19
20
  export interface RecoveryPlan {
20
21
  runId: string;
@@ -159,7 +160,7 @@ export function cancelOrphanedRuns(
159
160
  cancelled.push(manifest.runId);
160
161
  cancelledRun = true;
161
162
  });
162
- if (cancelledRun) void terminateLiveAgentsForRun(manifest.runId, "cancelled", appendEvent, loaded.manifest.eventsPath).catch(() => {});
163
+ if (cancelledRun) void terminateLiveAgentsForRun(manifest.runId, "cancelled", appendEvent, loaded.manifest.eventsPath).catch((error) => logInternalError("crash-recovery.orphan.terminate", error, `runId=${manifest.runId}`));
163
164
  }
164
165
 
165
166
  return { cancelled, skipped };
@@ -268,7 +269,7 @@ export function purgeStaleActiveRunIndex(staleThresholdMs = 300_000, now = Date.
268
269
  saveRunTasks(fullLoaded.manifest, repairedTasks);
269
270
  for (const task of repairedTasks) { try { upsertCrewAgent(fullLoaded.manifest, recordFromTask(fullLoaded.manifest, task, "scaffold")); } catch { /* non-critical */ } }
270
271
  updateRunStatus(fullLoaded.manifest, "cancelled", "Orphaned run: worker process dead and no recent activity");
271
- void terminateLiveAgentsForRun(fullLoaded.manifest.runId, "cancelled", appendEvent, fullLoaded.manifest.eventsPath).catch(() => {});
272
+ void terminateLiveAgentsForRun(fullLoaded.manifest.runId, "cancelled", appendEvent, fullLoaded.manifest.eventsPath).catch((error) => logInternalError("crash-recovery.pid-dead.terminate", error, `runId=${fullLoaded.manifest.runId}`));
272
273
  }
273
274
  } catch {
274
275
  // Best-effort manifest cleanup
@@ -299,7 +300,7 @@ export function purgeStaleActiveRunIndex(staleThresholdMs = 300_000, now = Date.
299
300
  saveRunTasks(fullLoaded.manifest, repairedTasks);
300
301
  for (const task of repairedTasks) { try { upsertCrewAgent(fullLoaded.manifest, recordFromTask(fullLoaded.manifest, task, "scaffold")); } catch { /* non-critical */ } }
301
302
  updateRunStatus(fullLoaded.manifest, "cancelled", "Orphaned run: no async worker and no manifest update in over " + Math.round(staleThresholdMs / 60000) + " minutes");
302
- void terminateLiveAgentsForRun(fullLoaded.manifest.runId, "cancelled", appendEvent, fullLoaded.manifest.eventsPath).catch(() => {});
303
+ void terminateLiveAgentsForRun(fullLoaded.manifest.runId, "cancelled", appendEvent, fullLoaded.manifest.eventsPath).catch((error) => logInternalError("crash-recovery.pid-dead.terminate", error, `runId=${fullLoaded.manifest.runId}`));
303
304
  }
304
305
  } catch {
305
306
  // Best-effort
@@ -335,7 +336,7 @@ export function reconcileAllStaleRuns(cwd: string, manifestCache: ManifestCache,
335
336
  for (const task of result.repairedTasks) { try { upsertCrewAgent(fresh.manifest, recordFromTask(fresh.manifest, task, "scaffold")); } catch { /* non-critical */ } }
336
337
  }
337
338
  updateRunStatus(fresh.manifest, "failed", `Stale run reconciled: ${result.detail}`);
338
- void terminateLiveAgentsForRun(fresh.manifest.runId, "failed", appendEvent, fresh.manifest.eventsPath).catch(() => {});
339
+ void terminateLiveAgentsForRun(fresh.manifest.runId, "failed", appendEvent, fresh.manifest.eventsPath).catch((error) => logInternalError("crash-recovery.reconcile.terminate", error, `runId=${fresh.manifest.runId}`));
339
340
  appendEvent(fresh.manifest.eventsPath, { type: "crew.run.reconciled_stale", runId: manifest.runId, message: result.detail, data: { verdict: result.verdict } });
340
341
  }
341
342
  if (result.verdict !== "healthy") {
@@ -7,6 +7,7 @@ export type CrewAgentStatus = "queued" | "running" | "waiting" | "completed" | "
7
7
  export interface CrewAgentRecentTool {
8
8
  tool: string;
9
9
  args?: string;
10
+ startedAt?: string;
10
11
  endedAt: string;
11
12
  }
12
13
 
@@ -44,6 +44,19 @@ const IrcParams = Type.Object({
44
44
 
45
45
  type IrcParams = Static<typeof IrcParams>;
46
46
 
47
+ /**
48
+ * Output schema for the irc tool's `details` field.
49
+ * All fields are optional — only present when relevant to the operation.
50
+ *
51
+ * Schema:
52
+ * op — Always present. "send" | "list"
53
+ * from — Sender agent ID. Present on all responses.
54
+ * to — Recipient agent ID. Present on send responses.
55
+ * delivered — Array of agent IDs that received the message. Present on send.
56
+ * notFound — Array of agent IDs that were unknown or unavailable. Present on send.
57
+ * peers — Array of { id, status } for list operation.
58
+ * error — Human-readable error description. Present when the operation failed.
59
+ */
47
60
  interface IrcDetails {
48
61
  op: "send" | "list";
49
62
  from?: string;
@@ -12,6 +12,7 @@
12
12
  import { defineTool, type ToolDefinition } from "@earendil-works/pi-coding-agent";
13
13
  import { Type, type Static } from "@sinclair/typebox";
14
14
  import type { YieldResult } from "../yield-handler.ts";
15
+ import { logInternalError } from "../../utils/internal-error.ts";
15
16
 
16
17
  const SubmitResultParams = Type.Object({
17
18
  summary: Type.String({ description: "Summary of completed work." }),
@@ -81,8 +82,8 @@ export function createSubmitResultTool(
81
82
  };
82
83
  try {
83
84
  onYield(result);
84
- } catch {
85
- // Yield handler failure should not prevent tool response
85
+ } catch (error) {
86
+ logInternalError("submit-result-tool.yield", error, toolCallId);
86
87
  }
87
88
  return response;
88
89
  },
@@ -28,11 +28,10 @@ export class DeliveryCoordinator {
28
28
  private flushing = false;
29
29
  private readonly deps: DeliveryCoordinatorDeps;
30
30
  private ttlTimer: ReturnType<typeof setInterval> | undefined;
31
+ private timerStarted = false;
31
32
 
32
33
  constructor(deps: DeliveryCoordinatorDeps) {
33
34
  this.deps = deps;
34
- this.ttlTimer = setInterval(() => this.evictExpired(), 60_000);
35
- this.ttlTimer.unref();
36
35
  }
37
36
 
38
37
  activate(sessionId: string): void {
@@ -102,9 +101,11 @@ export class DeliveryCoordinator {
102
101
 
103
102
  flushQueuedResults(): void {
104
103
  if (!this.active || this.pending.length === 0) return;
105
- // H7: Set flushing BEFORE splice to prevent re-entrancy
104
+ // HIGH-16/ MEDIUM-16: Set flushing BEFORE splice to prevent re-entrancy
106
105
  if (this.flushing) return;
107
106
  this.flushing = true;
107
+ // Note: this.flushing is now set, so concurrent calls will exit early due to the check above
108
+ // This serves as a simple lock to prevent race conditions
108
109
  const batch = this.pending.splice(0);
109
110
  try {
110
111
  const retryLater: PendingDelivery[] = [];
@@ -162,6 +163,12 @@ export class DeliveryCoordinator {
162
163
  }
163
164
 
164
165
  private enqueue(delivery: PendingDelivery): void {
166
+ // Lazily start the TTL timer on first enqueue (only if never started)
167
+ if (!this.timerStarted) {
168
+ this.timerStarted = true;
169
+ this.ttlTimer = setInterval(() => this.evictExpired(), 60_000);
170
+ this.ttlTimer.unref();
171
+ }
165
172
  this.pending.push({ ...delivery, generation: this.generation });
166
173
  }
167
174