pi-crew 0.1.46 → 0.1.49

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (253)
  1. package/CHANGELOG.md +97 -0
  2. package/agents/analyst.md +11 -11
  3. package/agents/critic.md +11 -11
  4. package/agents/executor.md +11 -11
  5. package/agents/explorer.md +11 -11
  6. package/agents/planner.md +11 -11
  7. package/agents/reviewer.md +11 -11
  8. package/agents/security-reviewer.md +11 -11
  9. package/agents/test-engineer.md +11 -11
  10. package/agents/verifier.md +11 -11
  11. package/agents/writer.md +11 -11
  12. package/docs/next-upgrade-roadmap.md +117 -42
  13. package/docs/refactor-tasks-phase3.md +394 -394
  14. package/docs/refactor-tasks-phase4.md +564 -564
  15. package/docs/refactor-tasks-phase5.md +402 -402
  16. package/docs/refactor-tasks-phase6.md +662 -662
  17. package/docs/research/AGENT-EXECUTION-ARCHITECTURE.md +261 -0
  18. package/docs/research/AGENT-LIFECYCLE-COMPARISON.md +111 -0
  19. package/docs/research/AUDIT_OH_MY_PI.md +261 -0
  20. package/docs/research/AUDIT_PI_CREW.md +457 -0
  21. package/docs/research/CAVEMAN-DEEP-RESEARCH.md +281 -0
  22. package/docs/research/COMPARISON_OH_MY_PI_VS_PI_CREW.md +264 -0
  23. package/docs/research/DEEP-RESEARCH-PI-POWERBAR.md +343 -0
  24. package/docs/research/DEEP_RESEARCH_SUBAGENT_ARCHITECTURE.md +480 -0
  25. package/docs/research/GAP_CLOSURE_IMPLEMENTATION_PLAN.md +354 -0
  26. package/docs/research/IMPLEMENTATION_PLAN.md +385 -0
  27. package/docs/research/LIVE-SESSION-PRODUCTION-READY-PLAN.md +502 -0
  28. package/docs/research/OH-MY-PI-DEEP-RESEARCH-v14.7.6.md +266 -0
  29. package/docs/research/REMAINING-GAPS-PLAN.md +363 -0
  30. package/docs/research/SESSION-SUMMARY-2026-05-08.md +146 -0
  31. package/docs/research/UI-RESPONSIVENESS-AUDIT.md +173 -0
  32. package/docs/research-awesome-agent-skills-distillation.md +100 -100
  33. package/docs/research-extension-examples.md +297 -297
  34. package/docs/research-extension-system.md +324 -324
  35. package/docs/research-oh-my-pi-distillation.md +56 -9
  36. package/docs/research-optimization-plan.md +548 -548
  37. package/docs/research-phase10-distillation.md +198 -198
  38. package/docs/research-phase11-distillation.md +201 -201
  39. package/docs/research-pi-coding-agent.md +357 -357
  40. package/docs/research-source-pi-crew-reference.md +174 -174
  41. package/docs/runtime-flow.md +148 -148
  42. package/docs/source-runtime-refactor-map.md +107 -107
  43. package/index.ts +6 -6
  44. package/package.json +99 -98
  45. package/schema.json +8 -0
  46. package/skills/async-worker-recovery/SKILL.md +42 -42
  47. package/skills/context-artifact-hygiene/SKILL.md +52 -52
  48. package/skills/delegation-patterns/SKILL.md +54 -54
  49. package/skills/mailbox-interactive/SKILL.md +40 -40
  50. package/skills/model-routing-context/SKILL.md +39 -39
  51. package/skills/multi-perspective-review/SKILL.md +58 -58
  52. package/skills/observability-reliability/SKILL.md +41 -41
  53. package/skills/orchestration/SKILL.md +157 -0
  54. package/skills/ownership-session-security/SKILL.md +41 -41
  55. package/skills/pi-extension-lifecycle/SKILL.md +39 -39
  56. package/skills/requirements-to-task-packet/SKILL.md +63 -63
  57. package/skills/resource-discovery-config/SKILL.md +41 -41
  58. package/skills/runtime-state-reader/SKILL.md +44 -44
  59. package/skills/secure-agent-orchestration-review/SKILL.md +45 -45
  60. package/skills/state-mutation-locking/SKILL.md +42 -42
  61. package/skills/systematic-debugging/SKILL.md +67 -67
  62. package/skills/ui-render-performance/SKILL.md +39 -39
  63. package/skills/verification-before-done/SKILL.md +57 -57
  64. package/skills/worktree-isolation/SKILL.md +39 -39
  65. package/src/agents/agent-config.ts +6 -0
  66. package/src/agents/agent-search.ts +98 -0
  67. package/src/agents/agent-serializer.ts +4 -0
  68. package/src/agents/discover-agents.ts +17 -4
  69. package/src/config/config.ts +24 -0
  70. package/src/config/defaults.ts +11 -0
  71. package/src/extension/autonomous-policy.ts +26 -33
  72. package/src/extension/cross-extension-rpc.ts +82 -82
  73. package/src/extension/help.ts +1 -0
  74. package/src/extension/management.ts +5 -0
  75. package/src/extension/register.ts +58 -13
  76. package/src/extension/registration/commands.ts +33 -1
  77. package/src/extension/registration/compaction-guard.ts +125 -125
  78. package/src/extension/registration/team-tool.ts +6 -4
  79. package/src/extension/run-bundle-schema.ts +89 -89
  80. package/src/extension/run-index.ts +24 -18
  81. package/src/extension/run-maintenance.ts +68 -62
  82. package/src/extension/team-tool/api.ts +23 -2
  83. package/src/extension/team-tool/cancel.ts +86 -11
  84. package/src/extension/team-tool/context.ts +3 -0
  85. package/src/extension/team-tool/handle-settings.ts +188 -188
  86. package/src/extension/team-tool/inspect.ts +41 -41
  87. package/src/extension/team-tool/intent-policy.ts +42 -0
  88. package/src/extension/team-tool/lifecycle-actions.ts +47 -18
  89. package/src/extension/team-tool/parallel-dispatch.ts +156 -0
  90. package/src/extension/team-tool/plan.ts +19 -19
  91. package/src/extension/team-tool/respond.ts +10 -2
  92. package/src/extension/team-tool/run.ts +3 -2
  93. package/src/extension/team-tool/status.ts +1 -1
  94. package/src/extension/team-tool-types.ts +1 -0
  95. package/src/extension/team-tool.ts +13 -3
  96. package/src/hooks/registry.ts +61 -0
  97. package/src/hooks/types.ts +41 -0
  98. package/src/i18n.ts +184 -184
  99. package/src/observability/exporters/otlp-exporter.ts +77 -77
  100. package/src/prompt/prompt-runtime.ts +72 -72
  101. package/src/runtime/agent-control.ts +108 -2
  102. package/src/runtime/agent-memory.ts +72 -72
  103. package/src/runtime/agent-observability.ts +114 -114
  104. package/src/runtime/async-marker.ts +26 -26
  105. package/src/runtime/async-runner.ts +3 -1
  106. package/src/runtime/attention-events.ts +28 -28
  107. package/src/runtime/background-runner.ts +19 -0
  108. package/src/runtime/cancellation-token.ts +89 -0
  109. package/src/runtime/cancellation.ts +61 -51
  110. package/src/runtime/capability-inventory.ts +116 -0
  111. package/src/runtime/child-pi.ts +2 -1
  112. package/src/runtime/code-summary.ts +247 -0
  113. package/src/runtime/completion-guard.ts +190 -190
  114. package/src/runtime/crash-recovery.ts +181 -0
  115. package/src/runtime/crew-agent-records.ts +35 -7
  116. package/src/runtime/crew-agent-runtime.ts +1 -0
  117. package/src/runtime/custom-tools/irc-tool.ts +201 -0
  118. package/src/runtime/custom-tools/submit-result-tool.ts +90 -0
  119. package/src/runtime/delivery-coordinator.ts +3 -1
  120. package/src/runtime/direct-run.ts +35 -35
  121. package/src/runtime/effectiveness.ts +81 -76
  122. package/src/runtime/event-stream-bridge.ts +90 -0
  123. package/src/runtime/foreground-control.ts +82 -82
  124. package/src/runtime/green-contract.ts +46 -46
  125. package/src/runtime/group-join.ts +106 -106
  126. package/src/runtime/heartbeat-gradient.ts +28 -28
  127. package/src/runtime/heartbeat-watcher.ts +124 -124
  128. package/src/runtime/live-agent-control.ts +88 -88
  129. package/src/runtime/live-agent-manager.ts +78 -2
  130. package/src/runtime/live-control-realtime.ts +36 -36
  131. package/src/runtime/live-extension-bridge.ts +150 -0
  132. package/src/runtime/live-irc.ts +92 -0
  133. package/src/runtime/live-session-health.ts +100 -0
  134. package/src/runtime/live-session-runtime.ts +297 -7
  135. package/src/runtime/mcp-proxy.ts +113 -0
  136. package/src/runtime/notebook-helpers.ts +90 -0
  137. package/src/runtime/orphan-sentinel.ts +7 -0
  138. package/src/runtime/output-validator.ts +187 -0
  139. package/src/runtime/parallel-research.ts +44 -44
  140. package/src/runtime/parallel-utils.ts +57 -0
  141. package/src/runtime/parent-guard.ts +80 -0
  142. package/src/runtime/pi-json-output.ts +111 -111
  143. package/src/runtime/policy-engine.ts +79 -79
  144. package/src/runtime/progress-event-coalescer.ts +43 -43
  145. package/src/runtime/prose-compressor.ts +164 -0
  146. package/src/runtime/recovery-recipes.ts +74 -74
  147. package/src/runtime/result-extractor.ts +121 -0
  148. package/src/runtime/role-permission.ts +39 -39
  149. package/src/runtime/runtime-resolver.ts +1 -4
  150. package/src/runtime/semaphore.ts +131 -0
  151. package/src/runtime/sensitive-paths.ts +92 -0
  152. package/src/runtime/session-resources.ts +25 -25
  153. package/src/runtime/session-snapshot.ts +59 -59
  154. package/src/runtime/session-usage.ts +79 -79
  155. package/src/runtime/sidechain-output.ts +29 -29
  156. package/src/runtime/stream-preview.ts +177 -0
  157. package/src/runtime/subagent-manager.ts +3 -2
  158. package/src/runtime/subprocess-tool-registry.ts +67 -0
  159. package/src/runtime/supervisor-contact.ts +59 -59
  160. package/src/runtime/task-display.ts +38 -38
  161. package/src/runtime/task-output-context.ts +59 -9
  162. package/src/runtime/task-runner/capabilities.ts +78 -78
  163. package/src/runtime/task-runner/live-executor.ts +2 -0
  164. package/src/runtime/task-runner/progress.ts +119 -119
  165. package/src/runtime/task-runner/prompt-builder.ts +70 -8
  166. package/src/runtime/task-runner/prompt-pipeline.ts +64 -64
  167. package/src/runtime/task-runner/result-utils.ts +14 -14
  168. package/src/runtime/task-runner/run-projection.ts +104 -0
  169. package/src/runtime/task-runner/state-helpers.ts +22 -22
  170. package/src/runtime/task-runner.ts +75 -4
  171. package/src/runtime/team-runner.ts +60 -8
  172. package/src/runtime/worker-heartbeat.ts +21 -21
  173. package/src/runtime/worker-startup.ts +57 -57
  174. package/src/runtime/workspace-tree.ts +298 -0
  175. package/src/runtime/yield-handler.ts +189 -0
  176. package/src/schema/config-schema.ts +6 -0
  177. package/src/schema/team-tool-schema.ts +11 -1
  178. package/src/skills/discover-skills.ts +67 -0
  179. package/src/state/active-run-registry.ts +4 -2
  180. package/src/state/artifact-store.ts +4 -1
  181. package/src/state/atomic-write.ts +50 -1
  182. package/src/state/blob-store.ts +117 -0
  183. package/src/state/contracts.ts +1 -0
  184. package/src/state/event-log-rotation.ts +158 -0
  185. package/src/state/event-log.ts +52 -2
  186. package/src/state/mailbox.ts +87 -7
  187. package/src/state/state-store.ts +24 -4
  188. package/src/state/task-claims.ts +44 -44
  189. package/src/state/types.ts +20 -0
  190. package/src/state/usage.ts +29 -29
  191. package/src/subagents/async-entry.ts +1 -1
  192. package/src/subagents/index.ts +3 -3
  193. package/src/subagents/live/control.ts +1 -1
  194. package/src/subagents/live/manager.ts +1 -1
  195. package/src/subagents/live/realtime.ts +1 -1
  196. package/src/subagents/live/session-runtime.ts +1 -1
  197. package/src/subagents/manager.ts +1 -1
  198. package/src/subagents/spawn.ts +1 -1
  199. package/src/teams/team-serializer.ts +38 -38
  200. package/src/types/diff.d.ts +18 -18
  201. package/src/ui/agent-management-overlay.ts +144 -0
  202. package/src/ui/crew-footer.ts +101 -101
  203. package/src/ui/crew-select-list.ts +111 -111
  204. package/src/ui/crew-widget.ts +11 -2
  205. package/src/ui/dashboard-panes/cancellation-pane.ts +43 -0
  206. package/src/ui/dashboard-panes/capability-pane.ts +60 -0
  207. package/src/ui/dashboard-panes/mailbox-pane.ts +35 -11
  208. package/src/ui/dashboard-panes/metrics-pane.ts +34 -34
  209. package/src/ui/dynamic-border.ts +25 -25
  210. package/src/ui/layout-primitives.ts +106 -106
  211. package/src/ui/live-run-sidebar.ts +4 -0
  212. package/src/ui/loaders.ts +158 -158
  213. package/src/ui/powerbar-publisher.ts +77 -15
  214. package/src/ui/render-coalescer.ts +51 -0
  215. package/src/ui/render-diff.ts +119 -119
  216. package/src/ui/render-scheduler.ts +143 -143
  217. package/src/ui/run-dashboard.ts +4 -0
  218. package/src/ui/run-event-bus.ts +209 -0
  219. package/src/ui/run-snapshot-cache.ts +68 -16
  220. package/src/ui/snapshot-types.ts +8 -0
  221. package/src/ui/spinner.ts +17 -17
  222. package/src/ui/status-colors.ts +58 -58
  223. package/src/ui/syntax-highlight.ts +116 -116
  224. package/src/ui/transcript-entries.ts +258 -0
  225. package/src/utils/atomic-write.ts +33 -33
  226. package/src/utils/completion-dedupe.ts +63 -63
  227. package/src/utils/frontmatter.ts +68 -68
  228. package/src/utils/git.ts +262 -262
  229. package/src/utils/ids.ts +17 -12
  230. package/src/utils/incremental-reader.ts +104 -0
  231. package/src/utils/names.ts +27 -27
  232. package/src/utils/redaction.ts +44 -44
  233. package/src/utils/safe-paths.ts +47 -47
  234. package/src/utils/scan-cache.ts +137 -0
  235. package/src/utils/sleep.ts +32 -32
  236. package/src/utils/sse-parser.ts +134 -0
  237. package/src/utils/task-name-generator.ts +337 -0
  238. package/src/utils/visual.ts +33 -2
  239. package/src/workflows/validate-workflow.ts +40 -40
  240. package/src/worktree/branch-freshness.ts +45 -45
  241. package/src/worktree/cleanup.ts +2 -1
  242. package/teams/default.team.md +12 -12
  243. package/teams/fast-fix.team.md +11 -11
  244. package/teams/implementation.team.md +18 -18
  245. package/teams/parallel-research.team.md +14 -14
  246. package/teams/research.team.md +11 -11
  247. package/teams/review.team.md +12 -12
  248. package/workflows/default.workflow.md +29 -29
  249. package/workflows/fast-fix.workflow.md +22 -22
  250. package/workflows/implementation.workflow.md +38 -38
  251. package/workflows/parallel-research.workflow.md +46 -46
  252. package/workflows/research.workflow.md +22 -22
  253. package/workflows/review.workflow.md +30 -30
@@ -0,0 +1,100 @@
1
+ /**
2
+ * Phase 8: Monitoring and observability for live-session workers.
3
+ *
4
+ * Provides health checks, metrics collection, and diagnostics
5
+ * for live-session workers running in-process.
6
+ */
7
+
8
/**
 * Aggregate, point-in-time view of the live agents handed to
 * `collectLiveSessionHealth`.
 */
export interface LiveSessionHealth {
	/** Size of the agent list the snapshot was built from, regardless of status. */
	totalAgents: number;
	/** How many agents reported the status `"running"`. */
	runningAgents: number;
	/** How many agents reported the status `"idle"`. */
	idleAgents: number;
	/** How many agents reported the status `"completed"`. */
	completedAgents: number;
	/** How many agents reported the status `"failed"`. */
	failedAgents: number;
	/** Sum of input and output tokens over every agent whose usage could be looked up. */
	totalTokens: number;
	/** ISO-8601 time at which the snapshot was taken. */
	timestamp: string;
}
24
+
25
/** Per-agent metrics record for a single live-session worker. */
export interface LiveSessionMetrics {
	/** Identifier of the live agent the record belongs to. */
	agentId: string;
	/** Identifier of the task the agent is working on. */
	taskId: string;
	/** Agent status as a free-form string (not narrowed to a union here). */
	status: string;
	/** Usage counters accumulated from session stats; every field is optional. */
	usage?: {
		input?: number;
		output?: number;
		cacheRead?: number;
		cacheWrite?: number;
		cost?: number;
		turns?: number;
	};
	/** How long the session has lasted, in milliseconds. */
	durationMs?: number;
	/** Count of IRC messages the agent has received. */
	ircMessagesReceived?: number;
	/** Count of yield reminders that were sent to the agent. */
	yieldReminders?: number;
	/** True once the agent has called yield. */
	yieldCalled: boolean;
}
47
+
48
/**
 * Builds a health snapshot from a list of live agent handles.
 *
 * Each agent is counted by status (`"running"`, `"idle"`, `"completed"`,
 * `"failed"`; any other status only contributes to `totalAgents`). For agents
 * that carry a non-empty string `agentId`, `getUsage` is consulted and the
 * agent's input and output token counts are added to `totalTokens`.
 *
 * @param agents - Agent handles. Only `status` is required; `agentId` is
 *   optional and agents without one are skipped for token accounting.
 * @param getUsage - Looks up usage for an agent id; may return `undefined`
 *   when no usage is known.
 * @returns The aggregated snapshot, stamped with the current time in ISO-8601.
 */
export function collectLiveSessionHealth(
	agents: Array<{ status: string; agentId?: string }>,
	getUsage: (agentId: string) => { input?: number; output?: number; turns?: number } | undefined,
): LiveSessionHealth {
	// Missing, NaN or infinite counters must not poison the running total.
	const asCount = (value: number | undefined): number =>
		typeof value === "number" && Number.isFinite(value) ? value : 0;

	let running = 0;
	let idle = 0;
	let completed = 0;
	let failed = 0;
	let totalTokens = 0;

	for (const agent of agents) {
		switch (agent.status) {
			case "running":
				running++;
				break;
			case "idle":
				idle++;
				break;
			case "completed":
				completed++;
				break;
			case "failed":
				failed++;
				break;
			default:
				// Unknown statuses are still reflected in totalAgents.
				break;
		}

		// Callers may pass handles typed without agentId, so verify at runtime
		// that a usable string id is present before asking for usage.
		const agentId: unknown = agent.agentId;
		if (typeof agentId !== "string" || agentId.length === 0) continue;

		const usage = getUsage(agentId);
		if (usage) {
			totalTokens += asCount(usage.input) + asCount(usage.output);
		}
	}

	return {
		totalAgents: agents.length,
		runningAgents: running,
		idleAgents: idle,
		completedAgents: completed,
		failedAgents: failed,
		totalTokens,
		timestamp: new Date().toISOString(),
	};
}
92
+
93
/**
 * Renders a health snapshot as a single log-friendly line.
 *
 * @param health - Snapshot to describe; the timestamp is not included.
 * @returns A `[Live-Session Health]`-prefixed line of `key=value` pairs.
 */
export function formatLiveSessionDiagnostics(health: LiveSessionHealth): string {
	const { totalAgents, runningAgents, idleAgents, completedAgents, failedAgents, totalTokens } = health;
	return `[Live-Session Health] agents=${totalAgents} running=${runningAgents} idle=${idleAgents} completed=${completedAgents} failed=${failedAgents} tokens=${totalTokens}`;
}
@@ -12,6 +12,19 @@ import type { WorkflowStep } from "../workflows/workflow-config.ts";
12
12
  import { isLiveSessionRuntimeAvailable } from "./runtime-resolver.ts";
13
13
  import { redactSecrets } from "../utils/redaction.ts";
14
14
  import { buildConfiguredModelRouting } from "./model-fallback.ts";
15
+ import { DEFAULT_LIVE_SESSION } from "../config/defaults.ts";
16
+ import { buildYieldReminder, hasYieldInOutput, isYieldEvent, extractYieldResult, validateYieldData, DEFAULT_YIELD_CONFIG, type YieldResult } from "./yield-handler.ts";
17
+ import { buildMcpProxyFromSession } from "./mcp-proxy.ts";
18
+ import { createSubmitResultTool } from "./custom-tools/submit-result-tool.ts";
19
+ import { createIrcTool } from "./custom-tools/irc-tool.ts";
20
+ import { buildExtensionBridge } from "./live-extension-bridge.ts";
21
+ import { logInternalError } from "../utils/internal-error.ts";
22
+ // prose-compressor imported for custom tool descriptions below;
23
+ // tool description compression for SDK-managed tools awaits SDK support.
24
+ import { compressToolDescription } from "./prose-compressor.ts";
25
+ import { buildSensitivePathConstraint } from "./sensitive-paths.ts";
26
+ import { collectLiveSessionHealth, formatLiveSessionDiagnostics, type LiveSessionHealth } from "./live-session-health.ts";
27
+ import { listLiveAgents } from "./live-agent-manager.ts";
15
28
 
16
29
  export interface LiveSessionSpawnInput {
17
30
  manifest: TeamRunManifest;
@@ -30,6 +43,8 @@ export interface LiveSessionSpawnInput {
30
43
  modelOverride?: string;
31
44
  teamRoleModel?: string;
32
45
  isCurrent?: () => boolean;
46
+ /** Phase 2: Output schema for validating yield data. */
47
+ outputSchema?: unknown;
33
48
  }
34
49
 
35
50
  export interface LiveSessionRunResult {
@@ -40,6 +55,8 @@ export interface LiveSessionRunResult {
40
55
  jsonEvents: number;
41
56
  usage?: UsageState;
42
57
  error?: string;
58
+ /** Phase 1: Extracted yield result from submit_result tool call. */
59
+ yieldResult?: YieldResult;
43
60
  }
44
61
 
45
62
  export interface LiveSessionUnavailableResult {
@@ -136,16 +153,106 @@ function modelFromRegistry(modelRegistry: unknown, modelId: string | undefined):
136
153
  }
137
154
  }
138
155
 
156
/**
 * How tersely each role is asked to write (caveman-inspired token
 * optimization). Roles that are not listed fall back to "full" in
 * `buildCommunicationStyle`.
 */
const ROLE_INTENSITY: Record<string, "lite" | "full" | "ultra"> = {
	explorer: "ultra",
	analyst: "full",
	planner: "full",
	critic: "full",
	executor: "full",
	reviewer: "full",
	"security-reviewer": "full",
	"test-engineer": "full",
	verifier: "full",
	writer: "lite",
};

/**
 * Produces the "## Communication" prompt section for a role.
 *
 * @param role - Role name used to look up the intensity in `ROLE_INTENSITY`.
 * @returns The lite, ultra-compressed or (default) compressed style block.
 */
function buildCommunicationStyle(role: string): string {
	switch (ROLE_INTENSITY[role] ?? "full") {
		case "lite":
			return "## Communication\nProfessional concise. No filler/hedging. Full sentences OK.";
		case "ultra": {
			const ultraLines = [
				"## Communication (ultra-compressed)",
				"Drop: articles, filler, hedging, pleasantries. Fragments OK.",
				"Pattern: [thing] [action] [reason].",
				"Code/paths/symbols: exact, never abbreviated. Errors quoted exact.",
				"Abbreviate prose words: DB/auth/config/req/res/fn/impl.",
				"Arrows for causality: X → Y. One word when one word enough.",
				"Security/destructive: write normal English. Resume compressed after.",
			];
			return ultraLines.join("\n");
		}
		default: {
			// "full", plus anything that is neither "lite" nor "ultra".
			const fullLines = [
				"## Communication (compressed)",
				"Drop: articles (a/an/the), filler (just/really/basically/actually/simply), hedging, pleasantries.",
				"Short synonyms. Fragments OK. Pattern: [thing] [action] [reason]. [next step].",
				"Code/paths/symbols: exact. Errors quoted exact.",
				"Security/destructive: write normal English. Resume compressed after.",
			];
			return fullLines.join("\n");
		}
	}
}
190
+
191
/**
 * Produces the "## Output Contract" prompt section for a role.
 *
 * @param role - Role name. explorer, executor, reviewer, security-reviewer,
 *   verifier and writer have a dedicated contract.
 * @returns The contract text, or an empty string for roles without a strict
 *   format (planner, critic, analyst, test-engineer and anything unknown).
 */
function buildOutputContract(role: string): string {
	const heading = "## Output Contract";
	const contract = (...rules: string[]): string => [heading, ...rules].join("\n");

	switch (role) {
		case "explorer":
			return contract(
				"<path>:<line> — `<symbol>` — <≤6 word note>",
				"Group: Defs: / Refs: / Callers: / Tests: / Sites:",
				"Zero hits → \"No match.\"",
				"Last line → totals: N defs, M refs.",
			);
		case "executor":
			return contract(
				"<path>:<line-range> — <change ≤10 words>.",
				"verified: <re-read OK | mismatch @ path:line>.",
				"Refusal tokens: too-big. / needs-confirm. / ambiguous. / regressed.",
			);
		case "reviewer":
		case "security-reviewer":
			return contract(
				"<path>:<line>: <emoji> <severity>: <problem>. <fix>.",
				"Severity: 🔴 bug, 🟡 risk, 🔵 nit, ❓ question.",
				"Zero findings → \"No issues.\"",
				"Sorted: file order → ascending line numbers.",
			);
		case "verifier":
			return contract(
				"PASS: <what verified> — <evidence ≤20 words>.",
				"FAIL: <what failed> — <reason>. <expected vs actual>.",
				"Evidence: file paths, test output, or diffs.",
			);
		case "writer":
			return contract("Write clear documentation. Full sentences. No compression.");
		default:
			// planner, critic, analyst, test-engineer: no strict format
			return "";
	}
}
221
+
222
/**
 * Phase 3 (caveman): placeholder for compressing tool descriptions in a live
 * session, with the goal of lowering input token cost. The intended effect is
 * to shorten verbose description text only (e.g. "This tool allows you to
 * search for files in the filesystem..." → "Search files in filesystem.")
 * without ever touching tool names or parameters.
 *
 * Currently this performs no mutation: after checking that the session can
 * list its active tools it simply returns.
 *
 * @param session - Live session whose tool descriptions would be compressed.
 */
function compressSessionToolDescriptions(session: LiveSessionLike): void {
	const canListTools = typeof session.getActiveToolNames === "function";
	if (!canListTools) return;
	// Deliberately empty. According to the original author's note, the Pi SDK
	// offers no way to change a tool's description yet; the compression logic
	// (see compressToolDescription) is meant to be added here once it does.
}
237
+
139
238
  function liveSystemPrompt(input: LiveSessionSpawnInput): string {
140
239
  const memory = input.agent.memory ? buildMemoryBlock(input.agent.name, input.agent.memory, input.task.cwd, Boolean(input.agent.tools?.some((tool) => tool === "write" || tool === "edit"))) : "";
240
+ const role = input.task.role;
241
+ const styleBlock = buildCommunicationStyle(role);
242
+ const contractBlock = buildOutputContract(role);
243
+ const sensitiveConstraint = buildSensitivePathConstraint();
141
244
  return [
142
245
  "# pi-crew Live Subagent",
143
246
  `Run ID: ${input.manifest.runId}`,
144
247
  `Task ID: ${input.task.id}`,
145
- `Role: ${input.task.role}`,
248
+ `Role: ${role}`,
146
249
  `Agent: ${input.agent.name}`,
147
250
  `Working directory: ${input.task.cwd}`,
148
251
  "",
252
+ styleBlock,
253
+ contractBlock,
254
+ sensitiveConstraint,
255
+ "",
149
256
  input.agent.systemPrompt || "Follow the user task exactly and report verification evidence.",
150
257
  memory ? `\n${memory}` : "",
151
258
  ].filter(Boolean).join("\n");
@@ -174,11 +281,15 @@ function usageFromStats(stats: unknown): UsageState | undefined {
174
281
  export async function probeLiveSessionRuntime(): Promise<LiveSessionUnavailableResult | LiveSessionPlannedResult> {
175
282
  const availability = await isLiveSessionRuntimeAvailable();
176
283
  if (!availability.available) return { available: false, reason: availability.reason ?? "Live-session runtime is unavailable." };
177
- return { available: true, reason: "Live-session SDK exports are available and pi-crew can run experimental in-process live agents when runtime.mode=live-session." };
284
+ return { available: true, reason: "Live-session SDK exports are available. pi-crew can run in-process live agents when runtime.mode=live-session." };
178
285
  }
179
286
 
180
287
  export async function runLiveSessionTask(input: LiveSessionSpawnInput): Promise<LiveSessionRunResult> {
181
288
  const isCurrent = input.isCurrent ?? (() => true);
289
+
290
+ // G1: Capture yield result from custom tool callback
291
+ let customToolYieldResult: YieldResult | undefined;
292
+ let customToolYieldResolved = false;
182
293
  if (process.env.PI_CREW_MOCK_LIVE_SESSION === "success") {
183
294
  const agentId = `${input.manifest.runId}:${input.task.id}`;
184
295
  const inherited = input.runtimeConfig?.inheritContext === true && input.parentContext ? ` with inherited context: ${input.parentContext}` : "";
@@ -205,6 +316,8 @@ export async function runLiveSessionTask(input: LiveSessionSpawnInput): Promise<
205
316
  let controlTimer: ReturnType<typeof setInterval> | undefined;
206
317
  let stdout = "";
207
318
  let jsonEvents = 0;
319
+ const collectedJsonEvents: Record<string, unknown>[] = [];
320
+ let yieldResult: YieldResult | undefined;
208
321
  try {
209
322
  const agentDir = typeof mod.getAgentDir === "function" ? mod.getAgentDir() : undefined;
210
323
  let resourceLoader: unknown;
@@ -222,6 +335,19 @@ export async function runLiveSessionTask(input: LiveSessionSpawnInput): Promise<
222
335
  }
223
336
  const modelRouting = buildConfiguredModelRouting({ overrideModel: input.modelOverride, stepModel: input.step.model, teamRoleModel: input.teamRoleModel, agentModel: input.agent.model, fallbackModels: input.agent.fallbackModels, parentModel: input.parentModel, modelRegistry: input.modelRegistry, cwd: input.manifest.cwd });
224
337
  const resolvedModel = modelFromRegistry(input.modelRegistry, modelRouting.candidates[0] ?? modelRouting.requested) ?? input.parentModel;
338
+ // Phase 4: MCP proxy — will be determined after session creation
339
+ // (we check parent's MCP tools and share connections when available)
340
+ const mcpProxy = buildMcpProxyFromSession([], { shareMcp: true });
341
+
342
+ // G1: Build custom tools (submit_result + irc)
343
+ const agentId = `${input.manifest.runId}:${input.task.id}`;
344
+ const submitResultTool = createSubmitResultTool((result) => {
345
+ customToolYieldResult = result;
346
+ customToolYieldResolved = true;
347
+ });
348
+ const ircTool = createIrcTool(agentId);
349
+ const customTools = [submitResultTool, ircTool];
350
+
225
351
  const created = await mod.createAgentSession({
226
352
  cwd: input.task.cwd,
227
353
  ...(agentDir ? { agentDir } : {}),
@@ -231,11 +357,34 @@ export async function runLiveSessionTask(input: LiveSessionSpawnInput): Promise<
231
357
  ...(input.modelRegistry ? { modelRegistry: input.modelRegistry } : {}),
232
358
  ...(resolvedModel ? { model: resolvedModel } : {}),
233
359
  ...(input.agent.thinking ? { thinkingLevel: input.agent.thinking } : {}),
360
+ ...(mcpProxy.enableMcp ? {} : { enableMCP: false }),
361
+ customTools,
234
362
  });
235
363
  session = created.session;
236
364
  filterActiveTools(session, input.agent);
237
365
  await session.bindExtensions?.({});
238
- const agentId = `${input.manifest.runId}:${input.task.id}`;
366
+
367
+ // Phase 3 (caveman): Compress tool descriptions to reduce input token cost
368
+ compressSessionToolDescriptions(session);
369
+
370
+ // Phase 5: Initialize extension runner bridge if available
371
+ // The bridge provides extension-like APIs (sendMessage, setActiveTools, etc.)
372
+ // to the extension runner if the session exposes one.
373
+ const extensionBridge = buildExtensionBridge(session as never);
374
+ if (extensionBridge) {
375
+ const extRunner = (session as Record<string, unknown>).extensionRunner;
376
+ if (extRunner && typeof (extRunner as Record<string, unknown>).initialize === "function") {
377
+ try {
378
+ (extRunner as { initialize: (apis: unknown, host: unknown) => void }).initialize(extensionBridge.apis, extensionBridge.host);
379
+ if (typeof (extRunner as Record<string, unknown>).emit === "function") {
380
+ await (extRunner as { emit: (event: unknown) => Promise<void> }).emit({ type: "session_start" });
381
+ }
382
+ } catch {
383
+ // Extension runner initialization failure should not block the session
384
+ }
385
+ }
386
+ }
387
+
239
388
  registerLiveAgent({ agentId, runId: input.manifest.runId, taskId: input.task.id, session, status: "running" });
240
389
  let controlCursor: LiveAgentControlCursor = { offset: 0 };
241
390
  const seenControlRequestIds = new Set<string>();
@@ -286,6 +435,10 @@ export async function runLiveSessionTask(input: LiveSessionSpawnInput): Promise<
286
435
  stdout += `${text}\n`;
287
436
  input.onOutput?.(text);
288
437
  }
438
+ // Phase 1: collect events for yield detection
439
+ if (event && typeof event === "object" && !Array.isArray(event)) {
440
+ collectedJsonEvents.push(event as Record<string, unknown>);
441
+ }
289
442
  });
290
443
  }
291
444
  if (input.signal) {
@@ -293,17 +446,154 @@ export async function runLiveSessionTask(input: LiveSessionSpawnInput): Promise<
293
446
  else input.signal.addEventListener("abort", () => { void session?.abort?.(); }, { once: true });
294
447
  }
295
448
  const effectivePrompt = input.runtimeConfig?.inheritContext === true && input.parentContext ? `${input.parentContext}\n\n---\n# Live Subagent Task\n${input.prompt}` : input.prompt;
296
- await session.prompt?.(effectivePrompt, { source: "api", expandPromptTemplates: false });
449
+
450
+ // Phase 3: Wrap session.prompt with timeout for graceful cancellation
451
+ const sessionTimeoutMs = DEFAULT_LIVE_SESSION.responseTimeoutMs;
452
+ const promptPromise = session.prompt?.(effectivePrompt, { source: "api", expandPromptTemplates: false });
453
+ if (promptPromise) {
454
+ const timeoutPromise = new Promise<void>((_, reject) => {
455
+ const timer = setTimeout(() => reject(new Error(`Live-session timed out after ${sessionTimeoutMs}ms`)), sessionTimeoutMs);
456
+ timer.unref();
457
+ input.signal?.addEventListener("abort", () => clearTimeout(timer), { once: true });
458
+ });
459
+ try {
460
+ await Promise.race([promptPromise, timeoutPromise]);
461
+ } catch (promptError) {
462
+ const msg = promptError instanceof Error ? promptError.message : String(promptError);
463
+ if (msg.includes("timed out")) {
464
+ await session.abort?.();
465
+ updateLiveAgentStatus(agentId, "failed");
466
+ return { available: true, exitCode: 1, stdout: stdout.trim(), stderr: msg, jsonEvents, error: msg };
467
+ }
468
+ throw promptError;
469
+ }
470
+ }
471
+
472
+ // --- Phase 1: Yield enforcement loop ---
473
+ // After the initial prompt completes, check if the worker called submit_result.
474
+ // Priority: 1) custom tool callback (G1), 2) JSON event detection (legacy).
475
+ const yieldConfig = input.runtimeConfig?.yield ?? { enabled: DEFAULT_YIELD_CONFIG.enabled };
476
+ const yieldEnabled = yieldConfig.enabled !== false;
477
+ if (yieldEnabled && session) {
478
+ // Check custom tool callback first (G1)
479
+ if (customToolYieldResolved && customToolYieldResult) {
480
+ yieldResult = customToolYieldResult;
481
+ } else {
482
+ // Legacy: detect from JSON events
483
+ const alreadyYielded = hasYieldInOutput(collectedJsonEvents);
484
+ if (alreadyYielded) {
485
+ const yieldEvent = collectedJsonEvents.find((e) => isYieldEvent(e));
486
+ if (yieldEvent) yieldResult = extractYieldResult(yieldEvent);
487
+ }
488
+ }
489
+ // Phase 2: Validate yield data against output schema if provided
490
+ let schemaFailures = 0;
491
+ const maxSchemaFailures = 2;
492
+ if (yieldResult && input.outputSchema) {
493
+ const validation = await validateYieldData(yieldResult.structuredData, input.outputSchema);
494
+ if (!validation.valid) {
495
+ schemaFailures++;
496
+ yieldResult = undefined;
497
+ customToolYieldResolved = false;
498
+ const schemaReminder = `Your submit_result data did not match the required schema: ${validation.error}. Please fix and call submit_result again with valid data.`;
499
+ try {
500
+ await session.prompt?.(schemaReminder, { source: "api", expandPromptTemplates: false });
501
+ } catch {
502
+ /* ignore */
503
+ }
504
+ await new Promise((resolve) => setTimeout(resolve, DEFAULT_LIVE_SESSION.yieldPollIntervalMs));
505
+ // Check again after schema reminder
506
+ if (customToolYieldResolved && customToolYieldResult) {
507
+ yieldResult = customToolYieldResult;
508
+ } else {
509
+ const newEvents = collectedJsonEvents.slice(-10);
510
+ if (hasYieldInOutput(newEvents)) {
511
+ const yieldEvent = newEvents.find((e) => isYieldEvent(e));
512
+ if (yieldEvent) {
513
+ const candidate = extractYieldResult(yieldEvent);
514
+ if (candidate && input.outputSchema) {
515
+ const revalidation = await validateYieldData(candidate.structuredData, input.outputSchema);
516
+ if (revalidation.valid || schemaFailures >= maxSchemaFailures) {
517
+ yieldResult = candidate;
518
+ }
519
+ }
520
+ }
521
+ }
522
+ }
523
+ }
524
+ }
525
+ // Reminder loop — only if yield not yet received
526
+ const maxReminders = yieldConfig.maxReminders ?? DEFAULT_LIVE_SESSION.maxYieldRetries;
527
+ let retryCount = 0;
528
+ while (!customToolYieldResolved && !yieldResult && retryCount < maxReminders && !input.signal?.aborted) {
529
+ retryCount++;
530
+ const reminder = buildYieldReminder(retryCount, maxReminders, yieldConfig.reminderPrompt);
531
+ try {
532
+ // G6: Constrain tool set to submit_result before sending reminder
533
+ const prevTools = typeof session.getActiveToolNames === "function" ? session.getActiveToolNames() : [];
534
+ if (typeof session.setActiveToolsByName === "function" && prevTools.length > 0) {
535
+ session.setActiveToolsByName(["submit_result"]);
536
+ }
537
+ await session.prompt?.(reminder, { source: "api", expandPromptTemplates: false });
538
+ // Restore previous tools
539
+ if (typeof session.setActiveToolsByName === "function" && prevTools.length > 0) {
540
+ session.setActiveToolsByName(prevTools);
541
+ }
542
+ } catch {
543
+ break;
544
+ }
545
+ const pollInterval = DEFAULT_LIVE_SESSION.yieldPollIntervalMs;
546
+ await new Promise((resolve) => setTimeout(resolve, pollInterval));
547
+ // Check custom tool callback
548
+ if (customToolYieldResolved && customToolYieldResult) {
549
+ yieldResult = customToolYieldResult;
550
+ break;
551
+ }
552
+ // Legacy: check JSON events
553
+ if (hasYieldInOutput(collectedJsonEvents.slice(-10))) {
554
+ const yieldEvent = collectedJsonEvents.slice(-10).find((e) => isYieldEvent(e));
555
+ if (yieldEvent) yieldResult = extractYieldResult(yieldEvent);
556
+ break;
557
+ }
558
+ }
559
+ if (!customToolYieldResolved && !yieldResult && !input.signal?.aborted && retryCount >= maxReminders) {
560
+ input.onEvent?.({ type: "task.attention", runId: input.manifest.runId, taskId: input.task.id, message: "Live-session worker completed without calling submit_result tool.", data: { activityState: "needs_attention", reason: "no_yield", attempts: retryCount } });
561
+ }
562
+ }
563
+
297
564
  const usage = usageFromStats(typeof session.getStats === "function" ? session.getStats() : session.stats);
298
565
  updateLiveAgentStatus(agentId, "completed");
299
- return { available: true, exitCode: 0, stdout: stdout.trim(), stderr: created.modelFallbackMessage ?? "", jsonEvents, usage };
566
+ return { available: true, exitCode: 0, stdout: stdout.trim(), stderr: created.modelFallbackMessage ?? "", jsonEvents, usage, yieldResult };
300
567
  } catch (error) {
301
568
  const message = error instanceof Error ? error.message : String(error);
569
+
570
+ // Phase 8: Log diagnostics on failure
571
+ try {
572
+ const agents = listLiveAgents();
573
+ const health = collectLiveSessionHealth(agents, () => undefined);
574
+ const diagnostics = formatLiveSessionDiagnostics(health);
575
+ input.onEvent?.({ type: "live-session.diagnostics", data: diagnostics });
576
+ } catch (diagError) {
577
+ logInternalError("live-session.diagnostics", diagError);
578
+ }
579
+
302
580
  updateLiveAgentStatus(`${input.manifest.runId}:${input.task.id}`, "failed");
303
581
  return { available: true, exitCode: 1, stdout: stdout.trim(), stderr: message, jsonEvents, error: message };
304
582
  } finally {
305
- if (controlTimer) clearInterval(controlTimer);
306
- unsubscribeControlRealtime?.();
583
+ // H6: Unsubscribe listeners FIRST before clearing timer to prevent race
307
584
  unsubscribe?.();
585
+ unsubscribeControlRealtime?.();
586
+ if (controlTimer) clearInterval(controlTimer);
587
+
588
+ // Phase 8: Emit final health snapshot
589
+ try {
590
+ const agents = listLiveAgents();
591
+ if (agents.length > 0) {
592
+ const health = collectLiveSessionHealth(agents, () => undefined);
593
+ input.onEvent?.({ type: "live-session.health", data: health });
594
+ }
595
+ } catch (healthError) {
596
+ logInternalError("live-session.health-snapshot", healthError);
597
+ }
308
598
  }
309
599
  }
@@ -0,0 +1,113 @@
1
+ /**
2
+ * G2: MCP Proxy for live-session workers.
3
+ *
4
+ * When the parent process has MCP servers configured, live-session workers
5
+ * can reuse those connections instead of establishing their own. This module
6
+ * discovers MCP tools available in the parent environment and creates proxy
7
+ * tool definitions that forward calls through the parent's connections.
8
+ *
9
+ * Strategy:
10
+ * 1. If the Pi SDK session has MCP tools after bindExtensions → use them directly
11
+ * 2. If not → create proxy custom tools that wrap MCP calls
12
+ * 3. If no MCP config exists → disable MCP in the session
13
+ *
14
+ * The Pi SDK's `createAgentSession` accepts a `customTools` array for injecting
15
+ * proxy tools. The session also accepts `enableMCP: false` to skip MCP discovery
16
+ * when proxying from the parent.
17
+ */
18
+
19
+ import { defineTool, type ToolDefinition } from "@mariozechner/pi-coding-agent";
20
+ import { Type, type Static, type TSchema } from "@sinclair/typebox";
21
+
/**
 * Describes how a live-session worker obtains MCP access: through its own
 * MCP setup, or through proxy tools supplied by the parent.
 */
export interface McpProxyConfig {
  /** True when MCP should be enabled in the child session. */
  enableMcp: boolean;
  /** Proxy tool definitions to inject via `customTools` (stand in for an MCP connection). */
  proxyTools: ToolDefinition<TSchema, unknown>[];
  /** Names of the MCP tools that are available; kept for metadata and tracking. */
  proxyToolNames: string[];
}
30
+
/**
 * Decide how a live-session worker should get access to MCP tools.
 *
 * Outcomes:
 * - `shareMcp === false`, or no parent tool names given: the child keeps MCP
 *   enabled and no names are recorded.
 * - Parent tool names given but no proxy tools could be created: the child
 *   keeps MCP enabled and the parent names are recorded for tracking.
 * - Proxy tools created: MCP is disabled in the child and the proxy tools
 *   are returned together with the parent names.
 *
 * @param options.parentMcpTools - MCP tool names from the parent session, if known.
 * @param options.shareMcp - Set to `false` to opt out of sharing; any other value shares.
 * @returns The MCP configuration to apply to the child session.
 */
export function buildMcpProxyConfig(options: {
  parentMcpTools?: string[];
  shareMcp?: boolean;
}): McpProxyConfig {
  // Shared shape for every "child discovers MCP by itself" outcome.
  const selfDiscover = (names: string[]): McpProxyConfig => ({
    enableMcp: true,
    proxyTools: [],
    proxyToolNames: names,
  });

  if (options.shareMcp === false) return selfDiscover([]);

  const inherited = options.parentMcpTools ?? [];
  if (inherited.length === 0) return selfDiscover([]);

  // Keep MCP enabled when no proxies exist, so the child does not lose all MCP access.
  const proxies = createMcpProxyTools(inherited);
  return proxies.length > 0
    ? { enableMcp: false, proxyTools: proxies, proxyToolNames: inherited }
    : selfDiscover(inherited);
}
65
+
/**
 * Placeholder for building proxy tool definitions that mirror the parent's MCP tools.
 *
 * This is currently a stub: it creates no tools and always returns an empty
 * array, because nothing here holds a reference to the parent's MCP manager
 * through which calls could be forwarded. `buildMcpProxyConfig` treats an
 * empty result as "no proxies available" and keeps MCP enabled in the child
 * session so it can discover MCP on its own.
 *
 * A later iteration is expected to wire real proxies to the parent's MCP
 * connections via an inter-process bridge.
 *
 * @param _toolNames - Parent MCP tool names; intentionally unused until call
 *   forwarding exists.
 * @returns Always an empty array for now.
 */
function createMcpProxyTools(
  _toolNames: string[],
): Array<ToolDefinition<TSchema, unknown>> {
  return [];
}
89
+
/**
 * Pick out the names that look like MCP tools from a list of active tool names.
 *
 * A name matches when it starts with "mcp__" or "mcp-", or when it contains
 * "__" anywhere and does not start with "submit_result"
 * (e.g. "mcp__filesystem__read_file"). This is a naming heuristic only.
 *
 * Entries that are not strings are ignored instead of throwing, and a name
 * that occurs more than once is reported once, in first-seen order.
 *
 * @param activeToolNames - Tool names currently active in a session; not mutated.
 * @returns The matching names without duplicates.
 */
export function discoverMcpToolNames(activeToolNames: readonly string[]): string[] {
  const found = new Set<string>();
  for (const name of activeToolNames) {
    // The list may originate from an untyped runtime object, so do not trust the static type.
    if (typeof name !== "string") continue;
    const hasMcpPrefix = name.startsWith("mcp__") || name.startsWith("mcp-");
    const hasNamespaceSeparator = name.includes("__") && !name.startsWith("submit_result");
    if (hasMcpPrefix || hasNamespaceSeparator) found.add(name);
  }
  return Array.from(found);
}
102
+
/**
 * Derive the MCP proxy configuration from a session's active tool names.
 *
 * The names are first narrowed to those that look like MCP tools, then handed
 * to `buildMcpProxyConfig` together with the optional `shareMcp` flag.
 *
 * @param activeToolNames - Tool names currently active in the parent session.
 * @param options - Optional settings; `shareMcp` is forwarded unchanged.
 * @returns The MCP configuration for the child session.
 */
export function buildMcpProxyFromSession(
  activeToolNames: string[],
  options?: { shareMcp?: boolean },
): McpProxyConfig {
  return buildMcpProxyConfig({
    parentMcpTools: discoverMcpToolNames(activeToolNames),
    shareMcp: options?.shareMcp,
  });
}