pi-crew 0.5.25 → 0.6.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (81)
  1. package/CHANGELOG.md +99 -0
  2. package/README.md +13 -11
  3. package/docs/patterns/command-agent-skill.md +71 -0
  4. package/package.json +1 -1
  5. package/skills/council/SKILL.md +163 -0
  6. package/src/agents/agent-config.ts +4 -1
  7. package/src/agents/discover-agents.ts +1 -0
  8. package/src/benchmark/feedback-loop.ts +4 -2
  9. package/src/extension/cross-extension-rpc.ts +48 -0
  10. package/src/extension/registration/commands.ts +2 -1
  11. package/src/extension/registration/subagent-tools.ts +2 -0
  12. package/src/extension/registration/team-tool.ts +2 -0
  13. package/src/extension/registration/viewers.ts +1 -0
  14. package/src/extension/run-export.ts +16 -1
  15. package/src/extension/run-import.ts +16 -0
  16. package/src/extension/team-tool/anchor.ts +5 -1
  17. package/src/extension/team-tool/api.ts +9 -4
  18. package/src/extension/team-tool/config-patch.ts +15 -1
  19. package/src/extension/team-tool.ts +2 -1
  20. package/src/hooks/registry.ts +9 -1
  21. package/src/hooks/types.ts +14 -0
  22. package/src/i18n.ts +15 -2
  23. package/src/observability/exporters/otlp-exporter.ts +73 -0
  24. package/src/runtime/adaptive-plan.ts +24 -0
  25. package/src/runtime/agent-control.ts +6 -3
  26. package/src/runtime/async-runner.ts +58 -3
  27. package/src/runtime/background-runner.ts +1 -1
  28. package/src/runtime/chain-parser.ts +192 -0
  29. package/src/runtime/chain-runner.ts +58 -0
  30. package/src/runtime/child-pi.ts +1 -1
  31. package/src/runtime/crew-agent-records.ts +4 -3
  32. package/src/runtime/cross-extension-rpc.ts +34 -8
  33. package/src/runtime/diagnostic-export.ts +3 -4
  34. package/src/runtime/dynamic-script-runner.ts +7 -7
  35. package/src/runtime/foreground-watchdog.ts +2 -2
  36. package/src/runtime/intercom-bridge.ts +178 -0
  37. package/src/runtime/live-agent-manager.ts +6 -3
  38. package/src/runtime/live-irc.ts +4 -2
  39. package/src/runtime/parallel-utils.ts +2 -1
  40. package/src/runtime/plan-templates.ts +200 -0
  41. package/src/runtime/post-checks.ts +10 -3
  42. package/src/runtime/run-drift.ts +220 -0
  43. package/src/runtime/sandbox.ts +26 -20
  44. package/src/runtime/semaphore.ts +2 -1
  45. package/src/runtime/settings-store.ts +14 -2
  46. package/src/runtime/skill-effectiveness.ts +4 -2
  47. package/src/runtime/skill-instructions.ts +4 -1
  48. package/src/runtime/subagent-manager.ts +20 -2
  49. package/src/runtime/subprocess-tool-registry.ts +2 -2
  50. package/src/runtime/task-graph.ts +79 -0
  51. package/src/runtime/task-id.ts +148 -0
  52. package/src/runtime/task-packet.ts +13 -1
  53. package/src/runtime/task-runner/context-retrieval.ts +172 -0
  54. package/src/runtime/task-runner.ts +39 -1
  55. package/src/runtime/team-runner.ts +7 -0
  56. package/src/runtime/usage-tracker.ts +4 -2
  57. package/src/runtime/verification-gates.ts +36 -9
  58. package/src/state/contracts.ts +2 -1
  59. package/src/state/event-log.ts +16 -5
  60. package/src/state/hook-instinct-bridge.ts +2 -1
  61. package/src/state/locks.ts +9 -2
  62. package/src/state/memory-store.ts +244 -0
  63. package/src/state/observation-store.ts +177 -0
  64. package/src/state/state-store.ts +4 -2
  65. package/src/state/task-claims.ts +9 -2
  66. package/src/tools/safe-bash.ts +69 -20
  67. package/src/types/new-api-types.ts +10 -5
  68. package/src/ui/keybinding-map.ts +2 -1
  69. package/src/ui/run-action-dispatcher.ts +2 -1
  70. package/src/ui/status-colors.ts +2 -1
  71. package/src/ui/syntax-highlight.ts +2 -1
  72. package/src/ui/tool-render.ts +13 -3
  73. package/src/utils/fingerprint.ts +183 -0
  74. package/src/utils/fs-watch.ts +4 -2
  75. package/src/utils/gh-protocol.ts +2 -1
  76. package/src/utils/safe-paths.ts +6 -0
  77. package/src/workflows/discover-workflows.ts +5 -1
  78. package/src/workflows/intermediate-store.ts +173 -0
  79. package/src/workflows/workflow-config.ts +8 -0
  80. package/src/worktree/cleanup.ts +8 -5
  81. package/src/worktree/worktree-manager.ts +1 -1
@@ -424,7 +424,7 @@ export async function runChildPi(input: ChildPiRunInput): Promise<ChildPiRunResu
424
424
  return { exitCode: 1, stdout: "", stderr: "Mock mode requires PI_CREW_ALLOW_MOCK=1" };
425
425
  }
426
426
  // SECURITY: Log mock mode activation prominently for audit trail
427
- console.warn(`Mock mode active: ${mock} NOT running real agents!`);
427
+ logInternalError("child-pi.mock", new Error(`Mock mode active: ${mock}`), "NOT running real agents");
428
428
  if (mock === "success") {
429
429
  const stdout = `[MOCK] Success for ${input.agent.name}\n`;
430
430
  observeStdoutChunk(input, stdout);
@@ -249,8 +249,8 @@ export function writeCrewAgentStatusCoalesced(manifest: TeamRunManifest, record:
249
249
  atomicWriteJsonCoalesced(agentStatusPath(manifest, record.taskId), redactSecrets(record), AGENT_COALESCE_MS);
250
250
  }
251
251
 
252
- /** Flush all coalesced agent writes synchronously. Hook into cleanup paths. */
253
- export function flushPendingAgentWrites(): void {
252
+ /** @internal Flush all coalesced agent writes synchronously. Hook into cleanup paths. */
253
+ function flushPendingAgentWrites(): void {
254
254
  flushPendingAtomicWrites();
255
255
  }
256
256
 
@@ -353,7 +353,8 @@ export interface CrewAgentEventCursorOptions {
353
353
  limit?: number;
354
354
  }
355
355
 
356
- export function readCrewAgentEvents(manifest: TeamRunManifest, taskId: string): unknown[] {
356
+ /** @internal Convenience wrapper around readCrewAgentEventsCursor. */
357
+ function readCrewAgentEvents(manifest: TeamRunManifest, taskId: string): unknown[] {
357
358
  return readCrewAgentEventsCursor(manifest, taskId).events;
358
359
  }
359
360
 
@@ -29,15 +29,19 @@ function handleRpc<P extends { requestId: string }>(
29
29
  ): () => void {
30
30
  return events.on(channel, async (raw: unknown) => {
31
31
  const params = raw as P;
32
+ // SECURITY: Validate requestId format to prevent channel injection.
33
+ if (!/^[a-zA-Z0-9_-]+$/.test(params.requestId)) {
34
+ throw new Error("Security: invalid requestId format");
35
+ }
32
36
  try {
33
37
  const data = await fn(params);
34
38
  const reply: { success: true; data?: unknown } = { success: true };
35
39
  if (data !== undefined) reply.data = data;
36
40
  events.emit(`${channel}:reply:${params.requestId}`, reply);
37
- } catch (err: any) {
41
+ } catch (err: unknown) {
38
42
  events.emit(`${channel}:reply:${params.requestId}`, {
39
43
  success: false,
40
- error: err?.message ?? String(err),
44
+ error: err instanceof Error ? err.message : String(err),
41
45
  });
42
46
  }
43
47
  });
@@ -50,21 +54,43 @@ export function registerCrewRpcHandlers(deps: RpcDeps): RpcHandle {
50
54
  return { version: PROTOCOL_VERSION };
51
55
  });
52
56
 
53
- const unsubSpawn = handleRpc<{ requestId: string; type: string; prompt: string; options?: Record<string, unknown> }>(
57
+ // SECURITY TRUST BOUNDARY: crew:rpc:spawn and crew:rpc:stop are privileged
58
+ // operations that create or terminate child processes. Any subscriber on
59
+ // the shared event bus can emit these events. In a multi-extension
60
+ // environment, this means a malicious extension could spawn/stop agents.
61
+ // Mitigation: validate that the caller is the pi-crew extension by checking
62
+ // the request includes a known extension identifier. Log all invocations
63
+ // for audit. A full fix requires event-bus-level origin signing.
64
+ const CREW_RPC_SOURCE = "pi-crew";
65
+
66
+ function validateRpcSource(params: { requestId: string; source?: string }): boolean {
67
+ if (!params.source || params.source !== CREW_RPC_SOURCE) {
68
+ console.warn(
69
+ `[pi-crew SECURITY] RPC invocation from unexpected source: ${params.source ?? "(none)"}. ` +
70
+ `Expected '${CREW_RPC_SOURCE}'. Request may be from an untrusted extension.`,
71
+ );
72
+ return false;
73
+ }
74
+ return true;
75
+ }
76
+
77
+ const unsubSpawn = handleRpc<{ requestId: string; type: string; prompt: string; options?: Record<string, unknown>; source?: string }>(
54
78
  events,
55
79
  "crew:rpc:spawn",
56
- ({ type, prompt, options }) => {
80
+ (params) => {
81
+ if (!validateRpcSource(params)) throw new Error("Unauthorized: RPC spawn requires source='pi-crew'");
57
82
  const ctx = getCtx();
58
83
  if (!ctx) throw new Error("No active session");
59
- return { id: spawn(type, prompt, options ?? {}) };
84
+ return { id: spawn(params.type, params.prompt, params.options ?? {}) };
60
85
  },
61
86
  );
62
87
 
63
- const unsubStop = handleRpc<{ requestId: string; agentId: string }>(
88
+ const unsubStop = handleRpc<{ requestId: string; agentId: string; source?: string }>(
64
89
  events,
65
90
  "crew:rpc:stop",
66
- ({ agentId }) => {
67
- if (!abort(agentId)) throw new Error("Agent not found");
91
+ (params) => {
92
+ if (!validateRpcSource(params)) throw new Error("Unauthorized: RPC stop requires source='pi-crew'");
93
+ if (!abort(params.agentId)) throw new Error("Agent not found");
68
94
  },
69
95
  );
70
96
 
@@ -9,9 +9,9 @@ import { loadRunManifestById } from "../state/state-store.ts";
9
9
  import type { TeamRunManifest, TeamTaskState } from "../state/types.ts";
10
10
  import { summarizeHeartbeats, type HeartbeatSummary } from "../ui/heartbeat-aggregator.ts";
11
11
  import type { RunUiSnapshot } from "../ui/snapshot-types.ts";
12
- import { redactSecrets } from "../utils/redaction.ts";
12
+ import { redactSecrets, isSecretKey } from "../utils/redaction.ts";
13
13
  import { buildRecoveryLedger, type RecoveryLedgerEntry } from "./recovery-recipes.ts";
14
- export { redactSecrets } from "../utils/redaction.ts";
14
+ export { redactSecrets, isSecretKey } from "../utils/redaction.ts";
15
15
 
16
16
  export interface DiagnosticReport {
17
17
  schemaVersion?: number;
@@ -37,13 +37,12 @@ export interface DiagnosticReport {
37
37
  recoveryLedger: RecoveryLedgerEntry[];
38
38
  }
39
39
 
40
- const SECRET_KEY_PATTERN = /(token|key|password|secret|credential|auth)/i;
41
40
  const ENV_DEBUG_ALLOWLIST = /^(PI_CREW_|PI_TEAMS_|PI_.*HOME|NODE_ENV|NODE_VERSION|OS|PROCESSOR|TERM|LANG|HOME|USERPROFILE|APPDATA|PLATFORM|ARCH|WIN32|DOCKER|CI|VERBOSE|DEBUG|NO_COLOR|FORCE_COLOR|NPM_CONFIG|npm_)/i;
42
41
 
43
42
  function envRedacted(): Record<string, string> {
44
43
  const output: Record<string, string> = {};
45
44
  for (const [key, value] of Object.entries(process.env)) {
46
- if (SECRET_KEY_PATTERN.test(key)) output[key] = "***";
45
+ if (isSecretKey(key)) output[key] = "***";
47
46
  else if (typeof value === "string" && ENV_DEBUG_ALLOWLIST.test(key)) output[key] = value;
48
47
  // All other env vars are omitted to prevent leaking sensitive paths or system topology.
49
48
  }
@@ -484,11 +484,11 @@ export function createScriptRunner(options?: DynamicScriptOptions): DynamicScrip
484
484
  /**
485
485
  * @internal TEST ONLY — do not use in production code.
486
486
  * Exposes DynamicScriptRunner.executeUnchecked for unit testing.
487
+ * Returns undefined in non-test environments to prevent production use.
487
488
  */
488
- export function __test_executeUnchecked(
489
- runner: DynamicScriptRunner,
490
- code: string,
491
- timeout?: number,
492
- ): ScriptExecutionResult {
493
- return (runner as unknown as { executeUnchecked: (code: string, timeout?: number) => ScriptExecutionResult }).executeUnchecked(code, timeout);
494
- }
489
+ export const __test_executeUnchecked: ((runner: DynamicScriptRunner, code: string, timeout?: number) => ScriptExecutionResult) | undefined =
490
+ process.env.NODE_ENV === "test"
491
+ ? (runner: DynamicScriptRunner, code: string, timeout?: number): ScriptExecutionResult => {
492
+ return (runner as unknown as { executeUnchecked: (code: string, timeout?: number) => ScriptExecutionResult }).executeUnchecked(code, timeout);
493
+ }
494
+ : undefined;
@@ -41,8 +41,8 @@ export function stopWatchdog(runId: string): void {
41
41
  }
42
42
  }
43
43
 
44
- /** Stop all active watchdogs. Called on session shutdown. */
45
- export function stopAllWatchdogs(): void {
44
+ /** @internal Stop all active watchdogs. Called on session shutdown. */
45
+ function stopAllWatchdogs(): void {
46
46
  for (const [runId, timer] of activeWatchdogs) {
47
47
  clearTimeout(timer);
48
48
  }
@@ -0,0 +1,178 @@
1
+ /**
2
+ * Intercom bridge — workers can escalate questions to the orchestrator.
3
+ *
4
+ * Pattern origin: pi-subagents/src/intercom-bridge.ts — contact_supervisor tool
5
+ * for child agents to escalate decisions, report progress, or ask questions.
6
+ *
7
+ * This module provides the message queue and correlation logic.
8
+ * The actual tool registration happens in task-runner.ts.
9
+ */
10
+
11
+ import { logInternalError } from "../utils/internal-error.ts";
12
+
13
+ // ── Types ────────────────────────────────────────────────────────────────
14
+
15
/** How urgently the worker needs an answer. */
export type IntercomUrgency = "low" | "medium" | "high" | "critical";
/** What kind of message the worker is sending. */
export type IntercomType = "question" | "escalation" | "progress" | "block";

/** A message sent by a worker towards the orchestrator. */
export interface IntercomMessage {
  type: IntercomType;
  taskStepId: string;
  content: string;
  urgency: IntercomUrgency;
  timestamp: number;
  timeout?: number; // ms to wait for response
}

/** The answer delivered back to the worker that enqueued a message. */
export interface IntercomResponse {
  answer: string;
  /** "timeout" is also used when a message is evicted or the queue is cleared. */
  source: "orchestrator" | "human" | "timeout";
  timestamp: number;
  messageId: string;
}

// ── Message Queue ────────────────────────────────────────────────────────

/** Bookkeeping for one message that is still waiting for a response. */
interface PendingMessage {
  message: IntercomMessage;
  id: string;
  resolve: (response: IntercomResponse) => void;
  timer?: ReturnType<typeof setTimeout>;
}

/** Maximum number of messages that may await a response at the same time. */
const MAX_QUEUE_SIZE = 100;

/**
 * Largest delay setTimeout honours. Anything larger is silently turned into
 * a 1 ms delay by the runtime, which would produce an instant false timeout.
 */
const MAX_TIMER_DELAY_MS = 2147483647;

/**
 * In-process intercom queue for worker→orchestrator communication.
 *
 * Each message gets a unique ID. Callers await a response via a Promise.
 * If no response arrives within the timeout, resolves with source="timeout".
 * Messages without a timeout stay pending until they are answered, evicted
 * (queue full) or the queue is cleared.
 */
export class IntercomQueue {
  /** Messages awaiting a response, in insertion order (oldest first). */
  private pending = new Map<string, PendingMessage>();
  /** Disambiguation counter, only used when a generated ID collides. */
  private collisionCounter = 0;

  /**
   * Enqueue a message and return a promise that resolves when the
   * orchestrator responds (or times out).
   *
   * When MAX_QUEUE_SIZE messages are already pending, the oldest one is
   * evicted first; its promise resolves with source="timeout".
   * The returned promise never rejects.
   */
  enqueue(message: IntercomMessage): Promise<IntercomResponse> {
    if (this.pending.size >= MAX_QUEUE_SIZE) {
      // Evict oldest
      const firstKey = this.pending.keys().next().value;
      if (firstKey) this.evict(firstKey, "queue_full");
    }

    const id = this.nextId();

    return new Promise<IntercomResponse>((resolve) => {
      const entry: PendingMessage = { message, id, resolve };

      // Set timeout if specified (NaN, 0 and negative values mean "no timeout")
      if (message.timeout && message.timeout > 0) {
        // Clamp so oversized delays wait as long as possible instead of firing at once.
        const delay = Math.min(message.timeout, MAX_TIMER_DELAY_MS);
        entry.timer = setTimeout(() => {
          resolve({
            answer: "No response received within timeout",
            source: "timeout",
            timestamp: Date.now(),
            messageId: id,
          });
          this.pending.delete(id);
        }, delay);
      }

      this.pending.set(id, entry);
    });
  }

  /**
   * Respond to a pending message by ID.
   *
   * @returns true if the message was pending and has now been resolved,
   *          false if no message with that ID is pending.
   */
  respond(messageId: string, answer: string, source: "orchestrator" | "human" = "orchestrator"): boolean {
    const entry = this.pending.get(messageId);
    if (!entry) return false;

    if (entry.timer) clearTimeout(entry.timer);

    entry.resolve({
      answer,
      source,
      timestamp: Date.now(),
      messageId,
    });

    this.pending.delete(messageId);
    return true;
  }

  /**
   * Get all pending messages (for orchestrator to process), each tagged
   * with the ID to pass to respond().
   */
  getPending(): Array<IntercomMessage & { id: string }> {
    return [...this.pending.entries()].map(([id, entry]) => ({
      ...entry.message,
      id,
    }));
  }

  /**
   * Number of pending messages awaiting response.
   */
  get pendingCount(): number {
    return this.pending.size;
  }

  /**
   * Clean up all pending messages (e.g., on run completion).
   * Every waiting promise resolves with source="timeout".
   */
  clear(): void {
    // Snapshot the keys: evict() removes entries while we walk them.
    for (const id of [...this.pending.keys()]) {
      this.evict(id, "run_complete");
    }
  }

  /**
   * Build an ID that is not currently pending. The usual shape is
   * `icm-<time>-<random>`; a numeric suffix is appended only when that
   * value is already in use, so an existing entry is never overwritten.
   */
  private nextId(): string {
    const base = `icm-${Date.now().toString(36)}-${Math.random().toString(36).slice(2, 6)}`;
    let id = base;
    while (this.pending.has(id)) {
      this.collisionCounter += 1;
      id = `${base}-${this.collisionCounter.toString(36)}`;
    }
    return id;
  }

  /** Resolve a pending message as evicted, cancel its timer and drop it. */
  private evict(id: string, reason: string): void {
    const entry = this.pending.get(id);
    if (!entry) return;

    if (entry.timer) clearTimeout(entry.timer);

    entry.resolve({
      answer: `Message evicted: ${reason}`,
      source: "timeout",
      timestamp: Date.now(),
      messageId: id,
    });

    this.pending.delete(id);
  }
}
152
+
153
+ // ── Singleton per run ────────────────────────────────────────────────────
154
+
155
+ const queues = new Map<string, IntercomQueue>();
156
+
157
/**
 * Look up the intercom queue registered for `runId`, creating and
 * registering a fresh one on first use.
 */
export function getIntercomQueue(runId: string): IntercomQueue {
  const existing = queues.get(runId);
  if (existing) return existing;

  const created = new IntercomQueue();
  queues.set(runId, created);
  return created;
}
168
+
169
/**
 * Tear down the intercom queue of a completed run: clear whatever is still
 * pending, then drop the queue from the registry. No-op for unknown runs.
 */
export function cleanupIntercomQueue(runId: string): void {
  const runQueue = queues.get(runId);
  if (!runQueue) return;

  runQueue.clear();
  queues.delete(runId);
}
@@ -81,7 +81,8 @@ export function listLiveAgentsByWorkspace(workspaceId: string): LiveAgentHandle[
81
81
  /**
82
82
  * List only active agents (running/queued/waiting) for a specific workspace.
83
83
  */
84
- export function listActiveLiveAgentsByWorkspace(workspaceId: string): LiveAgentHandle[] {
84
+ /** @internal */
85
+ function listActiveLiveAgentsByWorkspace(workspaceId: string): LiveAgentHandle[] {
85
86
  return listActiveLiveAgents().filter((a) => a.workspaceId === workspaceId);
86
87
  }
87
88
 
@@ -150,7 +151,8 @@ function safeDisposeLiveSession(handle: LiveAgentHandle): void {
150
151
  }
151
152
  }
152
153
 
153
- export function removeLiveAgentHandle(agentId: string): LiveAgentHandle | undefined {
154
+ /** @internal */
155
+ function removeLiveAgentHandle(agentId: string): LiveAgentHandle | undefined {
154
156
  const handle = liveAgents.get(agentId);
155
157
  if (!handle) return undefined;
156
158
  liveAgents.delete(agentId);
@@ -406,7 +408,8 @@ export function broadcastIrcMessage(fromAgentId: string, message: IrcMessage): s
406
408
  }
407
409
 
408
410
  /** Phase 7: Get pending IRC messages for an agent (and clear them). */
409
- export function drainIrcMessages(agentIdOrTaskId: string): IrcMessage[] {
411
+ /** @internal */
412
+ function drainIrcMessages(agentIdOrTaskId: string): IrcMessage[] {
410
413
  const handle = getLiveAgent(agentIdOrTaskId);
411
414
  if (!handle) return [];
412
415
  const messages = [...handle.pendingMessages];
@@ -51,7 +51,8 @@ export function renderIrcPeerRoster(selfId: string, peers: Array<{ agentId: stri
51
51
  /**
52
52
  * Build the IRC system prompt section for a live-session worker.
53
53
  */
54
- export function buildIrcSystemSection(selfId: string, peers: Array<{ agentId: string; status: string }>): string {
54
+ /** @internal */
55
+ function buildIrcSystemSection(selfId: string, peers: Array<{ agentId: string; status: string }>): string {
55
56
  const roster = renderIrcPeerRoster(selfId, peers);
56
57
  return [
57
58
  "## Inter-Agent Communication",
@@ -66,7 +67,8 @@ export function buildIrcSystemSection(selfId: string, peers: Array<{ agentId: st
66
67
  * Route an IRC message to the appropriate agent(s).
67
68
  * Returns the list of agent IDs that received the message.
68
69
  */
69
- export function routeIrcMessage(
70
+ /** @internal */
71
+ function routeIrcMessage(
70
72
  message: IrcSendMessage,
71
73
  selfId: string,
72
74
  routing: {
@@ -63,7 +63,8 @@ export async function mapConcurrent<T, R>(items: T[], limit: number, fn: (item:
63
63
  * On abort: returns partial results (may contain undefined entries).
64
64
  * On error: throws immediately (fail-fast) and cancels remaining work.
65
65
  */
66
- export async function mapConcurrentWithSignal<T, R>(
66
+ /** @internal */
67
+ async function mapConcurrentWithSignal<T, R>(
67
68
  items: T[],
68
69
  limit: number,
69
70
  fn: (item: T, i: number, signal: AbortSignal) => Promise<R>,
@@ -0,0 +1,200 @@
1
+ /**
2
+ * Structured planning engine — template-based plan generation with verification.
3
+ *
4
+ * Pattern origin: plannotator/ — plan templates with task decomposition,
5
+ * verification constraints, and pre-execution plan verification.
6
+ *
7
+ * Templates provide reusable plan structures that can be specialized
8
+ * for different project types, replacing pure LLM-generated plans with
9
+ * deterministic scaffolding + LLM refinement.
10
+ */
11
+
12
+ import { logInternalError } from "../utils/internal-error.ts";
13
+
14
+ // ── Types ────────────────────────────────────────────────────────────────
15
+
16
+ export interface PlanTemplate {
17
+ /** Template name (e.g., "standard-review", "full-implementation") */
18
+ name: string;
19
+ /** One-line description */
20
+ description: string;
21
+ /** Template phases */
22
+ phases: PlanPhase[];
23
+ /** Verification commands per phase (phaseName → command) */
24
+ verificationCommands: Record<string, string>;
25
+ }
26
+
27
+ export interface PlanPhase {
28
+ /** Phase name (e.g., "explore", "plan", "execute", "verify") */
29
+ name: string;
30
+ /** Agent role for this phase */
31
+ role: string;
32
+ /** Task description template — {{variables}} are substituted */
33
+ taskTemplate: string;
34
+ /** Maximum number of tasks in this phase */
35
+ maxTasks: number;
36
+ /** Dependencies on other phases */
37
+ dependsOn: string[];
38
+ /** Optional verification command */
39
+ verificationCommand?: string;
40
+ }
41
+
42
+ export interface RenderedPlan {
43
+ templateName: string;
44
+ phases: RenderedPhase[];
45
+ variables: Record<string, string>;
46
+ }
47
+
48
+ export interface RenderedPhase {
49
+ name: string;
50
+ role: string;
51
+ task: string;
52
+ dependsOn: string[];
53
+ verificationCommand?: string;
54
+ }
55
+
56
+ // ── Template Registry ────────────────────────────────────────────────────
57
+
58
+ const templates = new Map<string, PlanTemplate>();
59
+
60
/**
 * Add a plan template to the registry under its `name`.
 * A template already stored under the same name is replaced.
 */
export function registerPlanTemplate(template: PlanTemplate): void {
  const { name } = template;
  templates.set(name, template);
}
66
+
67
/**
 * Look up a registered template.
 *
 * @returns the template stored under `name`, or undefined when none is registered
 */
export function getPlanTemplate(name: string): PlanTemplate | undefined {
  const registered = templates.get(name);
  return registered;
}
73
+
74
/**
 * Names of every template currently in the registry, in registration order.
 */
export function listPlanTemplates(): string[] {
  return Array.from(templates.keys());
}
80
+
81
+ // ── Rendering ────────────────────────────────────────────────────────────
82
+
83
/**
 * Render a plan template with variable substitution.
 *
 * Variables in task templates use {{variableName}} syntax. For each phase,
 * the phase's own `verificationCommand` takes precedence; otherwise the
 * template-level `verificationCommands` entry keyed by the phase name is used.
 *
 * The returned plan owns its data: `dependsOn` arrays and `variables` are
 * copies, so mutating a rendered plan cannot alter the registered template
 * or the caller's input object.
 *
 * @param templateName - Name of the registered template
 * @param variables - Key-value pairs for substitution
 * @returns Rendered plan, or undefined if template not found (the miss is
 *          reported through logInternalError)
 */
export function renderPlanTemplate(
  templateName: string,
  variables: Record<string, string>,
): RenderedPlan | undefined {
  const template = templates.get(templateName);
  if (!template) {
    logInternalError("plan-templates", new Error(`Template not found: ${templateName}`));
    return undefined;
  }

  const phases: RenderedPhase[] = template.phases.map((phase) => {
    // Own-property check: a phase named e.g. "constructor" must not pick up
    // an inherited Object.prototype member as its verification command.
    const templateLevelCommand = Object.prototype.hasOwnProperty.call(template.verificationCommands, phase.name)
      ? template.verificationCommands[phase.name]
      : undefined;

    return {
      name: phase.name,
      role: phase.role,
      task: substituteVariables(phase.taskTemplate, variables),
      // Copy so the rendered plan does not alias the registered template.
      dependsOn: [...phase.dependsOn],
      verificationCommand: phase.verificationCommand ?? templateLevelCommand,
    };
  });

  return { templateName, phases, variables: { ...variables } };
}
112
+
113
/**
 * Substitute {{variable}} placeholders in a template string.
 *
 * Placeholder names consist of word characters (`\w+`). A placeholder whose
 * name is not an own key of `variables` is left in the output unchanged, so
 * missing values stay visible. Inherited members such as `constructor` or
 * `toString` are never treated as variables.
 *
 * @param template - Text containing zero or more {{name}} placeholders
 * @param variables - Values keyed by placeholder name
 * @returns The template with every known placeholder replaced
 */
function substituteVariables(template: string, variables: Record<string, string>): string {
  return template.replace(/\{\{(\w+)\}\}/g, (match, key: string) => {
    if (!Object.prototype.hasOwnProperty.call(variables, key)) return match;
    return variables[key] ?? match;
  });
}
121
+
122
+ // ── Built-in Templates ───────────────────────────────────────────────────
123
+
124
+ registerPlanTemplate({
125
+ name: "standard-review",
126
+ description: "Standard code review workflow: explore → review → verify",
127
+ phases: [
128
+ {
129
+ name: "explore",
130
+ role: "explorer",
131
+ taskTemplate: "Map the codebase and identify the key files related to: {{goal}}. Focus on: {{focusAreas}}.",
132
+ maxTasks: 1,
133
+ dependsOn: [],
134
+ },
135
+ {
136
+ name: "review",
137
+ role: "reviewer",
138
+ taskTemplate: "Review the code identified in the explore phase for: {{goal}}. Check correctness, maintainability, and security.",
139
+ maxTasks: 1,
140
+ dependsOn: ["explore"],
141
+ },
142
+ {
143
+ name: "verify",
144
+ role: "verifier",
145
+ taskTemplate: "Verify that all review findings are addressed. Run tests if applicable. Confirm: {{goal}} is achieved.",
146
+ maxTasks: 1,
147
+ dependsOn: ["review"],
148
+ verificationCommand: "npm test",
149
+ },
150
+ ],
151
+ verificationCommands: {
152
+ verify: "npm test",
153
+ },
154
+ });
155
+
156
+ registerPlanTemplate({
157
+ name: "full-implementation",
158
+ description: "Full implementation workflow: explore → plan → execute → review → verify",
159
+ phases: [
160
+ {
161
+ name: "explore",
162
+ role: "explorer",
163
+ taskTemplate: "Explore the codebase to understand the current state relevant to: {{goal}}. Identify affected files and patterns.",
164
+ maxTasks: 1,
165
+ dependsOn: [],
166
+ },
167
+ {
168
+ name: "plan",
169
+ role: "planner",
170
+ taskTemplate: "Create a detailed implementation plan for: {{goal}}. Break down into concrete steps with file-level changes.",
171
+ maxTasks: 1,
172
+ dependsOn: ["explore"],
173
+ },
174
+ {
175
+ name: "execute",
176
+ role: "executor",
177
+ taskTemplate: "Implement the plan for: {{goal}}. Make all planned changes, write tests, and ensure TypeScript compiles.",
178
+ maxTasks: 3,
179
+ dependsOn: ["plan"],
180
+ },
181
+ {
182
+ name: "review",
183
+ role: "reviewer",
184
+ taskTemplate: "Review the implementation of: {{goal}}. Check for correctness, security, performance, and code quality.",
185
+ maxTasks: 1,
186
+ dependsOn: ["execute"],
187
+ },
188
+ {
189
+ name: "verify",
190
+ role: "verifier",
191
+ taskTemplate: "Verify the complete implementation of: {{goal}}. Run tests, check types, validate all acceptance criteria.",
192
+ maxTasks: 1,
193
+ dependsOn: ["review"],
194
+ verificationCommand: "npm test && npx tsc --noEmit",
195
+ },
196
+ ],
197
+ verificationCommands: {
198
+ verify: "npm test && npx tsc --noEmit",
199
+ },
200
+ });
@@ -5,6 +5,7 @@
5
5
  * Distilled from pi-autoresearch's post-check / backpressure pattern.
6
6
  */
7
7
  import { execFileSync } from "node:child_process";
8
+ import * as path from "node:path";
8
9
  import { resolveShellForScript } from "../utils/resolve-shell.ts";
9
10
  import { sanitizeEnvSecrets } from "../utils/env-filter.ts";
10
11
 
@@ -56,9 +57,8 @@ function resolveScriptPath(config: PostCheckConfig): string | undefined {
56
57
  * If no script path is available (neither config nor env var), the check
57
58
  * passes by default with a note.
58
59
  *
59
- * **Security note:** The script path is user-configurable (config or env var)
60
- * and executed with minimal environment (PATH, HOME, USER, LANG). Only use with trusted script
61
- * paths. No path containment validation is performed.
60
+ * **Security note:** The script path is validated to stay within `cwd`.
61
+ * Scripts that escape the working directory are rejected.
62
62
  *
63
63
  * @param config - Post-check configuration (script path and timeout)
64
64
  * @param cwd - Working directory for script execution
@@ -77,6 +77,13 @@ export async function runPostCheck(config: PostCheckConfig, cwd: string): Promis
77
77
  };
78
78
  }
79
79
 
80
+ // M1: Validate that the script path is contained within cwd to prevent arbitrary file execution
81
+ const resolved = path.resolve(cwd, scriptPath);
82
+ const resolvedCwd = path.resolve(cwd);
83
+ if (!resolved.startsWith(resolvedCwd + path.sep) && resolved !== resolvedCwd) {
84
+ throw new Error(`Security: PI_CREW_POST_CHECK_SCRIPT escapes cwd: ${scriptPath}`);
85
+ }
86
+
80
87
  const startTime = Date.now();
81
88
 
82
89
  return new Promise<PostCheckResult>((resolve) => {