pi-crew 0.9.4 → 0.9.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (45)
  1. package/CHANGELOG.md +592 -0
  2. package/README.md +55 -3
  3. package/docs/HARNESS_BACKLOG.md +51 -3
  4. package/docs/dynamic-workflows.md +315 -2
  5. package/docs/fix-plan-disabletools-exit-null.md +219 -0
  6. package/docs/troubleshooting.md +102 -0
  7. package/package.json +8 -2
  8. package/src/extension/command-completions.ts +1 -0
  9. package/src/extension/crew-shortcuts.ts +1 -0
  10. package/src/extension/register.ts +2 -0
  11. package/src/extension/registration/commands.ts +3 -0
  12. package/src/extension/team-tool/doctor.ts +14 -0
  13. package/src/extension/team-tool/goal.ts +1 -0
  14. package/src/extension/team-tool/run.ts +4 -0
  15. package/src/runtime/background-runner.ts +24 -2
  16. package/src/runtime/chain-runner.ts +1 -0
  17. package/src/runtime/child-pi.ts +101 -10
  18. package/src/runtime/crash-recovery.ts +78 -36
  19. package/src/runtime/deterministic-ast.ts +161 -0
  20. package/src/runtime/dwf-state-store.ts +97 -0
  21. package/src/runtime/dynamic-workflow-context.ts +381 -7
  22. package/src/runtime/dynamic-workflow-runner.ts +94 -2
  23. package/src/runtime/goal-loop-runner.ts +2 -0
  24. package/src/runtime/live-session-runtime.ts +1 -0
  25. package/src/runtime/model-scope.ts +1 -0
  26. package/src/runtime/peer-dep.ts +1 -0
  27. package/src/runtime/pi-args.ts +11 -0
  28. package/src/runtime/resilient-edit.ts +1 -0
  29. package/src/runtime/result-extractor.ts +72 -7
  30. package/src/runtime/task-runner.ts +1 -0
  31. package/src/runtime/team-runner.ts +8 -3
  32. package/src/runtime/zombie-scanner.ts +297 -0
  33. package/src/schema/team-tool-schema.ts +28 -0
  34. package/src/state/contracts.ts +1 -0
  35. package/src/state/hook-instinct-bridge.ts +3 -0
  36. package/src/state/state-store.ts +3 -0
  37. package/src/state/types.ts +9 -0
  38. package/src/ui/dashboard-panes/progress-pane.ts +5 -0
  39. package/src/ui/dwf-phase-display.ts +151 -0
  40. package/src/ui/run-snapshot-cache.ts +4 -0
  41. package/src/ui/snapshot-types.ts +3 -0
  42. package/src/utils/bm25-search.ts +2 -0
  43. package/src/workflows/workflow-config.ts +3 -0
  44. package/src/worktree/worktree-manager.ts +94 -0
  45. package/types/dwf.d.ts +187 -0
@@ -30,13 +30,17 @@ import { Semaphore } from "./semaphore.ts";
30
30
  import { executeWithRetry } from "./retry-executor.ts";
31
31
  import { allAgents, discoverAgents } from "../agents/discover-agents.ts";
32
32
  import { writeArtifact } from "../state/artifact-store.ts";
33
+ import { appendEvent } from "../state/event-log.ts";
33
34
  import { appendMailboxMessage, readMailbox } from "../state/mailbox.ts";
34
35
  import { renderPlanTemplate } from "./plan-templates.ts";
36
+ import { prepareAgentWorktree, cleanupAgentWorktree } from "../worktree/worktree-manager.ts";
35
37
  import { logInternalError } from "../utils/internal-error.ts";
36
38
  import { randomBytes } from "node:crypto";
39
+ import type { TSchema } from "@sinclair/typebox";
37
40
  import type { AgentConfig } from "../agents/agent-config.ts";
38
41
  import type { TeamConfig } from "../teams/team-config.ts";
39
42
  import type { TeamRunManifest } from "../state/types.ts";
43
+ import type { DwfCheckpointState } from "./dwf-state-store.ts";
40
44
 
41
45
  export interface AgentCallOpts {
42
46
  prompt: string;
@@ -61,6 +65,19 @@ export interface AgentCallOpts {
61
65
  * JSON-verdict judge, but the user's reviewer.md agent is a markdown code-reviewer.
62
66
  * When set, the resolved agent's systemPrompt is replaced entirely. */
63
67
  systemPrompt?: string;
68
+ /** Round-13 P0-3: optional TypeBox schema. When set, the call's output is validated
69
+ * against the schema after extraction. Validation failure yields ok:false with a
70
+ * structured `error` and undefined `structured` field. Forward-compatible: when
71
+ * undefined, behavior is identical to the regex-based extractor. */
72
+ schema?: TSchema;
73
+ /** round-17 P2-4: spawn this agent in an isolated git worktree.
74
+ * Useful when parallel agents modify files concurrently (avoids conflicts). The
75
+ * worktree is created from HEAD, the agent runs there, and on completion the
76
+ * diff is captured as an artifact before cleanup. Default false.
77
+ * If worktree creation fails (no git repo, dirty leader), the agent runs in the
78
+ * normal cwd and a warning is logged via ctx.log(). Backward compatible —
79
+ * omitting it is identical to `false`. */
80
+ worktree?: boolean;
64
81
  }
65
82
 
66
83
  export interface AgentResult {
@@ -75,6 +92,16 @@ export interface AgentResult {
75
92
  durationMs?: number;
76
93
  }
77
94
 
95
/**
 * Per-workflow token budget (round-14 P1-2).
 * A frozen, read-only surface that scripts reach through `ctx.budget`.
 */
export interface WorkflowBudget {
  /** The configured token budget; `null` means the workflow is unbounded. */
  total: number | null;
  /** Running total of tokens consumed, accumulated from the usage of each ctx.agent() run. */
  spent(): number;
  /** Tokens still available; `Infinity` whenever `total` is null. */
  remaining(): number;
}
104
+
78
105
  export interface WorkflowCtx {
79
106
  cwd: string;
80
107
  runId: string;
@@ -83,6 +110,15 @@ export interface WorkflowCtx {
83
110
  agent(opts: AgentCallOpts): Promise<AgentResult>;
84
111
  /** Bounded fan-out preserving order (wraps mapConcurrent). */
85
112
  fanOut<T>(items: T[], limit: number, fn: (item: T, i: number) => Promise<AgentResult>): Promise<AgentResult[]>;
113
+ /** Pipeline: sequential per-item stages, parallel across items (bounded by
114
+ * ctx.semaphore). Each item passes through all stages in order; different
115
+ * items may run concurrently. A failed stage yields `null` for that item
116
+ * (logged via ctx.log) and other items continue. Aborts propagate.
117
+ * round-16 (P2-1). */
118
+ pipeline<TItem, TResult = unknown>(
119
+ items: TItem[],
120
+ ...stages: Array<(previous: TResult, original: TItem, index: number) => Promise<TResult> | TResult>
121
+ ): Promise<(TResult | null)[]>;
86
122
  /** Run a reviewer agent over an artifact; parse {outcome, feedback}. §3.2. */
87
123
  review(taskId: string, reviewerRole?: string, opts?: { content?: string; artifactPath?: string; disableTools?: boolean }): Promise<{ outcome: "accept" | "reject" | "changes_requested"; feedback: string }>;
88
124
  /** Re-run a task with feedback (wraps executeWithRetry). */
@@ -97,6 +133,22 @@ export interface WorkflowCtx {
97
133
  vars: Record<string, unknown>;
98
134
  /** Mark the final result. ONLY this artifact reaches the main context. */
99
135
  setResult(artifactPath: string, meta?: Record<string, unknown>): void;
136
+ /** Mark the start of a named workflow phase. Emits a `dwf.phase_started` event
137
+ * (and a `dwf.phase_completed` for the previous phase, if any) to the run's
138
+ * events.jsonl. Idempotent on the same title — calling twice with the same
139
+ * title is a no-op. Phase titles are in-memory only; the events log is the
140
+ * durable source of truth for phase boundaries. */
141
+ phase(title: string): void;
142
+ /** round-14 P1-3: append a workflow-level log line. Persists to events.jsonl
143
+ * as a `dwf.log` event and keeps a bounded in-memory copy (capped at 1000). */
144
+ log(message: unknown): void;
145
+ /** round-14 P1-2: per-workflow token budget. ctx.agent() auto-rejects with
146
+ * ok:false once exhausted. */
147
+ budget: WorkflowBudget;
148
+ /** round-14 P1-5: typed workflow arguments. Reads the value passed via
149
+ * MakeWorkflowCtxOptions.args (sourced from manifest.args). Defaults to {}
150
+ * when unset. */
151
+ args<T = unknown>(): T;
100
152
  semaphore: Semaphore;
101
153
  /** Abort signal (cancel/stop). */
102
154
  signal: AbortSignal;
@@ -107,6 +159,19 @@ export interface MakeWorkflowCtxOptions {
107
159
  signal: AbortSignal;
108
160
  team?: TeamConfig;
109
161
  modelOverride?: string;
162
+ /** round-14 P1-2: per-workflow token budget. null/undefined = unbounded. */
163
+ tokenBudget?: number | null;
164
+ /** round-14 P1-5: typed workflow arguments (sourced from manifest.args). Defaults to {}. */
165
+ args?: unknown;
166
+ /** round-18 P2-3: checkpoint state to hydrate ctx with on resume. When provided,
167
+ * the ctx starts with the resumed vars/phases/logs/spent/agentCount instead of
168
+ * empty defaults. Omit (or undefined) for a fresh run — backward compatible. */
169
+ resumedState?: DwfCheckpointState;
170
+ /** round-18 P2-3: callback invoked after each `ctx.agent()` call completes
171
+ * (success OR fail). The runner wires this to `DwfStore.save()` so a crash after
172
+ * an agent call leaves a durable checkpoint. Best-effort — failures are swallowed
173
+ * so checkpointing can never crash the workflow. */
174
+ onCheckpoint?: (state: DwfCheckpointState) => void;
110
175
  }
111
176
 
112
177
  /**
@@ -163,6 +228,31 @@ export function makeWorkflowCtx(manifest: TeamRunManifest, opts: MakeWorkflowCtx
163
228
  const concurrency = Math.max(1, opts.concurrency ?? 4);
164
229
  const semaphore = new Semaphore(concurrency);
165
230
  let finalResult: { artifactPath: string; meta?: Record<string, unknown> } | undefined;
231
+ // round-18 P2-3: agent invocation counter. Hydrated from a resumed checkpoint so a
232
+ // resumed run keeps an accurate count; incremented in agent()'s finally block.
233
+ let agentCount = opts.resumedState ? opts.resumedState.agentCount : 0;
234
+ // round-12 P0-1: in-memory phase state, exposed via non-enumerable getter like __finalResult.
235
+ // The events log is the durable source of truth for phase boundaries.
236
+ // round-18 P2-3: hydrate phaseState from a resumed checkpoint (backward compatible when unset).
237
+ let phaseState: { currentPhase: string | undefined; phases: string[] } = opts.resumedState
238
+ ? { currentPhase: opts.resumedState.currentPhase, phases: [...opts.resumedState.phases] }
239
+ : { currentPhase: undefined, phases: [] };
240
+ let phaseCapWarned = false;
241
+ // round-14 P1-2/P1-3/P1-5: closure-scoped runtime state shared by budget/log/args.
242
+ // Mirrors the pi-dynamic-workflows RuntimeState pattern (workflow.ts:state).
243
+ // round-18 P2-3: hydrate spent/logs from a resumed checkpoint (backward compatible when unset).
244
+ const wfState: { spent: number; logs: string[]; args: unknown } = {
245
+ spent: opts.resumedState?.spent ?? 0,
246
+ logs: opts.resumedState ? [...opts.resumedState.logs].slice(0, 1000) : [],
247
+ args: opts.args ?? {},
248
+ };
249
+ // round-14 P1-2: frozen budget surface. The closures read wfState.spent so the
250
+ // object stays live after Object.freeze(ctx). total is a snapshot primitive.
251
+ const budget = Object.freeze({
252
+ total: opts.tokenBudget ?? null,
253
+ spent: () => wfState.spent,
254
+ remaining: () => (opts.tokenBudget == null ? Infinity : Math.max(0, opts.tokenBudget - wfState.spent)),
255
+ } satisfies WorkflowBudget);
166
256
 
167
257
  const ctx: WorkflowCtx = {
168
258
  cwd: manifest.cwd,
@@ -173,7 +263,16 @@ export function makeWorkflowCtx(manifest: TeamRunManifest, opts: MakeWorkflowCtx
173
263
  async agent(call: AgentCallOpts): Promise<AgentResult> {
174
264
  await semaphore.acquire();
175
265
  const started = Date.now();
266
+ // round-17 P2-4: declared before the try so the finally can clean it up
267
+ // regardless of which return/throw path is taken.
268
+ let worktreePath: string | undefined;
269
+ let worktreeBranch: string | undefined;
176
270
  try {
271
+ // round-14 P1-2: budget check BEFORE spawning. When the per-workflow token
272
+ // budget is exhausted, reject the call without consuming a child worker.
273
+ if (budget.total !== null && budget.remaining() <= 0) {
274
+ return { ok: false, text: "", error: "workflow token budget exhausted", durationMs: 0 };
275
+ }
177
276
  const agentConfig = resolveAgentForRole(call.role, {
178
277
  explicitAgent: call.agent,
179
278
  team: opts.team,
@@ -185,12 +284,49 @@ export function makeWorkflowCtx(manifest: TeamRunManifest, opts: MakeWorkflowCtx
185
284
  let effectiveAgent = call.disableTools === true ? { ...agentConfig, disableTools: true, tools: [] } : agentConfig;
186
285
  // Per-call systemPrompt override (replaces the resolved agent's persona/output-format).
187
286
  // Used by ctx.review() to force a JSON-verdict judge instead of the role's markdown reviewer.
188
- if (call.systemPrompt !== undefined) {
287
+ // Round-13 P0-3: when a schema is provided, append a JSON-output instruction so
288
+ // the model returns parseable JSON instead of prose. Schema name is intentionally
289
+ // generic — we don't reveal TypeBox internal types.
290
+ //
291
+ // Smoke-test fix: when BOTH schema AND an explicit call.systemPrompt are set,
292
+ // the call.systemPrompt is the caller's intended persona (e.g. a JSON-verdict
293
+ // judge). It MUST be used as the base for the JSON instruction — otherwise the
294
+ // role's persona leaks through and the model returns prose, failing schema
295
+ // validation. Previously call.systemPrompt was silently dropped when a schema
296
+ // was present, which confused models into returning text like "hello".
297
+ if (call.schema !== undefined) {
298
+ const base = call.systemPrompt ?? effectiveAgent.systemPrompt;
299
+ effectiveAgent = {
300
+ ...effectiveAgent,
301
+ systemPrompt: composeSchemaSystemPrompt(base, call.schema),
302
+ };
303
+ } else if (call.systemPrompt !== undefined) {
189
304
  effectiveAgent = { ...effectiveAgent, systemPrompt: call.systemPrompt };
190
305
  }
191
306
  const task = composeAgentTask(call);
307
+
308
+ // round-17 P2-4: worktree isolation per agent. When requested, spawn the
309
+ // agent in an isolated git worktree so parallel file-modifying agents
310
+ // don't clobber each other. Falls back to the normal cwd (with a warning)
311
+ // when worktree creation is unavailable (no git repo, dirty leader).
312
+ let agentCwd = manifest.cwd;
313
+ if (call.worktree === true) {
314
+ const wt = prepareAgentWorktree(
315
+ manifest,
316
+ `dwf-agent-${Date.now()}-${randomBytes(4).toString("hex")}`,
317
+ );
318
+ if (wt?.worktreePath) {
319
+ agentCwd = wt.cwd;
320
+ worktreePath = wt.worktreePath;
321
+ worktreeBranch = wt.branch;
322
+ ctx.log(`worktree: agent isolated at ${wt.worktreePath}`);
323
+ } else {
324
+ ctx.log("worktree: creation unavailable — falling back to normal cwd");
325
+ }
326
+ }
327
+
192
328
  const childResult = await runChildPi({
193
- cwd: manifest.cwd,
329
+ cwd: agentCwd,
194
330
  task,
195
331
  agent: effectiveAgent,
196
332
  model: call.model ?? opts.modelOverride ?? agentConfig.model,
@@ -206,6 +342,9 @@ export function makeWorkflowCtx(manifest: TeamRunManifest, opts: MakeWorkflowCtx
206
342
  return { ok: false, text: "", error: childResult.error ?? `exit ${childResult.exitCode}`, durationMs: Date.now() - started };
207
343
  }
208
344
  const parsed = parsePiJsonOutput(childResult.stdout);
345
+ // round-14 P1-2: accumulate this run's token usage into the workflow budget.
346
+ // Covers both the success and schema-mismatch paths (both report parsed.usage).
347
+ wfState.spent += (parsed.usage?.input ?? 0) + (parsed.usage?.output ?? 0);
209
348
  let text = parsed.finalText ?? "";
210
349
  // Round-11 test fix: parsePiJsonOutput only extracts text from pi event stream
211
350
  // ({type:"message_end", message:{role:"assistant", content:[...]}}). When the
@@ -214,7 +353,11 @@ export function makeWorkflowCtx(manifest: TeamRunManifest, opts: MakeWorkflowCtx
214
353
  if (!text.trim()) {
215
354
  text = extractTextFallback(childResult.stdout);
216
355
  }
217
- const extracted = extractStructuredResult(text);
356
+ // Round-13 P0-3: schema validation post-extraction. The schema option is
357
+ // additive — when undefined the call site is unchanged. With a schema,
358
+ // extracted.error means the worker output didn't match expected shape and
359
+ // the script should treat the result as failed (ok:false, error set).
360
+ const extracted = extractStructuredResult(text, call.schema);
218
361
  // Write a side artifact for audit/isolation (§0b G3).
219
362
  const rel = `wf/${Date.now()}-${randomBytes(4).toString("hex")}.md`;
220
363
  const artifact = writeArtifact(manifest.artifactsRoot, {
@@ -223,6 +366,16 @@ export function makeWorkflowCtx(manifest: TeamRunManifest, opts: MakeWorkflowCtx
223
366
  content: text,
224
367
  producer: "dynamic-workflow",
225
368
  });
369
+ if (call.schema !== undefined && !extracted.structured) {
370
+ return {
371
+ ok: false,
372
+ text,
373
+ usage: parsed.usage,
374
+ artifactPath: artifact.path,
375
+ error: extracted.error ?? "structured output does not match schema",
376
+ durationMs: Date.now() - started,
377
+ };
378
+ }
226
379
  return {
227
380
  ok: true,
228
381
  text,
@@ -235,12 +388,70 @@ export function makeWorkflowCtx(manifest: TeamRunManifest, opts: MakeWorkflowCtx
235
388
  logInternalError("dynamic-workflow-context.agent", error, `runId=${manifest.runId}`);
236
389
  return { ok: false, text: "", error: error instanceof Error ? error.message : String(error), durationMs: Date.now() - started };
237
390
  } finally {
391
+ // round-17 P2-4: clean up the worktree after the agent completes (success
392
+ // OR failure). Captures the diff as an artifact before removal. Best-effort
393
+ // — a leak must never crash the workflow.
394
+ if (worktreePath) {
395
+ try {
396
+ cleanupAgentWorktree(manifest, worktreePath, worktreeBranch);
397
+ } catch (cleanupError) {
398
+ logInternalError("dynamic-workflow-context.worktree-cleanup", cleanupError, `worktreePath=${worktreePath}`);
399
+ }
400
+ }
401
+ // round-18 P2-3: checkpoint AFTER the agent completes (success or fail) so a
402
+ // crash between agent calls leaves durable state to resume from. The counter is
403
+ // incremented here (after the call) so the checkpoint reflects the call that ran.
404
+ agentCount++;
405
+ if (opts.onCheckpoint) {
406
+ try {
407
+ opts.onCheckpoint({
408
+ runId: manifest.runId,
409
+ vars: ctx.vars,
410
+ phases: phaseState.phases,
411
+ currentPhase: phaseState.currentPhase,
412
+ logs: wfState.logs.slice(0, 1000),
413
+ spent: wfState.spent,
414
+ agentCount,
415
+ updatedAt: new Date().toISOString(),
416
+ });
417
+ } catch (checkpointError) {
418
+ logInternalError("dynamic-workflow-context.checkpoint", checkpointError, `runId=${manifest.runId}`);
419
+ }
420
+ }
238
421
  semaphore.release();
239
422
  }
240
423
  },
241
424
  async fanOut<T>(items: T[], limit: number, fn: (item: T, i: number) => Promise<AgentResult>): Promise<AgentResult[]> {
242
425
  return mapConcurrent(items, Math.max(1, limit), fn);
243
426
  },
427
/**
 * Thread every item through `stages` in order. Items are dispatched through
 * mapConcurrent with the workflow `concurrency` as the limit; the stages of a
 * single item always run one after another, and the first stage receives the
 * item itself as `previous`.
 *
 * A stage that throws turns that item's slot into `null` and is reported via
 * ctx.log. If the workflow signal is already aborted, the error is rethrown
 * instead of being converted to `null`.
 *
 * @throws TypeError when `items` is not an array, or when `stages` is empty
 *   or contains a non-function.
 */
async pipeline<TItem, TResult = unknown>(
  items: TItem[],
  ...stages: Array<(previous: TResult, original: TItem, index: number) => Promise<TResult> | TResult>
): Promise<(TResult | null)[]> {
  if (!Array.isArray(items)) {
    throw new TypeError("pipeline() expects an array as the first argument");
  }
  const everyStageCallable = stages.length > 0 && stages.every((stage) => typeof stage === "function");
  if (!everyStageCallable) {
    throw new TypeError("pipeline() stages must be functions");
  }
  if (items.length === 0) return [];

  // Runs one item through all stages sequentially.
  const runItem = async (item: TItem, index: number): Promise<TResult | null> => {
    let current: unknown = item;
    for (const stage of stages) {
      try {
        current = await stage(current as TResult, item, index);
      } catch (error) {
        // An abort must surface to the caller rather than be masked as a null slot.
        if (opts.signal.aborted) throw error;
        const reason = error instanceof Error ? error.message : String(error);
        ctx.log(`pipeline[${index}] failed: ${reason}`);
        return null;
      }
    }
    return current as TResult;
  };

  return mapConcurrent(items, concurrency, runItem);
},
244
455
  async review(taskId: string, reviewerRole = "reviewer", reviewOpts?: { content?: string; artifactPath?: string; disableTools?: boolean }): Promise<{ outcome: "accept" | "reject" | "changes_requested"; feedback: string }> {
245
456
  // review() is a VERDICT step: it must produce a parseable JSON {outcome, feedback}, not a
246
457
  // free-form markdown review. The resolved reviewer agent (e.g. ~/.pi/agent/agents/reviewer.md)
@@ -339,10 +550,62 @@ export function makeWorkflowCtx(manifest: TeamRunManifest, opts: MakeWorkflowCtx
339
550
  renderTemplate(name: string, vars: Record<string, string>): unknown {
340
551
  return renderPlanTemplate(name, vars);
341
552
  },
342
- vars: {} as Record<string, unknown>,
553
+ vars: opts.resumedState ? { ...opts.resumedState.vars } : ({} as Record<string, unknown>),
343
554
  setResult(artifactPath: string, meta?: Record<string, unknown>): void {
344
555
  finalResult = { artifactPath, meta };
345
556
  },
557
/**
 * Open the workflow phase named `title`.
 *
 * Emits `dwf.phase_completed` for the previously open phase (if any) followed
 * by `dwf.phase_started` for the new one. Calling with the title that is
 * already open does nothing. Distinct titles are remembered in
 * `phaseState.phases` up to 100 entries; beyond that a single internal warning
 * is logged and events continue to be emitted.
 *
 * @throws TypeError when `title` is not a non-empty string.
 */
phase(title: string): void {
  const validTitle = typeof title === "string" && title.length > 0;
  if (!validTitle) {
    throw new TypeError("ctx.phase(title) requires a non-empty string title.");
  }

  const previous = phaseState.currentPhase;
  // Re-announcing the currently open phase is a no-op: no event, no state change.
  if (previous === title) return;

  // The previous phase is closed out before the new one is opened.
  if (previous !== undefined) {
    appendEvent(manifest.eventsPath, {
      type: "dwf.phase_completed",
      runId: manifest.runId,
      data: { phase: previous },
    });
  }
  phaseState.currentPhase = title;

  // Each distinct title is tracked once, with a hard cap to bound memory.
  const alreadyTracked = phaseState.phases.includes(title);
  const hasRoom = phaseState.phases.length < 100;
  if (!alreadyTracked && hasRoom) {
    phaseState.phases.push(title);
  } else if (!alreadyTracked && !phaseCapWarned) {
    // Warn only the first time the cap is hit.
    phaseCapWarned = true;
    logInternalError(
      "dynamic-workflow-context.phase-cap",
      new Error("Phase list cap of 100 reached; further phases still emit events but are not added to the in-memory phases[] list. Use the events log as the durable source of truth."),
      `runId=${manifest.runId}`,
    );
  }

  appendEvent(manifest.eventsPath, {
    type: "dwf.phase_started",
    runId: manifest.runId,
    data: { phase: title },
  });
},
591
+ budget,
592
/**
 * Append a workflow-level log line (round-14 P1-3).
 *
 * The message is converted to text, kept in the in-memory buffer while that
 * buffer holds fewer than 1000 entries, and always emitted as a `dwf.log`
 * event.
 *
 * Conversion never throws and always yields a string:
 *  - strings are used verbatim;
 *  - Error instances become `"<name>: <message>"` (JSON.stringify would
 *    render them as `"{}"`);
 *  - everything else goes through JSON.stringify, falling back to String()
 *    when that throws (circular structures, BigInt) or returns undefined
 *    (undefined, functions, symbols).
 *
 * @param message Value to log; any type is accepted.
 */
log(message: unknown): void {
  let text: string;
  if (typeof message === "string") {
    text = message;
  } else if (message instanceof Error) {
    text = `${message.name}: ${message.message}`;
  } else {
    let encoded: string | undefined;
    try {
      encoded = JSON.stringify(message);
    } catch {
      // Not serializable as JSON — fall through to the String() fallback below.
      encoded = undefined;
    }
    if (encoded === undefined) {
      try {
        encoded = String(message);
      } catch {
        // e.g. objects without a usable toString / Symbol.toPrimitive.
        encoded = "[unserializable]";
      }
    }
    text = encoded;
  }
  // Bounded in-memory copy; the event below is emitted regardless of the cap.
  if (wfState.logs.length < 1000) {
    wfState.logs.push(text);
  }
  appendEvent(manifest.eventsPath, {
    type: "dwf.log",
    runId: manifest.runId,
    data: { message: text },
  });
},
605
/**
 * round-14 P1-5: return the workflow arguments held in the ctx runtime state,
 * cast to the caller-chosen type `T`. No runtime validation is performed.
 */
args<T = unknown>(): T {
  const supplied: unknown = wfState.args;
  return supplied as T;
},
346
609
  };
347
610
 
348
611
  // Attach the final-result slot via a non-enumerable getter so the runner can read it
@@ -351,6 +614,25 @@ export function makeWorkflowCtx(manifest: TeamRunManifest, opts: MakeWorkflowCtx
351
614
  get: () => finalResult,
352
615
  enumerable: false,
353
616
  });
617
+ // round-12 P0-1: phase state is read-only from the runner; the script can only mutate
618
+ // it via ctx.phase(title), which is the documented public surface.
619
+ Object.defineProperty(ctx, "__phaseState", {
620
+ get: () => phaseState,
621
+ enumerable: false,
622
+ });
623
+ // round-14 P1-3: in-memory log buffer is read-only from the runner; the script can only
624
+ // append via ctx.log(message). The events log remains the durable source of truth.
625
+ Object.defineProperty(ctx, "__logs", {
626
+ get: () => wfState.logs,
627
+ enumerable: false,
628
+ });
629
+ // round-18 P2-3: agent invocation counter is read-only from the runner. The script can
630
+ // only advance it via ctx.agent() (incremented in agent()'s finally). Exposed so
631
+ // getWorkflowCheckpoint() can report an accurate count.
632
+ Object.defineProperty(ctx, "__agentCount", {
633
+ get: () => agentCount,
634
+ enumerable: false,
635
+ });
354
636
  return ctx;
355
637
  }
356
638
 
@@ -359,11 +641,103 @@ export function getWorkflowFinalResult(ctx: WorkflowCtx): { artifactPath: string
359
641
  return (ctx as unknown as { __finalResult?: { artifactPath: string; meta?: Record<string, unknown> } }).__finalResult;
360
642
  }
361
643
 
644
/**
 * Runner-only accessor for the phase state recorded through ctx.phase().
 * Reads the hidden `__phaseState` property of the ctx; yields undefined when
 * the property is absent.
 */
export function getWorkflowPhaseState(ctx: WorkflowCtx): { currentPhase: string | undefined; phases: string[] } | undefined {
  type PhaseStateCarrier = { __phaseState?: { currentPhase: string | undefined; phases: string[] } };
  const carrier = ctx as unknown as PhaseStateCarrier;
  return carrier.__phaseState;
}
648
+
649
/**
 * Runner-only accessor for the in-memory buffer filled by ctx.log().
 * Reads the hidden `__logs` property of the ctx; yields undefined when the
 * property is absent. The buffer is capped at 1000 entries — the `dwf.log`
 * events are the durable record.
 */
export function getWorkflowLogs(ctx: WorkflowCtx): string[] | undefined {
  type LogCarrier = { __logs?: string[] };
  const carrier = ctx as unknown as LogCarrier;
  return carrier.__logs;
}
654
+
655
/**
 * round-18 P2-3: build a point-in-time snapshot of the DWF checkpoint state
 * (runner-only; not part of the public ctx surface).
 *
 * `vars`, `phases` and `logs` are shallow-copied so the returned object does
 * not alias the ctx's internal containers: later ctx.phase()/ctx.log() calls
 * or writes to ctx.vars do not change an already-taken snapshot, and mutating
 * the snapshot cannot corrupt the running workflow. Values stored inside
 * `vars` are NOT deep-cloned.
 *
 * @param ctx Workflow context to snapshot.
 * @returns A fresh checkpoint object; `phases`/`logs` default to empty arrays
 *   and `agentCount` to 0 when the hidden accessors are absent from `ctx`.
 */
export function getWorkflowCheckpoint(ctx: WorkflowCtx): DwfCheckpointState {
  const phaseState = getWorkflowPhaseState(ctx);
  const logs = getWorkflowLogs(ctx);
  const agentCount = (ctx as unknown as { __agentCount?: number }).__agentCount ?? 0;
  return {
    runId: ctx.runId,
    vars: { ...ctx.vars },
    phases: [...(phaseState?.phases ?? [])],
    currentPhase: phaseState?.currentPhase,
    logs: [...(logs ?? [])],
    spent: ctx.budget.spent(),
    agentCount,
    updatedAt: new Date().toISOString(),
  };
}
673
+
362
674
  /** Compose the agent task: prompt + optional dependency-input context block. */
363
675
  function composeAgentTask(call: AgentCallOpts): string {
364
- if (!call.inputs?.length) return call.prompt;
365
- const block = call.inputs.map((p) => `- ${p}`).join("\n");
366
- return `${call.prompt}\n\n## Inputs (artifact paths)\n${block}`;
676
+ let base = call.prompt;
677
+ if (call.inputs?.length) {
678
+ const block = call.inputs.map((p) => `- ${p}`).join("\n");
679
+ base = `${base}\n\n## Inputs (artifact paths)\n${block}`;
680
+ }
681
+ // Round-13 P0-3: when a schema is requested, append a JSON-output directive.
682
+ // The directive lives at the END of the prompt so it wins over any conflicting
683
+ // persona instruction in the agent's system prompt.
684
+ if (call.schema !== undefined) {
685
+ base = `${base}\n\n## Output format\nRespond with ONLY a single JSON object that matches the schema described in your instructions. Begin your response with { and end with }. Do not wrap the JSON in a code fence. Do not add any prose before or after the JSON.`;
686
+ }
687
+ return base;
688
+ }
689
+
690
/**
 * Round-13 P0-3: build the system prompt used when a call requests structured
 * output. The JSON-only directive (with a textual description of the schema's
 * shape) is appended to `base` after a blank line, or returned on its own when
 * `base` is undefined or empty.
 */
function composeSchemaSystemPrompt(base: string | undefined, schema: TSchema): string {
  const directive =
    "You are a structured-output assistant. " +
    `When responding, output ONLY a single JSON object matching this shape (no prose, no markdown fences, no commentary): ${describeSchemaShape(schema, 0)}. Begin your response with { and end with }.`;
  const hasBase = typeof base === "string" && base.length > 0;
  return hasBase ? `${base}\n\n${directive}` : directive;
}
704
+
705
/**
 * Walk a JSON-schema-like object recursively and produce a compact,
 * human-readable description of the shape it accepts.
 *
 * Literal constraints are reported before the primitive `type`, so a schema
 * such as `{ const: "accept", type: "string" }` renders as `"accept"` and
 * `{ type: "string", enum: ["a", "b"] }` renders as `"a" | "b"`. Without
 * that ordering a union of string literals would collapse to
 * `string | string | ...` and hide the allowed values from the model.
 *
 * @param schema Schema node to describe; non-objects yield `"any"`.
 * @param depth  Current recursion depth; beyond 4 the node is abbreviated to
 *   `"{...}"` to avoid runaway expansion on deeply nested schemas.
 * @returns The shape description. Optional object properties (those not listed
 *   in `required`) are suffixed with `?`.
 */
function describeSchemaShape(schema: unknown, depth: number): string {
  if (depth > 4) return "{...}";
  if (!schema || typeof schema !== "object") return "any";
  const obj = schema as Record<string, unknown>;
  const kind = obj.kind as string | undefined;
  const type = obj.type as string | undefined;

  // Literal value: more specific than the accompanying primitive type.
  if (obj.const !== undefined) {
    const literal = JSON.stringify(obj.const);
    if (literal !== undefined) return literal;
  }
  // Enumerated values: likewise more specific than `type`.
  if (Array.isArray(obj.enum) && obj.enum.length > 0) {
    return obj.enum.map((v) => JSON.stringify(v)).join(" | ");
  }

  if (kind === "object" || type === "object") {
    const properties = obj.properties;
    if (!properties || typeof properties !== "object") return "{}";
    const required = Array.isArray(obj.required) ? new Set(obj.required as string[]) : new Set<string>();
    const props = Object.entries(properties as Record<string, unknown>)
      .map(([key, sub]) => {
        const mark = required.has(key) ? "" : "?";
        return `"${key}"${mark}: ${describeSchemaShape(sub, depth + 1)}`;
      })
      .join(", ");
    return `{${props}}`;
  }
  if (kind === "array" || type === "array") {
    return `[${describeSchemaShape(obj.items, depth + 1)}]`;
  }
  if (type === "string") return "string";
  if (type === "number" || type === "integer") return "number";
  if (type === "boolean") return "boolean";
  if (type === "null") return "null";
  // Unions: describe each alternative.
  if (Array.isArray(obj.anyOf)) return obj.anyOf.map((s) => describeSchemaShape(s, depth + 1)).join(" | ");
  if (Array.isArray(obj.oneOf)) return obj.oneOf.map((s) => describeSchemaShape(s, depth + 1)).join(" | ");
  return "any";
}
368
742
 
369
743
  /**
@@ -23,7 +23,9 @@ import { resolveRealContainedPath } from "../utils/safe-paths.ts";
23
23
  import { appendEvent } from "../state/event-log.ts";
24
24
  import { writeArtifact } from "../state/artifact-store.ts";
25
25
  import { logInternalError } from "../utils/internal-error.ts";
26
- import { makeWorkflowCtx, getWorkflowFinalResult } from "./dynamic-workflow-context.ts";
26
+ import { makeWorkflowCtx, getWorkflowFinalResult, getWorkflowPhaseState } from "./dynamic-workflow-context.ts";
27
+ import { DwfStore } from "./dwf-state-store.ts";
28
+ import { assertDeterministicScript, isDeterminismCheckEnabled } from "./deterministic-ast.ts";
27
29
  import { projectCrewRoot, userPiRoot, packageRoot } from "../utils/paths.ts";
28
30
  import type { DynamicWorkflowConfig } from "../workflows/workflow-config.ts";
29
31
  import type { TeamRunManifest, TeamTaskState } from "../state/types.ts";
@@ -36,6 +38,8 @@ export interface RunDynamicWorkflowInput {
36
38
  signal: AbortSignal;
37
39
  concurrency?: number;
38
40
  modelOverride?: string;
41
+ /** round-14 P1-2: per-workflow token budget. Overrides workflow.maxTokenBudget. */
42
+ tokenBudget?: number;
39
43
  }
40
44
 
41
45
  export interface RunDynamicWorkflowResult {
@@ -46,6 +50,27 @@ export interface RunDynamicWorkflowResult {
46
50
  /** The signature a .dwf.ts default export must satisfy. */
47
51
  export type DynamicWorkflowScript = (ctx: import("./dynamic-workflow-context.ts").WorkflowCtx) => Promise<void> | void;
48
52
 
53
/**
 * round-12 P0-4: defensive structured-clone guard at the runner boundary.
 *
 * Probes `value` with `structuredClone` and discards the copy; only whether
 * cloning succeeds matters. Today this is mostly future-proofing: the
 * `setResult()` path reads an artifact file as a string, and strings are
 * always structured-cloneable. If a future code path produces a
 * non-cloneable value (e.g. something wrapping a Symbol or a function), the
 * failure surfaces here with an actionable message instead of as a bare
 * `DataCloneError` further downstream. The message also nudges users toward
 * the most common cause: forgetting `await` on ctx.agent() or ctx.review().
 *
 * @param value - The value to probe for structured-cloneability.
 * @param name - Human-readable label for `value`; becomes the start of the error message.
 * @throws Error when `structuredClone(value)` throws. The message ends with the
 *   original failure text, and the original error is attached as `cause`.
 */
function assertStructuredCloneable(value: unknown, name: string): void {
  try {
    structuredClone(value);
  } catch (error) {
    const detail = error instanceof Error ? error.message : String(error);
    const wrapped = new Error(
      `${name} must be structured-cloneable; did you forget to await ctx.agent() or ctx.review()? ${detail}`,
    );
    // Keep the original failure (its type and stack) reachable for debugging.
    // defineProperty mirrors the shape of the native `cause` option (own,
    // non-enumerable) without requiring the ES2022 two-argument Error constructor.
    Object.defineProperty(wrapped, "cause", {
      value: error,
      writable: true,
      enumerable: false,
      configurable: true,
    });
    throw wrapped;
  }
}
73
+
49
74
  /**
50
75
  * Resolve + validate the script path against the allowlist of workflow dirs (§0c C5).
51
76
  * Returns the real contained path or throws.
@@ -79,12 +104,24 @@ function resolveScriptPath(workflow: DynamicWorkflowConfig, cwd: string): string
79
104
  /**
80
105
  * Transpile + load the .dwf.ts default export. Uses jiti (already a dep) for TS→JS.
81
106
  * Returns the default export function or throws.
107
+ *
108
+ * Round-13 P0-2: after reading the script source, run `assertDeterministicScript`
109
+ * to reject non-deterministic calls (Date.now()/Math.random()/new Date()) BEFORE
110
+ * jiti executes the module. The check is opt-out via PI_CREW_DWF_SKIP_DETERMINISM_CHECK=1.
82
111
  */
83
112
  async function loadWorkflowModule(scriptPath: string): Promise<DynamicWorkflowScript> {
113
+ // Round-13 P0-2: read source first so we can AST-scan before execution.
114
+ // jiti does not surface the transpiled source back to us, so we read the
115
+ // raw .dwf.ts file. This is the same source jiti will execute.
116
+ const scriptSource = readFileSync(scriptPath, "utf-8");
117
+ if (isDeterminismCheckEnabled()) {
118
+ assertDeterministicScript(scriptSource);
119
+ }
84
120
  // jiti is the same loader async-runner.ts uses (resolveTypeScriptLoader). We require it
85
121
  // lazily so this module stays importable in environments without jiti (type-only consumers).
86
122
  // Fix round-4: use createRequire(import.meta.url) so `require` works under the strip-types
87
123
  // loader fallback (Node ≥ 22.6) where bare `require` is not defined in ESM scope.
124
+ // LAZY: defer dynamic import of node:module to its call site.
88
125
  const { createRequire } = await import("node:module");
89
126
  const require = createRequire(import.meta.url);
90
127
  // eslint-disable-next-line @typescript-eslint/no-require-imports
@@ -109,11 +146,37 @@ export async function runDynamicWorkflow(input: RunDynamicWorkflowInput): Promis
109
146
 
110
147
  appendEvent(eventsPath, { type: "dwf.started", runId: manifest.runId, data: { workflow: workflow.name, script: scriptPath } });
111
148
 
149
+ // round-18 P2-3: resume/checkpoint. Load any existing checkpoint for this run's stateRoot.
150
+ // stateRoot is already <crewRoot>/state/runs/<runId>, so the checkpoint lands at
151
+ // <stateRoot>/dwf-checkpoint.json (no double-nesting). A missing checkpoint (fresh run)
152
+ // yields undefined — makeWorkflowCtx starts with empty defaults (backward compatible).
153
+ const dwfStore = new DwfStore(manifest.stateRoot);
154
+ const resumedState = dwfStore.load();
155
+ if (resumedState) {
156
+ appendEvent(eventsPath, {
157
+ type: "dwf.resumed",
158
+ runId: manifest.runId,
159
+ data: { agentCount: resumedState.agentCount, phases: resumedState.phases, currentPhase: resumedState.currentPhase },
160
+ });
161
+ }
162
+
112
163
  const ctx = makeWorkflowCtx(manifest, {
113
164
  concurrency: input.concurrency ?? workflow.maxConcurrency ?? 4,
114
165
  signal,
115
166
  team: input.team,
116
167
  modelOverride: input.modelOverride,
168
+ tokenBudget: input.tokenBudget ?? workflow.maxTokenBudget,
169
+ args: manifest.args,
170
+ resumedState,
171
+ // round-18 P2-3: checkpoint after each ctx.agent() call so a crash between calls
172
+ // leaves durable state. onCheckpoint captures the closure values at call time.
173
+ onCheckpoint: (state) => {
174
+ try {
175
+ dwfStore.save(state);
176
+ } catch (error) {
177
+ logInternalError("dynamic-workflow-runner.checkpoint-save", error, `runId=${manifest.runId}`);
178
+ }
179
+ },
117
180
  });
118
181
 
119
182
  // Freeze the ctx so the script cannot add/override capability methods (§0c C4).
@@ -150,6 +213,12 @@ export async function runDynamicWorkflow(input: RunDynamicWorkflowInput): Promis
150
213
  const final = getWorkflowFinalResult(ctx);
151
214
  const finalText = final ? readFinalArtifact(final.artifactPath) : `(dynamic workflow '${workflow.name}' completed without calling ctx.setResult())`;
152
215
 
216
+ // round-12 P0-4: defensive guard before the summary artifact is written.
217
+ // finalText is a string at this point, and structuredClone on a string always
218
+ // succeeds, so today this check is not expected to fail. It is kept as future-proofing:
219
+ // if a later change lets an uncloneable value (most often an unawaited Promise) reach here, we want a clear error.
220
+ assertStructuredCloneable(finalText, "final artifact content (set via ctx.setResult)");
221
+
153
222
  // Write a summary artifact mirroring the static-workflow summary.md contract (run.ts reads this).
154
223
  const summary = writeArtifact(manifest.artifactsRoot, {
155
224
  kind: "result",
@@ -158,12 +227,35 @@ export async function runDynamicWorkflow(input: RunDynamicWorkflowInput): Promis
158
227
  producer: "dynamic-workflow",
159
228
  });
160
229
 
230
+ // round-12 P0-1: safety net — if a script never explicitly closes its
231
+ // final phase before returning, the runner emits a closing event so the
232
+ // last open phase is always terminated before dwf.completed.
233
+ const phaseState = getWorkflowPhaseState(ctx);
234
+ if (phaseState?.currentPhase !== undefined) {
235
+ appendEvent(eventsPath, {
236
+ type: "dwf.phase_completed",
237
+ runId: manifest.runId,
238
+ data: { phase: phaseState.currentPhase },
239
+ });
240
+ phaseState.currentPhase = undefined;
241
+ }
242
+
161
243
  appendEvent(eventsPath, { type: "dwf.completed", runId: manifest.runId, data: { workflow: workflow.name, summaryArtifact: summary.path } });
162
244
 
245
+ // round-18 P2-3: the run completed cleanly — delete the checkpoint so a fresh re-run
246
+ // (same runId) starts from scratch rather than resuming stale state.
247
+ dwfStore.delete();
248
+
249
+ // round-12 P0-4: also guard the manifest.summary slice (the value is
250
+ // written into JSON-serialized manifest state — a Promise here would also
251
+ // crash later in the run-event-bus emitter).
252
+ const summaryText = finalText.slice(0, 2000);
253
+ assertStructuredCloneable(summaryText, "manifest.summary (derived from final result)");
254
+
163
255
  const updatedManifest: TeamRunManifest = {
164
256
  ...manifest,
165
257
  status: "completed",
166
- summary: finalText.slice(0, 2000),
258
+ summary: summaryText,
167
259
  updatedAt: new Date().toISOString(),
168
260
  artifacts: [...manifest.artifacts, summary],
169
261
  };