pi-crew 0.9.5 → 0.9.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "pi-crew",
3
- "version": "0.9.5",
3
+ "version": "0.9.7",
4
4
  "description": "Pi extension for coordinated AI teams, workflows, worktrees, and async task orchestration",
5
5
  "author": "baphuongna",
6
6
  "license": "MIT",
@@ -39,6 +39,7 @@
39
39
  "docs/",
40
40
  "tsconfig.json",
41
41
  "schema.json",
42
+ "types/",
42
43
  "CHANGELOG.md",
43
44
  "LICENSE",
44
45
  "NOTICE.md"
@@ -52,6 +53,7 @@
52
53
  "test:unit": "node scripts/test-runner.mjs --test-concurrency=4 --test-timeout=180000 --test-force-exit test/unit/*.test.ts",
53
54
  "test:watch": "tsx --watch --test --test-concurrency=4 --test-timeout=30000 --test-force-exit test/unit/*.test.ts",
54
55
  "test:integration": "node scripts/test-runner.mjs --test-concurrency=1 --test-timeout=120000 test/integration/*.test.ts",
56
+ "test:smoke": "node scripts/test-runner.mjs --test-concurrency=1 --test-timeout=180000 test/smoke/*.smoke.ts",
55
57
  "build:bundle": "node scripts/build-bundle.mjs",
56
58
  "bench": "node scripts/run-bench.mjs",
57
59
  "bench:check": "node scripts/bench-check.mjs",
@@ -63,7 +65,10 @@
63
65
  "smoke:release": "node scripts/release-smoke.mjs"
64
66
  },
65
67
  "exports": {
66
- "./schema.json": "./schema.json"
68
+ "./schema.json": "./schema.json",
69
+ "./workflow": {
70
+ "types": "./types/dwf.d.ts"
71
+ }
67
72
  },
68
73
  "pi": {
69
74
  "extensions": [
@@ -81,6 +86,7 @@
81
86
  },
82
87
  "dependencies": {
83
88
  "@sinclair/typebox": "^0.34.49",
89
+ "acorn": "^8.17.0",
84
90
  "ajv": "^8.20.0",
85
91
  "cli-highlight": "^2.1.11",
86
92
  "diff": "^5.2.0",
@@ -10,6 +10,7 @@ import { DEFAULT_PATHS } from "../../config/defaults.ts";
10
10
  import type { TeamToolParamsValue } from "../../schema/team-tool-schema.ts";
11
11
  import { getPiSpawnCommand } from "../../runtime/pi-spawn.ts";
12
12
  import { getRuntimeWarmupStatus } from "../../runtime/runtime-warmup.ts";
13
+ import { scanZombieSubagents, formatZombieReport } from "../../runtime/zombie-scanner.ts";
13
14
  import { validateResources } from "../validate-resources.ts";
14
15
  import { detectDrift, formatDriftReport, type DriftReport } from "../../config/drift-detector.ts";
15
16
  import { TeamToolParams } from "../../schema/team-tool-schema.ts";
@@ -237,6 +238,19 @@ export function buildTeamDoctorReport(input: TeamDoctorReportInput): TeamDoctorR
237
238
  }
238
239
 
239
240
  export function handleDoctor(ctx: TeamContext, params: TeamToolParamsValue = {}): PiTeamsToolResult {
241
+ // Sub-focus: zombie sub-agent scan. READ-ONLY — never kills. Returns a table of
242
+ // orphaned pi-crew sub-agents identified by the authoritative PI_CREW_KIND=subagent
243
+ // marker. The user's main session never carries that marker, so it can never appear.
244
+ if (params.focus === "zombies") {
245
+ const scan = scanZombieSubagents();
246
+ const text = formatZombieReport(scan);
247
+ return result(text, {
248
+ action: "doctor",
249
+ status: "ok",
250
+ data: { zombies: scan.zombies.length, live: scan.live.length, errors: scan.errors.length },
251
+ }, false);
252
+ }
253
+
240
254
  const loadedConfig = loadConfig(ctx.cwd);
241
255
  let smokeChildPi: { ok: boolean; detail: string } | undefined;
242
256
  if (configRecord(params.config).smokeChildPi === true) {
@@ -281,6 +281,7 @@ export async function handleRun(params: TeamToolParamsValue, ctx: TeamContext):
281
281
  workspaceMode: params.workspaceMode,
282
282
  ownerSessionId: ctx.sessionId,
283
283
  runKind: params.runKind,
284
+ args: params.args,
284
285
  });
285
286
  const goalArtifact = writeArtifact(paths.artifactsRoot, {
286
287
  kind: "prompt",
@@ -323,6 +324,7 @@ export async function handleRun(params: TeamToolParamsValue, ctx: TeamContext):
323
324
  team: dwfTeam,
324
325
  signal: ctx.signal ?? AbortSignal.timeout(3_600_000),
325
326
  modelOverride: params.model,
327
+ tokenBudget: params.tokenBudget ?? (workflow as import("../../workflows/workflow-config.ts").DynamicWorkflowConfig).maxTokenBudget,
326
328
  });
327
329
  } catch (runnerError) {
328
330
  // Round-11 runtime fix: persist manifest with status=failed when runner throws
@@ -602,7 +602,7 @@ async function main(): Promise<void> {
602
602
  const { allWorkflows, discoverWorkflows } = await import("../workflows/discover-workflows.ts");
603
603
  const wf = allWorkflows(discoverWorkflows(manifest.cwd)).find((w) => w.name === manifest.workflow);
604
604
  if (!wf || wf.runtime !== "dynamic" || !wf.dynamicScript) throw new Error(`runKind="dynamic-workflow" but workflow '${manifest.workflow}' is not dynamic (runId=${manifest.runId})`);
605
- const dwfResult = await runDynamicWorkflow({ manifest, workflow: wf as import("../workflows/workflow-config.ts").DynamicWorkflowConfig, signal: abortController.signal });
605
+ const dwfResult = await runDynamicWorkflow({ manifest, workflow: wf as import("../workflows/workflow-config.ts").DynamicWorkflowConfig, signal: abortController.signal, tokenBudget: wf.maxTokenBudget });
606
606
  saveRunManifest(dwfResult.manifest);
607
607
  earlyResult = dwfResult;
608
608
  }
@@ -95,6 +95,17 @@ export function killProcessPid(pid: number): void {
95
95
  }
96
96
 
97
97
  function killProcessTree(pid: number | undefined, child?: ChildProcess): void {
98
+ // Phase-0 diagnostic (HB-003a): capture who invoked killProcessTree so the
99
+ // exit-null race has a provenance trail. .stack is best-effort (may be undefined
100
+ // under deep async), so we take a snapshot lazily.
101
+ try {
102
+ const callerStack = new Error("killProcessTree caller").stack ?? "(no stack)";
103
+ logInternalError(
104
+ "child-pi.kill-process-tree-invoked",
105
+ new Error(`pid=${pid} called from:\n${callerStack.split("\n").slice(0, 8).join("\n")}`),
106
+ `pid=${pid}`,
107
+ );
108
+ } catch { /* diagnostic best-effort */ }
98
109
  if (!pid || !Number.isInteger(pid) || pid <= 0) return;
99
110
  if (child && child.exitCode !== null) return;
100
111
  killProcessPid(pid);
@@ -124,6 +135,18 @@ export interface ChildPiLifecycleEvent {
124
135
  stderrExcerpt?: string;
125
136
  /** Timestamp (ISO). */
126
137
  ts: string;
138
+ /** Phase-0 diagnostic (HB-003a): the signal that killed the child (when
139
+ * available). Was previously discarded after building the error string. */
140
+ signal?: string;
141
+ /** Phase-0 diagnostic (HB-003a): final-drain race timing, present only on
142
+ * exit events where a drain timer was armed. Surfaces the exit-null race. */
143
+ diagnostic?: {
144
+ finalDrainArmed: boolean;
145
+ forcedFinalDrain: boolean;
146
+ finalDrainFiredMonotonicMs?: number;
147
+ finalAssistantEventMonotonicMs?: number;
148
+ exitMonotonicMs: number;
149
+ };
127
150
  }
128
151
 
129
152
  export interface ChildPiRunInput {
@@ -267,6 +290,9 @@ export function buildChildPiSpawnOptions(cwd: string, env: NodeJS.ProcessEnv): S
267
290
  "PI_CREW_MAX_DEPTH",
268
291
  "PI_CREW_INHERIT_PROJECT_CONTEXT",
269
292
  "PI_CREW_INHERIT_SKILLS",
293
+ // PI_CREW_KIND marks this process as a crew sub-agent (vs the user's main session).
294
+ // doctor --zombies matches it to safely list orphaned sub-agents only.
295
+ "PI_CREW_KIND",
270
296
  // PI_CREW_PARENT_PID is needed by child-pi's parent-guard (uses
271
297
  // process.kill(pid, 0) liveness check). The PID is not a secret.
272
298
  "PI_CREW_PARENT_PID",
@@ -577,6 +603,15 @@ export async function runChildPi(input: ChildPiRunInput): Promise<ChildPiRunResu
577
603
  let noResponseTimer: NodeJS.Timeout | undefined;
578
604
  const finalDrainMs = input.finalDrainMs ?? FINAL_DRAIN_MS;
579
605
  const hardKillMs = input.hardKillMs ?? HARD_KILL_MS;
606
+ // Phase-0 diagnostic (HB-003a): track the final-drain race that produces
607
+ // `exit null` for ctx.agent({disableTools:true}). These vars are READ-ONLY
608
+ // instrumentation — no behavior change. finalDrainArmed lets the close
609
+ // handler know a drain timer existed even after clearFinalDrainTimers() ran;
610
+ // spawnMonotonicMs gives us relative timing to distinguish a race from a crash.
611
+ let finalDrainArmed = false;
612
+ let finalDrainFiredMonotonicMs: number | undefined;
613
+ const spawnMonotonicMs = performance.now();
614
+ let finalAssistantEventMonotonicMs: number | undefined;
580
615
  // FIX (Round 14): Bound the env-controlled response timeout to
581
616
  // [1_000ms, 3_600_000ms] (1s–1h) so a hostile or accidental value
582
617
  // (e.g. 1, or 999_999_999) cannot disable the timeout or cause
@@ -680,20 +715,27 @@ export async function runChildPi(input: ChildPiRunInput): Promise<ChildPiRunResu
680
715
  if (maxTurns !== undefined && !softLimitReached && turnCount >= maxTurns) {
681
716
  softLimitReached = true;
682
717
  // Inject steer via stdin to tell child to wrap up.
683
- // If stdin is not writable or the write fails (backpressure/closed),
684
- // the steer cannot be injected and the agent could run indefinitely.
685
- // Kill the process tree in that case to enforce the turn limit.
718
+ // Steer injection is ADVISORY: it asks the worker to wrap up. The real
719
+ // enforcement is the hard-abort at maxTurns + graceTurns (below). So a
720
+ // failed/non-writable stdin must NOT kill the worker — doing so destroys a
721
+ // valid answer already in stdout (Phase-0 root cause of the
722
+ // disableTools/maxTurns:1 exit-null bug). Just log + let the hard-abort
723
+ // path handle a genuinely runaway worker.
686
724
  if (child.stdin?.writable) {
687
725
  const steerPayload = JSON.stringify({ type: "steer", message: "You have reached your turn limit. Wrap up immediately — provide your final answer now." }) + "\n";
688
726
  const writeSucceeded = child.stdin.write(steerPayload);
689
727
  if (!writeSucceeded) {
690
- logInternalError("child-pi.steer-backpressure", new Error("stdin write returned false during steer injection; buffer full"), `pid=${child.pid}`);
691
- steerInjectionFailed = true;
692
- killProcessTree(child.pid, child);
728
+ // Normal Node backpressure: the payload is buffered and will flush on
729
+ // 'drain'. NOT a failure — do NOT kill the worker. The steer is
730
+ // advisory; if the worker ignores it and runs past maxTurns +
731
+ // graceTurns, the hard-abort below terminates it.
732
+ logInternalError("child-pi.steer-backpressure", new Error("stdin write returned false (normal backpressure); steer buffered, worker NOT killed"), `pid=${child.pid}`);
693
733
  }
694
734
  } else {
695
- logInternalError("child-pi.steer-not-writable", new Error("stdin not writable when attempting steer injection"), `pid=${child.pid}`);
696
- killProcessTree(child.pid, child);
735
+ // stdin closed (worker already finished) or otherwise unwritable.
736
+ // Also advisory — the worker is done or nearly done; let it exit
737
+ // naturally. Hard-abort remains the safety net for true runaways.
738
+ logInternalError("child-pi.steer-not-writable", new Error("stdin not writable when attempting steer injection (worker may be done); worker NOT killed"), `pid=${child.pid}`);
697
739
  }
698
740
  } else if (maxTurns !== undefined && softLimitReached && turnCount >= maxTurns + (graceTurns ?? 5)) {
699
741
  // Hard abort — terminate after grace turns
@@ -708,9 +750,12 @@ export async function runChildPi(input: ChildPiRunInput): Promise<ChildPiRunResu
708
750
  }
709
751
  input.onJsonEvent?.(event);
710
752
  if (!isFinalAssistantEvent(event) || childExited || settled || finalDrainTimer) return;
753
+ finalAssistantEventMonotonicMs = performance.now();
754
+ finalDrainArmed = true; // Phase-0 diagnostic: track that a drain timer was created.
711
755
  finalDrainTimer = setTimeout(() => {
712
756
  if (settled || childExited) return;
713
757
  forcedFinalDrain = true;
758
+ finalDrainFiredMonotonicMs = performance.now(); // Phase-0 diagnostic: race timing.
714
759
  input.onLifecycleEvent?.({ type: "final_drain", pid: child.pid, ts: new Date().toISOString() });
715
760
  try {
716
761
  child.kill(process.platform === "win32" ? undefined : "SIGTERM");
@@ -765,7 +810,27 @@ export async function runChildPi(input: ChildPiRunInput): Promise<ChildPiRunResu
765
810
  }
766
811
  // Catch all errors from settle to prevent unhandled rejection from propagating
767
812
  try {
768
- resolve({ ...result, exitStatus: result.exitStatus ?? { exitCode: result.exitCode, cancelled: abortRequested, timedOut: responseTimeoutHit, killed: hardKilled, cleanupErrors, finalDrainMs } });
813
+ resolve({
814
+ ...result,
815
+ exitStatus: result.exitStatus ?? {
816
+ exitCode: result.exitCode,
817
+ cancelled: abortRequested,
818
+ timedOut: responseTimeoutHit,
819
+ killed: hardKilled,
820
+ // Phase-0 diagnostic (HB-003a): surface the final-drain race state.
821
+ // finalDrainArmed lets Phase 1 decide whether a signal-death (exitCode=null)
822
+ // should be treated as a forced final drain. READ-ONLY for now.
823
+ ...(finalDrainArmed || forcedFinalDrain
824
+ ? {
825
+ finalDrainArmed,
826
+ forcedFinalDrain,
827
+ finalDrainFiredMonotonicMs,
828
+ }
829
+ : {}),
830
+ cleanupErrors,
831
+ finalDrainMs,
832
+ },
833
+ });
769
834
  } catch (resolveError) {
770
835
  logInternalError("child-pi.settle-resolve", resolveError, `result=${JSON.stringify({ exitCode: result.exitCode })}`);
771
836
  }
@@ -866,7 +931,30 @@ export async function runChildPi(input: ChildPiRunInput): Promise<ChildPiRunResu
866
931
  rejectPendingOperations(exitError);
867
932
  }
868
933
  try {
869
- input.onLifecycleEvent?.({ type: "exit", pid: child.pid, exitCode: code, ts: new Date().toISOString(), error: exitError?.message, stderrExcerpt: isUnexpectedExit ? stderr.slice(-1000) || undefined : undefined });
934
+ // Phase-0 diagnostic (HB-003a): capture signal + drain timing in the
935
+ // exit lifecycle event so the exit-null race is diagnosable instead of
936
+ // opaque. `signal` was previously discarded after building the error msg.
937
+ input.onLifecycleEvent?.({
938
+ type: "exit",
939
+ pid: child.pid,
940
+ exitCode: code,
941
+ ts: new Date().toISOString(),
942
+ error: exitError?.message,
943
+ stderrExcerpt: isUnexpectedExit ? stderr.slice(-1000) || undefined : undefined,
944
+ // Phase-0 diagnostic fields (kept optional — no type change required).
945
+ ...(signal ? { signal } : {}),
946
+ ...(finalDrainArmed || forcedFinalDrain
947
+ ? {
948
+ diagnostic: {
949
+ finalDrainArmed,
950
+ forcedFinalDrain,
951
+ finalDrainFiredMonotonicMs,
952
+ finalAssistantEventMonotonicMs,
953
+ exitMonotonicMs: performance.now() - spawnMonotonicMs,
954
+ },
955
+ }
956
+ : {}),
957
+ });
870
958
  } catch (err) {
871
959
  logInternalError("child-pi.on-lifecycle-event", err, `event=exit, pid=${child.pid}`);
872
960
  }
@@ -902,6 +990,9 @@ export async function runChildPi(input: ChildPiRunInput): Promise<ChildPiRunResu
902
990
  const finalExitCode = forcedFinalDrain && !timeoutError ? 0 : exitCode;
903
991
  const wasGraceAborted = softLimitReached && turnCount >= (maxTurns ?? 0) + (graceTurns ?? 5);
904
992
  const wasParentAborted = abortDueToParentSignal && !wasGraceAborted;
993
+ // steerInjectionFailed is now always false (Phase-1 fix: steer backpressure
994
+ // is logged, not fatal). The steerError branch is retained for safety in
995
+ // case a future change reintroduces a fatal steer path.
905
996
  const steerError = steerInjectionFailed ? "Steer injection failed due to stdin backpressure; process killed" : undefined;
906
997
  settle({ exitCode: finalExitCode, stdout, stderr, ...(timeoutError ? { error: timeoutError.error } : {}), ...(steerError ? { error: steerError } : {}), aborted: wasGraceAborted || wasParentAborted, steered: softLimitReached && !wasGraceAborted, exitStatus: { exitCode: finalExitCode, cancelled: abortRequested, timedOut: responseTimeoutHit, killed: hardKilled, cleanupErrors, finalDrainMs } });
907
998
  });
@@ -0,0 +1,161 @@
1
+ /**
2
+ * deterministic-ast.ts — AST-based determinism enforcement for dynamic-workflow scripts (round-13 P0-2).
3
+ *
4
+ * Rejects `Date.now()`, `Math.random()`, and `new Date()` at workflow-load time
5
+ * using a true AST walk (not regex) so that:
6
+ * - Prompts mentioning "Date.now()" as string literals are accepted.
7
+ * - Comments containing "Date.now()" are accepted.
8
+ * - `Date.parse()`, `Date.UTC()`, `Math.floor()`, etc. are accepted (only `now` and `random` are blocked).
9
+ *
10
+ * Adapted from pi-dynamic-workflows/src/workflow.ts (MIT) — see NOTICE.md.
11
+ *
12
+ * The walker uses acorn's parse() with permissive flags (allowAwaitOutsideFunction,
13
+ * allowReturnOutsideFunction) so we don't reject perfectly valid workflow scripts
14
+ * that contain top-level `await` or `return`.
15
+ *
16
+ * On parse error, this function returns silently: jiti will surface a clearer
17
+ * parse error downstream. We don't double-report parse errors.
18
+ */
19
+
20
+ import { parse } from "acorn";
21
+
22
/**
 * Fixed message attached to every DeterminismError. It names the rejected
 * calls and the environment variable that disables the check.
 */
const NONDETERMINISM_ERROR =
  "Workflow scripts must be deterministic: Date.now()/Math.random()/new Date() are unavailable. These introduce non-reproducible behavior across runs. Use ctx.vars for cached state, or pass a fixed seed via ctx.setArgs(). To bypass this check (escape hatch), set PI_CREW_DWF_SKIP_DETERMINISM_CHECK=1.";

/**
 * Error thrown when a workflow script contains a rejected non-deterministic
 * construct. The constructor takes no arguments: the message is always
 * NONDETERMINISM_ERROR and `name` is always "DeterminismError".
 */
export class DeterminismError extends Error {
  constructor() {
    super(NONDETERMINISM_ERROR);
    // Overwrite the inherited "Error" so callers can identify the failure by name.
    this.name = "DeterminismError";
  }
}
31
+
32
/**
 * Parse `script` with acorn and check the resulting tree for non-deterministic calls.
 *
 * @param script - Source text of the workflow script.
 * @throws DeterminismError as soon as the walker finds a rejected construct.
 *
 * If acorn cannot parse the text, the function returns without reporting
 * anything; the parse failure is left for the downstream loader (jiti) to
 * report, so it is not reported twice.
 */
export function assertDeterministicScript(script: string): void {
  // Permissive flags: top-level `await` and `return` are accepted.
  const parserOptions = {
    ecmaVersion: "latest",
    sourceType: "module",
    allowAwaitOutsideFunction: true,
    allowReturnOutsideFunction: true,
    ranges: false,
  } as const;

  let program: AstNode | undefined;
  try {
    program = parse(script, parserOptions) as unknown as AstNode;
  } catch {
    // Unparseable input: skip the check rather than double-report the syntax error.
    program = undefined;
  }
  if (program === undefined) return;

  // Kept outside the try so a DeterminismError is never swallowed.
  assertDeterministicAst(program);
}
53
+
54
/**
 * Report whether the determinism check should run.
 *
 * The check is on unless the environment variable
 * PI_CREW_DWF_SKIP_DETERMINISM_CHECK is exactly the string "1". Any other
 * value, or an unset variable, leaves it enabled. This is the escape hatch
 * for workflows that legitimately depend on time or randomness.
 *
 * @returns `false` only when the skip variable equals "1".
 */
export function isDeterminismCheckEnabled(): boolean {
  const skipFlag = process.env.PI_CREW_DWF_SKIP_DETERMINISM_CHECK;
  return skipFlag !== "1";
}
62
+
63
+ // ---------------------------------------------------------------------------
64
+ // AST walker
65
+ // ---------------------------------------------------------------------------
66
+
67
/**
 * Minimal structural view of a parsed syntax-tree node: a string `type` tag
 * plus arbitrary further fields, which are read as `unknown` and narrowed at
 * the point of use.
 */
interface AstNode {
  type: string;
  [key: string]: unknown;
}

/**
 * Narrow an arbitrary value to an AstNode.
 *
 * @param value - Anything found while inspecting a node's fields.
 * @returns The same object when it is a non-null object whose `type` is a
 *   string; otherwise `undefined` (primitives, null, arrays, untagged objects).
 */
function asAstNode(value: unknown): AstNode | undefined {
  const looksLikeNode =
    typeof value === "object" &&
    value !== null &&
    typeof (value as Record<string, unknown>).type === "string";
  return looksLikeNode ? (value as AstNode) : undefined;
}
78
+
79
/**
 * Collect the direct child nodes of `node`.
 *
 * Every field value is inspected in property order. A field that is an array
 * contributes each element that passes asAstNode; any other field contributes
 * itself when it passes asAstNode. Nested arrays are not descended into.
 *
 * @param node - Node whose fields are scanned.
 * @returns Child nodes in field order, then array order.
 */
function astChildren(node: AstNode): AstNode[] {
  return Object.values(node).flatMap((field) => {
    const candidates: unknown[] = Array.isArray(field) ? field : [field];
    return candidates.flatMap((candidate) => {
      const child = asAstNode(candidate);
      return child ? [child] : [];
    });
  });
}
94
+
95
/**
 * Walk the tree rooted at `node` depth-first and throw on the first
 * non-deterministic construct.
 *
 * Rejected constructs: `Date.now()`, `Math.random()`, `new Date(...)`, and a
 * bare `Date()` call. The bare call is included because `Date()` invoked
 * without `new` returns the current time as a string, so it is as
 * non-reproducible as `new Date()`.
 *
 * @param node - Root of the (sub)tree to check.
 * @throws DeterminismError when the node or any descendant is a rejected construct.
 */
function assertDeterministicAst(node: AstNode): void {
  if (isDateNowCall(node) || isMathRandomCall(node) || isNewDateExpression(node) || isBareDateCall(node)) {
    throw new DeterminismError();
  }
  for (const child of astChildren(node)) assertDeterministicAst(child);
}

/** True when `node` is a call whose callee is the plain identifier `Date`, i.e. `Date()` without `new`. */
function isBareDateCall(node: AstNode): boolean {
  if (node.type !== "CallExpression") return false;
  const callee = asAstNode(node.callee);
  return callee?.type === "Identifier" && callee.name === "Date";
}
101
+
102
/** True when `node` is a call expression whose callee is the member access `Date.now`. */
function isDateNowCall(node: AstNode): boolean {
  if (node.type !== "CallExpression") return false;
  return isMemberExpression(node, "callee", "Date", "now");
}
105
+
106
/** True when `node` is a call expression whose callee is the member access `Math.random`. */
function isMathRandomCall(node: AstNode): boolean {
  if (node.type !== "CallExpression") return false;
  return isMemberExpression(node, "callee", "Math", "random");
}
109
+
110
/**
 * True when `node` is a `new` expression whose callee is the plain identifier
 * `Date`. The argument list is not inspected, so `new Date()` and
 * `new Date(0)` both match.
 */
function isNewDateExpression(node: AstNode): boolean {
  const constructed = node.type === "NewExpression" ? asAstNode(node.callee) : undefined;
  return constructed !== undefined && constructed.type === "Identifier" && constructed.name === "Date";
}
115
+
116
/**
 * Check whether `node[childKey]` is a member access of the exact shape
 * `objectName.propertyName`.
 *
 * The object side must be a plain identifier named `objectName`. The property
 * side is resolved by propertyNameOf, so both `Date.now` and a computed access
 * with a statically known string key (for example `Date["now"]`) match.
 *
 * @param node - Parent node to read from.
 * @param childKey - Field of `node` holding the candidate (usually "callee").
 * @param objectName - Required identifier on the left of the access.
 * @param propertyName - Required resolved property name.
 */
function isMemberExpression(node: AstNode, childKey: string, objectName: string, propertyName: string): boolean {
  const member = asAstNode(node[childKey]);
  if (member === undefined || member.type !== "MemberExpression") return false;

  const receiver = asAstNode(member.object);
  const receiverMatches = receiver !== undefined && receiver.type === "Identifier" && receiver.name === objectName;

  // The property is only resolved once the receiver is known to match.
  return receiverMatches && propertyNameOf(member) === propertyName;
}
128
+
129
/**
 * Resolve the property name of a member-access node, when it is statically known.
 *
 * @param node - A member-access node; reads its `computed` and `property` fields.
 * @returns The identifier name for a non-computed access (`obj.name`); for any
 *   other property node, whatever staticStringOf can resolve; `undefined` when
 *   there is no property node or the key is not static.
 */
function propertyNameOf(node: AstNode): string | undefined {
  const key = asAstNode(node.property);
  if (key === undefined) return undefined;

  const isPlainIdentifier = node.computed !== true && key.type === "Identifier";
  return isPlainIdentifier ? (key.name as string | undefined) : staticStringOf(key);
}
138
+
139
/**
 * Evaluate `node` to a string when its value is fully static.
 *
 * Handled shapes:
 *  - a string literal, which yields its value;
 *  - a template literal with no `${}` expressions, which yields its quasis
 *    joined together (cooked text preferred, raw text as fallback);
 *  - a `+` expression whose two sides are both static strings, which yields
 *    their concatenation.
 *
 * @param node - Node to evaluate; `undefined` is accepted and yields `undefined`.
 * @returns The resolved string, or `undefined` for anything else.
 */
function staticStringOf(node: AstNode | undefined): string | undefined {
  if (!node) return undefined;

  switch (node.type) {
    case "Literal":
      return typeof node.value === "string" ? node.value : undefined;

    case "TemplateLiteral": {
      const { expressions, quasis } = node;
      // Any interpolation makes the value dynamic.
      if (Array.isArray(expressions) && expressions.length > 0) return undefined;
      if (!Array.isArray(quasis)) return undefined;

      let text = "";
      for (const piece of quasis) {
        const chunk = asAstNode(piece)?.value as { cooked?: string; raw?: string } | undefined;
        text += chunk?.cooked ?? chunk?.raw ?? "";
      }
      return text;
    }

    case "BinaryExpression": {
      if (node.operator !== "+") return undefined;
      const lhs = staticStringOf(asAstNode(node.left));
      const rhs = staticStringOf(asAstNode(node.right));
      return lhs !== undefined && rhs !== undefined ? lhs + rhs : undefined;
    }

    default:
      return undefined;
  }
}
@@ -0,0 +1,97 @@
1
+ /**
2
+ * dwf-state-store.ts — Persistent checkpoint state for dynamic-workflow runs (P2-3, round-18).
3
+ *
4
+ * Modeled on GoalStore (goal-state-store.ts) and FileCheckpointStore (checkpoint.ts),
5
+ * but scoped to a single run's stateRoot (which is already <crewRoot>/state/runs/<runId>).
6
+ *
7
+ * Stores DwfCheckpointState as atomic JSON at <stateRoot>/dwf-checkpoint.json.
8
+ * atomicWriteJson (temp + rename + fsync) guarantees either the old or the new file,
9
+ * never a partial write — safe across crashes.
10
+ *
11
+ * Resume semantics (round-18): the runner loads a checkpoint on run start and hydrates
12
+ * ctx.vars/phases/logs from it; on clean completion the runner deletes it. A missing or
13
+ * corrupt checkpoint is treated as a fresh run (load() returns undefined). If a crash
14
+ * happens mid-agent, that agent simply re-runs from scratch on resume — agent results
15
+ * are expected to be idempotent-ish.
16
+ */
17
+
18
+ import { mkdirSync, existsSync, readFileSync, unlinkSync } from "node:fs";
19
+ import { dirname } from "node:path";
20
+ import { atomicWriteJson } from "../state/atomic-write.ts";
21
+ import { logInternalError } from "../utils/internal-error.ts";
22
+
23
/** JSON shape of one dynamic-workflow run's checkpoint, as read and written by DwfStore. */
export interface DwfCheckpointState {
  runId: string;
  vars: Record<string, unknown>;
  phases: string[];
  currentPhase: string | undefined;
  /** Capped copy (≤1000); the events log (dwf.log) is the durable source of truth. */
  logs: string[];
  /** Budget accumulator (round-14 P1-2). */
  spent: number;
  agentCount: number;
  /** ISO timestamp; DwfStore.save overwrites it on every write. */
  updatedAt: string;
}
33
+
34
/**
 * DwfStore — atomic CRUD for a single run's DWF checkpoint.
 *
 * Concurrency: writes go through atomicWriteJson. The DWF runner is the sole
 * writer during a run; `team resume` loads the checkpoint read-only before the
 * script re-executes. No file lock is taken here because only one runner owns a
 * run's stateRoot at a time (run locks protect manifest transitions elsewhere).
 *
 * Note: the constructor takes the run's stateRoot directly (NOT cwd + runId) to
 * avoid a double-nesting bug — stateRoot is already <crewRoot>/state/runs/<runId>,
 * so the checkpoint lands at <crewRoot>/state/runs/<runId>/dwf-checkpoint.json.
 * This mirrors FileCheckpointStore (checkpoint.ts: constructor(stateRoot)).
 */
export class DwfStore {
  private readonly stateRoot: string;

  /** @param stateRoot - Directory of the run's state; the checkpoint file lives directly inside it. */
  constructor(stateRoot: string) {
    this.stateRoot = stateRoot;
  }

  /** Absolute location of the checkpoint file for this store. */
  private get path(): string {
    return `${this.stateRoot}/dwf-checkpoint.json`;
  }

  /**
   * Load the checkpoint for this run's stateRoot.
   *
   * @returns The stored state, or `undefined` when the file is missing,
   *   unreadable, not valid JSON, or does not have the full
   *   DwfCheckpointState shape. In all of those cases the caller should treat
   *   the run as fresh. Never throws.
   */
  load(): DwfCheckpointState | undefined {
    const path = this.path;
    try {
      if (!existsSync(path)) return undefined;
      const parsed: unknown = JSON.parse(readFileSync(path, "utf-8"));
      // Corrupt-guard: validate every field rather than casting, so a truncated
      // or hand-edited file cannot reach the runner with missing vars/phases/logs.
      return isDwfCheckpointState(parsed) ? parsed : undefined;
    } catch (error) {
      // Read or parse failure: still a fresh run, but leave a trace like save/delete do.
      logInternalError("dwf-state-store.load", error, `path=${path}`);
      return undefined;
    }
  }

  /**
   * Atomically persist a checkpoint state. Stamps `updatedAt` (callers need not set it).
   *
   * @throws Rethrows any directory-creation or write error after logging it.
   */
  save(state: DwfCheckpointState): void {
    const path = this.path;
    const next = { ...state, updatedAt: new Date().toISOString() };
    try {
      mkdirSync(dirname(path), { recursive: true });
      atomicWriteJson(path, next);
    } catch (error) {
      logInternalError("dwf-state-store.save", error, `runId=${state.runId}`);
      throw error;
    }
  }

  /** Remove the checkpoint file (after a clean completion). Best-effort; never throws. */
  delete(): void {
    const path = this.path;
    try {
      if (!existsSync(path)) return;
      unlinkSync(path);
    } catch (error) {
      logInternalError("dwf-state-store.delete", error);
    }
  }
}

/** Runtime check that a parsed JSON value has every field of DwfCheckpointState with the right type. */
function isDwfCheckpointState(value: unknown): value is DwfCheckpointState {
  if (typeof value !== "object" || value === null || Array.isArray(value)) return false;
  const record = value as Record<string, unknown>;

  const isStringArray = (candidate: unknown): boolean =>
    Array.isArray(candidate) && candidate.every((entry) => typeof entry === "string");
  const { vars, currentPhase } = record;

  return (
    typeof record.runId === "string" &&
    typeof vars === "object" && vars !== null && !Array.isArray(vars) &&
    isStringArray(record.phases) &&
    // JSON drops undefined-valued keys, so an absent currentPhase is valid.
    (currentPhase === undefined || typeof currentPhase === "string") &&
    isStringArray(record.logs) &&
    typeof record.spent === "number" &&
    typeof record.agentCount === "number" &&
    typeof record.updatedAt === "string"
  );
}