@smartmemory/compose 0.2.8-beta → 0.2.10-beta

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (67)
  1. package/bin/compose.js +75 -1
  2. package/contracts/gsd-state.json +140 -0
  3. package/contracts/par-merge-bounce.json +39 -0
  4. package/dist/assets/{App-D3ehVPvi.js → App-CG-2euMe.js} +164 -164
  5. package/dist/assets/{arc-Dmf69iHG.js → arc-7QBWoLra.js} +1 -1
  6. package/dist/assets/{architectureDiagram-3BPJPVTR-xYo993Yw.js → architectureDiagram-3BPJPVTR-CUw-7uLm.js} +1 -1
  7. package/dist/assets/{blockDiagram-GPEHLZMM-UX4EF98O.js → blockDiagram-GPEHLZMM-COU1vmr7.js} +1 -1
  8. package/dist/assets/{c4Diagram-AAUBKEIU-DaP9CGWb.js → c4Diagram-AAUBKEIU-XPO9PSJL.js} +1 -1
  9. package/dist/assets/channel-Bcu04MIK.js +1 -0
  10. package/dist/assets/{chunk-2J33WTMH-CKk_RN3A.js → chunk-2J33WTMH-zMzVB2a6.js} +1 -1
  11. package/dist/assets/{chunk-4BX2VUAB-DboAwYKw.js → chunk-4BX2VUAB-Kke_qcHU.js} +1 -1
  12. package/dist/assets/{chunk-55IACEB6-Dsy9RYvI.js → chunk-55IACEB6-hMeFx5Nh.js} +1 -1
  13. package/dist/assets/{chunk-727SXJPM-fAH0QO9v.js → chunk-727SXJPM-DesUnrEw.js} +1 -1
  14. package/dist/assets/{chunk-AQP2D5EJ-DyZYerFP.js → chunk-AQP2D5EJ-1uGGvkxW.js} +1 -1
  15. package/dist/assets/{chunk-FMBD7UC4-BnboGO5t.js → chunk-FMBD7UC4-DYHv1PcZ.js} +1 -1
  16. package/dist/assets/{chunk-ND2GUHAM-Di9tYXme.js → chunk-ND2GUHAM-D0MENOLX.js} +1 -1
  17. package/dist/assets/{chunk-QZHKN3VN-zRPRlAIL.js → chunk-QZHKN3VN-8nn3HP-N.js} +1 -1
  18. package/dist/assets/classDiagram-4FO5ZUOK-DU4yxldU.js +1 -0
  19. package/dist/assets/classDiagram-v2-Q7XG4LA2-DU4yxldU.js +1 -0
  20. package/dist/assets/{cose-bilkent-S5V4N54A-C7Hqukaf.js → cose-bilkent-S5V4N54A-BoZPVIny.js} +1 -1
  21. package/dist/assets/{dagre-BM42HDAG-B-cR-BjI.js → dagre-BM42HDAG-BgZzdLG9.js} +1 -1
  22. package/dist/assets/{diagram-2AECGRRQ-B6-5onDk.js → diagram-2AECGRRQ-CknAnpSu.js} +1 -1
  23. package/dist/assets/{diagram-5GNKFQAL-DoZZgFAM.js → diagram-5GNKFQAL-CZUEbKim.js} +1 -1
  24. package/dist/assets/{diagram-KO2AKTUF-77jEGlJh.js → diagram-KO2AKTUF-DCs-pLdH.js} +1 -1
  25. package/dist/assets/{diagram-LMA3HP47-D3S7XDRD.js → diagram-LMA3HP47-lRaDjIfM.js} +1 -1
  26. package/dist/assets/{diagram-OG6HWLK6-KbYL9aCY.js → diagram-OG6HWLK6-CIGqmehP.js} +1 -1
  27. package/dist/assets/{erDiagram-TEJ5UH35-DezFbJP-.js → erDiagram-TEJ5UH35-Lx3c2N6F.js} +1 -1
  28. package/dist/assets/{flowDiagram-I6XJVG4X-4x31cK9j.js → flowDiagram-I6XJVG4X-VoluKqSq.js} +1 -1
  29. package/dist/assets/{ganttDiagram-6RSMTGT7-FopfSTyZ.js → ganttDiagram-6RSMTGT7-D7hETiNZ.js} +1 -1
  30. package/dist/assets/{gitGraphDiagram-PVQCEYII-DSiQGKbN.js → gitGraphDiagram-PVQCEYII-DenEcUvY.js} +1 -1
  31. package/dist/assets/{index-ClX6LVAf.js → index-B4dv3acY.js} +2 -2
  32. package/dist/assets/{infoDiagram-5YYISTIA-DE6BqzK_.js → infoDiagram-5YYISTIA-v7cq9Er9.js} +1 -1
  33. package/dist/assets/{ishikawaDiagram-YF4QCWOH-Dml8NwQI.js → ishikawaDiagram-YF4QCWOH-CfCCXt2x.js} +1 -1
  34. package/dist/assets/{journeyDiagram-JHISSGLW-CwWeJgjE.js → journeyDiagram-JHISSGLW-Bbokl_xO.js} +1 -1
  35. package/dist/assets/{kanban-definition-UN3LZRKU-DnG956Wh.js → kanban-definition-UN3LZRKU-DhkOZ2hg.js} +1 -1
  36. package/dist/assets/{linear-CA3N7Rpi.js → linear-bHjluRm2.js} +1 -1
  37. package/dist/assets/{mindmap-definition-RKZ34NQL-CxfIOjLX.js → mindmap-definition-RKZ34NQL-C1bHpoXH.js} +1 -1
  38. package/dist/assets/{pieDiagram-4H26LBE5-O7aIwy1x.js → pieDiagram-4H26LBE5-CZb1i55T.js} +1 -1
  39. package/dist/assets/{quadrantDiagram-W4KKPZXB-CPQ2qq7c.js → quadrantDiagram-W4KKPZXB-o37AwRHB.js} +1 -1
  40. package/dist/assets/{requirementDiagram-4Y6WPE33-C23horL4.js → requirementDiagram-4Y6WPE33-BVErWDzU.js} +1 -1
  41. package/dist/assets/{sankeyDiagram-5OEKKPKP-DPY04kOW.js → sankeyDiagram-5OEKKPKP-BhBK8gHQ.js} +1 -1
  42. package/dist/assets/{sequenceDiagram-3UESZ5HK-BKaTfIvo.js → sequenceDiagram-3UESZ5HK-CsICF23P.js} +1 -1
  43. package/dist/assets/{stateDiagram-AJRCARHV-B9na_6mY.js → stateDiagram-AJRCARHV-TN1AXwim.js} +1 -1
  44. package/dist/assets/stateDiagram-v2-BHNVJYJU-BLR6AkKX.js +1 -0
  45. package/dist/assets/{timeline-definition-PNZ67QCA-BBWPqd7X.js → timeline-definition-PNZ67QCA-DftAajbU.js} +1 -1
  46. package/dist/assets/{vennDiagram-CIIHVFJN-tWqiHsOZ.js → vennDiagram-CIIHVFJN-cFTMstT7.js} +1 -1
  47. package/dist/assets/{wardley-L42UT6IY-DorxG6os.js → wardley-L42UT6IY-DL8CivzO.js} +1 -1
  48. package/dist/assets/{wardleyDiagram-YWT4CUSO-B49f8GzW.js → wardleyDiagram-YWT4CUSO-BDZT1hQj.js} +1 -1
  49. package/dist/assets/{xychartDiagram-2RQKCTM6-BgKSj8Qb.js → xychartDiagram-2RQKCTM6-DQQSkfC4.js} +1 -1
  50. package/dist/index.html +1 -1
  51. package/lib/build.js +140 -17
  52. package/lib/gsd-diff-capture.js +34 -0
  53. package/lib/gsd-events.js +61 -0
  54. package/lib/gsd-headless-config.js +110 -0
  55. package/lib/gsd-milestone-report.js +323 -0
  56. package/lib/gsd-state.js +165 -0
  57. package/lib/gsd-supervisor.js +223 -0
  58. package/lib/gsd-timing.js +89 -0
  59. package/lib/gsd.js +504 -49
  60. package/lib/step-prompt.js +6 -0
  61. package/lib/stratum-mcp-client.js +3 -1
  62. package/package.json +1 -1
  63. package/pipelines/gsd.stratum.yaml +12 -4
  64. package/dist/assets/channel-D_RXsFFT.js +0 -1
  65. package/dist/assets/classDiagram-4FO5ZUOK-K6wdB4ic.js +0 -1
  66. package/dist/assets/classDiagram-v2-Q7XG4LA2-K6wdB4ic.js +0 -1
  67. package/dist/assets/stateDiagram-v2-BHNVJYJU-Cf84VDiH.js +0 -1
package/lib/gsd.js CHANGED
@@ -13,7 +13,7 @@
13
13
  // V1 limitation: runtime task-to-task handoff is not implemented; tasks see
14
14
  // only spec-level upstream context (Boundary Map declarations) per blueprint.
15
15
 
16
- import { readFileSync, writeFileSync, existsSync, readdirSync, mkdirSync, rmSync } from 'node:fs';
16
+ import { readFileSync, writeFileSync, existsSync, readdirSync, mkdirSync, rmSync, statSync, renameSync } from 'node:fs';
17
17
  import { join, resolve, dirname } from 'node:path';
18
18
  import { fileURLToPath } from 'node:url';
19
19
  import { execSync } from 'node:child_process';
@@ -27,11 +27,22 @@ import { executeParallelDispatchServer, executeShipStep } from './build.js';
27
27
  import { GsdStuckDetector, DEFAULT_THRESHOLDS } from './gsd-stuck.js';
28
28
  import { readGsdBudgetConfig, buildBudgetBlock, injectBudget, composeBudgetDiagnostic } from './gsd-budget.js';
29
29
  import { recordGsdUsage, checkGsdCumulativeBudget } from './budget-ledger.js';
30
+ // COMP-GSD-6: continuous run-state checkpoint + canonical pid-liveness probe.
31
+ // pidAlive is canonical in gsd-state.js (EPERM=alive) and imported one-way here.
32
+ import { writeGsdState, readGsdState, gsdStatePath, pidAlive, clearGsdHaltArtifacts } from './gsd-state.js';
33
+ import { generateGsdMilestoneReport } from './gsd-milestone-report.js';
34
+ import { readHeadlessConfig } from './gsd-headless-config.js';
35
+ import { appendGsdEvent, clearGsdEvents } from './gsd-events.js';
30
36
 
31
37
  const __dirname = dirname(fileURLToPath(import.meta.url));
32
38
  const PACKAGE_ROOT = resolve(__dirname, '..');
33
39
 
34
40
  const DEFAULT_GATE_COMMANDS = ['pnpm lint', 'pnpm build', 'pnpm test'];
41
+ // COMP-PAR-MERGE-QUEUE: the fast per-task pre-merge gate (lint + build, no full
42
+ // test suite). Enforced in each task's worktree before its diff merges; the full
43
+ // `pnpm test` runs once at ship_gsd. Single-sourced into both the enforced gate
44
+ // (execute.pre_merge_verify) and the instructed gate (task descriptions).
45
+ const DEFAULT_FAST_GATE = ['pnpm lint', 'pnpm build'];
35
46
 
36
47
  // ---------- Public API ----------
37
48
 
@@ -41,6 +52,15 @@ export async function runGsd(featureCode, opts = {}) {
41
52
  }
42
53
  const cwd = opts.cwd ?? process.cwd();
43
54
 
55
+ // COMP-GSD-6: a FRESH (non-resume) run must not inherit a prior run's
56
+ // state.json. Clear it up front so that if a precondition below throws BEFORE
57
+ // the planning checkpoint, NO running state remains → the headless supervisor
58
+ // (and `query`) read 'absent' → fatal-by-absence, never a stale 'complete'
59
+ // success. A resume keeps the old state.json (the crash-bridge may need it).
60
+ if (!opts.resume) {
61
+ try { rmSync(gsdStatePath(cwd, featureCode), { force: true }); } catch { /* ignore */ }
62
+ }
63
+
44
64
  // 1. Validate preconditions: blueprint exists + Boundary Map ok
45
65
  const blueprintPath = join(cwd, 'docs', 'features', featureCode, 'blueprint.md');
46
66
  if (!existsSync(blueprintPath)) {
@@ -107,6 +127,8 @@ export async function runGsd(featureCode, opts = {}) {
107
127
  // 4. Resolve gateCommands. loadProjectConfig() does not merge defaults, so
108
128
  // explicit fallback here.
109
129
  const gateCommands = resolveGateCommands(cwd, opts.gateCommands);
130
+ // COMP-PAR-MERGE-QUEUE: the fast per-task pre-merge gate (lint+build).
131
+ const preMergeGate = resolvePreMergeGate(cwd, opts.preMergeGate);
110
132
 
111
133
  // 4. Load pipeline spec
112
134
  const specPath = join(PACKAGE_ROOT, 'pipelines', 'gsd.stratum.yaml');
@@ -143,7 +165,20 @@ export async function runGsd(featureCode, opts = {}) {
143
165
  // from clobbering a concurrent resume's valid claim and (b) a claim-race loser
144
166
  // (EEXIST) from deleting the winner's lock on its way out.
145
167
  let lockClaimed = false;
168
+ let runLockClaimed = false;
169
+ // COMP-GSD-6: the in-memory run-state, threaded through stepCtx and flushed to
170
+ // state.json. Declared here so the catch/finally can read it.
171
+ let stepCtx = null;
172
+ // COMP-GSD-6-WATCHDOG: independent wall-clock heartbeat timer (see below).
173
+ // Declared here so the finally can always clear it.
174
+ let heartbeatTimer = null;
146
175
  try {
176
+ // COMP-GSD-6: claim the live-run lock BEFORE any stratum side effect, so two
177
+ // fresh `compose gsd <same-feature>` runs can't race the results dir. Takes
178
+ // over a stale lock (dead owner) and refuses a live one.
179
+ claimRunLock(cwd, featureCode);
180
+ runLockClaimed = true;
181
+
147
182
  // COMP-GSD-4: claim the resume lock HERE (first statement in the try) so the
148
183
  // finally releases it on EVERY exit — budget/stuck re-halt, throw, or clean
149
184
  // finish. loadResumeTaskGraph above already read+guarded (claim:false).
@@ -152,22 +187,91 @@ export async function runGsd(featureCode, opts = {}) {
152
187
  lockClaimed = true;
153
188
  }
154
189
 
155
- let response = await stratum.plan(specYaml, 'gsd', {
156
- featureCode,
157
- gateCommands,
158
- });
159
- const flowId = response.flow_id;
190
+ // COMP-GSD-6: pre-plan "planning" checkpoint. A crash during plan/decompose
191
+ // now leaves a dead-pid state.json — the failed-vs-fatal boundary. A throw
192
+ // BEFORE this point (preconditions) leaves no running state → fatal by
193
+ // absence; a throw AFTER → the catch converts it to status:"failed".
194
+ // On resume, seed the planning checkpoint from the (in-memory) resume graph
195
+ // so that if THIS resume re-crashes before its decompose step repopulates
196
+ // state.json, the crash-bridge still has a task graph to recover from
197
+ // (otherwise the fresh empty checkpoint would clobber the prior good data).
198
+ const resumeTasks = opts.resume ? (resumeTaskGraph?.tasks ?? []).map((t) => ({ ...t })) : [];
199
+ const initialState = {
200
+ feature: featureCode,
201
+ flowId: null,
202
+ pid: process.pid,
203
+ mode: 'gsd',
204
+ phase: 'planning',
205
+ status: 'running',
206
+ startedAt: new Date().toISOString(),
207
+ headless: !!opts.headless,
208
+ attempt: opts.attempt ?? 1,
209
+ resumeReady: opts.resume && resumeTasks.length > 0,
210
+ decomposedTasks: resumeTasks,
211
+ completedTaskIds: collectCompletedTaskIds(cwd, featureCode),
212
+ };
160
213
 
161
214
  // Track files merged into the base cwd by the execute step so ship_gsd
162
215
  // can stage them. executeShipStep's default filter only stages feature
163
216
  // docs unless context.filesChanged is provided.
164
- const stepCtx = {
165
- stratum, cwd, featureCode, blueprintText, gateCommands,
217
+ stepCtx = {
218
+ stratum, cwd, featureCode, blueprintText, gateCommands, preMergeGate,
166
219
  filesChanged: [],
167
220
  stuckDetector,
168
221
  resumeTaskGraph,
169
222
  stuck: null, // set by runOneStep on a stuck verdict
223
+ runState: initialState, // COMP-GSD-6: flushState merges into this
224
+ // COMP-GSD-7-EVENTLOG: tasks already completed at run start (a resume
225
+ // preloads them) are seeded as already-emitted so the appended log never
226
+ // re-fires task_completed for prior-session completions.
227
+ emittedCompletions: new Set(initialState.completedTaskIds),
228
+ // COMP-GSD-7-EVENTLOG: phases already announced (dedupe — runState.phase is
229
+ // set to 'execute' before the merge checkpoint, so it can't gate emission).
230
+ emittedPhases: new Set(),
170
231
  };
232
+ flushState(stepCtx, {}); // write the planning checkpoint
233
+
234
+ // COMP-GSD-7-EVENTLOG: at the planning checkpoint — AFTER preconditions
235
+ // passed (so a failed fresh invocation never wipes a prior run's history) —
236
+ // a fresh run truncates the event log and clears stale halt artifacts so the
237
+ // timeline reflects only this run; a resume appends to the existing log.
238
+ if (!opts.resume) {
239
+ clearGsdEvents(cwd, featureCode);
240
+ clearGsdHaltArtifacts(cwd, featureCode);
241
+ }
242
+ appendGsdEvent(cwd, featureCode, 'run_started', {
243
+ mode: opts.resume ? 'resume' : 'fresh',
244
+ attempt: opts.attempt ?? 1,
245
+ });
246
+
247
+ // COMP-GSD-6-WATCHDOG: an INDEPENDENT wall-clock heartbeat. The existing
248
+ // heartbeat only advances on agent push-events (onHeartbeat below), so a
249
+ // quiet-but-healthy task would look stale. This timer restamps state.json's
250
+ // heartbeat on a fixed cadence whenever the event loop is still turning — so
251
+ // a stale heartbeat genuinely means the loop is WEDGED (or the process dead),
252
+ // which is what the headless watchdog keys its hung-kill on. .unref() so it
253
+ // never holds the process open; cleared in finally. Same empty-patch restamp
254
+ // onHeartbeat uses, so it's behavior-compatible.
255
+ //
256
+ // Gated to SUPERVISED children only (GSD_HEADLESS_ATTEMPT, set by the
257
+ // supervisor's spawner) — the supervisor is the sole watcher, so an
258
+ // interactive `compose gsd` stays byte-identical (no extra state.json writes).
259
+ if (process.env.GSD_HEADLESS_ATTEMPT != null) {
260
+ const hbMs = readHeadlessConfig(cwd).watchdogHeartbeatMs;
261
+ heartbeatTimer = setInterval(() => {
262
+ try { if (stepCtx?.runState) flushState(stepCtx, {}); } catch { /* best-effort */ }
263
+ }, hbMs);
264
+ heartbeatTimer.unref?.();
265
+ }
266
+
267
+ let response = await stratum.plan(specYaml, 'gsd', {
268
+ featureCode,
269
+ gateCommands,
270
+ pre_merge_gate: preMergeGate,
271
+ });
272
+ const flowId = response.flow_id;
273
+ flushState(stepCtx, { flowId, phase: 'decompose' });
274
+ emitPhaseOnce(stepCtx, 'decompose'); // COMP-GSD-7-EVENTLOG
171
275
 
172
276
  // 5. Status loop. `stuck` (COMP-GSD-5) and `budget_exhausted` (COMP-GSD-4)
173
277
  // are terminal statuses. `stuck` is set compose-side by runOneStep; budget
@@ -177,13 +281,38 @@ export async function runGsd(featureCode, opts = {}) {
177
281
  response.status !== 'complete' &&
178
282
  response.status !== 'killed' &&
179
283
  response.status !== 'stuck' &&
180
- response.status !== 'budget_exhausted'
284
+ response.status !== 'budget_exhausted' &&
285
+ response.status !== 'error' // COMP-PAR-MERGE-QUEUE: terminal step failure (e.g. retries_exhausted)
181
286
  ) {
182
287
  response = await runOneStep(response, stepCtx);
183
288
  }
184
289
 
290
+ // COMP-PAR-MERGE-QUEUE: a step that exhausted its retries (e.g. the execute
291
+ // step after repeated pre-merge gate failures) surfaces as a terminal `error`
292
+ // envelope rather than silently advancing to ship. Stop here with the failure
293
+ // and its bounce context instead of throwing `unknown response status`.
294
+ if (response.status === 'error') {
295
+ emitCompletionDeltas(stepCtx);
296
+ flushState(stepCtx, { status: 'failed' });
297
+ return {
298
+ status: 'failed',
299
+ flowId,
300
+ stepId: response.step_id ?? stepCtx.lastStepId ?? null,
301
+ errorType: response.error_type ?? 'step_failed',
302
+ message: response.message ?? 'GSD step failed',
303
+ violations: response.violations ?? [],
304
+ bouncedTasks: response.bounced_tasks ?? [],
305
+ };
306
+ }
307
+
185
308
  if (response.status === 'stuck') {
186
309
  // Artifacts (stuck.md/json + pause.json) were written by runOneStep.
310
+ // COMP-GSD-7-EVENTLOG: flush any completions that finished before the stuck
311
+ // verdict (the stuck path returns early, before the execute-merge delta),
312
+ // then record the pause.
313
+ emitCompletionDeltas(stepCtx);
314
+ appendGsdEvent(cwd, featureCode, 'paused', { pauseKind: 'stuck', taskId: stepCtx.stuck?.taskId ?? null });
315
+ flushState(stepCtx, { status: 'stuck' }); // COMP-GSD-6 terminal checkpoint
187
316
  return {
188
317
  status: 'stuck',
189
318
  flowId,
@@ -201,6 +330,10 @@ export async function runGsd(featureCode, opts = {}) {
201
330
  writeBudgetArtifacts(stepCtx, response, budgetState);
202
331
  recordGsdUsageFromState(cwd, featureCode, budgetState);
203
332
  const axis = composeBudgetDiagnostic(budgetState, { feature: featureCode }).json.axis;
333
+ // COMP-GSD-7-EVENTLOG: flush pre-halt completions, then record the pause.
334
+ emitCompletionDeltas(stepCtx);
335
+ appendGsdEvent(cwd, featureCode, 'paused', { pauseKind: 'budget', axis });
336
+ flushState(stepCtx, { status: 'budget' }); // COMP-GSD-6 terminal checkpoint
204
337
  return { status: 'budget', flowId, axis, consumed: budgetState.consumed ?? {}, caps: budgetState.caps ?? {} };
205
338
  }
206
339
 
@@ -218,14 +351,73 @@ export async function runGsd(featureCode, opts = {}) {
218
351
  // the complete envelope carries no budget_state, e.g. un-budgeted runs).
219
352
  recordGsdUsageFromState(cwd, featureCode, response.budget_state);
220
353
  clearPauseFile(cwd, featureCode);
354
+ // COMP-GSD-7: on a clean complete, budget.json is NOT written (only halts
355
+ // write it). Persist a budget-final.json snapshot so the milestone report
356
+ // (auto + retroactive `gsd report`) has actuals-vs-caps. No-op when the
357
+ // envelope carries no budget_state (un-budgeted run). Best-effort: this is
358
+ // a derived report input — a write failure must NEVER demote a successful
359
+ // run to 'failed' via the outer catch.
360
+ if (response.budget_state) {
361
+ try {
362
+ writeBudgetFinalSnapshot(stepCtx, response.budget_state);
363
+ } catch (err) {
364
+ console.warn(`[gsd] budget-final snapshot failed: ${err.message}`);
365
+ }
366
+ }
367
+ }
368
+
369
+ // COMP-GSD-6: terminal state.json flush. Only 'complete' is a success; any
370
+ // other terminal here (e.g. stratum 'killed') maps to 'failed' so we stay
371
+ // within the closed status vocabulary the contract + supervisor share.
372
+ // COMP-GSD-7: stamp completedAt so retroactive reports can recover wall-clock.
373
+ const terminalStatus = response.status === 'complete' ? 'complete' : 'failed';
374
+ // COMP-GSD-7-EVENTLOG: emit the terminal event. complete → final completion
375
+ // deltas + 'completed'; any other terminal (e.g. stratum 'killed') → 'failed'.
376
+ if (terminalStatus === 'complete') {
377
+ emitCompletionDeltas(stepCtx);
378
+ appendGsdEvent(cwd, featureCode, 'completed', {});
379
+ } else {
380
+ appendGsdEvent(cwd, featureCode, 'failed', { reason: response.status ?? 'unknown' });
381
+ }
382
+ flushState(stepCtx, { status: terminalStatus, phase: 'done', completedAt: new Date().toISOString() });
383
+
384
+ // COMP-GSD-7: best-effort milestone report on a clean complete. A report
385
+ // failure must never fail the run — it is a derived artifact.
386
+ if (terminalStatus === 'complete') {
387
+ try {
388
+ const r = generateGsdMilestoneReport(featureCode, cwd);
389
+ if (!r.ok) console.warn(`[gsd] milestone report skipped: ${r.error}`);
390
+ } catch (err) {
391
+ console.warn(`[gsd] milestone report generation failed: ${err.message}`);
392
+ }
221
393
  }
222
394
 
395
+ // Return the normalized closed-vocabulary status (not the raw stratum status)
396
+ // so the CLI/callers don't mistake a 'killed' terminal for success.
223
397
  return {
224
- status: response.status,
398
+ status: terminalStatus,
225
399
  flowId,
226
400
  blackboardEntries: Object.keys(blackboard).length,
227
401
  };
402
+ } catch (err) {
403
+ // COMP-GSD-6: an orderly throw AFTER the planning checkpoint becomes a
404
+ // terminal status:"failed" so the supervisor treats it as non-recoverable
405
+ // (vs a hard crash → status stays "running" + dead pid → reader-derived
406
+ // "crashed"). Guard on a persisted running state so pre-checkpoint throws
407
+ // (which left no running state) stay fatal-by-absence, not "failed".
408
+ if (stepCtx?.runState && readGsdState(cwd, featureCode)?.status === 'running') {
409
+ try { flushState(stepCtx, { status: 'failed' }); } catch { /* best-effort */ }
410
+ // COMP-GSD-7-EVENTLOG: record the failure (only when a run actually started
411
+ // — a pre-checkpoint throw left no running state and gets no event). Append
412
+ // is best-effort; never mask the original error.
413
+ appendGsdEvent(cwd, featureCode, 'failed', { reason: err?.message ?? 'error' });
414
+ }
415
+ throw err;
228
416
  } finally {
417
+ // COMP-GSD-6-WATCHDOG: stop the independent heartbeat timer.
418
+ if (heartbeatTimer) clearInterval(heartbeatTimer);
419
+ // COMP-GSD-6: release the live-run lock if THIS process claimed it.
420
+ if (runLockClaimed) releaseRunLock(cwd, featureCode);
229
421
  // COMP-GSD-4: release the resume claim ONLY if THIS process claimed it
230
422
  // (ownership-aware — never clobber a concurrent run's valid claim, and don't
231
423
  // release after losing the claim race). pause.json persists for --resume
@@ -257,8 +449,32 @@ export function resolveGateCommands(cwd, override) {
257
449
  return [...DEFAULT_GATE_COMMANDS];
258
450
  }
259
451
 
452
/**
 * COMP-PAR-MERGE-QUEUE: resolve the fast per-task pre-merge gate.
 *
 * Resolution order (the first non-empty source wins):
 *   1. `override` passed by the caller;
 *   2. `preMergeGate` in `<cwd>/.compose/compose.json`;
 *   3. the entries of `gateCommands` in that same file that do not contain
 *      the word `test` (matched with /\btest\b/);
 *   4. DEFAULT_FAST_GATE.
 *
 * A missing, unreadable, or malformed config file is not an error: resolution
 * simply falls through to the default.
 *
 * The result is always a fresh array, on every path. The default branch already
 * returned a copy; the override branch now does too, so a later mutation of the
 * resolved gate can never leak back into the caller's own array.
 *
 * @param {string} cwd - Project root that may contain `.compose/compose.json`.
 * @param {string[]} [override] - Explicit gate commands; ignored when empty or not an array.
 * @returns {string[]} The commands making up the pre-merge gate.
 */
export function resolvePreMergeGate(cwd, override) {
  if (Array.isArray(override) && override.length > 0) return [...override];

  const configPath = join(cwd, '.compose', 'compose.json');
  if (existsSync(configPath)) {
    try {
      const cfg = JSON.parse(readFileSync(configPath, 'utf-8'));
      if (Array.isArray(cfg.preMergeGate) && cfg.preMergeGate.length > 0) {
        return [...cfg.preMergeGate];
      }
      if (Array.isArray(cfg.gateCommands) && cfg.gateCommands.length > 0) {
        // Drop anything that looks like a test command; keep the rest in order.
        const fast = cfg.gateCommands.filter((c) => !/\btest\b/.test(c));
        if (fast.length > 0) return fast;
      }
    } catch {
      /* unreadable or malformed config: fall through to the default */
    }
  }
  return [...DEFAULT_FAST_GATE];
}
475
+
260
476
  async function runOneStep(response, ctx) {
261
- const { stratum, cwd, featureCode, blueprintText, gateCommands } = ctx;
477
+ const { stratum, cwd, featureCode, blueprintText, gateCommands, preMergeGate } = ctx;
262
478
  const flowId = response.flow_id;
263
479
  const stepId = response.step_id;
264
480
  const stepType = response.type ?? response.step_type;
@@ -270,11 +486,16 @@ async function runOneStep(response, ctx) {
270
486
  const outcome = await executeParallelDispatchServer(
271
487
  response,
272
488
  stratum,
273
- { cwd, featureCode },
489
+ { cwd, featureCode, gsd: true }, // COMP-GSD-7: gates timing+diff capture
274
490
  null, // progress
275
491
  { write: () => {} }, // streamWriter — no-op for v1
276
492
  cwd,
277
- { stuckDetector: ctx.stuckDetector }, // COMP-GSD-5 (null in non-gsd callers)
493
+ {
494
+ stuckDetector: ctx.stuckDetector, // COMP-GSD-5 (null in non-gsd callers)
495
+ // COMP-GSD-6: bump state.json's heartbeat on every task event so a long
496
+ // task sitting in the dispatch poll loop isn't mistaken for crashed.
497
+ onHeartbeat: ctx.runState ? () => { try { flushState(ctx, {}); } catch { /* best-effort */ } } : null,
498
+ },
278
499
  );
279
500
 
280
501
  // COMP-GSD-5: a stuck verdict halts the run. Persist the diagnostic +
@@ -290,6 +511,15 @@ async function runOneStep(response, ctx) {
290
511
  // staging. The clean-workspace precondition above guarantees every
291
512
  // file in the post-execute dirty set is genuinely a GSD-produced change.
292
513
  ctx.filesChanged = collectChangedFiles(cwd);
514
+ // COMP-GSD-6: checkpoint completed tasks after the execute merge.
515
+ // COMP-GSD-7-EVENTLOG: emit the execute-phase transition once, then a
516
+ // task_completed event per newly-completed task.
517
+ if (ctx.runState) {
518
+ const completed = collectCompletedTaskIds(cwd, featureCode);
519
+ flushState(ctx, { phase: 'execute', completedTaskIds: completed });
520
+ emitPhaseOnce(ctx, 'execute'); // dedupes; runState.phase can't gate this
521
+ emitCompletionDeltas(ctx, completed);
522
+ }
293
523
  // executeParallelDispatchServer returns the next-step dispatch envelope
294
524
  return outcome;
295
525
  }
@@ -320,6 +550,15 @@ async function runOneStep(response, ctx) {
320
550
  // Stable task IDs + no re-decompose are the whole point.
321
551
  if (stepId === 'decompose_gsd' && ctx.resumeTaskGraph) {
322
552
  ctx.lastTaskGraph = ctx.resumeTaskGraph;
553
+ // COMP-GSD-6: a resume already has the (filtered) task graph — mark
554
+ // resumeReady so a re-crash during execute resumes rather than restarts.
555
+ if (ctx.runState) {
556
+ flushState(ctx, {
557
+ phase: 'execute',
558
+ resumeReady: true,
559
+ decomposedTasks: (ctx.resumeTaskGraph.tasks ?? []).map((t) => ({ ...t })),
560
+ });
561
+ }
323
562
  return await stratum.stepDone(flowId, stepId, ctx.resumeTaskGraph);
324
563
  }
325
564
 
@@ -338,11 +577,23 @@ async function runOneStep(response, ctx) {
338
577
 
339
578
  // T6 step 7: validate decompose_gsd output and repair missing descriptions.
340
579
  if (stepId === 'decompose_gsd') {
341
- result = validateAndRepairTaskGraph(result, blueprintText, gateCommands);
580
+ // COMP-PAR-MERGE-QUEUE: single-source the per-task instructed gate to the
581
+ // fast pre-merge gate (== the enforced execute.pre_merge_verify). Full
582
+ // `pnpm test` is instructed only at ship_gsd.
583
+ result = validateAndRepairTaskGraph(result, blueprintText, preMergeGate ?? gateCommands);
342
584
  // COMP-GSD-5: remember the ENRICHED graph so a later stuck halt can
343
585
  // persist the full task definitions (with descriptions/produces/consumes)
344
586
  // into pause.json — resume re-dispatches these without re-enriching.
345
587
  ctx.lastTaskGraph = result;
588
+ // COMP-GSD-6: the task graph now exists → resumeReady true; persist it so a
589
+ // crash during execute can synthesize a resume graph from state.json.
590
+ if (ctx.runState) {
591
+ flushState(ctx, {
592
+ phase: 'execute',
593
+ resumeReady: true,
594
+ decomposedTasks: (result.tasks ?? []).map((t) => ({ ...t })),
595
+ });
596
+ }
346
597
  }
347
598
 
348
599
  return await stratum.stepDone(flowId, stepId, result);
@@ -507,6 +758,139 @@ function gsdDir(cwd, featureCode) {
507
758
  return join(cwd, '.compose', 'gsd', featureCode);
508
759
  }
509
760
 
761
+ // ===========================================================================
762
+ // COMP-GSD-6: run.lock (live-run exclusivity) + state.json flush helpers
763
+ // ===========================================================================
764
+
765
// Maximum age (ms) of a run.lock directory that has no owner record before
// claimRunLock treats it as stale.
const RUN_LOCK_STALE_MS = 90000;

/**
 * Path of the live-run lock directory for a feature.
 *
 * @param {string} cwd - Project root.
 * @param {string} featureCode - Feature identifier.
 * @returns {string} `<gsd dir for the feature>/run.lock`.
 */
function runLockDir(cwd, featureCode) {
  const featureDir = gsdDir(cwd, featureCode);
  return join(featureDir, 'run.lock');
}
770
+
771
/**
 * Atomically take over a stale lock directory.
 *
 * A plain remove-then-create is racy: two reclaimers can both decide the lock
 * is stale, both remove it, and one ends up deleting the other's fresh lock.
 * Renaming the stale directory aside is used instead, so only one racer can
 * move it; the others fail the rename and report that they lost.
 *
 * @param {string} lockPath - Path of the lock directory to take over.
 * @returns {boolean} true only when this call re-created the lock directory;
 *   false when the rename failed (someone else took it, or it vanished) or
 *   when a new claimant created the lock first (EEXIST).
 * @throws Any mkdir error other than EEXIST.
 */
function takeoverStaleLock(lockPath) {
  const parkedPath = `${lockPath}.stale.${process.pid}.${Date.now()}`;

  // Step 1: move the stale lock out of the way. Any failure means we lost.
  try {
    renameSync(lockPath, parkedPath);
  } catch {
    return false;
  }

  // Step 2: discard the parked copy. Leftovers are harmless, so ignore errors.
  try {
    rmSync(parkedPath, { recursive: true, force: true });
  } catch {
    /* best-effort */
  }

  // Step 3: re-create the lock under its real name. EEXIST means a fresh
  // claimant got there between the rename and this mkdir.
  let recreated = true;
  try {
    mkdirSync(lockPath);
  } catch (err) {
    if (err.code !== 'EEXIST') throw err;
    recreated = false;
  }
  return recreated;
}
794
+
795
/**
 * Find the pid that owns a feature's run.lock.
 *
 * The lock-local record (`run.lock/owner.json`) is consulted first; when it is
 * missing, unreadable, or carries no numeric pid, the pid stored in state.json
 * (via readGsdState) is used instead.
 *
 * @param {string} cwd - Project root.
 * @param {string} featureCode - Feature identifier.
 * @returns {number|null} The owner pid, or null when neither source has one.
 */
function runLockOwnerPid(cwd, featureCode) {
  const ownerFile = join(runLockDir(cwd, featureCode), 'owner.json');

  if (existsSync(ownerFile)) {
    try {
      const { pid } = JSON.parse(readFileSync(ownerFile, 'utf-8'));
      if (typeof pid === 'number') return pid;
    } catch {
      /* fall through to state.json */
    }
  }

  const fallbackPid = readGsdState(cwd, featureCode)?.pid;
  if (typeof fallbackPid === 'number') return fallbackPid;
  return null;
}
808
+
809
/**
 * Atomically claim the live-run lock for a feature.
 *
 * The lock is a directory (`run.lock`) created with a non-recursive mkdir, so
 * a second claimant gets EEXIST. On EEXIST the existing lock is taken over
 * only when it is stale:
 *   - an owner pid is known (owner.json, else state.json) and that pid is not
 *     alive; or
 *   - no owner pid is known and the lock directory's mtime is older than
 *     RUN_LOCK_STALE_MS (covers the gap before owner.json is written), or the
 *     directory cannot be stat'ed at all.
 * A lock that is not stale is refused with an Error.
 *
 * After winning, `run.lock/owner.json` is written with `{ pid, startedAt }`.
 * If that write fails, the lock directory this call just created is removed
 * before the error is rethrown. Otherwise an ownerless lock would linger and
 * block every later run until the mtime window expired, because the caller
 * never learns it held the lock and so never releases it.
 *
 * @param {string} cwd - Project root.
 * @param {string} featureCode - Feature identifier.
 * @returns {void}
 * @throws {Error} When another run owns the lock, when the stale-takeover race
 *   is lost, or when a filesystem operation fails.
 */
export function claimRunLock(cwd, featureCode) {
  mkdirSync(gsdDir(cwd, featureCode), { recursive: true });
  const lock = runLockDir(cwd, featureCode);

  // Write the owner record; on failure give the lock back before rethrowing.
  const recordOwner = () => {
    try {
      writeFileSync(
        join(lock, 'owner.json'),
        JSON.stringify({ pid: process.pid, startedAt: new Date().toISOString() }, null, 2),
      );
    } catch (err) {
      // ENOENT means the lock directory is already gone (e.g. renamed aside by
      // another claimant); whatever sits at that path now is not ours to delete.
      if (err.code !== 'ENOENT') {
        try { rmSync(lock, { recursive: true, force: true }); } catch { /* best-effort */ }
      }
      throw err;
    }
  };

  // Fast path: nobody holds the lock.
  let created = false;
  try {
    mkdirSync(lock);
    created = true;
  } catch (err) {
    if (err.code !== 'EEXIST') throw err;
  }
  if (created) {
    recordOwner();
    return;
  }

  // EEXIST — decide stale vs live.
  const ownerPid = runLockOwnerPid(cwd, featureCode);
  let stale = false;
  if (typeof ownerPid === 'number') {
    stale = !pidAlive(ownerPid);
  } else {
    // No owner record yet: fall back to lock-dir age.
    try {
      stale = Date.now() - statSync(lock).mtimeMs > RUN_LOCK_STALE_MS;
    } catch { stale = true; }
  }
  if (!stale) {
    throw new Error(
      `runGsd: another gsd run owns ${featureCode} (.compose/gsd/${featureCode}/run.lock, ` +
      `pid ${ownerPid ?? 'unknown'} alive). Refusing to start a concurrent run.`,
    );
  }

  // Atomic stale takeover (rename-aside). If we lose the takeover race, another
  // run now legitimately owns the feature — refuse.
  if (!takeoverStaleLock(lock)) {
    throw new Error(
      `runGsd: another gsd run claimed ${featureCode} during stale-lock takeover. ` +
      `Refusing to start a concurrent run.`,
    );
  }
  recordOwner();
}
858
+
859
/**
 * Release a feature's live-run lock by removing its run.lock directory and
 * everything inside it. A lock that does not exist is not an error (`force`).
 *
 * @param {string} cwd - Project root.
 * @param {string} featureCode - Feature identifier.
 * @returns {void}
 */
export function releaseRunLock(cwd, featureCode) {
  const lockPath = runLockDir(cwd, featureCode);
  const removal = { recursive: true, force: true };
  rmSync(lockPath, removal);
}
862
+
863
/**
 * Merge `patch` into the in-memory run-state and persist the result.
 *
 * `ctx.runState` is replaced by a new object (existing fields first, patch
 * fields winning), which is then handed to writeGsdState. An empty patch still
 * triggers a write.
 * NOTE(review): the original comment says every flush restamps heartbeatAt —
 * presumably that happens inside writeGsdState; confirm there.
 *
 * @param {object} ctx - Step context carrying `cwd`, `featureCode`, `runState`.
 * @param {object} patch - Fields to overlay on the current run-state.
 * @returns {void}
 */
function flushState(ctx, patch) {
  const previous = ctx.runState ?? {};
  const next = Object.assign({}, previous, patch);
  ctx.runState = next;
  writeGsdState(ctx.cwd, ctx.featureCode, next);
}
869
+
870
/**
 * COMP-GSD-7-EVENTLOG: append one `task_completed` event for every task id
 * that has not been announced yet.
 *
 * `ctx.emittedCompletions` is the dedupe set: an id is added to it the moment
 * its event is emitted, so repeated calls (and duplicate ids within one call)
 * never produce a second event. Falsy ids are skipped. A context without the
 * set makes this a no-op.
 *
 * @param {object} ctx - Step context (`cwd`, `featureCode`, `emittedCompletions`).
 * @param {Iterable<string>} [completedIds] - Ids to consider; when omitted they
 *   are read via collectCompletedTaskIds.
 * @returns {void}
 */
function emitCompletionDeltas(ctx, completedIds) {
  const announced = ctx?.emittedCompletions;
  if (!announced) return;

  const { cwd, featureCode } = ctx;
  const candidates = completedIds ?? collectCompletedTaskIds(cwd, featureCode);
  for (const taskId of candidates) {
    const isNew = Boolean(taskId) && !announced.has(taskId);
    if (isNew) {
      announced.add(taskId);
      appendGsdEvent(cwd, featureCode, 'task_completed', { taskId });
    }
  }
}
884
+
885
/**
 * COMP-GSD-7-EVENTLOG: append a `phase` event the first time `phase` is seen.
 *
 * Deduplication is tracked in `ctx.emittedPhases` rather than in
 * `runState.phase`. A context without that set, or a phase already recorded
 * in it, makes this a no-op.
 *
 * @param {object} ctx - Step context (`cwd`, `featureCode`, `emittedPhases`).
 * @param {string} phase - Phase name to announce.
 * @returns {void}
 */
function emitPhaseOnce(ctx, phase) {
  const announced = ctx?.emittedPhases;
  const firstTime = Boolean(announced) && !announced.has(phase);
  if (firstTime) {
    announced.add(phase);
    appendGsdEvent(ctx.cwd, ctx.featureCode, 'phase', { phase });
  }
}
893
+
510
894
  /**
511
895
  * Build a GsdStuckDetector from `.compose/compose.json` `gsd.stuck.*`, falling
512
896
  * back to documented defaults (sameFileEdits=3, errorRepeats=3,
@@ -662,17 +1046,41 @@ State for resume is in \`pause.json\` (schema: \`contracts/gsd-stuck.json#/defin
662
1046
  */
663
1047
  export function loadResumeTaskGraph(cwd, featureCode, { claim = true } = {}) {
664
1048
  const pausePath = join(gsdDir(cwd, featureCode), 'pause.json');
665
- if (!existsSync(pausePath)) {
666
- throw new Error(
667
- `runGsd: no pause.json to resume for ${featureCode}. ` +
668
- `Nothing to resume — run \`compose gsd ${featureCode}\` to start fresh.`,
669
- );
670
- }
671
1049
  let pause;
672
- try {
673
- pause = JSON.parse(readFileSync(pausePath, 'utf-8'));
674
- } catch (err) {
675
- throw new Error(`runGsd: pause.json for ${featureCode} is unreadable: ${err.message}`);
1050
+ if (existsSync(pausePath)) {
1051
+ try {
1052
+ pause = JSON.parse(readFileSync(pausePath, 'utf-8'));
1053
+ } catch (err) {
1054
+ throw new Error(`runGsd: pause.json for ${featureCode} is unreadable: ${err.message}`);
1055
+ }
1056
+ } else {
1057
+ // COMP-GSD-6 crash bridge: a hard crash never reaches the stuck/budget halt
1058
+ // paths that write pause.json. If state.json shows a running run with a DEAD
1059
+ // pid and a populated task graph (resumeReady), synthesize a pause-shaped
1060
+ // object so the unfinished subset can be re-dispatched through the same
1061
+ // guards/filtering below. An EMPTY graph (crashed pre/at decompose) is NOT
1062
+ // resumable here — it (correctly) falls through to the throw; the supervisor
1063
+ // restarts such runs fresh rather than --resume.
1064
+ const state = readGsdState(cwd, featureCode);
1065
+ if (
1066
+ state && state.status === 'running' && !pidAlive(state.pid) &&
1067
+ Array.isArray(state.decomposedTasks) && state.decomposedTasks.length > 0
1068
+ ) {
1069
+ pause = {
1070
+ flowId: state.flowId ?? null,
1071
+ stepId: state.lastStepId ?? 'execute',
1072
+ decomposedTasks: state.decomposedTasks,
1073
+ completedTaskIds: state.completedTaskIds ?? [],
1074
+ pid: state.pid,
1075
+ mode: 'gsd',
1076
+ ts: state.heartbeatAt ?? new Date().toISOString(),
1077
+ };
1078
+ } else {
1079
+ throw new Error(
1080
+ `runGsd: no pause.json to resume for ${featureCode}. ` +
1081
+ `Nothing to resume — run \`compose gsd ${featureCode}\` to start fresh.`,
1082
+ );
1083
+ }
676
1084
  }
677
1085
 
678
1086
  // Mode guard: refuse to resume a non-gsd pause file.
@@ -687,7 +1095,7 @@ export function loadResumeTaskGraph(cwd, featureCode, { claim = true } = {}) {
687
1095
  // run still owns this feature (mirrors `compose fix --resume`). We do not
688
1096
  // make a self-pid exception: if a live process holds the pause, resuming is
689
1097
  // unsafe regardless of whether that pid happens to match ours.
690
- if (typeof pause.pid === 'number' && isPidAlive(pause.pid)) {
1098
+ if (typeof pause.pid === 'number' && pidAlive(pause.pid)) {
691
1099
  throw new Error(
692
1100
  `runGsd: cannot --resume: pid ${pause.pid} still owns this gsd run (process is live). ` +
693
1101
  `Wait for it to exit (or remove a stale pause.json) before resuming.`,
@@ -724,26 +1132,65 @@ export function loadResumeTaskGraph(cwd, featureCode, { claim = true } = {}) {
724
1132
  /**
725
1133
  * Atomic ownership claim (COMP-GSD-5 Codex review, HIGH). `mkdirSync` is an
726
1134
  * atomically exclusive create, so two concurrent --resume invocations cannot
727
- * both claim — the loser gets EEXIST and refuses. We deliberately do NOT
728
- * auto-take-over a pre-existing claim: stale-claim recovery (a crashed resume's
729
- * leftover) has an inherent TOCTOU race and is GSD-6's (crash-recovery) job,
730
- * built on this same pause-state. A claim left by a crashed resume is cleared
731
- * manually (message below) until GSD-6 lands.
1135
+ * both claim — the loser gets EEXIST and refuses.
1136
+ *
1137
+ * COMP-GSD-6: a STALE claim left by a crashed --resume is now auto-recovered.
1138
+ * The HOLDER of pause.lock writes its own pid into pause.lock/owner.json (NOT
1139
+ * pause.json.pid, which is the original crashed run's pid — always dead at
1140
+ * resume time and so useless for liveness). Takeover when that holder pid is
1141
+ * dead, OR no owner record exists and the lock-dir mtime is older than the
1142
+ * stale window. TOCTOU-safe: remove + re-attempt the atomic mkdir; a concurrent
1143
+ * winner still wins.
732
1144
  */
733
1145
  export function claimResumeLock(cwd, featureCode) {
734
1146
  const claimPath = join(gsdDir(cwd, featureCode), 'pause.lock');
1147
+ const writeOwner = () => {
1148
+ try {
1149
+ writeFileSync(
1150
+ join(claimPath, 'owner.json'),
1151
+ JSON.stringify({ pid: process.pid, ts: new Date().toISOString() }, null, 2),
1152
+ );
1153
+ } catch { /* best-effort; mtime fallback still protects takeover */ }
1154
+ };
735
1155
  try {
736
1156
  mkdirSync(claimPath);
1157
+ writeOwner();
1158
+ return;
737
1159
  } catch (err) {
738
- if (err.code === 'EEXIST') {
739
- throw new Error(
740
- `runGsd: a resume claim already exists for ${featureCode} ` +
741
- `(.compose/gsd/${featureCode}/pause.lock). Another --resume may be in progress; ` +
742
- `if none is, remove that directory to clear a stale claim, then retry.`,
743
- );
744
- }
745
- throw err;
1160
+ if (err.code !== 'EEXIST') throw err;
1161
+ }
1162
+ // EEXIST decide stale vs live by the lock HOLDER's own owner record.
1163
+ let holderPid = null;
1164
+ const ownerPath = join(claimPath, 'owner.json');
1165
+ if (existsSync(ownerPath)) {
1166
+ try {
1167
+ const o = JSON.parse(readFileSync(ownerPath, 'utf-8'));
1168
+ if (typeof o.pid === 'number') holderPid = o.pid;
1169
+ } catch { /* fall through to mtime */ }
1170
+ }
1171
+ let stale = false;
1172
+ if (typeof holderPid === 'number') {
1173
+ stale = !pidAlive(holderPid);
1174
+ } else {
1175
+ try {
1176
+ stale = Date.now() - statSync(claimPath).mtimeMs > RUN_LOCK_STALE_MS;
1177
+ } catch { stale = true; }
1178
+ }
1179
+ if (!stale) {
1180
+ throw new Error(
1181
+ `runGsd: a resume claim already exists for ${featureCode} ` +
1182
+ `(.compose/gsd/${featureCode}/pause.lock, pid ${holderPid ?? 'unknown'} alive). ` +
1183
+ `Another --resume may be in progress; if none is, remove that directory to clear a stale claim.`,
1184
+ );
746
1185
  }
1186
+ // Atomic stale takeover (rename-aside) — a concurrent reclaimer can't delete
1187
+ // our fresh lock. If we lose the race, refuse.
1188
+ if (!takeoverStaleLock(claimPath)) {
1189
+ throw new Error(
1190
+ `runGsd: another --resume claimed ${featureCode} during stale-claim takeover; retry.`,
1191
+ );
1192
+ }
1193
+ writeOwner();
747
1194
  }
748
1195
 
749
1196
  /**
@@ -791,6 +1238,25 @@ function writeBudgetArtifacts(ctx, response, budgetState) {
791
1238
  writeFileSync(join(dir, 'pause.json'), JSON.stringify(pause, null, 2) + '\n');
792
1239
  }
793
1240
 
1241
/**
 * COMP-GSD-7: on a clean complete, snapshot the run's final budget actuals-vs-caps
 * to budget-final.json so the milestone report has them retroactively (a clean
 * complete writes no budget.json — only halts do). Distinct filename from the
 * halt artifact budget.json (which buildGsdQuery's precedence reads).
 *
 * The write is atomic: the JSON goes to `budget-final.json.tmp` first and is
 * then renamed over the target. If either step throws, the temp file is removed
 * (best-effort) so a failed snapshot does not leave a stray `.tmp` behind, and
 * the original error is rethrown.
 *
 * @param ctx - Run context; reads `cwd`, `featureCode` and `runState.decomposedTasks`.
 * @param budgetState - Budget state forwarded to composeBudgetDiagnostic.
 * @throws Whatever the directory creation, diagnostic composition, write or
 *   rename throws.
 */
export function writeBudgetFinalSnapshot(ctx, budgetState) {
  const { cwd, featureCode } = ctx;
  const dir = gsdDir(cwd, featureCode);
  mkdirSync(dir, { recursive: true });
  // Shallow-copy each task so the diagnostic never shares objects with runState.
  const decomposedTasks = (ctx.runState?.decomposedTasks ?? []).map((t) => ({ ...t }));
  const completedTaskIds = collectCompletedTaskIds(cwd, featureCode);
  const { json } = composeBudgetDiagnostic(budgetState, { feature: featureCode, decomposedTasks, completedTaskIds });
  const target = join(dir, 'budget-final.json');
  const tmp = `${target}.tmp`;
  try {
    writeFileSync(tmp, JSON.stringify(json, null, 2) + '\n');
    renameSync(tmp, target);
  } catch (err) {
    // Do not leak the temp file when the write or the rename fails.
    try { rmSync(tmp, { force: true }); } catch { /* best-effort cleanup */ }
    throw err;
  }
}
1259
+
794
1260
  /**
795
1261
  * COMP-GSD-4: append a run's consumed usage to the cumulative ledger. Sourced
796
1262
  * from the stratum budget_state.consumed ({tokens,dispatches,wall_s,dollars}).
@@ -836,17 +1302,6 @@ function writeCumulativeRefusal(cwd, featureCode, chk, limits) {
836
1302
  writeFileSync(join(dir, 'budget.md'), md);
837
1303
  }
838
1304
 
839
- function isPidAlive(pid) {
840
- try {
841
- // signal 0 probes existence without sending a signal.
842
- process.kill(pid, 0);
843
- return true;
844
- } catch (err) {
845
- // ESRCH = no such process; EPERM = exists but not ours (still alive).
846
- return err.code === 'EPERM';
847
- }
848
- }
849
-
850
1305
  function clearPauseFile(cwd, featureCode) {
851
1306
  const dir = gsdDir(cwd, featureCode);
852
1307
  try { rmSync(join(dir, 'pause.json'), { force: true }); } catch { /* best-effort */ }