gsd-pi 2.78.1-dev.d8826a445 → 2.78.1-dev.eccf86e27

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (121)
  1. package/README.md +5 -7
  2. package/dist/help-text.js +1 -1
  3. package/dist/resource-loader.js +6 -1
  4. package/dist/resources/.managed-resources-content-hash +1 -1
  5. package/dist/resources/extensions/gsd/auto/detect-stuck.js +41 -5
  6. package/dist/resources/extensions/gsd/auto/loop.js +235 -36
  7. package/dist/resources/extensions/gsd/auto/phases.js +7 -5
  8. package/dist/resources/extensions/gsd/auto/session.js +33 -0
  9. package/dist/resources/extensions/gsd/auto-dispatch.js +46 -2
  10. package/dist/resources/extensions/gsd/auto-post-unit.js +19 -11
  11. package/dist/resources/extensions/gsd/auto-worktree.js +26 -187
  12. package/dist/resources/extensions/gsd/auto.js +79 -50
  13. package/dist/resources/extensions/gsd/bootstrap/register-hooks.js +9 -4
  14. package/dist/resources/extensions/gsd/crash-recovery.js +160 -47
  15. package/dist/resources/extensions/gsd/db/auto-workers.js +227 -0
  16. package/dist/resources/extensions/gsd/db/command-queue.js +105 -0
  17. package/dist/resources/extensions/gsd/db/milestone-leases.js +210 -0
  18. package/dist/resources/extensions/gsd/db/runtime-kv.js +91 -0
  19. package/dist/resources/extensions/gsd/db/unit-dispatches.js +322 -0
  20. package/dist/resources/extensions/gsd/docs/COORDINATION.md +42 -0
  21. package/dist/resources/extensions/gsd/doctor-proactive.js +4 -0
  22. package/dist/resources/extensions/gsd/doctor-runtime-checks.js +22 -6
  23. package/dist/resources/extensions/gsd/doctor.js +12 -2
  24. package/dist/resources/extensions/gsd/gsd-db.js +161 -3
  25. package/dist/resources/extensions/gsd/guided-flow.js +6 -2
  26. package/dist/resources/extensions/gsd/interrupted-session.js +18 -15
  27. package/dist/resources/extensions/gsd/state.js +21 -6
  28. package/dist/resources/extensions/gsd/worktree-resolver.js +64 -0
  29. package/dist/tsconfig.extensions.tsbuildinfo +1 -1
  30. package/dist/web/standalone/.next/BUILD_ID +1 -1
  31. package/dist/web/standalone/.next/app-path-routes-manifest.json +12 -12
  32. package/dist/web/standalone/.next/build-manifest.json +2 -2
  33. package/dist/web/standalone/.next/prerender-manifest.json +3 -3
  34. package/dist/web/standalone/.next/server/app/_global-error.html +1 -1
  35. package/dist/web/standalone/.next/server/app/_global-error.rsc +1 -1
  36. package/dist/web/standalone/.next/server/app/_global-error.segments/_full.segment.rsc +1 -1
  37. package/dist/web/standalone/.next/server/app/_global-error.segments/_global-error/__PAGE__.segment.rsc +1 -1
  38. package/dist/web/standalone/.next/server/app/_global-error.segments/_global-error.segment.rsc +1 -1
  39. package/dist/web/standalone/.next/server/app/_global-error.segments/_head.segment.rsc +1 -1
  40. package/dist/web/standalone/.next/server/app/_global-error.segments/_index.segment.rsc +1 -1
  41. package/dist/web/standalone/.next/server/app/_global-error.segments/_tree.segment.rsc +1 -1
  42. package/dist/web/standalone/.next/server/app/_not-found.html +1 -1
  43. package/dist/web/standalone/.next/server/app/_not-found.rsc +1 -1
  44. package/dist/web/standalone/.next/server/app/_not-found.segments/_full.segment.rsc +1 -1
  45. package/dist/web/standalone/.next/server/app/_not-found.segments/_head.segment.rsc +1 -1
  46. package/dist/web/standalone/.next/server/app/_not-found.segments/_index.segment.rsc +1 -1
  47. package/dist/web/standalone/.next/server/app/_not-found.segments/_not-found/__PAGE__.segment.rsc +1 -1
  48. package/dist/web/standalone/.next/server/app/_not-found.segments/_not-found.segment.rsc +1 -1
  49. package/dist/web/standalone/.next/server/app/_not-found.segments/_tree.segment.rsc +1 -1
  50. package/dist/web/standalone/.next/server/app/index.html +1 -1
  51. package/dist/web/standalone/.next/server/app/index.rsc +1 -1
  52. package/dist/web/standalone/.next/server/app/index.segments/__PAGE__.segment.rsc +1 -1
  53. package/dist/web/standalone/.next/server/app/index.segments/_full.segment.rsc +1 -1
  54. package/dist/web/standalone/.next/server/app/index.segments/_head.segment.rsc +1 -1
  55. package/dist/web/standalone/.next/server/app/index.segments/_index.segment.rsc +1 -1
  56. package/dist/web/standalone/.next/server/app/index.segments/_tree.segment.rsc +1 -1
  57. package/dist/web/standalone/.next/server/app-paths-manifest.json +12 -12
  58. package/dist/web/standalone/.next/server/middleware-build-manifest.js +1 -1
  59. package/dist/web/standalone/.next/server/pages/404.html +1 -1
  60. package/dist/web/standalone/.next/server/pages/500.html +1 -1
  61. package/dist/web/standalone/.next/server/server-reference-manifest.json +1 -1
  62. package/package.json +1 -1
  63. package/src/resources/extensions/gsd/auto/detect-stuck.ts +37 -5
  64. package/src/resources/extensions/gsd/auto/loop.ts +263 -41
  65. package/src/resources/extensions/gsd/auto/phases.ts +7 -5
  66. package/src/resources/extensions/gsd/auto/session.ts +36 -0
  67. package/src/resources/extensions/gsd/auto-dispatch.ts +53 -2
  68. package/src/resources/extensions/gsd/auto-post-unit.ts +19 -11
  69. package/src/resources/extensions/gsd/auto-worktree.ts +26 -211
  70. package/src/resources/extensions/gsd/auto.ts +89 -44
  71. package/src/resources/extensions/gsd/bootstrap/register-hooks.ts +9 -4
  72. package/src/resources/extensions/gsd/crash-recovery.ts +177 -43
  73. package/src/resources/extensions/gsd/db/auto-workers.ts +273 -0
  74. package/src/resources/extensions/gsd/db/command-queue.ts +149 -0
  75. package/src/resources/extensions/gsd/db/milestone-leases.ts +274 -0
  76. package/src/resources/extensions/gsd/db/runtime-kv.ts +127 -0
  77. package/src/resources/extensions/gsd/db/unit-dispatches.ts +446 -0
  78. package/src/resources/extensions/gsd/docs/COORDINATION.md +42 -0
  79. package/src/resources/extensions/gsd/doctor-proactive.ts +4 -0
  80. package/src/resources/extensions/gsd/doctor-runtime-checks.ts +24 -6
  81. package/src/resources/extensions/gsd/doctor.ts +10 -2
  82. package/src/resources/extensions/gsd/gsd-db.ts +170 -3
  83. package/src/resources/extensions/gsd/guided-flow.ts +6 -2
  84. package/src/resources/extensions/gsd/interrupted-session.ts +19 -12
  85. package/src/resources/extensions/gsd/state.ts +44 -6
  86. package/src/resources/extensions/gsd/tests/auto-loop-no-copy-artifacts.test.ts +72 -0
  87. package/src/resources/extensions/gsd/tests/auto-loop-symlink-worktree.test.ts +190 -0
  88. package/src/resources/extensions/gsd/tests/auto-workers.test.ts +105 -0
  89. package/src/resources/extensions/gsd/tests/command-queue.test.ts +141 -0
  90. package/src/resources/extensions/gsd/tests/crash-recovery-via-db.test.ts +203 -0
  91. package/src/resources/extensions/gsd/tests/crash-recovery.test.ts +169 -59
  92. package/src/resources/extensions/gsd/tests/detect-stuck-respects-retry.test.ts +173 -0
  93. package/src/resources/extensions/gsd/tests/integration/auto-worktree.test.ts +22 -12
  94. package/src/resources/extensions/gsd/tests/integration/doctor-proactive.test.ts +24 -10
  95. package/src/resources/extensions/gsd/tests/integration/doctor-runtime.test.ts +35 -23
  96. package/src/resources/extensions/gsd/tests/integration/workspace-collapse-integration.test.ts +3 -5
  97. package/src/resources/extensions/gsd/tests/interrupted-session-auto.test.ts +72 -25
  98. package/src/resources/extensions/gsd/tests/interrupted-session-ui.test.ts +72 -25
  99. package/src/resources/extensions/gsd/tests/memory-pressure-stuck-state.test.ts +9 -6
  100. package/src/resources/extensions/gsd/tests/milestone-leases.test.ts +152 -0
  101. package/src/resources/extensions/gsd/tests/parallel-milestone-isolation.test.ts +106 -0
  102. package/src/resources/extensions/gsd/tests/paused-session-via-db.test.ts +119 -0
  103. package/src/resources/extensions/gsd/tests/pipeline-variant-dispatch.test.ts +58 -0
  104. package/src/resources/extensions/gsd/tests/preferences-worktree-sync.test.ts +3 -17
  105. package/src/resources/extensions/gsd/tests/register-hooks-depth-verification.test.ts +110 -0
  106. package/src/resources/extensions/gsd/tests/runtime-kv.test.ts +120 -0
  107. package/src/resources/extensions/gsd/tests/skipped-validation-completion.test.ts +133 -28
  108. package/src/resources/extensions/gsd/tests/skipped-validation-db-atomicity.test.ts +17 -0
  109. package/src/resources/extensions/gsd/tests/stuck-state-via-db.test.ts +134 -0
  110. package/src/resources/extensions/gsd/tests/sync-layer-scope.test.ts +7 -26
  111. package/src/resources/extensions/gsd/tests/teardown-cleanup-parity.test.ts +4 -8
  112. package/src/resources/extensions/gsd/tests/unit-dispatches.test.ts +247 -0
  113. package/src/resources/extensions/gsd/tests/validate-milestone.test.ts +41 -1
  114. package/src/resources/extensions/gsd/tests/workspace.test.ts +15 -9
  115. package/src/resources/extensions/gsd/tests/write-gate.test.ts +31 -23
  116. package/src/resources/extensions/gsd/worktree-resolver.ts +62 -0
  117. package/src/resources/extensions/gsd/tests/auto-lock-creation.test.ts +0 -213
  118. package/src/resources/extensions/gsd/tests/auto-stale-lock-self-kill.test.ts +0 -87
  119. package/src/resources/extensions/gsd/tests/stop-auto-remote.test.ts +0 -159
  120. /package/dist/web/standalone/.next/static/{AT5qi39nKXkdmQIOIoh0f → Y5UeGFkXTYM9WIQOWHkot}/_buildManifest.js +0 -0
  121. /package/dist/web/standalone/.next/static/{AT5qi39nKXkdmQIOIoh0f → Y5UeGFkXTYM9WIQOWHkot}/_ssgManifest.js +0 -0
@@ -33,6 +33,18 @@ import { ModelPolicyDispatchBlockedError } from "../auto-model-selection.js";
33
33
  import { resolveEngine } from "../engine-resolver.js";
34
34
  import { logWarning } from "../workflow-logger.js";
35
35
  import { gsdRoot } from "../paths.js";
36
+ import { heartbeatAutoWorker } from "../db/auto-workers.js";
37
+ import {
38
+ recordDispatchClaim,
39
+ markRunning as markDispatchRunning,
40
+ markCompleted as markDispatchCompleted,
41
+ markFailed as markDispatchFailed,
42
+ markStuck as markDispatchStuck,
43
+ getRecentForUnit as getRecentDispatchesForUnit,
44
+ getRecentUnitKeysForProjectRoot,
45
+ } from "../db/unit-dispatches.js";
46
+ import { refreshMilestoneLease } from "../db/milestone-leases.js";
47
+ import { getRuntimeKv, setRuntimeKv } from "../db/runtime-kv.js";
36
48
  import { atomicWriteSync } from "../atomic-write.js";
37
49
  import { resolveUokFlags } from "../uok/flags.js";
38
50
  import { scheduleSidecarQueue } from "../uok/execution-graph.js";
@@ -40,45 +52,47 @@ import { ExecutionGraphScheduler } from "../uok/execution-graph.js";
40
52
  import type { UokGraphNode } from "../uok/contracts.js";
41
53
  import { readFileSync, writeFileSync, mkdirSync, unlinkSync } from "node:fs";
42
54
  import { join } from "node:path";
55
+ import { normalizeRealPath } from "../paths.js";
43
56
 
44
57
  // ── Stuck detection persistence (#3704) ──────────────────────────────────
45
- // Persist stuck detection state to disk so it survives session restarts.
46
- // Without this, restarting auto-mode resets all counters, allowing the
47
- // same blocked unit to burn a full retry budget each session.
48
- function stuckStatePath(basePath: string): string {
49
- return join(gsdRoot(basePath), "runtime", "stuck-state.json");
58
+ // Phase C migration: stuck-state.json deleted in favor of DB-backed
59
+ // equivalents. recentUnits is rebuilt from unit_dispatches (Phase B
60
+ // ledger) on session start; stuckRecoveryAttempts persists in runtime_kv
61
+ // under a stable project scope (soft state per the runtime_kv invariant). Single-host
62
+ // SQLite WAL only — multi-host would need a real coordinator.
63
+ //
64
+ // When no scope id resolves or the DB call fails (DB unavailable, fresh project), both
65
+ // helpers degrade to the empty-state fallback that #3704 already
66
+ // tolerates — same behavior as a fresh session.
67
+ const STUCK_RECOVERY_ATTEMPTS_KEY = "stuck_recovery_attempts";
68
+ const RECENT_UNIT_KEYS_LIMIT = 20;
69
+
70
+ function stableStuckStateScopeId(s: AutoSession): string {
71
+ return normalizeRealPath(s.scope?.workspace.projectRoot ?? (s.originalBasePath || s.basePath));
50
72
  }
51
73
 
52
- function loadStuckState(basePath: string): { recentUnits: Array<{ key: string }>; stuckRecoveryAttempts: number } {
74
+ function loadStuckState(s: AutoSession): { recentUnits: Array<{ key: string }>; stuckRecoveryAttempts: number } {
75
+ const scopeId = stableStuckStateScopeId(s);
76
+ if (!scopeId) return { recentUnits: [], stuckRecoveryAttempts: 0 };
53
77
  try {
54
- const data = JSON.parse(readFileSync(stuckStatePath(basePath), "utf-8"));
55
- // Only load state written by a DIFFERENT process (real session restart).
56
- // If the PID matches the current process, this state was written by an earlier
57
- // autoLoop call in the same process (e.g., a test that completed before this
58
- // one), not by a crashed session — skip it to prevent test state pollution.
59
- if (data.pid === process.pid) {
60
- return { recentUnits: [], stuckRecoveryAttempts: 0 };
61
- }
62
- return {
63
- recentUnits: Array.isArray(data.recentUnits) ? data.recentUnits : [],
64
- stuckRecoveryAttempts: typeof data.stuckRecoveryAttempts === "number" ? data.stuckRecoveryAttempts : 0,
65
- };
78
+ const recentUnits = getRecentUnitKeysForProjectRoot(scopeId, RECENT_UNIT_KEYS_LIMIT);
79
+ const stuckRecoveryAttempts =
80
+ getRuntimeKv<number>("global", scopeId, STUCK_RECOVERY_ATTEMPTS_KEY) ?? 0;
81
+ return { recentUnits, stuckRecoveryAttempts };
66
82
  } catch (err) {
67
83
  debugLog("autoLoop", { phase: "load-stuck-state-failed", error: err instanceof Error ? err.message : String(err) });
68
84
  return { recentUnits: [], stuckRecoveryAttempts: 0 };
69
85
  }
70
86
  }
71
87
 
72
- function saveStuckState(basePath: string, state: LoopState): void {
88
+ function saveStuckState(s: AutoSession, state: LoopState): void {
89
+ const scopeId = stableStuckStateScopeId(s);
90
+ if (!scopeId) return;
91
+ // recentUnits is automatically derived from unit_dispatches by the
92
+ // dispatch ledger writes in openDispatchClaim — no separate persistence
93
+ // needed. Only the soft retry counter needs a runtime_kv row.
73
94
  try {
74
- const filePath = stuckStatePath(basePath);
75
- mkdirSync(join(gsdRoot(basePath), "runtime"), { recursive: true });
76
- writeFileSync(filePath, JSON.stringify({
77
- pid: process.pid,
78
- recentUnits: state.recentUnits.slice(-20), // keep last 20 entries
79
- stuckRecoveryAttempts: state.stuckRecoveryAttempts,
80
- updatedAt: new Date().toISOString(),
81
- }) + "\n");
95
+ setRuntimeKv("global", scopeId, STUCK_RECOVERY_ATTEMPTS_KEY, state.stuckRecoveryAttempts);
82
96
  } catch (err) {
83
97
  debugLog("autoLoop", { phase: "save-stuck-state-failed", error: err instanceof Error ? err.message : String(err) });
84
98
  }
@@ -140,6 +154,78 @@ function saveCustomVerifyRetryCounts(s: AutoSession): void {
140
154
  }
141
155
  }
142
156
 
157
+ /**
158
+ * Phase B helper: open a unit_dispatches row in 'claimed' state and
159
+ * immediately transition it to 'running'. Returns a tri-state result so
160
+ * callers can distinguish between a degraded ledger write and an explicit
161
+ * already-active rejection from the partial unique index.
162
+ *
163
+ * Single-worker compatibility: this function is best-effort and never
164
+ * throws. The auto-loop must continue to behave identically when the
165
+ * ledger is degraded.
166
+ */
167
+ type DispatchClaimOutcome =
168
+ | { kind: "opened"; dispatchId: number }
169
+ | { kind: "skip"; reason: "already-active" | "stale-lease"; existingId?: number; existingWorker?: string }
170
+ | { kind: "degraded" };
171
+
172
+ function openDispatchClaim(
173
+ s: AutoSession,
174
+ flowId: string,
175
+ turnId: string,
176
+ iterData: IterationData,
177
+ ): DispatchClaimOutcome {
178
+ if (!s.workerId || s.milestoneLeaseToken === null) return { kind: "degraded" };
179
+ const mid = iterData.mid;
180
+ if (!mid) return { kind: "degraded" };
181
+
182
+ const recent = getRecentDispatchesForUnit(iterData.unitId, 1);
183
+ const attemptN = (recent[0]?.attempt_n ?? 0) + 1;
184
+
185
+ let claim: ReturnType<typeof recordDispatchClaim>;
186
+ try {
187
+ claim = recordDispatchClaim({
188
+ traceId: flowId,
189
+ turnId,
190
+ workerId: s.workerId,
191
+ milestoneLeaseToken: s.milestoneLeaseToken,
192
+ milestoneId: mid,
193
+ sliceId: iterData.state.activeSlice?.id ?? null,
194
+ taskId: iterData.state.activeTask?.id ?? null,
195
+ unitType: iterData.unitType,
196
+ unitId: iterData.unitId,
197
+ attemptN,
198
+ });
199
+ if (!claim.ok) {
200
+ debugLog("autoLoop", {
201
+ phase: "dispatch-claim-rejected",
202
+ unitId: iterData.unitId,
203
+ reason: claim.error,
204
+ existingId: "existingId" in claim ? claim.existingId : undefined,
205
+ existingWorker: "existingWorker" in claim ? claim.existingWorker : undefined,
206
+ });
207
+ if (claim.error === "already_active") {
208
+ return {
209
+ kind: "skip",
210
+ reason: "already-active",
211
+ existingId: claim.existingId,
212
+ existingWorker: claim.existingWorker,
213
+ };
214
+ }
215
+ return { kind: "skip", reason: "stale-lease" };
216
+ }
217
+ markDispatchRunning(claim.dispatchId);
218
+ return { kind: "opened", dispatchId: claim.dispatchId };
219
+ } catch (err) {
220
+ debugLog("autoLoop", {
221
+ phase: "dispatch-claim-failed",
222
+ error: err instanceof Error ? err.message : String(err),
223
+ });
224
+ return { kind: "degraded" };
225
+ }
226
+
227
+ }
228
+
143
229
  // ── Memory pressure monitoring (#3331) ──────────────────────────────────
144
230
  // Check heap usage every N iterations and trigger graceful shutdown before
145
231
  // the OS OOM killer sends SIGKILL. The threshold is 90% of the V8 heap
@@ -268,7 +354,7 @@ export async function autoLoop(
268
354
  let iteration = 0;
269
355
  const dispatchContract = options?.dispatchContract ?? "legacy-direct";
270
356
  // Load persisted stuck state so counters survive session restarts (#3704)
271
- const persisted = loadStuckState(s.basePath);
357
+ const persisted = loadStuckState(s);
272
358
  const loopState: LoopState = {
273
359
  recentUnits: persisted.recentUnits,
274
360
  stuckRecoveryAttempts: persisted.stuckRecoveryAttempts,
@@ -282,6 +368,23 @@ export async function autoLoop(
282
368
  iteration++;
283
369
  debugLog("autoLoop", { phase: "loop-top", iteration });
284
370
 
371
+ // Phase B: heartbeat the worker registry + active milestone lease so
372
+ // janitors and concurrent workers see a live process. Best-effort —
373
+ // DB unavailability or stale state must not stop the loop.
374
+ if (s.workerId) {
375
+ try {
376
+ heartbeatAutoWorker(s.workerId);
377
+ if (s.currentMilestoneId && s.milestoneLeaseToken) {
378
+ refreshMilestoneLease(s.workerId, s.currentMilestoneId, s.milestoneLeaseToken);
379
+ }
380
+ } catch (err) {
381
+ debugLog("autoLoop", {
382
+ phase: "heartbeat-failed",
383
+ error: err instanceof Error ? err.message : String(err),
384
+ });
385
+ }
386
+ }
387
+
285
388
  // ── Journal: per-iteration flow grouping ──
286
389
  const flowId = randomUUID();
287
390
  let seqCounter = 0;
@@ -364,6 +467,9 @@ export async function autoLoop(
364
467
  break;
365
468
  }
366
469
 
470
+ let dispatchId: number | null = null;
471
+ let dispatchSettled = false;
472
+
367
473
  try {
368
474
  // ── Blanket try/catch: one bad iteration must not kill the session
369
475
  const prefs = deps.loadEffectiveGSDPreferences()?.preferences;
@@ -429,7 +535,17 @@ export async function autoLoop(
429
535
  activeRunDir: s.activeRunDir,
430
536
  });
431
537
 
432
- const engineState = await engine.deriveState(s.basePath);
538
+ const engineState = await engine.deriveState(s.canonicalProjectRoot);
539
+ debugLog("autoLoop", {
540
+ phase: "post-derive",
541
+ site: "custom-engine-derive",
542
+ basePath: s.basePath,
543
+ originalBasePath: s.originalBasePath,
544
+ scopeProjectRoot: s.scope?.workspace.projectRoot,
545
+ canonicalProjectRoot: s.canonicalProjectRoot,
546
+ derivedPhase: (engineState as { phase?: string }).phase,
547
+ isComplete: engineState.isComplete,
548
+ });
433
549
  if (engineState.isComplete) {
434
550
  await deps.stopAuto(ctx, pi, "Workflow complete");
435
551
  break;
@@ -448,7 +564,15 @@ export async function autoLoop(
448
564
 
449
565
  // dispatch.action === "dispatch"
450
566
  const step = dispatch.step!;
451
- const gsdState = await deps.deriveState(s.basePath);
567
+ const gsdState = await deps.deriveState(s.canonicalProjectRoot);
568
+ debugLog("autoLoop", {
569
+ phase: "post-derive",
570
+ site: "custom-engine-gsd-state",
571
+ basePath: s.basePath,
572
+ canonicalProjectRoot: s.canonicalProjectRoot,
573
+ derivedPhase: gsdState.phase,
574
+ activeUnit: gsdState.activeTask?.id ?? gsdState.activeSlice?.id ?? gsdState.activeMilestone?.id,
575
+ });
452
576
 
453
577
  iterData = {
454
578
  unitType: step.unitType,
@@ -571,7 +695,7 @@ export async function autoLoop(
571
695
  consecutiveCooldowns = 0;
572
696
  recentErrorMessages.length = 0;
573
697
  deps.emitJournalEvent({ ts: new Date().toISOString(), flowId, seq: nextSeq(), eventType: "iteration-end", data: { iteration } });
574
- saveStuckState(s.basePath, loopState); // persist across session restarts (#3704)
698
+ saveStuckState(s, loopState); // persist across session restarts (#3704)
575
699
  debugLog("autoLoop", { phase: "iteration-complete", iteration });
576
700
 
577
701
  if (reconcileResult.outcome === "milestone-complete") {
@@ -649,7 +773,15 @@ export async function autoLoop(
649
773
  observedUnitId = iterData.unitId;
650
774
  } else {
651
775
  // ── Sidecar path: use values from the sidecar item directly ──
652
- const sidecarState = await deps.deriveState(s.basePath);
776
+ const sidecarState = await deps.deriveState(s.canonicalProjectRoot);
777
+ debugLog("autoLoop", {
778
+ phase: "post-derive",
779
+ site: "sidecar",
780
+ basePath: s.basePath,
781
+ canonicalProjectRoot: s.canonicalProjectRoot,
782
+ derivedPhase: sidecarState.phase,
783
+ activeUnit: sidecarState.activeTask?.id ?? sidecarState.activeSlice?.id ?? sidecarState.activeMilestone?.id,
784
+ });
653
785
  iterData = {
654
786
  unitType: sidecarItem.unitType,
655
787
  unitId: sidecarItem.unitId,
@@ -671,13 +803,45 @@ export async function autoLoop(
671
803
  }
672
804
 
673
805
  await enforceMinRequestInterval(s, prefs);
674
- const unitPhaseResult = await runUnitPhaseViaContract(
675
- dispatchContract,
676
- ic,
677
- iterData,
678
- loopState,
679
- sidecarItem,
680
- );
806
+
807
+ // Phase B: claim a unit_dispatches row before invoking the unit. The
808
+ // partial unique index idx_unit_dispatches_active_per_unit prevents
809
+ // a second worker from claiming the same unit concurrently. Returns
810
+ { kind: "degraded" } when DB unavailable, no worker registered, or no active lease
811
+ // — those degraded paths fall through to the existing single-worker
812
+ // semantics with no ledger entry, preserving back-compat.
813
+ const dispatchClaim = openDispatchClaim(s, flowId, turnId, iterData);
814
+ if (dispatchClaim.kind === "skip") {
815
+ finishTurn("skipped", "execution", dispatchClaim.reason);
816
+ continue;
817
+ }
818
+ dispatchId = dispatchClaim.kind === "opened" ? dispatchClaim.dispatchId : null;
819
+
820
+ let unitPhaseResult: Awaited<ReturnType<typeof runUnitPhaseViaContract>>;
821
+ try {
822
+ unitPhaseResult = await runUnitPhaseViaContract(
823
+ dispatchContract,
824
+ ic,
825
+ iterData,
826
+ loopState,
827
+ sidecarItem,
828
+ );
829
+ } catch (err) {
830
+ if (err instanceof ModelPolicyDispatchBlockedError) {
831
+ throw err;
832
+ }
833
+ if (dispatchId !== null) {
834
+ try {
835
+ markDispatchFailed(dispatchId, {
836
+ errorSummary: `exception:${err instanceof Error ? err.message : String(err)}`,
837
+ });
838
+ dispatchSettled = true;
839
+ } catch (ledgerErr) {
840
+ debugLog("autoLoop", { phase: "dispatch-ledger-write-failed", error: ledgerErr instanceof Error ? ledgerErr.message : String(ledgerErr) });
841
+ }
842
+ }
843
+ throw err;
844
+ }
681
845
  if (unitPhaseResult.action === "next") {
682
846
  const requestTimestamp = unitPhaseResult.data.requestDispatchedAt ?? unitPhaseResult.data.unitStartedAt;
683
847
  if (typeof requestTimestamp === "number") s.lastRequestTimestamp = requestTimestamp;
@@ -687,13 +851,36 @@ export async function autoLoop(
687
851
  unitId: iterData.unitId,
688
852
  });
689
853
  if (unitPhaseResult.action === "break") {
854
+ if (dispatchId !== null) {
855
+ try {
856
+ markDispatchFailed(dispatchId, { errorSummary: "unit-break" });
857
+ dispatchSettled = true;
858
+ } catch (err) {
859
+ debugLog("autoLoop", { phase: "dispatch-ledger-write-failed", error: err instanceof Error ? err.message : String(err) });
860
+ }
861
+ }
690
862
  finishTurn("stopped", "execution", "unit-break");
691
863
  break;
692
864
  }
693
865
 
694
866
  // ── Phase 5: Finalize ───────────────────────────────────────────────
695
867
 
696
- const finalizeResult = await runFinalize(ic, iterData, loopState, sidecarItem);
868
+ let finalizeResult: Awaited<ReturnType<typeof runFinalize>>;
869
+ try {
870
+ finalizeResult = await runFinalize(ic, iterData, loopState, sidecarItem);
871
+ } catch (err) {
872
+ if (dispatchId !== null) {
873
+ try {
874
+ markDispatchFailed(dispatchId, {
875
+ errorSummary: `exception:${err instanceof Error ? err.message : String(err)}`,
876
+ });
877
+ dispatchSettled = true;
878
+ } catch (ledgerErr) {
879
+ debugLog("autoLoop", { phase: "dispatch-ledger-write-failed", error: ledgerErr instanceof Error ? ledgerErr.message : String(ledgerErr) });
880
+ }
881
+ }
882
+ throw err;
883
+ }
697
884
  deps.uokObserver?.onPhaseResult("finalize", finalizeResult.action, {
698
885
  unitType: iterData.unitType,
699
886
  unitId: iterData.unitId,
@@ -702,24 +889,59 @@ export async function autoLoop(
702
889
  const finalizeFailureClass = finalizeResult.reason === "git-closeout-failure"
703
890
  ? "git"
704
891
  : "closeout";
892
+ if (dispatchId !== null) {
893
+ try {
894
+ markDispatchFailed(dispatchId, { errorSummary: `finalize-break:${finalizeResult.reason ?? "unknown"}` });
895
+ dispatchSettled = true;
896
+ } catch (err) {
897
+ debugLog("autoLoop", { phase: "dispatch-ledger-write-failed", error: err instanceof Error ? err.message : String(err) });
898
+ }
899
+ }
705
900
  finishTurn("stopped", finalizeFailureClass, "finalize-break");
706
901
  break;
707
902
  }
708
903
  if (finalizeResult.action === "continue") {
904
+ if (dispatchId !== null) {
905
+ try {
906
+ markDispatchFailed(dispatchId, { errorSummary: "finalize-retry" });
907
+ dispatchSettled = true;
908
+ } catch (err) {
909
+ debugLog("autoLoop", { phase: "dispatch-ledger-write-failed", error: err instanceof Error ? err.message : String(err) });
910
+ }
911
+ }
709
912
  finishTurn("retry");
710
913
  continue;
711
914
  }
712
915
 
916
+ if (dispatchId !== null) {
917
+ try {
918
+ markDispatchCompleted(dispatchId);
919
+ dispatchSettled = true;
920
+ } catch (err) {
921
+ debugLog("autoLoop", { phase: "dispatch-ledger-write-failed", error: err instanceof Error ? err.message : String(err) });
922
+ }
923
+ }
713
924
  consecutiveErrors = 0; // Iteration completed successfully
714
925
  consecutiveCooldowns = 0;
715
926
  recentErrorMessages.length = 0;
716
927
  deps.emitJournalEvent({ ts: new Date().toISOString(), flowId, seq: nextSeq(), eventType: "iteration-end", data: { iteration } });
717
- saveStuckState(s.basePath, loopState); // persist across session restarts (#4382)
928
+ saveStuckState(s, loopState); // persist across session restarts (#4382)
718
929
  debugLog("autoLoop", { phase: "iteration-complete", iteration });
719
930
  finishTurn("completed");
720
931
  } catch (loopErr) {
721
932
  // ── Blanket catch: absorb unexpected exceptions, apply graduated recovery ──
722
933
  const msg = loopErr instanceof Error ? loopErr.message : String(loopErr);
934
+ if (dispatchId !== null && !dispatchSettled && !(loopErr instanceof ModelPolicyDispatchBlockedError)) {
935
+ try {
936
+ markDispatchFailed(dispatchId, { errorSummary: `unhandled-error:${msg.slice(0, 200)}` });
937
+ dispatchSettled = true;
938
+ } catch (err) {
939
+ debugLog("autoLoop", {
940
+ phase: "dispatch-ledger-write-failed",
941
+ error: err instanceof Error ? err.message : String(err),
942
+ });
943
+ }
944
+ }
723
945
 
724
946
  // Always emit iteration-end on error so the journal records iteration
725
947
  // completion even on failure (#2344). Without this, errors in
@@ -418,8 +418,10 @@ export async function runPreDispatch(
418
418
  );
419
419
  }
420
420
 
421
- // Derive state
422
- let state = await deps.deriveState(s.basePath);
421
+ // Derive state — use canonical project root so the cache key is stable
422
+ // across worktree↔project-root path-form alternation. See PR #5236
423
+ // (workspace handle infrastructure) and the Phase A pt 2 plan.
424
+ let state = await deps.deriveState(s.canonicalProjectRoot);
423
425
  const { getDeepStageGate } = await import("../auto-dispatch.js");
424
426
  const deepStageGate = getDeepStageGate(prefs, s.basePath);
425
427
  const canRunDeepSetupGate =
@@ -457,7 +459,7 @@ export async function runPreDispatch(
457
459
  let compiled = ensurePlanV2Graph(s.basePath, state);
458
460
  if (isEmptyPlanV2GraphResult(compiled)) {
459
461
  deps.invalidateAllCaches();
460
- state = await deps.deriveState(s.basePath);
462
+ state = await deps.deriveState(s.canonicalProjectRoot);
461
463
  compiled = shouldRunPlanV2Gate(state.phase)
462
464
  ? ensurePlanV2Graph(s.basePath, state)
463
465
  : {
@@ -657,7 +659,7 @@ export async function runPreDispatch(
657
659
 
658
660
  deps.invalidateAllCaches();
659
661
 
660
- state = await deps.deriveState(s.basePath);
662
+ state = await deps.deriveState(s.canonicalProjectRoot);
661
663
  mid = state.activeMilestone?.id;
662
664
  midTitle = state.activeMilestone?.title;
663
665
 
@@ -837,7 +839,7 @@ export async function runPreDispatch(
837
839
  }
838
840
  if (mergeReconcileResult === "reconciled") {
839
841
  deps.invalidateAllCaches();
840
- state = await deps.deriveState(s.basePath);
842
+ state = await deps.deriveState(s.canonicalProjectRoot);
841
843
  mid = state.activeMilestone?.id;
842
844
  midTitle = state.activeMilestone?.title;
843
845
  }
@@ -22,6 +22,7 @@ import type { GitServiceImpl } from "../git-service.js";
22
22
  import type { CaptureEntry } from "../captures.js";
23
23
  import type { BudgetAlertLevel } from "../auto-budget.js";
24
24
  import { resolveWorktreeProjectRoot } from "../worktree-root.js";
25
+ import { normalizeRealPath } from "../paths.js";
25
26
  import type { MilestoneScope } from "../workspace.js";
26
27
 
27
28
  // ─── Exported Types ──────────────────────────────────────────────────────────
@@ -97,6 +98,21 @@ export class AutoSession {
97
98
  originalBasePath = "";
98
99
  // TODO(C8): remove basePath/originalBasePath once all readers use s.scope
99
100
  scope: MilestoneScope | null = null;
101
+
102
+ // ── Coordination identity (Phase B — DB-backed coordination) ────────────
103
+ /**
104
+ * Worker registry ID set by registerAutoWorker() at session start. Used by
105
+ * heartbeatAutoWorker() each loop iteration and by recordDispatchClaim()
106
+ * to fence dispatch ledger writes against stale workers.
107
+ */
108
+ workerId: string | null = null;
109
+ /**
110
+ * Active milestone lease fencing token, set by claimMilestoneLease() inside
111
+ * worktree-resolver.enterMilestone(). Threaded into recordDispatchClaim()
112
+ * as milestone_lease_token so out-of-band dispatches by a stale worker
113
+ * are detectable.
114
+ */
115
+ milestoneLeaseToken: number | null = null;
100
116
  previousProjectRootEnv: string | null = null;
101
117
  hadProjectRootEnv = false;
102
118
  projectRootEnvCaptured = false;
@@ -235,6 +251,24 @@ export class AutoSession {
235
251
  return resolveWorktreeProjectRoot(this.basePath, this.originalBasePath);
236
252
  }
237
253
 
254
+ /**
255
+ * Canonical project root for state-derivation reads AND writer paths.
256
+ *
257
+ * Prefers the realpath-normalized projectRoot from the MilestoneScope
258
+ * (introduced by PR #5236), falling back to resolveWorktreeProjectRoot
259
+ * during early lifecycle / engine-bypass paths where scope may be null.
260
+ *
261
+ * Always realpath-normalized so cache keys (e.g. deriveState's _stateCache)
262
+ * cannot drift across worktree↔project-root path-string variants for the
263
+ * same filesystem location.
264
+ */
265
+ get canonicalProjectRoot(): string {
266
+ const root =
267
+ this.scope?.workspace.projectRoot
268
+ ?? resolveWorktreeProjectRoot(this.basePath, this.originalBasePath);
269
+ return normalizeRealPath(root);
270
+ }
271
+
238
272
  reset(): void {
239
273
  this.clearTimers();
240
274
 
@@ -251,6 +285,8 @@ export class AutoSession {
251
285
  this.basePath = "";
252
286
  this.originalBasePath = "";
253
287
  this.scope = null;
288
+ this.workerId = null;
289
+ this.milestoneLeaseToken = null;
254
290
  this.previousProjectRootEnv = null;
255
291
  this.hadProjectRootEnv = false;
256
292
  this.projectRootEnvCaptured = false;
@@ -14,7 +14,7 @@ import type { GSDPreferences } from "./preferences.js";
14
14
  import type { UatType } from "./files.js";
15
15
  import type { MinimalModelRegistry } from "./context-budget.js";
16
16
  import { loadFile, extractUatType, loadActiveOverrides } from "./files.js";
17
- import { isDbAvailable, getMilestoneSlices, getPendingGates, markAllGatesOmitted, getMilestone } from "./gsd-db.js";
17
+ import { isDbAvailable, getMilestoneSlices, getPendingGates, markAllGatesOmitted, getMilestone, insertAssessment, transaction } from "./gsd-db.js";
18
18
  import { isClosedStatus } from "./status-guards.js";
19
19
  import { extractVerdict, isAcceptableUatVerdict } from "./verdict-parser.js";
20
20
 
@@ -78,6 +78,8 @@ import {
78
78
  type DeepProjectSetupStage,
79
79
  } from "./deep-project-setup-policy.js";
80
80
  import { annotateBackgroundable } from "./delegation-policy.js";
81
+ import { invalidateAllCaches } from "./cache.js";
82
+ import { insertMilestoneValidationGates } from "./milestone-validation-gates.js";
81
83
 
82
84
  // ─── Types ────────────────────────────────────────────────────────────────
83
85
 
@@ -1203,9 +1205,12 @@ export const DISPATCH_RULES: DispatchRule[] = [
1203
1205
  const skipSource = trivialVariant
1204
1206
  ? "trivial-scope pipeline variant (#4781)"
1205
1207
  : "`skip_milestone_validation` preference";
1208
+ const skipValidationReason = trivialVariant ? "trivial-scope" : "preference";
1206
1209
  const content = [
1207
1210
  "---",
1208
1211
  "verdict: pass",
1212
+ "skip_validation: true",
1213
+ `skip_validation_reason: ${skipValidationReason}`,
1209
1214
  "remediation_round: 0",
1210
1215
  "---",
1211
1216
  "",
@@ -1214,6 +1219,45 @@ export const DISPATCH_RULES: DispatchRule[] = [
1214
1219
  `Milestone validation was skipped via ${skipSource}.`,
1215
1220
  ].join("\n");
1216
1221
  writeFileSync(validationPath, content, "utf-8");
1222
+ try {
1223
+ // DB-backed state derivation keys off assessments, not only the file
1224
+ // projection. Persist the skipped validation there too so the next
1225
+ // loop iteration advances to completing-milestone instead of
1226
+ // re-entering validating-milestone.
1227
+ if (isDbAvailable()) {
1228
+ transaction(() => {
1229
+ insertAssessment({
1230
+ path: validationPath,
1231
+ milestoneId: mid,
1232
+ sliceId: null,
1233
+ taskId: null,
1234
+ status: "pass",
1235
+ scope: "milestone-validation",
1236
+ fullContent: content,
1237
+ });
1238
+ const gateSliceId = getMilestoneSlices(mid)[0]?.id;
1239
+ if (gateSliceId) {
1240
+ insertMilestoneValidationGates(
1241
+ mid,
1242
+ gateSliceId,
1243
+ "pass",
1244
+ new Date().toISOString(),
1245
+ );
1246
+ }
1247
+ });
1248
+ }
1249
+ } catch (err) {
1250
+ try {
1251
+ unlinkSync(validationPath);
1252
+ } catch (unlinkErr) {
1253
+ logWarning(
1254
+ "dispatch",
1255
+ `failed to remove skipped validation file after DB write failure for ${mid}: ${unlinkErr instanceof Error ? unlinkErr.message : String(unlinkErr)}`,
1256
+ );
1257
+ }
1258
+ throw err;
1259
+ }
1260
+ invalidateAllCaches();
1217
1261
  }
1218
1262
  return { action: "skip" };
1219
1263
  }
@@ -1298,7 +1342,9 @@ export const DISPATCH_RULES: DispatchRule[] = [
1298
1342
  if (validationContent) {
1299
1343
  // Allow completion when validation was intentionally skipped by
1300
1344
  // preference/budget profile (#3399, #3344).
1345
+ const skippedByMarker = /^skip_validation:\s*true$/im.test(validationContent);
1301
1346
  const skippedByPreference = /skip(?:ped)?[\s\-]+(?:by|per|due to)\s+(?:preference|budget|profile)/i.test(validationContent);
1347
+ const skippedByTrivialVariant = /trivial-scope pipeline variant/i.test(validationContent);
1302
1348
 
1303
1349
  // Accept either the structured template format (table with MET/N/A/SATISFIED)
1304
1350
  // or prose evidence patterns the validation agent may emit.
@@ -1307,7 +1353,12 @@ export const DISPATCH_RULES: DispatchRule[] = [
1307
1353
  (validationContent.includes("MET") || validationContent.includes("N/A") || validationContent.includes("SATISFIED"));
1308
1354
  const proseMatch =
1309
1355
  /[Oo]perational[\s\S]{0,500}?(?:✅|pass|verified|confirmed|met|complete|true|yes|addressed|covered|satisfied|partially|n\/a|not[\s-]+applicable)/i.test(validationContent);
1310
- const hasOperationalCheck = skippedByPreference || structuredMatch || proseMatch;
1356
+ const hasOperationalCheck =
1357
+ skippedByMarker ||
1358
+ skippedByPreference ||
1359
+ skippedByTrivialVariant ||
1360
+ structuredMatch ||
1361
+ proseMatch;
1311
1362
  if (!hasOperationalCheck) {
1312
1363
  return {
1313
1364
  action: "stop" as const,