gsd-pi 2.78.1-dev.d8826a445 → 2.78.1-dev.eccf86e27

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (121) hide show
  1. package/README.md +5 -7
  2. package/dist/help-text.js +1 -1
  3. package/dist/resource-loader.js +6 -1
  4. package/dist/resources/.managed-resources-content-hash +1 -1
  5. package/dist/resources/extensions/gsd/auto/detect-stuck.js +41 -5
  6. package/dist/resources/extensions/gsd/auto/loop.js +235 -36
  7. package/dist/resources/extensions/gsd/auto/phases.js +7 -5
  8. package/dist/resources/extensions/gsd/auto/session.js +33 -0
  9. package/dist/resources/extensions/gsd/auto-dispatch.js +46 -2
  10. package/dist/resources/extensions/gsd/auto-post-unit.js +19 -11
  11. package/dist/resources/extensions/gsd/auto-worktree.js +26 -187
  12. package/dist/resources/extensions/gsd/auto.js +79 -50
  13. package/dist/resources/extensions/gsd/bootstrap/register-hooks.js +9 -4
  14. package/dist/resources/extensions/gsd/crash-recovery.js +160 -47
  15. package/dist/resources/extensions/gsd/db/auto-workers.js +227 -0
  16. package/dist/resources/extensions/gsd/db/command-queue.js +105 -0
  17. package/dist/resources/extensions/gsd/db/milestone-leases.js +210 -0
  18. package/dist/resources/extensions/gsd/db/runtime-kv.js +91 -0
  19. package/dist/resources/extensions/gsd/db/unit-dispatches.js +322 -0
  20. package/dist/resources/extensions/gsd/docs/COORDINATION.md +42 -0
  21. package/dist/resources/extensions/gsd/doctor-proactive.js +4 -0
  22. package/dist/resources/extensions/gsd/doctor-runtime-checks.js +22 -6
  23. package/dist/resources/extensions/gsd/doctor.js +12 -2
  24. package/dist/resources/extensions/gsd/gsd-db.js +161 -3
  25. package/dist/resources/extensions/gsd/guided-flow.js +6 -2
  26. package/dist/resources/extensions/gsd/interrupted-session.js +18 -15
  27. package/dist/resources/extensions/gsd/state.js +21 -6
  28. package/dist/resources/extensions/gsd/worktree-resolver.js +64 -0
  29. package/dist/tsconfig.extensions.tsbuildinfo +1 -1
  30. package/dist/web/standalone/.next/BUILD_ID +1 -1
  31. package/dist/web/standalone/.next/app-path-routes-manifest.json +12 -12
  32. package/dist/web/standalone/.next/build-manifest.json +2 -2
  33. package/dist/web/standalone/.next/prerender-manifest.json +3 -3
  34. package/dist/web/standalone/.next/server/app/_global-error.html +1 -1
  35. package/dist/web/standalone/.next/server/app/_global-error.rsc +1 -1
  36. package/dist/web/standalone/.next/server/app/_global-error.segments/_full.segment.rsc +1 -1
  37. package/dist/web/standalone/.next/server/app/_global-error.segments/_global-error/__PAGE__.segment.rsc +1 -1
  38. package/dist/web/standalone/.next/server/app/_global-error.segments/_global-error.segment.rsc +1 -1
  39. package/dist/web/standalone/.next/server/app/_global-error.segments/_head.segment.rsc +1 -1
  40. package/dist/web/standalone/.next/server/app/_global-error.segments/_index.segment.rsc +1 -1
  41. package/dist/web/standalone/.next/server/app/_global-error.segments/_tree.segment.rsc +1 -1
  42. package/dist/web/standalone/.next/server/app/_not-found.html +1 -1
  43. package/dist/web/standalone/.next/server/app/_not-found.rsc +1 -1
  44. package/dist/web/standalone/.next/server/app/_not-found.segments/_full.segment.rsc +1 -1
  45. package/dist/web/standalone/.next/server/app/_not-found.segments/_head.segment.rsc +1 -1
  46. package/dist/web/standalone/.next/server/app/_not-found.segments/_index.segment.rsc +1 -1
  47. package/dist/web/standalone/.next/server/app/_not-found.segments/_not-found/__PAGE__.segment.rsc +1 -1
  48. package/dist/web/standalone/.next/server/app/_not-found.segments/_not-found.segment.rsc +1 -1
  49. package/dist/web/standalone/.next/server/app/_not-found.segments/_tree.segment.rsc +1 -1
  50. package/dist/web/standalone/.next/server/app/index.html +1 -1
  51. package/dist/web/standalone/.next/server/app/index.rsc +1 -1
  52. package/dist/web/standalone/.next/server/app/index.segments/__PAGE__.segment.rsc +1 -1
  53. package/dist/web/standalone/.next/server/app/index.segments/_full.segment.rsc +1 -1
  54. package/dist/web/standalone/.next/server/app/index.segments/_head.segment.rsc +1 -1
  55. package/dist/web/standalone/.next/server/app/index.segments/_index.segment.rsc +1 -1
  56. package/dist/web/standalone/.next/server/app/index.segments/_tree.segment.rsc +1 -1
  57. package/dist/web/standalone/.next/server/app-paths-manifest.json +12 -12
  58. package/dist/web/standalone/.next/server/middleware-build-manifest.js +1 -1
  59. package/dist/web/standalone/.next/server/pages/404.html +1 -1
  60. package/dist/web/standalone/.next/server/pages/500.html +1 -1
  61. package/dist/web/standalone/.next/server/server-reference-manifest.json +1 -1
  62. package/package.json +1 -1
  63. package/src/resources/extensions/gsd/auto/detect-stuck.ts +37 -5
  64. package/src/resources/extensions/gsd/auto/loop.ts +263 -41
  65. package/src/resources/extensions/gsd/auto/phases.ts +7 -5
  66. package/src/resources/extensions/gsd/auto/session.ts +36 -0
  67. package/src/resources/extensions/gsd/auto-dispatch.ts +53 -2
  68. package/src/resources/extensions/gsd/auto-post-unit.ts +19 -11
  69. package/src/resources/extensions/gsd/auto-worktree.ts +26 -211
  70. package/src/resources/extensions/gsd/auto.ts +89 -44
  71. package/src/resources/extensions/gsd/bootstrap/register-hooks.ts +9 -4
  72. package/src/resources/extensions/gsd/crash-recovery.ts +177 -43
  73. package/src/resources/extensions/gsd/db/auto-workers.ts +273 -0
  74. package/src/resources/extensions/gsd/db/command-queue.ts +149 -0
  75. package/src/resources/extensions/gsd/db/milestone-leases.ts +274 -0
  76. package/src/resources/extensions/gsd/db/runtime-kv.ts +127 -0
  77. package/src/resources/extensions/gsd/db/unit-dispatches.ts +446 -0
  78. package/src/resources/extensions/gsd/docs/COORDINATION.md +42 -0
  79. package/src/resources/extensions/gsd/doctor-proactive.ts +4 -0
  80. package/src/resources/extensions/gsd/doctor-runtime-checks.ts +24 -6
  81. package/src/resources/extensions/gsd/doctor.ts +10 -2
  82. package/src/resources/extensions/gsd/gsd-db.ts +170 -3
  83. package/src/resources/extensions/gsd/guided-flow.ts +6 -2
  84. package/src/resources/extensions/gsd/interrupted-session.ts +19 -12
  85. package/src/resources/extensions/gsd/state.ts +44 -6
  86. package/src/resources/extensions/gsd/tests/auto-loop-no-copy-artifacts.test.ts +72 -0
  87. package/src/resources/extensions/gsd/tests/auto-loop-symlink-worktree.test.ts +190 -0
  88. package/src/resources/extensions/gsd/tests/auto-workers.test.ts +105 -0
  89. package/src/resources/extensions/gsd/tests/command-queue.test.ts +141 -0
  90. package/src/resources/extensions/gsd/tests/crash-recovery-via-db.test.ts +203 -0
  91. package/src/resources/extensions/gsd/tests/crash-recovery.test.ts +169 -59
  92. package/src/resources/extensions/gsd/tests/detect-stuck-respects-retry.test.ts +173 -0
  93. package/src/resources/extensions/gsd/tests/integration/auto-worktree.test.ts +22 -12
  94. package/src/resources/extensions/gsd/tests/integration/doctor-proactive.test.ts +24 -10
  95. package/src/resources/extensions/gsd/tests/integration/doctor-runtime.test.ts +35 -23
  96. package/src/resources/extensions/gsd/tests/integration/workspace-collapse-integration.test.ts +3 -5
  97. package/src/resources/extensions/gsd/tests/interrupted-session-auto.test.ts +72 -25
  98. package/src/resources/extensions/gsd/tests/interrupted-session-ui.test.ts +72 -25
  99. package/src/resources/extensions/gsd/tests/memory-pressure-stuck-state.test.ts +9 -6
  100. package/src/resources/extensions/gsd/tests/milestone-leases.test.ts +152 -0
  101. package/src/resources/extensions/gsd/tests/parallel-milestone-isolation.test.ts +106 -0
  102. package/src/resources/extensions/gsd/tests/paused-session-via-db.test.ts +119 -0
  103. package/src/resources/extensions/gsd/tests/pipeline-variant-dispatch.test.ts +58 -0
  104. package/src/resources/extensions/gsd/tests/preferences-worktree-sync.test.ts +3 -17
  105. package/src/resources/extensions/gsd/tests/register-hooks-depth-verification.test.ts +110 -0
  106. package/src/resources/extensions/gsd/tests/runtime-kv.test.ts +120 -0
  107. package/src/resources/extensions/gsd/tests/skipped-validation-completion.test.ts +133 -28
  108. package/src/resources/extensions/gsd/tests/skipped-validation-db-atomicity.test.ts +17 -0
  109. package/src/resources/extensions/gsd/tests/stuck-state-via-db.test.ts +134 -0
  110. package/src/resources/extensions/gsd/tests/sync-layer-scope.test.ts +7 -26
  111. package/src/resources/extensions/gsd/tests/teardown-cleanup-parity.test.ts +4 -8
  112. package/src/resources/extensions/gsd/tests/unit-dispatches.test.ts +247 -0
  113. package/src/resources/extensions/gsd/tests/validate-milestone.test.ts +41 -1
  114. package/src/resources/extensions/gsd/tests/workspace.test.ts +15 -9
  115. package/src/resources/extensions/gsd/tests/write-gate.test.ts +31 -23
  116. package/src/resources/extensions/gsd/worktree-resolver.ts +62 -0
  117. package/src/resources/extensions/gsd/tests/auto-lock-creation.test.ts +0 -213
  118. package/src/resources/extensions/gsd/tests/auto-stale-lock-self-kill.test.ts +0 -87
  119. package/src/resources/extensions/gsd/tests/stop-auto-remote.test.ts +0 -159
  120. /package/dist/web/standalone/.next/static/{AT5qi39nKXkdmQIOIoh0f → Y5UeGFkXTYM9WIQOWHkot}/_buildManifest.js +0 -0
  121. /package/dist/web/standalone/.next/static/{AT5qi39nKXkdmQIOIoh0f → Y5UeGFkXTYM9WIQOWHkot}/_ssgManifest.js +0 -0
@@ -0,0 +1,446 @@
1
+ // gsd-2 + Unit dispatch ledger (DB-backed coordination, Phase B)
2
+ //
3
+ // Records every auto-mode unit dispatch (plan-slice, run-task, summarize, …)
4
+ // with worker_id, fencing token, status lifecycle, and retry metadata. The
5
+ // ledger is the substrate Phase C will consume to migrate stuck-state.json
6
+ // and paused-session.json out of the runtime/ directory.
7
+ //
8
+ // Codex review MEDIUM B2: partial unique index
9
+ // idx_unit_dispatches_active_per_unit ON unit_dispatches(unit_id)
10
+ // WHERE status IN ('claimed','running')
11
+ // enforces that two workers cannot simultaneously claim the same unit.
12
+ // recordDispatchClaim relies on the index to fail fast at INSERT time
13
+ // rather than racing in application code.
14
+
15
+ import { randomUUID } from "node:crypto";
16
+
17
+ import {
18
+ _getAdapter,
19
+ isDbAvailable,
20
+ transaction,
21
+ insertAuditEvent,
22
+ } from "../gsd-db.js";
23
+
24
/**
 * Every value the `status` column of `unit_dispatches` may hold.
 *
 * `claimed` and `running` are the "active" states covered by the partial
 * unique index; the transition helpers in this module move a row out of
 * them into one of the remaining states.
 */
export type DispatchStatus =
  | "pending" | "claimed" | "running" | "completed"
  | "failed" | "stuck" | "canceled" | "paused";
33
+
34
/**
 * Shape of one `unit_dispatches` row as returned by the `SELECT *` readers
 * in this module.
 */
export interface UnitDispatchRow {
  // ── Identity ───────────────────────────────────────────────────────────
  /** Row id; this is the `dispatchId` handed back by recordDispatchClaim. */
  id: number;
  trace_id: string;
  turn_id: string | null;

  // ── Ownership ──────────────────────────────────────────────────────────
  worker_id: string;
  /** Fencing token of the milestone lease the worker held when it claimed. */
  milestone_lease_token: number;

  // ── What was dispatched ────────────────────────────────────────────────
  milestone_id: string;
  slice_id: string | null;
  task_id: string | null;
  unit_type: string;
  unit_id: string;

  // ── Lifecycle ──────────────────────────────────────────────────────────
  status: DispatchStatus;
  attempt_n: number;
  /** ISO-8601 timestamp written when the claim row is inserted. */
  started_at: string;
  /** ISO-8601 timestamp written by the completed/failed/stuck/paused/canceled transitions. */
  ended_at: string | null;
  /** Written by markCompleted, markStuck and markCanceled. */
  exit_reason: string | null;
  /** Written by markFailed. */
  error_summary: string | null;
  /** Written by markCompleted. */
  verification_evidence_id: number | null;

  // ── Retry metadata (written by markFailed) ─────────────────────────────
  next_run_at: string | null;
  retry_after_ms: number | null;
  max_attempts: number;
  last_error_code: string | null;
  last_error_at: string | null;
}
58
+
59
/** Arguments accepted by recordDispatchClaim(). */
export interface RecordClaimInput {
  traceId: string;
  turnId?: string | null;
  workerId: string;
  /** Must match the fencing token of the `held` lease for (milestoneId, workerId). */
  milestoneLeaseToken: number;
  milestoneId: string;
  sliceId?: string | null;
  taskId?: string | null;
  unitType: string;
  unitId: string;
  /**
   * Which attempt of this unit the claim represents. Callers are expected
   * to derive it from the newest prior dispatch of the same unit_id
   * (getRecentForUnit(), plus one). A fresh claim defaults to 1.
   */
  attemptN?: number;
  /** Per-attempt cap; 3 when omitted. */
  maxAttempts?: number;
}
78
+
79
/** The claim row was inserted; `dispatchId` is its row id. */
type ClaimAccepted = { ok: true; dispatchId: number };

/** Another row for the same unit_id is still `claimed` or `running`. */
type ClaimRejectedAlreadyActive = {
  ok: false;
  error: "already_active";
  existingId: number;
  existingStatus: DispatchStatus;
  existingWorker: string;
};

/** No `held` lease matched (milestoneId, workerId, milestoneLeaseToken). */
type ClaimRejectedStaleLease = {
  ok: false;
  error: "stale_lease";
  milestoneId: string;
  workerId: string;
  milestoneLeaseToken: number;
};

/** Outcome of recordDispatchClaim(); discriminate on `ok`, then on `error`. */
export type RecordClaimResult =
  | ClaimAccepted
  | ClaimRejectedAlreadyActive
  | ClaimRejectedStaleLease;
83
+
84
/**
 * Decide whether an error thrown by the claim INSERT means "a uniqueness
 * constraint rejected the row", i.e. another dispatch for the same unit is
 * already active.
 *
 * Only uniqueness violations qualify. FOREIGN KEY, NOT NULL and CHECK
 * failures indicate bad input or a schema problem, and must propagate to
 * the caller instead of being reported as `already_active`.
 *
 * @param err Whatever the DB adapter threw (Error, string, or anything else).
 * @returns `true` when the error looks like a UNIQUE-constraint violation.
 */
function isAlreadyActiveConstraintError(err: unknown): boolean {
  const code =
    typeof err === "object" && err !== null && "code" in err
      ? String((err as { code?: unknown }).code ?? "")
      : "";
  const msg = err instanceof Error ? err.message : String(err);

  // Constraint classes that can never mean "double claim". SQLite names the
  // class at the start of the message ("NOT NULL constraint failed: …").
  if (/\bFOREIGN KEY\b|\b(?:NOT NULL|CHECK) constraint failed\b/i.test(msg)) {
    return false;
  }
  if (
    code === "SQLITE_CONSTRAINT_FOREIGNKEY" ||
    code === "SQLITE_CONSTRAINT_NOTNULL" ||
    code === "SQLITE_CONSTRAINT_CHECK"
  ) {
    return false;
  }

  // Adapters that expose SQLite result codes. The generic code is still
  // accepted because some adapters do not surface the extended code.
  if (code === "SQLITE_CONSTRAINT" || code === "SQLITE_CONSTRAINT_UNIQUE") {
    return true;
  }

  // Adapters that only surface the message text.
  return /\bUNIQUE\b|\bconstraint failed\b/i.test(msg);
}
100
+
101
/**
 * Insert a new dispatch row in `claimed` state.
 *
 * Everything runs inside one transaction:
 *  1. Check that a `held` milestone lease exists for
 *     (milestoneId, workerId, milestoneLeaseToken). If not, nothing is
 *     written and a `stale_lease` result is returned.
 *  2. INSERT the row. The partial unique index
 *     idx_unit_dispatches_active_per_unit (B2) refuses the INSERT if any
 *     row for the same unit_id already has status IN ('claimed','running');
 *     that rejection is turned into an `already_active` result describing
 *     the existing row.
 *  3. Emit a `dispatch-claimed` audit event.
 *
 * Only the INSERT is guarded by the constraint-error handler. An error from
 * the audit-event write propagates to the caller rather than being reported
 * as `already_active` for a row this call just inserted.
 *
 * @param input Claim details; `attemptN` defaults to 1 and `maxAttempts` to 3.
 * @returns `{ ok: true, dispatchId }` on success, otherwise a typed rejection.
 * @throws Error when the DB is unavailable, and any DB error that is not a
 *   uniqueness violation on the INSERT.
 */
export function recordDispatchClaim(input: RecordClaimInput): RecordClaimResult {
  if (!isDbAvailable()) {
    throw new Error("recordDispatchClaim: DB unavailable");
  }
  const now = new Date().toISOString();
  const attemptN = input.attemptN ?? 1;

  return transaction((): RecordClaimResult => {
    const db = _getAdapter()!;

    // Fencing check: the caller must still own the milestone lease.
    const lease = db.prepare(
      `SELECT fencing_token
       FROM milestone_leases
       WHERE milestone_id = :milestone_id
         AND worker_id = :worker_id
         AND fencing_token = :token
         AND status = 'held'`,
    ).get({
      ":milestone_id": input.milestoneId,
      ":worker_id": input.workerId,
      ":token": input.milestoneLeaseToken,
    }) as { fencing_token: number } | undefined;
    if (!lease) {
      return {
        ok: false,
        error: "stale_lease",
        milestoneId: input.milestoneId,
        workerId: input.workerId,
        milestoneLeaseToken: input.milestoneLeaseToken,
      };
    }

    let inserted: unknown;
    try {
      inserted = db.prepare(
        `INSERT INTO unit_dispatches (
           trace_id, turn_id, worker_id, milestone_lease_token,
           milestone_id, slice_id, task_id,
           unit_type, unit_id, status, attempt_n,
           started_at, max_attempts
         ) VALUES (
           :trace_id, :turn_id, :worker_id, :milestone_lease_token,
           :milestone_id, :slice_id, :task_id,
           :unit_type, :unit_id, 'claimed', :attempt_n,
           :started_at, :max_attempts
         )`,
      ).run({
        ":trace_id": input.traceId,
        ":turn_id": input.turnId ?? null,
        ":worker_id": input.workerId,
        ":milestone_lease_token": input.milestoneLeaseToken,
        ":milestone_id": input.milestoneId,
        ":slice_id": input.sliceId ?? null,
        ":task_id": input.taskId ?? null,
        ":unit_type": input.unitType,
        ":unit_id": input.unitId,
        ":attempt_n": attemptN,
        ":started_at": now,
        ":max_attempts": input.maxAttempts ?? 3,
      });
    } catch (err) {
      if (!isAlreadyActiveConstraintError(err)) throw err;

      // Partial unique index rejected the INSERT — surface the existing
      // active dispatch so callers can decide what to do.
      const existing = db.prepare(
        `SELECT id, status, worker_id FROM unit_dispatches
         WHERE unit_id = :unit_id AND status IN ('claimed','running')
         ORDER BY id DESC LIMIT 1`,
      ).get({ ":unit_id": input.unitId }) as
        | { id: number; status: DispatchStatus; worker_id: string }
        | undefined;

      return {
        ok: false,
        error: "already_active",
        existingId: existing?.id ?? 0,
        existingStatus: existing?.status ?? "claimed",
        existingWorker: existing?.worker_id ?? "unknown",
      };
    }

    // lastInsertRowid may be a bigint depending on the adapter; normalise.
    const id = Number((inserted as { lastInsertRowid?: number | bigint }).lastInsertRowid ?? 0);

    // Deliberately outside the try above: a failure here is not a double
    // claim. NOTE(review): presumably transaction() rolls the INSERT back
    // when this throws — confirm against gsd-db.
    insertAuditEvent({
      eventId: randomUUID(),
      traceId: input.traceId,
      turnId: input.turnId ?? undefined,
      category: "orchestration",
      type: "dispatch-claimed",
      ts: now,
      payload: {
        dispatchId: id,
        unitId: input.unitId,
        unitType: input.unitType,
        workerId: input.workerId,
        attemptN,
      },
    });

    return { ok: true, dispatchId: id };
  });
}
205
+
206
/**
 * Promote a dispatch from `claimed` to `running`.
 *
 * Silently does nothing when the DB is unavailable or when the row is not
 * currently in `claimed` state.
 */
export function markRunning(dispatchId: number): void {
  if (!isDbAvailable()) return;

  const statement = _getAdapter()!.prepare(
    `UPDATE unit_dispatches SET status = 'running'
     WHERE id = :id AND status = 'claimed'`,
  );
  statement.run({ ":id": dispatchId });
}
215
+
216
/** Optional details recorded by markCompleted(). */
export interface CompleteOpts {
  /** Stored in `verification_evidence_id`; NULL when omitted. */
  verificationEvidenceId?: number | null;
  /** Stored in `exit_reason`; NULL when omitted. */
  exitReason?: string;
}
220
+
221
/**
 * Move an active (`claimed` or `running`) dispatch to `completed`.
 *
 * Stamps `ended_at` and records the optional exit reason and verification
 * evidence id. A `dispatch-completed` audit event is written only when the
 * UPDATE actually changed a row. No-op when the DB is unavailable.
 */
export function markCompleted(dispatchId: number, opts?: CompleteOpts): void {
  if (!isDbAvailable()) return;

  const endedAt = new Date().toISOString();
  const adapter = _getAdapter()!;

  const updateResult = transaction(() =>
    adapter.prepare(
      `UPDATE unit_dispatches
       SET status = 'completed', ended_at = :ended_at,
           exit_reason = :exit_reason,
           verification_evidence_id = :evidence_id
       WHERE id = :id
         AND status IN ('claimed','running')`,
    ).run({
      ":id": dispatchId,
      ":ended_at": endedAt,
      ":exit_reason": opts?.exitReason ?? null,
      ":evidence_id": opts?.verificationEvidenceId ?? null,
    }),
  );

  // Treat a missing or non-numeric `changes` as "nothing updated".
  const rawChanges = (updateResult as { changes?: unknown }).changes;
  const affected = typeof rawChanges === "number" ? rawChanges : 0;
  if (affected < 1) return;

  insertAuditEvent({
    eventId: randomUUID(),
    traceId: dispatchId.toString(),
    category: "orchestration",
    type: "dispatch-completed",
    ts: endedAt,
    payload: { dispatchId },
  });
}
256
+
257
/** Details recorded by markFailed(). */
export interface FailureOpts {
  /** Stored in `error_summary`. */
  errorSummary: string;
  /** Stored in `last_error_code`; NULL when omitted. */
  errorCode?: string;
  /** Backoff before next attempt (used by stuck-detector retry suppression). */
  retryAfterMs?: number;
}
263
+
264
/**
 * Move an active (`claimed` or `running`) dispatch to `failed`, optionally
 * scheduling a retry.
 *
 * Records the error summary and code, stamps `ended_at` / `last_error_at`,
 * and stores the backoff. `next_run_at` is "now + retryAfterMs" whenever a
 * finite backoff is supplied — including 0, which means "retry
 * immediately" — and NULL otherwise. A `dispatch-failed` audit event is
 * written only when the UPDATE actually changed a row. No-op when the DB
 * is unavailable.
 *
 * @param dispatchId Row id returned by recordDispatchClaim().
 * @param opts Failure details; see FailureOpts.
 */
export function markFailed(dispatchId: number, opts: FailureOpts): void {
  if (!isDbAvailable()) return;
  const now = new Date();
  const nowIso = now.toISOString();

  // Explicit finite-number check instead of truthiness: 0 is a valid
  // backoff, and a non-finite value would make toISOString() throw a
  // RangeError before the row is ever updated.
  const backoffMs = opts.retryAfterMs;
  const nextRunIso =
    typeof backoffMs === "number" && Number.isFinite(backoffMs)
      ? new Date(now.getTime() + backoffMs).toISOString()
      : null;

  const db = _getAdapter()!;
  let changes = 0;
  transaction(() => {
    const result = db.prepare(
      `UPDATE unit_dispatches
       SET status = 'failed', ended_at = :ended_at,
           error_summary = :error_summary,
           last_error_code = :last_error_code,
           last_error_at = :last_error_at,
           retry_after_ms = :retry_after_ms,
           next_run_at = :next_run_at
       WHERE id = :id
         AND status IN ('claimed','running')`,
    ).run({
      ":id": dispatchId,
      ":ended_at": nowIso,
      ":error_summary": opts.errorSummary,
      ":last_error_code": opts.errorCode ?? null,
      ":last_error_at": nowIso,
      ":retry_after_ms": opts.retryAfterMs ?? null,
      ":next_run_at": nextRunIso,
    });
    // Treat a missing or non-numeric `changes` as "nothing updated".
    changes =
      typeof (result as { changes?: unknown }).changes === "number"
        ? (result as { changes: number }).changes
        : 0;
  });
  if (changes < 1) return;

  insertAuditEvent({
    eventId: randomUUID(),
    traceId: dispatchId.toString(),
    category: "orchestration",
    type: "dispatch-failed",
    ts: nowIso,
    payload: { dispatchId, errorSummary: opts.errorSummary, retryAfterMs: opts.retryAfterMs ?? null },
  });
}
309
+
310
/**
 * Move an active (`claimed` or `running`) dispatch to `stuck`.
 *
 * Stamps `ended_at` and stores `reason` as the exit reason. A
 * `dispatch-stuck` audit event is written only when the UPDATE actually
 * changed a row. No-op when the DB is unavailable.
 */
export function markStuck(dispatchId: number, reason: string): void {
  if (!isDbAvailable()) return;

  const endedAt = new Date().toISOString();
  const adapter = _getAdapter()!;

  const updateResult = transaction(() => {
    const statement = adapter.prepare(
      `UPDATE unit_dispatches
       SET status = 'stuck', ended_at = :ended_at, exit_reason = :reason
       WHERE id = :id
         AND status IN ('claimed','running')`,
    );
    return statement.run({ ":id": dispatchId, ":ended_at": endedAt, ":reason": reason });
  });

  // Treat a missing or non-numeric `changes` as "nothing updated".
  const rawChanges = (updateResult as { changes?: unknown }).changes;
  const affected = typeof rawChanges === "number" ? rawChanges : 0;
  if (affected <= 0) return;

  insertAuditEvent({
    eventId: randomUUID(),
    traceId: dispatchId.toString(),
    category: "orchestration",
    type: "dispatch-stuck",
    ts: endedAt,
    payload: { dispatchId, reason },
  });
}
337
+
338
/**
 * Move an active (`claimed` or `running`) dispatch to `paused` and stamp
 * `ended_at`.
 *
 * No audit event is written. No-op when the DB is unavailable or the row
 * is not active.
 */
export function markPaused(dispatchId: number): void {
  if (!isDbAvailable()) return;

  const endedAt = new Date().toISOString();
  const statement = _getAdapter()!.prepare(
    `UPDATE unit_dispatches
     SET status = 'paused', ended_at = :ended_at
     WHERE id = :id AND status IN ('claimed','running')`,
  );
  statement.run({ ":id": dispatchId, ":ended_at": endedAt });
}
349
+
350
/**
 * Move a `pending`, `claimed` or `running` dispatch to `canceled`.
 *
 * Stamps `ended_at` and stores `reason` as the exit reason. No audit event
 * is written. No-op when the DB is unavailable or the row is in any other
 * state.
 */
export function markCanceled(dispatchId: number, reason: string): void {
  if (!isDbAvailable()) return;

  const endedAt = new Date().toISOString();
  const statement = _getAdapter()!.prepare(
    `UPDATE unit_dispatches
     SET status = 'canceled', ended_at = :ended_at, exit_reason = :reason
     WHERE id = :id AND status IN ('pending','claimed','running')`,
  );
  statement.run({ ":id": dispatchId, ":ended_at": endedAt, ":reason": reason });
}
361
+
362
/**
 * Return up to `limit` dispatch rows for one unit, newest first (by id).
 *
 * recordDispatchClaim callers use it to compute attempt_n, and
 * detect-stuck.ts (B3) consults it for the retry budget before tripping
 * the stuck verdict. Yields an empty array when the DB is unavailable.
 */
export function getRecentForUnit(unitId: string, limit = 10): UnitDispatchRow[] {
  if (!isDbAvailable()) return [];

  const query = _getAdapter()!.prepare(
    `SELECT * FROM unit_dispatches WHERE unit_id = :unit_id ORDER BY id DESC LIMIT :limit`,
  );
  const rows = query.all({ ":unit_id": unitId, ":limit": limit });
  return rows as unknown as UnitDispatchRow[];
}
374
+
375
/**
 * Return the newest dispatch row (highest id) for a unit, whatever its
 * status.
 *
 * @returns The row, or `null` when the unit has never been dispatched or
 *   the DB is unavailable.
 */
export function getLatestForUnit(unitId: string): UnitDispatchRow | null {
  if (!isDbAvailable()) return null;

  const query = _getAdapter()!.prepare(
    `SELECT * FROM unit_dispatches WHERE unit_id = :unit_id ORDER BY id DESC LIMIT 1`,
  );
  const latest = query.get({ ":unit_id": unitId }) as UnitDispatchRow | undefined;
  if (latest === undefined || latest === null) return null;
  return latest;
}
387
+
388
/**
 * Phase C — list the unit_id values most recently dispatched by one
 * worker, oldest first.
 *
 * Stands in for the persisted `recentUnits` field of stuck-state.json: the
 * auto-loop seeds loopState.recentUnits from it at session start so the
 * stuck-detector window survives a restart (#3704). The query selects the
 * newest `limit` rows; the result is then flipped to oldest-first, the
 * order detect-stuck.ts expects for its in-memory window.
 *
 * Yields an empty array when the DB is unavailable.
 */
export function getRecentUnitKeysForWorker(
  workerId: string,
  limit = 20,
): Array<{ key: string }> {
  if (!isDbAvailable()) return [];

  const query = _getAdapter()!.prepare(
    `SELECT unit_id FROM unit_dispatches
     WHERE worker_id = :worker_id
     ORDER BY started_at DESC, id DESC
     LIMIT :limit`,
  );
  const newestFirst = query.all({
    ":worker_id": workerId,
    ":limit": limit,
  }) as Array<{ unit_id: string }>;

  // Walk from the tail so callers receive oldest-first (sliding-window semantics).
  return newestFirst.reduceRight<Array<{ key: string }>>((window, row) => {
    window.push({ key: row.unit_id });
    return window;
  }, []);
}
412
+
413
/**
 * List the unit_id values most recently dispatched by any worker whose
 * `workers.project_root_realpath` equals the given path, oldest first.
 *
 * Selects the newest `limit` dispatches (by started_at, then id) joined to
 * their worker row, then flips the result to oldest-first. Yields an empty
 * array when the DB is unavailable.
 */
export function getRecentUnitKeysForProjectRoot(
  projectRootRealpath: string,
  limit = 20,
): Array<{ key: string }> {
  if (!isDbAvailable()) return [];

  const query = _getAdapter()!.prepare(
    `SELECT ud.unit_id
     FROM unit_dispatches ud
     INNER JOIN workers w ON w.worker_id = ud.worker_id
     WHERE w.project_root_realpath = :project_root_realpath
     ORDER BY ud.started_at DESC, ud.id DESC
     LIMIT :limit`,
  );
  const newestFirst = query.all({
    ":project_root_realpath": projectRootRealpath,
    ":limit": limit,
  }) as Array<{ unit_id: string }>;

  // Map first, then flip: same oldest-first output as reversing the rows.
  const keys = newestFirst.map((row) => ({ key: row.unit_id }));
  keys.reverse();
  return keys;
}
432
+
433
/**
 * Return every dispatch of a milestone that is currently in `status`,
 * ordered by ascending id. Useful for janitors + dashboards.
 *
 * Yields an empty array when the DB is unavailable.
 */
export function getDispatchesByStatus(
  milestoneId: string,
  status: DispatchStatus,
): UnitDispatchRow[] {
  if (!isDbAvailable()) return [];

  const query = _getAdapter()!.prepare(
    `SELECT * FROM unit_dispatches WHERE milestone_id = :mid AND status = :status ORDER BY id`,
  );
  const rows = query.all({ ":mid": milestoneId, ":status": status });
  return rows as unknown as UnitDispatchRow[];
}
@@ -0,0 +1,42 @@
1
+ # Auto-mode coordination is single-host
2
+
3
+ The DB-backed coordination tables introduced by Phase B (`workers`,
4
+ `milestone_leases`, `unit_dispatches`, `cancellation_requests`,
5
+ `command_queue`) and the supporting `runtime_kv` table from Phase C all
6
+ rely on **shared SQLite WAL on local disk**. They do not work across
7
+ machines.
8
+
9
+ ## Why single-host only
10
+
11
+ - SQLite WAL coordination — the locking primitives that make
12
+ `claimMilestoneLease`, `recordDispatchClaim`, and `claimNextCommand`
13
+ atomic — is local-disk only. Network filesystems (NFS, SMB, S3FS) and
14
+ FUSE mounts break the lock semantics that the WAL relies on.
15
+ - Heartbeat TTL (`workers.last_heartbeat_at`) compares timestamps written
16
+ with SQLite wall-clock time (`datetime('now')`). Across machines without
17
+ wall-clock synchronization (for example NTP/chrony), TTL filtering can
18
+ produce phantom-active or premature-crashed verdicts. Monotonic clocks
19
+ are not used for these comparisons.
20
+ - Fencing tokens (`milestone_leases.fencing_token`) are monotonically
21
+ ordered by SQL within a single transaction. Cross-host races could
22
+ produce duplicate tokens if two SQLite processes opened the same DB
23
+ on a network mount.
24
+
25
+ ## What does work
26
+
27
+ - Multiple `gsd auto` worker processes on the **same machine**, sharing
28
+ the project's SQLite DB via WAL. The lease check refuses concurrent
29
+ claims on the same milestone; the dispatch ledger's partial unique
30
+ index refuses double-claims of the same unit.
31
+ - A single `gsd auto` worker plus arbitrary read-only consumers
32
+ (dashboards, doctors) on the same machine.
33
+ - Worktree-based parallelism on the same machine, where each worker
34
+ holds a different milestone lease.
35
+
36
+ ## Multi-host alternatives
37
+
38
+ If you need to coordinate `gsd auto` workers across machines, you need
39
+ a real coordinator: Postgres for the ledger + a leader-election service
40
+ (etcd, Consul) for the leases. That's out of scope for these phases.
41
+ The schema and module shapes here would need a non-trivial backend
42
+ swap before they could ride on top of either.
@@ -25,6 +25,7 @@ import { resolveMilestoneIntegrationBranch } from "./git-service.js";
25
25
  import { nativeIsRepo, nativeHasChanges, nativeLastCommitEpoch, nativeGetCurrentBranch, nativeAddTracked, nativeCommit } from "./native-git-bridge.js";
26
26
  import { loadEffectiveGSDPreferences } from "./preferences.js";
27
27
  import { runEnvironmentChecks } from "./doctor-environment.js";
28
+ import { ensureDbOpen } from "./bootstrap/dynamic-tools.js";
28
29
 
29
30
  // ── Health Score Tracking ──────────────────────────────────────────────────
30
31
 
@@ -219,6 +220,9 @@ export async function preDispatchHealthGate(basePath: string): Promise<PreDispat
219
220
  // If a stale lock exists, the crash recovery path should handle it,
220
221
  // not a new dispatch. This prevents double-dispatch after crashes.
221
222
  try {
223
+ if (existsSync(join(gsdRoot(basePath), "gsd.db"))) {
224
+ await ensureDbOpen(basePath);
225
+ }
222
226
  const lock = readCrashLock(basePath);
223
227
  if (lock && !isLockProcessAlive(lock)) {
224
228
  // Auto-clear it since we're about to dispatch anyway
@@ -8,6 +8,8 @@ import { deriveState, isGhostMilestone, isReusableGhostMilestone } from "./state
8
8
  import { saveFile } from "./files.js";
9
9
  import { nativeIsRepo, nativeForEachRef, nativeUpdateRef } from "./native-git-bridge.js";
10
10
  import { readCrashLock, isLockProcessAlive, clearLock } from "./crash-recovery.js";
11
+ import { getActiveAutoWorkers } from "./db/auto-workers.js";
12
+ import { normalizeRealPath } from "./paths.js";
11
13
  import { ensureGitignore, isGsdGitignored } from "./gitignore.js";
12
14
  import { readAllSessionStatuses, isSessionStale, removeSessionStatus } from "./session-status-io.js";
13
15
  import { recoverFailedMigration } from "./migrate-external.js";
@@ -35,6 +37,9 @@ export async function checkRuntimeHealth(
35
37
  const root = gsdRoot(basePath);
36
38
 
37
39
  // ── Stale crash lock ──────────────────────────────────────────────────
40
+ // Phase C pt 2: the lock state lives in the workers + unit_dispatches
41
+ // tables now, not auto.lock. readCrashLock synthesizes a LockData from
42
+ // the DB; isLockProcessAlive is a pure OS PID check.
38
43
  try {
39
44
  const lock = readCrashLock(basePath);
40
45
  if (lock) {
@@ -45,14 +50,14 @@ export async function checkRuntimeHealth(
45
50
  code: "stale_crash_lock",
46
51
  scope: "project",
47
52
  unitId: "project",
48
- message: `Stale auto.lock from PID ${lock.pid} (started ${lock.startedAt}, was executing ${lock.unitType} ${lock.unitId}) — process is no longer running`,
49
- file: ".gsd/auto.lock",
53
+ message: `Stale auto-mode worker (PID ${lock.pid}, started ${lock.startedAt}, was executing ${lock.unitType} ${lock.unitId}) — process is no longer running`,
54
+ file: "<workers table>",
50
55
  fixable: true,
51
56
  });
52
57
 
53
58
  if (shouldFix("stale_crash_lock")) {
54
59
  clearLock(basePath);
55
- fixesApplied.push("cleared stale auto.lock");
60
+ fixesApplied.push("cleared stale auto-mode worker state");
56
61
  }
57
62
  }
58
63
  }
@@ -70,9 +75,22 @@ export async function checkRuntimeHealth(
70
75
  if (existsSync(lockDir)) {
71
76
  const statRes = statSync(lockDir);
72
77
  if (statRes.isDirectory()) {
73
- // Check if any live process actually holds this lock
74
- const lock = readCrashLock(basePath);
75
- const lockHolderAlive = lock ? isLockProcessAlive(lock) : false;
78
+ // Phase C pt 2: "any live process holds the lock?" check now means
79
+ // "is any worker registered with status='active' AND a fresh
80
+ // heartbeat for this project?" readCrashLock returns null for
81
+ // healthy live workers (it surfaces stale ones only), so we must
82
+ // consult getActiveAutoWorkers directly.
83
+ const projectRoot = normalizeRealPath(basePath);
84
+ const activeWorkers = getActiveAutoWorkers().filter(
85
+ (w) => w.project_root_realpath === projectRoot && isLockProcessAlive({
86
+ pid: w.pid,
87
+ startedAt: w.started_at,
88
+ unitType: "starting",
89
+ unitId: "bootstrap",
90
+ unitStartedAt: w.started_at,
91
+ }),
92
+ );
93
+ const lockHolderAlive = activeWorkers.length > 0;
76
94
  if (!lockHolderAlive) {
77
95
  issues.push({
78
96
  severity: "error",
@@ -3,8 +3,8 @@ import { join } from "node:path";
3
3
 
4
4
  import { loadFile, parseSummary, saveFile, parseTaskPlanMustHaves, countMustHavesMentionedInSummary } from "./files.js";
5
5
  import { parseRoadmap as parseLegacyRoadmap, parsePlan as parseLegacyPlan } from "./parsers-legacy.js";
6
- import { isDbAvailable, getMilestoneSlices, getSliceTasks } from "./gsd-db.js";
7
- import { resolveMilestoneFile, resolveMilestonePath, resolveSliceFile, resolveSlicePath, resolveTaskFile, resolveTasksDir, milestonesDir, gsdRoot, relMilestoneFile, relSliceFile, relTaskFile, relSlicePath, relGsdRootFile, resolveGsdRootFile, relMilestonePath } from "./paths.js";
6
+ import { isDbAvailable, openDatabase, getMilestoneSlices, getSliceTasks } from "./gsd-db.js";
7
+ import { resolveMilestoneFile, resolveMilestonePath, resolveSliceFile, resolveSlicePath, resolveTaskFile, resolveTasksDir, milestonesDir, gsdRoot, relMilestoneFile, relSliceFile, relTaskFile, relSlicePath, relGsdRootFile, resolveGsdRootFile, relMilestonePath, resolveGsdPathContract } from "./paths.js";
8
8
  import { deriveState, isMilestoneComplete } from "./state.js";
9
9
  import { invalidateAllCaches } from "./cache.js";
10
10
  import { loadEffectiveGSDPreferences, type GSDPreferences } from "./preferences.js";
@@ -336,6 +336,14 @@ export async function runGSDDoctor(basePath: string, options?: { fix?: boolean;
336
336
  const dryRun = options?.dryRun === true;
337
337
  const fixLevel = options?.fixLevel ?? "all";
338
338
 
339
+ // CLI doctor can run before any tool handler has opened the DB. Runtime
340
+ // health checks need the existing project DB to surface DB-backed crash
341
+ // locks, paused sessions, and coordination rows.
342
+ const dbPath = resolveGsdPathContract(basePath).projectDb;
343
+ if (existsSync(dbPath)) {
344
+ try { openDatabase(dbPath); } catch { /* surfaced later as db_unavailable */ }
345
+ }
346
+
339
347
  // Issue codes that represent completion state transitions — creating summary
340
348
  // stubs, marking slices/milestones done in the roadmap. These belong to the
341
349
  // dispatch lifecycle (complete-slice, complete-milestone units), not to