gsd-pi 2.78.1-dev.d8826a445 → 2.78.1-dev.eccf86e27

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (121) hide show
  1. package/README.md +5 -7
  2. package/dist/help-text.js +1 -1
  3. package/dist/resource-loader.js +6 -1
  4. package/dist/resources/.managed-resources-content-hash +1 -1
  5. package/dist/resources/extensions/gsd/auto/detect-stuck.js +41 -5
  6. package/dist/resources/extensions/gsd/auto/loop.js +235 -36
  7. package/dist/resources/extensions/gsd/auto/phases.js +7 -5
  8. package/dist/resources/extensions/gsd/auto/session.js +33 -0
  9. package/dist/resources/extensions/gsd/auto-dispatch.js +46 -2
  10. package/dist/resources/extensions/gsd/auto-post-unit.js +19 -11
  11. package/dist/resources/extensions/gsd/auto-worktree.js +26 -187
  12. package/dist/resources/extensions/gsd/auto.js +79 -50
  13. package/dist/resources/extensions/gsd/bootstrap/register-hooks.js +9 -4
  14. package/dist/resources/extensions/gsd/crash-recovery.js +160 -47
  15. package/dist/resources/extensions/gsd/db/auto-workers.js +227 -0
  16. package/dist/resources/extensions/gsd/db/command-queue.js +105 -0
  17. package/dist/resources/extensions/gsd/db/milestone-leases.js +210 -0
  18. package/dist/resources/extensions/gsd/db/runtime-kv.js +91 -0
  19. package/dist/resources/extensions/gsd/db/unit-dispatches.js +322 -0
  20. package/dist/resources/extensions/gsd/docs/COORDINATION.md +42 -0
  21. package/dist/resources/extensions/gsd/doctor-proactive.js +4 -0
  22. package/dist/resources/extensions/gsd/doctor-runtime-checks.js +22 -6
  23. package/dist/resources/extensions/gsd/doctor.js +12 -2
  24. package/dist/resources/extensions/gsd/gsd-db.js +161 -3
  25. package/dist/resources/extensions/gsd/guided-flow.js +6 -2
  26. package/dist/resources/extensions/gsd/interrupted-session.js +18 -15
  27. package/dist/resources/extensions/gsd/state.js +21 -6
  28. package/dist/resources/extensions/gsd/worktree-resolver.js +64 -0
  29. package/dist/tsconfig.extensions.tsbuildinfo +1 -1
  30. package/dist/web/standalone/.next/BUILD_ID +1 -1
  31. package/dist/web/standalone/.next/app-path-routes-manifest.json +12 -12
  32. package/dist/web/standalone/.next/build-manifest.json +2 -2
  33. package/dist/web/standalone/.next/prerender-manifest.json +3 -3
  34. package/dist/web/standalone/.next/server/app/_global-error.html +1 -1
  35. package/dist/web/standalone/.next/server/app/_global-error.rsc +1 -1
  36. package/dist/web/standalone/.next/server/app/_global-error.segments/_full.segment.rsc +1 -1
  37. package/dist/web/standalone/.next/server/app/_global-error.segments/_global-error/__PAGE__.segment.rsc +1 -1
  38. package/dist/web/standalone/.next/server/app/_global-error.segments/_global-error.segment.rsc +1 -1
  39. package/dist/web/standalone/.next/server/app/_global-error.segments/_head.segment.rsc +1 -1
  40. package/dist/web/standalone/.next/server/app/_global-error.segments/_index.segment.rsc +1 -1
  41. package/dist/web/standalone/.next/server/app/_global-error.segments/_tree.segment.rsc +1 -1
  42. package/dist/web/standalone/.next/server/app/_not-found.html +1 -1
  43. package/dist/web/standalone/.next/server/app/_not-found.rsc +1 -1
  44. package/dist/web/standalone/.next/server/app/_not-found.segments/_full.segment.rsc +1 -1
  45. package/dist/web/standalone/.next/server/app/_not-found.segments/_head.segment.rsc +1 -1
  46. package/dist/web/standalone/.next/server/app/_not-found.segments/_index.segment.rsc +1 -1
  47. package/dist/web/standalone/.next/server/app/_not-found.segments/_not-found/__PAGE__.segment.rsc +1 -1
  48. package/dist/web/standalone/.next/server/app/_not-found.segments/_not-found.segment.rsc +1 -1
  49. package/dist/web/standalone/.next/server/app/_not-found.segments/_tree.segment.rsc +1 -1
  50. package/dist/web/standalone/.next/server/app/index.html +1 -1
  51. package/dist/web/standalone/.next/server/app/index.rsc +1 -1
  52. package/dist/web/standalone/.next/server/app/index.segments/__PAGE__.segment.rsc +1 -1
  53. package/dist/web/standalone/.next/server/app/index.segments/_full.segment.rsc +1 -1
  54. package/dist/web/standalone/.next/server/app/index.segments/_head.segment.rsc +1 -1
  55. package/dist/web/standalone/.next/server/app/index.segments/_index.segment.rsc +1 -1
  56. package/dist/web/standalone/.next/server/app/index.segments/_tree.segment.rsc +1 -1
  57. package/dist/web/standalone/.next/server/app-paths-manifest.json +12 -12
  58. package/dist/web/standalone/.next/server/middleware-build-manifest.js +1 -1
  59. package/dist/web/standalone/.next/server/pages/404.html +1 -1
  60. package/dist/web/standalone/.next/server/pages/500.html +1 -1
  61. package/dist/web/standalone/.next/server/server-reference-manifest.json +1 -1
  62. package/package.json +1 -1
  63. package/src/resources/extensions/gsd/auto/detect-stuck.ts +37 -5
  64. package/src/resources/extensions/gsd/auto/loop.ts +263 -41
  65. package/src/resources/extensions/gsd/auto/phases.ts +7 -5
  66. package/src/resources/extensions/gsd/auto/session.ts +36 -0
  67. package/src/resources/extensions/gsd/auto-dispatch.ts +53 -2
  68. package/src/resources/extensions/gsd/auto-post-unit.ts +19 -11
  69. package/src/resources/extensions/gsd/auto-worktree.ts +26 -211
  70. package/src/resources/extensions/gsd/auto.ts +89 -44
  71. package/src/resources/extensions/gsd/bootstrap/register-hooks.ts +9 -4
  72. package/src/resources/extensions/gsd/crash-recovery.ts +177 -43
  73. package/src/resources/extensions/gsd/db/auto-workers.ts +273 -0
  74. package/src/resources/extensions/gsd/db/command-queue.ts +149 -0
  75. package/src/resources/extensions/gsd/db/milestone-leases.ts +274 -0
  76. package/src/resources/extensions/gsd/db/runtime-kv.ts +127 -0
  77. package/src/resources/extensions/gsd/db/unit-dispatches.ts +446 -0
  78. package/src/resources/extensions/gsd/docs/COORDINATION.md +42 -0
  79. package/src/resources/extensions/gsd/doctor-proactive.ts +4 -0
  80. package/src/resources/extensions/gsd/doctor-runtime-checks.ts +24 -6
  81. package/src/resources/extensions/gsd/doctor.ts +10 -2
  82. package/src/resources/extensions/gsd/gsd-db.ts +170 -3
  83. package/src/resources/extensions/gsd/guided-flow.ts +6 -2
  84. package/src/resources/extensions/gsd/interrupted-session.ts +19 -12
  85. package/src/resources/extensions/gsd/state.ts +44 -6
  86. package/src/resources/extensions/gsd/tests/auto-loop-no-copy-artifacts.test.ts +72 -0
  87. package/src/resources/extensions/gsd/tests/auto-loop-symlink-worktree.test.ts +190 -0
  88. package/src/resources/extensions/gsd/tests/auto-workers.test.ts +105 -0
  89. package/src/resources/extensions/gsd/tests/command-queue.test.ts +141 -0
  90. package/src/resources/extensions/gsd/tests/crash-recovery-via-db.test.ts +203 -0
  91. package/src/resources/extensions/gsd/tests/crash-recovery.test.ts +169 -59
  92. package/src/resources/extensions/gsd/tests/detect-stuck-respects-retry.test.ts +173 -0
  93. package/src/resources/extensions/gsd/tests/integration/auto-worktree.test.ts +22 -12
  94. package/src/resources/extensions/gsd/tests/integration/doctor-proactive.test.ts +24 -10
  95. package/src/resources/extensions/gsd/tests/integration/doctor-runtime.test.ts +35 -23
  96. package/src/resources/extensions/gsd/tests/integration/workspace-collapse-integration.test.ts +3 -5
  97. package/src/resources/extensions/gsd/tests/interrupted-session-auto.test.ts +72 -25
  98. package/src/resources/extensions/gsd/tests/interrupted-session-ui.test.ts +72 -25
  99. package/src/resources/extensions/gsd/tests/memory-pressure-stuck-state.test.ts +9 -6
  100. package/src/resources/extensions/gsd/tests/milestone-leases.test.ts +152 -0
  101. package/src/resources/extensions/gsd/tests/parallel-milestone-isolation.test.ts +106 -0
  102. package/src/resources/extensions/gsd/tests/paused-session-via-db.test.ts +119 -0
  103. package/src/resources/extensions/gsd/tests/pipeline-variant-dispatch.test.ts +58 -0
  104. package/src/resources/extensions/gsd/tests/preferences-worktree-sync.test.ts +3 -17
  105. package/src/resources/extensions/gsd/tests/register-hooks-depth-verification.test.ts +110 -0
  106. package/src/resources/extensions/gsd/tests/runtime-kv.test.ts +120 -0
  107. package/src/resources/extensions/gsd/tests/skipped-validation-completion.test.ts +133 -28
  108. package/src/resources/extensions/gsd/tests/skipped-validation-db-atomicity.test.ts +17 -0
  109. package/src/resources/extensions/gsd/tests/stuck-state-via-db.test.ts +134 -0
  110. package/src/resources/extensions/gsd/tests/sync-layer-scope.test.ts +7 -26
  111. package/src/resources/extensions/gsd/tests/teardown-cleanup-parity.test.ts +4 -8
  112. package/src/resources/extensions/gsd/tests/unit-dispatches.test.ts +247 -0
  113. package/src/resources/extensions/gsd/tests/validate-milestone.test.ts +41 -1
  114. package/src/resources/extensions/gsd/tests/workspace.test.ts +15 -9
  115. package/src/resources/extensions/gsd/tests/write-gate.test.ts +31 -23
  116. package/src/resources/extensions/gsd/worktree-resolver.ts +62 -0
  117. package/src/resources/extensions/gsd/tests/auto-lock-creation.test.ts +0 -213
  118. package/src/resources/extensions/gsd/tests/auto-stale-lock-self-kill.test.ts +0 -87
  119. package/src/resources/extensions/gsd/tests/stop-auto-remote.test.ts +0 -159
  120. /package/dist/web/standalone/.next/static/{AT5qi39nKXkdmQIOIoh0f → Y5UeGFkXTYM9WIQOWHkot}/_buildManifest.js +0 -0
  121. /package/dist/web/standalone/.next/static/{AT5qi39nKXkdmQIOIoh0f → Y5UeGFkXTYM9WIQOWHkot}/_ssgManifest.js +0 -0
@@ -0,0 +1,322 @@
1
+ // gsd-2 + Unit dispatch ledger (DB-backed coordination, Phase B)
2
+ //
3
+ // Records every auto-mode unit dispatch (plan-slice, run-task, summarize, …)
4
+ // with worker_id, fencing token, status lifecycle, and retry metadata. The
5
+ // ledger is the substrate Phase C will consume to migrate stuck-state.json
6
+ // and paused-session.json out of the runtime/ directory.
7
+ //
8
+ // Codex review MEDIUM B2: partial unique index
9
+ // idx_unit_dispatches_active_per_unit ON unit_dispatches(unit_id)
10
+ // WHERE status IN ('claimed','running')
11
+ // enforces that two workers cannot simultaneously claim the same unit.
12
+ // recordDispatchClaim relies on the index to fail fast at INSERT time
13
+ // rather than racing in application code.
14
+ import { randomUUID } from "node:crypto";
15
+ import { _getAdapter, isDbAvailable, transaction, insertAuditEvent, } from "../gsd-db.js";
16
/**
 * Decide whether `err` is the uniqueness rejection that recordDispatchClaim
 * expects from the partial unique index on active dispatches.
 *
 * FOREIGN KEY, NOT NULL and CHECK failures also produce a "constraint failed"
 * message and may carry the generic SQLITE_CONSTRAINT code. They are not
 * double-claims, so they are ruled out by message before the code and the
 * generic message pattern are consulted; the caller then rethrows them.
 *
 * @param err - value caught from the INSERT; may be any thrown value.
 * @returns true when the error looks like a UNIQUE-constraint violation.
 */
function isAlreadyActiveConstraintError(err) {
    const code = err && typeof err === "object" && "code" in err
        ? String(err.code ?? "")
        : "";
    const msg = err instanceof Error ? err.message : String(err);
    // Other constraint kinds must never be reported as "already active".
    if (/\bFOREIGN KEY\b|\b(?:NOT NULL|CHECK) constraint failed\b/i.test(msg)) {
        return false;
    }
    if (code === "SQLITE_CONSTRAINT" || code === "SQLITE_CONSTRAINT_UNIQUE") {
        return true;
    }
    // Fallback for adapters that expose no recognised code.
    return /\bUNIQUE\b|\bconstraint failed\b/i.test(msg);
}
29
/**
 * Insert a new dispatch row in `claimed` state for `input.unitId`.
 *
 * Runs inside a single transaction:
 *   1. Looks up a `held` milestone lease matching the milestone, worker and
 *      fencing token from `input`; without one the claim is refused with
 *      `error: "stale_lease"`.
 *   2. Inserts the dispatch row and writes a `dispatch-claimed` audit event.
 *   3. If that step fails with what isAlreadyActiveConstraintError recognises
 *      as a uniqueness violation (B2: partial unique index
 *      idx_unit_dispatches_active_per_unit), returns
 *      `error: "already_active"` describing the newest claimed/running row
 *      for the unit. Any other error is rethrown.
 *
 * @param input - claim description (traceId, turnId, workerId,
 *   milestoneLeaseToken, milestoneId, sliceId, taskId, unitType, unitId,
 *   attemptN, maxAttempts). attemptN defaults to 1 and maxAttempts to 3.
 * @returns `{ ok: true, dispatchId }` on success, otherwise an
 *   `{ ok: false, error, ... }` object.
 * @throws Error when the DB is unavailable.
 */
export function recordDispatchClaim(input) {
    if (!isDbAvailable()) {
        throw new Error("recordDispatchClaim: DB unavailable");
    }
    const claimedAt = new Date().toISOString();
    return transaction(() => {
        const db = _getAdapter();
        const leaseQuery = db.prepare(`SELECT fencing_token
            FROM milestone_leases
            WHERE milestone_id = :milestone_id
              AND worker_id = :worker_id
              AND fencing_token = :token
              AND status = 'held'`);
        const heldLease = leaseQuery.get({
            ":milestone_id": input.milestoneId,
            ":worker_id": input.workerId,
            ":token": input.milestoneLeaseToken,
        });
        if (!heldLease) {
            return {
                ok: false,
                error: "stale_lease",
                milestoneId: input.milestoneId,
                workerId: input.workerId,
                milestoneLeaseToken: input.milestoneLeaseToken,
            };
        }
        try {
            const insert = db.prepare(`INSERT INTO unit_dispatches (
                trace_id, turn_id, worker_id, milestone_lease_token,
                milestone_id, slice_id, task_id,
                unit_type, unit_id, status, attempt_n,
                started_at, max_attempts
            ) VALUES (
                :trace_id, :turn_id, :worker_id, :milestone_lease_token,
                :milestone_id, :slice_id, :task_id,
                :unit_type, :unit_id, 'claimed', :attempt_n,
                :started_at, :max_attempts
            )`);
            const inserted = insert.run({
                ":trace_id": input.traceId,
                ":turn_id": input.turnId ?? null,
                ":worker_id": input.workerId,
                ":milestone_lease_token": input.milestoneLeaseToken,
                ":milestone_id": input.milestoneId,
                ":slice_id": input.sliceId ?? null,
                ":task_id": input.taskId ?? null,
                ":unit_type": input.unitType,
                ":unit_id": input.unitId,
                ":attempt_n": input.attemptN ?? 1,
                ":started_at": claimedAt,
                ":max_attempts": input.maxAttempts ?? 3,
            });
            const dispatchId = Number(inserted.lastInsertRowid ?? 0);
            const auditPayload = {
                dispatchId,
                unitId: input.unitId,
                unitType: input.unitType,
                workerId: input.workerId,
                attemptN: input.attemptN ?? 1,
            };
            insertAuditEvent({
                eventId: randomUUID(),
                traceId: input.traceId,
                turnId: input.turnId ?? undefined,
                category: "orchestration",
                type: "dispatch-claimed",
                ts: claimedAt,
                payload: auditPayload,
            });
            return { ok: true, dispatchId };
        }
        catch (err) {
            if (!isAlreadyActiveConstraintError(err)) {
                throw err;
            }
            // The INSERT was rejected as a duplicate active claim: report the
            // row that currently owns the unit so the caller can decide.
            const active = db.prepare(`SELECT id, status, worker_id FROM unit_dispatches
                WHERE unit_id = :unit_id AND status IN ('claimed','running')
                ORDER BY id DESC LIMIT 1`).get({ ":unit_id": input.unitId });
            return {
                ok: false,
                error: "already_active",
                existingId: active?.id ?? 0,
                existingStatus: active?.status ?? "claimed",
                existingWorker: active?.worker_id ?? "unknown",
            };
        }
    });
}
122
/**
 * Move a dispatch from `claimed` to `running`.
 *
 * Does nothing when the DB is unavailable. The UPDATE only matches a row
 * whose status is still `claimed`.
 *
 * @param dispatchId - id of the unit_dispatches row.
 */
export function markRunning(dispatchId) {
    if (isDbAvailable()) {
        const statement = _getAdapter().prepare(`UPDATE unit_dispatches SET status = 'running'
            WHERE id = :id AND status = 'claimed'`);
        statement.run({ ":id": dispatchId });
    }
}
130
/**
 * Move a `claimed` or `running` dispatch to `completed`.
 *
 * Stamps ended_at, exit_reason and verification_evidence_id inside a
 * transaction. A `dispatch-completed` audit event is written only when the
 * UPDATE reports at least one changed row. Does nothing when the DB is
 * unavailable.
 *
 * @param dispatchId - id of the unit_dispatches row.
 * @param opts - optional `exitReason` and `verificationEvidenceId`; both are
 *   stored as NULL when absent.
 */
export function markCompleted(dispatchId, opts) {
    if (!isDbAvailable()) {
        return;
    }
    const endedAt = new Date().toISOString();
    const db = _getAdapter();
    const rowsChanged = transaction(() => {
        const outcome = db.prepare(`UPDATE unit_dispatches
            SET status = 'completed', ended_at = :ended_at,
                exit_reason = :exit_reason,
                verification_evidence_id = :evidence_id
            WHERE id = :id
              AND status IN ('claimed','running')`).run({
            ":id": dispatchId,
            ":ended_at": endedAt,
            ":exit_reason": opts?.exitReason ?? null,
            ":evidence_id": opts?.verificationEvidenceId ?? null,
        });
        // A non-numeric `changes` value is treated as "nothing changed".
        return typeof outcome.changes === "number" ? outcome.changes : 0;
    });
    if (rowsChanged >= 1) {
        insertAuditEvent({
            eventId: randomUUID(),
            traceId: dispatchId.toString(),
            category: "orchestration",
            type: "dispatch-completed",
            ts: endedAt,
            payload: { dispatchId },
        });
    }
}
165
/**
 * Transition a `claimed` or `running` dispatch into `failed`, optionally
 * scheduling a retry.
 *
 * Stamps ended_at, error_summary, last_error_code, last_error_at,
 * retry_after_ms and next_run_at inside a transaction. A `dispatch-failed`
 * audit event is written only when the UPDATE reports at least one changed
 * row. Does nothing when the DB is unavailable.
 *
 * @param dispatchId - id of the unit_dispatches row.
 * @param opts - `errorSummary` (stored in error_summary), optional
 *   `errorCode`, optional `retryAfterMs` (delay in milliseconds).
 */
export function markFailed(dispatchId, opts) {
    if (!isDbAvailable())
        return;
    const now = new Date();
    const nowIso = now.toISOString();
    // retry_after_ms is persisted whenever retryAfterMs is non-nullish, so a
    // delay of 0 must also yield a next_run_at rather than NULL. Non-finite
    // delays cannot be turned into a timestamp and leave next_run_at NULL
    // instead of throwing from toISOString().
    const hasRetryDelay = typeof opts.retryAfterMs === "number"
        && Number.isFinite(opts.retryAfterMs);
    const nextRunIso = hasRetryDelay
        ? new Date(now.getTime() + opts.retryAfterMs).toISOString()
        : null;
    const db = _getAdapter();
    let changes = 0;
    transaction(() => {
        const result = db.prepare(`UPDATE unit_dispatches
            SET status = 'failed', ended_at = :ended_at,
                error_summary = :error_summary,
                last_error_code = :last_error_code,
                last_error_at = :last_error_at,
                retry_after_ms = :retry_after_ms,
                next_run_at = :next_run_at
            WHERE id = :id
              AND status IN ('claimed','running')`).run({
            ":id": dispatchId,
            ":ended_at": nowIso,
            ":error_summary": opts.errorSummary,
            ":last_error_code": opts.errorCode ?? null,
            ":last_error_at": nowIso,
            ":retry_after_ms": opts.retryAfterMs ?? null,
            ":next_run_at": nextRunIso,
        });
        // A non-numeric `changes` value is treated as "nothing changed".
        changes =
            typeof result.changes === "number"
                ? result.changes
                : 0;
    });
    // No row moved to `failed`: skip the audit event.
    if (changes < 1)
        return;
    insertAuditEvent({
        eventId: randomUUID(),
        traceId: dispatchId.toString(),
        category: "orchestration",
        type: "dispatch-failed",
        ts: nowIso,
        payload: { dispatchId, errorSummary: opts.errorSummary, retryAfterMs: opts.retryAfterMs ?? null },
    });
}
210
/**
 * Move a `claimed` or `running` dispatch to `stuck`.
 *
 * Stamps ended_at and stores `reason` in exit_reason inside a transaction.
 * A `dispatch-stuck` audit event is written only when the UPDATE reports at
 * least one changed row. Does nothing when the DB is unavailable.
 *
 * @param dispatchId - id of the unit_dispatches row.
 * @param reason - text stored in exit_reason and in the audit payload.
 */
export function markStuck(dispatchId, reason) {
    if (!isDbAvailable()) {
        return;
    }
    const endedAt = new Date().toISOString();
    const db = _getAdapter();
    const outcome = transaction(() => db.prepare(`UPDATE unit_dispatches
            SET status = 'stuck', ended_at = :ended_at, exit_reason = :reason
            WHERE id = :id
              AND status IN ('claimed','running')`).run({ ":id": dispatchId, ":ended_at": endedAt, ":reason": reason }));
    // A non-numeric `changes` value is treated as "nothing changed".
    let rowsChanged = 0;
    if (typeof outcome.changes === "number") {
        rowsChanged = outcome.changes;
    }
    if (rowsChanged > 0) {
        insertAuditEvent({
            eventId: randomUUID(),
            traceId: dispatchId.toString(),
            category: "orchestration",
            type: "dispatch-stuck",
            ts: endedAt,
            payload: { dispatchId, reason },
        });
    }
}
236
/**
 * Move a `claimed` or `running` dispatch to `paused` and stamp ended_at.
 *
 * Does nothing when the DB is unavailable. No audit event is written here.
 *
 * @param dispatchId - id of the unit_dispatches row.
 */
export function markPaused(dispatchId) {
    if (isDbAvailable()) {
        const pausedAt = new Date().toISOString();
        const statement = _getAdapter().prepare(`UPDATE unit_dispatches
            SET status = 'paused', ended_at = :ended_at
            WHERE id = :id AND status IN ('claimed','running')`);
        statement.run({ ":id": dispatchId, ":ended_at": pausedAt });
    }
}
246
/**
 * Move a `pending`, `claimed` or `running` dispatch to `canceled`, stamping
 * ended_at and storing `reason` in exit_reason.
 *
 * Does nothing when the DB is unavailable. No audit event is written here.
 *
 * @param dispatchId - id of the unit_dispatches row.
 * @param reason - text stored in exit_reason.
 */
export function markCanceled(dispatchId, reason) {
    if (isDbAvailable()) {
        const canceledAt = new Date().toISOString();
        const statement = _getAdapter().prepare(`UPDATE unit_dispatches
            SET status = 'canceled', ended_at = :ended_at, exit_reason = :reason
            WHERE id = :id AND status IN ('pending','claimed','running')`);
        statement.run({ ":id": dispatchId, ":ended_at": canceledAt, ":reason": reason });
    }
}
256
/**
 * Return up to `limit` dispatch rows for a unit, newest first (by id).
 *
 * Used by recordDispatchClaim callers to compute attempt_n and by
 * detect-stuck.ts (B3) to consult the retry budget before tripping the stuck
 * verdict.
 *
 * @param unitId - unit_id to look up.
 * @param limit - maximum number of rows; defaults to 10.
 * @returns the matching rows, or an empty array when the DB is unavailable.
 */
export function getRecentForUnit(unitId, limit = 10) {
    if (!isDbAvailable()) {
        return [];
    }
    const query = _getAdapter().prepare(`SELECT * FROM unit_dispatches WHERE unit_id = :unit_id ORDER BY id DESC LIMIT :limit`);
    return query.all({ ":unit_id": unitId, ":limit": limit });
}
267
/**
 * Return the dispatch row with the highest id for a unit, whatever its
 * status.
 *
 * @param unitId - unit_id to look up.
 * @returns the row, or null when the unit has no dispatch rows or the DB is
 *   unavailable.
 */
export function getLatestForUnit(unitId) {
    if (!isDbAvailable()) {
        return null;
    }
    const query = _getAdapter().prepare(`SELECT * FROM unit_dispatches WHERE unit_id = :unit_id ORDER BY id DESC LIMIT 1`);
    const latest = query.get({ ":unit_id": unitId });
    return latest ?? null;
}
278
/**
 * Phase C — return the most recent unit_id values dispatched by a worker,
 * oldest-first.
 *
 * Replaces the persistence side of stuck-state.json's `recentUnits` field:
 * the auto-loop seeds loopState.recentUnits from this on session start so the
 * stuck-detector window survives a session restart (#3704). Rows are selected
 * newest-first (started_at, then id) and then flipped, so the result matches
 * the oldest-first in-memory window that detect-stuck.ts expects.
 *
 * @param workerId - worker_id whose dispatches are listed.
 * @param limit - maximum number of entries; defaults to 20.
 * @returns array of `{ key }` objects, or an empty array when the DB is
 *   unavailable.
 */
export function getRecentUnitKeysForWorker(workerId, limit = 20) {
    if (!isDbAvailable()) {
        return [];
    }
    const query = _getAdapter().prepare(`SELECT unit_id FROM unit_dispatches
        WHERE worker_id = :worker_id
        ORDER BY started_at DESC, id DESC
        LIMIT :limit`);
    const newestFirst = query.all({ ":worker_id": workerId, ":limit": limit });
    // Flip to oldest-first for sliding-window consumers.
    const oldestFirst = newestFirst.map((row) => ({ key: row.unit_id }));
    oldestFirst.reverse();
    return oldestFirst;
}
298
/**
 * Return the most recent unit_id values dispatched by any worker registered
 * for a project root, oldest-first.
 *
 * Joins unit_dispatches to workers on worker_id and filters on
 * workers.project_root_realpath. Rows are selected newest-first (started_at,
 * then id) and then flipped to oldest-first.
 *
 * @param projectRootRealpath - value compared with
 *   workers.project_root_realpath.
 * @param limit - maximum number of entries; defaults to 20.
 * @returns array of `{ key }` objects, or an empty array when the DB is
 *   unavailable.
 */
export function getRecentUnitKeysForProjectRoot(projectRootRealpath, limit = 20) {
    if (!isDbAvailable()) {
        return [];
    }
    const query = _getAdapter().prepare(`SELECT ud.unit_id
        FROM unit_dispatches ud
        INNER JOIN workers w ON w.worker_id = ud.worker_id
        WHERE w.project_root_realpath = :project_root_realpath
        ORDER BY ud.started_at DESC, ud.id DESC
        LIMIT :limit`);
    const newestFirst = query.all({
        ":project_root_realpath": projectRootRealpath,
        ":limit": limit,
    });
    const oldestFirst = newestFirst.map((row) => ({ key: row.unit_id }));
    oldestFirst.reverse();
    return oldestFirst;
}
313
/**
 * Return every dispatch row of a milestone that has the given status,
 * ordered by ascending id. Useful for janitors and dashboards.
 *
 * @param milestoneId - milestone_id to filter on.
 * @param status - status value to filter on.
 * @returns the matching rows, or an empty array when the DB is unavailable.
 */
export function getDispatchesByStatus(milestoneId, status) {
    if (!isDbAvailable()) {
        return [];
    }
    const query = _getAdapter().prepare(`SELECT * FROM unit_dispatches WHERE milestone_id = :mid AND status = :status ORDER BY id`);
    return query.all({ ":mid": milestoneId, ":status": status });
}
@@ -0,0 +1,42 @@
1
+ # Auto-mode coordination is single-host
2
+
3
+ The DB-backed coordination tables introduced by Phase B (`workers`,
4
+ `milestone_leases`, `unit_dispatches`, `cancellation_requests`,
5
+ `command_queue`) and the supporting `runtime_kv` table from Phase C all
6
+ rely on **shared SQLite WAL on local disk**. They do not work across
7
+ machines.
8
+
9
+ ## Why single-host only
10
+
11
+ - SQLite WAL coordination — the locking primitives that make
12
+ `claimMilestoneLease`, `recordDispatchClaim`, and `claimNextCommand`
13
+ atomic — is local-disk only. Network filesystems (NFS, SMB, S3FS) and
14
+ FUSE mounts break the lock semantics that the WAL relies on.
15
+ - Heartbeat TTL (`workers.last_heartbeat_at`) compares timestamps written
16
+ with SQLite wall-clock time (`datetime('now')`). Across machines without
17
+ wall-clock synchronization (for example NTP/chrony), TTL filtering can
18
+ produce phantom-active or premature-crashed verdicts. Monotonic clocks
19
+ are not used for these comparisons.
20
+ - Fencing tokens (`milestone_leases.fencing_token`) are monotonically
21
+ ordered by SQL within a single transaction. Cross-host races could
22
+ produce duplicate tokens if two SQLite processes opened the same DB
23
+ on a network mount.
24
+
25
+ ## What does work
26
+
27
+ - Multiple `gsd auto` worker processes on the **same machine**, sharing
28
+ the project's SQLite DB via WAL. The lease check refuses concurrent
29
+ claims on the same milestone; the dispatch ledger's partial unique
30
+ index refuses double-claims of the same unit.
31
+ - A single `gsd auto` worker plus arbitrary read-only consumers
32
+ (dashboards, doctors) on the same machine.
33
+ - Worktree-based parallelism on the same machine, where each worker
34
+ holds a different milestone lease.
35
+
36
+ ## Multi-host alternatives
37
+
38
+ If you need to coordinate `gsd auto` workers across machines, you need
39
+ a real coordinator: Postgres for the ledger + a leader-election service
40
+ (etcd, Consul) for the leases. That's out of scope for these phases.
41
+ The schema and module shapes here would need a non-trivial backend
42
+ swap before they could ride on top of either.
@@ -24,6 +24,7 @@ import { resolveMilestoneIntegrationBranch } from "./git-service.js";
24
24
  import { nativeIsRepo, nativeHasChanges, nativeLastCommitEpoch, nativeGetCurrentBranch, nativeAddTracked, nativeCommit } from "./native-git-bridge.js";
25
25
  import { loadEffectiveGSDPreferences } from "./preferences.js";
26
26
  import { runEnvironmentChecks } from "./doctor-environment.js";
27
+ import { ensureDbOpen } from "./bootstrap/dynamic-tools.js";
27
28
  /** In-memory health history for the current auto-mode session. */
28
29
  let healthHistory = [];
29
30
  /** Count of consecutive units with unresolved errors. */
@@ -159,6 +160,9 @@ export async function preDispatchHealthGate(basePath) {
159
160
  // If a stale lock exists, the crash recovery path should handle it,
160
161
  // not a new dispatch. This prevents double-dispatch after crashes.
161
162
  try {
163
+ if (existsSync(join(gsdRoot(basePath), "gsd.db"))) {
164
+ await ensureDbOpen(basePath);
165
+ }
162
166
  const lock = readCrashLock(basePath);
163
167
  if (lock && !isLockProcessAlive(lock)) {
164
168
  // Auto-clear it since we're about to dispatch anyway
@@ -6,6 +6,8 @@ import { deriveState, isGhostMilestone, isReusableGhostMilestone } from "./state
6
6
  import { saveFile } from "./files.js";
7
7
  import { nativeIsRepo, nativeForEachRef, nativeUpdateRef } from "./native-git-bridge.js";
8
8
  import { readCrashLock, isLockProcessAlive, clearLock } from "./crash-recovery.js";
9
+ import { getActiveAutoWorkers } from "./db/auto-workers.js";
10
+ import { normalizeRealPath } from "./paths.js";
9
11
  import { ensureGitignore, isGsdGitignored } from "./gitignore.js";
10
12
  import { readAllSessionStatuses, isSessionStale, removeSessionStatus } from "./session-status-io.js";
11
13
  import { recoverFailedMigration } from "./migrate-external.js";
@@ -26,6 +28,9 @@ function hasAssessmentVerdict(basePath, mid, sid) {
26
28
  export async function checkRuntimeHealth(basePath, issues, fixesApplied, shouldFix) {
27
29
  const root = gsdRoot(basePath);
28
30
  // ── Stale crash lock ──────────────────────────────────────────────────
31
+ // Phase C pt 2: the lock state lives in the workers + unit_dispatches
32
+ // tables now, not auto.lock. readCrashLock synthesizes a LockData from
33
+ // the DB; isLockProcessAlive is a pure OS PID check.
29
34
  try {
30
35
  const lock = readCrashLock(basePath);
31
36
  if (lock) {
@@ -36,13 +41,13 @@ export async function checkRuntimeHealth(basePath, issues, fixesApplied, shouldF
36
41
  code: "stale_crash_lock",
37
42
  scope: "project",
38
43
  unitId: "project",
39
- message: `Stale auto.lock from PID ${lock.pid} (started ${lock.startedAt}, was executing ${lock.unitType} ${lock.unitId}) — process is no longer running`,
40
- file: ".gsd/auto.lock",
44
+ message: `Stale auto-mode worker (PID ${lock.pid}, started ${lock.startedAt}, was executing ${lock.unitType} ${lock.unitId}) — process is no longer running`,
45
+ file: "<workers table>",
41
46
  fixable: true,
42
47
  });
43
48
  if (shouldFix("stale_crash_lock")) {
44
49
  clearLock(basePath);
45
- fixesApplied.push("cleared stale auto.lock");
50
+ fixesApplied.push("cleared stale auto-mode worker state");
46
51
  }
47
52
  }
48
53
  }
@@ -60,9 +65,20 @@ export async function checkRuntimeHealth(basePath, issues, fixesApplied, shouldF
60
65
  if (existsSync(lockDir)) {
61
66
  const statRes = statSync(lockDir);
62
67
  if (statRes.isDirectory()) {
63
- // Check if any live process actually holds this lock
64
- const lock = readCrashLock(basePath);
65
- const lockHolderAlive = lock ? isLockProcessAlive(lock) : false;
68
+ // Phase C pt 2: "any live process holds the lock?" check now means
69
+ // "is any worker registered with status='active' AND a fresh
70
+ // heartbeat for this project?" readCrashLock returns null for
71
+ // healthy live workers (it surfaces stale ones only), so we must
72
+ // consult getActiveAutoWorkers directly.
73
+ const projectRoot = normalizeRealPath(basePath);
74
+ const activeWorkers = getActiveAutoWorkers().filter((w) => w.project_root_realpath === projectRoot && isLockProcessAlive({
75
+ pid: w.pid,
76
+ startedAt: w.started_at,
77
+ unitType: "starting",
78
+ unitId: "bootstrap",
79
+ unitStartedAt: w.started_at,
80
+ }));
81
+ const lockHolderAlive = activeWorkers.length > 0;
66
82
  if (!lockHolderAlive) {
67
83
  issues.push({
68
84
  severity: "error",
@@ -2,8 +2,8 @@ import { existsSync, mkdirSync, lstatSync, readdirSync, readFileSync } from "nod
2
2
  import { join } from "node:path";
3
3
  import { loadFile, parseSummary, saveFile, parseTaskPlanMustHaves, countMustHavesMentionedInSummary } from "./files.js";
4
4
  import { parseRoadmap as parseLegacyRoadmap, parsePlan as parseLegacyPlan } from "./parsers-legacy.js";
5
- import { isDbAvailable, getMilestoneSlices, getSliceTasks } from "./gsd-db.js";
6
- import { resolveMilestoneFile, resolveMilestonePath, resolveSliceFile, resolveSlicePath, resolveTaskFile, resolveTasksDir, milestonesDir, gsdRoot, relMilestoneFile, relSliceFile, relTaskFile, relSlicePath, relGsdRootFile, resolveGsdRootFile, relMilestonePath } from "./paths.js";
5
+ import { isDbAvailable, openDatabase, getMilestoneSlices, getSliceTasks } from "./gsd-db.js";
6
+ import { resolveMilestoneFile, resolveMilestonePath, resolveSliceFile, resolveSlicePath, resolveTaskFile, resolveTasksDir, milestonesDir, gsdRoot, relMilestoneFile, relSliceFile, relTaskFile, relSlicePath, relGsdRootFile, resolveGsdRootFile, relMilestonePath, resolveGsdPathContract } from "./paths.js";
7
7
  import { deriveState, isMilestoneComplete } from "./state.js";
8
8
  import { invalidateAllCaches } from "./cache.js";
9
9
  import { loadEffectiveGSDPreferences } from "./preferences.js";
@@ -309,6 +309,16 @@ export async function runGSDDoctor(basePath, options) {
309
309
  const fix = options?.fix === true;
310
310
  const dryRun = options?.dryRun === true;
311
311
  const fixLevel = options?.fixLevel ?? "all";
312
+ // CLI doctor can run before any tool handler has opened the DB. Runtime
313
+ // health checks need the existing project DB to surface DB-backed crash
314
+ // locks, paused sessions, and coordination rows.
315
+ const dbPath = resolveGsdPathContract(basePath).projectDb;
316
+ if (existsSync(dbPath)) {
317
+ try {
318
+ openDatabase(dbPath);
319
+ }
320
+ catch { /* surfaced later as db_unavailable */ }
321
+ }
312
322
  // Issue codes that represent completion state transitions — creating summary
313
323
  // stubs, marking slices/milestones done in the roadmap. These belong to the
314
324
  // dispatch lifecycle (complete-slice, complete-milestone units), not to