gsd-pi 2.78.1-dev.d8826a445 → 2.78.1-dev.eccf86e27
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +5 -7
- package/dist/help-text.js +1 -1
- package/dist/resource-loader.js +6 -1
- package/dist/resources/.managed-resources-content-hash +1 -1
- package/dist/resources/extensions/gsd/auto/detect-stuck.js +41 -5
- package/dist/resources/extensions/gsd/auto/loop.js +235 -36
- package/dist/resources/extensions/gsd/auto/phases.js +7 -5
- package/dist/resources/extensions/gsd/auto/session.js +33 -0
- package/dist/resources/extensions/gsd/auto-dispatch.js +46 -2
- package/dist/resources/extensions/gsd/auto-post-unit.js +19 -11
- package/dist/resources/extensions/gsd/auto-worktree.js +26 -187
- package/dist/resources/extensions/gsd/auto.js +79 -50
- package/dist/resources/extensions/gsd/bootstrap/register-hooks.js +9 -4
- package/dist/resources/extensions/gsd/crash-recovery.js +160 -47
- package/dist/resources/extensions/gsd/db/auto-workers.js +227 -0
- package/dist/resources/extensions/gsd/db/command-queue.js +105 -0
- package/dist/resources/extensions/gsd/db/milestone-leases.js +210 -0
- package/dist/resources/extensions/gsd/db/runtime-kv.js +91 -0
- package/dist/resources/extensions/gsd/db/unit-dispatches.js +322 -0
- package/dist/resources/extensions/gsd/docs/COORDINATION.md +42 -0
- package/dist/resources/extensions/gsd/doctor-proactive.js +4 -0
- package/dist/resources/extensions/gsd/doctor-runtime-checks.js +22 -6
- package/dist/resources/extensions/gsd/doctor.js +12 -2
- package/dist/resources/extensions/gsd/gsd-db.js +161 -3
- package/dist/resources/extensions/gsd/guided-flow.js +6 -2
- package/dist/resources/extensions/gsd/interrupted-session.js +18 -15
- package/dist/resources/extensions/gsd/state.js +21 -6
- package/dist/resources/extensions/gsd/worktree-resolver.js +64 -0
- package/dist/tsconfig.extensions.tsbuildinfo +1 -1
- package/dist/web/standalone/.next/BUILD_ID +1 -1
- package/dist/web/standalone/.next/app-path-routes-manifest.json +12 -12
- package/dist/web/standalone/.next/build-manifest.json +2 -2
- package/dist/web/standalone/.next/prerender-manifest.json +3 -3
- package/dist/web/standalone/.next/server/app/_global-error.html +1 -1
- package/dist/web/standalone/.next/server/app/_global-error.rsc +1 -1
- package/dist/web/standalone/.next/server/app/_global-error.segments/_full.segment.rsc +1 -1
- package/dist/web/standalone/.next/server/app/_global-error.segments/_global-error/__PAGE__.segment.rsc +1 -1
- package/dist/web/standalone/.next/server/app/_global-error.segments/_global-error.segment.rsc +1 -1
- package/dist/web/standalone/.next/server/app/_global-error.segments/_head.segment.rsc +1 -1
- package/dist/web/standalone/.next/server/app/_global-error.segments/_index.segment.rsc +1 -1
- package/dist/web/standalone/.next/server/app/_global-error.segments/_tree.segment.rsc +1 -1
- package/dist/web/standalone/.next/server/app/_not-found.html +1 -1
- package/dist/web/standalone/.next/server/app/_not-found.rsc +1 -1
- package/dist/web/standalone/.next/server/app/_not-found.segments/_full.segment.rsc +1 -1
- package/dist/web/standalone/.next/server/app/_not-found.segments/_head.segment.rsc +1 -1
- package/dist/web/standalone/.next/server/app/_not-found.segments/_index.segment.rsc +1 -1
- package/dist/web/standalone/.next/server/app/_not-found.segments/_not-found/__PAGE__.segment.rsc +1 -1
- package/dist/web/standalone/.next/server/app/_not-found.segments/_not-found.segment.rsc +1 -1
- package/dist/web/standalone/.next/server/app/_not-found.segments/_tree.segment.rsc +1 -1
- package/dist/web/standalone/.next/server/app/index.html +1 -1
- package/dist/web/standalone/.next/server/app/index.rsc +1 -1
- package/dist/web/standalone/.next/server/app/index.segments/__PAGE__.segment.rsc +1 -1
- package/dist/web/standalone/.next/server/app/index.segments/_full.segment.rsc +1 -1
- package/dist/web/standalone/.next/server/app/index.segments/_head.segment.rsc +1 -1
- package/dist/web/standalone/.next/server/app/index.segments/_index.segment.rsc +1 -1
- package/dist/web/standalone/.next/server/app/index.segments/_tree.segment.rsc +1 -1
- package/dist/web/standalone/.next/server/app-paths-manifest.json +12 -12
- package/dist/web/standalone/.next/server/middleware-build-manifest.js +1 -1
- package/dist/web/standalone/.next/server/pages/404.html +1 -1
- package/dist/web/standalone/.next/server/pages/500.html +1 -1
- package/dist/web/standalone/.next/server/server-reference-manifest.json +1 -1
- package/package.json +1 -1
- package/src/resources/extensions/gsd/auto/detect-stuck.ts +37 -5
- package/src/resources/extensions/gsd/auto/loop.ts +263 -41
- package/src/resources/extensions/gsd/auto/phases.ts +7 -5
- package/src/resources/extensions/gsd/auto/session.ts +36 -0
- package/src/resources/extensions/gsd/auto-dispatch.ts +53 -2
- package/src/resources/extensions/gsd/auto-post-unit.ts +19 -11
- package/src/resources/extensions/gsd/auto-worktree.ts +26 -211
- package/src/resources/extensions/gsd/auto.ts +89 -44
- package/src/resources/extensions/gsd/bootstrap/register-hooks.ts +9 -4
- package/src/resources/extensions/gsd/crash-recovery.ts +177 -43
- package/src/resources/extensions/gsd/db/auto-workers.ts +273 -0
- package/src/resources/extensions/gsd/db/command-queue.ts +149 -0
- package/src/resources/extensions/gsd/db/milestone-leases.ts +274 -0
- package/src/resources/extensions/gsd/db/runtime-kv.ts +127 -0
- package/src/resources/extensions/gsd/db/unit-dispatches.ts +446 -0
- package/src/resources/extensions/gsd/docs/COORDINATION.md +42 -0
- package/src/resources/extensions/gsd/doctor-proactive.ts +4 -0
- package/src/resources/extensions/gsd/doctor-runtime-checks.ts +24 -6
- package/src/resources/extensions/gsd/doctor.ts +10 -2
- package/src/resources/extensions/gsd/gsd-db.ts +170 -3
- package/src/resources/extensions/gsd/guided-flow.ts +6 -2
- package/src/resources/extensions/gsd/interrupted-session.ts +19 -12
- package/src/resources/extensions/gsd/state.ts +44 -6
- package/src/resources/extensions/gsd/tests/auto-loop-no-copy-artifacts.test.ts +72 -0
- package/src/resources/extensions/gsd/tests/auto-loop-symlink-worktree.test.ts +190 -0
- package/src/resources/extensions/gsd/tests/auto-workers.test.ts +105 -0
- package/src/resources/extensions/gsd/tests/command-queue.test.ts +141 -0
- package/src/resources/extensions/gsd/tests/crash-recovery-via-db.test.ts +203 -0
- package/src/resources/extensions/gsd/tests/crash-recovery.test.ts +169 -59
- package/src/resources/extensions/gsd/tests/detect-stuck-respects-retry.test.ts +173 -0
- package/src/resources/extensions/gsd/tests/integration/auto-worktree.test.ts +22 -12
- package/src/resources/extensions/gsd/tests/integration/doctor-proactive.test.ts +24 -10
- package/src/resources/extensions/gsd/tests/integration/doctor-runtime.test.ts +35 -23
- package/src/resources/extensions/gsd/tests/integration/workspace-collapse-integration.test.ts +3 -5
- package/src/resources/extensions/gsd/tests/interrupted-session-auto.test.ts +72 -25
- package/src/resources/extensions/gsd/tests/interrupted-session-ui.test.ts +72 -25
- package/src/resources/extensions/gsd/tests/memory-pressure-stuck-state.test.ts +9 -6
- package/src/resources/extensions/gsd/tests/milestone-leases.test.ts +152 -0
- package/src/resources/extensions/gsd/tests/parallel-milestone-isolation.test.ts +106 -0
- package/src/resources/extensions/gsd/tests/paused-session-via-db.test.ts +119 -0
- package/src/resources/extensions/gsd/tests/pipeline-variant-dispatch.test.ts +58 -0
- package/src/resources/extensions/gsd/tests/preferences-worktree-sync.test.ts +3 -17
- package/src/resources/extensions/gsd/tests/register-hooks-depth-verification.test.ts +110 -0
- package/src/resources/extensions/gsd/tests/runtime-kv.test.ts +120 -0
- package/src/resources/extensions/gsd/tests/skipped-validation-completion.test.ts +133 -28
- package/src/resources/extensions/gsd/tests/skipped-validation-db-atomicity.test.ts +17 -0
- package/src/resources/extensions/gsd/tests/stuck-state-via-db.test.ts +134 -0
- package/src/resources/extensions/gsd/tests/sync-layer-scope.test.ts +7 -26
- package/src/resources/extensions/gsd/tests/teardown-cleanup-parity.test.ts +4 -8
- package/src/resources/extensions/gsd/tests/unit-dispatches.test.ts +247 -0
- package/src/resources/extensions/gsd/tests/validate-milestone.test.ts +41 -1
- package/src/resources/extensions/gsd/tests/workspace.test.ts +15 -9
- package/src/resources/extensions/gsd/tests/write-gate.test.ts +31 -23
- package/src/resources/extensions/gsd/worktree-resolver.ts +62 -0
- package/src/resources/extensions/gsd/tests/auto-lock-creation.test.ts +0 -213
- package/src/resources/extensions/gsd/tests/auto-stale-lock-self-kill.test.ts +0 -87
- package/src/resources/extensions/gsd/tests/stop-auto-remote.test.ts +0 -159
- /package/dist/web/standalone/.next/static/{AT5qi39nKXkdmQIOIoh0f → Y5UeGFkXTYM9WIQOWHkot}/_buildManifest.js +0 -0
- /package/dist/web/standalone/.next/static/{AT5qi39nKXkdmQIOIoh0f → Y5UeGFkXTYM9WIQOWHkot}/_ssgManifest.js +0 -0
|
@@ -0,0 +1,322 @@
|
|
|
1
|
+
// gsd-2 + Unit dispatch ledger (DB-backed coordination, Phase B)
|
|
2
|
+
//
|
|
3
|
+
// Records every auto-mode unit dispatch (plan-slice, run-task, summarize, …)
|
|
4
|
+
// with worker_id, fencing token, status lifecycle, and retry metadata. The
|
|
5
|
+
// ledger is the substrate Phase C will consume to migrate stuck-state.json
|
|
6
|
+
// and paused-session.json out of the runtime/ directory.
|
|
7
|
+
//
|
|
8
|
+
// Codex review MEDIUM B2: partial unique index
|
|
9
|
+
// idx_unit_dispatches_active_per_unit ON unit_dispatches(unit_id)
|
|
10
|
+
// WHERE status IN ('claimed','running')
|
|
11
|
+
// enforces that two workers cannot simultaneously claim the same unit.
|
|
12
|
+
// recordDispatchClaim relies on the index to fail fast at INSERT time
|
|
13
|
+
// rather than racing in application code.
|
|
14
|
+
import { randomUUID } from "node:crypto";
|
|
15
|
+
import { _getAdapter, isDbAvailable, transaction, insertAuditEvent, } from "../gsd-db.js";
|
|
16
|
+
/**
 * Decide whether `err` looks like the unique-constraint rejection raised when
 * an INSERT collides with an already-active dispatch row.
 *
 * Only uniqueness violations count. FOREIGN KEY, NOT NULL and CHECK failures
 * are genuine caller/schema errors: treating them as "already active" would
 * make recordDispatchClaim hide the real failure behind a placeholder
 * `already_active` result, so they are explicitly excluded here.
 *
 * @param {unknown} err - Value caught from the failed INSERT.
 * @returns {boolean} true when the error should be reported as a double-claim.
 */
function isAlreadyActiveConstraintError(err) {
    const code = err && typeof err === "object" && "code" in err
        ? String(err.code ?? "")
        : "";
    const msg = err instanceof Error ? err.message : String(err);
    // Extended result codes that name a non-uniqueness constraint class.
    if (/^SQLITE_CONSTRAINT_(?:FOREIGNKEY|NOTNULL|CHECK)$/.test(code)) {
        return false;
    }
    // Same exclusion by message text, for adapters that only expose the
    // generic SQLITE_CONSTRAINT code (or no code at all).
    if (/\bFOREIGN KEY\b/i.test(msg) || /\b(?:NOT NULL|CHECK) constraint failed\b/i.test(msg)) {
        return false;
    }
    if (code === "SQLITE_CONSTRAINT" || code === "SQLITE_CONSTRAINT_UNIQUE") {
        return true;
    }
    return /\bUNIQUE\b|\bconstraint failed\b/i.test(msg);
}
|
|
29
|
+
/**
 * Claim a unit for a worker by inserting a `claimed` row into unit_dispatches.
 *
 * Everything below runs inside a single `transaction(...)` callback:
 *   1. Confirm the caller still holds the milestone lease (same milestone_id,
 *      worker_id and fencing_token, with status 'held'). If not, nothing is
 *      written and a `stale_lease` result is returned.
 *   2. INSERT the dispatch row and record a "dispatch-claimed" audit event.
 *   3. Double-claim guard (B2): the partial unique index
 *      idx_unit_dispatches_active_per_unit refuses the INSERT when another
 *      row for the same unit_id is already 'claimed' or 'running'. That
 *      rejection is converted into an `already_active` result describing the
 *      row that currently owns the unit.
 *
 * @param input - Claim fields: traceId, workerId, milestoneLeaseToken,
 *   milestoneId, unitType, unitId, plus optional turnId, sliceId, taskId,
 *   attemptN (defaults to 1) and maxAttempts (defaults to 3).
 * @returns `{ ok: true, dispatchId }` on success, or `{ ok: false, error }`
 *   with `error` set to "stale_lease" or "already_active" and the matching
 *   detail fields.
 * @throws Error when the DB is unavailable; any error from the INSERT or the
 *   audit write that is not recognised as a double-claim is rethrown.
 */
export function recordDispatchClaim(input) {
    if (!isDbAvailable()) {
        throw new Error("recordDispatchClaim: DB unavailable");
    }
    const claimedAt = new Date().toISOString();
    return transaction(() => {
        const db = _getAdapter();
        const leaseQuery = db.prepare(`SELECT fencing_token
       FROM milestone_leases
       WHERE milestone_id = :milestone_id
         AND worker_id = :worker_id
         AND fencing_token = :token
         AND status = 'held'`);
        const heldLease = leaseQuery.get({
            ":milestone_id": input.milestoneId,
            ":worker_id": input.workerId,
            ":token": input.milestoneLeaseToken,
        });
        if (!heldLease) {
            // Lease was lost or superseded: refuse to dispatch under it.
            return {
                ok: false,
                error: "stale_lease",
                milestoneId: input.milestoneId,
                workerId: input.workerId,
                milestoneLeaseToken: input.milestoneLeaseToken,
            };
        }
        try {
            const attemptN = input.attemptN ?? 1;
            const insertClaim = db.prepare(`INSERT INTO unit_dispatches (
           trace_id, turn_id, worker_id, milestone_lease_token,
           milestone_id, slice_id, task_id,
           unit_type, unit_id, status, attempt_n,
           started_at, max_attempts
         ) VALUES (
           :trace_id, :turn_id, :worker_id, :milestone_lease_token,
           :milestone_id, :slice_id, :task_id,
           :unit_type, :unit_id, 'claimed', :attempt_n,
           :started_at, :max_attempts
         )`);
            const inserted = insertClaim.run({
                ":trace_id": input.traceId,
                ":turn_id": input.turnId ?? null,
                ":worker_id": input.workerId,
                ":milestone_lease_token": input.milestoneLeaseToken,
                ":milestone_id": input.milestoneId,
                ":slice_id": input.sliceId ?? null,
                ":task_id": input.taskId ?? null,
                ":unit_type": input.unitType,
                ":unit_id": input.unitId,
                ":attempt_n": attemptN,
                ":started_at": claimedAt,
                ":max_attempts": input.maxAttempts ?? 3,
            });
            // Number() normalises the rowid; a missing rowid becomes 0.
            const dispatchId = Number(inserted.lastInsertRowid ?? 0);
            insertAuditEvent({
                eventId: randomUUID(),
                traceId: input.traceId,
                turnId: input.turnId ?? undefined,
                category: "orchestration",
                type: "dispatch-claimed",
                ts: claimedAt,
                payload: {
                    dispatchId,
                    unitId: input.unitId,
                    unitType: input.unitType,
                    workerId: input.workerId,
                    attemptN,
                },
            });
            return { ok: true, dispatchId };
        }
        catch (err) {
            if (isAlreadyActiveConstraintError(err)) {
                // The index rejected the INSERT: look up the row that holds
                // the unit so the caller can decide how to react.
                const holder = db.prepare(`SELECT id, status, worker_id FROM unit_dispatches
           WHERE unit_id = :unit_id AND status IN ('claimed','running')
           ORDER BY id DESC LIMIT 1`).get({ ":unit_id": input.unitId });
                return {
                    ok: false,
                    error: "already_active",
                    existingId: holder?.id ?? 0,
                    existingStatus: holder?.status ?? "claimed",
                    existingWorker: holder?.worker_id ?? "unknown",
                };
            }
            throw err;
        }
    });
}
|
|
122
|
+
/**
 * Promote a dispatch from `claimed` to `running`.
 *
 * The UPDATE is conditional on the row still being `claimed`, so calling this
 * for a dispatch in any other status changes nothing. Silently does nothing
 * when the DB is unavailable.
 *
 * @param dispatchId - id of the unit_dispatches row.
 */
export function markRunning(dispatchId) {
    if (isDbAvailable()) {
        const promote = _getAdapter().prepare(`UPDATE unit_dispatches SET status = 'running'
     WHERE id = :id AND status = 'claimed'`);
        promote.run({ ":id": dispatchId });
    }
}
|
|
130
|
+
/**
 * Close an active dispatch as `completed`.
 *
 * Only rows currently `claimed` or `running` are updated. A
 * "dispatch-completed" audit event is written after the transaction, and only
 * when the UPDATE reported at least one changed row. Silently does nothing
 * when the DB is unavailable.
 *
 * @param dispatchId - id of the unit_dispatches row.
 * @param opts - Optional `exitReason` and `verificationEvidenceId`; each is
 *   stored as NULL when omitted.
 */
export function markCompleted(dispatchId, opts) {
    if (!isDbAvailable())
        return;
    const endedAt = new Date().toISOString();
    const db = _getAdapter();
    const outcome = transaction(() => db.prepare(`UPDATE unit_dispatches
       SET status = 'completed', ended_at = :ended_at,
           exit_reason = :exit_reason,
           verification_evidence_id = :evidence_id
       WHERE id = :id
         AND status IN ('claimed','running')`).run({
        ":id": dispatchId,
        ":ended_at": endedAt,
        ":exit_reason": opts?.exitReason ?? null,
        ":evidence_id": opts?.verificationEvidenceId ?? null,
    }));
    // Adapters that do not report a numeric change count are treated as
    // "nothing updated".
    const updatedRows = typeof outcome.changes === "number" ? outcome.changes : 0;
    if (updatedRows < 1)
        return;
    // The audit event is keyed by the dispatch id rendered as a string.
    insertAuditEvent({
        eventId: randomUUID(),
        traceId: dispatchId.toString(),
        category: "orchestration",
        type: "dispatch-completed",
        ts: endedAt,
        payload: { dispatchId },
    });
}
|
|
165
|
+
/**
 * Close an active dispatch as `failed`, optionally scheduling a retry.
 *
 * Only rows currently `claimed` or `running` are updated. When
 * `opts.retryAfterMs` is a finite number — including 0, meaning "retry
 * immediately" — `next_run_at` is set to now + retryAfterMs; otherwise it is
 * stored as NULL. (A plain truthiness test would store retry_after_ms = 0
 * alongside next_run_at = NULL, leaving the row self-contradictory.)
 *
 * A "dispatch-failed" audit event is written after the transaction, and only
 * when the UPDATE reported at least one changed row. Silently does nothing
 * when the DB is unavailable.
 *
 * @param dispatchId - id of the unit_dispatches row.
 * @param opts - Required object: `errorSummary`, plus optional `errorCode`
 *   and `retryAfterMs` (milliseconds).
 */
export function markFailed(dispatchId, opts) {
    if (!isDbAvailable())
        return;
    const now = new Date();
    const nowIso = now.toISOString();
    const retryAfterMs = opts.retryAfterMs;
    // Finite-number check rather than truthiness: 0 is a valid delay, and a
    // non-finite value would make toISOString() throw a RangeError.
    const nextRunIso = typeof retryAfterMs === "number" && Number.isFinite(retryAfterMs)
        ? new Date(now.getTime() + retryAfterMs).toISOString()
        : null;
    const db = _getAdapter();
    let changes = 0;
    transaction(() => {
        const result = db.prepare(`UPDATE unit_dispatches
       SET status = 'failed', ended_at = :ended_at,
           error_summary = :error_summary,
           last_error_code = :last_error_code,
           last_error_at = :last_error_at,
           retry_after_ms = :retry_after_ms,
           next_run_at = :next_run_at
       WHERE id = :id
         AND status IN ('claimed','running')`).run({
            ":id": dispatchId,
            ":ended_at": nowIso,
            ":error_summary": opts.errorSummary,
            ":last_error_code": opts.errorCode ?? null,
            ":last_error_at": nowIso,
            ":retry_after_ms": retryAfterMs ?? null,
            ":next_run_at": nextRunIso,
        });
        // Adapters that do not report a numeric change count are treated as
        // "nothing updated".
        changes =
            typeof result.changes === "number"
                ? result.changes
                : 0;
    });
    if (changes < 1)
        return;
    // The audit event is keyed by the dispatch id rendered as a string.
    insertAuditEvent({
        eventId: randomUUID(),
        traceId: dispatchId.toString(),
        category: "orchestration",
        type: "dispatch-failed",
        ts: nowIso,
        payload: { dispatchId, errorSummary: opts.errorSummary, retryAfterMs: retryAfterMs ?? null },
    });
}
|
|
210
|
+
/**
 * Close an active dispatch as `stuck`, recording why in `exit_reason`.
 *
 * Only rows currently `claimed` or `running` are updated. A "dispatch-stuck"
 * audit event is written after the transaction, and only when the UPDATE
 * reported a positive change count. Silently does nothing when the DB is
 * unavailable.
 *
 * @param dispatchId - id of the unit_dispatches row.
 * @param reason - Text stored in exit_reason and echoed in the audit payload.
 */
export function markStuck(dispatchId, reason) {
    if (!isDbAvailable())
        return;
    const endedAt = new Date().toISOString();
    const db = _getAdapter();
    const outcome = transaction(() => {
        const update = db.prepare(`UPDATE unit_dispatches
       SET status = 'stuck', ended_at = :ended_at, exit_reason = :reason
       WHERE id = :id
         AND status IN ('claimed','running')`);
        return update.run({ ":id": dispatchId, ":ended_at": endedAt, ":reason": reason });
    });
    // Adapters that do not report a numeric change count are treated as
    // "nothing updated".
    let updatedRows = 0;
    if (typeof outcome.changes === "number") {
        updatedRows = outcome.changes;
    }
    if (updatedRows <= 0)
        return;
    // The audit event is keyed by the dispatch id rendered as a string.
    insertAuditEvent({
        eventId: randomUUID(),
        traceId: dispatchId.toString(),
        category: "orchestration",
        type: "dispatch-stuck",
        ts: endedAt,
        payload: { dispatchId, reason },
    });
}
|
|
236
|
+
/**
 * Close an active dispatch as `paused`.
 *
 * Only rows currently `claimed` or `running` are updated, and `ended_at` is
 * stamped with the current time. No audit event is written by this function.
 * Silently does nothing when the DB is unavailable.
 *
 * @param dispatchId - id of the unit_dispatches row.
 */
export function markPaused(dispatchId) {
    if (isDbAvailable()) {
        const pausedAt = new Date().toISOString();
        const pause = _getAdapter().prepare(`UPDATE unit_dispatches
     SET status = 'paused', ended_at = :ended_at
     WHERE id = :id AND status IN ('claimed','running')`);
        pause.run({ ":id": dispatchId, ":ended_at": pausedAt });
    }
}
|
|
246
|
+
/**
 * Close a dispatch as `canceled`, recording why in `exit_reason`.
 *
 * Rows in `pending`, `claimed` or `running` are eligible; `ended_at` is
 * stamped with the current time. No audit event is written by this function.
 * Silently does nothing when the DB is unavailable.
 *
 * @param dispatchId - id of the unit_dispatches row.
 * @param reason - Text stored in exit_reason.
 */
export function markCanceled(dispatchId, reason) {
    if (isDbAvailable()) {
        const canceledAt = new Date().toISOString();
        const cancel = _getAdapter().prepare(`UPDATE unit_dispatches
     SET status = 'canceled', ended_at = :ended_at, exit_reason = :reason
     WHERE id = :id AND status IN ('pending','claimed','running')`);
        cancel.run({ ":id": dispatchId, ":ended_at": canceledAt, ":reason": reason });
    }
}
|
|
256
|
+
/**
 * Return up to `limit` dispatch rows for one unit, newest first (ordered by
 * id descending).
 *
 * Intended for callers of recordDispatchClaim that need to work out
 * attempt_n, and for detect-stuck.ts (B3) to check the retry budget before
 * declaring a unit stuck.
 *
 * @param unitId - unit_id to look up.
 * @param limit - Maximum number of rows; defaults to 10.
 * @returns Matching rows, or an empty array when the DB is unavailable.
 */
export function getRecentForUnit(unitId, limit = 10) {
    if (isDbAvailable()) {
        const recent = _getAdapter().prepare(`SELECT * FROM unit_dispatches WHERE unit_id = :unit_id ORDER BY id DESC LIMIT :limit`);
        return recent.all({ ":unit_id": unitId, ":limit": limit });
    }
    return [];
}
|
|
267
|
+
/**
 * Return the single newest dispatch row for a unit (highest id), whatever its
 * status.
 *
 * @param unitId - unit_id to look up.
 * @returns The row, or null when the unit has no dispatch rows or the DB is
 *   unavailable.
 */
export function getLatestForUnit(unitId) {
    if (!isDbAvailable()) {
        return null;
    }
    const latest = _getAdapter()
        .prepare(`SELECT * FROM unit_dispatches WHERE unit_id = :unit_id ORDER BY id DESC LIMIT 1`)
        .get({ ":unit_id": unitId });
    // Normalise a missing row (undefined or null) to null.
    if (latest === undefined || latest === null) {
        return null;
    }
    return latest;
}
|
|
278
|
+
/**
 * Phase C — list the unit_id values a worker dispatched most recently,
 * returned oldest-first.
 *
 * Stands in for the persisted `recentUnits` field of stuck-state.json: the
 * auto-loop seeds loopState.recentUnits from it on session start so the
 * stuck-detector window survives a session restart (#3704). The query picks
 * the newest `limit` rows (started_at, then id, descending) and the result is
 * flipped so it matches the oldest-first window shape detect-stuck.ts
 * expects.
 *
 * @param workerId - worker_id to filter on.
 * @param limit - Maximum number of entries; defaults to 20.
 * @returns Array of `{ key: unit_id }`, or an empty array when the DB is
 *   unavailable.
 */
export function getRecentUnitKeysForWorker(workerId, limit = 20) {
    if (!isDbAvailable()) {
        return [];
    }
    const query = _getAdapter().prepare(`SELECT unit_id FROM unit_dispatches
       WHERE worker_id = :worker_id
       ORDER BY started_at DESC, id DESC
       LIMIT :limit`);
    const newestFirst = query.all({ ":worker_id": workerId, ":limit": limit });
    // Flip to oldest-first so callers can treat it as a sliding window.
    const oldestFirst = newestFirst.reverse();
    return oldestFirst.map(({ unit_id }) => ({ key: unit_id }));
}
|
|
298
|
+
/**
 * List the unit_id values most recently dispatched by any worker registered
 * under a given project root, returned oldest-first.
 *
 * Dispatch rows are joined to `workers` on worker_id and filtered by
 * `workers.project_root_realpath`. The query picks the newest `limit` rows
 * (started_at, then id, descending) and the result is flipped to
 * oldest-first.
 *
 * @param projectRootRealpath - Value matched against
 *   workers.project_root_realpath.
 * @param limit - Maximum number of entries; defaults to 20.
 * @returns Array of `{ key: unit_id }`, or an empty array when the DB is
 *   unavailable.
 */
export function getRecentUnitKeysForProjectRoot(projectRootRealpath, limit = 20) {
    if (!isDbAvailable()) {
        return [];
    }
    const query = _getAdapter().prepare(`SELECT ud.unit_id
       FROM unit_dispatches ud
       INNER JOIN workers w ON w.worker_id = ud.worker_id
       WHERE w.project_root_realpath = :project_root_realpath
       ORDER BY ud.started_at DESC, ud.id DESC
       LIMIT :limit`);
    const newestFirst = query.all({
        ":project_root_realpath": projectRootRealpath,
        ":limit": limit,
    });
    // Flip to oldest-first so callers can treat it as a sliding window.
    const oldestFirst = newestFirst.reverse();
    return oldestFirst.map(({ unit_id }) => ({ key: unit_id }));
}
|
|
313
|
+
/**
 * Return every dispatch row of one milestone that is in the given status,
 * ordered by id ascending. Handy for janitors and dashboards.
 *
 * @param milestoneId - milestone_id to filter on.
 * @param status - Exact status value to match.
 * @returns Matching rows, or an empty array when the DB is unavailable.
 */
export function getDispatchesByStatus(milestoneId, status) {
    if (isDbAvailable()) {
        const byStatus = _getAdapter().prepare(`SELECT * FROM unit_dispatches WHERE milestone_id = :mid AND status = :status ORDER BY id`);
        return byStatus.all({ ":mid": milestoneId, ":status": status });
    }
    return [];
}
|
|
@@ -0,0 +1,42 @@
|
|
|
1
|
+
# Auto-mode coordination is single-host
|
|
2
|
+
|
|
3
|
+
The DB-backed coordination tables introduced by Phase B (`workers`,
|
|
4
|
+
`milestone_leases`, `unit_dispatches`, `cancellation_requests`,
|
|
5
|
+
`command_queue`) and the supporting `runtime_kv` table from Phase C all
|
|
6
|
+
rely on **shared SQLite WAL on local disk**. They do not work across
|
|
7
|
+
machines.
|
|
8
|
+
|
|
9
|
+
## Why single-host only
|
|
10
|
+
|
|
11
|
+
- SQLite WAL coordination — the locking primitives that make
|
|
12
|
+
`claimMilestoneLease`, `recordDispatchClaim`, and `claimNextCommand`
|
|
13
|
+
atomic — is local-disk only. Network filesystems (NFS, SMB, S3FS) and
|
|
14
|
+
fuse mounts break the lock semantics that the WAL relies on.
|
|
15
|
+
- Heartbeat TTL (`workers.last_heartbeat_at`) compares timestamps written
|
|
16
|
+
with SQLite wall-clock time (`datetime('now')`). Across machines without
|
|
17
|
+
wall-clock synchronization (for example NTP/chrony), TTL filtering can
|
|
18
|
+
produce phantom-active or premature-crashed verdicts. Monotonic clocks
|
|
19
|
+
are not used for these comparisons.
|
|
20
|
+
- Fencing tokens (`milestone_leases.fencing_token`) are monotonically
|
|
21
|
+
ordered by SQL within a single transaction. Cross-host races could
|
|
22
|
+
produce duplicate tokens if two SQLite processes opened the same DB
|
|
23
|
+
on a network mount.
|
|
24
|
+
|
|
25
|
+
## What does work
|
|
26
|
+
|
|
27
|
+
- Multiple `gsd auto` worker processes on the **same machine**, sharing
|
|
28
|
+
the project's SQLite DB via WAL. The lease check refuses concurrent
|
|
29
|
+
claims on the same milestone; the dispatch ledger's partial unique
|
|
30
|
+
index refuses double-claims of the same unit.
|
|
31
|
+
- A single `gsd auto` worker plus arbitrary read-only consumers
|
|
32
|
+
(dashboards, doctors) on the same machine.
|
|
33
|
+
- Worktree-based parallelism on the same machine, where each worker
|
|
34
|
+
holds a different milestone lease.
|
|
35
|
+
|
|
36
|
+
## Multi-host alternatives
|
|
37
|
+
|
|
38
|
+
If you need to coordinate `gsd auto` workers across machines, you need
|
|
39
|
+
a real coordinator: Postgres for the ledger + a leader-election service
|
|
40
|
+
(etcd, Consul) for the leases. That's out of scope for these phases.
|
|
41
|
+
The schema and module shapes here would need a non-trivial backend
|
|
42
|
+
swap before they could ride on top of either.
|
|
@@ -24,6 +24,7 @@ import { resolveMilestoneIntegrationBranch } from "./git-service.js";
|
|
|
24
24
|
import { nativeIsRepo, nativeHasChanges, nativeLastCommitEpoch, nativeGetCurrentBranch, nativeAddTracked, nativeCommit } from "./native-git-bridge.js";
|
|
25
25
|
import { loadEffectiveGSDPreferences } from "./preferences.js";
|
|
26
26
|
import { runEnvironmentChecks } from "./doctor-environment.js";
|
|
27
|
+
import { ensureDbOpen } from "./bootstrap/dynamic-tools.js";
|
|
27
28
|
/** In-memory health history for the current auto-mode session. */
|
|
28
29
|
let healthHistory = [];
|
|
29
30
|
/** Count of consecutive units with unresolved errors. */
|
|
@@ -159,6 +160,9 @@ export async function preDispatchHealthGate(basePath) {
|
|
|
159
160
|
// If a stale lock exists, the crash recovery path should handle it,
|
|
160
161
|
// not a new dispatch. This prevents double-dispatch after crashes.
|
|
161
162
|
try {
|
|
163
|
+
if (existsSync(join(gsdRoot(basePath), "gsd.db"))) {
|
|
164
|
+
await ensureDbOpen(basePath);
|
|
165
|
+
}
|
|
162
166
|
const lock = readCrashLock(basePath);
|
|
163
167
|
if (lock && !isLockProcessAlive(lock)) {
|
|
164
168
|
// Auto-clear it since we're about to dispatch anyway
|
|
@@ -6,6 +6,8 @@ import { deriveState, isGhostMilestone, isReusableGhostMilestone } from "./state
|
|
|
6
6
|
import { saveFile } from "./files.js";
|
|
7
7
|
import { nativeIsRepo, nativeForEachRef, nativeUpdateRef } from "./native-git-bridge.js";
|
|
8
8
|
import { readCrashLock, isLockProcessAlive, clearLock } from "./crash-recovery.js";
|
|
9
|
+
import { getActiveAutoWorkers } from "./db/auto-workers.js";
|
|
10
|
+
import { normalizeRealPath } from "./paths.js";
|
|
9
11
|
import { ensureGitignore, isGsdGitignored } from "./gitignore.js";
|
|
10
12
|
import { readAllSessionStatuses, isSessionStale, removeSessionStatus } from "./session-status-io.js";
|
|
11
13
|
import { recoverFailedMigration } from "./migrate-external.js";
|
|
@@ -26,6 +28,9 @@ function hasAssessmentVerdict(basePath, mid, sid) {
|
|
|
26
28
|
export async function checkRuntimeHealth(basePath, issues, fixesApplied, shouldFix) {
|
|
27
29
|
const root = gsdRoot(basePath);
|
|
28
30
|
// ── Stale crash lock ──────────────────────────────────────────────────
|
|
31
|
+
// Phase C pt 2: the lock state lives in the workers + unit_dispatches
|
|
32
|
+
// tables now, not auto.lock. readCrashLock synthesizes a LockData from
|
|
33
|
+
// the DB; isLockProcessAlive is a pure OS PID check.
|
|
29
34
|
try {
|
|
30
35
|
const lock = readCrashLock(basePath);
|
|
31
36
|
if (lock) {
|
|
@@ -36,13 +41,13 @@ export async function checkRuntimeHealth(basePath, issues, fixesApplied, shouldF
|
|
|
36
41
|
code: "stale_crash_lock",
|
|
37
42
|
scope: "project",
|
|
38
43
|
unitId: "project",
|
|
39
|
-
message: `Stale auto
|
|
40
|
-
file: "
|
|
44
|
+
message: `Stale auto-mode worker (PID ${lock.pid}, started ${lock.startedAt}, was executing ${lock.unitType} ${lock.unitId}) — process is no longer running`,
|
|
45
|
+
file: "<workers table>",
|
|
41
46
|
fixable: true,
|
|
42
47
|
});
|
|
43
48
|
if (shouldFix("stale_crash_lock")) {
|
|
44
49
|
clearLock(basePath);
|
|
45
|
-
fixesApplied.push("cleared stale auto
|
|
50
|
+
fixesApplied.push("cleared stale auto-mode worker state");
|
|
46
51
|
}
|
|
47
52
|
}
|
|
48
53
|
}
|
|
@@ -60,9 +65,20 @@ export async function checkRuntimeHealth(basePath, issues, fixesApplied, shouldF
|
|
|
60
65
|
if (existsSync(lockDir)) {
|
|
61
66
|
const statRes = statSync(lockDir);
|
|
62
67
|
if (statRes.isDirectory()) {
|
|
63
|
-
//
|
|
64
|
-
|
|
65
|
-
|
|
68
|
+
// Phase C pt 2: "any live process holds the lock?" check now means
|
|
69
|
+
// "is any worker registered with status='active' AND a fresh
|
|
70
|
+
// heartbeat for this project?" — readCrashLock returns null for
|
|
71
|
+
// healthy live workers (it surfaces stale ones only), so we must
|
|
72
|
+
// consult getActiveAutoWorkers directly.
|
|
73
|
+
const projectRoot = normalizeRealPath(basePath);
|
|
74
|
+
const activeWorkers = getActiveAutoWorkers().filter((w) => w.project_root_realpath === projectRoot && isLockProcessAlive({
|
|
75
|
+
pid: w.pid,
|
|
76
|
+
startedAt: w.started_at,
|
|
77
|
+
unitType: "starting",
|
|
78
|
+
unitId: "bootstrap",
|
|
79
|
+
unitStartedAt: w.started_at,
|
|
80
|
+
}));
|
|
81
|
+
const lockHolderAlive = activeWorkers.length > 0;
|
|
66
82
|
if (!lockHolderAlive) {
|
|
67
83
|
issues.push({
|
|
68
84
|
severity: "error",
|
|
@@ -2,8 +2,8 @@ import { existsSync, mkdirSync, lstatSync, readdirSync, readFileSync } from "nod
|
|
|
2
2
|
import { join } from "node:path";
|
|
3
3
|
import { loadFile, parseSummary, saveFile, parseTaskPlanMustHaves, countMustHavesMentionedInSummary } from "./files.js";
|
|
4
4
|
import { parseRoadmap as parseLegacyRoadmap, parsePlan as parseLegacyPlan } from "./parsers-legacy.js";
|
|
5
|
-
import { isDbAvailable, getMilestoneSlices, getSliceTasks } from "./gsd-db.js";
|
|
6
|
-
import { resolveMilestoneFile, resolveMilestonePath, resolveSliceFile, resolveSlicePath, resolveTaskFile, resolveTasksDir, milestonesDir, gsdRoot, relMilestoneFile, relSliceFile, relTaskFile, relSlicePath, relGsdRootFile, resolveGsdRootFile, relMilestonePath } from "./paths.js";
|
|
5
|
+
import { isDbAvailable, openDatabase, getMilestoneSlices, getSliceTasks } from "./gsd-db.js";
|
|
6
|
+
import { resolveMilestoneFile, resolveMilestonePath, resolveSliceFile, resolveSlicePath, resolveTaskFile, resolveTasksDir, milestonesDir, gsdRoot, relMilestoneFile, relSliceFile, relTaskFile, relSlicePath, relGsdRootFile, resolveGsdRootFile, relMilestonePath, resolveGsdPathContract } from "./paths.js";
|
|
7
7
|
import { deriveState, isMilestoneComplete } from "./state.js";
|
|
8
8
|
import { invalidateAllCaches } from "./cache.js";
|
|
9
9
|
import { loadEffectiveGSDPreferences } from "./preferences.js";
|
|
@@ -309,6 +309,16 @@ export async function runGSDDoctor(basePath, options) {
|
|
|
309
309
|
const fix = options?.fix === true;
|
|
310
310
|
const dryRun = options?.dryRun === true;
|
|
311
311
|
const fixLevel = options?.fixLevel ?? "all";
|
|
312
|
+
// CLI doctor can run before any tool handler has opened the DB. Runtime
|
|
313
|
+
// health checks need the existing project DB to surface DB-backed crash
|
|
314
|
+
// locks, paused sessions, and coordination rows.
|
|
315
|
+
const dbPath = resolveGsdPathContract(basePath).projectDb;
|
|
316
|
+
if (existsSync(dbPath)) {
|
|
317
|
+
try {
|
|
318
|
+
openDatabase(dbPath);
|
|
319
|
+
}
|
|
320
|
+
catch { /* surfaced later as db_unavailable */ }
|
|
321
|
+
}
|
|
312
322
|
// Issue codes that represent completion state transitions — creating summary
|
|
313
323
|
// stubs, marking slices/milestones done in the roadmap. These belong to the
|
|
314
324
|
// dispatch lifecycle (complete-slice, complete-milestone units), not to
|