gsd-pi 2.78.1-dev.e9d88a536 → 2.78.1-dev.eccf86e27
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +5 -7
- package/dist/help-text.js +1 -1
- package/dist/resource-loader.js +6 -1
- package/dist/resources/.managed-resources-content-hash +1 -1
- package/dist/resources/extensions/gsd/auto/detect-stuck.js +41 -5
- package/dist/resources/extensions/gsd/auto/loop.js +235 -36
- package/dist/resources/extensions/gsd/auto/phases.js +14 -7
- package/dist/resources/extensions/gsd/auto/session.js +36 -0
- package/dist/resources/extensions/gsd/auto-dispatch.js +49 -4
- package/dist/resources/extensions/gsd/auto-post-unit.js +26 -12
- package/dist/resources/extensions/gsd/auto-worktree.js +185 -201
- package/dist/resources/extensions/gsd/auto.js +139 -49
- package/dist/resources/extensions/gsd/bootstrap/agent-end-recovery.js +1 -1
- package/dist/resources/extensions/gsd/bootstrap/register-hooks.js +26 -20
- package/dist/resources/extensions/gsd/bootstrap/write-gate.js +67 -55
- package/dist/resources/extensions/gsd/crash-recovery.js +160 -47
- package/dist/resources/extensions/gsd/db/auto-workers.js +227 -0
- package/dist/resources/extensions/gsd/db/command-queue.js +105 -0
- package/dist/resources/extensions/gsd/db/milestone-leases.js +210 -0
- package/dist/resources/extensions/gsd/db/runtime-kv.js +91 -0
- package/dist/resources/extensions/gsd/db/unit-dispatches.js +322 -0
- package/dist/resources/extensions/gsd/db-writer.js +96 -16
- package/dist/resources/extensions/gsd/delegation-policy.js +155 -0
- package/dist/resources/extensions/gsd/docs/COORDINATION.md +42 -0
- package/dist/resources/extensions/gsd/doctor-proactive.js +4 -0
- package/dist/resources/extensions/gsd/doctor-runtime-checks.js +22 -6
- package/dist/resources/extensions/gsd/doctor.js +12 -2
- package/dist/resources/extensions/gsd/gsd-db.js +355 -3
- package/dist/resources/extensions/gsd/guided-flow-queue.js +1 -1
- package/dist/resources/extensions/gsd/guided-flow.js +116 -26
- package/dist/resources/extensions/gsd/interrupted-session.js +18 -15
- package/dist/resources/extensions/gsd/metrics.js +287 -1
- package/dist/resources/extensions/gsd/paths.js +79 -8
- package/dist/resources/extensions/gsd/prompts/complete-slice.md +4 -4
- package/dist/resources/extensions/gsd/prompts/execute-task.md +3 -3
- package/dist/resources/extensions/gsd/prompts/guided-discuss-milestone.md +8 -1
- package/dist/resources/extensions/gsd/prompts/guided-discuss-project.md +22 -7
- package/dist/resources/extensions/gsd/prompts/guided-discuss-requirements.md +6 -2
- package/dist/resources/extensions/gsd/prompts/guided-discuss-slice.md +8 -1
- package/dist/resources/extensions/gsd/state.js +21 -6
- package/dist/resources/extensions/gsd/templates/project.md +10 -0
- package/dist/resources/extensions/gsd/workflow-mcp.js +2 -2
- package/dist/resources/extensions/gsd/workspace.js +59 -0
- package/dist/resources/extensions/gsd/worktree-resolver.js +79 -2
- package/dist/resources/extensions/gsd/write-intercept.js +3 -3
- package/dist/tsconfig.extensions.tsbuildinfo +1 -1
- package/dist/web/standalone/.next/BUILD_ID +1 -1
- package/dist/web/standalone/.next/app-path-routes-manifest.json +14 -14
- package/dist/web/standalone/.next/build-manifest.json +2 -2
- package/dist/web/standalone/.next/prerender-manifest.json +3 -3
- package/dist/web/standalone/.next/required-server-files.json +1 -1
- package/dist/web/standalone/.next/server/app/_global-error.html +1 -1
- package/dist/web/standalone/.next/server/app/_global-error.rsc +1 -1
- package/dist/web/standalone/.next/server/app/_global-error.segments/_full.segment.rsc +1 -1
- package/dist/web/standalone/.next/server/app/_global-error.segments/_global-error/__PAGE__.segment.rsc +1 -1
- package/dist/web/standalone/.next/server/app/_global-error.segments/_global-error.segment.rsc +1 -1
- package/dist/web/standalone/.next/server/app/_global-error.segments/_head.segment.rsc +1 -1
- package/dist/web/standalone/.next/server/app/_global-error.segments/_index.segment.rsc +1 -1
- package/dist/web/standalone/.next/server/app/_global-error.segments/_tree.segment.rsc +1 -1
- package/dist/web/standalone/.next/server/app/_not-found.html +1 -1
- package/dist/web/standalone/.next/server/app/_not-found.rsc +1 -1
- package/dist/web/standalone/.next/server/app/_not-found.segments/_full.segment.rsc +1 -1
- package/dist/web/standalone/.next/server/app/_not-found.segments/_head.segment.rsc +1 -1
- package/dist/web/standalone/.next/server/app/_not-found.segments/_index.segment.rsc +1 -1
- package/dist/web/standalone/.next/server/app/_not-found.segments/_not-found/__PAGE__.segment.rsc +1 -1
- package/dist/web/standalone/.next/server/app/_not-found.segments/_not-found.segment.rsc +1 -1
- package/dist/web/standalone/.next/server/app/_not-found.segments/_tree.segment.rsc +1 -1
- package/dist/web/standalone/.next/server/app/index.html +1 -1
- package/dist/web/standalone/.next/server/app/index.rsc +1 -1
- package/dist/web/standalone/.next/server/app/index.segments/__PAGE__.segment.rsc +1 -1
- package/dist/web/standalone/.next/server/app/index.segments/_full.segment.rsc +1 -1
- package/dist/web/standalone/.next/server/app/index.segments/_head.segment.rsc +1 -1
- package/dist/web/standalone/.next/server/app/index.segments/_index.segment.rsc +1 -1
- package/dist/web/standalone/.next/server/app/index.segments/_tree.segment.rsc +1 -1
- package/dist/web/standalone/.next/server/app-paths-manifest.json +14 -14
- package/dist/web/standalone/.next/server/middleware-build-manifest.js +1 -1
- package/dist/web/standalone/.next/server/pages/404.html +1 -1
- package/dist/web/standalone/.next/server/pages/500.html +1 -1
- package/dist/web/standalone/.next/server/server-reference-manifest.json +1 -1
- package/dist/web/standalone/server.js +1 -1
- package/package.json +1 -1
- package/packages/mcp-server/README.md +2 -11
- package/packages/mcp-server/dist/remote-questions.d.ts +27 -0
- package/packages/mcp-server/dist/remote-questions.d.ts.map +1 -1
- package/packages/mcp-server/dist/remote-questions.js +28 -0
- package/packages/mcp-server/dist/remote-questions.js.map +1 -1
- package/packages/mcp-server/dist/server.d.ts +28 -0
- package/packages/mcp-server/dist/server.d.ts.map +1 -1
- package/packages/mcp-server/dist/server.js +94 -4
- package/packages/mcp-server/dist/server.js.map +1 -1
- package/packages/mcp-server/dist/workflow-tools.js.map +1 -1
- package/packages/mcp-server/src/mcp-server.test.ts +226 -0
- package/packages/mcp-server/src/remote-questions.test.ts +103 -0
- package/packages/mcp-server/src/remote-questions.ts +35 -0
- package/packages/mcp-server/src/server.ts +129 -6
- package/packages/mcp-server/src/workflow-tools.ts +1 -1
- package/packages/mcp-server/tsconfig.tsbuildinfo +1 -1
- package/src/resources/extensions/gsd/auto/detect-stuck.ts +37 -5
- package/src/resources/extensions/gsd/auto/loop.ts +263 -41
- package/src/resources/extensions/gsd/auto/phases.ts +15 -7
- package/src/resources/extensions/gsd/auto/session.ts +40 -0
- package/src/resources/extensions/gsd/auto-dispatch.ts +63 -4
- package/src/resources/extensions/gsd/auto-post-unit.ts +27 -12
- package/src/resources/extensions/gsd/auto-worktree.ts +218 -225
- package/src/resources/extensions/gsd/auto.ts +166 -43
- package/src/resources/extensions/gsd/bootstrap/agent-end-recovery.ts +1 -1
- package/src/resources/extensions/gsd/bootstrap/register-hooks.ts +26 -21
- package/src/resources/extensions/gsd/bootstrap/tests/write-gate-basepath.test.ts +103 -0
- package/src/resources/extensions/gsd/bootstrap/write-gate.ts +80 -55
- package/src/resources/extensions/gsd/crash-recovery.ts +177 -43
- package/src/resources/extensions/gsd/db/auto-workers.ts +273 -0
- package/src/resources/extensions/gsd/db/command-queue.ts +149 -0
- package/src/resources/extensions/gsd/db/milestone-leases.ts +274 -0
- package/src/resources/extensions/gsd/db/runtime-kv.ts +127 -0
- package/src/resources/extensions/gsd/db/unit-dispatches.ts +446 -0
- package/src/resources/extensions/gsd/db-writer.ts +113 -17
- package/src/resources/extensions/gsd/delegation-policy.ts +197 -0
- package/src/resources/extensions/gsd/docs/COORDINATION.md +42 -0
- package/src/resources/extensions/gsd/doctor-proactive.ts +4 -0
- package/src/resources/extensions/gsd/doctor-runtime-checks.ts +24 -6
- package/src/resources/extensions/gsd/doctor.ts +10 -2
- package/src/resources/extensions/gsd/gsd-db.ts +354 -3
- package/src/resources/extensions/gsd/guided-flow-queue.ts +1 -1
- package/src/resources/extensions/gsd/guided-flow.ts +152 -26
- package/src/resources/extensions/gsd/interrupted-session.ts +19 -12
- package/src/resources/extensions/gsd/metrics.ts +321 -1
- package/src/resources/extensions/gsd/paths.ts +67 -8
- package/src/resources/extensions/gsd/prompts/complete-slice.md +4 -4
- package/src/resources/extensions/gsd/prompts/execute-task.md +3 -3
- package/src/resources/extensions/gsd/prompts/guided-discuss-milestone.md +8 -1
- package/src/resources/extensions/gsd/prompts/guided-discuss-project.md +22 -7
- package/src/resources/extensions/gsd/prompts/guided-discuss-requirements.md +6 -2
- package/src/resources/extensions/gsd/prompts/guided-discuss-slice.md +8 -1
- package/src/resources/extensions/gsd/state.ts +44 -6
- package/src/resources/extensions/gsd/templates/project.md +10 -0
- package/src/resources/extensions/gsd/tests/auto-discuss-milestone-deadlock-4973.test.ts +14 -14
- package/src/resources/extensions/gsd/tests/auto-loop-no-copy-artifacts.test.ts +72 -0
- package/src/resources/extensions/gsd/tests/auto-loop-symlink-worktree.test.ts +190 -0
- package/src/resources/extensions/gsd/tests/auto-session-scope.test.ts +331 -0
- package/src/resources/extensions/gsd/tests/auto-workers.test.ts +105 -0
- package/src/resources/extensions/gsd/tests/auto-worktree-registry.test.ts +176 -0
- package/src/resources/extensions/gsd/tests/command-queue.test.ts +141 -0
- package/src/resources/extensions/gsd/tests/crash-recovery-via-db.test.ts +203 -0
- package/src/resources/extensions/gsd/tests/crash-recovery.test.ts +169 -59
- package/src/resources/extensions/gsd/tests/db-writer-path-containment.test.ts +152 -0
- package/src/resources/extensions/gsd/tests/db-writer-root-artifact.test.ts +221 -0
- package/src/resources/extensions/gsd/tests/db-writer-scope.test.ts +230 -0
- package/src/resources/extensions/gsd/tests/delegation-policy.test.ts +151 -0
- package/src/resources/extensions/gsd/tests/detect-stuck-respects-retry.test.ts +173 -0
- package/src/resources/extensions/gsd/tests/dispatch-backgroundable-annotation.test.ts +55 -0
- package/src/resources/extensions/gsd/tests/draft-promotion.test.ts +3 -23
- package/src/resources/extensions/gsd/tests/gate-1b-orphan-discrimination.test.ts +193 -0
- package/src/resources/extensions/gsd/tests/gate-1b-recovery-bound-corrections.test.ts +246 -0
- package/src/resources/extensions/gsd/tests/gate-1b-recovery-bound.test.ts +218 -0
- package/src/resources/extensions/gsd/tests/gsd-db-failed-open-restore.test.ts +117 -0
- package/src/resources/extensions/gsd/tests/gsd-db-workspace-scope.test.ts +226 -0
- package/src/resources/extensions/gsd/tests/gsd-root-canonical.test.ts +66 -0
- package/src/resources/extensions/gsd/tests/gsd-root-home-guard.test.ts +68 -5
- package/src/resources/extensions/gsd/tests/guided-flow-prompt-consolidation.test.ts +4 -4
- package/src/resources/extensions/gsd/tests/integration/auto-worktree.test.ts +22 -12
- package/src/resources/extensions/gsd/tests/integration/doctor-proactive.test.ts +24 -10
- package/src/resources/extensions/gsd/tests/integration/doctor-runtime.test.ts +35 -23
- package/src/resources/extensions/gsd/tests/integration/workspace-collapse-integration.test.ts +369 -0
- package/src/resources/extensions/gsd/tests/interrupted-session-auto.test.ts +72 -25
- package/src/resources/extensions/gsd/tests/interrupted-session-ui.test.ts +72 -25
- package/src/resources/extensions/gsd/tests/memory-pressure-stuck-state.test.ts +9 -6
- package/src/resources/extensions/gsd/tests/metrics-atomic-merge.test.ts +222 -0
- package/src/resources/extensions/gsd/tests/metrics-lock-hardening.test.ts +400 -0
- package/src/resources/extensions/gsd/tests/metrics-lock-not-acquired.test.ts +141 -0
- package/src/resources/extensions/gsd/tests/metrics-lock-retry-sleep.test.ts +287 -0
- package/src/resources/extensions/gsd/tests/metrics-prune-cache-invalidation.test.ts +149 -0
- package/src/resources/extensions/gsd/tests/metrics-scope.test.ts +378 -0
- package/src/resources/extensions/gsd/tests/milestone-leases.test.ts +152 -0
- package/src/resources/extensions/gsd/tests/originalbase-path-comparison.test.ts +329 -0
- package/src/resources/extensions/gsd/tests/parallel-milestone-isolation.test.ts +106 -0
- package/src/resources/extensions/gsd/tests/path-cache-decoupled.test.ts +209 -0
- package/src/resources/extensions/gsd/tests/path-normalization-unified.test.ts +175 -0
- package/src/resources/extensions/gsd/tests/paths-cache.test.ts +170 -0
- package/src/resources/extensions/gsd/tests/paused-session-via-db.test.ts +119 -0
- package/src/resources/extensions/gsd/tests/pending-autostart-scope.test.ts +120 -0
- package/src/resources/extensions/gsd/tests/pipeline-variant-dispatch.test.ts +58 -0
- package/src/resources/extensions/gsd/tests/preferences-worktree-sync.test.ts +3 -17
- package/src/resources/extensions/gsd/tests/prompt-contracts.test.ts +150 -7
- package/src/resources/extensions/gsd/tests/register-hooks-depth-verification.test.ts +138 -16
- package/src/resources/extensions/gsd/tests/resume-missing-worktree-warning.test.ts +209 -0
- package/src/resources/extensions/gsd/tests/runtime-kv.test.ts +120 -0
- package/src/resources/extensions/gsd/tests/skipped-validation-completion.test.ts +133 -28
- package/src/resources/extensions/gsd/tests/skipped-validation-db-atomicity.test.ts +17 -0
- package/src/resources/extensions/gsd/tests/stuck-state-via-db.test.ts +134 -0
- package/src/resources/extensions/gsd/tests/sync-layer-scope.test.ts +434 -0
- package/src/resources/extensions/gsd/tests/teardown-chdir-failure-clears-registry.test.ts +162 -0
- package/src/resources/extensions/gsd/tests/teardown-cleanup-parity.test.ts +98 -0
- package/src/resources/extensions/gsd/tests/teardown-failure-clears-registry.test.ts +186 -0
- package/src/resources/extensions/gsd/tests/tool-invocation-error-loop-break.test.ts +1 -1
- package/src/resources/extensions/gsd/tests/unit-dispatches.test.ts +247 -0
- package/src/resources/extensions/gsd/tests/validate-milestone.test.ts +41 -1
- package/src/resources/extensions/gsd/tests/validator-scope-parity.test.ts +239 -0
- package/src/resources/extensions/gsd/tests/workflow-mcp.test.ts +2 -2
- package/src/resources/extensions/gsd/tests/workflow-tool-executors.test.ts +9 -15
- package/src/resources/extensions/gsd/tests/workspace.test.ts +196 -0
- package/src/resources/extensions/gsd/tests/write-gate-predicates.test.ts +35 -35
- package/src/resources/extensions/gsd/tests/write-gate.test.ts +94 -71
- package/src/resources/extensions/gsd/tests/write-intercept.test.ts +1 -1
- package/src/resources/extensions/gsd/workflow-mcp.ts +2 -2
- package/src/resources/extensions/gsd/workspace.ts +95 -0
- package/src/resources/extensions/gsd/worktree-resolver.ts +78 -2
- package/src/resources/extensions/gsd/write-intercept.ts +3 -3
- package/src/resources/extensions/gsd/tests/auto-lock-creation.test.ts +0 -213
- package/src/resources/extensions/gsd/tests/auto-stale-lock-self-kill.test.ts +0 -87
- package/src/resources/extensions/gsd/tests/stop-auto-remote.test.ts +0 -159
- /package/dist/web/standalone/.next/static/{oZGTPvJBQX_IDKKnuV8Bt → Y5UeGFkXTYM9WIQOWHkot}/_buildManifest.js +0 -0
- /package/dist/web/standalone/.next/static/{oZGTPvJBQX_IDKKnuV8Bt → Y5UeGFkXTYM9WIQOWHkot}/_ssgManifest.js +0 -0
|
@@ -0,0 +1,273 @@
|
|
|
1
|
+
// gsd-2 + Auto-mode worker process registry (DB-backed coordination, Phase B)
|
|
2
|
+
//
|
|
3
|
+
// IMPORTANT — naming clarification (codex review LOW N1):
|
|
4
|
+
// This module is the AUTO-MODE PROCESS REGISTRY. It tracks long-running
|
|
5
|
+
// `gsd auto` worker processes for cross-process coordination via the shared
|
|
6
|
+
// SQLite WAL. It is NOT the in-process subagent registry, which lives at
|
|
7
|
+
// `src/resources/extensions/subagent/worker-registry.ts` and tracks dispatched
|
|
8
|
+
// subagent threads within a single process.
|
|
9
|
+
//
|
|
10
|
+
// Both modules use the word "worker" but they are unrelated:
|
|
11
|
+
// - subagent/worker-registry.ts → ephemeral in-process subagent threads
|
|
12
|
+
// - db/auto-workers.ts → durable cross-process auto-mode sessions
|
|
13
|
+
//
|
|
14
|
+
// Single-host invariant: SQLite WAL coordination only works on local disk.
|
|
15
|
+
// NFS / network filesystems break heartbeat semantics. Multi-host execution
|
|
16
|
+
// needs a real coordinator (etcd, Postgres) — out of scope for Phase B.
|
|
17
|
+
|
|
18
|
+
import { randomUUID } from "node:crypto";
|
|
19
|
+
import { hostname } from "node:os";
|
|
20
|
+
|
|
21
|
+
import {
|
|
22
|
+
_getAdapter,
|
|
23
|
+
isDbAvailable,
|
|
24
|
+
transaction,
|
|
25
|
+
insertAuditEvent,
|
|
26
|
+
} from "../gsd-db.js";
|
|
27
|
+
import { normalizeRealPath } from "../paths.js";
|
|
28
|
+
|
|
29
|
+
/**
 * Heartbeat time-to-live, in seconds. Readers in this module compare a
 * worker's `last_heartbeat_at` against "now minus this many seconds".
 */
const HEARTBEAT_TTL_SECONDS = 60;

// Diagnostic label only: it is written to workers.version and into the
// "worker-registered" audit payload. Bump it by hand when the protocol
// changes. It is deliberately not read from package.json so that loading
// this module performs no filesystem I/O.
const WORKER_REGISTRY_VERSION = "1";
|
|
34
|
+
|
|
35
|
+
/** Values the `status` column of a `workers` row can take. */
export type WorkerStatus = "active" | "stopping" | "crashed";

/** Shape of one `workers` row as selected by the readers in this module. */
export interface AutoWorkerRow {
  /** Identifier generated when the worker registers. */
  worker_id: string;
  /** Host name recorded at registration. */
  host: string;
  /** OS process id recorded at registration. */
  pid: number;
  /** ISO-8601 timestamp taken at registration. */
  started_at: string;
  /** Registry version label (diagnostics only). */
  version: string;
  /** ISO-8601 timestamp of the most recent heartbeat. */
  last_heartbeat_at: string;
  /** Current lifecycle state of the worker. */
  status: WorkerStatus;
  /** Project root path supplied by the caller at registration. */
  project_root_realpath: string;
}
|
|
47
|
+
|
|
48
|
+
/**
 * Insert a `workers` row for this auto-mode process and hand back its id.
 *
 * The row starts out with `status='active'` and a heartbeat equal to the
 * registration time. Callers are expected to keep the returned id and call
 * heartbeatAutoWorker() periodically (e.g. once per loop iteration) so the
 * heartbeat stays inside the TTL window.
 *
 * After the insert transaction, a "worker-registered" audit event is
 * recorded with the same timestamp.
 *
 * @param opts.projectRootRealpath project root path stored on the row
 * @returns the generated worker id (`auto-<host>-<pid>-<8 hex chars>`)
 * @throws Error when the DB is unavailable
 */
export function registerAutoWorker(opts: {
  projectRootRealpath: string;
}): string {
  if (!isDbAvailable()) {
    throw new Error("registerAutoWorker: DB unavailable");
  }

  const host = hostname();
  const pid = process.pid;
  const workerId = `auto-${host}-${pid}-${randomUUID().slice(0, 8)}`;
  // One timestamp serves as started_at, the first heartbeat and the audit ts.
  const registeredAt = new Date().toISOString();

  transaction(() => {
    const insertWorker = _getAdapter()!.prepare(
      `INSERT INTO workers (
        worker_id, host, pid, started_at, version,
        last_heartbeat_at, status, project_root_realpath
      ) VALUES (
        :worker_id, :host, :pid, :started_at, :version,
        :last_heartbeat_at, 'active', :project_root_realpath
      )`,
    );
    insertWorker.run({
      ":worker_id": workerId,
      ":host": host,
      ":pid": pid,
      ":started_at": registeredAt,
      ":version": WORKER_REGISTRY_VERSION,
      ":last_heartbeat_at": registeredAt,
      ":project_root_realpath": opts.projectRootRealpath,
    });
  });

  insertAuditEvent({
    eventId: randomUUID(),
    traceId: workerId,
    category: "orchestration",
    type: "worker-registered",
    ts: registeredAt,
    payload: {
      workerId,
      host,
      pid,
      version: WORKER_REGISTRY_VERSION,
      projectRootRealpath: opts.projectRootRealpath,
    },
  });

  return workerId;
}
|
|
103
|
+
|
|
104
|
+
/**
 * Stamp the worker's `last_heartbeat_at` with the current time. Intended to
 * be called once per auto-loop iteration.
 *
 * The UPDATE only matches a row with this id whose status is 'active', so
 * the call quietly does nothing when the row is gone (e.g. removed by a
 * janitor) or is no longer active. It also returns immediately when the DB
 * is unavailable.
 *
 * @param workerId id returned by registerAutoWorker()
 */
export function heartbeatAutoWorker(workerId: string): void {
  if (!isDbAvailable()) return;

  const stamp = new Date().toISOString();
  const refresh = _getAdapter()!.prepare(
    `UPDATE workers SET last_heartbeat_at = :now WHERE worker_id = :worker_id AND status = 'active'`,
  );
  refresh.run({ ":now": stamp, ":worker_id": workerId });
}
|
|
117
|
+
|
|
118
|
+
/**
 * Flip an 'active' worker to 'crashed'. Meant for janitors / doctor commands
 * once a worker's heartbeat has expired beyond the TTL window.
 *
 * A "worker-crashed" audit event is recorded only when the UPDATE actually
 * changed a row; rows that are missing or not 'active' leave no trace.
 * Returns immediately when the DB is unavailable.
 *
 * @param workerId id of the worker to mark
 */
export function markWorkerCrashed(workerId: string): void {
  if (!isDbAvailable()) return;

  const db = _getAdapter()!;
  let rowsFlipped = 0;

  transaction(() => {
    const outcome = db
      .prepare(
        `UPDATE workers SET status = 'crashed' WHERE worker_id = :worker_id AND status = 'active'`,
      )
      .run({ ":worker_id": workerId }) as { changes?: unknown };
    // A run result without a numeric `changes` is treated as "no row changed".
    rowsFlipped = typeof outcome.changes === "number" ? outcome.changes : 0;
  });

  if (rowsFlipped < 1) return;

  insertAuditEvent({
    eventId: randomUUID(),
    traceId: workerId,
    category: "orchestration",
    type: "worker-crashed",
    ts: new Date().toISOString(),
    payload: { workerId },
  });
}
|
|
145
|
+
|
|
146
|
+
/**
 * Set the worker's status to 'stopping'. Used on the stopAuto path when the
 * user shuts auto-mode down cleanly.
 *
 * The UPDATE matches on worker id alone, so it applies whatever the row's
 * current status is. Returns immediately when the DB is unavailable.
 *
 * @param workerId id of the worker to mark
 */
export function markWorkerStopping(workerId: string): void {
  if (!isDbAvailable()) return;

  const db = _getAdapter()!;
  transaction(() => {
    const markStopping = db.prepare(
      `UPDATE workers SET status = 'stopping' WHERE worker_id = :worker_id`,
    );
    markStopping.run({ ":worker_id": workerId });
  });
}
|
|
159
|
+
|
|
160
|
+
/**
 * List workers that are 'active' and whose last heartbeat is no older than
 * the TTL window, ordered by start time.
 *
 * Rows whose heartbeat has lapsed are simply left out of the result; they
 * are not marked crashed here (that is a separate janitor responsibility).
 *
 * @returns matching rows, or an empty array when the DB is unavailable
 */
export function getActiveAutoWorkers(): readonly AutoWorkerRow[] {
  if (!isDbAvailable()) return [];

  const db = _getAdapter()!;
  // ISO-8601 strings from toISOString() sort chronologically, so the cutoff
  // can be compared as text in SQL.
  const oldestAcceptedHeartbeat = new Date(
    Date.now() - HEARTBEAT_TTL_SECONDS * 1000,
  ).toISOString();

  const liveWorkers = db.prepare(
    `SELECT worker_id, host, pid, started_at, version,
            last_heartbeat_at, status, project_root_realpath
     FROM workers
     WHERE status = 'active' AND last_heartbeat_at >= :cutoff
     ORDER BY started_at`,
  );
  return liveWorkers.all({ ":cutoff": oldestAcceptedHeartbeat }) as unknown as AutoWorkerRow[];
}
|
|
180
|
+
|
|
181
|
+
/**
 * List every row in `workers`, ordered by start time, with no filtering on
 * status or heartbeat age.
 *
 * @returns all rows, or an empty array when the DB is unavailable
 */
export function getAllAutoWorkers(): readonly AutoWorkerRow[] {
  if (!isDbAvailable()) return [];

  const everyWorker = _getAdapter()!.prepare(
    `SELECT worker_id, host, pid, started_at, version,
            last_heartbeat_at, status, project_root_realpath
     FROM workers
     ORDER BY started_at`,
  );
  return everyWorker.all() as unknown as AutoWorkerRow[];
}
|
|
193
|
+
|
|
194
|
+
/**
 * Fetch the `workers` row with the given id.
 *
 * @param workerId id to look up
 * @returns the row, or null when no such row exists or the DB is unavailable
 */
export function getAutoWorker(workerId: string): AutoWorkerRow | null {
  if (!isDbAvailable()) return null;

  const lookup = _getAdapter()!.prepare(
    `SELECT worker_id, host, pid, started_at, version,
            last_heartbeat_at, status, project_root_realpath
     FROM workers WHERE worker_id = :worker_id`,
  );
  const found = lookup.get({ ":worker_id": workerId }) as AutoWorkerRow | undefined;
  if (!found) return null;
  return found;
}
|
|
207
|
+
|
|
208
|
+
/**
 * Expose the heartbeat TTL (in seconds) so tests and janitors can compute
 * expirations with the same value this module uses.
 *
 * @returns the TTL in seconds
 */
export function autoWorkerHeartbeatTtlSeconds(): number {
  const ttlSeconds = HEARTBEAT_TTL_SECONDS;
  return ttlSeconds;
}
|
|
212
|
+
|
|
213
|
+
/**
 * Best-effort check of whether the process behind a worker row still exists.
 *
 * Returns false for a pid that is not a positive integer and for rows
 * recorded on a different host name. The current process is always
 * reported alive. Otherwise the pid is probed with signal 0: success or an
 * EPERM error both count as alive, any other error counts as not alive.
 *
 * @param candidate row (or row fragment) carrying `host` and `pid`
 * @returns true when the process is considered alive
 */
function isWorkerProcessAlive(candidate: Pick<AutoWorkerRow, "host" | "pid">): boolean {
  const { host, pid } = candidate;

  const pidIsUsable = Number.isInteger(pid) && pid > 0;
  if (!pidIsUsable || host !== hostname()) return false;
  if (pid === process.pid) return true;

  try {
    // Signal 0 delivers nothing; it only performs the existence/permission check.
    process.kill(pid, 0);
  } catch (err) {
    // EPERM: the process exists but we may not signal it.
    return (err as NodeJS.ErrnoException).code === "EPERM";
  }
  return true;
}
|
|
226
|
+
|
|
227
|
+
/**
 * Phase C pt 2 — locate the "previous crashed session" for a project root:
 * a worker row that is still marked 'active', whose heartbeat is older than
 * the TTL window, and whose process is no longer alive according to the OS
 * pid check.
 *
 * Used by crash-recovery.ts:readCrashLock to notice that an earlier
 * auto-mode session ended without cleanup.
 *
 * Lookup happens in two passes:
 *   1. the most recently started lapsed row whose stored path equals
 *      `projectRootRealpath` exactly;
 *   2. if that yields nothing usable, every lapsed row (newest first),
 *      compared after normalizing both paths.
 *
 * @param projectRootRealpath project root to search for
 * @returns the stale row, or null when none exists or the DB is unavailable
 */
export function findStaleWorkerForProject(
  projectRootRealpath: string,
): AutoWorkerRow | null {
  if (!isDbAvailable()) return null;

  const db = _getAdapter()!;
  const cutoffIso = new Date(Date.now() - HEARTBEAT_TTL_SECONDS * 1000).toISOString();

  const exactMatch = db.prepare(
    `SELECT worker_id, host, pid, started_at, version,
            last_heartbeat_at, status, project_root_realpath
     FROM workers
     WHERE project_root_realpath = :project_root
       AND status = 'active'
       AND last_heartbeat_at < :cutoff
     ORDER BY started_at DESC
     LIMIT 1`,
  ).get({ ":project_root": projectRootRealpath, ":cutoff": cutoffIso }) as AutoWorkerRow | undefined;
  if (exactMatch && !isWorkerProcessAlive(exactMatch)) return exactMatch;

  // Older rows and external fixtures may have stored a non-realpath spelling
  // of the same project root, e.g. /var/... vs /private/var/... on macOS.
  const canonicalProjectRoot = normalizeRealPath(projectRootRealpath);
  const lapsedRows = db.prepare(
    `SELECT worker_id, host, pid, started_at, version,
            last_heartbeat_at, status, project_root_realpath
     FROM workers
     WHERE status = 'active'
       AND last_heartbeat_at < :cutoff
     ORDER BY started_at DESC`,
  ).all({ ":cutoff": cutoffIso }) as unknown as AutoWorkerRow[];

  for (const candidate of lapsedRows) {
    const sameProject =
      normalizeRealPath(candidate.project_root_realpath) === canonicalProjectRoot;
    if (sameProject && !isWorkerProcessAlive(candidate)) return candidate;
  }
  return null;
}
|
|
@@ -0,0 +1,149 @@
|
|
|
1
|
+
// gsd-2 + Worker IPC command queue (DB-backed coordination, Phase B)
|
|
2
|
+
//
|
|
3
|
+
// New infrastructure for dispatcher-to-worker IPC (cancel signals, pause
|
|
4
|
+
// requests, etc.). NOT a replacement for any existing on-disk queue and
|
|
5
|
+
// NOT related to startAutoCommandPolling() in auto.ts (which polls a
|
|
6
|
+
// remote channel like Telegram, not a local file queue).
|
|
7
|
+
//
|
|
8
|
+
// Broadcast semantics (codex review LOW B4):
|
|
9
|
+
// SQLite indexes NULLs in B-trees, so the single index
|
|
10
|
+
// idx_command_queue_pending(target_worker, claimed_at) serves both:
|
|
11
|
+
// - targeted queries: WHERE target_worker = ?
|
|
12
|
+
// - broadcast queries: WHERE target_worker IS NULL
|
|
13
|
+
// Workers should poll for both forms (their own ID + broadcasts) on each
|
|
14
|
+
// claim cycle.
|
|
15
|
+
|
|
16
|
+
import {
|
|
17
|
+
_getAdapter,
|
|
18
|
+
isDbAvailable,
|
|
19
|
+
transaction,
|
|
20
|
+
} from "../gsd-db.js";
|
|
21
|
+
|
|
22
|
+
/** Shape of one `command_queue` row as selected by this module. */
export interface CommandQueueRow {
  /** Row id. */
  id: number;
  /** Worker the command is addressed to; null for a broadcast command. */
  target_worker: string | null;
  /** Command name supplied at enqueue time. */
  command: string;
  /** JSON-serialized arguments. */
  args_json: string;
  /** ISO-8601 timestamp taken at enqueue time. */
  enqueued_at: string;
  /** ISO-8601 timestamp of the claim; null while unclaimed. */
  claimed_at: string | null;
  /** Id of the worker that claimed the command; null while unclaimed. */
  claimed_by: string | null;
  /** ISO-8601 timestamp of completion; null until completed. */
  completed_at: string | null;
  /** JSON-serialized result payload; null when none was recorded. */
  result_json: string | null;
}
|
|
33
|
+
|
|
34
|
+
/** Arguments accepted by enqueueCommand(). */
export interface EnqueueInput {
  /** A worker_id to address one worker, or null to broadcast to all workers. */
  targetWorker: string | null;
  /** Command name to store on the queue row. */
  command: string;
  /** Optional arguments; serialized to JSON (an empty object when omitted). */
  args?: Record<string, unknown>;
}
|
|
40
|
+
|
|
41
|
+
/**
 * Append a command to the queue and return the id of the new row.
 *
 * Delivery is "single delivery to whoever claims first", not pub-sub: a
 * broadcast command (targetWorker = null) is still claimed by exactly one
 * worker.
 *
 * @param input target, command name and optional arguments
 * @returns the inserted row id (0 if the driver reports no insert id)
 * @throws Error when the DB is unavailable
 */
export function enqueueCommand(input: EnqueueInput): number {
  if (!isDbAvailable()) {
    throw new Error("enqueueCommand: DB unavailable");
  }

  const enqueuedAt = new Date().toISOString();
  const db = _getAdapter()!;

  const insertOutcome = transaction(() => {
    const insertCommand = db.prepare(
      `INSERT INTO command_queue (target_worker, command, args_json, enqueued_at)
       VALUES (:target_worker, :command, :args_json, :enqueued_at)`,
    );
    return insertCommand.run({
      ":target_worker": input.targetWorker,
      ":command": input.command,
      ":args_json": JSON.stringify(input.args ?? {}),
      ":enqueued_at": enqueuedAt,
    });
  }) as { lastInsertRowid?: number | bigint };

  const newRowId = insertOutcome.lastInsertRowid ?? 0;
  return Number(newRowId);
}
|
|
65
|
+
|
|
66
|
+
/**
 * Claim the oldest pending command available to `workerId`, inside a single
 * transaction.
 *
 * Both commands addressed to this worker (target_worker = workerId) and
 * broadcast commands (target_worker IS NULL) are eligible; the oldest by
 * enqueue time, then by id, wins.
 *
 * @param workerId id of the claiming worker
 * @returns the claimed row with `claimed_at` / `claimed_by` filled in, or
 *          null when nothing is pending, the claim was lost to another
 *          worker, or the DB is unavailable
 */
export function claimNextCommand(workerId: string): CommandQueueRow | null {
  if (!isDbAvailable()) return null;

  const claimedAt = new Date().toISOString();
  const db = _getAdapter()!;

  return transaction((): CommandQueueRow | null => {
    // Step 1: pick the oldest unclaimed, uncompleted command that is either
    // addressed to this worker or broadcast.
    const pending = db.prepare(
      `SELECT id, target_worker, command, args_json, enqueued_at,
              claimed_at, claimed_by, completed_at, result_json
       FROM command_queue
       WHERE claimed_at IS NULL
         AND completed_at IS NULL
         AND (target_worker = :worker_id OR target_worker IS NULL)
       ORDER BY enqueued_at ASC, id ASC
       LIMIT 1`,
    ).get({ ":worker_id": workerId }) as CommandQueueRow | undefined;

    if (!pending) return null;

    // Step 2: conditional UPDATE; it only takes effect if the row is still
    // unclaimed, which guards against two workers polling at the same time.
    const claimOutcome = db.prepare(
      `UPDATE command_queue
       SET claimed_at = :now, claimed_by = :worker_id
       WHERE id = :id AND claimed_at IS NULL AND completed_at IS NULL`,
    ).run({ ":now": claimedAt, ":worker_id": workerId, ":id": pending.id }) as { changes?: unknown };

    const rowsClaimed =
      typeof claimOutcome.changes === "number" ? claimOutcome.changes : 0;

    // Anything other than exactly one changed row means the race was lost.
    if (rowsClaimed !== 1) return null;

    return { ...pending, claimed_at: claimedAt, claimed_by: workerId };
  });
}
|
|
112
|
+
|
|
113
|
+
/**
 * Record that a claimed command has finished, optionally storing a result.
 *
 * The UPDATE only matches a row that was claimed by `workerId` and has no
 * `completed_at` yet, so calling this again for an already-completed command
 * (or from a worker that does not own the claim) changes nothing.
 *
 * @param id - Row id of the command in command_queue.
 * @param workerId - Worker that must match the row's `claimed_by`.
 * @param result - Optional payload; stored as JSON text, or NULL when omitted.
 */
export function completeCommand(
  id: number,
  workerId: string,
  result?: Record<string, unknown>,
): void {
  if (!isDbAvailable()) {
    return;
  }

  const completedAt = new Date().toISOString();
  const adapter = _getAdapter()!;

  const markCompleted = adapter.prepare(
    `UPDATE command_queue
SET completed_at = :now, result_json = :result_json
WHERE id = :id
AND claimed_by = :worker_id
AND completed_at IS NULL`,
  );

  let serializedResult: string | null = null;
  if (result) {
    serializedResult = JSON.stringify(result);
  }

  markCompleted.run({
    ":id": id,
    ":worker_id": workerId,
    ":now": completedAt,
    ":result_json": serializedResult,
  });
}
|
|
138
|
+
|
|
139
|
+
/**
 * Diagnostic helper: fetch one command_queue row by its id.
 *
 * @param id - Row id to look up.
 * @returns The row, or `null` when the DB is unavailable or no row matches.
 */
export function getCommand(id: number): CommandQueueRow | null {
  if (!isDbAvailable()) {
    return null;
  }

  const lookup = _getAdapter()!.prepare(
    `SELECT id, target_worker, command, args_json, enqueued_at,
claimed_at, claimed_by, completed_at, result_json
FROM command_queue WHERE id = :id`,
  );
  const found = lookup.get({ ":id": id }) as CommandQueueRow | undefined;
  return found ?? null;
}
|
|
@@ -0,0 +1,274 @@
|
|
|
1
|
+
// gsd-2 + Milestone leases with fencing tokens (DB-backed coordination, Phase B)
|
|
2
|
+
//
|
|
3
|
+
// One worker at a time may hold a lease on a given milestone. Leases carry a
|
|
4
|
+
// monotonic fencing token that increments on every successful takeover, so
|
|
5
|
+
// stale workers can be cheaply detected and rejected at write time
|
|
6
|
+
// (unit_dispatches.milestone_lease_token).
|
|
7
|
+
//
|
|
8
|
+
// Codex review BLOCKING B1: claim semantics must atomically handle two
|
|
9
|
+
// distinct cases inside one transaction:
|
|
10
|
+
// 1. First claim (no row exists) → INSERT with fencing_token=1
|
|
11
|
+
// 2. Takeover (row exists, expired/released) → UPDATE w/ fencing_token+1
|
|
12
|
+
// `INSERT OR ABORT` alone is wrong because the row already exists for any
|
|
13
|
+
// takeover and a plain INSERT cannot succeed.
|
|
14
|
+
|
|
15
|
+
import { randomUUID } from "node:crypto";
|
|
16
|
+
|
|
17
|
+
import {
|
|
18
|
+
_getAdapter,
|
|
19
|
+
isDbAvailable,
|
|
20
|
+
transaction,
|
|
21
|
+
insertAuditEvent,
|
|
22
|
+
} from "../gsd-db.js";
|
|
23
|
+
|
|
24
|
+
/** Lifetime, in seconds, added to "now" whenever a lease is claimed or refreshed. */
const LEASE_TTL_SECONDS = 60;

/** Values stored in the `status` column of milestone_leases. */
export type LeaseStatus = "held" | "released" | "expired";

/** Shape of one milestone_leases row as read back from the database. */
export interface MilestoneLeaseRow {
  /** Milestone the lease belongs to; every lookup in this module keys on it. */
  milestone_id: string;
  /** Worker recorded as the current (or most recent) holder. */
  worker_id: string;
  /** Starts at 1 on the first claim and is incremented by SQL on each takeover. */
  fencing_token: number;
  /** ISO-8601 timestamp written when the lease was claimed. */
  acquired_at: string;
  /** ISO-8601 timestamp after which the lease may be taken over. */
  expires_at: string;
  status: LeaseStatus;
}

/**
 * Outcome of a claim attempt: either the caller now holds the lease (with
 * its fencing token and expiry), or another worker holds it.
 */
export type ClaimResult =
  | {
      ok: true;
      token: number;
      expiresAt: string;
    }
  | {
      ok: false;
      error: "held_by";
      byWorker: string;
      expiresAt: string;
    };
|
|
40
|
+
|
|
41
|
+
/**
 * Decide whether an error thrown by the lease INSERT means "a row for this
 * milestone already exists" (a duplicate key), as opposed to any other
 * failure.
 *
 * Only UNIQUE / PRIMARY KEY violations should count as duplicates. FOREIGN
 * KEY, NOT NULL and CHECK violations are reported as non-duplicates so the
 * caller rethrows them instead of falling through to the takeover UPDATE,
 * where they would be misreported as "lease held by someone else".
 *
 * Classification uses both the error's `code` property (when present) and
 * its message text, because not every error object carries an extended
 * constraint code.
 *
 * @param err - Whatever was caught from the INSERT; may be any value.
 * @returns `true` when the failure looks like a duplicate-key violation.
 */
function isDuplicateLeaseInsertError(err: unknown): boolean {
  const code =
    typeof err === "object" && err !== null && "code" in err
      ? String((err as { code?: unknown }).code ?? "")
      : "";
  const msg = err instanceof Error ? err.message : String(err);

  // Constraint kinds that can never mean "the row already exists". These
  // must be ruled out first: the generic SQLITE_CONSTRAINT code and the
  // "constraint failed" text below would otherwise match them too.
  if (/\bFOREIGN KEY\b/i.test(msg)) {
    return false;
  }
  if (/\b(?:NOT NULL|CHECK) constraint failed\b/i.test(msg)) {
    return false;
  }
  if (
    code === "SQLITE_CONSTRAINT_FOREIGNKEY" ||
    code === "SQLITE_CONSTRAINT_NOTNULL" ||
    code === "SQLITE_CONSTRAINT_CHECK"
  ) {
    return false;
  }

  if (
    code === "SQLITE_CONSTRAINT" ||
    code === "SQLITE_CONSTRAINT_PRIMARYKEY" ||
    code === "SQLITE_CONSTRAINT_UNIQUE"
  ) {
    return true;
  }

  // No usable code: fall back to the message text.
  return /\bUNIQUE\b|\bPRIMARY KEY\b|\bconstraint failed\b/i.test(msg);
}
|
|
57
|
+
|
|
58
|
+
/**
 * Compute the lease expiry for a claim or refresh made at `now`.
 *
 * @param now - Instant the lease is being (re)started from.
 * @returns ISO-8601 string for `now` plus LEASE_TTL_SECONDS.
 */
function ttlExpiry(now: Date): string {
  const ttlMs = LEASE_TTL_SECONDS * 1000;
  const expiry = new Date(now.getTime() + ttlMs);
  return expiry.toISOString();
}
|
|
61
|
+
|
|
62
|
+
/**
 * Acquire a milestone lease for `workerId`, or take over one that has been
 * released or has expired.
 *
 * The whole claim runs inside a single `transaction(...)` callback:
 *   1. INSERT a fresh row with fencing_token = 1. A duplicate-key failure
 *      means a row already exists and control moves on to step 2; any other
 *      INSERT error is rethrown.
 *   2. Conditional UPDATE that only matches a row whose status is
 *      'expired' / 'released' or whose expires_at is in the past. The token
 *      is bumped by SQL (`fencing_token + 1`), never supplied by the client.
 *
 * Clock note: expires_at is computed from this process's clock and compared
 * against SQLite's datetime('now'), which SQLite evaluates in UTC from the
 * host clock. Both readings come from the same machine, so this remains
 * single-host SQLite coordination only. Cross-host coordination would need a
 * real coordinator; out of scope for Phase B.
 *
 * @param workerId - Worker requesting the lease.
 * @param milestoneId - Milestone to lease.
 * @returns `{ ok: true, token, expiresAt }` on a fresh claim or takeover, or
 *   `{ ok: false, error: "held_by", ... }` naming the current holder.
 * @throws Error when the DB is unavailable, when the INSERT fails for a
 *   reason other than a duplicate key, or when the row cannot be read back
 *   after a successful takeover.
 */
export function claimMilestoneLease(
  workerId: string,
  milestoneId: string,
): ClaimResult {
  if (!isDbAvailable()) {
    throw new Error("claimMilestoneLease: DB unavailable");
  }
  const now = new Date();
  const nowIso = now.toISOString();
  const expiresIso = ttlExpiry(now);

  return transaction((): ClaimResult => {
    const db = _getAdapter()!;

    // The INSERT and the takeover UPDATE bind exactly the same four values.
    const leaseParams = {
      ":milestone_id": milestoneId,
      ":worker_id": workerId,
      ":acquired_at": nowIso,
      ":expires_at": expiresIso,
    };

    // Step 1: try a fresh INSERT. If it fails because the row already
    // exists, fall through to the takeover branch below.
    let inserted = false;
    try {
      db.prepare(
        `INSERT INTO milestone_leases (
milestone_id, worker_id, fencing_token,
acquired_at, expires_at, status
) VALUES (
:milestone_id, :worker_id, 1,
:acquired_at, :expires_at, 'held'
)`,
      ).run(leaseParams);
      inserted = true;
    } catch (err) {
      // A duplicate-key error just means "row exists"; anything else is a
      // bug and must surface.
      if (!isDuplicateLeaseInsertError(err)) throw err;
    }

    if (inserted) {
      insertAuditEvent({
        eventId: randomUUID(),
        traceId: workerId,
        category: "orchestration",
        type: "lease-acquired",
        ts: nowIso,
        payload: { workerId, milestoneId, token: 1, mode: "fresh" },
      });
      return { ok: true, token: 1, expiresAt: expiresIso };
    }

    // Step 2: takeover. The WHERE clause only matches an expired or released
    // lease, so `changes === 1` confirms this worker actually took it over
    // (vs. the lease still being held, or losing the race to another worker).
    const updateResult = db.prepare(
      `UPDATE milestone_leases
SET worker_id = :worker_id,
fencing_token = fencing_token + 1,
acquired_at = :acquired_at,
expires_at = :expires_at,
status = 'held'
WHERE milestone_id = :milestone_id
AND (status IN ('expired','released')
OR datetime(expires_at) < datetime('now'))`,
    ).run(leaseParams);

    const changes =
      typeof (updateResult as { changes?: unknown }).changes === "number"
        ? (updateResult as { changes: number }).changes
        : 0;

    if (changes === 1) {
      // Read back to obtain the token value SQL just computed.
      const row = db.prepare(
        `SELECT worker_id, fencing_token, expires_at FROM milestone_leases WHERE milestone_id = :milestone_id`,
      ).get({ ":milestone_id": milestoneId }) as
        | Pick<MilestoneLeaseRow, "worker_id" | "fencing_token" | "expires_at">
        | undefined;

      // Never invent a token: falling back to 1 would hand the new holder
      // the same token as the very first holder and defeat fencing. The row
      // was updated a moment ago in this same callback, so a missing value
      // here is an invariant violation worth failing loudly on.
      const token = row?.fencing_token;
      if (token == null) {
        throw new Error(
          `claimMilestoneLease: lease row missing after takeover of ${milestoneId}`,
        );
      }

      insertAuditEvent({
        eventId: randomUUID(),
        traceId: workerId,
        category: "orchestration",
        type: "lease-acquired",
        ts: nowIso,
        payload: { workerId, milestoneId, token, mode: "takeover" },
      });
      return { ok: true, token, expiresAt: expiresIso };
    }

    // Lease still held by someone else — read current holder for the error.
    const holder = db.prepare(
      `SELECT worker_id, expires_at FROM milestone_leases WHERE milestone_id = :milestone_id`,
    ).get({ ":milestone_id": milestoneId }) as
      | { worker_id: string; expires_at: string }
      | undefined;

    return {
      ok: false,
      error: "held_by",
      byWorker: holder?.worker_id ?? "unknown",
      expiresAt: holder?.expires_at ?? "",
    };
  });
}
|
|
182
|
+
|
|
183
|
+
/**
 * Push the lease's expires_at forward by the TTL; meant for heartbeats.
 *
 * The UPDATE matches only while the row still names this worker, carries the
 * same fencing token, and has status 'held'. If the lease was taken over or
 * released in the meantime nothing is changed and `false` is returned.
 *
 * @param workerId - Worker that believes it holds the lease.
 * @param milestoneId - Milestone whose lease is being refreshed.
 * @param fencingToken - Token the worker received when it claimed the lease.
 * @returns `true` when exactly one row was updated; `false` otherwise
 *   (including when the DB is unavailable).
 */
export function refreshMilestoneLease(
  workerId: string,
  milestoneId: string,
  fencingToken: number,
): boolean {
  if (!isDbAvailable()) {
    return false;
  }

  const renewedExpiry = ttlExpiry(new Date());
  const adapter = _getAdapter()!;

  const extendLease = adapter.prepare(
    `UPDATE milestone_leases
SET expires_at = :expires_at
WHERE milestone_id = :milestone_id
AND worker_id = :worker_id
AND fencing_token = :token
AND status = 'held'`,
  );
  const outcome = extendLease.run({
    ":expires_at": renewedExpiry,
    ":milestone_id": milestoneId,
    ":worker_id": workerId,
    ":token": fencingToken,
  });

  // A missing or non-numeric `changes` counts as "nothing updated".
  return (outcome as { changes?: unknown }).changes === 1;
}
|
|
215
|
+
|
|
216
|
+
/**
 * Give the lease up voluntarily (for example on clean shutdown) by setting
 * its status to 'released', which lets the next claim take it over without
 * waiting for the TTL to run out.
 *
 * Only a lease still held by this worker under the same fencing token is
 * released. A successful release also writes a "lease-released" audit event
 * inside the same `transaction(...)` callback.
 *
 * @param workerId - Worker releasing the lease.
 * @param milestoneId - Milestone whose lease is being released.
 * @param fencingToken - Token the worker received when it claimed the lease.
 * @returns `true` when exactly one row was updated; `false` otherwise
 *   (including when the DB is unavailable).
 */
export function releaseMilestoneLease(
  workerId: string,
  milestoneId: string,
  fencingToken: number,
): boolean {
  if (!isDbAvailable()) {
    return false;
  }

  const adapter = _getAdapter()!;

  return transaction(() => {
    const markReleased = adapter.prepare(
      `UPDATE milestone_leases
SET status = 'released'
WHERE milestone_id = :milestone_id
AND worker_id = :worker_id
AND fencing_token = :token
AND status = 'held'`,
    );
    const outcome = markReleased.run({
      ":milestone_id": milestoneId,
      ":worker_id": workerId,
      ":token": fencingToken,
    });

    // A missing or non-numeric `changes` counts as "nothing released".
    const released = (outcome as { changes?: unknown }).changes === 1;
    if (!released) {
      return false;
    }

    insertAuditEvent({
      eventId: randomUUID(),
      traceId: workerId,
      category: "orchestration",
      type: "lease-released",
      ts: new Date().toISOString(),
      payload: { workerId, milestoneId, token: fencingToken },
    });
    return true;
  });
}
|
|
257
|
+
|
|
258
|
+
/**
 * Diagnostic read of the lease row for one milestone.
 *
 * @param milestoneId - Milestone to look up.
 * @returns The stored row, or `null` when the DB is unavailable or the
 *   milestone has no lease row.
 */
export function getMilestoneLease(milestoneId: string): MilestoneLeaseRow | null {
  if (!isDbAvailable()) {
    return null;
  }

  const lookup = _getAdapter()!.prepare(
    `SELECT milestone_id, worker_id, fencing_token, acquired_at, expires_at, status
FROM milestone_leases WHERE milestone_id = :milestone_id`,
  );
  const found = lookup.get({ ":milestone_id": milestoneId }) as MilestoneLeaseRow | undefined;
  return found ?? null;
}
|
|
270
|
+
|
|
271
|
+
/**
 * Expose the lease TTL so callers (e.g. tests / janitors) can compute
 * expirations without duplicating the constant.
 *
 * @returns The lease lifetime in seconds.
 */
export function milestoneLeaseTtlSeconds(): number {
  const ttlSeconds = LEASE_TTL_SECONDS;
  return ttlSeconds;
}
|