astrocode-workflow 0.3.3 → 0.3.5-1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/src/astro/workflow-runner.d.ts +15 -0
- package/dist/src/astro/workflow-runner.js +25 -0
- package/dist/src/hooks/inject-provider.d.ts +5 -0
- package/dist/src/hooks/inject-provider.js +10 -0
- package/dist/src/index.js +11 -6
- package/dist/src/state/repo-lock.d.ts +65 -1
- package/dist/src/state/repo-lock.js +568 -17
- package/dist/src/state/workflow-repo-lock.d.ts +16 -0
- package/dist/src/state/workflow-repo-lock.js +50 -0
- package/dist/src/tools/index.js +3 -0
- package/dist/src/tools/lock.d.ts +4 -0
- package/dist/src/tools/lock.js +78 -0
- package/dist/src/tools/repair.js +40 -6
- package/dist/src/tools/status.js +1 -1
- package/dist/src/tools/workflow.js +182 -179
- package/dist/src/workflow/repair.js +2 -2
- package/package.json +1 -1
- package/src/hooks/inject-provider.ts +16 -0
- package/src/index.ts +13 -7
- package/src/state/repo-lock.ts +170 -38
- package/src/state/workflow-repo-lock.ts +1 -1
- package/src/tools/index.ts +3 -0
- package/src/tools/lock.ts +75 -0
- package/src/tools/repair.ts +43 -6
- package/src/workflow/repair.ts +2 -2
|
@@ -1,29 +1,580 @@
|
|
|
1
1
|
// src/state/repo-lock.ts
|
|
2
2
|
import fs from "node:fs";
|
|
3
3
|
import path from "node:path";
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
|
|
4
|
+
import crypto from "node:crypto";
|
|
5
|
+
// Schema version stamped into every v2 lock record (readLock rejects other versions).
const LOCK_VERSION = 2;
// Process-stable identifier for this Node process instance.
const PROCESS_INSTANCE_ID = crypto.randomUUID();
// Hard guardrails against garbage/corruption.
const MAX_LOCK_BYTES = 64 * 1024; // 64KB; lock file should be tiny.
// How many times we'll attempt "atomic-ish replace" before giving up.
const ATOMIC_REPLACE_RETRIES = 3;
|
|
12
|
+
/** Current wall-clock time formatted as an ISO-8601 UTC string. */
function nowISO() {
    const stamp = new Date();
    return stamp.toISOString();
}
|
|
15
|
+
/** Resolve after roughly `ms` milliseconds; never rejects. */
function sleep(ms) {
    return new Promise((resolve) => {
        setTimeout(resolve, ms);
    });
}
|
|
18
|
+
/**
 * Probe whether `pid` refers to a live process using signal 0.
 * - ESRCH  => process does not exist (dead)
 * - EPERM  => process exists but we can't signal it (alive)
 * - anything else => assume alive, so we never evict on uncertain evidence
 */
function isPidAlive(pid) {
    try {
        // Signal 0 performs the existence/permission check without delivering a signal.
        process.kill(pid, 0);
        return true;
    }
    catch (err) {
        const code = err?.code;
        if (code === "ESRCH")
            return false;
        // EPERM (exists, unsignalable) and unknown errors: conservative = alive.
        return true;
    }
}
|
|
38
|
+
/** Parse an ISO timestamp to epoch milliseconds, or null when unparseable. */
function parseISOToMs(iso) {
    const ms = Date.parse(iso);
    return Number.isNaN(ms) ? null : ms;
}
|
|
44
|
+
/**
 * True when the lock's `updated_at` is older than `staleMs`.
 * A corrupt/unparseable timestamp also counts as stale.
 */
function isStaleByAge(existing, staleMs) {
    // Inlined ISO parse: NaN means the timestamp is garbage -> stale.
    const updatedMs = Date.parse(existing.updated_at);
    if (Number.isNaN(updatedMs))
        return true;
    const ageMs = Date.now() - updatedMs;
    return ageMs > staleMs;
}
|
|
50
|
+
/** Best-effort delete of a file; every failure (missing, perms, ...) is swallowed. */
function safeUnlink(p) {
    try {
        fs.unlinkSync(p);
    }
    catch {
        /* best-effort cleanup: all unlink failures are acceptable here */
    }
}
|
|
58
|
+
/**
 * Reads & validates lock file defensively.
 * Supports both v2 JSON format and legacy PID-only format for compatibility.
 * Returns null on any parse/validation failure.
 */
function readLock(lockPath) {
    try {
        const st = fs.statSync(lockPath);
        // Reject non-regular files (e.g. a directory squatting on the lock path).
        if (!st.isFile())
            return null;
        // Size guardrail: empty or oversized files are treated as corrupt.
        if (st.size <= 0 || st.size > MAX_LOCK_BYTES)
            return null;
        const raw = fs.readFileSync(lockPath, "utf8").trim();
        // Try v2 JSON first
        try {
            const parsed = JSON.parse(raw);
            if (parsed && typeof parsed === "object" && parsed.v === LOCK_VERSION) {
                // Field-by-field shape validation; any mismatch => corrupt => null.
                if (typeof parsed.pid !== "number")
                    return null;
                if (typeof parsed.created_at !== "string")
                    return null;
                if (typeof parsed.updated_at !== "string")
                    return null;
                if (typeof parsed.repo_root !== "string")
                    return null;
                if (typeof parsed.instance_id !== "string")
                    return null;
                if (typeof parsed.lease_id !== "string")
                    return null;
                // Optional fields: absent is fine, wrong type is not.
                if (parsed.session_id !== undefined && typeof parsed.session_id !== "string")
                    return null;
                if (parsed.owner !== undefined && typeof parsed.owner !== "string")
                    return null;
                return parsed;
            }
        }
        catch {
            // Not JSON, try legacy format
        }
        // Legacy format: just PID as number string
        const legacyPid = parseInt(raw, 10);
        if (Number.isNaN(legacyPid) || legacyPid <= 0)
            return null;
        // Convert legacy to v2 format
        const now = nowISO();
        const leaseId = crypto.randomUUID();
        return {
            v: LOCK_VERSION,
            pid: legacyPid,
            created_at: now, // Approximate
            // NOTE(review): stamping "now" means a legacy lock never looks
            // age-stale on its first read — confirm that's intended.
            updated_at: now,
            repo_root: "", // Unknown, will be filled by caller
            instance_id: PROCESS_INSTANCE_ID, // Assume same instance
            session_id: undefined,
            lease_id: leaseId,
            owner: "legacy-lock",
        };
    }
    catch {
        // statSync/readFileSync failure (missing file, permissions, ...) => no lock.
        return null;
    }
}
|
|
120
|
+
/**
 * Best-effort fsync of a directory (helps rename durability on POSIX filesystems).
 * Platforms where a directory cannot be opened for reading are silently skipped.
 */
function fsyncDirBestEffort(dirPath) {
    let fd = null;
    try {
        fd = fs.openSync(dirPath, "r");
        fs.fsyncSync(fd);
    }
    catch {
        // Non-portable operation; failure is acceptable.
    }
    finally {
        if (fd !== null) {
            try {
                fs.closeSync(fd);
            }
            catch {
                // ignore close failure
            }
        }
    }
}
|
|
139
|
+
/**
 * "Atomic-ish" replace:
 * - Write temp file
 * - Try rename over target (POSIX generally atomic)
 * - Windows can fail if target exists/locked; fallback to unlink+rename (not atomic, but best-effort)
 * - Best-effort directory fsync after rename
 * Throws the last rename error (or a generic Error) when all retries fail.
 */
function writeLockAtomicish(lockPath, lock) {
    const dir = path.dirname(lockPath);
    fs.mkdirSync(dir, { recursive: true });
    // Unique temp name: pid + timestamp + uuid avoids collisions between writers.
    const tmp = `${lockPath}.${process.pid}.${Date.now()}.${crypto.randomUUID()}.tmp`;
    const body = JSON.stringify(lock); // compact JSON to reduce IO
    fs.writeFileSync(tmp, body, "utf8");
    let lastErr = null;
    for (let i = 0; i < ATOMIC_REPLACE_RETRIES; i++) {
        try {
            fs.renameSync(tmp, lockPath);
            fsyncDirBestEffort(dir);
            return;
        }
        catch (err) {
            lastErr = err;
            const code = err?.code;
            // Common Windows-ish cases where rename over existing fails.
            if (code === "EEXIST" || code === "EPERM" || code === "ENOTEMPTY") {
                // NOTE(review): the unlink+retry window is not atomic; a racing
                // writer can slip in between unlink and the next rename.
                safeUnlink(lockPath);
                continue;
            }
            // If tmp vanished somehow, stop.
            if (code === "ENOENT")
                break;
            continue;
        }
    }
    // All retries failed: clean up the temp file before surfacing the error.
    safeUnlink(tmp);
    if (lastErr)
        throw lastErr;
    throw new Error(`Failed to replace lock file: ${lockPath}`);
}
|
|
178
|
+
/**
 * Atomic "create if not exists" using exclusive open (`wx`).
 * Returns true when the file was created, false when it already existed;
 * any other filesystem error is rethrown.
 */
function tryCreateExclusiveFile(filePath, contentsUtf8) {
    const dir = path.dirname(filePath);
    fs.mkdirSync(dir, { recursive: true });
    let fd;
    try {
        fd = fs.openSync(filePath, "wx");
    }
    catch (err) {
        // Lost the create race: someone else holds the file.
        if (err?.code === "EEXIST")
            return false;
        throw err;
    }
    try {
        fs.writeFileSync(fd, contentsUtf8, "utf8");
        fs.fsyncSync(fd);
    }
    finally {
        fs.closeSync(fd);
    }
    // Best-effort directory fsync (inlined; non-portable, failures ignored).
    try {
        const dirFd = fs.openSync(dir, "r");
        try {
            fs.fsyncSync(dirFd);
        }
        finally {
            fs.closeSync(dirFd);
        }
    }
    catch {
        // ignore
    }
    return true;
}
|
|
201
|
+
/** Serialize the lock record and attempt an exclusive create at lockPath. */
function tryCreateRepoLockExclusive(lockPath, lock) {
    const payload = JSON.stringify(lock);
    return tryCreateExclusiveFile(lockPath, payload);
}
|
|
204
|
+
// Process-local registry of held lock handles, keyed by lockPath + session.
const ACTIVE_LOCKS = new Map();
/** Build the process-local cache key for a lock path and optional session id. */
function cacheKey(lockPath, sessionId) {
    const session = sessionId ?? "";
    return `${lockPath}::${session}`;
}
|
|
208
|
+
/**
 * Heartbeat loop:
 * - setTimeout (not setInterval) to avoid backlog drift under load
 * - Minimizes writes by enforcing minWriteMs
 * - ABA-safe: only refreshes if lock matches our lease_id and process identity
 * - Avoids unnecessary writes if lock already has a recent updated_at
 *
 * opts: { lockPath, repoRoot, sessionId, owner, leaseId, heartbeatMs, minWriteMs }
 * Returns a stop function that halts the loop and cancels the pending timer.
 */
function startHeartbeat(opts) {
    let stopped = false;
    let lastWriteAt = 0;
    let timer = null;
    const tick = () => {
        if (stopped)
            return;
        const now = Date.now();
        // Throttle: never attempt a refresh more often than minWriteMs.
        const shouldAttempt = now - lastWriteAt >= opts.minWriteMs;
        if (shouldAttempt) {
            try {
                const existing = readLock(opts.lockPath);
                // Fencing: only refresh a lock we still own (lease + pid + instance).
                if (existing &&
                    existing.lease_id === opts.leaseId &&
                    existing.pid === process.pid &&
                    existing.instance_id === PROCESS_INSTANCE_ID) {
                    const updatedMs = parseISOToMs(existing.updated_at);
                    const isFresh = updatedMs !== null && now - updatedMs < opts.minWriteMs;
                    if (!isFresh) {
                        writeLockAtomicish(opts.lockPath, {
                            ...existing,
                            updated_at: nowISO(),
                            repo_root: opts.repoRoot,
                            session_id: opts.sessionId ?? existing.session_id,
                            owner: opts.owner ?? existing.owner,
                        });
                        lastWriteAt = now;
                    }
                    else {
                        // On-disk record is already fresh; just record the attempt.
                        lastWriteAt = now;
                    }
                }
            }
            catch (err) {
                // Heartbeat write failed - don't propagate, just reschedule
                // Lock will become stale if heartbeat continues failing
                // eslint-disable-next-line no-console
                console.warn("[Astrocode] Heartbeat write error:", err);
            }
        }
        // Reschedule; unref (where available) so the timer never keeps the process alive.
        timer = setTimeout(tick, opts.heartbeatMs);
        timer.unref?.();
    };
    tick();
    return () => {
        stopped = true;
        if (timer)
            clearTimeout(timer);
    };
}
|
|
265
|
+
/**
 * Shutdown cleanup:
 * Best-effort release on normal termination signals.
 */
let EXIT_HOOK_INSTALLED = false;
// Installs the process-wide exit/signal hooks exactly once per process.
function installExitHookOnce() {
    if (EXIT_HOOK_INSTALLED)
        return;
    EXIT_HOOK_INSTALLED = true;
    // Release every lock this process still holds; must never throw.
    const cleanup = () => {
        for (const [key, h] of ACTIVE_LOCKS.entries()) {
            try {
                ACTIVE_LOCKS.delete(key);
                h.heartbeatStop();
                h.releaseOnce();
            }
            catch {
                // ignore
            }
        }
    };
    process.once("exit", cleanup);
    // 128 + signal number (SIGINT=2, SIGTERM=15): conventional exit codes.
    process.once("SIGINT", () => {
        cleanup();
        process.exit(130);
    });
    process.once("SIGTERM", () => {
        cleanup();
        process.exit(143);
    });
    // NOTE(review): registering "uncaughtException"/"unhandledRejection" handlers
    // replaces Node's default crash behavior for the whole host process — confirm
    // that is acceptable for a library.
    process.once("uncaughtException", (err) => {
        // eslint-disable-next-line no-console
        console.error("[Astrocode] Uncaught Exception, cleaning up locks:", err);
        cleanup();
        process.exit(1);
    });
    process.once("unhandledRejection", (reason) => {
        // eslint-disable-next-line no-console
        console.error("[Astrocode] Unhandled Rejection, cleaning up locks:", reason);
        cleanup();
        process.exit(1);
    });
}
|
|
308
|
+
/**
 * Acquire a repo-scoped lock with:
 * - ✅ process-local caching + refcount (efficient repeated tool calls)
 * - ✅ heartbeat lease + stale recovery
 * - ✅ atomic create (`wx`) + portable replace fallback
 * - ✅ dead PID eviction + stale eviction
 * - ✅ no live takeover (even same session) to avoid concurrency stomps
 * - ✅ ABA-safe release via lease_id fencing
 * - ✅ exponential backoff + jitter to reduce FS churn
 *
 * opts: { lockPath, repoRoot, sessionId?, owner?, retryMs?, pollMs?, pollMaxMs?,
 *         heartbeatMs?, minWriteMs?, staleMs? }
 * Returns a handle whose release() is refcounted; throws when the lock is
 * still held by another live process after retryMs.
 */
export async function acquireRepoLock(opts) {
    installExitHookOnce();
    const { lockPath, repoRoot, sessionId, owner } = opts;
    const retryMs = opts.retryMs ?? 8000;
    const pollBaseMs = opts.pollMs ?? 20;
    const pollMaxMs = opts.pollMaxMs ?? 250;
    const heartbeatMs = opts.heartbeatMs ?? 200;
    const minWriteMs = opts.minWriteMs ?? 800;
    // Ensure stale is comfortably above minWriteMs to prevent false-stale under load.
    const staleMs = Math.max(opts.staleMs ?? 2 * 60 * 1000, minWriteMs * 8);
    // ✅ Fast path: reuse cached handle in the same process/session.
    const key = cacheKey(lockPath, sessionId);
    const cached = ACTIVE_LOCKS.get(key);
    if (cached) {
        cached.refCount += 1;
        return {
            release: () => {
                cached.refCount -= 1;
                // Last reference out: stop heartbeat, then delete the lock file.
                if (cached.refCount <= 0) {
                    ACTIVE_LOCKS.delete(key);
                    cached.heartbeatStop();
                    cached.releaseOnce();
                }
            },
        };
    }
    const myPid = process.pid;
    const startedAt = Date.now();
    let pollMs = pollBaseMs;
    while (true) {
        const existing = readLock(lockPath);
        // No lock (or unreadable/invalid) -> try create.
        if (!existing) {
            const now = nowISO();
            const leaseId = crypto.randomUUID();
            const candidate = {
                v: LOCK_VERSION,
                pid: myPid,
                created_at: now,
                updated_at: now,
                repo_root: repoRoot,
                instance_id: PROCESS_INSTANCE_ID,
                session_id: sessionId,
                lease_id: leaseId,
                owner,
            };
            const created = tryCreateRepoLockExclusive(lockPath, candidate);
            if (created) {
                const heartbeatStop = startHeartbeat({
                    lockPath,
                    repoRoot,
                    sessionId,
                    owner,
                    leaseId,
                    heartbeatMs,
                    minWriteMs,
                });
                const releaseOnce = () => {
                    const cur = readLock(lockPath);
                    if (!cur)
                        return;
                    // ABA-safe
                    if (cur.lease_id !== leaseId)
                        return;
                    // Strict identity: only exact process instance can delete.
                    if (cur.pid !== myPid)
                        return;
                    if (cur.instance_id !== PROCESS_INSTANCE_ID)
                        return;
                    safeUnlink(lockPath);
                    fsyncDirBestEffort(path.dirname(lockPath));
                };
                const handle = {
                    key,
                    lockPath,
                    sessionId,
                    leaseId,
                    refCount: 1,
                    heartbeatStop,
                    releaseOnce,
                };
                ACTIVE_LOCKS.set(key, handle);
                return {
                    release: () => {
                        const h = ACTIVE_LOCKS.get(key);
                        if (!h)
                            return;
                        h.refCount -= 1;
                        if (h.refCount <= 0) {
                            ACTIVE_LOCKS.delete(key);
                            h.heartbeatStop();
                            h.releaseOnce();
                        }
                    },
                };
            }
            // Race lost; reset backoff and loop.
            pollMs = pollBaseMs;
            continue;
        }
        // Re-entrant by SAME PROCESS IDENTITY (pid+instance), or legacy lock with same PID.
        if (existing.pid === myPid && (existing.instance_id === PROCESS_INSTANCE_ID || existing.owner === "legacy-lock")) {
            // Re-take ownership with a fresh lease (fences out stale releases).
            const leaseId = crypto.randomUUID();
            writeLockAtomicish(lockPath, {
                ...existing,
                v: LOCK_VERSION,
                updated_at: nowISO(),
                repo_root: repoRoot,
                instance_id: PROCESS_INSTANCE_ID, // Upgrade legacy
                session_id: sessionId ?? existing.session_id,
                owner: owner ?? existing.owner,
                lease_id: leaseId,
            });
            const heartbeatStop = startHeartbeat({
                lockPath,
                repoRoot,
                sessionId: sessionId ?? existing.session_id,
                owner: owner ?? existing.owner,
                leaseId,
                heartbeatMs,
                minWriteMs,
            });
            const releaseOnce = () => {
                const cur = readLock(lockPath);
                if (!cur)
                    return;
                if (cur.lease_id !== leaseId)
                    return;
                if (cur.pid !== myPid)
                    return;
                if (cur.instance_id !== PROCESS_INSTANCE_ID)
                    return;
                safeUnlink(lockPath);
                fsyncDirBestEffort(path.dirname(lockPath));
            };
            const handle = {
                key,
                lockPath,
                sessionId,
                leaseId,
                refCount: 1,
                heartbeatStop,
                releaseOnce,
            };
            ACTIVE_LOCKS.set(key, handle);
            return {
                release: () => {
                    const h = ACTIVE_LOCKS.get(key);
                    if (!h)
                        return;
                    h.refCount -= 1;
                    if (h.refCount <= 0) {
                        ACTIVE_LOCKS.delete(key);
                        h.heartbeatStop();
                        h.releaseOnce();
                    }
                },
            };
        }
        // 🚫 No live takeover (even same session).
        // We only evict dead/stale locks.
        const pidAlive = isPidAlive(existing.pid);
        const staleByAge = isStaleByAge(existing, staleMs);
        if (!pidAlive || staleByAge) {
            safeUnlink(lockPath);
            fsyncDirBestEffort(path.dirname(lockPath));
            pollMs = pollBaseMs;
            continue;
        }
        // Alive and not us -> bounded wait with exponential backoff + jitter.
        if (Date.now() - startedAt > retryMs) {
            // Build a human-readable summary of the current holder for the error.
            const ownerBits = [
                `pid=${existing.pid}`,
                existing.session_id ? `session=${existing.session_id}` : null,
                existing.owner ? `owner=${existing.owner}` : null,
                `updated_at=${existing.updated_at}`,
                sessionId && existing.session_id === sessionId ? `(same-session waiting)` : null,
            ]
                .filter(Boolean)
                .join(" ");
            throw new Error(`Astrocode lock is already held (${lockPath}). ${ownerBits}. ` +
                `Close other opencode processes or wait.`);
        }
        const jitter = Math.floor(Math.random() * Math.min(12, pollMs));
        await sleep(pollMs + jitter);
        pollMs = Math.min(pollMaxMs, Math.floor(pollMs * 1.35));
    }
}
|
|
506
|
+
/**
 * Helper wrapper: runs `opts.fn` while holding the repo lock and always
 * releases the lock afterwards, even when `fn` throws.
 */
export async function withRepoLock(opts) {
    const { lockPath, repoRoot, sessionId, owner, fn } = opts;
    const handle = await acquireRepoLock({ lockPath, repoRoot, sessionId, owner });
    try {
        return await fn();
    }
    finally {
        handle.release();
    }
}
|
|
523
|
+
/**
 * Get lock file status and diagnostics.
 * Returns `{ exists: false, path }` when no valid lock is present, otherwise a
 * detailed snapshot of the holder (pid liveness, age, staleness, identities).
 */
export function getLockStatus(lockPath, staleMs = 30_000) {
    const lock = readLock(lockPath);
    if (!lock) {
        return { exists: false, path: lockPath };
    }
    const updatedMs = parseISOToMs(lock.updated_at);
    return {
        exists: true,
        path: lockPath,
        pid: lock.pid,
        pidAlive: isPidAlive(lock.pid),
        instanceId: lock.instance_id,
        sessionId: lock.session_id,
        owner: lock.owner,
        leaseId: lock.lease_id,
        createdAt: lock.created_at,
        updatedAt: lock.updated_at,
        // Age is undefined when the stored timestamp is unparseable.
        ageMs: updatedMs !== null ? Date.now() - updatedMs : undefined,
        isStale: isStaleByAge(lock, staleMs),
        repoRoot: lock.repo_root,
        version: lock.v,
    };
}
|
|
556
|
+
/**
 * Attempt to remove a lock file if it's safe to do so.
 * Only removes locks with dead PIDs or stale timestamps.
 * Returns `{ removed, reason }` describing what happened.
 */
export function tryRemoveStaleLock(lockPath, staleMs = 30_000) {
    const lock = readLock(lockPath);
    if (!lock) {
        return { removed: false, reason: "No lock file found" };
    }
    // Age in whole seconds, computed lazily for the reason strings.
    const ageSeconds = () => Math.floor((Date.now() - (parseISOToMs(lock.updated_at) ?? 0)) / 1000);
    if (!isPidAlive(lock.pid)) {
        safeUnlink(lockPath);
        fsyncDirBestEffort(path.dirname(lockPath));
        return { removed: true, reason: `Dead PID ${lock.pid}` };
    }
    if (isStaleByAge(lock, staleMs)) {
        safeUnlink(lockPath);
        fsyncDirBestEffort(path.dirname(lockPath));
        return { removed: true, reason: `Stale lock (${ageSeconds()}s old, threshold ${staleMs / 1000}s)` };
    }
    return { removed: false, reason: `Lock is active (PID ${lock.pid} alive, age ${ageSeconds()}s)` };
}
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
import type { acquireRepoLock } from "./repo-lock";
type RepoLockAcquire = typeof acquireRepoLock;
/**
 * Acquire ONCE per workflow/session in this process.
 * Nested calls reuse the same held lock (no reacquire, no churn).
 *
 * @param deps - the `acquireRepoLock` implementation to use
 * @param opts - lock location, repo root, optional session/owner, and the
 *               workflow body `fn` that runs while the lock is held
 * @returns the value resolved by `opts.fn`
 */
export declare function workflowRepoLock<T>(deps: {
    acquireRepoLock: RepoLockAcquire;
}, opts: {
    lockPath: string;
    repoRoot: string;
    sessionId?: string;
    owner?: string;
    fn: () => Promise<T>;
}): Promise<T>;
export {};
|
|
@@ -0,0 +1,50 @@
|
|
|
1
|
+
// One held lock record per (lockPath, sessionId) in this process.
const HELD_BY_KEY = new Map();
/** Cache key for a lock path plus optional session id. */
function key(lockPath, sessionId) {
    return `${lockPath}::${sessionId ?? ""}`;
}
/**
 * Acquire ONCE per workflow/session in this process.
 * Nested calls reuse the same held lock (no reacquire, no churn).
 */
export async function workflowRepoLock(deps, opts) {
    const k = key(opts.lockPath, opts.sessionId);
    let held = HELD_BY_KEY.get(k);
    if (!held) {
        // IMPORTANT: this is tuned for "hold for whole workflow".
        const handle = await deps.acquireRepoLock({
            lockPath: opts.lockPath,
            repoRoot: opts.repoRoot,
            sessionId: opts.sessionId,
            owner: opts.owner,
            retryMs: 30_000,
            staleMs: 30_000, // Reduced from 2 minutes to 30 seconds for faster stale lock recovery
            heartbeatMs: 200,
            minWriteMs: 800,
            pollMs: 20,
            pollMaxMs: 250,
        });
        held = { release: handle.release, depth: 0 };
        HELD_BY_KEY.set(k, held);
    }
    held.depth += 1;
    try {
        return await opts.fn();
    }
    finally {
        held.depth -= 1;
        // Outermost frame releases the real lock exactly once.
        if (held.depth <= 0) {
            HELD_BY_KEY.delete(k);
            held.release();
        }
    }
}
|
package/dist/src/tools/index.js
CHANGED
|
@@ -11,6 +11,7 @@ import { createAstroRepairTool } from "./repair";
|
|
|
11
11
|
import { createAstroHealthTool } from "./health";
|
|
12
12
|
import { createAstroResetTool } from "./reset";
|
|
13
13
|
import { createAstroMetricsTool } from "./metrics";
|
|
14
|
+
import { createAstroLockStatusTool } from "./lock";
|
|
14
15
|
export function createAstroTools(opts) {
|
|
15
16
|
const { ctx, config, agents, runtime } = opts;
|
|
16
17
|
const { db } = runtime;
|
|
@@ -22,6 +23,7 @@ export function createAstroTools(opts) {
|
|
|
22
23
|
tools.astro_health = createAstroHealthTool({ ctx, config, db });
|
|
23
24
|
tools.astro_reset = createAstroResetTool({ ctx, config, db });
|
|
24
25
|
tools.astro_metrics = createAstroMetricsTool({ ctx, config });
|
|
26
|
+
tools.astro_lock_status = createAstroLockStatusTool({ ctx });
|
|
25
27
|
// Recovery tool - available even in limited mode to allow DB initialization
|
|
26
28
|
tools.astro_init = createAstroInitTool({ ctx, config, runtime });
|
|
27
29
|
// Database-dependent tools
|
|
@@ -83,6 +85,7 @@ export function createAstroTools(opts) {
|
|
|
83
85
|
["_astro_health", "astro_health"],
|
|
84
86
|
["_astro_reset", "astro_reset"],
|
|
85
87
|
["_astro_metrics", "astro_metrics"],
|
|
88
|
+
["_astro_lock_status", "astro_lock_status"],
|
|
86
89
|
];
|
|
87
90
|
// Only add aliases for tools that exist
|
|
88
91
|
for (const [alias, target] of aliases) {
|