astrocode-workflow 0.3.2 → 0.3.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "astrocode-workflow",
3
- "version": "0.3.2",
3
+ "version": "0.3.4",
4
4
  "type": "module",
5
5
  "main": "dist/index.js",
6
6
  "types": "dist/index.d.ts",
@@ -0,0 +1,36 @@
1
+ // src/astro/workflow-runner.ts
2
+ import { acquireRepoLock } from "../state/repo-lock";
3
+ import { workflowRepoLock } from "../state/workflow-repo-lock";
4
+
5
+ /**
6
+ * This is the only place you should hold the repo lock.
7
+ * Everything that mutates the repo (tool calls, steps) runs inside this scope.
8
+ *
9
+ * Replace the internals with your actual astro/opencode driver loop.
10
+ */
11
+ export async function runAstroWorkflow(opts: {
12
+ lockPath: string;
13
+ repoRoot: string;
14
+ sessionId: string;
15
+ owner?: string;
16
+
17
+ // Hook in your existing workflow engine
18
+ proceedOneStep: () => Promise<{ done: boolean }>;
19
+ }): Promise<void> {
20
+ await workflowRepoLock(
21
+ { acquireRepoLock },
22
+ {
23
+ lockPath: opts.lockPath,
24
+ repoRoot: opts.repoRoot,
25
+ sessionId: opts.sessionId,
26
+ owner: opts.owner,
27
+ fn: async () => {
28
+ // ✅ Lock is held ONCE for the entire run. Tool calls can "rattle through".
29
+ while (true) {
30
+ const { done } = await opts.proceedOneStep();
31
+ if (done) return;
32
+ }
33
+ },
34
+ }
35
+ );
36
+ }
@@ -1,13 +1,37 @@
1
1
  // src/state/repo-lock.ts
2
2
  import fs from "node:fs";
3
3
  import path from "node:path";
4
+ import crypto from "node:crypto";
5
+
6
+ const LOCK_VERSION = 2;
7
+
8
+ // Process-stable identifier for this Node process instance.
9
+ const PROCESS_INSTANCE_ID = crypto.randomUUID();
10
+
11
+ // Hard guardrails against garbage/corruption.
12
+ const MAX_LOCK_BYTES = 64 * 1024; // 64KB; lock file should be tiny.
13
+
14
+ // How many times we’ll attempt "atomic-ish replace" before giving up.
15
+ const ATOMIC_REPLACE_RETRIES = 3;
4
16
 
5
17
  type LockFile = {
18
+ v: number;
19
+
6
20
  pid: number;
7
21
  created_at: string;
8
22
  updated_at: string;
9
23
  repo_root: string;
24
+
25
+ // Identifies the running process instance (process-stable).
26
+ instance_id: string;
27
+
28
+ // Logical session owner (propagated by opencode).
10
29
  session_id?: string;
30
+
31
+ // Fencing token: changes every successful acquire.
32
+ // Prevents ABA release deleting someone else’s lock.
33
+ lease_id: string;
34
+
11
35
  owner?: string; // optional human-readable owner
12
36
  };
13
37
 
@@ -19,117 +43,534 @@ function sleep(ms: number) {
19
43
  return new Promise((r) => setTimeout(r, ms));
20
44
  }
21
45
 
46
+ /**
47
+ * PID existence check:
48
+ * - EPERM => process exists but we can't signal it (treat as alive)
49
+ * - ESRCH => process does not exist (dead)
50
+ */
22
51
  function isPidAlive(pid: number): boolean {
23
52
  try {
24
- // Signal 0 checks existence without killing.
25
53
  (process as any).kill(pid, 0);
26
54
  return true;
55
+ } catch (err: any) {
56
+ const code = err?.code;
57
+ if (code === "EPERM") return true;
58
+ if (code === "ESRCH") return false;
59
+ // Unknown: conservative = don't evict.
60
+ return true;
61
+ }
62
+ }
63
+
64
+ function parseISOToMs(iso: string): number | null {
65
+ const t = Date.parse(iso);
66
+ if (Number.isNaN(t)) return null;
67
+ return t;
68
+ }
69
+
70
+ function isStaleByAge(existing: LockFile, staleMs: number): boolean {
71
+ const updatedMs = parseISOToMs(existing.updated_at);
72
+ if (updatedMs === null) return true;
73
+ return Date.now() - updatedMs > staleMs;
74
+ }
75
+
76
+ function safeUnlink(p: string) {
77
+ try {
78
+ fs.unlinkSync(p);
27
79
  } catch {
28
- return false;
80
+ // ignore
29
81
  }
30
82
  }
31
83
 
84
+ /**
85
+ * Reads & validates lock file defensively.
86
+ * Supports both v2 JSON format and legacy PID-only format for compatibility.
87
+ * Returns null on any parse/validation failure.
88
+ */
32
89
  function readLock(lockPath: string): LockFile | null {
33
90
  try {
34
- const raw = fs.readFileSync(lockPath, "utf8");
35
- const parsed = JSON.parse(raw) as LockFile;
36
- if (!parsed || typeof parsed.pid !== "number") return null;
37
- return parsed;
91
+ const st = fs.statSync(lockPath);
92
+ if (!st.isFile()) return null;
93
+ if (st.size <= 0 || st.size > MAX_LOCK_BYTES) return null;
94
+
95
+ const raw = fs.readFileSync(lockPath, "utf8").trim();
96
+
97
+ // Try v2 JSON first
98
+ try {
99
+ const parsed = JSON.parse(raw) as LockFile;
100
+ if (parsed && typeof parsed === "object" && parsed.v === LOCK_VERSION) {
101
+ if (typeof parsed.pid !== "number") return null;
102
+ if (typeof parsed.created_at !== "string") return null;
103
+ if (typeof parsed.updated_at !== "string") return null;
104
+ if (typeof parsed.repo_root !== "string") return null;
105
+ if (typeof parsed.instance_id !== "string") return null;
106
+ if (typeof parsed.lease_id !== "string") return null;
107
+
108
+ if (parsed.session_id !== undefined && typeof parsed.session_id !== "string") return null;
109
+ if (parsed.owner !== undefined && typeof parsed.owner !== "string") return null;
110
+
111
+ return parsed;
112
+ }
113
+ } catch {
114
+ // Not JSON, try legacy format
115
+ }
116
+
117
+ // Legacy format: just PID as number string
118
+ const legacyPid = parseInt(raw, 10);
119
+ if (Number.isNaN(legacyPid) || legacyPid <= 0) return null;
120
+
121
+ // Convert legacy to v2 format
122
+ const now = nowISO();
123
+ const leaseId = crypto.randomUUID();
124
+ return {
125
+ v: LOCK_VERSION,
126
+ pid: legacyPid,
127
+ created_at: now, // Approximate
128
+ updated_at: now,
129
+ repo_root: "", // Unknown, will be filled by caller
130
+ instance_id: PROCESS_INSTANCE_ID, // Assume same instance
131
+ session_id: undefined,
132
+ lease_id: leaseId,
133
+ owner: "legacy-lock",
134
+ };
38
135
  } catch {
39
136
  return null;
40
137
  }
41
138
  }
42
139
 
43
- function writeLock(lockPath: string, lock: LockFile) {
44
- fs.mkdirSync(path.dirname(lockPath), { recursive: true });
45
- fs.writeFileSync(lockPath, JSON.stringify(lock, null, 2));
140
+ /**
141
+ * Best-effort directory fsync:
142
+ * Helps durability on crash for some filesystems (mostly POSIX).
143
+ * On platforms where opening a directory fails, we ignore.
144
+ */
145
+ function fsyncDirBestEffort(dirPath: string) {
146
+ try {
147
+ const fd = fs.openSync(dirPath, "r");
148
+ try {
149
+ fs.fsyncSync(fd);
150
+ } finally {
151
+ fs.closeSync(fd);
152
+ }
153
+ } catch {
154
+ // ignore (not portable)
155
+ }
156
+ }
157
+
158
+ /**
159
+ * "Atomic-ish" replace:
160
+ * - Write temp file
161
+ * - Try rename over target (POSIX generally atomic)
162
+ * - Windows can fail if target exists/locked; fallback to unlink+rename (not atomic, but best-effort)
163
+ * - Best-effort directory fsync after rename
164
+ */
165
+ function writeLockAtomicish(lockPath: string, lock: LockFile) {
166
+ const dir = path.dirname(lockPath);
167
+ fs.mkdirSync(dir, { recursive: true });
168
+
169
+ const tmp = `${lockPath}.${(process as any).pid}.${Date.now()}.${crypto.randomUUID()}.tmp`;
170
+ const body = JSON.stringify(lock); // compact JSON to reduce IO
171
+
172
+ fs.writeFileSync(tmp, body, "utf8");
173
+
174
+ let lastErr: any = null;
175
+ for (let i = 0; i < ATOMIC_REPLACE_RETRIES; i++) {
176
+ try {
177
+ fs.renameSync(tmp, lockPath);
178
+ fsyncDirBestEffort(dir);
179
+ return;
180
+ } catch (err: any) {
181
+ lastErr = err;
182
+ const code = err?.code;
183
+
184
+ // Common Windows-ish cases where rename over existing fails.
185
+ if (code === "EEXIST" || code === "EPERM" || code === "ENOTEMPTY") {
186
+ safeUnlink(lockPath);
187
+ continue;
188
+ }
189
+
190
+ // If tmp vanished somehow, stop.
191
+ if (code === "ENOENT") break;
192
+
193
+ continue;
194
+ }
195
+ }
196
+
197
+ safeUnlink(tmp);
198
+ if (lastErr) throw lastErr;
199
+ throw new Error(`Failed to replace lock file: ${lockPath}`);
46
200
  }
47
201
 
48
- function safeUnlink(lockPath: string) {
202
+ /**
203
+ * Atomic "create if not exists" using exclusive open.
204
+ */
205
+ function tryCreateExclusiveFile(filePath: string, contentsUtf8: string): boolean {
206
+ fs.mkdirSync(path.dirname(filePath), { recursive: true });
207
+
49
208
  try {
50
- fs.unlinkSync(lockPath);
51
- } catch {
52
- // ignore
209
+ const fd = fs.openSync(filePath, "wx");
210
+ try {
211
+ fs.writeFileSync(fd, contentsUtf8, "utf8");
212
+ fs.fsyncSync(fd);
213
+ } finally {
214
+ fs.closeSync(fd);
215
+ }
216
+ fsyncDirBestEffort(path.dirname(filePath));
217
+ return true;
218
+ } catch (err: any) {
219
+ if (err?.code === "EEXIST") return false;
220
+ throw err;
53
221
  }
54
222
  }
55
223
 
224
+ function tryCreateRepoLockExclusive(lockPath: string, lock: LockFile): boolean {
225
+ return tryCreateExclusiveFile(lockPath, JSON.stringify(lock));
226
+ }
227
+
228
+ /**
229
+ * In-process lock cache:
230
+ * Prevents repeated acquire/release cycles during tool-call storms.
231
+ */
232
+ type CachedHandle = {
233
+ key: string;
234
+ lockPath: string;
235
+ sessionId?: string;
236
+ leaseId: string;
237
+ refCount: number;
238
+ heartbeatStop: () => void;
239
+ releaseOnce: () => void;
240
+ };
241
+
242
+ const ACTIVE_LOCKS = new Map<string, CachedHandle>();
243
+
244
+ function cacheKey(lockPath: string, sessionId?: string): string {
245
+ return `${lockPath}::${sessionId ?? ""}`;
246
+ }
247
+
248
+ /**
249
+ * Heartbeat loop:
250
+ * - setTimeout (not setInterval) to avoid backlog drift under load
251
+ * - Minimizes writes by enforcing minWriteMs
252
+ * - ABA-safe: only refreshes if lock matches our lease_id and process identity
253
+ * - Avoids unnecessary writes if lock already has a recent updated_at
254
+ */
255
+ function startHeartbeat(opts: {
256
+ lockPath: string;
257
+ repoRoot: string;
258
+ sessionId?: string;
259
+ owner?: string;
260
+ leaseId: string;
261
+ heartbeatMs: number;
262
+ minWriteMs: number;
263
+ }): () => void {
264
+ let stopped = false;
265
+ let lastWriteAt = 0;
266
+ let timer: NodeJS.Timeout | null = null;
267
+
268
+ const tick = () => {
269
+ if (stopped) return;
270
+
271
+ const now = Date.now();
272
+ const shouldAttempt = now - lastWriteAt >= opts.minWriteMs;
273
+
274
+ if (shouldAttempt) {
275
+ const existing = readLock(opts.lockPath);
276
+
277
+ if (
278
+ existing &&
279
+ existing.lease_id === opts.leaseId &&
280
+ existing.pid === (process as any).pid &&
281
+ existing.instance_id === PROCESS_INSTANCE_ID
282
+ ) {
283
+ const updatedMs = parseISOToMs(existing.updated_at);
284
+ const isFresh = updatedMs !== null && now - updatedMs < opts.minWriteMs;
285
+
286
+ if (!isFresh) {
287
+ writeLockAtomicish(opts.lockPath, {
288
+ ...existing,
289
+ updated_at: nowISO(),
290
+ repo_root: opts.repoRoot,
291
+ session_id: opts.sessionId ?? existing.session_id,
292
+ owner: opts.owner ?? existing.owner,
293
+ });
294
+ lastWriteAt = now;
295
+ } else {
296
+ lastWriteAt = now;
297
+ }
298
+ }
299
+ }
300
+
301
+ timer = setTimeout(tick, opts.heartbeatMs);
302
+ (timer as any).unref?.();
303
+ };
304
+
305
+ tick();
306
+
307
+ return () => {
308
+ stopped = true;
309
+ if (timer) clearTimeout(timer);
310
+ };
311
+ }
312
+
313
+ /**
314
+ * Shutdown cleanup:
315
+ * Best-effort release on normal termination signals.
316
+ */
317
+ let EXIT_HOOK_INSTALLED = false;
318
+ function installExitHookOnce() {
319
+ if (EXIT_HOOK_INSTALLED) return;
320
+ EXIT_HOOK_INSTALLED = true;
321
+
322
+ const cleanup = () => {
323
+ for (const [key, h] of ACTIVE_LOCKS.entries()) {
324
+ try {
325
+ ACTIVE_LOCKS.delete(key);
326
+ h.heartbeatStop();
327
+ h.releaseOnce();
328
+ } catch {
329
+ // ignore
330
+ }
331
+ }
332
+ };
333
+
334
+ (process as any).once("exit", cleanup);
335
+ (process as any).once("SIGINT", () => {
336
+ cleanup();
337
+ (process as any).exit(130);
338
+ });
339
+ (process as any).once("SIGTERM", () => {
340
+ cleanup();
341
+ (process as any).exit(143);
342
+ });
343
+ }
344
+
56
345
  /**
57
346
  * Acquire a repo-scoped lock with:
58
- * - Re-entrant behavior for SAME PID (your own process can call tools repeatedly)
59
- * - Stale lock eviction for dead PIDs
60
- * - Best-effort contention retry
347
+ * - ✅ process-local caching + refcount (efficient repeated tool calls)
348
+ * - heartbeat lease + stale recovery
349
+ * - atomic create (`wx`) + portable replace fallback
350
+ * - ✅ dead PID eviction + stale eviction
351
+ * - ✅ no live takeover (even same session) to avoid concurrency stomps
352
+ * - ✅ ABA-safe release via lease_id fencing
353
+ * - ✅ exponential backoff + jitter to reduce FS churn
61
354
  */
62
355
  export async function acquireRepoLock(opts: {
63
356
  lockPath: string;
64
357
  repoRoot: string;
65
358
  sessionId?: string;
66
359
  owner?: string;
67
- retryMs?: number; // default 2000
68
- pollMs?: number; // default 100
360
+
361
+ retryMs?: number; // default 8000
362
+ pollMs?: number; // default 20
363
+ pollMaxMs?: number; // default 250
364
+ staleMs?: number; // default 2 minutes
365
+ heartbeatMs?: number; // default 200
366
+ minWriteMs?: number; // default 800
69
367
  }): Promise<{ release: () => void }> {
368
+ installExitHookOnce();
369
+
70
370
  const { lockPath, repoRoot, sessionId, owner } = opts;
71
- const retryMs = opts.retryMs ?? 2000;
72
- const pollMs = opts.pollMs ?? 100;
73
371
 
74
- const myPid = (process as any).pid;
372
+ const retryMs = opts.retryMs ?? 8000;
373
+ const pollBaseMs = opts.pollMs ?? 20;
374
+ const pollMaxMs = opts.pollMaxMs ?? 250;
375
+
376
+ const heartbeatMs = opts.heartbeatMs ?? 200;
377
+ const minWriteMs = opts.minWriteMs ?? 800;
378
+
379
+ // Ensure stale is comfortably above minWriteMs to prevent false-stale under load.
380
+ const staleMs = Math.max(opts.staleMs ?? 2 * 60 * 1000, minWriteMs * 8);
381
+
382
+ // ✅ Fast path: reuse cached handle in the same process/session.
383
+ const key = cacheKey(lockPath, sessionId);
384
+ const cached = ACTIVE_LOCKS.get(key);
385
+ if (cached) {
386
+ cached.refCount += 1;
387
+ return {
388
+ release: () => {
389
+ cached.refCount -= 1;
390
+ if (cached.refCount <= 0) {
391
+ ACTIVE_LOCKS.delete(key);
392
+ cached.heartbeatStop();
393
+ cached.releaseOnce();
394
+ }
395
+ },
396
+ };
397
+ }
398
+
399
+ const myPid = ((process as any).pid as number);
75
400
  const startedAt = Date.now();
401
+ let pollMs = pollBaseMs;
76
402
 
77
403
  while (true) {
78
404
  const existing = readLock(lockPath);
79
405
 
80
- // No lock -> take it.
406
+ // No lock (or unreadable/invalid) -> try create.
81
407
  if (!existing) {
82
408
  const now = nowISO();
83
- writeLock(lockPath, {
409
+ const leaseId = crypto.randomUUID();
410
+
411
+ const candidate: LockFile = {
412
+ v: LOCK_VERSION,
84
413
  pid: myPid,
85
414
  created_at: now,
86
415
  updated_at: now,
87
416
  repo_root: repoRoot,
417
+ instance_id: PROCESS_INSTANCE_ID,
88
418
  session_id: sessionId,
419
+ lease_id: leaseId,
89
420
  owner,
90
- });
421
+ };
91
422
 
92
- // Verify we actually own it (race safety)
93
- const verify = readLock(lockPath);
94
- if (verify && verify.pid === myPid) {
95
- return {
96
- release: () => {
97
- const cur = readLock(lockPath);
98
- // Only the owner PID removes the lock.
99
- if (cur && cur.pid === myPid) safeUnlink(lockPath);
100
- },
423
+ const created = tryCreateRepoLockExclusive(lockPath, candidate);
424
+ if (created) {
425
+ const heartbeatStop = startHeartbeat({
426
+ lockPath,
427
+ repoRoot,
428
+ sessionId,
429
+ owner,
430
+ leaseId,
431
+ heartbeatMs,
432
+ minWriteMs,
433
+ });
434
+
435
+ const releaseOnce = () => {
436
+ const cur = readLock(lockPath);
437
+ if (!cur) return;
438
+
439
+ // ABA-safe
440
+ if (cur.lease_id !== leaseId) return;
441
+
442
+ // Strict identity: only exact process instance can delete.
443
+ if (cur.pid !== myPid) return;
444
+ if (cur.instance_id !== PROCESS_INSTANCE_ID) return;
445
+
446
+ safeUnlink(lockPath);
447
+ fsyncDirBestEffort(path.dirname(lockPath));
101
448
  };
102
- }
103
449
 
104
- // Race lost; retry.
105
- } else {
106
- // Re-entrant: SAME PID owns lock -> refresh timestamp and proceed.
107
- if (existing.pid === myPid) {
108
- const now = nowISO();
109
- writeLock(lockPath, { ...existing, updated_at: now, session_id: sessionId ?? existing.session_id, owner: owner ?? existing.owner });
450
+ const handle: CachedHandle = {
451
+ key,
452
+ lockPath,
453
+ sessionId,
454
+ leaseId,
455
+ refCount: 1,
456
+ heartbeatStop,
457
+ releaseOnce,
458
+ };
459
+ ACTIVE_LOCKS.set(key, handle);
460
+
110
461
  return {
111
462
  release: () => {
112
- const cur = readLock(lockPath);
113
- if (cur && cur.pid === myPid) safeUnlink(lockPath);
463
+ const h = ACTIVE_LOCKS.get(key);
464
+ if (!h) return;
465
+ h.refCount -= 1;
466
+ if (h.refCount <= 0) {
467
+ ACTIVE_LOCKS.delete(key);
468
+ h.heartbeatStop();
469
+ h.releaseOnce();
470
+ }
114
471
  },
115
472
  };
116
473
  }
117
474
 
118
- // Another PID: if dead -> evict stale lock
119
- if (!isPidAlive(existing.pid)) {
475
+ // Race lost; reset backoff and loop.
476
+ pollMs = pollBaseMs;
477
+ continue;
478
+ }
479
+
480
+ // Re-entrant by SAME PROCESS IDENTITY (pid+instance), or legacy lock with same PID.
481
+ if (existing.pid === myPid && (existing.instance_id === PROCESS_INSTANCE_ID || existing.owner === "legacy-lock")) {
482
+ const leaseId = crypto.randomUUID();
483
+
484
+ writeLockAtomicish(lockPath, {
485
+ ...existing,
486
+ v: LOCK_VERSION,
487
+ updated_at: nowISO(),
488
+ repo_root: repoRoot,
489
+ instance_id: PROCESS_INSTANCE_ID, // Upgrade legacy
490
+ session_id: sessionId ?? existing.session_id,
491
+ owner: owner ?? existing.owner,
492
+ lease_id: leaseId,
493
+ });
494
+
495
+ const heartbeatStop = startHeartbeat({
496
+ lockPath,
497
+ repoRoot,
498
+ sessionId: sessionId ?? existing.session_id,
499
+ owner: owner ?? existing.owner,
500
+ leaseId,
501
+ heartbeatMs,
502
+ minWriteMs,
503
+ });
504
+
505
+ const releaseOnce = () => {
506
+ const cur = readLock(lockPath);
507
+ if (!cur) return;
508
+ if (cur.lease_id !== leaseId) return;
509
+ if (cur.pid !== myPid) return;
510
+ if (cur.instance_id !== PROCESS_INSTANCE_ID) return;
120
511
  safeUnlink(lockPath);
121
- // loop back and acquire
122
- } else {
123
- // Alive and not us -> wait bounded
124
- if (Date.now() - startedAt > retryMs) {
125
- throw new Error(
126
- `Astrocode lock is already held (${lockPath}). pid=${existing.pid} (alive). ` +
127
- `Close other opencode processes or wait.`
128
- );
129
- }
130
- await sleep(pollMs);
131
- }
512
+ fsyncDirBestEffort(path.dirname(lockPath));
513
+ };
514
+
515
+ const handle: CachedHandle = {
516
+ key,
517
+ lockPath,
518
+ sessionId,
519
+ leaseId,
520
+ refCount: 1,
521
+ heartbeatStop,
522
+ releaseOnce,
523
+ };
524
+ ACTIVE_LOCKS.set(key, handle);
525
+
526
+ return {
527
+ release: () => {
528
+ const h = ACTIVE_LOCKS.get(key);
529
+ if (!h) return;
530
+ h.refCount -= 1;
531
+ if (h.refCount <= 0) {
532
+ ACTIVE_LOCKS.delete(key);
533
+ h.heartbeatStop();
534
+ h.releaseOnce();
535
+ }
536
+ },
537
+ };
132
538
  }
539
+
540
+ // 🚫 No live takeover (even same session).
541
+ // We only evict dead/stale locks.
542
+
543
+ const pidAlive = isPidAlive(existing.pid);
544
+ const staleByAge = isStaleByAge(existing, staleMs);
545
+
546
+ if (!pidAlive || staleByAge) {
547
+ safeUnlink(lockPath);
548
+ fsyncDirBestEffort(path.dirname(lockPath));
549
+ pollMs = pollBaseMs;
550
+ continue;
551
+ }
552
+
553
+ // Alive and not us -> bounded wait with exponential backoff + jitter.
554
+ if (Date.now() - startedAt > retryMs) {
555
+ const ownerBits = [
556
+ `pid=${existing.pid}`,
557
+ existing.session_id ? `session=${existing.session_id}` : null,
558
+ existing.owner ? `owner=${existing.owner}` : null,
559
+ `updated_at=${existing.updated_at}`,
560
+ sessionId && existing.session_id === sessionId ? `(same-session waiting)` : null,
561
+ ]
562
+ .filter(Boolean)
563
+ .join(" ");
564
+
565
+ throw new Error(
566
+ `Astrocode lock is already held (${lockPath}). ${ownerBits}. ` +
567
+ `Close other opencode processes or wait.`
568
+ );
569
+ }
570
+
571
+ const jitter = Math.floor(Math.random() * Math.min(12, pollMs));
572
+ await sleep(pollMs + jitter);
573
+ pollMs = Math.min(pollMaxMs, Math.floor(pollMs * 1.35));
133
574
  }
134
575
  }
135
576
 
@@ -0,0 +1,74 @@
1
+ // src/state/workflow-repo-lock.ts
2
+ import type { acquireRepoLock } from "./repo-lock";
3
+
4
+ type RepoLockAcquire = typeof acquireRepoLock;
5
+
6
+ type Held = {
7
+ release: () => void;
8
+ depth: number;
9
+ };
10
+
11
+ const HELD_BY_KEY = new Map<string, Held>();
12
+
13
+ function key(lockPath: string, sessionId?: string) {
14
+ return `${lockPath}::${sessionId ?? ""}`;
15
+ }
16
+
17
+ /**
18
+ * Acquire ONCE per workflow/session in this process.
19
+ * Nested calls reuse the same held lock (no reacquire, no churn).
20
+ */
21
+ export async function workflowRepoLock<T>(
22
+ deps: { acquireRepoLock: RepoLockAcquire },
23
+ opts: {
24
+ lockPath: string;
25
+ repoRoot: string;
26
+ sessionId?: string;
27
+ owner?: string;
28
+ fn: () => Promise<T>;
29
+ }
30
+ ): Promise<T> {
31
+ const k = key(opts.lockPath, opts.sessionId);
32
+ const existing = HELD_BY_KEY.get(k);
33
+
34
+ if (existing) {
35
+ existing.depth += 1;
36
+ try {
37
+ return await opts.fn();
38
+ } finally {
39
+ existing.depth -= 1;
40
+ if (existing.depth <= 0) {
41
+ HELD_BY_KEY.delete(k);
42
+ existing.release();
43
+ }
44
+ }
45
+ }
46
+
47
+ // IMPORTANT: this is tuned for "hold for whole workflow".
48
+ const handle = await deps.acquireRepoLock({
49
+ lockPath: opts.lockPath,
50
+ repoRoot: opts.repoRoot,
51
+ sessionId: opts.sessionId,
52
+ owner: opts.owner,
53
+
54
+ retryMs: 30_000,
55
+ staleMs: 2 * 60_000,
56
+ heartbeatMs: 200,
57
+ minWriteMs: 800,
58
+ pollMs: 20,
59
+ pollMaxMs: 250,
60
+ });
61
+
62
+ const held: Held = { release: handle.release, depth: 1 };
63
+ HELD_BY_KEY.set(k, held);
64
+
65
+ try {
66
+ return await opts.fn();
67
+ } finally {
68
+ held.depth -= 1;
69
+ if (held.depth <= 0) {
70
+ HELD_BY_KEY.delete(k);
71
+ held.release();
72
+ }
73
+ }
74
+ }
package/src/tools/init.ts CHANGED
@@ -7,7 +7,7 @@ import { ensureSchema, openSqlite, configurePragmas } from "../state/db";
7
7
  import { getAstroPaths, ensureAstroDirs } from "../shared/paths";
8
8
  import { nowISO } from "../shared/time";
9
9
  import { sha256Hex } from "../shared/hash";
10
- import { withRepoLock } from "../state/repo-lock";
10
+
11
11
 
12
12
  type RuntimeState = {
13
13
  db: SqliteDb | null;
@@ -30,15 +30,6 @@ export function createAstroInitTool(opts: { ctx: any; config: AstrocodeConfig; r
30
30
  },
31
31
  execute: async ({ ensure_spec, spec_placeholder }) => {
32
32
  const repoRoot = ctx.directory as string;
33
- const lockPath = path.join(repoRoot, ".astro", "astro.lock");
34
- const sessionId = (ctx as any).sessionID as string | undefined;
35
-
36
- return withRepoLock({
37
- lockPath,
38
- repoRoot,
39
- sessionId,
40
- owner: "astro_init",
41
- fn: async () => {
42
33
  const paths = getAstroPaths(repoRoot, config.db.path);
43
34
  ensureAstroDirs(paths);
44
35
 
@@ -116,16 +107,14 @@ export function createAstroInitTool(opts: { ctx: any; config: AstrocodeConfig; r
116
107
  ? `Next: run /astro-status. (DB recovered in-process.)`
117
108
  : `Next: restart the agent/runtime if Astrocode is still in Limited Mode, then run /astro-status.`,
118
109
  ].join("\n");
119
- } finally {
120
- // Only close if this tool opened it AND we did not publish it for ongoing use.
121
- if (!hadDbAlready && !publishedToRuntime && db && typeof db.close === "function") {
122
- try {
123
- db.close();
124
- } catch {}
125
- }
126
- }
127
- },
128
- });
110
+ } finally {
111
+ // Only close if this tool opened it AND we did not publish it for ongoing use.
112
+ if (!hadDbAlready && !publishedToRuntime && db && typeof db.close === "function") {
113
+ try {
114
+ db.close();
115
+ } catch {}
116
+ }
117
+ }
129
118
  },
130
119
  });
131
120
  }
@@ -1,12 +1,11 @@
1
1
  import { tool, type ToolDefinition } from "@opencode-ai/plugin/tool";
2
- import path from "node:path";
3
2
  import type { AstrocodeConfig } from "../config/schema";
4
3
  import type { SqliteDb } from "../state/db";
5
4
  import { withTx } from "../state/db";
6
5
  import { repairState, formatRepairReport } from "../workflow/repair";
7
6
  import { putArtifact } from "../workflow/artifacts";
8
7
  import { nowISO } from "../shared/time";
9
- import { withRepoLock } from "../state/repo-lock";
8
+
10
9
 
11
10
  export function createAstroRepairTool(opts: { ctx: any; config: AstrocodeConfig; db: SqliteDb }): ToolDefinition {
12
11
  const { ctx, config, db } = opts;
@@ -18,27 +17,16 @@ export function createAstroRepairTool(opts: { ctx: any; config: AstrocodeConfig;
18
17
  },
19
18
  execute: async ({ write_report_artifact }) => {
20
19
  const repoRoot = ctx.directory as string;
21
- const lockPath = path.join(repoRoot, ".astro", "astro.lock");
22
- const sessionId = (ctx as any).sessionID as string | undefined;
23
-
24
- return withRepoLock({
25
- lockPath,
26
- repoRoot,
27
- sessionId,
28
- owner: "astro_repair",
29
- fn: async () => {
30
- const report = withTx(db, () => repairState(db, config));
31
- const md = formatRepairReport(report);
20
+ const report = withTx(db, () => repairState(db, config));
21
+ const md = formatRepairReport(report);
32
22
 
33
- if (write_report_artifact) {
34
- const rel = `.astro/repair/repair_${nowISO().replace(/[:.]/g, "-")}.md`;
35
- const a = putArtifact({ repoRoot, db, run_id: null, stage_key: null, type: "log", rel_path: rel, content: md, meta: { kind: "repair" } });
36
- return md + `\n\nReport saved: ${rel} (artifact=${a.artifact_id})`;
37
- }
23
+ if (write_report_artifact) {
24
+ const rel = `.astro/repair/repair_${nowISO().replace(/[:.]/g, "-")}.md`;
25
+ const a = putArtifact({ repoRoot, db, run_id: null, stage_key: null, type: "log", rel_path: rel, content: md, meta: { kind: "repair" } });
26
+ return md + `\n\nReport saved: ${rel} (artifact=${a.artifact_id})`;
27
+ }
38
28
 
39
- return md;
40
- },
41
- });
29
+ return md;
42
30
  },
43
31
  });
44
32
  }
@@ -13,7 +13,7 @@ import { getAstroPaths, ensureAstroDirs, toPosix } from "../shared/paths";
13
13
  import { failRun, getActiveRun, getStageRuns, startStage, completeRun } from "../workflow/state-machine";
14
14
  import { newEventId, newId } from "../state/ids";
15
15
  import { insertStory } from "../workflow/story-helpers";
16
- import { withRepoLock } from "../state/repo-lock";
16
+
17
17
 
18
18
  function nextStageKey(pipeline: StageKey[], current: StageKey): StageKey | null {
19
19
  const i = pipeline.indexOf(current);
@@ -129,15 +129,6 @@ export function createAstroStageCompleteTool(opts: { ctx: any; config: Astrocode
129
129
  },
130
130
  execute: async ({ run_id, stage_key, output_text, allow_new_stories, relation_reason }) => {
131
131
  const repoRoot = ctx.directory as string;
132
- const lockPath = path.join(repoRoot, ".astro", "astro.lock");
133
- const sessionId = (ctx as any).sessionID as string | undefined;
134
-
135
- return withRepoLock({
136
- lockPath,
137
- repoRoot,
138
- sessionId,
139
- owner: "astro_stage_complete",
140
- fn: async () => {
141
132
  const paths = getAstroPaths(repoRoot, config.db.path);
142
133
  ensureAstroDirs(paths);
143
134
 
@@ -401,8 +392,6 @@ Ensure JSON has required fields (stage_key, status) and valid syntax.`;
401
392
  lines.push(context);
402
393
 
403
394
  return lines.join("\n").trim();
404
- },
405
- });
406
395
  },
407
396
  });
408
397
  }
@@ -1,9 +1,8 @@
1
1
  import { tool, type ToolDefinition } from "@opencode-ai/plugin/tool";
2
- import path from "node:path";
3
2
  import type { AstrocodeConfig } from "../config/schema";
4
3
  import type { SqliteDb } from "../state/db";
5
4
  import { decideNextAction, getActiveRun, getStageRuns, getStory } from "../workflow/state-machine";
6
- import { withRepoLock } from "../state/repo-lock";
5
+
7
6
 
8
7
  function statusIcon(status: string): string {
9
8
  switch (status) {
@@ -57,17 +56,7 @@ export function createAstroStatusTool(opts: { ctx: any; config: AstrocodeConfig;
57
56
  ].join("\n");
58
57
  }
59
58
 
60
- const repoRoot = ctx.directory as string;
61
- const lockPath = path.join(repoRoot, ".astro", "astro.lock");
62
- const sessionId = (ctx as any).sessionID as string | undefined;
63
-
64
- return withRepoLock({
65
- lockPath,
66
- repoRoot,
67
- sessionId,
68
- owner: "astro_status",
69
- fn: async () => {
70
- try {
59
+ try {
71
60
  const active = getActiveRun(db);
72
61
 
73
62
  const lines: string[] = [];
@@ -141,9 +130,7 @@ export function createAstroStatusTool(opts: { ctx: any; config: AstrocodeConfig;
141
130
  `⛔ Database error.`,
142
131
  `Error: ${msg}`,
143
132
  ].join("\n");
144
- }
145
- },
146
- });
133
+ }
147
134
  },
148
135
  });
149
136
  }
@@ -1,5 +1,4 @@
1
1
  import { tool, type ToolDefinition } from "@opencode-ai/plugin/tool";
2
- import path from "node:path";
3
2
  import type { AstrocodeConfig } from "../config/schema";
4
3
  import type { SqliteDb } from "../state/db";
5
4
  import { withTx } from "../state/db";
@@ -7,11 +6,11 @@ import { nowISO } from "../shared/time";
7
6
  import type { StoryState } from "../state/types";
8
7
 
9
8
  import { insertStory } from "../workflow/story-helpers";
10
- import { withRepoLock } from "../state/repo-lock";
9
+
11
10
 
12
11
 
13
12
  export function createAstroStoryQueueTool(opts: { ctx: any; config: AstrocodeConfig; db: SqliteDb }): ToolDefinition {
14
- const { ctx, db } = opts;
13
+ const { db } = opts;
15
14
 
16
15
  return tool({
17
16
  description: "Create a queued story (ticket) in Astrocode. Returns story_key.",
@@ -22,30 +21,18 @@ export function createAstroStoryQueueTool(opts: { ctx: any; config: AstrocodeCon
22
21
  priority: tool.schema.number().int().default(0),
23
22
  },
24
23
  execute: async ({ title, body_md, epic_key, priority }) => {
25
- const repoRoot = ctx.directory as string;
26
- const lockPath = path.join(repoRoot, ".astro", "astro.lock");
27
- const sessionId = (ctx as any).sessionID as string | undefined;
28
-
29
- return withRepoLock({
30
- lockPath,
31
- repoRoot,
32
- sessionId,
33
- owner: "astro_story_queue",
34
- fn: async () => {
35
- const story_key = withTx(db, () => {
36
- const key = insertStory(db, { title, body_md, epic_key: epic_key ?? null, priority: priority ?? 0, state: 'queued' });
37
- return key;
38
- });
39
-
40
- return `✅ Queued story ${story_key}: ${title}`;
41
- },
24
+ const story_key = withTx(db, () => {
25
+ const key = insertStory(db, { title, body_md, epic_key: epic_key ?? null, priority: priority ?? 0, state: 'queued' });
26
+ return key;
42
27
  });
28
+
29
+ return `✅ Queued story ${story_key}: ${title}`;
43
30
  },
44
31
  });
45
32
  }
46
33
 
47
34
  export function createAstroStoryApproveTool(opts: { ctx: any; config: AstrocodeConfig; db: SqliteDb }): ToolDefinition {
48
- const { ctx, db } = opts;
35
+ const { db } = opts;
49
36
 
50
37
  return tool({
51
38
  description: "Approve a story so it becomes eligible to run.",
@@ -53,26 +40,14 @@ export function createAstroStoryApproveTool(opts: { ctx: any; config: AstrocodeC
53
40
  story_key: tool.schema.string().min(1),
54
41
  },
55
42
  execute: async ({ story_key }) => {
56
- const repoRoot = ctx.directory as string;
57
- const lockPath = path.join(repoRoot, ".astro", "astro.lock");
58
- const sessionId = (ctx as any).sessionID as string | undefined;
59
-
60
- return withRepoLock({
61
- lockPath,
62
- repoRoot,
63
- sessionId,
64
- owner: "astro_story_approve",
65
- fn: async () => {
66
- const now = nowISO();
67
- const row = db.prepare("SELECT story_key, state, title FROM stories WHERE story_key=?").get(story_key) as any;
68
- if (!row) throw new Error(`Story not found: ${story_key}`);
69
-
70
- if (row.state === "approved") return `ℹ️ Story ${story_key} already approved.`;
71
-
72
- db.prepare("UPDATE stories SET state='approved', approved_at=?, updated_at=? WHERE story_key=?").run(now, now, story_key);
73
- return `✅ Approved story ${story_key}: ${row.title}`;
74
- },
75
- });
43
+ const now = nowISO();
44
+ const row = db.prepare("SELECT story_key, state, title FROM stories WHERE story_key=?").get(story_key) as any;
45
+ if (!row) throw new Error(`Story not found: ${story_key}`);
46
+
47
+ if (row.state === "approved") return `ℹ️ Story ${story_key} already approved.`;
48
+
49
+ db.prepare("UPDATE stories SET state='approved', approved_at=?, updated_at=? WHERE story_key=?").run(now, now, story_key);
50
+ return `✅ Approved story ${story_key}: ${row.title}`;
76
51
  },
77
52
  });
78
53
  }
@@ -20,11 +20,12 @@ import { buildStageDirective, directiveHash } from "../workflow/directives";
20
20
  import { injectChatPrompt } from "../ui/inject";
21
21
  import { nowISO } from "../shared/time";
22
22
  import { newEventId } from "../state/ids";
23
- import { withRepoLock } from "../state/repo-lock";
23
+
24
24
  import { debug } from "../shared/log";
25
25
  import { createToastManager } from "../ui/toasts";
26
26
  import type { AgentConfig } from "@opencode-ai/sdk";
27
27
  import { acquireRepoLock } from "../state/repo-lock";
28
+ import { workflowRepoLock } from "../state/workflow-repo-lock";
28
29
 
29
30
  // Agent name mapping for case-sensitive resolution
30
31
  export const STAGE_TO_AGENT_MAP: Record<string, string> = {
@@ -191,12 +192,14 @@ export function createAstroWorkflowProceedTool(opts: { ctx: any; config: Astroco
191
192
  const lockPath = path.join(repoRoot, ".astro", "astro.lock");
192
193
  const sessionId = (ctx as any).sessionID as string | undefined;
193
194
 
194
- return withRepoLock({
195
- lockPath,
196
- repoRoot,
197
- sessionId,
198
- owner: "astro_workflow_proceed",
199
- fn: async () => {
195
+ return workflowRepoLock(
196
+ { acquireRepoLock },
197
+ {
198
+ lockPath,
199
+ repoRoot,
200
+ sessionId,
201
+ owner: "astro_workflow_proceed",
202
+ fn: async () => {
200
203
  const steps = Math.min(max_steps, config.workflow.loop_max_steps_hard_cap);
201
204
 
202
205
  const actions: string[] = [];
@@ -420,8 +423,8 @@ export function createAstroWorkflowProceedTool(opts: { ctx: any; config: Astroco
420
423
  }
421
424
 
422
425
  return lines.join("\n").trim();
423
- },
424
- });
426
+ },
427
+ });
425
428
  },
426
429
  });
427
430
  }