@tokenfactory/acc-runner 0.6.0 → 0.6.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -16,11 +16,13 @@
16
16
  import os from "node:os";
17
17
  import path from "node:path";
18
18
  import { execa } from "execa";
19
- import { normalizeUsage, parseClaudeJson, priceUsdCents, } from "./cost-pricing.js";
19
+ import { normalizeUsage, parseClaudeJson, priceUsdCents, toCliAlias, } from "./cost-pricing.js";
20
20
  import { git as defaultGit } from "./git.js";
21
21
  import { gh as defaultGh } from "./gh.js";
22
22
  import { writeMcpConfig as defaultWriteMcpConfig, } from "./mcp-spawn.js";
23
23
  import { branchForTask, prTitleForTask, renderTaskPrompt, } from "./prompt.js";
24
+ import { acquireTaskLock as defaultAcquireTaskLock, TaskLockHeldError, } from "./runtime/locks.js";
25
+ import { prepareTaskWorktree as defaultPrepareTaskWorktree, } from "./runtime/worktree.js";
24
26
  const LOG_BATCH_BYTES = 4 * 1024;
25
27
  /**
26
28
  * v0.6.0 REG-296: build the argv for `claude --print`. Splitting this
@@ -158,204 +160,407 @@ export function runTask(taskId, deps) {
158
160
  const gh = deps.gh ?? defaultGh;
159
161
  const spawnClaude = deps.spawnClaude ?? defaultSpawnClaude;
160
162
  const checkoutBase = deps.checkoutBase ?? defaultCheckoutBase;
163
+ const acquireLock = deps.acquireLock ?? defaultAcquireTaskLock;
164
+ const prepareWorktree = deps.prepareWorktree ?? defaultPrepareTaskWorktree;
161
165
  const postCostEvent = deps.postCostEvent ?? ((event) => defaultPostCostEvent(deps.supabase, event));
166
+ // Silence the unused-binding lint for `checkoutBase` — v0.11-F supersedes
167
+ // the v0.6.0 REG-301 pre-spawn `git checkout <baseBranch>` with the
168
+ // worktree's `add -B <branch> <path> <baseBranch>` start-point semantics,
169
+ // but the dep is still accepted for back-compat with test fixtures that
170
+ // inject a mock. Drop in v0.7.
171
+ void checkoutBase;
162
172
  let child = null;
163
173
  let cancelled = false;
164
- const promise = (async () => {
165
- // 1. running (no-op if already running due to a duplicate broadcast).
166
- const transition = await deps.supabase.rpc("transition_task", {
174
+ // v0.6.1 (v0.11-D): release lock rows on every terminal exit path
175
+ // below the claim. Best-effort — a release failure logs to stderr
176
+ // but does not change the outcome the runner reports to the caller.
177
+ // Calling release without first calling claim is a no-op at the
178
+ // RPC level, so wiring this into both pre-claim and post-claim
179
+ // returns would be safe; we only call it from post-claim returns
180
+ // to keep the code path obvious to a reader.
181
+ const releaseLocks = async () => {
182
+ const { error } = await deps.supabase.rpc("release_task_locks", {
167
183
  p_task_id: taskId,
168
- p_new_status: "running",
169
184
  });
170
- if (transition.error) {
171
- const msg = transition.error.message;
172
- // 22023 = invalid_task_transition. The task may already be past
173
- // 'running' (e.g. needs-review) — log and bail rather than crash.
174
- await appendEvent(deps.supabase, taskId, "error", {
175
- phase: "transition_to_running",
176
- error: msg,
177
- });
178
- return { taskId, status: "failed", phase: "transition_to_running", error: msg };
185
+ if (error) {
186
+ process.stderr.write(`[acc-runner] release_task_locks(${taskId}) failed: ${error.message}\n`);
179
187
  }
180
- // 2. Fetch task + adjacent rows.
181
- const fetched = await deps.supabase.rpc("fetch_task_for_runner", {
188
+ };
189
+ // v0.12-RESUME periodic signal loop. After the claim succeeds we
190
+ // bump acc.tasks.last_runner_signal_at every signalIntervalMs ms so
191
+ // the v0.12 /5m sweep distinguishes "runner alive, work in flight"
192
+ // from "runner crashed, task stuck at running". Self-rescheduling
193
+ // setTimeout (not setInterval) so a slow RPC doesn't queue up
194
+ // overlapping firings; the loop stops as soon as `signalStopped`
195
+ // flips in the outer try/finally below.
196
+ const DEFAULT_SIGNAL_MS = 30_000;
197
+ const signalIntervalMs = deps.signalIntervalMs ?? DEFAULT_SIGNAL_MS;
198
+ let signalTimer = null;
199
+ let signalStopped = false;
200
+ const updateSignal = async () => {
201
+ const { error } = await deps.supabase.rpc("update_task_signal", {
182
202
  p_task_id: taskId,
183
203
  });
184
- if (fetched.error || !fetched.data) {
185
- const msg = fetched.error?.message ?? "fetch_task_for_runner returned no data";
186
- await appendEvent(deps.supabase, taskId, "error", { phase: "fetch", error: msg });
187
- await deps.supabase.rpc("transition_task", {
188
- p_task_id: taskId,
189
- p_new_status: "failed",
190
- });
191
- return { taskId, status: "failed", phase: "fetch", error: msg };
192
- }
193
- const result = fetched.data;
194
- const { task } = result;
195
- const branch = branchForTask(task.id, task.title, task.branch);
196
- // REG-295: per-task repo overrides — task row trumps config so one
197
- // runner can service tasks across multiple repos. Env-backed config
198
- // remains the fallback for tasks that don't carry a hint yet.
199
- const repoPath = task.repo_path_hint?.trim()
200
- ? expandHomePath(task.repo_path_hint.trim())
201
- : deps.cfg.repoPath;
202
- const targetRepo = task.repo?.trim() || deps.cfg.targetRepo;
203
- // REG-301: branch base comes from task → cfg → main. cfg.integrationBranch
204
- // already defaults to "acc/integration" so the third fallback only
205
- // matters when an operator zeroed it out via env.
206
- const integrationBranch = task.integration_branch?.trim() || deps.cfg.integrationBranch || "main";
207
- const prompt = renderTaskPrompt({
208
- task,
209
- agent: result.agent,
210
- model: result.model,
211
- integrationBranch,
212
- targetRepo,
213
- });
214
- // 3. Repo prep. v0.6.0 forks the task branch from the integration
215
- // base (REG-301): fetch, switch to the base branch, then create the
216
- // task branch from there. Prior versions ran `git checkout -B`
217
- // directly from cwd HEAD, which in the dogfood loop produced a
218
- // 321-commit PR.
219
- try {
220
- await git.fetch(repoPath);
221
- await checkoutBase(repoPath, integrationBranch);
222
- await git.checkout(repoPath, branch);
223
- }
224
- catch (err) {
225
- const msg = err.message;
226
- await appendEvent(deps.supabase, taskId, "error", {
227
- phase: "git",
228
- error: msg,
229
- base: integrationBranch,
230
- branch,
231
- repo_path: repoPath,
232
- });
233
- await deps.supabase.rpc("transition_task", {
234
- p_task_id: taskId,
235
- p_new_status: "failed",
236
- });
237
- return { taskId, status: "failed", phase: "git", error: msg };
238
- }
239
- // 4. Spawn Claude.
240
- if (cancelled) {
241
- return { taskId, status: "cancelled" };
242
- }
243
- // 4a. Provision .mcp.json so Claude Code auto-discovers acc-mcp-server.
244
- // Best-effort: a failed write must not block the task. The MCP server
245
- // is a context source, not a critical dependency for v0.5-C1.
246
- let mcpCleanup = null;
247
- if (deps.session) {
248
- const writer = deps.writeMcpConfig ?? defaultWriteMcpConfig;
249
- try {
250
- mcpCleanup = await writer({
251
- cwd: repoPath,
252
- taskId,
253
- runnerId: deps.session.runnerId,
254
- accessToken: deps.session.accessToken,
255
- publicUrl: deps.publicUrl ?? deps.cfg.publicUrl,
256
- supabaseUrl: deps.cfg.supabaseUrl,
257
- supabaseAnonKey: deps.cfg.supabaseAnonKey,
258
- });
259
- }
260
- catch (err) {
261
- // Don't echo the token even on failure.
262
- process.stderr.write(`[acc-runner] mcp .mcp.json write failed: ${err.message}\n`);
263
- }
264
- }
265
- child = spawnClaude(repoPath, result.model?.id);
266
- if (child.stdin) {
267
- child.stdin.write(prompt);
268
- child.stdin.end();
269
- }
270
- const stdoutPromise = child.stdout
271
- ? streamToEvents(child.stdout, deps.supabase, taskId, "stdout")
272
- : Promise.resolve("");
273
- const stderrPromise = child.stderr
274
- ? streamToEvents(child.stderr, deps.supabase, taskId, "stderr")
275
- : Promise.resolve("");
276
- const [outcome, capturedStdout, capturedStderr] = await Promise.all([
277
- child,
278
- stdoutPromise,
279
- stderrPromise,
280
- ]);
281
- // Restore .mcp.json as soon as Claude exits — its MCP subprocess
282
- // tree comes down with it, so leaving our token-bearing config on
283
- // disk a moment longer is pure exposure surface.
284
- if (mcpCleanup) {
285
- try {
286
- await mcpCleanup.restore();
287
- }
288
- catch (err) {
289
- process.stderr.write(`[acc-runner] mcp .mcp.json restore failed: ${err.message}\n`);
290
- }
204
+ if (error) {
205
+ // Best-effort: a missed signal just means the sweep might pull
206
+ // the task back if enough of them stack up. Log and continue.
207
+ process.stderr.write(`[acc-runner] update_task_signal(${taskId}) failed: ${error.message}\n`);
291
208
  }
292
- // Post the cost event before any later branch returns so cap tracking
293
- // captures success, cancellation, and non-zero exit alike. Best-effort:
294
- // a failed POST logs to stderr but never bubbles past the runner.
295
- {
296
- const event = buildCostEvent(taskId, capturedStdout, result.model?.id, result.runner?.id);
209
+ };
210
+ const scheduleSignal = () => {
211
+ if (signalStopped)
212
+ return;
213
+ signalTimer = setTimeout(async () => {
214
+ if (signalStopped)
215
+ return;
297
216
  try {
298
- await postCostEvent(event);
217
+ await updateSignal();
299
218
  }
300
- catch (err) {
301
- process.stderr.write(`[acc-runner] cost-event post failed: ${err.message}\n`);
302
- }
303
- }
304
- if (cancelled) {
305
- await appendEvent(deps.supabase, taskId, "cancelled", {
306
- exit_code: outcome.exitCode,
307
- });
308
- return { taskId, status: "cancelled", exitCode: outcome.exitCode };
219
+ catch { /* logged inside */ }
220
+ scheduleSignal();
221
+ }, signalIntervalMs);
222
+ // Detach so an in-flight signal timer doesn't keep the process
223
+ // alive past `watch.ts` shutdown. The outer finally clears it
224
+ // anyway; this is belt-and-suspenders for stray timers.
225
+ if (signalTimer.unref)
226
+ signalTimer.unref();
227
+ };
228
+ const stopSignalLoop = () => {
229
+ signalStopped = true;
230
+ if (signalTimer) {
231
+ clearTimeout(signalTimer);
232
+ signalTimer = null;
309
233
  }
310
- if (outcome.exitCode !== 0) {
234
+ };
235
+ const promise = (async () => {
236
+ // 1. Atomic claim + transition to running. v0.11-D: replaces the
237
+ // pre-v0.6.1 raw transition_task('running') call. The RPC
238
+ // row-locks acc.tasks FOR UPDATE, checks file-path overlap
239
+ // against other running tasks' locks, INSERTs a lock row +
240
+ // transitions to running in one transaction. On overlap or
241
+ // same-task race the task stays queued for another runner.
242
+ const claim = await deps.supabase.rpc("claim_task_with_locks", {
243
+ p_task_id: taskId,
244
+ p_runner_id: deps.session.runnerId,
245
+ });
246
+ if (claim.error) {
247
+ const msg = claim.error.message;
248
+ // Includes 22023 (invalid_task_transition surfaced through the
249
+ // nested acc.transition_task call) — task may already be past
250
+ // 'running' (e.g. needs-review). Log and bail rather than crash.
311
251
  await appendEvent(deps.supabase, taskId, "error", {
312
- phase: "claude_exit",
313
- exit_code: outcome.exitCode,
314
- stderr_tail: capturedStderr.slice(-2000),
252
+ phase: "claim_locks",
253
+ error: msg,
315
254
  });
316
- await deps.supabase.rpc("transition_task", {
317
- p_task_id: taskId,
318
- p_new_status: "failed",
255
+ return { taskId, status: "failed", phase: "claim_locks", error: msg };
256
+ }
257
+ const claimResult = (claim.data ?? {});
258
+ if (claimResult.ok !== true) {
259
+ const conflicts = Array.isArray(claimResult.conflicts) ? claimResult.conflicts : [];
260
+ await appendEvent(deps.supabase, taskId, "log", {
261
+ phase: "claim_locks",
262
+ stream: "stderr",
263
+ conflicts,
264
+ message: `file-lock conflict — other tasks hold overlapping paths: ${conflicts.join(", ")}`,
265
+ runner_id: deps.session.runnerId,
319
266
  });
267
+ const reason = `file-lock conflict with: ${conflicts.join(", ") || "(unknown)"}`;
320
268
  return {
321
269
  taskId,
322
270
  status: "failed",
323
- phase: "claude_exit",
324
- exitCode: outcome.exitCode,
325
- error: capturedStderr.slice(-200).trim() || `claude exited ${outcome.exitCode}`,
271
+ phase: "claim_locks",
272
+ error: reason,
326
273
  };
327
274
  }
328
- // 5. Push + open PR.
275
+ // v0.6.1 (v0.11-D): every exit path below the successful claim
276
+ // must release the lock row so the same paths free up for the
277
+ // next runner. try/finally captures returns AND uncaught throws
278
+ // alike — strictly stronger than explicit pre-return release at
279
+ // each of the seven completion sites, and the runner CLI never
280
+ // recovers from a thrown error inside runTask, so the lock
281
+ // would otherwise leak until a future sweep job clears it.
329
282
  try {
330
- await git.push(repoPath, branch);
331
- }
332
- catch (err) {
333
- const msg = err.message;
334
- await appendEvent(deps.supabase, taskId, "error", { phase: "push", error: msg });
335
- await deps.supabase.rpc("transition_task", {
283
+ // v0.12-RESUME: prime the signal column immediately on claim so
284
+ // the sweep window resets from "now" and the first periodic tick
285
+ // (after signalIntervalMs) refreshes it. Without this prime, a
286
+ // task whose run takes < signalIntervalMs from claim to first
287
+ // tick could race the sweep on borderline updated_at values.
288
+ // Placed inside the outer try so an unexpected throw from the
289
+ // RPC still runs the finally (stop loop, release locks).
290
+ await updateSignal();
291
+ scheduleSignal();
292
+ // 2. Fetch task + adjacent rows.
293
+ const fetched = await deps.supabase.rpc("fetch_task_for_runner", {
336
294
  p_task_id: taskId,
337
- p_new_status: "failed",
338
295
  });
339
- return { taskId, status: "failed", phase: "push", error: msg };
340
- }
341
- let prUrl = "";
342
- try {
343
- const body = extractReportFromOutput(capturedStdout);
344
- const pr = await gh.openPR(repoPath, {
345
- title: prTitleForTask(task.id, task.title),
346
- body,
347
- base: integrationBranch,
296
+ if (fetched.error || !fetched.data) {
297
+ const msg = fetched.error?.message ?? "fetch_task_for_runner returned no data";
298
+ await appendEvent(deps.supabase, taskId, "error", { phase: "fetch", error: msg });
299
+ await deps.supabase.rpc("transition_task", {
300
+ p_task_id: taskId,
301
+ p_new_status: "failed",
302
+ });
303
+ return { taskId, status: "failed", phase: "fetch", error: msg };
304
+ }
305
+ const result = fetched.data;
306
+ const { task } = result;
307
+ const branch = branchForTask(task.id, task.title, task.branch);
308
+ // REG-295: per-task repo overrides — task row trumps config so one
309
+ // runner can service tasks across multiple repos. Env-backed config
310
+ // remains the fallback for tasks that don't carry a hint yet.
311
+ const repoPath = task.repo_path_hint?.trim()
312
+ ? expandHomePath(task.repo_path_hint.trim())
313
+ : deps.cfg.repoPath;
314
+ const targetRepo = task.repo?.trim() || deps.cfg.targetRepo;
315
+ // REG-301: branch base comes from task → cfg → main. cfg.integrationBranch
316
+ // already defaults to "acc/integration" so the third fallback only
317
+ // matters when an operator zeroed it out via env.
318
+ const integrationBranch = task.integration_branch?.trim() || deps.cfg.integrationBranch || "main";
319
+ const prompt = renderTaskPrompt({
320
+ task,
321
+ agent: result.agent,
322
+ model: result.model,
323
+ integrationBranch,
324
+ targetRepo,
348
325
  });
349
- prUrl = pr.url;
350
- await appendEvent(deps.supabase, taskId, "pr-opened", { url: prUrl });
326
+ // v0.11-F: acquire the per-task PID lock before any worktree
327
+ // side-effect. Same-machine parallel runners that picked up the same
328
+ // task_id (e.g. two `acc-runner watch` processes seeing the same
329
+ // broadcast) race here; the second one bails on TaskLockHeldError.
330
+ let lock;
331
+ try {
332
+ lock = await acquireLock(taskId);
333
+ }
334
+ catch (err) {
335
+ if (err instanceof TaskLockHeldError) {
336
+ await appendEvent(deps.supabase, taskId, "error", {
337
+ phase: "worktree_lock",
338
+ error: err.message,
339
+ held_by_pid: err.heldByPid,
340
+ });
341
+ await deps.supabase.rpc("transition_task", {
342
+ p_task_id: taskId,
343
+ p_new_status: "failed",
344
+ });
345
+ return { taskId, status: "failed", phase: "worktree_lock", error: err.message };
346
+ }
347
+ throw err;
348
+ }
349
+ // Worktree gets assigned inside the git-prep block; cleanup in the
350
+ // outer finally handles both the success path and every early
351
+ // return that follows.
352
+ let worktree = null;
353
+ let workdir = repoPath;
354
+ try {
355
+ // 3. Repo prep. v0.11-F: fetch on the shared clone (worktree
356
+ // shares the object store) then provision an isolated worktree
357
+ // at ~/.cache/acc-runner/work/<task_id>/ forked from the
358
+ // integration branch. The redundant `git.checkout -B <branch>`
359
+ // is a no-op inside the new worktree — kept so the v0.6.0
360
+ // checkout seam stays observable in unit tests.
361
+ try {
362
+ await git.fetch(repoPath);
363
+ worktree = await prepareWorktree({
364
+ repoPath,
365
+ taskId,
366
+ branch,
367
+ baseBranch: integrationBranch,
368
+ });
369
+ workdir = worktree.path;
370
+ // v0.12-RESUME: log resume-vs-fresh so an operator can audit
371
+ // how often the resume path actually fires. `resumed: true`
372
+ // means a prior runner crashed mid-task, the v0.12 sweep
373
+ // returned the task to queued, and this runner picked it
374
+ // back up with the prior worktree intact. Claude reads the
375
+ // partially-committed state and continues; the spawn is the
376
+ // same prompt either way (Claude is idempotent enough that
377
+ // re-running on a partially-edited worktree converges on
378
+ // the right final state).
379
+ if (worktree.resumed) {
380
+ await appendEvent(deps.supabase, taskId, "log", {
381
+ phase: "git",
382
+ stream: "stdout",
383
+ event: "worktree.resumed",
384
+ worktree_path: workdir,
385
+ branch,
386
+ runner_id: deps.session.runnerId,
387
+ });
388
+ }
389
+ await git.checkout(workdir, branch);
390
+ }
391
+ catch (err) {
392
+ const msg = err.message;
393
+ await appendEvent(deps.supabase, taskId, "error", {
394
+ phase: "git",
395
+ error: msg,
396
+ base: integrationBranch,
397
+ branch,
398
+ repo_path: repoPath,
399
+ worktree_path: worktree?.path ?? null,
400
+ });
401
+ await deps.supabase.rpc("transition_task", {
402
+ p_task_id: taskId,
403
+ p_new_status: "failed",
404
+ });
405
+ return { taskId, status: "failed", phase: "git", error: msg };
406
+ }
407
+ // 4. Spawn Claude.
408
+ if (cancelled) {
409
+ return { taskId, status: "cancelled" };
410
+ }
411
+ // 4a. Provision .mcp.json so Claude Code auto-discovers acc-mcp-server.
412
+ // Best-effort: a failed write must not block the task. The MCP server
413
+ // is a context source, not a critical dependency for v0.5-C1.
414
+ let mcpCleanup = null;
415
+ if (deps.session) {
416
+ const writer = deps.writeMcpConfig ?? defaultWriteMcpConfig;
417
+ try {
418
+ mcpCleanup = await writer({
419
+ cwd: workdir,
420
+ taskId,
421
+ runnerId: deps.session.runnerId,
422
+ accessToken: deps.session.accessToken,
423
+ publicUrl: deps.publicUrl ?? deps.cfg.publicUrl,
424
+ supabaseUrl: deps.cfg.supabaseUrl,
425
+ supabaseAnonKey: deps.cfg.supabaseAnonKey,
426
+ });
427
+ }
428
+ catch (err) {
429
+ // Don't echo the token even on failure.
430
+ process.stderr.write(`[acc-runner] mcp .mcp.json write failed: ${err.message}\n`);
431
+ }
432
+ }
433
+ // v0.12-MODEL-ALIAS (REG-303/304): translate the ACC model alias
434
+ // (`claude-sonnet-4`) into the wire form `claude --model` actually
435
+ // accepts (`sonnet` or `claude-sonnet-4-6`). Unknown ids pass
436
+ // through verbatim so a future model not yet in the embedded
437
+ // table still spawns.
438
+ child = spawnClaude(workdir, toCliAlias(result.model?.id));
439
+ if (child.stdin) {
440
+ child.stdin.write(prompt);
441
+ child.stdin.end();
442
+ }
443
+ const stdoutPromise = child.stdout
444
+ ? streamToEvents(child.stdout, deps.supabase, taskId, "stdout")
445
+ : Promise.resolve("");
446
+ const stderrPromise = child.stderr
447
+ ? streamToEvents(child.stderr, deps.supabase, taskId, "stderr")
448
+ : Promise.resolve("");
449
+ const [outcome, capturedStdout, capturedStderr] = await Promise.all([
450
+ child,
451
+ stdoutPromise,
452
+ stderrPromise,
453
+ ]);
454
+ // Restore .mcp.json as soon as Claude exits — its MCP subprocess
455
+ // tree comes down with it, so leaving our token-bearing config on
456
+ // disk a moment longer is pure exposure surface.
457
+ if (mcpCleanup) {
458
+ try {
459
+ await mcpCleanup.restore();
460
+ }
461
+ catch (err) {
462
+ process.stderr.write(`[acc-runner] mcp .mcp.json restore failed: ${err.message}\n`);
463
+ }
464
+ }
465
+ // Post the cost event before any later branch returns so cap tracking
466
+ // captures success, cancellation, and non-zero exit alike. Best-effort:
467
+ // a failed POST logs to stderr but never bubbles past the runner.
468
+ {
469
+ const event = buildCostEvent(taskId, capturedStdout, result.model?.id, result.runner?.id);
470
+ try {
471
+ await postCostEvent(event);
472
+ }
473
+ catch (err) {
474
+ process.stderr.write(`[acc-runner] cost-event post failed: ${err.message}\n`);
475
+ }
476
+ }
477
+ if (cancelled) {
478
+ await appendEvent(deps.supabase, taskId, "cancelled", {
479
+ exit_code: outcome.exitCode,
480
+ });
481
+ return { taskId, status: "cancelled", exitCode: outcome.exitCode };
482
+ }
483
+ if (outcome.exitCode !== 0) {
484
+ await appendEvent(deps.supabase, taskId, "error", {
485
+ phase: "claude_exit",
486
+ exit_code: outcome.exitCode,
487
+ stderr_tail: capturedStderr.slice(-2000),
488
+ });
489
+ await deps.supabase.rpc("transition_task", {
490
+ p_task_id: taskId,
491
+ p_new_status: "failed",
492
+ });
493
+ return {
494
+ taskId,
495
+ status: "failed",
496
+ phase: "claude_exit",
497
+ exitCode: outcome.exitCode,
498
+ error: capturedStderr.slice(-200).trim() || `claude exited ${outcome.exitCode}`,
499
+ };
500
+ }
501
+ // 5. Push + open PR. Both run from the worktree so the operator's
502
+ // shared clone never has the task branch checked out.
503
+ try {
504
+ await git.push(workdir, branch);
505
+ }
506
+ catch (err) {
507
+ const msg = err.message;
508
+ await appendEvent(deps.supabase, taskId, "error", { phase: "push", error: msg });
509
+ await deps.supabase.rpc("transition_task", {
510
+ p_task_id: taskId,
511
+ p_new_status: "failed",
512
+ });
513
+ return { taskId, status: "failed", phase: "push", error: msg };
514
+ }
515
+ let prUrl = "";
516
+ try {
517
+ const body = extractReportFromOutput(capturedStdout);
518
+ const pr = await gh.openPR(workdir, {
519
+ title: prTitleForTask(task.id, task.title),
520
+ body,
521
+ base: integrationBranch,
522
+ });
523
+ prUrl = pr.url;
524
+ await appendEvent(deps.supabase, taskId, "pr-opened", { url: prUrl });
525
+ }
526
+ catch (err) {
527
+ const msg = err.message;
528
+ await appendEvent(deps.supabase, taskId, "error", { phase: "pr_open", error: msg });
529
+ // Don't transition to failed — the push succeeded, the user can
530
+ // open a PR manually. Webhook will pick it up.
531
+ }
532
+ return { taskId, status: "ok", prUrl, exitCode: 0 };
533
+ }
534
+ finally {
535
+ // v0.11-F: tear down the per-task worktree and release the PID
536
+ // lock on every completion path (success, failure, cancellation,
537
+ // unexpected throw). Both legs are best-effort — leaking either
538
+ // resource is preferable to masking the original return value.
539
+ if (worktree) {
540
+ try {
541
+ await worktree.cleanup();
542
+ }
543
+ catch (err) {
544
+ process.stderr.write(`[acc-runner] worktree cleanup failed: ${err.message}\n`);
545
+ }
546
+ }
547
+ try {
548
+ await lock.release();
549
+ }
550
+ catch (err) {
551
+ process.stderr.write(`[acc-runner] lock release failed: ${err.message}\n`);
552
+ }
553
+ }
351
554
  }
352
- catch (err) {
353
- const msg = err.message;
354
- await appendEvent(deps.supabase, taskId, "error", { phase: "pr_open", error: msg });
355
- // Don't transition to failed — the push succeeded, the user can
356
- // open a PR manually. Webhook will pick it up.
555
+ finally {
556
+ // v0.12-RESUME: stop the periodic signal loop before releasing
557
+ // locks so a late-firing signal can't bump the column after the
558
+ // task transitions to a terminal status (the RPC is no-op on
559
+ // non-running rows anyway, but stopping early avoids the extra
560
+ // RPC round-trip).
561
+ stopSignalLoop();
562
+ await releaseLocks();
357
563
  }
358
- return { taskId, status: "ok", prUrl, exitCode: 0 };
359
564
  })();
360
565
  return {
361
566
  taskId,