@tokenfactory/acc-runner 0.6.0 → 0.6.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cost-pricing.d.ts +49 -0
- package/dist/cost-pricing.d.ts.map +1 -1
- package/dist/cost-pricing.js +45 -0
- package/dist/cost-pricing.js.map +1 -1
- package/dist/doctor.d.ts +2 -0
- package/dist/doctor.d.ts.map +1 -1
- package/dist/doctor.js +159 -0
- package/dist/doctor.js.map +1 -1
- package/dist/runtime/locks.d.ts +21 -0
- package/dist/runtime/locks.d.ts.map +1 -0
- package/dist/runtime/locks.js +97 -0
- package/dist/runtime/locks.js.map +1 -0
- package/dist/runtime/reviewer.d.ts +70 -0
- package/dist/runtime/reviewer.d.ts.map +1 -0
- package/dist/runtime/reviewer.js +279 -0
- package/dist/runtime/reviewer.js.map +1 -0
- package/dist/runtime/worktree.d.ts +63 -0
- package/dist/runtime/worktree.d.ts.map +1 -0
- package/dist/runtime/worktree.js +184 -0
- package/dist/runtime/worktree.js.map +1 -0
- package/dist/task-runner.d.ts +32 -5
- package/dist/task-runner.d.ts.map +1 -1
- package/dist/task-runner.js +378 -173
- package/dist/task-runner.js.map +1 -1
- package/dist/types.d.ts +4 -4
- package/dist/types.js +1 -1
- package/dist/watch.d.ts +6 -0
- package/dist/watch.d.ts.map +1 -1
- package/dist/watch.js +67 -0
- package/dist/watch.js.map +1 -1
- package/package.json +2 -2
package/dist/task-runner.js
CHANGED
|
@@ -16,11 +16,13 @@
|
|
|
16
16
|
import os from "node:os";
|
|
17
17
|
import path from "node:path";
|
|
18
18
|
import { execa } from "execa";
|
|
19
|
-
import { normalizeUsage, parseClaudeJson, priceUsdCents, } from "./cost-pricing.js";
|
|
19
|
+
import { normalizeUsage, parseClaudeJson, priceUsdCents, toCliAlias, } from "./cost-pricing.js";
|
|
20
20
|
import { git as defaultGit } from "./git.js";
|
|
21
21
|
import { gh as defaultGh } from "./gh.js";
|
|
22
22
|
import { writeMcpConfig as defaultWriteMcpConfig, } from "./mcp-spawn.js";
|
|
23
23
|
import { branchForTask, prTitleForTask, renderTaskPrompt, } from "./prompt.js";
|
|
24
|
+
import { acquireTaskLock as defaultAcquireTaskLock, TaskLockHeldError, } from "./runtime/locks.js";
|
|
25
|
+
import { prepareTaskWorktree as defaultPrepareTaskWorktree, } from "./runtime/worktree.js";
|
|
24
26
|
const LOG_BATCH_BYTES = 4 * 1024;
|
|
25
27
|
/**
|
|
26
28
|
* v0.6.0 REG-296: build the argv for `claude --print`. Splitting this
|
|
@@ -158,204 +160,407 @@ export function runTask(taskId, deps) {
|
|
|
158
160
|
const gh = deps.gh ?? defaultGh;
|
|
159
161
|
const spawnClaude = deps.spawnClaude ?? defaultSpawnClaude;
|
|
160
162
|
const checkoutBase = deps.checkoutBase ?? defaultCheckoutBase;
|
|
163
|
+
const acquireLock = deps.acquireLock ?? defaultAcquireTaskLock;
|
|
164
|
+
const prepareWorktree = deps.prepareWorktree ?? defaultPrepareTaskWorktree;
|
|
161
165
|
const postCostEvent = deps.postCostEvent ?? ((event) => defaultPostCostEvent(deps.supabase, event));
|
|
166
|
+
// Silence the unused-binding lint for `checkoutBase` — v0.11-F supersedes
|
|
167
|
+
// the v0.6.0 REG-301 pre-spawn `git checkout <baseBranch>` with the
|
|
168
|
+
// worktree's `add -B <branch> <path> <baseBranch>` start-point semantics,
|
|
169
|
+
// but the dep is still accepted for back-compat with test fixtures that
|
|
170
|
+
// inject a mock. Drop in v0.7.
|
|
171
|
+
void checkoutBase;
|
|
162
172
|
let child = null;
|
|
163
173
|
let cancelled = false;
|
|
164
|
-
|
|
165
|
-
|
|
166
|
-
|
|
174
|
+
// v0.6.1 (v0.11-D): release lock rows on every terminal exit path
|
|
175
|
+
// below the claim. Best-effort — a release failure logs to stderr
|
|
176
|
+
// but does not change the outcome the runner reports to the caller.
|
|
177
|
+
// Calling release without first calling claim is a no-op at the
|
|
178
|
+
// RPC level, so wiring this into both pre-claim and post-claim
|
|
179
|
+
// returns would be safe; we only call it from post-claim returns
|
|
180
|
+
// to keep the code path obvious to a reader.
|
|
181
|
+
const releaseLocks = async () => {
|
|
182
|
+
const { error } = await deps.supabase.rpc("release_task_locks", {
|
|
167
183
|
p_task_id: taskId,
|
|
168
|
-
p_new_status: "running",
|
|
169
184
|
});
|
|
170
|
-
if (
|
|
171
|
-
|
|
172
|
-
// 22023 = invalid_task_transition. The task may already be past
|
|
173
|
-
// 'running' (e.g. needs-review) — log and bail rather than crash.
|
|
174
|
-
await appendEvent(deps.supabase, taskId, "error", {
|
|
175
|
-
phase: "transition_to_running",
|
|
176
|
-
error: msg,
|
|
177
|
-
});
|
|
178
|
-
return { taskId, status: "failed", phase: "transition_to_running", error: msg };
|
|
185
|
+
if (error) {
|
|
186
|
+
process.stderr.write(`[acc-runner] release_task_locks(${taskId}) failed: ${error.message}\n`);
|
|
179
187
|
}
|
|
180
|
-
|
|
181
|
-
|
|
188
|
+
};
|
|
189
|
+
// v0.12-RESUME — periodic signal loop. After the claim succeeds we
|
|
190
|
+
// bump acc.tasks.last_runner_signal_at every signalIntervalMs ms so
|
|
191
|
+
// the v0.12 /5m sweep distinguishes "runner alive, work in flight"
|
|
192
|
+
// from "runner crashed, task stuck at running". Self-rescheduling
|
|
193
|
+
// setTimeout (not setInterval) so a slow RPC doesn't queue up
|
|
194
|
+
// overlapping firings; the loop stops as soon as `signalStopped`
|
|
195
|
+
// flips in the outer try/finally below.
|
|
196
|
+
const DEFAULT_SIGNAL_MS = 30_000;
|
|
197
|
+
const signalIntervalMs = deps.signalIntervalMs ?? DEFAULT_SIGNAL_MS;
|
|
198
|
+
let signalTimer = null;
|
|
199
|
+
let signalStopped = false;
|
|
200
|
+
const updateSignal = async () => {
|
|
201
|
+
const { error } = await deps.supabase.rpc("update_task_signal", {
|
|
182
202
|
p_task_id: taskId,
|
|
183
203
|
});
|
|
184
|
-
if (
|
|
185
|
-
|
|
186
|
-
|
|
187
|
-
|
|
188
|
-
p_task_id: taskId,
|
|
189
|
-
p_new_status: "failed",
|
|
190
|
-
});
|
|
191
|
-
return { taskId, status: "failed", phase: "fetch", error: msg };
|
|
192
|
-
}
|
|
193
|
-
const result = fetched.data;
|
|
194
|
-
const { task } = result;
|
|
195
|
-
const branch = branchForTask(task.id, task.title, task.branch);
|
|
196
|
-
// REG-295: per-task repo overrides — task row trumps config so one
|
|
197
|
-
// runner can service tasks across multiple repos. Env-backed config
|
|
198
|
-
// remains the fallback for tasks that don't carry a hint yet.
|
|
199
|
-
const repoPath = task.repo_path_hint?.trim()
|
|
200
|
-
? expandHomePath(task.repo_path_hint.trim())
|
|
201
|
-
: deps.cfg.repoPath;
|
|
202
|
-
const targetRepo = task.repo?.trim() || deps.cfg.targetRepo;
|
|
203
|
-
// REG-301: branch base comes from task → cfg → main. cfg.integrationBranch
|
|
204
|
-
// already defaults to "acc/integration" so the third fallback only
|
|
205
|
-
// matters when an operator zeroed it out via env.
|
|
206
|
-
const integrationBranch = task.integration_branch?.trim() || deps.cfg.integrationBranch || "main";
|
|
207
|
-
const prompt = renderTaskPrompt({
|
|
208
|
-
task,
|
|
209
|
-
agent: result.agent,
|
|
210
|
-
model: result.model,
|
|
211
|
-
integrationBranch,
|
|
212
|
-
targetRepo,
|
|
213
|
-
});
|
|
214
|
-
// 3. Repo prep. v0.6.0 forks the task branch from the integration
|
|
215
|
-
// base (REG-301): fetch, switch to the base branch, then create the
|
|
216
|
-
// task branch from there. Prior versions ran `git checkout -B`
|
|
217
|
-
// directly from cwd HEAD, which in the dogfood loop produced a
|
|
218
|
-
// 321-commit PR.
|
|
219
|
-
try {
|
|
220
|
-
await git.fetch(repoPath);
|
|
221
|
-
await checkoutBase(repoPath, integrationBranch);
|
|
222
|
-
await git.checkout(repoPath, branch);
|
|
223
|
-
}
|
|
224
|
-
catch (err) {
|
|
225
|
-
const msg = err.message;
|
|
226
|
-
await appendEvent(deps.supabase, taskId, "error", {
|
|
227
|
-
phase: "git",
|
|
228
|
-
error: msg,
|
|
229
|
-
base: integrationBranch,
|
|
230
|
-
branch,
|
|
231
|
-
repo_path: repoPath,
|
|
232
|
-
});
|
|
233
|
-
await deps.supabase.rpc("transition_task", {
|
|
234
|
-
p_task_id: taskId,
|
|
235
|
-
p_new_status: "failed",
|
|
236
|
-
});
|
|
237
|
-
return { taskId, status: "failed", phase: "git", error: msg };
|
|
238
|
-
}
|
|
239
|
-
// 4. Spawn Claude.
|
|
240
|
-
if (cancelled) {
|
|
241
|
-
return { taskId, status: "cancelled" };
|
|
242
|
-
}
|
|
243
|
-
// 4a. Provision .mcp.json so Claude Code auto-discovers acc-mcp-server.
|
|
244
|
-
// Best-effort: a failed write must not block the task. The MCP server
|
|
245
|
-
// is a context source, not a critical dependency for v0.5-C1.
|
|
246
|
-
let mcpCleanup = null;
|
|
247
|
-
if (deps.session) {
|
|
248
|
-
const writer = deps.writeMcpConfig ?? defaultWriteMcpConfig;
|
|
249
|
-
try {
|
|
250
|
-
mcpCleanup = await writer({
|
|
251
|
-
cwd: repoPath,
|
|
252
|
-
taskId,
|
|
253
|
-
runnerId: deps.session.runnerId,
|
|
254
|
-
accessToken: deps.session.accessToken,
|
|
255
|
-
publicUrl: deps.publicUrl ?? deps.cfg.publicUrl,
|
|
256
|
-
supabaseUrl: deps.cfg.supabaseUrl,
|
|
257
|
-
supabaseAnonKey: deps.cfg.supabaseAnonKey,
|
|
258
|
-
});
|
|
259
|
-
}
|
|
260
|
-
catch (err) {
|
|
261
|
-
// Don't echo the token even on failure.
|
|
262
|
-
process.stderr.write(`[acc-runner] mcp .mcp.json write failed: ${err.message}\n`);
|
|
263
|
-
}
|
|
264
|
-
}
|
|
265
|
-
child = spawnClaude(repoPath, result.model?.id);
|
|
266
|
-
if (child.stdin) {
|
|
267
|
-
child.stdin.write(prompt);
|
|
268
|
-
child.stdin.end();
|
|
269
|
-
}
|
|
270
|
-
const stdoutPromise = child.stdout
|
|
271
|
-
? streamToEvents(child.stdout, deps.supabase, taskId, "stdout")
|
|
272
|
-
: Promise.resolve("");
|
|
273
|
-
const stderrPromise = child.stderr
|
|
274
|
-
? streamToEvents(child.stderr, deps.supabase, taskId, "stderr")
|
|
275
|
-
: Promise.resolve("");
|
|
276
|
-
const [outcome, capturedStdout, capturedStderr] = await Promise.all([
|
|
277
|
-
child,
|
|
278
|
-
stdoutPromise,
|
|
279
|
-
stderrPromise,
|
|
280
|
-
]);
|
|
281
|
-
// Restore .mcp.json as soon as Claude exits — its MCP subprocess
|
|
282
|
-
// tree comes down with it, so leaving our token-bearing config on
|
|
283
|
-
// disk a moment longer is pure exposure surface.
|
|
284
|
-
if (mcpCleanup) {
|
|
285
|
-
try {
|
|
286
|
-
await mcpCleanup.restore();
|
|
287
|
-
}
|
|
288
|
-
catch (err) {
|
|
289
|
-
process.stderr.write(`[acc-runner] mcp .mcp.json restore failed: ${err.message}\n`);
|
|
290
|
-
}
|
|
204
|
+
if (error) {
|
|
205
|
+
// Best-effort: a missed signal just means the sweep might pull
|
|
206
|
+
// the task back if enough of them stack up. Log and continue.
|
|
207
|
+
process.stderr.write(`[acc-runner] update_task_signal(${taskId}) failed: ${error.message}\n`);
|
|
291
208
|
}
|
|
292
|
-
|
|
293
|
-
|
|
294
|
-
|
|
295
|
-
|
|
296
|
-
|
|
209
|
+
};
|
|
210
|
+
const scheduleSignal = () => {
|
|
211
|
+
if (signalStopped)
|
|
212
|
+
return;
|
|
213
|
+
signalTimer = setTimeout(async () => {
|
|
214
|
+
if (signalStopped)
|
|
215
|
+
return;
|
|
297
216
|
try {
|
|
298
|
-
await
|
|
217
|
+
await updateSignal();
|
|
299
218
|
}
|
|
300
|
-
catch
|
|
301
|
-
|
|
302
|
-
|
|
303
|
-
|
|
304
|
-
|
|
305
|
-
|
|
306
|
-
|
|
307
|
-
|
|
308
|
-
|
|
219
|
+
catch { /* logged inside */ }
|
|
220
|
+
scheduleSignal();
|
|
221
|
+
}, signalIntervalMs);
|
|
222
|
+
// Detach so an in-flight signal timer doesn't keep the process
|
|
223
|
+
// alive past `watch.ts` shutdown. The outer finally clears it
|
|
224
|
+
// anyway; this is belt-and-suspenders for stray timers.
|
|
225
|
+
if (signalTimer.unref)
|
|
226
|
+
signalTimer.unref();
|
|
227
|
+
};
|
|
228
|
+
const stopSignalLoop = () => {
|
|
229
|
+
signalStopped = true;
|
|
230
|
+
if (signalTimer) {
|
|
231
|
+
clearTimeout(signalTimer);
|
|
232
|
+
signalTimer = null;
|
|
309
233
|
}
|
|
310
|
-
|
|
234
|
+
};
|
|
235
|
+
const promise = (async () => {
|
|
236
|
+
// 1. Atomic claim + transition to running. v0.11-D: replaces the
|
|
237
|
+
// pre-v0.6.1 raw transition_task('running') call. The RPC
|
|
238
|
+
// row-locks acc.tasks FOR UPDATE, checks file-path overlap
|
|
239
|
+
// against other running tasks' locks, INSERTs a lock row +
|
|
240
|
+
// transitions to running in one transaction. On overlap or
|
|
241
|
+
// same-task race the task stays queued for another runner.
|
|
242
|
+
const claim = await deps.supabase.rpc("claim_task_with_locks", {
|
|
243
|
+
p_task_id: taskId,
|
|
244
|
+
p_runner_id: deps.session.runnerId,
|
|
245
|
+
});
|
|
246
|
+
if (claim.error) {
|
|
247
|
+
const msg = claim.error.message;
|
|
248
|
+
// Includes 22023 (invalid_task_transition surfaced through the
|
|
249
|
+
// nested acc.transition_task call) — task may already be past
|
|
250
|
+
// 'running' (e.g. needs-review). Log and bail rather than crash.
|
|
311
251
|
await appendEvent(deps.supabase, taskId, "error", {
|
|
312
|
-
phase: "
|
|
313
|
-
|
|
314
|
-
stderr_tail: capturedStderr.slice(-2000),
|
|
252
|
+
phase: "claim_locks",
|
|
253
|
+
error: msg,
|
|
315
254
|
});
|
|
316
|
-
|
|
317
|
-
|
|
318
|
-
|
|
255
|
+
return { taskId, status: "failed", phase: "claim_locks", error: msg };
|
|
256
|
+
}
|
|
257
|
+
const claimResult = (claim.data ?? {});
|
|
258
|
+
if (claimResult.ok !== true) {
|
|
259
|
+
const conflicts = Array.isArray(claimResult.conflicts) ? claimResult.conflicts : [];
|
|
260
|
+
await appendEvent(deps.supabase, taskId, "log", {
|
|
261
|
+
phase: "claim_locks",
|
|
262
|
+
stream: "stderr",
|
|
263
|
+
conflicts,
|
|
264
|
+
message: `file-lock conflict — other tasks hold overlapping paths: ${conflicts.join(", ")}`,
|
|
265
|
+
runner_id: deps.session.runnerId,
|
|
319
266
|
});
|
|
267
|
+
const reason = `file-lock conflict with: ${conflicts.join(", ") || "(unknown)"}`;
|
|
320
268
|
return {
|
|
321
269
|
taskId,
|
|
322
270
|
status: "failed",
|
|
323
|
-
phase: "
|
|
324
|
-
|
|
325
|
-
error: capturedStderr.slice(-200).trim() || `claude exited ${outcome.exitCode}`,
|
|
271
|
+
phase: "claim_locks",
|
|
272
|
+
error: reason,
|
|
326
273
|
};
|
|
327
274
|
}
|
|
328
|
-
//
|
|
275
|
+
// v0.6.1 (v0.11-D): every exit path below the successful claim
|
|
276
|
+
// must release the lock row so the same paths free up for the
|
|
277
|
+
// next runner. try/finally captures returns AND uncaught throws
|
|
278
|
+
// alike — strictly stronger than explicit pre-return release at
|
|
279
|
+
// each of the seven completion sites, and the runner CLI never
|
|
280
|
+
// recovers from a thrown error inside runTask, so the lock
|
|
281
|
+
// would otherwise leak until a future sweep job clears it.
|
|
329
282
|
try {
|
|
330
|
-
|
|
331
|
-
|
|
332
|
-
|
|
333
|
-
|
|
334
|
-
|
|
335
|
-
|
|
283
|
+
// v0.12-RESUME: prime the signal column immediately on claim so
|
|
284
|
+
// the sweep window resets from "now" and the first periodic tick
|
|
285
|
+
// (after signalIntervalMs) refreshes it. Without this prime, a
|
|
286
|
+
// task whose run takes < signalIntervalMs from claim to first
|
|
287
|
+
// tick could race the sweep on borderline updated_at values.
|
|
288
|
+
// Placed inside the outer try so an unexpected throw from the
|
|
289
|
+
// RPC still runs the finally (stop loop, release locks).
|
|
290
|
+
await updateSignal();
|
|
291
|
+
scheduleSignal();
|
|
292
|
+
// 2. Fetch task + adjacent rows.
|
|
293
|
+
const fetched = await deps.supabase.rpc("fetch_task_for_runner", {
|
|
336
294
|
p_task_id: taskId,
|
|
337
|
-
p_new_status: "failed",
|
|
338
295
|
});
|
|
339
|
-
|
|
340
|
-
|
|
341
|
-
|
|
342
|
-
|
|
343
|
-
|
|
344
|
-
|
|
345
|
-
|
|
346
|
-
|
|
347
|
-
|
|
296
|
+
if (fetched.error || !fetched.data) {
|
|
297
|
+
const msg = fetched.error?.message ?? "fetch_task_for_runner returned no data";
|
|
298
|
+
await appendEvent(deps.supabase, taskId, "error", { phase: "fetch", error: msg });
|
|
299
|
+
await deps.supabase.rpc("transition_task", {
|
|
300
|
+
p_task_id: taskId,
|
|
301
|
+
p_new_status: "failed",
|
|
302
|
+
});
|
|
303
|
+
return { taskId, status: "failed", phase: "fetch", error: msg };
|
|
304
|
+
}
|
|
305
|
+
const result = fetched.data;
|
|
306
|
+
const { task } = result;
|
|
307
|
+
const branch = branchForTask(task.id, task.title, task.branch);
|
|
308
|
+
// REG-295: per-task repo overrides — task row trumps config so one
|
|
309
|
+
// runner can service tasks across multiple repos. Env-backed config
|
|
310
|
+
// remains the fallback for tasks that don't carry a hint yet.
|
|
311
|
+
const repoPath = task.repo_path_hint?.trim()
|
|
312
|
+
? expandHomePath(task.repo_path_hint.trim())
|
|
313
|
+
: deps.cfg.repoPath;
|
|
314
|
+
const targetRepo = task.repo?.trim() || deps.cfg.targetRepo;
|
|
315
|
+
// REG-301: branch base comes from task → cfg → main. cfg.integrationBranch
|
|
316
|
+
// already defaults to "acc/integration" so the third fallback only
|
|
317
|
+
// matters when an operator zeroed it out via env.
|
|
318
|
+
const integrationBranch = task.integration_branch?.trim() || deps.cfg.integrationBranch || "main";
|
|
319
|
+
const prompt = renderTaskPrompt({
|
|
320
|
+
task,
|
|
321
|
+
agent: result.agent,
|
|
322
|
+
model: result.model,
|
|
323
|
+
integrationBranch,
|
|
324
|
+
targetRepo,
|
|
348
325
|
});
|
|
349
|
-
|
|
350
|
-
|
|
326
|
+
// v0.11-F: acquire the per-task PID lock before any worktree
|
|
327
|
+
// side-effect. Same-machine parallel runners that picked up the same
|
|
328
|
+
// task_id (e.g. two `acc-runner watch` processes seeing the same
|
|
329
|
+
// broadcast) race here; the second one bails on TaskLockHeldError.
|
|
330
|
+
let lock;
|
|
331
|
+
try {
|
|
332
|
+
lock = await acquireLock(taskId);
|
|
333
|
+
}
|
|
334
|
+
catch (err) {
|
|
335
|
+
if (err instanceof TaskLockHeldError) {
|
|
336
|
+
await appendEvent(deps.supabase, taskId, "error", {
|
|
337
|
+
phase: "worktree_lock",
|
|
338
|
+
error: err.message,
|
|
339
|
+
held_by_pid: err.heldByPid,
|
|
340
|
+
});
|
|
341
|
+
await deps.supabase.rpc("transition_task", {
|
|
342
|
+
p_task_id: taskId,
|
|
343
|
+
p_new_status: "failed",
|
|
344
|
+
});
|
|
345
|
+
return { taskId, status: "failed", phase: "worktree_lock", error: err.message };
|
|
346
|
+
}
|
|
347
|
+
throw err;
|
|
348
|
+
}
|
|
349
|
+
// Worktree gets assigned inside the git-prep block; cleanup in the
|
|
350
|
+
// outer finally handles both the success path and every early
|
|
351
|
+
// return that follows.
|
|
352
|
+
let worktree = null;
|
|
353
|
+
let workdir = repoPath;
|
|
354
|
+
try {
|
|
355
|
+
// 3. Repo prep. v0.11-F: fetch on the shared clone (worktree
|
|
356
|
+
// shares the object store) then provision an isolated worktree
|
|
357
|
+
// at ~/.cache/acc-runner/work/<task_id>/ forked from the
|
|
358
|
+
// integration branch. The redundant `git.checkout -B <branch>`
|
|
359
|
+
// is a no-op inside the new worktree — kept so the v0.6.0
|
|
360
|
+
// checkout seam stays observable in unit tests.
|
|
361
|
+
try {
|
|
362
|
+
await git.fetch(repoPath);
|
|
363
|
+
worktree = await prepareWorktree({
|
|
364
|
+
repoPath,
|
|
365
|
+
taskId,
|
|
366
|
+
branch,
|
|
367
|
+
baseBranch: integrationBranch,
|
|
368
|
+
});
|
|
369
|
+
workdir = worktree.path;
|
|
370
|
+
// v0.12-RESUME: log resume-vs-fresh so an operator can audit
|
|
371
|
+
// how often the resume path actually fires. `resumed: true`
|
|
372
|
+
// means a prior runner crashed mid-task, the v0.12 sweep
|
|
373
|
+
// returned the task to queued, and this runner picked it
|
|
374
|
+
// back up with the prior worktree intact. Claude reads the
|
|
375
|
+
// partially-committed state and continues; the spawn is the
|
|
376
|
+
// same prompt either way (Claude is idempotent enough that
|
|
377
|
+
// re-running on a partially-edited worktree converges on
|
|
378
|
+
// the right final state).
|
|
379
|
+
if (worktree.resumed) {
|
|
380
|
+
await appendEvent(deps.supabase, taskId, "log", {
|
|
381
|
+
phase: "git",
|
|
382
|
+
stream: "stdout",
|
|
383
|
+
event: "worktree.resumed",
|
|
384
|
+
worktree_path: workdir,
|
|
385
|
+
branch,
|
|
386
|
+
runner_id: deps.session.runnerId,
|
|
387
|
+
});
|
|
388
|
+
}
|
|
389
|
+
await git.checkout(workdir, branch);
|
|
390
|
+
}
|
|
391
|
+
catch (err) {
|
|
392
|
+
const msg = err.message;
|
|
393
|
+
await appendEvent(deps.supabase, taskId, "error", {
|
|
394
|
+
phase: "git",
|
|
395
|
+
error: msg,
|
|
396
|
+
base: integrationBranch,
|
|
397
|
+
branch,
|
|
398
|
+
repo_path: repoPath,
|
|
399
|
+
worktree_path: worktree?.path ?? null,
|
|
400
|
+
});
|
|
401
|
+
await deps.supabase.rpc("transition_task", {
|
|
402
|
+
p_task_id: taskId,
|
|
403
|
+
p_new_status: "failed",
|
|
404
|
+
});
|
|
405
|
+
return { taskId, status: "failed", phase: "git", error: msg };
|
|
406
|
+
}
|
|
407
|
+
// 4. Spawn Claude.
|
|
408
|
+
if (cancelled) {
|
|
409
|
+
return { taskId, status: "cancelled" };
|
|
410
|
+
}
|
|
411
|
+
// 4a. Provision .mcp.json so Claude Code auto-discovers acc-mcp-server.
|
|
412
|
+
// Best-effort: a failed write must not block the task. The MCP server
|
|
413
|
+
// is a context source, not a critical dependency for v0.5-C1.
|
|
414
|
+
let mcpCleanup = null;
|
|
415
|
+
if (deps.session) {
|
|
416
|
+
const writer = deps.writeMcpConfig ?? defaultWriteMcpConfig;
|
|
417
|
+
try {
|
|
418
|
+
mcpCleanup = await writer({
|
|
419
|
+
cwd: workdir,
|
|
420
|
+
taskId,
|
|
421
|
+
runnerId: deps.session.runnerId,
|
|
422
|
+
accessToken: deps.session.accessToken,
|
|
423
|
+
publicUrl: deps.publicUrl ?? deps.cfg.publicUrl,
|
|
424
|
+
supabaseUrl: deps.cfg.supabaseUrl,
|
|
425
|
+
supabaseAnonKey: deps.cfg.supabaseAnonKey,
|
|
426
|
+
});
|
|
427
|
+
}
|
|
428
|
+
catch (err) {
|
|
429
|
+
// Don't echo the token even on failure.
|
|
430
|
+
process.stderr.write(`[acc-runner] mcp .mcp.json write failed: ${err.message}\n`);
|
|
431
|
+
}
|
|
432
|
+
}
|
|
433
|
+
// v0.12-MODEL-ALIAS (REG-303/304): translate the ACC model alias
|
|
434
|
+
// (`claude-sonnet-4`) into the wire form `claude --model` actually
|
|
435
|
+
// accepts (`sonnet` or `claude-sonnet-4-6`). Unknown ids pass
|
|
436
|
+
// through verbatim so a future model not yet in the embedded
|
|
437
|
+
// table still spawns.
|
|
438
|
+
child = spawnClaude(workdir, toCliAlias(result.model?.id));
|
|
439
|
+
if (child.stdin) {
|
|
440
|
+
child.stdin.write(prompt);
|
|
441
|
+
child.stdin.end();
|
|
442
|
+
}
|
|
443
|
+
const stdoutPromise = child.stdout
|
|
444
|
+
? streamToEvents(child.stdout, deps.supabase, taskId, "stdout")
|
|
445
|
+
: Promise.resolve("");
|
|
446
|
+
const stderrPromise = child.stderr
|
|
447
|
+
? streamToEvents(child.stderr, deps.supabase, taskId, "stderr")
|
|
448
|
+
: Promise.resolve("");
|
|
449
|
+
const [outcome, capturedStdout, capturedStderr] = await Promise.all([
|
|
450
|
+
child,
|
|
451
|
+
stdoutPromise,
|
|
452
|
+
stderrPromise,
|
|
453
|
+
]);
|
|
454
|
+
// Restore .mcp.json as soon as Claude exits — its MCP subprocess
|
|
455
|
+
// tree comes down with it, so leaving our token-bearing config on
|
|
456
|
+
// disk a moment longer is pure exposure surface.
|
|
457
|
+
if (mcpCleanup) {
|
|
458
|
+
try {
|
|
459
|
+
await mcpCleanup.restore();
|
|
460
|
+
}
|
|
461
|
+
catch (err) {
|
|
462
|
+
process.stderr.write(`[acc-runner] mcp .mcp.json restore failed: ${err.message}\n`);
|
|
463
|
+
}
|
|
464
|
+
}
|
|
465
|
+
// Post the cost event before any later branch returns so cap tracking
|
|
466
|
+
// captures success, cancellation, and non-zero exit alike. Best-effort:
|
|
467
|
+
// a failed POST logs to stderr but never bubbles past the runner.
|
|
468
|
+
{
|
|
469
|
+
const event = buildCostEvent(taskId, capturedStdout, result.model?.id, result.runner?.id);
|
|
470
|
+
try {
|
|
471
|
+
await postCostEvent(event);
|
|
472
|
+
}
|
|
473
|
+
catch (err) {
|
|
474
|
+
process.stderr.write(`[acc-runner] cost-event post failed: ${err.message}\n`);
|
|
475
|
+
}
|
|
476
|
+
}
|
|
477
|
+
if (cancelled) {
|
|
478
|
+
await appendEvent(deps.supabase, taskId, "cancelled", {
|
|
479
|
+
exit_code: outcome.exitCode,
|
|
480
|
+
});
|
|
481
|
+
return { taskId, status: "cancelled", exitCode: outcome.exitCode };
|
|
482
|
+
}
|
|
483
|
+
if (outcome.exitCode !== 0) {
|
|
484
|
+
await appendEvent(deps.supabase, taskId, "error", {
|
|
485
|
+
phase: "claude_exit",
|
|
486
|
+
exit_code: outcome.exitCode,
|
|
487
|
+
stderr_tail: capturedStderr.slice(-2000),
|
|
488
|
+
});
|
|
489
|
+
await deps.supabase.rpc("transition_task", {
|
|
490
|
+
p_task_id: taskId,
|
|
491
|
+
p_new_status: "failed",
|
|
492
|
+
});
|
|
493
|
+
return {
|
|
494
|
+
taskId,
|
|
495
|
+
status: "failed",
|
|
496
|
+
phase: "claude_exit",
|
|
497
|
+
exitCode: outcome.exitCode,
|
|
498
|
+
error: capturedStderr.slice(-200).trim() || `claude exited ${outcome.exitCode}`,
|
|
499
|
+
};
|
|
500
|
+
}
|
|
501
|
+
// 5. Push + open PR. Both run from the worktree so the operator's
|
|
502
|
+
// shared clone never has the task branch checked out.
|
|
503
|
+
try {
|
|
504
|
+
await git.push(workdir, branch);
|
|
505
|
+
}
|
|
506
|
+
catch (err) {
|
|
507
|
+
const msg = err.message;
|
|
508
|
+
await appendEvent(deps.supabase, taskId, "error", { phase: "push", error: msg });
|
|
509
|
+
await deps.supabase.rpc("transition_task", {
|
|
510
|
+
p_task_id: taskId,
|
|
511
|
+
p_new_status: "failed",
|
|
512
|
+
});
|
|
513
|
+
return { taskId, status: "failed", phase: "push", error: msg };
|
|
514
|
+
}
|
|
515
|
+
let prUrl = "";
|
|
516
|
+
try {
|
|
517
|
+
const body = extractReportFromOutput(capturedStdout);
|
|
518
|
+
const pr = await gh.openPR(workdir, {
|
|
519
|
+
title: prTitleForTask(task.id, task.title),
|
|
520
|
+
body,
|
|
521
|
+
base: integrationBranch,
|
|
522
|
+
});
|
|
523
|
+
prUrl = pr.url;
|
|
524
|
+
await appendEvent(deps.supabase, taskId, "pr-opened", { url: prUrl });
|
|
525
|
+
}
|
|
526
|
+
catch (err) {
|
|
527
|
+
const msg = err.message;
|
|
528
|
+
await appendEvent(deps.supabase, taskId, "error", { phase: "pr_open", error: msg });
|
|
529
|
+
// Don't transition to failed — the push succeeded, the user can
|
|
530
|
+
// open a PR manually. Webhook will pick it up.
|
|
531
|
+
}
|
|
532
|
+
return { taskId, status: "ok", prUrl, exitCode: 0 };
|
|
533
|
+
}
|
|
534
|
+
finally {
|
|
535
|
+
// v0.11-F: tear down the per-task worktree and release the PID
|
|
536
|
+
// lock on every completion path (success, failure, cancellation,
|
|
537
|
+
// unexpected throw). Both legs are best-effort — leaking either
|
|
538
|
+
// resource is preferable to masking the original return value.
|
|
539
|
+
if (worktree) {
|
|
540
|
+
try {
|
|
541
|
+
await worktree.cleanup();
|
|
542
|
+
}
|
|
543
|
+
catch (err) {
|
|
544
|
+
process.stderr.write(`[acc-runner] worktree cleanup failed: ${err.message}\n`);
|
|
545
|
+
}
|
|
546
|
+
}
|
|
547
|
+
try {
|
|
548
|
+
await lock.release();
|
|
549
|
+
}
|
|
550
|
+
catch (err) {
|
|
551
|
+
process.stderr.write(`[acc-runner] lock release failed: ${err.message}\n`);
|
|
552
|
+
}
|
|
553
|
+
}
|
|
351
554
|
}
|
|
352
|
-
|
|
353
|
-
|
|
354
|
-
|
|
355
|
-
//
|
|
356
|
-
//
|
|
555
|
+
finally {
|
|
556
|
+
// v0.12-RESUME: stop the periodic signal loop before releasing
|
|
557
|
+
// locks so a late-firing signal can't bump the column after the
|
|
558
|
+
// task transitions to a terminal status (the RPC is no-op on
|
|
559
|
+
// non-running rows anyway, but stopping early avoids the extra
|
|
560
|
+
// RPC round-trip).
|
|
561
|
+
stopSignalLoop();
|
|
562
|
+
await releaseLocks();
|
|
357
563
|
}
|
|
358
|
-
return { taskId, status: "ok", prUrl, exitCode: 0 };
|
|
359
564
|
})();
|
|
360
565
|
return {
|
|
361
566
|
taskId,
|