beflow 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (69) hide show
  1. package/LICENSE +21 -0
  2. package/README.md +121 -0
  3. package/config.example.json +68 -0
  4. package/config.schema.json +413 -0
  5. package/package.json +72 -0
  6. package/src/agent/acpx.ts +197 -0
  7. package/src/agent/driver.ts +38 -0
  8. package/src/agent/events.ts +228 -0
  9. package/src/agent/issuefence.ts +42 -0
  10. package/src/agent/report.ts +44 -0
  11. package/src/cli.ts +910 -0
  12. package/src/config/load.ts +45 -0
  13. package/src/config/persist.ts +58 -0
  14. package/src/config/schema.ts +181 -0
  15. package/src/config/store.ts +119 -0
  16. package/src/core/accept.ts +25 -0
  17. package/src/core/continuation.ts +57 -0
  18. package/src/core/deadletter.ts +55 -0
  19. package/src/core/decision.ts +8 -0
  20. package/src/core/doctor.ts +223 -0
  21. package/src/core/drift.ts +59 -0
  22. package/src/core/gc.ts +223 -0
  23. package/src/core/inputquality.ts +30 -0
  24. package/src/core/issuetemplate.ts +175 -0
  25. package/src/core/mcp.ts +191 -0
  26. package/src/core/newissue.ts +343 -0
  27. package/src/core/notify.ts +151 -0
  28. package/src/core/prompts.ts +165 -0
  29. package/src/core/qualitygate.ts +70 -0
  30. package/src/core/queue.ts +40 -0
  31. package/src/core/review.ts +266 -0
  32. package/src/core/run.ts +1075 -0
  33. package/src/core/runstore.ts +144 -0
  34. package/src/core/runsview.ts +111 -0
  35. package/src/core/setup.ts +203 -0
  36. package/src/core/sla.ts +39 -0
  37. package/src/core/template.ts +65 -0
  38. package/src/core/watch.ts +825 -0
  39. package/src/core/worktree.ts +74 -0
  40. package/src/core/writeback.ts +88 -0
  41. package/src/index.ts +154 -0
  42. package/src/model/types.ts +35 -0
  43. package/src/prompts/defaults/continuation.md +9 -0
  44. package/src/prompts/defaults/implement.md +13 -0
  45. package/src/prompts/defaults/issue-enrich.md +30 -0
  46. package/src/prompts/defaults/issues/bug.md +35 -0
  47. package/src/prompts/defaults/issues/feature.md +24 -0
  48. package/src/prompts/defaults/issues/generic.md +16 -0
  49. package/src/prompts/defaults/issues/spike.md +24 -0
  50. package/src/prompts/defaults/report.md +20 -0
  51. package/src/prompts/defaults/review.md +34 -0
  52. package/src/prompts/defaults/spec.md +11 -0
  53. package/src/prompts/defaults/task.md +6 -0
  54. package/src/prompts/defaults/triage.md +11 -0
  55. package/src/prompts/text-modules.d.ts +4 -0
  56. package/src/resolve/jobkind.ts +11 -0
  57. package/src/resolve/metadata.ts +103 -0
  58. package/src/resolve/precedence.ts +104 -0
  59. package/src/trackers/factory.ts +17 -0
  60. package/src/trackers/linear/adapter.ts +416 -0
  61. package/src/trackers/linear/client.ts +264 -0
  62. package/src/trackers/linear/map.ts +113 -0
  63. package/src/trackers/linear/types.ts +44 -0
  64. package/src/trackers/marker.ts +20 -0
  65. package/src/trackers/plane/adapter.ts +754 -0
  66. package/src/trackers/plane/client.ts +302 -0
  67. package/src/trackers/plane/map.ts +168 -0
  68. package/src/trackers/plane/types.ts +134 -0
  69. package/src/trackers/tracker.ts +135 -0
@@ -0,0 +1,825 @@
1
+ import type { AgentDriver } from "../agent/driver.ts";
2
+ import type { Config, Registry } from "../config/schema.ts";
3
+ import type { Issue, Resolved } from "../model/types.ts";
4
+ import { IssueNotFoundError } from "../trackers/tracker.ts";
5
+ import type { Tracker } from "../trackers/tracker.ts";
6
+ import { assembleContinuation, renderContinuation } from "./continuation.ts";
7
+ import { QUARANTINED_LABEL, quarantine, resolveDeadLetterThreshold, shouldQuarantine } from "./deadletter.ts";
8
+ import { isDecisionHeld } from "./decision.ts";
9
+ import type { McpServer } from "./mcp.ts";
10
+ import { notifyEscalation } from "./notify.ts";
11
+ import type { Notifier } from "./notify.ts";
12
+ import type { PromptSet } from "./prompts.ts";
13
+ import { defaultPrComment, resolveReviewEnabled, resolveReviewPostToPr, runReview } from "./review.ts";
14
+ import type { PrCommenter, RunReviewDeps } from "./review.ts";
15
+ import { isPulledByHuman, runIssue } from "./run.ts";
16
+ import type { Logger, RunIssueDeps } from "./run.ts";
17
+ import { deleteRecord, listRecords, loadRecord, resolveRunsDir, saveRecord, systemClock } from "./runstore.ts";
18
+ import type { Clock, RunRecord, RunStoreFs } from "./runstore.ts";
19
+ import { ageMinutes, formatAge, resolveSla, shouldRemind } from "./sla.ts";
20
+ import { bunExec, removeWorktree } from "./worktree.ts";
21
+ import type { Exec } from "./worktree.ts";
22
+
23
// Tracker state names that watch queries and moves issues into.
const IN_PROGRESS_STATE = "In Progress";
const DONE_STATE = "Done";

// Issue labels that watch inspects and removes.
const CHANGES_REQUESTED_LABEL = "changes-requested";
const BLOCKED_LABEL = "blocked";
const FAILED_LABEL = "failed";

// Phrase embedded in the guidance comment; watch looks for it in the latest bot
// comment so the same guidance is not posted again.
const GUIDANCE_SENTINEL = "haven't described the changes";

// Comment posted when `changes-requested` is set but no new comment describes the change.
const CHANGES_REQUESTED_GUIDANCE = [
  `You added the \`${CHANGES_REQUESTED_LABEL}\` label but ${GUIDANCE_SENTINEL}.`,
  "Please leave a comment explaining what to change, and beflow will pick it up automatically.",
].join(" ");
31
+
32
/**
 * Override applied to every run that watch dispatches.
 *
 * watch is a headless drainer with no human to supervise it, so each dispatch is
 * forced into autonomous mode regardless of the project's configured default
 * runMode. That is also what gives each run an isolated worktree: useWorktree
 * (run.ts) requires runMode "autonomous"; without it, concurrent dispatches would
 * share the repo working tree and corrupt each other's branches.
 */
const AUTONOMOUS_DISPATCH: Partial<Resolved> = { runMode: "autonomous" };
38
+
39
// Fallback caps, applied only when a project does not configure its own limits.

/** Maximum number of In Review items before new Todo dispatch is skipped. */
const DEFAULT_LIMIT_IN_REVIEW = 5;

/** Maximum number of In Progress items before new Todo dispatch is skipped. */
const DEFAULT_LIMIT_IN_PROGRESS = 3;
42
+
43
/** Summary of a pull request's CI checks as consumed by the watch passes. */
export interface PrCheckResult {
  /** Names of the failing checks (used for the continuation detail). */
  failing: string[];
  /** PR head commit SHA (the loop-safety key); absent when it could not be read. */
  sha?: string;
  /** Overall verdict across all checks. */
  state: "failing" | "none" | "passing" | "pending";
}
48
+
49
/** Dependencies injected into `watchTick`; optional members degrade safely when omitted. */
export interface WatchDeps {
  /** Tracker that queues are listed from and that state, label and comment changes go to. */
  tracker: Tracker;
  /** Agent driver forwarded to every dispatched run and review. */
  driver: AgentDriver;
  /** Config used when `getSnapshot` is not provided. */
  config: Config;
  /** Registry used when `getSnapshot` is not provided. */
  registry: Registry;
  /** Prompt set forwarded to runs and reviews and used to render continuations. */
  prompts: PromptSet;
  /**
   * When true, `watchTick` previews the dispatch decision and performs NO
   * mutating pass, dispatch, or record write — a read-only single-tick plan.
   */
  dryRun?: boolean;
  /** Exec forwarded to runs and reviews; worktree removal is skipped when it is absent. */
  git?: Exec;
  /** Log sink; `watchTick` falls back to a no-op when omitted. */
  log?: Logger;
  /** Notifier handed to escalation notifications and forwarded to dispatched runs. */
  notify?: Notifier;
  /** Filesystem handed to the run-store calls (list, load, save, delete). */
  runsFs?: RunStoreFs;
  /** Time source; `watchTick` falls back to `systemClock` when omitted. */
  clock?: Clock;
  /** Forwarded unchanged to each dispatched run's deps when set. */
  fresh?: boolean;
  /**
   * When provided, each tick resolves the live config/registry from this
   * snapshot so a hot-reloaded config takes effect on the next tick.
   */
  getSnapshot?: () => { config: Config; registry: Registry };
  /**
   * When provided (and opted-in per project), the CI-red pass polls each
   * In-Review item's PR checks and re-dispatches rework on a red commit.
   * Omitted in tests that don't exercise that path; when undefined the pass
   * is skipped (degrade safely), mirroring `prMerged`.
   */
  prChecks?: (prUrl: string) => Promise<PrCheckResult>;
  /**
   * When provided, the auto-Done pass polls each In-Review item's linked PR and
   * moves merged ones to Done. Omitted in tests that don't exercise that path;
   * when undefined the auto-Done pass is skipped (degrade safely).
   */
  prMerged?: (prUrl: string) => Promise<boolean>;
  /**
   * Posts the reviewer agent's findings on the PR (only when review.postToPr is on).
   * Defaults to the gh-backed `defaultPrComment`; tests inject a spy.
   */
  prCommenter?: PrCommenter;
  /**
   * The PR-review entrypoint, injected so tests can drive the review pass with a
   * fake. Defaults to the real `runReview`. The review pass also requires `prChecks`
   * (its head-SHA source) and the per-project `review.enabled` opt-in.
   */
  runReview?: (key: string, reviewDeps: RunReviewDeps) => Promise<unknown>;
  /**
   * ACP MCP servers (from the `.mcp.json` cascade) injected per dispatched run
   * via acpx; threaded into each runIssueDeps build. Omitted when mcp is disabled.
   */
  mcpServers?: McpServer[];
}
87
+
88
/**
 * Default `prMerged` implementation backed by the `gh` CLI.
 *
 * @param prUrl - PR URL handed to `gh pr view`.
 * @returns true only when `gh` exits 0 and prints the state `MERGED`; any
 *   failure to query is reported as "not merged".
 */
export async function defaultPrMerged(prUrl: string): Promise<boolean> {
  const { code, stdout } = await bunExec("gh", ["pr", "view", prUrl, "--json", "state", "--jq", ".state"]);
  if (code !== 0) {
    return false;
  }
  return stdout.trim() === "MERGED";
}
92
+
93
// Upper-cased `conclusion` values of a rollup entry that mark the check as red.
const CI_FAILING_CONCLUSIONS = new Set(["CANCELLED", "ERROR", "FAILURE", "STARTUP_FAILURE", "TIMED_OUT"]);

// Upper-cased `state` values of a rollup entry that mark the check as red.
const CI_FAILING_STATES = new Set(["ERROR", "FAILURE"]);

// Upper-cased `status` values of a rollup entry that mean the check has not finished.
// "REQUESTED" (requested but not yet started) is included so such a check is not
// mistaken for a finished, passing one.
const CI_PENDING_STATUSES = new Set(["IN_PROGRESS", "PENDING", "QUEUED", "REQUESTED", "WAITING"]);
96
+
97
/**
 * One element of the `statusCheckRollup` array in the parsed `gh pr view` output.
 * Every field is optional: the JSON is external input, and `defaultPrChecks`
 * re-checks field types at runtime before using them.
 */
interface RollupEntry {
  /** Entry type tag from the JSON; declared but not read by `defaultPrChecks`. */
  __typename?: string;
  /** Compared (upper-cased) against `CI_FAILING_CONCLUSIONS`. */
  conclusion?: string;
  /** Fallback display name when `name` is absent. */
  context?: string;
  /** Preferred display name of the check. */
  name?: string;
  /** Compared (upper-cased) against `CI_FAILING_STATES` and "PENDING". */
  state?: string;
  /** Compared (upper-cased) against `CI_PENDING_STATUSES`. */
  status?: string;
}
105
+
106
/**
 * Default `prChecks` implementation backed by the `gh` CLI: reads the PR's
 * status-check rollup and head commit, then folds the rollup into one verdict.
 *
 * Precedence: any failing entry → "failing"; otherwise any unfinished entry →
 * "pending"; otherwise "passing". A `gh` failure or unparsable output yields
 * "none" without a SHA; an empty or missing rollup yields "none" with the SHA
 * when one was reported.
 *
 * @param prUrl - PR URL handed to `gh pr view`.
 * @returns The verdict, the names of failing checks, and the head SHA when known.
 */
export async function defaultPrChecks(prUrl: string): Promise<PrCheckResult> {
  const res = await bunExec("gh", ["pr", "view", prUrl, "--json", "statusCheckRollup,headRefOid"]);
  if (res.code !== 0) {
    return { failing: [], state: "none" };
  }
  let parsed: unknown;
  try {
    parsed = JSON.parse(res.stdout);
  } catch {
    return { failing: [], state: "none" };
  }
  const obj: Record<string, unknown> = typeof parsed === "object" && parsed !== null ? { ...parsed } : {};
  const rawRollup = obj.statusCheckRollup;
  // The predicate only proves "non-null object"; every field read below is
  // re-checked at runtime because the JSON is external input.
  const rollup: RollupEntry[] = Array.isArray(rawRollup)
    ? rawRollup.filter((e): e is RollupEntry => typeof e === "object" && e !== null)
    : [];
  const sha = typeof obj.headRefOid === "string" ? obj.headRefOid : undefined;
  const base: PrCheckResult = { failing: [], ...(sha !== undefined ? { sha } : {}), state: "none" };
  if (rollup.length === 0) {
    return base;
  }

  // Normalises an untrusted field: upper-cased when it is a string, else undefined.
  const upper = (value: unknown): string | undefined =>
    typeof value === "string" ? value.toUpperCase() : undefined;

  const failing: string[] = [];
  let pending = false;
  for (const entry of rollup) {
    const conclusion = upper(entry.conclusion);
    const state = upper(entry.state);
    const status = upper(entry.status);
    const isFailing =
      (conclusion !== undefined && CI_FAILING_CONCLUSIONS.has(conclusion)) ||
      (state !== undefined && CI_FAILING_STATES.has(state));
    if (isFailing) {
      // Only accept string names so `failing` really is a string[] even when
      // the JSON carries an unexpected value type for `name`/`context`.
      const label = [entry.name, entry.context].find((v): v is string => typeof v === "string");
      failing.push(label ?? "(unknown check)");
      continue;
    }
    // "EXPECTED" is a status that has not reported yet; treat it like "PENDING"
    // so it cannot be mistaken for a finished, passing check.
    if (
      (status !== undefined && CI_PENDING_STATUSES.has(status)) ||
      state === "PENDING" ||
      state === "EXPECTED"
    ) {
      pending = true;
    }
  }
  if (failing.length > 0) {
    return { ...base, failing, state: "failing" };
  }
  if (pending) {
    return { ...base, state: "pending" };
  }
  return { ...base, state: "passing" };
}
152
+
153
/** The single outcome label reported for one `watchTick` call. */
export type WatchAction =
  // New or resumed agent work.
  | "dispatched"
  | "parked"
  | "resumed"
  // Nothing was dispatched.
  | "at-capacity"
  | "idle"
  | "error"
  // Follow-up on existing work.
  | "completed"
  | "rework"
  | "ci-rework"
  | "answered"
  | "awaiting-feedback"
  // Record housekeeping and holds.
  | "orphaned"
  | "quarantined"
  | "reconciled"
  | "released"
  | "reviewed";
170
+
171
/** Result of a single `watchTick` call. */
export interface WatchTickResult {
  /** What the tick did. */
  action: WatchAction;
  /** Key of the issue the action applied to, when the action concerns one issue. */
  key?: string;
}
175
+
176
/**
 * Picks the optional `notify` / `runsFs` members out of `deps`, leaving out any
 * that are undefined so the result can be spread into another deps object
 * without introducing explicit `undefined` properties.
 */
function optionalDeps(deps: WatchDeps): { notify?: Notifier; runsFs?: RunStoreFs } {
  const { notify, runsFs } = deps;
  const notifyPart = notify !== undefined ? { notify } : {};
  const runsFsPart = runsFs !== undefined ? { runsFs } : {};
  return { ...notifyPart, ...runsFsPart };
}
182
+
183
/**
 * Builds the dependency bag for one `runIssue` call from the watch deps plus the
 * tick's resolved config, registry and logger. Optional members are copied only
 * when defined; `git` is passed through as-is, including when undefined.
 */
function runIssueDeps(deps: WatchDeps, config: Config, registry: Registry, log: Logger): RunIssueDeps {
  const { clock, driver, fresh, git, mcpServers, notify, prompts, runsFs, tracker } = deps;
  // Optional members, each included only when it is actually set.
  const extras = {
    ...(runsFs !== undefined ? { runsFs } : {}),
    ...(clock !== undefined ? { clock } : {}),
    ...(fresh !== undefined ? { fresh } : {}),
    ...(notify !== undefined ? { notify } : {}),
    ...(mcpServers !== undefined ? { mcpServers } : {}),
  };
  return { config, driver, git, log, prompts, registry, tracker, ...extras };
}
199
+
200
/**
 * Builds the dependency bag for one review call from the watch deps plus the
 * tick's resolved config, registry and logger.
 *
 * `prCommenter` falls back to `defaultPrComment` when none was injected; `git`,
 * `runsFs` and `clock` are copied only when defined.
 */
function runReviewDeps(
  deps: WatchDeps,
  config: Config,
  registry: Registry,
  log: Logger,
  postToPr: boolean,
): RunReviewDeps {
  const { clock, driver, git, prompts, runsFs, tracker } = deps;
  const prCommenter = deps.prCommenter ?? defaultPrComment;
  // Optional members, each included only when it is actually set.
  const extras = {
    ...(git !== undefined ? { git } : {}),
    ...(runsFs !== undefined ? { runsFs } : {}),
    ...(clock !== undefined ? { clock } : {}),
  };
  return { config, driver, log, postToPr, prCommenter, prompts, registry, tracker, ...extras };
}
221
+
222
+ export async function watchTick(projectKey: string, deps: WatchDeps): Promise<WatchTickResult> {
223
+ const log =
224
+ deps.log ??
225
+ ((): void => {
226
+ /* no-op: logging disabled */
227
+ });
228
+ const { config, registry } = deps.getSnapshot
229
+ ? deps.getSnapshot()
230
+ : { config: deps.config, registry: deps.registry };
231
+ const runsDir = resolveRunsDir(config.runs?.dir);
232
+ const clock = deps.clock ?? systemClock;
233
+ const sla = resolveSla(config, registry, projectKey);
234
+ const deadLetterThreshold = resolveDeadLetterThreshold(config, registry, projectKey);
235
+
236
+ if (deps.dryRun === true) {
237
+ return dryRunTick(projectKey, deps, config, registry, log);
238
+ }
239
+
240
+ // SLA re-escalation (opt-in housekeeping): a stuck item past its threshold gets a
241
+ // Periodic `reminder` ping; once it had a reminder, a later `resolved` ping cancels
242
+ // It. Writing `escalatedAt` must NOT bump `updatedAt`, else the age clock resets.
243
+ async function remind(
244
+ item: Issue,
245
+ record: RunRecord | null,
246
+ thresholdMin: number | undefined,
247
+ state: string,
248
+ ): Promise<void> {
249
+ if (thresholdMin === undefined || record === null) {
250
+ return;
251
+ }
252
+ if (!shouldRemind(clock(), record, thresholdMin)) {
253
+ return;
254
+ }
255
+ const detail = `Stuck in ${state} for ${formatAge(ageMinutes(clock(), record.updatedAt))}.`;
256
+ await notifyEscalation(deps.notify, item, "reminder", detail);
257
+ saveRecord(runsDir, { ...record, escalatedAt: clock(), updatedAt: record.updatedAt }, deps.runsFs);
258
+ log(`beflow: watch ${projectKey} — reminder ${item.key} (${state}, ${detail})`);
259
+ }
260
+
261
+ // (a) Crash-resume — driven by the RUN STORE, the source of truth for which runs
262
+ // Should be live. Resume an interrupted autonomous run for THIS project before
263
+ // Anything new, so a crashed/restarted process picks up where it left off. One
264
+ // Unit per tick. Supervised runs are user-driven and never auto-resumed.
265
+ const active = listRecords(runsDir, deps.runsFs).filter(
266
+ (r) => r.status === "in_progress" && r.runMode === "autonomous" && r.key.startsWith(`${projectKey}-`),
267
+ );
268
+ for (const rec of active) {
269
+ // Reconcile a manual pull: if a human moved the card out of the started
270
+ // Group while the run was crashed/parked, don't resume — the human wins.
271
+ let issue: Issue;
272
+ try {
273
+ issue = await deps.tracker.getIssue(rec.key);
274
+ } catch (err) {
275
+ if (err instanceof IssueNotFoundError) {
276
+ // Gone (deleted). PARK: stop retrying, but LEAVE the worktree — it may hold
277
+ // unpushed work and there's no issue left to comment it back to.
278
+ deleteRecord(runsDir, rec.key, deps.runsFs);
279
+ log(
280
+ `beflow: watch ${projectKey} — ${rec.key} gone (deleted); record dropped, worktree left at ${rec.cwd} for manual cleanup`,
281
+ );
282
+ return { action: "orphaned", key: rec.key };
283
+ }
284
+ throw err; // transient → bubble to the per-tick guard; record is KEPT, retried next tick
285
+ }
286
+ if (issue.archived === true) {
287
+ // Archived. Same conservative park: stop acting on it, leave the worktree.
288
+ deleteRecord(runsDir, rec.key, deps.runsFs);
289
+ log(
290
+ `beflow: watch ${projectKey} — ${rec.key} archived; record dropped, worktree left at ${rec.cwd} for manual cleanup`,
291
+ );
292
+ return { action: "orphaned", key: rec.key };
293
+ }
294
+ if (isPulledByHuman(issue)) {
295
+ if (deps.git !== undefined && rec.cwd) {
296
+ try {
297
+ await removeWorktree(rec.repoPath ?? rec.cwd, rec.cwd, deps.git);
298
+ } catch {
299
+ // Best-effort: a stale or already-removed worktree must not block reconcile.
300
+ }
301
+ }
302
+ deleteRecord(runsDir, rec.key, deps.runsFs);
303
+ log(
304
+ `beflow: watch ${projectKey} — ${rec.key} reconciled (now ${issue.state.name}); manual move, not resumed`,
305
+ );
306
+ return { action: "reconciled", key: rec.key };
307
+ }
308
+ if (shouldQuarantine(rec.attempts ?? 0, deadLetterThreshold)) {
309
+ // Crash-loop dead-letter: quarantine for a human, stop auto-resuming. The
310
+ // Universal counter accumulates failures across resume + CI-rework alike.
311
+ await quarantine(
312
+ issue,
313
+ `Quarantined after ${String(rec.attempts ?? 0)} failed attempts — the run kept crashing or could not finish.`,
314
+ { clock, record: rec, runsDir, tracker: deps.tracker, ...optionalDeps(deps) },
315
+ );
316
+ log(
317
+ `beflow: watch ${projectKey} — ${rec.key} quarantined after ${String(rec.attempts ?? 0)} failed attempts → Needs Input`,
318
+ );
319
+ return { action: "quarantined", key: rec.key };
320
+ }
321
+ try {
322
+ await runIssue(rec.key, AUTONOMOUS_DISPATCH, runIssueDeps(deps, config, registry, log));
323
+ log(`beflow: watch ${projectKey} — resumed ${rec.key}`);
324
+ return { action: "resumed", key: rec.key };
325
+ } catch (err) {
326
+ log(
327
+ `beflow: watch ${projectKey} — resume ${rec.key} errored: ${err instanceof Error ? err.message : String(err)}`,
328
+ );
329
+ return { action: "error", key: rec.key };
330
+ }
331
+ }
332
+
333
+ // The cap check (pass d) still needs the board's In Progress count.
334
+ const inProgress = await deps.tracker.listQueue({
335
+ project: projectKey,
336
+ state: IN_PROGRESS_STATE,
337
+ });
338
+
339
+ // (b) Housekeeping pre-pass over In Review (no early return; cheap, no agent).
340
+ // Auto-Done: a merged linked PR moves the item to Done and cleans up.
341
+ let inReview = await deps.tracker.listQueue({
342
+ project: projectKey,
343
+ state: "In Review",
344
+ });
345
+ let didComplete = false;
346
+ const completedKeys = new Set<string>();
347
+ if (deps.prMerged !== undefined) {
348
+ for (const item of inReview) {
349
+ const record = loadRecord(runsDir, item.key, deps.runsFs);
350
+ if (record?.prUrl === undefined) {
351
+ continue;
352
+ }
353
+ if (!(await deps.prMerged(record.prUrl))) {
354
+ continue;
355
+ }
356
+ await deps.tracker.updateState(item, DONE_STATE);
357
+ if (record.escalatedAt !== undefined) {
358
+ await notifyEscalation(deps.notify, item, "resolved", "Merged and closed.");
359
+ }
360
+ if (deps.git !== undefined && record.cwd) {
361
+ try {
362
+ await removeWorktree(record.repoPath ?? record.cwd, record.cwd, deps.git);
363
+ } catch {
364
+ // Best-effort: a stale or already-removed worktree must not block Done.
365
+ }
366
+ }
367
+ deleteRecord(runsDir, item.key, deps.runsFs);
368
+ log(`beflow: watch ${projectKey} — ${item.key} merged → Done`);
369
+ didComplete = true;
370
+ completedKeys.add(item.key);
371
+ }
372
+ if (completedKeys.size > 0) {
373
+ inReview = inReview.filter((i) => !completedKeys.has(i.key));
374
+ }
375
+ }
376
+
377
+ // SLA reminders over the surviving In-Review items (after auto-Done filtering).
378
+ for (const item of inReview) {
379
+ await remind(item, loadRecord(runsDir, item.key, deps.runsFs), sla.inReviewMinutes, "In Review");
380
+ }
381
+
382
+ // (c) Dispatch decision (ONE agent unit; order rework → answered → todo).
383
+ // The rework/answered re-dispatches finish existing work (like crash-resume)
384
+ // And are NOT gated by the Todo cap.
385
+ let didGuide = false;
386
+ for (const item of inReview) {
387
+ if (!item.labels.includes(CHANGES_REQUESTED_LABEL)) {
388
+ continue;
389
+ }
390
+ const record = loadRecord(runsDir, item.key, deps.runsFs);
391
+ const ctx = await assembleContinuation(deps.tracker, item, {
392
+ ...(record?.updatedAt !== undefined ? { since: record.updatedAt } : {}),
393
+ record,
394
+ });
395
+ if (ctx.newComments.length > 0) {
396
+ await deps.tracker.removeProperty(item, CHANGES_REQUESTED_LABEL);
397
+ await runIssue(item.key, AUTONOMOUS_DISPATCH, {
398
+ ...runIssueDeps(deps, config, registry, log),
399
+ continuation: renderContinuation(deps.prompts, ctx),
400
+ });
401
+ log(`beflow: watch ${projectKey} — rework ${item.key}`);
402
+ return { action: "rework", key: item.key };
403
+ }
404
+ // Label only, no description: post guidance once (idempotent), keep scanning.
405
+ const comments = await deps.tracker.listComments(item);
406
+ const last = comments.at(-1);
407
+ if (!(last?.isBot === true && last.body.includes(GUIDANCE_SENTINEL))) {
408
+ await deps.tracker.comment(item, CHANGES_REQUESTED_GUIDANCE);
409
+ log(`beflow: watch ${projectKey} — ${item.key} changes-requested without description; posted guidance`);
410
+ didGuide = true;
411
+ }
412
+ }
413
+
414
+ // CI-red auto-rework (opt-in, gh-gated). A red CI check on an In-Review PR
415
+ // Re-dispatches rework with the failure as continuation context, exactly like
416
+ // A `changes-requested` label — but only after the explicit-human path above.
417
+ // Loop-safe: never reworks the same head SHA twice; quarantines a perpetually
418
+ // Red PR to Needs Input once the universal attempt counter hits the threshold.
419
+ if (deps.prChecks !== undefined && registry.projects[projectKey]?.ci?.autoReworkOnRed === true) {
420
+ for (const item of inReview) {
421
+ const record = loadRecord(runsDir, item.key, deps.runsFs);
422
+ if (record?.prUrl === undefined) {
423
+ continue;
424
+ }
425
+ const checks = await deps.prChecks(record.prUrl);
426
+ // Green again → clear the failure streak (housekeeping, no return).
427
+ if (checks.state === "passing" && (record.attempts ?? 0) > 0) {
428
+ saveRecord(runsDir, { ...record, attempts: 0 }, deps.runsFs);
429
+ continue;
430
+ }
431
+ if (checks.state !== "failing") {
432
+ continue;
433
+ }
434
+ // Loop-safety: never rework the same head SHA twice.
435
+ if (checks.sha !== undefined && checks.sha === record.ciReworkSha) {
436
+ continue;
437
+ }
438
+
439
+ const attempts = record.attempts ?? 0;
440
+ if (shouldQuarantine(attempts, deadLetterThreshold)) {
441
+ // Quarantine a PR CI can't get green: the universal counter accumulates
442
+ // CI-rework failures together with crash-resume failures.
443
+ await quarantine(
444
+ item,
445
+ `CI red after ${String(attempts)} auto-rework attempts: ${checks.failing.join(", ") || "(unknown)"}.`,
446
+ {
447
+ clock,
448
+ record: { ...record, ...(checks.sha !== undefined ? { ciReworkSha: checks.sha } : {}) },
449
+ runsDir,
450
+ tracker: deps.tracker,
451
+ ...optionalDeps(deps),
452
+ },
453
+ );
454
+ log(`beflow: watch ${projectKey} — ${item.key} CI-rework quarantined → Needs Input`);
455
+ return { action: "quarantined", key: item.key };
456
+ }
457
+
458
+ const ctx = await assembleContinuation(deps.tracker, item, { record, since: record.updatedAt });
459
+ const ciNote = `The CI checks on this PR are failing (${checks.failing.join(", ") || "unknown checks"}). Investigate the failures, fix them, and update the existing PR (${record.prUrl}). Then emit the report block.`;
460
+ const continuation = `${ciNote}\n\n${renderContinuation(deps.prompts, ctx)}`;
461
+ await runIssue(item.key, AUTONOMOUS_DISPATCH, {
462
+ ...runIssueDeps(deps, config, registry, log),
463
+ continuation,
464
+ });
465
+ // RunIssue rewrites the record from scratch (resetting attempts to 0 on a
466
+ // Continuation re-dispatch) — re-stamp the accumulated counter + loop-safety SHA.
467
+ const after = loadRecord(runsDir, item.key, deps.runsFs);
468
+ if (after !== null) {
469
+ saveRecord(
470
+ runsDir,
471
+ {
472
+ ...after,
473
+ attempts: attempts + 1,
474
+ ...(checks.sha !== undefined ? { ciReworkSha: checks.sha } : {}),
475
+ },
476
+ deps.runsFs,
477
+ );
478
+ }
479
+ log(
480
+ `beflow: watch ${projectKey} — CI-rework ${item.key} (failing: ${checks.failing.join(", ") || "unknown"})`,
481
+ );
482
+ return { action: "ci-rework", key: item.key };
483
+ }
484
+ }
485
+
486
+ // PR review assist (opt-in, gh-gated). When `review.enabled` and a head-SHA source
487
+ // (`prChecks`) are both present, a reviewer agent reads each In-Review PR's diff and
488
+ // Posts findings to the issue (and, when `postToPr`, to the PR). Loop-safe: never
489
+ // Re-reviews the same head SHA. Reads-only on the board — never moves or merges.
490
+ if (deps.prChecks !== undefined && resolveReviewEnabled(config, registry, projectKey)) {
491
+ const postToPr = resolveReviewPostToPr(config, registry, projectKey);
492
+ const review = deps.runReview ?? runReview;
493
+ for (const item of inReview) {
494
+ const record = loadRecord(runsDir, item.key, deps.runsFs);
495
+ if (record?.prUrl === undefined) {
496
+ continue;
497
+ }
498
+ const checks = await deps.prChecks(record.prUrl);
499
+ if (checks.sha === undefined || checks.sha === record.reviewedSha) {
500
+ continue;
501
+ }
502
+ const headSha = checks.sha;
503
+ await review(item.key, {
504
+ ...runReviewDeps(deps, config, registry, log, postToPr),
505
+ reviewSha: async () => Promise.resolve(headSha),
506
+ });
507
+ log(`beflow: watch ${projectKey} — reviewed ${item.key}`);
508
+ return { action: "reviewed", key: item.key };
509
+ }
510
+ }
511
+
512
+ const needsInput = await deps.tracker.listQueue({
513
+ project: projectKey,
514
+ state: "Needs Input",
515
+ });
516
+ // SLA reminders BEFORE the answered loop so a reminder for a later item isn't
517
+ // Skipped when the answered loop early-returns on the first re-activated item.
518
+ for (const item of needsInput) {
519
+ await remind(item, loadRecord(runsDir, item.key, deps.runsFs), sla.needsInputMinutes, "Needs Input");
520
+ }
521
+ // Decision-gate RELEASE pass: a decision-hold record whose issue NO LONGER carries
522
+ // The `needs-decision` label means the human made the call; release it back to Todo.
523
+ // Early-return on the first release, like the answered loop. The `heldReason` filter
524
+ // Makes this pass act on (and only on) decision holds, never an unrelated Needs-Input.
525
+ for (const item of needsInput) {
526
+ const record = loadRecord(runsDir, item.key, deps.runsFs);
527
+ if (record?.heldReason !== "decision") {
528
+ continue;
529
+ }
530
+ if (isDecisionHeld(item.labels)) {
531
+ continue; // decision still pending
532
+ }
533
+ await deps.tracker.updateState(item, "Todo");
534
+ if (record.escalatedAt !== undefined) {
535
+ await notifyEscalation(deps.notify, item, "resolved", "Decision made; released to Todo.");
536
+ }
537
+ deleteRecord(runsDir, item.key, deps.runsFs);
538
+ log(`beflow: watch ${projectKey} — ${item.key} decision made (label removed) → released to Todo`);
539
+ return { action: "released", key: item.key };
540
+ }
541
+ // Quarantine RELEASE pass: a quarantine-hold record whose issue NO LONGER carries the
542
+ // `quarantined` label means a human cleared it for retry; reset the universal counter,
543
+ // Clear the hold, and release it back to Todo. Mirrors the decision-release pass; the
544
+ // `heldReason` filter scopes it to (and only to) quarantine holds.
545
+ for (const item of needsInput) {
546
+ const record = loadRecord(runsDir, item.key, deps.runsFs);
547
+ if (record?.heldReason !== "quarantine") {
548
+ continue;
549
+ }
550
+ if (item.labels.includes(QUARANTINED_LABEL)) {
551
+ continue; // still quarantined
552
+ }
553
+ await deps.tracker.updateState(item, "Todo");
554
+ if (record.escalatedAt !== undefined) {
555
+ await notifyEscalation(deps.notify, item, "resolved", "Quarantine cleared; released to Todo.");
556
+ }
557
+ // `heldReason: undefined` clears the hold — JSON.stringify drops undefined keys.
558
+ saveRecord(runsDir, { ...record, attempts: 0, heldReason: undefined, updatedAt: clock() }, deps.runsFs);
559
+ log(`beflow: watch ${projectKey} — ${item.key} quarantine cleared (label removed) → released to Todo`);
560
+ return { action: "released", key: item.key };
561
+ }
562
+ for (const item of needsInput) {
563
+ // A still-`needs-decision`-labeled item is resolved by removing the label (the
564
+ // Release pass above), NOT by commenting — so a comment can't bypass an undecided
565
+ // Hold. Skip it here.
566
+ if (isDecisionHeld(item.labels)) {
567
+ continue;
568
+ }
569
+ const record = loadRecord(runsDir, item.key, deps.runsFs);
570
+ const ctx = await assembleContinuation(deps.tracker, item, {
571
+ ...(record?.updatedAt !== undefined ? { since: record.updatedAt } : {}),
572
+ record,
573
+ });
574
+ if (ctx.newComments.length > 0) {
575
+ // A human re-activated this item; clear the reason-tag that parked it.
576
+ for (const label of [BLOCKED_LABEL, FAILED_LABEL]) {
577
+ if (item.labels.includes(label)) {
578
+ await deps.tracker.removeProperty(item, label);
579
+ }
580
+ }
581
+ if (record?.escalatedAt !== undefined) {
582
+ await notifyEscalation(deps.notify, item, "resolved", "A human responded; resuming.");
583
+ }
584
+ await runIssue(item.key, AUTONOMOUS_DISPATCH, {
585
+ ...runIssueDeps(deps, config, registry, log),
586
+ continuation: renderContinuation(deps.prompts, ctx),
587
+ });
588
+ log(`beflow: watch ${projectKey} — answered ${item.key}`);
589
+ return { action: "answered", key: item.key };
590
+ }
591
+ }
592
+
593
+ // (d) Caps + Todo dispatch — unchanged.
594
+ const limits = registry.projects[projectKey]?.limits;
595
+ const inProgressCap = limits?.inProgress ?? DEFAULT_LIMIT_IN_PROGRESS;
596
+ if (inProgress.length >= inProgressCap) {
597
+ log(
598
+ `beflow: watch ${projectKey} — In Progress at cap (${String(inProgress.length)}/${String(inProgressCap)}); skipping`,
599
+ );
600
+ return finalize(didComplete, didGuide, { action: "at-capacity" });
601
+ }
602
+
603
+ const inReviewCap = limits?.inReview ?? DEFAULT_LIMIT_IN_REVIEW;
604
+ if (inReview.length >= inReviewCap) {
605
+ log(
606
+ `beflow: watch ${projectKey} — In Review at cap (${String(inReview.length)}/${String(inReviewCap)}); skipping`,
607
+ );
608
+ return finalize(didComplete, didGuide, { action: "at-capacity" });
609
+ }
610
+
611
+ let todo = await deps.tracker.listQueue({
612
+ project: projectKey,
613
+ state: "Todo",
614
+ });
615
+ if (registry.projects[projectKey]?.scheduling?.activeCycleOnly === true) {
616
+ const cycleIds = await deps.tracker.activeCycleIssueIds(projectKey);
617
+ if (cycleIds !== null) {
618
+ const before = todo.length;
619
+ todo = todo.filter((t) => cycleIds.has(t.id));
620
+ log(
621
+ `beflow: watch ${projectKey} — active-cycle filter: ${String(todo.length)}/${String(before)} Todo in cycle`,
622
+ );
623
+ } else {
624
+ log(
625
+ `beflow: watch ${projectKey} — activeCycleOnly set but no active cycle determinable; dispatching without cycle filter`,
626
+ );
627
+ }
628
+ }
629
+ if (todo.length === 0) {
630
+ log(`beflow: watch ${projectKey} — Todo empty; idle`);
631
+ return finalize(didComplete, didGuide, { action: "idle" });
632
+ }
633
+
634
+ // Respect blocked-by: walk the priority-ranked queue and COLLECT up to the
635
+ // Remaining capacity worth of eligible Todos — those whose blockers are ALL
636
+ // Resolved and that aren't quarantined (and, when the cycle filter is on, that
637
+ // Survived it above). The at-capacity early-return guaranteed at least one open
638
+ // Slot here. A relations-fetch failure bubbles to the per-tick guard rather than
639
+ // Being mistaken for "unblocked".
640
+ const slots = inProgressCap - inProgress.length;
641
+ const selected: Issue[] = [];
642
+ for (const candidate of todo) {
643
+ if (selected.length >= slots) {
644
+ break;
645
+ }
646
+ if (candidate.labels.includes(QUARANTINED_LABEL)) {
647
+ log(`beflow: watch ${projectKey} — ${candidate.key} skipped: quarantined`);
648
+ continue;
649
+ }
650
+ const pending = (await deps.tracker.blockedBy(candidate)).filter((b) => !b.done);
651
+ if (pending.length === 0) {
652
+ selected.push(candidate);
653
+ continue;
654
+ }
655
+ log(
656
+ `beflow: watch ${projectKey} — ${candidate.key} skipped: blocked-by ${pending
657
+ .map((b) => b.key)
658
+ .join(", ")} (not done)`,
659
+ );
660
+ }
661
+ if (selected.length === 0) {
662
+ log(`beflow: watch ${projectKey} — all Todo blocked; idle`);
663
+ return finalize(didComplete, didGuide, { action: "idle" });
664
+ }
665
+
666
+ // Dispatch the collected batch CONCURRENTLY, bounded by the remaining cap so it
667
+ // Can never be exceeded. Each runIssue does its own record-first claim, so
668
+ // Distinct issues are safe to run in parallel. A throw or thin-park on one item
669
+ // Must not abort the others — each outcome is captured per item.
670
+ const outcomes = await Promise.all(
671
+ selected.map(async (candidate): Promise<{ key: string; status: "dispatched" | "error" | "parked" }> => {
672
+ try {
673
+ const r = await runIssue(candidate.key, AUTONOMOUS_DISPATCH, runIssueDeps(deps, config, registry, log));
674
+ if (r.parked === "thin") {
675
+ log(`beflow: watch ${projectKey} — ${candidate.key} parked: thin description → Needs Input`);
676
+ return { key: candidate.key, status: "parked" };
677
+ }
678
+ return { key: candidate.key, status: "dispatched" };
679
+ } catch (err) {
680
+ log(
681
+ `beflow: watch ${projectKey} — dispatch ${candidate.key} errored: ${err instanceof Error ? err.message : String(err)}`,
682
+ );
683
+ return { key: candidate.key, status: "error" };
684
+ }
685
+ }),
686
+ );
687
+
688
+ const firstDispatched = outcomes.find((o) => o.status === "dispatched");
689
+ if (firstDispatched !== undefined) {
690
+ const dispatched = outcomes.filter((o) => o.status === "dispatched");
691
+ log(
692
+ `beflow: watch ${projectKey} — dispatched ${String(dispatched.length)}: ${dispatched
693
+ .map((o) => o.key)
694
+ .join(", ")}`,
695
+ );
696
+ return { action: "dispatched", key: firstDispatched.key };
697
+ }
698
+ const firstParked = outcomes.find((o) => o.status === "parked");
699
+ if (firstParked !== undefined) {
700
+ return { action: "parked", key: firstParked.key };
701
+ }
702
+ // Every collected item errored — surface the first as the representative error.
703
+ const firstErrored = outcomes.find((o) => o.status === "error");
704
+ return { action: "error", key: firstErrored?.key };
705
+ }
706
+
707
// Read-only preview of one tick: fetch the cap counts + the Todo queue, apply the
// same quarantine / blocked-by / active-cycle filters the live dispatch uses, and
// LOG the decision beflow WOULD make — never resuming, mutating the board, writing
// a record, or dispatching an agent. Mirrors the cap + eligibility logic in the
// tail of `watchTick`, stopping short of every side effect.
//
// Returns the action the live tick would be expected to report: "at-capacity" when
// either cap is reached, "idle" when nothing is eligible, otherwise "dispatched"
// carrying the key of the first Todo that would be picked up.
async function dryRunTick(
  projectKey: string,
  deps: WatchDeps,
  config: Config,
  registry: Registry,
  log: Logger,
): Promise<WatchTickResult> {
  const inProgress = await deps.tracker.listQueue({ project: projectKey, state: IN_PROGRESS_STATE });
  const inReview = await deps.tracker.listQueue({ project: projectKey, state: "In Review" });

  // Cap checks: a full In Progress or In Review column means nothing would be dispatched.
  const limits = registry.projects[projectKey]?.limits;
  const inProgressCap = limits?.inProgress ?? DEFAULT_LIMIT_IN_PROGRESS;
  if (inProgress.length >= inProgressCap) {
    log(
      `beflow: watch ${projectKey} — DRY RUN: In Progress at cap (${String(inProgress.length)}/${String(inProgressCap)}); would skip`,
    );
    return { action: "at-capacity" };
  }
  const inReviewCap = limits?.inReview ?? DEFAULT_LIMIT_IN_REVIEW;
  if (inReview.length >= inReviewCap) {
    log(
      `beflow: watch ${projectKey} — DRY RUN: In Review at cap (${String(inReview.length)}/${String(inReviewCap)}); would skip`,
    );
    return { action: "at-capacity" };
  }

  let todo = await deps.tracker.listQueue({ project: projectKey, state: "Todo" });
  if (registry.projects[projectKey]?.scheduling?.activeCycleOnly === true) {
    const cycleIds = await deps.tracker.activeCycleIssueIds(projectKey);
    if (cycleIds !== null) {
      // Report the filter's effect so an empty preview can be traced back to it.
      const before = todo.length;
      todo = todo.filter((t) => cycleIds.has(t.id));
      log(
        `beflow: watch ${projectKey} — DRY RUN: active-cycle filter: ${String(todo.length)}/${String(before)} Todo in cycle`,
      );
    } else {
      // A null result leaves the queue unfiltered; say so, since the preview
      // would otherwise list candidates without explaining the bypass.
      log(
        `beflow: watch ${projectKey} — DRY RUN: activeCycleOnly set but no active cycle determinable; would dispatch without cycle filter`,
      );
    }
  }
  if (todo.length === 0) {
    log(`beflow: watch ${projectKey} — DRY RUN: Todo empty; would idle`);
    return { action: "idle" };
  }

  // Walk the queue in the order the tracker returned it, collecting at most the
  // remaining In Progress capacity worth of Todos that are neither quarantined
  // nor waiting on an unfinished blocker.
  const slots = inProgressCap - inProgress.length;
  const selected: Issue[] = [];
  for (const candidate of todo) {
    if (selected.length >= slots) {
      break;
    }
    if (candidate.labels.includes(QUARANTINED_LABEL)) {
      log(`beflow: watch ${projectKey} — DRY RUN: ${candidate.key} skipped: quarantined`);
      continue;
    }
    const pending = (await deps.tracker.blockedBy(candidate)).filter((b) => !b.done);
    if (pending.length === 0) {
      selected.push(candidate);
      continue;
    }
    log(
      `beflow: watch ${projectKey} — DRY RUN: ${candidate.key} skipped: blocked-by ${pending
        .map((b) => b.key)
        .join(", ")} (not done)`,
    );
  }
  if (selected.length === 0) {
    log(`beflow: watch ${projectKey} — DRY RUN: all Todo blocked; would idle`);
    return { action: "idle" };
  }

  // Nothing is dispatched here; the result only names the first would-be pick.
  const [first] = selected;
  log(`beflow: watch ${projectKey} — DRY RUN: would dispatch ${selected.map((c) => c.key).join(", ")}`);
  return { action: "dispatched", key: first?.key };
}
780
+
781
// (e) Closing result for a tick that dispatched nothing: a housekeeping side-effect
// (auto-Done / guidance) is reported in preference to the plain idle / at-capacity
// outcome supplied as `fallback`. Completion outranks guidance when both happened.
function finalize(didComplete: boolean, didGuide: boolean, fallback: WatchTickResult): WatchTickResult {
  const housekeeping: WatchTickResult | undefined = didComplete
    ? { action: "completed" }
    : didGuide
      ? { action: "awaiting-feedback" }
      : undefined;
  return housekeeping ?? fallback;
}
792
+
793
// Knobs the caller hands to `watch` to pace and stop the polling loop.
export interface WatchControl {
  // Delay handed to the sleep function between consecutive ticks.
  sleepMs: number;
  // Optional replacement for the built-in setTimeout-based sleep.
  sleep?: (ms: number) => Promise<void>;
  // Polled before every tick and again after it; returning true ends the loop.
  shouldStop: () => boolean;
}
798
+
799
// Timer-backed sleep used when the caller supplies none: settles (with no value)
// once `ms` milliseconds have been scheduled through setTimeout.
async function defaultSleep(ms: number): Promise<void> {
  await new Promise<void>((wake) => {
    setTimeout(wake, ms);
  });
}
804
+
805
// Polling loop: run `watchTick` for `projectKey` over and over, pausing
// `ctrl.sleepMs` between ticks, until `ctrl.shouldStop()` reports true. The stop
// flag is consulted before each tick and once more after it, so a stop requested
// during a tick skips the trailing sleep.
export async function watch(projectKey: string, deps: WatchDeps, ctrl: WatchControl): Promise<void> {
  const pause = ctrl.sleep ?? defaultSleep;
  const emit =
    deps.log ??
    ((): void => {
      /* no-op: logging disabled */
    });

  for (;;) {
    if (ctrl.shouldStop()) {
      return;
    }
    try {
      await watchTick(projectKey, deps);
    } catch (failure) {
      // A failing tick (transient tracker failure, a thrown reconcile, etc.) is
      // logged and dropped so the daemon survives to try the next tick.
      const reason = failure instanceof Error ? failure.message : String(failure);
      emit(`beflow: watch ${projectKey} — tick errored: ${reason}`);
    }
    if (ctrl.shouldStop()) {
      return;
    }
    await pause(ctrl.sleepMs);
  }
}