ai-foreman 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,445 @@
1
+ import { select, isCancel } from "@clack/prompts";
2
+ import { MARKER_SPEC, QA_MARKER_SPEC } from "./markers.js";
3
+ import { fireNotification } from "./notify.js";
4
+ import { isTicketsInitialized } from "./tickets/config.js";
5
+ import { cmdUpdate, cmdComplete, cmdBlock, cmdQueue } from "./tickets/commands.js";
6
/**
 * Phrases that hint the builder finished its turn by handing a decision back
 * to the human. They are matched as substrings against lower-cased text, so
 * every entry is written in lower case.
 */
const QUESTION_HINTS = [
  "should i", "would you like", "do you want", "let me know",
  "please confirm", "could you clarify", "which option",
];
16
/**
 * Pull the STEP_STATUS marker out of a turn's final text.
 * Format: `STEP_STATUS: <kind> | key="value" key="value"`.
 *
 * The marker must appear exactly once and must be the last non-empty line of
 * the text. Surrounding whitespace on that line (including indentation) is
 * ignored.
 *
 * @param {string} text - Final text of a builder turn. A non-string value is
 *   treated like an empty turn.
 * @returns {{kind: string, error?: string, summary?: string, next?: string,
 *   reason?: string, question?: string, issues?: string, ticket?: string,
 *   choices?: string[]}} `kind` is the lower-cased marker kind, or "unknown"
 *   when no usable marker was found. `error` is set only when a marker was
 *   present but unusable (duplicated, misplaced, or malformed).
 */
export function parseStepStatus(text) {
  // Robustness: a missing text must not crash the foreman loop.
  const source = typeof text === "string" ? text : "";

  const markerCount = (source.match(/STEP_STATUS:/g) ?? []).length;
  if (markerCount > 1) {
    return { kind: "unknown", error: "builder emitted multiple STEP_STATUS markers" };
  }

  // Trim every line so an indented marker is judged on its content. The
  // anchored regex below would otherwise reject it as malformed.
  const lines = source
    .split(/\r?\n/)
    .map((line) => line.trim())
    .filter((line) => line.length > 0);
  const last = lines[lines.length - 1] ?? "";

  if (!last.includes("STEP_STATUS:")) {
    if (markerCount === 1) {
      return { kind: "unknown", error: "STEP_STATUS marker was not the final non-empty line" };
    }
    return { kind: "unknown" };
  }

  const match = last.match(/^STEP_STATUS:\s*(done|blocked|plan_complete|needs_input|qa_pass|qa_fail)\b\s*(?:\|\s*(.*))?$/i);
  if (!match) {
    return { kind: "unknown", error: "malformed STEP_STATUS marker" };
  }

  const kind = match[1].toLowerCase();
  // parseMarkerFields signals a syntax problem by returning an Error instance.
  const fields = parseMarkerFields(match[2] ?? "");
  if (fields instanceof Error) {
    return { kind: "unknown", error: fields.message };
  }

  return {
    kind,
    summary: fields.summary,
    next: fields.next,
    reason: fields.reason,
    question: fields.question,
    issues: fields.issues,
    ticket: fields.ticket,
    // choices arrive as one pipe-separated string; blank entries are dropped.
    choices: fields.choices
      ? fields.choices.split("|").map((c) => c.trim()).filter(Boolean)
      : undefined,
  };
}
55
/**
 * Parse the `key="value" key="value"` tail of a STEP_STATUS marker.
 *
 * Inside a value, a backslash makes the following character literal, which
 * is how a double quote or a backslash is embedded. Fields are separated by
 * optional whitespace.
 *
 * @param {string} input - Text following the `|` of the marker (may be empty).
 * @returns {Object<string, string>|Error} A null-prototype dictionary of field
 *   values, or an Error (returned, not thrown) describing the first syntax
 *   problem: malformed key, unterminated value or escape, or duplicate field.
 */
function parseMarkerFields(input) {
  // Null-prototype dictionary: field names come from builder output, so names
  // such as "constructor" or "__proto__" must not collide with properties
  // inherited from Object.prototype (which used to be misreported as
  // duplicates).
  const fields = Object.create(null);
  let rest = input.trim();

  while (rest.length > 0) {
    const key = rest.match(/^(\w+)="/);
    if (!key) {
      return new Error(`malformed STEP_STATUS field near: ${rest.slice(0, 40)}`);
    }
    const name = key[1];

    // Scan the quoted value one character at a time, honoring escapes.
    let i = key[0].length;
    let value = "";
    let closed = false;
    while (i < rest.length) {
      const ch = rest[i];
      if (ch === "\\") {
        const next = rest[i + 1];
        if (next === undefined) {
          return new Error(`unterminated escape in STEP_STATUS field: ${name}`);
        }
        value += next;
        i += 2;
        continue;
      }
      if (ch === "\"") {
        closed = true;
        i++;
        break;
      }
      value += ch;
      i++;
    }

    if (!closed) {
      return new Error(`unterminated STEP_STATUS field: ${name}`);
    }
    if (name in fields) {
      return new Error(`duplicate STEP_STATUS field: ${name}`);
    }
    fields[name] = value;
    rest = rest.slice(i).trim();
  }

  return fields;
}
93
/**
 * Heuristic for marker-less turns: does the text appear to end by asking the
 * human something? Only the last 400 characters of the trimmed, lower-cased
 * text are inspected.
 *
 * @param {string} text - Final text of a builder turn.
 * @returns {boolean} true when the tail ends with "?" or contains one of the
 *   QUESTION_HINTS phrases.
 */
export function looksLikeQuestion(text) {
  const ending = text.trim().toLowerCase().slice(-400);
  if (ending.endsWith("?")) {
    return true;
  }
  for (const phrase of QUESTION_HINTS) {
    if (ending.includes(phrase)) {
      return true;
    }
  }
  return false;
}
100
+ export { MARKER_SPEC } from "./markers.js";
101
+ export { QA_MARKER_SPEC } from "./markers.js";
102
/**
 * Instruction sent on the first turn of a batch.
 *
 * @param {number} n - Number of tickets/steps planned for the batch.
 * @param {string} [trackerPath] - Path of a hand-maintained progress tracker.
 *   Only used when tickets are not managed by foreman.
 * @param {boolean} [ticketsEnabled=false] - When true, the ticket-command
 *   rules are added and `trackerPath` is ignored.
 * @returns {string} The full primer prompt.
 */
export function buildPrimer(n, trackerPath, ticketsEnabled = false) {
  // Optional rules that are appended after the fixed rule list.
  const extraRules = [];
  if (ticketsEnabled) {
    extraRules.push(
      "- Ticket state is managed by foreman — you do NOT need to manually edit docs/ticket-progress.md.",
      '- Use `foreman tickets discover --summary "..." --rationale "..."` to log newly discovered work.',
      '- Use `foreman tickets update <id> --next-action "..."` to record mid-turn notes.',
      '- Always include ticket="<id>" in your STEP_STATUS marker so foreman can update the tracker.',
    );
  } else if (trackerPath) {
    extraRules.push(
      `- After completing each ticket or step, update its status in the ticket progress tracker at \`${trackerPath}\`, following the Standard Update Workflow documented in that file.`,
    );
  }
  const trackerRule = extraRules.map((rule) => `\n${rule}`).join("");

  const promptLines = [
    `You are being run by an automated foreman. We will work through your next ${n} tickets or implementation steps, one per turn.`,
    "",
    "Rules:",
    "- Do exactly ONE ticket or step this turn, then stop.",
    `- ${MARKER_SPEC}`,
    `- If a tool action is denied by foreman policy, do not retry it; report it via the blocked marker.${trackerRule}`,
    "",
    `This is step 1 of ${n}. Implement the next ticket or step now.`,
  ];
  return promptLines.join("\n");
}
126
/**
 * Instruction sent on turns 2–N of a batch.
 *
 * @param {number} i - 1-based index of the step about to run.
 * @param {number} n - Total number of steps in the batch.
 * @returns {string} The short "do the next step" prompt.
 */
export function buildNextStepInstruction(i, n) {
  const sentences = [
    `Implement the next ticket or step now (exactly one) — this is step ${i} of ${n}.`,
    "Then end with the STEP_STATUS marker line.",
  ];
  return sentences.join(" ");
}
130
/**
 * QA review turn: asks the builder to triple-check the ticket or step it just
 * completed and to report the verdict through a qa_pass / qa_fail marker
 * without fixing anything on that turn.
 *
 * @returns {string} The QA prompt, ending with QA_MARKER_SPEC.
 */
export function buildQaInstruction() {
  const checklist = [
    "- Accuracy — does the implementation actually do what the ticket describes?",
    "- Test existence — are there tests covering the new behavior? If tests are expected and missing, that is a QA failure.",
    "- Test execution — run the test suite (or the relevant subset). Do all tests pass?",
    "- Ticket satisfaction — are the ticket's acceptance criteria fully met?",
    "- Confidence — would you bet money this works as described in production?",
  ];
  const verdictRules = [
    "If everything is solid, end with STEP_STATUS: qa_pass.",
    'If anything is off, end with STEP_STATUS: qa_fail and list every concrete issue in the issues="..." field. Do NOT fix issues on this turn — just report them. Foreman will instruct you to fix them next.',
  ];
  return [
    "Now QA the ticket or step you just completed. Triple-check your work. Verify:",
    ...checklist,
    "",
    "Triple-check. Do not rubber-stamp your own work. Be skeptical.",
    "",
    ...verdictRules,
    "",
    `${QA_MARKER_SPEC}`,
  ].join("\n");
}
146
/**
 * Follow-up turn after qa_fail: tells the builder to fix the listed issues
 * and to finish with a `done` marker so QA can run again.
 *
 * @param {string} issues - Issue text, embedded verbatim in the prompt.
 * @returns {string} The fix prompt.
 */
export function buildQaFixInstruction(issues) {
  const paragraphs = [
    "Your QA found these issues:",
    `${issues}`,
    "Fix every one of them now. Then end with STEP_STATUS: done so foreman can re-run QA on the fixes. Triple-check that your fixes actually resolve the issues before emitting done.",
  ];
  // Paragraphs are separated by one blank line.
  return paragraphs.join("\n\n");
}
154
/**
 * Pre-flight planning turn: asks the builder to list its next N steps without
 * implementing anything and without emitting a STEP_STATUS marker.
 *
 * @param {number} n - Number of tickets/steps to plan.
 * @param {string} [ticketsContent] - Ticket list text; when non-empty it is
 *   placed in front of the request.
 * @returns {string} The planning prompt.
 */
export function buildPlanningTurn(n, ticketsContent) {
  const request = `Before we begin, list the next ${n} ticket(s) or step(s) you plan to implement, in order. Be specific — reference ticket IDs or titles where applicable. Do not implement anything yet; output the numbered list only. Do not emit a STEP_STATUS marker on this turn.`;
  if (!ticketsContent) {
    return request;
  }
  return `Here is the project's ticket list:\n\n${ticketsContent}\n\n${request}`;
}
161
/**
 * Builds the permission callback: classify the request, log the verdict, then
 * either allow it or log an escalation and deny it.
 *
 * @param policy - Object whose `classify(req)` returns `{ decision, reason }`.
 * @param log - Object whose `write(event, payload)` records an entry.
 * @returns An async function `(req) => { behavior: "allow" }` or
 *   `{ behavior: "deny", message }`. Any decision other than "allow" is denied.
 */
export function createPermissionHandler(policy, log) {
  // Text returned to the builder when an action is refused.
  const denialMessage = (reason) =>
    `Foreman policy: this action needs human approval (${reason}) ` +
    `and was NOT performed. Do not retry it. If this step depends on it, ` +
    `end your turn with: STEP_STATUS: blocked | reason="needs human approval: ${reason}".`;

  return async (req) => {
    const verdict = policy.classify(req);
    const allowed = verdict.decision === "allow";

    log.write("permission", {
      tool: req.toolName,
      decision: verdict.decision,
      reason: verdict.reason,
    });

    if (!allowed) {
      // Record the full request input so a human can review what was refused.
      log.write("escalation", {
        tool: req.toolName,
        reason: verdict.reason,
        input: req.input,
      });
      return { behavior: "deny", message: denialMessage(verdict.reason) };
    }
    return { behavior: "allow" };
  };
}
185
/**
 * Drives one builder through a batch of N steps via the STEP_STATUS protocol.
 *
 * The builder is any object exposing `sendTurn(instruction)` (resolving to a
 * result with `text`, `isError` and `costUsd`), an `agent` property and a
 * `sessionId()` method. The log is any object exposing `write(event, payload)`.
 */
export class Foreman {
  builder;
  log;
  notificationsEnabled;
  qaEnabled;
  qaMaxCycles;
  projectDir;
  ticketsEnabled;

  /**
   * @param builder - Builder session that receives instructions.
   * @param log - Event log.
   * @param {boolean} [notificationsEnabled=false] - Fire a notification when
   *   the builder needs human input.
   * @param {boolean} [qaEnabled=true] - Run a QA pass after every completed step.
   * @param {number} [qaMaxCycles=3] - Maximum QA → fix rounds per step.
   * @param {string} [projectDir] - Project directory. Ticket tracking is
   *   enabled only when it is set and `isTicketsInitialized` accepts it.
   */
  constructor(builder, log, notificationsEnabled = false, qaEnabled = true, qaMaxCycles = 3, projectDir) {
    this.builder = builder;
    this.log = log;
    this.notificationsEnabled = notificationsEnabled;
    this.qaEnabled = qaEnabled;
    this.qaMaxCycles = qaMaxCycles;
    this.projectDir = projectDir;
    this.ticketsEnabled = !!(projectDir && isTicketsInitialized(projectDir));
  }

  /**
   * Send one instruction and resolve any needs_input exchanges before returning.
   * The returned result and status always reflect the final (non-needs_input) turn.
   *
   * @param {string} instruction - Prompt for the builder.
   * @returns {Promise<{result: object, status: object}>} Last builder result
   *   and its parsed STEP_STATUS. If the human cancels the prompt, a synthetic
   *   empty result with a `blocked` status is returned instead.
   */
  async doTurn(instruction) {
    let result = await this.builder.sendTurn(instruction);
    let status = parseStepStatus(result.text);
    while (status.kind === "needs_input") {
      const question = status.question ?? "The builder has a question";
      // Fall back to a generic pair when the builder supplied no choices.
      const choices = status.choices?.length
        ? status.choices
        : ["Continue", "Cancel"];
      this.log.write("needs_input", { question, choices });
      if (this.notificationsEnabled) {
        fireNotification("Foreman needs your input", question);
      }
      console.log();
      const answer = await select({
        message: question,
        options: choices.map((c) => ({ value: c, label: c })),
      });
      console.log();
      if (isCancel(answer)) {
        result = { text: "", isError: false, numTurns: 0, costUsd: 0 };
        status = { kind: "blocked", reason: "user cancelled at input prompt" };
        break;
      }
      // The selected choice is sent back to the builder as the next turn.
      result = await this.builder.sendTurn(String(answer));
      status = parseStepStatus(result.text);
    }
    return { result, status };
  }

  /**
   * Send a planning turn and return the builder's response text.
   * Does not count toward steps.
   *
   * @param {number} n - Number of steps to plan.
   * @param {string} [ticketsContent] - Optional ticket list for the prompt.
   * @returns {Promise<string>} The builder's plan text.
   */
  async runPreflight(n, ticketsContent) {
    const instruction = buildPlanningTurn(n, ticketsContent);
    const result = await this.builder.sendTurn(instruction);
    this.log.write("preflight", {
      ticketsProvided: ticketsContent !== undefined,
      costUsd: result.costUsd,
    });
    return result.text;
  }

  /**
   * Send user feedback on the plan; the builder responds with a revised list.
   * Does not count toward steps.
   *
   * @param {string} feedback - The human's feedback, sent verbatim.
   * @returns {Promise<string>} The builder's reply text, returned so callers
   *   can show the revised plan (mirrors runPreflight). Callers that ignore
   *   the return value are unaffected.
   */
  async sendPreflightFeedback(feedback) {
    const result = await this.builder.sendTurn(feedback);
    this.log.write("preflight", { feedback: true, costUsd: result.costUsd });
    return result.text;
  }

  /** Send one custom instruction through the same needs_input loop as a batch turn. */
  async runInstruction(instruction) {
    return this.doTurn(instruction);
  }

  /**
   * Run a QA review pass on the ticket the builder just completed.
   * Loops on qa_fail → fix → re-QA until qa_pass or the cycle cap is reached.
   * QA turns are free — they do not advance the step counter.
   *
   * @param {number} stepIndex - Step being reviewed (used for logging only).
   * @returns {Promise<{outcome: "passed", summary?: string} |
   *   {outcome: "blocked" | "needs-human", detail: string}>}
   */
  async runQa(stepIndex) {
    for (let cycle = 1; cycle <= this.qaMaxCycles; cycle++) {
      const qa = await this.doTurn(buildQaInstruction());
      this.log.write("qa", {
        stepIndex,
        cycle,
        statusKind: qa.status.kind,
        issues: qa.status.issues,
        costUsd: qa.result.costUsd,
        isError: qa.result.isError,
      });
      if (qa.result.isError) {
        return { outcome: "blocked", detail: `QA turn errored: ${qa.result.text.slice(0, 200)}` };
      }
      if (qa.status.kind === "qa_pass") {
        return { outcome: "passed", summary: qa.status.summary };
      }
      if (qa.status.kind === "blocked") {
        return { outcome: "blocked", detail: qa.status.reason ?? "QA reported blocked" };
      }
      if (qa.status.kind !== "qa_fail") {
        return {
          outcome: "needs-human",
          detail: qa.status.error ?? `QA turn did not emit a valid marker (got ${qa.status.kind})`,
        };
      }

      // qa_fail: hand the reported issues back to the builder to fix.
      const fix = await this.doTurn(buildQaFixInstruction(qa.status.issues ?? "(no issues listed)"));
      this.log.write("qa-fix", {
        stepIndex,
        cycle,
        statusKind: fix.status.kind,
        costUsd: fix.result.costUsd,
        isError: fix.result.isError,
      });
      if (fix.result.isError) {
        return { outcome: "blocked", detail: `QA fix turn errored: ${fix.result.text.slice(0, 200)}` };
      }
      if (fix.status.kind === "blocked") {
        return { outcome: "blocked", detail: fix.status.reason ?? "QA fix reported blocked" };
      }
      if (fix.status.kind !== "done") {
        return {
          outcome: "needs-human",
          detail: fix.status.error ?? `QA fix turn did not emit done (got ${fix.status.kind})`,
        };
      }
      // loop back into QA
    }
    return {
      outcome: "needs-human",
      detail: `QA could not converge after ${this.qaMaxCycles} cycles`,
    };
  }

  /**
   * Run up to `n` implementation steps, one builder turn per step, stopping
   * early on the first error, blocker, unusable marker or plan completion.
   *
   * @param {number} n - Number of steps requested.
   * @param {string} [trackerPath] - Tracker path forwarded to the primer.
   * @returns {Promise<{completed: number, requested: number, outcome: string,
   *   detail: (string|undefined)}>} `outcome` is one of "all-done",
   *   "plan-complete", "blocked" or "needs-human".
   */
  async runBatch(n, trackerPath) {
    this.log.write("batch-start", { requested: n, agent: this.builder.agent });
    let completed = 0;
    let outcome = "all-done";
    let detail;
    for (let i = 1; i <= n; i++) {
      // Determine which ticket we're about to work on (for in_progress marking)
      let pendingTicketId;
      if (this.ticketsEnabled && this.projectDir) {
        try {
          const queue = cmdQueue(this.projectDir);
          const next = queue.find((r) => r.status === "next");
          if (next) {
            pendingTicketId = next.ticket;
            cmdUpdate(this.projectDir, next.ticket, {
              status: "in_progress",
              actor: "foreman",
              summary: `Starting step ${i} of ${n}`,
            });
          }
        }
        catch (err) {
          outcome = "needs-human";
          detail = `failed to update ticket tracker before step ${i}: ${err instanceof Error ? err.message : String(err)}`;
          break;
        }
      }

      const instruction = i === 1
        ? buildPrimer(n, trackerPath, this.ticketsEnabled)
        : buildNextStepInstruction(i, n);
      const { result, status } = await this.doTurn(instruction);
      this.log.write("step", {
        index: i,
        statusKind: status.kind,
        summary: status.summary,
        next: status.next,
        reason: status.reason,
        ticket: status.ticket,
        costUsd: result.costUsd,
        isError: result.isError,
      });

      if (result.isError) {
        outcome = "blocked";
        detail = `builder turn errored: ${result.text.slice(0, 200)}`;
        break;
      }

      if (status.kind === "done" || status.kind === "plan_complete") {
        let qaSummary;
        if (this.qaEnabled) {
          const qa = await this.runQa(i);
          if (qa.outcome === "blocked") {
            outcome = "blocked";
            detail = qa.detail;
            break;
          }
          if (qa.outcome === "needs-human") {
            outcome = "needs-human";
            detail = qa.detail;
            break;
          }
          qaSummary = qa.summary;
        }
        // Update ticket state only after QA has passed, so generated tracker
        // state does not claim done before verification has completed.
        if (this.ticketsEnabled && this.projectDir) {
          // Prefer the ticket the builder named; fall back to the queued one.
          const ticketId = status.ticket ?? pendingTicketId;
          if (ticketId) {
            try {
              cmdComplete(this.projectDir, ticketId, {
                actor: "foreman",
                summary: status.summary ?? `Step ${i} complete`,
                validationResult: this.qaEnabled ? "passed" : "not_applicable",
                validationNotes: this.qaEnabled
                  ? "Foreman QA emitted qa_pass"
                  : "Foreman QA disabled for this run",
                evidence: this.qaEnabled
                  ? (qaSummary ?? "Foreman QA emitted qa_pass")
                  : undefined,
              });
            }
            catch (err) {
              outcome = "needs-human";
              detail = `failed to complete ticket ${ticketId}: ${err instanceof Error ? err.message : String(err)}`;
              break;
            }
          }
        }
        completed++;
        if (status.kind === "plan_complete") {
          outcome = "plan-complete";
          detail = status.summary;
          break;
        }
        continue;
      }

      if (status.kind === "blocked") {
        const reason = status.reason ?? "builder reported blocked";
        // Mark ticket blocked in the tracker
        if (this.ticketsEnabled && this.projectDir) {
          const ticketId = status.ticket ?? pendingTicketId;
          if (ticketId) {
            try {
              cmdBlock(this.projectDir, ticketId, {
                summary: reason,
                actor: "foreman",
              });
            }
            catch (err) {
              // Best effort: keep the builder's reason and append the tracker failure.
              detail =
                `${reason}; failed to update ticket tracker: ` +
                `${err instanceof Error ? err.message : String(err)}`;
            }
          }
        }
        outcome = "blocked";
        detail = detail ?? reason;
        break;
      }

      // Anything else is unusable for a build step: stop so we never loop blindly.
      outcome = "needs-human";
      if (status.error) {
        detail = status.error;
      }
      else if (status.kind !== "unknown") {
        // A well-formed marker of the wrong kind (e.g. qa_pass on a build
        // step) is reported as such instead of claiming no marker was emitted.
        detail = `builder emitted an unexpected STEP_STATUS kind for an implementation step (got ${status.kind})`;
      }
      else if (looksLikeQuestion(result.text)) {
        detail = `builder ended with a question: ${lastLine(result.text)}`;
      }
      else {
        detail = "builder did not emit a STEP_STATUS marker";
      }
      break;
    }
    this.log.write("batch-end", { completed, requested: n, outcome, detail, sessionId: this.builder.sessionId() });
    return { completed, requested: n, outcome, detail };
  }
}
442
/**
 * Return the final line of the trimmed text, capped at 200 characters.
 *
 * @param {string} text - Text to inspect.
 * @returns {string} Everything after the last "\n" of the trimmed text (the
 *   whole trimmed text when it has no newline), truncated to 200 characters.
 */
function lastLine(text) {
  const trimmed = text.trim();
  const lastBreak = trimmed.lastIndexOf("\n");
  const finalLine = lastBreak === -1 ? trimmed : trimmed.slice(lastBreak + 1);
  return finalLine.slice(0, 200);
}