@flumecode/runner 0.6.0 → 0.8.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cli.js +240 -52
- package/package.json +1 -1
- package/skills-plugin/skills/create-release/SKILL.md +24 -2
- package/skills-plugin/skills/implement-plan/SKILL.md +59 -29
- package/skills-plugin/skills/request-to-plan/SKILL.md +4 -3
- package/skills-plugin/skills/revise-implementation/SKILL.md +4 -2
package/dist/cli.js
CHANGED
|
@@ -225,10 +225,16 @@ var SERVER_NAME2 = "flume_plan";
|
|
|
225
225
|
var SUBMIT_PLAN = "submit_plan";
|
|
226
226
|
var PLAN_TOOL_NAME = `mcp__${SERVER_NAME2}__${SUBMIT_PLAN}`;
|
|
227
227
|
var PLAN_MARKER = "<!-- flumecode:end-of-plan -->";
|
|
228
|
+
var pseudoCodeEntrySchema = z2.object({
|
|
229
|
+
file: z2.string().min(1),
|
|
230
|
+
pseudoCode: z2.string().min(1)
|
|
231
|
+
});
|
|
228
232
|
var stepSchema = z2.object({
|
|
229
|
-
|
|
230
|
-
|
|
231
|
-
|
|
233
|
+
title: z2.string().min(1).describe("A concise imperative title for this step."),
|
|
234
|
+
description: z2.string().min(1).describe("What changes and why \u2014 the rationale for this step."),
|
|
235
|
+
pseudoCode: z2.array(pseudoCodeEntrySchema).optional().describe(
|
|
236
|
+
"Per-file pseudo code. Provide an entry for every non-documentation file this step touches. Each entry contains the file path and pseudo code describing the changes to that file."
|
|
237
|
+
)
|
|
232
238
|
});
|
|
233
239
|
var planInputSchema = {
|
|
234
240
|
title: z2.string().min(1).max(120).describe(
|
|
@@ -260,12 +266,20 @@ function renderPlan(plan) {
|
|
|
260
266
|
}
|
|
261
267
|
}
|
|
262
268
|
lines.push("");
|
|
263
|
-
lines.push("
|
|
269
|
+
lines.push("## Steps");
|
|
264
270
|
for (const [i, step] of plan.steps.entries()) {
|
|
265
|
-
lines.push(
|
|
266
|
-
|
|
267
|
-
|
|
268
|
-
|
|
271
|
+
lines.push("");
|
|
272
|
+
lines.push(`### ${i + 1}. ${step.title}`);
|
|
273
|
+
lines.push("");
|
|
274
|
+
lines.push(step.description);
|
|
275
|
+
if (step.pseudoCode && step.pseudoCode.length > 0) {
|
|
276
|
+
for (const entry of step.pseudoCode) {
|
|
277
|
+
lines.push("");
|
|
278
|
+
lines.push(`\`${entry.file}\``);
|
|
279
|
+
lines.push("");
|
|
280
|
+
lines.push("```");
|
|
281
|
+
lines.push(entry.pseudoCode);
|
|
282
|
+
lines.push("```");
|
|
269
283
|
}
|
|
270
284
|
}
|
|
271
285
|
}
|
|
@@ -328,12 +342,97 @@ function createPlanTooling() {
|
|
|
328
342
|
return { mcpServer, getPlans: () => renderedPlans };
|
|
329
343
|
}
|
|
330
344
|
|
|
345
|
+
// src/report.ts
|
|
346
|
+
import { createSdkMcpServer as createSdkMcpServer3, tool as tool3 } from "@anthropic-ai/claude-agent-sdk";
|
|
347
|
+
import { z as z3 } from "zod";
|
|
348
|
+
var SERVER_NAME3 = "flume_report";
|
|
349
|
+
var SUBMIT_REPORT = "submit_report";
|
|
350
|
+
var REPORT_TOOL_NAME = `mcp__${SERVER_NAME3}__${SUBMIT_REPORT}`;
|
|
351
|
+
var STATUS_ICON = {
|
|
352
|
+
met: "\u2705",
|
|
353
|
+
not_met: "\u274C",
|
|
354
|
+
unclear: "\u26A0\uFE0F"
|
|
355
|
+
};
|
|
356
|
+
var evidenceSchema = z3.object({
|
|
357
|
+
file: z3.string().min(1).describe("Repo-relative path the hunk comes from."),
|
|
358
|
+
hunk: z3.string().min(1).describe(
|
|
359
|
+
"A unified-diff hunk body proving the criterion \u2014 the lines that matter, not the whole file. Rendered verbatim as a ```diff block."
|
|
360
|
+
),
|
|
361
|
+
note: z3.string().optional().describe("Optional one-line explanation of why this hunk satisfies the criterion.")
|
|
362
|
+
});
|
|
363
|
+
var acVerdictSchema = z3.object({
|
|
364
|
+
criterion: z3.string().min(1).describe("The acceptance-criterion text, verbatim from the plan."),
|
|
365
|
+
status: z3.enum(["met", "not_met", "unclear"]).describe("Verdict for this criterion, verified against the actual diff."),
|
|
366
|
+
rationale: z3.string().min(1).describe("One or two sentences on why the verdict holds."),
|
|
367
|
+
evidence: z3.array(evidenceSchema).describe(
|
|
368
|
+
"Diff hunks proving the verdict. Include the relevant hunk(s) for a met criterion; may be empty for not_met / unclear."
|
|
369
|
+
)
|
|
370
|
+
});
|
|
371
|
+
// Input shape of the `submit_report` tool. The `.describe()` strings are the
// guidance the agent sees for each field, so the section list in `prose` is
// kept in step with the implement-plan skill (which also asks for a
// "Code quality" section).
var reportInputSchema = {
  // Short headline for the report; rendered first.
  summary: z3.string().min(1).describe("One or two sentences on what was implemented."),
  // Free-form markdown for every section except the acceptance criteria.
  prose: z3.string().min(1).describe(
    "Markdown for the remaining report sections \u2014 What changed, Code quality, Files changed, Build / tests, and Caveats / follow-ups. Use ## headings. Do NOT include the acceptance-criteria section here (that goes in acceptanceCriteria) and do NOT include the PR link (the runner appends it)."
  ),
  // Structured verdicts; at least one entry is required.
  acceptanceCriteria: z3.array(acVerdictSchema).min(1).describe(
    "One entry per acceptance criterion from the plan, in plan order, each with a verdict and the diff evidence behind it."
  )
};
|
|
380
|
+
var reportSchema = z3.object(reportInputSchema);
|
|
381
|
+
/**
 * Render a structured implementation report as markdown.
 *
 * Layout: the trimmed summary, the trimmed prose, then an
 * "## Acceptance criteria" section with one "###" heading per criterion
 * (prefixed with its status icon), its rationale, and each evidence hunk in a
 * fenced `diff` block preceded by the file path (and optional note).
 *
 * @param {{summary: string, prose: string, acceptanceCriteria: Array<{criterion: string, status: string, rationale: string, evidence: Array<{file: string, hunk: string, note?: string}>}>}} report
 *   A report in the shape accepted by the submit_report tool.
 * @returns {string} The markdown document, lines joined with "\n".
 */
function renderReport(report) {
  const lines = [report.summary.trim(), "", report.prose.trim(), "", "## Acceptance criteria"];
  for (const ac of report.acceptanceCriteria) {
    lines.push("", `### ${STATUS_ICON[ac.status]} ${ac.criterion}`, "", ac.rationale.trim());
    for (const ev of ac.evidence) {
      // Trailing newlines would render as blank lines inside the fence.
      const hunk = ev.hunk.replace(/\n+$/, "");
      // A hunk taken from a Markdown file can itself contain ``` lines (a
      // context line is " ```", which is a valid closing fence). Open with a
      // fence longer than any backtick run in the hunk so it cannot close early.
      const longestRun = (hunk.match(/`+/g) ?? []).reduce((max, run) => Math.max(max, run.length), 0);
      const fence = "`".repeat(Math.max(3, longestRun + 1));
      lines.push(
        "",
        ev.note ? `\`${ev.file}\` \u2014 ${ev.note}` : `\`${ev.file}\``,
        "",
        `${fence}diff`,
        hunk,
        fence
      );
    }
  }
  return lines.join("\n");
}
|
|
404
|
+
/**
 * Build the tooling for the structured report: an SDK MCP server exposing a
 * single `submit_report` tool, plus an accessor for whatever was submitted.
 *
 * @returns {{mcpServer: object, getReport: () => (object|null)}}
 *   `getReport()` yields null until the tool has been called; afterwards it
 *   yields the most recently submitted report as parsed by `reportSchema`.
 */
function createReportTooling() {
  // Holds the parsed arguments of the latest submit_report call.
  let submittedReport = null;
  const toolDescription = "Submit the final implementation report as structured data. Call this exactly once, at the end of the run. `acceptanceCriteria` must contain one entry per plan criterion, each with a met / not_met / unclear verdict and the diff hunk(s) that prove it. `summary` + `prose` are markdown for the rest of the report. Do NOT include a PR link \u2014 the runner appends it.";
  const handleSubmit = async (args) => {
    // parse() throws on invalid input, so nothing is stored in that case.
    submittedReport = reportSchema.parse(args);
    const acknowledgement = {
      type: "text",
      text: "Report submitted. The runner will render and post it. End your turn now."
    };
    return { content: [acknowledgement] };
  };
  const submitReport = tool3(SUBMIT_REPORT, toolDescription, reportInputSchema, handleSubmit);
  const mcpServer = createSdkMcpServer3({ name: SERVER_NAME3, tools: [submitReport] });
  const getReport = () => submittedReport;
  return { mcpServer, getReport };
}
|
|
428
|
+
|
|
331
429
|
// src/executor.ts
|
|
332
430
|
var FLUME_PLUGIN_DIR = fileURLToPath2(new URL("../skills-plugin", import.meta.url));
|
|
333
431
|
async function runClaudeCode(opts) {
|
|
334
432
|
let finalText = "";
|
|
335
433
|
const { mcpServer, collected } = createWidgetTooling();
|
|
336
434
|
const { mcpServer: planServer, getPlans } = createPlanTooling();
|
|
435
|
+
const { mcpServer: reportServer, getReport } = createReportTooling();
|
|
337
436
|
for await (const message of query({
|
|
338
437
|
prompt: opts.prompt,
|
|
339
438
|
options: {
|
|
@@ -354,8 +453,8 @@ async function runClaudeCode(opts) {
|
|
|
354
453
|
// does NOT restrict anything else). Task lets the implement-plan
|
|
355
454
|
// orchestrator spawn its subagents; without pre-approval the spawn could
|
|
356
455
|
// stall waiting for an approval no one can give.
|
|
357
|
-
mcpServers: { flume_widgets: mcpServer, flume_plan: planServer },
|
|
358
|
-
allowedTools: [...WIDGET_TOOL_NAMES, PLAN_TOOL_NAME, "Task"]
|
|
456
|
+
mcpServers: { flume_widgets: mcpServer, flume_plan: planServer, flume_report: reportServer },
|
|
457
|
+
allowedTools: [...WIDGET_TOOL_NAMES, PLAN_TOOL_NAME, REPORT_TOOL_NAME, "Task"]
|
|
359
458
|
}
|
|
360
459
|
})) {
|
|
361
460
|
if (message.type === "assistant") {
|
|
@@ -375,7 +474,7 @@ async function runClaudeCode(opts) {
|
|
|
375
474
|
if (opts.abortController?.signal.aborted) {
|
|
376
475
|
throw new Error("Run canceled by user");
|
|
377
476
|
}
|
|
378
|
-
return { text: finalText, widgets: collected, plans: getPlans() };
|
|
477
|
+
return { text: finalText, widgets: collected, plans: getPlans(), report: getReport() };
|
|
379
478
|
}
|
|
380
479
|
|
|
381
480
|
// src/health.ts
|
|
@@ -572,7 +671,7 @@ function buildRepairPrompt(ctx, hookLog) {
|
|
|
572
671
|
];
|
|
573
672
|
return lines.join("\n");
|
|
574
673
|
}
|
|
575
|
-
function buildReleasePrompt(ctx) {
|
|
674
|
+
function buildReleasePrompt(ctx, baseChecks) {
|
|
576
675
|
const task = `Use the \`flumecode:create-release\` skill to handle this turn. You are driving a release: first analyse commits since the last tag, propose version bumps, and ask the user to confirm via widgets (Phase 1); once the user's widget answers appear in the thread, apply the bumps to package.json files and update CHANGELOG.md (Phase 2). Do NOT commit or push \u2014 the runner handles that and opens the bump PR.`;
|
|
577
676
|
const orient = `Before investigating raw source, check for a FlumeCode wiki at \`.flumecode/wiki/\`. If it exists, read \`.flumecode/wiki/README.md\` first \u2014 it is the index \u2014 and follow its links to the pages and source paths relevant to this release. If there is no wiki, work from the code directly.`;
|
|
578
677
|
const widgets = `When you need the user to choose, ask it as a widget rather than writing the options as prose: call \`single_select\` for a one-of-N choice (radio buttons) or \`multi_select\` for a "select all that apply" choice (checkboxes). Don't add your own "Other" option \u2014 the UI always provides one. After calling a widget tool, end your turn \u2014 the user's answer comes back as their next message and starts a fresh run.`;
|
|
@@ -592,6 +691,23 @@ function buildReleasePrompt(ctx) {
|
|
|
592
691
|
if (ctx.request?.body) {
|
|
593
692
|
lines.push("", ctx.request.body);
|
|
594
693
|
}
|
|
694
|
+
if (baseChecks && !baseChecks.ok) {
|
|
695
|
+
lines.push(
|
|
696
|
+
"",
|
|
697
|
+
"# Pre-release check status",
|
|
698
|
+
"",
|
|
699
|
+
"\u26A0\uFE0F The repository's pre-commit checks (lint / typecheck / tests) are currently FAILING on the base branch, independently of any version bump. A release must not ship a broken base:",
|
|
700
|
+
"",
|
|
701
|
+
"- **Phase 1 (propose):** tell the user, in your reply, that the base currently fails these checks and that the release will fix them as part of the bump.",
|
|
702
|
+
"- **Phase 2 (apply):** fix the failing code at its root so the checks pass, THEN apply the version bumps and CHANGELOG. Do NOT delete/skip tests or weaken assertions. The fixes ship in the same bump PR. Still do NOT commit or push \u2014 the runner does.",
|
|
703
|
+
"",
|
|
704
|
+
"Failing check output:",
|
|
705
|
+
"",
|
|
706
|
+
"```",
|
|
707
|
+
baseChecks.log,
|
|
708
|
+
"```"
|
|
709
|
+
);
|
|
710
|
+
}
|
|
595
711
|
appendThread(lines, ctx);
|
|
596
712
|
lines.push(
|
|
597
713
|
"",
|
|
@@ -626,6 +742,12 @@ var MAX_BUFFER = 1 << 24;
|
|
|
626
742
|
async function git(args) {
|
|
627
743
|
return exec("git", args, { maxBuffer: MAX_BUFFER });
|
|
628
744
|
}
|
|
745
|
+
var RUNNER_GIT_EMAIL = "runner@flumecode.local";
|
|
746
|
+
var RUNNER_GIT_NAME = "FlumeCode Runner";
|
|
747
|
+
/**
 * Set the runner's commit identity on the checkout at `dir` by running
 * `git -C <dir> config` for `user.email` and then `user.name`.
 *
 * @param {string} dir Path passed to `git -C`.
 * @returns {Promise<void>}
 */
async function ensureGitIdentity(dir) {
  const identity = [
    ["user.email", RUNNER_GIT_EMAIL],
    ["user.name", RUNNER_GIT_NAME]
  ];
  // One git invocation at a time, in this order.
  for (const [key, value] of identity) {
    await git(["-C", dir, "config", key, value]);
  }
}
|
|
629
751
|
function cloneUrl(ctx) {
|
|
630
752
|
const { owner, name, cloneToken } = ctx.repo;
|
|
631
753
|
return `https://x-access-token:${cloneToken}@github.com/${owner}/${name}.git`;
|
|
@@ -689,15 +811,20 @@ async function resetWorkspace(dir) {
|
|
|
689
811
|
/**
 * Get the workspace at `dir` ready for a run.
 *
 * A reused workspace only has its `origin` URL rewritten to the current
 * clone URL; otherwise `cloneAtSha` is run. Either way the runner's git
 * identity is applied afterwards.
 *
 * @param {object} ctx Job context (passed to `cloneAtSha` / `cloneUrl`).
 * @param {string} dir Workspace directory.
 * @param {boolean} reused Whether `dir` already holds a checkout.
 * @returns {Promise<void>}
 */
async function prepareAtSha(ctx, dir, reused) {
  if (reused) {
    await git(["-C", dir, "remote", "set-url", "origin", cloneUrl(ctx)]);
  } else {
    await cloneAtSha(ctx, dir);
  }
  await ensureGitIdentity(dir);
}
|
|
696
820
|
/**
 * Get the workspace at `dir` ready for a run that may resume a branch.
 *
 * A reused workspace has its `origin` URL rewritten and is reported as
 * `{ resumed: true }`; otherwise the result of `cloneResumingBranch` is
 * returned. The runner's git identity is applied before returning.
 *
 * @param {object} ctx Job context (passed to `cloneResumingBranch` / `cloneUrl`).
 * @param {string} dir Workspace directory.
 * @param {boolean} reused Whether `dir` already holds a checkout.
 * @returns {Promise<object>} `{ resumed: true }` for a reused workspace, else
 *   whatever `cloneResumingBranch` resolved to.
 */
async function prepareResumingBranch(ctx, dir, reused) {
  let outcome;
  if (reused) {
    await git(["-C", dir, "remote", "set-url", "origin", cloneUrl(ctx)]);
    outcome = { resumed: true };
  } else {
    outcome = await cloneResumingBranch(ctx, dir);
  }
  await ensureGitIdentity(dir);
  return outcome;
}
|
|
703
830
|
async function sweepWorkspaces() {
|
|
@@ -751,21 +878,25 @@ function commitFailureLog(err) {
|
|
|
751
878
|
const parts = [e.stdout, e.stderr].map((s) => typeof s === "string" ? s.trim() : "").filter((s) => s.length > 0);
|
|
752
879
|
return parts.length > 0 ? parts.join("\n") : e.message ?? String(err);
|
|
753
880
|
}
|
|
881
|
+
/**
 * Decide whether a failed git invocation failed because the subcommand is not
 * available, rather than because the command ran and reported a failure.
 *
 * The error's `stderr` (when it is a string) and its `message` are searched,
 * case-insensitively, for "is not a git command", "unknown subcommand" or
 * "usage: git hook".
 *
 * @param {unknown} err Value caught from a rejected git call. May be an Error
 *   with `stderr`, a plain string, or null/undefined; none of these throw.
 * @returns {boolean} True when the text matches one of the patterns.
 */
function isUnsupportedGitSubcommand(err) {
  // `err` comes straight from a catch clause, so it may be any value.
  const stderr = typeof err?.stderr === "string" ? err.stderr : "";
  const message = typeof err === "string" ? err : err?.message ?? "";
  return /is not a git command|unknown subcommand|usage: git hook/i.test(`${stderr}\n${message}`);
}
|
|
887
|
+
/**
 * Run the repository's pre-commit hook on the checkout at `dir` without
 * committing, via `git hook run`.
 *
 * `--ignore-missing` is passed so that a repository with no pre-commit hook
 * counts as passing. Without it, `git hook run` exits non-zero for a missing
 * hook and the repo would be reported as having failing checks.
 *
 * @param {string} dir Path passed to `git -C`.
 * @returns {Promise<{ok: boolean, log: string, skipped: boolean}>}
 *   - `{ ok: true,  log: "", skipped: false }` when the hook passed or is absent;
 *   - `{ ok: true,  log: "", skipped: true  }` when `isUnsupportedGitSubcommand`
 *     recognises the error (git cannot run `hook run`);
 *   - `{ ok: false, log, skipped: false }` otherwise, with `log` taken from
 *     `commitFailureLog(err)`.
 */
async function runRepoChecks(dir) {
  try {
    await git(["-C", dir, "hook", "run", "--ignore-missing", "pre-commit"]);
    return { ok: true, log: "", skipped: false };
  } catch (err) {
    // An unavailable `git hook` is not a check failure; report it as skipped.
    if (isUnsupportedGitSubcommand(err)) return { ok: true, log: "", skipped: true };
    return { ok: false, log: commitFailureLog(err), skipped: false };
  }
}
|
|
754
896
|
async function commitChanges(ctx, dir) {
|
|
755
897
|
if (!await hasChanges(dir)) return false;
|
|
756
898
|
try {
|
|
757
|
-
await git([
|
|
758
|
-
"-C",
|
|
759
|
-
dir,
|
|
760
|
-
"-c",
|
|
761
|
-
"user.email=runner@flumecode.local",
|
|
762
|
-
"-c",
|
|
763
|
-
"user.name=FlumeCode Runner",
|
|
764
|
-
"commit",
|
|
765
|
-
"--quiet",
|
|
766
|
-
"-m",
|
|
767
|
-
`FlumeCode: ${jobTitle(ctx)}`
|
|
768
|
-
]);
|
|
899
|
+
await git(["-C", dir, "commit", "--quiet", "-m", `FlumeCode: ${jobTitle(ctx)}`]);
|
|
769
900
|
} catch (err) {
|
|
770
901
|
throw new PreCommitError(commitFailureLog(err));
|
|
771
902
|
}
|
|
@@ -805,17 +936,7 @@ async function mergeInMergeBranch(ctx, dir) {
|
|
|
805
936
|
if (!mergeBranch) return { conflicted: false };
|
|
806
937
|
await git(["-C", dir, "fetch", "--quiet", "origin", mergeBranch]);
|
|
807
938
|
try {
|
|
808
|
-
await git([
|
|
809
|
-
"-C",
|
|
810
|
-
dir,
|
|
811
|
-
"-c",
|
|
812
|
-
"user.email=runner@flumecode.local",
|
|
813
|
-
"-c",
|
|
814
|
-
"user.name=FlumeCode Runner",
|
|
815
|
-
"merge",
|
|
816
|
-
"--no-edit",
|
|
817
|
-
"FETCH_HEAD"
|
|
818
|
-
]);
|
|
939
|
+
await git(["-C", dir, "merge", "--no-edit", "FETCH_HEAD"]);
|
|
819
940
|
return { conflicted: false };
|
|
820
941
|
} catch {
|
|
821
942
|
return { conflicted: true };
|
|
@@ -876,6 +997,7 @@ var CANCEL_POLL_MS = 2500;
|
|
|
876
997
|
var ORCHESTRATOR_MODEL = "sonnet";
|
|
877
998
|
var ORCHESTRATOR_MAX_TURNS = 80;
|
|
878
999
|
var MAX_COMMIT_REPAIRS = 2;
|
|
1000
|
+
var MAX_IMPLEMENT_RETRIES = 1;
|
|
879
1001
|
var INIT_MAX_TURNS = 200;
|
|
880
1002
|
var DOCUMENT_MAX_TURNS = 120;
|
|
881
1003
|
var HEARTBEAT_MS = 5 * 6e4;
|
|
@@ -1068,19 +1190,36 @@ async function processChatJob(ctx, dir, abort) {
|
|
|
1068
1190
|
reply += outcomeBanner(outcome, { branch: ctx.repo.checkoutBranch, documented, autoMerged });
|
|
1069
1191
|
return { text: reply, widgets: [] };
|
|
1070
1192
|
}
|
|
1193
|
+
/**
 * Tell whether a submitted report asserts that real work was done: at least
 * one acceptance criterion is marked "met" and carries at least one evidence
 * entry.
 *
 * @param {object|null|undefined} report Submitted report, if any.
 * @returns {boolean} False when there is no report or no such criterion.
 */
function reportClaimsWork(report) {
  if (!report) return false;
  const hasProvenCriterion = (verdict) => verdict.status === "met" && verdict.evidence.length > 0;
  return report.acceptanceCriteria.some(hasProvenCriterion);
}
|
|
1071
1196
|
async function processImplementJob(ctx, dir, resumed, abort) {
|
|
1072
1197
|
console.log(`
|
|
1073
1198
|
\u25B6 Implement ${ctx.jobId} \u2014 ${ctx.repo.fullName}: "${jobTitle(ctx)}"`);
|
|
1074
1199
|
const installResult = await installDependencies(dir);
|
|
1075
|
-
|
|
1076
|
-
|
|
1077
|
-
|
|
1078
|
-
|
|
1079
|
-
|
|
1080
|
-
|
|
1081
|
-
|
|
1082
|
-
|
|
1083
|
-
|
|
1200
|
+
let report;
|
|
1201
|
+
let reply;
|
|
1202
|
+
for (let attempt = 0; ; attempt++) {
|
|
1203
|
+
const result = await runClaudeCode({
|
|
1204
|
+
cwd: dir,
|
|
1205
|
+
prompt: buildPrompt(ctx),
|
|
1206
|
+
permissionMode: ctx.permissionMode,
|
|
1207
|
+
model: ORCHESTRATOR_MODEL,
|
|
1208
|
+
maxTurns: ORCHESTRATOR_MAX_TURNS,
|
|
1209
|
+
abortController: abort
|
|
1210
|
+
});
|
|
1211
|
+
report = result.report ?? void 0;
|
|
1212
|
+
reply = (report ? renderReport(report) : result.text.trim()) || "(the agent produced no report)";
|
|
1213
|
+
if (abort.signal.aborted || !reportClaimsWork(report) || await hasChanges(dir)) break;
|
|
1214
|
+
if (attempt >= MAX_IMPLEMENT_RETRIES) {
|
|
1215
|
+
throw new Error(
|
|
1216
|
+
`Implementation reported completed work (acceptance criteria met with diff evidence) but the working tree is clean after ${attempt + 1} attempt(s) \u2014 no changes were persisted, so no pull request could be opened.`
|
|
1217
|
+
);
|
|
1218
|
+
}
|
|
1219
|
+
console.warn(
|
|
1220
|
+
` implement ${ctx.jobId}: report claims changes but the working tree is clean \u2014 re-running implementation (attempt ${attempt + 2})`
|
|
1221
|
+
);
|
|
1222
|
+
}
|
|
1084
1223
|
if (installResult.status === "failed") {
|
|
1085
1224
|
reply += `
|
|
1086
1225
|
|
|
@@ -1104,7 +1243,12 @@ async function processImplementJob(ctx, dir, resumed, abort) {
|
|
|
1104
1243
|
}
|
|
1105
1244
|
const { outcome, autoMerged } = await pushAndOpenPr(ctx, dir, abort, { rebase: !resumed });
|
|
1106
1245
|
reply += outcomeBanner(outcome, { branch: ctx.repo.checkoutBranch, documented, autoMerged });
|
|
1107
|
-
return {
|
|
1246
|
+
return {
|
|
1247
|
+
text: reply,
|
|
1248
|
+
widgets: [],
|
|
1249
|
+
...report ? { report } : {},
|
|
1250
|
+
...outcome.kind === "pr" ? { pr: outcome.pr } : {}
|
|
1251
|
+
};
|
|
1108
1252
|
}
|
|
1109
1253
|
async function processReviseJob(ctx, dir, resumed, abort) {
|
|
1110
1254
|
console.log(`
|
|
@@ -1180,9 +1324,16 @@ async function processReleaseJob(ctx, dir, resumed, abort) {
|
|
|
1180
1324
|
console.log(`
|
|
1181
1325
|
\u25B6 Release ${ctx.jobId} \u2014 ${ctx.repo.fullName}: "${jobTitle(ctx)}"`);
|
|
1182
1326
|
const installResult = await installDependencies(dir);
|
|
1327
|
+
const checks = await runRepoChecks(dir);
|
|
1328
|
+
if (checks.skipped) {
|
|
1329
|
+
console.log(` \u2026release ${ctx.jobId}: pre-release checks skipped (git too old for 'hook run')`);
|
|
1330
|
+
} else {
|
|
1331
|
+
console.log(` \u2026release ${ctx.jobId}: pre-release checks ${checks.ok ? "passed" : "FAILED"}`);
|
|
1332
|
+
}
|
|
1333
|
+
const baseChecks = checks.ok ? void 0 : { ok: false, log: trimHookLog(checks.log) };
|
|
1183
1334
|
const result = await runClaudeCode({
|
|
1184
1335
|
cwd: dir,
|
|
1185
|
-
prompt: buildReleasePrompt(ctx),
|
|
1336
|
+
prompt: buildReleasePrompt(ctx, baseChecks),
|
|
1186
1337
|
permissionMode: ctx.permissionMode,
|
|
1187
1338
|
model: ORCHESTRATOR_MODEL,
|
|
1188
1339
|
maxTurns: ORCHESTRATOR_MAX_TURNS,
|
|
@@ -1268,13 +1419,14 @@ async function pollLoop(config) {
|
|
|
1268
1419
|
};
|
|
1269
1420
|
scheduleCancelPoll();
|
|
1270
1421
|
try {
|
|
1271
|
-
const { text, widgets, pr, plans } = await processJob(ctx, abort);
|
|
1422
|
+
const { text, widgets, pr, plans, report } = await processJob(ctx, abort);
|
|
1272
1423
|
await reportJob(config, ctx.jobId, {
|
|
1273
1424
|
status: "done",
|
|
1274
1425
|
text,
|
|
1275
1426
|
widgets,
|
|
1276
1427
|
pr,
|
|
1277
|
-
...plans?.length ? { plans } : {}
|
|
1428
|
+
...plans?.length ? { plans } : {},
|
|
1429
|
+
...report ? { report } : {}
|
|
1278
1430
|
});
|
|
1279
1431
|
console.log(`\u2713 Job ${ctx.jobId} done`);
|
|
1280
1432
|
} catch (err) {
|
|
@@ -1286,10 +1438,12 @@ async function pollLoop(config) {
|
|
|
1286
1438
|
console.error(` (failed to report the cancellation: ${errorMessage2(reportErr)})`);
|
|
1287
1439
|
}
|
|
1288
1440
|
} else {
|
|
1289
|
-
|
|
1290
|
-
console.error(`\u2717 Job ${ctx.jobId} failed: ${message}`);
|
|
1441
|
+
console.error(`\u2717 Job ${ctx.jobId} failed: ${errorMessage2(err)}`);
|
|
1291
1442
|
try {
|
|
1292
|
-
await reportJob(config, ctx.jobId, {
|
|
1443
|
+
await reportJob(config, ctx.jobId, {
|
|
1444
|
+
status: "error",
|
|
1445
|
+
error: formatJobError(ctx, err)
|
|
1446
|
+
});
|
|
1293
1447
|
} catch (reportErr) {
|
|
1294
1448
|
console.error(` (also failed to report the error: ${errorMessage2(reportErr)})`);
|
|
1295
1449
|
}
|
|
@@ -1308,6 +1462,40 @@ function sleep(ms) {
|
|
|
1308
1462
|
function errorMessage2(err) {
|
|
1309
1463
|
return err instanceof Error ? err.message : String(err);
|
|
1310
1464
|
}
|
|
1465
|
+
// Upper bounds applied by trimHookLog: keep at most this many trailing lines…
var MAX_HOOK_LOG_LINES = 80;
// …and at most this many trailing UTF-16 code units.
var MAX_HOOK_LOG_CHARS = 4e3;
/**
 * Shorten hook output to its tail so it fits in a prompt or error message.
 *
 * Trailing whitespace is removed first. If the log has more than
 * MAX_HOOK_LOG_LINES lines only the last ones are kept; if the result is still
 * longer than MAX_HOOK_LOG_CHARS only the last characters are kept. When
 * anything was dropped, a "…(earlier output trimmed)…" line is prepended.
 *
 * @param {string} log Raw hook output.
 * @returns {string} The (possibly shortened) log.
 */
function trimHookLog(log) {
  let trimmed = log.trimEnd();
  let elided = false;
  const lines = trimmed.split("\n");
  if (lines.length > MAX_HOOK_LOG_LINES) {
    trimmed = lines.slice(-MAX_HOOK_LOG_LINES).join("\n");
    elided = true;
  }
  if (trimmed.length > MAX_HOOK_LOG_CHARS) {
    trimmed = trimmed.slice(-MAX_HOOK_LOG_CHARS);
    // slice() counts UTF-16 code units, so the cut can land inside a surrogate
    // pair (e.g. an emoji). Drop the orphaned low surrogate so the result
    // remains well-formed text.
    const first = trimmed.charCodeAt(0);
    if (first >= 0xdc00 && first <= 0xdfff) trimmed = trimmed.slice(1);
    elided = true;
  }
  return elided ? `\u2026(earlier output trimmed)\u2026\n${trimmed}` : trimmed;
}
|
|
1482
|
+
/**
 * Turn a job failure into the text reported back for the job.
 *
 * Anything other than a PreCommitError is reported as its plain message. A
 * PreCommitError becomes a markdown explanation: a headline, what happened,
 * the trimmed hook log in a code fence, and a suggested next step that
 * differs for release jobs.
 *
 * @param {object} ctx Job context; reads `ctx.kind`, `ctx.repo.fullName` and
 *   (for release jobs) `ctx.repo.mergeBranch`.
 * @param {unknown} err The caught failure.
 * @returns {string} Message text.
 */
function formatJobError(ctx, err) {
  if (!(err instanceof PreCommitError)) return errorMessage2(err);
  // mergeBranch may be unset; avoid printing "`undefined`" in that case.
  const baseRef = ctx.repo.mergeBranch ? `\`${ctx.repo.mergeBranch}\`` : "the base branch";
  const nextStep = ctx.kind === "release"
    ? `These checks are failing on ${baseRef} independently of the version bump, and the release couldn't fix them after ${MAX_COMMIT_REPAIRS} automatic attempts. Open a request on **${ctx.repo.fullName}** to fix the failing checks above, then start the release again once that fix has merged.`
    : `The agent couldn't get its change past these checks after ${MAX_COMMIT_REPAIRS} automatic repair attempts. Open a request on **${ctx.repo.fullName}** describing the failing checks above so the agent can fix them at their root, then try again.`;
  return [
    "\u274C **Blocked by failing pre-commit checks.**",
    "",
    `The repository's pre-commit hook (lint / typecheck / tests) rejected the commit after ${MAX_COMMIT_REPAIRS} automatic repair attempts, so nothing was pushed.`,
    "",
    "**What failed:**",
    "",
    "```",
    trimHookLog(err.log),
    "```",
    "",
    `**Next step:** ${nextStep}`
  ].join("\n");
}
|
|
1311
1499
|
|
|
1312
1500
|
// src/cli.ts
|
|
1313
1501
|
var DEFAULT_SERVER = process.env.FLUME_SERVER || "http://localhost:3000";
|
package/package.json
CHANGED
|
@@ -171,6 +171,28 @@ version did not change.
|
|
|
171
171
|
`apps/runner/package.json`. Leave `apps/web/package.json` unchanged.
|
|
172
172
|
- **Clear Phase 1 text:** be explicit about what changed since the last tag so the
|
|
173
173
|
user can confidently confirm or override your suggestions.
|
|
174
|
-
- **
|
|
175
|
-
`apps/runner/package.json`, and `CHANGELOG.md`.
|
|
174
|
+
- **Edit only version files — with one exception.** Normally edit only
|
|
175
|
+
`apps/web/package.json`, `apps/runner/package.json`, and `CHANGELOG.md`. The sole
|
|
176
|
+
exception: when the prompt includes a **`# Pre-release check status`** section
|
|
177
|
+
reporting failing checks, you must also fix the failing code (any file needed) so
|
|
178
|
+
the tree is green — see "Pre-release checks" below. Never weaken or skip checks to
|
|
179
|
+
silence them.
|
|
176
180
|
- **Never commit, push, or open a PR** — the runner does that.
|
|
181
|
+
|
|
182
|
+
## Pre-release checks
|
|
183
|
+
|
|
184
|
+
We cannot release code with failing checks. Before this turn, the runner ran the
|
|
185
|
+
repository's own pre-commit hook (lint / typecheck / tests). If the prompt contains
|
|
186
|
+
a **`# Pre-release check status`** section, the base branch is currently broken
|
|
187
|
+
_independently of the version bump_:
|
|
188
|
+
|
|
189
|
+
- **Phase 1:** state plainly in your reply that the base currently fails these
|
|
190
|
+
checks and that the release will fix them as part of the bump, then ask the
|
|
191
|
+
version questions as usual.
|
|
192
|
+
- **Phase 2:** fix the failing code at its root **first** (so the checks pass),
|
|
193
|
+
**then** apply the version bumps and CHANGELOG. The fixes ship in the same bump
|
|
194
|
+
PR. Do not delete or skip tests, weaken assertions, or disable checks. Still do
|
|
195
|
+
not commit or push — the runner commits everything together.
|
|
196
|
+
|
|
197
|
+
If there is no `# Pre-release check status` section, the base is clean (or the check
|
|
198
|
+
was skipped); proceed normally and edit only the version files.
|
|
@@ -5,8 +5,9 @@ description: >-
|
|
|
5
5
|
subagents instead of writing the code yourself. Use in edit-capable runs. You
|
|
6
6
|
act as the orchestrator: delegate implementation, acceptance-criteria review,
|
|
7
7
|
code-quality review, and report-writing to Task subagents — picking the right
|
|
8
|
-
model for each phase
|
|
9
|
-
|
|
8
|
+
model for each phase. The report subagent submits a structured report (with
|
|
9
|
+
per-criterion diff evidence) via the submit_report tool. Makes edits via
|
|
10
|
+
subagents; never commits, pushes, or opens a PR (the runner does that).
|
|
10
11
|
---
|
|
11
12
|
|
|
12
13
|
# implement-plan
|
|
@@ -67,8 +68,19 @@ the next step.
|
|
|
67
68
|
3. **Acceptance-criteria review** — Task, `model: "opus"`, read-only. Give the
|
|
68
69
|
subagent the full AC list and tell it to verify each one against the actual
|
|
69
70
|
changes (run `git --no-pager diff`, read the changed files, run tests/build if
|
|
70
|
-
useful).
|
|
71
|
-
|
|
71
|
+
useful). For **each** AC it must return: the criterion text verbatim, a verdict
|
|
72
|
+
(**met / not met / unclear**), a one-or-two-sentence rationale, and — this is the
|
|
73
|
+
evidence the report needs — the **exact diff hunk(s)** that prove it, each tagged
|
|
74
|
+
with its file path (the minimal lines that matter, copied verbatim from
|
|
75
|
+
`git --no-pager diff`; not the whole file). A _met_ AC should cite at least one
|
|
76
|
+
hunk; _not met_ / _unclear_ may cite none. **Ground every verdict in the actual
|
|
77
|
+
diff:** a criterion may be marked _met_ only if `git --no-pager diff` really
|
|
78
|
+
contains the change that satisfies it, and each cited hunk must be copied verbatim
|
|
79
|
+
from that live output — never reconstructed from the plan or from what the
|
|
80
|
+
implement subagent claimed. If `git --no-pager diff` is empty, the implementation
|
|
81
|
+
produced no changes: no criterion may be _met_, and the review must say so. Tell it
|
|
82
|
+
to return this as a clean, structured list so you can hand it straight to the
|
|
83
|
+
report step.
|
|
72
84
|
|
|
73
85
|
4. **Code-quality review** — Task, `model: "opus"`, read-only. Give the subagent
|
|
74
86
|
the coding guidelines (verbatim) and tell it to review the changes for
|
|
@@ -82,30 +94,45 @@ the next step.
|
|
|
82
94
|
failed. Repeat at most **2** times. If something still fails after that, stop
|
|
83
95
|
looping and record the gap honestly in the report — do not hide it.
|
|
84
96
|
|
|
85
|
-
6. **Report** — Task, `model: "opus"`, read-only. Give the subagent the plan, the
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
97
|
+
6. **Report** — Task, `model: "opus"`, read-only. Give the subagent the plan, the AC
|
|
98
|
+
verdicts (from step 3), and the quality findings, and tell it to run
|
|
99
|
+
`git --no-pager diff` itself as the **single source of truth** for the report.
|
|
100
|
+
Every `evidence` hunk it submits must be copied verbatim from that live diff — it
|
|
101
|
+
must drop or correct any hunk carried over from step 3 that no longer appears in
|
|
102
|
+
the actual diff, and the **Files changed** list must come from
|
|
103
|
+
`git --no-pager diff --stat`, not from what an earlier subagent claimed. **If
|
|
104
|
+
`git --no-pager diff` is empty, the implementation changed nothing:** the report
|
|
105
|
+
must say so plainly — an honest `summary`, no AC marked `met` with evidence — and
|
|
106
|
+
must never describe edits that aren't in the diff. Tell it to submit the
|
|
107
|
+
user-facing report by calling the **`submit_report`** tool — it has that tool
|
|
108
|
+
available. It must call `submit_report` exactly once and must not edit any files.
|
|
109
|
+
|
|
110
|
+
7. **Confirm and end.** Once the report subagent has called `submit_report`, you are
|
|
111
|
+
done — end your turn. The runner reads the submitted report, renders it, posts it
|
|
112
|
+
to the thread, and appends the pull-request link. (Your own final text is only a
|
|
113
|
+
fallback if no report was submitted, so make sure the subagent submits one.)
|
|
114
|
+
|
|
115
|
+
## The report (what `submit_report` takes)
|
|
116
|
+
|
|
117
|
+
The report subagent calls `submit_report` with these fields:
|
|
118
|
+
|
|
119
|
+
- **`summary`** — one or two sentences on what was implemented.
|
|
120
|
+
- **`prose`** — markdown for the remaining sections, using `##` headings:
|
|
121
|
+
**What changed** (the plan steps, each mapped to the concrete changes that satisfy
|
|
122
|
+
it), **Code quality** (the quality-review outcome and anything left as
|
|
123
|
+
nice-to-have), **Files changed** (the list from the diff), **Build / tests** (what
|
|
124
|
+
was run and the result, or why it wasn't run), and **Caveats / follow-ups**
|
|
125
|
+
(anything deferred, unmet, or worth a human's eyes). Do **not** put the
|
|
126
|
+
acceptance-criteria section in `prose`, and do **not** include a PR link — the
|
|
127
|
+
runner adds it.
|
|
128
|
+
- **`acceptanceCriteria`** — one entry per AC from the plan, in plan order, each:
|
|
129
|
+
- `criterion` — the AC text verbatim.
|
|
130
|
+
- `status` — `"met"` / `"not_met"` / `"unclear"`, mirroring the AC review.
|
|
131
|
+
- `rationale` — one or two sentences on why the verdict holds.
|
|
132
|
+
- `evidence` — an array of `{ file, hunk, note? }`, where `hunk` is copied
|
|
133
|
+
verbatim from the live `git --no-pager diff` and proves the verdict (`note`
|
|
134
|
+
optionally explains it). Never include a hunk that isn't in the actual diff. Cite
|
|
135
|
+
the supporting hunk(s) for a met criterion; may be empty for not_met / unclear.
|
|
109
136
|
|
|
110
137
|
## Always
|
|
111
138
|
|
|
@@ -115,4 +142,7 @@ Do **not** include a PR link — the runner adds it.
|
|
|
115
142
|
- Make every Task prompt self-contained — subagents see only what you give them.
|
|
116
143
|
- Reviewers and the report writer never modify files.
|
|
117
144
|
- Never commit, push, or open a PR.
|
|
118
|
-
-
|
|
145
|
+
- The report subagent delivers the report by calling `submit_report` (structured),
|
|
146
|
+
once — not as prose for you to echo. Each acceptance criterion carries the diff
|
|
147
|
+
hunk(s) that prove its verdict, copied verbatim from the live `git --no-pager diff`
|
|
148
|
+
— never fabricated. An empty diff means an honest "nothing changed" report.
|
|
@@ -66,9 +66,10 @@ Field-by-field guidance:
|
|
|
66
66
|
and nothing more.
|
|
67
67
|
- **`assumptions`** — anything you decided during investigation (including
|
|
68
68
|
unanswered defaults from Phase 1).
|
|
69
|
-
- **`steps`** — an ordered list. For each step:
|
|
70
|
-
|
|
71
|
-
references (`path/to/file.ts`) and name the functions/symbols involved.
|
|
69
|
+
- **`steps`** — an ordered list. For each step provide:
|
|
70
|
+
- **`title`** — a concise imperative phrase naming the step (e.g. "Add submit_plan schema to plan.ts").
|
|
71
|
+
- **`description`** — what changes and why: the concrete change being made and the rationale for it. Use concrete file references (`path/to/file.ts`) and name the functions/symbols involved.
|
|
72
|
+
- **`pseudoCode`** — an array of `{ file, pseudoCode }` entries. Provide an entry for every file the step touches **except** documentation files (SKILL.md, README.md, wiki pages, etc.). `pseudoCode` is optional in the schema but expected for all non-documentation files. Each entry names the file path and contains pseudo code that precisely describes the changes to make in that file.
|
|
72
73
|
- **`acceptanceCriteria`** — **required; at least 2 items.** Each criterion must
|
|
73
74
|
be an observable condition you could check after the work is done: a behavior,
|
|
74
75
|
a test result, or a verifiable state. Together they must fully define "done" —
|
|
@@ -77,8 +77,10 @@ Your last message **is** the comment posted to the plan thread — write it for
|
|
|
77
77
|
user:
|
|
78
78
|
|
|
79
79
|
- **Implemented:** a short report — what you changed and why, which files, and how
|
|
80
|
-
it was verified (build/tests).
|
|
81
|
-
|
|
80
|
+
it was verified (build/tests). Base "what changed" and "which files" on the actual
|
|
81
|
+
`git --no-pager diff` (`--stat` for the file list), not on what a subagent claimed;
|
|
82
|
+
if the diff is empty, say nothing was changed rather than describing edits that
|
|
83
|
+
aren't there. The runner appends the pull-request link, so don't add one.
|
|
82
84
|
- **Clarify / push back:** your question or reasoning, as prose (plus any widget).
|
|
83
85
|
- **Re-plan:** you called `submit_plan`; the rendered plan is posted automatically,
|
|
84
86
|
so keep any extra reply text minimal.
|