@flumecode/runner 0.13.0 → 0.14.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cli.js +129 -21
- package/package.json +1 -1
- package/skills-plugin/rules/technical-writing.md +14 -0
- package/skills-plugin/skills/implement-plan/SKILL.md +76 -49
- package/skills-plugin/skills/lint-plugin-generator/SKILL.md +7 -37
- package/skills-plugin/skills/request-to-plan/SKILL.md +4 -0
- package/skills-plugin/skills/resolve-merge-conflict/SKILL.md +3 -1
- package/skills-plugin/skills/revise-implementation/SKILL.md +3 -2
package/dist/cli.js
CHANGED
|
@@ -25,6 +25,10 @@ function writeConfig(config) {
|
|
|
25
25
|
writeFileSync(configPath, JSON.stringify(config, null, 2), { mode: 384 });
|
|
26
26
|
}
|
|
27
27
|
|
|
28
|
+
// src/run.ts
|
|
29
|
+
import { existsSync as existsSync4 } from "node:fs";
|
|
30
|
+
import { join as join5 } from "node:path";
|
|
31
|
+
|
|
28
32
|
// src/version.ts
|
|
29
33
|
import { readFileSync as readFileSync2 } from "node:fs";
|
|
30
34
|
import { fileURLToPath } from "node:url";
|
|
@@ -373,6 +377,10 @@ async function hasChanges(dir) {
|
|
|
373
377
|
const { stdout: stdout2 } = await git(["-C", dir, "status", "--porcelain"]);
|
|
374
378
|
return stdout2.trim().length > 0;
|
|
375
379
|
}
|
|
380
|
+
async function gitDiffStat(dir) {
|
|
381
|
+
const { stdout: stdout2 } = await git(["-C", dir, "--no-pager", "diff", "--stat"]);
|
|
382
|
+
return stdout2;
|
|
383
|
+
}
|
|
376
384
|
var PreCommitError = class extends Error {
|
|
377
385
|
constructor(log) {
|
|
378
386
|
super("pre-commit checks failed");
|
|
@@ -609,14 +617,30 @@ function parseManifest(raw) {
|
|
|
609
617
|
|
|
610
618
|
// src/plugins/socket.ts
|
|
611
619
|
var exec2 = promisify2(execCb);
|
|
620
|
+
var MAX_OUTPUT = 8 * 1024;
|
|
621
|
+
function cap(s) {
|
|
622
|
+
return s.length <= MAX_OUTPUT ? s : s.slice(s.length - MAX_OUTPUT);
|
|
623
|
+
}
|
|
624
|
+
var lastSocketResults = [];
|
|
625
|
+
function resetSocketResults() {
|
|
626
|
+
lastSocketResults = [];
|
|
627
|
+
}
|
|
628
|
+
function getSocketResults() {
|
|
629
|
+
return lastSocketResults;
|
|
630
|
+
}
|
|
612
631
|
async function runSocket(socketName, dir) {
|
|
613
632
|
const plugins = (await loadPlugins(dir)).filter((p) => p.socket === socketName);
|
|
633
|
+
const results = [];
|
|
614
634
|
for (const plugin of plugins) {
|
|
615
635
|
const result = await runPluginCommand(plugin.run, dir);
|
|
616
636
|
if (result.exitCode !== 0) {
|
|
637
|
+
results.push({ key: plugin.key, status: "failed", output: cap(result.output) });
|
|
638
|
+
lastSocketResults = results;
|
|
617
639
|
throw new PreCommitError(`[plugin:${plugin.key}] ${result.output}`);
|
|
618
640
|
}
|
|
641
|
+
results.push({ key: plugin.key, status: "passed", output: cap(result.output) });
|
|
619
642
|
}
|
|
643
|
+
lastSocketResults = results;
|
|
620
644
|
}
|
|
621
645
|
async function runPluginCommand(command2, cwd) {
|
|
622
646
|
try {
|
|
@@ -707,6 +731,11 @@ function widgetPosted(kind) {
|
|
|
707
731
|
// src/plan.ts
|
|
708
732
|
import { createSdkMcpServer as createSdkMcpServer2, tool as tool2 } from "@anthropic-ai/claude-agent-sdk";
|
|
709
733
|
import { z as z2 } from "zod";
|
|
734
|
+
|
|
735
|
+
// src/schema-hints.ts
|
|
736
|
+
var INLINE_CODE_HINT = "Wrap code identifiers (function, variable, type, and file names, commands, and flags) in inline backticks, e.g. `getCodingSessionsForRequest`.";
|
|
737
|
+
|
|
738
|
+
// src/plan.ts
|
|
710
739
|
var SERVER_NAME2 = "flume_plan";
|
|
711
740
|
var SUBMIT_PLAN = "submit_plan";
|
|
712
741
|
var PLAN_TOOL_NAME = `mcp__${SERVER_NAME2}__${SUBMIT_PLAN}`;
|
|
@@ -717,7 +746,7 @@ var pseudoCodeEntrySchema = z2.object({
|
|
|
717
746
|
});
|
|
718
747
|
var stepSchema = z2.object({
|
|
719
748
|
title: z2.string().min(1).describe("A concise imperative title for this step."),
|
|
720
|
-
description: z2.string().min(1).describe("What changes and why \u2014 the rationale for this step."),
|
|
749
|
+
description: z2.string().min(1).describe("What changes and why \u2014 the rationale for this step. " + INLINE_CODE_HINT),
|
|
721
750
|
pseudoCode: z2.array(pseudoCodeEntrySchema).optional().describe(
|
|
722
751
|
"Per-file pseudo code. Provide an entry for every non-documentation file this step touches. Each entry contains the file path and pseudo code describing the changes to that file."
|
|
723
752
|
)
|
|
@@ -727,11 +756,11 @@ var planInputSchema = {
|
|
|
727
756
|
"A concise, descriptive name for THIS plan. Must be distinct from the request title and from any sibling plans on the same request. Keep it under 120 characters."
|
|
728
757
|
),
|
|
729
758
|
scope: z2.enum(["feat", "fix", "chore", "docs", "test", "refactor"]).describe("The primary intent of the change."),
|
|
730
|
-
goal: z2.string().min(1).describe("One or two sentences stating the outcome."),
|
|
759
|
+
goal: z2.string().min(1).describe("One or two sentences stating the outcome. " + INLINE_CODE_HINT),
|
|
731
760
|
assumptions: z2.array(z2.string()).describe("Anything decided during planning, including unanswered defaults."),
|
|
732
761
|
steps: z2.array(stepSchema).min(1).describe("Ordered list of changes. Each step says what and why, with file references."),
|
|
733
762
|
acceptanceCriteria: z2.array(z2.string().min(1)).min(2).describe(
|
|
734
|
-
"Concrete, deterministically-checkable conditions that together define done. Each names a trigger/precondition and the exact observable result (run X -> output Y; file Z contains W; f(a) returns b) \u2014 no vague adjectives, not a restatement of a step. The set must collectively cover every step's change. At least 2 required."
|
|
763
|
+
"Concrete, deterministically-checkable conditions that together define done. Each names a trigger/precondition and the exact observable result (run X -> output Y; file Z contains W; f(a) returns b) \u2014 no vague adjectives, not a restatement of a step. The set must collectively cover every step's change. At least 2 required. " + INLINE_CODE_HINT
|
|
735
764
|
),
|
|
736
765
|
risks: z2.array(z2.string()).describe("Anything that could change the approach."),
|
|
737
766
|
outOfScope: z2.array(z2.string()).describe("What is deliberately not being done.")
|
|
@@ -827,6 +856,19 @@ function createPlanTooling() {
|
|
|
827
856
|
});
|
|
828
857
|
return { mcpServer, getPlans: () => renderedPlans };
|
|
829
858
|
}
|
|
859
|
+
function countPlanAcceptanceCriteria(planBody) {
|
|
860
|
+
if (!planBody) return 0;
|
|
861
|
+
const lines2 = planBody.split("\n");
|
|
862
|
+
const start2 = lines2.findIndex((l) => l.trim() === "## Acceptance criteria");
|
|
863
|
+
if (start2 === -1) return 0;
|
|
864
|
+
let count = 0;
|
|
865
|
+
for (let i = start2 + 1; i < lines2.length; i++) {
|
|
866
|
+
const line = lines2[i] ?? "";
|
|
867
|
+
if (line.startsWith("## ")) break;
|
|
868
|
+
if (line.startsWith("- [ ] ")) count++;
|
|
869
|
+
}
|
|
870
|
+
return count;
|
|
871
|
+
}
|
|
830
872
|
|
|
831
873
|
// src/report.ts
|
|
832
874
|
import { createSdkMcpServer as createSdkMcpServer3, tool as tool3 } from "@anthropic-ai/claude-agent-sdk";
|
|
@@ -842,28 +884,28 @@ var STATUS_ICON = {
|
|
|
842
884
|
var evidenceSchema = z3.object({
|
|
843
885
|
file: z3.string().min(1).describe("Repo-relative path the hunk comes from."),
|
|
844
886
|
hunk: z3.string().min(1).describe(
|
|
845
|
-
"A unified-diff hunk
|
|
887
|
+
"A unified-diff hunk proving the criterion \u2014 the lines that matter, not the whole file. MUST keep the `@@ -a,b +c,d @@` hunk header line(s) exactly as they appear in `git --no-pager diff`; the report renders file line numbers from them. Rendered verbatim as a ```diff block."
|
|
846
888
|
),
|
|
847
889
|
note: z3.string().optional().describe("Optional one-line explanation of why this hunk satisfies the criterion.")
|
|
848
890
|
});
|
|
849
891
|
var acVerdictSchema = z3.object({
|
|
850
892
|
criterion: z3.string().min(1).describe("The acceptance-criterion text, verbatim from the plan."),
|
|
851
893
|
status: z3.enum(["met", "not_met", "unclear"]).describe("Verdict for this criterion, verified against the actual diff."),
|
|
852
|
-
rationale: z3.string().min(1).describe("One or two sentences on why the verdict holds."),
|
|
894
|
+
rationale: z3.string().min(1).describe("One or two sentences on why the verdict holds. " + INLINE_CODE_HINT),
|
|
853
895
|
evidence: z3.array(evidenceSchema).describe(
|
|
854
896
|
"Diff hunks proving the verdict, copied verbatim from git --no-pager diff. Across ALL criteria the evidence must collectively cover every hunk in the diff \u2014 each changed hunk appears under at least one criterion. Cite the relevant hunk(s) for a met criterion; may be empty for not_met / unclear."
|
|
855
897
|
)
|
|
856
898
|
});
|
|
857
899
|
var reportInputSchema = {
|
|
858
|
-
summary: z3.string().min(1).describe("One or two sentences on what was implemented."),
|
|
900
|
+
summary: z3.string().min(1).describe("One or two sentences on what was implemented. " + INLINE_CODE_HINT),
|
|
859
901
|
filesChanged: z3.string().min(1).describe(
|
|
860
902
|
"Markdown: the list of files changed (from the diff). Rendered under '## Files changed'."
|
|
861
903
|
),
|
|
862
904
|
codeQuality: z3.string().min(1).describe(
|
|
863
|
-
"Markdown: the code-quality review outcome and anything left as nice-to-have. Rendered under '## Code quality'."
|
|
905
|
+
"Markdown: the code-quality review outcome and anything left as nice-to-have. Rendered under '## Code quality'. " + INLINE_CODE_HINT
|
|
864
906
|
),
|
|
865
907
|
caveats: z3.string().min(1).describe(
|
|
866
|
-
"Markdown: anything deferred, unmet, or worth a human's eyes, incl. diff hunks that map to no plan AC. Write 'None.' if nothing. Rendered under '## Caveats / follow-ups'."
|
|
908
|
+
"Markdown: anything deferred, unmet, or worth a human's eyes, incl. diff hunks that map to no plan AC. Write 'None.' if nothing. Rendered under '## Caveats / follow-ups'. " + INLINE_CODE_HINT
|
|
867
909
|
),
|
|
868
910
|
acceptanceCriteria: z3.array(acVerdictSchema).min(1).describe(
|
|
869
911
|
"One entry per acceptance criterion from the plan, in plan order, each with a verdict and the diff evidence behind it."
|
|
@@ -1081,6 +1123,10 @@ function stripFrontMatter(raw) {
|
|
|
1081
1123
|
}
|
|
1082
1124
|
|
|
1083
1125
|
// src/prompt.ts
|
|
1126
|
+
function appendRule(lines2, intro, ruleName) {
|
|
1127
|
+
lines2.push("", intro, "", loadRule(ruleName));
|
|
1128
|
+
}
|
|
1129
|
+
var WRITING_INTRO = "These technical-writing guidelines apply to the plan and report prose you author in this run:";
|
|
1084
1130
|
function turnHeading(turn, agentName) {
|
|
1085
1131
|
if (turn.role === "user") return "User";
|
|
1086
1132
|
if (turn.failed) return `${agentName} (this run ended in an error)`;
|
|
@@ -1114,6 +1160,7 @@ function buildPrompt(ctx) {
|
|
|
1114
1160
|
loadRule("coding-guideline")
|
|
1115
1161
|
);
|
|
1116
1162
|
}
|
|
1163
|
+
appendRule(lines2, WRITING_INTRO, "technical-writing");
|
|
1117
1164
|
lines2.push("", `# Request: ${ctx.request?.title ?? ""}`);
|
|
1118
1165
|
if (ctx.request?.body) {
|
|
1119
1166
|
lines2.push("", ctx.request.body);
|
|
@@ -1140,6 +1187,10 @@ function buildRevisePrompt(ctx) {
|
|
|
1140
1187
|
"",
|
|
1141
1188
|
loadRule("coding-guideline"),
|
|
1142
1189
|
"",
|
|
1190
|
+
WRITING_INTRO,
|
|
1191
|
+
"",
|
|
1192
|
+
loadRule("technical-writing"),
|
|
1193
|
+
"",
|
|
1143
1194
|
`# Plan: ${ctx.request?.title ?? ""}`
|
|
1144
1195
|
];
|
|
1145
1196
|
if (ctx.request?.body) {
|
|
@@ -1166,6 +1217,10 @@ function buildResolvePrompt(ctx, related = []) {
|
|
|
1166
1217
|
"",
|
|
1167
1218
|
loadRule("coding-guideline"),
|
|
1168
1219
|
"",
|
|
1220
|
+
WRITING_INTRO,
|
|
1221
|
+
"",
|
|
1222
|
+
loadRule("technical-writing"),
|
|
1223
|
+
"",
|
|
1169
1224
|
`# Plan: ${ctx.request?.title ?? ""}`
|
|
1170
1225
|
];
|
|
1171
1226
|
if (ctx.request?.body) {
|
|
@@ -1190,7 +1245,7 @@ function buildResolvePrompt(ctx, related = []) {
|
|
|
1190
1245
|
);
|
|
1191
1246
|
return lines2.join("\n");
|
|
1192
1247
|
}
|
|
1193
|
-
function buildDocumentPrompt(ctx) {
|
|
1248
|
+
function buildDocumentPrompt(ctx, changedFiles) {
|
|
1194
1249
|
const lines2 = [
|
|
1195
1250
|
`You are "${ctx.agentName}" maintaining the repository wiki for ${ctx.repo.fullName}.`,
|
|
1196
1251
|
`An implementation just ran in this working directory to satisfy the request below; its changes are uncommitted in the working tree.`,
|
|
@@ -1202,6 +1257,14 @@ function buildDocumentPrompt(ctx) {
|
|
|
1202
1257
|
lines2.push("", ctx.request.body);
|
|
1203
1258
|
}
|
|
1204
1259
|
appendThread(lines2, ctx);
|
|
1260
|
+
if (changedFiles && changedFiles.trim()) {
|
|
1261
|
+
lines2.push(
|
|
1262
|
+
"",
|
|
1263
|
+
"Files changed by this implementation (reconcile only the wiki pages these affect \u2014 do not re-survey the whole repo):",
|
|
1264
|
+
"",
|
|
1265
|
+
changedFiles.trim()
|
|
1266
|
+
);
|
|
1267
|
+
}
|
|
1205
1268
|
lines2.push("", "When done, reply with a one- or two-line summary of the wiki changes you made.");
|
|
1206
1269
|
return lines2.join("\n");
|
|
1207
1270
|
}
|
|
@@ -1464,13 +1527,15 @@ async function processChatJob(ctx, dir, config, abort) {
|
|
|
1464
1527
|
console.log(` \u2026job ${ctx.jobId} posted ${result.widgets.length} widget(s); awaiting reply`);
|
|
1465
1528
|
return { text: reply, widgets: result.widgets };
|
|
1466
1529
|
}
|
|
1530
|
+
const wikiExists = existsSync4(join5(dir, ".flumecode", "wiki"));
|
|
1467
1531
|
let documented = false;
|
|
1468
|
-
if (ctx.permissionMode !== "plan" && await hasChanges(dir)) {
|
|
1532
|
+
if (ctx.permissionMode !== "plan" && wikiExists && await hasChanges(dir)) {
|
|
1469
1533
|
try {
|
|
1534
|
+
const changedFiles = await gitDiffStat(dir);
|
|
1470
1535
|
console.log(` \u2026updating wiki for job ${ctx.jobId}`);
|
|
1471
1536
|
await runClaudeCode({
|
|
1472
1537
|
cwd: dir,
|
|
1473
|
-
prompt: buildDocumentPrompt(ctx),
|
|
1538
|
+
prompt: buildDocumentPrompt(ctx, changedFiles),
|
|
1474
1539
|
permissionMode: ctx.permissionMode,
|
|
1475
1540
|
maxTurns: DOCUMENT_MAX_TURNS,
|
|
1476
1541
|
abortController: abort
|
|
@@ -1479,6 +1544,8 @@ async function processChatJob(ctx, dir, config, abort) {
|
|
|
1479
1544
|
} catch (err) {
|
|
1480
1545
|
console.warn(` wiki update skipped: ${errorMessage2(err)}`);
|
|
1481
1546
|
}
|
|
1547
|
+
} else if (ctx.permissionMode !== "plan" && !wikiExists) {
|
|
1548
|
+
console.log(` no .flumecode/wiki \u2014 skipping wiki reconcile for ${ctx.jobId}`);
|
|
1482
1549
|
}
|
|
1483
1550
|
const { outcome, autoMerged } = await pushAndOpenPr(ctx, dir, config, abort);
|
|
1484
1551
|
reply += outcomeBanner(outcome, { branch: ctx.repo.checkoutBranch, documented, autoMerged });
|
|
@@ -1487,12 +1554,24 @@ async function processChatJob(ctx, dir, config, abort) {
|
|
|
1487
1554
|
function reportClaimsWork(report) {
|
|
1488
1555
|
return !!report && report.acceptanceCriteria.some((ac) => ac.status === "met" && ac.evidence.length > 0);
|
|
1489
1556
|
}
|
|
1557
|
+
function reportMeetsAcContract(report, expectedAcCount) {
|
|
1558
|
+
if (expectedAcCount === 0) return true;
|
|
1559
|
+
if (!report) return false;
|
|
1560
|
+
return report.acceptanceCriteria.length === expectedAcCount;
|
|
1561
|
+
}
|
|
1562
|
+
function buildAcWarningBanner(report, expectedAcCount) {
|
|
1563
|
+
if (!report)
|
|
1564
|
+
return "> \u26A0\uFE0F **Unverified AC review** \u2014 the implementation did not submit a structured report, so its acceptance-criteria review could not be checked against the plan.";
|
|
1565
|
+
return `> \u26A0\uFE0F **AC review may be incomplete** \u2014 the plan has ${expectedAcCount} acceptance criteria but the report reviewed ${report.acceptanceCriteria.length}.`;
|
|
1566
|
+
}
|
|
1490
1567
|
async function processImplementJob(ctx, dir, resumed, config, abort) {
|
|
1491
1568
|
console.log(`
|
|
1492
1569
|
\u25B6 Implement ${ctx.jobId} \u2014 ${ctx.repo.fullName}: "${jobTitle(ctx)}"`);
|
|
1493
1570
|
const installResult = await installDependencies(dir);
|
|
1571
|
+
const expectedAcCount = countPlanAcceptanceCriteria(ctx.request?.body);
|
|
1494
1572
|
let report;
|
|
1495
1573
|
let reply;
|
|
1574
|
+
let warningBanner = "";
|
|
1496
1575
|
for (let attempt = 0; ; attempt++) {
|
|
1497
1576
|
const result = await runClaudeCode({
|
|
1498
1577
|
cwd: dir,
|
|
@@ -1504,28 +1583,48 @@ async function processImplementJob(ctx, dir, resumed, config, abort) {
|
|
|
1504
1583
|
});
|
|
1505
1584
|
report = result.report ?? void 0;
|
|
1506
1585
|
reply = (report ? renderReport(report) : result.text.trim()) || "(the agent produced no report)";
|
|
1507
|
-
if (abort.signal.aborted
|
|
1508
|
-
|
|
1586
|
+
if (abort.signal.aborted) {
|
|
1587
|
+
warningBanner = "";
|
|
1588
|
+
break;
|
|
1589
|
+
}
|
|
1590
|
+
const treeChanged = await hasChanges(dir);
|
|
1591
|
+
const phantom = reportClaimsWork(report) && !treeChanged;
|
|
1592
|
+
const acProblem = !reportMeetsAcContract(report, expectedAcCount);
|
|
1593
|
+
if (!phantom && !acProblem) {
|
|
1594
|
+
warningBanner = "";
|
|
1595
|
+
break;
|
|
1596
|
+
}
|
|
1597
|
+
if (attempt < MAX_IMPLEMENT_RETRIES) {
|
|
1598
|
+
console.warn(
|
|
1599
|
+
` implement ${ctx.jobId}: ${phantom ? "report claims changes but the working tree is clean" : "AC-review contract failed"} \u2014 re-running implementation (attempt ${attempt + 2})`
|
|
1600
|
+
);
|
|
1601
|
+
continue;
|
|
1602
|
+
}
|
|
1603
|
+
if (phantom) {
|
|
1509
1604
|
throw new Error(
|
|
1510
1605
|
`Implementation reported completed work (acceptance criteria met with diff evidence) but the working tree is clean after ${attempt + 1} attempt(s) \u2014 no changes were persisted, so no pull request could be opened.`
|
|
1511
1606
|
);
|
|
1512
1607
|
}
|
|
1513
|
-
|
|
1514
|
-
|
|
1515
|
-
);
|
|
1608
|
+
warningBanner = buildAcWarningBanner(report, expectedAcCount);
|
|
1609
|
+
break;
|
|
1516
1610
|
}
|
|
1611
|
+
if (warningBanner) reply = `${warningBanner}
|
|
1612
|
+
|
|
1613
|
+
${reply}`;
|
|
1517
1614
|
if (installResult.status === "failed") {
|
|
1518
1615
|
reply += `
|
|
1519
1616
|
|
|
1520
1617
|
> \u26A0\uFE0F Dependencies failed to install (\`${installResult.manager}\`); tests may not have run.`;
|
|
1521
1618
|
}
|
|
1619
|
+
const wikiExists = existsSync4(join5(dir, ".flumecode", "wiki"));
|
|
1522
1620
|
let documented = false;
|
|
1523
|
-
if (await hasChanges(dir)) {
|
|
1621
|
+
if (wikiExists && await hasChanges(dir)) {
|
|
1524
1622
|
try {
|
|
1623
|
+
const changedFiles = await gitDiffStat(dir);
|
|
1525
1624
|
console.log(` \u2026updating wiki for implement ${ctx.jobId}`);
|
|
1526
1625
|
await runClaudeCode({
|
|
1527
1626
|
cwd: dir,
|
|
1528
|
-
prompt: buildDocumentPrompt(ctx),
|
|
1627
|
+
prompt: buildDocumentPrompt(ctx, changedFiles),
|
|
1529
1628
|
permissionMode: ctx.permissionMode,
|
|
1530
1629
|
maxTurns: DOCUMENT_MAX_TURNS,
|
|
1531
1630
|
abortController: abort
|
|
@@ -1534,15 +1633,19 @@ async function processImplementJob(ctx, dir, resumed, config, abort) {
|
|
|
1534
1633
|
} catch (err) {
|
|
1535
1634
|
console.warn(` wiki update skipped: ${errorMessage2(err)}`);
|
|
1536
1635
|
}
|
|
1636
|
+
} else if (!wikiExists) {
|
|
1637
|
+
console.log(` no .flumecode/wiki \u2014 skipping wiki reconcile for ${ctx.jobId}`);
|
|
1537
1638
|
}
|
|
1538
1639
|
const { outcome, autoMerged } = await pushAndOpenPr(ctx, dir, config, abort, {
|
|
1539
1640
|
rebase: !resumed
|
|
1540
1641
|
});
|
|
1541
1642
|
reply += outcomeBanner(outcome, { branch: ctx.repo.checkoutBranch, documented, autoMerged });
|
|
1643
|
+
const lintPlugins = getSocketResults();
|
|
1644
|
+
const finalReport = report && lintPlugins.length ? { ...report, lint: { plugins: lintPlugins } } : report;
|
|
1542
1645
|
return {
|
|
1543
1646
|
text: reply,
|
|
1544
1647
|
widgets: [],
|
|
1545
|
-
...
|
|
1648
|
+
...finalReport ? { report: finalReport } : {},
|
|
1546
1649
|
...outcome.kind === "pr" ? { pr: outcome.pr } : {}
|
|
1547
1650
|
};
|
|
1548
1651
|
}
|
|
@@ -1570,13 +1673,15 @@ async function processReviseJob(ctx, dir, resumed, config, abort) {
|
|
|
1570
1673
|
console.log(` \u2026revise ${ctx.jobId} posted ${result.widgets.length} widget(s); awaiting reply`);
|
|
1571
1674
|
return { text: reply, widgets: result.widgets };
|
|
1572
1675
|
}
|
|
1676
|
+
const wikiExists = existsSync4(join5(dir, ".flumecode", "wiki"));
|
|
1573
1677
|
let documented = false;
|
|
1574
|
-
if (await hasChanges(dir)) {
|
|
1678
|
+
if (wikiExists && await hasChanges(dir)) {
|
|
1575
1679
|
try {
|
|
1680
|
+
const changedFiles = await gitDiffStat(dir);
|
|
1576
1681
|
console.log(` \u2026updating wiki for revise ${ctx.jobId}`);
|
|
1577
1682
|
await runClaudeCode({
|
|
1578
1683
|
cwd: dir,
|
|
1579
|
-
prompt: buildDocumentPrompt(ctx),
|
|
1684
|
+
prompt: buildDocumentPrompt(ctx, changedFiles),
|
|
1580
1685
|
permissionMode: ctx.permissionMode,
|
|
1581
1686
|
maxTurns: DOCUMENT_MAX_TURNS,
|
|
1582
1687
|
abortController: abort
|
|
@@ -1585,6 +1690,8 @@ async function processReviseJob(ctx, dir, resumed, config, abort) {
|
|
|
1585
1690
|
} catch (err) {
|
|
1586
1691
|
console.warn(` wiki update skipped: ${errorMessage2(err)}`);
|
|
1587
1692
|
}
|
|
1693
|
+
} else if (!wikiExists) {
|
|
1694
|
+
console.log(` no .flumecode/wiki \u2014 skipping wiki reconcile for ${ctx.jobId}`);
|
|
1588
1695
|
}
|
|
1589
1696
|
const { outcome, autoMerged } = await pushAndOpenPr(ctx, dir, config, abort, {
|
|
1590
1697
|
rebase: !resumed
|
|
@@ -1725,6 +1832,7 @@ async function pollLoop(config) {
|
|
|
1725
1832
|
scheduleCancelPoll();
|
|
1726
1833
|
try {
|
|
1727
1834
|
resetUsage();
|
|
1835
|
+
resetSocketResults();
|
|
1728
1836
|
const { text, widgets, pr, plans, report } = await processJob(ctx, config, abort);
|
|
1729
1837
|
const usage = getUsage();
|
|
1730
1838
|
await reportJob(config, ctx.jobId, {
|
package/package.json
CHANGED
package/skills-plugin/rules/technical-writing.md
ADDED
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: technical-writing
|
|
3
|
+
description: >-
|
|
4
|
+
Inline-code conventions for agent-authored plan and report prose: wrap code
|
|
5
|
+
identifiers in backticks so they render as inline code.
|
|
6
|
+
---
|
|
7
|
+
|
|
8
|
+
# Technical Writing
|
|
9
|
+
|
|
10
|
+
## Inline code
|
|
11
|
+
|
|
12
|
+
Wrap code identifiers — function names, variable names, type names, file names, commands, and flags — in inline backticks so they render as inline code. For example: `getCodingSessionsForRequest`, not getCodingSessionsForRequest.
|
|
13
|
+
|
|
14
|
+
This convention applies to all free-text fields in plans and reports: goals, step descriptions, acceptance criteria, summaries, code-quality notes, and caveats.
|
|
package/skills-plugin/skills/implement-plan/SKILL.md
CHANGED
|
@@ -43,6 +43,9 @@ put it in the prompt, the subagent doesn't have it.
|
|
|
43
43
|
- **Coding guidelines.** This prompt contains a `# Coding Guidelines` section.
|
|
44
44
|
Copy it verbatim into the prompt of the implementation subagent and the
|
|
45
45
|
code-quality-review subagent so they hold the work to it.
|
|
46
|
+
- **Technical-writing guidelines.** This prompt contains a `# Technical Writing`
|
|
47
|
+
section. Copy it verbatim into the prompt of the report subagent so it applies
|
|
48
|
+
the inline-code conventions to all free-text fields it authors.
|
|
46
49
|
|
|
47
50
|
## Inputs
|
|
48
51
|
|
|
@@ -58,32 +61,39 @@ the next step.
|
|
|
58
61
|
|
|
59
62
|
1. **Orient.** Read the plan/request and the FlumeCode wiki (if any) enough to
|
|
60
63
|
write good task prompts. Extract the **Steps** and the **Acceptance criteria
|
|
61
|
-
(ACs)**.
|
|
64
|
+
(ACs)**. Also discover the project's verification commands by checking these
|
|
65
|
+
sources in order: `package.json` scripts (look for `build`, `typecheck`,
|
|
66
|
+
`lint`, `test`), `CLAUDE.md`, any `.flumecode/wiki/` page that mentions
|
|
67
|
+
commands, and `Makefile`. Capture the explicit command list; you will include
|
|
68
|
+
it in the prompts you write for the Implement, Verify, and Fix-loop subagents
|
|
69
|
+
so none of them re-derive it. Do not implement.
|
|
62
70
|
|
|
63
71
|
2. **Implement** — Task, `model: "sonnet"`. Give the subagent: the plan steps, a
|
|
64
|
-
pointer to the wiki/orientation,
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
72
|
+
pointer to the wiki/orientation, the coding guidelines (verbatim), and the
|
|
73
|
+
explicit verification command list the orchestrator discovered in the Orient
|
|
74
|
+
step. Tell it to make all the code changes in the working tree to satisfy the
|
|
75
|
+
plan, then self-verify by running the verification commands the orchestrator
|
|
76
|
+
already discovered and passed in the task prompt. If the orchestrator did not
|
|
77
|
+
provide a list (e.g. could not determine commands confidently), fall back to
|
|
78
|
+
discovering them from the same sources: `package.json` scripts (look for
|
|
79
|
+
`build`, `typecheck`, `lint`, `test`), `CLAUDE.md`, any `.flumecode/wiki/`
|
|
80
|
+
page that mentions commands, and `Makefile`. Run each command and fix any
|
|
81
|
+
errors that the edits introduced before returning. If no build/test setup
|
|
82
|
+
exists in this repo, note that and move on — do not fail. End by reporting:
|
|
83
|
+
the verification commands it ran and their pass/fail results, which files it
|
|
84
|
+
changed, and how each plan step was addressed. It must not commit or push.
|
|
76
85
|
|
|
77
86
|
3. **Verify (build & tests)** — Task, `model: "sonnet"`, read-only. This step
|
|
78
87
|
gives the orchestrator an objective, independent build/test signal before the
|
|
79
88
|
subjective AC and quality reviews. Tell the subagent to:
|
|
80
|
-
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
89
|
+
- Run the verification commands provided by the orchestrator in the task
|
|
90
|
+
prompt. If none were provided, fall back to discovering them from
|
|
91
|
+
`package.json` scripts (look for `build`, `typecheck`, `lint`, `test`),
|
|
92
|
+
`CLAUDE.md`, `.flumecode/wiki/` (any page that mentions commands), and
|
|
93
|
+
`Makefile`.
|
|
94
|
+
- Run each command and record: the exact command, whether it passed or failed,
|
|
95
|
+
and — for any failure — a short excerpt of the failing output (enough to
|
|
96
|
+
diagnose the problem).
|
|
87
97
|
- If no build/test setup exists in this repo, say so explicitly and pass the
|
|
88
98
|
gate.
|
|
89
99
|
- Return a structured per-check result: command, pass/fail, failing-output
|
|
@@ -94,19 +104,18 @@ the next step.
|
|
|
94
104
|
subagent the full AC list and tell it to verify each one against the actual
|
|
95
105
|
changes (run `git --no-pager diff`, read the changed files, run tests/build if
|
|
96
106
|
useful). For **each** AC it must return: the criterion text verbatim, a verdict
|
|
97
|
-
(**met / not met / unclear**), a one-or-two-sentence rationale, and
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
report step. In addition to per-AC verdicts, cross-check that every hunk in `git --no-pager diff` is cited by at least one AC's evidence; report any uncovered hunk as a coverage gap (signalling a missing AC or an out-of-scope change).
|
|
107
|
+
(**met / not met / unclear**), a one-or-two-sentence rationale, and the relevant
|
|
108
|
+
file paths and change locations that support the verdict. A _met_ AC should cite
|
|
109
|
+
at least one [file/location]; _not met_ / _unclear_ may cite none. **Ground
|
|
110
|
+
every verdict in the actual diff:** a criterion may be marked _met_ only if
|
|
111
|
+
`git --no-pager diff` really contains the change that satisfies it — never
|
|
112
|
+
reconstruct from the plan or from what the implement subagent claimed. If
|
|
113
|
+
`git --no-pager diff` is empty, the implementation produced no changes: no
|
|
114
|
+
criterion may be _met_, and the review must say so. Tell it to return this as a
|
|
115
|
+
clean, structured list so you can hand it straight to the report step. In
|
|
116
|
+
addition to per-AC verdicts, cross-check `git --no-pager diff` against the ACs;
|
|
117
|
+
note any files or areas that appear changed but don't map to any AC as a coverage
|
|
118
|
+
gap (signalling a missing AC or an out-of-scope change).
|
|
110
119
|
|
|
111
120
|
5. **Code-quality review** — Task, `model: "opus"`, read-only. Give the subagent
|
|
112
121
|
the coding guidelines (verbatim) and tell it to review the changes for
|
|
@@ -117,26 +126,36 @@ the next step.
|
|
|
117
126
|
review (step 4) reports any _not met_ AC, or the quality review (step 5)
|
|
118
127
|
reports any _must-fix_ finding: spawn an **Implement/fix** subagent (Task,
|
|
119
128
|
`model: "sonnet"`) whose prompt lists exactly those findings and tells it to
|
|
120
|
-
resolve them without regressing the rest.
|
|
121
|
-
|
|
122
|
-
Verify
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
129
|
+
resolve them without regressing the rest. Include the verification command list
|
|
130
|
+
from the Orient step in the fix subagent's prompt (the same list passed to
|
|
131
|
+
Implement and Verify), so the fix subagent does not need to re-derive it. When
|
|
132
|
+
a Verify failure triggered the fix, include the failing command(s) and their
|
|
133
|
+
error output excerpt(s) from the Verify result in the fix subagent's prompt so
|
|
134
|
+
it has the full context. After each fix iteration, re-run the Verify step (step 3) in addition to any AC or quality review that failed. Repeat at most **2**
|
|
135
|
+
times. If something still fails after that, stop looping and record the gap
|
|
136
|
+
honestly in the report — do not hide it.
|
|
137
|
+
|
|
138
|
+
7. **Report** — Task, `model: "opus"`, read-only. Give the subagent the AC
|
|
139
|
+
verdicts (with criterion text, from step 4), the Verify results (from step 3),
|
|
140
|
+
and the quality findings, and tell it to run `git --no-pager diff` itself as
|
|
141
|
+
the **single source of truth** for the report. Do not pass the full plan — the
|
|
142
|
+
AC verdicts carry each criterion verbatim, and the live `git --no-pager diff`
|
|
143
|
+
is the authoritative source for evidence; re-inlining the full plan is
|
|
144
|
+
redundant. Keep each subagent prompt to the minimal self-contained slice it
|
|
145
|
+
needs. Include the `# Technical Writing` section
|
|
146
|
+
(copied verbatim from this prompt) in the report subagent's prompt — the same
|
|
147
|
+
way `# Coding Guidelines` is forwarded to implementation subagents — so it
|
|
148
|
+
applies the inline-code conventions to all free-text fields it authors. Every `evidence` hunk it submits must be
|
|
149
|
+
copied verbatim from that live diff, including each hunk's `@@ -a,b +c,d @@` header line(s) (do not strip them — the report renders file line numbers from them) — it must drop or correct any hunk carried
|
|
133
150
|
over from step 4 that no longer appears in the actual diff, and the **Files
|
|
134
151
|
changed** list must come from `git --no-pager diff --stat`, not from what an
|
|
135
152
|
earlier subagent claimed. Tell it to enumerate all hunks from `git --no-pager diff` and ensure each is attached to ≥1 AC's `evidence`; any hunk mapping to no plan AC goes under `## Caveats / follow-ups` as an explicit unattributed change. **If `git --no-pager diff` is empty, the
|
|
136
153
|
implementation changed nothing:** the report must say so plainly — an honest
|
|
137
154
|
`summary`, no AC marked `met` with evidence — and must never describe edits
|
|
138
155
|
that aren't in the diff. Tell it to submit the user-facing report by calling
|
|
139
|
-
the **`submit_report`** tool — it has that tool available.
|
|
156
|
+
the **`submit_report`** tool — it has that tool available. The report MUST be
|
|
157
|
+
submitted via `submit_report` (structured); final assistant prose is only a
|
|
158
|
+
last-resort fallback and will be flagged as an unverified AC review. It must call
|
|
140
159
|
`submit_report` exactly once and must not edit any files.
|
|
141
160
|
|
|
142
161
|
8. **Confirm and end.** Once the report subagent has called `submit_report`, you are
|
|
@@ -152,12 +171,14 @@ The report subagent calls `submit_report` with these fields:
|
|
|
152
171
|
- **`filesChanged`** — markdown list of files changed (from the diff). Rendered under `## Files changed`.
|
|
153
172
|
- **`codeQuality`** — the code-quality review outcome and anything left as nice-to-have. Rendered under `## Code quality`.
|
|
154
173
|
- **`caveats`** — anything deferred, unmet, or worth a human's eyes, including diff hunks that map to no plan AC. Write `None.` if there is nothing to report. Rendered under `## Caveats / follow-ups`.
|
|
155
|
-
- **`acceptanceCriteria`** — one entry per AC from the plan
|
|
174
|
+
- **`acceptanceCriteria`** — EXACTLY one entry per AC from the plan (same count and
|
|
175
|
+
order). The runner counts the plan's ACs and warns on any mismatch — do not merge,
|
|
176
|
+
split, drop, or invent criteria. Each entry:
|
|
156
177
|
- `criterion` — the AC text verbatim.
|
|
157
178
|
- `status` — `"met"` / `"not_met"` / `"unclear"`, mirroring the AC review.
|
|
158
179
|
- `rationale` — one or two sentences on why the verdict holds.
|
|
159
180
|
- `evidence` — an array of `{ file, hunk, note? }`, where `hunk` is copied
|
|
160
|
-
verbatim from the live `git --no-pager diff` and proves the verdict (`note`
|
|
181
|
+
verbatim from the live `git --no-pager diff`, including each hunk's `@@ -a,b +c,d @@` header line(s) (do not strip them — the report renders file line numbers from them), and proves the verdict (`note`
|
|
161
182
|
optionally explains it). Never include a hunk that isn't in the actual diff. Cite
|
|
162
183
|
the supporting hunk(s) for a `met` criterion; `evidence` may be empty for `not_met` / `unclear`.
|
|
163
184
|
|
|
@@ -173,4 +194,10 @@ The report subagent calls `submit_report` with these fields:
|
|
|
173
194
|
once — not as prose for you to echo. Each acceptance criterion carries the diff
|
|
174
195
|
hunk(s) that prove its verdict, copied verbatim from the live `git --no-pager diff`
|
|
175
196
|
— never fabricated. An empty diff means an honest "nothing changed" report.
|
|
197
|
+
- The report MUST be submitted via `submit_report` (structured). Final assistant prose
|
|
198
|
+
is only a last-resort fallback and will be flagged as an unverified AC review by the
|
|
199
|
+
runner.
|
|
200
|
+
- `acceptanceCriteria` must have EXACTLY one entry per plan acceptance criterion (same
|
|
201
|
+
count and order). The runner counts the plan's ACs and warns on any mismatch, so do
|
|
202
|
+
not merge, split, drop, or invent criteria.
|
|
176
203
|
- The report exists so the human reviewer can verify each acceptance criterion is satisfied — the ACs and their diff evidence are the primary review surface.
|
|
@@ -3,7 +3,7 @@ name: lint-plugin-generator
|
|
|
3
3
|
description: >-
|
|
4
4
|
Generate a concrete plan to install the FlumeCode Lint plugin for THIS repo —
|
|
5
5
|
a .flumecode/plugins/lint/ manifest wired to the pre-commit socket that runs
|
|
6
|
-
the repo's lint/format checks
|
|
6
|
+
the repo's lint/format checks.
|
|
7
7
|
---
|
|
8
8
|
|
|
9
9
|
# lint-plugin-generator
|
|
@@ -37,60 +37,30 @@ instruct the implementer to create:
|
|
|
37
37
|
{
|
|
38
38
|
"key": "lint",
|
|
39
39
|
"socket": "pre-commit",
|
|
40
|
-
"run": "
|
|
41
|
-
"heartbeat": {
|
|
42
|
-
"url": "https://<flumecode-base-url>/api/runner/plugins/heartbeat",
|
|
43
|
-
"token": "<repo-scoped-token>"
|
|
44
|
-
}
|
|
40
|
+
"run": "<detected lint/format/typecheck command chain>"
|
|
45
41
|
}
|
|
46
42
|
```
|
|
47
43
|
|
|
48
|
-
`
|
|
49
|
-
them in via the FlumeCode web UI after installation. The plan must **not**
|
|
50
|
-
commit a real token value.
|
|
51
|
-
|
|
52
|
-
### Artifact 2 — `.flumecode/plugins/lint/run.mjs`
|
|
53
|
-
|
|
54
|
-
A Node.js ES module that:
|
|
55
|
-
|
|
56
|
-
1. Reads `plugin.json` from the same directory to get `heartbeat.url` and `heartbeat.token`.
|
|
57
|
-
2. Determines the current git branch (`git rev-parse --abbrev-ref HEAD`).
|
|
58
|
-
3. Runs each detected lint/format/typecheck command with `child_process.execSync` (stdio: `inherit`).
|
|
59
|
-
4. On success, POSTs to the heartbeat URL:
|
|
60
|
-
`{ repoId: process.env.FLUMECODE_REPO_ID, pluginKey: "lint", branch, status: "pass", timestamp: new Date().toISOString() }`
|
|
61
|
-
(`FLUMECODE_REPO_ID` — the runner will inject this in Plan 2; if it is not yet available, the heartbeat may omit `repoId` or read it from `.flumecode/config.json`)
|
|
62
|
-
5. On any command failure, exits non-zero (and optionally POSTs `status: "fail"`).
|
|
63
|
-
|
|
64
|
-
The `repoId` comes from the `FLUMECODE_REPO_ID` environment variable that the
|
|
65
|
-
runner sets. The heartbeat request uses `Authorization: Bearer <token>`.
|
|
44
|
+
Derive `run` from the repo's detected commands (e.g. `pnpm exec lint-staged && pnpm lint && pnpm typecheck && pnpm test`). Do not hard-code this example chain — use the actual commands discovered in the Orient step.
|
|
66
45
|
|
|
67
46
|
### Manifest shape
|
|
68
47
|
|
|
69
48
|
The manifest `plugin.json` must have exactly these fields:
|
|
70
49
|
|
|
71
50
|
```
|
|
72
|
-
{ key, socket, run, heartbeat: { url, token } }
|
|
51
|
+
{ key, socket, run }
|
|
73
52
|
```
|
|
74
53
|
|
|
75
54
|
This is the shape the FlumeCode plugin loader expects.
|
|
76
55
|
|
|
77
|
-
### Heartbeat endpoint
|
|
78
|
-
|
|
79
|
-
`POST /api/runner/plugins/heartbeat` with JSON body:
|
|
80
|
-
`{ repoId, pluginKey, branch, status, timestamp }`
|
|
81
|
-
(this endpoint does not exist yet — it will be created by Plan 2; include this as a step in the generated plan or as a prerequisite note)
|
|
82
|
-
|
|
83
56
|
### Acceptance criteria the plan must include
|
|
84
57
|
|
|
85
|
-
- `.flumecode/plugins/lint/plugin.json` exists with `key: "lint"`, `socket: "pre-commit"`, `run
|
|
86
|
-
-
|
|
87
|
-
- A successful run POSTs a heartbeat with `{ repoId, pluginKey: "lint", branch, status: "pass", timestamp }`.
|
|
58
|
+
- `.flumecode/plugins/lint/plugin.json` exists with `key: "lint"`, `socket: "pre-commit"`, and `run` set to the detected command chain.
|
|
59
|
+
- The `run` command exits non-zero on any lint/format/typecheck failure.
|
|
88
60
|
|
|
89
61
|
## Always
|
|
90
62
|
|
|
91
63
|
- Stay read-only. Produce the plan via `submit_plan`; never edit files.
|
|
92
64
|
- The plan must be specific enough for an `implement-plan` run to execute
|
|
93
65
|
without re-deriving the commands — include the actual detected commands in
|
|
94
|
-
the step descriptions and
|
|
95
|
-
- Leave `heartbeat.url` and `heartbeat.token` as placeholders — document that
|
|
96
|
-
the user fills them in via the FlumeCode web UI after installation.
|
|
66
|
+
the step descriptions and artifact content.
|
|
@@ -87,6 +87,10 @@ Field-by-field guidance:
|
|
|
87
87
|
- **`risks`** — anything that could change the approach or surface a problem.
|
|
88
88
|
- **`outOfScope`** — what you are deliberately not doing.
|
|
89
89
|
|
|
90
|
+
**Formatting.** Apply the `# Technical Writing` guidelines from the prompt to all
|
|
91
|
+
free-text fields: wrap code identifiers (function names, variable names, type names,
|
|
92
|
+
file names, commands, and flags) in inline backticks.
|
|
93
|
+
|
|
90
94
|
Cite real files you inspected. Prefer the codebase's existing patterns over
|
|
91
95
|
introducing new ones. Be specific enough that another agent could execute the
|
|
92
96
|
plan without re-deriving it.
|
|
@@ -95,4 +95,6 @@ before you finish. (You don't need to `git add`; the runner stages and commits f
|
|
|
95
95
|
|
|
96
96
|
Your last message **is** the report posted to the session thread. Write it for the
|
|
97
97
|
user: list which files conflicted and, briefly, how you resolved each, plus how you
|
|
98
|
-
verified (build/tests).
|
|
98
|
+
verified (build/tests). Wrap conflicted file names and code identifiers in inline
|
|
99
|
+
backticks per the `# Technical Writing` section. The runner appends the pull-request
|
|
100
|
+
link, so don't add one.
|
|
@@ -60,8 +60,9 @@ essentials:
|
|
|
60
60
|
- **Subagents start blank.** Each Task subagent sees only the prompt you give it —
|
|
61
61
|
not this thread, the plan, or the prior report. Make every prompt self-contained:
|
|
62
62
|
include the specific change requested, the relevant plan/report excerpt, the code
|
|
63
|
-
context,
|
|
64
|
-
|
|
63
|
+
context, the coding guidelines (verbatim, from the `# Coding Guidelines` section
|
|
64
|
+
in the prompt), and — for the report subagent — the technical-writing guidelines
|
|
65
|
+
(verbatim, from the `# Technical Writing` section in the prompt).
|
|
65
66
|
- **Scope the work to the request.** This is a fine-tune of an existing
|
|
66
67
|
implementation, not a rebuild. Change only what the user asked for plus what that
|
|
67
68
|
change strictly requires; don't regress the rest of the plan.
|