@flumecode/runner 0.13.0 → 0.15.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/cli.js CHANGED
@@ -25,6 +25,10 @@ function writeConfig(config) {
25
25
  writeFileSync(configPath, JSON.stringify(config, null, 2), { mode: 384 });
26
26
  }
27
27
 
28
+ // src/run.ts
29
+ import { existsSync as existsSync4 } from "node:fs";
30
+ import { join as join5 } from "node:path";
31
+
28
32
  // src/version.ts
29
33
  import { readFileSync as readFileSync2 } from "node:fs";
30
34
  import { fileURLToPath } from "node:url";
@@ -373,6 +377,10 @@ async function hasChanges(dir) {
373
377
  const { stdout: stdout2 } = await git(["-C", dir, "status", "--porcelain"]);
374
378
  return stdout2.trim().length > 0;
375
379
  }
380
+ async function gitDiffStat(dir) {
381
+ const { stdout: stdout2 } = await git(["-C", dir, "--no-pager", "diff", "--stat"]);
382
+ return stdout2;
383
+ }
376
384
  var PreCommitError = class extends Error {
377
385
  constructor(log) {
378
386
  super("pre-commit checks failed");
@@ -609,14 +617,30 @@ function parseManifest(raw) {
609
617
 
610
618
  // src/plugins/socket.ts
611
619
  var exec2 = promisify2(execCb);
620
+ var MAX_OUTPUT = 8 * 1024;
621
+ function cap(s) {
622
+ return s.length <= MAX_OUTPUT ? s : s.slice(s.length - MAX_OUTPUT);
623
+ }
624
+ var lastSocketResults = [];
625
+ function resetSocketResults() {
626
+ lastSocketResults = [];
627
+ }
628
+ function getSocketResults() {
629
+ return lastSocketResults;
630
+ }
612
631
  async function runSocket(socketName, dir) {
613
632
  const plugins = (await loadPlugins(dir)).filter((p) => p.socket === socketName);
633
+ const results = [];
614
634
  for (const plugin of plugins) {
615
635
  const result = await runPluginCommand(plugin.run, dir);
616
636
  if (result.exitCode !== 0) {
637
+ results.push({ key: plugin.key, status: "failed", output: cap(result.output) });
638
+ lastSocketResults = results;
617
639
  throw new PreCommitError(`[plugin:${plugin.key}] ${result.output}`);
618
640
  }
641
+ results.push({ key: plugin.key, status: "passed", output: cap(result.output) });
619
642
  }
643
+ lastSocketResults = results;
620
644
  }
621
645
  async function runPluginCommand(command2, cwd) {
622
646
  try {
@@ -707,6 +731,11 @@ function widgetPosted(kind) {
707
731
  // src/plan.ts
708
732
  import { createSdkMcpServer as createSdkMcpServer2, tool as tool2 } from "@anthropic-ai/claude-agent-sdk";
709
733
  import { z as z2 } from "zod";
734
+
735
+ // src/schema-hints.ts
736
+ var INLINE_CODE_HINT = "Wrap code identifiers (function, variable, type, and file names, commands, and flags) in inline backticks, e.g. `getCodingSessionsForRequest`.";
737
+
738
+ // src/plan.ts
710
739
  var SERVER_NAME2 = "flume_plan";
711
740
  var SUBMIT_PLAN = "submit_plan";
712
741
  var PLAN_TOOL_NAME = `mcp__${SERVER_NAME2}__${SUBMIT_PLAN}`;
@@ -717,7 +746,9 @@ var pseudoCodeEntrySchema = z2.object({
717
746
  });
718
747
  var stepSchema = z2.object({
719
748
  title: z2.string().min(1).describe("A concise imperative title for this step."),
720
- description: z2.string().min(1).describe("What changes and why \u2014 the rationale for this step."),
749
+ description: z2.array(z2.string().min(1)).min(1).describe(
750
+ "Bullet points that explain this step's change so a reviewer can judge whether the design is correct. Each array item is one short, self-contained bullet \u2014 not a single paragraph, and not a restatement of the pseudo code. " + INLINE_CODE_HINT
751
+ ),
721
752
  pseudoCode: z2.array(pseudoCodeEntrySchema).optional().describe(
722
753
  "Per-file pseudo code. Provide an entry for every non-documentation file this step touches. Each entry contains the file path and pseudo code describing the changes to that file."
723
754
  )
@@ -727,11 +758,11 @@ var planInputSchema = {
727
758
  "A concise, descriptive name for THIS plan. Must be distinct from the request title and from any sibling plans on the same request. Keep it under 120 characters."
728
759
  ),
729
760
  scope: z2.enum(["feat", "fix", "chore", "docs", "test", "refactor"]).describe("The primary intent of the change."),
730
- goal: z2.string().min(1).describe("One or two sentences stating the outcome."),
761
+ goal: z2.string().min(1).describe("One or two sentences stating the outcome. " + INLINE_CODE_HINT),
731
762
  assumptions: z2.array(z2.string()).describe("Anything decided during planning, including unanswered defaults."),
732
763
  steps: z2.array(stepSchema).min(1).describe("Ordered list of changes. Each step says what and why, with file references."),
733
764
  acceptanceCriteria: z2.array(z2.string().min(1)).min(2).describe(
734
- "Concrete, deterministically-checkable conditions that together define done. Each names a trigger/precondition and the exact observable result (run X -> output Y; file Z contains W; f(a) returns b) \u2014 no vague adjectives, not a restatement of a step. The set must collectively cover every step's change. At least 2 required."
765
+ "Concrete, deterministically-checkable conditions that together define done. Each names a trigger/precondition and the exact observable result (run X -> output Y; file Z contains W; f(a) returns b) \u2014 no vague adjectives, not a restatement of a step. The set must collectively cover every step's change. At least 2 required. " + INLINE_CODE_HINT
735
766
  ),
736
767
  risks: z2.array(z2.string()).describe("Anything that could change the approach."),
737
768
  outOfScope: z2.array(z2.string()).describe("What is deliberately not being done.")
@@ -757,7 +788,9 @@ function renderPlan(plan) {
757
788
  lines2.push("");
758
789
  lines2.push(`### ${i + 1}. ${step.title}`);
759
790
  lines2.push("");
760
- lines2.push(step.description);
791
+ for (const bullet of step.description) {
792
+ lines2.push(`- ${bullet}`);
793
+ }
761
794
  if (step.pseudoCode && step.pseudoCode.length > 0) {
762
795
  for (const entry of step.pseudoCode) {
763
796
  lines2.push("");
@@ -827,6 +860,19 @@ function createPlanTooling() {
827
860
  });
828
861
  return { mcpServer, getPlans: () => renderedPlans };
829
862
  }
863
+ function countPlanAcceptanceCriteria(planBody) {
864
+ if (!planBody) return 0;
865
+ const lines2 = planBody.split("\n");
866
+ const start2 = lines2.findIndex((l) => l.trim() === "## Acceptance criteria");
867
+ if (start2 === -1) return 0;
868
+ let count = 0;
869
+ for (let i = start2 + 1; i < lines2.length; i++) {
870
+ const line = lines2[i] ?? "";
871
+ if (line.startsWith("## ")) break;
872
+ if (line.startsWith("- [ ] ")) count++;
873
+ }
874
+ return count;
875
+ }
830
876
 
831
877
  // src/report.ts
832
878
  import { createSdkMcpServer as createSdkMcpServer3, tool as tool3 } from "@anthropic-ai/claude-agent-sdk";
@@ -842,28 +888,28 @@ var STATUS_ICON = {
842
888
  var evidenceSchema = z3.object({
843
889
  file: z3.string().min(1).describe("Repo-relative path the hunk comes from."),
844
890
  hunk: z3.string().min(1).describe(
845
- "A unified-diff hunk body proving the criterion \u2014 the lines that matter, not the whole file. Rendered verbatim as a ```diff block."
891
+ "A unified-diff hunk proving the criterion \u2014 the lines that matter, not the whole file. MUST keep the `@@ -a,b +c,d @@` hunk header line(s) exactly as they appear in `git --no-pager diff`; the report renders file line numbers from them. Rendered verbatim as a ```diff block."
846
892
  ),
847
893
  note: z3.string().optional().describe("Optional one-line explanation of why this hunk satisfies the criterion.")
848
894
  });
849
895
  var acVerdictSchema = z3.object({
850
896
  criterion: z3.string().min(1).describe("The acceptance-criterion text, verbatim from the plan."),
851
897
  status: z3.enum(["met", "not_met", "unclear"]).describe("Verdict for this criterion, verified against the actual diff."),
852
- rationale: z3.string().min(1).describe("One or two sentences on why the verdict holds."),
898
+ rationale: z3.string().min(1).describe("One or two sentences on why the verdict holds. " + INLINE_CODE_HINT),
853
899
  evidence: z3.array(evidenceSchema).describe(
854
900
  "Diff hunks proving the verdict, copied verbatim from git --no-pager diff. Across ALL criteria the evidence must collectively cover every hunk in the diff \u2014 each changed hunk appears under at least one criterion. Cite the relevant hunk(s) for a met criterion; may be empty for not_met / unclear."
855
901
  )
856
902
  });
857
903
  var reportInputSchema = {
858
- summary: z3.string().min(1).describe("One or two sentences on what was implemented."),
904
+ summary: z3.string().min(1).describe("One or two sentences on what was implemented. " + INLINE_CODE_HINT),
859
905
  filesChanged: z3.string().min(1).describe(
860
906
  "Markdown: the list of files changed (from the diff). Rendered under '## Files changed'."
861
907
  ),
862
908
  codeQuality: z3.string().min(1).describe(
863
- "Markdown: the code-quality review outcome and anything left as nice-to-have. Rendered under '## Code quality'."
909
+ "Markdown: the code-quality review outcome and anything left as nice-to-have. Rendered under '## Code quality'. " + INLINE_CODE_HINT
864
910
  ),
865
911
  caveats: z3.string().min(1).describe(
866
- "Markdown: anything deferred, unmet, or worth a human's eyes, incl. diff hunks that map to no plan AC. Write 'None.' if nothing. Rendered under '## Caveats / follow-ups'."
912
+ "Markdown: anything deferred, unmet, or worth a human's eyes, incl. diff hunks that map to no plan AC. Write 'None.' if nothing. Rendered under '## Caveats / follow-ups'. " + INLINE_CODE_HINT
867
913
  ),
868
914
  acceptanceCriteria: z3.array(acVerdictSchema).min(1).describe(
869
915
  "One entry per acceptance criterion from the plan, in plan order, each with a verdict and the diff evidence behind it."
@@ -1081,6 +1127,10 @@ function stripFrontMatter(raw) {
1081
1127
  }
1082
1128
 
1083
1129
  // src/prompt.ts
1130
+ function appendRule(lines2, intro, ruleName) {
1131
+ lines2.push("", intro, "", loadRule(ruleName));
1132
+ }
1133
+ var WRITING_INTRO = "These technical-writing guidelines apply to the plan and report prose you author in this run:";
1084
1134
  function turnHeading(turn, agentName) {
1085
1135
  if (turn.role === "user") return "User";
1086
1136
  if (turn.failed) return `${agentName} (this run ended in an error)`;
@@ -1114,6 +1164,7 @@ function buildPrompt(ctx) {
1114
1164
  loadRule("coding-guideline")
1115
1165
  );
1116
1166
  }
1167
+ appendRule(lines2, WRITING_INTRO, "technical-writing");
1117
1168
  lines2.push("", `# Request: ${ctx.request?.title ?? ""}`);
1118
1169
  if (ctx.request?.body) {
1119
1170
  lines2.push("", ctx.request.body);
@@ -1140,6 +1191,10 @@ function buildRevisePrompt(ctx) {
1140
1191
  "",
1141
1192
  loadRule("coding-guideline"),
1142
1193
  "",
1194
+ WRITING_INTRO,
1195
+ "",
1196
+ loadRule("technical-writing"),
1197
+ "",
1143
1198
  `# Plan: ${ctx.request?.title ?? ""}`
1144
1199
  ];
1145
1200
  if (ctx.request?.body) {
@@ -1166,6 +1221,10 @@ function buildResolvePrompt(ctx, related = []) {
1166
1221
  "",
1167
1222
  loadRule("coding-guideline"),
1168
1223
  "",
1224
+ WRITING_INTRO,
1225
+ "",
1226
+ loadRule("technical-writing"),
1227
+ "",
1169
1228
  `# Plan: ${ctx.request?.title ?? ""}`
1170
1229
  ];
1171
1230
  if (ctx.request?.body) {
@@ -1190,7 +1249,7 @@ function buildResolvePrompt(ctx, related = []) {
1190
1249
  );
1191
1250
  return lines2.join("\n");
1192
1251
  }
1193
- function buildDocumentPrompt(ctx) {
1252
+ function buildDocumentPrompt(ctx, changedFiles) {
1194
1253
  const lines2 = [
1195
1254
  `You are "${ctx.agentName}" maintaining the repository wiki for ${ctx.repo.fullName}.`,
1196
1255
  `An implementation just ran in this working directory to satisfy the request below; its changes are uncommitted in the working tree.`,
@@ -1202,6 +1261,14 @@ function buildDocumentPrompt(ctx) {
1202
1261
  lines2.push("", ctx.request.body);
1203
1262
  }
1204
1263
  appendThread(lines2, ctx);
1264
+ if (changedFiles && changedFiles.trim()) {
1265
+ lines2.push(
1266
+ "",
1267
+ "Files changed by this implementation (reconcile only the wiki pages these affect \u2014 do not re-survey the whole repo):",
1268
+ "",
1269
+ changedFiles.trim()
1270
+ );
1271
+ }
1205
1272
  lines2.push("", "When done, reply with a one- or two-line summary of the wiki changes you made.");
1206
1273
  return lines2.join("\n");
1207
1274
  }
@@ -1464,13 +1531,15 @@ async function processChatJob(ctx, dir, config, abort) {
1464
1531
  console.log(` \u2026job ${ctx.jobId} posted ${result.widgets.length} widget(s); awaiting reply`);
1465
1532
  return { text: reply, widgets: result.widgets };
1466
1533
  }
1534
+ const wikiExists = existsSync4(join5(dir, ".flumecode", "wiki"));
1467
1535
  let documented = false;
1468
- if (ctx.permissionMode !== "plan" && await hasChanges(dir)) {
1536
+ if (ctx.permissionMode !== "plan" && wikiExists && await hasChanges(dir)) {
1469
1537
  try {
1538
+ const changedFiles = await gitDiffStat(dir);
1470
1539
  console.log(` \u2026updating wiki for job ${ctx.jobId}`);
1471
1540
  await runClaudeCode({
1472
1541
  cwd: dir,
1473
- prompt: buildDocumentPrompt(ctx),
1542
+ prompt: buildDocumentPrompt(ctx, changedFiles),
1474
1543
  permissionMode: ctx.permissionMode,
1475
1544
  maxTurns: DOCUMENT_MAX_TURNS,
1476
1545
  abortController: abort
@@ -1479,6 +1548,8 @@ async function processChatJob(ctx, dir, config, abort) {
1479
1548
  } catch (err) {
1480
1549
  console.warn(` wiki update skipped: ${errorMessage2(err)}`);
1481
1550
  }
1551
+ } else if (ctx.permissionMode !== "plan" && !wikiExists) {
1552
+ console.log(` no .flumecode/wiki \u2014 skipping wiki reconcile for ${ctx.jobId}`);
1482
1553
  }
1483
1554
  const { outcome, autoMerged } = await pushAndOpenPr(ctx, dir, config, abort);
1484
1555
  reply += outcomeBanner(outcome, { branch: ctx.repo.checkoutBranch, documented, autoMerged });
@@ -1487,12 +1558,24 @@ async function processChatJob(ctx, dir, config, abort) {
1487
1558
  function reportClaimsWork(report) {
1488
1559
  return !!report && report.acceptanceCriteria.some((ac) => ac.status === "met" && ac.evidence.length > 0);
1489
1560
  }
1561
+ function reportMeetsAcContract(report, expectedAcCount) {
1562
+ if (expectedAcCount === 0) return true;
1563
+ if (!report) return false;
1564
+ return report.acceptanceCriteria.length === expectedAcCount;
1565
+ }
1566
+ function buildAcWarningBanner(report, expectedAcCount) {
1567
+ if (!report)
1568
+ return "> \u26A0\uFE0F **Unverified AC review** \u2014 the implementation did not submit a structured report, so its acceptance-criteria review could not be checked against the plan.";
1569
+ return `> \u26A0\uFE0F **AC review may be incomplete** \u2014 the plan has ${expectedAcCount} acceptance criteria but the report reviewed ${report.acceptanceCriteria.length}.`;
1570
+ }
1490
1571
  async function processImplementJob(ctx, dir, resumed, config, abort) {
1491
1572
  console.log(`
1492
1573
  \u25B6 Implement ${ctx.jobId} \u2014 ${ctx.repo.fullName}: "${jobTitle(ctx)}"`);
1493
1574
  const installResult = await installDependencies(dir);
1575
+ const expectedAcCount = countPlanAcceptanceCriteria(ctx.request?.body);
1494
1576
  let report;
1495
1577
  let reply;
1578
+ let warningBanner = "";
1496
1579
  for (let attempt = 0; ; attempt++) {
1497
1580
  const result = await runClaudeCode({
1498
1581
  cwd: dir,
@@ -1504,28 +1587,48 @@ async function processImplementJob(ctx, dir, resumed, config, abort) {
1504
1587
  });
1505
1588
  report = result.report ?? void 0;
1506
1589
  reply = (report ? renderReport(report) : result.text.trim()) || "(the agent produced no report)";
1507
- if (abort.signal.aborted || !reportClaimsWork(report) || await hasChanges(dir)) break;
1508
- if (attempt >= MAX_IMPLEMENT_RETRIES) {
1590
+ if (abort.signal.aborted) {
1591
+ warningBanner = "";
1592
+ break;
1593
+ }
1594
+ const treeChanged = await hasChanges(dir);
1595
+ const phantom = reportClaimsWork(report) && !treeChanged;
1596
+ const acProblem = !reportMeetsAcContract(report, expectedAcCount);
1597
+ if (!phantom && !acProblem) {
1598
+ warningBanner = "";
1599
+ break;
1600
+ }
1601
+ if (attempt < MAX_IMPLEMENT_RETRIES) {
1602
+ console.warn(
1603
+ ` implement ${ctx.jobId}: ${phantom ? "report claims changes but the working tree is clean" : "AC-review contract failed"} \u2014 re-running implementation (attempt ${attempt + 2})`
1604
+ );
1605
+ continue;
1606
+ }
1607
+ if (phantom) {
1509
1608
  throw new Error(
1510
1609
  `Implementation reported completed work (acceptance criteria met with diff evidence) but the working tree is clean after ${attempt + 1} attempt(s) \u2014 no changes were persisted, so no pull request could be opened.`
1511
1610
  );
1512
1611
  }
1513
- console.warn(
1514
- ` implement ${ctx.jobId}: report claims changes but the working tree is clean \u2014 re-running implementation (attempt ${attempt + 2})`
1515
- );
1612
+ warningBanner = buildAcWarningBanner(report, expectedAcCount);
1613
+ break;
1516
1614
  }
1615
+ if (warningBanner) reply = `${warningBanner}
1616
+
1617
+ ${reply}`;
1517
1618
  if (installResult.status === "failed") {
1518
1619
  reply += `
1519
1620
 
1520
1621
  > \u26A0\uFE0F Dependencies failed to install (\`${installResult.manager}\`); tests may not have run.`;
1521
1622
  }
1623
+ const wikiExists = existsSync4(join5(dir, ".flumecode", "wiki"));
1522
1624
  let documented = false;
1523
- if (await hasChanges(dir)) {
1625
+ if (wikiExists && await hasChanges(dir)) {
1524
1626
  try {
1627
+ const changedFiles = await gitDiffStat(dir);
1525
1628
  console.log(` \u2026updating wiki for implement ${ctx.jobId}`);
1526
1629
  await runClaudeCode({
1527
1630
  cwd: dir,
1528
- prompt: buildDocumentPrompt(ctx),
1631
+ prompt: buildDocumentPrompt(ctx, changedFiles),
1529
1632
  permissionMode: ctx.permissionMode,
1530
1633
  maxTurns: DOCUMENT_MAX_TURNS,
1531
1634
  abortController: abort
@@ -1534,15 +1637,19 @@ async function processImplementJob(ctx, dir, resumed, config, abort) {
1534
1637
  } catch (err) {
1535
1638
  console.warn(` wiki update skipped: ${errorMessage2(err)}`);
1536
1639
  }
1640
+ } else if (!wikiExists) {
1641
+ console.log(` no .flumecode/wiki \u2014 skipping wiki reconcile for ${ctx.jobId}`);
1537
1642
  }
1538
1643
  const { outcome, autoMerged } = await pushAndOpenPr(ctx, dir, config, abort, {
1539
1644
  rebase: !resumed
1540
1645
  });
1541
1646
  reply += outcomeBanner(outcome, { branch: ctx.repo.checkoutBranch, documented, autoMerged });
1647
+ const lintPlugins = getSocketResults();
1648
+ const finalReport = report && lintPlugins.length ? { ...report, lint: { plugins: lintPlugins } } : report;
1542
1649
  return {
1543
1650
  text: reply,
1544
1651
  widgets: [],
1545
- ...report ? { report } : {},
1652
+ ...finalReport ? { report: finalReport } : {},
1546
1653
  ...outcome.kind === "pr" ? { pr: outcome.pr } : {}
1547
1654
  };
1548
1655
  }
@@ -1570,13 +1677,15 @@ async function processReviseJob(ctx, dir, resumed, config, abort) {
1570
1677
  console.log(` \u2026revise ${ctx.jobId} posted ${result.widgets.length} widget(s); awaiting reply`);
1571
1678
  return { text: reply, widgets: result.widgets };
1572
1679
  }
1680
+ const wikiExists = existsSync4(join5(dir, ".flumecode", "wiki"));
1573
1681
  let documented = false;
1574
- if (await hasChanges(dir)) {
1682
+ if (wikiExists && await hasChanges(dir)) {
1575
1683
  try {
1684
+ const changedFiles = await gitDiffStat(dir);
1576
1685
  console.log(` \u2026updating wiki for revise ${ctx.jobId}`);
1577
1686
  await runClaudeCode({
1578
1687
  cwd: dir,
1579
- prompt: buildDocumentPrompt(ctx),
1688
+ prompt: buildDocumentPrompt(ctx, changedFiles),
1580
1689
  permissionMode: ctx.permissionMode,
1581
1690
  maxTurns: DOCUMENT_MAX_TURNS,
1582
1691
  abortController: abort
@@ -1585,6 +1694,8 @@ async function processReviseJob(ctx, dir, resumed, config, abort) {
1585
1694
  } catch (err) {
1586
1695
  console.warn(` wiki update skipped: ${errorMessage2(err)}`);
1587
1696
  }
1697
+ } else if (!wikiExists) {
1698
+ console.log(` no .flumecode/wiki \u2014 skipping wiki reconcile for ${ctx.jobId}`);
1588
1699
  }
1589
1700
  const { outcome, autoMerged } = await pushAndOpenPr(ctx, dir, config, abort, {
1590
1701
  rebase: !resumed
@@ -1725,6 +1836,7 @@ async function pollLoop(config) {
1725
1836
  scheduleCancelPoll();
1726
1837
  try {
1727
1838
  resetUsage();
1839
+ resetSocketResults();
1728
1840
  const { text, widgets, pr, plans, report } = await processJob(ctx, config, abort);
1729
1841
  const usage = getUsage();
1730
1842
  await reportJob(config, ctx.jobId, {
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@flumecode/runner",
3
- "version": "0.13.0",
3
+ "version": "0.15.0",
4
4
  "type": "module",
5
5
  "description": "FlumeCode local runner — claims jobs and drives your local Claude Code against a real checkout.",
6
6
  "bin": {
@@ -0,0 +1,14 @@
1
+ ---
2
+ name: technical-writing
3
+ description: >-
4
+ Inline-code conventions for agent-authored plan and report prose: wrap code
5
+ identifiers in backticks so they render as inline code.
6
+ ---
7
+
8
+ # Technical Writing
9
+
10
+ ## Inline code
11
+
12
+ Wrap code identifiers — function names, variable names, type names, file names, commands, and flags — in inline backticks so they render as inline code. For example: `getCodingSessionsForRequest`, not getCodingSessionsForRequest.
13
+
14
+ This convention applies to all free-text fields in plans and reports: goals, step descriptions, acceptance criteria, summaries, code-quality notes, and caveats.
@@ -0,0 +1,64 @@
1
+ ---
2
+ name: format-code-plugin-generator
3
+ description: >-
4
+ Generate a concrete plan to install the FlumeCode Format plugin for THIS repo —
5
+ a .flumecode/plugins/format-code/ manifest wired to the pre-commit socket that
6
+ auto-formats code (prettier --write) so changes ride into the commit.
7
+ ---
8
+
9
+ # format-code-plugin-generator
10
+
11
+ You generate a concrete, repo-specific plan to install the FlumeCode Format
12
+ plugin. You work **read-only**: inspect the repo and produce a plan via
13
+ `submit_plan`; never edit files.
14
+
15
+ ## Orient yourself first
16
+
17
+ Before producing the plan, inspect:
18
+
19
+ 1. `.flumecode/wiki/README.md` and `components/plugins.md` (if present) for context.
20
+ 2. `package.json` `scripts` — look for `format`, `format:write`, `prettier` references.
21
+ 3. `.prettierrc*` — confirm Prettier is configured.
22
+ 4. `.husky/pre-commit` — find the existing formatting step this plugin replaces.
23
+
24
+ From this, determine the **exact shell command** the `run` script should execute
25
+ (e.g. `pnpm format`). Do not hard-code — derive from the repo.
26
+
27
+ ## Produce the plan
28
+
29
+ Call `submit_plan` **once**, passing a `plans` array with one entry whose steps
30
+ instruct the implementer to create:
31
+
32
+ ### Artifact — `.flumecode/plugins/format-code/plugin.json`
33
+
34
+ ```json
35
+ {
36
+ "key": "format-code",
37
+ "socket": "pre-commit",
38
+ "run": "<detected format write command>"
39
+ }
40
+ ```
41
+
42
+ Derive `run` from the repo's detected commands (e.g. `pnpm format`). Do not hard-code — include the actual commands discovered in the Orient step.
43
+
44
+ ### Manifest shape
45
+
46
+ The manifest `plugin.json` must have exactly these fields:
47
+
48
+ ```
49
+ { key, socket, run }
50
+ ```
51
+
52
+ This is the shape the FlumeCode plugin loader expects.
53
+
54
+ ### Acceptance criteria the plan must include
55
+
56
+ - `.flumecode/plugins/format-code/plugin.json` exists with `key: "format-code"`, `socket: "pre-commit"`, and `run` set to the detected write-format command.
57
+ - The `run` command reformats files in place and exits 0; reformatted files are staged and included in the commit.
58
+
59
+ ## Always
60
+
61
+ - Stay read-only. Produce the plan via `submit_plan`; never edit files.
62
+ - The plan must be specific enough for an `implement-plan` run to execute
63
+ without re-deriving the commands — include the actual detected commands in
64
+ the step descriptions and artifact content.
@@ -43,6 +43,9 @@ put it in the prompt, the subagent doesn't have it.
43
43
  - **Coding guidelines.** This prompt contains a `# Coding Guidelines` section.
44
44
  Copy it verbatim into the prompt of the implementation subagent and the
45
45
  code-quality-review subagent so they hold the work to it.
46
+ - **Technical-writing guidelines.** This prompt contains a `# Technical Writing`
47
+ section. Copy it verbatim into the prompt of the report subagent so it applies
48
+ the inline-code conventions to all free-text fields it authors.
46
49
 
47
50
  ## Inputs
48
51
 
@@ -58,32 +61,39 @@ the next step.
58
61
 
59
62
  1. **Orient.** Read the plan/request and the FlumeCode wiki (if any) enough to
60
63
  write good task prompts. Extract the **Steps** and the **Acceptance criteria
61
- (ACs)**. Do not implement.
64
+ (ACs)**. Also discover the project's verification commands by checking these
65
+ sources in order: `package.json` scripts (look for `build`, `typecheck`,
66
+ `lint`, `test`), `CLAUDE.md`, any `.flumecode/wiki/` page that mentions
67
+ commands, and `Makefile`. Capture the explicit command list; you will include
68
+ it in the prompts you write for the Implement, Verify, and Fix-loop subagents
69
+ so none of them re-derive it. Do not implement.
62
70
 
63
71
  2. **Implement** — Task, `model: "sonnet"`. Give the subagent: the plan steps, a
64
- pointer to the wiki/orientation, and the coding guidelines (verbatim). Tell it
65
- to make all the code changes in the working tree to satisfy the plan, then
66
- self-verify by discovering and running the project's verification commands
67
- checking these sources in order: `package.json` scripts (look for `build`,
68
- `typecheck`, `lint`, `test`), `CLAUDE.md`, any `.flumecode/wiki/` page that
69
- mentions commands, and `Makefile`. Use whatever is present and appropriate for
70
- this repo; do not hardcode specific command strings. Run each discovered
71
- command and fix any errors that the edits introduced before returning. If no
72
- build/test setup exists in this repo, note that and move on do not fail. End
73
- by reporting: the verification commands it ran and their pass/fail results,
74
- which files it changed, and how each plan step was addressed. It must not
75
- commit or push.
72
+ pointer to the wiki/orientation, the coding guidelines (verbatim), and the
73
+ explicit verification command list the orchestrator discovered in the Orient
74
+ step. Tell it to make all the code changes in the working tree to satisfy the
75
+ plan, then self-verify by running the verification commands the orchestrator
76
+ already discovered and passed in the task prompt. If the orchestrator did not
77
+ provide a list (e.g. could not determine commands confidently), fall back to
78
+ discovering them from the same sources: `package.json` scripts (look for
79
+ `build`, `typecheck`, `lint`, `test`), `CLAUDE.md`, any `.flumecode/wiki/`
80
+ page that mentions commands, and `Makefile`. Run each command and fix any
81
+ errors that the edits introduced before returning. If no build/test setup
82
+ exists in this repo, note that and move on — do not fail. End by reporting:
83
+ the verification commands it ran and their pass/fail results, which files it
84
+ changed, and how each plan step was addressed. It must not commit or push.
76
85
 
77
86
  3. **Verify (build & tests)** — Task, `model: "sonnet"`, read-only. This step
78
87
  gives the orchestrator an objective, independent build/test signal before the
79
88
  subjective AC and quality reviews. Tell the subagent to:
80
- - Discover the project's verification commands from `package.json` scripts
81
- (look for `build`, `typecheck`, `lint`, `test`), `CLAUDE.md`,
82
- `.flumecode/wiki/` (any page that mentions commands), and `Makefile`. Use
83
- what is present; do not hardcode specific command strings.
84
- - Run each discovered command and record: the exact command, whether it passed
85
- or failed, and for any failure a short excerpt of the failing output
86
- (enough to diagnose the problem).
89
+ - Run the verification commands provided by the orchestrator in the task
90
+ prompt. If none were provided, fall back to discovering them from
91
+ `package.json` scripts (look for `build`, `typecheck`, `lint`, `test`),
92
+ `CLAUDE.md`, `.flumecode/wiki/` (any page that mentions commands), and
93
+ `Makefile`.
94
+ - Run each command and record: the exact command, whether it passed or failed,
95
+ and for any failure — a short excerpt of the failing output (enough to
96
+ diagnose the problem).
87
97
  - If no build/test setup exists in this repo, say so explicitly and pass the
88
98
  gate.
89
99
  - Return a structured per-check result: command, pass/fail, failing-output
@@ -94,19 +104,18 @@ the next step.
94
104
  subagent the full AC list and tell it to verify each one against the actual
95
105
  changes (run `git --no-pager diff`, read the changed files, run tests/build if
96
106
  useful). For **each** AC it must return: the criterion text verbatim, a verdict
97
- (**met / not met / unclear**), a one-or-two-sentence rationale, and this is the
98
- evidence the report needs the **exact diff hunk(s)** that prove it, each tagged
99
- with its file path (the hunks that prove it, copied verbatim from
100
- `git --no-pager diff`, such that the union of every AC's evidence covers the
101
- entire diff each changed hunk cited under at least one criterion). A _met_ AC should cite at least one
102
- hunk; _not met_ / _unclear_ may cite none. **Ground every verdict in the actual
103
- diff:** a criterion may be marked _met_ only if `git --no-pager diff` really
104
- contains the change that satisfies it, and each cited hunk must be copied verbatim
105
- from that live output never reconstructed from the plan or from what the
106
- implement subagent claimed. If `git --no-pager diff` is empty, the implementation
107
- produced no changes: no criterion may be _met_, and the review must say so. Tell it
108
- to return this as a clean, structured list so you can hand it straight to the
109
- report step. In addition to per-AC verdicts, cross-check that every hunk in `git --no-pager diff` is cited by at least one AC's evidence; report any uncovered hunk as a coverage gap (signalling a missing AC or an out-of-scope change).
107
+ (**met / not met / unclear**), a one-or-two-sentence rationale, and the relevant
108
+ file paths and change locations that support the verdict. A _met_ AC should cite
109
+ at least one file/location; _not met_ / _unclear_ may cite none. **Ground
110
+ every verdict in the actual diff:** a criterion may be marked _met_ only if
111
+ `git --no-pager diff` really contains the change that satisfies it — never
112
+ reconstruct from the plan or from what the implement subagent claimed. If
113
+ `git --no-pager diff` is empty, the implementation produced no changes: no
114
+ criterion may be _met_, and the review must say so. Tell it to return this as a
115
+ clean, structured list so you can hand it straight to the report step. In
116
+ addition to per-AC verdicts, cross-check `git --no-pager diff` against the ACs;
117
+ note any files or areas that appear changed but don't map to any AC as a coverage
118
+ gap (signalling a missing AC or an out-of-scope change).
110
119
 
111
120
  5. **Code-quality review** — Task, `model: "opus"`, read-only. Give the subagent
112
121
  the coding guidelines (verbatim) and tell it to review the changes for
@@ -117,26 +126,36 @@ the next step.
117
126
  review (step 4) reports any _not met_ AC, or the quality review (step 5)
118
127
  reports any _must-fix_ finding: spawn an **Implement/fix** subagent (Task,
119
128
  `model: "sonnet"`) whose prompt lists exactly those findings and tells it to
120
- resolve them without regressing the rest. When a Verify failure triggered the
121
- fix, include the failing command(s) and their error output excerpt(s) from the
122
- Verify result in the fix subagent's prompt so it has the full context. After
123
- each fix iteration, re-run the Verify step (step 3) in addition to any AC or
124
- quality review that failed. Repeat at most **2** times. If something still
125
- fails after that, stop looping and record the gap honestly in the report — do
126
- not hide it.
127
-
128
- 7. **Report** — Task, `model: "opus"`, read-only. Give the subagent the plan, the
129
- Verify results (from step 3), the AC verdicts (from step 4), and the quality
130
- findings, and tell it to run `git --no-pager diff` itself as the **single
131
- source of truth** for the report. Every `evidence` hunk it submits must be
132
- copied verbatim from that live diff — it must drop or correct any hunk carried
129
+ resolve them without regressing the rest. Include the verification command list
130
+ from the Orient step in the fix subagent's prompt (the same list passed to
131
+ Implement and Verify), so the fix subagent does not need to re-derive it. When
132
+ a Verify failure triggered the fix, include the failing command(s) and their
133
+ error output excerpt(s) from the Verify result in the fix subagent's prompt so
134
+ it has the full context. After each fix iteration, re-run the Verify step (step 3) in addition to any AC or quality review that failed. Repeat at most **2**
135
+ times. If something still fails after that, stop looping and record the gap
136
+ honestly in the report — do not hide it.
137
+
138
+ 7. **Report** — Task, `model: "opus"`, read-only. Give the subagent the AC
139
+ verdicts (with criterion text, from step 4), the Verify results (from step 3),
140
+ and the quality findings, and tell it to run `git --no-pager diff` itself as
141
+ the **single source of truth** for the report. Do not pass the full plan — the
142
+ AC verdicts carry each criterion verbatim, and the live `git --no-pager diff`
143
+ is the authoritative source for evidence; re-inlining the full plan is
144
+ redundant. Keep each subagent prompt to the minimal self-contained slice it
145
+ needs. Include the `# Technical Writing` section
146
+ (copied verbatim from this prompt) in the report subagent's prompt — the same
147
+ way `# Coding Guidelines` is forwarded to implementation subagents — so it
148
+ applies the inline-code conventions to all free-text fields it authors. Every `evidence` hunk it submits must be
149
+ copied verbatim from that live diff, including each hunk's `@@ -a,b +c,d @@` header line(s) (do not strip them — the report renders file line numbers from them) — it must drop or correct any hunk carried
133
150
  over from step 4 that no longer appears in the actual diff, and the **Files
134
151
  changed** list must come from `git --no-pager diff --stat`, not from what an
135
152
  earlier subagent claimed. Tell it to enumerate all hunks from `git --no-pager diff` and ensure each is attached to ≥1 AC's `evidence`; any hunk mapping to no plan AC goes under `## Caveats / follow-ups` as an explicit unattributed change. **If `git --no-pager diff` is empty, the
136
153
  implementation changed nothing:** the report must say so plainly — an honest
137
154
  `summary`, no AC marked `met` with evidence — and must never describe edits
138
155
  that aren't in the diff. Tell it to submit the user-facing report by calling
139
- the **`submit_report`** tool — it has that tool available. It must call
156
+ the **`submit_report`** tool — it has that tool available. The report MUST be
157
+ submitted via `submit_report` (structured); final assistant prose is only a
158
+ last-resort fallback and will be flagged as an unverified AC review. It must call
140
159
  `submit_report` exactly once and must not edit any files.
141
160
 
142
161
  8. **Confirm and end.** Once the report subagent has called `submit_report`, you are
@@ -152,12 +171,14 @@ The report subagent calls `submit_report` with these fields:
152
171
  - **`filesChanged`** — markdown list of files changed (from the diff). Rendered under `## Files changed`.
153
172
  - **`codeQuality`** — the code-quality review outcome and anything left as nice-to-have. Rendered under `## Code quality`.
154
173
  - **`caveats`** — anything deferred, unmet, or worth a human's eyes, including diff hunks that map to no plan AC. Write 'None.' if nothing. Rendered under `## Caveats / follow-ups`.
155
- - **`acceptanceCriteria`** — one entry per AC from the plan, in plan order, each:
174
+ - **`acceptanceCriteria`** — EXACTLY one entry per AC from the plan (same count and
175
+ order). The runner counts the plan's ACs and warns on any mismatch — do not merge,
176
+ split, drop, or invent criteria. Each entry:
156
177
  - `criterion` — the AC text verbatim.
157
178
  - `status` — `"met"` / `"not_met"` / `"unclear"`, mirroring the AC review.
158
179
  - `rationale` — one or two sentences on why the verdict holds.
159
180
  - `evidence` — an array of `{ file, hunk, note? }`, where `hunk` is copied
160
- verbatim from the live `git --no-pager diff` and proves the verdict (`note`
181
+ verbatim from the live `git --no-pager diff`, including each hunk's `@@ -a,b +c,d @@` header line(s) (do not strip them — the report renders file line numbers from them), and proves the verdict (`note`
161
182
  optionally explains it). Never include a hunk that isn't in the actual diff. Cite
162
183
  the supporting hunk(s) for a met criterion; may be empty for not_met / unclear.
163
184
 
@@ -173,4 +194,10 @@ The report subagent calls `submit_report` with these fields:
173
194
  once — not as prose for you to echo. Each acceptance criterion carries the diff
174
195
  hunk(s) that prove its verdict, copied verbatim from the live `git --no-pager diff`
175
196
  — never fabricated. An empty diff means an honest "nothing changed" report.
197
+ - The report MUST be submitted via `submit_report` (structured). Final assistant prose
198
+ is only a last-resort fallback and will be flagged as an unverified AC review by the
199
+ runner.
200
+ - `acceptanceCriteria` must have EXACTLY one entry per plan acceptance criterion (same
201
+ count and order). The runner counts the plan's ACs and warns on any mismatch, so do
202
+ not merge, split, drop, or invent criteria.
176
203
  - The report exists so the human reviewer can verify each acceptance criterion is satisfied — the ACs and their diff evidence are the primary review surface.
@@ -3,7 +3,7 @@ name: lint-plugin-generator
3
3
  description: >-
4
4
  Generate a concrete plan to install the FlumeCode Lint plugin for THIS repo —
5
5
  a .flumecode/plugins/lint/ manifest wired to the pre-commit socket that runs
6
- the repo's lint/format checks and reports a heartbeat.
6
+ the repo's lint/format checks.
7
7
  ---
8
8
 
9
9
  # lint-plugin-generator
@@ -37,60 +37,30 @@ instruct the implementer to create:
37
37
  {
38
38
  "key": "lint",
39
39
  "socket": "pre-commit",
40
- "run": "node .flumecode/plugins/lint/run.mjs",
41
- "heartbeat": {
42
- "url": "https://<flumecode-base-url>/api/runner/plugins/heartbeat",
43
- "token": "<repo-scoped-token>"
44
- }
40
+ "run": "<detected lint/format/typecheck command chain>"
45
41
  }
46
42
  ```
47
43
 
48
- `url` and `token` are placeholders — note in the plan that the user must fill
49
- them in via the FlumeCode web UI after installation. The plan must **not**
50
- commit a real token value.
51
-
52
- ### Artifact 2 — `.flumecode/plugins/lint/run.mjs`
53
-
54
- A Node.js ES module that:
55
-
56
- 1. Reads `plugin.json` from the same directory to get `heartbeat.url` and `heartbeat.token`.
57
- 2. Determines the current git branch (`git rev-parse --abbrev-ref HEAD`).
58
- 3. Runs each detected lint/format/typecheck command with `child_process.execSync` (stdio: `inherit`).
59
- 4. On success, POSTs to the heartbeat URL:
60
- `{ repoId: process.env.FLUMECODE_REPO_ID, pluginKey: "lint", branch, status: "pass", timestamp: new Date().toISOString() }`
61
- (`FLUMECODE_REPO_ID` — the runner will inject this in Plan 2; if not yet available, the heartbeat may omit repoId or read it from .flumecode/config.json)
62
- 5. On any command failure, exits non-zero (and optionally POSTs `status: "fail"`).
63
-
64
- The `repoId` comes from the `FLUMECODE_REPO_ID` environment variable that the
65
- runner sets. The heartbeat request uses `Authorization: Bearer <token>`.
44
+ Derive `run` from the repo's detected commands (e.g. `pnpm exec lint-staged && pnpm lint && pnpm typecheck && pnpm test`). Do not hard-code — include the actual commands discovered in the Orient step.
66
45
 
67
46
  ### Manifest shape
68
47
 
69
48
  The manifest `plugin.json` must have exactly these fields:
70
49
 
71
50
  ```
72
- { key, socket, run, heartbeat: { url, token } }
51
+ { key, socket, run }
73
52
  ```
74
53
 
75
54
  This is the shape the FlumeCode plugin loader expects.
76
55
 
77
- ### Heartbeat endpoint
78
-
79
- `POST /api/runner/plugins/heartbeat` with JSON body:
80
- `{ repoId, pluginKey, branch, status, timestamp }`
81
- (this endpoint does not exist yet — it will be created by Plan 2; include this as a step in the generated plan or as a prerequisite note)
82
-
83
56
  ### Acceptance criteria the plan must include
84
57
 
85
- - `.flumecode/plugins/lint/plugin.json` exists with `key: "lint"`, `socket: "pre-commit"`, `run: "node .flumecode/plugins/lint/run.mjs"`.
86
- - `.flumecode/plugins/lint/run.mjs` runs the repo's detected lint/format/typecheck commands and exits non-zero on any failure.
87
- - A successful run POSTs a heartbeat with `{ repoId, pluginKey: "lint", branch, status: "pass", timestamp }`.
58
+ - `.flumecode/plugins/lint/plugin.json` exists with `key: "lint"`, `socket: "pre-commit"`, and `run` set to the detected command chain.
59
+ - The `run` command exits non-zero on any lint/format/typecheck failure.
88
60
 
89
61
  ## Always
90
62
 
91
63
  - Stay read-only. Produce the plan via `submit_plan`; never edit files.
92
64
  - The plan must be specific enough for an `implement-plan` run to execute
93
65
  without re-deriving the commands — include the actual detected commands in
94
- the step descriptions and pseudo code.
95
- - Leave `heartbeat.url` and `heartbeat.token` as placeholders — document that
96
- the user fills them in via the FlumeCode web UI after installation.
66
+ the step descriptions and artifact content.
@@ -68,7 +68,7 @@ Field-by-field guidance:
68
68
  unanswered defaults from Phase 1).
69
69
  - **`steps`** — an ordered list. For each step provide:
70
70
  - **`title`** — a concise imperative phrase naming the step (e.g. "Add submit_plan schema to plan.ts").
71
- - **`description`** — what changes and why: the concrete change being made and the rationale for it. Use concrete file references (`path/to/file.ts`) and name the functions/symbols involved.
71
+ - **`description`** — an array of bullet points that help the reviewer understand the upcoming `pseudoCode` and decide whether the plan and design are correct. Each item is a distinct, self-contained point about what is changing and why — not a single paragraph, and not a line-by-line restatement of the pseudo code. Use concrete file references (`path/to/file.ts`) and name the functions/symbols involved. Apply inline-code formatting to all identifiers.
72
72
  - **`pseudoCode`** — an array of `{ file, pseudoCode }` entries. Provide an entry for every file the step touches **except** documentation files (SKILL.md, README.md, wiki pages, etc.). `pseudoCode` is optional in the schema but expected for all non-documentation files. Each entry names the file path and contains pseudo code that precisely describes the changes to make in that file.
73
73
  - **`acceptanceCriteria`** — **required; at least 2 items.** Each criterion must
74
74
  be a concrete, deterministically-checkable condition that a third party can verify
@@ -87,6 +87,10 @@ Field-by-field guidance:
87
87
  - **`risks`** — anything that could change the approach or surface a problem.
88
88
  - **`outOfScope`** — what you are deliberately not doing.
89
89
 
90
+ **Formatting.** Apply the `# Technical Writing` guidelines from the prompt to all
91
+ free-text fields: wrap code identifiers (function names, variable names, type names,
92
+ file names, commands, and flags) in inline backticks.
93
+
90
94
  Cite real files you inspected. Prefer the codebase's existing patterns over
91
95
  introducing new ones. Be specific enough that another agent could execute the
92
96
  plan without re-deriving it.
@@ -95,4 +95,6 @@ before you finish. (You don't need to `git add`; the runner stages and commits f
95
95
 
96
96
  Your last message **is** the report posted to the session thread. Write it for the
97
97
  user: list which files conflicted and, briefly, how you resolved each, plus how you
98
- verified (build/tests). The runner appends the pull-request link, so don't add one.
98
+ verified (build/tests). Wrap conflicted file names and code identifiers in inline
99
+ backticks per the `# Technical Writing` section. The runner appends the pull-request
100
+ link, so don't add one.
@@ -60,8 +60,9 @@ essentials:
60
60
  - **Subagents start blank.** Each Task subagent sees only the prompt you give it —
61
61
  not this thread, the plan, or the prior report. Make every prompt self-contained:
62
62
  include the specific change requested, the relevant plan/report excerpt, the code
63
- context, and the coding guidelines (verbatim, from the `# Coding Guidelines`
64
- section in the prompt).
63
+ context, the coding guidelines (verbatim, from the `# Coding Guidelines` section
64
+ in the prompt), and — for the report subagent — the technical-writing guidelines
65
+ (verbatim, from the `# Technical Writing` section in the prompt).
65
66
  - **Scope the work to the request.** This is a fine-tune of an existing
66
67
  implementation, not a rebuild. Change only what the user asked for plus what that
67
68
  change strictly requires; don't regress the rest of the plan.