@flumecode/runner 0.12.1 → 0.14.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/cli.js CHANGED
@@ -25,6 +25,10 @@ function writeConfig(config) {
25
25
  writeFileSync(configPath, JSON.stringify(config, null, 2), { mode: 384 });
26
26
  }
27
27
 
28
+ // src/run.ts
29
+ import { existsSync as existsSync4 } from "node:fs";
30
+ import { join as join5 } from "node:path";
31
+
28
32
  // src/version.ts
29
33
  import { readFileSync as readFileSync2 } from "node:fs";
30
34
  import { fileURLToPath } from "node:url";
@@ -373,6 +377,10 @@ async function hasChanges(dir) {
373
377
  const { stdout: stdout2 } = await git(["-C", dir, "status", "--porcelain"]);
374
378
  return stdout2.trim().length > 0;
375
379
  }
380
/**
 * Summarise the uncommitted changes in the checkout at `dir`.
 *
 * `git diff --stat` only reports modifications to files git already tracks,
 * whereas the callers decide whether anything changed via `git status
 * --porcelain`, which also counts brand-new files. To keep the two views
 * consistent, untracked (non-ignored) files are appended to the stat output,
 * one per line, so newly created files are not silently missing from the list.
 *
 * @param {string} dir - Path of the working tree to inspect (passed to `git -C`).
 * @returns {Promise<string>} The `git diff --stat` text, followed by one
 *   ` <path> | untracked (new file)` line per untracked file. When there are no
 *   untracked files the stat output is returned unchanged.
 */
async function gitDiffStat(dir) {
  const { stdout: trackedStat } = await git(["-C", dir, "--no-pager", "diff", "--stat"]);
  const { stdout: untrackedRaw } = await git([
    "-C",
    dir,
    "ls-files",
    "--others",
    "--exclude-standard"
  ]);
  const untrackedLines = untrackedRaw
    .split("\n")
    .filter((path) => path !== "")
    .map((path) => ` ${path} | untracked (new file)`);
  if (untrackedLines.length === 0) return trackedStat;
  // Keep the stat block (if any) first so its summary line stays adjacent to it.
  const head = trackedStat.trim() ? `${trackedStat.trimEnd()}\n` : "";
  return `${head}${untrackedLines.join("\n")}\n`;
}
376
384
  var PreCommitError = class extends Error {
377
385
  constructor(log) {
378
386
  super("pre-commit checks failed");
@@ -604,26 +612,35 @@ function parseManifest(raw) {
604
612
  if (typeof r.key !== "string" || !r.key) return null;
605
613
  if (r.socket !== "pre-commit") return null;
606
614
  if (typeof r.run !== "string" || !r.run) return null;
607
- const manifest = { key: r.key, socket: r.socket, run: r.run };
608
- if (typeof r.heartbeat === "object" && r.heartbeat !== null) {
609
- const hb = r.heartbeat;
610
- if (typeof hb.url === "string" && typeof hb.token === "string") {
611
- manifest.heartbeat = { url: hb.url, token: hb.token };
612
- }
613
- }
614
- return manifest;
615
+ return { key: r.key, socket: r.socket, run: r.run };
615
616
  }
616
617
 
617
618
  // src/plugins/socket.ts
618
619
  var exec2 = promisify2(execCb);
620
// Maximum number of UTF-16 code units of plugin output kept per result.
var MAX_OUTPUT = 8 * 1024;
/**
 * Limit a plugin's output to its last `MAX_OUTPUT` UTF-16 code units.
 *
 * The tail is kept rather than the head. If the cut would land in the middle
 * of a surrogate pair, the orphaned low surrogate is dropped so the result
 * never starts with half a character.
 *
 * @param {string} s - Raw output; `null`/`undefined` are treated as empty and
 *   other non-string values are stringified instead of throwing.
 * @returns {string} `s` unchanged when short enough, otherwise its tail.
 */
function cap(s) {
  const text = typeof s === "string" ? s : String(s ?? "");
  if (text.length <= MAX_OUTPUT) return text;
  let start = text.length - MAX_OUTPUT;
  const firstUnit = text.charCodeAt(start);
  // 0xDC00–0xDFFF is the low-surrogate range: the slice would begin mid-pair.
  if (firstUnit >= 0xdc00 && firstUnit <= 0xdfff) start += 1;
  return text.slice(start);
}
624
// Per-plugin results published by the most recent `runSocket` call.
var lastSocketResults = [];
/**
 * Forget any previously recorded plugin results.
 * A fresh array is assigned (rather than truncating in place) so arrays that
 * were handed out earlier are left untouched.
 */
function resetSocketResults() {
  lastSocketResults = [];
}
/**
 * Read the plugin results recorded by the most recent socket run.
 *
 * @returns {Array<object>} A shallow copy of the recorded results, so callers
 *   can embed or modify the array without altering the runner's own state.
 */
function getSocketResults() {
  return [...lastSocketResults];
}
619
631
  async function runSocket(socketName, dir) {
620
632
  const plugins = (await loadPlugins(dir)).filter((p) => p.socket === socketName);
633
+ const results = [];
621
634
  for (const plugin of plugins) {
622
635
  const result = await runPluginCommand(plugin.run, dir);
623
636
  if (result.exitCode !== 0) {
637
+ results.push({ key: plugin.key, status: "failed", output: cap(result.output) });
638
+ lastSocketResults = results;
624
639
  throw new PreCommitError(`[plugin:${plugin.key}] ${result.output}`);
625
640
  }
641
+ results.push({ key: plugin.key, status: "passed", output: cap(result.output) });
626
642
  }
643
+ lastSocketResults = results;
627
644
  }
628
645
  async function runPluginCommand(command2, cwd) {
629
646
  try {
@@ -714,6 +731,11 @@ function widgetPosted(kind) {
714
731
  // src/plan.ts
715
732
  import { createSdkMcpServer as createSdkMcpServer2, tool as tool2 } from "@anthropic-ai/claude-agent-sdk";
716
733
  import { z as z2 } from "zod";
734
+
735
+ // src/schema-hints.ts
736
+ var INLINE_CODE_HINT = "Wrap code identifiers (function, variable, type, and file names, commands, and flags) in inline backticks, e.g. `getCodingSessionsForRequest`.";
737
+
738
+ // src/plan.ts
717
739
  var SERVER_NAME2 = "flume_plan";
718
740
  var SUBMIT_PLAN = "submit_plan";
719
741
  var PLAN_TOOL_NAME = `mcp__${SERVER_NAME2}__${SUBMIT_PLAN}`;
@@ -724,7 +746,7 @@ var pseudoCodeEntrySchema = z2.object({
724
746
  });
725
747
  var stepSchema = z2.object({
726
748
  title: z2.string().min(1).describe("A concise imperative title for this step."),
727
- description: z2.string().min(1).describe("What changes and why \u2014 the rationale for this step."),
749
+ description: z2.string().min(1).describe("What changes and why \u2014 the rationale for this step. " + INLINE_CODE_HINT),
728
750
  pseudoCode: z2.array(pseudoCodeEntrySchema).optional().describe(
729
751
  "Per-file pseudo code. Provide an entry for every non-documentation file this step touches. Each entry contains the file path and pseudo code describing the changes to that file."
730
752
  )
@@ -734,11 +756,11 @@ var planInputSchema = {
734
756
  "A concise, descriptive name for THIS plan. Must be distinct from the request title and from any sibling plans on the same request. Keep it under 120 characters."
735
757
  ),
736
758
  scope: z2.enum(["feat", "fix", "chore", "docs", "test", "refactor"]).describe("The primary intent of the change."),
737
- goal: z2.string().min(1).describe("One or two sentences stating the outcome."),
759
+ goal: z2.string().min(1).describe("One or two sentences stating the outcome. " + INLINE_CODE_HINT),
738
760
  assumptions: z2.array(z2.string()).describe("Anything decided during planning, including unanswered defaults."),
739
761
  steps: z2.array(stepSchema).min(1).describe("Ordered list of changes. Each step says what and why, with file references."),
740
762
  acceptanceCriteria: z2.array(z2.string().min(1)).min(2).describe(
741
- "Concrete, deterministically-checkable conditions that together define done. Each names a trigger/precondition and the exact observable result (run X -> output Y; file Z contains W; f(a) returns b) \u2014 no vague adjectives, not a restatement of a step. The set must collectively cover every step's change. At least 2 required."
763
+ "Concrete, deterministically-checkable conditions that together define done. Each names a trigger/precondition and the exact observable result (run X -> output Y; file Z contains W; f(a) returns b) \u2014 no vague adjectives, not a restatement of a step. The set must collectively cover every step's change. At least 2 required. " + INLINE_CODE_HINT
742
764
  ),
743
765
  risks: z2.array(z2.string()).describe("Anything that could change the approach."),
744
766
  outOfScope: z2.array(z2.string()).describe("What is deliberately not being done.")
@@ -834,6 +856,19 @@ function createPlanTooling() {
834
856
  });
835
857
  return { mcpServer, getPlans: () => renderedPlans };
836
858
  }
859
/**
 * Count the acceptance criteria listed in a rendered plan body.
 *
 * Looks for the `## Acceptance criteria` heading and counts the top-level
 * task-list items beneath it, stopping at the next `## ` heading. Both
 * unchecked (`[ ]`) and checked (`[x]` / `[X]`) boxes count, with `-`, `*` or
 * `+` bullets: a criterion that has been ticked off is still a criterion, and
 * under-counting would make a complete report look like a contract violation.
 * Indented (nested) items are not counted.
 *
 * @param {string | null | undefined} planBody - Markdown plan text.
 * @returns {number} Number of criteria found; 0 when the body is empty or has
 *   no acceptance-criteria section.
 */
function countPlanAcceptanceCriteria(planBody) {
  if (!planBody) return 0;
  const checkboxItem = /^[-*+] \[[ xX]\] /;
  const bodyLines = planBody.split("\n");
  const headingIndex = bodyLines.findIndex((l) => l.trim() === "## Acceptance criteria");
  if (headingIndex === -1) return 0;
  let count = 0;
  for (const line of bodyLines.slice(headingIndex + 1)) {
    if (line.startsWith("## ")) break;
    if (checkboxItem.test(line)) count++;
  }
  return count;
}
837
872
 
838
873
  // src/report.ts
839
874
  import { createSdkMcpServer as createSdkMcpServer3, tool as tool3 } from "@anthropic-ai/claude-agent-sdk";
@@ -849,28 +884,28 @@ var STATUS_ICON = {
849
884
  var evidenceSchema = z3.object({
850
885
  file: z3.string().min(1).describe("Repo-relative path the hunk comes from."),
851
886
  hunk: z3.string().min(1).describe(
852
- "A unified-diff hunk body proving the criterion \u2014 the lines that matter, not the whole file. Rendered verbatim as a ```diff block."
887
+ "A unified-diff hunk proving the criterion \u2014 the lines that matter, not the whole file. MUST keep the `@@ -a,b +c,d @@` hunk header line(s) exactly as they appear in `git --no-pager diff`; the report renders file line numbers from them. Rendered verbatim as a ```diff block."
853
888
  ),
854
889
  note: z3.string().optional().describe("Optional one-line explanation of why this hunk satisfies the criterion.")
855
890
  });
856
891
  var acVerdictSchema = z3.object({
857
892
  criterion: z3.string().min(1).describe("The acceptance-criterion text, verbatim from the plan."),
858
893
  status: z3.enum(["met", "not_met", "unclear"]).describe("Verdict for this criterion, verified against the actual diff."),
859
- rationale: z3.string().min(1).describe("One or two sentences on why the verdict holds."),
894
+ rationale: z3.string().min(1).describe("One or two sentences on why the verdict holds. " + INLINE_CODE_HINT),
860
895
  evidence: z3.array(evidenceSchema).describe(
861
896
  "Diff hunks proving the verdict, copied verbatim from git --no-pager diff. Across ALL criteria the evidence must collectively cover every hunk in the diff \u2014 each changed hunk appears under at least one criterion. Cite the relevant hunk(s) for a met criterion; may be empty for not_met / unclear."
862
897
  )
863
898
  });
864
899
  var reportInputSchema = {
865
- summary: z3.string().min(1).describe("One or two sentences on what was implemented."),
900
+ summary: z3.string().min(1).describe("One or two sentences on what was implemented. " + INLINE_CODE_HINT),
866
901
  filesChanged: z3.string().min(1).describe(
867
902
  "Markdown: the list of files changed (from the diff). Rendered under '## Files changed'."
868
903
  ),
869
904
  codeQuality: z3.string().min(1).describe(
870
- "Markdown: the code-quality review outcome and anything left as nice-to-have. Rendered under '## Code quality'."
905
+ "Markdown: the code-quality review outcome and anything left as nice-to-have. Rendered under '## Code quality'. " + INLINE_CODE_HINT
871
906
  ),
872
907
  caveats: z3.string().min(1).describe(
873
- "Markdown: anything deferred, unmet, or worth a human's eyes, incl. diff hunks that map to no plan AC. Write 'None.' if nothing. Rendered under '## Caveats / follow-ups'."
908
+ "Markdown: anything deferred, unmet, or worth a human's eyes, incl. diff hunks that map to no plan AC. Write 'None.' if nothing. Rendered under '## Caveats / follow-ups'. " + INLINE_CODE_HINT
874
909
  ),
875
910
  acceptanceCriteria: z3.array(acVerdictSchema).min(1).describe(
876
911
  "One entry per acceptance criterion from the plan, in plan order, each with a verdict and the diff evidence behind it."
@@ -1088,6 +1123,10 @@ function stripFrontMatter(raw) {
1088
1123
  }
1089
1124
 
1090
1125
  // src/prompt.ts
1126
/**
 * Append a rule section to a prompt under construction: a blank line, the
 * introductory sentence, another blank line, then whatever `loadRule` returns
 * for `ruleName`.
 *
 * @param {string[]} lines2 - Prompt lines being accumulated; mutated in place.
 * @param {string} intro - Sentence introducing the rule.
 * @param {string} ruleName - Name handed to `loadRule`.
 */
function appendRule(lines2, intro, ruleName) {
  const ruleText = loadRule(ruleName);
  lines2.push("", intro, "", ruleText);
}
1129
+ var WRITING_INTRO = "These technical-writing guidelines apply to the plan and report prose you author in this run:";
1091
1130
  function turnHeading(turn, agentName) {
1092
1131
  if (turn.role === "user") return "User";
1093
1132
  if (turn.failed) return `${agentName} (this run ended in an error)`;
@@ -1121,6 +1160,7 @@ function buildPrompt(ctx) {
1121
1160
  loadRule("coding-guideline")
1122
1161
  );
1123
1162
  }
1163
+ appendRule(lines2, WRITING_INTRO, "technical-writing");
1124
1164
  lines2.push("", `# Request: ${ctx.request?.title ?? ""}`);
1125
1165
  if (ctx.request?.body) {
1126
1166
  lines2.push("", ctx.request.body);
@@ -1147,6 +1187,10 @@ function buildRevisePrompt(ctx) {
1147
1187
  "",
1148
1188
  loadRule("coding-guideline"),
1149
1189
  "",
1190
+ WRITING_INTRO,
1191
+ "",
1192
+ loadRule("technical-writing"),
1193
+ "",
1150
1194
  `# Plan: ${ctx.request?.title ?? ""}`
1151
1195
  ];
1152
1196
  if (ctx.request?.body) {
@@ -1173,6 +1217,10 @@ function buildResolvePrompt(ctx, related = []) {
1173
1217
  "",
1174
1218
  loadRule("coding-guideline"),
1175
1219
  "",
1220
+ WRITING_INTRO,
1221
+ "",
1222
+ loadRule("technical-writing"),
1223
+ "",
1176
1224
  `# Plan: ${ctx.request?.title ?? ""}`
1177
1225
  ];
1178
1226
  if (ctx.request?.body) {
@@ -1197,7 +1245,7 @@ function buildResolvePrompt(ctx, related = []) {
1197
1245
  );
1198
1246
  return lines2.join("\n");
1199
1247
  }
1200
- function buildDocumentPrompt(ctx) {
1248
+ function buildDocumentPrompt(ctx, changedFiles) {
1201
1249
  const lines2 = [
1202
1250
  `You are "${ctx.agentName}" maintaining the repository wiki for ${ctx.repo.fullName}.`,
1203
1251
  `An implementation just ran in this working directory to satisfy the request below; its changes are uncommitted in the working tree.`,
@@ -1209,6 +1257,14 @@ function buildDocumentPrompt(ctx) {
1209
1257
  lines2.push("", ctx.request.body);
1210
1258
  }
1211
1259
  appendThread(lines2, ctx);
1260
+ if (changedFiles && changedFiles.trim()) {
1261
+ lines2.push(
1262
+ "",
1263
+ "Files changed by this implementation (reconcile only the wiki pages these affect \u2014 do not re-survey the whole repo):",
1264
+ "",
1265
+ changedFiles.trim()
1266
+ );
1267
+ }
1212
1268
  lines2.push("", "When done, reply with a one- or two-line summary of the wiki changes you made.");
1213
1269
  return lines2.join("\n");
1214
1270
  }
@@ -1471,13 +1527,15 @@ async function processChatJob(ctx, dir, config, abort) {
1471
1527
  console.log(` \u2026job ${ctx.jobId} posted ${result.widgets.length} widget(s); awaiting reply`);
1472
1528
  return { text: reply, widgets: result.widgets };
1473
1529
  }
1530
+ const wikiExists = existsSync4(join5(dir, ".flumecode", "wiki"));
1474
1531
  let documented = false;
1475
- if (ctx.permissionMode !== "plan" && await hasChanges(dir)) {
1532
+ if (ctx.permissionMode !== "plan" && wikiExists && await hasChanges(dir)) {
1476
1533
  try {
1534
+ const changedFiles = await gitDiffStat(dir);
1477
1535
  console.log(` \u2026updating wiki for job ${ctx.jobId}`);
1478
1536
  await runClaudeCode({
1479
1537
  cwd: dir,
1480
- prompt: buildDocumentPrompt(ctx),
1538
+ prompt: buildDocumentPrompt(ctx, changedFiles),
1481
1539
  permissionMode: ctx.permissionMode,
1482
1540
  maxTurns: DOCUMENT_MAX_TURNS,
1483
1541
  abortController: abort
@@ -1486,6 +1544,8 @@ async function processChatJob(ctx, dir, config, abort) {
1486
1544
  } catch (err) {
1487
1545
  console.warn(` wiki update skipped: ${errorMessage2(err)}`);
1488
1546
  }
1547
+ } else if (ctx.permissionMode !== "plan" && !wikiExists) {
1548
+ console.log(` no .flumecode/wiki \u2014 skipping wiki reconcile for ${ctx.jobId}`);
1489
1549
  }
1490
1550
  const { outcome, autoMerged } = await pushAndOpenPr(ctx, dir, config, abort);
1491
1551
  reply += outcomeBanner(outcome, { branch: ctx.repo.checkoutBranch, documented, autoMerged });
@@ -1494,12 +1554,24 @@ async function processChatJob(ctx, dir, config, abort) {
1494
1554
  function reportClaimsWork(report) {
1495
1555
  return !!report && report.acceptanceCriteria.some((ac) => ac.status === "met" && ac.evidence.length > 0);
1496
1556
  }
1557
/**
 * Check that a report reviews exactly as many acceptance criteria as the plan
 * lists.
 *
 * @param {object | null | undefined} report - Structured report, if any.
 * @param {number} expectedAcCount - Criteria counted in the plan; 0 means the
 *   plan yielded no count, so there is nothing to enforce.
 * @returns {boolean} `true` when nothing is expected or the counts match;
 *   `false` when criteria are expected but the report is missing or differs.
 */
function reportMeetsAcContract(report, expectedAcCount) {
  if (expectedAcCount !== 0) {
    return report ? report.acceptanceCriteria.length === expectedAcCount : false;
  }
  return true;
}
1562
/**
 * Build the markdown blockquote warning used when the acceptance-criteria
 * review could not be validated against the plan.
 *
 * @param {object | null | undefined} report - Structured report, if one was
 *   submitted.
 * @param {number} expectedAcCount - Number of criteria counted in the plan.
 * @returns {string} An "unverified" banner when there is no report, otherwise
 *   a banner stating the expected and the reviewed criterion counts.
 */
function buildAcWarningBanner(report, expectedAcCount) {
  if (report) {
    const reviewed = report.acceptanceCriteria.length;
    return `> \u26A0\uFE0F **AC review may be incomplete** \u2014 the plan has ${expectedAcCount} acceptance criteria but the report reviewed ${reviewed}.`;
  }
  return "> \u26A0\uFE0F **Unverified AC review** \u2014 the implementation did not submit a structured report, so its acceptance-criteria review could not be checked against the plan.";
}
1497
1567
  async function processImplementJob(ctx, dir, resumed, config, abort) {
1498
1568
  console.log(`
1499
1569
  \u25B6 Implement ${ctx.jobId} \u2014 ${ctx.repo.fullName}: "${jobTitle(ctx)}"`);
1500
1570
  const installResult = await installDependencies(dir);
1571
+ const expectedAcCount = countPlanAcceptanceCriteria(ctx.request?.body);
1501
1572
  let report;
1502
1573
  let reply;
1574
+ let warningBanner = "";
1503
1575
  for (let attempt = 0; ; attempt++) {
1504
1576
  const result = await runClaudeCode({
1505
1577
  cwd: dir,
@@ -1511,28 +1583,48 @@ async function processImplementJob(ctx, dir, resumed, config, abort) {
1511
1583
  });
1512
1584
  report = result.report ?? void 0;
1513
1585
  reply = (report ? renderReport(report) : result.text.trim()) || "(the agent produced no report)";
1514
- if (abort.signal.aborted || !reportClaimsWork(report) || await hasChanges(dir)) break;
1515
- if (attempt >= MAX_IMPLEMENT_RETRIES) {
1586
+ if (abort.signal.aborted) {
1587
+ warningBanner = "";
1588
+ break;
1589
+ }
1590
+ const treeChanged = await hasChanges(dir);
1591
+ const phantom = reportClaimsWork(report) && !treeChanged;
1592
+ const acProblem = !reportMeetsAcContract(report, expectedAcCount);
1593
+ if (!phantom && !acProblem) {
1594
+ warningBanner = "";
1595
+ break;
1596
+ }
1597
+ if (attempt < MAX_IMPLEMENT_RETRIES) {
1598
+ console.warn(
1599
+ ` implement ${ctx.jobId}: ${phantom ? "report claims changes but the working tree is clean" : "AC-review contract failed"} \u2014 re-running implementation (attempt ${attempt + 2})`
1600
+ );
1601
+ continue;
1602
+ }
1603
+ if (phantom) {
1516
1604
  throw new Error(
1517
1605
  `Implementation reported completed work (acceptance criteria met with diff evidence) but the working tree is clean after ${attempt + 1} attempt(s) \u2014 no changes were persisted, so no pull request could be opened.`
1518
1606
  );
1519
1607
  }
1520
- console.warn(
1521
- ` implement ${ctx.jobId}: report claims changes but the working tree is clean \u2014 re-running implementation (attempt ${attempt + 2})`
1522
- );
1608
+ warningBanner = buildAcWarningBanner(report, expectedAcCount);
1609
+ break;
1523
1610
  }
1611
+ if (warningBanner) reply = `${warningBanner}
1612
+
1613
+ ${reply}`;
1524
1614
  if (installResult.status === "failed") {
1525
1615
  reply += `
1526
1616
 
1527
1617
  > \u26A0\uFE0F Dependencies failed to install (\`${installResult.manager}\`); tests may not have run.`;
1528
1618
  }
1619
+ const wikiExists = existsSync4(join5(dir, ".flumecode", "wiki"));
1529
1620
  let documented = false;
1530
- if (await hasChanges(dir)) {
1621
+ if (wikiExists && await hasChanges(dir)) {
1531
1622
  try {
1623
+ const changedFiles = await gitDiffStat(dir);
1532
1624
  console.log(` \u2026updating wiki for implement ${ctx.jobId}`);
1533
1625
  await runClaudeCode({
1534
1626
  cwd: dir,
1535
- prompt: buildDocumentPrompt(ctx),
1627
+ prompt: buildDocumentPrompt(ctx, changedFiles),
1536
1628
  permissionMode: ctx.permissionMode,
1537
1629
  maxTurns: DOCUMENT_MAX_TURNS,
1538
1630
  abortController: abort
@@ -1541,15 +1633,19 @@ async function processImplementJob(ctx, dir, resumed, config, abort) {
1541
1633
  } catch (err) {
1542
1634
  console.warn(` wiki update skipped: ${errorMessage2(err)}`);
1543
1635
  }
1636
+ } else if (!wikiExists) {
1637
+ console.log(` no .flumecode/wiki \u2014 skipping wiki reconcile for ${ctx.jobId}`);
1544
1638
  }
1545
1639
  const { outcome, autoMerged } = await pushAndOpenPr(ctx, dir, config, abort, {
1546
1640
  rebase: !resumed
1547
1641
  });
1548
1642
  reply += outcomeBanner(outcome, { branch: ctx.repo.checkoutBranch, documented, autoMerged });
1643
+ const lintPlugins = getSocketResults();
1644
+ const finalReport = report && lintPlugins.length ? { ...report, lint: { plugins: lintPlugins } } : report;
1549
1645
  return {
1550
1646
  text: reply,
1551
1647
  widgets: [],
1552
- ...report ? { report } : {},
1648
+ ...finalReport ? { report: finalReport } : {},
1553
1649
  ...outcome.kind === "pr" ? { pr: outcome.pr } : {}
1554
1650
  };
1555
1651
  }
@@ -1577,13 +1673,15 @@ async function processReviseJob(ctx, dir, resumed, config, abort) {
1577
1673
  console.log(` \u2026revise ${ctx.jobId} posted ${result.widgets.length} widget(s); awaiting reply`);
1578
1674
  return { text: reply, widgets: result.widgets };
1579
1675
  }
1676
+ const wikiExists = existsSync4(join5(dir, ".flumecode", "wiki"));
1580
1677
  let documented = false;
1581
- if (await hasChanges(dir)) {
1678
+ if (wikiExists && await hasChanges(dir)) {
1582
1679
  try {
1680
+ const changedFiles = await gitDiffStat(dir);
1583
1681
  console.log(` \u2026updating wiki for revise ${ctx.jobId}`);
1584
1682
  await runClaudeCode({
1585
1683
  cwd: dir,
1586
- prompt: buildDocumentPrompt(ctx),
1684
+ prompt: buildDocumentPrompt(ctx, changedFiles),
1587
1685
  permissionMode: ctx.permissionMode,
1588
1686
  maxTurns: DOCUMENT_MAX_TURNS,
1589
1687
  abortController: abort
@@ -1592,6 +1690,8 @@ async function processReviseJob(ctx, dir, resumed, config, abort) {
1592
1690
  } catch (err) {
1593
1691
  console.warn(` wiki update skipped: ${errorMessage2(err)}`);
1594
1692
  }
1693
+ } else if (!wikiExists) {
1694
+ console.log(` no .flumecode/wiki \u2014 skipping wiki reconcile for ${ctx.jobId}`);
1595
1695
  }
1596
1696
  const { outcome, autoMerged } = await pushAndOpenPr(ctx, dir, config, abort, {
1597
1697
  rebase: !resumed
@@ -1732,6 +1832,7 @@ async function pollLoop(config) {
1732
1832
  scheduleCancelPoll();
1733
1833
  try {
1734
1834
  resetUsage();
1835
+ resetSocketResults();
1735
1836
  const { text, widgets, pr, plans, report } = await processJob(ctx, config, abort);
1736
1837
  const usage = getUsage();
1737
1838
  await reportJob(config, ctx.jobId, {
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@flumecode/runner",
3
- "version": "0.12.1",
3
+ "version": "0.14.0",
4
4
  "type": "module",
5
5
  "description": "FlumeCode local runner — claims jobs and drives your local Claude Code against a real checkout.",
6
6
  "bin": {
@@ -0,0 +1,14 @@
1
+ ---
2
+ name: technical-writing
3
+ description: >-
4
+ Inline-code conventions for agent-authored plan and report prose: wrap code
5
+ identifiers in backticks so they render as inline code.
6
+ ---
7
+
8
+ # Technical Writing
9
+
10
+ ## Inline code
11
+
12
+ Wrap code identifiers — function names, variable names, type names, file names, commands, and flags — in inline backticks so they render as inline code. For example: `getCodingSessionsForRequest`, not getCodingSessionsForRequest.
13
+
14
+ This convention applies to all free-text fields in plans and reports: goals, step descriptions, acceptance criteria, summaries, code-quality notes, and caveats.
@@ -43,6 +43,9 @@ put it in the prompt, the subagent doesn't have it.
43
43
  - **Coding guidelines.** This prompt contains a `# Coding Guidelines` section.
44
44
  Copy it verbatim into the prompt of the implementation subagent and the
45
45
  code-quality-review subagent so they hold the work to it.
46
+ - **Technical-writing guidelines.** This prompt contains a `# Technical Writing`
47
+ section. Copy it verbatim into the prompt of the report subagent so it applies
48
+ the inline-code conventions to all free-text fields it authors.
46
49
 
47
50
  ## Inputs
48
51
 
@@ -58,32 +61,39 @@ the next step.
58
61
 
59
62
  1. **Orient.** Read the plan/request and the FlumeCode wiki (if any) enough to
60
63
  write good task prompts. Extract the **Steps** and the **Acceptance criteria
61
- (ACs)**. Do not implement.
64
+ (ACs)**. Also discover the project's verification commands by checking these
65
+ sources in order: `package.json` scripts (look for `build`, `typecheck`,
66
+ `lint`, `test`), `CLAUDE.md`, any `.flumecode/wiki/` page that mentions
67
+ commands, and `Makefile`. Capture the explicit command list; you will include
68
+ it in the prompts you write for the Implement, Verify, and Fix-loop subagents
69
+ so none of them re-derive it. Do not implement.
62
70
 
63
71
  2. **Implement** — Task, `model: "sonnet"`. Give the subagent: the plan steps, a
64
- pointer to the wiki/orientation, and the coding guidelines (verbatim). Tell it
65
- to make all the code changes in the working tree to satisfy the plan, then
66
- self-verify by discovering and running the project's verification commands
67
- checking these sources in order: `package.json` scripts (look for `build`,
68
- `typecheck`, `lint`, `test`), `CLAUDE.md`, any `.flumecode/wiki/` page that
69
- mentions commands, and `Makefile`. Use whatever is present and appropriate for
70
- this repo; do not hardcode specific command strings. Run each discovered
71
- command and fix any errors that the edits introduced before returning. If no
72
- build/test setup exists in this repo, note that and move on do not fail. End
73
- by reporting: the verification commands it ran and their pass/fail results,
74
- which files it changed, and how each plan step was addressed. It must not
75
- commit or push.
72
+ pointer to the wiki/orientation, the coding guidelines (verbatim), and the
73
+ explicit verification command list the orchestrator discovered in the Orient
74
+ step. Tell it to make all the code changes in the working tree to satisfy the
75
+ plan, then self-verify by running the verification commands the orchestrator
76
+ already discovered and passed in the task prompt. If the orchestrator did not
77
+ provide a list (e.g. could not determine commands confidently), fall back to
78
+ discovering them from the same sources: `package.json` scripts (look for
79
+ `build`, `typecheck`, `lint`, `test`), `CLAUDE.md`, any `.flumecode/wiki/`
80
+ page that mentions commands, and `Makefile`. Run each command and fix any
81
+ errors that the edits introduced before returning. If no build/test setup
82
+ exists in this repo, note that and move on — do not fail. End by reporting:
83
+ the verification commands it ran and their pass/fail results, which files it
84
+ changed, and how each plan step was addressed. It must not commit or push.
76
85
 
77
86
  3. **Verify (build & tests)** — Task, `model: "sonnet"`, read-only. This step
78
87
  gives the orchestrator an objective, independent build/test signal before the
79
88
  subjective AC and quality reviews. Tell the subagent to:
80
- - Discover the project's verification commands from `package.json` scripts
81
- (look for `build`, `typecheck`, `lint`, `test`), `CLAUDE.md`,
82
- `.flumecode/wiki/` (any page that mentions commands), and `Makefile`. Use
83
- what is present; do not hardcode specific command strings.
84
- - Run each discovered command and record: the exact command, whether it passed
85
- or failed, and for any failure a short excerpt of the failing output
86
- (enough to diagnose the problem).
89
+ - Run the verification commands provided by the orchestrator in the task
90
+ prompt. If none were provided, fall back to discovering them from
91
+ `package.json` scripts (look for `build`, `typecheck`, `lint`, `test`),
92
+ `CLAUDE.md`, `.flumecode/wiki/` (any page that mentions commands), and
93
+ `Makefile`.
94
+ - Run each command and record: the exact command, whether it passed or failed,
95
+ and for any failure — a short excerpt of the failing output (enough to
96
+ diagnose the problem).
87
97
  - If no build/test setup exists in this repo, say so explicitly and pass the
88
98
  gate.
89
99
  - Return a structured per-check result: command, pass/fail, failing-output
@@ -94,19 +104,18 @@ the next step.
94
104
  subagent the full AC list and tell it to verify each one against the actual
95
105
  changes (run `git --no-pager diff`, read the changed files, run tests/build if
96
106
  useful). For **each** AC it must return: the criterion text verbatim, a verdict
97
- (**met / not met / unclear**), a one-or-two-sentence rationale, and this is the
98
- evidence the report needs the **exact diff hunk(s)** that prove it, each tagged
99
- with its file path (the hunks that prove it, copied verbatim from
100
- `git --no-pager diff`, such that the union of every AC's evidence covers the
101
- entire diff each changed hunk cited under at least one criterion). A _met_ AC should cite at least one
102
- hunk; _not met_ / _unclear_ may cite none. **Ground every verdict in the actual
103
- diff:** a criterion may be marked _met_ only if `git --no-pager diff` really
104
- contains the change that satisfies it, and each cited hunk must be copied verbatim
105
- from that live output never reconstructed from the plan or from what the
106
- implement subagent claimed. If `git --no-pager diff` is empty, the implementation
107
- produced no changes: no criterion may be _met_, and the review must say so. Tell it
108
- to return this as a clean, structured list so you can hand it straight to the
109
- report step. In addition to per-AC verdicts, cross-check that every hunk in `git --no-pager diff` is cited by at least one AC's evidence; report any uncovered hunk as a coverage gap (signalling a missing AC or an out-of-scope change).
107
+ (**met / not met / unclear**), a one-or-two-sentence rationale, and the relevant
108
+ file paths and change locations that support the verdict. A _met_ AC should cite
109
+ at least one file path or change location; _not met_ / _unclear_ may cite none. **Ground
110
+ every verdict in the actual diff:** a criterion may be marked _met_ only if
111
+ `git --no-pager diff` really contains the change that satisfies it — never
112
+ reconstruct from the plan or from what the implement subagent claimed. If
113
+ `git --no-pager diff` is empty, the implementation produced no changes: no
114
+ criterion may be _met_, and the review must say so. Tell it to return this as a
115
+ clean, structured list so you can hand it straight to the report step. In
116
+ addition to per-AC verdicts, cross-check `git --no-pager diff` against the ACs;
117
+ note any files or areas that appear changed but don't map to any AC as a coverage
118
+ gap (signalling a missing AC or an out-of-scope change).
110
119
 
111
120
  5. **Code-quality review** — Task, `model: "opus"`, read-only. Give the subagent
112
121
  the coding guidelines (verbatim) and tell it to review the changes for
@@ -117,26 +126,36 @@ the next step.
117
126
  review (step 4) reports any _not met_ AC, or the quality review (step 5)
118
127
  reports any _must-fix_ finding: spawn an **Implement/fix** subagent (Task,
119
128
  `model: "sonnet"`) whose prompt lists exactly those findings and tells it to
120
- resolve them without regressing the rest. When a Verify failure triggered the
121
- fix, include the failing command(s) and their error output excerpt(s) from the
122
- Verify result in the fix subagent's prompt so it has the full context. After
123
- each fix iteration, re-run the Verify step (step 3) in addition to any AC or
124
- quality review that failed. Repeat at most **2** times. If something still
125
- fails after that, stop looping and record the gap honestly in the report do
126
- not hide it.
127
-
128
- 7. **Report** — Task, `model: "opus"`, read-only. Give the subagent the plan, the
129
- Verify results (from step 3), the AC verdicts (from step 4), and the quality
130
- findings, and tell it to run `git --no-pager diff` itself as the **single
131
- source of truth** for the report. Every `evidence` hunk it submits must be
132
- copied verbatim from that live diff it must drop or correct any hunk carried
129
+ resolve them without regressing the rest. Include the verification command list
130
+ from the Orient step in the fix subagent's prompt (the same list passed to
131
+ Implement and Verify), so the fix subagent does not need to re-derive it. When
132
+ a Verify failure triggered the fix, include the failing command(s) and their
133
+ error output excerpt(s) from the Verify result in the fix subagent's prompt so
134
+ it has the full context. After each fix iteration, re-run the Verify step (step 3) in addition to any AC or quality review that failed. Repeat at most **2**
135
+ times. If something still fails after that, stop looping and record the gap
136
+ honestly in the report — do not hide it.
137
+
138
+ 7. **Report** — Task, `model: "opus"`, read-only. Give the subagent the AC
139
+ verdicts (with criterion text, from step 4), the Verify results (from step 3),
140
+ and the quality findings, and tell it to run `git --no-pager diff` itself as
141
+ the **single source of truth** for the report. Do not pass the full plan — the
142
+ AC verdicts carry each criterion verbatim, and the live `git --no-pager diff`
143
+ is the authoritative source for evidence; re-inlining the full plan is
144
+ redundant. Keep each subagent prompt to the minimal self-contained slice it
145
+ needs. Include the `# Technical Writing` section
146
+ (copied verbatim from this prompt) in the report subagent's prompt — the same
147
+ way `# Coding Guidelines` is forwarded to implementation subagents — so it
148
+ applies the inline-code conventions to all free-text fields it authors. Every `evidence` hunk it submits must be
149
+ copied verbatim from that live diff, including each hunk's `@@ -a,b +c,d @@` header line(s) (do not strip them — the report renders file line numbers from them) — it must drop or correct any hunk carried
133
150
  over from step 4 that no longer appears in the actual diff, and the **Files
134
151
  changed** list must come from `git --no-pager diff --stat`, not from what an
135
152
  earlier subagent claimed. Tell it to enumerate all hunks from `git --no-pager diff` and ensure each is attached to ≥1 AC's `evidence`; any hunk mapping to no plan AC goes under `## Caveats / follow-ups` as an explicit unattributed change. **If `git --no-pager diff` is empty, the
136
153
  implementation changed nothing:** the report must say so plainly — an honest
137
154
  `summary`, no AC marked `met` with evidence — and must never describe edits
138
155
  that aren't in the diff. Tell it to submit the user-facing report by calling
139
- the **`submit_report`** tool — it has that tool available. It must call
156
+ the **`submit_report`** tool — it has that tool available. The report MUST be
157
+ submitted via `submit_report` (structured); final assistant prose is only a
158
+ last-resort fallback and will be flagged as an unverified AC review. It must call
140
159
  `submit_report` exactly once and must not edit any files.
141
160
 
142
161
  8. **Confirm and end.** Once the report subagent has called `submit_report`, you are
@@ -152,12 +171,14 @@ The report subagent calls `submit_report` with these fields:
152
171
  - **`filesChanged`** — markdown list of files changed (from the diff). Rendered under `## Files changed`.
153
172
  - **`codeQuality`** — the code-quality review outcome and anything left as nice-to-have. Rendered under `## Code quality`.
154
173
  - **`caveats`** — anything deferred, unmet, or worth a human's eyes, including diff hunks that map to no plan AC. Write 'None.' if nothing. Rendered under `## Caveats / follow-ups`.
155
- - **`acceptanceCriteria`** — one entry per AC from the plan, in plan order, each:
174
+ - **`acceptanceCriteria`** — EXACTLY one entry per AC from the plan (same count and
175
+ order). The runner counts the plan's ACs and warns on any mismatch — do not merge,
176
+ split, drop, or invent criteria. Each entry:
156
177
  - `criterion` — the AC text verbatim.
157
178
  - `status` — `"met"` / `"not_met"` / `"unclear"`, mirroring the AC review.
158
179
  - `rationale` — one or two sentences on why the verdict holds.
159
180
  - `evidence` — an array of `{ file, hunk, note? }`, where `hunk` is copied
160
- verbatim from the live `git --no-pager diff` and proves the verdict (`note`
181
+ verbatim from the live `git --no-pager diff`, including each hunk's `@@ -a,b +c,d @@` header line(s) (do not strip them — the report renders file line numbers from them), and proves the verdict (`note`
161
182
  optionally explains it). Never include a hunk that isn't in the actual diff. Cite
162
183
  the supporting hunk(s) for a met criterion; may be empty for not_met / unclear.
163
184
 
@@ -173,4 +194,10 @@ The report subagent calls `submit_report` with these fields:
173
194
  once — not as prose for you to echo. Each acceptance criterion carries the diff
174
195
  hunk(s) that prove its verdict, copied verbatim from the live `git --no-pager diff`
175
196
  — never fabricated. An empty diff means an honest "nothing changed" report.
197
+ - The report MUST be submitted via `submit_report` (structured). Final assistant prose
198
+ is only a last-resort fallback and will be flagged as an unverified AC review by the
199
+ runner.
200
+ - `acceptanceCriteria` must have EXACTLY one entry per plan acceptance criterion (same
201
+ count and order). The runner counts the plan's ACs and warns on any mismatch, so do
202
+ not merge, split, drop, or invent criteria.
176
203
  - The report exists so the human reviewer can verify each acceptance criterion is satisfied — the ACs and their diff evidence are the primary review surface.
@@ -3,7 +3,7 @@ name: lint-plugin-generator
3
3
  description: >-
4
4
  Generate a concrete plan to install the FlumeCode Lint plugin for THIS repo —
5
5
  a .flumecode/plugins/lint/ manifest wired to the pre-commit socket that runs
6
- the repo's lint/format checks and reports a heartbeat.
6
+ the repo's lint/format checks.
7
7
  ---
8
8
 
9
9
  # lint-plugin-generator
@@ -37,60 +37,30 @@ instruct the implementer to create:
37
37
  {
38
38
  "key": "lint",
39
39
  "socket": "pre-commit",
40
- "run": "node .flumecode/plugins/lint/run.mjs",
41
- "heartbeat": {
42
- "url": "https://<flumecode-base-url>/api/runner/plugins/heartbeat",
43
- "token": "<repo-scoped-token>"
44
- }
40
+ "run": "<detected lint/format/typecheck command chain>"
45
41
  }
46
42
  ```
47
43
 
48
- `url` and `token` are placeholders — note in the plan that the user must fill
49
- them in via the FlumeCode web UI after installation. The plan must **not**
50
- commit a real token value.
51
-
52
- ### Artifact 2 — `.flumecode/plugins/lint/run.mjs`
53
-
54
- A Node.js ES module that:
55
-
56
- 1. Reads `plugin.json` from the same directory to get `heartbeat.url` and `heartbeat.token`.
57
- 2. Determines the current git branch (`git rev-parse --abbrev-ref HEAD`).
58
- 3. Runs each detected lint/format/typecheck command with `child_process.execSync` (stdio: `inherit`).
59
- 4. On success, POSTs to the heartbeat URL:
60
- `{ repoId: process.env.FLUMECODE_REPO_ID, pluginKey: "lint", branch, status: "pass", timestamp: new Date().toISOString() }`
61
- (`FLUMECODE_REPO_ID` — the runner will inject this in Plan 2; if not yet available, the heartbeat may omit repoId or read it from .flumecode/config.json)
62
- 5. On any command failure, exits non-zero (and optionally POSTs `status: "fail"`).
63
-
64
- The `repoId` comes from the `FLUMECODE_REPO_ID` environment variable that the
65
- runner sets. The heartbeat request uses `Authorization: Bearer <token>`.
44
+ Derive `run` from the repo's detected commands (e.g. `pnpm exec lint-staged && pnpm lint && pnpm typecheck && pnpm test`). Do not hard-code — include the actual commands discovered in the Orient step.
66
45
 
67
46
  ### Manifest shape
68
47
 
69
48
  The manifest `plugin.json` must have exactly these fields:
70
49
 
71
50
  ```
72
- { key, socket, run, heartbeat: { url, token } }
51
+ { key, socket, run }
73
52
  ```
74
53
 
75
54
  This is the shape the FlumeCode plugin loader expects.
76
55
 
77
- ### Heartbeat endpoint
78
-
79
- `POST /api/runner/plugins/heartbeat` with JSON body:
80
- `{ repoId, pluginKey, branch, status, timestamp }`
81
- (this endpoint does not exist yet — it will be created by Plan 2; include this as a step in the generated plan or as a prerequisite note)
82
-
83
56
  ### Acceptance criteria the plan must include
84
57
 
85
- - `.flumecode/plugins/lint/plugin.json` exists with `key: "lint"`, `socket: "pre-commit"`, `run: "node .flumecode/plugins/lint/run.mjs"`.
86
- - `.flumecode/plugins/lint/run.mjs` runs the repo's detected lint/format/typecheck commands and exits non-zero on any failure.
87
- - A successful run POSTs a heartbeat with `{ repoId, pluginKey: "lint", branch, status: "pass", timestamp }`.
58
+ - `.flumecode/plugins/lint/plugin.json` exists with `key: "lint"`, `socket: "pre-commit"`, and `run` set to the detected command chain.
59
+ - The `run` command exits non-zero on any lint/format/typecheck failure.
88
60
 
89
61
  ## Always
90
62
 
91
63
  - Stay read-only. Produce the plan via `submit_plan`; never edit files.
92
64
  - The plan must be specific enough for an `implement-plan` run to execute
93
65
  without re-deriving the commands — include the actual detected commands in
94
- the step descriptions and pseudo code.
95
- - Leave `heartbeat.url` and `heartbeat.token` as placeholders — document that
96
- the user fills them in via the FlumeCode web UI after installation.
66
+ the step descriptions and artifact content.
@@ -87,6 +87,10 @@ Field-by-field guidance:
87
87
  - **`risks`** — anything that could change the approach or surface a problem.
88
88
  - **`outOfScope`** — what you are deliberately not doing.
89
89
 
90
+ **Formatting.** Apply the `# Technical Writing` guidelines from the prompt to all
91
+ free-text fields: wrap code identifiers (function names, variable names, type names,
92
+ file names, commands, and flags) in inline backticks.
93
+
90
94
  Cite real files you inspected. Prefer the codebase's existing patterns over
91
95
  introducing new ones. Be specific enough that another agent could execute the
92
96
  plan without re-deriving it.
@@ -95,4 +95,6 @@ before you finish. (You don't need to `git add`; the runner stages and commits f
95
95
 
96
96
  Your last message **is** the report posted to the session thread. Write it for the
97
97
  user: list which files conflicted and, briefly, how you resolved each, plus how you
98
- verified (build/tests). The runner appends the pull-request link, so don't add one.
98
+ verified (build/tests). Wrap conflicted file names and code identifiers in inline
99
+ backticks per the `# Technical Writing` section. The runner appends the pull-request
100
+ link, so don't add one.
@@ -60,8 +60,9 @@ essentials:
60
60
  - **Subagents start blank.** Each Task subagent sees only the prompt you give it —
61
61
  not this thread, the plan, or the prior report. Make every prompt self-contained:
62
62
  include the specific change requested, the relevant plan/report excerpt, the code
63
- context, and the coding guidelines (verbatim, from the `# Coding Guidelines`
64
- section in the prompt).
63
+ context, the coding guidelines (verbatim, from the `# Coding Guidelines` section
64
+ in the prompt), and — for the report subagent — the technical-writing guidelines
65
+ (verbatim, from the `# Technical Writing` section in the prompt).
65
66
  - **Scope the work to the request.** This is a fine-tune of an existing
66
67
  implementation, not a rebuild. Change only what the user asked for plus what that
67
68
  change strictly requires; don't regress the rest of the plan.