npm - @flumecode/runner - Versions diffs - 0.12.1 → 0.14.0 - Mend

@flumecode/runner 0.12.1 → 0.14.0

This diff compares the contents of two publicly available versions of the package, as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions exactly as they appear in their respective public registries.

Files changed (8)

package/dist/cli.js +130 -29
package/package.json +1 -1
package/skills-plugin/rules/technical-writing.md +14 -0
package/skills-plugin/skills/implement-plan/SKILL.md +76 -49
package/skills-plugin/skills/lint-plugin-generator/SKILL.md +7 -37
package/skills-plugin/skills/request-to-plan/SKILL.md +4 -0
package/skills-plugin/skills/resolve-merge-conflict/SKILL.md +3 -1
package/skills-plugin/skills/revise-implementation/SKILL.md +3 -2

package/dist/cli.js CHANGED

@@ -25,6 +25,10 @@ function writeConfig(config) {
   writeFileSync(configPath, JSON.stringify(config, null, 2), { mode: 384 });
 }
+// src/run.ts
+import { existsSync as existsSync4 } from "node:fs";
+import { join as join5 } from "node:path";
 // src/version.ts
 import { readFileSync as readFileSync2 } from "node:fs";
 import { fileURLToPath } from "node:url";
@@ -373,6 +377,10 @@ async function hasChanges(dir) {
   const { stdout: stdout2 } = await git(["-C", dir, "status", "--porcelain"]);
   return stdout2.trim().length > 0;
 }
+async function gitDiffStat(dir) {
+  const { stdout: stdout2 } = await git(["-C", dir, "--no-pager", "diff", "--stat"]);
+  return stdout2;
+}
 var PreCommitError = class extends Error {
   constructor(log) {
     super("pre-commit checks failed");
@@ -604,26 +612,35 @@ function parseManifest(raw) {
   if (typeof r.key !== "string" || !r.key) return null;
   if (r.socket !== "pre-commit") return null;
   if (typeof r.run !== "string" || !r.run) return null;
-  const manifest = { key: r.key, socket: r.socket, run: r.run };
-  if (typeof r.heartbeat === "object" && r.heartbeat !== null) {
-    const hb = r.heartbeat;
-    if (typeof hb.url === "string" && typeof hb.token === "string") {
-      manifest.heartbeat = { url: hb.url, token: hb.token };
-    }
-  }
-  return manifest;
+  return { key: r.key, socket: r.socket, run: r.run };
 }
 // src/plugins/socket.ts
 var exec2 = promisify2(execCb);
+var MAX_OUTPUT = 8 * 1024;
+function cap(s) {
+  return s.length <= MAX_OUTPUT ? s : s.slice(s.length - MAX_OUTPUT);
+}
+var lastSocketResults = [];
+function resetSocketResults() {
+  lastSocketResults = [];
+}
+function getSocketResults() {
+  return lastSocketResults;
+}
 async function runSocket(socketName, dir) {
   const plugins = (await loadPlugins(dir)).filter((p) => p.socket === socketName);
+  const results = [];
   for (const plugin of plugins) {
     const result = await runPluginCommand(plugin.run, dir);
     if (result.exitCode !== 0) {
+      results.push({ key: plugin.key, status: "failed", output: cap(result.output) });
+      lastSocketResults = results;
       throw new PreCommitError(`[plugin:${plugin.key}] ${result.output}`);
     }
+    results.push({ key: plugin.key, status: "passed", output: cap(result.output) });
   }
+  lastSocketResults = results;
 }
 async function runPluginCommand(command2, cwd) {
   try {
@@ -714,6 +731,11 @@ function widgetPosted(kind) {
 // src/plan.ts
 import { createSdkMcpServer as createSdkMcpServer2, tool as tool2 } from "@anthropic-ai/claude-agent-sdk";
 import { z as z2 } from "zod";
+// src/schema-hints.ts
+var INLINE_CODE_HINT = "Wrap code identifiers (function, variable, type, and file names, commands, and flags) in inline backticks, e.g. `getCodingSessionsForRequest`.";
+// src/plan.ts
 var SERVER_NAME2 = "flume_plan";
 var SUBMIT_PLAN = "submit_plan";
 var PLAN_TOOL_NAME = `mcp__${SERVER_NAME2}__${SUBMIT_PLAN}`;
@@ -724,7 +746,7 @@ var pseudoCodeEntrySchema = z2.object({
 });
 var stepSchema = z2.object({
   title: z2.string().min(1).describe("A concise imperative title for this step."),
-  description: z2.string().min(1).describe("What changes and why \u2014 the rationale for this step."),
+  description: z2.string().min(1).describe("What changes and why \u2014 the rationale for this step. " + INLINE_CODE_HINT),
   pseudoCode: z2.array(pseudoCodeEntrySchema).optional().describe(
     "Per-file pseudo code. Provide an entry for every non-documentation file this step touches. Each entry contains the file path and pseudo code describing the changes to that file."
   )
@@ -734,11 +756,11 @@ var planInputSchema = {
     "A concise, descriptive name for THIS plan. Must be distinct from the request title and from any sibling plans on the same request. Keep it under 120 characters."
   ),
   scope: z2.enum(["feat", "fix", "chore", "docs", "test", "refactor"]).describe("The primary intent of the change."),
-  goal: z2.string().min(1).describe("One or two sentences stating the outcome."),
+  goal: z2.string().min(1).describe("One or two sentences stating the outcome. " + INLINE_CODE_HINT),
   assumptions: z2.array(z2.string()).describe("Anything decided during planning, including unanswered defaults."),
   steps: z2.array(stepSchema).min(1).describe("Ordered list of changes. Each step says what and why, with file references."),
   acceptanceCriteria: z2.array(z2.string().min(1)).min(2).describe(
-    "Concrete, deterministically-checkable conditions that together define done. Each names a trigger/precondition and the exact observable result (run X -> output Y; file Z contains W; f(a) returns b) \u2014 no vague adjectives, not a restatement of a step. The set must collectively cover every step's change. At least 2 required."
+    "Concrete, deterministically-checkable conditions that together define done. Each names a trigger/precondition and the exact observable result (run X -> output Y; file Z contains W; f(a) returns b) \u2014 no vague adjectives, not a restatement of a step. The set must collectively cover every step's change. At least 2 required. " + INLINE_CODE_HINT
   ),
   risks: z2.array(z2.string()).describe("Anything that could change the approach."),
   outOfScope: z2.array(z2.string()).describe("What is deliberately not being done.")
@@ -834,6 +856,19 @@ function createPlanTooling() {
   });
   return { mcpServer, getPlans: () => renderedPlans };
 }
+function countPlanAcceptanceCriteria(planBody) {
+  if (!planBody) return 0;
+  const lines2 = planBody.split("\n");
+  const start2 = lines2.findIndex((l) => l.trim() === "## Acceptance criteria");
+  if (start2 === -1) return 0;
+  let count = 0;
+  for (let i = start2 + 1; i < lines2.length; i++) {
+    const line = lines2[i] ?? "";
+    if (line.startsWith("## ")) break;
+    if (line.startsWith("- [ ] ")) count++;
+  }
+  return count;
+}
 // src/report.ts
 import { createSdkMcpServer as createSdkMcpServer3, tool as tool3 } from "@anthropic-ai/claude-agent-sdk";
@@ -849,28 +884,28 @@ var STATUS_ICON = {
 var evidenceSchema = z3.object({
   file: z3.string().min(1).describe("Repo-relative path the hunk comes from."),
   hunk: z3.string().min(1).describe(
-    "A unified-diff hunk body proving the criterion \u2014 the lines that matter, not the whole file. Rendered verbatim as a ```diff block."
+    "A unified-diff hunk proving the criterion \u2014 the lines that matter, not the whole file. MUST keep the `@@ -a,b +c,d @@` hunk header line(s) exactly as they appear in `git --no-pager diff`; the report renders file line numbers from them. Rendered verbatim as a ```diff block."
   ),
   note: z3.string().optional().describe("Optional one-line explanation of why this hunk satisfies the criterion.")
 });
 var acVerdictSchema = z3.object({
   criterion: z3.string().min(1).describe("The acceptance-criterion text, verbatim from the plan."),
   status: z3.enum(["met", "not_met", "unclear"]).describe("Verdict for this criterion, verified against the actual diff."),
-  rationale: z3.string().min(1).describe("One or two sentences on why the verdict holds."),
+  rationale: z3.string().min(1).describe("One or two sentences on why the verdict holds. " + INLINE_CODE_HINT),
   evidence: z3.array(evidenceSchema).describe(
     "Diff hunks proving the verdict, copied verbatim from git --no-pager diff. Across ALL criteria the evidence must collectively cover every hunk in the diff \u2014 each changed hunk appears under at least one criterion. Cite the relevant hunk(s) for a met criterion; may be empty for not_met / unclear."
   )
 });
 var reportInputSchema = {
-  summary: z3.string().min(1).describe("One or two sentences on what was implemented."),
+  summary: z3.string().min(1).describe("One or two sentences on what was implemented. " + INLINE_CODE_HINT),
   filesChanged: z3.string().min(1).describe(
     "Markdown: the list of files changed (from the diff). Rendered under '## Files changed'."
   ),
   codeQuality: z3.string().min(1).describe(
-    "Markdown: the code-quality review outcome and anything left as nice-to-have. Rendered under '## Code quality'."
+    "Markdown: the code-quality review outcome and anything left as nice-to-have. Rendered under '## Code quality'. " + INLINE_CODE_HINT
   ),
   caveats: z3.string().min(1).describe(
-    "Markdown: anything deferred, unmet, or worth a human's eyes, incl. diff hunks that map to no plan AC. Write 'None.' if nothing. Rendered under '## Caveats / follow-ups'."
+    "Markdown: anything deferred, unmet, or worth a human's eyes, incl. diff hunks that map to no plan AC. Write 'None.' if nothing. Rendered under '## Caveats / follow-ups'. " + INLINE_CODE_HINT
   ),
   acceptanceCriteria: z3.array(acVerdictSchema).min(1).describe(
     "One entry per acceptance criterion from the plan, in plan order, each with a verdict and the diff evidence behind it."
@@ -1088,6 +1123,10 @@ function stripFrontMatter(raw) {
 }
 // src/prompt.ts
+function appendRule(lines2, intro, ruleName) {
+  lines2.push("", intro, "", loadRule(ruleName));
+}
+var WRITING_INTRO = "These technical-writing guidelines apply to the plan and report prose you author in this run:";
 function turnHeading(turn, agentName) {
   if (turn.role === "user") return "User";
   if (turn.failed) return `${agentName} (this run ended in an error)`;
@@ -1121,6 +1160,7 @@ function buildPrompt(ctx) {
       loadRule("coding-guideline")
     );
   }
+  appendRule(lines2, WRITING_INTRO, "technical-writing");
   lines2.push("", `# Request: ${ctx.request?.title ?? ""}`);
   if (ctx.request?.body) {
     lines2.push("", ctx.request.body);
@@ -1147,6 +1187,10 @@ function buildRevisePrompt(ctx) {
     "",
     loadRule("coding-guideline"),
     "",
+    WRITING_INTRO,
+    "",
+    loadRule("technical-writing"),
+    "",
     `# Plan: ${ctx.request?.title ?? ""}`
   ];
   if (ctx.request?.body) {
@@ -1173,6 +1217,10 @@ function buildResolvePrompt(ctx, related = []) {
     "",
     loadRule("coding-guideline"),
     "",
+    WRITING_INTRO,
+    "",
+    loadRule("technical-writing"),
+    "",
     `# Plan: ${ctx.request?.title ?? ""}`
   ];
   if (ctx.request?.body) {
@@ -1197,7 +1245,7 @@ function buildResolvePrompt(ctx, related = []) {
   );
   return lines2.join("\n");
 }
-function buildDocumentPrompt(ctx) {
+function buildDocumentPrompt(ctx, changedFiles) {
   const lines2 = [
     `You are "${ctx.agentName}" maintaining the repository wiki for ${ctx.repo.fullName}.`,
     `An implementation just ran in this working directory to satisfy the request below; its changes are uncommitted in the working tree.`,
@@ -1209,6 +1257,14 @@ function buildDocumentPrompt(ctx) {
     lines2.push("", ctx.request.body);
   }
   appendThread(lines2, ctx);
+  if (changedFiles && changedFiles.trim()) {
+    lines2.push(
+      "",
+      "Files changed by this implementation (reconcile only the wiki pages these affect \u2014 do not re-survey the whole repo):",
+      "",
+      changedFiles.trim()
+    );
+  }
   lines2.push("", "When done, reply with a one- or two-line summary of the wiki changes you made.");
   return lines2.join("\n");
 }
@@ -1471,13 +1527,15 @@ async function processChatJob(ctx, dir, config, abort) {
     console.log(`  \u2026job ${ctx.jobId} posted ${result.widgets.length} widget(s); awaiting reply`);
     return { text: reply, widgets: result.widgets };
   }
+  const wikiExists = existsSync4(join5(dir, ".flumecode", "wiki"));
   let documented = false;
-  if (ctx.permissionMode !== "plan" && await hasChanges(dir)) {
+  if (ctx.permissionMode !== "plan" && wikiExists && await hasChanges(dir)) {
     try {
+      const changedFiles = await gitDiffStat(dir);
       console.log(`  \u2026updating wiki for job ${ctx.jobId}`);
       await runClaudeCode({
         cwd: dir,
-        prompt: buildDocumentPrompt(ctx),
+        prompt: buildDocumentPrompt(ctx, changedFiles),
         permissionMode: ctx.permissionMode,
         maxTurns: DOCUMENT_MAX_TURNS,
         abortController: abort
@@ -1486,6 +1544,8 @@ async function processChatJob(ctx, dir, config, abort) {
     } catch (err) {
       console.warn(`  wiki update skipped: ${errorMessage2(err)}`);
     }
+  } else if (ctx.permissionMode !== "plan" && !wikiExists) {
+    console.log(`  no .flumecode/wiki \u2014 skipping wiki reconcile for ${ctx.jobId}`);
   }
   const { outcome, autoMerged } = await pushAndOpenPr(ctx, dir, config, abort);
   reply += outcomeBanner(outcome, { branch: ctx.repo.checkoutBranch, documented, autoMerged });
@@ -1494,12 +1554,24 @@ async function processChatJob(ctx, dir, config, abort) {
 function reportClaimsWork(report) {
   return !!report && report.acceptanceCriteria.some((ac) => ac.status === "met" && ac.evidence.length > 0);
 }
+function reportMeetsAcContract(report, expectedAcCount) {
+  if (expectedAcCount === 0) return true;
+  if (!report) return false;
+  return report.acceptanceCriteria.length === expectedAcCount;
+}
+function buildAcWarningBanner(report, expectedAcCount) {
+  if (!report)
+    return "> \u26A0\uFE0F **Unverified AC review** \u2014 the implementation did not submit a structured report, so its acceptance-criteria review could not be checked against the plan.";
+  return `> \u26A0\uFE0F **AC review may be incomplete** \u2014 the plan has ${expectedAcCount} acceptance criteria but the report reviewed ${report.acceptanceCriteria.length}.`;
+}
 async function processImplementJob(ctx, dir, resumed, config, abort) {
   console.log(`
 \u25B6 Implement ${ctx.jobId} \u2014 ${ctx.repo.fullName}: "${jobTitle(ctx)}"`);
   const installResult = await installDependencies(dir);
+  const expectedAcCount = countPlanAcceptanceCriteria(ctx.request?.body);
   let report;
   let reply;
+  let warningBanner = "";
   for (let attempt = 0; ; attempt++) {
     const result = await runClaudeCode({
       cwd: dir,
@@ -1511,28 +1583,48 @@ async function processImplementJob(ctx, dir, resumed, config, abort) {
     });
     report = result.report ?? void 0;
     reply = (report ? renderReport(report) : result.text.trim()) || "(the agent produced no report)";
-    if (abort.signal.aborted || !reportClaimsWork(report) || await hasChanges(dir)) break;
-    if (attempt >= MAX_IMPLEMENT_RETRIES) {
+    if (abort.signal.aborted) {
+      warningBanner = "";
+      break;
+    }
+    const treeChanged = await hasChanges(dir);
+    const phantom = reportClaimsWork(report) && !treeChanged;
+    const acProblem = !reportMeetsAcContract(report, expectedAcCount);
+    if (!phantom && !acProblem) {
+      warningBanner = "";
+      break;
+    }
+    if (attempt < MAX_IMPLEMENT_RETRIES) {
+      console.warn(
+        `  implement ${ctx.jobId}: ${phantom ? "report claims changes but the working tree is clean" : "AC-review contract failed"} \u2014 re-running implementation (attempt ${attempt + 2})`
+      );
+      continue;
+    }
+    if (phantom) {
       throw new Error(
         `Implementation reported completed work (acceptance criteria met with diff evidence) but the working tree is clean after ${attempt + 1} attempt(s) \u2014 no changes were persisted, so no pull request could be opened.`
       );
     }
-    console.warn(
-      `  implement ${ctx.jobId}: report claims changes but the working tree is clean \u2014 re-running implementation (attempt ${attempt + 2})`
-    );
+    warningBanner = buildAcWarningBanner(report, expectedAcCount);
+    break;
   }
+  if (warningBanner) reply = `${warningBanner}
+${reply}`;
   if (installResult.status === "failed") {
     reply += `
 > \u26A0\uFE0F Dependencies failed to install (\`${installResult.manager}\`); tests may not have run.`;
   }
+  const wikiExists = existsSync4(join5(dir, ".flumecode", "wiki"));
   let documented = false;
-  if (await hasChanges(dir)) {
+  if (wikiExists && await hasChanges(dir)) {
     try {
+      const changedFiles = await gitDiffStat(dir);
       console.log(`  \u2026updating wiki for implement ${ctx.jobId}`);
       await runClaudeCode({
         cwd: dir,
-        prompt: buildDocumentPrompt(ctx),
+        prompt: buildDocumentPrompt(ctx, changedFiles),
         permissionMode: ctx.permissionMode,
         maxTurns: DOCUMENT_MAX_TURNS,
         abortController: abort
@@ -1541,15 +1633,19 @@ async function processImplementJob(ctx, dir, resumed, config, abort) {
     } catch (err) {
       console.warn(`  wiki update skipped: ${errorMessage2(err)}`);
     }
+  } else if (!wikiExists) {
+    console.log(`  no .flumecode/wiki \u2014 skipping wiki reconcile for ${ctx.jobId}`);
   }
   const { outcome, autoMerged } = await pushAndOpenPr(ctx, dir, config, abort, {
     rebase: !resumed
   });
   reply += outcomeBanner(outcome, { branch: ctx.repo.checkoutBranch, documented, autoMerged });
+  const lintPlugins = getSocketResults();
+  const finalReport = report && lintPlugins.length ? { ...report, lint: { plugins: lintPlugins } } : report;
   return {
     text: reply,
     widgets: [],
-    ...report ? { report } : {},
+    ...finalReport ? { report: finalReport } : {},
     ...outcome.kind === "pr" ? { pr: outcome.pr } : {}
   };
 }
@@ -1577,13 +1673,15 @@ async function processReviseJob(ctx, dir, resumed, config, abort) {
     console.log(`  \u2026revise ${ctx.jobId} posted ${result.widgets.length} widget(s); awaiting reply`);
     return { text: reply, widgets: result.widgets };
   }
+  const wikiExists = existsSync4(join5(dir, ".flumecode", "wiki"));
   let documented = false;
-  if (await hasChanges(dir)) {
+  if (wikiExists && await hasChanges(dir)) {
     try {
+      const changedFiles = await gitDiffStat(dir);
       console.log(`  \u2026updating wiki for revise ${ctx.jobId}`);
       await runClaudeCode({
         cwd: dir,
-        prompt: buildDocumentPrompt(ctx),
+        prompt: buildDocumentPrompt(ctx, changedFiles),
         permissionMode: ctx.permissionMode,
         maxTurns: DOCUMENT_MAX_TURNS,
         abortController: abort
@@ -1592,6 +1690,8 @@ async function processReviseJob(ctx, dir, resumed, config, abort) {
     } catch (err) {
       console.warn(`  wiki update skipped: ${errorMessage2(err)}`);
     }
+  } else if (!wikiExists) {
+    console.log(`  no .flumecode/wiki \u2014 skipping wiki reconcile for ${ctx.jobId}`);
   }
   const { outcome, autoMerged } = await pushAndOpenPr(ctx, dir, config, abort, {
     rebase: !resumed
@@ -1732,6 +1832,7 @@ async function pollLoop(config) {
       scheduleCancelPoll();
       try {
         resetUsage();
+        resetSocketResults();
         const { text, widgets, pr, plans, report } = await processJob(ctx, config, abort);
         const usage = getUsage();
         await reportJob(config, ctx.jobId, {

package/package.json CHANGED

@@ -1,6 +1,6 @@
 {
   "name": "@flumecode/runner",
-  "version": "0.12.1",
+  "version": "0.14.0",
   "type": "module",
   "description": "FlumeCode local runner — claims jobs and drives your local Claude Code against a real checkout.",
   "bin": {

package/skills-plugin/rules/technical-writing.md ADDED

@@ -0,0 +1,14 @@
+---
+name: technical-writing
+description: >-
+  Inline-code conventions for agent-authored plan and report prose: wrap code
+  identifiers in backticks so they render as inline code.
+---
+# Technical Writing
+## Inline code
+Wrap code identifiers — function names, variable names, type names, file names, commands, and flags — in inline backticks so they render as inline code. For example: `getCodingSessionsForRequest`, not getCodingSessionsForRequest.
+This convention applies to all free-text fields in plans and reports: goals, step descriptions, acceptance criteria, summaries, code-quality notes, and caveats.

package/skills-plugin/skills/implement-plan/SKILL.md CHANGED

@@ -43,6 +43,9 @@ put it in the prompt, the subagent doesn't have it.
 - **Coding guidelines.** This prompt contains a `# Coding Guidelines` section.
   Copy it verbatim into the prompt of the implementation subagent and the
   code-quality-review subagent so they hold the work to it.
+- **Technical-writing guidelines.** This prompt contains a `# Technical Writing`
+  section. Copy it verbatim into the prompt of the report subagent so it applies
+  the inline-code conventions to all free-text fields it authors.
 ## Inputs
@@ -58,32 +61,39 @@ the next step.
 1. **Orient.** Read the plan/request and the FlumeCode wiki (if any) enough to
    write good task prompts. Extract the **Steps** and the **Acceptance criteria
-   (ACs)**. Do not implement.
+   (ACs)**. Also discover the project's verification commands by checking these
+   sources in order: `package.json` scripts (look for `build`, `typecheck`,
+   `lint`, `test`), `CLAUDE.md`, any `.flumecode/wiki/` page that mentions
+   commands, and `Makefile`. Capture the explicit command list; you will include
+   it in the prompts you write for the Implement, Verify, and Fix-loop subagents
+   so none of them re-derive it. Do not implement.
 2. **Implement** — Task, `model: "sonnet"`. Give the subagent: the plan steps, a
-   pointer to the wiki/orientation, and the coding guidelines (verbatim). Tell it
-   to make all the code changes in the working tree to satisfy the plan, then
-   self-verify by discovering and running the project's verification commands —
-   checking these sources in order: `package.json` scripts (look for `build`,
-   `typecheck`, `lint`, `test`), `CLAUDE.md`, any `.flumecode/wiki/` page that
-   mentions commands, and `Makefile`. Use whatever is present and appropriate for
-   this repo; do not hardcode specific command strings. Run each discovered
-   command and fix any errors that the edits introduced before returning. If no
-   build/test setup exists in this repo, note that and move on — do not fail. End
-   by reporting: the verification commands it ran and their pass/fail results,
-   which files it changed, and how each plan step was addressed. It must not
-   commit or push.
+   pointer to the wiki/orientation, the coding guidelines (verbatim), and the
+   explicit verification command list the orchestrator discovered in the Orient
+   step. Tell it to make all the code changes in the working tree to satisfy the
+   plan, then self-verify by running the verification commands the orchestrator
+   already discovered and passed in the task prompt. If the orchestrator did not
+   provide a list (e.g. could not determine commands confidently), fall back to
+   discovering them from the same sources: `package.json` scripts (look for
+   `build`, `typecheck`, `lint`, `test`), `CLAUDE.md`, any `.flumecode/wiki/`
+   page that mentions commands, and `Makefile`. Run each command and fix any
+   errors that the edits introduced before returning. If no build/test setup
+   exists in this repo, note that and move on — do not fail. End by reporting:
+   the verification commands it ran and their pass/fail results, which files it
+   changed, and how each plan step was addressed. It must not commit or push.
 3. **Verify (build & tests)** — Task, `model: "sonnet"`, read-only. This step
    gives the orchestrator an objective, independent build/test signal before the
    subjective AC and quality reviews. Tell the subagent to:
-   - Discover the project's verification commands from `package.json` scripts
-     (look for `build`, `typecheck`, `lint`, `test`), `CLAUDE.md`,
-     `.flumecode/wiki/` (any page that mentions commands), and `Makefile`. Use
-     what is present; do not hardcode specific command strings.
-   - Run each discovered command and record: the exact command, whether it passed
-     or failed, and — for any failure — a short excerpt of the failing output
-     (enough to diagnose the problem).
+   - Run the verification commands provided by the orchestrator in the task
+     prompt. If none were provided, fall back to discovering them from
+     `package.json` scripts (look for `build`, `typecheck`, `lint`, `test`),
+     `CLAUDE.md`, `.flumecode/wiki/` (any page that mentions commands), and
+     `Makefile`.
+   - Run each command and record: the exact command, whether it passed or failed,
+     and — for any failure — a short excerpt of the failing output (enough to
+     diagnose the problem).
    - If no build/test setup exists in this repo, say so explicitly and pass the
      gate.
    - Return a structured per-check result: command, pass/fail, failing-output
@@ -94,19 +104,18 @@ the next step.
    subagent the full AC list and tell it to verify each one against the actual
    changes (run `git --no-pager diff`, read the changed files, run tests/build if
    useful). For **each** AC it must return: the criterion text verbatim, a verdict
-   (**met / not met / unclear**), a one-or-two-sentence rationale, and — this is the
-   evidence the report needs — the **exact diff hunk(s)** that prove it, each tagged
-   with its file path (the hunks that prove it, copied verbatim from
-   `git --no-pager diff`, such that the union of every AC's evidence covers the
-   entire diff — each changed hunk cited under at least one criterion). A _met_ AC should cite at least one
-   hunk; _not met_ / _unclear_ may cite none. **Ground every verdict in the actual
-   diff:** a criterion may be marked _met_ only if `git --no-pager diff` really
-   contains the change that satisfies it, and each cited hunk must be copied verbatim
-   from that live output — never reconstructed from the plan or from what the
-   implement subagent claimed. If `git --no-pager diff` is empty, the implementation
-   produced no changes: no criterion may be _met_, and the review must say so. Tell it
-   to return this as a clean, structured list so you can hand it straight to the
-   report step. In addition to per-AC verdicts, cross-check that every hunk in `git --no-pager diff` is cited by at least one AC's evidence; report any uncovered hunk as a coverage gap (signalling a missing AC or an out-of-scope change).
+   (**met / not met / unclear**), a one-or-two-sentence rationale, and the relevant
+   file paths and change locations that support the verdict. A _met_ AC should cite
+   at least one [file/location]; _not met_ / _unclear_ may cite none. **Ground
+   every verdict in the actual diff:** a criterion may be marked _met_ only if
+   `git --no-pager diff` really contains the change that satisfies it — never
+   reconstruct from the plan or from what the implement subagent claimed. If
+   `git --no-pager diff` is empty, the implementation produced no changes: no
+   criterion may be _met_, and the review must say so. Tell it to return this as a
+   clean, structured list so you can hand it straight to the report step. In
+   addition to per-AC verdicts, cross-check `git --no-pager diff` against the ACs;
+   note any files or areas that appear changed but don't map to any AC as a coverage
+   gap (signalling a missing AC or an out-of-scope change).
 5. **Code-quality review** — Task, `model: "opus"`, read-only. Give the subagent
    the coding guidelines (verbatim) and tell it to review the changes for
@@ -117,26 +126,36 @@ the next step.
    review (step 4) reports any _not met_ AC, or the quality review (step 5)
    reports any _must-fix_ finding: spawn an **Implement/fix** subagent (Task,
    `model: "sonnet"`) whose prompt lists exactly those findings and tells it to
-   resolve them without regressing the rest. When a Verify failure triggered the
-   fix, include the failing command(s) and their error output excerpt(s) from the
-   Verify result in the fix subagent's prompt so it has the full context. After
-   each fix iteration, re-run the Verify step (step 3) in addition to any AC or
-   quality review that failed. Repeat at most **2** times. If something still
-   fails after that, stop looping and record the gap honestly in the report — do
-   not hide it.
-7. **Report** — Task, `model: "opus"`, read-only. Give the subagent the plan, the
-   Verify results (from step 3), the AC verdicts (from step 4), and the quality
-   findings, and tell it to run `git --no-pager diff` itself as the **single
-   source of truth** for the report. Every `evidence` hunk it submits must be
-   copied verbatim from that live diff — it must drop or correct any hunk carried
+   resolve them without regressing the rest. Include the verification command list
+   from the Orient step in the fix subagent's prompt (the same list passed to
+   Implement and Verify), so the fix subagent does not need to re-derive it. When
+   a Verify failure triggered the fix, include the failing command(s) and their
+   error output excerpt(s) from the Verify result in the fix subagent's prompt so
+   it has the full context. After each fix iteration, re-run the Verify step (step 3) in addition to any AC or quality review that failed. Repeat at most **2**
+   times. If something still fails after that, stop looping and record the gap
+   honestly in the report — do not hide it.
+7. **Report** — Task, `model: "opus"`, read-only. Give the subagent the AC
+   verdicts (with criterion text, from step 4), the Verify results (from step 3),
+   and the quality findings, and tell it to run `git --no-pager diff` itself as
+   the **single source of truth** for the report. Do not pass the full plan — the
+   AC verdicts carry each criterion verbatim, and the live `git --no-pager diff`
+   is the authoritative source for evidence; re-inlining the full plan is
+   redundant. Keep each subagent prompt to the minimal self-contained slice it
+   needs. Include the `# Technical Writing` section
+   (copied verbatim from this prompt) in the report subagent's prompt — the same
+   way `# Coding Guidelines` is forwarded to implementation subagents — so it
+   applies the inline-code conventions to all free-text fields it authors. Every `evidence` hunk it submits must be
+   copied verbatim from that live diff, including each hunk's `@@ -a,b +c,d @@` header line(s) (do not strip them — the report renders file line numbers from them) — it must drop or correct any hunk carried
    over from step 4 that no longer appears in the actual diff, and the **Files
    changed** list must come from `git --no-pager diff --stat`, not from what an
    earlier subagent claimed. Tell it to enumerate all hunks from `git --no-pager diff` and ensure each is attached to ≥1 AC's `evidence`; any hunk mapping to no plan AC goes under `## Caveats / follow-ups` as an explicit unattributed change. **If `git --no-pager diff` is empty, the
    implementation changed nothing:** the report must say so plainly — an honest
    `summary`, no AC marked `met` with evidence — and must never describe edits
    that aren't in the diff. Tell it to submit the user-facing report by calling
-   the **`submit_report`** tool — it has that tool available. It must call
+   the **`submit_report`** tool — it has that tool available. The report MUST be
+   submitted via `submit_report` (structured); final assistant prose is only a
+   last-resort fallback and will be flagged as an unverified AC review. It must call
    `submit_report` exactly once and must not edit any files.
 8. **Confirm and end.** Once the report subagent has called `submit_report`, you are
@@ -152,12 +171,14 @@ The report subagent calls `submit_report` with these fields:
 - **`filesChanged`** — markdown list of files changed (from the diff). Rendered under `## Files changed`.
 - **`codeQuality`** — the code-quality review outcome and anything left as nice-to-have. Rendered under `## Code quality`.
 - **`caveats`** — anything deferred, unmet, or worth a human's eyes, including diff hunks that map to no plan AC. Write 'None.' if nothing. Rendered under `## Caveats / follow-ups`.
-- **`acceptanceCriteria`** — one entry per AC from the plan, in plan order, each:
+- **`acceptanceCriteria`** — EXACTLY one entry per AC from the plan (same count and
+  order). The runner counts the plan's ACs and warns on any mismatch — do not merge,
+  split, drop, or invent criteria. Each entry:
   - `criterion` — the AC text verbatim.
   - `status` — `"met"` / `"not_met"` / `"unclear"`, mirroring the AC review.
   - `rationale` — one or two sentences on why the verdict holds.
   - `evidence` — an array of `{ file, hunk, note? }`, where `hunk` is copied
-    verbatim from the live `git --no-pager diff` and proves the verdict (`note`
+    verbatim from the live `git --no-pager diff`, including each hunk's `@@ -a,b +c,d @@` header line(s) (do not strip them — the report renders file line numbers from them), and proves the verdict (`note`
     optionally explains it). Never include a hunk that isn't in the actual diff. Cite
     the supporting hunk(s) for a met criterion; may be empty for not_met / unclear.
@@ -173,4 +194,10 @@ The report subagent calls `submit_report` with these fields:
   once — not as prose for you to echo. Each acceptance criterion carries the diff
   hunk(s) that prove its verdict, copied verbatim from the live `git --no-pager diff`
   — never fabricated. An empty diff means an honest "nothing changed" report.
+- The report MUST be submitted via `submit_report` (structured). Final assistant prose
+  is only a last-resort fallback and will be flagged as an unverified AC review by the
+  runner.
+- `acceptanceCriteria` must have EXACTLY one entry per plan acceptance criterion (same
+  count and order). The runner counts the plan's ACs and warns on any mismatch, so do
+  not merge, split, drop, or invent criteria.
 - The report exists so the human reviewer can verify each acceptance criterion is satisfied — the ACs and their diff evidence are the primary review surface.

package/skills-plugin/skills/lint-plugin-generator/SKILL.md CHANGED Viewed

@@ -3,7 +3,7 @@ name: lint-plugin-generator
 description: >-
   Generate a concrete plan to install the FlumeCode Lint plugin for THIS repo —
   a .flumecode/plugins/lint/ manifest wired to the pre-commit socket that runs
-  the repo's lint/format checks and reports a heartbeat.
+  the repo's lint/format checks.
 ---
 # lint-plugin-generator
@@ -37,60 +37,30 @@ instruct the implementer to create:
 {
   "key": "lint",
   "socket": "pre-commit",
-  "run": "node .flumecode/plugins/lint/run.mjs",
-  "heartbeat": {
-    "url": "https://<flumecode-base-url>/api/runner/plugins/heartbeat",
-    "token": "<repo-scoped-token>"
-  }
+  "run": "<detected lint/format/typecheck command chain>"
 }
 ```
-`url` and `token` are placeholders — note in the plan that the user must fill
-them in via the FlumeCode web UI after installation. The plan must **not**
-commit a real token value.
-### Artifact 2 — `.flumecode/plugins/lint/run.mjs`
-A Node.js ES module that:
-1. Reads `plugin.json` from the same directory to get `heartbeat.url` and `heartbeat.token`.
-2. Determines the current git branch (`git rev-parse --abbrev-ref HEAD`).
-3. Runs each detected lint/format/typecheck command with `child_process.execSync` (stdio: `inherit`).
-4. On success, POSTs to the heartbeat URL:
-   `{ repoId: process.env.FLUMECODE_REPO_ID, pluginKey: "lint", branch, status: "pass", timestamp: new Date().toISOString() }`
-   (`FLUMECODE_REPO_ID` — the runner will inject this in Plan 2; if not yet available, the heartbeat may omit repoId or read it from .flumecode/config.json)
-5. On any command failure, exits non-zero (and optionally POSTs `status: "fail"`).
-The `repoId` comes from the `FLUMECODE_REPO_ID` environment variable that the
-runner sets. The heartbeat request uses `Authorization: Bearer <token>`.
+Derive `run` from the repo's detected commands (e.g. `pnpm exec lint-staged && pnpm lint && pnpm typecheck && pnpm test`). Do not hard-code this example — use the actual commands discovered in the Orient step.
 ### Manifest shape
 The manifest `plugin.json` must have exactly these fields:
 ```
-{ key, socket, run, heartbeat: { url, token } }
+{ key, socket, run }
 ```
 This is the shape the FlumeCode plugin loader expects.
-### Heartbeat endpoint
-`POST /api/runner/plugins/heartbeat` with JSON body:
-`{ repoId, pluginKey, branch, status, timestamp }`
-(this endpoint does not exist yet — it will be created by Plan 2; include this as a step in the generated plan or as a prerequisite note)
 ### Acceptance criteria the plan must include
-- `.flumecode/plugins/lint/plugin.json` exists with `key: "lint"`, `socket: "pre-commit"`, `run: "node .flumecode/plugins/lint/run.mjs"`.
-- `.flumecode/plugins/lint/run.mjs` runs the repo's detected lint/format/typecheck commands and exits non-zero on any failure.
-- A successful run POSTs a heartbeat with `{ repoId, pluginKey: "lint", branch, status: "pass", timestamp }`.
+- `.flumecode/plugins/lint/plugin.json` exists with `key: "lint"`, `socket: "pre-commit"`, and `run` set to the detected command chain.
+- The `run` command exits non-zero on any lint/format/typecheck failure.
 ## Always
 - Stay read-only. Produce the plan via `submit_plan`; never edit files.
 - The plan must be specific enough for an `implement-plan` run to execute
   without re-deriving the commands — include the actual detected commands in
-  the step descriptions and pseudo code.
-- Leave `heartbeat.url` and `heartbeat.token` as placeholders — document that
-  the user fills them in via the FlumeCode web UI after installation.
+  the step descriptions and artifact content.

package/skills-plugin/skills/request-to-plan/SKILL.md CHANGED Viewed

@@ -87,6 +87,10 @@ Field-by-field guidance:
 - **`risks`** — anything that could change the approach or surface a problem.
 - **`outOfScope`** — what you are deliberately not doing.
+**Formatting.** Apply the `# Technical Writing` guidelines from the prompt to all
+free-text fields: wrap code identifiers (function names, variable names, type names,
+file names, commands, and flags) in inline backticks.
 Cite real files you inspected. Prefer the codebase's existing patterns over
 introducing new ones. Be specific enough that another agent could execute the
 plan without re-deriving it.

package/skills-plugin/skills/resolve-merge-conflict/SKILL.md CHANGED Viewed

@@ -95,4 +95,6 @@ before you finish. (You don't need to `git add`; the runner stages and commits f
 Your last message **is** the report posted to the session thread. Write it for the
 user: list which files conflicted and, briefly, how you resolved each, plus how you
-verified (build/tests). The runner appends the pull-request link, so don't add one.
+verified (build/tests). Wrap conflicted file names and code identifiers in inline
+backticks per the `# Technical Writing` section. The runner appends the pull-request
+link, so don't add one.

package/skills-plugin/skills/revise-implementation/SKILL.md CHANGED Viewed

@@ -60,8 +60,9 @@ essentials:
 - **Subagents start blank.** Each Task subagent sees only the prompt you give it —
   not this thread, the plan, or the prior report. Make every prompt self-contained:
   include the specific change requested, the relevant plan/report excerpt, the code
-  context, and the coding guidelines (verbatim, from the `# Coding Guidelines`
-  section in the prompt).
+  context, the coding guidelines (verbatim, from the `# Coding Guidelines` section
+  in the prompt), and — for the report subagent — the technical-writing guidelines
+  (verbatim, from the `# Technical Writing` section in the prompt).
 - **Scope the work to the request.** This is a fine-tune of an existing
   implementation, not a rebuild. Change only what the user asked for plus what that
   change strictly requires; don't regress the rest of the plan.