npm - @flumecode/runner - Versions diffs - 0.23.0 → 0.23.1 - Mend

@flumecode/runner 0.23.0 → 0.23.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (6)

package/dist/cli.js +125 -48
package/dist/mcp-stdio.js +33 -11
package/package.json +1 -1
package/skills-plugin/skills/implement-plan/SKILL.md +1 -0
package/skills-plugin/skills/request-to-plan/SKILL.md +10 -12
package/skills-plugin/skills/revise-implementation/SKILL.md +4 -3

package/dist/cli.js CHANGED

@@ -230,7 +230,7 @@ function NewRequestScreen({ config, repo, onCreated, onBack }) {
   const [title, setTitle] = useState4("");
   const [body, setBody] = useState4("");
   const [agentCursor, setAgentCursor] = useState4(0);
-  const [branch, setBranch] = useState4("");
+  const [branch, setBranch] = useState4(repo.defaultBranch ?? "");
   const [error, setError] = useState4(null);
   const [submitting, setSubmitting] = useState4(false);
   useInput3((_input, key) => {
@@ -447,12 +447,12 @@ import { useState as useState6 } from "react";
 import { Box as Box6, Text as Text6, useInput as useInput5 } from "ink";
 import TextInput3 from "ink-text-input";
 import { Fragment, jsx as jsx6, jsxs as jsxs6 } from "react/jsx-runtime";
-function ThreadScreen({ config, requestId, onBack }) {
+function ThreadScreen({ config, repo, requestId, onBack }) {
   const data = usePoll(() => getRequest(config, requestId), 3e3);
   const [cursor, setCursor] = useState6(0);
   const [overlay, setOverlay] = useState6({ kind: "none" });
-  const [targetBranch, setTargetBranch] = useState6("main");
-  const [mergeBranch, setMergeBranch] = useState6("");
+  const [targetBranch, setTargetBranch] = useState6(repo.defaultBranch ?? "main");
+  const [mergeBranch, setMergeBranch] = useState6(repo.defaultBranch ?? "");
   const [acceptStep, setAcceptStep] = useState6("target");
   const [error, setError] = useState6(null);
   const messages = data?.messages ?? [];
@@ -478,8 +478,8 @@ function ThreadScreen({ config, requestId, onBack }) {
         const widgets = selectedMsg.widgets;
         setOverlay({ kind: "widget", message: { ...selectedMsg, widgets, requestId } });
       } else if (selectedMsg.type === "plan") {
-        setTargetBranch("main");
-        setMergeBranch("");
+        setTargetBranch(repo.defaultBranch ?? "main");
+        setMergeBranch(repo.defaultBranch ?? "");
         setAcceptStep("target");
         setOverlay({ kind: "accept", messageId: selectedMsg.id });
       }
@@ -495,7 +495,7 @@ function ThreadScreen({ config, requestId, onBack }) {
         config,
         overlay.messageId,
         targetBranch.trim() || "main",
-        mergeBranch.trim() || "flumecode/session"
+        mergeBranch.trim()
       );
       setOverlay({ kind: "accepted" });
     } catch (err) {
@@ -620,6 +620,7 @@ function renderScreen(screen, setScreen, config) {
         ThreadScreen,
         {
           config,
+          repo: screen.repo,
           requestId: screen.requestId,
           onBack: () => setScreen({ name: "requests", repo: screen.repo })
         }
@@ -957,6 +958,14 @@ var stepSchema = z2.object({
     "Per-file pseudo code. Provide an entry for every non-documentation file this step touches. Each entry contains the file path and pseudo code describing the changes to that file."
   )
 });
+var requirementSchema = z2.object({
+  requirement: z2.string().min(1).describe(
+    "A human-readable statement of what this change must accomplish and why, in plain language a non-technical reader can follow. Distinct from acceptanceCriteria: requirements explain intent/rationale; acceptance criteria are the machine-checkable proof. " + INLINE_CODE_HINT
+  ),
+  acceptanceCriteria: z2.array(z2.string().min(1)).min(1).describe(
+    "Concrete, deterministically-checkable conditions that prove this requirement is satisfied. Each names a trigger/precondition and the exact observable result (run X -> output Y; file Z contains W; f(a) returns b) \u2014 no vague adjectives, not a restatement of a step. " + INLINE_CODE_HINT
+  )
+});
 var planInputSchema = {
   title: z2.string().min(1).max(120).describe(
     "A concise, descriptive name for THIS plan. Must be distinct from the request title and from any sibling plans on the same request. Keep it under 120 characters."
@@ -970,13 +979,10 @@ var planInputSchema = {
     "Why the user is making this request \u2014 the underlying motivation or problem the change addresses. Fill this especially when the request content/context does NOT already state the why (ask the user in the Clarify phase); omit when there is no additional motivation to record. Useful for future understanding of the system. " + INLINE_CODE_HINT
   ),
   assumptions: z2.array(z2.string()).describe("Anything decided during planning, including unanswered defaults."),
-  requirements: z2.array(z2.string().min(1)).min(1).describe(
-    "Required, human-readable statements of what this change must accomplish and why, in plain language a non-technical reader can follow. Distinct from acceptanceCriteria: requirements explain intent/rationale; acceptance criteria are the machine-checkable proof. At least 1 required. " + INLINE_CODE_HINT
+  requirements: z2.array(requirementSchema).min(1).describe(
+    "Required, human-readable statements of what this change must accomplish and why, each carrying its own acceptanceCriteria. At least 1 requirement required; at least 2 acceptance criteria total across all requirements. " + INLINE_CODE_HINT
   ),
   steps: z2.array(stepSchema).min(1).describe("Ordered list of changes. Each step says what and why, with file references."),
-  acceptanceCriteria: z2.array(z2.string().min(1)).min(2).describe(
-    "Concrete, deterministically-checkable conditions that together define done. Each names a trigger/precondition and the exact observable result (run X -> output Y; file Z contains W; f(a) returns b) \u2014 no vague adjectives, not a restatement of a step. The set must collectively cover every step's change. At least 2 required. " + INLINE_CODE_HINT
-  ),
   risks: z2.array(z2.string()).describe("Anything that could change the approach."),
   outOfScope: z2.array(z2.string()).describe("What is deliberately not being done.")
 };
@@ -991,7 +997,21 @@ function requireRootCauseForFix(schema) {
     }
   });
 }
-var planSchema = requireRootCauseForFix(z2.object(planInputSchema));
+function requireAtLeastTwoCriteria(schema) {
+  return schema.superRefine((plan, ctx) => {
+    const total = plan.requirements.reduce((sum, r) => sum + r.acceptanceCriteria.length, 0);
+    if (total < 2) {
+      ctx.addIssue({
+        code: z2.ZodIssueCode.custom,
+        path: ["requirements"],
+        message: "At least 2 acceptance criteria total across all requirements are required."
+      });
+    }
+  });
+}
+var planSchema = requireAtLeastTwoCriteria(
+  requireRootCauseForFix(z2.object(planInputSchema))
+);
 function renderPlan(plan) {
   const lines2 = [];
   lines2.push(`# ${plan.title}`);
@@ -1018,8 +1038,8 @@ function renderPlan(plan) {
   }
   lines2.push("");
   lines2.push("## Requirements");
-  for (const requirement of plan.requirements) {
-    lines2.push(`- ${requirement}`);
+  for (const req of plan.requirements) {
+    lines2.push(`- ${req.requirement}`);
   }
   lines2.push("");
   lines2.push("## Steps");
@@ -1044,8 +1064,11 @@ function renderPlan(plan) {
   }
   lines2.push("");
   lines2.push("## Acceptance criteria");
-  for (const criterion of plan.acceptanceCriteria) {
-    lines2.push(`- [ ] ${criterion}`);
+  for (const req of plan.requirements) {
+    lines2.push(`### ${req.requirement}`);
+    for (const criterion of req.acceptanceCriteria) {
+      lines2.push(`- [ ] ${criterion}`);
+    }
   }
   if (plan.risks.length > 0) {
     lines2.push("");
@@ -1066,7 +1089,7 @@ function renderPlan(plan) {
   return lines2.join("\n");
 }
 var submitPlanInputSchema = {
-  plans: z2.array(requireRootCauseForFix(z2.object(planInputSchema))).min(1).refine(
+  plans: z2.array(requireAtLeastTwoCriteria(requireRootCauseForFix(z2.object(planInputSchema)))).min(1).refine(
     (arr) => {
       const titles = arr.map((p) => p.title.trim()).filter((t) => t.length > 0);
       return new Set(titles).size === titles.length;
@@ -1079,7 +1102,7 @@ function createPlanTooling() {
   let renderedPlans = null;
   const submitPlan = tool2(
     SUBMIT_PLAN,
-    `Submit ALL your plans in a single call \u2014 one entry per plan; each becomes its own independently-acceptable Accept-as-plan draft. Do NOT call submit_plan more than once. acceptanceCriteria is required in each plan and must contain at least 2 observable, verifiable conditions. The 'title' field names each specific plan \u2014 make it concise and distinct from the request title and from sibling plan titles. requirements is required in each plan: at least 1 plain-language statement of what the change must accomplish and why (human-readable intent), separate from the machine-checkable acceptanceCriteria. When a plan's scope is "fix", rootCause is required: a non-empty explanation of the underlying cause of the bug (not just the symptom). motivation is optional: the user's stated or asked-for reason for the request. `,
+    `Submit ALL your plans in a single call \u2014 one entry per plan; each becomes its own independently-acceptable Accept-as-plan draft. Do NOT call submit_plan more than once. requirements is required in each plan: an array of objects, each with a requirement (plain-language intent) and an acceptanceCriteria array (machine-checkable proof for that requirement). At least 1 requirement required; at least 2 acceptance criteria total across all requirements. Each requirement's acceptanceCriteria must be non-empty (at least 1 item). The 'title' field names each specific plan \u2014 make it concise and distinct from the request title and from sibling plan titles. When a plan's scope is "fix", rootCause is required: a non-empty explanation of the underlying cause of the bug (not just the symptom). motivation is optional: the user's stated or asked-for reason for the request. `,
     submitPlanInputSchema,
     async (args) => {
       const parsed = submitPlanSchema.parse(args);
@@ -1147,6 +1170,9 @@ var acVerdictSchema = z3.object({
   rationale: z3.string().min(1).describe("One or two sentences on why the verdict holds. " + INLINE_CODE_HINT),
   evidence: z3.array(evidenceSchema).describe(
     "Diff hunks proving the verdict, copied verbatim from git --no-pager diff. Across ALL criteria the evidence must collectively cover every hunk in the diff \u2014 each changed hunk appears under at least one criterion. Cite the relevant hunk(s) for a met criterion; may be empty for not_met / unclear."
+  ),
+  requirement: z3.string().min(1).optional().describe(
+    "The verbatim requirement text from the plan that this criterion appears under. Used for grouping criteria by requirement in the rendered report. Optional \u2014 omit for resolve runs (no plan) and legacy plans without grouping."
   )
 });
 var reportInputSchema = {
@@ -1171,24 +1197,47 @@ var reportInputSchema = {
   )
 };
 var reportSchema = z3.object(reportInputSchema);
+function groupByRequirement(criteria) {
+  const groups = /* @__PURE__ */ new Map();
+  for (const ac of criteria) {
+    const key = ac.requirement ?? null;
+    const existing = groups.get(key);
+    if (existing) {
+      existing.push(ac);
+    } else {
+      groups.set(key, [ac]);
+    }
+  }
+  return groups;
+}
+function renderAcVerdict(lines2, ac) {
+  lines2.push("");
+  lines2.push(`#### ${STATUS_ICON[ac.status]} ${ac.criterion}`);
+  lines2.push("");
+  lines2.push(ac.rationale.trim());
+  for (const ev of ac.evidence) {
+    lines2.push("");
+    lines2.push(ev.note ? `\`${ev.file}\` \u2014 ${ev.note}` : `\`${ev.file}\``);
+    lines2.push("");
+    lines2.push("```diff");
+    lines2.push(ev.hunk.replace(/\n+$/, ""));
+    lines2.push("```");
+  }
+}
 function renderReport(report) {
   const lines2 = [];
   lines2.push(report.summary.trim());
   lines2.push("", "## Files changed", "", report.filesChanged.trim());
   if (report.acceptanceCriteria.length > 0) {
     lines2.push("", "## Acceptance criteria");
-    for (const ac of report.acceptanceCriteria) {
-      lines2.push("");
-      lines2.push(`### ${STATUS_ICON[ac.status]} ${ac.criterion}`);
-      lines2.push("");
-      lines2.push(ac.rationale.trim());
-      for (const ev of ac.evidence) {
-        lines2.push("");
-        lines2.push(ev.note ? `\`${ev.file}\` \u2014 ${ev.note}` : `\`${ev.file}\``);
+    const groups = groupByRequirement(report.acceptanceCriteria);
+    for (const [req, acs] of groups) {
+      if (req !== null) {
         lines2.push("");
-        lines2.push("```diff");
-        lines2.push(ev.hunk.replace(/\n+$/, ""));
-        lines2.push("```");
+        lines2.push(`### ${req}`);
+      }
+      for (const ac of acs) {
+        renderAcVerdict(lines2, ac);
       }
     }
   }
@@ -1212,7 +1261,7 @@ function createReportTooling() {
   let submittedReport = null;
   const submitReport = tool3(
     SUBMIT_REPORT,
-    "Submit the final implementation report as structured data. Call this exactly once, at the end of the run. `acceptanceCriteria` must contain one entry per plan criterion, each with a met / not_met / unclear verdict and the diff hunk(s) that prove it. `summary`, `filesChanged`, `codeQuality`, and `caveats` are the four named markdown sections. `cicd` (optional) holds Verify-phase check results (one entry per command with `command`, `status` `passed`/`failed`, and `output` on failure); omit when no verification setup exists. Do NOT include a PR link \u2014 the runner appends it.",
+    "Submit the final implementation report as structured data. Call this exactly once, at the end of the run. `acceptanceCriteria` must contain one entry per plan criterion (same count and order), each with a met / not_met / unclear verdict and the diff hunk(s) that prove it. Set `requirement` on each entry to the verbatim text of the `### <requirement>` heading the criterion appeared under in the plan \u2014 this groups criteria by requirement in the rendered report. Omit `requirement` for resolve runs (no plan) or when the plan has no requirement headings. `summary`, `filesChanged`, `codeQuality`, and `caveats` are the four named markdown sections. `cicd` (optional) holds Verify-phase check results (one entry per command with `command`, `status` `passed`/`failed`, and `output` on failure); omit when no verification setup exists. Do NOT include a PR link \u2014 the runner appends it.",
     reportInputSchema,
     async (args) => {
       submittedReport = reportSchema.parse(args);
@@ -1945,6 +1994,13 @@ async function gitDiffStat(dir) {
   const { stdout: stdout2 } = await git(["-C", dir, "--no-pager", "diff", "--stat"]);
   return stdout2;
 }
+async function captureFullDiff(ctx, dir) {
+  const { mergeBranch } = ctx.repo;
+  if (!mergeBranch) return "";
+  await git(["-C", dir, "fetch", "--quiet", "origin", mergeBranch]);
+  const { stdout: stdout2 } = await git(["-C", dir, "--no-pager", "diff", "FETCH_HEAD...HEAD"]);
+  return stdout2;
+}
 var PreCommitError = class extends Error {
   constructor(log) {
     super("pre-commit checks failed");
@@ -2159,6 +2215,23 @@ var UI_EXTENSIONS = /* @__PURE__ */ new Set([
   ".sass",
   ".less"
 ]);
+async function buildPreview(ctx, dir, committedFileNames, config, abort) {
+  if (!changedFilesTouchUi(committedFileNames)) {
+    return { status: "skipped", reason: "No UI files were changed in this run." };
+  }
+  try {
+    const { previewId, entrypoint } = await runPreviewPass(
+      ctx,
+      dir,
+      committedFileNames,
+      config,
+      abort
+    );
+    return { status: "ready", previewId, entrypoint };
+  } catch (err) {
+    return { status: "failed", reason: err instanceof Error ? err.message : String(err) };
+  }
+}
 function changedFilesTouchUi(stat) {
   for (const line of stat.split("\n")) {
     const trimmed = line.trim();
@@ -2686,18 +2759,26 @@ ${reply}`;
     rebase: !resumed
   });
   reply += outcomeBanner(outcome, { branch: ctx.repo.checkoutBranch, documented, autoMerged });
-  const finalReport = report && conflictResolution ? { ...report, conflictResolution } : report;
+  let fullDiff;
+  if (report && outcome.kind !== "none") {
+    try {
+      fullDiff = (await captureFullDiff(ctx, dir)).trim() || void 0;
+    } catch (err) {
+      console.warn(`  full-diff capture skipped: ${errorMessage2(err)}`);
+    }
+  }
+  const finalReport = report ? {
+    ...report,
+    ...conflictResolution ? { conflictResolution } : {},
+    ...fullDiff ? { fullDiff } : {}
+  } : report;
   let preview;
   if (outcome.kind !== "none") {
+    console.log(`  \u2026checking UI preview for implement ${ctx.jobId}`);
     const committedFileNames = await gitCommittedFiles(dir);
-    if (changedFilesTouchUi(committedFileNames)) {
-      try {
-        console.log(`  \u2026generating UI preview for implement ${ctx.jobId}`);
-        preview = await runPreviewPass(ctx, dir, committedFileNames, config, abort);
-      } catch (err) {
-        console.warn(`  preview skipped: ${errorMessage2(err)}`);
-      }
-    }
+    preview = await buildPreview(ctx, dir, committedFileNames, config, abort);
+    if (preview.status !== "ready")
+      console.log(`  UI preview ${preview.status}: ${preview.reason}`);
   }
   return {
     text: reply,
@@ -2760,15 +2841,11 @@ async function processReviseJob(ctx, dir, resumed, config, abort) {
   const finalReport = report && conflictResolution ? { ...report, conflictResolution } : report;
   let preview;
   if (outcome.kind !== "none") {
+    console.log(`  \u2026checking UI preview for revise ${ctx.jobId}`);
     const committedFileNames = await gitCommittedFiles(dir);
-    if (changedFilesTouchUi(committedFileNames)) {
-      try {
-        console.log(`  \u2026generating UI preview for revise ${ctx.jobId}`);
-        preview = await runPreviewPass(ctx, dir, committedFileNames, config, abort);
-      } catch (err) {
-        console.warn(`  preview skipped: ${errorMessage2(err)}`);
-      }
-    }
+    preview = await buildPreview(ctx, dir, committedFileNames, config, abort);
+    if (preview.status !== "ready")
+      console.log(`  UI preview ${preview.status}: ${preview.reason}`);
   }
   return {
     text: reply,

package/dist/mcp-stdio.js CHANGED

@@ -57,6 +57,14 @@ var stepSchema = z.object({
     "Per-file pseudo code. Provide an entry for every non-documentation file this step touches. Each entry contains the file path and pseudo code describing the changes to that file."
   )
 });
+var requirementSchema = z.object({
+  requirement: z.string().min(1).describe(
+    "A human-readable statement of what this change must accomplish and why, in plain language a non-technical reader can follow. Distinct from acceptanceCriteria: requirements explain intent/rationale; acceptance criteria are the machine-checkable proof. " + INLINE_CODE_HINT
+  ),
+  acceptanceCriteria: z.array(z.string().min(1)).min(1).describe(
+    "Concrete, deterministically-checkable conditions that prove this requirement is satisfied. Each names a trigger/precondition and the exact observable result (run X -> output Y; file Z contains W; f(a) returns b) \u2014 no vague adjectives, not a restatement of a step. " + INLINE_CODE_HINT
+  )
+});
 var planInputSchema = {
   title: z.string().min(1).max(120).describe(
     "A concise, descriptive name for THIS plan. Must be distinct from the request title and from any sibling plans on the same request. Keep it under 120 characters."
@@ -70,13 +78,10 @@ var planInputSchema = {
     "Why the user is making this request \u2014 the underlying motivation or problem the change addresses. Fill this especially when the request content/context does NOT already state the why (ask the user in the Clarify phase); omit when there is no additional motivation to record. Useful for future understanding of the system. " + INLINE_CODE_HINT
   ),
   assumptions: z.array(z.string()).describe("Anything decided during planning, including unanswered defaults."),
-  requirements: z.array(z.string().min(1)).min(1).describe(
-    "Required, human-readable statements of what this change must accomplish and why, in plain language a non-technical reader can follow. Distinct from acceptanceCriteria: requirements explain intent/rationale; acceptance criteria are the machine-checkable proof. At least 1 required. " + INLINE_CODE_HINT
+  requirements: z.array(requirementSchema).min(1).describe(
+    "Required, human-readable statements of what this change must accomplish and why, each carrying its own acceptanceCriteria. At least 1 requirement required; at least 2 acceptance criteria total across all requirements. " + INLINE_CODE_HINT
   ),
   steps: z.array(stepSchema).min(1).describe("Ordered list of changes. Each step says what and why, with file references."),
-  acceptanceCriteria: z.array(z.string().min(1)).min(2).describe(
-    "Concrete, deterministically-checkable conditions that together define done. Each names a trigger/precondition and the exact observable result (run X -> output Y; file Z contains W; f(a) returns b) \u2014 no vague adjectives, not a restatement of a step. The set must collectively cover every step's change. At least 2 required. " + INLINE_CODE_HINT
-  ),
   risks: z.array(z.string()).describe("Anything that could change the approach."),
   outOfScope: z.array(z.string()).describe("What is deliberately not being done.")
 };
@@ -91,7 +96,21 @@ function requireRootCauseForFix(schema) {
     }
   });
 }
-var planSchema = requireRootCauseForFix(z.object(planInputSchema));
+function requireAtLeastTwoCriteria(schema) {
+  return schema.superRefine((plan, ctx) => {
+    const total = plan.requirements.reduce((sum, r) => sum + r.acceptanceCriteria.length, 0);
+    if (total < 2) {
+      ctx.addIssue({
+        code: z.ZodIssueCode.custom,
+        path: ["requirements"],
+        message: "At least 2 acceptance criteria total across all requirements are required."
+      });
+    }
+  });
+}
+var planSchema = requireAtLeastTwoCriteria(
+  requireRootCauseForFix(z.object(planInputSchema))
+);
 function renderPlan(plan) {
   const lines = [];
   lines.push(`# ${plan.title}`);
@@ -118,8 +137,8 @@ function renderPlan(plan) {
   }
   lines.push("");
   lines.push("## Requirements");
-  for (const requirement of plan.requirements) {
-    lines.push(`- ${requirement}`);
+  for (const req of plan.requirements) {
+    lines.push(`- ${req.requirement}`);
   }
   lines.push("");
   lines.push("## Steps");
@@ -144,8 +163,11 @@ function renderPlan(plan) {
   }
   lines.push("");
   lines.push("## Acceptance criteria");
-  for (const criterion of plan.acceptanceCriteria) {
-    lines.push(`- [ ] ${criterion}`);
+  for (const req of plan.requirements) {
+    lines.push(`### ${req.requirement}`);
+    for (const criterion of req.acceptanceCriteria) {
+      lines.push(`- [ ] ${criterion}`);
+    }
   }
   if (plan.risks.length > 0) {
     lines.push("");
@@ -166,7 +188,7 @@ function renderPlan(plan) {
   return lines.join("\n");
 }
 var submitPlanInputSchema = {
-  plans: z.array(requireRootCauseForFix(z.object(planInputSchema))).min(1).refine(
+  plans: z.array(requireAtLeastTwoCriteria(requireRootCauseForFix(z.object(planInputSchema)))).min(1).refine(
     (arr) => {
       const titles = arr.map((p) => p.title.trim()).filter((t) => t.length > 0);
       return new Set(titles).size === titles.length;

package/package.json CHANGED

@@ -1,6 +1,6 @@
 {
   "name": "@flumecode/runner",
-  "version": "0.23.0",
+  "version": "0.23.1",
   "type": "module",
   "description": "FlumeCode local runner — claims jobs and drives your local Claude Code against a real checkout.",
   "bin": {

package/skills-plugin/skills/implement-plan/SKILL.md CHANGED

@@ -184,6 +184,7 @@ The report subagent calls `submit_report` with these fields:
     verbatim from the live `git --no-pager diff`, including each hunk's `@@ -a,b +c,d @@` header line(s) (do not strip them — the report renders file line numbers from them), and proves the verdict (`note`
     optionally explains it). Never include a hunk that isn't in the actual diff. Cite
     the supporting hunk(s) for a met criterion; may be empty for not_met / unclear.
+  - `requirement` — the verbatim text of the `### <requirement>` heading this criterion appeared under in the plan (from the `## Acceptance criteria` section). Set this for every criterion from a structured plan. Omit only for resolve runs (no plan) or legacy plans without requirement headings.
 - **`cicd`** (optional) — array of Verify-phase check results. Each entry: `command` (exact command run), `status` (`"passed"` / `"failed"`), `output` (short failing-output excerpt, on failure only). Omit when the repo has no verification setup. Rendered under `## CI/CD`. A failing check does not block the report.
 ## Always

package/skills-plugin/skills/request-to-plan/SKILL.md CHANGED

@@ -72,18 +72,11 @@ Field-by-field guidance:
 - **`motivation`** — optional. The user's stated or asked-for reason for making this request — the underlying motivation or problem the change addresses. Fill this when the request content/context does NOT already state the why (ask during Phase 1 — Clarify if needed); omit when there is no additional motivation to record. Useful for future understanding of the system.
 - **`assumptions`** — anything you decided during investigation (including
   unanswered defaults from Phase 1).
-- **`requirements`** — **required; at least 1 item.** Plain-language statements of what this change must accomplish and why, written so a non-technical reader can follow them. Distinct from `acceptanceCriteria`: requirements explain intent and rationale; acceptance criteria are the machine-checkable proof. At least 1 item required.
-- **`steps`** — an ordered list. For each step provide:
-  - **`title`** — a concise imperative phrase naming the step (e.g. "Add submit_plan schema to plan.ts").
-  - **`description`** — an array of bullet points that help the reviewer understand the upcoming `pseudoCode` and decide whether the plan and design are correct. Each item is a distinct, self-contained point about what is changing and why — not a single paragraph, and not a line-by-line restatement of the pseudo code. Use concrete file references (`path/to/file.ts`) and name the functions/symbols involved. Apply inline-code formatting to all identifiers.
-  - **`pseudoCode`** — an array of `{ file, pseudoCode }` entries. Provide an entry for every file the step touches **except** documentation files (SKILL.md, README.md, wiki pages, etc.). `pseudoCode` is optional in the schema but expected for all non-documentation files. Each entry names the file path and contains pseudo code that precisely describes the changes to make in that file.
-- **`acceptanceCriteria`** — **required; at least 2 items.** Each criterion must
-  be a concrete, deterministically-checkable condition that a third party can verify
-  without knowing the author's intent. Write each as a trigger/precondition and the
-  exact observable result: `run X → output Y`, `file Z contains W`, `calling f(a) returns b`.
-  No vague adjectives (`robust`, `clean`, `properly`, `works correctly`). The set
-  must be **collectively exhaustive** — every step's intended change is covered by
-  at least one AC. Do **not** restate a step as a criterion.
+- **`requirements`** — **required; at least 1 item.** An array of objects, each with:
+  - **`requirement`** — a plain-language statement of what this change must accomplish and why, written so a non-technical reader can follow it. Distinct from `acceptanceCriteria`: requirements explain intent and rationale; acceptance criteria are the machine-checkable proof.
+  - **`acceptanceCriteria`** — **required; at least 1 item per requirement.** Concrete, deterministically-checkable conditions that prove this specific requirement is satisfied. The total count across all requirements must be **at least 2**. Write each criterion as a trigger/precondition and the exact observable result: `run X → output Y`, `file Z contains W`, `calling f(a) returns b`. No vague adjectives. Every step's intended change must be covered by at least one AC.
+  The link between each requirement and its criteria is established at plan time. When the agent reports back, each `submit_report` verdict must carry the `requirement` field identifying which `### <requirement>` heading the criterion appeared under in the plan.
   **Good vs bad examples:**
   - ✅ `grep -rn "What changed" apps/runner/src/report.ts` produces no matches.
@@ -91,6 +84,11 @@ Field-by-field guidance:
   - ✅ `pnpm test` in the repo root exits 0 and report.test.ts output contains no failures.
   - ❌ Tests pass correctly. _(no trigger, no observable result)_
+- **`steps`** — an ordered list. For each step provide:
+  - **`title`** — a concise imperative phrase naming the step (e.g. "Add submit_plan schema to plan.ts").
+  - **`description`** — an array of bullet points that help the reviewer understand the upcoming `pseudoCode` and decide whether the plan and design are correct. Each item is a distinct, self-contained point about what is changing and why — not a single paragraph, and not a line-by-line restatement of the pseudo code. Use concrete file references (`path/to/file.ts`) and name the functions/symbols involved. Apply inline-code formatting to all identifiers.
+  - **`pseudoCode`** — an array of `{ file, pseudoCode }` entries. Provide an entry for every file the step touches **except** documentation files (SKILL.md, README.md, wiki pages, etc.). `pseudoCode` is optional in the schema but expected for all non-documentation files. Each entry names the file path and contains pseudo code that precisely describes the changes to make in that file.
 - **`risks`** — anything that could change the approach or surface a problem.
 - **`outOfScope`** — what you are deliberately not doing.

package/skills-plugin/skills/revise-implementation/SKILL.md CHANGED

@@ -41,7 +41,7 @@ actual code. Pick exactly one:
 - **Re-plan** — the request meaningfully changes scope or direction, enough that a
   fresh plan should be agreed before building. Call **`submit_plan`** with a `plans[]` array
   containing the revised structured fields (same per-plan shape as the request-to-plan skill:
-  `scope`, `goal`, `assumptions`, `requirements` — at least 1 —, `steps`, `acceptanceCriteria` — at least 2 —, `risks`,
+  `scope`, `goal`, `assumptions`, `requirements` — at least 1, each with its own `acceptanceCriteria` array; at least 2 criteria total across all requirements —, `steps`, `risks`,
   `outOfScope`). Include only one entry for a revise turn. The runner posts it as a revision
   the user can accept; make no code changes this turn.
 - **Implement** — the request is clear and reasonable. Make the change (via
@@ -82,8 +82,9 @@ user:
 - **Implemented:** call **`submit_report`** with the structured report, exactly as
   `implement-plan` does. Include one `acceptanceCriteria` entry per plan AC (with a
-  met / not_met / unclear verdict and the diff hunk(s) that prove it), plus the four
-  required markdown sections (`summary`, `filesChanged`, `codeQuality`, `caveats`).
+  met / not_met / unclear verdict, the diff hunk(s) that prove it, and a `requirement`
+  field set to the verbatim requirement heading the criterion appeared under in the plan),
+  plus the four required markdown sections (`summary`, `filesChanged`, `codeQuality`, `caveats`).
   Include `cicd` from the Verify results (one entry per check, same shape as
   `implement-plan`; omit when no verification setup).
   Base `filesChanged` and evidence on the actual `git --no-pager diff`, not on what