@flumecode/runner 0.23.0 → 0.23.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/cli.js CHANGED
@@ -230,7 +230,7 @@ function NewRequestScreen({ config, repo, onCreated, onBack }) {
230
230
  const [title, setTitle] = useState4("");
231
231
  const [body, setBody] = useState4("");
232
232
  const [agentCursor, setAgentCursor] = useState4(0);
233
- const [branch, setBranch] = useState4("");
233
+ const [branch, setBranch] = useState4(repo.defaultBranch ?? "");
234
234
  const [error, setError] = useState4(null);
235
235
  const [submitting, setSubmitting] = useState4(false);
236
236
  useInput3((_input, key) => {
@@ -447,12 +447,12 @@ import { useState as useState6 } from "react";
447
447
  import { Box as Box6, Text as Text6, useInput as useInput5 } from "ink";
448
448
  import TextInput3 from "ink-text-input";
449
449
  import { Fragment, jsx as jsx6, jsxs as jsxs6 } from "react/jsx-runtime";
450
- function ThreadScreen({ config, requestId, onBack }) {
450
+ function ThreadScreen({ config, repo, requestId, onBack }) {
451
451
  const data = usePoll(() => getRequest(config, requestId), 3e3);
452
452
  const [cursor, setCursor] = useState6(0);
453
453
  const [overlay, setOverlay] = useState6({ kind: "none" });
454
- const [targetBranch, setTargetBranch] = useState6("main");
455
- const [mergeBranch, setMergeBranch] = useState6("");
454
+ const [targetBranch, setTargetBranch] = useState6(repo.defaultBranch ?? "main");
455
+ const [mergeBranch, setMergeBranch] = useState6(repo.defaultBranch ?? "");
456
456
  const [acceptStep, setAcceptStep] = useState6("target");
457
457
  const [error, setError] = useState6(null);
458
458
  const messages = data?.messages ?? [];
@@ -478,8 +478,8 @@ function ThreadScreen({ config, requestId, onBack }) {
478
478
  const widgets = selectedMsg.widgets;
479
479
  setOverlay({ kind: "widget", message: { ...selectedMsg, widgets, requestId } });
480
480
  } else if (selectedMsg.type === "plan") {
481
- setTargetBranch("main");
482
- setMergeBranch("");
481
+ setTargetBranch(repo.defaultBranch ?? "main");
482
+ setMergeBranch(repo.defaultBranch ?? "");
483
483
  setAcceptStep("target");
484
484
  setOverlay({ kind: "accept", messageId: selectedMsg.id });
485
485
  }
@@ -495,7 +495,7 @@ function ThreadScreen({ config, requestId, onBack }) {
495
495
  config,
496
496
  overlay.messageId,
497
497
  targetBranch.trim() || "main",
498
- mergeBranch.trim() || "flumecode/session"
498
+ mergeBranch.trim()
499
499
  );
500
500
  setOverlay({ kind: "accepted" });
501
501
  } catch (err) {
@@ -620,6 +620,7 @@ function renderScreen(screen, setScreen, config) {
620
620
  ThreadScreen,
621
621
  {
622
622
  config,
623
+ repo: screen.repo,
623
624
  requestId: screen.requestId,
624
625
  onBack: () => setScreen({ name: "requests", repo: screen.repo })
625
626
  }
@@ -957,6 +958,14 @@ var stepSchema = z2.object({
957
958
  "Per-file pseudo code. Provide an entry for every non-documentation file this step touches. Each entry contains the file path and pseudo code describing the changes to that file."
958
959
  )
959
960
  });
961
/**
 * Schema for a single plan requirement: a non-empty plain-language
 * `requirement` string paired with a non-empty list of non-empty
 * `acceptanceCriteria` strings.
 */
var requirementSchema = (() => {
  // Intent statement shown to human readers.
  const statement = z2.string().min(1).describe(
    "A human-readable statement of what this change must accomplish and why, in plain language a non-technical reader can follow. Distinct from acceptanceCriteria: requirements explain intent/rationale; acceptance criteria are the machine-checkable proof. " + INLINE_CODE_HINT
  );
  // Checkable conditions attached to that statement; at least one is required.
  const criteria = z2.array(z2.string().min(1)).min(1).describe(
    "Concrete, deterministically-checkable conditions that prove this requirement is satisfied. Each names a trigger/precondition and the exact observable result (run X -> output Y; file Z contains W; f(a) returns b) \u2014 no vague adjectives, not a restatement of a step. " + INLINE_CODE_HINT
  );
  return z2.object({
    requirement: statement,
    acceptanceCriteria: criteria
  });
})();
960
969
  var planInputSchema = {
961
970
  title: z2.string().min(1).max(120).describe(
962
971
  "A concise, descriptive name for THIS plan. Must be distinct from the request title and from any sibling plans on the same request. Keep it under 120 characters."
@@ -970,13 +979,10 @@ var planInputSchema = {
970
979
  "Why the user is making this request \u2014 the underlying motivation or problem the change addresses. Fill this especially when the request content/context does NOT already state the why (ask the user in the Clarify phase); omit when there is no additional motivation to record. Useful for future understanding of the system. " + INLINE_CODE_HINT
971
980
  ),
972
981
  assumptions: z2.array(z2.string()).describe("Anything decided during planning, including unanswered defaults."),
973
- requirements: z2.array(z2.string().min(1)).min(1).describe(
974
- "Required, human-readable statements of what this change must accomplish and why, in plain language a non-technical reader can follow. Distinct from acceptanceCriteria: requirements explain intent/rationale; acceptance criteria are the machine-checkable proof. At least 1 required. " + INLINE_CODE_HINT
982
+ requirements: z2.array(requirementSchema).min(1).describe(
983
+ "Required, human-readable statements of what this change must accomplish and why, each carrying its own acceptanceCriteria. At least 1 requirement required; at least 2 acceptance criteria total across all requirements. " + INLINE_CODE_HINT
975
984
  ),
976
985
  steps: z2.array(stepSchema).min(1).describe("Ordered list of changes. Each step says what and why, with file references."),
977
- acceptanceCriteria: z2.array(z2.string().min(1)).min(2).describe(
978
- "Concrete, deterministically-checkable conditions that together define done. Each names a trigger/precondition and the exact observable result (run X -> output Y; file Z contains W; f(a) returns b) \u2014 no vague adjectives, not a restatement of a step. The set must collectively cover every step's change. At least 2 required. " + INLINE_CODE_HINT
979
- ),
980
986
  risks: z2.array(z2.string()).describe("Anything that could change the approach."),
981
987
  outOfScope: z2.array(z2.string()).describe("What is deliberately not being done.")
982
988
  };
@@ -991,7 +997,21 @@ function requireRootCauseForFix(schema) {
991
997
  }
992
998
  });
993
999
  }
994
- var planSchema = requireRootCauseForFix(z2.object(planInputSchema));
1000
/**
 * Wraps `schema` with a refinement that rejects plans whose requirements
 * carry fewer than two acceptance criteria in total.
 *
 * @param schema - a schema exposing `superRefine`; the parsed value must have
 *   a `requirements` array whose entries each have an `acceptanceCriteria` array.
 * @returns whatever `schema.superRefine` returns.
 */
function requireAtLeastTwoCriteria(schema) {
  // Sum the criteria across every requirement of the plan.
  const countCriteria = (plan) => {
    let count = 0;
    for (const entry of plan.requirements) {
      count += entry.acceptanceCriteria.length;
    }
    return count;
  };
  return schema.superRefine((plan, ctx) => {
    if (countCriteria(plan) >= 2) {
      return;
    }
    // Report the shortfall on the `requirements` field as a whole.
    ctx.addIssue({
      code: z2.ZodIssueCode.custom,
      path: ["requirements"],
      message: "At least 2 acceptance criteria total across all requirements are required."
    });
  });
}
1012
+ var planSchema = requireAtLeastTwoCriteria(
1013
+ requireRootCauseForFix(z2.object(planInputSchema))
1014
+ );
995
1015
  function renderPlan(plan) {
996
1016
  const lines2 = [];
997
1017
  lines2.push(`# ${plan.title}`);
@@ -1018,8 +1038,8 @@ function renderPlan(plan) {
1018
1038
  }
1019
1039
  lines2.push("");
1020
1040
  lines2.push("## Requirements");
1021
- for (const requirement of plan.requirements) {
1022
- lines2.push(`- ${requirement}`);
1041
+ for (const req of plan.requirements) {
1042
+ lines2.push(`- ${req.requirement}`);
1023
1043
  }
1024
1044
  lines2.push("");
1025
1045
  lines2.push("## Steps");
@@ -1044,8 +1064,11 @@ function renderPlan(plan) {
1044
1064
  }
1045
1065
  lines2.push("");
1046
1066
  lines2.push("## Acceptance criteria");
1047
- for (const criterion of plan.acceptanceCriteria) {
1048
- lines2.push(`- [ ] ${criterion}`);
1067
+ for (const req of plan.requirements) {
1068
+ lines2.push(`### ${req.requirement}`);
1069
+ for (const criterion of req.acceptanceCriteria) {
1070
+ lines2.push(`- [ ] ${criterion}`);
1071
+ }
1049
1072
  }
1050
1073
  if (plan.risks.length > 0) {
1051
1074
  lines2.push("");
@@ -1066,7 +1089,7 @@ function renderPlan(plan) {
1066
1089
  return lines2.join("\n");
1067
1090
  }
1068
1091
  var submitPlanInputSchema = {
1069
- plans: z2.array(requireRootCauseForFix(z2.object(planInputSchema))).min(1).refine(
1092
+ plans: z2.array(requireAtLeastTwoCriteria(requireRootCauseForFix(z2.object(planInputSchema)))).min(1).refine(
1070
1093
  (arr) => {
1071
1094
  const titles = arr.map((p) => p.title.trim()).filter((t) => t.length > 0);
1072
1095
  return new Set(titles).size === titles.length;
@@ -1079,7 +1102,7 @@ function createPlanTooling() {
1079
1102
  let renderedPlans = null;
1080
1103
  const submitPlan = tool2(
1081
1104
  SUBMIT_PLAN,
1082
- `Submit ALL your plans in a single call \u2014 one entry per plan; each becomes its own independently-acceptable Accept-as-plan draft. Do NOT call submit_plan more than once. acceptanceCriteria is required in each plan and must contain at least 2 observable, verifiable conditions. The 'title' field names each specific plan \u2014 make it concise and distinct from the request title and from sibling plan titles. requirements is required in each plan: at least 1 plain-language statement of what the change must accomplish and why (human-readable intent), separate from the machine-checkable acceptanceCriteria. When a plan's scope is "fix", rootCause is required: a non-empty explanation of the underlying cause of the bug (not just the symptom). motivation is optional: the user's stated or asked-for reason for the request. `,
1105
+ `Submit ALL your plans in a single call \u2014 one entry per plan; each becomes its own independently-acceptable Accept-as-plan draft. Do NOT call submit_plan more than once. requirements is required in each plan: an array of objects, each with a requirement (plain-language intent) and an acceptanceCriteria array (machine-checkable proof for that requirement). At least 1 requirement required; at least 2 acceptance criteria total across all requirements. Each requirement's acceptanceCriteria must be non-empty (at least 1 item). The 'title' field names each specific plan \u2014 make it concise and distinct from the request title and from sibling plan titles. When a plan's scope is "fix", rootCause is required: a non-empty explanation of the underlying cause of the bug (not just the symptom). motivation is optional: the user's stated or asked-for reason for the request. `,
1083
1106
  submitPlanInputSchema,
1084
1107
  async (args) => {
1085
1108
  const parsed = submitPlanSchema.parse(args);
@@ -1147,6 +1170,9 @@ var acVerdictSchema = z3.object({
1147
1170
  rationale: z3.string().min(1).describe("One or two sentences on why the verdict holds. " + INLINE_CODE_HINT),
1148
1171
  evidence: z3.array(evidenceSchema).describe(
1149
1172
  "Diff hunks proving the verdict, copied verbatim from git --no-pager diff. Across ALL criteria the evidence must collectively cover every hunk in the diff \u2014 each changed hunk appears under at least one criterion. Cite the relevant hunk(s) for a met criterion; may be empty for not_met / unclear."
1173
+ ),
1174
+ requirement: z3.string().min(1).optional().describe(
1175
+ "The verbatim requirement text from the plan that this criterion appears under. Used for grouping criteria by requirement in the rendered report. Optional \u2014 omit for resolve runs (no plan) and legacy plans without grouping."
1150
1176
  )
1151
1177
  });
1152
1178
  var reportInputSchema = {
@@ -1171,24 +1197,47 @@ var reportInputSchema = {
1171
1197
  )
1172
1198
  };
1173
1199
  var reportSchema = z3.object(reportInputSchema);
1200
/**
 * Buckets acceptance-criterion verdicts by the requirement they belong to,
 * preserving first-seen order of both the groups and their members.
 *
 * The requirement text is trimmed before it is used as the key, so the same
 * requirement supplied with stray leading/trailing whitespace lands in one
 * group instead of producing duplicate headings. Verdicts with a missing or
 * blank `requirement` are collected under the `null` key.
 *
 * @param criteria - array of verdict objects, each optionally carrying a
 *   string `requirement`.
 * @returns Map from trimmed requirement text (or `null`) to the verdicts in
 *   that group.
 */
function groupByRequirement(criteria) {
  const groups = new Map();
  for (const ac of criteria) {
    // A blank requirement would render as an empty heading; treat it as ungrouped.
    const key = ac.requirement?.trim() || null;
    const bucket = groups.get(key);
    if (bucket) {
      bucket.push(ac);
    } else {
      groups.set(key, [ac]);
    }
  }
  return groups;
}
1213
/**
 * Appends the markdown for one acceptance-criterion verdict to `lines2`:
 * a level-4 heading (status icon + criterion), the trimmed rationale, and for
 * each evidence entry a file label (with optional note) followed by the hunk
 * in a fenced `diff` block.
 *
 * @param lines2 - output array of markdown lines; mutated in place.
 * @param ac - verdict with `status`, `criterion`, `rationale` and an
 *   `evidence` array of `{ file, hunk, note? }`.
 */
function renderAcVerdict(lines2, ac) {
  lines2.push("");
  lines2.push(`#### ${STATUS_ICON[ac.status]} ${ac.criterion}`);
  lines2.push("");
  lines2.push(ac.rationale.trim());
  for (const ev of ac.evidence) {
    const hunk = ev.hunk.replace(/\n+$/, "");
    // A hunk from a markdown file can itself contain a ``` line (diff context
    // lines are only indented by one space), which would close a fixed
    // three-backtick fence early. Use a fence longer than any backtick run
    // inside the hunk so the block always stays intact.
    let longestRun = 0;
    for (const run of hunk.match(/`+/g) ?? []) {
      longestRun = Math.max(longestRun, run.length);
    }
    const fence = "`".repeat(Math.max(3, longestRun + 1));
    lines2.push("");
    lines2.push(ev.note ? `\`${ev.file}\` \u2014 ${ev.note}` : `\`${ev.file}\``);
    lines2.push("");
    lines2.push(`${fence}diff`);
    lines2.push(hunk);
    lines2.push(fence);
  }
}
1174
1227
  function renderReport(report) {
1175
1228
  const lines2 = [];
1176
1229
  lines2.push(report.summary.trim());
1177
1230
  lines2.push("", "## Files changed", "", report.filesChanged.trim());
1178
1231
  if (report.acceptanceCriteria.length > 0) {
1179
1232
  lines2.push("", "## Acceptance criteria");
1180
- for (const ac of report.acceptanceCriteria) {
1181
- lines2.push("");
1182
- lines2.push(`### ${STATUS_ICON[ac.status]} ${ac.criterion}`);
1183
- lines2.push("");
1184
- lines2.push(ac.rationale.trim());
1185
- for (const ev of ac.evidence) {
1186
- lines2.push("");
1187
- lines2.push(ev.note ? `\`${ev.file}\` \u2014 ${ev.note}` : `\`${ev.file}\``);
1233
+ const groups = groupByRequirement(report.acceptanceCriteria);
1234
+ for (const [req, acs] of groups) {
1235
+ if (req !== null) {
1188
1236
  lines2.push("");
1189
- lines2.push("```diff");
1190
- lines2.push(ev.hunk.replace(/\n+$/, ""));
1191
- lines2.push("```");
1237
+ lines2.push(`### ${req}`);
1238
+ }
1239
+ for (const ac of acs) {
1240
+ renderAcVerdict(lines2, ac);
1192
1241
  }
1193
1242
  }
1194
1243
  }
@@ -1212,7 +1261,7 @@ function createReportTooling() {
1212
1261
  let submittedReport = null;
1213
1262
  const submitReport = tool3(
1214
1263
  SUBMIT_REPORT,
1215
- "Submit the final implementation report as structured data. Call this exactly once, at the end of the run. `acceptanceCriteria` must contain one entry per plan criterion, each with a met / not_met / unclear verdict and the diff hunk(s) that prove it. `summary`, `filesChanged`, `codeQuality`, and `caveats` are the four named markdown sections. `cicd` (optional) holds Verify-phase check results (one entry per command with `command`, `status` `passed`/`failed`, and `output` on failure); omit when no verification setup exists. Do NOT include a PR link \u2014 the runner appends it.",
1264
+ "Submit the final implementation report as structured data. Call this exactly once, at the end of the run. `acceptanceCriteria` must contain one entry per plan criterion (same count and order), each with a met / not_met / unclear verdict and the diff hunk(s) that prove it. Set `requirement` on each entry to the verbatim text of the `### <requirement>` heading the criterion appeared under in the plan \u2014 this groups criteria by requirement in the rendered report. Omit `requirement` for resolve runs (no plan) or when the plan has no requirement headings. `summary`, `filesChanged`, `codeQuality`, and `caveats` are the four named markdown sections. `cicd` (optional) holds Verify-phase check results (one entry per command with `command`, `status` `passed`/`failed`, and `output` on failure); omit when no verification setup exists. Do NOT include a PR link \u2014 the runner appends it.",
1216
1265
  reportInputSchema,
1217
1266
  async (args) => {
1218
1267
  submittedReport = reportSchema.parse(args);
@@ -1945,6 +1994,13 @@ async function gitDiffStat(dir) {
1945
1994
  const { stdout: stdout2 } = await git(["-C", dir, "--no-pager", "diff", "--stat"]);
1946
1995
  return stdout2;
1947
1996
  }
1997
/**
 * Returns the diff between the fetched merge branch and the current HEAD of
 * the checkout at `dir`, or an empty string when `ctx.repo.mergeBranch` is
 * not set.
 *
 * @param ctx - job context; only `ctx.repo.mergeBranch` is read.
 * @param dir - path passed to git via `-C`.
 * @returns the stdout of `git diff FETCH_HEAD...HEAD`, or "".
 */
async function captureFullDiff(ctx, dir) {
  const base = ctx.repo.mergeBranch;
  if (base) {
    // Fetch first so FETCH_HEAD points at the remote state of the merge branch.
    await git(["-C", dir, "fetch", "--quiet", "origin", base]);
    const result = await git(["-C", dir, "--no-pager", "diff", "FETCH_HEAD...HEAD"]);
    return result.stdout;
  }
  return "";
}
1948
2004
  var PreCommitError = class extends Error {
1949
2005
  constructor(log) {
1950
2006
  super("pre-commit checks failed");
@@ -2159,6 +2215,23 @@ var UI_EXTENSIONS = /* @__PURE__ */ new Set([
2159
2215
  ".sass",
2160
2216
  ".less"
2161
2217
  ]);
2218
/**
 * Produces a UI-preview result object for a run and never throws for a
 * failed preview pass.
 *
 * @returns one of
 *   `{ status: "skipped", reason }` when `changedFilesTouchUi` reports no UI change,
 *   `{ status: "ready", previewId, entrypoint }` when `runPreviewPass` succeeds,
 *   `{ status: "failed", reason }` when the preview pass throws.
 */
async function buildPreview(ctx, dir, committedFileNames, config, abort) {
  const touchesUi = changedFilesTouchUi(committedFileNames);
  if (!touchesUi) {
    return { status: "skipped", reason: "No UI files were changed in this run." };
  }
  let outcome;
  try {
    const { previewId, entrypoint } = await runPreviewPass(ctx, dir, committedFileNames, config, abort);
    outcome = { status: "ready", previewId, entrypoint };
  } catch (err) {
    // Non-Error throwables are stringified so `reason` is always a string.
    const reason = err instanceof Error ? err.message : String(err);
    outcome = { status: "failed", reason };
  }
  return outcome;
}
2162
2235
  function changedFilesTouchUi(stat) {
2163
2236
  for (const line of stat.split("\n")) {
2164
2237
  const trimmed = line.trim();
@@ -2686,18 +2759,26 @@ ${reply}`;
2686
2759
  rebase: !resumed
2687
2760
  });
2688
2761
  reply += outcomeBanner(outcome, { branch: ctx.repo.checkoutBranch, documented, autoMerged });
2689
- const finalReport = report && conflictResolution ? { ...report, conflictResolution } : report;
2762
+ let fullDiff;
2763
+ if (report && outcome.kind !== "none") {
2764
+ try {
2765
+ fullDiff = (await captureFullDiff(ctx, dir)).trim() || void 0;
2766
+ } catch (err) {
2767
+ console.warn(` full-diff capture skipped: ${errorMessage2(err)}`);
2768
+ }
2769
+ }
2770
+ const finalReport = report ? {
2771
+ ...report,
2772
+ ...conflictResolution ? { conflictResolution } : {},
2773
+ ...fullDiff ? { fullDiff } : {}
2774
+ } : report;
2690
2775
  let preview;
2691
2776
  if (outcome.kind !== "none") {
2777
+ console.log(` \u2026checking UI preview for implement ${ctx.jobId}`);
2692
2778
  const committedFileNames = await gitCommittedFiles(dir);
2693
- if (changedFilesTouchUi(committedFileNames)) {
2694
- try {
2695
- console.log(` \u2026generating UI preview for implement ${ctx.jobId}`);
2696
- preview = await runPreviewPass(ctx, dir, committedFileNames, config, abort);
2697
- } catch (err) {
2698
- console.warn(` preview skipped: ${errorMessage2(err)}`);
2699
- }
2700
- }
2779
+ preview = await buildPreview(ctx, dir, committedFileNames, config, abort);
2780
+ if (preview.status !== "ready")
2781
+ console.log(` UI preview ${preview.status}: ${preview.reason}`);
2701
2782
  }
2702
2783
  return {
2703
2784
  text: reply,
@@ -2760,15 +2841,11 @@ async function processReviseJob(ctx, dir, resumed, config, abort) {
2760
2841
  const finalReport = report && conflictResolution ? { ...report, conflictResolution } : report;
2761
2842
  let preview;
2762
2843
  if (outcome.kind !== "none") {
2844
+ console.log(` \u2026checking UI preview for revise ${ctx.jobId}`);
2763
2845
  const committedFileNames = await gitCommittedFiles(dir);
2764
- if (changedFilesTouchUi(committedFileNames)) {
2765
- try {
2766
- console.log(` \u2026generating UI preview for revise ${ctx.jobId}`);
2767
- preview = await runPreviewPass(ctx, dir, committedFileNames, config, abort);
2768
- } catch (err) {
2769
- console.warn(` preview skipped: ${errorMessage2(err)}`);
2770
- }
2771
- }
2846
+ preview = await buildPreview(ctx, dir, committedFileNames, config, abort);
2847
+ if (preview.status !== "ready")
2848
+ console.log(` UI preview ${preview.status}: ${preview.reason}`);
2772
2849
  }
2773
2850
  return {
2774
2851
  text: reply,
package/dist/mcp-stdio.js CHANGED
@@ -57,6 +57,14 @@ var stepSchema = z.object({
57
57
  "Per-file pseudo code. Provide an entry for every non-documentation file this step touches. Each entry contains the file path and pseudo code describing the changes to that file."
58
58
  )
59
59
  });
60
/**
 * Schema for a single plan requirement: a non-empty plain-language
 * `requirement` string paired with a non-empty list of non-empty
 * `acceptanceCriteria` strings.
 */
var requirementSchema = (() => {
  // Intent statement shown to human readers.
  const statement = z.string().min(1).describe(
    "A human-readable statement of what this change must accomplish and why, in plain language a non-technical reader can follow. Distinct from acceptanceCriteria: requirements explain intent/rationale; acceptance criteria are the machine-checkable proof. " + INLINE_CODE_HINT
  );
  // Checkable conditions attached to that statement; at least one is required.
  const criteria = z.array(z.string().min(1)).min(1).describe(
    "Concrete, deterministically-checkable conditions that prove this requirement is satisfied. Each names a trigger/precondition and the exact observable result (run X -> output Y; file Z contains W; f(a) returns b) \u2014 no vague adjectives, not a restatement of a step. " + INLINE_CODE_HINT
  );
  return z.object({
    requirement: statement,
    acceptanceCriteria: criteria
  });
})();
60
68
  var planInputSchema = {
61
69
  title: z.string().min(1).max(120).describe(
62
70
  "A concise, descriptive name for THIS plan. Must be distinct from the request title and from any sibling plans on the same request. Keep it under 120 characters."
@@ -70,13 +78,10 @@ var planInputSchema = {
70
78
  "Why the user is making this request \u2014 the underlying motivation or problem the change addresses. Fill this especially when the request content/context does NOT already state the why (ask the user in the Clarify phase); omit when there is no additional motivation to record. Useful for future understanding of the system. " + INLINE_CODE_HINT
71
79
  ),
72
80
  assumptions: z.array(z.string()).describe("Anything decided during planning, including unanswered defaults."),
73
- requirements: z.array(z.string().min(1)).min(1).describe(
74
- "Required, human-readable statements of what this change must accomplish and why, in plain language a non-technical reader can follow. Distinct from acceptanceCriteria: requirements explain intent/rationale; acceptance criteria are the machine-checkable proof. At least 1 required. " + INLINE_CODE_HINT
81
+ requirements: z.array(requirementSchema).min(1).describe(
82
+ "Required, human-readable statements of what this change must accomplish and why, each carrying its own acceptanceCriteria. At least 1 requirement required; at least 2 acceptance criteria total across all requirements. " + INLINE_CODE_HINT
75
83
  ),
76
84
  steps: z.array(stepSchema).min(1).describe("Ordered list of changes. Each step says what and why, with file references."),
77
- acceptanceCriteria: z.array(z.string().min(1)).min(2).describe(
78
- "Concrete, deterministically-checkable conditions that together define done. Each names a trigger/precondition and the exact observable result (run X -> output Y; file Z contains W; f(a) returns b) \u2014 no vague adjectives, not a restatement of a step. The set must collectively cover every step's change. At least 2 required. " + INLINE_CODE_HINT
79
- ),
80
85
  risks: z.array(z.string()).describe("Anything that could change the approach."),
81
86
  outOfScope: z.array(z.string()).describe("What is deliberately not being done.")
82
87
  };
@@ -91,7 +96,21 @@ function requireRootCauseForFix(schema) {
91
96
  }
92
97
  });
93
98
  }
94
- var planSchema = requireRootCauseForFix(z.object(planInputSchema));
99
/**
 * Wraps `schema` with a refinement that rejects plans whose requirements
 * carry fewer than two acceptance criteria in total.
 *
 * @param schema - a schema exposing `superRefine`; the parsed value must have
 *   a `requirements` array whose entries each have an `acceptanceCriteria` array.
 * @returns whatever `schema.superRefine` returns.
 */
function requireAtLeastTwoCriteria(schema) {
  // Sum the criteria across every requirement of the plan.
  const countCriteria = (plan) => {
    let count = 0;
    for (const entry of plan.requirements) {
      count += entry.acceptanceCriteria.length;
    }
    return count;
  };
  return schema.superRefine((plan, ctx) => {
    if (countCriteria(plan) >= 2) {
      return;
    }
    // Report the shortfall on the `requirements` field as a whole.
    ctx.addIssue({
      code: z.ZodIssueCode.custom,
      path: ["requirements"],
      message: "At least 2 acceptance criteria total across all requirements are required."
    });
  });
}
111
+ var planSchema = requireAtLeastTwoCriteria(
112
+ requireRootCauseForFix(z.object(planInputSchema))
113
+ );
95
114
  function renderPlan(plan) {
96
115
  const lines = [];
97
116
  lines.push(`# ${plan.title}`);
@@ -118,8 +137,8 @@ function renderPlan(plan) {
118
137
  }
119
138
  lines.push("");
120
139
  lines.push("## Requirements");
121
- for (const requirement of plan.requirements) {
122
- lines.push(`- ${requirement}`);
140
+ for (const req of plan.requirements) {
141
+ lines.push(`- ${req.requirement}`);
123
142
  }
124
143
  lines.push("");
125
144
  lines.push("## Steps");
@@ -144,8 +163,11 @@ function renderPlan(plan) {
144
163
  }
145
164
  lines.push("");
146
165
  lines.push("## Acceptance criteria");
147
- for (const criterion of plan.acceptanceCriteria) {
148
- lines.push(`- [ ] ${criterion}`);
166
+ for (const req of plan.requirements) {
167
+ lines.push(`### ${req.requirement}`);
168
+ for (const criterion of req.acceptanceCriteria) {
169
+ lines.push(`- [ ] ${criterion}`);
170
+ }
149
171
  }
150
172
  if (plan.risks.length > 0) {
151
173
  lines.push("");
@@ -166,7 +188,7 @@ function renderPlan(plan) {
166
188
  return lines.join("\n");
167
189
  }
168
190
  var submitPlanInputSchema = {
169
- plans: z.array(requireRootCauseForFix(z.object(planInputSchema))).min(1).refine(
191
+ plans: z.array(requireAtLeastTwoCriteria(requireRootCauseForFix(z.object(planInputSchema)))).min(1).refine(
170
192
  (arr) => {
171
193
  const titles = arr.map((p) => p.title.trim()).filter((t) => t.length > 0);
172
194
  return new Set(titles).size === titles.length;
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@flumecode/runner",
3
- "version": "0.23.0",
3
+ "version": "0.23.1",
4
4
  "type": "module",
5
5
  "description": "FlumeCode local runner — claims jobs and drives your local Claude Code against a real checkout.",
6
6
  "bin": {
@@ -184,6 +184,7 @@ The report subagent calls `submit_report` with these fields:
184
184
  verbatim from the live `git --no-pager diff`, including each hunk's `@@ -a,b +c,d @@` header line(s) (do not strip them — the report renders file line numbers from them), and proves the verdict (`note`
185
185
  optionally explains it). Never include a hunk that isn't in the actual diff. Cite
186
186
  the supporting hunk(s) for a met criterion; may be empty for not_met / unclear.
187
+ - `requirement` — the verbatim text of the `### <requirement>` heading this criterion appeared under in the plan (from the `## Acceptance criteria` section). Set this for every criterion from a structured plan. Omit only for resolve runs (no plan) or legacy plans without requirement headings.
187
188
  - **`cicd`** (optional) — array of Verify-phase check results. Each entry: `command` (exact command run), `status` (`"passed"` / `"failed"`), `output` (short failing-output excerpt, on failure only). Omit when the repo has no verification setup. Rendered under `## CI/CD`. A failing check does not block the report.
188
189
 
189
190
  ## Always
@@ -72,18 +72,11 @@ Field-by-field guidance:
72
72
  - **`motivation`** — optional. The user's stated or asked-for reason for making this request — the underlying motivation or problem the change addresses. Fill this when the request content/context does NOT already state the why (ask during Phase 1 — Clarify if needed); omit when there is no additional motivation to record. Useful for future understanding of the system.
73
73
  - **`assumptions`** — anything you decided during investigation (including
74
74
  unanswered defaults from Phase 1).
75
- - **`requirements`** — **required; at least 1 item.** Plain-language statements of what this change must accomplish and why, written so a non-technical reader can follow them. Distinct from `acceptanceCriteria`: requirements explain intent and rationale; acceptance criteria are the machine-checkable proof. At least 1 item required.
76
- - **`steps`** — an ordered list. For each step provide:
77
- - **`title`** — a concise imperative phrase naming the step (e.g. "Add submit_plan schema to plan.ts").
78
- - **`description`** — an array of bullet points that help the reviewer understand the upcoming `pseudoCode` and decide whether the plan and design are correct. Each item is a distinct, self-contained point about what is changing and why — not a single paragraph, and not a line-by-line restatement of the pseudo code. Use concrete file references (`path/to/file.ts`) and name the functions/symbols involved. Apply inline-code formatting to all identifiers.
79
- - **`pseudoCode`** an array of `{ file, pseudoCode }` entries. Provide an entry for every file the step touches **except** documentation files (SKILL.md, README.md, wiki pages, etc.). `pseudoCode` is optional in the schema but expected for all non-documentation files. Each entry names the file path and contains pseudo code that precisely describes the changes to make in that file.
80
- - **`acceptanceCriteria`** — **required; at least 2 items.** Each criterion must
81
- be a concrete, deterministically-checkable condition that a third party can verify
82
- without knowing the author's intent. Write each as a trigger/precondition and the
83
- exact observable result: `run X → output Y`, `file Z contains W`, `calling f(a) returns b`.
84
- No vague adjectives (`robust`, `clean`, `properly`, `works correctly`). The set
85
- must be **collectively exhaustive** — every step's intended change is covered by
86
- at least one AC. Do **not** restate a step as a criterion.
75
+ - **`requirements`** — **required; at least 1 item.** An array of objects, each with:
76
+ - **`requirement`** — a plain-language statement of what this change must accomplish and why, written so a non-technical reader can follow it. Distinct from `acceptanceCriteria`: requirements explain intent and rationale; acceptance criteria are the machine-checkable proof.
77
+ - **`acceptanceCriteria`** — **required; at least 1 item per requirement.** Concrete, deterministically-checkable conditions that prove this specific requirement is satisfied. The total count across all requirements must be **at least 2**. Write each criterion as a trigger/precondition and the exact observable result: `run X → output Y`, `file Z contains W`, `calling f(a) returns b`. No vague adjectives. Every step's intended change must be covered by at least one AC.
78
+
79
+ The link between each requirement and its criteria is established at plan time. When the agent reports back, each `submit_report` verdict must carry the `requirement` field identifying which `### <requirement>` heading the criterion appeared under in the plan.
87
80
 
88
81
  **Good vs bad examples:**
89
82
  - ✅ `grep -rn "What changed" apps/runner/src/report.ts` produces no matches.
@@ -91,6 +84,11 @@ Field-by-field guidance:
91
84
  - ✅ `pnpm test` in the repo root exits 0 and report.test.ts output contains no failures.
92
85
  - ❌ Tests pass correctly. _(no trigger, no observable result)_
93
86
 
87
+ - **`steps`** — an ordered list. For each step provide:
88
+ - **`title`** — a concise imperative phrase naming the step (e.g. "Add submit_plan schema to plan.ts").
89
+ - **`description`** — an array of bullet points that help the reviewer understand the upcoming `pseudoCode` and decide whether the plan and design are correct. Each item is a distinct, self-contained point about what is changing and why — not a single paragraph, and not a line-by-line restatement of the pseudo code. Use concrete file references (`path/to/file.ts`) and name the functions/symbols involved. Apply inline-code formatting to all identifiers.
90
+ - **`pseudoCode`** — an array of `{ file, pseudoCode }` entries. Provide an entry for every file the step touches **except** documentation files (SKILL.md, README.md, wiki pages, etc.). `pseudoCode` is optional in the schema but expected for all non-documentation files. Each entry names the file path and contains pseudo code that precisely describes the changes to make in that file.
91
+
94
92
  - **`risks`** — anything that could change the approach or surface a problem.
95
93
  - **`outOfScope`** — what you are deliberately not doing.
96
94
 
@@ -41,7 +41,7 @@ actual code. Pick exactly one:
41
41
  - **Re-plan** — the request meaningfully changes scope or direction, enough that a
42
42
  fresh plan should be agreed before building. Call **`submit_plan`** with a `plans[]` array
43
43
  containing the revised structured fields (same per-plan shape as the request-to-plan skill:
44
- `scope`, `goal`, `assumptions`, `requirements` — at least 1 —, `steps`, `acceptanceCriteria` at least 2 —, `risks`,
44
+ `scope`, `goal`, `assumptions`, `requirements` (at least 1, each with its own `acceptanceCriteria` array; at least 2 criteria total across all requirements), `steps`, `risks`,
45
45
  `outOfScope`). Include only one entry for a revise turn. The runner posts it as a revision
46
46
  the user can accept; make no code changes this turn.
47
47
  - **Implement** — the request is clear and reasonable. Make the change (via
@@ -82,8 +82,9 @@ user:
82
82
 
83
83
  - **Implemented:** call **`submit_report`** with the structured report, exactly as
84
84
  `implement-plan` does. Include one `acceptanceCriteria` entry per plan AC (with a
85
- met / not_met / unclear verdict and the diff hunk(s) that prove it), plus the four
86
- required markdown sections (`summary`, `filesChanged`, `codeQuality`, `caveats`).
85
+ met / not_met / unclear verdict, the diff hunk(s) that prove it, and a `requirement`
86
+ field set to the verbatim requirement heading the criterion appeared under in the plan),
87
+ plus the four required markdown sections (`summary`, `filesChanged`, `codeQuality`, `caveats`).
87
88
  Include `cicd` from the Verify results (one entry per check, same shape as
88
89
  `implement-plan`; omit when no verification setup).
89
90
  Base `filesChanged` and evidence on the actual `git --no-pager diff`, not on what