@flumecode/runner 0.23.0 → 0.23.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/cli.js
CHANGED
|
@@ -230,7 +230,7 @@ function NewRequestScreen({ config, repo, onCreated, onBack }) {
|
|
|
230
230
|
const [title, setTitle] = useState4("");
|
|
231
231
|
const [body, setBody] = useState4("");
|
|
232
232
|
const [agentCursor, setAgentCursor] = useState4(0);
|
|
233
|
-
const [branch, setBranch] = useState4("");
|
|
233
|
+
const [branch, setBranch] = useState4(repo.defaultBranch ?? "");
|
|
234
234
|
const [error, setError] = useState4(null);
|
|
235
235
|
const [submitting, setSubmitting] = useState4(false);
|
|
236
236
|
useInput3((_input, key) => {
|
|
@@ -447,12 +447,12 @@ import { useState as useState6 } from "react";
|
|
|
447
447
|
import { Box as Box6, Text as Text6, useInput as useInput5 } from "ink";
|
|
448
448
|
import TextInput3 from "ink-text-input";
|
|
449
449
|
import { Fragment, jsx as jsx6, jsxs as jsxs6 } from "react/jsx-runtime";
|
|
450
|
-
function ThreadScreen({ config, requestId, onBack }) {
|
|
450
|
+
function ThreadScreen({ config, repo, requestId, onBack }) {
|
|
451
451
|
const data = usePoll(() => getRequest(config, requestId), 3e3);
|
|
452
452
|
const [cursor, setCursor] = useState6(0);
|
|
453
453
|
const [overlay, setOverlay] = useState6({ kind: "none" });
|
|
454
|
-
const [targetBranch, setTargetBranch] = useState6("main");
|
|
455
|
-
const [mergeBranch, setMergeBranch] = useState6("");
|
|
454
|
+
const [targetBranch, setTargetBranch] = useState6(repo.defaultBranch ?? "main");
|
|
455
|
+
const [mergeBranch, setMergeBranch] = useState6(repo.defaultBranch ?? "");
|
|
456
456
|
const [acceptStep, setAcceptStep] = useState6("target");
|
|
457
457
|
const [error, setError] = useState6(null);
|
|
458
458
|
const messages = data?.messages ?? [];
|
|
@@ -478,8 +478,8 @@ function ThreadScreen({ config, requestId, onBack }) {
|
|
|
478
478
|
const widgets = selectedMsg.widgets;
|
|
479
479
|
setOverlay({ kind: "widget", message: { ...selectedMsg, widgets, requestId } });
|
|
480
480
|
} else if (selectedMsg.type === "plan") {
|
|
481
|
-
setTargetBranch("main");
|
|
482
|
-
setMergeBranch("");
|
|
481
|
+
setTargetBranch(repo.defaultBranch ?? "main");
|
|
482
|
+
setMergeBranch(repo.defaultBranch ?? "");
|
|
483
483
|
setAcceptStep("target");
|
|
484
484
|
setOverlay({ kind: "accept", messageId: selectedMsg.id });
|
|
485
485
|
}
|
|
@@ -495,7 +495,7 @@ function ThreadScreen({ config, requestId, onBack }) {
|
|
|
495
495
|
config,
|
|
496
496
|
overlay.messageId,
|
|
497
497
|
targetBranch.trim() || "main",
|
|
498
|
-
mergeBranch.trim()
|
|
498
|
+
mergeBranch.trim()
|
|
499
499
|
);
|
|
500
500
|
setOverlay({ kind: "accepted" });
|
|
501
501
|
} catch (err) {
|
|
@@ -620,6 +620,7 @@ function renderScreen(screen, setScreen, config) {
|
|
|
620
620
|
ThreadScreen,
|
|
621
621
|
{
|
|
622
622
|
config,
|
|
623
|
+
repo: screen.repo,
|
|
623
624
|
requestId: screen.requestId,
|
|
624
625
|
onBack: () => setScreen({ name: "requests", repo: screen.repo })
|
|
625
626
|
}
|
|
@@ -957,6 +958,14 @@ var stepSchema = z2.object({
|
|
|
957
958
|
"Per-file pseudo code. Provide an entry for every non-documentation file this step touches. Each entry contains the file path and pseudo code describing the changes to that file."
|
|
958
959
|
)
|
|
959
960
|
});
|
|
961
|
+
var requirementSchema = z2.object({
|
|
962
|
+
requirement: z2.string().min(1).describe(
|
|
963
|
+
"A human-readable statement of what this change must accomplish and why, in plain language a non-technical reader can follow. Distinct from acceptanceCriteria: requirements explain intent/rationale; acceptance criteria are the machine-checkable proof. " + INLINE_CODE_HINT
|
|
964
|
+
),
|
|
965
|
+
acceptanceCriteria: z2.array(z2.string().min(1)).min(1).describe(
|
|
966
|
+
"Concrete, deterministically-checkable conditions that prove this requirement is satisfied. Each names a trigger/precondition and the exact observable result (run X -> output Y; file Z contains W; f(a) returns b) \u2014 no vague adjectives, not a restatement of a step. " + INLINE_CODE_HINT
|
|
967
|
+
)
|
|
968
|
+
});
|
|
960
969
|
var planInputSchema = {
|
|
961
970
|
title: z2.string().min(1).max(120).describe(
|
|
962
971
|
"A concise, descriptive name for THIS plan. Must be distinct from the request title and from any sibling plans on the same request. Keep it under 120 characters."
|
|
@@ -970,13 +979,10 @@ var planInputSchema = {
|
|
|
970
979
|
"Why the user is making this request \u2014 the underlying motivation or problem the change addresses. Fill this especially when the request content/context does NOT already state the why (ask the user in the Clarify phase); omit when there is no additional motivation to record. Useful for future understanding of the system. " + INLINE_CODE_HINT
|
|
971
980
|
),
|
|
972
981
|
assumptions: z2.array(z2.string()).describe("Anything decided during planning, including unanswered defaults."),
|
|
973
|
-
requirements: z2.array(
|
|
974
|
-
"Required, human-readable statements of what this change must accomplish and why,
|
|
982
|
+
requirements: z2.array(requirementSchema).min(1).describe(
|
|
983
|
+
"Required, human-readable statements of what this change must accomplish and why, each carrying its own acceptanceCriteria. At least 1 requirement required; at least 2 acceptance criteria total across all requirements. " + INLINE_CODE_HINT
|
|
975
984
|
),
|
|
976
985
|
steps: z2.array(stepSchema).min(1).describe("Ordered list of changes. Each step says what and why, with file references."),
|
|
977
|
-
acceptanceCriteria: z2.array(z2.string().min(1)).min(2).describe(
|
|
978
|
-
"Concrete, deterministically-checkable conditions that together define done. Each names a trigger/precondition and the exact observable result (run X -> output Y; file Z contains W; f(a) returns b) \u2014 no vague adjectives, not a restatement of a step. The set must collectively cover every step's change. At least 2 required. " + INLINE_CODE_HINT
|
|
979
|
-
),
|
|
980
986
|
risks: z2.array(z2.string()).describe("Anything that could change the approach."),
|
|
981
987
|
outOfScope: z2.array(z2.string()).describe("What is deliberately not being done.")
|
|
982
988
|
};
|
|
@@ -991,7 +997,21 @@ function requireRootCauseForFix(schema) {
|
|
|
991
997
|
}
|
|
992
998
|
});
|
|
993
999
|
}
|
|
994
|
-
|
|
1000
|
+
function requireAtLeastTwoCriteria(schema) {
|
|
1001
|
+
return schema.superRefine((plan, ctx) => {
|
|
1002
|
+
const total = plan.requirements.reduce((sum, r) => sum + r.acceptanceCriteria.length, 0);
|
|
1003
|
+
if (total < 2) {
|
|
1004
|
+
ctx.addIssue({
|
|
1005
|
+
code: z2.ZodIssueCode.custom,
|
|
1006
|
+
path: ["requirements"],
|
|
1007
|
+
message: "At least 2 acceptance criteria total across all requirements are required."
|
|
1008
|
+
});
|
|
1009
|
+
}
|
|
1010
|
+
});
|
|
1011
|
+
}
|
|
1012
|
+
var planSchema = requireAtLeastTwoCriteria(
|
|
1013
|
+
requireRootCauseForFix(z2.object(planInputSchema))
|
|
1014
|
+
);
|
|
995
1015
|
function renderPlan(plan) {
|
|
996
1016
|
const lines2 = [];
|
|
997
1017
|
lines2.push(`# ${plan.title}`);
|
|
@@ -1018,8 +1038,8 @@ function renderPlan(plan) {
|
|
|
1018
1038
|
}
|
|
1019
1039
|
lines2.push("");
|
|
1020
1040
|
lines2.push("## Requirements");
|
|
1021
|
-
for (const
|
|
1022
|
-
lines2.push(`- ${requirement}`);
|
|
1041
|
+
for (const req of plan.requirements) {
|
|
1042
|
+
lines2.push(`- ${req.requirement}`);
|
|
1023
1043
|
}
|
|
1024
1044
|
lines2.push("");
|
|
1025
1045
|
lines2.push("## Steps");
|
|
@@ -1044,8 +1064,11 @@ function renderPlan(plan) {
|
|
|
1044
1064
|
}
|
|
1045
1065
|
lines2.push("");
|
|
1046
1066
|
lines2.push("## Acceptance criteria");
|
|
1047
|
-
for (const
|
|
1048
|
-
lines2.push(
|
|
1067
|
+
for (const req of plan.requirements) {
|
|
1068
|
+
lines2.push(`### ${req.requirement}`);
|
|
1069
|
+
for (const criterion of req.acceptanceCriteria) {
|
|
1070
|
+
lines2.push(`- [ ] ${criterion}`);
|
|
1071
|
+
}
|
|
1049
1072
|
}
|
|
1050
1073
|
if (plan.risks.length > 0) {
|
|
1051
1074
|
lines2.push("");
|
|
@@ -1066,7 +1089,7 @@ function renderPlan(plan) {
|
|
|
1066
1089
|
return lines2.join("\n");
|
|
1067
1090
|
}
|
|
1068
1091
|
var submitPlanInputSchema = {
|
|
1069
|
-
plans: z2.array(requireRootCauseForFix(z2.object(planInputSchema))).min(1).refine(
|
|
1092
|
+
plans: z2.array(requireAtLeastTwoCriteria(requireRootCauseForFix(z2.object(planInputSchema)))).min(1).refine(
|
|
1070
1093
|
(arr) => {
|
|
1071
1094
|
const titles = arr.map((p) => p.title.trim()).filter((t) => t.length > 0);
|
|
1072
1095
|
return new Set(titles).size === titles.length;
|
|
@@ -1079,7 +1102,7 @@ function createPlanTooling() {
|
|
|
1079
1102
|
let renderedPlans = null;
|
|
1080
1103
|
const submitPlan = tool2(
|
|
1081
1104
|
SUBMIT_PLAN,
|
|
1082
|
-
`Submit ALL your plans in a single call \u2014 one entry per plan; each becomes its own independently-acceptable Accept-as-plan draft. Do NOT call submit_plan more than once.
|
|
1105
|
+
`Submit ALL your plans in a single call \u2014 one entry per plan; each becomes its own independently-acceptable Accept-as-plan draft. Do NOT call submit_plan more than once. requirements is required in each plan: an array of objects, each with a requirement (plain-language intent) and an acceptanceCriteria array (machine-checkable proof for that requirement). At least 1 requirement required; at least 2 acceptance criteria total across all requirements. Each requirement's acceptanceCriteria must be non-empty (at least 1 item). The 'title' field names each specific plan \u2014 make it concise and distinct from the request title and from sibling plan titles. When a plan's scope is "fix", rootCause is required: a non-empty explanation of the underlying cause of the bug (not just the symptom). motivation is optional: the user's stated or asked-for reason for the request. `,
|
|
1083
1106
|
submitPlanInputSchema,
|
|
1084
1107
|
async (args) => {
|
|
1085
1108
|
const parsed = submitPlanSchema.parse(args);
|
|
@@ -1147,6 +1170,9 @@ var acVerdictSchema = z3.object({
|
|
|
1147
1170
|
rationale: z3.string().min(1).describe("One or two sentences on why the verdict holds. " + INLINE_CODE_HINT),
|
|
1148
1171
|
evidence: z3.array(evidenceSchema).describe(
|
|
1149
1172
|
"Diff hunks proving the verdict, copied verbatim from git --no-pager diff. Across ALL criteria the evidence must collectively cover every hunk in the diff \u2014 each changed hunk appears under at least one criterion. Cite the relevant hunk(s) for a met criterion; may be empty for not_met / unclear."
|
|
1173
|
+
),
|
|
1174
|
+
requirement: z3.string().min(1).optional().describe(
|
|
1175
|
+
"The verbatim requirement text from the plan that this criterion appears under. Used for grouping criteria by requirement in the rendered report. Optional \u2014 omit for resolve runs (no plan) and legacy plans without grouping."
|
|
1150
1176
|
)
|
|
1151
1177
|
});
|
|
1152
1178
|
var reportInputSchema = {
|
|
@@ -1171,24 +1197,47 @@ var reportInputSchema = {
|
|
|
1171
1197
|
)
|
|
1172
1198
|
};
|
|
1173
1199
|
var reportSchema = z3.object(reportInputSchema);
|
|
1200
|
+
function groupByRequirement(criteria) {
|
|
1201
|
+
const groups = /* @__PURE__ */ new Map();
|
|
1202
|
+
for (const ac of criteria) {
|
|
1203
|
+
const key = ac.requirement ?? null;
|
|
1204
|
+
const existing = groups.get(key);
|
|
1205
|
+
if (existing) {
|
|
1206
|
+
existing.push(ac);
|
|
1207
|
+
} else {
|
|
1208
|
+
groups.set(key, [ac]);
|
|
1209
|
+
}
|
|
1210
|
+
}
|
|
1211
|
+
return groups;
|
|
1212
|
+
}
|
|
1213
|
+
function renderAcVerdict(lines2, ac) {
|
|
1214
|
+
lines2.push("");
|
|
1215
|
+
lines2.push(`#### ${STATUS_ICON[ac.status]} ${ac.criterion}`);
|
|
1216
|
+
lines2.push("");
|
|
1217
|
+
lines2.push(ac.rationale.trim());
|
|
1218
|
+
for (const ev of ac.evidence) {
|
|
1219
|
+
lines2.push("");
|
|
1220
|
+
lines2.push(ev.note ? `\`${ev.file}\` \u2014 ${ev.note}` : `\`${ev.file}\``);
|
|
1221
|
+
lines2.push("");
|
|
1222
|
+
lines2.push("```diff");
|
|
1223
|
+
lines2.push(ev.hunk.replace(/\n+$/, ""));
|
|
1224
|
+
lines2.push("```");
|
|
1225
|
+
}
|
|
1226
|
+
}
|
|
1174
1227
|
function renderReport(report) {
|
|
1175
1228
|
const lines2 = [];
|
|
1176
1229
|
lines2.push(report.summary.trim());
|
|
1177
1230
|
lines2.push("", "## Files changed", "", report.filesChanged.trim());
|
|
1178
1231
|
if (report.acceptanceCriteria.length > 0) {
|
|
1179
1232
|
lines2.push("", "## Acceptance criteria");
|
|
1180
|
-
|
|
1181
|
-
|
|
1182
|
-
|
|
1183
|
-
lines2.push("");
|
|
1184
|
-
lines2.push(ac.rationale.trim());
|
|
1185
|
-
for (const ev of ac.evidence) {
|
|
1186
|
-
lines2.push("");
|
|
1187
|
-
lines2.push(ev.note ? `\`${ev.file}\` \u2014 ${ev.note}` : `\`${ev.file}\``);
|
|
1233
|
+
const groups = groupByRequirement(report.acceptanceCriteria);
|
|
1234
|
+
for (const [req, acs] of groups) {
|
|
1235
|
+
if (req !== null) {
|
|
1188
1236
|
lines2.push("");
|
|
1189
|
-
lines2.push(
|
|
1190
|
-
|
|
1191
|
-
|
|
1237
|
+
lines2.push(`### ${req}`);
|
|
1238
|
+
}
|
|
1239
|
+
for (const ac of acs) {
|
|
1240
|
+
renderAcVerdict(lines2, ac);
|
|
1192
1241
|
}
|
|
1193
1242
|
}
|
|
1194
1243
|
}
|
|
@@ -1212,7 +1261,7 @@ function createReportTooling() {
|
|
|
1212
1261
|
let submittedReport = null;
|
|
1213
1262
|
const submitReport = tool3(
|
|
1214
1263
|
SUBMIT_REPORT,
|
|
1215
|
-
"Submit the final implementation report as structured data. Call this exactly once, at the end of the run. `acceptanceCriteria` must contain one entry per plan criterion, each with a met / not_met / unclear verdict and the diff hunk(s) that prove it. `summary`, `filesChanged`, `codeQuality`, and `caveats` are the four named markdown sections. `cicd` (optional) holds Verify-phase check results (one entry per command with `command`, `status` `passed`/`failed`, and `output` on failure); omit when no verification setup exists. Do NOT include a PR link \u2014 the runner appends it.",
|
|
1264
|
+
"Submit the final implementation report as structured data. Call this exactly once, at the end of the run. `acceptanceCriteria` must contain one entry per plan criterion (same count and order), each with a met / not_met / unclear verdict and the diff hunk(s) that prove it. Set `requirement` on each entry to the verbatim text of the `### <requirement>` heading the criterion appeared under in the plan \u2014 this groups criteria by requirement in the rendered report. Omit `requirement` for resolve runs (no plan) or when the plan has no requirement headings. `summary`, `filesChanged`, `codeQuality`, and `caveats` are the four named markdown sections. `cicd` (optional) holds Verify-phase check results (one entry per command with `command`, `status` `passed`/`failed`, and `output` on failure); omit when no verification setup exists. Do NOT include a PR link \u2014 the runner appends it.",
|
|
1216
1265
|
reportInputSchema,
|
|
1217
1266
|
async (args) => {
|
|
1218
1267
|
submittedReport = reportSchema.parse(args);
|
|
@@ -1945,6 +1994,13 @@ async function gitDiffStat(dir) {
|
|
|
1945
1994
|
const { stdout: stdout2 } = await git(["-C", dir, "--no-pager", "diff", "--stat"]);
|
|
1946
1995
|
return stdout2;
|
|
1947
1996
|
}
|
|
1997
|
+
async function captureFullDiff(ctx, dir) {
|
|
1998
|
+
const { mergeBranch } = ctx.repo;
|
|
1999
|
+
if (!mergeBranch) return "";
|
|
2000
|
+
await git(["-C", dir, "fetch", "--quiet", "origin", mergeBranch]);
|
|
2001
|
+
const { stdout: stdout2 } = await git(["-C", dir, "--no-pager", "diff", "FETCH_HEAD...HEAD"]);
|
|
2002
|
+
return stdout2;
|
|
2003
|
+
}
|
|
1948
2004
|
var PreCommitError = class extends Error {
|
|
1949
2005
|
constructor(log) {
|
|
1950
2006
|
super("pre-commit checks failed");
|
|
@@ -2159,6 +2215,23 @@ var UI_EXTENSIONS = /* @__PURE__ */ new Set([
|
|
|
2159
2215
|
".sass",
|
|
2160
2216
|
".less"
|
|
2161
2217
|
]);
|
|
2218
|
+
async function buildPreview(ctx, dir, committedFileNames, config, abort) {
|
|
2219
|
+
if (!changedFilesTouchUi(committedFileNames)) {
|
|
2220
|
+
return { status: "skipped", reason: "No UI files were changed in this run." };
|
|
2221
|
+
}
|
|
2222
|
+
try {
|
|
2223
|
+
const { previewId, entrypoint } = await runPreviewPass(
|
|
2224
|
+
ctx,
|
|
2225
|
+
dir,
|
|
2226
|
+
committedFileNames,
|
|
2227
|
+
config,
|
|
2228
|
+
abort
|
|
2229
|
+
);
|
|
2230
|
+
return { status: "ready", previewId, entrypoint };
|
|
2231
|
+
} catch (err) {
|
|
2232
|
+
return { status: "failed", reason: err instanceof Error ? err.message : String(err) };
|
|
2233
|
+
}
|
|
2234
|
+
}
|
|
2162
2235
|
function changedFilesTouchUi(stat) {
|
|
2163
2236
|
for (const line of stat.split("\n")) {
|
|
2164
2237
|
const trimmed = line.trim();
|
|
@@ -2686,18 +2759,26 @@ ${reply}`;
|
|
|
2686
2759
|
rebase: !resumed
|
|
2687
2760
|
});
|
|
2688
2761
|
reply += outcomeBanner(outcome, { branch: ctx.repo.checkoutBranch, documented, autoMerged });
|
|
2689
|
-
|
|
2762
|
+
let fullDiff;
|
|
2763
|
+
if (report && outcome.kind !== "none") {
|
|
2764
|
+
try {
|
|
2765
|
+
fullDiff = (await captureFullDiff(ctx, dir)).trim() || void 0;
|
|
2766
|
+
} catch (err) {
|
|
2767
|
+
console.warn(` full-diff capture skipped: ${errorMessage2(err)}`);
|
|
2768
|
+
}
|
|
2769
|
+
}
|
|
2770
|
+
const finalReport = report ? {
|
|
2771
|
+
...report,
|
|
2772
|
+
...conflictResolution ? { conflictResolution } : {},
|
|
2773
|
+
...fullDiff ? { fullDiff } : {}
|
|
2774
|
+
} : report;
|
|
2690
2775
|
let preview;
|
|
2691
2776
|
if (outcome.kind !== "none") {
|
|
2777
|
+
console.log(` \u2026checking UI preview for implement ${ctx.jobId}`);
|
|
2692
2778
|
const committedFileNames = await gitCommittedFiles(dir);
|
|
2693
|
-
|
|
2694
|
-
|
|
2695
|
-
|
|
2696
|
-
preview = await runPreviewPass(ctx, dir, committedFileNames, config, abort);
|
|
2697
|
-
} catch (err) {
|
|
2698
|
-
console.warn(` preview skipped: ${errorMessage2(err)}`);
|
|
2699
|
-
}
|
|
2700
|
-
}
|
|
2779
|
+
preview = await buildPreview(ctx, dir, committedFileNames, config, abort);
|
|
2780
|
+
if (preview.status !== "ready")
|
|
2781
|
+
console.log(` UI preview ${preview.status}: ${preview.reason}`);
|
|
2701
2782
|
}
|
|
2702
2783
|
return {
|
|
2703
2784
|
text: reply,
|
|
@@ -2760,15 +2841,11 @@ async function processReviseJob(ctx, dir, resumed, config, abort) {
|
|
|
2760
2841
|
const finalReport = report && conflictResolution ? { ...report, conflictResolution } : report;
|
|
2761
2842
|
let preview;
|
|
2762
2843
|
if (outcome.kind !== "none") {
|
|
2844
|
+
console.log(` \u2026checking UI preview for revise ${ctx.jobId}`);
|
|
2763
2845
|
const committedFileNames = await gitCommittedFiles(dir);
|
|
2764
|
-
|
|
2765
|
-
|
|
2766
|
-
|
|
2767
|
-
preview = await runPreviewPass(ctx, dir, committedFileNames, config, abort);
|
|
2768
|
-
} catch (err) {
|
|
2769
|
-
console.warn(` preview skipped: ${errorMessage2(err)}`);
|
|
2770
|
-
}
|
|
2771
|
-
}
|
|
2846
|
+
preview = await buildPreview(ctx, dir, committedFileNames, config, abort);
|
|
2847
|
+
if (preview.status !== "ready")
|
|
2848
|
+
console.log(` UI preview ${preview.status}: ${preview.reason}`);
|
|
2772
2849
|
}
|
|
2773
2850
|
return {
|
|
2774
2851
|
text: reply,
|
package/dist/mcp-stdio.js
CHANGED
|
@@ -57,6 +57,14 @@ var stepSchema = z.object({
|
|
|
57
57
|
"Per-file pseudo code. Provide an entry for every non-documentation file this step touches. Each entry contains the file path and pseudo code describing the changes to that file."
|
|
58
58
|
)
|
|
59
59
|
});
|
|
60
|
+
var requirementSchema = z.object({
|
|
61
|
+
requirement: z.string().min(1).describe(
|
|
62
|
+
"A human-readable statement of what this change must accomplish and why, in plain language a non-technical reader can follow. Distinct from acceptanceCriteria: requirements explain intent/rationale; acceptance criteria are the machine-checkable proof. " + INLINE_CODE_HINT
|
|
63
|
+
),
|
|
64
|
+
acceptanceCriteria: z.array(z.string().min(1)).min(1).describe(
|
|
65
|
+
"Concrete, deterministically-checkable conditions that prove this requirement is satisfied. Each names a trigger/precondition and the exact observable result (run X -> output Y; file Z contains W; f(a) returns b) \u2014 no vague adjectives, not a restatement of a step. " + INLINE_CODE_HINT
|
|
66
|
+
)
|
|
67
|
+
});
|
|
60
68
|
var planInputSchema = {
|
|
61
69
|
title: z.string().min(1).max(120).describe(
|
|
62
70
|
"A concise, descriptive name for THIS plan. Must be distinct from the request title and from any sibling plans on the same request. Keep it under 120 characters."
|
|
@@ -70,13 +78,10 @@ var planInputSchema = {
|
|
|
70
78
|
"Why the user is making this request \u2014 the underlying motivation or problem the change addresses. Fill this especially when the request content/context does NOT already state the why (ask the user in the Clarify phase); omit when there is no additional motivation to record. Useful for future understanding of the system. " + INLINE_CODE_HINT
|
|
71
79
|
),
|
|
72
80
|
assumptions: z.array(z.string()).describe("Anything decided during planning, including unanswered defaults."),
|
|
73
|
-
requirements: z.array(
|
|
74
|
-
"Required, human-readable statements of what this change must accomplish and why,
|
|
81
|
+
requirements: z.array(requirementSchema).min(1).describe(
|
|
82
|
+
"Required, human-readable statements of what this change must accomplish and why, each carrying its own acceptanceCriteria. At least 1 requirement required; at least 2 acceptance criteria total across all requirements. " + INLINE_CODE_HINT
|
|
75
83
|
),
|
|
76
84
|
steps: z.array(stepSchema).min(1).describe("Ordered list of changes. Each step says what and why, with file references."),
|
|
77
|
-
acceptanceCriteria: z.array(z.string().min(1)).min(2).describe(
|
|
78
|
-
"Concrete, deterministically-checkable conditions that together define done. Each names a trigger/precondition and the exact observable result (run X -> output Y; file Z contains W; f(a) returns b) \u2014 no vague adjectives, not a restatement of a step. The set must collectively cover every step's change. At least 2 required. " + INLINE_CODE_HINT
|
|
79
|
-
),
|
|
80
85
|
risks: z.array(z.string()).describe("Anything that could change the approach."),
|
|
81
86
|
outOfScope: z.array(z.string()).describe("What is deliberately not being done.")
|
|
82
87
|
};
|
|
@@ -91,7 +96,21 @@ function requireRootCauseForFix(schema) {
|
|
|
91
96
|
}
|
|
92
97
|
});
|
|
93
98
|
}
|
|
94
|
-
|
|
99
|
+
function requireAtLeastTwoCriteria(schema) {
|
|
100
|
+
return schema.superRefine((plan, ctx) => {
|
|
101
|
+
const total = plan.requirements.reduce((sum, r) => sum + r.acceptanceCriteria.length, 0);
|
|
102
|
+
if (total < 2) {
|
|
103
|
+
ctx.addIssue({
|
|
104
|
+
code: z.ZodIssueCode.custom,
|
|
105
|
+
path: ["requirements"],
|
|
106
|
+
message: "At least 2 acceptance criteria total across all requirements are required."
|
|
107
|
+
});
|
|
108
|
+
}
|
|
109
|
+
});
|
|
110
|
+
}
|
|
111
|
+
var planSchema = requireAtLeastTwoCriteria(
|
|
112
|
+
requireRootCauseForFix(z.object(planInputSchema))
|
|
113
|
+
);
|
|
95
114
|
function renderPlan(plan) {
|
|
96
115
|
const lines = [];
|
|
97
116
|
lines.push(`# ${plan.title}`);
|
|
@@ -118,8 +137,8 @@ function renderPlan(plan) {
|
|
|
118
137
|
}
|
|
119
138
|
lines.push("");
|
|
120
139
|
lines.push("## Requirements");
|
|
121
|
-
for (const
|
|
122
|
-
lines.push(`- ${requirement}`);
|
|
140
|
+
for (const req of plan.requirements) {
|
|
141
|
+
lines.push(`- ${req.requirement}`);
|
|
123
142
|
}
|
|
124
143
|
lines.push("");
|
|
125
144
|
lines.push("## Steps");
|
|
@@ -144,8 +163,11 @@ function renderPlan(plan) {
|
|
|
144
163
|
}
|
|
145
164
|
lines.push("");
|
|
146
165
|
lines.push("## Acceptance criteria");
|
|
147
|
-
for (const
|
|
148
|
-
lines.push(
|
|
166
|
+
for (const req of plan.requirements) {
|
|
167
|
+
lines.push(`### ${req.requirement}`);
|
|
168
|
+
for (const criterion of req.acceptanceCriteria) {
|
|
169
|
+
lines.push(`- [ ] ${criterion}`);
|
|
170
|
+
}
|
|
149
171
|
}
|
|
150
172
|
if (plan.risks.length > 0) {
|
|
151
173
|
lines.push("");
|
|
@@ -166,7 +188,7 @@ function renderPlan(plan) {
|
|
|
166
188
|
return lines.join("\n");
|
|
167
189
|
}
|
|
168
190
|
var submitPlanInputSchema = {
|
|
169
|
-
plans: z.array(requireRootCauseForFix(z.object(planInputSchema))).min(1).refine(
|
|
191
|
+
plans: z.array(requireAtLeastTwoCriteria(requireRootCauseForFix(z.object(planInputSchema)))).min(1).refine(
|
|
170
192
|
(arr) => {
|
|
171
193
|
const titles = arr.map((p) => p.title.trim()).filter((t) => t.length > 0);
|
|
172
194
|
return new Set(titles).size === titles.length;
|
package/package.json
CHANGED
|
@@ -184,6 +184,7 @@ The report subagent calls `submit_report` with these fields:
|
|
|
184
184
|
verbatim from the live `git --no-pager diff`, including each hunk's `@@ -a,b +c,d @@` header line(s) (do not strip them — the report renders file line numbers from them), and proves the verdict (`note`
|
|
185
185
|
optionally explains it). Never include a hunk that isn't in the actual diff. Cite
|
|
186
186
|
the supporting hunk(s) for a met criterion; may be empty for not_met / unclear.
|
|
187
|
+
- `requirement` — the verbatim text of the `### <requirement>` heading this criterion appeared under in the plan (from the `## Acceptance criteria` section). Set this for every criterion from a structured plan. Omit only for resolve runs (no plan) or legacy plans without requirement headings.
|
|
187
188
|
- **`cicd`** (optional) — array of Verify-phase check results. Each entry: `command` (exact command run), `status` (`"passed"` / `"failed"`), `output` (short failing-output excerpt, on failure only). Omit when the repo has no verification setup. Rendered under `## CI/CD`. A failing check does not block the report.
|
|
188
189
|
|
|
189
190
|
## Always
|
|
@@ -72,18 +72,11 @@ Field-by-field guidance:
|
|
|
72
72
|
- **`motivation`** — optional. The user's stated or asked-for reason for making this request — the underlying motivation or problem the change addresses. Fill this when the request content/context does NOT already state the why (ask during Phase 1 — Clarify if needed); omit when there is no additional motivation to record. Useful for future understanding of the system.
|
|
73
73
|
- **`assumptions`** — anything you decided during investigation (including
|
|
74
74
|
unanswered defaults from Phase 1).
|
|
75
|
-
- **`requirements`** — **required; at least 1 item.**
|
|
76
|
-
- **`
|
|
77
|
-
- **`
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
- **`acceptanceCriteria`** — **required; at least 2 items.** Each criterion must
|
|
81
|
-
be a concrete, deterministically-checkable condition that a third party can verify
|
|
82
|
-
without knowing the author's intent. Write each as a trigger/precondition and the
|
|
83
|
-
exact observable result: `run X → output Y`, `file Z contains W`, `calling f(a) returns b`.
|
|
84
|
-
No vague adjectives (`robust`, `clean`, `properly`, `works correctly`). The set
|
|
85
|
-
must be **collectively exhaustive** — every step's intended change is covered by
|
|
86
|
-
at least one AC. Do **not** restate a step as a criterion.
|
|
75
|
+
- **`requirements`** — **required; at least 1 item.** An array of objects, each with:
|
|
76
|
+
- **`requirement`** — a plain-language statement of what this change must accomplish and why, written so a non-technical reader can follow it. Distinct from `acceptanceCriteria`: requirements explain intent and rationale; acceptance criteria are the machine-checkable proof.
|
|
77
|
+
- **`acceptanceCriteria`** — **required; at least 1 item per requirement.** Concrete, deterministically-checkable conditions that prove this specific requirement is satisfied. The total count across all requirements must be **at least 2**. Write each criterion as a trigger/precondition and the exact observable result: `run X → output Y`, `file Z contains W`, `calling f(a) returns b`. No vague adjectives. Every step's intended change must be covered by at least one AC.
|
|
78
|
+
|
|
79
|
+
The link between each requirement and its criteria is established at plan time. When the implementing agent later calls `submit_report`, every `acceptanceCriteria` verdict must carry a `requirement` field naming the `### <requirement>` heading that the criterion appeared under in the plan.
|
|
87
80
|
|
|
88
81
|
**Good vs bad examples:**
|
|
89
82
|
- ✅ `grep -rn "What changed" apps/runner/src/report.ts` produces no matches.
|
|
@@ -91,6 +84,11 @@ Field-by-field guidance:
|
|
|
91
84
|
- ✅ `pnpm test` in the repo root exits 0 and report.test.ts output contains no failures.
|
|
92
85
|
- ❌ Tests pass correctly. _(no trigger, no observable result)_
|
|
93
86
|
|
|
87
|
+
- **`steps`** — an ordered list. For each step provide:
|
|
88
|
+
- **`title`** — a concise imperative phrase naming the step (e.g. "Add submit_plan schema to plan.ts").
|
|
89
|
+
- **`description`** — an array of bullet points that help the reviewer understand the upcoming `pseudoCode` and decide whether the plan and design are correct. Each item is a distinct, self-contained point about what is changing and why — not a single paragraph, and not a line-by-line restatement of the pseudo code. Use concrete file references (`path/to/file.ts`) and name the functions/symbols involved. Apply inline-code formatting to all identifiers.
|
|
90
|
+
- **`pseudoCode`** — an array of `{ file, pseudoCode }` entries. Provide an entry for every file the step touches **except** documentation files (SKILL.md, README.md, wiki pages, etc.). `pseudoCode` is optional in the schema but expected for all non-documentation files. Each entry names the file path and contains pseudo code that precisely describes the changes to make in that file.
|
|
91
|
+
|
|
94
92
|
- **`risks`** — anything that could change the approach or surface a problem.
|
|
95
93
|
- **`outOfScope`** — what you are deliberately not doing.
|
|
96
94
|
|
|
@@ -41,7 +41,7 @@ actual code. Pick exactly one:
|
|
|
41
41
|
- **Re-plan** — the request meaningfully changes scope or direction, enough that a
|
|
42
42
|
fresh plan should be agreed before building. Call **`submit_plan`** with a `plans[]` array
|
|
43
43
|
containing the revised structured fields (same per-plan shape as the request-to-plan skill:
|
|
44
|
-
`scope`, `goal`, `assumptions`, `requirements` — at least 1
|
|
44
|
+
`scope`, `goal`, `assumptions`, `requirements` (at least 1, each with its own `acceptanceCriteria` array; at least 2 criteria total across all requirements), `steps`, `risks`,
|
|
45
45
|
`outOfScope`). Include only one entry for a revise turn. The runner posts it as a revision
|
|
46
46
|
the user can accept; make no code changes this turn.
|
|
47
47
|
- **Implement** — the request is clear and reasonable. Make the change (via
|
|
@@ -82,8 +82,9 @@ user:
|
|
|
82
82
|
|
|
83
83
|
- **Implemented:** call **`submit_report`** with the structured report, exactly as
|
|
84
84
|
`implement-plan` does. Include one `acceptanceCriteria` entry per plan AC (with a
|
|
85
|
-
met / not_met / unclear verdict
|
|
86
|
-
|
|
85
|
+
met / not_met / unclear verdict, the diff hunk(s) that prove it, and a `requirement`
|
|
86
|
+
field set to the verbatim requirement heading the criterion appeared under in the plan),
|
|
87
|
+
plus the four required markdown sections (`summary`, `filesChanged`, `codeQuality`, `caveats`).
|
|
87
88
|
Include `cicd` from the Verify results (one entry per check, same shape as
|
|
88
89
|
`implement-plan`; omit when no verification setup).
|
|
89
90
|
Base `filesChanged` and evidence on the actual `git --no-pager diff`, not on what
|