@flumecode/runner 0.20.0 → 0.22.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/cli.js CHANGED
@@ -854,6 +854,27 @@ function errorMessage(err) {
854
854
  return err instanceof Error ? err.message : String(err);
855
855
  }
856
856
 
857
+ // src/models.ts
858
+ var MODEL_ROSTER = {
859
+ anthropic: { think: "opus", execute: "sonnet" },
860
+ openai: { think: "gpt-5.5", execute: "gpt-5.4-mini" }
861
+ // TODO: confirm vs `codex`
862
+ };
863
+ function rosterFor(provider) {
864
+ return MODEL_ROSTER[provider ?? "anthropic"] ?? MODEL_ROSTER.anthropic;
865
+ }
866
+ function renderRosterBlock(provider) {
867
+ const r = rosterFor(provider);
868
+ return [
869
+ "# Model tiers",
870
+ "When a skill tells you to run a subagent on a named model tier, pass the matching",
871
+ "concrete model id as the Task `model` argument. Use the id exactly; never pass the",
872
+ "tier name itself.",
873
+ `- \`think\` -> \`${r.think}\` (planning, review, high-level reasoning)`,
874
+ `- \`execute\` -> \`${r.execute}\` (writing code from the plan; fast & cheap)`
875
+ ].join("\n");
876
+ }
877
+
857
878
  // src/rules.ts
858
879
  import { readFileSync as readFileSync4 } from "node:fs";
859
880
  import { join as join3 } from "node:path";
@@ -906,6 +927,7 @@ function buildPrompt(ctx) {
906
927
  LANGUAGE_DIRECTIVE
907
928
  ];
908
929
  if (ctx.permissionMode !== "plan") {
930
+ lines2.push("", renderRosterBlock(ctx.provider));
909
931
  lines2.push(
910
932
  "",
911
933
  "These coding guidelines apply to all code produced in this run:",
@@ -937,6 +959,8 @@ function buildRevisePrompt(ctx) {
937
959
  widgets,
938
960
  LANGUAGE_DIRECTIVE,
939
961
  "",
962
+ renderRosterBlock(ctx.provider),
963
+ "",
940
964
  "These coding guidelines apply to all code produced in this run:",
941
965
  "",
942
966
  loadRule("coding-guideline"),
@@ -1041,56 +1065,35 @@ function buildRepairPrompt(ctx, hookLog) {
1041
1065
  ];
1042
1066
  return lines2.join("\n");
1043
1067
  }
1044
- function buildReleasePrompt(ctx, baseChecks) {
1045
- const task = `Use the \`flumecode:create-release\` skill to handle this turn. You are driving a release: first analyse commits since the last tag, propose version bumps, and ask the user to confirm via widgets (Phase 1); once the user's widget answers appear in the thread, apply the bumps to package.json files and update CHANGELOG.md (Phase 2). Do NOT commit or push \u2014 the runner handles that and opens the bump PR.`;
1068
+ function buildReleasePrompt(ctx) {
1069
+ const task = `Use the \`flumecode:create-release\` skill to handle this turn. You are driving a release: first analyse commits since the last tag, propose version bumps, and ask the user to confirm via widgets (Phase 1); once the user's widget answers appear in the thread, emit the final report with the confirmed versions (Phase 2). Do NOT edit package.json or CHANGELOG.md, do NOT commit, push, or open a PR.`;
1046
1070
  const orient = `Before investigating raw source, check for a FlumeCode wiki at \`.flumecode/wiki/\`. If it exists, read \`.flumecode/wiki/README.md\` first \u2014 it is the index \u2014 and follow its links to the pages and source paths relevant to this release. If there is no wiki, work from the code directly.`;
1047
1071
  const widgets = `When you need the user to choose, ask it as a widget rather than writing the options as prose: call \`single_select\` for a one-of-N choice (radio buttons) or \`multi_select\` for a "select all that apply" choice (checkboxes). Don't add your own "Other" option \u2014 the UI always provides one. After calling a widget tool, end your turn \u2014 the user's answer comes back as their next message and starts a fresh run.`;
1048
1072
  const lines2 = [
1049
1073
  `You are "${ctx.agentName}", an autonomous coding agent driving a FlumeCode release.`,
1050
- `The repository ${ctx.repo.fullName} is checked out in your current working directory on the release bump branch "${ctx.repo.checkoutBranch}".`,
1074
+ `The repository ${ctx.repo.fullName} is checked out in your current working directory at the frozen release commit (branch "${ctx.repo.checkoutBranch}").`,
1051
1075
  task,
1052
1076
  orient,
1053
1077
  widgets,
1054
1078
  LANGUAGE_DIRECTIVE,
1055
1079
  "",
1056
- "These coding guidelines apply to all code produced in this run:",
1057
- "",
1058
- loadRule("coding-guideline"),
1059
- "",
1060
1080
  `# Release: ${ctx.request?.title ?? ""}`
1061
1081
  ];
1062
1082
  if (ctx.request?.body) {
1063
1083
  lines2.push("", ctx.request.body);
1064
1084
  }
1065
- if (baseChecks && !baseChecks.ok) {
1066
- lines2.push(
1067
- "",
1068
- "# Pre-release check status",
1069
- "",
1070
- "\u26A0\uFE0F The repository's pre-commit checks (lint / typecheck / tests) are currently FAILING on the base branch, independently of any version bump. A release must not ship a broken base:",
1071
- "",
1072
- "- **Phase 1 (propose):** tell the user, in your reply, that the base currently fails these checks and that the release will fix them as part of the bump.",
1073
- "- **Phase 2 (apply):** fix the failing code at its root so the checks pass, THEN apply the version bumps and CHANGELOG. Do NOT delete/skip tests or weaken assertions. The fixes ship in the same bump PR. Still do NOT commit or push \u2014 the runner does.",
1074
- "",
1075
- "Failing check output:",
1076
- "",
1077
- "```",
1078
- baseChecks.log,
1079
- "```"
1080
- );
1081
- }
1082
1085
  if (ctx.prerelease) {
1083
1086
  lines2.push(
1084
1087
  "",
1085
1088
  "# Pre-release",
1086
1089
  "",
1087
- "This is a PRE-RELEASE. When proposing and applying versions, use a semver pre-release version string (e.g. `0.9.0-beta.1`): take the next stable version you would otherwise pick and append `-beta.N`, where N is the next unused beta number for that version (check existing `v<version>-beta.*` tags). Offer these pre-release strings in the version-confirmation widgets, and write them to package.json, CHANGELOG.md, and the `flumecode:versions` comment as usual."
1090
+ "This is a PRE-RELEASE. When proposing versions, use a semver pre-release version string (e.g. `0.9.0-beta.1`): take the next stable version you would otherwise pick and append `-beta.N`, where N is the next unused beta number for that version (check existing `v<version>-beta.*` tags). Offer these pre-release strings in the version-confirmation widgets, and include them in the `flumecode:versions` comment as usual."
1088
1091
  );
1089
1092
  }
1090
1093
  appendThread(lines2, ctx);
1091
1094
  lines2.push(
1092
1095
  "",
1093
- "Your final reply is posted verbatim as your comment in the release thread \u2014 if you called widgets (Phase 1), your reply text accompanies the questions; if you applied the bumps (Phase 2), make it the report the skill produced. The runner appends the pull-request link."
1096
+ "Your final reply is posted verbatim as your comment in the release thread \u2014 if you called widgets (Phase 1), your reply text accompanies the questions; if you emitted the final report (Phase 2), make it the report the skill produced."
1094
1097
  );
1095
1098
  return lines2.join("\n");
1096
1099
  }
@@ -1287,27 +1290,6 @@ function commitFailureLog(err) {
1287
1290
  const parts = [e.stdout, e.stderr].map((s) => typeof s === "string" ? s.trim() : "").filter((s) => s.length > 0);
1288
1291
  return parts.length > 0 ? parts.join("\n") : e.message ?? String(err);
1289
1292
  }
1290
- function isUnsupportedGitSubcommand(err) {
1291
- const e = err;
1292
- const text = `${typeof e.stderr === "string" ? e.stderr : ""}
1293
- ${e.message ?? ""}`;
1294
- return /is not a git command|unknown subcommand|usage: git hook/i.test(text);
1295
- }
1296
- async function runRepoChecks(dir) {
1297
- try {
1298
- await git(["-C", dir, "hook", "run", "pre-commit"]);
1299
- logEvent("checks", "pre-commit hook passed");
1300
- return { ok: true, log: "", skipped: false };
1301
- } catch (err) {
1302
- if (isUnsupportedGitSubcommand(err)) {
1303
- logEvent("checks", "pre-commit hook skipped (git too old)");
1304
- return { ok: true, log: "", skipped: true };
1305
- }
1306
- const log = commitFailureLog(err);
1307
- logEvent("checks:err", log);
1308
- return { ok: false, log, skipped: false };
1309
- }
1310
- }
1311
1293
  async function commitChanges(ctx, dir) {
1312
1294
  if (!await hasChanges(dir)) return false;
1313
1295
  try {
@@ -1404,10 +1386,9 @@ async function openPullRequest(ctx) {
1404
1386
  return { number: data.number, url: data.html_url };
1405
1387
  }
1406
1388
  if (res.status === 422) {
1407
- const list = await fetch(
1408
- `${apiBase}/pulls?state=open&head=${owner}:${checkoutBranch}&base=${mergeBranch}`,
1409
- { headers }
1410
- );
1389
+ const list = await fetch(`${apiBase}/pulls?state=open&head=${owner}:${checkoutBranch}`, {
1390
+ headers
1391
+ });
1411
1392
  if (list.ok) {
1412
1393
  const open = await list.json();
1413
1394
  if (open[0]) return { number: open[0].number, url: open[0].html_url };
@@ -1478,7 +1459,7 @@ async function prNumbersForCommit(ctx, sha) {
1478
1459
  // src/run.ts
1479
1460
  var IDLE_MS = 5e3;
1480
1461
  var CANCEL_POLL_MS = 2500;
1481
- var ORCHESTRATOR_MODEL = "sonnet";
1462
+ var orchestratorModel = (ctx) => rosterFor(ctx.provider).think;
1482
1463
  var ORCHESTRATOR_MAX_TURNS = 80;
1483
1464
  var MAX_COMMIT_REPAIRS = 2;
1484
1465
  var MAX_IMPLEMENT_RETRIES = 1;
@@ -1524,7 +1505,7 @@ async function mergeAndResolveConflicts(ctx, dir, config, abort) {
1524
1505
  cwd: dir,
1525
1506
  prompt: buildResolvePrompt(ctx, related),
1526
1507
  permissionMode: ctx.permissionMode,
1527
- model: ORCHESTRATOR_MODEL,
1508
+ model: orchestratorModel(ctx),
1528
1509
  maxTurns: ORCHESTRATOR_MAX_TURNS,
1529
1510
  abortController: abort
1530
1511
  });
@@ -1549,7 +1530,7 @@ async function commitWithRepair(ctx, dir, abort) {
1549
1530
  cwd: dir,
1550
1531
  prompt: buildRepairPrompt(ctx, err.log),
1551
1532
  permissionMode: ctx.permissionMode,
1552
- model: ORCHESTRATOR_MODEL,
1533
+ model: orchestratorModel(ctx),
1553
1534
  maxTurns: ORCHESTRATOR_MAX_TURNS,
1554
1535
  abortController: abort
1555
1536
  });
@@ -1653,7 +1634,7 @@ async function processChatJob(ctx, dir, config, abort) {
1653
1634
  prompt: buildPrompt(ctx),
1654
1635
  permissionMode: ctx.permissionMode,
1655
1636
  abortController: abort,
1656
- ...orchestrating ? { model: ORCHESTRATOR_MODEL, maxTurns: ORCHESTRATOR_MAX_TURNS } : {}
1637
+ ...orchestrating ? { model: orchestratorModel(ctx), maxTurns: ORCHESTRATOR_MAX_TURNS } : {}
1657
1638
  });
1658
1639
  const summary = result.text.trim();
1659
1640
  let reply = summary || "(the agent produced no summary)";
@@ -1719,7 +1700,7 @@ async function processImplementJob(ctx, dir, resumed, config, abort) {
1719
1700
  cwd: dir,
1720
1701
  prompt: buildPrompt(ctx),
1721
1702
  permissionMode: ctx.permissionMode,
1722
- model: ORCHESTRATOR_MODEL,
1703
+ model: orchestratorModel(ctx),
1723
1704
  maxTurns: ORCHESTRATOR_MAX_TURNS,
1724
1705
  abortController: abort
1725
1706
  });
@@ -1798,7 +1779,7 @@ async function processReviseJob(ctx, dir, resumed, config, abort) {
1798
1779
  cwd: dir,
1799
1780
  prompt: buildRevisePrompt(ctx),
1800
1781
  permissionMode: ctx.permissionMode,
1801
- model: ORCHESTRATOR_MODEL,
1782
+ model: orchestratorModel(ctx),
1802
1783
  maxTurns: ORCHESTRATOR_MAX_TURNS,
1803
1784
  abortController: abort
1804
1785
  });
@@ -1873,48 +1854,26 @@ async function processResolveJob(ctx, dir, config, abort) {
1873
1854
  reply += outcomeBanner(outcome, { branch: ctx.repo.checkoutBranch });
1874
1855
  return { text: reply, widgets: [], ...report ? { report } : {}, ...pr ? { pr } : {} };
1875
1856
  }
1876
- async function processReleaseJob(ctx, dir, resumed, config, abort) {
1857
+ async function processReleaseJob(ctx, dir, _resumed, _config, abort) {
1877
1858
  console.log(`
1878
1859
  \u25B6 Release ${ctx.jobId} \u2014 ${ctx.repo.fullName}: "${jobTitle(ctx)}"`);
1879
- const installResult = await installDependencies(dir);
1880
- const checks = await runRepoChecks(dir);
1881
- if (checks.skipped) {
1882
- console.log(` \u2026release ${ctx.jobId}: pre-release checks skipped (git too old for 'hook run')`);
1883
- } else {
1884
- console.log(` \u2026release ${ctx.jobId}: pre-release checks ${checks.ok ? "passed" : "FAILED"}`);
1885
- }
1886
- const baseChecks = checks.ok ? void 0 : { ok: false, log: trimHookLog(checks.log) };
1860
+ await installDependencies(dir);
1887
1861
  const result = await runClaudeCode({
1888
1862
  cwd: dir,
1889
- prompt: buildReleasePrompt(ctx, baseChecks),
1863
+ prompt: buildReleasePrompt(ctx),
1890
1864
  permissionMode: ctx.permissionMode,
1891
- model: ORCHESTRATOR_MODEL,
1865
+ model: orchestratorModel(ctx),
1892
1866
  maxTurns: ORCHESTRATOR_MAX_TURNS,
1893
1867
  abortController: abort
1894
1868
  });
1895
- let reply = result.text.trim() || "(the agent produced no reply)";
1896
- if (installResult.status === "failed") {
1897
- reply += `
1898
-
1899
- > \u26A0\uFE0F Dependencies failed to install (\`${installResult.manager}\`); tests may not have run.`;
1900
- }
1869
+ const reply = result.text.trim() || "(the agent produced no reply)";
1901
1870
  if (result.widgets.length > 0) {
1902
1871
  console.log(
1903
1872
  ` \u2026release ${ctx.jobId} posted ${result.widgets.length} widget(s); awaiting reply`
1904
1873
  );
1905
1874
  return { text: reply, widgets: result.widgets };
1906
1875
  }
1907
- const { outcome, autoMerged } = await pushAndOpenPr(ctx, dir, config, abort, {
1908
- rebase: !resumed
1909
- });
1910
- if (outcome.kind !== "none") {
1911
- reply += outcomeBanner(outcome, { branch: ctx.repo.checkoutBranch, autoMerged });
1912
- }
1913
- return {
1914
- text: reply,
1915
- widgets: [],
1916
- ...outcome.kind === "pr" ? { pr: outcome.pr } : {}
1917
- };
1876
+ return { text: reply, widgets: [] };
1918
1877
  }
1919
1878
  async function heartbeat(config) {
1920
1879
  const health = await checkClaudeCode();
@@ -2051,7 +2010,7 @@ ${trimmed}` : trimmed;
2051
2010
  }
2052
2011
  function formatJobError(ctx, err) {
2053
2012
  if (!(err instanceof PreCommitError)) return errorMessage2(err);
2054
- const nextStep = ctx.kind === "release" ? `These checks are failing on \`${ctx.repo.mergeBranch}\` independently of the version bump, and the release couldn't fix them after ${MAX_COMMIT_REPAIRS} automatic attempts. Open a request on **${ctx.repo.fullName}** to fix the failing checks above, then start the release again once that fix has merged.` : `The agent couldn't get its change past these checks after ${MAX_COMMIT_REPAIRS} automatic repair attempts. Open a request on **${ctx.repo.fullName}** describing the failing checks above so the agent can fix them at their root, then try again.`;
2013
+ const nextStep = ctx.kind === "release" ? `These checks are failing on \`${ctx.repo.mergeBranch}\` independently of this release, and the release couldn't fix them after ${MAX_COMMIT_REPAIRS} automatic attempts. Open a request on **${ctx.repo.fullName}** to fix the failing checks above, then start the release again once that fix has merged.` : `The agent couldn't get its change past these checks after ${MAX_COMMIT_REPAIRS} automatic repair attempts. Open a request on **${ctx.repo.fullName}** describing the failing checks above so the agent can fix them at their root, then try again.`;
2055
2014
  return [
2056
2015
  "\u274C **Blocked by failing pre-commit checks.**",
2057
2016
  "",
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@flumecode/runner",
3
- "version": "0.20.0",
3
+ "version": "0.22.0",
4
4
  "type": "module",
5
5
  "description": "FlumeCode local runner — claims jobs and drives your local Claude Code against a real checkout.",
6
6
  "bin": {
@@ -1,10 +1,10 @@
1
1
  ---
2
2
  name: create-release
3
3
  description: >-
4
- Draft release notes and version suggestions for a release, then (after the
5
- user confirms) open a bump PR that updates package.json version(s) and
6
- CHANGELOG.md. Two-turn flow: first turn asks the user to confirm versions via
7
- widgets; second turn (answers in thread) writes the bumps and opens the PR.
4
+ Draft release notes and version suggestions for a release. Two-turn flow:
5
+ first turn asks the user to confirm versions via widgets; second turn (answers
6
+ in thread) emits the structured report with confirmed versions and notes. Does
7
+ NOT edit package.json or CHANGELOG.md, and does NOT commit, push, or open a PR.
8
8
  ---
9
9
 
10
10
  # create-release
@@ -17,7 +17,7 @@ which one applies before acting.
17
17
  Check the thread (`# Conversation so far` in the prompt). If **no widget answers**
18
18
  appear in any prior agent turn, this is **Phase 1** — propose versions and ask.
19
19
  If a prior turn contains widget-answer selections (the user picked a version), this
20
- is **Phase 2** — apply the bumps and report.
20
+ is **Phase 2** — emit the final report.
21
21
 
22
22
  ---
23
23
 
@@ -99,11 +99,11 @@ can read them inside the question widget:
99
99
  - `options`: `Yes, use these notes`, `I'll edit them in the PR`
100
100
  (You may still summarise in the reply text, but the notes MUST be in the widget `body`.)
101
101
 
102
- **After calling widgets, end your turn.** Do NOT open a PR in Phase 1.
102
+ **After calling widgets, end your turn.** Do NOT edit any files in Phase 1.
103
103
 
104
104
  ---
105
105
 
106
- ## Phase 2 — Apply the bumps and report
106
+ ## Phase 2 — Emit the final report
107
107
 
108
108
  ### 1. Read the widget answers
109
109
 
@@ -111,52 +111,19 @@ The user's confirmed version selections appear in the `# Conversation so far`
111
111
  thread as agent messages (the widget-answer turn). Extract the chosen version for
112
112
  each package from those selections.
113
113
 
114
- ### 2. Update package.json files
114
+ ### 2. Emit the structured report
115
115
 
116
- For each package whose version changed, edit the `"version"` field in:
117
-
118
- - `apps/web/package.json` — for `@flumecode/web`
119
- - `apps/runner/package.json` — for `@flumecode/runner`
120
-
121
- Change only the `"version"` line; do not reformat the file.
122
-
123
- ### 3. Update CHANGELOG.md
124
-
125
- Edit (or create) `CHANGELOG.md` at the repo root. Insert a new section at the
126
- top, below any existing `# Changelog` title line:
116
+ Your final message must match this shape (adjust versions and notes to match what
117
+ was confirmed):
127
118
 
128
119
  ```
129
- ## [X.Y.Z / runner-X.Y.Z] - YYYY-MM-DD
120
+ **Confirmed versions:**
121
+ - `@flumecode/web`: `0.9.0`
122
+ - `@flumecode/runner`: `0.5.0`
130
123
 
124
+ **Release notes:**
131
125
  - Bullet point from release notes
132
126
  - Another bullet point
133
- ```
134
-
135
- Use the ISO date format (`YYYY-MM-DD`). Preserve all existing entries — do not
136
- delete or rewrite prior sections.
137
-
138
- If both packages are bumped, list both versions in the heading (e.g.
139
- `## [0.9.0 / runner-0.5.0] - 2026-06-06`). If only one package is bumped, list
140
- only that version in the heading.
141
-
142
- ### 4. Stop — do not commit or push
143
-
144
- Leave the edited files in the working tree. The runner commits them and opens the
145
- pull request.
146
-
147
- ### 5. End with this exact report format
148
-
149
- Your final message must match this shape (adjust versions and files to match what
150
- actually changed):
151
-
152
- ```
153
- **Bumped versions:**
154
- - `@flumecode/web`: `0.8.0` → `0.9.0`
155
- - `@flumecode/runner`: `0.4.0` → `0.5.0`
156
-
157
- **CHANGELOG updated** with release notes.
158
-
159
- **Files changed:** `apps/web/package.json`, `apps/runner/package.json`, `CHANGELOG.md`
160
127
 
161
128
  <!-- flumecode:versions {"@flumecode/web":"0.9.0","@flumecode/runner":"0.5.0"} -->
162
129
  ```
@@ -167,21 +134,20 @@ reads it to persist the confirmed versions on the release entity. Use the exact
167
134
  JSON key names `@flumecode/web` and `@flumecode/runner`; omit a package if its
168
135
  version did not change.
169
136
 
137
+ **Do NOT edit package.json, CHANGELOG.md, or any other file. Do NOT commit,
138
+ push, or open a pull request.** The GitHub Release is cut directly from the
139
+ frozen commit by the web interface.
140
+
170
141
  ---
171
142
 
172
143
  ## Notes
173
144
 
174
- - **Runner-only bump:** if only `apps/runner/` has commits, bump only
175
- `apps/runner/package.json`. Leave `apps/web/package.json` unchanged.
145
+ - **Runner-only bump:** if only `apps/runner/` has commits, include only
146
+ the `@flumecode/runner` version in the `flumecode:versions` comment.
176
147
  - **Clear Phase 1 text:** be explicit about what changed since the last tag so the
177
148
  user can confidently confirm or override your suggestions.
178
- - **Edit only version files with one exception.** Normally edit only
179
- `apps/web/package.json`, `apps/runner/package.json`, and `CHANGELOG.md`. The sole
180
- exception: when the prompt includes a **`# Pre-release check status`** section
181
- reporting failing checks, you must also fix the failing code (any file needed) so
182
- the tree is green — see "Pre-release checks" below. Never weaken or skip checks to
183
- silence them.
184
- - **Never commit, push, or open a PR** — the runner does that.
149
+ - **Read-only:** do not edit any files at any point. This skill is purely
150
+ analytical — it reads the repo, proposes versions, and emits a report.
185
151
 
186
152
  ## Pre-release
187
153
 
@@ -202,27 +168,6 @@ pre-release version strings instead of stable ones:
202
168
  `0.9.0-beta.1`) in the version-confirmation widgets instead of the stable
203
169
  version.
204
170
 
205
- - **Phase 2 (apply):** write the pre-release version string (e.g.
206
- `0.9.0-beta.1`) to `package.json`, `CHANGELOG.md`, and the
207
- `<!-- flumecode:versions {...} -->` comment exactly as you would for a
208
- stable release, just with the pre-release suffix included.
209
-
210
- ---
211
-
212
- ## Pre-release checks
213
-
214
- We cannot release code with failing checks. Before this turn, the runner ran the
215
- repository's own pre-commit hook (lint / typecheck / tests). If the prompt contains
216
- a **`# Pre-release check status`** section, the base branch is currently broken
217
- _independently of the version bump_:
218
-
219
- - **Phase 1:** state plainly in your reply that the base currently fails these
220
- checks and that the release will fix them as part of the bump, then ask the
221
- version questions as usual.
222
- - **Phase 2:** fix the failing code at its root **first** (so the checks pass),
223
- **then** apply the version bumps and CHANGELOG. The fixes ship in the same bump
224
- PR. Do not delete or skip tests, weaken assertions, or disable checks. Still do
225
- not commit or push — the runner commits everything together.
226
-
227
- If there is no `# Pre-release check status` section, the base is clean (or the check
228
- was skipped); proceed normally and edit only the version files.
171
+ - **Phase 2 (emit):** include the pre-release version string (e.g.
172
+ `0.9.0-beta.1`) in the `<!-- flumecode:versions {...} -->` comment — exactly
173
+ as you would for a stable release, just with the pre-release suffix included.
@@ -12,7 +12,7 @@ description: >-
12
12
 
13
13
  # implement-plan
14
14
 
15
- You are the **orchestrator**. You run on a medium model and your job is to
15
+ You are the **orchestrator**. You run on the `think` model and your job is to
16
16
  _coordinate_, not to write the implementation. You delegate each phase to a
17
17
  subagent through the **Task** tool, choosing the model that phase needs, and you
18
18
  stitch their results into one report. Doing the implementation yourself defeats
@@ -30,10 +30,9 @@ put it in the prompt, the subagent doesn't have it.
30
30
  ## How you delegate
31
31
 
32
32
  - Spawn each phase with the **Task** tool, `subagent_type: "general-purpose"`.
33
- - **Model per phase** (pass it as the Task `model` argument):
34
- - `"sonnet"` — implementation, fixes, and the Verify step (mechanical
35
- command-running; Verify is read-only even though it uses sonnet).
36
- - `"opus"` — acceptance-criteria review, code-quality review, and the report.
33
+ - **Model per phase** (pass the concrete id from the Model tiers block as the Task `model` argument):
34
+ - `execute` — implementation, fixes, and the Verify step (mechanical command-running; Verify is read-only).
35
+ - `think` — acceptance-criteria review, code-quality review, and the report.
37
36
  - **Read-only phases.** Tell every review, Verify, and report subagent to _inspect
38
37
  and report only — never edit, create, or delete files_. Only implementation/fix
39
38
  subagents may change the working tree.
@@ -68,7 +67,7 @@ the next step.
68
67
  it in the prompts you write for the Implement, Verify, and Fix-loop subagents
69
68
  so none of them re-derive it. Do not implement.
70
69
 
71
- 2. **Implement** — Task, `model: "sonnet"`. Give the subagent: the plan steps, a
70
+ 2. **Implement** — Task, `model: execute`. Give the subagent: the plan steps, a
72
71
  pointer to the wiki/orientation, the coding guidelines (verbatim), and the
73
72
  explicit verification command list the orchestrator discovered in the Orient
74
73
  step. Tell it to make all the code changes in the working tree to satisfy the
@@ -83,7 +82,7 @@ the next step.
83
82
  the verification commands it ran and their pass/fail results, which files it
84
83
  changed, and how each plan step was addressed. It must not commit or push.
85
84
 
86
- 3. **Verify (build & tests)** — Task, `model: "sonnet"`, read-only. This step
85
+ 3. **Verify (build & tests)** — Task, `model: execute`, read-only. This step
87
86
  gives the orchestrator an objective, independent build/test signal before the
88
87
  subjective AC and quality reviews. Tell the subagent to:
89
88
  - Run the verification commands provided by the orchestrator in the task
@@ -100,7 +99,7 @@ the next step.
100
99
  excerpt (if any).
101
100
  - Must not edit, create, or delete any files.
102
101
 
103
- 4. **Acceptance-criteria review** — Task, `model: "opus"`, read-only. Give the
102
+ 4. **Acceptance-criteria review** — Task, `model: think`, read-only. Give the
104
103
  subagent the full AC list and tell it to verify each one against the actual
105
104
  changes (run `git --no-pager diff`, read the changed files, run tests/build if
106
105
  useful). For **each** AC it must return: the criterion text verbatim, a verdict
@@ -117,7 +116,7 @@ the next step.
117
116
  note any files or areas that appear changed but don't map to any AC as a coverage
118
117
  gap (signalling a missing AC or an out-of-scope change).
119
118
 
120
- 5. **Code-quality review** — Task, `model: "opus"`, read-only. Give the subagent
119
+ 5. **Code-quality review** — Task, `model: think`, read-only. Give the subagent
121
120
  the coding guidelines (verbatim) and tell it to review the changes for
122
121
  violations and quality problems, returning concrete findings as
123
122
  `file:line — what — why`, each marked **must-fix** or **nice-to-have**.
@@ -125,7 +124,7 @@ the next step.
125
124
  6. **Fix loop.** If the Verify step (step 3) reports any failing check, the AC
126
125
  review (step 4) reports any _not met_ AC, or the quality review (step 5)
127
126
  reports any _must-fix_ finding: spawn an **Implement/fix** subagent (Task,
128
- `model: "sonnet"`) whose prompt lists exactly those findings and tells it to
127
+ `model: execute`) whose prompt lists exactly those findings and tells it to
129
128
  resolve them without regressing the rest. Include the verification command list
130
129
  from the Orient step in the fix subagent's prompt (the same list passed to
131
130
  Implement and Verify), so the fix subagent does not need to re-derive it. When
@@ -135,7 +134,7 @@ the next step.
135
134
  times. If something still fails after that, stop looping and record the gap
136
135
  honestly in the report — do not hide it.
137
136
 
138
- 7. **Report** — Task, `model: "opus"`, read-only. Give the subagent the AC
137
+ 7. **Report** — Task, `model: think`, read-only. Give the subagent the AC
139
138
  verdicts (with criterion text, from step 4), the Verify results (from step 3),
140
139
  and the quality findings, and tell it to run `git --no-pager diff` itself as
141
140
  the **single source of truth** for the report. Pass the Verify results as the
@@ -191,7 +190,7 @@ The report subagent calls `submit_report` with these fields:
191
190
 
192
191
  - Delegate through Task subagents; don't implement, review, or write the report
193
192
  yourself.
194
- - Right model per phase: `sonnet` to implement/fix/verify (Verify is read-only), `opus` to review/report.
193
+ - Right model per phase: `execute` to implement/fix/verify (Verify is read-only), `think` to review/report.
195
194
  - Make every Task prompt self-contained — subagents see only what you give them.
196
195
  - Reviewers and the report writer never modify files.
197
196
  - Never commit, push, or open a PR.
@@ -67,9 +67,9 @@ essentials:
67
67
  implementation, not a rebuild. Change only what the user asked for plus what that
68
68
  change strictly requires; don't regress the rest of the plan.
69
69
  - **Pipeline:** Implement (self-runs build/tests & fixes its own errors, Task
70
- `model: "sonnet"`) → Verify (build/tests, read-only, Task `model: "sonnet"`) →
71
- acceptance/quality review (Task `model: "opus"`, read-only) → fix loop if needed
72
- (≤2, re-run Verify after each fix) → report (Task `model: "opus"`, read-only).
70
+ `model: execute`) → Verify (build/tests, read-only, Task `model: execute`) →
71
+ acceptance/quality review (Task `model: think`, read-only) → fix loop if needed
72
+ (≤2, re-run Verify after each fix) → report (Task `model: think`, read-only).
73
73
  Detailed mechanics (command discovery, Verify step spec, fix-loop trigger
74
74
  conditions) are in `implement-plan/SKILL.md` — read it for the full pipeline.
75
75
  - **No git side effects.** Never commit, push, or open a PR — leave the changes in