@flumecode/runner 0.20.0 → 0.21.0-beta.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/cli.js CHANGED
@@ -854,6 +854,27 @@ function errorMessage(err) {
854
854
  return err instanceof Error ? err.message : String(err);
855
855
  }
856
856
 
857
+ // src/models.ts
858
+ var MODEL_ROSTER = {
859
+ anthropic: { think: "opus", execute: "sonnet" },
860
+ openai: { think: "gpt-5.5", execute: "gpt-5.4-mini" }
861
+ // TODO: confirm vs `codex`
862
+ };
863
+ function rosterFor(provider) {
864
+ return MODEL_ROSTER[provider ?? "anthropic"] ?? MODEL_ROSTER.anthropic;
865
+ }
866
+ function renderRosterBlock(provider) {
867
+ const r = rosterFor(provider);
868
+ return [
869
+ "# Model tiers",
870
+ "When a skill tells you to run a subagent on a named model tier, pass the matching",
871
+ "concrete model id as the Task `model` argument. Use the id exactly; never pass the",
872
+ "tier name itself.",
873
+ `- \`think\` -> \`${r.think}\` (planning, review, high-level reasoning)`,
874
+ `- \`execute\` -> \`${r.execute}\` (writing code from the plan; fast & cheap)`
875
+ ].join("\n");
876
+ }
877
+
857
878
  // src/rules.ts
858
879
  import { readFileSync as readFileSync4 } from "node:fs";
859
880
  import { join as join3 } from "node:path";
@@ -906,6 +927,7 @@ function buildPrompt(ctx) {
906
927
  LANGUAGE_DIRECTIVE
907
928
  ];
908
929
  if (ctx.permissionMode !== "plan") {
930
+ lines2.push("", renderRosterBlock(ctx.provider));
909
931
  lines2.push(
910
932
  "",
911
933
  "These coding guidelines apply to all code produced in this run:",
@@ -937,6 +959,8 @@ function buildRevisePrompt(ctx) {
937
959
  widgets,
938
960
  LANGUAGE_DIRECTIVE,
939
961
  "",
962
+ renderRosterBlock(ctx.provider),
963
+ "",
940
964
  "These coding guidelines apply to all code produced in this run:",
941
965
  "",
942
966
  loadRule("coding-guideline"),
@@ -1478,7 +1502,7 @@ async function prNumbersForCommit(ctx, sha) {
1478
1502
  // src/run.ts
1479
1503
  var IDLE_MS = 5e3;
1480
1504
  var CANCEL_POLL_MS = 2500;
1481
- var ORCHESTRATOR_MODEL = "sonnet";
1505
+ var orchestratorModel = (ctx) => rosterFor(ctx.provider).think;
1482
1506
  var ORCHESTRATOR_MAX_TURNS = 80;
1483
1507
  var MAX_COMMIT_REPAIRS = 2;
1484
1508
  var MAX_IMPLEMENT_RETRIES = 1;
@@ -1524,7 +1548,7 @@ async function mergeAndResolveConflicts(ctx, dir, config, abort) {
1524
1548
  cwd: dir,
1525
1549
  prompt: buildResolvePrompt(ctx, related),
1526
1550
  permissionMode: ctx.permissionMode,
1527
- model: ORCHESTRATOR_MODEL,
1551
+ model: orchestratorModel(ctx),
1528
1552
  maxTurns: ORCHESTRATOR_MAX_TURNS,
1529
1553
  abortController: abort
1530
1554
  });
@@ -1549,7 +1573,7 @@ async function commitWithRepair(ctx, dir, abort) {
1549
1573
  cwd: dir,
1550
1574
  prompt: buildRepairPrompt(ctx, err.log),
1551
1575
  permissionMode: ctx.permissionMode,
1552
- model: ORCHESTRATOR_MODEL,
1576
+ model: orchestratorModel(ctx),
1553
1577
  maxTurns: ORCHESTRATOR_MAX_TURNS,
1554
1578
  abortController: abort
1555
1579
  });
@@ -1653,7 +1677,7 @@ async function processChatJob(ctx, dir, config, abort) {
1653
1677
  prompt: buildPrompt(ctx),
1654
1678
  permissionMode: ctx.permissionMode,
1655
1679
  abortController: abort,
1656
- ...orchestrating ? { model: ORCHESTRATOR_MODEL, maxTurns: ORCHESTRATOR_MAX_TURNS } : {}
1680
+ ...orchestrating ? { model: orchestratorModel(ctx), maxTurns: ORCHESTRATOR_MAX_TURNS } : {}
1657
1681
  });
1658
1682
  const summary = result.text.trim();
1659
1683
  let reply = summary || "(the agent produced no summary)";
@@ -1719,7 +1743,7 @@ async function processImplementJob(ctx, dir, resumed, config, abort) {
1719
1743
  cwd: dir,
1720
1744
  prompt: buildPrompt(ctx),
1721
1745
  permissionMode: ctx.permissionMode,
1722
- model: ORCHESTRATOR_MODEL,
1746
+ model: orchestratorModel(ctx),
1723
1747
  maxTurns: ORCHESTRATOR_MAX_TURNS,
1724
1748
  abortController: abort
1725
1749
  });
@@ -1798,7 +1822,7 @@ async function processReviseJob(ctx, dir, resumed, config, abort) {
1798
1822
  cwd: dir,
1799
1823
  prompt: buildRevisePrompt(ctx),
1800
1824
  permissionMode: ctx.permissionMode,
1801
- model: ORCHESTRATOR_MODEL,
1825
+ model: orchestratorModel(ctx),
1802
1826
  maxTurns: ORCHESTRATOR_MAX_TURNS,
1803
1827
  abortController: abort
1804
1828
  });
@@ -1888,7 +1912,7 @@ async function processReleaseJob(ctx, dir, resumed, config, abort) {
1888
1912
  cwd: dir,
1889
1913
  prompt: buildReleasePrompt(ctx, baseChecks),
1890
1914
  permissionMode: ctx.permissionMode,
1891
- model: ORCHESTRATOR_MODEL,
1915
+ model: orchestratorModel(ctx),
1892
1916
  maxTurns: ORCHESTRATOR_MAX_TURNS,
1893
1917
  abortController: abort
1894
1918
  });
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@flumecode/runner",
3
- "version": "0.20.0",
3
+ "version": "0.21.0-beta.1",
4
4
  "type": "module",
5
5
  "description": "FlumeCode local runner — claims jobs and drives your local Claude Code against a real checkout.",
6
6
  "bin": {
@@ -12,7 +12,7 @@ description: >-
12
12
 
13
13
  # implement-plan
14
14
 
15
- You are the **orchestrator**. You run on a medium model and your job is to
15
+ You are the **orchestrator**. You run on the `think` model and your job is to
16
16
  _coordinate_, not to write the implementation. You delegate each phase to a
17
17
  subagent through the **Task** tool, choosing the model that phase needs, and you
18
18
  stitch their results into one report. Doing the implementation yourself defeats
@@ -30,10 +30,9 @@ put it in the prompt, the subagent doesn't have it.
30
30
  ## How you delegate
31
31
 
32
32
  - Spawn each phase with the **Task** tool, `subagent_type: "general-purpose"`.
33
- - **Model per phase** (pass it as the Task `model` argument):
34
- - `"sonnet"` — implementation, fixes, and the Verify step (mechanical
35
- command-running; Verify is read-only even though it uses sonnet).
36
- - `"opus"` — acceptance-criteria review, code-quality review, and the report.
33
+ - **Model per phase** (pass the concrete id from the Model tiers block as the Task `model` argument):
34
+ - `execute` — implementation, fixes, and the Verify step (mechanical command-running; Verify is read-only).
35
+ - `think` — acceptance-criteria review, code-quality review, and the report.
37
36
  - **Read-only phases.** Tell every review, Verify, and report subagent to _inspect
38
37
  and report only — never edit, create, or delete files_. Only implementation/fix
39
38
  subagents may change the working tree.
@@ -68,7 +67,7 @@ the next step.
68
67
  it in the prompts you write for the Implement, Verify, and Fix-loop subagents
69
68
  so none of them re-derive it. Do not implement.
70
69
 
71
- 2. **Implement** — Task, `model: "sonnet"`. Give the subagent: the plan steps, a
70
+ 2. **Implement** — Task, `model: execute`. Give the subagent: the plan steps, a
72
71
  pointer to the wiki/orientation, the coding guidelines (verbatim), and the
73
72
  explicit verification command list the orchestrator discovered in the Orient
74
73
  step. Tell it to make all the code changes in the working tree to satisfy the
@@ -83,7 +82,7 @@ the next step.
83
82
  the verification commands it ran and their pass/fail results, which files it
84
83
  changed, and how each plan step was addressed. It must not commit or push.
85
84
 
86
- 3. **Verify (build & tests)** — Task, `model: "sonnet"`, read-only. This step
85
+ 3. **Verify (build & tests)** — Task, `model: execute`, read-only. This step
87
86
  gives the orchestrator an objective, independent build/test signal before the
88
87
  subjective AC and quality reviews. Tell the subagent to:
89
88
  - Run the verification commands provided by the orchestrator in the task
@@ -100,7 +99,7 @@ the next step.
100
99
  excerpt (if any).
101
100
  - Must not edit, create, or delete any files.
102
101
 
103
- 4. **Acceptance-criteria review** — Task, `model: "opus"`, read-only. Give the
102
+ 4. **Acceptance-criteria review** — Task, `model: think`, read-only. Give the
104
103
  subagent the full AC list and tell it to verify each one against the actual
105
104
  changes (run `git --no-pager diff`, read the changed files, run tests/build if
106
105
  useful). For **each** AC it must return: the criterion text verbatim, a verdict
@@ -117,7 +116,7 @@ the next step.
117
116
  note any files or areas that appear changed but don't map to any AC as a coverage
118
117
  gap (signalling a missing AC or an out-of-scope change).
119
118
 
120
- 5. **Code-quality review** — Task, `model: "opus"`, read-only. Give the subagent
119
+ 5. **Code-quality review** — Task, `model: think`, read-only. Give the subagent
121
120
  the coding guidelines (verbatim) and tell it to review the changes for
122
121
  violations and quality problems, returning concrete findings as
123
122
  `file:line — what — why`, each marked **must-fix** or **nice-to-have**.
@@ -125,7 +124,7 @@ the next step.
125
124
  6. **Fix loop.** If the Verify step (step 3) reports any failing check, the AC
126
125
  review (step 4) reports any _not met_ AC, or the quality review (step 5)
127
126
  reports any _must-fix_ finding: spawn an **Implement/fix** subagent (Task,
128
- `model: "sonnet"`) whose prompt lists exactly those findings and tells it to
127
+ `model: execute`) whose prompt lists exactly those findings and tells it to
129
128
  resolve them without regressing the rest. Include the verification command list
130
129
  from the Orient step in the fix subagent's prompt (the same list passed to
131
130
  Implement and Verify), so the fix subagent does not need to re-derive it. When
@@ -135,7 +134,7 @@ the next step.
135
134
  times. If something still fails after that, stop looping and record the gap
136
135
  honestly in the report — do not hide it.
137
136
 
138
- 7. **Report** — Task, `model: "opus"`, read-only. Give the subagent the AC
137
+ 7. **Report** — Task, `model: think`, read-only. Give the subagent the AC
139
138
  verdicts (with criterion text, from step 4), the Verify results (from step 3),
140
139
  and the quality findings, and tell it to run `git --no-pager diff` itself as
141
140
  the **single source of truth** for the report. Pass the Verify results as the
@@ -191,7 +190,7 @@ The report subagent calls `submit_report` with these fields:
191
190
 
192
191
  - Delegate through Task subagents; don't implement, review, or write the report
193
192
  yourself.
194
- - Right model per phase: `sonnet` to implement/fix/verify (Verify is read-only), `opus` to review/report.
193
+ - Right model per phase: `execute` to implement/fix/verify (Verify is read-only), `think` to review/report.
195
194
  - Make every Task prompt self-contained — subagents see only what you give them.
196
195
  - Reviewers and the report writer never modify files.
197
196
  - Never commit, push, or open a PR.
@@ -67,9 +67,9 @@ essentials:
67
67
  implementation, not a rebuild. Change only what the user asked for plus what that
68
68
  change strictly requires; don't regress the rest of the plan.
69
69
  - **Pipeline:** Implement (self-runs build/tests & fixes its own errors, Task
70
- `model: "sonnet"`) → Verify (build/tests, read-only, Task `model: "sonnet"`) →
71
- acceptance/quality review (Task `model: "opus"`, read-only) → fix loop if needed
72
- (≤2, re-run Verify after each fix) → report (Task `model: "opus"`, read-only).
70
+ `model: execute`) → Verify (build/tests, read-only, Task `model: execute`) →
71
+ acceptance/quality review (Task `model: think`, read-only) → fix loop if needed
72
+ (≤2, re-run Verify after each fix) → report (Task `model: think`, read-only).
73
73
  Detailed mechanics (command discovery, Verify step spec, fix-loop trigger
74
74
  conditions) are in `implement-plan/SKILL.md` — read it for the full pipeline.
75
75
  - **No git side effects.** Never commit, push, or open a PR — leave the changes in