@flumecode/runner 0.20.0 → 0.21.0-beta.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/cli.js
CHANGED
|
@@ -854,6 +854,27 @@ function errorMessage(err) {
|
|
|
854
854
|
return err instanceof Error ? err.message : String(err);
|
|
855
855
|
}
|
|
856
856
|
|
|
857
|
+
// src/models.ts
|
|
858
|
+
/**
 * Concrete model ids per provider, keyed by tier name (`think` / `execute`).
 *
 * The table is built on a null prototype so that a lookup by an arbitrary
 * provider string can only ever hit a real entry: inherited names such as
 * "constructor" or "toString" resolve to `undefined` instead of an
 * `Object.prototype` member. It is frozen (entries included) because it is a
 * shared module-level constant that nothing should mutate at runtime.
 */
var MODEL_ROSTER = Object.freeze(
  Object.assign(Object.create(null), {
    anthropic: Object.freeze({ think: "opus", execute: "sonnet" }),
    // TODO: confirm vs `codex`
    openai: Object.freeze({ think: "gpt-5.5", execute: "gpt-5.4-mini" })
  })
);
|
|
863
|
+
/**
 * Look up the model roster for a provider.
 *
 * @param provider Provider key into MODEL_ROSTER; `null`/`undefined` selects
 *   "anthropic".
 * @returns The provider's own MODEL_ROSTER entry, or the anthropic entry when
 *   the provider has no entry of its own.
 */
function rosterFor(provider) {
  const key = provider ?? "anthropic";
  // Only own entries count. A bare bracket lookup also resolves inherited
  // members ("constructor", "toString", "__proto__"), which are not nullish and
  // would therefore bypass the anthropic fallback and yield an object with no
  // `think`/`execute` ids.
  const entry = Object.prototype.hasOwnProperty.call(MODEL_ROSTER, key)
    ? MODEL_ROSTER[key]
    : undefined;
  return entry ?? MODEL_ROSTER.anthropic;
}
|
|
866
|
+
/**
 * Build the "# Model tiers" prompt section for a provider.
 *
 * The section tells the agent to pass a concrete model id (not the tier name)
 * as the Task `model` argument, then lists the `think` and `execute` ids taken
 * from the provider's roster.
 *
 * @param provider Provider key forwarded to rosterFor.
 * @returns The section as a single newline-joined string.
 */
function renderRosterBlock(provider) {
  const roster = rosterFor(provider);
  const heading = "# Model tiers";
  const guidance = [
    "When a skill tells you to run a subagent on a named model tier, pass the matching",
    "concrete model id as the Task `model` argument. Use the id exactly; never pass the",
    "tier name itself."
  ];
  // One bullet per tier, mapping the tier name to the provider's concrete id.
  const thinkLine = `- \`think\` -> \`${roster.think}\` (planning, review, high-level reasoning)`;
  const executeLine = `- \`execute\` -> \`${roster.execute}\` (writing code from the plan; fast & cheap)`;
  return [heading, ...guidance, thinkLine, executeLine].join("\n");
}
|
|
877
|
+
|
|
857
878
|
// src/rules.ts
|
|
858
879
|
import { readFileSync as readFileSync4 } from "node:fs";
|
|
859
880
|
import { join as join3 } from "node:path";
|
|
@@ -906,6 +927,7 @@ function buildPrompt(ctx) {
|
|
|
906
927
|
LANGUAGE_DIRECTIVE
|
|
907
928
|
];
|
|
908
929
|
if (ctx.permissionMode !== "plan") {
|
|
930
|
+
lines2.push("", renderRosterBlock(ctx.provider));
|
|
909
931
|
lines2.push(
|
|
910
932
|
"",
|
|
911
933
|
"These coding guidelines apply to all code produced in this run:",
|
|
@@ -937,6 +959,8 @@ function buildRevisePrompt(ctx) {
|
|
|
937
959
|
widgets,
|
|
938
960
|
LANGUAGE_DIRECTIVE,
|
|
939
961
|
"",
|
|
962
|
+
renderRosterBlock(ctx.provider),
|
|
963
|
+
"",
|
|
940
964
|
"These coding guidelines apply to all code produced in this run:",
|
|
941
965
|
"",
|
|
942
966
|
loadRule("coding-guideline"),
|
|
@@ -1478,7 +1502,7 @@ async function prNumbersForCommit(ctx, sha) {
|
|
|
1478
1502
|
// src/run.ts
|
|
1479
1503
|
var IDLE_MS = 5e3;
|
|
1480
1504
|
var CANCEL_POLL_MS = 2500;
|
|
1481
|
-
var
|
|
1505
|
+
/**
 * Resolve the model id used for orchestrator runs: the `think` tier of the
 * roster for the job's provider.
 *
 * @param ctx Job context; only `ctx.provider` is read here.
 * @returns The `think` model id from rosterFor(ctx.provider).
 */
var orchestratorModel = (ctx) => {
  const roster = rosterFor(ctx.provider);
  return roster.think;
};
|
|
1482
1506
|
var ORCHESTRATOR_MAX_TURNS = 80;
|
|
1483
1507
|
var MAX_COMMIT_REPAIRS = 2;
|
|
1484
1508
|
var MAX_IMPLEMENT_RETRIES = 1;
|
|
@@ -1524,7 +1548,7 @@ async function mergeAndResolveConflicts(ctx, dir, config, abort) {
|
|
|
1524
1548
|
cwd: dir,
|
|
1525
1549
|
prompt: buildResolvePrompt(ctx, related),
|
|
1526
1550
|
permissionMode: ctx.permissionMode,
|
|
1527
|
-
model:
|
|
1551
|
+
model: orchestratorModel(ctx),
|
|
1528
1552
|
maxTurns: ORCHESTRATOR_MAX_TURNS,
|
|
1529
1553
|
abortController: abort
|
|
1530
1554
|
});
|
|
@@ -1549,7 +1573,7 @@ async function commitWithRepair(ctx, dir, abort) {
|
|
|
1549
1573
|
cwd: dir,
|
|
1550
1574
|
prompt: buildRepairPrompt(ctx, err.log),
|
|
1551
1575
|
permissionMode: ctx.permissionMode,
|
|
1552
|
-
model:
|
|
1576
|
+
model: orchestratorModel(ctx),
|
|
1553
1577
|
maxTurns: ORCHESTRATOR_MAX_TURNS,
|
|
1554
1578
|
abortController: abort
|
|
1555
1579
|
});
|
|
@@ -1653,7 +1677,7 @@ async function processChatJob(ctx, dir, config, abort) {
|
|
|
1653
1677
|
prompt: buildPrompt(ctx),
|
|
1654
1678
|
permissionMode: ctx.permissionMode,
|
|
1655
1679
|
abortController: abort,
|
|
1656
|
-
...orchestrating ? { model:
|
|
1680
|
+
...orchestrating ? { model: orchestratorModel(ctx), maxTurns: ORCHESTRATOR_MAX_TURNS } : {}
|
|
1657
1681
|
});
|
|
1658
1682
|
const summary = result.text.trim();
|
|
1659
1683
|
let reply = summary || "(the agent produced no summary)";
|
|
@@ -1719,7 +1743,7 @@ async function processImplementJob(ctx, dir, resumed, config, abort) {
|
|
|
1719
1743
|
cwd: dir,
|
|
1720
1744
|
prompt: buildPrompt(ctx),
|
|
1721
1745
|
permissionMode: ctx.permissionMode,
|
|
1722
|
-
model:
|
|
1746
|
+
model: orchestratorModel(ctx),
|
|
1723
1747
|
maxTurns: ORCHESTRATOR_MAX_TURNS,
|
|
1724
1748
|
abortController: abort
|
|
1725
1749
|
});
|
|
@@ -1798,7 +1822,7 @@ async function processReviseJob(ctx, dir, resumed, config, abort) {
|
|
|
1798
1822
|
cwd: dir,
|
|
1799
1823
|
prompt: buildRevisePrompt(ctx),
|
|
1800
1824
|
permissionMode: ctx.permissionMode,
|
|
1801
|
-
model:
|
|
1825
|
+
model: orchestratorModel(ctx),
|
|
1802
1826
|
maxTurns: ORCHESTRATOR_MAX_TURNS,
|
|
1803
1827
|
abortController: abort
|
|
1804
1828
|
});
|
|
@@ -1888,7 +1912,7 @@ async function processReleaseJob(ctx, dir, resumed, config, abort) {
|
|
|
1888
1912
|
cwd: dir,
|
|
1889
1913
|
prompt: buildReleasePrompt(ctx, baseChecks),
|
|
1890
1914
|
permissionMode: ctx.permissionMode,
|
|
1891
|
-
model:
|
|
1915
|
+
model: orchestratorModel(ctx),
|
|
1892
1916
|
maxTurns: ORCHESTRATOR_MAX_TURNS,
|
|
1893
1917
|
abortController: abort
|
|
1894
1918
|
});
|
package/package.json
CHANGED
|
@@ -12,7 +12,7 @@ description: >-
|
|
|
12
12
|
|
|
13
13
|
# implement-plan
|
|
14
14
|
|
|
15
|
-
You are the **orchestrator**. You run on
|
|
15
|
+
You are the **orchestrator**. You run on the `think` model and your job is to
|
|
16
16
|
_coordinate_, not to write the implementation. You delegate each phase to a
|
|
17
17
|
subagent through the **Task** tool, choosing the model that phase needs, and you
|
|
18
18
|
stitch their results into one report. Doing the implementation yourself defeats
|
|
@@ -30,10 +30,9 @@ put it in the prompt, the subagent doesn't have it.
|
|
|
30
30
|
## How you delegate
|
|
31
31
|
|
|
32
32
|
- Spawn each phase with the **Task** tool, `subagent_type: "general-purpose"`.
|
|
33
|
-
- **Model per phase** (pass
|
|
34
|
-
- `
|
|
35
|
-
|
|
36
|
-
- `"opus"` — acceptance-criteria review, code-quality review, and the report.
|
|
33
|
+
- **Model per phase** (pass the concrete id from the Model tiers block as the Task `model` argument):
|
|
34
|
+
- `execute` — implementation, fixes, and the Verify step (mechanical command-running; Verify is read-only).
|
|
35
|
+
- `think` — acceptance-criteria review, code-quality review, and the report.
|
|
37
36
|
- **Read-only phases.** Tell every review, Verify, and report subagent to _inspect
|
|
38
37
|
and report only — never edit, create, or delete files_. Only implementation/fix
|
|
39
38
|
subagents may change the working tree.
|
|
@@ -68,7 +67,7 @@ the next step.
|
|
|
68
67
|
it in the prompts you write for the Implement, Verify, and Fix-loop subagents
|
|
69
68
|
so none of them re-derive it. Do not implement.
|
|
70
69
|
|
|
71
|
-
2. **Implement** — Task, `model:
|
|
70
|
+
2. **Implement** — Task, `model: execute`. Give the subagent: the plan steps, a
|
|
72
71
|
pointer to the wiki/orientation, the coding guidelines (verbatim), and the
|
|
73
72
|
explicit verification command list the orchestrator discovered in the Orient
|
|
74
73
|
step. Tell it to make all the code changes in the working tree to satisfy the
|
|
@@ -83,7 +82,7 @@ the next step.
|
|
|
83
82
|
the verification commands it ran and their pass/fail results, which files it
|
|
84
83
|
changed, and how each plan step was addressed. It must not commit or push.
|
|
85
84
|
|
|
86
|
-
3. **Verify (build & tests)** — Task, `model:
|
|
85
|
+
3. **Verify (build & tests)** — Task, `model: execute`, read-only. This step
|
|
87
86
|
gives the orchestrator an objective, independent build/test signal before the
|
|
88
87
|
subjective AC and quality reviews. Tell the subagent to:
|
|
89
88
|
- Run the verification commands provided by the orchestrator in the task
|
|
@@ -100,7 +99,7 @@ the next step.
|
|
|
100
99
|
excerpt (if any).
|
|
101
100
|
- Must not edit, create, or delete any files.
|
|
102
101
|
|
|
103
|
-
4. **Acceptance-criteria review** — Task, `model:
|
|
102
|
+
4. **Acceptance-criteria review** — Task, `model: think`, read-only. Give the
|
|
104
103
|
subagent the full AC list and tell it to verify each one against the actual
|
|
105
104
|
changes (run `git --no-pager diff`, read the changed files, run tests/build if
|
|
106
105
|
useful). For **each** AC it must return: the criterion text verbatim, a verdict
|
|
@@ -117,7 +116,7 @@ the next step.
|
|
|
117
116
|
note any files or areas that appear changed but don't map to any AC as a coverage
|
|
118
117
|
gap (signalling a missing AC or an out-of-scope change).
|
|
119
118
|
|
|
120
|
-
5. **Code-quality review** — Task, `model:
|
|
119
|
+
5. **Code-quality review** — Task, `model: think`, read-only. Give the subagent
|
|
121
120
|
the coding guidelines (verbatim) and tell it to review the changes for
|
|
122
121
|
violations and quality problems, returning concrete findings as
|
|
123
122
|
`file:line — what — why`, each marked **must-fix** or **nice-to-have**.
|
|
@@ -125,7 +124,7 @@ the next step.
|
|
|
125
124
|
6. **Fix loop.** If the Verify step (step 3) reports any failing check, the AC
|
|
126
125
|
review (step 4) reports any _not met_ AC, or the quality review (step 5)
|
|
127
126
|
reports any _must-fix_ finding: spawn an **Implement/fix** subagent (Task,
|
|
128
|
-
`model:
|
|
127
|
+
`model: execute`) whose prompt lists exactly those findings and tells it to
|
|
129
128
|
resolve them without regressing the rest. Include the verification command list
|
|
130
129
|
from the Orient step in the fix subagent's prompt (the same list passed to
|
|
131
130
|
Implement and Verify), so the fix subagent does not need to re-derive it. When
|
|
@@ -135,7 +134,7 @@ the next step.
|
|
|
135
134
|
times. If something still fails after that, stop looping and record the gap
|
|
136
135
|
honestly in the report — do not hide it.
|
|
137
136
|
|
|
138
|
-
7. **Report** — Task, `model:
|
|
137
|
+
7. **Report** — Task, `model: think`, read-only. Give the subagent the AC
|
|
139
138
|
verdicts (with criterion text, from step 4), the Verify results (from step 3),
|
|
140
139
|
and the quality findings, and tell it to run `git --no-pager diff` itself as
|
|
141
140
|
the **single source of truth** for the report. Pass the Verify results as the
|
|
@@ -191,7 +190,7 @@ The report subagent calls `submit_report` with these fields:
|
|
|
191
190
|
|
|
192
191
|
- Delegate through Task subagents; don't implement, review, or write the report
|
|
193
192
|
yourself.
|
|
194
|
-
- Right model per phase: `
|
|
193
|
+
- Right model per phase: `execute` to implement/fix/verify (Verify is read-only), `think` to review/report.
|
|
195
194
|
- Make every Task prompt self-contained — subagents see only what you give them.
|
|
196
195
|
- Reviewers and the report writer never modify files.
|
|
197
196
|
- Never commit, push, or open a PR.
|
|
@@ -67,9 +67,9 @@ essentials:
|
|
|
67
67
|
implementation, not a rebuild. Change only what the user asked for plus what that
|
|
68
68
|
change strictly requires; don't regress the rest of the plan.
|
|
69
69
|
- **Pipeline:** Implement (self-runs build/tests & fixes its own errors, Task
|
|
70
|
-
`model:
|
|
71
|
-
acceptance/quality review (Task `model:
|
|
72
|
-
(≤2, re-run Verify after each fix) → report (Task `model:
|
|
70
|
+
`model: execute`) → Verify (build/tests, read-only, Task `model: execute`) →
|
|
71
|
+
acceptance/quality review (Task `model: think`, read-only) → fix loop if needed
|
|
72
|
+
(≤2, re-run Verify after each fix) → report (Task `model: think`, read-only).
|
|
73
73
|
Detailed mechanics (command discovery, Verify step spec, fix-loop trigger
|
|
74
74
|
conditions) are in `implement-plan/SKILL.md` — read it for the full pipeline.
|
|
75
75
|
- **No git side effects.** Never commit, push, or open a PR — leave the changes in
|