npm - @flumecode/runner - Versions diffs - 0.20.0 → 0.21.0-beta.1 - Mend

@flumecode/runner 0.20.0 → 0.21.0-beta.1

This diff shows the differences between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (4) hide show

package/dist/cli.js +31 -7
package/package.json +1 -1
package/skills-plugin/skills/implement-plan/SKILL.md +11 -12
package/skills-plugin/skills/revise-implementation/SKILL.md +3 -3

package/dist/cli.js CHANGED Viewed

@@ -854,6 +854,27 @@ function errorMessage(err) {
   return err instanceof Error ? err.message : String(err);
 }
+// src/models.ts
+var MODEL_ROSTER = { // per-provider roster: the concrete model id behind each tier name (`think`, `execute`)
+  anthropic: { think: "opus", execute: "sonnet" }, // rosterFor also falls back to this entry
+  openai: { think: "gpt-5.5", execute: "gpt-5.4-mini" }
+  // TODO: confirm vs `codex` (presumably about the openai ids above — verify)
+};
+function rosterFor(provider) { // Returns the roster entry for `provider`; a nullish provider, or one with no entry, yields the anthropic roster.
+  return MODEL_ROSTER[provider ?? "anthropic"] ?? MODEL_ROSTER.anthropic; // NOTE(review): plain property lookup — an inherited key such as "constructor" is non-nullish and would bypass the fallback; confirm callers only pass known provider names
+}
+function renderRosterBlock(provider) { // Builds the "# Model tiers" prompt section for `provider`: usage instructions plus the concrete model id for each tier.
+  const r = rosterFor(provider); // roster whose `think` / `execute` ids are interpolated below
+  return [
+    "# Model tiers",
+    "When a skill tells you to run a subagent on a named model tier, pass the matching",
+    "concrete model id as the Task `model` argument. Use the id exactly; never pass the",
+    "tier name itself.",
+    `- \`think\`   -> \`${r.think}\`  (planning, review, high-level reasoning)`,
+    `- \`execute\` -> \`${r.execute}\`  (writing code from the plan; fast & cheap)`
+  ].join("\n"); // the lines above are returned as one newline-separated string
+}
 // src/rules.ts
 import { readFileSync as readFileSync4 } from "node:fs";
 import { join as join3 } from "node:path";
@@ -906,6 +927,7 @@ function buildPrompt(ctx) {
     LANGUAGE_DIRECTIVE
   ];
   if (ctx.permissionMode !== "plan") {
+    lines2.push("", renderRosterBlock(ctx.provider));
     lines2.push(
       "",
       "These coding guidelines apply to all code produced in this run:",
@@ -937,6 +959,8 @@ function buildRevisePrompt(ctx) {
     widgets,
     LANGUAGE_DIRECTIVE,
     "",
+    renderRosterBlock(ctx.provider),
+    "",
     "These coding guidelines apply to all code produced in this run:",
     "",
     loadRule("coding-guideline"),
@@ -1478,7 +1502,7 @@ async function prNumbersForCommit(ctx, sha) {
 // src/run.ts
 var IDLE_MS = 5e3;
 var CANCEL_POLL_MS = 2500;
-var ORCHESTRATOR_MODEL = "sonnet";
+var orchestratorModel = (ctx) => rosterFor(ctx.provider).think; // resolves the model per call: the `think` id from the roster for ctx.provider
 var ORCHESTRATOR_MAX_TURNS = 80;
 var MAX_COMMIT_REPAIRS = 2;
 var MAX_IMPLEMENT_RETRIES = 1;
@@ -1524,7 +1548,7 @@ async function mergeAndResolveConflicts(ctx, dir, config, abort) {
     cwd: dir,
     prompt: buildResolvePrompt(ctx, related),
     permissionMode: ctx.permissionMode,
-    model: ORCHESTRATOR_MODEL,
+    model: orchestratorModel(ctx),
     maxTurns: ORCHESTRATOR_MAX_TURNS,
     abortController: abort
   });
@@ -1549,7 +1573,7 @@ async function commitWithRepair(ctx, dir, abort) {
         cwd: dir,
         prompt: buildRepairPrompt(ctx, err.log),
         permissionMode: ctx.permissionMode,
-        model: ORCHESTRATOR_MODEL,
+        model: orchestratorModel(ctx),
         maxTurns: ORCHESTRATOR_MAX_TURNS,
         abortController: abort
       });
@@ -1653,7 +1677,7 @@ async function processChatJob(ctx, dir, config, abort) {
     prompt: buildPrompt(ctx),
     permissionMode: ctx.permissionMode,
     abortController: abort,
-    ...orchestrating ? { model: ORCHESTRATOR_MODEL, maxTurns: ORCHESTRATOR_MAX_TURNS } : {}
+    ...orchestrating ? { model: orchestratorModel(ctx), maxTurns: ORCHESTRATOR_MAX_TURNS } : {}
   });
   const summary = result.text.trim();
   let reply = summary || "(the agent produced no summary)";
@@ -1719,7 +1743,7 @@ async function processImplementJob(ctx, dir, resumed, config, abort) {
       cwd: dir,
       prompt: buildPrompt(ctx),
       permissionMode: ctx.permissionMode,
-      model: ORCHESTRATOR_MODEL,
+      model: orchestratorModel(ctx),
       maxTurns: ORCHESTRATOR_MAX_TURNS,
       abortController: abort
     });
@@ -1798,7 +1822,7 @@ async function processReviseJob(ctx, dir, resumed, config, abort) {
     cwd: dir,
     prompt: buildRevisePrompt(ctx),
     permissionMode: ctx.permissionMode,
-    model: ORCHESTRATOR_MODEL,
+    model: orchestratorModel(ctx),
     maxTurns: ORCHESTRATOR_MAX_TURNS,
     abortController: abort
   });
@@ -1888,7 +1912,7 @@ async function processReleaseJob(ctx, dir, resumed, config, abort) {
     cwd: dir,
     prompt: buildReleasePrompt(ctx, baseChecks),
     permissionMode: ctx.permissionMode,
-    model: ORCHESTRATOR_MODEL,
+    model: orchestratorModel(ctx),
     maxTurns: ORCHESTRATOR_MAX_TURNS,
     abortController: abort
   });

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@flumecode/runner",
-  "version": "0.20.0",
+  "version": "0.21.0-beta.1",
   "type": "module",
   "description": "FlumeCode local runner — claims jobs and drives your local Claude Code against a real checkout.",
   "bin": {

package/skills-plugin/skills/implement-plan/SKILL.md CHANGED Viewed

@@ -12,7 +12,7 @@ description: >-
 # implement-plan
-You are the **orchestrator**. You run on a medium model and your job is to
+You are the **orchestrator**. You run on the `think` model and your job is to
 _coordinate_, not to write the implementation. You delegate each phase to a
 subagent through the **Task** tool, choosing the model that phase needs, and you
 stitch their results into one report. Doing the implementation yourself defeats
@@ -30,10 +30,9 @@ put it in the prompt, the subagent doesn't have it.
 ## How you delegate
 - Spawn each phase with the **Task** tool, `subagent_type: "general-purpose"`.
-- **Model per phase** (pass it as the Task `model` argument):
-  - `"sonnet"` — implementation, fixes, and the Verify step (mechanical
-    command-running; Verify is read-only even though it uses sonnet).
-  - `"opus"` — acceptance-criteria review, code-quality review, and the report.
+- **Model per phase** (pass the concrete id from the Model tiers block as the Task `model` argument):
+  - `execute` — implementation, fixes, and the Verify step (mechanical command-running; Verify is read-only).
+  - `think` — acceptance-criteria review, code-quality review, and the report.
 - **Read-only phases.** Tell every review, Verify, and report subagent to _inspect
   and report only — never edit, create, or delete files_. Only implementation/fix
   subagents may change the working tree.
@@ -68,7 +67,7 @@ the next step.
    it in the prompts you write for the Implement, Verify, and Fix-loop subagents
    so none of them re-derive it. Do not implement.
-2. **Implement** — Task, `model: "sonnet"`. Give the subagent: the plan steps, a
+2. **Implement** — Task, `model: execute`. Give the subagent: the plan steps, a
    pointer to the wiki/orientation, the coding guidelines (verbatim), and the
    explicit verification command list the orchestrator discovered in the Orient
    step. Tell it to make all the code changes in the working tree to satisfy the
@@ -83,7 +82,7 @@ the next step.
    the verification commands it ran and their pass/fail results, which files it
    changed, and how each plan step was addressed. It must not commit or push.
-3. **Verify (build & tests)** — Task, `model: "sonnet"`, read-only. This step
+3. **Verify (build & tests)** — Task, `model: execute`, read-only. This step
    gives the orchestrator an objective, independent build/test signal before the
    subjective AC and quality reviews. Tell the subagent to:
    - Run the verification commands provided by the orchestrator in the task
@@ -100,7 +99,7 @@ the next step.
      excerpt (if any).
    - Must not edit, create, or delete any files.
-4. **Acceptance-criteria review** — Task, `model: "opus"`, read-only. Give the
+4. **Acceptance-criteria review** — Task, `model: think`, read-only. Give the
    subagent the full AC list and tell it to verify each one against the actual
    changes (run `git --no-pager diff`, read the changed files, run tests/build if
    useful). For **each** AC it must return: the criterion text verbatim, a verdict
@@ -117,7 +116,7 @@ the next step.
    note any files or areas that appear changed but don't map to any AC as a coverage
    gap (signalling a missing AC or an out-of-scope change).
-5. **Code-quality review** — Task, `model: "opus"`, read-only. Give the subagent
+5. **Code-quality review** — Task, `model: think`, read-only. Give the subagent
    the coding guidelines (verbatim) and tell it to review the changes for
    violations and quality problems, returning concrete findings as
    `file:line — what — why`, each marked **must-fix** or **nice-to-have**.
@@ -125,7 +124,7 @@ the next step.
 6. **Fix loop.** If the Verify step (step 3) reports any failing check, the AC
    review (step 4) reports any _not met_ AC, or the quality review (step 5)
    reports any _must-fix_ finding: spawn an **Implement/fix** subagent (Task,
-   `model: "sonnet"`) whose prompt lists exactly those findings and tells it to
+   `model: execute`) whose prompt lists exactly those findings and tells it to
    resolve them without regressing the rest. Include the verification command list
    from the Orient step in the fix subagent's prompt (the same list passed to
    Implement and Verify), so the fix subagent does not need to re-derive it. When
@@ -135,7 +134,7 @@ the next step.
    times. If something still fails after that, stop looping and record the gap
    honestly in the report — do not hide it.
-7. **Report** — Task, `model: "opus"`, read-only. Give the subagent the AC
+7. **Report** — Task, `model: think`, read-only. Give the subagent the AC
    verdicts (with criterion text, from step 4), the Verify results (from step 3),
    and the quality findings, and tell it to run `git --no-pager diff` itself as
    the **single source of truth** for the report. Pass the Verify results as the
@@ -191,7 +190,7 @@ The report subagent calls `submit_report` with these fields:
 - Delegate through Task subagents; don't implement, review, or write the report
   yourself.
-- Right model per phase: `sonnet` to implement/fix/verify (Verify is read-only), `opus` to review/report.
+- Right model per phase: `execute` to implement/fix/verify (Verify is read-only), `think` to review/report.
 - Make every Task prompt self-contained — subagents see only what you give them.
 - Reviewers and the report writer never modify files.
 - Never commit, push, or open a PR.

package/skills-plugin/skills/revise-implementation/SKILL.md CHANGED Viewed

@@ -67,9 +67,9 @@ essentials:
   implementation, not a rebuild. Change only what the user asked for plus what that
   change strictly requires; don't regress the rest of the plan.
 - **Pipeline:** Implement (self-runs build/tests & fixes its own errors, Task
-  `model: "sonnet"`) → Verify (build/tests, read-only, Task `model: "sonnet"`) →
-  acceptance/quality review (Task `model: "opus"`, read-only) → fix loop if needed
-  (≤2, re-run Verify after each fix) → report (Task `model: "opus"`, read-only).
+  `model: execute`) → Verify (build/tests, read-only, Task `model: execute`) →
+  acceptance/quality review (Task `model: think`, read-only) → fix loop if needed
+  (≤2, re-run Verify after each fix) → report (Task `model: think`, read-only).
   Detailed mechanics (command discovery, Verify step spec, fix-loop trigger
   conditions) are in `implement-plan/SKILL.md` — read it for the full pipeline.
 - **No git side effects.** Never commit, push, or open a PR — leave the changes in