@flumecode/runner 0.19.0 → 0.21.0-beta.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/cli.js
CHANGED
|
@@ -26,8 +26,8 @@ function writeConfig(config) {
|
|
|
26
26
|
}
|
|
27
27
|
|
|
28
28
|
// src/run.ts
|
|
29
|
-
import { existsSync as
|
|
30
|
-
import { join as
|
|
29
|
+
import { existsSync as existsSync4 } from "node:fs";
|
|
30
|
+
import { join as join5 } from "node:path";
|
|
31
31
|
|
|
32
32
|
// src/version.ts
|
|
33
33
|
import { readFileSync as readFileSync2 } from "node:fs";
|
|
@@ -203,51 +203,59 @@ var WIDGET_TOOL_NAMES = [
|
|
|
203
203
|
];
|
|
204
204
|
var optionsSchema = z.array(z.string().min(1)).min(2).max(8).describe("2\u20138 short, distinct choices for the user to pick from. " + WIDGET_LANGUAGE_HINT);
|
|
205
205
|
var TAIL = "Do NOT add an 'Other' or 'None of these' catch-all \u2014 the UI always offers an 'Other' free-text option automatically. " + WIDGET_LANGUAGE_HINT + " After calling this, END YOUR TURN and wait: the user's answer arrives as their next message and starts a fresh run.";
|
|
206
|
+
// Zod input shape for the single-select widget tool: a required question,
// optional markdown context shown above it, and the shared options list.
var singleSelectShape = {
  question: z
    .string()
    .min(1)
    .describe(`The question to ask the user. ${WIDGET_LANGUAGE_HINT}`),
  body: z
    .string()
    .optional()
    .describe("Optional markdown shown above the question so the user can read the context they're confirming (e.g. the drafted release notes). Omit for plain questions."),
  options: optionsSchema
};
|
|
213
|
+
// Zod input shape for the multi-select widget tool: a required question,
// optional markdown context shown above it, and the shared options list.
var multiSelectShape = {
  question: z
    .string()
    .min(1)
    .describe(`The question to ask the user. ${WIDGET_LANGUAGE_HINT}`),
  body: z
    .string()
    .optional()
    .describe("Optional markdown shown above the question so the user can read the context they're confirming (e.g. the drafted release notes). Omit for plain questions."),
  options: optionsSchema
};
|
|
220
|
+
/**
 * Build an unanswered single-select widget record from validated tool args.
 *
 * @param {{ question: string, body?: string, options: string[] }} args
 * @returns {object} Widget with a fresh id, one `{ id, label }` entry per
 *   option, and `selectedOptionId` / `customAnswer` both set to null.
 */
function buildSingleSelectWidget(args) {
  const { question, body, options } = args;
  const toOption = (label) => ({ id: randomUUID(), label });
  const widget = { id: randomUUID(), type: "single_select", question, body };
  widget.options = options.map((label) => toOption(label));
  widget.selectedOptionId = null;
  widget.customAnswer = null;
  return widget;
}
|
|
231
|
+
/**
 * Build an unanswered multi-select widget record from validated tool args.
 *
 * @param {{ question: string, body?: string, options: string[] }} args
 * @returns {object} Widget with a fresh id, one `{ id, label }` entry per
 *   option, and `selectedOptionIds` / `customAnswer` both set to null.
 */
function buildMultiSelectWidget(args) {
  const { question, body, options } = args;
  const toOption = (label) => ({ id: randomUUID(), label });
  const widget = { id: randomUUID(), type: "multi_select", question, body };
  widget.options = options.map((label) => toOption(label));
  widget.selectedOptionIds = null;
  widget.customAnswer = null;
  return widget;
}
|
|
206
242
|
function createWidgetTooling() {
|
|
207
243
|
const collected = [];
|
|
208
244
|
const singleSelect = tool(
|
|
209
245
|
SINGLE_SELECT,
|
|
210
246
|
"Ask the user a single-select (radio-button) question \u2014 exactly one answer. Use this for a genuine either/or choice (competing approaches, scope decisions, yes/no) instead of writing the options as prose. " + TAIL,
|
|
211
|
-
|
|
212
|
-
question: z.string().min(1).describe("The question to ask the user. " + WIDGET_LANGUAGE_HINT),
|
|
213
|
-
body: z.string().optional().describe(
|
|
214
|
-
"Optional markdown shown above the question so the user can read the context they're confirming (e.g. the drafted release notes). Omit for plain questions."
|
|
215
|
-
),
|
|
216
|
-
options: optionsSchema
|
|
217
|
-
},
|
|
247
|
+
singleSelectShape,
|
|
218
248
|
async (args) => {
|
|
219
|
-
collected.push(
|
|
220
|
-
id: randomUUID(),
|
|
221
|
-
type: "single_select",
|
|
222
|
-
question: args.question,
|
|
223
|
-
body: args.body,
|
|
224
|
-
options: args.options.map((label) => ({ id: randomUUID(), label })),
|
|
225
|
-
selectedOptionId: null,
|
|
226
|
-
customAnswer: null
|
|
227
|
-
});
|
|
249
|
+
collected.push(buildSingleSelectWidget(args));
|
|
228
250
|
return widgetPosted("single-select");
|
|
229
251
|
}
|
|
230
252
|
);
|
|
231
253
|
const multiSelect = tool(
|
|
232
254
|
MULTI_SELECT,
|
|
233
255
|
"Ask the user a multi-select (checkbox) question \u2014 they may pick any number of options, including none of the presets if they use 'Other'. Use this for 'select all that apply' questions (which features to include, which files to touch). " + TAIL,
|
|
234
|
-
|
|
235
|
-
question: z.string().min(1).describe("The question to ask the user. " + WIDGET_LANGUAGE_HINT),
|
|
236
|
-
body: z.string().optional().describe(
|
|
237
|
-
"Optional markdown shown above the question so the user can read the context they're confirming (e.g. the drafted release notes). Omit for plain questions."
|
|
238
|
-
),
|
|
239
|
-
options: optionsSchema
|
|
240
|
-
},
|
|
256
|
+
multiSelectShape,
|
|
241
257
|
async (args) => {
|
|
242
|
-
collected.push(
|
|
243
|
-
id: randomUUID(),
|
|
244
|
-
type: "multi_select",
|
|
245
|
-
question: args.question,
|
|
246
|
-
body: args.body,
|
|
247
|
-
options: args.options.map((label) => ({ id: randomUUID(), label })),
|
|
248
|
-
selectedOptionIds: null,
|
|
249
|
-
customAnswer: null
|
|
250
|
-
});
|
|
258
|
+
collected.push(buildMultiSelectWidget(args));
|
|
251
259
|
return widgetPosted("multi-select");
|
|
252
260
|
}
|
|
253
261
|
);
|
|
@@ -321,6 +329,9 @@ var planInputSchema = {
|
|
|
321
329
|
rootCause: z2.string().optional().describe(
|
|
322
330
|
'For bug fixes (scope === "fix"): the underlying cause of the bug \u2014 the specific code, logic, or condition that produces the incorrect behavior, not just the symptom. Required when scope is "fix"; omit for all other scopes. ' + INLINE_CODE_HINT
|
|
323
331
|
),
|
|
332
|
+
motivation: z2.string().optional().describe(
|
|
333
|
+
"Why the user is making this request \u2014 the underlying motivation or problem the change addresses. Fill this especially when the request content/context does NOT already state the why (ask the user in the Clarify phase); omit when there is no additional motivation to record. Useful for future understanding of the system. " + INLINE_CODE_HINT
|
|
334
|
+
),
|
|
324
335
|
assumptions: z2.array(z2.string()).describe("Anything decided during planning, including unanswered defaults."),
|
|
325
336
|
requirements: z2.array(z2.string().min(1)).min(1).describe(
|
|
326
337
|
"Required, human-readable statements of what this change must accomplish and why, in plain language a non-technical reader can follow. Distinct from acceptanceCriteria: requirements explain intent/rationale; acceptance criteria are the machine-checkable proof. At least 1 required. " + INLINE_CODE_HINT
|
|
@@ -351,6 +362,11 @@ function renderPlan(plan) {
|
|
|
351
362
|
lines2.push(`**Scope** \u2014 \`${plan.scope}\``);
|
|
352
363
|
lines2.push("");
|
|
353
364
|
lines2.push(`**Goal** \u2014 ${plan.goal}`);
|
|
365
|
+
if (plan.motivation && plan.motivation.trim().length > 0) {
|
|
366
|
+
lines2.push("");
|
|
367
|
+
lines2.push("## Motivation");
|
|
368
|
+
lines2.push(plan.motivation);
|
|
369
|
+
}
|
|
354
370
|
if (plan.assumptions.length > 0) {
|
|
355
371
|
lines2.push("");
|
|
356
372
|
lines2.push("**Assumptions**");
|
|
@@ -426,7 +442,7 @@ function createPlanTooling() {
|
|
|
426
442
|
let renderedPlans = null;
|
|
427
443
|
const submitPlan = tool2(
|
|
428
444
|
SUBMIT_PLAN,
|
|
429
|
-
`Submit ALL your plans in a single call \u2014 one entry per plan; each becomes its own independently-acceptable Accept-as-plan draft. Do NOT call submit_plan more than once. acceptanceCriteria is required in each plan and must contain at least 2 observable, verifiable conditions. The 'title' field names each specific plan \u2014 make it concise and distinct from the request title and from sibling plan titles. requirements is required in each plan: at least 1 plain-language statement of what the change must accomplish and why (human-readable intent), separate from the machine-checkable acceptanceCriteria. When a plan's scope is "fix", rootCause is required: a non-empty explanation of the underlying cause of the bug (not just the symptom). `,
|
|
445
|
+
`Submit ALL your plans in a single call \u2014 one entry per plan; each becomes its own independently-acceptable Accept-as-plan draft. Do NOT call submit_plan more than once. acceptanceCriteria is required in each plan and must contain at least 2 observable, verifiable conditions. The 'title' field names each specific plan \u2014 make it concise and distinct from the request title and from sibling plan titles. requirements is required in each plan: at least 1 plain-language statement of what the change must accomplish and why (human-readable intent), separate from the machine-checkable acceptanceCriteria. When a plan's scope is "fix", rootCause is required: a non-empty explanation of the underlying cause of the bug (not just the symptom). motivation is optional: the user's stated or asked-for reason for the request. `,
|
|
430
446
|
submitPlanInputSchema,
|
|
431
447
|
async (args) => {
|
|
432
448
|
const parsed = submitPlanSchema.parse(args);
|
|
@@ -709,6 +725,87 @@ async function runClaudeCode(opts) {
|
|
|
709
725
|
return { text: finalText, widgets: collected, plans: getPlans(), report: getReport() };
|
|
710
726
|
}
|
|
711
727
|
|
|
728
|
+
// src/codex.ts
|
|
729
|
+
import { spawn } from "node:child_process";
|
|
730
|
+
import { mkdtempSync, readFileSync as readFileSync3, writeFileSync as writeFileSync2, existsSync as existsSync2 } from "node:fs";
|
|
731
|
+
import { join as join2 } from "node:path";
|
|
732
|
+
import { tmpdir } from "node:os";
|
|
733
|
+
import { fileURLToPath as fileURLToPath3 } from "node:url";
|
|
734
|
+
var MCP_ENTRY = fileURLToPath3(new URL("./mcp-stdio.js", import.meta.url));
|
|
735
|
+
/**
 * Run one job through the `codex` CLI and collect what the Flume MCP server
 * recorded while it ran.
 *
 * The child is started as `codex exec` with the Flume MCP stdio server
 * registered via `-c` overrides. That server is pointed at a JSONL file through
 * the `FLUME_MCP_OUTPUT` environment variable; after the child closes, the file
 * is read back and split into plan and widget records.
 *
 * @param {{ cwd: string, prompt: string, abortController?: AbortController }} opts
 * @returns {Promise<{ text: string, widgets: unknown[], plans: unknown[] | null, report: null }>}
 *   `text` is always empty and `report` always null for this backend.
 * @throws {Error} When the `codex` binary is missing, when spawning fails for
 *   another reason, or when the run was aborted ("Run canceled by user").
 */
async function runCodex(opts) {
  // NOTE(review): this temp directory is never removed. Cleaning it up needs
  // `rmSync`, which is not imported in this module yet.
  const tmpDir = mkdtempSync(join2(tmpdir(), "flume-codex-"));
  const outFile = join2(tmpDir, "flume-mcp.jsonl");
  writeFileSync2(outFile, "");
  const child = spawn(
    "codex",
    [
      "exec",
      "--cwd",
      opts.cwd,
      "-c",
      `mcp_servers.flume.command=node`,
      "-c",
      `mcp_servers.flume.args=${JSON.stringify([MCP_ENTRY])}`,
      opts.prompt
    ],
    {
      cwd: opts.cwd,
      env: { ...process.env, FLUME_MCP_OUTPUT: outFile },
      // stdin is inherited (codex may read from it), stdout/stderr are piped for streaming
      stdio: ["inherit", "pipe", "pipe"]
    }
  );
  // Mirror the child's output to our own streams and to the event log.
  child.stdout?.on("data", (chunk) => {
    const text = chunk.toString();
    process.stdout.write(text);
    logEvent("agent", text);
  });
  child.stderr?.on("data", (chunk) => {
    const text = chunk.toString();
    process.stderr.write(text);
    logEvent("agent", text);
  });
  const signal = opts.abortController?.signal;
  const onAbort = () => {
    child.kill("SIGTERM");
  };
  signal?.addEventListener("abort", onAbort, { once: true });
  // An "abort" event is not re-dispatched for a signal that was aborted before
  // the listener was attached, so stop the child explicitly in that case
  // instead of letting it run to completion.
  if (signal?.aborted) {
    onAbort();
  }
  try {
    await new Promise((resolve, reject) => {
      child.on("error", (err) => {
        if (err.code === "ENOENT") {
          reject(
            new Error(
              "codex CLI not found. Install it with `npm install -g @openai/codex` and log in before running plan-mode jobs with an openai agent."
            )
          );
        } else {
          reject(err);
        }
      });
      child.on("close", () => resolve());
    });
  } finally {
    // Detach the listener on the failure path too, so a rejected spawn does not
    // leave a handler on a caller-owned signal.
    signal?.removeEventListener("abort", onAbort);
  }
  if (signal?.aborted) {
    throw new Error("Run canceled by user");
  }
  const raw = existsSync2(outFile) ? readFileSync3(outFile, "utf8") : "";
  // Unparseable lines come back as null from parseJsonLine and are dropped.
  const records = raw.split("\n").filter(Boolean).map(parseJsonLine).filter((r) => r !== null);
  const plans = records.filter((r) => r.kind === "plans").flatMap((r) => r.plans ?? []);
  const widgets = records.filter((r) => r.kind === "widget").map((r) => r.widget);
  return {
    text: "",
    widgets,
    plans: plans.length > 0 ? plans : null,
    report: null
  };
}
|
|
801
|
+
/**
 * Parse one line of JSONL.
 *
 * @param {string} line
 * @returns {unknown} The parsed value, or null when the line is not valid JSON.
 */
function parseJsonLine(line) {
  let parsed = null;
  try {
    parsed = JSON.parse(line);
  } catch {
    // Deliberately tolerant: a malformed line is reported as null.
  }
  return parsed;
}
|
|
808
|
+
|
|
712
809
|
// src/health.ts
|
|
713
810
|
import { query as query2 } from "@anthropic-ai/claude-agent-sdk";
|
|
714
811
|
var PROBE_TIMEOUT_MS = 6e4;
|
|
@@ -757,19 +854,45 @@ function errorMessage(err) {
|
|
|
757
854
|
return err instanceof Error ? err.message : String(err);
|
|
758
855
|
}
|
|
759
856
|
|
|
857
|
+
// src/models.ts
|
|
858
|
+
// Concrete model ids per provider, keyed by tier: `think` and `execute`.
var MODEL_ROSTER = {
  anthropic: { think: "opus", execute: "sonnet" },
  // TODO: confirm vs `codex`
  openai: { think: "gpt-5.5", execute: "gpt-5.4-mini" }
};
/**
 * Look up the model roster for a provider.
 *
 * @param {string | null | undefined} provider Provider key; nullish means "anthropic".
 * @returns {{ think: string, execute: string }} The provider's roster, or the
 *   anthropic roster when the provider is not a key of MODEL_ROSTER.
 */
function rosterFor(provider) {
  const key = provider ?? "anthropic";
  // Own-property check: a plain bracket lookup would also resolve inherited
  // names such as "constructor" or "toString" and hand back a function.
  return Object.hasOwn(MODEL_ROSTER, key) ? MODEL_ROSTER[key] : MODEL_ROSTER.anthropic;
}
|
|
866
|
+
/**
 * Render the "Model tiers" prompt section for a provider.
 *
 * @param {string | null | undefined} provider Passed straight to rosterFor.
 * @returns {string} Newline-joined markdown naming the concrete model id for
 *   the `think` and `execute` tiers.
 */
function renderRosterBlock(provider) {
  const { think, execute } = rosterFor(provider);
  const preamble = [
    "# Model tiers",
    "When a skill tells you to run a subagent on a named model tier, pass the matching",
    "concrete model id as the Task `model` argument. Use the id exactly; never pass the",
    "tier name itself."
  ];
  const tiers = [
    `- \`think\` -> \`${think}\` (planning, review, high-level reasoning)`,
    `- \`execute\` -> \`${execute}\` (writing code from the plan; fast & cheap)`
  ];
  return preamble.concat(tiers).join("\n");
}
|
|
877
|
+
|
|
760
878
|
// src/rules.ts
|
|
761
|
-
import { readFileSync as
|
|
762
|
-
import { join as
|
|
763
|
-
import { fileURLToPath as
|
|
764
|
-
var RULES_DIR =
|
|
879
|
+
import { readFileSync as readFileSync4 } from "node:fs";
|
|
880
|
+
import { join as join3 } from "node:path";
|
|
881
|
+
import { fileURLToPath as fileURLToPath4 } from "node:url";
|
|
882
|
+
var RULES_DIR = fileURLToPath4(new URL("../skills-plugin/rules", import.meta.url));
|
|
765
883
|
/**
 * Read `<RULES_DIR>/<name>.md` and return its text with any front matter
 * removed and surrounding whitespace trimmed.
 *
 * @param {string} name Rule file name without the `.md` extension.
 * @returns {string}
 */
function loadRule(name) {
  const rulePath = join3(RULES_DIR, `${name}.md`);
  const contents = readFileSync4(rulePath, "utf8");
  return stripFrontMatter(contents).trim();
}
|
|
769
887
|
/**
 * Remove a leading `---` … `---` front-matter block from a document.
 *
 * Accepts LF and CRLF line endings, an empty front-matter block, and a closing
 * `---` that is the last line of the input with no trailing newline. Text
 * without a leading front-matter block is returned unchanged.
 *
 * @param {string} raw Full file contents.
 * @returns {string} The contents after the front-matter block.
 */
function stripFrontMatter(raw) {
  // `s` lets `.` span newlines. The lazy body stops at the first closing
  // `---` line, and the body group is optional so `---\n---\n` matches too.
  const match = raw.match(/^---\r?\n(?:.*?\r?\n)?---(?:\r?\n|$)/s);
  return match ? raw.slice(match[0].length) : raw;
}
|
|
891
|
+
var SKILLS_DIR = fileURLToPath4(new URL("../skills-plugin/skills", import.meta.url));
|
|
892
|
+
/**
 * Read `<SKILLS_DIR>/<name>/SKILL.md` and return its text with any front
 * matter removed and surrounding whitespace trimmed.
 *
 * @param {string} name Skill directory name.
 * @returns {string}
 */
function loadSkill(name) {
  const skillPath = join3(SKILLS_DIR, name, "SKILL.md");
  const contents = readFileSync4(skillPath, "utf8");
  return stripFrontMatter(contents).trim();
}
|
|
773
896
|
|
|
774
897
|
// src/prompt.ts
|
|
775
898
|
function appendRule(lines2, intro, ruleName) {
|
|
@@ -804,6 +927,7 @@ function buildPrompt(ctx) {
|
|
|
804
927
|
LANGUAGE_DIRECTIVE
|
|
805
928
|
];
|
|
806
929
|
if (ctx.permissionMode !== "plan") {
|
|
930
|
+
lines2.push("", renderRosterBlock(ctx.provider));
|
|
807
931
|
lines2.push(
|
|
808
932
|
"",
|
|
809
933
|
"These coding guidelines apply to all code produced in this run:",
|
|
@@ -835,6 +959,8 @@ function buildRevisePrompt(ctx) {
|
|
|
835
959
|
widgets,
|
|
836
960
|
LANGUAGE_DIRECTIVE,
|
|
837
961
|
"",
|
|
962
|
+
renderRosterBlock(ctx.provider),
|
|
963
|
+
"",
|
|
838
964
|
"These coding guidelines apply to all code produced in this run:",
|
|
839
965
|
"",
|
|
840
966
|
loadRule("coding-guideline"),
|
|
@@ -992,6 +1118,12 @@ function buildReleasePrompt(ctx, baseChecks) {
|
|
|
992
1118
|
);
|
|
993
1119
|
return lines2.join("\n");
|
|
994
1120
|
}
|
|
1121
|
+
/**
 * Build the prompt for the codex backend: the `request-to-plan` skill text
 * under a heading, a blank line, then the regular prompt from buildPrompt.
 *
 * @param {object} ctx Job context, forwarded unchanged to buildPrompt.
 * @returns {string}
 */
function buildCodexPrompt(ctx) {
  const sections = [
    "# request-to-plan skill (follow these instructions)",
    loadSkill("request-to-plan"),
    "",
    buildPrompt(ctx)
  ];
  return sections.join("\n");
}
|
|
995
1127
|
function buildInitPrompt(ctx) {
|
|
996
1128
|
return [
|
|
997
1129
|
`You are "${ctx.agentName}" initializing FlumeCode for the repository ${ctx.repo.fullName}, checked out in your current working directory.`,
|
|
@@ -1008,10 +1140,10 @@ function jobTitle(ctx) {
|
|
|
1008
1140
|
|
|
1009
1141
|
// src/workspace.ts
|
|
1010
1142
|
import { execFile } from "node:child_process";
|
|
1011
|
-
import { existsSync as
|
|
1143
|
+
import { existsSync as existsSync3 } from "node:fs";
|
|
1012
1144
|
import { mkdtemp, readdir, rm } from "node:fs/promises";
|
|
1013
|
-
import { tmpdir } from "node:os";
|
|
1014
|
-
import { join as
|
|
1145
|
+
import { tmpdir as tmpdir2 } from "node:os";
|
|
1146
|
+
import { join as join4 } from "node:path";
|
|
1015
1147
|
import { promisify } from "node:util";
|
|
1016
1148
|
var exec = promisify(execFile);
|
|
1017
1149
|
var WORKSPACE_PREFIX = "flume-runner-";
|
|
@@ -1037,11 +1169,11 @@ function cloneUrl(ctx) {
|
|
|
1037
1169
|
return `https://x-access-token:${cloneToken}@github.com/${owner}/${name}.git`;
|
|
1038
1170
|
}
|
|
1039
1171
|
/**
 * Guess which package manager a checkout uses from the lockfile it contains.
 *
 * @param {string} dir Directory to inspect.
 * @returns {"pnpm" | "yarn" | "npm" | "bun" | null} null when `dir` has no
 *   `package.json`; otherwise the manager of the first lockfile found, or
 *   "npm" when no known lockfile exists.
 */
function detectPackageManager(dir) {
  if (!existsSync3(join4(dir, "package.json"))) return null;
  // Checked in priority order. `bun.lock` is Bun's text lockfile, listed next
  // to the binary `bun.lockb` so those repos no longer fall through to "npm".
  const lockfiles = [
    ["pnpm-lock.yaml", "pnpm"],
    ["yarn.lock", "yarn"],
    ["package-lock.json", "npm"],
    ["bun.lockb", "bun"],
    ["bun.lock", "bun"]
  ];
  for (const [file, manager] of lockfiles) {
    if (existsSync3(join4(dir, file))) return manager;
  }
  return "npm";
}
|
|
1047
1179
|
async function installDependencies(dir) {
|
|
@@ -1067,13 +1199,13 @@ async function installDependencies(dir) {
|
|
|
1067
1199
|
}
|
|
1068
1200
|
}
|
|
1069
1201
|
/**
 * Create a fresh workspace directory under the OS temp dir, named with
 * WORKSPACE_PREFIX plus a random suffix.
 *
 * @returns {Promise<string>} Path of the created directory.
 */
async function makeWorkspace() {
  const prefix = join4(tmpdir2(), WORKSPACE_PREFIX);
  return mkdtemp(prefix);
}
|
|
1072
1204
|
var MAX_WORKSPACES = 8;
|
|
1073
1205
|
var workspaceRegistry = /* @__PURE__ */ new Map();
|
|
1074
1206
|
async function acquireWorkspace(key) {
|
|
1075
1207
|
const existing = workspaceRegistry.get(key);
|
|
1076
|
-
if (existing !== void 0 &&
|
|
1208
|
+
if (existing !== void 0 && existsSync3(existing)) {
|
|
1077
1209
|
workspaceRegistry.delete(key);
|
|
1078
1210
|
workspaceRegistry.set(key, existing);
|
|
1079
1211
|
return { dir: existing, reused: true };
|
|
@@ -1125,7 +1257,7 @@ async function prepareResumingBranch(ctx, dir, reused) {
|
|
|
1125
1257
|
return { resumed: true };
|
|
1126
1258
|
}
|
|
1127
1259
|
async function sweepWorkspaces() {
|
|
1128
|
-
const base =
|
|
1260
|
+
const base = tmpdir2();
|
|
1129
1261
|
let entries;
|
|
1130
1262
|
try {
|
|
1131
1263
|
entries = await readdir(base);
|
|
@@ -1136,7 +1268,7 @@ async function sweepWorkspaces() {
|
|
|
1136
1268
|
for (const entry of entries) {
|
|
1137
1269
|
if (!entry.startsWith(WORKSPACE_PREFIX)) continue;
|
|
1138
1270
|
try {
|
|
1139
|
-
await rm(
|
|
1271
|
+
await rm(join4(base, entry), { recursive: true, force: true });
|
|
1140
1272
|
removed++;
|
|
1141
1273
|
} catch {
|
|
1142
1274
|
}
|
|
@@ -1370,7 +1502,7 @@ async function prNumbersForCommit(ctx, sha) {
|
|
|
1370
1502
|
// src/run.ts
|
|
1371
1503
|
var IDLE_MS = 5e3;
|
|
1372
1504
|
var CANCEL_POLL_MS = 2500;
|
|
1373
|
-
var
|
|
1505
|
+
// Model id used for orchestrator runs: the `think` tier of the job's provider.
var orchestratorModel = (ctx) => {
  const { think } = rosterFor(ctx.provider);
  return think;
};
|
|
1374
1506
|
var ORCHESTRATOR_MAX_TURNS = 80;
|
|
1375
1507
|
var MAX_COMMIT_REPAIRS = 2;
|
|
1376
1508
|
var MAX_IMPLEMENT_RETRIES = 1;
|
|
@@ -1416,7 +1548,7 @@ async function mergeAndResolveConflicts(ctx, dir, config, abort) {
|
|
|
1416
1548
|
cwd: dir,
|
|
1417
1549
|
prompt: buildResolvePrompt(ctx, related),
|
|
1418
1550
|
permissionMode: ctx.permissionMode,
|
|
1419
|
-
model:
|
|
1551
|
+
model: orchestratorModel(ctx),
|
|
1420
1552
|
maxTurns: ORCHESTRATOR_MAX_TURNS,
|
|
1421
1553
|
abortController: abort
|
|
1422
1554
|
});
|
|
@@ -1441,7 +1573,7 @@ async function commitWithRepair(ctx, dir, abort) {
|
|
|
1441
1573
|
cwd: dir,
|
|
1442
1574
|
prompt: buildRepairPrompt(ctx, err.log),
|
|
1443
1575
|
permissionMode: ctx.permissionMode,
|
|
1444
|
-
model:
|
|
1576
|
+
model: orchestratorModel(ctx),
|
|
1445
1577
|
maxTurns: ORCHESTRATOR_MAX_TURNS,
|
|
1446
1578
|
abortController: abort
|
|
1447
1579
|
});
|
|
@@ -1533,13 +1665,19 @@ async function processChatJob(ctx, dir, config, abort) {
|
|
|
1533
1665
|
console.log(`
|
|
1534
1666
|
\u25B6 Job ${ctx.jobId} \u2014 ${ctx.repo.fullName}: "${jobTitle(ctx)}"`);
|
|
1535
1667
|
const orchestrating = ctx.permissionMode !== "plan";
|
|
1668
|
+
const provider = ctx.provider ?? "anthropic";
|
|
1669
|
+
if (provider === "openai" && orchestrating) {
|
|
1670
|
+
throw new Error(
|
|
1671
|
+
"Codex backend currently supports plan mode only. implement/revise/resolve/release jobs with an openai agent are not yet supported."
|
|
1672
|
+
);
|
|
1673
|
+
}
|
|
1536
1674
|
const installResult = orchestrating ? await installDependencies(dir) : null;
|
|
1537
|
-
const result = await runClaudeCode({
|
|
1675
|
+
const result = provider === "openai" ? await runCodex({ cwd: dir, prompt: buildCodexPrompt(ctx), abortController: abort }) : await runClaudeCode({
|
|
1538
1676
|
cwd: dir,
|
|
1539
1677
|
prompt: buildPrompt(ctx),
|
|
1540
1678
|
permissionMode: ctx.permissionMode,
|
|
1541
1679
|
abortController: abort,
|
|
1542
|
-
...orchestrating ? { model:
|
|
1680
|
+
...orchestrating ? { model: orchestratorModel(ctx), maxTurns: ORCHESTRATOR_MAX_TURNS } : {}
|
|
1543
1681
|
});
|
|
1544
1682
|
const summary = result.text.trim();
|
|
1545
1683
|
let reply = summary || "(the agent produced no summary)";
|
|
@@ -1555,7 +1693,7 @@ async function processChatJob(ctx, dir, config, abort) {
|
|
|
1555
1693
|
console.log(` \u2026job ${ctx.jobId} posted ${result.widgets.length} widget(s); awaiting reply`);
|
|
1556
1694
|
return { text: reply, widgets: result.widgets };
|
|
1557
1695
|
}
|
|
1558
|
-
const wikiExists =
|
|
1696
|
+
const wikiExists = existsSync4(join5(dir, ".flumecode", "wiki"));
|
|
1559
1697
|
let documented = false;
|
|
1560
1698
|
if (ctx.permissionMode !== "plan" && wikiExists && await hasChanges(dir)) {
|
|
1561
1699
|
try {
|
|
@@ -1605,7 +1743,7 @@ async function processImplementJob(ctx, dir, resumed, config, abort) {
|
|
|
1605
1743
|
cwd: dir,
|
|
1606
1744
|
prompt: buildPrompt(ctx),
|
|
1607
1745
|
permissionMode: ctx.permissionMode,
|
|
1608
|
-
model:
|
|
1746
|
+
model: orchestratorModel(ctx),
|
|
1609
1747
|
maxTurns: ORCHESTRATOR_MAX_TURNS,
|
|
1610
1748
|
abortController: abort
|
|
1611
1749
|
});
|
|
@@ -1644,7 +1782,7 @@ ${reply}`;
|
|
|
1644
1782
|
|
|
1645
1783
|
> \u26A0\uFE0F Dependencies failed to install (\`${installResult.manager}\`); tests may not have run.`;
|
|
1646
1784
|
}
|
|
1647
|
-
const wikiExists =
|
|
1785
|
+
const wikiExists = existsSync4(join5(dir, ".flumecode", "wiki"));
|
|
1648
1786
|
let documented = false;
|
|
1649
1787
|
if (wikiExists && await hasChanges(dir)) {
|
|
1650
1788
|
try {
|
|
@@ -1684,7 +1822,7 @@ async function processReviseJob(ctx, dir, resumed, config, abort) {
|
|
|
1684
1822
|
cwd: dir,
|
|
1685
1823
|
prompt: buildRevisePrompt(ctx),
|
|
1686
1824
|
permissionMode: ctx.permissionMode,
|
|
1687
|
-
model:
|
|
1825
|
+
model: orchestratorModel(ctx),
|
|
1688
1826
|
maxTurns: ORCHESTRATOR_MAX_TURNS,
|
|
1689
1827
|
abortController: abort
|
|
1690
1828
|
});
|
|
@@ -1700,7 +1838,7 @@ async function processReviseJob(ctx, dir, resumed, config, abort) {
|
|
|
1700
1838
|
console.log(` \u2026revise ${ctx.jobId} posted ${result.widgets.length} widget(s); awaiting reply`);
|
|
1701
1839
|
return { text: reply, widgets: result.widgets };
|
|
1702
1840
|
}
|
|
1703
|
-
const wikiExists =
|
|
1841
|
+
const wikiExists = existsSync4(join5(dir, ".flumecode", "wiki"));
|
|
1704
1842
|
let documented = false;
|
|
1705
1843
|
if (wikiExists && await hasChanges(dir)) {
|
|
1706
1844
|
try {
|
|
@@ -1774,7 +1912,7 @@ async function processReleaseJob(ctx, dir, resumed, config, abort) {
|
|
|
1774
1912
|
cwd: dir,
|
|
1775
1913
|
prompt: buildReleasePrompt(ctx, baseChecks),
|
|
1776
1914
|
permissionMode: ctx.permissionMode,
|
|
1777
|
-
model:
|
|
1915
|
+
model: orchestratorModel(ctx),
|
|
1778
1916
|
maxTurns: ORCHESTRATOR_MAX_TURNS,
|
|
1779
1917
|
abortController: abort
|
|
1780
1918
|
});
|
|
@@ -0,0 +1,286 @@
|
|
|
1
|
+
#!/usr/bin/env node
// Exactly one hashbang, on the first line: a second `#!` line is a SyntaxError.
|
|
3
|
+
|
|
4
|
+
// src/mcp-stdio.ts
|
|
5
|
+
import { appendFileSync, writeFileSync, existsSync } from "node:fs";
|
|
6
|
+
import { Server } from "@modelcontextprotocol/sdk/server/index.js";
|
|
7
|
+
import { StdioServerTransport } from "@modelcontextprotocol/sdk/server/stdio.js";
|
|
8
|
+
import { ListToolsRequestSchema, CallToolRequestSchema } from "@modelcontextprotocol/sdk/types.js";
|
|
9
|
+
import { toJSONSchema } from "zod/v4/core";
|
|
10
|
+
import { z as z3 } from "zod";
|
|
11
|
+
|
|
12
|
+
// src/plan.ts
|
|
13
|
+
import { createSdkMcpServer, tool } from "@anthropic-ai/claude-agent-sdk";
|
|
14
|
+
import { z } from "zod";
|
|
15
|
+
|
|
16
|
+
// src/code-lang.ts
|
|
17
|
+
// File extension (lower-case, no dot) -> language tag used on code fences.
var EXT_TO_LANG = {
  ts: "typescript",
  tsx: "tsx",
  js: "javascript",
  jsx: "jsx",
  json: "json",
  css: "css",
  md: "markdown",
  sh: "bash",
  py: "python",
  yaml: "yaml",
  yml: "yaml",
  html: "markup",
  xml: "markup",
  sql: "sql"
};
/**
 * Map a file path to a code-fence language tag based on its extension.
 *
 * @param {string} path File path; the text after the last "." is treated as
 *   the extension, case-insensitively.
 * @returns {string | undefined} The language tag, or undefined when the
 *   extension is not listed in EXT_TO_LANG.
 */
function langFromPath(path) {
  const ext = path.split(".").pop()?.toLowerCase();
  // Own-property check: a plain lookup would resolve inherited names such as
  // "constructor" (e.g. for `x.constructor`) and return a function.
  return ext && Object.hasOwn(EXT_TO_LANG, ext) ? EXT_TO_LANG[ext] : void 0;
}
|
|
37
|
+
|
|
38
|
+
// src/schema-hints.ts
|
|
39
|
+
var INLINE_CODE_HINT = "Wrap code identifiers (function, variable, type, and file names, commands, and flags) in inline backticks, e.g. `getCodingSessionsForRequest`.";
|
|
40
|
+
var WIDGET_LANGUAGE_HINT = "Write this in the same natural language as the incoming thread (the request body and the user's messages). If the thread is in English, keep it in English; do not switch languages. Keep code identifiers, file paths, and quoted code verbatim.";
|
|
41
|
+
|
|
42
|
+
// src/plan.ts
|
|
43
|
+
var SERVER_NAME = "flume_plan";
|
|
44
|
+
var SUBMIT_PLAN = "submit_plan";
|
|
45
|
+
var PLAN_TOOL_NAME = `mcp__${SERVER_NAME}__${SUBMIT_PLAN}`;
|
|
46
|
+
var PLAN_MARKER = "<!-- flumecode:end-of-plan -->";
|
|
47
|
+
var pseudoCodeEntrySchema = z.object({
|
|
48
|
+
file: z.string().min(1),
|
|
49
|
+
pseudoCode: z.string().min(1)
|
|
50
|
+
});
|
|
51
|
+
var stepSchema = z.object({
|
|
52
|
+
title: z.string().min(1).describe("A concise imperative title for this step."),
|
|
53
|
+
description: z.array(z.string().min(1)).min(1).describe(
|
|
54
|
+
"Bullet points that explain this step's change so a reviewer can judge whether the design is correct. Each array item is one short, self-contained bullet \u2014 not a single paragraph, and not a restatement of the pseudo code. " + INLINE_CODE_HINT
|
|
55
|
+
),
|
|
56
|
+
pseudoCode: z.array(pseudoCodeEntrySchema).optional().describe(
|
|
57
|
+
"Per-file pseudo code. Provide an entry for every non-documentation file this step touches. Each entry contains the file path and pseudo code describing the changes to that file."
|
|
58
|
+
)
|
|
59
|
+
});
|
|
60
|
+
var planInputSchema = {
|
|
61
|
+
title: z.string().min(1).max(120).describe(
|
|
62
|
+
"A concise, descriptive name for THIS plan. Must be distinct from the request title and from any sibling plans on the same request. Keep it under 120 characters."
|
|
63
|
+
),
|
|
64
|
+
scope: z.enum(["feat", "fix", "chore", "docs", "test", "refactor"]).describe("The primary intent of the change."),
|
|
65
|
+
goal: z.string().min(1).describe("One or two sentences stating the outcome. " + INLINE_CODE_HINT),
|
|
66
|
+
rootCause: z.string().optional().describe(
|
|
67
|
+
'For bug fixes (scope === "fix"): the underlying cause of the bug \u2014 the specific code, logic, or condition that produces the incorrect behavior, not just the symptom. Required when scope is "fix"; omit for all other scopes. ' + INLINE_CODE_HINT
|
|
68
|
+
),
|
|
69
|
+
motivation: z.string().optional().describe(
|
|
70
|
+
"Why the user is making this request \u2014 the underlying motivation or problem the change addresses. Fill this especially when the request content/context does NOT already state the why (ask the user in the Clarify phase); omit when there is no additional motivation to record. Useful for future understanding of the system. " + INLINE_CODE_HINT
|
|
71
|
+
),
|
|
72
|
+
assumptions: z.array(z.string()).describe("Anything decided during planning, including unanswered defaults."),
|
|
73
|
+
requirements: z.array(z.string().min(1)).min(1).describe(
|
|
74
|
+
"Required, human-readable statements of what this change must accomplish and why, in plain language a non-technical reader can follow. Distinct from acceptanceCriteria: requirements explain intent/rationale; acceptance criteria are the machine-checkable proof. At least 1 required. " + INLINE_CODE_HINT
|
|
75
|
+
),
|
|
76
|
+
steps: z.array(stepSchema).min(1).describe("Ordered list of changes. Each step says what and why, with file references."),
|
|
77
|
+
acceptanceCriteria: z.array(z.string().min(1)).min(2).describe(
|
|
78
|
+
"Concrete, deterministically-checkable conditions that together define done. Each names a trigger/precondition and the exact observable result (run X -> output Y; file Z contains W; f(a) returns b) \u2014 no vague adjectives, not a restatement of a step. The set must collectively cover every step's change. At least 2 required. " + INLINE_CODE_HINT
|
|
79
|
+
),
|
|
80
|
+
risks: z.array(z.string()).describe("Anything that could change the approach."),
|
|
81
|
+
outOfScope: z.array(z.string()).describe("What is deliberately not being done.")
|
|
82
|
+
};
|
|
83
|
+
/**
 * Wrap a plan schema with a refinement that reports an issue on `rootCause`
 * when `scope` is "fix" and `rootCause` is undefined or blank after trimming.
 *
 * @param {{ superRefine: Function }} schema Schema to refine.
 * @returns {*} Whatever `schema.superRefine` returns.
 */
function requireRootCauseForFix(schema) {
  const isBlank = (value) => value === void 0 || value.trim() === "";
  return schema.superRefine((plan, ctx) => {
    // Only fix-scoped plans with a missing or blank root cause are flagged.
    if (plan.scope !== "fix" || !isBlank(plan.rootCause)) {
      return;
    }
    ctx.addIssue({
      code: z.ZodIssueCode.custom,
      path: ["rootCause"],
      message: 'rootCause is required and must be non-empty when scope is "fix".'
    });
  });
}
|
|
94
|
+
var planSchema = requireRootCauseForFix(z.object(planInputSchema));
|
|
95
|
+
/**
 * Render a validated plan object as markdown.
 *
 * Section order: title, scope, goal, optional motivation, optional
 * assumptions, optional root cause, requirements, numbered steps (each with
 * bullets and per-file fenced pseudo code), acceptance-criteria checklist,
 * optional risks, optional out-of-scope list, and finally PLAN_MARKER.
 *
 * @param {object} plan Plan matching the plan schema.
 * @returns {string} Newline-joined markdown.
 */
function renderPlan(plan) {
  const hasText = (value) => Boolean(value) && value.trim().length > 0;
  const bulleted = (items, prefix = "- ") => items.map((item) => `${prefix}${item}`);
  const out = [
    `# ${plan.title}`,
    "",
    `**Scope** \u2014 \`${plan.scope}\``,
    "",
    `**Goal** \u2014 ${plan.goal}`
  ];
  if (hasText(plan.motivation)) {
    out.push("", "## Motivation", plan.motivation);
  }
  if (plan.assumptions.length > 0) {
    out.push("", "**Assumptions**", ...bulleted(plan.assumptions));
  }
  if (hasText(plan.rootCause)) {
    out.push("", "## Root cause", plan.rootCause);
  }
  out.push("", "## Requirements", ...bulleted(plan.requirements));
  out.push("", "## Steps");
  plan.steps.forEach((step, index) => {
    out.push("", `### ${index + 1}. ${step.title}`, "", ...bulleted(step.description));
    // Each pseudo-code entry becomes the file name followed by a fenced block,
    // tagged with a language when the file extension is recognised.
    for (const entry of step.pseudoCode ?? []) {
      const lang = langFromPath(entry.file);
      out.push("", `\`${entry.file}\``, "", lang ? "```" + lang : "```", entry.pseudoCode, "```");
    }
  });
  out.push("", "## Acceptance criteria", ...bulleted(plan.acceptanceCriteria, "- [ ] "));
  if (plan.risks.length > 0) {
    out.push("", "**Risks / open questions**", ...bulleted(plan.risks));
  }
  if (plan.outOfScope.length > 0) {
    out.push("", "**Out of scope**", ...bulleted(plan.outOfScope));
  }
  out.push("", PLAN_MARKER);
  return out.join("\n");
}
|
|
168
|
+
// Input shape of the submit_plan tool: a non-empty array of plans, each validated by the
// per-plan schema (including the rootCause-for-fix requirement), plus a cross-plan title check.
var submitPlanInputSchema = {
  plans: z.array(requireRootCauseForFix(z.object(planInputSchema))).min(1).refine(
    (arr) => {
      // Titles are compared after trimming. Blank titles must fail the check rather than be
      // filtered out of it; otherwise whitespace-only titles (even several of them) would slip
      // through despite the "distinct non-empty" promise in the message below.
      const titles = arr.map((p) => p.title.trim());
      return titles.every((t) => t.length > 0) && new Set(titles).size === titles.length;
    },
    { message: "Each plan must have a distinct non-empty title" }
  )
};
|
|
177
|
+
// Object schema built from submitPlanInputSchema; used below both to publish the submit_plan
// JSON schema and to parse incoming submit_plan arguments.
var submitPlanSchema = z.object(submitPlanInputSchema);
|
|
178
|
+
|
|
179
|
+
// src/widgets.ts
|
|
180
|
+
import { randomUUID } from "node:crypto";
|
|
181
|
+
import { createSdkMcpServer as createSdkMcpServer2, tool as tool2 } from "@anthropic-ai/claude-agent-sdk";
|
|
182
|
+
import { z as z2 } from "zod";
|
|
183
|
+
// Server name and bare tool names for the two question widgets.
var SERVER_NAME2 = "flume_widgets";
var SINGLE_SELECT = "single_select";
var MULTI_SELECT = "multi_select";
// Qualified names in the form `mcp__<server>__<tool>`, single-select first.
var WIDGET_TOOL_NAMES = [SINGLE_SELECT, MULTI_SELECT].map(
  (toolName) => `mcp__${SERVER_NAME2}__${toolName}`
);
|
|
190
|
+
// Answer choices shared by both select widgets: an array of 2 to 8 non-empty strings.
// Note: "distinct" is only stated in the description; nothing here rejects duplicate labels.
var optionsSchema = z2.array(z2.string().min(1)).min(2).max(8).describe("2\u20138 short, distinct choices for the user to pick from. " + WIDGET_LANGUAGE_HINT);
|
|
191
|
+
// Shared guidance text (no catch-all option; end the turn after asking).
// NOTE(review): not referenced by the tool descriptions visible in this bundle section —
// presumably appended to widget tool descriptions elsewhere; confirm it is still used.
var TAIL = "Do NOT add an 'Other' or 'None of these' catch-all \u2014 the UI always offers an 'Other' free-text option automatically. " + WIDGET_LANGUAGE_HINT + " After calling this, END YOUR TURN and wait: the user's answer arrives as their next message and starts a fresh run.";
|
|
192
|
+
// The single- and multi-select tools accept the same three inputs, so both shapes
// come from one factory. Each call yields a fresh object with its own `question`
// and `body` schemas; `options` is the shared optionsSchema in both.
function makeSelectShape() {
  const question = z2.string().min(1).describe("The question to ask the user. " + WIDGET_LANGUAGE_HINT);
  const body = z2.string().optional().describe(
    "Optional markdown shown above the question so the user can read the context they're confirming (e.g. the drafted release notes). Omit for plain questions."
  );
  return { question, body, options: optionsSchema };
}
var singleSelectShape = makeSelectShape();
var multiSelectShape = makeSelectShape();
|
|
206
|
+
/**
 * Build an unanswered single-select widget from validated tool arguments.
 *
 * The widget and each option get their own random UUID; `selectedOptionId`
 * and `customAnswer` start as null.
 *
 * @param {{question: string, body?: string, options: string[]}} args
 * @returns {object} Widget record with `type: "single_select"`.
 */
function buildSingleSelectWidget(args) {
  const { question, body, options: labels } = args;
  const id = randomUUID();
  const options = labels.map((label) => ({ id: randomUUID(), label }));
  return {
    id,
    type: "single_select",
    question,
    body,
    options,
    selectedOptionId: null,
    customAnswer: null
  };
}
|
|
217
|
+
/**
 * Build an unanswered multi-select widget from validated tool arguments.
 *
 * The widget and each option get their own random UUID; `selectedOptionIds`
 * and `customAnswer` start as null.
 *
 * @param {{question: string, body?: string, options: string[]}} args
 * @returns {object} Widget record with `type: "multi_select"`.
 */
function buildMultiSelectWidget(args) {
  const { question, body, options: labels } = args;
  const id = randomUUID();
  const options = labels.map((label) => ({ id: randomUUID(), label }));
  return {
    id,
    type: "multi_select",
    question,
    body,
    options,
    selectedOptionIds: null,
    customAnswer: null
  };
}
|
|
228
|
+
|
|
229
|
+
// src/mcp-stdio.ts
|
|
230
|
+
// Path of the file that receives emitted records, read from FLUME_MCP_OUTPUT.
var OUT = process.env.FLUME_MCP_OUTPUT;
if (!OUT) {
  // No destination configured: report on stderr and exit with a failure status.
  console.error("FLUME_MCP_OUTPUT env var not set");
  process.exit(1);
}
// Make sure the file exists; an already-present file is left untouched.
if (existsSync(OUT) === false) {
  writeFileSync(OUT, "");
}
|
|
238
|
+
/**
 * Append one record to the output file as a single JSON line.
 *
 * @param {*} rec - Value to serialize with JSON.stringify.
 */
function emit(rec) {
  const line = `${JSON.stringify(rec)}\n`;
  appendFileSync(OUT, line);
}
|
|
241
|
+
/**
 * Wrap a message in a tool result containing a single text content part.
 *
 * @param {string} text - Message to return to the caller of the tool.
 * @returns {{content: Array<{type: string, text: string}>}}
 */
function ack(text) {
  const textPart = { type: "text", text };
  return { content: [textPart] };
}
|
|
244
|
+
/**
 * Convert a raw shape (field name -> schema) to JSON Schema by first wrapping it
 * in an object schema.
 *
 * @param {object} shape - Field schemas keyed by property name.
 * @returns {*} Whatever toJSONSchema returns for the wrapped object schema.
 */
function shapeToJsonSchema(shape) {
  const objectSchema = z3.object(shape);
  return toJSONSchema(objectSchema);
}
|
|
247
|
+
// Static tool catalogue returned by the list-tools handler: submit_plan first,
// then the two select widgets, whose entries differ only in name, description
// and input shape.
var TOOLS = [
  {
    name: "submit_plan",
    description: "Submit ALL your plans in a single call \u2014 one entry per plan; each becomes its own independently-acceptable plan draft. Do NOT call submit_plan more than once.",
    inputSchema: toJSONSchema(submitPlanSchema)
  },
  ...[
    [
      "single_select",
      "Ask the user a single-select (radio-button) question \u2014 exactly one answer.",
      singleSelectShape
    ],
    [
      "multi_select",
      "Ask the user a multi-select (checkbox) question \u2014 they may pick any number of options.",
      multiSelectShape
    ]
  ].map(([name, description, shape]) => ({
    name,
    description,
    inputSchema: shapeToJsonSchema(shape)
  }))
];
|
|
264
|
+
// Server instance identified as "flume" v1, declaring only the `tools` capability.
// (Server's import is outside this view — presumably the MCP SDK's server class.)
var server = new Server({ name: "flume", version: "1" }, { capabilities: { tools: {} } });
|
|
265
|
+
// List-tools requests always get the static TOOLS catalogue.
server.setRequestHandler(ListToolsRequestSchema, async () => ({ tools: TOOLS }));
|
|
266
|
+
// Tool-call dispatcher: validate the arguments for the named tool, append the
// resulting record to the output file via emit(), and acknowledge. Validation
// failures propagate as thrown errors from `.parse`; an unrecognized tool name
// throws as well.
server.setRequestHandler(CallToolRequestSchema, async ({ params }) => {
  const { name: toolName, arguments: toolArgs } = params;
  switch (toolName) {
    case "submit_plan": {
      const submission = submitPlanSchema.parse(toolArgs);
      emit({ kind: "plans", plans: submission.plans.map(renderPlan) });
      return ack("Plan(s) submitted. End your turn.");
    }
    case "single_select": {
      const question = z3.object(singleSelectShape).parse(toolArgs);
      emit({ kind: "widget", widget: buildSingleSelectWidget(question) });
      return ack("Question posted as a single-select widget. End your turn.");
    }
    case "multi_select": {
      const question = z3.object(multiSelectShape).parse(toolArgs);
      emit({ kind: "widget", widget: buildMultiSelectWidget(question) });
      return ack("Question posted as a multi-select widget. End your turn.");
    }
    default:
      throw new Error(`Unknown tool: ${toolName}`);
  }
});
|
|
285
|
+
// Transport the server is connected to below (class name suggests stdin/stdout; import not visible here).
var transport = new StdioServerTransport();
|
|
286
|
+
// Top-level await: attach the server to the transport as the last step of module start-up.
await server.connect(transport);
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@flumecode/runner",
|
|
3
|
-
"version": "0.19.0",
|
|
3
|
+
"version": "0.21.0-beta.1",
|
|
4
4
|
"type": "module",
|
|
5
5
|
"description": "FlumeCode local runner — claims jobs and drives your local Claude Code against a real checkout.",
|
|
6
6
|
"bin": {
|
|
@@ -18,7 +18,7 @@
|
|
|
18
18
|
"provenance": false
|
|
19
19
|
},
|
|
20
20
|
"scripts": {
|
|
21
|
-
"build": "esbuild src/cli.ts --bundle --platform=node --format=esm --target=node20 --packages=external --
|
|
21
|
+
"build": "esbuild src/cli.ts src/mcp-stdio.ts --bundle --platform=node --format=esm --target=node20 --packages=external --outdir=dist --banner:js=\"#!/usr/bin/env node\"",
|
|
22
22
|
"dev": "tsx src/cli.ts",
|
|
23
23
|
"login": "tsx src/cli.ts login",
|
|
24
24
|
"start": "tsx src/cli.ts start",
|
|
@@ -27,6 +27,7 @@
|
|
|
27
27
|
},
|
|
28
28
|
"dependencies": {
|
|
29
29
|
"@anthropic-ai/claude-agent-sdk": "^0.3.0",
|
|
30
|
+
"@modelcontextprotocol/sdk": "^1",
|
|
30
31
|
"zod": "4.4.3"
|
|
31
32
|
},
|
|
32
33
|
"devDependencies": {
|
|
@@ -12,7 +12,7 @@ description: >-
|
|
|
12
12
|
|
|
13
13
|
# implement-plan
|
|
14
14
|
|
|
15
|
-
You are the **orchestrator**. You run on
|
|
15
|
+
You are the **orchestrator**. You run on the `think` model and your job is to
|
|
16
16
|
_coordinate_, not to write the implementation. You delegate each phase to a
|
|
17
17
|
subagent through the **Task** tool, choosing the model that phase needs, and you
|
|
18
18
|
stitch their results into one report. Doing the implementation yourself defeats
|
|
@@ -30,10 +30,9 @@ put it in the prompt, the subagent doesn't have it.
|
|
|
30
30
|
## How you delegate
|
|
31
31
|
|
|
32
32
|
- Spawn each phase with the **Task** tool, `subagent_type: "general-purpose"`.
|
|
33
|
-
- **Model per phase** (pass
|
|
34
|
-
- `
|
|
35
|
-
|
|
36
|
-
- `"opus"` — acceptance-criteria review, code-quality review, and the report.
|
|
33
|
+
- **Model per phase** (pass the concrete id from the Model tiers block as the Task `model` argument):
|
|
34
|
+
- `execute` — implementation, fixes, and the Verify step (mechanical command-running; Verify is read-only).
|
|
35
|
+
- `think` — acceptance-criteria review, code-quality review, and the report.
|
|
37
36
|
- **Read-only phases.** Tell every review, Verify, and report subagent to _inspect
|
|
38
37
|
and report only — never edit, create, or delete files_. Only implementation/fix
|
|
39
38
|
subagents may change the working tree.
|
|
@@ -68,7 +67,7 @@ the next step.
|
|
|
68
67
|
it in the prompts you write for the Implement, Verify, and Fix-loop subagents
|
|
69
68
|
so none of them re-derive it. Do not implement.
|
|
70
69
|
|
|
71
|
-
2. **Implement** — Task, `model:
|
|
70
|
+
2. **Implement** — Task, `model: execute`. Give the subagent: the plan steps, a
|
|
72
71
|
pointer to the wiki/orientation, the coding guidelines (verbatim), and the
|
|
73
72
|
explicit verification command list the orchestrator discovered in the Orient
|
|
74
73
|
step. Tell it to make all the code changes in the working tree to satisfy the
|
|
@@ -83,7 +82,7 @@ the next step.
|
|
|
83
82
|
the verification commands it ran and their pass/fail results, which files it
|
|
84
83
|
changed, and how each plan step was addressed. It must not commit or push.
|
|
85
84
|
|
|
86
|
-
3. **Verify (build & tests)** — Task, `model:
|
|
85
|
+
3. **Verify (build & tests)** — Task, `model: execute`, read-only. This step
|
|
87
86
|
gives the orchestrator an objective, independent build/test signal before the
|
|
88
87
|
subjective AC and quality reviews. Tell the subagent to:
|
|
89
88
|
- Run the verification commands provided by the orchestrator in the task
|
|
@@ -100,7 +99,7 @@ the next step.
|
|
|
100
99
|
excerpt (if any).
|
|
101
100
|
- Must not edit, create, or delete any files.
|
|
102
101
|
|
|
103
|
-
4. **Acceptance-criteria review** — Task, `model:
|
|
102
|
+
4. **Acceptance-criteria review** — Task, `model: think`, read-only. Give the
|
|
104
103
|
subagent the full AC list and tell it to verify each one against the actual
|
|
105
104
|
changes (run `git --no-pager diff`, read the changed files, run tests/build if
|
|
106
105
|
useful). For **each** AC it must return: the criterion text verbatim, a verdict
|
|
@@ -117,7 +116,7 @@ the next step.
|
|
|
117
116
|
note any files or areas that appear changed but don't map to any AC as a coverage
|
|
118
117
|
gap (signalling a missing AC or an out-of-scope change).
|
|
119
118
|
|
|
120
|
-
5. **Code-quality review** — Task, `model:
|
|
119
|
+
5. **Code-quality review** — Task, `model: think`, read-only. Give the subagent
|
|
121
120
|
the coding guidelines (verbatim) and tell it to review the changes for
|
|
122
121
|
violations and quality problems, returning concrete findings as
|
|
123
122
|
`file:line — what — why`, each marked **must-fix** or **nice-to-have**.
|
|
@@ -125,7 +124,7 @@ the next step.
|
|
|
125
124
|
6. **Fix loop.** If the Verify step (step 3) reports any failing check, the AC
|
|
126
125
|
review (step 4) reports any _not met_ AC, or the quality review (step 5)
|
|
127
126
|
reports any _must-fix_ finding: spawn an **Implement/fix** subagent (Task,
|
|
128
|
-
`model:
|
|
127
|
+
`model: execute`) whose prompt lists exactly those findings and tells it to
|
|
129
128
|
resolve them without regressing the rest. Include the verification command list
|
|
130
129
|
from the Orient step in the fix subagent's prompt (the same list passed to
|
|
131
130
|
Implement and Verify), so the fix subagent does not need to re-derive it. When
|
|
@@ -135,7 +134,7 @@ the next step.
|
|
|
135
134
|
times. If something still fails after that, stop looping and record the gap
|
|
136
135
|
honestly in the report — do not hide it.
|
|
137
136
|
|
|
138
|
-
7. **Report** — Task, `model:
|
|
137
|
+
7. **Report** — Task, `model: think`, read-only. Give the subagent the AC
|
|
139
138
|
verdicts (with criterion text, from step 4), the Verify results (from step 3),
|
|
140
139
|
and the quality findings, and tell it to run `git --no-pager diff` itself as
|
|
141
140
|
the **single source of truth** for the report. Pass the Verify results as the
|
|
@@ -191,7 +190,7 @@ The report subagent calls `submit_report` with these fields:
|
|
|
191
190
|
|
|
192
191
|
- Delegate through Task subagents; don't implement, review, or write the report
|
|
193
192
|
yourself.
|
|
194
|
-
- Right model per phase: `
|
|
193
|
+
- Right model per phase: `execute` to implement/fix/verify (Verify is read-only), `think` to review/report.
|
|
195
194
|
- Make every Task prompt self-contained — subagents see only what you give them.
|
|
196
195
|
- Reviewers and the report writer never modify files.
|
|
197
196
|
- Never commit, push, or open a PR.
|
|
@@ -34,7 +34,10 @@ and determine which phase you are in:**
|
|
|
34
34
|
If the request is a bug fix, investigate deeply enough to identify the **root cause** (the specific code or condition producing the incorrect behavior) before proceeding to Plan — you will need it for the `rootCause` field.
|
|
35
35
|
2. Identify genuine ambiguity only: scope, intended behavior, edge cases,
|
|
36
36
|
competing approaches, acceptance criteria. Skip anything you can reasonably
|
|
37
|
-
decide yourself.
|
|
37
|
+
decide yourself. One high-leverage question is the user's motivation — **why** they
|
|
38
|
+
are making this request — but only when the request title/body and context don't
|
|
39
|
+
already explain it. Never re-ask what the thread already answers (consistent with
|
|
40
|
+
"Never ask what the code answers").
|
|
38
41
|
3. End your turn with **2–5 specific, high-leverage questions**, each with a
|
|
39
42
|
recommended default so the user can reply "all defaults" if they want. Group
|
|
40
43
|
them; keep it short. Do **not** include a plan yet.
|
|
@@ -66,6 +69,7 @@ Field-by-field guidance:
|
|
|
66
69
|
answers the request's title and body. Must be achievable by the steps below
|
|
67
70
|
and nothing more.
|
|
68
71
|
- **`rootCause`** — required when `scope` is `fix`; omit for all other scopes. Identify the underlying cause of the bug — the specific code, logic, or condition that produces the incorrect behavior, **not** the symptom. To fill this accurately, you must investigate the codebase deeply enough to find the root cause before writing the plan.
|
|
72
|
+
- **`motivation`** — optional. The reason the user is making this request: the underlying problem or need the change addresses, whether stated up front or elicited by asking. Fill this in when the request content/context does NOT already state the why (ask during Phase 1 — Clarify if needed); omit it when there is no additional motivation to record. It helps future readers understand why the system is the way it is.
|
|
69
73
|
- **`assumptions`** — anything you decided during investigation (including
|
|
70
74
|
unanswered defaults from Phase 1).
|
|
71
75
|
- **`requirements`** — **required; at least 1 item.** Plain-language statements of what this change must accomplish and why, written so a non-technical reader can follow them. Distinct from `acceptanceCriteria`: requirements explain intent and rationale; acceptance criteria are the machine-checkable proof.
|
|
@@ -67,9 +67,9 @@ essentials:
|
|
|
67
67
|
implementation, not a rebuild. Change only what the user asked for plus what that
|
|
68
68
|
change strictly requires; don't regress the rest of the plan.
|
|
69
69
|
- **Pipeline:** Implement (self-runs build/tests & fixes its own errors, Task
|
|
70
|
-
`model:
|
|
71
|
-
acceptance/quality review (Task `model:
|
|
72
|
-
(≤2, re-run Verify after each fix) → report (Task `model:
|
|
70
|
+
`model: execute`) → Verify (build/tests, read-only, Task `model: execute`) →
|
|
71
|
+
acceptance/quality review (Task `model: think`, read-only) → fix loop if needed
|
|
72
|
+
(≤2, re-run Verify after each fix) → report (Task `model: think`, read-only).
|
|
73
73
|
Detailed mechanics (command discovery, Verify step spec, fix-loop trigger
|
|
74
74
|
conditions) are in `implement-plan/SKILL.md` — read it for the full pipeline.
|
|
75
75
|
- **No git side effects.** Never commit, push, or open a PR — leave the changes in
|