@flumecode/runner 0.8.0 → 0.10.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md
CHANGED
|
@@ -63,7 +63,7 @@ skipping if that version is already on npm).
|
|
|
63
63
|
6. Report the summary back (`POST /api/runner/jobs/:id/complete`), which fills in
|
|
64
64
|
the pending agent comment in the thread.
|
|
65
65
|
|
|
66
|
-
Jobs come in two kinds. **
|
|
66
|
+
Jobs come in two kinds. **comment** jobs answer a request thread (the flow above).
|
|
67
67
|
**init** jobs bootstrap a repository: they clone the default branch onto a fresh
|
|
68
68
|
`flumecode/init-*` branch, run the `flumecode:document` skill to create the
|
|
69
69
|
`.flumecode/` wiki, and open a PR. A repo must be initialized (from its dashboard
|
package/dist/cli.js
CHANGED
|
@@ -136,6 +136,19 @@ async function reportHeartbeat(config, claudeCode) {
|
|
|
136
136
|
noteServerVersion(res);
|
|
137
137
|
if (!res.ok) throw new Error(`heartbeat failed: ${res.status} ${await safeText(res)}`);
|
|
138
138
|
}
|
|
139
|
+
/**
 * Upload the collected log text for one job to the FlumeCode server.
 *
 * Sends `{ content }` as JSON to `POST /api/runner/jobs/:id/logs`, authenticated
 * with the runner's bearer token and tagged with the runner version header.
 *
 * @param {{ serverUrl: string, token: string }} config - runner configuration.
 * @param {string} jobId - id of the job the log belongs to.
 * @param {string} content - the full log text to store.
 * @returns {Promise<void>}
 * @throws {Error} when the server answers with a non-2xx status.
 */
async function uploadJobLog(config, jobId, content) {
  const endpoint = `${config.serverUrl}/api/runner/jobs/${jobId}/logs`;
  const headers = {
    authorization: `Bearer ${config.token}`,
    "content-type": "application/json",
    [RUNNER_VERSION_HEADER]: RUNNER_VERSION
  };
  const payload = JSON.stringify({ content });
  const res = await fetch(endpoint, { method: "POST", headers, body: payload });
  // Record the server's version from the response before deciding success.
  noteServerVersion(res);
  if (res.ok) return;
  throw new Error(`log upload failed: ${res.status} ${await safeText(res)}`);
}
|
|
139
152
|
async function safeText(res) {
|
|
140
153
|
try {
|
|
141
154
|
return await res.text();
|
|
@@ -252,59 +265,59 @@ var planInputSchema = {
|
|
|
252
265
|
};
|
|
253
266
|
var planSchema = z2.object(planInputSchema);
|
|
254
267
|
/**
 * Render a structured plan into the canonical plan markdown.
 *
 * Layout: title, scope, goal, optional assumptions, numbered steps (each with
 * optional per-file pseudo-code fences), acceptance-criteria checklist,
 * optional risks and out-of-scope lists, and finally the plan marker line.
 *
 * @param {object} plan - plan fields (title, scope, goal, assumptions, steps,
 *   acceptanceCriteria, risks, outOfScope).
 * @returns {string} the markdown document, lines joined with "\n".
 */
function renderPlan(plan) {
  const out = [];
  // Appends a blank line, the heading, then one prefixed line per item.
  const section = (heading, items, prefix) => {
    out.push("", heading);
    for (const item of items) out.push(`${prefix}${item}`);
  };

  out.push(
    `# ${plan.title}`,
    "",
    `**Scope** \u2014 \`${plan.scope}\``,
    "",
    `**Goal** \u2014 ${plan.goal}`
  );
  if (plan.assumptions.length > 0) section("**Assumptions**", plan.assumptions, "- ");

  out.push("", "## Steps");
  let ordinal = 0;
  for (const step of plan.steps) {
    ordinal += 1;
    out.push("", `### ${ordinal}. ${step.title}`, "", step.description);
    // Pseudo-code is optional; skip when it is missing or empty.
    if (!(step.pseudoCode?.length > 0)) continue;
    for (const entry of step.pseudoCode) {
      out.push("", `\`${entry.file}\``, "", "```", entry.pseudoCode, "```");
    }
  }

  // The acceptance-criteria section is always present, even with no items.
  section("## Acceptance criteria", plan.acceptanceCriteria, "- [ ] ");
  if (plan.risks.length > 0) section("**Risks / open questions**", plan.risks, "- ");
  if (plan.outOfScope.length > 0) section("**Out of scope**", plan.outOfScope, "- ");

  out.push("", PLAN_MARKER);
  return out.join("\n");
}
|
|
309
322
|
var submitPlanInputSchema = {
|
|
310
323
|
plans: z2.array(z2.object(planInputSchema)).min(1).refine(
|
|
@@ -379,27 +392,27 @@ var reportInputSchema = {
|
|
|
379
392
|
};
|
|
380
393
|
var reportSchema = z3.object(reportInputSchema);
|
|
381
394
|
/**
 * Render a structured implementation report into markdown.
 *
 * Emits the trimmed summary and prose, then an "Acceptance criteria" section
 * with one `###` entry per criterion (status icon, criterion text, trimmed
 * rationale) followed by each piece of evidence as a file label and a
 * ```diff fence containing the hunk without trailing newlines.
 *
 * @param {object} report - report fields (summary, prose, acceptanceCriteria).
 * @returns {string} the markdown document, lines joined with "\n".
 */
function renderReport(report) {
  const out = [
    report.summary.trim(),
    "",
    report.prose.trim(),
    "",
    "## Acceptance criteria"
  ];
  for (const ac of report.acceptanceCriteria) {
    out.push("", `### ${STATUS_ICON[ac.status]} ${ac.criterion}`, "", ac.rationale.trim());
    for (const ev of ac.evidence) {
      let label = `\`${ev.file}\``;
      // The note is optional; when present it follows the file after an em dash.
      if (ev.note) label = `\`${ev.file}\` \u2014 ${ev.note}`;
      out.push("", label, "", "```diff", ev.hunk.replace(/\n+$/, ""), "```");
    }
  }
  return out.join("\n");
}
|
|
404
417
|
function createReportTooling() {
|
|
405
418
|
let submittedReport = null;
|
|
@@ -426,8 +439,46 @@ function createReportTooling() {
|
|
|
426
439
|
return { mcpServer, getReport: () => submittedReport };
|
|
427
440
|
}
|
|
428
441
|
|
|
442
|
+
// src/logger.ts
|
|
443
|
+
// In-memory log for the job currently being processed. State is module-level:
// startJobLog() resets it, logEvent() appends, getJobLog() renders it.
var lines = [];
// Secret strings that must never appear in the log, longest first.
var secrets = [];
// Upper bound on the UTF-8 size of the rendered log (a short truncation
// marker is added on top of this when the log is cut).
var MAX_BYTES = 10 * 1024 * 1024;

/**
 * Begin a fresh log for a job and register the secrets to redact.
 *
 * @param {{ jobId: string, kind: string, secrets: string[] }} opts
 */
function startJobLog(opts) {
  lines = [];
  // Dedupe and redact longest-first: if one secret is a prefix/substring of
  // another, replacing the short one first would leave the rest of the longer
  // secret visible in the log.
  secrets = [...new Set(opts.secrets.filter(Boolean).map(String))].sort(
    (a, b) => b.length - a.length
  );
  logEvent("meta", `job ${opts.jobId} (${opts.kind}) started at ${(/* @__PURE__ */ new Date()).toISOString()}`);
}

/**
 * Replace every occurrence of every registered secret with a placeholder.
 *
 * @param {*} s - text to scrub; non-strings are coerced with String() so a
 *   missing payload cannot crash logging.
 * @returns {string} the scrubbed text.
 */
function redact(s) {
  let out = typeof s === "string" ? s : String(s);
  for (const sec of secrets) {
    out = out.split(sec).join("***REDACTED***");
  }
  return out;
}

/**
 * Append one timestamped, redacted entry to the current job log.
 *
 * @param {string} section - short tag such as "git" or "agent".
 * @param {*} text - entry body.
 */
function logEvent(section, text) {
  lines.push(`[${(/* @__PURE__ */ new Date()).toISOString()}] [${section}] ${redact(text)}`);
}

/**
 * Render the current job log as one string, truncating the middle when its
 * UTF-8 encoding exceeds MAX_BYTES.
 *
 * The limit is measured in bytes (not UTF-16 code units), and cut points are
 * moved to UTF-8 character boundaries so no character is split.
 *
 * @returns {string} the full log, or head + truncation marker + tail.
 */
function getJobLog() {
  const full = lines.join("\n");
  // Cheap exit: UTF-8 uses at most 3 bytes per UTF-16 code unit.
  if (full.length * 3 <= MAX_BYTES) return full;
  const bytes = Buffer.from(full, "utf8");
  if (bytes.length <= MAX_BYTES) return full;

  const half = Math.floor(MAX_BYTES / 2);
  const isContinuation = (b) => (b & 0xc0) === 0x80;
  // Back the head cut up, and push the tail cut forward, until each sits on
  // the first byte of a character.
  let headEnd = half;
  while (headEnd > 0 && isContinuation(bytes[headEnd])) headEnd -= 1;
  let tailStart = bytes.length - half;
  while (tailStart < bytes.length && isContinuation(bytes[tailStart])) tailStart += 1;

  const head = bytes.toString("utf8", 0, headEnd);
  const tail = bytes.toString("utf8", tailStart);
  return `${head}\n\n\u2026[truncated ${tailStart - headEnd} bytes]\u2026\n\n${tail}`;
}
|
|
470
|
+
|
|
429
471
|
// src/executor.ts
|
|
430
472
|
var FLUME_PLUGIN_DIR = fileURLToPath2(new URL("../skills-plugin", import.meta.url));
|
|
473
|
+
/**
 * Convert a tool_result block's `content` into loggable text.
 *
 * - strings are returned unchanged;
 * - arrays are rendered one element per line, using an element's `text`
 *   property when it has one and its JSON form otherwise;
 * - anything else is rendered as JSON.
 *
 * @param {*} content - the tool result payload (may be absent).
 * @returns {string} always a string, so it is safe to pass to the logger.
 */
function stringifyResult(content) {
  if (typeof content === "string") return content;
  if (Array.isArray(content)) {
    return content.map(
      (c) => typeof c === "object" && c !== null && "text" in c ? String(c.text) : JSON.stringify(c)
    ).join("\n");
  }
  // JSON.stringify returns undefined (not a string) for undefined, functions
  // and symbols; fall back to "" so callers never receive a non-string.
  return JSON.stringify(content) ?? "";
}
|
|
431
482
|
async function runClaudeCode(opts) {
|
|
432
483
|
let finalText = "";
|
|
433
484
|
const { mcpServer, collected } = createWidgetTooling();
|
|
@@ -463,11 +514,26 @@ async function runClaudeCode(opts) {
|
|
|
463
514
|
for (const block of content) {
|
|
464
515
|
if (block && block.type === "text" && typeof block.text === "string") {
|
|
465
516
|
process.stdout.write(block.text);
|
|
517
|
+
logEvent("agent", block.text);
|
|
518
|
+
} else if (block && block.type === "tool_use") {
|
|
519
|
+
logEvent("tool_use", `${block.name} ${JSON.stringify(block.input)}`);
|
|
520
|
+
}
|
|
521
|
+
}
|
|
522
|
+
}
|
|
523
|
+
} else if (message.type === "user") {
|
|
524
|
+
const content = message.message?.content;
|
|
525
|
+
if (Array.isArray(content)) {
|
|
526
|
+
for (const block of content) {
|
|
527
|
+
if (block && block.type === "tool_result") {
|
|
528
|
+
logEvent("tool_result", stringifyResult(block.content));
|
|
466
529
|
}
|
|
467
530
|
}
|
|
468
531
|
}
|
|
469
532
|
} else if (message.type === "result") {
|
|
470
533
|
finalText = message.result ?? "";
|
|
534
|
+
logEvent("result", finalText);
|
|
535
|
+
} else if (message.type === "system") {
|
|
536
|
+
logEvent("system", JSON.stringify(message));
|
|
471
537
|
}
|
|
472
538
|
}
|
|
473
539
|
process.stdout.write("\n");
|
|
@@ -547,18 +613,18 @@ function turnHeading(turn, agentName) {
|
|
|
547
613
|
if (turn.kind === "report") return `${agentName} (implementation report)`;
|
|
548
614
|
return agentName;
|
|
549
615
|
}
|
|
550
|
-
function appendThread(
|
|
616
|
+
/**
 * Append the conversation history to a prompt being assembled.
 *
 * Does nothing when the context has no thread. Otherwise adds a
 * "# Conversation so far" heading followed by one "## <heading>" section per
 * turn containing that turn's content.
 *
 * @param {string[]} lines2 - prompt lines; mutated in place.
 * @param {object} ctx - job context (reads `thread` and `agentName`).
 */
function appendThread(lines2, ctx) {
  const turns = ctx.thread;
  if (!turns || turns.length === 0) return;
  lines2.push("");
  lines2.push("# Conversation so far");
  for (const turn of turns) {
    // Resolve both values before touching the array so a failure here leaves
    // the prompt lines unchanged for this turn.
    const heading = `## ${turnHeading(turn, ctx.agentName)}`;
    const body = turn.content;
    lines2.push("");
    lines2.push(heading);
    lines2.push(body);
  }
}
|
|
557
623
|
function buildPrompt(ctx) {
|
|
558
624
|
const task = ctx.permissionMode === "plan" ? `Use the \`flumecode:request-to-plan\` skill to handle this request. You are read-only and cannot modify files \u2014 clarify any ambiguity with the user first, then produce a concrete, actionable plan (the specific changes you would make and why). Cite the relevant files. Do NOT call ExitPlanMode or write the plan to a file. When the plan is ready, call the \`submit_plan\` tool with the structured plan fields; the runner renders it into the canonical plan markdown and posts it as your comment.` : `Use the \`flumecode:implement-plan\` skill to handle this request. You are the ORCHESTRATOR: do not implement, review, or write the report yourself \u2014 follow the skill to delegate each phase to subagents via the Task tool, picking the right model for each. Do not commit or push \u2014 the runner handles that.`;
|
|
559
625
|
const orient = `Before investigating raw source, check for a FlumeCode wiki at \`.flumecode/wiki/\`. If it exists, read \`.flumecode/wiki/README.md\` first \u2014 it is the index \u2014 and follow its links to the pages and source paths relevant to this request. If there is no wiki, work from the code directly.`;
|
|
560
626
|
const widgets = `When you need the user to choose, ask it as a widget rather than writing the options as prose: call \`single_select\` for a one-of-N choice (radio buttons) or \`multi_select\` for a "select all that apply" choice (checkboxes). Don't add your own "Other" option \u2014 the UI always provides one. After calling a widget tool, end your turn \u2014 the user's answer comes back as their next message and starts a fresh run.`;
|
|
561
|
-
const
|
|
627
|
+
const lines2 = [
|
|
562
628
|
`You are "${ctx.agentName}", an autonomous coding agent working inside a FlumeCode request.`,
|
|
563
629
|
`The repository ${ctx.repo.fullName} is checked out in your current working directory on branch "${ctx.repo.checkoutBranch}" at commit ${ctx.repo.checkoutSha.slice(0, 7)}.`,
|
|
564
630
|
task,
|
|
@@ -566,29 +632,29 @@ function buildPrompt(ctx) {
|
|
|
566
632
|
widgets
|
|
567
633
|
];
|
|
568
634
|
if (ctx.permissionMode !== "plan") {
|
|
569
|
-
|
|
635
|
+
lines2.push(
|
|
570
636
|
"",
|
|
571
637
|
"These coding guidelines apply to all code produced in this run:",
|
|
572
638
|
"",
|
|
573
639
|
loadRule("coding-guideline")
|
|
574
640
|
);
|
|
575
641
|
}
|
|
576
|
-
|
|
642
|
+
lines2.push("", `# Request: ${ctx.request?.title ?? ""}`);
|
|
577
643
|
if (ctx.request?.body) {
|
|
578
|
-
|
|
644
|
+
lines2.push("", ctx.request.body);
|
|
579
645
|
}
|
|
580
|
-
appendThread(
|
|
581
|
-
|
|
646
|
+
appendThread(lines2, ctx);
|
|
647
|
+
lines2.push(
|
|
582
648
|
"",
|
|
583
649
|
ctx.permissionMode === "plan" ? "Your final reply is posted verbatim as your comment in the thread \u2014 if you called `submit_plan`, the rendered plan is posted automatically; for clarifying questions, your reply text is posted as-is." : "Your final reply is posted verbatim as your comment in the thread \u2014 make it the implementation report your report subagent produced, with nothing added. The runner appends the pull-request link."
|
|
584
650
|
);
|
|
585
|
-
return
|
|
651
|
+
return lines2.join("\n");
|
|
586
652
|
}
|
|
587
653
|
function buildRevisePrompt(ctx) {
|
|
588
654
|
const task = `Use the \`flumecode:revise-implementation\` skill to handle this turn. The plan below was already implemented (its implementation report appears in the conversation below, tagged as such); the user is now asking to fine-tune that implementation. Decide how to respond to their latest message: if it's unclear, ask a clarifying question (as a widget); if it's a bad idea or not feasible, push back with your reasoning; if it warrants rethinking the plan, call \`submit_plan\` with a revised plan; otherwise implement the requested change. When you implement, you are the ORCHESTRATOR: delegate the work to subagents via the Task tool as the skill directs, and do not commit or push \u2014 the runner handles that, updating the existing pull request.`;
|
|
589
655
|
const orient = `Before investigating raw source, check for a FlumeCode wiki at \`.flumecode/wiki/\`. If it exists, read \`.flumecode/wiki/README.md\` first \u2014 it is the index \u2014 and follow its links to the pages and source paths relevant to this change. If there is no wiki, work from the code directly.`;
|
|
590
656
|
const widgets = `When you need the user to choose, ask it as a widget rather than writing the options as prose: call \`single_select\` for a one-of-N choice (radio buttons) or \`multi_select\` for a "select all that apply" choice (checkboxes). Don't add your own "Other" option \u2014 the UI always provides one. After calling a widget tool, end your turn \u2014 the user's answer comes back as their next message and starts a fresh run.`;
|
|
591
|
-
const
|
|
657
|
+
const lines2 = [
|
|
592
658
|
`You are "${ctx.agentName}", an autonomous coding agent fine-tuning an implemented FlumeCode plan in an ongoing thread with the user.`,
|
|
593
659
|
`The repository ${ctx.repo.fullName} is checked out in your current working directory on the plan's implementation branch "${ctx.repo.checkoutBranch}" \u2014 the same branch its open pull request is built from, so any change you push updates that PR.`,
|
|
594
660
|
task,
|
|
@@ -602,20 +668,20 @@ function buildRevisePrompt(ctx) {
|
|
|
602
668
|
`# Plan: ${ctx.request?.title ?? ""}`
|
|
603
669
|
];
|
|
604
670
|
if (ctx.request?.body) {
|
|
605
|
-
|
|
671
|
+
lines2.push("", ctx.request.body);
|
|
606
672
|
}
|
|
607
|
-
appendThread(
|
|
608
|
-
|
|
673
|
+
appendThread(lines2, ctx);
|
|
674
|
+
lines2.push(
|
|
609
675
|
"",
|
|
610
676
|
"The last message above is the user's request for this turn. Your final reply is posted verbatim as your comment in the plan thread: if you implemented a change, make it a short report of what you changed (the runner appends the pull-request link); if you asked a question, called `submit_plan`, or pushed back, your reply text is posted as-is."
|
|
611
677
|
);
|
|
612
|
-
return
|
|
678
|
+
return lines2.join("\n");
|
|
613
679
|
}
|
|
614
680
|
function buildResolvePrompt(ctx) {
|
|
615
681
|
const mergeBranch = ctx.repo.mergeBranch ?? "the merge branch";
|
|
616
682
|
const task = `Use the \`flumecode:resolve-merge-conflict\` skill to handle this turn. A merge of \`${mergeBranch}\` into this branch is IN PROGRESS and has left conflict markers in your working tree. Resolve every conflicted file by correctly integrating BOTH sides \u2014 the change this session implemented (described below) and the incoming changes from \`${mergeBranch}\` \u2014 never blindly discard either side. Remove all conflict markers and verify the result builds and tests pass. Do NOT \`git add\`, commit, push, or open a pull request \u2014 the runner finalizes the merge commit and updates the existing pull request.`;
|
|
617
683
|
const orient = `Before investigating raw source, check for a FlumeCode wiki at \`.flumecode/wiki/\`. If it exists, read \`.flumecode/wiki/README.md\` first \u2014 it is the index \u2014 and follow its links to the pages and source paths relevant to the conflicting code. If there is no wiki, work from the code directly.`;
|
|
618
|
-
const
|
|
684
|
+
const lines2 = [
|
|
619
685
|
`You are "${ctx.agentName}", an autonomous coding agent resolving merge conflicts on an implemented FlumeCode plan.`,
|
|
620
686
|
`The repository ${ctx.repo.fullName} is checked out in your current working directory on the plan's implementation branch "${ctx.repo.checkoutBranch}" \u2014 the same branch its open pull request is built from \u2014 with an in-progress merge of "${mergeBranch}".`,
|
|
621
687
|
task,
|
|
@@ -628,17 +694,17 @@ function buildResolvePrompt(ctx) {
|
|
|
628
694
|
`# Plan: ${ctx.request?.title ?? ""}`
|
|
629
695
|
];
|
|
630
696
|
if (ctx.request?.body) {
|
|
631
|
-
|
|
697
|
+
lines2.push("", ctx.request.body);
|
|
632
698
|
}
|
|
633
|
-
appendThread(
|
|
634
|
-
|
|
699
|
+
appendThread(lines2, ctx);
|
|
700
|
+
lines2.push(
|
|
635
701
|
"",
|
|
636
702
|
"Resolve the conflicts now. Your final reply is posted as a report in the plan thread: summarize which files conflicted and how you resolved each (the runner appends the pull-request link, so don't add one)."
|
|
637
703
|
);
|
|
638
|
-
return
|
|
704
|
+
return lines2.join("\n");
|
|
639
705
|
}
|
|
640
706
|
function buildDocumentPrompt(ctx) {
|
|
641
|
-
const
|
|
707
|
+
const lines2 = [
|
|
642
708
|
`You are "${ctx.agentName}" maintaining the repository wiki for ${ctx.repo.fullName}.`,
|
|
643
709
|
`An implementation just ran in this working directory to satisfy the request below; its changes are uncommitted in the working tree.`,
|
|
644
710
|
`Use the \`flumecode:document\` skill to bring the wiki in sync with those changes. Only edit files under \`.flumecode/wiki/\` \u2014 do not touch application code. The runner commits the wiki alongside the implementation in the same pull request.`,
|
|
@@ -646,14 +712,14 @@ function buildDocumentPrompt(ctx) {
|
|
|
646
712
|
`# Request: ${ctx.request?.title ?? ""}`
|
|
647
713
|
];
|
|
648
714
|
if (ctx.request?.body) {
|
|
649
|
-
|
|
715
|
+
lines2.push("", ctx.request.body);
|
|
650
716
|
}
|
|
651
|
-
appendThread(
|
|
652
|
-
|
|
653
|
-
return
|
|
717
|
+
appendThread(lines2, ctx);
|
|
718
|
+
lines2.push("", "When done, reply with a one- or two-line summary of the wiki changes you made.");
|
|
719
|
+
return lines2.join("\n");
|
|
654
720
|
}
|
|
655
721
|
function buildRepairPrompt(ctx, hookLog) {
|
|
656
|
-
const
|
|
722
|
+
const lines2 = [
|
|
657
723
|
`You are "${ctx.agentName}", fixing a failed pre-commit check in the repository ${ctx.repo.fullName}, checked out in your current working directory.`,
|
|
658
724
|
`The changes from the previous step are still uncommitted in the working tree. When the runner tried to commit them, the repository's pre-commit hook \u2014 which runs the project's own checks (lint / typecheck / unit tests) \u2014 failed. Make the working tree pass those checks: fix the failing code or tests at their root. Do NOT delete or skip tests, weaken assertions, or disable the checks to silence the failure. Preserve the intent of the original change; repair only what's broken. Do NOT commit or push \u2014 the runner re-commits once the checks pass.`,
|
|
659
725
|
"",
|
|
@@ -669,13 +735,13 @@ function buildRepairPrompt(ctx, hookLog) {
|
|
|
669
735
|
"",
|
|
670
736
|
"When done, reply with a one-line summary of what you fixed."
|
|
671
737
|
];
|
|
672
|
-
return
|
|
738
|
+
return lines2.join("\n");
|
|
673
739
|
}
|
|
674
740
|
function buildReleasePrompt(ctx, baseChecks) {
|
|
675
741
|
const task = `Use the \`flumecode:create-release\` skill to handle this turn. You are driving a release: first analyse commits since the last tag, propose version bumps, and ask the user to confirm via widgets (Phase 1); once the user's widget answers appear in the thread, apply the bumps to package.json files and update CHANGELOG.md (Phase 2). Do NOT commit or push \u2014 the runner handles that and opens the bump PR.`;
|
|
676
742
|
const orient = `Before investigating raw source, check for a FlumeCode wiki at \`.flumecode/wiki/\`. If it exists, read \`.flumecode/wiki/README.md\` first \u2014 it is the index \u2014 and follow its links to the pages and source paths relevant to this release. If there is no wiki, work from the code directly.`;
|
|
677
743
|
const widgets = `When you need the user to choose, ask it as a widget rather than writing the options as prose: call \`single_select\` for a one-of-N choice (radio buttons) or \`multi_select\` for a "select all that apply" choice (checkboxes). Don't add your own "Other" option \u2014 the UI always provides one. After calling a widget tool, end your turn \u2014 the user's answer comes back as their next message and starts a fresh run.`;
|
|
678
|
-
const
|
|
744
|
+
const lines2 = [
|
|
679
745
|
`You are "${ctx.agentName}", an autonomous coding agent driving a FlumeCode release.`,
|
|
680
746
|
`The repository ${ctx.repo.fullName} is checked out in your current working directory on the release bump branch "${ctx.repo.checkoutBranch}".`,
|
|
681
747
|
task,
|
|
@@ -689,10 +755,10 @@ function buildReleasePrompt(ctx, baseChecks) {
|
|
|
689
755
|
`# Release: ${ctx.request?.title ?? ""}`
|
|
690
756
|
];
|
|
691
757
|
if (ctx.request?.body) {
|
|
692
|
-
|
|
758
|
+
lines2.push("", ctx.request.body);
|
|
693
759
|
}
|
|
694
760
|
if (baseChecks && !baseChecks.ok) {
|
|
695
|
-
|
|
761
|
+
lines2.push(
|
|
696
762
|
"",
|
|
697
763
|
"# Pre-release check status",
|
|
698
764
|
"",
|
|
@@ -708,12 +774,12 @@ function buildReleasePrompt(ctx, baseChecks) {
|
|
|
708
774
|
"```"
|
|
709
775
|
);
|
|
710
776
|
}
|
|
711
|
-
appendThread(
|
|
712
|
-
|
|
777
|
+
appendThread(lines2, ctx);
|
|
778
|
+
lines2.push(
|
|
713
779
|
"",
|
|
714
780
|
"Your final reply is posted verbatim as your comment in the release thread \u2014 if you called widgets (Phase 1), your reply text accompanies the questions; if you applied the bumps (Phase 2), make it the report the skill produced. The runner appends the pull-request link."
|
|
715
781
|
);
|
|
716
|
-
return
|
|
782
|
+
return lines2.join("\n");
|
|
717
783
|
}
|
|
718
784
|
function buildInitPrompt(ctx) {
|
|
719
785
|
return [
|
|
@@ -740,13 +806,20 @@ var exec = promisify(execFile);
|
|
|
740
806
|
var WORKSPACE_PREFIX = "flume-runner-";
|
|
741
807
|
var MAX_BUFFER = 1 << 24;
|
|
742
808
|
/**
 * Run `git` with the given arguments, mirroring the command and its output
 * into the job log.
 *
 * The command line is logged under "git", non-empty stdout under "git:out"
 * and non-empty stderr under "git:err". On failure the error's stderr (or
 * the error itself) is logged under "git:err" and the error is rethrown.
 *
 * @param {string[]} args - arguments passed to the git executable.
 * @returns {Promise<{ stdout: string, stderr: string }>} the exec result.
 */
async function git(args) {
  logEvent("git", `git ${args.join(" ")}`);
  try {
    const result = await exec("git", args, { maxBuffer: MAX_BUFFER });
    const streams = [
      ["git:out", "stdout"],
      ["git:err", "stderr"]
    ];
    for (const [section, stream] of streams) {
      const text = result[stream].trim();
      if (text) logEvent(section, text);
    }
    return result;
  } catch (err) {
    logEvent("git:err", String(err.stderr ?? err));
    throw err;
  }
}
|
|
745
|
-
|
|
746
|
-
|
|
747
|
-
|
|
748
|
-
await git(["-C", dir, "config", "user.email", RUNNER_GIT_EMAIL]);
|
|
749
|
-
await git(["-C", dir, "config", "user.name", RUNNER_GIT_NAME]);
|
|
820
|
+
/**
 * Set the commit author identity in the repository at `dir`.
 *
 * Writes `user.email` and then `user.name` via `git config`, one command at
 * a time.
 *
 * @param {string} dir - path of the checked-out repository.
 * @param {{ name: string, email: string }} identity - author to configure.
 * @returns {Promise<void>}
 */
async function ensureGitIdentity(dir, identity) {
  const settings = [
    ["user.email", "email"],
    ["user.name", "name"]
  ];
  // Sequential on purpose: each iteration awaits before the next starts.
  for (const [key, field] of settings) {
    await git(["-C", dir, "config", key, identity[field]]);
  }
}
|
|
751
824
|
function cloneUrl(ctx) {
|
|
752
825
|
const { owner, name, cloneToken } = ctx.repo;
|
|
@@ -764,10 +837,21 @@ async function installDependencies(dir) {
|
|
|
764
837
|
const manager = detectPackageManager(dir);
|
|
765
838
|
if (manager === null) return { status: "skipped" };
|
|
766
839
|
const env = { ...process.env, CI: "1", ADBLOCK: "1", DISABLE_OPENCOLLECTIVE: "1" };
|
|
840
|
+
logEvent("install", `${manager} install`);
|
|
767
841
|
try {
|
|
768
|
-
await exec(manager, ["install"], {
|
|
842
|
+
const result = await exec(manager, ["install"], {
|
|
843
|
+
cwd: dir,
|
|
844
|
+
maxBuffer: MAX_BUFFER,
|
|
845
|
+
env,
|
|
846
|
+
timeout: 5 * 6e4
|
|
847
|
+
});
|
|
848
|
+
if (result.stdout.trim()) logEvent("install:out", result.stdout.trim());
|
|
849
|
+
if (result.stderr.trim()) logEvent("install:err", result.stderr.trim());
|
|
769
850
|
return { status: "installed", manager };
|
|
770
851
|
} catch (err) {
|
|
852
|
+
const e = err;
|
|
853
|
+
const detail = [e.stdout, e.stderr].map((s) => typeof s === "string" ? s.trim() : "").filter(Boolean).join("\n");
|
|
854
|
+
logEvent("install:err", detail || (err instanceof Error ? err.message : String(err)));
|
|
771
855
|
return { status: "failed", manager, error: err instanceof Error ? err.message : String(err) };
|
|
772
856
|
}
|
|
773
857
|
}
|
|
@@ -809,22 +893,24 @@ async function resetWorkspace(dir) {
|
|
|
809
893
|
});
|
|
810
894
|
}
|
|
811
895
|
/**
 * Prepare the workspace at `dir` for a job.
 *
 * A fresh workspace is cloned via `cloneAtSha`; a reused one only has its
 * `origin` URL re-pointed at the current clone URL. In both cases the git
 * author identity is then set from the job's agent name and email.
 *
 * @param {object} ctx - job context (reads `agentName`, `agentEmail`).
 * @param {string} dir - workspace directory.
 * @param {boolean} reused - whether `dir` already holds a checkout.
 * @returns {Promise<void>}
 */
async function prepareAtSha(ctx, dir, reused) {
  const identity = { name: ctx.agentName, email: ctx.agentEmail };
  if (reused) {
    await git(["-C", dir, "remote", "set-url", "origin", cloneUrl(ctx)]);
  } else {
    await cloneAtSha(ctx, dir);
  }
  await ensureGitIdentity(dir, identity);
}
|
|
820
905
|
/**
 * Prepare the workspace at `dir` for a job that continues on an existing
 * branch.
 *
 * A fresh workspace is cloned via `cloneResumingBranch` and that call's
 * result is returned; a reused workspace only has its `origin` URL updated
 * and yields `{ resumed: true }`. The git author identity is set from the
 * job's agent name and email before returning.
 *
 * @param {object} ctx - job context (reads `agentName`, `agentEmail`).
 * @param {string} dir - workspace directory.
 * @param {boolean} reused - whether `dir` already holds a checkout.
 * @returns {Promise<object>} the clone result, or `{ resumed: true }`.
 */
async function prepareResumingBranch(ctx, dir, reused) {
  const identity = { name: ctx.agentName, email: ctx.agentEmail };
  let outcome;
  if (reused) {
    await git(["-C", dir, "remote", "set-url", "origin", cloneUrl(ctx)]);
    outcome = { resumed: true };
  } else {
    outcome = await cloneResumingBranch(ctx, dir);
  }
  await ensureGitIdentity(dir, identity);
  return outcome;
}
|
|
830
916
|
async function sweepWorkspaces() {
|
|
@@ -887,10 +973,16 @@ ${e.message ?? ""}`;
|
|
|
887
973
|
/**
 * Run the repository's pre-commit hook (`git hook run pre-commit`) and
 * report the outcome, logging it to the job log.
 *
 * @param {string} dir - path of the checked-out repository.
 * @returns {Promise<{ ok: boolean, log: string, skipped: boolean }>}
 *   `ok: true` when the hook passed, or when the error is classified by
 *   `isUnsupportedGitSubcommand` (then `skipped: true`); otherwise
 *   `ok: false` with the failure log.
 */
async function runRepoChecks(dir) {
  const outcome = (ok, log, skipped) => ({ ok, log, skipped });
  try {
    await git(["-C", dir, "hook", "run", "pre-commit"]);
    logEvent("checks", "pre-commit hook passed");
    return outcome(true, "", false);
  } catch (err) {
    if (!isUnsupportedGitSubcommand(err)) {
      const log = commitFailureLog(err);
      logEvent("checks:err", log);
      return outcome(false, log, false);
    }
    // This git has no `hook` subcommand: treat the checks as skipped, not failed.
    logEvent("checks", "pre-commit hook skipped (git too old)");
    return outcome(true, "", true);
  }
}
|
|
896
988
|
async function commitChanges(ctx, dir) {
|
|
@@ -1401,6 +1493,11 @@ async function pollLoop(config) {
|
|
|
1401
1493
|
await sleep(IDLE_MS);
|
|
1402
1494
|
continue;
|
|
1403
1495
|
}
|
|
1496
|
+
startJobLog({
|
|
1497
|
+
jobId: ctx.jobId,
|
|
1498
|
+
kind: ctx.kind,
|
|
1499
|
+
secrets: [ctx.repo?.cloneToken ?? ""].filter(Boolean)
|
|
1500
|
+
});
|
|
1404
1501
|
const abort = new AbortController();
|
|
1405
1502
|
let stopPolling = false;
|
|
1406
1503
|
const scheduleCancelPoll = () => {
|
|
@@ -1450,6 +1547,13 @@ async function pollLoop(config) {
|
|
|
1450
1547
|
}
|
|
1451
1548
|
} finally {
|
|
1452
1549
|
stopPolling = true;
|
|
1550
|
+
if (!abort.signal.aborted) {
|
|
1551
|
+
try {
|
|
1552
|
+
await uploadJobLog(config, ctx.jobId, getJobLog());
|
|
1553
|
+
} catch (e) {
|
|
1554
|
+
console.error(` (failed to upload logs: ${errorMessage2(e)})`);
|
|
1555
|
+
}
|
|
1556
|
+
}
|
|
1453
1557
|
}
|
|
1454
1558
|
}
|
|
1455
1559
|
} finally {
|
|
@@ -1467,9 +1571,9 @@ var MAX_HOOK_LOG_CHARS = 4e3;
|
|
|
1467
1571
|
function trimHookLog(log) {
|
|
1468
1572
|
let trimmed = log.trimEnd();
|
|
1469
1573
|
let elided = false;
|
|
1470
|
-
const
|
|
1471
|
-
if (
|
|
1472
|
-
trimmed =
|
|
1574
|
+
const lines2 = trimmed.split("\n");
|
|
1575
|
+
if (lines2.length > MAX_HOOK_LOG_LINES) {
|
|
1576
|
+
trimmed = lines2.slice(-MAX_HOOK_LOG_LINES).join("\n");
|
|
1473
1577
|
elided = true;
|
|
1474
1578
|
}
|
|
1475
1579
|
if (trimmed.length > MAX_HOOK_LOG_CHARS) {
|
package/package.json
CHANGED
|
@@ -31,10 +31,11 @@ put it in the prompt, the subagent doesn't have it.
|
|
|
31
31
|
|
|
32
32
|
- Spawn each phase with the **Task** tool, `subagent_type: "general-purpose"`.
|
|
33
33
|
- **Model per phase** (pass it as the Task `model` argument):
|
|
34
|
-
- `"sonnet"` — implementation and
|
|
34
|
+
- `"sonnet"` — implementation, fixes, and the Verify step (mechanical
|
|
35
|
+
command-running; Verify is read-only even though it uses sonnet).
|
|
35
36
|
- `"opus"` — acceptance-criteria review, code-quality review, and the report.
|
|
36
|
-
- **
|
|
37
|
-
report only — never edit, create, or delete files_. Only implementation/fix
|
|
37
|
+
- **Read-only phases.** Tell every review, Verify, and report subagent to _inspect
|
|
38
|
+
and report only — never edit, create, or delete files_. Only implementation/fix
|
|
38
39
|
subagents may change the working tree.
|
|
39
40
|
- **No git side effects.** Neither you nor any subagent may commit, push, or open
|
|
40
41
|
a PR. Leave the changes in the working tree; the runner commits + opens the PR
|
|
@@ -61,11 +62,35 @@ the next step.
|
|
|
61
62
|
|
|
62
63
|
2. **Implement** — Task, `model: "sonnet"`. Give the subagent: the plan steps, a
|
|
63
64
|
pointer to the wiki/orientation, and the coding guidelines (verbatim). Tell it
|
|
64
|
-
to make all the code changes in the working tree to satisfy the plan,
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
65
|
+
to make all the code changes in the working tree to satisfy the plan, then
|
|
66
|
+
self-verify by discovering and running the project's verification commands —
|
|
67
|
+
checking these sources in order: `package.json` scripts (look for `build`,
|
|
68
|
+
`typecheck`, `lint`, `test`), `CLAUDE.md`, any `.flumecode/wiki/` page that
|
|
69
|
+
mentions commands, and `Makefile`. Use whatever is present and appropriate for
|
|
70
|
+
this repo; do not hardcode specific command strings. Run each discovered
|
|
71
|
+
command and fix any errors that the edits introduced before returning. If no
|
|
72
|
+
build/test setup exists in this repo, note that and move on — do not fail. End
|
|
73
|
+
by reporting: the verification commands it ran and their pass/fail results,
|
|
74
|
+
which files it changed, and how each plan step was addressed. It must not
|
|
75
|
+
commit or push.
|
|
76
|
+
|
|
77
|
+
3. **Verify (build & tests)** — Task, `model: "sonnet"`, read-only. This step
|
|
78
|
+
gives the orchestrator an objective, independent build/test signal before the
|
|
79
|
+
subjective AC and quality reviews. Tell the subagent to:
|
|
80
|
+
- Discover the project's verification commands from `package.json` scripts
|
|
81
|
+
(look for `build`, `typecheck`, `lint`, `test`), `CLAUDE.md`,
|
|
82
|
+
`.flumecode/wiki/` (any page that mentions commands), and `Makefile`. Use
|
|
83
|
+
what is present; do not hardcode specific command strings.
|
|
84
|
+
- Run each discovered command and record: the exact command, whether it passed
|
|
85
|
+
or failed, and — for any failure — a short excerpt of the failing output
|
|
86
|
+
(enough to diagnose the problem).
|
|
87
|
+
- If no build/test setup exists in this repo, say so explicitly and pass the
|
|
88
|
+
gate.
|
|
89
|
+
- Return a structured per-check result: command, pass/fail, failing-output
|
|
90
|
+
excerpt (if any).
|
|
91
|
+
- Must not edit, create, or delete any files.
|
|
92
|
+
|
|
93
|
+
4. **Acceptance-criteria review** — Task, `model: "opus"`, read-only. Give the
|
|
69
94
|
subagent the full AC list and tell it to verify each one against the actual
|
|
70
95
|
changes (run `git --no-pager diff`, read the changed files, run tests/build if
|
|
71
96
|
useful). For **each** AC it must return: the criterion text verbatim, a verdict
|
|
@@ -82,32 +107,38 @@ the next step.
|
|
|
82
107
|
to return this as a clean, structured list so you can hand it straight to the
|
|
83
108
|
report step.
|
|
84
109
|
|
|
85
|
-
|
|
110
|
+
5. **Code-quality review** — Task, `model: "opus"`, read-only. Give the subagent
|
|
86
111
|
the coding guidelines (verbatim) and tell it to review the changes for
|
|
87
112
|
violations and quality problems, returning concrete findings as
|
|
88
113
|
`file:line — what — why`, each marked **must-fix** or **nice-to-have**.
|
|
89
114
|
|
|
90
|
-
|
|
115
|
+
6. **Fix loop.** If the Verify step (step 3) reports any failing check, the AC
|
|
116
|
+
review (step 4) reports any _not met_ AC, or the quality review (step 5)
|
|
91
117
|
reports any _must-fix_ finding: spawn an **Implement/fix** subagent (Task,
|
|
92
118
|
`model: "sonnet"`) whose prompt lists exactly those findings and tells it to
|
|
93
|
-
resolve them without regressing the rest.
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
`git --no-pager diff
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
119
|
+
resolve them without regressing the rest. When a Verify failure triggered the
|
|
120
|
+
fix, include the failing command(s) and their error output excerpt(s) from the
|
|
121
|
+
Verify result in the fix subagent's prompt so it has the full context. After
|
|
122
|
+
each fix iteration, re-run the Verify step (step 3) in addition to any AC or
|
|
123
|
+
quality review that failed. Repeat this fix loop at most **2** times. If something still
|
|
124
|
+
fails after that, stop looping and record the gap honestly in the report — do
|
|
125
|
+
not hide it.
|
|
126
|
+
|
|
127
|
+
7. **Report** — Task, `model: "opus"`, read-only. Give the subagent the plan, the
|
|
128
|
+
Verify results (from step 3), the AC verdicts (from step 4), and the quality
|
|
129
|
+
findings (from step 5), and tell it to run `git --no-pager diff` itself as the **single
|
|
130
|
+
source of truth** for the report. Every `evidence` hunk it submits must be
|
|
131
|
+
copied verbatim from that live diff — it must drop or correct any hunk carried
|
|
132
|
+
over from step 4 that no longer appears in the actual diff, and the **Files
|
|
133
|
+
changed** list must come from `git --no-pager diff --stat`, not from what an
|
|
134
|
+
earlier subagent claimed. **If `git --no-pager diff` is empty, the
|
|
135
|
+
implementation changed nothing:** the report must say so plainly — an honest
|
|
136
|
+
`summary`, no AC marked `met` with evidence — and must never describe edits
|
|
137
|
+
that aren't in the diff. Tell it to submit the user-facing report by calling
|
|
138
|
+
the **`submit_report`** tool — it has that tool available. It must call
|
|
139
|
+
`submit_report` exactly once and must not edit any files.
|
|
140
|
+
|
|
141
|
+
8. **Confirm and end.** Once the report subagent has called `submit_report`, you are
|
|
111
142
|
done — end your turn. The runner reads the submitted report, renders it, posts it
|
|
112
143
|
to the thread, and appends the pull-request link. (Your own final text is only a
|
|
113
144
|
fallback if no report was submitted, so make sure the subagent submits one.)
|
|
@@ -120,11 +151,11 @@ The report subagent calls `submit_report` with these fields:
|
|
|
120
151
|
- **`prose`** — markdown for the remaining sections, using `##` headings:
|
|
121
152
|
**What changed** (the plan steps, each mapped to the concrete changes that satisfy
|
|
122
153
|
it), **Code quality** (the quality-review outcome and anything left as
|
|
123
|
-
nice-to-have), **Files changed** (the list from the diff), **Build / tests** (
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
runner adds it.
|
|
154
|
+
nice-to-have), **Files changed** (the list from the diff), **Build / tests** (lists
|
|
155
|
+
each verification command and its final pass/fail result, or explains that no
|
|
156
|
+
build/test setup was found), and **Caveats / follow-ups** (anything deferred,
|
|
157
|
+
unmet, or worth a human's eyes). Do **not** put the acceptance-criteria section in
|
|
158
|
+
`prose`, and do **not** include a PR link — the runner adds it.
|
|
128
159
|
- **`acceptanceCriteria`** — one entry per AC from the plan, in plan order, each:
|
|
129
160
|
- `criterion` — the AC text verbatim.
|
|
130
161
|
- `status` — `"met"` / `"not_met"` / `"unclear"`, mirroring the AC review.
|
|
@@ -138,7 +169,7 @@ The report subagent calls `submit_report` with these fields:
|
|
|
138
169
|
|
|
139
170
|
- Delegate through Task subagents; don't implement, review, or write the report
|
|
140
171
|
yourself.
|
|
141
|
-
- Right model per phase: `sonnet` to implement/fix, `opus` to review/report.
|
|
172
|
+
- Right model per phase: `sonnet` to implement/fix/verify (Verify is read-only), `opus` to review/report.
|
|
142
173
|
- Make every Task prompt self-contained — subagents see only what you give them.
|
|
143
174
|
- Reviewers and the report writer never modify files.
|
|
144
175
|
- Never commit, push, or open a PR.
|
|
@@ -65,9 +65,12 @@ essentials:
|
|
|
65
65
|
- **Scope the work to the request.** This is a fine-tune of an existing
|
|
66
66
|
implementation, not a rebuild. Change only what the user asked for plus what that
|
|
67
67
|
change strictly requires; don't regress the rest of the plan.
|
|
68
|
-
- **Pipeline:** Implement (
|
|
69
|
-
|
|
70
|
-
(Task
|
|
68
|
+
- **Pipeline:** Implement (self-runs build/tests & fixes its own errors, Task
|
|
69
|
+
`model: "sonnet"`) → Verify (build/tests, read-only, Task `model: "sonnet"`) →
|
|
70
|
+
acceptance/quality review (Task `model: "opus"`, read-only) → fix loop if needed
|
|
71
|
+
(≤2 iterations, re-run Verify after each fix) → report (Task `model: "opus"`, read-only).
|
|
72
|
+
Detailed mechanics (command discovery, Verify step spec, fix-loop trigger
|
|
73
|
+
conditions) are in `implement-plan/SKILL.md` — read it for the full pipeline.
|
|
71
74
|
- **No git side effects.** Never commit, push, or open a PR — leave the changes in
|
|
72
75
|
the working tree. The runner commits them and updates the existing pull request.
|
|
73
76
|
|
|
@@ -76,11 +79,13 @@ essentials:
|
|
|
76
79
|
Your last message **is** the comment posted to the plan thread — write it for the
|
|
77
80
|
user:
|
|
78
81
|
|
|
79
|
-
- **Implemented:** a short report — what you changed and why, which files, and
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
82
|
+
- **Implemented:** a short report — what you changed and why, which files, and the
|
|
83
|
+
verification results: list each build/test command that was run and its final
|
|
84
|
+
pass/fail result (or note that no build/test setup was found). Base "what changed"
|
|
85
|
+
and "which files" on the actual `git --no-pager diff` (`--stat` for the file
|
|
86
|
+
list), not on what a subagent claimed; if the diff is empty, say nothing was
|
|
87
|
+
changed rather than describing edits that aren't there. The runner appends the
|
|
88
|
+
pull-request link, so don't add one.
|
|
84
89
|
- **Clarify / push back:** your question or reasoning, as prose (plus any widget).
|
|
85
90
|
- **Re-plan:** you called `submit_plan`; the rendered plan is posted automatically,
|
|
86
91
|
so keep any extra reply text minimal.
|