@flumecode/runner 0.8.0 → 0.10.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -63,7 +63,7 @@ skipping if that version is already on npm).
63
63
  6. Report the summary back (`POST /api/runner/jobs/:id/complete`), which fills in
64
64
  the pending agent comment in the thread.
65
65
 
66
- Jobs come in two kinds. **chat** jobs answer a request thread (the flow above).
66
+ Jobs come in two kinds. **comment** jobs answer a request thread (the flow above).
67
67
  **init** jobs bootstrap a repository: they clone the default branch onto a fresh
68
68
  `flumecode/init-*` branch, run the `flumecode:document` skill to create the
69
69
  `.flumecode/` wiki, and open a PR. A repo must be initialized (from its dashboard
package/dist/cli.js CHANGED
@@ -136,6 +136,19 @@ async function reportHeartbeat(config, claudeCode) {
136
136
  noteServerVersion(res);
137
137
  if (!res.ok) throw new Error(`heartbeat failed: ${res.status} ${await safeText(res)}`);
138
138
  }
139
+ async function uploadJobLog(config, jobId, content) {
140
+ const res = await fetch(`${config.serverUrl}/api/runner/jobs/${jobId}/logs`, {
141
+ method: "POST",
142
+ headers: {
143
+ authorization: `Bearer ${config.token}`,
144
+ "content-type": "application/json",
145
+ [RUNNER_VERSION_HEADER]: RUNNER_VERSION
146
+ },
147
+ body: JSON.stringify({ content })
148
+ });
149
+ noteServerVersion(res);
150
+ if (!res.ok) throw new Error(`log upload failed: ${res.status} ${await safeText(res)}`);
151
+ }
139
152
  async function safeText(res) {
140
153
  try {
141
154
  return await res.text();
@@ -252,59 +265,59 @@ var planInputSchema = {
252
265
  };
253
266
  var planSchema = z2.object(planInputSchema);
254
267
  function renderPlan(plan) {
255
- const lines = [];
256
- lines.push(`# ${plan.title}`);
257
- lines.push("");
258
- lines.push(`**Scope** \u2014 \`${plan.scope}\``);
259
- lines.push("");
260
- lines.push(`**Goal** \u2014 ${plan.goal}`);
268
+ const lines2 = [];
269
+ lines2.push(`# ${plan.title}`);
270
+ lines2.push("");
271
+ lines2.push(`**Scope** \u2014 \`${plan.scope}\``);
272
+ lines2.push("");
273
+ lines2.push(`**Goal** \u2014 ${plan.goal}`);
261
274
  if (plan.assumptions.length > 0) {
262
- lines.push("");
263
- lines.push("**Assumptions**");
275
+ lines2.push("");
276
+ lines2.push("**Assumptions**");
264
277
  for (const assumption of plan.assumptions) {
265
- lines.push(`- ${assumption}`);
278
+ lines2.push(`- ${assumption}`);
266
279
  }
267
280
  }
268
- lines.push("");
269
- lines.push("## Steps");
281
+ lines2.push("");
282
+ lines2.push("## Steps");
270
283
  for (const [i, step] of plan.steps.entries()) {
271
- lines.push("");
272
- lines.push(`### ${i + 1}. ${step.title}`);
273
- lines.push("");
274
- lines.push(step.description);
284
+ lines2.push("");
285
+ lines2.push(`### ${i + 1}. ${step.title}`);
286
+ lines2.push("");
287
+ lines2.push(step.description);
275
288
  if (step.pseudoCode && step.pseudoCode.length > 0) {
276
289
  for (const entry of step.pseudoCode) {
277
- lines.push("");
278
- lines.push(`\`${entry.file}\``);
279
- lines.push("");
280
- lines.push("```");
281
- lines.push(entry.pseudoCode);
282
- lines.push("```");
290
+ lines2.push("");
291
+ lines2.push(`\`${entry.file}\``);
292
+ lines2.push("");
293
+ lines2.push("```");
294
+ lines2.push(entry.pseudoCode);
295
+ lines2.push("```");
283
296
  }
284
297
  }
285
298
  }
286
- lines.push("");
287
- lines.push("## Acceptance criteria");
299
+ lines2.push("");
300
+ lines2.push("## Acceptance criteria");
288
301
  for (const criterion of plan.acceptanceCriteria) {
289
- lines.push(`- [ ] ${criterion}`);
302
+ lines2.push(`- [ ] ${criterion}`);
290
303
  }
291
304
  if (plan.risks.length > 0) {
292
- lines.push("");
293
- lines.push("**Risks / open questions**");
305
+ lines2.push("");
306
+ lines2.push("**Risks / open questions**");
294
307
  for (const risk of plan.risks) {
295
- lines.push(`- ${risk}`);
308
+ lines2.push(`- ${risk}`);
296
309
  }
297
310
  }
298
311
  if (plan.outOfScope.length > 0) {
299
- lines.push("");
300
- lines.push("**Out of scope**");
312
+ lines2.push("");
313
+ lines2.push("**Out of scope**");
301
314
  for (const item of plan.outOfScope) {
302
- lines.push(`- ${item}`);
315
+ lines2.push(`- ${item}`);
303
316
  }
304
317
  }
305
- lines.push("");
306
- lines.push(PLAN_MARKER);
307
- return lines.join("\n");
318
+ lines2.push("");
319
+ lines2.push(PLAN_MARKER);
320
+ return lines2.join("\n");
308
321
  }
309
322
  var submitPlanInputSchema = {
310
323
  plans: z2.array(z2.object(planInputSchema)).min(1).refine(
@@ -379,27 +392,27 @@ var reportInputSchema = {
379
392
  };
380
393
  var reportSchema = z3.object(reportInputSchema);
381
394
  function renderReport(report) {
382
- const lines = [];
383
- lines.push(report.summary.trim());
384
- lines.push("");
385
- lines.push(report.prose.trim());
386
- lines.push("");
387
- lines.push("## Acceptance criteria");
395
+ const lines2 = [];
396
+ lines2.push(report.summary.trim());
397
+ lines2.push("");
398
+ lines2.push(report.prose.trim());
399
+ lines2.push("");
400
+ lines2.push("## Acceptance criteria");
388
401
  for (const ac of report.acceptanceCriteria) {
389
- lines.push("");
390
- lines.push(`### ${STATUS_ICON[ac.status]} ${ac.criterion}`);
391
- lines.push("");
392
- lines.push(ac.rationale.trim());
402
+ lines2.push("");
403
+ lines2.push(`### ${STATUS_ICON[ac.status]} ${ac.criterion}`);
404
+ lines2.push("");
405
+ lines2.push(ac.rationale.trim());
393
406
  for (const ev of ac.evidence) {
394
- lines.push("");
395
- lines.push(ev.note ? `\`${ev.file}\` \u2014 ${ev.note}` : `\`${ev.file}\``);
396
- lines.push("");
397
- lines.push("```diff");
398
- lines.push(ev.hunk.replace(/\n+$/, ""));
399
- lines.push("```");
407
+ lines2.push("");
408
+ lines2.push(ev.note ? `\`${ev.file}\` \u2014 ${ev.note}` : `\`${ev.file}\``);
409
+ lines2.push("");
410
+ lines2.push("```diff");
411
+ lines2.push(ev.hunk.replace(/\n+$/, ""));
412
+ lines2.push("```");
400
413
  }
401
414
  }
402
- return lines.join("\n");
415
+ return lines2.join("\n");
403
416
  }
404
417
  function createReportTooling() {
405
418
  let submittedReport = null;
@@ -426,8 +439,46 @@ function createReportTooling() {
426
439
  return { mcpServer, getReport: () => submittedReport };
427
440
  }
428
441
 
442
+ // src/logger.ts
443
+ var lines = [];
444
+ var secrets = [];
445
+ var MAX_BYTES = 10 * 1024 * 1024;
446
+ function startJobLog(opts) {
447
+ lines = [];
448
+ secrets = opts.secrets.filter(Boolean);
449
+ logEvent("meta", `job ${opts.jobId} (${opts.kind}) started at ${(/* @__PURE__ */ new Date()).toISOString()}`);
450
+ }
451
+ function redact(s) {
452
+ for (const sec of secrets) {
453
+ s = s.split(sec).join("***REDACTED***");
454
+ }
455
+ return s;
456
+ }
457
+ function logEvent(section, text) {
458
+ lines.push(`[${(/* @__PURE__ */ new Date()).toISOString()}] [${section}] ${redact(text)}`);
459
+ }
460
+ function getJobLog() {
461
+ const full = lines.join("\n");
462
+ if (full.length <= MAX_BYTES) return full;
463
+ const half = Math.floor(MAX_BYTES / 2);
464
+ return full.slice(0, half) + `
465
+
466
+ \u2026[truncated ${full.length - MAX_BYTES} bytes]\u2026
467
+
468
+ ` + full.slice(-half);
469
+ }
470
+
429
471
  // src/executor.ts
430
472
  var FLUME_PLUGIN_DIR = fileURLToPath2(new URL("../skills-plugin", import.meta.url));
473
+ function stringifyResult(content) {
474
+ if (typeof content === "string") return content;
475
+ if (Array.isArray(content)) {
476
+ return content.map(
477
+ (c) => typeof c === "object" && c !== null && "text" in c ? String(c.text) : JSON.stringify(c)
478
+ ).join("\n");
479
+ }
480
+ return JSON.stringify(content);
481
+ }
431
482
  async function runClaudeCode(opts) {
432
483
  let finalText = "";
433
484
  const { mcpServer, collected } = createWidgetTooling();
@@ -463,11 +514,26 @@ async function runClaudeCode(opts) {
463
514
  for (const block of content) {
464
515
  if (block && block.type === "text" && typeof block.text === "string") {
465
516
  process.stdout.write(block.text);
517
+ logEvent("agent", block.text);
518
+ } else if (block && block.type === "tool_use") {
519
+ logEvent("tool_use", `${block.name} ${JSON.stringify(block.input)}`);
520
+ }
521
+ }
522
+ }
523
+ } else if (message.type === "user") {
524
+ const content = message.message?.content;
525
+ if (Array.isArray(content)) {
526
+ for (const block of content) {
527
+ if (block && block.type === "tool_result") {
528
+ logEvent("tool_result", stringifyResult(block.content));
466
529
  }
467
530
  }
468
531
  }
469
532
  } else if (message.type === "result") {
470
533
  finalText = message.result ?? "";
534
+ logEvent("result", finalText);
535
+ } else if (message.type === "system") {
536
+ logEvent("system", JSON.stringify(message));
471
537
  }
472
538
  }
473
539
  process.stdout.write("\n");
@@ -547,18 +613,18 @@ function turnHeading(turn, agentName) {
547
613
  if (turn.kind === "report") return `${agentName} (implementation report)`;
548
614
  return agentName;
549
615
  }
550
- function appendThread(lines, ctx) {
616
+ function appendThread(lines2, ctx) {
551
617
  if (!ctx.thread || ctx.thread.length === 0) return;
552
- lines.push("", "# Conversation so far");
618
+ lines2.push("", "# Conversation so far");
553
619
  for (const turn of ctx.thread) {
554
- lines.push("", `## ${turnHeading(turn, ctx.agentName)}`, turn.content);
620
+ lines2.push("", `## ${turnHeading(turn, ctx.agentName)}`, turn.content);
555
621
  }
556
622
  }
557
623
  function buildPrompt(ctx) {
558
624
  const task = ctx.permissionMode === "plan" ? `Use the \`flumecode:request-to-plan\` skill to handle this request. You are read-only and cannot modify files \u2014 clarify any ambiguity with the user first, then produce a concrete, actionable plan (the specific changes you would make and why). Cite the relevant files. Do NOT call ExitPlanMode or write the plan to a file. When the plan is ready, call the \`submit_plan\` tool with the structured plan fields; the runner renders it into the canonical plan markdown and posts it as your comment.` : `Use the \`flumecode:implement-plan\` skill to handle this request. You are the ORCHESTRATOR: do not implement, review, or write the report yourself \u2014 follow the skill to delegate each phase to subagents via the Task tool, picking the right model for each. Do not commit or push \u2014 the runner handles that.`;
559
625
  const orient = `Before investigating raw source, check for a FlumeCode wiki at \`.flumecode/wiki/\`. If it exists, read \`.flumecode/wiki/README.md\` first \u2014 it is the index \u2014 and follow its links to the pages and source paths relevant to this request. If there is no wiki, work from the code directly.`;
560
626
  const widgets = `When you need the user to choose, ask it as a widget rather than writing the options as prose: call \`single_select\` for a one-of-N choice (radio buttons) or \`multi_select\` for a "select all that apply" choice (checkboxes). Don't add your own "Other" option \u2014 the UI always provides one. After calling a widget tool, end your turn \u2014 the user's answer comes back as their next message and starts a fresh run.`;
561
- const lines = [
627
+ const lines2 = [
562
628
  `You are "${ctx.agentName}", an autonomous coding agent working inside a FlumeCode request.`,
563
629
  `The repository ${ctx.repo.fullName} is checked out in your current working directory on branch "${ctx.repo.checkoutBranch}" at commit ${ctx.repo.checkoutSha.slice(0, 7)}.`,
564
630
  task,
@@ -566,29 +632,29 @@ function buildPrompt(ctx) {
566
632
  widgets
567
633
  ];
568
634
  if (ctx.permissionMode !== "plan") {
569
- lines.push(
635
+ lines2.push(
570
636
  "",
571
637
  "These coding guidelines apply to all code produced in this run:",
572
638
  "",
573
639
  loadRule("coding-guideline")
574
640
  );
575
641
  }
576
- lines.push("", `# Request: ${ctx.request?.title ?? ""}`);
642
+ lines2.push("", `# Request: ${ctx.request?.title ?? ""}`);
577
643
  if (ctx.request?.body) {
578
- lines.push("", ctx.request.body);
644
+ lines2.push("", ctx.request.body);
579
645
  }
580
- appendThread(lines, ctx);
581
- lines.push(
646
+ appendThread(lines2, ctx);
647
+ lines2.push(
582
648
  "",
583
649
  ctx.permissionMode === "plan" ? "Your final reply is posted verbatim as your comment in the thread \u2014 if you called `submit_plan`, the rendered plan is posted automatically; for clarifying questions, your reply text is posted as-is." : "Your final reply is posted verbatim as your comment in the thread \u2014 make it the implementation report your report subagent produced, with nothing added. The runner appends the pull-request link."
584
650
  );
585
- return lines.join("\n");
651
+ return lines2.join("\n");
586
652
  }
587
653
  function buildRevisePrompt(ctx) {
588
654
  const task = `Use the \`flumecode:revise-implementation\` skill to handle this turn. The plan below was already implemented (its implementation report appears in the conversation below, tagged as such); the user is now asking to fine-tune that implementation. Decide how to respond to their latest message: if it's unclear, ask a clarifying question (as a widget); if it's a bad idea or not feasible, push back with your reasoning; if it warrants rethinking the plan, call \`submit_plan\` with a revised plan; otherwise implement the requested change. When you implement, you are the ORCHESTRATOR: delegate the work to subagents via the Task tool as the skill directs, and do not commit or push \u2014 the runner handles that, updating the existing pull request.`;
589
655
  const orient = `Before investigating raw source, check for a FlumeCode wiki at \`.flumecode/wiki/\`. If it exists, read \`.flumecode/wiki/README.md\` first \u2014 it is the index \u2014 and follow its links to the pages and source paths relevant to this change. If there is no wiki, work from the code directly.`;
590
656
  const widgets = `When you need the user to choose, ask it as a widget rather than writing the options as prose: call \`single_select\` for a one-of-N choice (radio buttons) or \`multi_select\` for a "select all that apply" choice (checkboxes). Don't add your own "Other" option \u2014 the UI always provides one. After calling a widget tool, end your turn \u2014 the user's answer comes back as their next message and starts a fresh run.`;
591
- const lines = [
657
+ const lines2 = [
592
658
  `You are "${ctx.agentName}", an autonomous coding agent fine-tuning an implemented FlumeCode plan in an ongoing thread with the user.`,
593
659
  `The repository ${ctx.repo.fullName} is checked out in your current working directory on the plan's implementation branch "${ctx.repo.checkoutBranch}" \u2014 the same branch its open pull request is built from, so any change you push updates that PR.`,
594
660
  task,
@@ -602,20 +668,20 @@ function buildRevisePrompt(ctx) {
602
668
  `# Plan: ${ctx.request?.title ?? ""}`
603
669
  ];
604
670
  if (ctx.request?.body) {
605
- lines.push("", ctx.request.body);
671
+ lines2.push("", ctx.request.body);
606
672
  }
607
- appendThread(lines, ctx);
608
- lines.push(
673
+ appendThread(lines2, ctx);
674
+ lines2.push(
609
675
  "",
610
676
  "The last message above is the user's request for this turn. Your final reply is posted verbatim as your comment in the plan thread: if you implemented a change, make it a short report of what you changed (the runner appends the pull-request link); if you asked a question, called `submit_plan`, or pushed back, your reply text is posted as-is."
611
677
  );
612
- return lines.join("\n");
678
+ return lines2.join("\n");
613
679
  }
614
680
  function buildResolvePrompt(ctx) {
615
681
  const mergeBranch = ctx.repo.mergeBranch ?? "the merge branch";
616
682
  const task = `Use the \`flumecode:resolve-merge-conflict\` skill to handle this turn. A merge of \`${mergeBranch}\` into this branch is IN PROGRESS and has left conflict markers in your working tree. Resolve every conflicted file by correctly integrating BOTH sides \u2014 the change this session implemented (described below) and the incoming changes from \`${mergeBranch}\` \u2014 never blindly discard either side. Remove all conflict markers and verify the result builds and tests pass. Do NOT \`git add\`, commit, push, or open a pull request \u2014 the runner finalizes the merge commit and updates the existing pull request.`;
617
683
  const orient = `Before investigating raw source, check for a FlumeCode wiki at \`.flumecode/wiki/\`. If it exists, read \`.flumecode/wiki/README.md\` first \u2014 it is the index \u2014 and follow its links to the pages and source paths relevant to the conflicting code. If there is no wiki, work from the code directly.`;
618
- const lines = [
684
+ const lines2 = [
619
685
  `You are "${ctx.agentName}", an autonomous coding agent resolving merge conflicts on an implemented FlumeCode plan.`,
620
686
  `The repository ${ctx.repo.fullName} is checked out in your current working directory on the plan's implementation branch "${ctx.repo.checkoutBranch}" \u2014 the same branch its open pull request is built from \u2014 with an in-progress merge of "${mergeBranch}".`,
621
687
  task,
@@ -628,17 +694,17 @@ function buildResolvePrompt(ctx) {
628
694
  `# Plan: ${ctx.request?.title ?? ""}`
629
695
  ];
630
696
  if (ctx.request?.body) {
631
- lines.push("", ctx.request.body);
697
+ lines2.push("", ctx.request.body);
632
698
  }
633
- appendThread(lines, ctx);
634
- lines.push(
699
+ appendThread(lines2, ctx);
700
+ lines2.push(
635
701
  "",
636
702
  "Resolve the conflicts now. Your final reply is posted as a report in the plan thread: summarize which files conflicted and how you resolved each (the runner appends the pull-request link, so don't add one)."
637
703
  );
638
- return lines.join("\n");
704
+ return lines2.join("\n");
639
705
  }
640
706
  function buildDocumentPrompt(ctx) {
641
- const lines = [
707
+ const lines2 = [
642
708
  `You are "${ctx.agentName}" maintaining the repository wiki for ${ctx.repo.fullName}.`,
643
709
  `An implementation just ran in this working directory to satisfy the request below; its changes are uncommitted in the working tree.`,
644
710
  `Use the \`flumecode:document\` skill to bring the wiki in sync with those changes. Only edit files under \`.flumecode/wiki/\` \u2014 do not touch application code. The runner commits the wiki alongside the implementation in the same pull request.`,
@@ -646,14 +712,14 @@ function buildDocumentPrompt(ctx) {
646
712
  `# Request: ${ctx.request?.title ?? ""}`
647
713
  ];
648
714
  if (ctx.request?.body) {
649
- lines.push("", ctx.request.body);
715
+ lines2.push("", ctx.request.body);
650
716
  }
651
- appendThread(lines, ctx);
652
- lines.push("", "When done, reply with a one- or two-line summary of the wiki changes you made.");
653
- return lines.join("\n");
717
+ appendThread(lines2, ctx);
718
+ lines2.push("", "When done, reply with a one- or two-line summary of the wiki changes you made.");
719
+ return lines2.join("\n");
654
720
  }
655
721
  function buildRepairPrompt(ctx, hookLog) {
656
- const lines = [
722
+ const lines2 = [
657
723
  `You are "${ctx.agentName}", fixing a failed pre-commit check in the repository ${ctx.repo.fullName}, checked out in your current working directory.`,
658
724
  `The changes from the previous step are still uncommitted in the working tree. When the runner tried to commit them, the repository's pre-commit hook \u2014 which runs the project's own checks (lint / typecheck / unit tests) \u2014 failed. Make the working tree pass those checks: fix the failing code or tests at their root. Do NOT delete or skip tests, weaken assertions, or disable the checks to silence the failure. Preserve the intent of the original change; repair only what's broken. Do NOT commit or push \u2014 the runner re-commits once the checks pass.`,
659
725
  "",
@@ -669,13 +735,13 @@ function buildRepairPrompt(ctx, hookLog) {
669
735
  "",
670
736
  "When done, reply with a one-line summary of what you fixed."
671
737
  ];
672
- return lines.join("\n");
738
+ return lines2.join("\n");
673
739
  }
674
740
  function buildReleasePrompt(ctx, baseChecks) {
675
741
  const task = `Use the \`flumecode:create-release\` skill to handle this turn. You are driving a release: first analyse commits since the last tag, propose version bumps, and ask the user to confirm via widgets (Phase 1); once the user's widget answers appear in the thread, apply the bumps to package.json files and update CHANGELOG.md (Phase 2). Do NOT commit or push \u2014 the runner handles that and opens the bump PR.`;
676
742
  const orient = `Before investigating raw source, check for a FlumeCode wiki at \`.flumecode/wiki/\`. If it exists, read \`.flumecode/wiki/README.md\` first \u2014 it is the index \u2014 and follow its links to the pages and source paths relevant to this release. If there is no wiki, work from the code directly.`;
677
743
  const widgets = `When you need the user to choose, ask it as a widget rather than writing the options as prose: call \`single_select\` for a one-of-N choice (radio buttons) or \`multi_select\` for a "select all that apply" choice (checkboxes). Don't add your own "Other" option \u2014 the UI always provides one. After calling a widget tool, end your turn \u2014 the user's answer comes back as their next message and starts a fresh run.`;
678
- const lines = [
744
+ const lines2 = [
679
745
  `You are "${ctx.agentName}", an autonomous coding agent driving a FlumeCode release.`,
680
746
  `The repository ${ctx.repo.fullName} is checked out in your current working directory on the release bump branch "${ctx.repo.checkoutBranch}".`,
681
747
  task,
@@ -689,10 +755,10 @@ function buildReleasePrompt(ctx, baseChecks) {
689
755
  `# Release: ${ctx.request?.title ?? ""}`
690
756
  ];
691
757
  if (ctx.request?.body) {
692
- lines.push("", ctx.request.body);
758
+ lines2.push("", ctx.request.body);
693
759
  }
694
760
  if (baseChecks && !baseChecks.ok) {
695
- lines.push(
761
+ lines2.push(
696
762
  "",
697
763
  "# Pre-release check status",
698
764
  "",
@@ -708,12 +774,12 @@ function buildReleasePrompt(ctx, baseChecks) {
708
774
  "```"
709
775
  );
710
776
  }
711
- appendThread(lines, ctx);
712
- lines.push(
777
+ appendThread(lines2, ctx);
778
+ lines2.push(
713
779
  "",
714
780
  "Your final reply is posted verbatim as your comment in the release thread \u2014 if you called widgets (Phase 1), your reply text accompanies the questions; if you applied the bumps (Phase 2), make it the report the skill produced. The runner appends the pull-request link."
715
781
  );
716
- return lines.join("\n");
782
+ return lines2.join("\n");
717
783
  }
718
784
  function buildInitPrompt(ctx) {
719
785
  return [
@@ -740,13 +806,20 @@ var exec = promisify(execFile);
740
806
  var WORKSPACE_PREFIX = "flume-runner-";
741
807
  var MAX_BUFFER = 1 << 24;
742
808
  async function git(args) {
743
- return exec("git", args, { maxBuffer: MAX_BUFFER });
809
+ logEvent("git", `git ${args.join(" ")}`);
810
+ try {
811
+ const result = await exec("git", args, { maxBuffer: MAX_BUFFER });
812
+ if (result.stdout.trim()) logEvent("git:out", result.stdout.trim());
813
+ if (result.stderr.trim()) logEvent("git:err", result.stderr.trim());
814
+ return result;
815
+ } catch (err) {
816
+ logEvent("git:err", String(err.stderr ?? err));
817
+ throw err;
818
+ }
744
819
  }
745
- var RUNNER_GIT_EMAIL = "runner@flumecode.local";
746
- var RUNNER_GIT_NAME = "FlumeCode Runner";
747
- async function ensureGitIdentity(dir) {
748
- await git(["-C", dir, "config", "user.email", RUNNER_GIT_EMAIL]);
749
- await git(["-C", dir, "config", "user.name", RUNNER_GIT_NAME]);
820
+ async function ensureGitIdentity(dir, identity) {
821
+ await git(["-C", dir, "config", "user.email", identity.email]);
822
+ await git(["-C", dir, "config", "user.name", identity.name]);
750
823
  }
751
824
  function cloneUrl(ctx) {
752
825
  const { owner, name, cloneToken } = ctx.repo;
@@ -764,10 +837,21 @@ async function installDependencies(dir) {
764
837
  const manager = detectPackageManager(dir);
765
838
  if (manager === null) return { status: "skipped" };
766
839
  const env = { ...process.env, CI: "1", ADBLOCK: "1", DISABLE_OPENCOLLECTIVE: "1" };
840
+ logEvent("install", `${manager} install`);
767
841
  try {
768
- await exec(manager, ["install"], { cwd: dir, maxBuffer: MAX_BUFFER, env, timeout: 5 * 6e4 });
842
+ const result = await exec(manager, ["install"], {
843
+ cwd: dir,
844
+ maxBuffer: MAX_BUFFER,
845
+ env,
846
+ timeout: 5 * 6e4
847
+ });
848
+ if (result.stdout.trim()) logEvent("install:out", result.stdout.trim());
849
+ if (result.stderr.trim()) logEvent("install:err", result.stderr.trim());
769
850
  return { status: "installed", manager };
770
851
  } catch (err) {
852
+ const e = err;
853
+ const detail = [e.stdout, e.stderr].map((s) => typeof s === "string" ? s.trim() : "").filter(Boolean).join("\n");
854
+ logEvent("install:err", detail || (err instanceof Error ? err.message : String(err)));
771
855
  return { status: "failed", manager, error: err instanceof Error ? err.message : String(err) };
772
856
  }
773
857
  }
@@ -809,22 +893,24 @@ async function resetWorkspace(dir) {
809
893
  });
810
894
  }
811
895
  async function prepareAtSha(ctx, dir, reused) {
896
+ const identity = { name: ctx.agentName, email: ctx.agentEmail };
812
897
  if (!reused) {
813
898
  await cloneAtSha(ctx, dir);
814
- await ensureGitIdentity(dir);
899
+ await ensureGitIdentity(dir, identity);
815
900
  return;
816
901
  }
817
902
  await git(["-C", dir, "remote", "set-url", "origin", cloneUrl(ctx)]);
818
- await ensureGitIdentity(dir);
903
+ await ensureGitIdentity(dir, identity);
819
904
  }
820
905
  async function prepareResumingBranch(ctx, dir, reused) {
906
+ const identity = { name: ctx.agentName, email: ctx.agentEmail };
821
907
  if (!reused) {
822
908
  const result = await cloneResumingBranch(ctx, dir);
823
- await ensureGitIdentity(dir);
909
+ await ensureGitIdentity(dir, identity);
824
910
  return result;
825
911
  }
826
912
  await git(["-C", dir, "remote", "set-url", "origin", cloneUrl(ctx)]);
827
- await ensureGitIdentity(dir);
913
+ await ensureGitIdentity(dir, identity);
828
914
  return { resumed: true };
829
915
  }
830
916
  async function sweepWorkspaces() {
@@ -887,10 +973,16 @@ ${e.message ?? ""}`;
887
973
  async function runRepoChecks(dir) {
888
974
  try {
889
975
  await git(["-C", dir, "hook", "run", "pre-commit"]);
976
+ logEvent("checks", "pre-commit hook passed");
890
977
  return { ok: true, log: "", skipped: false };
891
978
  } catch (err) {
892
- if (isUnsupportedGitSubcommand(err)) return { ok: true, log: "", skipped: true };
893
- return { ok: false, log: commitFailureLog(err), skipped: false };
979
+ if (isUnsupportedGitSubcommand(err)) {
980
+ logEvent("checks", "pre-commit hook skipped (git too old)");
981
+ return { ok: true, log: "", skipped: true };
982
+ }
983
+ const log = commitFailureLog(err);
984
+ logEvent("checks:err", log);
985
+ return { ok: false, log, skipped: false };
894
986
  }
895
987
  }
896
988
  async function commitChanges(ctx, dir) {
@@ -1401,6 +1493,11 @@ async function pollLoop(config) {
1401
1493
  await sleep(IDLE_MS);
1402
1494
  continue;
1403
1495
  }
1496
+ startJobLog({
1497
+ jobId: ctx.jobId,
1498
+ kind: ctx.kind,
1499
+ secrets: [ctx.repo?.cloneToken ?? ""].filter(Boolean)
1500
+ });
1404
1501
  const abort = new AbortController();
1405
1502
  let stopPolling = false;
1406
1503
  const scheduleCancelPoll = () => {
@@ -1450,6 +1547,13 @@ async function pollLoop(config) {
1450
1547
  }
1451
1548
  } finally {
1452
1549
  stopPolling = true;
1550
+ if (!abort.signal.aborted) {
1551
+ try {
1552
+ await uploadJobLog(config, ctx.jobId, getJobLog());
1553
+ } catch (e) {
1554
+ console.error(` (failed to upload logs: ${errorMessage2(e)})`);
1555
+ }
1556
+ }
1453
1557
  }
1454
1558
  }
1455
1559
  } finally {
@@ -1467,9 +1571,9 @@ var MAX_HOOK_LOG_CHARS = 4e3;
1467
1571
  function trimHookLog(log) {
1468
1572
  let trimmed = log.trimEnd();
1469
1573
  let elided = false;
1470
- const lines = trimmed.split("\n");
1471
- if (lines.length > MAX_HOOK_LOG_LINES) {
1472
- trimmed = lines.slice(-MAX_HOOK_LOG_LINES).join("\n");
1574
+ const lines2 = trimmed.split("\n");
1575
+ if (lines2.length > MAX_HOOK_LOG_LINES) {
1576
+ trimmed = lines2.slice(-MAX_HOOK_LOG_LINES).join("\n");
1473
1577
  elided = true;
1474
1578
  }
1475
1579
  if (trimmed.length > MAX_HOOK_LOG_CHARS) {
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@flumecode/runner",
3
- "version": "0.8.0",
3
+ "version": "0.10.0",
4
4
  "type": "module",
5
5
  "description": "FlumeCode local runner — claims jobs and drives your local Claude Code against a real checkout.",
6
6
  "bin": {
@@ -31,10 +31,11 @@ put it in the prompt, the subagent doesn't have it.
31
31
 
32
32
  - Spawn each phase with the **Task** tool, `subagent_type: "general-purpose"`.
33
33
  - **Model per phase** (pass it as the Task `model` argument):
34
- - `"sonnet"` — implementation and fixes (the code-writing work).
34
+ - `"sonnet"` — implementation, fixes, and the Verify step (mechanical
35
+ command-running; Verify is read-only even though it uses sonnet).
35
36
  - `"opus"` — acceptance-criteria review, code-quality review, and the report.
36
- - **Reviewers are read-only.** Tell every review/report subagent to _inspect and
37
- report only — never edit, create, or delete files_. Only implementation/fix
37
+ - **Read-only phases.** Tell every review, Verify, and report subagent to _inspect
38
+ and report only — never edit, create, or delete files_. Only implementation/fix
38
39
  subagents may change the working tree.
39
40
  - **No git side effects.** Neither you nor any subagent may commit, push, or open
40
41
  a PR. Leave the changes in the working tree; the runner commits + opens the PR
@@ -61,11 +62,35 @@ the next step.
61
62
 
62
63
  2. **Implement** — Task, `model: "sonnet"`. Give the subagent: the plan steps, a
63
64
  pointer to the wiki/orientation, and the coding guidelines (verbatim). Tell it
64
- to make all the code changes in the working tree to satisfy the plan, keep the
65
- build and tests green where practical, and end by reporting which files it
66
- changed and how each step was addressed. It must not commit or push.
67
-
68
- 3. **Acceptance-criteria review** — Task, `model: "opus"`, read-only. Give the
65
+ to make all the code changes in the working tree to satisfy the plan, then
66
+ self-verify by discovering and running the project's verification commands —
67
+ checking these sources in order: `package.json` scripts (look for `build`,
68
+ `typecheck`, `lint`, `test`), `CLAUDE.md`, any `.flumecode/wiki/` page that
69
+ mentions commands, and `Makefile`. Use whatever is present and appropriate for
70
+ this repo; do not hardcode specific command strings. Run each discovered
71
+ command and fix any errors that the edits introduced before returning. If no
72
+ build/test setup exists in this repo, note that and move on — do not fail. End
73
+ by reporting: the verification commands it ran and their pass/fail results,
74
+ which files it changed, and how each plan step was addressed. It must not
75
+ commit or push.
76
+
77
+ 3. **Verify (build & tests)** — Task, `model: "sonnet"`, read-only. This step
78
+ gives the orchestrator an objective, independent build/test signal before the
79
+ subjective AC and quality reviews. Tell the subagent to:
80
+ - Discover the project's verification commands from `package.json` scripts
81
+ (look for `build`, `typecheck`, `lint`, `test`), `CLAUDE.md`,
82
+ `.flumecode/wiki/` (any page that mentions commands), and `Makefile`. Use
83
+ what is present; do not hardcode specific command strings.
84
+ - Run each discovered command and record: the exact command, whether it passed
85
+ or failed, and — for any failure — a short excerpt of the failing output
86
+ (enough to diagnose the problem).
87
+ - If no build/test setup exists in this repo, say so explicitly and pass the
88
+ gate.
89
+ - Return a structured per-check result: command, pass/fail, failing-output
90
+ excerpt (if any).
91
+ - Must not edit, create, or delete any files.
92
+
93
+ 4. **Acceptance-criteria review** — Task, `model: "opus"`, read-only. Give the
69
94
  subagent the full AC list and tell it to verify each one against the actual
70
95
  changes (run `git --no-pager diff`, read the changed files, run tests/build if
71
96
  useful). For **each** AC it must return: the criterion text verbatim, a verdict
@@ -82,32 +107,38 @@ the next step.
82
107
  to return this as a clean, structured list so you can hand it straight to the
83
108
  report step.
84
109
 
85
- 4. **Code-quality review** — Task, `model: "opus"`, read-only. Give the subagent
110
+ 5. **Code-quality review** — Task, `model: "opus"`, read-only. Give the subagent
86
111
  the coding guidelines (verbatim) and tell it to review the changes for
87
112
  violations and quality problems, returning concrete findings as
88
113
  `file:line — what — why`, each marked **must-fix** or **nice-to-have**.
89
114
 
90
- 5. **Fix loop.** If the AC review reports any _not met_ AC, or the quality review
115
+ 6. **Fix loop.** If the Verify step (step 3) reports any failing check, the AC
116
+ review (step 4) reports any _not met_ AC, or the quality review (step 5)
91
117
  reports any _must-fix_ finding: spawn an **Implement/fix** subagent (Task,
92
118
  `model: "sonnet"`) whose prompt lists exactly those findings and tells it to
93
- resolve them without regressing the rest. Then re-run only the review(s) that
94
- failed. Repeat at most **2** times. If something still fails after that, stop
95
- looping and record the gap honestly in the report — do not hide it.
96
-
97
- 6. **Report** — Task, `model: "opus"`, read-only. Give the subagent the plan, the AC
98
- verdicts (from step 3), and the quality findings, and tell it to run
99
- `git --no-pager diff` itself as the **single source of truth** for the report.
100
- Every `evidence` hunk it submits must be copied verbatim from that live diff — it
101
- must drop or correct any hunk carried over from step 3 that no longer appears in
102
- the actual diff, and the **Files changed** list must come from
103
- `git --no-pager diff --stat`, not from what an earlier subagent claimed. **If
104
- `git --no-pager diff` is empty, the implementation changed nothing:** the report
105
- must say so plainly — an honest `summary`, no AC marked `met` with evidence — and
106
- must never describe edits that aren't in the diff. Tell it to submit the
107
- user-facing report by calling the **`submit_report`** tool — it has that tool
108
- available. It must call `submit_report` exactly once and must not edit any files.
109
-
110
- 7. **Confirm and end.** Once the report subagent has called `submit_report`, you are
119
+ resolve them without regressing the rest. When a Verify failure triggered the
120
+ fix, include the failing command(s) and their error output excerpt(s) from the
121
+ Verify result in the fix subagent's prompt so it has the full context. After
122
+ each fix iteration, re-run the Verify step (step 3) in addition to any AC or
123
+ quality review that failed. Repeat at most **2** times. If something still
124
+ fails after that, stop looping and record the gap honestly in the report — do
125
+ not hide it.
126
+
127
+ 7. **Report** — Task, `model: "opus"`, read-only. Give the subagent the plan, the
128
+ Verify results (from step 3), the AC verdicts (from step 4), and the quality
129
+ findings, and tell it to run `git --no-pager diff` itself as the **single
130
+ source of truth** for the report. Every `evidence` hunk it submits must be
131
+ copied verbatim from that live diff — it must drop or correct any hunk carried
132
+ over from step 4 that no longer appears in the actual diff, and the **Files
133
+ changed** list must come from `git --no-pager diff --stat`, not from what an
134
+ earlier subagent claimed. **If `git --no-pager diff` is empty, the
135
+ implementation changed nothing:** the report must say so plainly — an honest
136
+ `summary`, no AC marked `met` with evidence — and must never describe edits
137
+ that aren't in the diff. Tell it to submit the user-facing report by calling
138
+ the **`submit_report`** tool — it has that tool available. It must call
139
+ `submit_report` exactly once and must not edit any files.
140
+
141
+ 8. **Confirm and end.** Once the report subagent has called `submit_report`, you are
111
142
  done — end your turn. The runner reads the submitted report, renders it, posts it
112
143
  to the thread, and appends the pull-request link. (Your own final text is only a
113
144
  fallback if no report was submitted, so make sure the subagent submits one.)
@@ -120,11 +151,11 @@ The report subagent calls `submit_report` with these fields:
120
151
  - **`prose`** — markdown for the remaining sections, using `##` headings:
121
152
  **What changed** (the plan steps, each mapped to the concrete changes that satisfy
122
153
  it), **Code quality** (the quality-review outcome and anything left as
123
- nice-to-have), **Files changed** (the list from the diff), **Build / tests** (what
124
- was run and the result, or why it wasn't run), and **Caveats / follow-ups**
125
- (anything deferred, unmet, or worth a human's eyes). Do **not** put the
126
- acceptance-criteria section in `prose`, and do **not** include a PR link — the
127
- runner adds it.
154
+ nice-to-have), **Files changed** (the list from the diff), **Build / tests** (lists
155
+ each verification command and its final pass/fail result, or explains that no
156
+ build/test setup was found), and **Caveats / follow-ups** (anything deferred,
157
+ unmet, or worth a human's eyes). Do **not** put the acceptance-criteria section in
158
+ `prose`, and do **not** include a PR link — the runner adds it.
128
159
  - **`acceptanceCriteria`** — one entry per AC from the plan, in plan order, each:
129
160
  - `criterion` — the AC text verbatim.
130
161
  - `status` — `"met"` / `"not_met"` / `"unclear"`, mirroring the AC review.
@@ -138,7 +169,7 @@ The report subagent calls `submit_report` with these fields:
138
169
 
139
170
  - Delegate through Task subagents; don't implement, review, or write the report
140
171
  yourself.
141
- - Right model per phase: `sonnet` to implement/fix, `opus` to review/report.
172
+ - Right model per phase: `sonnet` to implement/fix/verify (Verify is read-only), `opus` to review/report.
142
173
  - Make every Task prompt self-contained — subagents see only what you give them.
143
174
  - Reviewers and the report writer never modify files.
144
175
  - Never commit, push, or open a PR.
@@ -65,9 +65,12 @@ essentials:
65
65
  - **Scope the work to the request.** This is a fine-tune of an existing
66
66
  implementation, not a rebuild. Change only what the user asked for plus what that
67
67
  change strictly requires; don't regress the rest of the plan.
68
- - **Pipeline:** Implement (Task, `model: "sonnet"`) → acceptance/quality review of
69
- the change (Task, `model: "opus"`, read-only) → fix loop if needed (≤2) → report
70
- (Task, `model: "opus"`, read-only). Reviewers and the report writer never edit.
68
+ - **Pipeline:** Implement (self-runs build/tests & fixes its own errors, Task
69
+ `model: "sonnet"`) → Verify (build/tests, read-only, Task `model: "sonnet"`) →
70
+ acceptance/quality review (Task `model: "opus"`, read-only) → fix loop if needed
71
+ (≤2, re-run Verify after each fix) → report (Task `model: "opus"`, read-only).
72
+ Detailed mechanics (command discovery, Verify step spec, fix-loop trigger
73
+ conditions) are in `implement-plan/SKILL.md` — read it for the full pipeline.
71
74
  - **No git side effects.** Never commit, push, or open a PR — leave the changes in
72
75
  the working tree. The runner commits them and updates the existing pull request.
73
76
 
@@ -76,11 +79,13 @@ essentials:
76
79
  Your last message **is** the comment posted to the plan thread — write it for the
77
80
  user:
78
81
 
79
- - **Implemented:** a short report — what you changed and why, which files, and how
80
- it was verified (build/tests). Base "what changed" and "which files" on the actual
81
- `git --no-pager diff` (`--stat` for the file list), not on what a subagent claimed;
82
- if the diff is empty, say nothing was changed rather than describing edits that
83
- aren't there. The runner appends the pull-request link, so don't add one.
82
+ - **Implemented:** a short report — what you changed and why, which files, and the
83
+ verification results: list each build/test command that was run and its final
84
+ pass/fail result (or note that no build/test setup was found). Base "what changed"
85
+ and "which files" on the actual `git --no-pager diff` (`--stat` for the file
86
+ list), not on what a subagent claimed; if the diff is empty, say nothing was
87
+ changed rather than describing edits that aren't there. The runner appends the
88
+ pull-request link, so don't add one.
84
89
  - **Clarify / push back:** your question or reasoning, as prose (plus any widget).
85
90
  - **Re-plan:** you called `submit_plan`; the rendered plan is posted automatically,
86
91
  so keep any extra reply text minimal.