npm - @flumecode/runner - Versions diffs - 0.8.0 → 0.10.0 - Mend

@flumecode/runner 0.8.0 → 0.10.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (5)

package/README.md +1 -1
package/dist/cli.js +203 -99
package/package.json +1 -1
package/skills-plugin/skills/implement-plan/SKILL.md +65 -34
package/skills-plugin/skills/revise-implementation/SKILL.md +13 -8

package/README.md CHANGED Viewed

@@ -63,7 +63,7 @@ skipping if that version is already on npm).
 6. Report the summary back (`POST /api/runner/jobs/:id/complete`), which fills in
    the pending agent comment in the thread.
-Jobs come in two kinds. **chat** jobs answer a request thread (the flow above).
+Jobs come in two kinds. **comment** jobs answer a request thread (the flow above).
 **init** jobs bootstrap a repository: they clone the default branch onto a fresh
 `flumecode/init-*` branch, run the `flumecode:document` skill to create the
 `.flumecode/` wiki, and open a PR. A repo must be initialized (from its dashboard

package/dist/cli.js CHANGED Viewed

@@ -136,6 +136,19 @@ async function reportHeartbeat(config, claudeCode) {
   noteServerVersion(res);
   if (!res.ok) throw new Error(`heartbeat failed: ${res.status} ${await safeText(res)}`);
 }
/**
 * Upload the captured log text for a job to the FlumeCode server.
 *
 * Sends `{ content }` as JSON in a POST to `/api/runner/jobs/:id/logs`,
 * authenticated with the runner's bearer token and tagged with the runner
 * version header.
 *
 * @param {{ serverUrl: string, token: string }} config - Runner configuration.
 * @param {string} jobId - Id of the job the log belongs to.
 * @param {string} content - Full log text to store.
 * @returns {Promise<void>}
 * @throws {Error} When the server answers with a non-2xx status.
 */
async function uploadJobLog(config, jobId, content) {
  // Escape the id so it can only ever occupy a single path segment.
  const url = `${config.serverUrl}/api/runner/jobs/${encodeURIComponent(jobId)}/logs`;
  const res = await fetch(url, {
    method: "POST",
    headers: {
      authorization: `Bearer ${config.token}`,
      "content-type": "application/json",
      [RUNNER_VERSION_HEADER]: RUNNER_VERSION
    },
    body: JSON.stringify({ content })
  });
  // Hand the response to noteServerVersion before the status check, so it
  // runs on failed uploads too.
  noteServerVersion(res);
  if (!res.ok) throw new Error(`log upload failed: ${res.status} ${await safeText(res)}`);
}
 async function safeText(res) {
   try {
     return await res.text();
@@ -252,59 +265,59 @@ var planInputSchema = {
 };
 var planSchema = z2.object(planInputSchema);
 function renderPlan(plan) {
-  const lines = [];
-  lines.push(`# ${plan.title}`);
-  lines.push("");
-  lines.push(`**Scope** \u2014 \`${plan.scope}\``);
-  lines.push("");
-  lines.push(`**Goal** \u2014 ${plan.goal}`);
+  const lines2 = [];
+  lines2.push(`# ${plan.title}`);
+  lines2.push("");
+  lines2.push(`**Scope** \u2014 \`${plan.scope}\``);
+  lines2.push("");
+  lines2.push(`**Goal** \u2014 ${plan.goal}`);
   if (plan.assumptions.length > 0) {
-    lines.push("");
-    lines.push("**Assumptions**");
+    lines2.push("");
+    lines2.push("**Assumptions**");
     for (const assumption of plan.assumptions) {
-      lines.push(`- ${assumption}`);
+      lines2.push(`- ${assumption}`);
     }
   }
-  lines.push("");
-  lines.push("## Steps");
+  lines2.push("");
+  lines2.push("## Steps");
   for (const [i, step] of plan.steps.entries()) {
-    lines.push("");
-    lines.push(`### ${i + 1}. ${step.title}`);
-    lines.push("");
-    lines.push(step.description);
+    lines2.push("");
+    lines2.push(`### ${i + 1}. ${step.title}`);
+    lines2.push("");
+    lines2.push(step.description);
     if (step.pseudoCode && step.pseudoCode.length > 0) {
       for (const entry of step.pseudoCode) {
-        lines.push("");
-        lines.push(`\`${entry.file}\``);
-        lines.push("");
-        lines.push("```");
-        lines.push(entry.pseudoCode);
-        lines.push("```");
+        lines2.push("");
+        lines2.push(`\`${entry.file}\``);
+        lines2.push("");
+        lines2.push("```");
+        lines2.push(entry.pseudoCode);
+        lines2.push("```");
       }
     }
   }
-  lines.push("");
-  lines.push("## Acceptance criteria");
+  lines2.push("");
+  lines2.push("## Acceptance criteria");
   for (const criterion of plan.acceptanceCriteria) {
-    lines.push(`- [ ] ${criterion}`);
+    lines2.push(`- [ ] ${criterion}`);
   }
   if (plan.risks.length > 0) {
-    lines.push("");
-    lines.push("**Risks / open questions**");
+    lines2.push("");
+    lines2.push("**Risks / open questions**");
     for (const risk of plan.risks) {
-      lines.push(`- ${risk}`);
+      lines2.push(`- ${risk}`);
     }
   }
   if (plan.outOfScope.length > 0) {
-    lines.push("");
-    lines.push("**Out of scope**");
+    lines2.push("");
+    lines2.push("**Out of scope**");
     for (const item of plan.outOfScope) {
-      lines.push(`- ${item}`);
+      lines2.push(`- ${item}`);
     }
   }
-  lines.push("");
-  lines.push(PLAN_MARKER);
-  return lines.join("\n");
+  lines2.push("");
+  lines2.push(PLAN_MARKER);
+  return lines2.join("\n");
 }
 var submitPlanInputSchema = {
   plans: z2.array(z2.object(planInputSchema)).min(1).refine(
@@ -379,27 +392,27 @@ var reportInputSchema = {
 };
 var reportSchema = z3.object(reportInputSchema);
 function renderReport(report) {
-  const lines = [];
-  lines.push(report.summary.trim());
-  lines.push("");
-  lines.push(report.prose.trim());
-  lines.push("");
-  lines.push("## Acceptance criteria");
+  const lines2 = [];
+  lines2.push(report.summary.trim());
+  lines2.push("");
+  lines2.push(report.prose.trim());
+  lines2.push("");
+  lines2.push("## Acceptance criteria");
   for (const ac of report.acceptanceCriteria) {
-    lines.push("");
-    lines.push(`### ${STATUS_ICON[ac.status]} ${ac.criterion}`);
-    lines.push("");
-    lines.push(ac.rationale.trim());
+    lines2.push("");
+    lines2.push(`### ${STATUS_ICON[ac.status]} ${ac.criterion}`);
+    lines2.push("");
+    lines2.push(ac.rationale.trim());
     for (const ev of ac.evidence) {
-      lines.push("");
-      lines.push(ev.note ? `\`${ev.file}\` \u2014 ${ev.note}` : `\`${ev.file}\``);
-      lines.push("");
-      lines.push("```diff");
-      lines.push(ev.hunk.replace(/\n+$/, ""));
-      lines.push("```");
+      lines2.push("");
+      lines2.push(ev.note ? `\`${ev.file}\` \u2014 ${ev.note}` : `\`${ev.file}\``);
+      lines2.push("");
+      lines2.push("```diff");
+      lines2.push(ev.hunk.replace(/\n+$/, ""));
+      lines2.push("```");
     }
   }
-  return lines.join("\n");
+  return lines2.join("\n");
 }
 function createReportTooling() {
   let submittedReport = null;
@@ -426,8 +439,46 @@ function createReportTooling() {
   return { mcpServer, getReport: () => submittedReport };
 }
// src/logger.ts
// In-memory, per-job log buffer. One job is logged at a time: startJobLog()
// resets the buffer, logEvent() appends redacted lines, getJobLog() renders
// the (size-capped) text.
var lines = [];
var secrets = [];
// Upper bound, in UTF-8 bytes, on the log text kept from a single job
// (the truncation marker is added on top of this budget).
var MAX_BYTES = 10 * 1024 * 1024;

/**
 * Begin a fresh log for a job, discarding anything buffered before.
 *
 * @param {{ jobId: string, kind: string, secrets?: string[] }} opts
 *   `secrets` are values that must never appear in the log; empty entries
 *   are ignored.
 */
function startJobLog(opts) {
  lines = [];
  // Longest first: when one secret contains another, masking the shorter one
  // first would leave the tail of the longer one readable.
  secrets = [...new Set((opts.secrets ?? []).filter(Boolean).map(String))].sort(
    (a, b) => b.length - a.length
  );
  logEvent("meta", `job ${opts.jobId} (${opts.kind}) started at ${new Date().toISOString()}`);
}

/**
 * Replace every occurrence of every registered secret with a fixed mask.
 *
 * @param {unknown} s - Text to scrub. Non-strings are coerced; `null` and
 *   `undefined` become the empty string so a caller passing "no output"
 *   cannot crash logging.
 * @returns {string}
 */
function redact(s) {
  let out = typeof s === "string" ? s : String(s ?? "");
  for (const sec of secrets) {
    out = out.replaceAll(sec, "***REDACTED***");
  }
  return out;
}

/**
 * Append one timestamped, redacted line to the current job log.
 *
 * @param {string} section - Short tag identifying the source of the event.
 * @param {unknown} text - Event text (scrubbed with `redact`).
 */
function logEvent(section, text) {
  lines.push(`[${new Date().toISOString()}] [${section}] ${redact(text)}`);
}

/**
 * Render the buffered log as a single string.
 *
 * When the log exceeds `MAX_BYTES` of UTF-8, the middle is dropped and
 * replaced by a marker, keeping roughly the first and last `MAX_BYTES / 2`
 * bytes. Cuts are moved to character boundaries so no multi-byte sequence
 * is split.
 *
 * @returns {string}
 */
function getJobLog() {
  const full = lines.join("\n");
  // Measure in bytes: string length counts UTF-16 units and under-reports
  // any non-ASCII output.
  if (Buffer.byteLength(full, "utf8") <= MAX_BYTES) return full;
  const buf = Buffer.from(full, "utf8");
  const half = Math.floor(MAX_BYTES / 2);
  const isContinuation = (byte) => (byte & 0xc0) === 0x80;
  // Head is [0, headEnd): back up until the byte after the cut starts a character.
  let headEnd = half;
  while (headEnd > 0 && isContinuation(buf[headEnd])) headEnd--;
  // Tail is [tailStart, end): advance until the cut lands on a character start.
  let tailStart = buf.length - half;
  while (tailStart < buf.length && isContinuation(buf[tailStart])) tailStart++;
  const head = buf.toString("utf8", 0, headEnd);
  const tail = buf.toString("utf8", tailStart);
  return `${head}\n\u2026[truncated ${tailStart - headEnd} bytes]\u2026\n${tail}`;
}
 // src/executor.ts
 var FLUME_PLUGIN_DIR = fileURLToPath2(new URL("../skills-plugin", import.meta.url));
/**
 * Flatten a tool-result payload into plain text for logging.
 *
 * - strings are returned unchanged;
 * - arrays are rendered one element per line, using an element's `text`
 *   property when it has one and its JSON form otherwise;
 * - anything else is JSON-encoded.
 *
 * Always returns a string: values `JSON.stringify` cannot represent
 * (such as `undefined`, e.g. a result with no content) become "".
 *
 * @param {unknown} content
 * @returns {string}
 */
function stringifyResult(content) {
  if (typeof content === "string") return content;
  if (Array.isArray(content)) {
    return content
      .map((part) => {
        if (typeof part === "object" && part !== null && "text" in part) {
          return String(part.text);
        }
        return JSON.stringify(part) ?? "";
      })
      .join("\n");
  }
  // JSON.stringify(undefined) is undefined, not a string.
  return JSON.stringify(content) ?? "";
}
 async function runClaudeCode(opts) {
   let finalText = "";
   const { mcpServer, collected } = createWidgetTooling();
@@ -463,11 +514,26 @@ async function runClaudeCode(opts) {
         for (const block of content) {
           if (block && block.type === "text" && typeof block.text === "string") {
             process.stdout.write(block.text);
+            logEvent("agent", block.text);
+          } else if (block && block.type === "tool_use") {
+            logEvent("tool_use", `${block.name} ${JSON.stringify(block.input)}`);
+          }
+        }
+      }
+    } else if (message.type === "user") {
+      const content = message.message?.content;
+      if (Array.isArray(content)) {
+        for (const block of content) {
+          if (block && block.type === "tool_result") {
+            logEvent("tool_result", stringifyResult(block.content));
           }
         }
       }
     } else if (message.type === "result") {
       finalText = message.result ?? "";
+      logEvent("result", finalText);
+    } else if (message.type === "system") {
+      logEvent("system", JSON.stringify(message));
     }
   }
   process.stdout.write("\n");
@@ -547,18 +613,18 @@ function turnHeading(turn, agentName) {
   if (turn.kind === "report") return `${agentName} (implementation report)`;
   return agentName;
 }
-function appendThread(lines, ctx) {
+function appendThread(lines2, ctx) {
   if (!ctx.thread || ctx.thread.length === 0) return;
-  lines.push("", "# Conversation so far");
+  lines2.push("", "# Conversation so far");
   for (const turn of ctx.thread) {
-    lines.push("", `## ${turnHeading(turn, ctx.agentName)}`, turn.content);
+    lines2.push("", `## ${turnHeading(turn, ctx.agentName)}`, turn.content);
   }
 }
 function buildPrompt(ctx) {
   const task = ctx.permissionMode === "plan" ? `Use the \`flumecode:request-to-plan\` skill to handle this request. You are read-only and cannot modify files \u2014 clarify any ambiguity with the user first, then produce a concrete, actionable plan (the specific changes you would make and why). Cite the relevant files. Do NOT call ExitPlanMode or write the plan to a file. When the plan is ready, call the \`submit_plan\` tool with the structured plan fields; the runner renders it into the canonical plan markdown and posts it as your comment.` : `Use the \`flumecode:implement-plan\` skill to handle this request. You are the ORCHESTRATOR: do not implement, review, or write the report yourself \u2014 follow the skill to delegate each phase to subagents via the Task tool, picking the right model for each. Do not commit or push \u2014 the runner handles that.`;
   const orient = `Before investigating raw source, check for a FlumeCode wiki at \`.flumecode/wiki/\`. If it exists, read \`.flumecode/wiki/README.md\` first \u2014 it is the index \u2014 and follow its links to the pages and source paths relevant to this request. If there is no wiki, work from the code directly.`;
   const widgets = `When you need the user to choose, ask it as a widget rather than writing the options as prose: call \`single_select\` for a one-of-N choice (radio buttons) or \`multi_select\` for a "select all that apply" choice (checkboxes). Don't add your own "Other" option \u2014 the UI always provides one. After calling a widget tool, end your turn \u2014 the user's answer comes back as their next message and starts a fresh run.`;
-  const lines = [
+  const lines2 = [
     `You are "${ctx.agentName}", an autonomous coding agent working inside a FlumeCode request.`,
     `The repository ${ctx.repo.fullName} is checked out in your current working directory on branch "${ctx.repo.checkoutBranch}" at commit ${ctx.repo.checkoutSha.slice(0, 7)}.`,
     task,
@@ -566,29 +632,29 @@ function buildPrompt(ctx) {
     widgets
   ];
   if (ctx.permissionMode !== "plan") {
-    lines.push(
+    lines2.push(
       "",
       "These coding guidelines apply to all code produced in this run:",
       "",
       loadRule("coding-guideline")
     );
   }
-  lines.push("", `# Request: ${ctx.request?.title ?? ""}`);
+  lines2.push("", `# Request: ${ctx.request?.title ?? ""}`);
   if (ctx.request?.body) {
-    lines.push("", ctx.request.body);
+    lines2.push("", ctx.request.body);
   }
-  appendThread(lines, ctx);
-  lines.push(
+  appendThread(lines2, ctx);
+  lines2.push(
     "",
     ctx.permissionMode === "plan" ? "Your final reply is posted verbatim as your comment in the thread \u2014 if you called `submit_plan`, the rendered plan is posted automatically; for clarifying questions, your reply text is posted as-is." : "Your final reply is posted verbatim as your comment in the thread \u2014 make it the implementation report your report subagent produced, with nothing added. The runner appends the pull-request link."
   );
-  return lines.join("\n");
+  return lines2.join("\n");
 }
 function buildRevisePrompt(ctx) {
   const task = `Use the \`flumecode:revise-implementation\` skill to handle this turn. The plan below was already implemented (its implementation report appears in the conversation below, tagged as such); the user is now asking to fine-tune that implementation. Decide how to respond to their latest message: if it's unclear, ask a clarifying question (as a widget); if it's a bad idea or not feasible, push back with your reasoning; if it warrants rethinking the plan, call \`submit_plan\` with a revised plan; otherwise implement the requested change. When you implement, you are the ORCHESTRATOR: delegate the work to subagents via the Task tool as the skill directs, and do not commit or push \u2014 the runner handles that, updating the existing pull request.`;
   const orient = `Before investigating raw source, check for a FlumeCode wiki at \`.flumecode/wiki/\`. If it exists, read \`.flumecode/wiki/README.md\` first \u2014 it is the index \u2014 and follow its links to the pages and source paths relevant to this change. If there is no wiki, work from the code directly.`;
   const widgets = `When you need the user to choose, ask it as a widget rather than writing the options as prose: call \`single_select\` for a one-of-N choice (radio buttons) or \`multi_select\` for a "select all that apply" choice (checkboxes). Don't add your own "Other" option \u2014 the UI always provides one. After calling a widget tool, end your turn \u2014 the user's answer comes back as their next message and starts a fresh run.`;
-  const lines = [
+  const lines2 = [
     `You are "${ctx.agentName}", an autonomous coding agent fine-tuning an implemented FlumeCode plan in an ongoing thread with the user.`,
     `The repository ${ctx.repo.fullName} is checked out in your current working directory on the plan's implementation branch "${ctx.repo.checkoutBranch}" \u2014 the same branch its open pull request is built from, so any change you push updates that PR.`,
     task,
@@ -602,20 +668,20 @@ function buildRevisePrompt(ctx) {
     `# Plan: ${ctx.request?.title ?? ""}`
   ];
   if (ctx.request?.body) {
-    lines.push("", ctx.request.body);
+    lines2.push("", ctx.request.body);
   }
-  appendThread(lines, ctx);
-  lines.push(
+  appendThread(lines2, ctx);
+  lines2.push(
     "",
     "The last message above is the user's request for this turn. Your final reply is posted verbatim as your comment in the plan thread: if you implemented a change, make it a short report of what you changed (the runner appends the pull-request link); if you asked a question, called `submit_plan`, or pushed back, your reply text is posted as-is."
   );
-  return lines.join("\n");
+  return lines2.join("\n");
 }
 function buildResolvePrompt(ctx) {
   const mergeBranch = ctx.repo.mergeBranch ?? "the merge branch";
   const task = `Use the \`flumecode:resolve-merge-conflict\` skill to handle this turn. A merge of \`${mergeBranch}\` into this branch is IN PROGRESS and has left conflict markers in your working tree. Resolve every conflicted file by correctly integrating BOTH sides \u2014 the change this session implemented (described below) and the incoming changes from \`${mergeBranch}\` \u2014 never blindly discard either side. Remove all conflict markers and verify the result builds and tests pass. Do NOT \`git add\`, commit, push, or open a pull request \u2014 the runner finalizes the merge commit and updates the existing pull request.`;
   const orient = `Before investigating raw source, check for a FlumeCode wiki at \`.flumecode/wiki/\`. If it exists, read \`.flumecode/wiki/README.md\` first \u2014 it is the index \u2014 and follow its links to the pages and source paths relevant to the conflicting code. If there is no wiki, work from the code directly.`;
-  const lines = [
+  const lines2 = [
     `You are "${ctx.agentName}", an autonomous coding agent resolving merge conflicts on an implemented FlumeCode plan.`,
     `The repository ${ctx.repo.fullName} is checked out in your current working directory on the plan's implementation branch "${ctx.repo.checkoutBranch}" \u2014 the same branch its open pull request is built from \u2014 with an in-progress merge of "${mergeBranch}".`,
     task,
@@ -628,17 +694,17 @@ function buildResolvePrompt(ctx) {
     `# Plan: ${ctx.request?.title ?? ""}`
   ];
   if (ctx.request?.body) {
-    lines.push("", ctx.request.body);
+    lines2.push("", ctx.request.body);
   }
-  appendThread(lines, ctx);
-  lines.push(
+  appendThread(lines2, ctx);
+  lines2.push(
     "",
     "Resolve the conflicts now. Your final reply is posted as a report in the plan thread: summarize which files conflicted and how you resolved each (the runner appends the pull-request link, so don't add one)."
   );
-  return lines.join("\n");
+  return lines2.join("\n");
 }
 function buildDocumentPrompt(ctx) {
-  const lines = [
+  const lines2 = [
     `You are "${ctx.agentName}" maintaining the repository wiki for ${ctx.repo.fullName}.`,
     `An implementation just ran in this working directory to satisfy the request below; its changes are uncommitted in the working tree.`,
     `Use the \`flumecode:document\` skill to bring the wiki in sync with those changes. Only edit files under \`.flumecode/wiki/\` \u2014 do not touch application code. The runner commits the wiki alongside the implementation in the same pull request.`,
@@ -646,14 +712,14 @@ function buildDocumentPrompt(ctx) {
     `# Request: ${ctx.request?.title ?? ""}`
   ];
   if (ctx.request?.body) {
-    lines.push("", ctx.request.body);
+    lines2.push("", ctx.request.body);
   }
-  appendThread(lines, ctx);
-  lines.push("", "When done, reply with a one- or two-line summary of the wiki changes you made.");
-  return lines.join("\n");
+  appendThread(lines2, ctx);
+  lines2.push("", "When done, reply with a one- or two-line summary of the wiki changes you made.");
+  return lines2.join("\n");
 }
 function buildRepairPrompt(ctx, hookLog) {
-  const lines = [
+  const lines2 = [
     `You are "${ctx.agentName}", fixing a failed pre-commit check in the repository ${ctx.repo.fullName}, checked out in your current working directory.`,
     `The changes from the previous step are still uncommitted in the working tree. When the runner tried to commit them, the repository's pre-commit hook \u2014 which runs the project's own checks (lint / typecheck / unit tests) \u2014 failed. Make the working tree pass those checks: fix the failing code or tests at their root. Do NOT delete or skip tests, weaken assertions, or disable the checks to silence the failure. Preserve the intent of the original change; repair only what's broken. Do NOT commit or push \u2014 the runner re-commits once the checks pass.`,
     "",
@@ -669,13 +735,13 @@ function buildRepairPrompt(ctx, hookLog) {
     "",
     "When done, reply with a one-line summary of what you fixed."
   ];
-  return lines.join("\n");
+  return lines2.join("\n");
 }
 function buildReleasePrompt(ctx, baseChecks) {
   const task = `Use the \`flumecode:create-release\` skill to handle this turn. You are driving a release: first analyse commits since the last tag, propose version bumps, and ask the user to confirm via widgets (Phase 1); once the user's widget answers appear in the thread, apply the bumps to package.json files and update CHANGELOG.md (Phase 2). Do NOT commit or push \u2014 the runner handles that and opens the bump PR.`;
   const orient = `Before investigating raw source, check for a FlumeCode wiki at \`.flumecode/wiki/\`. If it exists, read \`.flumecode/wiki/README.md\` first \u2014 it is the index \u2014 and follow its links to the pages and source paths relevant to this release. If there is no wiki, work from the code directly.`;
   const widgets = `When you need the user to choose, ask it as a widget rather than writing the options as prose: call \`single_select\` for a one-of-N choice (radio buttons) or \`multi_select\` for a "select all that apply" choice (checkboxes). Don't add your own "Other" option \u2014 the UI always provides one. After calling a widget tool, end your turn \u2014 the user's answer comes back as their next message and starts a fresh run.`;
-  const lines = [
+  const lines2 = [
     `You are "${ctx.agentName}", an autonomous coding agent driving a FlumeCode release.`,
     `The repository ${ctx.repo.fullName} is checked out in your current working directory on the release bump branch "${ctx.repo.checkoutBranch}".`,
     task,
@@ -689,10 +755,10 @@ function buildReleasePrompt(ctx, baseChecks) {
     `# Release: ${ctx.request?.title ?? ""}`
   ];
   if (ctx.request?.body) {
-    lines.push("", ctx.request.body);
+    lines2.push("", ctx.request.body);
   }
   if (baseChecks && !baseChecks.ok) {
-    lines.push(
+    lines2.push(
       "",
       "# Pre-release check status",
       "",
@@ -708,12 +774,12 @@ function buildReleasePrompt(ctx, baseChecks) {
       "```"
     );
   }
-  appendThread(lines, ctx);
-  lines.push(
+  appendThread(lines2, ctx);
+  lines2.push(
     "",
     "Your final reply is posted verbatim as your comment in the release thread \u2014 if you called widgets (Phase 1), your reply text accompanies the questions; if you applied the bumps (Phase 2), make it the report the skill produced. The runner appends the pull-request link."
   );
-  return lines.join("\n");
+  return lines2.join("\n");
 }
 function buildInitPrompt(ctx) {
   return [
@@ -740,13 +806,20 @@ var exec = promisify(execFile);
 var WORKSPACE_PREFIX = "flume-runner-";
 var MAX_BUFFER = 1 << 24;
 async function git(args) {
-  return exec("git", args, { maxBuffer: MAX_BUFFER });
+  logEvent("git", `git ${args.join(" ")}`);
+  try {
+    const result = await exec("git", args, { maxBuffer: MAX_BUFFER });
+    if (result.stdout.trim()) logEvent("git:out", result.stdout.trim());
+    if (result.stderr.trim()) logEvent("git:err", result.stderr.trim());
+    return result;
+  } catch (err) {
+    logEvent("git:err", String(err.stderr ?? err));
+    throw err;
+  }
 }
-var RUNNER_GIT_EMAIL = "runner@flumecode.local";
-var RUNNER_GIT_NAME = "FlumeCode Runner";
-async function ensureGitIdentity(dir) {
-  await git(["-C", dir, "config", "user.email", RUNNER_GIT_EMAIL]);
-  await git(["-C", dir, "config", "user.name", RUNNER_GIT_NAME]);
/**
 * Configure the committer identity of the repository at `dir` with
 * `git config user.email` / `git config user.name`.
 *
 * @param {string} dir - Path of the checked-out repository.
 * @param {{ name?: string, email?: string }} [identity] - Identity to commit
 *   as. A missing or empty field falls back to the runner's built-in
 *   identity, so an undefined value is never passed to git as an argument.
 * @returns {Promise<void>}
 */
async function ensureGitIdentity(dir, identity) {
  const email = identity?.email || "runner@flumecode.local";
  const name = identity?.name || "FlumeCode Runner";
  await git(["-C", dir, "config", "user.email", email]);
  await git(["-C", dir, "config", "user.name", name]);
}
 function cloneUrl(ctx) {
   const { owner, name, cloneToken } = ctx.repo;
@@ -764,10 +837,21 @@ async function installDependencies(dir) {
   const manager = detectPackageManager(dir);
   if (manager === null) return { status: "skipped" };
   const env = { ...process.env, CI: "1", ADBLOCK: "1", DISABLE_OPENCOLLECTIVE: "1" };
+  logEvent("install", `${manager} install`);
   try {
-    await exec(manager, ["install"], { cwd: dir, maxBuffer: MAX_BUFFER, env, timeout: 5 * 6e4 });
+    const result = await exec(manager, ["install"], {
+      cwd: dir,
+      maxBuffer: MAX_BUFFER,
+      env,
+      timeout: 5 * 6e4
+    });
+    if (result.stdout.trim()) logEvent("install:out", result.stdout.trim());
+    if (result.stderr.trim()) logEvent("install:err", result.stderr.trim());
     return { status: "installed", manager };
   } catch (err) {
+    const e = err;
+    const detail = [e.stdout, e.stderr].map((s) => typeof s === "string" ? s.trim() : "").filter(Boolean).join("\n");
+    logEvent("install:err", detail || (err instanceof Error ? err.message : String(err)));
     return { status: "failed", manager, error: err instanceof Error ? err.message : String(err) };
   }
 }
@@ -809,22 +893,24 @@ async function resetWorkspace(dir) {
   });
 }
 async function prepareAtSha(ctx, dir, reused) {
+  const identity = { name: ctx.agentName, email: ctx.agentEmail };
   if (!reused) {
     await cloneAtSha(ctx, dir);
-    await ensureGitIdentity(dir);
+    await ensureGitIdentity(dir, identity);
     return;
   }
   await git(["-C", dir, "remote", "set-url", "origin", cloneUrl(ctx)]);
-  await ensureGitIdentity(dir);
+  await ensureGitIdentity(dir, identity);
 }
 async function prepareResumingBranch(ctx, dir, reused) {
+  const identity = { name: ctx.agentName, email: ctx.agentEmail };
   if (!reused) {
     const result = await cloneResumingBranch(ctx, dir);
-    await ensureGitIdentity(dir);
+    await ensureGitIdentity(dir, identity);
     return result;
   }
   await git(["-C", dir, "remote", "set-url", "origin", cloneUrl(ctx)]);
-  await ensureGitIdentity(dir);
+  await ensureGitIdentity(dir, identity);
   return { resumed: true };
 }
 async function sweepWorkspaces() {
@@ -887,10 +973,16 @@ ${e.message ?? ""}`;
 async function runRepoChecks(dir) {
   try {
     await git(["-C", dir, "hook", "run", "pre-commit"]);
+    logEvent("checks", "pre-commit hook passed");
     return { ok: true, log: "", skipped: false };
   } catch (err) {
-    if (isUnsupportedGitSubcommand(err)) return { ok: true, log: "", skipped: true };
-    return { ok: false, log: commitFailureLog(err), skipped: false };
+    if (isUnsupportedGitSubcommand(err)) {
+      logEvent("checks", "pre-commit hook skipped (git too old)");
+      return { ok: true, log: "", skipped: true };
+    }
+    const log = commitFailureLog(err);
+    logEvent("checks:err", log);
+    return { ok: false, log, skipped: false };
   }
 }
 async function commitChanges(ctx, dir) {
@@ -1401,6 +1493,11 @@ async function pollLoop(config) {
         await sleep(IDLE_MS);
         continue;
       }
+      startJobLog({
+        jobId: ctx.jobId,
+        kind: ctx.kind,
+        secrets: [ctx.repo?.cloneToken ?? ""].filter(Boolean)
+      });
       const abort = new AbortController();
       let stopPolling = false;
       const scheduleCancelPoll = () => {
@@ -1450,6 +1547,13 @@ async function pollLoop(config) {
         }
       } finally {
         stopPolling = true;
+        if (!abort.signal.aborted) {
+          try {
+            await uploadJobLog(config, ctx.jobId, getJobLog());
+          } catch (e) {
+            console.error(`  (failed to upload logs: ${errorMessage2(e)})`);
+          }
+        }
       }
     }
   } finally {
@@ -1467,9 +1571,9 @@ var MAX_HOOK_LOG_CHARS = 4e3;
 function trimHookLog(log) {
   let trimmed = log.trimEnd();
   let elided = false;
-  const lines = trimmed.split("\n");
-  if (lines.length > MAX_HOOK_LOG_LINES) {
-    trimmed = lines.slice(-MAX_HOOK_LOG_LINES).join("\n");
+  const lines2 = trimmed.split("\n");
+  if (lines2.length > MAX_HOOK_LOG_LINES) {
+    trimmed = lines2.slice(-MAX_HOOK_LOG_LINES).join("\n");
     elided = true;
   }
   if (trimmed.length > MAX_HOOK_LOG_CHARS) {

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@flumecode/runner",
-  "version": "0.8.0",
+  "version": "0.10.0",
   "type": "module",
   "description": "FlumeCode local runner — claims jobs and drives your local Claude Code against a real checkout.",
   "bin": {

package/skills-plugin/skills/implement-plan/SKILL.md CHANGED Viewed

@@ -31,10 +31,11 @@ put it in the prompt, the subagent doesn't have it.
 - Spawn each phase with the **Task** tool, `subagent_type: "general-purpose"`.
 - **Model per phase** (pass it as the Task `model` argument):
-  - `"sonnet"` — implementation and fixes (the code-writing work).
+  - `"sonnet"` — implementation, fixes, and the Verify step (mechanical
+    command-running; Verify is read-only even though it uses sonnet).
   - `"opus"` — acceptance-criteria review, code-quality review, and the report.
-- **Reviewers are read-only.** Tell every review/report subagent to _inspect and
-  report only — never edit, create, or delete files_. Only implementation/fix
+- **Read-only phases.** Tell every review, Verify, and report subagent to _inspect
+  and report only — never edit, create, or delete files_. Only implementation/fix
   subagents may change the working tree.
 - **No git side effects.** Neither you nor any subagent may commit, push, or open
   a PR. Leave the changes in the working tree; the runner commits + opens the PR
@@ -61,11 +62,35 @@ the next step.
 2. **Implement** — Task, `model: "sonnet"`. Give the subagent: the plan steps, a
    pointer to the wiki/orientation, and the coding guidelines (verbatim). Tell it
-   to make all the code changes in the working tree to satisfy the plan, keep the
-   build and tests green where practical, and end by reporting which files it
-   changed and how each step was addressed. It must not commit or push.
-3. **Acceptance-criteria review** — Task, `model: "opus"`, read-only. Give the
+   to make all the code changes in the working tree to satisfy the plan, then
+   self-verify by discovering and running the project's verification commands —
+   checking these sources in order: `package.json` scripts (look for `build`,
+   `typecheck`, `lint`, `test`), `CLAUDE.md`, any `.flumecode/wiki/` page that
+   mentions commands, and `Makefile`. Use whatever is present and appropriate for
+   this repo; do not hardcode specific command strings. Run each discovered
+   command and fix any errors that the edits introduced before returning. If no
+   build/test setup exists in this repo, note that and move on — do not fail. End
+   by reporting: the verification commands it ran and their pass/fail results,
+   which files it changed, and how each plan step was addressed. It must not
+   commit or push.
+3. **Verify (build & tests)** — Task, `model: "sonnet"`, read-only. This step
+   gives the orchestrator an objective, independent build/test signal before the
+   subjective AC and quality reviews. Tell the subagent to:
+   - Discover the project's verification commands from `package.json` scripts
+     (look for `build`, `typecheck`, `lint`, `test`), `CLAUDE.md`,
+     `.flumecode/wiki/` (any page that mentions commands), and `Makefile`. Use
+     what is present; do not hardcode specific command strings.
+   - Run each discovered command and record: the exact command, whether it passed
+     or failed, and — for any failure — a short excerpt of the failing output
+     (enough to diagnose the problem).
+   - If no build/test setup exists in this repo, say so explicitly and pass the
+     gate.
+   - Return a structured per-check result: command, pass/fail, failing-output
+     excerpt (if any).
+   - Must not edit, create, or delete any files.
+4. **Acceptance-criteria review** — Task, `model: "opus"`, read-only. Give the
    subagent the full AC list and tell it to verify each one against the actual
    changes (run `git --no-pager diff`, read the changed files, run tests/build if
    useful). For **each** AC it must return: the criterion text verbatim, a verdict
@@ -82,32 +107,38 @@ the next step.
    to return this as a clean, structured list so you can hand it straight to the
    report step.
-4. **Code-quality review** — Task, `model: "opus"`, read-only. Give the subagent
+5. **Code-quality review** — Task, `model: "opus"`, read-only. Give the subagent
    the coding guidelines (verbatim) and tell it to review the changes for
    violations and quality problems, returning concrete findings as
    `file:line — what — why`, each marked **must-fix** or **nice-to-have**.
-5. **Fix loop.** If the AC review reports any _not met_ AC, or the quality review
+6. **Fix loop.** If the Verify step (step 3) reports any failing check, the AC
+   review (step 4) reports any _not met_ AC, or the quality review (step 5)
    reports any _must-fix_ finding: spawn an **Implement/fix** subagent (Task,
    `model: "sonnet"`) whose prompt lists exactly those findings and tells it to
-   resolve them without regressing the rest. Then re-run only the review(s) that
-   failed. Repeat at most **2** times. If something still fails after that, stop
-   looping and record the gap honestly in the report — do not hide it.
-6. **Report** — Task, `model: "opus"`, read-only. Give the subagent the plan, the AC
-   verdicts (from step 3), and the quality findings, and tell it to run
-   `git --no-pager diff` itself as the **single source of truth** for the report.
-   Every `evidence` hunk it submits must be copied verbatim from that live diff — it
-   must drop or correct any hunk carried over from step 3 that no longer appears in
-   the actual diff, and the **Files changed** list must come from
-   `git --no-pager diff --stat`, not from what an earlier subagent claimed. **If
-   `git --no-pager diff` is empty, the implementation changed nothing:** the report
-   must say so plainly — an honest `summary`, no AC marked `met` with evidence — and
-   must never describe edits that aren't in the diff. Tell it to submit the
-   user-facing report by calling the **`submit_report`** tool — it has that tool
-   available. It must call `submit_report` exactly once and must not edit any files.
-7. **Confirm and end.** Once the report subagent has called `submit_report`, you are
+   resolve them without regressing the rest. When a Verify failure triggered the
+   fix, include the failing command(s) and their error output excerpt(s) from the
+   Verify result in the fix subagent's prompt so it has the full context. After
+   each fix iteration, re-run the Verify step (step 3) in addition to any AC or
+   quality review that failed. Repeat at most **2** times. If something still
+   fails after that, stop looping and record the gap honestly in the report — do
+   not hide it.
+7. **Report** — Task, `model: "opus"`, read-only. Give the subagent the plan, the
+   Verify results (from step 3), the AC verdicts (from step 4), and the quality
+   findings (from step 5), and tell it to run `git --no-pager diff` itself as the **single
+   source of truth** for the report. Every `evidence` hunk it submits must be
+   copied verbatim from that live diff — it must drop or correct any hunk carried
+   over from step 4 that no longer appears in the actual diff, and the **Files
+   changed** list must come from `git --no-pager diff --stat`, not from what an
+   earlier subagent claimed. **If `git --no-pager diff` is empty, the
+   implementation changed nothing:** the report must say so plainly — an honest
+   `summary`, no AC marked `met` with evidence — and must never describe edits
+   that aren't in the diff. Tell it to submit the user-facing report by calling
+   the **`submit_report`** tool — it has that tool available. It must call
+   `submit_report` exactly once and must not edit any files.
+8. **Confirm and end.** Once the report subagent has called `submit_report`, you are
    done — end your turn. The runner reads the submitted report, renders it, posts it
    to the thread, and appends the pull-request link. (Your own final text is only a
    fallback if no report was submitted, so make sure the subagent submits one.)
@@ -120,11 +151,11 @@ The report subagent calls `submit_report` with these fields:
 - **`prose`** — markdown for the remaining sections, using `##` headings:
   **What changed** (the plan steps, each mapped to the concrete changes that satisfy
   it), **Code quality** (the quality-review outcome and anything left as
-  nice-to-have), **Files changed** (the list from the diff), **Build / tests** (what
-  was run and the result, or why it wasn't run), and **Caveats / follow-ups**
-  (anything deferred, unmet, or worth a human's eyes). Do **not** put the
-  acceptance-criteria section in `prose`, and do **not** include a PR link — the
-  runner adds it.
+  nice-to-have), **Files changed** (the list from the diff), **Build / tests** (each
+  verification command and its final pass/fail result, or a note that no
+  build/test setup was found), and **Caveats / follow-ups** (anything deferred,
+  unmet, or worth a human's eyes). Do **not** put the acceptance-criteria section in
+  `prose`, and do **not** include a PR link — the runner adds it.
 - **`acceptanceCriteria`** — one entry per AC from the plan, in plan order, each:
   - `criterion` — the AC text verbatim.
   - `status` — `"met"` / `"not_met"` / `"unclear"`, mirroring the AC review.
@@ -138,7 +169,7 @@ The report subagent calls `submit_report` with these fields:
 - Delegate through Task subagents; don't implement, review, or write the report
   yourself.
-- Right model per phase: `sonnet` to implement/fix, `opus` to review/report.
+- Right model per phase: `sonnet` to implement/fix/verify (Verify is read-only), `opus` to review/report.
 - Make every Task prompt self-contained — subagents see only what you give them.
 - Reviewers and the report writer never modify files.
 - Never commit, push, or open a PR.

package/skills-plugin/skills/revise-implementation/SKILL.md CHANGED Viewed

@@ -65,9 +65,12 @@ essentials:
 - **Scope the work to the request.** This is a fine-tune of an existing
   implementation, not a rebuild. Change only what the user asked for plus what that
   change strictly requires; don't regress the rest of the plan.
-- **Pipeline:** Implement (Task, `model: "sonnet"`) → acceptance/quality review of
-  the change (Task, `model: "opus"`, read-only) → fix loop if needed (≤2) → report
-  (Task, `model: "opus"`, read-only). Reviewers and the report writer never edit.
+- **Pipeline:** Implement (self-runs build/tests and fixes its own errors; Task,
+  `model: "sonnet"`) → Verify (build/tests, read-only; Task, `model: "sonnet"`) →
+  acceptance/quality review (Task, `model: "opus"`, read-only) → fix loop if needed
+  (≤2, re-run Verify after each fix) → report (Task, `model: "opus"`, read-only).
+  Detailed mechanics (command discovery, Verify step spec, fix-loop trigger
+  conditions) are in `implement-plan/SKILL.md` — read it for the full pipeline.
 - **No git side effects.** Never commit, push, or open a PR — leave the changes in
   the working tree. The runner commits them and updates the existing pull request.
@@ -76,11 +79,13 @@ essentials:
 Your last message **is** the comment posted to the plan thread — write it for the
 user:
-- **Implemented:** a short report — what you changed and why, which files, and how
-  it was verified (build/tests). Base "what changed" and "which files" on the actual
-  `git --no-pager diff` (`--stat` for the file list), not on what a subagent claimed;
-  if the diff is empty, say nothing was changed rather than describing edits that
-  aren't there. The runner appends the pull-request link, so don't add one.
+- **Implemented:** a short report — what you changed and why, which files, and the
+  verification results: list each build/test command that was run and its final
+  pass/fail result (or note that no build/test setup was found). Base "what changed"
+  and "which files" on the actual `git --no-pager diff` (`--stat` for the file
+  list), not on what a subagent claimed; if the diff is empty, say nothing was
+  changed rather than describing edits that aren't there. The runner appends the
+  pull-request link, so don't add one.
 - **Clarify / push back:** your question or reasoning, as prose (plus any widget).
 - **Re-plan:** you called `submit_plan`; the rendered plan is posted automatically,
   so keep any extra reply text minimal.