npm - pullfrog - Versions diffs - 0.1.1 → 0.1.3 - Mend

pullfrog 0.1.1 → 0.1.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (22)

package/dist/agents/claude.d.ts +19 -0
package/dist/agents/opencodePlugin.d.ts +60 -0
package/dist/agents/postRun.d.ts +35 -30
package/dist/agents/shared.d.ts +26 -13
package/dist/cli.mjs +678 -241
package/dist/index.js +675 -238
package/dist/internal.js +89 -67
package/dist/mcp/comment.d.ts +35 -0
package/dist/mcp/review.d.ts +2 -4
package/dist/mcp/server.d.ts +1 -68
package/dist/modes.d.ts +10 -0
package/dist/toolState.d.ts +109 -0
package/dist/utils/apiUrl.d.ts +8 -0
package/dist/utils/browser.d.ts +1 -1
package/dist/utils/errorReport.d.ts +1 -1
package/dist/utils/instructions.d.ts +4 -1
package/dist/utils/learnings.d.ts +31 -0
package/dist/utils/run.d.ts +1 -1
package/dist/utils/setup.d.ts +1 -1
package/dist/utils/subprocess.d.ts +0 -1
package/package.json +1 -1
package/dist/mcp/learnings.d.ts +0 -6

package/dist/cli.mjs (changed)

@@ -18415,7 +18415,7 @@ var require_summary = __commonJS({
     exports.summary = exports.markdownSummary = exports.SUMMARY_DOCS_URL = exports.SUMMARY_ENV_VAR = void 0;
     var os_1 = __require("os");
     var fs_1 = __require("fs");
-    var { access, appendFile, writeFile: writeFile3 } = fs_1.promises;
+    var { access, appendFile, writeFile: writeFile4 } = fs_1.promises;
     exports.SUMMARY_ENV_VAR = "GITHUB_STEP_SUMMARY";
     exports.SUMMARY_DOCS_URL = "https://docs.github.com/actions/using-workflows/workflow-commands-for-github-actions#adding-a-job-summary";
     var Summary = class {
@@ -18473,7 +18473,7 @@ var require_summary = __commonJS({
         return __awaiter(this, void 0, void 0, function* () {
           const overwrite = !!(options === null || options === void 0 ? void 0 : options.overwrite);
           const filePath = yield this.filePath();
-          const writeFunc = overwrite ? writeFile3 : appendFile;
+          const writeFunc = overwrite ? writeFile4 : appendFile;
           yield writeFunc(filePath, this._buffer, { encoding: "utf8" });
           return this.emptyBuffer();
         });
@@ -62879,8 +62879,8 @@ var require_snapshot_utils = __commonJS({
 var require_snapshot_recorder = __commonJS({
   "node_modules/.pnpm/undici@7.22.0/node_modules/undici/lib/mock/snapshot-recorder.js"(exports, module) {
     "use strict";
-    var { writeFile: writeFile3, readFile: readFile4, mkdir: mkdir2 } = __require("node:fs/promises");
-    var { dirname: dirname6, resolve: resolve3 } = __require("node:path");
+    var { writeFile: writeFile4, readFile: readFile5, mkdir: mkdir3 } = __require("node:fs/promises");
+    var { dirname: dirname7, resolve: resolve3 } = __require("node:path");
     var { setTimeout: setTimeout2, clearTimeout: clearTimeout2 } = __require("node:timers");
     var { InvalidArgumentError, UndiciError } = require_errors4();
     var { hashId, isUrlExcludedFactory, normalizeHeaders, createHeaderFilters } = require_snapshot_utils();
@@ -63081,7 +63081,7 @@ var require_snapshot_recorder = __commonJS({
           throw new InvalidArgumentError("Snapshot path is required");
         }
         try {
-          const data = await readFile4(resolve3(path3), "utf8");
+          const data = await readFile5(resolve3(path3), "utf8");
           const parsed2 = JSON.parse(data);
           if (Array.isArray(parsed2)) {
             this.#snapshots.clear();
@@ -63111,12 +63111,12 @@ var require_snapshot_recorder = __commonJS({
           throw new InvalidArgumentError("Snapshot path is required");
         }
         const resolvedPath = resolve3(path3);
-        await mkdir2(dirname6(resolvedPath), { recursive: true });
+        await mkdir3(dirname7(resolvedPath), { recursive: true });
         const data = Array.from(this.#snapshots.entries()).map(([hash2, snapshot2]) => ({
           hash: hash2,
           snapshot: snapshot2
         }));
-        await writeFile3(resolvedPath, JSON.stringify(data, null, 2), { flush: true });
+        await writeFile4(resolvedPath, JSON.stringify(data, null, 2), { flush: true });
       }
       /**
        * Clears all recorded snapshots
@@ -97692,14 +97692,14 @@ var require_turndown_cjs = __commonJS({
         } else if (node2.nodeType === 1) {
           replacement = replacementForNode.call(self2, node2);
         }
-        return join17(output, replacement);
+        return join18(output, replacement);
       }, "");
     }
     function postProcess(output) {
       var self2 = this;
       this.rules.forEach(function(rule) {
         if (typeof rule.append === "function") {
-          output = join17(output, rule.append(self2.options));
+          output = join18(output, rule.append(self2.options));
         }
       });
       return output.replace(/^[\t\r\n]+/, "").replace(/[\t\r\n\s]+$/, "");
@@ -97711,7 +97711,7 @@ var require_turndown_cjs = __commonJS({
       if (whitespace.leading || whitespace.trailing) content = content.trim();
       return whitespace.leading + rule.replacement(content, node2, this.options) + whitespace.trailing;
     }
-    function join17(output, replacement) {
+    function join18(output, replacement) {
       var s1 = trimTrailingNewlines(output);
       var s2 = trimLeadingNewlines(replacement);
       var nls = Math.max(output.length - s1.length, replacement.length - s2.length);
@@ -99204,13 +99204,13 @@ import { basename as basename2 } from "node:path";
 // commands/gha.ts
 var core7 = __toESM(require_core(), 1);
 var import_arg = __toESM(require_arg(), 1);
-import { dirname as dirname5 } from "node:path";
+import { dirname as dirname6 } from "node:path";
 // main.ts
 var core6 = __toESM(require_core(), 1);
 import { existsSync as existsSync7, readdirSync } from "node:fs";
-import { readFile as readFile3 } from "node:fs/promises";
-import { join as join16 } from "node:path";
+import { readFile as readFile4 } from "node:fs/promises";
+import { join as join17 } from "node:path";
 // node_modules/.pnpm/@ark+util@0.56.0/node_modules/@ark/util/out/arrays.js
 var liftArray = (data) => Array.isArray(data) ? data : [data];
@@ -107706,7 +107706,7 @@ function buildCommitPrompt(status) {
   ].join("\n");
 }
 function hasPostRunIssues(issues) {
-  return issues.stopHook !== void 0 || issues.dirtyTree !== void 0 || issues.summaryStale !== void 0;
+  return issues.stopHook !== void 0 || issues.dirtyTree !== void 0 || issues.summaryStale !== void 0 || issues.unsubmittedReview !== void 0;
 }
 var agent = (input) => {
   return {
@@ -108006,6 +108006,13 @@ function getApiUrl() {
   log.debug(`resolved API_URL: ${raw2}`);
   return raw2;
 }
+function isLocalApiUrl() {
+  try {
+    return isLocalUrl(new URL(getApiUrl()));
+  } catch {
+    return false;
+  }
+}
 // models.ts
 function provider(config3) {
@@ -109244,6 +109251,7 @@ function CreateCommentTool(ctx) {
         body: bodyWithFooter
       });
       ctx.toolState.wasUpdated = true;
+      log.info(`\xBB created comment ${result.data.id}`);
       if (commentType === "Plan") {
         if (result.data.node_id) {
           await patchWorkflowRunFields(ctx, { planCommentNodeId: result.data.node_id });
@@ -109257,6 +109265,7 @@ function CreateCommentTool(ctx) {
           comment_id: result.data.id,
           body: bodyWithPlanLink
         });
+        log.info(`\xBB updated comment ${updateResult.data.id}`);
         return {
           success: true,
           commentId: updateResult.data.id,
@@ -109290,6 +109299,7 @@ function EditCommentTool(ctx) {
         comment_id: commentId,
         body: bodyWithFooter
       });
+      log.info(`\xBB updated comment ${result.data.id}`);
       return {
         success: true,
         commentId: result.data.id,
@@ -109425,6 +109435,9 @@ ${collapsible}`;
           message: "progress recorded (no GitHub comment created - this may occur for workflow_dispatch events or when there is no associated issue/PR)"
         };
       }
+      if (result.commentId !== void 0) {
+        log.info(`\xBB ${result.action} comment ${result.commentId}`);
+      }
       if (!params.target_plan_comment) {
         ctx.toolState.finalSummaryWritten = true;
       }
@@ -109461,13 +109474,38 @@ var ReplyToReviewComment = type({
     "extremely brief reply (1 sentence max) explaining what was fixed, e.g. 'Fixed by renaming to X' or 'Added null check'"
   )
 });
+function duplicateReplyDecision(params) {
+  const existing = params.existing;
+  if (!existing) return null;
+  if (existing.bodyWithFooter !== params.bodyWithFooter) return null;
+  return {
+    kind: "already-replied",
+    commentId: existing.commentId,
+    url: existing.url,
+    reason: `reply ${existing.commentId} with identical body was already posted in this session; ignoring duplicate call`
+  };
+}
 function ReplyToReviewCommentTool(ctx) {
   return tool({
     name: "reply_to_review_comment",
-    description: "Reply to a PR review comment thread (NOT issue comments \u2014 this only works for inline review comments on PR diffs). Call this for EACH comment you address in AddressReviews mode. Keep replies extremely brief (1 sentence max).",
+    description: "Reply to a PR review comment thread (NOT issue comments \u2014 this only works for inline review comments on PR diffs). Call exactly ONCE per parent comment you address in AddressReviews mode \u2014 duplicate calls with the same body are a no-op. Keep replies extremely brief (1 sentence max).",
     parameters: ReplyToReviewComment,
     execute: execute(async ({ pull_number, comment_id, body }) => {
       const bodyWithFooter = addFooter(ctx, body);
+      const dup = duplicateReplyDecision({
+        existing: ctx.toolState.reviewReplies?.get(comment_id),
+        bodyWithFooter
+      });
+      if (dup) {
+        log.info(`skipping duplicate review reply: ${dup.reason}`);
+        return {
+          success: true,
+          skipped: true,
+          reason: dup.reason,
+          commentId: dup.commentId,
+          url: dup.url
+        };
+      }
       const result = await ctx.octokit.rest.pulls.createReplyForReviewComment({
         owner: ctx.repo.owner,
         repo: ctx.repo.name,
@@ -109475,7 +109513,14 @@ function ReplyToReviewCommentTool(ctx) {
         comment_id,
         body: bodyWithFooter
       });
+      log.info(`\xBB created review comment ${result.data.id} (in reply to ${comment_id})`);
       ctx.toolState.wasUpdated = true;
+      ctx.toolState.reviewReplies ??= /* @__PURE__ */ new Map();
+      ctx.toolState.reviewReplies.set(comment_id, {
+        commentId: result.data.id,
+        url: result.data.html_url,
+        bodyWithFooter
+      });
       return {
         success: true,
         commentId: result.data.id,
@@ -110024,11 +110069,6 @@ async function spawn(options) {
         `spawn activity timer: pid=${child.pid} cmd=${options.cmd} timeout=${activityTimeoutMs}ms`
       );
       activityCheckIntervalId = setInterval(() => {
-        if (options.isPausedExternally?.()) {
-          lastActivityTime = performance3.now();
-          log.debug(`spawn activity check: pid=${child.pid} paused externally`);
-          return;
-        }
         const idleMs = performance3.now() - lastActivityTime;
         log.debug(
           `spawn activity check: pid=${child.pid} idle=${Math.round(idleMs)}ms / ${activityTimeoutMs}ms`
@@ -110227,13 +110267,13 @@ var installNodeDependencies = {
         };
       }
     }
-    const resolved = resolveCommand(agent2, "frozen", []) || resolveCommand(agent2, "install", []);
+    const resolved = resolveCommand(agent2, "frozen", []);
     if (!resolved) {
       return {
         language: "node",
         packageManager,
         dependenciesInstalled: false,
-        issues: [`no install command found for ${agent2}`]
+        issues: [`no frozen-install command available for ${agent2}`]
       };
     }
     if (options.ignoreScripts) {
@@ -142549,7 +142589,7 @@ var import_semver = __toESM(require_semver2(), 1);
 // package.json
 var package_default = {
   name: "pullfrog",
-  version: "0.1.1",
+  version: "0.1.3",
   type: "module",
   bin: {
     pullfrog: "dist/cli.mjs",
@@ -143493,6 +143533,10 @@ ${integrateStep}
       if (!pushed) {
         throw lastErr instanceof Error ? lastErr : new Error(String(lastErr));
       }
+      const pushedSha = $("git", ["rev-parse", "HEAD"], { log: false }).trim();
+      log.info(
+        `\xBB pushed branch ${branch} to ${pushDest.remoteName}/${pushDest.remoteBranch} (sha ${pushedSha})`
+      );
       return {
         success: true,
         branch,
@@ -143641,6 +143685,7 @@ function DeleteBranchTool(ctx) {
       await $git("push", ["origin", "--delete", `refs/heads/${params.branchName}`], {
         token: ctx.gitToken
       });
+      log.info(`\xBB deleted branch ${params.branchName}`);
       return { success: true, deleted: params.branchName };
     })
   });
@@ -143666,6 +143711,7 @@ function PushTagsTool(ctx) {
       await $git("push", pushArgs, {
         token: ctx.gitToken
       });
+      log.info(`\xBB pushed tag ${params.tag}`);
       return { success: true, tag: params.tag };
     })
   });
@@ -143820,7 +143866,7 @@ var CreatePullRequestReview = type({
     "1-2 sentence high-level summary with urgency level, critical callouts, and feedback about code outside the diff. Specific feedback on diff lines goes in 'comments' array."
   ).optional(),
   approved: type.boolean.describe(
-    "Set to true to submit as an approval. ONLY when the review contains no actionable feedback \u2014 neither inline comments nor actionable content in the body. Defaults to false (comment-only review). Rejections are not supported."
+    "Set to true to submit as an approval. Use for both 'no issues found' and informational `> [!NOTE]` reviews where the PR is mergeable as-is and nothing in the body warrants code changes \u2014 approving also suppresses the Fix-button footer affordance so users don't dispatch a fix run on non-actionable feedback. Reserve approved: false for `> [!IMPORTANT]` (recommended changes) and `> [!CAUTION]` (critical) reviews. Defaults to false (comment-only review). Rejections are not supported."
   ).optional(),
   commit_id: type.string.describe("Optional SHA of the commit being reviewed. Defaults to latest.").optional(),
   comments: type({
@@ -143990,6 +144036,7 @@ function CreatePullRequestReviewTool(ctx) {
       }
       const reviewId = result.data.id;
       const reviewNodeId = result.data.node_id;
+      log.info(`\xBB created review ${reviewId} on pull request #${pull_number}`);
       const actuallyReviewedSha = ctx.toolState.checkoutSha ?? params.commit_id;
       ctx.toolState.review = {
         id: reviewId,
@@ -144349,6 +144396,8 @@ async function ensureBeforeShaReachable(params) {
   }
 }
 var STALE_LOCK_AGE_MS = 3e4;
+var PULL_REF_RETRY_DELAYS_MS = [2e3, 5e3, 1e4];
+var PULL_REF_MISSING_PATTERN = /couldn't find remote ref pull\/\d+\/head/i;
 var GIT_LOCK_PATHS = [
   ".git/shallow.lock",
   ".git/index.lock",
@@ -144374,6 +144423,27 @@ function cleanupStaleGitLocks() {
     }
   }
 }
+async function isPullRequestStillDispatchable(args2) {
+  try {
+    const { data } = await args2.octokit.rest.pulls.get({
+      owner: args2.owner,
+      repo: args2.repo,
+      pull_number: args2.pr.number
+    });
+    if (data.state !== "open") return false;
+    if (data.head.sha !== args2.pr.headSha) return false;
+    return true;
+  } catch {
+    return true;
+  }
+}
+async function abortIfPullRequestMoved(args2) {
+  const stillValid = await isPullRequestStillDispatchable(args2);
+  if (stillValid) return;
+  throw new Error(
+    `PR #${args2.pr.number} is no longer in the state it was at dispatch (likely closed, merged, or force-pushed between webhook fire and run start). aborting checkout \u2014 re-trigger the run if this PR is still active.`
+  );
+}
 async function checkoutPrBranch(pr, params) {
   const { octokit, owner, name, gitToken, toolState, beforeSha } = params;
   log.info(`\xBB checking out PR #${pr.number}...`);
@@ -144390,9 +144460,26 @@ async function checkoutPrBranch(pr, params) {
   if (!alreadyOnBranch) {
     $("git", ["checkout", "-B", pr.baseRef, `origin/${pr.baseRef}`], { log: false });
     log.debug(`\xBB fetching PR #${pr.number} (${localBranch})...`);
-    await $git("fetch", ["--no-tags", "origin", `+pull/${pr.number}/head:${localBranch}`], {
-      token: gitToken
-    });
+    await retry(
+      async () => {
+        try {
+          await $git("fetch", ["--no-tags", "origin", `+pull/${pr.number}/head:${localBranch}`], {
+            token: gitToken
+          });
+        } catch (e) {
+          const msg = e instanceof Error ? e.message : String(e);
+          if (PULL_REF_MISSING_PATTERN.test(msg)) {
+            await abortIfPullRequestMoved({ octokit, owner, repo: name, pr });
+          }
+          throw e;
+        }
+      },
+      {
+        delaysMs: PULL_REF_RETRY_DELAYS_MS,
+        label: `pull/${pr.number}/head fetch`,
+        shouldRetry: (e) => PULL_REF_MISSING_PATTERN.test(e instanceof Error ? e.message : String(e))
+      }
+    );
     $("git", ["checkout", localBranch], { log: false });
     log.debug(`\xBB checked out PR #${pr.number}`);
     toolState.checkoutSha = $("git", ["rev-parse", "HEAD"], { log: false }).trim();
@@ -144854,6 +144941,7 @@ function IssueTool(ctx) {
         labels: params.labels ?? [],
         assignees: params.assignees ?? []
       });
+      log.info(`\xBB created issue #${result.data.number} (id ${result.data.id})`);
       const nodeId = result.data.node_id;
       if (typeof nodeId === "string" && nodeId.length > 0) {
         await patchWorkflowRunFields(ctx, {
@@ -145045,6 +145133,7 @@ function AddLabelsTool(ctx) {
         issue_number,
         labels
       });
+      log.info(`\xBB added labels [${labels.join(", ")}] to issue #${issue_number}`);
       return {
         success: true,
         labels: result.data.map((label) => label.name)
@@ -145053,40 +145142,6 @@ function AddLabelsTool(ctx) {
   });
 }
-// mcp/learnings.ts
-var UpdateLearningsParams = type({
-  learnings: type.string.describe(
-    "the FULL merged learnings as a flat bullet list. each line starts with `- `. one discrete, actionable fact per bullet. combine existing bullets from the prompt with your new discoveries. deduplicate \u2014 if an existing bullet covers the same fact, update it in place rather than adding a new one. drop bullets that are clearly wrong or no longer relevant to the current codebase. keep the list focused and concise."
-  )
-});
-function UpdateLearningsTool(ctx) {
-  return tool({
-    name: "update_learnings",
-    description: "persist operational learnings about this repository (setup steps, test commands, key conventions, patterns). ONLY call this when you have high confidence the information is correct and broadly useful for future runs \u2014 not for one-off findings or uncertain observations. format: flat bullet list (`- ` per line, one fact per bullet). pass the FULL merged list \u2014 combine existing learnings from the prompt with new discoveries. deduplicate, and drop bullets that are clearly wrong or no longer relevant to the current codebase.",
-    parameters: UpdateLearningsParams,
-    execute: execute(async (params) => {
-      const response = await apiFetch({
-        path: `/api/repo/${ctx.repo.owner}/${ctx.repo.name}/learnings`,
-        method: "PATCH",
-        headers: {
-          authorization: `Bearer ${ctx.apiToken}`,
-          "content-type": "application/json"
-        },
-        body: JSON.stringify({
-          learnings: params.learnings,
-          model: ctx.toolState.model
-        }),
-        signal: AbortSignal.timeout(1e4)
-      });
-      if (!response.ok) {
-        const error49 = await response.text();
-        throw new Error(`failed to update learnings: ${error49}`);
-      }
-      return { success: true };
-    })
-  });
-}
 // mcp/output.ts
 var import_ajv3 = __toESM(require_ajv(), 1);
 var SetOutputParams = type({
@@ -145180,6 +145235,7 @@ function UpdatePullRequestBodyTool(ctx) {
         pull_number: params.pull_number,
         body: bodyWithFooter
       });
+      log.info(`\xBB updated pull request #${result.data.number}`);
       ctx.toolState.wasUpdated = true;
       return {
         success: true,
@@ -145207,6 +145263,7 @@ function CreatePullRequestTool(ctx) {
         base: params.base,
         draft: params.draft ?? false
       });
+      log.info(`\xBB created pull request #${result.data.number} (id ${result.data.id})`);
       const reviewer = ctx.payload.triggerer;
       if (reviewer) {
         try {
@@ -145758,7 +145815,7 @@ function ResolveReviewThreadTool(ctx) {
           threadId: params.thread_id
         });
         const thread = response.resolveReviewThread.thread;
-        log.debug(`resolved thread ${thread.id}, isResolved=${thread.isResolved}`);
+        log.info(`\xBB resolved review thread ${thread.id}`);
         return {
           thread_id: thread.id,
           is_resolved: thread.isResolved,
@@ -145799,13 +145856,14 @@ function buildModeOverrides(t2) {
 An existing plan comment was found for this issue. Update that comment with the revised plan \u2014 do not create a new plan comment.
-1. Use \`previousPlanBody\` from this response as the plan to revise; do not call \`get_issue\` or \`get_issue_comments\`.
-2. Revise the plan based on the user's request:
+1. **task list**: create your task list for this run as your first action.
+2. Use \`previousPlanBody\` from this response as the plan to revise; do not call \`get_issue\` or \`get_issue_comments\`.
+3. Revise the plan based on the user's request:
    - incorporate the current plan (\`previousPlanBody\`) and the user's revision request
    - gather relevant codebase context (file paths, architecture notes from AGENTS.md)
    - produce a structured plan with clear milestones
-3. Call \`${t2("report_progress")}\` with the full revised plan text and \`{ target_plan_comment: true }\` so it updates the existing plan comment (not the progress comment).
-4. Then post a short note to the progress comment (e.g. "Plan has been updated in the comment above.") via \`${t2("report_progress")}\` so it is not left as "Leaping...".`
+4. Call \`${t2("report_progress")}\` with the full revised plan text and \`{ target_plan_comment: true }\` so it updates the existing plan comment (not the progress comment).
+5. Then post a short note to the progress comment (e.g. "Plan has been updated in the comment above.") via \`${t2("report_progress")}\` so it is not left as "Leaping...".`
   };
 }
 var modeInstructionParent = {
@@ -146230,24 +146288,13 @@ function UploadFileTool(ctx) {
       if (!uploadResponse.ok) {
         throw new Error(`failed to upload file: ${uploadResponse.statusText}`);
       }
+      log.info(`\xBB uploaded file ${publicUrl}`);
       return { success: true, publicUrl, filename, contentLength, contentType };
     })
   });
 }
 // mcp/server.ts
-function initToolState(params) {
-  const resolved = parseProgressComment(params.progressComment);
-  if (resolved) {
-    log.info(`\xBB using pre-created progress comment: ${resolved.id} (${resolved.type})`);
-  }
-  return {
-    progressComment: resolved,
-    hadProgressComment: !!resolved,
-    backgroundProcesses: /* @__PURE__ */ new Map(),
-    usageEntries: []
-  };
-}
 var mcpPortStart = 3764;
 var mcpPortAttempts = 100;
 var mcpHost = "127.0.0.1";
@@ -146323,8 +146370,7 @@ function buildOrchestratorTools(ctx, outputSchema) {
     PushTagsTool(ctx),
     DeleteBranchTool(ctx),
     CreatePullRequestTool(ctx),
-    UpdatePullRequestBodyTool(ctx),
-    UpdateLearningsTool(ctx)
+    UpdatePullRequestBodyTool(ctx)
   ];
 }
 async function tryStartMcpServer(ctx, tools, port) {
@@ -146481,9 +146527,6 @@ Rules:
 - Do NOT include a changelog section \u2014 the key changes list serves this purpose
 - Focus on *intent*, not *what* \u2014 the diff already shows what changed
 - Get the file count and commit count from the checkout_pr metadata, not by counting manually`;
-function learningsStep(t2, n) {
-  return `${n}. **learnings** (only if high confidence): if you discovered something about repo setup, test commands, conventions, or patterns that you are confident is correct and would reliably help future runs, call \`${t2("update_learnings")}\` to persist it. skip this step if you are unsure or the finding is speculative/one-off. format as a flat bullet list (\`- \` per line, one fact per bullet). merge with existing learnings from the prompt \u2014 pass the FULL merged list. deduplicate, and drop bullets that are clearly wrong or no longer relevant to the current codebase.`;
-}
 function computeModes(agentId) {
   const t2 = (toolName) => formatMcpToolRef(agentId, toolName);
   return [
@@ -146492,18 +146535,20 @@ function computeModes(agentId) {
       description: "Implement, build, create, or develop code changes; make specific changes to files or features; execute a plan; or handle tasks with specific implementation details",
       prompt: `### Checklist
-1. **plan** (optional, for complex tasks): analyze requirements, read AGENTS.md and relevant code, produce a step-by-step implementation plan.
+1. **task list**: create your task list for this run as your first action.
-2. **setup**: checkout or create the branch:
+2. **plan** (optional, for complex tasks): analyze requirements, read AGENTS.md and relevant code, produce a step-by-step implementation plan.
+3. **setup**: checkout or create the branch:
    - **PR event, modifying the existing PR**: call \`${t2("checkout_pr")}\`
    - **new branch**: use \`${t2("git")}\` to create a branch (\`git checkout -b pullfrog/branch-name\`)
-3. **build**: implement changes using your native file and shell tools:
+4. **build**: implement changes using your native file and shell tools:
    - follow the plan (if you ran a plan phase)
    - plan your approach before writing code: identify which files need to change, key design decisions, and edge cases. for non-trivial changes, consider whether there's a more elegant approach.
    - run relevant tests/lints before committing
-4. **self-review**: judgment call \u2014 does YOUR diff warrant a fresh-eyes pass?
+5. **self-review**: judgment call \u2014 does YOUR diff warrant a fresh-eyes pass?
    Skip self-review (commit directly) when the diff is **genuinely trivial**:
    - doc typos, comment-only edits, whitespace/format-only, import reordering
@@ -146534,13 +146579,11 @@ function computeModes(agentId) {
    Review the findings, address valid points, and discard nitpicks or false positives. The reviewer is fallible \u2014 it biases toward *recommending additions* (defensive checks for impossible cases, extra logging, new abstractions used once, comments restating code, tests asserting tautologies, "just-in-case" guards). For each finding, ask: would applying it leave the code more sound, correct, AND elegant? Two-out-of-three is usually a signal to look harder for a fix that gets all three before settling for one that trades elegance for correctness. Reject bloat-shaped findings without applying them, and after applying the rest re-read your diff and be discerning about what *you just changed*: if any fix turned out to be bloat in context, revert it. The goal is code that is sound and correct *while remaining elegant*; the smallest diff that fixes the real defect almost always wins. Then verify only intended changes are present, no debug artifacts or commented-out code remain, no unrelated files were modified. Commit locally via shell (\`git add . && git commit -m "..."\`).
-5. **finalize**:
+6. **finalize**:
    - confirm a clean working tree, then push via \`${t2("push_branch")}\` (see *SYSTEM* Git rules if this fails \u2014 prepush errors are usually the repo's tests/lint, not infra timeouts)
    - create a PR via \`${t2("create_pull_request")}\`
    - call \`${t2("report_progress")}\` with the PR link or the exact error if push/PR failed
-${learningsStep(t2, 6)}
 ### Notes
 For simple, well-defined tasks, skip the plan phase and go straight to build.`
@@ -146550,27 +146593,27 @@ For simple, well-defined tasks, skip the plan phase and go straight to build.`
       description: "Address PR review feedback; respond to reviewer comments; make requested changes to an existing PR",
       prompt: `### Checklist
-1. Checkout the PR branch via \`${t2("checkout_pr")}\`.
+1. **task list**: create your task list for this run as your first action.
-2. Fetch review comments via \`${t2("get_review_comments")}\`.
+2. Checkout the PR branch via \`${t2("checkout_pr")}\`.
-3. For each comment:
+3. Fetch review comments via \`${t2("get_review_comments")}\`.
+4. For each comment:
    - understand the feedback
    - evaluate whether applying it would leave the code more **sound, correct, AND elegant**. reviewers are fallible and bias toward *recommending additions* (defensive checks for impossible cases, extra abstractions, comments restating obvious code, tests asserting tautologies, "just-in-case" guards). if a request would add bloat \u2014 ceremony without commensurate correctness benefit \u2014 push back in your reply rather than mechanically applying it. two-out-of-three is usually a signal to look harder for a fix that gets all three before settling.
    - if the request stands, make the code change using your native tools; otherwise reply explaining why
    - record what was done (or why nothing was done)
-4. Quality check:
+5. Quality check:
    - test changes, then review the diff before committing \u2014 verify only intended changes are present, no debug artifacts remain, no fix turned out to be bloat in context (revert any that did), and the changes are clean enough that a senior engineer would approve without hesitation
    - commit locally via shell (\`git add . && git commit -m "..."\`)
-5. Finalize:
+6. Finalize:
    - confirm a clean working tree, then push via \`${t2("push_branch")}\` (same push/prepush guidance as Build mode in *SYSTEM*)
-   - reply to each comment using \`${t2("reply_to_review_comment")}\`
+   - reply to each comment **exactly once** using \`${t2("reply_to_review_comment")}\` \u2014 do not re-emit the same call (the runtime dedupes identical bodies and the second call is wasted)
    - resolve addressed threads via \`${t2("resolve_review_thread")}\`
-   - call \`${t2("report_progress")}\` with a brief summary (or the exact push error if push failed)
-${learningsStep(t2, 6)}`
+   - call \`${t2("report_progress")}\` with a brief summary (or the exact push error if push failed)`
     },
     // Review and IncrementalReview use the multi-lens orchestrator pattern
     // (canonical source: .claude/commands/anneal.md). The orchestrator does
@@ -146589,11 +146632,13 @@ ${learningsStep(t2, 6)}`
       description: "Review code, PRs, or implementations; provide feedback or suggestions; identify issues; or check code quality, style, and correctness",
       prompt: `### Checklist
-1. **checkout**: call \`${t2("checkout_pr")}\` \u2014 this returns PR metadata and a \`diffPath\`. read the diff TOC end-to-end and treat its file line ranges as your coverage checklist.
+1. **task list**: create your task list for this run as your first action.
-2. **triage**: orient yourself on the PR \u2014 identify *what kind of thing this is* (domain it touches, seams it crosses, external contracts it depends on, user-facing surfaces it changes). orientation only \u2014 defer specific defect-hunting to the subagents; pre-reviewing biases the lenses you pick. use \`${t2("get_pull_request")}\` and other read-only GitHub tools for additional context if needed.
+2. **checkout**: call \`${t2("checkout_pr")}\` \u2014 this returns PR metadata and a \`diffPath\`. read the diff TOC end-to-end and treat its file line ranges as your coverage checklist.
-   if the PR is **genuinely trivial**, skip steps 3\u20134 entirely and submit a \`No new issues found.\` review per step 5. there's no value in dispatching even one lens for a typo.
+3. **triage**: orient yourself on the PR \u2014 identify *what kind of thing this is* (domain it touches, seams it crosses, external contracts it depends on, user-facing surfaces it changes). orientation only \u2014 defer specific defect-hunting to the subagents; pre-reviewing biases the lenses you pick. use \`${t2("get_pull_request")}\` and other read-only GitHub tools for additional context if needed.
+   if the PR is **genuinely trivial**, skip steps 4\u20135 entirely and submit a \`No new issues found.\` review per step 6. there's no value in dispatching even one lens for a typo.
    "Genuinely trivial" (skip):
    - single-word doc typo, whitespace/format-only, comment-only across any number of files
@@ -146638,7 +146683,7 @@ ${learningsStep(t2, 6)}`
    - **holistic** \u2014 does the PR make sense as a whole? symmetric flows (delete for every create, rollback for every migration)?
    - **subsystem lenses** (invent as the PR demands) \u2014 auth, billing, payments, schema migration, webhooks, secrets, RBAC, multi-tenant isolation, cron/scheduling, etc.
-3. **fan out**: dispatch one \`${REVIEWER_AGENT_NAME}\` subagent per lens \u2014 its baked-in system prompt enforces the non-mutative + non-recursive contract (read-only file/search/web tools and read-only MCP queries; no writes, shell side effects, state-changing MCP calls, or nested subagent dispatch). when picking 2+ lenses, dispatch them in a **single assistant turn with multiple parallel subagent calls**; issuing one and awaiting reply before the next collapses the fan-out into a serial review. if a subagent errors out, times out, or returns nothing usable, retry once with the same lens; if it still fails, proceed with partial coverage and note the missing lens in the review body \u2014 do not skip step 3 entirely on a single subagent failure. each subagent gets:
+4. **fan out**: dispatch one \`${REVIEWER_AGENT_NAME}\` subagent per lens \u2014 its baked-in system prompt enforces the non-mutative + non-recursive contract (read-only file/search/web tools and read-only MCP queries; no writes, shell side effects, state-changing MCP calls, or nested subagent dispatch). when picking 2+ lenses, dispatch them in a **single assistant turn with multiple parallel subagent calls**; issuing one and awaiting reply before the next collapses the fan-out into a serial review. if a subagent errors out, times out, or returns nothing usable, retry once with the same lens; if it still fails, proceed with partial coverage and note the missing lens in the review body \u2014 do not skip step 4 entirely on a single subagent failure. each subagent gets:
    - the diff path / target \u2014 reading the diff and the codebase is its job
    - **only one lens** \u2014 never a multi-section "review for X, Y, and Z" prompt
    - **a Task \`description\` set to the lens name** (e.g. \`"security"\`, \`"correctness"\`, \`"billing-subsystem"\`) \u2014 the harness reads this field to label the subagent's log lines so parallel runs can be told apart in CI output. without it, every subagent shows up as \`subagent#N\`.
@@ -146653,20 +146698,33 @@ ${learningsStep(t2, 6)}`
    - do NOT pre-shape their output with a finding schema
    - do NOT mention the other lenses (independence is the point \u2014 overlapping findings are a strong signal)
-4. **aggregate & draft**: merge findings; de-dup overlaps (two lenses catching the same issue = higher-confidence signal); trace each finding yourself before accepting it. drop praise, style preferences, speculative/unverified claims, findings about pre-existing code unrelated to the PR (heuristic: if the finding's root cause lives in lines this PR added or modified, it's in scope; otherwise drop unless the PR plausibly introduced or amplified the regression), and anything not actionable. also drop **bloat-shaped findings** \u2014 proposed fixes that would add defensive checks for cases that can't happen, abstractions used once, comments restating obvious code, tests asserting tautologies, or "just-in-case" guards. subagents are fallible and bias toward recommending changes; the bar for an actionable inline comment is sound + correct + elegant. recommending a change that improves only one of the three (or worse, degrades elegance to nominally improve correctness) makes the codebase worse, not better.
+5. **aggregate & draft**: merge findings; de-dup overlaps (two lenses catching the same issue = higher-confidence signal); trace each finding yourself before accepting it. drop praise, style preferences, speculative/unverified claims, findings about pre-existing code unrelated to the PR (heuristic: if the finding's root cause lives in lines this PR added or modified, it's in scope; otherwise drop unless the PR plausibly introduced or amplified the regression), and anything not actionable. also drop **bloat-shaped findings** \u2014 proposed fixes that would add defensive checks for cases that can't happen, abstractions used once, comments restating obvious code, tests asserting tautologies, or "just-in-case" guards. subagents are fallible and bias toward recommending changes; the bar for an actionable inline comment is sound + correct + elegant. recommending a change that improves only one of the three (or worse, degrades elegance to nominally improve correctness) makes the codebase worse, not better.
    for surviving findings, draft inline comments with NEW line numbers from the diff. every comment must be actionable, 2-3 sentences max. use GitHub permalink format for code references. for impact-analysis findings (stale references after rename/remove), report them in the review body ordered by severity (runtime breakage > incorrect docs > stale comments) rather than as inline comments unless they're anchored to a specific line.
-5. **submit**: ALWAYS submit exactly one review via \`${t2("create_pull_request_review")}\`. Do NOT call \`report_progress\` \u2014 the review is the final record and the progress comment will be cleaned up automatically.
+6. **submit**: ALWAYS submit exactly one review via \`${t2("create_pull_request_review")}\`. Do NOT call \`report_progress\` \u2014 the review is the final record and the progress comment will be cleaned up automatically.
    note: the first create_pull_request_review submission may error with a one-time diff-coverage nudge listing unread TOC regions. retry the same call to proceed \u2014 optionally after reading the listed ranges. the pre-flight will not block again this session.
    The review body is structured as: \`[optional alert blockquote]\` \u2192 \`[PR summary using the default format below]\`. Inline comments are passed via the \`comments\` parameter, not in the body.
-   - **critical issues** (blocks merge \u2014 bugs, security, data loss):
+   GitHub alert blockquotes render at four visual intensities \u2014 the callout is what the author sees first, so pick the one that matches what you want them to do:
+   - \`[!CAUTION]\` \u2014 large red banner. Reads as "this will break something."
+   - \`[!IMPORTANT]\` \u2014 large purple banner. Reads as "you need to look at this before merging."
+   - \`[!NOTE]\` \u2014 small blue inline callout. Reads as "FYI, here's something worth noting."
+   - no callout \u2014 plain text. Reads as routine review output.
+   Two reinforcing levers: callout intensity (above) and \`approved\` (which gates the footer Fix-button affordance \u2014 Fix renders on every non-approving review, so \`approved: true\` suppresses it). Wrapping mergeable feedback in \`[!IMPORTANT]\` trains users to click Fix on reviews that don't need fixing. Pick the tier the author's actual next action justifies.
+   - **critical issues** (blocks merge \u2014 bugs, security, data loss, broken core flows):
      \`approved: false\`. Body opens with \`> [!CAUTION]\\n> This PR introduces ...\`, followed by the PR summary. Include all inline comments via \`comments\`.
-   - **recommended changes** (non-critical):
-     \`approved: false\`. Body opens with \`> [!IMPORTANT]\\n> Consider ...\`, followed by the PR summary. Include all inline comments via \`comments\`.
+   - **must-address non-critical findings** (real consequences if shipped \u2014 incorrect behavior in non-critical paths, missing validation on user input, regressions the author should fix before merge):
+     \`approved: false\`. Body opens with \`> [!IMPORTANT]\\n> ...\`, followed by the PR summary. Reserve this tier for findings with concrete fallout \u2014 do NOT use \`[!IMPORTANT]\` for nits, style preferences, or "consider also" suggestions. Include all inline comments via \`comments\`.
+   - **minor suggestions only** (single-line nits, doc/comment polish, defer-able observations, "rough edges"):
+     \`approved: false\`. NO alert blockquote. Body opens directly with the PR summary. Include all inline comments via \`comments\`.
+   - **informational observations** (mergeable as-is, nothing actionable \u2014 e.g. prior feedback addressed cleanly, surfacing a minor stale doc reference, calling out something noteworthy without recommending a change):
+     \`approved: true\`. Body opens with \`> [!NOTE]\\n> ...\`, followed by the PR summary. Do NOT include inline \`comments\` \u2014 \`[!NOTE]\` signals "no action needed", which contradicts an actionable anchor; if a point is concrete enough to anchor to a line, downgrade the whole review to "minor suggestions only" (\`approved: false\`) instead.
    - **no actionable issues**:
      \`approved: true\`. Body opens with \`No new issues found.\` followed by the PR summary.
@@ -146675,7 +146733,7 @@ ${PR_SUMMARY_FORMAT}`
     // IncrementalReview shares Review's multi-lens orchestrator pattern but
     // scopes the target to the incremental diff. The "issues must be NEW
     // since the last Pullfrog review" filter lives at aggregation time
-    // (step 5), NOT in the subagent prompt — pushing the filter into
+    // (step 6), NOT in the subagent prompt — pushing the filter into
     // subagents matches the canonical anneal anti-pattern of "list known
     // pre-existing failures — don't flag these" and suppresses signal on
     // regressions the new commits amplified. The review body is just
@@ -146688,15 +146746,17 @@ ${PR_SUMMARY_FORMAT}`
       description: "Re-review a PR after new commits are pushed; focus on new changes since the last review",
       prompt: `### Checklist
-1. **checkout**: call \`${t2("checkout_pr")}\` \u2014 this returns PR metadata, \`diffPath\` (full diff), and \`incrementalDiffPath\` (changes since last reviewed version, if available). read the diff TOC first and use its line ranges as your coverage checklist.
+1. **task list**: create your task list for this run as your first action.
+2. **checkout**: call \`${t2("checkout_pr")}\` \u2014 this returns PR metadata, \`diffPath\` (full diff), and \`incrementalDiffPath\` (changes since last reviewed version, if available). read the diff TOC first and use its line ranges as your coverage checklist.
-2. **incremental scope**: if \`incrementalDiffPath\` is present, read it to see what changed since the last review. this is a range-diff that isolates the net changes, filtering out base branch noise. if not present, fall back to reviewing the full PR diff and determine what changed since Pullfrog's most recent review.
+3. **incremental scope**: if \`incrementalDiffPath\` is present, read it to see what changed since the last review. this is a range-diff that isolates the net changes, filtering out base branch noise. if not present, fall back to reviewing the full PR diff and determine what changed since Pullfrog's most recent review.
-3. **prior feedback**: fetch previous reviews via \`${t2("list_pull_request_reviews")}\`. for the most recent Pullfrog review, call \`${t2("get_review_comments")}\` with the review ID to retrieve specific prior line-level feedback. you'll use this to filter your aggregation in step 5 \u2014 anything already flagged in a prior review and not changed by the new commits should not be re-raised. you do NOT need to render this in the review body; the rolling PR summary snapshot is the durable record of what's been addressed.
+4. **prior feedback**: fetch previous reviews via \`${t2("list_pull_request_reviews")}\`. for the most recent Pullfrog review, call \`${t2("get_review_comments")}\` with the review ID to retrieve specific prior line-level feedback. you'll use this to filter your aggregation in step 6 \u2014 anything already flagged in a prior review and not changed by the new commits should not be re-raised. you do NOT need to render this in the review body; the rolling PR summary snapshot is the durable record of what's been addressed.
-4. **triage & fan out**: orient on the *incremental* changes \u2014 domain, seams, external contracts, user-facing surfaces.
+5. **triage & fan out**: orient on the *incremental* changes \u2014 domain, seams, external contracts, user-facing surfaces.
-   if the incremental changes are **genuinely trivial**, skip the fan-out entirely and jump to step 7's non-substantive path (do NOT submit a review).
+   if the incremental changes are **genuinely trivial**, skip the fan-out entirely and jump to step 8's non-substantive path (do NOT submit a review).
    "Genuinely trivial" (skip): formatting/comment tweaks, import reordering, lockfile regen, mechanical rename of import paths, whitespace-only.
    "Looks trivial but isn't" (do NOT skip \u2014 same anti-patterns as Review mode): 1-line changes to SQL/regex/auth/billing/permissions/signature-verification code; flipping feature-flag defaults or retry/timeout constants; money/tax/HTTP-method/redirect changes; tightening or loosening a comparison operator; mixed diffs with a semantic line buried in formatting.
@@ -146704,8 +146764,8 @@ ${PR_SUMMARY_FORMAT}`
    otherwise pick lenses by where the new commits concentrate risk \u2014 **there's no fixed count**, same calibration as Review mode (1 lens for pure refactor / isolated fix; 2\u20133 for typical features; 4\u20135 for high-stakes subsystem touches; 6+ is a smell). lens framing follows Review mode: themed lenses (correctness & invariants, impact when new commits remove/rename/deprecate things, research-validated assumptions, security, user-journey, operational readiness, integration & cross-cutting, test integrity, performance, holistic) and subsystem lenses (auth, billing, schema migration, etc.) \u2014 for high-stakes domains lead with the subsystem lens rather than the generic themed equivalent.
-   dispatch one \`${REVIEWER_AGENT_NAME}\` subagent per lens \u2014 its baked-in system prompt enforces the non-mutative + non-recursive contract (read-only file/search/web tools and read-only MCP queries; no writes, shell side effects, state-changing MCP calls, or nested subagent dispatch). dispatch them in a **single assistant turn with multiple parallel subagent calls** (serial dispatch collapses the fan-out). if a subagent errors out, times out, or returns nothing usable, retry once with the same lens; if it still fails, proceed with partial coverage and note the missing lens in the review body \u2014 do not skip step 4 entirely on a single subagent failure. each subagent gets:
-   - the diff scope (incremental diff path if available, full diff otherwise). do NOT tell them to skip pre-existing issues \u2014 that suppresses regressions the new commits amplified; the "issues must be NEW" filter lives at aggregation time (step 5), not in the subagent prompt
+   dispatch one \`${REVIEWER_AGENT_NAME}\` subagent per lens \u2014 its baked-in system prompt enforces the non-mutative + non-recursive contract (read-only file/search/web tools and read-only MCP queries; no writes, shell side effects, state-changing MCP calls, or nested subagent dispatch). dispatch them in a **single assistant turn with multiple parallel subagent calls** (serial dispatch collapses the fan-out). if a subagent errors out, times out, or returns nothing usable, retry once with the same lens; if it still fails, proceed with partial coverage and note the missing lens in the review body \u2014 do not skip step 5 entirely on a single subagent failure. each subagent gets:
+   - the diff scope (incremental diff path if available, full diff otherwise). do NOT tell them to skip pre-existing issues \u2014 that suppresses regressions the new commits amplified; the "issues must be NEW" filter lives at aggregation time (step 6), not in the subagent prompt
    - **only one lens** \u2014 never a multi-section "review for X, Y, and Z" prompt
    - **a Task \`description\` set to the lens name** (e.g. \`"security"\`, \`"correctness"\`, \`"billing-subsystem"\`) \u2014 the harness reads this field to label the subagent's log lines so parallel runs can be told apart in CI output. without it, every subagent shows up as \`subagent#N\`.
    - the read-only contract restated in your dispatch instructions so the rule is present twice (the subagent's system prompt also enforces it). The test: would this call still be a no-op if reverted? If not (PR comments, branch pushes, issue updates, set_output, label changes, dependency installs, etc.), don't make it.
@@ -146719,15 +146779,21 @@ ${PR_SUMMARY_FORMAT}`
    - do NOT pre-shape their output with a finding schema
    - do NOT mention the other lenses (independence is the point)
-5. **aggregate, draft, self-critique**: merge findings; de-dup overlaps; trace each finding yourself. drop praise, style preferences, speculative/unverified claims, findings about pre-existing code unrelated to the new commits, anything not actionable, and anything that re-states prior review feedback (heuristic: if the finding's root cause lives in lines the *new commits* added or modified, it's in scope; otherwise drop). also drop **bloat-shaped findings** \u2014 proposed fixes that would add defensive checks for cases that can't happen, abstractions used once, comments restating obvious code, tests asserting tautologies, or "just-in-case" guards. subagents are fallible and bias toward recommending changes; the bar for an actionable inline comment is sound + correct + elegant. recommending a change that improves only one of the three (or degrades elegance to nominally improve correctness) makes the codebase worse, not better. To compute "lines the new commits added or modified": if \`incrementalDiffPath\` from step 1 is present, use it directly. Otherwise, take the prior Pullfrog review's \`commit_id\` (returned alongside each entry from \`${t2("list_pull_request_reviews")}\` in step 3) and run \`git diff <prior-review-sha>..HEAD\` to isolate the lines added since that review. draft inline comments with NEW line numbers from the full PR diff \u2014 every comment must be actionable, 2-3 sentences max.
+6. **aggregate, draft, self-critique**: merge findings; de-dup overlaps; trace each finding yourself. drop praise, style preferences, speculative/unverified claims, findings about pre-existing code unrelated to the new commits, anything not actionable, and anything that re-states prior review feedback (heuristic: if the finding's root cause lives in lines the *new commits* added or modified, it's in scope; otherwise drop). also drop **bloat-shaped findings** \u2014 proposed fixes that would add defensive checks for cases that can't happen, abstractions used once, comments restating obvious code, tests asserting tautologies, or "just-in-case" guards. subagents are fallible and bias toward recommending changes; the bar for an actionable inline comment is sound + correct + elegant. recommending a change that improves only one of the three (or degrades elegance to nominally improve correctness) makes the codebase worse, not better. To compute "lines the new commits added or modified": if \`incrementalDiffPath\` from step 2 is present, use it directly. Otherwise, take the prior Pullfrog review's \`commit_id\` (returned alongside each entry from \`${t2("list_pull_request_reviews")}\` in step 4) and run \`git diff <prior-review-sha>..HEAD\` to isolate the lines added since that review. draft inline comments with NEW line numbers from the full PR diff \u2014 every comment must be actionable, 2-3 sentences max.
-6. **build the review body** \u2014 a single "Reviewed changes" section: summarize at the logical-change level, not per-file. each bullet starts with a past-tense verb (e.g. \`- Extracted shared CLI runtime into a single module\`, \`- Renamed package to pullfrog\`). avoid file paths unless they add clarity. if the changes can be described in one sentence, use one sentence \u2014 no bullets needed. do NOT include a separate "Prior review feedback" checklist; that's tracked in the rolling PR summary snapshot for the next agent run, and surfacing it in the user-facing body is noise (changes that addressed prior feedback are already covered by the Reviewed-changes bullets). in some cases you may receive a complete diff for the whole pull request instead of an incremental one \u2014 when this happens, you will need to determine what changes have happened since Pullfrog's most recent review.
+7. **build the review body** \u2014 a single "Reviewed changes" section: summarize at the logical-change level, not per-file. each bullet starts with a past-tense verb (e.g. \`- Extracted shared CLI runtime into a single module\`, \`- Renamed package to pullfrog\`). avoid file paths unless they add clarity. if the changes can be described in one sentence, use one sentence \u2014 no bullets needed. do NOT include a separate "Prior review feedback" checklist; that's tracked in the rolling PR summary snapshot for the next agent run, and surfacing it in the user-facing body is noise (changes that addressed prior feedback are already covered by the Reviewed-changes bullets). in some cases you may receive a complete diff for the whole pull request instead of an incremental one \u2014 when this happens, you will need to determine what changes have happened since Pullfrog's most recent review.
-7. Submit \u2014 Do NOT call \`report_progress\` or \`create_issue_comment\` \u2014 the review is the final record and the progress comment will be cleaned up automatically. Follow these rules:
+8. Submit \u2014 every run must end with EXACTLY ONE of \`${t2("create_pull_request_review")}\` (substantive review) or \`${t2("report_progress")}\` (no-review acknowledgement). do NOT call \`create_issue_comment\` for review output.
+   Same callout-intensity ladder as Review mode \u2014 \`[!CAUTION]\` (large red, "will break") \u2192 \`[!IMPORTANT]\` (large purple, "must address before merging") \u2192 \`[!NOTE]\` (small blue, "FYI") \u2192 no callout (plain text). And the same Fix-button lever: the footer renders a Fix button on every non-approving review, so \`approved: true\` suppresses it. Wrapping mergeable feedback in \`[!IMPORTANT]\` trains users to click Fix on reviews that don't need fixing \u2014 pick the tier the author's actual next action justifies.
+   Follow these rules:
    - note: the first create_pull_request_review submission may error with a one-time diff-coverage nudge listing unread TOC regions. retry the same call to proceed \u2014 optionally after reading the listed ranges. the pre-flight will not block again this session.
-   - IF NO NEW ISSUES, NON-SUBSTANTIVE CHANGES ONLY (trivial formatting, import reordering, comment tweaks): do NOT submit a review. Do NOT call \`report_progress\`. Exit \u2014 the progress comment will be cleaned up automatically.
-   - ELSE IF NEW CRITICAL ISSUES (blocks merge): call \`${t2("create_pull_request_review")}\` with \`approved: false\`, all comments, and the review body. body opens with a GitHub alert blockquote (e.g. \`> [!CAUTION]\\n> This PR introduces ...\`), then the Reviewed-changes summary.
-   - ELSE IF NEW RECOMMENDED CHANGES (non-critical): call \`${t2("create_pull_request_review")}\` with \`approved: false\`, all comments, and the review body. body opens with \`> [!IMPORTANT]\\n> ...\` alert, then the Reviewed-changes summary.
+   - IF NO NEW ISSUES, NON-SUBSTANTIVE CHANGES ONLY (trivial formatting, import reordering, comment tweaks): do NOT submit a review. Instead call \`${t2("report_progress")}\` with a 1-2 sentence note explaining no review was warranted (e.g. "No new issues. Changes since last review are formatting-only."). this leaves a visible signal that the run completed.
+   - ELSE IF NEW CRITICAL ISSUES (blocks merge \u2014 bugs, security, data loss, broken core flows): call \`${t2("create_pull_request_review")}\` with \`approved: false\`, all comments, and the review body. body opens with \`> [!CAUTION]\\n> This PR introduces ...\`, then the Reviewed-changes summary.
+   - ELSE IF NEW MUST-ADDRESS NON-CRITICAL FINDINGS (real consequences if shipped \u2014 incorrect behavior, missing validation, regressions the author should fix before merge): call \`${t2("create_pull_request_review")}\` with \`approved: false\`, all comments, and the review body. body opens with \`> [!IMPORTANT]\\n> ...\`, then the Reviewed-changes summary. Do NOT use this tier for nits, style preferences, or "consider also" suggestions.
+   - ELSE IF NEW MINOR SUGGESTIONS ONLY (single-line nits, doc/comment polish, defer-able observations, "rough edges"): call \`${t2("create_pull_request_review")}\` with \`approved: false\`, all comments, and the review body. body opens directly with \`Reviewed the following changes:\\n\` (NO alert blockquote), then the Reviewed-changes summary.
+   - ELSE IF INFORMATIONAL OBSERVATIONS (mergeable as-is, but worth surfacing \u2014 e.g. prior feedback addressed cleanly with one minor stale doc reference, or a noteworthy positive observation): call \`${t2("create_pull_request_review")}\` with \`approved: true\`, NO inline comments, and the review body. body opens with \`> [!NOTE]\\n> ...\` alert, then the Reviewed-changes summary. If a point is concrete enough to anchor to a line, downgrade the whole review to "minor suggestions only" (\`approved: false\`) instead \u2014 \`[!NOTE]\` and inline comments don't mix.
    - ELSE IF NO NEW ISSUES, SUBSTANTIVE CHANGES (new functionality, behavior changes, or fixes to prior review feedback): call \`${t2("create_pull_request_review")}\` to create a PR review. If all previous reviews have been properly addressed and no new issues were discovered, you can set \`approved: true\`. body opens with \`No new issues. Reviewed the following changes:\\n\`, then the Reviewed-changes summary.`
     },
     {
@@ -146735,61 +146801,63 @@ ${PR_SUMMARY_FORMAT}`
       description: "Create plans, break down tasks, outline steps, analyze requirements, understand scope of work, or provide task breakdowns",
       prompt: `### Checklist
-1. Analyze the task and gather context:
+1. **task list**: create your task list for this run as your first action.
+2. Analyze the task and gather context:
    - read AGENTS.md and relevant codebase files
    - understand the architecture and constraints
-2. Produce a structured, actionable plan with clear milestones.
-3. Call \`${t2("report_progress")}\` with the plan.
+3. Produce a structured, actionable plan with clear milestones.
-${learningsStep(t2, 4)}`
+4. Call \`${t2("report_progress")}\` with the plan.`
     },
     {
       name: "Fix",
       description: "Fix CI failures; debug failing tests or builds; investigate and resolve check suite failures",
       prompt: `### Checklist
-1. Checkout the PR branch via \`${t2("checkout_pr")}\`.
+1. **task list**: create your task list for this run as your first action.
-2. Fetch check suite logs via \`${t2("get_check_suite_logs")}\`.
+2. Checkout the PR branch via \`${t2("checkout_pr")}\`.
-3. **CRITICAL**: verify the failure was INTRODUCED BY THIS PR before fixing. If unrelated, abort and report.
+3. Fetch check suite logs via \`${t2("get_check_suite_logs")}\`.
-4. Diagnose and fix:
+4. **CRITICAL**: verify the failure was INTRODUCED BY THIS PR before fixing. If unrelated, abort and report.
+5. Diagnose and fix:
    - read the workflow file, reproduce locally with the EXACT same commands CI runs
    - fix the issue using your native file and shell tools
    - verify the fix by re-running the exact CI command
    - review the diff before committing \u2014 verify only the fix is present, no debug artifacts, no unrelated changes. the fix should be clean enough that a senior engineer would approve without hesitation.
    - commit locally via shell (\`git add . && git commit -m "..."\`)
-5. Finalize:
+6. Finalize:
    - confirm a clean working tree, then push via \`${t2("push_branch")}\` (same push/prepush guidance as Build mode in *SYSTEM*)
-   - call \`${t2("report_progress")}\` with the diagnosis and fix summary (or the exact push error if push failed)
-${learningsStep(t2, 6)}`
+   - call \`${t2("report_progress")}\` with the diagnosis and fix summary (or the exact push error if push failed)`
     },
     {
       name: "ResolveConflicts",
       description: "Resolve merge conflicts in a PR branch against the base branch",
       prompt: `### Checklist
-1. **Setup**:
+1. **task list**: create your task list for this run as your first action.
+2. **Setup**:
    - Call \`${t2("checkout_pr")}\` to get the PR branch.
    - Call \`${t2("get_pull_request")}\` to identify the base branch (e.g., 'main').
    - Call \`${t2("git_fetch")}\` to fetch the base branch.
-2. **Merge Attempt**:
+3. **Merge Attempt**:
    - Run \`git merge origin/<base_branch>\` via shell.
-   - If it succeeds automatically, confirm a clean working tree, push via \`${t2("push_branch")}\` (same push/prepush guidance as Build mode in *SYSTEM*), and call \`${t2("report_progress")}\` with a brief success note or the exact push error if push failed \u2014 **then stop; do not run steps 3\u20134.**
-   - If it fails (conflicts), resolve them manually (continue to steps 3\u20134).
+   - If it succeeds automatically, confirm a clean working tree, push via \`${t2("push_branch")}\` (same push/prepush guidance as Build mode in *SYSTEM*), and call \`${t2("report_progress")}\` with a brief success note or the exact push error if push failed \u2014 **then stop; do not run steps 4\u20135.**
+   - If it fails (conflicts), resolve them manually (continue to steps 4\u20135).
-3. **Resolve Conflicts**:
+4. **Resolve Conflicts**:
    - Run \`git status\` or parse the merge output to find the list of conflicting files.
    - For each conflicting file: read it, find the conflict markers (\`<<<<<<<\`, \`=======\`, \`>>>>>>>\`), understand the code context, and rewrite the file with the correct resolution. Remove all markers.
    - Verify the file syntax is correct after resolution.
-4. **Finalize**:
+5. **Finalize**:
    - Run a final verification (build/test) to ensure the resolution works.
    - \`git add . && git commit -m "resolve merge conflicts"\`
    - confirm a clean working tree, then push via \`${t2("push_branch")}\` (same push/prepush guidance as Build mode in *SYSTEM*)
@@ -146800,24 +146868,43 @@ ${learningsStep(t2, 6)}`
       description: "General-purpose tasks that don't fit other modes: answering questions, adding comments, labeling, running ad-hoc commands, or any direct request",
       prompt: `### Checklist
-1. Analyze the task. For simple operations (labeling, commenting, answering questions, running a single command), handle directly.
+1. **task list**: create your task list for this run as your first action.
+2. Analyze the task. For simple operations (labeling, commenting, answering questions, running a single command), handle directly.
-2. For substantial work \u2014 code changes across multiple files, multi-step investigations:
+3. For substantial work \u2014 code changes across multiple files, multi-step investigations:
    - plan your approach before starting
    - use native file and shell tools for local operations
    - use ${pullfrogMcpName} MCP tools for GitHub/git operations
    - if code changes are needed: review your own diff before committing \u2014 verify only intended changes are present, no debug artifacts remain, and the changes are clean enough that a senior engineer would approve without hesitation
-3. Finalize:
+4. Finalize:
    - if code changes were made, push to a pull request (new or existing) using \`${t2("push_branch")}\` and \`${t2("create_pull_request")}\` as needed. \`git status\` must be clean before you finish (see *SYSTEM* Git rules if push fails).
    - call \`${t2("report_progress")}\` once with results \u2014 include exact tool errors if push or PR creation failed
-   - if the task involved labeling, commenting, or other GitHub operations, perform those directly
-${learningsStep(t2, 4)}`
+   - if the task involved labeling, commenting, or other GitHub operations, perform those directly`
     }
   ];
 }
 var modes = computeModes("opencode");
+var NON_COMMITTING_MODES = /* @__PURE__ */ new Set([ // selectedMode values for which collectPostRunIssues logs and skips the dirty-tree issue
+  "Review",
+  "IncrementalReview",
+  "Plan"
+]);
+// toolState.ts
+function initToolState(params) { // builds the initial tool-state object for a run from params.progressComment
+  const resolved = parseProgressComment(params.progressComment); // a falsy result means no progress comment is recorded (parser is defined elsewhere)
+  if (resolved) {
+    log.info(`\xBB using pre-created progress comment: ${resolved.id} (${resolved.type})`);
+  }
+  return {
+    progressComment: resolved,
+    hadProgressComment: !!resolved, // boolean fixed here at init; getUnsubmittedReview reads it
+    backgroundProcesses: /* @__PURE__ */ new Map(), // starts empty
+    usageEntries: [] // starts empty
+  };
+}
 // agents/claude.ts
 import { execFileSync as execFileSync3 } from "node:child_process";
@@ -146913,6 +147000,17 @@ async function installFromNpmTarball(params) {
 // utils/providerErrors.ts
 var statusKey = `\\b(?:status[_ ]?code|http[_ ]?status|status)["']?\\s*[:=]\\s*["']?`;
 var PROVIDER_ERROR_PATTERNS = [
+  // auth patterns must come BEFORE rate-limit patterns. OpenRouter 401 error
+  // payloads carry `x-ratelimit-*` response headers in the dump, and the
+  // free-form rate-limit regex below would otherwise win on word-boundary
+  // matches inside header names. canonical 401 messages: OpenRouter returns
+  // `{"error":{"message":"User not found","code":401}}` for disabled or
+  // invalid keys (https://openai.luzhipeng.com/docs/api/reference/errors-and-debugging).
+  { regex: new RegExp(`${statusKey}401\\b`, "i"), label: "auth error (401)" },
+  { regex: new RegExp(`${statusKey}403\\b`, "i"), label: "auth error (403)" },
+  { regex: /\bUser not found\b/i, label: "auth error (invalid/disabled key)" },
+  { regex: /\bInvalid authentication\b/i, label: "auth error (invalid credentials)" },
+  { regex: /\bNo auth credentials found\b/i, label: "auth error (missing credentials)" },
   { regex: new RegExp(`${statusKey}429\\b`, "i"), label: "rate limited (429)" },
   { regex: new RegExp(`${statusKey}500\\b`, "i"), label: "provider 500 error" },
   { regex: new RegExp(`${statusKey}503\\b`, "i"), label: "provider unavailable (503)" },
@@ -146976,7 +147074,7 @@ function installBundledSkills(params) {
       writeFileSync6(join9(skillDir, "SKILL.md"), content);
     }
   }
-  log.info(`installed bundled skills: ${BUNDLED_SKILL_NAMES.join(", ")}`);
+  log.success(`installed bundled skills: ${BUNDLED_SKILL_NAMES.join(", ")}`);
 }
 function addSkill(params) {
   const result = spawnSync5(
@@ -147001,7 +147099,7 @@ function addSkill(params) {
     }
   );
   if (result.status === 0) {
-    log.info(`installed ${params.skill} skill (${params.agent})`);
+    log.success(`installed ${params.skill} skill (${params.agent})`);
   } else {
     const stderr = (result.stderr?.toString() || "").trim();
     const errorMsg = result.error ? result.error.message : stderr;
@@ -147053,6 +147151,13 @@ var ThinkingTimer = class {
 // agents/postRun.ts
 import { readFile } from "node:fs/promises";
+function getUnsubmittedReview(toolState) { // returns "Review" or "IncrementalReview" when that mode's missing-output gate fires, otherwise null
+  const mode = toolState.selectedMode;
+  if (mode !== "Review" && mode !== "IncrementalReview") return null; // the gate only exists for the two review modes
+  if (toolState.review || toolState.finalSummaryWritten) return null; // a truthy value in either field clears the gate
+  if (!toolState.hadProgressComment) return null; // no gate when initToolState recorded no progress comment
+  return mode;
+}
 var MAX_HOOK_OUTPUT_CHARS = 4096;
 function truncateHookOutput(raw2) {
   if (raw2.length <= MAX_HOOK_OUTPUT_CHARS) return raw2;
@@ -147114,39 +147219,72 @@ function buildSummaryStalePrompt(filePath) {
     "if the diff is genuinely too small or noisy to warrant rewriting (e.g. a one-line typo fix, a comment tweak, a formatting-only change), it's fine to leave the structure as-is \u2014 but at minimum confirm you considered it by appending one line to the appropriate section noting the run. silence is not an option; the snapshot is what the next review run reads as context."
   ].join("\n");
 }
-async function collectPostRunIssues(params) {
+function buildUnsubmittedReviewPrompt(mode) { // returns the follow-up prompt text for an unsubmitted review; lines are joined with "\n"
+  if (mode === "Review") { // Review wording: the only accepted remedy is create_pull_request_review
+    return [
+      `MISSING REVIEW OUTPUT \u2014 you selected Review mode but stopped without calling \`create_pull_request_review\`. the user has no visible signal that this run produced anything; the progress comment will be deleted on exit and no review will appear on the PR.`,
+      "",
+      "call `create_pull_request_review` now with your aggregated review (body + inline comments). pick the tier per the mode prompt \u2014 Review mode has no no-submit exit, so even informational `> [!NOTE]` reviews and `No new issues found.` reviews must be submitted (both use `approved: true`). the first call may error once with a diff-coverage nudge \u2014 retry the same call to proceed.",
+      "",
+      "do NOT stop again until `create_pull_request_review` has been called successfully."
+    ].join("\n");
+  }
+  return [ // every other mode value gets the IncrementalReview wording, which also accepts report_progress
+    `MISSING REVIEW OUTPUT \u2014 you selected IncrementalReview mode but stopped without calling \`create_pull_request_review\` or \`report_progress\`. the user has no visible signal that this run produced anything; the progress comment will be deleted on exit and no review will appear on the PR.`,
+    "",
+    "do exactly one of:",
+    "- if you have findings: call `create_pull_request_review` now with your aggregated review (body + inline comments). the first call may error once with a diff-coverage nudge \u2014 retry the same call to proceed.",
+    "- if there are genuinely no actionable findings since the last review (e.g. only formatting / comment / lockfile changes): call `report_progress` with a 1-2 sentence summary explaining that no review was warranted.",
+    "",
+    "do NOT stop again until one of those tools has been called successfully."
+  ].join("\n");
+}
+async function collectPostRunIssues(ctx, options = {}) { // runs the post-run checks and returns an object holding only the keys whose check fired
   const issues = {};
-  if (params.stopScript) {
-    const failure = await executeStopHook(params.stopScript);
+  if (ctx.stopScript) { // the stop hook runs only when ctx.stopScript is set
+    const failure = await executeStopHook(ctx.stopScript);
     if (failure) issues.stopHook = failure;
   }
   const status = getGitStatus();
-  if (status) issues.dirtyTree = status;
-  if (params.summaryFilePath && params.summarySeed !== void 0) {
-    const stale = await isSummaryUnchanged(params.summaryFilePath, params.summarySeed);
-    if (stale) issues.summaryStale = { filePath: params.summaryFilePath };
+  const mode = ctx.toolState.selectedMode; // when falsy, the dirty-tree check below is never suppressed
+  if (status) {
+    if (mode && NON_COMMITTING_MODES.has(mode)) { // listed modes get a log line instead of a dirtyTree issue
+      log.info(`\xBB dirty-tree gate suppressed: mode \`${mode}\` does not commit`);
+    } else {
+      issues.dirtyTree = status;
+    }
   }
+  const summaryFilePath2 = ctx.toolState.summaryFilePath;
+  const summarySeed = ctx.toolState.summarySeed;
+  if (!options.skipSummaryStale && summaryFilePath2 && summarySeed !== void 0) { // skipped on caller request, without a path, or with an undefined seed
+    const stale = await isSummaryUnchanged(summaryFilePath2, summarySeed);
+    if (stale) issues.summaryStale = { filePath: summaryFilePath2 };
+  }
+  const unsubmittedMode = getUnsubmittedReview(ctx.toolState); // null, or the review mode that produced no output
+  if (unsubmittedMode) issues.unsubmittedReview = unsubmittedMode;
   return issues;
 }
 function buildPostRunPrompt(issues) { // builds one prompt from the issue keys that are set; returns "" when none are set
   const parts = []; // section order is fixed: stopHook, unsubmittedReview, dirtyTree, summaryStale
   if (issues.stopHook) parts.push(buildStopHookPrompt(issues.stopHook));
+  if (issues.unsubmittedReview) {
+    parts.push(buildUnsubmittedReviewPrompt(issues.unsubmittedReview)); // value is the mode name stored by collectPostRunIssues
+  }
   if (issues.dirtyTree) parts.push(buildCommitPrompt(issues.dirtyTree));
   if (issues.summaryStale) parts.push(buildSummaryStalePrompt(issues.summaryStale.filePath));
   return parts.join("\n\n---\n\n"); // sections are separated by a "---" line with blank lines around it
 }
-function buildLearningsReflectionPrompt(agentId) {
-  const t2 = (name) => formatMcpToolRef(agentId, name);
+function buildLearningsReflectionPrompt(filePath) { // returns the end-of-run reflection prompt; filePath is interpolated as the learnings file location
   return [ // one array entry per prompt line; "" entries become blank lines
-    `REFLECTION \u2014 before you finish, think back over this task: did you discover anything about this repo's setup, test commands, conventions, or patterns that you are confident is correct and would reliably help future runs?`,
+    `REFLECTION \u2014 before you finish, think back over this task: did you discover anything about this repo's setup, test commands, conventions, or patterns that is high-confidence and would reliably help future runs?`,
     "",
-    `if so, call \`${t2("update_learnings")}\` to persist it.`,
+    `the rolling learnings file is at \`${filePath}\`. read it first if you haven't already, then edit it in place using your native file tools. the server reads this file at end-of-run and persists any changes \u2014 there is no tool to call.`,
     "",
-    `rules:`,
-    `- only call \`${t2("update_learnings")}\` when the finding is high-confidence and broadly useful. skip if unsure, speculative, or one-off.`,
-    `- pass the FULL merged list: existing learnings from the original prompt + your new discoveries. one fact per bullet, lines starting with \`- \`.`,
-    `- deduplicate, and drop bullets that are clearly wrong or no longer relevant to the current codebase.`,
-    `- if you already called \`${t2("update_learnings")}\` earlier in this run, or nothing new is worth capturing, just reply "done" and stop \u2014 do not edit the repo for this reflection.`
+    `keep the file healthy:`,
+    `- only add bullets when the finding is high-confidence AND broadly useful. skip speculative, one-off, or "maybe" findings.`,
+    `- prune bullets that are clearly wrong, no longer relevant, or low-signal (rarely useful). a focused, accurate file beats a long stale one.`,
+    `- format: flat bullet list, one fact per line starting with \`- \`. deduplicate against existing entries \u2014 if a bullet covers the same fact, update it in place instead of adding a duplicate.`,
+    `- leave the file alone if you have nothing substantively new to add and the existing entries still look healthy. silence is a valid outcome \u2014 just reply "done" and stop.`
   ].join("\n");
 }
 async function runPostRunRetryLoop(params) {
@@ -147158,10 +147296,8 @@ async function runPostRunRetryLoop(params) {
   let summaryStaleNudged = false;
   while (gateResumeCount < MAX_POST_RUN_RETRIES) {
     if (!result.success) break;
-    const issues = await collectPostRunIssues({
-      stopScript: params.stopScript,
-      summaryFilePath: summaryStaleNudged ? void 0 : params.summaryFilePath,
-      summarySeed: summaryStaleNudged ? void 0 : params.summarySeed
+    const issues = await collectPostRunIssues(params.ctx, {
+      skipSummaryStale: summaryStaleNudged
     });
     if (issues.summaryStale) summaryStaleNudged = true;
     finalIssues = issues;
@@ -147209,7 +147345,7 @@ async function runPostRunRetryLoop(params) {
     gateResumeCount++;
   }
   if (gateResumeCount > 0 && result.success && hasPostRunIssues(finalIssues)) {
-    finalIssues = await collectPostRunIssues({ stopScript: params.stopScript });
+    finalIssues = await collectPostRunIssues(params.ctx, { skipSummaryStale: true });
   }
   if (result.success && finalIssues.stopHook) {
     const retryNote = gateResumeCount > 0 ? ` after ${gateResumeCount} retry ${gateResumeCount === 1 ? "attempt" : "attempts"}` : "";
@@ -147220,6 +147356,16 @@ async function runPostRunRetryLoop(params) {
       usage: aggregatedUsage
     };
   }
+  if (result.success && finalIssues.unsubmittedReview) {
+    const retryNote = gateResumeCount > 0 ? ` after ${gateResumeCount} retry ${gateResumeCount === 1 ? "attempt" : "attempts"}` : "";
+    const expected = finalIssues.unsubmittedReview === "Review" ? "create_pull_request_review" : "create_pull_request_review or report_progress";
+    return {
+      ...result,
+      success: false,
+      error: `${finalIssues.unsubmittedReview} mode finished without calling ${expected}${retryNote}`,
+      usage: aggregatedUsage
+    };
+  }
   return { ...result, usage: aggregatedUsage };
 }
@@ -147336,6 +147482,12 @@ function resolveEffort(model) {
   if (model?.includes("opus")) return "max";
   return "high";
 }
+function tailLines(text, maxCodeUnits) { // returns text unchanged when it fits, otherwise at most its last maxCodeUnits UTF-16 code units
+  if (text.length <= maxCodeUnits) return text;
+  const tail = text.slice(-maxCodeUnits); // NOTE: slice(-0) returns the whole string, so maxCodeUnits is expected to be > 0
+  const firstNewline = tail.indexOf("\n");
+  return firstNewline > 0 && firstNewline < tail.length - 1 ? tail.slice(firstNewline + 1) : tail; // drops the first line of the tail unless its first newline is absent, at index 0, or the final character
+}
 async function runClaude(params) {
   const startTime = performance6.now();
   let eventCount = 0;
@@ -147343,6 +147495,8 @@ async function runClaude(params) {
   let finalOutput = "";
   let sessionId;
   let resultErrorSubtype = null;
+  let lastResultError = null;
+  let syntheticStopFailure = false;
   let accumulatedTokens = { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 };
   let accumulatedCostUsd = 0;
   let tokensLogged = false;
@@ -147425,6 +147579,16 @@ async function runClaude(params) {
       if (event.session_id) sessionId = event.session_id;
       const subtype = event.subtype || "unknown";
       const numTurns = event.num_turns || 0;
+      if (event.is_error === true && subtype === "success") {
+        const apiStatus = event.api_error_status;
+        lastResultError = event.result?.trim() || `claude reported is_error=true with no result text (api_error_status=${apiStatus ?? "unknown"})`;
+        resultErrorSubtype = subtype;
+        syntheticStopFailure = true;
+        log.info(
+          `\xBB ${params.label} result error: subtype=${subtype}, api_error_status=${apiStatus ?? "unknown"}, message=${lastResultError}`
+        );
+        return;
+      }
       if (subtype === "success") {
         const usage = event.usage;
         const inputTokens = usage?.input_tokens || 0;
@@ -147447,12 +147611,15 @@ async function runClaude(params) {
         }
       } else if (subtype === "error_max_turns") {
         resultErrorSubtype = subtype;
+        lastResultError = event.errors?.join("\n").trim() || null;
         log.info(`\xBB ${params.label} max turns reached: ${JSON.stringify(event)}`);
       } else if (subtype === "error_during_execution") {
         resultErrorSubtype = subtype;
+        lastResultError = event.errors?.join("\n").trim() || null;
         log.info(`\xBB ${params.label} execution error: ${JSON.stringify(event)}`);
       } else if (subtype.startsWith("error")) {
         resultErrorSubtype = subtype;
+        lastResultError = event.errors?.join("\n").trim() || null;
         log.info(`\xBB ${params.label} result: subtype=${subtype}, data=${JSON.stringify(event)}`);
       } else {
         log.info(`\xBB ${params.label} result: subtype=${subtype}, data=${JSON.stringify(event)}`);
@@ -147560,14 +147727,15 @@ async function runClaude(params) {
       if (stderrContext) log.info(`\xBB last stderr output:
 ${stderrContext}`);
     }
-    if (!tokensLogged && (accumulatedTokens.input > 0 || accumulatedTokens.output > 0 || accumulatedTokens.cacheRead > 0 || accumulatedTokens.cacheWrite > 0)) {
+    if (!tokensLogged && !syntheticStopFailure && (accumulatedTokens.input > 0 || accumulatedTokens.output > 0 || accumulatedTokens.cacheRead > 0 || accumulatedTokens.cacheWrite > 0)) {
       logTokenTable({ ...accumulatedTokens, costUsd: accumulatedCostUsd });
       tokensLogged = true;
     }
     const usage = buildUsage();
     if (result.exitCode !== 0) {
       const errorContext = lastProviderError ? ` (${lastProviderError})` : "";
-      const errorMessage = result.stderr || result.stdout || `unknown error - no output from Claude CLI${errorContext}`;
+      const truncatedStdout = result.stdout ? tailLines(result.stdout, 2048) : "";
+      const errorMessage = lastResultError || result.stderr || truncatedStdout || `unknown error - no output from Claude CLI${errorContext}`;
       log.error(
         `${params.label} exited with code ${result.exitCode}${errorContext}: ${errorMessage}`
       );
@@ -147594,7 +147762,7 @@ ${stderrContext}`);
       return {
         success: false,
         output: finalOutput || output,
-        error: `result subtype: ${resultErrorSubtype}`,
+        error: lastResultError || `result subtype: ${resultErrorSubtype}`,
         usage,
         sessionId
       };
@@ -147724,12 +147892,10 @@ var claude = agent({
       args: [...baseArgs, "-p", ctx.instructions.full]
     });
     return runPostRunRetryLoop({
+      ctx,
       initialResult: result,
       initialUsage: result.usage,
-      stopScript: ctx.stopScript,
-      summaryFilePath: ctx.summaryFilePath,
-      summarySeed: ctx.summarySeed,
-      reflectionPrompt: buildLearningsReflectionPrompt("claude"),
+      reflectionPrompt: ctx.toolState.learningsFilePath ? buildLearningsReflectionPrompt(ctx.toolState.learningsFilePath) : void 0,
       canResume: (r) => Boolean(r.sessionId),
       resume: async (c2) => {
         const sessionId = c2.previousResult.sessionId;
@@ -147745,9 +147911,92 @@ var claude = agent({
 // agents/opencode.ts
 import { execFileSync as execFileSync4 } from "node:child_process";
-import { mkdirSync as mkdirSync5 } from "node:fs";
+import { mkdirSync as mkdirSync5, writeFileSync as writeFileSync8 } from "node:fs";
 import { join as join11 } from "node:path";
 import { performance as performance7 } from "node:perf_hooks";
+// agents/opencodePlugin.ts
+var PULLFROG_BUS_EVENT_TYPE = "pullfrog_bus_event"; // "type" value of the JSON lines the plugin writes; also the key of the matching handler in runOpenCode
+var PULLFROG_OPENCODE_PLUGIN_FILENAME = "pullfrog-events.ts"; // file name used when the plugin source is written into the opencode plugin dir
+var PULLFROG_OPENCODE_PLUGIN_SOURCE = `// AUTOGENERATED by Pullfrog. do not edit; it'll be overwritten on the next run.
+// surfaces opencode subagent activity that the CLI's run-loop discards. see
+// action/agents/opencodePlugin.ts in pullfrog/app for why this exists. lives
+// inside the per-run tmpdir (XDG_CONFIG_HOME/opencode/plugin/), never inside
+// the user's working tree.
+const PULLFROG_BUS_EVENT_TYPE = ${JSON.stringify(PULLFROG_BUS_EVENT_TYPE)};
+// the first sessionID we see on a message.part.updated event is the
+// orchestrator \u2014 opencode's run command creates exactly one top-level session
+// before any subagent is dispatched, and the user-prompt text part fires
+// before the first task tool_use. we lock that sessionID in here and use it
+// to filter: the orchestrator's events are already streamed by the CLI's
+// run-loop, so we only forward (a) all subagent events, and (b) the
+// orchestrator's task tool dispatches at status="running". the CLI only
+// emits task tool_use at status=completed (after the subagent finishes), so
+// without the early announce the parent's labeler binds subagent sessions
+// before recordTaskDispatch fires and the lens label is lost.
+let orchestratorSessionID: string | undefined;
+function isOrchestratorTaskDispatch(part: {
+  type?: string;
+  tool?: string;
+  state?: { status?: string };
+}): boolean {
+  if (part.type !== "tool") return false;
+  if (part.tool !== "task") return false;
+  // only forward at status="running" (not "pending"). at pending the
+  // state.input is still {} \u2014 the orchestrator has emitted the part shell
+  // but the LLM hasn't filled in description/subagent_type/prompt yet. by
+  // running, input is populated and recordTaskDispatch can derive the lens
+  // label correctly.
+  return part.state?.status === "running";
+}
+export default async function pullfrogEventsPlugin() {
+  return {
+    event: async (input: {
+      event: {
+        type: string;
+        properties?: {
+          part?: {
+            sessionID?: string;
+            type?: string;
+            tool?: string;
+            state?: { status?: string };
+          };
+        };
+      };
+    }) => {
+      const event = input.event;
+      if (!event || typeof event !== "object") return;
+      if (event.type !== "message.part.updated") return;
+      const part = event.properties?.part;
+      const sessionID = part?.sessionID;
+      if (typeof sessionID !== "string" || sessionID.length === 0) return;
+      if (orchestratorSessionID === undefined) orchestratorSessionID = sessionID;
+      if (sessionID === orchestratorSessionID) {
+        // skip orchestrator events EXCEPT early task dispatches.
+        if (!part || !isOrchestratorTaskDispatch(part)) return;
+      }
+      try {
+        const line = JSON.stringify({
+          type: PULLFROG_BUS_EVENT_TYPE,
+          bus_event: event,
+        });
+        process.stdout.write(line + "\\n");
+      } catch {
+        // a circular reference or BigInt etc. would throw; swallow rather
+        // than letting a single bad event take down the plugin.
+      }
+    },
+  };
+}
+`; // end of the plugin source string; the opencode agent setup writes it unchanged to <XDG_CONFIG_HOME>/opencode/plugin/<PULLFROG_OPENCODE_PLUGIN_FILENAME>
+// agents/opencode.ts
 async function installOpencodeCli() {
   return await installFromNpmTarball({
     packageName: "opencode-ai",
@@ -147757,6 +148006,8 @@ async function installOpencodeCli() {
   });
 }
 var PULLFROG_OPENCODE_OUTPUT_LIMIT = 5e3;
+var GEMINI_3_DIRECT_THINKING_LEVEL = "medium"; // value buildSecurityConfig sets as options.thinkingConfig.thinkingLevel
+var GEMINI_3_DIRECT_API_IDS = ["gemini-3.1-pro-preview", "gemini-3-flash-preview"]; // model ids that receive that option under provider.google.models
 function buildSecurityConfig(ctx, model) {
   const config3 = {
     permission: {
@@ -147770,7 +148021,21 @@ function buildSecurityConfig(ctx, model) {
     mcp: {
       [pullfrogMcpName]: { type: "remote", url: ctx.mcpServerUrl }
     },
-    agent: buildReviewerAgentConfig()
+    agent: buildReviewerAgentConfig(),
+    provider: {
+      google: {
+        models: Object.fromEntries(
+          GEMINI_3_DIRECT_API_IDS.map((id) => [
+            id,
+            {
+              options: {
+                thinkingConfig: { thinkingLevel: GEMINI_3_DIRECT_THINKING_LEVEL }
+              }
+            }
+          ])
+        )
+      }
+    }
   };
   if (model) {
     config3.model = model;
@@ -147849,9 +148114,6 @@ async function runOpenCode(params) {
   const taskDispatchByCallID = /* @__PURE__ */ new Map();
   const pendingTaskDispatches = [];
   const knownNonTaskCallIDs = /* @__PURE__ */ new Set();
-  function isSubagentInFlight() {
-    return taskDispatchByCallID.size > 0 || pendingTaskDispatches.length > 0;
-  }
   function emitSubagentFinished(dispatch, status, output2, matchKind) {
     const subagentDuration = performance7.now() - dispatch.startedAt;
     const outputStr = typeof output2 === "string" ? output2 : "";
@@ -147970,18 +148232,20 @@ async function runOpenCode(params) {
         return;
       }
       if (toolName === "task") {
-        const taskInput = event.part?.state?.input ?? {};
-        const dispatchedLabel = labeler.recordTaskDispatch(taskInput);
-        const dispatch = {
-          label: dispatchedLabel,
-          startedAt: performance7.now(),
-          toolUseCallID: toolId
-        };
-        taskDispatchByCallID.set(toolId, dispatch);
-        pendingTaskDispatches.push(dispatch);
-        log.info(
-          `\xBB dispatching subagent: ${dispatchedLabel}` + (taskInput.subagent_type ? ` (subagent_type=${taskInput.subagent_type})` : "")
-        );
+        if (!taskDispatchByCallID.has(toolId)) {
+          const taskInput = event.part?.state?.input ?? {};
+          const dispatchedLabel = labeler.recordTaskDispatch(taskInput);
+          const dispatch = {
+            label: dispatchedLabel,
+            startedAt: performance7.now(),
+            toolUseCallID: toolId
+          };
+          taskDispatchByCallID.set(toolId, dispatch);
+          pendingTaskDispatches.push(dispatch);
+          log.info(
+            `\xBB dispatching subagent: ${dispatchedLabel}` + (taskInput.subagent_type ? ` (subagent_type=${taskInput.subagent_type})` : "")
+          );
+        }
       } else {
         knownNonTaskCallIDs.add(toolId);
       }
@@ -148002,6 +148266,10 @@ async function runOpenCode(params) {
       if (event.part?.state?.status === "completed" && event.part.state.output) {
         log.debug(withLabel(label, `  output: ${event.part.state.output}`));
       }
+      if (event.part?.state?.status === "error") {
+        const errorMsg = event.part.state.output ?? "(no error message)";
+        log.info(withLabel(label, `\xBB tool call failed: ${errorMsg}`));
+      }
       if (toolName.includes("report_progress") && params.todoTracker) {
         log.debug("\xBB report_progress detected, disabling todo tracking");
         params.todoTracker.cancel();
@@ -148088,6 +148356,53 @@ async function runOpenCode(params) {
           tokensLogged = true;
         }
       }
+    },
+    [PULLFROG_BUS_EVENT_TYPE]: async (event) => { // handles stdout lines whose type is PULLFROG_BUS_EVENT_TYPE; the wrapped event is in event.bus_event
+      const busEvent = event.bus_event;
+      if (!busEvent || busEvent.type !== "message.part.updated") return; // every other bus event type is ignored
+      const part = busEvent.properties?.part;
+      if (!part || typeof part.sessionID !== "string") return;
+      const sessionID = part.sessionID;
+      const partType = part.type;
+      if (partType === "tool") {
+        const status = part.state?.status;
+        const partWithToolFields = part;
+        const isOrchestratorTaskDispatch = partWithToolFields.tool === "task" && status === "running";
+        if (isOrchestratorTaskDispatch) { // a running task part is recorded as a dispatch here and never forwarded to tool_use
+          const callID = partWithToolFields.callID;
+          if (typeof callID === "string" && !taskDispatchByCallID.has(callID)) { // register each callID at most once
+            const taskInput = partWithToolFields.state?.input ?? {};
+            const dispatchedLabel = labeler.recordTaskDispatch(taskInput);
+            const dispatch = {
+              label: dispatchedLabel,
+              startedAt: performance7.now(),
+              toolUseCallID: callID
+            };
+            taskDispatchByCallID.set(callID, dispatch);
+            pendingTaskDispatches.push(dispatch);
+            log.info(
+              `\xBB dispatching subagent: ${dispatchedLabel}` + (taskInput.subagent_type ? ` (subagent_type=${taskInput.subagent_type})` : "")
+            );
+          }
+          return;
+        }
+        if (status !== "completed" && status !== "error") return; // other tool parts are forwarded only in these two statuses
+        await handlers2.tool_use({ // re-dispatch to the tool_use handler as a synthetic tool_use event
+          type: "tool_use",
+          sessionID,
+          part
+        });
+        return;
+      }
+      if (partType === "step-start" || partType === "step-finish") return; // step markers are dropped
+      if (partType === "text" && part.time?.end !== void 0) { // text parts are forwarded only once time.end is set
+        await handlers2.text({
+          type: "text",
+          sessionID,
+          part
+        });
+        return;
+      }
     }
   };
   const recentStderr = [];
@@ -148111,13 +148426,13 @@ async function runOpenCode(params) {
       // never fires — producing zombie runs. detached + killGroup nukes the
       // whole tree.
       killGroup: true,
-      // suspend the inner activity timer while a `task` subagent is in flight.
-      // opencode's task tool encapsulates subagent execution in-process — the
-      // subagent's internal events don't surface on the parent NDJSON stream,
-      // so without this the 5min timeout would falsely fire mid-subagent.
-      // suspend/resume is preferable to a heartbeat because there's no race
-      // between a periodic tick and a subagent finishing between ticks.
-      isPausedExternally: isSubagentInFlight,
+      // NB: we used to pass `isPausedExternally: isSubagentInFlight` to suspend
+      // the activity timer during subagent dispatches. unnecessary now that
+      // our injected plugin (action/agents/opencodePlugin.ts) re-emits
+      // subagent `message.part.updated` events on opencode's stdout — those
+      // arrive at child.stdout here, fire updateActivity(), and reset
+      // lastActivityTime naturally. verified empirically in PR #634
+      // (~3.3 plugin events/sec during a typical subagent run).
       onStdout: async (chunk) => {
         const text = chunk.toString();
         output += text;
@@ -148272,6 +148587,12 @@ var opencode = agent({
       XDG_CONFIG_HOME: join11(ctx.tmpdir, ".config")
     };
     mkdirSync5(join11(homeEnv.XDG_CONFIG_HOME, "opencode"), { recursive: true });
+    const opencodePluginDir = join11(homeEnv.XDG_CONFIG_HOME, "opencode", "plugin");
+    mkdirSync5(opencodePluginDir, { recursive: true });
+    writeFileSync8(
+      join11(opencodePluginDir, PULLFROG_OPENCODE_PLUGIN_FILENAME),
+      PULLFROG_OPENCODE_PLUGIN_SOURCE
+    );
     const agentBrowserVersion = getDevDependencyVersion("agent-browser");
     addSkill({
       ref: `vercel-labs/agent-browser@v${agentBrowserVersion}`,
@@ -148309,12 +148630,10 @@ var opencode = agent({
       args: [...baseArgs, ctx.instructions.full]
     });
     return runPostRunRetryLoop({
+      ctx,
       initialResult: result,
       initialUsage: result.usage,
-      stopScript: ctx.stopScript,
-      summaryFilePath: ctx.summaryFilePath,
-      summarySeed: ctx.summarySeed,
-      reflectionPrompt: buildLearningsReflectionPrompt("opencode"),
+      reflectionPrompt: ctx.toolState.learningsFilePath ? buildLearningsReflectionPrompt(ctx.toolState.learningsFilePath) : void 0,
       resume: async (c2) => runOpenCode({
         ...runParams,
         args: [...baseArgs, "--continue", c2.prompt]
@@ -152240,8 +152559,10 @@ var checkRepositoryAccess = async (token, repoOwner, repoName) => {
     const response = await githubRequest("/installation/repositories", {
       headers: { Authorization: `token ${token}` }
     });
+    const ownerLower = repoOwner.toLowerCase();
+    const nameLower = repoName.toLowerCase();
     return response.repositories.some(
-      (repo) => repo.owner.login === repoOwner && repo.name === repoName
+      (repo) => repo.owner.login.toLowerCase() === ownerLower && repo.name.toLowerCase() === nameLower
     );
   } catch {
     return false;
@@ -152527,7 +152848,7 @@ ${ctx.error}` : ctx.error;
 // utils/gitAuthServer.ts
 import { randomUUID as randomUUID3 } from "node:crypto";
-import { writeFileSync as writeFileSync8 } from "node:fs";
+import { writeFileSync as writeFileSync9 } from "node:fs";
 import { createServer as createServer2 } from "node:http";
 import { join as join13 } from "node:path";
 var CODE_TTL_MS = 5 * 60 * 1e3;
@@ -152616,7 +152937,7 @@ async function startGitAuthServer(tmpdir3) {
       `try{require("fs").unlinkSync("${scriptPath.replace(/\\/g, "\\\\")}")}catch(e){}`,
       `})}).on("error",function(){process.exit(1)})}`
     ].join("\n");
-    writeFileSync8(scriptPath, content, { mode: 448 });
+    writeFileSync9(scriptPath, content, { mode: 448 });
     return scriptPath;
   }
   async function close() {
@@ -152890,9 +153211,9 @@ function buildPromptContext(ctx) {
   };
 }
 function assembleFullPrompt(ctx) {
-  const learningsSection = ctx.learnings ? `************* LEARNINGS *************
+  const learningsSection = ctx.learningsFilePath ? `************* LEARNINGS *************
-${ctx.learnings}` : "";
+Repo-level learnings accumulated by previous agent runs live at \`${ctx.learningsFilePath}\`. Read this file early and let the entries inform your approach (test commands, conventions, gotchas, etc.). The file may be empty if no learnings have been collected yet.` : "";
   const runtimeSection = `************* RUNTIME *************
 ${ctx.runtime}`;
@@ -152919,8 +153240,8 @@ function resolveInstructions(ctx) {
   if (eventContext)
     tocEntries.push({ label: "EVENT CONTEXT", description: "related PR/issue data" });
   tocEntries.push({ label: "SYSTEM", description: "persona, security, tools, workflow rules" });
-  if (pctx.learnings)
-    tocEntries.push({ label: "LEARNINGS", description: "repo-specific knowledge" });
+  if (pctx.learningsFilePath)
+    tocEntries.push({ label: "LEARNINGS", description: "repo-specific knowledge file path" });
   tocEntries.push({ label: "RUNTIME", description: "environment metadata" });
   const toc = buildToc(tocEntries);
   const full = assembleFullPrompt({
@@ -152929,7 +153250,7 @@ function resolveInstructions(ctx) {
     procedure,
     eventContext,
     system,
-    learnings: pctx.learnings,
+    learningsFilePath: pctx.learningsFilePath,
     runtime: pctx.runtime
   });
   const event = [pctx.eventTitle, pctx.eventMetadata].filter(Boolean).join("\n\n---\n\n");
@@ -152943,6 +153264,32 @@ function resolveInstructions(ctx) {
   };
 }
+// utils/learnings.ts
+import { mkdir, readFile as readFile2, writeFile as writeFile2 } from "node:fs/promises";
+import { dirname as dirname4, join as join14 } from "node:path";
+var LEARNINGS_FILE_NAME = "pullfrog-learnings.md";
+var MAX_LEARNINGS_LENGTH = 1e4;
+function learningsFilePath(tmpdir3) {
+  return join14(tmpdir3, LEARNINGS_FILE_NAME);
+}
+async function seedLearningsFile(params) {
+  const path3 = learningsFilePath(params.tmpdir);
+  await mkdir(dirname4(path3), { recursive: true });
+  await writeFile2(path3, params.current ?? "", "utf8");
+  return path3;
+}
+async function readLearningsFile(path3) {
+  let raw2;
+  try {
+    raw2 = await readFile2(path3, "utf8");
+  } catch {
+    return null;
+  }
+  const trimmed = raw2.trim();
+  if (trimmed.length > MAX_LEARNINGS_LENGTH) return trimmed.slice(0, MAX_LEARNINGS_LENGTH);
+  return trimmed;
+}
 // utils/normalizeEnv.ts
 function maskValue(value2) {
   if (value2 && typeof value2 === "string" && value2.trim().length > 0) {
@@ -153118,8 +153465,8 @@ function resolvePayload(resolvedPromptInput, repoSettings) {
 }
 // utils/prSummary.ts
-import { mkdir, readFile as readFile2, writeFile as writeFile2 } from "node:fs/promises";
-import { dirname as dirname4, join as join14 } from "node:path";
+import { mkdir as mkdir2, readFile as readFile3, writeFile as writeFile3 } from "node:fs/promises";
+import { dirname as dirname5, join as join15 } from "node:path";
 var SUMMARY_FILE_NAME = "pullfrog-summary.md";
 var SUMMARY_SCAFFOLD = `# PR summary
@@ -153129,19 +153476,19 @@ var SUMMARY_SCAFFOLD = `# PR summary
 var MIN_SNAPSHOT_LENGTH = 60;
 var MAX_SNAPSHOT_LENGTH = 32768;
 function summaryFilePath(tmpdir3) {
-  return join14(tmpdir3, SUMMARY_FILE_NAME);
+  return join15(tmpdir3, SUMMARY_FILE_NAME);
 }
 async function seedSummaryFile(params) {
   const path3 = summaryFilePath(params.tmpdir);
-  await mkdir(dirname4(path3), { recursive: true });
+  await mkdir2(dirname5(path3), { recursive: true });
   const seed = params.previousSnapshot && params.previousSnapshot.trim().length >= MIN_SNAPSHOT_LENGTH ? params.previousSnapshot : SUMMARY_SCAFFOLD;
-  await writeFile2(path3, seed, "utf8");
+  await writeFile3(path3, seed, "utf8");
   return path3;
 }
 async function readSummaryFile(path3) {
   let raw2;
   try {
-    raw2 = await readFile2(path3, "utf8");
+    raw2 = await readFile3(path3, "utf8");
   } catch {
     return null;
   }
@@ -153359,9 +153706,9 @@ async function resolveRunContextData(params) {
 import { execFileSync as execFileSync5, execSync as execSync3 } from "node:child_process";
 import { mkdtempSync } from "node:fs";
 import { tmpdir as tmpdir2 } from "node:os";
-import { join as join15 } from "node:path";
+import { join as join16 } from "node:path";
 function createTempDirectory() {
-  const sharedTempDir = mkdtempSync(join15(tmpdir2(), "pullfrog-"));
+  const sharedTempDir = mkdtempSync(join16(tmpdir2(), "pullfrog-"));
   process.env.PULLFROG_TEMP_DIR = sharedTempDir;
   log.info(`\xBB created temp dir at ${sharedTempDir}`);
   return sharedTempDir;
@@ -153763,15 +154110,12 @@ function formatTransientErrorSummary(error49, owner) {
 }
 async function mintProxyKey(ctx) {
   try {
-    process.env.ACTIONS_ID_TOKEN_REQUEST_URL = ctx.oidcCredentials.requestUrl;
-    process.env.ACTIONS_ID_TOKEN_REQUEST_TOKEN = ctx.oidcCredentials.requestToken;
-    const oidcToken = await core6.getIDToken("pullfrog-api");
-    delete process.env.ACTIONS_ID_TOKEN_REQUEST_URL;
-    delete process.env.ACTIONS_ID_TOKEN_REQUEST_TOKEN;
+    const headers = await buildProxyTokenHeaders(ctx);
+    if (!headers) return null;
     const response = await apiFetch({
       path: "/api/proxy-token",
       method: "POST",
-      headers: { Authorization: `Bearer ${oidcToken}` }
+      headers
     });
     if (response.status === 402) {
       const body = await response.json().catch(() => null);
@@ -153803,15 +154147,30 @@ async function mintProxyKey(ctx) {
     delete process.env.ACTIONS_ID_TOKEN_REQUEST_TOKEN;
   }
 }
+async function buildProxyTokenHeaders(ctx) {
+  if (ctx.oidcCredentials) {
+    process.env.ACTIONS_ID_TOKEN_REQUEST_URL = ctx.oidcCredentials.requestUrl;
+    process.env.ACTIONS_ID_TOKEN_REQUEST_TOKEN = ctx.oidcCredentials.requestToken;
+    const oidcToken = await core6.getIDToken("pullfrog-api");
+    delete process.env.ACTIONS_ID_TOKEN_REQUEST_URL;
+    delete process.env.ACTIONS_ID_TOKEN_REQUEST_TOKEN;
+    return { Authorization: `Bearer ${oidcToken}` };
+  }
+  if (isLocalApiUrl()) {
+    log.info(`\xBB proxy: dev bypass (x-dev-repo) for ${ctx.repo.owner}/${ctx.repo.name}`);
+    return { "x-dev-repo": `${ctx.repo.owner}/${ctx.repo.name}` };
+  }
+  return null;
+}
 async function resolveProxyModel(ctx) {
   if (process.env.PULLFROG_MODEL?.trim()) return;
   const needsProxy = isInfraCovered({ isOss: ctx.oss, plan: ctx.plan }) && ctx.proxyModel;
   if (!needsProxy) return;
-  if (!ctx.oidcCredentials) {
+  if (!ctx.oidcCredentials && !isLocalApiUrl()) {
     log.warning("\xBB proxy requested but no OIDC credentials available \u2014 skipping");
     return;
   }
-  const key = await mintProxyKey({ oidcCredentials: ctx.oidcCredentials });
+  const key = await mintProxyKey({ oidcCredentials: ctx.oidcCredentials, repo: ctx.repo });
   if (!key) return;
   process.env.OPENROUTER_API_KEY = key;
   core6.setSecret(key);
@@ -153835,6 +154194,45 @@ async function fetchPreviousSnapshot(ctx, prNumber) {
     return null;
   }
 }
+async function persistLearnings(ctx) {
+  const filePath = ctx.toolState.learningsFilePath;
+  if (!filePath) return;
+  if (ctx.toolState.learningsPersistAttempted) return;
+  ctx.toolState.learningsPersistAttempted = true;
+  const current = await readLearningsFile(filePath);
+  if (current === null) {
+    log.debug(`learnings tmpfile missing or unreadable at ${filePath} \u2014 skipping persist`);
+    return;
+  }
+  const seed = ctx.toolState.learningsSeed?.trim() ?? "";
+  if (current === seed) {
+    log.debug("learnings tmpfile unchanged from seed \u2014 skipping persist");
+    return;
+  }
+  try {
+    const response = await apiFetch({
+      path: `/api/repo/${ctx.repo.owner}/${ctx.repo.name}/learnings`,
+      method: "PATCH",
+      headers: {
+        authorization: `Bearer ${ctx.apiToken}`,
+        "content-type": "application/json"
+      },
+      body: JSON.stringify({
+        learnings: current,
+        model: ctx.toolState.model
+      }),
+      signal: AbortSignal.timeout(1e4)
+    });
+    if (!response.ok) {
+      const error49 = await response.text().catch(() => "(no body)");
+      log.debug(`learnings persist failed (${response.status}): ${error49}`);
+      return;
+    }
+    log.info("\xBB learnings updated");
+  } catch (err) {
+    log.debug(`learnings persist failed: ${err instanceof Error ? err.message : String(err)}`);
+  }
+}
 async function persistSummary(ctx) {
   const filePath = ctx.toolState.summaryFilePath;
   if (!filePath) return;
@@ -153856,9 +154254,10 @@ async function persistSummary(ctx) {
     log.debug(`pr summary persist failed: ${err instanceof Error ? err.message : String(err)}`);
   });
 }
-async function writeJobSummary(toolState) {
+async function writeJobSummary(toolState, finalOutput) {
   const usageSummary = formatUsageSummary(toolState.usageEntries);
-  const summaryParts = [toolState.lastProgressBody, usageSummary].filter(Boolean);
+  const body = toolState.lastProgressBody || finalOutput;
+  const summaryParts = [body, usageSummary].filter(Boolean);
   if (summaryParts.length > 0) {
     await writeSummary(summaryParts.join("\n\n"));
   }
@@ -153916,7 +154315,8 @@ async function main() {
         oss: runContext.oss,
         plan: runContext.plan,
         proxyModel: runContext.proxyModel,
-        oidcCredentials
+        oidcCredentials,
+        repo: runContext.repo
       });
     } catch (error49) {
       if (error49 instanceof BillingError) {
@@ -154019,12 +154419,32 @@ async function main() {
         toolContext.mcpServerUrl = mcpHttpServer.url;
         log.info(`\xBB MCP server started at ${mcpHttpServer.url}`);
         timer.checkpoint("mcpServer");
+        try {
+          const learningsPath = await seedLearningsFile({
+            tmpdir: tmpdir3,
+            current: runContext.repoSettings.learnings
+          });
+          toolState.learningsFilePath = learningsPath;
+          try {
+            toolState.learningsSeed = await readFile4(learningsPath, "utf8");
+          } catch {
+          }
+          log.info(
+            `\xBB learnings seeded at ${learningsPath} (existing=${runContext.repoSettings.learnings ? "yes" : "no"})`
+          );
+          const ctxForExit = toolContext;
+          onExitSignal(() => persistLearnings(ctxForExit));
+        } catch (err) {
+          log.warning(
+            `\xBB learnings seed failed: ${err instanceof Error ? err.message : String(err)} \u2014 continuing without learnings file`
+          );
+        }
         if (payload.generateSummary && payload.event.is_pr && payload.event.issue_number) {
           const previousSnapshot = await fetchPreviousSnapshot(toolContext, payload.event.issue_number);
           const filePath = await seedSummaryFile({ tmpdir: tmpdir3, previousSnapshot });
           toolState.summaryFilePath = filePath;
           try {
-            toolState.summarySeed = await readFile3(filePath, "utf8");
+            toolState.summarySeed = await readFile4(filePath, "utf8");
           } catch {
           }
           log.info(
@@ -154048,7 +154468,7 @@ async function main() {
           modes: modes2,
           agentId,
           outputSchema,
-          learnings: runContext.repoSettings.learnings
+          learningsFilePath: toolState.learningsFilePath ?? null
         });
         const logParts = [
           instructions.eventInstructions ? `EVENT-LEVEL INSTRUCTIONS:
@@ -154064,7 +154484,7 @@ ${instructions.user}` : null,
           log.info(instructions.full);
         });
         if (agentId === "opencode") {
-          const pluginDir = join16(process.cwd(), ".opencode", "plugin");
+          const pluginDir = join17(process.cwd(), ".opencode", "plugin");
           const hasPlugins = existsSync7(pluginDir) && readdirSync(pluginDir).some((f) => /\.[jt]sx?$/.test(f));
           if (hasPlugins && toolState.dependencyInstallation?.promise) {
             log.info(
@@ -154123,8 +154543,7 @@ ${instructions.user}` : null,
           instructions,
           todoTracker,
           stopScript: runContext.repoSettings.stopScript,
-          summaryFilePath: toolState.summaryFilePath,
-          summarySeed: toolState.summarySeed,
+          toolState,
           onActivityTimeout: onInnerActivityTimeout,
           onToolUse: (event) => {
             const wasTracked = recordDiffReadFromToolUse({
@@ -154182,12 +154601,27 @@ ${instructions.user}` : null,
         if (toolContext) {
           await persistSummary(toolContext);
         }
-        if (toolContext && toolState.progressComment && !toolState.finalSummaryWritten) {
+        if (toolContext) {
+          await persistLearnings(toolContext);
+        }
+        if (!result.success && toolContext && toolState.progressComment) {
+          await reportErrorToComment({
+            toolState,
+            error: result.error || "agent run failed"
+          }).catch((error49) => {
+            log.debug(`failure error report failed: ${error49}`);
+          });
+        }
+        if (toolContext && result.success && toolState.progressComment && !toolState.finalSummaryWritten) {
           await deleteProgressComment(toolContext).catch((error49) => {
             log.debug(`stranded progress comment cleanup failed: ${error49}`);
           });
         }
-        await writeJobSummary(toolState);
+        try {
+          await writeJobSummary(toolState, result.output);
+        } catch (error49) {
+          log.debug(`job summary write failed: ${error49}`);
+        }
         if (toolState.output) {
           log.info(`::pullfrog-output::${Buffer.from(toolState.output).toString("base64")}`);
           core6.setOutput("result", toolState.output);
@@ -154234,6 +154668,9 @@ ${errorMessage}
       if (toolContext) {
         await persistSummary(toolContext);
       }
+      if (toolContext) {
+        await persistLearnings(toolContext);
+      }
       return {
         success: false,
         error: errorMessage
@@ -154266,7 +154703,7 @@ ${errorMessage}
 }
 // commands/gha.ts
-process.env.PATH = `${dirname5(process.execPath)}:${process.env.PATH}`;
+process.env.PATH = `${dirname6(process.execPath)}:${process.env.PATH}`;
 var STATE_TOKEN = "token";
 async function runMain() {
   try {
@@ -156076,7 +156513,7 @@ async function run2() {
 }
 // cli.ts
-var VERSION10 = "0.1.1";
+var VERSION10 = "0.1.3";
 var bin = basename2(process.argv[1] || "");
 var PROG = bin === "pf" || bin === "pullfrog" ? bin : "pullfrog";
 var rawArgs = process.argv.slice(2);